{ "best_metric": 1.5792005062103271, "best_model_checkpoint": "qlora_output/checkpoint-1200", "epoch": 1.4679393049437102, "eval_steps": 600, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03915810083210964, "grad_norm": 0.07319402694702148, "learning_rate": 3.555555555555556e-05, "loss": 2.4428, "step": 40 }, { "epoch": 0.07831620166421928, "grad_norm": 0.04330237954854965, "learning_rate": 7.111111111111112e-05, "loss": 2.268, "step": 80 }, { "epoch": 0.11747430249632893, "grad_norm": 0.05867455527186394, "learning_rate": 0.00010666666666666667, "loss": 2.1806, "step": 120 }, { "epoch": 0.15663240332843856, "grad_norm": 0.06936266273260117, "learning_rate": 0.00014222222222222224, "loss": 2.0778, "step": 160 }, { "epoch": 0.19579050416054822, "grad_norm": 0.08056484907865524, "learning_rate": 0.00017777777777777779, "loss": 2.0382, "step": 200 }, { "epoch": 0.23494860499265785, "grad_norm": 0.0779654011130333, "learning_rate": 0.0001999317060143023, "loss": 1.9227, "step": 240 }, { "epoch": 0.2741067058247675, "grad_norm": 0.11802724003791809, "learning_rate": 0.00019908312530915603, "loss": 1.9139, "step": 280 }, { "epoch": 0.3132648066568771, "grad_norm": 0.0852489247918129, "learning_rate": 0.00019727282722446047, "loss": 1.9423, "step": 320 }, { "epoch": 0.3524229074889868, "grad_norm": 0.1409972459077835, "learning_rate": 0.00019451838281608197, "loss": 1.8484, "step": 360 }, { "epoch": 0.39158100832109644, "grad_norm": 0.11129080504179001, "learning_rate": 0.00019084652718195238, "loss": 1.7694, "step": 400 }, { "epoch": 0.43073910915320607, "grad_norm": 0.10179898887872696, "learning_rate": 0.00018629289996673897, "loss": 1.8026, "step": 440 }, { "epoch": 0.4698972099853157, "grad_norm": 0.14124783873558044, "learning_rate": 0.00018090169943749476, "loss": 1.8217, "step": 480 }, { "epoch": 0.5090553108174254, "grad_norm": 0.16184218227863312, "learning_rate": 0.0001747252534878891, "loss": 1.7847, "step": 520 }, { "epoch": 0.548213411649535, "grad_norm": 0.11349498480558395, "learning_rate": 0.00016782351173492342, "loss": 1.6622, "step": 560 }, { "epoch": 0.5873715124816447, "grad_norm": 0.08884529024362564, "learning_rate": 0.00016026346363792567, "loss": 1.7633, "step": 600 }, { "epoch": 0.5873715124816447, "eval_loss": 1.6572695970535278, "eval_runtime": 1912.2507, "eval_samples_per_second": 1.425, "eval_steps_per_second": 0.713, "step": 600 }, { "epoch": 0.6265296133137542, "grad_norm": 0.09996389597654343, "learning_rate": 0.0001521184882876585, "loss": 1.6764, "step": 640 }, { "epoch": 0.6656877141458639, "grad_norm": 0.12769252061843872, "learning_rate": 0.00014346764217659653, "loss": 1.7871, "step": 680 }, { "epoch": 0.7048458149779736, "grad_norm": 0.13380451500415802, "learning_rate": 0.00013439489186339282, "loss": 1.7167, "step": 720 }, { "epoch": 0.7440039158100832, "grad_norm": 0.11822285503149033, "learning_rate": 0.0001249882989794231, "loss": 1.6789, "step": 760 }, { "epoch": 0.7831620166421929, "grad_norm": 0.12109290808439255, "learning_rate": 0.00011533916548786857, "loss": 1.583, "step": 800 }, { "epoch": 0.8223201174743024, "grad_norm": 0.12838001549243927, "learning_rate": 0.000105541147491597, "loss": 1.7412, "step": 840 }, { "epoch": 0.8614782183064121, "grad_norm": 0.16042716801166534, "learning_rate": 9.568934619137046e-05, "loss": 1.6519, "step": 880 }, { "epoch": 0.9006363191385218, "grad_norm": 0.1427149474620819, "learning_rate": 8.587938481769089e-05, "loss": 1.6598, "step": 920 }, { "epoch": 0.9397944199706314, "grad_norm": 0.118178591132164, "learning_rate": 7.620648049573815e-05, "loss": 1.7378, "step": 960 }, { "epoch": 0.9789525208027411, "grad_norm": 0.1253277212381363, "learning_rate": 6.676452005203406e-05, "loss": 1.6451, "step": 1000 }, { "epoch": 1.0176211453744493, "grad_norm": 0.15462452173233032, "learning_rate": 5.764514873320761e-05, "loss": 1.6475, "step": 1040 }, { "epoch": 1.056779246206559, "grad_norm": 0.106235072016716, "learning_rate": 4.893688068190932e-05, "loss": 1.6686, "step": 1080 }, { "epoch": 1.0959373470386686, "grad_norm": 0.09717393666505814, "learning_rate": 4.072423980374452e-05, "loss": 1.6824, "step": 1120 }, { "epoch": 1.1350954478707782, "grad_norm": 0.13711334764957428, "learning_rate": 3.308693936411421e-05, "loss": 1.6147, "step": 1160 }, { "epoch": 1.174253548702888, "grad_norm": 0.1265803724527359, "learning_rate": 2.6099108277934103e-05, "loss": 1.6174, "step": 1200 }, { "epoch": 1.174253548702888, "eval_loss": 1.5792005062103271, "eval_runtime": 1903.0333, "eval_samples_per_second": 1.432, "eval_steps_per_second": 0.716, "step": 1200 }, { "epoch": 1.2134116495349976, "grad_norm": 0.09578167647123337, "learning_rate": 1.982857160199334e-05, "loss": 1.6246, "step": 1240 }, { "epoch": 1.2525697503671072, "grad_norm": 0.14227357506752014, "learning_rate": 1.4336192213613742e-05, "loss": 1.5548, "step": 1280 }, { "epoch": 1.2917278511992167, "grad_norm": 0.1526080220937729, "learning_rate": 9.675280065387116e-06, "loss": 1.5454, "step": 1320 }, { "epoch": 1.3308859520313265, "grad_norm": 0.17356757819652557, "learning_rate": 5.891074749862857e-06, "loss": 1.5555, "step": 1360 }, { "epoch": 1.3700440528634361, "grad_norm": 0.1258653849363327, "learning_rate": 3.0203063964990617e-06, "loss": 1.5775, "step": 1400 }, { "epoch": 1.4092021536955457, "grad_norm": 0.12249883264303207, "learning_rate": 1.0908391628854041e-06, "loss": 1.5619, "step": 1440 }, { "epoch": 1.4483602545276555, "grad_norm": 0.1455027014017105, "learning_rate": 1.2140078057101266e-07, "loss": 1.5342, "step": 1480 }, { "epoch": 1.4679393049437102, "step": 1500, "total_flos": 1.105565365842985e+17, "train_loss": 1.763797264099121, "train_runtime": 30390.5778, "train_samples_per_second": 0.395, "train_steps_per_second": 0.049 } ], "logging_steps": 40, "max_steps": 1500, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 600, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.05 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.105565365842985e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }