{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.333950903195924, "eval_steps": 8, "global_step": 90, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.014821676702176934, "eval_loss": 1.7156556844711304, "eval_runtime": 26.2876, "eval_samples_per_second": 8.673, "eval_steps_per_second": 4.337, "step": 1 }, { "epoch": 0.0444650301065308, "grad_norm": 2.3580570220947266, "learning_rate": 3e-05, "loss": 1.7416, "step": 3 }, { "epoch": 0.0889300602130616, "grad_norm": 1.4993271827697754, "learning_rate": 6e-05, "loss": 1.5326, "step": 6 }, { "epoch": 0.11857341361741547, "eval_loss": 0.9888727068901062, "eval_runtime": 26.3261, "eval_samples_per_second": 8.661, "eval_steps_per_second": 4.33, "step": 8 }, { "epoch": 0.1333950903195924, "grad_norm": 1.521828532218933, "learning_rate": 9e-05, "loss": 1.0888, "step": 9 }, { "epoch": 0.1778601204261232, "grad_norm": 1.2248362302780151, "learning_rate": 0.00012, "loss": 0.8213, "step": 12 }, { "epoch": 0.222325150532654, "grad_norm": 1.055378794670105, "learning_rate": 0.00015000000000000001, "loss": 0.7635, "step": 15 }, { "epoch": 0.23714682723483094, "eval_loss": 0.5862013101577759, "eval_runtime": 26.3342, "eval_samples_per_second": 8.658, "eval_steps_per_second": 4.329, "step": 16 }, { "epoch": 0.2667901806391848, "grad_norm": 1.1668015718460083, "learning_rate": 0.00018, "loss": 0.6722, "step": 18 }, { "epoch": 0.3112552107457156, "grad_norm": 1.0489511489868164, "learning_rate": 0.00019989930665413147, "loss": 0.6261, "step": 21 }, { "epoch": 0.3557202408522464, "grad_norm": 0.8146524429321289, "learning_rate": 0.00019839295885986296, "loss": 0.6004, "step": 24 }, { "epoch": 0.3557202408522464, "eval_loss": 0.5033943057060242, "eval_runtime": 26.4413, "eval_samples_per_second": 8.623, "eval_steps_per_second": 4.311, "step": 24 }, { "epoch": 0.4001852709587772, "grad_norm": 0.8943031430244446, "learning_rate": 0.00019510565162951537, "loss": 0.5921, "step": 27 }, { "epoch": 0.444650301065308, "grad_norm": 0.7065706849098206, "learning_rate": 0.0001900968867902419, "loss": 0.5142, "step": 30 }, { "epoch": 0.4742936544696619, "eval_loss": 0.4665619134902954, "eval_runtime": 26.3696, "eval_samples_per_second": 8.646, "eval_steps_per_second": 4.323, "step": 32 }, { "epoch": 0.4891153311718388, "grad_norm": 0.82402104139328, "learning_rate": 0.00018345732537213027, "loss": 0.5384, "step": 33 }, { "epoch": 0.5335803612783696, "grad_norm": 0.7770050764083862, "learning_rate": 0.00017530714660036112, "loss": 0.5215, "step": 36 }, { "epoch": 0.5780453913849004, "grad_norm": 0.6626347899436951, "learning_rate": 0.00016579387259397127, "loss": 0.5285, "step": 39 }, { "epoch": 0.5928670680870773, "eval_loss": 0.45161083340644836, "eval_runtime": 26.3307, "eval_samples_per_second": 8.659, "eval_steps_per_second": 4.33, "step": 40 }, { "epoch": 0.6225104214914312, "grad_norm": 0.6447142362594604, "learning_rate": 0.00015508969814521025, "loss": 0.5811, "step": 42 }, { "epoch": 0.666975451597962, "grad_norm": 0.6098252534866333, "learning_rate": 0.00014338837391175582, "loss": 0.4708, "step": 45 }, { "epoch": 0.7114404817044928, "grad_norm": 0.669792890548706, "learning_rate": 0.00013090169943749476, "loss": 0.4997, "step": 48 }, { "epoch": 0.7114404817044928, "eval_loss": 0.43807509541511536, "eval_runtime": 26.3421, "eval_samples_per_second": 8.655, "eval_steps_per_second": 4.328, "step": 48 }, { "epoch": 0.7559055118110236, "grad_norm": 0.6415955424308777, "learning_rate": 0.00011785568947986367, "loss": 0.5092, "step": 51 }, { "epoch": 0.8003705419175544, "grad_norm": 0.61659836769104, "learning_rate": 0.00010448648303505151, "loss": 0.5257, "step": 54 }, { "epoch": 0.8300138953219083, "eval_loss": 0.4247357249259949, "eval_runtime": 26.342, "eval_samples_per_second": 8.655, "eval_steps_per_second": 4.328, "step": 56 }, { "epoch": 0.8448355720240852, "grad_norm": 0.6660127639770508, "learning_rate": 9.103606910965666e-05, "loss": 0.5072, "step": 57 }, { "epoch": 0.889300602130616, "grad_norm": 0.6248095035552979, "learning_rate": 7.774790660436858e-05, "loss": 0.485, "step": 60 }, { "epoch": 0.9337656322371468, "grad_norm": 0.6779568195343018, "learning_rate": 6.486251759186572e-05, "loss": 0.4735, "step": 63 }, { "epoch": 0.9485873089393237, "eval_loss": 0.41895875334739685, "eval_runtime": 26.3269, "eval_samples_per_second": 8.66, "eval_steps_per_second": 4.33, "step": 64 }, { "epoch": 0.9782306623436776, "grad_norm": 0.5692128539085388, "learning_rate": 5.261313375270014e-05, "loss": 0.482, "step": 66 }, { "epoch": 1.0226956924502084, "grad_norm": 0.4736873507499695, "learning_rate": 4.12214747707527e-05, "loss": 0.4658, "step": 69 }, { "epoch": 1.0671607225567392, "grad_norm": 0.6735922694206238, "learning_rate": 3.089373510131354e-05, "loss": 0.4016, "step": 72 }, { "epoch": 1.0671607225567392, "eval_loss": 0.41547104716300964, "eval_runtime": 26.3371, "eval_samples_per_second": 8.657, "eval_steps_per_second": 4.329, "step": 72 }, { "epoch": 1.11162575266327, "grad_norm": 0.5336905121803284, "learning_rate": 2.181685175319702e-05, "loss": 0.3519, "step": 75 }, { "epoch": 1.1560907827698008, "grad_norm": 0.5846615433692932, "learning_rate": 1.415512063981339e-05, "loss": 0.3481, "step": 78 }, { "epoch": 1.1857341361741547, "eval_loss": 0.4153323173522949, "eval_runtime": 26.3347, "eval_samples_per_second": 8.658, "eval_steps_per_second": 4.329, "step": 80 }, { "epoch": 1.2005558128763316, "grad_norm": 0.49688470363616943, "learning_rate": 8.047222744854943e-06, "loss": 0.3458, "step": 81 }, { "epoch": 1.2450208429828624, "grad_norm": 0.6959128379821777, "learning_rate": 3.6037139304146762e-06, "loss": 0.354, "step": 84 }, { "epoch": 1.2894858730893932, "grad_norm": 0.6246429085731506, "learning_rate": 9.0502382320653e-07, "loss": 0.3348, "step": 87 }, { "epoch": 1.3043075497915702, "eval_loss": 0.4123484492301941, "eval_runtime": 26.3287, "eval_samples_per_second": 8.66, "eval_steps_per_second": 4.33, "step": 88 }, { "epoch": 1.333950903195924, "grad_norm": 0.6873133778572083, "learning_rate": 0.0, "loss": 0.3512, "step": 90 } ], "logging_steps": 3, "max_steps": 90, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 8, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.587618286712914e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }