{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9973661106233538, "eval_steps": 500, "global_step": 284, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03511852502194908, "grad_norm": 194.98139599033956, "learning_rate": 1.724137931034483e-06, "loss": 2.1612, "step": 10 }, { "epoch": 0.07023705004389816, "grad_norm": 11.574681452175845, "learning_rate": 3.448275862068966e-06, "loss": 1.5263, "step": 20 }, { "epoch": 0.10535557506584724, "grad_norm": 4.620556133093483, "learning_rate": 4.999810275287077e-06, "loss": 1.1514, "step": 30 }, { "epoch": 0.14047410008779632, "grad_norm": 4.8618300415248425, "learning_rate": 4.977078132728901e-06, "loss": 1.0009, "step": 40 }, { "epoch": 0.17559262510974538, "grad_norm": 5.039664200269418, "learning_rate": 4.916796010672969e-06, "loss": 0.9159, "step": 50 }, { "epoch": 0.21071115013169447, "grad_norm": 4.677695184320373, "learning_rate": 4.819877724641437e-06, "loss": 0.8819, "step": 60 }, { "epoch": 0.24582967515364354, "grad_norm": 4.476208091052348, "learning_rate": 4.687792457057482e-06, "loss": 0.8205, "step": 70 }, { "epoch": 0.28094820017559263, "grad_norm": 4.076277674081994, "learning_rate": 4.522542485937369e-06, "loss": 0.8138, "step": 80 }, { "epoch": 0.3160667251975417, "grad_norm": 3.706466470362564, "learning_rate": 4.326632832396733e-06, "loss": 0.7546, "step": 90 }, { "epoch": 0.35118525021949076, "grad_norm": 3.807926746840706, "learning_rate": 4.1030332870839466e-06, "loss": 0.7552, "step": 100 }, { "epoch": 0.3863037752414399, "grad_norm": 4.448525410750735, "learning_rate": 3.855133391181124e-06, "loss": 0.742, "step": 110 }, { "epoch": 0.42142230026338895, "grad_norm": 3.9020262955384557, "learning_rate": 3.586691054414913e-06, "loss": 0.7188, "step": 120 }, { "epoch": 0.456540825285338, "grad_norm": 4.436756217145944, "learning_rate": 3.3017755889756382e-06, "loss": 0.7112, "step": 130 }, { "epoch": 0.4916593503072871, "grad_norm": 3.3937208218898154, "learning_rate": 3.0047060228925256e-06, "loss": 0.6893, "step": 140 }, { "epoch": 0.5267778753292361, "grad_norm": 5.294699703871525, "learning_rate": 2.699985627971354e-06, "loss": 0.6668, "step": 150 }, { "epoch": 0.5618964003511853, "grad_norm": 4.373298006073623, "learning_rate": 2.392233654784262e-06, "loss": 0.6659, "step": 160 }, { "epoch": 0.5970149253731343, "grad_norm": 4.608979891075067, "learning_rate": 2.086115309539675e-06, "loss": 0.643, "step": 170 }, { "epoch": 0.6321334503950834, "grad_norm": 5.305060414443979, "learning_rate": 1.7862710343116451e-06, "loss": 0.6655, "step": 180 }, { "epoch": 0.6672519754170325, "grad_norm": 3.420875543474142, "learning_rate": 1.4972461626682033e-06, "loss": 0.6251, "step": 190 }, { "epoch": 0.7023705004389815, "grad_norm": 4.603437178273803, "learning_rate": 1.2234220170477332e-06, "loss": 0.6449, "step": 200 }, { "epoch": 0.7374890254609306, "grad_norm": 5.3351263316177215, "learning_rate": 9.689494923768756e-07, "loss": 0.6284, "step": 210 }, { "epoch": 0.7726075504828798, "grad_norm": 3.607022821385861, "learning_rate": 7.376861327346325e-07, "loss": 0.6072, "step": 220 }, { "epoch": 0.8077260755048288, "grad_norm": 4.795667171198547, "learning_rate": 5.33137654916292e-07, "loss": 0.6187, "step": 230 }, { "epoch": 0.8428446005267779, "grad_norm": 3.9213724314825216, "learning_rate": 3.5840480534034355e-07, "loss": 0.6145, "step": 240 }, { "epoch": 0.8779631255487269, "grad_norm": 5.451274818264296, "learning_rate": 2.1613635589349756e-07, "loss": 0.6012, "step": 250 }, { "epoch": 0.913081650570676, "grad_norm": 4.312276909206807, "learning_rate": 1.0848895124889819e-07, "loss": 0.6184, "step": 260 }, { "epoch": 0.9482001755926251, "grad_norm": 4.611522839692978, "learning_rate": 3.709441633123367e-08, "loss": 0.5952, "step": 270 }, { "epoch": 0.9833187006145742, "grad_norm": 6.192176760901921, "learning_rate": 3.035019514275317e-09, "loss": 0.5973, "step": 280 }, { "epoch": 0.9973661106233538, "step": 284, "total_flos": 1.1831516853383987e+17, "train_loss": 0.7974992327287164, "train_runtime": 7644.4468, "train_samples_per_second": 4.767, "train_steps_per_second": 0.037 } ], "logging_steps": 10, "max_steps": 284, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1831516853383987e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }