{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 285, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03508771929824561, "grad_norm": 6.543553342182306, "learning_rate": 3.448275862068966e-06, "loss": 1.8558, "step": 10 }, { "epoch": 0.07017543859649122, "grad_norm": 3.5154288610740023, "learning_rate": 6.896551724137932e-06, "loss": 1.4458, "step": 20 }, { "epoch": 0.10526315789473684, "grad_norm": 2.778543537236469, "learning_rate": 9.999623509195724e-06, "loss": 1.2409, "step": 30 }, { "epoch": 0.14035087719298245, "grad_norm": 2.5059789149316045, "learning_rate": 9.9545131771389e-06, "loss": 1.1153, "step": 40 }, { "epoch": 0.17543859649122806, "grad_norm": 2.33426924896289, "learning_rate": 9.834882355224261e-06, "loss": 1.0284, "step": 50 }, { "epoch": 0.21052631578947367, "grad_norm": 2.238033736817208, "learning_rate": 9.64253040236608e-06, "loss": 0.9903, "step": 60 }, { "epoch": 0.24561403508771928, "grad_norm": 2.172103858227053, "learning_rate": 9.380350470977033e-06, "loss": 0.9464, "step": 70 }, { "epoch": 0.2807017543859649, "grad_norm": 2.0775067726943957, "learning_rate": 9.052285991262975e-06, "loss": 0.9232, "step": 80 }, { "epoch": 0.3157894736842105, "grad_norm": 2.036467828021783, "learning_rate": 8.663271358362064e-06, "loss": 0.8927, "step": 90 }, { "epoch": 0.3508771929824561, "grad_norm": 2.092139892865783, "learning_rate": 8.219157714448957e-06, "loss": 0.8748, "step": 100 }, { "epoch": 0.38596491228070173, "grad_norm": 2.354936030454241, "learning_rate": 7.726624942110233e-06, "loss": 0.8712, "step": 110 }, { "epoch": 0.42105263157894735, "grad_norm": 2.4480552576537313, "learning_rate": 7.193081192692639e-06, "loss": 0.8413, "step": 120 }, { "epoch": 0.45614035087719296, "grad_norm": 2.225961040994733, "learning_rate": 6.626551460811316e-06, "loss": 0.8245, "step": 130 }, { "epoch": 0.49122807017543857, "grad_norm": 2.2280118933835227, "learning_rate": 6.035556880961093e-06, "loss": 0.7995, "step": 140 }, { "epoch": 0.5263157894736842, "grad_norm": 2.1092585894132694, "learning_rate": 5.4289865617222005e-06, "loss": 0.7919, "step": 150 }, { "epoch": 0.5614035087719298, "grad_norm": 2.2317869413724947, "learning_rate": 4.815963885293206e-06, "loss": 0.794, "step": 160 }, { "epoch": 0.5964912280701754, "grad_norm": 2.1555810331504976, "learning_rate": 4.205709283330694e-06, "loss": 0.7713, "step": 170 }, { "epoch": 0.631578947368421, "grad_norm": 2.0442009159494785, "learning_rate": 3.6074015530747354e-06, "loss": 0.7775, "step": 180 }, { "epoch": 0.6666666666666666, "grad_norm": 2.113365321780909, "learning_rate": 3.0300397996947604e-06, "loss": 0.7515, "step": 190 }, { "epoch": 0.7017543859649122, "grad_norm": 2.1674953587420838, "learning_rate": 2.482308081371413e-06, "loss": 0.765, "step": 200 }, { "epoch": 0.7368421052631579, "grad_norm": 2.2304080548395917, "learning_rate": 1.972444792978373e-06, "loss": 0.7528, "step": 210 }, { "epoch": 0.7719298245614035, "grad_norm": 2.40752310761597, "learning_rate": 1.508118752955136e-06, "loss": 0.7396, "step": 220 }, { "epoch": 0.8070175438596491, "grad_norm": 2.117568701663915, "learning_rate": 1.0963138571395277e-06, "loss": 0.7408, "step": 230 }, { "epoch": 0.8421052631578947, "grad_norm": 2.919503306825211, "learning_rate": 7.43224034473674e-07, "loss": 0.7486, "step": 240 }, { "epoch": 0.8771929824561403, "grad_norm": 2.125280125804679, "learning_rate": 4.5416008454738813e-07, "loss": 0.7359, "step": 250 }, { "epoch": 0.9122807017543859, "grad_norm": 2.066124288511879, "learning_rate": 2.3346979822903071e-07, "loss": 0.7375, "step": 260 }, { "epoch": 0.9473684210526315, "grad_norm": 1.9772889405004763, "learning_rate": 8.447256284391858e-08, "loss": 0.7245, "step": 270 }, { "epoch": 0.9824561403508771, "grad_norm": 1.9570228116416033, "learning_rate": 9.409435499254105e-09, "loss": 0.7378, "step": 280 }, { "epoch": 1.0, "step": 285, "total_flos": 1.4819175628485427e+17, "train_loss": 0.8990087810315583, "train_runtime": 350.8438, "train_samples_per_second": 103.861, "train_steps_per_second": 0.812 } ], "logging_steps": 10, "max_steps": 285, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4819175628485427e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }