{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.1792, "eval_steps": 500, "global_step": 14, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0128, "grad_norm": 12.99861295213489, "learning_rate": 6.25e-07, "loss": 0.7998, "step": 1 }, { "epoch": 0.0256, "grad_norm": 12.355498309634275, "learning_rate": 1.25e-06, "loss": 0.7667, "step": 2 }, { "epoch": 0.0384, "grad_norm": 13.377026375153603, "learning_rate": 1.8750000000000003e-06, "loss": 0.8004, "step": 3 }, { "epoch": 0.0512, "grad_norm": 13.4959625671783, "learning_rate": 2.5e-06, "loss": 0.7769, "step": 4 }, { "epoch": 0.064, "grad_norm": 10.482327280051456, "learning_rate": 3.125e-06, "loss": 0.7544, "step": 5 }, { "epoch": 0.0768, "grad_norm": 18.566810425661483, "learning_rate": 3.7500000000000005e-06, "loss": 0.7639, "step": 6 }, { "epoch": 0.0896, "grad_norm": 22.961763121425445, "learning_rate": 4.3750000000000005e-06, "loss": 0.6802, "step": 7 }, { "epoch": 0.1024, "grad_norm": 23.288202176933027, "learning_rate": 5e-06, "loss": 0.6742, "step": 8 }, { "epoch": 0.1152, "grad_norm": 5.353762222622938, "learning_rate": 4.997482666353287e-06, "loss": 0.5583, "step": 9 }, { "epoch": 0.128, "grad_norm": 29.528240817452634, "learning_rate": 4.989935734988098e-06, "loss": 0.554, "step": 10 }, { "epoch": 0.1408, "grad_norm": 6.270913357192842, "learning_rate": 4.977374404419838e-06, "loss": 0.5101, "step": 11 }, { "epoch": 0.1536, "grad_norm": 6.437334442280678, "learning_rate": 4.959823971496575e-06, "loss": 0.4621, "step": 12 }, { "epoch": 0.1664, "grad_norm": 4.101541939034336, "learning_rate": 4.937319780454559e-06, "loss": 0.4742, "step": 13 }, { "epoch": 0.1792, "grad_norm": 2.038050058538055, "learning_rate": 4.909907151739634e-06, "loss": 0.4091, "step": 14 } ], "logging_steps": 1, "max_steps": 78, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 17358889091072.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }