|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 2200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.3988863229751587, |
|
"eval_runtime": 1.1028, |
|
"eval_samples_per_second": 452.481, |
|
"eval_steps_per_second": 14.508, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.38775110244750977, |
|
"eval_runtime": 1.1024, |
|
"eval_samples_per_second": 452.63, |
|
"eval_steps_per_second": 14.513, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.727272727272727e-05, |
|
"loss": 0.4642, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.377765029668808, |
|
"eval_runtime": 1.1044, |
|
"eval_samples_per_second": 451.847, |
|
"eval_steps_per_second": 14.488, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.3701080083847046, |
|
"eval_runtime": 1.1029, |
|
"eval_samples_per_second": 452.461, |
|
"eval_steps_per_second": 14.508, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 5.4545454545454546e-05, |
|
"loss": 0.4438, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.3616144061088562, |
|
"eval_runtime": 1.1041, |
|
"eval_samples_per_second": 451.939, |
|
"eval_steps_per_second": 14.491, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.35696589946746826, |
|
"eval_runtime": 1.3299, |
|
"eval_samples_per_second": 375.212, |
|
"eval_steps_per_second": 12.031, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 3.181818181818182e-05, |
|
"loss": 0.4262, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.3518909513950348, |
|
"eval_runtime": 1.5297, |
|
"eval_samples_per_second": 326.21, |
|
"eval_steps_per_second": 10.46, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.34925374388694763, |
|
"eval_runtime": 1.3276, |
|
"eval_samples_per_second": 375.855, |
|
"eval_steps_per_second": 12.051, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.34774351119995117, |
|
"eval_runtime": 1.3242, |
|
"eval_samples_per_second": 376.83, |
|
"eval_steps_per_second": 12.083, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 0.4197, |
|
"step": 2000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2200, |
|
"num_train_epochs": 10, |
|
"save_steps": 10, |
|
"total_flos": 2380323933388800.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|