File size: 1,694 Bytes
4bc4d19 81caae1 4bc4d19 81caae1 4bc4d19 81caae1 4bc4d19 81caae1 4bc4d19 81caae1 4bc4d19 81caae1 4bc4d19 81caae1 4bc4d19 81caae1 4bc4d19 81caae1 4bc4d19 81caae1 4bc4d19 81caae1 4bc4d19 81caae1 4bc4d19 81caae1 4bc4d19 81caae1 4bc4d19 81caae1 4bc4d19 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.0,
"eval_steps": 500,
"global_step": 1650,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9090909090909091,
"grad_norm": 0.9186474680900574,
"learning_rate": 0.0002,
"loss": 0.8935,
"step": 250
},
{
"epoch": 1.8181818181818183,
"grad_norm": 0.6858515739440918,
"learning_rate": 0.0002,
"loss": 0.4606,
"step": 500
},
{
"epoch": 2.7272727272727275,
"grad_norm": 1.5318282842636108,
"learning_rate": 0.0002,
"loss": 0.3005,
"step": 750
},
{
"epoch": 3.6363636363636362,
"grad_norm": 0.7992210984230042,
"learning_rate": 0.0002,
"loss": 0.235,
"step": 1000
},
{
"epoch": 4.545454545454545,
"grad_norm": 0.5865496397018433,
"learning_rate": 0.0002,
"loss": 0.199,
"step": 1250
},
{
"epoch": 5.454545454545454,
"grad_norm": 0.5286576151847839,
"learning_rate": 0.0002,
"loss": 0.1775,
"step": 1500
}
],
"logging_steps": 250,
"max_steps": 1650,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.103034997322547e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|