mistral_strategyqa_sft / trainer_state.json
weijie210's picture
Model save
ae8e6ee verified
raw
history blame
3.89 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 124,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.9996790752964305e-05,
"loss": 0.7636,
"step": 1
},
{
"epoch": 0.04,
"learning_rate": 1.991987177050743e-05,
"loss": 2.6172,
"step": 5
},
{
"epoch": 0.08,
"learning_rate": 1.9680771188662044e-05,
"loss": 1.5854,
"step": 10
},
{
"epoch": 0.12,
"learning_rate": 1.9286529995722624e-05,
"loss": 0.6342,
"step": 15
},
{
"epoch": 0.16,
"learning_rate": 1.8743466161445823e-05,
"loss": 0.4285,
"step": 20
},
{
"epoch": 0.2,
"learning_rate": 1.8060282634540053e-05,
"loss": 0.4147,
"step": 25
},
{
"epoch": 0.24,
"learning_rate": 1.72479278722912e-05,
"loss": 0.3739,
"step": 30
},
{
"epoch": 0.28,
"learning_rate": 1.631942038446304e-05,
"loss": 0.3636,
"step": 35
},
{
"epoch": 0.32,
"learning_rate": 1.5289640103269626e-05,
"loss": 0.2994,
"step": 40
},
{
"epoch": 0.36,
"learning_rate": 1.4175089922850633e-05,
"loss": 0.2938,
"step": 45
},
{
"epoch": 0.4,
"learning_rate": 1.2993631229733584e-05,
"loss": 0.3058,
"step": 50
},
{
"epoch": 0.44,
"learning_rate": 1.1764197662578087e-05,
"loss": 0.3086,
"step": 55
},
{
"epoch": 0.48,
"learning_rate": 1.0506491688387128e-05,
"loss": 0.3109,
"step": 60
},
{
"epoch": 0.52,
"learning_rate": 9.24066885774754e-06,
"loss": 0.3275,
"step": 65
},
{
"epoch": 0.56,
"learning_rate": 7.987014799113398e-06,
"loss": 0.3047,
"step": 70
},
{
"epoch": 0.6,
"learning_rate": 6.7656201285076195e-06,
"loss": 0.2778,
"step": 75
},
{
"epoch": 0.65,
"learning_rate": 5.5960584844236565e-06,
"loss": 0.269,
"step": 80
},
{
"epoch": 0.69,
"learning_rate": 4.497072847626087e-06,
"loss": 0.3077,
"step": 85
},
{
"epoch": 0.73,
"learning_rate": 3.48627517277778e-06,
"loss": 0.3078,
"step": 90
},
{
"epoch": 0.77,
"learning_rate": 2.5798641454908945e-06,
"loss": 0.2905,
"step": 95
},
{
"epoch": 0.81,
"learning_rate": 1.7923655879272395e-06,
"loss": 0.2927,
"step": 100
},
{
"epoch": 0.85,
"learning_rate": 1.1363996731159188e-06,
"loss": 0.3442,
"step": 105
},
{
"epoch": 0.89,
"learning_rate": 6.22478678529197e-07,
"loss": 0.2968,
"step": 110
},
{
"epoch": 0.93,
"learning_rate": 2.588385200461307e-07,
"loss": 0.2628,
"step": 115
},
{
"epoch": 0.97,
"learning_rate": 5.1306766081048456e-08,
"loss": 0.2698,
"step": 120
},
{
"epoch": 1.0,
"eval_loss": 0.2914734482765198,
"eval_runtime": 11.0655,
"eval_samples_per_second": 19.882,
"eval_steps_per_second": 0.361,
"step": 124
},
{
"epoch": 1.0,
"step": 124,
"total_flos": 880074424320.0,
"train_loss": 0.4545349882495019,
"train_runtime": 740.5857,
"train_samples_per_second": 2.664,
"train_steps_per_second": 0.167
}
],
"logging_steps": 5,
"max_steps": 124,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 880074424320.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}