{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 100,
  "global_step": 67,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "completion_length": 354.5310432434082,
      "epoch": 0.14925373134328357,
      "grad_norm": 26.582666397094727,
      "kl": 0.02456921637058258,
      "learning_rate": 2.981532510892707e-06,
      "loss": 0.001,
      "reward": 0.38080358956940474,
      "reward_std": 0.3429007441736758,
      "rewards/accuracy_reward": 0.12935268479632214,
      "rewards/format_reward": 0.2514509041327983,
      "step": 10
    },
    {
      "completion_length": 82.56049494743347,
      "epoch": 0.29850746268656714,
      "grad_norm": 6.207046031951904,
      "kl": 0.192266845703125,
      "learning_rate": 2.6657189421854562e-06,
      "loss": 0.0077,
      "reward": 1.1937500540167094,
      "reward_std": 0.2890436253976077,
      "rewards/accuracy_reward": 0.2324776900582947,
      "rewards/format_reward": 0.9612723555415869,
      "step": 20
    },
    {
      "completion_length": 163.46440453529357,
      "epoch": 0.44776119402985076,
      "grad_norm": 0.9332170486450195,
      "kl": 0.28681182861328125,
      "learning_rate": 2.03755192431795e-06,
      "loss": 0.0115,
      "reward": 1.377120592445135,
      "reward_std": 0.35549838868901135,
      "rewards/accuracy_reward": 0.4252232332248241,
      "rewards/format_reward": 0.9518973540514708,
      "step": 30
    },
    {
      "completion_length": 289.2572672843933,
      "epoch": 0.5970149253731343,
      "grad_norm": 0.30436766147613525,
      "kl": 0.11348419189453125,
      "learning_rate": 1.2653483024396534e-06,
      "loss": 0.0045,
      "reward": 1.5142857864499093,
      "reward_std": 0.3293194776400924,
      "rewards/accuracy_reward": 0.5575893112458289,
      "rewards/format_reward": 0.9566964589059352,
      "step": 40
    },
    {
      "completion_length": 328.3682068824768,
      "epoch": 0.746268656716418,
      "grad_norm": 0.4712439179420471,
      "kl": 0.0519775390625,
      "learning_rate": 5.560194134252441e-07,
      "loss": 0.0021,
      "reward": 1.4938616767525672,
      "reward_std": 0.352480823546648,
      "rewards/accuracy_reward": 0.5599330620840192,
      "rewards/format_reward": 0.9339286085218191,
      "step": 50
    },
    {
      "completion_length": 326.48863105773927,
      "epoch": 0.8955223880597015,
      "grad_norm": 1.6945326328277588,
      "kl": 0.05061798095703125,
      "learning_rate": 9.962936025419756e-08,
      "loss": 0.002,
      "reward": 1.528236673772335,
      "reward_std": 0.33247090512886646,
      "rewards/accuracy_reward": 0.574107170663774,
      "rewards/format_reward": 0.9541294977068902,
      "step": 60
    },
    {
      "completion_length": 317.54422964368547,
      "epoch": 1.0,
      "kl": 0.05271693638392857,
      "reward": 1.535608057464872,
      "reward_std": 0.3223346844315529,
      "rewards/accuracy_reward": 0.5788690721882241,
      "rewards/format_reward": 0.9567389748990536,
      "step": 67,
      "total_flos": 0.0,
      "train_loss": 0.004519763499943178,
      "train_runtime": 8988.1744,
      "train_samples_per_second": 0.834,
      "train_steps_per_second": 0.007
    }
  ],
  "logging_steps": 10,
  "max_steps": 67,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}