|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9158878504672896, |
|
"eval_steps": 50, |
|
"global_step": 78, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.18691588785046728, |
|
"grad_norm": 61.21687204105591, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.7639615535736084, |
|
"logits/rejected": -2.753985643386841, |
|
"logps/chosen": -265.36212158203125, |
|
"logps/rejected": -219.6597900390625, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": 0.01777561381459236, |
|
"rewards/margins": 0.004832454025745392, |
|
"rewards/rejected": 0.012943158857524395, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.37383177570093457, |
|
"grad_norm": 46.174004471862034, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -2.7134578227996826, |
|
"logits/rejected": -2.718207597732544, |
|
"logps/chosen": -287.67529296875, |
|
"logps/rejected": -230.2861785888672, |
|
"loss": 0.6281, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.6617986559867859, |
|
"rewards/margins": 0.29742884635925293, |
|
"rewards/rejected": 0.36436980962753296, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.5607476635514018, |
|
"grad_norm": 46.436393575993726, |
|
"learning_rate": 9.867190271803463e-07, |
|
"logits/chosen": -2.5830750465393066, |
|
"logits/rejected": -2.587627410888672, |
|
"logps/chosen": -273.77752685546875, |
|
"logps/rejected": -220.65695190429688, |
|
"loss": 0.6333, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 1.6920897960662842, |
|
"rewards/margins": 0.8979904055595398, |
|
"rewards/rejected": 0.7940995097160339, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.7476635514018691, |
|
"grad_norm": 43.837511462770195, |
|
"learning_rate": 9.475816456775312e-07, |
|
"logits/chosen": -2.471787929534912, |
|
"logits/rejected": -2.478492259979248, |
|
"logps/chosen": -246.120361328125, |
|
"logps/rejected": -214.670654296875, |
|
"loss": 0.5943, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 1.5776749849319458, |
|
"rewards/margins": 1.0568134784698486, |
|
"rewards/rejected": 0.5208614468574524, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.9345794392523364, |
|
"grad_norm": 37.09162689266799, |
|
"learning_rate": 8.846669854914395e-07, |
|
"logits/chosen": -2.4390480518341064, |
|
"logits/rejected": -2.411414861679077, |
|
"logps/chosen": -269.1220397949219, |
|
"logps/rejected": -211.203369140625, |
|
"loss": 0.5775, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 1.8889753818511963, |
|
"rewards/margins": 1.2695468664169312, |
|
"rewards/rejected": 0.619428277015686, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.1214953271028036, |
|
"grad_norm": 23.14113734554105, |
|
"learning_rate": 8.013173181896282e-07, |
|
"logits/chosen": -2.3594138622283936, |
|
"logits/rejected": -2.3916964530944824, |
|
"logps/chosen": -255.5403289794922, |
|
"logps/rejected": -206.4452667236328, |
|
"loss": 0.4246, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 2.106290340423584, |
|
"rewards/margins": 2.024784564971924, |
|
"rewards/rejected": 0.08150559663772583, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.308411214953271, |
|
"grad_norm": 20.944832198946614, |
|
"learning_rate": 7.019605024359474e-07, |
|
"logits/chosen": -2.3514552116394043, |
|
"logits/rejected": -2.332791805267334, |
|
"logps/chosen": -238.28518676757812, |
|
"logps/rejected": -240.40560913085938, |
|
"loss": 0.2842, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.428344249725342, |
|
"rewards/margins": 3.228165864944458, |
|
"rewards/rejected": -0.7998219728469849, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.4953271028037383, |
|
"grad_norm": 30.791834420795908, |
|
"learning_rate": 5.918747589082852e-07, |
|
"logits/chosen": -2.33073353767395, |
|
"logits/rejected": -2.3257832527160645, |
|
"logps/chosen": -250.0792694091797, |
|
"logps/rejected": -221.3015594482422, |
|
"loss": 0.2477, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 2.562706232070923, |
|
"rewards/margins": 3.5494167804718018, |
|
"rewards/rejected": -0.9867107272148132, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.6822429906542056, |
|
"grad_norm": 27.339770904226572, |
|
"learning_rate": 4.769082706771303e-07, |
|
"logits/chosen": -2.354419708251953, |
|
"logits/rejected": -2.345761775970459, |
|
"logps/chosen": -251.4537811279297, |
|
"logps/rejected": -224.0857391357422, |
|
"loss": 0.282, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 2.958096742630005, |
|
"rewards/margins": 3.320951461791992, |
|
"rewards/rejected": -0.3628546893596649, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.8691588785046729, |
|
"grad_norm": 26.19581986930712, |
|
"learning_rate": 3.6316850496395855e-07, |
|
"logits/chosen": -2.395775318145752, |
|
"logits/rejected": -2.3773467540740967, |
|
"logps/chosen": -227.96786499023438, |
|
"logps/rejected": -228.518310546875, |
|
"loss": 0.3348, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 2.7191131114959717, |
|
"rewards/margins": 2.9623184204101562, |
|
"rewards/rejected": -0.24320510029792786, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.8691588785046729, |
|
"eval_logits/chosen": -2.406548023223877, |
|
"eval_logits/rejected": -2.4020678997039795, |
|
"eval_logps/chosen": -249.0997314453125, |
|
"eval_logps/rejected": -190.4173583984375, |
|
"eval_loss": 0.5792086124420166, |
|
"eval_rewards/accuracies": 0.8125, |
|
"eval_rewards/chosen": 2.028245210647583, |
|
"eval_rewards/margins": 1.727249026298523, |
|
"eval_rewards/rejected": 0.300996333360672, |
|
"eval_runtime": 50.8966, |
|
"eval_samples_per_second": 14.932, |
|
"eval_steps_per_second": 0.236, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.05607476635514, |
|
"grad_norm": 19.061520787069963, |
|
"learning_rate": 2.566977607165719e-07, |
|
"logits/chosen": -2.425044298171997, |
|
"logits/rejected": -2.430022716522217, |
|
"logps/chosen": -258.14617919921875, |
|
"logps/rejected": -232.40603637695312, |
|
"loss": 0.2745, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 2.87212872505188, |
|
"rewards/margins": 3.6639626026153564, |
|
"rewards/rejected": -0.7918335795402527, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 2.2429906542056073, |
|
"grad_norm": 15.460701798223598, |
|
"learning_rate": 1.631521781767214e-07, |
|
"logits/chosen": -2.4355878829956055, |
|
"logits/rejected": -2.4206368923187256, |
|
"logps/chosen": -223.0980987548828, |
|
"logps/rejected": -222.94241333007812, |
|
"loss": 0.1636, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 2.6068944931030273, |
|
"rewards/margins": 3.432859420776367, |
|
"rewards/rejected": -0.8259647488594055, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.4299065420560746, |
|
"grad_norm": 16.614589190551243, |
|
"learning_rate": 8.75012627008489e-08, |
|
"logits/chosen": -2.4444804191589355, |
|
"logits/rejected": -2.4162566661834717, |
|
"logps/chosen": -244.12841796875, |
|
"logps/rejected": -229.2833251953125, |
|
"loss": 0.1617, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 2.8097472190856934, |
|
"rewards/margins": 3.784661054611206, |
|
"rewards/rejected": -0.9749139547348022, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 2.616822429906542, |
|
"grad_norm": 19.056173520446013, |
|
"learning_rate": 3.376388529782215e-08, |
|
"logits/chosen": -2.4610393047332764, |
|
"logits/rejected": -2.440760612487793, |
|
"logps/chosen": -251.9339599609375, |
|
"logps/rejected": -247.04275512695312, |
|
"loss": 0.1661, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.2026991844177246, |
|
"rewards/margins": 4.342484474182129, |
|
"rewards/rejected": -1.1397849321365356, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.803738317757009, |
|
"grad_norm": 22.14435014976595, |
|
"learning_rate": 4.794784562397458e-09, |
|
"logits/chosen": -2.439197063446045, |
|
"logits/rejected": -2.4480080604553223, |
|
"logps/chosen": -245.83792114257812, |
|
"logps/rejected": -227.4940185546875, |
|
"loss": 0.1707, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 3.1310062408447266, |
|
"rewards/margins": 3.876368761062622, |
|
"rewards/rejected": -0.7453619241714478, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 2.9158878504672896, |
|
"step": 78, |
|
"total_flos": 919378820333568.0, |
|
"train_loss": 0.36702350164071107, |
|
"train_runtime": 2733.1708, |
|
"train_samples_per_second": 7.506, |
|
"train_steps_per_second": 0.029 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 78, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 919378820333568.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|