{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.8691588785046729,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09345794392523364,
      "grad_norm": 66.9687943936917,
      "learning_rate": 5e-07,
      "logits/chosen": -2.720803737640381,
      "logits/rejected": -2.7183666229248047,
      "logps/chosen": -237.3436279296875,
      "logps/rejected": -190.54464721679688,
      "loss": 0.6913,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": 0.013445606455206871,
      "rewards/margins": 0.008647488430142403,
      "rewards/rejected": 0.00479811942204833,
      "step": 5
    },
    {
      "epoch": 0.18691588785046728,
      "grad_norm": 48.6419981873445,
      "learning_rate": 1e-06,
      "logits/chosen": -2.679405689239502,
      "logits/rejected": -2.670754909515381,
      "logps/chosen": -279.81866455078125,
      "logps/rejected": -226.22573852539062,
      "loss": 0.6556,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": 0.4765666127204895,
      "rewards/margins": 0.12125828117132187,
      "rewards/rejected": 0.35530832409858704,
      "step": 10
    },
    {
      "epoch": 0.2803738317757009,
      "grad_norm": 44.827387730520904,
      "learning_rate": 9.972240926774166e-07,
      "logits/chosen": -2.5193655490875244,
      "logits/rejected": -2.510051965713501,
      "logps/chosen": -236.1126708984375,
      "logps/rejected": -201.1164093017578,
      "loss": 0.6375,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": 1.2661212682724,
      "rewards/margins": 0.5709505677223206,
      "rewards/rejected": 0.6951709985733032,
      "step": 15
    },
    {
      "epoch": 0.37383177570093457,
      "grad_norm": 51.33633927747967,
      "learning_rate": 9.889271933555212e-07,
      "logits/chosen": -2.4093480110168457,
      "logits/rejected": -2.381843090057373,
      "logps/chosen": -258.9214782714844,
      "logps/rejected": -220.68408203125,
      "loss": 0.6632,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": 1.2196061611175537,
      "rewards/margins": 0.9229635000228882,
      "rewards/rejected": 0.2966426610946655,
      "step": 20
    },
    {
      "epoch": 0.4672897196261682,
      "grad_norm": 58.55983526769123,
      "learning_rate": 9.752014277286431e-07,
      "logits/chosen": -2.319462299346924,
      "logits/rejected": -2.303922653198242,
      "logps/chosen": -254.1189422607422,
      "logps/rejected": -196.4254608154297,
      "loss": 0.6806,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 1.0977602005004883,
      "rewards/margins": 0.803708553314209,
      "rewards/rejected": 0.2940516173839569,
      "step": 25
    },
    {
      "epoch": 0.5607476635514018,
      "grad_norm": 38.96884797085844,
      "learning_rate": 9.561992016100291e-07,
      "logits/chosen": -2.380964994430542,
      "logits/rejected": -2.357675075531006,
      "logps/chosen": -242.76406860351562,
      "logps/rejected": -221.53903198242188,
      "loss": 0.6103,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 0.37621065974235535,
      "rewards/margins": 0.7926680445671082,
      "rewards/rejected": -0.4164574146270752,
      "step": 30
    },
    {
      "epoch": 0.6542056074766355,
      "grad_norm": 49.029442328071866,
      "learning_rate": 9.321315086741915e-07,
      "logits/chosen": -2.3943734169006348,
      "logits/rejected": -2.386823892593384,
      "logps/chosen": -251.5531463623047,
      "logps/rejected": -226.2257537841797,
      "loss": 0.61,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.37054210901260376,
      "rewards/margins": 0.955781102180481,
      "rewards/rejected": -0.5852389931678772,
      "step": 35
    },
    {
      "epoch": 0.7476635514018691,
      "grad_norm": 47.16621948348876,
      "learning_rate": 9.032655876613635e-07,
      "logits/chosen": -2.323800802230835,
      "logits/rejected": -2.296937942504883,
      "logps/chosen": -255.12149047851562,
      "logps/rejected": -216.43679809570312,
      "loss": 0.5697,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.7852829694747925,
      "rewards/margins": 1.0229356288909912,
      "rewards/rejected": -0.2376527041196823,
      "step": 40
    },
    {
      "epoch": 0.8411214953271028,
      "grad_norm": 45.06757129062354,
      "learning_rate": 8.699219550575952e-07,
      "logits/chosen": -2.2773144245147705,
      "logits/rejected": -2.2689876556396484,
      "logps/chosen": -242.54403686523438,
      "logps/rejected": -198.2088165283203,
      "loss": 0.5913,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": 1.0134313106536865,
      "rewards/margins": 0.8711115121841431,
      "rewards/rejected": 0.1423199325799942,
      "step": 45
    },
    {
      "epoch": 0.9345794392523364,
      "grad_norm": 37.25379822397818,
      "learning_rate": 8.324708461985124e-07,
      "logits/chosen": -2.3120808601379395,
      "logits/rejected": -2.325521945953369,
      "logps/chosen": -244.45162963867188,
      "logps/rejected": -233.9671173095703,
      "loss": 0.5323,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.8873047828674316,
      "rewards/margins": 1.0542502403259277,
      "rewards/rejected": -0.16694557666778564,
      "step": 50
    },
    {
      "epoch": 0.9345794392523364,
      "eval_logits/chosen": -2.3802032470703125,
      "eval_logits/rejected": -2.3511736392974854,
      "eval_logps/chosen": -257.5635070800781,
      "eval_logps/rejected": -242.5312957763672,
      "eval_loss": 0.5823682546615601,
      "eval_rewards/accuracies": 0.7395833134651184,
      "eval_rewards/chosen": 0.9205262660980225,
      "eval_rewards/margins": 1.2713056802749634,
      "eval_rewards/rejected": -0.35077938437461853,
      "eval_runtime": 101.9106,
      "eval_samples_per_second": 14.915,
      "eval_steps_per_second": 0.236,
      "step": 50
    },
    {
      "epoch": 1.02803738317757,
      "grad_norm": 17.588268217174,
      "learning_rate": 7.913281043133977e-07,
      "logits/chosen": -2.377732038497925,
      "logits/rejected": -2.4019691944122314,
      "logps/chosen": -251.0818328857422,
      "logps/rejected": -231.4687957763672,
      "loss": 0.4699,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 1.248857021331787,
      "rewards/margins": 1.5204874277114868,
      "rewards/rejected": -0.2716304361820221,
      "step": 55
    },
    {
      "epoch": 1.1214953271028036,
      "grad_norm": 17.6740148393279,
      "learning_rate": 7.469505631561317e-07,
      "logits/chosen": -2.4365363121032715,
      "logits/rejected": -2.3999149799346924,
      "logps/chosen": -235.5603790283203,
      "logps/rejected": -217.8879852294922,
      "loss": 0.2443,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": 1.6067253351211548,
      "rewards/margins": 2.39015531539917,
      "rewards/rejected": -0.7834302186965942,
      "step": 60
    },
    {
      "epoch": 1.2149532710280373,
      "grad_norm": 19.25307975460043,
      "learning_rate": 6.998309744925411e-07,
      "logits/chosen": -2.459545612335205,
      "logits/rejected": -2.443091869354248,
      "logps/chosen": -234.2629852294922,
      "logps/rejected": -229.77243041992188,
      "loss": 0.2014,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": 2.0133259296417236,
      "rewards/margins": 3.0357606410980225,
      "rewards/rejected": -1.0224347114562988,
      "step": 65
    },
    {
      "epoch": 1.308411214953271,
      "grad_norm": 18.33160467378068,
      "learning_rate": 6.504925367674594e-07,
      "logits/chosen": -2.5056710243225098,
      "logits/rejected": -2.481133222579956,
      "logps/chosen": -239.59219360351562,
      "logps/rejected": -222.05429077148438,
      "loss": 0.2154,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 1.9632623195648193,
      "rewards/margins": 3.0751612186431885,
      "rewards/rejected": -1.1118988990783691,
      "step": 70
    },
    {
      "epoch": 1.4018691588785046,
      "grad_norm": 26.100443454167603,
      "learning_rate": 5.994830857031499e-07,
      "logits/chosen": -2.4787604808807373,
      "logits/rejected": -2.477220058441162,
      "logps/chosen": -242.30496215820312,
      "logps/rejected": -246.5374298095703,
      "loss": 0.2167,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": 2.2068536281585693,
      "rewards/margins": 3.7736289501190186,
      "rewards/rejected": -1.5667749643325806,
      "step": 75
    },
    {
      "epoch": 1.4953271028037383,
      "grad_norm": 23.947057848275417,
      "learning_rate": 5.473690113345342e-07,
      "logits/chosen": -2.4580140113830566,
      "logits/rejected": -2.4340569972991943,
      "logps/chosen": -232.0405731201172,
      "logps/rejected": -232.50894165039062,
      "loss": 0.2133,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": 1.6286522150039673,
      "rewards/margins": 3.344123363494873,
      "rewards/rejected": -1.7154712677001953,
      "step": 80
    },
    {
      "epoch": 1.588785046728972,
      "grad_norm": 25.173894130852474,
      "learning_rate": 4.947289690242102e-07,
      "logits/chosen": -2.3979992866516113,
      "logits/rejected": -2.372950553894043,
      "logps/chosen": -234.0653533935547,
      "logps/rejected": -226.2272491455078,
      "loss": 0.2526,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": 2.000070333480835,
      "rewards/margins": 3.399747371673584,
      "rewards/rejected": -1.3996769189834595,
      "step": 85
    },
    {
      "epoch": 1.6822429906542056,
      "grad_norm": 25.381019801326666,
      "learning_rate": 4.421474542878194e-07,
      "logits/chosen": -2.402013063430786,
      "logits/rejected": -2.358625888824463,
      "logps/chosen": -240.19235229492188,
      "logps/rejected": -235.06942749023438,
      "loss": 0.2381,
      "rewards/accuracies": 0.956250011920929,
      "rewards/chosen": 2.031113624572754,
      "rewards/margins": 3.7032268047332764,
      "rewards/rejected": -1.6721128225326538,
      "step": 90
    },
    {
      "epoch": 1.7757009345794392,
      "grad_norm": 22.605364041509517,
      "learning_rate": 3.902083127725186e-07,
      "logits/chosen": -2.3787314891815186,
      "logits/rejected": -2.382676601409912,
      "logps/chosen": -231.67434692382812,
      "logps/rejected": -207.5229949951172,
      "loss": 0.2348,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": 2.045062780380249,
      "rewards/margins": 3.560044765472412,
      "rewards/rejected": -1.5149818658828735,
      "step": 95
    },
    {
      "epoch": 1.8691588785046729,
      "grad_norm": 26.859387828548858,
      "learning_rate": 3.394882574513519e-07,
      "logits/chosen": -2.3952929973602295,
      "logits/rejected": -2.3831348419189453,
      "logps/chosen": -234.7982940673828,
      "logps/rejected": -259.3247375488281,
      "loss": 0.2441,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": 1.9878575801849365,
      "rewards/margins": 3.512976884841919,
      "rewards/rejected": -1.525119423866272,
      "step": 100
    },
    {
      "epoch": 1.8691588785046729,
      "eval_logits/chosen": -2.3957111835479736,
      "eval_logits/rejected": -2.363419532775879,
      "eval_logps/chosen": -256.04901123046875,
      "eval_logps/rejected": -246.68414306640625,
      "eval_loss": 0.5841386318206787,
      "eval_rewards/accuracies": 0.7708333134651184,
      "eval_rewards/chosen": 1.071976661682129,
      "eval_rewards/margins": 1.8380416631698608,
      "eval_rewards/rejected": -0.7660649418830872,
      "eval_runtime": 100.9496,
      "eval_samples_per_second": 15.057,
      "eval_steps_per_second": 0.238,
      "step": 100
    }
  ],
  "logging_steps": 5,
  "max_steps": 159,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1178822762299392.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}