{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.981366459627329, "eval_steps": 50, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12422360248447205, "grad_norm": 65.02782550737439, "learning_rate": 5e-07, "logits/chosen": -2.7251429557800293, "logits/rejected": -2.70817494392395, "logps/chosen": -262.96563720703125, "logps/rejected": -182.58338928222656, "loss": 0.6897, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": 0.01599644497036934, "rewards/margins": 0.006512208841741085, "rewards/rejected": 0.009484234265983105, "step": 5 }, { "epoch": 0.2484472049689441, "grad_norm": 48.3464252705335, "learning_rate": 1e-06, "logits/chosen": -2.6999757289886475, "logits/rejected": -2.6889986991882324, "logps/chosen": -268.0428771972656, "logps/rejected": -197.49484252929688, "loss": 0.6238, "rewards/accuracies": 0.71875, "rewards/chosen": 0.6579615473747253, "rewards/margins": 0.32934561371803284, "rewards/rejected": 0.3286159038543701, "step": 10 }, { "epoch": 0.37267080745341613, "grad_norm": 53.44161973291976, "learning_rate": 9.949107209404663e-07, "logits/chosen": -2.5064568519592285, "logits/rejected": -2.495807647705078, "logps/chosen": -242.85745239257812, "logps/rejected": -196.7808074951172, "loss": 0.616, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 1.777621865272522, "rewards/margins": 0.5862057209014893, "rewards/rejected": 1.1914160251617432, "step": 15 }, { "epoch": 0.4968944099378882, "grad_norm": 47.682650591660796, "learning_rate": 9.797464868072486e-07, "logits/chosen": -2.3988099098205566, "logits/rejected": -2.3708558082580566, "logps/chosen": -249.437255859375, "logps/rejected": -210.55868530273438, "loss": 0.5906, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 1.9457979202270508, "rewards/margins": 1.0106760263442993, "rewards/rejected": 0.9351221323013306, "step": 20 }, { "epoch": 0.6211180124223602, "grad_norm": 39.33765044259658, "learning_rate": 9.548159976772592e-07, "logits/chosen": -2.241548538208008, "logits/rejected": -2.1949617862701416, "logps/chosen": -244.6901397705078, "logps/rejected": -199.38278198242188, "loss": 0.594, "rewards/accuracies": 0.71875, "rewards/chosen": 1.7841804027557373, "rewards/margins": 1.113467812538147, "rewards/rejected": 0.6707127094268799, "step": 25 }, { "epoch": 0.7453416149068323, "grad_norm": 33.38735708969768, "learning_rate": 9.206267664155906e-07, "logits/chosen": -2.226879119873047, "logits/rejected": -2.1980159282684326, "logps/chosen": -261.0025329589844, "logps/rejected": -201.90567016601562, "loss": 0.5731, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": 1.9629528522491455, "rewards/margins": 1.5217477083206177, "rewards/rejected": 0.44120508432388306, "step": 30 }, { "epoch": 0.8695652173913043, "grad_norm": 40.7877020563695, "learning_rate": 8.778747871771291e-07, "logits/chosen": -2.303542375564575, "logits/rejected": -2.286126136779785, "logps/chosen": -282.151611328125, "logps/rejected": -185.19766235351562, "loss": 0.5745, "rewards/accuracies": 0.793749988079071, "rewards/chosen": 2.344021797180176, "rewards/margins": 2.0376055240631104, "rewards/rejected": 0.30641618371009827, "step": 35 }, { "epoch": 0.9937888198757764, "grad_norm": 44.86549702944521, "learning_rate": 8.274303669726426e-07, "logits/chosen": -2.4969606399536133, "logits/rejected": -2.464599847793579, "logps/chosen": -244.0958709716797, "logps/rejected": -183.03262329101562, "loss": 0.5178, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 1.7189185619354248, "rewards/margins": 1.3635226488113403, "rewards/rejected": 0.3553960919380188, "step": 40 }, { "epoch": 1.1180124223602483, "grad_norm": 19.636732005851645, "learning_rate": 7.703204087277988e-07, "logits/chosen": -2.5191662311553955, "logits/rejected": -2.48410701751709, "logps/chosen": -228.06103515625, "logps/rejected": -196.45220947265625, "loss": 0.232, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 2.1026101112365723, "rewards/margins": 2.677381992340088, "rewards/rejected": -0.5747714042663574, "step": 45 }, { "epoch": 1.2422360248447206, "grad_norm": 17.158865977777797, "learning_rate": 7.077075065009433e-07, "logits/chosen": -2.4344613552093506, "logits/rejected": -2.4072365760803223, "logps/chosen": -227.47802734375, "logps/rejected": -203.6940155029297, "loss": 0.2648, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 2.5955851078033447, "rewards/margins": 3.1633975505828857, "rewards/rejected": -0.567812442779541, "step": 50 }, { "epoch": 1.2422360248447206, "eval_logits/chosen": -2.338369846343994, "eval_logits/rejected": -2.3208906650543213, "eval_logps/chosen": -237.45626831054688, "eval_logps/rejected": -245.1334686279297, "eval_loss": 0.5693262219429016, "eval_rewards/accuracies": 0.8125, "eval_rewards/chosen": 2.0296123027801514, "eval_rewards/margins": 2.5231986045837402, "eval_rewards/rejected": -0.4935866892337799, "eval_runtime": 77.4881, "eval_samples_per_second": 14.712, "eval_steps_per_second": 0.232, "step": 50 }, { "epoch": 1.3664596273291925, "grad_norm": 24.70055582817972, "learning_rate": 6.408662784207149e-07, "logits/chosen": -2.281872272491455, "logits/rejected": -2.279301881790161, "logps/chosen": -233.04708862304688, "logps/rejected": -215.3188934326172, "loss": 0.2485, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 2.539990186691284, "rewards/margins": 3.1891541481018066, "rewards/rejected": -0.6491641402244568, "step": 55 }, { "epoch": 1.4906832298136645, "grad_norm": 26.10474371944714, "learning_rate": 5.711574191366427e-07, "logits/chosen": -2.2054412364959717, "logits/rejected": -2.202352523803711, "logps/chosen": -234.28701782226562, "logps/rejected": -213.38143920898438, "loss": 0.2623, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 2.3911938667297363, "rewards/margins": 3.317472457885742, "rewards/rejected": -0.9262781143188477, "step": 60 }, { "epoch": 1.6149068322981366, "grad_norm": 22.51894757334605, "learning_rate": 5e-07, "logits/chosen": -2.232825756072998, "logits/rejected": -2.171570301055908, "logps/chosen": -241.08193969726562, "logps/rejected": -209.3240966796875, "loss": 0.2633, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 2.3284173011779785, "rewards/margins": 3.415198564529419, "rewards/rejected": -1.08678138256073, "step": 65 }, { "epoch": 1.7391304347826086, "grad_norm": 22.492973391255987, "learning_rate": 4.2884258086335745e-07, "logits/chosen": -2.169712543487549, "logits/rejected": -2.1445705890655518, "logps/chosen": -232.45443725585938, "logps/rejected": -222.10830688476562, "loss": 0.2837, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 2.3141350746154785, "rewards/margins": 3.6101813316345215, "rewards/rejected": -1.2960463762283325, "step": 70 }, { "epoch": 1.8633540372670807, "grad_norm": 21.249146991119204, "learning_rate": 3.591337215792851e-07, "logits/chosen": -2.073169469833374, "logits/rejected": -2.0878424644470215, "logps/chosen": -241.75015258789062, "logps/rejected": -206.7108154296875, "loss": 0.2855, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 2.4120543003082275, "rewards/margins": 3.6116981506347656, "rewards/rejected": -1.1996442079544067, "step": 75 }, { "epoch": 1.9875776397515528, "grad_norm": 23.84694325653629, "learning_rate": 2.922924934990568e-07, "logits/chosen": -2.0345282554626465, "logits/rejected": -2.0104496479034424, "logps/chosen": -230.2379913330078, "logps/rejected": -220.3455810546875, "loss": 0.2806, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 2.359086036682129, "rewards/margins": 3.5737903118133545, "rewards/rejected": -1.214704155921936, "step": 80 }, { "epoch": 2.111801242236025, "grad_norm": 17.085591816665293, "learning_rate": 2.2967959127220137e-07, "logits/chosen": -2.0804636478424072, "logits/rejected": -2.0491340160369873, "logps/chosen": -236.104736328125, "logps/rejected": -225.11471557617188, "loss": 0.1502, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 2.9891457557678223, "rewards/margins": 4.1706223487854, "rewards/rejected": -1.1814768314361572, "step": 85 }, { "epoch": 2.2360248447204967, "grad_norm": 18.158251153184455, "learning_rate": 1.725696330273575e-07, "logits/chosen": -2.092092514038086, "logits/rejected": -2.0654916763305664, "logps/chosen": -235.83578491210938, "logps/rejected": -234.26809692382812, "loss": 0.1426, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 2.776648759841919, "rewards/margins": 4.275698661804199, "rewards/rejected": -1.499050259590149, "step": 90 }, { "epoch": 2.360248447204969, "grad_norm": 15.808936697383274, "learning_rate": 1.2212521282287093e-07, "logits/chosen": -2.0797340869903564, "logits/rejected": -2.0396041870117188, "logps/chosen": -242.691162109375, "logps/rejected": -201.09994506835938, "loss": 0.1446, "rewards/accuracies": 0.918749988079071, "rewards/chosen": 2.843183994293213, "rewards/margins": 3.8034491539001465, "rewards/rejected": -0.9602655172348022, "step": 95 }, { "epoch": 2.4844720496894412, "grad_norm": 18.76592353835065, "learning_rate": 7.937323358440934e-08, "logits/chosen": -2.139120578765869, "logits/rejected": -2.091543674468994, "logps/chosen": -231.04006958007812, "logps/rejected": -220.3035888671875, "loss": 0.1487, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.8384525775909424, "rewards/margins": 4.084644317626953, "rewards/rejected": -1.246191382408142, "step": 100 }, { "epoch": 2.4844720496894412, "eval_logits/chosen": -2.1241261959075928, "eval_logits/rejected": -2.093987226486206, "eval_logps/chosen": -241.23301696777344, "eval_logps/rejected": -252.7169647216797, "eval_loss": 0.5524086356163025, "eval_rewards/accuracies": 0.8125, "eval_rewards/chosen": 1.651939868927002, "eval_rewards/margins": 2.903874397277832, "eval_rewards/rejected": -1.2519348859786987, "eval_runtime": 76.7688, "eval_samples_per_second": 14.85, "eval_steps_per_second": 0.234, "step": 100 }, { "epoch": 2.608695652173913, "grad_norm": 20.331751556664074, "learning_rate": 4.518400232274078e-08, "logits/chosen": -2.1376147270202637, "logits/rejected": -2.092000961303711, "logps/chosen": -239.6346435546875, "logps/rejected": -220.95663452148438, "loss": 0.1515, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 2.8724117279052734, "rewards/margins": 4.305968761444092, "rewards/rejected": -1.4335569143295288, "step": 105 }, { "epoch": 2.732919254658385, "grad_norm": 16.10120397999774, "learning_rate": 2.025351319275137e-08, "logits/chosen": -2.1425278186798096, "logits/rejected": -2.1018919944763184, "logps/chosen": -245.0784149169922, "logps/rejected": -212.120361328125, "loss": 0.1297, "rewards/accuracies": 0.9375, "rewards/chosen": 2.7527151107788086, "rewards/margins": 4.1609883308410645, "rewards/rejected": -1.408272624015808, "step": 110 }, { "epoch": 2.857142857142857, "grad_norm": 15.75267796946239, "learning_rate": 5.0892790595336575e-09, "logits/chosen": -2.1198809146881104, "logits/rejected": -2.1020970344543457, "logps/chosen": -221.77182006835938, "logps/rejected": -220.3450927734375, "loss": 0.1345, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 2.698946952819824, "rewards/margins": 4.416926383972168, "rewards/rejected": -1.7179794311523438, "step": 115 }, { "epoch": 2.981366459627329, "grad_norm": 17.27205952924189, "learning_rate": 0.0, "logits/chosen": -2.1219396591186523, "logits/rejected": -2.1288697719573975, "logps/chosen": -226.1340789794922, "logps/rejected": -211.1949462890625, "loss": 0.1647, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3404417037963867, "rewards/margins": 3.8494620323181152, "rewards/rejected": -1.5090203285217285, "step": 120 }, { "epoch": 2.981366459627329, "step": 120, "total_flos": 1414680891359232.0, "train_loss": 0.3361198857426643, "train_runtime": 4442.0256, "train_samples_per_second": 6.927, "train_steps_per_second": 0.027 } ], "logging_steps": 5, "max_steps": 120, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1414680891359232.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }