|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.7777777777777777, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06944444444444445, |
|
"grad_norm": 36.21096742368932, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.735914945602417, |
|
"logits/rejected": -2.7412195205688477, |
|
"logps/chosen": -166.00094604492188, |
|
"logps/rejected": -162.81643676757812, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": 0.00429560337215662, |
|
"rewards/margins": 0.0009204222005791962, |
|
"rewards/rejected": 0.0033751812297850847, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.1388888888888889, |
|
"grad_norm": 35.97443184449595, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -2.742196798324585, |
|
"logits/rejected": -2.7352712154388428, |
|
"logps/chosen": -163.42056274414062, |
|
"logps/rejected": -168.62094116210938, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.10343559086322784, |
|
"rewards/margins": 0.005917676724493504, |
|
"rewards/rejected": 0.09751791507005692, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.20833333333333334, |
|
"grad_norm": 35.479696664348296, |
|
"learning_rate": 9.985471028179154e-07, |
|
"logits/chosen": -2.715827465057373, |
|
"logits/rejected": -2.7099735736846924, |
|
"logps/chosen": -164.28744506835938, |
|
"logps/rejected": -166.86209106445312, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.31278976798057556, |
|
"rewards/margins": 0.10602164268493652, |
|
"rewards/rejected": 0.20676811039447784, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"grad_norm": 34.26437345645622, |
|
"learning_rate": 9.94196854912548e-07, |
|
"logits/chosen": -2.6752734184265137, |
|
"logits/rejected": -2.670536518096924, |
|
"logps/chosen": -162.92506408691406, |
|
"logps/rejected": -162.58132934570312, |
|
"loss": 0.653, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.23271910846233368, |
|
"rewards/margins": 0.16488614678382874, |
|
"rewards/rejected": 0.06783294677734375, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3472222222222222, |
|
"grad_norm": 38.12636989971247, |
|
"learning_rate": 9.869745381355905e-07, |
|
"logits/chosen": -2.612743854522705, |
|
"logits/rejected": -2.601036310195923, |
|
"logps/chosen": -169.65054321289062, |
|
"logps/rejected": -170.94589233398438, |
|
"loss": 0.6341, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.23453514277935028, |
|
"rewards/margins": 0.1853707879781723, |
|
"rewards/rejected": 0.04916436970233917, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 37.15638790112506, |
|
"learning_rate": 9.769221256218162e-07, |
|
"logits/chosen": -2.6376729011535645, |
|
"logits/rejected": -2.6211869716644287, |
|
"logps/chosen": -169.70230102539062, |
|
"logps/rejected": -169.1073760986328, |
|
"loss": 0.6276, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.197641983628273, |
|
"rewards/margins": 0.23505587875843048, |
|
"rewards/rejected": -0.03741389513015747, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4861111111111111, |
|
"grad_norm": 31.96578650923538, |
|
"learning_rate": 9.64098037858483e-07, |
|
"logits/chosen": -2.6476080417633057, |
|
"logits/rejected": -2.638826847076416, |
|
"logps/chosen": -164.2353515625, |
|
"logps/rejected": -171.78424072265625, |
|
"loss": 0.6001, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.011483956128358841, |
|
"rewards/margins": 0.3633750379085541, |
|
"rewards/rejected": -0.35189107060432434, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 35.60629409012632, |
|
"learning_rate": 9.485768031694871e-07, |
|
"logits/chosen": -2.6523194313049316, |
|
"logits/rejected": -2.621492385864258, |
|
"logps/chosen": -168.99270629882812, |
|
"logps/rejected": -177.50718688964844, |
|
"loss": 0.5939, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.2566075921058655, |
|
"rewards/margins": 0.5398613214492798, |
|
"rewards/rejected": -0.79646897315979, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 32.80701192573668, |
|
"learning_rate": 9.304486245873971e-07, |
|
"logits/chosen": -2.657984495162964, |
|
"logits/rejected": -2.6483747959136963, |
|
"logps/chosen": -163.6527557373047, |
|
"logps/rejected": -167.71705627441406, |
|
"loss": 0.5942, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.24404068291187286, |
|
"rewards/margins": 0.5225220918655396, |
|
"rewards/rejected": -0.766562819480896, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.6944444444444444, |
|
"grad_norm": 32.94692651420362, |
|
"learning_rate": 9.098188556305262e-07, |
|
"logits/chosen": -2.732595682144165, |
|
"logits/rejected": -2.7179951667785645, |
|
"logps/chosen": -159.82009887695312, |
|
"logps/rejected": -163.01516723632812, |
|
"loss": 0.575, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.31872302293777466, |
|
"rewards/margins": 0.4740700125694275, |
|
"rewards/rejected": -0.7927930951118469, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6944444444444444, |
|
"eval_logits/chosen": -2.7981717586517334, |
|
"eval_logits/rejected": -2.7966415882110596, |
|
"eval_logps/chosen": -171.31138610839844, |
|
"eval_logps/rejected": -180.07443237304688, |
|
"eval_loss": 0.5679606199264526, |
|
"eval_rewards/accuracies": 0.69140625, |
|
"eval_rewards/chosen": -0.5232083201408386, |
|
"eval_rewards/margins": 0.6266617178916931, |
|
"eval_rewards/rejected": -1.1498699188232422, |
|
"eval_runtime": 127.2891, |
|
"eval_samples_per_second": 16.05, |
|
"eval_steps_per_second": 0.251, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7638888888888888, |
|
"grad_norm": 34.97882221943595, |
|
"learning_rate": 8.868073880316123e-07, |
|
"logits/chosen": -2.835651397705078, |
|
"logits/rejected": -2.836982250213623, |
|
"logps/chosen": -173.93702697753906, |
|
"logps/rejected": -180.1125030517578, |
|
"loss": 0.5562, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5288220643997192, |
|
"rewards/margins": 0.7004331350326538, |
|
"rewards/rejected": -1.229255199432373, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 31.166211388759624, |
|
"learning_rate": 8.615479549763755e-07, |
|
"logits/chosen": -2.8652548789978027, |
|
"logits/rejected": -2.8450732231140137, |
|
"logps/chosen": -169.28530883789062, |
|
"logps/rejected": -171.57772827148438, |
|
"loss": 0.5334, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.237405925989151, |
|
"rewards/margins": 0.7358155846595764, |
|
"rewards/rejected": -0.973221480846405, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9027777777777778, |
|
"grad_norm": 31.846767846888632, |
|
"learning_rate": 8.341873539012443e-07, |
|
"logits/chosen": -2.8254175186157227, |
|
"logits/rejected": -2.8189828395843506, |
|
"logps/chosen": -178.87318420410156, |
|
"logps/rejected": -185.26193237304688, |
|
"loss": 0.5483, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.3370054364204407, |
|
"rewards/margins": 0.6962798237800598, |
|
"rewards/rejected": -1.0332852602005005, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.9722222222222222, |
|
"grad_norm": 30.952350544641195, |
|
"learning_rate": 8.048845933670271e-07, |
|
"logits/chosen": -2.7731075286865234, |
|
"logits/rejected": -2.7470154762268066, |
|
"logps/chosen": -181.693359375, |
|
"logps/rejected": -192.0342254638672, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.617510974407196, |
|
"rewards/margins": 0.8088364601135254, |
|
"rewards/rejected": -1.4263474941253662, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.0416666666666667, |
|
"grad_norm": 20.244570418413698, |
|
"learning_rate": 7.738099689665539e-07, |
|
"logits/chosen": -2.679137706756592, |
|
"logits/rejected": -2.676011800765991, |
|
"logps/chosen": -172.0131072998047, |
|
"logps/rejected": -184.72222900390625, |
|
"loss": 0.3552, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.7075360417366028, |
|
"rewards/margins": 1.6730060577392578, |
|
"rewards/rejected": -2.380542278289795, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 18.51901755323729, |
|
"learning_rate": 7.41144073636728e-07, |
|
"logits/chosen": -2.663628339767456, |
|
"logits/rejected": -2.6579511165618896, |
|
"logps/chosen": -181.416748046875, |
|
"logps/rejected": -201.2371063232422, |
|
"loss": 0.2457, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.018878469243645668, |
|
"rewards/margins": 2.686278820037842, |
|
"rewards/rejected": -2.667400360107422, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.1805555555555556, |
|
"grad_norm": 20.24347077505837, |
|
"learning_rate": 7.070767481266492e-07, |
|
"logits/chosen": -2.68660569190979, |
|
"logits/rejected": -2.6812427043914795, |
|
"logps/chosen": -160.11874389648438, |
|
"logps/rejected": -179.59771728515625, |
|
"loss": 0.227, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.016312014311552048, |
|
"rewards/margins": 2.060859203338623, |
|
"rewards/rejected": -2.0771713256835938, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 20.15132514330672, |
|
"learning_rate": 6.718059777212565e-07, |
|
"logits/chosen": -2.69787859916687, |
|
"logits/rejected": -2.7063913345336914, |
|
"logps/chosen": -165.69448852539062, |
|
"logps/rejected": -190.65296936035156, |
|
"loss": 0.2041, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.18171457946300507, |
|
"rewards/margins": 2.763362407684326, |
|
"rewards/rejected": -2.9450771808624268, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.3194444444444444, |
|
"grad_norm": 21.629755831470078, |
|
"learning_rate": 6.355367416322778e-07, |
|
"logits/chosen": -2.7282795906066895, |
|
"logits/rejected": -2.7191052436828613, |
|
"logps/chosen": -176.59262084960938, |
|
"logps/rejected": -204.98123168945312, |
|
"loss": 0.2028, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.1713067591190338, |
|
"rewards/margins": 3.3725147247314453, |
|
"rewards/rejected": -3.543820858001709, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.3888888888888888, |
|
"grad_norm": 26.9593328849758, |
|
"learning_rate": 5.984798217433531e-07, |
|
"logits/chosen": -2.690068006515503, |
|
"logits/rejected": -2.69694185256958, |
|
"logps/chosen": -170.9009246826172, |
|
"logps/rejected": -199.11679077148438, |
|
"loss": 0.2161, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.24792905151844025, |
|
"rewards/margins": 3.1425349712371826, |
|
"rewards/rejected": -3.3904640674591064, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.3888888888888888, |
|
"eval_logits/chosen": -2.6713719367980957, |
|
"eval_logits/rejected": -2.6708080768585205, |
|
"eval_logps/chosen": -177.34860229492188, |
|
"eval_logps/rejected": -191.46810913085938, |
|
"eval_loss": 0.541614830493927, |
|
"eval_rewards/accuracies": 0.74609375, |
|
"eval_rewards/chosen": -1.1269280910491943, |
|
"eval_rewards/margins": 1.162311315536499, |
|
"eval_rewards/rejected": -2.2892394065856934, |
|
"eval_runtime": 126.9171, |
|
"eval_samples_per_second": 16.097, |
|
"eval_steps_per_second": 0.252, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.4583333333333333, |
|
"grad_norm": 20.618955156266612, |
|
"learning_rate": 5.608505776324157e-07, |
|
"logits/chosen": -2.6715927124023438, |
|
"logits/rejected": -2.678304433822632, |
|
"logps/chosen": -160.5848388671875, |
|
"logps/rejected": -191.11619567871094, |
|
"loss": 0.1956, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.33186909556388855, |
|
"rewards/margins": 2.948606491088867, |
|
"rewards/rejected": -3.280475616455078, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.5277777777777777, |
|
"grad_norm": 27.47403889294586, |
|
"learning_rate": 5.228676949903973e-07, |
|
"logits/chosen": -2.677685022354126, |
|
"logits/rejected": -2.6649279594421387, |
|
"logps/chosen": -167.76614379882812, |
|
"logps/rejected": -201.45071411132812, |
|
"loss": 0.2051, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.7044020295143127, |
|
"rewards/margins": 3.09299373626709, |
|
"rewards/rejected": -3.7973952293395996, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.5972222222222223, |
|
"grad_norm": 25.78263768190195, |
|
"learning_rate": 4.847519147099294e-07, |
|
"logits/chosen": -2.673830986022949, |
|
"logits/rejected": -2.667365789413452, |
|
"logps/chosen": -165.1271209716797, |
|
"logps/rejected": -192.34664916992188, |
|
"loss": 0.2023, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.7367149591445923, |
|
"rewards/margins": 2.9380996227264404, |
|
"rewards/rejected": -3.674814224243164, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 20.347389718573403, |
|
"learning_rate": 4.46724750030062e-07, |
|
"logits/chosen": -2.6994948387145996, |
|
"logits/rejected": -2.657327175140381, |
|
"logps/chosen": -167.97816467285156, |
|
"logps/rejected": -196.3680877685547, |
|
"loss": 0.1879, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.4436143934726715, |
|
"rewards/margins": 3.0548558235168457, |
|
"rewards/rejected": -3.4984703063964844, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.7361111111111112, |
|
"grad_norm": 23.654938219435277, |
|
"learning_rate": 4.0900719919241935e-07, |
|
"logits/chosen": -2.6885862350463867, |
|
"logits/rejected": -2.6681549549102783, |
|
"logps/chosen": -174.38291931152344, |
|
"logps/rejected": -210.3325653076172, |
|
"loss": 0.1744, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.4140690863132477, |
|
"rewards/margins": 3.468705654144287, |
|
"rewards/rejected": -3.8827743530273438, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.8055555555555556, |
|
"grad_norm": 24.500980504365117, |
|
"learning_rate": 3.7181846109031e-07, |
|
"logits/chosen": -2.6993517875671387, |
|
"logits/rejected": -2.6847097873687744, |
|
"logps/chosen": -162.849609375, |
|
"logps/rejected": -192.41830444335938, |
|
"loss": 0.1788, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.6508005857467651, |
|
"rewards/margins": 3.144455671310425, |
|
"rewards/rejected": -3.7952563762664795, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.875, |
|
"grad_norm": 26.706394086119467, |
|
"learning_rate": 3.353746613749093e-07, |
|
"logits/chosen": -2.6868338584899902, |
|
"logits/rejected": -2.6891016960144043, |
|
"logps/chosen": -171.8800506591797, |
|
"logps/rejected": -201.1620330810547, |
|
"loss": 0.1877, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.5446206331253052, |
|
"rewards/margins": 3.2972817420959473, |
|
"rewards/rejected": -3.841902256011963, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.9444444444444444, |
|
"grad_norm": 21.039975863861176, |
|
"learning_rate": 2.9988759642186093e-07, |
|
"logits/chosen": -2.677610397338867, |
|
"logits/rejected": -2.6499438285827637, |
|
"logps/chosen": -178.9335174560547, |
|
"logps/rejected": -216.8883056640625, |
|
"loss": 0.1809, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.4429135322570801, |
|
"rewards/margins": 3.727440595626831, |
|
"rewards/rejected": -4.17035436630249, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.013888888888889, |
|
"grad_norm": 15.18765450974654, |
|
"learning_rate": 2.655635024578483e-07, |
|
"logits/chosen": -2.678591012954712, |
|
"logits/rejected": -2.6765263080596924, |
|
"logps/chosen": -175.04360961914062, |
|
"logps/rejected": -206.4744415283203, |
|
"loss": 0.1621, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.7178301215171814, |
|
"rewards/margins": 3.4394123554229736, |
|
"rewards/rejected": -4.1572418212890625, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.0833333333333335, |
|
"grad_norm": 11.856035922703338, |
|
"learning_rate": 2.3260185700046292e-07, |
|
"logits/chosen": -2.6947238445281982, |
|
"logits/rejected": -2.6655373573303223, |
|
"logps/chosen": -173.5478057861328, |
|
"logps/rejected": -222.5522918701172, |
|
"loss": 0.0912, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.3790014088153839, |
|
"rewards/margins": 4.613499164581299, |
|
"rewards/rejected": -4.9925007820129395, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.0833333333333335, |
|
"eval_logits/chosen": -2.6673920154571533, |
|
"eval_logits/rejected": -2.6701459884643555, |
|
"eval_logps/chosen": -187.42156982421875, |
|
"eval_logps/rejected": -204.27391052246094, |
|
"eval_loss": 0.5558860898017883, |
|
"eval_rewards/accuracies": 0.71875, |
|
"eval_rewards/chosen": -2.1342270374298096, |
|
"eval_rewards/margins": 1.435591697692871, |
|
"eval_rewards/rejected": -3.5698184967041016, |
|
"eval_runtime": 126.9326, |
|
"eval_samples_per_second": 16.095, |
|
"eval_steps_per_second": 0.252, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.1527777777777777, |
|
"grad_norm": 19.648989383169503, |
|
"learning_rate": 2.0119421957691218e-07, |
|
"logits/chosen": -2.655550956726074, |
|
"logits/rejected": -2.667914390563965, |
|
"logps/chosen": -181.70692443847656, |
|
"logps/rejected": -226.82400512695312, |
|
"loss": 0.0896, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.7839339375495911, |
|
"rewards/margins": 4.58644962310791, |
|
"rewards/rejected": -5.370383262634277, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 15.624018161097496, |
|
"learning_rate": 1.7152311845883094e-07, |
|
"logits/chosen": -2.6638529300689697, |
|
"logits/rejected": -2.619544267654419, |
|
"logps/chosen": -173.3532257080078, |
|
"logps/rejected": -220.2028350830078, |
|
"loss": 0.0812, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.1546170711517334, |
|
"rewards/margins": 4.321033000946045, |
|
"rewards/rejected": -5.475650310516357, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.2916666666666665, |
|
"grad_norm": 17.11020248803683, |
|
"learning_rate": 1.4376098988303404e-07, |
|
"logits/chosen": -2.63069224357605, |
|
"logits/rejected": -2.6121063232421875, |
|
"logps/chosen": -173.0404510498047, |
|
"logps/rejected": -211.6892852783203, |
|
"loss": 0.0818, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.21498703956604, |
|
"rewards/margins": 4.448957443237305, |
|
"rewards/rejected": -5.663943767547607, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.361111111111111, |
|
"grad_norm": 17.112572527447032, |
|
"learning_rate": 1.1806917592302761e-07, |
|
"logits/chosen": -2.6293816566467285, |
|
"logits/rejected": -2.597991943359375, |
|
"logps/chosen": -176.73304748535156, |
|
"logps/rejected": -224.79714965820312, |
|
"loss": 0.078, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.8656209707260132, |
|
"rewards/margins": 5.170679092407227, |
|
"rewards/rejected": -6.036300182342529, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.4305555555555554, |
|
"grad_norm": 16.22760001506656, |
|
"learning_rate": 9.459698683523204e-08, |
|
"logits/chosen": -2.6276373863220215, |
|
"logits/rejected": -2.627812147140503, |
|
"logps/chosen": -182.4290008544922, |
|
"logps/rejected": -230.18063354492188, |
|
"loss": 0.0849, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -1.0544099807739258, |
|
"rewards/margins": 5.050833702087402, |
|
"rewards/rejected": -6.1052446365356445, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 18.33379110443006, |
|
"learning_rate": 7.348083332917926e-08, |
|
"logits/chosen": -2.625272035598755, |
|
"logits/rejected": -2.597888946533203, |
|
"logps/chosen": -174.5447235107422, |
|
"logps/rejected": -223.33822631835938, |
|
"loss": 0.0853, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.2181237936019897, |
|
"rewards/margins": 4.818295478820801, |
|
"rewards/rejected": -6.0364203453063965, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.5694444444444446, |
|
"grad_norm": 19.469578336412205, |
|
"learning_rate": 5.484343380457124e-08, |
|
"logits/chosen": -2.604079008102417, |
|
"logits/rejected": -2.6138792037963867, |
|
"logps/chosen": -168.84449768066406, |
|
"logps/rejected": -218.6962432861328, |
|
"loss": 0.0846, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -1.0644819736480713, |
|
"rewards/margins": 4.62592077255249, |
|
"rewards/rejected": -5.690402030944824, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.638888888888889, |
|
"grad_norm": 21.805307710932198, |
|
"learning_rate": 3.879310116241041e-08, |
|
"logits/chosen": -2.6105732917785645, |
|
"logits/rejected": -2.596642017364502, |
|
"logps/chosen": -174.59005737304688, |
|
"logps/rejected": -219.40878295898438, |
|
"loss": 0.0821, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.2059953212738037, |
|
"rewards/margins": 4.756261348724365, |
|
"rewards/rejected": -5.962257385253906, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.7083333333333335, |
|
"grad_norm": 18.608644466504245, |
|
"learning_rate": 2.5423113334966218e-08, |
|
"logits/chosen": -2.6064913272857666, |
|
"logits/rejected": -2.616105794906616, |
|
"logps/chosen": -185.19998168945312, |
|
"logps/rejected": -225.87014770507812, |
|
"loss": 0.0772, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.8651708364486694, |
|
"rewards/margins": 4.7674455642700195, |
|
"rewards/rejected": -5.6326165199279785, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 17.74237497662627, |
|
"learning_rate": 1.4811171192794624e-08, |
|
"logits/chosen": -2.6110963821411133, |
|
"logits/rejected": -2.6080284118652344, |
|
"logps/chosen": -171.61239624023438, |
|
"logps/rejected": -220.25830078125, |
|
"loss": 0.0828, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.1769065856933594, |
|
"rewards/margins": 4.8281941413879395, |
|
"rewards/rejected": -6.005099773406982, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"eval_logits/chosen": -2.60565185546875, |
|
"eval_logits/rejected": -2.607104539871216, |
|
"eval_logps/chosen": -193.79588317871094, |
|
"eval_logps/rejected": -210.87051391601562, |
|
"eval_loss": 0.590232253074646, |
|
"eval_rewards/accuracies": 0.72265625, |
|
"eval_rewards/chosen": -2.7716591358184814, |
|
"eval_rewards/margins": 1.457817554473877, |
|
"eval_rewards/rejected": -4.2294769287109375, |
|
"eval_runtime": 127.0051, |
|
"eval_samples_per_second": 16.086, |
|
"eval_steps_per_second": 0.252, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 216, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2358113407598592.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|