{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 50, "global_step": 216, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06944444444444445, "grad_norm": 36.21096742368932, "learning_rate": 5e-07, "logits/chosen": -2.735914945602417, "logits/rejected": -2.7412195205688477, "logps/chosen": -166.00094604492188, "logps/rejected": -162.81643676757812, "loss": 0.6928, "rewards/accuracies": 0.3375000059604645, "rewards/chosen": 0.00429560337215662, "rewards/margins": 0.0009204222005791962, "rewards/rejected": 0.0033751812297850847, "step": 5 }, { "epoch": 0.1388888888888889, "grad_norm": 35.97443184449595, "learning_rate": 1e-06, "logits/chosen": -2.742196798324585, "logits/rejected": -2.7352712154388428, "logps/chosen": -163.42056274414062, "logps/rejected": -168.62094116210938, "loss": 0.6867, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": 0.10343559086322784, "rewards/margins": 0.005917676724493504, "rewards/rejected": 0.09751791507005692, "step": 10 }, { "epoch": 0.20833333333333334, "grad_norm": 35.479696664348296, "learning_rate": 9.985471028179154e-07, "logits/chosen": -2.715827465057373, "logits/rejected": -2.7099735736846924, "logps/chosen": -164.28744506835938, "logps/rejected": -166.86209106445312, "loss": 0.6686, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.31278976798057556, "rewards/margins": 0.10602164268493652, "rewards/rejected": 0.20676811039447784, "step": 15 }, { "epoch": 0.2777777777777778, "grad_norm": 34.26437345645622, "learning_rate": 9.94196854912548e-07, "logits/chosen": -2.6752734184265137, "logits/rejected": -2.670536518096924, "logps/chosen": -162.92506408691406, "logps/rejected": -162.58132934570312, "loss": 0.653, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.23271910846233368, "rewards/margins": 0.16488614678382874, "rewards/rejected": 0.06783294677734375, "step": 20 }, { "epoch": 0.3472222222222222, "grad_norm": 38.12636989971247, "learning_rate": 9.869745381355905e-07, "logits/chosen": -2.612743854522705, "logits/rejected": -2.601036310195923, "logps/chosen": -169.65054321289062, "logps/rejected": -170.94589233398438, "loss": 0.6341, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.23453514277935028, "rewards/margins": 0.1853707879781723, "rewards/rejected": 0.04916436970233917, "step": 25 }, { "epoch": 0.4166666666666667, "grad_norm": 37.15638790112506, "learning_rate": 9.769221256218162e-07, "logits/chosen": -2.6376729011535645, "logits/rejected": -2.6211869716644287, "logps/chosen": -169.70230102539062, "logps/rejected": -169.1073760986328, "loss": 0.6276, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.197641983628273, "rewards/margins": 0.23505587875843048, "rewards/rejected": -0.03741389513015747, "step": 30 }, { "epoch": 0.4861111111111111, "grad_norm": 31.96578650923538, "learning_rate": 9.64098037858483e-07, "logits/chosen": -2.6476080417633057, "logits/rejected": -2.638826847076416, "logps/chosen": -164.2353515625, "logps/rejected": -171.78424072265625, "loss": 0.6001, "rewards/accuracies": 0.65625, "rewards/chosen": 0.011483956128358841, "rewards/margins": 0.3633750379085541, "rewards/rejected": -0.35189107060432434, "step": 35 }, { "epoch": 0.5555555555555556, "grad_norm": 35.60629409012632, "learning_rate": 9.485768031694871e-07, "logits/chosen": -2.6523194313049316, "logits/rejected": -2.621492385864258, "logps/chosen": -168.99270629882812, "logps/rejected": -177.50718688964844, "loss": 0.5939, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.2566075921058655, "rewards/margins": 0.5398613214492798, "rewards/rejected": -0.79646897315979, "step": 40 }, { "epoch": 0.625, "grad_norm": 32.80701192573668, "learning_rate": 9.304486245873971e-07, "logits/chosen": -2.657984495162964, "logits/rejected": -2.6483747959136963, "logps/chosen": -163.6527557373047, "logps/rejected": -167.71705627441406, "loss": 0.5942, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24404068291187286, "rewards/margins": 0.5225220918655396, "rewards/rejected": -0.766562819480896, "step": 45 }, { "epoch": 0.6944444444444444, "grad_norm": 32.94692651420362, "learning_rate": 9.098188556305262e-07, "logits/chosen": -2.732595682144165, "logits/rejected": -2.7179951667785645, "logps/chosen": -159.82009887695312, "logps/rejected": -163.01516723632812, "loss": 0.575, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.31872302293777466, "rewards/margins": 0.4740700125694275, "rewards/rejected": -0.7927930951118469, "step": 50 }, { "epoch": 0.6944444444444444, "eval_logits/chosen": -2.7981717586517334, "eval_logits/rejected": -2.7966415882110596, "eval_logps/chosen": -171.31138610839844, "eval_logps/rejected": -180.07443237304688, "eval_loss": 0.5679606199264526, "eval_rewards/accuracies": 0.69140625, "eval_rewards/chosen": -0.5232083201408386, "eval_rewards/margins": 0.6266617178916931, "eval_rewards/rejected": -1.1498699188232422, "eval_runtime": 127.2891, "eval_samples_per_second": 16.05, "eval_steps_per_second": 0.251, "step": 50 }, { "epoch": 0.7638888888888888, "grad_norm": 34.97882221943595, "learning_rate": 8.868073880316123e-07, "logits/chosen": -2.835651397705078, "logits/rejected": -2.836982250213623, "logps/chosen": -173.93702697753906, "logps/rejected": -180.1125030517578, "loss": 0.5562, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.5288220643997192, "rewards/margins": 0.7004331350326538, "rewards/rejected": -1.229255199432373, "step": 55 }, { "epoch": 0.8333333333333334, "grad_norm": 31.166211388759624, "learning_rate": 8.615479549763755e-07, "logits/chosen": -2.8652548789978027, "logits/rejected": -2.8450732231140137, "logps/chosen": -169.28530883789062, "logps/rejected": -171.57772827148438, "loss": 0.5334, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.237405925989151, "rewards/margins": 0.7358155846595764, "rewards/rejected": -0.973221480846405, "step": 60 }, { "epoch": 0.9027777777777778, "grad_norm": 31.846767846888632, "learning_rate": 8.341873539012443e-07, "logits/chosen": -2.8254175186157227, "logits/rejected": -2.8189828395843506, "logps/chosen": -178.87318420410156, "logps/rejected": -185.26193237304688, "loss": 0.5483, "rewards/accuracies": 0.65625, "rewards/chosen": -0.3370054364204407, "rewards/margins": 0.6962798237800598, "rewards/rejected": -1.0332852602005005, "step": 65 }, { "epoch": 0.9722222222222222, "grad_norm": 30.952350544641195, "learning_rate": 8.048845933670271e-07, "logits/chosen": -2.7731075286865234, "logits/rejected": -2.7470154762268066, "logps/chosen": -181.693359375, "logps/rejected": -192.0342254638672, "loss": 0.5378, "rewards/accuracies": 0.71875, "rewards/chosen": -0.617510974407196, "rewards/margins": 0.8088364601135254, "rewards/rejected": -1.4263474941253662, "step": 70 }, { "epoch": 1.0416666666666667, "grad_norm": 20.244570418413698, "learning_rate": 7.738099689665539e-07, "logits/chosen": -2.679137706756592, "logits/rejected": -2.676011800765991, "logps/chosen": -172.0131072998047, "logps/rejected": -184.72222900390625, "loss": 0.3552, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.7075360417366028, "rewards/margins": 1.6730060577392578, "rewards/rejected": -2.380542278289795, "step": 75 }, { "epoch": 1.1111111111111112, "grad_norm": 18.51901755323729, "learning_rate": 7.41144073636728e-07, "logits/chosen": -2.663628339767456, "logits/rejected": -2.6579511165618896, "logps/chosen": -181.416748046875, "logps/rejected": -201.2371063232422, "loss": 0.2457, "rewards/accuracies": 0.90625, "rewards/chosen": 0.018878469243645668, "rewards/margins": 2.686278820037842, "rewards/rejected": -2.667400360107422, "step": 80 }, { "epoch": 1.1805555555555556, "grad_norm": 20.24347077505837, "learning_rate": 7.070767481266492e-07, "logits/chosen": -2.68660569190979, "logits/rejected": -2.6812427043914795, "logps/chosen": -160.11874389648438, "logps/rejected": -179.59771728515625, "loss": 0.227, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.016312014311552048, "rewards/margins": 2.060859203338623, "rewards/rejected": -2.0771713256835938, "step": 85 }, { "epoch": 1.25, "grad_norm": 20.15132514330672, "learning_rate": 6.718059777212565e-07, "logits/chosen": -2.69787859916687, "logits/rejected": -2.7063913345336914, "logps/chosen": -165.69448852539062, "logps/rejected": -190.65296936035156, "loss": 0.2041, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.18171457946300507, "rewards/margins": 2.763362407684326, "rewards/rejected": -2.9450771808624268, "step": 90 }, { "epoch": 1.3194444444444444, "grad_norm": 21.629755831470078, "learning_rate": 6.355367416322778e-07, "logits/chosen": -2.7282795906066895, "logits/rejected": -2.7191052436828613, "logps/chosen": -176.59262084960938, "logps/rejected": -204.98123168945312, "loss": 0.2028, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1713067591190338, "rewards/margins": 3.3725147247314453, "rewards/rejected": -3.543820858001709, "step": 95 }, { "epoch": 1.3888888888888888, "grad_norm": 26.9593328849758, "learning_rate": 5.984798217433531e-07, "logits/chosen": -2.690068006515503, "logits/rejected": -2.69694185256958, "logps/chosen": -170.9009246826172, "logps/rejected": -199.11679077148438, "loss": 0.2161, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.24792905151844025, "rewards/margins": 3.1425349712371826, "rewards/rejected": -3.3904640674591064, "step": 100 }, { "epoch": 1.3888888888888888, "eval_logits/chosen": -2.6713719367980957, "eval_logits/rejected": -2.6708080768585205, "eval_logps/chosen": -177.34860229492188, "eval_logps/rejected": -191.46810913085938, "eval_loss": 0.541614830493927, "eval_rewards/accuracies": 0.74609375, "eval_rewards/chosen": -1.1269280910491943, "eval_rewards/margins": 1.162311315536499, "eval_rewards/rejected": -2.2892394065856934, "eval_runtime": 126.9171, "eval_samples_per_second": 16.097, "eval_steps_per_second": 0.252, "step": 100 }, { "epoch": 1.4583333333333333, "grad_norm": 20.618955156266612, "learning_rate": 5.608505776324157e-07, "logits/chosen": -2.6715927124023438, "logits/rejected": -2.678304433822632, "logps/chosen": -160.5848388671875, "logps/rejected": -191.11619567871094, "loss": 0.1956, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.33186909556388855, "rewards/margins": 2.948606491088867, "rewards/rejected": -3.280475616455078, "step": 105 }, { "epoch": 1.5277777777777777, "grad_norm": 27.47403889294586, "learning_rate": 5.228676949903973e-07, "logits/chosen": -2.677685022354126, "logits/rejected": -2.6649279594421387, "logps/chosen": -167.76614379882812, "logps/rejected": -201.45071411132812, "loss": 0.2051, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7044020295143127, "rewards/margins": 3.09299373626709, "rewards/rejected": -3.7973952293395996, "step": 110 }, { "epoch": 1.5972222222222223, "grad_norm": 25.78263768190195, "learning_rate": 4.847519147099294e-07, "logits/chosen": -2.673830986022949, "logits/rejected": -2.667365789413452, "logps/chosen": -165.1271209716797, "logps/rejected": -192.34664916992188, "loss": 0.2023, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.7367149591445923, "rewards/margins": 2.9380996227264404, "rewards/rejected": -3.674814224243164, "step": 115 }, { "epoch": 1.6666666666666665, "grad_norm": 20.347389718573403, "learning_rate": 4.46724750030062e-07, "logits/chosen": -2.6994948387145996, "logits/rejected": -2.657327175140381, "logps/chosen": -167.97816467285156, "logps/rejected": -196.3680877685547, "loss": 0.1879, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.4436143934726715, "rewards/margins": 3.0548558235168457, "rewards/rejected": -3.4984703063964844, "step": 120 }, { "epoch": 1.7361111111111112, "grad_norm": 23.654938219435277, "learning_rate": 4.0900719919241935e-07, "logits/chosen": -2.6885862350463867, "logits/rejected": -2.6681549549102783, "logps/chosen": -174.38291931152344, "logps/rejected": -210.3325653076172, "loss": 0.1744, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.4140690863132477, "rewards/margins": 3.468705654144287, "rewards/rejected": -3.8827743530273438, "step": 125 }, { "epoch": 1.8055555555555556, "grad_norm": 24.500980504365117, "learning_rate": 3.7181846109031e-07, "logits/chosen": -2.6993517875671387, "logits/rejected": -2.6847097873687744, "logps/chosen": -162.849609375, "logps/rejected": -192.41830444335938, "loss": 0.1788, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.6508005857467651, "rewards/margins": 3.144455671310425, "rewards/rejected": -3.7952563762664795, "step": 130 }, { "epoch": 1.875, "grad_norm": 26.706394086119467, "learning_rate": 3.353746613749093e-07, "logits/chosen": -2.6868338584899902, "logits/rejected": -2.6891016960144043, "logps/chosen": -171.8800506591797, "logps/rejected": -201.1620330810547, "loss": 0.1877, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5446206331253052, "rewards/margins": 3.2972817420959473, "rewards/rejected": -3.841902256011963, "step": 135 }, { "epoch": 1.9444444444444444, "grad_norm": 21.039975863861176, "learning_rate": 2.9988759642186093e-07, "logits/chosen": -2.677610397338867, "logits/rejected": -2.6499438285827637, "logps/chosen": -178.9335174560547, "logps/rejected": -216.8883056640625, "loss": 0.1809, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4429135322570801, "rewards/margins": 3.727440595626831, "rewards/rejected": -4.17035436630249, "step": 140 }, { "epoch": 2.013888888888889, "grad_norm": 15.18765450974654, "learning_rate": 2.655635024578483e-07, "logits/chosen": -2.678591012954712, "logits/rejected": -2.6765263080596924, "logps/chosen": -175.04360961914062, "logps/rejected": -206.4744415283203, "loss": 0.1621, "rewards/accuracies": 0.96875, "rewards/chosen": -0.7178301215171814, "rewards/margins": 3.4394123554229736, "rewards/rejected": -4.1572418212890625, "step": 145 }, { "epoch": 2.0833333333333335, "grad_norm": 11.856035922703338, "learning_rate": 2.3260185700046292e-07, "logits/chosen": -2.6947238445281982, "logits/rejected": -2.6655373573303223, "logps/chosen": -173.5478057861328, "logps/rejected": -222.5522918701172, "loss": 0.0912, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.3790014088153839, "rewards/margins": 4.613499164581299, "rewards/rejected": -4.9925007820129395, "step": 150 }, { "epoch": 2.0833333333333335, "eval_logits/chosen": -2.6673920154571533, "eval_logits/rejected": -2.6701459884643555, "eval_logps/chosen": -187.42156982421875, "eval_logps/rejected": -204.27391052246094, "eval_loss": 0.5558860898017883, "eval_rewards/accuracies": 0.71875, "eval_rewards/chosen": -2.1342270374298096, "eval_rewards/margins": 1.435591697692871, "eval_rewards/rejected": -3.5698184967041016, "eval_runtime": 126.9326, "eval_samples_per_second": 16.095, "eval_steps_per_second": 0.252, "step": 150 }, { "epoch": 2.1527777777777777, "grad_norm": 19.648989383169503, "learning_rate": 2.0119421957691218e-07, "logits/chosen": -2.655550956726074, "logits/rejected": -2.667914390563965, "logps/chosen": -181.70692443847656, "logps/rejected": -226.82400512695312, "loss": 0.0896, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.7839339375495911, "rewards/margins": 4.58644962310791, "rewards/rejected": -5.370383262634277, "step": 155 }, { "epoch": 2.2222222222222223, "grad_norm": 15.624018161097496, "learning_rate": 1.7152311845883094e-07, "logits/chosen": -2.6638529300689697, "logits/rejected": -2.619544267654419, "logps/chosen": -173.3532257080078, "logps/rejected": -220.2028350830078, "loss": 0.0812, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.1546170711517334, "rewards/margins": 4.321033000946045, "rewards/rejected": -5.475650310516357, "step": 160 }, { "epoch": 2.2916666666666665, "grad_norm": 17.11020248803683, "learning_rate": 1.4376098988303404e-07, "logits/chosen": -2.63069224357605, "logits/rejected": -2.6121063232421875, "logps/chosen": -173.0404510498047, "logps/rejected": -211.6892852783203, "loss": 0.0818, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -1.21498703956604, "rewards/margins": 4.448957443237305, "rewards/rejected": -5.663943767547607, "step": 165 }, { "epoch": 2.361111111111111, "grad_norm": 17.112572527447032, "learning_rate": 1.1806917592302761e-07, "logits/chosen": -2.6293816566467285, "logits/rejected": -2.597991943359375, "logps/chosen": -176.73304748535156, "logps/rejected": -224.79714965820312, "loss": 0.078, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.8656209707260132, "rewards/margins": 5.170679092407227, "rewards/rejected": -6.036300182342529, "step": 170 }, { "epoch": 2.4305555555555554, "grad_norm": 16.22760001506656, "learning_rate": 9.459698683523204e-08, "logits/chosen": -2.6276373863220215, "logits/rejected": -2.627812147140503, "logps/chosen": -182.4290008544922, "logps/rejected": -230.18063354492188, "loss": 0.0849, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -1.0544099807739258, "rewards/margins": 5.050833702087402, "rewards/rejected": -6.1052446365356445, "step": 175 }, { "epoch": 2.5, "grad_norm": 18.33379110443006, "learning_rate": 7.348083332917926e-08, "logits/chosen": -2.625272035598755, "logits/rejected": -2.597888946533203, "logps/chosen": -174.5447235107422, "logps/rejected": -223.33822631835938, "loss": 0.0853, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2181237936019897, "rewards/margins": 4.818295478820801, "rewards/rejected": -6.0364203453063965, "step": 180 }, { "epoch": 2.5694444444444446, "grad_norm": 19.469578336412205, "learning_rate": 5.484343380457124e-08, "logits/chosen": -2.604079008102417, "logits/rejected": -2.6138792037963867, "logps/chosen": -168.84449768066406, "logps/rejected": -218.6962432861328, "loss": 0.0846, "rewards/accuracies": 0.96875, "rewards/chosen": -1.0644819736480713, "rewards/margins": 4.62592077255249, "rewards/rejected": -5.690402030944824, "step": 185 }, { "epoch": 2.638888888888889, "grad_norm": 21.805307710932198, "learning_rate": 3.879310116241041e-08, "logits/chosen": -2.6105732917785645, "logits/rejected": -2.596642017364502, "logps/chosen": -174.59005737304688, "logps/rejected": -219.40878295898438, "loss": 0.0821, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.2059953212738037, "rewards/margins": 4.756261348724365, "rewards/rejected": -5.962257385253906, "step": 190 }, { "epoch": 2.7083333333333335, "grad_norm": 18.608644466504245, "learning_rate": 2.5423113334966218e-08, "logits/chosen": -2.6064913272857666, "logits/rejected": -2.616105794906616, "logps/chosen": -185.19998168945312, "logps/rejected": -225.87014770507812, "loss": 0.0772, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.8651708364486694, "rewards/margins": 4.7674455642700195, "rewards/rejected": -5.6326165199279785, "step": 195 }, { "epoch": 2.7777777777777777, "grad_norm": 17.74237497662627, "learning_rate": 1.4811171192794624e-08, "logits/chosen": -2.6110963821411133, "logits/rejected": -2.6080284118652344, "logps/chosen": -171.61239624023438, "logps/rejected": -220.25830078125, "loss": 0.0828, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.1769065856933594, "rewards/margins": 4.8281941413879395, "rewards/rejected": -6.005099773406982, "step": 200 }, { "epoch": 2.7777777777777777, "eval_logits/chosen": -2.60565185546875, "eval_logits/rejected": -2.607104539871216, "eval_logps/chosen": -193.79588317871094, "eval_logps/rejected": -210.87051391601562, "eval_loss": 0.590232253074646, "eval_rewards/accuracies": 0.72265625, "eval_rewards/chosen": -2.7716591358184814, "eval_rewards/margins": 1.457817554473877, "eval_rewards/rejected": -4.2294769287109375, "eval_runtime": 127.0051, "eval_samples_per_second": 16.086, "eval_steps_per_second": 0.252, "step": 200 }, { "epoch": 2.8472222222222223, "grad_norm": 20.206548432698874, "learning_rate": 7.018946979234997e-09, "logits/chosen": -2.6121597290039062, "logits/rejected": -2.6113522052764893, "logps/chosen": -171.62460327148438, "logps/rejected": -223.3975830078125, "loss": 0.088, "rewards/accuracies": 0.96875, "rewards/chosen": -1.0721832513809204, "rewards/margins": 4.880965232849121, "rewards/rejected": -5.95314884185791, "step": 205 }, { "epoch": 2.9166666666666665, "grad_norm": 16.26368007303619, "learning_rate": 2.0917258966953734e-09, "logits/chosen": -2.628420114517212, "logits/rejected": -2.607481002807617, "logps/chosen": -178.46401977539062, "logps/rejected": -225.849365234375, "loss": 0.0771, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -1.2336267232894897, "rewards/margins": 4.748377323150635, "rewards/rejected": -5.982003211975098, "step": 210 }, { "epoch": 2.986111111111111, "grad_norm": 13.799681183452929, "learning_rate": 5.814292768108187e-11, "logits/chosen": -2.610588550567627, "logits/rejected": -2.587486505508423, "logps/chosen": -188.3278045654297, "logps/rejected": -241.2031707763672, "loss": 0.0775, "rewards/accuracies": 0.96875, "rewards/chosen": -0.9807193875312805, "rewards/margins": 5.365724563598633, "rewards/rejected": -6.346443176269531, "step": 215 }, { "epoch": 3.0, "step": 216, "total_flos": 2546799910846464.0, "train_loss": 0.29657687264046184, "train_runtime": 7656.725, "train_samples_per_second": 7.203, "train_steps_per_second": 0.028 } ], "logging_steps": 5, "max_steps": 216, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2546799910846464.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }