{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9703504043126685, "eval_steps": 500, "global_step": 276, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05390835579514825, "grad_norm": 502.0, "learning_rate": 8.92857142857143e-06, "log_odds_chosen": 5.912805557250977, "log_odds_ratio": -7.710684299468994, "logps/chosen": -24.25197982788086, "logps/rejected": -30.16664695739746, "loss": 186.4347, "nll_loss": 11.652168273925781, "rewards/accuracies": 0.484375, "rewards/chosen": -1.2125989198684692, "rewards/margins": 0.29573339223861694, "rewards/rejected": -1.5083322525024414, "step": 5 }, { "epoch": 0.1078167115902965, "grad_norm": 164.0, "learning_rate": 1.785714285714286e-05, "log_odds_chosen": 4.08050537109375, "log_odds_ratio": -5.399485111236572, "logps/chosen": -20.047760009765625, "logps/rejected": -24.1300106048584, "loss": 155.9258, "nll_loss": 9.745362281799316, "rewards/accuracies": 0.4781250059604645, "rewards/chosen": -1.0023880004882812, "rewards/margins": 0.20411260426044464, "rewards/rejected": -1.2065006494522095, "step": 10 }, { "epoch": 0.16172506738544473, "grad_norm": 304.0, "learning_rate": 2.6785714285714288e-05, "log_odds_chosen": 3.733511447906494, "log_odds_ratio": -6.636049747467041, "logps/chosen": -21.219013214111328, "logps/rejected": -24.95370864868164, "loss": 155.6621, "nll_loss": 9.728882789611816, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": -1.060950517654419, "rewards/margins": 0.18673481047153473, "rewards/rejected": -1.2476855516433716, "step": 15 }, { "epoch": 0.215633423180593, "grad_norm": 540.0, "learning_rate": 3.571428571428572e-05, "log_odds_chosen": 0.06586956977844238, "log_odds_ratio": -5.591992378234863, "logps/chosen": -15.408183097839355, "logps/rejected": -15.473505020141602, "loss": 106.968, "nll_loss": 6.6854963302612305, "rewards/accuracies": 0.53125, "rewards/chosen": -0.7704092264175415, "rewards/margins": 0.0032660537399351597, "rewards/rejected": -0.7736751437187195, "step": 20 }, { "epoch": 0.2695417789757412, "grad_norm": 114.5, "learning_rate": 4.464285714285715e-05, "log_odds_chosen": 0.1850651204586029, "log_odds_ratio": -0.870222270488739, "logps/chosen": -2.379927158355713, "logps/rejected": -2.544480562210083, "loss": 38.8752, "nll_loss": 2.429699420928955, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.11899634450674057, "rewards/margins": 0.00822767335921526, "rewards/rejected": -0.12722402811050415, "step": 25 }, { "epoch": 0.32345013477088946, "grad_norm": 119.0, "learning_rate": 4.999197688241076e-05, "log_odds_chosen": 0.2413506954908371, "log_odds_ratio": -0.7524750232696533, "logps/chosen": -1.8451862335205078, "logps/rejected": -2.054591655731201, "loss": 33.8684, "nll_loss": 2.1167733669281006, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.09225932508707047, "rewards/margins": 0.010470272973179817, "rewards/rejected": -0.10272959619760513, "step": 30 }, { "epoch": 0.37735849056603776, "grad_norm": 65.0, "learning_rate": 4.9901775939413026e-05, "log_odds_chosen": 0.25174349546432495, "log_odds_ratio": -0.7253037691116333, "logps/chosen": -1.6166225671768188, "logps/rejected": -1.8298925161361694, "loss": 29.1245, "nll_loss": 1.820279836654663, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.0808311253786087, "rewards/margins": 0.010663499124348164, "rewards/rejected": -0.091494619846344, "step": 35 }, { "epoch": 0.431266846361186, "grad_norm": 101.5, "learning_rate": 4.971170810820279e-05, "log_odds_chosen": 0.2364540547132492, "log_odds_ratio": -0.7084470391273499, "logps/chosen": -1.5713794231414795, "logps/rejected": -1.7505134344100952, "loss": 29.3433, "nll_loss": 1.8339534997940063, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.07856898009777069, "rewards/margins": 0.008956688456237316, "rewards/rejected": -0.08752566576004028, "step": 40 }, { "epoch": 0.48517520215633425, "grad_norm": 43.75, "learning_rate": 4.942253564296218e-05, "log_odds_chosen": 0.18266446888446808, "log_odds_ratio": -0.7139922976493835, "logps/chosen": -1.3910434246063232, "logps/rejected": -1.5377957820892334, "loss": 27.6051, "nll_loss": 1.725320816040039, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.06955216825008392, "rewards/margins": 0.007337613496929407, "rewards/rejected": -0.07688979059457779, "step": 45 }, { "epoch": 0.5390835579514824, "grad_norm": 35.75, "learning_rate": 4.9035418250305314e-05, "log_odds_chosen": 0.15111494064331055, "log_odds_ratio": -0.7071037292480469, "logps/chosen": -1.3401615619659424, "logps/rejected": -1.4416334629058838, "loss": 26.5156, "nll_loss": 1.6572233438491821, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.06700807809829712, "rewards/margins": 0.005073595326393843, "rewards/rejected": -0.07208167761564255, "step": 50 }, { "epoch": 0.5929919137466307, "grad_norm": 117.0, "learning_rate": 4.8551908438353374e-05, "log_odds_chosen": 0.20908907055854797, "log_odds_ratio": -0.6856271028518677, "logps/chosen": -1.2825366258621216, "logps/rejected": -1.4346181154251099, "loss": 25.8045, "nll_loss": 1.6127817630767822, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.06412683427333832, "rewards/margins": 0.007604071404784918, "rewards/rejected": -0.07173089683055878, "step": 55 }, { "epoch": 0.6469002695417789, "grad_norm": 55.25, "learning_rate": 4.7973945290505766e-05, "log_odds_chosen": 0.13437321782112122, "log_odds_ratio": -0.7068942785263062, "logps/chosen": -1.2431588172912598, "logps/rejected": -1.3494141101837158, "loss": 24.9189, "nll_loss": 1.5574296712875366, "rewards/accuracies": 0.578125, "rewards/chosen": -0.06215794011950493, "rewards/margins": 0.005312758963555098, "rewards/rejected": -0.06747071444988251, "step": 60 }, { "epoch": 0.7008086253369272, "grad_norm": 115.5, "learning_rate": 4.73038466888773e-05, "log_odds_chosen": 0.11516331136226654, "log_odds_ratio": -0.7177757024765015, "logps/chosen": -1.2244114875793457, "logps/rejected": -1.3131043910980225, "loss": 24.9998, "nll_loss": 1.5624865293502808, "rewards/accuracies": 0.515625, "rewards/chosen": -0.061220575124025345, "rewards/margins": 0.004434647969901562, "rewards/rejected": -0.06565522402524948, "step": 65 }, { "epoch": 0.7547169811320755, "grad_norm": 85.5, "learning_rate": 4.654430001858874e-05, "log_odds_chosen": 0.13228605687618256, "log_odds_ratio": -0.7034366726875305, "logps/chosen": -1.1982498168945312, "logps/rejected": -1.2874435186386108, "loss": 24.5123, "nll_loss": 1.532017469406128, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.05991249158978462, "rewards/margins": 0.004459693096578121, "rewards/rejected": -0.06437218189239502, "step": 70 }, { "epoch": 0.8086253369272237, "grad_norm": 68.0, "learning_rate": 4.569835139019054e-05, "log_odds_chosen": 0.22792398929595947, "log_odds_ratio": -0.6663814783096313, "logps/chosen": -1.1594752073287964, "logps/rejected": -1.320555329322815, "loss": 24.2124, "nll_loss": 1.5132750272750854, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.05797375366091728, "rewards/margins": 0.00805400125682354, "rewards/rejected": -0.06602776050567627, "step": 75 }, { "epoch": 0.862533692722372, "grad_norm": 70.5, "learning_rate": 4.476939342344246e-05, "log_odds_chosen": 0.21846911311149597, "log_odds_ratio": -0.6512280702590942, "logps/chosen": -1.0879645347595215, "logps/rejected": -1.2357169389724731, "loss": 23.5682, "nll_loss": 1.4730117321014404, "rewards/accuracies": 0.59375, "rewards/chosen": -0.054398227483034134, "rewards/margins": 0.007387618534266949, "rewards/rejected": -0.06178584694862366, "step": 80 }, { "epoch": 0.9164420485175202, "grad_norm": 38.0, "learning_rate": 4.376115164144157e-05, "log_odds_chosen": 0.15174248814582825, "log_odds_ratio": -0.6834980845451355, "logps/chosen": -1.0666790008544922, "logps/rejected": -1.1634466648101807, "loss": 22.9808, "nll_loss": 1.4362987279891968, "rewards/accuracies": 0.578125, "rewards/chosen": -0.05333394929766655, "rewards/margins": 0.004838378168642521, "rewards/rejected": -0.058172326534986496, "step": 85 }, { "epoch": 0.9703504043126685, "grad_norm": 64.5, "learning_rate": 4.267766952966369e-05, "log_odds_chosen": 0.1273517906665802, "log_odds_ratio": -0.6930577158927917, "logps/chosen": -1.040971279144287, "logps/rejected": -1.1280105113983154, "loss": 22.4486, "nll_loss": 1.4030355215072632, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.05204857140779495, "rewards/margins": 0.004351964220404625, "rewards/rejected": -0.05640053004026413, "step": 90 }, { "epoch": 1.0215633423180592, "grad_norm": 25.875, "learning_rate": 4.1523292319838524e-05, "log_odds_chosen": 0.21998043358325958, "log_odds_ratio": -0.6607629656791687, "logps/chosen": -0.9751634001731873, "logps/rejected": -1.1191428899765015, "loss": 20.5047, "nll_loss": 1.348992943763733, "rewards/accuracies": 0.5953947305679321, "rewards/chosen": -0.04875817149877548, "rewards/margins": 0.007198969833552837, "rewards/rejected": -0.0559571348130703, "step": 95 }, { "epoch": 1.0754716981132075, "grad_norm": 39.0, "learning_rate": 4.030264956369157e-05, "log_odds_chosen": 0.4434036314487457, "log_odds_ratio": -0.6344673037528992, "logps/chosen": -0.9448977708816528, "logps/rejected": -1.1891727447509766, "loss": 21.1138, "nll_loss": 1.319615125656128, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04724489524960518, "rewards/margins": 0.012213751673698425, "rewards/rejected": -0.059458643198013306, "step": 100 }, { "epoch": 1.1293800539083558, "grad_norm": 29.625, "learning_rate": 3.902063656644012e-05, "log_odds_chosen": 0.5363696813583374, "log_odds_ratio": -0.5325912833213806, "logps/chosen": -0.8145742416381836, "logps/rejected": -1.1247040033340454, "loss": 18.4662, "nll_loss": 1.1541385650634766, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.04072871431708336, "rewards/margins": 0.015506483614444733, "rewards/rejected": -0.05623519420623779, "step": 105 }, { "epoch": 1.1832884097035041, "grad_norm": 25.625, "learning_rate": 3.768239475450269e-05, "log_odds_chosen": 0.5174719095230103, "log_odds_ratio": -0.5490429401397705, "logps/chosen": -0.8520506620407104, "logps/rejected": -1.1713144779205322, "loss": 19.2575, "nll_loss": 1.20359206199646, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.0426025353372097, "rewards/margins": 0.01596318557858467, "rewards/rejected": -0.05856572464108467, "step": 110 }, { "epoch": 1.2371967654986522, "grad_norm": 44.0, "learning_rate": 3.629329105615617e-05, "log_odds_chosen": 0.5610077977180481, "log_odds_ratio": -0.5220470428466797, "logps/chosen": -0.8756101727485657, "logps/rejected": -1.2160618305206299, "loss": 19.0513, "nll_loss": 1.1907049417495728, "rewards/accuracies": 0.765625, "rewards/chosen": -0.04378051310777664, "rewards/margins": 0.01702258363366127, "rewards/rejected": -0.060803093016147614, "step": 115 }, { "epoch": 1.2911051212938005, "grad_norm": 39.5, "learning_rate": 3.4858896377832966e-05, "log_odds_chosen": 0.5247588157653809, "log_odds_ratio": -0.5250480771064758, "logps/chosen": -0.8190716505050659, "logps/rejected": -1.128647804260254, "loss": 17.8761, "nll_loss": 1.117258906364441, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.040953584015369415, "rewards/margins": 0.015478810295462608, "rewards/rejected": -0.05643239617347717, "step": 120 }, { "epoch": 1.3450134770889488, "grad_norm": 45.25, "learning_rate": 3.338496326237743e-05, "log_odds_chosen": 0.4806355834007263, "log_odds_ratio": -0.550317108631134, "logps/chosen": -0.8094690442085266, "logps/rejected": -1.100089430809021, "loss": 18.3711, "nll_loss": 1.148194432258606, "rewards/accuracies": 0.746874988079071, "rewards/chosen": -0.04047344997525215, "rewards/margins": 0.01453101821243763, "rewards/rejected": -0.05500447005033493, "step": 125 }, { "epoch": 1.398921832884097, "grad_norm": 35.0, "learning_rate": 3.187740281886195e-05, "log_odds_chosen": 0.6415280699729919, "log_odds_ratio": -0.48781052231788635, "logps/chosen": -0.8225423693656921, "logps/rejected": -1.2189260721206665, "loss": 18.4471, "nll_loss": 1.152944564819336, "rewards/accuracies": 0.784375011920929, "rewards/chosen": -0.04112711548805237, "rewards/margins": 0.01981918141245842, "rewards/rejected": -0.06094629690051079, "step": 130 }, { "epoch": 1.4528301886792452, "grad_norm": 21.5, "learning_rate": 3.034226101648377e-05, "log_odds_chosen": 0.6234865784645081, "log_odds_ratio": -0.4981662333011627, "logps/chosen": -0.8078993558883667, "logps/rejected": -1.180347204208374, "loss": 18.1349, "nll_loss": 1.1334304809570312, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.040394969284534454, "rewards/margins": 0.01862238720059395, "rewards/rejected": -0.059017352759838104, "step": 135 }, { "epoch": 1.5067385444743935, "grad_norm": 54.0, "learning_rate": 2.878569443761442e-05, "log_odds_chosen": 0.5453085899353027, "log_odds_ratio": -0.520926833152771, "logps/chosen": -0.8231021165847778, "logps/rejected": -1.1491215229034424, "loss": 18.0104, "nll_loss": 1.1256530284881592, "rewards/accuracies": 0.78125, "rewards/chosen": -0.04115510731935501, "rewards/margins": 0.016300970688462257, "rewards/rejected": -0.057456083595752716, "step": 140 }, { "epoch": 1.5606469002695418, "grad_norm": 35.5, "learning_rate": 2.7213945587242508e-05, "log_odds_chosen": 0.4767599105834961, "log_odds_ratio": -0.5450612902641296, "logps/chosen": -0.8506708145141602, "logps/rejected": -1.1368257999420166, "loss": 19.6345, "nll_loss": 1.2271578311920166, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.04253353923559189, "rewards/margins": 0.014307747595012188, "rewards/rejected": -0.05684129148721695, "step": 145 }, { "epoch": 1.61455525606469, "grad_norm": 24.75, "learning_rate": 2.5633317857829697e-05, "log_odds_chosen": 0.5109966993331909, "log_odds_ratio": -0.5330369472503662, "logps/chosen": -0.7959780693054199, "logps/rejected": -1.100124716758728, "loss": 17.9396, "nll_loss": 1.1212230920791626, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.03979891166090965, "rewards/margins": 0.01520733255892992, "rewards/rejected": -0.0550062358379364, "step": 150 }, { "epoch": 1.6684636118598384, "grad_norm": 28.125, "learning_rate": 2.4050150249981522e-05, "log_odds_chosen": 0.5154935717582703, "log_odds_ratio": -0.5551471710205078, "logps/chosen": -0.7858830094337463, "logps/rejected": -1.0894118547439575, "loss": 17.763, "nll_loss": 1.1101869344711304, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.039294153451919556, "rewards/margins": 0.015176435932517052, "rewards/rejected": -0.05447059124708176, "step": 155 }, { "epoch": 1.7223719676549867, "grad_norm": 25.625, "learning_rate": 2.24707919503142e-05, "log_odds_chosen": 0.612939715385437, "log_odds_ratio": -0.5060396790504456, "logps/chosen": -0.8042556643486023, "logps/rejected": -1.1593209505081177, "loss": 17.9033, "nll_loss": 1.1189591884613037, "rewards/accuracies": 0.765625, "rewards/chosen": -0.04021278768777847, "rewards/margins": 0.01775326207280159, "rewards/rejected": -0.057966046035289764, "step": 160 }, { "epoch": 1.7762803234501348, "grad_norm": 30.5, "learning_rate": 2.0901576868471125e-05, "log_odds_chosen": 0.7125197649002075, "log_odds_ratio": -0.4848386347293854, "logps/chosen": -0.7776973843574524, "logps/rejected": -1.216399908065796, "loss": 17.609, "nll_loss": 1.1005606651306152, "rewards/accuracies": 0.8031250238418579, "rewards/chosen": -0.03888486698269844, "rewards/margins": 0.02193513885140419, "rewards/rejected": -0.06082000210881233, "step": 165 }, { "epoch": 1.830188679245283, "grad_norm": 25.5, "learning_rate": 1.934879823540663e-05, "log_odds_chosen": 0.6370295286178589, "log_odds_ratio": -0.5132786631584167, "logps/chosen": -0.7767125368118286, "logps/rejected": -1.1403437852859497, "loss": 18.0793, "nll_loss": 1.1299545764923096, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.03883562982082367, "rewards/margins": 0.018181564286351204, "rewards/rejected": -0.05701719596982002, "step": 170 }, { "epoch": 1.8840970350404311, "grad_norm": 24.125, "learning_rate": 1.7818683364808884e-05, "log_odds_chosen": 0.5283645391464233, "log_odds_ratio": -0.5327858328819275, "logps/chosen": -0.8068645596504211, "logps/rejected": -1.107076644897461, "loss": 17.5256, "nll_loss": 1.0953474044799805, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.04034322127699852, "rewards/margins": 0.015010610222816467, "rewards/rejected": -0.055353838950395584, "step": 175 }, { "epoch": 1.9380053908355794, "grad_norm": 30.375, "learning_rate": 1.6317368678879495e-05, "log_odds_chosen": 0.4843871593475342, "log_odds_ratio": -0.5502706170082092, "logps/chosen": -0.8421553373336792, "logps/rejected": -1.1288360357284546, "loss": 18.9738, "nll_loss": 1.1858609914779663, "rewards/accuracies": 0.734375, "rewards/chosen": -0.04210776835680008, "rewards/margins": 0.014334036037325859, "rewards/rejected": -0.05644180253148079, "step": 180 }, { "epoch": 1.9919137466307277, "grad_norm": 28.375, "learning_rate": 1.4850875098627326e-05, "log_odds_chosen": 0.5836633443832397, "log_odds_ratio": -0.5226461291313171, "logps/chosen": -0.799677312374115, "logps/rejected": -1.1371490955352783, "loss": 18.0322, "nll_loss": 1.1270129680633545, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.03998386859893799, "rewards/margins": 0.01687358133494854, "rewards/rejected": -0.056857455521821976, "step": 185 }, { "epoch": 2.0431266846361185, "grad_norm": 21.125, "learning_rate": 1.3425083897371981e-05, "log_odds_chosen": 0.8875333666801453, "log_odds_ratio": -0.4172805845737457, "logps/chosen": -0.6325610876083374, "logps/rejected": -1.107661247253418, "loss": 13.71, "nll_loss": 0.901972234249115, "rewards/accuracies": 0.8585526347160339, "rewards/chosen": -0.03162805363535881, "rewards/margins": 0.023755012080073357, "rewards/rejected": -0.055383067578077316, "step": 190 }, { "epoch": 2.0970350404312668, "grad_norm": 29.5, "learning_rate": 1.204571311429496e-05, "log_odds_chosen": 1.1873928308486938, "log_odds_ratio": -0.3502134680747986, "logps/chosen": -0.6132036447525024, "logps/rejected": -1.2639284133911133, "loss": 14.8674, "nll_loss": 0.9292107820510864, "rewards/accuracies": 0.8843749761581421, "rewards/chosen": -0.030660182237625122, "rewards/margins": 0.032536230981349945, "rewards/rejected": -0.06319641321897507, "step": 195 }, { "epoch": 2.150943396226415, "grad_norm": 27.25, "learning_rate": 1.0718294622630188e-05, "log_odds_chosen": 1.1184864044189453, "log_odds_ratio": -0.3575947880744934, "logps/chosen": -0.6175593137741089, "logps/rejected": -1.2147563695907593, "loss": 13.9253, "nll_loss": 0.8703301548957825, "rewards/accuracies": 0.8968750238418579, "rewards/chosen": -0.030877966433763504, "rewards/margins": 0.02985985204577446, "rewards/rejected": -0.06073781102895737, "step": 200 }, { "epoch": 2.2048517520215634, "grad_norm": 25.125, "learning_rate": 9.448151944460657e-06, "log_odds_chosen": 1.1300182342529297, "log_odds_ratio": -0.3609935939311981, "logps/chosen": -0.5816246271133423, "logps/rejected": -1.1945774555206299, "loss": 13.5606, "nll_loss": 0.8475350141525269, "rewards/accuracies": 0.8968750238418579, "rewards/chosen": -0.029081230983138084, "rewards/margins": 0.03064764477312565, "rewards/rejected": -0.059728872030973434, "step": 205 }, { "epoch": 2.2587601078167117, "grad_norm": 30.25, "learning_rate": 8.240378901093034e-06, "log_odds_chosen": 1.1773656606674194, "log_odds_ratio": -0.36282655596733093, "logps/chosen": -0.6218483448028564, "logps/rejected": -1.2632155418395996, "loss": 14.1748, "nll_loss": 0.8859266042709351, "rewards/accuracies": 0.8968750238418579, "rewards/chosen": -0.03109242022037506, "rewards/margins": 0.03206836059689522, "rewards/rejected": -0.06316077709197998, "step": 210 }, { "epoch": 2.31266846361186, "grad_norm": 27.75, "learning_rate": 7.099819184631928e-06, "log_odds_chosen": 1.1673331260681152, "log_odds_ratio": -0.35059279203414917, "logps/chosen": -0.5780085325241089, "logps/rejected": -1.1937094926834106, "loss": 13.6113, "nll_loss": 0.8507078289985657, "rewards/accuracies": 0.890625, "rewards/chosen": -0.028900425881147385, "rewards/margins": 0.030785048380494118, "rewards/rejected": -0.05968547612428665, "step": 215 }, { "epoch": 2.3665768194070083, "grad_norm": 21.75, "learning_rate": 6.031046932680229e-06, "log_odds_chosen": 1.1187890768051147, "log_odds_ratio": -0.36328989267349243, "logps/chosen": -0.5981144309043884, "logps/rejected": -1.2099401950836182, "loss": 13.4124, "nll_loss": 0.838273823261261, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.029905717819929123, "rewards/margins": 0.030591288581490517, "rewards/rejected": -0.06049700453877449, "step": 220 }, { "epoch": 2.420485175202156, "grad_norm": 23.25, "learning_rate": 5.038348384069663e-06, "log_odds_chosen": 1.0922346115112305, "log_odds_ratio": -0.36317089200019836, "logps/chosen": -0.6168917417526245, "logps/rejected": -1.194858193397522, "loss": 13.8242, "nll_loss": 0.8640131950378418, "rewards/accuracies": 0.909375011920929, "rewards/chosen": -0.030844587832689285, "rewards/margins": 0.028898322954773903, "rewards/rejected": -0.059742916375398636, "step": 225 }, { "epoch": 2.4743935309973044, "grad_norm": 24.875, "learning_rate": 4.125704689189819e-06, "log_odds_chosen": 1.099705696105957, "log_odds_ratio": -0.3621538579463959, "logps/chosen": -0.5992009043693542, "logps/rejected": -1.1859813928604126, "loss": 14.0204, "nll_loss": 0.8762725591659546, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.029960045590996742, "rewards/margins": 0.02933902107179165, "rewards/rejected": -0.05929907411336899, "step": 230 }, { "epoch": 2.5283018867924527, "grad_norm": 30.75, "learning_rate": 3.296775943853789e-06, "log_odds_chosen": 1.1012507677078247, "log_odds_ratio": -0.37290987372398376, "logps/chosen": -0.600740909576416, "logps/rejected": -1.1732831001281738, "loss": 13.7312, "nll_loss": 0.8582011461257935, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.0300370454788208, "rewards/margins": 0.02862711250782013, "rewards/rejected": -0.05866416543722153, "step": 235 }, { "epoch": 2.582210242587601, "grad_norm": 21.875, "learning_rate": 2.5548865107314607e-06, "log_odds_chosen": 1.1265591382980347, "log_odds_ratio": -0.36651021242141724, "logps/chosen": -0.6239336729049683, "logps/rejected": -1.243729591369629, "loss": 14.1878, "nll_loss": 0.8867388963699341, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.031196683645248413, "rewards/margins": 0.030989795923233032, "rewards/rejected": -0.06218648701906204, "step": 240 }, { "epoch": 2.6361185983827493, "grad_norm": 24.0, "learning_rate": 1.9030116872178316e-06, "log_odds_chosen": 1.1169674396514893, "log_odds_ratio": -0.3630684018135071, "logps/chosen": -0.5876578092575073, "logps/rejected": -1.1631311178207397, "loss": 14.3677, "nll_loss": 0.8979824185371399, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.029382890090346336, "rewards/margins": 0.02877367101609707, "rewards/rejected": -0.058156561106443405, "step": 245 }, { "epoch": 2.6900269541778976, "grad_norm": 30.625, "learning_rate": 1.3437657732040782e-06, "log_odds_chosen": 1.2451064586639404, "log_odds_ratio": -0.33962780237197876, "logps/chosen": -0.559921145439148, "logps/rejected": -1.2268016338348389, "loss": 13.2893, "nll_loss": 0.83058100938797, "rewards/accuracies": 0.921875, "rewards/chosen": -0.027996059507131577, "rewards/margins": 0.033344022929668427, "rewards/rejected": -0.061340074986219406, "step": 250 }, { "epoch": 2.743935309973046, "grad_norm": 21.625, "learning_rate": 8.793915866046359e-07, "log_odds_chosen": 1.1345646381378174, "log_odds_ratio": -0.36994147300720215, "logps/chosen": -0.5653634071350098, "logps/rejected": -1.1617649793624878, "loss": 14.1254, "nll_loss": 0.8828363418579102, "rewards/accuracies": 0.871874988079071, "rewards/chosen": -0.02826816774904728, "rewards/margins": 0.02982008457183838, "rewards/rejected": -0.05808825045824051, "step": 255 }, { "epoch": 2.797843665768194, "grad_norm": 20.0, "learning_rate": 5.117514686876379e-07, "log_odds_chosen": 0.9944526553153992, "log_odds_ratio": -0.3954170346260071, "logps/chosen": -0.6244685649871826, "logps/rejected": -1.1500699520111084, "loss": 14.1807, "nll_loss": 0.8862916231155396, "rewards/accuracies": 0.8968750238418579, "rewards/chosen": -0.03122343122959137, "rewards/margins": 0.02628006599843502, "rewards/rejected": -0.05750349164009094, "step": 260 }, { "epoch": 2.8517520215633425, "grad_norm": 25.375, "learning_rate": 2.423198152812306e-07, "log_odds_chosen": 0.9933083653450012, "log_odds_ratio": -0.3899889588356018, "logps/chosen": -0.6133357286453247, "logps/rejected": -1.120845913887024, "loss": 13.8159, "nll_loss": 0.8634947538375854, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.030666787177324295, "rewards/margins": 0.02537550963461399, "rewards/rejected": -0.05604229494929314, "step": 265 }, { "epoch": 2.9056603773584904, "grad_norm": 25.0, "learning_rate": 7.217716380881479e-08, "log_odds_chosen": 1.142899751663208, "log_odds_ratio": -0.3680952191352844, "logps/chosen": -0.5903482437133789, "logps/rejected": -1.1948912143707275, "loss": 13.7773, "nll_loss": 0.8610836863517761, "rewards/accuracies": 0.8843749761581421, "rewards/chosen": -0.029517415910959244, "rewards/margins": 0.03022714890539646, "rewards/rejected": -0.05974455922842026, "step": 270 }, { "epoch": 2.9595687331536387, "grad_norm": 25.75, "learning_rate": 2.0058598667854756e-09, "log_odds_chosen": 1.1750845909118652, "log_odds_ratio": -0.3487832844257355, "logps/chosen": -0.562160313129425, "logps/rejected": -1.1947548389434814, "loss": 13.3099, "nll_loss": 0.8318702578544617, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.028108015656471252, "rewards/margins": 0.03162972629070282, "rewards/rejected": -0.05973774194717407, "step": 275 }, { "epoch": 2.9703504043126685, "step": 276, "total_flos": 0.0, "train_loss": 28.78522514605868, "train_runtime": 3021.362, "train_samples_per_second": 5.894, "train_steps_per_second": 0.091 } ], "logging_steps": 5, "max_steps": 276, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }