|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9703504043126685, |
|
"eval_steps": 500, |
|
"global_step": 276, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05390835579514825, |
|
"grad_norm": 502.0, |
|
"learning_rate": 8.92857142857143e-06, |
|
"log_odds_chosen": 5.912805557250977, |
|
"log_odds_ratio": -7.710684299468994, |
|
"logps/chosen": -24.25197982788086, |
|
"logps/rejected": -30.16664695739746, |
|
"loss": 186.4347, |
|
"nll_loss": 11.652168273925781, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -1.2125989198684692, |
|
"rewards/margins": 0.29573339223861694, |
|
"rewards/rejected": -1.5083322525024414, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.1078167115902965, |
|
"grad_norm": 164.0, |
|
"learning_rate": 1.785714285714286e-05, |
|
"log_odds_chosen": 4.08050537109375, |
|
"log_odds_ratio": -5.399485111236572, |
|
"logps/chosen": -20.047760009765625, |
|
"logps/rejected": -24.1300106048584, |
|
"loss": 155.9258, |
|
"nll_loss": 9.745362281799316, |
|
"rewards/accuracies": 0.4781250059604645, |
|
"rewards/chosen": -1.0023880004882812, |
|
"rewards/margins": 0.20411260426044464, |
|
"rewards/rejected": -1.2065006494522095, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.16172506738544473, |
|
"grad_norm": 304.0, |
|
"learning_rate": 2.6785714285714288e-05, |
|
"log_odds_chosen": 3.733511447906494, |
|
"log_odds_ratio": -6.636049747467041, |
|
"logps/chosen": -21.219013214111328, |
|
"logps/rejected": -24.95370864868164, |
|
"loss": 155.6621, |
|
"nll_loss": 9.728882789611816, |
|
"rewards/accuracies": 0.49687498807907104, |
|
"rewards/chosen": -1.060950517654419, |
|
"rewards/margins": 0.18673481047153473, |
|
"rewards/rejected": -1.2476855516433716, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.215633423180593, |
|
"grad_norm": 540.0, |
|
"learning_rate": 3.571428571428572e-05, |
|
"log_odds_chosen": 0.06586956977844238, |
|
"log_odds_ratio": -5.591992378234863, |
|
"logps/chosen": -15.408183097839355, |
|
"logps/rejected": -15.473505020141602, |
|
"loss": 106.968, |
|
"nll_loss": 6.6854963302612305, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.7704092264175415, |
|
"rewards/margins": 0.0032660537399351597, |
|
"rewards/rejected": -0.7736751437187195, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2695417789757412, |
|
"grad_norm": 114.5, |
|
"learning_rate": 4.464285714285715e-05, |
|
"log_odds_chosen": 0.1850651204586029, |
|
"log_odds_ratio": -0.870222270488739, |
|
"logps/chosen": -2.379927158355713, |
|
"logps/rejected": -2.544480562210083, |
|
"loss": 38.8752, |
|
"nll_loss": 2.429699420928955, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -0.11899634450674057, |
|
"rewards/margins": 0.00822767335921526, |
|
"rewards/rejected": -0.12722402811050415, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.32345013477088946, |
|
"grad_norm": 119.0, |
|
"learning_rate": 4.999197688241076e-05, |
|
"log_odds_chosen": 0.2413506954908371, |
|
"log_odds_ratio": -0.7524750232696533, |
|
"logps/chosen": -1.8451862335205078, |
|
"logps/rejected": -2.054591655731201, |
|
"loss": 33.8684, |
|
"nll_loss": 2.1167733669281006, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -0.09225932508707047, |
|
"rewards/margins": 0.010470272973179817, |
|
"rewards/rejected": -0.10272959619760513, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.37735849056603776, |
|
"grad_norm": 65.0, |
|
"learning_rate": 4.9901775939413026e-05, |
|
"log_odds_chosen": 0.25174349546432495, |
|
"log_odds_ratio": -0.7253037691116333, |
|
"logps/chosen": -1.6166225671768188, |
|
"logps/rejected": -1.8298925161361694, |
|
"loss": 29.1245, |
|
"nll_loss": 1.820279836654663, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": -0.0808311253786087, |
|
"rewards/margins": 0.010663499124348164, |
|
"rewards/rejected": -0.091494619846344, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.431266846361186, |
|
"grad_norm": 101.5, |
|
"learning_rate": 4.971170810820279e-05, |
|
"log_odds_chosen": 0.2364540547132492, |
|
"log_odds_ratio": -0.7084470391273499, |
|
"logps/chosen": -1.5713794231414795, |
|
"logps/rejected": -1.7505134344100952, |
|
"loss": 29.3433, |
|
"nll_loss": 1.8339534997940063, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.07856898009777069, |
|
"rewards/margins": 0.008956688456237316, |
|
"rewards/rejected": -0.08752566576004028, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.48517520215633425, |
|
"grad_norm": 43.75, |
|
"learning_rate": 4.942253564296218e-05, |
|
"log_odds_chosen": 0.18266446888446808, |
|
"log_odds_ratio": -0.7139922976493835, |
|
"logps/chosen": -1.3910434246063232, |
|
"logps/rejected": -1.5377957820892334, |
|
"loss": 27.6051, |
|
"nll_loss": 1.725320816040039, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.06955216825008392, |
|
"rewards/margins": 0.007337613496929407, |
|
"rewards/rejected": -0.07688979059457779, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.5390835579514824, |
|
"grad_norm": 35.75, |
|
"learning_rate": 4.9035418250305314e-05, |
|
"log_odds_chosen": 0.15111494064331055, |
|
"log_odds_ratio": -0.7071037292480469, |
|
"logps/chosen": -1.3401615619659424, |
|
"logps/rejected": -1.4416334629058838, |
|
"loss": 26.5156, |
|
"nll_loss": 1.6572233438491821, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.06700807809829712, |
|
"rewards/margins": 0.005073595326393843, |
|
"rewards/rejected": -0.07208167761564255, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5929919137466307, |
|
"grad_norm": 117.0, |
|
"learning_rate": 4.8551908438353374e-05, |
|
"log_odds_chosen": 0.20908907055854797, |
|
"log_odds_ratio": -0.6856271028518677, |
|
"logps/chosen": -1.2825366258621216, |
|
"logps/rejected": -1.4346181154251099, |
|
"loss": 25.8045, |
|
"nll_loss": 1.6127817630767822, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -0.06412683427333832, |
|
"rewards/margins": 0.007604071404784918, |
|
"rewards/rejected": -0.07173089683055878, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.6469002695417789, |
|
"grad_norm": 55.25, |
|
"learning_rate": 4.7973945290505766e-05, |
|
"log_odds_chosen": 0.13437321782112122, |
|
"log_odds_ratio": -0.7068942785263062, |
|
"logps/chosen": -1.2431588172912598, |
|
"logps/rejected": -1.3494141101837158, |
|
"loss": 24.9189, |
|
"nll_loss": 1.5574296712875366, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.06215794011950493, |
|
"rewards/margins": 0.005312758963555098, |
|
"rewards/rejected": -0.06747071444988251, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7008086253369272, |
|
"grad_norm": 115.5, |
|
"learning_rate": 4.73038466888773e-05, |
|
"log_odds_chosen": 0.11516331136226654, |
|
"log_odds_ratio": -0.7177757024765015, |
|
"logps/chosen": -1.2244114875793457, |
|
"logps/rejected": -1.3131043910980225, |
|
"loss": 24.9998, |
|
"nll_loss": 1.5624865293502808, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.061220575124025345, |
|
"rewards/margins": 0.004434647969901562, |
|
"rewards/rejected": -0.06565522402524948, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.7547169811320755, |
|
"grad_norm": 85.5, |
|
"learning_rate": 4.654430001858874e-05, |
|
"log_odds_chosen": 0.13228605687618256, |
|
"log_odds_ratio": -0.7034366726875305, |
|
"logps/chosen": -1.1982498168945312, |
|
"logps/rejected": -1.2874435186386108, |
|
"loss": 24.5123, |
|
"nll_loss": 1.532017469406128, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -0.05991249158978462, |
|
"rewards/margins": 0.004459693096578121, |
|
"rewards/rejected": -0.06437218189239502, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8086253369272237, |
|
"grad_norm": 68.0, |
|
"learning_rate": 4.569835139019054e-05, |
|
"log_odds_chosen": 0.22792398929595947, |
|
"log_odds_ratio": -0.6663814783096313, |
|
"logps/chosen": -1.1594752073287964, |
|
"logps/rejected": -1.320555329322815, |
|
"loss": 24.2124, |
|
"nll_loss": 1.5132750272750854, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.05797375366091728, |
|
"rewards/margins": 0.00805400125682354, |
|
"rewards/rejected": -0.06602776050567627, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.862533692722372, |
|
"grad_norm": 70.5, |
|
"learning_rate": 4.476939342344246e-05, |
|
"log_odds_chosen": 0.21846911311149597, |
|
"log_odds_ratio": -0.6512280702590942, |
|
"logps/chosen": -1.0879645347595215, |
|
"logps/rejected": -1.2357169389724731, |
|
"loss": 23.5682, |
|
"nll_loss": 1.4730117321014404, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.054398227483034134, |
|
"rewards/margins": 0.007387618534266949, |
|
"rewards/rejected": -0.06178584694862366, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.9164420485175202, |
|
"grad_norm": 38.0, |
|
"learning_rate": 4.376115164144157e-05, |
|
"log_odds_chosen": 0.15174248814582825, |
|
"log_odds_ratio": -0.6834980845451355, |
|
"logps/chosen": -1.0666790008544922, |
|
"logps/rejected": -1.1634466648101807, |
|
"loss": 22.9808, |
|
"nll_loss": 1.4362987279891968, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.05333394929766655, |
|
"rewards/margins": 0.004838378168642521, |
|
"rewards/rejected": -0.058172326534986496, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.9703504043126685, |
|
"grad_norm": 64.5, |
|
"learning_rate": 4.267766952966369e-05, |
|
"log_odds_chosen": 0.1273517906665802, |
|
"log_odds_ratio": -0.6930577158927917, |
|
"logps/chosen": -1.040971279144287, |
|
"logps/rejected": -1.1280105113983154, |
|
"loss": 22.4486, |
|
"nll_loss": 1.4030355215072632, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.05204857140779495, |
|
"rewards/margins": 0.004351964220404625, |
|
"rewards/rejected": -0.05640053004026413, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0215633423180592, |
|
"grad_norm": 25.875, |
|
"learning_rate": 4.1523292319838524e-05, |
|
"log_odds_chosen": 0.21998043358325958, |
|
"log_odds_ratio": -0.6607629656791687, |
|
"logps/chosen": -0.9751634001731873, |
|
"logps/rejected": -1.1191428899765015, |
|
"loss": 20.5047, |
|
"nll_loss": 1.348992943763733, |
|
"rewards/accuracies": 0.5953947305679321, |
|
"rewards/chosen": -0.04875817149877548, |
|
"rewards/margins": 0.007198969833552837, |
|
"rewards/rejected": -0.0559571348130703, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.0754716981132075, |
|
"grad_norm": 39.0, |
|
"learning_rate": 4.030264956369157e-05, |
|
"log_odds_chosen": 0.4434036314487457, |
|
"log_odds_ratio": -0.6344673037528992, |
|
"logps/chosen": -0.9448977708816528, |
|
"logps/rejected": -1.1891727447509766, |
|
"loss": 21.1138, |
|
"nll_loss": 1.319615125656128, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.04724489524960518, |
|
"rewards/margins": 0.012213751673698425, |
|
"rewards/rejected": -0.059458643198013306, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.1293800539083558, |
|
"grad_norm": 29.625, |
|
"learning_rate": 3.902063656644012e-05, |
|
"log_odds_chosen": 0.5363696813583374, |
|
"log_odds_ratio": -0.5325912833213806, |
|
"logps/chosen": -0.8145742416381836, |
|
"logps/rejected": -1.1247040033340454, |
|
"loss": 18.4662, |
|
"nll_loss": 1.1541385650634766, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.04072871431708336, |
|
"rewards/margins": 0.015506483614444733, |
|
"rewards/rejected": -0.05623519420623779, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.1832884097035041, |
|
"grad_norm": 25.625, |
|
"learning_rate": 3.768239475450269e-05, |
|
"log_odds_chosen": 0.5174719095230103, |
|
"log_odds_ratio": -0.5490429401397705, |
|
"logps/chosen": -0.8520506620407104, |
|
"logps/rejected": -1.1713144779205322, |
|
"loss": 19.2575, |
|
"nll_loss": 1.20359206199646, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.0426025353372097, |
|
"rewards/margins": 0.01596318557858467, |
|
"rewards/rejected": -0.05856572464108467, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.2371967654986522, |
|
"grad_norm": 44.0, |
|
"learning_rate": 3.629329105615617e-05, |
|
"log_odds_chosen": 0.5610077977180481, |
|
"log_odds_ratio": -0.5220470428466797, |
|
"logps/chosen": -0.8756101727485657, |
|
"logps/rejected": -1.2160618305206299, |
|
"loss": 19.0513, |
|
"nll_loss": 1.1907049417495728, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.04378051310777664, |
|
"rewards/margins": 0.01702258363366127, |
|
"rewards/rejected": -0.060803093016147614, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.2911051212938005, |
|
"grad_norm": 39.5, |
|
"learning_rate": 3.4858896377832966e-05, |
|
"log_odds_chosen": 0.5247588157653809, |
|
"log_odds_ratio": -0.5250480771064758, |
|
"logps/chosen": -0.8190716505050659, |
|
"logps/rejected": -1.128647804260254, |
|
"loss": 17.8761, |
|
"nll_loss": 1.117258906364441, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.040953584015369415, |
|
"rewards/margins": 0.015478810295462608, |
|
"rewards/rejected": -0.05643239617347717, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.3450134770889488, |
|
"grad_norm": 45.25, |
|
"learning_rate": 3.338496326237743e-05, |
|
"log_odds_chosen": 0.4806355834007263, |
|
"log_odds_ratio": -0.550317108631134, |
|
"logps/chosen": -0.8094690442085266, |
|
"logps/rejected": -1.100089430809021, |
|
"loss": 18.3711, |
|
"nll_loss": 1.148194432258606, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.04047344997525215, |
|
"rewards/margins": 0.01453101821243763, |
|
"rewards/rejected": -0.05500447005033493, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.398921832884097, |
|
"grad_norm": 35.0, |
|
"learning_rate": 3.187740281886195e-05, |
|
"log_odds_chosen": 0.6415280699729919, |
|
"log_odds_ratio": -0.48781052231788635, |
|
"logps/chosen": -0.8225423693656921, |
|
"logps/rejected": -1.2189260721206665, |
|
"loss": 18.4471, |
|
"nll_loss": 1.152944564819336, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -0.04112711548805237, |
|
"rewards/margins": 0.01981918141245842, |
|
"rewards/rejected": -0.06094629690051079, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.4528301886792452, |
|
"grad_norm": 21.5, |
|
"learning_rate": 3.034226101648377e-05, |
|
"log_odds_chosen": 0.6234865784645081, |
|
"log_odds_ratio": -0.4981662333011627, |
|
"logps/chosen": -0.8078993558883667, |
|
"logps/rejected": -1.180347204208374, |
|
"loss": 18.1349, |
|
"nll_loss": 1.1334304809570312, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.040394969284534454, |
|
"rewards/margins": 0.01862238720059395, |
|
"rewards/rejected": -0.059017352759838104, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.5067385444743935, |
|
"grad_norm": 54.0, |
|
"learning_rate": 2.878569443761442e-05, |
|
"log_odds_chosen": 0.5453085899353027, |
|
"log_odds_ratio": -0.520926833152771, |
|
"logps/chosen": -0.8231021165847778, |
|
"logps/rejected": -1.1491215229034424, |
|
"loss": 18.0104, |
|
"nll_loss": 1.1256530284881592, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.04115510731935501, |
|
"rewards/margins": 0.016300970688462257, |
|
"rewards/rejected": -0.057456083595752716, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.5606469002695418, |
|
"grad_norm": 35.5, |
|
"learning_rate": 2.7213945587242508e-05, |
|
"log_odds_chosen": 0.4767599105834961, |
|
"log_odds_ratio": -0.5450612902641296, |
|
"logps/chosen": -0.8506708145141602, |
|
"logps/rejected": -1.1368257999420166, |
|
"loss": 19.6345, |
|
"nll_loss": 1.2271578311920166, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.04253353923559189, |
|
"rewards/margins": 0.014307747595012188, |
|
"rewards/rejected": -0.05684129148721695, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.61455525606469, |
|
"grad_norm": 24.75, |
|
"learning_rate": 2.5633317857829697e-05, |
|
"log_odds_chosen": 0.5109966993331909, |
|
"log_odds_ratio": -0.5330369472503662, |
|
"logps/chosen": -0.7959780693054199, |
|
"logps/rejected": -1.100124716758728, |
|
"loss": 17.9396, |
|
"nll_loss": 1.1212230920791626, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.03979891166090965, |
|
"rewards/margins": 0.01520733255892992, |
|
"rewards/rejected": -0.0550062358379364, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.6684636118598384, |
|
"grad_norm": 28.125, |
|
"learning_rate": 2.4050150249981522e-05, |
|
"log_odds_chosen": 0.5154935717582703, |
|
"log_odds_ratio": -0.5551471710205078, |
|
"logps/chosen": -0.7858830094337463, |
|
"logps/rejected": -1.0894118547439575, |
|
"loss": 17.763, |
|
"nll_loss": 1.1101869344711304, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.039294153451919556, |
|
"rewards/margins": 0.015176435932517052, |
|
"rewards/rejected": -0.05447059124708176, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.7223719676549867, |
|
"grad_norm": 25.625, |
|
"learning_rate": 2.24707919503142e-05, |
|
"log_odds_chosen": 0.612939715385437, |
|
"log_odds_ratio": -0.5060396790504456, |
|
"logps/chosen": -0.8042556643486023, |
|
"logps/rejected": -1.1593209505081177, |
|
"loss": 17.9033, |
|
"nll_loss": 1.1189591884613037, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.04021278768777847, |
|
"rewards/margins": 0.01775326207280159, |
|
"rewards/rejected": -0.057966046035289764, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.7762803234501348, |
|
"grad_norm": 30.5, |
|
"learning_rate": 2.0901576868471125e-05, |
|
"log_odds_chosen": 0.7125197649002075, |
|
"log_odds_ratio": -0.4848386347293854, |
|
"logps/chosen": -0.7776973843574524, |
|
"logps/rejected": -1.216399908065796, |
|
"loss": 17.609, |
|
"nll_loss": 1.1005606651306152, |
|
"rewards/accuracies": 0.8031250238418579, |
|
"rewards/chosen": -0.03888486698269844, |
|
"rewards/margins": 0.02193513885140419, |
|
"rewards/rejected": -0.06082000210881233, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.830188679245283, |
|
"grad_norm": 25.5, |
|
"learning_rate": 1.934879823540663e-05, |
|
"log_odds_chosen": 0.6370295286178589, |
|
"log_odds_ratio": -0.5132786631584167, |
|
"logps/chosen": -0.7767125368118286, |
|
"logps/rejected": -1.1403437852859497, |
|
"loss": 18.0793, |
|
"nll_loss": 1.1299545764923096, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.03883562982082367, |
|
"rewards/margins": 0.018181564286351204, |
|
"rewards/rejected": -0.05701719596982002, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.8840970350404311, |
|
"grad_norm": 24.125, |
|
"learning_rate": 1.7818683364808884e-05, |
|
"log_odds_chosen": 0.5283645391464233, |
|
"log_odds_ratio": -0.5327858328819275, |
|
"logps/chosen": -0.8068645596504211, |
|
"logps/rejected": -1.107076644897461, |
|
"loss": 17.5256, |
|
"nll_loss": 1.0953474044799805, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.04034322127699852, |
|
"rewards/margins": 0.015010610222816467, |
|
"rewards/rejected": -0.055353838950395584, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.9380053908355794, |
|
"grad_norm": 30.375, |
|
"learning_rate": 1.6317368678879495e-05, |
|
"log_odds_chosen": 0.4843871593475342, |
|
"log_odds_ratio": -0.5502706170082092, |
|
"logps/chosen": -0.8421553373336792, |
|
"logps/rejected": -1.1288360357284546, |
|
"loss": 18.9738, |
|
"nll_loss": 1.1858609914779663, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.04210776835680008, |
|
"rewards/margins": 0.014334036037325859, |
|
"rewards/rejected": -0.05644180253148079, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.9919137466307277, |
|
"grad_norm": 28.375, |
|
"learning_rate": 1.4850875098627326e-05, |
|
"log_odds_chosen": 0.5836633443832397, |
|
"log_odds_ratio": -0.5226461291313171, |
|
"logps/chosen": -0.799677312374115, |
|
"logps/rejected": -1.1371490955352783, |
|
"loss": 18.0322, |
|
"nll_loss": 1.1270129680633545, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.03998386859893799, |
|
"rewards/margins": 0.01687358133494854, |
|
"rewards/rejected": -0.056857455521821976, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.0431266846361185, |
|
"grad_norm": 21.125, |
|
"learning_rate": 1.3425083897371981e-05, |
|
"log_odds_chosen": 0.8875333666801453, |
|
"log_odds_ratio": -0.4172805845737457, |
|
"logps/chosen": -0.6325610876083374, |
|
"logps/rejected": -1.107661247253418, |
|
"loss": 13.71, |
|
"nll_loss": 0.901972234249115, |
|
"rewards/accuracies": 0.8585526347160339, |
|
"rewards/chosen": -0.03162805363535881, |
|
"rewards/margins": 0.023755012080073357, |
|
"rewards/rejected": -0.055383067578077316, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.0970350404312668, |
|
"grad_norm": 29.5, |
|
"learning_rate": 1.204571311429496e-05, |
|
"log_odds_chosen": 1.1873928308486938, |
|
"log_odds_ratio": -0.3502134680747986, |
|
"logps/chosen": -0.6132036447525024, |
|
"logps/rejected": -1.2639284133911133, |
|
"loss": 14.8674, |
|
"nll_loss": 0.9292107820510864, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/chosen": -0.030660182237625122, |
|
"rewards/margins": 0.032536230981349945, |
|
"rewards/rejected": -0.06319641321897507, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.150943396226415, |
|
"grad_norm": 27.25, |
|
"learning_rate": 1.0718294622630188e-05, |
|
"log_odds_chosen": 1.1184864044189453, |
|
"log_odds_ratio": -0.3575947880744934, |
|
"logps/chosen": -0.6175593137741089, |
|
"logps/rejected": -1.2147563695907593, |
|
"loss": 13.9253, |
|
"nll_loss": 0.8703301548957825, |
|
"rewards/accuracies": 0.8968750238418579, |
|
"rewards/chosen": -0.030877966433763504, |
|
"rewards/margins": 0.02985985204577446, |
|
"rewards/rejected": -0.06073781102895737, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.2048517520215634, |
|
"grad_norm": 25.125, |
|
"learning_rate": 9.448151944460657e-06, |
|
"log_odds_chosen": 1.1300182342529297, |
|
"log_odds_ratio": -0.3609935939311981, |
|
"logps/chosen": -0.5816246271133423, |
|
"logps/rejected": -1.1945774555206299, |
|
"loss": 13.5606, |
|
"nll_loss": 0.8475350141525269, |
|
"rewards/accuracies": 0.8968750238418579, |
|
"rewards/chosen": -0.029081230983138084, |
|
"rewards/margins": 0.03064764477312565, |
|
"rewards/rejected": -0.059728872030973434, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 2.2587601078167117, |
|
"grad_norm": 30.25, |
|
"learning_rate": 8.240378901093034e-06, |
|
"log_odds_chosen": 1.1773656606674194, |
|
"log_odds_ratio": -0.36282655596733093, |
|
"logps/chosen": -0.6218483448028564, |
|
"logps/rejected": -1.2632155418395996, |
|
"loss": 14.1748, |
|
"nll_loss": 0.8859266042709351, |
|
"rewards/accuracies": 0.8968750238418579, |
|
"rewards/chosen": -0.03109242022037506, |
|
"rewards/margins": 0.03206836059689522, |
|
"rewards/rejected": -0.06316077709197998, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.31266846361186, |
|
"grad_norm": 27.75, |
|
"learning_rate": 7.099819184631928e-06, |
|
"log_odds_chosen": 1.1673331260681152, |
|
"log_odds_ratio": -0.35059279203414917, |
|
"logps/chosen": -0.5780085325241089, |
|
"logps/rejected": -1.1937094926834106, |
|
"loss": 13.6113, |
|
"nll_loss": 0.8507078289985657, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -0.028900425881147385, |
|
"rewards/margins": 0.030785048380494118, |
|
"rewards/rejected": -0.05968547612428665, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.3665768194070083, |
|
"grad_norm": 21.75, |
|
"learning_rate": 6.031046932680229e-06, |
|
"log_odds_chosen": 1.1187890768051147, |
|
"log_odds_ratio": -0.36328989267349243, |
|
"logps/chosen": -0.5981144309043884, |
|
"logps/rejected": -1.2099401950836182, |
|
"loss": 13.4124, |
|
"nll_loss": 0.838273823261261, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.029905717819929123, |
|
"rewards/margins": 0.030591288581490517, |
|
"rewards/rejected": -0.06049700453877449, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.420485175202156, |
|
"grad_norm": 23.25, |
|
"learning_rate": 5.038348384069663e-06, |
|
"log_odds_chosen": 1.0922346115112305, |
|
"log_odds_ratio": -0.36317089200019836, |
|
"logps/chosen": -0.6168917417526245, |
|
"logps/rejected": -1.194858193397522, |
|
"loss": 13.8242, |
|
"nll_loss": 0.8640131950378418, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/chosen": -0.030844587832689285, |
|
"rewards/margins": 0.028898322954773903, |
|
"rewards/rejected": -0.059742916375398636, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.4743935309973044, |
|
"grad_norm": 24.875, |
|
"learning_rate": 4.125704689189819e-06, |
|
"log_odds_chosen": 1.099705696105957, |
|
"log_odds_ratio": -0.3621538579463959, |
|
"logps/chosen": -0.5992009043693542, |
|
"logps/rejected": -1.1859813928604126, |
|
"loss": 14.0204, |
|
"nll_loss": 0.8762725591659546, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.029960045590996742, |
|
"rewards/margins": 0.02933902107179165, |
|
"rewards/rejected": -0.05929907411336899, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.5283018867924527, |
|
"grad_norm": 30.75, |
|
"learning_rate": 3.296775943853789e-06, |
|
"log_odds_chosen": 1.1012507677078247, |
|
"log_odds_ratio": -0.37290987372398376, |
|
"logps/chosen": -0.600740909576416, |
|
"logps/rejected": -1.1732831001281738, |
|
"loss": 13.7312, |
|
"nll_loss": 0.8582011461257935, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.0300370454788208, |
|
"rewards/margins": 0.02862711250782013, |
|
"rewards/rejected": -0.05866416543722153, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.582210242587601, |
|
"grad_norm": 21.875, |
|
"learning_rate": 2.5548865107314607e-06, |
|
"log_odds_chosen": 1.1265591382980347, |
|
"log_odds_ratio": -0.36651021242141724, |
|
"logps/chosen": -0.6239336729049683, |
|
"logps/rejected": -1.243729591369629, |
|
"loss": 14.1878, |
|
"nll_loss": 0.8867388963699341, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.031196683645248413, |
|
"rewards/margins": 0.030989795923233032, |
|
"rewards/rejected": -0.06218648701906204, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.6361185983827493, |
|
"grad_norm": 24.0, |
|
"learning_rate": 1.9030116872178316e-06, |
|
"log_odds_chosen": 1.1169674396514893, |
|
"log_odds_ratio": -0.3630684018135071, |
|
"logps/chosen": -0.5876578092575073, |
|
"logps/rejected": -1.1631311178207397, |
|
"loss": 14.3677, |
|
"nll_loss": 0.8979824185371399, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.029382890090346336, |
|
"rewards/margins": 0.02877367101609707, |
|
"rewards/rejected": -0.058156561106443405, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.6900269541778976, |
|
"grad_norm": 30.625, |
|
"learning_rate": 1.3437657732040782e-06, |
|
"log_odds_chosen": 1.2451064586639404, |
|
"log_odds_ratio": -0.33962780237197876, |
|
"logps/chosen": -0.559921145439148, |
|
"logps/rejected": -1.2268016338348389, |
|
"loss": 13.2893, |
|
"nll_loss": 0.83058100938797, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": -0.027996059507131577, |
|
"rewards/margins": 0.033344022929668427, |
|
"rewards/rejected": -0.061340074986219406, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.743935309973046, |
|
"grad_norm": 21.625, |
|
"learning_rate": 8.793915866046359e-07, |
|
"log_odds_chosen": 1.1345646381378174, |
|
"log_odds_ratio": -0.36994147300720215, |
|
"logps/chosen": -0.5653634071350098, |
|
"logps/rejected": -1.1617649793624878, |
|
"loss": 14.1254, |
|
"nll_loss": 0.8828363418579102, |
|
"rewards/accuracies": 0.871874988079071, |
|
"rewards/chosen": -0.02826816774904728, |
|
"rewards/margins": 0.02982008457183838, |
|
"rewards/rejected": -0.05808825045824051, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.797843665768194, |
|
"grad_norm": 20.0, |
|
"learning_rate": 5.117514686876379e-07, |
|
"log_odds_chosen": 0.9944526553153992, |
|
"log_odds_ratio": -0.3954170346260071, |
|
"logps/chosen": -0.6244685649871826, |
|
"logps/rejected": -1.1500699520111084, |
|
"loss": 14.1807, |
|
"nll_loss": 0.8862916231155396, |
|
"rewards/accuracies": 0.8968750238418579, |
|
"rewards/chosen": -0.03122343122959137, |
|
"rewards/margins": 0.02628006599843502, |
|
"rewards/rejected": -0.05750349164009094, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.8517520215633425, |
|
"grad_norm": 25.375, |
|
"learning_rate": 2.423198152812306e-07, |
|
"log_odds_chosen": 0.9933083653450012, |
|
"log_odds_ratio": -0.3899889588356018, |
|
"logps/chosen": -0.6133357286453247, |
|
"logps/rejected": -1.120845913887024, |
|
"loss": 13.8159, |
|
"nll_loss": 0.8634947538375854, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.030666787177324295, |
|
"rewards/margins": 0.02537550963461399, |
|
"rewards/rejected": -0.05604229494929314, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.9056603773584904, |
|
"grad_norm": 25.0, |
|
"learning_rate": 7.217716380881479e-08, |
|
"log_odds_chosen": 1.142899751663208, |
|
"log_odds_ratio": -0.3680952191352844, |
|
"logps/chosen": -0.5903482437133789, |
|
"logps/rejected": -1.1948912143707275, |
|
"loss": 13.7773, |
|
"nll_loss": 0.8610836863517761, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/chosen": -0.029517415910959244, |
|
"rewards/margins": 0.03022714890539646, |
|
"rewards/rejected": -0.05974455922842026, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.9595687331536387, |
|
"grad_norm": 25.75, |
|
"learning_rate": 2.0058598667854756e-09, |
|
"log_odds_chosen": 1.1750845909118652, |
|
"log_odds_ratio": -0.3487832844257355, |
|
"logps/chosen": -0.562160313129425, |
|
"logps/rejected": -1.1947548389434814, |
|
"loss": 13.3099, |
|
"nll_loss": 0.8318702578544617, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.028108015656471252, |
|
"rewards/margins": 0.03162972629070282, |
|
"rewards/rejected": -0.05973774194717407, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.9703504043126685, |
|
"step": 276, |
|
"total_flos": 0.0, |
|
"train_loss": 28.78522514605868, |
|
"train_runtime": 3021.362, |
|
"train_samples_per_second": 5.894, |
|
"train_steps_per_second": 0.091 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 276, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|