silviasapora's picture
Model save
2c93a43 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9703504043126685,
"eval_steps": 500,
"global_step": 276,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05390835579514825,
"grad_norm": 502.0,
"learning_rate": 8.92857142857143e-06,
"log_odds_chosen": 5.912805557250977,
"log_odds_ratio": -7.710684299468994,
"logps/chosen": -24.25197982788086,
"logps/rejected": -30.16664695739746,
"loss": 186.4347,
"nll_loss": 11.652168273925781,
"rewards/accuracies": 0.484375,
"rewards/chosen": -1.2125989198684692,
"rewards/margins": 0.29573339223861694,
"rewards/rejected": -1.5083322525024414,
"step": 5
},
{
"epoch": 0.1078167115902965,
"grad_norm": 164.0,
"learning_rate": 1.785714285714286e-05,
"log_odds_chosen": 4.08050537109375,
"log_odds_ratio": -5.399485111236572,
"logps/chosen": -20.047760009765625,
"logps/rejected": -24.1300106048584,
"loss": 155.9258,
"nll_loss": 9.745362281799316,
"rewards/accuracies": 0.4781250059604645,
"rewards/chosen": -1.0023880004882812,
"rewards/margins": 0.20411260426044464,
"rewards/rejected": -1.2065006494522095,
"step": 10
},
{
"epoch": 0.16172506738544473,
"grad_norm": 304.0,
"learning_rate": 2.6785714285714288e-05,
"log_odds_chosen": 3.733511447906494,
"log_odds_ratio": -6.636049747467041,
"logps/chosen": -21.219013214111328,
"logps/rejected": -24.95370864868164,
"loss": 155.6621,
"nll_loss": 9.728882789611816,
"rewards/accuracies": 0.49687498807907104,
"rewards/chosen": -1.060950517654419,
"rewards/margins": 0.18673481047153473,
"rewards/rejected": -1.2476855516433716,
"step": 15
},
{
"epoch": 0.215633423180593,
"grad_norm": 540.0,
"learning_rate": 3.571428571428572e-05,
"log_odds_chosen": 0.06586956977844238,
"log_odds_ratio": -5.591992378234863,
"logps/chosen": -15.408183097839355,
"logps/rejected": -15.473505020141602,
"loss": 106.968,
"nll_loss": 6.6854963302612305,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.7704092264175415,
"rewards/margins": 0.0032660537399351597,
"rewards/rejected": -0.7736751437187195,
"step": 20
},
{
"epoch": 0.2695417789757412,
"grad_norm": 114.5,
"learning_rate": 4.464285714285715e-05,
"log_odds_chosen": 0.1850651204586029,
"log_odds_ratio": -0.870222270488739,
"logps/chosen": -2.379927158355713,
"logps/rejected": -2.544480562210083,
"loss": 38.8752,
"nll_loss": 2.429699420928955,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -0.11899634450674057,
"rewards/margins": 0.00822767335921526,
"rewards/rejected": -0.12722402811050415,
"step": 25
},
{
"epoch": 0.32345013477088946,
"grad_norm": 119.0,
"learning_rate": 4.999197688241076e-05,
"log_odds_chosen": 0.2413506954908371,
"log_odds_ratio": -0.7524750232696533,
"logps/chosen": -1.8451862335205078,
"logps/rejected": -2.054591655731201,
"loss": 33.8684,
"nll_loss": 2.1167733669281006,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": -0.09225932508707047,
"rewards/margins": 0.010470272973179817,
"rewards/rejected": -0.10272959619760513,
"step": 30
},
{
"epoch": 0.37735849056603776,
"grad_norm": 65.0,
"learning_rate": 4.9901775939413026e-05,
"log_odds_chosen": 0.25174349546432495,
"log_odds_ratio": -0.7253037691116333,
"logps/chosen": -1.6166225671768188,
"logps/rejected": -1.8298925161361694,
"loss": 29.1245,
"nll_loss": 1.820279836654663,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -0.0808311253786087,
"rewards/margins": 0.010663499124348164,
"rewards/rejected": -0.091494619846344,
"step": 35
},
{
"epoch": 0.431266846361186,
"grad_norm": 101.5,
"learning_rate": 4.971170810820279e-05,
"log_odds_chosen": 0.2364540547132492,
"log_odds_ratio": -0.7084470391273499,
"logps/chosen": -1.5713794231414795,
"logps/rejected": -1.7505134344100952,
"loss": 29.3433,
"nll_loss": 1.8339534997940063,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.07856898009777069,
"rewards/margins": 0.008956688456237316,
"rewards/rejected": -0.08752566576004028,
"step": 40
},
{
"epoch": 0.48517520215633425,
"grad_norm": 43.75,
"learning_rate": 4.942253564296218e-05,
"log_odds_chosen": 0.18266446888446808,
"log_odds_ratio": -0.7139922976493835,
"logps/chosen": -1.3910434246063232,
"logps/rejected": -1.5377957820892334,
"loss": 27.6051,
"nll_loss": 1.725320816040039,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.06955216825008392,
"rewards/margins": 0.007337613496929407,
"rewards/rejected": -0.07688979059457779,
"step": 45
},
{
"epoch": 0.5390835579514824,
"grad_norm": 35.75,
"learning_rate": 4.9035418250305314e-05,
"log_odds_chosen": 0.15111494064331055,
"log_odds_ratio": -0.7071037292480469,
"logps/chosen": -1.3401615619659424,
"logps/rejected": -1.4416334629058838,
"loss": 26.5156,
"nll_loss": 1.6572233438491821,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.06700807809829712,
"rewards/margins": 0.005073595326393843,
"rewards/rejected": -0.07208167761564255,
"step": 50
},
{
"epoch": 0.5929919137466307,
"grad_norm": 117.0,
"learning_rate": 4.8551908438353374e-05,
"log_odds_chosen": 0.20908907055854797,
"log_odds_ratio": -0.6856271028518677,
"logps/chosen": -1.2825366258621216,
"logps/rejected": -1.4346181154251099,
"loss": 25.8045,
"nll_loss": 1.6127817630767822,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": -0.06412683427333832,
"rewards/margins": 0.007604071404784918,
"rewards/rejected": -0.07173089683055878,
"step": 55
},
{
"epoch": 0.6469002695417789,
"grad_norm": 55.25,
"learning_rate": 4.7973945290505766e-05,
"log_odds_chosen": 0.13437321782112122,
"log_odds_ratio": -0.7068942785263062,
"logps/chosen": -1.2431588172912598,
"logps/rejected": -1.3494141101837158,
"loss": 24.9189,
"nll_loss": 1.5574296712875366,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.06215794011950493,
"rewards/margins": 0.005312758963555098,
"rewards/rejected": -0.06747071444988251,
"step": 60
},
{
"epoch": 0.7008086253369272,
"grad_norm": 115.5,
"learning_rate": 4.73038466888773e-05,
"log_odds_chosen": 0.11516331136226654,
"log_odds_ratio": -0.7177757024765015,
"logps/chosen": -1.2244114875793457,
"logps/rejected": -1.3131043910980225,
"loss": 24.9998,
"nll_loss": 1.5624865293502808,
"rewards/accuracies": 0.515625,
"rewards/chosen": -0.061220575124025345,
"rewards/margins": 0.004434647969901562,
"rewards/rejected": -0.06565522402524948,
"step": 65
},
{
"epoch": 0.7547169811320755,
"grad_norm": 85.5,
"learning_rate": 4.654430001858874e-05,
"log_odds_chosen": 0.13228605687618256,
"log_odds_ratio": -0.7034366726875305,
"logps/chosen": -1.1982498168945312,
"logps/rejected": -1.2874435186386108,
"loss": 24.5123,
"nll_loss": 1.532017469406128,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": -0.05991249158978462,
"rewards/margins": 0.004459693096578121,
"rewards/rejected": -0.06437218189239502,
"step": 70
},
{
"epoch": 0.8086253369272237,
"grad_norm": 68.0,
"learning_rate": 4.569835139019054e-05,
"log_odds_chosen": 0.22792398929595947,
"log_odds_ratio": -0.6663814783096313,
"logps/chosen": -1.1594752073287964,
"logps/rejected": -1.320555329322815,
"loss": 24.2124,
"nll_loss": 1.5132750272750854,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.05797375366091728,
"rewards/margins": 0.00805400125682354,
"rewards/rejected": -0.06602776050567627,
"step": 75
},
{
"epoch": 0.862533692722372,
"grad_norm": 70.5,
"learning_rate": 4.476939342344246e-05,
"log_odds_chosen": 0.21846911311149597,
"log_odds_ratio": -0.6512280702590942,
"logps/chosen": -1.0879645347595215,
"logps/rejected": -1.2357169389724731,
"loss": 23.5682,
"nll_loss": 1.4730117321014404,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.054398227483034134,
"rewards/margins": 0.007387618534266949,
"rewards/rejected": -0.06178584694862366,
"step": 80
},
{
"epoch": 0.9164420485175202,
"grad_norm": 38.0,
"learning_rate": 4.376115164144157e-05,
"log_odds_chosen": 0.15174248814582825,
"log_odds_ratio": -0.6834980845451355,
"logps/chosen": -1.0666790008544922,
"logps/rejected": -1.1634466648101807,
"loss": 22.9808,
"nll_loss": 1.4362987279891968,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.05333394929766655,
"rewards/margins": 0.004838378168642521,
"rewards/rejected": -0.058172326534986496,
"step": 85
},
{
"epoch": 0.9703504043126685,
"grad_norm": 64.5,
"learning_rate": 4.267766952966369e-05,
"log_odds_chosen": 0.1273517906665802,
"log_odds_ratio": -0.6930577158927917,
"logps/chosen": -1.040971279144287,
"logps/rejected": -1.1280105113983154,
"loss": 22.4486,
"nll_loss": 1.4030355215072632,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.05204857140779495,
"rewards/margins": 0.004351964220404625,
"rewards/rejected": -0.05640053004026413,
"step": 90
},
{
"epoch": 1.0215633423180592,
"grad_norm": 25.875,
"learning_rate": 4.1523292319838524e-05,
"log_odds_chosen": 0.21998043358325958,
"log_odds_ratio": -0.6607629656791687,
"logps/chosen": -0.9751634001731873,
"logps/rejected": -1.1191428899765015,
"loss": 20.5047,
"nll_loss": 1.348992943763733,
"rewards/accuracies": 0.5953947305679321,
"rewards/chosen": -0.04875817149877548,
"rewards/margins": 0.007198969833552837,
"rewards/rejected": -0.0559571348130703,
"step": 95
},
{
"epoch": 1.0754716981132075,
"grad_norm": 39.0,
"learning_rate": 4.030264956369157e-05,
"log_odds_chosen": 0.4434036314487457,
"log_odds_ratio": -0.6344673037528992,
"logps/chosen": -0.9448977708816528,
"logps/rejected": -1.1891727447509766,
"loss": 21.1138,
"nll_loss": 1.319615125656128,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.04724489524960518,
"rewards/margins": 0.012213751673698425,
"rewards/rejected": -0.059458643198013306,
"step": 100
},
{
"epoch": 1.1293800539083558,
"grad_norm": 29.625,
"learning_rate": 3.902063656644012e-05,
"log_odds_chosen": 0.5363696813583374,
"log_odds_ratio": -0.5325912833213806,
"logps/chosen": -0.8145742416381836,
"logps/rejected": -1.1247040033340454,
"loss": 18.4662,
"nll_loss": 1.1541385650634766,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.04072871431708336,
"rewards/margins": 0.015506483614444733,
"rewards/rejected": -0.05623519420623779,
"step": 105
},
{
"epoch": 1.1832884097035041,
"grad_norm": 25.625,
"learning_rate": 3.768239475450269e-05,
"log_odds_chosen": 0.5174719095230103,
"log_odds_ratio": -0.5490429401397705,
"logps/chosen": -0.8520506620407104,
"logps/rejected": -1.1713144779205322,
"loss": 19.2575,
"nll_loss": 1.20359206199646,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.0426025353372097,
"rewards/margins": 0.01596318557858467,
"rewards/rejected": -0.05856572464108467,
"step": 110
},
{
"epoch": 1.2371967654986522,
"grad_norm": 44.0,
"learning_rate": 3.629329105615617e-05,
"log_odds_chosen": 0.5610077977180481,
"log_odds_ratio": -0.5220470428466797,
"logps/chosen": -0.8756101727485657,
"logps/rejected": -1.2160618305206299,
"loss": 19.0513,
"nll_loss": 1.1907049417495728,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.04378051310777664,
"rewards/margins": 0.01702258363366127,
"rewards/rejected": -0.060803093016147614,
"step": 115
},
{
"epoch": 1.2911051212938005,
"grad_norm": 39.5,
"learning_rate": 3.4858896377832966e-05,
"log_odds_chosen": 0.5247588157653809,
"log_odds_ratio": -0.5250480771064758,
"logps/chosen": -0.8190716505050659,
"logps/rejected": -1.128647804260254,
"loss": 17.8761,
"nll_loss": 1.117258906364441,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.040953584015369415,
"rewards/margins": 0.015478810295462608,
"rewards/rejected": -0.05643239617347717,
"step": 120
},
{
"epoch": 1.3450134770889488,
"grad_norm": 45.25,
"learning_rate": 3.338496326237743e-05,
"log_odds_chosen": 0.4806355834007263,
"log_odds_ratio": -0.550317108631134,
"logps/chosen": -0.8094690442085266,
"logps/rejected": -1.100089430809021,
"loss": 18.3711,
"nll_loss": 1.148194432258606,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.04047344997525215,
"rewards/margins": 0.01453101821243763,
"rewards/rejected": -0.05500447005033493,
"step": 125
},
{
"epoch": 1.398921832884097,
"grad_norm": 35.0,
"learning_rate": 3.187740281886195e-05,
"log_odds_chosen": 0.6415280699729919,
"log_odds_ratio": -0.48781052231788635,
"logps/chosen": -0.8225423693656921,
"logps/rejected": -1.2189260721206665,
"loss": 18.4471,
"nll_loss": 1.152944564819336,
"rewards/accuracies": 0.784375011920929,
"rewards/chosen": -0.04112711548805237,
"rewards/margins": 0.01981918141245842,
"rewards/rejected": -0.06094629690051079,
"step": 130
},
{
"epoch": 1.4528301886792452,
"grad_norm": 21.5,
"learning_rate": 3.034226101648377e-05,
"log_odds_chosen": 0.6234865784645081,
"log_odds_ratio": -0.4981662333011627,
"logps/chosen": -0.8078993558883667,
"logps/rejected": -1.180347204208374,
"loss": 18.1349,
"nll_loss": 1.1334304809570312,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": -0.040394969284534454,
"rewards/margins": 0.01862238720059395,
"rewards/rejected": -0.059017352759838104,
"step": 135
},
{
"epoch": 1.5067385444743935,
"grad_norm": 54.0,
"learning_rate": 2.878569443761442e-05,
"log_odds_chosen": 0.5453085899353027,
"log_odds_ratio": -0.520926833152771,
"logps/chosen": -0.8231021165847778,
"logps/rejected": -1.1491215229034424,
"loss": 18.0104,
"nll_loss": 1.1256530284881592,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.04115510731935501,
"rewards/margins": 0.016300970688462257,
"rewards/rejected": -0.057456083595752716,
"step": 140
},
{
"epoch": 1.5606469002695418,
"grad_norm": 35.5,
"learning_rate": 2.7213945587242508e-05,
"log_odds_chosen": 0.4767599105834961,
"log_odds_ratio": -0.5450612902641296,
"logps/chosen": -0.8506708145141602,
"logps/rejected": -1.1368257999420166,
"loss": 19.6345,
"nll_loss": 1.2271578311920166,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.04253353923559189,
"rewards/margins": 0.014307747595012188,
"rewards/rejected": -0.05684129148721695,
"step": 145
},
{
"epoch": 1.61455525606469,
"grad_norm": 24.75,
"learning_rate": 2.5633317857829697e-05,
"log_odds_chosen": 0.5109966993331909,
"log_odds_ratio": -0.5330369472503662,
"logps/chosen": -0.7959780693054199,
"logps/rejected": -1.100124716758728,
"loss": 17.9396,
"nll_loss": 1.1212230920791626,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.03979891166090965,
"rewards/margins": 0.01520733255892992,
"rewards/rejected": -0.0550062358379364,
"step": 150
},
{
"epoch": 1.6684636118598384,
"grad_norm": 28.125,
"learning_rate": 2.4050150249981522e-05,
"log_odds_chosen": 0.5154935717582703,
"log_odds_ratio": -0.5551471710205078,
"logps/chosen": -0.7858830094337463,
"logps/rejected": -1.0894118547439575,
"loss": 17.763,
"nll_loss": 1.1101869344711304,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.039294153451919556,
"rewards/margins": 0.015176435932517052,
"rewards/rejected": -0.05447059124708176,
"step": 155
},
{
"epoch": 1.7223719676549867,
"grad_norm": 25.625,
"learning_rate": 2.24707919503142e-05,
"log_odds_chosen": 0.612939715385437,
"log_odds_ratio": -0.5060396790504456,
"logps/chosen": -0.8042556643486023,
"logps/rejected": -1.1593209505081177,
"loss": 17.9033,
"nll_loss": 1.1189591884613037,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.04021278768777847,
"rewards/margins": 0.01775326207280159,
"rewards/rejected": -0.057966046035289764,
"step": 160
},
{
"epoch": 1.7762803234501348,
"grad_norm": 30.5,
"learning_rate": 2.0901576868471125e-05,
"log_odds_chosen": 0.7125197649002075,
"log_odds_ratio": -0.4848386347293854,
"logps/chosen": -0.7776973843574524,
"logps/rejected": -1.216399908065796,
"loss": 17.609,
"nll_loss": 1.1005606651306152,
"rewards/accuracies": 0.8031250238418579,
"rewards/chosen": -0.03888486698269844,
"rewards/margins": 0.02193513885140419,
"rewards/rejected": -0.06082000210881233,
"step": 165
},
{
"epoch": 1.830188679245283,
"grad_norm": 25.5,
"learning_rate": 1.934879823540663e-05,
"log_odds_chosen": 0.6370295286178589,
"log_odds_ratio": -0.5132786631584167,
"logps/chosen": -0.7767125368118286,
"logps/rejected": -1.1403437852859497,
"loss": 18.0793,
"nll_loss": 1.1299545764923096,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.03883562982082367,
"rewards/margins": 0.018181564286351204,
"rewards/rejected": -0.05701719596982002,
"step": 170
},
{
"epoch": 1.8840970350404311,
"grad_norm": 24.125,
"learning_rate": 1.7818683364808884e-05,
"log_odds_chosen": 0.5283645391464233,
"log_odds_ratio": -0.5327858328819275,
"logps/chosen": -0.8068645596504211,
"logps/rejected": -1.107076644897461,
"loss": 17.5256,
"nll_loss": 1.0953474044799805,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.04034322127699852,
"rewards/margins": 0.015010610222816467,
"rewards/rejected": -0.055353838950395584,
"step": 175
},
{
"epoch": 1.9380053908355794,
"grad_norm": 30.375,
"learning_rate": 1.6317368678879495e-05,
"log_odds_chosen": 0.4843871593475342,
"log_odds_ratio": -0.5502706170082092,
"logps/chosen": -0.8421553373336792,
"logps/rejected": -1.1288360357284546,
"loss": 18.9738,
"nll_loss": 1.1858609914779663,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.04210776835680008,
"rewards/margins": 0.014334036037325859,
"rewards/rejected": -0.05644180253148079,
"step": 180
},
{
"epoch": 1.9919137466307277,
"grad_norm": 28.375,
"learning_rate": 1.4850875098627326e-05,
"log_odds_chosen": 0.5836633443832397,
"log_odds_ratio": -0.5226461291313171,
"logps/chosen": -0.799677312374115,
"logps/rejected": -1.1371490955352783,
"loss": 18.0322,
"nll_loss": 1.1270129680633545,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -0.03998386859893799,
"rewards/margins": 0.01687358133494854,
"rewards/rejected": -0.056857455521821976,
"step": 185
},
{
"epoch": 2.0431266846361185,
"grad_norm": 21.125,
"learning_rate": 1.3425083897371981e-05,
"log_odds_chosen": 0.8875333666801453,
"log_odds_ratio": -0.4172805845737457,
"logps/chosen": -0.6325610876083374,
"logps/rejected": -1.107661247253418,
"loss": 13.71,
"nll_loss": 0.901972234249115,
"rewards/accuracies": 0.8585526347160339,
"rewards/chosen": -0.03162805363535881,
"rewards/margins": 0.023755012080073357,
"rewards/rejected": -0.055383067578077316,
"step": 190
},
{
"epoch": 2.0970350404312668,
"grad_norm": 29.5,
"learning_rate": 1.204571311429496e-05,
"log_odds_chosen": 1.1873928308486938,
"log_odds_ratio": -0.3502134680747986,
"logps/chosen": -0.6132036447525024,
"logps/rejected": -1.2639284133911133,
"loss": 14.8674,
"nll_loss": 0.9292107820510864,
"rewards/accuracies": 0.8843749761581421,
"rewards/chosen": -0.030660182237625122,
"rewards/margins": 0.032536230981349945,
"rewards/rejected": -0.06319641321897507,
"step": 195
},
{
"epoch": 2.150943396226415,
"grad_norm": 27.25,
"learning_rate": 1.0718294622630188e-05,
"log_odds_chosen": 1.1184864044189453,
"log_odds_ratio": -0.3575947880744934,
"logps/chosen": -0.6175593137741089,
"logps/rejected": -1.2147563695907593,
"loss": 13.9253,
"nll_loss": 0.8703301548957825,
"rewards/accuracies": 0.8968750238418579,
"rewards/chosen": -0.030877966433763504,
"rewards/margins": 0.02985985204577446,
"rewards/rejected": -0.06073781102895737,
"step": 200
},
{
"epoch": 2.2048517520215634,
"grad_norm": 25.125,
"learning_rate": 9.448151944460657e-06,
"log_odds_chosen": 1.1300182342529297,
"log_odds_ratio": -0.3609935939311981,
"logps/chosen": -0.5816246271133423,
"logps/rejected": -1.1945774555206299,
"loss": 13.5606,
"nll_loss": 0.8475350141525269,
"rewards/accuracies": 0.8968750238418579,
"rewards/chosen": -0.029081230983138084,
"rewards/margins": 0.03064764477312565,
"rewards/rejected": -0.059728872030973434,
"step": 205
},
{
"epoch": 2.2587601078167117,
"grad_norm": 30.25,
"learning_rate": 8.240378901093034e-06,
"log_odds_chosen": 1.1773656606674194,
"log_odds_ratio": -0.36282655596733093,
"logps/chosen": -0.6218483448028564,
"logps/rejected": -1.2632155418395996,
"loss": 14.1748,
"nll_loss": 0.8859266042709351,
"rewards/accuracies": 0.8968750238418579,
"rewards/chosen": -0.03109242022037506,
"rewards/margins": 0.03206836059689522,
"rewards/rejected": -0.06316077709197998,
"step": 210
},
{
"epoch": 2.31266846361186,
"grad_norm": 27.75,
"learning_rate": 7.099819184631928e-06,
"log_odds_chosen": 1.1673331260681152,
"log_odds_ratio": -0.35059279203414917,
"logps/chosen": -0.5780085325241089,
"logps/rejected": -1.1937094926834106,
"loss": 13.6113,
"nll_loss": 0.8507078289985657,
"rewards/accuracies": 0.890625,
"rewards/chosen": -0.028900425881147385,
"rewards/margins": 0.030785048380494118,
"rewards/rejected": -0.05968547612428665,
"step": 215
},
{
"epoch": 2.3665768194070083,
"grad_norm": 21.75,
"learning_rate": 6.031046932680229e-06,
"log_odds_chosen": 1.1187890768051147,
"log_odds_ratio": -0.36328989267349243,
"logps/chosen": -0.5981144309043884,
"logps/rejected": -1.2099401950836182,
"loss": 13.4124,
"nll_loss": 0.838273823261261,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.029905717819929123,
"rewards/margins": 0.030591288581490517,
"rewards/rejected": -0.06049700453877449,
"step": 220
},
{
"epoch": 2.420485175202156,
"grad_norm": 23.25,
"learning_rate": 5.038348384069663e-06,
"log_odds_chosen": 1.0922346115112305,
"log_odds_ratio": -0.36317089200019836,
"logps/chosen": -0.6168917417526245,
"logps/rejected": -1.194858193397522,
"loss": 13.8242,
"nll_loss": 0.8640131950378418,
"rewards/accuracies": 0.909375011920929,
"rewards/chosen": -0.030844587832689285,
"rewards/margins": 0.028898322954773903,
"rewards/rejected": -0.059742916375398636,
"step": 225
},
{
"epoch": 2.4743935309973044,
"grad_norm": 24.875,
"learning_rate": 4.125704689189819e-06,
"log_odds_chosen": 1.099705696105957,
"log_odds_ratio": -0.3621538579463959,
"logps/chosen": -0.5992009043693542,
"logps/rejected": -1.1859813928604126,
"loss": 14.0204,
"nll_loss": 0.8762725591659546,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.029960045590996742,
"rewards/margins": 0.02933902107179165,
"rewards/rejected": -0.05929907411336899,
"step": 230
},
{
"epoch": 2.5283018867924527,
"grad_norm": 30.75,
"learning_rate": 3.296775943853789e-06,
"log_odds_chosen": 1.1012507677078247,
"log_odds_ratio": -0.37290987372398376,
"logps/chosen": -0.600740909576416,
"logps/rejected": -1.1732831001281738,
"loss": 13.7312,
"nll_loss": 0.8582011461257935,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -0.0300370454788208,
"rewards/margins": 0.02862711250782013,
"rewards/rejected": -0.05866416543722153,
"step": 235
},
{
"epoch": 2.582210242587601,
"grad_norm": 21.875,
"learning_rate": 2.5548865107314607e-06,
"log_odds_chosen": 1.1265591382980347,
"log_odds_ratio": -0.36651021242141724,
"logps/chosen": -0.6239336729049683,
"logps/rejected": -1.243729591369629,
"loss": 14.1878,
"nll_loss": 0.8867388963699341,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": -0.031196683645248413,
"rewards/margins": 0.030989795923233032,
"rewards/rejected": -0.06218648701906204,
"step": 240
},
{
"epoch": 2.6361185983827493,
"grad_norm": 24.0,
"learning_rate": 1.9030116872178316e-06,
"log_odds_chosen": 1.1169674396514893,
"log_odds_ratio": -0.3630684018135071,
"logps/chosen": -0.5876578092575073,
"logps/rejected": -1.1631311178207397,
"loss": 14.3677,
"nll_loss": 0.8979824185371399,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -0.029382890090346336,
"rewards/margins": 0.02877367101609707,
"rewards/rejected": -0.058156561106443405,
"step": 245
},
{
"epoch": 2.6900269541778976,
"grad_norm": 30.625,
"learning_rate": 1.3437657732040782e-06,
"log_odds_chosen": 1.2451064586639404,
"log_odds_ratio": -0.33962780237197876,
"logps/chosen": -0.559921145439148,
"logps/rejected": -1.2268016338348389,
"loss": 13.2893,
"nll_loss": 0.83058100938797,
"rewards/accuracies": 0.921875,
"rewards/chosen": -0.027996059507131577,
"rewards/margins": 0.033344022929668427,
"rewards/rejected": -0.061340074986219406,
"step": 250
},
{
"epoch": 2.743935309973046,
"grad_norm": 21.625,
"learning_rate": 8.793915866046359e-07,
"log_odds_chosen": 1.1345646381378174,
"log_odds_ratio": -0.36994147300720215,
"logps/chosen": -0.5653634071350098,
"logps/rejected": -1.1617649793624878,
"loss": 14.1254,
"nll_loss": 0.8828363418579102,
"rewards/accuracies": 0.871874988079071,
"rewards/chosen": -0.02826816774904728,
"rewards/margins": 0.02982008457183838,
"rewards/rejected": -0.05808825045824051,
"step": 255
},
{
"epoch": 2.797843665768194,
"grad_norm": 20.0,
"learning_rate": 5.117514686876379e-07,
"log_odds_chosen": 0.9944526553153992,
"log_odds_ratio": -0.3954170346260071,
"logps/chosen": -0.6244685649871826,
"logps/rejected": -1.1500699520111084,
"loss": 14.1807,
"nll_loss": 0.8862916231155396,
"rewards/accuracies": 0.8968750238418579,
"rewards/chosen": -0.03122343122959137,
"rewards/margins": 0.02628006599843502,
"rewards/rejected": -0.05750349164009094,
"step": 260
},
{
"epoch": 2.8517520215633425,
"grad_norm": 25.375,
"learning_rate": 2.423198152812306e-07,
"log_odds_chosen": 0.9933083653450012,
"log_odds_ratio": -0.3899889588356018,
"logps/chosen": -0.6133357286453247,
"logps/rejected": -1.120845913887024,
"loss": 13.8159,
"nll_loss": 0.8634947538375854,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": -0.030666787177324295,
"rewards/margins": 0.02537550963461399,
"rewards/rejected": -0.05604229494929314,
"step": 265
},
{
"epoch": 2.9056603773584904,
"grad_norm": 25.0,
"learning_rate": 7.217716380881479e-08,
"log_odds_chosen": 1.142899751663208,
"log_odds_ratio": -0.3680952191352844,
"logps/chosen": -0.5903482437133789,
"logps/rejected": -1.1948912143707275,
"loss": 13.7773,
"nll_loss": 0.8610836863517761,
"rewards/accuracies": 0.8843749761581421,
"rewards/chosen": -0.029517415910959244,
"rewards/margins": 0.03022714890539646,
"rewards/rejected": -0.05974455922842026,
"step": 270
},
{
"epoch": 2.9595687331536387,
"grad_norm": 25.75,
"learning_rate": 2.0058598667854756e-09,
"log_odds_chosen": 1.1750845909118652,
"log_odds_ratio": -0.3487832844257355,
"logps/chosen": -0.562160313129425,
"logps/rejected": -1.1947548389434814,
"loss": 13.3099,
"nll_loss": 0.8318702578544617,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.028108015656471252,
"rewards/margins": 0.03162972629070282,
"rewards/rejected": -0.05973774194717407,
"step": 275
},
{
"epoch": 2.9703504043126685,
"step": 276,
"total_flos": 0.0,
"train_loss": 28.78522514605868,
"train_runtime": 3021.362,
"train_samples_per_second": 5.894,
"train_steps_per_second": 0.091
}
],
"logging_steps": 5,
"max_steps": 276,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}