silviasapora's picture
Model save
0ad3cf8 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.9595687331536387,
"eval_steps": 500,
"global_step": 368,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05390835579514825,
"grad_norm": 700.0,
"learning_rate": 6.7567567567567575e-06,
"log_odds_chosen": 5.927034378051758,
"log_odds_ratio": -7.722090721130371,
"logps/chosen": -24.312976837158203,
"logps/rejected": -30.241857528686523,
"loss": 193.2568,
"nll_loss": 11.692580223083496,
"rewards/accuracies": 0.484375,
"rewards/chosen": -1.215648889541626,
"rewards/margins": 0.2964438498020172,
"rewards/rejected": -1.5120928287506104,
"step": 5
},
{
"epoch": 0.1078167115902965,
"grad_norm": 207.0,
"learning_rate": 1.3513513513513515e-05,
"log_odds_chosen": 4.102215766906738,
"log_odds_ratio": -5.448796272277832,
"logps/chosen": -20.24713897705078,
"logps/rejected": -24.35104751586914,
"loss": 161.9082,
"nll_loss": 9.846972465515137,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -1.0123568773269653,
"rewards/margins": 0.20519545674324036,
"rewards/rejected": -1.2175523042678833,
"step": 10
},
{
"epoch": 0.16172506738544473,
"grad_norm": 203.0,
"learning_rate": 2.0270270270270273e-05,
"log_odds_chosen": 3.879714250564575,
"log_odds_ratio": -6.857820987701416,
"logps/chosen": -21.977680206298828,
"logps/rejected": -25.858551025390625,
"loss": 166.6047,
"nll_loss": 10.069997787475586,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -1.098883867263794,
"rewards/margins": 0.19404351711273193,
"rewards/rejected": -1.2929275035858154,
"step": 15
},
{
"epoch": 0.215633423180593,
"grad_norm": 628.0,
"learning_rate": 2.702702702702703e-05,
"log_odds_chosen": 0.7552221417427063,
"log_odds_ratio": -7.44603967666626,
"logps/chosen": -20.903730392456055,
"logps/rejected": -21.659225463867188,
"loss": 142.4453,
"nll_loss": 8.530646324157715,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -1.0451866388320923,
"rewards/margins": 0.03777474910020828,
"rewards/rejected": -1.0829613208770752,
"step": 20
},
{
"epoch": 0.2695417789757412,
"grad_norm": 366.0,
"learning_rate": 3.3783783783783784e-05,
"log_odds_chosen": -0.09596023708581924,
"log_odds_ratio": -2.1412220001220703,
"logps/chosen": -6.6122331619262695,
"logps/rejected": -6.498193264007568,
"loss": 64.159,
"nll_loss": 3.903474807739258,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.3306116461753845,
"rewards/margins": -0.005701972637325525,
"rewards/rejected": -0.32490968704223633,
"step": 25
},
{
"epoch": 0.32345013477088946,
"grad_norm": 114.0,
"learning_rate": 4.0540540540540545e-05,
"log_odds_chosen": 0.23905467987060547,
"log_odds_ratio": -0.7873450517654419,
"logps/chosen": -2.026221752166748,
"logps/rejected": -2.2390971183776855,
"loss": 37.6504,
"nll_loss": 2.314509868621826,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.10131107270717621,
"rewards/margins": 0.010643779300153255,
"rewards/rejected": -0.11195486783981323,
"step": 30
},
{
"epoch": 0.37735849056603776,
"grad_norm": 74.0,
"learning_rate": 4.72972972972973e-05,
"log_odds_chosen": 0.28125709295272827,
"log_odds_ratio": -0.7325613498687744,
"logps/chosen": -1.7039525508880615,
"logps/rejected": -1.9455076456069946,
"loss": 31.1963,
"nll_loss": 1.9137756824493408,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.08519763499498367,
"rewards/margins": 0.012077751569449902,
"rewards/rejected": -0.09727539122104645,
"step": 35
},
{
"epoch": 0.431266846361186,
"grad_norm": 153.0,
"learning_rate": 4.998986632578596e-05,
"log_odds_chosen": 0.26568371057510376,
"log_odds_ratio": -0.7367907166481018,
"logps/chosen": -1.7482105493545532,
"logps/rejected": -1.9558181762695312,
"loss": 31.2704,
"nll_loss": 1.9178155660629272,
"rewards/accuracies": 0.6156250238418579,
"rewards/chosen": -0.08741052448749542,
"rewards/margins": 0.01038038544356823,
"rewards/rejected": -0.0977909117937088,
"step": 40
},
{
"epoch": 0.48517520215633425,
"grad_norm": 52.75,
"learning_rate": 4.992796806510936e-05,
"log_odds_chosen": 0.19093379378318787,
"log_odds_ratio": -0.7119738459587097,
"logps/chosen": -1.4446159601211548,
"logps/rejected": -1.5990588665008545,
"loss": 29.318,
"nll_loss": 1.797499656677246,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.07223080843687057,
"rewards/margins": 0.007722141686826944,
"rewards/rejected": -0.07995294034481049,
"step": 45
},
{
"epoch": 0.5390835579514824,
"grad_norm": 43.25,
"learning_rate": 4.980994056736854e-05,
"log_odds_chosen": 0.1590539515018463,
"log_odds_ratio": -0.7131061553955078,
"logps/chosen": -1.409611701965332,
"logps/rejected": -1.517723560333252,
"loss": 27.8473,
"nll_loss": 1.7051334381103516,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.0704805999994278,
"rewards/margins": 0.005405584815889597,
"rewards/rejected": -0.07588617503643036,
"step": 50
},
{
"epoch": 0.5929919137466307,
"grad_norm": 105.5,
"learning_rate": 4.9636049590020475e-05,
"log_odds_chosen": 0.2336808443069458,
"log_odds_ratio": -0.6900186538696289,
"logps/chosen": -1.3359286785125732,
"logps/rejected": -1.5113661289215088,
"loss": 26.8618,
"nll_loss": 1.6446822881698608,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.0667964294552803,
"rewards/margins": 0.008771875873208046,
"rewards/rejected": -0.0755683034658432,
"step": 55
},
{
"epoch": 0.6469002695417789,
"grad_norm": 81.5,
"learning_rate": 4.940668667592439e-05,
"log_odds_chosen": 0.1283489167690277,
"log_odds_ratio": -0.715418815612793,
"logps/chosen": -1.3006031513214111,
"logps/rejected": -1.4078199863433838,
"loss": 26.0219,
"nll_loss": 1.5916073322296143,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.06503016501665115,
"rewards/margins": 0.005360837560147047,
"rewards/rejected": -0.07039099186658859,
"step": 60
},
{
"epoch": 0.7008086253369272,
"grad_norm": 35.0,
"learning_rate": 4.912236827172148e-05,
"log_odds_chosen": 0.1217084527015686,
"log_odds_ratio": -0.7157756686210632,
"logps/chosen": -1.251226782798767,
"logps/rejected": -1.3473665714263916,
"loss": 25.8473,
"nll_loss": 1.5807287693023682,
"rewards/accuracies": 0.515625,
"rewards/chosen": -0.06256133317947388,
"rewards/margins": 0.004806997254490852,
"rewards/rejected": -0.06736833602190018,
"step": 65
},
{
"epoch": 0.7547169811320755,
"grad_norm": 36.5,
"learning_rate": 4.878373456497416e-05,
"log_odds_chosen": 0.1481620967388153,
"log_odds_ratio": -0.6984988451004028,
"logps/chosen": -1.207621455192566,
"logps/rejected": -1.3115525245666504,
"loss": 25.3049,
"nll_loss": 1.54731285572052,
"rewards/accuracies": 0.546875,
"rewards/chosen": -0.060381073504686356,
"rewards/margins": 0.005196552723646164,
"rewards/rejected": -0.06557762622833252,
"step": 70
},
{
"epoch": 0.8086253369272237,
"grad_norm": 157.0,
"learning_rate": 4.839154804268313e-05,
"log_odds_chosen": 0.2538384795188904,
"log_odds_ratio": -0.6509516835212708,
"logps/chosen": -1.1560814380645752,
"logps/rejected": -1.3362526893615723,
"loss": 24.7982,
"nll_loss": 1.5172107219696045,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.05780406668782234,
"rewards/margins": 0.009008568711578846,
"rewards/rejected": -0.06681263446807861,
"step": 75
},
{
"epoch": 0.862533692722372,
"grad_norm": 66.0,
"learning_rate": 4.7946691774428144e-05,
"log_odds_chosen": 0.22329294681549072,
"log_odds_ratio": -0.6457458138465881,
"logps/chosen": -1.1015363931655884,
"logps/rejected": -1.2518682479858398,
"loss": 24.3387,
"nll_loss": 1.4887213706970215,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.05507681518793106,
"rewards/margins": 0.007516591809689999,
"rewards/rejected": -0.06259341537952423,
"step": 80
},
{
"epoch": 0.9164420485175202,
"grad_norm": 60.75,
"learning_rate": 4.745016742399804e-05,
"log_odds_chosen": 0.17877401411533356,
"log_odds_ratio": -0.6742900609970093,
"logps/chosen": -1.0733582973480225,
"logps/rejected": -1.1897413730621338,
"loss": 23.64,
"nll_loss": 1.4440288543701172,
"rewards/accuracies": 0.596875011920929,
"rewards/chosen": -0.05366792157292366,
"rewards/margins": 0.0058191511780023575,
"rewards/rejected": -0.05948706716299057,
"step": 85
},
{
"epoch": 0.9703504043126685,
"grad_norm": 114.5,
"learning_rate": 4.690309299398736e-05,
"log_odds_chosen": 0.15260997414588928,
"log_odds_ratio": -0.6815454363822937,
"logps/chosen": -1.0459394454956055,
"logps/rejected": -1.1531497240066528,
"loss": 23.1175,
"nll_loss": 1.4113500118255615,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.052296966314315796,
"rewards/margins": 0.005360516719520092,
"rewards/rejected": -0.05765749141573906,
"step": 90
},
{
"epoch": 1.0215633423180592,
"grad_norm": 80.0,
"learning_rate": 4.630670030843785e-05,
"log_odds_chosen": 0.2412596195936203,
"log_odds_ratio": -0.6528716087341309,
"logps/chosen": -0.9962456226348877,
"logps/rejected": -1.1605761051177979,
"loss": 21.3336,
"nll_loss": 1.3709510564804077,
"rewards/accuracies": 0.6151315569877625,
"rewards/chosen": -0.049812283366918564,
"rewards/margins": 0.008216519840061665,
"rewards/rejected": -0.058028798550367355,
"step": 95
},
{
"epoch": 1.0754716981132075,
"grad_norm": 53.0,
"learning_rate": 4.566233223919298e-05,
"log_odds_chosen": 0.5380310416221619,
"log_odds_ratio": -0.5722979307174683,
"logps/chosen": -0.9102069139480591,
"logps/rejected": -1.2439484596252441,
"loss": 21.3346,
"nll_loss": 1.3030532598495483,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.04551034793257713,
"rewards/margins": 0.016687078401446342,
"rewards/rejected": -0.062197424471378326,
"step": 100
},
{
"epoch": 1.1293800539083558,
"grad_norm": 80.0,
"learning_rate": 4.4971439682211125e-05,
"log_odds_chosen": 0.5655834674835205,
"log_odds_ratio": -0.5308610200881958,
"logps/chosen": -0.8367247581481934,
"logps/rejected": -1.1727405786514282,
"loss": 19.4547,
"nll_loss": 1.187097191810608,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.041836243122816086,
"rewards/margins": 0.016800785437226295,
"rewards/rejected": -0.05863703042268753,
"step": 105
},
{
"epoch": 1.1832884097035041,
"grad_norm": 29.875,
"learning_rate": 4.4235578290645194e-05,
"log_odds_chosen": 0.5750025510787964,
"log_odds_ratio": -0.5339788794517517,
"logps/chosen": -0.8711065053939819,
"logps/rejected": -1.2383763790130615,
"loss": 20.0698,
"nll_loss": 1.2257729768753052,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.043555326759815216,
"rewards/margins": 0.018363500013947487,
"rewards/rejected": -0.061918824911117554,
"step": 110
},
{
"epoch": 1.2371967654986522,
"grad_norm": 40.5,
"learning_rate": 4.3456404972045216e-05,
"log_odds_chosen": 0.6106120347976685,
"log_odds_ratio": -0.511372983455658,
"logps/chosen": -0.8859386444091797,
"logps/rejected": -1.271536111831665,
"loss": 19.8049,
"nll_loss": 1.2099454402923584,
"rewards/accuracies": 0.784375011920929,
"rewards/chosen": -0.044296931475400925,
"rewards/margins": 0.019279872998595238,
"rewards/rejected": -0.06357680261135101,
"step": 115
},
{
"epoch": 1.2911051212938005,
"grad_norm": 59.5,
"learning_rate": 4.263567415757058e-05,
"log_odds_chosen": 0.5386055111885071,
"log_odds_ratio": -0.5215080976486206,
"logps/chosen": -0.8329726457595825,
"logps/rejected": -1.1544153690338135,
"loss": 18.5548,
"nll_loss": 1.131080150604248,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.04164863005280495,
"rewards/margins": 0.01607213169336319,
"rewards/rejected": -0.057720769196748734,
"step": 120
},
{
"epoch": 1.3450134770889488,
"grad_norm": 43.75,
"learning_rate": 4.177523385161264e-05,
"log_odds_chosen": 0.5417205691337585,
"log_odds_ratio": -0.536739706993103,
"logps/chosen": -0.8140735626220703,
"logps/rejected": -1.1415430307388306,
"loss": 19.0077,
"nll_loss": 1.1593010425567627,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.040703680366277695,
"rewards/margins": 0.016373474150896072,
"rewards/rejected": -0.05707715079188347,
"step": 125
},
{
"epoch": 1.398921832884097,
"grad_norm": 39.0,
"learning_rate": 4.087702147072241e-05,
"log_odds_chosen": 0.7008520364761353,
"log_odds_ratio": -0.4697790741920471,
"logps/chosen": -0.8314515352249146,
"logps/rejected": -1.2709705829620361,
"loss": 19.0124,
"nll_loss": 1.161712408065796,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.041572581976652145,
"rewards/margins": 0.02197595313191414,
"rewards/rejected": -0.06354853510856628,
"step": 130
},
{
"epoch": 1.4528301886792452,
"grad_norm": 25.125,
"learning_rate": 3.9943059481212795e-05,
"log_odds_chosen": 0.6904614567756653,
"log_odds_ratio": -0.48464569449424744,
"logps/chosen": -0.8193404078483582,
"logps/rejected": -1.243801474571228,
"loss": 18.8223,
"nll_loss": 1.1491620540618896,
"rewards/accuracies": 0.815625011920929,
"rewards/chosen": -0.040967028588056564,
"rewards/margins": 0.021223049610853195,
"rewards/rejected": -0.06219007819890976,
"step": 135
},
{
"epoch": 1.5067385444743935,
"grad_norm": 32.5,
"learning_rate": 3.89754508452579e-05,
"log_odds_chosen": 0.5718387365341187,
"log_odds_ratio": -0.5105268359184265,
"logps/chosen": -0.8398844599723816,
"logps/rejected": -1.1855361461639404,
"loss": 18.7264,
"nll_loss": 1.1422346830368042,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.04199422523379326,
"rewards/margins": 0.017282582819461823,
"rewards/rejected": -0.05927680805325508,
"step": 140
},
{
"epoch": 1.5606469002695418,
"grad_norm": 46.75,
"learning_rate": 3.797637428574326e-05,
"log_odds_chosen": 0.5373243093490601,
"log_odds_ratio": -0.5297266840934753,
"logps/chosen": -0.8746851682662964,
"logps/rejected": -1.2106478214263916,
"loss": 20.6137,
"nll_loss": 1.2596467733383179,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.04373425990343094,
"rewards/margins": 0.01679813303053379,
"rewards/rejected": -0.06053239852190018,
"step": 145
},
{
"epoch": 1.61455525606469,
"grad_norm": 30.0,
"learning_rate": 3.694807938052887e-05,
"log_odds_chosen": 0.5541953444480896,
"log_odds_ratio": -0.5251818299293518,
"logps/chosen": -0.817675769329071,
"logps/rejected": -1.1604855060577393,
"loss": 18.8161,
"nll_loss": 1.147394061088562,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.04088379442691803,
"rewards/margins": 0.017140481621026993,
"rewards/rejected": -0.05802427604794502,
"step": 150
},
{
"epoch": 1.6684636118598384,
"grad_norm": 34.0,
"learning_rate": 3.589288149717119e-05,
"log_odds_chosen": 0.5490964651107788,
"log_odds_ratio": -0.5439670085906982,
"logps/chosen": -0.797703742980957,
"logps/rejected": -1.1254116296768188,
"loss": 18.6515,
"nll_loss": 1.1367080211639404,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.03988518938422203,
"rewards/margins": 0.016385387629270554,
"rewards/rejected": -0.05627058073878288,
"step": 155
},
{
"epoch": 1.7223719676549867,
"grad_norm": 25.0,
"learning_rate": 3.481315657950931e-05,
"log_odds_chosen": 0.6332697868347168,
"log_odds_ratio": -0.4977358281612396,
"logps/chosen": -0.825598418712616,
"logps/rejected": -1.204270601272583,
"loss": 18.9003,
"nll_loss": 1.1534397602081299,
"rewards/accuracies": 0.7718750238418579,
"rewards/chosen": -0.04127992317080498,
"rewards/margins": 0.01893361285328865,
"rewards/rejected": -0.06021353602409363,
"step": 160
},
{
"epoch": 1.7762803234501348,
"grad_norm": 24.25,
"learning_rate": 3.3711335797853977e-05,
"log_odds_chosen": 0.7610551714897156,
"log_odds_ratio": -0.4777259826660156,
"logps/chosen": -0.803636908531189,
"logps/rejected": -1.2949776649475098,
"loss": 18.4323,
"nll_loss": 1.1253396272659302,
"rewards/accuracies": 0.8031250238418579,
"rewards/chosen": -0.04018184915184975,
"rewards/margins": 0.024567028507590294,
"rewards/rejected": -0.06474888324737549,
"step": 165
},
{
"epoch": 1.830188679245283,
"grad_norm": 39.0,
"learning_rate": 3.2589900074825696e-05,
"log_odds_chosen": 0.6568578481674194,
"log_odds_ratio": -0.5057220458984375,
"logps/chosen": -0.8026474118232727,
"logps/rejected": -1.1945730447769165,
"loss": 19.1293,
"nll_loss": 1.1678292751312256,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.040132369846105576,
"rewards/margins": 0.0195962805300951,
"rewards/rejected": -0.05972864478826523,
"step": 170
},
{
"epoch": 1.8840970350404311,
"grad_norm": 24.375,
"learning_rate": 3.1451374499167586e-05,
"log_odds_chosen": 0.5363537073135376,
"log_odds_ratio": -0.531457781791687,
"logps/chosen": -0.8319064378738403,
"logps/rejected": -1.1418461799621582,
"loss": 18.4204,
"nll_loss": 1.1223351955413818,
"rewards/accuracies": 0.784375011920929,
"rewards/chosen": -0.041595328599214554,
"rewards/margins": 0.015496985986828804,
"rewards/rejected": -0.05709231644868851,
"step": 175
},
{
"epoch": 1.9380053908355794,
"grad_norm": 26.125,
"learning_rate": 3.029832264011133e-05,
"log_odds_chosen": 0.5449716448783875,
"log_odds_ratio": -0.5418750047683716,
"logps/chosen": -0.8625362515449524,
"logps/rejected": -1.210113525390625,
"loss": 19.8141,
"nll_loss": 1.2094202041625977,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.04312681779265404,
"rewards/margins": 0.017378859221935272,
"rewards/rejected": -0.06050567701458931,
"step": 180
},
{
"epoch": 1.9919137466307277,
"grad_norm": 24.5,
"learning_rate": 2.91333407750982e-05,
"log_odds_chosen": 0.6130974292755127,
"log_odds_ratio": -0.5151475667953491,
"logps/chosen": -0.8204232454299927,
"logps/rejected": -1.1893672943115234,
"loss": 18.8592,
"nll_loss": 1.1504535675048828,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.041021160781383514,
"rewards/margins": 0.018447209149599075,
"rewards/rejected": -0.05946836993098259,
"step": 185
},
{
"epoch": 2.0431266846361185,
"grad_norm": 30.875,
"learning_rate": 2.7959052043852464e-05,
"log_odds_chosen": 1.1373300552368164,
"log_odds_ratio": -0.3630596697330475,
"logps/chosen": -0.6143659353256226,
"logps/rejected": -1.2531551122665405,
"loss": 13.6947,
"nll_loss": 0.8776864409446716,
"rewards/accuracies": 0.8914473652839661,
"rewards/chosen": -0.03071829490363598,
"rewards/margins": 0.03193946182727814,
"rewards/rejected": -0.06265775859355927,
"step": 190
},
{
"epoch": 2.0970350404312668,
"grad_norm": 29.875,
"learning_rate": 2.677810054197009e-05,
"log_odds_chosen": 1.4366052150726318,
"log_odds_ratio": -0.29583844542503357,
"logps/chosen": -0.5867640972137451,
"logps/rejected": -1.3974506855010986,
"loss": 14.6992,
"nll_loss": 0.8981709480285645,
"rewards/accuracies": 0.921875,
"rewards/chosen": -0.029338205233216286,
"rewards/margins": 0.04053433611989021,
"rewards/rejected": -0.06987253576517105,
"step": 195
},
{
"epoch": 2.150943396226415,
"grad_norm": 27.125,
"learning_rate": 2.559314536732212e-05,
"log_odds_chosen": 1.3597910404205322,
"log_odds_ratio": -0.3113149404525757,
"logps/chosen": -0.5893815159797668,
"logps/rejected": -1.344481110572815,
"loss": 13.7478,
"nll_loss": 0.8379224538803101,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.029469076544046402,
"rewards/margins": 0.037754982709884644,
"rewards/rejected": -0.06722406297922134,
"step": 200
},
{
"epoch": 2.2048517520215634,
"grad_norm": 36.0,
"learning_rate": 2.4406854632677883e-05,
"log_odds_chosen": 1.4072751998901367,
"log_odds_ratio": -0.3055153489112854,
"logps/chosen": -0.568659782409668,
"logps/rejected": -1.3686797618865967,
"loss": 13.52,
"nll_loss": 0.8238337635993958,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -0.02843298949301243,
"rewards/margins": 0.04000100493431091,
"rewards/rejected": -0.0684339851140976,
"step": 205
},
{
"epoch": 2.2587601078167117,
"grad_norm": 32.5,
"learning_rate": 2.3221899458029918e-05,
"log_odds_chosen": 1.3871681690216064,
"log_odds_ratio": -0.31390881538391113,
"logps/chosen": -0.5992950201034546,
"logps/rejected": -1.3834056854248047,
"loss": 14.0083,
"nll_loss": 0.8541052937507629,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.02996474876999855,
"rewards/margins": 0.03920553997159004,
"rewards/rejected": -0.069170281291008,
"step": 210
},
{
"epoch": 2.31266846361186,
"grad_norm": 29.875,
"learning_rate": 2.204094795614755e-05,
"log_odds_chosen": 1.4916521310806274,
"log_odds_ratio": -0.2852187752723694,
"logps/chosen": -0.5344905257225037,
"logps/rejected": -1.3550375699996948,
"loss": 13.3234,
"nll_loss": 0.8122995495796204,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": -0.026724528521299362,
"rewards/margins": 0.04102735221385956,
"rewards/rejected": -0.06775187700986862,
"step": 215
},
{
"epoch": 2.3665768194070083,
"grad_norm": 25.25,
"learning_rate": 2.0866659224901812e-05,
"log_odds_chosen": 1.407762885093689,
"log_odds_ratio": -0.3070213496685028,
"logps/chosen": -0.5739747881889343,
"logps/rejected": -1.3859971761703491,
"loss": 13.2689,
"nll_loss": 0.8083074688911438,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": -0.028698738664388657,
"rewards/margins": 0.04060111939907074,
"rewards/rejected": -0.0692998617887497,
"step": 220
},
{
"epoch": 2.420485175202156,
"grad_norm": 27.625,
"learning_rate": 1.970167735988867e-05,
"log_odds_chosen": 1.411454439163208,
"log_odds_ratio": -0.30091729760169983,
"logps/chosen": -0.5828540325164795,
"logps/rejected": -1.3717459440231323,
"loss": 13.5018,
"nll_loss": 0.8228418231010437,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.029142703860998154,
"rewards/margins": 0.039444588124752045,
"rewards/rejected": -0.0685872882604599,
"step": 225
},
{
"epoch": 2.4743935309973044,
"grad_norm": 29.75,
"learning_rate": 1.854862550083241e-05,
"log_odds_chosen": 1.4036983251571655,
"log_odds_ratio": -0.3000126779079437,
"logps/chosen": -0.5541399717330933,
"logps/rejected": -1.3349835872650146,
"loss": 13.6971,
"nll_loss": 0.834954559803009,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": -0.027707001194357872,
"rewards/margins": 0.039042189717292786,
"rewards/rejected": -0.06674918532371521,
"step": 230
},
{
"epoch": 2.5283018867924527,
"grad_norm": 27.375,
"learning_rate": 1.7410099925174307e-05,
"log_odds_chosen": 1.3673919439315796,
"log_odds_ratio": -0.32730910181999207,
"logps/chosen": -0.5749243497848511,
"logps/rejected": -1.3314166069030762,
"loss": 13.4834,
"nll_loss": 0.8205031156539917,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -0.028746221214532852,
"rewards/margins": 0.037824612110853195,
"rewards/rejected": -0.06657083332538605,
"step": 235
},
{
"epoch": 2.582210242587601,
"grad_norm": 25.375,
"learning_rate": 1.628866420214603e-05,
"log_odds_chosen": 1.4173800945281982,
"log_odds_ratio": -0.30779603123664856,
"logps/chosen": -0.5958832502365112,
"logps/rejected": -1.4191477298736572,
"loss": 14.0265,
"nll_loss": 0.8556331396102905,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.02979416213929653,
"rewards/margins": 0.04116322472691536,
"rewards/rejected": -0.07095737755298615,
"step": 240
},
{
"epoch": 2.6361185983827493,
"grad_norm": 25.125,
"learning_rate": 1.5186843420490698e-05,
"log_odds_chosen": 1.4067838191986084,
"log_odds_ratio": -0.3025580644607544,
"logps/chosen": -0.5486974716186523,
"logps/rejected": -1.3047682046890259,
"loss": 14.1827,
"nll_loss": 0.864966869354248,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.027434874325990677,
"rewards/margins": 0.037803538143634796,
"rewards/rejected": -0.06523840129375458,
"step": 245
},
{
"epoch": 2.6900269541778976,
"grad_norm": 27.5,
"learning_rate": 1.4107118502828814e-05,
"log_odds_chosen": 1.5402934551239014,
"log_odds_ratio": -0.2862972617149353,
"logps/chosen": -0.5321887731552124,
"logps/rejected": -1.3919765949249268,
"loss": 13.1153,
"nll_loss": 0.7994569540023804,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.02660943940281868,
"rewards/margins": 0.042989395558834076,
"rewards/rejected": -0.06959883868694305,
"step": 250
},
{
"epoch": 2.743935309973046,
"grad_norm": 29.375,
"learning_rate": 1.3051920619471145e-05,
"log_odds_chosen": 1.4740593433380127,
"log_odds_ratio": -0.30231142044067383,
"logps/chosen": -0.5255938768386841,
"logps/rejected": -1.3372348546981812,
"loss": 13.7048,
"nll_loss": 0.8354872465133667,
"rewards/accuracies": 0.909375011920929,
"rewards/chosen": -0.026279697194695473,
"rewards/margins": 0.04058205336332321,
"rewards/rejected": -0.06686174869537354,
"step": 255
},
{
"epoch": 2.797843665768194,
"grad_norm": 21.25,
"learning_rate": 1.2023625714256743e-05,
"log_odds_chosen": 1.3471606969833374,
"log_odds_ratio": -0.3214406967163086,
"logps/chosen": -0.5802930593490601,
"logps/rejected": -1.3386670351028442,
"loss": 13.8703,
"nll_loss": 0.8453942537307739,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.02901465632021427,
"rewards/margins": 0.03791869431734085,
"rewards/rejected": -0.06693334877490997,
"step": 260
},
{
"epoch": 2.8517520215633425,
"grad_norm": 25.25,
"learning_rate": 1.1024549154742098e-05,
"log_odds_chosen": 1.3048094511032104,
"log_odds_ratio": -0.3205285370349884,
"logps/chosen": -0.5728147625923157,
"logps/rejected": -1.2860543727874756,
"loss": 13.4713,
"nll_loss": 0.8200335502624512,
"rewards/accuracies": 0.9156249761581421,
"rewards/chosen": -0.028640735894441605,
"rewards/margins": 0.035661980509757996,
"rewards/rejected": -0.0643027201294899,
"step": 265
},
{
"epoch": 2.9056603773584904,
"grad_norm": 25.0,
"learning_rate": 1.005694051878721e-05,
"log_odds_chosen": 1.4204931259155273,
"log_odds_ratio": -0.316345751285553,
"logps/chosen": -0.5570142865180969,
"logps/rejected": -1.3344264030456543,
"loss": 13.5552,
"nll_loss": 0.8255348205566406,
"rewards/accuracies": 0.8968750238418579,
"rewards/chosen": -0.027850713580846786,
"rewards/margins": 0.03887060284614563,
"rewards/rejected": -0.06672131270170212,
"step": 270
},
{
"epoch": 2.9595687331536387,
"grad_norm": 24.25,
"learning_rate": 9.1229785292776e-06,
"log_odds_chosen": 1.5122112035751343,
"log_odds_ratio": -0.2880076766014099,
"logps/chosen": -0.5204971432685852,
"logps/rejected": -1.3571436405181885,
"loss": 12.9568,
"nll_loss": 0.789382815361023,
"rewards/accuracies": 0.9281250238418579,
"rewards/chosen": -0.02602485381066799,
"rewards/margins": 0.041832335293293,
"rewards/rejected": -0.06785719096660614,
"step": 275
},
{
"epoch": 3.01078167115903,
"grad_norm": 24.25,
"learning_rate": 8.22476614838737e-06,
"log_odds_chosen": 1.5424445867538452,
"log_odds_ratio": -0.28235647082328796,
"logps/chosen": -0.5312695503234863,
"logps/rejected": -1.3865729570388794,
"loss": 12.1174,
"nll_loss": 0.7770485877990723,
"rewards/accuracies": 0.9243420958518982,
"rewards/chosen": -0.026563478633761406,
"rewards/margins": 0.04276517778635025,
"rewards/rejected": -0.06932865083217621,
"step": 280
},
{
"epoch": 3.0646900269541777,
"grad_norm": 19.625,
"learning_rate": 7.364325842429417e-06,
"log_odds_chosen": 1.9977210760116577,
"log_odds_ratio": -0.2042604237794876,
"logps/chosen": -0.4289301335811615,
"logps/rejected": -1.457916498184204,
"loss": 10.7252,
"nll_loss": 0.6526534557342529,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -0.021446505561470985,
"rewards/margins": 0.051449306309223175,
"rewards/rejected": -0.07289581000804901,
"step": 285
},
{
"epoch": 3.118598382749326,
"grad_norm": 32.0,
"learning_rate": 6.543595027954785e-06,
"log_odds_chosen": 2.0810437202453613,
"log_odds_ratio": -0.21267971396446228,
"logps/chosen": -0.4348023533821106,
"logps/rejected": -1.5428168773651123,
"loss": 10.659,
"nll_loss": 0.6491010785102844,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.02174011990427971,
"rewards/margins": 0.05540072172880173,
"rewards/rejected": -0.07714084535837173,
"step": 290
},
{
"epoch": 3.1725067385444743,
"grad_norm": 23.375,
"learning_rate": 5.764421709354811e-06,
"log_odds_chosen": 2.2832117080688477,
"log_odds_ratio": -0.17793798446655273,
"logps/chosen": -0.4084325432777405,
"logps/rejected": -1.6333366632461548,
"loss": 10.4443,
"nll_loss": 0.6368886232376099,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.020421627908945084,
"rewards/margins": 0.061245210468769073,
"rewards/rejected": -0.08166682720184326,
"step": 295
},
{
"epoch": 3.2264150943396226,
"grad_norm": 23.25,
"learning_rate": 5.02856031778888e-06,
"log_odds_chosen": 2.20396089553833,
"log_odds_ratio": -0.20375783741474152,
"logps/chosen": -0.4364936351776123,
"logps/rejected": -1.5872256755828857,
"loss": 10.7344,
"nll_loss": 0.6540727019309998,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.021824682131409645,
"rewards/margins": 0.05753660202026367,
"rewards/rejected": -0.07936128228902817,
"step": 300
},
{
"epoch": 3.280323450134771,
"grad_norm": 21.625,
"learning_rate": 4.337667760807018e-06,
"log_odds_chosen": 2.0813040733337402,
"log_odds_ratio": -0.21252334117889404,
"logps/chosen": -0.4447731375694275,
"logps/rejected": -1.5767261981964111,
"loss": 10.8909,
"nll_loss": 0.6635223031044006,
"rewards/accuracies": 0.9593750238418579,
"rewards/chosen": -0.022238660603761673,
"rewards/margins": 0.05659765005111694,
"rewards/rejected": -0.07883630692958832,
"step": 305
},
{
"epoch": 3.334231805929919,
"grad_norm": 26.5,
"learning_rate": 3.69329969156216e-06,
"log_odds_chosen": 2.2381584644317627,
"log_odds_ratio": -0.17899712920188904,
"logps/chosen": -0.4195394515991211,
"logps/rejected": -1.6279566287994385,
"loss": 10.5163,
"nll_loss": 0.6414855122566223,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.020976971834897995,
"rewards/margins": 0.06042086333036423,
"rewards/rejected": -0.08139783143997192,
"step": 310
},
{
"epoch": 3.3881401617250675,
"grad_norm": 24.875,
"learning_rate": 3.096907006012639e-06,
"log_odds_chosen": 2.148031711578369,
"log_odds_ratio": -0.2030152827501297,
"logps/chosen": -0.43345871567726135,
"logps/rejected": -1.6023099422454834,
"loss": 10.7448,
"nll_loss": 0.6548030376434326,
"rewards/accuracies": 0.953125,
"rewards/chosen": -0.021672936156392097,
"rewards/margins": 0.05844256281852722,
"rewards/rejected": -0.08011549711227417,
"step": 315
},
{
"epoch": 3.442048517520216,
"grad_norm": 23.25,
"learning_rate": 2.549832576001965e-06,
"log_odds_chosen": 2.323772668838501,
"log_odds_ratio": -0.17535866796970367,
"logps/chosen": -0.4194834232330322,
"logps/rejected": -1.6763973236083984,
"loss": 10.4299,
"nll_loss": 0.6364527940750122,
"rewards/accuracies": 0.9906250238418579,
"rewards/chosen": -0.0209741722792387,
"rewards/margins": 0.06284569203853607,
"rewards/rejected": -0.08381986618041992,
"step": 320
},
{
"epoch": 3.4959568733153636,
"grad_norm": 24.625,
"learning_rate": 2.0533082255718594e-06,
"log_odds_chosen": 2.2250733375549316,
"log_odds_ratio": -0.1992703080177307,
"logps/chosen": -0.39943161606788635,
"logps/rejected": -1.571520447731018,
"loss": 10.0609,
"nll_loss": 0.6121501922607422,
"rewards/accuracies": 0.965624988079071,
"rewards/chosen": -0.019971583038568497,
"rewards/margins": 0.05860443040728569,
"rewards/rejected": -0.07857601344585419,
"step": 325
},
{
"epoch": 3.5498652291105124,
"grad_norm": 27.875,
"learning_rate": 1.6084519573168727e-06,
"log_odds_chosen": 2.1328158378601074,
"log_odds_ratio": -0.21596260368824005,
"logps/chosen": -0.43325909972190857,
"logps/rejected": -1.574236273765564,
"loss": 10.7006,
"nll_loss": 0.6516640782356262,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.02166295424103737,
"rewards/margins": 0.05704886466264725,
"rewards/rejected": -0.07871182262897491,
"step": 330
},
{
"epoch": 3.6037735849056602,
"grad_norm": 25.25,
"learning_rate": 1.2162654350258402e-06,
"log_odds_chosen": 2.322880744934082,
"log_odds_ratio": -0.18639399111270905,
"logps/chosen": -0.4327073097229004,
"logps/rejected": -1.7132127285003662,
"loss": 10.383,
"nll_loss": 0.6331822276115417,
"rewards/accuracies": 0.9593750238418579,
"rewards/chosen": -0.02163536846637726,
"rewards/margins": 0.06402527540922165,
"rewards/rejected": -0.0856606513261795,
"step": 335
},
{
"epoch": 3.6576819407008085,
"grad_norm": 25.625,
"learning_rate": 8.776317282785163e-07,
"log_odds_chosen": 2.281785249710083,
"log_odds_ratio": -0.18652771413326263,
"logps/chosen": -0.42065826058387756,
"logps/rejected": -1.6458238363265991,
"loss": 10.3009,
"nll_loss": 0.6277027726173401,
"rewards/accuracies": 0.965624988079071,
"rewards/chosen": -0.02103291265666485,
"rewards/margins": 0.06125827878713608,
"rewards/rejected": -0.08229119330644608,
"step": 340
},
{
"epoch": 3.711590296495957,
"grad_norm": 21.875,
"learning_rate": 5.933133240756117e-07,
"log_odds_chosen": 2.2684173583984375,
"log_odds_ratio": -0.18595468997955322,
"logps/chosen": -0.43632110953330994,
"logps/rejected": -1.6807502508163452,
"loss": 10.4706,
"nll_loss": 0.638587474822998,
"rewards/accuracies": 0.971875011920929,
"rewards/chosen": -0.021816056221723557,
"rewards/margins": 0.062221460044384,
"rewards/rejected": -0.08403751999139786,
"step": 345
},
{
"epoch": 3.765498652291105,
"grad_norm": 21.625,
"learning_rate": 3.6395040997952714e-07,
"log_odds_chosen": 2.0946693420410156,
"log_odds_ratio": -0.20106664299964905,
"logps/chosen": -0.4340636730194092,
"logps/rejected": -1.574231505393982,
"loss": 10.9029,
"nll_loss": 0.6646233797073364,
"rewards/accuracies": 0.965624988079071,
"rewards/chosen": -0.02170318365097046,
"rewards/margins": 0.057008396834135056,
"rewards/rejected": -0.07871158421039581,
"step": 350
},
{
"epoch": 3.8194070080862534,
"grad_norm": 25.75,
"learning_rate": 1.9005943263146198e-07,
"log_odds_chosen": 2.3714165687561035,
"log_odds_ratio": -0.1724739372730255,
"logps/chosen": -0.3921690583229065,
"logps/rejected": -1.6754087209701538,
"loss": 10.2651,
"nll_loss": 0.6260087490081787,
"rewards/accuracies": 0.953125,
"rewards/chosen": -0.019608452916145325,
"rewards/margins": 0.06416197866201401,
"rewards/rejected": -0.08377043902873993,
"step": 355
},
{
"epoch": 3.8733153638814017,
"grad_norm": 29.0,
"learning_rate": 7.203193489064009e-08,
"log_odds_chosen": 2.2240853309631348,
"log_odds_ratio": -0.18985909223556519,
"logps/chosen": -0.4116806089878082,
"logps/rejected": -1.6198737621307373,
"loss": 10.3175,
"nll_loss": 0.6286627054214478,
"rewards/accuracies": 0.9781249761581421,
"rewards/chosen": -0.020584028214216232,
"rewards/margins": 0.060409657657146454,
"rewards/rejected": -0.08099369704723358,
"step": 360
},
{
"epoch": 3.92722371967655,
"grad_norm": 22.75,
"learning_rate": 1.0133674214043987e-08,
"log_odds_chosen": 2.4190001487731934,
"log_odds_ratio": -0.16849036514759064,
"logps/chosen": -0.387323796749115,
"logps/rejected": -1.692837119102478,
"loss": 9.7812,
"nll_loss": 0.5957530736923218,
"rewards/accuracies": 0.984375,
"rewards/chosen": -0.0193661917001009,
"rewards/margins": 0.06527567654848099,
"rewards/rejected": -0.08464185893535614,
"step": 365
},
{
"epoch": 3.9595687331536387,
"step": 368,
"total_flos": 0.0,
"train_loss": 25.758197416429933,
"train_runtime": 4430.8526,
"train_samples_per_second": 5.359,
"train_steps_per_second": 0.083
}
],
"logging_steps": 5,
"max_steps": 368,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}