|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.9595687331536387, |
|
"eval_steps": 500, |
|
"global_step": 368, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05390835579514825, |
|
"grad_norm": 700.0, |
|
"learning_rate": 6.7567567567567575e-06, |
|
"log_odds_chosen": 5.927034378051758, |
|
"log_odds_ratio": -7.722090721130371, |
|
"logps/chosen": -24.312976837158203, |
|
"logps/rejected": -30.241857528686523, |
|
"loss": 193.2568, |
|
"nll_loss": 11.692580223083496, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -1.215648889541626, |
|
"rewards/margins": 0.2964438498020172, |
|
"rewards/rejected": -1.5120928287506104, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.1078167115902965, |
|
"grad_norm": 207.0, |
|
"learning_rate": 1.3513513513513515e-05, |
|
"log_odds_chosen": 4.102215766906738, |
|
"log_odds_ratio": -5.448796272277832, |
|
"logps/chosen": -20.24713897705078, |
|
"logps/rejected": -24.35104751586914, |
|
"loss": 161.9082, |
|
"nll_loss": 9.846972465515137, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -1.0123568773269653, |
|
"rewards/margins": 0.20519545674324036, |
|
"rewards/rejected": -1.2175523042678833, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.16172506738544473, |
|
"grad_norm": 203.0, |
|
"learning_rate": 2.0270270270270273e-05, |
|
"log_odds_chosen": 3.879714250564575, |
|
"log_odds_ratio": -6.857820987701416, |
|
"logps/chosen": -21.977680206298828, |
|
"logps/rejected": -25.858551025390625, |
|
"loss": 166.6047, |
|
"nll_loss": 10.069997787475586, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -1.098883867263794, |
|
"rewards/margins": 0.19404351711273193, |
|
"rewards/rejected": -1.2929275035858154, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.215633423180593, |
|
"grad_norm": 628.0, |
|
"learning_rate": 2.702702702702703e-05, |
|
"log_odds_chosen": 0.7552221417427063, |
|
"log_odds_ratio": -7.44603967666626, |
|
"logps/chosen": -20.903730392456055, |
|
"logps/rejected": -21.659225463867188, |
|
"loss": 142.4453, |
|
"nll_loss": 8.530646324157715, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -1.0451866388320923, |
|
"rewards/margins": 0.03777474910020828, |
|
"rewards/rejected": -1.0829613208770752, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2695417789757412, |
|
"grad_norm": 366.0, |
|
"learning_rate": 3.3783783783783784e-05, |
|
"log_odds_chosen": -0.09596023708581924, |
|
"log_odds_ratio": -2.1412220001220703, |
|
"logps/chosen": -6.6122331619262695, |
|
"logps/rejected": -6.498193264007568, |
|
"loss": 64.159, |
|
"nll_loss": 3.903474807739258, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.3306116461753845, |
|
"rewards/margins": -0.005701972637325525, |
|
"rewards/rejected": -0.32490968704223633, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.32345013477088946, |
|
"grad_norm": 114.0, |
|
"learning_rate": 4.0540540540540545e-05, |
|
"log_odds_chosen": 0.23905467987060547, |
|
"log_odds_ratio": -0.7873450517654419, |
|
"logps/chosen": -2.026221752166748, |
|
"logps/rejected": -2.2390971183776855, |
|
"loss": 37.6504, |
|
"nll_loss": 2.314509868621826, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.10131107270717621, |
|
"rewards/margins": 0.010643779300153255, |
|
"rewards/rejected": -0.11195486783981323, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.37735849056603776, |
|
"grad_norm": 74.0, |
|
"learning_rate": 4.72972972972973e-05, |
|
"log_odds_chosen": 0.28125709295272827, |
|
"log_odds_ratio": -0.7325613498687744, |
|
"logps/chosen": -1.7039525508880615, |
|
"logps/rejected": -1.9455076456069946, |
|
"loss": 31.1963, |
|
"nll_loss": 1.9137756824493408, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.08519763499498367, |
|
"rewards/margins": 0.012077751569449902, |
|
"rewards/rejected": -0.09727539122104645, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.431266846361186, |
|
"grad_norm": 153.0, |
|
"learning_rate": 4.998986632578596e-05, |
|
"log_odds_chosen": 0.26568371057510376, |
|
"log_odds_ratio": -0.7367907166481018, |
|
"logps/chosen": -1.7482105493545532, |
|
"logps/rejected": -1.9558181762695312, |
|
"loss": 31.2704, |
|
"nll_loss": 1.9178155660629272, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -0.08741052448749542, |
|
"rewards/margins": 0.01038038544356823, |
|
"rewards/rejected": -0.0977909117937088, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.48517520215633425, |
|
"grad_norm": 52.75, |
|
"learning_rate": 4.992796806510936e-05, |
|
"log_odds_chosen": 0.19093379378318787, |
|
"log_odds_ratio": -0.7119738459587097, |
|
"logps/chosen": -1.4446159601211548, |
|
"logps/rejected": -1.5990588665008545, |
|
"loss": 29.318, |
|
"nll_loss": 1.797499656677246, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.07223080843687057, |
|
"rewards/margins": 0.007722141686826944, |
|
"rewards/rejected": -0.07995294034481049, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.5390835579514824, |
|
"grad_norm": 43.25, |
|
"learning_rate": 4.980994056736854e-05, |
|
"log_odds_chosen": 0.1590539515018463, |
|
"log_odds_ratio": -0.7131061553955078, |
|
"logps/chosen": -1.409611701965332, |
|
"logps/rejected": -1.517723560333252, |
|
"loss": 27.8473, |
|
"nll_loss": 1.7051334381103516, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.0704805999994278, |
|
"rewards/margins": 0.005405584815889597, |
|
"rewards/rejected": -0.07588617503643036, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5929919137466307, |
|
"grad_norm": 105.5, |
|
"learning_rate": 4.9636049590020475e-05, |
|
"log_odds_chosen": 0.2336808443069458, |
|
"log_odds_ratio": -0.6900186538696289, |
|
"logps/chosen": -1.3359286785125732, |
|
"logps/rejected": -1.5113661289215088, |
|
"loss": 26.8618, |
|
"nll_loss": 1.6446822881698608, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.0667964294552803, |
|
"rewards/margins": 0.008771875873208046, |
|
"rewards/rejected": -0.0755683034658432, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.6469002695417789, |
|
"grad_norm": 81.5, |
|
"learning_rate": 4.940668667592439e-05, |
|
"log_odds_chosen": 0.1283489167690277, |
|
"log_odds_ratio": -0.715418815612793, |
|
"logps/chosen": -1.3006031513214111, |
|
"logps/rejected": -1.4078199863433838, |
|
"loss": 26.0219, |
|
"nll_loss": 1.5916073322296143, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.06503016501665115, |
|
"rewards/margins": 0.005360837560147047, |
|
"rewards/rejected": -0.07039099186658859, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7008086253369272, |
|
"grad_norm": 35.0, |
|
"learning_rate": 4.912236827172148e-05, |
|
"log_odds_chosen": 0.1217084527015686, |
|
"log_odds_ratio": -0.7157756686210632, |
|
"logps/chosen": -1.251226782798767, |
|
"logps/rejected": -1.3473665714263916, |
|
"loss": 25.8473, |
|
"nll_loss": 1.5807287693023682, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.06256133317947388, |
|
"rewards/margins": 0.004806997254490852, |
|
"rewards/rejected": -0.06736833602190018, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.7547169811320755, |
|
"grad_norm": 36.5, |
|
"learning_rate": 4.878373456497416e-05, |
|
"log_odds_chosen": 0.1481620967388153, |
|
"log_odds_ratio": -0.6984988451004028, |
|
"logps/chosen": -1.207621455192566, |
|
"logps/rejected": -1.3115525245666504, |
|
"loss": 25.3049, |
|
"nll_loss": 1.54731285572052, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.060381073504686356, |
|
"rewards/margins": 0.005196552723646164, |
|
"rewards/rejected": -0.06557762622833252, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8086253369272237, |
|
"grad_norm": 157.0, |
|
"learning_rate": 4.839154804268313e-05, |
|
"log_odds_chosen": 0.2538384795188904, |
|
"log_odds_ratio": -0.6509516835212708, |
|
"logps/chosen": -1.1560814380645752, |
|
"logps/rejected": -1.3362526893615723, |
|
"loss": 24.7982, |
|
"nll_loss": 1.5172107219696045, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.05780406668782234, |
|
"rewards/margins": 0.009008568711578846, |
|
"rewards/rejected": -0.06681263446807861, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.862533692722372, |
|
"grad_norm": 66.0, |
|
"learning_rate": 4.7946691774428144e-05, |
|
"log_odds_chosen": 0.22329294681549072, |
|
"log_odds_ratio": -0.6457458138465881, |
|
"logps/chosen": -1.1015363931655884, |
|
"logps/rejected": -1.2518682479858398, |
|
"loss": 24.3387, |
|
"nll_loss": 1.4887213706970215, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.05507681518793106, |
|
"rewards/margins": 0.007516591809689999, |
|
"rewards/rejected": -0.06259341537952423, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.9164420485175202, |
|
"grad_norm": 60.75, |
|
"learning_rate": 4.745016742399804e-05, |
|
"log_odds_chosen": 0.17877401411533356, |
|
"log_odds_ratio": -0.6742900609970093, |
|
"logps/chosen": -1.0733582973480225, |
|
"logps/rejected": -1.1897413730621338, |
|
"loss": 23.64, |
|
"nll_loss": 1.4440288543701172, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -0.05366792157292366, |
|
"rewards/margins": 0.0058191511780023575, |
|
"rewards/rejected": -0.05948706716299057, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.9703504043126685, |
|
"grad_norm": 114.5, |
|
"learning_rate": 4.690309299398736e-05, |
|
"log_odds_chosen": 0.15260997414588928, |
|
"log_odds_ratio": -0.6815454363822937, |
|
"logps/chosen": -1.0459394454956055, |
|
"logps/rejected": -1.1531497240066528, |
|
"loss": 23.1175, |
|
"nll_loss": 1.4113500118255615, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.052296966314315796, |
|
"rewards/margins": 0.005360516719520092, |
|
"rewards/rejected": -0.05765749141573906, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0215633423180592, |
|
"grad_norm": 80.0, |
|
"learning_rate": 4.630670030843785e-05, |
|
"log_odds_chosen": 0.2412596195936203, |
|
"log_odds_ratio": -0.6528716087341309, |
|
"logps/chosen": -0.9962456226348877, |
|
"logps/rejected": -1.1605761051177979, |
|
"loss": 21.3336, |
|
"nll_loss": 1.3709510564804077, |
|
"rewards/accuracies": 0.6151315569877625, |
|
"rewards/chosen": -0.049812283366918564, |
|
"rewards/margins": 0.008216519840061665, |
|
"rewards/rejected": -0.058028798550367355, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.0754716981132075, |
|
"grad_norm": 53.0, |
|
"learning_rate": 4.566233223919298e-05, |
|
"log_odds_chosen": 0.5380310416221619, |
|
"log_odds_ratio": -0.5722979307174683, |
|
"logps/chosen": -0.9102069139480591, |
|
"logps/rejected": -1.2439484596252441, |
|
"loss": 21.3346, |
|
"nll_loss": 1.3030532598495483, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.04551034793257713, |
|
"rewards/margins": 0.016687078401446342, |
|
"rewards/rejected": -0.062197424471378326, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.1293800539083558, |
|
"grad_norm": 80.0, |
|
"learning_rate": 4.4971439682211125e-05, |
|
"log_odds_chosen": 0.5655834674835205, |
|
"log_odds_ratio": -0.5308610200881958, |
|
"logps/chosen": -0.8367247581481934, |
|
"logps/rejected": -1.1727405786514282, |
|
"loss": 19.4547, |
|
"nll_loss": 1.187097191810608, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.041836243122816086, |
|
"rewards/margins": 0.016800785437226295, |
|
"rewards/rejected": -0.05863703042268753, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.1832884097035041, |
|
"grad_norm": 29.875, |
|
"learning_rate": 4.4235578290645194e-05, |
|
"log_odds_chosen": 0.5750025510787964, |
|
"log_odds_ratio": -0.5339788794517517, |
|
"logps/chosen": -0.8711065053939819, |
|
"logps/rejected": -1.2383763790130615, |
|
"loss": 20.0698, |
|
"nll_loss": 1.2257729768753052, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.043555326759815216, |
|
"rewards/margins": 0.018363500013947487, |
|
"rewards/rejected": -0.061918824911117554, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.2371967654986522, |
|
"grad_norm": 40.5, |
|
"learning_rate": 4.3456404972045216e-05, |
|
"log_odds_chosen": 0.6106120347976685, |
|
"log_odds_ratio": -0.511372983455658, |
|
"logps/chosen": -0.8859386444091797, |
|
"logps/rejected": -1.271536111831665, |
|
"loss": 19.8049, |
|
"nll_loss": 1.2099454402923584, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -0.044296931475400925, |
|
"rewards/margins": 0.019279872998595238, |
|
"rewards/rejected": -0.06357680261135101, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.2911051212938005, |
|
"grad_norm": 59.5, |
|
"learning_rate": 4.263567415757058e-05, |
|
"log_odds_chosen": 0.5386055111885071, |
|
"log_odds_ratio": -0.5215080976486206, |
|
"logps/chosen": -0.8329726457595825, |
|
"logps/rejected": -1.1544153690338135, |
|
"loss": 18.5548, |
|
"nll_loss": 1.131080150604248, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.04164863005280495, |
|
"rewards/margins": 0.01607213169336319, |
|
"rewards/rejected": -0.057720769196748734, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.3450134770889488, |
|
"grad_norm": 43.75, |
|
"learning_rate": 4.177523385161264e-05, |
|
"log_odds_chosen": 0.5417205691337585, |
|
"log_odds_ratio": -0.536739706993103, |
|
"logps/chosen": -0.8140735626220703, |
|
"logps/rejected": -1.1415430307388306, |
|
"loss": 19.0077, |
|
"nll_loss": 1.1593010425567627, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.040703680366277695, |
|
"rewards/margins": 0.016373474150896072, |
|
"rewards/rejected": -0.05707715079188347, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.398921832884097, |
|
"grad_norm": 39.0, |
|
"learning_rate": 4.087702147072241e-05, |
|
"log_odds_chosen": 0.7008520364761353, |
|
"log_odds_ratio": -0.4697790741920471, |
|
"logps/chosen": -0.8314515352249146, |
|
"logps/rejected": -1.2709705829620361, |
|
"loss": 19.0124, |
|
"nll_loss": 1.161712408065796, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.041572581976652145, |
|
"rewards/margins": 0.02197595313191414, |
|
"rewards/rejected": -0.06354853510856628, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.4528301886792452, |
|
"grad_norm": 25.125, |
|
"learning_rate": 3.9943059481212795e-05, |
|
"log_odds_chosen": 0.6904614567756653, |
|
"log_odds_ratio": -0.48464569449424744, |
|
"logps/chosen": -0.8193404078483582, |
|
"logps/rejected": -1.243801474571228, |
|
"loss": 18.8223, |
|
"nll_loss": 1.1491620540618896, |
|
"rewards/accuracies": 0.815625011920929, |
|
"rewards/chosen": -0.040967028588056564, |
|
"rewards/margins": 0.021223049610853195, |
|
"rewards/rejected": -0.06219007819890976, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.5067385444743935, |
|
"grad_norm": 32.5, |
|
"learning_rate": 3.89754508452579e-05, |
|
"log_odds_chosen": 0.5718387365341187, |
|
"log_odds_ratio": -0.5105268359184265, |
|
"logps/chosen": -0.8398844599723816, |
|
"logps/rejected": -1.1855361461639404, |
|
"loss": 18.7264, |
|
"nll_loss": 1.1422346830368042, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.04199422523379326, |
|
"rewards/margins": 0.017282582819461823, |
|
"rewards/rejected": -0.05927680805325508, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.5606469002695418, |
|
"grad_norm": 46.75, |
|
"learning_rate": 3.797637428574326e-05, |
|
"log_odds_chosen": 0.5373243093490601, |
|
"log_odds_ratio": -0.5297266840934753, |
|
"logps/chosen": -0.8746851682662964, |
|
"logps/rejected": -1.2106478214263916, |
|
"loss": 20.6137, |
|
"nll_loss": 1.2596467733383179, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.04373425990343094, |
|
"rewards/margins": 0.01679813303053379, |
|
"rewards/rejected": -0.06053239852190018, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.61455525606469, |
|
"grad_norm": 30.0, |
|
"learning_rate": 3.694807938052887e-05, |
|
"log_odds_chosen": 0.5541953444480896, |
|
"log_odds_ratio": -0.5251818299293518, |
|
"logps/chosen": -0.817675769329071, |
|
"logps/rejected": -1.1604855060577393, |
|
"loss": 18.8161, |
|
"nll_loss": 1.147394061088562, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.04088379442691803, |
|
"rewards/margins": 0.017140481621026993, |
|
"rewards/rejected": -0.05802427604794502, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.6684636118598384, |
|
"grad_norm": 34.0, |
|
"learning_rate": 3.589288149717119e-05, |
|
"log_odds_chosen": 0.5490964651107788, |
|
"log_odds_ratio": -0.5439670085906982, |
|
"logps/chosen": -0.797703742980957, |
|
"logps/rejected": -1.1254116296768188, |
|
"loss": 18.6515, |
|
"nll_loss": 1.1367080211639404, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.03988518938422203, |
|
"rewards/margins": 0.016385387629270554, |
|
"rewards/rejected": -0.05627058073878288, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.7223719676549867, |
|
"grad_norm": 25.0, |
|
"learning_rate": 3.481315657950931e-05, |
|
"log_odds_chosen": 0.6332697868347168, |
|
"log_odds_ratio": -0.4977358281612396, |
|
"logps/chosen": -0.825598418712616, |
|
"logps/rejected": -1.204270601272583, |
|
"loss": 18.9003, |
|
"nll_loss": 1.1534397602081299, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -0.04127992317080498, |
|
"rewards/margins": 0.01893361285328865, |
|
"rewards/rejected": -0.06021353602409363, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.7762803234501348, |
|
"grad_norm": 24.25, |
|
"learning_rate": 3.3711335797853977e-05, |
|
"log_odds_chosen": 0.7610551714897156, |
|
"log_odds_ratio": -0.4777259826660156, |
|
"logps/chosen": -0.803636908531189, |
|
"logps/rejected": -1.2949776649475098, |
|
"loss": 18.4323, |
|
"nll_loss": 1.1253396272659302, |
|
"rewards/accuracies": 0.8031250238418579, |
|
"rewards/chosen": -0.04018184915184975, |
|
"rewards/margins": 0.024567028507590294, |
|
"rewards/rejected": -0.06474888324737549, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.830188679245283, |
|
"grad_norm": 39.0, |
|
"learning_rate": 3.2589900074825696e-05, |
|
"log_odds_chosen": 0.6568578481674194, |
|
"log_odds_ratio": -0.5057220458984375, |
|
"logps/chosen": -0.8026474118232727, |
|
"logps/rejected": -1.1945730447769165, |
|
"loss": 19.1293, |
|
"nll_loss": 1.1678292751312256, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.040132369846105576, |
|
"rewards/margins": 0.0195962805300951, |
|
"rewards/rejected": -0.05972864478826523, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.8840970350404311, |
|
"grad_norm": 24.375, |
|
"learning_rate": 3.1451374499167586e-05, |
|
"log_odds_chosen": 0.5363537073135376, |
|
"log_odds_ratio": -0.531457781791687, |
|
"logps/chosen": -0.8319064378738403, |
|
"logps/rejected": -1.1418461799621582, |
|
"loss": 18.4204, |
|
"nll_loss": 1.1223351955413818, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -0.041595328599214554, |
|
"rewards/margins": 0.015496985986828804, |
|
"rewards/rejected": -0.05709231644868851, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.9380053908355794, |
|
"grad_norm": 26.125, |
|
"learning_rate": 3.029832264011133e-05, |
|
"log_odds_chosen": 0.5449716448783875, |
|
"log_odds_ratio": -0.5418750047683716, |
|
"logps/chosen": -0.8625362515449524, |
|
"logps/rejected": -1.210113525390625, |
|
"loss": 19.8141, |
|
"nll_loss": 1.2094202041625977, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.04312681779265404, |
|
"rewards/margins": 0.017378859221935272, |
|
"rewards/rejected": -0.06050567701458931, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.9919137466307277, |
|
"grad_norm": 24.5, |
|
"learning_rate": 2.91333407750982e-05, |
|
"log_odds_chosen": 0.6130974292755127, |
|
"log_odds_ratio": -0.5151475667953491, |
|
"logps/chosen": -0.8204232454299927, |
|
"logps/rejected": -1.1893672943115234, |
|
"loss": 18.8592, |
|
"nll_loss": 1.1504535675048828, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.041021160781383514, |
|
"rewards/margins": 0.018447209149599075, |
|
"rewards/rejected": -0.05946836993098259, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.0431266846361185, |
|
"grad_norm": 30.875, |
|
"learning_rate": 2.7959052043852464e-05, |
|
"log_odds_chosen": 1.1373300552368164, |
|
"log_odds_ratio": -0.3630596697330475, |
|
"logps/chosen": -0.6143659353256226, |
|
"logps/rejected": -1.2531551122665405, |
|
"loss": 13.6947, |
|
"nll_loss": 0.8776864409446716, |
|
"rewards/accuracies": 0.8914473652839661, |
|
"rewards/chosen": -0.03071829490363598, |
|
"rewards/margins": 0.03193946182727814, |
|
"rewards/rejected": -0.06265775859355927, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.0970350404312668, |
|
"grad_norm": 29.875, |
|
"learning_rate": 2.677810054197009e-05, |
|
"log_odds_chosen": 1.4366052150726318, |
|
"log_odds_ratio": -0.29583844542503357, |
|
"logps/chosen": -0.5867640972137451, |
|
"logps/rejected": -1.3974506855010986, |
|
"loss": 14.6992, |
|
"nll_loss": 0.8981709480285645, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": -0.029338205233216286, |
|
"rewards/margins": 0.04053433611989021, |
|
"rewards/rejected": -0.06987253576517105, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.150943396226415, |
|
"grad_norm": 27.125, |
|
"learning_rate": 2.559314536732212e-05, |
|
"log_odds_chosen": 1.3597910404205322, |
|
"log_odds_ratio": -0.3113149404525757, |
|
"logps/chosen": -0.5893815159797668, |
|
"logps/rejected": -1.344481110572815, |
|
"loss": 13.7478, |
|
"nll_loss": 0.8379224538803101, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.029469076544046402, |
|
"rewards/margins": 0.037754982709884644, |
|
"rewards/rejected": -0.06722406297922134, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.2048517520215634, |
|
"grad_norm": 36.0, |
|
"learning_rate": 2.4406854632677883e-05, |
|
"log_odds_chosen": 1.4072751998901367, |
|
"log_odds_ratio": -0.3055153489112854, |
|
"logps/chosen": -0.568659782409668, |
|
"logps/rejected": -1.3686797618865967, |
|
"loss": 13.52, |
|
"nll_loss": 0.8238337635993958, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.02843298949301243, |
|
"rewards/margins": 0.04000100493431091, |
|
"rewards/rejected": -0.0684339851140976, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 2.2587601078167117, |
|
"grad_norm": 32.5, |
|
"learning_rate": 2.3221899458029918e-05, |
|
"log_odds_chosen": 1.3871681690216064, |
|
"log_odds_ratio": -0.31390881538391113, |
|
"logps/chosen": -0.5992950201034546, |
|
"logps/rejected": -1.3834056854248047, |
|
"loss": 14.0083, |
|
"nll_loss": 0.8541052937507629, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.02996474876999855, |
|
"rewards/margins": 0.03920553997159004, |
|
"rewards/rejected": -0.069170281291008, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.31266846361186, |
|
"grad_norm": 29.875, |
|
"learning_rate": 2.204094795614755e-05, |
|
"log_odds_chosen": 1.4916521310806274, |
|
"log_odds_ratio": -0.2852187752723694, |
|
"logps/chosen": -0.5344905257225037, |
|
"logps/rejected": -1.3550375699996948, |
|
"loss": 13.3234, |
|
"nll_loss": 0.8122995495796204, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/chosen": -0.026724528521299362, |
|
"rewards/margins": 0.04102735221385956, |
|
"rewards/rejected": -0.06775187700986862, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.3665768194070083, |
|
"grad_norm": 25.25, |
|
"learning_rate": 2.0866659224901812e-05, |
|
"log_odds_chosen": 1.407762885093689, |
|
"log_odds_ratio": -0.3070213496685028, |
|
"logps/chosen": -0.5739747881889343, |
|
"logps/rejected": -1.3859971761703491, |
|
"loss": 13.2689, |
|
"nll_loss": 0.8083074688911438, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.028698738664388657, |
|
"rewards/margins": 0.04060111939907074, |
|
"rewards/rejected": -0.0692998617887497, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.420485175202156, |
|
"grad_norm": 27.625, |
|
"learning_rate": 1.970167735988867e-05, |
|
"log_odds_chosen": 1.411454439163208, |
|
"log_odds_ratio": -0.30091729760169983, |
|
"logps/chosen": -0.5828540325164795, |
|
"logps/rejected": -1.3717459440231323, |
|
"loss": 13.5018, |
|
"nll_loss": 0.8228418231010437, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.029142703860998154, |
|
"rewards/margins": 0.039444588124752045, |
|
"rewards/rejected": -0.0685872882604599, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.4743935309973044, |
|
"grad_norm": 29.75, |
|
"learning_rate": 1.854862550083241e-05, |
|
"log_odds_chosen": 1.4036983251571655, |
|
"log_odds_ratio": -0.3000126779079437, |
|
"logps/chosen": -0.5541399717330933, |
|
"logps/rejected": -1.3349835872650146, |
|
"loss": 13.6971, |
|
"nll_loss": 0.834954559803009, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/chosen": -0.027707001194357872, |
|
"rewards/margins": 0.039042189717292786, |
|
"rewards/rejected": -0.06674918532371521, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.5283018867924527, |
|
"grad_norm": 27.375, |
|
"learning_rate": 1.7410099925174307e-05, |
|
"log_odds_chosen": 1.3673919439315796, |
|
"log_odds_ratio": -0.32730910181999207, |
|
"logps/chosen": -0.5749243497848511, |
|
"logps/rejected": -1.3314166069030762, |
|
"loss": 13.4834, |
|
"nll_loss": 0.8205031156539917, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.028746221214532852, |
|
"rewards/margins": 0.037824612110853195, |
|
"rewards/rejected": -0.06657083332538605, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.582210242587601, |
|
"grad_norm": 25.375, |
|
"learning_rate": 1.628866420214603e-05, |
|
"log_odds_chosen": 1.4173800945281982, |
|
"log_odds_ratio": -0.30779603123664856, |
|
"logps/chosen": -0.5958832502365112, |
|
"logps/rejected": -1.4191477298736572, |
|
"loss": 14.0265, |
|
"nll_loss": 0.8556331396102905, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.02979416213929653, |
|
"rewards/margins": 0.04116322472691536, |
|
"rewards/rejected": -0.07095737755298615, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.6361185983827493, |
|
"grad_norm": 25.125, |
|
"learning_rate": 1.5186843420490698e-05, |
|
"log_odds_chosen": 1.4067838191986084, |
|
"log_odds_ratio": -0.3025580644607544, |
|
"logps/chosen": -0.5486974716186523, |
|
"logps/rejected": -1.3047682046890259, |
|
"loss": 14.1827, |
|
"nll_loss": 0.864966869354248, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.027434874325990677, |
|
"rewards/margins": 0.037803538143634796, |
|
"rewards/rejected": -0.06523840129375458, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.6900269541778976, |
|
"grad_norm": 27.5, |
|
"learning_rate": 1.4107118502828814e-05, |
|
"log_odds_chosen": 1.5402934551239014, |
|
"log_odds_ratio": -0.2862972617149353, |
|
"logps/chosen": -0.5321887731552124, |
|
"logps/rejected": -1.3919765949249268, |
|
"loss": 13.1153, |
|
"nll_loss": 0.7994569540023804, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.02660943940281868, |
|
"rewards/margins": 0.042989395558834076, |
|
"rewards/rejected": -0.06959883868694305, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.743935309973046, |
|
"grad_norm": 29.375, |
|
"learning_rate": 1.3051920619471145e-05, |
|
"log_odds_chosen": 1.4740593433380127, |
|
"log_odds_ratio": -0.30231142044067383, |
|
"logps/chosen": -0.5255938768386841, |
|
"logps/rejected": -1.3372348546981812, |
|
"loss": 13.7048, |
|
"nll_loss": 0.8354872465133667, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/chosen": -0.026279697194695473, |
|
"rewards/margins": 0.04058205336332321, |
|
"rewards/rejected": -0.06686174869537354, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.797843665768194, |
|
"grad_norm": 21.25, |
|
"learning_rate": 1.2023625714256743e-05, |
|
"log_odds_chosen": 1.3471606969833374, |
|
"log_odds_ratio": -0.3214406967163086, |
|
"logps/chosen": -0.5802930593490601, |
|
"logps/rejected": -1.3386670351028442, |
|
"loss": 13.8703, |
|
"nll_loss": 0.8453942537307739, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.02901465632021427, |
|
"rewards/margins": 0.03791869431734085, |
|
"rewards/rejected": -0.06693334877490997, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.8517520215633425, |
|
"grad_norm": 25.25, |
|
"learning_rate": 1.1024549154742098e-05, |
|
"log_odds_chosen": 1.3048094511032104, |
|
"log_odds_ratio": -0.3205285370349884, |
|
"logps/chosen": -0.5728147625923157, |
|
"logps/rejected": -1.2860543727874756, |
|
"loss": 13.4713, |
|
"nll_loss": 0.8200335502624512, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/chosen": -0.028640735894441605, |
|
"rewards/margins": 0.035661980509757996, |
|
"rewards/rejected": -0.0643027201294899, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.9056603773584904, |
|
"grad_norm": 25.0, |
|
"learning_rate": 1.005694051878721e-05, |
|
"log_odds_chosen": 1.4204931259155273, |
|
"log_odds_ratio": -0.316345751285553, |
|
"logps/chosen": -0.5570142865180969, |
|
"logps/rejected": -1.3344264030456543, |
|
"loss": 13.5552, |
|
"nll_loss": 0.8255348205566406, |
|
"rewards/accuracies": 0.8968750238418579, |
|
"rewards/chosen": -0.027850713580846786, |
|
"rewards/margins": 0.03887060284614563, |
|
"rewards/rejected": -0.06672131270170212, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.9595687331536387, |
|
"grad_norm": 24.25, |
|
"learning_rate": 9.1229785292776e-06, |
|
"log_odds_chosen": 1.5122112035751343, |
|
"log_odds_ratio": -0.2880076766014099, |
|
"logps/chosen": -0.5204971432685852, |
|
"logps/rejected": -1.3571436405181885, |
|
"loss": 12.9568, |
|
"nll_loss": 0.789382815361023, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/chosen": -0.02602485381066799, |
|
"rewards/margins": 0.041832335293293, |
|
"rewards/rejected": -0.06785719096660614, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 3.01078167115903, |
|
"grad_norm": 24.25, |
|
"learning_rate": 8.22476614838737e-06, |
|
"log_odds_chosen": 1.5424445867538452, |
|
"log_odds_ratio": -0.28235647082328796, |
|
"logps/chosen": -0.5312695503234863, |
|
"logps/rejected": -1.3865729570388794, |
|
"loss": 12.1174, |
|
"nll_loss": 0.7770485877990723, |
|
"rewards/accuracies": 0.9243420958518982, |
|
"rewards/chosen": -0.026563478633761406, |
|
"rewards/margins": 0.04276517778635025, |
|
"rewards/rejected": -0.06932865083217621, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.0646900269541777, |
|
"grad_norm": 19.625, |
|
"learning_rate": 7.364325842429417e-06, |
|
"log_odds_chosen": 1.9977210760116577, |
|
"log_odds_ratio": -0.2042604237794876, |
|
"logps/chosen": -0.4289301335811615, |
|
"logps/rejected": -1.457916498184204, |
|
"loss": 10.7252, |
|
"nll_loss": 0.6526534557342529, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.021446505561470985, |
|
"rewards/margins": 0.051449306309223175, |
|
"rewards/rejected": -0.07289581000804901, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 3.118598382749326, |
|
"grad_norm": 32.0, |
|
"learning_rate": 6.543595027954785e-06, |
|
"log_odds_chosen": 2.0810437202453613, |
|
"log_odds_ratio": -0.21267971396446228, |
|
"logps/chosen": -0.4348023533821106, |
|
"logps/rejected": -1.5428168773651123, |
|
"loss": 10.659, |
|
"nll_loss": 0.6491010785102844, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.02174011990427971, |
|
"rewards/margins": 0.05540072172880173, |
|
"rewards/rejected": -0.07714084535837173, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.1725067385444743, |
|
"grad_norm": 23.375, |
|
"learning_rate": 5.764421709354811e-06, |
|
"log_odds_chosen": 2.2832117080688477, |
|
"log_odds_ratio": -0.17793798446655273, |
|
"logps/chosen": -0.4084325432777405, |
|
"logps/rejected": -1.6333366632461548, |
|
"loss": 10.4443, |
|
"nll_loss": 0.6368886232376099, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.020421627908945084, |
|
"rewards/margins": 0.061245210468769073, |
|
"rewards/rejected": -0.08166682720184326, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 3.2264150943396226, |
|
"grad_norm": 23.25, |
|
"learning_rate": 5.02856031778888e-06, |
|
"log_odds_chosen": 2.20396089553833, |
|
"log_odds_ratio": -0.20375783741474152, |
|
"logps/chosen": -0.4364936351776123, |
|
"logps/rejected": -1.5872256755828857, |
|
"loss": 10.7344, |
|
"nll_loss": 0.6540727019309998, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.021824682131409645, |
|
"rewards/margins": 0.05753660202026367, |
|
"rewards/rejected": -0.07936128228902817, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.280323450134771, |
|
"grad_norm": 21.625, |
|
"learning_rate": 4.337667760807018e-06, |
|
"log_odds_chosen": 2.0813040733337402, |
|
"log_odds_ratio": -0.21252334117889404, |
|
"logps/chosen": -0.4447731375694275, |
|
"logps/rejected": -1.5767261981964111, |
|
"loss": 10.8909, |
|
"nll_loss": 0.6635223031044006, |
|
"rewards/accuracies": 0.9593750238418579, |
|
"rewards/chosen": -0.022238660603761673, |
|
"rewards/margins": 0.05659765005111694, |
|
"rewards/rejected": -0.07883630692958832, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 3.334231805929919, |
|
"grad_norm": 26.5, |
|
"learning_rate": 3.69329969156216e-06, |
|
"log_odds_chosen": 2.2381584644317627, |
|
"log_odds_ratio": -0.17899712920188904, |
|
"logps/chosen": -0.4195394515991211, |
|
"logps/rejected": -1.6279566287994385, |
|
"loss": 10.5163, |
|
"nll_loss": 0.6414855122566223, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.020976971834897995, |
|
"rewards/margins": 0.06042086333036423, |
|
"rewards/rejected": -0.08139783143997192, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.3881401617250675, |
|
"grad_norm": 24.875, |
|
"learning_rate": 3.096907006012639e-06, |
|
"log_odds_chosen": 2.148031711578369, |
|
"log_odds_ratio": -0.2030152827501297, |
|
"logps/chosen": -0.43345871567726135, |
|
"logps/rejected": -1.6023099422454834, |
|
"loss": 10.7448, |
|
"nll_loss": 0.6548030376434326, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.021672936156392097, |
|
"rewards/margins": 0.05844256281852722, |
|
"rewards/rejected": -0.08011549711227417, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 3.442048517520216, |
|
"grad_norm": 23.25, |
|
"learning_rate": 2.549832576001965e-06, |
|
"log_odds_chosen": 2.323772668838501, |
|
"log_odds_ratio": -0.17535866796970367, |
|
"logps/chosen": -0.4194834232330322, |
|
"logps/rejected": -1.6763973236083984, |
|
"loss": 10.4299, |
|
"nll_loss": 0.6364527940750122, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -0.0209741722792387, |
|
"rewards/margins": 0.06284569203853607, |
|
"rewards/rejected": -0.08381986618041992, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.4959568733153636, |
|
"grad_norm": 24.625, |
|
"learning_rate": 2.0533082255718594e-06, |
|
"log_odds_chosen": 2.2250733375549316, |
|
"log_odds_ratio": -0.1992703080177307, |
|
"logps/chosen": -0.39943161606788635, |
|
"logps/rejected": -1.571520447731018, |
|
"loss": 10.0609, |
|
"nll_loss": 0.6121501922607422, |
|
"rewards/accuracies": 0.965624988079071, |
|
"rewards/chosen": -0.019971583038568497, |
|
"rewards/margins": 0.05860443040728569, |
|
"rewards/rejected": -0.07857601344585419, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 3.5498652291105124, |
|
"grad_norm": 27.875, |
|
"learning_rate": 1.6084519573168727e-06, |
|
"log_odds_chosen": 2.1328158378601074, |
|
"log_odds_ratio": -0.21596260368824005, |
|
"logps/chosen": -0.43325909972190857, |
|
"logps/rejected": -1.574236273765564, |
|
"loss": 10.7006, |
|
"nll_loss": 0.6516640782356262, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.02166295424103737, |
|
"rewards/margins": 0.05704886466264725, |
|
"rewards/rejected": -0.07871182262897491, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.6037735849056602, |
|
"grad_norm": 25.25, |
|
"learning_rate": 1.2162654350258402e-06, |
|
"log_odds_chosen": 2.322880744934082, |
|
"log_odds_ratio": -0.18639399111270905, |
|
"logps/chosen": -0.4327073097229004, |
|
"logps/rejected": -1.7132127285003662, |
|
"loss": 10.383, |
|
"nll_loss": 0.6331822276115417, |
|
"rewards/accuracies": 0.9593750238418579, |
|
"rewards/chosen": -0.02163536846637726, |
|
"rewards/margins": 0.06402527540922165, |
|
"rewards/rejected": -0.0856606513261795, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 3.6576819407008085, |
|
"grad_norm": 25.625, |
|
"learning_rate": 8.776317282785163e-07, |
|
"log_odds_chosen": 2.281785249710083, |
|
"log_odds_ratio": -0.18652771413326263, |
|
"logps/chosen": -0.42065826058387756, |
|
"logps/rejected": -1.6458238363265991, |
|
"loss": 10.3009, |
|
"nll_loss": 0.6277027726173401, |
|
"rewards/accuracies": 0.965624988079071, |
|
"rewards/chosen": -0.02103291265666485, |
|
"rewards/margins": 0.06125827878713608, |
|
"rewards/rejected": -0.08229119330644608, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.711590296495957, |
|
"grad_norm": 21.875, |
|
"learning_rate": 5.933133240756117e-07, |
|
"log_odds_chosen": 2.2684173583984375, |
|
"log_odds_ratio": -0.18595468997955322, |
|
"logps/chosen": -0.43632110953330994, |
|
"logps/rejected": -1.6807502508163452, |
|
"loss": 10.4706, |
|
"nll_loss": 0.638587474822998, |
|
"rewards/accuracies": 0.971875011920929, |
|
"rewards/chosen": -0.021816056221723557, |
|
"rewards/margins": 0.062221460044384, |
|
"rewards/rejected": -0.08403751999139786, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 3.765498652291105, |
|
"grad_norm": 21.625, |
|
"learning_rate": 3.6395040997952714e-07, |
|
"log_odds_chosen": 2.0946693420410156, |
|
"log_odds_ratio": -0.20106664299964905, |
|
"logps/chosen": -0.4340636730194092, |
|
"logps/rejected": -1.574231505393982, |
|
"loss": 10.9029, |
|
"nll_loss": 0.6646233797073364, |
|
"rewards/accuracies": 0.965624988079071, |
|
"rewards/chosen": -0.02170318365097046, |
|
"rewards/margins": 0.057008396834135056, |
|
"rewards/rejected": -0.07871158421039581, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.8194070080862534, |
|
"grad_norm": 25.75, |
|
"learning_rate": 1.9005943263146198e-07, |
|
"log_odds_chosen": 2.3714165687561035, |
|
"log_odds_ratio": -0.1724739372730255, |
|
"logps/chosen": -0.3921690583229065, |
|
"logps/rejected": -1.6754087209701538, |
|
"loss": 10.2651, |
|
"nll_loss": 0.6260087490081787, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.019608452916145325, |
|
"rewards/margins": 0.06416197866201401, |
|
"rewards/rejected": -0.08377043902873993, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 3.8733153638814017, |
|
"grad_norm": 29.0, |
|
"learning_rate": 7.203193489064009e-08, |
|
"log_odds_chosen": 2.2240853309631348, |
|
"log_odds_ratio": -0.18985909223556519, |
|
"logps/chosen": -0.4116806089878082, |
|
"logps/rejected": -1.6198737621307373, |
|
"loss": 10.3175, |
|
"nll_loss": 0.6286627054214478, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -0.020584028214216232, |
|
"rewards/margins": 0.060409657657146454, |
|
"rewards/rejected": -0.08099369704723358, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.92722371967655, |
|
"grad_norm": 22.75, |
|
"learning_rate": 1.0133674214043987e-08, |
|
"log_odds_chosen": 2.4190001487731934, |
|
"log_odds_ratio": -0.16849036514759064, |
|
"logps/chosen": -0.387323796749115, |
|
"logps/rejected": -1.692837119102478, |
|
"loss": 9.7812, |
|
"nll_loss": 0.5957530736923218, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.0193661917001009, |
|
"rewards/margins": 0.06527567654848099, |
|
"rewards/rejected": -0.08464185893535614, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 3.9595687331536387, |
|
"step": 368, |
|
"total_flos": 0.0, |
|
"train_loss": 25.758197416429933, |
|
"train_runtime": 4430.8526, |
|
"train_samples_per_second": 5.359, |
|
"train_steps_per_second": 0.083 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 368, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|