|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9876543209876543, |
|
"eval_steps": 100, |
|
"global_step": 363, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0411522633744856, |
|
"grad_norm": 71.36946521074697, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.7249937057495117, |
|
"logits/rejected": -2.7219715118408203, |
|
"logps/chosen": -289.096435546875, |
|
"logps/rejected": -212.59097290039062, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": 0.027115171775221825, |
|
"rewards/margins": 0.011037254706025124, |
|
"rewards/rejected": 0.01607791893184185, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0823045267489712, |
|
"grad_norm": 68.75739482144014, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -2.673173666000366, |
|
"logits/rejected": -2.6852009296417236, |
|
"logps/chosen": -258.5091857910156, |
|
"logps/rejected": -228.7921905517578, |
|
"loss": 0.6519, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.6803622841835022, |
|
"rewards/margins": 0.2561650276184082, |
|
"rewards/rejected": 0.424197256565094, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12345679012345678, |
|
"grad_norm": 43.9449007096878, |
|
"learning_rate": 9.995050530093366e-07, |
|
"logits/chosen": -2.5606446266174316, |
|
"logits/rejected": -2.555354595184326, |
|
"logps/chosen": -258.5283508300781, |
|
"logps/rejected": -217.637939453125, |
|
"loss": 0.5873, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 1.7179749011993408, |
|
"rewards/margins": 0.7173956036567688, |
|
"rewards/rejected": 1.0005793571472168, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.1646090534979424, |
|
"grad_norm": 49.90030149803026, |
|
"learning_rate": 9.980211919274406e-07, |
|
"logits/chosen": -2.334833860397339, |
|
"logits/rejected": -2.3182854652404785, |
|
"logps/chosen": -234.5125732421875, |
|
"logps/rejected": -194.8851318359375, |
|
"loss": 0.6125, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 1.7243343591690063, |
|
"rewards/margins": 0.8316472172737122, |
|
"rewards/rejected": 0.8926870226860046, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.205761316872428, |
|
"grad_norm": 43.554349506398026, |
|
"learning_rate": 9.955513544846204e-07, |
|
"logits/chosen": -2.12056303024292, |
|
"logits/rejected": -2.095937728881836, |
|
"logps/chosen": -284.00323486328125, |
|
"logps/rejected": -210.3358154296875, |
|
"loss": 0.5749, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 2.1191883087158203, |
|
"rewards/margins": 1.4548943042755127, |
|
"rewards/rejected": 0.6642940044403076, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.24691358024691357, |
|
"grad_norm": 45.05915140113881, |
|
"learning_rate": 9.921004304353147e-07, |
|
"logits/chosen": -2.04213547706604, |
|
"logits/rejected": -2.0172839164733887, |
|
"logps/chosen": -232.2016143798828, |
|
"logps/rejected": -217.5736846923828, |
|
"loss": 0.5989, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 1.9440408945083618, |
|
"rewards/margins": 1.5185799598693848, |
|
"rewards/rejected": 0.4254608750343323, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2880658436213992, |
|
"grad_norm": 40.00728614202134, |
|
"learning_rate": 9.876752518774164e-07, |
|
"logits/chosen": -2.0041847229003906, |
|
"logits/rejected": -1.9888496398925781, |
|
"logps/chosen": -255.5012969970703, |
|
"logps/rejected": -238.2528839111328, |
|
"loss": 0.6076, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 1.3912312984466553, |
|
"rewards/margins": 1.0289623737335205, |
|
"rewards/rejected": 0.36226886510849, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.3292181069958848, |
|
"grad_norm": 42.181862044805364, |
|
"learning_rate": 9.822845797261675e-07, |
|
"logits/chosen": -2.024127244949341, |
|
"logits/rejected": -2.020592451095581, |
|
"logps/chosen": -249.13394165039062, |
|
"logps/rejected": -199.90975952148438, |
|
"loss": 0.5846, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 1.3734517097473145, |
|
"rewards/margins": 0.8223851919174194, |
|
"rewards/rejected": 0.5510665774345398, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.37037037037037035, |
|
"grad_norm": 41.15847921708812, |
|
"learning_rate": 9.759390863694029e-07, |
|
"logits/chosen": -2.0532474517822266, |
|
"logits/rejected": -1.9978084564208984, |
|
"logps/chosen": -256.01446533203125, |
|
"logps/rejected": -206.8267059326172, |
|
"loss": 0.5481, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.589166283607483, |
|
"rewards/margins": 1.2907274961471558, |
|
"rewards/rejected": 0.2984387278556824, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.411522633744856, |
|
"grad_norm": 42.63680924826028, |
|
"learning_rate": 9.68651334538488e-07, |
|
"logits/chosen": -2.034133195877075, |
|
"logits/rejected": -2.0025076866149902, |
|
"logps/chosen": -259.46942138671875, |
|
"logps/rejected": -229.2208251953125, |
|
"loss": 0.5652, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 1.4298592805862427, |
|
"rewards/margins": 1.076907992362976, |
|
"rewards/rejected": 0.35295119881629944, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.45267489711934156, |
|
"grad_norm": 36.48668334468458, |
|
"learning_rate": 9.604357524367722e-07, |
|
"logits/chosen": -2.0932247638702393, |
|
"logits/rejected": -2.0437166690826416, |
|
"logps/chosen": -281.03289794921875, |
|
"logps/rejected": -227.46109008789062, |
|
"loss": 0.5437, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.40286123752594, |
|
"rewards/margins": 1.028884768486023, |
|
"rewards/rejected": 0.3739764094352722, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.49382716049382713, |
|
"grad_norm": 35.36330599361053, |
|
"learning_rate": 9.513086051748067e-07, |
|
"logits/chosen": -2.1159732341766357, |
|
"logits/rejected": -2.078249931335449, |
|
"logps/chosen": -265.8070373535156, |
|
"logps/rejected": -214.79428100585938, |
|
"loss": 0.5166, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 1.7003364562988281, |
|
"rewards/margins": 1.7438255548477173, |
|
"rewards/rejected": -0.0434890016913414, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5349794238683128, |
|
"grad_norm": 31.36432376366485, |
|
"learning_rate": 9.412879625688742e-07, |
|
"logits/chosen": -2.183833599090576, |
|
"logits/rejected": -2.1385440826416016, |
|
"logps/chosen": -270.89263916015625, |
|
"logps/rejected": -203.67922973632812, |
|
"loss": 0.4848, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 1.6083428859710693, |
|
"rewards/margins": 1.5229980945587158, |
|
"rewards/rejected": 0.08534489572048187, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.5761316872427984, |
|
"grad_norm": 35.68216693219843, |
|
"learning_rate": 9.303936633665839e-07, |
|
"logits/chosen": -2.3082363605499268, |
|
"logits/rejected": -2.2824604511260986, |
|
"logps/chosen": -255.9834747314453, |
|
"logps/rejected": -194.7764892578125, |
|
"loss": 0.5289, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 1.1171067953109741, |
|
"rewards/margins": 1.4306641817092896, |
|
"rewards/rejected": -0.3135572373867035, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6172839506172839, |
|
"grad_norm": 38.51565362073314, |
|
"learning_rate": 9.186472759703578e-07, |
|
"logits/chosen": -2.3410449028015137, |
|
"logits/rejected": -2.3213045597076416, |
|
"logps/chosen": -275.8757019042969, |
|
"logps/rejected": -213.70693969726562, |
|
"loss": 0.5387, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.7660292387008667, |
|
"rewards/margins": 1.4290556907653809, |
|
"rewards/rejected": -0.6630264520645142, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.6584362139917695, |
|
"grad_norm": 30.908945588893605, |
|
"learning_rate": 9.060720557365682e-07, |
|
"logits/chosen": -2.3798623085021973, |
|
"logits/rejected": -2.378147602081299, |
|
"logps/chosen": -277.94622802734375, |
|
"logps/rejected": -228.6498565673828, |
|
"loss": 0.524, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.9844567179679871, |
|
"rewards/margins": 1.5679962635040283, |
|
"rewards/rejected": -0.5835394859313965, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.6995884773662552, |
|
"grad_norm": 36.12667290276971, |
|
"learning_rate": 8.926928989348611e-07, |
|
"logits/chosen": -2.438974618911743, |
|
"logits/rejected": -2.4293782711029053, |
|
"logps/chosen": -264.4499816894531, |
|
"logps/rejected": -233.60958862304688, |
|
"loss": 0.5124, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.7334972023963928, |
|
"rewards/margins": 1.825126051902771, |
|
"rewards/rejected": -1.091629147529602, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.7407407407407407, |
|
"grad_norm": 31.43710612772888, |
|
"learning_rate": 8.785362934588233e-07, |
|
"logits/chosen": -2.4581363201141357, |
|
"logits/rejected": -2.4250473976135254, |
|
"logps/chosen": -272.42498779296875, |
|
"logps/rejected": -206.20614624023438, |
|
"loss": 0.5073, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 1.6099742650985718, |
|
"rewards/margins": 1.7208999395370483, |
|
"rewards/rejected": -0.11092579364776611, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.7818930041152263, |
|
"grad_norm": 35.922757319188804, |
|
"learning_rate": 8.636302663855681e-07, |
|
"logits/chosen": -2.368760585784912, |
|
"logits/rejected": -2.3825132846832275, |
|
"logps/chosen": -247.90396118164062, |
|
"logps/rejected": -212.88232421875, |
|
"loss": 0.4971, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 1.2823846340179443, |
|
"rewards/margins": 1.8866965770721436, |
|
"rewards/rejected": -0.6043121814727783, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.823045267489712, |
|
"grad_norm": 27.664598721354345, |
|
"learning_rate": 8.480043284880664e-07, |
|
"logits/chosen": -2.346686601638794, |
|
"logits/rejected": -2.317147970199585, |
|
"logps/chosen": -269.21417236328125, |
|
"logps/rejected": -233.6097412109375, |
|
"loss": 0.4674, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.5551630258560181, |
|
"rewards/margins": 2.1603965759277344, |
|
"rewards/rejected": -1.6052335500717163, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.823045267489712, |
|
"eval_logits/chosen": -2.2803401947021484, |
|
"eval_logits/rejected": -2.256579875946045, |
|
"eval_logps/chosen": -257.0998229980469, |
|
"eval_logps/rejected": -231.74539184570312, |
|
"eval_loss": 0.4985389709472656, |
|
"eval_rewards/accuracies": 0.7939814925193787, |
|
"eval_rewards/chosen": 0.17793893814086914, |
|
"eval_rewards/margins": 1.7997103929519653, |
|
"eval_rewards/rejected": -1.6217713356018066, |
|
"eval_runtime": 230.2785, |
|
"eval_samples_per_second": 15.008, |
|
"eval_steps_per_second": 0.234, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8641975308641975, |
|
"grad_norm": 33.43388986335041, |
|
"learning_rate": 8.316894158100727e-07, |
|
"logits/chosen": -2.238370895385742, |
|
"logits/rejected": -2.205950975418091, |
|
"logps/chosen": -270.1739807128906, |
|
"logps/rejected": -237.7426300048828, |
|
"loss": 0.5036, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.2168927639722824, |
|
"rewards/margins": 2.0453083515167236, |
|
"rewards/rejected": -1.8284155130386353, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.9053497942386831, |
|
"grad_norm": 41.06626958250484, |
|
"learning_rate": 8.147178284193184e-07, |
|
"logits/chosen": -1.9968522787094116, |
|
"logits/rejected": -1.9477859735488892, |
|
"logps/chosen": -271.5672912597656, |
|
"logps/rejected": -244.5254364013672, |
|
"loss": 0.523, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.18483969569206238, |
|
"rewards/margins": 2.090688467025757, |
|
"rewards/rejected": -2.2755284309387207, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.9465020576131687, |
|
"grad_norm": 33.45568853055463, |
|
"learning_rate": 7.971231664602271e-07, |
|
"logits/chosen": -1.8657859563827515, |
|
"logits/rejected": -1.7577025890350342, |
|
"logps/chosen": -255.1681365966797, |
|
"logps/rejected": -235.93856811523438, |
|
"loss": 0.4781, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.08642071485519409, |
|
"rewards/margins": 2.032249689102173, |
|
"rewards/rejected": -1.9458287954330444, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.9876543209876543, |
|
"grad_norm": 31.32834367464404, |
|
"learning_rate": 7.789402636327525e-07, |
|
"logits/chosen": -1.7241904735565186, |
|
"logits/rejected": -1.6637340784072876, |
|
"logps/chosen": -269.67364501953125, |
|
"logps/rejected": -239.79965209960938, |
|
"loss": 0.4614, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 0.0743473693728447, |
|
"rewards/margins": 2.101712942123413, |
|
"rewards/rejected": -2.0273656845092773, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.02880658436214, |
|
"grad_norm": 18.313357022047114, |
|
"learning_rate": 7.602051182290381e-07, |
|
"logits/chosen": -1.5669622421264648, |
|
"logits/rejected": -1.4961906671524048, |
|
"logps/chosen": -270.39056396484375, |
|
"logps/rejected": -223.95706176757812, |
|
"loss": 0.321, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.7599193453788757, |
|
"rewards/margins": 2.714322805404663, |
|
"rewards/rejected": -1.9544035196304321, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.0699588477366255, |
|
"grad_norm": 18.77066721006591, |
|
"learning_rate": 7.409548218644331e-07, |
|
"logits/chosen": -1.4371721744537354, |
|
"logits/rejected": -1.3102617263793945, |
|
"logps/chosen": -257.923095703125, |
|
"logps/rejected": -222.04959106445312, |
|
"loss": 0.1777, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.8628284931182861, |
|
"rewards/margins": 3.623333692550659, |
|
"rewards/rejected": -1.7605053186416626, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 14.482571733068447, |
|
"learning_rate": 7.212274860439576e-07, |
|
"logits/chosen": -1.4088728427886963, |
|
"logits/rejected": -1.3359241485595703, |
|
"logps/chosen": -252.8369140625, |
|
"logps/rejected": -247.0041046142578, |
|
"loss": 0.2246, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.8369052410125732, |
|
"rewards/margins": 3.677825450897217, |
|
"rewards/rejected": -1.840920090675354, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.1522633744855968, |
|
"grad_norm": 19.475514209975124, |
|
"learning_rate": 7.010621667096041e-07, |
|
"logits/chosen": -1.5916813611984253, |
|
"logits/rejected": -1.479448676109314, |
|
"logps/chosen": -254.99136352539062, |
|
"logps/rejected": -218.8384246826172, |
|
"loss": 0.2218, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.6720364093780518, |
|
"rewards/margins": 3.2526676654815674, |
|
"rewards/rejected": -1.5806310176849365, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.1934156378600824, |
|
"grad_norm": 24.87312122824749, |
|
"learning_rate": 6.804987869178539e-07, |
|
"logits/chosen": -1.7563555240631104, |
|
"logits/rejected": -1.6887686252593994, |
|
"logps/chosen": -241.65676879882812, |
|
"logps/rejected": -225.277099609375, |
|
"loss": 0.2373, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.7571462392807007, |
|
"rewards/margins": 3.4047298431396484, |
|
"rewards/rejected": -1.6475833654403687, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.2345679012345678, |
|
"grad_norm": 22.485449779074028, |
|
"learning_rate": 6.5957805780049e-07, |
|
"logits/chosen": -1.889991044998169, |
|
"logits/rejected": -1.8203752040863037, |
|
"logps/chosen": -250.935302734375, |
|
"logps/rejected": -223.52401733398438, |
|
"loss": 0.2196, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 2.23984432220459, |
|
"rewards/margins": 3.5551295280456543, |
|
"rewards/rejected": -1.3152849674224854, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.2757201646090535, |
|
"grad_norm": 20.0119744226792, |
|
"learning_rate": 6.383413979651893e-07, |
|
"logits/chosen": -1.9477765560150146, |
|
"logits/rejected": -1.8932664394378662, |
|
"logps/chosen": -242.27685546875, |
|
"logps/rejected": -231.18991088867188, |
|
"loss": 0.2229, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.800172209739685, |
|
"rewards/margins": 3.45011568069458, |
|
"rewards/rejected": -1.6499433517456055, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.316872427983539, |
|
"grad_norm": 20.63931604768156, |
|
"learning_rate": 6.168308514954602e-07, |
|
"logits/chosen": -1.973009705543518, |
|
"logits/rejected": -1.8899316787719727, |
|
"logps/chosen": -261.8257141113281, |
|
"logps/rejected": -258.97515869140625, |
|
"loss": 0.2121, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.9451110363006592, |
|
"rewards/margins": 4.489598274230957, |
|
"rewards/rejected": -2.544487237930298, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.3580246913580247, |
|
"grad_norm": 23.779662167366467, |
|
"learning_rate": 5.950890047122741e-07, |
|
"logits/chosen": -1.9724878072738647, |
|
"logits/rejected": -1.9425151348114014, |
|
"logps/chosen": -260.43084716796875, |
|
"logps/rejected": -236.8948211669922, |
|
"loss": 0.2464, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.4944422245025635, |
|
"rewards/margins": 3.678725481033325, |
|
"rewards/rejected": -2.18428373336792, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.3991769547325104, |
|
"grad_norm": 15.224094688709425, |
|
"learning_rate": 5.731589018621776e-07, |
|
"logits/chosen": -1.9535115957260132, |
|
"logits/rejected": -1.8948615789413452, |
|
"logps/chosen": -252.6552276611328, |
|
"logps/rejected": -226.4263916015625, |
|
"loss": 0.2351, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.834676742553711, |
|
"rewards/margins": 4.032426357269287, |
|
"rewards/rejected": -2.1977500915527344, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.4403292181069958, |
|
"grad_norm": 20.636053561561848, |
|
"learning_rate": 5.510839598988136e-07, |
|
"logits/chosen": -1.8348503112792969, |
|
"logits/rejected": -1.7934105396270752, |
|
"logps/chosen": -255.14895629882812, |
|
"logps/rejected": -232.3575897216797, |
|
"loss": 0.2069, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.0120339393615723, |
|
"rewards/margins": 3.713160276412964, |
|
"rewards/rejected": -1.7011263370513916, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.4814814814814814, |
|
"grad_norm": 21.755357371160876, |
|
"learning_rate": 5.289078825265572e-07, |
|
"logits/chosen": -1.7341606616973877, |
|
"logits/rejected": -1.6741468906402588, |
|
"logps/chosen": -237.35433959960938, |
|
"logps/rejected": -228.7030487060547, |
|
"loss": 0.234, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 1.63128662109375, |
|
"rewards/margins": 3.659700393676758, |
|
"rewards/rejected": -2.028413772583008, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.522633744855967, |
|
"grad_norm": 23.810123453795516, |
|
"learning_rate": 5.066745736764489e-07, |
|
"logits/chosen": -1.635679841041565, |
|
"logits/rejected": -1.5873550176620483, |
|
"logps/chosen": -248.98135375976562, |
|
"logps/rejected": -240.08987426757812, |
|
"loss": 0.2576, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.4315288066864014, |
|
"rewards/margins": 3.4555141925811768, |
|
"rewards/rejected": -2.0239853858947754, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.5637860082304527, |
|
"grad_norm": 22.3759752093868, |
|
"learning_rate": 4.844280505857202e-07, |
|
"logits/chosen": -1.5894463062286377, |
|
"logits/rejected": -1.5013604164123535, |
|
"logps/chosen": -239.4411163330078, |
|
"logps/rejected": -219.7681121826172, |
|
"loss": 0.2732, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.9483649730682373, |
|
"rewards/margins": 3.714170455932617, |
|
"rewards/rejected": -1.7658058404922485, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.6049382716049383, |
|
"grad_norm": 21.666055935350588, |
|
"learning_rate": 4.6221235665299684e-07, |
|
"logits/chosen": -1.6968196630477905, |
|
"logits/rejected": -1.6124290227890015, |
|
"logps/chosen": -246.6077117919922, |
|
"logps/rejected": -233.7628631591797, |
|
"loss": 0.2689, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 2.1885757446289062, |
|
"rewards/margins": 3.544438600540161, |
|
"rewards/rejected": -1.3558627367019653, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.646090534979424, |
|
"grad_norm": 23.567423393969673, |
|
"learning_rate": 4.400714742417091e-07, |
|
"logits/chosen": -1.7539150714874268, |
|
"logits/rejected": -1.6715869903564453, |
|
"logps/chosen": -289.3243713378906, |
|
"logps/rejected": -238.78271484375, |
|
"loss": 0.2463, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 2.3782408237457275, |
|
"rewards/margins": 3.8166255950927734, |
|
"rewards/rejected": -1.438385248184204, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.646090534979424, |
|
"eval_logits/chosen": -1.7062827348709106, |
|
"eval_logits/rejected": -1.629170298576355, |
|
"eval_logps/chosen": -247.42041015625, |
|
"eval_logps/rejected": -227.5958709716797, |
|
"eval_loss": 0.5190241932868958, |
|
"eval_rewards/accuracies": 0.7962962985038757, |
|
"eval_rewards/chosen": 1.1458828449249268, |
|
"eval_rewards/margins": 2.3527021408081055, |
|
"eval_rewards/rejected": -1.2068192958831787, |
|
"eval_runtime": 228.0783, |
|
"eval_samples_per_second": 15.153, |
|
"eval_steps_per_second": 0.237, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.6872427983539096, |
|
"grad_norm": 20.05042831109418, |
|
"learning_rate": 4.180492376043371e-07, |
|
"logits/chosen": -1.7294807434082031, |
|
"logits/rejected": -1.6129295825958252, |
|
"logps/chosen": -239.91696166992188, |
|
"logps/rejected": -241.2155303955078, |
|
"loss": 0.2475, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.699279546737671, |
|
"rewards/margins": 3.706660747528076, |
|
"rewards/rejected": -2.007380962371826, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.7283950617283952, |
|
"grad_norm": 17.373566601078217, |
|
"learning_rate": 3.961892460998862e-07, |
|
"logits/chosen": -1.7376630306243896, |
|
"logits/rejected": -1.672767996788025, |
|
"logps/chosen": -259.5295104980469, |
|
"logps/rejected": -219.8362274169922, |
|
"loss": 0.2275, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.8958297967910767, |
|
"rewards/margins": 3.636307954788208, |
|
"rewards/rejected": -1.7404781579971313, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.7695473251028808, |
|
"grad_norm": 20.32259020536467, |
|
"learning_rate": 3.7453477787640077e-07, |
|
"logits/chosen": -1.6703641414642334, |
|
"logits/rejected": -1.6055065393447876, |
|
"logps/chosen": -259.04559326171875, |
|
"logps/rejected": -238.02713012695312, |
|
"loss": 0.2558, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 1.8848392963409424, |
|
"rewards/margins": 3.780524492263794, |
|
"rewards/rejected": -1.8956845998764038, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.8106995884773662, |
|
"grad_norm": 20.716775450731596, |
|
"learning_rate": 3.531287041894075e-07, |
|
"logits/chosen": -1.636228322982788, |
|
"logits/rejected": -1.5927408933639526, |
|
"logps/chosen": -259.4163513183594, |
|
"logps/rejected": -262.77691650390625, |
|
"loss": 0.2641, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.8492801189422607, |
|
"rewards/margins": 3.8553290367126465, |
|
"rewards/rejected": -2.0060486793518066, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.8518518518518519, |
|
"grad_norm": 20.26085395927115, |
|
"learning_rate": 3.320134045259192e-07, |
|
"logits/chosen": -1.6199842691421509, |
|
"logits/rejected": -1.5809019804000854, |
|
"logps/chosen": -261.5071716308594, |
|
"logps/rejected": -244.0452117919922, |
|
"loss": 0.2836, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.7676365375518799, |
|
"rewards/margins": 3.8491673469543457, |
|
"rewards/rejected": -2.081530809402466, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.8930041152263375, |
|
"grad_norm": 19.9900109721012, |
|
"learning_rate": 3.112306827020377e-07, |
|
"logits/chosen": -1.6224733591079712, |
|
"logits/rejected": -1.5683706998825073, |
|
"logps/chosen": -246.66726684570312, |
|
"logps/rejected": -252.150634765625, |
|
"loss": 0.2967, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.2352790832519531, |
|
"rewards/margins": 3.3191657066345215, |
|
"rewards/rejected": -2.0838871002197266, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.934156378600823, |
|
"grad_norm": 20.679234729146177, |
|
"learning_rate": 2.90821684100261e-07, |
|
"logits/chosen": -1.665122628211975, |
|
"logits/rejected": -1.585533857345581, |
|
"logps/chosen": -258.1650390625, |
|
"logps/rejected": -238.0010223388672, |
|
"loss": 0.2521, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.45806884765625, |
|
"rewards/margins": 3.821526288986206, |
|
"rewards/rejected": -2.363457202911377, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.9753086419753085, |
|
"grad_norm": 17.897922449348748, |
|
"learning_rate": 2.708268142103509e-07, |
|
"logits/chosen": -1.6568527221679688, |
|
"logits/rejected": -1.594029426574707, |
|
"logps/chosen": -249.9292449951172, |
|
"logps/rejected": -217.1236114501953, |
|
"loss": 0.2458, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.140490174293518, |
|
"rewards/margins": 3.4049384593963623, |
|
"rewards/rejected": -2.264448404312134, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.016460905349794, |
|
"grad_norm": 15.579483343495324, |
|
"learning_rate": 2.5128565863503e-07, |
|
"logits/chosen": -1.7464730739593506, |
|
"logits/rejected": -1.64523446559906, |
|
"logps/chosen": -269.5633544921875, |
|
"logps/rejected": -218.4349365234375, |
|
"loss": 0.1875, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.345157504081726, |
|
"rewards/margins": 3.8898367881774902, |
|
"rewards/rejected": -2.5446791648864746, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.05761316872428, |
|
"grad_norm": 15.642770624996952, |
|
"learning_rate": 2.3223690471888286e-07, |
|
"logits/chosen": -1.7972164154052734, |
|
"logits/rejected": -1.6923631429672241, |
|
"logps/chosen": -276.4811706542969, |
|
"logps/rejected": -239.2648468017578, |
|
"loss": 0.1218, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.7780349254608154, |
|
"rewards/margins": 4.118841171264648, |
|
"rewards/rejected": -2.3408069610595703, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.0987654320987654, |
|
"grad_norm": 13.364305072324674, |
|
"learning_rate": 2.1371826495561613e-07, |
|
"logits/chosen": -1.8449236154556274, |
|
"logits/rejected": -1.7506535053253174, |
|
"logps/chosen": -255.83792114257812, |
|
"logps/rejected": -221.6796875, |
|
"loss": 0.146, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.8460966348648071, |
|
"rewards/margins": 3.9246277809143066, |
|
"rewards/rejected": -2.078531265258789, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.139917695473251, |
|
"grad_norm": 13.06395689210594, |
|
"learning_rate": 1.9576640232531784e-07, |
|
"logits/chosen": -1.8692007064819336, |
|
"logits/rejected": -1.8045275211334229, |
|
"logps/chosen": -248.9095916748047, |
|
"logps/rejected": -250.84481811523438, |
|
"loss": 0.1171, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.9673175811767578, |
|
"rewards/margins": 4.323936462402344, |
|
"rewards/rejected": -2.356618642807007, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.1810699588477367, |
|
"grad_norm": 15.133332987736472, |
|
"learning_rate": 1.784168577095307e-07, |
|
"logits/chosen": -1.9296722412109375, |
|
"logits/rejected": -1.8828375339508057, |
|
"logps/chosen": -250.7962646484375, |
|
"logps/rejected": -228.93923950195312, |
|
"loss": 0.1322, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 2.0834712982177734, |
|
"rewards/margins": 3.928879499435425, |
|
"rewards/rejected": -1.8454080820083618, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 13.466085492542144, |
|
"learning_rate": 1.6170397952784248e-07, |
|
"logits/chosen": -1.9489628076553345, |
|
"logits/rejected": -1.8797670602798462, |
|
"logps/chosen": -270.56427001953125, |
|
"logps/rejected": -242.9454803466797, |
|
"loss": 0.1229, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.427950143814087, |
|
"rewards/margins": 4.682461261749268, |
|
"rewards/rejected": -2.2545108795166016, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.263374485596708, |
|
"grad_norm": 14.794346267314218, |
|
"learning_rate": 1.4566085573529874e-07, |
|
"logits/chosen": -1.9156001806259155, |
|
"logits/rejected": -1.8757755756378174, |
|
"logps/chosen": -258.8504333496094, |
|
"logps/rejected": -229.5829315185547, |
|
"loss": 0.1305, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 2.124898910522461, |
|
"rewards/margins": 4.520539283752441, |
|
"rewards/rejected": -2.3956406116485596, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.3045267489711936, |
|
"grad_norm": 14.6085524255932, |
|
"learning_rate": 1.3031924831526737e-07, |
|
"logits/chosen": -1.918760895729065, |
|
"logits/rejected": -1.8703607320785522, |
|
"logps/chosen": -261.5938415527344, |
|
"logps/rejected": -230.3494415283203, |
|
"loss": 0.1162, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.9034366607666016, |
|
"rewards/margins": 4.663661003112793, |
|
"rewards/rejected": -2.7602241039276123, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.3456790123456788, |
|
"grad_norm": 16.362862237175147, |
|
"learning_rate": 1.1570953039744591e-07, |
|
"logits/chosen": -1.9305750131607056, |
|
"logits/rejected": -1.8696216344833374, |
|
"logps/chosen": -266.16680908203125, |
|
"logps/rejected": -258.2370910644531, |
|
"loss": 0.1186, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 2.204184055328369, |
|
"rewards/margins": 4.997335433959961, |
|
"rewards/rejected": -2.7931509017944336, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.386831275720165, |
|
"grad_norm": 13.275572612341923, |
|
"learning_rate": 1.0186062612550616e-07, |
|
"logits/chosen": -1.9214690923690796, |
|
"logits/rejected": -1.8716766834259033, |
|
"logps/chosen": -252.57180786132812, |
|
"logps/rejected": -259.24224853515625, |
|
"loss": 0.12, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.912581205368042, |
|
"rewards/margins": 4.5273051261901855, |
|
"rewards/rejected": -2.6147236824035645, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.42798353909465, |
|
"grad_norm": 14.003480945619684, |
|
"learning_rate": 8.879995339342167e-08, |
|
"logits/chosen": -1.914181113243103, |
|
"logits/rejected": -1.8485758304595947, |
|
"logps/chosen": -248.25320434570312, |
|
"logps/rejected": -228.18118286132812, |
|
"loss": 0.1167, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.6209943294525146, |
|
"rewards/margins": 4.539933204650879, |
|
"rewards/rejected": -2.918938398361206, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.4691358024691357, |
|
"grad_norm": 13.39746651643324, |
|
"learning_rate": 7.655336956385155e-08, |
|
"logits/chosen": -1.936248540878296, |
|
"logits/rejected": -1.8758357763290405, |
|
"logps/chosen": -251.0574951171875, |
|
"logps/rejected": -252.95425415039062, |
|
"loss": 0.1311, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 1.768341302871704, |
|
"rewards/margins": 4.591066360473633, |
|
"rewards/rejected": -2.8227250576019287, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.4691358024691357, |
|
"eval_logits/chosen": -1.9243203401565552, |
|
"eval_logits/rejected": -1.8631280660629272, |
|
"eval_logps/chosen": -251.93479919433594, |
|
"eval_logps/rejected": -234.54112243652344, |
|
"eval_loss": 0.5211819410324097, |
|
"eval_rewards/accuracies": 0.8194444179534912, |
|
"eval_rewards/chosen": 0.6944435238838196, |
|
"eval_rewards/margins": 2.59578800201416, |
|
"eval_rewards/rejected": -1.9013442993164062, |
|
"eval_runtime": 228.1654, |
|
"eval_samples_per_second": 15.147, |
|
"eval_steps_per_second": 0.237, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.5102880658436213, |
|
"grad_norm": 15.074747119995138, |
|
"learning_rate": 6.514512027604508e-08, |
|
"logits/chosen": -1.9279800653457642, |
|
"logits/rejected": -1.8792842626571655, |
|
"logps/chosen": -232.16232299804688, |
|
"logps/rejected": -224.8663330078125, |
|
"loss": 0.1173, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.4570283889770508, |
|
"rewards/margins": 4.1241984367370605, |
|
"rewards/rejected": -2.667170286178589, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.551440329218107, |
|
"grad_norm": 15.943407922179238, |
|
"learning_rate": 5.459779144461712e-08, |
|
"logits/chosen": -1.967230200767517, |
|
"logits/rejected": -1.8994722366333008, |
|
"logps/chosen": -251.5553436279297, |
|
"logps/rejected": -234.64218139648438, |
|
"loss": 0.132, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.8404948711395264, |
|
"rewards/margins": 4.55427885055542, |
|
"rewards/rejected": -2.7137837409973145, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.5925925925925926, |
|
"grad_norm": 15.791999145358414, |
|
"learning_rate": 4.49322645442266e-08, |
|
"logits/chosen": -1.9726388454437256, |
|
"logits/rejected": -1.9029220342636108, |
|
"logps/chosen": -226.0243377685547, |
|
"logps/rejected": -245.57943725585938, |
|
"loss": 0.1327, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.4504220485687256, |
|
"rewards/margins": 4.483643531799316, |
|
"rewards/rejected": -3.033221483230591, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.633744855967078, |
|
"grad_norm": 14.527344025713838, |
|
"learning_rate": 3.616767526868353e-08, |
|
"logits/chosen": -1.9656314849853516, |
|
"logits/rejected": -1.898186445236206, |
|
"logps/chosen": -268.8167419433594, |
|
"logps/rejected": -251.64340209960938, |
|
"loss": 0.1062, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 2.3940348625183105, |
|
"rewards/margins": 5.15994930267334, |
|
"rewards/rejected": -2.7659144401550293, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.674897119341564, |
|
"grad_norm": 14.467899944932638, |
|
"learning_rate": 2.8321375646333023e-08, |
|
"logits/chosen": -1.984684944152832, |
|
"logits/rejected": -1.905601143836975, |
|
"logps/chosen": -226.098876953125, |
|
"logps/rejected": -269.22723388671875, |
|
"loss": 0.1209, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.9057655334472656, |
|
"rewards/margins": 4.68411922454834, |
|
"rewards/rejected": -2.778353691101074, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.7160493827160495, |
|
"grad_norm": 14.107526535593529, |
|
"learning_rate": 2.1408899686718996e-08, |
|
"logits/chosen": -1.996860146522522, |
|
"logits/rejected": -1.8913567066192627, |
|
"logps/chosen": -248.2650909423828, |
|
"logps/rejected": -243.4825439453125, |
|
"loss": 0.1195, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.7427318096160889, |
|
"rewards/margins": 4.723761558532715, |
|
"rewards/rejected": -2.981029987335205, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.757201646090535, |
|
"grad_norm": 16.668582895840217, |
|
"learning_rate": 1.5443932626538314e-08, |
|
"logits/chosen": -1.9676933288574219, |
|
"logits/rejected": -1.910146713256836, |
|
"logps/chosen": -238.7953338623047, |
|
"logps/rejected": -224.4933319091797, |
|
"loss": 0.15, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.859580636024475, |
|
"rewards/margins": 4.216904640197754, |
|
"rewards/rejected": -2.3573238849639893, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 2.7983539094650207, |
|
"grad_norm": 13.029805689567587, |
|
"learning_rate": 1.0438283835774387e-08, |
|
"logits/chosen": -1.9859317541122437, |
|
"logits/rejected": -1.8881919384002686, |
|
"logps/chosen": -242.4602508544922, |
|
"logps/rejected": -228.0737762451172, |
|
"loss": 0.1257, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.7638639211654663, |
|
"rewards/margins": 4.470877170562744, |
|
"rewards/rejected": -2.7070131301879883, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.8395061728395063, |
|
"grad_norm": 13.50245071791209, |
|
"learning_rate": 6.401863437648481e-09, |
|
"logits/chosen": -1.9783008098602295, |
|
"logits/rejected": -1.8936630487442017, |
|
"logps/chosen": -262.051025390625, |
|
"logps/rejected": -244.21853637695312, |
|
"loss": 0.1265, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.8696222305297852, |
|
"rewards/margins": 4.679049491882324, |
|
"rewards/rejected": -2.809427261352539, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.8806584362139915, |
|
"grad_norm": 19.77488068943749, |
|
"learning_rate": 3.3426626886769448e-09, |
|
"logits/chosen": -1.9724162817001343, |
|
"logits/rejected": -1.9013561010360718, |
|
"logps/chosen": -265.6155700683594, |
|
"logps/rejected": -258.1453552246094, |
|
"loss": 0.1582, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.233098030090332, |
|
"rewards/margins": 4.871306419372559, |
|
"rewards/rejected": -2.6382088661193848, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.9218106995884776, |
|
"grad_norm": 16.657133866108477, |
|
"learning_rate": 1.2667381576779712e-09, |
|
"logits/chosen": -1.9556434154510498, |
|
"logits/rejected": -1.890546202659607, |
|
"logps/chosen": -237.84500122070312, |
|
"logps/rejected": -261.2818298339844, |
|
"loss": 0.1363, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.9853665828704834, |
|
"rewards/margins": 5.180100440979004, |
|
"rewards/rejected": -3.194733142852783, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 2.962962962962963, |
|
"grad_norm": 12.623822906293494, |
|
"learning_rate": 1.7819973504940023e-10, |
|
"logits/chosen": -1.9709722995758057, |
|
"logits/rejected": -1.8710010051727295, |
|
"logps/chosen": -241.50997924804688, |
|
"logps/rejected": -266.9458923339844, |
|
"loss": 0.1258, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.947824478149414, |
|
"rewards/margins": 4.476650238037109, |
|
"rewards/rejected": -2.5288257598876953, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.9876543209876543, |
|
"step": 363, |
|
"total_flos": 4280357159436288.0, |
|
"train_loss": 0.30565077164941584, |
|
"train_runtime": 13036.7565, |
|
"train_samples_per_second": 7.158, |
|
"train_steps_per_second": 0.028 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 363, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4280357159436288.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|