{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9876543209876543,
"eval_steps": 100,
"global_step": 363,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0411522633744856,
"grad_norm": 71.36946521074697,
"learning_rate": 5e-07,
"logits/chosen": -2.7249937057495117,
"logits/rejected": -2.7219715118408203,
"logps/chosen": -289.096435546875,
"logps/rejected": -212.59097290039062,
"loss": 0.6888,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": 0.027115171775221825,
"rewards/margins": 0.011037254706025124,
"rewards/rejected": 0.01607791893184185,
"step": 5
},
{
"epoch": 0.0823045267489712,
"grad_norm": 68.75739482144014,
"learning_rate": 1e-06,
"logits/chosen": -2.673173666000366,
"logits/rejected": -2.6852009296417236,
"logps/chosen": -258.5091857910156,
"logps/rejected": -228.7921905517578,
"loss": 0.6519,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.6803622841835022,
"rewards/margins": 0.2561650276184082,
"rewards/rejected": 0.424197256565094,
"step": 10
},
{
"epoch": 0.12345679012345678,
"grad_norm": 43.9449007096878,
"learning_rate": 9.995050530093366e-07,
"logits/chosen": -2.5606446266174316,
"logits/rejected": -2.555354595184326,
"logps/chosen": -258.5283508300781,
"logps/rejected": -217.637939453125,
"loss": 0.5873,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": 1.7179749011993408,
"rewards/margins": 0.7173956036567688,
"rewards/rejected": 1.0005793571472168,
"step": 15
},
{
"epoch": 0.1646090534979424,
"grad_norm": 49.90030149803026,
"learning_rate": 9.980211919274406e-07,
"logits/chosen": -2.334833860397339,
"logits/rejected": -2.3182854652404785,
"logps/chosen": -234.5125732421875,
"logps/rejected": -194.8851318359375,
"loss": 0.6125,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": 1.7243343591690063,
"rewards/margins": 0.8316472172737122,
"rewards/rejected": 0.8926870226860046,
"step": 20
},
{
"epoch": 0.205761316872428,
"grad_norm": 43.554349506398026,
"learning_rate": 9.955513544846204e-07,
"logits/chosen": -2.12056303024292,
"logits/rejected": -2.095937728881836,
"logps/chosen": -284.00323486328125,
"logps/rejected": -210.3358154296875,
"loss": 0.5749,
"rewards/accuracies": 0.8125,
"rewards/chosen": 2.1191883087158203,
"rewards/margins": 1.4548943042755127,
"rewards/rejected": 0.6642940044403076,
"step": 25
},
{
"epoch": 0.24691358024691357,
"grad_norm": 45.05915140113881,
"learning_rate": 9.921004304353147e-07,
"logits/chosen": -2.04213547706604,
"logits/rejected": -2.0172839164733887,
"logps/chosen": -232.2016143798828,
"logps/rejected": -217.5736846923828,
"loss": 0.5989,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": 1.9440408945083618,
"rewards/margins": 1.5185799598693848,
"rewards/rejected": 0.4254608750343323,
"step": 30
},
{
"epoch": 0.2880658436213992,
"grad_norm": 40.00728614202134,
"learning_rate": 9.876752518774164e-07,
"logits/chosen": -2.0041847229003906,
"logits/rejected": -1.9888496398925781,
"logps/chosen": -255.5012969970703,
"logps/rejected": -238.2528839111328,
"loss": 0.6076,
"rewards/accuracies": 0.71875,
"rewards/chosen": 1.3912312984466553,
"rewards/margins": 1.0289623737335205,
"rewards/rejected": 0.36226886510849,
"step": 35
},
{
"epoch": 0.3292181069958848,
"grad_norm": 42.181862044805364,
"learning_rate": 9.822845797261675e-07,
"logits/chosen": -2.024127244949341,
"logits/rejected": -2.020592451095581,
"logps/chosen": -249.13394165039062,
"logps/rejected": -199.90975952148438,
"loss": 0.5846,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": 1.3734517097473145,
"rewards/margins": 0.8223851919174194,
"rewards/rejected": 0.5510665774345398,
"step": 40
},
{
"epoch": 0.37037037037037035,
"grad_norm": 41.15847921708812,
"learning_rate": 9.759390863694029e-07,
"logits/chosen": -2.0532474517822266,
"logits/rejected": -1.9978084564208984,
"logps/chosen": -256.01446533203125,
"logps/rejected": -206.8267059326172,
"loss": 0.5481,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 1.589166283607483,
"rewards/margins": 1.2907274961471558,
"rewards/rejected": 0.2984387278556824,
"step": 45
},
{
"epoch": 0.411522633744856,
"grad_norm": 42.63680924826028,
"learning_rate": 9.68651334538488e-07,
"logits/chosen": -2.034133195877075,
"logits/rejected": -2.0025076866149902,
"logps/chosen": -259.46942138671875,
"logps/rejected": -229.2208251953125,
"loss": 0.5652,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": 1.4298592805862427,
"rewards/margins": 1.076907992362976,
"rewards/rejected": 0.35295119881629944,
"step": 50
},
{
"epoch": 0.45267489711934156,
"grad_norm": 36.48668334468458,
"learning_rate": 9.604357524367722e-07,
"logits/chosen": -2.0932247638702393,
"logits/rejected": -2.0437166690826416,
"logps/chosen": -281.03289794921875,
"logps/rejected": -227.46109008789062,
"loss": 0.5437,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 1.40286123752594,
"rewards/margins": 1.028884768486023,
"rewards/rejected": 0.3739764094352722,
"step": 55
},
{
"epoch": 0.49382716049382713,
"grad_norm": 35.36330599361053,
"learning_rate": 9.513086051748067e-07,
"logits/chosen": -2.1159732341766357,
"logits/rejected": -2.078249931335449,
"logps/chosen": -265.8070373535156,
"logps/rejected": -214.79428100585938,
"loss": 0.5166,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": 1.7003364562988281,
"rewards/margins": 1.7438255548477173,
"rewards/rejected": -0.0434890016913414,
"step": 60
},
{
"epoch": 0.5349794238683128,
"grad_norm": 31.36432376366485,
"learning_rate": 9.412879625688742e-07,
"logits/chosen": -2.183833599090576,
"logits/rejected": -2.1385440826416016,
"logps/chosen": -270.89263916015625,
"logps/rejected": -203.67922973632812,
"loss": 0.4848,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": 1.6083428859710693,
"rewards/margins": 1.5229980945587158,
"rewards/rejected": 0.08534489572048187,
"step": 65
},
{
"epoch": 0.5761316872427984,
"grad_norm": 35.68216693219843,
"learning_rate": 9.303936633665839e-07,
"logits/chosen": -2.3082363605499268,
"logits/rejected": -2.2824604511260986,
"logps/chosen": -255.9834747314453,
"logps/rejected": -194.7764892578125,
"loss": 0.5289,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": 1.1171067953109741,
"rewards/margins": 1.4306641817092896,
"rewards/rejected": -0.3135572373867035,
"step": 70
},
{
"epoch": 0.6172839506172839,
"grad_norm": 38.51565362073314,
"learning_rate": 9.186472759703578e-07,
"logits/chosen": -2.3410449028015137,
"logits/rejected": -2.3213045597076416,
"logps/chosen": -275.8757019042969,
"logps/rejected": -213.70693969726562,
"loss": 0.5387,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.7660292387008667,
"rewards/margins": 1.4290556907653809,
"rewards/rejected": -0.6630264520645142,
"step": 75
},
{
"epoch": 0.6584362139917695,
"grad_norm": 30.908945588893605,
"learning_rate": 9.060720557365682e-07,
"logits/chosen": -2.3798623085021973,
"logits/rejected": -2.378147602081299,
"logps/chosen": -277.94622802734375,
"logps/rejected": -228.6498565673828,
"loss": 0.524,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": 0.9844567179679871,
"rewards/margins": 1.5679962635040283,
"rewards/rejected": -0.5835394859313965,
"step": 80
},
{
"epoch": 0.6995884773662552,
"grad_norm": 36.12667290276971,
"learning_rate": 8.926928989348611e-07,
"logits/chosen": -2.438974618911743,
"logits/rejected": -2.4293782711029053,
"logps/chosen": -264.4499816894531,
"logps/rejected": -233.60958862304688,
"loss": 0.5124,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": 0.7334972023963928,
"rewards/margins": 1.825126051902771,
"rewards/rejected": -1.091629147529602,
"step": 85
},
{
"epoch": 0.7407407407407407,
"grad_norm": 31.43710612772888,
"learning_rate": 8.785362934588233e-07,
"logits/chosen": -2.4581363201141357,
"logits/rejected": -2.4250473976135254,
"logps/chosen": -272.42498779296875,
"logps/rejected": -206.20614624023438,
"loss": 0.5073,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": 1.6099742650985718,
"rewards/margins": 1.7208999395370483,
"rewards/rejected": -0.11092579364776611,
"step": 90
},
{
"epoch": 0.7818930041152263,
"grad_norm": 35.922757319188804,
"learning_rate": 8.636302663855681e-07,
"logits/chosen": -2.368760585784912,
"logits/rejected": -2.3825132846832275,
"logps/chosen": -247.90396118164062,
"logps/rejected": -212.88232421875,
"loss": 0.4971,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": 1.2823846340179443,
"rewards/margins": 1.8866965770721436,
"rewards/rejected": -0.6043121814727783,
"step": 95
},
{
"epoch": 0.823045267489712,
"grad_norm": 27.664598721354345,
"learning_rate": 8.480043284880664e-07,
"logits/chosen": -2.346686601638794,
"logits/rejected": -2.317147970199585,
"logps/chosen": -269.21417236328125,
"logps/rejected": -233.6097412109375,
"loss": 0.4674,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": 0.5551630258560181,
"rewards/margins": 2.1603965759277344,
"rewards/rejected": -1.6052335500717163,
"step": 100
},
{
"epoch": 0.823045267489712,
"eval_logits/chosen": -2.2803401947021484,
"eval_logits/rejected": -2.256579875946045,
"eval_logps/chosen": -257.0998229980469,
"eval_logps/rejected": -231.74539184570312,
"eval_loss": 0.4985389709472656,
"eval_rewards/accuracies": 0.7939814925193787,
"eval_rewards/chosen": 0.17793893814086914,
"eval_rewards/margins": 1.7997103929519653,
"eval_rewards/rejected": -1.6217713356018066,
"eval_runtime": 230.2785,
"eval_samples_per_second": 15.008,
"eval_steps_per_second": 0.234,
"step": 100
},
{
"epoch": 0.8641975308641975,
"grad_norm": 33.43388986335041,
"learning_rate": 8.316894158100727e-07,
"logits/chosen": -2.238370895385742,
"logits/rejected": -2.205950975418091,
"logps/chosen": -270.1739807128906,
"logps/rejected": -237.7426300048828,
"loss": 0.5036,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 0.2168927639722824,
"rewards/margins": 2.0453083515167236,
"rewards/rejected": -1.8284155130386353,
"step": 105
},
{
"epoch": 0.9053497942386831,
"grad_norm": 41.06626958250484,
"learning_rate": 8.147178284193184e-07,
"logits/chosen": -1.9968522787094116,
"logits/rejected": -1.9477859735488892,
"logps/chosen": -271.5672912597656,
"logps/rejected": -244.5254364013672,
"loss": 0.523,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.18483969569206238,
"rewards/margins": 2.090688467025757,
"rewards/rejected": -2.2755284309387207,
"step": 110
},
{
"epoch": 0.9465020576131687,
"grad_norm": 33.45568853055463,
"learning_rate": 7.971231664602271e-07,
"logits/chosen": -1.8657859563827515,
"logits/rejected": -1.7577025890350342,
"logps/chosen": -255.1681365966797,
"logps/rejected": -235.93856811523438,
"loss": 0.4781,
"rewards/accuracies": 0.78125,
"rewards/chosen": 0.08642071485519409,
"rewards/margins": 2.032249689102173,
"rewards/rejected": -1.9458287954330444,
"step": 115
},
{
"epoch": 0.9876543209876543,
"grad_norm": 31.32834367464404,
"learning_rate": 7.789402636327525e-07,
"logits/chosen": -1.7241904735565186,
"logits/rejected": -1.6637340784072876,
"logps/chosen": -269.67364501953125,
"logps/rejected": -239.79965209960938,
"loss": 0.4614,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": 0.0743473693728447,
"rewards/margins": 2.101712942123413,
"rewards/rejected": -2.0273656845092773,
"step": 120
},
{
"epoch": 1.02880658436214,
"grad_norm": 18.313357022047114,
"learning_rate": 7.602051182290381e-07,
"logits/chosen": -1.5669622421264648,
"logits/rejected": -1.4961906671524048,
"logps/chosen": -270.39056396484375,
"logps/rejected": -223.95706176757812,
"loss": 0.321,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 0.7599193453788757,
"rewards/margins": 2.714322805404663,
"rewards/rejected": -1.9544035196304321,
"step": 125
},
{
"epoch": 1.0699588477366255,
"grad_norm": 18.77066721006591,
"learning_rate": 7.409548218644331e-07,
"logits/chosen": -1.4371721744537354,
"logits/rejected": -1.3102617263793945,
"logps/chosen": -257.923095703125,
"logps/rejected": -222.04959106445312,
"loss": 0.1777,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 1.8628284931182861,
"rewards/margins": 3.623333692550659,
"rewards/rejected": -1.7605053186416626,
"step": 130
},
{
"epoch": 1.1111111111111112,
"grad_norm": 14.482571733068447,
"learning_rate": 7.212274860439576e-07,
"logits/chosen": -1.4088728427886963,
"logits/rejected": -1.3359241485595703,
"logps/chosen": -252.8369140625,
"logps/rejected": -247.0041046142578,
"loss": 0.2246,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 1.8369052410125732,
"rewards/margins": 3.677825450897217,
"rewards/rejected": -1.840920090675354,
"step": 135
},
{
"epoch": 1.1522633744855968,
"grad_norm": 19.475514209975124,
"learning_rate": 7.010621667096041e-07,
"logits/chosen": -1.5916813611984253,
"logits/rejected": -1.479448676109314,
"logps/chosen": -254.99136352539062,
"logps/rejected": -218.8384246826172,
"loss": 0.2218,
"rewards/accuracies": 0.9375,
"rewards/chosen": 1.6720364093780518,
"rewards/margins": 3.2526676654815674,
"rewards/rejected": -1.5806310176849365,
"step": 140
},
{
"epoch": 1.1934156378600824,
"grad_norm": 24.87312122824749,
"learning_rate": 6.804987869178539e-07,
"logits/chosen": -1.7563555240631104,
"logits/rejected": -1.6887686252593994,
"logps/chosen": -241.65676879882812,
"logps/rejected": -225.277099609375,
"loss": 0.2373,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 1.7571462392807007,
"rewards/margins": 3.4047298431396484,
"rewards/rejected": -1.6475833654403687,
"step": 145
},
{
"epoch": 1.2345679012345678,
"grad_norm": 22.485449779074028,
"learning_rate": 6.5957805780049e-07,
"logits/chosen": -1.889991044998169,
"logits/rejected": -1.8203752040863037,
"logps/chosen": -250.935302734375,
"logps/rejected": -223.52401733398438,
"loss": 0.2196,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": 2.23984432220459,
"rewards/margins": 3.5551295280456543,
"rewards/rejected": -1.3152849674224854,
"step": 150
},
{
"epoch": 1.2757201646090535,
"grad_norm": 20.0119744226792,
"learning_rate": 6.383413979651893e-07,
"logits/chosen": -1.9477765560150146,
"logits/rejected": -1.8932664394378662,
"logps/chosen": -242.27685546875,
"logps/rejected": -231.18991088867188,
"loss": 0.2229,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 1.800172209739685,
"rewards/margins": 3.45011568069458,
"rewards/rejected": -1.6499433517456055,
"step": 155
},
{
"epoch": 1.316872427983539,
"grad_norm": 20.63931604768156,
"learning_rate": 6.168308514954602e-07,
"logits/chosen": -1.973009705543518,
"logits/rejected": -1.8899316787719727,
"logps/chosen": -261.8257141113281,
"logps/rejected": -258.97515869140625,
"loss": 0.2121,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 1.9451110363006592,
"rewards/margins": 4.489598274230957,
"rewards/rejected": -2.544487237930298,
"step": 160
},
{
"epoch": 1.3580246913580247,
"grad_norm": 23.779662167366467,
"learning_rate": 5.950890047122741e-07,
"logits/chosen": -1.9724878072738647,
"logits/rejected": -1.9425151348114014,
"logps/chosen": -260.43084716796875,
"logps/rejected": -236.8948211669922,
"loss": 0.2464,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": 1.4944422245025635,
"rewards/margins": 3.678725481033325,
"rewards/rejected": -2.18428373336792,
"step": 165
},
{
"epoch": 1.3991769547325104,
"grad_norm": 15.224094688709425,
"learning_rate": 5.731589018621776e-07,
"logits/chosen": -1.9535115957260132,
"logits/rejected": -1.8948615789413452,
"logps/chosen": -252.6552276611328,
"logps/rejected": -226.4263916015625,
"loss": 0.2351,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 1.834676742553711,
"rewards/margins": 4.032426357269287,
"rewards/rejected": -2.1977500915527344,
"step": 170
},
{
"epoch": 1.4403292181069958,
"grad_norm": 20.636053561561848,
"learning_rate": 5.510839598988136e-07,
"logits/chosen": -1.8348503112792969,
"logits/rejected": -1.7934105396270752,
"logps/chosen": -255.14895629882812,
"logps/rejected": -232.3575897216797,
"loss": 0.2069,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 2.0120339393615723,
"rewards/margins": 3.713160276412964,
"rewards/rejected": -1.7011263370513916,
"step": 175
},
{
"epoch": 1.4814814814814814,
"grad_norm": 21.755357371160876,
"learning_rate": 5.289078825265572e-07,
"logits/chosen": -1.7341606616973877,
"logits/rejected": -1.6741468906402588,
"logps/chosen": -237.35433959960938,
"logps/rejected": -228.7030487060547,
"loss": 0.234,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": 1.63128662109375,
"rewards/margins": 3.659700393676758,
"rewards/rejected": -2.028413772583008,
"step": 180
},
{
"epoch": 1.522633744855967,
"grad_norm": 23.810123453795516,
"learning_rate": 5.066745736764489e-07,
"logits/chosen": -1.635679841041565,
"logits/rejected": -1.5873550176620483,
"logps/chosen": -248.98135375976562,
"logps/rejected": -240.08987426757812,
"loss": 0.2576,
"rewards/accuracies": 0.90625,
"rewards/chosen": 1.4315288066864014,
"rewards/margins": 3.4555141925811768,
"rewards/rejected": -2.0239853858947754,
"step": 185
},
{
"epoch": 1.5637860082304527,
"grad_norm": 22.3759752093868,
"learning_rate": 4.844280505857202e-07,
"logits/chosen": -1.5894463062286377,
"logits/rejected": -1.5013604164123535,
"logps/chosen": -239.4411163330078,
"logps/rejected": -219.7681121826172,
"loss": 0.2732,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 1.9483649730682373,
"rewards/margins": 3.714170455932617,
"rewards/rejected": -1.7658058404922485,
"step": 190
},
{
"epoch": 1.6049382716049383,
"grad_norm": 21.666055935350588,
"learning_rate": 4.6221235665299684e-07,
"logits/chosen": -1.6968196630477905,
"logits/rejected": -1.6124290227890015,
"logps/chosen": -246.6077117919922,
"logps/rejected": -233.7628631591797,
"loss": 0.2689,
"rewards/accuracies": 0.90625,
"rewards/chosen": 2.1885757446289062,
"rewards/margins": 3.544438600540161,
"rewards/rejected": -1.3558627367019653,
"step": 195
},
{
"epoch": 1.646090534979424,
"grad_norm": 23.567423393969673,
"learning_rate": 4.400714742417091e-07,
"logits/chosen": -1.7539150714874268,
"logits/rejected": -1.6715869903564453,
"logps/chosen": -289.3243713378906,
"logps/rejected": -238.78271484375,
"loss": 0.2463,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": 2.3782408237457275,
"rewards/margins": 3.8166255950927734,
"rewards/rejected": -1.438385248184204,
"step": 200
},
{
"epoch": 1.646090534979424,
"eval_logits/chosen": -1.7062827348709106,
"eval_logits/rejected": -1.629170298576355,
"eval_logps/chosen": -247.42041015625,
"eval_logps/rejected": -227.5958709716797,
"eval_loss": 0.5190241932868958,
"eval_rewards/accuracies": 0.7962962985038757,
"eval_rewards/chosen": 1.1458828449249268,
"eval_rewards/margins": 2.3527021408081055,
"eval_rewards/rejected": -1.2068192958831787,
"eval_runtime": 228.0783,
"eval_samples_per_second": 15.153,
"eval_steps_per_second": 0.237,
"step": 200
},
{
"epoch": 1.6872427983539096,
"grad_norm": 20.05042831109418,
"learning_rate": 4.180492376043371e-07,
"logits/chosen": -1.7294807434082031,
"logits/rejected": -1.6129295825958252,
"logps/chosen": -239.91696166992188,
"logps/rejected": -241.2155303955078,
"loss": 0.2475,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 1.699279546737671,
"rewards/margins": 3.706660747528076,
"rewards/rejected": -2.007380962371826,
"step": 205
},
{
"epoch": 1.7283950617283952,
"grad_norm": 17.373566601078217,
"learning_rate": 3.961892460998862e-07,
"logits/chosen": -1.7376630306243896,
"logits/rejected": -1.672767996788025,
"logps/chosen": -259.5295104980469,
"logps/rejected": -219.8362274169922,
"loss": 0.2275,
"rewards/accuracies": 0.9375,
"rewards/chosen": 1.8958297967910767,
"rewards/margins": 3.636307954788208,
"rewards/rejected": -1.7404781579971313,
"step": 210
},
{
"epoch": 1.7695473251028808,
"grad_norm": 20.32259020536467,
"learning_rate": 3.7453477787640077e-07,
"logits/chosen": -1.6703641414642334,
"logits/rejected": -1.6055065393447876,
"logps/chosen": -259.04559326171875,
"logps/rejected": -238.02713012695312,
"loss": 0.2558,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": 1.8848392963409424,
"rewards/margins": 3.780524492263794,
"rewards/rejected": -1.8956845998764038,
"step": 215
},
{
"epoch": 1.8106995884773662,
"grad_norm": 20.716775450731596,
"learning_rate": 3.531287041894075e-07,
"logits/chosen": -1.636228322982788,
"logits/rejected": -1.5927408933639526,
"logps/chosen": -259.4163513183594,
"logps/rejected": -262.77691650390625,
"loss": 0.2641,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 1.8492801189422607,
"rewards/margins": 3.8553290367126465,
"rewards/rejected": -2.0060486793518066,
"step": 220
},
{
"epoch": 1.8518518518518519,
"grad_norm": 20.26085395927115,
"learning_rate": 3.320134045259192e-07,
"logits/chosen": -1.6199842691421509,
"logits/rejected": -1.5809019804000854,
"logps/chosen": -261.5071716308594,
"logps/rejected": -244.0452117919922,
"loss": 0.2836,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": 1.7676365375518799,
"rewards/margins": 3.8491673469543457,
"rewards/rejected": -2.081530809402466,
"step": 225
},
{
"epoch": 1.8930041152263375,
"grad_norm": 19.9900109721012,
"learning_rate": 3.112306827020377e-07,
"logits/chosen": -1.6224733591079712,
"logits/rejected": -1.5683706998825073,
"logps/chosen": -246.66726684570312,
"logps/rejected": -252.150634765625,
"loss": 0.2967,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 1.2352790832519531,
"rewards/margins": 3.3191657066345215,
"rewards/rejected": -2.0838871002197266,
"step": 230
},
{
"epoch": 1.934156378600823,
"grad_norm": 20.679234729146177,
"learning_rate": 2.90821684100261e-07,
"logits/chosen": -1.665122628211975,
"logits/rejected": -1.585533857345581,
"logps/chosen": -258.1650390625,
"logps/rejected": -238.0010223388672,
"loss": 0.2521,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 1.45806884765625,
"rewards/margins": 3.821526288986206,
"rewards/rejected": -2.363457202911377,
"step": 235
},
{
"epoch": 1.9753086419753085,
"grad_norm": 17.897922449348748,
"learning_rate": 2.708268142103509e-07,
"logits/chosen": -1.6568527221679688,
"logits/rejected": -1.594029426574707,
"logps/chosen": -249.9292449951172,
"logps/rejected": -217.1236114501953,
"loss": 0.2458,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 1.140490174293518,
"rewards/margins": 3.4049384593963623,
"rewards/rejected": -2.264448404312134,
"step": 240
},
{
"epoch": 2.016460905349794,
"grad_norm": 15.579483343495324,
"learning_rate": 2.5128565863503e-07,
"logits/chosen": -1.7464730739593506,
"logits/rejected": -1.64523446559906,
"logps/chosen": -269.5633544921875,
"logps/rejected": -218.4349365234375,
"loss": 0.1875,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 1.345157504081726,
"rewards/margins": 3.8898367881774902,
"rewards/rejected": -2.5446791648864746,
"step": 245
},
{
"epoch": 2.05761316872428,
"grad_norm": 15.642770624996952,
"learning_rate": 2.3223690471888286e-07,
"logits/chosen": -1.7972164154052734,
"logits/rejected": -1.6923631429672241,
"logps/chosen": -276.4811706542969,
"logps/rejected": -239.2648468017578,
"loss": 0.1218,
"rewards/accuracies": 0.9375,
"rewards/chosen": 1.7780349254608154,
"rewards/margins": 4.118841171264648,
"rewards/rejected": -2.3408069610595703,
"step": 250
},
{
"epoch": 2.0987654320987654,
"grad_norm": 13.364305072324674,
"learning_rate": 2.1371826495561613e-07,
"logits/chosen": -1.8449236154556274,
"logits/rejected": -1.7506535053253174,
"logps/chosen": -255.83792114257812,
"logps/rejected": -221.6796875,
"loss": 0.146,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 1.8460966348648071,
"rewards/margins": 3.9246277809143066,
"rewards/rejected": -2.078531265258789,
"step": 255
},
{
"epoch": 2.139917695473251,
"grad_norm": 13.06395689210594,
"learning_rate": 1.9576640232531784e-07,
"logits/chosen": -1.8692007064819336,
"logits/rejected": -1.8045275211334229,
"logps/chosen": -248.9095916748047,
"logps/rejected": -250.84481811523438,
"loss": 0.1171,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 1.9673175811767578,
"rewards/margins": 4.323936462402344,
"rewards/rejected": -2.356618642807007,
"step": 260
},
{
"epoch": 2.1810699588477367,
"grad_norm": 15.133332987736472,
"learning_rate": 1.784168577095307e-07,
"logits/chosen": -1.9296722412109375,
"logits/rejected": -1.8828375339508057,
"logps/chosen": -250.7962646484375,
"logps/rejected": -228.93923950195312,
"loss": 0.1322,
"rewards/accuracies": 0.9375,
"rewards/chosen": 2.0834712982177734,
"rewards/margins": 3.928879499435425,
"rewards/rejected": -1.8454080820083618,
"step": 265
},
{
"epoch": 2.2222222222222223,
"grad_norm": 13.466085492542144,
"learning_rate": 1.6170397952784248e-07,
"logits/chosen": -1.9489628076553345,
"logits/rejected": -1.8797670602798462,
"logps/chosen": -270.56427001953125,
"logps/rejected": -242.9454803466797,
"loss": 0.1229,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 2.427950143814087,
"rewards/margins": 4.682461261749268,
"rewards/rejected": -2.2545108795166016,
"step": 270
},
{
"epoch": 2.263374485596708,
"grad_norm": 14.794346267314218,
"learning_rate": 1.4566085573529874e-07,
"logits/chosen": -1.9156001806259155,
"logits/rejected": -1.8757755756378174,
"logps/chosen": -258.8504333496094,
"logps/rejected": -229.5829315185547,
"loss": 0.1305,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 2.124898910522461,
"rewards/margins": 4.520539283752441,
"rewards/rejected": -2.3956406116485596,
"step": 275
},
{
"epoch": 2.3045267489711936,
"grad_norm": 14.6085524255932,
"learning_rate": 1.3031924831526737e-07,
"logits/chosen": -1.918760895729065,
"logits/rejected": -1.8703607320785522,
"logps/chosen": -261.5938415527344,
"logps/rejected": -230.3494415283203,
"loss": 0.1162,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 1.9034366607666016,
"rewards/margins": 4.663661003112793,
"rewards/rejected": -2.7602241039276123,
"step": 280
},
{
"epoch": 2.3456790123456788,
"grad_norm": 16.362862237175147,
"learning_rate": 1.1570953039744591e-07,
"logits/chosen": -1.9305750131607056,
"logits/rejected": -1.8696216344833374,
"logps/chosen": -266.16680908203125,
"logps/rejected": -258.2370910644531,
"loss": 0.1186,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 2.204184055328369,
"rewards/margins": 4.997335433959961,
"rewards/rejected": -2.7931509017944336,
"step": 285
},
{
"epoch": 2.386831275720165,
"grad_norm": 13.275572612341923,
"learning_rate": 1.0186062612550616e-07,
"logits/chosen": -1.9214690923690796,
"logits/rejected": -1.8716766834259033,
"logps/chosen": -252.57180786132812,
"logps/rejected": -259.24224853515625,
"loss": 0.12,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 1.912581205368042,
"rewards/margins": 4.5273051261901855,
"rewards/rejected": -2.6147236824035645,
"step": 290
},
{
"epoch": 2.42798353909465,
"grad_norm": 14.003480945619684,
"learning_rate": 8.879995339342167e-08,
"logits/chosen": -1.914181113243103,
"logits/rejected": -1.8485758304595947,
"logps/chosen": -248.25320434570312,
"logps/rejected": -228.18118286132812,
"loss": 0.1167,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 1.6209943294525146,
"rewards/margins": 4.539933204650879,
"rewards/rejected": -2.918938398361206,
"step": 295
},
{
"epoch": 2.4691358024691357,
"grad_norm": 13.39746651643324,
"learning_rate": 7.655336956385155e-08,
"logits/chosen": -1.936248540878296,
"logits/rejected": -1.8758357763290405,
"logps/chosen": -251.0574951171875,
"logps/rejected": -252.95425415039062,
"loss": 0.1311,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": 1.768341302871704,
"rewards/margins": 4.591066360473633,
"rewards/rejected": -2.8227250576019287,
"step": 300
},
{
"epoch": 2.4691358024691357,
"eval_logits/chosen": -1.9243203401565552,
"eval_logits/rejected": -1.8631280660629272,
"eval_logps/chosen": -251.93479919433594,
"eval_logps/rejected": -234.54112243652344,
"eval_loss": 0.5211819410324097,
"eval_rewards/accuracies": 0.8194444179534912,
"eval_rewards/chosen": 0.6944435238838196,
"eval_rewards/margins": 2.59578800201416,
"eval_rewards/rejected": -1.9013442993164062,
"eval_runtime": 228.1654,
"eval_samples_per_second": 15.147,
"eval_steps_per_second": 0.237,
"step": 300
},
{
"epoch": 2.5102880658436213,
"grad_norm": 15.074747119995138,
"learning_rate": 6.514512027604508e-08,
"logits/chosen": -1.9279800653457642,
"logits/rejected": -1.8792842626571655,
"logps/chosen": -232.16232299804688,
"logps/rejected": -224.8663330078125,
"loss": 0.1173,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 1.4570283889770508,
"rewards/margins": 4.1241984367370605,
"rewards/rejected": -2.667170286178589,
"step": 305
},
{
"epoch": 2.551440329218107,
"grad_norm": 15.943407922179238,
"learning_rate": 5.459779144461712e-08,
"logits/chosen": -1.967230200767517,
"logits/rejected": -1.8994722366333008,
"logps/chosen": -251.5553436279297,
"logps/rejected": -234.64218139648438,
"loss": 0.132,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 1.8404948711395264,
"rewards/margins": 4.55427885055542,
"rewards/rejected": -2.7137837409973145,
"step": 310
},
{
"epoch": 2.5925925925925926,
"grad_norm": 15.791999145358414,
"learning_rate": 4.49322645442266e-08,
"logits/chosen": -1.9726388454437256,
"logits/rejected": -1.9029220342636108,
"logps/chosen": -226.0243377685547,
"logps/rejected": -245.57943725585938,
"loss": 0.1327,
"rewards/accuracies": 0.9375,
"rewards/chosen": 1.4504220485687256,
"rewards/margins": 4.483643531799316,
"rewards/rejected": -3.033221483230591,
"step": 315
},
{
"epoch": 2.633744855967078,
"grad_norm": 14.527344025713838,
"learning_rate": 3.616767526868353e-08,
"logits/chosen": -1.9656314849853516,
"logits/rejected": -1.898186445236206,
"logps/chosen": -268.8167419433594,
"logps/rejected": -251.64340209960938,
"loss": 0.1062,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 2.3940348625183105,
"rewards/margins": 5.15994930267334,
"rewards/rejected": -2.7659144401550293,
"step": 320
},
{
"epoch": 2.674897119341564,
"grad_norm": 14.467899944932638,
"learning_rate": 2.8321375646333023e-08,
"logits/chosen": -1.984684944152832,
"logits/rejected": -1.905601143836975,
"logps/chosen": -226.098876953125,
"logps/rejected": -269.22723388671875,
"loss": 0.1209,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 1.9057655334472656,
"rewards/margins": 4.68411922454834,
"rewards/rejected": -2.778353691101074,
"step": 325
},
{
"epoch": 2.7160493827160495,
"grad_norm": 14.107526535593529,
"learning_rate": 2.1408899686718996e-08,
"logits/chosen": -1.996860146522522,
"logits/rejected": -1.8913567066192627,
"logps/chosen": -248.2650909423828,
"logps/rejected": -243.4825439453125,
"loss": 0.1195,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 1.7427318096160889,
"rewards/margins": 4.723761558532715,
"rewards/rejected": -2.981029987335205,
"step": 330
},
{
"epoch": 2.757201646090535,
"grad_norm": 16.668582895840217,
"learning_rate": 1.5443932626538314e-08,
"logits/chosen": -1.9676933288574219,
"logits/rejected": -1.910146713256836,
"logps/chosen": -238.7953338623047,
"logps/rejected": -224.4933319091797,
"loss": 0.15,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 1.859580636024475,
"rewards/margins": 4.216904640197754,
"rewards/rejected": -2.3573238849639893,
"step": 335
},
{
"epoch": 2.7983539094650207,
"grad_norm": 13.029805689567587,
"learning_rate": 1.0438283835774387e-08,
"logits/chosen": -1.9859317541122437,
"logits/rejected": -1.8881919384002686,
"logps/chosen": -242.4602508544922,
"logps/rejected": -228.0737762451172,
"loss": 0.1257,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 1.7638639211654663,
"rewards/margins": 4.470877170562744,
"rewards/rejected": -2.7070131301879883,
"step": 340
},
{
"epoch": 2.8395061728395063,
"grad_norm": 13.50245071791209,
"learning_rate": 6.401863437648481e-09,
"logits/chosen": -1.9783008098602295,
"logits/rejected": -1.8936630487442017,
"logps/chosen": -262.051025390625,
"logps/rejected": -244.21853637695312,
"loss": 0.1265,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 1.8696222305297852,
"rewards/margins": 4.679049491882324,
"rewards/rejected": -2.809427261352539,
"step": 345
},
{
"epoch": 2.8806584362139915,
"grad_norm": 19.77488068943749,
"learning_rate": 3.3426626886769448e-09,
"logits/chosen": -1.9724162817001343,
"logits/rejected": -1.9013561010360718,
"logps/chosen": -265.6155700683594,
"logps/rejected": -258.1453552246094,
"loss": 0.1582,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 2.233098030090332,
"rewards/margins": 4.871306419372559,
"rewards/rejected": -2.6382088661193848,
"step": 350
},
{
"epoch": 2.9218106995884776,
"grad_norm": 16.657133866108477,
"learning_rate": 1.2667381576779712e-09,
"logits/chosen": -1.9556434154510498,
"logits/rejected": -1.890546202659607,
"logps/chosen": -237.84500122070312,
"logps/rejected": -261.2818298339844,
"loss": 0.1363,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 1.9853665828704834,
"rewards/margins": 5.180100440979004,
"rewards/rejected": -3.194733142852783,
"step": 355
},
{
"epoch": 2.962962962962963,
"grad_norm": 12.623822906293494,
"learning_rate": 1.7819973504940023e-10,
"logits/chosen": -1.9709722995758057,
"logits/rejected": -1.8710010051727295,
"logps/chosen": -241.50997924804688,
"logps/rejected": -266.9458923339844,
"loss": 0.1258,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 1.947824478149414,
"rewards/margins": 4.476650238037109,
"rewards/rejected": -2.5288257598876953,
"step": 360
},
{
"epoch": 2.9876543209876543,
"step": 363,
"total_flos": 4280357159436288.0,
"train_loss": 0.30565077164941584,
"train_runtime": 13036.7565,
"train_samples_per_second": 7.158,
"train_steps_per_second": 0.028
}
],
"logging_steps": 5,
"max_steps": 363,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4280357159436288.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}