|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 324, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.046296296296296294, |
|
"grad_norm": 60.684144460841296, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.7229340076446533, |
|
"logits/rejected": -2.708962917327881, |
|
"logps/chosen": -284.58026123046875, |
|
"logps/rejected": -236.6112823486328, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": 0.019139662384986877, |
|
"rewards/margins": 0.0077867708168923855, |
|
"rewards/rejected": 0.011352891102433205, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.09259259259259259, |
|
"grad_norm": 58.154423145201655, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -2.6737208366394043, |
|
"logits/rejected": -2.683973789215088, |
|
"logps/chosen": -277.359619140625, |
|
"logps/rejected": -221.55029296875, |
|
"loss": 0.6271, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.614923357963562, |
|
"rewards/margins": 0.23358741402626038, |
|
"rewards/rejected": 0.3813360035419464, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1388888888888889, |
|
"grad_norm": 42.31468213125409, |
|
"learning_rate": 9.99374496282885e-07, |
|
"logits/chosen": -2.4994168281555176, |
|
"logits/rejected": -2.450195789337158, |
|
"logps/chosen": -262.51397705078125, |
|
"logps/rejected": -197.8154296875, |
|
"loss": 0.5716, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 1.9596316814422607, |
|
"rewards/margins": 0.8402416110038757, |
|
"rewards/rejected": 1.1193901300430298, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.18518518518518517, |
|
"grad_norm": 48.223216933084814, |
|
"learning_rate": 9.974995501511404e-07, |
|
"logits/chosen": -2.2974586486816406, |
|
"logits/rejected": -2.2951722145080566, |
|
"logps/chosen": -245.7628936767578, |
|
"logps/rejected": -194.25070190429688, |
|
"loss": 0.5987, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 1.8363304138183594, |
|
"rewards/margins": 0.6894850730895996, |
|
"rewards/rejected": 1.1468452215194702, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.23148148148148148, |
|
"grad_norm": 37.278982280588096, |
|
"learning_rate": 9.94379852747865e-07, |
|
"logits/chosen": -2.147829055786133, |
|
"logits/rejected": -2.1413960456848145, |
|
"logps/chosen": -262.19586181640625, |
|
"logps/rejected": -201.21205139160156, |
|
"loss": 0.521, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 2.0674498081207275, |
|
"rewards/margins": 1.227130651473999, |
|
"rewards/rejected": 0.840319037437439, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"grad_norm": 40.42547622604402, |
|
"learning_rate": 9.900232096023476e-07, |
|
"logits/chosen": -2.008653163909912, |
|
"logits/rejected": -1.9756759405136108, |
|
"logps/chosen": -254.8358612060547, |
|
"logps/rejected": -205.0883026123047, |
|
"loss": 0.531, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 2.0206902027130127, |
|
"rewards/margins": 1.3330821990966797, |
|
"rewards/rejected": 0.6876081228256226, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.32407407407407407, |
|
"grad_norm": 36.172789672007866, |
|
"learning_rate": 9.844405211005144e-07, |
|
"logits/chosen": -1.9186718463897705, |
|
"logits/rejected": -1.8931595087051392, |
|
"logps/chosen": -254.8049774169922, |
|
"logps/rejected": -235.9902801513672, |
|
"loss": 0.5109, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 1.925907850265503, |
|
"rewards/margins": 1.3726422786712646, |
|
"rewards/rejected": 0.5532655119895935, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.37037037037037035, |
|
"grad_norm": 42.11552973137223, |
|
"learning_rate": 9.776457552120033e-07, |
|
"logits/chosen": -1.9418878555297852, |
|
"logits/rejected": -1.9285017251968384, |
|
"logps/chosen": -271.52093505859375, |
|
"logps/rejected": -207.9265899658203, |
|
"loss": 0.5478, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 1.6729097366333008, |
|
"rewards/margins": 1.3872096538543701, |
|
"rewards/rejected": 0.28570008277893066, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 37.7632765834023, |
|
"learning_rate": 9.696559125420947e-07, |
|
"logits/chosen": -2.0984578132629395, |
|
"logits/rejected": -2.0816712379455566, |
|
"logps/chosen": -265.9540710449219, |
|
"logps/rejected": -209.5546112060547, |
|
"loss": 0.5079, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 2.1132609844207764, |
|
"rewards/margins": 1.7824798822402954, |
|
"rewards/rejected": 0.33078116178512573, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.46296296296296297, |
|
"grad_norm": 39.63733367911293, |
|
"learning_rate": 9.604909837959454e-07, |
|
"logits/chosen": -2.116713762283325, |
|
"logits/rejected": -2.0851855278015137, |
|
"logps/chosen": -259.4333190917969, |
|
"logps/rejected": -226.8323516845703, |
|
"loss": 0.5413, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 2.012805938720703, |
|
"rewards/margins": 1.6606166362762451, |
|
"rewards/rejected": 0.35218924283981323, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5092592592592593, |
|
"grad_norm": 37.60669919034035, |
|
"learning_rate": 9.501738997615469e-07, |
|
"logits/chosen": -2.129849672317505, |
|
"logits/rejected": -2.118218183517456, |
|
"logps/chosen": -269.1800537109375, |
|
"logps/rejected": -219.0711669921875, |
|
"loss": 0.5543, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 1.9002279043197632, |
|
"rewards/margins": 1.5283737182617188, |
|
"rewards/rejected": 0.37185433506965637, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 33.29835445177269, |
|
"learning_rate": 9.387304739365523e-07, |
|
"logits/chosen": -2.159351110458374, |
|
"logits/rejected": -2.130619525909424, |
|
"logps/chosen": -252.5172882080078, |
|
"logps/rejected": -228.6378936767578, |
|
"loss": 0.5365, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 1.741283655166626, |
|
"rewards/margins": 1.711003065109253, |
|
"rewards/rejected": 0.030280273407697678, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6018518518518519, |
|
"grad_norm": 30.257759350833947, |
|
"learning_rate": 9.261893379425217e-07, |
|
"logits/chosen": -2.1917521953582764, |
|
"logits/rejected": -2.1474714279174805, |
|
"logps/chosen": -268.36163330078125, |
|
"logps/rejected": -205.3818817138672, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 1.57059645652771, |
|
"rewards/margins": 1.7522954940795898, |
|
"rewards/rejected": -0.18169905245304108, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.6481481481481481, |
|
"grad_norm": 40.25398245444873, |
|
"learning_rate": 9.125818698881797e-07, |
|
"logits/chosen": -2.2085392475128174, |
|
"logits/rejected": -2.21848201751709, |
|
"logps/chosen": -271.4682312011719, |
|
"logps/rejected": -214.6433563232422, |
|
"loss": 0.5248, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 1.0917803049087524, |
|
"rewards/margins": 1.8838036060333252, |
|
"rewards/rejected": -0.7920231819152832, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6944444444444444, |
|
"grad_norm": 37.0974929070999, |
|
"learning_rate": 8.979421158609205e-07, |
|
"logits/chosen": -2.2309927940368652, |
|
"logits/rejected": -2.200138807296753, |
|
"logps/chosen": -266.31951904296875, |
|
"logps/rejected": -201.2650909423828, |
|
"loss": 0.5172, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 1.1627981662750244, |
|
"rewards/margins": 1.913628339767456, |
|
"rewards/rejected": -0.7508302330970764, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7407407407407407, |
|
"grad_norm": 36.09183980430684, |
|
"learning_rate": 8.823067047429906e-07, |
|
"logits/chosen": -2.2019572257995605, |
|
"logits/rejected": -2.1673662662506104, |
|
"logps/chosen": -278.7079162597656, |
|
"logps/rejected": -234.3495330810547, |
|
"loss": 0.5542, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 1.2150039672851562, |
|
"rewards/margins": 1.7666345834732056, |
|
"rewards/rejected": -0.551630437374115, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.7870370370370371, |
|
"grad_norm": 38.47115055753171, |
|
"learning_rate": 8.657147565654818e-07, |
|
"logits/chosen": -2.1636500358581543, |
|
"logits/rejected": -2.146523952484131, |
|
"logps/chosen": -275.8933410644531, |
|
"logps/rejected": -237.3660888671875, |
|
"loss": 0.4855, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 1.741151213645935, |
|
"rewards/margins": 1.9285080432891846, |
|
"rewards/rejected": -0.1873568296432495, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 31.938894285721222, |
|
"learning_rate": 8.482077846294308e-07, |
|
"logits/chosen": -2.167405605316162, |
|
"logits/rejected": -2.1467373371124268, |
|
"logps/chosen": -275.11895751953125, |
|
"logps/rejected": -217.98818969726562, |
|
"loss": 0.4561, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.2301520109176636, |
|
"rewards/margins": 2.0726938247680664, |
|
"rewards/rejected": -0.8425418138504028, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.8796296296296297, |
|
"grad_norm": 35.431684887112006, |
|
"learning_rate": 8.298295916389233e-07, |
|
"logits/chosen": -2.1900432109832764, |
|
"logits/rejected": -2.1787796020507812, |
|
"logps/chosen": -264.6141662597656, |
|
"logps/rejected": -242.1894989013672, |
|
"loss": 0.442, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 1.3056919574737549, |
|
"rewards/margins": 2.1279501914978027, |
|
"rewards/rejected": -0.822258472442627, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.9259259259259259, |
|
"grad_norm": 33.888318187677584, |
|
"learning_rate": 8.106261601060772e-07, |
|
"logits/chosen": -2.1956303119659424, |
|
"logits/rejected": -2.1856637001037598, |
|
"logps/chosen": -297.5550231933594, |
|
"logps/rejected": -255.35263061523438, |
|
"loss": 0.5761, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 1.0619537830352783, |
|
"rewards/margins": 1.9099775552749634, |
|
"rewards/rejected": -0.8480235934257507, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9259259259259259, |
|
"eval_logits/chosen": -2.1676995754241943, |
|
"eval_logits/rejected": -2.146118402481079, |
|
"eval_logps/chosen": -279.2745056152344, |
|
"eval_logps/rejected": -233.75474548339844, |
|
"eval_loss": 0.46477359533309937, |
|
"eval_rewards/accuracies": 0.8061224222183228, |
|
"eval_rewards/chosen": 1.101247787475586, |
|
"eval_rewards/margins": 2.194356679916382, |
|
"eval_rewards/rejected": -1.0931090116500854, |
|
"eval_runtime": 208.6661, |
|
"eval_samples_per_second": 14.727, |
|
"eval_steps_per_second": 0.235, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9722222222222222, |
|
"grad_norm": 33.67711763185909, |
|
"learning_rate": 7.906455373021128e-07, |
|
"logits/chosen": -2.124380588531494, |
|
"logits/rejected": -2.1406655311584473, |
|
"logps/chosen": -263.89453125, |
|
"logps/rejected": -228.3328094482422, |
|
"loss": 0.4873, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 0.895250141620636, |
|
"rewards/margins": 2.0895395278930664, |
|
"rewards/rejected": -1.1942893266677856, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.0185185185185186, |
|
"grad_norm": 21.244154790846327, |
|
"learning_rate": 7.699377150423672e-07, |
|
"logits/chosen": -2.1806797981262207, |
|
"logits/rejected": -2.1463980674743652, |
|
"logps/chosen": -263.21453857421875, |
|
"logps/rejected": -247.73080444335938, |
|
"loss": 0.3339, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.9122228622436523, |
|
"rewards/margins": 2.487616777420044, |
|
"rewards/rejected": -1.5753939151763916, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0648148148148149, |
|
"grad_norm": 16.299205121924633, |
|
"learning_rate": 7.485545046060271e-07, |
|
"logits/chosen": -2.2146365642547607, |
|
"logits/rejected": -2.222085475921631, |
|
"logps/chosen": -280.36639404296875, |
|
"logps/rejected": -227.9482421875, |
|
"loss": 0.1998, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 2.063042163848877, |
|
"rewards/margins": 3.6736202239990234, |
|
"rewards/rejected": -1.610577940940857, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 16.73718376613808, |
|
"learning_rate": 7.265494071035401e-07, |
|
"logits/chosen": -2.2634971141815186, |
|
"logits/rejected": -2.216996192932129, |
|
"logps/chosen": -259.3006896972656, |
|
"logps/rejected": -246.74990844726562, |
|
"loss": 0.203, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 2.0323493480682373, |
|
"rewards/margins": 3.455160140991211, |
|
"rewards/rejected": -1.4228107929229736, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.1574074074074074, |
|
"grad_norm": 18.487444498570227, |
|
"learning_rate": 7.03977479616039e-07, |
|
"logits/chosen": -2.20058012008667, |
|
"logits/rejected": -2.2042083740234375, |
|
"logps/chosen": -257.96331787109375, |
|
"logps/rejected": -254.6935577392578, |
|
"loss": 0.1666, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.385958433151245, |
|
"rewards/margins": 3.8731255531311035, |
|
"rewards/rejected": -1.4871671199798584, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.2037037037037037, |
|
"grad_norm": 22.810846773089985, |
|
"learning_rate": 6.808951974417076e-07, |
|
"logits/chosen": -2.1897432804107666, |
|
"logits/rejected": -2.141242027282715, |
|
"logps/chosen": -255.61087036132812, |
|
"logps/rejected": -223.12802124023438, |
|
"loss": 0.2098, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 2.2871758937835693, |
|
"rewards/margins": 3.679692506790161, |
|
"rewards/rejected": -1.3925166130065918, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 20.35798660753584, |
|
"learning_rate": 6.573603127937442e-07, |
|
"logits/chosen": -2.2258079051971436, |
|
"logits/rejected": -2.189937114715576, |
|
"logps/chosen": -264.5538024902344, |
|
"logps/rejected": -231.6312255859375, |
|
"loss": 0.2095, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 2.0883944034576416, |
|
"rewards/margins": 3.591538906097412, |
|
"rewards/rejected": -1.5031449794769287, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.2962962962962963, |
|
"grad_norm": 23.04895829278538, |
|
"learning_rate": 6.334317103034652e-07, |
|
"logits/chosen": -2.2638206481933594, |
|
"logits/rejected": -2.228890895843506, |
|
"logps/chosen": -257.40338134765625, |
|
"logps/rejected": -240.62997436523438, |
|
"loss": 0.2394, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 2.0762557983398438, |
|
"rewards/margins": 3.8573691844940186, |
|
"rewards/rejected": -1.7811139822006226, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.3425925925925926, |
|
"grad_norm": 17.62661068504451, |
|
"learning_rate": 6.091692596900827e-07, |
|
"logits/chosen": -2.235206127166748, |
|
"logits/rejected": -2.2013344764709473, |
|
"logps/chosen": -262.8838195800781, |
|
"logps/rejected": -244.3470916748047, |
|
"loss": 0.2448, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 2.0590713024139404, |
|
"rewards/margins": 3.7045459747314453, |
|
"rewards/rejected": -1.6454746723175049, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.3888888888888888, |
|
"grad_norm": 17.569013714484907, |
|
"learning_rate": 5.84633665965777e-07, |
|
"logits/chosen": -2.194655418395996, |
|
"logits/rejected": -2.168844699859619, |
|
"logps/chosen": -251.3390350341797, |
|
"logps/rejected": -280.6144104003906, |
|
"loss": 0.2189, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.981199026107788, |
|
"rewards/margins": 4.223908424377441, |
|
"rewards/rejected": -2.2427096366882324, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.4351851851851851, |
|
"grad_norm": 22.596638318143317, |
|
"learning_rate": 5.598863175508526e-07, |
|
"logits/chosen": -2.153757333755493, |
|
"logits/rejected": -2.102107524871826, |
|
"logps/chosen": -247.99124145507812, |
|
"logps/rejected": -237.85702514648438, |
|
"loss": 0.2229, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.9839226007461548, |
|
"rewards/margins": 3.839322328567505, |
|
"rewards/rejected": -1.85539972782135, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.4814814814814814, |
|
"grad_norm": 20.676833262098377, |
|
"learning_rate": 5.349891326789986e-07, |
|
"logits/chosen": -2.069032669067383, |
|
"logits/rejected": -2.0706586837768555, |
|
"logps/chosen": -255.1598358154297, |
|
"logps/rejected": -253.1689910888672, |
|
"loss": 0.252, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 2.097238063812256, |
|
"rewards/margins": 4.1082868576049805, |
|
"rewards/rejected": -2.011049270629883, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.5277777777777777, |
|
"grad_norm": 20.783062875793597, |
|
"learning_rate": 5.100044044769472e-07, |
|
"logits/chosen": -2.0675559043884277, |
|
"logits/rejected": -2.051846981048584, |
|
"logps/chosen": -283.98687744140625, |
|
"logps/rejected": -251.8580780029297, |
|
"loss": 0.2301, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 2.353699207305908, |
|
"rewards/margins": 3.9306633472442627, |
|
"rewards/rejected": -1.5769641399383545, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.574074074074074, |
|
"grad_norm": 23.57908475198222, |
|
"learning_rate": 4.849946451061443e-07, |
|
"logits/chosen": -1.9997777938842773, |
|
"logits/rejected": -1.967475175857544, |
|
"logps/chosen": -279.0697937011719, |
|
"logps/rejected": -233.81790161132812, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 2.201470375061035, |
|
"rewards/margins": 3.6621429920196533, |
|
"rewards/rejected": -1.4606724977493286, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.6203703703703702, |
|
"grad_norm": 24.506892070167254, |
|
"learning_rate": 4.6002242935639254e-07, |
|
"logits/chosen": -1.9627641439437866, |
|
"logits/rejected": -1.9285609722137451, |
|
"logps/chosen": -253.19888305664062, |
|
"logps/rejected": -248.28207397460938, |
|
"loss": 0.2712, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 2.0500733852386475, |
|
"rewards/margins": 3.925525188446045, |
|
"rewards/rejected": -1.875451683998108, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 21.133393742613016, |
|
"learning_rate": 4.351502380827958e-07, |
|
"logits/chosen": -1.9648786783218384, |
|
"logits/rejected": -1.9206435680389404, |
|
"logps/chosen": -272.1482238769531, |
|
"logps/rejected": -215.24819946289062, |
|
"loss": 0.2378, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.675158143043518, |
|
"rewards/margins": 3.6289196014404297, |
|
"rewards/rejected": -1.9537616968154907, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.7129629629629628, |
|
"grad_norm": 21.130284789458205, |
|
"learning_rate": 4.104403018777323e-07, |
|
"logits/chosen": -2.0080366134643555, |
|
"logits/rejected": -1.9320275783538818, |
|
"logps/chosen": -249.93130493164062, |
|
"logps/rejected": -250.1505584716797, |
|
"loss": 0.2425, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.9229265451431274, |
|
"rewards/margins": 4.077498912811279, |
|
"rewards/rejected": -2.1545729637145996, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.7592592592592593, |
|
"grad_norm": 18.108471689412447, |
|
"learning_rate": 3.8595444536898525e-07, |
|
"logits/chosen": -2.001293659210205, |
|
"logits/rejected": -1.9521024227142334, |
|
"logps/chosen": -265.4033203125, |
|
"logps/rejected": -215.8245086669922, |
|
"loss": 0.2355, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 2.1630921363830566, |
|
"rewards/margins": 3.895209550857544, |
|
"rewards/rejected": -1.7321174144744873, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.8055555555555556, |
|
"grad_norm": 20.623886123317074, |
|
"learning_rate": 3.61753932533607e-07, |
|
"logits/chosen": -2.024538040161133, |
|
"logits/rejected": -1.9703800678253174, |
|
"logps/chosen": -271.60858154296875, |
|
"logps/rejected": -246.217529296875, |
|
"loss": 0.2361, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.9751393795013428, |
|
"rewards/margins": 3.928147554397583, |
|
"rewards/rejected": -1.9530079364776611, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.8518518518518519, |
|
"grad_norm": 22.55367614082194, |
|
"learning_rate": 3.3789931341453557e-07, |
|
"logits/chosen": -2.030125856399536, |
|
"logits/rejected": -1.9982750415802002, |
|
"logps/chosen": -266.38250732421875, |
|
"logps/rejected": -241.37753295898438, |
|
"loss": 0.2547, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 2.0332014560699463, |
|
"rewards/margins": 3.87813138961792, |
|
"rewards/rejected": -1.8449299335479736, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.8518518518518519, |
|
"eval_logits/chosen": -2.0550973415374756, |
|
"eval_logits/rejected": -2.0096685886383057, |
|
"eval_logps/chosen": -274.60986328125, |
|
"eval_logps/rejected": -237.3907470703125, |
|
"eval_loss": 0.46807482838630676, |
|
"eval_rewards/accuracies": 0.8367347121238708, |
|
"eval_rewards/chosen": 1.5677103996276855, |
|
"eval_rewards/margins": 3.0244193077087402, |
|
"eval_rewards/rejected": -1.4567087888717651, |
|
"eval_runtime": 206.9715, |
|
"eval_samples_per_second": 14.847, |
|
"eval_steps_per_second": 0.237, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.8981481481481481, |
|
"grad_norm": 17.819204918874263, |
|
"learning_rate": 3.144502726234889e-07, |
|
"logits/chosen": -2.0324487686157227, |
|
"logits/rejected": -2.0080246925354004, |
|
"logps/chosen": -288.62127685546875, |
|
"logps/rejected": -250.75411987304688, |
|
"loss": 0.2296, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 2.1828694343566895, |
|
"rewards/margins": 3.992137908935547, |
|
"rewards/rejected": -1.8092679977416992, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.9444444444444444, |
|
"grad_norm": 19.148987139894267, |
|
"learning_rate": 2.9146548000917677e-07, |
|
"logits/chosen": -2.0495505332946777, |
|
"logits/rejected": -2.0217370986938477, |
|
"logps/chosen": -242.3642120361328, |
|
"logps/rejected": -242.4574737548828, |
|
"loss": 0.2326, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.7121883630752563, |
|
"rewards/margins": 3.8212177753448486, |
|
"rewards/rejected": -2.1090292930603027, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.9907407407407407, |
|
"grad_norm": 22.69196711859144, |
|
"learning_rate": 2.69002443864469e-07, |
|
"logits/chosen": -2.080535650253296, |
|
"logits/rejected": -2.0337400436401367, |
|
"logps/chosen": -270.77313232421875, |
|
"logps/rejected": -239.3463592529297, |
|
"loss": 0.2649, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.7927030324935913, |
|
"rewards/margins": 3.772850513458252, |
|
"rewards/rejected": -1.9801477193832397, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.037037037037037, |
|
"grad_norm": 13.943990718217357, |
|
"learning_rate": 2.4711736703979015e-07, |
|
"logits/chosen": -2.103257656097412, |
|
"logits/rejected": -2.0635571479797363, |
|
"logps/chosen": -284.7521057128906, |
|
"logps/rejected": -256.8929138183594, |
|
"loss": 0.1477, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.748323678970337, |
|
"rewards/margins": 4.446930885314941, |
|
"rewards/rejected": -2.6986069679260254, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.0833333333333335, |
|
"grad_norm": 15.45294738008002, |
|
"learning_rate": 2.258650063227533e-07, |
|
"logits/chosen": -2.1084115505218506, |
|
"logits/rejected": -2.056737184524536, |
|
"logps/chosen": -259.67041015625, |
|
"logps/rejected": -259.40728759765625, |
|
"loss": 0.1268, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.458908200263977, |
|
"rewards/margins": 4.31704044342041, |
|
"rewards/rejected": -2.858132839202881, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.1296296296296298, |
|
"grad_norm": 12.88270101235706, |
|
"learning_rate": 2.0529853543586216e-07, |
|
"logits/chosen": -2.1149027347564697, |
|
"logits/rejected": -2.0866215229034424, |
|
"logps/chosen": -272.2378234863281, |
|
"logps/rejected": -256.72943115234375, |
|
"loss": 0.1228, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.90532648563385, |
|
"rewards/margins": 4.682445526123047, |
|
"rewards/rejected": -2.777118682861328, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.175925925925926, |
|
"grad_norm": 12.32826597177878, |
|
"learning_rate": 1.854694119950675e-07, |
|
"logits/chosen": -2.113334894180298, |
|
"logits/rejected": -2.05454683303833, |
|
"logps/chosen": -263.45074462890625, |
|
"logps/rejected": -255.12893676757812, |
|
"loss": 0.0999, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.9264167547225952, |
|
"rewards/margins": 4.30756139755249, |
|
"rewards/rejected": -2.3811442852020264, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 15.15405460227757, |
|
"learning_rate": 1.6642724876204657e-07, |
|
"logits/chosen": -2.133556842803955, |
|
"logits/rejected": -2.087902784347534, |
|
"logps/chosen": -270.2631530761719, |
|
"logps/rejected": -253.8778839111328, |
|
"loss": 0.1237, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 2.2957730293273926, |
|
"rewards/margins": 4.74481725692749, |
|
"rewards/rejected": -2.4490439891815186, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.2685185185185186, |
|
"grad_norm": 16.649173808427264, |
|
"learning_rate": 1.4821968951233637e-07, |
|
"logits/chosen": -2.107185125350952, |
|
"logits/rejected": -2.0924763679504395, |
|
"logps/chosen": -258.2135314941406, |
|
"logps/rejected": -233.6188507080078, |
|
"loss": 0.1055, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 2.095768928527832, |
|
"rewards/margins": 4.622010231018066, |
|
"rewards/rejected": -2.5262415409088135, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.314814814814815, |
|
"grad_norm": 14.609854928382536, |
|
"learning_rate": 1.308922898298977e-07, |
|
"logits/chosen": -2.115427255630493, |
|
"logits/rejected": -2.0896975994110107, |
|
"logps/chosen": -263.09588623046875, |
|
"logps/rejected": -243.6695556640625, |
|
"loss": 0.1124, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 2.3170924186706543, |
|
"rewards/margins": 4.6176228523254395, |
|
"rewards/rejected": -2.300530195236206, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.361111111111111, |
|
"grad_norm": 14.676533311284945, |
|
"learning_rate": 1.144884031263681e-07, |
|
"logits/chosen": -2.1398823261260986, |
|
"logits/rejected": -2.0688061714172363, |
|
"logps/chosen": -266.96026611328125, |
|
"logps/rejected": -252.38302612304688, |
|
"loss": 0.1168, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.9593225717544556, |
|
"rewards/margins": 4.533578872680664, |
|
"rewards/rejected": -2.574256420135498, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.4074074074074074, |
|
"grad_norm": 11.628764039431521, |
|
"learning_rate": 9.904907217018e-08, |
|
"logits/chosen": -2.1351375579833984, |
|
"logits/rejected": -2.067322254180908, |
|
"logps/chosen": -256.94305419921875, |
|
"logps/rejected": -237.3994140625, |
|
"loss": 0.0995, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 2.1635353565216064, |
|
"rewards/margins": 4.803465843200684, |
|
"rewards/rejected": -2.6399312019348145, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.4537037037037037, |
|
"grad_norm": 18.178584418637232, |
|
"learning_rate": 8.461292639694517e-08, |
|
"logits/chosen": -2.0998477935791016, |
|
"logits/rejected": -2.060464382171631, |
|
"logps/chosen": -250.79953002929688, |
|
"logps/rejected": -249.4027557373047, |
|
"loss": 0.1143, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 2.085954189300537, |
|
"rewards/margins": 4.921433448791504, |
|
"rewards/rejected": -2.8354785442352295, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 16.4110633269618, |
|
"learning_rate": 7.12160852580314e-08, |
|
"logits/chosen": -2.116481065750122, |
|
"logits/rejected": -2.049785852432251, |
|
"logps/chosen": -253.52597045898438, |
|
"logps/rejected": -239.3027801513672, |
|
"loss": 0.1219, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.850622534751892, |
|
"rewards/margins": 4.572412014007568, |
|
"rewards/rejected": -2.7217891216278076, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.5462962962962963, |
|
"grad_norm": 15.514051763273304, |
|
"learning_rate": 5.889206784915862e-08, |
|
"logits/chosen": -2.1012206077575684, |
|
"logits/rejected": -2.0580103397369385, |
|
"logps/chosen": -258.37774658203125, |
|
"logps/rejected": -258.62432861328125, |
|
"loss": 0.1092, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.9426629543304443, |
|
"rewards/margins": 5.028421878814697, |
|
"rewards/rejected": -3.085759401321411, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.5925925925925926, |
|
"grad_norm": 11.456484271697569, |
|
"learning_rate": 4.767170904512291e-08, |
|
"logits/chosen": -2.0715463161468506, |
|
"logits/rejected": -2.0747976303100586, |
|
"logps/chosen": -238.94155883789062, |
|
"logps/rejected": -255.44912719726562, |
|
"loss": 0.1131, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.1153740882873535, |
|
"rewards/margins": 4.905180931091309, |
|
"rewards/rejected": -2.789807081222534, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.638888888888889, |
|
"grad_norm": 13.870849160723635, |
|
"learning_rate": 3.7583082350481573e-08, |
|
"logits/chosen": -2.0784220695495605, |
|
"logits/rejected": -2.052750587463379, |
|
"logps/chosen": -263.8325500488281, |
|
"logps/rejected": -234.6880340576172, |
|
"loss": 0.1039, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.34096097946167, |
|
"rewards/margins": 4.445528507232666, |
|
"rewards/rejected": -2.104567289352417, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.685185185185185, |
|
"grad_norm": 17.181608986758075, |
|
"learning_rate": 2.86514296592269e-08, |
|
"logits/chosen": -2.086383819580078, |
|
"logits/rejected": -2.040381669998169, |
|
"logps/chosen": -280.8565673828125, |
|
"logps/rejected": -260.22607421875, |
|
"loss": 0.1165, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.262991428375244, |
|
"rewards/margins": 4.693873405456543, |
|
"rewards/rejected": -2.430881977081299, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.7314814814814814, |
|
"grad_norm": 13.52180668792329, |
|
"learning_rate": 2.089909809919227e-08, |
|
"logits/chosen": -2.069549083709717, |
|
"logits/rejected": -2.030317544937134, |
|
"logps/chosen": -254.6141357421875, |
|
"logps/rejected": -262.8475341796875, |
|
"loss": 0.1297, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.047020435333252, |
|
"rewards/margins": 5.004242420196533, |
|
"rewards/rejected": -2.9572222232818604, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 16.042004959202647, |
|
"learning_rate": 1.434548411920622e-08, |
|
"logits/chosen": -2.058128833770752, |
|
"logits/rejected": -2.031694173812866, |
|
"logps/chosen": -252.9358673095703, |
|
"logps/rejected": -270.03570556640625, |
|
"loss": 0.1018, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 2.2302279472351074, |
|
"rewards/margins": 4.993742942810059, |
|
"rewards/rejected": -2.7635152339935303, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"eval_logits/chosen": -2.0879852771759033, |
|
"eval_logits/rejected": -2.0407848358154297, |
|
"eval_logps/chosen": -276.32293701171875, |
|
"eval_logps/rejected": -243.11355590820312, |
|
"eval_loss": 0.46450331807136536, |
|
"eval_rewards/accuracies": 0.8494898080825806, |
|
"eval_rewards/chosen": 1.3964064121246338, |
|
"eval_rewards/margins": 3.4253976345062256, |
|
"eval_rewards/rejected": -2.028991460800171, |
|
"eval_runtime": 206.9293, |
|
"eval_samples_per_second": 14.85, |
|
"eval_steps_per_second": 0.237, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.824074074074074, |
|
"grad_norm": 19.205673267409885, |
|
"learning_rate": 9.00698495888874e-09, |
|
"logits/chosen": -2.068077564239502, |
|
"logits/rejected": -2.027005672454834, |
|
"logps/chosen": -248.1720428466797, |
|
"logps/rejected": -249.2681121826172, |
|
"loss": 0.1352, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.7687886953353882, |
|
"rewards/margins": 4.742621898651123, |
|
"rewards/rejected": -2.973832845687866, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.8703703703703702, |
|
"grad_norm": 18.56397553393815, |
|
"learning_rate": 4.8969576225142975e-09, |
|
"logits/chosen": -2.074991464614868, |
|
"logits/rejected": -2.0308446884155273, |
|
"logps/chosen": -258.87982177734375, |
|
"logps/rejected": -249.52542114257812, |
|
"loss": 0.1173, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 2.297821521759033, |
|
"rewards/margins": 4.811751365661621, |
|
"rewards/rejected": -2.513930082321167, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.9166666666666665, |
|
"grad_norm": 19.703259917873964, |
|
"learning_rate": 2.0256854595881446e-09, |
|
"logits/chosen": -2.0802104473114014, |
|
"logits/rejected": -2.04276704788208, |
|
"logps/chosen": -257.5400390625, |
|
"logps/rejected": -241.93057250976562, |
|
"loss": 0.1309, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.9402424097061157, |
|
"rewards/margins": 4.416214942932129, |
|
"rewards/rejected": -2.4759726524353027, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.962962962962963, |
|
"grad_norm": 15.35409361746528, |
|
"learning_rate": 4.0035243575342604e-10, |
|
"logits/chosen": -2.100257396697998, |
|
"logits/rejected": -2.023787260055542, |
|
"logps/chosen": -279.096923828125, |
|
"logps/rejected": -257.52032470703125, |
|
"loss": 0.1095, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 2.575255870819092, |
|
"rewards/margins": 5.259024620056152, |
|
"rewards/rejected": -2.6837692260742188, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 324, |
|
"total_flos": 3820433807769600.0, |
|
"train_loss": 0.29292676165515996, |
|
"train_runtime": 11688.2747, |
|
"train_samples_per_second": 7.096, |
|
"train_steps_per_second": 0.028 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 324, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3820433807769600.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|