|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.5042735042735043, |
|
"eval_steps": 40, |
|
"global_step": 440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.017094017094017096, |
|
"grad_norm": 35.038580788061665, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.7457876205444336, |
|
"logits/rejected": -2.7444841861724854, |
|
"logps/chosen": -164.26461791992188, |
|
"logps/rejected": -170.55870056152344, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.26875001192092896, |
|
"rewards/chosen": 0.003455913159996271, |
|
"rewards/margins": -0.0019886991940438747, |
|
"rewards/rejected": 0.0054446132853627205, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03418803418803419, |
|
"grad_norm": 36.203903910498276, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -2.7106502056121826, |
|
"logits/rejected": -2.716397523880005, |
|
"logps/chosen": -171.80043029785156, |
|
"logps/rejected": -165.20602416992188, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.012000308372080326, |
|
"rewards/margins": 0.0025437879376113415, |
|
"rewards/rejected": 0.009456520900130272, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05128205128205128, |
|
"grad_norm": 33.9576577784673, |
|
"learning_rate": 9.999177507263144e-07, |
|
"logits/chosen": -2.651571750640869, |
|
"logits/rejected": -2.629457473754883, |
|
"logps/chosen": -174.04080200195312, |
|
"logps/rejected": -174.0542755126953, |
|
"loss": 0.6698, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.23909731209278107, |
|
"rewards/margins": 0.10868903249502182, |
|
"rewards/rejected": 0.13040827214717865, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.06837606837606838, |
|
"grad_norm": 34.33646066636181, |
|
"learning_rate": 9.996710299650301e-07, |
|
"logits/chosen": -2.476440668106079, |
|
"logits/rejected": -2.450225353240967, |
|
"logps/chosen": -158.1311798095703, |
|
"logps/rejected": -158.0066680908203, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.4318675100803375, |
|
"rewards/margins": 0.14549395442008972, |
|
"rewards/rejected": 0.2863735556602478, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08547008547008547, |
|
"grad_norm": 33.16430522723429, |
|
"learning_rate": 9.992599188865604e-07, |
|
"logits/chosen": -2.3086318969726562, |
|
"logits/rejected": -2.3104796409606934, |
|
"logps/chosen": -150.59771728515625, |
|
"logps/rejected": -156.85037231445312, |
|
"loss": 0.6494, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.5047669410705566, |
|
"rewards/margins": 0.16554531455039978, |
|
"rewards/rejected": 0.33922165632247925, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.10256410256410256, |
|
"grad_norm": 34.52861424862365, |
|
"learning_rate": 9.98684552745256e-07, |
|
"logits/chosen": -2.217874050140381, |
|
"logits/rejected": -2.2254481315612793, |
|
"logps/chosen": -161.29412841796875, |
|
"logps/rejected": -161.40841674804688, |
|
"loss": 0.6295, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.4176379144191742, |
|
"rewards/margins": 0.26531916856765747, |
|
"rewards/rejected": 0.15231874585151672, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.11965811965811966, |
|
"grad_norm": 31.455117829218544, |
|
"learning_rate": 9.979451208349055e-07, |
|
"logits/chosen": -2.2608728408813477, |
|
"logits/rejected": -2.246007204055786, |
|
"logps/chosen": -171.71456909179688, |
|
"logps/rejected": -174.46578979492188, |
|
"loss": 0.6305, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.01912705972790718, |
|
"rewards/margins": 0.31441593170166016, |
|
"rewards/rejected": -0.33354294300079346, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.13675213675213677, |
|
"grad_norm": 31.67318837058587, |
|
"learning_rate": 9.970418664264595e-07, |
|
"logits/chosen": -2.345672130584717, |
|
"logits/rejected": -2.331491470336914, |
|
"logps/chosen": -171.24766540527344, |
|
"logps/rejected": -176.8189697265625, |
|
"loss": 0.5989, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.27867692708969116, |
|
"rewards/margins": 0.5290472507476807, |
|
"rewards/rejected": -0.8077241778373718, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13675213675213677, |
|
"eval_logits/chosen": -2.4102065563201904, |
|
"eval_logits/rejected": -2.401230573654175, |
|
"eval_logps/chosen": -162.36439514160156, |
|
"eval_logps/rejected": -167.4954071044922, |
|
"eval_loss": 0.6069236993789673, |
|
"eval_rewards/accuracies": 0.6365384459495544, |
|
"eval_rewards/chosen": -0.388705849647522, |
|
"eval_rewards/margins": 0.47280558943748474, |
|
"eval_rewards/rejected": -0.8615114688873291, |
|
"eval_runtime": 509.918, |
|
"eval_samples_per_second": 16.305, |
|
"eval_steps_per_second": 0.255, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.15384615384615385, |
|
"grad_norm": 36.18313806223269, |
|
"learning_rate": 9.95975086687994e-07, |
|
"logits/chosen": -2.44050669670105, |
|
"logits/rejected": -2.4460220336914062, |
|
"logps/chosen": -163.82875061035156, |
|
"logps/rejected": -167.35989379882812, |
|
"loss": 0.6146, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.31098368763923645, |
|
"rewards/margins": 0.46269193291664124, |
|
"rewards/rejected": -0.7736755609512329, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.17094017094017094, |
|
"grad_norm": 31.13412274683678, |
|
"learning_rate": 9.947451325869439e-07, |
|
"logits/chosen": -2.501091718673706, |
|
"logits/rejected": -2.4991250038146973, |
|
"logps/chosen": -172.09686279296875, |
|
"logps/rejected": -177.7747802734375, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.212348073720932, |
|
"rewards/margins": 0.6062799692153931, |
|
"rewards/rejected": -0.8186280131340027, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.18803418803418803, |
|
"grad_norm": 31.508672436862835, |
|
"learning_rate": 9.933524087746347e-07, |
|
"logits/chosen": -2.437525510787964, |
|
"logits/rejected": -2.4285693168640137, |
|
"logps/chosen": -168.1316375732422, |
|
"logps/rejected": -175.23193359375, |
|
"loss": 0.571, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.513076901435852, |
|
"rewards/margins": 0.7702310681343079, |
|
"rewards/rejected": -1.2833080291748047, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.20512820512820512, |
|
"grad_norm": 30.148068867306787, |
|
"learning_rate": 9.917973734531549e-07, |
|
"logits/chosen": -2.431530475616455, |
|
"logits/rejected": -2.431729793548584, |
|
"logps/chosen": -159.38168334960938, |
|
"logps/rejected": -170.52500915527344, |
|
"loss": 0.5762, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.34855490922927856, |
|
"rewards/margins": 0.5969334244728088, |
|
"rewards/rejected": -0.9454883337020874, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 32.03814968183332, |
|
"learning_rate": 9.90080538224607e-07, |
|
"logits/chosen": -2.533193588256836, |
|
"logits/rejected": -2.5252978801727295, |
|
"logps/chosen": -157.30966186523438, |
|
"logps/rejected": -166.26011657714844, |
|
"loss": 0.5643, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.007600936107337475, |
|
"rewards/margins": 0.5010749697685242, |
|
"rewards/rejected": -0.5086758732795715, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.23931623931623933, |
|
"grad_norm": 29.16308768569833, |
|
"learning_rate": 9.882024679227938e-07, |
|
"logits/chosen": -2.5899624824523926, |
|
"logits/rejected": -2.5779967308044434, |
|
"logps/chosen": -178.4553985595703, |
|
"logps/rejected": -179.71542358398438, |
|
"loss": 0.5464, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.47189587354660034, |
|
"rewards/margins": 0.8304598927497864, |
|
"rewards/rejected": -1.3023556470870972, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 28.918531347661485, |
|
"learning_rate": 9.861637804273881e-07, |
|
"logits/chosen": -2.578892469406128, |
|
"logits/rejected": -2.5758416652679443, |
|
"logps/chosen": -162.60537719726562, |
|
"logps/rejected": -170.6789093017578, |
|
"loss": 0.5553, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.45147842168807983, |
|
"rewards/margins": 0.6994724273681641, |
|
"rewards/rejected": -1.1509509086608887, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.27350427350427353, |
|
"grad_norm": 26.98866754941649, |
|
"learning_rate": 9.83965146460653e-07, |
|
"logits/chosen": -2.54936146736145, |
|
"logits/rejected": -2.5406956672668457, |
|
"logps/chosen": -168.81484985351562, |
|
"logps/rejected": -179.770751953125, |
|
"loss": 0.5452, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6899678111076355, |
|
"rewards/margins": 0.8549306988716125, |
|
"rewards/rejected": -1.544898509979248, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.27350427350427353, |
|
"eval_logits/chosen": -2.53336238861084, |
|
"eval_logits/rejected": -2.517695665359497, |
|
"eval_logps/chosen": -167.28964233398438, |
|
"eval_logps/rejected": -177.21824645996094, |
|
"eval_loss": 0.5331124663352966, |
|
"eval_rewards/accuracies": 0.7134615182876587, |
|
"eval_rewards/chosen": -0.8812309503555298, |
|
"eval_rewards/margins": 0.9525622725486755, |
|
"eval_rewards/rejected": -1.8337931632995605, |
|
"eval_runtime": 510.0922, |
|
"eval_samples_per_second": 16.299, |
|
"eval_steps_per_second": 0.255, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2905982905982906, |
|
"grad_norm": 34.783908892421536, |
|
"learning_rate": 9.816072893667758e-07, |
|
"logits/chosen": -2.5432825088500977, |
|
"logits/rejected": -2.5159504413604736, |
|
"logps/chosen": -174.62197875976562, |
|
"logps/rejected": -185.89413452148438, |
|
"loss": 0.5581, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.0434839725494385, |
|
"rewards/margins": 1.0283188819885254, |
|
"rewards/rejected": -2.0718026161193848, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.3076923076923077, |
|
"grad_norm": 26.697686805838906, |
|
"learning_rate": 9.790909848738904e-07, |
|
"logits/chosen": -2.5102508068084717, |
|
"logits/rejected": -2.5222485065460205, |
|
"logps/chosen": -175.47544860839844, |
|
"logps/rejected": -183.92678833007812, |
|
"loss": 0.5208, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.9199908971786499, |
|
"rewards/margins": 0.8521744608879089, |
|
"rewards/rejected": -1.7721655368804932, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.3247863247863248, |
|
"grad_norm": 30.125094604814798, |
|
"learning_rate": 9.764170608388647e-07, |
|
"logits/chosen": -2.514260768890381, |
|
"logits/rejected": -2.4829812049865723, |
|
"logps/chosen": -167.62655639648438, |
|
"logps/rejected": -174.2395477294922, |
|
"loss": 0.5242, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6241778135299683, |
|
"rewards/margins": 1.0742968320846558, |
|
"rewards/rejected": -1.6984745264053345, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.3418803418803419, |
|
"grad_norm": 27.550843374580296, |
|
"learning_rate": 9.735863969749371e-07, |
|
"logits/chosen": -2.4171032905578613, |
|
"logits/rejected": -2.381608486175537, |
|
"logps/chosen": -177.05935668945312, |
|
"logps/rejected": -188.4621124267578, |
|
"loss": 0.5002, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.7831762433052063, |
|
"rewards/margins": 1.0672458410263062, |
|
"rewards/rejected": -1.8504221439361572, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.358974358974359, |
|
"grad_norm": 30.39392617500016, |
|
"learning_rate": 9.705999245622956e-07, |
|
"logits/chosen": -2.3619236946105957, |
|
"logits/rejected": -2.3391060829162598, |
|
"logps/chosen": -170.48300170898438, |
|
"logps/rejected": -183.28384399414062, |
|
"loss": 0.5026, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8889726400375366, |
|
"rewards/margins": 0.9097515940666199, |
|
"rewards/rejected": -1.7987244129180908, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.37606837606837606, |
|
"grad_norm": 26.741945030347612, |
|
"learning_rate": 9.674586261416873e-07, |
|
"logits/chosen": -2.2946972846984863, |
|
"logits/rejected": -2.2440435886383057, |
|
"logps/chosen": -179.06390380859375, |
|
"logps/rejected": -188.00010681152344, |
|
"loss": 0.5206, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6539386510848999, |
|
"rewards/margins": 1.0372655391693115, |
|
"rewards/rejected": -1.691204309463501, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.39316239316239315, |
|
"grad_norm": 33.116742735027486, |
|
"learning_rate": 9.641635351911664e-07, |
|
"logits/chosen": -2.218276262283325, |
|
"logits/rejected": -2.18500018119812, |
|
"logps/chosen": -171.17381286621094, |
|
"logps/rejected": -183.25845336914062, |
|
"loss": 0.4801, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.9279203414916992, |
|
"rewards/margins": 1.2727015018463135, |
|
"rewards/rejected": -2.200622081756592, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.41025641025641024, |
|
"grad_norm": 27.185641229760538, |
|
"learning_rate": 9.607157357860821e-07, |
|
"logits/chosen": -2.124584436416626, |
|
"logits/rejected": -2.0961549282073975, |
|
"logps/chosen": -189.48277282714844, |
|
"logps/rejected": -203.43951416015625, |
|
"loss": 0.5026, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.2869656085968018, |
|
"rewards/margins": 1.3039339780807495, |
|
"rewards/rejected": -2.5908992290496826, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.41025641025641024, |
|
"eval_logits/chosen": -2.0268211364746094, |
|
"eval_logits/rejected": -1.9764775037765503, |
|
"eval_logps/chosen": -172.888671875, |
|
"eval_logps/rejected": -185.58355712890625, |
|
"eval_loss": 0.49246644973754883, |
|
"eval_rewards/accuracies": 0.7442307472229004, |
|
"eval_rewards/chosen": -1.441135048866272, |
|
"eval_rewards/margins": 1.2291908264160156, |
|
"eval_rewards/rejected": -2.670325756072998, |
|
"eval_runtime": 510.1247, |
|
"eval_samples_per_second": 16.298, |
|
"eval_steps_per_second": 0.255, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.42735042735042733, |
|
"grad_norm": 31.03461706328688, |
|
"learning_rate": 9.571163622424225e-07, |
|
"logits/chosen": -1.944964051246643, |
|
"logits/rejected": -1.9178746938705444, |
|
"logps/chosen": -175.3327178955078, |
|
"logps/rejected": -188.2616729736328, |
|
"loss": 0.5017, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.579502820968628, |
|
"rewards/margins": 1.2485122680664062, |
|
"rewards/rejected": -2.828014850616455, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 29.080520770184428, |
|
"learning_rate": 9.533665987436261e-07, |
|
"logits/chosen": -1.8825464248657227, |
|
"logits/rejected": -1.8078832626342773, |
|
"logps/chosen": -178.3484649658203, |
|
"logps/rejected": -197.55380249023438, |
|
"loss": 0.4983, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.5868518352508545, |
|
"rewards/margins": 1.2471343278884888, |
|
"rewards/rejected": -2.8339860439300537, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.46153846153846156, |
|
"grad_norm": 28.903021536294002, |
|
"learning_rate": 9.494676789509899e-07, |
|
"logits/chosen": -1.8585374355316162, |
|
"logits/rejected": -1.8128669261932373, |
|
"logps/chosen": -178.5911407470703, |
|
"logps/rejected": -195.90933227539062, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.2965319156646729, |
|
"rewards/margins": 1.4173026084899902, |
|
"rewards/rejected": -2.713834285736084, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.47863247863247865, |
|
"grad_norm": 27.5476391641307, |
|
"learning_rate": 9.454208855977985e-07, |
|
"logits/chosen": -1.920654296875, |
|
"logits/rejected": -1.8412939310073853, |
|
"logps/chosen": -179.1053924560547, |
|
"logps/rejected": -196.11526489257812, |
|
"loss": 0.4753, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.5140180587768555, |
|
"rewards/margins": 1.5388453006744385, |
|
"rewards/rejected": -3.052863121032715, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.49572649572649574, |
|
"grad_norm": 30.03317842923354, |
|
"learning_rate": 9.41227550067308e-07, |
|
"logits/chosen": -1.9514515399932861, |
|
"logits/rejected": -1.949883222579956, |
|
"logps/chosen": -178.63250732421875, |
|
"logps/rejected": -191.42721557617188, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.513338327407837, |
|
"rewards/margins": 1.4887291193008423, |
|
"rewards/rejected": -3.0020670890808105, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 30.28469957381902, |
|
"learning_rate": 9.36889051954725e-07, |
|
"logits/chosen": -2.0093894004821777, |
|
"logits/rejected": -1.9657704830169678, |
|
"logps/chosen": -180.35043334960938, |
|
"logps/rejected": -197.2502899169922, |
|
"loss": 0.4895, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.602224588394165, |
|
"rewards/margins": 1.6883083581924438, |
|
"rewards/rejected": -3.2905325889587402, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5299145299145299, |
|
"grad_norm": 28.420242591686232, |
|
"learning_rate": 9.324068186133245e-07, |
|
"logits/chosen": -1.9976894855499268, |
|
"logits/rejected": -1.9886022806167603, |
|
"logps/chosen": -171.70602416992188, |
|
"logps/rejected": -185.99795532226562, |
|
"loss": 0.4608, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.2952425479888916, |
|
"rewards/margins": 1.7483227252960205, |
|
"rewards/rejected": -3.043565034866333, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.5470085470085471, |
|
"grad_norm": 26.601543429998234, |
|
"learning_rate": 9.277823246848536e-07, |
|
"logits/chosen": -2.056879758834839, |
|
"logits/rejected": -1.9998328685760498, |
|
"logps/chosen": -186.3706817626953, |
|
"logps/rejected": -196.63290405273438, |
|
"loss": 0.4511, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.2312135696411133, |
|
"rewards/margins": 1.352858304977417, |
|
"rewards/rejected": -2.5840718746185303, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.5470085470085471, |
|
"eval_logits/chosen": -2.070892095565796, |
|
"eval_logits/rejected": -2.0279953479766846, |
|
"eval_logps/chosen": -171.76034545898438, |
|
"eval_logps/rejected": -189.1643829345703, |
|
"eval_loss": 0.4683005213737488, |
|
"eval_rewards/accuracies": 0.762499988079071, |
|
"eval_rewards/chosen": -1.328302264213562, |
|
"eval_rewards/margins": 1.70010507106781, |
|
"eval_rewards/rejected": -3.028407096862793, |
|
"eval_runtime": 509.9565, |
|
"eval_samples_per_second": 16.303, |
|
"eval_steps_per_second": 0.255, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.5641025641025641, |
|
"grad_norm": 41.76296476638838, |
|
"learning_rate": 9.230170916143793e-07, |
|
"logits/chosen": -2.1190731525421143, |
|
"logits/rejected": -2.083359956741333, |
|
"logps/chosen": -176.87539672851562, |
|
"logps/rejected": -198.44384765625, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.2975060939788818, |
|
"rewards/margins": 1.6890850067138672, |
|
"rewards/rejected": -2.98659086227417, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.5811965811965812, |
|
"grad_norm": 28.83194976337172, |
|
"learning_rate": 9.181126871497378e-07, |
|
"logits/chosen": -2.175851583480835, |
|
"logits/rejected": -2.1391243934631348, |
|
"logps/chosen": -178.2881317138672, |
|
"logps/rejected": -197.88473510742188, |
|
"loss": 0.4813, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.2544641494750977, |
|
"rewards/margins": 1.7747846841812134, |
|
"rewards/rejected": -3.0292489528656006, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5982905982905983, |
|
"grad_norm": 30.93659066586097, |
|
"learning_rate": 9.130707248257491e-07, |
|
"logits/chosen": -2.313814640045166, |
|
"logits/rejected": -2.2677135467529297, |
|
"logps/chosen": -170.06781005859375, |
|
"logps/rejected": -177.8175811767578, |
|
"loss": 0.4863, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.0524061918258667, |
|
"rewards/margins": 1.3644572496414185, |
|
"rewards/rejected": -2.416863441467285, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.6153846153846154, |
|
"grad_norm": 25.018999438635433, |
|
"learning_rate": 9.078928634333698e-07, |
|
"logits/chosen": -2.302171230316162, |
|
"logits/rejected": -2.2788572311401367, |
|
"logps/chosen": -179.72390747070312, |
|
"logps/rejected": -197.12283325195312, |
|
"loss": 0.4553, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.6731274724006653, |
|
"rewards/margins": 1.6728944778442383, |
|
"rewards/rejected": -2.346021890640259, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.6324786324786325, |
|
"grad_norm": 28.576400660174777, |
|
"learning_rate": 9.025808064739549e-07, |
|
"logits/chosen": -2.2794651985168457, |
|
"logits/rejected": -2.2391860485076904, |
|
"logps/chosen": -175.87045288085938, |
|
"logps/rejected": -189.4848175048828, |
|
"loss": 0.4854, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.8901998400688171, |
|
"rewards/margins": 1.4675487279891968, |
|
"rewards/rejected": -2.357748508453369, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.6495726495726496, |
|
"grad_norm": 25.73471562251865, |
|
"learning_rate": 8.971363015988113e-07, |
|
"logits/chosen": -2.1966824531555176, |
|
"logits/rejected": -2.1603925228118896, |
|
"logps/chosen": -172.0600128173828, |
|
"logps/rejected": -191.96176147460938, |
|
"loss": 0.4681, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.9620615243911743, |
|
"rewards/margins": 1.4954371452331543, |
|
"rewards/rejected": -2.457498550415039, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 34.912982133976655, |
|
"learning_rate": 8.91561140034225e-07, |
|
"logits/chosen": -2.1389029026031494, |
|
"logits/rejected": -2.0825791358947754, |
|
"logps/chosen": -174.3153839111328, |
|
"logps/rejected": -194.2677459716797, |
|
"loss": 0.4935, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.4726169109344482, |
|
"rewards/margins": 1.4599871635437012, |
|
"rewards/rejected": -2.9326040744781494, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.6837606837606838, |
|
"grad_norm": 25.756167591259292, |
|
"learning_rate": 8.858571559921537e-07, |
|
"logits/chosen": -2.135298013687134, |
|
"logits/rejected": -2.067862033843994, |
|
"logps/chosen": -178.73361206054688, |
|
"logps/rejected": -193.21209716796875, |
|
"loss": 0.4562, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.452704668045044, |
|
"rewards/margins": 1.6391651630401611, |
|
"rewards/rejected": -3.091869831085205, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6837606837606838, |
|
"eval_logits/chosen": -2.1462392807006836, |
|
"eval_logits/rejected": -2.1028637886047363, |
|
"eval_logps/chosen": -173.41998291015625, |
|
"eval_logps/rejected": -191.55532836914062, |
|
"eval_loss": 0.4528014361858368, |
|
"eval_rewards/accuracies": 0.7567307949066162, |
|
"eval_rewards/chosen": -1.4942626953125, |
|
"eval_rewards/margins": 1.7732419967651367, |
|
"eval_rewards/rejected": -3.2675046920776367, |
|
"eval_runtime": 510.9487, |
|
"eval_samples_per_second": 16.272, |
|
"eval_steps_per_second": 0.254, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7008547008547008, |
|
"grad_norm": 26.77931167801656, |
|
"learning_rate": 8.800262260667754e-07, |
|
"logits/chosen": -2.1584880352020264, |
|
"logits/rejected": -2.100416660308838, |
|
"logps/chosen": -165.63743591308594, |
|
"logps/rejected": -183.36476135253906, |
|
"loss": 0.4653, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.409201741218567, |
|
"rewards/margins": 1.6899499893188477, |
|
"rewards/rejected": -3.099151611328125, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.717948717948718, |
|
"grad_norm": 25.986078947597964, |
|
"learning_rate": 8.740702686170954e-07, |
|
"logits/chosen": -2.2075798511505127, |
|
"logits/rejected": -2.151484727859497, |
|
"logps/chosen": -179.00509643554688, |
|
"logps/rejected": -194.68353271484375, |
|
"loss": 0.4426, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.2429417371749878, |
|
"rewards/margins": 1.6721550226211548, |
|
"rewards/rejected": -2.9150967597961426, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.7350427350427351, |
|
"grad_norm": 24.89101303634129, |
|
"learning_rate": 8.679912431358109e-07, |
|
"logits/chosen": -2.1802072525024414, |
|
"logits/rejected": -2.1238255500793457, |
|
"logps/chosen": -172.57705688476562, |
|
"logps/rejected": -189.31666564941406, |
|
"loss": 0.4521, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.4200295209884644, |
|
"rewards/margins": 1.9437878131866455, |
|
"rewards/rejected": -3.3638176918029785, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.7521367521367521, |
|
"grad_norm": 27.617679879143566, |
|
"learning_rate": 8.617911496046445e-07, |
|
"logits/chosen": -2.174743413925171, |
|
"logits/rejected": -2.1131985187530518, |
|
"logps/chosen": -171.0723876953125, |
|
"logps/rejected": -189.23275756835938, |
|
"loss": 0.4655, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.5752933025360107, |
|
"rewards/margins": 1.7622945308685303, |
|
"rewards/rejected": -3.337587833404541, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 25.912722321128637, |
|
"learning_rate": 8.554720278363547e-07, |
|
"logits/chosen": -2.206986427307129, |
|
"logits/rejected": -2.1668283939361572, |
|
"logps/chosen": -175.4432830810547, |
|
"logps/rejected": -193.02845764160156, |
|
"loss": 0.4693, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.6182912588119507, |
|
"rewards/margins": 1.7047646045684814, |
|
"rewards/rejected": -3.3230559825897217, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.7863247863247863, |
|
"grad_norm": 27.20181876713083, |
|
"learning_rate": 8.490359568036445e-07, |
|
"logits/chosen": -2.3055601119995117, |
|
"logits/rejected": -2.2838051319122314, |
|
"logps/chosen": -183.21449279785156, |
|
"logps/rejected": -205.37521362304688, |
|
"loss": 0.4524, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.517019271850586, |
|
"rewards/margins": 1.586157202720642, |
|
"rewards/rejected": -3.1031765937805176, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.8034188034188035, |
|
"grad_norm": 73.7275222246594, |
|
"learning_rate": 8.424850539551856e-07, |
|
"logits/chosen": -2.367276668548584, |
|
"logits/rejected": -2.349586009979248, |
|
"logps/chosen": -174.82656860351562, |
|
"logps/rejected": -191.1867218017578, |
|
"loss": 0.4536, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4777748584747314, |
|
"rewards/margins": 1.7120048999786377, |
|
"rewards/rejected": -3.1897799968719482, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.8205128205128205, |
|
"grad_norm": 23.009281700277114, |
|
"learning_rate": 8.358214745189829e-07, |
|
"logits/chosen": -2.4104866981506348, |
|
"logits/rejected": -2.3766913414001465, |
|
"logps/chosen": -184.68222045898438, |
|
"logps/rejected": -205.4649200439453, |
|
"loss": 0.4189, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.6187865734100342, |
|
"rewards/margins": 2.1064658164978027, |
|
"rewards/rejected": -3.725252151489258, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8205128205128205, |
|
"eval_logits/chosen": -2.447181224822998, |
|
"eval_logits/rejected": -2.4165050983428955, |
|
"eval_logps/chosen": -177.78672790527344, |
|
"eval_logps/rejected": -197.77915954589844, |
|
"eval_loss": 0.44941428303718567, |
|
"eval_rewards/accuracies": 0.7663461565971375, |
|
"eval_rewards/chosen": -1.9309390783309937, |
|
"eval_rewards/margins": 1.958947777748108, |
|
"eval_rewards/rejected": -3.8898868560791016, |
|
"eval_runtime": 510.6066, |
|
"eval_samples_per_second": 16.283, |
|
"eval_steps_per_second": 0.255, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8376068376068376, |
|
"grad_norm": 25.877339189588067, |
|
"learning_rate": 8.290474107933114e-07, |
|
"logits/chosen": -2.450867176055908, |
|
"logits/rejected": -2.427006483078003, |
|
"logps/chosen": -186.76683044433594, |
|
"logps/rejected": -206.23147583007812, |
|
"loss": 0.4441, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.9867289066314697, |
|
"rewards/margins": 2.0301365852355957, |
|
"rewards/rejected": -4.0168657302856445, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.8547008547008547, |
|
"grad_norm": 29.453953450785107, |
|
"learning_rate": 8.221650914254565e-07, |
|
"logits/chosen": -2.464049816131592, |
|
"logits/rejected": -2.4303317070007324, |
|
"logps/chosen": -184.5537872314453, |
|
"logps/rejected": -196.9418487548828, |
|
"loss": 0.4919, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.074887752532959, |
|
"rewards/margins": 1.6710395812988281, |
|
"rewards/rejected": -3.745927333831787, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.8717948717948718, |
|
"grad_norm": 26.569329016808155, |
|
"learning_rate": 8.151767806784953e-07, |
|
"logits/chosen": -2.4366822242736816, |
|
"logits/rejected": -2.4094901084899902, |
|
"logps/chosen": -188.01376342773438, |
|
"logps/rejected": -199.4722442626953, |
|
"loss": 0.4651, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7826770544052124, |
|
"rewards/margins": 1.4153121709823608, |
|
"rewards/rejected": -3.1979892253875732, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 26.68703703609934, |
|
"learning_rate": 8.080847776863608e-07, |
|
"logits/chosen": -2.4146647453308105, |
|
"logits/rejected": -2.386958360671997, |
|
"logps/chosen": -186.34954833984375, |
|
"logps/rejected": -200.6859588623047, |
|
"loss": 0.4474, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.0444929599761963, |
|
"rewards/margins": 1.9850835800170898, |
|
"rewards/rejected": -3.029576539993286, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.905982905982906, |
|
"grad_norm": 27.397197833953705, |
|
"learning_rate": 8.008914156974333e-07, |
|
"logits/chosen": -2.3684728145599365, |
|
"logits/rejected": -2.3436620235443115, |
|
"logps/chosen": -172.65553283691406, |
|
"logps/rejected": -194.290283203125, |
|
"loss": 0.4427, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.7620879411697388, |
|
"rewards/margins": 1.6837621927261353, |
|
"rewards/rejected": -2.445849895477295, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"grad_norm": 25.92494835175286, |
|
"learning_rate": 7.935990613069086e-07, |
|
"logits/chosen": -2.312016010284424, |
|
"logits/rejected": -2.2800450325012207, |
|
"logps/chosen": -171.95416259765625, |
|
"logps/rejected": -193.59378051757812, |
|
"loss": 0.445, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8496273756027222, |
|
"rewards/margins": 2.038412094116211, |
|
"rewards/rejected": -2.8880395889282227, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.9401709401709402, |
|
"grad_norm": 24.604360112973207, |
|
"learning_rate": 7.862101136781946e-07, |
|
"logits/chosen": -2.2761037349700928, |
|
"logits/rejected": -2.241076707839966, |
|
"logps/chosen": -169.81842041015625, |
|
"logps/rejected": -192.06903076171875, |
|
"loss": 0.4173, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.2587201595306396, |
|
"rewards/margins": 1.723170280456543, |
|
"rewards/rejected": -2.9818906784057617, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.9572649572649573, |
|
"grad_norm": 33.24758765533294, |
|
"learning_rate": 7.78727003753595e-07, |
|
"logits/chosen": -2.2211129665374756, |
|
"logits/rejected": -2.1957130432128906, |
|
"logps/chosen": -172.84083557128906, |
|
"logps/rejected": -195.34005737304688, |
|
"loss": 0.4484, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.557621717453003, |
|
"rewards/margins": 2.1072278022766113, |
|
"rewards/rejected": -3.6648497581481934, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.9572649572649573, |
|
"eval_logits/chosen": -2.199989080429077, |
|
"eval_logits/rejected": -2.158634662628174, |
|
"eval_logps/chosen": -175.8746337890625, |
|
"eval_logps/rejected": -197.1187286376953, |
|
"eval_loss": 0.4431803524494171, |
|
"eval_rewards/accuracies": 0.7634615302085876, |
|
"eval_rewards/chosen": -1.7397303581237793, |
|
"eval_rewards/margins": 2.084113836288452, |
|
"eval_rewards/rejected": -3.8238441944122314, |
|
"eval_runtime": 510.3208, |
|
"eval_samples_per_second": 16.292, |
|
"eval_steps_per_second": 0.255, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.9743589743589743, |
|
"grad_norm": 23.855033657318305, |
|
"learning_rate": 7.711521934545342e-07, |
|
"logits/chosen": -2.1965622901916504, |
|
"logits/rejected": -2.1558597087860107, |
|
"logps/chosen": -185.0030059814453, |
|
"logps/rejected": -205.0108642578125, |
|
"loss": 0.4233, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4067909717559814, |
|
"rewards/margins": 2.130743980407715, |
|
"rewards/rejected": -3.537534713745117, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.9914529914529915, |
|
"grad_norm": 27.781853522970472, |
|
"learning_rate": 7.63488174871594e-07, |
|
"logits/chosen": -2.209836483001709, |
|
"logits/rejected": -2.1382126808166504, |
|
"logps/chosen": -181.1676788330078, |
|
"logps/rejected": -201.53524780273438, |
|
"loss": 0.4064, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.1297556161880493, |
|
"rewards/margins": 2.355384349822998, |
|
"rewards/rejected": -3.485139846801758, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.0085470085470085, |
|
"grad_norm": 16.38566211549952, |
|
"learning_rate": 7.557374694446221e-07, |
|
"logits/chosen": -2.191758632659912, |
|
"logits/rejected": -2.182082176208496, |
|
"logps/chosen": -169.5143585205078, |
|
"logps/rejected": -191.7115936279297, |
|
"loss": 0.3182, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.8321866989135742, |
|
"rewards/margins": 2.2344491481781006, |
|
"rewards/rejected": -3.066636085510254, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.0256410256410255, |
|
"grad_norm": 16.985648781909422, |
|
"learning_rate": 7.479026271331863e-07, |
|
"logits/chosen": -2.267702579498291, |
|
"logits/rejected": -2.205897092819214, |
|
"logps/chosen": -169.5579833984375, |
|
"logps/rejected": -197.75802612304688, |
|
"loss": 0.2168, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.39328667521476746, |
|
"rewards/margins": 2.9768226146698, |
|
"rewards/rejected": -3.3701090812683105, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0427350427350428, |
|
"grad_norm": 18.84075344730222, |
|
"learning_rate": 7.399862255776448e-07, |
|
"logits/chosen": -2.3038105964660645, |
|
"logits/rejected": -2.2806408405303955, |
|
"logps/chosen": -164.28530883789062, |
|
"logps/rejected": -197.4765625, |
|
"loss": 0.2127, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -1.0233527421951294, |
|
"rewards/margins": 3.003648042678833, |
|
"rewards/rejected": -4.02700138092041, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.0598290598290598, |
|
"grad_norm": 22.421420935891007, |
|
"learning_rate": 7.319908692511102e-07, |
|
"logits/chosen": -2.4081215858459473, |
|
"logits/rejected": -2.3740234375, |
|
"logps/chosen": -171.662109375, |
|
"logps/rejected": -209.674560546875, |
|
"loss": 0.2371, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.9765853881835938, |
|
"rewards/margins": 3.642939805984497, |
|
"rewards/rejected": -4.619524955749512, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.0769230769230769, |
|
"grad_norm": 19.013199137435198, |
|
"learning_rate": 7.239191886025853e-07, |
|
"logits/chosen": -2.438504695892334, |
|
"logits/rejected": -2.4153828620910645, |
|
"logps/chosen": -175.62979125976562, |
|
"logps/rejected": -207.75692749023438, |
|
"loss": 0.2077, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.7931637763977051, |
|
"rewards/margins": 3.6793007850646973, |
|
"rewards/rejected": -4.472464561462402, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.0940170940170941, |
|
"grad_norm": 18.91200622344116, |
|
"learning_rate": 7.15773839191553e-07, |
|
"logits/chosen": -2.44122314453125, |
|
"logits/rejected": -2.4007716178894043, |
|
"logps/chosen": -164.8516387939453, |
|
"logps/rejected": -196.8404998779297, |
|
"loss": 0.222, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5334895253181458, |
|
"rewards/margins": 3.032334804534912, |
|
"rewards/rejected": -3.565824508666992, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0940170940170941, |
|
"eval_logits/chosen": -2.439704418182373, |
|
"eval_logits/rejected": -2.4060051441192627, |
|
"eval_logps/chosen": -170.68392944335938, |
|
"eval_logps/rejected": -188.57797241210938, |
|
"eval_loss": 0.45035940408706665, |
|
"eval_rewards/accuracies": 0.7759615182876587, |
|
"eval_rewards/chosen": -1.2206590175628662, |
|
"eval_rewards/margins": 1.7491083145141602, |
|
"eval_rewards/rejected": -2.9697670936584473, |
|
"eval_runtime": 509.779, |
|
"eval_samples_per_second": 16.309, |
|
"eval_steps_per_second": 0.255, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 22.512058111846653, |
|
"learning_rate": 7.075575008143054e-07, |
|
"logits/chosen": -2.4442429542541504, |
|
"logits/rejected": -2.418893337249756, |
|
"logps/chosen": -172.38357543945312, |
|
"logps/rejected": -206.02169799804688, |
|
"loss": 0.2187, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.565311074256897, |
|
"rewards/margins": 3.2330455780029297, |
|
"rewards/rejected": -3.798356533050537, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.1282051282051282, |
|
"grad_norm": 20.168560881998502, |
|
"learning_rate": 6.99272876622298e-07, |
|
"logits/chosen": -2.451326847076416, |
|
"logits/rejected": -2.412057399749756, |
|
"logps/chosen": -176.92236328125, |
|
"logps/rejected": -212.13027954101562, |
|
"loss": 0.2223, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.7077795267105103, |
|
"rewards/margins": 3.4706287384033203, |
|
"rewards/rejected": -4.178408622741699, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.1452991452991452, |
|
"grad_norm": 22.192981205024807, |
|
"learning_rate": 6.909226922328211e-07, |
|
"logits/chosen": -2.409423351287842, |
|
"logits/rejected": -2.3877720832824707, |
|
"logps/chosen": -178.28134155273438, |
|
"logps/rejected": -211.2721405029297, |
|
"loss": 0.2107, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.3355214595794678, |
|
"rewards/margins": 3.4370384216308594, |
|
"rewards/rejected": -4.77255916595459, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.1623931623931625, |
|
"grad_norm": 21.419705510014087, |
|
"learning_rate": 6.82509694832279e-07, |
|
"logits/chosen": -2.4000024795532227, |
|
"logits/rejected": -2.370281219482422, |
|
"logps/chosen": -177.16690063476562, |
|
"logps/rejected": -216.00479125976562, |
|
"loss": 0.2184, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.2021948099136353, |
|
"rewards/margins": 3.6778666973114014, |
|
"rewards/rejected": -4.880061149597168, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.1794871794871795, |
|
"grad_norm": 22.354670781786442, |
|
"learning_rate": 6.740366522723752e-07, |
|
"logits/chosen": -2.403994083404541, |
|
"logits/rejected": -2.376459836959839, |
|
"logps/chosen": -182.30508422851562, |
|
"logps/rejected": -216.92324829101562, |
|
"loss": 0.1978, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.2613385915756226, |
|
"rewards/margins": 3.6934847831726074, |
|
"rewards/rejected": -4.9548234939575195, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.1965811965811965, |
|
"grad_norm": 23.407629177854666, |
|
"learning_rate": 6.655063521594949e-07, |
|
"logits/chosen": -2.379652738571167, |
|
"logits/rejected": -2.324298143386841, |
|
"logps/chosen": -173.8491668701172, |
|
"logps/rejected": -214.69384765625, |
|
"loss": 0.2039, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.4865599870681763, |
|
"rewards/margins": 3.8272087574005127, |
|
"rewards/rejected": -5.3137688636779785, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.2136752136752136, |
|
"grad_norm": 21.571334703323895, |
|
"learning_rate": 6.569216009375929e-07, |
|
"logits/chosen": -2.3660831451416016, |
|
"logits/rejected": -2.339773654937744, |
|
"logps/chosen": -173.77110290527344, |
|
"logps/rejected": -213.0174560546875, |
|
"loss": 0.1915, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.3100428581237793, |
|
"rewards/margins": 3.860780715942383, |
|
"rewards/rejected": -5.170823574066162, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.2307692307692308, |
|
"grad_norm": 22.187677873940732, |
|
"learning_rate": 6.482852229648801e-07, |
|
"logits/chosen": -2.3646240234375, |
|
"logits/rejected": -2.3246006965637207, |
|
"logps/chosen": -175.00582885742188, |
|
"logps/rejected": -206.28280639648438, |
|
"loss": 0.2018, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -1.4125807285308838, |
|
"rewards/margins": 3.3710312843322754, |
|
"rewards/rejected": -4.783612251281738, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.2307692307692308, |
|
"eval_logits/chosen": -2.3790221214294434, |
|
"eval_logits/rejected": -2.34446120262146, |
|
"eval_logps/chosen": -179.33251953125, |
|
"eval_logps/rejected": -203.62615966796875, |
|
"eval_loss": 0.44376233220100403, |
|
"eval_rewards/accuracies": 0.7884615659713745, |
|
"eval_rewards/chosen": -2.0855188369750977, |
|
"eval_rewards/margins": 2.3890678882598877, |
|
"eval_rewards/rejected": -4.474586486816406, |
|
"eval_runtime": 510.4715, |
|
"eval_samples_per_second": 16.287, |
|
"eval_steps_per_second": 0.255, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.2478632478632479, |
|
"grad_norm": 22.165120276430873, |
|
"learning_rate": 6.396000595846187e-07, |
|
"logits/chosen": -2.3558402061462402, |
|
"logits/rejected": -2.3481571674346924, |
|
"logps/chosen": -181.36770629882812, |
|
"logps/rejected": -209.7890167236328, |
|
"loss": 0.2058, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -1.5554149150848389, |
|
"rewards/margins": 3.3219857215881348, |
|
"rewards/rejected": -4.8774003982543945, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.264957264957265, |
|
"grad_norm": 20.68437270723084, |
|
"learning_rate": 6.30868968190328e-07, |
|
"logits/chosen": -2.3541951179504395, |
|
"logits/rejected": -2.3390259742736816, |
|
"logps/chosen": -176.1993865966797, |
|
"logps/rejected": -212.49215698242188, |
|
"loss": 0.1952, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.4656041860580444, |
|
"rewards/margins": 3.678515672683716, |
|
"rewards/rejected": -5.144120216369629, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.282051282051282, |
|
"grad_norm": 19.201697233386618, |
|
"learning_rate": 6.220948212857111e-07, |
|
"logits/chosen": -2.3458924293518066, |
|
"logits/rejected": -2.3219103813171387, |
|
"logps/chosen": -174.4814910888672, |
|
"logps/rejected": -214.3359375, |
|
"loss": 0.1874, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -1.0674364566802979, |
|
"rewards/margins": 3.7355704307556152, |
|
"rewards/rejected": -4.803006172180176, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.2991452991452992, |
|
"grad_norm": 22.01586220428365, |
|
"learning_rate": 6.13280505539608e-07, |
|
"logits/chosen": -2.342374324798584, |
|
"logits/rejected": -2.3162975311279297, |
|
"logps/chosen": -183.77163696289062, |
|
"logps/rejected": -228.7787322998047, |
|
"loss": 0.2029, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.164026141166687, |
|
"rewards/margins": 3.908278226852417, |
|
"rewards/rejected": -5.072304725646973, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.3162393162393162, |
|
"grad_norm": 25.00184157353881, |
|
"learning_rate": 6.044289208362914e-07, |
|
"logits/chosen": -2.333132028579712, |
|
"logits/rejected": -2.2765920162200928, |
|
"logps/chosen": -174.96617126464844, |
|
"logps/rejected": -214.20108032226562, |
|
"loss": 0.1954, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -1.3976377248764038, |
|
"rewards/margins": 3.710228443145752, |
|
"rewards/rejected": -5.107865810394287, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 31.735546531917564, |
|
"learning_rate": 5.955429793214128e-07, |
|
"logits/chosen": -2.303215503692627, |
|
"logits/rejected": -2.2655513286590576, |
|
"logps/chosen": -185.8195037841797, |
|
"logps/rejected": -223.3665313720703, |
|
"loss": 0.2087, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.0371310710906982, |
|
"rewards/margins": 3.6536872386932373, |
|
"rewards/rejected": -5.690817832946777, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.3504273504273505, |
|
"grad_norm": 22.637262975463738, |
|
"learning_rate": 5.866256044439142e-07, |
|
"logits/chosen": -2.3101253509521484, |
|
"logits/rejected": -2.2920727729797363, |
|
"logps/chosen": -177.775390625, |
|
"logps/rejected": -217.62863159179688, |
|
"loss": 0.2183, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.9160019159317017, |
|
"rewards/margins": 3.9770541191101074, |
|
"rewards/rejected": -5.8930559158325195, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.3675213675213675, |
|
"grad_norm": 20.130725870467003, |
|
"learning_rate": 5.776797299942235e-07, |
|
"logits/chosen": -2.318878412246704, |
|
"logits/rejected": -2.2966861724853516, |
|
"logps/chosen": -173.74664306640625, |
|
"logps/rejected": -212.4857940673828, |
|
"loss": 0.2017, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.386197566986084, |
|
"rewards/margins": 3.7791614532470703, |
|
"rewards/rejected": -5.165358543395996, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.3675213675213675, |
|
"eval_logits/chosen": -2.3350861072540283, |
|
"eval_logits/rejected": -2.3021962642669678, |
|
"eval_logps/chosen": -177.5862274169922, |
|
"eval_logps/rejected": -200.29428100585938, |
|
"eval_loss": 0.43498364090919495, |
|
"eval_rewards/accuracies": 0.7980769276618958, |
|
"eval_rewards/chosen": -1.910888433456421, |
|
"eval_rewards/margins": 2.230510711669922, |
|
"eval_rewards/rejected": -4.141399383544922, |
|
"eval_runtime": 544.4003, |
|
"eval_samples_per_second": 15.272, |
|
"eval_steps_per_second": 0.239, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.3846153846153846, |
|
"grad_norm": 23.596482868635878, |
|
"learning_rate": 5.687082991390443e-07, |
|
"logits/chosen": -2.318471908569336, |
|
"logits/rejected": -2.3120999336242676, |
|
"logps/chosen": -181.3382568359375, |
|
"logps/rejected": -223.269287109375, |
|
"loss": 0.2194, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.8589428067207336, |
|
"rewards/margins": 3.934159517288208, |
|
"rewards/rejected": -4.793102741241455, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.4017094017094016, |
|
"grad_norm": 21.689666892141172, |
|
"learning_rate": 5.597142634530638e-07, |
|
"logits/chosen": -2.320355176925659, |
|
"logits/rejected": -2.291660785675049, |
|
"logps/chosen": -173.66566467285156, |
|
"logps/rejected": -208.9145050048828, |
|
"loss": 0.1923, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.6978217363357544, |
|
"rewards/margins": 3.58097505569458, |
|
"rewards/rejected": -4.278796195983887, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.4188034188034189, |
|
"grad_norm": 24.150486741969612, |
|
"learning_rate": 5.507005819478924e-07, |
|
"logits/chosen": -2.3113839626312256, |
|
"logits/rejected": -2.2749829292297363, |
|
"logps/chosen": -177.13357543945312, |
|
"logps/rejected": -209.0823211669922, |
|
"loss": 0.2273, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.8599263429641724, |
|
"rewards/margins": 3.658784866333008, |
|
"rewards/rejected": -4.518711090087891, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.435897435897436, |
|
"grad_norm": 21.96020470207621, |
|
"learning_rate": 5.416702200985584e-07, |
|
"logits/chosen": -2.2829697132110596, |
|
"logits/rejected": -2.2764744758605957, |
|
"logps/chosen": -176.04710388183594, |
|
"logps/rejected": -212.1916961669922, |
|
"loss": 0.2022, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.101433515548706, |
|
"rewards/margins": 3.7728042602539062, |
|
"rewards/rejected": -4.874238014221191, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.452991452991453, |
|
"grad_norm": 27.78726291669523, |
|
"learning_rate": 5.326261488678748e-07, |
|
"logits/chosen": -2.2171132564544678, |
|
"logits/rejected": -2.185006856918335, |
|
"logps/chosen": -169.00796508789062, |
|
"logps/rejected": -199.62350463867188, |
|
"loss": 0.2124, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.3692271709442139, |
|
"rewards/margins": 3.5437960624694824, |
|
"rewards/rejected": -4.913023948669434, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.4700854700854702, |
|
"grad_norm": 21.241057222727857, |
|
"learning_rate": 5.235713437290011e-07, |
|
"logits/chosen": -2.220639705657959, |
|
"logits/rejected": -2.189408779144287, |
|
"logps/chosen": -174.73651123046875, |
|
"logps/rejected": -213.7786865234375, |
|
"loss": 0.1905, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.5636754035949707, |
|
"rewards/margins": 3.8633148670196533, |
|
"rewards/rejected": -5.426989555358887, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.4871794871794872, |
|
"grad_norm": 21.34359675472229, |
|
"learning_rate": 5.145087836865213e-07, |
|
"logits/chosen": -2.236384630203247, |
|
"logits/rejected": -2.1837754249572754, |
|
"logps/chosen": -179.68634033203125, |
|
"logps/rejected": -222.42453002929688, |
|
"loss": 0.1887, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.4160200357437134, |
|
"rewards/margins": 4.235383033752441, |
|
"rewards/rejected": -5.651402950286865, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.5042735042735043, |
|
"grad_norm": 25.914610827614307, |
|
"learning_rate": 5.054414502963604e-07, |
|
"logits/chosen": -2.1725914478302, |
|
"logits/rejected": -2.133784294128418, |
|
"logps/chosen": -171.98902893066406, |
|
"logps/rejected": -210.50491333007812, |
|
"loss": 0.1999, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.4368568658828735, |
|
"rewards/margins": 3.967801332473755, |
|
"rewards/rejected": -5.404658317565918, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.5042735042735043, |
|
"eval_logits/chosen": -2.1716132164001465, |
|
"eval_logits/rejected": -2.1360905170440674, |
|
"eval_logps/chosen": -179.5331268310547, |
|
"eval_logps/rejected": -203.52139282226562, |
|
"eval_loss": 0.42879074811935425, |
|
"eval_rewards/accuracies": 0.8048076629638672, |
|
"eval_rewards/chosen": -2.105579137802124, |
|
"eval_rewards/margins": 2.3585293292999268, |
|
"eval_rewards/rejected": -4.464108467102051, |
|
"eval_runtime": 549.544, |
|
"eval_samples_per_second": 15.129, |
|
"eval_steps_per_second": 0.237, |
|
"step": 440 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 876, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 40, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5188410956316672.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|