|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 50, |
|
"global_step": 201, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07462686567164178, |
|
"grad_norm": 56.38858835713434, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.731353282928467, |
|
"logits/rejected": -2.7158660888671875, |
|
"logps/chosen": -256.7725524902344, |
|
"logps/rejected": -205.1663055419922, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": 0.014972714707255363, |
|
"rewards/margins": 0.006492167711257935, |
|
"rewards/rejected": 0.008480546995997429, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.14925373134328357, |
|
"grad_norm": 48.210654865798084, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -2.6910791397094727, |
|
"logits/rejected": -2.686784267425537, |
|
"logps/chosen": -261.28240966796875, |
|
"logps/rejected": -210.28414916992188, |
|
"loss": 0.6445, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.5361045002937317, |
|
"rewards/margins": 0.2673659920692444, |
|
"rewards/rejected": 0.2687385082244873, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.22388059701492538, |
|
"grad_norm": 48.18867539560807, |
|
"learning_rate": 9.983100718730718e-07, |
|
"logits/chosen": -2.5146777629852295, |
|
"logits/rejected": -2.5069146156311035, |
|
"logps/chosen": -274.51788330078125, |
|
"logps/rejected": -212.06613159179688, |
|
"loss": 0.6397, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 1.0824434757232666, |
|
"rewards/margins": 0.4525600075721741, |
|
"rewards/rejected": 0.6298834085464478, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.29850746268656714, |
|
"grad_norm": 56.03620889669535, |
|
"learning_rate": 9.932517109205849e-07, |
|
"logits/chosen": -2.346874237060547, |
|
"logits/rejected": -2.3457634449005127, |
|
"logps/chosen": -233.57192993164062, |
|
"logps/rejected": -216.9462890625, |
|
"loss": 0.6508, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.2721847295761108, |
|
"rewards/margins": 0.8419507741928101, |
|
"rewards/rejected": 0.4302339553833008, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.373134328358209, |
|
"grad_norm": 44.42398594582461, |
|
"learning_rate": 9.848591102083375e-07, |
|
"logits/chosen": -2.321877956390381, |
|
"logits/rejected": -2.3033697605133057, |
|
"logps/chosen": -276.81353759765625, |
|
"logps/rejected": -215.10543823242188, |
|
"loss": 0.5836, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 1.4672718048095703, |
|
"rewards/margins": 0.9575842618942261, |
|
"rewards/rejected": 0.5096874237060547, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.44776119402985076, |
|
"grad_norm": 47.04899305167527, |
|
"learning_rate": 9.731890013043367e-07, |
|
"logits/chosen": -2.3766517639160156, |
|
"logits/rejected": -2.375422954559326, |
|
"logps/chosen": -253.60995483398438, |
|
"logps/rejected": -239.47940063476562, |
|
"loss": 0.634, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 1.2893927097320557, |
|
"rewards/margins": 0.6847591996192932, |
|
"rewards/rejected": 0.6046335697174072, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5223880597014925, |
|
"grad_norm": 39.80811236547202, |
|
"learning_rate": 9.583202707897073e-07, |
|
"logits/chosen": -2.4474780559539795, |
|
"logits/rejected": -2.450472354888916, |
|
"logps/chosen": -257.7427673339844, |
|
"logps/rejected": -207.629638671875, |
|
"loss": 0.5649, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 1.217116355895996, |
|
"rewards/margins": 1.1533949375152588, |
|
"rewards/rejected": 0.06372135877609253, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.5970149253731343, |
|
"grad_norm": 42.3182391334653, |
|
"learning_rate": 9.403534270080829e-07, |
|
"logits/chosen": -2.4868321418762207, |
|
"logits/rejected": -2.4954237937927246, |
|
"logps/chosen": -243.6182403564453, |
|
"logps/rejected": -235.5696563720703, |
|
"loss": 0.5926, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 1.1232950687408447, |
|
"rewards/margins": 1.261273741722107, |
|
"rewards/rejected": -0.13797876238822937, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6716417910447762, |
|
"grad_norm": 38.544673892587376, |
|
"learning_rate": 9.19409920658098e-07, |
|
"logits/chosen": -2.4237208366394043, |
|
"logits/rejected": -2.400038480758667, |
|
"logps/chosen": -247.0177764892578, |
|
"logps/rejected": -214.0535888671875, |
|
"loss": 0.6227, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 1.1583524942398071, |
|
"rewards/margins": 1.3776183128356934, |
|
"rewards/rejected": -0.21926572918891907, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.746268656716418, |
|
"grad_norm": 44.95262854721761, |
|
"learning_rate": 8.956313238215823e-07, |
|
"logits/chosen": -2.3861770629882812, |
|
"logits/rejected": -2.361314058303833, |
|
"logps/chosen": -251.87680053710938, |
|
"logps/rejected": -241.48794555664062, |
|
"loss": 0.5946, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 1.3533507585525513, |
|
"rewards/margins": 1.143169641494751, |
|
"rewards/rejected": 0.2101811170578003, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.746268656716418, |
|
"eval_logits/chosen": -2.419147253036499, |
|
"eval_logits/rejected": -2.3880856037139893, |
|
"eval_logps/chosen": -247.12657165527344, |
|
"eval_logps/rejected": -217.0163116455078, |
|
"eval_loss": 0.5850147008895874, |
|
"eval_rewards/accuracies": 0.7333333492279053, |
|
"eval_rewards/chosen": 1.2898719310760498, |
|
"eval_rewards/margins": 1.370118498802185, |
|
"eval_rewards/rejected": -0.0802464708685875, |
|
"eval_runtime": 126.7763, |
|
"eval_samples_per_second": 14.987, |
|
"eval_steps_per_second": 0.237, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8208955223880597, |
|
"grad_norm": 34.637816378109704, |
|
"learning_rate": 8.691783729769873e-07, |
|
"logits/chosen": -2.4367661476135254, |
|
"logits/rejected": -2.4355311393737793, |
|
"logps/chosen": -237.86105346679688, |
|
"logps/rejected": -211.58084106445312, |
|
"loss": 0.555, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 1.1501390933990479, |
|
"rewards/margins": 1.354534387588501, |
|
"rewards/rejected": -0.20439541339874268, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.8955223880597015, |
|
"grad_norm": 39.91245885039146, |
|
"learning_rate": 8.402298824670029e-07, |
|
"logits/chosen": -2.5068681240081787, |
|
"logits/rejected": -2.485455274581909, |
|
"logps/chosen": -256.1775817871094, |
|
"logps/rejected": -231.7941436767578, |
|
"loss": 0.611, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 1.03065025806427, |
|
"rewards/margins": 1.9949699640274048, |
|
"rewards/rejected": -0.9643197059631348, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9701492537313433, |
|
"grad_norm": 39.08249314957454, |
|
"learning_rate": 8.089815357650089e-07, |
|
"logits/chosen": -2.5430567264556885, |
|
"logits/rejected": -2.543178081512451, |
|
"logps/chosen": -232.22262573242188, |
|
"logps/rejected": -225.10220336914062, |
|
"loss": 0.5475, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.8175865411758423, |
|
"rewards/margins": 1.328970193862915, |
|
"rewards/rejected": -0.5113834738731384, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.044776119402985, |
|
"grad_norm": 20.186385290734442, |
|
"learning_rate": 7.756445627110522e-07, |
|
"logits/chosen": -2.552123546600342, |
|
"logits/rejected": -2.5313704013824463, |
|
"logps/chosen": -234.0012664794922, |
|
"logps/rejected": -240.6761474609375, |
|
"loss": 0.3553, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 1.4629334211349487, |
|
"rewards/margins": 2.36128568649292, |
|
"rewards/rejected": -0.8983524441719055, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.1194029850746268, |
|
"grad_norm": 21.595036305155492, |
|
"learning_rate": 7.404443116588547e-07, |
|
"logits/chosen": -2.5418105125427246, |
|
"logits/rejected": -2.5285840034484863, |
|
"logps/chosen": -242.90499877929688, |
|
"logps/rejected": -216.7215576171875, |
|
"loss": 0.2463, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 2.360262393951416, |
|
"rewards/margins": 3.110288381576538, |
|
"rewards/rejected": -0.7500261068344116, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.1940298507462686, |
|
"grad_norm": 22.4039757471544, |
|
"learning_rate": 7.036187261857288e-07, |
|
"logits/chosen": -2.456780195236206, |
|
"logits/rejected": -2.439521312713623, |
|
"logps/chosen": -233.08560180664062, |
|
"logps/rejected": -244.88961791992188, |
|
"loss": 0.2423, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 2.1394240856170654, |
|
"rewards/margins": 3.141387939453125, |
|
"rewards/rejected": -1.0019636154174805, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.2686567164179103, |
|
"grad_norm": 25.42866646351196, |
|
"learning_rate": 6.654167366624008e-07, |
|
"logits/chosen": -2.3945281505584717, |
|
"logits/rejected": -2.3779776096343994, |
|
"logps/chosen": -230.88217163085938, |
|
"logps/rejected": -222.7549285888672, |
|
"loss": 0.2769, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.6151447296142578, |
|
"rewards/margins": 2.812251091003418, |
|
"rewards/rejected": -1.1971065998077393, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.3432835820895521, |
|
"grad_norm": 23.51747857481762, |
|
"learning_rate": 6.260965775552713e-07, |
|
"logits/chosen": -2.361176013946533, |
|
"logits/rejected": -2.3344523906707764, |
|
"logps/chosen": -236.94775390625, |
|
"logps/rejected": -242.43069458007812, |
|
"loss": 0.2743, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.946207046508789, |
|
"rewards/margins": 3.319734573364258, |
|
"rewards/rejected": -1.3735275268554688, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.417910447761194, |
|
"grad_norm": 25.346384805721517, |
|
"learning_rate": 5.859240418356614e-07, |
|
"logits/chosen": -2.3584346771240234, |
|
"logits/rejected": -2.3219895362854004, |
|
"logps/chosen": -246.46530151367188, |
|
"logps/rejected": -231.7371063232422, |
|
"loss": 0.2576, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 2.247493267059326, |
|
"rewards/margins": 3.8193671703338623, |
|
"rewards/rejected": -1.571873426437378, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.4925373134328357, |
|
"grad_norm": 17.435014853431674, |
|
"learning_rate": 5.451706842957421e-07, |
|
"logits/chosen": -2.4034814834594727, |
|
"logits/rejected": -2.3993496894836426, |
|
"logps/chosen": -248.6347198486328, |
|
"logps/rejected": -231.65682983398438, |
|
"loss": 0.255, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 2.421250820159912, |
|
"rewards/margins": 3.7864270210266113, |
|
"rewards/rejected": -1.3651763200759888, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.4925373134328357, |
|
"eval_logits/chosen": -2.4342453479766846, |
|
"eval_logits/rejected": -2.4099276065826416, |
|
"eval_logps/chosen": -245.49957275390625, |
|
"eval_logps/rejected": -221.9740753173828, |
|
"eval_loss": 0.5856931805610657, |
|
"eval_rewards/accuracies": 0.7958333492279053, |
|
"eval_rewards/chosen": 1.4525729417800903, |
|
"eval_rewards/margins": 2.0285959243774414, |
|
"eval_rewards/rejected": -0.5760230422019958, |
|
"eval_runtime": 126.6219, |
|
"eval_samples_per_second": 15.005, |
|
"eval_steps_per_second": 0.237, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.5671641791044775, |
|
"grad_norm": 22.513659156325907, |
|
"learning_rate": 5.041119859162068e-07, |
|
"logits/chosen": -2.425330400466919, |
|
"logits/rejected": -2.4230823516845703, |
|
"logps/chosen": -244.3297576904297, |
|
"logps/rejected": -250.95852661132812, |
|
"loss": 0.2902, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.9127695560455322, |
|
"rewards/margins": 3.8896842002868652, |
|
"rewards/rejected": -1.9769150018692017, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.6417910447761193, |
|
"grad_norm": 22.093724425686545, |
|
"learning_rate": 4.630254916940423e-07, |
|
"logits/chosen": -2.456946849822998, |
|
"logits/rejected": -2.423444986343384, |
|
"logps/chosen": -262.3679504394531, |
|
"logps/rejected": -220.34878540039062, |
|
"loss": 0.2802, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 2.1305854320526123, |
|
"rewards/margins": 3.4911084175109863, |
|
"rewards/rejected": -1.3605228662490845, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.716417910447761, |
|
"grad_norm": 22.848502540826928, |
|
"learning_rate": 4.2218893451814e-07, |
|
"logits/chosen": -2.4240517616271973, |
|
"logits/rejected": -2.400174379348755, |
|
"logps/chosen": -250.54122924804688, |
|
"logps/rejected": -235.89712524414062, |
|
"loss": 0.2883, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.2658907175064087, |
|
"rewards/margins": 3.4836113452911377, |
|
"rewards/rejected": -2.2177207469940186, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.7910447761194028, |
|
"grad_norm": 25.377767671408016, |
|
"learning_rate": 3.8187835777481375e-07, |
|
"logits/chosen": -2.4062647819519043, |
|
"logits/rejected": -2.4059927463531494, |
|
"logps/chosen": -257.41558837890625, |
|
"logps/rejected": -225.0140380859375, |
|
"loss": 0.294, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.580168604850769, |
|
"rewards/margins": 3.4512062072753906, |
|
"rewards/rejected": -1.871037483215332, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.8656716417910446, |
|
"grad_norm": 21.398907395127186, |
|
"learning_rate": 3.423662493738687e-07, |
|
"logits/chosen": -2.4366660118103027, |
|
"logits/rejected": -2.4090025424957275, |
|
"logps/chosen": -270.04345703125, |
|
"logps/rejected": -228.82913208007812, |
|
"loss": 0.2818, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 1.6028369665145874, |
|
"rewards/margins": 3.6763553619384766, |
|
"rewards/rejected": -2.0735180377960205, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.9402985074626866, |
|
"grad_norm": 19.350854009924277, |
|
"learning_rate": 3.039196998086687e-07, |
|
"logits/chosen": -2.4322643280029297, |
|
"logits/rejected": -2.4288134574890137, |
|
"logps/chosen": -243.41943359375, |
|
"logps/rejected": -237.18588256835938, |
|
"loss": 0.2664, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.5144699811935425, |
|
"rewards/margins": 3.2811291217803955, |
|
"rewards/rejected": -1.7666590213775635, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.014925373134328, |
|
"grad_norm": 12.838839331293379, |
|
"learning_rate": 2.667985967011878e-07, |
|
"logits/chosen": -2.4573187828063965, |
|
"logits/rejected": -2.441751003265381, |
|
"logps/chosen": -242.3375244140625, |
|
"logps/rejected": -235.7463836669922, |
|
"loss": 0.2602, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 2.0578320026397705, |
|
"rewards/margins": 3.559199810028076, |
|
"rewards/rejected": -1.5013679265975952, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.08955223880597, |
|
"grad_norm": 13.535639161407488, |
|
"learning_rate": 2.3125386803640183e-07, |
|
"logits/chosen": -2.4401307106018066, |
|
"logits/rejected": -2.417816638946533, |
|
"logps/chosen": -234.63552856445312, |
|
"logps/rejected": -217.58316040039062, |
|
"loss": 0.1614, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.3646421432495117, |
|
"rewards/margins": 3.8635189533233643, |
|
"rewards/rejected": -1.498876690864563, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.1641791044776117, |
|
"grad_norm": 12.98503680311861, |
|
"learning_rate": 1.9752578596124952e-07, |
|
"logits/chosen": -2.3909850120544434, |
|
"logits/rejected": -2.3737857341766357, |
|
"logps/chosen": -237.2566680908203, |
|
"logps/rejected": -225.0938262939453, |
|
"loss": 0.1588, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 2.3333511352539062, |
|
"rewards/margins": 3.83681058883667, |
|
"rewards/rejected": -1.5034593343734741, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.2388059701492535, |
|
"grad_norm": 17.53696861401471, |
|
"learning_rate": 1.6584234261399532e-07, |
|
"logits/chosen": -2.37007212638855, |
|
"logits/rejected": -2.352189779281616, |
|
"logps/chosen": -248.3662567138672, |
|
"logps/rejected": -211.06494140625, |
|
"loss": 0.1492, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 2.444425344467163, |
|
"rewards/margins": 3.893583297729492, |
|
"rewards/rejected": -1.4491578340530396, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.2388059701492535, |
|
"eval_logits/chosen": -2.3633620738983154, |
|
"eval_logits/rejected": -2.328437328338623, |
|
"eval_logps/chosen": -245.0876922607422, |
|
"eval_logps/rejected": -221.6795196533203, |
|
"eval_loss": 0.5705874562263489, |
|
"eval_rewards/accuracies": 0.7916666865348816, |
|
"eval_rewards/chosen": 1.4937634468078613, |
|
"eval_rewards/margins": 2.040332555770874, |
|
"eval_rewards/rejected": -0.5465689897537231, |
|
"eval_runtime": 126.3365, |
|
"eval_samples_per_second": 15.039, |
|
"eval_steps_per_second": 0.237, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.3134328358208958, |
|
"grad_norm": 21.317610168262696, |
|
"learning_rate": 1.3641770896292082e-07, |
|
"logits/chosen": -2.355787754058838, |
|
"logits/rejected": -2.3269476890563965, |
|
"logps/chosen": -244.22164916992188, |
|
"logps/rejected": -246.47555541992188, |
|
"loss": 0.1582, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 2.310300350189209, |
|
"rewards/margins": 3.7411086559295654, |
|
"rewards/rejected": -1.4308083057403564, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.388059701492537, |
|
"grad_norm": 22.107512419038276, |
|
"learning_rate": 1.0945078707215221e-07, |
|
"logits/chosen": -2.354370594024658, |
|
"logits/rejected": -2.334298610687256, |
|
"logps/chosen": -240.81668090820312, |
|
"logps/rejected": -231.38491821289062, |
|
"loss": 0.1805, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 2.6548850536346436, |
|
"rewards/margins": 4.173087120056152, |
|
"rewards/rejected": -1.5182020664215088, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.4626865671641793, |
|
"grad_norm": 16.052607396027245, |
|
"learning_rate": 8.512386558088919e-08, |
|
"logits/chosen": -2.3480546474456787, |
|
"logits/rejected": -2.3385627269744873, |
|
"logps/chosen": -234.52035522460938, |
|
"logps/rejected": -250.7569580078125, |
|
"loss": 0.1653, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 2.110555648803711, |
|
"rewards/margins": 4.057579040527344, |
|
"rewards/rejected": -1.9470237493515015, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.5373134328358207, |
|
"grad_norm": 13.776519776024537, |
|
"learning_rate": 6.360138748461013e-08, |
|
"logits/chosen": -2.371123790740967, |
|
"logits/rejected": -2.344186305999756, |
|
"logps/chosen": -250.3871307373047, |
|
"logps/rejected": -216.33401489257812, |
|
"loss": 0.1378, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 2.4384984970092773, |
|
"rewards/margins": 3.937534809112549, |
|
"rewards/rejected": -1.4990360736846924, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.611940298507463, |
|
"grad_norm": 16.423256544059676, |
|
"learning_rate": 4.5028838547699346e-08, |
|
"logits/chosen": -2.3799216747283936, |
|
"logits/rejected": -2.344275951385498, |
|
"logps/chosen": -246.70285034179688, |
|
"logps/rejected": -246.2882537841797, |
|
"loss": 0.136, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 2.443866729736328, |
|
"rewards/margins": 4.205219268798828, |
|
"rewards/rejected": -1.761352777481079, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.6865671641791042, |
|
"grad_norm": 17.74947523952519, |
|
"learning_rate": 2.9531763861505964e-08, |
|
"logits/chosen": -2.356396198272705, |
|
"logits/rejected": -2.3508291244506836, |
|
"logps/chosen": -226.5736846923828, |
|
"logps/rejected": -226.9338836669922, |
|
"loss": 0.158, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 2.034636974334717, |
|
"rewards/margins": 3.739074230194092, |
|
"rewards/rejected": -1.704437017440796, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.7611940298507465, |
|
"grad_norm": 19.018519807815494, |
|
"learning_rate": 1.7214919195619125e-08, |
|
"logits/chosen": -2.3661041259765625, |
|
"logits/rejected": -2.341630697250366, |
|
"logps/chosen": -245.50830078125, |
|
"logps/rejected": -273.1011962890625, |
|
"loss": 0.1556, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 2.4174838066101074, |
|
"rewards/margins": 4.649423599243164, |
|
"rewards/rejected": -2.2319395542144775, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.835820895522388, |
|
"grad_norm": 16.772739857601884, |
|
"learning_rate": 8.161562878982398e-09, |
|
"logits/chosen": -2.343977451324463, |
|
"logits/rejected": -2.338977575302124, |
|
"logps/chosen": -238.34130859375, |
|
"logps/rejected": -241.8277130126953, |
|
"loss": 0.1857, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 2.559854030609131, |
|
"rewards/margins": 4.380292892456055, |
|
"rewards/rejected": -1.820439100265503, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.91044776119403, |
|
"grad_norm": 23.452590812701438, |
|
"learning_rate": 2.432892997526026e-09, |
|
"logits/chosen": -2.3592891693115234, |
|
"logits/rejected": -2.33678936958313, |
|
"logps/chosen": -253.15628051757812, |
|
"logps/rejected": -223.1919403076172, |
|
"loss": 0.1848, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 2.240382671356201, |
|
"rewards/margins": 3.8927555084228516, |
|
"rewards/rejected": -1.6523720026016235, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.9850746268656714, |
|
"grad_norm": 21.27246528519617, |
|
"learning_rate": 6.763371270035457e-11, |
|
"logits/chosen": -2.3560237884521484, |
|
"logits/rejected": -2.3329052925109863, |
|
"logps/chosen": -231.1381072998047, |
|
"logps/rejected": -234.8066864013672, |
|
"loss": 0.1536, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 2.2949817180633545, |
|
"rewards/margins": 4.150731086730957, |
|
"rewards/rejected": -1.8557497262954712, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.9850746268656714, |
|
"eval_logits/chosen": -2.3594791889190674, |
|
"eval_logits/rejected": -2.326153039932251, |
|
"eval_logps/chosen": -246.07659912109375, |
|
"eval_logps/rejected": -224.16775512695312, |
|
"eval_loss": 0.5641939043998718, |
|
"eval_rewards/accuracies": 0.7916666865348816, |
|
"eval_rewards/chosen": 1.3948734998703003, |
|
"eval_rewards/margins": 2.1902639865875244, |
|
"eval_rewards/rejected": -0.7953903079032898, |
|
"eval_runtime": 126.3864, |
|
"eval_samples_per_second": 15.033, |
|
"eval_steps_per_second": 0.237, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 201, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2369906314051584.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|