|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984301412872841, |
|
"eval_steps": 500, |
|
"global_step": 159, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006279434850863423, |
|
"grad_norm": 10.430633053065636, |
|
"learning_rate": 1.875e-08, |
|
"logits/chosen": 1.9384945631027222, |
|
"logits/rejected": 1.9276118278503418, |
|
"logps/chosen": -271.40283203125, |
|
"logps/pi_response": -164.53562927246094, |
|
"logps/ref_response": -164.53562927246094, |
|
"logps/rejected": -331.295166015625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06279434850863422, |
|
"grad_norm": 12.398250917152115, |
|
"learning_rate": 1.875e-07, |
|
"logits/chosen": 1.375727891921997, |
|
"logits/rejected": 1.5644251108169556, |
|
"logps/chosen": -274.7219543457031, |
|
"logps/pi_response": -147.36016845703125, |
|
"logps/ref_response": -147.31085205078125, |
|
"logps/rejected": -435.1158142089844, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": -0.0004548828292172402, |
|
"rewards/margins": -0.00019649550085887313, |
|
"rewards/rejected": -0.0002583875320851803, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12558869701726844, |
|
"grad_norm": 8.66500301618874, |
|
"learning_rate": 2.9942119880575817e-07, |
|
"logits/chosen": 1.4651533365249634, |
|
"logits/rejected": 1.655652403831482, |
|
"logps/chosen": -306.84954833984375, |
|
"logps/pi_response": -169.71615600585938, |
|
"logps/ref_response": -169.712646484375, |
|
"logps/rejected": -432.33612060546875, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.007200081832706928, |
|
"rewards/margins": 0.005233117379248142, |
|
"rewards/rejected": -0.01243319921195507, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18838304552590268, |
|
"grad_norm": 9.072183489589987, |
|
"learning_rate": 2.929608750821129e-07, |
|
"logits/chosen": 1.4678703546524048, |
|
"logits/rejected": 1.6495119333267212, |
|
"logps/chosen": -303.84332275390625, |
|
"logps/pi_response": -163.2515106201172, |
|
"logps/ref_response": -162.7336883544922, |
|
"logps/rejected": -418.14385986328125, |
|
"loss": 0.6725, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04476435109972954, |
|
"rewards/margins": 0.027997547760605812, |
|
"rewards/rejected": -0.0727619007229805, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.25117739403453687, |
|
"grad_norm": 7.9558717739641445, |
|
"learning_rate": 2.7962832564252725e-07, |
|
"logits/chosen": 1.2683569192886353, |
|
"logits/rejected": 1.6301815509796143, |
|
"logps/chosen": -296.2575988769531, |
|
"logps/pi_response": -164.4340362548828, |
|
"logps/ref_response": -164.83663940429688, |
|
"logps/rejected": -465.8512268066406, |
|
"loss": 0.6494, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.08111439645290375, |
|
"rewards/margins": 0.10455696284770966, |
|
"rewards/rejected": -0.1856713443994522, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"grad_norm": 9.575559868471615, |
|
"learning_rate": 2.6006445513357056e-07, |
|
"logits/chosen": 1.1666990518569946, |
|
"logits/rejected": 1.442095398902893, |
|
"logps/chosen": -279.1966857910156, |
|
"logps/pi_response": -159.5543670654297, |
|
"logps/ref_response": -159.3308563232422, |
|
"logps/rejected": -451.89306640625, |
|
"loss": 0.6245, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.1434028297662735, |
|
"rewards/margins": 0.18818414211273193, |
|
"rewards/rejected": -0.33158695697784424, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.37676609105180536, |
|
"grad_norm": 7.235393720583013, |
|
"learning_rate": 2.3520971200967334e-07, |
|
"logits/chosen": 1.2313123941421509, |
|
"logits/rejected": 1.3668440580368042, |
|
"logps/chosen": -317.02679443359375, |
|
"logps/pi_response": -161.31985473632812, |
|
"logps/ref_response": -161.15023803710938, |
|
"logps/rejected": -474.43475341796875, |
|
"loss": 0.6077, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.21095111966133118, |
|
"rewards/margins": 0.2169308215379715, |
|
"rewards/rejected": -0.4278818964958191, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 7.493835961341088, |
|
"learning_rate": 2.0625888054143427e-07, |
|
"logits/chosen": 1.0015811920166016, |
|
"logits/rejected": 1.1100066900253296, |
|
"logps/chosen": -305.7568359375, |
|
"logps/pi_response": -157.75637817382812, |
|
"logps/ref_response": -160.76815795898438, |
|
"logps/rejected": -470.54522705078125, |
|
"loss": 0.6033, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.1870805323123932, |
|
"rewards/margins": 0.23715010285377502, |
|
"rewards/rejected": -0.42423057556152344, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5023547880690737, |
|
"grad_norm": 6.931068106744734, |
|
"learning_rate": 1.7460364672965327e-07, |
|
"logits/chosen": 0.8727623224258423, |
|
"logits/rejected": 1.101215124130249, |
|
"logps/chosen": -268.4558410644531, |
|
"logps/pi_response": -142.99215698242188, |
|
"logps/ref_response": -145.04220581054688, |
|
"logps/rejected": -471.4241638183594, |
|
"loss": 0.5909, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.18771064281463623, |
|
"rewards/margins": 0.33435240387916565, |
|
"rewards/rejected": -0.5220630764961243, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.565149136577708, |
|
"grad_norm": 7.075043680291521, |
|
"learning_rate": 1.4176569902035086e-07, |
|
"logits/chosen": 0.7811240553855896, |
|
"logits/rejected": 1.0503191947937012, |
|
"logps/chosen": -308.6497497558594, |
|
"logps/pi_response": -152.72732543945312, |
|
"logps/ref_response": -156.76785278320312, |
|
"logps/rejected": -511.48516845703125, |
|
"loss": 0.5858, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2674116790294647, |
|
"rewards/margins": 0.39314374327659607, |
|
"rewards/rejected": -0.6605554819107056, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"grad_norm": 7.416871526087097, |
|
"learning_rate": 1.0932357971453743e-07, |
|
"logits/chosen": 0.6527765393257141, |
|
"logits/rejected": 0.881365180015564, |
|
"logps/chosen": -290.30291748046875, |
|
"logps/pi_response": -137.30520629882812, |
|
"logps/ref_response": -141.33499145507812, |
|
"logps/rejected": -528.6287231445312, |
|
"loss": 0.5723, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2469521462917328, |
|
"rewards/margins": 0.4689961075782776, |
|
"rewards/rejected": -0.7159483432769775, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6907378335949764, |
|
"grad_norm": 6.868702818175434, |
|
"learning_rate": 7.883680337481599e-08, |
|
"logits/chosen": 0.5740480422973633, |
|
"logits/rejected": 0.858615517616272, |
|
"logps/chosen": -303.41119384765625, |
|
"logps/pi_response": -136.2499542236328, |
|
"logps/ref_response": -139.31204223632812, |
|
"logps/rejected": -475.9146423339844, |
|
"loss": 0.5688, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.3227171003818512, |
|
"rewards/margins": 0.34727370738983154, |
|
"rewards/rejected": -0.6699907183647156, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7535321821036107, |
|
"grad_norm": 7.117305529201249, |
|
"learning_rate": 5.177088990820725e-08, |
|
"logits/chosen": 0.8573010563850403, |
|
"logits/rejected": 1.0616093873977661, |
|
"logps/chosen": -343.8194885253906, |
|
"logps/pi_response": -163.02926635742188, |
|
"logps/ref_response": -168.28973388671875, |
|
"logps/rejected": -572.6094970703125, |
|
"loss": 0.5672, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.34128373861312866, |
|
"rewards/margins": 0.48513007164001465, |
|
"rewards/rejected": -0.8264138102531433, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 7.6342489936056275, |
|
"learning_rate": 2.942691603548416e-08, |
|
"logits/chosen": 0.6449292302131653, |
|
"logits/rejected": 0.8830472826957703, |
|
"logps/chosen": -324.7782287597656, |
|
"logps/pi_response": -156.79000854492188, |
|
"logps/ref_response": -161.82154846191406, |
|
"logps/rejected": -524.2703857421875, |
|
"loss": 0.5844, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.3194042146205902, |
|
"rewards/margins": 0.4711576998233795, |
|
"rewards/rejected": -0.7905619740486145, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 6.752884100895449, |
|
"learning_rate": 1.2878971655412513e-08, |
|
"logits/chosen": 0.8525398373603821, |
|
"logits/rejected": 1.0120213031768799, |
|
"logps/chosen": -330.0039978027344, |
|
"logps/pi_response": -156.17630004882812, |
|
"logps/ref_response": -161.65866088867188, |
|
"logps/rejected": -559.6900634765625, |
|
"loss": 0.5577, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.2980651557445526, |
|
"rewards/margins": 0.5507915616035461, |
|
"rewards/rejected": -0.8488567471504211, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9419152276295133, |
|
"grad_norm": 7.563586487187802, |
|
"learning_rate": 2.922527618666465e-09, |
|
"logits/chosen": 0.5885689854621887, |
|
"logits/rejected": 0.828190803527832, |
|
"logps/chosen": -297.84844970703125, |
|
"logps/pi_response": -152.97872924804688, |
|
"logps/ref_response": -157.01953125, |
|
"logps/rejected": -508.4051818847656, |
|
"loss": 0.5623, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2831505537033081, |
|
"rewards/margins": 0.48666033148765564, |
|
"rewards/rejected": -0.7698109149932861, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9984301412872841, |
|
"step": 159, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6040852474716475, |
|
"train_runtime": 3000.2234, |
|
"train_samples_per_second": 6.792, |
|
"train_steps_per_second": 0.053 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 159, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|