htlou's picture
Upload folder using huggingface_hub
d6f3661 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.986175115207373,
"eval_steps": 40,
"global_step": 162,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09216589861751152,
"grad_norm": 82.64344716971713,
"learning_rate": 5e-07,
"logits/chosen": -2.732285976409912,
"logits/rejected": -2.7352840900421143,
"logps/chosen": -345.11505126953125,
"logps/rejected": -257.46209716796875,
"loss": 0.6879,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": 0.032340794801712036,
"rewards/margins": 0.014306592755019665,
"rewards/rejected": 0.018034199252724648,
"step": 5
},
{
"epoch": 0.18433179723502305,
"grad_norm": 55.27766576693423,
"learning_rate": 1e-06,
"logits/chosen": -2.6805875301361084,
"logits/rejected": -2.6747162342071533,
"logps/chosen": -333.76953125,
"logps/rejected": -232.7953338623047,
"loss": 0.5893,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.9853774309158325,
"rewards/margins": 0.4447619318962097,
"rewards/rejected": 0.540615439414978,
"step": 10
},
{
"epoch": 0.2764976958525346,
"grad_norm": 46.427604431133965,
"learning_rate": 9.973324900566213e-07,
"logits/chosen": -2.435732364654541,
"logits/rejected": -2.423825740814209,
"logps/chosen": -310.85443115234375,
"logps/rejected": -208.2480926513672,
"loss": 0.5442,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": 2.8085920810699463,
"rewards/margins": 1.5502256155014038,
"rewards/rejected": 1.258366346359253,
"step": 15
},
{
"epoch": 0.3686635944700461,
"grad_norm": 39.94842616368073,
"learning_rate": 9.893584226636772e-07,
"logits/chosen": -2.2710189819335938,
"logits/rejected": -2.255997896194458,
"logps/chosen": -323.7245788574219,
"logps/rejected": -217.50869750976562,
"loss": 0.5228,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 3.2350196838378906,
"rewards/margins": 1.5412657260894775,
"rewards/rejected": 1.6937541961669922,
"step": 20
},
{
"epoch": 0.4608294930875576,
"grad_norm": 48.62034534876174,
"learning_rate": 9.761628814374072e-07,
"logits/chosen": -2.125060558319092,
"logits/rejected": -2.0805716514587402,
"logps/chosen": -299.4482421875,
"logps/rejected": -230.19662475585938,
"loss": 0.4827,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": 2.9857540130615234,
"rewards/margins": 1.7392915487289429,
"rewards/rejected": 1.246462345123291,
"step": 25
},
{
"epoch": 0.5529953917050692,
"grad_norm": 46.06223541077971,
"learning_rate": 9.578866633275286e-07,
"logits/chosen": -1.9920810461044312,
"logits/rejected": -1.9682658910751343,
"logps/chosen": -302.1333923339844,
"logps/rejected": -240.9350128173828,
"loss": 0.5361,
"rewards/accuracies": 0.78125,
"rewards/chosen": 3.1254260540008545,
"rewards/margins": 1.7768207788467407,
"rewards/rejected": 1.3486052751541138,
"step": 30
},
{
"epoch": 0.6451612903225806,
"grad_norm": 43.606270955387636,
"learning_rate": 9.347247763081833e-07,
"logits/chosen": -1.911285638809204,
"logits/rejected": -1.8874857425689697,
"logps/chosen": -318.9488525390625,
"logps/rejected": -232.88931274414062,
"loss": 0.446,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": 3.207639217376709,
"rewards/margins": 2.153695583343506,
"rewards/rejected": 1.0539438724517822,
"step": 35
},
{
"epoch": 0.7373271889400922,
"grad_norm": 39.922691563648584,
"learning_rate": 9.069243586350975e-07,
"logits/chosen": -1.8659719228744507,
"logits/rejected": -1.837323546409607,
"logps/chosen": -316.7166748046875,
"logps/rejected": -247.98440551757812,
"loss": 0.5108,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": 2.811978816986084,
"rewards/margins": 1.7324354648590088,
"rewards/rejected": 1.0795437097549438,
"step": 40
},
{
"epoch": 0.7373271889400922,
"eval_logits/chosen": -1.8276515007019043,
"eval_logits/rejected": -1.8153278827667236,
"eval_logps/chosen": -300.355224609375,
"eval_logps/rejected": -250.92237854003906,
"eval_loss": 0.49955785274505615,
"eval_rewards/accuracies": 0.800000011920929,
"eval_rewards/chosen": 2.801389455795288,
"eval_rewards/margins": 2.0170528888702393,
"eval_rewards/rejected": 0.7843364477157593,
"eval_runtime": 105.9206,
"eval_samples_per_second": 14.511,
"eval_steps_per_second": 0.236,
"step": 40
},
{
"epoch": 0.8294930875576036,
"grad_norm": 37.49193294339852,
"learning_rate": 8.74782041870563e-07,
"logits/chosen": -1.816886305809021,
"logits/rejected": -1.783071517944336,
"logps/chosen": -297.0343017578125,
"logps/rejected": -222.45816040039062,
"loss": 0.459,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 2.688863515853882,
"rewards/margins": 2.0437610149383545,
"rewards/rejected": 0.6451026201248169,
"step": 45
},
{
"epoch": 0.9216589861751152,
"grad_norm": 43.56619989347279,
"learning_rate": 8.386407858128706e-07,
"logits/chosen": -1.8166990280151367,
"logits/rejected": -1.7925169467926025,
"logps/chosen": -314.9337158203125,
"logps/rejected": -244.59609985351562,
"loss": 0.4783,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 2.7192726135253906,
"rewards/margins": 1.5147265195846558,
"rewards/rejected": 1.2045462131500244,
"step": 50
},
{
"epoch": 1.0138248847926268,
"grad_norm": 22.442449713126035,
"learning_rate": 7.988862191016203e-07,
"logits/chosen": -1.842585802078247,
"logits/rejected": -1.831575632095337,
"logps/chosen": -302.282958984375,
"logps/rejected": -239.08016967773438,
"loss": 0.3881,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": 2.8818259239196777,
"rewards/margins": 2.1279635429382324,
"rewards/rejected": 0.7538624405860901,
"step": 55
},
{
"epoch": 1.1059907834101383,
"grad_norm": 15.231974843280263,
"learning_rate": 7.559425245448005e-07,
"logits/chosen": -1.9866081476211548,
"logits/rejected": -1.9679405689239502,
"logps/chosen": -297.02923583984375,
"logps/rejected": -269.2718200683594,
"loss": 0.1072,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 3.7900550365448,
"rewards/margins": 4.232701301574707,
"rewards/rejected": -0.4426456391811371,
"step": 60
},
{
"epoch": 1.1981566820276497,
"grad_norm": 18.307111489589293,
"learning_rate": 7.102679130713537e-07,
"logits/chosen": -2.104555130004883,
"logits/rejected": -2.1196866035461426,
"logps/chosen": -333.6317138671875,
"logps/rejected": -247.2637939453125,
"loss": 0.1207,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 4.633069038391113,
"rewards/margins": 4.566880226135254,
"rewards/rejected": 0.06618879735469818,
"step": 65
},
{
"epoch": 1.2903225806451613,
"grad_norm": 21.546461613665333,
"learning_rate": 6.623497346023417e-07,
"logits/chosen": -2.1033987998962402,
"logits/rejected": -2.08524751663208,
"logps/chosen": -305.8551940917969,
"logps/rejected": -242.0274200439453,
"loss": 0.134,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 4.447857856750488,
"rewards/margins": 4.793730735778809,
"rewards/rejected": -0.34587258100509644,
"step": 70
},
{
"epoch": 1.3824884792626728,
"grad_norm": 18.32116519016538,
"learning_rate": 6.126992780079031e-07,
"logits/chosen": -1.9051921367645264,
"logits/rejected": -1.8795219659805298,
"logps/chosen": -289.46319580078125,
"logps/rejected": -261.77264404296875,
"loss": 0.1416,
"rewards/accuracies": 0.96875,
"rewards/chosen": 3.8727810382843018,
"rewards/margins": 4.719055652618408,
"rewards/rejected": -0.846274733543396,
"step": 75
},
{
"epoch": 1.4746543778801844,
"grad_norm": 20.574623734909377,
"learning_rate": 5.618463156346739e-07,
"logits/chosen": -1.8352515697479248,
"logits/rejected": -1.7718498706817627,
"logps/chosen": -299.86773681640625,
"logps/rejected": -235.0732421875,
"loss": 0.1548,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": 3.8516736030578613,
"rewards/margins": 4.764138221740723,
"rewards/rejected": -0.9124643206596375,
"step": 80
},
{
"epoch": 1.4746543778801844,
"eval_logits/chosen": -1.7812533378601074,
"eval_logits/rejected": -1.7896403074264526,
"eval_logps/chosen": -302.47705078125,
"eval_logps/rejected": -261.8770446777344,
"eval_loss": 0.5515583157539368,
"eval_rewards/accuracies": 0.8149999976158142,
"eval_rewards/chosen": 2.5892090797424316,
"eval_rewards/margins": 2.9003350734710693,
"eval_rewards/rejected": -0.3111259341239929,
"eval_runtime": 104.7508,
"eval_samples_per_second": 14.673,
"eval_steps_per_second": 0.239,
"step": 80
},
{
"epoch": 1.5668202764976957,
"grad_norm": 16.930391905891195,
"learning_rate": 5.103334506137772e-07,
"logits/chosen": -1.8332993984222412,
"logits/rejected": -1.7760483026504517,
"logps/chosen": -305.48089599609375,
"logps/rejected": -244.80807495117188,
"loss": 0.1306,
"rewards/accuracies": 0.96875,
"rewards/chosen": 3.6695034503936768,
"rewards/margins": 4.8659348487854,
"rewards/rejected": -1.1964311599731445,
"step": 85
},
{
"epoch": 1.6589861751152073,
"grad_norm": 21.11449418778823,
"learning_rate": 4.5871032726383385e-07,
"logits/chosen": -1.8413625955581665,
"logits/rejected": -1.817728042602539,
"logps/chosen": -304.38458251953125,
"logps/rejected": -250.87478637695312,
"loss": 0.177,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 3.8436717987060547,
"rewards/margins": 5.000934600830078,
"rewards/rejected": -1.1572625637054443,
"step": 90
},
{
"epoch": 1.7511520737327189,
"grad_norm": 25.731965158002115,
"learning_rate": 4.075277663642208e-07,
"logits/chosen": -1.8851152658462524,
"logits/rejected": -1.852927565574646,
"logps/chosen": -295.5654296875,
"logps/rejected": -266.7487487792969,
"loss": 0.1983,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 3.588088274002075,
"rewards/margins": 4.905179023742676,
"rewards/rejected": -1.3170902729034424,
"step": 95
},
{
"epoch": 1.8433179723502304,
"grad_norm": 21.671116502246942,
"learning_rate": 3.5733188787544746e-07,
"logits/chosen": -1.9296363592147827,
"logits/rejected": -1.9144790172576904,
"logps/chosen": -288.9561462402344,
"logps/rejected": -262.56787109375,
"loss": 0.158,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 3.9653868675231934,
"rewards/margins": 4.910740852355957,
"rewards/rejected": -0.9453536868095398,
"step": 100
},
{
"epoch": 1.935483870967742,
"grad_norm": 23.77879577555757,
"learning_rate": 3.086582838174551e-07,
"logits/chosen": -2.025709390640259,
"logits/rejected": -1.9932079315185547,
"logps/chosen": -320.6540832519531,
"logps/rejected": -243.01376342773438,
"loss": 0.1593,
"rewards/accuracies": 0.96875,
"rewards/chosen": 4.19329309463501,
"rewards/margins": 4.572449684143066,
"rewards/rejected": -0.37915733456611633,
"step": 105
},
{
"epoch": 2.0276497695852536,
"grad_norm": 7.998912412806614,
"learning_rate": 2.620263034814632e-07,
"logits/chosen": -2.079678773880005,
"logits/rejected": -2.073202133178711,
"logps/chosen": -289.73419189453125,
"logps/rejected": -268.50738525390625,
"loss": 0.109,
"rewards/accuracies": 0.96875,
"rewards/chosen": 4.6027398109436035,
"rewards/margins": 5.521824359893799,
"rewards/rejected": -0.9190845489501953,
"step": 110
},
{
"epoch": 2.119815668202765,
"grad_norm": 11.517879985604935,
"learning_rate": 2.1793351195237446e-07,
"logits/chosen": -2.0473215579986572,
"logits/rejected": -2.0440163612365723,
"logps/chosen": -293.99163818359375,
"logps/rejected": -247.82156372070312,
"loss": 0.0524,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 4.580502510070801,
"rewards/margins": 6.078469753265381,
"rewards/rejected": -1.4979677200317383,
"step": 115
},
{
"epoch": 2.2119815668202767,
"grad_norm": 10.044969791434443,
"learning_rate": 1.768503810695295e-07,
"logits/chosen": -2.0370969772338867,
"logits/rejected": -2.016648769378662,
"logps/chosen": -296.79254150390625,
"logps/rejected": -247.7318878173828,
"loss": 0.0739,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 4.419327735900879,
"rewards/margins": 5.548590183258057,
"rewards/rejected": -1.1292626857757568,
"step": 120
},
{
"epoch": 2.2119815668202767,
"eval_logits/chosen": -1.9930505752563477,
"eval_logits/rejected": -2.023695707321167,
"eval_logps/chosen": -299.45892333984375,
"eval_logps/rejected": -260.5440673828125,
"eval_loss": 0.5419167876243591,
"eval_rewards/accuracies": 0.8149999976158142,
"eval_rewards/chosen": 2.891019821166992,
"eval_rewards/margins": 3.0688512325286865,
"eval_rewards/rejected": -0.17783160507678986,
"eval_runtime": 104.7566,
"eval_samples_per_second": 14.672,
"eval_steps_per_second": 0.239,
"step": 120
},
{
"epoch": 2.3041474654377883,
"grad_norm": 9.632925545018697,
"learning_rate": 1.3921526947346901e-07,
"logits/chosen": -1.9562991857528687,
"logits/rejected": -1.9737581014633179,
"logps/chosen": -285.3409729003906,
"logps/rejected": -246.0544891357422,
"loss": 0.0495,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": 4.24416971206665,
"rewards/margins": 5.664097785949707,
"rewards/rejected": -1.419929027557373,
"step": 125
},
{
"epoch": 2.3963133640552994,
"grad_norm": 13.62582621522216,
"learning_rate": 1.0542974530180327e-07,
"logits/chosen": -1.9481559991836548,
"logits/rejected": -1.9454014301300049,
"logps/chosen": -314.75067138671875,
"logps/rejected": -236.7272186279297,
"loss": 0.0637,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": 4.275069236755371,
"rewards/margins": 5.598044395446777,
"rewards/rejected": -1.3229751586914062,
"step": 130
},
{
"epoch": 2.488479262672811,
"grad_norm": 7.059837317710805,
"learning_rate": 7.585430144121318e-08,
"logits/chosen": -1.8857179880142212,
"logits/rejected": -1.8879835605621338,
"logps/chosen": -294.124755859375,
"logps/rejected": -270.05633544921875,
"loss": 0.0368,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 4.17112922668457,
"rewards/margins": 5.703551292419434,
"rewards/rejected": -1.532421350479126,
"step": 135
},
{
"epoch": 2.5806451612903225,
"grad_norm": 14.302811017421536,
"learning_rate": 5.080450905401057e-08,
"logits/chosen": -1.9461250305175781,
"logits/rejected": -1.869018793106079,
"logps/chosen": -277.7373046875,
"logps/rejected": -269.21160888671875,
"loss": 0.0801,
"rewards/accuracies": 0.96875,
"rewards/chosen": 4.184547424316406,
"rewards/margins": 5.217525482177734,
"rewards/rejected": -1.0329779386520386,
"step": 140
},
{
"epoch": 2.672811059907834,
"grad_norm": 10.869742441859588,
"learning_rate": 3.054765042128521e-08,
"logits/chosen": -1.9559131860733032,
"logits/rejected": -1.9399007558822632,
"logps/chosen": -302.7723693847656,
"logps/rejected": -274.4209289550781,
"loss": 0.0562,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": 4.239922523498535,
"rewards/margins": 5.70468282699585,
"rewards/rejected": -1.4647598266601562,
"step": 145
},
{
"epoch": 2.7649769585253456,
"grad_norm": 21.93426765644073,
"learning_rate": 1.5299867030334813e-08,
"logits/chosen": -1.9453132152557373,
"logits/rejected": -1.9195010662078857,
"logps/chosen": -313.5506286621094,
"logps/rejected": -258.9598693847656,
"loss": 0.0751,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 4.334193229675293,
"rewards/margins": 5.6270341873168945,
"rewards/rejected": -1.292839765548706,
"step": 150
},
{
"epoch": 2.857142857142857,
"grad_norm": 13.389771777225755,
"learning_rate": 5.223853336398632e-09,
"logits/chosen": -1.9442565441131592,
"logits/rejected": -1.934654951095581,
"logps/chosen": -288.51190185546875,
"logps/rejected": -266.2658996582031,
"loss": 0.0671,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": 4.63767671585083,
"rewards/margins": 5.82180643081665,
"rewards/rejected": -1.1841299533843994,
"step": 155
},
{
"epoch": 2.9493087557603688,
"grad_norm": 15.653208968679852,
"learning_rate": 4.271208063494902e-10,
"logits/chosen": -1.9627739191055298,
"logits/rejected": -1.9069459438323975,
"logps/chosen": -293.91802978515625,
"logps/rejected": -261.7667541503906,
"loss": 0.0785,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 4.489874839782715,
"rewards/margins": 5.796733379364014,
"rewards/rejected": -1.3068585395812988,
"step": 160
},
{
"epoch": 2.9493087557603688,
"eval_logits/chosen": -1.9315097332000732,
"eval_logits/rejected": -1.9545789957046509,
"eval_logps/chosen": -298.7916564941406,
"eval_logps/rejected": -260.6390075683594,
"eval_loss": 0.5450286269187927,
"eval_rewards/accuracies": 0.824999988079071,
"eval_rewards/chosen": 2.957746982574463,
"eval_rewards/margins": 3.1450705528259277,
"eval_rewards/rejected": -0.18732379376888275,
"eval_runtime": 104.7018,
"eval_samples_per_second": 14.68,
"eval_steps_per_second": 0.239,
"step": 160
},
{
"epoch": 2.986175115207373,
"step": 162,
"total_flos": 1909982962384896.0,
"train_loss": 0.24334129419775657,
"train_runtime": 6038.8466,
"train_samples_per_second": 6.87,
"train_steps_per_second": 0.027
}
],
"logging_steps": 5,
"max_steps": 162,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 40,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1909982962384896.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}