htlou's picture
Upload folder using huggingface_hub
ae704e0 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.981366459627329,
"eval_steps": 50,
"global_step": 120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12422360248447205,
"grad_norm": 66.62363784407601,
"learning_rate": 5e-07,
"logits/chosen": -2.737199068069458,
"logits/rejected": -2.716709613800049,
"logps/chosen": -271.75335693359375,
"logps/rejected": -215.45742797851562,
"loss": 0.6894,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": 0.016401026397943497,
"rewards/margins": 0.012943076901137829,
"rewards/rejected": 0.0034579492639750242,
"step": 5
},
{
"epoch": 0.2484472049689441,
"grad_norm": 48.6825109729797,
"learning_rate": 1e-06,
"logits/chosen": -2.673478364944458,
"logits/rejected": -2.6825709342956543,
"logps/chosen": -274.2002258300781,
"logps/rejected": -210.3507537841797,
"loss": 0.6196,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": 0.5796890258789062,
"rewards/margins": 0.2897328734397888,
"rewards/rejected": 0.28995609283447266,
"step": 10
},
{
"epoch": 0.37267080745341613,
"grad_norm": 46.74043921332661,
"learning_rate": 9.949107209404663e-07,
"logits/chosen": -2.474240303039551,
"logits/rejected": -2.441648006439209,
"logps/chosen": -231.01181030273438,
"logps/rejected": -208.47927856445312,
"loss": 0.6484,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 1.5896726846694946,
"rewards/margins": 0.8002876043319702,
"rewards/rejected": 0.7893850207328796,
"step": 15
},
{
"epoch": 0.4968944099378882,
"grad_norm": 51.05927483012936,
"learning_rate": 9.797464868072486e-07,
"logits/chosen": -2.29701566696167,
"logits/rejected": -2.2914681434631348,
"logps/chosen": -262.2198181152344,
"logps/rejected": -218.2954864501953,
"loss": 0.6125,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": 1.8730179071426392,
"rewards/margins": 1.2619432210922241,
"rewards/rejected": 0.6110745668411255,
"step": 20
},
{
"epoch": 0.6211180124223602,
"grad_norm": 53.85418375459847,
"learning_rate": 9.548159976772592e-07,
"logits/chosen": -2.2311034202575684,
"logits/rejected": -2.2267913818359375,
"logps/chosen": -259.2829895019531,
"logps/rejected": -203.93215942382812,
"loss": 0.581,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": 1.702134370803833,
"rewards/margins": 1.3699487447738647,
"rewards/rejected": 0.3321855068206787,
"step": 25
},
{
"epoch": 0.7453416149068323,
"grad_norm": 41.921620963182335,
"learning_rate": 9.206267664155906e-07,
"logits/chosen": -2.2750821113586426,
"logits/rejected": -2.2540435791015625,
"logps/chosen": -252.75881958007812,
"logps/rejected": -227.4959716796875,
"loss": 0.5342,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": 1.7333396673202515,
"rewards/margins": 1.6640625,
"rewards/rejected": 0.06927712261676788,
"step": 30
},
{
"epoch": 0.8695652173913043,
"grad_norm": 36.22502143325224,
"learning_rate": 8.778747871771291e-07,
"logits/chosen": -2.3146414756774902,
"logits/rejected": -2.2875092029571533,
"logps/chosen": -263.4291687011719,
"logps/rejected": -221.8418731689453,
"loss": 0.5177,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": 1.4538803100585938,
"rewards/margins": 1.299953579902649,
"rewards/rejected": 0.153926819562912,
"step": 35
},
{
"epoch": 0.9937888198757764,
"grad_norm": 45.69470972967703,
"learning_rate": 8.274303669726426e-07,
"logits/chosen": -2.289536714553833,
"logits/rejected": -2.2876548767089844,
"logps/chosen": -274.63818359375,
"logps/rejected": -218.11038208007812,
"loss": 0.5869,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": 1.579737901687622,
"rewards/margins": 1.5463366508483887,
"rewards/rejected": 0.03340107947587967,
"step": 40
},
{
"epoch": 1.1180124223602483,
"grad_norm": 25.091084074273006,
"learning_rate": 7.703204087277988e-07,
"logits/chosen": -2.2782795429229736,
"logits/rejected": -2.2675344944000244,
"logps/chosen": -241.4102020263672,
"logps/rejected": -232.1530303955078,
"loss": 0.2415,
"rewards/accuracies": 0.90625,
"rewards/chosen": 2.0038599967956543,
"rewards/margins": 2.8345634937286377,
"rewards/rejected": -0.8307037353515625,
"step": 45
},
{
"epoch": 1.2422360248447206,
"grad_norm": 21.01590218402833,
"learning_rate": 7.077075065009433e-07,
"logits/chosen": -2.304749011993408,
"logits/rejected": -2.2815442085266113,
"logps/chosen": -248.9634552001953,
"logps/rejected": -262.0959777832031,
"loss": 0.2594,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": 2.365910053253174,
"rewards/margins": 3.3114895820617676,
"rewards/rejected": -0.9455796480178833,
"step": 50
},
{
"epoch": 1.2422360248447206,
"eval_logits/chosen": -2.2956461906433105,
"eval_logits/rejected": -2.294363498687744,
"eval_logps/chosen": -252.96337890625,
"eval_logps/rejected": -231.52207946777344,
"eval_loss": 0.5901808142662048,
"eval_rewards/accuracies": 0.7986111044883728,
"eval_rewards/chosen": 1.6738612651824951,
"eval_rewards/margins": 1.8752751350402832,
"eval_rewards/rejected": -0.20141386985778809,
"eval_runtime": 75.2847,
"eval_samples_per_second": 15.143,
"eval_steps_per_second": 0.239,
"step": 50
},
{
"epoch": 1.3664596273291925,
"grad_norm": 18.646514200070648,
"learning_rate": 6.408662784207149e-07,
"logits/chosen": -2.307347297668457,
"logits/rejected": -2.263925075531006,
"logps/chosen": -249.4068145751953,
"logps/rejected": -214.16445922851562,
"loss": 0.2176,
"rewards/accuracies": 0.90625,
"rewards/chosen": 2.3753602504730225,
"rewards/margins": 3.139112949371338,
"rewards/rejected": -0.7637524008750916,
"step": 55
},
{
"epoch": 1.4906832298136645,
"grad_norm": 20.993852286888092,
"learning_rate": 5.711574191366427e-07,
"logits/chosen": -2.32312273979187,
"logits/rejected": -2.313824415206909,
"logps/chosen": -243.2293243408203,
"logps/rejected": -206.0082550048828,
"loss": 0.2417,
"rewards/accuracies": 0.9375,
"rewards/chosen": 2.353982448577881,
"rewards/margins": 3.16640043258667,
"rewards/rejected": -0.8124181032180786,
"step": 60
},
{
"epoch": 1.6149068322981366,
"grad_norm": 19.030512980103158,
"learning_rate": 5e-07,
"logits/chosen": -2.377202272415161,
"logits/rejected": -2.331650495529175,
"logps/chosen": -248.04483032226562,
"logps/rejected": -222.1618194580078,
"loss": 0.2488,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 2.3110172748565674,
"rewards/margins": 3.183046579360962,
"rewards/rejected": -0.8720294833183289,
"step": 65
},
{
"epoch": 1.7391304347826086,
"grad_norm": 28.536465499864658,
"learning_rate": 4.2884258086335745e-07,
"logits/chosen": -2.3466696739196777,
"logits/rejected": -2.3642795085906982,
"logps/chosen": -262.2099304199219,
"logps/rejected": -250.7125701904297,
"loss": 0.3053,
"rewards/accuracies": 0.90625,
"rewards/chosen": 2.6217122077941895,
"rewards/margins": 3.932690382003784,
"rewards/rejected": -1.3109780550003052,
"step": 70
},
{
"epoch": 1.8633540372670807,
"grad_norm": 27.768310670938217,
"learning_rate": 3.591337215792851e-07,
"logits/chosen": -2.3459136486053467,
"logits/rejected": -2.3365659713745117,
"logps/chosen": -250.7726593017578,
"logps/rejected": -221.8275909423828,
"loss": 0.3016,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": 2.6287598609924316,
"rewards/margins": 3.746206760406494,
"rewards/rejected": -1.117447018623352,
"step": 75
},
{
"epoch": 1.9875776397515528,
"grad_norm": 24.938158584160053,
"learning_rate": 2.922924934990568e-07,
"logits/chosen": -2.3689913749694824,
"logits/rejected": -2.3461122512817383,
"logps/chosen": -255.01962280273438,
"logps/rejected": -232.1419677734375,
"loss": 0.2954,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 3.2598438262939453,
"rewards/margins": 4.275376319885254,
"rewards/rejected": -1.015533208847046,
"step": 80
},
{
"epoch": 2.111801242236025,
"grad_norm": 22.47951274264517,
"learning_rate": 2.2967959127220137e-07,
"logits/chosen": -2.3769583702087402,
"logits/rejected": -2.313356399536133,
"logps/chosen": -245.34432983398438,
"logps/rejected": -256.83477783203125,
"loss": 0.1701,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 2.893587112426758,
"rewards/margins": 4.111905574798584,
"rewards/rejected": -1.218318223953247,
"step": 85
},
{
"epoch": 2.2360248447204967,
"grad_norm": 16.587973457804008,
"learning_rate": 1.725696330273575e-07,
"logits/chosen": -2.3217408657073975,
"logits/rejected": -2.322180986404419,
"logps/chosen": -261.08746337890625,
"logps/rejected": -253.5300750732422,
"loss": 0.1669,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 3.13687801361084,
"rewards/margins": 4.253005027770996,
"rewards/rejected": -1.1161267757415771,
"step": 90
},
{
"epoch": 2.360248447204969,
"grad_norm": 14.3562650408135,
"learning_rate": 1.2212521282287093e-07,
"logits/chosen": -2.2643933296203613,
"logits/rejected": -2.2418789863586426,
"logps/chosen": -230.6456298828125,
"logps/rejected": -250.2694549560547,
"loss": 0.1554,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 2.822519302368164,
"rewards/margins": 4.386029243469238,
"rewards/rejected": -1.5635101795196533,
"step": 95
},
{
"epoch": 2.4844720496894412,
"grad_norm": 18.916632747497697,
"learning_rate": 7.937323358440934e-08,
"logits/chosen": -2.2358391284942627,
"logits/rejected": -2.2123188972473145,
"logps/chosen": -245.39889526367188,
"logps/rejected": -213.0316925048828,
"loss": 0.1658,
"rewards/accuracies": 0.96875,
"rewards/chosen": 2.8756985664367676,
"rewards/margins": 4.066061973571777,
"rewards/rejected": -1.1903636455535889,
"step": 100
},
{
"epoch": 2.4844720496894412,
"eval_logits/chosen": -2.241981029510498,
"eval_logits/rejected": -2.237220048904419,
"eval_logps/chosen": -249.4546661376953,
"eval_logps/rejected": -232.948974609375,
"eval_loss": 0.5874444842338562,
"eval_rewards/accuracies": 0.8333333134651184,
"eval_rewards/chosen": 2.0247349739074707,
"eval_rewards/margins": 2.3688364028930664,
"eval_rewards/rejected": -0.34410178661346436,
"eval_runtime": 74.9734,
"eval_samples_per_second": 15.205,
"eval_steps_per_second": 0.24,
"step": 100
},
{
"epoch": 2.608695652173913,
"grad_norm": 12.416778851124059,
"learning_rate": 4.518400232274078e-08,
"logits/chosen": -2.229137659072876,
"logits/rejected": -2.201681613922119,
"logps/chosen": -237.0298309326172,
"logps/rejected": -240.43429565429688,
"loss": 0.1344,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 2.9165587425231934,
"rewards/margins": 4.170973300933838,
"rewards/rejected": -1.2544142007827759,
"step": 105
},
{
"epoch": 2.732919254658385,
"grad_norm": 15.39582049536958,
"learning_rate": 2.025351319275137e-08,
"logits/chosen": -2.226637840270996,
"logits/rejected": -2.1901309490203857,
"logps/chosen": -232.8177947998047,
"logps/rejected": -221.37646484375,
"loss": 0.1763,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": 2.9458189010620117,
"rewards/margins": 4.043347358703613,
"rewards/rejected": -1.0975282192230225,
"step": 110
},
{
"epoch": 2.857142857142857,
"grad_norm": 17.081473104516775,
"learning_rate": 5.0892790595336575e-09,
"logits/chosen": -2.220303773880005,
"logits/rejected": -2.2176926136016846,
"logps/chosen": -252.9158172607422,
"logps/rejected": -242.60400390625,
"loss": 0.1587,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 3.0858166217803955,
"rewards/margins": 4.5951924324035645,
"rewards/rejected": -1.5093762874603271,
"step": 115
},
{
"epoch": 2.981366459627329,
"grad_norm": 17.254264529974623,
"learning_rate": 0.0,
"logits/chosen": -2.205538511276245,
"logits/rejected": -2.1971898078918457,
"logps/chosen": -235.06808471679688,
"logps/rejected": -225.6627197265625,
"loss": 0.1671,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 2.8083322048187256,
"rewards/margins": 4.552498817443848,
"rewards/rejected": -1.7441661357879639,
"step": 120
}
],
"logging_steps": 5,
"max_steps": 120,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1414680891359232.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}