{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.4844720496894412,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12422360248447205,
      "grad_norm": 66.62363784407601,
      "learning_rate": 5e-07,
      "logits/chosen": -2.737199068069458,
      "logits/rejected": -2.716709613800049,
      "logps/chosen": -271.75335693359375,
      "logps/rejected": -215.45742797851562,
      "loss": 0.6894,
      "rewards/accuracies": 0.39375001192092896,
      "rewards/chosen": 0.016401026397943497,
      "rewards/margins": 0.012943076901137829,
      "rewards/rejected": 0.0034579492639750242,
      "step": 5
    },
    {
      "epoch": 0.2484472049689441,
      "grad_norm": 48.6825109729797,
      "learning_rate": 1e-06,
      "logits/chosen": -2.673478364944458,
      "logits/rejected": -2.6825709342956543,
      "logps/chosen": -274.2002258300781,
      "logps/rejected": -210.3507537841797,
      "loss": 0.6196,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": 0.5796890258789062,
      "rewards/margins": 0.2897328734397888,
      "rewards/rejected": 0.28995609283447266,
      "step": 10
    },
    {
      "epoch": 0.37267080745341613,
      "grad_norm": 46.74043921332661,
      "learning_rate": 9.949107209404663e-07,
      "logits/chosen": -2.474240303039551,
      "logits/rejected": -2.441648006439209,
      "logps/chosen": -231.01181030273438,
      "logps/rejected": -208.47927856445312,
      "loss": 0.6484,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 1.5896726846694946,
      "rewards/margins": 0.8002876043319702,
      "rewards/rejected": 0.7893850207328796,
      "step": 15
    },
    {
      "epoch": 0.4968944099378882,
      "grad_norm": 51.05927483012936,
      "learning_rate": 9.797464868072486e-07,
      "logits/chosen": -2.29701566696167,
      "logits/rejected": -2.2914681434631348,
      "logps/chosen": -262.2198181152344,
      "logps/rejected": -218.2954864501953,
      "loss": 0.6125,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": 1.8730179071426392,
      "rewards/margins": 1.2619432210922241,
      "rewards/rejected": 0.6110745668411255,
      "step": 20
    },
    {
      "epoch": 0.6211180124223602,
      "grad_norm": 53.85418375459847,
      "learning_rate": 9.548159976772592e-07,
      "logits/chosen": -2.2311034202575684,
      "logits/rejected": -2.2267913818359375,
      "logps/chosen": -259.2829895019531,
      "logps/rejected": -203.93215942382812,
      "loss": 0.581,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": 1.702134370803833,
      "rewards/margins": 1.3699487447738647,
      "rewards/rejected": 0.3321855068206787,
      "step": 25
    },
    {
      "epoch": 0.7453416149068323,
      "grad_norm": 41.921620963182335,
      "learning_rate": 9.206267664155906e-07,
      "logits/chosen": -2.2750821113586426,
      "logits/rejected": -2.2540435791015625,
      "logps/chosen": -252.75881958007812,
      "logps/rejected": -227.4959716796875,
      "loss": 0.5342,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": 1.7333396673202515,
      "rewards/margins": 1.6640625,
      "rewards/rejected": 0.06927712261676788,
      "step": 30
    },
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 36.22502143325224,
      "learning_rate": 8.778747871771291e-07,
      "logits/chosen": -2.3146414756774902,
      "logits/rejected": -2.2875092029571533,
      "logps/chosen": -263.4291687011719,
      "logps/rejected": -221.8418731689453,
      "loss": 0.5177,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": 1.4538803100585938,
      "rewards/margins": 1.299953579902649,
      "rewards/rejected": 0.153926819562912,
      "step": 35
    },
    {
      "epoch": 0.9937888198757764,
      "grad_norm": 45.69470972967703,
      "learning_rate": 8.274303669726426e-07,
      "logits/chosen": -2.289536714553833,
      "logits/rejected": -2.2876548767089844,
      "logps/chosen": -274.63818359375,
      "logps/rejected": -218.11038208007812,
      "loss": 0.5869,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 1.579737901687622,
      "rewards/margins": 1.5463366508483887,
      "rewards/rejected": 0.03340107947587967,
      "step": 40
    },
    {
      "epoch": 1.1180124223602483,
      "grad_norm": 25.091084074273006,
      "learning_rate": 7.703204087277988e-07,
      "logits/chosen": -2.2782795429229736,
      "logits/rejected": -2.2675344944000244,
      "logps/chosen": -241.4102020263672,
      "logps/rejected": -232.1530303955078,
      "loss": 0.2415,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": 2.0038599967956543,
      "rewards/margins": 2.8345634937286377,
      "rewards/rejected": -0.8307037353515625,
      "step": 45
    },
    {
      "epoch": 1.2422360248447206,
      "grad_norm": 21.01590218402833,
      "learning_rate": 7.077075065009433e-07,
      "logits/chosen": -2.304749011993408,
      "logits/rejected": -2.2815442085266113,
      "logps/chosen": -248.9634552001953,
      "logps/rejected": -262.0959777832031,
      "loss": 0.2594,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": 2.365910053253174,
      "rewards/margins": 3.3114895820617676,
      "rewards/rejected": -0.9455796480178833,
      "step": 50
    },
    {
      "epoch": 1.2422360248447206,
      "eval_logits/chosen": -2.2956461906433105,
      "eval_logits/rejected": -2.294363498687744,
      "eval_logps/chosen": -252.96337890625,
      "eval_logps/rejected": -231.52207946777344,
      "eval_loss": 0.5901808142662048,
      "eval_rewards/accuracies": 0.7986111044883728,
      "eval_rewards/chosen": 1.6738612651824951,
      "eval_rewards/margins": 1.8752751350402832,
      "eval_rewards/rejected": -0.20141386985778809,
      "eval_runtime": 75.2847,
      "eval_samples_per_second": 15.143,
      "eval_steps_per_second": 0.239,
      "step": 50
    },
    {
      "epoch": 1.3664596273291925,
      "grad_norm": 18.646514200070648,
      "learning_rate": 6.408662784207149e-07,
      "logits/chosen": -2.307347297668457,
      "logits/rejected": -2.263925075531006,
      "logps/chosen": -249.4068145751953,
      "logps/rejected": -214.16445922851562,
      "loss": 0.2176,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": 2.3753602504730225,
      "rewards/margins": 3.139112949371338,
      "rewards/rejected": -0.7637524008750916,
      "step": 55
    },
    {
      "epoch": 1.4906832298136645,
      "grad_norm": 20.993852286888092,
      "learning_rate": 5.711574191366427e-07,
      "logits/chosen": -2.32312273979187,
      "logits/rejected": -2.313824415206909,
      "logps/chosen": -243.2293243408203,
      "logps/rejected": -206.0082550048828,
      "loss": 0.2417,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": 2.353982448577881,
      "rewards/margins": 3.16640043258667,
      "rewards/rejected": -0.8124181032180786,
      "step": 60
    },
    {
      "epoch": 1.6149068322981366,
      "grad_norm": 19.030512980103158,
      "learning_rate": 5e-07,
      "logits/chosen": -2.377202272415161,
      "logits/rejected": -2.331650495529175,
      "logps/chosen": -248.04483032226562,
      "logps/rejected": -222.1618194580078,
      "loss": 0.2488,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": 2.3110172748565674,
      "rewards/margins": 3.183046579360962,
      "rewards/rejected": -0.8720294833183289,
      "step": 65
    },
    {
      "epoch": 1.7391304347826086,
      "grad_norm": 28.536465499864658,
      "learning_rate": 4.2884258086335745e-07,
      "logits/chosen": -2.3466696739196777,
      "logits/rejected": -2.3642795085906982,
      "logps/chosen": -262.2099304199219,
      "logps/rejected": -250.7125701904297,
      "loss": 0.3053,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": 2.6217122077941895,
      "rewards/margins": 3.932690382003784,
      "rewards/rejected": -1.3109780550003052,
      "step": 70
    },
    {
      "epoch": 1.8633540372670807,
      "grad_norm": 27.768310670938217,
      "learning_rate": 3.591337215792851e-07,
      "logits/chosen": -2.3459136486053467,
      "logits/rejected": -2.3365659713745117,
      "logps/chosen": -250.7726593017578,
      "logps/rejected": -221.8275909423828,
      "loss": 0.3016,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": 2.6287598609924316,
      "rewards/margins": 3.746206760406494,
      "rewards/rejected": -1.117447018623352,
      "step": 75
    },
    {
      "epoch": 1.9875776397515528,
      "grad_norm": 24.938158584160053,
      "learning_rate": 2.922924934990568e-07,
      "logits/chosen": -2.3689913749694824,
      "logits/rejected": -2.3461122512817383,
      "logps/chosen": -255.01962280273438,
      "logps/rejected": -232.1419677734375,
      "loss": 0.2954,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": 3.2598438262939453,
      "rewards/margins": 4.275376319885254,
      "rewards/rejected": -1.015533208847046,
      "step": 80
    },
    {
      "epoch": 2.111801242236025,
      "grad_norm": 22.47951274264517,
      "learning_rate": 2.2967959127220137e-07,
      "logits/chosen": -2.3769583702087402,
      "logits/rejected": -2.313356399536133,
      "logps/chosen": -245.34432983398438,
      "logps/rejected": -256.83477783203125,
      "loss": 0.1701,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": 2.893587112426758,
      "rewards/margins": 4.111905574798584,
      "rewards/rejected": -1.218318223953247,
      "step": 85
    },
    {
      "epoch": 2.2360248447204967,
      "grad_norm": 16.587973457804008,
      "learning_rate": 1.725696330273575e-07,
      "logits/chosen": -2.3217408657073975,
      "logits/rejected": -2.322180986404419,
      "logps/chosen": -261.08746337890625,
      "logps/rejected": -253.5300750732422,
      "loss": 0.1669,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": 3.13687801361084,
      "rewards/margins": 4.253005027770996,
      "rewards/rejected": -1.1161267757415771,
      "step": 90
    },
    {
      "epoch": 2.360248447204969,
      "grad_norm": 14.3562650408135,
      "learning_rate": 1.2212521282287093e-07,
      "logits/chosen": -2.2643933296203613,
      "logits/rejected": -2.2418789863586426,
      "logps/chosen": -230.6456298828125,
      "logps/rejected": -250.2694549560547,
      "loss": 0.1554,
      "rewards/accuracies": 0.956250011920929,
      "rewards/chosen": 2.822519302368164,
      "rewards/margins": 4.386029243469238,
      "rewards/rejected": -1.5635101795196533,
      "step": 95
    },
    {
      "epoch": 2.4844720496894412,
      "grad_norm": 18.916632747497697,
      "learning_rate": 7.937323358440934e-08,
      "logits/chosen": -2.2358391284942627,
      "logits/rejected": -2.2123188972473145,
      "logps/chosen": -245.39889526367188,
      "logps/rejected": -213.0316925048828,
      "loss": 0.1658,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 2.8756985664367676,
      "rewards/margins": 4.066061973571777,
      "rewards/rejected": -1.1903636455535889,
      "step": 100
    },
    {
      "epoch": 2.4844720496894412,
      "eval_logits/chosen": -2.241981029510498,
      "eval_logits/rejected": -2.237220048904419,
      "eval_logps/chosen": -249.4546661376953,
      "eval_logps/rejected": -232.948974609375,
      "eval_loss": 0.5874444842338562,
      "eval_rewards/accuracies": 0.8333333134651184,
      "eval_rewards/chosen": 2.0247349739074707,
      "eval_rewards/margins": 2.3688364028930664,
      "eval_rewards/rejected": -0.34410178661346436,
      "eval_runtime": 74.9734,
      "eval_samples_per_second": 15.205,
      "eval_steps_per_second": 0.24,
      "step": 100
    }
  ],
  "logging_steps": 5,
  "max_steps": 120,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1178822762299392.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}