{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.981366459627329, "eval_steps": 50, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12422360248447205, "grad_norm": 54.367663803058946, "learning_rate": 5e-07, "logits/chosen": -2.7148144245147705, "logits/rejected": -2.7243547439575195, "logps/chosen": -242.867431640625, "logps/rejected": -227.12136840820312, "loss": 0.691, "rewards/accuracies": 0.3125, "rewards/chosen": 0.009815122000873089, "rewards/margins": 0.005822173319756985, "rewards/rejected": 0.003992948215454817, "step": 5 }, { "epoch": 0.2484472049689441, "grad_norm": 51.804115964444165, "learning_rate": 1e-06, "logits/chosen": -2.6798529624938965, "logits/rejected": -2.703315258026123, "logps/chosen": -256.2458190917969, "logps/rejected": -217.85592651367188, "loss": 0.6456, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.43589210510253906, "rewards/margins": 0.08772359788417816, "rewards/rejected": 0.3481685519218445, "step": 10 }, { "epoch": 0.37267080745341613, "grad_norm": 50.53080123806113, "learning_rate": 9.949107209404663e-07, "logits/chosen": -2.5799756050109863, "logits/rejected": -2.565157651901245, "logps/chosen": -237.31692504882812, "logps/rejected": -208.06655883789062, "loss": 0.6378, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 1.4567431211471558, "rewards/margins": 0.7478972673416138, "rewards/rejected": 0.7088459730148315, "step": 15 }, { "epoch": 0.4968944099378882, "grad_norm": 54.57054056014394, "learning_rate": 9.797464868072486e-07, "logits/chosen": -2.4506874084472656, "logits/rejected": -2.433974027633667, "logps/chosen": -247.51657104492188, "logps/rejected": -216.2230987548828, "loss": 0.6587, "rewards/accuracies": 0.731249988079071, "rewards/chosen": 1.5972602367401123, "rewards/margins": 1.0024363994598389, "rewards/rejected": 0.594823956489563, "step": 20 }, { "epoch": 0.6211180124223602, "grad_norm": 44.117531702150536, "learning_rate": 9.548159976772592e-07, "logits/chosen": -2.4157333374023438, "logits/rejected": -2.3935298919677734, "logps/chosen": -231.5720672607422, "logps/rejected": -216.5135498046875, "loss": 0.6622, "rewards/accuracies": 0.6875, "rewards/chosen": 0.7699011564254761, "rewards/margins": 0.8483353853225708, "rewards/rejected": -0.07843427360057831, "step": 25 }, { "epoch": 0.7453416149068323, "grad_norm": 46.22604593677178, "learning_rate": 9.206267664155906e-07, "logits/chosen": -2.4077823162078857, "logits/rejected": -2.4088187217712402, "logps/chosen": -260.6187744140625, "logps/rejected": -240.7838897705078, "loss": 0.6343, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.9798136949539185, "rewards/margins": 1.1557605266571045, "rewards/rejected": -0.17594675719738007, "step": 30 }, { "epoch": 0.8695652173913043, "grad_norm": 45.738597782002316, "learning_rate": 8.778747871771291e-07, "logits/chosen": -2.4890403747558594, "logits/rejected": -2.4890661239624023, "logps/chosen": -267.4264831542969, "logps/rejected": -236.0730743408203, "loss": 0.6245, "rewards/accuracies": 0.731249988079071, "rewards/chosen": 0.5028200745582581, "rewards/margins": 1.037217617034912, "rewards/rejected": -0.5343974232673645, "step": 35 }, { "epoch": 0.9937888198757764, "grad_norm": 40.58782325478915, "learning_rate": 8.274303669726426e-07, "logits/chosen": -2.464543104171753, "logits/rejected": -2.45270037651062, "logps/chosen": -246.43997192382812, "logps/rejected": -244.5944366455078, "loss": 0.6076, "rewards/accuracies": 0.668749988079071, "rewards/chosen": 0.423252671957016, "rewards/margins": 0.9229713678359985, "rewards/rejected": -0.49971866607666016, "step": 40 }, { "epoch": 1.1180124223602483, "grad_norm": 21.75862492001889, "learning_rate": 7.703204087277988e-07, "logits/chosen": -2.437509059906006, "logits/rejected": -2.4511005878448486, "logps/chosen": -238.031005859375, "logps/rejected": -239.1492462158203, "loss": 0.2467, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 1.3256285190582275, "rewards/margins": 2.4453110694885254, "rewards/rejected": -1.1196826696395874, "step": 45 }, { "epoch": 1.2422360248447206, "grad_norm": 22.132976615768026, "learning_rate": 7.077075065009433e-07, "logits/chosen": -2.484419822692871, "logits/rejected": -2.485710859298706, "logps/chosen": -241.15200805664062, "logps/rejected": -220.26907348632812, "loss": 0.2364, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 1.7490062713623047, "rewards/margins": 2.5016419887542725, "rewards/rejected": -0.7526359558105469, "step": 50 }, { "epoch": 1.2422360248447206, "eval_logits/chosen": -2.5087192058563232, "eval_logits/rejected": -2.515753746032715, "eval_logps/chosen": -249.35264587402344, "eval_logps/rejected": -202.0917205810547, "eval_loss": 0.5729268789291382, "eval_rewards/accuracies": 0.7291666865348816, "eval_rewards/chosen": 1.2346218824386597, "eval_rewards/margins": 1.0999401807785034, "eval_rewards/rejected": 0.13468176126480103, "eval_runtime": 75.094, "eval_samples_per_second": 15.181, "eval_steps_per_second": 0.24, "step": 50 }, { "epoch": 1.3664596273291925, "grad_norm": 17.330600410265617, "learning_rate": 6.408662784207149e-07, "logits/chosen": -2.4883952140808105, "logits/rejected": -2.482597827911377, "logps/chosen": -236.4322052001953, "logps/rejected": -229.5203094482422, "loss": 0.2196, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 2.2716193199157715, "rewards/margins": 3.196570634841919, "rewards/rejected": -0.9249511957168579, "step": 55 }, { "epoch": 1.4906832298136645, "grad_norm": 17.716660362051734, "learning_rate": 5.711574191366427e-07, "logits/chosen": -2.468207836151123, "logits/rejected": -2.4725213050842285, "logps/chosen": -222.43896484375, "logps/rejected": -200.0598602294922, "loss": 0.2119, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.1822891235351562, "rewards/margins": 2.8318796157836914, "rewards/rejected": -0.6495904922485352, "step": 60 }, { "epoch": 1.6149068322981366, "grad_norm": 23.32765774015972, "learning_rate": 5e-07, "logits/chosen": -2.5094847679138184, "logits/rejected": -2.5222580432891846, "logps/chosen": -236.4397430419922, "logps/rejected": -216.79052734375, "loss": 0.2118, "rewards/accuracies": 0.981249988079071, "rewards/chosen": 2.586951494216919, "rewards/margins": 3.453221559524536, "rewards/rejected": -0.8662700653076172, "step": 65 }, { "epoch": 1.7391304347826086, "grad_norm": 25.27024243839641, "learning_rate": 4.2884258086335745e-07, "logits/chosen": -2.506361484527588, "logits/rejected": -2.4916276931762695, "logps/chosen": -228.9758758544922, "logps/rejected": -225.41006469726562, "loss": 0.2294, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 2.325880527496338, "rewards/margins": 3.3348236083984375, "rewards/rejected": -1.0089433193206787, "step": 70 }, { "epoch": 1.8633540372670807, "grad_norm": 27.860922972380834, "learning_rate": 3.591337215792851e-07, "logits/chosen": -2.5197181701660156, "logits/rejected": -2.5090882778167725, "logps/chosen": -239.81277465820312, "logps/rejected": -230.70059204101562, "loss": 0.265, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 2.431699275970459, "rewards/margins": 3.363804340362549, "rewards/rejected": -0.9321050643920898, "step": 75 }, { "epoch": 1.9875776397515528, "grad_norm": 17.61138833178944, "learning_rate": 2.922924934990568e-07, "logits/chosen": -2.543259382247925, "logits/rejected": -2.493022918701172, "logps/chosen": -237.87887573242188, "logps/rejected": -279.49261474609375, "loss": 0.2128, "rewards/accuracies": 0.918749988079071, "rewards/chosen": 2.389310836791992, "rewards/margins": 4.381407260894775, "rewards/rejected": -1.9920963048934937, "step": 80 }, { "epoch": 2.111801242236025, "grad_norm": 13.031340899683215, "learning_rate": 2.2967959127220137e-07, "logits/chosen": -2.5387518405914307, "logits/rejected": -2.5558295249938965, "logps/chosen": -225.1177520751953, "logps/rejected": -233.97073364257812, "loss": 0.1297, "rewards/accuracies": 0.981249988079071, "rewards/chosen": 2.17976713180542, "rewards/margins": 3.992032527923584, "rewards/rejected": -1.812265396118164, "step": 85 }, { "epoch": 2.2360248447204967, "grad_norm": 16.033450629688048, "learning_rate": 1.725696330273575e-07, "logits/chosen": -2.5489468574523926, "logits/rejected": -2.5377697944641113, "logps/chosen": -255.279296875, "logps/rejected": -245.3787078857422, "loss": 0.123, "rewards/accuracies": 0.96875, "rewards/chosen": 2.2218708992004395, "rewards/margins": 4.716561794281006, "rewards/rejected": -2.4946906566619873, "step": 90 }, { "epoch": 2.360248447204969, "grad_norm": 12.307639352242482, "learning_rate": 1.2212521282287093e-07, "logits/chosen": -2.5559678077697754, "logits/rejected": -2.5754735469818115, "logps/chosen": -240.5291748046875, "logps/rejected": -270.05230712890625, "loss": 0.1073, "rewards/accuracies": 0.96875, "rewards/chosen": 1.9212055206298828, "rewards/margins": 4.681941032409668, "rewards/rejected": -2.760735511779785, "step": 95 }, { "epoch": 2.4844720496894412, "grad_norm": 12.425204577942079, "learning_rate": 7.937323358440934e-08, "logits/chosen": -2.549752950668335, "logits/rejected": -2.5533714294433594, "logps/chosen": -235.24368286132812, "logps/rejected": -259.5509033203125, "loss": 0.1061, "rewards/accuracies": 0.96875, "rewards/chosen": 1.9440858364105225, "rewards/margins": 4.815189361572266, "rewards/rejected": -2.8711037635803223, "step": 100 }, { "epoch": 2.4844720496894412, "eval_logits/chosen": -2.55989933013916, "eval_logits/rejected": -2.5775303840637207, "eval_logps/chosen": -254.12814331054688, "eval_logps/rejected": -212.31497192382812, "eval_loss": 0.6158778071403503, "eval_rewards/accuracies": 0.7569444179534912, "eval_rewards/chosen": 0.7570738792419434, "eval_rewards/margins": 1.6447181701660156, "eval_rewards/rejected": -0.8876442313194275, "eval_runtime": 74.8651, "eval_samples_per_second": 15.227, "eval_steps_per_second": 0.24, "step": 100 }, { "epoch": 2.608695652173913, "grad_norm": 18.01907950225221, "learning_rate": 4.518400232274078e-08, "logits/chosen": -2.546436309814453, "logits/rejected": -2.5362162590026855, "logps/chosen": -227.8841552734375, "logps/rejected": -236.8609619140625, "loss": 0.1288, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 1.770033836364746, "rewards/margins": 4.3705244064331055, "rewards/rejected": -2.6004908084869385, "step": 105 }, { "epoch": 2.732919254658385, "grad_norm": 17.707001673219747, "learning_rate": 2.025351319275137e-08, "logits/chosen": -2.5263776779174805, "logits/rejected": -2.5271897315979004, "logps/chosen": -243.36788940429688, "logps/rejected": -254.2205352783203, "loss": 0.1263, "rewards/accuracies": 0.956250011920929, "rewards/chosen": 1.7786777019500732, "rewards/margins": 4.2846503257751465, "rewards/rejected": -2.505972385406494, "step": 110 }, { "epoch": 2.857142857142857, "grad_norm": 16.228515302877508, "learning_rate": 5.0892790595336575e-09, "logits/chosen": -2.5431525707244873, "logits/rejected": -2.5375916957855225, "logps/chosen": -234.5476531982422, "logps/rejected": -246.033447265625, "loss": 0.113, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.938812494277954, "rewards/margins": 4.491750240325928, "rewards/rejected": -2.552938461303711, "step": 115 }, { "epoch": 2.981366459627329, "grad_norm": 15.40676083530787, "learning_rate": 0.0, "logits/chosen": -2.529771089553833, "logits/rejected": -2.5455727577209473, "logps/chosen": -242.18539428710938, "logps/rejected": -240.2424774169922, "loss": 0.1225, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": 2.026702642440796, "rewards/margins": 4.205595970153809, "rewards/rejected": -2.178893566131592, "step": 120 }, { "epoch": 2.981366459627329, "step": 120, "total_flos": 1414680891359232.0, "train_loss": 0.3313312023878098, "train_runtime": 4261.9082, "train_samples_per_second": 7.22, "train_steps_per_second": 0.028 } ], "logging_steps": 5, "max_steps": 120, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1414680891359232.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }