{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.4925373134328357, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07462686567164178, "grad_norm": 56.38858835713434, "learning_rate": 5e-07, "logits/chosen": -2.731353282928467, "logits/rejected": -2.7158660888671875, "logps/chosen": -256.7725524902344, "logps/rejected": -205.1663055419922, "loss": 0.6903, "rewards/accuracies": 0.35624998807907104, "rewards/chosen": 0.014972714707255363, "rewards/margins": 0.006492167711257935, "rewards/rejected": 0.008480546995997429, "step": 5 }, { "epoch": 0.14925373134328357, "grad_norm": 48.210654865798084, "learning_rate": 1e-06, "logits/chosen": -2.6910791397094727, "logits/rejected": -2.686784267425537, "logps/chosen": -261.28240966796875, "logps/rejected": -210.28414916992188, "loss": 0.6445, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.5361045002937317, "rewards/margins": 0.2673659920692444, "rewards/rejected": 0.2687385082244873, "step": 10 }, { "epoch": 0.22388059701492538, "grad_norm": 48.18867539560807, "learning_rate": 9.983100718730718e-07, "logits/chosen": -2.5146777629852295, "logits/rejected": -2.5069146156311035, "logps/chosen": -274.51788330078125, "logps/rejected": -212.06613159179688, "loss": 0.6397, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 1.0824434757232666, "rewards/margins": 0.4525600075721741, "rewards/rejected": 0.6298834085464478, "step": 15 }, { "epoch": 0.29850746268656714, "grad_norm": 56.03620889669535, "learning_rate": 9.932517109205849e-07, "logits/chosen": -2.346874237060547, "logits/rejected": -2.3457634449005127, "logps/chosen": -233.57192993164062, "logps/rejected": -216.9462890625, "loss": 0.6508, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 1.2721847295761108, "rewards/margins": 0.8419507741928101, "rewards/rejected": 0.4302339553833008, "step": 20 }, { "epoch": 0.373134328358209, "grad_norm": 44.42398594582461, "learning_rate": 9.848591102083375e-07, "logits/chosen": -2.321877956390381, "logits/rejected": -2.3033697605133057, "logps/chosen": -276.81353759765625, "logps/rejected": -215.10543823242188, "loss": 0.5836, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 1.4672718048095703, "rewards/margins": 0.9575842618942261, "rewards/rejected": 0.5096874237060547, "step": 25 }, { "epoch": 0.44776119402985076, "grad_norm": 47.04899305167527, "learning_rate": 9.731890013043367e-07, "logits/chosen": -2.3766517639160156, "logits/rejected": -2.375422954559326, "logps/chosen": -253.60995483398438, "logps/rejected": -239.47940063476562, "loss": 0.634, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": 1.2893927097320557, "rewards/margins": 0.6847591996192932, "rewards/rejected": 0.6046335697174072, "step": 30 }, { "epoch": 0.5223880597014925, "grad_norm": 39.80811236547202, "learning_rate": 9.583202707897073e-07, "logits/chosen": -2.4474780559539795, "logits/rejected": -2.450472354888916, "logps/chosen": -257.7427673339844, "logps/rejected": -207.629638671875, "loss": 0.5649, "rewards/accuracies": 0.71875, "rewards/chosen": 1.217116355895996, "rewards/margins": 1.1533949375152588, "rewards/rejected": 0.06372135877609253, "step": 35 }, { "epoch": 0.5970149253731343, "grad_norm": 42.3182391334653, "learning_rate": 9.403534270080829e-07, "logits/chosen": -2.4868321418762207, "logits/rejected": -2.4954237937927246, "logps/chosen": -243.6182403564453, "logps/rejected": -235.5696563720703, "loss": 0.5926, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 1.1232950687408447, "rewards/margins": 1.261273741722107, "rewards/rejected": -0.13797876238822937, "step": 40 }, { "epoch": 0.6716417910447762, "grad_norm": 38.544673892587376, "learning_rate": 9.19409920658098e-07, "logits/chosen": -2.4237208366394043, "logits/rejected": -2.400038480758667, "logps/chosen": -247.0177764892578, "logps/rejected": -214.0535888671875, "loss": 0.6227, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 1.1583524942398071, "rewards/margins": 1.3776183128356934, "rewards/rejected": -0.21926572918891907, "step": 45 }, { "epoch": 0.746268656716418, "grad_norm": 44.95262854721761, "learning_rate": 8.956313238215823e-07, "logits/chosen": -2.3861770629882812, "logits/rejected": -2.361314058303833, "logps/chosen": -251.87680053710938, "logps/rejected": -241.48794555664062, "loss": 0.5946, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 1.3533507585525513, "rewards/margins": 1.143169641494751, "rewards/rejected": 0.2101811170578003, "step": 50 }, { "epoch": 0.746268656716418, "eval_logits/chosen": -2.419147253036499, "eval_logits/rejected": -2.3880856037139893, "eval_logps/chosen": -247.12657165527344, "eval_logps/rejected": -217.0163116455078, "eval_loss": 0.5850147008895874, "eval_rewards/accuracies": 0.7333333492279053, "eval_rewards/chosen": 1.2898719310760498, "eval_rewards/margins": 1.370118498802185, "eval_rewards/rejected": -0.0802464708685875, "eval_runtime": 126.7763, "eval_samples_per_second": 14.987, "eval_steps_per_second": 0.237, "step": 50 }, { "epoch": 0.8208955223880597, "grad_norm": 34.637816378109704, "learning_rate": 8.691783729769873e-07, "logits/chosen": -2.4367661476135254, "logits/rejected": -2.4355311393737793, "logps/chosen": -237.86105346679688, "logps/rejected": -211.58084106445312, "loss": 0.555, "rewards/accuracies": 0.731249988079071, "rewards/chosen": 1.1501390933990479, "rewards/margins": 1.354534387588501, "rewards/rejected": -0.20439541339874268, "step": 55 }, { "epoch": 0.8955223880597015, "grad_norm": 39.91245885039146, "learning_rate": 8.402298824670029e-07, "logits/chosen": -2.5068681240081787, "logits/rejected": -2.485455274581909, "logps/chosen": -256.1775817871094, "logps/rejected": -231.7941436767578, "loss": 0.611, "rewards/accuracies": 0.78125, "rewards/chosen": 1.03065025806427, "rewards/margins": 1.9949699640274048, "rewards/rejected": -0.9643197059631348, "step": 60 }, { "epoch": 0.9701492537313433, "grad_norm": 39.08249314957454, "learning_rate": 8.089815357650089e-07, "logits/chosen": -2.5430567264556885, "logits/rejected": -2.543178081512451, "logps/chosen": -232.22262573242188, "logps/rejected": -225.10220336914062, "loss": 0.5475, "rewards/accuracies": 0.731249988079071, "rewards/chosen": 0.8175865411758423, "rewards/margins": 1.328970193862915, "rewards/rejected": -0.5113834738731384, "step": 65 }, { "epoch": 1.044776119402985, "grad_norm": 20.186385290734442, "learning_rate": 7.756445627110522e-07, "logits/chosen": -2.552123546600342, "logits/rejected": -2.5313704013824463, "logps/chosen": -234.0012664794922, "logps/rejected": -240.6761474609375, "loss": 0.3553, "rewards/accuracies": 0.831250011920929, "rewards/chosen": 1.4629334211349487, "rewards/margins": 2.36128568649292, "rewards/rejected": -0.8983524441719055, "step": 70 }, { "epoch": 1.1194029850746268, "grad_norm": 21.595036305155492, "learning_rate": 7.404443116588547e-07, "logits/chosen": -2.5418105125427246, "logits/rejected": -2.5285840034484863, "logps/chosen": -242.90499877929688, "logps/rejected": -216.7215576171875, "loss": 0.2463, "rewards/accuracies": 0.918749988079071, "rewards/chosen": 2.360262393951416, "rewards/margins": 3.110288381576538, "rewards/rejected": -0.7500261068344116, "step": 75 }, { "epoch": 1.1940298507462686, "grad_norm": 22.4039757471544, "learning_rate": 7.036187261857288e-07, "logits/chosen": -2.456780195236206, "logits/rejected": -2.439521312713623, "logps/chosen": -233.08560180664062, "logps/rejected": -244.88961791992188, "loss": 0.2423, "rewards/accuracies": 0.90625, "rewards/chosen": 2.1394240856170654, "rewards/margins": 3.141387939453125, "rewards/rejected": -1.0019636154174805, "step": 80 }, { "epoch": 1.2686567164179103, "grad_norm": 25.42866646351196, "learning_rate": 6.654167366624008e-07, "logits/chosen": -2.3945281505584717, "logits/rejected": -2.3779776096343994, "logps/chosen": -230.88217163085938, "logps/rejected": -222.7549285888672, "loss": 0.2769, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 1.6151447296142578, "rewards/margins": 2.812251091003418, "rewards/rejected": -1.1971065998077393, "step": 85 }, { "epoch": 1.3432835820895521, "grad_norm": 23.51747857481762, "learning_rate": 6.260965775552713e-07, "logits/chosen": -2.361176013946533, "logits/rejected": -2.3344523906707764, "logps/chosen": -236.94775390625, "logps/rejected": -242.43069458007812, "loss": 0.2743, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 1.946207046508789, "rewards/margins": 3.319734573364258, "rewards/rejected": -1.3735275268554688, "step": 90 }, { "epoch": 1.417910447761194, "grad_norm": 25.346384805721517, "learning_rate": 5.859240418356614e-07, "logits/chosen": -2.3584346771240234, "logits/rejected": -2.3219895362854004, "logps/chosen": -246.46530151367188, "logps/rejected": -231.7371063232422, "loss": 0.2576, "rewards/accuracies": 0.90625, "rewards/chosen": 2.247493267059326, "rewards/margins": 3.8193671703338623, "rewards/rejected": -1.571873426437378, "step": 95 }, { "epoch": 1.4925373134328357, "grad_norm": 17.435014853431674, "learning_rate": 5.451706842957421e-07, "logits/chosen": -2.4034814834594727, "logits/rejected": -2.3993496894836426, "logps/chosen": -248.6347198486328, "logps/rejected": -231.65682983398438, "loss": 0.255, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 2.421250820159912, "rewards/margins": 3.7864270210266113, "rewards/rejected": -1.3651763200759888, "step": 100 }, { "epoch": 1.4925373134328357, "eval_logits/chosen": -2.4342453479766846, "eval_logits/rejected": -2.4099276065826416, "eval_logps/chosen": -245.49957275390625, "eval_logps/rejected": -221.9740753173828, "eval_loss": 0.5856931805610657, "eval_rewards/accuracies": 0.7958333492279053, "eval_rewards/chosen": 1.4525729417800903, "eval_rewards/margins": 2.0285959243774414, "eval_rewards/rejected": -0.5760230422019958, "eval_runtime": 126.6219, "eval_samples_per_second": 15.005, "eval_steps_per_second": 0.237, "step": 100 } ], "logging_steps": 5, "max_steps": 201, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1178822762299392.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }