{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.981366459627329, "eval_steps": 50, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12422360248447205, "grad_norm": 62.403607830645235, "learning_rate": 5e-07, "logits/chosen": -2.756077289581299, "logits/rejected": -2.75536847114563, "logps/chosen": -266.15899658203125, "logps/rejected": -237.189697265625, "loss": 0.6905, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": 0.020162902772426605, "rewards/margins": 0.009148921817541122, "rewards/rejected": 0.011013981886208057, "step": 5 }, { "epoch": 0.2484472049689441, "grad_norm": 80.40658980559981, "learning_rate": 1e-06, "logits/chosen": -2.689826250076294, "logits/rejected": -2.672236204147339, "logps/chosen": -256.6275939941406, "logps/rejected": -209.4879608154297, "loss": 0.6412, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.5279896259307861, "rewards/margins": 0.1352878212928772, "rewards/rejected": 0.3927018344402313, "step": 10 }, { "epoch": 0.37267080745341613, "grad_norm": 42.882430184568136, "learning_rate": 9.949107209404663e-07, "logits/chosen": -2.516449213027954, "logits/rejected": -2.5170836448669434, "logps/chosen": -253.22061157226562, "logps/rejected": -214.71920776367188, "loss": 0.6132, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 1.5385050773620605, "rewards/margins": 0.7560700178146362, "rewards/rejected": 0.7824350595474243, "step": 15 }, { "epoch": 0.4968944099378882, "grad_norm": 45.56060438591064, "learning_rate": 9.797464868072486e-07, "logits/chosen": -2.4393157958984375, "logits/rejected": -2.4060797691345215, "logps/chosen": -257.09130859375, "logps/rejected": -198.79998779296875, "loss": 0.644, "rewards/accuracies": 0.75, "rewards/chosen": 1.761357307434082, "rewards/margins": 1.1699168682098389, "rewards/rejected": 0.5914403796195984, "step": 20 }, { "epoch": 0.6211180124223602, "grad_norm": 41.36750304040445, "learning_rate": 9.548159976772592e-07, "logits/chosen": -2.3880293369293213, "logits/rejected": -2.402343273162842, "logps/chosen": -246.7839813232422, "logps/rejected": -230.6208953857422, "loss": 0.5949, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": 1.8271690607070923, "rewards/margins": 1.592099905014038, "rewards/rejected": 0.23506923019886017, "step": 25 }, { "epoch": 0.7453416149068323, "grad_norm": 45.4963965931298, "learning_rate": 9.206267664155906e-07, "logits/chosen": -2.424156427383423, "logits/rejected": -2.3829989433288574, "logps/chosen": -245.9100341796875, "logps/rejected": -231.0552978515625, "loss": 0.5972, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": 1.6904218196868896, "rewards/margins": 1.704167366027832, "rewards/rejected": -0.013745462521910667, "step": 30 }, { "epoch": 0.8695652173913043, "grad_norm": 34.827462800688885, "learning_rate": 8.778747871771291e-07, "logits/chosen": -2.452261447906494, "logits/rejected": -2.428560972213745, "logps/chosen": -258.96221923828125, "logps/rejected": -219.5812530517578, "loss": 0.5609, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 1.5247328281402588, "rewards/margins": 1.6348918676376343, "rewards/rejected": -0.11015894263982773, "step": 35 }, { "epoch": 0.9937888198757764, "grad_norm": 41.54362761779536, "learning_rate": 8.274303669726426e-07, "logits/chosen": -2.435807228088379, "logits/rejected": -2.425825834274292, "logps/chosen": -246.02548217773438, "logps/rejected": -225.33700561523438, "loss": 0.6521, "rewards/accuracies": 0.65625, "rewards/chosen": 1.2175686359405518, "rewards/margins": 0.9398768544197083, "rewards/rejected": 0.2776917815208435, "step": 40 }, { "epoch": 1.1180124223602483, "grad_norm": 18.152319532594465, "learning_rate": 7.703204087277988e-07, "logits/chosen": -2.3833680152893066, "logits/rejected": -2.36021089553833, "logps/chosen": -239.3954315185547, "logps/rejected": -245.8417205810547, "loss": 0.244, "rewards/accuracies": 0.918749988079071, "rewards/chosen": 2.0914113521575928, "rewards/margins": 3.1723241806030273, "rewards/rejected": -1.0809123516082764, "step": 45 }, { "epoch": 1.2422360248447206, "grad_norm": 21.9049414703945, "learning_rate": 7.077075065009433e-07, "logits/chosen": -2.3475632667541504, "logits/rejected": -2.3293070793151855, "logps/chosen": -239.13436889648438, "logps/rejected": -224.94949340820312, "loss": 0.258, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": 2.809220552444458, "rewards/margins": 3.1373844146728516, "rewards/rejected": -0.32816413044929504, "step": 50 }, { "epoch": 1.2422360248447206, "eval_logits/chosen": -2.3354883193969727, "eval_logits/rejected": -2.3212666511535645, "eval_logps/chosen": -266.6775207519531, "eval_logps/rejected": -253.1901092529297, "eval_loss": 0.6212884783744812, "eval_rewards/accuracies": 0.7847222089767456, "eval_rewards/chosen": 2.382004737854004, "eval_rewards/margins": 1.986113429069519, "eval_rewards/rejected": 0.39589133858680725, "eval_runtime": 75.1943, "eval_samples_per_second": 15.161, "eval_steps_per_second": 0.239, "step": 50 }, { "epoch": 1.3664596273291925, "grad_norm": 19.456900162913456, "learning_rate": 6.408662784207149e-07, "logits/chosen": -2.3208935260772705, "logits/rejected": -2.2951972484588623, "logps/chosen": -247.5399932861328, "logps/rejected": -237.8445281982422, "loss": 0.2857, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": 2.739036798477173, "rewards/margins": 3.3734939098358154, "rewards/rejected": -0.6344569325447083, "step": 55 }, { "epoch": 1.4906832298136645, "grad_norm": 26.00493649217999, "learning_rate": 5.711574191366427e-07, "logits/chosen": -2.2791221141815186, "logits/rejected": -2.2677152156829834, "logps/chosen": -236.4568634033203, "logps/rejected": -235.218505859375, "loss": 0.2777, "rewards/accuracies": 0.84375, "rewards/chosen": 2.477545976638794, "rewards/margins": 3.141633987426758, "rewards/rejected": -0.6640880703926086, "step": 60 }, { "epoch": 1.6149068322981366, "grad_norm": 22.738140845846544, "learning_rate": 5e-07, "logits/chosen": -2.2787652015686035, "logits/rejected": -2.227478265762329, "logps/chosen": -228.0373992919922, "logps/rejected": -255.89987182617188, "loss": 0.289, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": 2.602055072784424, "rewards/margins": 3.5908477306365967, "rewards/rejected": -0.9887924194335938, "step": 65 }, { "epoch": 1.7391304347826086, "grad_norm": 23.007905633616, "learning_rate": 4.2884258086335745e-07, "logits/chosen": -2.259939670562744, "logits/rejected": -2.263455867767334, "logps/chosen": -245.46206665039062, "logps/rejected": -223.74270629882812, "loss": 0.2974, "rewards/accuracies": 0.893750011920929, "rewards/chosen": 2.2899632453918457, "rewards/margins": 3.3291678428649902, "rewards/rejected": -1.0392045974731445, "step": 70 }, { "epoch": 1.8633540372670807, "grad_norm": 22.7179826338975, "learning_rate": 3.591337215792851e-07, "logits/chosen": -2.2751305103302, "logits/rejected": -2.2498066425323486, "logps/chosen": -237.3503875732422, "logps/rejected": -236.15951538085938, "loss": 0.2561, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": 2.172248363494873, "rewards/margins": 3.147101879119873, "rewards/rejected": -0.9748538732528687, "step": 75 }, { "epoch": 1.9875776397515528, "grad_norm": 26.637593255483548, "learning_rate": 2.922924934990568e-07, "logits/chosen": -2.273965358734131, "logits/rejected": -2.2656917572021484, "logps/chosen": -257.8080139160156, "logps/rejected": -217.7511444091797, "loss": 0.3499, "rewards/accuracies": 0.893750011920929, "rewards/chosen": 2.5381007194519043, "rewards/margins": 3.5010929107666016, "rewards/rejected": -0.9629920721054077, "step": 80 }, { "epoch": 2.111801242236025, "grad_norm": 13.915541615199986, "learning_rate": 2.2967959127220137e-07, "logits/chosen": -2.2352209091186523, "logits/rejected": -2.197958469390869, "logps/chosen": -236.0648193359375, "logps/rejected": -232.01242065429688, "loss": 0.1697, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": 2.252695083618164, "rewards/margins": 3.7181904315948486, "rewards/rejected": -1.4654955863952637, "step": 85 }, { "epoch": 2.2360248447204967, "grad_norm": 15.499960854642854, "learning_rate": 1.725696330273575e-07, "logits/chosen": -2.197455883026123, "logits/rejected": -2.1763360500335693, "logps/chosen": -239.3179473876953, "logps/rejected": -238.17578125, "loss": 0.1491, "rewards/accuracies": 0.956250011920929, "rewards/chosen": 2.6867127418518066, "rewards/margins": 4.167205810546875, "rewards/rejected": -1.480492115020752, "step": 90 }, { "epoch": 2.360248447204969, "grad_norm": 16.718898555042905, "learning_rate": 1.2212521282287093e-07, "logits/chosen": -2.1886236667633057, "logits/rejected": -2.1752870082855225, "logps/chosen": -246.4811553955078, "logps/rejected": -212.0710906982422, "loss": 0.158, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 2.7335801124572754, "rewards/margins": 3.6828677654266357, "rewards/rejected": -0.9492877125740051, "step": 95 }, { "epoch": 2.4844720496894412, "grad_norm": 19.465292915783245, "learning_rate": 7.937323358440934e-08, "logits/chosen": -2.1609156131744385, "logits/rejected": -2.1472043991088867, "logps/chosen": -224.5920867919922, "logps/rejected": -214.02554321289062, "loss": 0.1741, "rewards/accuracies": 0.956250011920929, "rewards/chosen": 2.6809496879577637, "rewards/margins": 3.8545315265655518, "rewards/rejected": -1.1735817193984985, "step": 100 }, { "epoch": 2.4844720496894412, "eval_logits/chosen": -2.201831340789795, "eval_logits/rejected": -2.172940254211426, "eval_logps/chosen": -268.1634216308594, "eval_logps/rejected": -258.27825927734375, "eval_loss": 0.6181273460388184, "eval_rewards/accuracies": 0.7777777910232544, "eval_rewards/chosen": 2.2334184646606445, "eval_rewards/margins": 2.346345901489258, "eval_rewards/rejected": -0.11292734742164612, "eval_runtime": 74.9234, "eval_samples_per_second": 15.216, "eval_steps_per_second": 0.24, "step": 100 }, { "epoch": 2.608695652173913, "grad_norm": 19.995154150155244, "learning_rate": 4.518400232274078e-08, "logits/chosen": -2.207038402557373, "logits/rejected": -2.1583220958709717, "logps/chosen": -251.38162231445312, "logps/rejected": -263.5743408203125, "loss": 0.1583, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 2.9800047874450684, "rewards/margins": 4.533116817474365, "rewards/rejected": -1.5531116724014282, "step": 105 }, { "epoch": 2.732919254658385, "grad_norm": 16.500504970360172, "learning_rate": 2.025351319275137e-08, "logits/chosen": -2.172910690307617, "logits/rejected": -2.1606621742248535, "logps/chosen": -211.9971160888672, "logps/rejected": -253.26974487304688, "loss": 0.1399, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": 2.546518325805664, "rewards/margins": 4.063871383666992, "rewards/rejected": -1.5173530578613281, "step": 110 }, { "epoch": 2.857142857142857, "grad_norm": 19.536794540135528, "learning_rate": 5.0892790595336575e-09, "logits/chosen": -2.167283296585083, "logits/rejected": -2.1639857292175293, "logps/chosen": -246.7537841796875, "logps/rejected": -237.97262573242188, "loss": 0.1617, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 2.768967866897583, "rewards/margins": 4.068437576293945, "rewards/rejected": -1.2994694709777832, "step": 115 }, { "epoch": 2.981366459627329, "grad_norm": 13.712198521871002, "learning_rate": 0.0, "logits/chosen": -2.1904730796813965, "logits/rejected": -2.1649465560913086, "logps/chosen": -250.57174682617188, "logps/rejected": -253.6211395263672, "loss": 0.1699, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 2.733159065246582, "rewards/margins": 3.944279193878174, "rewards/rejected": -1.2111196517944336, "step": 120 }, { "epoch": 2.981366459627329, "step": 120, "total_flos": 1414680891359232.0, "train_loss": 0.3555192892750104, "train_runtime": 4293.0666, "train_samples_per_second": 7.168, "train_steps_per_second": 0.028 } ], "logging_steps": 5, "max_steps": 120, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1414680891359232.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }