|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.981366459627329, |
|
"eval_steps": 50, |
|
"global_step": 120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12422360248447205, |
|
"grad_norm": 62.403607830645235, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.756077289581299, |
|
"logits/rejected": -2.75536847114563, |
|
"logps/chosen": -266.15899658203125, |
|
"logps/rejected": -237.189697265625, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": 0.020162902772426605, |
|
"rewards/margins": 0.009148921817541122, |
|
"rewards/rejected": 0.011013981886208057, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.2484472049689441, |
|
"grad_norm": 80.40658980559981, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -2.689826250076294, |
|
"logits/rejected": -2.672236204147339, |
|
"logps/chosen": -256.6275939941406, |
|
"logps/rejected": -209.4879608154297, |
|
"loss": 0.6412, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.5279896259307861, |
|
"rewards/margins": 0.1352878212928772, |
|
"rewards/rejected": 0.3927018344402313, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.37267080745341613, |
|
"grad_norm": 42.882430184568136, |
|
"learning_rate": 9.949107209404663e-07, |
|
"logits/chosen": -2.516449213027954, |
|
"logits/rejected": -2.5170836448669434, |
|
"logps/chosen": -253.22061157226562, |
|
"logps/rejected": -214.71920776367188, |
|
"loss": 0.6132, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 1.5385050773620605, |
|
"rewards/margins": 0.7560700178146362, |
|
"rewards/rejected": 0.7824350595474243, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.4968944099378882, |
|
"grad_norm": 45.56060438591064, |
|
"learning_rate": 9.797464868072486e-07, |
|
"logits/chosen": -2.4393157958984375, |
|
"logits/rejected": -2.4060797691345215, |
|
"logps/chosen": -257.09130859375, |
|
"logps/rejected": -198.79998779296875, |
|
"loss": 0.644, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.761357307434082, |
|
"rewards/margins": 1.1699168682098389, |
|
"rewards/rejected": 0.5914403796195984, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.6211180124223602, |
|
"grad_norm": 41.36750304040445, |
|
"learning_rate": 9.548159976772592e-07, |
|
"logits/chosen": -2.3880293369293213, |
|
"logits/rejected": -2.402343273162842, |
|
"logps/chosen": -246.7839813232422, |
|
"logps/rejected": -230.6208953857422, |
|
"loss": 0.5949, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 1.8271690607070923, |
|
"rewards/margins": 1.592099905014038, |
|
"rewards/rejected": 0.23506923019886017, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.7453416149068323, |
|
"grad_norm": 45.4963965931298, |
|
"learning_rate": 9.206267664155906e-07, |
|
"logits/chosen": -2.424156427383423, |
|
"logits/rejected": -2.3829989433288574, |
|
"logps/chosen": -245.9100341796875, |
|
"logps/rejected": -231.0552978515625, |
|
"loss": 0.5972, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 1.6904218196868896, |
|
"rewards/margins": 1.704167366027832, |
|
"rewards/rejected": -0.013745462521910667, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.8695652173913043, |
|
"grad_norm": 34.827462800688885, |
|
"learning_rate": 8.778747871771291e-07, |
|
"logits/chosen": -2.452261447906494, |
|
"logits/rejected": -2.428560972213745, |
|
"logps/chosen": -258.96221923828125, |
|
"logps/rejected": -219.5812530517578, |
|
"loss": 0.5609, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 1.5247328281402588, |
|
"rewards/margins": 1.6348918676376343, |
|
"rewards/rejected": -0.11015894263982773, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.9937888198757764, |
|
"grad_norm": 41.54362761779536, |
|
"learning_rate": 8.274303669726426e-07, |
|
"logits/chosen": -2.435807228088379, |
|
"logits/rejected": -2.425825834274292, |
|
"logps/chosen": -246.02548217773438, |
|
"logps/rejected": -225.33700561523438, |
|
"loss": 0.6521, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 1.2175686359405518, |
|
"rewards/margins": 0.9398768544197083, |
|
"rewards/rejected": 0.2776917815208435, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.1180124223602483, |
|
"grad_norm": 18.152319532594465, |
|
"learning_rate": 7.703204087277988e-07, |
|
"logits/chosen": -2.3833680152893066, |
|
"logits/rejected": -2.36021089553833, |
|
"logps/chosen": -239.3954315185547, |
|
"logps/rejected": -245.8417205810547, |
|
"loss": 0.244, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 2.0914113521575928, |
|
"rewards/margins": 3.1723241806030273, |
|
"rewards/rejected": -1.0809123516082764, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.2422360248447206, |
|
"grad_norm": 21.9049414703945, |
|
"learning_rate": 7.077075065009433e-07, |
|
"logits/chosen": -2.3475632667541504, |
|
"logits/rejected": -2.3293070793151855, |
|
"logps/chosen": -239.13436889648438, |
|
"logps/rejected": -224.94949340820312, |
|
"loss": 0.258, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 2.809220552444458, |
|
"rewards/margins": 3.1373844146728516, |
|
"rewards/rejected": -0.32816413044929504, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.2422360248447206, |
|
"eval_logits/chosen": -2.3354883193969727, |
|
"eval_logits/rejected": -2.3212666511535645, |
|
"eval_logps/chosen": -266.6775207519531, |
|
"eval_logps/rejected": -253.1901092529297, |
|
"eval_loss": 0.6212884783744812, |
|
"eval_rewards/accuracies": 0.7847222089767456, |
|
"eval_rewards/chosen": 2.382004737854004, |
|
"eval_rewards/margins": 1.986113429069519, |
|
"eval_rewards/rejected": 0.39589133858680725, |
|
"eval_runtime": 75.1943, |
|
"eval_samples_per_second": 15.161, |
|
"eval_steps_per_second": 0.239, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.3664596273291925, |
|
"grad_norm": 19.456900162913456, |
|
"learning_rate": 6.408662784207149e-07, |
|
"logits/chosen": -2.3208935260772705, |
|
"logits/rejected": -2.2951972484588623, |
|
"logps/chosen": -247.5399932861328, |
|
"logps/rejected": -237.8445281982422, |
|
"loss": 0.2857, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 2.739036798477173, |
|
"rewards/margins": 3.3734939098358154, |
|
"rewards/rejected": -0.6344569325447083, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.4906832298136645, |
|
"grad_norm": 26.00493649217999, |
|
"learning_rate": 5.711574191366427e-07, |
|
"logits/chosen": -2.2791221141815186, |
|
"logits/rejected": -2.2677152156829834, |
|
"logps/chosen": -236.4568634033203, |
|
"logps/rejected": -235.218505859375, |
|
"loss": 0.2777, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 2.477545976638794, |
|
"rewards/margins": 3.141633987426758, |
|
"rewards/rejected": -0.6640880703926086, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.6149068322981366, |
|
"grad_norm": 22.738140845846544, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.2787652015686035, |
|
"logits/rejected": -2.227478265762329, |
|
"logps/chosen": -228.0373992919922, |
|
"logps/rejected": -255.89987182617188, |
|
"loss": 0.289, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 2.602055072784424, |
|
"rewards/margins": 3.5908477306365967, |
|
"rewards/rejected": -0.9887924194335938, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.7391304347826086, |
|
"grad_norm": 23.007905633616, |
|
"learning_rate": 4.2884258086335745e-07, |
|
"logits/chosen": -2.259939670562744, |
|
"logits/rejected": -2.263455867767334, |
|
"logps/chosen": -245.46206665039062, |
|
"logps/rejected": -223.74270629882812, |
|
"loss": 0.2974, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 2.2899632453918457, |
|
"rewards/margins": 3.3291678428649902, |
|
"rewards/rejected": -1.0392045974731445, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.8633540372670807, |
|
"grad_norm": 22.7179826338975, |
|
"learning_rate": 3.591337215792851e-07, |
|
"logits/chosen": -2.2751305103302, |
|
"logits/rejected": -2.2498066425323486, |
|
"logps/chosen": -237.3503875732422, |
|
"logps/rejected": -236.15951538085938, |
|
"loss": 0.2561, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 2.172248363494873, |
|
"rewards/margins": 3.147101879119873, |
|
"rewards/rejected": -0.9748538732528687, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.9875776397515528, |
|
"grad_norm": 26.637593255483548, |
|
"learning_rate": 2.922924934990568e-07, |
|
"logits/chosen": -2.273965358734131, |
|
"logits/rejected": -2.2656917572021484, |
|
"logps/chosen": -257.8080139160156, |
|
"logps/rejected": -217.7511444091797, |
|
"loss": 0.3499, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 2.5381007194519043, |
|
"rewards/margins": 3.5010929107666016, |
|
"rewards/rejected": -0.9629920721054077, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.111801242236025, |
|
"grad_norm": 13.915541615199986, |
|
"learning_rate": 2.2967959127220137e-07, |
|
"logits/chosen": -2.2352209091186523, |
|
"logits/rejected": -2.197958469390869, |
|
"logps/chosen": -236.0648193359375, |
|
"logps/rejected": -232.01242065429688, |
|
"loss": 0.1697, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 2.252695083618164, |
|
"rewards/margins": 3.7181904315948486, |
|
"rewards/rejected": -1.4654955863952637, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 2.2360248447204967, |
|
"grad_norm": 15.499960854642854, |
|
"learning_rate": 1.725696330273575e-07, |
|
"logits/chosen": -2.197455883026123, |
|
"logits/rejected": -2.1763360500335693, |
|
"logps/chosen": -239.3179473876953, |
|
"logps/rejected": -238.17578125, |
|
"loss": 0.1491, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 2.6867127418518066, |
|
"rewards/margins": 4.167205810546875, |
|
"rewards/rejected": -1.480492115020752, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.360248447204969, |
|
"grad_norm": 16.718898555042905, |
|
"learning_rate": 1.2212521282287093e-07, |
|
"logits/chosen": -2.1886236667633057, |
|
"logits/rejected": -2.1752870082855225, |
|
"logps/chosen": -246.4811553955078, |
|
"logps/rejected": -212.0710906982422, |
|
"loss": 0.158, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 2.7335801124572754, |
|
"rewards/margins": 3.6828677654266357, |
|
"rewards/rejected": -0.9492877125740051, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 2.4844720496894412, |
|
"grad_norm": 19.465292915783245, |
|
"learning_rate": 7.937323358440934e-08, |
|
"logits/chosen": -2.1609156131744385, |
|
"logits/rejected": -2.1472043991088867, |
|
"logps/chosen": -224.5920867919922, |
|
"logps/rejected": -214.02554321289062, |
|
"loss": 0.1741, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 2.6809496879577637, |
|
"rewards/margins": 3.8545315265655518, |
|
"rewards/rejected": -1.1735817193984985, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.4844720496894412, |
|
"eval_logits/chosen": -2.201831340789795, |
|
"eval_logits/rejected": -2.172940254211426, |
|
"eval_logps/chosen": -268.1634216308594, |
|
"eval_logps/rejected": -258.27825927734375, |
|
"eval_loss": 0.6181273460388184, |
|
"eval_rewards/accuracies": 0.7777777910232544, |
|
"eval_rewards/chosen": 2.2334184646606445, |
|
"eval_rewards/margins": 2.346345901489258, |
|
"eval_rewards/rejected": -0.11292734742164612, |
|
"eval_runtime": 74.9234, |
|
"eval_samples_per_second": 15.216, |
|
"eval_steps_per_second": 0.24, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.608695652173913, |
|
"grad_norm": 19.995154150155244, |
|
"learning_rate": 4.518400232274078e-08, |
|
"logits/chosen": -2.207038402557373, |
|
"logits/rejected": -2.1583220958709717, |
|
"logps/chosen": -251.38162231445312, |
|
"logps/rejected": -263.5743408203125, |
|
"loss": 0.1583, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.9800047874450684, |
|
"rewards/margins": 4.533116817474365, |
|
"rewards/rejected": -1.5531116724014282, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 2.732919254658385, |
|
"grad_norm": 16.500504970360172, |
|
"learning_rate": 2.025351319275137e-08, |
|
"logits/chosen": -2.172910690307617, |
|
"logits/rejected": -2.1606621742248535, |
|
"logps/chosen": -211.9971160888672, |
|
"logps/rejected": -253.26974487304688, |
|
"loss": 0.1399, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 2.546518325805664, |
|
"rewards/margins": 4.063871383666992, |
|
"rewards/rejected": -1.5173530578613281, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 19.536794540135528, |
|
"learning_rate": 5.0892790595336575e-09, |
|
"logits/chosen": -2.167283296585083, |
|
"logits/rejected": -2.1639857292175293, |
|
"logps/chosen": -246.7537841796875, |
|
"logps/rejected": -237.97262573242188, |
|
"loss": 0.1617, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.768967866897583, |
|
"rewards/margins": 4.068437576293945, |
|
"rewards/rejected": -1.2994694709777832, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 2.981366459627329, |
|
"grad_norm": 13.712198521871002, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -2.1904730796813965, |
|
"logits/rejected": -2.1649465560913086, |
|
"logps/chosen": -250.57174682617188, |
|
"logps/rejected": -253.6211395263672, |
|
"loss": 0.1699, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 2.733159065246582, |
|
"rewards/margins": 3.944279193878174, |
|
"rewards/rejected": -1.2111196517944336, |
|
"step": 120 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 120, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1414680891359232.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|