{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0695187165775402,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.053475935828877004,
      "grad_norm": 57.38362641680358,
      "learning_rate": 5e-07,
      "logits/chosen": -2.734001874923706,
      "logits/rejected": -2.714400053024292,
      "logps/chosen": -259.45416259765625,
      "logps/rejected": -213.60086059570312,
      "loss": 0.69,
      "rewards/accuracies": 0.3687500059604645,
      "rewards/chosen": 0.022585459053516388,
      "rewards/margins": 0.012519368901848793,
      "rewards/rejected": 0.010066090151667595,
      "step": 5
    },
    {
      "epoch": 0.10695187165775401,
      "grad_norm": 47.481455328039516,
      "learning_rate": 1e-06,
      "logits/chosen": -2.6481270790100098,
      "logits/rejected": -2.652583599090576,
      "logps/chosen": -257.953369140625,
      "logps/rejected": -188.35134887695312,
      "loss": 0.6387,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": 0.550362229347229,
      "rewards/margins": 0.1817861795425415,
      "rewards/rejected": 0.3685761094093323,
      "step": 10
    },
    {
      "epoch": 0.16042780748663102,
      "grad_norm": 57.27168970604102,
      "learning_rate": 9.991477798614637e-07,
      "logits/chosen": -2.517893075942993,
      "logits/rejected": -2.512808322906494,
      "logps/chosen": -238.94729614257812,
      "logps/rejected": -195.82278442382812,
      "loss": 0.6343,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 1.433282732963562,
      "rewards/margins": 0.7576474547386169,
      "rewards/rejected": 0.6756354570388794,
      "step": 15
    },
    {
      "epoch": 0.21390374331550802,
      "grad_norm": 49.36931799258527,
      "learning_rate": 9.965940245625131e-07,
      "logits/chosen": -2.3814330101013184,
      "logits/rejected": -2.3650126457214355,
      "logps/chosen": -240.35299682617188,
      "logps/rejected": -224.2460174560547,
      "loss": 0.6444,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 1.3586658239364624,
      "rewards/margins": 0.8488560914993286,
      "rewards/rejected": 0.5098099112510681,
      "step": 20
    },
    {
      "epoch": 0.26737967914438504,
      "grad_norm": 38.23771695176387,
      "learning_rate": 9.923474395499264e-07,
      "logits/chosen": -2.3290395736694336,
      "logits/rejected": -2.300835371017456,
      "logps/chosen": -240.2759246826172,
      "logps/rejected": -190.5952911376953,
      "loss": 0.5839,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": 1.567830204963684,
      "rewards/margins": 1.0694384574890137,
      "rewards/rejected": 0.4983917772769928,
      "step": 25
    },
    {
      "epoch": 0.32085561497326204,
      "grad_norm": 38.55306482248171,
      "learning_rate": 9.86422500924775e-07,
      "logits/chosen": -2.385282039642334,
      "logits/rejected": -2.380516767501831,
      "logps/chosen": -237.0162353515625,
      "logps/rejected": -203.95974731445312,
      "loss": 0.5888,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": 1.3996493816375732,
      "rewards/margins": 0.9638010263442993,
      "rewards/rejected": 0.43584829568862915,
      "step": 30
    },
    {
      "epoch": 0.37433155080213903,
      "grad_norm": 40.05177774617033,
      "learning_rate": 9.788394060951227e-07,
      "logits/chosen": -2.4351038932800293,
      "logits/rejected": -2.440431833267212,
      "logps/chosen": -247.30227661132812,
      "logps/rejected": -201.399658203125,
      "loss": 0.6124,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": 1.2308628559112549,
      "rewards/margins": 1.0766284465789795,
      "rewards/rejected": 0.15423443913459778,
      "step": 35
    },
    {
      "epoch": 0.42780748663101603,
      "grad_norm": 38.64888093844348,
      "learning_rate": 9.696240049254742e-07,
      "logits/chosen": -2.4633519649505615,
      "logits/rejected": -2.466259241104126,
      "logps/chosen": -249.7180938720703,
      "logps/rejected": -194.3883056640625,
      "loss": 0.5959,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 1.1658284664154053,
      "rewards/margins": 1.0980390310287476,
      "rewards/rejected": 0.06778934597969055,
      "step": 40
    },
    {
      "epoch": 0.48128342245989303,
      "grad_norm": 41.079430965451316,
      "learning_rate": 9.588077116176756e-07,
      "logits/chosen": -2.4081149101257324,
      "logits/rejected": -2.406311273574829,
      "logps/chosen": -238.9816436767578,
      "logps/rejected": -201.0675048828125,
      "loss": 0.6076,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 1.1393609046936035,
      "rewards/margins": 1.4659656286239624,
      "rewards/rejected": -0.32660484313964844,
      "step": 45
    },
    {
      "epoch": 0.5347593582887701,
      "grad_norm": 46.70159372711324,
      "learning_rate": 9.464273976236516e-07,
      "logits/chosen": -2.4118716716766357,
      "logits/rejected": -2.408468246459961,
      "logps/chosen": -219.6370849609375,
      "logps/rejected": -186.2394561767578,
      "loss": 0.5857,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.8120909929275513,
      "rewards/margins": 0.906074047088623,
      "rewards/rejected": -0.09398309141397476,
      "step": 50
    },
    {
      "epoch": 0.5347593582887701,
      "eval_logits/chosen": -2.4070043563842773,
      "eval_logits/rejected": -2.395042896270752,
      "eval_logps/chosen": -246.7726287841797,
      "eval_logps/rejected": -220.63446044921875,
      "eval_loss": 0.5587548613548279,
      "eval_rewards/accuracies": 0.7202380895614624,
      "eval_rewards/chosen": 0.8626245856285095,
      "eval_rewards/margins": 1.329900860786438,
      "eval_rewards/rejected": -0.46727630496025085,
      "eval_runtime": 180.0372,
      "eval_samples_per_second": 14.775,
      "eval_steps_per_second": 0.233,
      "step": 50
    },
    {
      "epoch": 0.5882352941176471,
      "grad_norm": 38.079371712676014,
      "learning_rate": 9.325252659550308e-07,
      "logits/chosen": -2.387826919555664,
      "logits/rejected": -2.3770289421081543,
      "logps/chosen": -226.2966766357422,
      "logps/rejected": -208.65219116210938,
      "loss": 0.5977,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.6860524415969849,
      "rewards/margins": 0.8900126218795776,
      "rewards/rejected": -0.2039601057767868,
      "step": 55
    },
    {
      "epoch": 0.6417112299465241,
      "grad_norm": 37.176849832612625,
      "learning_rate": 9.171487073181197e-07,
      "logits/chosen": -2.309826374053955,
      "logits/rejected": -2.319997549057007,
      "logps/chosen": -230.22189331054688,
      "logps/rejected": -206.3903350830078,
      "loss": 0.5777,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": 0.7191376686096191,
      "rewards/margins": 1.5079154968261719,
      "rewards/rejected": -0.7887779474258423,
      "step": 60
    },
    {
      "epoch": 0.6951871657754011,
      "grad_norm": 36.39927326888579,
      "learning_rate": 9.003501385646448e-07,
      "logits/chosen": -2.2107081413269043,
      "logits/rejected": -2.21248722076416,
      "logps/chosen": -238.040771484375,
      "logps/rejected": -207.3212127685547,
      "loss": 0.5854,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.5230123996734619,
      "rewards/margins": 1.2387675046920776,
      "rewards/rejected": -0.715755045413971,
      "step": 65
    },
    {
      "epoch": 0.7486631016042781,
      "grad_norm": 34.98626076335809,
      "learning_rate": 8.821868240089676e-07,
      "logits/chosen": -2.1564557552337646,
      "logits/rejected": -2.1244616508483887,
      "logps/chosen": -232.23452758789062,
      "logps/rejected": -215.52719116210938,
      "loss": 0.5572,
      "rewards/accuracies": 0.8187500238418579,
      "rewards/chosen": 1.0822027921676636,
      "rewards/margins": 1.5664498805999756,
      "rewards/rejected": -0.48424673080444336,
      "step": 70
    },
    {
      "epoch": 0.8021390374331551,
      "grad_norm": 33.602724500864966,
      "learning_rate": 8.62720680220876e-07,
      "logits/chosen": -2.2200913429260254,
      "logits/rejected": -2.161189079284668,
      "logps/chosen": -232.7351837158203,
      "logps/rejected": -218.43045043945312,
      "loss": 0.5712,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.5824815630912781,
      "rewards/margins": 1.2068629264831543,
      "rewards/rejected": -0.6243813037872314,
      "step": 75
    },
    {
      "epoch": 0.8556149732620321,
      "grad_norm": 32.25811213479902,
      "learning_rate": 8.420180649593929e-07,
      "logits/chosen": -2.223334789276123,
      "logits/rejected": -2.206092119216919,
      "logps/chosen": -237.7117156982422,
      "logps/rejected": -213.7598114013672,
      "loss": 0.5111,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": 0.454035222530365,
      "rewards/margins": 1.7620937824249268,
      "rewards/rejected": -1.3080583810806274,
      "step": 80
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 28.217025801566006,
      "learning_rate": 8.201495509671036e-07,
      "logits/chosen": -2.205242872238159,
      "logits/rejected": -2.2124757766723633,
      "logps/chosen": -250.62673950195312,
      "logps/rejected": -233.39517211914062,
      "loss": 0.5286,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": 0.5412265658378601,
      "rewards/margins": 1.507828712463379,
      "rewards/rejected": -0.9666021466255188,
      "step": 85
    },
    {
      "epoch": 0.9625668449197861,
      "grad_norm": 33.081462718842836,
      "learning_rate": 7.971896853961042e-07,
      "logits/chosen": -2.286748170852661,
      "logits/rejected": -2.2566237449645996,
      "logps/chosen": -235.38076782226562,
      "logps/rejected": -223.08413696289062,
      "loss": 0.5372,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": 1.0189321041107178,
      "rewards/margins": 1.5707125663757324,
      "rewards/rejected": -0.5517805814743042,
      "step": 90
    },
    {
      "epoch": 1.0160427807486632,
      "grad_norm": 19.031150325303443,
      "learning_rate": 7.732167356856654e-07,
      "logits/chosen": -2.383920192718506,
      "logits/rejected": -2.3609161376953125,
      "logps/chosen": -249.6156768798828,
      "logps/rejected": -201.63851928710938,
      "loss": 0.3778,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": 1.3106629848480225,
      "rewards/margins": 2.160562038421631,
      "rewards/rejected": -0.8498989939689636,
      "step": 95
    },
    {
      "epoch": 1.0695187165775402,
      "grad_norm": 21.9932361270153,
      "learning_rate": 7.48312422757881e-07,
      "logits/chosen": -2.432481050491333,
      "logits/rejected": -2.416440010070801,
      "logps/chosen": -221.5184326171875,
      "logps/rejected": -238.3723602294922,
      "loss": 0.2515,
      "rewards/accuracies": 0.956250011920929,
      "rewards/chosen": 1.44422447681427,
      "rewards/margins": 3.0625545978546143,
      "rewards/rejected": -1.6183300018310547,
      "step": 100
    },
    {
      "epoch": 1.0695187165775402,
      "eval_logits/chosen": -2.3926949501037598,
      "eval_logits/rejected": -2.3801937103271484,
      "eval_logps/chosen": -243.76559448242188,
      "eval_logps/rejected": -223.52978515625,
      "eval_loss": 0.50016188621521,
      "eval_rewards/accuracies": 0.7827380895614624,
      "eval_rewards/chosen": 1.1633288860321045,
      "eval_rewards/margins": 1.9201369285583496,
      "eval_rewards/rejected": -0.7568081617355347,
      "eval_runtime": 179.117,
      "eval_samples_per_second": 14.851,
      "eval_steps_per_second": 0.234,
      "step": 100
    }
  ],
  "logging_steps": 5,
  "max_steps": 279,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1178822762299392.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}