{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.27350427350427353,
  "eval_steps": 40,
  "global_step": 80,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.017094017094017096,
      "grad_norm": 35.038580788061665,
      "learning_rate": 5e-07,
      "logits/chosen": -2.7457876205444336,
      "logits/rejected": -2.7444841861724854,
      "logps/chosen": -164.26461791992188,
      "logps/rejected": -170.55870056152344,
      "loss": 0.6935,
      "rewards/accuracies": 0.26875001192092896,
      "rewards/chosen": 0.003455913159996271,
      "rewards/margins": -0.0019886991940438747,
      "rewards/rejected": 0.0054446132853627205,
      "step": 5
    },
    {
      "epoch": 0.03418803418803419,
      "grad_norm": 36.203903910498276,
      "learning_rate": 1e-06,
      "logits/chosen": -2.7106502056121826,
      "logits/rejected": -2.716397523880005,
      "logps/chosen": -171.80043029785156,
      "logps/rejected": -165.20602416992188,
      "loss": 0.6875,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": 0.012000308372080326,
      "rewards/margins": 0.0025437879376113415,
      "rewards/rejected": 0.009456520900130272,
      "step": 10
    },
    {
      "epoch": 0.05128205128205128,
      "grad_norm": 33.9576577784673,
      "learning_rate": 9.999177507263144e-07,
      "logits/chosen": -2.651571750640869,
      "logits/rejected": -2.629457473754883,
      "logps/chosen": -174.04080200195312,
      "logps/rejected": -174.0542755126953,
      "loss": 0.6698,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.23909731209278107,
      "rewards/margins": 0.10868903249502182,
      "rewards/rejected": 0.13040827214717865,
      "step": 15
    },
    {
      "epoch": 0.06837606837606838,
      "grad_norm": 34.33646066636181,
      "learning_rate": 9.996710299650301e-07,
      "logits/chosen": -2.476440668106079,
      "logits/rejected": -2.450225353240967,
      "logps/chosen": -158.1311798095703,
      "logps/rejected": -158.0066680908203,
      "loss": 0.6613,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.4318675100803375,
      "rewards/margins": 0.14549395442008972,
      "rewards/rejected": 0.2863735556602478,
      "step": 20
    },
    {
      "epoch": 0.08547008547008547,
      "grad_norm": 33.16430522723429,
      "learning_rate": 9.992599188865604e-07,
      "logits/chosen": -2.3086318969726562,
      "logits/rejected": -2.3104796409606934,
      "logps/chosen": -150.59771728515625,
      "logps/rejected": -156.85037231445312,
      "loss": 0.6494,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": 0.5047669410705566,
      "rewards/margins": 0.16554531455039978,
      "rewards/rejected": 0.33922165632247925,
      "step": 25
    },
    {
      "epoch": 0.10256410256410256,
      "grad_norm": 34.52861424862365,
      "learning_rate": 9.98684552745256e-07,
      "logits/chosen": -2.217874050140381,
      "logits/rejected": -2.2254481315612793,
      "logps/chosen": -161.29412841796875,
      "logps/rejected": -161.40841674804688,
      "loss": 0.6295,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.4176379144191742,
      "rewards/margins": 0.26531916856765747,
      "rewards/rejected": 0.15231874585151672,
      "step": 30
    },
    {
      "epoch": 0.11965811965811966,
      "grad_norm": 31.455117829218544,
      "learning_rate": 9.979451208349055e-07,
      "logits/chosen": -2.2608728408813477,
      "logits/rejected": -2.246007204055786,
      "logps/chosen": -171.71456909179688,
      "logps/rejected": -174.46578979492188,
      "loss": 0.6305,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.01912705972790718,
      "rewards/margins": 0.31441593170166016,
      "rewards/rejected": -0.33354294300079346,
      "step": 35
    },
    {
      "epoch": 0.13675213675213677,
      "grad_norm": 31.67318837058587,
      "learning_rate": 9.970418664264595e-07,
      "logits/chosen": -2.345672130584717,
      "logits/rejected": -2.331491470336914,
      "logps/chosen": -171.24766540527344,
      "logps/rejected": -176.8189697265625,
      "loss": 0.5989,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.27867692708969116,
      "rewards/margins": 0.5290472507476807,
      "rewards/rejected": -0.8077241778373718,
      "step": 40
    },
    {
      "epoch": 0.13675213675213677,
      "eval_logits/chosen": -2.4102065563201904,
      "eval_logits/rejected": -2.401230573654175,
      "eval_logps/chosen": -162.36439514160156,
      "eval_logps/rejected": -167.4954071044922,
      "eval_loss": 0.6069236993789673,
      "eval_rewards/accuracies": 0.6365384459495544,
      "eval_rewards/chosen": -0.388705849647522,
      "eval_rewards/margins": 0.47280558943748474,
      "eval_rewards/rejected": -0.8615114688873291,
      "eval_runtime": 509.918,
      "eval_samples_per_second": 16.305,
      "eval_steps_per_second": 0.255,
      "step": 40
    },
    {
      "epoch": 0.15384615384615385,
      "grad_norm": 36.18313806223269,
      "learning_rate": 9.95975086687994e-07,
      "logits/chosen": -2.44050669670105,
      "logits/rejected": -2.4460220336914062,
      "logps/chosen": -163.82875061035156,
      "logps/rejected": -167.35989379882812,
      "loss": 0.6146,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.31098368763923645,
      "rewards/margins": 0.46269193291664124,
      "rewards/rejected": -0.7736755609512329,
      "step": 45
    },
    {
      "epoch": 0.17094017094017094,
      "grad_norm": 31.13412274683678,
      "learning_rate": 9.947451325869439e-07,
      "logits/chosen": -2.501091718673706,
      "logits/rejected": -2.4991250038146973,
      "logps/chosen": -172.09686279296875,
      "logps/rejected": -177.7747802734375,
      "loss": 0.577,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -0.212348073720932,
      "rewards/margins": 0.6062799692153931,
      "rewards/rejected": -0.8186280131340027,
      "step": 50
    },
    {
      "epoch": 0.18803418803418803,
      "grad_norm": 31.508672436862835,
      "learning_rate": 9.933524087746347e-07,
      "logits/chosen": -2.437525510787964,
      "logits/rejected": -2.4285693168640137,
      "logps/chosen": -168.1316375732422,
      "logps/rejected": -175.23193359375,
      "loss": 0.571,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -0.513076901435852,
      "rewards/margins": 0.7702310681343079,
      "rewards/rejected": -1.2833080291748047,
      "step": 55
    },
    {
      "epoch": 0.20512820512820512,
      "grad_norm": 30.148068867306787,
      "learning_rate": 9.917973734531549e-07,
      "logits/chosen": -2.431530475616455,
      "logits/rejected": -2.431729793548584,
      "logps/chosen": -159.38168334960938,
      "logps/rejected": -170.52500915527344,
      "loss": 0.5762,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.34855490922927856,
      "rewards/margins": 0.5969334244728088,
      "rewards/rejected": -0.9454883337020874,
      "step": 60
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 32.03814968183332,
      "learning_rate": 9.90080538224607e-07,
      "logits/chosen": -2.533193588256836,
      "logits/rejected": -2.5252978801727295,
      "logps/chosen": -157.30966186523438,
      "logps/rejected": -166.26011657714844,
      "loss": 0.5643,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.007600936107337475,
      "rewards/margins": 0.5010749697685242,
      "rewards/rejected": -0.5086758732795715,
      "step": 65
    },
    {
      "epoch": 0.23931623931623933,
      "grad_norm": 29.16308768569833,
      "learning_rate": 9.882024679227938e-07,
      "logits/chosen": -2.5899624824523926,
      "logits/rejected": -2.5779967308044434,
      "logps/chosen": -178.4553985595703,
      "logps/rejected": -179.71542358398438,
      "loss": 0.5464,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.47189587354660034,
      "rewards/margins": 0.8304598927497864,
      "rewards/rejected": -1.3023556470870972,
      "step": 70
    },
    {
      "epoch": 0.2564102564102564,
      "grad_norm": 28.918531347661485,
      "learning_rate": 9.861637804273881e-07,
      "logits/chosen": -2.578892469406128,
      "logits/rejected": -2.5758416652679443,
      "logps/chosen": -162.60537719726562,
      "logps/rejected": -170.6789093017578,
      "loss": 0.5553,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.45147842168807983,
      "rewards/margins": 0.6994724273681641,
      "rewards/rejected": -1.1509509086608887,
      "step": 75
    },
    {
      "epoch": 0.27350427350427353,
      "grad_norm": 26.98866754941649,
      "learning_rate": 9.83965146460653e-07,
      "logits/chosen": -2.54936146736145,
      "logits/rejected": -2.5406956672668457,
      "logps/chosen": -168.81484985351562,
      "logps/rejected": -179.770751953125,
      "loss": 0.5452,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.6899678111076355,
      "rewards/margins": 0.8549306988716125,
      "rewards/rejected": -1.544898509979248,
      "step": 80
    },
    {
      "epoch": 0.27350427350427353,
      "eval_logits/chosen": -2.53336238861084,
      "eval_logits/rejected": -2.517695665359497,
      "eval_logps/chosen": -167.28964233398438,
      "eval_logps/rejected": -177.21824645996094,
      "eval_loss": 0.5331124663352966,
      "eval_rewards/accuracies": 0.7134615182876587,
      "eval_rewards/chosen": -0.8812309503555298,
      "eval_rewards/margins": 0.9525622725486755,
      "eval_rewards/rejected": -1.8337931632995605,
      "eval_runtime": 510.0922,
      "eval_samples_per_second": 16.299,
      "eval_steps_per_second": 0.255,
      "step": 80
    }
  ],
  "logging_steps": 5,
  "max_steps": 876,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 40,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 942964633239552.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}