{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.13675213675213677, "eval_steps": 40, "global_step": 40, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.017094017094017096, "grad_norm": 35.038580788061665, "learning_rate": 5e-07, "logits/chosen": -2.7457876205444336, "logits/rejected": -2.7444841861724854, "logps/chosen": -164.26461791992188, "logps/rejected": -170.55870056152344, "loss": 0.6935, "rewards/accuracies": 0.26875001192092896, "rewards/chosen": 0.003455913159996271, "rewards/margins": -0.0019886991940438747, "rewards/rejected": 0.0054446132853627205, "step": 5 }, { "epoch": 0.03418803418803419, "grad_norm": 36.203903910498276, "learning_rate": 1e-06, "logits/chosen": -2.7106502056121826, "logits/rejected": -2.716397523880005, "logps/chosen": -171.80043029785156, "logps/rejected": -165.20602416992188, "loss": 0.6875, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": 0.012000308372080326, "rewards/margins": 0.0025437879376113415, "rewards/rejected": 0.009456520900130272, "step": 10 }, { "epoch": 0.05128205128205128, "grad_norm": 33.9576577784673, "learning_rate": 9.999177507263144e-07, "logits/chosen": -2.651571750640869, "logits/rejected": -2.629457473754883, "logps/chosen": -174.04080200195312, "logps/rejected": -174.0542755126953, "loss": 0.6698, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.23909731209278107, "rewards/margins": 0.10868903249502182, "rewards/rejected": 0.13040827214717865, "step": 15 }, { "epoch": 0.06837606837606838, "grad_norm": 34.33646066636181, "learning_rate": 9.996710299650301e-07, "logits/chosen": -2.476440668106079, "logits/rejected": -2.450225353240967, "logps/chosen": -158.1311798095703, "logps/rejected": -158.0066680908203, "loss": 0.6613, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.4318675100803375, "rewards/margins": 0.14549395442008972, "rewards/rejected": 0.2863735556602478, "step": 20 }, { "epoch": 0.08547008547008547, "grad_norm": 33.16430522723429, "learning_rate": 9.992599188865604e-07, "logits/chosen": -2.3086318969726562, "logits/rejected": -2.3104796409606934, "logps/chosen": -150.59771728515625, "logps/rejected": -156.85037231445312, "loss": 0.6494, "rewards/accuracies": 0.581250011920929, "rewards/chosen": 0.5047669410705566, "rewards/margins": 0.16554531455039978, "rewards/rejected": 0.33922165632247925, "step": 25 }, { "epoch": 0.10256410256410256, "grad_norm": 34.52861424862365, "learning_rate": 9.98684552745256e-07, "logits/chosen": -2.217874050140381, "logits/rejected": -2.2254481315612793, "logps/chosen": -161.29412841796875, "logps/rejected": -161.40841674804688, "loss": 0.6295, "rewards/accuracies": 0.625, "rewards/chosen": 0.4176379144191742, "rewards/margins": 0.26531916856765747, "rewards/rejected": 0.15231874585151672, "step": 30 }, { "epoch": 0.11965811965811966, "grad_norm": 31.455117829218544, "learning_rate": 9.979451208349055e-07, "logits/chosen": -2.2608728408813477, "logits/rejected": -2.246007204055786, "logps/chosen": -171.71456909179688, "logps/rejected": -174.46578979492188, "loss": 0.6305, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.01912705972790718, "rewards/margins": 0.31441593170166016, "rewards/rejected": -0.33354294300079346, "step": 35 }, { "epoch": 0.13675213675213677, "grad_norm": 31.67318837058587, "learning_rate": 9.970418664264595e-07, "logits/chosen": -2.345672130584717, "logits/rejected": -2.331491470336914, "logps/chosen": -171.24766540527344, "logps/rejected": -176.8189697265625, "loss": 0.5989, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.27867692708969116, "rewards/margins": 0.5290472507476807, "rewards/rejected": -0.8077241778373718, "step": 40 }, { "epoch": 0.13675213675213677, "eval_logits/chosen": -2.4102065563201904, "eval_logits/rejected": -2.401230573654175, "eval_logps/chosen": -162.36439514160156, "eval_logps/rejected": -167.4954071044922, "eval_loss": 0.6069236993789673, "eval_rewards/accuracies": 0.6365384459495544, "eval_rewards/chosen": -0.388705849647522, "eval_rewards/margins": 0.47280558943748474, "eval_rewards/rejected": -0.8615114688873291, "eval_runtime": 509.918, "eval_samples_per_second": 16.305, "eval_steps_per_second": 0.255, "step": 40 } ], "logging_steps": 5, "max_steps": 876, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 471248375119872.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }