{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.2461059190031152,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06230529595015576,
      "grad_norm": 64.78196225600344,
      "learning_rate": 5e-07,
      "logits/chosen": -2.7243542671203613,
      "logits/rejected": -2.7354743480682373,
      "logps/chosen": -260.3916320800781,
      "logps/rejected": -244.31298828125,
      "loss": 0.6904,
      "rewards/accuracies": 0.3062500059604645,
      "rewards/chosen": 0.012873289175331593,
      "rewards/margins": 0.0037455155979841948,
      "rewards/rejected": 0.009127774275839329,
      "step": 5
    },
    {
      "epoch": 0.12461059190031153,
      "grad_norm": 45.8994374201786,
      "learning_rate": 1e-06,
      "logits/chosen": -2.6493823528289795,
      "logits/rejected": -2.650038957595825,
      "logps/chosen": -234.55868530273438,
      "logps/rejected": -202.4860076904297,
      "loss": 0.6405,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": 0.4499489665031433,
      "rewards/margins": 0.15348409116268158,
      "rewards/rejected": 0.29646486043930054,
      "step": 10
    },
    {
      "epoch": 0.18691588785046728,
      "grad_norm": 39.36900659105447,
      "learning_rate": 9.988343845952696e-07,
      "logits/chosen": -2.487607479095459,
      "logits/rejected": -2.481687068939209,
      "logps/chosen": -227.55001831054688,
      "logps/rejected": -217.2289581298828,
      "loss": 0.6294,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 1.2727148532867432,
      "rewards/margins": 0.4717913269996643,
      "rewards/rejected": 0.8009236454963684,
      "step": 15
    },
    {
      "epoch": 0.24922118380062305,
      "grad_norm": 48.3442406976696,
      "learning_rate": 9.953429730181652e-07,
      "logits/chosen": -2.409071683883667,
      "logits/rejected": -2.3922438621520996,
      "logps/chosen": -244.6020965576172,
      "logps/rejected": -230.30615234375,
      "loss": 0.6128,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 1.3646974563598633,
      "rewards/margins": 0.8445035815238953,
      "rewards/rejected": 0.5201937556266785,
      "step": 20
    },
    {
      "epoch": 0.3115264797507788,
      "grad_norm": 43.25375347405899,
      "learning_rate": 9.895420438411615e-07,
      "logits/chosen": -2.404625654220581,
      "logits/rejected": -2.380873203277588,
      "logps/chosen": -260.6168212890625,
      "logps/rejected": -226.8410186767578,
      "loss": 0.5858,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 1.0510780811309814,
      "rewards/margins": 1.0232232809066772,
      "rewards/rejected": 0.027854669839143753,
      "step": 25
    },
    {
      "epoch": 0.37383177570093457,
      "grad_norm": 35.13451219232732,
      "learning_rate": 9.814586436738997e-07,
      "logits/chosen": -2.457035779953003,
      "logits/rejected": -2.448774576187134,
      "logps/chosen": -257.907470703125,
      "logps/rejected": -218.4612579345703,
      "loss": 0.5578,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 1.1478369235992432,
      "rewards/margins": 1.3836512565612793,
      "rewards/rejected": -0.23581421375274658,
      "step": 30
    },
    {
      "epoch": 0.43613707165109034,
      "grad_norm": 41.912297815106236,
      "learning_rate": 9.711304610594102e-07,
      "logits/chosen": -2.485905408859253,
      "logits/rejected": -2.459043025970459,
      "logps/chosen": -246.7182159423828,
      "logps/rejected": -219.68246459960938,
      "loss": 0.593,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.6787748336791992,
      "rewards/margins": 1.0257090330123901,
      "rewards/rejected": -0.34693413972854614,
      "step": 35
    },
    {
      "epoch": 0.4984423676012461,
      "grad_norm": 47.725973440299406,
      "learning_rate": 9.586056507527264e-07,
      "logits/chosen": -2.4728102684020996,
      "logits/rejected": -2.4659764766693115,
      "logps/chosen": -242.521728515625,
      "logps/rejected": -244.7397003173828,
      "loss": 0.5851,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 0.7883692979812622,
      "rewards/margins": 1.2785449028015137,
      "rewards/rejected": -0.4901755452156067,
      "step": 40
    },
    {
      "epoch": 0.5607476635514018,
      "grad_norm": 36.45457422056591,
      "learning_rate": 9.439426092011875e-07,
      "logits/chosen": -2.435615301132202,
      "logits/rejected": -2.4443929195404053,
      "logps/chosen": -280.24755859375,
      "logps/rejected": -212.96292114257812,
      "loss": 0.558,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 1.2600438594818115,
      "rewards/margins": 1.4364185333251953,
      "rewards/rejected": -0.17637479305267334,
      "step": 45
    },
    {
      "epoch": 0.6230529595015576,
      "grad_norm": 37.12133742483065,
      "learning_rate": 9.272097022732443e-07,
      "logits/chosen": -2.447756052017212,
      "logits/rejected": -2.4266324043273926,
      "logps/chosen": -246.95840454101562,
      "logps/rejected": -214.29959106445312,
      "loss": 0.5451,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": 1.2870022058486938,
      "rewards/margins": 1.596084475517273,
      "rewards/rejected": -0.3090822994709015,
      "step": 50
    },
    {
      "epoch": 0.6230529595015576,
      "eval_logits/chosen": -2.4445273876190186,
      "eval_logits/rejected": -2.4265356063842773,
      "eval_logps/chosen": -268.5102844238281,
      "eval_logps/rejected": -221.56869506835938,
      "eval_loss": 0.5847126245498657,
      "eval_rewards/accuracies": 0.7465277910232544,
      "eval_rewards/chosen": 1.2566064596176147,
      "eval_rewards/margins": 1.209053635597229,
      "eval_rewards/rejected": 0.04755274951457977,
      "eval_runtime": 151.9316,
      "eval_samples_per_second": 15.007,
      "eval_steps_per_second": 0.237,
      "step": 50
    },
    {
      "epoch": 0.6853582554517134,
      "grad_norm": 40.79262464200207,
      "learning_rate": 9.084849465052209e-07,
      "logits/chosen": -2.3776774406433105,
      "logits/rejected": -2.3956453800201416,
      "logps/chosen": -257.2913513183594,
      "logps/rejected": -197.3280029296875,
      "loss": 0.5601,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": 1.311511516571045,
      "rewards/margins": 1.5678449869155884,
      "rewards/rejected": -0.2563334107398987,
      "step": 55
    },
    {
      "epoch": 0.7476635514018691,
      "grad_norm": 48.34269415680465,
      "learning_rate": 8.878556453522099e-07,
      "logits/chosen": -2.3521735668182373,
      "logits/rejected": -2.3181591033935547,
      "logps/chosen": -233.2803497314453,
      "logps/rejected": -218.5789031982422,
      "loss": 0.5833,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": 0.7265979647636414,
      "rewards/margins": 1.4349805116653442,
      "rewards/rejected": -0.7083825469017029,
      "step": 60
    },
    {
      "epoch": 0.8099688473520249,
      "grad_norm": 37.4927506245011,
      "learning_rate": 8.654179821390621e-07,
      "logits/chosen": -2.362112522125244,
      "logits/rejected": -2.3338117599487305,
      "logps/chosen": -267.119140625,
      "logps/rejected": -216.8646240234375,
      "loss": 0.5886,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": 0.6950536966323853,
      "rewards/margins": 1.5640451908111572,
      "rewards/rejected": -0.8689913749694824,
      "step": 65
    },
    {
      "epoch": 0.8722741433021807,
      "grad_norm": 39.43481962342951,
      "learning_rate": 8.41276571609327e-07,
      "logits/chosen": -2.3775908946990967,
      "logits/rejected": -2.365400552749634,
      "logps/chosen": -277.951904296875,
      "logps/rejected": -225.5980682373047,
      "loss": 0.5456,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 1.0310388803482056,
      "rewards/margins": 1.5813945531845093,
      "rewards/rejected": -0.5503557324409485,
      "step": 70
    },
    {
      "epoch": 0.9345794392523364,
      "grad_norm": 41.2092804317616,
      "learning_rate": 8.155439721630264e-07,
      "logits/chosen": -2.337092638015747,
      "logits/rejected": -2.31592059135437,
      "logps/chosen": -252.63143920898438,
      "logps/rejected": -201.25433349609375,
      "loss": 0.5874,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 0.8362587690353394,
      "rewards/margins": 1.4375728368759155,
      "rewards/rejected": -0.6013139486312866,
      "step": 75
    },
    {
      "epoch": 0.9968847352024922,
      "grad_norm": 32.58419185513781,
      "learning_rate": 7.883401610574336e-07,
      "logits/chosen": -2.255605459213257,
      "logits/rejected": -2.23878812789917,
      "logps/chosen": -257.6908264160156,
      "logps/rejected": -198.56063842773438,
      "loss": 0.5088,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 0.8411666750907898,
      "rewards/margins": 1.4409376382827759,
      "rewards/rejected": -0.5997709035873413,
      "step": 80
    },
    {
      "epoch": 1.0591900311526479,
      "grad_norm": 19.724965519418905,
      "learning_rate": 7.597919750177168e-07,
      "logits/chosen": -2.2557170391082764,
      "logits/rejected": -2.2346577644348145,
      "logps/chosen": -261.6788330078125,
      "logps/rejected": -238.0162353515625,
      "loss": 0.2444,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 1.7993049621582031,
      "rewards/margins": 3.0407943725585938,
      "rewards/rejected": -1.2414895296096802,
      "step": 85
    },
    {
      "epoch": 1.1214953271028036,
      "grad_norm": 22.70642349887756,
      "learning_rate": 7.30032518865576e-07,
      "logits/chosen": -2.3579375743865967,
      "logits/rejected": -2.339137554168701,
      "logps/chosen": -234.2419891357422,
      "logps/rejected": -222.1098175048828,
      "loss": 0.2253,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": 2.0569443702697754,
      "rewards/margins": 3.1539688110351562,
      "rewards/rejected": -1.0970245599746704,
      "step": 90
    },
    {
      "epoch": 1.1838006230529594,
      "grad_norm": 26.513380455541846,
      "learning_rate": 6.992005449231207e-07,
      "logits/chosen": -2.4564146995544434,
      "logits/rejected": -2.480577230453491,
      "logps/chosen": -241.68600463867188,
      "logps/rejected": -214.6504364013672,
      "loss": 0.2597,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": 1.810720682144165,
      "rewards/margins": 3.2071242332458496,
      "rewards/rejected": -1.3964035511016846,
      "step": 95
    },
    {
      "epoch": 1.2461059190031152,
      "grad_norm": 25.112538207360036,
      "learning_rate": 6.67439806085493e-07,
      "logits/chosen": -2.5166354179382324,
      "logits/rejected": -2.513838291168213,
      "logps/chosen": -245.987548828125,
      "logps/rejected": -252.9072265625,
      "loss": 0.2411,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": 1.6626945734024048,
      "rewards/margins": 3.801987409591675,
      "rewards/rejected": -2.1392927169799805,
      "step": 100
    },
    {
      "epoch": 1.2461059190031152,
      "eval_logits/chosen": -2.5858781337738037,
      "eval_logits/rejected": -2.563420534133911,
      "eval_logps/chosen": -270.93853759765625,
      "eval_logps/rejected": -233.65652465820312,
      "eval_loss": 0.5330603718757629,
      "eval_rewards/accuracies": 0.7881944179534912,
      "eval_rewards/chosen": 1.0137810707092285,
      "eval_rewards/margins": 2.17501163482666,
      "eval_rewards/rejected": -1.1612308025360107,
      "eval_runtime": 151.6106,
      "eval_samples_per_second": 15.039,
      "eval_steps_per_second": 0.237,
      "step": 100
    }
  ],
  "logging_steps": 5,
  "max_steps": 240,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1178822762299392.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}