|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9906542056074765, |
|
"eval_steps": 50, |
|
"global_step": 240, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06230529595015576, |
|
"grad_norm": 64.78196225600344, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.7243542671203613, |
|
"logits/rejected": -2.7354743480682373, |
|
"logps/chosen": -260.3916320800781, |
|
"logps/rejected": -244.31298828125, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": 0.012873289175331593, |
|
"rewards/margins": 0.0037455155979841948, |
|
"rewards/rejected": 0.009127774275839329, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.12461059190031153, |
|
"grad_norm": 45.8994374201786, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -2.6493823528289795, |
|
"logits/rejected": -2.650038957595825, |
|
"logps/chosen": -234.55868530273438, |
|
"logps/rejected": -202.4860076904297, |
|
"loss": 0.6405, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.4499489665031433, |
|
"rewards/margins": 0.15348409116268158, |
|
"rewards/rejected": 0.29646486043930054, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.18691588785046728, |
|
"grad_norm": 39.36900659105447, |
|
"learning_rate": 9.988343845952696e-07, |
|
"logits/chosen": -2.487607479095459, |
|
"logits/rejected": -2.481687068939209, |
|
"logps/chosen": -227.55001831054688, |
|
"logps/rejected": -217.2289581298828, |
|
"loss": 0.6294, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.2727148532867432, |
|
"rewards/margins": 0.4717913269996643, |
|
"rewards/rejected": 0.8009236454963684, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.24922118380062305, |
|
"grad_norm": 48.3442406976696, |
|
"learning_rate": 9.953429730181652e-07, |
|
"logits/chosen": -2.409071683883667, |
|
"logits/rejected": -2.3922438621520996, |
|
"logps/chosen": -244.6020965576172, |
|
"logps/rejected": -230.30615234375, |
|
"loss": 0.6128, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.3646974563598633, |
|
"rewards/margins": 0.8445035815238953, |
|
"rewards/rejected": 0.5201937556266785, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3115264797507788, |
|
"grad_norm": 43.25375347405899, |
|
"learning_rate": 9.895420438411615e-07, |
|
"logits/chosen": -2.404625654220581, |
|
"logits/rejected": -2.380873203277588, |
|
"logps/chosen": -260.6168212890625, |
|
"logps/rejected": -226.8410186767578, |
|
"loss": 0.5858, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 1.0510780811309814, |
|
"rewards/margins": 1.0232232809066772, |
|
"rewards/rejected": 0.027854669839143753, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.37383177570093457, |
|
"grad_norm": 35.13451219232732, |
|
"learning_rate": 9.814586436738997e-07, |
|
"logits/chosen": -2.457035779953003, |
|
"logits/rejected": -2.448774576187134, |
|
"logps/chosen": -257.907470703125, |
|
"logps/rejected": -218.4612579345703, |
|
"loss": 0.5578, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 1.1478369235992432, |
|
"rewards/margins": 1.3836512565612793, |
|
"rewards/rejected": -0.23581421375274658, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.43613707165109034, |
|
"grad_norm": 41.912297815106236, |
|
"learning_rate": 9.711304610594102e-07, |
|
"logits/chosen": -2.485905408859253, |
|
"logits/rejected": -2.459043025970459, |
|
"logps/chosen": -246.7182159423828, |
|
"logps/rejected": -219.68246459960938, |
|
"loss": 0.593, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.6787748336791992, |
|
"rewards/margins": 1.0257090330123901, |
|
"rewards/rejected": -0.34693413972854614, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.4984423676012461, |
|
"grad_norm": 47.725973440299406, |
|
"learning_rate": 9.586056507527264e-07, |
|
"logits/chosen": -2.4728102684020996, |
|
"logits/rejected": -2.4659764766693115, |
|
"logps/chosen": -242.521728515625, |
|
"logps/rejected": -244.7397003173828, |
|
"loss": 0.5851, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.7883692979812622, |
|
"rewards/margins": 1.2785449028015137, |
|
"rewards/rejected": -0.4901755452156067, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5607476635514018, |
|
"grad_norm": 36.45457422056591, |
|
"learning_rate": 9.439426092011875e-07, |
|
"logits/chosen": -2.435615301132202, |
|
"logits/rejected": -2.4443929195404053, |
|
"logps/chosen": -280.24755859375, |
|
"logps/rejected": -212.96292114257812, |
|
"loss": 0.558, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 1.2600438594818115, |
|
"rewards/margins": 1.4364185333251953, |
|
"rewards/rejected": -0.17637479305267334, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.6230529595015576, |
|
"grad_norm": 37.12133742483065, |
|
"learning_rate": 9.272097022732443e-07, |
|
"logits/chosen": -2.447756052017212, |
|
"logits/rejected": -2.4266324043273926, |
|
"logps/chosen": -246.95840454101562, |
|
"logps/rejected": -214.29959106445312, |
|
"loss": 0.5451, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 1.2870022058486938, |
|
"rewards/margins": 1.596084475517273, |
|
"rewards/rejected": -0.3090822994709015, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6230529595015576, |
|
"eval_logits/chosen": -2.4445273876190186, |
|
"eval_logits/rejected": -2.4265356063842773, |
|
"eval_logps/chosen": -268.5102844238281, |
|
"eval_logps/rejected": -221.56869506835938, |
|
"eval_loss": 0.5847126245498657, |
|
"eval_rewards/accuracies": 0.7465277910232544, |
|
"eval_rewards/chosen": 1.2566064596176147, |
|
"eval_rewards/margins": 1.209053635597229, |
|
"eval_rewards/rejected": 0.04755274951457977, |
|
"eval_runtime": 151.9316, |
|
"eval_samples_per_second": 15.007, |
|
"eval_steps_per_second": 0.237, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6853582554517134, |
|
"grad_norm": 40.79262464200207, |
|
"learning_rate": 9.084849465052209e-07, |
|
"logits/chosen": -2.3776774406433105, |
|
"logits/rejected": -2.3956453800201416, |
|
"logps/chosen": -257.2913513183594, |
|
"logps/rejected": -197.3280029296875, |
|
"loss": 0.5601, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 1.311511516571045, |
|
"rewards/margins": 1.5678449869155884, |
|
"rewards/rejected": -0.2563334107398987, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.7476635514018691, |
|
"grad_norm": 48.34269415680465, |
|
"learning_rate": 8.878556453522099e-07, |
|
"logits/chosen": -2.3521735668182373, |
|
"logits/rejected": -2.3181591033935547, |
|
"logps/chosen": -233.2803497314453, |
|
"logps/rejected": -218.5789031982422, |
|
"loss": 0.5833, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.7265979647636414, |
|
"rewards/margins": 1.4349805116653442, |
|
"rewards/rejected": -0.7083825469017029, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.8099688473520249, |
|
"grad_norm": 37.4927506245011, |
|
"learning_rate": 8.654179821390621e-07, |
|
"logits/chosen": -2.362112522125244, |
|
"logits/rejected": -2.3338117599487305, |
|
"logps/chosen": -267.119140625, |
|
"logps/rejected": -216.8646240234375, |
|
"loss": 0.5886, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.6950536966323853, |
|
"rewards/margins": 1.5640451908111572, |
|
"rewards/rejected": -0.8689913749694824, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.8722741433021807, |
|
"grad_norm": 39.43481962342951, |
|
"learning_rate": 8.41276571609327e-07, |
|
"logits/chosen": -2.3775908946990967, |
|
"logits/rejected": -2.365400552749634, |
|
"logps/chosen": -277.951904296875, |
|
"logps/rejected": -225.5980682373047, |
|
"loss": 0.5456, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 1.0310388803482056, |
|
"rewards/margins": 1.5813945531845093, |
|
"rewards/rejected": -0.5503557324409485, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.9345794392523364, |
|
"grad_norm": 41.2092804317616, |
|
"learning_rate": 8.155439721630264e-07, |
|
"logits/chosen": -2.337092638015747, |
|
"logits/rejected": -2.31592059135437, |
|
"logps/chosen": -252.63143920898438, |
|
"logps/rejected": -201.25433349609375, |
|
"loss": 0.5874, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.8362587690353394, |
|
"rewards/margins": 1.4375728368759155, |
|
"rewards/rejected": -0.6013139486312866, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.9968847352024922, |
|
"grad_norm": 32.58419185513781, |
|
"learning_rate": 7.883401610574336e-07, |
|
"logits/chosen": -2.255605459213257, |
|
"logits/rejected": -2.23878812789917, |
|
"logps/chosen": -257.6908264160156, |
|
"logps/rejected": -198.56063842773438, |
|
"loss": 0.5088, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.8411666750907898, |
|
"rewards/margins": 1.4409376382827759, |
|
"rewards/rejected": -0.5997709035873413, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.0591900311526479, |
|
"grad_norm": 19.724965519418905, |
|
"learning_rate": 7.597919750177168e-07, |
|
"logits/chosen": -2.2557170391082764, |
|
"logits/rejected": -2.2346577644348145, |
|
"logps/chosen": -261.6788330078125, |
|
"logps/rejected": -238.0162353515625, |
|
"loss": 0.2444, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.7993049621582031, |
|
"rewards/margins": 3.0407943725585938, |
|
"rewards/rejected": -1.2414895296096802, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.1214953271028036, |
|
"grad_norm": 22.70642349887756, |
|
"learning_rate": 7.30032518865576e-07, |
|
"logits/chosen": -2.3579375743865967, |
|
"logits/rejected": -2.339137554168701, |
|
"logps/chosen": -234.2419891357422, |
|
"logps/rejected": -222.1098175048828, |
|
"loss": 0.2253, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 2.0569443702697754, |
|
"rewards/margins": 3.1539688110351562, |
|
"rewards/rejected": -1.0970245599746704, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.1838006230529594, |
|
"grad_norm": 26.513380455541846, |
|
"learning_rate": 6.992005449231207e-07, |
|
"logits/chosen": -2.4564146995544434, |
|
"logits/rejected": -2.480577230453491, |
|
"logps/chosen": -241.68600463867188, |
|
"logps/rejected": -214.6504364013672, |
|
"loss": 0.2597, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.810720682144165, |
|
"rewards/margins": 3.2071242332458496, |
|
"rewards/rejected": -1.3964035511016846, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.2461059190031152, |
|
"grad_norm": 25.112538207360036, |
|
"learning_rate": 6.67439806085493e-07, |
|
"logits/chosen": -2.5166354179382324, |
|
"logits/rejected": -2.513838291168213, |
|
"logps/chosen": -245.987548828125, |
|
"logps/rejected": -252.9072265625, |
|
"loss": 0.2411, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.6626945734024048, |
|
"rewards/margins": 3.801987409591675, |
|
"rewards/rejected": -2.1392927169799805, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.2461059190031152, |
|
"eval_logits/chosen": -2.5858781337738037, |
|
"eval_logits/rejected": -2.563420534133911, |
|
"eval_logps/chosen": -270.93853759765625, |
|
"eval_logps/rejected": -233.65652465820312, |
|
"eval_loss": 0.5330603718757629, |
|
"eval_rewards/accuracies": 0.7881944179534912, |
|
"eval_rewards/chosen": 1.0137810707092285, |
|
"eval_rewards/margins": 2.17501163482666, |
|
"eval_rewards/rejected": -1.1612308025360107, |
|
"eval_runtime": 151.6106, |
|
"eval_samples_per_second": 15.039, |
|
"eval_steps_per_second": 0.237, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.308411214953271, |
|
"grad_norm": 21.36224069653613, |
|
"learning_rate": 6.348983855785121e-07, |
|
"logits/chosen": -2.5647153854370117, |
|
"logits/rejected": -2.575026273727417, |
|
"logps/chosen": -243.7207489013672, |
|
"logps/rejected": -250.43545532226562, |
|
"loss": 0.2593, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.4892621040344238, |
|
"rewards/margins": 3.568880796432495, |
|
"rewards/rejected": -2.0796191692352295, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.3707165109034267, |
|
"grad_norm": 20.24131121354291, |
|
"learning_rate": 6.01728006526317e-07, |
|
"logits/chosen": -2.5996501445770264, |
|
"logits/rejected": -2.613658905029297, |
|
"logps/chosen": -234.7429962158203, |
|
"logps/rejected": -225.66238403320312, |
|
"loss": 0.251, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.5861287117004395, |
|
"rewards/margins": 3.3715546131134033, |
|
"rewards/rejected": -1.7854257822036743, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.4330218068535825, |
|
"grad_norm": 20.01846071105953, |
|
"learning_rate": 5.680833245481234e-07, |
|
"logits/chosen": -2.6137535572052, |
|
"logits/rejected": -2.6190667152404785, |
|
"logps/chosen": -240.59274291992188, |
|
"logps/rejected": -214.5608367919922, |
|
"loss": 0.2555, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 2.0559613704681396, |
|
"rewards/margins": 3.704247236251831, |
|
"rewards/rejected": -1.6482856273651123, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.4953271028037383, |
|
"grad_norm": 20.827725724325912, |
|
"learning_rate": 5.341212066823355e-07, |
|
"logits/chosen": -2.6061787605285645, |
|
"logits/rejected": -2.5920939445495605, |
|
"logps/chosen": -247.16384887695312, |
|
"logps/rejected": -219.4833526611328, |
|
"loss": 0.2356, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.6824995279312134, |
|
"rewards/margins": 3.495360851287842, |
|
"rewards/rejected": -1.812861442565918, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.557632398753894, |
|
"grad_norm": 23.949899148725493, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.5854973793029785, |
|
"logits/rejected": -2.567333936691284, |
|
"logps/chosen": -265.8291931152344, |
|
"logps/rejected": -234.7073974609375, |
|
"loss": 0.2704, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.8032957315444946, |
|
"rewards/margins": 3.318470001220703, |
|
"rewards/rejected": -1.5151745080947876, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.6199376947040498, |
|
"grad_norm": 24.77516023906911, |
|
"learning_rate": 4.6587879331766457e-07, |
|
"logits/chosen": -2.5356271266937256, |
|
"logits/rejected": -2.5272650718688965, |
|
"logps/chosen": -213.00497436523438, |
|
"logps/rejected": -224.9855194091797, |
|
"loss": 0.2557, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.8242075443267822, |
|
"rewards/margins": 3.5226008892059326, |
|
"rewards/rejected": -1.69839346408844, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.6822429906542056, |
|
"grad_norm": 24.357268196093703, |
|
"learning_rate": 4.3191667545187675e-07, |
|
"logits/chosen": -2.485233783721924, |
|
"logits/rejected": -2.4866693019866943, |
|
"logps/chosen": -238.76327514648438, |
|
"logps/rejected": -205.4741973876953, |
|
"loss": 0.3279, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 2.1047568321228027, |
|
"rewards/margins": 3.5537636280059814, |
|
"rewards/rejected": -1.4490069150924683, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.7445482866043613, |
|
"grad_norm": 28.92490643539179, |
|
"learning_rate": 3.9827199347368317e-07, |
|
"logits/chosen": -2.4208996295928955, |
|
"logits/rejected": -2.425780773162842, |
|
"logps/chosen": -253.2147674560547, |
|
"logps/rejected": -217.5721893310547, |
|
"loss": 0.2906, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.5468943119049072, |
|
"rewards/margins": 3.1680281162261963, |
|
"rewards/rejected": -1.621133804321289, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.8068535825545171, |
|
"grad_norm": 22.981053169106552, |
|
"learning_rate": 3.651016144214878e-07, |
|
"logits/chosen": -2.4239203929901123, |
|
"logits/rejected": -2.3991637229919434, |
|
"logps/chosen": -270.43402099609375, |
|
"logps/rejected": -230.588134765625, |
|
"loss": 0.2683, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 2.071660280227661, |
|
"rewards/margins": 3.900665760040283, |
|
"rewards/rejected": -1.829005241394043, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.8691588785046729, |
|
"grad_norm": 24.36269176225689, |
|
"learning_rate": 3.325601939145069e-07, |
|
"logits/chosen": -2.3990695476531982, |
|
"logits/rejected": -2.389209270477295, |
|
"logps/chosen": -237.0928955078125, |
|
"logps/rejected": -214.083984375, |
|
"loss": 0.2838, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 2.1656646728515625, |
|
"rewards/margins": 3.661673069000244, |
|
"rewards/rejected": -1.4960081577301025, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.8691588785046729, |
|
"eval_logits/chosen": -2.4266276359558105, |
|
"eval_logits/rejected": -2.3947973251342773, |
|
"eval_logps/chosen": -268.1986083984375, |
|
"eval_logps/rejected": -233.29901123046875, |
|
"eval_loss": 0.5178025960922241, |
|
"eval_rewards/accuracies": 0.7986111044883728, |
|
"eval_rewards/chosen": 1.287774920463562, |
|
"eval_rewards/margins": 2.413252830505371, |
|
"eval_rewards/rejected": -1.1254781484603882, |
|
"eval_runtime": 151.6475, |
|
"eval_samples_per_second": 15.035, |
|
"eval_steps_per_second": 0.237, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.9314641744548287, |
|
"grad_norm": 22.358701210505725, |
|
"learning_rate": 3.007994550768793e-07, |
|
"logits/chosen": -2.397007703781128, |
|
"logits/rejected": -2.3971073627471924, |
|
"logps/chosen": -242.742919921875, |
|
"logps/rejected": -243.31558227539062, |
|
"loss": 0.3023, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.8354260921478271, |
|
"rewards/margins": 3.713207960128784, |
|
"rewards/rejected": -1.8777821063995361, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.9937694704049844, |
|
"grad_norm": 18.438345971642573, |
|
"learning_rate": 2.699674811344239e-07, |
|
"logits/chosen": -2.359415054321289, |
|
"logits/rejected": -2.3509092330932617, |
|
"logps/chosen": -237.6903076171875, |
|
"logps/rejected": -228.841552734375, |
|
"loss": 0.2334, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.5688589811325073, |
|
"rewards/margins": 3.6319289207458496, |
|
"rewards/rejected": -2.063070297241211, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.05607476635514, |
|
"grad_norm": 15.458418492004268, |
|
"learning_rate": 2.4020802498228334e-07, |
|
"logits/chosen": -2.3363451957702637, |
|
"logits/rejected": -2.3300869464874268, |
|
"logps/chosen": -224.5348663330078, |
|
"logps/rejected": -242.59542846679688, |
|
"loss": 0.1776, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.179067850112915, |
|
"rewards/margins": 3.411412000656128, |
|
"rewards/rejected": -2.232343912124634, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.1183800623052957, |
|
"grad_norm": 18.095729715855835, |
|
"learning_rate": 2.1165983894256646e-07, |
|
"logits/chosen": -2.354485034942627, |
|
"logits/rejected": -2.313121795654297, |
|
"logps/chosen": -241.8025665283203, |
|
"logps/rejected": -231.444580078125, |
|
"loss": 0.1694, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.9406883716583252, |
|
"rewards/margins": 3.57853627204895, |
|
"rewards/rejected": -1.637847900390625, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.1806853582554515, |
|
"grad_norm": 15.90752453900369, |
|
"learning_rate": 1.8445602783697373e-07, |
|
"logits/chosen": -2.3171322345733643, |
|
"logits/rejected": -2.33518648147583, |
|
"logps/chosen": -234.36575317382812, |
|
"logps/rejected": -217.76229858398438, |
|
"loss": 0.1457, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.836796522140503, |
|
"rewards/margins": 3.68644380569458, |
|
"rewards/rejected": -1.8496471643447876, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.2429906542056073, |
|
"grad_norm": 13.872989061840107, |
|
"learning_rate": 1.5872342839067304e-07, |
|
"logits/chosen": -2.3331127166748047, |
|
"logits/rejected": -2.3253281116485596, |
|
"logps/chosen": -236.17605590820312, |
|
"logps/rejected": -230.16336059570312, |
|
"loss": 0.1305, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 2.0620455741882324, |
|
"rewards/margins": 4.138816833496094, |
|
"rewards/rejected": -2.076770544052124, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.305295950155763, |
|
"grad_norm": 16.862615654071075, |
|
"learning_rate": 1.3458201786093794e-07, |
|
"logits/chosen": -2.3675315380096436, |
|
"logits/rejected": -2.361820697784424, |
|
"logps/chosen": -233.4048614501953, |
|
"logps/rejected": -234.2860870361328, |
|
"loss": 0.1533, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 2.2529256343841553, |
|
"rewards/margins": 4.307948112487793, |
|
"rewards/rejected": -2.055023193359375, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.367601246105919, |
|
"grad_norm": 18.352780454288954, |
|
"learning_rate": 1.1214435464779003e-07, |
|
"logits/chosen": -2.3591573238372803, |
|
"logits/rejected": -2.358222246170044, |
|
"logps/chosen": -261.05035400390625, |
|
"logps/rejected": -228.7847900390625, |
|
"loss": 0.1603, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 2.2873449325561523, |
|
"rewards/margins": 4.070608139038086, |
|
"rewards/rejected": -1.7832629680633545, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.4299065420560746, |
|
"grad_norm": 15.489042019269524, |
|
"learning_rate": 9.1515053494779e-08, |
|
"logits/chosen": -2.3544795513153076, |
|
"logits/rejected": -2.3470654487609863, |
|
"logps/chosen": -261.56707763671875, |
|
"logps/rejected": -247.87741088867188, |
|
"loss": 0.1255, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 2.4453256130218506, |
|
"rewards/margins": 4.215699672698975, |
|
"rewards/rejected": -1.7703742980957031, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.4922118380062304, |
|
"grad_norm": 15.905641597447591, |
|
"learning_rate": 7.279029772675571e-08, |
|
"logits/chosen": -2.3422131538391113, |
|
"logits/rejected": -2.34535813331604, |
|
"logps/chosen": -261.5428466796875, |
|
"logps/rejected": -238.24478149414062, |
|
"loss": 0.1415, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.112720012664795, |
|
"rewards/margins": 4.672650337219238, |
|
"rewards/rejected": -2.5599300861358643, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.4922118380062304, |
|
"eval_logits/chosen": -2.377676486968994, |
|
"eval_logits/rejected": -2.3378918170928955, |
|
"eval_logps/chosen": -267.9961242675781, |
|
"eval_logps/rejected": -234.715087890625, |
|
"eval_loss": 0.532503604888916, |
|
"eval_rewards/accuracies": 0.8090277910232544, |
|
"eval_rewards/chosen": 1.3080248832702637, |
|
"eval_rewards/margins": 2.575108766555786, |
|
"eval_rewards/rejected": -1.267083764076233, |
|
"eval_runtime": 151.6115, |
|
"eval_samples_per_second": 15.038, |
|
"eval_steps_per_second": 0.237, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.554517133956386, |
|
"grad_norm": 16.249183319695316, |
|
"learning_rate": 5.605739079881239e-08, |
|
"logits/chosen": -2.3523454666137695, |
|
"logits/rejected": -2.3391835689544678, |
|
"logps/chosen": -247.360107421875, |
|
"logps/rejected": -231.1007843017578, |
|
"loss": 0.1524, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.8860536813735962, |
|
"rewards/margins": 4.352172374725342, |
|
"rewards/rejected": -2.466118574142456, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 2.616822429906542, |
|
"grad_norm": 15.111045956204817, |
|
"learning_rate": 4.139434924727358e-08, |
|
"logits/chosen": -2.333988666534424, |
|
"logits/rejected": -2.3273518085479736, |
|
"logps/chosen": -238.6435089111328, |
|
"logps/rejected": -244.59951782226562, |
|
"loss": 0.1293, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.8741600513458252, |
|
"rewards/margins": 4.382053375244141, |
|
"rewards/rejected": -2.5078930854797363, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.6791277258566977, |
|
"grad_norm": 20.15600996108675, |
|
"learning_rate": 2.88695389405898e-08, |
|
"logits/chosen": -2.346717357635498, |
|
"logits/rejected": -2.3138866424560547, |
|
"logps/chosen": -252.88015747070312, |
|
"logps/rejected": -246.45449829101562, |
|
"loss": 0.1573, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.8761266469955444, |
|
"rewards/margins": 4.2409467697143555, |
|
"rewards/rejected": -2.3648200035095215, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.7414330218068534, |
|
"grad_norm": 17.558745126946615, |
|
"learning_rate": 1.8541356326100433e-08, |
|
"logits/chosen": -2.336160898208618, |
|
"logits/rejected": -2.306347370147705, |
|
"logps/chosen": -264.2869567871094, |
|
"logps/rejected": -244.0496826171875, |
|
"loss": 0.1516, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 2.0822863578796387, |
|
"rewards/margins": 4.703708171844482, |
|
"rewards/rejected": -2.6214218139648438, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.803738317757009, |
|
"grad_norm": 11.413996749154455, |
|
"learning_rate": 1.0457956158838544e-08, |
|
"logits/chosen": -2.3174614906311035, |
|
"logits/rejected": -2.295732021331787, |
|
"logps/chosen": -244.0086669921875, |
|
"logps/rejected": -221.67776489257812, |
|
"loss": 0.133, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.9246057271957397, |
|
"rewards/margins": 4.322554588317871, |
|
"rewards/rejected": -2.3979482650756836, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.866043613707165, |
|
"grad_norm": 24.637369187556377, |
|
"learning_rate": 4.657026981834622e-09, |
|
"logits/chosen": -2.326481819152832, |
|
"logits/rejected": -2.330221652984619, |
|
"logps/chosen": -233.2654571533203, |
|
"logps/rejected": -257.41412353515625, |
|
"loss": 0.1537, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.7080739736557007, |
|
"rewards/margins": 4.544651508331299, |
|
"rewards/rejected": -2.8365769386291504, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.9283489096573208, |
|
"grad_norm": 16.185060485352604, |
|
"learning_rate": 1.165615404730369e-09, |
|
"logits/chosen": -2.324538469314575, |
|
"logits/rejected": -2.2891266345977783, |
|
"logps/chosen": -235.43801879882812, |
|
"logps/rejected": -247.25717163085938, |
|
"loss": 0.1482, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 2.030827522277832, |
|
"rewards/margins": 5.017447471618652, |
|
"rewards/rejected": -2.9866199493408203, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.9906542056074765, |
|
"grad_norm": 14.802366180254003, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -2.3141098022460938, |
|
"logits/rejected": -2.321915626525879, |
|
"logps/chosen": -221.14602661132812, |
|
"logps/rejected": -229.0386505126953, |
|
"loss": 0.1532, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.363684058189392, |
|
"rewards/margins": 3.9382190704345703, |
|
"rewards/rejected": -2.5745348930358887, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.9906542056074765, |
|
"step": 240, |
|
"total_flos": 2829829665718272.0, |
|
"train_loss": 0.33247024814287823, |
|
"train_runtime": 8773.941, |
|
"train_samples_per_second": 7.014, |
|
"train_steps_per_second": 0.027 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 240, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2829829665718272.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|