|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 3120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0016025641025641025, |
|
"grad_norm": 5.729795932769775, |
|
"learning_rate": 4.25531914893617e-08, |
|
"loss": 1.033, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.003205128205128205, |
|
"grad_norm": 9.286614418029785, |
|
"learning_rate": 9.574468085106382e-08, |
|
"loss": 1.0218, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.004807692307692308, |
|
"grad_norm": 13.221362113952637, |
|
"learning_rate": 1.4893617021276595e-07, |
|
"loss": 1.065, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.00641025641025641, |
|
"grad_norm": 11.769225120544434, |
|
"learning_rate": 2.0212765957446807e-07, |
|
"loss": 1.0354, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.008012820512820512, |
|
"grad_norm": 14.483790397644043, |
|
"learning_rate": 2.5531914893617016e-07, |
|
"loss": 1.0337, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.009615384615384616, |
|
"grad_norm": 7.531055450439453, |
|
"learning_rate": 3.085106382978723e-07, |
|
"loss": 1.0472, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.011217948717948718, |
|
"grad_norm": 6.638299942016602, |
|
"learning_rate": 3.617021276595745e-07, |
|
"loss": 0.9963, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.01282051282051282, |
|
"grad_norm": 16.26578140258789, |
|
"learning_rate": 4.148936170212766e-07, |
|
"loss": 1.0217, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.014423076923076924, |
|
"grad_norm": 17.74137306213379, |
|
"learning_rate": 4.6808510638297873e-07, |
|
"loss": 1.0535, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.016025641025641024, |
|
"grad_norm": 12.711201667785645, |
|
"learning_rate": 5.212765957446809e-07, |
|
"loss": 0.8812, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.017628205128205128, |
|
"grad_norm": 16.832317352294922, |
|
"learning_rate": 5.74468085106383e-07, |
|
"loss": 1.0726, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.019230769230769232, |
|
"grad_norm": 9.954739570617676, |
|
"learning_rate": 6.276595744680851e-07, |
|
"loss": 1.0394, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.020833333333333332, |
|
"grad_norm": 15.11540699005127, |
|
"learning_rate": 6.808510638297872e-07, |
|
"loss": 0.928, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.022435897435897436, |
|
"grad_norm": 12.173163414001465, |
|
"learning_rate": 7.340425531914893e-07, |
|
"loss": 1.1422, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02403846153846154, |
|
"grad_norm": 4.052046775817871, |
|
"learning_rate": 7.872340425531915e-07, |
|
"loss": 1.0003, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.02564102564102564, |
|
"grad_norm": 8.027369499206543, |
|
"learning_rate": 8.404255319148936e-07, |
|
"loss": 0.8759, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.027243589743589744, |
|
"grad_norm": 12.160286903381348, |
|
"learning_rate": 8.936170212765957e-07, |
|
"loss": 1.1069, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.028846153846153848, |
|
"grad_norm": 10.5565185546875, |
|
"learning_rate": 9.468085106382978e-07, |
|
"loss": 1.0333, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.030448717948717948, |
|
"grad_norm": 7.140777111053467, |
|
"learning_rate": 1e-06, |
|
"loss": 0.9822, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.03205128205128205, |
|
"grad_norm": 10.643312454223633, |
|
"learning_rate": 9.983476536682088e-07, |
|
"loss": 0.9494, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03365384615384615, |
|
"grad_norm": 8.473272323608398, |
|
"learning_rate": 9.966953073364177e-07, |
|
"loss": 0.945, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.035256410256410256, |
|
"grad_norm": 4.137317180633545, |
|
"learning_rate": 9.950429610046264e-07, |
|
"loss": 1.0005, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03685897435897436, |
|
"grad_norm": 7.508554458618164, |
|
"learning_rate": 9.933906146728353e-07, |
|
"loss": 0.8444, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.038461538461538464, |
|
"grad_norm": 6.77846097946167, |
|
"learning_rate": 9.917382683410441e-07, |
|
"loss": 0.9563, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04006410256410257, |
|
"grad_norm": 9.547110557556152, |
|
"learning_rate": 9.90085922009253e-07, |
|
"loss": 0.9827, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.041666666666666664, |
|
"grad_norm": 13.068811416625977, |
|
"learning_rate": 9.884335756774619e-07, |
|
"loss": 0.8061, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04326923076923077, |
|
"grad_norm": 8.728320121765137, |
|
"learning_rate": 9.867812293456708e-07, |
|
"loss": 0.888, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.04487179487179487, |
|
"grad_norm": 3.790550470352173, |
|
"learning_rate": 9.851288830138796e-07, |
|
"loss": 0.8734, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.046474358974358976, |
|
"grad_norm": 14.830401420593262, |
|
"learning_rate": 9.834765366820885e-07, |
|
"loss": 0.9757, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.04807692307692308, |
|
"grad_norm": 7.318249702453613, |
|
"learning_rate": 9.818241903502974e-07, |
|
"loss": 0.9218, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.049679487179487176, |
|
"grad_norm": 3.7414937019348145, |
|
"learning_rate": 9.801718440185063e-07, |
|
"loss": 0.9611, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.05128205128205128, |
|
"grad_norm": 6.792606353759766, |
|
"learning_rate": 9.78519497686715e-07, |
|
"loss": 0.9476, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.052884615384615384, |
|
"grad_norm": 4.267696380615234, |
|
"learning_rate": 9.768671513549238e-07, |
|
"loss": 0.8234, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.05448717948717949, |
|
"grad_norm": 5.2466959953308105, |
|
"learning_rate": 9.752148050231327e-07, |
|
"loss": 0.8669, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05608974358974359, |
|
"grad_norm": 11.836358070373535, |
|
"learning_rate": 9.735624586913416e-07, |
|
"loss": 0.7708, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.057692307692307696, |
|
"grad_norm": 5.974247932434082, |
|
"learning_rate": 9.719101123595505e-07, |
|
"loss": 0.8937, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05929487179487179, |
|
"grad_norm": 3.665184497833252, |
|
"learning_rate": 9.702577660277593e-07, |
|
"loss": 0.875, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.060897435897435896, |
|
"grad_norm": 4.605494022369385, |
|
"learning_rate": 9.686054196959682e-07, |
|
"loss": 0.7801, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.0625, |
|
"grad_norm": 6.7438836097717285, |
|
"learning_rate": 9.66953073364177e-07, |
|
"loss": 0.8834, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.0641025641025641, |
|
"grad_norm": 10.840106010437012, |
|
"learning_rate": 9.65300727032386e-07, |
|
"loss": 0.9003, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06570512820512821, |
|
"grad_norm": 5.037222385406494, |
|
"learning_rate": 9.636483807005949e-07, |
|
"loss": 0.7513, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.0673076923076923, |
|
"grad_norm": 3.903541326522827, |
|
"learning_rate": 9.619960343688035e-07, |
|
"loss": 0.8828, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06891025641025642, |
|
"grad_norm": 6.461961269378662, |
|
"learning_rate": 9.603436880370124e-07, |
|
"loss": 0.7606, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.07051282051282051, |
|
"grad_norm": 6.433114528656006, |
|
"learning_rate": 9.586913417052213e-07, |
|
"loss": 0.8829, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.07211538461538461, |
|
"grad_norm": 6.478908538818359, |
|
"learning_rate": 9.570389953734302e-07, |
|
"loss": 0.86, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.07371794871794872, |
|
"grad_norm": 5.247589588165283, |
|
"learning_rate": 9.55386649041639e-07, |
|
"loss": 0.8562, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.07532051282051282, |
|
"grad_norm": 8.098102569580078, |
|
"learning_rate": 9.53734302709848e-07, |
|
"loss": 0.974, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.07692307692307693, |
|
"grad_norm": 10.06252670288086, |
|
"learning_rate": 9.520819563780568e-07, |
|
"loss": 0.9281, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07852564102564102, |
|
"grad_norm": 3.637204885482788, |
|
"learning_rate": 9.504296100462657e-07, |
|
"loss": 0.8829, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.08012820512820513, |
|
"grad_norm": 4.503812313079834, |
|
"learning_rate": 9.487772637144745e-07, |
|
"loss": 0.7286, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.08173076923076923, |
|
"grad_norm": 8.717390060424805, |
|
"learning_rate": 9.471249173826834e-07, |
|
"loss": 0.6812, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.08333333333333333, |
|
"grad_norm": 6.7273640632629395, |
|
"learning_rate": 9.454725710508922e-07, |
|
"loss": 0.808, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.08493589743589744, |
|
"grad_norm": 4.702677249908447, |
|
"learning_rate": 9.438202247191011e-07, |
|
"loss": 0.9192, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.08653846153846154, |
|
"grad_norm": 4.1625285148620605, |
|
"learning_rate": 9.4216787838731e-07, |
|
"loss": 0.7835, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.08814102564102565, |
|
"grad_norm": 5.688870906829834, |
|
"learning_rate": 9.405155320555188e-07, |
|
"loss": 0.8823, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.08974358974358974, |
|
"grad_norm": 9.040973663330078, |
|
"learning_rate": 9.388631857237277e-07, |
|
"loss": 0.9733, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.09134615384615384, |
|
"grad_norm": 4.173698902130127, |
|
"learning_rate": 9.372108393919365e-07, |
|
"loss": 0.7514, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.09294871794871795, |
|
"grad_norm": 7.822443962097168, |
|
"learning_rate": 9.355584930601454e-07, |
|
"loss": 0.8867, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.09455128205128205, |
|
"grad_norm": 8.641590118408203, |
|
"learning_rate": 9.339061467283542e-07, |
|
"loss": 0.833, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.09615384615384616, |
|
"grad_norm": 4.389246463775635, |
|
"learning_rate": 9.322538003965631e-07, |
|
"loss": 0.892, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09775641025641026, |
|
"grad_norm": 4.615504741668701, |
|
"learning_rate": 9.30601454064772e-07, |
|
"loss": 0.8854, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.09935897435897435, |
|
"grad_norm": 7.86992073059082, |
|
"learning_rate": 9.289491077329808e-07, |
|
"loss": 0.8405, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.10096153846153846, |
|
"grad_norm": 7.31835412979126, |
|
"learning_rate": 9.272967614011896e-07, |
|
"loss": 0.8817, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.10256410256410256, |
|
"grad_norm": 3.403594970703125, |
|
"learning_rate": 9.256444150693985e-07, |
|
"loss": 0.8149, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.10416666666666667, |
|
"grad_norm": 7.1932806968688965, |
|
"learning_rate": 9.239920687376074e-07, |
|
"loss": 0.9139, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.10576923076923077, |
|
"grad_norm": 5.652829170227051, |
|
"learning_rate": 9.223397224058163e-07, |
|
"loss": 0.8335, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.10737179487179487, |
|
"grad_norm": 4.582092761993408, |
|
"learning_rate": 9.20687376074025e-07, |
|
"loss": 0.8175, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.10897435897435898, |
|
"grad_norm": 6.207703113555908, |
|
"learning_rate": 9.190350297422339e-07, |
|
"loss": 0.8367, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.11057692307692307, |
|
"grad_norm": 7.341710567474365, |
|
"learning_rate": 9.173826834104428e-07, |
|
"loss": 0.838, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.11217948717948718, |
|
"grad_norm": 4.263551712036133, |
|
"learning_rate": 9.157303370786517e-07, |
|
"loss": 0.8722, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.11378205128205128, |
|
"grad_norm": 5.9049601554870605, |
|
"learning_rate": 9.140779907468606e-07, |
|
"loss": 0.8158, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.11538461538461539, |
|
"grad_norm": 6.010617256164551, |
|
"learning_rate": 9.124256444150693e-07, |
|
"loss": 0.7757, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.11698717948717949, |
|
"grad_norm": 5.599278450012207, |
|
"learning_rate": 9.107732980832782e-07, |
|
"loss": 0.8487, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.11858974358974358, |
|
"grad_norm": 6.303196907043457, |
|
"learning_rate": 9.091209517514871e-07, |
|
"loss": 0.8727, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1201923076923077, |
|
"grad_norm": 5.943972110748291, |
|
"learning_rate": 9.07468605419696e-07, |
|
"loss": 0.7266, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.12179487179487179, |
|
"grad_norm": 10.433466911315918, |
|
"learning_rate": 9.058162590879048e-07, |
|
"loss": 0.8264, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1233974358974359, |
|
"grad_norm": 6.700842380523682, |
|
"learning_rate": 9.041639127561136e-07, |
|
"loss": 0.9768, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 10.210798263549805, |
|
"learning_rate": 9.025115664243225e-07, |
|
"loss": 0.8168, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1266025641025641, |
|
"grad_norm": 4.839009761810303, |
|
"learning_rate": 9.008592200925314e-07, |
|
"loss": 0.8856, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.1282051282051282, |
|
"grad_norm": 8.077885627746582, |
|
"learning_rate": 8.992068737607403e-07, |
|
"loss": 0.9729, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.12980769230769232, |
|
"grad_norm": 8.734336853027344, |
|
"learning_rate": 8.975545274289491e-07, |
|
"loss": 0.9824, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.13141025641025642, |
|
"grad_norm": 5.260401725769043, |
|
"learning_rate": 8.959021810971579e-07, |
|
"loss": 0.8476, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.1330128205128205, |
|
"grad_norm": 5.269688129425049, |
|
"learning_rate": 8.942498347653668e-07, |
|
"loss": 0.8591, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.1346153846153846, |
|
"grad_norm": 4.150247097015381, |
|
"learning_rate": 8.925974884335757e-07, |
|
"loss": 0.8461, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.1362179487179487, |
|
"grad_norm": 4.139176845550537, |
|
"learning_rate": 8.909451421017845e-07, |
|
"loss": 0.9335, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.13782051282051283, |
|
"grad_norm": 7.5222554206848145, |
|
"learning_rate": 8.892927957699934e-07, |
|
"loss": 0.7143, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.13942307692307693, |
|
"grad_norm": 12.695758819580078, |
|
"learning_rate": 8.876404494382022e-07, |
|
"loss": 0.8184, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.14102564102564102, |
|
"grad_norm": 8.057138442993164, |
|
"learning_rate": 8.859881031064111e-07, |
|
"loss": 0.9017, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.14262820512820512, |
|
"grad_norm": 8.482138633728027, |
|
"learning_rate": 8.843357567746199e-07, |
|
"loss": 0.9694, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.14423076923076922, |
|
"grad_norm": 12.769122123718262, |
|
"learning_rate": 8.826834104428288e-07, |
|
"loss": 0.8384, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.14583333333333334, |
|
"grad_norm": 5.045727252960205, |
|
"learning_rate": 8.810310641110377e-07, |
|
"loss": 0.8156, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.14743589743589744, |
|
"grad_norm": 9.09874153137207, |
|
"learning_rate": 8.793787177792465e-07, |
|
"loss": 0.8116, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.14903846153846154, |
|
"grad_norm": 6.691732883453369, |
|
"learning_rate": 8.777263714474553e-07, |
|
"loss": 0.8814, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.15064102564102563, |
|
"grad_norm": 5.676293849945068, |
|
"learning_rate": 8.760740251156642e-07, |
|
"loss": 0.8186, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.15224358974358973, |
|
"grad_norm": 8.919610977172852, |
|
"learning_rate": 8.744216787838731e-07, |
|
"loss": 0.7442, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.15384615384615385, |
|
"grad_norm": 4.288793087005615, |
|
"learning_rate": 8.72769332452082e-07, |
|
"loss": 0.8538, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.15544871794871795, |
|
"grad_norm": 8.457489013671875, |
|
"learning_rate": 8.711169861202908e-07, |
|
"loss": 0.8284, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.15705128205128205, |
|
"grad_norm": 8.613219261169434, |
|
"learning_rate": 8.694646397884996e-07, |
|
"loss": 0.8465, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.15865384615384615, |
|
"grad_norm": 5.168330192565918, |
|
"learning_rate": 8.678122934567085e-07, |
|
"loss": 0.854, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.16025641025641027, |
|
"grad_norm": 6.283329010009766, |
|
"learning_rate": 8.661599471249174e-07, |
|
"loss": 0.9902, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16185897435897437, |
|
"grad_norm": 8.224679946899414, |
|
"learning_rate": 8.645076007931263e-07, |
|
"loss": 0.9261, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.16346153846153846, |
|
"grad_norm": 3.9687061309814453, |
|
"learning_rate": 8.62855254461335e-07, |
|
"loss": 0.8671, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.16506410256410256, |
|
"grad_norm": 3.925053358078003, |
|
"learning_rate": 8.612029081295439e-07, |
|
"loss": 0.6827, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.16666666666666666, |
|
"grad_norm": 4.103531837463379, |
|
"learning_rate": 8.595505617977528e-07, |
|
"loss": 0.9075, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.16826923076923078, |
|
"grad_norm": 4.411681175231934, |
|
"learning_rate": 8.578982154659617e-07, |
|
"loss": 0.7698, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.16987179487179488, |
|
"grad_norm": 8.91723346710205, |
|
"learning_rate": 8.562458691341706e-07, |
|
"loss": 0.9759, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.17147435897435898, |
|
"grad_norm": 3.293285846710205, |
|
"learning_rate": 8.545935228023793e-07, |
|
"loss": 0.7131, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.17307692307692307, |
|
"grad_norm": 4.500021934509277, |
|
"learning_rate": 8.529411764705882e-07, |
|
"loss": 0.6866, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.17467948717948717, |
|
"grad_norm": 3.7127466201782227, |
|
"learning_rate": 8.512888301387971e-07, |
|
"loss": 0.7112, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.1762820512820513, |
|
"grad_norm": 5.5667877197265625, |
|
"learning_rate": 8.49636483807006e-07, |
|
"loss": 0.7516, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1778846153846154, |
|
"grad_norm": 4.206048965454102, |
|
"learning_rate": 8.479841374752148e-07, |
|
"loss": 0.8639, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.1794871794871795, |
|
"grad_norm": 3.593855857849121, |
|
"learning_rate": 8.463317911434236e-07, |
|
"loss": 0.7413, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.18108974358974358, |
|
"grad_norm": 9.683537483215332, |
|
"learning_rate": 8.446794448116325e-07, |
|
"loss": 0.9477, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.18269230769230768, |
|
"grad_norm": 5.113137245178223, |
|
"learning_rate": 8.430270984798414e-07, |
|
"loss": 0.8425, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1842948717948718, |
|
"grad_norm": 10.013446807861328, |
|
"learning_rate": 8.413747521480502e-07, |
|
"loss": 0.9511, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.1858974358974359, |
|
"grad_norm": 7.936026573181152, |
|
"learning_rate": 8.397224058162591e-07, |
|
"loss": 0.8367, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.1875, |
|
"grad_norm": 4.949577331542969, |
|
"learning_rate": 8.38070059484468e-07, |
|
"loss": 0.7833, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.1891025641025641, |
|
"grad_norm": 5.491623878479004, |
|
"learning_rate": 8.364177131526768e-07, |
|
"loss": 0.7967, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.1907051282051282, |
|
"grad_norm": 9.594220161437988, |
|
"learning_rate": 8.347653668208857e-07, |
|
"loss": 0.8505, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.19230769230769232, |
|
"grad_norm": 6.291924476623535, |
|
"learning_rate": 8.331130204890945e-07, |
|
"loss": 0.7231, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.19391025641025642, |
|
"grad_norm": 5.185746192932129, |
|
"learning_rate": 8.314606741573034e-07, |
|
"loss": 0.8033, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.1955128205128205, |
|
"grad_norm": 9.937252044677734, |
|
"learning_rate": 8.298083278255123e-07, |
|
"loss": 0.8159, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.1971153846153846, |
|
"grad_norm": 3.5764591693878174, |
|
"learning_rate": 8.281559814937211e-07, |
|
"loss": 0.9405, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.1987179487179487, |
|
"grad_norm": 4.1528496742248535, |
|
"learning_rate": 8.265036351619299e-07, |
|
"loss": 0.7852, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.20032051282051283, |
|
"grad_norm": 4.072427272796631, |
|
"learning_rate": 8.248512888301388e-07, |
|
"loss": 0.7844, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.20192307692307693, |
|
"grad_norm": 8.563277244567871, |
|
"learning_rate": 8.231989424983477e-07, |
|
"loss": 0.8309, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.20352564102564102, |
|
"grad_norm": 6.037329196929932, |
|
"learning_rate": 8.215465961665566e-07, |
|
"loss": 0.782, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.20512820512820512, |
|
"grad_norm": 5.000993728637695, |
|
"learning_rate": 8.198942498347653e-07, |
|
"loss": 0.9419, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.20673076923076922, |
|
"grad_norm": 4.175522327423096, |
|
"learning_rate": 8.182419035029742e-07, |
|
"loss": 0.8316, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.20833333333333334, |
|
"grad_norm": 5.075506210327148, |
|
"learning_rate": 8.165895571711831e-07, |
|
"loss": 0.8471, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.20993589743589744, |
|
"grad_norm": 5.188806533813477, |
|
"learning_rate": 8.14937210839392e-07, |
|
"loss": 0.8379, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.21153846153846154, |
|
"grad_norm": 6.2080078125, |
|
"learning_rate": 8.132848645076009e-07, |
|
"loss": 0.9081, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.21314102564102563, |
|
"grad_norm": 4.525467395782471, |
|
"learning_rate": 8.116325181758096e-07, |
|
"loss": 0.8066, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.21474358974358973, |
|
"grad_norm": 5.5678582191467285, |
|
"learning_rate": 8.099801718440185e-07, |
|
"loss": 0.7192, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.21634615384615385, |
|
"grad_norm": 6.47728157043457, |
|
"learning_rate": 8.083278255122274e-07, |
|
"loss": 0.7436, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.21794871794871795, |
|
"grad_norm": 4.739030838012695, |
|
"learning_rate": 8.066754791804363e-07, |
|
"loss": 0.8783, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.21955128205128205, |
|
"grad_norm": 6.747486591339111, |
|
"learning_rate": 8.050231328486451e-07, |
|
"loss": 0.8484, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.22115384615384615, |
|
"grad_norm": 6.090416431427002, |
|
"learning_rate": 8.033707865168539e-07, |
|
"loss": 0.8766, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.22275641025641027, |
|
"grad_norm": 5.005781650543213, |
|
"learning_rate": 8.017184401850628e-07, |
|
"loss": 0.8299, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.22435897435897437, |
|
"grad_norm": 5.198122024536133, |
|
"learning_rate": 8.000660938532717e-07, |
|
"loss": 0.8107, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.22596153846153846, |
|
"grad_norm": 5.170607089996338, |
|
"learning_rate": 7.984137475214805e-07, |
|
"loss": 0.8751, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.22756410256410256, |
|
"grad_norm": 4.371824741363525, |
|
"learning_rate": 7.967614011896894e-07, |
|
"loss": 0.8545, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.22916666666666666, |
|
"grad_norm": 7.1865363121032715, |
|
"learning_rate": 7.951090548578981e-07, |
|
"loss": 0.8519, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.23076923076923078, |
|
"grad_norm": 11.179749488830566, |
|
"learning_rate": 7.93456708526107e-07, |
|
"loss": 0.7942, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.23237179487179488, |
|
"grad_norm": 8.086874008178711, |
|
"learning_rate": 7.91804362194316e-07, |
|
"loss": 0.8385, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.23397435897435898, |
|
"grad_norm": 5.28953218460083, |
|
"learning_rate": 7.901520158625248e-07, |
|
"loss": 0.9464, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.23557692307692307, |
|
"grad_norm": 5.9961018562316895, |
|
"learning_rate": 7.884996695307337e-07, |
|
"loss": 0.917, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.23717948717948717, |
|
"grad_norm": 6.03367805480957, |
|
"learning_rate": 7.868473231989424e-07, |
|
"loss": 0.7771, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2387820512820513, |
|
"grad_norm": 4.500458717346191, |
|
"learning_rate": 7.851949768671513e-07, |
|
"loss": 0.7903, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.2403846153846154, |
|
"grad_norm": 3.947294235229492, |
|
"learning_rate": 7.835426305353601e-07, |
|
"loss": 0.795, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2419871794871795, |
|
"grad_norm": 7.3017683029174805, |
|
"learning_rate": 7.81890284203569e-07, |
|
"loss": 0.8138, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.24358974358974358, |
|
"grad_norm": 3.787949562072754, |
|
"learning_rate": 7.802379378717779e-07, |
|
"loss": 0.6665, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.24519230769230768, |
|
"grad_norm": 5.326612949371338, |
|
"learning_rate": 7.785855915399867e-07, |
|
"loss": 0.9742, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.2467948717948718, |
|
"grad_norm": 6.92157506942749, |
|
"learning_rate": 7.769332452081955e-07, |
|
"loss": 0.841, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2483974358974359, |
|
"grad_norm": 4.417288780212402, |
|
"learning_rate": 7.752808988764044e-07, |
|
"loss": 0.9047, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.6038155555725098, |
|
"learning_rate": 7.736285525446133e-07, |
|
"loss": 0.7922, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.2516025641025641, |
|
"grad_norm": 4.835304260253906, |
|
"learning_rate": 7.719762062128222e-07, |
|
"loss": 0.8349, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.2532051282051282, |
|
"grad_norm": 3.1939454078674316, |
|
"learning_rate": 7.703238598810309e-07, |
|
"loss": 0.7257, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2548076923076923, |
|
"grad_norm": 8.088797569274902, |
|
"learning_rate": 7.686715135492398e-07, |
|
"loss": 0.8311, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 7.198094367980957, |
|
"learning_rate": 7.670191672174487e-07, |
|
"loss": 0.8427, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.25801282051282054, |
|
"grad_norm": 5.080805778503418, |
|
"learning_rate": 7.653668208856576e-07, |
|
"loss": 0.7725, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.25961538461538464, |
|
"grad_norm": 3.3601558208465576, |
|
"learning_rate": 7.637144745538665e-07, |
|
"loss": 0.859, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.26121794871794873, |
|
"grad_norm": 6.839197158813477, |
|
"learning_rate": 7.620621282220752e-07, |
|
"loss": 0.8956, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.26282051282051283, |
|
"grad_norm": 4.368642807006836, |
|
"learning_rate": 7.604097818902841e-07, |
|
"loss": 0.9344, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.2644230769230769, |
|
"grad_norm": 4.079487323760986, |
|
"learning_rate": 7.58757435558493e-07, |
|
"loss": 0.7743, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.266025641025641, |
|
"grad_norm": 7.400752544403076, |
|
"learning_rate": 7.571050892267019e-07, |
|
"loss": 0.8653, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.2676282051282051, |
|
"grad_norm": 6.021170616149902, |
|
"learning_rate": 7.554527428949107e-07, |
|
"loss": 0.929, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.2692307692307692, |
|
"grad_norm": 7.803846836090088, |
|
"learning_rate": 7.538003965631195e-07, |
|
"loss": 0.7471, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.2708333333333333, |
|
"grad_norm": 11.89211654663086, |
|
"learning_rate": 7.521480502313284e-07, |
|
"loss": 0.7173, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.2724358974358974, |
|
"grad_norm": 9.066969871520996, |
|
"learning_rate": 7.504957038995373e-07, |
|
"loss": 0.9117, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.27403846153846156, |
|
"grad_norm": 5.939947128295898, |
|
"learning_rate": 7.488433575677461e-07, |
|
"loss": 0.7001, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.27564102564102566, |
|
"grad_norm": 4.300017356872559, |
|
"learning_rate": 7.47191011235955e-07, |
|
"loss": 0.9133, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.27724358974358976, |
|
"grad_norm": 3.9818003177642822, |
|
"learning_rate": 7.455386649041638e-07, |
|
"loss": 0.8436, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.27884615384615385, |
|
"grad_norm": 6.319674968719482, |
|
"learning_rate": 7.438863185723727e-07, |
|
"loss": 0.8385, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.28044871794871795, |
|
"grad_norm": 7.230429172515869, |
|
"learning_rate": 7.422339722405816e-07, |
|
"loss": 0.8215, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.28205128205128205, |
|
"grad_norm": 3.5045459270477295, |
|
"learning_rate": 7.405816259087904e-07, |
|
"loss": 0.7351, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.28365384615384615, |
|
"grad_norm": 5.423972129821777, |
|
"learning_rate": 7.389292795769993e-07, |
|
"loss": 0.7768, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.28525641025641024, |
|
"grad_norm": 9.424778938293457, |
|
"learning_rate": 7.372769332452081e-07, |
|
"loss": 0.903, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.28685897435897434, |
|
"grad_norm": 4.601898670196533, |
|
"learning_rate": 7.35624586913417e-07, |
|
"loss": 0.8104, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.28846153846153844, |
|
"grad_norm": 5.262858867645264, |
|
"learning_rate": 7.339722405816258e-07, |
|
"loss": 0.8147, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2900641025641026, |
|
"grad_norm": 4.327410697937012, |
|
"learning_rate": 7.323198942498347e-07, |
|
"loss": 0.7795, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.2916666666666667, |
|
"grad_norm": 5.896692752838135, |
|
"learning_rate": 7.306675479180436e-07, |
|
"loss": 0.7502, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.2932692307692308, |
|
"grad_norm": 4.993595600128174, |
|
"learning_rate": 7.290152015862524e-07, |
|
"loss": 0.9855, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.2948717948717949, |
|
"grad_norm": 7.250411510467529, |
|
"learning_rate": 7.273628552544612e-07, |
|
"loss": 0.6526, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.296474358974359, |
|
"grad_norm": 5.891010761260986, |
|
"learning_rate": 7.257105089226701e-07, |
|
"loss": 0.8565, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.2980769230769231, |
|
"grad_norm": 3.8717401027679443, |
|
"learning_rate": 7.24058162590879e-07, |
|
"loss": 0.9124, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.29967948717948717, |
|
"grad_norm": 5.1769537925720215, |
|
"learning_rate": 7.224058162590879e-07, |
|
"loss": 0.6785, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.30128205128205127, |
|
"grad_norm": 4.895565986633301, |
|
"learning_rate": 7.207534699272967e-07, |
|
"loss": 0.763, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.30288461538461536, |
|
"grad_norm": 7.584598541259766, |
|
"learning_rate": 7.191011235955055e-07, |
|
"loss": 0.8159, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.30448717948717946, |
|
"grad_norm": 2.980520009994507, |
|
"learning_rate": 7.174487772637144e-07, |
|
"loss": 0.8258, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.3060897435897436, |
|
"grad_norm": 5.3033528327941895, |
|
"learning_rate": 7.157964309319233e-07, |
|
"loss": 0.9091, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.3076923076923077, |
|
"grad_norm": 5.652465343475342, |
|
"learning_rate": 7.141440846001322e-07, |
|
"loss": 0.7601, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.3092948717948718, |
|
"grad_norm": 2.7802562713623047, |
|
"learning_rate": 7.124917382683409e-07, |
|
"loss": 0.6819, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.3108974358974359, |
|
"grad_norm": 10.060710906982422, |
|
"learning_rate": 7.108393919365498e-07, |
|
"loss": 0.9308, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 8.689105033874512, |
|
"learning_rate": 7.091870456047587e-07, |
|
"loss": 0.7976, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.3141025641025641, |
|
"grad_norm": 7.55824613571167, |
|
"learning_rate": 7.075346992729676e-07, |
|
"loss": 0.7857, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.3157051282051282, |
|
"grad_norm": 4.5640034675598145, |
|
"learning_rate": 7.058823529411765e-07, |
|
"loss": 0.8621, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.3173076923076923, |
|
"grad_norm": 7.791897773742676, |
|
"learning_rate": 7.042300066093852e-07, |
|
"loss": 0.8943, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.3189102564102564, |
|
"grad_norm": 4.598413944244385, |
|
"learning_rate": 7.025776602775941e-07, |
|
"loss": 0.8254, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.32051282051282054, |
|
"grad_norm": 6.27009391784668, |
|
"learning_rate": 7.00925313945803e-07, |
|
"loss": 0.8624, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.32211538461538464, |
|
"grad_norm": 13.144405364990234, |
|
"learning_rate": 6.992729676140119e-07, |
|
"loss": 0.9121, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.32371794871794873, |
|
"grad_norm": 3.142514944076538, |
|
"learning_rate": 6.976206212822207e-07, |
|
"loss": 0.8858, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.32532051282051283, |
|
"grad_norm": 3.696758270263672, |
|
"learning_rate": 6.959682749504295e-07, |
|
"loss": 0.86, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.3269230769230769, |
|
"grad_norm": 11.541287422180176, |
|
"learning_rate": 6.943159286186384e-07, |
|
"loss": 0.6851, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.328525641025641, |
|
"grad_norm": 8.48985481262207, |
|
"learning_rate": 6.926635822868473e-07, |
|
"loss": 0.793, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.3301282051282051, |
|
"grad_norm": 3.3774638175964355, |
|
"learning_rate": 6.910112359550561e-07, |
|
"loss": 0.8819, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.3317307692307692, |
|
"grad_norm": 5.883586406707764, |
|
"learning_rate": 6.89358889623265e-07, |
|
"loss": 0.7703, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 4.836696624755859, |
|
"learning_rate": 6.877065432914738e-07, |
|
"loss": 0.7873, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.3349358974358974, |
|
"grad_norm": 4.359090805053711, |
|
"learning_rate": 6.860541969596827e-07, |
|
"loss": 0.7078, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.33653846153846156, |
|
"grad_norm": 4.49058723449707, |
|
"learning_rate": 6.844018506278915e-07, |
|
"loss": 0.8212, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.33814102564102566, |
|
"grad_norm": 4.579678535461426, |
|
"learning_rate": 6.827495042961004e-07, |
|
"loss": 0.7983, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.33974358974358976, |
|
"grad_norm": 9.115843772888184, |
|
"learning_rate": 6.810971579643093e-07, |
|
"loss": 0.6587, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.34134615384615385, |
|
"grad_norm": 5.484290599822998, |
|
"learning_rate": 6.794448116325181e-07, |
|
"loss": 0.7931, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.34294871794871795, |
|
"grad_norm": 8.550032615661621, |
|
"learning_rate": 6.77792465300727e-07, |
|
"loss": 0.7788, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.34455128205128205, |
|
"grad_norm": 3.559866428375244, |
|
"learning_rate": 6.761401189689358e-07, |
|
"loss": 0.8319, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.34615384615384615, |
|
"grad_norm": 4.798201560974121, |
|
"learning_rate": 6.744877726371447e-07, |
|
"loss": 0.8986, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.34775641025641024, |
|
"grad_norm": 5.144353866577148, |
|
"learning_rate": 6.728354263053536e-07, |
|
"loss": 0.6799, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.34935897435897434, |
|
"grad_norm": 8.058225631713867, |
|
"learning_rate": 6.711830799735624e-07, |
|
"loss": 0.8101, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.35096153846153844, |
|
"grad_norm": 5.630926132202148, |
|
"learning_rate": 6.695307336417712e-07, |
|
"loss": 0.8777, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.3525641025641026, |
|
"grad_norm": 6.345671653747559, |
|
"learning_rate": 6.678783873099801e-07, |
|
"loss": 0.6524, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3541666666666667, |
|
"grad_norm": 11.713841438293457, |
|
"learning_rate": 6.66226040978189e-07, |
|
"loss": 0.8206, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.3557692307692308, |
|
"grad_norm": 4.345118999481201, |
|
"learning_rate": 6.645736946463979e-07, |
|
"loss": 0.8694, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.3573717948717949, |
|
"grad_norm": 6.495255947113037, |
|
"learning_rate": 6.629213483146066e-07, |
|
"loss": 0.8721, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.358974358974359, |
|
"grad_norm": 4.048442840576172, |
|
"learning_rate": 6.612690019828155e-07, |
|
"loss": 0.8307, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3605769230769231, |
|
"grad_norm": 5.020994186401367, |
|
"learning_rate": 6.596166556510244e-07, |
|
"loss": 0.7022, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.36217948717948717, |
|
"grad_norm": 4.994934558868408, |
|
"learning_rate": 6.579643093192333e-07, |
|
"loss": 0.8302, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.36378205128205127, |
|
"grad_norm": 3.8185691833496094, |
|
"learning_rate": 6.563119629874422e-07, |
|
"loss": 0.702, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.36538461538461536, |
|
"grad_norm": 5.440126895904541, |
|
"learning_rate": 6.546596166556509e-07, |
|
"loss": 0.8589, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.36698717948717946, |
|
"grad_norm": 4.935449600219727, |
|
"learning_rate": 6.530072703238598e-07, |
|
"loss": 0.6958, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.3685897435897436, |
|
"grad_norm": 3.0225484371185303, |
|
"learning_rate": 6.513549239920687e-07, |
|
"loss": 0.7535, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3701923076923077, |
|
"grad_norm": 4.705560684204102, |
|
"learning_rate": 6.497025776602776e-07, |
|
"loss": 0.8748, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.3717948717948718, |
|
"grad_norm": 7.766085147857666, |
|
"learning_rate": 6.480502313284864e-07, |
|
"loss": 0.7895, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.3733974358974359, |
|
"grad_norm": 7.818696975708008, |
|
"learning_rate": 6.463978849966952e-07, |
|
"loss": 0.8482, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 4.289005279541016, |
|
"learning_rate": 6.447455386649041e-07, |
|
"loss": 0.8875, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.3766025641025641, |
|
"grad_norm": 4.8739752769470215, |
|
"learning_rate": 6.43093192333113e-07, |
|
"loss": 0.8013, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.3782051282051282, |
|
"grad_norm": 3.6027133464813232, |
|
"learning_rate": 6.414408460013219e-07, |
|
"loss": 0.892, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3798076923076923, |
|
"grad_norm": 4.740626335144043, |
|
"learning_rate": 6.397884996695307e-07, |
|
"loss": 0.7148, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.3814102564102564, |
|
"grad_norm": 3.2007155418395996, |
|
"learning_rate": 6.381361533377395e-07, |
|
"loss": 0.7979, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.38301282051282054, |
|
"grad_norm": 4.596534729003906, |
|
"learning_rate": 6.364838070059484e-07, |
|
"loss": 0.8757, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.38461538461538464, |
|
"grad_norm": 4.348639488220215, |
|
"learning_rate": 6.348314606741573e-07, |
|
"loss": 0.7475, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.38621794871794873, |
|
"grad_norm": 4.388121128082275, |
|
"learning_rate": 6.331791143423661e-07, |
|
"loss": 0.7703, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.38782051282051283, |
|
"grad_norm": 3.776677370071411, |
|
"learning_rate": 6.31526768010575e-07, |
|
"loss": 0.7684, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.3894230769230769, |
|
"grad_norm": 4.856482028961182, |
|
"learning_rate": 6.298744216787838e-07, |
|
"loss": 0.8132, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.391025641025641, |
|
"grad_norm": 8.453356742858887, |
|
"learning_rate": 6.282220753469927e-07, |
|
"loss": 0.8172, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.3926282051282051, |
|
"grad_norm": 9.322402954101562, |
|
"learning_rate": 6.265697290152015e-07, |
|
"loss": 0.7704, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.3942307692307692, |
|
"grad_norm": 4.034356594085693, |
|
"learning_rate": 6.249173826834104e-07, |
|
"loss": 0.8072, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.3958333333333333, |
|
"grad_norm": 6.027692794799805, |
|
"learning_rate": 6.232650363516193e-07, |
|
"loss": 0.6584, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.3974358974358974, |
|
"grad_norm": 3.487473487854004, |
|
"learning_rate": 6.216126900198281e-07, |
|
"loss": 0.8855, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.39903846153846156, |
|
"grad_norm": 3.8283722400665283, |
|
"learning_rate": 6.19960343688037e-07, |
|
"loss": 0.8043, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.40064102564102566, |
|
"grad_norm": 4.080909252166748, |
|
"learning_rate": 6.183079973562458e-07, |
|
"loss": 0.7189, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.40224358974358976, |
|
"grad_norm": 10.283230781555176, |
|
"learning_rate": 6.166556510244547e-07, |
|
"loss": 0.6539, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.40384615384615385, |
|
"grad_norm": 3.7215747833251953, |
|
"learning_rate": 6.150033046926636e-07, |
|
"loss": 0.8382, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.40544871794871795, |
|
"grad_norm": 6.174632549285889, |
|
"learning_rate": 6.133509583608724e-07, |
|
"loss": 0.6854, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.40705128205128205, |
|
"grad_norm": 5.546038627624512, |
|
"learning_rate": 6.116986120290812e-07, |
|
"loss": 0.633, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.40865384615384615, |
|
"grad_norm": 4.6828813552856445, |
|
"learning_rate": 6.100462656972901e-07, |
|
"loss": 0.864, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.41025641025641024, |
|
"grad_norm": 5.444061279296875, |
|
"learning_rate": 6.08393919365499e-07, |
|
"loss": 0.8923, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.41185897435897434, |
|
"grad_norm": 9.895957946777344, |
|
"learning_rate": 6.067415730337079e-07, |
|
"loss": 0.91, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.41346153846153844, |
|
"grad_norm": 3.8047962188720703, |
|
"learning_rate": 6.050892267019166e-07, |
|
"loss": 0.82, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.4150641025641026, |
|
"grad_norm": 5.19016695022583, |
|
"learning_rate": 6.034368803701255e-07, |
|
"loss": 0.8092, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 3.637864112854004, |
|
"learning_rate": 6.017845340383344e-07, |
|
"loss": 0.8826, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4182692307692308, |
|
"grad_norm": 2.6663596630096436, |
|
"learning_rate": 6.001321877065433e-07, |
|
"loss": 0.6852, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.4198717948717949, |
|
"grad_norm": 3.58880615234375, |
|
"learning_rate": 5.984798413747522e-07, |
|
"loss": 0.8336, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.421474358974359, |
|
"grad_norm": 2.4447970390319824, |
|
"learning_rate": 5.968274950429609e-07, |
|
"loss": 0.7603, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.4230769230769231, |
|
"grad_norm": 4.405289649963379, |
|
"learning_rate": 5.951751487111698e-07, |
|
"loss": 0.7799, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.42467948717948717, |
|
"grad_norm": 4.415432929992676, |
|
"learning_rate": 5.935228023793787e-07, |
|
"loss": 0.7517, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.42628205128205127, |
|
"grad_norm": 2.538200616836548, |
|
"learning_rate": 5.918704560475876e-07, |
|
"loss": 0.8617, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.42788461538461536, |
|
"grad_norm": 5.295281887054443, |
|
"learning_rate": 5.902181097157964e-07, |
|
"loss": 0.8335, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.42948717948717946, |
|
"grad_norm": 10.389196395874023, |
|
"learning_rate": 5.885657633840052e-07, |
|
"loss": 0.8529, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.4310897435897436, |
|
"grad_norm": 4.9335713386535645, |
|
"learning_rate": 5.869134170522141e-07, |
|
"loss": 0.8276, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.4326923076923077, |
|
"grad_norm": 3.577237367630005, |
|
"learning_rate": 5.85261070720423e-07, |
|
"loss": 0.7883, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.4342948717948718, |
|
"grad_norm": 3.355888605117798, |
|
"learning_rate": 5.836087243886318e-07, |
|
"loss": 0.7451, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.4358974358974359, |
|
"grad_norm": 4.57732629776001, |
|
"learning_rate": 5.819563780568407e-07, |
|
"loss": 0.8938, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.4375, |
|
"grad_norm": 5.530088901519775, |
|
"learning_rate": 5.803040317250495e-07, |
|
"loss": 0.7397, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.4391025641025641, |
|
"grad_norm": 3.3376779556274414, |
|
"learning_rate": 5.786516853932584e-07, |
|
"loss": 0.7578, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.4407051282051282, |
|
"grad_norm": 3.726835012435913, |
|
"learning_rate": 5.769993390614673e-07, |
|
"loss": 0.787, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.4423076923076923, |
|
"grad_norm": 4.771599769592285, |
|
"learning_rate": 5.753469927296761e-07, |
|
"loss": 0.7629, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.4439102564102564, |
|
"grad_norm": 4.213784217834473, |
|
"learning_rate": 5.73694646397885e-07, |
|
"loss": 0.7171, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.44551282051282054, |
|
"grad_norm": 5.608395576477051, |
|
"learning_rate": 5.720423000660938e-07, |
|
"loss": 0.6273, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.44711538461538464, |
|
"grad_norm": 5.424095153808594, |
|
"learning_rate": 5.703899537343027e-07, |
|
"loss": 0.8648, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.44871794871794873, |
|
"grad_norm": 4.200117111206055, |
|
"learning_rate": 5.687376074025115e-07, |
|
"loss": 0.7668, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.45032051282051283, |
|
"grad_norm": 4.810688495635986, |
|
"learning_rate": 5.670852610707204e-07, |
|
"loss": 0.8691, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.4519230769230769, |
|
"grad_norm": 5.285038948059082, |
|
"learning_rate": 5.654329147389293e-07, |
|
"loss": 0.8094, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.453525641025641, |
|
"grad_norm": 2.9930169582366943, |
|
"learning_rate": 5.637805684071381e-07, |
|
"loss": 0.7669, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.4551282051282051, |
|
"grad_norm": 3.244771718978882, |
|
"learning_rate": 5.621282220753469e-07, |
|
"loss": 0.7827, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.4567307692307692, |
|
"grad_norm": 3.3853907585144043, |
|
"learning_rate": 5.604758757435558e-07, |
|
"loss": 0.7306, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.4583333333333333, |
|
"grad_norm": 3.7965517044067383, |
|
"learning_rate": 5.588235294117647e-07, |
|
"loss": 0.6429, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.4599358974358974, |
|
"grad_norm": 4.235316753387451, |
|
"learning_rate": 5.571711830799736e-07, |
|
"loss": 0.8967, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.46153846153846156, |
|
"grad_norm": 6.711025238037109, |
|
"learning_rate": 5.555188367481823e-07, |
|
"loss": 0.8444, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.46314102564102566, |
|
"grad_norm": 4.23643684387207, |
|
"learning_rate": 5.538664904163912e-07, |
|
"loss": 0.7664, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.46474358974358976, |
|
"grad_norm": 4.946862697601318, |
|
"learning_rate": 5.522141440846001e-07, |
|
"loss": 0.7987, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.46634615384615385, |
|
"grad_norm": 5.770292282104492, |
|
"learning_rate": 5.50561797752809e-07, |
|
"loss": 0.7844, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.46794871794871795, |
|
"grad_norm": 11.45702838897705, |
|
"learning_rate": 5.489094514210179e-07, |
|
"loss": 0.8573, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.46955128205128205, |
|
"grad_norm": 4.409577369689941, |
|
"learning_rate": 5.472571050892266e-07, |
|
"loss": 0.8122, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.47115384615384615, |
|
"grad_norm": 8.126829147338867, |
|
"learning_rate": 5.456047587574355e-07, |
|
"loss": 0.7616, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.47275641025641024, |
|
"grad_norm": 10.366379737854004, |
|
"learning_rate": 5.439524124256444e-07, |
|
"loss": 0.6492, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.47435897435897434, |
|
"grad_norm": 5.814599514007568, |
|
"learning_rate": 5.423000660938533e-07, |
|
"loss": 0.8335, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.47596153846153844, |
|
"grad_norm": 3.6713919639587402, |
|
"learning_rate": 5.406477197620621e-07, |
|
"loss": 0.7175, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.4775641025641026, |
|
"grad_norm": 4.473592758178711, |
|
"learning_rate": 5.389953734302709e-07, |
|
"loss": 0.7772, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.4791666666666667, |
|
"grad_norm": 5.191585540771484, |
|
"learning_rate": 5.373430270984798e-07, |
|
"loss": 0.8085, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.4807692307692308, |
|
"grad_norm": 4.686864376068115, |
|
"learning_rate": 5.356906807666887e-07, |
|
"loss": 0.7285, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4823717948717949, |
|
"grad_norm": 6.236685276031494, |
|
"learning_rate": 5.340383344348976e-07, |
|
"loss": 0.8491, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.483974358974359, |
|
"grad_norm": 5.375248908996582, |
|
"learning_rate": 5.323859881031064e-07, |
|
"loss": 0.8212, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.4855769230769231, |
|
"grad_norm": 3.439789295196533, |
|
"learning_rate": 5.307336417713153e-07, |
|
"loss": 0.7619, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.48717948717948717, |
|
"grad_norm": 4.730751991271973, |
|
"learning_rate": 5.290812954395241e-07, |
|
"loss": 0.8577, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.48878205128205127, |
|
"grad_norm": 3.463454484939575, |
|
"learning_rate": 5.27428949107733e-07, |
|
"loss": 0.8779, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.49038461538461536, |
|
"grad_norm": 3.2680091857910156, |
|
"learning_rate": 5.257766027759418e-07, |
|
"loss": 0.7626, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.49198717948717946, |
|
"grad_norm": 4.192795753479004, |
|
"learning_rate": 5.241242564441507e-07, |
|
"loss": 0.9215, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.4935897435897436, |
|
"grad_norm": 8.984251976013184, |
|
"learning_rate": 5.224719101123596e-07, |
|
"loss": 0.7347, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.4951923076923077, |
|
"grad_norm": 5.889853477478027, |
|
"learning_rate": 5.208195637805684e-07, |
|
"loss": 0.8716, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.4967948717948718, |
|
"grad_norm": 6.937811851501465, |
|
"learning_rate": 5.191672174487772e-07, |
|
"loss": 0.778, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.4983974358974359, |
|
"grad_norm": 5.315396308898926, |
|
"learning_rate": 5.175148711169861e-07, |
|
"loss": 0.8273, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 4.183327674865723, |
|
"learning_rate": 5.15862524785195e-07, |
|
"loss": 0.8231, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.5016025641025641, |
|
"grad_norm": 4.254622459411621, |
|
"learning_rate": 5.142101784534039e-07, |
|
"loss": 0.6407, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.5032051282051282, |
|
"grad_norm": 4.547656059265137, |
|
"learning_rate": 5.125578321216127e-07, |
|
"loss": 0.7148, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.5048076923076923, |
|
"grad_norm": 5.993008613586426, |
|
"learning_rate": 5.109054857898215e-07, |
|
"loss": 0.7727, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.5064102564102564, |
|
"grad_norm": 3.685878276824951, |
|
"learning_rate": 5.092531394580304e-07, |
|
"loss": 0.8638, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.5080128205128205, |
|
"grad_norm": 4.194368839263916, |
|
"learning_rate": 5.076007931262393e-07, |
|
"loss": 0.819, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.5096153846153846, |
|
"grad_norm": 3.141991376876831, |
|
"learning_rate": 5.059484467944482e-07, |
|
"loss": 0.7831, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.5112179487179487, |
|
"grad_norm": 5.693704605102539, |
|
"learning_rate": 5.042961004626569e-07, |
|
"loss": 0.761, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 2.8469674587249756, |
|
"learning_rate": 5.026437541308658e-07, |
|
"loss": 0.7729, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5144230769230769, |
|
"grad_norm": 7.078847885131836, |
|
"learning_rate": 5.009914077990747e-07, |
|
"loss": 0.7102, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.5160256410256411, |
|
"grad_norm": 4.168100357055664, |
|
"learning_rate": 4.993390614672835e-07, |
|
"loss": 0.6727, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.5176282051282052, |
|
"grad_norm": 3.5356192588806152, |
|
"learning_rate": 4.976867151354923e-07, |
|
"loss": 0.8312, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.5192307692307693, |
|
"grad_norm": 3.7321722507476807, |
|
"learning_rate": 4.960343688037012e-07, |
|
"loss": 0.8265, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.5208333333333334, |
|
"grad_norm": 4.614173889160156, |
|
"learning_rate": 4.943820224719101e-07, |
|
"loss": 0.7464, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.5224358974358975, |
|
"grad_norm": 4.419942378997803, |
|
"learning_rate": 4.92729676140119e-07, |
|
"loss": 0.7683, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.5240384615384616, |
|
"grad_norm": 3.572216510772705, |
|
"learning_rate": 4.910773298083277e-07, |
|
"loss": 0.8283, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.5256410256410257, |
|
"grad_norm": 3.31060528755188, |
|
"learning_rate": 4.894249834765366e-07, |
|
"loss": 0.8539, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.5272435897435898, |
|
"grad_norm": 6.509139060974121, |
|
"learning_rate": 4.877726371447455e-07, |
|
"loss": 0.6647, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.5288461538461539, |
|
"grad_norm": 4.024603843688965, |
|
"learning_rate": 4.861202908129544e-07, |
|
"loss": 0.8066, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.530448717948718, |
|
"grad_norm": 3.655712604522705, |
|
"learning_rate": 4.844679444811633e-07, |
|
"loss": 0.7693, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.532051282051282, |
|
"grad_norm": 3.420959949493408, |
|
"learning_rate": 4.82815598149372e-07, |
|
"loss": 0.7867, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.5336538461538461, |
|
"grad_norm": 4.068134307861328, |
|
"learning_rate": 4.811632518175809e-07, |
|
"loss": 0.8251, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.5352564102564102, |
|
"grad_norm": 4.351796627044678, |
|
"learning_rate": 4.795109054857898e-07, |
|
"loss": 0.7103, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.5368589743589743, |
|
"grad_norm": 5.838902950286865, |
|
"learning_rate": 4.778585591539987e-07, |
|
"loss": 0.835, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.5384615384615384, |
|
"grad_norm": 4.233332633972168, |
|
"learning_rate": 4.762062128222075e-07, |
|
"loss": 0.8817, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.5400641025641025, |
|
"grad_norm": 4.291604042053223, |
|
"learning_rate": 4.745538664904163e-07, |
|
"loss": 0.6882, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.5416666666666666, |
|
"grad_norm": 3.780442714691162, |
|
"learning_rate": 4.729015201586252e-07, |
|
"loss": 0.6711, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.5432692307692307, |
|
"grad_norm": 3.0913641452789307, |
|
"learning_rate": 4.712491738268341e-07, |
|
"loss": 0.8828, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.5448717948717948, |
|
"grad_norm": 2.7122883796691895, |
|
"learning_rate": 4.695968274950429e-07, |
|
"loss": 0.6597, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5464743589743589, |
|
"grad_norm": 3.7509615421295166, |
|
"learning_rate": 4.679444811632518e-07, |
|
"loss": 0.8139, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.5480769230769231, |
|
"grad_norm": 4.429205417633057, |
|
"learning_rate": 4.662921348314606e-07, |
|
"loss": 0.8188, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.5496794871794872, |
|
"grad_norm": 6.677905559539795, |
|
"learning_rate": 4.646397884996695e-07, |
|
"loss": 0.689, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.5512820512820513, |
|
"grad_norm": 10.779183387756348, |
|
"learning_rate": 4.6298744216787836e-07, |
|
"loss": 0.7602, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.5528846153846154, |
|
"grad_norm": 5.142210006713867, |
|
"learning_rate": 4.613350958360872e-07, |
|
"loss": 0.809, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.5544871794871795, |
|
"grad_norm": 4.238507270812988, |
|
"learning_rate": 4.5968274950429606e-07, |
|
"loss": 0.7306, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.5560897435897436, |
|
"grad_norm": 4.379620552062988, |
|
"learning_rate": 4.580304031725049e-07, |
|
"loss": 0.8622, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.5576923076923077, |
|
"grad_norm": 5.106377124786377, |
|
"learning_rate": 4.5637805684071377e-07, |
|
"loss": 0.8234, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.5592948717948718, |
|
"grad_norm": 4.431070327758789, |
|
"learning_rate": 4.5472571050892265e-07, |
|
"loss": 0.9141, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.5608974358974359, |
|
"grad_norm": 3.911802053451538, |
|
"learning_rate": 4.5307336417713147e-07, |
|
"loss": 0.783, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.5625, |
|
"grad_norm": 5.008035182952881, |
|
"learning_rate": 4.5142101784534035e-07, |
|
"loss": 0.8401, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.5641025641025641, |
|
"grad_norm": 8.659884452819824, |
|
"learning_rate": 4.4976867151354923e-07, |
|
"loss": 0.6399, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.5657051282051282, |
|
"grad_norm": 3.6218109130859375, |
|
"learning_rate": 4.4811632518175805e-07, |
|
"loss": 0.8017, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.5673076923076923, |
|
"grad_norm": 8.017809867858887, |
|
"learning_rate": 4.4646397884996693e-07, |
|
"loss": 0.9154, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.5689102564102564, |
|
"grad_norm": 4.872199535369873, |
|
"learning_rate": 4.4481163251817576e-07, |
|
"loss": 0.8139, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.5705128205128205, |
|
"grad_norm": 3.4777655601501465, |
|
"learning_rate": 4.4315928618638463e-07, |
|
"loss": 0.7942, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.5721153846153846, |
|
"grad_norm": 4.423591613769531, |
|
"learning_rate": 4.415069398545935e-07, |
|
"loss": 0.6299, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.5737179487179487, |
|
"grad_norm": 10.330599784851074, |
|
"learning_rate": 4.3985459352280234e-07, |
|
"loss": 0.849, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.5753205128205128, |
|
"grad_norm": 3.7085251808166504, |
|
"learning_rate": 4.382022471910112e-07, |
|
"loss": 0.7756, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.5769230769230769, |
|
"grad_norm": 4.038546562194824, |
|
"learning_rate": 4.3654990085922004e-07, |
|
"loss": 0.9406, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.5785256410256411, |
|
"grad_norm": 5.6599440574646, |
|
"learning_rate": 4.348975545274289e-07, |
|
"loss": 0.6654, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.5801282051282052, |
|
"grad_norm": 5.489417552947998, |
|
"learning_rate": 4.332452081956378e-07, |
|
"loss": 0.8556, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.5817307692307693, |
|
"grad_norm": 7.606975078582764, |
|
"learning_rate": 4.315928618638466e-07, |
|
"loss": 0.8567, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.5833333333333334, |
|
"grad_norm": 6.262397766113281, |
|
"learning_rate": 4.299405155320555e-07, |
|
"loss": 0.8868, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.5849358974358975, |
|
"grad_norm": 8.082782745361328, |
|
"learning_rate": 4.282881692002643e-07, |
|
"loss": 0.7651, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.5865384615384616, |
|
"grad_norm": 7.61177921295166, |
|
"learning_rate": 4.266358228684732e-07, |
|
"loss": 0.8139, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.5881410256410257, |
|
"grad_norm": 3.503220796585083, |
|
"learning_rate": 4.249834765366821e-07, |
|
"loss": 0.6752, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.5897435897435898, |
|
"grad_norm": 6.636229038238525, |
|
"learning_rate": 4.233311302048909e-07, |
|
"loss": 0.8225, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.5913461538461539, |
|
"grad_norm": 45.19087219238281, |
|
"learning_rate": 4.216787838730998e-07, |
|
"loss": 0.8639, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.592948717948718, |
|
"grad_norm": 12.816862106323242, |
|
"learning_rate": 4.200264375413086e-07, |
|
"loss": 0.9769, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.594551282051282, |
|
"grad_norm": 5.171041011810303, |
|
"learning_rate": 4.183740912095175e-07, |
|
"loss": 0.6907, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.5961538461538461, |
|
"grad_norm": 3.1392245292663574, |
|
"learning_rate": 4.1672174487772637e-07, |
|
"loss": 0.7235, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5977564102564102, |
|
"grad_norm": 3.557652473449707, |
|
"learning_rate": 4.150693985459352e-07, |
|
"loss": 0.7241, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.5993589743589743, |
|
"grad_norm": 3.4919662475585938, |
|
"learning_rate": 4.1341705221414407e-07, |
|
"loss": 0.7947, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.6009615384615384, |
|
"grad_norm": 7.577988624572754, |
|
"learning_rate": 4.117647058823529e-07, |
|
"loss": 0.7991, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.6025641025641025, |
|
"grad_norm": 6.581418514251709, |
|
"learning_rate": 4.1011235955056177e-07, |
|
"loss": 0.7415, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.6041666666666666, |
|
"grad_norm": 5.872368335723877, |
|
"learning_rate": 4.0846001321877065e-07, |
|
"loss": 0.8145, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.6057692307692307, |
|
"grad_norm": 5.491688251495361, |
|
"learning_rate": 4.068076668869795e-07, |
|
"loss": 0.888, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.6073717948717948, |
|
"grad_norm": 6.849071025848389, |
|
"learning_rate": 4.0515532055518835e-07, |
|
"loss": 0.6781, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.6089743589743589, |
|
"grad_norm": 3.5489501953125, |
|
"learning_rate": 4.035029742233972e-07, |
|
"loss": 0.7944, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6105769230769231, |
|
"grad_norm": 9.167459487915039, |
|
"learning_rate": 4.0185062789160606e-07, |
|
"loss": 0.7331, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.6121794871794872, |
|
"grad_norm": 2.9380276203155518, |
|
"learning_rate": 4.0019828155981494e-07, |
|
"loss": 0.7066, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.6137820512820513, |
|
"grad_norm": 3.069446325302124, |
|
"learning_rate": 3.9854593522802376e-07, |
|
"loss": 0.8597, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.6153846153846154, |
|
"grad_norm": 4.881730079650879, |
|
"learning_rate": 3.9689358889623264e-07, |
|
"loss": 0.8679, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.6169871794871795, |
|
"grad_norm": 7.921117305755615, |
|
"learning_rate": 3.9524124256444146e-07, |
|
"loss": 0.8195, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.6185897435897436, |
|
"grad_norm": 7.495361328125, |
|
"learning_rate": 3.9358889623265034e-07, |
|
"loss": 0.7521, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.6201923076923077, |
|
"grad_norm": 3.289283037185669, |
|
"learning_rate": 3.919365499008592e-07, |
|
"loss": 0.779, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.6217948717948718, |
|
"grad_norm": 4.523643970489502, |
|
"learning_rate": 3.9028420356906805e-07, |
|
"loss": 0.722, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.6233974358974359, |
|
"grad_norm": 4.16140079498291, |
|
"learning_rate": 3.886318572372769e-07, |
|
"loss": 0.7385, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 2.602611541748047, |
|
"learning_rate": 3.8697951090548575e-07, |
|
"loss": 0.7818, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.6266025641025641, |
|
"grad_norm": 5.022205352783203, |
|
"learning_rate": 3.8532716457369463e-07, |
|
"loss": 0.7388, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.6282051282051282, |
|
"grad_norm": 4.107226371765137, |
|
"learning_rate": 3.836748182419035e-07, |
|
"loss": 0.8531, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.6298076923076923, |
|
"grad_norm": 3.9306111335754395, |
|
"learning_rate": 3.8202247191011233e-07, |
|
"loss": 0.8112, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.6314102564102564, |
|
"grad_norm": 3.1901676654815674, |
|
"learning_rate": 3.803701255783212e-07, |
|
"loss": 0.7661, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.6330128205128205, |
|
"grad_norm": 5.7795820236206055, |
|
"learning_rate": 3.7871777924653003e-07, |
|
"loss": 0.7822, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.6346153846153846, |
|
"grad_norm": 4.990657806396484, |
|
"learning_rate": 3.770654329147389e-07, |
|
"loss": 0.6767, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.6362179487179487, |
|
"grad_norm": 3.1682956218719482, |
|
"learning_rate": 3.754130865829478e-07, |
|
"loss": 0.7403, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.6378205128205128, |
|
"grad_norm": 8.12835693359375, |
|
"learning_rate": 3.737607402511566e-07, |
|
"loss": 0.8295, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.6394230769230769, |
|
"grad_norm": 6.958061218261719, |
|
"learning_rate": 3.721083939193655e-07, |
|
"loss": 0.7853, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.6410256410256411, |
|
"grad_norm": 9.980351448059082, |
|
"learning_rate": 3.704560475875743e-07, |
|
"loss": 0.8413, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6426282051282052, |
|
"grad_norm": 5.591805934906006, |
|
"learning_rate": 3.688037012557832e-07, |
|
"loss": 0.8321, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.6442307692307693, |
|
"grad_norm": 4.056339263916016, |
|
"learning_rate": 3.671513549239921e-07, |
|
"loss": 0.7223, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.6458333333333334, |
|
"grad_norm": 4.585841655731201, |
|
"learning_rate": 3.654990085922009e-07, |
|
"loss": 0.8602, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.6474358974358975, |
|
"grad_norm": 14.423575401306152, |
|
"learning_rate": 3.638466622604098e-07, |
|
"loss": 0.8337, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.6490384615384616, |
|
"grad_norm": 17.55698013305664, |
|
"learning_rate": 3.621943159286186e-07, |
|
"loss": 0.8524, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.6506410256410257, |
|
"grad_norm": 8.060038566589355, |
|
"learning_rate": 3.605419695968275e-07, |
|
"loss": 0.7047, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.6522435897435898, |
|
"grad_norm": 3.0732924938201904, |
|
"learning_rate": 3.5888962326503636e-07, |
|
"loss": 0.8203, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.6538461538461539, |
|
"grad_norm": 6.2294020652771, |
|
"learning_rate": 3.572372769332452e-07, |
|
"loss": 0.8524, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.655448717948718, |
|
"grad_norm": 5.603904724121094, |
|
"learning_rate": 3.5558493060145406e-07, |
|
"loss": 0.6366, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.657051282051282, |
|
"grad_norm": 3.684701442718506, |
|
"learning_rate": 3.539325842696629e-07, |
|
"loss": 0.7765, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.6586538461538461, |
|
"grad_norm": 6.113523483276367, |
|
"learning_rate": 3.5228023793787177e-07, |
|
"loss": 0.6858, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.6602564102564102, |
|
"grad_norm": 5.9543280601501465, |
|
"learning_rate": 3.5062789160608064e-07, |
|
"loss": 0.8639, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.6618589743589743, |
|
"grad_norm": 2.5266408920288086, |
|
"learning_rate": 3.4897554527428947e-07, |
|
"loss": 0.9136, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.6634615384615384, |
|
"grad_norm": 4.412357807159424, |
|
"learning_rate": 3.4732319894249835e-07, |
|
"loss": 0.8078, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.6650641025641025, |
|
"grad_norm": 3.709512948989868, |
|
"learning_rate": 3.4567085261070717e-07, |
|
"loss": 0.8443, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 3.4022634029388428, |
|
"learning_rate": 3.4401850627891605e-07, |
|
"loss": 0.7546, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.6682692307692307, |
|
"grad_norm": 5.27069091796875, |
|
"learning_rate": 3.4236615994712493e-07, |
|
"loss": 0.8228, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.6698717948717948, |
|
"grad_norm": 3.136031150817871, |
|
"learning_rate": 3.4071381361533375e-07, |
|
"loss": 0.9051, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.6714743589743589, |
|
"grad_norm": 4.431833744049072, |
|
"learning_rate": 3.3906146728354263e-07, |
|
"loss": 0.8802, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.6730769230769231, |
|
"grad_norm": 4.416879653930664, |
|
"learning_rate": 3.3740912095175146e-07, |
|
"loss": 0.7876, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.6746794871794872, |
|
"grad_norm": 3.685245990753174, |
|
"learning_rate": 3.3575677461996034e-07, |
|
"loss": 0.744, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.6762820512820513, |
|
"grad_norm": 4.721916198730469, |
|
"learning_rate": 3.341044282881692e-07, |
|
"loss": 0.7867, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.6778846153846154, |
|
"grad_norm": 5.276561260223389, |
|
"learning_rate": 3.3245208195637804e-07, |
|
"loss": 0.8425, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.6794871794871795, |
|
"grad_norm": 6.171300888061523, |
|
"learning_rate": 3.307997356245869e-07, |
|
"loss": 0.742, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.6810897435897436, |
|
"grad_norm": 6.1108198165893555, |
|
"learning_rate": 3.2914738929279574e-07, |
|
"loss": 0.7814, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.6826923076923077, |
|
"grad_norm": 5.54103946685791, |
|
"learning_rate": 3.274950429610046e-07, |
|
"loss": 0.8899, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.6842948717948718, |
|
"grad_norm": 5.242672443389893, |
|
"learning_rate": 3.258426966292135e-07, |
|
"loss": 0.8232, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.6858974358974359, |
|
"grad_norm": 11.092650413513184, |
|
"learning_rate": 3.241903502974223e-07, |
|
"loss": 0.7744, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.6875, |
|
"grad_norm": 3.056320905685425, |
|
"learning_rate": 3.225380039656312e-07, |
|
"loss": 0.6096, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.6891025641025641, |
|
"grad_norm": 4.238087177276611, |
|
"learning_rate": 3.2088565763384003e-07, |
|
"loss": 0.7236, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.6907051282051282, |
|
"grad_norm": 3.4259557723999023, |
|
"learning_rate": 3.192333113020489e-07, |
|
"loss": 0.8002, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.6923076923076923, |
|
"grad_norm": 3.611785411834717, |
|
"learning_rate": 3.175809649702578e-07, |
|
"loss": 0.7647, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.6939102564102564, |
|
"grad_norm": 8.97962760925293, |
|
"learning_rate": 3.159286186384666e-07, |
|
"loss": 0.9061, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.6955128205128205, |
|
"grad_norm": 15.352239608764648, |
|
"learning_rate": 3.142762723066755e-07, |
|
"loss": 0.7211, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.6971153846153846, |
|
"grad_norm": 7.31290340423584, |
|
"learning_rate": 3.126239259748843e-07, |
|
"loss": 0.618, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.6987179487179487, |
|
"grad_norm": 4.665528297424316, |
|
"learning_rate": 3.109715796430932e-07, |
|
"loss": 0.8203, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.7003205128205128, |
|
"grad_norm": 17.2761287689209, |
|
"learning_rate": 3.0931923331130207e-07, |
|
"loss": 0.7578, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 0.7019230769230769, |
|
"grad_norm": 9.712289810180664, |
|
"learning_rate": 3.076668869795109e-07, |
|
"loss": 0.7531, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.7035256410256411, |
|
"grad_norm": 4.434769630432129, |
|
"learning_rate": 3.0601454064771977e-07, |
|
"loss": 0.7863, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 0.7051282051282052, |
|
"grad_norm": 3.8715121746063232, |
|
"learning_rate": 3.043621943159286e-07, |
|
"loss": 0.7247, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.7067307692307693, |
|
"grad_norm": 3.459235906600952, |
|
"learning_rate": 3.027098479841375e-07, |
|
"loss": 0.7149, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.7083333333333334, |
|
"grad_norm": 5.98268461227417, |
|
"learning_rate": 3.0105750165234635e-07, |
|
"loss": 0.8021, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.7099358974358975, |
|
"grad_norm": 6.481480121612549, |
|
"learning_rate": 2.994051553205552e-07, |
|
"loss": 0.8124, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 0.7115384615384616, |
|
"grad_norm": 5.063220500946045, |
|
"learning_rate": 2.9775280898876406e-07, |
|
"loss": 0.6746, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.7131410256410257, |
|
"grad_norm": 5.813882827758789, |
|
"learning_rate": 2.9610046265697293e-07, |
|
"loss": 0.8872, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.7147435897435898, |
|
"grad_norm": 7.330856800079346, |
|
"learning_rate": 2.9444811632518176e-07, |
|
"loss": 0.8496, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.7163461538461539, |
|
"grad_norm": 4.500095367431641, |
|
"learning_rate": 2.9279576999339064e-07, |
|
"loss": 0.8594, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 0.717948717948718, |
|
"grad_norm": 7.6699137687683105, |
|
"learning_rate": 2.9114342366159946e-07, |
|
"loss": 0.7005, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.719551282051282, |
|
"grad_norm": 3.332604169845581, |
|
"learning_rate": 2.8949107732980834e-07, |
|
"loss": 0.8011, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 0.7211538461538461, |
|
"grad_norm": 7.084466457366943, |
|
"learning_rate": 2.878387309980172e-07, |
|
"loss": 0.7555, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.7227564102564102, |
|
"grad_norm": 2.606405258178711, |
|
"learning_rate": 2.8618638466622604e-07, |
|
"loss": 0.8418, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 0.7243589743589743, |
|
"grad_norm": 5.162625312805176, |
|
"learning_rate": 2.845340383344349e-07, |
|
"loss": 0.7081, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.7259615384615384, |
|
"grad_norm": 6.1882758140563965, |
|
"learning_rate": 2.8288169200264375e-07, |
|
"loss": 0.7999, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 0.7275641025641025, |
|
"grad_norm": 3.4105043411254883, |
|
"learning_rate": 2.812293456708526e-07, |
|
"loss": 0.7354, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.7291666666666666, |
|
"grad_norm": 5.230040073394775, |
|
"learning_rate": 2.795769993390615e-07, |
|
"loss": 0.7022, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.7307692307692307, |
|
"grad_norm": 7.303884506225586, |
|
"learning_rate": 2.7792465300727033e-07, |
|
"loss": 0.8529, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.7323717948717948, |
|
"grad_norm": 4.611577987670898, |
|
"learning_rate": 2.762723066754792e-07, |
|
"loss": 0.8055, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 0.7339743589743589, |
|
"grad_norm": 3.8788657188415527, |
|
"learning_rate": 2.7461996034368803e-07, |
|
"loss": 0.7476, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.7355769230769231, |
|
"grad_norm": 7.592946529388428, |
|
"learning_rate": 2.729676140118969e-07, |
|
"loss": 0.8468, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 0.7371794871794872, |
|
"grad_norm": 12.41851806640625, |
|
"learning_rate": 2.713152676801058e-07, |
|
"loss": 0.8057, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.7387820512820513, |
|
"grad_norm": 3.9982833862304688, |
|
"learning_rate": 2.6966292134831456e-07, |
|
"loss": 0.8323, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 0.7403846153846154, |
|
"grad_norm": 4.3113813400268555, |
|
"learning_rate": 2.6801057501652344e-07, |
|
"loss": 0.7667, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.7419871794871795, |
|
"grad_norm": 6.139361381530762, |
|
"learning_rate": 2.6635822868473226e-07, |
|
"loss": 0.7687, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 0.7435897435897436, |
|
"grad_norm": 13.496137619018555, |
|
"learning_rate": 2.6470588235294114e-07, |
|
"loss": 0.7224, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.7451923076923077, |
|
"grad_norm": 7.981110095977783, |
|
"learning_rate": 2.6305353602115e-07, |
|
"loss": 0.8216, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.7467948717948718, |
|
"grad_norm": 6.703426361083984, |
|
"learning_rate": 2.6140118968935885e-07, |
|
"loss": 0.8239, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.7483974358974359, |
|
"grad_norm": 3.3382091522216797, |
|
"learning_rate": 2.597488433575677e-07, |
|
"loss": 0.8151, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 5.277767181396484, |
|
"learning_rate": 2.5809649702577655e-07, |
|
"loss": 0.778, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.7516025641025641, |
|
"grad_norm": 3.5990350246429443, |
|
"learning_rate": 2.5644415069398543e-07, |
|
"loss": 0.7541, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 0.7532051282051282, |
|
"grad_norm": 4.577154159545898, |
|
"learning_rate": 2.547918043621943e-07, |
|
"loss": 0.7555, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.7548076923076923, |
|
"grad_norm": 4.374950885772705, |
|
"learning_rate": 2.5313945803040313e-07, |
|
"loss": 0.8462, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 0.7564102564102564, |
|
"grad_norm": 8.507906913757324, |
|
"learning_rate": 2.51487111698612e-07, |
|
"loss": 0.7591, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.7580128205128205, |
|
"grad_norm": 4.493144512176514, |
|
"learning_rate": 2.498347653668209e-07, |
|
"loss": 0.7708, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 0.7596153846153846, |
|
"grad_norm": 4.824530124664307, |
|
"learning_rate": 2.481824190350297e-07, |
|
"loss": 0.7318, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.7612179487179487, |
|
"grad_norm": 4.022371292114258, |
|
"learning_rate": 2.465300727032386e-07, |
|
"loss": 0.7551, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.7628205128205128, |
|
"grad_norm": 3.5510129928588867, |
|
"learning_rate": 2.448777263714474e-07, |
|
"loss": 0.628, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.7644230769230769, |
|
"grad_norm": 6.095627307891846, |
|
"learning_rate": 2.432253800396563e-07, |
|
"loss": 0.725, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 0.7660256410256411, |
|
"grad_norm": 3.520016670227051, |
|
"learning_rate": 2.4157303370786517e-07, |
|
"loss": 0.7484, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.7676282051282052, |
|
"grad_norm": 6.656997203826904, |
|
"learning_rate": 2.39920687376074e-07, |
|
"loss": 0.7474, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 4.5073370933532715, |
|
"learning_rate": 2.3826834104428288e-07, |
|
"loss": 0.6534, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.7708333333333334, |
|
"grad_norm": 5.180692195892334, |
|
"learning_rate": 2.3661599471249173e-07, |
|
"loss": 0.7398, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 0.7724358974358975, |
|
"grad_norm": 4.856165885925293, |
|
"learning_rate": 2.349636483807006e-07, |
|
"loss": 0.8658, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.7740384615384616, |
|
"grad_norm": 4.942265510559082, |
|
"learning_rate": 2.3331130204890946e-07, |
|
"loss": 0.8106, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 0.7756410256410257, |
|
"grad_norm": 4.896393775939941, |
|
"learning_rate": 2.316589557171183e-07, |
|
"loss": 0.7782, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.7772435897435898, |
|
"grad_norm": 4.911433696746826, |
|
"learning_rate": 2.3000660938532716e-07, |
|
"loss": 0.7034, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.7788461538461539, |
|
"grad_norm": 5.983463287353516, |
|
"learning_rate": 2.28354263053536e-07, |
|
"loss": 0.7062, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.780448717948718, |
|
"grad_norm": 5.0456414222717285, |
|
"learning_rate": 2.267019167217449e-07, |
|
"loss": 0.7615, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 0.782051282051282, |
|
"grad_norm": 4.779991149902344, |
|
"learning_rate": 2.2504957038995374e-07, |
|
"loss": 0.6795, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.7836538461538461, |
|
"grad_norm": 5.053199768066406, |
|
"learning_rate": 2.233972240581626e-07, |
|
"loss": 0.8048, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 0.7852564102564102, |
|
"grad_norm": 7.191258907318115, |
|
"learning_rate": 2.2174487772637144e-07, |
|
"loss": 0.8043, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.7868589743589743, |
|
"grad_norm": 3.500450611114502, |
|
"learning_rate": 2.2009253139458027e-07, |
|
"loss": 0.7147, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 0.7884615384615384, |
|
"grad_norm": 4.963442325592041, |
|
"learning_rate": 2.1844018506278917e-07, |
|
"loss": 0.7803, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.7900641025641025, |
|
"grad_norm": 4.3301777839660645, |
|
"learning_rate": 2.16787838730998e-07, |
|
"loss": 0.7901, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 0.7916666666666666, |
|
"grad_norm": 4.038059711456299, |
|
"learning_rate": 2.1513549239920685e-07, |
|
"loss": 0.6812, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.7932692307692307, |
|
"grad_norm": 5.824253559112549, |
|
"learning_rate": 2.134831460674157e-07, |
|
"loss": 0.7618, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.7948717948717948, |
|
"grad_norm": 5.034027099609375, |
|
"learning_rate": 2.1183079973562455e-07, |
|
"loss": 0.6987, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.7964743589743589, |
|
"grad_norm": 4.224520206451416, |
|
"learning_rate": 2.1017845340383343e-07, |
|
"loss": 0.8233, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 0.7980769230769231, |
|
"grad_norm": 4.304800033569336, |
|
"learning_rate": 2.0852610707204228e-07, |
|
"loss": 0.683, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.7996794871794872, |
|
"grad_norm": 6.027079105377197, |
|
"learning_rate": 2.0687376074025114e-07, |
|
"loss": 0.7514, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 0.8012820512820513, |
|
"grad_norm": 7.2774882316589355, |
|
"learning_rate": 2.0522141440846e-07, |
|
"loss": 0.8308, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8028846153846154, |
|
"grad_norm": 7.033870220184326, |
|
"learning_rate": 2.0356906807666884e-07, |
|
"loss": 0.7758, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 0.8044871794871795, |
|
"grad_norm": 3.2256860733032227, |
|
"learning_rate": 2.0191672174487772e-07, |
|
"loss": 0.759, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.8060897435897436, |
|
"grad_norm": 7.072434425354004, |
|
"learning_rate": 2.0026437541308657e-07, |
|
"loss": 0.7686, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 0.8076923076923077, |
|
"grad_norm": 3.3247132301330566, |
|
"learning_rate": 1.9861202908129542e-07, |
|
"loss": 0.7644, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.8092948717948718, |
|
"grad_norm": 3.6884591579437256, |
|
"learning_rate": 1.9695968274950427e-07, |
|
"loss": 0.7558, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.8108974358974359, |
|
"grad_norm": 5.145435333251953, |
|
"learning_rate": 1.9530733641771312e-07, |
|
"loss": 0.7459, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.8125, |
|
"grad_norm": 4.134402751922607, |
|
"learning_rate": 1.93654990085922e-07, |
|
"loss": 0.8534, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 0.8141025641025641, |
|
"grad_norm": 3.347599744796753, |
|
"learning_rate": 1.9200264375413085e-07, |
|
"loss": 0.8417, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.8157051282051282, |
|
"grad_norm": 3.6410083770751953, |
|
"learning_rate": 1.903502974223397e-07, |
|
"loss": 0.8405, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 0.8173076923076923, |
|
"grad_norm": 3.344439744949341, |
|
"learning_rate": 1.8869795109054856e-07, |
|
"loss": 0.7426, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.8189102564102564, |
|
"grad_norm": 4.314718723297119, |
|
"learning_rate": 1.870456047587574e-07, |
|
"loss": 0.7995, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 0.8205128205128205, |
|
"grad_norm": 6.937241077423096, |
|
"learning_rate": 1.853932584269663e-07, |
|
"loss": 0.8192, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.8221153846153846, |
|
"grad_norm": 3.7095561027526855, |
|
"learning_rate": 1.8374091209517514e-07, |
|
"loss": 0.7015, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 0.8237179487179487, |
|
"grad_norm": 4.655959606170654, |
|
"learning_rate": 1.82088565763384e-07, |
|
"loss": 0.7462, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.8253205128205128, |
|
"grad_norm": 5.088621616363525, |
|
"learning_rate": 1.8043621943159284e-07, |
|
"loss": 0.7669, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.8269230769230769, |
|
"grad_norm": 5.979193210601807, |
|
"learning_rate": 1.7878387309980172e-07, |
|
"loss": 0.9233, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.8285256410256411, |
|
"grad_norm": 4.107568740844727, |
|
"learning_rate": 1.7713152676801057e-07, |
|
"loss": 0.868, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 0.8301282051282052, |
|
"grad_norm": 3.6633615493774414, |
|
"learning_rate": 1.7547918043621942e-07, |
|
"loss": 0.7795, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.8317307692307693, |
|
"grad_norm": 6.704728126525879, |
|
"learning_rate": 1.7382683410442828e-07, |
|
"loss": 0.714, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 6.485088348388672, |
|
"learning_rate": 1.7217448777263713e-07, |
|
"loss": 0.6941, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.8349358974358975, |
|
"grad_norm": 5.1513566970825195, |
|
"learning_rate": 1.70522141440846e-07, |
|
"loss": 0.7235, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 0.8365384615384616, |
|
"grad_norm": 6.590970039367676, |
|
"learning_rate": 1.6886979510905486e-07, |
|
"loss": 0.7834, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.8381410256410257, |
|
"grad_norm": 3.539618730545044, |
|
"learning_rate": 1.672174487772637e-07, |
|
"loss": 0.7529, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 0.8397435897435898, |
|
"grad_norm": 5.671098709106445, |
|
"learning_rate": 1.6556510244547256e-07, |
|
"loss": 0.8103, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.8413461538461539, |
|
"grad_norm": 4.69738245010376, |
|
"learning_rate": 1.639127561136814e-07, |
|
"loss": 0.8686, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.842948717948718, |
|
"grad_norm": 4.465817451477051, |
|
"learning_rate": 1.622604097818903e-07, |
|
"loss": 0.7111, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.844551282051282, |
|
"grad_norm": 3.2771265506744385, |
|
"learning_rate": 1.6060806345009914e-07, |
|
"loss": 0.7292, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 0.8461538461538461, |
|
"grad_norm": 7.632739067077637, |
|
"learning_rate": 1.58955717118308e-07, |
|
"loss": 0.786, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.8477564102564102, |
|
"grad_norm": 4.397324085235596, |
|
"learning_rate": 1.5730337078651685e-07, |
|
"loss": 0.8378, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 0.8493589743589743, |
|
"grad_norm": 3.7814230918884277, |
|
"learning_rate": 1.556510244547257e-07, |
|
"loss": 0.7088, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.8509615384615384, |
|
"grad_norm": 3.752884864807129, |
|
"learning_rate": 1.5399867812293457e-07, |
|
"loss": 0.8107, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 0.8525641025641025, |
|
"grad_norm": 3.5255517959594727, |
|
"learning_rate": 1.5234633179114343e-07, |
|
"loss": 0.7736, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.8541666666666666, |
|
"grad_norm": 3.8665730953216553, |
|
"learning_rate": 1.5069398545935228e-07, |
|
"loss": 0.687, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 0.8557692307692307, |
|
"grad_norm": 4.789595127105713, |
|
"learning_rate": 1.4904163912756113e-07, |
|
"loss": 0.718, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.8573717948717948, |
|
"grad_norm": 3.834465265274048, |
|
"learning_rate": 1.4738929279576998e-07, |
|
"loss": 0.8931, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.8589743589743589, |
|
"grad_norm": 7.070734977722168, |
|
"learning_rate": 1.4573694646397886e-07, |
|
"loss": 0.7071, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.8605769230769231, |
|
"grad_norm": 2.9893038272857666, |
|
"learning_rate": 1.440846001321877e-07, |
|
"loss": 0.6394, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 0.8621794871794872, |
|
"grad_norm": 5.302039623260498, |
|
"learning_rate": 1.4243225380039656e-07, |
|
"loss": 0.7887, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.8637820512820513, |
|
"grad_norm": 2.9799692630767822, |
|
"learning_rate": 1.4077990746860541e-07, |
|
"loss": 0.851, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 0.8653846153846154, |
|
"grad_norm": 4.219221115112305, |
|
"learning_rate": 1.3912756113681427e-07, |
|
"loss": 0.7475, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.8669871794871795, |
|
"grad_norm": 3.4256138801574707, |
|
"learning_rate": 1.3747521480502314e-07, |
|
"loss": 0.806, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 0.8685897435897436, |
|
"grad_norm": 5.021873474121094, |
|
"learning_rate": 1.35822868473232e-07, |
|
"loss": 0.7701, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.8701923076923077, |
|
"grad_norm": 4.23788595199585, |
|
"learning_rate": 1.3417052214144085e-07, |
|
"loss": 0.7122, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 0.8717948717948718, |
|
"grad_norm": 4.850051403045654, |
|
"learning_rate": 1.325181758096497e-07, |
|
"loss": 0.6861, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.8733974358974359, |
|
"grad_norm": 5.633250713348389, |
|
"learning_rate": 1.3086582947785855e-07, |
|
"loss": 0.7611, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 7.429291248321533, |
|
"learning_rate": 1.2921348314606743e-07, |
|
"loss": 0.6987, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.8766025641025641, |
|
"grad_norm": 6.112792491912842, |
|
"learning_rate": 1.2756113681427628e-07, |
|
"loss": 0.7675, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 0.8782051282051282, |
|
"grad_norm": 9.730607986450195, |
|
"learning_rate": 1.2590879048248513e-07, |
|
"loss": 0.7429, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.8798076923076923, |
|
"grad_norm": 3.8280539512634277, |
|
"learning_rate": 1.2425644415069398e-07, |
|
"loss": 0.7058, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 0.8814102564102564, |
|
"grad_norm": 5.016750812530518, |
|
"learning_rate": 1.2260409781890284e-07, |
|
"loss": 0.6931, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.8830128205128205, |
|
"grad_norm": 3.027902603149414, |
|
"learning_rate": 1.209517514871117e-07, |
|
"loss": 0.744, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 0.8846153846153846, |
|
"grad_norm": 5.9112629890441895, |
|
"learning_rate": 1.1929940515532057e-07, |
|
"loss": 0.7883, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.8862179487179487, |
|
"grad_norm": 3.9589760303497314, |
|
"learning_rate": 1.176470588235294e-07, |
|
"loss": 0.6751, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 0.8878205128205128, |
|
"grad_norm": 12.412994384765625, |
|
"learning_rate": 1.1599471249173827e-07, |
|
"loss": 0.741, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.8894230769230769, |
|
"grad_norm": 6.313468933105469, |
|
"learning_rate": 1.1434236615994712e-07, |
|
"loss": 0.7083, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.8910256410256411, |
|
"grad_norm": 3.4576292037963867, |
|
"learning_rate": 1.1269001982815597e-07, |
|
"loss": 0.6078, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.8926282051282052, |
|
"grad_norm": 3.770681142807007, |
|
"learning_rate": 1.1103767349636484e-07, |
|
"loss": 0.7976, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 0.8942307692307693, |
|
"grad_norm": 4.323639392852783, |
|
"learning_rate": 1.0938532716457369e-07, |
|
"loss": 0.726, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.8958333333333334, |
|
"grad_norm": 6.223001480102539, |
|
"learning_rate": 1.0773298083278255e-07, |
|
"loss": 0.7428, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 0.8974358974358975, |
|
"grad_norm": 4.867865085601807, |
|
"learning_rate": 1.060806345009914e-07, |
|
"loss": 0.747, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.8990384615384616, |
|
"grad_norm": 4.22167444229126, |
|
"learning_rate": 1.0442828816920026e-07, |
|
"loss": 0.7824, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 0.9006410256410257, |
|
"grad_norm": 3.4794094562530518, |
|
"learning_rate": 1.0277594183740912e-07, |
|
"loss": 0.7904, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.9022435897435898, |
|
"grad_norm": 3.968479633331299, |
|
"learning_rate": 1.0112359550561797e-07, |
|
"loss": 0.8853, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 0.9038461538461539, |
|
"grad_norm": 3.1891181468963623, |
|
"learning_rate": 9.947124917382684e-08, |
|
"loss": 0.7753, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.905448717948718, |
|
"grad_norm": 4.9156646728515625, |
|
"learning_rate": 9.781890284203569e-08, |
|
"loss": 0.7521, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.907051282051282, |
|
"grad_norm": 4.938701152801514, |
|
"learning_rate": 9.616655651024454e-08, |
|
"loss": 0.7361, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.9086538461538461, |
|
"grad_norm": 4.312582492828369, |
|
"learning_rate": 9.451421017845341e-08, |
|
"loss": 0.7044, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 0.9102564102564102, |
|
"grad_norm": 7.3174519538879395, |
|
"learning_rate": 9.286186384666226e-08, |
|
"loss": 0.7778, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.9118589743589743, |
|
"grad_norm": 8.664481163024902, |
|
"learning_rate": 9.120951751487112e-08, |
|
"loss": 0.8317, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 0.9134615384615384, |
|
"grad_norm": 8.050248146057129, |
|
"learning_rate": 8.955717118307998e-08, |
|
"loss": 0.7777, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.9150641025641025, |
|
"grad_norm": 6.539444446563721, |
|
"learning_rate": 8.790482485128881e-08, |
|
"loss": 0.8357, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 0.9166666666666666, |
|
"grad_norm": 6.118063449859619, |
|
"learning_rate": 8.625247851949768e-08, |
|
"loss": 0.6746, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.9182692307692307, |
|
"grad_norm": 4.888671398162842, |
|
"learning_rate": 8.460013218770653e-08, |
|
"loss": 0.7677, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 0.9198717948717948, |
|
"grad_norm": 5.636521816253662, |
|
"learning_rate": 8.29477858559154e-08, |
|
"loss": 0.7025, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.9214743589743589, |
|
"grad_norm": 3.849520683288574, |
|
"learning_rate": 8.129543952412425e-08, |
|
"loss": 0.7187, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"grad_norm": 5.312481880187988, |
|
"learning_rate": 7.964309319233311e-08, |
|
"loss": 0.669, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.9246794871794872, |
|
"grad_norm": 6.7007527351379395, |
|
"learning_rate": 7.799074686054196e-08, |
|
"loss": 0.7571, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 0.9262820512820513, |
|
"grad_norm": 5.961256980895996, |
|
"learning_rate": 7.633840052875081e-08, |
|
"loss": 0.733, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.9278846153846154, |
|
"grad_norm": 8.099090576171875, |
|
"learning_rate": 7.468605419695968e-08, |
|
"loss": 0.8415, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 0.9294871794871795, |
|
"grad_norm": 3.7094759941101074, |
|
"learning_rate": 7.303370786516853e-08, |
|
"loss": 0.9158, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.9310897435897436, |
|
"grad_norm": 7.212512016296387, |
|
"learning_rate": 7.13813615333774e-08, |
|
"loss": 0.815, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 0.9326923076923077, |
|
"grad_norm": 5.013028144836426, |
|
"learning_rate": 6.972901520158625e-08, |
|
"loss": 0.7161, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.9342948717948718, |
|
"grad_norm": 5.3960041999816895, |
|
"learning_rate": 6.80766688697951e-08, |
|
"loss": 0.7817, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 0.9358974358974359, |
|
"grad_norm": 3.4956471920013428, |
|
"learning_rate": 6.642432253800396e-08, |
|
"loss": 0.8383, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 3.654330253601074, |
|
"learning_rate": 6.477197620621282e-08, |
|
"loss": 0.8125, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.9391025641025641, |
|
"grad_norm": 6.255533695220947, |
|
"learning_rate": 6.311962987442168e-08, |
|
"loss": 0.7734, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.9407051282051282, |
|
"grad_norm": 4.802107810974121, |
|
"learning_rate": 6.146728354263053e-08, |
|
"loss": 0.709, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 0.9423076923076923, |
|
"grad_norm": 6.442443370819092, |
|
"learning_rate": 5.981493721083938e-08, |
|
"loss": 0.7668, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.9439102564102564, |
|
"grad_norm": 3.025623083114624, |
|
"learning_rate": 5.816259087904825e-08, |
|
"loss": 0.7797, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 0.9455128205128205, |
|
"grad_norm": 4.99326753616333, |
|
"learning_rate": 5.65102445472571e-08, |
|
"loss": 0.7969, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.9471153846153846, |
|
"grad_norm": 8.48199462890625, |
|
"learning_rate": 5.485789821546596e-08, |
|
"loss": 0.7861, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 0.9487179487179487, |
|
"grad_norm": 4.070643901824951, |
|
"learning_rate": 5.320555188367482e-08, |
|
"loss": 0.9045, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.9503205128205128, |
|
"grad_norm": 4.508942127227783, |
|
"learning_rate": 5.1553205551883676e-08, |
|
"loss": 0.806, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 0.9519230769230769, |
|
"grad_norm": 5.224105358123779, |
|
"learning_rate": 4.9900859220092534e-08, |
|
"loss": 0.7537, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.9535256410256411, |
|
"grad_norm": 5.267168998718262, |
|
"learning_rate": 4.8248512888301386e-08, |
|
"loss": 0.7458, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.9551282051282052, |
|
"grad_norm": 14.058978080749512, |
|
"learning_rate": 4.659616655651024e-08, |
|
"loss": 0.8491, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.9567307692307693, |
|
"grad_norm": 7.71165657043457, |
|
"learning_rate": 4.4943820224719096e-08, |
|
"loss": 0.7255, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 0.9583333333333334, |
|
"grad_norm": 3.65620493888855, |
|
"learning_rate": 4.3291473892927954e-08, |
|
"loss": 0.7917, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.9599358974358975, |
|
"grad_norm": 11.238397598266602, |
|
"learning_rate": 4.163912756113681e-08, |
|
"loss": 0.7828, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 0.9615384615384616, |
|
"grad_norm": 6.159839630126953, |
|
"learning_rate": 3.998678122934567e-08, |
|
"loss": 0.7724, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.9631410256410257, |
|
"grad_norm": 4.247456073760986, |
|
"learning_rate": 3.833443489755452e-08, |
|
"loss": 0.7635, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 0.9647435897435898, |
|
"grad_norm": 5.236011505126953, |
|
"learning_rate": 3.668208856576338e-08, |
|
"loss": 0.7782, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.9663461538461539, |
|
"grad_norm": 4.830688953399658, |
|
"learning_rate": 3.502974223397224e-08, |
|
"loss": 0.7962, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 0.967948717948718, |
|
"grad_norm": 6.072144508361816, |
|
"learning_rate": 3.33773959021811e-08, |
|
"loss": 0.9383, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.969551282051282, |
|
"grad_norm": 3.7657108306884766, |
|
"learning_rate": 3.1725049570389955e-08, |
|
"loss": 0.9029, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.9711538461538461, |
|
"grad_norm": 5.47902774810791, |
|
"learning_rate": 3.007270323859881e-08, |
|
"loss": 0.8262, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.9727564102564102, |
|
"grad_norm": 4.847268104553223, |
|
"learning_rate": 2.8420356906807665e-08, |
|
"loss": 0.8054, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 0.9743589743589743, |
|
"grad_norm": 6.062643527984619, |
|
"learning_rate": 2.676801057501652e-08, |
|
"loss": 0.7808, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.9759615384615384, |
|
"grad_norm": 5.440711498260498, |
|
"learning_rate": 2.511566424322538e-08, |
|
"loss": 0.8026, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 0.9775641025641025, |
|
"grad_norm": 2.9105708599090576, |
|
"learning_rate": 2.3463317911434237e-08, |
|
"loss": 0.67, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.9791666666666666, |
|
"grad_norm": 5.284862518310547, |
|
"learning_rate": 2.1810971579643092e-08, |
|
"loss": 0.7455, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 0.9807692307692307, |
|
"grad_norm": 3.7022602558135986, |
|
"learning_rate": 2.015862524785195e-08, |
|
"loss": 0.7627, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.9823717948717948, |
|
"grad_norm": 7.428618907928467, |
|
"learning_rate": 1.850627891606081e-08, |
|
"loss": 0.7116, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 0.9839743589743589, |
|
"grad_norm": 6.064960956573486, |
|
"learning_rate": 1.685393258426966e-08, |
|
"loss": 0.8331, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.9855769230769231, |
|
"grad_norm": 6.40654182434082, |
|
"learning_rate": 1.520158625247852e-08, |
|
"loss": 0.7827, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.9871794871794872, |
|
"grad_norm": 4.364375114440918, |
|
"learning_rate": 1.3549239920687375e-08, |
|
"loss": 0.8266, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.9887820512820513, |
|
"grad_norm": 6.127290725708008, |
|
"learning_rate": 1.1896893588896232e-08, |
|
"loss": 0.7636, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 0.9903846153846154, |
|
"grad_norm": 2.9324896335601807, |
|
"learning_rate": 1.0244547257105088e-08, |
|
"loss": 0.6256, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.9919871794871795, |
|
"grad_norm": 3.2810983657836914, |
|
"learning_rate": 8.592200925313947e-09, |
|
"loss": 0.7826, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 0.9935897435897436, |
|
"grad_norm": 5.652727127075195, |
|
"learning_rate": 6.939854593522802e-09, |
|
"loss": 0.6604, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.9951923076923077, |
|
"grad_norm": 3.927150011062622, |
|
"learning_rate": 5.287508261731658e-09, |
|
"loss": 0.7523, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 0.9967948717948718, |
|
"grad_norm": 4.154155731201172, |
|
"learning_rate": 3.6351619299405156e-09, |
|
"loss": 0.7649, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.9983974358974359, |
|
"grad_norm": 6.479323387145996, |
|
"learning_rate": 1.9828155981493722e-09, |
|
"loss": 0.731, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.997898817062378, |
|
"learning_rate": 3.3046926635822863e-10, |
|
"loss": 0.8839, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3120, |
|
"total_flos": 9.05641071889875e+17, |
|
"train_loss": 0.8058771748573352, |
|
"train_runtime": 7172.3863, |
|
"train_samples_per_second": 6.959, |
|
"train_steps_per_second": 0.435 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 3120, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.05641071889875e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
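The records above follow the standard Hugging Face Trainer state layout: a list of per-step entries (`step`, `epoch`, `loss`, `learning_rate`, `grad_norm`) logged every `logging_steps` (= 5) steps, followed by a run summary (`train_loss`, `train_runtime`, `total_flos`) and run-level metadata (`max_steps`, `save_steps`). A minimal sketch of consuming such a file, assuming it is saved under the usual checkpoint filename `trainer_state.json` (not stated here) and that matplotlib is installed:

    # Minimal sketch: load a Trainer state file and plot its training-loss curve.
    # Assumptions: file is named "trainer_state.json"; matplotlib is available.
    import json

    import matplotlib.pyplot as plt

    with open("trainer_state.json") as f:
        state = json.load(f)

    # Per-step records carry "loss"; the final summary record instead carries
    # "train_loss"/"train_runtime", so filtering on "loss" drops it.
    records = [r for r in state["log_history"] if "loss" in r]

    steps = [r["step"] for r in records]
    losses = [r["loss"] for r in records]

    print(f"logged every {state['logging_steps']} steps, "
          f"{len(records)} records up to step {state['max_steps']}")
    print(f"final logged loss: {losses[-1]:.4f}")

    plt.plot(steps, losses)
    plt.xlabel("step")
    plt.ylabel("training loss")
    plt.savefig("loss_curve.png")

The same loop could collect `learning_rate` to inspect the linear decay toward step 3120, or `grad_norm` to locate spikes such as the 45.19 logged at step 1845.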
|