diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,14416 +1,400 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 2.2774720060732587, - "global_step": 24000, + "epoch": 15.968063872255488, + "global_step": 32000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.0, - "learning_rate": 0.00019998102106661607, - "loss": 9.952, - "step": 10 - }, - { - "epoch": 0.0, - "learning_rate": 0.00019996204213323213, - "loss": 8.9779, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 0.00019994306319984817, - "loss": 8.4733, - "step": 30 - }, - { - "epoch": 0.0, - "learning_rate": 0.00019992408426646423, - "loss": 8.3471, - "step": 40 - }, - { - "epoch": 0.0, - "learning_rate": 0.0001999051053330803, - "loss": 8.3499, - "step": 50 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019988612639969635, - "loss": 8.3611, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.0001998671474663124, - "loss": 8.2644, - "step": 70 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019984816853292845, - "loss": 8.2485, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0001998291895995445, - "loss": 8.1744, - "step": 90 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019981021066616057, - "loss": 8.1581, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019979123173277663, - "loss": 8.2968, - "step": 110 - }, - { - "epoch": 0.01, - "learning_rate": 0.0001997722527993927, - "loss": 8.2153, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019975327386600875, - "loss": 8.081, - "step": 130 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019973429493262479, - "loss": 8.1176, - "step": 140 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019971531599924085, - "loss": 8.1398, - "step": 150 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001996963370658569, - "loss": 8.1933, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019967735813247297, - "loss": 8.1381, - "step": 170 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019965837919908903, - "loss": 8.1712, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019963940026570507, - "loss": 8.1477, - "step": 190 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019962042133232113, - "loss": 8.1891, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001996014423989372, - "loss": 8.0788, - "step": 210 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019958246346555325, - "loss": 8.1196, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001995634845321693, - "loss": 8.0288, - "step": 230 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019954450559878534, - "loss": 8.1236, - "step": 240 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001995255266654014, - "loss": 8.0957, - "step": 250 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019950654773201747, - "loss": 7.9825, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019948756879863353, - "loss": 8.1757, - "step": 270 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001994685898652496, - "loss": 8.1677, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019944961093186565, - "loss": 8.0789, - "step": 290 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019943063199848169, - "loss": 8.0367, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019941165306509775, - "loss": 8.1172, - "step": 310 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001993926741317138, - "loss": 8.1456, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019937369519832987, - "loss": 8.0392, - "step": 330 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019935471626494593, - "loss": 8.2246, - "step": 340 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019933573733156196, - "loss": 8.0326, - "step": 350 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019931675839817803, - "loss": 8.092, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001992977794647941, - "loss": 8.0419, - "step": 370 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019927880053141015, - "loss": 8.1022, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001992598215980262, - "loss": 8.0943, - "step": 390 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019924084266464224, - "loss": 8.0907, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001992218637312583, - "loss": 8.0365, - "step": 410 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019920288479787437, - "loss": 7.9948, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019918390586449043, - "loss": 8.033, - "step": 430 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001991649269311065, - "loss": 8.1033, - "step": 440 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019914594799772255, - "loss": 8.1076, - "step": 450 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019912696906433858, - "loss": 8.0816, - "step": 460 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019910799013095465, - "loss": 8.0745, - "step": 470 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001990890111975707, - "loss": 8.1358, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019907003226418677, - "loss": 7.9812, - "step": 490 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019905105333080283, - "loss": 8.0595, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019903207439741886, - "loss": 8.0713, - "step": 510 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019901309546403492, - "loss": 8.1596, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019899411653065099, - "loss": 8.0495, - "step": 530 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019897513759726705, - "loss": 8.0031, - "step": 540 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001989561586638831, - "loss": 8.1362, - "step": 550 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019893717973049914, - "loss": 8.0954, - "step": 560 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001989182007971152, - "loss": 8.1174, - "step": 570 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019889922186373127, - "loss": 8.0747, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019888024293034733, - "loss": 8.0865, - "step": 590 - }, - { - "epoch": 0.06, - "learning_rate": 0.0001988612639969634, - "loss": 8.0401, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019884228506357942, - "loss": 8.0427, - "step": 610 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019882330613019548, - "loss": 7.9791, - "step": 620 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019880432719681154, - "loss": 8.0075, - "step": 630 - }, - { - "epoch": 0.06, - "learning_rate": 0.0001987853482634276, - "loss": 7.999, - "step": 640 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019876636933004367, - "loss": 8.0756, - "step": 650 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019874739039665973, - "loss": 8.0046, - "step": 660 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019872841146327576, - "loss": 7.9885, - "step": 670 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019870943252989182, - "loss": 8.065, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.00019869045359650789, - "loss": 8.0558, - "step": 690 - }, - { - "epoch": 0.07, - "learning_rate": 0.00019867147466312395, - "loss": 8.085, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00019865249572974, - "loss": 8.0773, - "step": 710 - }, - { - "epoch": 0.07, - "learning_rate": 0.00019863351679635604, - "loss": 8.0463, - "step": 720 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001986145378629721, - "loss": 8.0125, - "step": 730 - }, - { - "epoch": 0.07, - "learning_rate": 0.00019859555892958816, - "loss": 8.0906, - "step": 740 - }, - { - "epoch": 0.07, - "learning_rate": 0.00019857657999620423, - "loss": 8.038, - "step": 750 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001985576010628203, - "loss": 8.0052, - "step": 760 - }, - { - "epoch": 0.07, - "learning_rate": 0.00019853862212943632, - "loss": 8.0353, - "step": 770 - }, - { - "epoch": 0.07, - "learning_rate": 0.00019851964319605238, - "loss": 8.0852, - "step": 780 - }, - { - "epoch": 0.07, - "learning_rate": 0.00019850066426266844, - "loss": 8.0317, - "step": 790 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001984816853292845, - "loss": 7.9571, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00019846270639590057, - "loss": 8.086, - "step": 810 - }, - { - "epoch": 0.08, - "learning_rate": 0.00019844372746251663, - "loss": 7.9966, - "step": 820 - }, - { - "epoch": 0.08, - "learning_rate": 0.00019842474852913266, - "loss": 8.0486, - "step": 830 - }, - { - "epoch": 0.08, - "learning_rate": 0.00019840576959574872, - "loss": 7.9632, - "step": 840 - }, - { - "epoch": 0.08, - "learning_rate": 0.00019838679066236478, - "loss": 8.0173, - "step": 850 - }, - { - "epoch": 0.08, - "learning_rate": 0.00019836781172898085, - "loss": 7.9798, - "step": 860 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001983488327955969, - "loss": 7.8961, - "step": 870 - }, - { - "epoch": 0.08, - "learning_rate": 0.00019832985386221294, - "loss": 8.1219, - "step": 880 - }, - { - "epoch": 0.08, - "learning_rate": 0.000198310874928829, - "loss": 8.0091, - "step": 890 - }, - { - "epoch": 0.09, - "learning_rate": 0.00019829189599544506, - "loss": 8.0785, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00019827291706206113, - "loss": 8.0095, - "step": 910 - }, - { - "epoch": 0.09, - "learning_rate": 0.00019825393812867719, - "loss": 7.9793, - "step": 920 - }, - { - "epoch": 0.09, - "learning_rate": 0.00019823495919529322, - "loss": 8.006, - "step": 930 - }, - { - "epoch": 0.09, - "learning_rate": 0.00019821598026190928, - "loss": 7.969, - "step": 940 - }, - { - "epoch": 0.09, - "learning_rate": 0.00019819700132852534, - "loss": 7.9929, - "step": 950 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001981780223951414, - "loss": 8.1714, - "step": 960 - }, - { - "epoch": 0.09, - "learning_rate": 0.00019815904346175747, - "loss": 8.0096, - "step": 970 - }, - { - "epoch": 0.09, - "learning_rate": 0.00019814006452837353, - "loss": 7.9708, - "step": 980 - }, - { - "epoch": 0.09, - "learning_rate": 0.00019812108559498956, - "loss": 8.0766, - "step": 990 - }, - { - "epoch": 0.09, - "learning_rate": 0.00019810210666160562, - "loss": 7.9056, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 0.00019808312772822168, - "loss": 8.0285, - "step": 1010 - }, - { - "epoch": 0.1, - "learning_rate": 0.00019806414879483774, - "loss": 8.0732, - "step": 1020 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001980451698614538, - "loss": 7.8647, - "step": 1030 - }, - { - "epoch": 0.1, - "learning_rate": 0.00019802619092806984, - "loss": 8.0618, - "step": 1040 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001980072119946859, - "loss": 7.8339, - "step": 1050 - }, - { - "epoch": 0.1, - "learning_rate": 0.00019798823306130196, - "loss": 7.9132, - "step": 1060 - }, - { - "epoch": 0.1, - "learning_rate": 0.00019796925412791802, - "loss": 7.913, - "step": 1070 - }, - { - "epoch": 0.1, - "learning_rate": 0.00019795027519453409, - "loss": 7.9198, - "step": 1080 - }, - { - "epoch": 0.1, - "learning_rate": 0.00019793129626115012, - "loss": 8.0008, - "step": 1090 - }, - { - "epoch": 0.1, - "learning_rate": 0.00019791231732776618, - "loss": 8.0339, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 0.00019789333839438224, - "loss": 8.0553, - "step": 1110 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001978743594609983, - "loss": 7.9269, - "step": 1120 - }, - { - "epoch": 0.11, - "learning_rate": 0.00019785538052761436, - "loss": 7.9504, - "step": 1130 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001978364015942304, - "loss": 8.0064, - "step": 1140 - }, - { - "epoch": 0.11, - "learning_rate": 0.00019781742266084646, - "loss": 8.1134, - "step": 1150 - }, - { - "epoch": 0.11, - "learning_rate": 0.00019779844372746252, - "loss": 8.0186, - "step": 1160 - }, - { - "epoch": 0.11, - "learning_rate": 0.00019777946479407858, - "loss": 8.02, - "step": 1170 - }, - { - "epoch": 0.11, - "learning_rate": 0.00019776048586069464, - "loss": 8.1027, - "step": 1180 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001977415069273107, - "loss": 7.9113, - "step": 1190 - }, - { - "epoch": 0.11, - "learning_rate": 0.00019772252799392674, - "loss": 7.983, - "step": 1200 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001977035490605428, - "loss": 7.9472, - "step": 1210 - }, - { - "epoch": 0.12, - "learning_rate": 0.00019768457012715886, - "loss": 8.0294, - "step": 1220 - }, - { - "epoch": 0.12, - "learning_rate": 0.00019766559119377492, - "loss": 7.9201, - "step": 1230 - }, - { - "epoch": 0.12, - "learning_rate": 0.00019764661226039098, - "loss": 7.9851, - "step": 1240 - }, - { - "epoch": 0.12, - "learning_rate": 0.00019762763332700702, - "loss": 8.0097, - "step": 1250 - }, - { - "epoch": 0.12, - "learning_rate": 0.00019760865439362308, - "loss": 8.0801, - "step": 1260 - }, - { - "epoch": 0.12, - "learning_rate": 0.00019758967546023914, - "loss": 7.9854, - "step": 1270 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001975706965268552, - "loss": 7.9648, - "step": 1280 - }, - { - "epoch": 0.12, - "learning_rate": 0.00019755171759347126, - "loss": 7.9651, - "step": 1290 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001975327386600873, - "loss": 7.9788, - "step": 1300 - }, - { - "epoch": 0.12, - "learning_rate": 0.00019751375972670336, - "loss": 7.9753, - "step": 1310 - }, - { - "epoch": 0.13, - "learning_rate": 0.00019749478079331942, - "loss": 8.0325, - "step": 1320 - }, - { - "epoch": 0.13, - "learning_rate": 0.00019747580185993548, - "loss": 7.937, - "step": 1330 - }, - { - "epoch": 0.13, - "learning_rate": 0.00019745682292655154, - "loss": 8.0093, - "step": 1340 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001974378439931676, - "loss": 8.0437, - "step": 1350 - }, - { - "epoch": 0.13, - "learning_rate": 0.00019741886505978364, - "loss": 8.0538, - "step": 1360 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001973998861263997, - "loss": 7.9591, - "step": 1370 - }, - { - "epoch": 0.13, - "learning_rate": 0.00019738090719301576, - "loss": 8.0154, - "step": 1380 - }, - { - "epoch": 0.13, - "learning_rate": 0.00019736192825963182, - "loss": 7.9782, - "step": 1390 - }, - { - "epoch": 0.13, - "learning_rate": 0.00019734294932624788, - "loss": 7.9924, - "step": 1400 - }, - { - "epoch": 0.13, - "learning_rate": 0.00019732397039286392, - "loss": 7.9091, - "step": 1410 - }, - { - "epoch": 0.13, - "learning_rate": 0.00019730499145947998, - "loss": 7.9687, - "step": 1420 - }, - { - "epoch": 0.14, - "learning_rate": 0.00019728601252609604, - "loss": 8.0328, - "step": 1430 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001972670335927121, - "loss": 7.8584, - "step": 1440 - }, - { - "epoch": 0.14, - "learning_rate": 0.00019724805465932816, - "loss": 8.0146, - "step": 1450 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001972290757259442, - "loss": 7.8941, - "step": 1460 - }, - { - "epoch": 0.14, - "learning_rate": 0.00019721009679256026, - "loss": 7.9312, - "step": 1470 - }, - { - "epoch": 0.14, - "learning_rate": 0.00019719111785917632, - "loss": 7.9333, - "step": 1480 - }, - { - "epoch": 0.14, - "learning_rate": 0.00019717213892579238, - "loss": 7.9736, - "step": 1490 - }, - { - "epoch": 0.14, - "learning_rate": 0.00019715315999240844, - "loss": 7.9074, - "step": 1500 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001971341810590245, - "loss": 7.9985, - "step": 1510 - }, - { - "epoch": 0.14, - "learning_rate": 0.00019711520212564054, - "loss": 7.9647, - "step": 1520 - }, - { - "epoch": 0.15, - "learning_rate": 0.0001970962231922566, - "loss": 7.9233, - "step": 1530 - }, - { - "epoch": 0.15, - "learning_rate": 0.00019707724425887266, - "loss": 7.9757, - "step": 1540 - }, - { - "epoch": 0.15, - "learning_rate": 0.00019705826532548872, - "loss": 8.0475, - "step": 1550 - }, - { - "epoch": 0.15, - "learning_rate": 0.00019703928639210478, - "loss": 7.974, - "step": 1560 - }, - { - "epoch": 0.15, - "learning_rate": 0.00019702030745872082, - "loss": 8.0162, - "step": 1570 - }, - { - "epoch": 0.15, - "learning_rate": 0.00019700132852533688, - "loss": 7.9094, - "step": 1580 - }, - { - "epoch": 0.15, - "learning_rate": 0.00019698234959195294, - "loss": 7.8877, - "step": 1590 - }, - { - "epoch": 0.15, - "learning_rate": 0.000196963370658569, - "loss": 7.964, - "step": 1600 - }, - { - "epoch": 0.15, - "learning_rate": 0.00019694439172518506, - "loss": 8.012, - "step": 1610 - }, - { - "epoch": 0.15, - "learning_rate": 0.0001969254127918011, - "loss": 8.0087, - "step": 1620 - }, - { - "epoch": 0.15, - "learning_rate": 0.00019690643385841716, - "loss": 8.0649, - "step": 1630 - }, - { - "epoch": 0.16, - "learning_rate": 0.00019688745492503322, - "loss": 7.9777, - "step": 1640 - }, - { - "epoch": 0.16, - "learning_rate": 0.00019686847599164928, - "loss": 7.9384, - "step": 1650 - }, - { - "epoch": 0.16, - "learning_rate": 0.00019684949705826534, - "loss": 7.9967, - "step": 1660 - }, - { - "epoch": 0.16, - "learning_rate": 0.00019683051812488138, - "loss": 7.924, - "step": 1670 - }, - { - "epoch": 0.16, - "learning_rate": 0.00019681153919149744, - "loss": 7.9114, - "step": 1680 - }, - { - "epoch": 0.16, - "learning_rate": 0.0001967925602581135, - "loss": 7.9128, - "step": 1690 - }, - { - "epoch": 0.16, - "learning_rate": 0.00019677358132472956, - "loss": 7.8818, - "step": 1700 - }, - { - "epoch": 0.16, - "learning_rate": 0.00019675460239134562, - "loss": 8.0632, - "step": 1710 - }, - { - "epoch": 0.16, - "learning_rate": 0.00019673562345796168, - "loss": 7.9477, - "step": 1720 - }, - { - "epoch": 0.16, - "learning_rate": 0.00019671664452457772, - "loss": 7.9508, - "step": 1730 - }, - { - "epoch": 0.17, - "learning_rate": 0.00019669766559119378, - "loss": 8.0061, - "step": 1740 - }, - { - "epoch": 0.17, - "learning_rate": 0.00019667868665780984, - "loss": 7.9196, - "step": 1750 - }, - { - "epoch": 0.17, - "learning_rate": 0.0001966597077244259, - "loss": 7.9596, - "step": 1760 - }, - { - "epoch": 0.17, - "learning_rate": 0.00019664072879104196, - "loss": 7.8292, - "step": 1770 - }, - { - "epoch": 0.17, - "learning_rate": 0.000196621749857658, - "loss": 7.9823, - "step": 1780 - }, - { - "epoch": 0.17, - "learning_rate": 0.00019660277092427406, - "loss": 7.9388, - "step": 1790 - }, - { - "epoch": 0.17, - "learning_rate": 0.00019658379199089012, - "loss": 8.0311, - "step": 1800 - }, - { - "epoch": 0.17, - "learning_rate": 0.00019656481305750618, - "loss": 7.9965, - "step": 1810 - }, - { - "epoch": 0.17, - "learning_rate": 0.00019654583412412224, - "loss": 7.92, - "step": 1820 - }, - { - "epoch": 0.17, - "learning_rate": 0.00019652685519073827, - "loss": 7.9755, - "step": 1830 - }, - { - "epoch": 0.17, - "learning_rate": 0.00019650787625735434, - "loss": 7.9663, - "step": 1840 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001964888973239704, - "loss": 7.9034, - "step": 1850 - }, - { - "epoch": 0.18, - "learning_rate": 0.00019646991839058646, - "loss": 7.9657, - "step": 1860 - }, - { - "epoch": 0.18, - "learning_rate": 0.00019645093945720252, - "loss": 8.0662, - "step": 1870 - }, - { - "epoch": 0.18, - "learning_rate": 0.00019643196052381858, - "loss": 8.0137, - "step": 1880 - }, - { - "epoch": 0.18, - "learning_rate": 0.00019641298159043462, - "loss": 7.9988, - "step": 1890 - }, - { - "epoch": 0.18, - "learning_rate": 0.00019639400265705068, - "loss": 7.9998, - "step": 1900 - }, - { - "epoch": 0.18, - "learning_rate": 0.00019637502372366674, - "loss": 7.9599, - "step": 1910 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001963560447902828, - "loss": 7.851, - "step": 1920 - }, - { - "epoch": 0.18, - "learning_rate": 0.00019633706585689886, - "loss": 7.8906, - "step": 1930 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001963180869235149, - "loss": 7.9987, - "step": 1940 - }, - { - "epoch": 0.19, - "learning_rate": 0.00019629910799013096, - "loss": 7.949, - "step": 1950 - }, - { - "epoch": 0.19, - "learning_rate": 0.00019628012905674702, - "loss": 8.0121, - "step": 1960 - }, - { - "epoch": 0.19, - "learning_rate": 0.00019626115012336308, - "loss": 7.9445, - "step": 1970 - }, - { - "epoch": 0.19, - "learning_rate": 0.00019624217118997914, - "loss": 7.9355, - "step": 1980 - }, - { - "epoch": 0.19, - "learning_rate": 0.00019622319225659517, - "loss": 7.9748, - "step": 1990 - }, - { - "epoch": 0.19, - "learning_rate": 0.00019620421332321124, - "loss": 7.9165, - "step": 2000 - }, - { - "epoch": 0.19, - "learning_rate": 0.0001961852343898273, - "loss": 7.9001, - "step": 2010 - }, - { - "epoch": 0.19, - "learning_rate": 0.00019616625545644336, - "loss": 7.9579, - "step": 2020 - }, - { - "epoch": 0.19, - "learning_rate": 0.00019614727652305942, - "loss": 8.0491, - "step": 2030 - }, - { - "epoch": 0.19, - "learning_rate": 0.00019612829758967548, - "loss": 7.9823, - "step": 2040 - }, - { - "epoch": 0.19, - "learning_rate": 0.00019610931865629151, - "loss": 7.9317, - "step": 2050 - }, - { - "epoch": 0.2, - "learning_rate": 0.00019609033972290758, - "loss": 7.981, - "step": 2060 - }, - { - "epoch": 0.2, - "learning_rate": 0.00019607136078952364, - "loss": 7.9837, - "step": 2070 - }, - { - "epoch": 0.2, - "learning_rate": 0.0001960523818561397, - "loss": 7.9299, - "step": 2080 - }, - { - "epoch": 0.2, - "learning_rate": 0.00019603340292275576, - "loss": 7.885, - "step": 2090 - }, - { - "epoch": 0.2, - "learning_rate": 0.0001960144239893718, - "loss": 7.9707, - "step": 2100 - }, - { - "epoch": 0.2, - "learning_rate": 0.00019599544505598785, - "loss": 8.0994, - "step": 2110 - }, - { - "epoch": 0.2, - "learning_rate": 0.00019597646612260392, - "loss": 7.9866, - "step": 2120 - }, - { - "epoch": 0.2, - "learning_rate": 0.00019595748718921998, - "loss": 8.0013, - "step": 2130 - }, - { - "epoch": 0.2, - "learning_rate": 0.00019593850825583604, - "loss": 7.9703, - "step": 2140 - }, - { - "epoch": 0.2, - "learning_rate": 0.00019591952932245207, - "loss": 7.8933, - "step": 2150 - }, - { - "epoch": 0.2, - "learning_rate": 0.00019590055038906813, - "loss": 7.9128, - "step": 2160 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001958815714556842, - "loss": 7.9134, - "step": 2170 - }, - { - "epoch": 0.21, - "learning_rate": 0.00019586259252230026, - "loss": 7.9388, - "step": 2180 - }, - { - "epoch": 0.21, - "learning_rate": 0.00019584361358891632, - "loss": 7.8159, - "step": 2190 - }, - { - "epoch": 0.21, - "learning_rate": 0.00019582463465553235, - "loss": 7.966, - "step": 2200 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001958056557221484, - "loss": 7.9638, - "step": 2210 - }, - { - "epoch": 0.21, - "learning_rate": 0.00019578667678876447, - "loss": 7.9076, - "step": 2220 - }, - { - "epoch": 0.21, - "learning_rate": 0.00019576769785538054, - "loss": 7.8966, - "step": 2230 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001957487189219966, - "loss": 8.0228, - "step": 2240 - }, - { - "epoch": 0.21, - "learning_rate": 0.00019572973998861266, - "loss": 7.963, - "step": 2250 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001957107610552287, - "loss": 7.9361, - "step": 2260 - }, - { - "epoch": 0.22, - "learning_rate": 0.00019569178212184475, - "loss": 8.0444, - "step": 2270 - }, - { - "epoch": 0.22, - "learning_rate": 0.00019567280318846082, - "loss": 7.9489, - "step": 2280 - }, - { - "epoch": 0.22, - "learning_rate": 0.00019565382425507688, - "loss": 8.0002, - "step": 2290 - }, - { - "epoch": 0.22, - "learning_rate": 0.00019563484532169294, - "loss": 7.983, - "step": 2300 - }, - { - "epoch": 0.22, - "learning_rate": 0.00019561586638830897, - "loss": 7.9444, - "step": 2310 - }, - { - "epoch": 0.22, - "learning_rate": 0.00019559688745492503, - "loss": 7.9221, - "step": 2320 - }, - { - "epoch": 0.22, - "learning_rate": 0.0001955779085215411, - "loss": 8.0789, - "step": 2330 - }, - { - "epoch": 0.22, - "learning_rate": 0.00019555892958815716, - "loss": 7.9703, - "step": 2340 - }, - { - "epoch": 0.22, - "learning_rate": 0.00019553995065477322, - "loss": 7.8596, - "step": 2350 - }, - { - "epoch": 0.22, - "learning_rate": 0.00019552097172138925, - "loss": 8.0037, - "step": 2360 - }, - { - "epoch": 0.22, - "learning_rate": 0.0001955019927880053, - "loss": 7.919, - "step": 2370 - }, - { - "epoch": 0.23, - "learning_rate": 0.00019548301385462137, - "loss": 7.9221, - "step": 2380 - }, - { - "epoch": 0.23, - "learning_rate": 0.00019546403492123744, - "loss": 7.9257, - "step": 2390 - }, - { - "epoch": 0.23, - "learning_rate": 0.0001954450559878535, - "loss": 7.8881, - "step": 2400 - }, - { - "epoch": 0.23, - "learning_rate": 0.00019542607705446956, - "loss": 7.9372, - "step": 2410 - }, - { - "epoch": 0.23, - "learning_rate": 0.0001954070981210856, - "loss": 7.9647, - "step": 2420 - }, - { - "epoch": 0.23, - "learning_rate": 0.00019538811918770165, - "loss": 8.0126, - "step": 2430 - }, - { - "epoch": 0.23, - "learning_rate": 0.00019536914025431771, - "loss": 7.9594, - "step": 2440 - }, - { - "epoch": 0.23, - "learning_rate": 0.00019535016132093378, - "loss": 7.917, - "step": 2450 - }, - { - "epoch": 0.23, - "learning_rate": 0.00019533118238754984, - "loss": 8.013, - "step": 2460 - }, - { - "epoch": 0.23, - "learning_rate": 0.00019531220345416587, - "loss": 7.9512, - "step": 2470 - }, - { - "epoch": 0.24, - "learning_rate": 0.00019529322452078193, - "loss": 7.9876, - "step": 2480 - }, - { - "epoch": 0.24, - "learning_rate": 0.000195274245587398, - "loss": 7.9838, - "step": 2490 - }, - { - "epoch": 0.24, - "learning_rate": 0.00019525526665401405, - "loss": 7.9647, - "step": 2500 - }, - { - "epoch": 0.24, - "learning_rate": 0.00019523628772063012, - "loss": 7.8661, - "step": 2510 - }, - { - "epoch": 0.24, - "learning_rate": 0.00019521730878724615, - "loss": 7.9554, - "step": 2520 - }, - { - "epoch": 0.24, - "learning_rate": 0.0001951983298538622, - "loss": 7.9141, - "step": 2530 - }, - { - "epoch": 0.24, - "learning_rate": 0.00019517935092047827, - "loss": 7.939, - "step": 2540 - }, - { - "epoch": 0.24, - "learning_rate": 0.00019516037198709433, - "loss": 8.0098, - "step": 2550 - }, - { - "epoch": 0.24, - "learning_rate": 0.0001951413930537104, - "loss": 7.9881, - "step": 2560 - }, - { - "epoch": 0.24, - "learning_rate": 0.00019512241412032646, - "loss": 7.9937, - "step": 2570 - }, - { - "epoch": 0.24, - "learning_rate": 0.0001951034351869425, - "loss": 7.9844, - "step": 2580 - }, - { - "epoch": 0.25, - "learning_rate": 0.00019508445625355855, - "loss": 8.043, - "step": 2590 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001950654773201746, - "loss": 7.9513, - "step": 2600 - }, - { - "epoch": 0.25, - "learning_rate": 0.00019504649838679067, - "loss": 7.9174, - "step": 2610 - }, - { - "epoch": 0.25, - "learning_rate": 0.00019502751945340674, - "loss": 7.9746, - "step": 2620 - }, - { - "epoch": 0.25, - "learning_rate": 0.00019500854052002277, - "loss": 7.9344, - "step": 2630 - }, - { - "epoch": 0.25, - "learning_rate": 0.00019498956158663883, - "loss": 7.8564, - "step": 2640 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001949705826532549, - "loss": 7.8934, - "step": 2650 - }, - { - "epoch": 0.25, - "learning_rate": 0.00019495160371987095, - "loss": 7.8338, - "step": 2660 - }, - { - "epoch": 0.25, - "learning_rate": 0.00019493262478648702, - "loss": 7.9002, - "step": 2670 - }, - { - "epoch": 0.25, - "learning_rate": 0.00019491364585310305, - "loss": 7.957, - "step": 2680 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001948946669197191, - "loss": 7.8777, - "step": 2690 - }, - { - "epoch": 0.26, - "learning_rate": 0.00019487568798633517, - "loss": 8.0054, - "step": 2700 - }, - { - "epoch": 0.26, - "learning_rate": 0.00019485670905295123, - "loss": 7.9225, - "step": 2710 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001948377301195673, - "loss": 7.9895, - "step": 2720 - }, - { - "epoch": 0.26, - "learning_rate": 0.00019481875118618333, - "loss": 7.9711, - "step": 2730 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001947997722527994, - "loss": 7.9268, - "step": 2740 - }, - { - "epoch": 0.26, - "learning_rate": 0.00019478079331941545, - "loss": 8.0199, - "step": 2750 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001947618143860315, - "loss": 7.9409, - "step": 2760 - }, - { - "epoch": 0.26, - "learning_rate": 0.00019474283545264757, - "loss": 7.7781, - "step": 2770 - }, - { - "epoch": 0.26, - "learning_rate": 0.00019472385651926364, - "loss": 7.787, - "step": 2780 - }, - { - "epoch": 0.26, - "learning_rate": 0.00019470487758587967, - "loss": 8.0509, - "step": 2790 - }, - { - "epoch": 0.27, - "learning_rate": 0.00019468589865249573, - "loss": 7.8997, - "step": 2800 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001946669197191118, - "loss": 8.0071, - "step": 2810 - }, - { - "epoch": 0.27, - "learning_rate": 0.00019464794078572785, - "loss": 7.9269, - "step": 2820 - }, - { - "epoch": 0.27, - "learning_rate": 0.00019462896185234391, - "loss": 7.9551, - "step": 2830 - }, - { - "epoch": 0.27, - "learning_rate": 0.00019460998291895995, - "loss": 7.9637, - "step": 2840 - }, - { - "epoch": 0.27, - "learning_rate": 0.000194591003985576, - "loss": 7.9432, - "step": 2850 - }, - { - "epoch": 0.27, - "learning_rate": 0.00019457202505219207, - "loss": 7.8711, - "step": 2860 - }, - { - "epoch": 0.27, - "learning_rate": 0.00019455304611880813, - "loss": 7.8302, - "step": 2870 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001945340671854242, - "loss": 7.8777, - "step": 2880 - }, - { - "epoch": 0.27, - "learning_rate": 0.00019451508825204023, - "loss": 7.9643, - "step": 2890 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001944961093186563, - "loss": 7.8927, - "step": 2900 - }, - { - "epoch": 0.28, - "learning_rate": 0.00019447713038527235, - "loss": 7.9123, - "step": 2910 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001944581514518884, - "loss": 7.9644, - "step": 2920 - }, - { - "epoch": 0.28, - "learning_rate": 0.00019443917251850447, - "loss": 7.891, - "step": 2930 - }, - { - "epoch": 0.28, - "learning_rate": 0.00019442019358512053, - "loss": 8.0191, - "step": 2940 - }, - { - "epoch": 0.28, - "learning_rate": 0.00019440121465173657, - "loss": 7.8945, - "step": 2950 - }, - { - "epoch": 0.28, - "learning_rate": 0.00019438223571835263, - "loss": 7.9478, - "step": 2960 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001943632567849687, - "loss": 7.9027, - "step": 2970 - }, - { - "epoch": 0.28, - "learning_rate": 0.00019434427785158475, - "loss": 7.8986, - "step": 2980 - }, - { - "epoch": 0.28, - "learning_rate": 0.00019432529891820081, - "loss": 8.0644, - "step": 2990 - }, - { - "epoch": 0.28, - "learning_rate": 0.00019430631998481685, - "loss": 7.9844, - "step": 3000 - }, - { - "epoch": 0.29, - "learning_rate": 0.0001942873410514329, - "loss": 7.9241, - "step": 3010 - }, - { - "epoch": 0.29, - "learning_rate": 0.00019426836211804897, - "loss": 7.8227, - "step": 3020 - }, - { - "epoch": 0.29, - "learning_rate": 0.00019424938318466503, - "loss": 7.8803, - "step": 3030 - }, - { - "epoch": 0.29, - "learning_rate": 0.0001942304042512811, - "loss": 7.9341, - "step": 3040 - }, - { - "epoch": 0.29, - "learning_rate": 0.00019421142531789713, - "loss": 7.927, - "step": 3050 - }, - { - "epoch": 0.29, - "learning_rate": 0.0001941924463845132, - "loss": 7.9502, - "step": 3060 - }, - { - "epoch": 0.29, - "learning_rate": 0.00019417346745112925, - "loss": 7.921, - "step": 3070 - }, - { - "epoch": 0.29, - "learning_rate": 0.0001941544885177453, - "loss": 7.9221, - "step": 3080 - }, - { - "epoch": 0.29, - "learning_rate": 0.00019413550958436137, - "loss": 7.8924, - "step": 3090 - }, - { - "epoch": 0.29, - "learning_rate": 0.00019411653065097743, - "loss": 8.0156, - "step": 3100 - }, - { - "epoch": 0.3, - "learning_rate": 0.00019409755171759347, - "loss": 7.9395, - "step": 3110 - }, - { - "epoch": 0.3, - "learning_rate": 0.00019407857278420953, - "loss": 7.9926, - "step": 3120 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001940595938508256, - "loss": 7.9102, - "step": 3130 - }, - { - "epoch": 0.3, - "learning_rate": 0.00019404061491744165, - "loss": 7.8352, - "step": 3140 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001940216359840577, - "loss": 7.8719, - "step": 3150 - }, - { - "epoch": 0.3, - "learning_rate": 0.00019400265705067375, - "loss": 7.9277, - "step": 3160 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001939836781172898, - "loss": 7.9376, - "step": 3170 - }, - { - "epoch": 0.3, - "learning_rate": 0.00019396469918390587, - "loss": 7.9999, - "step": 3180 - }, - { - "epoch": 0.3, - "learning_rate": 0.00019394572025052193, - "loss": 7.8309, - "step": 3190 - }, - { - "epoch": 0.3, - "learning_rate": 0.000193926741317138, - "loss": 7.8541, - "step": 3200 - }, - { - "epoch": 0.3, - "learning_rate": 0.00019390776238375403, - "loss": 7.9434, - "step": 3210 - }, - { - "epoch": 0.31, - "learning_rate": 0.0001938887834503701, - "loss": 7.9548, - "step": 3220 - }, - { - "epoch": 0.31, - "learning_rate": 0.00019386980451698615, - "loss": 7.9358, - "step": 3230 - }, - { - "epoch": 0.31, - "learning_rate": 0.0001938508255836022, - "loss": 7.9035, - "step": 3240 - }, - { - "epoch": 0.31, - "learning_rate": 0.00019383184665021827, - "loss": 7.934, - "step": 3250 - }, - { - "epoch": 0.31, - "learning_rate": 0.0001938128677168343, - "loss": 7.9353, - "step": 3260 - }, - { - "epoch": 0.31, - "learning_rate": 0.00019379388878345037, - "loss": 7.9028, - "step": 3270 - }, - { - "epoch": 0.31, - "learning_rate": 0.00019377490985006643, - "loss": 7.9257, - "step": 3280 - }, - { - "epoch": 0.31, - "learning_rate": 0.0001937559309166825, - "loss": 7.8557, - "step": 3290 - }, - { - "epoch": 0.31, - "learning_rate": 0.00019373695198329855, - "loss": 7.8651, - "step": 3300 - }, - { - "epoch": 0.31, - "learning_rate": 0.0001937179730499146, - "loss": 7.901, - "step": 3310 - }, - { - "epoch": 0.32, - "learning_rate": 0.00019369899411653065, - "loss": 7.9915, - "step": 3320 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001936800151831467, - "loss": 7.9375, - "step": 3330 - }, - { - "epoch": 0.32, - "learning_rate": 0.00019366103624976277, - "loss": 7.8321, - "step": 3340 - }, - { - "epoch": 0.32, - "learning_rate": 0.00019364205731637883, - "loss": 7.8932, - "step": 3350 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001936230783829949, - "loss": 7.9586, - "step": 3360 - }, - { - "epoch": 0.32, - "learning_rate": 0.00019360409944961093, - "loss": 7.8609, - "step": 3370 - }, - { - "epoch": 0.32, - "learning_rate": 0.000193585120516227, - "loss": 7.9284, - "step": 3380 - }, - { - "epoch": 0.32, - "learning_rate": 0.00019356614158284305, - "loss": 7.877, - "step": 3390 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001935471626494591, - "loss": 7.9125, - "step": 3400 - }, - { - "epoch": 0.32, - "learning_rate": 0.00019352818371607517, - "loss": 7.8638, - "step": 3410 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001935092047826912, - "loss": 7.9896, - "step": 3420 - }, - { - "epoch": 0.33, - "learning_rate": 0.00019349022584930727, - "loss": 8.0264, - "step": 3430 - }, - { - "epoch": 0.33, - "learning_rate": 0.00019347124691592333, - "loss": 7.9667, - "step": 3440 - }, - { - "epoch": 0.33, - "learning_rate": 0.0001934522679825394, - "loss": 7.7931, - "step": 3450 - }, - { - "epoch": 0.33, - "learning_rate": 0.00019343328904915545, - "loss": 8.0166, - "step": 3460 - }, - { - "epoch": 0.33, - "learning_rate": 0.0001934143101157715, - "loss": 7.844, - "step": 3470 - }, - { - "epoch": 0.33, - "learning_rate": 0.00019339533118238755, - "loss": 7.9468, - "step": 3480 - }, - { - "epoch": 0.33, - "learning_rate": 0.0001933763522490036, - "loss": 7.9775, - "step": 3490 - }, - { - "epoch": 0.33, - "learning_rate": 0.00019335737331561967, - "loss": 7.8543, - "step": 3500 - }, - { - "epoch": 0.33, - "learning_rate": 0.00019333839438223573, - "loss": 7.8744, - "step": 3510 - }, - { - "epoch": 0.33, - "learning_rate": 0.0001933194154488518, - "loss": 7.8954, - "step": 3520 - }, - { - "epoch": 0.33, - "learning_rate": 0.00019330043651546782, - "loss": 7.9898, - "step": 3530 - }, - { - "epoch": 0.34, - "learning_rate": 0.00019328145758208389, - "loss": 7.8642, - "step": 3540 - }, - { - "epoch": 0.34, - "learning_rate": 0.00019326247864869995, - "loss": 8.0018, - "step": 3550 - }, - { - "epoch": 0.34, - "learning_rate": 0.000193243499715316, - "loss": 8.0016, - "step": 3560 - }, - { - "epoch": 0.34, - "learning_rate": 0.00019322452078193207, - "loss": 8.0801, - "step": 3570 - }, - { - "epoch": 0.34, - "learning_rate": 0.0001932055418485481, - "loss": 8.0127, - "step": 3580 - }, - { - "epoch": 0.34, - "learning_rate": 0.00019318656291516416, - "loss": 7.8582, - "step": 3590 - }, - { - "epoch": 0.34, - "learning_rate": 0.00019316758398178023, - "loss": 7.9344, - "step": 3600 - }, - { - "epoch": 0.34, - "learning_rate": 0.0001931486050483963, - "loss": 7.953, - "step": 3610 - }, - { - "epoch": 0.34, - "learning_rate": 0.00019312962611501235, - "loss": 7.9068, - "step": 3620 - }, - { - "epoch": 0.34, - "learning_rate": 0.0001931106471816284, - "loss": 7.9032, - "step": 3630 - }, - { - "epoch": 0.35, - "learning_rate": 0.00019309166824824444, - "loss": 7.8135, - "step": 3640 - }, - { - "epoch": 0.35, - "learning_rate": 0.0001930726893148605, - "loss": 7.8799, - "step": 3650 - }, - { - "epoch": 0.35, - "learning_rate": 0.00019305371038147657, - "loss": 7.9762, - "step": 3660 - }, - { - "epoch": 0.35, - "learning_rate": 0.00019303473144809263, - "loss": 7.9839, - "step": 3670 - }, - { - "epoch": 0.35, - "learning_rate": 0.0001930157525147087, - "loss": 7.8639, - "step": 3680 - }, - { - "epoch": 0.35, - "learning_rate": 0.00019299677358132472, - "loss": 8.0189, - "step": 3690 - }, - { - "epoch": 0.35, - "learning_rate": 0.00019297779464794078, - "loss": 7.9332, - "step": 3700 - }, - { - "epoch": 0.35, - "learning_rate": 0.00019295881571455685, - "loss": 8.1368, - "step": 3710 - }, - { - "epoch": 0.35, - "learning_rate": 0.0001929398367811729, - "loss": 7.8899, - "step": 3720 - }, - { - "epoch": 0.35, - "learning_rate": 0.00019292085784778897, - "loss": 7.9733, - "step": 3730 - }, - { - "epoch": 0.35, - "learning_rate": 0.000192901878914405, - "loss": 8.0364, - "step": 3740 - }, - { - "epoch": 0.36, - "learning_rate": 0.00019288289998102106, - "loss": 7.9229, - "step": 3750 - }, - { - "epoch": 0.36, - "learning_rate": 0.00019286392104763713, - "loss": 7.9838, - "step": 3760 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001928449421142532, - "loss": 7.7698, - "step": 3770 - }, - { - "epoch": 0.36, - "learning_rate": 0.00019282596318086925, - "loss": 7.8598, - "step": 3780 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001928069842474853, - "loss": 8.0224, - "step": 3790 - }, - { - "epoch": 0.36, - "learning_rate": 0.00019278800531410134, - "loss": 7.899, - "step": 3800 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001927690263807174, - "loss": 7.8593, - "step": 3810 - }, - { - "epoch": 0.36, - "learning_rate": 0.00019275004744733347, - "loss": 7.9592, - "step": 3820 - }, - { - "epoch": 0.36, - "learning_rate": 0.00019273106851394953, - "loss": 7.9444, - "step": 3830 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001927120895805656, - "loss": 7.9079, - "step": 3840 - }, - { - "epoch": 0.37, - "learning_rate": 0.00019269311064718162, - "loss": 7.9192, - "step": 3850 - }, - { - "epoch": 0.37, - "learning_rate": 0.00019267413171379768, - "loss": 7.8443, - "step": 3860 - }, - { - "epoch": 0.37, - "learning_rate": 0.00019265515278041375, - "loss": 7.7676, - "step": 3870 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001926361738470298, - "loss": 7.9781, - "step": 3880 - }, - { - "epoch": 0.37, - "learning_rate": 0.00019261719491364587, - "loss": 7.9639, - "step": 3890 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001925982159802619, - "loss": 7.9449, - "step": 3900 - }, - { - "epoch": 0.37, - "learning_rate": 0.00019257923704687796, - "loss": 7.9286, - "step": 3910 - }, - { - "epoch": 0.37, - "learning_rate": 0.00019256025811349402, - "loss": 7.9555, - "step": 3920 - }, - { - "epoch": 0.37, - "learning_rate": 0.00019254127918011009, - "loss": 7.8571, - "step": 3930 - }, - { - "epoch": 0.37, - "learning_rate": 0.00019252230024672615, - "loss": 7.9362, - "step": 3940 - }, - { - "epoch": 0.37, - "learning_rate": 0.00019250332131334218, - "loss": 7.8598, - "step": 3950 - }, - { - "epoch": 0.38, - "learning_rate": 0.00019248434237995824, - "loss": 7.9286, - "step": 3960 - }, - { - "epoch": 0.38, - "learning_rate": 0.0001924653634465743, - "loss": 7.9102, - "step": 3970 - }, - { - "epoch": 0.38, - "learning_rate": 0.00019244638451319037, - "loss": 8.0461, - "step": 3980 - }, - { - "epoch": 0.38, - "learning_rate": 0.00019242740557980643, - "loss": 8.019, - "step": 3990 - }, - { - "epoch": 0.38, - "learning_rate": 0.0001924084266464225, - "loss": 7.9759, - "step": 4000 - }, - { - "epoch": 0.38, - "learning_rate": 0.00019238944771303852, - "loss": 7.8909, - "step": 4010 - }, - { - "epoch": 0.38, - "learning_rate": 0.00019237046877965458, - "loss": 7.8641, - "step": 4020 - }, - { - "epoch": 0.38, - "learning_rate": 0.00019235148984627064, - "loss": 7.9116, - "step": 4030 - }, - { - "epoch": 0.38, - "learning_rate": 0.0001923325109128867, - "loss": 8.1006, - "step": 4040 - }, - { - "epoch": 0.38, - "learning_rate": 0.00019231353197950277, - "loss": 7.9186, - "step": 4050 - }, - { - "epoch": 0.39, - "learning_rate": 0.0001922945530461188, - "loss": 7.9467, - "step": 4060 - }, - { - "epoch": 0.39, - "learning_rate": 0.00019227557411273486, - "loss": 7.9013, - "step": 4070 - }, - { - "epoch": 0.39, - "learning_rate": 0.00019225659517935092, - "loss": 7.8616, - "step": 4080 - }, - { - "epoch": 0.39, - "learning_rate": 0.00019223761624596698, - "loss": 7.972, - "step": 4090 - }, - { - "epoch": 0.39, - "learning_rate": 0.00019221863731258305, - "loss": 7.8126, - "step": 4100 - }, - { - "epoch": 0.39, - "learning_rate": 0.00019219965837919908, - "loss": 7.9782, - "step": 4110 - }, - { - "epoch": 0.39, - "learning_rate": 0.00019218067944581514, - "loss": 7.8078, - "step": 4120 - }, - { - "epoch": 0.39, - "learning_rate": 0.0001921617005124312, - "loss": 7.9655, - "step": 4130 - }, - { - "epoch": 0.39, - "learning_rate": 0.00019214272157904726, - "loss": 7.914, - "step": 4140 - }, - { - "epoch": 0.39, - "learning_rate": 0.00019212374264566333, - "loss": 7.9165, - "step": 4150 - }, - { - "epoch": 0.39, - "learning_rate": 0.0001921047637122794, - "loss": 7.8859, - "step": 4160 - }, - { - "epoch": 0.4, - "learning_rate": 0.00019208578477889542, - "loss": 7.92, - "step": 4170 - }, - { - "epoch": 0.4, - "learning_rate": 0.00019206680584551148, - "loss": 7.8548, - "step": 4180 - }, - { - "epoch": 0.4, - "learning_rate": 0.00019204782691212754, - "loss": 7.8462, - "step": 4190 - }, - { - "epoch": 0.4, - "learning_rate": 0.0001920288479787436, - "loss": 7.9479, - "step": 4200 - }, - { - "epoch": 0.4, - "learning_rate": 0.00019200986904535967, - "loss": 7.9687, - "step": 4210 - }, - { - "epoch": 0.4, - "learning_rate": 0.0001919908901119757, - "loss": 7.8412, - "step": 4220 - }, - { - "epoch": 0.4, - "learning_rate": 0.00019197191117859176, - "loss": 7.9112, - "step": 4230 - }, - { - "epoch": 0.4, - "learning_rate": 0.00019195293224520782, - "loss": 7.8358, - "step": 4240 - }, - { - "epoch": 0.4, - "learning_rate": 0.00019193395331182388, - "loss": 7.9411, - "step": 4250 - }, - { - "epoch": 0.4, - "learning_rate": 0.00019191497437843995, - "loss": 7.8077, - "step": 4260 - }, - { - "epoch": 0.41, - "learning_rate": 0.00019189599544505598, - "loss": 7.9192, - "step": 4270 - }, - { - "epoch": 0.41, - "learning_rate": 0.00019187701651167204, - "loss": 7.9694, - "step": 4280 - }, - { - "epoch": 0.41, - "learning_rate": 0.0001918580375782881, - "loss": 7.8397, - "step": 4290 - }, - { - "epoch": 0.41, - "learning_rate": 0.00019183905864490416, - "loss": 7.8919, - "step": 4300 - }, - { - "epoch": 0.41, - "learning_rate": 0.00019182007971152022, - "loss": 7.9859, - "step": 4310 - }, - { - "epoch": 0.41, - "learning_rate": 0.00019180110077813629, - "loss": 7.8927, - "step": 4320 - }, - { - "epoch": 0.41, - "learning_rate": 0.00019178212184475232, - "loss": 7.9667, - "step": 4330 - }, - { - "epoch": 0.41, - "learning_rate": 0.00019176314291136838, - "loss": 7.9561, - "step": 4340 - }, - { - "epoch": 0.41, - "learning_rate": 0.00019174416397798444, - "loss": 7.8562, - "step": 4350 - }, - { - "epoch": 0.41, - "learning_rate": 0.0001917251850446005, - "loss": 7.8994, - "step": 4360 - }, - { - "epoch": 0.41, - "learning_rate": 0.00019170620611121657, - "loss": 7.921, - "step": 4370 - }, - { - "epoch": 0.42, - "learning_rate": 0.0001916872271778326, - "loss": 7.8508, - "step": 4380 - }, - { - "epoch": 0.42, - "learning_rate": 0.00019166824824444866, - "loss": 7.9523, - "step": 4390 - }, - { - "epoch": 0.42, - "learning_rate": 0.00019164926931106472, - "loss": 7.8375, - "step": 4400 - }, - { - "epoch": 0.42, - "learning_rate": 0.00019163029037768078, - "loss": 7.9494, - "step": 4410 - }, - { - "epoch": 0.42, - "learning_rate": 0.00019161131144429684, - "loss": 7.9264, - "step": 4420 - }, - { - "epoch": 0.42, - "learning_rate": 0.00019159233251091288, - "loss": 7.8938, - "step": 4430 - }, - { - "epoch": 0.42, - "learning_rate": 0.00019157335357752894, - "loss": 7.898, - "step": 4440 - }, - { - "epoch": 0.42, - "learning_rate": 0.000191554374644145, - "loss": 7.8928, - "step": 4450 - }, - { - "epoch": 0.42, - "learning_rate": 0.00019153539571076106, - "loss": 7.9245, - "step": 4460 - }, - { - "epoch": 0.42, - "learning_rate": 0.00019151641677737712, - "loss": 7.994, - "step": 4470 - }, - { - "epoch": 0.43, - "learning_rate": 0.00019149743784399316, - "loss": 7.9778, - "step": 4480 - }, - { - "epoch": 0.43, - "learning_rate": 0.00019147845891060922, - "loss": 7.8502, - "step": 4490 - }, - { - "epoch": 0.43, - "learning_rate": 0.00019145947997722528, - "loss": 7.8904, - "step": 4500 - }, - { - "epoch": 0.43, - "learning_rate": 0.00019144050104384134, - "loss": 7.9738, - "step": 4510 - }, - { - "epoch": 0.43, - "learning_rate": 0.0001914215221104574, - "loss": 7.8975, - "step": 4520 - }, - { - "epoch": 0.43, - "learning_rate": 0.00019140254317707346, - "loss": 7.8833, - "step": 4530 - }, - { - "epoch": 0.43, - "learning_rate": 0.0001913835642436895, - "loss": 7.9091, - "step": 4540 - }, - { - "epoch": 0.43, - "learning_rate": 0.00019136458531030556, - "loss": 8.0118, - "step": 4550 - }, - { - "epoch": 0.43, - "learning_rate": 0.00019134560637692162, - "loss": 7.9777, - "step": 4560 - }, - { - "epoch": 0.43, - "learning_rate": 0.00019132662744353768, - "loss": 7.8288, - "step": 4570 - }, - { - "epoch": 0.43, - "learning_rate": 0.00019130764851015374, - "loss": 7.864, - "step": 4580 - }, - { - "epoch": 0.44, - "learning_rate": 0.00019128866957676978, - "loss": 7.9451, - "step": 4590 - }, - { - "epoch": 0.44, - "learning_rate": 0.00019126969064338584, - "loss": 7.9331, - "step": 4600 - }, - { - "epoch": 0.44, - "learning_rate": 0.0001912507117100019, - "loss": 7.958, - "step": 4610 - }, - { - "epoch": 0.44, - "learning_rate": 0.00019123173277661796, - "loss": 7.8932, - "step": 4620 - }, - { - "epoch": 0.44, - "learning_rate": 0.00019121275384323402, - "loss": 7.9253, - "step": 4630 - }, - { - "epoch": 0.44, - "learning_rate": 0.00019119377490985006, - "loss": 7.849, - "step": 4640 - }, - { - "epoch": 0.44, - "learning_rate": 0.00019117479597646612, - "loss": 7.9173, - "step": 4650 - }, - { - "epoch": 0.44, - "learning_rate": 0.00019115581704308218, - "loss": 7.8063, - "step": 4660 - }, - { - "epoch": 0.44, - "learning_rate": 0.00019113683810969824, - "loss": 7.8727, - "step": 4670 - }, - { - "epoch": 0.44, - "learning_rate": 0.0001911178591763143, - "loss": 7.9849, - "step": 4680 - }, - { - "epoch": 0.45, - "learning_rate": 0.00019109888024293036, - "loss": 7.917, - "step": 4690 - }, - { - "epoch": 0.45, - "learning_rate": 0.0001910799013095464, - "loss": 7.8493, - "step": 4700 - }, - { - "epoch": 0.45, - "learning_rate": 0.00019106092237616246, - "loss": 7.9853, - "step": 4710 - }, - { - "epoch": 0.45, - "learning_rate": 0.00019104194344277852, - "loss": 7.8777, - "step": 4720 - }, - { - "epoch": 0.45, - "learning_rate": 0.00019102296450939458, - "loss": 8.0427, - "step": 4730 - }, - { - "epoch": 0.45, - "learning_rate": 0.00019100398557601064, - "loss": 7.9599, - "step": 4740 - }, - { - "epoch": 0.45, - "learning_rate": 0.00019098500664262668, - "loss": 8.0004, - "step": 4750 - }, - { - "epoch": 0.45, - "learning_rate": 0.00019096602770924274, - "loss": 7.8401, - "step": 4760 - }, - { - "epoch": 0.45, - "learning_rate": 0.0001909470487758588, - "loss": 7.9153, - "step": 4770 - }, - { - "epoch": 0.45, - "learning_rate": 0.00019092806984247486, - "loss": 7.9396, - "step": 4780 - }, - { - "epoch": 0.45, - "learning_rate": 0.00019090909090909092, - "loss": 7.9395, - "step": 4790 - }, - { - "epoch": 0.46, - "learning_rate": 0.00019089011197570696, - "loss": 7.9248, - "step": 4800 - }, - { - "epoch": 0.46, - "learning_rate": 0.00019087113304232302, - "loss": 7.983, - "step": 4810 - }, - { - "epoch": 0.46, - "learning_rate": 0.00019085215410893908, - "loss": 7.9105, - "step": 4820 - }, - { - "epoch": 0.46, - "learning_rate": 0.00019083317517555514, - "loss": 7.8619, - "step": 4830 - }, - { - "epoch": 0.46, - "learning_rate": 0.0001908141962421712, - "loss": 7.8528, - "step": 4840 - }, - { - "epoch": 0.46, - "learning_rate": 0.00019079521730878726, - "loss": 7.8596, - "step": 4850 - }, - { - "epoch": 0.46, - "learning_rate": 0.0001907762383754033, - "loss": 7.82, - "step": 4860 - }, - { - "epoch": 0.46, - "learning_rate": 0.00019075725944201936, - "loss": 7.8662, - "step": 4870 - }, - { - "epoch": 0.46, - "learning_rate": 0.00019073828050863542, - "loss": 7.8878, - "step": 4880 - }, - { - "epoch": 0.46, - "learning_rate": 0.00019071930157525148, - "loss": 7.9529, - "step": 4890 - }, - { - "epoch": 0.46, - "learning_rate": 0.00019070032264186754, - "loss": 7.8718, - "step": 4900 - }, - { - "epoch": 0.47, - "learning_rate": 0.00019068134370848358, - "loss": 7.9205, - "step": 4910 - }, - { - "epoch": 0.47, - "learning_rate": 0.00019066236477509964, - "loss": 7.8675, - "step": 4920 - }, - { - "epoch": 0.47, - "learning_rate": 0.0001906433858417157, - "loss": 7.9425, - "step": 4930 - }, - { - "epoch": 0.47, - "learning_rate": 0.00019062440690833176, - "loss": 7.8226, - "step": 4940 - }, - { - "epoch": 0.47, - "learning_rate": 0.00019060542797494782, - "loss": 7.9104, - "step": 4950 - }, - { - "epoch": 0.47, - "learning_rate": 0.00019058644904156386, - "loss": 7.9396, - "step": 4960 - }, - { - "epoch": 0.47, - "learning_rate": 0.00019056747010817992, - "loss": 7.9223, - "step": 4970 - }, - { - "epoch": 0.47, - "learning_rate": 0.00019054849117479598, - "loss": 7.8131, - "step": 4980 - }, - { - "epoch": 0.47, - "learning_rate": 0.00019052951224141204, - "loss": 7.8441, - "step": 4990 - }, - { - "epoch": 0.47, - "learning_rate": 0.0001905105333080281, - "loss": 7.8416, - "step": 5000 - }, - { - "epoch": 0.48, - "learning_rate": 0.00019049155437464413, - "loss": 7.977, - "step": 5010 - }, - { - "epoch": 0.48, - "learning_rate": 0.0001904725754412602, - "loss": 7.9648, - "step": 5020 - }, - { - "epoch": 0.48, - "learning_rate": 0.00019045359650787626, - "loss": 7.8183, - "step": 5030 - }, - { - "epoch": 0.48, - "learning_rate": 0.00019043461757449232, - "loss": 7.9049, - "step": 5040 - }, - { - "epoch": 0.48, - "learning_rate": 0.00019041563864110838, - "loss": 7.9121, - "step": 5050 - }, - { - "epoch": 0.48, - "learning_rate": 0.00019039665970772444, - "loss": 7.9363, - "step": 5060 - }, - { - "epoch": 0.48, - "learning_rate": 0.00019037768077434048, - "loss": 7.7851, - "step": 5070 - }, - { - "epoch": 0.48, - "learning_rate": 0.00019035870184095654, - "loss": 7.9085, - "step": 5080 - }, - { - "epoch": 0.48, - "learning_rate": 0.0001903397229075726, - "loss": 7.931, - "step": 5090 - }, - { - "epoch": 0.48, - "learning_rate": 0.00019032074397418866, - "loss": 7.9747, - "step": 5100 - }, - { - "epoch": 0.48, - "learning_rate": 0.00019030176504080472, - "loss": 7.8948, - "step": 5110 - }, - { - "epoch": 0.49, - "learning_rate": 0.00019028278610742075, - "loss": 7.8823, - "step": 5120 - }, - { - "epoch": 0.49, - "learning_rate": 0.00019026380717403682, - "loss": 7.9353, - "step": 5130 - }, - { - "epoch": 0.49, - "learning_rate": 0.00019024482824065288, - "loss": 7.8635, - "step": 5140 - }, - { - "epoch": 0.49, - "learning_rate": 0.00019022584930726894, - "loss": 7.954, - "step": 5150 - }, - { - "epoch": 0.49, - "learning_rate": 0.000190206870373885, - "loss": 7.8627, - "step": 5160 - }, - { - "epoch": 0.49, - "learning_rate": 0.00019018789144050103, - "loss": 7.8782, - "step": 5170 - }, - { - "epoch": 0.49, - "learning_rate": 0.0001901689125071171, - "loss": 7.9441, - "step": 5180 - }, - { - "epoch": 0.49, - "learning_rate": 0.00019014993357373316, - "loss": 7.8237, - "step": 5190 - }, - { - "epoch": 0.49, - "learning_rate": 0.00019013095464034922, - "loss": 7.9041, - "step": 5200 - }, - { - "epoch": 0.49, - "learning_rate": 0.00019011197570696528, - "loss": 7.8748, - "step": 5210 - }, - { - "epoch": 0.5, - "learning_rate": 0.00019009299677358134, - "loss": 7.8806, - "step": 5220 - }, - { - "epoch": 0.5, - "learning_rate": 0.00019007401784019737, - "loss": 7.9769, - "step": 5230 - }, - { - "epoch": 0.5, - "learning_rate": 0.00019005503890681344, - "loss": 7.9438, - "step": 5240 - }, - { - "epoch": 0.5, - "learning_rate": 0.0001900360599734295, - "loss": 7.9547, - "step": 5250 - }, - { - "epoch": 0.5, - "learning_rate": 0.00019001708104004556, - "loss": 7.7906, - "step": 5260 - }, - { - "epoch": 0.5, - "learning_rate": 0.00018999810210666162, - "loss": 7.9561, - "step": 5270 - }, - { - "epoch": 0.5, - "learning_rate": 0.00018997912317327765, - "loss": 7.9207, - "step": 5280 - }, - { - "epoch": 0.5, - "learning_rate": 0.00018996014423989371, - "loss": 7.9381, - "step": 5290 - }, - { - "epoch": 0.5, - "learning_rate": 0.00018994116530650978, - "loss": 7.8762, - "step": 5300 - }, - { - "epoch": 0.5, - "learning_rate": 0.00018992218637312584, - "loss": 8.0111, - "step": 5310 - }, - { - "epoch": 0.5, - "learning_rate": 0.0001899032074397419, - "loss": 7.9814, - "step": 5320 - }, - { - "epoch": 0.51, - "learning_rate": 0.00018988422850635793, - "loss": 7.9152, - "step": 5330 - }, - { - "epoch": 0.51, - "learning_rate": 0.000189865249572974, - "loss": 7.8724, - "step": 5340 - }, - { - "epoch": 0.51, - "learning_rate": 0.00018984627063959006, - "loss": 8.0027, - "step": 5350 - }, - { - "epoch": 0.51, - "learning_rate": 0.00018982729170620612, - "loss": 7.8769, - "step": 5360 - }, - { - "epoch": 0.51, - "learning_rate": 0.00018980831277282218, - "loss": 7.864, - "step": 5370 - }, - { - "epoch": 0.51, - "learning_rate": 0.00018978933383943824, - "loss": 7.8941, - "step": 5380 - }, - { - "epoch": 0.51, - "learning_rate": 0.00018977035490605427, - "loss": 7.9021, - "step": 5390 - }, - { - "epoch": 0.51, - "learning_rate": 0.00018975137597267033, - "loss": 7.7893, - "step": 5400 - }, - { - "epoch": 0.51, - "learning_rate": 0.0001897323970392864, - "loss": 7.8462, - "step": 5410 - }, - { - "epoch": 0.51, - "learning_rate": 0.00018971341810590246, - "loss": 7.8329, - "step": 5420 - }, - { - "epoch": 0.52, - "learning_rate": 0.00018969443917251852, - "loss": 7.9414, - "step": 5430 - }, - { - "epoch": 0.52, - "learning_rate": 0.00018967546023913455, - "loss": 7.9004, - "step": 5440 - }, - { - "epoch": 0.52, - "learning_rate": 0.00018965648130575061, - "loss": 7.911, - "step": 5450 - }, - { - "epoch": 0.52, - "learning_rate": 0.00018963750237236668, - "loss": 7.9274, - "step": 5460 - }, - { - "epoch": 0.52, - "learning_rate": 0.00018961852343898274, - "loss": 7.899, - "step": 5470 - }, - { - "epoch": 0.52, - "learning_rate": 0.0001895995445055988, - "loss": 7.8951, - "step": 5480 - }, - { - "epoch": 0.52, - "learning_rate": 0.00018958056557221483, - "loss": 7.9019, - "step": 5490 - }, - { - "epoch": 0.52, - "learning_rate": 0.0001895615866388309, - "loss": 7.9009, - "step": 5500 - }, - { - "epoch": 0.52, - "learning_rate": 0.00018954260770544695, - "loss": 7.9239, - "step": 5510 - }, - { - "epoch": 0.52, - "learning_rate": 0.00018952362877206302, - "loss": 7.9005, - "step": 5520 - }, - { - "epoch": 0.52, - "learning_rate": 0.00018950464983867908, - "loss": 7.9392, - "step": 5530 - }, - { - "epoch": 0.53, - "learning_rate": 0.0001894856709052951, - "loss": 7.8666, - "step": 5540 - }, - { - "epoch": 0.53, - "learning_rate": 0.00018946669197191117, - "loss": 7.8467, - "step": 5550 - }, - { - "epoch": 0.53, - "learning_rate": 0.00018944771303852723, - "loss": 7.9866, - "step": 5560 - }, - { - "epoch": 0.53, - "learning_rate": 0.0001894287341051433, - "loss": 7.9338, - "step": 5570 - }, - { - "epoch": 0.53, - "learning_rate": 0.00018940975517175936, - "loss": 7.9074, - "step": 5580 - }, - { - "epoch": 0.53, - "learning_rate": 0.00018939077623837542, - "loss": 7.8457, - "step": 5590 - }, - { - "epoch": 0.53, - "learning_rate": 0.00018937179730499145, - "loss": 7.9165, - "step": 5600 - }, - { - "epoch": 0.53, - "learning_rate": 0.0001893528183716075, - "loss": 7.8931, - "step": 5610 - }, - { - "epoch": 0.53, - "learning_rate": 0.00018933383943822357, - "loss": 7.9497, - "step": 5620 - }, - { - "epoch": 0.53, - "learning_rate": 0.00018931486050483964, - "loss": 7.9797, - "step": 5630 - }, - { - "epoch": 0.54, - "learning_rate": 0.0001892958815714557, - "loss": 7.8169, - "step": 5640 - }, - { - "epoch": 0.54, - "learning_rate": 0.00018927690263807173, - "loss": 7.9766, - "step": 5650 - }, - { - "epoch": 0.54, - "learning_rate": 0.0001892579237046878, - "loss": 7.8768, - "step": 5660 - }, - { - "epoch": 0.54, - "learning_rate": 0.00018923894477130385, - "loss": 7.8953, - "step": 5670 - }, - { - "epoch": 0.54, - "learning_rate": 0.00018921996583791991, - "loss": 7.851, - "step": 5680 - }, - { - "epoch": 0.54, - "learning_rate": 0.00018920098690453598, - "loss": 7.9436, - "step": 5690 - }, - { - "epoch": 0.54, - "learning_rate": 0.000189182007971152, - "loss": 7.882, - "step": 5700 - }, - { - "epoch": 0.54, - "learning_rate": 0.00018916302903776807, - "loss": 7.8709, - "step": 5710 - }, - { - "epoch": 0.54, - "learning_rate": 0.00018914405010438413, - "loss": 7.8128, - "step": 5720 - }, - { - "epoch": 0.54, - "learning_rate": 0.0001891250711710002, - "loss": 7.9213, - "step": 5730 - }, - { - "epoch": 0.54, - "learning_rate": 0.00018910609223761626, - "loss": 7.8444, - "step": 5740 - }, - { - "epoch": 0.55, - "learning_rate": 0.00018908711330423232, - "loss": 7.9045, - "step": 5750 - }, - { - "epoch": 0.55, - "learning_rate": 0.00018906813437084835, - "loss": 8.0001, - "step": 5760 - }, - { - "epoch": 0.55, - "learning_rate": 0.0001890491554374644, - "loss": 7.9054, - "step": 5770 - }, - { - "epoch": 0.55, - "learning_rate": 0.00018903017650408047, - "loss": 7.9683, - "step": 5780 - }, - { - "epoch": 0.55, - "learning_rate": 0.00018901119757069653, - "loss": 7.8151, - "step": 5790 - }, - { - "epoch": 0.55, - "learning_rate": 0.0001889922186373126, - "loss": 7.8192, - "step": 5800 - }, - { - "epoch": 0.55, - "learning_rate": 0.00018897323970392863, - "loss": 7.9276, - "step": 5810 - }, - { - "epoch": 0.55, - "learning_rate": 0.0001889542607705447, - "loss": 7.9805, - "step": 5820 - }, - { - "epoch": 0.55, - "learning_rate": 0.00018893528183716075, - "loss": 7.8192, - "step": 5830 - }, - { - "epoch": 0.55, - "learning_rate": 0.00018891630290377681, - "loss": 7.9176, - "step": 5840 - }, - { - "epoch": 0.56, - "learning_rate": 0.00018889732397039288, - "loss": 7.8999, - "step": 5850 - }, - { - "epoch": 0.56, - "learning_rate": 0.0001888783450370089, - "loss": 7.8994, - "step": 5860 - }, - { - "epoch": 0.56, - "learning_rate": 0.00018885936610362497, - "loss": 7.8313, - "step": 5870 - }, - { - "epoch": 0.56, - "learning_rate": 0.00018884038717024103, - "loss": 7.924, - "step": 5880 - }, - { - "epoch": 0.56, - "learning_rate": 0.0001888214082368571, - "loss": 7.8946, - "step": 5890 - }, - { - "epoch": 0.56, - "learning_rate": 0.00018880242930347315, - "loss": 7.9005, - "step": 5900 - }, - { - "epoch": 0.56, - "learning_rate": 0.00018878345037008922, - "loss": 7.8146, - "step": 5910 - }, - { - "epoch": 0.56, - "learning_rate": 0.00018876447143670525, - "loss": 7.916, - "step": 5920 - }, - { - "epoch": 0.56, - "learning_rate": 0.0001887454925033213, - "loss": 7.9194, - "step": 5930 - }, - { - "epoch": 0.56, - "learning_rate": 0.00018872651356993737, - "loss": 7.9068, - "step": 5940 - }, - { - "epoch": 0.56, - "learning_rate": 0.00018870753463655343, - "loss": 7.8296, - "step": 5950 - }, - { - "epoch": 0.57, - "learning_rate": 0.0001886885557031695, - "loss": 7.9821, - "step": 5960 - }, - { - "epoch": 0.57, - "learning_rate": 0.00018866957676978553, - "loss": 7.9248, - "step": 5970 - }, - { - "epoch": 0.57, - "learning_rate": 0.0001886505978364016, - "loss": 7.9169, - "step": 5980 - }, - { - "epoch": 0.57, - "learning_rate": 0.00018863161890301765, - "loss": 7.9041, - "step": 5990 - }, - { - "epoch": 0.57, - "learning_rate": 0.0001886126399696337, - "loss": 7.9649, - "step": 6000 - }, - { - "epoch": 0.57, - "learning_rate": 0.00018859366103624977, - "loss": 7.9127, - "step": 6010 - }, - { - "epoch": 0.57, - "learning_rate": 0.0001885746821028658, - "loss": 8.0069, - "step": 6020 - }, - { - "epoch": 0.57, - "learning_rate": 0.00018855570316948187, - "loss": 7.9172, - "step": 6030 - }, - { - "epoch": 0.57, - "learning_rate": 0.00018853672423609793, - "loss": 7.83, - "step": 6040 - }, - { - "epoch": 0.57, - "learning_rate": 0.000188517745302714, - "loss": 8.0185, - "step": 6050 - }, - { - "epoch": 0.58, - "learning_rate": 0.00018849876636933005, - "loss": 7.96, - "step": 6060 - }, - { - "epoch": 0.58, - "learning_rate": 0.0001884797874359461, - "loss": 7.8452, - "step": 6070 - }, - { - "epoch": 0.58, - "learning_rate": 0.00018846080850256215, - "loss": 7.9495, - "step": 6080 - }, - { - "epoch": 0.58, - "learning_rate": 0.0001884418295691782, - "loss": 7.8226, - "step": 6090 - }, - { - "epoch": 0.58, - "learning_rate": 0.00018842285063579427, - "loss": 7.8803, - "step": 6100 - }, - { - "epoch": 0.58, - "learning_rate": 0.00018840387170241033, - "loss": 7.887, - "step": 6110 - }, - { - "epoch": 0.58, - "learning_rate": 0.0001883848927690264, - "loss": 7.7835, - "step": 6120 - }, - { - "epoch": 0.58, - "learning_rate": 0.00018836591383564243, - "loss": 7.8716, - "step": 6130 - }, - { - "epoch": 0.58, - "learning_rate": 0.0001883469349022585, - "loss": 7.8583, - "step": 6140 - }, - { - "epoch": 0.58, - "learning_rate": 0.00018832795596887455, - "loss": 7.925, - "step": 6150 - }, - { - "epoch": 0.58, - "learning_rate": 0.0001883089770354906, - "loss": 7.885, - "step": 6160 - }, - { - "epoch": 0.59, - "learning_rate": 0.00018828999810210667, - "loss": 7.8988, - "step": 6170 - }, - { - "epoch": 0.59, - "learning_rate": 0.0001882710191687227, - "loss": 7.8836, - "step": 6180 - }, - { - "epoch": 0.59, - "learning_rate": 0.00018825204023533877, - "loss": 7.8954, - "step": 6190 - }, - { - "epoch": 0.59, - "learning_rate": 0.00018823306130195483, - "loss": 7.8208, - "step": 6200 - }, - { - "epoch": 0.59, - "learning_rate": 0.0001882140823685709, - "loss": 7.8342, - "step": 6210 - }, - { - "epoch": 0.59, - "learning_rate": 0.00018819510343518695, - "loss": 7.9022, - "step": 6220 - }, - { - "epoch": 0.59, - "learning_rate": 0.000188176124501803, - "loss": 7.8636, - "step": 6230 - }, - { - "epoch": 0.59, - "learning_rate": 0.00018815714556841905, - "loss": 7.8913, - "step": 6240 - }, - { - "epoch": 0.59, - "learning_rate": 0.0001881381666350351, - "loss": 7.8727, - "step": 6250 - }, - { - "epoch": 0.59, - "learning_rate": 0.00018811918770165117, - "loss": 7.7914, - "step": 6260 - }, - { - "epoch": 0.59, - "learning_rate": 0.00018810020876826723, - "loss": 7.9805, - "step": 6270 - }, - { - "epoch": 0.6, - "learning_rate": 0.0001880812298348833, - "loss": 7.9156, - "step": 6280 - }, - { - "epoch": 0.6, - "learning_rate": 0.00018806225090149933, - "loss": 7.8462, - "step": 6290 - }, - { - "epoch": 0.6, - "learning_rate": 0.0001880432719681154, - "loss": 7.8603, - "step": 6300 - }, - { - "epoch": 0.6, - "learning_rate": 0.00018802429303473145, - "loss": 7.9177, - "step": 6310 - }, - { - "epoch": 0.6, - "learning_rate": 0.0001880053141013475, - "loss": 8.0463, - "step": 6320 - }, - { - "epoch": 0.6, - "learning_rate": 0.00018798633516796357, - "loss": 7.8587, - "step": 6330 - }, - { - "epoch": 0.6, - "learning_rate": 0.0001879673562345796, - "loss": 7.8777, - "step": 6340 - }, - { - "epoch": 0.6, - "learning_rate": 0.00018794837730119567, - "loss": 7.9968, - "step": 6350 - }, - { - "epoch": 0.6, - "learning_rate": 0.00018792939836781173, - "loss": 7.8663, - "step": 6360 - }, - { - "epoch": 0.6, - "learning_rate": 0.0001879104194344278, - "loss": 7.959, - "step": 6370 - }, - { - "epoch": 0.61, - "learning_rate": 0.00018789144050104385, - "loss": 7.8974, - "step": 6380 - }, - { - "epoch": 0.61, - "learning_rate": 0.00018787246156765989, - "loss": 7.9797, - "step": 6390 - }, - { - "epoch": 0.61, - "learning_rate": 0.00018785348263427595, - "loss": 7.9455, - "step": 6400 - }, - { - "epoch": 0.61, - "learning_rate": 0.000187834503700892, - "loss": 7.8779, - "step": 6410 - }, - { - "epoch": 0.61, - "learning_rate": 0.00018781552476750807, - "loss": 7.9653, - "step": 6420 - }, - { - "epoch": 0.61, - "learning_rate": 0.00018779654583412413, - "loss": 7.9065, - "step": 6430 - }, - { - "epoch": 0.61, - "learning_rate": 0.0001877775669007402, - "loss": 7.9129, - "step": 6440 - }, - { - "epoch": 0.61, - "learning_rate": 0.00018775858796735623, - "loss": 7.8326, - "step": 6450 - }, - { - "epoch": 0.61, - "learning_rate": 0.0001877396090339723, - "loss": 7.8568, - "step": 6460 - }, - { - "epoch": 0.61, - "learning_rate": 0.00018772063010058835, - "loss": 7.8862, - "step": 6470 - }, - { - "epoch": 0.61, - "learning_rate": 0.0001877016511672044, - "loss": 7.9105, - "step": 6480 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018768267223382047, - "loss": 7.9036, - "step": 6490 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001876636933004365, - "loss": 7.8985, - "step": 6500 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018764471436705257, - "loss": 7.9402, - "step": 6510 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018762573543366863, - "loss": 7.9015, - "step": 6520 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001876067565002847, - "loss": 7.9234, - "step": 6530 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018758777756690075, - "loss": 7.927, - "step": 6540 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018756879863351679, - "loss": 7.9412, - "step": 6550 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018754981970013285, - "loss": 7.8886, - "step": 6560 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001875308407667489, - "loss": 7.8395, - "step": 6570 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018751186183336497, - "loss": 7.8293, - "step": 6580 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018749288289998103, - "loss": 7.9124, - "step": 6590 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018747390396659706, - "loss": 7.903, - "step": 6600 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018745492503321313, - "loss": 7.9087, - "step": 6610 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001874359460998292, - "loss": 7.9055, - "step": 6620 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018741696716644525, - "loss": 7.9397, - "step": 6630 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001873979882330613, - "loss": 7.8537, - "step": 6640 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018737900929967737, - "loss": 7.9236, - "step": 6650 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001873600303662934, - "loss": 7.8514, - "step": 6660 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018734105143290947, - "loss": 7.9212, - "step": 6670 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018732207249952553, - "loss": 7.8364, - "step": 6680 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001873030935661416, - "loss": 7.8725, - "step": 6690 - }, - { - "epoch": 0.64, - "learning_rate": 0.00018728411463275765, - "loss": 7.8552, - "step": 6700 - }, - { - "epoch": 0.64, - "learning_rate": 0.00018726513569937368, - "loss": 7.8767, - "step": 6710 - }, - { - "epoch": 0.64, - "learning_rate": 0.00018724615676598975, - "loss": 7.8403, - "step": 6720 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001872271778326058, - "loss": 7.9066, - "step": 6730 - }, - { - "epoch": 0.64, - "learning_rate": 0.00018720819889922187, - "loss": 7.8817, - "step": 6740 - }, - { - "epoch": 0.64, - "learning_rate": 0.00018718921996583793, - "loss": 7.9498, - "step": 6750 - }, - { - "epoch": 0.64, - "learning_rate": 0.00018717024103245396, - "loss": 7.9311, - "step": 6760 - }, - { - "epoch": 0.64, - "learning_rate": 0.00018715126209907002, - "loss": 7.7898, - "step": 6770 - }, - { - "epoch": 0.64, - "learning_rate": 0.00018713228316568609, - "loss": 7.9264, - "step": 6780 - }, - { - "epoch": 0.64, - "learning_rate": 0.00018711330423230215, - "loss": 7.827, - "step": 6790 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001870943252989182, - "loss": 7.9209, - "step": 6800 - }, - { - "epoch": 0.65, - "learning_rate": 0.00018707534636553427, - "loss": 7.8619, - "step": 6810 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001870563674321503, - "loss": 7.8741, - "step": 6820 - }, - { - "epoch": 0.65, - "learning_rate": 0.00018703738849876637, - "loss": 7.806, - "step": 6830 - }, - { - "epoch": 0.65, - "learning_rate": 0.00018701840956538243, - "loss": 7.7915, - "step": 6840 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001869994306319985, - "loss": 7.9415, - "step": 6850 - }, - { - "epoch": 0.65, - "learning_rate": 0.00018698045169861455, - "loss": 7.8782, - "step": 6860 - }, - { - "epoch": 0.65, - "learning_rate": 0.00018696147276523058, - "loss": 7.9309, - "step": 6870 - }, - { - "epoch": 0.65, - "learning_rate": 0.00018694249383184664, - "loss": 7.897, - "step": 6880 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001869235148984627, - "loss": 7.7669, - "step": 6890 - }, - { - "epoch": 0.65, - "learning_rate": 0.00018690453596507877, - "loss": 7.8783, - "step": 6900 - }, - { - "epoch": 0.66, - "learning_rate": 0.00018688555703169483, - "loss": 7.9449, - "step": 6910 - }, - { - "epoch": 0.66, - "learning_rate": 0.00018686657809831086, - "loss": 7.839, - "step": 6920 - }, - { - "epoch": 0.66, - "learning_rate": 0.00018684759916492692, - "loss": 7.864, - "step": 6930 - }, - { - "epoch": 0.66, - "learning_rate": 0.00018682862023154299, - "loss": 7.8597, - "step": 6940 - }, - { - "epoch": 0.66, - "learning_rate": 0.00018680964129815905, - "loss": 7.7478, - "step": 6950 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001867906623647751, - "loss": 7.835, - "step": 6960 - }, - { - "epoch": 0.66, - "learning_rate": 0.00018677168343139117, - "loss": 7.8262, - "step": 6970 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001867527044980072, - "loss": 7.9301, - "step": 6980 - }, - { - "epoch": 0.66, - "learning_rate": 0.00018673372556462326, - "loss": 7.9125, - "step": 6990 - }, - { - "epoch": 0.66, - "learning_rate": 0.00018671474663123933, - "loss": 7.7855, - "step": 7000 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001866957676978554, - "loss": 7.9173, - "step": 7010 - }, - { - "epoch": 0.67, - "learning_rate": 0.00018667678876447145, - "loss": 7.8071, - "step": 7020 - }, - { - "epoch": 0.67, - "learning_rate": 0.00018665780983108748, - "loss": 7.8352, - "step": 7030 - }, - { - "epoch": 0.67, - "learning_rate": 0.00018663883089770354, - "loss": 8.0009, - "step": 7040 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001866198519643196, - "loss": 7.8807, - "step": 7050 - }, - { - "epoch": 0.67, - "learning_rate": 0.00018660087303093567, - "loss": 7.9295, - "step": 7060 - }, - { - "epoch": 0.67, - "learning_rate": 0.00018658189409755173, - "loss": 7.9473, - "step": 7070 - }, - { - "epoch": 0.67, - "learning_rate": 0.00018656291516416776, - "loss": 7.7944, - "step": 7080 - }, - { - "epoch": 0.67, - "learning_rate": 0.00018654393623078382, - "loss": 7.929, - "step": 7090 - }, - { - "epoch": 0.67, - "learning_rate": 0.00018652495729739988, - "loss": 7.9429, - "step": 7100 - }, - { - "epoch": 0.67, - "learning_rate": 0.00018650597836401595, - "loss": 7.8215, - "step": 7110 - }, - { - "epoch": 0.68, - "learning_rate": 0.000186486999430632, - "loss": 7.9725, - "step": 7120 - }, - { - "epoch": 0.68, - "learning_rate": 0.00018646802049724804, - "loss": 7.8404, - "step": 7130 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001864490415638641, - "loss": 7.8327, - "step": 7140 - }, - { - "epoch": 0.68, - "learning_rate": 0.00018643006263048016, - "loss": 7.9532, - "step": 7150 - }, - { - "epoch": 0.68, - "learning_rate": 0.00018641108369709622, - "loss": 7.8497, - "step": 7160 - }, - { - "epoch": 0.68, - "learning_rate": 0.00018639210476371229, - "loss": 7.9127, - "step": 7170 - }, - { - "epoch": 0.68, - "learning_rate": 0.00018637312583032835, - "loss": 7.8786, - "step": 7180 - }, - { - "epoch": 0.68, - "learning_rate": 0.00018635414689694438, - "loss": 7.9397, - "step": 7190 - }, - { - "epoch": 0.68, - "learning_rate": 0.00018633516796356044, - "loss": 7.9493, - "step": 7200 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001863161890301765, - "loss": 7.9002, - "step": 7210 - }, - { - "epoch": 0.69, - "learning_rate": 0.00018629721009679257, - "loss": 7.8782, - "step": 7220 - }, - { - "epoch": 0.69, - "learning_rate": 0.00018627823116340863, - "loss": 7.8344, - "step": 7230 - }, - { - "epoch": 0.69, - "learning_rate": 0.00018625925223002466, - "loss": 7.8414, - "step": 7240 - }, - { - "epoch": 0.69, - "learning_rate": 0.00018624027329664072, - "loss": 8.0188, - "step": 7250 - }, - { - "epoch": 0.69, - "learning_rate": 0.00018622129436325678, - "loss": 8.009, - "step": 7260 - }, - { - "epoch": 0.69, - "learning_rate": 0.00018620231542987284, - "loss": 7.8582, - "step": 7270 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001861833364964889, - "loss": 7.9226, - "step": 7280 - }, - { - "epoch": 0.69, - "learning_rate": 0.00018616435756310494, - "loss": 7.7506, - "step": 7290 - }, - { - "epoch": 0.69, - "learning_rate": 0.000186145378629721, - "loss": 7.8297, - "step": 7300 - }, - { - "epoch": 0.69, - "learning_rate": 0.00018612639969633706, - "loss": 7.8306, - "step": 7310 - }, - { - "epoch": 0.69, - "learning_rate": 0.00018610742076295312, - "loss": 7.9155, - "step": 7320 - }, - { - "epoch": 0.7, - "learning_rate": 0.00018608844182956919, - "loss": 7.906, - "step": 7330 - }, - { - "epoch": 0.7, - "learning_rate": 0.00018606946289618525, - "loss": 7.8682, - "step": 7340 - }, - { - "epoch": 0.7, - "learning_rate": 0.00018605048396280128, - "loss": 7.8501, - "step": 7350 - }, - { - "epoch": 0.7, - "learning_rate": 0.00018603150502941734, - "loss": 7.8741, - "step": 7360 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001860125260960334, - "loss": 7.8392, - "step": 7370 - }, - { - "epoch": 0.7, - "learning_rate": 0.00018599354716264946, - "loss": 7.9128, - "step": 7380 - }, - { - "epoch": 0.7, - "learning_rate": 0.00018597456822926553, - "loss": 7.9862, - "step": 7390 - }, - { - "epoch": 0.7, - "learning_rate": 0.00018595558929588156, - "loss": 7.8779, - "step": 7400 - }, - { - "epoch": 0.7, - "learning_rate": 0.00018593661036249762, - "loss": 7.8301, - "step": 7410 - }, - { - "epoch": 0.7, - "learning_rate": 0.00018591763142911368, - "loss": 7.8788, - "step": 7420 - }, - { - "epoch": 0.71, - "learning_rate": 0.00018589865249572974, - "loss": 7.9279, - "step": 7430 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001858796735623458, - "loss": 7.8394, - "step": 7440 - }, - { - "epoch": 0.71, - "learning_rate": 0.00018586069462896184, - "loss": 7.8696, - "step": 7450 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001858417156955779, - "loss": 7.9632, - "step": 7460 - }, - { - "epoch": 0.71, - "learning_rate": 0.00018582273676219396, - "loss": 7.8598, - "step": 7470 - }, - { - "epoch": 0.71, - "learning_rate": 0.00018580375782881002, - "loss": 7.9166, - "step": 7480 - }, - { - "epoch": 0.71, - "learning_rate": 0.00018578477889542608, - "loss": 7.9847, - "step": 7490 - }, - { - "epoch": 0.71, - "learning_rate": 0.00018576579996204215, - "loss": 7.9102, - "step": 7500 - }, - { - "epoch": 0.71, - "learning_rate": 0.00018574682102865818, - "loss": 7.8375, - "step": 7510 - }, - { - "epoch": 0.71, - "learning_rate": 0.00018572784209527424, - "loss": 7.8349, - "step": 7520 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001857088631618903, - "loss": 7.8491, - "step": 7530 - }, - { - "epoch": 0.72, - "learning_rate": 0.00018568988422850636, - "loss": 7.9786, - "step": 7540 - }, - { - "epoch": 0.72, - "learning_rate": 0.00018567090529512242, - "loss": 7.9327, - "step": 7550 - }, - { - "epoch": 0.72, - "learning_rate": 0.00018565192636173846, - "loss": 7.77, - "step": 7560 - }, - { - "epoch": 0.72, - "learning_rate": 0.00018563294742835452, - "loss": 7.9486, - "step": 7570 - }, - { - "epoch": 0.72, - "learning_rate": 0.00018561396849497058, - "loss": 7.8833, - "step": 7580 - }, - { - "epoch": 0.72, - "learning_rate": 0.00018559498956158664, - "loss": 7.9048, - "step": 7590 - }, - { - "epoch": 0.72, - "learning_rate": 0.0001855760106282027, - "loss": 7.874, - "step": 7600 - }, - { - "epoch": 0.72, - "learning_rate": 0.00018555703169481874, - "loss": 7.8719, - "step": 7610 - }, - { - "epoch": 0.72, - "learning_rate": 0.0001855380527614348, - "loss": 7.851, - "step": 7620 - }, - { - "epoch": 0.72, - "learning_rate": 0.00018551907382805086, - "loss": 7.9377, - "step": 7630 - }, - { - "epoch": 0.72, - "learning_rate": 0.00018550009489466692, - "loss": 7.9001, - "step": 7640 - }, - { - "epoch": 0.73, - "learning_rate": 0.00018548111596128298, - "loss": 7.8736, - "step": 7650 - }, - { - "epoch": 0.73, - "learning_rate": 0.00018546213702789902, - "loss": 7.8838, - "step": 7660 - }, - { - "epoch": 0.73, - "learning_rate": 0.00018544315809451508, - "loss": 7.8924, - "step": 7670 - }, - { - "epoch": 0.73, - "learning_rate": 0.00018542417916113114, - "loss": 7.8862, - "step": 7680 - }, - { - "epoch": 0.73, - "learning_rate": 0.0001854052002277472, - "loss": 7.8334, - "step": 7690 - }, - { - "epoch": 0.73, - "learning_rate": 0.00018538622129436326, - "loss": 7.8363, - "step": 7700 - }, - { - "epoch": 0.73, - "learning_rate": 0.00018536724236097932, - "loss": 7.905, - "step": 7710 - }, - { - "epoch": 0.73, - "learning_rate": 0.00018534826342759536, - "loss": 7.9271, - "step": 7720 - }, - { - "epoch": 0.73, - "learning_rate": 0.00018532928449421142, - "loss": 7.8476, - "step": 7730 - }, - { - "epoch": 0.73, - "learning_rate": 0.00018531030556082748, - "loss": 7.8614, - "step": 7740 - }, - { - "epoch": 0.74, - "learning_rate": 0.00018529132662744354, - "loss": 7.8769, - "step": 7750 - }, - { - "epoch": 0.74, - "learning_rate": 0.0001852723476940596, - "loss": 7.9633, - "step": 7760 - }, - { - "epoch": 0.74, - "learning_rate": 0.00018525336876067564, - "loss": 7.9126, - "step": 7770 - }, - { - "epoch": 0.74, - "learning_rate": 0.0001852343898272917, - "loss": 7.8547, - "step": 7780 - }, - { - "epoch": 0.74, - "learning_rate": 0.00018521541089390776, - "loss": 7.9311, - "step": 7790 - }, - { - "epoch": 0.74, - "learning_rate": 0.00018519643196052382, - "loss": 7.9018, - "step": 7800 - }, - { - "epoch": 0.74, - "learning_rate": 0.00018517745302713988, - "loss": 7.9094, - "step": 7810 - }, - { - "epoch": 0.74, - "learning_rate": 0.00018515847409375592, - "loss": 7.8283, - "step": 7820 - }, - { - "epoch": 0.74, - "learning_rate": 0.00018513949516037198, - "loss": 7.8136, - "step": 7830 - }, - { - "epoch": 0.74, - "learning_rate": 0.00018512051622698804, - "loss": 7.7893, - "step": 7840 - }, - { - "epoch": 0.74, - "learning_rate": 0.0001851015372936041, - "loss": 7.835, - "step": 7850 - }, - { - "epoch": 0.75, - "learning_rate": 0.00018508255836022016, - "loss": 7.9367, - "step": 7860 - }, - { - "epoch": 0.75, - "learning_rate": 0.00018506357942683622, - "loss": 7.8171, - "step": 7870 - }, - { - "epoch": 0.75, - "learning_rate": 0.00018504460049345226, - "loss": 7.7815, - "step": 7880 - }, - { - "epoch": 0.75, - "learning_rate": 0.00018502562156006832, - "loss": 7.8454, - "step": 7890 - }, - { - "epoch": 0.75, - "learning_rate": 0.00018500664262668438, - "loss": 7.9403, - "step": 7900 - }, - { - "epoch": 0.75, - "learning_rate": 0.00018498766369330044, - "loss": 7.8852, - "step": 7910 - }, - { - "epoch": 0.75, - "learning_rate": 0.0001849686847599165, - "loss": 7.8311, - "step": 7920 - }, - { - "epoch": 0.75, - "learning_rate": 0.00018494970582653254, - "loss": 7.8699, - "step": 7930 - }, - { - "epoch": 0.75, - "learning_rate": 0.0001849307268931486, - "loss": 7.8523, - "step": 7940 - }, - { - "epoch": 0.75, - "learning_rate": 0.00018491174795976466, - "loss": 7.7491, - "step": 7950 - }, - { - "epoch": 0.76, - "learning_rate": 0.00018489276902638072, - "loss": 7.9315, - "step": 7960 - }, - { - "epoch": 0.76, - "learning_rate": 0.00018487379009299678, - "loss": 7.9393, - "step": 7970 - }, - { - "epoch": 0.76, - "learning_rate": 0.00018485481115961282, - "loss": 7.8317, - "step": 7980 - }, - { - "epoch": 0.76, - "learning_rate": 0.00018483583222622888, - "loss": 7.9529, - "step": 7990 - }, - { - "epoch": 0.76, - "learning_rate": 0.00018481685329284494, - "loss": 7.8806, - "step": 8000 - }, - { - "epoch": 0.76, - "learning_rate": 0.000184797874359461, - "loss": 7.9167, - "step": 8010 - }, - { - "epoch": 0.76, - "learning_rate": 0.00018477889542607706, - "loss": 7.8738, - "step": 8020 - }, - { - "epoch": 0.76, - "learning_rate": 0.00018475991649269312, - "loss": 8.0231, - "step": 8030 - }, - { - "epoch": 0.76, - "learning_rate": 0.00018474093755930916, - "loss": 7.9135, - "step": 8040 - }, - { - "epoch": 0.76, - "learning_rate": 0.00018472195862592522, - "loss": 7.8255, - "step": 8050 - }, - { - "epoch": 0.76, - "learning_rate": 0.00018470297969254128, - "loss": 7.8249, - "step": 8060 - }, - { - "epoch": 0.77, - "learning_rate": 0.00018468400075915734, - "loss": 7.8805, - "step": 8070 - }, - { - "epoch": 0.77, - "learning_rate": 0.0001846650218257734, - "loss": 7.8936, - "step": 8080 - }, - { - "epoch": 0.77, - "learning_rate": 0.00018464604289238944, - "loss": 7.8101, - "step": 8090 - }, - { - "epoch": 0.77, - "learning_rate": 0.0001846270639590055, - "loss": 7.9686, - "step": 8100 - }, - { - "epoch": 0.77, - "learning_rate": 0.00018460808502562156, - "loss": 7.8303, - "step": 8110 - }, - { - "epoch": 0.77, - "learning_rate": 0.00018458910609223762, - "loss": 7.8838, - "step": 8120 - }, - { - "epoch": 0.77, - "learning_rate": 0.00018457012715885368, - "loss": 7.8232, - "step": 8130 - }, - { - "epoch": 0.77, - "learning_rate": 0.00018455114822546972, - "loss": 7.8239, - "step": 8140 - }, - { - "epoch": 0.77, - "learning_rate": 0.00018453216929208578, - "loss": 7.8823, - "step": 8150 - }, - { - "epoch": 0.77, - "learning_rate": 0.00018451319035870184, - "loss": 7.9962, - "step": 8160 - }, - { - "epoch": 0.78, - "learning_rate": 0.0001844942114253179, - "loss": 7.9401, - "step": 8170 - }, - { - "epoch": 0.78, - "learning_rate": 0.00018447523249193396, - "loss": 7.9221, - "step": 8180 - }, - { - "epoch": 0.78, - "learning_rate": 0.00018445625355855, - "loss": 7.8689, - "step": 8190 - }, - { - "epoch": 0.78, - "learning_rate": 0.00018443727462516606, - "loss": 7.9139, - "step": 8200 - }, - { - "epoch": 0.78, - "learning_rate": 0.00018441829569178212, - "loss": 8.0207, - "step": 8210 - }, - { - "epoch": 0.78, - "learning_rate": 0.00018439931675839818, - "loss": 7.8523, - "step": 8220 - }, - { - "epoch": 0.78, - "learning_rate": 0.00018438033782501424, - "loss": 7.8425, - "step": 8230 - }, - { - "epoch": 0.78, - "learning_rate": 0.0001843613588916303, - "loss": 7.8204, - "step": 8240 - }, - { - "epoch": 0.78, - "learning_rate": 0.00018434237995824633, - "loss": 7.8536, - "step": 8250 - }, - { - "epoch": 0.78, - "learning_rate": 0.0001843234010248624, - "loss": 7.8367, - "step": 8260 - }, - { - "epoch": 0.78, - "learning_rate": 0.00018430442209147846, - "loss": 7.7924, - "step": 8270 - }, - { - "epoch": 0.79, - "learning_rate": 0.00018428544315809452, - "loss": 7.8558, - "step": 8280 - }, - { - "epoch": 0.79, - "learning_rate": 0.00018426646422471058, - "loss": 7.9187, - "step": 8290 - }, - { - "epoch": 0.79, - "learning_rate": 0.00018424748529132661, - "loss": 7.7554, - "step": 8300 - }, - { - "epoch": 0.79, - "learning_rate": 0.00018422850635794268, - "loss": 7.8642, - "step": 8310 - }, - { - "epoch": 0.79, - "learning_rate": 0.00018420952742455874, - "loss": 7.8859, - "step": 8320 - }, - { - "epoch": 0.79, - "learning_rate": 0.0001841905484911748, - "loss": 7.8936, - "step": 8330 - }, - { - "epoch": 0.79, - "learning_rate": 0.00018417156955779086, - "loss": 7.9325, - "step": 8340 - }, - { - "epoch": 0.79, - "learning_rate": 0.0001841525906244069, - "loss": 7.9461, - "step": 8350 - }, - { - "epoch": 0.79, - "learning_rate": 0.00018413361169102295, - "loss": 7.8525, - "step": 8360 - }, - { - "epoch": 0.79, - "learning_rate": 0.00018411463275763902, - "loss": 7.9894, - "step": 8370 - }, - { - "epoch": 0.8, - "learning_rate": 0.00018409565382425508, - "loss": 7.7846, - "step": 8380 - }, - { - "epoch": 0.8, - "learning_rate": 0.00018407667489087114, - "loss": 7.8432, - "step": 8390 - }, - { - "epoch": 0.8, - "learning_rate": 0.0001840576959574872, - "loss": 7.9854, - "step": 8400 - }, - { - "epoch": 0.8, - "learning_rate": 0.00018403871702410323, - "loss": 7.7578, - "step": 8410 - }, - { - "epoch": 0.8, - "learning_rate": 0.0001840197380907193, - "loss": 7.9038, - "step": 8420 - }, - { - "epoch": 0.8, - "learning_rate": 0.00018400075915733536, - "loss": 7.9633, - "step": 8430 - }, - { - "epoch": 0.8, - "learning_rate": 0.00018398178022395142, - "loss": 7.8895, - "step": 8440 - }, - { - "epoch": 0.8, - "learning_rate": 0.00018396280129056748, - "loss": 7.9363, - "step": 8450 - }, - { - "epoch": 0.8, - "learning_rate": 0.0001839438223571835, - "loss": 7.8365, - "step": 8460 - }, - { - "epoch": 0.8, - "learning_rate": 0.00018392484342379957, - "loss": 7.8543, - "step": 8470 - }, - { - "epoch": 0.8, - "learning_rate": 0.00018390586449041564, - "loss": 7.8508, - "step": 8480 - }, - { - "epoch": 0.81, - "learning_rate": 0.0001838868855570317, - "loss": 7.7221, - "step": 8490 - }, - { - "epoch": 0.81, - "learning_rate": 0.00018386790662364776, - "loss": 7.8354, - "step": 8500 - }, - { - "epoch": 0.81, - "learning_rate": 0.0001838489276902638, - "loss": 7.8542, - "step": 8510 - }, - { - "epoch": 0.81, - "learning_rate": 0.00018382994875687985, - "loss": 8.0115, - "step": 8520 - }, - { - "epoch": 0.81, - "learning_rate": 0.00018381096982349592, - "loss": 7.9047, - "step": 8530 - }, - { - "epoch": 0.81, - "learning_rate": 0.00018379199089011198, - "loss": 7.9698, - "step": 8540 - }, - { - "epoch": 0.81, - "learning_rate": 0.00018377301195672804, - "loss": 7.8554, - "step": 8550 - }, - { - "epoch": 0.81, - "learning_rate": 0.0001837540330233441, - "loss": 8.0107, - "step": 8560 - }, - { - "epoch": 0.81, - "learning_rate": 0.00018373505408996013, - "loss": 7.9775, - "step": 8570 - }, - { - "epoch": 0.81, - "learning_rate": 0.0001837160751565762, - "loss": 7.8525, - "step": 8580 - }, - { - "epoch": 0.82, - "learning_rate": 0.00018369709622319226, - "loss": 8.0003, - "step": 8590 - }, - { - "epoch": 0.82, - "learning_rate": 0.00018367811728980832, - "loss": 7.9092, - "step": 8600 - }, - { - "epoch": 0.82, - "learning_rate": 0.00018365913835642438, - "loss": 7.8022, - "step": 8610 - }, - { - "epoch": 0.82, - "learning_rate": 0.0001836401594230404, - "loss": 7.9229, - "step": 8620 - }, - { - "epoch": 0.82, - "learning_rate": 0.00018362118048965647, - "loss": 7.9276, - "step": 8630 - }, - { - "epoch": 0.82, - "learning_rate": 0.00018360220155627254, - "loss": 7.8804, - "step": 8640 - }, - { - "epoch": 0.82, - "learning_rate": 0.0001835832226228886, - "loss": 7.917, - "step": 8650 - }, - { - "epoch": 0.82, - "learning_rate": 0.00018356424368950466, - "loss": 7.8381, - "step": 8660 - }, - { - "epoch": 0.82, - "learning_rate": 0.0001835452647561207, - "loss": 7.8699, - "step": 8670 - }, - { - "epoch": 0.82, - "learning_rate": 0.00018352628582273675, - "loss": 7.8897, - "step": 8680 - }, - { - "epoch": 0.82, - "learning_rate": 0.00018350730688935281, - "loss": 7.88, - "step": 8690 - }, - { - "epoch": 0.83, - "learning_rate": 0.00018348832795596888, - "loss": 7.8877, - "step": 8700 - }, - { - "epoch": 0.83, - "learning_rate": 0.00018346934902258494, - "loss": 7.959, - "step": 8710 - }, - { - "epoch": 0.83, - "learning_rate": 0.000183450370089201, - "loss": 7.8917, - "step": 8720 - }, - { - "epoch": 0.83, - "learning_rate": 0.00018343139115581703, - "loss": 7.819, - "step": 8730 - }, - { - "epoch": 0.83, - "learning_rate": 0.0001834124122224331, - "loss": 7.8435, - "step": 8740 - }, - { - "epoch": 0.83, - "learning_rate": 0.00018339343328904915, - "loss": 7.8497, - "step": 8750 - }, - { - "epoch": 0.83, - "learning_rate": 0.00018337445435566522, - "loss": 7.9061, - "step": 8760 - }, - { - "epoch": 0.83, - "learning_rate": 0.00018335547542228128, - "loss": 7.9324, - "step": 8770 - }, - { - "epoch": 0.83, - "learning_rate": 0.0001833364964888973, - "loss": 7.9109, - "step": 8780 - }, - { - "epoch": 0.83, - "learning_rate": 0.00018331751755551337, - "loss": 7.9571, - "step": 8790 - }, - { - "epoch": 0.84, - "learning_rate": 0.00018329853862212943, - "loss": 7.8536, - "step": 8800 - }, - { - "epoch": 0.84, - "learning_rate": 0.0001832795596887455, - "loss": 7.8108, - "step": 8810 - }, - { - "epoch": 0.84, - "learning_rate": 0.00018326058075536156, - "loss": 7.9172, - "step": 8820 - }, - { - "epoch": 0.84, - "learning_rate": 0.0001832416018219776, - "loss": 7.8601, - "step": 8830 - }, - { - "epoch": 0.84, - "learning_rate": 0.00018322262288859365, - "loss": 7.8404, - "step": 8840 - }, - { - "epoch": 0.84, - "learning_rate": 0.0001832036439552097, - "loss": 7.9074, - "step": 8850 - }, - { - "epoch": 0.84, - "learning_rate": 0.00018318466502182577, - "loss": 7.9285, - "step": 8860 - }, - { - "epoch": 0.84, - "learning_rate": 0.00018316568608844184, - "loss": 7.8839, - "step": 8870 - }, - { - "epoch": 0.84, - "learning_rate": 0.0001831467071550579, - "loss": 7.8742, - "step": 8880 - }, - { - "epoch": 0.84, - "learning_rate": 0.00018312772822167393, - "loss": 7.8799, - "step": 8890 - }, - { - "epoch": 0.84, - "learning_rate": 0.00018310874928829, - "loss": 7.9588, - "step": 8900 - }, - { - "epoch": 0.85, - "learning_rate": 0.00018308977035490605, - "loss": 7.9399, - "step": 8910 - }, - { - "epoch": 0.85, - "learning_rate": 0.00018307079142152212, - "loss": 7.879, - "step": 8920 - }, - { - "epoch": 0.85, - "learning_rate": 0.00018305181248813818, - "loss": 7.9172, - "step": 8930 - }, - { - "epoch": 0.85, - "learning_rate": 0.0001830328335547542, - "loss": 7.9306, - "step": 8940 - }, - { - "epoch": 0.85, - "learning_rate": 0.00018301385462137027, - "loss": 7.9562, - "step": 8950 - }, - { - "epoch": 0.85, - "learning_rate": 0.00018299487568798633, - "loss": 7.7876, - "step": 8960 - }, - { - "epoch": 0.85, - "learning_rate": 0.0001829758967546024, - "loss": 7.9089, - "step": 8970 - }, - { - "epoch": 0.85, - "learning_rate": 0.00018295691782121846, - "loss": 7.9165, - "step": 8980 - }, - { - "epoch": 0.85, - "learning_rate": 0.0001829379388878345, - "loss": 7.9097, - "step": 8990 - }, - { - "epoch": 0.85, - "learning_rate": 0.00018291895995445055, - "loss": 7.9582, - "step": 9000 - }, - { - "epoch": 0.86, - "learning_rate": 0.0001828999810210666, - "loss": 7.9603, - "step": 9010 - }, - { - "epoch": 0.86, - "learning_rate": 0.00018288100208768267, - "loss": 7.8963, - "step": 9020 - }, - { - "epoch": 0.86, - "learning_rate": 0.00018286202315429874, - "loss": 7.9486, - "step": 9030 - }, - { - "epoch": 0.86, - "learning_rate": 0.0001828430442209148, - "loss": 7.9271, - "step": 9040 - }, - { - "epoch": 0.86, - "learning_rate": 0.00018282406528753083, - "loss": 7.8293, - "step": 9050 - }, - { - "epoch": 0.86, - "learning_rate": 0.0001828050863541469, - "loss": 7.9049, - "step": 9060 - }, - { - "epoch": 0.86, - "learning_rate": 0.00018278610742076295, - "loss": 7.8041, - "step": 9070 - }, - { - "epoch": 0.86, - "learning_rate": 0.00018276712848737901, - "loss": 7.8856, - "step": 9080 - }, - { - "epoch": 0.86, - "learning_rate": 0.00018274814955399508, - "loss": 7.9318, - "step": 9090 - }, - { - "epoch": 0.86, - "learning_rate": 0.0001827291706206111, - "loss": 7.8903, - "step": 9100 - }, - { - "epoch": 0.86, - "learning_rate": 0.00018271019168722717, - "loss": 7.86, - "step": 9110 - }, - { - "epoch": 0.87, - "learning_rate": 0.00018269121275384323, - "loss": 7.9715, - "step": 9120 - }, - { - "epoch": 0.87, - "learning_rate": 0.0001826722338204593, - "loss": 7.8544, - "step": 9130 - }, - { - "epoch": 0.87, - "learning_rate": 0.00018265325488707535, - "loss": 7.9638, - "step": 9140 - }, - { - "epoch": 0.87, - "learning_rate": 0.00018263427595369142, - "loss": 7.8734, - "step": 9150 - }, - { - "epoch": 0.87, - "learning_rate": 0.00018261529702030745, - "loss": 7.901, - "step": 9160 - }, - { - "epoch": 0.87, - "learning_rate": 0.0001825963180869235, - "loss": 7.8865, - "step": 9170 - }, - { - "epoch": 0.87, - "learning_rate": 0.00018257733915353957, - "loss": 7.8662, - "step": 9180 - }, - { - "epoch": 0.87, - "learning_rate": 0.00018255836022015563, - "loss": 7.8327, - "step": 9190 - }, - { - "epoch": 0.87, - "learning_rate": 0.0001825393812867717, - "loss": 7.9404, - "step": 9200 - }, - { - "epoch": 0.87, - "learning_rate": 0.00018252040235338773, - "loss": 7.8854, - "step": 9210 - }, - { - "epoch": 0.87, - "learning_rate": 0.0001825014234200038, - "loss": 7.8985, - "step": 9220 - }, - { - "epoch": 0.88, - "learning_rate": 0.00018248244448661985, - "loss": 7.9045, - "step": 9230 - }, - { - "epoch": 0.88, - "learning_rate": 0.0001824634655532359, - "loss": 7.9535, - "step": 9240 - }, - { - "epoch": 0.88, - "learning_rate": 0.00018244448661985197, - "loss": 7.8169, - "step": 9250 - }, - { - "epoch": 0.88, - "learning_rate": 0.000182425507686468, - "loss": 7.8822, - "step": 9260 - }, - { - "epoch": 0.88, - "learning_rate": 0.00018240652875308407, - "loss": 7.9416, - "step": 9270 - }, - { - "epoch": 0.88, - "learning_rate": 0.00018238754981970013, - "loss": 7.871, - "step": 9280 - }, - { - "epoch": 0.88, - "learning_rate": 0.0001823685708863162, - "loss": 7.9359, - "step": 9290 - }, - { - "epoch": 0.88, - "learning_rate": 0.00018234959195293225, - "loss": 7.865, - "step": 9300 - }, - { - "epoch": 0.88, - "learning_rate": 0.00018233061301954832, - "loss": 7.823, - "step": 9310 - }, - { - "epoch": 0.88, - "learning_rate": 0.00018231163408616435, - "loss": 7.8769, - "step": 9320 - }, - { - "epoch": 0.89, - "learning_rate": 0.0001822926551527804, - "loss": 7.8701, - "step": 9330 - }, - { - "epoch": 0.89, - "learning_rate": 0.00018227367621939647, - "loss": 7.8604, - "step": 9340 - }, - { - "epoch": 0.89, - "learning_rate": 0.00018225469728601253, - "loss": 7.8605, - "step": 9350 - }, - { - "epoch": 0.89, - "learning_rate": 0.0001822357183526286, - "loss": 7.989, - "step": 9360 - }, - { - "epoch": 0.89, - "learning_rate": 0.00018221673941924463, - "loss": 7.9755, - "step": 9370 - }, - { - "epoch": 0.89, - "learning_rate": 0.0001821977604858607, - "loss": 7.9113, - "step": 9380 - }, - { - "epoch": 0.89, - "learning_rate": 0.00018217878155247675, - "loss": 7.9352, - "step": 9390 - }, - { - "epoch": 0.89, - "learning_rate": 0.0001821598026190928, - "loss": 7.8909, - "step": 9400 - }, - { - "epoch": 0.89, - "learning_rate": 0.00018214082368570887, - "loss": 7.9461, - "step": 9410 - }, - { - "epoch": 0.89, - "learning_rate": 0.0001821218447523249, - "loss": 7.9076, - "step": 9420 - }, - { - "epoch": 0.89, - "learning_rate": 0.00018210286581894097, - "loss": 7.8942, - "step": 9430 - }, - { - "epoch": 0.9, - "learning_rate": 0.00018208388688555703, - "loss": 7.9481, - "step": 9440 - }, - { - "epoch": 0.9, - "learning_rate": 0.0001820649079521731, - "loss": 7.9706, - "step": 9450 - }, - { - "epoch": 0.9, - "learning_rate": 0.00018204592901878915, - "loss": 7.862, - "step": 9460 - }, - { - "epoch": 0.9, - "learning_rate": 0.00018202695008540521, - "loss": 7.8994, - "step": 9470 - }, - { - "epoch": 0.9, - "learning_rate": 0.00018200797115202125, - "loss": 7.9575, - "step": 9480 - }, - { - "epoch": 0.9, - "learning_rate": 0.0001819889922186373, - "loss": 7.8784, - "step": 9490 - }, - { - "epoch": 0.9, - "learning_rate": 0.00018197001328525337, - "loss": 7.7745, - "step": 9500 - }, - { - "epoch": 0.9, - "learning_rate": 0.00018195103435186943, - "loss": 7.8305, - "step": 9510 - }, - { - "epoch": 0.9, - "learning_rate": 0.0001819320554184855, - "loss": 7.9674, - "step": 9520 - }, - { - "epoch": 0.9, - "learning_rate": 0.00018191307648510153, - "loss": 7.8213, - "step": 9530 - }, - { - "epoch": 0.91, - "learning_rate": 0.0001818940975517176, - "loss": 7.9048, - "step": 9540 - }, - { - "epoch": 0.91, - "learning_rate": 0.00018187511861833365, - "loss": 7.9176, - "step": 9550 - }, - { - "epoch": 0.91, - "learning_rate": 0.0001818561396849497, - "loss": 7.8689, - "step": 9560 - }, - { - "epoch": 0.91, - "learning_rate": 0.00018183716075156577, - "loss": 7.7798, - "step": 9570 - }, - { - "epoch": 0.91, - "learning_rate": 0.00018181818181818183, - "loss": 7.9013, - "step": 9580 - }, - { - "epoch": 0.91, - "learning_rate": 0.00018179920288479787, - "loss": 7.8507, - "step": 9590 - }, - { - "epoch": 0.91, - "learning_rate": 0.00018178022395141393, - "loss": 7.8261, - "step": 9600 - }, - { - "epoch": 0.91, - "learning_rate": 0.00018176124501803, - "loss": 7.9989, - "step": 9610 - }, - { - "epoch": 0.91, - "learning_rate": 0.00018174226608464605, - "loss": 7.8337, - "step": 9620 - }, - { - "epoch": 0.91, - "learning_rate": 0.00018172328715126211, - "loss": 7.8822, - "step": 9630 - }, - { - "epoch": 0.91, - "learning_rate": 0.00018170430821787815, - "loss": 7.8731, - "step": 9640 - }, - { - "epoch": 0.92, - "learning_rate": 0.0001816853292844942, - "loss": 7.8363, - "step": 9650 - }, - { - "epoch": 0.92, - "learning_rate": 0.00018166635035111027, - "loss": 7.8563, - "step": 9660 - }, - { - "epoch": 0.92, - "learning_rate": 0.00018164737141772633, - "loss": 7.869, - "step": 9670 - }, - { - "epoch": 0.92, - "learning_rate": 0.0001816283924843424, - "loss": 7.8723, - "step": 9680 - }, - { - "epoch": 0.92, - "learning_rate": 0.00018160941355095843, - "loss": 7.8806, - "step": 9690 - }, - { - "epoch": 0.92, - "learning_rate": 0.0001815904346175745, - "loss": 7.9023, - "step": 9700 - }, - { - "epoch": 0.92, - "learning_rate": 0.00018157145568419055, - "loss": 7.8605, - "step": 9710 - }, - { - "epoch": 0.92, - "learning_rate": 0.0001815524767508066, - "loss": 7.9172, - "step": 9720 - }, - { - "epoch": 0.92, - "learning_rate": 0.00018153349781742267, - "loss": 7.8754, - "step": 9730 - }, - { - "epoch": 0.92, - "learning_rate": 0.00018151451888403873, - "loss": 7.8568, - "step": 9740 - }, - { - "epoch": 0.93, - "learning_rate": 0.00018149553995065477, - "loss": 7.8626, - "step": 9750 - }, - { - "epoch": 0.93, - "learning_rate": 0.00018147656101727083, - "loss": 7.9, - "step": 9760 - }, - { - "epoch": 0.93, - "learning_rate": 0.0001814575820838869, - "loss": 7.8833, - "step": 9770 - }, - { - "epoch": 0.93, - "learning_rate": 0.00018143860315050295, - "loss": 7.928, - "step": 9780 - }, - { - "epoch": 0.93, - "learning_rate": 0.000181419624217119, - "loss": 7.9062, - "step": 9790 - }, - { - "epoch": 0.93, - "learning_rate": 0.00018140064528373505, - "loss": 7.9355, - "step": 9800 - }, - { - "epoch": 0.93, - "learning_rate": 0.0001813816663503511, - "loss": 7.8508, - "step": 9810 - }, - { - "epoch": 0.93, - "learning_rate": 0.00018136268741696717, - "loss": 7.974, - "step": 9820 - }, - { - "epoch": 0.93, - "learning_rate": 0.00018134370848358323, - "loss": 7.8582, - "step": 9830 - }, - { - "epoch": 0.93, - "learning_rate": 0.0001813247295501993, - "loss": 7.9101, - "step": 9840 - }, - { - "epoch": 0.93, - "learning_rate": 0.00018130575061681533, - "loss": 7.8904, - "step": 9850 - }, - { - "epoch": 0.94, - "learning_rate": 0.0001812867716834314, - "loss": 7.8692, - "step": 9860 - }, - { - "epoch": 0.94, - "learning_rate": 0.00018126779275004745, - "loss": 7.85, - "step": 9870 - }, - { - "epoch": 0.94, - "learning_rate": 0.0001812488138166635, - "loss": 7.8922, - "step": 9880 - }, - { - "epoch": 0.94, - "learning_rate": 0.00018122983488327957, - "loss": 7.8379, - "step": 9890 - }, - { - "epoch": 0.94, - "learning_rate": 0.00018121085594989563, - "loss": 7.9406, - "step": 9900 - }, - { - "epoch": 0.94, - "learning_rate": 0.00018119187701651167, - "loss": 7.8289, - "step": 9910 - }, - { - "epoch": 0.94, - "learning_rate": 0.00018117289808312773, - "loss": 7.8563, - "step": 9920 - }, - { - "epoch": 0.94, - "learning_rate": 0.0001811539191497438, - "loss": 7.834, - "step": 9930 - }, - { - "epoch": 0.94, - "learning_rate": 0.00018113494021635985, - "loss": 8.0085, - "step": 9940 - }, - { - "epoch": 0.94, - "learning_rate": 0.0001811159612829759, - "loss": 7.9363, - "step": 9950 - }, - { - "epoch": 0.95, - "learning_rate": 0.00018109698234959195, - "loss": 7.9089, - "step": 9960 - }, - { - "epoch": 0.95, - "learning_rate": 0.000181078003416208, - "loss": 7.8335, - "step": 9970 - }, - { - "epoch": 0.95, - "learning_rate": 0.00018105902448282407, - "loss": 7.8081, - "step": 9980 - }, - { - "epoch": 0.95, - "learning_rate": 0.00018104004554944013, - "loss": 7.873, - "step": 9990 - }, - { - "epoch": 0.95, - "learning_rate": 0.0001810210666160562, - "loss": 7.8915, - "step": 10000 - }, - { - "epoch": 0.95, - "learning_rate": 0.00018100208768267225, - "loss": 7.9186, - "step": 10010 - }, - { - "epoch": 0.95, - "learning_rate": 0.0001809831087492883, - "loss": 7.8812, - "step": 10020 - }, - { - "epoch": 0.95, - "learning_rate": 0.00018096412981590435, - "loss": 7.9097, - "step": 10030 - }, - { - "epoch": 0.95, - "learning_rate": 0.0001809451508825204, - "loss": 7.9482, - "step": 10040 - }, - { - "epoch": 0.95, - "learning_rate": 0.00018092617194913647, - "loss": 7.8154, - "step": 10050 - }, - { - "epoch": 0.95, - "learning_rate": 0.00018090719301575253, - "loss": 7.8891, - "step": 10060 - }, - { - "epoch": 0.96, - "learning_rate": 0.00018088821408236857, - "loss": 7.9105, - "step": 10070 - }, - { - "epoch": 0.96, - "learning_rate": 0.00018086923514898463, - "loss": 7.7747, - "step": 10080 - }, - { - "epoch": 0.96, - "learning_rate": 0.0001808502562156007, - "loss": 7.8896, - "step": 10090 - }, - { - "epoch": 0.96, - "learning_rate": 0.00018083127728221675, - "loss": 7.8707, - "step": 10100 - }, - { - "epoch": 0.96, - "learning_rate": 0.0001808122983488328, - "loss": 7.8592, - "step": 10110 - }, - { - "epoch": 0.96, - "learning_rate": 0.00018079331941544885, - "loss": 7.898, - "step": 10120 - }, - { - "epoch": 0.96, - "learning_rate": 0.0001807743404820649, - "loss": 7.9184, - "step": 10130 - }, - { - "epoch": 0.96, - "learning_rate": 0.00018075536154868097, - "loss": 7.8481, - "step": 10140 - }, - { - "epoch": 0.96, - "learning_rate": 0.00018073638261529703, - "loss": 7.8695, - "step": 10150 - }, - { - "epoch": 0.96, - "learning_rate": 0.0001807174036819131, - "loss": 7.8226, - "step": 10160 - }, - { - "epoch": 0.97, - "learning_rate": 0.00018069842474852915, - "loss": 7.8663, - "step": 10170 - }, - { - "epoch": 0.97, - "learning_rate": 0.00018067944581514519, - "loss": 7.9406, - "step": 10180 - }, - { - "epoch": 0.97, - "learning_rate": 0.00018066046688176125, - "loss": 7.9082, - "step": 10190 - }, - { - "epoch": 0.97, - "learning_rate": 0.0001806414879483773, - "loss": 7.9394, - "step": 10200 - }, - { - "epoch": 0.97, - "learning_rate": 0.00018062250901499337, - "loss": 7.9245, - "step": 10210 - }, - { - "epoch": 0.97, - "learning_rate": 0.00018060353008160943, - "loss": 7.8805, - "step": 10220 - }, - { - "epoch": 0.97, - "learning_rate": 0.00018058455114822546, - "loss": 7.9478, - "step": 10230 - }, - { - "epoch": 0.97, - "learning_rate": 0.00018056557221484153, - "loss": 7.9049, - "step": 10240 - }, - { - "epoch": 0.97, - "learning_rate": 0.0001805465932814576, - "loss": 7.9602, - "step": 10250 - }, - { - "epoch": 0.97, - "learning_rate": 0.00018052761434807365, - "loss": 7.901, - "step": 10260 - }, - { - "epoch": 0.97, - "learning_rate": 0.0001805086354146897, - "loss": 7.9045, - "step": 10270 - }, - { - "epoch": 0.98, - "learning_rate": 0.00018048965648130574, - "loss": 7.901, - "step": 10280 - }, - { - "epoch": 0.98, - "learning_rate": 0.0001804706775479218, - "loss": 7.8334, - "step": 10290 - }, - { - "epoch": 0.98, - "learning_rate": 0.00018045169861453787, - "loss": 7.9435, - "step": 10300 - }, - { - "epoch": 0.98, - "learning_rate": 0.00018043271968115393, - "loss": 7.8548, - "step": 10310 - }, - { - "epoch": 0.98, - "learning_rate": 0.00018041374074777, - "loss": 7.7928, - "step": 10320 - }, - { - "epoch": 0.98, - "learning_rate": 0.00018039476181438605, - "loss": 7.8831, - "step": 10330 - }, - { - "epoch": 0.98, - "learning_rate": 0.00018037578288100208, - "loss": 7.8562, - "step": 10340 - }, - { - "epoch": 0.98, - "learning_rate": 0.00018035680394761815, - "loss": 7.836, - "step": 10350 - }, - { - "epoch": 0.98, - "learning_rate": 0.0001803378250142342, - "loss": 7.8384, - "step": 10360 - }, - { - "epoch": 0.98, - "learning_rate": 0.00018031884608085027, - "loss": 7.9001, - "step": 10370 - }, - { - "epoch": 0.99, - "learning_rate": 0.00018029986714746633, - "loss": 7.8501, - "step": 10380 - }, - { - "epoch": 0.99, - "learning_rate": 0.00018028088821408236, - "loss": 7.8157, - "step": 10390 - }, - { - "epoch": 0.99, - "learning_rate": 0.00018026190928069843, - "loss": 7.885, - "step": 10400 - }, - { - "epoch": 0.99, - "learning_rate": 0.0001802429303473145, - "loss": 7.9899, - "step": 10410 - }, - { - "epoch": 0.99, - "learning_rate": 0.00018022395141393055, - "loss": 7.9357, - "step": 10420 - }, - { - "epoch": 0.99, - "learning_rate": 0.0001802049724805466, - "loss": 7.9355, - "step": 10430 - }, - { - "epoch": 0.99, - "learning_rate": 0.00018018599354716267, - "loss": 7.9414, - "step": 10440 - }, - { - "epoch": 0.99, - "learning_rate": 0.0001801670146137787, - "loss": 7.9003, - "step": 10450 - }, - { - "epoch": 0.99, - "learning_rate": 0.00018014803568039477, - "loss": 7.8847, - "step": 10460 - }, - { - "epoch": 0.99, - "learning_rate": 0.00018012905674701083, - "loss": 7.8374, - "step": 10470 - }, - { - "epoch": 0.99, - "learning_rate": 0.0001801100778136269, - "loss": 7.8259, - "step": 10480 - }, - { - "epoch": 1.0, - "learning_rate": 0.00018009109888024295, - "loss": 7.8256, - "step": 10490 - }, - { - "epoch": 1.0, - "learning_rate": 0.00018007211994685898, - "loss": 7.8138, - "step": 10500 - }, - { - "epoch": 1.0, - "learning_rate": 0.00018005314101347505, - "loss": 7.7975, - "step": 10510 - }, - { - "epoch": 1.0, - "learning_rate": 0.0001800341620800911, - "loss": 7.7755, - "step": 10520 - }, - { - "epoch": 1.0, - "learning_rate": 0.00018001518314670717, - "loss": 7.9293, - "step": 10530 - }, - { - "epoch": 1.0, - "learning_rate": 0.00017999620421332323, - "loss": 7.8856, - "step": 10540 - }, - { - "epoch": 1.0, - "learning_rate": 0.00017997722527993926, - "loss": 7.8915, - "step": 10550 - }, - { - "epoch": 1.0, - "learning_rate": 0.00017995824634655532, - "loss": 7.891, - "step": 10560 - }, - { - "epoch": 1.0, - "learning_rate": 0.00017993926741317139, - "loss": 7.8232, - "step": 10570 - }, - { - "epoch": 1.0, - "learning_rate": 0.00017992028847978745, - "loss": 7.9083, - "step": 10580 - }, - { - "epoch": 1.0, - "learning_rate": 0.0001799013095464035, - "loss": 7.9282, - "step": 10590 - }, - { - "epoch": 1.01, - "learning_rate": 0.00017988233061301957, - "loss": 7.8374, - "step": 10600 - }, - { - "epoch": 1.01, - "learning_rate": 0.0001798633516796356, - "loss": 7.9513, - "step": 10610 - }, - { - "epoch": 1.01, - "learning_rate": 0.00017984437274625167, - "loss": 7.8343, - "step": 10620 - }, - { - "epoch": 1.01, - "learning_rate": 0.00017982539381286773, - "loss": 7.7961, - "step": 10630 - }, - { - "epoch": 1.01, - "learning_rate": 0.0001798064148794838, - "loss": 7.9153, - "step": 10640 - }, - { - "epoch": 1.01, - "learning_rate": 0.00017978743594609985, - "loss": 7.8247, - "step": 10650 - }, - { - "epoch": 1.01, - "learning_rate": 0.00017976845701271588, - "loss": 7.8539, - "step": 10660 - }, - { - "epoch": 1.01, - "learning_rate": 0.00017974947807933194, - "loss": 7.9135, - "step": 10670 - }, - { - "epoch": 1.01, - "learning_rate": 0.000179730499145948, - "loss": 7.8577, - "step": 10680 - }, - { - "epoch": 1.01, - "learning_rate": 0.00017971152021256407, - "loss": 7.8242, - "step": 10690 - }, - { - "epoch": 1.02, - "learning_rate": 0.00017969254127918013, - "loss": 7.8424, - "step": 10700 - }, - { - "epoch": 1.02, - "learning_rate": 0.0001796735623457962, - "loss": 7.8466, - "step": 10710 - }, - { - "epoch": 1.02, - "learning_rate": 0.00017965458341241222, - "loss": 7.8525, - "step": 10720 - }, - { - "epoch": 1.02, - "learning_rate": 0.00017963560447902828, - "loss": 7.9175, - "step": 10730 - }, - { - "epoch": 1.02, - "learning_rate": 0.00017961662554564435, - "loss": 7.8664, - "step": 10740 - }, - { - "epoch": 1.02, - "learning_rate": 0.0001795976466122604, - "loss": 7.9202, - "step": 10750 - }, - { - "epoch": 1.02, - "learning_rate": 0.00017957866767887647, - "loss": 7.8973, - "step": 10760 - }, - { - "epoch": 1.02, - "learning_rate": 0.0001795596887454925, - "loss": 7.8988, - "step": 10770 - }, - { - "epoch": 1.02, - "learning_rate": 0.00017954070981210856, - "loss": 7.8721, - "step": 10780 - }, - { - "epoch": 1.02, - "learning_rate": 0.00017952173087872463, - "loss": 7.8283, - "step": 10790 - }, - { - "epoch": 1.02, - "learning_rate": 0.0001795027519453407, - "loss": 7.8389, - "step": 10800 - }, - { - "epoch": 1.03, - "learning_rate": 0.00017948377301195675, - "loss": 7.8965, - "step": 10810 - }, - { - "epoch": 1.03, - "learning_rate": 0.00017946479407857278, - "loss": 7.7578, - "step": 10820 - }, - { - "epoch": 1.03, - "learning_rate": 0.00017944581514518884, - "loss": 7.8069, - "step": 10830 - }, - { - "epoch": 1.03, - "learning_rate": 0.0001794268362118049, - "loss": 7.8509, - "step": 10840 - }, - { - "epoch": 1.03, - "learning_rate": 0.00017940785727842097, - "loss": 8.0003, - "step": 10850 - }, - { - "epoch": 1.03, - "learning_rate": 0.00017938887834503703, - "loss": 7.8455, - "step": 10860 - }, - { - "epoch": 1.03, - "learning_rate": 0.0001793698994116531, - "loss": 7.8861, - "step": 10870 - }, - { - "epoch": 1.03, - "learning_rate": 0.00017935092047826912, - "loss": 7.996, - "step": 10880 - }, - { - "epoch": 1.03, - "learning_rate": 0.00017933194154488518, - "loss": 7.8577, - "step": 10890 - }, - { - "epoch": 1.03, - "learning_rate": 0.00017931296261150125, - "loss": 7.8246, - "step": 10900 - }, - { - "epoch": 1.04, - "learning_rate": 0.0001792939836781173, - "loss": 7.7759, - "step": 10910 - }, - { - "epoch": 1.04, - "learning_rate": 0.00017927500474473337, - "loss": 7.817, - "step": 10920 - }, - { - "epoch": 1.04, - "learning_rate": 0.0001792560258113494, - "loss": 7.8713, - "step": 10930 - }, - { - "epoch": 1.04, - "learning_rate": 0.00017923704687796546, - "loss": 7.8883, - "step": 10940 - }, - { - "epoch": 1.04, - "learning_rate": 0.00017921806794458152, - "loss": 7.8941, - "step": 10950 - }, - { - "epoch": 1.04, - "learning_rate": 0.00017919908901119759, - "loss": 7.991, - "step": 10960 - }, - { - "epoch": 1.04, - "learning_rate": 0.00017918011007781365, - "loss": 7.9228, - "step": 10970 - }, - { - "epoch": 1.04, - "learning_rate": 0.00017916113114442968, - "loss": 7.8441, - "step": 10980 - }, - { - "epoch": 1.04, - "learning_rate": 0.00017914215221104574, - "loss": 7.9454, - "step": 10990 - }, - { - "epoch": 1.04, - "learning_rate": 0.0001791231732776618, - "loss": 7.9598, - "step": 11000 - }, - { - "epoch": 1.04, - "learning_rate": 0.00017910419434427787, - "loss": 7.9049, - "step": 11010 - }, - { - "epoch": 1.05, - "learning_rate": 0.00017908521541089393, - "loss": 7.8028, - "step": 11020 - }, - { - "epoch": 1.05, - "learning_rate": 0.00017906623647751, - "loss": 7.8796, - "step": 11030 - }, - { - "epoch": 1.05, - "learning_rate": 0.00017904725754412602, - "loss": 7.8912, - "step": 11040 - }, - { - "epoch": 1.05, - "learning_rate": 0.00017902827861074208, - "loss": 7.9389, - "step": 11050 - }, - { - "epoch": 1.05, - "learning_rate": 0.00017900929967735814, - "loss": 7.8783, - "step": 11060 - }, - { - "epoch": 1.05, - "learning_rate": 0.0001789903207439742, - "loss": 7.7984, - "step": 11070 - }, - { - "epoch": 1.05, - "learning_rate": 0.00017897134181059027, - "loss": 7.8839, - "step": 11080 - }, - { - "epoch": 1.05, - "learning_rate": 0.0001789523628772063, - "loss": 7.9485, - "step": 11090 - }, - { - "epoch": 1.05, - "learning_rate": 0.00017893338394382236, - "loss": 7.8604, - "step": 11100 - }, - { - "epoch": 1.05, - "learning_rate": 0.00017891440501043842, - "loss": 7.8475, - "step": 11110 - }, - { - "epoch": 1.06, - "learning_rate": 0.00017889542607705448, - "loss": 7.8361, - "step": 11120 - }, - { - "epoch": 1.06, - "learning_rate": 0.00017887644714367055, - "loss": 7.9018, - "step": 11130 - }, - { - "epoch": 1.06, - "learning_rate": 0.0001788574682102866, - "loss": 7.8377, - "step": 11140 - }, - { - "epoch": 1.06, - "learning_rate": 0.00017883848927690264, - "loss": 7.9123, - "step": 11150 - }, - { - "epoch": 1.06, - "learning_rate": 0.0001788195103435187, - "loss": 7.8456, - "step": 11160 - }, - { - "epoch": 1.06, - "learning_rate": 0.00017880053141013476, - "loss": 8.0092, - "step": 11170 - }, - { - "epoch": 1.06, - "learning_rate": 0.00017878155247675083, - "loss": 7.9217, - "step": 11180 - }, - { - "epoch": 1.06, - "learning_rate": 0.0001787625735433669, - "loss": 7.8548, - "step": 11190 - }, - { - "epoch": 1.06, - "learning_rate": 0.00017874359460998292, - "loss": 7.8086, - "step": 11200 - }, - { - "epoch": 1.06, - "learning_rate": 0.00017872461567659898, - "loss": 7.9422, - "step": 11210 - }, - { - "epoch": 1.06, - "learning_rate": 0.00017870563674321504, - "loss": 7.8733, - "step": 11220 - }, - { - "epoch": 1.07, - "learning_rate": 0.0001786866578098311, - "loss": 7.9344, - "step": 11230 - }, - { - "epoch": 1.07, - "learning_rate": 0.00017866767887644717, - "loss": 7.8834, - "step": 11240 - }, - { - "epoch": 1.07, - "learning_rate": 0.0001786486999430632, - "loss": 7.9546, - "step": 11250 - }, - { - "epoch": 1.07, - "learning_rate": 0.00017862972100967926, - "loss": 7.8716, - "step": 11260 - }, - { - "epoch": 1.07, - "learning_rate": 0.00017861074207629532, - "loss": 7.8437, - "step": 11270 - }, - { - "epoch": 1.07, - "learning_rate": 0.00017859176314291138, - "loss": 7.9324, - "step": 11280 - }, - { - "epoch": 1.07, - "learning_rate": 0.00017857278420952745, - "loss": 7.9234, - "step": 11290 - }, - { - "epoch": 1.07, - "learning_rate": 0.0001785538052761435, - "loss": 7.9025, - "step": 11300 - }, - { - "epoch": 1.07, - "learning_rate": 0.00017853482634275954, - "loss": 7.9273, - "step": 11310 - }, - { - "epoch": 1.07, - "learning_rate": 0.0001785158474093756, - "loss": 7.921, - "step": 11320 - }, - { - "epoch": 1.08, - "learning_rate": 0.00017849686847599166, - "loss": 7.8048, - "step": 11330 - }, - { - "epoch": 1.08, - "learning_rate": 0.00017847788954260772, - "loss": 7.9139, - "step": 11340 - }, - { - "epoch": 1.08, - "learning_rate": 0.00017845891060922379, - "loss": 7.8946, - "step": 11350 - }, - { - "epoch": 1.08, - "learning_rate": 0.00017843993167583982, - "loss": 7.8894, - "step": 11360 - }, - { - "epoch": 1.08, - "learning_rate": 0.00017842095274245588, - "loss": 7.929, - "step": 11370 - }, - { - "epoch": 1.08, - "learning_rate": 0.00017840197380907194, - "loss": 7.8031, - "step": 11380 - }, - { - "epoch": 1.08, - "learning_rate": 0.000178382994875688, - "loss": 7.8458, - "step": 11390 - }, - { - "epoch": 1.08, - "learning_rate": 0.00017836401594230407, - "loss": 7.7878, - "step": 11400 - }, - { - "epoch": 1.08, - "learning_rate": 0.0001783450370089201, - "loss": 7.8181, - "step": 11410 - }, - { - "epoch": 1.08, - "learning_rate": 0.00017832605807553616, - "loss": 7.9258, - "step": 11420 - }, - { - "epoch": 1.08, - "learning_rate": 0.00017830707914215222, - "loss": 7.8423, - "step": 11430 - }, - { - "epoch": 1.09, - "learning_rate": 0.00017828810020876828, - "loss": 7.8946, - "step": 11440 - }, - { - "epoch": 1.09, - "learning_rate": 0.00017826912127538434, - "loss": 7.8909, - "step": 11450 - }, - { - "epoch": 1.09, - "learning_rate": 0.0001782501423420004, - "loss": 7.9299, - "step": 11460 - }, - { - "epoch": 1.09, - "learning_rate": 0.00017823116340861644, - "loss": 7.8795, - "step": 11470 - }, - { - "epoch": 1.09, - "learning_rate": 0.0001782121844752325, - "loss": 7.9029, - "step": 11480 - }, - { - "epoch": 1.09, - "learning_rate": 0.00017819320554184856, - "loss": 7.9417, - "step": 11490 - }, - { - "epoch": 1.09, - "learning_rate": 0.00017817422660846462, - "loss": 7.8398, - "step": 11500 - }, - { - "epoch": 1.09, - "learning_rate": 0.00017815524767508069, - "loss": 7.8438, - "step": 11510 - }, - { - "epoch": 1.09, - "learning_rate": 0.00017813626874169672, - "loss": 7.8947, - "step": 11520 - }, - { - "epoch": 1.09, - "learning_rate": 0.00017811728980831278, - "loss": 7.8327, - "step": 11530 - }, - { - "epoch": 1.1, - "learning_rate": 0.00017809831087492884, - "loss": 7.8483, - "step": 11540 - }, - { - "epoch": 1.1, - "learning_rate": 0.0001780793319415449, - "loss": 7.8825, - "step": 11550 - }, - { - "epoch": 1.1, - "learning_rate": 0.00017806035300816096, - "loss": 7.9321, - "step": 11560 - }, - { - "epoch": 1.1, - "learning_rate": 0.00017804137407477703, - "loss": 7.8518, - "step": 11570 - }, - { - "epoch": 1.1, - "learning_rate": 0.00017802239514139306, - "loss": 7.8816, - "step": 11580 - }, - { - "epoch": 1.1, - "learning_rate": 0.00017800341620800912, - "loss": 7.8593, - "step": 11590 - }, - { - "epoch": 1.1, - "learning_rate": 0.00017798443727462518, - "loss": 8.0316, - "step": 11600 - }, - { - "epoch": 1.1, - "learning_rate": 0.00017796545834124124, - "loss": 7.8125, - "step": 11610 - }, - { - "epoch": 1.1, - "learning_rate": 0.0001779464794078573, - "loss": 7.8441, - "step": 11620 - }, - { - "epoch": 1.1, - "learning_rate": 0.00017792750047447334, - "loss": 7.8297, - "step": 11630 - }, - { - "epoch": 1.1, - "learning_rate": 0.0001779085215410894, - "loss": 7.8087, - "step": 11640 - }, - { - "epoch": 1.11, - "learning_rate": 0.00017788954260770546, - "loss": 7.8674, - "step": 11650 - }, - { - "epoch": 1.11, - "learning_rate": 0.00017787056367432152, - "loss": 7.8526, - "step": 11660 - }, - { - "epoch": 1.11, - "learning_rate": 0.00017785158474093758, - "loss": 7.8793, - "step": 11670 - }, - { - "epoch": 1.11, - "learning_rate": 0.00017783260580755362, - "loss": 7.895, - "step": 11680 - }, - { - "epoch": 1.11, - "learning_rate": 0.00017781362687416968, - "loss": 7.7946, - "step": 11690 - }, - { - "epoch": 1.11, - "learning_rate": 0.00017779464794078574, - "loss": 7.8457, - "step": 11700 - }, - { - "epoch": 1.11, - "learning_rate": 0.0001777756690074018, - "loss": 7.8713, - "step": 11710 - }, - { - "epoch": 1.11, - "learning_rate": 0.00017775669007401786, - "loss": 7.8753, - "step": 11720 - }, - { - "epoch": 1.11, - "learning_rate": 0.00017773771114063392, - "loss": 7.8126, - "step": 11730 - }, - { - "epoch": 1.11, - "learning_rate": 0.00017771873220724996, - "loss": 7.7843, - "step": 11740 - }, - { - "epoch": 1.12, - "learning_rate": 0.00017769975327386602, - "loss": 7.9056, - "step": 11750 - }, - { - "epoch": 1.12, - "learning_rate": 0.00017768077434048208, - "loss": 7.7945, - "step": 11760 - }, - { - "epoch": 1.12, - "learning_rate": 0.00017766179540709814, - "loss": 7.9491, - "step": 11770 - }, - { - "epoch": 1.12, - "learning_rate": 0.0001776428164737142, - "loss": 7.8972, - "step": 11780 - }, - { - "epoch": 1.12, - "learning_rate": 0.00017762383754033024, - "loss": 7.889, - "step": 11790 - }, - { - "epoch": 1.12, - "learning_rate": 0.0001776048586069463, - "loss": 7.7927, - "step": 11800 - }, - { - "epoch": 1.12, - "learning_rate": 0.00017758587967356236, - "loss": 7.8745, - "step": 11810 - }, - { - "epoch": 1.12, - "learning_rate": 0.00017756690074017842, - "loss": 8.038, - "step": 11820 - }, - { - "epoch": 1.12, - "learning_rate": 0.00017754792180679448, - "loss": 7.8502, - "step": 11830 - }, - { - "epoch": 1.12, - "learning_rate": 0.00017752894287341052, - "loss": 7.8414, - "step": 11840 - }, - { - "epoch": 1.12, - "learning_rate": 0.00017750996394002658, - "loss": 7.8466, - "step": 11850 - }, - { - "epoch": 1.13, - "learning_rate": 0.00017749098500664264, - "loss": 7.8095, - "step": 11860 - }, - { - "epoch": 1.13, - "learning_rate": 0.0001774720060732587, - "loss": 7.9301, - "step": 11870 - }, - { - "epoch": 1.13, - "learning_rate": 0.00017745302713987476, - "loss": 7.8592, - "step": 11880 - }, - { - "epoch": 1.13, - "learning_rate": 0.00017743404820649082, - "loss": 7.9105, - "step": 11890 - }, - { - "epoch": 1.13, - "learning_rate": 0.00017741506927310686, - "loss": 7.8377, - "step": 11900 - }, - { - "epoch": 1.13, - "learning_rate": 0.00017739609033972292, - "loss": 7.8799, - "step": 11910 - }, - { - "epoch": 1.13, - "learning_rate": 0.00017737711140633898, - "loss": 7.8186, - "step": 11920 - }, - { - "epoch": 1.13, - "learning_rate": 0.00017735813247295504, - "loss": 7.8921, - "step": 11930 - }, - { - "epoch": 1.13, - "learning_rate": 0.0001773391535395711, - "loss": 7.8325, - "step": 11940 - }, - { - "epoch": 1.13, - "learning_rate": 0.00017732017460618714, - "loss": 7.8533, - "step": 11950 - }, - { - "epoch": 1.13, - "learning_rate": 0.0001773011956728032, - "loss": 7.7791, - "step": 11960 - }, - { - "epoch": 1.14, - "learning_rate": 0.00017728221673941926, - "loss": 7.841, - "step": 11970 - }, - { - "epoch": 1.14, - "learning_rate": 0.00017726323780603532, - "loss": 7.8712, - "step": 11980 - }, - { - "epoch": 1.14, - "learning_rate": 0.00017724425887265138, - "loss": 7.8939, - "step": 11990 - }, - { - "epoch": 1.14, - "learning_rate": 0.00017722527993926744, - "loss": 7.8825, - "step": 12000 - }, - { - "epoch": 1.14, - "learning_rate": 0.00017720630100588348, - "loss": 7.9506, - "step": 12010 - }, - { - "epoch": 1.14, - "learning_rate": 0.00017718732207249954, - "loss": 7.855, - "step": 12020 - }, - { - "epoch": 1.14, - "learning_rate": 0.0001771683431391156, - "loss": 7.7315, - "step": 12030 - }, - { - "epoch": 1.14, - "learning_rate": 0.00017714936420573166, - "loss": 7.791, - "step": 12040 - }, - { - "epoch": 1.14, - "learning_rate": 0.00017713038527234772, - "loss": 7.8175, - "step": 12050 - }, - { - "epoch": 1.14, - "learning_rate": 0.00017711140633896376, - "loss": 7.9103, - "step": 12060 - }, - { - "epoch": 1.15, - "learning_rate": 0.00017709242740557982, - "loss": 7.7869, - "step": 12070 - }, - { - "epoch": 1.15, - "learning_rate": 0.00017707344847219588, - "loss": 7.952, - "step": 12080 - }, - { - "epoch": 1.15, - "learning_rate": 0.00017705446953881194, - "loss": 7.8767, - "step": 12090 - }, - { - "epoch": 1.15, - "learning_rate": 0.000177035490605428, - "loss": 7.7993, - "step": 12100 - }, - { - "epoch": 1.15, - "learning_rate": 0.00017701651167204404, - "loss": 7.7995, - "step": 12110 - }, - { - "epoch": 1.15, - "learning_rate": 0.0001769975327386601, - "loss": 7.869, - "step": 12120 - }, - { - "epoch": 1.15, - "learning_rate": 0.00017697855380527616, - "loss": 7.8906, - "step": 12130 - }, - { - "epoch": 1.15, - "learning_rate": 0.00017695957487189222, - "loss": 7.9035, - "step": 12140 - }, - { - "epoch": 1.15, - "learning_rate": 0.00017694059593850828, - "loss": 7.9041, - "step": 12150 - }, - { - "epoch": 1.15, - "learning_rate": 0.00017692161700512434, - "loss": 7.8036, - "step": 12160 - }, - { - "epoch": 1.15, - "learning_rate": 0.00017690263807174038, - "loss": 7.9532, - "step": 12170 - }, - { - "epoch": 1.16, - "learning_rate": 0.00017688365913835644, - "loss": 7.8963, - "step": 12180 - }, - { - "epoch": 1.16, - "learning_rate": 0.0001768646802049725, - "loss": 7.876, - "step": 12190 - }, - { - "epoch": 1.16, - "learning_rate": 0.00017684570127158856, - "loss": 7.8861, - "step": 12200 - }, - { - "epoch": 1.16, - "learning_rate": 0.00017682672233820462, - "loss": 7.9612, - "step": 12210 - }, - { - "epoch": 1.16, - "learning_rate": 0.00017680774340482066, - "loss": 7.8485, - "step": 12220 - }, - { - "epoch": 1.16, - "learning_rate": 0.00017678876447143672, - "loss": 7.8881, - "step": 12230 - }, - { - "epoch": 1.16, - "learning_rate": 0.00017676978553805278, - "loss": 7.8058, - "step": 12240 - }, - { - "epoch": 1.16, - "learning_rate": 0.00017675080660466884, - "loss": 7.8773, - "step": 12250 - }, - { - "epoch": 1.16, - "learning_rate": 0.0001767318276712849, - "loss": 7.8872, - "step": 12260 - }, - { - "epoch": 1.16, - "learning_rate": 0.00017671284873790094, - "loss": 7.8364, - "step": 12270 - }, - { - "epoch": 1.17, - "learning_rate": 0.000176693869804517, - "loss": 7.8533, - "step": 12280 - }, - { - "epoch": 1.17, - "learning_rate": 0.00017667489087113306, - "loss": 7.9749, - "step": 12290 - }, - { - "epoch": 1.17, - "learning_rate": 0.00017665591193774912, - "loss": 7.9904, - "step": 12300 - }, - { - "epoch": 1.17, - "learning_rate": 0.00017663693300436518, - "loss": 7.8272, - "step": 12310 - }, - { - "epoch": 1.17, - "learning_rate": 0.00017661795407098124, - "loss": 7.8786, - "step": 12320 - }, - { - "epoch": 1.17, - "learning_rate": 0.00017659897513759728, - "loss": 7.8607, - "step": 12330 - }, - { - "epoch": 1.17, - "learning_rate": 0.00017657999620421334, - "loss": 7.9109, - "step": 12340 - }, - { - "epoch": 1.17, - "learning_rate": 0.0001765610172708294, - "loss": 7.8558, - "step": 12350 - }, - { - "epoch": 1.17, - "learning_rate": 0.00017654203833744546, - "loss": 8.0308, - "step": 12360 - }, - { - "epoch": 1.17, - "learning_rate": 0.00017652305940406152, - "loss": 7.8292, - "step": 12370 - }, - { - "epoch": 1.17, - "learning_rate": 0.00017650408047067756, - "loss": 7.927, - "step": 12380 - }, - { - "epoch": 1.18, - "learning_rate": 0.00017648510153729362, - "loss": 8.009, - "step": 12390 - }, - { - "epoch": 1.18, - "learning_rate": 0.00017646612260390968, - "loss": 7.8794, - "step": 12400 - }, - { - "epoch": 1.18, - "learning_rate": 0.00017644714367052574, - "loss": 7.9297, - "step": 12410 - }, - { - "epoch": 1.18, - "learning_rate": 0.0001764281647371418, - "loss": 7.9262, - "step": 12420 - }, - { - "epoch": 1.18, - "learning_rate": 0.00017640918580375786, - "loss": 7.89, - "step": 12430 - }, - { - "epoch": 1.18, - "learning_rate": 0.0001763902068703739, - "loss": 7.8293, - "step": 12440 - }, - { - "epoch": 1.18, - "learning_rate": 0.00017637122793698996, - "loss": 8.0054, - "step": 12450 - }, - { - "epoch": 1.18, - "learning_rate": 0.00017635224900360602, - "loss": 7.9354, - "step": 12460 - }, - { - "epoch": 1.18, - "learning_rate": 0.00017633327007022208, - "loss": 7.8386, - "step": 12470 - }, - { - "epoch": 1.18, - "learning_rate": 0.00017631429113683814, - "loss": 7.9373, - "step": 12480 - }, - { - "epoch": 1.19, - "learning_rate": 0.00017629531220345418, - "loss": 7.9175, - "step": 12490 - }, - { - "epoch": 1.19, - "learning_rate": 0.00017627633327007024, - "loss": 7.8685, - "step": 12500 - }, - { - "epoch": 1.19, - "learning_rate": 0.0001762573543366863, - "loss": 7.8426, - "step": 12510 - }, - { - "epoch": 1.19, - "learning_rate": 0.00017623837540330236, - "loss": 7.9382, - "step": 12520 - }, - { - "epoch": 1.19, - "learning_rate": 0.00017621939646991842, - "loss": 7.8224, - "step": 12530 - }, - { - "epoch": 1.19, - "learning_rate": 0.00017620041753653445, - "loss": 7.864, - "step": 12540 - }, - { - "epoch": 1.19, - "learning_rate": 0.00017618143860315052, - "loss": 7.9494, - "step": 12550 - }, - { - "epoch": 1.19, - "learning_rate": 0.00017616245966976658, - "loss": 7.8304, - "step": 12560 - }, - { - "epoch": 1.19, - "learning_rate": 0.00017614348073638264, - "loss": 8.0052, - "step": 12570 - }, - { - "epoch": 1.19, - "learning_rate": 0.0001761245018029987, - "loss": 7.942, - "step": 12580 - }, - { - "epoch": 1.19, - "learning_rate": 0.00017610552286961473, - "loss": 7.8737, - "step": 12590 - }, - { - "epoch": 1.2, - "learning_rate": 0.0001760865439362308, - "loss": 7.8656, - "step": 12600 - }, - { - "epoch": 1.2, - "learning_rate": 0.00017606756500284686, - "loss": 7.9479, - "step": 12610 - }, - { - "epoch": 1.2, - "learning_rate": 0.00017604858606946292, - "loss": 7.8352, - "step": 12620 - }, - { - "epoch": 1.2, - "learning_rate": 0.00017602960713607898, - "loss": 7.9308, - "step": 12630 - }, - { - "epoch": 1.2, - "learning_rate": 0.00017601062820269504, - "loss": 7.8263, - "step": 12640 - }, - { - "epoch": 1.2, - "learning_rate": 0.00017599164926931107, - "loss": 7.8724, - "step": 12650 - }, - { - "epoch": 1.2, - "learning_rate": 0.00017597267033592714, - "loss": 7.8569, - "step": 12660 - }, - { - "epoch": 1.2, - "learning_rate": 0.0001759536914025432, - "loss": 7.8869, - "step": 12670 - }, - { - "epoch": 1.2, - "learning_rate": 0.00017593471246915926, - "loss": 7.9312, - "step": 12680 - }, - { - "epoch": 1.2, - "learning_rate": 0.00017591573353577532, - "loss": 7.8857, - "step": 12690 - }, - { - "epoch": 1.21, - "learning_rate": 0.00017589675460239135, - "loss": 7.8912, - "step": 12700 - }, - { - "epoch": 1.21, - "learning_rate": 0.00017587777566900741, - "loss": 7.8062, - "step": 12710 - }, - { - "epoch": 1.21, - "learning_rate": 0.00017585879673562348, - "loss": 7.8309, - "step": 12720 - }, - { - "epoch": 1.21, - "learning_rate": 0.00017583981780223954, - "loss": 7.8373, - "step": 12730 - }, - { - "epoch": 1.21, - "learning_rate": 0.0001758208388688556, - "loss": 7.9202, - "step": 12740 - }, - { - "epoch": 1.21, - "learning_rate": 0.00017580185993547163, - "loss": 7.8688, - "step": 12750 - }, - { - "epoch": 1.21, - "learning_rate": 0.0001757828810020877, - "loss": 7.8819, - "step": 12760 - }, - { - "epoch": 1.21, - "learning_rate": 0.00017576390206870376, - "loss": 7.9604, - "step": 12770 - }, - { - "epoch": 1.21, - "learning_rate": 0.00017574492313531982, - "loss": 7.8108, - "step": 12780 - }, - { - "epoch": 1.21, - "learning_rate": 0.00017572594420193588, - "loss": 7.8294, - "step": 12790 - }, - { - "epoch": 1.21, - "learning_rate": 0.00017570696526855194, - "loss": 7.8233, - "step": 12800 - }, - { - "epoch": 1.22, - "learning_rate": 0.00017568798633516797, - "loss": 7.8958, - "step": 12810 - }, - { - "epoch": 1.22, - "learning_rate": 0.00017566900740178403, - "loss": 7.9881, - "step": 12820 - }, - { - "epoch": 1.22, - "learning_rate": 0.0001756500284684001, - "loss": 7.884, - "step": 12830 - }, - { - "epoch": 1.22, - "learning_rate": 0.00017563104953501616, - "loss": 7.8286, - "step": 12840 - }, - { - "epoch": 1.22, - "learning_rate": 0.00017561207060163222, - "loss": 7.8803, - "step": 12850 - }, - { - "epoch": 1.22, - "learning_rate": 0.00017559309166824825, - "loss": 7.8043, - "step": 12860 - }, - { - "epoch": 1.22, - "learning_rate": 0.00017557411273486431, - "loss": 7.8647, - "step": 12870 - }, - { - "epoch": 1.22, - "learning_rate": 0.00017555513380148038, - "loss": 7.918, - "step": 12880 - }, - { - "epoch": 1.22, - "learning_rate": 0.00017553615486809644, - "loss": 7.8076, - "step": 12890 - }, - { - "epoch": 1.22, - "learning_rate": 0.0001755171759347125, - "loss": 7.9134, - "step": 12900 - }, - { - "epoch": 1.23, - "learning_rate": 0.00017549819700132853, - "loss": 7.9278, - "step": 12910 - }, - { - "epoch": 1.23, - "learning_rate": 0.0001754792180679446, - "loss": 7.8538, - "step": 12920 - }, - { - "epoch": 1.23, - "learning_rate": 0.00017546023913456065, - "loss": 7.8656, - "step": 12930 - }, - { - "epoch": 1.23, - "learning_rate": 0.00017544126020117672, - "loss": 7.9321, - "step": 12940 - }, - { - "epoch": 1.23, - "learning_rate": 0.00017542228126779278, - "loss": 7.9646, - "step": 12950 - }, - { - "epoch": 1.23, - "learning_rate": 0.00017540330233440884, - "loss": 7.9302, - "step": 12960 - }, - { - "epoch": 1.23, - "learning_rate": 0.00017538432340102487, - "loss": 7.8743, - "step": 12970 - }, - { - "epoch": 1.23, - "learning_rate": 0.00017536534446764093, - "loss": 7.8306, - "step": 12980 - }, - { - "epoch": 1.23, - "learning_rate": 0.000175346365534257, - "loss": 7.8704, - "step": 12990 - }, - { - "epoch": 1.23, - "learning_rate": 0.00017532738660087306, - "loss": 7.8513, - "step": 13000 - }, - { - "epoch": 1.23, - "learning_rate": 0.00017530840766748912, - "loss": 7.8965, - "step": 13010 - }, - { - "epoch": 1.24, - "learning_rate": 0.00017528942873410515, - "loss": 7.9604, - "step": 13020 - }, - { - "epoch": 1.24, - "learning_rate": 0.0001752704498007212, - "loss": 7.9551, - "step": 13030 - }, - { - "epoch": 1.24, - "learning_rate": 0.00017525147086733727, - "loss": 7.915, - "step": 13040 - }, - { - "epoch": 1.24, - "learning_rate": 0.00017523249193395334, - "loss": 7.856, - "step": 13050 - }, - { - "epoch": 1.24, - "learning_rate": 0.0001752135130005694, - "loss": 7.8559, - "step": 13060 - }, - { - "epoch": 1.24, - "learning_rate": 0.00017519453406718543, - "loss": 7.8043, - "step": 13070 - }, - { - "epoch": 1.24, - "learning_rate": 0.0001751755551338015, - "loss": 7.9493, - "step": 13080 - }, - { - "epoch": 1.24, - "learning_rate": 0.00017515657620041755, - "loss": 7.7208, - "step": 13090 - }, - { - "epoch": 1.24, - "learning_rate": 0.00017513759726703361, - "loss": 7.8285, - "step": 13100 - }, - { - "epoch": 1.24, - "learning_rate": 0.00017511861833364968, - "loss": 7.8769, - "step": 13110 - }, - { - "epoch": 1.25, - "learning_rate": 0.0001750996394002657, - "loss": 7.9413, - "step": 13120 - }, - { - "epoch": 1.25, - "learning_rate": 0.00017508066046688177, - "loss": 7.9337, - "step": 13130 - }, - { - "epoch": 1.25, - "learning_rate": 0.00017506168153349783, - "loss": 7.8663, - "step": 13140 - }, - { - "epoch": 1.25, - "learning_rate": 0.0001750427026001139, - "loss": 7.8629, - "step": 13150 - }, - { - "epoch": 1.25, - "learning_rate": 0.00017502372366672996, - "loss": 7.9704, - "step": 13160 - }, - { - "epoch": 1.25, - "learning_rate": 0.00017500474473334602, - "loss": 7.9988, - "step": 13170 - }, - { - "epoch": 1.25, - "learning_rate": 0.00017498576579996205, - "loss": 7.9194, - "step": 13180 - }, - { - "epoch": 1.25, - "learning_rate": 0.0001749667868665781, - "loss": 7.9008, - "step": 13190 - }, - { - "epoch": 1.25, - "learning_rate": 0.00017494780793319417, - "loss": 7.9587, - "step": 13200 - }, - { - "epoch": 1.25, - "learning_rate": 0.00017492882899981023, - "loss": 7.8987, - "step": 13210 - }, - { - "epoch": 1.25, - "learning_rate": 0.0001749098500664263, - "loss": 7.9129, - "step": 13220 - }, - { - "epoch": 1.26, - "learning_rate": 0.00017489087113304233, - "loss": 7.8203, - "step": 13230 - }, - { - "epoch": 1.26, - "learning_rate": 0.0001748718921996584, - "loss": 7.7843, - "step": 13240 - }, - { - "epoch": 1.26, - "learning_rate": 0.00017485291326627445, - "loss": 7.8528, - "step": 13250 - }, - { - "epoch": 1.26, - "learning_rate": 0.00017483393433289051, - "loss": 7.9028, - "step": 13260 - }, - { - "epoch": 1.26, - "learning_rate": 0.00017481495539950658, - "loss": 7.9222, - "step": 13270 - }, - { - "epoch": 1.26, - "learning_rate": 0.0001747959764661226, - "loss": 7.9008, - "step": 13280 - }, - { - "epoch": 1.26, - "learning_rate": 0.00017477699753273867, - "loss": 7.7545, - "step": 13290 - }, - { - "epoch": 1.26, - "learning_rate": 0.00017475801859935473, - "loss": 7.8473, - "step": 13300 - }, - { - "epoch": 1.26, - "learning_rate": 0.0001747390396659708, - "loss": 7.8514, - "step": 13310 - }, - { - "epoch": 1.26, - "learning_rate": 0.00017472006073258685, - "loss": 7.8281, - "step": 13320 - }, - { - "epoch": 1.26, - "learning_rate": 0.00017470108179920292, - "loss": 7.9274, - "step": 13330 - }, - { - "epoch": 1.27, - "learning_rate": 0.00017468210286581895, - "loss": 7.8855, - "step": 13340 - }, - { - "epoch": 1.27, - "learning_rate": 0.000174663123932435, - "loss": 7.9234, - "step": 13350 - }, - { - "epoch": 1.27, - "learning_rate": 0.00017464414499905107, - "loss": 7.9236, - "step": 13360 - }, - { - "epoch": 1.27, - "learning_rate": 0.00017462516606566713, - "loss": 7.8509, - "step": 13370 - }, - { - "epoch": 1.27, - "learning_rate": 0.0001746061871322832, - "loss": 7.9477, - "step": 13380 - }, - { - "epoch": 1.27, - "learning_rate": 0.00017458720819889923, - "loss": 7.8218, - "step": 13390 - }, - { - "epoch": 1.27, - "learning_rate": 0.0001745682292655153, - "loss": 7.9648, - "step": 13400 - }, - { - "epoch": 1.27, - "learning_rate": 0.00017454925033213135, - "loss": 7.9526, - "step": 13410 - }, - { - "epoch": 1.27, - "learning_rate": 0.0001745302713987474, - "loss": 7.8842, - "step": 13420 - }, - { - "epoch": 1.27, - "learning_rate": 0.00017451129246536347, - "loss": 7.9777, - "step": 13430 - }, - { - "epoch": 1.28, - "learning_rate": 0.0001744923135319795, - "loss": 7.9093, - "step": 13440 - }, - { - "epoch": 1.28, - "learning_rate": 0.00017447333459859557, - "loss": 7.8409, - "step": 13450 - }, - { - "epoch": 1.28, - "learning_rate": 0.00017445435566521163, - "loss": 7.8896, - "step": 13460 - }, - { - "epoch": 1.28, - "learning_rate": 0.0001744353767318277, - "loss": 7.8449, - "step": 13470 - }, - { - "epoch": 1.28, - "learning_rate": 0.00017441639779844375, - "loss": 7.8894, - "step": 13480 - }, - { - "epoch": 1.28, - "learning_rate": 0.00017439741886505982, - "loss": 7.9111, - "step": 13490 - }, - { - "epoch": 1.28, - "learning_rate": 0.00017437843993167585, - "loss": 7.8638, - "step": 13500 - }, - { - "epoch": 1.28, - "learning_rate": 0.0001743594609982919, - "loss": 7.8821, - "step": 13510 - }, - { - "epoch": 1.28, - "learning_rate": 0.00017434048206490797, - "loss": 7.8377, - "step": 13520 - }, - { - "epoch": 1.28, - "learning_rate": 0.00017432150313152403, - "loss": 7.9781, - "step": 13530 - }, - { - "epoch": 1.28, - "learning_rate": 0.0001743025241981401, - "loss": 7.8674, - "step": 13540 - }, - { - "epoch": 1.29, - "learning_rate": 0.00017428354526475613, - "loss": 7.8371, - "step": 13550 - }, - { - "epoch": 1.29, - "learning_rate": 0.0001742645663313722, - "loss": 7.8285, - "step": 13560 - }, - { - "epoch": 1.29, - "learning_rate": 0.00017424558739798825, - "loss": 7.8862, - "step": 13570 - }, - { - "epoch": 1.29, - "learning_rate": 0.0001742266084646043, - "loss": 7.883, - "step": 13580 - }, - { - "epoch": 1.29, - "learning_rate": 0.00017420762953122037, - "loss": 7.9047, - "step": 13590 - }, - { - "epoch": 1.29, - "learning_rate": 0.0001741886505978364, - "loss": 7.9901, - "step": 13600 - }, - { - "epoch": 1.29, - "learning_rate": 0.00017416967166445247, - "loss": 7.9442, - "step": 13610 - }, - { - "epoch": 1.29, - "learning_rate": 0.00017415069273106853, - "loss": 7.8501, - "step": 13620 - }, - { - "epoch": 1.29, - "learning_rate": 0.0001741317137976846, - "loss": 7.7991, - "step": 13630 - }, - { - "epoch": 1.29, - "learning_rate": 0.00017411273486430065, - "loss": 7.7366, - "step": 13640 - }, - { - "epoch": 1.3, - "learning_rate": 0.0001740937559309167, - "loss": 7.9723, - "step": 13650 - }, - { - "epoch": 1.3, - "learning_rate": 0.00017407477699753275, - "loss": 7.9549, - "step": 13660 - }, - { - "epoch": 1.3, - "learning_rate": 0.0001740557980641488, - "loss": 7.8386, - "step": 13670 - }, - { - "epoch": 1.3, - "learning_rate": 0.00017403681913076487, - "loss": 7.816, - "step": 13680 - }, - { - "epoch": 1.3, - "learning_rate": 0.00017401784019738093, - "loss": 7.7909, - "step": 13690 - }, - { - "epoch": 1.3, - "learning_rate": 0.000173998861263997, - "loss": 7.859, - "step": 13700 - }, - { - "epoch": 1.3, - "learning_rate": 0.00017397988233061303, - "loss": 7.9202, - "step": 13710 - }, - { - "epoch": 1.3, - "learning_rate": 0.0001739609033972291, - "loss": 7.8609, - "step": 13720 - }, - { - "epoch": 1.3, - "learning_rate": 0.00017394192446384515, - "loss": 8.0131, - "step": 13730 - }, - { - "epoch": 1.3, - "learning_rate": 0.0001739229455304612, - "loss": 7.9343, - "step": 13740 - }, - { - "epoch": 1.3, - "learning_rate": 0.00017390396659707727, - "loss": 7.8946, - "step": 13750 - }, - { - "epoch": 1.31, - "learning_rate": 0.0001738849876636933, - "loss": 7.8813, - "step": 13760 - }, - { - "epoch": 1.31, - "learning_rate": 0.00017386600873030937, - "loss": 7.9063, - "step": 13770 - }, - { - "epoch": 1.31, - "learning_rate": 0.00017384702979692543, - "loss": 7.806, - "step": 13780 - }, - { - "epoch": 1.31, - "learning_rate": 0.0001738280508635415, - "loss": 7.8988, - "step": 13790 - }, - { - "epoch": 1.31, - "learning_rate": 0.00017380907193015755, - "loss": 7.9128, - "step": 13800 - }, - { - "epoch": 1.31, - "learning_rate": 0.00017379009299677359, - "loss": 7.9373, - "step": 13810 - }, - { - "epoch": 1.31, - "learning_rate": 0.00017377111406338965, - "loss": 7.8923, - "step": 13820 - }, - { - "epoch": 1.31, - "learning_rate": 0.0001737521351300057, - "loss": 7.91, - "step": 13830 - }, - { - "epoch": 1.31, - "learning_rate": 0.00017373315619662177, - "loss": 7.8137, - "step": 13840 - }, - { - "epoch": 1.31, - "learning_rate": 0.00017371417726323783, - "loss": 7.8817, - "step": 13850 - }, - { - "epoch": 1.32, - "learning_rate": 0.0001736951983298539, - "loss": 7.874, - "step": 13860 - }, - { - "epoch": 1.32, - "learning_rate": 0.00017367621939646993, - "loss": 7.8659, - "step": 13870 - }, - { - "epoch": 1.32, - "learning_rate": 0.000173657240463086, - "loss": 7.9372, - "step": 13880 - }, - { - "epoch": 1.32, - "learning_rate": 0.00017363826152970205, - "loss": 7.7769, - "step": 13890 - }, - { - "epoch": 1.32, - "learning_rate": 0.0001736192825963181, - "loss": 7.9974, - "step": 13900 - }, - { - "epoch": 1.32, - "learning_rate": 0.00017360030366293417, - "loss": 7.7119, - "step": 13910 - }, - { - "epoch": 1.32, - "learning_rate": 0.0001735813247295502, - "loss": 7.9405, - "step": 13920 - }, - { - "epoch": 1.32, - "learning_rate": 0.00017356234579616627, - "loss": 7.8877, - "step": 13930 - }, - { - "epoch": 1.32, - "learning_rate": 0.00017354336686278233, - "loss": 7.8819, - "step": 13940 - }, - { - "epoch": 1.32, - "learning_rate": 0.0001735243879293984, - "loss": 7.9628, - "step": 13950 - }, - { - "epoch": 1.32, - "learning_rate": 0.00017350540899601445, - "loss": 7.9056, - "step": 13960 - }, - { - "epoch": 1.33, - "learning_rate": 0.00017348643006263049, - "loss": 7.7774, - "step": 13970 - }, - { - "epoch": 1.33, - "learning_rate": 0.00017346745112924655, - "loss": 7.9571, - "step": 13980 - }, - { - "epoch": 1.33, - "learning_rate": 0.0001734484721958626, - "loss": 7.8876, - "step": 13990 - }, - { - "epoch": 1.33, - "learning_rate": 0.00017342949326247867, - "loss": 7.8631, - "step": 14000 - }, - { - "epoch": 1.33, - "learning_rate": 0.00017341051432909473, - "loss": 7.8511, - "step": 14010 - }, - { - "epoch": 1.33, - "learning_rate": 0.0001733915353957108, - "loss": 7.8158, - "step": 14020 - }, - { - "epoch": 1.33, - "learning_rate": 0.00017337255646232683, - "loss": 7.8307, - "step": 14030 - }, - { - "epoch": 1.33, - "learning_rate": 0.0001733535775289429, - "loss": 7.8747, - "step": 14040 - }, - { - "epoch": 1.33, - "learning_rate": 0.00017333459859555895, - "loss": 7.8938, - "step": 14050 - }, - { - "epoch": 1.33, - "learning_rate": 0.000173315619662175, - "loss": 7.9864, - "step": 14060 - }, - { - "epoch": 1.34, - "learning_rate": 0.00017329664072879107, - "loss": 7.8846, - "step": 14070 - }, - { - "epoch": 1.34, - "learning_rate": 0.0001732776617954071, - "loss": 7.7599, - "step": 14080 - }, - { - "epoch": 1.34, - "learning_rate": 0.00017325868286202317, - "loss": 7.8753, - "step": 14090 - }, - { - "epoch": 1.34, - "learning_rate": 0.00017323970392863923, - "loss": 7.8896, - "step": 14100 - }, - { - "epoch": 1.34, - "learning_rate": 0.0001732207249952553, - "loss": 7.9112, - "step": 14110 - }, - { - "epoch": 1.34, - "learning_rate": 0.00017320174606187135, - "loss": 7.8636, - "step": 14120 - }, - { - "epoch": 1.34, - "learning_rate": 0.00017318276712848738, - "loss": 7.8808, - "step": 14130 - }, - { - "epoch": 1.34, - "learning_rate": 0.00017316378819510345, - "loss": 7.9008, - "step": 14140 - }, - { - "epoch": 1.34, - "learning_rate": 0.0001731448092617195, - "loss": 7.828, - "step": 14150 - }, - { - "epoch": 1.34, - "learning_rate": 0.00017312583032833557, - "loss": 7.9159, - "step": 14160 - }, - { - "epoch": 1.34, - "learning_rate": 0.00017310685139495163, - "loss": 8.0473, - "step": 14170 - }, - { - "epoch": 1.35, - "learning_rate": 0.00017308787246156766, - "loss": 7.9296, - "step": 14180 - }, - { - "epoch": 1.35, - "learning_rate": 0.00017306889352818372, - "loss": 7.905, - "step": 14190 - }, - { - "epoch": 1.35, - "learning_rate": 0.00017304991459479979, - "loss": 7.8447, - "step": 14200 - }, - { - "epoch": 1.35, - "learning_rate": 0.00017303093566141585, - "loss": 7.8995, - "step": 14210 - }, - { - "epoch": 1.35, - "learning_rate": 0.0001730119567280319, - "loss": 7.9104, - "step": 14220 - }, - { - "epoch": 1.35, - "learning_rate": 0.00017299297779464797, - "loss": 7.8408, - "step": 14230 - }, - { - "epoch": 1.35, - "learning_rate": 0.000172973998861264, - "loss": 7.8335, - "step": 14240 - }, - { - "epoch": 1.35, - "learning_rate": 0.00017295501992788007, - "loss": 7.7993, - "step": 14250 - }, - { - "epoch": 1.35, - "learning_rate": 0.00017293604099449613, - "loss": 7.8379, - "step": 14260 - }, - { - "epoch": 1.35, - "learning_rate": 0.0001729170620611122, - "loss": 7.8835, - "step": 14270 - }, - { - "epoch": 1.36, - "learning_rate": 0.00017289808312772825, - "loss": 7.7236, - "step": 14280 - }, - { - "epoch": 1.36, - "learning_rate": 0.00017287910419434428, - "loss": 7.9565, - "step": 14290 - }, - { - "epoch": 1.36, - "learning_rate": 0.00017286012526096034, - "loss": 7.9173, - "step": 14300 - }, - { - "epoch": 1.36, - "learning_rate": 0.0001728411463275764, - "loss": 7.8003, - "step": 14310 - }, - { - "epoch": 1.36, - "learning_rate": 0.00017282216739419247, - "loss": 7.9398, - "step": 14320 - }, - { - "epoch": 1.36, - "learning_rate": 0.00017280318846080853, - "loss": 7.8766, - "step": 14330 - }, - { - "epoch": 1.36, - "learning_rate": 0.00017278420952742456, - "loss": 7.8843, - "step": 14340 - }, - { - "epoch": 1.36, - "learning_rate": 0.00017276523059404062, - "loss": 7.7653, - "step": 14350 - }, - { - "epoch": 1.36, - "learning_rate": 0.00017274625166065669, - "loss": 7.8396, - "step": 14360 - }, - { - "epoch": 1.36, - "learning_rate": 0.00017272727272727275, - "loss": 7.9473, - "step": 14370 - }, - { - "epoch": 1.36, - "learning_rate": 0.0001727082937938888, - "loss": 7.8679, - "step": 14380 - }, - { - "epoch": 1.37, - "learning_rate": 0.00017268931486050487, - "loss": 7.9527, - "step": 14390 - }, - { - "epoch": 1.37, - "learning_rate": 0.0001726703359271209, - "loss": 7.9523, - "step": 14400 - }, - { - "epoch": 1.37, - "learning_rate": 0.00017265135699373696, - "loss": 7.8998, - "step": 14410 - }, - { - "epoch": 1.37, - "learning_rate": 0.00017263237806035303, - "loss": 7.9366, - "step": 14420 - }, - { - "epoch": 1.37, - "learning_rate": 0.0001726133991269691, - "loss": 7.8584, - "step": 14430 - }, - { - "epoch": 1.37, - "learning_rate": 0.00017259442019358515, - "loss": 7.9295, - "step": 14440 - }, - { - "epoch": 1.37, - "learning_rate": 0.00017257544126020118, - "loss": 7.8002, - "step": 14450 - }, - { - "epoch": 1.37, - "learning_rate": 0.00017255646232681724, - "loss": 7.9771, - "step": 14460 - }, - { - "epoch": 1.37, - "learning_rate": 0.0001725374833934333, - "loss": 7.9719, - "step": 14470 - }, - { - "epoch": 1.37, - "learning_rate": 0.00017251850446004937, - "loss": 7.8804, - "step": 14480 - }, - { - "epoch": 1.38, - "learning_rate": 0.00017249952552666543, - "loss": 7.8649, - "step": 14490 - }, - { - "epoch": 1.38, - "learning_rate": 0.00017248054659328146, - "loss": 7.7958, - "step": 14500 - }, - { - "epoch": 1.38, - "learning_rate": 0.00017246156765989752, - "loss": 7.8773, - "step": 14510 - }, - { - "epoch": 1.38, - "learning_rate": 0.00017244258872651358, - "loss": 7.8707, - "step": 14520 - }, - { - "epoch": 1.38, - "learning_rate": 0.00017242360979312965, - "loss": 7.8253, - "step": 14530 - }, - { - "epoch": 1.38, - "learning_rate": 0.0001724046308597457, - "loss": 7.9562, - "step": 14540 - }, - { - "epoch": 1.38, - "learning_rate": 0.00017238565192636177, - "loss": 7.8765, - "step": 14550 - }, - { - "epoch": 1.38, - "learning_rate": 0.0001723666729929778, - "loss": 7.8363, - "step": 14560 - }, - { - "epoch": 1.38, - "learning_rate": 0.00017234769405959386, - "loss": 7.9363, - "step": 14570 - }, - { - "epoch": 1.38, - "learning_rate": 0.00017232871512620993, - "loss": 7.9193, - "step": 14580 - }, - { - "epoch": 1.38, - "learning_rate": 0.00017230973619282599, - "loss": 7.8003, - "step": 14590 - }, - { - "epoch": 1.39, - "learning_rate": 0.00017229075725944205, - "loss": 7.8655, - "step": 14600 - }, - { - "epoch": 1.39, - "learning_rate": 0.00017227177832605808, - "loss": 7.8038, - "step": 14610 - }, - { - "epoch": 1.39, - "learning_rate": 0.00017225279939267414, - "loss": 7.8799, - "step": 14620 - }, - { - "epoch": 1.39, - "learning_rate": 0.0001722338204592902, - "loss": 7.81, - "step": 14630 - }, - { - "epoch": 1.39, - "learning_rate": 0.00017221484152590627, - "loss": 7.9701, - "step": 14640 - }, - { - "epoch": 1.39, - "learning_rate": 0.00017219586259252233, - "loss": 7.9683, - "step": 14650 - }, - { - "epoch": 1.39, - "learning_rate": 0.00017217688365913836, - "loss": 7.9245, - "step": 14660 - }, - { - "epoch": 1.39, - "learning_rate": 0.00017215790472575442, - "loss": 7.9538, - "step": 14670 - }, - { - "epoch": 1.39, - "learning_rate": 0.00017213892579237048, - "loss": 7.8722, - "step": 14680 - }, - { - "epoch": 1.39, - "learning_rate": 0.00017211994685898654, - "loss": 7.8426, - "step": 14690 - }, - { - "epoch": 1.39, - "learning_rate": 0.0001721009679256026, - "loss": 7.8924, - "step": 14700 - }, - { - "epoch": 1.4, - "learning_rate": 0.00017208198899221864, - "loss": 7.9378, - "step": 14710 - }, - { - "epoch": 1.4, - "learning_rate": 0.0001720630100588347, - "loss": 7.8708, - "step": 14720 - }, - { - "epoch": 1.4, - "learning_rate": 0.00017204403112545076, - "loss": 7.8503, - "step": 14730 - }, - { - "epoch": 1.4, - "learning_rate": 0.00017202505219206682, - "loss": 7.8053, - "step": 14740 - }, - { - "epoch": 1.4, - "learning_rate": 0.00017200607325868289, - "loss": 7.8506, - "step": 14750 - }, - { - "epoch": 1.4, - "learning_rate": 0.00017198709432529895, - "loss": 7.8753, - "step": 14760 - }, - { - "epoch": 1.4, - "learning_rate": 0.00017196811539191498, - "loss": 7.8996, - "step": 14770 - }, - { - "epoch": 1.4, - "learning_rate": 0.00017194913645853104, - "loss": 7.8542, - "step": 14780 - }, - { - "epoch": 1.4, - "learning_rate": 0.0001719301575251471, - "loss": 7.8244, - "step": 14790 - }, - { - "epoch": 1.4, - "learning_rate": 0.00017191117859176316, - "loss": 7.8535, - "step": 14800 - }, - { - "epoch": 1.41, - "learning_rate": 0.00017189219965837923, - "loss": 7.9369, - "step": 14810 - }, - { - "epoch": 1.41, - "learning_rate": 0.00017187322072499526, - "loss": 7.9422, - "step": 14820 - }, - { - "epoch": 1.41, - "learning_rate": 0.00017185424179161132, - "loss": 7.9927, - "step": 14830 - }, - { - "epoch": 1.41, - "learning_rate": 0.00017183526285822738, - "loss": 7.852, - "step": 14840 - }, - { - "epoch": 1.41, - "learning_rate": 0.00017181628392484344, - "loss": 7.947, - "step": 14850 - }, - { - "epoch": 1.41, - "learning_rate": 0.0001717973049914595, - "loss": 7.8916, - "step": 14860 - }, - { - "epoch": 1.41, - "learning_rate": 0.00017177832605807554, - "loss": 7.9578, - "step": 14870 - }, - { - "epoch": 1.41, - "learning_rate": 0.0001717593471246916, - "loss": 7.975, - "step": 14880 - }, - { - "epoch": 1.41, - "learning_rate": 0.00017174036819130766, - "loss": 7.9863, - "step": 14890 - }, - { - "epoch": 1.41, - "learning_rate": 0.00017172138925792372, - "loss": 7.8942, - "step": 14900 - }, - { - "epoch": 1.41, - "learning_rate": 0.00017170241032453978, - "loss": 7.8584, - "step": 14910 - }, - { - "epoch": 1.42, - "learning_rate": 0.00017168343139115585, - "loss": 7.9821, - "step": 14920 - }, - { - "epoch": 1.42, - "learning_rate": 0.00017166445245777188, - "loss": 7.8786, - "step": 14930 - }, - { - "epoch": 1.42, - "learning_rate": 0.00017164547352438794, - "loss": 7.8327, - "step": 14940 - }, - { - "epoch": 1.42, - "learning_rate": 0.000171626494591004, - "loss": 7.8841, - "step": 14950 - }, - { - "epoch": 1.42, - "learning_rate": 0.00017160751565762006, - "loss": 7.8089, - "step": 14960 - }, - { - "epoch": 1.42, - "learning_rate": 0.00017158853672423613, - "loss": 7.8177, - "step": 14970 - }, - { - "epoch": 1.42, - "learning_rate": 0.00017156955779085216, - "loss": 7.9116, - "step": 14980 - }, - { - "epoch": 1.42, - "learning_rate": 0.00017155057885746822, - "loss": 7.898, - "step": 14990 - }, - { - "epoch": 1.42, - "learning_rate": 0.00017153159992408428, - "loss": 7.8931, - "step": 15000 - }, - { - "epoch": 1.42, - "learning_rate": 0.00017151262099070034, - "loss": 7.8535, - "step": 15010 - }, - { - "epoch": 1.43, - "learning_rate": 0.0001714936420573164, - "loss": 7.9483, - "step": 15020 - }, - { - "epoch": 1.43, - "learning_rate": 0.00017147466312393244, - "loss": 7.8901, - "step": 15030 - }, - { - "epoch": 1.43, - "learning_rate": 0.0001714556841905485, - "loss": 7.8264, - "step": 15040 - }, - { - "epoch": 1.43, - "learning_rate": 0.00017143670525716456, - "loss": 7.8682, - "step": 15050 - }, - { - "epoch": 1.43, - "learning_rate": 0.00017141772632378062, - "loss": 7.9068, - "step": 15060 - }, - { - "epoch": 1.43, - "learning_rate": 0.00017139874739039668, - "loss": 7.8185, - "step": 15070 - }, - { - "epoch": 1.43, - "learning_rate": 0.00017137976845701274, - "loss": 7.8866, - "step": 15080 - }, - { - "epoch": 1.43, - "learning_rate": 0.00017136078952362878, - "loss": 7.8157, - "step": 15090 - }, - { - "epoch": 1.43, - "learning_rate": 0.00017134181059024484, - "loss": 7.9163, - "step": 15100 - }, - { - "epoch": 1.43, - "learning_rate": 0.0001713228316568609, - "loss": 7.9395, - "step": 15110 - }, - { - "epoch": 1.43, - "learning_rate": 0.00017130385272347696, - "loss": 7.7885, - "step": 15120 - }, - { - "epoch": 1.44, - "learning_rate": 0.00017128487379009302, - "loss": 7.9234, - "step": 15130 - }, - { - "epoch": 1.44, - "learning_rate": 0.00017126589485670906, - "loss": 7.7686, - "step": 15140 - }, - { - "epoch": 1.44, - "learning_rate": 0.00017124691592332512, - "loss": 7.9083, - "step": 15150 - }, - { - "epoch": 1.44, - "learning_rate": 0.00017122793698994118, - "loss": 7.9122, - "step": 15160 - }, - { - "epoch": 1.44, - "learning_rate": 0.00017120895805655724, - "loss": 7.8276, - "step": 15170 - }, - { - "epoch": 1.44, - "learning_rate": 0.0001711899791231733, - "loss": 7.8299, - "step": 15180 - }, - { - "epoch": 1.44, - "learning_rate": 0.00017117100018978934, - "loss": 7.8205, - "step": 15190 - }, - { - "epoch": 1.44, - "learning_rate": 0.0001711520212564054, - "loss": 7.9315, - "step": 15200 - }, - { - "epoch": 1.44, - "learning_rate": 0.00017113304232302146, - "loss": 7.931, - "step": 15210 - }, - { - "epoch": 1.44, - "learning_rate": 0.00017111406338963752, - "loss": 7.7818, - "step": 15220 - }, - { - "epoch": 1.45, - "learning_rate": 0.00017109508445625358, - "loss": 7.8072, - "step": 15230 - }, - { - "epoch": 1.45, - "learning_rate": 0.00017107610552286962, - "loss": 7.8526, - "step": 15240 - }, - { - "epoch": 1.45, - "learning_rate": 0.00017105712658948568, - "loss": 7.8238, - "step": 15250 - }, - { - "epoch": 1.45, - "learning_rate": 0.00017103814765610174, - "loss": 7.8086, - "step": 15260 - }, - { - "epoch": 1.45, - "learning_rate": 0.0001710191687227178, - "loss": 7.9769, - "step": 15270 - }, - { - "epoch": 1.45, - "learning_rate": 0.00017100018978933386, - "loss": 7.9049, - "step": 15280 - }, - { - "epoch": 1.45, - "learning_rate": 0.00017098121085594992, - "loss": 7.9835, - "step": 15290 - }, - { - "epoch": 1.45, - "learning_rate": 0.00017096223192256596, - "loss": 7.9445, - "step": 15300 - }, - { - "epoch": 1.45, - "learning_rate": 0.00017094325298918202, - "loss": 7.9323, - "step": 15310 - }, - { - "epoch": 1.45, - "learning_rate": 0.00017092427405579808, - "loss": 7.8599, - "step": 15320 - }, - { - "epoch": 1.45, - "learning_rate": 0.00017090529512241414, - "loss": 7.8268, - "step": 15330 - }, - { - "epoch": 1.46, - "learning_rate": 0.0001708863161890302, - "loss": 7.9166, - "step": 15340 - }, - { - "epoch": 1.46, - "learning_rate": 0.00017086733725564624, - "loss": 7.8486, - "step": 15350 - }, - { - "epoch": 1.46, - "learning_rate": 0.0001708483583222623, - "loss": 7.9515, - "step": 15360 - }, - { - "epoch": 1.46, - "learning_rate": 0.00017082937938887836, - "loss": 8.0511, - "step": 15370 - }, - { - "epoch": 1.46, - "learning_rate": 0.00017081040045549442, - "loss": 7.8987, - "step": 15380 - }, - { - "epoch": 1.46, - "learning_rate": 0.00017079142152211048, - "loss": 7.8301, - "step": 15390 - }, - { - "epoch": 1.46, - "learning_rate": 0.00017077244258872652, - "loss": 7.8589, - "step": 15400 - }, - { - "epoch": 1.46, - "learning_rate": 0.00017075346365534258, - "loss": 7.8948, - "step": 15410 - }, - { - "epoch": 1.46, - "learning_rate": 0.00017073448472195864, - "loss": 7.9108, - "step": 15420 - }, - { - "epoch": 1.46, - "learning_rate": 0.0001707155057885747, - "loss": 7.8949, - "step": 15430 - }, - { - "epoch": 1.47, - "learning_rate": 0.00017069652685519076, - "loss": 7.8468, - "step": 15440 - }, - { - "epoch": 1.47, - "learning_rate": 0.00017067754792180682, - "loss": 7.8275, - "step": 15450 - }, - { - "epoch": 1.47, - "learning_rate": 0.00017065856898842286, - "loss": 7.8753, - "step": 15460 - }, - { - "epoch": 1.47, - "learning_rate": 0.00017063959005503892, - "loss": 7.9076, - "step": 15470 - }, - { - "epoch": 1.47, - "learning_rate": 0.00017062061112165498, - "loss": 7.9085, - "step": 15480 - }, - { - "epoch": 1.47, - "learning_rate": 0.00017060163218827104, - "loss": 7.8878, - "step": 15490 - }, - { - "epoch": 1.47, - "learning_rate": 0.0001705826532548871, - "loss": 7.8957, - "step": 15500 - }, - { - "epoch": 1.47, - "learning_rate": 0.00017056367432150314, - "loss": 7.9515, - "step": 15510 - }, - { - "epoch": 1.47, - "learning_rate": 0.0001705446953881192, - "loss": 7.8527, - "step": 15520 - }, - { - "epoch": 1.47, - "learning_rate": 0.00017052571645473526, - "loss": 7.8314, - "step": 15530 - }, - { - "epoch": 1.47, - "learning_rate": 0.00017050673752135132, - "loss": 7.8202, - "step": 15540 - }, - { - "epoch": 1.48, - "learning_rate": 0.00017048775858796738, - "loss": 7.8515, - "step": 15550 - }, - { - "epoch": 1.48, - "learning_rate": 0.00017046877965458342, - "loss": 7.9015, - "step": 15560 - }, - { - "epoch": 1.48, - "learning_rate": 0.00017044980072119948, - "loss": 7.9215, - "step": 15570 - }, - { - "epoch": 1.48, - "learning_rate": 0.00017043082178781554, - "loss": 7.9696, - "step": 15580 - }, - { - "epoch": 1.48, - "learning_rate": 0.0001704118428544316, - "loss": 7.8362, - "step": 15590 - }, - { - "epoch": 1.48, - "learning_rate": 0.00017039286392104766, - "loss": 7.9248, - "step": 15600 - }, - { - "epoch": 1.48, - "learning_rate": 0.00017037388498766372, - "loss": 7.8658, - "step": 15610 - }, - { - "epoch": 1.48, - "learning_rate": 0.00017035490605427976, - "loss": 7.9837, - "step": 15620 - }, - { - "epoch": 1.48, - "learning_rate": 0.00017033592712089582, - "loss": 7.8591, - "step": 15630 - }, - { - "epoch": 1.48, - "learning_rate": 0.00017031694818751188, - "loss": 7.9034, - "step": 15640 - }, - { - "epoch": 1.49, - "learning_rate": 0.00017029796925412794, - "loss": 7.876, - "step": 15650 - }, - { - "epoch": 1.49, - "learning_rate": 0.000170278990320744, - "loss": 7.8455, - "step": 15660 - }, - { - "epoch": 1.49, - "learning_rate": 0.00017026001138736004, - "loss": 7.8404, - "step": 15670 - }, - { - "epoch": 1.49, - "learning_rate": 0.0001702410324539761, - "loss": 7.7896, - "step": 15680 - }, - { - "epoch": 1.49, - "learning_rate": 0.00017022205352059216, - "loss": 7.93, - "step": 15690 - }, - { - "epoch": 1.49, - "learning_rate": 0.00017020307458720822, - "loss": 7.7629, - "step": 15700 - }, - { - "epoch": 1.49, - "learning_rate": 0.00017018409565382428, - "loss": 7.8888, - "step": 15710 - }, - { - "epoch": 1.49, - "learning_rate": 0.00017016511672044031, - "loss": 7.8987, - "step": 15720 - }, - { - "epoch": 1.49, - "learning_rate": 0.00017014613778705638, - "loss": 7.9326, - "step": 15730 - }, - { - "epoch": 1.49, - "learning_rate": 0.00017012715885367244, - "loss": 7.8659, - "step": 15740 - }, - { - "epoch": 1.49, - "learning_rate": 0.0001701081799202885, - "loss": 7.8767, - "step": 15750 - }, - { - "epoch": 1.5, - "learning_rate": 0.00017008920098690456, - "loss": 7.8689, - "step": 15760 - }, - { - "epoch": 1.5, - "learning_rate": 0.0001700702220535206, - "loss": 7.8666, - "step": 15770 - }, - { - "epoch": 1.5, - "learning_rate": 0.00017005124312013665, - "loss": 7.9272, - "step": 15780 - }, - { - "epoch": 1.5, - "learning_rate": 0.00017003226418675272, - "loss": 7.8931, - "step": 15790 - }, - { - "epoch": 1.5, - "learning_rate": 0.00017001328525336878, - "loss": 7.9144, - "step": 15800 - }, - { - "epoch": 1.5, - "learning_rate": 0.00016999430631998484, - "loss": 7.9967, - "step": 15810 - }, - { - "epoch": 1.5, - "learning_rate": 0.0001699753273866009, - "loss": 7.876, - "step": 15820 - }, - { - "epoch": 1.5, - "learning_rate": 0.00016995634845321693, - "loss": 7.8345, - "step": 15830 - }, - { - "epoch": 1.5, - "learning_rate": 0.000169937369519833, - "loss": 7.9589, - "step": 15840 - }, - { - "epoch": 1.5, - "learning_rate": 0.00016991839058644906, - "loss": 7.7895, - "step": 15850 - }, - { - "epoch": 1.51, - "learning_rate": 0.00016989941165306512, - "loss": 7.8575, - "step": 15860 - }, - { - "epoch": 1.51, - "learning_rate": 0.00016988043271968118, - "loss": 7.882, - "step": 15870 - }, - { - "epoch": 1.51, - "learning_rate": 0.0001698614537862972, - "loss": 7.7647, - "step": 15880 - }, - { - "epoch": 1.51, - "learning_rate": 0.00016984247485291327, - "loss": 7.8884, - "step": 15890 - }, - { - "epoch": 1.51, - "learning_rate": 0.00016982349591952934, - "loss": 7.8281, - "step": 15900 - }, - { - "epoch": 1.51, - "learning_rate": 0.0001698045169861454, - "loss": 7.7815, - "step": 15910 - }, - { - "epoch": 1.51, - "learning_rate": 0.00016978553805276146, - "loss": 7.844, - "step": 15920 - }, - { - "epoch": 1.51, - "learning_rate": 0.0001697665591193775, - "loss": 7.8023, - "step": 15930 - }, - { - "epoch": 1.51, - "learning_rate": 0.00016974758018599355, - "loss": 7.935, - "step": 15940 - }, - { - "epoch": 1.51, - "learning_rate": 0.00016972860125260962, - "loss": 7.8531, - "step": 15950 - }, - { - "epoch": 1.51, - "learning_rate": 0.00016970962231922568, - "loss": 7.8349, - "step": 15960 - }, - { - "epoch": 1.52, - "learning_rate": 0.00016969064338584174, - "loss": 7.8348, - "step": 15970 - }, - { - "epoch": 1.52, - "learning_rate": 0.0001696716644524578, - "loss": 7.8758, - "step": 15980 - }, - { - "epoch": 1.52, - "learning_rate": 0.00016965268551907383, - "loss": 7.851, - "step": 15990 - }, - { - "epoch": 1.52, - "learning_rate": 0.0001696337065856899, - "loss": 7.7998, - "step": 16000 - }, - { - "epoch": 1.52, - "learning_rate": 0.00016961472765230596, - "loss": 7.8882, - "step": 16010 - }, - { - "epoch": 1.52, - "learning_rate": 0.00016959574871892202, - "loss": 7.8111, - "step": 16020 - }, - { - "epoch": 1.52, - "learning_rate": 0.00016957676978553808, - "loss": 7.8344, - "step": 16030 - }, - { - "epoch": 1.52, - "learning_rate": 0.0001695577908521541, - "loss": 7.9109, - "step": 16040 - }, - { - "epoch": 1.52, - "learning_rate": 0.00016953881191877017, - "loss": 7.9387, - "step": 16050 - }, - { - "epoch": 1.52, - "learning_rate": 0.00016951983298538624, - "loss": 7.9571, - "step": 16060 - }, - { - "epoch": 1.52, - "learning_rate": 0.0001695008540520023, - "loss": 7.878, - "step": 16070 - }, - { - "epoch": 1.53, - "learning_rate": 0.00016948187511861836, - "loss": 7.9668, - "step": 16080 - }, - { - "epoch": 1.53, - "learning_rate": 0.0001694628961852344, - "loss": 7.891, - "step": 16090 - }, - { - "epoch": 1.53, - "learning_rate": 0.00016944391725185045, - "loss": 7.9827, - "step": 16100 - }, - { - "epoch": 1.53, - "learning_rate": 0.00016942493831846651, - "loss": 7.873, - "step": 16110 - }, - { - "epoch": 1.53, - "learning_rate": 0.00016940595938508258, - "loss": 7.8736, - "step": 16120 - }, - { - "epoch": 1.53, - "learning_rate": 0.00016938698045169864, - "loss": 7.9248, - "step": 16130 - }, - { - "epoch": 1.53, - "learning_rate": 0.0001693680015183147, - "loss": 7.8698, - "step": 16140 - }, - { - "epoch": 1.53, - "learning_rate": 0.00016934902258493073, - "loss": 7.9242, - "step": 16150 - }, - { - "epoch": 1.53, - "learning_rate": 0.0001693300436515468, - "loss": 7.9224, - "step": 16160 - }, - { - "epoch": 1.53, - "learning_rate": 0.00016931106471816285, - "loss": 7.9268, - "step": 16170 - }, - { - "epoch": 1.54, - "learning_rate": 0.00016929208578477892, - "loss": 7.776, - "step": 16180 - }, - { - "epoch": 1.54, - "learning_rate": 0.00016927310685139498, - "loss": 7.8963, - "step": 16190 - }, - { - "epoch": 1.54, - "learning_rate": 0.000169254127918011, - "loss": 7.8501, - "step": 16200 - }, - { - "epoch": 1.54, - "learning_rate": 0.00016923514898462707, - "loss": 7.9608, - "step": 16210 - }, - { - "epoch": 1.54, - "learning_rate": 0.00016921617005124313, - "loss": 7.8569, - "step": 16220 - }, - { - "epoch": 1.54, - "learning_rate": 0.0001691971911178592, - "loss": 7.879, - "step": 16230 - }, - { - "epoch": 1.54, - "learning_rate": 0.00016917821218447526, - "loss": 7.9396, - "step": 16240 - }, - { - "epoch": 1.54, - "learning_rate": 0.0001691592332510913, - "loss": 7.7836, - "step": 16250 - }, - { - "epoch": 1.54, - "learning_rate": 0.00016914025431770735, - "loss": 7.8752, - "step": 16260 - }, - { - "epoch": 1.54, - "learning_rate": 0.0001691212753843234, - "loss": 7.9026, - "step": 16270 - }, - { - "epoch": 1.54, - "learning_rate": 0.00016910229645093947, - "loss": 7.9376, - "step": 16280 - }, - { - "epoch": 1.55, - "learning_rate": 0.00016908331751755554, - "loss": 7.853, - "step": 16290 - }, - { - "epoch": 1.55, - "learning_rate": 0.00016906433858417157, - "loss": 7.8648, - "step": 16300 - }, - { - "epoch": 1.55, - "learning_rate": 0.00016904535965078763, - "loss": 7.8895, - "step": 16310 - }, - { - "epoch": 1.55, - "learning_rate": 0.0001690263807174037, - "loss": 7.8541, - "step": 16320 - }, - { - "epoch": 1.55, - "learning_rate": 0.00016900740178401975, - "loss": 7.8666, - "step": 16330 - }, - { - "epoch": 1.55, - "learning_rate": 0.00016898842285063582, - "loss": 7.9544, - "step": 16340 - }, - { - "epoch": 1.55, - "learning_rate": 0.00016896944391725188, - "loss": 7.9778, - "step": 16350 - }, - { - "epoch": 1.55, - "learning_rate": 0.0001689504649838679, - "loss": 7.8113, - "step": 16360 - }, - { - "epoch": 1.55, - "learning_rate": 0.00016893148605048397, - "loss": 7.905, - "step": 16370 - }, - { - "epoch": 1.55, - "learning_rate": 0.00016891250711710003, - "loss": 7.8119, - "step": 16380 - }, - { - "epoch": 1.56, - "learning_rate": 0.0001688935281837161, - "loss": 8.0333, - "step": 16390 - }, - { - "epoch": 1.56, - "learning_rate": 0.00016887454925033216, - "loss": 7.9359, - "step": 16400 - }, - { - "epoch": 1.56, - "learning_rate": 0.0001688555703169482, - "loss": 7.8126, - "step": 16410 - }, - { - "epoch": 1.56, - "learning_rate": 0.00016883659138356425, - "loss": 7.8503, - "step": 16420 - }, - { - "epoch": 1.56, - "learning_rate": 0.0001688176124501803, - "loss": 7.741, - "step": 16430 - }, - { - "epoch": 1.56, - "learning_rate": 0.00016879863351679637, - "loss": 7.9028, - "step": 16440 - }, - { - "epoch": 1.56, - "learning_rate": 0.00016877965458341244, - "loss": 7.7372, - "step": 16450 - }, - { - "epoch": 1.56, - "learning_rate": 0.00016876067565002847, - "loss": 7.815, - "step": 16460 - }, - { - "epoch": 1.56, - "learning_rate": 0.00016874169671664453, - "loss": 7.8055, - "step": 16470 - }, - { - "epoch": 1.56, - "learning_rate": 0.0001687227177832606, - "loss": 7.8315, - "step": 16480 - }, - { - "epoch": 1.56, - "learning_rate": 0.00016870373884987665, - "loss": 7.8523, - "step": 16490 - }, - { - "epoch": 1.57, - "learning_rate": 0.00016868475991649271, - "loss": 7.9014, - "step": 16500 - }, - { - "epoch": 1.57, - "learning_rate": 0.00016866578098310878, - "loss": 7.7971, - "step": 16510 - }, - { - "epoch": 1.57, - "learning_rate": 0.0001686468020497248, - "loss": 7.8215, - "step": 16520 - }, - { - "epoch": 1.57, - "learning_rate": 0.00016862782311634087, - "loss": 7.8164, - "step": 16530 - }, - { - "epoch": 1.57, - "learning_rate": 0.00016860884418295693, - "loss": 7.8579, - "step": 16540 - }, - { - "epoch": 1.57, - "learning_rate": 0.000168589865249573, - "loss": 7.9486, - "step": 16550 - }, - { - "epoch": 1.57, - "learning_rate": 0.00016857088631618906, - "loss": 7.9259, - "step": 16560 - }, - { - "epoch": 1.57, - "learning_rate": 0.0001685519073828051, - "loss": 7.8656, - "step": 16570 - }, - { - "epoch": 1.57, - "learning_rate": 0.00016853292844942115, - "loss": 7.729, - "step": 16580 - }, - { - "epoch": 1.57, - "learning_rate": 0.0001685139495160372, - "loss": 7.969, - "step": 16590 - }, - { - "epoch": 1.58, - "learning_rate": 0.00016849497058265327, - "loss": 7.8591, - "step": 16600 - }, - { - "epoch": 1.58, - "learning_rate": 0.00016847599164926933, - "loss": 7.781, - "step": 16610 - }, - { - "epoch": 1.58, - "learning_rate": 0.00016845701271588537, - "loss": 7.8389, - "step": 16620 - }, - { - "epoch": 1.58, - "learning_rate": 0.00016843803378250143, - "loss": 7.8652, - "step": 16630 - }, - { - "epoch": 1.58, - "learning_rate": 0.0001684190548491175, - "loss": 7.9893, - "step": 16640 - }, - { - "epoch": 1.58, - "learning_rate": 0.00016840007591573355, - "loss": 7.8324, - "step": 16650 - }, - { - "epoch": 1.58, - "learning_rate": 0.00016838109698234961, - "loss": 7.8324, - "step": 16660 - }, - { - "epoch": 1.58, - "learning_rate": 0.00016836211804896567, - "loss": 8.0189, - "step": 16670 - }, - { - "epoch": 1.58, - "learning_rate": 0.0001683431391155817, - "loss": 7.825, - "step": 16680 - }, - { - "epoch": 1.58, - "learning_rate": 0.00016832416018219777, - "loss": 7.8511, - "step": 16690 - }, - { - "epoch": 1.58, - "learning_rate": 0.00016830518124881383, - "loss": 7.873, - "step": 16700 - }, - { - "epoch": 1.59, - "learning_rate": 0.0001682862023154299, - "loss": 7.8481, - "step": 16710 - }, - { - "epoch": 1.59, - "learning_rate": 0.00016826722338204595, - "loss": 7.8828, - "step": 16720 - }, - { - "epoch": 1.59, - "learning_rate": 0.000168248244448662, - "loss": 7.8733, - "step": 16730 - }, - { - "epoch": 1.59, - "learning_rate": 0.00016822926551527805, - "loss": 7.968, - "step": 16740 - }, - { - "epoch": 1.59, - "learning_rate": 0.0001682102865818941, - "loss": 7.806, - "step": 16750 - }, - { - "epoch": 1.59, - "learning_rate": 0.00016819130764851017, - "loss": 7.8548, - "step": 16760 - }, - { - "epoch": 1.59, - "learning_rate": 0.00016817232871512623, - "loss": 7.8729, - "step": 16770 - }, - { - "epoch": 1.59, - "learning_rate": 0.00016815334978174227, - "loss": 7.842, - "step": 16780 - }, - { - "epoch": 1.59, - "learning_rate": 0.00016813437084835833, - "loss": 7.8119, - "step": 16790 - }, - { - "epoch": 1.59, - "learning_rate": 0.0001681153919149744, - "loss": 7.8486, - "step": 16800 - }, - { - "epoch": 1.6, - "learning_rate": 0.00016809641298159045, - "loss": 7.7795, - "step": 16810 - }, - { - "epoch": 1.6, - "learning_rate": 0.0001680774340482065, - "loss": 7.9168, - "step": 16820 - }, - { - "epoch": 1.6, - "learning_rate": 0.00016805845511482257, - "loss": 7.8893, - "step": 16830 - }, - { - "epoch": 1.6, - "learning_rate": 0.0001680394761814386, - "loss": 7.8338, - "step": 16840 - }, - { - "epoch": 1.6, - "learning_rate": 0.00016802049724805467, - "loss": 7.8607, - "step": 16850 - }, - { - "epoch": 1.6, - "learning_rate": 0.00016800151831467073, - "loss": 7.7818, - "step": 16860 - }, - { - "epoch": 1.6, - "learning_rate": 0.0001679825393812868, - "loss": 7.8901, - "step": 16870 - }, - { - "epoch": 1.6, - "learning_rate": 0.00016796356044790285, - "loss": 7.9417, - "step": 16880 - }, - { - "epoch": 1.6, - "learning_rate": 0.0001679445815145189, - "loss": 7.8965, - "step": 16890 - }, - { - "epoch": 1.6, - "learning_rate": 0.00016792560258113495, - "loss": 7.891, - "step": 16900 - }, - { - "epoch": 1.6, - "learning_rate": 0.000167906623647751, - "loss": 7.8494, - "step": 16910 - }, - { - "epoch": 1.61, - "learning_rate": 0.00016788764471436707, - "loss": 7.8651, - "step": 16920 - }, - { - "epoch": 1.61, - "learning_rate": 0.00016786866578098313, - "loss": 7.8477, - "step": 16930 - }, - { - "epoch": 1.61, - "learning_rate": 0.00016784968684759917, - "loss": 7.7701, - "step": 16940 - }, - { - "epoch": 1.61, - "learning_rate": 0.00016783070791421523, - "loss": 7.8612, - "step": 16950 - }, - { - "epoch": 1.61, - "learning_rate": 0.0001678117289808313, - "loss": 7.8107, - "step": 16960 - }, - { - "epoch": 1.61, - "learning_rate": 0.00016779275004744735, - "loss": 7.83, - "step": 16970 - }, - { - "epoch": 1.61, - "learning_rate": 0.0001677737711140634, - "loss": 7.9374, - "step": 16980 - }, - { - "epoch": 1.61, - "learning_rate": 0.00016775479218067945, - "loss": 7.885, - "step": 16990 - }, - { - "epoch": 1.61, - "learning_rate": 0.0001677358132472955, - "loss": 7.8927, - "step": 17000 - }, - { - "epoch": 1.61, - "learning_rate": 0.00016771683431391157, - "loss": 7.8929, - "step": 17010 - }, - { - "epoch": 1.62, - "learning_rate": 0.00016769785538052763, - "loss": 7.7847, - "step": 17020 - }, - { - "epoch": 1.62, - "learning_rate": 0.0001676788764471437, - "loss": 7.9631, - "step": 17030 - }, - { - "epoch": 1.62, - "learning_rate": 0.00016765989751375975, - "loss": 7.9438, - "step": 17040 - }, - { - "epoch": 1.62, - "learning_rate": 0.0001676409185803758, - "loss": 7.7907, - "step": 17050 - }, - { - "epoch": 1.62, - "learning_rate": 0.00016762193964699185, - "loss": 7.9279, - "step": 17060 - }, - { - "epoch": 1.62, - "learning_rate": 0.0001676029607136079, - "loss": 7.8815, - "step": 17070 - }, - { - "epoch": 1.62, - "learning_rate": 0.00016758398178022397, - "loss": 8.0089, - "step": 17080 - }, - { - "epoch": 1.62, - "learning_rate": 0.00016756500284684003, - "loss": 7.9016, - "step": 17090 - }, - { - "epoch": 1.62, - "learning_rate": 0.00016754602391345607, - "loss": 7.7751, - "step": 17100 - }, - { - "epoch": 1.62, - "learning_rate": 0.00016752704498007213, - "loss": 7.8964, - "step": 17110 - }, - { - "epoch": 1.62, - "learning_rate": 0.0001675080660466882, - "loss": 7.7608, - "step": 17120 - }, - { - "epoch": 1.63, - "learning_rate": 0.00016748908711330425, - "loss": 7.9548, - "step": 17130 - }, - { - "epoch": 1.63, - "learning_rate": 0.0001674701081799203, - "loss": 7.9519, - "step": 17140 - }, - { - "epoch": 1.63, - "learning_rate": 0.00016745112924653635, - "loss": 7.9832, - "step": 17150 - }, - { - "epoch": 1.63, - "learning_rate": 0.0001674321503131524, - "loss": 7.7887, - "step": 17160 - }, - { - "epoch": 1.63, - "learning_rate": 0.00016741317137976847, - "loss": 7.972, - "step": 17170 - }, - { - "epoch": 1.63, - "learning_rate": 0.00016739419244638453, - "loss": 7.8508, - "step": 17180 - }, - { - "epoch": 1.63, - "learning_rate": 0.0001673752135130006, - "loss": 7.8145, - "step": 17190 - }, - { - "epoch": 1.63, - "learning_rate": 0.00016735623457961665, - "loss": 7.7618, - "step": 17200 - }, - { - "epoch": 1.63, - "learning_rate": 0.00016733725564623269, - "loss": 7.8352, - "step": 17210 - }, - { - "epoch": 1.63, - "learning_rate": 0.00016731827671284875, - "loss": 7.8661, - "step": 17220 - }, - { - "epoch": 1.64, - "learning_rate": 0.0001672992977794648, - "loss": 7.9131, - "step": 17230 - }, - { - "epoch": 1.64, - "learning_rate": 0.00016728031884608087, - "loss": 7.7831, - "step": 17240 - }, - { - "epoch": 1.64, - "learning_rate": 0.00016726133991269693, - "loss": 7.8058, - "step": 17250 - }, - { - "epoch": 1.64, - "learning_rate": 0.00016724236097931296, - "loss": 7.8926, - "step": 17260 - }, - { - "epoch": 1.64, - "learning_rate": 0.00016722338204592903, - "loss": 7.8892, - "step": 17270 - }, - { - "epoch": 1.64, - "learning_rate": 0.0001672044031125451, - "loss": 7.9619, - "step": 17280 - }, - { - "epoch": 1.64, - "learning_rate": 0.00016718542417916115, - "loss": 7.8365, - "step": 17290 - }, - { - "epoch": 1.64, - "learning_rate": 0.0001671664452457772, - "loss": 7.8317, - "step": 17300 - }, - { - "epoch": 1.64, - "learning_rate": 0.00016714746631239324, - "loss": 7.8683, - "step": 17310 - }, - { - "epoch": 1.64, - "learning_rate": 0.0001671284873790093, - "loss": 7.9458, - "step": 17320 - }, - { - "epoch": 1.64, - "learning_rate": 0.00016710950844562537, - "loss": 7.9498, - "step": 17330 - }, - { - "epoch": 1.65, - "learning_rate": 0.00016709052951224143, - "loss": 7.9496, - "step": 17340 - }, - { - "epoch": 1.65, - "learning_rate": 0.0001670715505788575, - "loss": 7.9351, - "step": 17350 - }, - { - "epoch": 1.65, - "learning_rate": 0.00016705257164547355, - "loss": 7.8879, - "step": 17360 - }, - { - "epoch": 1.65, - "learning_rate": 0.00016703359271208958, - "loss": 7.8785, - "step": 17370 - }, - { - "epoch": 1.65, - "learning_rate": 0.00016701461377870565, - "loss": 7.8406, - "step": 17380 - }, - { - "epoch": 1.65, - "learning_rate": 0.0001669956348453217, - "loss": 7.8577, - "step": 17390 - }, - { - "epoch": 1.65, - "learning_rate": 0.00016697665591193777, - "loss": 7.8559, - "step": 17400 - }, - { - "epoch": 1.65, - "learning_rate": 0.00016695767697855383, - "loss": 7.8396, - "step": 17410 - }, - { - "epoch": 1.65, - "learning_rate": 0.00016693869804516986, - "loss": 7.9009, - "step": 17420 - }, - { - "epoch": 1.65, - "learning_rate": 0.00016691971911178593, - "loss": 7.8063, - "step": 17430 - }, - { - "epoch": 1.65, - "learning_rate": 0.000166900740178402, - "loss": 7.8424, - "step": 17440 - }, - { - "epoch": 1.66, - "learning_rate": 0.00016688176124501805, - "loss": 7.9207, - "step": 17450 - }, - { - "epoch": 1.66, - "learning_rate": 0.0001668627823116341, - "loss": 7.9105, - "step": 17460 - }, - { - "epoch": 1.66, - "learning_rate": 0.00016684380337825014, - "loss": 7.8379, - "step": 17470 - }, - { - "epoch": 1.66, - "learning_rate": 0.0001668248244448662, - "loss": 7.7831, - "step": 17480 - }, - { - "epoch": 1.66, - "learning_rate": 0.00016680584551148227, - "loss": 7.9231, - "step": 17490 - }, - { - "epoch": 1.66, - "learning_rate": 0.00016678686657809833, - "loss": 7.86, - "step": 17500 - }, - { - "epoch": 1.66, - "learning_rate": 0.0001667678876447144, - "loss": 7.8946, - "step": 17510 - }, - { - "epoch": 1.66, - "learning_rate": 0.00016674890871133042, - "loss": 7.9196, - "step": 17520 - }, - { - "epoch": 1.66, - "learning_rate": 0.00016672992977794648, - "loss": 7.9259, - "step": 17530 - }, - { - "epoch": 1.66, - "learning_rate": 0.00016671095084456255, - "loss": 7.9177, - "step": 17540 - }, - { - "epoch": 1.67, - "learning_rate": 0.0001666919719111786, - "loss": 7.8724, - "step": 17550 - }, - { - "epoch": 1.67, - "learning_rate": 0.00016667299297779467, - "loss": 7.8552, - "step": 17560 - }, - { - "epoch": 1.67, - "learning_rate": 0.00016665401404441073, - "loss": 7.8017, - "step": 17570 - }, - { - "epoch": 1.67, - "learning_rate": 0.00016663503511102676, - "loss": 7.843, - "step": 17580 - }, - { - "epoch": 1.67, - "learning_rate": 0.00016661605617764282, - "loss": 7.8963, - "step": 17590 - }, - { - "epoch": 1.67, - "learning_rate": 0.00016659707724425889, - "loss": 7.822, - "step": 17600 - }, - { - "epoch": 1.67, - "learning_rate": 0.00016657809831087495, - "loss": 7.844, - "step": 17610 - }, - { - "epoch": 1.67, - "learning_rate": 0.000166559119377491, - "loss": 7.8534, - "step": 17620 - }, - { - "epoch": 1.67, - "learning_rate": 0.00016654014044410704, - "loss": 7.9459, - "step": 17630 - }, - { - "epoch": 1.67, - "learning_rate": 0.0001665211615107231, - "loss": 7.8753, - "step": 17640 - }, - { - "epoch": 1.67, - "learning_rate": 0.00016650218257733917, - "loss": 7.8419, - "step": 17650 - }, - { - "epoch": 1.68, - "learning_rate": 0.00016648320364395523, - "loss": 7.9352, - "step": 17660 - }, - { - "epoch": 1.68, - "learning_rate": 0.0001664642247105713, - "loss": 7.8022, - "step": 17670 - }, - { - "epoch": 1.68, - "learning_rate": 0.00016644524577718732, - "loss": 7.9272, - "step": 17680 - }, - { - "epoch": 1.68, - "learning_rate": 0.00016642626684380338, - "loss": 7.8676, - "step": 17690 - }, - { - "epoch": 1.68, - "learning_rate": 0.00016640728791041944, - "loss": 7.9678, - "step": 17700 - }, - { - "epoch": 1.68, - "learning_rate": 0.0001663883089770355, - "loss": 7.919, - "step": 17710 - }, - { - "epoch": 1.68, - "learning_rate": 0.00016636933004365157, - "loss": 7.8938, - "step": 17720 - }, - { - "epoch": 1.68, - "learning_rate": 0.00016635035111026763, - "loss": 7.9555, - "step": 17730 - }, - { - "epoch": 1.68, - "learning_rate": 0.00016633137217688366, - "loss": 7.8598, - "step": 17740 - }, - { - "epoch": 1.68, - "learning_rate": 0.00016631239324349972, - "loss": 7.9456, - "step": 17750 - }, - { - "epoch": 1.69, - "learning_rate": 0.00016629341431011578, - "loss": 7.9102, - "step": 17760 - }, - { - "epoch": 1.69, - "learning_rate": 0.00016627443537673185, - "loss": 7.8114, - "step": 17770 - }, - { - "epoch": 1.69, - "learning_rate": 0.0001662554564433479, - "loss": 7.8783, - "step": 17780 - }, - { - "epoch": 1.69, - "learning_rate": 0.00016623647750996394, - "loss": 7.902, - "step": 17790 - }, - { - "epoch": 1.69, - "learning_rate": 0.00016621749857658, - "loss": 7.8586, - "step": 17800 - }, - { - "epoch": 1.69, - "learning_rate": 0.00016619851964319606, - "loss": 7.9233, - "step": 17810 - }, - { - "epoch": 1.69, - "learning_rate": 0.00016617954070981213, - "loss": 7.8281, - "step": 17820 - }, - { - "epoch": 1.69, - "learning_rate": 0.0001661605617764282, - "loss": 7.7916, - "step": 17830 - }, - { - "epoch": 1.69, - "learning_rate": 0.00016614158284304422, - "loss": 7.9073, - "step": 17840 - }, - { - "epoch": 1.69, - "learning_rate": 0.00016612260390966028, - "loss": 7.8684, - "step": 17850 - }, - { - "epoch": 1.69, - "learning_rate": 0.00016610362497627634, - "loss": 7.9522, - "step": 17860 - }, - { - "epoch": 1.7, - "learning_rate": 0.0001660846460428924, - "loss": 7.7994, - "step": 17870 - }, - { - "epoch": 1.7, - "learning_rate": 0.00016606566710950847, - "loss": 7.9712, - "step": 17880 - }, - { - "epoch": 1.7, - "learning_rate": 0.00016604668817612453, - "loss": 7.9103, - "step": 17890 - }, - { - "epoch": 1.7, - "learning_rate": 0.00016602770924274056, - "loss": 7.8875, - "step": 17900 - }, - { - "epoch": 1.7, - "learning_rate": 0.00016600873030935662, - "loss": 7.9074, - "step": 17910 - }, - { - "epoch": 1.7, - "learning_rate": 0.00016598975137597268, - "loss": 7.8733, - "step": 17920 - }, - { - "epoch": 1.7, - "learning_rate": 0.00016597077244258875, - "loss": 7.8312, - "step": 17930 - }, - { - "epoch": 1.7, - "learning_rate": 0.0001659517935092048, - "loss": 7.869, - "step": 17940 - }, - { - "epoch": 1.7, - "learning_rate": 0.00016593281457582084, - "loss": 7.8955, - "step": 17950 - }, - { - "epoch": 1.7, - "learning_rate": 0.0001659138356424369, - "loss": 7.8527, - "step": 17960 - }, - { - "epoch": 1.71, - "learning_rate": 0.00016589485670905296, - "loss": 7.8674, - "step": 17970 - }, - { - "epoch": 1.71, - "learning_rate": 0.00016587587777566902, - "loss": 7.8768, - "step": 17980 - }, - { - "epoch": 1.71, - "learning_rate": 0.00016585689884228509, - "loss": 7.9197, - "step": 17990 - }, - { - "epoch": 1.71, - "learning_rate": 0.00016583791990890112, - "loss": 7.8606, - "step": 18000 - }, - { - "epoch": 1.71, - "learning_rate": 0.00016581894097551718, - "loss": 7.9081, - "step": 18010 - }, - { - "epoch": 1.71, - "learning_rate": 0.00016579996204213324, - "loss": 7.8936, - "step": 18020 - }, - { - "epoch": 1.71, - "learning_rate": 0.0001657809831087493, - "loss": 7.9135, - "step": 18030 - }, - { - "epoch": 1.71, - "learning_rate": 0.00016576200417536537, - "loss": 7.824, - "step": 18040 - }, - { - "epoch": 1.71, - "learning_rate": 0.0001657430252419814, - "loss": 7.918, - "step": 18050 - }, - { - "epoch": 1.71, - "learning_rate": 0.00016572404630859746, - "loss": 7.9342, - "step": 18060 - }, - { - "epoch": 1.71, - "learning_rate": 0.00016570506737521352, - "loss": 7.8834, - "step": 18070 - }, - { - "epoch": 1.72, - "learning_rate": 0.00016568608844182958, - "loss": 7.7842, - "step": 18080 - }, - { - "epoch": 1.72, - "learning_rate": 0.00016566710950844564, - "loss": 7.8498, - "step": 18090 - }, - { - "epoch": 1.72, - "learning_rate": 0.0001656481305750617, - "loss": 7.8644, - "step": 18100 - }, - { - "epoch": 1.72, - "learning_rate": 0.00016562915164167774, - "loss": 7.8654, - "step": 18110 - }, - { - "epoch": 1.72, - "learning_rate": 0.0001656101727082938, - "loss": 7.8258, - "step": 18120 - }, - { - "epoch": 1.72, - "learning_rate": 0.00016559119377490986, - "loss": 7.8322, - "step": 18130 - }, - { - "epoch": 1.72, - "learning_rate": 0.00016557221484152592, - "loss": 7.9357, - "step": 18140 - }, - { - "epoch": 1.72, - "learning_rate": 0.00016555323590814198, - "loss": 7.8355, - "step": 18150 - }, - { - "epoch": 1.72, - "learning_rate": 0.00016553425697475802, - "loss": 7.8667, - "step": 18160 - }, - { - "epoch": 1.72, - "learning_rate": 0.00016551527804137408, - "loss": 7.8392, - "step": 18170 - }, - { - "epoch": 1.73, - "learning_rate": 0.00016549629910799014, - "loss": 7.8973, - "step": 18180 - }, - { - "epoch": 1.73, - "learning_rate": 0.0001654773201746062, - "loss": 7.9184, - "step": 18190 - }, - { - "epoch": 1.73, - "learning_rate": 0.00016545834124122226, - "loss": 7.8068, - "step": 18200 - }, - { - "epoch": 1.73, - "learning_rate": 0.0001654393623078383, - "loss": 7.9054, - "step": 18210 - }, - { - "epoch": 1.73, - "learning_rate": 0.00016542038337445436, - "loss": 7.8072, - "step": 18220 - }, - { - "epoch": 1.73, - "learning_rate": 0.00016540140444107042, - "loss": 7.8678, - "step": 18230 - }, - { - "epoch": 1.73, - "learning_rate": 0.00016538242550768648, - "loss": 7.9126, - "step": 18240 - }, - { - "epoch": 1.73, - "learning_rate": 0.00016536344657430254, - "loss": 7.9272, - "step": 18250 - }, - { - "epoch": 1.73, - "learning_rate": 0.0001653444676409186, - "loss": 7.9315, - "step": 18260 - }, - { - "epoch": 1.73, - "learning_rate": 0.00016532548870753464, - "loss": 7.8238, - "step": 18270 - }, - { - "epoch": 1.73, - "learning_rate": 0.0001653065097741507, - "loss": 7.8731, - "step": 18280 - }, - { - "epoch": 1.74, - "learning_rate": 0.00016528753084076676, - "loss": 7.8416, - "step": 18290 - }, - { - "epoch": 1.74, - "learning_rate": 0.00016526855190738282, - "loss": 7.9475, - "step": 18300 - }, - { - "epoch": 1.74, - "learning_rate": 0.00016524957297399888, - "loss": 7.9196, - "step": 18310 - }, - { - "epoch": 1.74, - "learning_rate": 0.00016523059404061492, - "loss": 7.8805, - "step": 18320 - }, - { - "epoch": 1.74, - "learning_rate": 0.00016521161510723098, - "loss": 7.8936, - "step": 18330 - }, - { - "epoch": 1.74, - "learning_rate": 0.00016519263617384704, - "loss": 7.8933, - "step": 18340 - }, - { - "epoch": 1.74, - "learning_rate": 0.0001651736572404631, - "loss": 7.8424, - "step": 18350 - }, - { - "epoch": 1.74, - "learning_rate": 0.00016515467830707916, - "loss": 7.9242, - "step": 18360 - }, - { - "epoch": 1.74, - "learning_rate": 0.0001651356993736952, - "loss": 7.8339, - "step": 18370 - }, - { - "epoch": 1.74, - "learning_rate": 0.00016511672044031126, - "loss": 8.0267, - "step": 18380 - }, - { - "epoch": 1.75, - "learning_rate": 0.00016509774150692732, - "loss": 7.9168, - "step": 18390 - }, - { - "epoch": 1.75, - "learning_rate": 0.00016507876257354338, - "loss": 7.8347, - "step": 18400 - }, - { - "epoch": 1.75, - "learning_rate": 0.00016505978364015944, - "loss": 7.8527, - "step": 18410 - }, - { - "epoch": 1.75, - "learning_rate": 0.0001650408047067755, - "loss": 7.8267, - "step": 18420 - }, - { - "epoch": 1.75, - "learning_rate": 0.00016502182577339154, - "loss": 7.813, - "step": 18430 - }, - { - "epoch": 1.75, - "learning_rate": 0.0001650028468400076, - "loss": 7.8132, - "step": 18440 - }, - { - "epoch": 1.75, - "learning_rate": 0.00016498386790662366, - "loss": 7.9144, - "step": 18450 - }, - { - "epoch": 1.75, - "learning_rate": 0.00016496488897323972, - "loss": 7.8411, - "step": 18460 - }, - { - "epoch": 1.75, - "learning_rate": 0.00016494591003985578, - "loss": 7.8303, - "step": 18470 - }, - { - "epoch": 1.75, - "learning_rate": 0.00016492693110647182, - "loss": 7.8463, - "step": 18480 - }, - { - "epoch": 1.75, - "learning_rate": 0.00016490795217308788, - "loss": 7.8983, - "step": 18490 - }, - { - "epoch": 1.76, - "learning_rate": 0.00016488897323970394, - "loss": 7.8702, - "step": 18500 - }, - { - "epoch": 1.76, - "learning_rate": 0.00016486999430632, - "loss": 7.7592, - "step": 18510 - }, - { - "epoch": 1.76, - "learning_rate": 0.00016485101537293606, - "loss": 7.7817, - "step": 18520 - }, - { - "epoch": 1.76, - "learning_rate": 0.0001648320364395521, - "loss": 7.845, - "step": 18530 - }, - { - "epoch": 1.76, - "learning_rate": 0.00016481305750616816, - "loss": 7.8269, - "step": 18540 - }, - { - "epoch": 1.76, - "learning_rate": 0.00016479407857278422, - "loss": 7.8083, - "step": 18550 - }, - { - "epoch": 1.76, - "learning_rate": 0.00016477509963940028, - "loss": 7.8609, - "step": 18560 - }, - { - "epoch": 1.76, - "learning_rate": 0.00016475612070601634, - "loss": 7.7417, - "step": 18570 - }, - { - "epoch": 1.76, - "learning_rate": 0.00016473714177263238, - "loss": 7.8818, - "step": 18580 - }, - { - "epoch": 1.76, - "learning_rate": 0.00016471816283924844, - "loss": 7.8636, - "step": 18590 - }, - { - "epoch": 1.77, - "learning_rate": 0.0001646991839058645, - "loss": 7.8993, - "step": 18600 - }, - { - "epoch": 1.77, - "learning_rate": 0.00016468020497248056, - "loss": 7.9056, - "step": 18610 - }, - { - "epoch": 1.77, - "learning_rate": 0.00016466122603909662, - "loss": 7.852, - "step": 18620 - }, - { - "epoch": 1.77, - "learning_rate": 0.00016464224710571268, - "loss": 7.8672, - "step": 18630 - }, - { - "epoch": 1.77, - "learning_rate": 0.00016462326817232872, - "loss": 7.8413, - "step": 18640 - }, - { - "epoch": 1.77, - "learning_rate": 0.00016460428923894478, - "loss": 7.8847, - "step": 18650 - }, - { - "epoch": 1.77, - "learning_rate": 0.00016458531030556084, - "loss": 7.8263, - "step": 18660 - }, - { - "epoch": 1.77, - "learning_rate": 0.0001645663313721769, - "loss": 7.8653, - "step": 18670 - }, - { - "epoch": 1.77, - "learning_rate": 0.00016454735243879296, - "loss": 7.857, - "step": 18680 - }, - { - "epoch": 1.77, - "learning_rate": 0.000164528373505409, - "loss": 8.0016, - "step": 18690 - }, - { - "epoch": 1.77, - "learning_rate": 0.00016450939457202506, - "loss": 7.8949, - "step": 18700 - }, - { - "epoch": 1.78, - "learning_rate": 0.00016449041563864112, - "loss": 7.8634, - "step": 18710 - }, - { - "epoch": 1.78, - "learning_rate": 0.00016447143670525718, - "loss": 7.8651, - "step": 18720 - }, - { - "epoch": 1.78, - "learning_rate": 0.00016445245777187324, - "loss": 7.8809, - "step": 18730 - }, - { - "epoch": 1.78, - "learning_rate": 0.00016443347883848928, - "loss": 7.8964, - "step": 18740 - }, - { - "epoch": 1.78, - "learning_rate": 0.00016441449990510534, - "loss": 7.8104, - "step": 18750 - }, - { - "epoch": 1.78, - "learning_rate": 0.0001643955209717214, - "loss": 7.911, - "step": 18760 - }, - { - "epoch": 1.78, - "learning_rate": 0.00016437654203833746, - "loss": 7.8607, - "step": 18770 - }, - { - "epoch": 1.78, - "learning_rate": 0.00016435756310495352, - "loss": 7.8043, - "step": 18780 - }, - { - "epoch": 1.78, - "learning_rate": 0.00016433858417156958, - "loss": 7.9543, - "step": 18790 - }, - { - "epoch": 1.78, - "learning_rate": 0.00016431960523818562, - "loss": 7.884, - "step": 18800 - }, - { - "epoch": 1.78, - "learning_rate": 0.00016430062630480168, - "loss": 7.9173, - "step": 18810 - }, - { - "epoch": 1.79, - "learning_rate": 0.00016428164737141774, - "loss": 7.7703, - "step": 18820 - }, - { - "epoch": 1.79, - "learning_rate": 0.0001642626684380338, - "loss": 7.8585, - "step": 18830 - }, - { - "epoch": 1.79, - "learning_rate": 0.00016424368950464986, - "loss": 7.9445, - "step": 18840 - }, - { - "epoch": 1.79, - "learning_rate": 0.0001642247105712659, - "loss": 7.9027, - "step": 18850 - }, - { - "epoch": 1.79, - "learning_rate": 0.00016420573163788196, - "loss": 7.8269, - "step": 18860 - }, - { - "epoch": 1.79, - "learning_rate": 0.00016418675270449802, - "loss": 7.8792, - "step": 18870 - }, - { - "epoch": 1.79, - "learning_rate": 0.00016416777377111408, - "loss": 7.9192, - "step": 18880 - }, - { - "epoch": 1.79, - "learning_rate": 0.00016414879483773014, - "loss": 7.9763, - "step": 18890 - }, - { - "epoch": 1.79, - "learning_rate": 0.00016412981590434617, - "loss": 7.8849, - "step": 18900 - }, - { - "epoch": 1.79, - "learning_rate": 0.00016411083697096224, - "loss": 7.8498, - "step": 18910 - }, - { - "epoch": 1.8, - "learning_rate": 0.0001640918580375783, - "loss": 7.807, - "step": 18920 - }, - { - "epoch": 1.8, - "learning_rate": 0.00016407287910419436, - "loss": 7.8959, - "step": 18930 - }, - { - "epoch": 1.8, - "learning_rate": 0.00016405390017081042, - "loss": 7.8704, - "step": 18940 - }, - { - "epoch": 1.8, - "learning_rate": 0.00016403492123742648, - "loss": 7.842, - "step": 18950 - }, - { - "epoch": 1.8, - "learning_rate": 0.00016401594230404251, - "loss": 7.8434, - "step": 18960 - }, - { - "epoch": 1.8, - "learning_rate": 0.00016399696337065858, - "loss": 7.8772, - "step": 18970 - }, - { - "epoch": 1.8, - "learning_rate": 0.00016397798443727464, - "loss": 7.9123, - "step": 18980 - }, - { - "epoch": 1.8, - "learning_rate": 0.0001639590055038907, - "loss": 7.8603, - "step": 18990 - }, - { - "epoch": 1.8, - "learning_rate": 0.00016394002657050676, - "loss": 7.871, - "step": 19000 - }, - { - "epoch": 1.8, - "learning_rate": 0.0001639210476371228, - "loss": 7.7392, - "step": 19010 - }, - { - "epoch": 1.8, - "learning_rate": 0.00016390206870373886, - "loss": 7.8537, - "step": 19020 - }, - { - "epoch": 1.81, - "learning_rate": 0.00016388308977035492, - "loss": 7.9253, - "step": 19030 - }, - { - "epoch": 1.81, - "learning_rate": 0.00016386411083697098, - "loss": 7.8189, - "step": 19040 - }, - { - "epoch": 1.81, - "learning_rate": 0.00016384513190358704, - "loss": 7.8834, - "step": 19050 - }, - { - "epoch": 1.81, - "learning_rate": 0.00016382615297020307, - "loss": 7.8735, - "step": 19060 - }, - { - "epoch": 1.81, - "learning_rate": 0.00016380717403681913, - "loss": 7.952, - "step": 19070 - }, - { - "epoch": 1.81, - "learning_rate": 0.0001637881951034352, - "loss": 7.7672, - "step": 19080 - }, - { - "epoch": 1.81, - "learning_rate": 0.00016376921617005126, - "loss": 7.88, - "step": 19090 - }, - { - "epoch": 1.81, - "learning_rate": 0.00016375023723666732, - "loss": 7.8417, - "step": 19100 - }, - { - "epoch": 1.81, - "learning_rate": 0.00016373125830328335, - "loss": 7.8759, - "step": 19110 - }, - { - "epoch": 1.81, - "learning_rate": 0.00016371227936989941, - "loss": 7.9401, - "step": 19120 - }, - { - "epoch": 1.82, - "learning_rate": 0.00016369330043651548, - "loss": 7.7792, - "step": 19130 - }, - { - "epoch": 1.82, - "learning_rate": 0.00016367432150313154, - "loss": 7.904, - "step": 19140 - }, - { - "epoch": 1.82, - "learning_rate": 0.0001636553425697476, - "loss": 7.8967, - "step": 19150 - }, - { - "epoch": 1.82, - "learning_rate": 0.00016363636363636366, - "loss": 7.9611, - "step": 19160 - }, - { - "epoch": 1.82, - "learning_rate": 0.0001636173847029797, - "loss": 7.8921, - "step": 19170 - }, - { - "epoch": 1.82, - "learning_rate": 0.00016359840576959575, - "loss": 7.9201, - "step": 19180 - }, - { - "epoch": 1.82, - "learning_rate": 0.00016357942683621182, - "loss": 7.9224, - "step": 19190 - }, - { - "epoch": 1.82, - "learning_rate": 0.00016356044790282788, - "loss": 7.8683, - "step": 19200 - }, - { - "epoch": 1.82, - "learning_rate": 0.00016354146896944394, - "loss": 7.9341, - "step": 19210 - }, - { - "epoch": 1.82, - "learning_rate": 0.00016352249003605997, - "loss": 7.9042, - "step": 19220 - }, - { - "epoch": 1.82, - "learning_rate": 0.00016350351110267603, - "loss": 7.8717, - "step": 19230 - }, - { - "epoch": 1.83, - "learning_rate": 0.0001634845321692921, - "loss": 7.9177, - "step": 19240 - }, - { - "epoch": 1.83, - "learning_rate": 0.00016346555323590816, - "loss": 7.9534, - "step": 19250 - }, - { - "epoch": 1.83, - "learning_rate": 0.00016344657430252422, - "loss": 7.8696, - "step": 19260 - }, - { - "epoch": 1.83, - "learning_rate": 0.00016342759536914025, - "loss": 7.857, - "step": 19270 - }, - { - "epoch": 1.83, - "learning_rate": 0.0001634086164357563, - "loss": 7.8828, - "step": 19280 - }, - { - "epoch": 1.83, - "learning_rate": 0.00016338963750237237, - "loss": 7.828, - "step": 19290 - }, - { - "epoch": 1.83, - "learning_rate": 0.00016337065856898844, - "loss": 7.9118, - "step": 19300 - }, - { - "epoch": 1.83, - "learning_rate": 0.0001633516796356045, - "loss": 7.8622, - "step": 19310 - }, - { - "epoch": 1.83, - "learning_rate": 0.00016333270070222056, - "loss": 7.8215, - "step": 19320 - }, - { - "epoch": 1.83, - "learning_rate": 0.0001633137217688366, - "loss": 7.9519, - "step": 19330 - }, - { - "epoch": 1.84, - "learning_rate": 0.00016329474283545265, - "loss": 7.8305, - "step": 19340 - }, - { - "epoch": 1.84, - "learning_rate": 0.00016327576390206871, - "loss": 7.9268, - "step": 19350 - }, - { - "epoch": 1.84, - "learning_rate": 0.00016325678496868478, - "loss": 7.8265, - "step": 19360 - }, - { - "epoch": 1.84, - "learning_rate": 0.00016323780603530084, - "loss": 7.8933, - "step": 19370 - }, - { - "epoch": 1.84, - "learning_rate": 0.00016321882710191687, - "loss": 7.8313, - "step": 19380 - }, - { - "epoch": 1.84, - "learning_rate": 0.00016319984816853293, - "loss": 7.8801, - "step": 19390 - }, - { - "epoch": 1.84, - "learning_rate": 0.000163180869235149, - "loss": 7.9039, - "step": 19400 - }, - { - "epoch": 1.84, - "learning_rate": 0.00016316189030176506, - "loss": 7.8568, - "step": 19410 - }, - { - "epoch": 1.84, - "learning_rate": 0.00016314291136838112, - "loss": 7.8692, - "step": 19420 - }, - { - "epoch": 1.84, - "learning_rate": 0.00016312393243499715, - "loss": 7.9184, - "step": 19430 - }, - { - "epoch": 1.84, - "learning_rate": 0.0001631049535016132, - "loss": 7.8313, - "step": 19440 - }, - { - "epoch": 1.85, - "learning_rate": 0.00016308597456822927, - "loss": 7.8239, - "step": 19450 - }, - { - "epoch": 1.85, - "learning_rate": 0.00016306699563484533, - "loss": 7.9093, - "step": 19460 - }, - { - "epoch": 1.85, - "learning_rate": 0.0001630480167014614, - "loss": 7.7581, - "step": 19470 - }, - { - "epoch": 1.85, - "learning_rate": 0.00016302903776807746, - "loss": 8.0268, - "step": 19480 - }, - { - "epoch": 1.85, - "learning_rate": 0.0001630100588346935, - "loss": 7.8394, - "step": 19490 - }, - { - "epoch": 1.85, - "learning_rate": 0.00016299107990130955, - "loss": 7.8193, - "step": 19500 - }, - { - "epoch": 1.85, - "learning_rate": 0.00016297210096792561, - "loss": 7.9236, - "step": 19510 - }, - { - "epoch": 1.85, - "learning_rate": 0.00016295312203454168, - "loss": 7.963, - "step": 19520 - }, - { - "epoch": 1.85, - "learning_rate": 0.00016293414310115774, - "loss": 7.9338, - "step": 19530 - }, - { - "epoch": 1.85, - "learning_rate": 0.00016291516416777377, - "loss": 7.8894, - "step": 19540 - }, - { - "epoch": 1.86, - "learning_rate": 0.00016289618523438983, - "loss": 7.9377, - "step": 19550 - }, - { - "epoch": 1.86, - "learning_rate": 0.0001628772063010059, - "loss": 7.8134, - "step": 19560 - }, - { - "epoch": 1.86, - "learning_rate": 0.00016285822736762195, - "loss": 7.9315, - "step": 19570 - }, - { - "epoch": 1.86, - "learning_rate": 0.00016283924843423802, - "loss": 7.8767, - "step": 19580 - }, - { - "epoch": 1.86, - "learning_rate": 0.00016282026950085405, - "loss": 7.7828, - "step": 19590 - }, - { - "epoch": 1.86, - "learning_rate": 0.0001628012905674701, - "loss": 7.8605, - "step": 19600 - }, - { - "epoch": 1.86, - "learning_rate": 0.00016278231163408617, - "loss": 7.7621, - "step": 19610 - }, - { - "epoch": 1.86, - "learning_rate": 0.00016276333270070223, - "loss": 7.8432, - "step": 19620 - }, - { - "epoch": 1.86, - "learning_rate": 0.0001627443537673183, - "loss": 7.898, - "step": 19630 - }, - { - "epoch": 1.86, - "learning_rate": 0.00016272537483393433, - "loss": 7.9226, - "step": 19640 - }, - { - "epoch": 1.86, - "learning_rate": 0.0001627063959005504, - "loss": 7.8093, - "step": 19650 - }, - { - "epoch": 1.87, - "learning_rate": 0.00016268741696716645, - "loss": 7.856, - "step": 19660 - }, - { - "epoch": 1.87, - "learning_rate": 0.0001626684380337825, - "loss": 7.9024, - "step": 19670 - }, - { - "epoch": 1.87, - "learning_rate": 0.00016264945910039857, - "loss": 7.9161, - "step": 19680 - }, - { - "epoch": 1.87, - "learning_rate": 0.00016263048016701464, - "loss": 7.893, - "step": 19690 - }, - { - "epoch": 1.87, - "learning_rate": 0.00016261150123363067, - "loss": 7.8729, - "step": 19700 - }, - { - "epoch": 1.87, - "learning_rate": 0.00016259252230024673, - "loss": 7.8657, - "step": 19710 - }, - { - "epoch": 1.87, - "learning_rate": 0.0001625735433668628, - "loss": 7.9963, - "step": 19720 - }, - { - "epoch": 1.87, - "learning_rate": 0.00016255456443347885, - "loss": 7.877, - "step": 19730 - }, - { - "epoch": 1.87, - "learning_rate": 0.00016253558550009491, - "loss": 7.868, - "step": 19740 - }, - { - "epoch": 1.87, - "learning_rate": 0.00016251660656671095, - "loss": 7.8692, - "step": 19750 - }, - { - "epoch": 1.88, - "learning_rate": 0.000162497627633327, - "loss": 7.9183, - "step": 19760 - }, - { - "epoch": 1.88, - "learning_rate": 0.00016247864869994307, - "loss": 7.8108, - "step": 19770 - }, - { - "epoch": 1.88, - "learning_rate": 0.00016245966976655913, - "loss": 7.7614, - "step": 19780 - }, - { - "epoch": 1.88, - "learning_rate": 0.0001624406908331752, - "loss": 7.9221, - "step": 19790 - }, - { - "epoch": 1.88, - "learning_rate": 0.00016242171189979123, - "loss": 7.959, - "step": 19800 - }, - { - "epoch": 1.88, - "learning_rate": 0.0001624027329664073, - "loss": 7.8201, - "step": 19810 - }, - { - "epoch": 1.88, - "learning_rate": 0.00016238375403302335, - "loss": 7.8931, - "step": 19820 - }, - { - "epoch": 1.88, - "learning_rate": 0.0001623647750996394, - "loss": 7.7885, - "step": 19830 - }, - { - "epoch": 1.88, - "learning_rate": 0.00016234579616625547, - "loss": 7.937, - "step": 19840 - }, - { - "epoch": 1.88, - "learning_rate": 0.00016232681723287153, - "loss": 7.8538, - "step": 19850 - }, - { - "epoch": 1.88, - "learning_rate": 0.00016230783829948757, - "loss": 7.9476, - "step": 19860 - }, - { - "epoch": 1.89, - "learning_rate": 0.00016228885936610363, - "loss": 7.8243, - "step": 19870 - }, - { - "epoch": 1.89, - "learning_rate": 0.0001622698804327197, - "loss": 7.9034, - "step": 19880 - }, - { - "epoch": 1.89, - "learning_rate": 0.00016225090149933575, - "loss": 7.8047, - "step": 19890 - }, - { - "epoch": 1.89, - "learning_rate": 0.00016223192256595181, - "loss": 7.9023, - "step": 19900 - }, - { - "epoch": 1.89, - "learning_rate": 0.00016221294363256785, - "loss": 7.7868, - "step": 19910 - }, - { - "epoch": 1.89, - "learning_rate": 0.0001621939646991839, - "loss": 7.8242, - "step": 19920 - }, - { - "epoch": 1.89, - "learning_rate": 0.00016217498576579997, - "loss": 7.8165, - "step": 19930 - }, - { - "epoch": 1.89, - "learning_rate": 0.00016215600683241603, - "loss": 7.9508, - "step": 19940 - }, - { - "epoch": 1.89, - "learning_rate": 0.0001621370278990321, - "loss": 7.8908, - "step": 19950 - }, - { - "epoch": 1.89, - "learning_rate": 0.00016211804896564813, - "loss": 7.892, - "step": 19960 - }, - { - "epoch": 1.9, - "learning_rate": 0.0001620990700322642, - "loss": 7.8592, - "step": 19970 - }, - { - "epoch": 1.9, - "learning_rate": 0.00016208009109888025, - "loss": 7.9194, - "step": 19980 - }, - { - "epoch": 1.9, - "learning_rate": 0.0001620611121654963, - "loss": 7.8351, - "step": 19990 - }, - { - "epoch": 1.9, - "learning_rate": 0.00016204213323211237, - "loss": 7.9319, - "step": 20000 - }, - { - "epoch": 1.9, - "learning_rate": 0.00016202315429872843, - "loss": 7.8847, - "step": 20010 - }, - { - "epoch": 1.9, - "learning_rate": 0.00016200417536534447, - "loss": 7.8301, - "step": 20020 - }, - { - "epoch": 1.9, - "learning_rate": 0.00016198519643196053, - "loss": 7.8568, - "step": 20030 - }, - { - "epoch": 1.9, - "learning_rate": 0.0001619662174985766, - "loss": 7.8261, - "step": 20040 - }, - { - "epoch": 1.9, - "learning_rate": 0.00016194723856519265, - "loss": 8.0282, - "step": 20050 - }, - { - "epoch": 1.9, - "learning_rate": 0.0001619282596318087, - "loss": 7.9086, - "step": 20060 - }, - { - "epoch": 1.9, - "learning_rate": 0.00016190928069842475, - "loss": 7.9526, - "step": 20070 - }, - { - "epoch": 1.91, - "learning_rate": 0.0001618903017650408, - "loss": 7.7902, - "step": 20080 - }, - { - "epoch": 1.91, - "learning_rate": 0.00016187132283165687, - "loss": 7.8771, - "step": 20090 - }, - { - "epoch": 1.91, - "learning_rate": 0.00016185234389827293, - "loss": 7.8308, - "step": 20100 - }, - { - "epoch": 1.91, - "learning_rate": 0.000161833364964889, - "loss": 7.7032, - "step": 20110 - }, - { - "epoch": 1.91, - "learning_rate": 0.00016181438603150503, - "loss": 7.8337, - "step": 20120 - }, - { - "epoch": 1.91, - "learning_rate": 0.0001617954070981211, - "loss": 7.8936, - "step": 20130 - }, - { - "epoch": 1.91, - "learning_rate": 0.00016177642816473715, - "loss": 7.9296, - "step": 20140 - }, - { - "epoch": 1.91, - "learning_rate": 0.0001617574492313532, - "loss": 7.9812, - "step": 20150 - }, - { - "epoch": 1.91, - "learning_rate": 0.00016173847029796927, - "loss": 7.9834, - "step": 20160 - }, - { - "epoch": 1.91, - "learning_rate": 0.0001617194913645853, - "loss": 7.8294, - "step": 20170 - }, - { - "epoch": 1.91, - "learning_rate": 0.00016170051243120137, - "loss": 7.8923, - "step": 20180 - }, - { - "epoch": 1.92, - "learning_rate": 0.00016168153349781743, - "loss": 7.8267, - "step": 20190 - }, - { - "epoch": 1.92, - "learning_rate": 0.0001616625545644335, - "loss": 7.9175, - "step": 20200 - }, - { - "epoch": 1.92, - "learning_rate": 0.00016164357563104955, - "loss": 7.7191, - "step": 20210 - }, - { - "epoch": 1.92, - "learning_rate": 0.0001616245966976656, - "loss": 7.8315, - "step": 20220 - }, - { - "epoch": 1.92, - "learning_rate": 0.00016160561776428165, - "loss": 7.8252, - "step": 20230 - }, - { - "epoch": 1.92, - "learning_rate": 0.0001615866388308977, - "loss": 7.872, - "step": 20240 - }, - { - "epoch": 1.92, - "learning_rate": 0.00016156765989751377, - "loss": 7.8921, - "step": 20250 - }, - { - "epoch": 1.92, - "learning_rate": 0.00016154868096412983, - "loss": 7.8788, - "step": 20260 - }, - { - "epoch": 1.92, - "learning_rate": 0.0001615297020307459, - "loss": 7.8861, - "step": 20270 - }, - { - "epoch": 1.92, - "learning_rate": 0.00016151072309736193, - "loss": 7.8892, - "step": 20280 - }, - { - "epoch": 1.93, - "learning_rate": 0.000161491744163978, - "loss": 7.8554, - "step": 20290 - }, - { - "epoch": 1.93, - "learning_rate": 0.00016147276523059405, - "loss": 7.9014, - "step": 20300 - }, - { - "epoch": 1.93, - "learning_rate": 0.0001614537862972101, - "loss": 7.8427, - "step": 20310 - }, - { - "epoch": 1.93, - "learning_rate": 0.00016143480736382617, - "loss": 7.8983, - "step": 20320 - }, - { - "epoch": 1.93, - "learning_rate": 0.0001614158284304422, - "loss": 7.8983, - "step": 20330 - }, - { - "epoch": 1.93, - "learning_rate": 0.00016139684949705827, - "loss": 7.7865, - "step": 20340 - }, - { - "epoch": 1.93, - "learning_rate": 0.00016137787056367433, - "loss": 7.8339, - "step": 20350 - }, - { - "epoch": 1.93, - "learning_rate": 0.0001613588916302904, - "loss": 7.8371, - "step": 20360 - }, - { - "epoch": 1.93, - "learning_rate": 0.00016133991269690645, - "loss": 7.7893, - "step": 20370 - }, - { - "epoch": 1.93, - "learning_rate": 0.0001613209337635225, - "loss": 7.9403, - "step": 20380 - }, - { - "epoch": 1.93, - "learning_rate": 0.00016130195483013855, - "loss": 7.886, - "step": 20390 - }, - { - "epoch": 1.94, - "learning_rate": 0.0001612829758967546, - "loss": 7.8127, - "step": 20400 - }, - { - "epoch": 1.94, - "learning_rate": 0.00016126399696337067, - "loss": 7.8164, - "step": 20410 - }, - { - "epoch": 1.94, - "learning_rate": 0.00016124501802998673, - "loss": 7.8491, - "step": 20420 - }, - { - "epoch": 1.94, - "learning_rate": 0.0001612260390966028, - "loss": 7.7832, - "step": 20430 - }, - { - "epoch": 1.94, - "learning_rate": 0.00016120706016321882, - "loss": 7.8549, - "step": 20440 - }, - { - "epoch": 1.94, - "learning_rate": 0.00016118808122983489, - "loss": 8.0095, - "step": 20450 - }, - { - "epoch": 1.94, - "learning_rate": 0.00016116910229645095, - "loss": 7.7896, - "step": 20460 - }, - { - "epoch": 1.94, - "learning_rate": 0.000161150123363067, - "loss": 7.8677, - "step": 20470 - }, - { - "epoch": 1.94, - "learning_rate": 0.00016113114442968307, - "loss": 7.8216, - "step": 20480 - }, - { - "epoch": 1.94, - "learning_rate": 0.0001611121654962991, - "loss": 7.7524, - "step": 20490 - }, - { - "epoch": 1.95, - "learning_rate": 0.00016109318656291517, - "loss": 7.78, - "step": 20500 - }, - { - "epoch": 1.95, - "learning_rate": 0.00016107420762953123, - "loss": 7.9191, - "step": 20510 - }, - { - "epoch": 1.95, - "learning_rate": 0.0001610552286961473, - "loss": 7.8323, - "step": 20520 - }, - { - "epoch": 1.95, - "learning_rate": 0.00016103624976276335, - "loss": 7.7972, - "step": 20530 - }, - { - "epoch": 1.95, - "learning_rate": 0.0001610172708293794, - "loss": 7.8524, - "step": 20540 - }, - { - "epoch": 1.95, - "learning_rate": 0.00016099829189599544, - "loss": 7.8708, - "step": 20550 - }, - { - "epoch": 1.95, - "learning_rate": 0.0001609793129626115, - "loss": 7.8355, - "step": 20560 - }, - { - "epoch": 1.95, - "learning_rate": 0.00016096033402922757, - "loss": 7.8517, - "step": 20570 - }, - { - "epoch": 1.95, - "learning_rate": 0.00016094135509584363, - "loss": 7.874, - "step": 20580 - }, - { - "epoch": 1.95, - "learning_rate": 0.0001609223761624597, - "loss": 7.7804, - "step": 20590 - }, - { - "epoch": 1.95, - "learning_rate": 0.00016090339722907572, - "loss": 7.8556, - "step": 20600 - }, - { - "epoch": 1.96, - "learning_rate": 0.00016088441829569179, - "loss": 7.9106, - "step": 20610 - }, - { - "epoch": 1.96, - "learning_rate": 0.00016086543936230785, - "loss": 7.9515, - "step": 20620 - }, - { - "epoch": 1.96, - "learning_rate": 0.0001608464604289239, - "loss": 7.8692, - "step": 20630 - }, - { - "epoch": 1.96, - "learning_rate": 0.00016082748149553997, - "loss": 7.8373, - "step": 20640 - }, - { - "epoch": 1.96, - "learning_rate": 0.000160808502562156, - "loss": 7.867, - "step": 20650 - }, - { - "epoch": 1.96, - "learning_rate": 0.00016078952362877206, - "loss": 7.8678, - "step": 20660 - }, - { - "epoch": 1.96, - "learning_rate": 0.00016077054469538813, - "loss": 7.8488, - "step": 20670 - }, - { - "epoch": 1.96, - "learning_rate": 0.0001607515657620042, - "loss": 7.8678, - "step": 20680 - }, - { - "epoch": 1.96, - "learning_rate": 0.00016073258682862025, - "loss": 7.8862, - "step": 20690 - }, - { - "epoch": 1.96, - "learning_rate": 0.00016071360789523628, - "loss": 7.862, - "step": 20700 - }, - { - "epoch": 1.97, - "learning_rate": 0.00016069462896185234, - "loss": 7.7964, - "step": 20710 - }, - { - "epoch": 1.97, - "learning_rate": 0.0001606756500284684, - "loss": 7.8565, - "step": 20720 - }, - { - "epoch": 1.97, - "learning_rate": 0.00016065667109508447, - "loss": 7.9738, - "step": 20730 - }, - { - "epoch": 1.97, - "learning_rate": 0.00016063769216170053, - "loss": 7.9275, - "step": 20740 - }, - { - "epoch": 1.97, - "learning_rate": 0.0001606187132283166, - "loss": 7.8584, - "step": 20750 - }, - { - "epoch": 1.97, - "learning_rate": 0.00016059973429493262, - "loss": 7.8415, - "step": 20760 - }, - { - "epoch": 1.97, - "learning_rate": 0.00016058075536154868, - "loss": 7.9181, - "step": 20770 - }, - { - "epoch": 1.97, - "learning_rate": 0.00016056177642816475, - "loss": 7.7571, - "step": 20780 - }, - { - "epoch": 1.97, - "learning_rate": 0.0001605427974947808, - "loss": 7.9947, - "step": 20790 - }, - { - "epoch": 1.97, - "learning_rate": 0.00016052381856139687, - "loss": 7.9309, - "step": 20800 - }, - { - "epoch": 1.97, - "learning_rate": 0.0001605048396280129, - "loss": 7.788, - "step": 20810 - }, - { - "epoch": 1.98, - "learning_rate": 0.00016048586069462896, - "loss": 7.9506, - "step": 20820 - }, - { - "epoch": 1.98, - "learning_rate": 0.00016046688176124502, - "loss": 7.9019, - "step": 20830 - }, - { - "epoch": 1.98, - "learning_rate": 0.00016044790282786109, - "loss": 7.9267, - "step": 20840 - }, - { - "epoch": 1.98, - "learning_rate": 0.00016042892389447715, - "loss": 7.8396, - "step": 20850 - }, - { - "epoch": 1.98, - "learning_rate": 0.00016040994496109318, - "loss": 7.8808, - "step": 20860 - }, - { - "epoch": 1.98, - "learning_rate": 0.00016039096602770924, - "loss": 7.8719, - "step": 20870 - }, - { - "epoch": 1.98, - "learning_rate": 0.0001603719870943253, - "loss": 7.8729, - "step": 20880 - }, - { - "epoch": 1.98, - "learning_rate": 0.00016035300816094137, - "loss": 7.8936, - "step": 20890 - }, - { - "epoch": 1.98, - "learning_rate": 0.00016033402922755743, - "loss": 7.8068, - "step": 20900 - }, - { - "epoch": 1.98, - "learning_rate": 0.0001603150502941735, - "loss": 7.8761, - "step": 20910 - }, - { - "epoch": 1.99, - "learning_rate": 0.00016029607136078952, - "loss": 7.8925, - "step": 20920 - }, - { - "epoch": 1.99, - "learning_rate": 0.00016027709242740558, - "loss": 7.7976, - "step": 20930 - }, - { - "epoch": 1.99, - "learning_rate": 0.00016025811349402164, - "loss": 7.9489, - "step": 20940 - }, - { - "epoch": 1.99, - "learning_rate": 0.0001602391345606377, - "loss": 7.9128, - "step": 20950 - }, - { - "epoch": 1.99, - "learning_rate": 0.00016022015562725377, - "loss": 7.8393, - "step": 20960 - }, - { - "epoch": 1.99, - "learning_rate": 0.0001602011766938698, - "loss": 7.7641, - "step": 20970 - }, - { - "epoch": 1.99, - "learning_rate": 0.00016018219776048586, - "loss": 7.946, - "step": 20980 - }, - { - "epoch": 1.99, - "learning_rate": 0.00016016321882710192, - "loss": 7.8674, - "step": 20990 - }, - { - "epoch": 1.99, - "learning_rate": 0.00016014423989371799, - "loss": 7.889, - "step": 21000 - }, - { - "epoch": 1.99, - "learning_rate": 0.00016012526096033405, - "loss": 7.9576, - "step": 21010 - }, - { - "epoch": 1.99, - "learning_rate": 0.00016010628202695008, - "loss": 7.7703, - "step": 21020 - }, - { - "epoch": 2.0, - "learning_rate": 0.00016008730309356614, - "loss": 7.921, - "step": 21030 - }, - { - "epoch": 2.0, - "learning_rate": 0.0001600683241601822, - "loss": 7.906, - "step": 21040 - }, - { - "epoch": 2.0, - "learning_rate": 0.00016004934522679826, - "loss": 7.8525, - "step": 21050 - }, - { - "epoch": 2.0, - "learning_rate": 0.00016003036629341433, - "loss": 7.8645, - "step": 21060 - }, - { - "epoch": 2.0, - "learning_rate": 0.0001600113873600304, - "loss": 7.8351, - "step": 21070 - }, - { - "epoch": 2.0, - "learning_rate": 0.00015999240842664642, - "loss": 7.9053, - "step": 21080 - }, - { - "epoch": 2.0, - "learning_rate": 0.00015997342949326248, - "loss": 7.9325, - "step": 21090 - }, - { - "epoch": 2.0, - "learning_rate": 0.00015995445055987854, - "loss": 7.9268, - "step": 21100 - }, - { - "epoch": 2.0, - "learning_rate": 0.0001599354716264946, - "loss": 7.8665, - "step": 21110 - }, - { - "epoch": 2.0, - "learning_rate": 0.00015991649269311067, - "loss": 7.876, - "step": 21120 - }, - { - "epoch": 2.01, - "learning_rate": 0.0001598975137597267, - "loss": 7.7425, - "step": 21130 - }, - { - "epoch": 2.01, - "learning_rate": 0.00015987853482634276, - "loss": 7.8238, - "step": 21140 - }, - { - "epoch": 2.01, - "learning_rate": 0.00015985955589295882, - "loss": 7.8337, - "step": 21150 - }, - { - "epoch": 2.01, - "learning_rate": 0.00015984057695957488, - "loss": 7.8954, - "step": 21160 - }, - { - "epoch": 2.01, - "learning_rate": 0.00015982159802619095, - "loss": 7.9015, - "step": 21170 - }, - { - "epoch": 2.01, - "learning_rate": 0.00015980261909280698, - "loss": 7.8966, - "step": 21180 - }, - { - "epoch": 2.01, - "learning_rate": 0.00015978364015942304, - "loss": 7.9607, - "step": 21190 - }, - { - "epoch": 2.01, - "learning_rate": 0.0001597646612260391, - "loss": 8.0079, - "step": 21200 - }, - { - "epoch": 2.01, - "learning_rate": 0.00015974568229265516, - "loss": 7.8708, - "step": 21210 - }, - { - "epoch": 2.01, - "learning_rate": 0.00015972670335927123, - "loss": 7.9371, - "step": 21220 - }, - { - "epoch": 2.01, - "learning_rate": 0.00015970772442588726, - "loss": 7.8218, - "step": 21230 - }, - { - "epoch": 2.02, - "learning_rate": 0.00015968874549250332, - "loss": 7.819, - "step": 21240 - }, - { - "epoch": 2.02, - "learning_rate": 0.00015966976655911938, - "loss": 7.8379, - "step": 21250 - }, - { - "epoch": 2.02, - "learning_rate": 0.00015965078762573544, - "loss": 7.9082, - "step": 21260 - }, - { - "epoch": 2.02, - "learning_rate": 0.0001596318086923515, - "loss": 7.7326, - "step": 21270 - }, - { - "epoch": 2.02, - "learning_rate": 0.00015961282975896757, - "loss": 7.8548, - "step": 21280 - }, - { - "epoch": 2.02, - "learning_rate": 0.0001595938508255836, - "loss": 7.9232, - "step": 21290 - }, - { - "epoch": 2.02, - "learning_rate": 0.00015957487189219966, - "loss": 8.0124, - "step": 21300 - }, - { - "epoch": 2.02, - "learning_rate": 0.00015955589295881572, - "loss": 7.7575, - "step": 21310 - }, - { - "epoch": 2.02, - "learning_rate": 0.00015953691402543178, - "loss": 7.7923, - "step": 21320 - }, - { - "epoch": 2.02, - "learning_rate": 0.00015951793509204784, - "loss": 7.8825, - "step": 21330 - }, - { - "epoch": 2.03, - "learning_rate": 0.00015949895615866388, - "loss": 7.9135, - "step": 21340 - }, - { - "epoch": 2.03, - "learning_rate": 0.00015947997722527994, - "loss": 7.8871, - "step": 21350 - }, - { - "epoch": 2.03, - "learning_rate": 0.000159460998291896, - "loss": 7.8631, - "step": 21360 - }, - { - "epoch": 2.03, - "learning_rate": 0.00015944201935851206, - "loss": 7.9616, - "step": 21370 - }, - { - "epoch": 2.03, - "learning_rate": 0.00015942304042512812, - "loss": 7.8579, - "step": 21380 - }, - { - "epoch": 2.03, - "learning_rate": 0.00015940406149174416, - "loss": 7.8539, - "step": 21390 - }, - { - "epoch": 2.03, - "learning_rate": 0.00015938508255836022, - "loss": 7.8704, - "step": 21400 - }, - { - "epoch": 2.03, - "learning_rate": 0.00015936610362497628, - "loss": 7.8644, - "step": 21410 - }, - { - "epoch": 2.03, - "learning_rate": 0.00015934712469159234, - "loss": 7.8173, - "step": 21420 - }, - { - "epoch": 2.03, - "learning_rate": 0.0001593281457582084, - "loss": 7.8461, - "step": 21430 - }, - { - "epoch": 2.03, - "learning_rate": 0.00015930916682482446, - "loss": 7.8491, - "step": 21440 - }, - { - "epoch": 2.04, - "learning_rate": 0.0001592901878914405, - "loss": 7.8831, - "step": 21450 - }, - { - "epoch": 2.04, - "learning_rate": 0.00015927120895805656, - "loss": 7.878, - "step": 21460 - }, - { - "epoch": 2.04, - "learning_rate": 0.00015925223002467262, - "loss": 7.7975, - "step": 21470 - }, - { - "epoch": 2.04, - "learning_rate": 0.00015923325109128868, - "loss": 7.8669, - "step": 21480 - }, - { - "epoch": 2.04, - "learning_rate": 0.00015921427215790474, - "loss": 7.8426, - "step": 21490 - }, - { - "epoch": 2.04, - "learning_rate": 0.00015919529322452078, - "loss": 7.7905, - "step": 21500 - }, - { - "epoch": 2.04, - "learning_rate": 0.00015917631429113684, - "loss": 7.8559, - "step": 21510 - }, - { - "epoch": 2.04, - "learning_rate": 0.0001591573353577529, - "loss": 7.9196, - "step": 21520 - }, - { - "epoch": 2.04, - "learning_rate": 0.00015913835642436896, - "loss": 7.8966, - "step": 21530 - }, - { - "epoch": 2.04, - "learning_rate": 0.00015911937749098502, - "loss": 7.9587, - "step": 21540 - }, - { - "epoch": 2.04, - "learning_rate": 0.00015910039855760106, - "loss": 7.904, - "step": 21550 - }, - { - "epoch": 2.05, - "learning_rate": 0.00015908141962421712, - "loss": 7.8236, - "step": 21560 - }, - { - "epoch": 2.05, - "learning_rate": 0.00015906244069083318, - "loss": 7.9078, - "step": 21570 - }, - { - "epoch": 2.05, - "learning_rate": 0.00015904346175744924, - "loss": 7.8783, - "step": 21580 - }, - { - "epoch": 2.05, - "learning_rate": 0.0001590244828240653, - "loss": 7.767, - "step": 21590 - }, - { - "epoch": 2.05, - "learning_rate": 0.00015900550389068136, - "loss": 7.8145, - "step": 21600 - }, - { - "epoch": 2.05, - "learning_rate": 0.0001589865249572974, - "loss": 7.8772, - "step": 21610 - }, - { - "epoch": 2.05, - "learning_rate": 0.00015896754602391346, - "loss": 7.8538, - "step": 21620 - }, - { - "epoch": 2.05, - "learning_rate": 0.00015894856709052952, - "loss": 7.8231, - "step": 21630 - }, - { - "epoch": 2.05, - "learning_rate": 0.00015892958815714558, - "loss": 7.9159, - "step": 21640 - }, - { - "epoch": 2.05, - "learning_rate": 0.00015891060922376164, - "loss": 7.9374, - "step": 21650 - }, - { - "epoch": 2.06, - "learning_rate": 0.00015889163029037768, - "loss": 7.8821, - "step": 21660 - }, - { - "epoch": 2.06, - "learning_rate": 0.00015887265135699374, - "loss": 7.9214, - "step": 21670 - }, - { - "epoch": 2.06, - "learning_rate": 0.0001588536724236098, - "loss": 8.002, - "step": 21680 - }, - { - "epoch": 2.06, - "learning_rate": 0.00015883469349022586, - "loss": 7.8611, - "step": 21690 - }, - { - "epoch": 2.06, - "learning_rate": 0.00015881571455684192, - "loss": 8.0108, - "step": 21700 - }, - { - "epoch": 2.06, - "learning_rate": 0.00015879673562345796, - "loss": 7.8643, - "step": 21710 - }, - { - "epoch": 2.06, - "learning_rate": 0.00015877775669007402, - "loss": 7.9021, - "step": 21720 - }, - { - "epoch": 2.06, - "learning_rate": 0.00015875877775669008, - "loss": 7.8611, - "step": 21730 - }, - { - "epoch": 2.06, - "learning_rate": 0.00015873979882330614, - "loss": 7.7276, - "step": 21740 - }, - { - "epoch": 2.06, - "learning_rate": 0.0001587208198899222, - "loss": 7.9215, - "step": 21750 - }, - { - "epoch": 2.06, - "learning_rate": 0.00015870184095653824, - "loss": 7.8625, - "step": 21760 - }, - { - "epoch": 2.07, - "learning_rate": 0.0001586828620231543, - "loss": 7.9357, - "step": 21770 - }, - { - "epoch": 2.07, - "learning_rate": 0.00015866388308977036, - "loss": 7.89, - "step": 21780 - }, - { - "epoch": 2.07, - "learning_rate": 0.00015864490415638642, - "loss": 7.6989, - "step": 21790 - }, - { - "epoch": 2.07, - "learning_rate": 0.00015862592522300248, - "loss": 7.7832, - "step": 21800 - }, - { - "epoch": 2.07, - "learning_rate": 0.00015860694628961854, - "loss": 7.9075, - "step": 21810 - }, - { - "epoch": 2.07, - "learning_rate": 0.00015858796735623458, - "loss": 7.9715, - "step": 21820 - }, - { - "epoch": 2.07, - "learning_rate": 0.00015856898842285064, - "loss": 7.8661, - "step": 21830 - }, - { - "epoch": 2.07, - "learning_rate": 0.0001585500094894667, - "loss": 7.8871, - "step": 21840 - }, - { - "epoch": 2.07, - "learning_rate": 0.00015853103055608276, - "loss": 7.9332, - "step": 21850 - }, - { - "epoch": 2.07, - "learning_rate": 0.00015851205162269882, - "loss": 7.839, - "step": 21860 - }, - { - "epoch": 2.08, - "learning_rate": 0.00015849307268931486, - "loss": 7.9071, - "step": 21870 - }, - { - "epoch": 2.08, - "learning_rate": 0.00015847409375593092, - "loss": 7.8392, - "step": 21880 - }, - { - "epoch": 2.08, - "learning_rate": 0.00015845511482254698, - "loss": 7.8378, - "step": 21890 - }, - { - "epoch": 2.08, - "learning_rate": 0.00015843613588916304, - "loss": 7.895, - "step": 21900 - }, - { - "epoch": 2.08, - "learning_rate": 0.0001584171569557791, - "loss": 7.895, - "step": 21910 - }, - { - "epoch": 2.08, - "learning_rate": 0.00015839817802239513, - "loss": 7.9746, - "step": 21920 - }, - { - "epoch": 2.08, - "learning_rate": 0.0001583791990890112, - "loss": 7.7522, - "step": 21930 - }, - { - "epoch": 2.08, - "learning_rate": 0.00015836022015562726, - "loss": 7.9543, - "step": 21940 - }, - { - "epoch": 2.08, - "learning_rate": 0.00015834124122224332, - "loss": 7.9075, - "step": 21950 - }, - { - "epoch": 2.08, - "learning_rate": 0.00015832226228885938, - "loss": 7.8838, - "step": 21960 - }, - { - "epoch": 2.08, - "learning_rate": 0.00015830328335547544, - "loss": 7.8334, - "step": 21970 - }, - { - "epoch": 2.09, - "learning_rate": 0.00015828430442209148, - "loss": 7.888, - "step": 21980 - }, - { - "epoch": 2.09, - "learning_rate": 0.00015826532548870754, - "loss": 7.853, - "step": 21990 - }, - { - "epoch": 2.09, - "learning_rate": 0.0001582463465553236, - "loss": 8.0296, - "step": 22000 - }, - { - "epoch": 2.09, - "learning_rate": 0.00015822736762193966, - "loss": 7.8432, - "step": 22010 - }, - { - "epoch": 2.09, - "learning_rate": 0.00015820838868855572, - "loss": 7.8741, - "step": 22020 - }, - { - "epoch": 2.09, - "learning_rate": 0.00015818940975517175, - "loss": 7.8237, - "step": 22030 - }, - { - "epoch": 2.09, - "learning_rate": 0.00015817043082178782, - "loss": 7.9621, - "step": 22040 - }, - { - "epoch": 2.09, - "learning_rate": 0.00015815145188840388, - "loss": 7.8859, - "step": 22050 - }, - { - "epoch": 2.09, - "learning_rate": 0.00015813247295501994, - "loss": 7.9003, - "step": 22060 - }, - { - "epoch": 2.09, - "learning_rate": 0.000158113494021636, - "loss": 7.9417, - "step": 22070 - }, - { - "epoch": 2.1, - "learning_rate": 0.00015809451508825203, - "loss": 7.9236, - "step": 22080 - }, - { - "epoch": 2.1, - "learning_rate": 0.0001580755361548681, - "loss": 7.7871, - "step": 22090 - }, - { - "epoch": 2.1, - "learning_rate": 0.00015805655722148416, - "loss": 7.8612, - "step": 22100 - }, - { - "epoch": 2.1, - "learning_rate": 0.00015803757828810022, - "loss": 8.0152, - "step": 22110 - }, - { - "epoch": 2.1, - "learning_rate": 0.00015801859935471628, - "loss": 7.8189, - "step": 22120 - }, - { - "epoch": 2.1, - "learning_rate": 0.00015799962042133234, - "loss": 7.9239, - "step": 22130 - }, - { - "epoch": 2.1, - "learning_rate": 0.00015798064148794837, - "loss": 7.8071, - "step": 22140 - }, - { - "epoch": 2.1, - "learning_rate": 0.00015796166255456444, - "loss": 7.8181, - "step": 22150 - }, - { - "epoch": 2.1, - "learning_rate": 0.0001579426836211805, - "loss": 7.847, - "step": 22160 - }, - { - "epoch": 2.1, - "learning_rate": 0.00015792370468779656, - "loss": 7.8419, - "step": 22170 - }, - { - "epoch": 2.1, - "learning_rate": 0.00015790472575441262, - "loss": 7.9681, - "step": 22180 - }, - { - "epoch": 2.11, - "learning_rate": 0.00015788574682102865, - "loss": 7.8276, - "step": 22190 - }, - { - "epoch": 2.11, - "learning_rate": 0.00015786676788764472, - "loss": 7.8943, - "step": 22200 - }, - { - "epoch": 2.11, - "learning_rate": 0.00015784778895426078, - "loss": 7.8348, - "step": 22210 - }, - { - "epoch": 2.11, - "learning_rate": 0.00015782881002087684, - "loss": 7.8609, - "step": 22220 - }, - { - "epoch": 2.11, - "learning_rate": 0.0001578098310874929, - "loss": 7.8044, - "step": 22230 - }, - { - "epoch": 2.11, - "learning_rate": 0.00015779085215410893, - "loss": 7.9309, - "step": 22240 - }, - { - "epoch": 2.11, - "learning_rate": 0.000157771873220725, - "loss": 7.8456, - "step": 22250 - }, - { - "epoch": 2.11, - "learning_rate": 0.00015775289428734106, - "loss": 7.8255, - "step": 22260 - }, - { - "epoch": 2.11, - "learning_rate": 0.00015773391535395712, - "loss": 7.9197, - "step": 22270 - }, - { - "epoch": 2.11, - "learning_rate": 0.00015771493642057318, - "loss": 7.8745, - "step": 22280 - }, - { - "epoch": 2.12, - "learning_rate": 0.0001576959574871892, - "loss": 7.8433, - "step": 22290 - }, - { - "epoch": 2.12, - "learning_rate": 0.00015767697855380527, - "loss": 7.8798, - "step": 22300 - }, - { - "epoch": 2.12, - "learning_rate": 0.00015765799962042134, - "loss": 7.8917, - "step": 22310 - }, - { - "epoch": 2.12, - "learning_rate": 0.0001576390206870374, - "loss": 7.884, - "step": 22320 - }, - { - "epoch": 2.12, - "learning_rate": 0.00015762004175365346, - "loss": 7.8985, - "step": 22330 - }, - { - "epoch": 2.12, - "learning_rate": 0.00015760106282026952, - "loss": 7.9394, - "step": 22340 - }, - { - "epoch": 2.12, - "learning_rate": 0.00015758208388688555, - "loss": 7.8019, - "step": 22350 - }, - { - "epoch": 2.12, - "learning_rate": 0.00015756310495350161, - "loss": 7.8047, - "step": 22360 - }, - { - "epoch": 2.12, - "learning_rate": 0.00015754412602011768, - "loss": 7.8143, - "step": 22370 - }, - { - "epoch": 2.12, - "learning_rate": 0.00015752514708673374, - "loss": 7.81, - "step": 22380 - }, - { - "epoch": 2.12, - "learning_rate": 0.0001575061681533498, - "loss": 7.8116, - "step": 22390 - }, - { - "epoch": 2.13, - "learning_rate": 0.00015748718921996583, - "loss": 7.8917, - "step": 22400 - }, - { - "epoch": 2.13, - "learning_rate": 0.0001574682102865819, - "loss": 7.849, - "step": 22410 - }, - { - "epoch": 2.13, - "learning_rate": 0.00015744923135319795, - "loss": 7.8409, - "step": 22420 - }, - { - "epoch": 2.13, - "learning_rate": 0.00015743025241981402, - "loss": 7.8352, - "step": 22430 - }, - { - "epoch": 2.13, - "learning_rate": 0.00015741127348643008, - "loss": 7.8681, - "step": 22440 - }, - { - "epoch": 2.13, - "learning_rate": 0.0001573922945530461, - "loss": 7.9063, - "step": 22450 - }, - { - "epoch": 2.13, - "learning_rate": 0.00015737331561966217, - "loss": 7.8869, - "step": 22460 - }, - { - "epoch": 2.13, - "learning_rate": 0.00015735433668627823, - "loss": 7.9133, - "step": 22470 - }, - { - "epoch": 2.13, - "learning_rate": 0.0001573353577528943, - "loss": 7.9333, - "step": 22480 - }, - { - "epoch": 2.13, - "learning_rate": 0.00015731637881951036, - "loss": 7.9291, - "step": 22490 - }, - { - "epoch": 2.14, - "learning_rate": 0.00015729739988612642, - "loss": 7.9055, - "step": 22500 - }, - { - "epoch": 2.14, - "learning_rate": 0.00015727842095274245, - "loss": 7.7936, - "step": 22510 - }, - { - "epoch": 2.14, - "learning_rate": 0.0001572594420193585, - "loss": 7.8572, - "step": 22520 - }, - { - "epoch": 2.14, - "learning_rate": 0.00015724046308597457, - "loss": 7.7666, - "step": 22530 - }, - { - "epoch": 2.14, - "learning_rate": 0.00015722148415259064, - "loss": 7.8616, - "step": 22540 - }, - { - "epoch": 2.14, - "learning_rate": 0.0001572025052192067, - "loss": 7.8381, - "step": 22550 + "epoch": 0.25, + "learning_rate": 9.916833000665336e-05, + "loss": 7.7087, + "step": 500 }, { - "epoch": 2.14, - "learning_rate": 0.00015718352628582273, - "loss": 7.902, - "step": 22560 + "epoch": 0.5, + "learning_rate": 9.833666001330672e-05, + "loss": 7.4465, + "step": 1000 }, { - "epoch": 2.14, - "learning_rate": 0.0001571645473524388, - "loss": 7.8894, - "step": 22570 + "epoch": 0.75, + "learning_rate": 9.750499001996009e-05, + "loss": 7.1976, + "step": 1500 }, { - "epoch": 2.14, - "learning_rate": 0.00015714556841905485, - "loss": 7.8568, - "step": 22580 + "epoch": 1.0, + "learning_rate": 9.667332002661345e-05, + "loss": 6.9445, + "step": 2000 }, { - "epoch": 2.14, - "learning_rate": 0.00015712658948567092, - "loss": 7.8792, - "step": 22590 + "epoch": 1.25, + "learning_rate": 9.58416500332668e-05, + "loss": 6.6812, + "step": 2500 }, { - "epoch": 2.14, - "learning_rate": 0.00015710761055228698, - "loss": 7.8798, - "step": 22600 + "epoch": 1.5, + "learning_rate": 9.500998003992016e-05, + "loss": 6.2449, + "step": 3000 }, { - "epoch": 2.15, - "learning_rate": 0.000157088631618903, - "loss": 7.8514, - "step": 22610 + "epoch": 1.75, + "learning_rate": 9.417831004657353e-05, + "loss": 5.6326, + "step": 3500 }, { - "epoch": 2.15, - "learning_rate": 0.00015706965268551907, - "loss": 7.8087, - "step": 22620 + "epoch": 2.0, + "learning_rate": 9.334664005322689e-05, + "loss": 4.9326, + "step": 4000 }, { - "epoch": 2.15, - "learning_rate": 0.00015705067375213513, - "loss": 7.8658, - "step": 22630 + "epoch": 2.25, + "learning_rate": 9.251497005988024e-05, + "loss": 4.4854, + "step": 4500 }, { - "epoch": 2.15, - "learning_rate": 0.0001570316948187512, - "loss": 7.9174, - "step": 22640 + "epoch": 2.5, + "learning_rate": 9.16833000665336e-05, + "loss": 4.1808, + "step": 5000 }, { - "epoch": 2.15, - "learning_rate": 0.00015701271588536726, - "loss": 7.8491, - "step": 22650 + "epoch": 2.74, + "learning_rate": 9.085163007318697e-05, + "loss": 3.9169, + "step": 5500 }, { - "epoch": 2.15, - "learning_rate": 0.00015699373695198332, - "loss": 7.7947, - "step": 22660 + "epoch": 2.99, + "learning_rate": 9.001996007984033e-05, + "loss": 3.7161, + "step": 6000 }, { - "epoch": 2.15, - "learning_rate": 0.00015697475801859935, - "loss": 7.7663, - "step": 22670 + "epoch": 3.24, + "learning_rate": 8.918829008649369e-05, + "loss": 3.5102, + "step": 6500 }, { - "epoch": 2.15, - "learning_rate": 0.0001569557790852154, - "loss": 7.8722, - "step": 22680 + "epoch": 3.49, + "learning_rate": 8.835662009314704e-05, + "loss": 3.3782, + "step": 7000 }, { - "epoch": 2.15, - "learning_rate": 0.00015693680015183147, - "loss": 7.8574, - "step": 22690 + "epoch": 3.74, + "learning_rate": 8.752495009980041e-05, + "loss": 3.246, + "step": 7500 }, { - "epoch": 2.15, - "learning_rate": 0.00015691782121844754, - "loss": 7.706, - "step": 22700 + "epoch": 3.99, + "learning_rate": 8.669328010645377e-05, + "loss": 3.1532, + "step": 8000 }, { - "epoch": 2.16, - "learning_rate": 0.0001568988422850636, - "loss": 7.8818, - "step": 22710 + "epoch": 4.24, + "learning_rate": 8.586161011310713e-05, + "loss": 3.0313, + "step": 8500 }, { - "epoch": 2.16, - "learning_rate": 0.00015687986335167963, - "loss": 7.8492, - "step": 22720 + "epoch": 4.49, + "learning_rate": 8.502994011976048e-05, + "loss": 2.9563, + "step": 9000 }, { - "epoch": 2.16, - "learning_rate": 0.0001568608844182957, - "loss": 7.8555, - "step": 22730 + "epoch": 4.74, + "learning_rate": 8.419827012641384e-05, + "loss": 2.8741, + "step": 9500 }, { - "epoch": 2.16, - "learning_rate": 0.00015684190548491175, - "loss": 7.8188, - "step": 22740 + "epoch": 4.99, + "learning_rate": 8.336660013306721e-05, + "loss": 2.816, + "step": 10000 }, { - "epoch": 2.16, - "learning_rate": 0.00015682292655152781, - "loss": 7.8021, - "step": 22750 + "epoch": 5.24, + "learning_rate": 8.253493013972057e-05, + "loss": 2.7435, + "step": 10500 }, { - "epoch": 2.16, - "learning_rate": 0.00015680394761814388, - "loss": 7.9007, - "step": 22760 + "epoch": 5.49, + "learning_rate": 8.170326014637393e-05, + "loss": 2.6801, + "step": 11000 }, { - "epoch": 2.16, - "learning_rate": 0.0001567849686847599, - "loss": 7.7355, - "step": 22770 + "epoch": 5.74, + "learning_rate": 8.087159015302728e-05, + "loss": 2.6325, + "step": 11500 }, { - "epoch": 2.16, - "learning_rate": 0.00015676598975137597, - "loss": 7.7312, - "step": 22780 + "epoch": 5.99, + "learning_rate": 8.003992015968065e-05, + "loss": 2.5931, + "step": 12000 }, { - "epoch": 2.16, - "learning_rate": 0.00015674701081799203, - "loss": 7.9311, - "step": 22790 + "epoch": 6.24, + "learning_rate": 7.9208250166334e-05, + "loss": 2.525, + "step": 12500 }, { - "epoch": 2.16, - "learning_rate": 0.0001567280318846081, - "loss": 7.879, - "step": 22800 + "epoch": 6.49, + "learning_rate": 7.837658017298735e-05, + "loss": 2.4914, + "step": 13000 }, { - "epoch": 2.16, - "learning_rate": 0.00015670905295122415, - "loss": 7.8623, - "step": 22810 + "epoch": 6.74, + "learning_rate": 7.754491017964072e-05, + "loss": 2.4559, + "step": 13500 }, { - "epoch": 2.17, - "learning_rate": 0.0001566900740178402, - "loss": 7.8358, - "step": 22820 + "epoch": 6.99, + "learning_rate": 7.671324018629408e-05, + "loss": 2.4176, + "step": 14000 }, { - "epoch": 2.17, - "learning_rate": 0.00015667109508445625, - "loss": 7.8962, - "step": 22830 + "epoch": 7.24, + "learning_rate": 7.588157019294744e-05, + "loss": 2.3771, + "step": 14500 }, { - "epoch": 2.17, - "learning_rate": 0.0001566521161510723, - "loss": 7.8288, - "step": 22840 + "epoch": 7.49, + "learning_rate": 7.50499001996008e-05, + "loss": 2.3386, + "step": 15000 }, { - "epoch": 2.17, - "learning_rate": 0.00015663313721768837, - "loss": 7.9741, - "step": 22850 + "epoch": 7.73, + "learning_rate": 7.421823020625415e-05, + "loss": 2.3137, + "step": 15500 }, { - "epoch": 2.17, - "learning_rate": 0.00015661415828430443, - "loss": 7.8187, - "step": 22860 + "epoch": 7.98, + "learning_rate": 7.338656021290752e-05, + "loss": 2.2792, + "step": 16000 }, { - "epoch": 2.17, - "learning_rate": 0.0001565951793509205, - "loss": 7.8735, - "step": 22870 + "epoch": 8.23, + "learning_rate": 7.255489021956088e-05, + "loss": 2.235, + "step": 16500 }, { - "epoch": 2.17, - "learning_rate": 0.00015657620041753653, - "loss": 7.9464, - "step": 22880 + "epoch": 8.48, + "learning_rate": 7.172322022621424e-05, + "loss": 2.2126, + "step": 17000 }, { - "epoch": 2.17, - "learning_rate": 0.0001565572214841526, - "loss": 7.8947, - "step": 22890 + "epoch": 8.73, + "learning_rate": 7.089155023286759e-05, + "loss": 2.1985, + "step": 17500 }, { - "epoch": 2.17, - "learning_rate": 0.00015653824255076865, - "loss": 7.8647, - "step": 22900 + "epoch": 8.98, + "learning_rate": 7.005988023952096e-05, + "loss": 2.1803, + "step": 18000 }, { - "epoch": 2.17, - "learning_rate": 0.0001565192636173847, - "loss": 7.8879, - "step": 22910 + "epoch": 9.23, + "learning_rate": 6.922821024617432e-05, + "loss": 2.1363, + "step": 18500 }, { - "epoch": 2.17, - "learning_rate": 0.00015650028468400077, - "loss": 7.8699, - "step": 22920 + "epoch": 9.48, + "learning_rate": 6.839654025282768e-05, + "loss": 2.1104, + "step": 19000 }, { - "epoch": 2.18, - "learning_rate": 0.0001564813057506168, - "loss": 7.8284, - "step": 22930 + "epoch": 9.73, + "learning_rate": 6.756487025948103e-05, + "loss": 2.0954, + "step": 19500 }, { - "epoch": 2.18, - "learning_rate": 0.00015646232681723287, - "loss": 7.826, - "step": 22940 + "epoch": 9.98, + "learning_rate": 6.67332002661344e-05, + "loss": 2.0818, + "step": 20000 }, { - "epoch": 2.18, - "learning_rate": 0.00015644334788384893, - "loss": 7.9656, - "step": 22950 + "epoch": 10.23, + "learning_rate": 6.590153027278776e-05, + "loss": 2.0576, + "step": 20500 }, { - "epoch": 2.18, - "learning_rate": 0.000156424368950465, - "loss": 7.748, - "step": 22960 + "epoch": 10.48, + "learning_rate": 6.506986027944112e-05, + "loss": 2.0304, + "step": 21000 }, { - "epoch": 2.18, - "learning_rate": 0.00015640539001708105, - "loss": 7.7614, - "step": 22970 + "epoch": 10.73, + "learning_rate": 6.423819028609448e-05, + "loss": 2.0079, + "step": 21500 }, { - "epoch": 2.18, - "learning_rate": 0.0001563864110836971, - "loss": 7.8216, - "step": 22980 + "epoch": 10.98, + "learning_rate": 6.340652029274785e-05, + "loss": 2.0089, + "step": 22000 }, { - "epoch": 2.18, - "learning_rate": 0.00015636743215031315, - "loss": 8.0235, - "step": 22990 + "epoch": 11.23, + "learning_rate": 6.25748502994012e-05, + "loss": 1.971, + "step": 22500 }, { - "epoch": 2.18, - "learning_rate": 0.0001563484532169292, - "loss": 7.8968, + "epoch": 11.48, + "learning_rate": 6.174318030605456e-05, + "loss": 1.9458, "step": 23000 }, { - "epoch": 2.18, - "learning_rate": 0.00015632947428354527, - "loss": 7.9184, - "step": 23010 - }, - { - "epoch": 2.18, - "learning_rate": 0.00015631049535016133, - "loss": 7.8753, - "step": 23020 - }, - { - "epoch": 2.19, - "learning_rate": 0.0001562915164167774, - "loss": 7.9102, - "step": 23030 - }, - { - "epoch": 2.19, - "learning_rate": 0.00015627253748339343, - "loss": 7.7806, - "step": 23040 - }, - { - "epoch": 2.19, - "learning_rate": 0.0001562535585500095, - "loss": 7.8738, - "step": 23050 - }, - { - "epoch": 2.19, - "learning_rate": 0.00015623457961662555, - "loss": 7.8276, - "step": 23060 - }, - { - "epoch": 2.19, - "learning_rate": 0.0001562156006832416, - "loss": 7.8367, - "step": 23070 - }, - { - "epoch": 2.19, - "learning_rate": 0.00015619662174985767, - "loss": 7.8343, - "step": 23080 - }, - { - "epoch": 2.19, - "learning_rate": 0.0001561776428164737, - "loss": 7.863, - "step": 23090 - }, - { - "epoch": 2.19, - "learning_rate": 0.00015615866388308977, - "loss": 7.8564, - "step": 23100 - }, - { - "epoch": 2.19, - "learning_rate": 0.00015613968494970583, - "loss": 7.8091, - "step": 23110 - }, - { - "epoch": 2.19, - "learning_rate": 0.0001561207060163219, - "loss": 7.881, - "step": 23120 - }, - { - "epoch": 2.19, - "learning_rate": 0.00015610172708293795, - "loss": 7.9338, - "step": 23130 - }, - { - "epoch": 2.2, - "learning_rate": 0.000156082748149554, - "loss": 7.8681, - "step": 23140 - }, - { - "epoch": 2.2, - "learning_rate": 0.00015606376921617005, - "loss": 7.7641, - "step": 23150 - }, - { - "epoch": 2.2, - "learning_rate": 0.0001560447902827861, - "loss": 7.9572, - "step": 23160 - }, - { - "epoch": 2.2, - "learning_rate": 0.00015602581134940217, - "loss": 7.793, - "step": 23170 - }, - { - "epoch": 2.2, - "learning_rate": 0.00015600683241601823, - "loss": 7.8405, - "step": 23180 - }, - { - "epoch": 2.2, - "learning_rate": 0.0001559878534826343, - "loss": 7.8318, - "step": 23190 - }, - { - "epoch": 2.2, - "learning_rate": 0.00015596887454925033, - "loss": 7.8334, - "step": 23200 - }, - { - "epoch": 2.2, - "learning_rate": 0.0001559498956158664, - "loss": 7.7095, - "step": 23210 - }, - { - "epoch": 2.2, - "learning_rate": 0.00015593091668248245, - "loss": 7.885, - "step": 23220 - }, - { - "epoch": 2.2, - "learning_rate": 0.0001559119377490985, - "loss": 7.904, - "step": 23230 - }, - { - "epoch": 2.21, - "learning_rate": 0.00015589295881571457, - "loss": 7.8512, - "step": 23240 - }, - { - "epoch": 2.21, - "learning_rate": 0.0001558739798823306, - "loss": 7.7914, - "step": 23250 - }, - { - "epoch": 2.21, - "learning_rate": 0.00015585500094894667, - "loss": 7.9344, - "step": 23260 - }, - { - "epoch": 2.21, - "learning_rate": 0.00015583602201556273, - "loss": 7.8801, - "step": 23270 - }, - { - "epoch": 2.21, - "learning_rate": 0.0001558170430821788, - "loss": 7.8102, - "step": 23280 - }, - { - "epoch": 2.21, - "learning_rate": 0.00015579806414879485, - "loss": 7.8134, - "step": 23290 - }, - { - "epoch": 2.21, - "learning_rate": 0.0001557790852154109, - "loss": 7.857, - "step": 23300 - }, - { - "epoch": 2.21, - "learning_rate": 0.00015576010628202695, - "loss": 7.902, - "step": 23310 - }, - { - "epoch": 2.21, - "learning_rate": 0.000155741127348643, - "loss": 7.8886, - "step": 23320 - }, - { - "epoch": 2.21, - "learning_rate": 0.00015572214841525907, - "loss": 7.8732, - "step": 23330 - }, - { - "epoch": 2.21, - "learning_rate": 0.00015570316948187513, - "loss": 7.8585, - "step": 23340 - }, - { - "epoch": 2.22, - "learning_rate": 0.0001556841905484912, - "loss": 7.8644, - "step": 23350 - }, - { - "epoch": 2.22, - "learning_rate": 0.00015566521161510723, - "loss": 7.8146, - "step": 23360 - }, - { - "epoch": 2.22, - "learning_rate": 0.0001556462326817233, - "loss": 7.8164, - "step": 23370 - }, - { - "epoch": 2.22, - "learning_rate": 0.00015562725374833935, - "loss": 7.7867, - "step": 23380 - }, - { - "epoch": 2.22, - "learning_rate": 0.0001556082748149554, - "loss": 7.9057, - "step": 23390 - }, - { - "epoch": 2.22, - "learning_rate": 0.00015558929588157147, - "loss": 7.8249, - "step": 23400 - }, - { - "epoch": 2.22, - "learning_rate": 0.0001555703169481875, - "loss": 7.9146, - "step": 23410 - }, - { - "epoch": 2.22, - "learning_rate": 0.00015555133801480357, - "loss": 7.8243, - "step": 23420 - }, - { - "epoch": 2.22, - "learning_rate": 0.00015553235908141963, - "loss": 7.9238, - "step": 23430 - }, - { - "epoch": 2.22, - "learning_rate": 0.0001555133801480357, - "loss": 7.8485, - "step": 23440 - }, - { - "epoch": 2.23, - "learning_rate": 0.00015549440121465175, - "loss": 7.821, - "step": 23450 - }, - { - "epoch": 2.23, - "learning_rate": 0.00015547542228126779, - "loss": 7.9004, - "step": 23460 - }, - { - "epoch": 2.23, - "learning_rate": 0.00015545644334788385, - "loss": 7.8125, - "step": 23470 - }, - { - "epoch": 2.23, - "learning_rate": 0.0001554374644144999, - "loss": 7.9106, - "step": 23480 - }, - { - "epoch": 2.23, - "learning_rate": 0.00015541848548111597, - "loss": 7.947, - "step": 23490 - }, - { - "epoch": 2.23, - "learning_rate": 0.00015539950654773203, - "loss": 7.8984, + "epoch": 11.73, + "learning_rate": 6.091151031270792e-05, + "loss": 1.9479, "step": 23500 }, { - "epoch": 2.23, - "learning_rate": 0.00015538052761434806, - "loss": 7.9675, - "step": 23510 - }, - { - "epoch": 2.23, - "learning_rate": 0.00015536154868096413, - "loss": 7.8223, - "step": 23520 - }, - { - "epoch": 2.23, - "learning_rate": 0.0001553425697475802, - "loss": 7.8708, - "step": 23530 - }, - { - "epoch": 2.23, - "learning_rate": 0.00015532359081419625, - "loss": 7.9586, - "step": 23540 - }, - { - "epoch": 2.23, - "learning_rate": 0.0001553046118808123, - "loss": 7.818, - "step": 23550 - }, - { - "epoch": 2.24, - "learning_rate": 0.00015528563294742837, - "loss": 7.8387, - "step": 23560 - }, - { - "epoch": 2.24, - "learning_rate": 0.0001552666540140444, - "loss": 7.8902, - "step": 23570 - }, - { - "epoch": 2.24, - "learning_rate": 0.00015524767508066047, - "loss": 7.8054, - "step": 23580 - }, - { - "epoch": 2.24, - "learning_rate": 0.00015522869614727653, - "loss": 7.8994, - "step": 23590 - }, - { - "epoch": 2.24, - "learning_rate": 0.0001552097172138926, - "loss": 7.9191, - "step": 23600 - }, - { - "epoch": 2.24, - "learning_rate": 0.00015519073828050865, - "loss": 7.8364, - "step": 23610 - }, - { - "epoch": 2.24, - "learning_rate": 0.00015517175934712468, - "loss": 7.9712, - "step": 23620 - }, - { - "epoch": 2.24, - "learning_rate": 0.00015515278041374075, - "loss": 7.8791, - "step": 23630 - }, - { - "epoch": 2.24, - "learning_rate": 0.0001551338014803568, - "loss": 7.8106, - "step": 23640 - }, - { - "epoch": 2.24, - "learning_rate": 0.00015511482254697287, - "loss": 7.8697, - "step": 23650 - }, - { - "epoch": 2.25, - "learning_rate": 0.00015509584361358893, - "loss": 7.9453, - "step": 23660 - }, - { - "epoch": 2.25, - "learning_rate": 0.00015507686468020496, - "loss": 7.8686, - "step": 23670 - }, - { - "epoch": 2.25, - "learning_rate": 0.00015505788574682103, - "loss": 7.9112, - "step": 23680 - }, - { - "epoch": 2.25, - "learning_rate": 0.0001550389068134371, - "loss": 7.8276, - "step": 23690 - }, - { - "epoch": 2.25, - "learning_rate": 0.00015501992788005315, - "loss": 7.8023, - "step": 23700 - }, - { - "epoch": 2.25, - "learning_rate": 0.0001550009489466692, - "loss": 7.9167, - "step": 23710 - }, - { - "epoch": 2.25, - "learning_rate": 0.00015498197001328527, - "loss": 7.9369, - "step": 23720 - }, - { - "epoch": 2.25, - "learning_rate": 0.0001549629910799013, - "loss": 7.7976, - "step": 23730 - }, - { - "epoch": 2.25, - "learning_rate": 0.00015494401214651737, - "loss": 7.8442, - "step": 23740 - }, - { - "epoch": 2.25, - "learning_rate": 0.00015492503321313343, - "loss": 7.8399, - "step": 23750 - }, - { - "epoch": 2.25, - "learning_rate": 0.0001549060542797495, - "loss": 7.8819, - "step": 23760 - }, - { - "epoch": 2.26, - "learning_rate": 0.00015488707534636555, - "loss": 7.9336, - "step": 23770 - }, - { - "epoch": 2.26, - "learning_rate": 0.00015486809641298158, - "loss": 7.8861, - "step": 23780 - }, - { - "epoch": 2.26, - "learning_rate": 0.00015484911747959765, - "loss": 7.875, - "step": 23790 - }, - { - "epoch": 2.26, - "learning_rate": 0.0001548301385462137, - "loss": 7.8272, - "step": 23800 - }, - { - "epoch": 2.26, - "learning_rate": 0.00015481115961282977, - "loss": 7.8513, - "step": 23810 - }, - { - "epoch": 2.26, - "learning_rate": 0.00015479218067944583, - "loss": 7.8837, - "step": 23820 - }, - { - "epoch": 2.26, - "learning_rate": 0.00015477320174606186, - "loss": 7.851, - "step": 23830 - }, - { - "epoch": 2.26, - "learning_rate": 0.00015475422281267792, - "loss": 7.8773, - "step": 23840 + "epoch": 11.98, + "learning_rate": 6.007984031936128e-05, + "loss": 1.9332, + "step": 24000 }, { - "epoch": 2.26, - "learning_rate": 0.00015473524387929399, - "loss": 7.9477, - "step": 23850 + "epoch": 12.23, + "learning_rate": 5.924817032601464e-05, + "loss": 1.9048, + "step": 24500 }, { - "epoch": 2.26, - "learning_rate": 0.00015471626494591005, - "loss": 7.9506, - "step": 23860 + "epoch": 12.48, + "learning_rate": 5.8416500332668e-05, + "loss": 1.8943, + "step": 25000 }, { - "epoch": 2.27, - "learning_rate": 0.0001546972860125261, - "loss": 7.8128, - "step": 23870 + "epoch": 12.72, + "learning_rate": 5.758483033932136e-05, + "loss": 1.8837, + "step": 25500 }, { - "epoch": 2.27, - "learning_rate": 0.00015467830707914217, - "loss": 7.9312, - "step": 23880 + "epoch": 12.97, + "learning_rate": 5.675316034597472e-05, + "loss": 1.8606, + "step": 26000 }, { - "epoch": 2.27, - "learning_rate": 0.0001546593281457582, - "loss": 7.9325, - "step": 23890 + "epoch": 13.22, + "learning_rate": 5.592149035262808e-05, + "loss": 1.8289, + "step": 26500 }, { - "epoch": 2.27, - "learning_rate": 0.00015464034921237426, - "loss": 7.8281, - "step": 23900 + "epoch": 13.47, + "learning_rate": 5.508982035928144e-05, + "loss": 1.8209, + "step": 27000 }, { - "epoch": 2.27, - "learning_rate": 0.00015462137027899033, - "loss": 8.0098, - "step": 23910 + "epoch": 13.72, + "learning_rate": 5.42581503659348e-05, + "loss": 1.8284, + "step": 27500 }, { - "epoch": 2.27, - "learning_rate": 0.0001546023913456064, - "loss": 7.7144, - "step": 23920 + "epoch": 13.97, + "learning_rate": 5.342648037258816e-05, + "loss": 1.814, + "step": 28000 }, { - "epoch": 2.27, - "learning_rate": 0.00015458341241222245, - "loss": 7.9032, - "step": 23930 + "epoch": 14.22, + "learning_rate": 5.259481037924152e-05, + "loss": 1.7909, + "step": 28500 }, { - "epoch": 2.27, - "learning_rate": 0.00015456443347883848, - "loss": 7.7935, - "step": 23940 + "epoch": 14.47, + "learning_rate": 5.1763140385894884e-05, + "loss": 1.7712, + "step": 29000 }, { - "epoch": 2.27, - "learning_rate": 0.00015454545454545454, - "loss": 7.8352, - "step": 23950 + "epoch": 14.72, + "learning_rate": 5.093147039254824e-05, + "loss": 1.7695, + "step": 29500 }, { - "epoch": 2.27, - "learning_rate": 0.0001545264756120706, - "loss": 7.8817, - "step": 23960 + "epoch": 14.97, + "learning_rate": 5.0099800399201604e-05, + "loss": 1.7594, + "step": 30000 }, { - "epoch": 2.27, - "learning_rate": 0.00015450749667868667, - "loss": 7.8525, - "step": 23970 + "epoch": 15.22, + "learning_rate": 4.9268130405854955e-05, + "loss": 1.7413, + "step": 30500 }, { - "epoch": 2.28, - "learning_rate": 0.00015448851774530273, - "loss": 7.8043, - "step": 23980 + "epoch": 15.47, + "learning_rate": 4.843646041250832e-05, + "loss": 1.7254, + "step": 31000 }, { - "epoch": 2.28, - "learning_rate": 0.00015446953881191876, - "loss": 7.908, - "step": 23990 + "epoch": 15.72, + "learning_rate": 4.7604790419161675e-05, + "loss": 1.7287, + "step": 31500 }, { - "epoch": 2.28, - "learning_rate": 0.00015445055987853482, - "loss": 7.8206, - "step": 24000 + "epoch": 15.97, + "learning_rate": 4.677312042581504e-05, + "loss": 1.7078, + "step": 32000 } ], - "max_steps": 105380, - "num_train_epochs": 10, - "total_flos": 2.5273221210952704e+16, + "max_steps": 60120, + "num_train_epochs": 30, + "total_flos": 3.3697979301888e+16, "trial_name": null, "trial_params": null }