{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 99.89819919147372, "eval_steps": 500, "global_step": 68000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.030871003307607496, "grad_norm": 0.7940054535865784, "learning_rate": 9.990686274509804e-06, "loss": 3.2723, "step": 21 }, { "epoch": 0.06174200661521499, "grad_norm": 0.320931613445282, "learning_rate": 9.980392156862746e-06, "loss": 2.3933, "step": 42 }, { "epoch": 0.0926130099228225, "grad_norm": 0.36235514283180237, "learning_rate": 9.970098039215688e-06, "loss": 2.2741, "step": 63 }, { "epoch": 0.12348401323042998, "grad_norm": 0.33555838465690613, "learning_rate": 9.959803921568629e-06, "loss": 2.3189, "step": 84 }, { "epoch": 0.1543550165380375, "grad_norm": 0.4002174139022827, "learning_rate": 9.94950980392157e-06, "loss": 2.3926, "step": 105 }, { "epoch": 0.185226019845645, "grad_norm": 0.4205632507801056, "learning_rate": 9.93921568627451e-06, "loss": 2.4199, "step": 126 }, { "epoch": 0.2160970231532525, "grad_norm": 0.4258420467376709, "learning_rate": 9.928921568627453e-06, "loss": 2.2021, "step": 147 }, { "epoch": 0.24696802646085997, "grad_norm": 0.45753806829452515, "learning_rate": 9.918627450980393e-06, "loss": 2.4466, "step": 168 }, { "epoch": 0.27783902976846747, "grad_norm": 0.46691378951072693, "learning_rate": 9.908333333333335e-06, "loss": 2.3134, "step": 189 }, { "epoch": 0.308710033076075, "grad_norm": 0.32314497232437134, "learning_rate": 9.898039215686275e-06, "loss": 2.3165, "step": 210 }, { "epoch": 0.3395810363836825, "grad_norm": 0.6973955035209656, "learning_rate": 9.887745098039217e-06, "loss": 2.3962, "step": 231 }, { "epoch": 0.37045203969129, "grad_norm": 0.3546930253505707, "learning_rate": 9.877450980392159e-06, "loss": 2.413, "step": 252 }, { "epoch": 0.4013230429988975, "grad_norm": 0.5109633207321167, "learning_rate": 9.867156862745099e-06, "loss": 2.292, "step": 273 }, { "epoch": 0.432194046306505, "grad_norm": 0.5256220698356628, "learning_rate": 9.856862745098041e-06, "loss": 2.285, "step": 294 }, { "epoch": 0.46306504961411243, "grad_norm": 0.6690633296966553, "learning_rate": 9.846568627450981e-06, "loss": 2.3489, "step": 315 }, { "epoch": 0.49393605292171994, "grad_norm": 0.6162332892417908, "learning_rate": 9.836274509803923e-06, "loss": 2.3883, "step": 336 }, { "epoch": 0.5248070562293274, "grad_norm": 1.9315634965896606, "learning_rate": 9.825980392156863e-06, "loss": 2.4159, "step": 357 }, { "epoch": 0.5556780595369349, "grad_norm": 0.5455235838890076, "learning_rate": 9.815686274509805e-06, "loss": 2.2694, "step": 378 }, { "epoch": 0.5865490628445424, "grad_norm": 0.6161318421363831, "learning_rate": 9.805392156862747e-06, "loss": 2.4137, "step": 399 }, { "epoch": 0.61742006615215, "grad_norm": 0.5634300112724304, "learning_rate": 9.795098039215687e-06, "loss": 2.376, "step": 420 }, { "epoch": 0.6482910694597575, "grad_norm": 0.9691163897514343, "learning_rate": 9.78480392156863e-06, "loss": 2.2671, "step": 441 }, { "epoch": 0.679162072767365, "grad_norm": 0.7895241379737854, "learning_rate": 9.77450980392157e-06, "loss": 2.3867, "step": 462 }, { "epoch": 0.7100330760749725, "grad_norm": 0.6919421553611755, "learning_rate": 9.764215686274512e-06, "loss": 2.3087, "step": 483 }, { "epoch": 0.74090407938258, "grad_norm": 0.6956549882888794, "learning_rate": 9.753921568627452e-06, "loss": 2.343, "step": 504 }, { "epoch": 0.7717750826901875, "grad_norm": 0.7738545536994934, "learning_rate": 9.743627450980394e-06, "loss": 2.3509, "step": 525 }, { "epoch": 0.802646085997795, "grad_norm": 1.1013460159301758, "learning_rate": 9.733333333333334e-06, "loss": 2.1828, "step": 546 }, { "epoch": 0.8335170893054025, "grad_norm": 1.4819823503494263, "learning_rate": 9.723039215686276e-06, "loss": 2.4565, "step": 567 }, { "epoch": 0.86438809261301, "grad_norm": 0.46090105175971985, "learning_rate": 9.712745098039216e-06, "loss": 2.2541, "step": 588 }, { "epoch": 0.8952590959206174, "grad_norm": 0.7013310790061951, "learning_rate": 9.702450980392158e-06, "loss": 2.2848, "step": 609 }, { "epoch": 0.9261300992282249, "grad_norm": 1.0015063285827637, "learning_rate": 9.69264705882353e-06, "loss": 2.2413, "step": 630 }, { "epoch": 0.9570011025358324, "grad_norm": 0.8772223591804504, "learning_rate": 9.682352941176471e-06, "loss": 2.4909, "step": 651 }, { "epoch": 0.9878721058434399, "grad_norm": 1.0654124021530151, "learning_rate": 9.672058823529413e-06, "loss": 2.375, "step": 672 }, { "epoch": 1.017640573318633, "grad_norm": 1.1609431505203247, "learning_rate": 9.661764705882353e-06, "loss": 2.2078, "step": 693 }, { "epoch": 1.0485115766262403, "grad_norm": 1.7942206859588623, "learning_rate": 9.651470588235295e-06, "loss": 2.3145, "step": 714 }, { "epoch": 1.079382579933848, "grad_norm": 1.071953296661377, "learning_rate": 9.641176470588235e-06, "loss": 2.3541, "step": 735 }, { "epoch": 1.1102535832414553, "grad_norm": 1.6100263595581055, "learning_rate": 9.630882352941177e-06, "loss": 2.2648, "step": 756 }, { "epoch": 1.141124586549063, "grad_norm": 0.9563078880310059, "learning_rate": 9.620588235294117e-06, "loss": 2.3255, "step": 777 }, { "epoch": 1.1719955898566703, "grad_norm": 0.9889941215515137, "learning_rate": 9.61029411764706e-06, "loss": 2.2734, "step": 798 }, { "epoch": 1.202866593164278, "grad_norm": 2.077514886856079, "learning_rate": 9.600000000000001e-06, "loss": 2.3709, "step": 819 }, { "epoch": 1.2337375964718853, "grad_norm": 0.9159446358680725, "learning_rate": 9.589705882352941e-06, "loss": 2.2387, "step": 840 }, { "epoch": 1.264608599779493, "grad_norm": 1.461916208267212, "learning_rate": 9.579411764705883e-06, "loss": 2.2484, "step": 861 }, { "epoch": 1.2954796030871003, "grad_norm": 1.568862795829773, "learning_rate": 9.569117647058824e-06, "loss": 2.1579, "step": 882 }, { "epoch": 1.326350606394708, "grad_norm": 1.566645622253418, "learning_rate": 9.558823529411766e-06, "loss": 2.2645, "step": 903 }, { "epoch": 1.3572216097023153, "grad_norm": 1.0653067827224731, "learning_rate": 9.548529411764706e-06, "loss": 2.3066, "step": 924 }, { "epoch": 1.3880926130099227, "grad_norm": 1.7910544872283936, "learning_rate": 9.538235294117648e-06, "loss": 2.4084, "step": 945 }, { "epoch": 1.4189636163175303, "grad_norm": 0.7429267764091492, "learning_rate": 9.527941176470588e-06, "loss": 2.2003, "step": 966 }, { "epoch": 1.449834619625138, "grad_norm": 0.8549733757972717, "learning_rate": 9.51764705882353e-06, "loss": 2.3083, "step": 987 }, { "epoch": 1.4807056229327453, "grad_norm": 1.5903714895248413, "learning_rate": 9.50735294117647e-06, "loss": 2.3847, "step": 1008 }, { "epoch": 1.5115766262403527, "grad_norm": 1.0798513889312744, "learning_rate": 9.497058823529412e-06, "loss": 2.3322, "step": 1029 }, { "epoch": 1.5424476295479603, "grad_norm": 1.6621721982955933, "learning_rate": 9.486764705882354e-06, "loss": 2.2987, "step": 1050 }, { "epoch": 1.573318632855568, "grad_norm": 1.218017339706421, "learning_rate": 9.476470588235294e-06, "loss": 2.3213, "step": 1071 }, { "epoch": 1.6041896361631753, "grad_norm": 0.7075687050819397, "learning_rate": 9.466176470588236e-06, "loss": 2.2316, "step": 1092 }, { "epoch": 1.6350606394707827, "grad_norm": 1.4596519470214844, "learning_rate": 9.455882352941176e-06, "loss": 2.2417, "step": 1113 }, { "epoch": 1.6659316427783903, "grad_norm": 1.3288954496383667, "learning_rate": 9.445588235294118e-06, "loss": 2.4049, "step": 1134 }, { "epoch": 1.696802646085998, "grad_norm": 1.7839045524597168, "learning_rate": 9.435294117647058e-06, "loss": 2.3342, "step": 1155 }, { "epoch": 1.7276736493936053, "grad_norm": 1.3778377771377563, "learning_rate": 9.425e-06, "loss": 2.2813, "step": 1176 }, { "epoch": 1.7585446527012127, "grad_norm": 1.093371033668518, "learning_rate": 9.414705882352942e-06, "loss": 2.3155, "step": 1197 }, { "epoch": 1.7894156560088201, "grad_norm": 0.8924500346183777, "learning_rate": 9.404411764705883e-06, "loss": 2.2673, "step": 1218 }, { "epoch": 1.8202866593164277, "grad_norm": 1.5816951990127563, "learning_rate": 9.394117647058824e-06, "loss": 2.3399, "step": 1239 }, { "epoch": 1.8511576626240354, "grad_norm": 0.9977998733520508, "learning_rate": 9.383823529411765e-06, "loss": 2.3644, "step": 1260 }, { "epoch": 1.8820286659316428, "grad_norm": 1.0349079370498657, "learning_rate": 9.373529411764707e-06, "loss": 2.2882, "step": 1281 }, { "epoch": 1.9128996692392501, "grad_norm": 1.2130367755889893, "learning_rate": 9.363235294117647e-06, "loss": 2.3467, "step": 1302 }, { "epoch": 1.9437706725468578, "grad_norm": 1.605616807937622, "learning_rate": 9.352941176470589e-06, "loss": 2.3506, "step": 1323 }, { "epoch": 1.9746416758544654, "grad_norm": 1.339097499847412, "learning_rate": 9.342647058823529e-06, "loss": 2.2617, "step": 1344 }, { "epoch": 2.0044101433296584, "grad_norm": 1.1007579565048218, "learning_rate": 9.332352941176471e-06, "loss": 2.1978, "step": 1365 }, { "epoch": 2.035281146637266, "grad_norm": 1.3697082996368408, "learning_rate": 9.322058823529413e-06, "loss": 2.2258, "step": 1386 }, { "epoch": 2.066152149944873, "grad_norm": 1.9182090759277344, "learning_rate": 9.311764705882353e-06, "loss": 2.3566, "step": 1407 }, { "epoch": 2.0970231532524806, "grad_norm": 1.0698152780532837, "learning_rate": 9.301470588235295e-06, "loss": 2.1923, "step": 1428 }, { "epoch": 2.1278941565600884, "grad_norm": 1.8497835397720337, "learning_rate": 9.291176470588235e-06, "loss": 2.2372, "step": 1449 }, { "epoch": 2.158765159867696, "grad_norm": 1.3342701196670532, "learning_rate": 9.280882352941177e-06, "loss": 2.1423, "step": 1470 }, { "epoch": 2.189636163175303, "grad_norm": 1.6012579202651978, "learning_rate": 9.270588235294117e-06, "loss": 2.2783, "step": 1491 }, { "epoch": 2.2205071664829106, "grad_norm": 1.488791584968567, "learning_rate": 9.26029411764706e-06, "loss": 2.209, "step": 1512 }, { "epoch": 2.2513781697905184, "grad_norm": 2.096177101135254, "learning_rate": 9.250000000000001e-06, "loss": 2.2021, "step": 1533 }, { "epoch": 2.282249173098126, "grad_norm": 1.4596940279006958, "learning_rate": 9.239705882352941e-06, "loss": 2.3285, "step": 1554 }, { "epoch": 2.313120176405733, "grad_norm": 1.4805257320404053, "learning_rate": 9.229411764705883e-06, "loss": 2.2665, "step": 1575 }, { "epoch": 2.3439911797133406, "grad_norm": 1.9005323648452759, "learning_rate": 9.219117647058824e-06, "loss": 2.336, "step": 1596 }, { "epoch": 2.374862183020948, "grad_norm": 1.7078076601028442, "learning_rate": 9.208823529411766e-06, "loss": 2.3066, "step": 1617 }, { "epoch": 2.405733186328556, "grad_norm": 1.7854089736938477, "learning_rate": 9.198529411764706e-06, "loss": 2.2063, "step": 1638 }, { "epoch": 2.436604189636163, "grad_norm": 1.8742430210113525, "learning_rate": 9.188235294117648e-06, "loss": 2.2503, "step": 1659 }, { "epoch": 2.4674751929437706, "grad_norm": 2.9032325744628906, "learning_rate": 9.17794117647059e-06, "loss": 2.1626, "step": 1680 }, { "epoch": 2.498346196251378, "grad_norm": 1.505924105644226, "learning_rate": 9.16764705882353e-06, "loss": 2.3102, "step": 1701 }, { "epoch": 2.529217199558986, "grad_norm": 1.779142141342163, "learning_rate": 9.157352941176472e-06, "loss": 2.2732, "step": 1722 }, { "epoch": 2.560088202866593, "grad_norm": 1.6594492197036743, "learning_rate": 9.147058823529412e-06, "loss": 2.1577, "step": 1743 }, { "epoch": 2.5909592061742006, "grad_norm": 1.8729362487792969, "learning_rate": 9.136764705882354e-06, "loss": 2.2017, "step": 1764 }, { "epoch": 2.621830209481808, "grad_norm": 2.1173110008239746, "learning_rate": 9.126470588235294e-06, "loss": 2.3207, "step": 1785 }, { "epoch": 2.652701212789416, "grad_norm": 3.0417325496673584, "learning_rate": 9.116176470588236e-06, "loss": 2.2706, "step": 1806 }, { "epoch": 2.6835722160970232, "grad_norm": 1.361302375793457, "learning_rate": 9.105882352941178e-06, "loss": 2.2701, "step": 1827 }, { "epoch": 2.7144432194046306, "grad_norm": 1.9176990985870361, "learning_rate": 9.095588235294118e-06, "loss": 2.2957, "step": 1848 }, { "epoch": 2.745314222712238, "grad_norm": 1.5876405239105225, "learning_rate": 9.08529411764706e-06, "loss": 2.389, "step": 1869 }, { "epoch": 2.7761852260198454, "grad_norm": 1.3612051010131836, "learning_rate": 9.075e-06, "loss": 2.3024, "step": 1890 }, { "epoch": 2.8070562293274532, "grad_norm": 2.0122861862182617, "learning_rate": 9.064705882352942e-06, "loss": 2.3609, "step": 1911 }, { "epoch": 2.8379272326350606, "grad_norm": 1.4486873149871826, "learning_rate": 9.054411764705883e-06, "loss": 2.2706, "step": 1932 }, { "epoch": 2.868798235942668, "grad_norm": 1.5788092613220215, "learning_rate": 9.044117647058824e-06, "loss": 2.1837, "step": 1953 }, { "epoch": 2.899669239250276, "grad_norm": 1.0288219451904297, "learning_rate": 9.033823529411766e-06, "loss": 2.2825, "step": 1974 }, { "epoch": 2.9305402425578833, "grad_norm": 1.6598355770111084, "learning_rate": 9.023529411764707e-06, "loss": 2.2774, "step": 1995 }, { "epoch": 2.9614112458654906, "grad_norm": 1.5685467720031738, "learning_rate": 9.013235294117649e-06, "loss": 2.1978, "step": 2016 }, { "epoch": 2.992282249173098, "grad_norm": 2.00368070602417, "learning_rate": 9.002941176470589e-06, "loss": 2.2677, "step": 2037 }, { "epoch": 3.022050716648291, "grad_norm": 1.7239810228347778, "learning_rate": 8.99264705882353e-06, "loss": 2.1242, "step": 2058 }, { "epoch": 3.0529217199558984, "grad_norm": 2.0060484409332275, "learning_rate": 8.982352941176471e-06, "loss": 2.2234, "step": 2079 }, { "epoch": 3.083792723263506, "grad_norm": 2.2679460048675537, "learning_rate": 8.972058823529413e-06, "loss": 2.1935, "step": 2100 }, { "epoch": 3.1146637265711137, "grad_norm": 2.3541860580444336, "learning_rate": 8.961764705882353e-06, "loss": 2.3283, "step": 2121 }, { "epoch": 3.145534729878721, "grad_norm": 1.637447714805603, "learning_rate": 8.951470588235295e-06, "loss": 2.2094, "step": 2142 }, { "epoch": 3.1764057331863285, "grad_norm": 2.7666993141174316, "learning_rate": 8.941176470588237e-06, "loss": 2.2272, "step": 2163 }, { "epoch": 3.207276736493936, "grad_norm": 2.9229938983917236, "learning_rate": 8.930882352941177e-06, "loss": 2.2553, "step": 2184 }, { "epoch": 3.2381477398015437, "grad_norm": 2.722456216812134, "learning_rate": 8.920588235294119e-06, "loss": 2.3223, "step": 2205 }, { "epoch": 3.269018743109151, "grad_norm": 1.965293526649475, "learning_rate": 8.91029411764706e-06, "loss": 2.2935, "step": 2226 }, { "epoch": 3.2998897464167585, "grad_norm": 1.7254505157470703, "learning_rate": 8.900000000000001e-06, "loss": 2.2514, "step": 2247 }, { "epoch": 3.330760749724366, "grad_norm": 2.369128942489624, "learning_rate": 8.889705882352941e-06, "loss": 2.2861, "step": 2268 }, { "epoch": 3.3616317530319737, "grad_norm": 2.42130184173584, "learning_rate": 8.879411764705883e-06, "loss": 2.343, "step": 2289 }, { "epoch": 3.392502756339581, "grad_norm": 2.222886085510254, "learning_rate": 8.869117647058825e-06, "loss": 2.2132, "step": 2310 }, { "epoch": 3.4233737596471885, "grad_norm": 2.613894462585449, "learning_rate": 8.858823529411765e-06, "loss": 2.2409, "step": 2331 }, { "epoch": 3.454244762954796, "grad_norm": 2.4033560752868652, "learning_rate": 8.848529411764707e-06, "loss": 2.1564, "step": 2352 }, { "epoch": 3.4851157662624037, "grad_norm": 1.3745297193527222, "learning_rate": 8.838235294117648e-06, "loss": 2.2523, "step": 2373 }, { "epoch": 3.515986769570011, "grad_norm": 1.4414029121398926, "learning_rate": 8.82794117647059e-06, "loss": 2.1393, "step": 2394 }, { "epoch": 3.5468577728776185, "grad_norm": 2.2887678146362305, "learning_rate": 8.81764705882353e-06, "loss": 2.2748, "step": 2415 }, { "epoch": 3.577728776185226, "grad_norm": 1.8498324155807495, "learning_rate": 8.807352941176472e-06, "loss": 2.1742, "step": 2436 }, { "epoch": 3.6085997794928337, "grad_norm": 3.1975903511047363, "learning_rate": 8.797058823529414e-06, "loss": 2.3019, "step": 2457 }, { "epoch": 3.639470782800441, "grad_norm": 2.0401833057403564, "learning_rate": 8.786764705882354e-06, "loss": 2.2753, "step": 2478 }, { "epoch": 3.6703417861080485, "grad_norm": 2.023527145385742, "learning_rate": 8.776470588235296e-06, "loss": 2.2265, "step": 2499 }, { "epoch": 3.701212789415656, "grad_norm": 2.5720412731170654, "learning_rate": 8.766176470588236e-06, "loss": 2.2168, "step": 2520 }, { "epoch": 3.7320837927232633, "grad_norm": 2.1886861324310303, "learning_rate": 8.755882352941178e-06, "loss": 2.2842, "step": 2541 }, { "epoch": 3.762954796030871, "grad_norm": 1.7179489135742188, "learning_rate": 8.745588235294118e-06, "loss": 2.3208, "step": 2562 }, { "epoch": 3.7938257993384785, "grad_norm": 2.382472038269043, "learning_rate": 8.73529411764706e-06, "loss": 2.2513, "step": 2583 }, { "epoch": 3.824696802646086, "grad_norm": 2.7874560356140137, "learning_rate": 8.725000000000002e-06, "loss": 2.177, "step": 2604 }, { "epoch": 3.8555678059536937, "grad_norm": 2.724936008453369, "learning_rate": 8.714705882352942e-06, "loss": 2.2365, "step": 2625 }, { "epoch": 3.886438809261301, "grad_norm": 2.303502082824707, "learning_rate": 8.704411764705884e-06, "loss": 2.2006, "step": 2646 }, { "epoch": 3.9173098125689085, "grad_norm": 2.214660406112671, "learning_rate": 8.694117647058824e-06, "loss": 2.2227, "step": 2667 }, { "epoch": 3.948180815876516, "grad_norm": 3.070887804031372, "learning_rate": 8.683823529411766e-06, "loss": 2.1777, "step": 2688 }, { "epoch": 3.9790518191841233, "grad_norm": 2.8172695636749268, "learning_rate": 8.674019607843137e-06, "loss": 2.3141, "step": 2709 }, { "epoch": 4.008820286659317, "grad_norm": 2.524413585662842, "learning_rate": 8.66372549019608e-06, "loss": 2.1247, "step": 2730 }, { "epoch": 4.039691289966924, "grad_norm": 2.7337212562561035, "learning_rate": 8.653431372549021e-06, "loss": 2.1418, "step": 2751 }, { "epoch": 4.070562293274532, "grad_norm": 3.4945359230041504, "learning_rate": 8.643137254901961e-06, "loss": 2.1551, "step": 2772 }, { "epoch": 4.1014332965821385, "grad_norm": 1.8705073595046997, "learning_rate": 8.632843137254903e-06, "loss": 2.1996, "step": 2793 }, { "epoch": 4.132304299889746, "grad_norm": 2.1908445358276367, "learning_rate": 8.622549019607844e-06, "loss": 2.214, "step": 2814 }, { "epoch": 4.163175303197354, "grad_norm": 2.8475587368011475, "learning_rate": 8.612254901960786e-06, "loss": 2.2809, "step": 2835 }, { "epoch": 4.194046306504961, "grad_norm": 2.8551456928253174, "learning_rate": 8.601960784313726e-06, "loss": 2.2141, "step": 2856 }, { "epoch": 4.224917309812569, "grad_norm": 2.450998544692993, "learning_rate": 8.591666666666668e-06, "loss": 2.3511, "step": 2877 }, { "epoch": 4.255788313120177, "grad_norm": 2.4303412437438965, "learning_rate": 8.581372549019608e-06, "loss": 2.171, "step": 2898 }, { "epoch": 4.286659316427784, "grad_norm": 1.9172242879867554, "learning_rate": 8.57107843137255e-06, "loss": 2.1682, "step": 2919 }, { "epoch": 4.317530319735392, "grad_norm": 2.9315176010131836, "learning_rate": 8.56078431372549e-06, "loss": 2.3503, "step": 2940 }, { "epoch": 4.3484013230429985, "grad_norm": 2.2080533504486084, "learning_rate": 8.550490196078432e-06, "loss": 2.1846, "step": 2961 }, { "epoch": 4.379272326350606, "grad_norm": 2.6244797706604004, "learning_rate": 8.540196078431374e-06, "loss": 2.236, "step": 2982 }, { "epoch": 4.410143329658214, "grad_norm": 2.147533655166626, "learning_rate": 8.529901960784314e-06, "loss": 2.2606, "step": 3003 }, { "epoch": 4.441014332965821, "grad_norm": 1.7855578660964966, "learning_rate": 8.519607843137256e-06, "loss": 2.2133, "step": 3024 }, { "epoch": 4.471885336273429, "grad_norm": 1.8556910753250122, "learning_rate": 8.509313725490196e-06, "loss": 2.0927, "step": 3045 }, { "epoch": 4.502756339581037, "grad_norm": 2.6865439414978027, "learning_rate": 8.499019607843138e-06, "loss": 2.1611, "step": 3066 }, { "epoch": 4.533627342888644, "grad_norm": 2.3935399055480957, "learning_rate": 8.488725490196078e-06, "loss": 2.2558, "step": 3087 }, { "epoch": 4.564498346196252, "grad_norm": 2.4272029399871826, "learning_rate": 8.47843137254902e-06, "loss": 2.1133, "step": 3108 }, { "epoch": 4.595369349503859, "grad_norm": 2.0114336013793945, "learning_rate": 8.46813725490196e-06, "loss": 2.2369, "step": 3129 }, { "epoch": 4.626240352811466, "grad_norm": 2.586351156234741, "learning_rate": 8.457843137254903e-06, "loss": 2.4161, "step": 3150 }, { "epoch": 4.657111356119074, "grad_norm": 2.777594804763794, "learning_rate": 8.447549019607843e-06, "loss": 2.2647, "step": 3171 }, { "epoch": 4.687982359426681, "grad_norm": 2.0153422355651855, "learning_rate": 8.437254901960785e-06, "loss": 2.2976, "step": 3192 }, { "epoch": 4.718853362734289, "grad_norm": 2.486306667327881, "learning_rate": 8.426960784313727e-06, "loss": 2.1799, "step": 3213 }, { "epoch": 4.749724366041896, "grad_norm": 3.186861276626587, "learning_rate": 8.416666666666667e-06, "loss": 2.2084, "step": 3234 }, { "epoch": 4.780595369349504, "grad_norm": 2.2380778789520264, "learning_rate": 8.406372549019609e-06, "loss": 2.0764, "step": 3255 }, { "epoch": 4.811466372657112, "grad_norm": 3.3927717208862305, "learning_rate": 8.396078431372549e-06, "loss": 2.1141, "step": 3276 }, { "epoch": 4.842337375964719, "grad_norm": 2.017042875289917, "learning_rate": 8.385784313725491e-06, "loss": 2.1955, "step": 3297 }, { "epoch": 4.873208379272326, "grad_norm": 2.798187494277954, "learning_rate": 8.375490196078431e-06, "loss": 2.245, "step": 3318 }, { "epoch": 4.904079382579933, "grad_norm": 2.209864377975464, "learning_rate": 8.365196078431373e-06, "loss": 2.1835, "step": 3339 }, { "epoch": 4.934950385887541, "grad_norm": 2.4622409343719482, "learning_rate": 8.354901960784313e-06, "loss": 2.2274, "step": 3360 }, { "epoch": 4.965821389195149, "grad_norm": 1.7298108339309692, "learning_rate": 8.344607843137255e-06, "loss": 2.1089, "step": 3381 }, { "epoch": 4.996692392502756, "grad_norm": 3.1159820556640625, "learning_rate": 8.334313725490195e-06, "loss": 2.1574, "step": 3402 }, { "epoch": 5.0264608599779494, "grad_norm": 1.9754016399383545, "learning_rate": 8.324019607843137e-06, "loss": 2.0488, "step": 3423 }, { "epoch": 5.057331863285556, "grad_norm": 2.3963258266448975, "learning_rate": 8.31372549019608e-06, "loss": 2.2752, "step": 3444 }, { "epoch": 5.088202866593164, "grad_norm": 3.8797078132629395, "learning_rate": 8.30343137254902e-06, "loss": 2.1056, "step": 3465 }, { "epoch": 5.119073869900772, "grad_norm": 2.89670467376709, "learning_rate": 8.293137254901961e-06, "loss": 2.1916, "step": 3486 }, { "epoch": 5.149944873208379, "grad_norm": 3.1157844066619873, "learning_rate": 8.282843137254902e-06, "loss": 2.1939, "step": 3507 }, { "epoch": 5.180815876515987, "grad_norm": 3.1985909938812256, "learning_rate": 8.272549019607844e-06, "loss": 2.1605, "step": 3528 }, { "epoch": 5.211686879823595, "grad_norm": 2.626016855239868, "learning_rate": 8.262254901960784e-06, "loss": 2.185, "step": 3549 }, { "epoch": 5.242557883131202, "grad_norm": 2.8655667304992676, "learning_rate": 8.251960784313726e-06, "loss": 2.1658, "step": 3570 }, { "epoch": 5.2734288864388095, "grad_norm": 2.5003795623779297, "learning_rate": 8.241666666666668e-06, "loss": 2.2202, "step": 3591 }, { "epoch": 5.304299889746416, "grad_norm": 3.0103225708007812, "learning_rate": 8.231372549019608e-06, "loss": 2.251, "step": 3612 }, { "epoch": 5.335170893054024, "grad_norm": 2.7656571865081787, "learning_rate": 8.22107843137255e-06, "loss": 2.1386, "step": 3633 }, { "epoch": 5.366041896361632, "grad_norm": 2.1533167362213135, "learning_rate": 8.21078431372549e-06, "loss": 2.0554, "step": 3654 }, { "epoch": 5.396912899669239, "grad_norm": 3.6254355907440186, "learning_rate": 8.200490196078432e-06, "loss": 2.1868, "step": 3675 }, { "epoch": 5.427783902976847, "grad_norm": 3.0589587688446045, "learning_rate": 8.190196078431372e-06, "loss": 2.2171, "step": 3696 }, { "epoch": 5.458654906284455, "grad_norm": 3.7360947132110596, "learning_rate": 8.179901960784314e-06, "loss": 2.191, "step": 3717 }, { "epoch": 5.489525909592062, "grad_norm": 2.9356765747070312, "learning_rate": 8.169607843137256e-06, "loss": 2.0895, "step": 3738 }, { "epoch": 5.5203969128996695, "grad_norm": 2.677701950073242, "learning_rate": 8.159313725490196e-06, "loss": 2.1751, "step": 3759 }, { "epoch": 5.551267916207276, "grad_norm": 2.194253444671631, "learning_rate": 8.149019607843138e-06, "loss": 2.1054, "step": 3780 }, { "epoch": 5.582138919514884, "grad_norm": 2.6590781211853027, "learning_rate": 8.138725490196078e-06, "loss": 2.1846, "step": 3801 }, { "epoch": 5.613009922822492, "grad_norm": 2.1141388416290283, "learning_rate": 8.12843137254902e-06, "loss": 2.1489, "step": 3822 }, { "epoch": 5.643880926130099, "grad_norm": 3.1729912757873535, "learning_rate": 8.11813725490196e-06, "loss": 2.098, "step": 3843 }, { "epoch": 5.674751929437707, "grad_norm": 2.9490084648132324, "learning_rate": 8.107843137254902e-06, "loss": 2.1878, "step": 3864 }, { "epoch": 5.705622932745314, "grad_norm": 1.9357086420059204, "learning_rate": 8.097549019607844e-06, "loss": 2.219, "step": 3885 }, { "epoch": 5.736493936052922, "grad_norm": 2.848104476928711, "learning_rate": 8.087254901960785e-06, "loss": 2.2125, "step": 3906 }, { "epoch": 5.7673649393605295, "grad_norm": 2.5720698833465576, "learning_rate": 8.076960784313727e-06, "loss": 2.1322, "step": 3927 }, { "epoch": 5.7982359426681365, "grad_norm": 2.406402349472046, "learning_rate": 8.066666666666667e-06, "loss": 2.2639, "step": 3948 }, { "epoch": 5.829106945975744, "grad_norm": 2.501682758331299, "learning_rate": 8.056372549019609e-06, "loss": 2.1916, "step": 3969 }, { "epoch": 5.859977949283351, "grad_norm": 2.7590603828430176, "learning_rate": 8.046078431372549e-06, "loss": 2.2604, "step": 3990 }, { "epoch": 5.890848952590959, "grad_norm": 3.130591630935669, "learning_rate": 8.035784313725491e-06, "loss": 2.1121, "step": 4011 }, { "epoch": 5.921719955898567, "grad_norm": 2.693161725997925, "learning_rate": 8.025490196078431e-06, "loss": 2.2319, "step": 4032 }, { "epoch": 5.952590959206174, "grad_norm": 3.0124995708465576, "learning_rate": 8.015196078431373e-06, "loss": 2.1797, "step": 4053 }, { "epoch": 5.983461962513782, "grad_norm": 3.3224260807037354, "learning_rate": 8.004901960784315e-06, "loss": 2.2071, "step": 4074 }, { "epoch": 6.013230429988974, "grad_norm": 2.305988073348999, "learning_rate": 7.994607843137255e-06, "loss": 2.0385, "step": 4095 }, { "epoch": 6.044101433296582, "grad_norm": 2.905613660812378, "learning_rate": 7.984313725490197e-06, "loss": 2.194, "step": 4116 }, { "epoch": 6.07497243660419, "grad_norm": 2.968784809112549, "learning_rate": 7.974019607843137e-06, "loss": 2.2794, "step": 4137 }, { "epoch": 6.105843439911797, "grad_norm": 2.8147284984588623, "learning_rate": 7.96372549019608e-06, "loss": 2.1266, "step": 4158 }, { "epoch": 6.136714443219405, "grad_norm": 2.993699073791504, "learning_rate": 7.95343137254902e-06, "loss": 2.1956, "step": 4179 }, { "epoch": 6.167585446527012, "grad_norm": 1.8021845817565918, "learning_rate": 7.943137254901961e-06, "loss": 2.1361, "step": 4200 }, { "epoch": 6.1984564498346195, "grad_norm": 2.0478949546813965, "learning_rate": 7.932843137254903e-06, "loss": 2.1633, "step": 4221 }, { "epoch": 6.229327453142227, "grad_norm": 2.401198387145996, "learning_rate": 7.922549019607844e-06, "loss": 2.2218, "step": 4242 }, { "epoch": 6.260198456449834, "grad_norm": 3.1930980682373047, "learning_rate": 7.912254901960785e-06, "loss": 2.2991, "step": 4263 }, { "epoch": 6.291069459757442, "grad_norm": 2.734412908554077, "learning_rate": 7.901960784313726e-06, "loss": 2.0321, "step": 4284 }, { "epoch": 6.32194046306505, "grad_norm": 1.8950814008712769, "learning_rate": 7.891666666666668e-06, "loss": 2.209, "step": 4305 }, { "epoch": 6.352811466372657, "grad_norm": 2.232196807861328, "learning_rate": 7.881372549019608e-06, "loss": 2.1485, "step": 4326 }, { "epoch": 6.383682469680265, "grad_norm": 2.378868579864502, "learning_rate": 7.87107843137255e-06, "loss": 2.1191, "step": 4347 }, { "epoch": 6.414553472987872, "grad_norm": 2.716021776199341, "learning_rate": 7.860784313725492e-06, "loss": 2.1856, "step": 4368 }, { "epoch": 6.4454244762954795, "grad_norm": 2.920015335083008, "learning_rate": 7.850490196078432e-06, "loss": 2.1965, "step": 4389 }, { "epoch": 6.476295479603087, "grad_norm": 2.696876287460327, "learning_rate": 7.840196078431374e-06, "loss": 2.1233, "step": 4410 }, { "epoch": 6.507166482910694, "grad_norm": 2.693169593811035, "learning_rate": 7.829901960784314e-06, "loss": 2.1467, "step": 4431 }, { "epoch": 6.538037486218302, "grad_norm": 2.373929023742676, "learning_rate": 7.819607843137256e-06, "loss": 2.2374, "step": 4452 }, { "epoch": 6.56890848952591, "grad_norm": 2.026304244995117, "learning_rate": 7.809313725490196e-06, "loss": 2.2376, "step": 4473 }, { "epoch": 6.599779492833517, "grad_norm": 3.8536412715911865, "learning_rate": 7.799019607843138e-06, "loss": 2.2277, "step": 4494 }, { "epoch": 6.630650496141125, "grad_norm": 3.2263355255126953, "learning_rate": 7.78872549019608e-06, "loss": 2.2015, "step": 4515 }, { "epoch": 6.661521499448732, "grad_norm": 2.5766403675079346, "learning_rate": 7.77843137254902e-06, "loss": 2.1721, "step": 4536 }, { "epoch": 6.69239250275634, "grad_norm": 2.6397998332977295, "learning_rate": 7.768137254901962e-06, "loss": 2.1238, "step": 4557 }, { "epoch": 6.723263506063947, "grad_norm": 2.5694289207458496, "learning_rate": 7.757843137254902e-06, "loss": 2.2221, "step": 4578 }, { "epoch": 6.754134509371554, "grad_norm": 2.568444013595581, "learning_rate": 7.747549019607844e-06, "loss": 2.1968, "step": 4599 }, { "epoch": 6.785005512679162, "grad_norm": 3.3770861625671387, "learning_rate": 7.737254901960785e-06, "loss": 2.2389, "step": 4620 }, { "epoch": 6.815876515986769, "grad_norm": 2.6761832237243652, "learning_rate": 7.726960784313727e-06, "loss": 2.1979, "step": 4641 }, { "epoch": 6.846747519294377, "grad_norm": 3.2881479263305664, "learning_rate": 7.716666666666667e-06, "loss": 2.1772, "step": 4662 }, { "epoch": 6.877618522601985, "grad_norm": 2.812613010406494, "learning_rate": 7.706372549019609e-06, "loss": 2.2137, "step": 4683 }, { "epoch": 6.908489525909592, "grad_norm": 2.3768069744110107, "learning_rate": 7.69607843137255e-06, "loss": 2.165, "step": 4704 }, { "epoch": 6.9393605292172, "grad_norm": 2.822144031524658, "learning_rate": 7.68578431372549e-06, "loss": 2.173, "step": 4725 }, { "epoch": 6.970231532524807, "grad_norm": 2.994647979736328, "learning_rate": 7.675490196078433e-06, "loss": 2.2263, "step": 4746 }, { "epoch": 7.0, "grad_norm": 0.8841678500175476, "learning_rate": 7.665196078431373e-06, "loss": 1.9977, "step": 4767 }, { "epoch": 7.030871003307608, "grad_norm": 2.560551166534424, "learning_rate": 7.654901960784315e-06, "loss": 2.2829, "step": 4788 }, { "epoch": 7.061742006615215, "grad_norm": 2.5852670669555664, "learning_rate": 7.644607843137255e-06, "loss": 2.1318, "step": 4809 }, { "epoch": 7.092613009922823, "grad_norm": 3.1351985931396484, "learning_rate": 7.634313725490197e-06, "loss": 2.0308, "step": 4830 }, { "epoch": 7.12348401323043, "grad_norm": 2.3059654235839844, "learning_rate": 7.624019607843138e-06, "loss": 2.0593, "step": 4851 }, { "epoch": 7.154355016538037, "grad_norm": 2.390005111694336, "learning_rate": 7.613725490196079e-06, "loss": 2.2439, "step": 4872 }, { "epoch": 7.185226019845645, "grad_norm": 2.5935399532318115, "learning_rate": 7.603921568627451e-06, "loss": 2.2061, "step": 4893 }, { "epoch": 7.216097023153252, "grad_norm": 2.6763274669647217, "learning_rate": 7.593627450980393e-06, "loss": 2.2092, "step": 4914 }, { "epoch": 7.24696802646086, "grad_norm": 2.63897967338562, "learning_rate": 7.583333333333333e-06, "loss": 2.1148, "step": 4935 }, { "epoch": 7.277839029768468, "grad_norm": 2.247389078140259, "learning_rate": 7.573039215686275e-06, "loss": 2.1387, "step": 4956 }, { "epoch": 7.308710033076075, "grad_norm": 3.124016046524048, "learning_rate": 7.5627450980392154e-06, "loss": 2.1628, "step": 4977 }, { "epoch": 7.339581036383683, "grad_norm": 3.1294453144073486, "learning_rate": 7.552450980392157e-06, "loss": 2.0332, "step": 4998 }, { "epoch": 7.37045203969129, "grad_norm": 3.3883707523345947, "learning_rate": 7.542156862745099e-06, "loss": 2.1588, "step": 5019 }, { "epoch": 7.401323042998897, "grad_norm": 2.930655002593994, "learning_rate": 7.5318627450980395e-06, "loss": 2.1299, "step": 5040 }, { "epoch": 7.432194046306505, "grad_norm": 2.963660478591919, "learning_rate": 7.5215686274509814e-06, "loss": 2.0596, "step": 5061 }, { "epoch": 7.463065049614112, "grad_norm": 2.7922561168670654, "learning_rate": 7.511274509803922e-06, "loss": 2.0655, "step": 5082 }, { "epoch": 7.49393605292172, "grad_norm": 2.352179765701294, "learning_rate": 7.500980392156864e-06, "loss": 2.1341, "step": 5103 }, { "epoch": 7.524807056229328, "grad_norm": 3.8016703128814697, "learning_rate": 7.490686274509804e-06, "loss": 2.1156, "step": 5124 }, { "epoch": 7.555678059536935, "grad_norm": 3.2462334632873535, "learning_rate": 7.480392156862746e-06, "loss": 2.25, "step": 5145 }, { "epoch": 7.586549062844543, "grad_norm": 2.0880022048950195, "learning_rate": 7.470098039215688e-06, "loss": 2.2028, "step": 5166 }, { "epoch": 7.61742006615215, "grad_norm": 2.5556790828704834, "learning_rate": 7.459803921568628e-06, "loss": 2.0374, "step": 5187 }, { "epoch": 7.6482910694597575, "grad_norm": 3.2562601566314697, "learning_rate": 7.44950980392157e-06, "loss": 2.131, "step": 5208 }, { "epoch": 7.679162072767365, "grad_norm": 2.942624807357788, "learning_rate": 7.43921568627451e-06, "loss": 2.1297, "step": 5229 }, { "epoch": 7.710033076074972, "grad_norm": 2.845376968383789, "learning_rate": 7.428921568627452e-06, "loss": 2.1408, "step": 5250 }, { "epoch": 7.74090407938258, "grad_norm": 2.4782392978668213, "learning_rate": 7.418627450980392e-06, "loss": 2.1182, "step": 5271 }, { "epoch": 7.771775082690187, "grad_norm": 4.134006023406982, "learning_rate": 7.408333333333334e-06, "loss": 2.1222, "step": 5292 }, { "epoch": 7.802646085997795, "grad_norm": 2.537743330001831, "learning_rate": 7.398039215686274e-06, "loss": 2.0851, "step": 5313 }, { "epoch": 7.833517089305403, "grad_norm": 3.0612194538116455, "learning_rate": 7.387745098039216e-06, "loss": 2.2203, "step": 5334 }, { "epoch": 7.86438809261301, "grad_norm": 2.5010664463043213, "learning_rate": 7.377450980392158e-06, "loss": 2.0808, "step": 5355 }, { "epoch": 7.8952590959206175, "grad_norm": 2.4548556804656982, "learning_rate": 7.367156862745098e-06, "loss": 2.1164, "step": 5376 }, { "epoch": 7.926130099228224, "grad_norm": 3.158270835876465, "learning_rate": 7.35686274509804e-06, "loss": 2.2378, "step": 5397 }, { "epoch": 7.957001102535832, "grad_norm": 3.6559364795684814, "learning_rate": 7.3465686274509806e-06, "loss": 2.1605, "step": 5418 }, { "epoch": 7.98787210584344, "grad_norm": 3.131807804107666, "learning_rate": 7.3362745098039225e-06, "loss": 2.1603, "step": 5439 }, { "epoch": 8.017640573318634, "grad_norm": 2.334476947784424, "learning_rate": 7.325980392156863e-06, "loss": 2.0486, "step": 5460 }, { "epoch": 8.04851157662624, "grad_norm": 3.2329330444335938, "learning_rate": 7.315686274509805e-06, "loss": 2.1824, "step": 5481 }, { "epoch": 8.079382579933847, "grad_norm": 3.517030715942383, "learning_rate": 7.3053921568627466e-06, "loss": 2.1195, "step": 5502 }, { "epoch": 8.110253583241455, "grad_norm": 2.7786850929260254, "learning_rate": 7.295098039215687e-06, "loss": 2.1134, "step": 5523 }, { "epoch": 8.141124586549063, "grad_norm": 2.952907085418701, "learning_rate": 7.284803921568629e-06, "loss": 2.1647, "step": 5544 }, { "epoch": 8.171995589856671, "grad_norm": 3.045341968536377, "learning_rate": 7.274509803921569e-06, "loss": 2.0899, "step": 5565 }, { "epoch": 8.202866593164277, "grad_norm": 2.412834882736206, "learning_rate": 7.264215686274511e-06, "loss": 2.1993, "step": 5586 }, { "epoch": 8.233737596471885, "grad_norm": 2.3728556632995605, "learning_rate": 7.253921568627451e-06, "loss": 2.1946, "step": 5607 }, { "epoch": 8.264608599779493, "grad_norm": 3.4004909992218018, "learning_rate": 7.243627450980393e-06, "loss": 2.1066, "step": 5628 }, { "epoch": 8.2954796030871, "grad_norm": 3.052344560623169, "learning_rate": 7.233333333333334e-06, "loss": 2.1316, "step": 5649 }, { "epoch": 8.326350606394708, "grad_norm": 3.6270687580108643, "learning_rate": 7.223039215686275e-06, "loss": 2.1767, "step": 5670 }, { "epoch": 8.357221609702314, "grad_norm": 2.74826717376709, "learning_rate": 7.212745098039216e-06, "loss": 2.0877, "step": 5691 }, { "epoch": 8.388092613009922, "grad_norm": 2.5722084045410156, "learning_rate": 7.202450980392157e-06, "loss": 1.9884, "step": 5712 }, { "epoch": 8.41896361631753, "grad_norm": 3.1915769577026367, "learning_rate": 7.192156862745099e-06, "loss": 2.0973, "step": 5733 }, { "epoch": 8.449834619625138, "grad_norm": 3.181138277053833, "learning_rate": 7.1818627450980395e-06, "loss": 2.1478, "step": 5754 }, { "epoch": 8.480705622932746, "grad_norm": 2.320770502090454, "learning_rate": 7.171568627450981e-06, "loss": 2.2276, "step": 5775 }, { "epoch": 8.511576626240354, "grad_norm": 3.598928928375244, "learning_rate": 7.1612745098039225e-06, "loss": 2.2547, "step": 5796 }, { "epoch": 8.54244762954796, "grad_norm": 2.7235798835754395, "learning_rate": 7.1509803921568636e-06, "loss": 2.2114, "step": 5817 }, { "epoch": 8.573318632855568, "grad_norm": 3.4046123027801514, "learning_rate": 7.140686274509805e-06, "loss": 2.0938, "step": 5838 }, { "epoch": 8.604189636163175, "grad_norm": 3.1639325618743896, "learning_rate": 7.130392156862746e-06, "loss": 2.1363, "step": 5859 }, { "epoch": 8.635060639470783, "grad_norm": 3.2247660160064697, "learning_rate": 7.120098039215687e-06, "loss": 2.2053, "step": 5880 }, { "epoch": 8.665931642778391, "grad_norm": 3.5176591873168945, "learning_rate": 7.109803921568628e-06, "loss": 2.1747, "step": 5901 }, { "epoch": 8.696802646085997, "grad_norm": 2.0369083881378174, "learning_rate": 7.099509803921569e-06, "loss": 2.0933, "step": 5922 }, { "epoch": 8.727673649393605, "grad_norm": 4.293984413146973, "learning_rate": 7.08921568627451e-06, "loss": 2.2752, "step": 5943 }, { "epoch": 8.758544652701213, "grad_norm": 3.280562162399292, "learning_rate": 7.078921568627451e-06, "loss": 2.2512, "step": 5964 }, { "epoch": 8.78941565600882, "grad_norm": 3.1418211460113525, "learning_rate": 7.068627450980393e-06, "loss": 2.0804, "step": 5985 }, { "epoch": 8.820286659316428, "grad_norm": 3.2922585010528564, "learning_rate": 7.058333333333334e-06, "loss": 2.1726, "step": 6006 }, { "epoch": 8.851157662624034, "grad_norm": 3.8439033031463623, "learning_rate": 7.048039215686275e-06, "loss": 2.0561, "step": 6027 }, { "epoch": 8.882028665931642, "grad_norm": 2.2356507778167725, "learning_rate": 7.037745098039216e-06, "loss": 2.0462, "step": 6048 }, { "epoch": 8.91289966923925, "grad_norm": 3.2431116104125977, "learning_rate": 7.027450980392157e-06, "loss": 2.1178, "step": 6069 }, { "epoch": 8.943770672546858, "grad_norm": 3.199249267578125, "learning_rate": 7.017156862745098e-06, "loss": 2.0562, "step": 6090 }, { "epoch": 8.974641675854466, "grad_norm": 2.0143613815307617, "learning_rate": 7.0068627450980395e-06, "loss": 2.2118, "step": 6111 }, { "epoch": 9.004410143329658, "grad_norm": 2.6825101375579834, "learning_rate": 6.996568627450981e-06, "loss": 2.0181, "step": 6132 }, { "epoch": 9.035281146637265, "grad_norm": 3.3078560829162598, "learning_rate": 6.986274509803922e-06, "loss": 2.1425, "step": 6153 }, { "epoch": 9.066152149944873, "grad_norm": 3.204214334487915, "learning_rate": 6.9759803921568635e-06, "loss": 2.0554, "step": 6174 }, { "epoch": 9.097023153252481, "grad_norm": 2.2293057441711426, "learning_rate": 6.965686274509804e-06, "loss": 2.0393, "step": 6195 }, { "epoch": 9.127894156560089, "grad_norm": 2.959235429763794, "learning_rate": 6.955392156862746e-06, "loss": 2.0141, "step": 6216 }, { "epoch": 9.158765159867695, "grad_norm": 3.337249755859375, "learning_rate": 6.945098039215687e-06, "loss": 2.1386, "step": 6237 }, { "epoch": 9.189636163175303, "grad_norm": 3.5160326957702637, "learning_rate": 6.934803921568628e-06, "loss": 2.0975, "step": 6258 }, { "epoch": 9.22050716648291, "grad_norm": 3.6730356216430664, "learning_rate": 6.92450980392157e-06, "loss": 2.0231, "step": 6279 }, { "epoch": 9.251378169790518, "grad_norm": 2.872706890106201, "learning_rate": 6.91421568627451e-06, "loss": 2.2048, "step": 6300 }, { "epoch": 9.282249173098126, "grad_norm": 3.1090550422668457, "learning_rate": 6.903921568627452e-06, "loss": 2.1596, "step": 6321 }, { "epoch": 9.313120176405732, "grad_norm": 2.391374349594116, "learning_rate": 6.893627450980392e-06, "loss": 2.0706, "step": 6342 }, { "epoch": 9.34399117971334, "grad_norm": 3.1433513164520264, "learning_rate": 6.883333333333334e-06, "loss": 2.2176, "step": 6363 }, { "epoch": 9.374862183020948, "grad_norm": 2.974668264389038, "learning_rate": 6.873039215686274e-06, "loss": 2.1334, "step": 6384 }, { "epoch": 9.405733186328556, "grad_norm": 2.1751785278320312, "learning_rate": 6.862745098039216e-06, "loss": 2.1712, "step": 6405 }, { "epoch": 9.436604189636164, "grad_norm": 3.2119059562683105, "learning_rate": 6.852450980392158e-06, "loss": 2.0204, "step": 6426 }, { "epoch": 9.467475192943771, "grad_norm": 2.9304966926574707, "learning_rate": 6.842156862745098e-06, "loss": 2.0646, "step": 6447 }, { "epoch": 9.498346196251378, "grad_norm": 3.7001969814300537, "learning_rate": 6.83186274509804e-06, "loss": 2.2128, "step": 6468 }, { "epoch": 9.529217199558985, "grad_norm": 2.783215284347534, "learning_rate": 6.8215686274509805e-06, "loss": 2.1408, "step": 6489 }, { "epoch": 9.560088202866593, "grad_norm": 2.5412495136260986, "learning_rate": 6.8112745098039224e-06, "loss": 2.109, "step": 6510 }, { "epoch": 9.590959206174201, "grad_norm": 2.7052578926086426, "learning_rate": 6.800980392156863e-06, "loss": 2.0865, "step": 6531 }, { "epoch": 9.621830209481809, "grad_norm": 3.288067579269409, "learning_rate": 6.790686274509805e-06, "loss": 2.0056, "step": 6552 }, { "epoch": 9.652701212789415, "grad_norm": 3.3594720363616943, "learning_rate": 6.780392156862745e-06, "loss": 2.1256, "step": 6573 }, { "epoch": 9.683572216097023, "grad_norm": 4.163586616516113, "learning_rate": 6.770098039215687e-06, "loss": 2.1335, "step": 6594 }, { "epoch": 9.71444321940463, "grad_norm": 2.5263030529022217, "learning_rate": 6.759803921568629e-06, "loss": 2.0532, "step": 6615 }, { "epoch": 9.745314222712238, "grad_norm": 2.9361979961395264, "learning_rate": 6.749509803921569e-06, "loss": 2.114, "step": 6636 }, { "epoch": 9.776185226019846, "grad_norm": 2.5235047340393066, "learning_rate": 6.739215686274511e-06, "loss": 2.1004, "step": 6657 }, { "epoch": 9.807056229327452, "grad_norm": 3.3822638988494873, "learning_rate": 6.728921568627451e-06, "loss": 2.1218, "step": 6678 }, { "epoch": 9.83792723263506, "grad_norm": 2.4693849086761475, "learning_rate": 6.718627450980393e-06, "loss": 2.0892, "step": 6699 }, { "epoch": 9.868798235942668, "grad_norm": 2.9233691692352295, "learning_rate": 6.708333333333333e-06, "loss": 2.1292, "step": 6720 }, { "epoch": 9.899669239250276, "grad_norm": 3.697094678878784, "learning_rate": 6.698039215686275e-06, "loss": 2.21, "step": 6741 }, { "epoch": 9.930540242557884, "grad_norm": 3.1898422241210938, "learning_rate": 6.687745098039217e-06, "loss": 2.1406, "step": 6762 }, { "epoch": 9.961411245865492, "grad_norm": 2.3322906494140625, "learning_rate": 6.677450980392157e-06, "loss": 2.0774, "step": 6783 }, { "epoch": 9.992282249173098, "grad_norm": 3.2816247940063477, "learning_rate": 6.667156862745099e-06, "loss": 2.2821, "step": 6804 }, { "epoch": 10.022050716648291, "grad_norm": 3.6437644958496094, "learning_rate": 6.656862745098039e-06, "loss": 1.9058, "step": 6825 }, { "epoch": 10.052921719955899, "grad_norm": 4.289977550506592, "learning_rate": 6.646568627450981e-06, "loss": 2.155, "step": 6846 }, { "epoch": 10.083792723263507, "grad_norm": 1.590816855430603, "learning_rate": 6.6362745098039216e-06, "loss": 1.956, "step": 6867 }, { "epoch": 10.114663726571113, "grad_norm": 3.7391345500946045, "learning_rate": 6.6259803921568635e-06, "loss": 2.0601, "step": 6888 }, { "epoch": 10.14553472987872, "grad_norm": 3.7508692741394043, "learning_rate": 6.6156862745098046e-06, "loss": 2.1054, "step": 6909 }, { "epoch": 10.176405733186328, "grad_norm": 2.5421996116638184, "learning_rate": 6.605392156862746e-06, "loss": 2.2101, "step": 6930 }, { "epoch": 10.207276736493936, "grad_norm": 2.4397692680358887, "learning_rate": 6.595098039215687e-06, "loss": 2.0759, "step": 6951 }, { "epoch": 10.238147739801544, "grad_norm": 4.160530090332031, "learning_rate": 6.584803921568628e-06, "loss": 2.0361, "step": 6972 }, { "epoch": 10.26901874310915, "grad_norm": 3.957812547683716, "learning_rate": 6.57450980392157e-06, "loss": 2.0717, "step": 6993 }, { "epoch": 10.299889746416758, "grad_norm": 3.6107029914855957, "learning_rate": 6.56421568627451e-06, "loss": 2.143, "step": 7014 }, { "epoch": 10.330760749724366, "grad_norm": 3.4517884254455566, "learning_rate": 6.553921568627452e-06, "loss": 2.1439, "step": 7035 }, { "epoch": 10.361631753031974, "grad_norm": 2.700695753097534, "learning_rate": 6.543627450980393e-06, "loss": 2.0712, "step": 7056 }, { "epoch": 10.392502756339582, "grad_norm": 2.3187098503112793, "learning_rate": 6.533333333333334e-06, "loss": 2.1131, "step": 7077 }, { "epoch": 10.42337375964719, "grad_norm": 3.6724188327789307, "learning_rate": 6.523529411764706e-06, "loss": 2.0648, "step": 7098 }, { "epoch": 10.454244762954795, "grad_norm": 3.3507304191589355, "learning_rate": 6.513235294117648e-06, "loss": 2.064, "step": 7119 }, { "epoch": 10.485115766262403, "grad_norm": 2.6285359859466553, "learning_rate": 6.502941176470589e-06, "loss": 2.1908, "step": 7140 }, { "epoch": 10.515986769570011, "grad_norm": 2.9801242351531982, "learning_rate": 6.49264705882353e-06, "loss": 2.1629, "step": 7161 }, { "epoch": 10.546857772877619, "grad_norm": 3.342548370361328, "learning_rate": 6.482352941176471e-06, "loss": 2.1269, "step": 7182 }, { "epoch": 10.577728776185227, "grad_norm": 3.217926025390625, "learning_rate": 6.472058823529412e-06, "loss": 2.1063, "step": 7203 }, { "epoch": 10.608599779492833, "grad_norm": 4.631300449371338, "learning_rate": 6.461764705882353e-06, "loss": 2.09, "step": 7224 }, { "epoch": 10.63947078280044, "grad_norm": 3.2653615474700928, "learning_rate": 6.451470588235294e-06, "loss": 2.1174, "step": 7245 }, { "epoch": 10.670341786108049, "grad_norm": 3.6463816165924072, "learning_rate": 6.441176470588236e-06, "loss": 2.0675, "step": 7266 }, { "epoch": 10.701212789415656, "grad_norm": 3.0588767528533936, "learning_rate": 6.4308823529411765e-06, "loss": 2.1324, "step": 7287 }, { "epoch": 10.732083792723264, "grad_norm": 2.32501220703125, "learning_rate": 6.420588235294118e-06, "loss": 2.0737, "step": 7308 }, { "epoch": 10.76295479603087, "grad_norm": 3.140713691711426, "learning_rate": 6.410294117647059e-06, "loss": 2.2039, "step": 7329 }, { "epoch": 10.793825799338478, "grad_norm": 2.824019193649292, "learning_rate": 6.4000000000000006e-06, "loss": 2.0973, "step": 7350 }, { "epoch": 10.824696802646086, "grad_norm": 3.9583792686462402, "learning_rate": 6.389705882352941e-06, "loss": 2.0894, "step": 7371 }, { "epoch": 10.855567805953694, "grad_norm": 3.186405897140503, "learning_rate": 6.379411764705883e-06, "loss": 2.139, "step": 7392 }, { "epoch": 10.886438809261302, "grad_norm": 2.8080101013183594, "learning_rate": 6.369117647058825e-06, "loss": 2.0255, "step": 7413 }, { "epoch": 10.91730981256891, "grad_norm": 2.799795389175415, "learning_rate": 6.358823529411765e-06, "loss": 2.0883, "step": 7434 }, { "epoch": 10.948180815876515, "grad_norm": 3.2571089267730713, "learning_rate": 6.348529411764707e-06, "loss": 2.1766, "step": 7455 }, { "epoch": 10.979051819184123, "grad_norm": 3.5721755027770996, "learning_rate": 6.338235294117647e-06, "loss": 2.1196, "step": 7476 }, { "epoch": 11.008820286659317, "grad_norm": 3.4969334602355957, "learning_rate": 6.327941176470589e-06, "loss": 1.967, "step": 7497 }, { "epoch": 11.039691289966925, "grad_norm": 2.619086980819702, "learning_rate": 6.317647058823529e-06, "loss": 1.9869, "step": 7518 }, { "epoch": 11.07056229327453, "grad_norm": 2.7135913372039795, "learning_rate": 6.307352941176471e-06, "loss": 2.0601, "step": 7539 }, { "epoch": 11.101433296582139, "grad_norm": 2.352649450302124, "learning_rate": 6.297058823529413e-06, "loss": 2.1449, "step": 7560 }, { "epoch": 11.132304299889746, "grad_norm": 3.9685213565826416, "learning_rate": 6.286764705882353e-06, "loss": 1.949, "step": 7581 }, { "epoch": 11.163175303197354, "grad_norm": 2.865730047225952, "learning_rate": 6.276470588235295e-06, "loss": 2.0938, "step": 7602 }, { "epoch": 11.194046306504962, "grad_norm": 2.6979782581329346, "learning_rate": 6.266176470588235e-06, "loss": 2.0482, "step": 7623 }, { "epoch": 11.224917309812568, "grad_norm": 2.2605934143066406, "learning_rate": 6.255882352941177e-06, "loss": 2.1628, "step": 7644 }, { "epoch": 11.255788313120176, "grad_norm": 3.395565986633301, "learning_rate": 6.2455882352941175e-06, "loss": 2.0532, "step": 7665 }, { "epoch": 11.286659316427784, "grad_norm": 2.142057180404663, "learning_rate": 6.2352941176470595e-06, "loss": 2.0898, "step": 7686 }, { "epoch": 11.317530319735392, "grad_norm": 3.811500310897827, "learning_rate": 6.225000000000001e-06, "loss": 2.0959, "step": 7707 }, { "epoch": 11.348401323043, "grad_norm": 4.222660541534424, "learning_rate": 6.214705882352942e-06, "loss": 2.0377, "step": 7728 }, { "epoch": 11.379272326350605, "grad_norm": 2.5871293544769287, "learning_rate": 6.2044117647058835e-06, "loss": 2.1134, "step": 7749 }, { "epoch": 11.410143329658213, "grad_norm": 3.4915037155151367, "learning_rate": 6.194117647058824e-06, "loss": 2.166, "step": 7770 }, { "epoch": 11.441014332965821, "grad_norm": 4.213758945465088, "learning_rate": 6.183823529411766e-06, "loss": 2.3109, "step": 7791 }, { "epoch": 11.471885336273429, "grad_norm": 5.042109489440918, "learning_rate": 6.173529411764706e-06, "loss": 2.0819, "step": 7812 }, { "epoch": 11.502756339581037, "grad_norm": 3.9552133083343506, "learning_rate": 6.163235294117648e-06, "loss": 1.9804, "step": 7833 }, { "epoch": 11.533627342888645, "grad_norm": 4.496159553527832, "learning_rate": 6.152941176470588e-06, "loss": 2.1712, "step": 7854 }, { "epoch": 11.56449834619625, "grad_norm": 2.321765422821045, "learning_rate": 6.14264705882353e-06, "loss": 2.0532, "step": 7875 }, { "epoch": 11.595369349503859, "grad_norm": 3.6235439777374268, "learning_rate": 6.132352941176472e-06, "loss": 2.0967, "step": 7896 }, { "epoch": 11.626240352811466, "grad_norm": 2.5655317306518555, "learning_rate": 6.122058823529412e-06, "loss": 2.0876, "step": 7917 }, { "epoch": 11.657111356119074, "grad_norm": 5.025815486907959, "learning_rate": 6.111764705882354e-06, "loss": 2.0861, "step": 7938 }, { "epoch": 11.687982359426682, "grad_norm": 3.1800806522369385, "learning_rate": 6.101470588235294e-06, "loss": 2.141, "step": 7959 }, { "epoch": 11.718853362734288, "grad_norm": 4.988901138305664, "learning_rate": 6.091176470588236e-06, "loss": 2.0665, "step": 7980 }, { "epoch": 11.749724366041896, "grad_norm": 4.063960552215576, "learning_rate": 6.0808823529411764e-06, "loss": 2.0464, "step": 8001 }, { "epoch": 11.780595369349504, "grad_norm": 3.9658567905426025, "learning_rate": 6.070588235294118e-06, "loss": 2.2195, "step": 8022 }, { "epoch": 11.811466372657112, "grad_norm": 2.566220760345459, "learning_rate": 6.0602941176470594e-06, "loss": 2.1139, "step": 8043 }, { "epoch": 11.84233737596472, "grad_norm": 3.7702178955078125, "learning_rate": 6.0500000000000005e-06, "loss": 2.2112, "step": 8064 }, { "epoch": 11.873208379272326, "grad_norm": 2.5686612129211426, "learning_rate": 6.039705882352942e-06, "loss": 2.1437, "step": 8085 }, { "epoch": 11.904079382579933, "grad_norm": 2.8515195846557617, "learning_rate": 6.029411764705883e-06, "loss": 2.1262, "step": 8106 }, { "epoch": 11.934950385887541, "grad_norm": 3.2659997940063477, "learning_rate": 6.019117647058825e-06, "loss": 2.0747, "step": 8127 }, { "epoch": 11.965821389195149, "grad_norm": 2.8110339641571045, "learning_rate": 6.008823529411765e-06, "loss": 2.0746, "step": 8148 }, { "epoch": 11.996692392502757, "grad_norm": 3.7431135177612305, "learning_rate": 5.998529411764707e-06, "loss": 2.0912, "step": 8169 }, { "epoch": 12.026460859977949, "grad_norm": 4.557000637054443, "learning_rate": 5.988235294117648e-06, "loss": 2.1271, "step": 8190 }, { "epoch": 12.057331863285556, "grad_norm": 2.926790475845337, "learning_rate": 5.977941176470589e-06, "loss": 2.0433, "step": 8211 }, { "epoch": 12.088202866593164, "grad_norm": 2.975024938583374, "learning_rate": 5.96764705882353e-06, "loss": 2.0768, "step": 8232 }, { "epoch": 12.119073869900772, "grad_norm": 3.581433057785034, "learning_rate": 5.957352941176471e-06, "loss": 2.0376, "step": 8253 }, { "epoch": 12.14994487320838, "grad_norm": 5.864632606506348, "learning_rate": 5.947058823529412e-06, "loss": 2.146, "step": 8274 }, { "epoch": 12.180815876515986, "grad_norm": 2.731471300125122, "learning_rate": 5.936764705882353e-06, "loss": 2.0287, "step": 8295 }, { "epoch": 12.211686879823594, "grad_norm": 3.1022236347198486, "learning_rate": 5.926470588235294e-06, "loss": 2.131, "step": 8316 }, { "epoch": 12.242557883131202, "grad_norm": 2.6037495136260986, "learning_rate": 5.916176470588236e-06, "loss": 2.0926, "step": 8337 }, { "epoch": 12.27342888643881, "grad_norm": 2.68418550491333, "learning_rate": 5.9058823529411764e-06, "loss": 2.1563, "step": 8358 }, { "epoch": 12.304299889746417, "grad_norm": 3.6252169609069824, "learning_rate": 5.895588235294118e-06, "loss": 2.1229, "step": 8379 }, { "epoch": 12.335170893054023, "grad_norm": 2.7223503589630127, "learning_rate": 5.8852941176470594e-06, "loss": 2.1574, "step": 8400 }, { "epoch": 12.366041896361631, "grad_norm": 3.2684574127197266, "learning_rate": 5.8750000000000005e-06, "loss": 2.0381, "step": 8421 }, { "epoch": 12.396912899669239, "grad_norm": 2.6794166564941406, "learning_rate": 5.864705882352942e-06, "loss": 2.0885, "step": 8442 }, { "epoch": 12.427783902976847, "grad_norm": 4.735394477844238, "learning_rate": 5.854411764705883e-06, "loss": 2.0897, "step": 8463 }, { "epoch": 12.458654906284455, "grad_norm": 2.711524724960327, "learning_rate": 5.8441176470588246e-06, "loss": 2.1041, "step": 8484 }, { "epoch": 12.489525909592063, "grad_norm": 3.3792128562927246, "learning_rate": 5.833823529411765e-06, "loss": 2.0796, "step": 8505 }, { "epoch": 12.520396912899669, "grad_norm": 3.296825647354126, "learning_rate": 5.823529411764707e-06, "loss": 2.0124, "step": 8526 }, { "epoch": 12.551267916207276, "grad_norm": 2.5053694248199463, "learning_rate": 5.813235294117647e-06, "loss": 1.9827, "step": 8547 }, { "epoch": 12.582138919514884, "grad_norm": 2.5156023502349854, "learning_rate": 5.802941176470589e-06, "loss": 2.1209, "step": 8568 }, { "epoch": 12.613009922822492, "grad_norm": 3.3136746883392334, "learning_rate": 5.792647058823529e-06, "loss": 2.0295, "step": 8589 }, { "epoch": 12.6438809261301, "grad_norm": 4.127097129821777, "learning_rate": 5.782352941176471e-06, "loss": 2.0824, "step": 8610 }, { "epoch": 12.674751929437706, "grad_norm": 2.4340720176696777, "learning_rate": 5.772058823529412e-06, "loss": 2.0821, "step": 8631 }, { "epoch": 12.705622932745314, "grad_norm": 2.7903730869293213, "learning_rate": 5.761764705882353e-06, "loss": 1.9539, "step": 8652 }, { "epoch": 12.736493936052922, "grad_norm": 3.9927330017089844, "learning_rate": 5.751470588235295e-06, "loss": 2.1137, "step": 8673 }, { "epoch": 12.76736493936053, "grad_norm": 4.364806175231934, "learning_rate": 5.741176470588235e-06, "loss": 2.0973, "step": 8694 }, { "epoch": 12.798235942668137, "grad_norm": 2.8431894779205322, "learning_rate": 5.730882352941177e-06, "loss": 2.0947, "step": 8715 }, { "epoch": 12.829106945975743, "grad_norm": 4.133395671844482, "learning_rate": 5.7205882352941175e-06, "loss": 2.0374, "step": 8736 }, { "epoch": 12.859977949283351, "grad_norm": 2.9242682456970215, "learning_rate": 5.710294117647059e-06, "loss": 2.1083, "step": 8757 }, { "epoch": 12.890848952590959, "grad_norm": 4.007552146911621, "learning_rate": 5.7e-06, "loss": 2.1067, "step": 8778 }, { "epoch": 12.921719955898567, "grad_norm": 3.1161675453186035, "learning_rate": 5.6897058823529416e-06, "loss": 2.0318, "step": 8799 }, { "epoch": 12.952590959206175, "grad_norm": 3.6268036365509033, "learning_rate": 5.6794117647058835e-06, "loss": 2.0485, "step": 8820 }, { "epoch": 12.983461962513783, "grad_norm": 4.202246189117432, "learning_rate": 5.669117647058824e-06, "loss": 2.0769, "step": 8841 }, { "epoch": 13.013230429988974, "grad_norm": 2.679133653640747, "learning_rate": 5.658823529411766e-06, "loss": 2.1386, "step": 8862 }, { "epoch": 13.044101433296582, "grad_norm": 2.7983431816101074, "learning_rate": 5.648529411764706e-06, "loss": 1.9623, "step": 8883 }, { "epoch": 13.07497243660419, "grad_norm": 3.559689998626709, "learning_rate": 5.638235294117648e-06, "loss": 2.0616, "step": 8904 }, { "epoch": 13.105843439911798, "grad_norm": 3.0902254581451416, "learning_rate": 5.627941176470588e-06, "loss": 2.0896, "step": 8925 }, { "epoch": 13.136714443219404, "grad_norm": 2.4725775718688965, "learning_rate": 5.61764705882353e-06, "loss": 2.0846, "step": 8946 }, { "epoch": 13.167585446527012, "grad_norm": 3.484569787979126, "learning_rate": 5.607352941176472e-06, "loss": 2.0743, "step": 8967 }, { "epoch": 13.19845644983462, "grad_norm": 3.184817314147949, "learning_rate": 5.597058823529412e-06, "loss": 2.0133, "step": 8988 }, { "epoch": 13.229327453142227, "grad_norm": 4.28677225112915, "learning_rate": 5.586764705882354e-06, "loss": 2.1234, "step": 9009 }, { "epoch": 13.260198456449835, "grad_norm": 3.0605459213256836, "learning_rate": 5.576470588235294e-06, "loss": 2.083, "step": 9030 }, { "epoch": 13.291069459757441, "grad_norm": 3.3841278553009033, "learning_rate": 5.566176470588236e-06, "loss": 2.1344, "step": 9051 }, { "epoch": 13.321940463065049, "grad_norm": 2.9158005714416504, "learning_rate": 5.555882352941176e-06, "loss": 2.0873, "step": 9072 }, { "epoch": 13.352811466372657, "grad_norm": 2.881852626800537, "learning_rate": 5.545588235294118e-06, "loss": 1.9675, "step": 9093 }, { "epoch": 13.383682469680265, "grad_norm": 3.3890089988708496, "learning_rate": 5.53529411764706e-06, "loss": 2.1642, "step": 9114 }, { "epoch": 13.414553472987873, "grad_norm": 3.3301126956939697, "learning_rate": 5.5250000000000005e-06, "loss": 2.0901, "step": 9135 }, { "epoch": 13.44542447629548, "grad_norm": 3.285712957382202, "learning_rate": 5.515196078431373e-06, "loss": 1.9407, "step": 9156 }, { "epoch": 13.476295479603086, "grad_norm": 2.797427177429199, "learning_rate": 5.504901960784314e-06, "loss": 2.1603, "step": 9177 }, { "epoch": 13.507166482910694, "grad_norm": 2.386242151260376, "learning_rate": 5.494607843137255e-06, "loss": 2.0334, "step": 9198 }, { "epoch": 13.538037486218302, "grad_norm": 2.7402749061584473, "learning_rate": 5.4843137254901965e-06, "loss": 2.1055, "step": 9219 }, { "epoch": 13.56890848952591, "grad_norm": 3.5467629432678223, "learning_rate": 5.4740196078431375e-06, "loss": 2.0922, "step": 9240 }, { "epoch": 13.599779492833518, "grad_norm": 4.005125999450684, "learning_rate": 5.463725490196079e-06, "loss": 2.0181, "step": 9261 }, { "epoch": 13.630650496141124, "grad_norm": 2.6311771869659424, "learning_rate": 5.45343137254902e-06, "loss": 2.0758, "step": 9282 }, { "epoch": 13.661521499448732, "grad_norm": 3.188577651977539, "learning_rate": 5.443137254901962e-06, "loss": 2.0835, "step": 9303 }, { "epoch": 13.69239250275634, "grad_norm": 3.115765333175659, "learning_rate": 5.432843137254903e-06, "loss": 2.1527, "step": 9324 }, { "epoch": 13.723263506063947, "grad_norm": 2.4878807067871094, "learning_rate": 5.422549019607844e-06, "loss": 2.0135, "step": 9345 }, { "epoch": 13.754134509371555, "grad_norm": 1.9076848030090332, "learning_rate": 5.412254901960785e-06, "loss": 2.0666, "step": 9366 }, { "epoch": 13.785005512679161, "grad_norm": 3.4870707988739014, "learning_rate": 5.401960784313726e-06, "loss": 2.1315, "step": 9387 }, { "epoch": 13.81587651598677, "grad_norm": 2.885655403137207, "learning_rate": 5.391666666666667e-06, "loss": 1.9702, "step": 9408 }, { "epoch": 13.846747519294377, "grad_norm": 3.8822875022888184, "learning_rate": 5.381372549019608e-06, "loss": 2.0141, "step": 9429 }, { "epoch": 13.877618522601985, "grad_norm": 2.470426559448242, "learning_rate": 5.371078431372549e-06, "loss": 2.1559, "step": 9450 }, { "epoch": 13.908489525909593, "grad_norm": 3.060715436935425, "learning_rate": 5.360784313725491e-06, "loss": 2.1225, "step": 9471 }, { "epoch": 13.9393605292172, "grad_norm": 2.9717185497283936, "learning_rate": 5.350490196078431e-06, "loss": 2.1496, "step": 9492 }, { "epoch": 13.970231532524807, "grad_norm": 3.471766471862793, "learning_rate": 5.340196078431373e-06, "loss": 2.1459, "step": 9513 }, { "epoch": 14.0, "grad_norm": 1.8087966442108154, "learning_rate": 5.329901960784314e-06, "loss": 1.9732, "step": 9534 }, { "epoch": 14.030871003307608, "grad_norm": 2.219148635864258, "learning_rate": 5.319607843137255e-06, "loss": 2.066, "step": 9555 }, { "epoch": 14.061742006615216, "grad_norm": 2.4805564880371094, "learning_rate": 5.3093137254901964e-06, "loss": 2.055, "step": 9576 }, { "epoch": 14.092613009922822, "grad_norm": 3.4554660320281982, "learning_rate": 5.2990196078431375e-06, "loss": 2.0085, "step": 9597 }, { "epoch": 14.12348401323043, "grad_norm": 3.6150786876678467, "learning_rate": 5.2887254901960794e-06, "loss": 2.1289, "step": 9618 }, { "epoch": 14.154355016538037, "grad_norm": 3.061882495880127, "learning_rate": 5.27843137254902e-06, "loss": 2.0114, "step": 9639 }, { "epoch": 14.185226019845645, "grad_norm": 3.2985293865203857, "learning_rate": 5.268137254901962e-06, "loss": 2.1027, "step": 9660 }, { "epoch": 14.216097023153253, "grad_norm": 3.460374116897583, "learning_rate": 5.257843137254902e-06, "loss": 2.1815, "step": 9681 }, { "epoch": 14.24696802646086, "grad_norm": 2.6231353282928467, "learning_rate": 5.247549019607844e-06, "loss": 2.1611, "step": 9702 }, { "epoch": 14.277839029768467, "grad_norm": 2.7419650554656982, "learning_rate": 5.237254901960784e-06, "loss": 2.069, "step": 9723 }, { "epoch": 14.308710033076075, "grad_norm": 2.1007187366485596, "learning_rate": 5.226960784313726e-06, "loss": 1.9542, "step": 9744 }, { "epoch": 14.339581036383683, "grad_norm": 2.824641227722168, "learning_rate": 5.216666666666666e-06, "loss": 2.1204, "step": 9765 }, { "epoch": 14.37045203969129, "grad_norm": 3.708585739135742, "learning_rate": 5.206372549019608e-06, "loss": 2.0792, "step": 9786 }, { "epoch": 14.401323042998898, "grad_norm": 3.4671897888183594, "learning_rate": 5.19607843137255e-06, "loss": 2.0354, "step": 9807 }, { "epoch": 14.432194046306504, "grad_norm": 3.824810743331909, "learning_rate": 5.18578431372549e-06, "loss": 1.9401, "step": 9828 }, { "epoch": 14.463065049614112, "grad_norm": 2.3937902450561523, "learning_rate": 5.175490196078432e-06, "loss": 2.0049, "step": 9849 }, { "epoch": 14.49393605292172, "grad_norm": 3.4717845916748047, "learning_rate": 5.165196078431372e-06, "loss": 2.111, "step": 9870 }, { "epoch": 14.524807056229328, "grad_norm": 3.0910346508026123, "learning_rate": 5.154901960784314e-06, "loss": 2.1328, "step": 9891 }, { "epoch": 14.555678059536936, "grad_norm": 2.9736809730529785, "learning_rate": 5.1446078431372545e-06, "loss": 1.9529, "step": 9912 }, { "epoch": 14.586549062844542, "grad_norm": 2.9420766830444336, "learning_rate": 5.1343137254901964e-06, "loss": 2.0487, "step": 9933 }, { "epoch": 14.61742006615215, "grad_norm": 3.0031661987304688, "learning_rate": 5.124019607843138e-06, "loss": 2.0438, "step": 9954 }, { "epoch": 14.648291069459757, "grad_norm": 2.0756826400756836, "learning_rate": 5.113725490196079e-06, "loss": 2.0878, "step": 9975 }, { "epoch": 14.679162072767365, "grad_norm": 3.5476233959198, "learning_rate": 5.1034313725490205e-06, "loss": 2.0462, "step": 9996 }, { "epoch": 14.710033076074973, "grad_norm": 3.7489500045776367, "learning_rate": 5.093137254901961e-06, "loss": 1.9793, "step": 10017 }, { "epoch": 14.74090407938258, "grad_norm": 2.612318992614746, "learning_rate": 5.082843137254903e-06, "loss": 1.9353, "step": 10038 }, { "epoch": 14.771775082690187, "grad_norm": 3.1954147815704346, "learning_rate": 5.072549019607843e-06, "loss": 2.1059, "step": 10059 }, { "epoch": 14.802646085997795, "grad_norm": 2.8227596282958984, "learning_rate": 5.062254901960785e-06, "loss": 2.0501, "step": 10080 }, { "epoch": 14.833517089305403, "grad_norm": 3.4035403728485107, "learning_rate": 5.051960784313727e-06, "loss": 2.0993, "step": 10101 }, { "epoch": 14.86438809261301, "grad_norm": 2.661450147628784, "learning_rate": 5.041666666666667e-06, "loss": 1.9717, "step": 10122 }, { "epoch": 14.895259095920617, "grad_norm": 3.4387428760528564, "learning_rate": 5.031372549019609e-06, "loss": 2.082, "step": 10143 }, { "epoch": 14.926130099228224, "grad_norm": 3.7169981002807617, "learning_rate": 5.021078431372549e-06, "loss": 2.1306, "step": 10164 }, { "epoch": 14.957001102535832, "grad_norm": 4.3067946434021, "learning_rate": 5.010784313725491e-06, "loss": 2.1733, "step": 10185 }, { "epoch": 14.98787210584344, "grad_norm": 3.562462329864502, "learning_rate": 5.000490196078431e-06, "loss": 2.1481, "step": 10206 }, { "epoch": 15.017640573318634, "grad_norm": 4.963381290435791, "learning_rate": 4.990196078431373e-06, "loss": 2.0355, "step": 10227 }, { "epoch": 15.04851157662624, "grad_norm": 3.029024600982666, "learning_rate": 4.979901960784314e-06, "loss": 2.1029, "step": 10248 }, { "epoch": 15.079382579933847, "grad_norm": 3.5499942302703857, "learning_rate": 4.969607843137255e-06, "loss": 2.1633, "step": 10269 }, { "epoch": 15.110253583241455, "grad_norm": 3.7054245471954346, "learning_rate": 4.959313725490196e-06, "loss": 2.1461, "step": 10290 }, { "epoch": 15.141124586549063, "grad_norm": 3.2250783443450928, "learning_rate": 4.9490196078431375e-06, "loss": 2.0355, "step": 10311 }, { "epoch": 15.171995589856671, "grad_norm": 2.0336270332336426, "learning_rate": 4.938725490196079e-06, "loss": 2.0813, "step": 10332 }, { "epoch": 15.202866593164277, "grad_norm": 3.070176839828491, "learning_rate": 4.9284313725490205e-06, "loss": 2.1598, "step": 10353 }, { "epoch": 15.233737596471885, "grad_norm": 4.083742141723633, "learning_rate": 4.9181372549019616e-06, "loss": 2.1571, "step": 10374 }, { "epoch": 15.264608599779493, "grad_norm": 2.932722568511963, "learning_rate": 4.907843137254903e-06, "loss": 2.0541, "step": 10395 }, { "epoch": 15.2954796030871, "grad_norm": 4.044142723083496, "learning_rate": 4.897549019607844e-06, "loss": 2.0635, "step": 10416 }, { "epoch": 15.326350606394708, "grad_norm": 2.9298267364501953, "learning_rate": 4.887254901960785e-06, "loss": 1.9621, "step": 10437 }, { "epoch": 15.357221609702314, "grad_norm": 3.0545711517333984, "learning_rate": 4.876960784313726e-06, "loss": 2.117, "step": 10458 }, { "epoch": 15.388092613009922, "grad_norm": 4.692001819610596, "learning_rate": 4.866666666666667e-06, "loss": 2.0084, "step": 10479 }, { "epoch": 15.41896361631753, "grad_norm": 2.91191029548645, "learning_rate": 4.856372549019608e-06, "loss": 2.062, "step": 10500 }, { "epoch": 15.449834619625138, "grad_norm": 2.914384603500366, "learning_rate": 4.84607843137255e-06, "loss": 2.0613, "step": 10521 }, { "epoch": 15.480705622932746, "grad_norm": 2.4917314052581787, "learning_rate": 4.835784313725491e-06, "loss": 1.9963, "step": 10542 }, { "epoch": 15.511576626240354, "grad_norm": 4.361894130706787, "learning_rate": 4.825490196078432e-06, "loss": 2.0153, "step": 10563 }, { "epoch": 15.54244762954796, "grad_norm": 2.501007556915283, "learning_rate": 4.815196078431373e-06, "loss": 2.1068, "step": 10584 }, { "epoch": 15.573318632855568, "grad_norm": 3.864947557449341, "learning_rate": 4.804901960784314e-06, "loss": 1.9808, "step": 10605 }, { "epoch": 15.604189636163175, "grad_norm": 2.879890203475952, "learning_rate": 4.794607843137255e-06, "loss": 2.0443, "step": 10626 }, { "epoch": 15.635060639470783, "grad_norm": 2.9568746089935303, "learning_rate": 4.784313725490196e-06, "loss": 2.1534, "step": 10647 }, { "epoch": 15.665931642778391, "grad_norm": 3.456660032272339, "learning_rate": 4.7740196078431375e-06, "loss": 2.1061, "step": 10668 }, { "epoch": 15.696802646085997, "grad_norm": 2.0130109786987305, "learning_rate": 4.7637254901960785e-06, "loss": 2.0547, "step": 10689 }, { "epoch": 15.727673649393605, "grad_norm": 2.771479845046997, "learning_rate": 4.75343137254902e-06, "loss": 2.0917, "step": 10710 }, { "epoch": 15.758544652701213, "grad_norm": 3.6473817825317383, "learning_rate": 4.743137254901961e-06, "loss": 2.0157, "step": 10731 }, { "epoch": 15.78941565600882, "grad_norm": 3.1383354663848877, "learning_rate": 4.732843137254902e-06, "loss": 1.9199, "step": 10752 }, { "epoch": 15.820286659316428, "grad_norm": 3.2463924884796143, "learning_rate": 4.722549019607844e-06, "loss": 2.053, "step": 10773 }, { "epoch": 15.851157662624034, "grad_norm": 2.4401538372039795, "learning_rate": 4.712254901960785e-06, "loss": 2.0286, "step": 10794 }, { "epoch": 15.882028665931642, "grad_norm": 3.72370982170105, "learning_rate": 4.701960784313726e-06, "loss": 2.0477, "step": 10815 }, { "epoch": 15.91289966923925, "grad_norm": 2.85298228263855, "learning_rate": 4.691666666666667e-06, "loss": 1.9962, "step": 10836 }, { "epoch": 15.943770672546858, "grad_norm": 3.810678482055664, "learning_rate": 4.681372549019608e-06, "loss": 2.1352, "step": 10857 }, { "epoch": 15.974641675854466, "grad_norm": 3.461115598678589, "learning_rate": 4.671078431372549e-06, "loss": 2.1521, "step": 10878 }, { "epoch": 16.004410143329658, "grad_norm": 3.260249376296997, "learning_rate": 4.66078431372549e-06, "loss": 1.9691, "step": 10899 }, { "epoch": 16.035281146637267, "grad_norm": 3.3586559295654297, "learning_rate": 4.650490196078431e-06, "loss": 2.1661, "step": 10920 }, { "epoch": 16.066152149944873, "grad_norm": 3.206244468688965, "learning_rate": 4.640196078431372e-06, "loss": 2.1357, "step": 10941 }, { "epoch": 16.09702315325248, "grad_norm": 3.45607590675354, "learning_rate": 4.629901960784314e-06, "loss": 2.0576, "step": 10962 }, { "epoch": 16.12789415656009, "grad_norm": 2.4897308349609375, "learning_rate": 4.619607843137255e-06, "loss": 2.152, "step": 10983 }, { "epoch": 16.158765159867695, "grad_norm": 3.5065560340881348, "learning_rate": 4.609313725490196e-06, "loss": 1.964, "step": 11004 }, { "epoch": 16.189636163175305, "grad_norm": 3.8698577880859375, "learning_rate": 4.5990196078431375e-06, "loss": 1.9255, "step": 11025 }, { "epoch": 16.22050716648291, "grad_norm": 2.7581987380981445, "learning_rate": 4.5887254901960785e-06, "loss": 2.0786, "step": 11046 }, { "epoch": 16.251378169790517, "grad_norm": 4.7639007568359375, "learning_rate": 4.57843137254902e-06, "loss": 2.1155, "step": 11067 }, { "epoch": 16.282249173098126, "grad_norm": 2.4301273822784424, "learning_rate": 4.568137254901961e-06, "loss": 2.1352, "step": 11088 }, { "epoch": 16.313120176405732, "grad_norm": 4.11771297454834, "learning_rate": 4.557843137254903e-06, "loss": 2.0469, "step": 11109 }, { "epoch": 16.343991179713342, "grad_norm": 3.496967077255249, "learning_rate": 4.547549019607844e-06, "loss": 2.1648, "step": 11130 }, { "epoch": 16.374862183020948, "grad_norm": 2.431269407272339, "learning_rate": 4.537254901960785e-06, "loss": 2.0452, "step": 11151 }, { "epoch": 16.405733186328554, "grad_norm": 2.833159923553467, "learning_rate": 4.526960784313726e-06, "loss": 2.0595, "step": 11172 }, { "epoch": 16.436604189636164, "grad_norm": 2.9650235176086426, "learning_rate": 4.516666666666667e-06, "loss": 2.0827, "step": 11193 }, { "epoch": 16.46747519294377, "grad_norm": 3.284029483795166, "learning_rate": 4.506372549019608e-06, "loss": 2.1151, "step": 11214 }, { "epoch": 16.49834619625138, "grad_norm": 2.8441848754882812, "learning_rate": 4.496078431372549e-06, "loss": 2.0386, "step": 11235 }, { "epoch": 16.529217199558985, "grad_norm": 2.7832906246185303, "learning_rate": 4.485784313725491e-06, "loss": 2.0704, "step": 11256 }, { "epoch": 16.56008820286659, "grad_norm": 2.812133550643921, "learning_rate": 4.475490196078432e-06, "loss": 2.0627, "step": 11277 }, { "epoch": 16.5909592061742, "grad_norm": 4.428758144378662, "learning_rate": 4.465196078431373e-06, "loss": 2.0506, "step": 11298 }, { "epoch": 16.621830209481807, "grad_norm": 3.15263032913208, "learning_rate": 4.454901960784314e-06, "loss": 2.0758, "step": 11319 }, { "epoch": 16.652701212789417, "grad_norm": 3.6009435653686523, "learning_rate": 4.445098039215687e-06, "loss": 2.0106, "step": 11340 }, { "epoch": 16.683572216097023, "grad_norm": 3.4212281703948975, "learning_rate": 4.434803921568628e-06, "loss": 1.9713, "step": 11361 }, { "epoch": 16.71444321940463, "grad_norm": 5.794731140136719, "learning_rate": 4.424509803921569e-06, "loss": 2.0213, "step": 11382 }, { "epoch": 16.74531422271224, "grad_norm": 3.2197704315185547, "learning_rate": 4.41421568627451e-06, "loss": 2.0999, "step": 11403 }, { "epoch": 16.776185226019845, "grad_norm": 1.9434154033660889, "learning_rate": 4.403921568627451e-06, "loss": 2.1013, "step": 11424 }, { "epoch": 16.807056229327454, "grad_norm": 2.352503538131714, "learning_rate": 4.393627450980393e-06, "loss": 1.9736, "step": 11445 }, { "epoch": 16.83792723263506, "grad_norm": 3.7558541297912598, "learning_rate": 4.383333333333334e-06, "loss": 2.0952, "step": 11466 }, { "epoch": 16.86879823594267, "grad_norm": 3.505101442337036, "learning_rate": 4.373039215686275e-06, "loss": 2.0759, "step": 11487 }, { "epoch": 16.899669239250276, "grad_norm": 2.8822970390319824, "learning_rate": 4.3627450980392164e-06, "loss": 2.1724, "step": 11508 }, { "epoch": 16.930540242557882, "grad_norm": 3.491542100906372, "learning_rate": 4.3524509803921575e-06, "loss": 2.0708, "step": 11529 }, { "epoch": 16.96141124586549, "grad_norm": 3.233745574951172, "learning_rate": 4.342156862745099e-06, "loss": 2.0463, "step": 11550 }, { "epoch": 16.992282249173098, "grad_norm": 3.1945841312408447, "learning_rate": 4.33186274509804e-06, "loss": 1.998, "step": 11571 }, { "epoch": 17.02205071664829, "grad_norm": 2.081578254699707, "learning_rate": 4.321568627450981e-06, "loss": 2.0078, "step": 11592 }, { "epoch": 17.052921719955897, "grad_norm": 3.096571683883667, "learning_rate": 4.311274509803922e-06, "loss": 1.958, "step": 11613 }, { "epoch": 17.083792723263507, "grad_norm": 1.7668076753616333, "learning_rate": 4.300980392156863e-06, "loss": 2.0671, "step": 11634 }, { "epoch": 17.114663726571113, "grad_norm": 3.3065104484558105, "learning_rate": 4.290686274509804e-06, "loss": 2.0577, "step": 11655 }, { "epoch": 17.145534729878722, "grad_norm": 3.453615427017212, "learning_rate": 4.280392156862745e-06, "loss": 2.0605, "step": 11676 }, { "epoch": 17.17640573318633, "grad_norm": 2.54748272895813, "learning_rate": 4.270098039215687e-06, "loss": 2.2096, "step": 11697 }, { "epoch": 17.207276736493935, "grad_norm": 3.4268970489501953, "learning_rate": 4.259803921568628e-06, "loss": 2.0538, "step": 11718 }, { "epoch": 17.238147739801544, "grad_norm": 3.91983699798584, "learning_rate": 4.249509803921569e-06, "loss": 2.0354, "step": 11739 }, { "epoch": 17.26901874310915, "grad_norm": 3.7987680435180664, "learning_rate": 4.23921568627451e-06, "loss": 2.0894, "step": 11760 }, { "epoch": 17.29988974641676, "grad_norm": 4.2465291023254395, "learning_rate": 4.228921568627451e-06, "loss": 2.1255, "step": 11781 }, { "epoch": 17.330760749724366, "grad_norm": 5.107961654663086, "learning_rate": 4.218627450980392e-06, "loss": 2.0218, "step": 11802 }, { "epoch": 17.361631753031972, "grad_norm": 3.0302984714508057, "learning_rate": 4.208333333333333e-06, "loss": 2.0429, "step": 11823 }, { "epoch": 17.39250275633958, "grad_norm": 3.6439778804779053, "learning_rate": 4.1980392156862745e-06, "loss": 2.0699, "step": 11844 }, { "epoch": 17.423373759647188, "grad_norm": 2.6481032371520996, "learning_rate": 4.1877450980392156e-06, "loss": 2.0194, "step": 11865 }, { "epoch": 17.454244762954797, "grad_norm": 2.3916313648223877, "learning_rate": 4.177450980392157e-06, "loss": 1.9906, "step": 11886 }, { "epoch": 17.485115766262403, "grad_norm": 3.7970051765441895, "learning_rate": 4.167156862745098e-06, "loss": 2.0747, "step": 11907 }, { "epoch": 17.51598676957001, "grad_norm": 3.466770648956299, "learning_rate": 4.15686274509804e-06, "loss": 2.0312, "step": 11928 }, { "epoch": 17.54685777287762, "grad_norm": 3.2934865951538086, "learning_rate": 4.146568627450981e-06, "loss": 2.1119, "step": 11949 }, { "epoch": 17.577728776185225, "grad_norm": 3.815096855163574, "learning_rate": 4.136274509803922e-06, "loss": 2.0647, "step": 11970 }, { "epoch": 17.608599779492835, "grad_norm": 2.554340362548828, "learning_rate": 4.125980392156863e-06, "loss": 2.1036, "step": 11991 }, { "epoch": 17.63947078280044, "grad_norm": 3.861665964126587, "learning_rate": 4.115686274509804e-06, "loss": 1.9365, "step": 12012 }, { "epoch": 17.670341786108047, "grad_norm": 4.2214226722717285, "learning_rate": 4.105392156862745e-06, "loss": 2.1702, "step": 12033 }, { "epoch": 17.701212789415656, "grad_norm": 2.405787706375122, "learning_rate": 4.095098039215686e-06, "loss": 2.0042, "step": 12054 }, { "epoch": 17.732083792723262, "grad_norm": 3.1533408164978027, "learning_rate": 4.084803921568628e-06, "loss": 1.851, "step": 12075 }, { "epoch": 17.762954796030872, "grad_norm": 3.3998310565948486, "learning_rate": 4.074509803921569e-06, "loss": 2.1838, "step": 12096 }, { "epoch": 17.793825799338478, "grad_norm": 3.0578980445861816, "learning_rate": 4.06421568627451e-06, "loss": 2.0094, "step": 12117 }, { "epoch": 17.824696802646088, "grad_norm": 2.25312876701355, "learning_rate": 4.053921568627451e-06, "loss": 2.0077, "step": 12138 }, { "epoch": 17.855567805953694, "grad_norm": 2.8547134399414062, "learning_rate": 4.043627450980392e-06, "loss": 2.096, "step": 12159 }, { "epoch": 17.8864388092613, "grad_norm": 2.6908249855041504, "learning_rate": 4.033333333333333e-06, "loss": 2.0115, "step": 12180 }, { "epoch": 17.91730981256891, "grad_norm": 4.099616527557373, "learning_rate": 4.0230392156862745e-06, "loss": 2.015, "step": 12201 }, { "epoch": 17.948180815876515, "grad_norm": 3.3205764293670654, "learning_rate": 4.0127450980392155e-06, "loss": 1.9277, "step": 12222 }, { "epoch": 17.979051819184125, "grad_norm": 3.4432685375213623, "learning_rate": 4.0024509803921575e-06, "loss": 2.0977, "step": 12243 }, { "epoch": 18.008820286659315, "grad_norm": 3.4045350551605225, "learning_rate": 3.9921568627450985e-06, "loss": 1.8597, "step": 12264 }, { "epoch": 18.039691289966925, "grad_norm": 2.844184637069702, "learning_rate": 3.98186274509804e-06, "loss": 2.0898, "step": 12285 }, { "epoch": 18.07056229327453, "grad_norm": 3.8207850456237793, "learning_rate": 3.971568627450981e-06, "loss": 2.0866, "step": 12306 }, { "epoch": 18.10143329658214, "grad_norm": 2.78249192237854, "learning_rate": 3.961274509803922e-06, "loss": 2.1116, "step": 12327 }, { "epoch": 18.132304299889746, "grad_norm": 3.2231979370117188, "learning_rate": 3.950980392156863e-06, "loss": 2.0347, "step": 12348 }, { "epoch": 18.163175303197352, "grad_norm": 3.118004560470581, "learning_rate": 3.940686274509804e-06, "loss": 2.1377, "step": 12369 }, { "epoch": 18.194046306504962, "grad_norm": 2.7039101123809814, "learning_rate": 3.930392156862746e-06, "loss": 2.0915, "step": 12390 }, { "epoch": 18.224917309812568, "grad_norm": 5.230814456939697, "learning_rate": 3.920098039215687e-06, "loss": 2.0506, "step": 12411 }, { "epoch": 18.255788313120178, "grad_norm": 4.035606861114502, "learning_rate": 3.909803921568628e-06, "loss": 2.0546, "step": 12432 }, { "epoch": 18.286659316427784, "grad_norm": 2.651754379272461, "learning_rate": 3.899509803921569e-06, "loss": 2.1167, "step": 12453 }, { "epoch": 18.31753031973539, "grad_norm": 2.6406948566436768, "learning_rate": 3.88921568627451e-06, "loss": 2.0703, "step": 12474 }, { "epoch": 18.348401323043, "grad_norm": 2.9610483646392822, "learning_rate": 3.878921568627451e-06, "loss": 2.0869, "step": 12495 }, { "epoch": 18.379272326350605, "grad_norm": 3.5515575408935547, "learning_rate": 3.868627450980392e-06, "loss": 2.0525, "step": 12516 }, { "epoch": 18.410143329658215, "grad_norm": 4.212573528289795, "learning_rate": 3.858333333333333e-06, "loss": 2.0139, "step": 12537 }, { "epoch": 18.44101433296582, "grad_norm": 5.508068561553955, "learning_rate": 3.848039215686275e-06, "loss": 2.0703, "step": 12558 }, { "epoch": 18.471885336273427, "grad_norm": 2.8144259452819824, "learning_rate": 3.837745098039216e-06, "loss": 2.0188, "step": 12579 }, { "epoch": 18.502756339581037, "grad_norm": 2.812675714492798, "learning_rate": 3.8274509803921575e-06, "loss": 2.061, "step": 12600 }, { "epoch": 18.533627342888643, "grad_norm": 3.4826412200927734, "learning_rate": 3.8171568627450985e-06, "loss": 2.1514, "step": 12621 }, { "epoch": 18.564498346196252, "grad_norm": 3.2510266304016113, "learning_rate": 3.8068627450980396e-06, "loss": 2.0922, "step": 12642 }, { "epoch": 18.59536934950386, "grad_norm": 2.7289392948150635, "learning_rate": 3.7965686274509807e-06, "loss": 1.9753, "step": 12663 }, { "epoch": 18.626240352811465, "grad_norm": 3.323258876800537, "learning_rate": 3.7862745098039218e-06, "loss": 2.0766, "step": 12684 }, { "epoch": 18.657111356119074, "grad_norm": 2.709409713745117, "learning_rate": 3.7759803921568633e-06, "loss": 2.0631, "step": 12705 }, { "epoch": 18.68798235942668, "grad_norm": 3.7730557918548584, "learning_rate": 3.7656862745098043e-06, "loss": 1.9911, "step": 12726 }, { "epoch": 18.71885336273429, "grad_norm": 3.651437997817993, "learning_rate": 3.7553921568627454e-06, "loss": 2.1125, "step": 12747 }, { "epoch": 18.749724366041896, "grad_norm": 3.381253242492676, "learning_rate": 3.7450980392156865e-06, "loss": 1.9971, "step": 12768 }, { "epoch": 18.780595369349506, "grad_norm": 3.1010146141052246, "learning_rate": 3.7348039215686276e-06, "loss": 2.0648, "step": 12789 }, { "epoch": 18.81146637265711, "grad_norm": 3.745161533355713, "learning_rate": 3.7245098039215686e-06, "loss": 2.0145, "step": 12810 }, { "epoch": 18.842337375964718, "grad_norm": 4.621708393096924, "learning_rate": 3.7142156862745097e-06, "loss": 1.9998, "step": 12831 }, { "epoch": 18.873208379272327, "grad_norm": 3.5155012607574463, "learning_rate": 3.7039215686274516e-06, "loss": 2.0464, "step": 12852 }, { "epoch": 18.904079382579933, "grad_norm": 2.5564968585968018, "learning_rate": 3.6936274509803927e-06, "loss": 2.0925, "step": 12873 }, { "epoch": 18.934950385887543, "grad_norm": 3.048408031463623, "learning_rate": 3.6833333333333338e-06, "loss": 2.0354, "step": 12894 }, { "epoch": 18.96582138919515, "grad_norm": 3.5387840270996094, "learning_rate": 3.673039215686275e-06, "loss": 1.9691, "step": 12915 }, { "epoch": 18.996692392502755, "grad_norm": 3.683196544647217, "learning_rate": 3.662745098039216e-06, "loss": 2.0746, "step": 12936 }, { "epoch": 19.02646085997795, "grad_norm": 5.585927486419678, "learning_rate": 3.652450980392157e-06, "loss": 1.9729, "step": 12957 }, { "epoch": 19.057331863285558, "grad_norm": 3.5750479698181152, "learning_rate": 3.642156862745098e-06, "loss": 2.0793, "step": 12978 }, { "epoch": 19.088202866593164, "grad_norm": 3.356248617172241, "learning_rate": 3.631862745098039e-06, "loss": 2.0644, "step": 12999 }, { "epoch": 19.11907386990077, "grad_norm": 3.57338809967041, "learning_rate": 3.621568627450981e-06, "loss": 2.1432, "step": 13020 }, { "epoch": 19.14994487320838, "grad_norm": 4.2975287437438965, "learning_rate": 3.611274509803922e-06, "loss": 2.0979, "step": 13041 }, { "epoch": 19.180815876515986, "grad_norm": 3.7634999752044678, "learning_rate": 3.6009803921568632e-06, "loss": 2.1543, "step": 13062 }, { "epoch": 19.211686879823596, "grad_norm": 3.75382924079895, "learning_rate": 3.5906862745098043e-06, "loss": 2.0601, "step": 13083 }, { "epoch": 19.2425578831312, "grad_norm": 3.5403940677642822, "learning_rate": 3.5803921568627454e-06, "loss": 2.0395, "step": 13104 }, { "epoch": 19.273428886438808, "grad_norm": 2.899658203125, "learning_rate": 3.5700980392156865e-06, "loss": 2.0467, "step": 13125 }, { "epoch": 19.304299889746417, "grad_norm": 3.702956199645996, "learning_rate": 3.5598039215686275e-06, "loss": 2.0043, "step": 13146 }, { "epoch": 19.335170893054023, "grad_norm": 3.56923508644104, "learning_rate": 3.549509803921569e-06, "loss": 1.9066, "step": 13167 }, { "epoch": 19.366041896361633, "grad_norm": 2.911870241165161, "learning_rate": 3.53921568627451e-06, "loss": 2.0302, "step": 13188 }, { "epoch": 19.39691289966924, "grad_norm": 2.913762331008911, "learning_rate": 3.528921568627451e-06, "loss": 1.9829, "step": 13209 }, { "epoch": 19.427783902976845, "grad_norm": 3.171879768371582, "learning_rate": 3.5186274509803927e-06, "loss": 1.9945, "step": 13230 }, { "epoch": 19.458654906284455, "grad_norm": 2.787626266479492, "learning_rate": 3.5083333333333338e-06, "loss": 2.0112, "step": 13251 }, { "epoch": 19.48952590959206, "grad_norm": 3.769746780395508, "learning_rate": 3.498039215686275e-06, "loss": 1.9996, "step": 13272 }, { "epoch": 19.52039691289967, "grad_norm": 3.2027320861816406, "learning_rate": 3.487745098039216e-06, "loss": 1.9983, "step": 13293 }, { "epoch": 19.551267916207276, "grad_norm": 3.7969839572906494, "learning_rate": 3.477450980392157e-06, "loss": 2.1563, "step": 13314 }, { "epoch": 19.582138919514883, "grad_norm": 2.9992904663085938, "learning_rate": 3.4671568627450985e-06, "loss": 2.0276, "step": 13335 }, { "epoch": 19.613009922822492, "grad_norm": 3.2763264179229736, "learning_rate": 3.4568627450980396e-06, "loss": 2.0982, "step": 13356 }, { "epoch": 19.643880926130098, "grad_norm": 3.3335490226745605, "learning_rate": 3.4465686274509806e-06, "loss": 2.0123, "step": 13377 }, { "epoch": 19.674751929437708, "grad_norm": 3.0416243076324463, "learning_rate": 3.4362745098039217e-06, "loss": 2.0464, "step": 13398 }, { "epoch": 19.705622932745314, "grad_norm": 3.625033140182495, "learning_rate": 3.425980392156863e-06, "loss": 2.0404, "step": 13419 }, { "epoch": 19.736493936052923, "grad_norm": 3.487109422683716, "learning_rate": 3.415686274509804e-06, "loss": 2.1132, "step": 13440 }, { "epoch": 19.76736493936053, "grad_norm": 3.335360050201416, "learning_rate": 3.405392156862745e-06, "loss": 2.0071, "step": 13461 }, { "epoch": 19.798235942668136, "grad_norm": 3.624753952026367, "learning_rate": 3.395098039215687e-06, "loss": 2.05, "step": 13482 }, { "epoch": 19.829106945975745, "grad_norm": 3.325979709625244, "learning_rate": 3.384803921568628e-06, "loss": 1.8625, "step": 13503 }, { "epoch": 19.85997794928335, "grad_norm": 3.8328804969787598, "learning_rate": 3.374509803921569e-06, "loss": 1.9652, "step": 13524 }, { "epoch": 19.89084895259096, "grad_norm": 4.318526744842529, "learning_rate": 3.364705882352942e-06, "loss": 1.955, "step": 13545 }, { "epoch": 19.921719955898567, "grad_norm": 3.1736397743225098, "learning_rate": 3.354411764705883e-06, "loss": 2.0086, "step": 13566 }, { "epoch": 19.952590959206173, "grad_norm": 2.356720209121704, "learning_rate": 3.344117647058824e-06, "loss": 2.0913, "step": 13587 }, { "epoch": 19.983461962513783, "grad_norm": 3.434952735900879, "learning_rate": 3.333823529411765e-06, "loss": 2.0391, "step": 13608 }, { "epoch": 20.013230429988976, "grad_norm": 3.4107272624969482, "learning_rate": 3.323529411764706e-06, "loss": 2.0666, "step": 13629 }, { "epoch": 20.044101433296582, "grad_norm": 4.133201599121094, "learning_rate": 3.313235294117647e-06, "loss": 2.0181, "step": 13650 }, { "epoch": 20.074972436604188, "grad_norm": 3.949664831161499, "learning_rate": 3.3029411764705887e-06, "loss": 2.033, "step": 13671 }, { "epoch": 20.105843439911798, "grad_norm": 3.270611047744751, "learning_rate": 3.2926470588235298e-06, "loss": 2.0423, "step": 13692 }, { "epoch": 20.136714443219404, "grad_norm": 3.6626546382904053, "learning_rate": 3.282352941176471e-06, "loss": 2.085, "step": 13713 }, { "epoch": 20.167585446527013, "grad_norm": 2.613144874572754, "learning_rate": 3.272058823529412e-06, "loss": 1.9857, "step": 13734 }, { "epoch": 20.19845644983462, "grad_norm": 5.440834999084473, "learning_rate": 3.261764705882353e-06, "loss": 2.0182, "step": 13755 }, { "epoch": 20.229327453142226, "grad_norm": 3.383056402206421, "learning_rate": 3.2514705882352945e-06, "loss": 2.067, "step": 13776 }, { "epoch": 20.260198456449835, "grad_norm": 3.9195475578308105, "learning_rate": 3.2411764705882356e-06, "loss": 2.0912, "step": 13797 }, { "epoch": 20.29106945975744, "grad_norm": 3.1253345012664795, "learning_rate": 3.2308823529411766e-06, "loss": 1.9867, "step": 13818 }, { "epoch": 20.32194046306505, "grad_norm": 1.728356957435608, "learning_rate": 3.220588235294118e-06, "loss": 1.9589, "step": 13839 }, { "epoch": 20.352811466372657, "grad_norm": 5.2193827629089355, "learning_rate": 3.210294117647059e-06, "loss": 2.0627, "step": 13860 }, { "epoch": 20.383682469680263, "grad_norm": 4.1548309326171875, "learning_rate": 3.2000000000000003e-06, "loss": 2.0675, "step": 13881 }, { "epoch": 20.414553472987873, "grad_norm": 3.0767784118652344, "learning_rate": 3.1897058823529414e-06, "loss": 2.1164, "step": 13902 }, { "epoch": 20.44542447629548, "grad_norm": 3.6371419429779053, "learning_rate": 3.1794117647058824e-06, "loss": 1.9204, "step": 13923 }, { "epoch": 20.47629547960309, "grad_norm": 2.9141464233398438, "learning_rate": 3.1691176470588235e-06, "loss": 2.0619, "step": 13944 }, { "epoch": 20.507166482910694, "grad_norm": 3.0113942623138428, "learning_rate": 3.1588235294117646e-06, "loss": 2.0679, "step": 13965 }, { "epoch": 20.5380374862183, "grad_norm": 3.8592772483825684, "learning_rate": 3.1485294117647065e-06, "loss": 2.1119, "step": 13986 }, { "epoch": 20.56890848952591, "grad_norm": 4.055545330047607, "learning_rate": 3.1382352941176476e-06, "loss": 2.0723, "step": 14007 }, { "epoch": 20.599779492833516, "grad_norm": 4.263570785522461, "learning_rate": 3.1279411764705887e-06, "loss": 1.9356, "step": 14028 }, { "epoch": 20.630650496141126, "grad_norm": 3.5049495697021484, "learning_rate": 3.1176470588235297e-06, "loss": 2.064, "step": 14049 }, { "epoch": 20.66152149944873, "grad_norm": 3.676957130432129, "learning_rate": 3.107352941176471e-06, "loss": 1.9775, "step": 14070 }, { "epoch": 20.69239250275634, "grad_norm": 2.424762725830078, "learning_rate": 3.097058823529412e-06, "loss": 2.0656, "step": 14091 }, { "epoch": 20.723263506063947, "grad_norm": 2.600409984588623, "learning_rate": 3.086764705882353e-06, "loss": 1.9777, "step": 14112 }, { "epoch": 20.754134509371553, "grad_norm": 3.8351240158081055, "learning_rate": 3.076470588235294e-06, "loss": 2.0204, "step": 14133 }, { "epoch": 20.785005512679163, "grad_norm": 2.652747631072998, "learning_rate": 3.066176470588236e-06, "loss": 1.9423, "step": 14154 }, { "epoch": 20.81587651598677, "grad_norm": 3.5386784076690674, "learning_rate": 3.055882352941177e-06, "loss": 2.1863, "step": 14175 }, { "epoch": 20.84674751929438, "grad_norm": 3.687246799468994, "learning_rate": 3.045588235294118e-06, "loss": 1.9755, "step": 14196 }, { "epoch": 20.877618522601985, "grad_norm": 2.620722532272339, "learning_rate": 3.035294117647059e-06, "loss": 2.0561, "step": 14217 }, { "epoch": 20.90848952590959, "grad_norm": 2.740910053253174, "learning_rate": 3.0250000000000003e-06, "loss": 2.0948, "step": 14238 }, { "epoch": 20.9393605292172, "grad_norm": 2.521815538406372, "learning_rate": 3.0147058823529413e-06, "loss": 1.9918, "step": 14259 }, { "epoch": 20.970231532524807, "grad_norm": 3.51527738571167, "learning_rate": 3.0044117647058824e-06, "loss": 2.1119, "step": 14280 }, { "epoch": 21.0, "grad_norm": 1.8354216814041138, "learning_rate": 2.994117647058824e-06, "loss": 1.9956, "step": 14301 }, { "epoch": 21.030871003307606, "grad_norm": 3.5836827754974365, "learning_rate": 2.983823529411765e-06, "loss": 2.0313, "step": 14322 }, { "epoch": 21.061742006615216, "grad_norm": 4.276784420013428, "learning_rate": 2.973529411764706e-06, "loss": 2.009, "step": 14343 }, { "epoch": 21.09261300992282, "grad_norm": 4.545126914978027, "learning_rate": 2.963235294117647e-06, "loss": 1.9624, "step": 14364 }, { "epoch": 21.12348401323043, "grad_norm": 3.138828754425049, "learning_rate": 2.9529411764705882e-06, "loss": 2.0172, "step": 14385 }, { "epoch": 21.154355016538037, "grad_norm": 3.7198119163513184, "learning_rate": 2.9426470588235297e-06, "loss": 1.9557, "step": 14406 }, { "epoch": 21.185226019845643, "grad_norm": 3.2042577266693115, "learning_rate": 2.932352941176471e-06, "loss": 2.076, "step": 14427 }, { "epoch": 21.216097023153253, "grad_norm": 3.6730806827545166, "learning_rate": 2.9220588235294123e-06, "loss": 1.9988, "step": 14448 }, { "epoch": 21.24696802646086, "grad_norm": 3.7986257076263428, "learning_rate": 2.9117647058823534e-06, "loss": 2.02, "step": 14469 }, { "epoch": 21.27783902976847, "grad_norm": 2.8649749755859375, "learning_rate": 2.9014705882352944e-06, "loss": 2.0309, "step": 14490 }, { "epoch": 21.308710033076075, "grad_norm": 3.3312325477600098, "learning_rate": 2.8911764705882355e-06, "loss": 2.1171, "step": 14511 }, { "epoch": 21.33958103638368, "grad_norm": 5.849648475646973, "learning_rate": 2.8808823529411766e-06, "loss": 1.9797, "step": 14532 }, { "epoch": 21.37045203969129, "grad_norm": 2.860837697982788, "learning_rate": 2.8705882352941177e-06, "loss": 1.9537, "step": 14553 }, { "epoch": 21.401323042998897, "grad_norm": 3.9823107719421387, "learning_rate": 2.8602941176470587e-06, "loss": 1.9863, "step": 14574 }, { "epoch": 21.432194046306506, "grad_norm": 3.9077060222625732, "learning_rate": 2.85e-06, "loss": 1.9336, "step": 14595 }, { "epoch": 21.463065049614112, "grad_norm": 3.0670223236083984, "learning_rate": 2.8397058823529417e-06, "loss": 2.1698, "step": 14616 }, { "epoch": 21.49393605292172, "grad_norm": 3.615760326385498, "learning_rate": 2.829411764705883e-06, "loss": 2.1763, "step": 14637 }, { "epoch": 21.524807056229328, "grad_norm": 2.5886383056640625, "learning_rate": 2.819117647058824e-06, "loss": 2.0616, "step": 14658 }, { "epoch": 21.555678059536934, "grad_norm": 3.460663080215454, "learning_rate": 2.808823529411765e-06, "loss": 1.9536, "step": 14679 }, { "epoch": 21.586549062844544, "grad_norm": 2.1539037227630615, "learning_rate": 2.798529411764706e-06, "loss": 2.0335, "step": 14700 }, { "epoch": 21.61742006615215, "grad_norm": 3.484496831893921, "learning_rate": 2.788235294117647e-06, "loss": 2.0183, "step": 14721 }, { "epoch": 21.64829106945976, "grad_norm": 2.755068302154541, "learning_rate": 2.777941176470588e-06, "loss": 1.8785, "step": 14742 }, { "epoch": 21.679162072767365, "grad_norm": 3.6181225776672363, "learning_rate": 2.76764705882353e-06, "loss": 2.1339, "step": 14763 }, { "epoch": 21.71003307607497, "grad_norm": 2.7359960079193115, "learning_rate": 2.757352941176471e-06, "loss": 1.9992, "step": 14784 }, { "epoch": 21.74090407938258, "grad_norm": 3.1301567554473877, "learning_rate": 2.7470588235294123e-06, "loss": 1.9978, "step": 14805 }, { "epoch": 21.771775082690187, "grad_norm": 3.758486032485962, "learning_rate": 2.7367647058823533e-06, "loss": 2.0067, "step": 14826 }, { "epoch": 21.802646085997797, "grad_norm": 3.277071475982666, "learning_rate": 2.7264705882352944e-06, "loss": 2.1184, "step": 14847 }, { "epoch": 21.833517089305403, "grad_norm": 4.998867511749268, "learning_rate": 2.7161764705882355e-06, "loss": 2.0494, "step": 14868 }, { "epoch": 21.86438809261301, "grad_norm": 3.6457066535949707, "learning_rate": 2.7058823529411766e-06, "loss": 2.1115, "step": 14889 }, { "epoch": 21.89525909592062, "grad_norm": 2.19140625, "learning_rate": 2.6955882352941176e-06, "loss": 1.9649, "step": 14910 }, { "epoch": 21.926130099228224, "grad_norm": 3.393760919570923, "learning_rate": 2.685294117647059e-06, "loss": 2.0337, "step": 14931 }, { "epoch": 21.957001102535834, "grad_norm": 2.6467642784118652, "learning_rate": 2.6750000000000002e-06, "loss": 2.0755, "step": 14952 }, { "epoch": 21.98787210584344, "grad_norm": 2.9383254051208496, "learning_rate": 2.6647058823529413e-06, "loss": 2.0456, "step": 14973 }, { "epoch": 22.017640573318634, "grad_norm": 2.71541690826416, "learning_rate": 2.6544117647058824e-06, "loss": 1.8908, "step": 14994 }, { "epoch": 22.04851157662624, "grad_norm": 3.295368194580078, "learning_rate": 2.644117647058824e-06, "loss": 2.1089, "step": 15015 }, { "epoch": 22.07938257993385, "grad_norm": 2.6102893352508545, "learning_rate": 2.633823529411765e-06, "loss": 2.0108, "step": 15036 }, { "epoch": 22.110253583241455, "grad_norm": 3.5170047283172607, "learning_rate": 2.623529411764706e-06, "loss": 2.0225, "step": 15057 }, { "epoch": 22.14112458654906, "grad_norm": 3.3258235454559326, "learning_rate": 2.6132352941176475e-06, "loss": 2.1019, "step": 15078 }, { "epoch": 22.17199558985667, "grad_norm": 4.083648204803467, "learning_rate": 2.6029411764705886e-06, "loss": 1.9216, "step": 15099 }, { "epoch": 22.202866593164277, "grad_norm": 2.2198421955108643, "learning_rate": 2.5926470588235297e-06, "loss": 2.057, "step": 15120 }, { "epoch": 22.233737596471887, "grad_norm": 3.2718071937561035, "learning_rate": 2.5823529411764708e-06, "loss": 2.0215, "step": 15141 }, { "epoch": 22.264608599779493, "grad_norm": 4.054749965667725, "learning_rate": 2.572058823529412e-06, "loss": 2.0821, "step": 15162 }, { "epoch": 22.2954796030871, "grad_norm": 3.719663143157959, "learning_rate": 2.561764705882353e-06, "loss": 2.0058, "step": 15183 }, { "epoch": 22.32635060639471, "grad_norm": 4.2490363121032715, "learning_rate": 2.551470588235294e-06, "loss": 1.9647, "step": 15204 }, { "epoch": 22.357221609702314, "grad_norm": 2.9652185440063477, "learning_rate": 2.541176470588235e-06, "loss": 2.1335, "step": 15225 }, { "epoch": 22.388092613009924, "grad_norm": 2.423184394836426, "learning_rate": 2.530882352941177e-06, "loss": 2.06, "step": 15246 }, { "epoch": 22.41896361631753, "grad_norm": 4.045749187469482, "learning_rate": 2.520588235294118e-06, "loss": 2.1137, "step": 15267 }, { "epoch": 22.449834619625136, "grad_norm": 2.758460283279419, "learning_rate": 2.510294117647059e-06, "loss": 1.9363, "step": 15288 }, { "epoch": 22.480705622932746, "grad_norm": 3.1217832565307617, "learning_rate": 2.5e-06, "loss": 2.0578, "step": 15309 }, { "epoch": 22.511576626240352, "grad_norm": 3.535093069076538, "learning_rate": 2.4897058823529413e-06, "loss": 2.0919, "step": 15330 }, { "epoch": 22.54244762954796, "grad_norm": 2.6681010723114014, "learning_rate": 2.4794117647058828e-06, "loss": 1.9656, "step": 15351 }, { "epoch": 22.573318632855568, "grad_norm": 3.941215753555298, "learning_rate": 2.469117647058824e-06, "loss": 2.0865, "step": 15372 }, { "epoch": 22.604189636163177, "grad_norm": 3.9376718997955322, "learning_rate": 2.458823529411765e-06, "loss": 2.0817, "step": 15393 }, { "epoch": 22.635060639470783, "grad_norm": 4.634395122528076, "learning_rate": 2.448529411764706e-06, "loss": 1.9958, "step": 15414 }, { "epoch": 22.66593164277839, "grad_norm": 4.030196189880371, "learning_rate": 2.4382352941176475e-06, "loss": 1.9103, "step": 15435 }, { "epoch": 22.696802646086, "grad_norm": 3.294787883758545, "learning_rate": 2.4279411764705886e-06, "loss": 1.979, "step": 15456 }, { "epoch": 22.727673649393605, "grad_norm": 4.601010322570801, "learning_rate": 2.4176470588235297e-06, "loss": 2.037, "step": 15477 }, { "epoch": 22.75854465270121, "grad_norm": 2.8019866943359375, "learning_rate": 2.4073529411764707e-06, "loss": 1.9568, "step": 15498 }, { "epoch": 22.78941565600882, "grad_norm": 4.673406600952148, "learning_rate": 2.3970588235294122e-06, "loss": 2.1818, "step": 15519 }, { "epoch": 22.820286659316427, "grad_norm": 3.075392723083496, "learning_rate": 2.3867647058823533e-06, "loss": 2.0665, "step": 15540 }, { "epoch": 22.851157662624036, "grad_norm": 4.0568437576293945, "learning_rate": 2.3764705882352944e-06, "loss": 1.9772, "step": 15561 }, { "epoch": 22.882028665931642, "grad_norm": 22.847898483276367, "learning_rate": 2.3661764705882355e-06, "loss": 2.1377, "step": 15582 }, { "epoch": 22.912899669239252, "grad_norm": 5.353538990020752, "learning_rate": 2.3558823529411765e-06, "loss": 2.0693, "step": 15603 }, { "epoch": 22.943770672546858, "grad_norm": 3.0493805408477783, "learning_rate": 2.345588235294118e-06, "loss": 2.0442, "step": 15624 }, { "epoch": 22.974641675854464, "grad_norm": 2.932695150375366, "learning_rate": 2.335294117647059e-06, "loss": 2.1378, "step": 15645 }, { "epoch": 23.004410143329658, "grad_norm": 2.9815673828125, "learning_rate": 2.325e-06, "loss": 1.9206, "step": 15666 }, { "epoch": 23.035281146637267, "grad_norm": 2.5419440269470215, "learning_rate": 2.3147058823529413e-06, "loss": 2.017, "step": 15687 }, { "epoch": 23.066152149944873, "grad_norm": 3.180964708328247, "learning_rate": 2.3044117647058823e-06, "loss": 2.0926, "step": 15708 }, { "epoch": 23.09702315325248, "grad_norm": 3.7221603393554688, "learning_rate": 2.2941176470588234e-06, "loss": 2.0389, "step": 15729 }, { "epoch": 23.12789415656009, "grad_norm": 2.8961706161499023, "learning_rate": 2.283823529411765e-06, "loss": 1.9742, "step": 15750 }, { "epoch": 23.158765159867695, "grad_norm": 4.145342826843262, "learning_rate": 2.273529411764706e-06, "loss": 1.9546, "step": 15771 }, { "epoch": 23.189636163175305, "grad_norm": 3.063854217529297, "learning_rate": 2.263235294117647e-06, "loss": 1.9493, "step": 15792 }, { "epoch": 23.22050716648291, "grad_norm": 3.4941153526306152, "learning_rate": 2.252941176470588e-06, "loss": 2.1055, "step": 15813 }, { "epoch": 23.251378169790517, "grad_norm": 5.007169723510742, "learning_rate": 2.243137254901961e-06, "loss": 1.9953, "step": 15834 }, { "epoch": 23.282249173098126, "grad_norm": 2.7144691944122314, "learning_rate": 2.2328431372549024e-06, "loss": 2.0099, "step": 15855 }, { "epoch": 23.313120176405732, "grad_norm": 3.789856433868408, "learning_rate": 2.2225490196078435e-06, "loss": 2.0094, "step": 15876 }, { "epoch": 23.343991179713342, "grad_norm": 3.635158061981201, "learning_rate": 2.2122549019607846e-06, "loss": 2.0724, "step": 15897 }, { "epoch": 23.374862183020948, "grad_norm": 3.294782876968384, "learning_rate": 2.2019607843137256e-06, "loss": 2.1435, "step": 15918 }, { "epoch": 23.405733186328554, "grad_norm": 3.1448404788970947, "learning_rate": 2.191666666666667e-06, "loss": 1.9845, "step": 15939 }, { "epoch": 23.436604189636164, "grad_norm": 3.8233425617218018, "learning_rate": 2.1813725490196082e-06, "loss": 2.0245, "step": 15960 }, { "epoch": 23.46747519294377, "grad_norm": 3.341221570968628, "learning_rate": 2.1710784313725493e-06, "loss": 1.9936, "step": 15981 }, { "epoch": 23.49834619625138, "grad_norm": 3.8249101638793945, "learning_rate": 2.1607843137254904e-06, "loss": 1.9964, "step": 16002 }, { "epoch": 23.529217199558985, "grad_norm": 3.033024549484253, "learning_rate": 2.1504901960784314e-06, "loss": 1.9725, "step": 16023 }, { "epoch": 23.56008820286659, "grad_norm": 2.373525619506836, "learning_rate": 2.1401960784313725e-06, "loss": 1.9035, "step": 16044 }, { "epoch": 23.5909592061742, "grad_norm": 3.101956844329834, "learning_rate": 2.129901960784314e-06, "loss": 1.9845, "step": 16065 }, { "epoch": 23.621830209481807, "grad_norm": 3.2224278450012207, "learning_rate": 2.119607843137255e-06, "loss": 2.0678, "step": 16086 }, { "epoch": 23.652701212789417, "grad_norm": 3.791844129562378, "learning_rate": 2.109313725490196e-06, "loss": 1.9764, "step": 16107 }, { "epoch": 23.683572216097023, "grad_norm": 2.1430459022521973, "learning_rate": 2.0990196078431372e-06, "loss": 2.1508, "step": 16128 }, { "epoch": 23.71444321940463, "grad_norm": 3.8360910415649414, "learning_rate": 2.0887254901960783e-06, "loss": 2.073, "step": 16149 }, { "epoch": 23.74531422271224, "grad_norm": 4.548948287963867, "learning_rate": 2.07843137254902e-06, "loss": 2.0117, "step": 16170 }, { "epoch": 23.776185226019845, "grad_norm": 2.1734726428985596, "learning_rate": 2.068137254901961e-06, "loss": 2.0523, "step": 16191 }, { "epoch": 23.807056229327454, "grad_norm": 2.869358539581299, "learning_rate": 2.057843137254902e-06, "loss": 2.0996, "step": 16212 }, { "epoch": 23.83792723263506, "grad_norm": 2.855677843093872, "learning_rate": 2.047549019607843e-06, "loss": 2.0153, "step": 16233 }, { "epoch": 23.86879823594267, "grad_norm": 2.8188350200653076, "learning_rate": 2.0372549019607845e-06, "loss": 2.0767, "step": 16254 }, { "epoch": 23.899669239250276, "grad_norm": 2.9440088272094727, "learning_rate": 2.0269607843137256e-06, "loss": 1.911, "step": 16275 }, { "epoch": 23.930540242557882, "grad_norm": 3.6652421951293945, "learning_rate": 2.0166666666666667e-06, "loss": 1.9595, "step": 16296 }, { "epoch": 23.96141124586549, "grad_norm": 2.7821288108825684, "learning_rate": 2.0063725490196078e-06, "loss": 2.0475, "step": 16317 }, { "epoch": 23.992282249173098, "grad_norm": 4.175220966339111, "learning_rate": 1.9960784313725493e-06, "loss": 1.97, "step": 16338 }, { "epoch": 24.02205071664829, "grad_norm": 3.457138776779175, "learning_rate": 1.9857843137254903e-06, "loss": 1.9588, "step": 16359 }, { "epoch": 24.052921719955897, "grad_norm": 3.436939001083374, "learning_rate": 1.9754901960784314e-06, "loss": 2.0322, "step": 16380 }, { "epoch": 24.083792723263507, "grad_norm": 5.357500076293945, "learning_rate": 1.965196078431373e-06, "loss": 1.9858, "step": 16401 }, { "epoch": 24.114663726571113, "grad_norm": 3.293443202972412, "learning_rate": 1.954901960784314e-06, "loss": 1.9989, "step": 16422 }, { "epoch": 24.145534729878722, "grad_norm": 4.073799133300781, "learning_rate": 1.944607843137255e-06, "loss": 2.0617, "step": 16443 }, { "epoch": 24.17640573318633, "grad_norm": 2.9053542613983154, "learning_rate": 1.934313725490196e-06, "loss": 2.031, "step": 16464 }, { "epoch": 24.207276736493935, "grad_norm": 4.001552581787109, "learning_rate": 1.9240196078431377e-06, "loss": 1.905, "step": 16485 }, { "epoch": 24.238147739801544, "grad_norm": 3.245598316192627, "learning_rate": 1.9137254901960787e-06, "loss": 2.0186, "step": 16506 }, { "epoch": 24.26901874310915, "grad_norm": 2.63043212890625, "learning_rate": 1.9034313725490198e-06, "loss": 2.0694, "step": 16527 }, { "epoch": 24.29988974641676, "grad_norm": 3.8443353176116943, "learning_rate": 1.8931372549019609e-06, "loss": 1.8988, "step": 16548 }, { "epoch": 24.330760749724366, "grad_norm": 2.937856674194336, "learning_rate": 1.8828431372549022e-06, "loss": 2.0204, "step": 16569 }, { "epoch": 24.361631753031972, "grad_norm": 3.2340915203094482, "learning_rate": 1.8725490196078432e-06, "loss": 2.0266, "step": 16590 }, { "epoch": 24.39250275633958, "grad_norm": 4.433572769165039, "learning_rate": 1.8622549019607843e-06, "loss": 2.0059, "step": 16611 }, { "epoch": 24.423373759647188, "grad_norm": 3.6761868000030518, "learning_rate": 1.8519607843137258e-06, "loss": 2.0048, "step": 16632 }, { "epoch": 24.454244762954797, "grad_norm": 3.846853017807007, "learning_rate": 1.8416666666666669e-06, "loss": 1.9698, "step": 16653 }, { "epoch": 24.485115766262403, "grad_norm": 4.937806129455566, "learning_rate": 1.831372549019608e-06, "loss": 1.9939, "step": 16674 }, { "epoch": 24.51598676957001, "grad_norm": 2.8848650455474854, "learning_rate": 1.821078431372549e-06, "loss": 2.0912, "step": 16695 }, { "epoch": 24.54685777287762, "grad_norm": 2.890634536743164, "learning_rate": 1.8107843137254905e-06, "loss": 1.9191, "step": 16716 }, { "epoch": 24.577728776185225, "grad_norm": 2.678192615509033, "learning_rate": 1.8004901960784316e-06, "loss": 2.0085, "step": 16737 }, { "epoch": 24.608599779492835, "grad_norm": 2.798147201538086, "learning_rate": 1.7901960784313727e-06, "loss": 2.1472, "step": 16758 }, { "epoch": 24.63947078280044, "grad_norm": 3.4055185317993164, "learning_rate": 1.7799019607843138e-06, "loss": 1.9615, "step": 16779 }, { "epoch": 24.670341786108047, "grad_norm": 2.357825517654419, "learning_rate": 1.769607843137255e-06, "loss": 1.9813, "step": 16800 }, { "epoch": 24.701212789415656, "grad_norm": 3.224651575088501, "learning_rate": 1.7593137254901963e-06, "loss": 2.037, "step": 16821 }, { "epoch": 24.732083792723262, "grad_norm": 3.342982053756714, "learning_rate": 1.7490196078431374e-06, "loss": 2.0401, "step": 16842 }, { "epoch": 24.762954796030872, "grad_norm": 4.797550201416016, "learning_rate": 1.7387254901960785e-06, "loss": 1.9856, "step": 16863 }, { "epoch": 24.793825799338478, "grad_norm": 3.657365322113037, "learning_rate": 1.7284313725490198e-06, "loss": 1.9802, "step": 16884 }, { "epoch": 24.824696802646088, "grad_norm": 3.4711039066314697, "learning_rate": 1.7181372549019609e-06, "loss": 1.9883, "step": 16905 }, { "epoch": 24.855567805953694, "grad_norm": 3.5633745193481445, "learning_rate": 1.707843137254902e-06, "loss": 2.0446, "step": 16926 }, { "epoch": 24.8864388092613, "grad_norm": 3.0469162464141846, "learning_rate": 1.6975490196078434e-06, "loss": 2.0676, "step": 16947 }, { "epoch": 24.91730981256891, "grad_norm": 3.1432437896728516, "learning_rate": 1.6872549019607845e-06, "loss": 2.007, "step": 16968 }, { "epoch": 24.948180815876515, "grad_norm": 3.7878284454345703, "learning_rate": 1.6769607843137256e-06, "loss": 2.0496, "step": 16989 }, { "epoch": 24.979051819184125, "grad_norm": 3.5506863594055176, "learning_rate": 1.6666666666666667e-06, "loss": 2.0364, "step": 17010 }, { "epoch": 25.008820286659315, "grad_norm": 3.147472620010376, "learning_rate": 1.6563725490196082e-06, "loss": 1.9508, "step": 17031 }, { "epoch": 25.039691289966925, "grad_norm": 2.918980598449707, "learning_rate": 1.6460784313725492e-06, "loss": 2.0846, "step": 17052 }, { "epoch": 25.07056229327453, "grad_norm": 3.624821186065674, "learning_rate": 1.6357843137254903e-06, "loss": 1.968, "step": 17073 }, { "epoch": 25.10143329658214, "grad_norm": 3.0075933933258057, "learning_rate": 1.6254901960784314e-06, "loss": 2.2606, "step": 17094 }, { "epoch": 25.132304299889746, "grad_norm": 3.014009475708008, "learning_rate": 1.6151960784313727e-06, "loss": 2.0683, "step": 17115 }, { "epoch": 25.163175303197352, "grad_norm": 2.4412238597869873, "learning_rate": 1.604901960784314e-06, "loss": 1.8993, "step": 17136 }, { "epoch": 25.194046306504962, "grad_norm": 3.6228857040405273, "learning_rate": 1.594607843137255e-06, "loss": 2.014, "step": 17157 }, { "epoch": 25.224917309812568, "grad_norm": 3.0460023880004883, "learning_rate": 1.5843137254901961e-06, "loss": 1.9859, "step": 17178 }, { "epoch": 25.255788313120178, "grad_norm": 3.527524948120117, "learning_rate": 1.5740196078431374e-06, "loss": 2.0807, "step": 17199 }, { "epoch": 25.286659316427784, "grad_norm": 3.9341087341308594, "learning_rate": 1.5637254901960785e-06, "loss": 2.0596, "step": 17220 }, { "epoch": 25.31753031973539, "grad_norm": 2.719883441925049, "learning_rate": 1.5534313725490196e-06, "loss": 2.0104, "step": 17241 }, { "epoch": 25.348401323043, "grad_norm": 2.8454911708831787, "learning_rate": 1.543137254901961e-06, "loss": 1.8807, "step": 17262 }, { "epoch": 25.379272326350605, "grad_norm": 3.0369486808776855, "learning_rate": 1.5328431372549021e-06, "loss": 2.0453, "step": 17283 }, { "epoch": 25.410143329658215, "grad_norm": 2.745230197906494, "learning_rate": 1.5225490196078432e-06, "loss": 1.9671, "step": 17304 }, { "epoch": 25.44101433296582, "grad_norm": 2.962475299835205, "learning_rate": 1.5122549019607843e-06, "loss": 1.9908, "step": 17325 }, { "epoch": 25.471885336273427, "grad_norm": 2.3031718730926514, "learning_rate": 1.5019607843137258e-06, "loss": 2.0485, "step": 17346 }, { "epoch": 25.502756339581037, "grad_norm": 3.512769937515259, "learning_rate": 1.4916666666666669e-06, "loss": 1.9571, "step": 17367 }, { "epoch": 25.533627342888643, "grad_norm": 4.890634536743164, "learning_rate": 1.481372549019608e-06, "loss": 2.0696, "step": 17388 }, { "epoch": 25.564498346196252, "grad_norm": 4.985838890075684, "learning_rate": 1.471078431372549e-06, "loss": 2.0484, "step": 17409 }, { "epoch": 25.59536934950386, "grad_norm": 4.576741695404053, "learning_rate": 1.4607843137254903e-06, "loss": 2.0638, "step": 17430 }, { "epoch": 25.626240352811465, "grad_norm": 3.1261324882507324, "learning_rate": 1.4504901960784316e-06, "loss": 2.0689, "step": 17451 }, { "epoch": 25.657111356119074, "grad_norm": 2.712101936340332, "learning_rate": 1.4401960784313727e-06, "loss": 1.9987, "step": 17472 }, { "epoch": 25.68798235942668, "grad_norm": 4.180928707122803, "learning_rate": 1.429901960784314e-06, "loss": 2.0525, "step": 17493 }, { "epoch": 25.71885336273429, "grad_norm": 3.562640428543091, "learning_rate": 1.419607843137255e-06, "loss": 2.0431, "step": 17514 }, { "epoch": 25.749724366041896, "grad_norm": 3.860323429107666, "learning_rate": 1.409313725490196e-06, "loss": 1.9785, "step": 17535 }, { "epoch": 25.780595369349506, "grad_norm": 3.463897705078125, "learning_rate": 1.3990196078431374e-06, "loss": 1.9708, "step": 17556 }, { "epoch": 25.81146637265711, "grad_norm": 3.1898350715637207, "learning_rate": 1.3887254901960787e-06, "loss": 1.981, "step": 17577 }, { "epoch": 25.842337375964718, "grad_norm": 2.384361982345581, "learning_rate": 1.3784313725490197e-06, "loss": 2.0021, "step": 17598 }, { "epoch": 25.873208379272327, "grad_norm": 3.8873538970947266, "learning_rate": 1.3681372549019608e-06, "loss": 2.1231, "step": 17619 }, { "epoch": 25.904079382579933, "grad_norm": 4.486485958099365, "learning_rate": 1.357843137254902e-06, "loss": 1.9089, "step": 17640 }, { "epoch": 25.934950385887543, "grad_norm": 2.862537145614624, "learning_rate": 1.3475490196078434e-06, "loss": 2.0553, "step": 17661 }, { "epoch": 25.96582138919515, "grad_norm": 3.2039759159088135, "learning_rate": 1.3372549019607845e-06, "loss": 1.914, "step": 17682 }, { "epoch": 25.996692392502755, "grad_norm": 2.9676759243011475, "learning_rate": 1.3269607843137255e-06, "loss": 1.9942, "step": 17703 }, { "epoch": 26.02646085997795, "grad_norm": 4.259377479553223, "learning_rate": 1.3166666666666666e-06, "loss": 1.9375, "step": 17724 }, { "epoch": 26.057331863285558, "grad_norm": 2.7348697185516357, "learning_rate": 1.3063725490196081e-06, "loss": 2.018, "step": 17745 }, { "epoch": 26.088202866593164, "grad_norm": 3.2641003131866455, "learning_rate": 1.2960784313725492e-06, "loss": 2.021, "step": 17766 }, { "epoch": 26.11907386990077, "grad_norm": 3.0980889797210693, "learning_rate": 1.2857843137254903e-06, "loss": 2.0851, "step": 17787 }, { "epoch": 26.14994487320838, "grad_norm": 2.84696888923645, "learning_rate": 1.2754901960784316e-06, "loss": 2.0336, "step": 17808 }, { "epoch": 26.180815876515986, "grad_norm": 2.650468111038208, "learning_rate": 1.2651960784313726e-06, "loss": 2.035, "step": 17829 }, { "epoch": 26.211686879823596, "grad_norm": 2.750750780105591, "learning_rate": 1.2549019607843137e-06, "loss": 1.9548, "step": 17850 }, { "epoch": 26.2425578831312, "grad_norm": 2.1722657680511475, "learning_rate": 1.244607843137255e-06, "loss": 1.9388, "step": 17871 }, { "epoch": 26.273428886438808, "grad_norm": 3.936103105545044, "learning_rate": 1.234313725490196e-06, "loss": 2.0783, "step": 17892 }, { "epoch": 26.304299889746417, "grad_norm": 3.3649473190307617, "learning_rate": 1.2240196078431374e-06, "loss": 1.9849, "step": 17913 }, { "epoch": 26.335170893054023, "grad_norm": 3.6519925594329834, "learning_rate": 1.2137254901960784e-06, "loss": 2.0773, "step": 17934 }, { "epoch": 26.366041896361633, "grad_norm": 4.6296067237854, "learning_rate": 1.2034313725490197e-06, "loss": 2.0144, "step": 17955 }, { "epoch": 26.39691289966924, "grad_norm": 3.302056312561035, "learning_rate": 1.1931372549019608e-06, "loss": 2.0479, "step": 17976 }, { "epoch": 26.427783902976845, "grad_norm": 3.523118734359741, "learning_rate": 1.182843137254902e-06, "loss": 2.0372, "step": 17997 }, { "epoch": 26.458654906284455, "grad_norm": 4.209780216217041, "learning_rate": 1.1725490196078432e-06, "loss": 2.1403, "step": 18018 }, { "epoch": 26.48952590959206, "grad_norm": 3.7631661891937256, "learning_rate": 1.1622549019607845e-06, "loss": 2.1286, "step": 18039 }, { "epoch": 26.52039691289967, "grad_norm": 4.490149021148682, "learning_rate": 1.1519607843137255e-06, "loss": 2.0241, "step": 18060 }, { "epoch": 26.551267916207276, "grad_norm": 2.7082109451293945, "learning_rate": 1.1416666666666668e-06, "loss": 2.025, "step": 18081 }, { "epoch": 26.582138919514883, "grad_norm": 3.7196762561798096, "learning_rate": 1.1313725490196079e-06, "loss": 2.0884, "step": 18102 }, { "epoch": 26.613009922822492, "grad_norm": 4.222052574157715, "learning_rate": 1.1210784313725492e-06, "loss": 2.0388, "step": 18123 }, { "epoch": 26.643880926130098, "grad_norm": 4.171756267547607, "learning_rate": 1.1107843137254903e-06, "loss": 2.028, "step": 18144 }, { "epoch": 26.674751929437708, "grad_norm": 4.496833324432373, "learning_rate": 1.1004901960784315e-06, "loss": 2.093, "step": 18165 }, { "epoch": 26.705622932745314, "grad_norm": 2.4819180965423584, "learning_rate": 1.0901960784313726e-06, "loss": 1.8364, "step": 18186 }, { "epoch": 26.736493936052923, "grad_norm": 3.3765549659729004, "learning_rate": 1.0799019607843137e-06, "loss": 2.0641, "step": 18207 }, { "epoch": 26.76736493936053, "grad_norm": 3.363340139389038, "learning_rate": 1.069607843137255e-06, "loss": 2.0086, "step": 18228 }, { "epoch": 26.798235942668136, "grad_norm": 2.692979335784912, "learning_rate": 1.059313725490196e-06, "loss": 2.0157, "step": 18249 }, { "epoch": 26.829106945975745, "grad_norm": 3.666555881500244, "learning_rate": 1.0490196078431373e-06, "loss": 2.0233, "step": 18270 }, { "epoch": 26.85997794928335, "grad_norm": 3.171398639678955, "learning_rate": 1.0387254901960784e-06, "loss": 1.976, "step": 18291 }, { "epoch": 26.89084895259096, "grad_norm": 4.0071868896484375, "learning_rate": 1.0284313725490197e-06, "loss": 1.9523, "step": 18312 }, { "epoch": 26.921719955898567, "grad_norm": 4.549193859100342, "learning_rate": 1.0181372549019608e-06, "loss": 1.9475, "step": 18333 }, { "epoch": 26.952590959206173, "grad_norm": 4.22450590133667, "learning_rate": 1.007843137254902e-06, "loss": 1.9244, "step": 18354 }, { "epoch": 26.983461962513783, "grad_norm": 3.5466229915618896, "learning_rate": 9.975490196078434e-07, "loss": 2.0134, "step": 18375 }, { "epoch": 27.013230429988976, "grad_norm": 3.650482416152954, "learning_rate": 9.872549019607844e-07, "loss": 1.9523, "step": 18396 }, { "epoch": 27.044101433296582, "grad_norm": 3.628394842147827, "learning_rate": 9.769607843137257e-07, "loss": 2.126, "step": 18417 }, { "epoch": 27.074972436604188, "grad_norm": 2.50268816947937, "learning_rate": 9.666666666666668e-07, "loss": 2.1265, "step": 18438 }, { "epoch": 27.105843439911798, "grad_norm": 2.1516237258911133, "learning_rate": 9.563725490196079e-07, "loss": 2.0307, "step": 18459 }, { "epoch": 27.136714443219404, "grad_norm": 3.48169207572937, "learning_rate": 9.460784313725491e-07, "loss": 2.0434, "step": 18480 }, { "epoch": 27.167585446527013, "grad_norm": 5.042967796325684, "learning_rate": 9.357843137254903e-07, "loss": 2.0246, "step": 18501 }, { "epoch": 27.19845644983462, "grad_norm": 2.58532977104187, "learning_rate": 9.254901960784314e-07, "loss": 2.0705, "step": 18522 }, { "epoch": 27.229327453142226, "grad_norm": 2.8869588375091553, "learning_rate": 9.151960784313726e-07, "loss": 2.0355, "step": 18543 }, { "epoch": 27.260198456449835, "grad_norm": 3.409188985824585, "learning_rate": 9.049019607843138e-07, "loss": 1.8979, "step": 18564 }, { "epoch": 27.29106945975744, "grad_norm": 2.7081124782562256, "learning_rate": 8.94607843137255e-07, "loss": 1.9425, "step": 18585 }, { "epoch": 27.32194046306505, "grad_norm": 2.522266149520874, "learning_rate": 8.84313725490196e-07, "loss": 1.9749, "step": 18606 }, { "epoch": 27.352811466372657, "grad_norm": 2.937058210372925, "learning_rate": 8.740196078431373e-07, "loss": 1.981, "step": 18627 }, { "epoch": 27.383682469680263, "grad_norm": 3.0787739753723145, "learning_rate": 8.637254901960786e-07, "loss": 2.0818, "step": 18648 }, { "epoch": 27.414553472987873, "grad_norm": 4.102266311645508, "learning_rate": 8.534313725490197e-07, "loss": 1.8458, "step": 18669 }, { "epoch": 27.44542447629548, "grad_norm": 5.012358665466309, "learning_rate": 8.431372549019609e-07, "loss": 2.0026, "step": 18690 }, { "epoch": 27.47629547960309, "grad_norm": 3.763712167739868, "learning_rate": 8.328431372549021e-07, "loss": 2.091, "step": 18711 }, { "epoch": 27.507166482910694, "grad_norm": 3.3943490982055664, "learning_rate": 8.230392156862746e-07, "loss": 2.0389, "step": 18732 }, { "epoch": 27.5380374862183, "grad_norm": 3.6211483478546143, "learning_rate": 8.127450980392157e-07, "loss": 2.0371, "step": 18753 }, { "epoch": 27.56890848952591, "grad_norm": 3.4411706924438477, "learning_rate": 8.02450980392157e-07, "loss": 1.9773, "step": 18774 }, { "epoch": 27.599779492833516, "grad_norm": 2.2726285457611084, "learning_rate": 7.921568627450981e-07, "loss": 1.9814, "step": 18795 }, { "epoch": 27.630650496141126, "grad_norm": 6.275850296020508, "learning_rate": 7.818627450980392e-07, "loss": 2.009, "step": 18816 }, { "epoch": 27.66152149944873, "grad_norm": 3.4813263416290283, "learning_rate": 7.715686274509805e-07, "loss": 1.9447, "step": 18837 }, { "epoch": 27.69239250275634, "grad_norm": 3.8926095962524414, "learning_rate": 7.612745098039216e-07, "loss": 2.1061, "step": 18858 }, { "epoch": 27.723263506063947, "grad_norm": 3.2279534339904785, "learning_rate": 7.509803921568629e-07, "loss": 2.1357, "step": 18879 }, { "epoch": 27.754134509371553, "grad_norm": 4.1901631355285645, "learning_rate": 7.40686274509804e-07, "loss": 2.0081, "step": 18900 }, { "epoch": 27.785005512679163, "grad_norm": 3.5056240558624268, "learning_rate": 7.303921568627451e-07, "loss": 2.0354, "step": 18921 }, { "epoch": 27.81587651598677, "grad_norm": 3.4158339500427246, "learning_rate": 7.200980392156863e-07, "loss": 2.0163, "step": 18942 }, { "epoch": 27.84674751929438, "grad_norm": 2.8889598846435547, "learning_rate": 7.098039215686275e-07, "loss": 2.0046, "step": 18963 }, { "epoch": 27.877618522601985, "grad_norm": 3.9091246128082275, "learning_rate": 6.995098039215687e-07, "loss": 1.9854, "step": 18984 }, { "epoch": 27.90848952590959, "grad_norm": 4.670572280883789, "learning_rate": 6.892156862745099e-07, "loss": 2.0157, "step": 19005 }, { "epoch": 27.9393605292172, "grad_norm": 2.7897861003875732, "learning_rate": 6.78921568627451e-07, "loss": 2.03, "step": 19026 }, { "epoch": 27.970231532524807, "grad_norm": 3.8964619636535645, "learning_rate": 6.686274509803922e-07, "loss": 2.028, "step": 19047 }, { "epoch": 28.0, "grad_norm": 1.9693920612335205, "learning_rate": 6.583333333333333e-07, "loss": 1.8564, "step": 19068 }, { "epoch": 28.030871003307606, "grad_norm": 4.751320838928223, "learning_rate": 6.480392156862746e-07, "loss": 1.9493, "step": 19089 }, { "epoch": 28.061742006615216, "grad_norm": 3.4831652641296387, "learning_rate": 6.377450980392158e-07, "loss": 1.9572, "step": 19110 }, { "epoch": 28.09261300992282, "grad_norm": 3.4712512493133545, "learning_rate": 6.274509803921569e-07, "loss": 2.0309, "step": 19131 }, { "epoch": 28.12348401323043, "grad_norm": 3.9922566413879395, "learning_rate": 6.17156862745098e-07, "loss": 1.9811, "step": 19152 }, { "epoch": 28.154355016538037, "grad_norm": 4.792636871337891, "learning_rate": 6.068627450980392e-07, "loss": 1.9534, "step": 19173 }, { "epoch": 28.185226019845643, "grad_norm": 3.729520559310913, "learning_rate": 5.965686274509804e-07, "loss": 1.9456, "step": 19194 }, { "epoch": 28.216097023153253, "grad_norm": 2.783691167831421, "learning_rate": 5.862745098039216e-07, "loss": 2.0291, "step": 19215 }, { "epoch": 28.24696802646086, "grad_norm": 3.3077173233032227, "learning_rate": 5.759803921568628e-07, "loss": 2.0922, "step": 19236 }, { "epoch": 28.27783902976847, "grad_norm": 3.1509790420532227, "learning_rate": 5.656862745098039e-07, "loss": 2.1581, "step": 19257 }, { "epoch": 28.308710033076075, "grad_norm": 4.7033538818359375, "learning_rate": 5.553921568627451e-07, "loss": 1.9384, "step": 19278 }, { "epoch": 28.33958103638368, "grad_norm": 4.201630115509033, "learning_rate": 5.450980392156863e-07, "loss": 1.9448, "step": 19299 }, { "epoch": 28.37045203969129, "grad_norm": 3.8276824951171875, "learning_rate": 5.348039215686275e-07, "loss": 1.9769, "step": 19320 }, { "epoch": 28.401323042998897, "grad_norm": 4.129844665527344, "learning_rate": 5.245098039215687e-07, "loss": 2.1262, "step": 19341 }, { "epoch": 28.432194046306506, "grad_norm": 3.277721643447876, "learning_rate": 5.142156862745099e-07, "loss": 1.9226, "step": 19362 }, { "epoch": 28.463065049614112, "grad_norm": 3.1267642974853516, "learning_rate": 5.03921568627451e-07, "loss": 2.1073, "step": 19383 }, { "epoch": 28.49393605292172, "grad_norm": 3.5480620861053467, "learning_rate": 4.936274509803922e-07, "loss": 2.0114, "step": 19404 }, { "epoch": 28.524807056229328, "grad_norm": 3.172790765762329, "learning_rate": 4.833333333333334e-07, "loss": 2.0302, "step": 19425 }, { "epoch": 28.555678059536934, "grad_norm": 3.0664799213409424, "learning_rate": 4.7303921568627453e-07, "loss": 2.0294, "step": 19446 }, { "epoch": 28.586549062844544, "grad_norm": 2.906982421875, "learning_rate": 4.627450980392157e-07, "loss": 2.0385, "step": 19467 }, { "epoch": 28.61742006615215, "grad_norm": 4.4187541007995605, "learning_rate": 4.524509803921569e-07, "loss": 2.0023, "step": 19488 }, { "epoch": 28.64829106945976, "grad_norm": 3.3276491165161133, "learning_rate": 4.42156862745098e-07, "loss": 2.0314, "step": 19509 }, { "epoch": 28.679162072767365, "grad_norm": 3.633134365081787, "learning_rate": 4.318627450980393e-07, "loss": 2.0017, "step": 19530 }, { "epoch": 28.71003307607497, "grad_norm": 3.469681978225708, "learning_rate": 4.2156862745098044e-07, "loss": 1.9007, "step": 19551 }, { "epoch": 28.74090407938258, "grad_norm": 3.641199827194214, "learning_rate": 4.112745098039216e-07, "loss": 2.0142, "step": 19572 }, { "epoch": 28.771775082690187, "grad_norm": 3.490839719772339, "learning_rate": 4.009803921568628e-07, "loss": 2.106, "step": 19593 }, { "epoch": 28.802646085997797, "grad_norm": 4.220351219177246, "learning_rate": 3.90686274509804e-07, "loss": 1.9925, "step": 19614 }, { "epoch": 28.833517089305403, "grad_norm": 3.093439817428589, "learning_rate": 3.8039215686274516e-07, "loss": 1.9348, "step": 19635 }, { "epoch": 28.86438809261301, "grad_norm": 3.168133497238159, "learning_rate": 3.700980392156863e-07, "loss": 2.1087, "step": 19656 }, { "epoch": 28.89525909592062, "grad_norm": 3.433194875717163, "learning_rate": 3.5980392156862747e-07, "loss": 1.9117, "step": 19677 }, { "epoch": 28.926130099228224, "grad_norm": 3.6745409965515137, "learning_rate": 3.4950980392156865e-07, "loss": 2.0456, "step": 19698 }, { "epoch": 28.957001102535834, "grad_norm": 4.280655384063721, "learning_rate": 3.3921568627450984e-07, "loss": 2.0921, "step": 19719 }, { "epoch": 28.98787210584344, "grad_norm": 2.682467460632324, "learning_rate": 3.2892156862745096e-07, "loss": 2.1163, "step": 19740 }, { "epoch": 29.017640573318634, "grad_norm": 4.8197712898254395, "learning_rate": 3.1862745098039215e-07, "loss": 1.9863, "step": 19761 }, { "epoch": 29.04851157662624, "grad_norm": 3.6110095977783203, "learning_rate": 3.083333333333334e-07, "loss": 2.014, "step": 19782 }, { "epoch": 29.07938257993385, "grad_norm": 4.205295085906982, "learning_rate": 2.9803921568627456e-07, "loss": 1.9606, "step": 19803 }, { "epoch": 29.110253583241455, "grad_norm": 4.5187907218933105, "learning_rate": 2.877450980392157e-07, "loss": 2.0288, "step": 19824 }, { "epoch": 29.14112458654906, "grad_norm": 3.4736719131469727, "learning_rate": 2.7745098039215687e-07, "loss": 1.9578, "step": 19845 }, { "epoch": 29.17199558985667, "grad_norm": 3.5511319637298584, "learning_rate": 2.6715686274509805e-07, "loss": 1.9772, "step": 19866 }, { "epoch": 29.202866593164277, "grad_norm": 2.8322770595550537, "learning_rate": 2.5686274509803924e-07, "loss": 1.9669, "step": 19887 }, { "epoch": 29.233737596471887, "grad_norm": 5.3508477210998535, "learning_rate": 2.465686274509804e-07, "loss": 2.0524, "step": 19908 }, { "epoch": 29.264608599779493, "grad_norm": 4.745840549468994, "learning_rate": 2.362745098039216e-07, "loss": 2.0862, "step": 19929 }, { "epoch": 29.2954796030871, "grad_norm": 4.096988677978516, "learning_rate": 2.2598039215686275e-07, "loss": 1.954, "step": 19950 }, { "epoch": 29.32635060639471, "grad_norm": 2.3860230445861816, "learning_rate": 2.1568627450980394e-07, "loss": 2.0573, "step": 19971 }, { "epoch": 29.357221609702314, "grad_norm": 3.304518461227417, "learning_rate": 2.0539215686274512e-07, "loss": 2.0871, "step": 19992 }, { "epoch": 29.388092613009924, "grad_norm": 3.948188304901123, "learning_rate": 1.9509803921568627e-07, "loss": 2.0126, "step": 20013 }, { "epoch": 29.41896361631753, "grad_norm": 2.696289300918579, "learning_rate": 1.8480392156862748e-07, "loss": 2.0085, "step": 20034 }, { "epoch": 29.449834619625136, "grad_norm": 3.4525928497314453, "learning_rate": 1.7450980392156866e-07, "loss": 2.0798, "step": 20055 }, { "epoch": 29.480705622932746, "grad_norm": 3.622753620147705, "learning_rate": 1.6421568627450982e-07, "loss": 2.0391, "step": 20076 }, { "epoch": 29.511576626240352, "grad_norm": 3.2289085388183594, "learning_rate": 1.53921568627451e-07, "loss": 1.992, "step": 20097 }, { "epoch": 29.54244762954796, "grad_norm": 3.7036259174346924, "learning_rate": 1.4362745098039215e-07, "loss": 2.0901, "step": 20118 }, { "epoch": 29.573318632855568, "grad_norm": 3.6887247562408447, "learning_rate": 1.3333333333333336e-07, "loss": 2.0561, "step": 20139 }, { "epoch": 29.604189636163177, "grad_norm": 2.8597869873046875, "learning_rate": 1.2303921568627452e-07, "loss": 2.072, "step": 20160 }, { "epoch": 29.635060639470783, "grad_norm": 3.587333917617798, "learning_rate": 1.127450980392157e-07, "loss": 2.0183, "step": 20181 }, { "epoch": 29.66593164277839, "grad_norm": 2.980628490447998, "learning_rate": 1.0245098039215687e-07, "loss": 2.0967, "step": 20202 }, { "epoch": 29.696802646086, "grad_norm": 3.0355567932128906, "learning_rate": 9.215686274509805e-08, "loss": 2.0283, "step": 20223 }, { "epoch": 29.727673649393605, "grad_norm": 3.7858340740203857, "learning_rate": 8.186274509803922e-08, "loss": 1.9461, "step": 20244 }, { "epoch": 29.75854465270121, "grad_norm": 3.9610328674316406, "learning_rate": 7.15686274509804e-08, "loss": 2.0268, "step": 20265 }, { "epoch": 29.78941565600882, "grad_norm": 3.412227153778076, "learning_rate": 6.127450980392157e-08, "loss": 1.903, "step": 20286 }, { "epoch": 29.820286659316427, "grad_norm": 3.366365671157837, "learning_rate": 5.098039215686275e-08, "loss": 2.0263, "step": 20307 }, { "epoch": 29.851157662624036, "grad_norm": 3.1891021728515625, "learning_rate": 4.0686274509803924e-08, "loss": 1.9993, "step": 20328 }, { "epoch": 29.882028665931642, "grad_norm": 2.5397629737854004, "learning_rate": 3.03921568627451e-08, "loss": 1.8939, "step": 20349 }, { "epoch": 29.912899669239252, "grad_norm": 3.0380682945251465, "learning_rate": 2.0098039215686278e-08, "loss": 2.0146, "step": 20370 }, { "epoch": 29.943770672546858, "grad_norm": 4.6516828536987305, "learning_rate": 9.803921568627451e-09, "loss": 2.0806, "step": 20391 }, { "epoch": 30.017640573318634, "grad_norm": 2.4761579036712646, "learning_rate": 6.999852941176472e-06, "loss": 1.8591, "step": 20412 }, { "epoch": 30.04851157662624, "grad_norm": 2.9109342098236084, "learning_rate": 6.996764705882354e-06, "loss": 1.9997, "step": 20433 }, { "epoch": 30.07938257993385, "grad_norm": 4.24910831451416, "learning_rate": 6.993676470588236e-06, "loss": 1.9292, "step": 20454 }, { "epoch": 30.110253583241455, "grad_norm": 2.175943374633789, "learning_rate": 6.990588235294118e-06, "loss": 1.9978, "step": 20475 }, { "epoch": 30.14112458654906, "grad_norm": 3.9316229820251465, "learning_rate": 6.9875000000000004e-06, "loss": 1.9205, "step": 20496 }, { "epoch": 30.17199558985667, "grad_norm": 3.8372340202331543, "learning_rate": 6.984411764705883e-06, "loss": 1.9654, "step": 20517 }, { "epoch": 30.202866593164277, "grad_norm": 2.863973617553711, "learning_rate": 6.981323529411765e-06, "loss": 2.0088, "step": 20538 }, { "epoch": 30.233737596471887, "grad_norm": 4.348237991333008, "learning_rate": 6.978235294117648e-06, "loss": 1.9423, "step": 20559 }, { "epoch": 30.264608599779493, "grad_norm": 3.077249050140381, "learning_rate": 6.97514705882353e-06, "loss": 1.965, "step": 20580 }, { "epoch": 30.2954796030871, "grad_norm": 3.171555757522583, "learning_rate": 6.9720588235294125e-06, "loss": 2.0162, "step": 20601 }, { "epoch": 30.32635060639471, "grad_norm": 4.43946647644043, "learning_rate": 6.968970588235294e-06, "loss": 2.1029, "step": 20622 }, { "epoch": 30.357221609702314, "grad_norm": 3.025813341140747, "learning_rate": 6.9658823529411764e-06, "loss": 1.9714, "step": 20643 }, { "epoch": 30.388092613009924, "grad_norm": 3.088214635848999, "learning_rate": 6.962794117647059e-06, "loss": 2.0403, "step": 20664 }, { "epoch": 30.41896361631753, "grad_norm": 3.605456829071045, "learning_rate": 6.959705882352942e-06, "loss": 1.9826, "step": 20685 }, { "epoch": 30.449834619625136, "grad_norm": 2.4986538887023926, "learning_rate": 6.9566176470588245e-06, "loss": 2.0107, "step": 20706 }, { "epoch": 30.480705622932746, "grad_norm": 3.029926061630249, "learning_rate": 6.953529411764707e-06, "loss": 2.0954, "step": 20727 }, { "epoch": 30.511576626240352, "grad_norm": 3.9245054721832275, "learning_rate": 6.950441176470589e-06, "loss": 2.1161, "step": 20748 }, { "epoch": 30.54244762954796, "grad_norm": 3.6836624145507812, "learning_rate": 6.947352941176471e-06, "loss": 1.9969, "step": 20769 }, { "epoch": 30.573318632855568, "grad_norm": 2.994611978530884, "learning_rate": 6.944264705882353e-06, "loss": 2.038, "step": 20790 }, { "epoch": 30.604189636163177, "grad_norm": 2.859304904937744, "learning_rate": 6.941176470588236e-06, "loss": 2.0954, "step": 20811 }, { "epoch": 30.635060639470783, "grad_norm": 2.7563636302948, "learning_rate": 6.938088235294118e-06, "loss": 1.938, "step": 20832 }, { "epoch": 30.66593164277839, "grad_norm": 5.054880142211914, "learning_rate": 6.9350000000000005e-06, "loss": 2.0519, "step": 20853 }, { "epoch": 30.696802646086, "grad_norm": 4.342052936553955, "learning_rate": 6.931911764705883e-06, "loss": 2.0707, "step": 20874 }, { "epoch": 30.727673649393605, "grad_norm": 4.355235576629639, "learning_rate": 6.928823529411765e-06, "loss": 1.9641, "step": 20895 }, { "epoch": 30.75854465270121, "grad_norm": 3.3632757663726807, "learning_rate": 6.925735294117648e-06, "loss": 2.0446, "step": 20916 }, { "epoch": 30.78941565600882, "grad_norm": 3.2357542514801025, "learning_rate": 6.922647058823529e-06, "loss": 2.037, "step": 20937 }, { "epoch": 30.820286659316427, "grad_norm": 2.9211175441741943, "learning_rate": 6.919558823529412e-06, "loss": 2.0354, "step": 20958 }, { "epoch": 30.851157662624036, "grad_norm": 2.8416130542755127, "learning_rate": 6.916470588235294e-06, "loss": 1.9221, "step": 20979 }, { "epoch": 30.882028665931642, "grad_norm": 4.24863862991333, "learning_rate": 6.913382352941177e-06, "loss": 2.1314, "step": 21000 }, { "epoch": 30.912899669239252, "grad_norm": 3.8200039863586426, "learning_rate": 6.910441176470589e-06, "loss": 2.0128, "step": 21021 }, { "epoch": 30.943770672546858, "grad_norm": 3.179262399673462, "learning_rate": 6.907352941176472e-06, "loss": 2.019, "step": 21042 }, { "epoch": 30.974641675854464, "grad_norm": 3.758181095123291, "learning_rate": 6.904264705882353e-06, "loss": 2.0202, "step": 21063 }, { "epoch": 31.004410143329658, "grad_norm": 3.61147141456604, "learning_rate": 6.901176470588235e-06, "loss": 1.9798, "step": 21084 }, { "epoch": 31.035281146637267, "grad_norm": 3.216090440750122, "learning_rate": 6.8980882352941185e-06, "loss": 2.0439, "step": 21105 }, { "epoch": 31.066152149944873, "grad_norm": 3.630655288696289, "learning_rate": 6.895000000000001e-06, "loss": 2.0955, "step": 21126 }, { "epoch": 31.09702315325248, "grad_norm": 2.5508739948272705, "learning_rate": 6.891911764705883e-06, "loss": 2.167, "step": 21147 }, { "epoch": 31.12789415656009, "grad_norm": 3.2604100704193115, "learning_rate": 6.888823529411766e-06, "loss": 1.9946, "step": 21168 }, { "epoch": 31.158765159867695, "grad_norm": 4.532689094543457, "learning_rate": 6.885735294117648e-06, "loss": 2.0445, "step": 21189 }, { "epoch": 31.189636163175305, "grad_norm": 2.7751121520996094, "learning_rate": 6.88264705882353e-06, "loss": 2.0775, "step": 21210 }, { "epoch": 31.22050716648291, "grad_norm": 2.354799270629883, "learning_rate": 6.879558823529412e-06, "loss": 2.0074, "step": 21231 }, { "epoch": 31.251378169790517, "grad_norm": 3.4933903217315674, "learning_rate": 6.8764705882352945e-06, "loss": 2.0126, "step": 21252 }, { "epoch": 31.282249173098126, "grad_norm": 2.627321481704712, "learning_rate": 6.873382352941177e-06, "loss": 1.989, "step": 21273 }, { "epoch": 31.313120176405732, "grad_norm": 5.017119884490967, "learning_rate": 6.870294117647059e-06, "loss": 2.0142, "step": 21294 }, { "epoch": 31.343991179713342, "grad_norm": 3.298305034637451, "learning_rate": 6.867205882352942e-06, "loss": 1.9839, "step": 21315 }, { "epoch": 31.374862183020948, "grad_norm": 3.4790468215942383, "learning_rate": 6.864117647058824e-06, "loss": 2.0382, "step": 21336 }, { "epoch": 31.405733186328554, "grad_norm": 3.4293150901794434, "learning_rate": 6.8610294117647065e-06, "loss": 1.9849, "step": 21357 }, { "epoch": 31.436604189636164, "grad_norm": 2.9297597408294678, "learning_rate": 6.857941176470588e-06, "loss": 2.1046, "step": 21378 }, { "epoch": 31.46747519294377, "grad_norm": 2.890817880630493, "learning_rate": 6.8548529411764705e-06, "loss": 1.8722, "step": 21399 }, { "epoch": 31.49834619625138, "grad_norm": 2.8951117992401123, "learning_rate": 6.851764705882353e-06, "loss": 1.9017, "step": 21420 }, { "epoch": 31.529217199558985, "grad_norm": 4.069543361663818, "learning_rate": 6.848676470588236e-06, "loss": 1.9331, "step": 21441 }, { "epoch": 31.56008820286659, "grad_norm": 2.988917589187622, "learning_rate": 6.8455882352941186e-06, "loss": 2.0361, "step": 21462 }, { "epoch": 31.5909592061742, "grad_norm": 3.507817029953003, "learning_rate": 6.842500000000001e-06, "loss": 2.0005, "step": 21483 }, { "epoch": 31.621830209481807, "grad_norm": 4.201050758361816, "learning_rate": 6.839411764705883e-06, "loss": 2.0231, "step": 21504 }, { "epoch": 31.652701212789417, "grad_norm": 2.8619723320007324, "learning_rate": 6.836323529411766e-06, "loss": 2.0663, "step": 21525 }, { "epoch": 31.683572216097023, "grad_norm": 5.256598472595215, "learning_rate": 6.833235294117647e-06, "loss": 1.9783, "step": 21546 }, { "epoch": 31.71444321940463, "grad_norm": 3.40771222114563, "learning_rate": 6.83014705882353e-06, "loss": 1.9706, "step": 21567 }, { "epoch": 31.74531422271224, "grad_norm": 3.4343996047973633, "learning_rate": 6.827058823529412e-06, "loss": 2.003, "step": 21588 }, { "epoch": 31.776185226019845, "grad_norm": 3.369107961654663, "learning_rate": 6.823970588235295e-06, "loss": 1.8853, "step": 21609 }, { "epoch": 31.807056229327454, "grad_norm": 3.3922946453094482, "learning_rate": 6.820882352941177e-06, "loss": 2.1757, "step": 21630 }, { "epoch": 31.83792723263506, "grad_norm": 4.9912896156311035, "learning_rate": 6.817794117647059e-06, "loss": 2.0339, "step": 21651 }, { "epoch": 31.86879823594267, "grad_norm": 2.6037418842315674, "learning_rate": 6.814705882352942e-06, "loss": 2.0432, "step": 21672 }, { "epoch": 31.899669239250276, "grad_norm": 2.918402910232544, "learning_rate": 6.811617647058823e-06, "loss": 2.0347, "step": 21693 }, { "epoch": 31.930540242557882, "grad_norm": 4.793334007263184, "learning_rate": 6.808529411764706e-06, "loss": 1.9873, "step": 21714 }, { "epoch": 31.96141124586549, "grad_norm": 3.8912668228149414, "learning_rate": 6.805441176470588e-06, "loss": 2.0222, "step": 21735 }, { "epoch": 31.992282249173098, "grad_norm": 3.615077495574951, "learning_rate": 6.8023529411764714e-06, "loss": 2.1043, "step": 21756 }, { "epoch": 32.022050716648295, "grad_norm": 2.69502854347229, "learning_rate": 6.799264705882354e-06, "loss": 1.9, "step": 21777 }, { "epoch": 32.0529217199559, "grad_norm": 2.9249091148376465, "learning_rate": 6.796176470588236e-06, "loss": 2.0133, "step": 21798 }, { "epoch": 32.08379272326351, "grad_norm": 2.774374008178711, "learning_rate": 6.793088235294119e-06, "loss": 1.9631, "step": 21819 }, { "epoch": 32.114663726571116, "grad_norm": 3.269963502883911, "learning_rate": 6.790000000000001e-06, "loss": 2.0575, "step": 21840 }, { "epoch": 32.14553472987872, "grad_norm": 3.5649948120117188, "learning_rate": 6.786911764705883e-06, "loss": 2.0316, "step": 21861 }, { "epoch": 32.17640573318633, "grad_norm": 4.329070091247559, "learning_rate": 6.783823529411765e-06, "loss": 2.0458, "step": 21882 }, { "epoch": 32.20727673649394, "grad_norm": 3.839090585708618, "learning_rate": 6.7807352941176474e-06, "loss": 2.0183, "step": 21903 }, { "epoch": 32.23814773980154, "grad_norm": 3.610092878341675, "learning_rate": 6.77764705882353e-06, "loss": 1.9586, "step": 21924 }, { "epoch": 32.26901874310915, "grad_norm": 3.6197407245635986, "learning_rate": 6.774558823529412e-06, "loss": 1.9854, "step": 21945 }, { "epoch": 32.29988974641676, "grad_norm": 2.5608134269714355, "learning_rate": 6.771470588235295e-06, "loss": 1.978, "step": 21966 }, { "epoch": 32.33076074972437, "grad_norm": 2.387211799621582, "learning_rate": 6.768382352941177e-06, "loss": 2.0468, "step": 21987 }, { "epoch": 32.36163175303197, "grad_norm": 3.14782977104187, "learning_rate": 6.76529411764706e-06, "loss": 1.943, "step": 22008 }, { "epoch": 32.39250275633958, "grad_norm": 4.639213562011719, "learning_rate": 6.762205882352941e-06, "loss": 2.0186, "step": 22029 }, { "epoch": 32.42337375964719, "grad_norm": 2.8931736946105957, "learning_rate": 6.7591176470588234e-06, "loss": 1.9762, "step": 22050 }, { "epoch": 32.454244762954794, "grad_norm": 4.192983150482178, "learning_rate": 6.756029411764707e-06, "loss": 2.0201, "step": 22071 }, { "epoch": 32.4851157662624, "grad_norm": 3.05107045173645, "learning_rate": 6.752941176470589e-06, "loss": 1.9026, "step": 22092 }, { "epoch": 32.51598676957001, "grad_norm": 3.5661211013793945, "learning_rate": 6.7498529411764715e-06, "loss": 1.9628, "step": 22113 }, { "epoch": 32.546857772877615, "grad_norm": 3.187516450881958, "learning_rate": 6.746764705882354e-06, "loss": 1.9693, "step": 22134 }, { "epoch": 32.577728776185225, "grad_norm": 3.1022701263427734, "learning_rate": 6.743676470588236e-06, "loss": 2.0265, "step": 22155 }, { "epoch": 32.608599779492835, "grad_norm": 3.749671459197998, "learning_rate": 6.740588235294118e-06, "loss": 2.0217, "step": 22176 }, { "epoch": 32.639470782800444, "grad_norm": 4.192118167877197, "learning_rate": 6.7375e-06, "loss": 1.9622, "step": 22197 }, { "epoch": 32.67034178610805, "grad_norm": 3.30405855178833, "learning_rate": 6.734411764705883e-06, "loss": 2.0644, "step": 22218 }, { "epoch": 32.701212789415656, "grad_norm": 2.369403600692749, "learning_rate": 6.731323529411765e-06, "loss": 1.9724, "step": 22239 }, { "epoch": 32.732083792723266, "grad_norm": 5.039781093597412, "learning_rate": 6.7282352941176475e-06, "loss": 2.0533, "step": 22260 }, { "epoch": 32.76295479603087, "grad_norm": 3.238032579421997, "learning_rate": 6.72514705882353e-06, "loss": 2.0327, "step": 22281 }, { "epoch": 32.79382579933848, "grad_norm": 2.749037265777588, "learning_rate": 6.722058823529412e-06, "loss": 2.0377, "step": 22302 }, { "epoch": 32.82469680264609, "grad_norm": 3.5611190795898438, "learning_rate": 6.718970588235295e-06, "loss": 2.0168, "step": 22323 }, { "epoch": 32.85556780595369, "grad_norm": 2.7121567726135254, "learning_rate": 6.715882352941176e-06, "loss": 2.0194, "step": 22344 }, { "epoch": 32.8864388092613, "grad_norm": 3.5365748405456543, "learning_rate": 6.712794117647059e-06, "loss": 2.0038, "step": 22365 }, { "epoch": 32.91730981256891, "grad_norm": 4.373039722442627, "learning_rate": 6.709705882352941e-06, "loss": 2.0618, "step": 22386 }, { "epoch": 32.94818081587652, "grad_norm": 3.704794406890869, "learning_rate": 6.706617647058824e-06, "loss": 2.1195, "step": 22407 }, { "epoch": 32.97905181918412, "grad_norm": 2.9559192657470703, "learning_rate": 6.703529411764707e-06, "loss": 2.0252, "step": 22428 }, { "epoch": 33.008820286659315, "grad_norm": 4.496978282928467, "learning_rate": 6.700441176470589e-06, "loss": 2.0325, "step": 22449 }, { "epoch": 33.039691289966925, "grad_norm": 3.917959213256836, "learning_rate": 6.697352941176472e-06, "loss": 2.0231, "step": 22470 }, { "epoch": 33.070562293274534, "grad_norm": 4.201647758483887, "learning_rate": 6.694264705882354e-06, "loss": 2.0468, "step": 22491 }, { "epoch": 33.10143329658214, "grad_norm": 3.741476058959961, "learning_rate": 6.6911764705882356e-06, "loss": 2.0433, "step": 22512 }, { "epoch": 33.132304299889746, "grad_norm": 4.140647888183594, "learning_rate": 6.688088235294118e-06, "loss": 1.9797, "step": 22533 }, { "epoch": 33.163175303197356, "grad_norm": 3.5432393550872803, "learning_rate": 6.685e-06, "loss": 2.051, "step": 22554 }, { "epoch": 33.19404630650496, "grad_norm": 2.7542972564697266, "learning_rate": 6.681911764705883e-06, "loss": 1.9625, "step": 22575 }, { "epoch": 33.22491730981257, "grad_norm": 2.930532455444336, "learning_rate": 6.678823529411765e-06, "loss": 2.114, "step": 22596 }, { "epoch": 33.25578831312018, "grad_norm": 3.2630701065063477, "learning_rate": 6.675735294117648e-06, "loss": 2.0613, "step": 22617 }, { "epoch": 33.28665931642779, "grad_norm": 2.5393776893615723, "learning_rate": 6.67264705882353e-06, "loss": 2.0547, "step": 22638 }, { "epoch": 33.31753031973539, "grad_norm": 2.438340187072754, "learning_rate": 6.669558823529413e-06, "loss": 2.0099, "step": 22659 }, { "epoch": 33.348401323043, "grad_norm": 3.6014482975006104, "learning_rate": 6.666470588235294e-06, "loss": 2.027, "step": 22680 }, { "epoch": 33.37927232635061, "grad_norm": 3.6766440868377686, "learning_rate": 6.663382352941176e-06, "loss": 1.8853, "step": 22701 }, { "epoch": 33.41014332965821, "grad_norm": 4.324038505554199, "learning_rate": 6.66029411764706e-06, "loss": 2.0759, "step": 22722 }, { "epoch": 33.44101433296582, "grad_norm": 3.3692779541015625, "learning_rate": 6.657205882352942e-06, "loss": 1.9839, "step": 22743 }, { "epoch": 33.47188533627343, "grad_norm": 2.354776620864868, "learning_rate": 6.6541176470588244e-06, "loss": 1.9949, "step": 22764 }, { "epoch": 33.50275633958103, "grad_norm": 2.9648211002349854, "learning_rate": 6.651029411764707e-06, "loss": 2.0207, "step": 22785 }, { "epoch": 33.53362734288864, "grad_norm": 4.958619117736816, "learning_rate": 6.647941176470589e-06, "loss": 1.9691, "step": 22806 }, { "epoch": 33.56449834619625, "grad_norm": 3.480372905731201, "learning_rate": 6.644852941176471e-06, "loss": 1.9543, "step": 22827 }, { "epoch": 33.59536934950386, "grad_norm": 3.0077171325683594, "learning_rate": 6.641764705882353e-06, "loss": 1.9544, "step": 22848 }, { "epoch": 33.626240352811465, "grad_norm": 2.513927936553955, "learning_rate": 6.638676470588236e-06, "loss": 2.0584, "step": 22869 }, { "epoch": 33.657111356119074, "grad_norm": 2.9671051502227783, "learning_rate": 6.635588235294118e-06, "loss": 2.1313, "step": 22890 }, { "epoch": 33.687982359426684, "grad_norm": 5.765228748321533, "learning_rate": 6.6325000000000004e-06, "loss": 1.9766, "step": 22911 }, { "epoch": 33.718853362734286, "grad_norm": 2.9940590858459473, "learning_rate": 6.629411764705883e-06, "loss": 1.8934, "step": 22932 }, { "epoch": 33.749724366041896, "grad_norm": 4.542492866516113, "learning_rate": 6.626323529411765e-06, "loss": 2.0946, "step": 22953 }, { "epoch": 33.780595369349506, "grad_norm": 2.6231439113616943, "learning_rate": 6.6232352941176485e-06, "loss": 2.0406, "step": 22974 }, { "epoch": 33.81146637265711, "grad_norm": 2.8544082641601562, "learning_rate": 6.620147058823529e-06, "loss": 2.0158, "step": 22995 }, { "epoch": 33.84233737596472, "grad_norm": 2.867023229598999, "learning_rate": 6.617058823529412e-06, "loss": 1.86, "step": 23016 }, { "epoch": 33.87320837927233, "grad_norm": 3.776179790496826, "learning_rate": 6.613970588235295e-06, "loss": 2.0347, "step": 23037 }, { "epoch": 33.90407938257994, "grad_norm": 4.177669525146484, "learning_rate": 6.610882352941177e-06, "loss": 2.068, "step": 23058 }, { "epoch": 33.93495038588754, "grad_norm": 3.9378044605255127, "learning_rate": 6.60779411764706e-06, "loss": 1.8476, "step": 23079 }, { "epoch": 33.96582138919515, "grad_norm": 7.767055988311768, "learning_rate": 6.604705882352942e-06, "loss": 1.8984, "step": 23100 }, { "epoch": 33.99669239250276, "grad_norm": 3.3104944229125977, "learning_rate": 6.6016176470588245e-06, "loss": 2.0702, "step": 23121 }, { "epoch": 34.02646085997795, "grad_norm": 3.0422401428222656, "learning_rate": 6.598529411764707e-06, "loss": 1.8251, "step": 23142 }, { "epoch": 34.057331863285555, "grad_norm": 4.367411136627197, "learning_rate": 6.5954411764705885e-06, "loss": 2.103, "step": 23163 }, { "epoch": 34.088202866593164, "grad_norm": 3.526519536972046, "learning_rate": 6.592352941176471e-06, "loss": 2.1468, "step": 23184 }, { "epoch": 34.119073869900774, "grad_norm": 3.5703179836273193, "learning_rate": 6.589264705882353e-06, "loss": 1.9778, "step": 23205 }, { "epoch": 34.149944873208376, "grad_norm": 4.836939334869385, "learning_rate": 6.586176470588236e-06, "loss": 1.9909, "step": 23226 }, { "epoch": 34.180815876515986, "grad_norm": 3.2862203121185303, "learning_rate": 6.583088235294118e-06, "loss": 1.8582, "step": 23247 }, { "epoch": 34.211686879823596, "grad_norm": 2.9707770347595215, "learning_rate": 6.5800000000000005e-06, "loss": 2.0413, "step": 23268 }, { "epoch": 34.242557883131205, "grad_norm": 3.828932523727417, "learning_rate": 6.576911764705883e-06, "loss": 2.0933, "step": 23289 }, { "epoch": 34.27342888643881, "grad_norm": 4.011388301849365, "learning_rate": 6.5738235294117645e-06, "loss": 2.0613, "step": 23310 }, { "epoch": 34.30429988974642, "grad_norm": 4.209685325622559, "learning_rate": 6.570735294117647e-06, "loss": 2.0407, "step": 23331 }, { "epoch": 34.33517089305403, "grad_norm": 3.3362555503845215, "learning_rate": 6.567647058823529e-06, "loss": 1.9671, "step": 23352 }, { "epoch": 34.36604189636163, "grad_norm": 2.9414002895355225, "learning_rate": 6.5645588235294126e-06, "loss": 1.9817, "step": 23373 }, { "epoch": 34.39691289966924, "grad_norm": 3.516315221786499, "learning_rate": 6.561470588235295e-06, "loss": 1.9951, "step": 23394 }, { "epoch": 34.42778390297685, "grad_norm": 2.6435933113098145, "learning_rate": 6.558382352941177e-06, "loss": 2.0764, "step": 23415 }, { "epoch": 34.45865490628445, "grad_norm": 5.475170612335205, "learning_rate": 6.55529411764706e-06, "loss": 1.9748, "step": 23436 }, { "epoch": 34.48952590959206, "grad_norm": 4.4045586585998535, "learning_rate": 6.552205882352942e-06, "loss": 2.0832, "step": 23457 }, { "epoch": 34.52039691289967, "grad_norm": 4.420597553253174, "learning_rate": 6.549117647058824e-06, "loss": 2.0431, "step": 23478 }, { "epoch": 34.55126791620728, "grad_norm": 3.511388063430786, "learning_rate": 6.546029411764706e-06, "loss": 1.9594, "step": 23499 }, { "epoch": 34.58213891951488, "grad_norm": 3.0359601974487305, "learning_rate": 6.5430882352941185e-06, "loss": 2.0504, "step": 23520 }, { "epoch": 34.61300992282249, "grad_norm": 3.008206844329834, "learning_rate": 6.540000000000001e-06, "loss": 1.9661, "step": 23541 }, { "epoch": 34.6438809261301, "grad_norm": 2.523923873901367, "learning_rate": 6.536911764705883e-06, "loss": 1.9471, "step": 23562 }, { "epoch": 34.674751929437704, "grad_norm": 3.656031608581543, "learning_rate": 6.533823529411766e-06, "loss": 1.9858, "step": 23583 }, { "epoch": 34.705622932745314, "grad_norm": 4.510978698730469, "learning_rate": 6.530735294117647e-06, "loss": 1.9141, "step": 23604 }, { "epoch": 34.73649393605292, "grad_norm": 2.750349283218384, "learning_rate": 6.52764705882353e-06, "loss": 2.0713, "step": 23625 }, { "epoch": 34.767364939360526, "grad_norm": 3.480217456817627, "learning_rate": 6.524558823529412e-06, "loss": 1.9356, "step": 23646 }, { "epoch": 34.798235942668136, "grad_norm": 3.882751226425171, "learning_rate": 6.5214705882352945e-06, "loss": 1.8708, "step": 23667 }, { "epoch": 34.829106945975745, "grad_norm": 5.057909965515137, "learning_rate": 6.518382352941177e-06, "loss": 2.076, "step": 23688 }, { "epoch": 34.859977949283355, "grad_norm": 3.6105921268463135, "learning_rate": 6.515294117647059e-06, "loss": 2.0175, "step": 23709 }, { "epoch": 34.89084895259096, "grad_norm": 2.324753761291504, "learning_rate": 6.512205882352942e-06, "loss": 2.0057, "step": 23730 }, { "epoch": 34.92171995589857, "grad_norm": 3.9272208213806152, "learning_rate": 6.509117647058823e-06, "loss": 1.9724, "step": 23751 }, { "epoch": 34.95259095920618, "grad_norm": 3.2283363342285156, "learning_rate": 6.506029411764706e-06, "loss": 2.1133, "step": 23772 }, { "epoch": 34.98346196251378, "grad_norm": 3.2121036052703857, "learning_rate": 6.502941176470589e-06, "loss": 1.9388, "step": 23793 }, { "epoch": 35.01323042998897, "grad_norm": 2.6813344955444336, "learning_rate": 6.499852941176471e-06, "loss": 1.8403, "step": 23814 }, { "epoch": 35.04410143329658, "grad_norm": 3.698582172393799, "learning_rate": 6.496764705882354e-06, "loss": 2.0815, "step": 23835 }, { "epoch": 35.07497243660419, "grad_norm": 4.471309185028076, "learning_rate": 6.493676470588236e-06, "loss": 1.9698, "step": 23856 }, { "epoch": 35.105843439911794, "grad_norm": 5.234856128692627, "learning_rate": 6.490588235294119e-06, "loss": 1.9343, "step": 23877 }, { "epoch": 35.136714443219404, "grad_norm": 3.830604314804077, "learning_rate": 6.487500000000001e-06, "loss": 2.1021, "step": 23898 }, { "epoch": 35.16758544652701, "grad_norm": 3.5014662742614746, "learning_rate": 6.4844117647058826e-06, "loss": 2.0307, "step": 23919 }, { "epoch": 35.19845644983462, "grad_norm": 6.585451126098633, "learning_rate": 6.481323529411765e-06, "loss": 2.0543, "step": 23940 }, { "epoch": 35.229327453142226, "grad_norm": 4.162327289581299, "learning_rate": 6.478235294117647e-06, "loss": 2.0375, "step": 23961 }, { "epoch": 35.260198456449835, "grad_norm": 3.6847376823425293, "learning_rate": 6.47514705882353e-06, "loss": 2.0211, "step": 23982 }, { "epoch": 35.291069459757445, "grad_norm": 2.7664706707000732, "learning_rate": 6.472058823529412e-06, "loss": 1.8568, "step": 24003 }, { "epoch": 35.32194046306505, "grad_norm": 3.4318976402282715, "learning_rate": 6.468970588235295e-06, "loss": 2.113, "step": 24024 }, { "epoch": 35.35281146637266, "grad_norm": 3.4627935886383057, "learning_rate": 6.465882352941177e-06, "loss": 1.9953, "step": 24045 }, { "epoch": 35.38368246968027, "grad_norm": 4.259135723114014, "learning_rate": 6.46279411764706e-06, "loss": 1.98, "step": 24066 }, { "epoch": 35.41455347298787, "grad_norm": 3.5053093433380127, "learning_rate": 6.459705882352941e-06, "loss": 2.0259, "step": 24087 }, { "epoch": 35.44542447629548, "grad_norm": 4.08209228515625, "learning_rate": 6.456617647058823e-06, "loss": 2.075, "step": 24108 }, { "epoch": 35.47629547960309, "grad_norm": 4.871661186218262, "learning_rate": 6.453529411764707e-06, "loss": 2.0855, "step": 24129 }, { "epoch": 35.5071664829107, "grad_norm": 2.7095463275909424, "learning_rate": 6.450441176470589e-06, "loss": 2.1123, "step": 24150 }, { "epoch": 35.5380374862183, "grad_norm": 3.0286478996276855, "learning_rate": 6.4473529411764714e-06, "loss": 1.8616, "step": 24171 }, { "epoch": 35.56890848952591, "grad_norm": 3.121309518814087, "learning_rate": 6.444264705882354e-06, "loss": 1.9944, "step": 24192 }, { "epoch": 35.59977949283352, "grad_norm": 4.2155890464782715, "learning_rate": 6.441176470588236e-06, "loss": 2.0336, "step": 24213 }, { "epoch": 35.63065049614112, "grad_norm": 3.331258773803711, "learning_rate": 6.438088235294118e-06, "loss": 1.9416, "step": 24234 }, { "epoch": 35.66152149944873, "grad_norm": 2.801480293273926, "learning_rate": 6.435e-06, "loss": 1.9753, "step": 24255 }, { "epoch": 35.69239250275634, "grad_norm": 2.368448257446289, "learning_rate": 6.431911764705883e-06, "loss": 2.0051, "step": 24276 }, { "epoch": 35.723263506063944, "grad_norm": 3.0083751678466797, "learning_rate": 6.428823529411765e-06, "loss": 2.0122, "step": 24297 }, { "epoch": 35.75413450937155, "grad_norm": 3.81593918800354, "learning_rate": 6.4257352941176474e-06, "loss": 2.0014, "step": 24318 }, { "epoch": 35.78500551267916, "grad_norm": 3.7188968658447266, "learning_rate": 6.42264705882353e-06, "loss": 2.0187, "step": 24339 }, { "epoch": 35.81587651598677, "grad_norm": 3.9990670680999756, "learning_rate": 6.419558823529412e-06, "loss": 2.0372, "step": 24360 }, { "epoch": 35.846747519294375, "grad_norm": 3.62080979347229, "learning_rate": 6.4164705882352955e-06, "loss": 2.0127, "step": 24381 }, { "epoch": 35.877618522601985, "grad_norm": 3.222687244415283, "learning_rate": 6.413382352941176e-06, "loss": 2.0441, "step": 24402 }, { "epoch": 35.908489525909594, "grad_norm": 3.2056634426116943, "learning_rate": 6.410294117647059e-06, "loss": 1.9505, "step": 24423 }, { "epoch": 35.9393605292172, "grad_norm": 4.491532325744629, "learning_rate": 6.407205882352942e-06, "loss": 1.9731, "step": 24444 }, { "epoch": 35.97023153252481, "grad_norm": 2.7524030208587646, "learning_rate": 6.404117647058824e-06, "loss": 1.927, "step": 24465 }, { "epoch": 36.0, "grad_norm": 2.59549617767334, "learning_rate": 6.401029411764707e-06, "loss": 2.0331, "step": 24486 }, { "epoch": 36.03087100330761, "grad_norm": 3.0841493606567383, "learning_rate": 6.397941176470589e-06, "loss": 2.0174, "step": 24507 }, { "epoch": 36.06174200661521, "grad_norm": 3.171637773513794, "learning_rate": 6.3948529411764715e-06, "loss": 2.0846, "step": 24528 }, { "epoch": 36.09261300992282, "grad_norm": 5.8805365562438965, "learning_rate": 6.391764705882354e-06, "loss": 1.911, "step": 24549 }, { "epoch": 36.12348401323043, "grad_norm": 3.5089828968048096, "learning_rate": 6.3886764705882355e-06, "loss": 1.8698, "step": 24570 }, { "epoch": 36.15435501653804, "grad_norm": 3.0527853965759277, "learning_rate": 6.385588235294118e-06, "loss": 2.0197, "step": 24591 }, { "epoch": 36.18522601984564, "grad_norm": 4.5389604568481445, "learning_rate": 6.3825e-06, "loss": 1.9922, "step": 24612 }, { "epoch": 36.21609702315325, "grad_norm": 2.793590545654297, "learning_rate": 6.379411764705883e-06, "loss": 1.9609, "step": 24633 }, { "epoch": 36.24696802646086, "grad_norm": 2.3130569458007812, "learning_rate": 6.376323529411765e-06, "loss": 1.9982, "step": 24654 }, { "epoch": 36.277839029768465, "grad_norm": 4.0149149894714355, "learning_rate": 6.3732352941176475e-06, "loss": 1.9811, "step": 24675 }, { "epoch": 36.308710033076075, "grad_norm": 3.9005136489868164, "learning_rate": 6.37014705882353e-06, "loss": 1.9955, "step": 24696 }, { "epoch": 36.339581036383684, "grad_norm": 4.044854164123535, "learning_rate": 6.3670588235294115e-06, "loss": 1.9582, "step": 24717 }, { "epoch": 36.37045203969129, "grad_norm": 4.300441265106201, "learning_rate": 6.363970588235294e-06, "loss": 2.0181, "step": 24738 }, { "epoch": 36.4013230429989, "grad_norm": 3.8213846683502197, "learning_rate": 6.360882352941177e-06, "loss": 2.0198, "step": 24759 }, { "epoch": 36.432194046306506, "grad_norm": 4.3118181228637695, "learning_rate": 6.3577941176470596e-06, "loss": 1.9621, "step": 24780 }, { "epoch": 36.463065049614116, "grad_norm": 4.002730369567871, "learning_rate": 6.354705882352942e-06, "loss": 2.0374, "step": 24801 }, { "epoch": 36.49393605292172, "grad_norm": 4.114466667175293, "learning_rate": 6.351617647058824e-06, "loss": 2.027, "step": 24822 }, { "epoch": 36.52480705622933, "grad_norm": 3.4697704315185547, "learning_rate": 6.348529411764707e-06, "loss": 2.0031, "step": 24843 }, { "epoch": 36.55567805953694, "grad_norm": 3.910203695297241, "learning_rate": 6.345441176470589e-06, "loss": 1.9674, "step": 24864 }, { "epoch": 36.58654906284454, "grad_norm": 2.3402340412139893, "learning_rate": 6.342352941176471e-06, "loss": 1.9509, "step": 24885 }, { "epoch": 36.61742006615215, "grad_norm": 3.1056647300720215, "learning_rate": 6.339264705882353e-06, "loss": 1.8898, "step": 24906 }, { "epoch": 36.64829106945976, "grad_norm": 3.242011547088623, "learning_rate": 6.3361764705882356e-06, "loss": 2.0552, "step": 24927 }, { "epoch": 36.67916207276736, "grad_norm": 3.33284592628479, "learning_rate": 6.333088235294118e-06, "loss": 1.9494, "step": 24948 }, { "epoch": 36.71003307607497, "grad_norm": 3.71907377243042, "learning_rate": 6.33e-06, "loss": 1.9809, "step": 24969 }, { "epoch": 36.74090407938258, "grad_norm": 3.4426217079162598, "learning_rate": 6.326911764705883e-06, "loss": 2.0723, "step": 24990 }, { "epoch": 36.77177508269019, "grad_norm": 3.634838104248047, "learning_rate": 6.323823529411765e-06, "loss": 2.0326, "step": 25011 }, { "epoch": 36.80264608599779, "grad_norm": 5.762576580047607, "learning_rate": 6.3207352941176484e-06, "loss": 2.1398, "step": 25032 }, { "epoch": 36.8335170893054, "grad_norm": 2.3509230613708496, "learning_rate": 6.317647058823529e-06, "loss": 1.9725, "step": 25053 }, { "epoch": 36.86438809261301, "grad_norm": 3.5424084663391113, "learning_rate": 6.3145588235294116e-06, "loss": 1.9878, "step": 25074 }, { "epoch": 36.895259095920615, "grad_norm": 3.1477949619293213, "learning_rate": 6.311470588235295e-06, "loss": 2.0209, "step": 25095 }, { "epoch": 36.926130099228224, "grad_norm": 3.136260986328125, "learning_rate": 6.308382352941177e-06, "loss": 2.0021, "step": 25116 }, { "epoch": 36.957001102535834, "grad_norm": 3.1308465003967285, "learning_rate": 6.30529411764706e-06, "loss": 1.9501, "step": 25137 }, { "epoch": 36.98787210584344, "grad_norm": 2.8336098194122314, "learning_rate": 6.302205882352942e-06, "loss": 1.9998, "step": 25158 }, { "epoch": 37.01764057331863, "grad_norm": 3.6999354362487793, "learning_rate": 6.2991176470588245e-06, "loss": 1.9852, "step": 25179 }, { "epoch": 37.04851157662624, "grad_norm": 4.48907470703125, "learning_rate": 6.296029411764707e-06, "loss": 1.9793, "step": 25200 }, { "epoch": 37.07938257993385, "grad_norm": 2.8020403385162354, "learning_rate": 6.292941176470588e-06, "loss": 2.0722, "step": 25221 }, { "epoch": 37.11025358324146, "grad_norm": 2.9153811931610107, "learning_rate": 6.289852941176471e-06, "loss": 1.9729, "step": 25242 }, { "epoch": 37.14112458654906, "grad_norm": 2.3451945781707764, "learning_rate": 6.286764705882353e-06, "loss": 1.9615, "step": 25263 }, { "epoch": 37.17199558985667, "grad_norm": 2.962693929672241, "learning_rate": 6.283676470588236e-06, "loss": 1.9996, "step": 25284 }, { "epoch": 37.20286659316428, "grad_norm": 3.69085955619812, "learning_rate": 6.280588235294118e-06, "loss": 1.9731, "step": 25305 }, { "epoch": 37.23373759647188, "grad_norm": 3.0895442962646484, "learning_rate": 6.2775000000000005e-06, "loss": 2.031, "step": 25326 }, { "epoch": 37.26460859977949, "grad_norm": 2.977660894393921, "learning_rate": 6.274411764705884e-06, "loss": 2.0157, "step": 25347 }, { "epoch": 37.2954796030871, "grad_norm": 4.342935085296631, "learning_rate": 6.2713235294117644e-06, "loss": 1.9671, "step": 25368 }, { "epoch": 37.326350606394705, "grad_norm": 3.017965316772461, "learning_rate": 6.268235294117647e-06, "loss": 1.9712, "step": 25389 }, { "epoch": 37.357221609702314, "grad_norm": 3.4960055351257324, "learning_rate": 6.26514705882353e-06, "loss": 2.0319, "step": 25410 }, { "epoch": 37.388092613009924, "grad_norm": 3.372668743133545, "learning_rate": 6.2620588235294125e-06, "loss": 1.9695, "step": 25431 }, { "epoch": 37.418963616317534, "grad_norm": 3.3969757556915283, "learning_rate": 6.258970588235295e-06, "loss": 1.9509, "step": 25452 }, { "epoch": 37.449834619625136, "grad_norm": 2.955012083053589, "learning_rate": 6.255882352941177e-06, "loss": 2.0859, "step": 25473 }, { "epoch": 37.480705622932746, "grad_norm": 2.79205322265625, "learning_rate": 6.25279411764706e-06, "loss": 1.9329, "step": 25494 }, { "epoch": 37.511576626240355, "grad_norm": 3.1319580078125, "learning_rate": 6.249705882352942e-06, "loss": 2.006, "step": 25515 }, { "epoch": 37.54244762954796, "grad_norm": 2.972088575363159, "learning_rate": 6.246617647058824e-06, "loss": 2.095, "step": 25536 }, { "epoch": 37.57331863285557, "grad_norm": 3.2252204418182373, "learning_rate": 6.243529411764706e-06, "loss": 1.9979, "step": 25557 }, { "epoch": 37.60418963616318, "grad_norm": 4.572553634643555, "learning_rate": 6.2404411764705885e-06, "loss": 1.9358, "step": 25578 }, { "epoch": 37.63506063947078, "grad_norm": 3.2885162830352783, "learning_rate": 6.237352941176471e-06, "loss": 1.9421, "step": 25599 }, { "epoch": 37.66593164277839, "grad_norm": 3.689624309539795, "learning_rate": 6.234264705882353e-06, "loss": 1.9048, "step": 25620 }, { "epoch": 37.696802646086, "grad_norm": 3.8721468448638916, "learning_rate": 6.231176470588236e-06, "loss": 1.8469, "step": 25641 }, { "epoch": 37.72767364939361, "grad_norm": 2.821014881134033, "learning_rate": 6.228088235294118e-06, "loss": 2.0719, "step": 25662 }, { "epoch": 37.75854465270121, "grad_norm": 3.6164093017578125, "learning_rate": 6.225000000000001e-06, "loss": 1.9861, "step": 25683 }, { "epoch": 37.78941565600882, "grad_norm": 3.64155912399292, "learning_rate": 6.221911764705882e-06, "loss": 1.9798, "step": 25704 }, { "epoch": 37.82028665931643, "grad_norm": 3.437202215194702, "learning_rate": 6.2188235294117645e-06, "loss": 2.0418, "step": 25725 }, { "epoch": 37.85115766262403, "grad_norm": 3.6846747398376465, "learning_rate": 6.215735294117648e-06, "loss": 1.9596, "step": 25746 }, { "epoch": 37.88202866593164, "grad_norm": 2.5486693382263184, "learning_rate": 6.21264705882353e-06, "loss": 2.0202, "step": 25767 }, { "epoch": 37.91289966923925, "grad_norm": 2.465639591217041, "learning_rate": 6.2095588235294126e-06, "loss": 1.8695, "step": 25788 }, { "epoch": 37.943770672546854, "grad_norm": 4.062413692474365, "learning_rate": 6.206617647058823e-06, "loss": 1.9472, "step": 25809 }, { "epoch": 37.974641675854464, "grad_norm": 3.5824201107025146, "learning_rate": 6.203529411764706e-06, "loss": 1.9689, "step": 25830 }, { "epoch": 38.00441014332966, "grad_norm": 2.764157772064209, "learning_rate": 6.200441176470589e-06, "loss": 1.861, "step": 25851 }, { "epoch": 38.03528114663727, "grad_norm": 3.95149827003479, "learning_rate": 6.197352941176471e-06, "loss": 2.0063, "step": 25872 }, { "epoch": 38.06615214994488, "grad_norm": 2.322077989578247, "learning_rate": 6.194264705882354e-06, "loss": 2.0551, "step": 25893 }, { "epoch": 38.09702315325248, "grad_norm": 3.8312318325042725, "learning_rate": 6.191176470588236e-06, "loss": 1.8719, "step": 25914 }, { "epoch": 38.12789415656009, "grad_norm": 3.4052531719207764, "learning_rate": 6.1880882352941185e-06, "loss": 2.0198, "step": 25935 }, { "epoch": 38.1587651598677, "grad_norm": 3.501736640930176, "learning_rate": 6.185000000000001e-06, "loss": 2.0684, "step": 25956 }, { "epoch": 38.1896361631753, "grad_norm": 3.392531156539917, "learning_rate": 6.1819117647058825e-06, "loss": 1.9974, "step": 25977 }, { "epoch": 38.22050716648291, "grad_norm": 3.531236410140991, "learning_rate": 6.178823529411765e-06, "loss": 2.0895, "step": 25998 }, { "epoch": 38.25137816979052, "grad_norm": 3.4448482990264893, "learning_rate": 6.175735294117647e-06, "loss": 1.9701, "step": 26019 }, { "epoch": 38.28224917309812, "grad_norm": 2.942032814025879, "learning_rate": 6.17264705882353e-06, "loss": 2.1057, "step": 26040 }, { "epoch": 38.31312017640573, "grad_norm": 2.9509127140045166, "learning_rate": 6.169558823529412e-06, "loss": 2.0718, "step": 26061 }, { "epoch": 38.34399117971334, "grad_norm": 3.5521368980407715, "learning_rate": 6.1664705882352945e-06, "loss": 2.0542, "step": 26082 }, { "epoch": 38.37486218302095, "grad_norm": 4.131003379821777, "learning_rate": 6.163382352941178e-06, "loss": 2.0091, "step": 26103 }, { "epoch": 38.405733186328554, "grad_norm": 4.656850814819336, "learning_rate": 6.16029411764706e-06, "loss": 1.9601, "step": 26124 }, { "epoch": 38.436604189636164, "grad_norm": 2.944470167160034, "learning_rate": 6.157205882352941e-06, "loss": 1.9912, "step": 26145 }, { "epoch": 38.46747519294377, "grad_norm": 2.454280138015747, "learning_rate": 6.154117647058824e-06, "loss": 1.9536, "step": 26166 }, { "epoch": 38.498346196251376, "grad_norm": 7.892366409301758, "learning_rate": 6.1510294117647066e-06, "loss": 1.9889, "step": 26187 }, { "epoch": 38.529217199558985, "grad_norm": 4.039519786834717, "learning_rate": 6.147941176470589e-06, "loss": 1.9673, "step": 26208 }, { "epoch": 38.560088202866595, "grad_norm": 3.591512441635132, "learning_rate": 6.144852941176471e-06, "loss": 2.011, "step": 26229 }, { "epoch": 38.5909592061742, "grad_norm": 4.898961067199707, "learning_rate": 6.141764705882354e-06, "loss": 1.933, "step": 26250 }, { "epoch": 38.62183020948181, "grad_norm": 5.660459995269775, "learning_rate": 6.138676470588236e-06, "loss": 1.8462, "step": 26271 }, { "epoch": 38.65270121278942, "grad_norm": 4.260655879974365, "learning_rate": 6.135588235294118e-06, "loss": 1.9629, "step": 26292 }, { "epoch": 38.683572216097026, "grad_norm": 5.660893440246582, "learning_rate": 6.1325e-06, "loss": 2.102, "step": 26313 }, { "epoch": 38.71444321940463, "grad_norm": 2.3763129711151123, "learning_rate": 6.1294117647058826e-06, "loss": 2.0179, "step": 26334 }, { "epoch": 38.74531422271224, "grad_norm": 3.9975109100341797, "learning_rate": 6.126323529411765e-06, "loss": 2.0241, "step": 26355 }, { "epoch": 38.77618522601985, "grad_norm": 4.538069248199463, "learning_rate": 6.123235294117647e-06, "loss": 1.9141, "step": 26376 }, { "epoch": 38.80705622932745, "grad_norm": 3.5908985137939453, "learning_rate": 6.12014705882353e-06, "loss": 2.0399, "step": 26397 }, { "epoch": 38.83792723263506, "grad_norm": 2.7689132690429688, "learning_rate": 6.117058823529412e-06, "loss": 2.069, "step": 26418 }, { "epoch": 38.86879823594267, "grad_norm": 3.2389492988586426, "learning_rate": 6.1139705882352954e-06, "loss": 1.9708, "step": 26439 }, { "epoch": 38.89966923925027, "grad_norm": 3.224062442779541, "learning_rate": 6.110882352941176e-06, "loss": 1.9134, "step": 26460 }, { "epoch": 38.93054024255788, "grad_norm": 3.3063061237335205, "learning_rate": 6.1077941176470586e-06, "loss": 2.0072, "step": 26481 }, { "epoch": 38.96141124586549, "grad_norm": 3.1639840602874756, "learning_rate": 6.104705882352942e-06, "loss": 2.0804, "step": 26502 }, { "epoch": 38.9922822491731, "grad_norm": 3.2271788120269775, "learning_rate": 6.101617647058824e-06, "loss": 1.9378, "step": 26523 }, { "epoch": 39.022050716648295, "grad_norm": 4.328149318695068, "learning_rate": 6.098529411764707e-06, "loss": 1.8977, "step": 26544 }, { "epoch": 39.0529217199559, "grad_norm": 2.9258031845092773, "learning_rate": 6.095441176470589e-06, "loss": 1.9626, "step": 26565 }, { "epoch": 39.08379272326351, "grad_norm": 3.8277063369750977, "learning_rate": 6.0923529411764715e-06, "loss": 2.0564, "step": 26586 }, { "epoch": 39.114663726571116, "grad_norm": 2.8944478034973145, "learning_rate": 6.089264705882354e-06, "loss": 1.9199, "step": 26607 }, { "epoch": 39.14553472987872, "grad_norm": 4.018239974975586, "learning_rate": 6.086176470588235e-06, "loss": 1.978, "step": 26628 }, { "epoch": 39.17640573318633, "grad_norm": 3.7145819664001465, "learning_rate": 6.083088235294118e-06, "loss": 1.9724, "step": 26649 }, { "epoch": 39.20727673649394, "grad_norm": 2.8322203159332275, "learning_rate": 6.08e-06, "loss": 2.0743, "step": 26670 }, { "epoch": 39.23814773980154, "grad_norm": 3.1907200813293457, "learning_rate": 6.076911764705883e-06, "loss": 2.0229, "step": 26691 }, { "epoch": 39.26901874310915, "grad_norm": 3.4734530448913574, "learning_rate": 6.073823529411765e-06, "loss": 2.0174, "step": 26712 }, { "epoch": 39.29988974641676, "grad_norm": 4.05942440032959, "learning_rate": 6.0707352941176475e-06, "loss": 1.9095, "step": 26733 }, { "epoch": 39.33076074972437, "grad_norm": 3.2065370082855225, "learning_rate": 6.067647058823531e-06, "loss": 2.0017, "step": 26754 }, { "epoch": 39.36163175303197, "grad_norm": 3.031324625015259, "learning_rate": 6.064558823529411e-06, "loss": 1.9722, "step": 26775 }, { "epoch": 39.39250275633958, "grad_norm": 3.808422803878784, "learning_rate": 6.061470588235294e-06, "loss": 2.031, "step": 26796 }, { "epoch": 39.42337375964719, "grad_norm": 4.753396987915039, "learning_rate": 6.058382352941177e-06, "loss": 1.9635, "step": 26817 }, { "epoch": 39.454244762954794, "grad_norm": 2.7838830947875977, "learning_rate": 6.0552941176470595e-06, "loss": 2.0676, "step": 26838 }, { "epoch": 39.4851157662624, "grad_norm": 3.0024726390838623, "learning_rate": 6.052205882352942e-06, "loss": 1.9721, "step": 26859 }, { "epoch": 39.51598676957001, "grad_norm": 5.292247295379639, "learning_rate": 6.049117647058824e-06, "loss": 2.1065, "step": 26880 }, { "epoch": 39.546857772877615, "grad_norm": 3.7943801879882812, "learning_rate": 6.046029411764707e-06, "loss": 2.0086, "step": 26901 }, { "epoch": 39.577728776185225, "grad_norm": 2.9838616847991943, "learning_rate": 6.042941176470589e-06, "loss": 2.0683, "step": 26922 }, { "epoch": 39.608599779492835, "grad_norm": 3.5347747802734375, "learning_rate": 6.039852941176471e-06, "loss": 1.9554, "step": 26943 }, { "epoch": 39.639470782800444, "grad_norm": 3.5704119205474854, "learning_rate": 6.036764705882353e-06, "loss": 1.9772, "step": 26964 }, { "epoch": 39.67034178610805, "grad_norm": 3.1100077629089355, "learning_rate": 6.0336764705882355e-06, "loss": 2.0814, "step": 26985 }, { "epoch": 39.701212789415656, "grad_norm": 4.088737964630127, "learning_rate": 6.030588235294118e-06, "loss": 2.0491, "step": 27006 }, { "epoch": 39.732083792723266, "grad_norm": 3.929827928543091, "learning_rate": 6.0275e-06, "loss": 1.8642, "step": 27027 }, { "epoch": 39.76295479603087, "grad_norm": 3.7784807682037354, "learning_rate": 6.024411764705883e-06, "loss": 2.083, "step": 27048 }, { "epoch": 39.79382579933848, "grad_norm": 3.4088406562805176, "learning_rate": 6.021323529411766e-06, "loss": 1.9321, "step": 27069 }, { "epoch": 39.82469680264609, "grad_norm": 3.1789748668670654, "learning_rate": 6.018235294117648e-06, "loss": 2.1648, "step": 27090 }, { "epoch": 39.85556780595369, "grad_norm": 2.635104179382324, "learning_rate": 6.015147058823529e-06, "loss": 1.9257, "step": 27111 }, { "epoch": 39.8864388092613, "grad_norm": 5.106042385101318, "learning_rate": 6.012058823529412e-06, "loss": 1.8817, "step": 27132 }, { "epoch": 39.91730981256891, "grad_norm": 3.03486967086792, "learning_rate": 6.008970588235295e-06, "loss": 1.8412, "step": 27153 }, { "epoch": 39.94818081587652, "grad_norm": 3.45609712600708, "learning_rate": 6.005882352941177e-06, "loss": 1.8996, "step": 27174 }, { "epoch": 39.97905181918412, "grad_norm": 2.6528069972991943, "learning_rate": 6.0027941176470596e-06, "loss": 1.9245, "step": 27195 }, { "epoch": 40.008820286659315, "grad_norm": 3.5425727367401123, "learning_rate": 5.999705882352942e-06, "loss": 1.8353, "step": 27216 }, { "epoch": 40.039691289966925, "grad_norm": 2.5109641551971436, "learning_rate": 5.996617647058824e-06, "loss": 2.0974, "step": 27237 }, { "epoch": 40.070562293274534, "grad_norm": 4.454708099365234, "learning_rate": 5.993529411764706e-06, "loss": 1.9955, "step": 27258 }, { "epoch": 40.10143329658214, "grad_norm": 3.159529685974121, "learning_rate": 5.990441176470588e-06, "loss": 1.8919, "step": 27279 }, { "epoch": 40.132304299889746, "grad_norm": 2.955827474594116, "learning_rate": 5.987352941176471e-06, "loss": 2.061, "step": 27300 }, { "epoch": 40.163175303197356, "grad_norm": 4.7053680419921875, "learning_rate": 5.984264705882353e-06, "loss": 2.0422, "step": 27321 }, { "epoch": 40.19404630650496, "grad_norm": 3.2399508953094482, "learning_rate": 5.981176470588236e-06, "loss": 1.9478, "step": 27342 }, { "epoch": 40.22491730981257, "grad_norm": 3.6547083854675293, "learning_rate": 5.978088235294118e-06, "loss": 2.0191, "step": 27363 }, { "epoch": 40.25578831312018, "grad_norm": 4.792405605316162, "learning_rate": 5.975e-06, "loss": 1.9443, "step": 27384 }, { "epoch": 40.28665931642779, "grad_norm": 4.776972770690918, "learning_rate": 5.971911764705884e-06, "loss": 2.0692, "step": 27405 }, { "epoch": 40.31753031973539, "grad_norm": 4.053907871246338, "learning_rate": 5.968823529411764e-06, "loss": 1.9095, "step": 27426 }, { "epoch": 40.348401323043, "grad_norm": 3.0335769653320312, "learning_rate": 5.965735294117647e-06, "loss": 1.9073, "step": 27447 }, { "epoch": 40.37927232635061, "grad_norm": 2.513704299926758, "learning_rate": 5.96264705882353e-06, "loss": 1.975, "step": 27468 }, { "epoch": 40.41014332965821, "grad_norm": 3.3102567195892334, "learning_rate": 5.9595588235294124e-06, "loss": 2.037, "step": 27489 }, { "epoch": 40.44101433296582, "grad_norm": 2.768718957901001, "learning_rate": 5.956470588235295e-06, "loss": 1.9508, "step": 27510 }, { "epoch": 40.47188533627343, "grad_norm": 2.4006845951080322, "learning_rate": 5.953382352941177e-06, "loss": 1.9156, "step": 27531 }, { "epoch": 40.50275633958103, "grad_norm": 3.039377212524414, "learning_rate": 5.95029411764706e-06, "loss": 2.0373, "step": 27552 }, { "epoch": 40.53362734288864, "grad_norm": 3.911868095397949, "learning_rate": 5.947205882352942e-06, "loss": 2.0346, "step": 27573 }, { "epoch": 40.56449834619625, "grad_norm": 2.5462331771850586, "learning_rate": 5.944117647058824e-06, "loss": 1.9594, "step": 27594 }, { "epoch": 40.59536934950386, "grad_norm": 7.370463848114014, "learning_rate": 5.941029411764706e-06, "loss": 1.831, "step": 27615 }, { "epoch": 40.626240352811465, "grad_norm": 2.8491008281707764, "learning_rate": 5.9379411764705884e-06, "loss": 2.0635, "step": 27636 }, { "epoch": 40.657111356119074, "grad_norm": 2.293325901031494, "learning_rate": 5.934852941176471e-06, "loss": 1.9706, "step": 27657 }, { "epoch": 40.687982359426684, "grad_norm": 3.360750198364258, "learning_rate": 5.931764705882353e-06, "loss": 1.9606, "step": 27678 }, { "epoch": 40.718853362734286, "grad_norm": 6.223991394042969, "learning_rate": 5.928676470588236e-06, "loss": 1.953, "step": 27699 }, { "epoch": 40.749724366041896, "grad_norm": 2.282803535461426, "learning_rate": 5.925588235294119e-06, "loss": 1.9395, "step": 27720 }, { "epoch": 40.780595369349506, "grad_norm": 3.8620846271514893, "learning_rate": 5.922500000000001e-06, "loss": 1.9948, "step": 27741 }, { "epoch": 40.81146637265711, "grad_norm": 4.761281967163086, "learning_rate": 5.919411764705882e-06, "loss": 2.0273, "step": 27762 }, { "epoch": 40.84233737596472, "grad_norm": 3.677088975906372, "learning_rate": 5.916323529411765e-06, "loss": 1.891, "step": 27783 }, { "epoch": 40.87320837927233, "grad_norm": 4.497285842895508, "learning_rate": 5.913235294117648e-06, "loss": 2.0117, "step": 27804 }, { "epoch": 40.90407938257994, "grad_norm": 3.2372024059295654, "learning_rate": 5.91014705882353e-06, "loss": 1.9946, "step": 27825 }, { "epoch": 40.93495038588754, "grad_norm": 3.134455919265747, "learning_rate": 5.9070588235294125e-06, "loss": 1.8965, "step": 27846 }, { "epoch": 40.96582138919515, "grad_norm": 3.5608882904052734, "learning_rate": 5.903970588235295e-06, "loss": 1.945, "step": 27867 }, { "epoch": 40.99669239250276, "grad_norm": 3.4263484477996826, "learning_rate": 5.900882352941177e-06, "loss": 1.9865, "step": 27888 }, { "epoch": 41.02646085997795, "grad_norm": 3.573937177658081, "learning_rate": 5.897794117647059e-06, "loss": 1.9168, "step": 27909 }, { "epoch": 41.057331863285555, "grad_norm": 3.5559401512145996, "learning_rate": 5.894705882352941e-06, "loss": 1.927, "step": 27930 }, { "epoch": 41.088202866593164, "grad_norm": 3.3269314765930176, "learning_rate": 5.891617647058824e-06, "loss": 1.9753, "step": 27951 }, { "epoch": 41.119073869900774, "grad_norm": 2.985353708267212, "learning_rate": 5.888529411764706e-06, "loss": 1.9047, "step": 27972 }, { "epoch": 41.149944873208376, "grad_norm": 2.9979796409606934, "learning_rate": 5.8854411764705885e-06, "loss": 1.9112, "step": 27993 }, { "epoch": 41.180815876515986, "grad_norm": 3.1484808921813965, "learning_rate": 5.882352941176471e-06, "loss": 2.0664, "step": 28014 }, { "epoch": 41.211686879823596, "grad_norm": 2.853654146194458, "learning_rate": 5.879264705882354e-06, "loss": 1.9628, "step": 28035 }, { "epoch": 41.242557883131205, "grad_norm": 4.964154243469238, "learning_rate": 5.876176470588237e-06, "loss": 1.989, "step": 28056 }, { "epoch": 41.27342888643881, "grad_norm": 2.9812891483306885, "learning_rate": 5.873088235294117e-06, "loss": 2.0905, "step": 28077 }, { "epoch": 41.30429988974642, "grad_norm": 3.865640640258789, "learning_rate": 5.8700000000000005e-06, "loss": 1.8917, "step": 28098 }, { "epoch": 41.33517089305403, "grad_norm": 4.640876293182373, "learning_rate": 5.866911764705883e-06, "loss": 2.1534, "step": 28119 }, { "epoch": 41.36604189636163, "grad_norm": 3.249993324279785, "learning_rate": 5.863823529411765e-06, "loss": 1.9521, "step": 28140 }, { "epoch": 41.39691289966924, "grad_norm": 3.8108301162719727, "learning_rate": 5.860735294117648e-06, "loss": 1.9181, "step": 28161 }, { "epoch": 41.42778390297685, "grad_norm": 2.777886152267456, "learning_rate": 5.85764705882353e-06, "loss": 1.9833, "step": 28182 }, { "epoch": 41.45865490628445, "grad_norm": 4.113677978515625, "learning_rate": 5.854558823529413e-06, "loss": 2.0692, "step": 28203 }, { "epoch": 41.48952590959206, "grad_norm": 8.056456565856934, "learning_rate": 5.851470588235295e-06, "loss": 1.9922, "step": 28224 }, { "epoch": 41.52039691289967, "grad_norm": 2.636186122894287, "learning_rate": 5.8483823529411766e-06, "loss": 1.9874, "step": 28245 }, { "epoch": 41.55126791620728, "grad_norm": 3.3766558170318604, "learning_rate": 5.845294117647059e-06, "loss": 1.9649, "step": 28266 }, { "epoch": 41.58213891951488, "grad_norm": 3.809915542602539, "learning_rate": 5.842205882352941e-06, "loss": 2.0029, "step": 28287 }, { "epoch": 41.61300992282249, "grad_norm": 3.4291629791259766, "learning_rate": 5.839117647058824e-06, "loss": 1.9892, "step": 28308 }, { "epoch": 41.6438809261301, "grad_norm": 3.98573637008667, "learning_rate": 5.836029411764706e-06, "loss": 1.8661, "step": 28329 }, { "epoch": 41.674751929437704, "grad_norm": 2.953317165374756, "learning_rate": 5.832941176470589e-06, "loss": 2.1175, "step": 28350 }, { "epoch": 41.705622932745314, "grad_norm": 4.0871806144714355, "learning_rate": 5.83e-06, "loss": 2.0532, "step": 28371 }, { "epoch": 41.73649393605292, "grad_norm": 3.8700499534606934, "learning_rate": 5.8269117647058825e-06, "loss": 2.0263, "step": 28392 }, { "epoch": 41.767364939360526, "grad_norm": 2.9973948001861572, "learning_rate": 5.823823529411765e-06, "loss": 1.93, "step": 28413 }, { "epoch": 41.798235942668136, "grad_norm": 3.914116621017456, "learning_rate": 5.820735294117647e-06, "loss": 2.0507, "step": 28434 }, { "epoch": 41.829106945975745, "grad_norm": 3.217986583709717, "learning_rate": 5.81764705882353e-06, "loss": 1.9769, "step": 28455 }, { "epoch": 41.859977949283355, "grad_norm": 4.535974979400635, "learning_rate": 5.814558823529413e-06, "loss": 2.0366, "step": 28476 }, { "epoch": 41.89084895259096, "grad_norm": 2.6295928955078125, "learning_rate": 5.811470588235295e-06, "loss": 2.0145, "step": 28497 }, { "epoch": 41.92171995589857, "grad_norm": 5.133613109588623, "learning_rate": 5.808382352941176e-06, "loss": 2.068, "step": 28518 }, { "epoch": 41.95259095920618, "grad_norm": 3.7133264541625977, "learning_rate": 5.805294117647059e-06, "loss": 1.9777, "step": 28539 }, { "epoch": 41.98346196251378, "grad_norm": 3.117223024368286, "learning_rate": 5.802205882352942e-06, "loss": 1.9952, "step": 28560 }, { "epoch": 42.01323042998897, "grad_norm": 3.61245059967041, "learning_rate": 5.799117647058824e-06, "loss": 1.8749, "step": 28581 }, { "epoch": 42.04410143329658, "grad_norm": 3.4127142429351807, "learning_rate": 5.7960294117647066e-06, "loss": 1.8697, "step": 28602 }, { "epoch": 42.07497243660419, "grad_norm": 2.7904696464538574, "learning_rate": 5.792941176470589e-06, "loss": 1.9193, "step": 28623 }, { "epoch": 42.105843439911794, "grad_norm": 2.2500598430633545, "learning_rate": 5.789852941176471e-06, "loss": 2.0273, "step": 28644 }, { "epoch": 42.136714443219404, "grad_norm": 4.007527828216553, "learning_rate": 5.786764705882354e-06, "loss": 2.0232, "step": 28665 }, { "epoch": 42.16758544652701, "grad_norm": 4.661435604095459, "learning_rate": 5.783676470588235e-06, "loss": 1.938, "step": 28686 }, { "epoch": 42.19845644983462, "grad_norm": 2.511146068572998, "learning_rate": 5.780588235294118e-06, "loss": 2.0327, "step": 28707 }, { "epoch": 42.229327453142226, "grad_norm": 4.660438537597656, "learning_rate": 5.7775e-06, "loss": 2.0548, "step": 28728 }, { "epoch": 42.260198456449835, "grad_norm": 2.339512586593628, "learning_rate": 5.7744117647058826e-06, "loss": 2.052, "step": 28749 }, { "epoch": 42.291069459757445, "grad_norm": 3.5054421424865723, "learning_rate": 5.771323529411765e-06, "loss": 2.0143, "step": 28770 }, { "epoch": 42.32194046306505, "grad_norm": 3.9121763706207275, "learning_rate": 5.768235294117648e-06, "loss": 1.9525, "step": 28791 }, { "epoch": 42.35281146637266, "grad_norm": 2.556558847427368, "learning_rate": 5.765147058823531e-06, "loss": 2.1254, "step": 28812 }, { "epoch": 42.38368246968027, "grad_norm": 5.350119590759277, "learning_rate": 5.762058823529411e-06, "loss": 1.946, "step": 28833 }, { "epoch": 42.41455347298787, "grad_norm": 2.8503360748291016, "learning_rate": 5.758970588235295e-06, "loss": 1.9219, "step": 28854 }, { "epoch": 42.44542447629548, "grad_norm": 2.9986045360565186, "learning_rate": 5.755882352941177e-06, "loss": 2.0914, "step": 28875 }, { "epoch": 42.47629547960309, "grad_norm": 2.2886064052581787, "learning_rate": 5.7527941176470594e-06, "loss": 1.9547, "step": 28896 }, { "epoch": 42.5071664829107, "grad_norm": 3.8905608654022217, "learning_rate": 5.749705882352942e-06, "loss": 1.9971, "step": 28917 }, { "epoch": 42.5380374862183, "grad_norm": 3.875053644180298, "learning_rate": 5.746617647058824e-06, "loss": 2.0418, "step": 28938 }, { "epoch": 42.56890848952591, "grad_norm": 2.6485989093780518, "learning_rate": 5.743529411764707e-06, "loss": 1.9593, "step": 28959 }, { "epoch": 42.59977949283352, "grad_norm": 2.7637295722961426, "learning_rate": 5.740441176470589e-06, "loss": 2.0275, "step": 28980 }, { "epoch": 42.63065049614112, "grad_norm": 3.8056092262268066, "learning_rate": 5.737352941176471e-06, "loss": 1.9905, "step": 29001 }, { "epoch": 42.66152149944873, "grad_norm": 2.9115054607391357, "learning_rate": 5.734264705882353e-06, "loss": 2.0225, "step": 29022 }, { "epoch": 42.69239250275634, "grad_norm": 4.819677829742432, "learning_rate": 5.7311764705882354e-06, "loss": 2.0294, "step": 29043 }, { "epoch": 42.723263506063944, "grad_norm": 3.419004201889038, "learning_rate": 5.728088235294118e-06, "loss": 2.0314, "step": 29064 }, { "epoch": 42.75413450937155, "grad_norm": 4.571157455444336, "learning_rate": 5.725e-06, "loss": 2.0092, "step": 29085 }, { "epoch": 42.78500551267916, "grad_norm": 2.7036497592926025, "learning_rate": 5.721911764705883e-06, "loss": 2.0527, "step": 29106 }, { "epoch": 42.81587651598677, "grad_norm": 4.141323089599609, "learning_rate": 5.718823529411766e-06, "loss": 1.9341, "step": 29127 }, { "epoch": 42.846747519294375, "grad_norm": 3.788088798522949, "learning_rate": 5.715735294117648e-06, "loss": 1.9235, "step": 29148 }, { "epoch": 42.877618522601985, "grad_norm": 2.246274471282959, "learning_rate": 5.712647058823529e-06, "loss": 1.9607, "step": 29169 }, { "epoch": 42.908489525909594, "grad_norm": 4.884033679962158, "learning_rate": 5.709558823529412e-06, "loss": 1.8879, "step": 29190 }, { "epoch": 42.9393605292172, "grad_norm": 2.816049575805664, "learning_rate": 5.706470588235295e-06, "loss": 1.9629, "step": 29211 }, { "epoch": 42.97023153252481, "grad_norm": 3.6878652572631836, "learning_rate": 5.703382352941177e-06, "loss": 2.0625, "step": 29232 }, { "epoch": 43.0, "grad_norm": 1.11833918094635, "learning_rate": 5.7002941176470595e-06, "loss": 1.8169, "step": 29253 }, { "epoch": 43.03087100330761, "grad_norm": 3.252187967300415, "learning_rate": 5.697205882352942e-06, "loss": 2.0223, "step": 29274 }, { "epoch": 43.06174200661521, "grad_norm": 3.9236972332000732, "learning_rate": 5.694117647058824e-06, "loss": 1.9185, "step": 29295 }, { "epoch": 43.09261300992282, "grad_norm": 2.0161283016204834, "learning_rate": 5.691029411764706e-06, "loss": 1.8911, "step": 29316 }, { "epoch": 43.12348401323043, "grad_norm": 2.2180113792419434, "learning_rate": 5.687941176470588e-06, "loss": 1.9973, "step": 29337 }, { "epoch": 43.15435501653804, "grad_norm": 4.029181480407715, "learning_rate": 5.684852941176471e-06, "loss": 1.9372, "step": 29358 }, { "epoch": 43.18522601984564, "grad_norm": 2.887951612472534, "learning_rate": 5.681764705882353e-06, "loss": 2.0026, "step": 29379 }, { "epoch": 43.21609702315325, "grad_norm": 3.5961804389953613, "learning_rate": 5.6786764705882355e-06, "loss": 2.0647, "step": 29400 }, { "epoch": 43.24696802646086, "grad_norm": 3.1213467121124268, "learning_rate": 5.675588235294118e-06, "loss": 1.9786, "step": 29421 }, { "epoch": 43.277839029768465, "grad_norm": 4.543787479400635, "learning_rate": 5.672500000000001e-06, "loss": 1.9223, "step": 29442 }, { "epoch": 43.308710033076075, "grad_norm": 3.7775707244873047, "learning_rate": 5.669411764705884e-06, "loss": 1.9314, "step": 29463 }, { "epoch": 43.339581036383684, "grad_norm": 3.950556755065918, "learning_rate": 5.666323529411764e-06, "loss": 2.1, "step": 29484 }, { "epoch": 43.37045203969129, "grad_norm": 2.2346153259277344, "learning_rate": 5.6632352941176475e-06, "loss": 2.0619, "step": 29505 }, { "epoch": 43.4013230429989, "grad_norm": 5.146411895751953, "learning_rate": 5.66014705882353e-06, "loss": 1.988, "step": 29526 }, { "epoch": 43.432194046306506, "grad_norm": 2.90525221824646, "learning_rate": 5.657058823529412e-06, "loss": 1.952, "step": 29547 }, { "epoch": 43.463065049614116, "grad_norm": 4.822894096374512, "learning_rate": 5.653970588235295e-06, "loss": 1.9341, "step": 29568 }, { "epoch": 43.49393605292172, "grad_norm": 3.5569303035736084, "learning_rate": 5.650882352941177e-06, "loss": 2.0785, "step": 29589 }, { "epoch": 43.52480705622933, "grad_norm": 5.545686721801758, "learning_rate": 5.64779411764706e-06, "loss": 1.8942, "step": 29610 }, { "epoch": 43.55567805953694, "grad_norm": 2.8717198371887207, "learning_rate": 5.644705882352942e-06, "loss": 2.0098, "step": 29631 }, { "epoch": 43.58654906284454, "grad_norm": 5.219166278839111, "learning_rate": 5.6416176470588236e-06, "loss": 2.0316, "step": 29652 }, { "epoch": 43.61742006615215, "grad_norm": 3.7527034282684326, "learning_rate": 5.638529411764706e-06, "loss": 1.8838, "step": 29673 }, { "epoch": 43.64829106945976, "grad_norm": 3.5321669578552246, "learning_rate": 5.635441176470588e-06, "loss": 1.8815, "step": 29694 }, { "epoch": 43.67916207276736, "grad_norm": 2.810584545135498, "learning_rate": 5.632352941176471e-06, "loss": 1.9776, "step": 29715 }, { "epoch": 43.71003307607497, "grad_norm": 3.589877128601074, "learning_rate": 5.629264705882353e-06, "loss": 1.9382, "step": 29736 }, { "epoch": 43.74090407938258, "grad_norm": 2.8305165767669678, "learning_rate": 5.6261764705882364e-06, "loss": 1.9418, "step": 29757 }, { "epoch": 43.77177508269019, "grad_norm": 4.214059829711914, "learning_rate": 5.623088235294119e-06, "loss": 1.9446, "step": 29778 }, { "epoch": 43.80264608599779, "grad_norm": 3.8999645709991455, "learning_rate": 5.620000000000001e-06, "loss": 1.8931, "step": 29799 }, { "epoch": 43.8335170893054, "grad_norm": 4.423413276672363, "learning_rate": 5.616911764705883e-06, "loss": 1.9937, "step": 29820 }, { "epoch": 43.86438809261301, "grad_norm": 3.0591726303100586, "learning_rate": 5.613823529411765e-06, "loss": 1.9556, "step": 29841 }, { "epoch": 43.895259095920615, "grad_norm": 2.870619058609009, "learning_rate": 5.610735294117648e-06, "loss": 2.0039, "step": 29862 }, { "epoch": 43.926130099228224, "grad_norm": 4.230650424957275, "learning_rate": 5.60764705882353e-06, "loss": 1.9603, "step": 29883 }, { "epoch": 43.957001102535834, "grad_norm": 2.6178581714630127, "learning_rate": 5.6045588235294124e-06, "loss": 1.9261, "step": 29904 }, { "epoch": 43.98787210584344, "grad_norm": 2.435809850692749, "learning_rate": 5.601470588235295e-06, "loss": 1.9287, "step": 29925 }, { "epoch": 44.01764057331863, "grad_norm": 3.3161368370056152, "learning_rate": 5.598382352941177e-06, "loss": 1.8765, "step": 29946 }, { "epoch": 44.04851157662624, "grad_norm": 3.747030258178711, "learning_rate": 5.595294117647059e-06, "loss": 1.8837, "step": 29967 }, { "epoch": 44.07938257993385, "grad_norm": 4.173228740692139, "learning_rate": 5.592205882352941e-06, "loss": 1.8909, "step": 29988 }, { "epoch": 44.11025358324146, "grad_norm": 3.5664258003234863, "learning_rate": 5.589117647058824e-06, "loss": 1.9999, "step": 30009 }, { "epoch": 44.14112458654906, "grad_norm": 2.9330384731292725, "learning_rate": 5.586029411764706e-06, "loss": 1.9189, "step": 30030 }, { "epoch": 44.17199558985667, "grad_norm": 2.984426736831665, "learning_rate": 5.5829411764705884e-06, "loss": 1.96, "step": 30051 }, { "epoch": 44.20286659316428, "grad_norm": 3.401930332183838, "learning_rate": 5.579852941176471e-06, "loss": 1.9869, "step": 30072 }, { "epoch": 44.23373759647188, "grad_norm": 5.670159816741943, "learning_rate": 5.576764705882354e-06, "loss": 2.017, "step": 30093 }, { "epoch": 44.26460859977949, "grad_norm": 3.2186760902404785, "learning_rate": 5.5736764705882365e-06, "loss": 1.9269, "step": 30114 }, { "epoch": 44.2954796030871, "grad_norm": 2.451211452484131, "learning_rate": 5.570588235294117e-06, "loss": 1.9671, "step": 30135 }, { "epoch": 44.326350606394705, "grad_norm": 3.9800515174865723, "learning_rate": 5.5675000000000005e-06, "loss": 1.9299, "step": 30156 }, { "epoch": 44.357221609702314, "grad_norm": 7.074432373046875, "learning_rate": 5.564411764705883e-06, "loss": 1.9755, "step": 30177 }, { "epoch": 44.388092613009924, "grad_norm": 3.490715742111206, "learning_rate": 5.561323529411765e-06, "loss": 2.0295, "step": 30198 }, { "epoch": 44.418963616317534, "grad_norm": 2.9178805351257324, "learning_rate": 5.558235294117648e-06, "loss": 2.0073, "step": 30219 }, { "epoch": 44.449834619625136, "grad_norm": 5.242512226104736, "learning_rate": 5.55514705882353e-06, "loss": 2.0184, "step": 30240 }, { "epoch": 44.480705622932746, "grad_norm": 4.9063897132873535, "learning_rate": 5.5520588235294125e-06, "loss": 1.9953, "step": 30261 }, { "epoch": 44.511576626240355, "grad_norm": 4.473737716674805, "learning_rate": 5.548970588235295e-06, "loss": 1.8812, "step": 30282 }, { "epoch": 44.54244762954796, "grad_norm": 2.9006552696228027, "learning_rate": 5.5458823529411765e-06, "loss": 2.0235, "step": 30303 }, { "epoch": 44.57331863285557, "grad_norm": 2.3534812927246094, "learning_rate": 5.542794117647059e-06, "loss": 1.8966, "step": 30324 }, { "epoch": 44.60418963616318, "grad_norm": 3.1015090942382812, "learning_rate": 5.539705882352941e-06, "loss": 1.8679, "step": 30345 }, { "epoch": 44.63506063947078, "grad_norm": 2.711728811264038, "learning_rate": 5.536617647058824e-06, "loss": 1.9926, "step": 30366 }, { "epoch": 44.66593164277839, "grad_norm": 2.3589699268341064, "learning_rate": 5.533529411764706e-06, "loss": 1.9941, "step": 30387 }, { "epoch": 44.696802646086, "grad_norm": 3.3098554611206055, "learning_rate": 5.530441176470589e-06, "loss": 1.9872, "step": 30408 }, { "epoch": 44.72767364939361, "grad_norm": 3.0449581146240234, "learning_rate": 5.527352941176472e-06, "loss": 1.9629, "step": 30429 }, { "epoch": 44.75854465270121, "grad_norm": 3.5887935161590576, "learning_rate": 5.5242647058823525e-06, "loss": 2.0201, "step": 30450 }, { "epoch": 44.78941565600882, "grad_norm": 4.497625827789307, "learning_rate": 5.521176470588236e-06, "loss": 1.9056, "step": 30471 }, { "epoch": 44.82028665931643, "grad_norm": 3.838418483734131, "learning_rate": 5.518088235294118e-06, "loss": 1.9101, "step": 30492 }, { "epoch": 44.85115766262403, "grad_norm": 3.5117242336273193, "learning_rate": 5.5150000000000006e-06, "loss": 2.0524, "step": 30513 }, { "epoch": 44.88202866593164, "grad_norm": 2.388932228088379, "learning_rate": 5.511911764705883e-06, "loss": 1.865, "step": 30534 }, { "epoch": 44.91289966923925, "grad_norm": 3.0164225101470947, "learning_rate": 5.508823529411765e-06, "loss": 2.0466, "step": 30555 }, { "epoch": 44.943770672546854, "grad_norm": 7.063328742980957, "learning_rate": 5.505735294117648e-06, "loss": 1.9815, "step": 30576 }, { "epoch": 44.974641675854464, "grad_norm": 3.866180419921875, "learning_rate": 5.50264705882353e-06, "loss": 1.967, "step": 30597 }, { "epoch": 45.00441014332966, "grad_norm": 2.573007106781006, "learning_rate": 5.499558823529412e-06, "loss": 1.8451, "step": 30618 }, { "epoch": 45.03528114663727, "grad_norm": 3.471809148788452, "learning_rate": 5.496470588235294e-06, "loss": 1.8988, "step": 30639 }, { "epoch": 45.06615214994488, "grad_norm": 3.0000619888305664, "learning_rate": 5.4933823529411766e-06, "loss": 1.965, "step": 30660 }, { "epoch": 45.09702315325248, "grad_norm": 2.937340021133423, "learning_rate": 5.490294117647059e-06, "loss": 1.9758, "step": 30681 }, { "epoch": 45.12789415656009, "grad_norm": 4.831690788269043, "learning_rate": 5.487205882352941e-06, "loss": 2.0027, "step": 30702 }, { "epoch": 45.1587651598677, "grad_norm": 5.359570503234863, "learning_rate": 5.484117647058825e-06, "loss": 2.0526, "step": 30723 }, { "epoch": 45.1896361631753, "grad_norm": 3.930889368057251, "learning_rate": 5.481029411764707e-06, "loss": 2.0324, "step": 30744 }, { "epoch": 45.22050716648291, "grad_norm": 3.557265520095825, "learning_rate": 5.4779411764705894e-06, "loss": 1.8762, "step": 30765 }, { "epoch": 45.25137816979052, "grad_norm": 3.5888421535491943, "learning_rate": 5.474852941176471e-06, "loss": 1.9685, "step": 30786 }, { "epoch": 45.28224917309812, "grad_norm": 3.93534779548645, "learning_rate": 5.471764705882353e-06, "loss": 1.9863, "step": 30807 }, { "epoch": 45.31312017640573, "grad_norm": 2.7077605724334717, "learning_rate": 5.468676470588236e-06, "loss": 2.2058, "step": 30828 }, { "epoch": 45.34399117971334, "grad_norm": 3.5668418407440186, "learning_rate": 5.465588235294118e-06, "loss": 2.0327, "step": 30849 }, { "epoch": 45.37486218302095, "grad_norm": 4.708620548248291, "learning_rate": 5.462500000000001e-06, "loss": 1.9242, "step": 30870 }, { "epoch": 45.405733186328554, "grad_norm": 2.4485063552856445, "learning_rate": 5.459411764705883e-06, "loss": 1.9932, "step": 30891 }, { "epoch": 45.436604189636164, "grad_norm": 2.8804147243499756, "learning_rate": 5.4563235294117655e-06, "loss": 1.9412, "step": 30912 }, { "epoch": 45.46747519294377, "grad_norm": 2.72686767578125, "learning_rate": 5.453235294117647e-06, "loss": 1.9133, "step": 30933 }, { "epoch": 45.498346196251376, "grad_norm": 4.44245719909668, "learning_rate": 5.450147058823529e-06, "loss": 1.9927, "step": 30954 }, { "epoch": 45.529217199558985, "grad_norm": 2.7577192783355713, "learning_rate": 5.447058823529412e-06, "loss": 1.9957, "step": 30975 }, { "epoch": 45.560088202866595, "grad_norm": 2.7706048488616943, "learning_rate": 5.443970588235294e-06, "loss": 1.8419, "step": 30996 }, { "epoch": 45.5909592061742, "grad_norm": 3.545910358428955, "learning_rate": 5.440882352941177e-06, "loss": 1.919, "step": 31017 }, { "epoch": 45.62183020948181, "grad_norm": 2.509328842163086, "learning_rate": 5.437794117647059e-06, "loss": 1.9801, "step": 31038 }, { "epoch": 45.65270121278942, "grad_norm": 3.909446954727173, "learning_rate": 5.434705882352942e-06, "loss": 1.9808, "step": 31059 }, { "epoch": 45.683572216097026, "grad_norm": 2.7039906978607178, "learning_rate": 5.431617647058825e-06, "loss": 2.0222, "step": 31080 }, { "epoch": 45.71444321940463, "grad_norm": 4.399816036224365, "learning_rate": 5.4285294117647054e-06, "loss": 1.9684, "step": 31101 }, { "epoch": 45.74531422271224, "grad_norm": 4.667072772979736, "learning_rate": 5.425441176470589e-06, "loss": 2.0588, "step": 31122 }, { "epoch": 45.77618522601985, "grad_norm": 2.787804126739502, "learning_rate": 5.422352941176471e-06, "loss": 2.0018, "step": 31143 }, { "epoch": 45.80705622932745, "grad_norm": 3.716702461242676, "learning_rate": 5.4192647058823535e-06, "loss": 2.0098, "step": 31164 }, { "epoch": 45.83792723263506, "grad_norm": 5.27888822555542, "learning_rate": 5.416176470588236e-06, "loss": 1.9279, "step": 31185 }, { "epoch": 45.86879823594267, "grad_norm": 4.65470552444458, "learning_rate": 5.413235294117648e-06, "loss": 1.9245, "step": 31206 }, { "epoch": 45.89966923925027, "grad_norm": 2.711925983428955, "learning_rate": 5.41014705882353e-06, "loss": 1.9094, "step": 31227 }, { "epoch": 45.93054024255788, "grad_norm": 3.476806879043579, "learning_rate": 5.407058823529412e-06, "loss": 1.8991, "step": 31248 }, { "epoch": 45.96141124586549, "grad_norm": 4.77373743057251, "learning_rate": 5.403970588235295e-06, "loss": 1.983, "step": 31269 }, { "epoch": 45.9922822491731, "grad_norm": 3.3017890453338623, "learning_rate": 5.400882352941177e-06, "loss": 1.8589, "step": 31290 }, { "epoch": 46.022050716648295, "grad_norm": 3.21283221244812, "learning_rate": 5.3977941176470594e-06, "loss": 1.8932, "step": 31311 }, { "epoch": 46.0529217199559, "grad_norm": 4.5764994621276855, "learning_rate": 5.394705882352942e-06, "loss": 1.9166, "step": 31332 }, { "epoch": 46.08379272326351, "grad_norm": 4.568404674530029, "learning_rate": 5.391617647058824e-06, "loss": 1.8923, "step": 31353 }, { "epoch": 46.114663726571116, "grad_norm": 3.3213441371917725, "learning_rate": 5.388529411764706e-06, "loss": 2.0048, "step": 31374 }, { "epoch": 46.14553472987872, "grad_norm": 4.474575996398926, "learning_rate": 5.385441176470588e-06, "loss": 1.9745, "step": 31395 }, { "epoch": 46.17640573318633, "grad_norm": 4.367554187774658, "learning_rate": 5.382352941176471e-06, "loss": 1.9549, "step": 31416 }, { "epoch": 46.20727673649394, "grad_norm": 2.26495623588562, "learning_rate": 5.379264705882353e-06, "loss": 1.9465, "step": 31437 }, { "epoch": 46.23814773980154, "grad_norm": 3.0966124534606934, "learning_rate": 5.3761764705882354e-06, "loss": 1.9438, "step": 31458 }, { "epoch": 46.26901874310915, "grad_norm": 1.5962589979171753, "learning_rate": 5.373088235294119e-06, "loss": 2.0326, "step": 31479 }, { "epoch": 46.29988974641676, "grad_norm": 2.9346072673797607, "learning_rate": 5.370000000000001e-06, "loss": 2.0149, "step": 31500 }, { "epoch": 46.33076074972437, "grad_norm": 3.198991298675537, "learning_rate": 5.3669117647058835e-06, "loss": 2.0487, "step": 31521 }, { "epoch": 46.36163175303197, "grad_norm": 2.5941872596740723, "learning_rate": 5.363823529411765e-06, "loss": 1.9099, "step": 31542 }, { "epoch": 46.39250275633958, "grad_norm": 2.5941882133483887, "learning_rate": 5.3607352941176475e-06, "loss": 2.0946, "step": 31563 }, { "epoch": 46.42337375964719, "grad_norm": 3.508894443511963, "learning_rate": 5.35764705882353e-06, "loss": 1.9951, "step": 31584 }, { "epoch": 46.454244762954794, "grad_norm": 3.7535741329193115, "learning_rate": 5.354558823529412e-06, "loss": 1.9225, "step": 31605 }, { "epoch": 46.4851157662624, "grad_norm": 3.3432183265686035, "learning_rate": 5.351470588235295e-06, "loss": 1.9935, "step": 31626 }, { "epoch": 46.51598676957001, "grad_norm": 2.661496162414551, "learning_rate": 5.348382352941177e-06, "loss": 1.9882, "step": 31647 }, { "epoch": 46.546857772877615, "grad_norm": 3.1475367546081543, "learning_rate": 5.3452941176470595e-06, "loss": 1.9238, "step": 31668 }, { "epoch": 46.577728776185225, "grad_norm": 5.89084529876709, "learning_rate": 5.342205882352942e-06, "loss": 1.9313, "step": 31689 }, { "epoch": 46.608599779492835, "grad_norm": 3.0444185733795166, "learning_rate": 5.3391176470588235e-06, "loss": 1.9287, "step": 31710 }, { "epoch": 46.639470782800444, "grad_norm": 3.2886157035827637, "learning_rate": 5.336029411764706e-06, "loss": 1.9746, "step": 31731 }, { "epoch": 46.67034178610805, "grad_norm": 3.1620185375213623, "learning_rate": 5.332941176470588e-06, "loss": 1.8968, "step": 31752 }, { "epoch": 46.701212789415656, "grad_norm": 4.221733570098877, "learning_rate": 5.329852941176471e-06, "loss": 1.9965, "step": 31773 }, { "epoch": 46.732083792723266, "grad_norm": 4.612674236297607, "learning_rate": 5.326764705882353e-06, "loss": 2.0501, "step": 31794 }, { "epoch": 46.76295479603087, "grad_norm": 3.7025046348571777, "learning_rate": 5.323676470588236e-06, "loss": 2.0258, "step": 31815 }, { "epoch": 46.79382579933848, "grad_norm": 3.9950573444366455, "learning_rate": 5.320588235294119e-06, "loss": 2.0169, "step": 31836 }, { "epoch": 46.82469680264609, "grad_norm": 3.970155954360962, "learning_rate": 5.3174999999999995e-06, "loss": 1.8538, "step": 31857 }, { "epoch": 46.85556780595369, "grad_norm": 4.233425617218018, "learning_rate": 5.314411764705883e-06, "loss": 1.9548, "step": 31878 }, { "epoch": 46.8864388092613, "grad_norm": 3.2957510948181152, "learning_rate": 5.311323529411765e-06, "loss": 1.8601, "step": 31899 }, { "epoch": 46.91730981256891, "grad_norm": 3.2708353996276855, "learning_rate": 5.3082352941176476e-06, "loss": 1.8697, "step": 31920 }, { "epoch": 46.94818081587652, "grad_norm": 3.599625587463379, "learning_rate": 5.30514705882353e-06, "loss": 1.9906, "step": 31941 }, { "epoch": 46.97905181918412, "grad_norm": 4.955700397491455, "learning_rate": 5.302058823529412e-06, "loss": 1.9498, "step": 31962 }, { "epoch": 47.008820286659315, "grad_norm": 2.7876241207122803, "learning_rate": 5.298970588235295e-06, "loss": 1.8186, "step": 31983 }, { "epoch": 47.039691289966925, "grad_norm": 3.536036968231201, "learning_rate": 5.295882352941177e-06, "loss": 2.1012, "step": 32004 }, { "epoch": 47.070562293274534, "grad_norm": 3.7516984939575195, "learning_rate": 5.292794117647059e-06, "loss": 1.9233, "step": 32025 }, { "epoch": 47.10143329658214, "grad_norm": 3.523350715637207, "learning_rate": 5.289705882352941e-06, "loss": 1.9308, "step": 32046 }, { "epoch": 47.132304299889746, "grad_norm": 4.283267021179199, "learning_rate": 5.2866176470588236e-06, "loss": 1.9409, "step": 32067 }, { "epoch": 47.163175303197356, "grad_norm": 3.564329147338867, "learning_rate": 5.283529411764706e-06, "loss": 1.9789, "step": 32088 }, { "epoch": 47.19404630650496, "grad_norm": 4.665071964263916, "learning_rate": 5.280441176470588e-06, "loss": 1.9015, "step": 32109 }, { "epoch": 47.22491730981257, "grad_norm": 2.7627804279327393, "learning_rate": 5.277352941176472e-06, "loss": 1.945, "step": 32130 }, { "epoch": 47.25578831312018, "grad_norm": 3.0376052856445312, "learning_rate": 5.274264705882354e-06, "loss": 1.8988, "step": 32151 }, { "epoch": 47.28665931642779, "grad_norm": 4.766862869262695, "learning_rate": 5.2711764705882364e-06, "loss": 1.9687, "step": 32172 }, { "epoch": 47.31753031973539, "grad_norm": 2.4182627201080322, "learning_rate": 5.268088235294118e-06, "loss": 1.9045, "step": 32193 }, { "epoch": 47.348401323043, "grad_norm": 3.721261501312256, "learning_rate": 5.265e-06, "loss": 2.0732, "step": 32214 }, { "epoch": 47.37927232635061, "grad_norm": 3.760502338409424, "learning_rate": 5.261911764705883e-06, "loss": 2.0287, "step": 32235 }, { "epoch": 47.41014332965821, "grad_norm": 2.8165228366851807, "learning_rate": 5.258823529411765e-06, "loss": 1.8836, "step": 32256 }, { "epoch": 47.44101433296582, "grad_norm": 3.997875690460205, "learning_rate": 5.255735294117648e-06, "loss": 1.9606, "step": 32277 }, { "epoch": 47.47188533627343, "grad_norm": 3.9775173664093018, "learning_rate": 5.25264705882353e-06, "loss": 1.9172, "step": 32298 }, { "epoch": 47.50275633958103, "grad_norm": 3.604313373565674, "learning_rate": 5.2495588235294125e-06, "loss": 1.899, "step": 32319 }, { "epoch": 47.53362734288864, "grad_norm": 3.0740437507629395, "learning_rate": 5.246470588235295e-06, "loss": 1.9572, "step": 32340 }, { "epoch": 47.56449834619625, "grad_norm": 3.6999287605285645, "learning_rate": 5.243382352941176e-06, "loss": 1.9793, "step": 32361 }, { "epoch": 47.59536934950386, "grad_norm": 4.01027774810791, "learning_rate": 5.240294117647059e-06, "loss": 2.0118, "step": 32382 }, { "epoch": 47.626240352811465, "grad_norm": 3.2269864082336426, "learning_rate": 5.237205882352941e-06, "loss": 1.9841, "step": 32403 }, { "epoch": 47.657111356119074, "grad_norm": 4.464413642883301, "learning_rate": 5.234117647058824e-06, "loss": 1.9852, "step": 32424 }, { "epoch": 47.687982359426684, "grad_norm": 2.9172916412353516, "learning_rate": 5.231029411764706e-06, "loss": 1.8664, "step": 32445 }, { "epoch": 47.718853362734286, "grad_norm": 2.315575122833252, "learning_rate": 5.227941176470589e-06, "loss": 1.885, "step": 32466 }, { "epoch": 47.749724366041896, "grad_norm": 3.697990655899048, "learning_rate": 5.224852941176472e-06, "loss": 1.9518, "step": 32487 }, { "epoch": 47.780595369349506, "grad_norm": 3.7003746032714844, "learning_rate": 5.221764705882353e-06, "loss": 1.9161, "step": 32508 }, { "epoch": 47.81146637265711, "grad_norm": 4.584070682525635, "learning_rate": 5.218676470588236e-06, "loss": 1.9402, "step": 32529 }, { "epoch": 47.84233737596472, "grad_norm": 2.5246081352233887, "learning_rate": 5.215588235294118e-06, "loss": 1.9909, "step": 32550 }, { "epoch": 47.87320837927233, "grad_norm": 3.603269100189209, "learning_rate": 5.2125000000000005e-06, "loss": 1.9604, "step": 32571 }, { "epoch": 47.90407938257994, "grad_norm": 3.450449228286743, "learning_rate": 5.209411764705883e-06, "loss": 1.9503, "step": 32592 }, { "epoch": 47.93495038588754, "grad_norm": 4.108982563018799, "learning_rate": 5.206323529411765e-06, "loss": 1.883, "step": 32613 }, { "epoch": 47.96582138919515, "grad_norm": 2.796966314315796, "learning_rate": 5.203235294117648e-06, "loss": 2.0239, "step": 32634 }, { "epoch": 47.99669239250276, "grad_norm": 2.251326560974121, "learning_rate": 5.20014705882353e-06, "loss": 1.9674, "step": 32655 }, { "epoch": 48.02646085997795, "grad_norm": 4.083993434906006, "learning_rate": 5.197058823529412e-06, "loss": 1.9957, "step": 32676 }, { "epoch": 48.057331863285555, "grad_norm": 4.947504043579102, "learning_rate": 5.193970588235294e-06, "loss": 1.9397, "step": 32697 }, { "epoch": 48.088202866593164, "grad_norm": 3.0585482120513916, "learning_rate": 5.1908823529411765e-06, "loss": 1.8828, "step": 32718 }, { "epoch": 48.119073869900774, "grad_norm": 2.8571529388427734, "learning_rate": 5.187794117647059e-06, "loss": 2.0125, "step": 32739 }, { "epoch": 48.149944873208376, "grad_norm": 2.531935214996338, "learning_rate": 5.184705882352941e-06, "loss": 1.9006, "step": 32760 }, { "epoch": 48.180815876515986, "grad_norm": 3.6095895767211914, "learning_rate": 5.1816176470588246e-06, "loss": 1.9486, "step": 32781 }, { "epoch": 48.211686879823596, "grad_norm": 5.472721576690674, "learning_rate": 5.178529411764707e-06, "loss": 1.8719, "step": 32802 }, { "epoch": 48.242557883131205, "grad_norm": 2.9063522815704346, "learning_rate": 5.175441176470589e-06, "loss": 2.0102, "step": 32823 }, { "epoch": 48.27342888643881, "grad_norm": 3.881934404373169, "learning_rate": 5.172352941176471e-06, "loss": 1.9954, "step": 32844 }, { "epoch": 48.30429988974642, "grad_norm": 4.672571659088135, "learning_rate": 5.169264705882353e-06, "loss": 1.8755, "step": 32865 }, { "epoch": 48.33517089305403, "grad_norm": 3.527888536453247, "learning_rate": 5.166176470588236e-06, "loss": 1.9681, "step": 32886 }, { "epoch": 48.36604189636163, "grad_norm": 2.516143798828125, "learning_rate": 5.163088235294118e-06, "loss": 1.9081, "step": 32907 }, { "epoch": 48.39691289966924, "grad_norm": 3.7986936569213867, "learning_rate": 5.1600000000000006e-06, "loss": 1.9571, "step": 32928 }, { "epoch": 48.42778390297685, "grad_norm": 4.25414514541626, "learning_rate": 5.156911764705883e-06, "loss": 2.0173, "step": 32949 }, { "epoch": 48.45865490628445, "grad_norm": 3.1195449829101562, "learning_rate": 5.153823529411765e-06, "loss": 1.9594, "step": 32970 }, { "epoch": 48.48952590959206, "grad_norm": 3.809643268585205, "learning_rate": 5.150735294117647e-06, "loss": 1.9546, "step": 32991 }, { "epoch": 48.52039691289967, "grad_norm": 3.0789263248443604, "learning_rate": 5.147647058823529e-06, "loss": 1.929, "step": 33012 }, { "epoch": 48.55126791620728, "grad_norm": 2.519080400466919, "learning_rate": 5.144558823529412e-06, "loss": 1.994, "step": 33033 }, { "epoch": 48.58213891951488, "grad_norm": 2.8920371532440186, "learning_rate": 5.141470588235294e-06, "loss": 1.8744, "step": 33054 }, { "epoch": 48.61300992282249, "grad_norm": 6.012355327606201, "learning_rate": 5.1383823529411766e-06, "loss": 1.9826, "step": 33075 }, { "epoch": 48.6438809261301, "grad_norm": 2.4653756618499756, "learning_rate": 5.13529411764706e-06, "loss": 2.0173, "step": 33096 }, { "epoch": 48.674751929437704, "grad_norm": 3.052381992340088, "learning_rate": 5.132205882352942e-06, "loss": 1.9577, "step": 33117 }, { "epoch": 48.705622932745314, "grad_norm": 3.9582889080047607, "learning_rate": 5.129117647058825e-06, "loss": 1.8829, "step": 33138 }, { "epoch": 48.73649393605292, "grad_norm": 3.0661113262176514, "learning_rate": 5.126029411764706e-06, "loss": 2.0572, "step": 33159 }, { "epoch": 48.767364939360526, "grad_norm": 2.6682889461517334, "learning_rate": 5.122941176470589e-06, "loss": 1.9128, "step": 33180 }, { "epoch": 48.798235942668136, "grad_norm": 3.7392077445983887, "learning_rate": 5.119852941176471e-06, "loss": 2.0627, "step": 33201 }, { "epoch": 48.829106945975745, "grad_norm": 3.4378092288970947, "learning_rate": 5.1167647058823534e-06, "loss": 1.9778, "step": 33222 }, { "epoch": 48.859977949283355, "grad_norm": 3.0426039695739746, "learning_rate": 5.113676470588236e-06, "loss": 1.9618, "step": 33243 }, { "epoch": 48.89084895259096, "grad_norm": 2.8465492725372314, "learning_rate": 5.110588235294118e-06, "loss": 1.9683, "step": 33264 }, { "epoch": 48.92171995589857, "grad_norm": 3.785952091217041, "learning_rate": 5.107500000000001e-06, "loss": 1.9459, "step": 33285 }, { "epoch": 48.95259095920618, "grad_norm": 3.9252023696899414, "learning_rate": 5.104411764705883e-06, "loss": 1.9012, "step": 33306 }, { "epoch": 48.98346196251378, "grad_norm": 5.60137939453125, "learning_rate": 5.101323529411765e-06, "loss": 1.9258, "step": 33327 }, { "epoch": 49.01323042998897, "grad_norm": 2.114008903503418, "learning_rate": 5.098235294117647e-06, "loss": 1.8847, "step": 33348 }, { "epoch": 49.04410143329658, "grad_norm": 3.875481605529785, "learning_rate": 5.0951470588235294e-06, "loss": 1.8644, "step": 33369 }, { "epoch": 49.07497243660419, "grad_norm": 3.6016502380371094, "learning_rate": 5.092058823529412e-06, "loss": 1.9088, "step": 33390 }, { "epoch": 49.105843439911794, "grad_norm": 2.072120428085327, "learning_rate": 5.088970588235294e-06, "loss": 1.9724, "step": 33411 }, { "epoch": 49.136714443219404, "grad_norm": 3.426520347595215, "learning_rate": 5.0858823529411775e-06, "loss": 1.8712, "step": 33432 }, { "epoch": 49.16758544652701, "grad_norm": 3.5268821716308594, "learning_rate": 5.08279411764706e-06, "loss": 2.1446, "step": 33453 }, { "epoch": 49.19845644983462, "grad_norm": 2.9733126163482666, "learning_rate": 5.079705882352941e-06, "loss": 1.969, "step": 33474 }, { "epoch": 49.229327453142226, "grad_norm": 4.926921367645264, "learning_rate": 5.076617647058824e-06, "loss": 1.9319, "step": 33495 }, { "epoch": 49.260198456449835, "grad_norm": 3.15452241897583, "learning_rate": 5.073529411764706e-06, "loss": 1.921, "step": 33516 }, { "epoch": 49.291069459757445, "grad_norm": 2.308353900909424, "learning_rate": 5.070441176470589e-06, "loss": 1.8623, "step": 33537 }, { "epoch": 49.32194046306505, "grad_norm": 4.4820756912231445, "learning_rate": 5.067352941176471e-06, "loss": 1.9233, "step": 33558 }, { "epoch": 49.35281146637266, "grad_norm": 5.912452220916748, "learning_rate": 5.0642647058823535e-06, "loss": 1.9439, "step": 33579 }, { "epoch": 49.38368246968027, "grad_norm": 2.0068953037261963, "learning_rate": 5.061176470588236e-06, "loss": 1.9832, "step": 33600 }, { "epoch": 49.41455347298787, "grad_norm": 3.2051031589508057, "learning_rate": 5.058088235294118e-06, "loss": 1.8979, "step": 33621 }, { "epoch": 49.44542447629548, "grad_norm": 3.270183801651001, "learning_rate": 5.055e-06, "loss": 1.9231, "step": 33642 }, { "epoch": 49.47629547960309, "grad_norm": 2.604389190673828, "learning_rate": 5.051911764705882e-06, "loss": 1.9515, "step": 33663 }, { "epoch": 49.5071664829107, "grad_norm": 4.45298433303833, "learning_rate": 5.048823529411765e-06, "loss": 1.9771, "step": 33684 }, { "epoch": 49.5380374862183, "grad_norm": 2.8883774280548096, "learning_rate": 5.045735294117647e-06, "loss": 1.9304, "step": 33705 }, { "epoch": 49.56890848952591, "grad_norm": 3.7332241535186768, "learning_rate": 5.0426470588235295e-06, "loss": 1.8855, "step": 33726 }, { "epoch": 49.59977949283352, "grad_norm": 3.1344246864318848, "learning_rate": 5.039558823529413e-06, "loss": 2.0716, "step": 33747 }, { "epoch": 49.63065049614112, "grad_norm": 3.154588222503662, "learning_rate": 5.036470588235295e-06, "loss": 1.9488, "step": 33768 }, { "epoch": 49.66152149944873, "grad_norm": 1.781362771987915, "learning_rate": 5.033382352941178e-06, "loss": 1.9287, "step": 33789 }, { "epoch": 49.69239250275634, "grad_norm": 3.200761079788208, "learning_rate": 5.030294117647059e-06, "loss": 2.0243, "step": 33810 }, { "epoch": 49.723263506063944, "grad_norm": 3.6752724647521973, "learning_rate": 5.0272058823529415e-06, "loss": 1.9433, "step": 33831 }, { "epoch": 49.75413450937155, "grad_norm": 3.9616641998291016, "learning_rate": 5.024117647058824e-06, "loss": 1.9277, "step": 33852 }, { "epoch": 49.78500551267916, "grad_norm": 2.554237127304077, "learning_rate": 5.021029411764706e-06, "loss": 1.9752, "step": 33873 }, { "epoch": 49.81587651598677, "grad_norm": 2.654353380203247, "learning_rate": 5.017941176470589e-06, "loss": 1.9974, "step": 33894 }, { "epoch": 49.846747519294375, "grad_norm": 3.2868456840515137, "learning_rate": 5.014852941176471e-06, "loss": 1.9168, "step": 33915 }, { "epoch": 49.877618522601985, "grad_norm": 3.525892496109009, "learning_rate": 5.011764705882354e-06, "loss": 1.9334, "step": 33936 }, { "epoch": 49.908489525909594, "grad_norm": 2.2830629348754883, "learning_rate": 5.008676470588236e-06, "loss": 2.1648, "step": 33957 }, { "epoch": 49.9393605292172, "grad_norm": 3.428407669067383, "learning_rate": 5.0055882352941176e-06, "loss": 1.8328, "step": 33978 }, { "epoch": 49.97023153252481, "grad_norm": 4.8658623695373535, "learning_rate": 5.0025e-06, "loss": 2.0739, "step": 33999 }, { "epoch": 50.0, "grad_norm": 1.552370309829712, "learning_rate": 4.999411764705882e-06, "loss": 1.9484, "step": 34020 }, { "epoch": 50.03087100330761, "grad_norm": 2.3611888885498047, "learning_rate": 4.996323529411765e-06, "loss": 2.0536, "step": 34041 }, { "epoch": 50.06174200661521, "grad_norm": 2.430736541748047, "learning_rate": 4.993235294117648e-06, "loss": 1.9555, "step": 34062 }, { "epoch": 50.09261300992282, "grad_norm": 3.8038551807403564, "learning_rate": 4.99014705882353e-06, "loss": 2.0131, "step": 34083 }, { "epoch": 50.12348401323043, "grad_norm": 3.239993095397949, "learning_rate": 4.987058823529412e-06, "loss": 2.0422, "step": 34104 }, { "epoch": 50.15435501653804, "grad_norm": 3.106719493865967, "learning_rate": 4.983970588235294e-06, "loss": 2.0064, "step": 34125 }, { "epoch": 50.18522601984564, "grad_norm": 3.2644596099853516, "learning_rate": 4.980882352941177e-06, "loss": 2.0107, "step": 34146 }, { "epoch": 50.21609702315325, "grad_norm": 5.0229268074035645, "learning_rate": 4.977794117647059e-06, "loss": 1.9437, "step": 34167 }, { "epoch": 50.24696802646086, "grad_norm": 2.63893461227417, "learning_rate": 4.974705882352942e-06, "loss": 1.868, "step": 34188 }, { "epoch": 50.277839029768465, "grad_norm": 3.8282299041748047, "learning_rate": 4.971617647058824e-06, "loss": 2.0121, "step": 34209 }, { "epoch": 50.308710033076075, "grad_norm": 3.2828288078308105, "learning_rate": 4.9685294117647064e-06, "loss": 1.8164, "step": 34230 }, { "epoch": 50.339581036383684, "grad_norm": 3.554033041000366, "learning_rate": 4.965441176470589e-06, "loss": 1.9425, "step": 34251 }, { "epoch": 50.37045203969129, "grad_norm": 2.758104085922241, "learning_rate": 4.962352941176471e-06, "loss": 1.8551, "step": 34272 }, { "epoch": 50.4013230429989, "grad_norm": 3.895050287246704, "learning_rate": 4.959264705882354e-06, "loss": 1.8843, "step": 34293 }, { "epoch": 50.432194046306506, "grad_norm": 3.6648812294006348, "learning_rate": 4.956176470588236e-06, "loss": 2.0471, "step": 34314 }, { "epoch": 50.463065049614116, "grad_norm": 3.5651891231536865, "learning_rate": 4.953088235294118e-06, "loss": 2.0329, "step": 34335 }, { "epoch": 50.49393605292172, "grad_norm": 3.726652145385742, "learning_rate": 4.95e-06, "loss": 1.8848, "step": 34356 }, { "epoch": 50.52480705622933, "grad_norm": 2.6405270099639893, "learning_rate": 4.9469117647058824e-06, "loss": 1.9241, "step": 34377 }, { "epoch": 50.55567805953694, "grad_norm": 4.083603382110596, "learning_rate": 4.943823529411766e-06, "loss": 2.0107, "step": 34398 }, { "epoch": 50.58654906284454, "grad_norm": 4.261117935180664, "learning_rate": 4.940882352941177e-06, "loss": 1.9474, "step": 34419 }, { "epoch": 50.61742006615215, "grad_norm": 4.652764320373535, "learning_rate": 4.937794117647059e-06, "loss": 1.8856, "step": 34440 }, { "epoch": 50.64829106945976, "grad_norm": 2.07316255569458, "learning_rate": 4.934705882352941e-06, "loss": 1.919, "step": 34461 }, { "epoch": 50.67916207276736, "grad_norm": 4.440003395080566, "learning_rate": 4.9316176470588236e-06, "loss": 1.9186, "step": 34482 }, { "epoch": 50.71003307607497, "grad_norm": 3.7420012950897217, "learning_rate": 4.928529411764707e-06, "loss": 1.9959, "step": 34503 }, { "epoch": 50.74090407938258, "grad_norm": 3.3835768699645996, "learning_rate": 4.925441176470588e-06, "loss": 1.8068, "step": 34524 }, { "epoch": 50.77177508269019, "grad_norm": 4.502290725708008, "learning_rate": 4.922352941176471e-06, "loss": 1.9604, "step": 34545 }, { "epoch": 50.80264608599779, "grad_norm": 2.8308069705963135, "learning_rate": 4.919264705882353e-06, "loss": 2.0001, "step": 34566 }, { "epoch": 50.8335170893054, "grad_norm": 2.187467575073242, "learning_rate": 4.916176470588236e-06, "loss": 2.1456, "step": 34587 }, { "epoch": 50.86438809261301, "grad_norm": 3.6640074253082275, "learning_rate": 4.913088235294118e-06, "loss": 2.0315, "step": 34608 }, { "epoch": 50.895259095920615, "grad_norm": 3.3970489501953125, "learning_rate": 4.9100000000000004e-06, "loss": 1.9614, "step": 34629 }, { "epoch": 50.926130099228224, "grad_norm": 2.179999828338623, "learning_rate": 4.906911764705883e-06, "loss": 1.8676, "step": 34650 }, { "epoch": 50.957001102535834, "grad_norm": 3.036468029022217, "learning_rate": 4.903823529411765e-06, "loss": 2.0055, "step": 34671 }, { "epoch": 50.98787210584344, "grad_norm": 3.091852903366089, "learning_rate": 4.900735294117648e-06, "loss": 2.0291, "step": 34692 }, { "epoch": 51.01764057331863, "grad_norm": 4.517202377319336, "learning_rate": 4.89764705882353e-06, "loss": 1.8568, "step": 34713 }, { "epoch": 51.04851157662624, "grad_norm": 2.857557773590088, "learning_rate": 4.8945588235294125e-06, "loss": 1.8781, "step": 34734 }, { "epoch": 51.07938257993385, "grad_norm": 3.9860188961029053, "learning_rate": 4.891470588235295e-06, "loss": 1.9259, "step": 34755 }, { "epoch": 51.11025358324146, "grad_norm": 4.130570411682129, "learning_rate": 4.8883823529411764e-06, "loss": 1.8746, "step": 34776 }, { "epoch": 51.14112458654906, "grad_norm": 3.3293113708496094, "learning_rate": 4.885294117647059e-06, "loss": 1.8677, "step": 34797 }, { "epoch": 51.17199558985667, "grad_norm": 3.61708664894104, "learning_rate": 4.882205882352942e-06, "loss": 1.8591, "step": 34818 }, { "epoch": 51.20286659316428, "grad_norm": 3.539062023162842, "learning_rate": 4.8791176470588245e-06, "loss": 1.9341, "step": 34839 }, { "epoch": 51.23373759647188, "grad_norm": 4.052618026733398, "learning_rate": 4.876029411764706e-06, "loss": 1.8478, "step": 34860 }, { "epoch": 51.26460859977949, "grad_norm": 2.886404275894165, "learning_rate": 4.8729411764705885e-06, "loss": 1.9282, "step": 34881 }, { "epoch": 51.2954796030871, "grad_norm": 3.506343364715576, "learning_rate": 4.869852941176471e-06, "loss": 1.9627, "step": 34902 }, { "epoch": 51.326350606394705, "grad_norm": 3.036496639251709, "learning_rate": 4.866764705882353e-06, "loss": 2.0213, "step": 34923 }, { "epoch": 51.357221609702314, "grad_norm": 2.2468764781951904, "learning_rate": 4.863676470588236e-06, "loss": 1.9488, "step": 34944 }, { "epoch": 51.388092613009924, "grad_norm": 3.0807530879974365, "learning_rate": 4.860588235294118e-06, "loss": 1.9407, "step": 34965 }, { "epoch": 51.418963616317534, "grad_norm": 3.6055896282196045, "learning_rate": 4.8575000000000005e-06, "loss": 1.9802, "step": 34986 }, { "epoch": 51.449834619625136, "grad_norm": 3.00866961479187, "learning_rate": 4.854411764705883e-06, "loss": 2.0526, "step": 35007 }, { "epoch": 51.480705622932746, "grad_norm": 3.289548397064209, "learning_rate": 4.851323529411765e-06, "loss": 2.0467, "step": 35028 }, { "epoch": 51.511576626240355, "grad_norm": 2.6968212127685547, "learning_rate": 4.848235294117648e-06, "loss": 2.0342, "step": 35049 }, { "epoch": 51.54244762954796, "grad_norm": 3.55562686920166, "learning_rate": 4.84514705882353e-06, "loss": 2.0009, "step": 35070 }, { "epoch": 51.57331863285557, "grad_norm": 2.948843240737915, "learning_rate": 4.842058823529412e-06, "loss": 1.9684, "step": 35091 }, { "epoch": 51.60418963616318, "grad_norm": 3.5464420318603516, "learning_rate": 4.838970588235294e-06, "loss": 2.0046, "step": 35112 }, { "epoch": 51.63506063947078, "grad_norm": 3.980527400970459, "learning_rate": 4.8358823529411765e-06, "loss": 1.9122, "step": 35133 }, { "epoch": 51.66593164277839, "grad_norm": 2.6841890811920166, "learning_rate": 4.83279411764706e-06, "loss": 1.9686, "step": 35154 }, { "epoch": 51.696802646086, "grad_norm": 3.509441614151001, "learning_rate": 4.829705882352941e-06, "loss": 2.0013, "step": 35175 }, { "epoch": 51.72767364939361, "grad_norm": 2.2676753997802734, "learning_rate": 4.826617647058824e-06, "loss": 1.9998, "step": 35196 }, { "epoch": 51.75854465270121, "grad_norm": 4.082207679748535, "learning_rate": 4.823529411764706e-06, "loss": 1.9794, "step": 35217 }, { "epoch": 51.78941565600882, "grad_norm": 3.2228591442108154, "learning_rate": 4.8204411764705885e-06, "loss": 2.0431, "step": 35238 }, { "epoch": 51.82028665931643, "grad_norm": 2.9643771648406982, "learning_rate": 4.817352941176471e-06, "loss": 1.9948, "step": 35259 }, { "epoch": 51.85115766262403, "grad_norm": 3.1746480464935303, "learning_rate": 4.814264705882353e-06, "loss": 1.9664, "step": 35280 }, { "epoch": 51.88202866593164, "grad_norm": 3.2046985626220703, "learning_rate": 4.811176470588236e-06, "loss": 1.8774, "step": 35301 }, { "epoch": 51.91289966923925, "grad_norm": 3.706369400024414, "learning_rate": 4.808088235294118e-06, "loss": 1.8669, "step": 35322 }, { "epoch": 51.943770672546854, "grad_norm": 3.738168478012085, "learning_rate": 4.805000000000001e-06, "loss": 2.0421, "step": 35343 }, { "epoch": 51.974641675854464, "grad_norm": 3.4982872009277344, "learning_rate": 4.801911764705883e-06, "loss": 1.9731, "step": 35364 }, { "epoch": 52.00441014332966, "grad_norm": 3.8404381275177, "learning_rate": 4.798823529411765e-06, "loss": 1.7778, "step": 35385 }, { "epoch": 52.03528114663727, "grad_norm": 1.5809396505355835, "learning_rate": 4.795735294117648e-06, "loss": 2.0306, "step": 35406 }, { "epoch": 52.06615214994488, "grad_norm": 2.772110939025879, "learning_rate": 4.792647058823529e-06, "loss": 1.9704, "step": 35427 }, { "epoch": 52.09702315325248, "grad_norm": 2.978775978088379, "learning_rate": 4.789558823529412e-06, "loss": 1.959, "step": 35448 }, { "epoch": 52.12789415656009, "grad_norm": 4.38999605178833, "learning_rate": 4.786470588235295e-06, "loss": 1.9426, "step": 35469 }, { "epoch": 52.1587651598677, "grad_norm": 5.012101173400879, "learning_rate": 4.783382352941177e-06, "loss": 1.977, "step": 35490 }, { "epoch": 52.1896361631753, "grad_norm": 2.927130937576294, "learning_rate": 4.780294117647059e-06, "loss": 1.8082, "step": 35511 }, { "epoch": 52.22050716648291, "grad_norm": 3.325777292251587, "learning_rate": 4.777205882352941e-06, "loss": 2.0091, "step": 35532 }, { "epoch": 52.25137816979052, "grad_norm": 4.170991897583008, "learning_rate": 4.774117647058824e-06, "loss": 1.7824, "step": 35553 }, { "epoch": 52.28224917309812, "grad_norm": 3.5277774333953857, "learning_rate": 4.771029411764706e-06, "loss": 1.9128, "step": 35574 }, { "epoch": 52.31312017640573, "grad_norm": 3.742974281311035, "learning_rate": 4.767941176470589e-06, "loss": 1.9044, "step": 35595 }, { "epoch": 52.34399117971334, "grad_norm": 3.0582103729248047, "learning_rate": 4.764852941176471e-06, "loss": 1.9125, "step": 35616 }, { "epoch": 52.37486218302095, "grad_norm": 2.5158185958862305, "learning_rate": 4.7617647058823534e-06, "loss": 1.9958, "step": 35637 }, { "epoch": 52.405733186328554, "grad_norm": 3.682742118835449, "learning_rate": 4.758676470588236e-06, "loss": 1.9507, "step": 35658 }, { "epoch": 52.436604189636164, "grad_norm": 3.5912024974823, "learning_rate": 4.755588235294118e-06, "loss": 2.074, "step": 35679 }, { "epoch": 52.46747519294377, "grad_norm": 2.8898019790649414, "learning_rate": 4.752500000000001e-06, "loss": 1.9742, "step": 35700 }, { "epoch": 52.498346196251376, "grad_norm": 3.3909738063812256, "learning_rate": 4.749411764705883e-06, "loss": 1.9988, "step": 35721 }, { "epoch": 52.529217199558985, "grad_norm": 2.7757773399353027, "learning_rate": 4.746323529411765e-06, "loss": 1.993, "step": 35742 }, { "epoch": 52.560088202866595, "grad_norm": 3.9624123573303223, "learning_rate": 4.743235294117647e-06, "loss": 1.9651, "step": 35763 }, { "epoch": 52.5909592061742, "grad_norm": 4.401577472686768, "learning_rate": 4.74014705882353e-06, "loss": 1.9091, "step": 35784 }, { "epoch": 52.62183020948181, "grad_norm": 3.9319536685943604, "learning_rate": 4.737058823529413e-06, "loss": 1.8777, "step": 35805 }, { "epoch": 52.65270121278942, "grad_norm": 5.375467777252197, "learning_rate": 4.733970588235294e-06, "loss": 1.9963, "step": 35826 }, { "epoch": 52.683572216097026, "grad_norm": 3.6134068965911865, "learning_rate": 4.730882352941177e-06, "loss": 1.9192, "step": 35847 }, { "epoch": 52.71444321940463, "grad_norm": 4.300321578979492, "learning_rate": 4.727794117647059e-06, "loss": 1.898, "step": 35868 }, { "epoch": 52.74531422271224, "grad_norm": 3.152383804321289, "learning_rate": 4.7247058823529415e-06, "loss": 1.9342, "step": 35889 }, { "epoch": 52.77618522601985, "grad_norm": 3.267566442489624, "learning_rate": 4.721617647058824e-06, "loss": 1.9179, "step": 35910 }, { "epoch": 52.80705622932745, "grad_norm": 2.878755569458008, "learning_rate": 4.718529411764706e-06, "loss": 1.9737, "step": 35931 }, { "epoch": 52.83792723263506, "grad_norm": 2.21882700920105, "learning_rate": 4.715441176470589e-06, "loss": 1.8573, "step": 35952 }, { "epoch": 52.86879823594267, "grad_norm": 2.64985990524292, "learning_rate": 4.712352941176471e-06, "loss": 2.0294, "step": 35973 }, { "epoch": 52.89966923925027, "grad_norm": 2.81136417388916, "learning_rate": 4.7092647058823535e-06, "loss": 2.0215, "step": 35994 }, { "epoch": 52.93054024255788, "grad_norm": 2.8033177852630615, "learning_rate": 4.706176470588236e-06, "loss": 1.9739, "step": 36015 }, { "epoch": 52.96141124586549, "grad_norm": 2.423895835876465, "learning_rate": 4.703088235294118e-06, "loss": 2.0485, "step": 36036 }, { "epoch": 52.9922822491731, "grad_norm": 3.9042444229125977, "learning_rate": 4.7e-06, "loss": 1.9102, "step": 36057 }, { "epoch": 53.022050716648295, "grad_norm": 3.4533393383026123, "learning_rate": 4.696911764705882e-06, "loss": 1.7891, "step": 36078 }, { "epoch": 53.0529217199559, "grad_norm": 3.4712631702423096, "learning_rate": 4.693823529411765e-06, "loss": 1.9638, "step": 36099 }, { "epoch": 53.08379272326351, "grad_norm": 3.1051712036132812, "learning_rate": 4.690735294117648e-06, "loss": 1.8181, "step": 36120 }, { "epoch": 53.114663726571116, "grad_norm": 3.073343515396118, "learning_rate": 4.6876470588235295e-06, "loss": 1.9173, "step": 36141 }, { "epoch": 53.14553472987872, "grad_norm": 3.660679340362549, "learning_rate": 4.684558823529412e-06, "loss": 1.9243, "step": 36162 }, { "epoch": 53.17640573318633, "grad_norm": 4.310573101043701, "learning_rate": 4.681470588235294e-06, "loss": 1.9849, "step": 36183 }, { "epoch": 53.20727673649394, "grad_norm": 4.682136058807373, "learning_rate": 4.678382352941177e-06, "loss": 1.9463, "step": 36204 }, { "epoch": 53.23814773980154, "grad_norm": 4.2977094650268555, "learning_rate": 4.675294117647059e-06, "loss": 2.0143, "step": 36225 }, { "epoch": 53.26901874310915, "grad_norm": 2.7769463062286377, "learning_rate": 4.6722058823529416e-06, "loss": 1.8413, "step": 36246 }, { "epoch": 53.29988974641676, "grad_norm": 3.718508720397949, "learning_rate": 4.669117647058824e-06, "loss": 1.8502, "step": 36267 }, { "epoch": 53.33076074972437, "grad_norm": 2.9025838375091553, "learning_rate": 4.666029411764706e-06, "loss": 2.0006, "step": 36288 }, { "epoch": 53.36163175303197, "grad_norm": 6.074520111083984, "learning_rate": 4.662941176470589e-06, "loss": 1.9768, "step": 36309 }, { "epoch": 53.39250275633958, "grad_norm": 4.098815441131592, "learning_rate": 4.659852941176471e-06, "loss": 1.7934, "step": 36330 }, { "epoch": 53.42337375964719, "grad_norm": 2.429485559463501, "learning_rate": 4.656764705882354e-06, "loss": 2.0066, "step": 36351 }, { "epoch": 53.454244762954794, "grad_norm": 4.578481674194336, "learning_rate": 4.653676470588236e-06, "loss": 1.9173, "step": 36372 }, { "epoch": 53.4851157662624, "grad_norm": 3.4876160621643066, "learning_rate": 4.6505882352941176e-06, "loss": 1.8886, "step": 36393 }, { "epoch": 53.51598676957001, "grad_norm": 2.275928497314453, "learning_rate": 4.6475e-06, "loss": 1.817, "step": 36414 }, { "epoch": 53.546857772877615, "grad_norm": 3.0920584201812744, "learning_rate": 4.644411764705883e-06, "loss": 1.9928, "step": 36435 }, { "epoch": 53.577728776185225, "grad_norm": 3.430478572845459, "learning_rate": 4.641323529411766e-06, "loss": 2.0196, "step": 36456 }, { "epoch": 53.608599779492835, "grad_norm": 3.47421932220459, "learning_rate": 4.638235294117647e-06, "loss": 1.9754, "step": 36477 }, { "epoch": 53.639470782800444, "grad_norm": 1.8872990608215332, "learning_rate": 4.63514705882353e-06, "loss": 1.9119, "step": 36498 }, { "epoch": 53.67034178610805, "grad_norm": 2.7055604457855225, "learning_rate": 4.632058823529412e-06, "loss": 2.0701, "step": 36519 }, { "epoch": 53.701212789415656, "grad_norm": 3.6024169921875, "learning_rate": 4.628970588235294e-06, "loss": 1.8639, "step": 36540 }, { "epoch": 53.732083792723266, "grad_norm": 4.46739387512207, "learning_rate": 4.625882352941177e-06, "loss": 2.0308, "step": 36561 }, { "epoch": 53.76295479603087, "grad_norm": 4.137654781341553, "learning_rate": 4.622794117647059e-06, "loss": 2.0213, "step": 36582 }, { "epoch": 53.79382579933848, "grad_norm": 1.7619612216949463, "learning_rate": 4.619705882352942e-06, "loss": 2.0797, "step": 36603 }, { "epoch": 53.82469680264609, "grad_norm": 2.980804443359375, "learning_rate": 4.616617647058824e-06, "loss": 1.9786, "step": 36624 }, { "epoch": 53.85556780595369, "grad_norm": 2.1154658794403076, "learning_rate": 4.6135294117647065e-06, "loss": 1.9708, "step": 36645 }, { "epoch": 53.8864388092613, "grad_norm": 1.9343713521957397, "learning_rate": 4.610441176470589e-06, "loss": 1.8764, "step": 36666 }, { "epoch": 53.91730981256891, "grad_norm": 2.7075865268707275, "learning_rate": 4.607352941176471e-06, "loss": 1.9143, "step": 36687 }, { "epoch": 53.94818081587652, "grad_norm": 3.1040847301483154, "learning_rate": 4.604264705882353e-06, "loss": 1.9031, "step": 36708 }, { "epoch": 53.97905181918412, "grad_norm": 3.6179661750793457, "learning_rate": 4.601176470588235e-06, "loss": 1.8764, "step": 36729 }, { "epoch": 54.008820286659315, "grad_norm": 3.5091428756713867, "learning_rate": 4.5980882352941185e-06, "loss": 2.0471, "step": 36750 }, { "epoch": 54.039691289966925, "grad_norm": 2.791682720184326, "learning_rate": 4.595000000000001e-06, "loss": 1.9326, "step": 36771 }, { "epoch": 54.070562293274534, "grad_norm": 2.4163472652435303, "learning_rate": 4.5919117647058825e-06, "loss": 1.9592, "step": 36792 }, { "epoch": 54.10143329658214, "grad_norm": 1.963693380355835, "learning_rate": 4.588823529411765e-06, "loss": 1.9921, "step": 36813 }, { "epoch": 54.132304299889746, "grad_norm": 3.3299829959869385, "learning_rate": 4.585735294117647e-06, "loss": 1.9043, "step": 36834 }, { "epoch": 54.163175303197356, "grad_norm": 3.7524116039276123, "learning_rate": 4.58264705882353e-06, "loss": 1.8945, "step": 36855 }, { "epoch": 54.19404630650496, "grad_norm": 3.4892425537109375, "learning_rate": 4.579558823529412e-06, "loss": 1.9158, "step": 36876 }, { "epoch": 54.22491730981257, "grad_norm": 5.149896621704102, "learning_rate": 4.5764705882352945e-06, "loss": 1.8056, "step": 36897 }, { "epoch": 54.25578831312018, "grad_norm": 3.9396770000457764, "learning_rate": 4.573382352941177e-06, "loss": 1.803, "step": 36918 }, { "epoch": 54.28665931642779, "grad_norm": 3.621070146560669, "learning_rate": 4.570294117647059e-06, "loss": 1.9214, "step": 36939 }, { "epoch": 54.31753031973539, "grad_norm": 2.348787784576416, "learning_rate": 4.567205882352942e-06, "loss": 1.851, "step": 36960 }, { "epoch": 54.348401323043, "grad_norm": 3.901158094406128, "learning_rate": 4.564117647058824e-06, "loss": 1.9585, "step": 36981 }, { "epoch": 54.37927232635061, "grad_norm": 2.5016956329345703, "learning_rate": 4.5610294117647065e-06, "loss": 1.9504, "step": 37002 }, { "epoch": 54.41014332965821, "grad_norm": 4.387479305267334, "learning_rate": 4.557941176470589e-06, "loss": 1.948, "step": 37023 }, { "epoch": 54.44101433296582, "grad_norm": 3.244612693786621, "learning_rate": 4.5548529411764705e-06, "loss": 2.03, "step": 37044 }, { "epoch": 54.47188533627343, "grad_norm": 3.1503806114196777, "learning_rate": 4.551764705882353e-06, "loss": 2.0632, "step": 37065 }, { "epoch": 54.50275633958103, "grad_norm": 3.8207144737243652, "learning_rate": 4.548676470588236e-06, "loss": 1.9433, "step": 37086 }, { "epoch": 54.53362734288864, "grad_norm": 3.643317461013794, "learning_rate": 4.545588235294118e-06, "loss": 1.8954, "step": 37107 }, { "epoch": 54.56449834619625, "grad_norm": 3.173020362854004, "learning_rate": 4.54264705882353e-06, "loss": 1.9466, "step": 37128 }, { "epoch": 54.59536934950386, "grad_norm": 5.066936492919922, "learning_rate": 4.539558823529412e-06, "loss": 1.9832, "step": 37149 }, { "epoch": 54.626240352811465, "grad_norm": 4.483925819396973, "learning_rate": 4.536470588235294e-06, "loss": 2.0182, "step": 37170 }, { "epoch": 54.657111356119074, "grad_norm": 3.2603790760040283, "learning_rate": 4.533382352941177e-06, "loss": 2.003, "step": 37191 }, { "epoch": 54.687982359426684, "grad_norm": 5.078094005584717, "learning_rate": 4.53029411764706e-06, "loss": 1.7985, "step": 37212 }, { "epoch": 54.718853362734286, "grad_norm": 2.782869338989258, "learning_rate": 4.527205882352941e-06, "loss": 1.8923, "step": 37233 }, { "epoch": 54.749724366041896, "grad_norm": 4.915396690368652, "learning_rate": 4.524117647058824e-06, "loss": 1.9798, "step": 37254 }, { "epoch": 54.780595369349506, "grad_norm": 3.0791265964508057, "learning_rate": 4.521029411764706e-06, "loss": 1.8941, "step": 37275 }, { "epoch": 54.81146637265711, "grad_norm": 2.9772324562072754, "learning_rate": 4.5179411764705885e-06, "loss": 1.9651, "step": 37296 }, { "epoch": 54.84233737596472, "grad_norm": 4.081331253051758, "learning_rate": 4.514852941176471e-06, "loss": 1.865, "step": 37317 }, { "epoch": 54.87320837927233, "grad_norm": 2.471484661102295, "learning_rate": 4.511764705882353e-06, "loss": 1.8937, "step": 37338 }, { "epoch": 54.90407938257994, "grad_norm": 2.870105504989624, "learning_rate": 4.508676470588236e-06, "loss": 1.9493, "step": 37359 }, { "epoch": 54.93495038588754, "grad_norm": 4.051716327667236, "learning_rate": 4.505588235294118e-06, "loss": 2.0091, "step": 37380 }, { "epoch": 54.96582138919515, "grad_norm": 3.932248592376709, "learning_rate": 4.5025000000000005e-06, "loss": 2.0556, "step": 37401 }, { "epoch": 54.99669239250276, "grad_norm": 3.1699836254119873, "learning_rate": 4.499411764705883e-06, "loss": 1.9966, "step": 37422 }, { "epoch": 55.02646085997795, "grad_norm": 2.9446067810058594, "learning_rate": 4.496323529411765e-06, "loss": 1.9146, "step": 37443 }, { "epoch": 55.057331863285555, "grad_norm": 3.124001979827881, "learning_rate": 4.493235294117648e-06, "loss": 1.9647, "step": 37464 }, { "epoch": 55.088202866593164, "grad_norm": 2.9898316860198975, "learning_rate": 4.490147058823529e-06, "loss": 1.9038, "step": 37485 }, { "epoch": 55.119073869900774, "grad_norm": 3.717860221862793, "learning_rate": 4.4870588235294126e-06, "loss": 1.8261, "step": 37506 }, { "epoch": 55.149944873208376, "grad_norm": 3.197941780090332, "learning_rate": 4.483970588235295e-06, "loss": 2.007, "step": 37527 }, { "epoch": 55.180815876515986, "grad_norm": 3.062481641769409, "learning_rate": 4.4808823529411765e-06, "loss": 2.0272, "step": 37548 }, { "epoch": 55.211686879823596, "grad_norm": 2.981797218322754, "learning_rate": 4.477794117647059e-06, "loss": 1.9138, "step": 37569 }, { "epoch": 55.242557883131205, "grad_norm": 3.056781530380249, "learning_rate": 4.474705882352941e-06, "loss": 1.9719, "step": 37590 }, { "epoch": 55.27342888643881, "grad_norm": 3.244813919067383, "learning_rate": 4.471617647058824e-06, "loss": 1.9558, "step": 37611 }, { "epoch": 55.30429988974642, "grad_norm": 4.525576114654541, "learning_rate": 4.468529411764706e-06, "loss": 1.9528, "step": 37632 }, { "epoch": 55.33517089305403, "grad_norm": 3.694204568862915, "learning_rate": 4.4654411764705886e-06, "loss": 1.918, "step": 37653 }, { "epoch": 55.36604189636163, "grad_norm": 4.428383827209473, "learning_rate": 4.462352941176471e-06, "loss": 1.9085, "step": 37674 }, { "epoch": 55.39691289966924, "grad_norm": 5.354264736175537, "learning_rate": 4.459264705882353e-06, "loss": 1.9025, "step": 37695 }, { "epoch": 55.42778390297685, "grad_norm": 3.8860256671905518, "learning_rate": 4.456176470588236e-06, "loss": 1.9496, "step": 37716 }, { "epoch": 55.45865490628445, "grad_norm": 2.599255084991455, "learning_rate": 4.453088235294118e-06, "loss": 1.8598, "step": 37737 }, { "epoch": 55.48952590959206, "grad_norm": 4.093317985534668, "learning_rate": 4.450000000000001e-06, "loss": 1.9042, "step": 37758 }, { "epoch": 55.52039691289967, "grad_norm": 3.939258575439453, "learning_rate": 4.446911764705883e-06, "loss": 2.0369, "step": 37779 }, { "epoch": 55.55126791620728, "grad_norm": 3.320251941680908, "learning_rate": 4.4438235294117646e-06, "loss": 1.9366, "step": 37800 }, { "epoch": 55.58213891951488, "grad_norm": 2.927976608276367, "learning_rate": 4.440735294117647e-06, "loss": 2.0188, "step": 37821 }, { "epoch": 55.61300992282249, "grad_norm": 2.6783721446990967, "learning_rate": 4.43764705882353e-06, "loss": 2.0921, "step": 37842 }, { "epoch": 55.6438809261301, "grad_norm": 2.3309595584869385, "learning_rate": 4.434558823529413e-06, "loss": 1.847, "step": 37863 }, { "epoch": 55.674751929437704, "grad_norm": 5.241121292114258, "learning_rate": 4.431470588235294e-06, "loss": 1.9419, "step": 37884 }, { "epoch": 55.705622932745314, "grad_norm": 2.1193554401397705, "learning_rate": 4.428382352941177e-06, "loss": 1.9463, "step": 37905 }, { "epoch": 55.73649393605292, "grad_norm": 3.8681976795196533, "learning_rate": 4.425294117647059e-06, "loss": 1.9207, "step": 37926 }, { "epoch": 55.767364939360526, "grad_norm": 4.109067916870117, "learning_rate": 4.422205882352941e-06, "loss": 1.931, "step": 37947 }, { "epoch": 55.798235942668136, "grad_norm": 2.3517215251922607, "learning_rate": 4.419117647058824e-06, "loss": 1.92, "step": 37968 }, { "epoch": 55.829106945975745, "grad_norm": 2.1322543621063232, "learning_rate": 4.416029411764706e-06, "loss": 1.9275, "step": 37989 }, { "epoch": 55.859977949283355, "grad_norm": 4.192801475524902, "learning_rate": 4.412941176470589e-06, "loss": 1.8114, "step": 38010 }, { "epoch": 55.89084895259096, "grad_norm": 3.3966455459594727, "learning_rate": 4.409852941176471e-06, "loss": 1.8758, "step": 38031 }, { "epoch": 55.92171995589857, "grad_norm": 3.132643461227417, "learning_rate": 4.4067647058823534e-06, "loss": 1.9544, "step": 38052 }, { "epoch": 55.95259095920618, "grad_norm": 4.393044471740723, "learning_rate": 4.403676470588236e-06, "loss": 1.8488, "step": 38073 }, { "epoch": 55.98346196251378, "grad_norm": 4.053401470184326, "learning_rate": 4.400588235294118e-06, "loss": 1.9849, "step": 38094 }, { "epoch": 56.01323042998897, "grad_norm": 4.1462249755859375, "learning_rate": 4.3975e-06, "loss": 2.0014, "step": 38115 }, { "epoch": 56.04410143329658, "grad_norm": 2.3967742919921875, "learning_rate": 4.394411764705882e-06, "loss": 2.0125, "step": 38136 }, { "epoch": 56.07497243660419, "grad_norm": 3.8294060230255127, "learning_rate": 4.3913235294117655e-06, "loss": 1.944, "step": 38157 }, { "epoch": 56.105843439911794, "grad_norm": 5.490301132202148, "learning_rate": 4.388235294117648e-06, "loss": 2.0661, "step": 38178 }, { "epoch": 56.136714443219404, "grad_norm": 3.2286152839660645, "learning_rate": 4.3851470588235295e-06, "loss": 1.9327, "step": 38199 }, { "epoch": 56.16758544652701, "grad_norm": 3.571922540664673, "learning_rate": 4.382058823529412e-06, "loss": 1.8333, "step": 38220 }, { "epoch": 56.19845644983462, "grad_norm": 3.8803212642669678, "learning_rate": 4.378970588235294e-06, "loss": 1.9068, "step": 38241 }, { "epoch": 56.229327453142226, "grad_norm": 2.458409309387207, "learning_rate": 4.375882352941177e-06, "loss": 2.0097, "step": 38262 }, { "epoch": 56.260198456449835, "grad_norm": 3.675710916519165, "learning_rate": 4.372794117647059e-06, "loss": 1.8628, "step": 38283 }, { "epoch": 56.291069459757445, "grad_norm": 3.0392580032348633, "learning_rate": 4.3697058823529415e-06, "loss": 1.94, "step": 38304 }, { "epoch": 56.32194046306505, "grad_norm": 3.6308279037475586, "learning_rate": 4.366617647058824e-06, "loss": 1.847, "step": 38325 }, { "epoch": 56.35281146637266, "grad_norm": 3.37184739112854, "learning_rate": 4.363529411764706e-06, "loss": 2.0059, "step": 38346 }, { "epoch": 56.38368246968027, "grad_norm": 1.8004812002182007, "learning_rate": 4.360441176470589e-06, "loss": 1.9427, "step": 38367 }, { "epoch": 56.41455347298787, "grad_norm": 3.9168221950531006, "learning_rate": 4.357352941176471e-06, "loss": 2.0062, "step": 38388 }, { "epoch": 56.44542447629548, "grad_norm": 5.35752010345459, "learning_rate": 4.3542647058823535e-06, "loss": 1.911, "step": 38409 }, { "epoch": 56.47629547960309, "grad_norm": 3.8705921173095703, "learning_rate": 4.351176470588236e-06, "loss": 1.9051, "step": 38430 }, { "epoch": 56.5071664829107, "grad_norm": 3.723825454711914, "learning_rate": 4.3480882352941175e-06, "loss": 1.8067, "step": 38451 }, { "epoch": 56.5380374862183, "grad_norm": 4.114817142486572, "learning_rate": 4.345000000000001e-06, "loss": 2.0221, "step": 38472 }, { "epoch": 56.56890848952591, "grad_norm": 4.313055038452148, "learning_rate": 4.341911764705883e-06, "loss": 1.8952, "step": 38493 }, { "epoch": 56.59977949283352, "grad_norm": 3.1276845932006836, "learning_rate": 4.3388235294117656e-06, "loss": 2.0535, "step": 38514 }, { "epoch": 56.63065049614112, "grad_norm": 2.0817196369171143, "learning_rate": 4.335735294117647e-06, "loss": 1.9894, "step": 38535 }, { "epoch": 56.66152149944873, "grad_norm": 3.0000627040863037, "learning_rate": 4.3326470588235295e-06, "loss": 1.8941, "step": 38556 }, { "epoch": 56.69239250275634, "grad_norm": 2.6866979598999023, "learning_rate": 4.329558823529412e-06, "loss": 1.9607, "step": 38577 }, { "epoch": 56.723263506063944, "grad_norm": 3.723560094833374, "learning_rate": 4.326470588235294e-06, "loss": 2.0017, "step": 38598 }, { "epoch": 56.75413450937155, "grad_norm": 3.982632875442505, "learning_rate": 4.323382352941177e-06, "loss": 1.9856, "step": 38619 }, { "epoch": 56.78500551267916, "grad_norm": 3.339752674102783, "learning_rate": 4.320294117647059e-06, "loss": 1.8981, "step": 38640 }, { "epoch": 56.81587651598677, "grad_norm": 3.27858304977417, "learning_rate": 4.3172058823529416e-06, "loss": 1.9082, "step": 38661 }, { "epoch": 56.846747519294375, "grad_norm": 3.8039615154266357, "learning_rate": 4.314117647058824e-06, "loss": 1.982, "step": 38682 }, { "epoch": 56.877618522601985, "grad_norm": 2.587460994720459, "learning_rate": 4.311029411764706e-06, "loss": 1.8495, "step": 38703 }, { "epoch": 56.908489525909594, "grad_norm": 2.8484318256378174, "learning_rate": 4.307941176470589e-06, "loss": 2.0169, "step": 38724 }, { "epoch": 56.9393605292172, "grad_norm": 3.948993682861328, "learning_rate": 4.304852941176471e-06, "loss": 1.9476, "step": 38745 }, { "epoch": 56.97023153252481, "grad_norm": 3.450148344039917, "learning_rate": 4.301764705882353e-06, "loss": 2.0306, "step": 38766 }, { "epoch": 57.0, "grad_norm": 0.47871342301368713, "learning_rate": 4.298676470588235e-06, "loss": 1.856, "step": 38787 }, { "epoch": 57.03087100330761, "grad_norm": 3.2402782440185547, "learning_rate": 4.295588235294118e-06, "loss": 2.044, "step": 38808 }, { "epoch": 57.06174200661521, "grad_norm": 3.5660297870635986, "learning_rate": 4.292500000000001e-06, "loss": 1.8795, "step": 38829 }, { "epoch": 57.09261300992282, "grad_norm": 2.8574061393737793, "learning_rate": 4.289411764705882e-06, "loss": 1.8981, "step": 38850 }, { "epoch": 57.12348401323043, "grad_norm": 2.725797176361084, "learning_rate": 4.286323529411765e-06, "loss": 1.9229, "step": 38871 }, { "epoch": 57.15435501653804, "grad_norm": 2.8375120162963867, "learning_rate": 4.283235294117647e-06, "loss": 1.8641, "step": 38892 }, { "epoch": 57.18522601984564, "grad_norm": 3.355712413787842, "learning_rate": 4.28014705882353e-06, "loss": 1.9563, "step": 38913 }, { "epoch": 57.21609702315325, "grad_norm": 3.5365352630615234, "learning_rate": 4.277058823529412e-06, "loss": 1.9597, "step": 38934 }, { "epoch": 57.24696802646086, "grad_norm": 4.2869696617126465, "learning_rate": 4.2739705882352944e-06, "loss": 1.9767, "step": 38955 }, { "epoch": 57.277839029768465, "grad_norm": 2.8834586143493652, "learning_rate": 4.270882352941177e-06, "loss": 1.8889, "step": 38976 }, { "epoch": 57.308710033076075, "grad_norm": 3.464279890060425, "learning_rate": 4.267794117647059e-06, "loss": 1.9478, "step": 38997 }, { "epoch": 57.339581036383684, "grad_norm": 2.4342877864837646, "learning_rate": 4.264705882352942e-06, "loss": 1.8429, "step": 39018 }, { "epoch": 57.37045203969129, "grad_norm": 2.6994986534118652, "learning_rate": 4.261617647058824e-06, "loss": 1.9564, "step": 39039 }, { "epoch": 57.4013230429989, "grad_norm": 4.203607559204102, "learning_rate": 4.2585294117647065e-06, "loss": 1.9516, "step": 39060 }, { "epoch": 57.432194046306506, "grad_norm": 2.9425482749938965, "learning_rate": 4.255441176470589e-06, "loss": 1.8634, "step": 39081 }, { "epoch": 57.463065049614116, "grad_norm": 3.1877076625823975, "learning_rate": 4.2523529411764704e-06, "loss": 1.8748, "step": 39102 }, { "epoch": 57.49393605292172, "grad_norm": 4.445103645324707, "learning_rate": 4.249264705882354e-06, "loss": 1.9667, "step": 39123 }, { "epoch": 57.52480705622933, "grad_norm": 2.6126720905303955, "learning_rate": 4.246176470588236e-06, "loss": 1.8831, "step": 39144 }, { "epoch": 57.55567805953694, "grad_norm": 9.893830299377441, "learning_rate": 4.243088235294118e-06, "loss": 1.9223, "step": 39165 }, { "epoch": 57.58654906284454, "grad_norm": 4.020883083343506, "learning_rate": 4.24e-06, "loss": 1.929, "step": 39186 }, { "epoch": 57.61742006615215, "grad_norm": 3.605193614959717, "learning_rate": 4.2369117647058825e-06, "loss": 1.9813, "step": 39207 }, { "epoch": 57.64829106945976, "grad_norm": 3.3393421173095703, "learning_rate": 4.233823529411765e-06, "loss": 1.9114, "step": 39228 }, { "epoch": 57.67916207276736, "grad_norm": 2.7481040954589844, "learning_rate": 4.230735294117647e-06, "loss": 1.9614, "step": 39249 }, { "epoch": 57.71003307607497, "grad_norm": 3.0133328437805176, "learning_rate": 4.22764705882353e-06, "loss": 2.009, "step": 39270 }, { "epoch": 57.74090407938258, "grad_norm": 3.820744037628174, "learning_rate": 4.224705882352941e-06, "loss": 1.875, "step": 39291 }, { "epoch": 57.77177508269019, "grad_norm": 2.75616455078125, "learning_rate": 4.221617647058824e-06, "loss": 1.8574, "step": 39312 }, { "epoch": 57.80264608599779, "grad_norm": 4.997870922088623, "learning_rate": 4.218529411764706e-06, "loss": 1.9301, "step": 39333 }, { "epoch": 57.8335170893054, "grad_norm": 3.361882448196411, "learning_rate": 4.215441176470588e-06, "loss": 2.0114, "step": 39354 }, { "epoch": 57.86438809261301, "grad_norm": 5.0470051765441895, "learning_rate": 4.212352941176471e-06, "loss": 1.8796, "step": 39375 }, { "epoch": 57.895259095920615, "grad_norm": 3.978750705718994, "learning_rate": 4.209264705882353e-06, "loss": 1.8721, "step": 39396 }, { "epoch": 57.926130099228224, "grad_norm": 3.3868422508239746, "learning_rate": 4.206176470588236e-06, "loss": 2.0328, "step": 39417 }, { "epoch": 57.957001102535834, "grad_norm": 3.9059574604034424, "learning_rate": 4.203088235294118e-06, "loss": 1.9607, "step": 39438 }, { "epoch": 57.98787210584344, "grad_norm": 4.466953754425049, "learning_rate": 4.2000000000000004e-06, "loss": 2.0104, "step": 39459 }, { "epoch": 58.01764057331863, "grad_norm": 2.8705430030822754, "learning_rate": 4.196911764705883e-06, "loss": 1.9083, "step": 39480 }, { "epoch": 58.04851157662624, "grad_norm": 6.494654655456543, "learning_rate": 4.193823529411765e-06, "loss": 1.8572, "step": 39501 }, { "epoch": 58.07938257993385, "grad_norm": 2.6242873668670654, "learning_rate": 4.190735294117648e-06, "loss": 1.9564, "step": 39522 }, { "epoch": 58.11025358324146, "grad_norm": 2.3304924964904785, "learning_rate": 4.187647058823529e-06, "loss": 1.9432, "step": 39543 }, { "epoch": 58.14112458654906, "grad_norm": 3.651055335998535, "learning_rate": 4.1845588235294125e-06, "loss": 2.0734, "step": 39564 }, { "epoch": 58.17199558985667, "grad_norm": 3.2254247665405273, "learning_rate": 4.181470588235295e-06, "loss": 1.9215, "step": 39585 }, { "epoch": 58.20286659316428, "grad_norm": 5.1258673667907715, "learning_rate": 4.1783823529411765e-06, "loss": 1.9546, "step": 39606 }, { "epoch": 58.23373759647188, "grad_norm": 3.5844221115112305, "learning_rate": 4.175294117647059e-06, "loss": 1.9459, "step": 39627 }, { "epoch": 58.26460859977949, "grad_norm": 2.6659371852874756, "learning_rate": 4.172205882352941e-06, "loss": 1.928, "step": 39648 }, { "epoch": 58.2954796030871, "grad_norm": 2.1783547401428223, "learning_rate": 4.169117647058824e-06, "loss": 1.9072, "step": 39669 }, { "epoch": 58.326350606394705, "grad_norm": 4.000891208648682, "learning_rate": 4.166029411764706e-06, "loss": 1.9695, "step": 39690 }, { "epoch": 58.357221609702314, "grad_norm": 3.7255136966705322, "learning_rate": 4.1629411764705885e-06, "loss": 1.9389, "step": 39711 }, { "epoch": 58.388092613009924, "grad_norm": 2.2299540042877197, "learning_rate": 4.159852941176471e-06, "loss": 1.9481, "step": 39732 }, { "epoch": 58.418963616317534, "grad_norm": 3.9022533893585205, "learning_rate": 4.156764705882353e-06, "loss": 1.9535, "step": 39753 }, { "epoch": 58.449834619625136, "grad_norm": 3.5944011211395264, "learning_rate": 4.153676470588236e-06, "loss": 1.9841, "step": 39774 }, { "epoch": 58.480705622932746, "grad_norm": 3.4860410690307617, "learning_rate": 4.150588235294118e-06, "loss": 1.949, "step": 39795 }, { "epoch": 58.511576626240355, "grad_norm": 4.037249565124512, "learning_rate": 4.1475000000000005e-06, "loss": 1.9251, "step": 39816 }, { "epoch": 58.54244762954796, "grad_norm": 3.6486995220184326, "learning_rate": 4.144411764705883e-06, "loss": 1.8424, "step": 39837 }, { "epoch": 58.57331863285557, "grad_norm": 3.6549296379089355, "learning_rate": 4.1413235294117645e-06, "loss": 2.0349, "step": 39858 }, { "epoch": 58.60418963616318, "grad_norm": 1.733698844909668, "learning_rate": 4.138235294117648e-06, "loss": 1.9231, "step": 39879 }, { "epoch": 58.63506063947078, "grad_norm": 3.8009002208709717, "learning_rate": 4.13514705882353e-06, "loss": 1.9204, "step": 39900 }, { "epoch": 58.66593164277839, "grad_norm": 3.079911947250366, "learning_rate": 4.1320588235294126e-06, "loss": 1.8451, "step": 39921 }, { "epoch": 58.696802646086, "grad_norm": 3.641172409057617, "learning_rate": 4.128970588235294e-06, "loss": 1.9422, "step": 39942 }, { "epoch": 58.72767364939361, "grad_norm": 4.283759593963623, "learning_rate": 4.1258823529411765e-06, "loss": 1.9329, "step": 39963 }, { "epoch": 58.75854465270121, "grad_norm": 4.364279270172119, "learning_rate": 4.122794117647059e-06, "loss": 1.9049, "step": 39984 }, { "epoch": 58.78941565600882, "grad_norm": 4.069849491119385, "learning_rate": 4.119705882352942e-06, "loss": 1.9753, "step": 40005 }, { "epoch": 58.82028665931643, "grad_norm": 3.208591938018799, "learning_rate": 4.116617647058824e-06, "loss": 1.8429, "step": 40026 }, { "epoch": 58.85115766262403, "grad_norm": 3.662536859512329, "learning_rate": 4.113529411764706e-06, "loss": 2.0094, "step": 40047 }, { "epoch": 58.88202866593164, "grad_norm": 2.7158749103546143, "learning_rate": 4.1104411764705886e-06, "loss": 1.8566, "step": 40068 }, { "epoch": 58.91289966923925, "grad_norm": 2.6393043994903564, "learning_rate": 4.107352941176471e-06, "loss": 1.9133, "step": 40089 }, { "epoch": 58.943770672546854, "grad_norm": 2.2021429538726807, "learning_rate": 4.104264705882353e-06, "loss": 1.8988, "step": 40110 }, { "epoch": 58.974641675854464, "grad_norm": 4.174811363220215, "learning_rate": 4.101176470588236e-06, "loss": 1.9009, "step": 40131 }, { "epoch": 59.00441014332966, "grad_norm": 2.2998969554901123, "learning_rate": 4.098088235294118e-06, "loss": 1.8199, "step": 40152 }, { "epoch": 59.03528114663727, "grad_norm": 3.7070798873901367, "learning_rate": 4.095e-06, "loss": 1.9728, "step": 40173 }, { "epoch": 59.06615214994488, "grad_norm": 4.332019805908203, "learning_rate": 4.091911764705882e-06, "loss": 1.898, "step": 40194 }, { "epoch": 59.09702315325248, "grad_norm": 3.6003835201263428, "learning_rate": 4.088823529411765e-06, "loss": 1.8593, "step": 40215 }, { "epoch": 59.12789415656009, "grad_norm": 3.1340019702911377, "learning_rate": 4.085735294117648e-06, "loss": 2.0198, "step": 40236 }, { "epoch": 59.1587651598677, "grad_norm": 3.1857857704162598, "learning_rate": 4.082647058823529e-06, "loss": 1.9584, "step": 40257 }, { "epoch": 59.1896361631753, "grad_norm": 2.4936697483062744, "learning_rate": 4.079558823529412e-06, "loss": 2.0068, "step": 40278 }, { "epoch": 59.22050716648291, "grad_norm": 2.369525194168091, "learning_rate": 4.076470588235294e-06, "loss": 1.8508, "step": 40299 }, { "epoch": 59.25137816979052, "grad_norm": 4.389956474304199, "learning_rate": 4.073382352941177e-06, "loss": 1.8506, "step": 40320 }, { "epoch": 59.28224917309812, "grad_norm": 4.037673473358154, "learning_rate": 4.070294117647059e-06, "loss": 1.9152, "step": 40341 }, { "epoch": 59.31312017640573, "grad_norm": 2.122819423675537, "learning_rate": 4.067205882352941e-06, "loss": 1.9359, "step": 40362 }, { "epoch": 59.34399117971334, "grad_norm": 3.4233779907226562, "learning_rate": 4.064117647058824e-06, "loss": 1.8605, "step": 40383 }, { "epoch": 59.37486218302095, "grad_norm": 4.786858558654785, "learning_rate": 4.061029411764706e-06, "loss": 2.0092, "step": 40404 }, { "epoch": 59.405733186328554, "grad_norm": 4.765273094177246, "learning_rate": 4.057941176470589e-06, "loss": 1.9166, "step": 40425 }, { "epoch": 59.436604189636164, "grad_norm": 4.306839942932129, "learning_rate": 4.054852941176471e-06, "loss": 2.0227, "step": 40446 }, { "epoch": 59.46747519294377, "grad_norm": 3.4277589321136475, "learning_rate": 4.0517647058823535e-06, "loss": 2.0131, "step": 40467 }, { "epoch": 59.498346196251376, "grad_norm": 4.447136402130127, "learning_rate": 4.048676470588236e-06, "loss": 1.9436, "step": 40488 }, { "epoch": 59.529217199558985, "grad_norm": 3.223224639892578, "learning_rate": 4.0455882352941174e-06, "loss": 1.9304, "step": 40509 }, { "epoch": 59.560088202866595, "grad_norm": 4.869384765625, "learning_rate": 4.042500000000001e-06, "loss": 1.931, "step": 40530 }, { "epoch": 59.5909592061742, "grad_norm": 3.169881582260132, "learning_rate": 4.039411764705883e-06, "loss": 2.0494, "step": 40551 }, { "epoch": 59.62183020948181, "grad_norm": 3.949069023132324, "learning_rate": 4.0363235294117655e-06, "loss": 2.0236, "step": 40572 }, { "epoch": 59.65270121278942, "grad_norm": 3.799015760421753, "learning_rate": 4.033235294117647e-06, "loss": 1.9279, "step": 40593 }, { "epoch": 59.683572216097026, "grad_norm": 3.3265721797943115, "learning_rate": 4.0301470588235295e-06, "loss": 1.8821, "step": 40614 }, { "epoch": 59.71444321940463, "grad_norm": 2.7146968841552734, "learning_rate": 4.027058823529412e-06, "loss": 1.8791, "step": 40635 }, { "epoch": 59.74531422271224, "grad_norm": 4.244442939758301, "learning_rate": 4.023970588235294e-06, "loss": 1.8035, "step": 40656 }, { "epoch": 59.77618522601985, "grad_norm": 4.381660461425781, "learning_rate": 4.020882352941177e-06, "loss": 1.9229, "step": 40677 }, { "epoch": 59.80705622932745, "grad_norm": 2.615638494491577, "learning_rate": 4.017794117647059e-06, "loss": 1.9625, "step": 40698 }, { "epoch": 59.83792723263506, "grad_norm": 5.6237406730651855, "learning_rate": 4.0147058823529415e-06, "loss": 1.913, "step": 40719 }, { "epoch": 59.86879823594267, "grad_norm": 4.14906120300293, "learning_rate": 4.011617647058824e-06, "loss": 2.0229, "step": 40740 }, { "epoch": 59.89966923925027, "grad_norm": 3.3856656551361084, "learning_rate": 4.008529411764706e-06, "loss": 1.9636, "step": 40761 }, { "epoch": 59.93054024255788, "grad_norm": 4.339720726013184, "learning_rate": 4.005441176470589e-06, "loss": 1.8363, "step": 40782 }, { "epoch": 59.96141124586549, "grad_norm": 4.352692604064941, "learning_rate": 4.002352941176471e-06, "loss": 1.8805, "step": 40803 }, { "epoch": 59.9922822491731, "grad_norm": 2.7439634799957275, "learning_rate": 3.999264705882353e-06, "loss": 1.8999, "step": 40824 }, { "epoch": 60.022050716648295, "grad_norm": 2.715585470199585, "learning_rate": 3.996176470588236e-06, "loss": 1.7619, "step": 40845 }, { "epoch": 60.0529217199559, "grad_norm": 2.4881930351257324, "learning_rate": 3.993088235294118e-06, "loss": 1.8955, "step": 40866 }, { "epoch": 60.08379272326351, "grad_norm": 2.317598581314087, "learning_rate": 3.990000000000001e-06, "loss": 1.8934, "step": 40887 }, { "epoch": 60.114663726571116, "grad_norm": 3.728177070617676, "learning_rate": 3.986911764705882e-06, "loss": 1.9266, "step": 40908 }, { "epoch": 60.14553472987872, "grad_norm": 4.414181709289551, "learning_rate": 3.983823529411765e-06, "loss": 1.9611, "step": 40929 }, { "epoch": 60.17640573318633, "grad_norm": 4.093717098236084, "learning_rate": 3.980735294117647e-06, "loss": 2.0178, "step": 40950 }, { "epoch": 60.20727673649394, "grad_norm": 3.0389323234558105, "learning_rate": 3.97764705882353e-06, "loss": 1.8968, "step": 40971 }, { "epoch": 60.23814773980154, "grad_norm": 3.598341703414917, "learning_rate": 3.974558823529412e-06, "loss": 1.9444, "step": 40992 }, { "epoch": 60.26901874310915, "grad_norm": 3.166620969772339, "learning_rate": 3.971470588235294e-06, "loss": 1.9725, "step": 41013 }, { "epoch": 60.29988974641676, "grad_norm": 3.8962793350219727, "learning_rate": 3.968382352941177e-06, "loss": 1.9449, "step": 41034 }, { "epoch": 60.33076074972437, "grad_norm": 3.549582004547119, "learning_rate": 3.965294117647059e-06, "loss": 1.9307, "step": 41055 }, { "epoch": 60.36163175303197, "grad_norm": 3.9438445568084717, "learning_rate": 3.962205882352942e-06, "loss": 1.7265, "step": 41076 }, { "epoch": 60.39250275633958, "grad_norm": 3.4334535598754883, "learning_rate": 3.959117647058824e-06, "loss": 2.1025, "step": 41097 }, { "epoch": 60.42337375964719, "grad_norm": 3.3093438148498535, "learning_rate": 3.956029411764706e-06, "loss": 1.9701, "step": 41118 }, { "epoch": 60.454244762954794, "grad_norm": 3.4064624309539795, "learning_rate": 3.952941176470588e-06, "loss": 1.9215, "step": 41139 }, { "epoch": 60.4851157662624, "grad_norm": 3.350752115249634, "learning_rate": 3.94985294117647e-06, "loss": 1.8258, "step": 41160 }, { "epoch": 60.51598676957001, "grad_norm": 2.869331121444702, "learning_rate": 3.946764705882354e-06, "loss": 1.9859, "step": 41181 }, { "epoch": 60.546857772877615, "grad_norm": 3.6472251415252686, "learning_rate": 3.943676470588236e-06, "loss": 1.9357, "step": 41202 }, { "epoch": 60.577728776185225, "grad_norm": 2.437091588973999, "learning_rate": 3.940588235294118e-06, "loss": 1.9193, "step": 41223 }, { "epoch": 60.608599779492835, "grad_norm": 2.6356356143951416, "learning_rate": 3.9375e-06, "loss": 1.9248, "step": 41244 }, { "epoch": 60.639470782800444, "grad_norm": 3.534963607788086, "learning_rate": 3.934411764705882e-06, "loss": 1.9316, "step": 41265 }, { "epoch": 60.67034178610805, "grad_norm": 3.524622917175293, "learning_rate": 3.931323529411765e-06, "loss": 1.925, "step": 41286 }, { "epoch": 60.701212789415656, "grad_norm": 2.3731114864349365, "learning_rate": 3.928235294117647e-06, "loss": 1.9684, "step": 41307 }, { "epoch": 60.732083792723266, "grad_norm": 3.07844877243042, "learning_rate": 3.92514705882353e-06, "loss": 1.8267, "step": 41328 }, { "epoch": 60.76295479603087, "grad_norm": 2.6213393211364746, "learning_rate": 3.922058823529412e-06, "loss": 2.0695, "step": 41349 }, { "epoch": 60.79382579933848, "grad_norm": 3.1743831634521484, "learning_rate": 3.9189705882352944e-06, "loss": 1.9629, "step": 41370 }, { "epoch": 60.82469680264609, "grad_norm": 2.5164220333099365, "learning_rate": 3.915882352941177e-06, "loss": 1.9924, "step": 41391 }, { "epoch": 60.85556780595369, "grad_norm": 3.796934127807617, "learning_rate": 3.912794117647059e-06, "loss": 2.0357, "step": 41412 }, { "epoch": 60.8864388092613, "grad_norm": 3.6090779304504395, "learning_rate": 3.909705882352942e-06, "loss": 1.991, "step": 41433 }, { "epoch": 60.91730981256891, "grad_norm": 2.6030068397521973, "learning_rate": 3.906617647058824e-06, "loss": 1.95, "step": 41454 }, { "epoch": 60.94818081587652, "grad_norm": 4.1402812004089355, "learning_rate": 3.903529411764706e-06, "loss": 1.9394, "step": 41475 }, { "epoch": 60.97905181918412, "grad_norm": 2.181687831878662, "learning_rate": 3.900441176470589e-06, "loss": 1.9234, "step": 41496 }, { "epoch": 61.008820286659315, "grad_norm": 1.9509862661361694, "learning_rate": 3.897352941176471e-06, "loss": 1.8729, "step": 41517 }, { "epoch": 61.039691289966925, "grad_norm": 2.7003443241119385, "learning_rate": 3.894264705882354e-06, "loss": 1.9133, "step": 41538 }, { "epoch": 61.070562293274534, "grad_norm": 3.7679860591888428, "learning_rate": 3.891176470588235e-06, "loss": 1.9412, "step": 41559 }, { "epoch": 61.10143329658214, "grad_norm": 3.2010719776153564, "learning_rate": 3.888088235294118e-06, "loss": 2.0401, "step": 41580 }, { "epoch": 61.132304299889746, "grad_norm": 3.0256950855255127, "learning_rate": 3.885e-06, "loss": 1.9308, "step": 41601 }, { "epoch": 61.163175303197356, "grad_norm": 3.4648752212524414, "learning_rate": 3.881911764705883e-06, "loss": 1.9297, "step": 41622 }, { "epoch": 61.19404630650496, "grad_norm": 2.3592138290405273, "learning_rate": 3.878823529411765e-06, "loss": 1.8122, "step": 41643 }, { "epoch": 61.22491730981257, "grad_norm": 3.0630240440368652, "learning_rate": 3.875735294117647e-06, "loss": 1.8488, "step": 41664 }, { "epoch": 61.25578831312018, "grad_norm": 5.008218288421631, "learning_rate": 3.87264705882353e-06, "loss": 1.9932, "step": 41685 }, { "epoch": 61.28665931642779, "grad_norm": 2.6968369483947754, "learning_rate": 3.869558823529412e-06, "loss": 1.9082, "step": 41706 }, { "epoch": 61.31753031973539, "grad_norm": 4.477757453918457, "learning_rate": 3.8664705882352945e-06, "loss": 1.9036, "step": 41727 }, { "epoch": 61.348401323043, "grad_norm": 3.729843854904175, "learning_rate": 3.863382352941177e-06, "loss": 1.909, "step": 41748 }, { "epoch": 61.37927232635061, "grad_norm": 3.1282410621643066, "learning_rate": 3.860294117647059e-06, "loss": 1.8379, "step": 41769 }, { "epoch": 61.41014332965821, "grad_norm": 3.1452572345733643, "learning_rate": 3.857205882352941e-06, "loss": 2.0447, "step": 41790 }, { "epoch": 61.44101433296582, "grad_norm": 4.679071426391602, "learning_rate": 3.854117647058824e-06, "loss": 1.9804, "step": 41811 }, { "epoch": 61.47188533627343, "grad_norm": 2.528411626815796, "learning_rate": 3.8510294117647066e-06, "loss": 1.9449, "step": 41832 }, { "epoch": 61.50275633958103, "grad_norm": 6.531820774078369, "learning_rate": 3.847941176470589e-06, "loss": 1.9072, "step": 41853 }, { "epoch": 61.53362734288864, "grad_norm": 4.075747489929199, "learning_rate": 3.8448529411764705e-06, "loss": 1.8394, "step": 41874 }, { "epoch": 61.56449834619625, "grad_norm": 4.023922443389893, "learning_rate": 3.841764705882353e-06, "loss": 1.9253, "step": 41895 }, { "epoch": 61.59536934950386, "grad_norm": 2.489851474761963, "learning_rate": 3.838676470588235e-06, "loss": 1.8775, "step": 41916 }, { "epoch": 61.626240352811465, "grad_norm": 2.486384391784668, "learning_rate": 3.835588235294118e-06, "loss": 1.9717, "step": 41937 }, { "epoch": 61.657111356119074, "grad_norm": 3.7841453552246094, "learning_rate": 3.8325e-06, "loss": 2.0282, "step": 41958 }, { "epoch": 61.687982359426684, "grad_norm": 3.1523964405059814, "learning_rate": 3.8294117647058826e-06, "loss": 1.7949, "step": 41979 }, { "epoch": 61.718853362734286, "grad_norm": 4.808964729309082, "learning_rate": 3.826323529411765e-06, "loss": 2.0045, "step": 42000 }, { "epoch": 61.749724366041896, "grad_norm": 3.5830745697021484, "learning_rate": 3.823235294117647e-06, "loss": 1.9259, "step": 42021 }, { "epoch": 61.780595369349506, "grad_norm": 2.7013514041900635, "learning_rate": 3.82014705882353e-06, "loss": 2.0325, "step": 42042 }, { "epoch": 61.81146637265711, "grad_norm": 3.859224319458008, "learning_rate": 3.817058823529412e-06, "loss": 1.8894, "step": 42063 }, { "epoch": 61.84233737596472, "grad_norm": 3.401679039001465, "learning_rate": 3.8139705882352946e-06, "loss": 2.0425, "step": 42084 }, { "epoch": 61.87320837927233, "grad_norm": 1.9202418327331543, "learning_rate": 3.810882352941177e-06, "loss": 1.8394, "step": 42105 }, { "epoch": 61.90407938257994, "grad_norm": 7.001397609710693, "learning_rate": 3.807794117647059e-06, "loss": 1.8209, "step": 42126 }, { "epoch": 61.93495038588754, "grad_norm": 2.94620418548584, "learning_rate": 3.8047058823529414e-06, "loss": 1.904, "step": 42147 }, { "epoch": 61.96582138919515, "grad_norm": 3.4998152256011963, "learning_rate": 3.801617647058824e-06, "loss": 1.8276, "step": 42168 }, { "epoch": 61.99669239250276, "grad_norm": 3.2533297538757324, "learning_rate": 3.7985294117647066e-06, "loss": 1.9442, "step": 42189 }, { "epoch": 62.02646085997795, "grad_norm": 3.0139052867889404, "learning_rate": 3.7954411764705886e-06, "loss": 1.9434, "step": 42210 }, { "epoch": 62.057331863285555, "grad_norm": 3.4162235260009766, "learning_rate": 3.792352941176471e-06, "loss": 1.9782, "step": 42231 }, { "epoch": 62.088202866593164, "grad_norm": 3.0147082805633545, "learning_rate": 3.7892647058823534e-06, "loss": 2.0135, "step": 42252 }, { "epoch": 62.119073869900774, "grad_norm": 3.0080668926239014, "learning_rate": 3.786323529411765e-06, "loss": 1.8325, "step": 42273 }, { "epoch": 62.149944873208376, "grad_norm": 4.046768665313721, "learning_rate": 3.7832352941176478e-06, "loss": 1.9708, "step": 42294 }, { "epoch": 62.180815876515986, "grad_norm": 4.137613296508789, "learning_rate": 3.7801470588235293e-06, "loss": 1.9496, "step": 42315 }, { "epoch": 62.211686879823596, "grad_norm": 2.2553725242614746, "learning_rate": 3.777058823529412e-06, "loss": 1.9935, "step": 42336 }, { "epoch": 62.242557883131205, "grad_norm": 4.005845546722412, "learning_rate": 3.7739705882352946e-06, "loss": 1.9029, "step": 42357 }, { "epoch": 62.27342888643881, "grad_norm": 5.477290153503418, "learning_rate": 3.770882352941177e-06, "loss": 1.8967, "step": 42378 }, { "epoch": 62.30429988974642, "grad_norm": 3.554682493209839, "learning_rate": 3.767794117647059e-06, "loss": 1.9269, "step": 42399 }, { "epoch": 62.33517089305403, "grad_norm": 3.608692169189453, "learning_rate": 3.7647058823529414e-06, "loss": 2.0362, "step": 42420 }, { "epoch": 62.36604189636163, "grad_norm": 2.894974946975708, "learning_rate": 3.7616176470588238e-06, "loss": 1.8709, "step": 42441 }, { "epoch": 62.39691289966924, "grad_norm": 2.4054529666900635, "learning_rate": 3.7585294117647066e-06, "loss": 1.8265, "step": 42462 }, { "epoch": 62.42778390297685, "grad_norm": 1.8542652130126953, "learning_rate": 3.7554411764705886e-06, "loss": 1.9014, "step": 42483 }, { "epoch": 62.45865490628445, "grad_norm": 2.626800298690796, "learning_rate": 3.752352941176471e-06, "loss": 1.9818, "step": 42504 }, { "epoch": 62.48952590959206, "grad_norm": 2.8472249507904053, "learning_rate": 3.7492647058823534e-06, "loss": 1.8509, "step": 42525 }, { "epoch": 62.52039691289967, "grad_norm": 2.975815773010254, "learning_rate": 3.746176470588236e-06, "loss": 1.8938, "step": 42546 }, { "epoch": 62.55126791620728, "grad_norm": 4.610398292541504, "learning_rate": 3.7430882352941178e-06, "loss": 1.9719, "step": 42567 }, { "epoch": 62.58213891951488, "grad_norm": 3.526193857192993, "learning_rate": 3.74e-06, "loss": 1.9092, "step": 42588 }, { "epoch": 62.61300992282249, "grad_norm": 3.347182273864746, "learning_rate": 3.7369117647058826e-06, "loss": 1.8197, "step": 42609 }, { "epoch": 62.6438809261301, "grad_norm": 4.83082914352417, "learning_rate": 3.7338235294117646e-06, "loss": 1.9177, "step": 42630 }, { "epoch": 62.674751929437704, "grad_norm": 3.5829532146453857, "learning_rate": 3.7307352941176474e-06, "loss": 1.85, "step": 42651 }, { "epoch": 62.705622932745314, "grad_norm": 4.986013412475586, "learning_rate": 3.72764705882353e-06, "loss": 1.9052, "step": 42672 }, { "epoch": 62.73649393605292, "grad_norm": 2.9733543395996094, "learning_rate": 3.7245588235294122e-06, "loss": 1.8864, "step": 42693 }, { "epoch": 62.767364939360526, "grad_norm": 2.543276786804199, "learning_rate": 3.7214705882352942e-06, "loss": 1.9512, "step": 42714 }, { "epoch": 62.798235942668136, "grad_norm": 4.0189008712768555, "learning_rate": 3.7183823529411766e-06, "loss": 1.9464, "step": 42735 }, { "epoch": 62.829106945975745, "grad_norm": 3.073291778564453, "learning_rate": 3.715294117647059e-06, "loss": 1.9451, "step": 42756 }, { "epoch": 62.859977949283355, "grad_norm": 2.664767265319824, "learning_rate": 3.712205882352942e-06, "loss": 1.8669, "step": 42777 }, { "epoch": 62.89084895259096, "grad_norm": 4.056784629821777, "learning_rate": 3.7091176470588234e-06, "loss": 1.9596, "step": 42798 }, { "epoch": 62.92171995589857, "grad_norm": 3.4414587020874023, "learning_rate": 3.7060294117647062e-06, "loss": 2.0251, "step": 42819 }, { "epoch": 62.95259095920618, "grad_norm": 2.6176438331604004, "learning_rate": 3.7029411764705887e-06, "loss": 1.8698, "step": 42840 }, { "epoch": 62.98346196251378, "grad_norm": 3.1629638671875, "learning_rate": 3.699852941176471e-06, "loss": 1.9198, "step": 42861 }, { "epoch": 63.01323042998897, "grad_norm": 3.665431499481201, "learning_rate": 3.696764705882353e-06, "loss": 1.8901, "step": 42882 }, { "epoch": 63.04410143329658, "grad_norm": 3.112264633178711, "learning_rate": 3.6936764705882355e-06, "loss": 1.9036, "step": 42903 }, { "epoch": 63.07497243660419, "grad_norm": 3.9984519481658936, "learning_rate": 3.690588235294118e-06, "loss": 1.8781, "step": 42924 }, { "epoch": 63.105843439911794, "grad_norm": 5.150393962860107, "learning_rate": 3.6875000000000007e-06, "loss": 1.8854, "step": 42945 }, { "epoch": 63.136714443219404, "grad_norm": 3.412156581878662, "learning_rate": 3.6844117647058823e-06, "loss": 1.802, "step": 42966 }, { "epoch": 63.16758544652701, "grad_norm": 6.005765914916992, "learning_rate": 3.681323529411765e-06, "loss": 1.9265, "step": 42987 }, { "epoch": 63.19845644983462, "grad_norm": 5.114622116088867, "learning_rate": 3.6782352941176475e-06, "loss": 1.8775, "step": 43008 }, { "epoch": 63.229327453142226, "grad_norm": 2.2738149166107178, "learning_rate": 3.67514705882353e-06, "loss": 1.8592, "step": 43029 }, { "epoch": 63.260198456449835, "grad_norm": 2.666557550430298, "learning_rate": 3.672058823529412e-06, "loss": 1.9617, "step": 43050 }, { "epoch": 63.291069459757445, "grad_norm": 3.4220707416534424, "learning_rate": 3.6689705882352943e-06, "loss": 1.868, "step": 43071 }, { "epoch": 63.32194046306505, "grad_norm": 3.1859641075134277, "learning_rate": 3.6658823529411767e-06, "loss": 1.992, "step": 43092 }, { "epoch": 63.35281146637266, "grad_norm": 2.9682886600494385, "learning_rate": 3.6627941176470595e-06, "loss": 1.9371, "step": 43113 }, { "epoch": 63.38368246968027, "grad_norm": 3.4023351669311523, "learning_rate": 3.6597058823529415e-06, "loss": 2.0701, "step": 43134 }, { "epoch": 63.41455347298787, "grad_norm": 4.126820087432861, "learning_rate": 3.656617647058824e-06, "loss": 1.921, "step": 43155 }, { "epoch": 63.44542447629548, "grad_norm": 4.44788122177124, "learning_rate": 3.6535294117647063e-06, "loss": 1.9279, "step": 43176 }, { "epoch": 63.47629547960309, "grad_norm": 2.9419105052948, "learning_rate": 3.6504411764705883e-06, "loss": 2.0075, "step": 43197 }, { "epoch": 63.5071664829107, "grad_norm": 3.8594865798950195, "learning_rate": 3.6473529411764707e-06, "loss": 1.9323, "step": 43218 }, { "epoch": 63.5380374862183, "grad_norm": 3.3175740242004395, "learning_rate": 3.644264705882353e-06, "loss": 1.9389, "step": 43239 }, { "epoch": 63.56890848952591, "grad_norm": 3.7132248878479004, "learning_rate": 3.641176470588236e-06, "loss": 1.8389, "step": 43260 }, { "epoch": 63.59977949283352, "grad_norm": 3.3033804893493652, "learning_rate": 3.6380882352941175e-06, "loss": 1.8729, "step": 43281 }, { "epoch": 63.63065049614112, "grad_norm": 3.7543158531188965, "learning_rate": 3.6350000000000003e-06, "loss": 2.0157, "step": 43302 }, { "epoch": 63.66152149944873, "grad_norm": 3.294509172439575, "learning_rate": 3.6319117647058828e-06, "loss": 1.9394, "step": 43323 }, { "epoch": 63.69239250275634, "grad_norm": 4.56433629989624, "learning_rate": 3.628823529411765e-06, "loss": 1.8915, "step": 43344 }, { "epoch": 63.723263506063944, "grad_norm": 5.944384574890137, "learning_rate": 3.625735294117647e-06, "loss": 1.922, "step": 43365 }, { "epoch": 63.75413450937155, "grad_norm": 3.938891649246216, "learning_rate": 3.6226470588235296e-06, "loss": 1.8581, "step": 43386 }, { "epoch": 63.78500551267916, "grad_norm": 3.1082050800323486, "learning_rate": 3.619558823529412e-06, "loss": 1.9946, "step": 43407 }, { "epoch": 63.81587651598677, "grad_norm": 5.416922092437744, "learning_rate": 3.616470588235295e-06, "loss": 1.9182, "step": 43428 }, { "epoch": 63.846747519294375, "grad_norm": 3.1854794025421143, "learning_rate": 3.6133823529411764e-06, "loss": 1.9764, "step": 43449 }, { "epoch": 63.877618522601985, "grad_norm": 2.387310266494751, "learning_rate": 3.610294117647059e-06, "loss": 1.9518, "step": 43470 }, { "epoch": 63.908489525909594, "grad_norm": 2.7831242084503174, "learning_rate": 3.6072058823529416e-06, "loss": 2.0162, "step": 43491 }, { "epoch": 63.9393605292172, "grad_norm": 5.028627395629883, "learning_rate": 3.604117647058824e-06, "loss": 1.9794, "step": 43512 }, { "epoch": 63.97023153252481, "grad_norm": 2.802438735961914, "learning_rate": 3.601029411764706e-06, "loss": 1.9462, "step": 43533 }, { "epoch": 64.0, "grad_norm": 2.149486541748047, "learning_rate": 3.5979411764705884e-06, "loss": 1.9908, "step": 43554 }, { "epoch": 64.03087100330761, "grad_norm": 1.583329200744629, "learning_rate": 3.594852941176471e-06, "loss": 1.8816, "step": 43575 }, { "epoch": 64.06174200661522, "grad_norm": 3.522852897644043, "learning_rate": 3.5917647058823536e-06, "loss": 1.9576, "step": 43596 }, { "epoch": 64.09261300992283, "grad_norm": 4.718653202056885, "learning_rate": 3.5886764705882356e-06, "loss": 1.9779, "step": 43617 }, { "epoch": 64.12348401323042, "grad_norm": 3.8362715244293213, "learning_rate": 3.585588235294118e-06, "loss": 2.0029, "step": 43638 }, { "epoch": 64.15435501653803, "grad_norm": 4.718454837799072, "learning_rate": 3.5825000000000004e-06, "loss": 1.892, "step": 43659 }, { "epoch": 64.18522601984564, "grad_norm": 3.2513906955718994, "learning_rate": 3.579411764705883e-06, "loss": 1.8761, "step": 43680 }, { "epoch": 64.21609702315325, "grad_norm": 3.455601930618286, "learning_rate": 3.576323529411765e-06, "loss": 1.9048, "step": 43701 }, { "epoch": 64.24696802646086, "grad_norm": 3.450056314468384, "learning_rate": 3.5732352941176472e-06, "loss": 1.8093, "step": 43722 }, { "epoch": 64.27783902976847, "grad_norm": 3.4924564361572266, "learning_rate": 3.57014705882353e-06, "loss": 2.0017, "step": 43743 }, { "epoch": 64.30871003307608, "grad_norm": 2.2506611347198486, "learning_rate": 3.5670588235294116e-06, "loss": 1.908, "step": 43764 }, { "epoch": 64.33958103638368, "grad_norm": 2.894577741622925, "learning_rate": 3.5639705882352944e-06, "loss": 1.8829, "step": 43785 }, { "epoch": 64.37045203969129, "grad_norm": 3.0532851219177246, "learning_rate": 3.560882352941177e-06, "loss": 1.9385, "step": 43806 }, { "epoch": 64.4013230429989, "grad_norm": 3.099576950073242, "learning_rate": 3.5577941176470593e-06, "loss": 1.9445, "step": 43827 }, { "epoch": 64.4321940463065, "grad_norm": 2.37949800491333, "learning_rate": 3.5547058823529412e-06, "loss": 1.9364, "step": 43848 }, { "epoch": 64.46306504961412, "grad_norm": 2.5078563690185547, "learning_rate": 3.5516176470588237e-06, "loss": 1.9129, "step": 43869 }, { "epoch": 64.49393605292173, "grad_norm": 4.141856670379639, "learning_rate": 3.548529411764706e-06, "loss": 2.0111, "step": 43890 }, { "epoch": 64.52480705622932, "grad_norm": 3.458948850631714, "learning_rate": 3.545441176470589e-06, "loss": 1.8475, "step": 43911 }, { "epoch": 64.55567805953693, "grad_norm": 2.35848331451416, "learning_rate": 3.5423529411764705e-06, "loss": 1.8873, "step": 43932 }, { "epoch": 64.58654906284454, "grad_norm": 1.566249966621399, "learning_rate": 3.5392647058823533e-06, "loss": 1.9103, "step": 43953 }, { "epoch": 64.61742006615215, "grad_norm": 1.9278273582458496, "learning_rate": 3.5361764705882357e-06, "loss": 2.0133, "step": 43974 }, { "epoch": 64.64829106945976, "grad_norm": 2.555781602859497, "learning_rate": 3.533088235294118e-06, "loss": 1.9427, "step": 43995 }, { "epoch": 64.67916207276737, "grad_norm": 2.0133771896362305, "learning_rate": 3.53e-06, "loss": 1.8819, "step": 44016 }, { "epoch": 64.71003307607498, "grad_norm": 2.233060836791992, "learning_rate": 3.5269117647058825e-06, "loss": 1.9532, "step": 44037 }, { "epoch": 64.74090407938257, "grad_norm": 2.3632125854492188, "learning_rate": 3.523823529411765e-06, "loss": 2.0418, "step": 44058 }, { "epoch": 64.77177508269018, "grad_norm": 2.2553939819335938, "learning_rate": 3.5207352941176477e-06, "loss": 1.9156, "step": 44079 }, { "epoch": 64.8026460859978, "grad_norm": 2.7149715423583984, "learning_rate": 3.5176470588235297e-06, "loss": 1.8637, "step": 44100 }, { "epoch": 64.8335170893054, "grad_norm": 2.9639580249786377, "learning_rate": 3.514558823529412e-06, "loss": 2.0403, "step": 44121 }, { "epoch": 64.86438809261301, "grad_norm": 2.638185501098633, "learning_rate": 3.5114705882352945e-06, "loss": 1.8637, "step": 44142 }, { "epoch": 64.89525909592062, "grad_norm": 3.105957508087158, "learning_rate": 3.508382352941177e-06, "loss": 1.8609, "step": 44163 }, { "epoch": 64.92613009922823, "grad_norm": 3.1245245933532715, "learning_rate": 3.505294117647059e-06, "loss": 1.975, "step": 44184 }, { "epoch": 64.95700110253583, "grad_norm": 3.7934014797210693, "learning_rate": 3.5022058823529413e-06, "loss": 1.8569, "step": 44205 }, { "epoch": 64.98787210584344, "grad_norm": 3.1689770221710205, "learning_rate": 3.499117647058824e-06, "loss": 1.8512, "step": 44226 }, { "epoch": 65.01764057331863, "grad_norm": 3.0920183658599854, "learning_rate": 3.4960294117647057e-06, "loss": 1.9532, "step": 44247 }, { "epoch": 65.04851157662624, "grad_norm": 3.5727484226226807, "learning_rate": 3.4929411764705885e-06, "loss": 1.9889, "step": 44268 }, { "epoch": 65.07938257993385, "grad_norm": 3.3157949447631836, "learning_rate": 3.489852941176471e-06, "loss": 1.9301, "step": 44289 }, { "epoch": 65.11025358324146, "grad_norm": 3.619797706604004, "learning_rate": 3.4867647058823534e-06, "loss": 1.8576, "step": 44310 }, { "epoch": 65.14112458654907, "grad_norm": 3.2026426792144775, "learning_rate": 3.4836764705882353e-06, "loss": 1.921, "step": 44331 }, { "epoch": 65.17199558985666, "grad_norm": 3.282071590423584, "learning_rate": 3.4805882352941178e-06, "loss": 1.9054, "step": 44352 }, { "epoch": 65.20286659316427, "grad_norm": 3.0602283477783203, "learning_rate": 3.4775e-06, "loss": 1.8801, "step": 44373 }, { "epoch": 65.23373759647188, "grad_norm": 2.858013153076172, "learning_rate": 3.474411764705883e-06, "loss": 1.9293, "step": 44394 }, { "epoch": 65.26460859977949, "grad_norm": 2.4385263919830322, "learning_rate": 3.4713235294117646e-06, "loss": 1.9301, "step": 44415 }, { "epoch": 65.2954796030871, "grad_norm": 2.3531248569488525, "learning_rate": 3.4682352941176474e-06, "loss": 1.8672, "step": 44436 }, { "epoch": 65.32635060639471, "grad_norm": 1.9937540292739868, "learning_rate": 3.46514705882353e-06, "loss": 1.943, "step": 44457 }, { "epoch": 65.35722160970232, "grad_norm": 3.762532949447632, "learning_rate": 3.462058823529412e-06, "loss": 2.009, "step": 44478 }, { "epoch": 65.38809261300992, "grad_norm": 5.392084121704102, "learning_rate": 3.458970588235294e-06, "loss": 1.8838, "step": 44499 }, { "epoch": 65.41896361631753, "grad_norm": 4.146719455718994, "learning_rate": 3.4558823529411766e-06, "loss": 2.0133, "step": 44520 }, { "epoch": 65.44983461962514, "grad_norm": 3.198917865753174, "learning_rate": 3.452794117647059e-06, "loss": 1.931, "step": 44541 }, { "epoch": 65.48070562293275, "grad_norm": 2.3276944160461426, "learning_rate": 3.449705882352942e-06, "loss": 1.9509, "step": 44562 }, { "epoch": 65.51157662624036, "grad_norm": 4.281065464019775, "learning_rate": 3.446617647058824e-06, "loss": 2.0162, "step": 44583 }, { "epoch": 65.54244762954796, "grad_norm": 3.0045394897460938, "learning_rate": 3.4435294117647062e-06, "loss": 1.8638, "step": 44604 }, { "epoch": 65.57331863285557, "grad_norm": 2.706193685531616, "learning_rate": 3.4404411764705886e-06, "loss": 1.8856, "step": 44625 }, { "epoch": 65.60418963616317, "grad_norm": 2.558762311935425, "learning_rate": 3.437352941176471e-06, "loss": 1.9901, "step": 44646 }, { "epoch": 65.63506063947078, "grad_norm": 4.286210060119629, "learning_rate": 3.434264705882353e-06, "loss": 1.947, "step": 44667 }, { "epoch": 65.66593164277839, "grad_norm": 2.2886972427368164, "learning_rate": 3.4311764705882354e-06, "loss": 1.8063, "step": 44688 }, { "epoch": 65.696802646086, "grad_norm": 3.8049190044403076, "learning_rate": 3.4280882352941183e-06, "loss": 1.9504, "step": 44709 }, { "epoch": 65.72767364939361, "grad_norm": 3.603228807449341, "learning_rate": 3.4250000000000007e-06, "loss": 1.9688, "step": 44730 }, { "epoch": 65.75854465270122, "grad_norm": 3.1351122856140137, "learning_rate": 3.4219117647058826e-06, "loss": 1.9054, "step": 44751 }, { "epoch": 65.78941565600881, "grad_norm": 4.01122522354126, "learning_rate": 3.418823529411765e-06, "loss": 1.8873, "step": 44772 }, { "epoch": 65.82028665931642, "grad_norm": 3.1316027641296387, "learning_rate": 3.4157352941176475e-06, "loss": 2.0009, "step": 44793 }, { "epoch": 65.85115766262403, "grad_norm": 4.030690670013428, "learning_rate": 3.4126470588235294e-06, "loss": 1.8867, "step": 44814 }, { "epoch": 65.88202866593164, "grad_norm": 2.3613288402557373, "learning_rate": 3.409558823529412e-06, "loss": 1.922, "step": 44835 }, { "epoch": 65.91289966923925, "grad_norm": 4.12093448638916, "learning_rate": 3.4064705882352943e-06, "loss": 1.8986, "step": 44856 }, { "epoch": 65.94377067254686, "grad_norm": 2.7817020416259766, "learning_rate": 3.403382352941177e-06, "loss": 1.9933, "step": 44877 }, { "epoch": 65.97464167585447, "grad_norm": 3.2258081436157227, "learning_rate": 3.4002941176470587e-06, "loss": 1.942, "step": 44898 }, { "epoch": 66.00441014332966, "grad_norm": 4.363614559173584, "learning_rate": 3.3972058823529415e-06, "loss": 1.9194, "step": 44919 }, { "epoch": 66.03528114663726, "grad_norm": 3.070793867111206, "learning_rate": 3.394117647058824e-06, "loss": 1.8545, "step": 44940 }, { "epoch": 66.06615214994487, "grad_norm": 3.2505531311035156, "learning_rate": 3.3910294117647063e-06, "loss": 1.8964, "step": 44961 }, { "epoch": 66.09702315325248, "grad_norm": 3.5161337852478027, "learning_rate": 3.3879411764705883e-06, "loss": 1.9284, "step": 44982 }, { "epoch": 66.12789415656009, "grad_norm": 3.7324793338775635, "learning_rate": 3.3848529411764707e-06, "loss": 2.0131, "step": 45003 }, { "epoch": 66.1587651598677, "grad_norm": 3.0409655570983887, "learning_rate": 3.381764705882353e-06, "loss": 1.888, "step": 45024 }, { "epoch": 66.18963616317531, "grad_norm": 3.047988176345825, "learning_rate": 3.378676470588236e-06, "loss": 1.9748, "step": 45045 }, { "epoch": 66.22050716648292, "grad_norm": 2.8559012413024902, "learning_rate": 3.375588235294118e-06, "loss": 2.0637, "step": 45066 }, { "epoch": 66.25137816979051, "grad_norm": 2.9961822032928467, "learning_rate": 3.3725000000000003e-06, "loss": 1.8699, "step": 45087 }, { "epoch": 66.28224917309812, "grad_norm": 2.8833060264587402, "learning_rate": 3.3694117647058827e-06, "loss": 1.9247, "step": 45108 }, { "epoch": 66.31312017640573, "grad_norm": 2.632903814315796, "learning_rate": 3.366323529411765e-06, "loss": 1.9579, "step": 45129 }, { "epoch": 66.34399117971334, "grad_norm": 3.8295466899871826, "learning_rate": 3.363235294117647e-06, "loss": 1.9316, "step": 45150 }, { "epoch": 66.37486218302095, "grad_norm": 9.383986473083496, "learning_rate": 3.3601470588235295e-06, "loss": 1.9074, "step": 45171 }, { "epoch": 66.40573318632856, "grad_norm": 3.005646228790283, "learning_rate": 3.3570588235294124e-06, "loss": 1.9649, "step": 45192 }, { "epoch": 66.43660418963616, "grad_norm": 4.240323066711426, "learning_rate": 3.3539705882352948e-06, "loss": 1.9185, "step": 45213 }, { "epoch": 66.46747519294377, "grad_norm": 3.0358970165252686, "learning_rate": 3.3508823529411767e-06, "loss": 1.8546, "step": 45234 }, { "epoch": 66.49834619625138, "grad_norm": 3.1998205184936523, "learning_rate": 3.347794117647059e-06, "loss": 1.9236, "step": 45255 }, { "epoch": 66.52921719955899, "grad_norm": 2.1409428119659424, "learning_rate": 3.3447058823529416e-06, "loss": 1.9268, "step": 45276 }, { "epoch": 66.5600882028666, "grad_norm": 2.9562478065490723, "learning_rate": 3.341617647058824e-06, "loss": 1.8724, "step": 45297 }, { "epoch": 66.5909592061742, "grad_norm": 3.667581558227539, "learning_rate": 3.338529411764706e-06, "loss": 1.8971, "step": 45318 }, { "epoch": 66.62183020948181, "grad_norm": 1.3554242849349976, "learning_rate": 3.3354411764705884e-06, "loss": 1.8764, "step": 45339 }, { "epoch": 66.65270121278941, "grad_norm": 3.767029285430908, "learning_rate": 3.332352941176471e-06, "loss": 1.9278, "step": 45360 }, { "epoch": 66.68357221609702, "grad_norm": 3.6631076335906982, "learning_rate": 3.3292647058823528e-06, "loss": 1.9139, "step": 45381 }, { "epoch": 66.71444321940463, "grad_norm": 4.065669059753418, "learning_rate": 3.3261764705882356e-06, "loss": 1.8075, "step": 45402 }, { "epoch": 66.74531422271224, "grad_norm": 3.241481304168701, "learning_rate": 3.323088235294118e-06, "loss": 1.9621, "step": 45423 }, { "epoch": 66.77618522601985, "grad_norm": 2.8374810218811035, "learning_rate": 3.3200000000000004e-06, "loss": 1.9502, "step": 45444 }, { "epoch": 66.80705622932746, "grad_norm": 2.7268104553222656, "learning_rate": 3.3169117647058824e-06, "loss": 1.931, "step": 45465 }, { "epoch": 66.83792723263507, "grad_norm": 3.0853271484375, "learning_rate": 3.313823529411765e-06, "loss": 1.9316, "step": 45486 }, { "epoch": 66.86879823594266, "grad_norm": 2.5061416625976562, "learning_rate": 3.310735294117647e-06, "loss": 1.867, "step": 45507 }, { "epoch": 66.89966923925027, "grad_norm": 2.3989624977111816, "learning_rate": 3.30764705882353e-06, "loss": 2.0043, "step": 45528 }, { "epoch": 66.93054024255788, "grad_norm": 3.194223642349243, "learning_rate": 3.304558823529412e-06, "loss": 1.9464, "step": 45549 }, { "epoch": 66.96141124586549, "grad_norm": 2.941554069519043, "learning_rate": 3.3014705882352944e-06, "loss": 2.0174, "step": 45570 }, { "epoch": 66.9922822491731, "grad_norm": 3.9757916927337646, "learning_rate": 3.298382352941177e-06, "loss": 1.9798, "step": 45591 }, { "epoch": 67.0220507166483, "grad_norm": 3.92671537399292, "learning_rate": 3.2952941176470592e-06, "loss": 1.8436, "step": 45612 }, { "epoch": 67.0529217199559, "grad_norm": 2.1105265617370605, "learning_rate": 3.2922058823529412e-06, "loss": 1.8913, "step": 45633 }, { "epoch": 67.0837927232635, "grad_norm": 2.709144353866577, "learning_rate": 3.2891176470588236e-06, "loss": 1.9239, "step": 45654 }, { "epoch": 67.11466372657111, "grad_norm": 2.6527013778686523, "learning_rate": 3.286029411764706e-06, "loss": 1.7944, "step": 45675 }, { "epoch": 67.14553472987872, "grad_norm": 2.8610148429870605, "learning_rate": 3.282941176470589e-06, "loss": 1.9719, "step": 45696 }, { "epoch": 67.17640573318633, "grad_norm": 2.4640679359436035, "learning_rate": 3.279852941176471e-06, "loss": 1.8675, "step": 45717 }, { "epoch": 67.20727673649394, "grad_norm": 2.9208221435546875, "learning_rate": 3.2767647058823533e-06, "loss": 1.9656, "step": 45738 }, { "epoch": 67.23814773980155, "grad_norm": 2.492672920227051, "learning_rate": 3.2736764705882357e-06, "loss": 2.0223, "step": 45759 }, { "epoch": 67.26901874310916, "grad_norm": 4.20753812789917, "learning_rate": 3.270588235294118e-06, "loss": 1.907, "step": 45780 }, { "epoch": 67.29988974641675, "grad_norm": 3.6255440711975098, "learning_rate": 3.2675e-06, "loss": 1.9534, "step": 45801 }, { "epoch": 67.33076074972436, "grad_norm": 3.2886719703674316, "learning_rate": 3.2644117647058825e-06, "loss": 1.8102, "step": 45822 }, { "epoch": 67.36163175303197, "grad_norm": 3.1397688388824463, "learning_rate": 3.2614705882352944e-06, "loss": 1.8298, "step": 45843 }, { "epoch": 67.39250275633958, "grad_norm": 3.8198771476745605, "learning_rate": 3.2583823529411768e-06, "loss": 1.9306, "step": 45864 }, { "epoch": 67.42337375964719, "grad_norm": 4.953681945800781, "learning_rate": 3.255294117647059e-06, "loss": 1.9057, "step": 45885 }, { "epoch": 67.4542447629548, "grad_norm": 5.735880374908447, "learning_rate": 3.252205882352941e-06, "loss": 1.9929, "step": 45906 }, { "epoch": 67.48511576626241, "grad_norm": 3.011443853378296, "learning_rate": 3.2491176470588236e-06, "loss": 1.891, "step": 45927 }, { "epoch": 67.51598676957, "grad_norm": 2.8782732486724854, "learning_rate": 3.2460294117647064e-06, "loss": 1.9398, "step": 45948 }, { "epoch": 67.54685777287762, "grad_norm": 4.39702844619751, "learning_rate": 3.242941176470589e-06, "loss": 1.9399, "step": 45969 }, { "epoch": 67.57772877618523, "grad_norm": 2.8236069679260254, "learning_rate": 3.239852941176471e-06, "loss": 1.8884, "step": 45990 }, { "epoch": 67.60859977949283, "grad_norm": 3.1677441596984863, "learning_rate": 3.2367647058823532e-06, "loss": 1.9135, "step": 46011 }, { "epoch": 67.63947078280044, "grad_norm": 2.398425579071045, "learning_rate": 3.2336764705882356e-06, "loss": 1.9814, "step": 46032 }, { "epoch": 67.67034178610805, "grad_norm": 3.5102760791778564, "learning_rate": 3.230588235294118e-06, "loss": 1.9165, "step": 46053 }, { "epoch": 67.70121278941565, "grad_norm": 2.440674304962158, "learning_rate": 3.2275e-06, "loss": 1.8525, "step": 46074 }, { "epoch": 67.73208379272326, "grad_norm": 2.336881399154663, "learning_rate": 3.2244117647058824e-06, "loss": 1.8781, "step": 46095 }, { "epoch": 67.76295479603087, "grad_norm": 2.771148443222046, "learning_rate": 3.2213235294117653e-06, "loss": 1.8503, "step": 46116 }, { "epoch": 67.79382579933848, "grad_norm": 4.68476676940918, "learning_rate": 3.2182352941176477e-06, "loss": 1.8731, "step": 46137 }, { "epoch": 67.82469680264609, "grad_norm": 2.5217838287353516, "learning_rate": 3.2151470588235296e-06, "loss": 1.8239, "step": 46158 }, { "epoch": 67.8555678059537, "grad_norm": 3.3602046966552734, "learning_rate": 3.212058823529412e-06, "loss": 1.9285, "step": 46179 }, { "epoch": 67.8864388092613, "grad_norm": 4.459496974945068, "learning_rate": 3.2089705882352945e-06, "loss": 1.888, "step": 46200 }, { "epoch": 67.9173098125689, "grad_norm": 3.4342386722564697, "learning_rate": 3.205882352941177e-06, "loss": 1.8691, "step": 46221 }, { "epoch": 67.94818081587651, "grad_norm": 3.064819812774658, "learning_rate": 3.202794117647059e-06, "loss": 2.0674, "step": 46242 }, { "epoch": 67.97905181918412, "grad_norm": 6.715534687042236, "learning_rate": 3.1997058823529413e-06, "loss": 1.9163, "step": 46263 }, { "epoch": 68.00882028665932, "grad_norm": 2.447021722793579, "learning_rate": 3.196617647058824e-06, "loss": 1.869, "step": 46284 }, { "epoch": 68.03969128996692, "grad_norm": 4.534775257110596, "learning_rate": 3.193529411764706e-06, "loss": 1.9221, "step": 46305 }, { "epoch": 68.07056229327453, "grad_norm": 2.885904312133789, "learning_rate": 3.1904411764705885e-06, "loss": 1.8555, "step": 46326 }, { "epoch": 68.10143329658214, "grad_norm": 3.3706905841827393, "learning_rate": 3.187352941176471e-06, "loss": 1.9062, "step": 46347 }, { "epoch": 68.13230429988975, "grad_norm": 2.976891040802002, "learning_rate": 3.1842647058823533e-06, "loss": 1.7992, "step": 46368 }, { "epoch": 68.16317530319735, "grad_norm": 2.690993070602417, "learning_rate": 3.1811764705882353e-06, "loss": 2.0282, "step": 46389 }, { "epoch": 68.19404630650496, "grad_norm": 2.1416783332824707, "learning_rate": 3.1780882352941177e-06, "loss": 1.813, "step": 46410 }, { "epoch": 68.22491730981257, "grad_norm": 2.6161868572235107, "learning_rate": 3.175e-06, "loss": 1.9812, "step": 46431 }, { "epoch": 68.25578831312018, "grad_norm": 3.8222696781158447, "learning_rate": 3.171911764705883e-06, "loss": 1.9471, "step": 46452 }, { "epoch": 68.28665931642779, "grad_norm": 2.6712918281555176, "learning_rate": 3.168823529411765e-06, "loss": 1.8784, "step": 46473 }, { "epoch": 68.3175303197354, "grad_norm": 2.664337635040283, "learning_rate": 3.1657352941176473e-06, "loss": 1.8679, "step": 46494 }, { "epoch": 68.34840132304299, "grad_norm": 4.091700553894043, "learning_rate": 3.1626470588235297e-06, "loss": 1.8242, "step": 46515 }, { "epoch": 68.3792723263506, "grad_norm": 2.763148546218872, "learning_rate": 3.159558823529412e-06, "loss": 2.0663, "step": 46536 }, { "epoch": 68.41014332965821, "grad_norm": 6.577037811279297, "learning_rate": 3.156470588235294e-06, "loss": 1.8667, "step": 46557 }, { "epoch": 68.44101433296582, "grad_norm": 2.830843687057495, "learning_rate": 3.1533823529411765e-06, "loss": 1.957, "step": 46578 }, { "epoch": 68.47188533627343, "grad_norm": 2.2050094604492188, "learning_rate": 3.1502941176470594e-06, "loss": 2.0103, "step": 46599 }, { "epoch": 68.50275633958104, "grad_norm": 4.7338786125183105, "learning_rate": 3.1472058823529418e-06, "loss": 1.9363, "step": 46620 }, { "epoch": 68.53362734288865, "grad_norm": 2.314370632171631, "learning_rate": 3.1441176470588237e-06, "loss": 1.9761, "step": 46641 }, { "epoch": 68.56449834619625, "grad_norm": 3.794013500213623, "learning_rate": 3.141029411764706e-06, "loss": 2.0012, "step": 46662 }, { "epoch": 68.59536934950386, "grad_norm": 3.1654305458068848, "learning_rate": 3.1379411764705886e-06, "loss": 1.9046, "step": 46683 }, { "epoch": 68.62624035281146, "grad_norm": 3.451721429824829, "learning_rate": 3.134852941176471e-06, "loss": 1.9531, "step": 46704 }, { "epoch": 68.65711135611907, "grad_norm": 2.8315999507904053, "learning_rate": 3.131764705882353e-06, "loss": 1.9441, "step": 46725 }, { "epoch": 68.68798235942668, "grad_norm": 3.620673894882202, "learning_rate": 3.1286764705882354e-06, "loss": 1.8649, "step": 46746 }, { "epoch": 68.7188533627343, "grad_norm": 2.8886916637420654, "learning_rate": 3.125588235294118e-06, "loss": 1.9248, "step": 46767 }, { "epoch": 68.7497243660419, "grad_norm": 3.4414961338043213, "learning_rate": 3.1225000000000006e-06, "loss": 1.8078, "step": 46788 }, { "epoch": 68.7805953693495, "grad_norm": 3.69000244140625, "learning_rate": 3.1194117647058826e-06, "loss": 1.9585, "step": 46809 }, { "epoch": 68.81146637265711, "grad_norm": 2.47991681098938, "learning_rate": 3.116323529411765e-06, "loss": 1.9314, "step": 46830 }, { "epoch": 68.84233737596472, "grad_norm": 3.581766366958618, "learning_rate": 3.1132352941176474e-06, "loss": 1.955, "step": 46851 }, { "epoch": 68.87320837927233, "grad_norm": 2.676147222518921, "learning_rate": 3.1101470588235294e-06, "loss": 1.8515, "step": 46872 }, { "epoch": 68.90407938257994, "grad_norm": 2.8726587295532227, "learning_rate": 3.1070588235294118e-06, "loss": 1.997, "step": 46893 }, { "epoch": 68.93495038588755, "grad_norm": 2.370281457901001, "learning_rate": 3.103970588235294e-06, "loss": 1.8618, "step": 46914 }, { "epoch": 68.96582138919514, "grad_norm": 4.583372592926025, "learning_rate": 3.100882352941177e-06, "loss": 1.948, "step": 46935 }, { "epoch": 68.99669239250275, "grad_norm": 2.4264566898345947, "learning_rate": 3.097794117647059e-06, "loss": 1.8566, "step": 46956 }, { "epoch": 69.02646085997795, "grad_norm": 2.8264083862304688, "learning_rate": 3.0947058823529414e-06, "loss": 1.8451, "step": 46977 }, { "epoch": 69.05733186328555, "grad_norm": 3.8331639766693115, "learning_rate": 3.091617647058824e-06, "loss": 1.953, "step": 46998 }, { "epoch": 69.08820286659316, "grad_norm": 2.8991682529449463, "learning_rate": 3.0885294117647062e-06, "loss": 1.9571, "step": 47019 }, { "epoch": 69.11907386990077, "grad_norm": 5.0382466316223145, "learning_rate": 3.0854411764705882e-06, "loss": 1.923, "step": 47040 }, { "epoch": 69.14994487320838, "grad_norm": 4.034504413604736, "learning_rate": 3.0823529411764706e-06, "loss": 2.0031, "step": 47061 }, { "epoch": 69.180815876516, "grad_norm": 4.29233980178833, "learning_rate": 3.0792647058823535e-06, "loss": 1.9127, "step": 47082 }, { "epoch": 69.21168687982359, "grad_norm": 2.299708604812622, "learning_rate": 3.076176470588236e-06, "loss": 1.9689, "step": 47103 }, { "epoch": 69.2425578831312, "grad_norm": 2.7381272315979004, "learning_rate": 3.073088235294118e-06, "loss": 1.9535, "step": 47124 }, { "epoch": 69.27342888643881, "grad_norm": 2.517571210861206, "learning_rate": 3.0700000000000003e-06, "loss": 1.9001, "step": 47145 }, { "epoch": 69.30429988974642, "grad_norm": 2.952296018600464, "learning_rate": 3.0669117647058827e-06, "loss": 1.8114, "step": 47166 }, { "epoch": 69.33517089305403, "grad_norm": 2.8386082649230957, "learning_rate": 3.063823529411765e-06, "loss": 1.8295, "step": 47187 }, { "epoch": 69.36604189636164, "grad_norm": 3.5746819972991943, "learning_rate": 3.060735294117647e-06, "loss": 1.9753, "step": 47208 }, { "epoch": 69.39691289966925, "grad_norm": 3.919343948364258, "learning_rate": 3.0576470588235295e-06, "loss": 1.9362, "step": 47229 }, { "epoch": 69.42778390297684, "grad_norm": 2.42380428314209, "learning_rate": 3.0545588235294123e-06, "loss": 1.8354, "step": 47250 }, { "epoch": 69.45865490628445, "grad_norm": 4.018710136413574, "learning_rate": 3.0514705882352947e-06, "loss": 1.8724, "step": 47271 }, { "epoch": 69.48952590959206, "grad_norm": 2.9072799682617188, "learning_rate": 3.0483823529411767e-06, "loss": 1.7899, "step": 47292 }, { "epoch": 69.52039691289967, "grad_norm": 2.7626333236694336, "learning_rate": 3.045294117647059e-06, "loss": 1.932, "step": 47313 }, { "epoch": 69.55126791620728, "grad_norm": 2.725060224533081, "learning_rate": 3.0422058823529415e-06, "loss": 1.9927, "step": 47334 }, { "epoch": 69.58213891951489, "grad_norm": 3.131769895553589, "learning_rate": 3.0391176470588235e-06, "loss": 1.9298, "step": 47355 }, { "epoch": 69.61300992282249, "grad_norm": 2.4681591987609863, "learning_rate": 3.036029411764706e-06, "loss": 2.0434, "step": 47376 }, { "epoch": 69.6438809261301, "grad_norm": 2.9780359268188477, "learning_rate": 3.0329411764705883e-06, "loss": 2.0049, "step": 47397 }, { "epoch": 69.6747519294377, "grad_norm": 3.0549962520599365, "learning_rate": 3.029852941176471e-06, "loss": 1.7712, "step": 47418 }, { "epoch": 69.70562293274531, "grad_norm": 2.240541696548462, "learning_rate": 3.026764705882353e-06, "loss": 1.8809, "step": 47439 }, { "epoch": 69.73649393605292, "grad_norm": 3.0102834701538086, "learning_rate": 3.0236764705882355e-06, "loss": 1.9157, "step": 47460 }, { "epoch": 69.76736493936053, "grad_norm": 2.4884815216064453, "learning_rate": 3.020588235294118e-06, "loss": 1.8964, "step": 47481 }, { "epoch": 69.79823594266814, "grad_norm": 4.505846977233887, "learning_rate": 3.0175000000000003e-06, "loss": 1.8934, "step": 47502 }, { "epoch": 69.82910694597574, "grad_norm": 3.8972420692443848, "learning_rate": 3.0144117647058823e-06, "loss": 1.8645, "step": 47523 }, { "epoch": 69.85997794928335, "grad_norm": 2.572105646133423, "learning_rate": 3.0113235294117647e-06, "loss": 1.9759, "step": 47544 }, { "epoch": 69.89084895259096, "grad_norm": 4.437960147857666, "learning_rate": 3.0082352941176476e-06, "loss": 1.8699, "step": 47565 }, { "epoch": 69.92171995589857, "grad_norm": 3.7894489765167236, "learning_rate": 3.00514705882353e-06, "loss": 1.9105, "step": 47586 }, { "epoch": 69.95259095920618, "grad_norm": 3.251145601272583, "learning_rate": 3.002058823529412e-06, "loss": 1.964, "step": 47607 }, { "epoch": 69.98346196251379, "grad_norm": 3.9622151851654053, "learning_rate": 2.9989705882352944e-06, "loss": 1.949, "step": 47628 }, { "epoch": 70.01323042998898, "grad_norm": 2.131441831588745, "learning_rate": 2.9958823529411768e-06, "loss": 1.8202, "step": 47649 }, { "epoch": 70.04410143329659, "grad_norm": 3.528303623199463, "learning_rate": 2.992794117647059e-06, "loss": 2.0032, "step": 47670 }, { "epoch": 70.07497243660418, "grad_norm": 4.089874267578125, "learning_rate": 2.989705882352941e-06, "loss": 1.8657, "step": 47691 }, { "epoch": 70.1058434399118, "grad_norm": 3.0120060443878174, "learning_rate": 2.9866176470588236e-06, "loss": 1.9634, "step": 47712 }, { "epoch": 70.1367144432194, "grad_norm": 2.764338731765747, "learning_rate": 2.9835294117647064e-06, "loss": 1.9048, "step": 47733 }, { "epoch": 70.16758544652701, "grad_norm": 3.350494146347046, "learning_rate": 2.980441176470589e-06, "loss": 1.955, "step": 47754 }, { "epoch": 70.19845644983462, "grad_norm": 3.1146106719970703, "learning_rate": 2.9773529411764708e-06, "loss": 1.8282, "step": 47775 }, { "epoch": 70.22932745314223, "grad_norm": 3.5768425464630127, "learning_rate": 2.974264705882353e-06, "loss": 1.8874, "step": 47796 }, { "epoch": 70.26019845644983, "grad_norm": 2.665069580078125, "learning_rate": 2.9711764705882356e-06, "loss": 2.0064, "step": 47817 }, { "epoch": 70.29106945975744, "grad_norm": 2.941518545150757, "learning_rate": 2.968088235294118e-06, "loss": 1.9942, "step": 47838 }, { "epoch": 70.32194046306505, "grad_norm": 2.5750722885131836, "learning_rate": 2.965e-06, "loss": 1.9315, "step": 47859 }, { "epoch": 70.35281146637266, "grad_norm": 3.6966700553894043, "learning_rate": 2.9619117647058824e-06, "loss": 1.9361, "step": 47880 }, { "epoch": 70.38368246968027, "grad_norm": 2.023054838180542, "learning_rate": 2.9588235294117652e-06, "loss": 2.0227, "step": 47901 }, { "epoch": 70.41455347298788, "grad_norm": 2.935760974884033, "learning_rate": 2.955735294117647e-06, "loss": 2.0104, "step": 47922 }, { "epoch": 70.44542447629549, "grad_norm": 2.1040079593658447, "learning_rate": 2.9526470588235296e-06, "loss": 1.9498, "step": 47943 }, { "epoch": 70.47629547960308, "grad_norm": 3.5212268829345703, "learning_rate": 2.949558823529412e-06, "loss": 2.0207, "step": 47964 }, { "epoch": 70.50716648291069, "grad_norm": 2.9614555835723877, "learning_rate": 2.9464705882352944e-06, "loss": 1.919, "step": 47985 }, { "epoch": 70.5380374862183, "grad_norm": 3.609107732772827, "learning_rate": 2.9433823529411764e-06, "loss": 1.7858, "step": 48006 }, { "epoch": 70.56890848952591, "grad_norm": 3.722780466079712, "learning_rate": 2.940294117647059e-06, "loss": 1.8511, "step": 48027 }, { "epoch": 70.59977949283352, "grad_norm": 3.495305299758911, "learning_rate": 2.9372058823529417e-06, "loss": 1.8101, "step": 48048 }, { "epoch": 70.63065049614113, "grad_norm": 4.503942966461182, "learning_rate": 2.934117647058824e-06, "loss": 1.8172, "step": 48069 }, { "epoch": 70.66152149944874, "grad_norm": 3.4691991806030273, "learning_rate": 2.931029411764706e-06, "loss": 1.9497, "step": 48090 }, { "epoch": 70.69239250275633, "grad_norm": 3.0474631786346436, "learning_rate": 2.9279411764705885e-06, "loss": 2.0424, "step": 48111 }, { "epoch": 70.72326350606394, "grad_norm": 2.629877805709839, "learning_rate": 2.924852941176471e-06, "loss": 1.9285, "step": 48132 }, { "epoch": 70.75413450937155, "grad_norm": 3.578118085861206, "learning_rate": 2.9217647058823533e-06, "loss": 1.8699, "step": 48153 }, { "epoch": 70.78500551267916, "grad_norm": 2.370539426803589, "learning_rate": 2.9186764705882353e-06, "loss": 1.9292, "step": 48174 }, { "epoch": 70.81587651598677, "grad_norm": 2.2128517627716064, "learning_rate": 2.9155882352941177e-06, "loss": 1.985, "step": 48195 }, { "epoch": 70.84674751929438, "grad_norm": 3.991440534591675, "learning_rate": 2.9125000000000005e-06, "loss": 1.971, "step": 48216 }, { "epoch": 70.87761852260198, "grad_norm": 2.6408755779266357, "learning_rate": 2.909411764705883e-06, "loss": 1.994, "step": 48237 }, { "epoch": 70.90848952590959, "grad_norm": 3.4526166915893555, "learning_rate": 2.906323529411765e-06, "loss": 1.9045, "step": 48258 }, { "epoch": 70.9393605292172, "grad_norm": 2.709015369415283, "learning_rate": 2.9032352941176473e-06, "loss": 1.9351, "step": 48279 }, { "epoch": 70.9702315325248, "grad_norm": 2.113975763320923, "learning_rate": 2.9001470588235297e-06, "loss": 1.9684, "step": 48300 }, { "epoch": 71.0, "grad_norm": 0.5405933260917664, "learning_rate": 2.897058823529412e-06, "loss": 1.9464, "step": 48321 }, { "epoch": 71.03087100330761, "grad_norm": 2.8186938762664795, "learning_rate": 2.893970588235294e-06, "loss": 1.9497, "step": 48342 }, { "epoch": 71.06174200661522, "grad_norm": 2.607114791870117, "learning_rate": 2.8908823529411765e-06, "loss": 1.9054, "step": 48363 }, { "epoch": 71.09261300992283, "grad_norm": 1.8641791343688965, "learning_rate": 2.8877941176470593e-06, "loss": 1.9519, "step": 48384 }, { "epoch": 71.12348401323042, "grad_norm": 3.9946582317352295, "learning_rate": 2.8847058823529417e-06, "loss": 1.8419, "step": 48405 }, { "epoch": 71.15435501653803, "grad_norm": 3.18731951713562, "learning_rate": 2.8816176470588237e-06, "loss": 1.9742, "step": 48426 }, { "epoch": 71.18522601984564, "grad_norm": 2.8127763271331787, "learning_rate": 2.878529411764706e-06, "loss": 1.9085, "step": 48447 }, { "epoch": 71.21609702315325, "grad_norm": 4.435079574584961, "learning_rate": 2.8754411764705885e-06, "loss": 1.936, "step": 48468 }, { "epoch": 71.24696802646086, "grad_norm": 2.338142156600952, "learning_rate": 2.8723529411764705e-06, "loss": 1.7801, "step": 48489 }, { "epoch": 71.27783902976847, "grad_norm": 3.031078338623047, "learning_rate": 2.869264705882353e-06, "loss": 1.8522, "step": 48510 }, { "epoch": 71.30871003307608, "grad_norm": 2.9229633808135986, "learning_rate": 2.8661764705882358e-06, "loss": 1.8577, "step": 48531 }, { "epoch": 71.33958103638368, "grad_norm": 3.113525867462158, "learning_rate": 2.863088235294118e-06, "loss": 1.8253, "step": 48552 }, { "epoch": 71.37045203969129, "grad_norm": 3.6236040592193604, "learning_rate": 2.86e-06, "loss": 1.8884, "step": 48573 }, { "epoch": 71.4013230429989, "grad_norm": 3.273730754852295, "learning_rate": 2.8569117647058826e-06, "loss": 1.9429, "step": 48594 }, { "epoch": 71.4321940463065, "grad_norm": 2.2096667289733887, "learning_rate": 2.853823529411765e-06, "loss": 1.9556, "step": 48615 }, { "epoch": 71.46306504961412, "grad_norm": 3.707087755203247, "learning_rate": 2.8507352941176474e-06, "loss": 1.9549, "step": 48636 }, { "epoch": 71.49393605292173, "grad_norm": 5.304229259490967, "learning_rate": 2.8476470588235294e-06, "loss": 1.9438, "step": 48657 }, { "epoch": 71.52480705622932, "grad_norm": 5.028119087219238, "learning_rate": 2.8445588235294118e-06, "loss": 1.9763, "step": 48678 }, { "epoch": 71.55567805953693, "grad_norm": 3.291314125061035, "learning_rate": 2.8414705882352946e-06, "loss": 1.7471, "step": 48699 }, { "epoch": 71.58654906284454, "grad_norm": 2.226691484451294, "learning_rate": 2.838382352941177e-06, "loss": 1.9686, "step": 48720 }, { "epoch": 71.61742006615215, "grad_norm": 3.041870355606079, "learning_rate": 2.835294117647059e-06, "loss": 1.9188, "step": 48741 }, { "epoch": 71.64829106945976, "grad_norm": 3.7898876667022705, "learning_rate": 2.8322058823529414e-06, "loss": 1.9545, "step": 48762 }, { "epoch": 71.67916207276737, "grad_norm": 4.93140172958374, "learning_rate": 2.829117647058824e-06, "loss": 1.9226, "step": 48783 }, { "epoch": 71.71003307607498, "grad_norm": 4.713207721710205, "learning_rate": 2.826029411764706e-06, "loss": 1.908, "step": 48804 }, { "epoch": 71.74090407938257, "grad_norm": 2.818293333053589, "learning_rate": 2.822941176470588e-06, "loss": 1.8214, "step": 48825 }, { "epoch": 71.77177508269018, "grad_norm": 3.3326711654663086, "learning_rate": 2.8198529411764706e-06, "loss": 1.9563, "step": 48846 }, { "epoch": 71.8026460859978, "grad_norm": 2.825676918029785, "learning_rate": 2.8167647058823534e-06, "loss": 1.9582, "step": 48867 }, { "epoch": 71.8335170893054, "grad_norm": 3.662075996398926, "learning_rate": 2.813676470588236e-06, "loss": 1.8696, "step": 48888 }, { "epoch": 71.86438809261301, "grad_norm": 4.072948932647705, "learning_rate": 2.810588235294118e-06, "loss": 1.8797, "step": 48909 }, { "epoch": 71.89525909592062, "grad_norm": 3.39165997505188, "learning_rate": 2.8075000000000002e-06, "loss": 1.8736, "step": 48930 }, { "epoch": 71.92613009922823, "grad_norm": 4.221452713012695, "learning_rate": 2.8044117647058826e-06, "loss": 1.9127, "step": 48951 }, { "epoch": 71.95700110253583, "grad_norm": 3.3652379512786865, "learning_rate": 2.8013235294117646e-06, "loss": 1.858, "step": 48972 }, { "epoch": 71.98787210584344, "grad_norm": 3.176637649536133, "learning_rate": 2.798235294117647e-06, "loss": 1.9693, "step": 48993 }, { "epoch": 72.01764057331863, "grad_norm": 3.307875633239746, "learning_rate": 2.79514705882353e-06, "loss": 1.9602, "step": 49014 }, { "epoch": 72.04851157662624, "grad_norm": 2.9381821155548096, "learning_rate": 2.7920588235294123e-06, "loss": 1.9102, "step": 49035 }, { "epoch": 72.07938257993385, "grad_norm": 2.792022228240967, "learning_rate": 2.7889705882352942e-06, "loss": 1.9543, "step": 49056 }, { "epoch": 72.11025358324146, "grad_norm": 3.471529006958008, "learning_rate": 2.7858823529411767e-06, "loss": 1.8186, "step": 49077 }, { "epoch": 72.14112458654907, "grad_norm": 4.519994735717773, "learning_rate": 2.782794117647059e-06, "loss": 1.9442, "step": 49098 }, { "epoch": 72.17199558985666, "grad_norm": 2.773489236831665, "learning_rate": 2.7797058823529415e-06, "loss": 1.9252, "step": 49119 }, { "epoch": 72.20286659316427, "grad_norm": 3.0610599517822266, "learning_rate": 2.7766176470588235e-06, "loss": 1.8471, "step": 49140 }, { "epoch": 72.23373759647188, "grad_norm": 2.7542271614074707, "learning_rate": 2.773529411764706e-06, "loss": 1.8421, "step": 49161 }, { "epoch": 72.26460859977949, "grad_norm": 2.4146950244903564, "learning_rate": 2.7704411764705887e-06, "loss": 1.9177, "step": 49182 }, { "epoch": 72.2954796030871, "grad_norm": 2.373931646347046, "learning_rate": 2.767352941176471e-06, "loss": 1.906, "step": 49203 }, { "epoch": 72.32635060639471, "grad_norm": 3.513009786605835, "learning_rate": 2.764264705882353e-06, "loss": 1.9208, "step": 49224 }, { "epoch": 72.35722160970232, "grad_norm": 5.382378578186035, "learning_rate": 2.7611764705882355e-06, "loss": 1.9499, "step": 49245 }, { "epoch": 72.38809261300992, "grad_norm": 3.7208235263824463, "learning_rate": 2.758088235294118e-06, "loss": 2.0002, "step": 49266 }, { "epoch": 72.41896361631753, "grad_norm": 3.809539794921875, "learning_rate": 2.7550000000000003e-06, "loss": 1.8975, "step": 49287 }, { "epoch": 72.44983461962514, "grad_norm": 3.87558650970459, "learning_rate": 2.7519117647058823e-06, "loss": 1.8582, "step": 49308 }, { "epoch": 72.48070562293275, "grad_norm": 2.45310115814209, "learning_rate": 2.7488235294117647e-06, "loss": 1.8677, "step": 49329 }, { "epoch": 72.51157662624036, "grad_norm": 4.786869525909424, "learning_rate": 2.7457352941176475e-06, "loss": 1.8856, "step": 49350 }, { "epoch": 72.54244762954796, "grad_norm": 2.5740697383880615, "learning_rate": 2.74264705882353e-06, "loss": 1.9379, "step": 49371 }, { "epoch": 72.57331863285557, "grad_norm": 2.8713462352752686, "learning_rate": 2.739558823529412e-06, "loss": 1.8496, "step": 49392 }, { "epoch": 72.60418963616317, "grad_norm": 4.247597694396973, "learning_rate": 2.7364705882352943e-06, "loss": 1.894, "step": 49413 }, { "epoch": 72.63506063947078, "grad_norm": 3.2606682777404785, "learning_rate": 2.7333823529411767e-06, "loss": 1.8653, "step": 49434 }, { "epoch": 72.66593164277839, "grad_norm": 4.239423751831055, "learning_rate": 2.730294117647059e-06, "loss": 1.9733, "step": 49455 }, { "epoch": 72.696802646086, "grad_norm": 3.1524364948272705, "learning_rate": 2.727205882352941e-06, "loss": 1.8785, "step": 49476 }, { "epoch": 72.72767364939361, "grad_norm": 5.3961358070373535, "learning_rate": 2.724117647058824e-06, "loss": 1.8692, "step": 49497 }, { "epoch": 72.75854465270122, "grad_norm": 3.5628223419189453, "learning_rate": 2.7210294117647064e-06, "loss": 1.9315, "step": 49518 }, { "epoch": 72.78941565600881, "grad_norm": 4.496179103851318, "learning_rate": 2.7179411764705883e-06, "loss": 1.9575, "step": 49539 }, { "epoch": 72.82028665931642, "grad_norm": 2.2838375568389893, "learning_rate": 2.7148529411764708e-06, "loss": 1.9155, "step": 49560 }, { "epoch": 72.85115766262403, "grad_norm": 3.5046310424804688, "learning_rate": 2.711764705882353e-06, "loss": 1.8097, "step": 49581 }, { "epoch": 72.88202866593164, "grad_norm": 3.2243173122406006, "learning_rate": 2.7086764705882356e-06, "loss": 1.9258, "step": 49602 }, { "epoch": 72.91289966923925, "grad_norm": 3.804365873336792, "learning_rate": 2.7055882352941176e-06, "loss": 2.0053, "step": 49623 }, { "epoch": 72.94377067254686, "grad_norm": 3.0289788246154785, "learning_rate": 2.7025e-06, "loss": 1.8386, "step": 49644 }, { "epoch": 72.97464167585447, "grad_norm": 3.598529100418091, "learning_rate": 2.699411764705883e-06, "loss": 1.8784, "step": 49665 }, { "epoch": 73.00441014332966, "grad_norm": 1.7006909847259521, "learning_rate": 2.696323529411765e-06, "loss": 1.8978, "step": 49686 }, { "epoch": 73.03528114663726, "grad_norm": 3.7394464015960693, "learning_rate": 2.693235294117647e-06, "loss": 1.9036, "step": 49707 }, { "epoch": 73.06615214994487, "grad_norm": 2.706868886947632, "learning_rate": 2.6901470588235296e-06, "loss": 1.9657, "step": 49728 }, { "epoch": 73.09702315325248, "grad_norm": 2.563814640045166, "learning_rate": 2.687058823529412e-06, "loss": 1.9018, "step": 49749 }, { "epoch": 73.12789415656009, "grad_norm": 1.935813546180725, "learning_rate": 2.6839705882352944e-06, "loss": 1.8917, "step": 49770 }, { "epoch": 73.1587651598677, "grad_norm": 3.0361616611480713, "learning_rate": 2.6808823529411764e-06, "loss": 1.9658, "step": 49791 }, { "epoch": 73.18963616317531, "grad_norm": 2.93310284614563, "learning_rate": 2.677794117647059e-06, "loss": 1.8034, "step": 49812 }, { "epoch": 73.22050716648292, "grad_norm": 3.4395806789398193, "learning_rate": 2.6747058823529416e-06, "loss": 1.9371, "step": 49833 }, { "epoch": 73.25137816979051, "grad_norm": 2.8080458641052246, "learning_rate": 2.671764705882353e-06, "loss": 1.9222, "step": 49854 }, { "epoch": 73.28224917309812, "grad_norm": 2.570085048675537, "learning_rate": 2.6686764705882355e-06, "loss": 1.8977, "step": 49875 }, { "epoch": 73.31312017640573, "grad_norm": 5.795790195465088, "learning_rate": 2.6655882352941175e-06, "loss": 1.9444, "step": 49896 }, { "epoch": 73.34399117971334, "grad_norm": 3.546757459640503, "learning_rate": 2.6625e-06, "loss": 1.8748, "step": 49917 }, { "epoch": 73.37486218302095, "grad_norm": 2.99651837348938, "learning_rate": 2.6594117647058828e-06, "loss": 2.0259, "step": 49938 }, { "epoch": 73.40573318632856, "grad_norm": 3.322622299194336, "learning_rate": 2.656323529411765e-06, "loss": 1.9371, "step": 49959 }, { "epoch": 73.43660418963616, "grad_norm": 2.4850637912750244, "learning_rate": 2.653235294117647e-06, "loss": 1.81, "step": 49980 }, { "epoch": 73.46747519294377, "grad_norm": 4.126558780670166, "learning_rate": 2.6501470588235295e-06, "loss": 1.9429, "step": 50001 }, { "epoch": 73.49834619625138, "grad_norm": 2.8152523040771484, "learning_rate": 2.647058823529412e-06, "loss": 1.9178, "step": 50022 }, { "epoch": 73.52921719955899, "grad_norm": 1.3115510940551758, "learning_rate": 2.6439705882352944e-06, "loss": 1.9803, "step": 50043 }, { "epoch": 73.5600882028666, "grad_norm": 3.259418249130249, "learning_rate": 2.6408823529411763e-06, "loss": 1.9242, "step": 50064 }, { "epoch": 73.5909592061742, "grad_norm": 2.800797939300537, "learning_rate": 2.6377941176470588e-06, "loss": 1.9419, "step": 50085 }, { "epoch": 73.62183020948181, "grad_norm": 4.167214870452881, "learning_rate": 2.6347058823529416e-06, "loss": 1.8914, "step": 50106 }, { "epoch": 73.65270121278941, "grad_norm": 2.929732322692871, "learning_rate": 2.631617647058824e-06, "loss": 1.8916, "step": 50127 }, { "epoch": 73.68357221609702, "grad_norm": 3.9215922355651855, "learning_rate": 2.628529411764706e-06, "loss": 1.8268, "step": 50148 }, { "epoch": 73.71444321940463, "grad_norm": 2.9222750663757324, "learning_rate": 2.6254411764705884e-06, "loss": 1.9666, "step": 50169 }, { "epoch": 73.74531422271224, "grad_norm": 3.2445180416107178, "learning_rate": 2.622352941176471e-06, "loss": 1.7913, "step": 50190 }, { "epoch": 73.77618522601985, "grad_norm": 4.673125267028809, "learning_rate": 2.619264705882353e-06, "loss": 2.0237, "step": 50211 }, { "epoch": 73.80705622932746, "grad_norm": 2.121180772781372, "learning_rate": 2.616176470588235e-06, "loss": 1.9265, "step": 50232 }, { "epoch": 73.83792723263507, "grad_norm": 2.956002950668335, "learning_rate": 2.613088235294118e-06, "loss": 2.0008, "step": 50253 }, { "epoch": 73.86879823594266, "grad_norm": 5.273523807525635, "learning_rate": 2.6100000000000004e-06, "loss": 1.7917, "step": 50274 }, { "epoch": 73.89966923925027, "grad_norm": 1.866257905960083, "learning_rate": 2.606911764705883e-06, "loss": 1.9127, "step": 50295 }, { "epoch": 73.93054024255788, "grad_norm": 2.718712568283081, "learning_rate": 2.603823529411765e-06, "loss": 1.9951, "step": 50316 }, { "epoch": 73.96141124586549, "grad_norm": 2.987459659576416, "learning_rate": 2.6007352941176472e-06, "loss": 1.8713, "step": 50337 }, { "epoch": 73.9922822491731, "grad_norm": 1.9707118272781372, "learning_rate": 2.5976470588235296e-06, "loss": 1.9064, "step": 50358 }, { "epoch": 74.0220507166483, "grad_norm": 2.9597554206848145, "learning_rate": 2.594558823529412e-06, "loss": 1.7893, "step": 50379 }, { "epoch": 74.0529217199559, "grad_norm": 2.636930227279663, "learning_rate": 2.591470588235294e-06, "loss": 1.9387, "step": 50400 }, { "epoch": 74.0837927232635, "grad_norm": 4.113221168518066, "learning_rate": 2.588382352941177e-06, "loss": 1.8611, "step": 50421 }, { "epoch": 74.11466372657111, "grad_norm": 3.16792368888855, "learning_rate": 2.5852941176470593e-06, "loss": 1.9956, "step": 50442 }, { "epoch": 74.14553472987872, "grad_norm": 2.2258970737457275, "learning_rate": 2.5822058823529412e-06, "loss": 1.9358, "step": 50463 }, { "epoch": 74.17640573318633, "grad_norm": 1.634445309638977, "learning_rate": 2.5791176470588236e-06, "loss": 1.9451, "step": 50484 }, { "epoch": 74.20727673649394, "grad_norm": 3.353851318359375, "learning_rate": 2.576029411764706e-06, "loss": 1.9217, "step": 50505 }, { "epoch": 74.23814773980155, "grad_norm": 4.005203723907471, "learning_rate": 2.5729411764705885e-06, "loss": 1.8757, "step": 50526 }, { "epoch": 74.26901874310916, "grad_norm": 3.2142398357391357, "learning_rate": 2.5698529411764704e-06, "loss": 1.8751, "step": 50547 }, { "epoch": 74.29988974641675, "grad_norm": 4.243664264678955, "learning_rate": 2.566764705882353e-06, "loss": 1.9234, "step": 50568 }, { "epoch": 74.33076074972436, "grad_norm": 3.7511684894561768, "learning_rate": 2.5636764705882357e-06, "loss": 1.9216, "step": 50589 }, { "epoch": 74.36163175303197, "grad_norm": 2.9760537147521973, "learning_rate": 2.560588235294118e-06, "loss": 1.9892, "step": 50610 }, { "epoch": 74.39250275633958, "grad_norm": 2.2952587604522705, "learning_rate": 2.5575e-06, "loss": 1.9962, "step": 50631 }, { "epoch": 74.42337375964719, "grad_norm": 2.618382692337036, "learning_rate": 2.5544117647058825e-06, "loss": 1.9411, "step": 50652 }, { "epoch": 74.4542447629548, "grad_norm": 3.2253096103668213, "learning_rate": 2.551323529411765e-06, "loss": 1.7891, "step": 50673 }, { "epoch": 74.48511576626241, "grad_norm": 2.981893539428711, "learning_rate": 2.5482352941176473e-06, "loss": 1.9162, "step": 50694 }, { "epoch": 74.51598676957, "grad_norm": 3.8257014751434326, "learning_rate": 2.5451470588235293e-06, "loss": 1.9071, "step": 50715 }, { "epoch": 74.54685777287762, "grad_norm": 3.3617916107177734, "learning_rate": 2.542058823529412e-06, "loss": 1.8275, "step": 50736 }, { "epoch": 74.57772877618523, "grad_norm": 4.299304485321045, "learning_rate": 2.5389705882352945e-06, "loss": 1.9357, "step": 50757 }, { "epoch": 74.60859977949283, "grad_norm": 3.329153060913086, "learning_rate": 2.535882352941177e-06, "loss": 1.9388, "step": 50778 }, { "epoch": 74.63947078280044, "grad_norm": 3.120755195617676, "learning_rate": 2.532794117647059e-06, "loss": 1.8664, "step": 50799 }, { "epoch": 74.67034178610805, "grad_norm": 4.159022331237793, "learning_rate": 2.5297058823529413e-06, "loss": 1.9287, "step": 50820 }, { "epoch": 74.70121278941565, "grad_norm": 2.3362767696380615, "learning_rate": 2.5266176470588237e-06, "loss": 1.8894, "step": 50841 }, { "epoch": 74.73208379272326, "grad_norm": 2.1901402473449707, "learning_rate": 2.523529411764706e-06, "loss": 1.9647, "step": 50862 }, { "epoch": 74.76295479603087, "grad_norm": 3.5210931301116943, "learning_rate": 2.520441176470588e-06, "loss": 1.8736, "step": 50883 }, { "epoch": 74.79382579933848, "grad_norm": 2.741748571395874, "learning_rate": 2.517352941176471e-06, "loss": 1.8774, "step": 50904 }, { "epoch": 74.82469680264609, "grad_norm": 2.750610828399658, "learning_rate": 2.5142647058823534e-06, "loss": 1.8566, "step": 50925 }, { "epoch": 74.8555678059537, "grad_norm": 2.628399610519409, "learning_rate": 2.5111764705882358e-06, "loss": 1.9215, "step": 50946 }, { "epoch": 74.8864388092613, "grad_norm": 2.666476011276245, "learning_rate": 2.5080882352941177e-06, "loss": 1.9796, "step": 50967 }, { "epoch": 74.9173098125689, "grad_norm": 2.506998300552368, "learning_rate": 2.505e-06, "loss": 1.8803, "step": 50988 }, { "epoch": 74.94818081587651, "grad_norm": 2.4620158672332764, "learning_rate": 2.5019117647058826e-06, "loss": 2.0641, "step": 51009 }, { "epoch": 74.97905181918412, "grad_norm": 2.9024486541748047, "learning_rate": 2.498823529411765e-06, "loss": 1.8854, "step": 51030 }, { "epoch": 75.00882028665932, "grad_norm": 3.0161125659942627, "learning_rate": 2.495735294117647e-06, "loss": 1.844, "step": 51051 }, { "epoch": 75.03969128996692, "grad_norm": 3.7023532390594482, "learning_rate": 2.4926470588235298e-06, "loss": 1.9441, "step": 51072 }, { "epoch": 75.07056229327453, "grad_norm": 4.152279853820801, "learning_rate": 2.4895588235294118e-06, "loss": 1.995, "step": 51093 }, { "epoch": 75.10143329658214, "grad_norm": 6.135012626647949, "learning_rate": 2.486470588235294e-06, "loss": 1.932, "step": 51114 }, { "epoch": 75.13230429988975, "grad_norm": 1.8155733346939087, "learning_rate": 2.4833823529411766e-06, "loss": 2.0075, "step": 51135 }, { "epoch": 75.16317530319735, "grad_norm": 2.4101316928863525, "learning_rate": 2.480294117647059e-06, "loss": 1.9383, "step": 51156 }, { "epoch": 75.19404630650496, "grad_norm": 3.6141891479492188, "learning_rate": 2.4772058823529414e-06, "loss": 1.8737, "step": 51177 }, { "epoch": 75.22491730981257, "grad_norm": 3.251715660095215, "learning_rate": 2.474117647058824e-06, "loss": 1.8535, "step": 51198 }, { "epoch": 75.25578831312018, "grad_norm": 3.371391773223877, "learning_rate": 2.4710294117647062e-06, "loss": 1.8616, "step": 51219 }, { "epoch": 75.28665931642779, "grad_norm": 1.3122135400772095, "learning_rate": 2.4679411764705886e-06, "loss": 1.9733, "step": 51240 }, { "epoch": 75.3175303197354, "grad_norm": 2.6796040534973145, "learning_rate": 2.465e-06, "loss": 1.9177, "step": 51261 }, { "epoch": 75.34840132304299, "grad_norm": 2.2464451789855957, "learning_rate": 2.4619117647058825e-06, "loss": 1.8876, "step": 51282 }, { "epoch": 75.3792723263506, "grad_norm": 3.1361215114593506, "learning_rate": 2.458823529411765e-06, "loss": 1.9187, "step": 51303 }, { "epoch": 75.41014332965821, "grad_norm": 2.984736442565918, "learning_rate": 2.4557352941176473e-06, "loss": 1.8313, "step": 51324 }, { "epoch": 75.44101433296582, "grad_norm": 2.581923484802246, "learning_rate": 2.4526470588235297e-06, "loss": 1.9939, "step": 51345 }, { "epoch": 75.47188533627343, "grad_norm": 2.3755006790161133, "learning_rate": 2.449558823529412e-06, "loss": 1.9111, "step": 51366 }, { "epoch": 75.50275633958104, "grad_norm": 2.0068445205688477, "learning_rate": 2.446470588235294e-06, "loss": 1.8545, "step": 51387 }, { "epoch": 75.53362734288865, "grad_norm": 5.183558464050293, "learning_rate": 2.4433823529411765e-06, "loss": 1.9057, "step": 51408 }, { "epoch": 75.56449834619625, "grad_norm": 2.0138821601867676, "learning_rate": 2.440294117647059e-06, "loss": 1.9387, "step": 51429 }, { "epoch": 75.59536934950386, "grad_norm": 1.9882069826126099, "learning_rate": 2.4372058823529414e-06, "loss": 1.8966, "step": 51450 }, { "epoch": 75.62624035281146, "grad_norm": 4.288424968719482, "learning_rate": 2.4341176470588238e-06, "loss": 1.7856, "step": 51471 }, { "epoch": 75.65711135611907, "grad_norm": 2.1259496212005615, "learning_rate": 2.431029411764706e-06, "loss": 1.8277, "step": 51492 }, { "epoch": 75.68798235942668, "grad_norm": 4.8258891105651855, "learning_rate": 2.4279411764705886e-06, "loss": 1.8065, "step": 51513 }, { "epoch": 75.7188533627343, "grad_norm": 4.435637950897217, "learning_rate": 2.4248529411764706e-06, "loss": 1.9267, "step": 51534 }, { "epoch": 75.7497243660419, "grad_norm": 3.4302377700805664, "learning_rate": 2.421764705882353e-06, "loss": 1.9588, "step": 51555 }, { "epoch": 75.7805953693495, "grad_norm": 3.4969675540924072, "learning_rate": 2.4186764705882354e-06, "loss": 2.0048, "step": 51576 }, { "epoch": 75.81146637265711, "grad_norm": 5.4195170402526855, "learning_rate": 2.4155882352941178e-06, "loss": 1.9878, "step": 51597 }, { "epoch": 75.84233737596472, "grad_norm": 4.28082799911499, "learning_rate": 2.4125e-06, "loss": 1.8845, "step": 51618 }, { "epoch": 75.87320837927233, "grad_norm": 2.3080432415008545, "learning_rate": 2.4094117647058826e-06, "loss": 1.8496, "step": 51639 }, { "epoch": 75.90407938257994, "grad_norm": 3.3118550777435303, "learning_rate": 2.406323529411765e-06, "loss": 1.8329, "step": 51660 }, { "epoch": 75.93495038588755, "grad_norm": 4.039615154266357, "learning_rate": 2.4032352941176474e-06, "loss": 1.8783, "step": 51681 }, { "epoch": 75.96582138919514, "grad_norm": 4.115147113800049, "learning_rate": 2.4001470588235294e-06, "loss": 1.8278, "step": 51702 }, { "epoch": 75.99669239250275, "grad_norm": 3.382730007171631, "learning_rate": 2.3970588235294122e-06, "loss": 1.876, "step": 51723 }, { "epoch": 76.02646085997795, "grad_norm": 2.418741226196289, "learning_rate": 2.3939705882352942e-06, "loss": 1.9089, "step": 51744 }, { "epoch": 76.05733186328555, "grad_norm": 5.132915496826172, "learning_rate": 2.3908823529411766e-06, "loss": 1.8215, "step": 51765 }, { "epoch": 76.08820286659316, "grad_norm": 3.5830798149108887, "learning_rate": 2.387794117647059e-06, "loss": 1.8834, "step": 51786 }, { "epoch": 76.11907386990077, "grad_norm": 2.6410534381866455, "learning_rate": 2.3847058823529414e-06, "loss": 1.8248, "step": 51807 }, { "epoch": 76.14994487320838, "grad_norm": 2.5172877311706543, "learning_rate": 2.381617647058824e-06, "loss": 1.9022, "step": 51828 }, { "epoch": 76.180815876516, "grad_norm": 3.271216869354248, "learning_rate": 2.3785294117647063e-06, "loss": 1.9306, "step": 51849 }, { "epoch": 76.21168687982359, "grad_norm": 3.526655912399292, "learning_rate": 2.3754411764705882e-06, "loss": 1.843, "step": 51870 }, { "epoch": 76.2425578831312, "grad_norm": 1.971374273300171, "learning_rate": 2.372352941176471e-06, "loss": 1.9836, "step": 51891 }, { "epoch": 76.27342888643881, "grad_norm": 2.7857720851898193, "learning_rate": 2.369264705882353e-06, "loss": 2.0551, "step": 51912 }, { "epoch": 76.30429988974642, "grad_norm": 4.324123382568359, "learning_rate": 2.3661764705882355e-06, "loss": 1.9204, "step": 51933 }, { "epoch": 76.33517089305403, "grad_norm": 2.3651795387268066, "learning_rate": 2.363088235294118e-06, "loss": 1.9882, "step": 51954 }, { "epoch": 76.36604189636164, "grad_norm": 2.8735873699188232, "learning_rate": 2.3600000000000003e-06, "loss": 1.8792, "step": 51975 }, { "epoch": 76.39691289966925, "grad_norm": 3.1472530364990234, "learning_rate": 2.3569117647058827e-06, "loss": 1.8762, "step": 51996 }, { "epoch": 76.42778390297684, "grad_norm": 2.9572362899780273, "learning_rate": 2.3538235294117647e-06, "loss": 2.0039, "step": 52017 }, { "epoch": 76.45865490628445, "grad_norm": 3.7912213802337646, "learning_rate": 2.350735294117647e-06, "loss": 1.9778, "step": 52038 }, { "epoch": 76.48952590959206, "grad_norm": 3.9416089057922363, "learning_rate": 2.3476470588235295e-06, "loss": 1.9922, "step": 52059 }, { "epoch": 76.52039691289967, "grad_norm": 3.3198537826538086, "learning_rate": 2.344558823529412e-06, "loss": 1.6592, "step": 52080 }, { "epoch": 76.55126791620728, "grad_norm": 5.149196147918701, "learning_rate": 2.3414705882352943e-06, "loss": 1.7548, "step": 52101 }, { "epoch": 76.58213891951489, "grad_norm": 2.4498510360717773, "learning_rate": 2.3383823529411767e-06, "loss": 1.7666, "step": 52122 }, { "epoch": 76.61300992282249, "grad_norm": 2.3431568145751953, "learning_rate": 2.335294117647059e-06, "loss": 2.0045, "step": 52143 }, { "epoch": 76.6438809261301, "grad_norm": 2.5487592220306396, "learning_rate": 2.3322058823529415e-06, "loss": 1.8194, "step": 52164 }, { "epoch": 76.6747519294377, "grad_norm": 3.4568991661071777, "learning_rate": 2.3291176470588235e-06, "loss": 1.9146, "step": 52185 }, { "epoch": 76.70562293274531, "grad_norm": 7.764152526855469, "learning_rate": 2.3260294117647063e-06, "loss": 1.9112, "step": 52206 }, { "epoch": 76.73649393605292, "grad_norm": 2.64389967918396, "learning_rate": 2.3229411764705883e-06, "loss": 1.8205, "step": 52227 }, { "epoch": 76.76736493936053, "grad_norm": 2.5152900218963623, "learning_rate": 2.3198529411764707e-06, "loss": 1.9012, "step": 52248 }, { "epoch": 76.79823594266814, "grad_norm": 3.0846266746520996, "learning_rate": 2.316764705882353e-06, "loss": 1.9779, "step": 52269 }, { "epoch": 76.82910694597574, "grad_norm": 3.1508610248565674, "learning_rate": 2.3136764705882355e-06, "loss": 1.8844, "step": 52290 }, { "epoch": 76.85997794928335, "grad_norm": 2.905240058898926, "learning_rate": 2.310588235294118e-06, "loss": 1.9782, "step": 52311 }, { "epoch": 76.89084895259096, "grad_norm": 2.0555567741394043, "learning_rate": 2.3075000000000004e-06, "loss": 1.822, "step": 52332 }, { "epoch": 76.92171995589857, "grad_norm": 3.9478890895843506, "learning_rate": 2.3044117647058823e-06, "loss": 1.9347, "step": 52353 }, { "epoch": 76.95259095920618, "grad_norm": 2.7098228931427, "learning_rate": 2.301323529411765e-06, "loss": 1.9042, "step": 52374 }, { "epoch": 76.98346196251379, "grad_norm": 2.646944284439087, "learning_rate": 2.298235294117647e-06, "loss": 1.9334, "step": 52395 }, { "epoch": 77.01323042998898, "grad_norm": 3.870572566986084, "learning_rate": 2.2951470588235296e-06, "loss": 1.8992, "step": 52416 }, { "epoch": 77.04410143329659, "grad_norm": 4.317201137542725, "learning_rate": 2.292058823529412e-06, "loss": 1.9485, "step": 52437 }, { "epoch": 77.07497243660418, "grad_norm": 3.205514907836914, "learning_rate": 2.2889705882352944e-06, "loss": 1.9392, "step": 52458 }, { "epoch": 77.1058434399118, "grad_norm": 2.7834088802337646, "learning_rate": 2.2858823529411768e-06, "loss": 1.9067, "step": 52479 }, { "epoch": 77.1367144432194, "grad_norm": 4.2463765144348145, "learning_rate": 2.2827941176470588e-06, "loss": 1.9237, "step": 52500 }, { "epoch": 77.16758544652701, "grad_norm": 2.879445791244507, "learning_rate": 2.279705882352941e-06, "loss": 1.8864, "step": 52521 }, { "epoch": 77.19845644983462, "grad_norm": 3.6718075275421143, "learning_rate": 2.2766176470588236e-06, "loss": 1.9833, "step": 52542 }, { "epoch": 77.22932745314223, "grad_norm": 4.284915924072266, "learning_rate": 2.273529411764706e-06, "loss": 1.9226, "step": 52563 }, { "epoch": 77.26019845644983, "grad_norm": 3.5193610191345215, "learning_rate": 2.2704411764705884e-06, "loss": 1.8805, "step": 52584 }, { "epoch": 77.29106945975744, "grad_norm": 3.159287691116333, "learning_rate": 2.267352941176471e-06, "loss": 1.7884, "step": 52605 }, { "epoch": 77.32194046306505, "grad_norm": 4.750467300415039, "learning_rate": 2.264264705882353e-06, "loss": 1.8447, "step": 52626 }, { "epoch": 77.35281146637266, "grad_norm": 2.789483070373535, "learning_rate": 2.2611764705882356e-06, "loss": 2.0292, "step": 52647 }, { "epoch": 77.38368246968027, "grad_norm": 4.384471416473389, "learning_rate": 2.2580882352941176e-06, "loss": 1.946, "step": 52668 }, { "epoch": 77.41455347298788, "grad_norm": 5.72169828414917, "learning_rate": 2.2550000000000004e-06, "loss": 1.9398, "step": 52689 }, { "epoch": 77.44542447629549, "grad_norm": 2.966850996017456, "learning_rate": 2.2519117647058824e-06, "loss": 1.9038, "step": 52710 }, { "epoch": 77.47629547960308, "grad_norm": 3.7062931060791016, "learning_rate": 2.248823529411765e-06, "loss": 1.8921, "step": 52731 }, { "epoch": 77.50716648291069, "grad_norm": 2.1951584815979004, "learning_rate": 2.2457352941176472e-06, "loss": 1.9617, "step": 52752 }, { "epoch": 77.5380374862183, "grad_norm": 3.3365771770477295, "learning_rate": 2.2426470588235296e-06, "loss": 1.8229, "step": 52773 }, { "epoch": 77.56890848952591, "grad_norm": 2.8411672115325928, "learning_rate": 2.239558823529412e-06, "loss": 1.8402, "step": 52794 }, { "epoch": 77.59977949283352, "grad_norm": 3.2125141620635986, "learning_rate": 2.2364705882352945e-06, "loss": 1.9783, "step": 52815 }, { "epoch": 77.63065049614113, "grad_norm": 3.572352409362793, "learning_rate": 2.2333823529411764e-06, "loss": 1.8585, "step": 52836 }, { "epoch": 77.66152149944874, "grad_norm": 3.1547422409057617, "learning_rate": 2.2302941176470593e-06, "loss": 1.8776, "step": 52857 }, { "epoch": 77.69239250275633, "grad_norm": 2.9596502780914307, "learning_rate": 2.2272058823529413e-06, "loss": 1.9211, "step": 52878 }, { "epoch": 77.72326350606394, "grad_norm": 2.537531614303589, "learning_rate": 2.2241176470588237e-06, "loss": 1.8779, "step": 52899 }, { "epoch": 77.75413450937155, "grad_norm": 2.3152942657470703, "learning_rate": 2.221029411764706e-06, "loss": 1.8188, "step": 52920 }, { "epoch": 77.78500551267916, "grad_norm": 3.5321731567382812, "learning_rate": 2.2179411764705885e-06, "loss": 2.0079, "step": 52941 }, { "epoch": 77.81587651598677, "grad_norm": 4.066924095153809, "learning_rate": 2.214852941176471e-06, "loss": 1.8835, "step": 52962 }, { "epoch": 77.84674751929438, "grad_norm": 3.4849722385406494, "learning_rate": 2.2117647058823533e-06, "loss": 1.8451, "step": 52983 }, { "epoch": 77.87761852260198, "grad_norm": 5.796635627746582, "learning_rate": 2.2086764705882353e-06, "loss": 1.8715, "step": 53004 }, { "epoch": 77.90848952590959, "grad_norm": 3.3961362838745117, "learning_rate": 2.2055882352941177e-06, "loss": 1.8978, "step": 53025 }, { "epoch": 77.9393605292172, "grad_norm": 2.9069998264312744, "learning_rate": 2.2025e-06, "loss": 1.9561, "step": 53046 }, { "epoch": 77.9702315325248, "grad_norm": 3.221327781677246, "learning_rate": 2.1994117647058825e-06, "loss": 1.9494, "step": 53067 }, { "epoch": 78.0, "grad_norm": 1.2039324045181274, "learning_rate": 2.196323529411765e-06, "loss": 1.8628, "step": 53088 }, { "epoch": 78.03087100330761, "grad_norm": 4.446002006530762, "learning_rate": 2.1932352941176473e-06, "loss": 1.9292, "step": 53109 }, { "epoch": 78.06174200661522, "grad_norm": 3.079376459121704, "learning_rate": 2.1901470588235297e-06, "loss": 1.7926, "step": 53130 }, { "epoch": 78.09261300992283, "grad_norm": 2.4712345600128174, "learning_rate": 2.1870588235294117e-06, "loss": 1.8624, "step": 53151 }, { "epoch": 78.12348401323042, "grad_norm": 2.881467819213867, "learning_rate": 2.1839705882352945e-06, "loss": 1.8272, "step": 53172 }, { "epoch": 78.15435501653803, "grad_norm": 3.9135468006134033, "learning_rate": 2.1808823529411765e-06, "loss": 1.9523, "step": 53193 }, { "epoch": 78.18522601984564, "grad_norm": 3.337737560272217, "learning_rate": 2.177794117647059e-06, "loss": 1.8797, "step": 53214 }, { "epoch": 78.21609702315325, "grad_norm": 3.2180802822113037, "learning_rate": 2.1747058823529413e-06, "loss": 1.9605, "step": 53235 }, { "epoch": 78.24696802646086, "grad_norm": 4.1542534828186035, "learning_rate": 2.1716176470588237e-06, "loss": 1.8794, "step": 53256 }, { "epoch": 78.27783902976847, "grad_norm": 3.3262102603912354, "learning_rate": 2.168529411764706e-06, "loss": 1.8924, "step": 53277 }, { "epoch": 78.30871003307608, "grad_norm": 3.4390077590942383, "learning_rate": 2.1654411764705886e-06, "loss": 1.9986, "step": 53298 }, { "epoch": 78.33958103638368, "grad_norm": 3.0732836723327637, "learning_rate": 2.1623529411764705e-06, "loss": 2.0085, "step": 53319 }, { "epoch": 78.37045203969129, "grad_norm": 3.862992286682129, "learning_rate": 2.1592647058823534e-06, "loss": 2.0121, "step": 53340 }, { "epoch": 78.4013230429989, "grad_norm": 3.485213041305542, "learning_rate": 2.1561764705882354e-06, "loss": 1.8854, "step": 53361 }, { "epoch": 78.4321940463065, "grad_norm": 2.326822519302368, "learning_rate": 2.1530882352941178e-06, "loss": 1.8383, "step": 53382 }, { "epoch": 78.46306504961412, "grad_norm": 2.054612874984741, "learning_rate": 2.15e-06, "loss": 1.852, "step": 53403 }, { "epoch": 78.49393605292173, "grad_norm": 4.28131103515625, "learning_rate": 2.1469117647058826e-06, "loss": 1.9932, "step": 53424 }, { "epoch": 78.52480705622932, "grad_norm": 3.1699934005737305, "learning_rate": 2.143823529411765e-06, "loss": 1.8032, "step": 53445 }, { "epoch": 78.55567805953693, "grad_norm": 3.745795488357544, "learning_rate": 2.1407352941176474e-06, "loss": 1.9409, "step": 53466 }, { "epoch": 78.58654906284454, "grad_norm": 4.9390363693237305, "learning_rate": 2.1376470588235294e-06, "loss": 1.9563, "step": 53487 }, { "epoch": 78.61742006615215, "grad_norm": 4.405721187591553, "learning_rate": 2.134558823529412e-06, "loss": 1.9059, "step": 53508 }, { "epoch": 78.64829106945976, "grad_norm": 3.1443326473236084, "learning_rate": 2.131470588235294e-06, "loss": 1.9435, "step": 53529 }, { "epoch": 78.67916207276737, "grad_norm": 3.4657139778137207, "learning_rate": 2.1283823529411766e-06, "loss": 1.9548, "step": 53550 }, { "epoch": 78.71003307607498, "grad_norm": 4.083343029022217, "learning_rate": 2.125294117647059e-06, "loss": 1.9351, "step": 53571 }, { "epoch": 78.74090407938257, "grad_norm": 3.110407829284668, "learning_rate": 2.1222058823529414e-06, "loss": 1.8613, "step": 53592 }, { "epoch": 78.77177508269018, "grad_norm": 2.7243080139160156, "learning_rate": 2.119117647058824e-06, "loss": 1.8568, "step": 53613 }, { "epoch": 78.8026460859978, "grad_norm": 1.9338915348052979, "learning_rate": 2.116029411764706e-06, "loss": 1.8276, "step": 53634 }, { "epoch": 78.8335170893054, "grad_norm": 3.2793514728546143, "learning_rate": 2.1129411764705886e-06, "loss": 2.0224, "step": 53655 }, { "epoch": 78.86438809261301, "grad_norm": 3.131603956222534, "learning_rate": 2.1098529411764706e-06, "loss": 1.9412, "step": 53676 }, { "epoch": 78.89525909592062, "grad_norm": 4.126333713531494, "learning_rate": 2.106764705882353e-06, "loss": 1.813, "step": 53697 }, { "epoch": 78.92613009922823, "grad_norm": 3.446371078491211, "learning_rate": 2.1036764705882354e-06, "loss": 1.8192, "step": 53718 }, { "epoch": 78.95700110253583, "grad_norm": 3.122298240661621, "learning_rate": 2.100588235294118e-06, "loss": 1.9535, "step": 53739 }, { "epoch": 78.98787210584344, "grad_norm": 3.5411131381988525, "learning_rate": 2.0975000000000002e-06, "loss": 1.8914, "step": 53760 }, { "epoch": 79.01764057331863, "grad_norm": 2.8983497619628906, "learning_rate": 2.0944117647058827e-06, "loss": 1.873, "step": 53781 }, { "epoch": 79.04851157662624, "grad_norm": 2.1497483253479004, "learning_rate": 2.0913235294117646e-06, "loss": 1.8743, "step": 53802 }, { "epoch": 79.07938257993385, "grad_norm": 4.303986072540283, "learning_rate": 2.0882352941176475e-06, "loss": 2.068, "step": 53823 }, { "epoch": 79.11025358324146, "grad_norm": 3.530034303665161, "learning_rate": 2.0851470588235295e-06, "loss": 1.9109, "step": 53844 }, { "epoch": 79.14112458654907, "grad_norm": 4.798586845397949, "learning_rate": 2.082058823529412e-06, "loss": 1.7187, "step": 53865 }, { "epoch": 79.17199558985666, "grad_norm": 3.43611478805542, "learning_rate": 2.0789705882352943e-06, "loss": 1.9356, "step": 53886 }, { "epoch": 79.20286659316427, "grad_norm": 2.9902796745300293, "learning_rate": 2.0758823529411767e-06, "loss": 1.8499, "step": 53907 }, { "epoch": 79.23373759647188, "grad_norm": 3.2594876289367676, "learning_rate": 2.072794117647059e-06, "loss": 1.8621, "step": 53928 }, { "epoch": 79.26460859977949, "grad_norm": 3.2155396938323975, "learning_rate": 2.0697058823529415e-06, "loss": 1.9472, "step": 53949 }, { "epoch": 79.2954796030871, "grad_norm": 2.696277379989624, "learning_rate": 2.0666176470588235e-06, "loss": 2.0207, "step": 53970 }, { "epoch": 79.32635060639471, "grad_norm": 2.7773244380950928, "learning_rate": 2.0635294117647063e-06, "loss": 1.9105, "step": 53991 }, { "epoch": 79.35722160970232, "grad_norm": 5.255712985992432, "learning_rate": 2.0604411764705883e-06, "loss": 2.0105, "step": 54012 }, { "epoch": 79.38809261300992, "grad_norm": 4.8892598152160645, "learning_rate": 2.0573529411764707e-06, "loss": 1.8834, "step": 54033 }, { "epoch": 79.41896361631753, "grad_norm": 3.9424781799316406, "learning_rate": 2.054264705882353e-06, "loss": 1.7858, "step": 54054 }, { "epoch": 79.44983461962514, "grad_norm": 2.9775705337524414, "learning_rate": 2.0511764705882355e-06, "loss": 1.8846, "step": 54075 }, { "epoch": 79.48070562293275, "grad_norm": 2.102572202682495, "learning_rate": 2.048088235294118e-06, "loss": 1.9752, "step": 54096 }, { "epoch": 79.51157662624036, "grad_norm": 2.149517059326172, "learning_rate": 2.045e-06, "loss": 1.8571, "step": 54117 }, { "epoch": 79.54244762954796, "grad_norm": 2.7648065090179443, "learning_rate": 2.0419117647058827e-06, "loss": 1.9316, "step": 54138 }, { "epoch": 79.57331863285557, "grad_norm": 3.8295509815216064, "learning_rate": 2.0388235294117647e-06, "loss": 1.9457, "step": 54159 }, { "epoch": 79.60418963616317, "grad_norm": 2.7700726985931396, "learning_rate": 2.035735294117647e-06, "loss": 1.8569, "step": 54180 }, { "epoch": 79.63506063947078, "grad_norm": 3.1032280921936035, "learning_rate": 2.0326470588235295e-06, "loss": 1.936, "step": 54201 }, { "epoch": 79.66593164277839, "grad_norm": 2.5220541954040527, "learning_rate": 2.029558823529412e-06, "loss": 1.9996, "step": 54222 }, { "epoch": 79.696802646086, "grad_norm": 2.75738263130188, "learning_rate": 2.0264705882352943e-06, "loss": 1.9259, "step": 54243 }, { "epoch": 79.72767364939361, "grad_norm": 3.1567132472991943, "learning_rate": 2.0233823529411768e-06, "loss": 1.8883, "step": 54264 }, { "epoch": 79.75854465270122, "grad_norm": 2.354546546936035, "learning_rate": 2.0202941176470587e-06, "loss": 1.8734, "step": 54285 }, { "epoch": 79.78941565600881, "grad_norm": 2.1186363697052, "learning_rate": 2.0172058823529416e-06, "loss": 1.9968, "step": 54306 }, { "epoch": 79.82028665931642, "grad_norm": 3.7963063716888428, "learning_rate": 2.0141176470588236e-06, "loss": 1.83, "step": 54327 }, { "epoch": 79.85115766262403, "grad_norm": 4.2135515213012695, "learning_rate": 2.011029411764706e-06, "loss": 2.0007, "step": 54348 }, { "epoch": 79.88202866593164, "grad_norm": 2.9620189666748047, "learning_rate": 2.0079411764705884e-06, "loss": 1.9041, "step": 54369 }, { "epoch": 79.91289966923925, "grad_norm": 2.6110002994537354, "learning_rate": 2.0048529411764708e-06, "loss": 1.923, "step": 54390 }, { "epoch": 79.94377067254686, "grad_norm": 3.4644336700439453, "learning_rate": 2.001764705882353e-06, "loss": 1.8278, "step": 54411 }, { "epoch": 79.97464167585447, "grad_norm": 2.3990249633789062, "learning_rate": 1.9986764705882356e-06, "loss": 1.9162, "step": 54432 }, { "epoch": 80.00441014332966, "grad_norm": 2.992694854736328, "learning_rate": 1.9955882352941176e-06, "loss": 1.881, "step": 54453 }, { "epoch": 80.03528114663726, "grad_norm": 2.613642930984497, "learning_rate": 1.9925000000000004e-06, "loss": 1.8855, "step": 54474 }, { "epoch": 80.06615214994487, "grad_norm": 3.76458477973938, "learning_rate": 1.9894117647058824e-06, "loss": 1.8586, "step": 54495 }, { "epoch": 80.09702315325248, "grad_norm": 3.0573434829711914, "learning_rate": 1.986323529411765e-06, "loss": 1.9761, "step": 54516 }, { "epoch": 80.12789415656009, "grad_norm": 2.528221845626831, "learning_rate": 1.983235294117647e-06, "loss": 1.8447, "step": 54537 }, { "epoch": 80.1587651598677, "grad_norm": 2.143775463104248, "learning_rate": 1.9801470588235296e-06, "loss": 1.8518, "step": 54558 }, { "epoch": 80.18963616317531, "grad_norm": 3.5363776683807373, "learning_rate": 1.977058823529412e-06, "loss": 1.954, "step": 54579 }, { "epoch": 80.22050716648292, "grad_norm": 2.219294786453247, "learning_rate": 1.9739705882352944e-06, "loss": 1.8689, "step": 54600 }, { "epoch": 80.25137816979051, "grad_norm": 3.9676334857940674, "learning_rate": 1.970882352941177e-06, "loss": 1.9159, "step": 54621 }, { "epoch": 80.28224917309812, "grad_norm": 3.110699415206909, "learning_rate": 1.967794117647059e-06, "loss": 1.8877, "step": 54642 }, { "epoch": 80.31312017640573, "grad_norm": 4.5097126960754395, "learning_rate": 1.9647058823529412e-06, "loss": 1.9106, "step": 54663 }, { "epoch": 80.34399117971334, "grad_norm": 4.937746047973633, "learning_rate": 1.9616176470588236e-06, "loss": 1.9262, "step": 54684 }, { "epoch": 80.37486218302095, "grad_norm": 4.997631072998047, "learning_rate": 1.958529411764706e-06, "loss": 1.9565, "step": 54705 }, { "epoch": 80.40573318632856, "grad_norm": 3.515503406524658, "learning_rate": 1.9554411764705884e-06, "loss": 2.0399, "step": 54726 }, { "epoch": 80.43660418963616, "grad_norm": 3.368488311767578, "learning_rate": 1.952352941176471e-06, "loss": 1.9573, "step": 54747 }, { "epoch": 80.46747519294377, "grad_norm": 3.216134786605835, "learning_rate": 1.949264705882353e-06, "loss": 1.8304, "step": 54768 }, { "epoch": 80.49834619625138, "grad_norm": 2.3551666736602783, "learning_rate": 1.9461764705882357e-06, "loss": 1.9818, "step": 54789 }, { "epoch": 80.52921719955899, "grad_norm": 4.402371406555176, "learning_rate": 1.9430882352941177e-06, "loss": 1.9777, "step": 54810 }, { "epoch": 80.5600882028666, "grad_norm": 2.428933620452881, "learning_rate": 1.94e-06, "loss": 1.9477, "step": 54831 }, { "epoch": 80.5909592061742, "grad_norm": 2.681938648223877, "learning_rate": 1.9369117647058825e-06, "loss": 1.8942, "step": 54852 }, { "epoch": 80.62183020948181, "grad_norm": 3.1741232872009277, "learning_rate": 1.933823529411765e-06, "loss": 1.9428, "step": 54873 }, { "epoch": 80.65270121278941, "grad_norm": 2.3936588764190674, "learning_rate": 1.9307352941176473e-06, "loss": 1.8308, "step": 54894 }, { "epoch": 80.68357221609702, "grad_norm": 2.5987894535064697, "learning_rate": 1.9276470588235297e-06, "loss": 1.8551, "step": 54915 }, { "epoch": 80.71444321940463, "grad_norm": 4.933919906616211, "learning_rate": 1.9245588235294117e-06, "loss": 1.8583, "step": 54936 }, { "epoch": 80.74531422271224, "grad_norm": 4.443017959594727, "learning_rate": 1.9214705882352945e-06, "loss": 1.9158, "step": 54957 }, { "epoch": 80.77618522601985, "grad_norm": 2.5975685119628906, "learning_rate": 1.9183823529411765e-06, "loss": 1.9396, "step": 54978 }, { "epoch": 80.80705622932746, "grad_norm": 3.5602004528045654, "learning_rate": 1.915294117647059e-06, "loss": 1.9, "step": 54999 }, { "epoch": 80.83792723263507, "grad_norm": 5.030701637268066, "learning_rate": 1.9122058823529413e-06, "loss": 1.8543, "step": 55020 }, { "epoch": 80.86879823594266, "grad_norm": 3.0099844932556152, "learning_rate": 1.9091176470588237e-06, "loss": 1.9342, "step": 55041 }, { "epoch": 80.89966923925027, "grad_norm": 3.5271270275115967, "learning_rate": 1.9060294117647061e-06, "loss": 1.883, "step": 55062 }, { "epoch": 80.93054024255788, "grad_norm": 2.385831832885742, "learning_rate": 1.9029411764705885e-06, "loss": 1.8801, "step": 55083 }, { "epoch": 80.96141124586549, "grad_norm": 2.7258832454681396, "learning_rate": 1.8998529411764707e-06, "loss": 1.9379, "step": 55104 }, { "epoch": 80.9922822491731, "grad_norm": 3.468010902404785, "learning_rate": 1.8967647058823531e-06, "loss": 1.8262, "step": 55125 }, { "epoch": 81.0220507166483, "grad_norm": 2.2595319747924805, "learning_rate": 1.8936764705882355e-06, "loss": 1.7634, "step": 55146 }, { "epoch": 81.0529217199559, "grad_norm": 2.740424394607544, "learning_rate": 1.8905882352941177e-06, "loss": 1.9531, "step": 55167 }, { "epoch": 81.0837927232635, "grad_norm": 3.5070292949676514, "learning_rate": 1.8875000000000001e-06, "loss": 1.9735, "step": 55188 }, { "epoch": 81.11466372657111, "grad_norm": 2.012866973876953, "learning_rate": 1.8844117647058823e-06, "loss": 1.8721, "step": 55209 }, { "epoch": 81.14553472987872, "grad_norm": 2.34311842918396, "learning_rate": 1.881323529411765e-06, "loss": 1.8043, "step": 55230 }, { "epoch": 81.17640573318633, "grad_norm": 5.14400053024292, "learning_rate": 1.8782352941176472e-06, "loss": 1.9143, "step": 55251 }, { "epoch": 81.20727673649394, "grad_norm": 2.1524829864501953, "learning_rate": 1.875294117647059e-06, "loss": 1.9258, "step": 55272 }, { "epoch": 81.23814773980155, "grad_norm": 4.2788872718811035, "learning_rate": 1.8722058823529413e-06, "loss": 1.8759, "step": 55293 }, { "epoch": 81.26901874310916, "grad_norm": 2.789827823638916, "learning_rate": 1.8691176470588237e-06, "loss": 1.8454, "step": 55314 }, { "epoch": 81.29988974641675, "grad_norm": 2.6075949668884277, "learning_rate": 1.866029411764706e-06, "loss": 1.9176, "step": 55335 }, { "epoch": 81.33076074972436, "grad_norm": 3.2883353233337402, "learning_rate": 1.8629411764705885e-06, "loss": 1.933, "step": 55356 }, { "epoch": 81.36163175303197, "grad_norm": 2.641495704650879, "learning_rate": 1.8598529411764707e-06, "loss": 1.8954, "step": 55377 }, { "epoch": 81.39250275633958, "grad_norm": 2.37809419631958, "learning_rate": 1.856764705882353e-06, "loss": 1.8682, "step": 55398 }, { "epoch": 81.42337375964719, "grad_norm": 2.954416275024414, "learning_rate": 1.8536764705882355e-06, "loss": 1.9094, "step": 55419 }, { "epoch": 81.4542447629548, "grad_norm": 3.1513495445251465, "learning_rate": 1.850588235294118e-06, "loss": 1.8165, "step": 55440 }, { "epoch": 81.48511576626241, "grad_norm": 3.5151946544647217, "learning_rate": 1.8475e-06, "loss": 1.9801, "step": 55461 }, { "epoch": 81.51598676957, "grad_norm": 4.389401912689209, "learning_rate": 1.8444117647058827e-06, "loss": 1.9072, "step": 55482 }, { "epoch": 81.54685777287762, "grad_norm": 3.281489849090576, "learning_rate": 1.841323529411765e-06, "loss": 1.858, "step": 55503 }, { "epoch": 81.57772877618523, "grad_norm": 4.108984470367432, "learning_rate": 1.8382352941176473e-06, "loss": 1.8076, "step": 55524 }, { "epoch": 81.60859977949283, "grad_norm": 3.352931261062622, "learning_rate": 1.8351470588235295e-06, "loss": 1.9589, "step": 55545 }, { "epoch": 81.63947078280044, "grad_norm": 3.3521549701690674, "learning_rate": 1.8320588235294117e-06, "loss": 1.9398, "step": 55566 }, { "epoch": 81.67034178610805, "grad_norm": 2.6842236518859863, "learning_rate": 1.8289705882352943e-06, "loss": 2.0069, "step": 55587 }, { "epoch": 81.70121278941565, "grad_norm": 1.912843942642212, "learning_rate": 1.8258823529411765e-06, "loss": 1.8707, "step": 55608 }, { "epoch": 81.73208379272326, "grad_norm": 5.301822185516357, "learning_rate": 1.822794117647059e-06, "loss": 1.8702, "step": 55629 }, { "epoch": 81.76295479603087, "grad_norm": 1.7935271263122559, "learning_rate": 1.8197058823529411e-06, "loss": 1.8525, "step": 55650 }, { "epoch": 81.79382579933848, "grad_norm": 1.4970215559005737, "learning_rate": 1.8166176470588237e-06, "loss": 1.9875, "step": 55671 }, { "epoch": 81.82469680264609, "grad_norm": 3.193878650665283, "learning_rate": 1.813529411764706e-06, "loss": 1.8876, "step": 55692 }, { "epoch": 81.8555678059537, "grad_norm": 2.9747655391693115, "learning_rate": 1.8104411764705884e-06, "loss": 1.9617, "step": 55713 }, { "epoch": 81.8864388092613, "grad_norm": 2.965677261352539, "learning_rate": 1.8073529411764705e-06, "loss": 1.9311, "step": 55734 }, { "epoch": 81.9173098125689, "grad_norm": 3.706115484237671, "learning_rate": 1.8042647058823532e-06, "loss": 1.847, "step": 55755 }, { "epoch": 81.94818081587651, "grad_norm": 3.1666228771209717, "learning_rate": 1.8011764705882354e-06, "loss": 1.849, "step": 55776 }, { "epoch": 81.97905181918412, "grad_norm": 2.9520974159240723, "learning_rate": 1.7980882352941178e-06, "loss": 1.8427, "step": 55797 }, { "epoch": 82.00882028665932, "grad_norm": 3.5446219444274902, "learning_rate": 1.7950000000000002e-06, "loss": 1.78, "step": 55818 }, { "epoch": 82.03969128996692, "grad_norm": 3.0691027641296387, "learning_rate": 1.7919117647058826e-06, "loss": 1.935, "step": 55839 }, { "epoch": 82.07056229327453, "grad_norm": 4.00185489654541, "learning_rate": 1.7888235294117648e-06, "loss": 1.9209, "step": 55860 }, { "epoch": 82.10143329658214, "grad_norm": 3.932236671447754, "learning_rate": 1.7857352941176472e-06, "loss": 1.8287, "step": 55881 }, { "epoch": 82.13230429988975, "grad_norm": 3.2191712856292725, "learning_rate": 1.7826470588235296e-06, "loss": 2.0003, "step": 55902 }, { "epoch": 82.16317530319735, "grad_norm": 3.0343360900878906, "learning_rate": 1.779558823529412e-06, "loss": 1.9686, "step": 55923 }, { "epoch": 82.19404630650496, "grad_norm": 4.155202865600586, "learning_rate": 1.7764705882352942e-06, "loss": 1.9317, "step": 55944 }, { "epoch": 82.22491730981257, "grad_norm": 2.677639961242676, "learning_rate": 1.7733823529411768e-06, "loss": 1.802, "step": 55965 }, { "epoch": 82.25578831312018, "grad_norm": 3.8115053176879883, "learning_rate": 1.770294117647059e-06, "loss": 1.9, "step": 55986 }, { "epoch": 82.28665931642779, "grad_norm": 4.36173152923584, "learning_rate": 1.7672058823529414e-06, "loss": 1.9037, "step": 56007 }, { "epoch": 82.3175303197354, "grad_norm": 3.4902122020721436, "learning_rate": 1.7641176470588236e-06, "loss": 1.8978, "step": 56028 }, { "epoch": 82.34840132304299, "grad_norm": 2.6003305912017822, "learning_rate": 1.7610294117647062e-06, "loss": 1.9309, "step": 56049 }, { "epoch": 82.3792723263506, "grad_norm": 3.3649659156799316, "learning_rate": 1.7579411764705884e-06, "loss": 1.9153, "step": 56070 }, { "epoch": 82.41014332965821, "grad_norm": 3.347731828689575, "learning_rate": 1.7548529411764708e-06, "loss": 1.908, "step": 56091 }, { "epoch": 82.44101433296582, "grad_norm": 3.347778558731079, "learning_rate": 1.751764705882353e-06, "loss": 1.9505, "step": 56112 }, { "epoch": 82.47188533627343, "grad_norm": 2.663743257522583, "learning_rate": 1.7486764705882352e-06, "loss": 1.8741, "step": 56133 }, { "epoch": 82.50275633958104, "grad_norm": 2.401683807373047, "learning_rate": 1.7455882352941178e-06, "loss": 1.9598, "step": 56154 }, { "epoch": 82.53362734288865, "grad_norm": 2.8510944843292236, "learning_rate": 1.7425e-06, "loss": 1.8976, "step": 56175 }, { "epoch": 82.56449834619625, "grad_norm": 2.6884796619415283, "learning_rate": 1.7394117647058825e-06, "loss": 1.8664, "step": 56196 }, { "epoch": 82.59536934950386, "grad_norm": 3.1295108795166016, "learning_rate": 1.7363235294117646e-06, "loss": 1.977, "step": 56217 }, { "epoch": 82.62624035281146, "grad_norm": 2.698425054550171, "learning_rate": 1.7332352941176473e-06, "loss": 1.8867, "step": 56238 }, { "epoch": 82.65711135611907, "grad_norm": 2.819748640060425, "learning_rate": 1.7301470588235295e-06, "loss": 1.799, "step": 56259 }, { "epoch": 82.68798235942668, "grad_norm": 2.1235549449920654, "learning_rate": 1.7270588235294119e-06, "loss": 1.9046, "step": 56280 }, { "epoch": 82.7188533627343, "grad_norm": 3.925448179244995, "learning_rate": 1.7239705882352943e-06, "loss": 1.9257, "step": 56301 }, { "epoch": 82.7497243660419, "grad_norm": 1.3085805177688599, "learning_rate": 1.7208823529411767e-06, "loss": 1.9641, "step": 56322 }, { "epoch": 82.7805953693495, "grad_norm": 2.447262763977051, "learning_rate": 1.7177941176470589e-06, "loss": 1.8715, "step": 56343 }, { "epoch": 82.81146637265711, "grad_norm": 3.1304545402526855, "learning_rate": 1.7147058823529413e-06, "loss": 1.9407, "step": 56364 }, { "epoch": 82.84233737596472, "grad_norm": 4.184123992919922, "learning_rate": 1.7116176470588237e-06, "loss": 1.8839, "step": 56385 }, { "epoch": 82.87320837927233, "grad_norm": 3.719771385192871, "learning_rate": 1.708529411764706e-06, "loss": 1.8754, "step": 56406 }, { "epoch": 82.90407938257994, "grad_norm": 2.999370813369751, "learning_rate": 1.7054411764705883e-06, "loss": 1.902, "step": 56427 }, { "epoch": 82.93495038588755, "grad_norm": 3.0323338508605957, "learning_rate": 1.702352941176471e-06, "loss": 1.8526, "step": 56448 }, { "epoch": 82.96582138919514, "grad_norm": 2.734877824783325, "learning_rate": 1.6992647058823531e-06, "loss": 1.9383, "step": 56469 }, { "epoch": 82.99669239250275, "grad_norm": 2.554425001144409, "learning_rate": 1.6961764705882355e-06, "loss": 1.9002, "step": 56490 }, { "epoch": 83.02646085997795, "grad_norm": 3.2247154712677, "learning_rate": 1.6930882352941177e-06, "loss": 1.8686, "step": 56511 }, { "epoch": 83.05733186328555, "grad_norm": 2.6269779205322266, "learning_rate": 1.6900000000000003e-06, "loss": 2.0023, "step": 56532 }, { "epoch": 83.08820286659316, "grad_norm": 4.678812026977539, "learning_rate": 1.6869117647058825e-06, "loss": 1.8851, "step": 56553 }, { "epoch": 83.11907386990077, "grad_norm": 3.2983531951904297, "learning_rate": 1.683823529411765e-06, "loss": 1.9637, "step": 56574 }, { "epoch": 83.14994487320838, "grad_norm": 2.474243640899658, "learning_rate": 1.6807352941176471e-06, "loss": 1.8696, "step": 56595 }, { "epoch": 83.180815876516, "grad_norm": 4.16417932510376, "learning_rate": 1.6776470588235298e-06, "loss": 1.8791, "step": 56616 }, { "epoch": 83.21168687982359, "grad_norm": 2.168107032775879, "learning_rate": 1.674558823529412e-06, "loss": 2.0216, "step": 56637 }, { "epoch": 83.2425578831312, "grad_norm": 2.1474661827087402, "learning_rate": 1.6714705882352941e-06, "loss": 1.902, "step": 56658 }, { "epoch": 83.27342888643881, "grad_norm": 3.0710597038269043, "learning_rate": 1.6683823529411766e-06, "loss": 1.985, "step": 56679 }, { "epoch": 83.30429988974642, "grad_norm": 2.7479002475738525, "learning_rate": 1.6652941176470587e-06, "loss": 1.8998, "step": 56700 }, { "epoch": 83.33517089305403, "grad_norm": 3.1270346641540527, "learning_rate": 1.6622058823529414e-06, "loss": 1.8949, "step": 56721 }, { "epoch": 83.36604189636164, "grad_norm": 2.6438467502593994, "learning_rate": 1.6591176470588236e-06, "loss": 1.9628, "step": 56742 }, { "epoch": 83.39691289966925, "grad_norm": 2.4969482421875, "learning_rate": 1.656029411764706e-06, "loss": 1.934, "step": 56763 }, { "epoch": 83.42778390297684, "grad_norm": 4.895516395568848, "learning_rate": 1.6529411764705882e-06, "loss": 1.9102, "step": 56784 }, { "epoch": 83.45865490628445, "grad_norm": 3.5764198303222656, "learning_rate": 1.6498529411764708e-06, "loss": 1.9143, "step": 56805 }, { "epoch": 83.48952590959206, "grad_norm": 3.8473057746887207, "learning_rate": 1.646764705882353e-06, "loss": 1.9567, "step": 56826 }, { "epoch": 83.52039691289967, "grad_norm": 4.43287467956543, "learning_rate": 1.6436764705882354e-06, "loss": 1.8204, "step": 56847 }, { "epoch": 83.55126791620728, "grad_norm": 2.7073535919189453, "learning_rate": 1.6405882352941178e-06, "loss": 1.8468, "step": 56868 }, { "epoch": 83.58213891951489, "grad_norm": 2.755333185195923, "learning_rate": 1.6375000000000002e-06, "loss": 1.8935, "step": 56889 }, { "epoch": 83.61300992282249, "grad_norm": 3.0799880027770996, "learning_rate": 1.6344117647058824e-06, "loss": 1.9557, "step": 56910 }, { "epoch": 83.6438809261301, "grad_norm": 3.13040828704834, "learning_rate": 1.631323529411765e-06, "loss": 1.9047, "step": 56931 }, { "epoch": 83.6747519294377, "grad_norm": 3.063424587249756, "learning_rate": 1.6282352941176472e-06, "loss": 1.9507, "step": 56952 }, { "epoch": 83.70562293274531, "grad_norm": 2.568812370300293, "learning_rate": 1.6251470588235296e-06, "loss": 1.8637, "step": 56973 }, { "epoch": 83.73649393605292, "grad_norm": 2.4771158695220947, "learning_rate": 1.6220588235294118e-06, "loss": 1.9089, "step": 56994 }, { "epoch": 83.76736493936053, "grad_norm": 3.354410171508789, "learning_rate": 1.6189705882352944e-06, "loss": 1.7989, "step": 57015 }, { "epoch": 83.79823594266814, "grad_norm": 3.4246435165405273, "learning_rate": 1.6158823529411766e-06, "loss": 1.9013, "step": 57036 }, { "epoch": 83.82910694597574, "grad_norm": 4.4100751876831055, "learning_rate": 1.612794117647059e-06, "loss": 1.9995, "step": 57057 }, { "epoch": 83.85997794928335, "grad_norm": 4.4296674728393555, "learning_rate": 1.6097058823529412e-06, "loss": 1.8234, "step": 57078 }, { "epoch": 83.89084895259096, "grad_norm": 2.6848065853118896, "learning_rate": 1.6066176470588239e-06, "loss": 1.8993, "step": 57099 }, { "epoch": 83.92171995589857, "grad_norm": 2.653097152709961, "learning_rate": 1.603529411764706e-06, "loss": 1.8631, "step": 57120 }, { "epoch": 83.95259095920618, "grad_norm": 3.478318929672241, "learning_rate": 1.6004411764705885e-06, "loss": 1.9339, "step": 57141 }, { "epoch": 83.98346196251379, "grad_norm": 3.2869911193847656, "learning_rate": 1.5973529411764707e-06, "loss": 1.862, "step": 57162 }, { "epoch": 84.01323042998898, "grad_norm": 4.9242048263549805, "learning_rate": 1.5942647058823528e-06, "loss": 1.8761, "step": 57183 }, { "epoch": 84.04410143329659, "grad_norm": 2.128086566925049, "learning_rate": 1.591323529411765e-06, "loss": 1.8304, "step": 57204 }, { "epoch": 84.07497243660418, "grad_norm": 4.8411078453063965, "learning_rate": 1.5882352941176472e-06, "loss": 1.9109, "step": 57225 }, { "epoch": 84.1058434399118, "grad_norm": 2.7543294429779053, "learning_rate": 1.5851470588235296e-06, "loss": 1.865, "step": 57246 }, { "epoch": 84.1367144432194, "grad_norm": 2.610814332962036, "learning_rate": 1.5820588235294118e-06, "loss": 1.9155, "step": 57267 }, { "epoch": 84.16758544652701, "grad_norm": 3.5008385181427, "learning_rate": 1.5789705882352944e-06, "loss": 1.7936, "step": 57288 }, { "epoch": 84.19845644983462, "grad_norm": 2.4532723426818848, "learning_rate": 1.5758823529411766e-06, "loss": 2.0299, "step": 57309 }, { "epoch": 84.22932745314223, "grad_norm": 3.2063169479370117, "learning_rate": 1.572794117647059e-06, "loss": 1.9175, "step": 57330 }, { "epoch": 84.26019845644983, "grad_norm": 2.2681519985198975, "learning_rate": 1.5697058823529412e-06, "loss": 1.9648, "step": 57351 }, { "epoch": 84.29106945975744, "grad_norm": 4.413717746734619, "learning_rate": 1.5666176470588238e-06, "loss": 1.8313, "step": 57372 }, { "epoch": 84.32194046306505, "grad_norm": 3.804009437561035, "learning_rate": 1.563529411764706e-06, "loss": 1.95, "step": 57393 }, { "epoch": 84.35281146637266, "grad_norm": 2.972759962081909, "learning_rate": 1.5604411764705884e-06, "loss": 1.8397, "step": 57414 }, { "epoch": 84.38368246968027, "grad_norm": 2.2493152618408203, "learning_rate": 1.5573529411764706e-06, "loss": 1.9346, "step": 57435 }, { "epoch": 84.41455347298788, "grad_norm": 3.6545870304107666, "learning_rate": 1.5542647058823532e-06, "loss": 1.8918, "step": 57456 }, { "epoch": 84.44542447629549, "grad_norm": 3.722609043121338, "learning_rate": 1.5511764705882354e-06, "loss": 1.9855, "step": 57477 }, { "epoch": 84.47629547960308, "grad_norm": 3.2077038288116455, "learning_rate": 1.5480882352941178e-06, "loss": 1.8985, "step": 57498 }, { "epoch": 84.50716648291069, "grad_norm": 2.0955028533935547, "learning_rate": 1.545e-06, "loss": 1.9233, "step": 57519 }, { "epoch": 84.5380374862183, "grad_norm": 2.7638797760009766, "learning_rate": 1.5419117647058826e-06, "loss": 1.9312, "step": 57540 }, { "epoch": 84.56890848952591, "grad_norm": 3.484783411026001, "learning_rate": 1.5388235294117648e-06, "loss": 1.9272, "step": 57561 }, { "epoch": 84.59977949283352, "grad_norm": 3.4145140647888184, "learning_rate": 1.535735294117647e-06, "loss": 1.8748, "step": 57582 }, { "epoch": 84.63065049614113, "grad_norm": 2.745647430419922, "learning_rate": 1.5326470588235294e-06, "loss": 1.8518, "step": 57603 }, { "epoch": 84.66152149944874, "grad_norm": 3.6098361015319824, "learning_rate": 1.5295588235294119e-06, "loss": 1.8922, "step": 57624 }, { "epoch": 84.69239250275633, "grad_norm": 2.638850212097168, "learning_rate": 1.5264705882352943e-06, "loss": 1.898, "step": 57645 }, { "epoch": 84.72326350606394, "grad_norm": 3.006580352783203, "learning_rate": 1.5233823529411765e-06, "loss": 1.8483, "step": 57666 }, { "epoch": 84.75413450937155, "grad_norm": 2.8077564239501953, "learning_rate": 1.520294117647059e-06, "loss": 1.8906, "step": 57687 }, { "epoch": 84.78500551267916, "grad_norm": 3.3110365867614746, "learning_rate": 1.5172058823529413e-06, "loss": 1.9005, "step": 57708 }, { "epoch": 84.81587651598677, "grad_norm": 3.947200059890747, "learning_rate": 1.5141176470588237e-06, "loss": 1.7805, "step": 57729 }, { "epoch": 84.84674751929438, "grad_norm": 2.894094944000244, "learning_rate": 1.5110294117647059e-06, "loss": 1.9119, "step": 57750 }, { "epoch": 84.87761852260198, "grad_norm": 3.3759777545928955, "learning_rate": 1.5079411764705885e-06, "loss": 1.8968, "step": 57771 }, { "epoch": 84.90848952590959, "grad_norm": 3.7107596397399902, "learning_rate": 1.5048529411764707e-06, "loss": 1.8822, "step": 57792 }, { "epoch": 84.9393605292172, "grad_norm": 4.369926929473877, "learning_rate": 1.501764705882353e-06, "loss": 1.927, "step": 57813 }, { "epoch": 84.9702315325248, "grad_norm": 2.1484344005584717, "learning_rate": 1.4986764705882353e-06, "loss": 1.9998, "step": 57834 }, { "epoch": 85.0, "grad_norm": 1.7058526277542114, "learning_rate": 1.495588235294118e-06, "loss": 1.9493, "step": 57855 }, { "epoch": 85.03087100330761, "grad_norm": 3.514209508895874, "learning_rate": 1.4925000000000001e-06, "loss": 1.8482, "step": 57876 }, { "epoch": 85.06174200661522, "grad_norm": 4.797916889190674, "learning_rate": 1.4894117647058825e-06, "loss": 1.9402, "step": 57897 }, { "epoch": 85.09261300992283, "grad_norm": 4.477325439453125, "learning_rate": 1.4863235294117647e-06, "loss": 1.9362, "step": 57918 }, { "epoch": 85.12348401323042, "grad_norm": 4.177937984466553, "learning_rate": 1.4832352941176473e-06, "loss": 1.8534, "step": 57939 }, { "epoch": 85.15435501653803, "grad_norm": 2.226069450378418, "learning_rate": 1.4801470588235295e-06, "loss": 1.9368, "step": 57960 }, { "epoch": 85.18522601984564, "grad_norm": 4.182055473327637, "learning_rate": 1.477058823529412e-06, "loss": 1.9506, "step": 57981 }, { "epoch": 85.21609702315325, "grad_norm": 2.9903206825256348, "learning_rate": 1.4739705882352941e-06, "loss": 1.8915, "step": 58002 }, { "epoch": 85.24696802646086, "grad_norm": 3.299546480178833, "learning_rate": 1.4708823529411768e-06, "loss": 1.7896, "step": 58023 }, { "epoch": 85.27783902976847, "grad_norm": 2.441277027130127, "learning_rate": 1.467794117647059e-06, "loss": 1.9043, "step": 58044 }, { "epoch": 85.30871003307608, "grad_norm": 5.00778865814209, "learning_rate": 1.4647058823529414e-06, "loss": 2.0262, "step": 58065 }, { "epoch": 85.33958103638368, "grad_norm": 1.9028743505477905, "learning_rate": 1.4616176470588235e-06, "loss": 1.846, "step": 58086 }, { "epoch": 85.37045203969129, "grad_norm": 2.2647411823272705, "learning_rate": 1.4585294117647062e-06, "loss": 1.9504, "step": 58107 }, { "epoch": 85.4013230429989, "grad_norm": 3.0109405517578125, "learning_rate": 1.4554411764705884e-06, "loss": 1.9534, "step": 58128 }, { "epoch": 85.4321940463065, "grad_norm": 3.4087748527526855, "learning_rate": 1.4523529411764706e-06, "loss": 1.8823, "step": 58149 }, { "epoch": 85.46306504961412, "grad_norm": 3.8880615234375, "learning_rate": 1.4492647058823532e-06, "loss": 2.0187, "step": 58170 }, { "epoch": 85.49393605292173, "grad_norm": 5.249473571777344, "learning_rate": 1.4461764705882354e-06, "loss": 1.955, "step": 58191 }, { "epoch": 85.52480705622932, "grad_norm": 2.962282657623291, "learning_rate": 1.4430882352941178e-06, "loss": 1.956, "step": 58212 }, { "epoch": 85.55567805953693, "grad_norm": 3.0038020610809326, "learning_rate": 1.44e-06, "loss": 1.88, "step": 58233 }, { "epoch": 85.58654906284454, "grad_norm": 2.637183666229248, "learning_rate": 1.4369117647058826e-06, "loss": 1.8659, "step": 58254 }, { "epoch": 85.61742006615215, "grad_norm": 6.163217544555664, "learning_rate": 1.4338235294117648e-06, "loss": 1.9037, "step": 58275 }, { "epoch": 85.64829106945976, "grad_norm": 2.899384021759033, "learning_rate": 1.4307352941176472e-06, "loss": 1.8622, "step": 58296 }, { "epoch": 85.67916207276737, "grad_norm": 3.446988344192505, "learning_rate": 1.4276470588235294e-06, "loss": 1.8837, "step": 58317 }, { "epoch": 85.71003307607498, "grad_norm": 4.195939540863037, "learning_rate": 1.424558823529412e-06, "loss": 1.8797, "step": 58338 }, { "epoch": 85.74090407938257, "grad_norm": 2.6070778369903564, "learning_rate": 1.4214705882352942e-06, "loss": 1.9156, "step": 58359 }, { "epoch": 85.77177508269018, "grad_norm": 3.2965219020843506, "learning_rate": 1.4183823529411766e-06, "loss": 1.8618, "step": 58380 }, { "epoch": 85.8026460859978, "grad_norm": 3.6454925537109375, "learning_rate": 1.4152941176470588e-06, "loss": 1.9312, "step": 58401 }, { "epoch": 85.8335170893054, "grad_norm": 1.9898680448532104, "learning_rate": 1.4122058823529414e-06, "loss": 2.0062, "step": 58422 }, { "epoch": 85.86438809261301, "grad_norm": 1.8735496997833252, "learning_rate": 1.4091176470588236e-06, "loss": 1.8109, "step": 58443 }, { "epoch": 85.89525909592062, "grad_norm": 2.667759656906128, "learning_rate": 1.406029411764706e-06, "loss": 1.8809, "step": 58464 }, { "epoch": 85.92613009922823, "grad_norm": 2.378823757171631, "learning_rate": 1.4029411764705882e-06, "loss": 1.7376, "step": 58485 }, { "epoch": 85.95700110253583, "grad_norm": 2.3611834049224854, "learning_rate": 1.3998529411764709e-06, "loss": 2.0057, "step": 58506 }, { "epoch": 85.98787210584344, "grad_norm": 3.3261101245880127, "learning_rate": 1.396764705882353e-06, "loss": 1.9105, "step": 58527 }, { "epoch": 86.01764057331863, "grad_norm": 3.8410630226135254, "learning_rate": 1.3936764705882355e-06, "loss": 1.8213, "step": 58548 }, { "epoch": 86.04851157662624, "grad_norm": 3.564905881881714, "learning_rate": 1.3905882352941176e-06, "loss": 1.8392, "step": 58569 }, { "epoch": 86.07938257993385, "grad_norm": 4.838118076324463, "learning_rate": 1.3875000000000003e-06, "loss": 1.8967, "step": 58590 }, { "epoch": 86.11025358324146, "grad_norm": 2.3735036849975586, "learning_rate": 1.3844117647058825e-06, "loss": 1.9762, "step": 58611 }, { "epoch": 86.14112458654907, "grad_norm": 3.0326428413391113, "learning_rate": 1.3813235294117649e-06, "loss": 1.82, "step": 58632 }, { "epoch": 86.17199558985666, "grad_norm": 3.101670026779175, "learning_rate": 1.378235294117647e-06, "loss": 1.8194, "step": 58653 }, { "epoch": 86.20286659316427, "grad_norm": 2.390058755874634, "learning_rate": 1.3751470588235295e-06, "loss": 2.0084, "step": 58674 }, { "epoch": 86.23373759647188, "grad_norm": 1.4813933372497559, "learning_rate": 1.3720588235294119e-06, "loss": 1.9469, "step": 58695 }, { "epoch": 86.26460859977949, "grad_norm": 2.253814220428467, "learning_rate": 1.368970588235294e-06, "loss": 1.9587, "step": 58716 }, { "epoch": 86.2954796030871, "grad_norm": 3.0864944458007812, "learning_rate": 1.3658823529411767e-06, "loss": 1.8973, "step": 58737 }, { "epoch": 86.32635060639471, "grad_norm": 3.736219882965088, "learning_rate": 1.3627941176470589e-06, "loss": 1.8536, "step": 58758 }, { "epoch": 86.35722160970232, "grad_norm": 2.422389030456543, "learning_rate": 1.3597058823529413e-06, "loss": 1.8922, "step": 58779 }, { "epoch": 86.38809261300992, "grad_norm": 1.6468392610549927, "learning_rate": 1.3566176470588235e-06, "loss": 1.9128, "step": 58800 }, { "epoch": 86.41896361631753, "grad_norm": 2.7321715354919434, "learning_rate": 1.3535294117647061e-06, "loss": 1.9701, "step": 58821 }, { "epoch": 86.44983461962514, "grad_norm": 2.7268600463867188, "learning_rate": 1.3504411764705883e-06, "loss": 1.8603, "step": 58842 }, { "epoch": 86.48070562293275, "grad_norm": 5.696651458740234, "learning_rate": 1.3473529411764707e-06, "loss": 1.7466, "step": 58863 }, { "epoch": 86.51157662624036, "grad_norm": 1.9593992233276367, "learning_rate": 1.344264705882353e-06, "loss": 1.7994, "step": 58884 }, { "epoch": 86.54244762954796, "grad_norm": 2.7126142978668213, "learning_rate": 1.3411764705882355e-06, "loss": 1.9958, "step": 58905 }, { "epoch": 86.57331863285557, "grad_norm": 3.9629502296447754, "learning_rate": 1.3380882352941177e-06, "loss": 1.8163, "step": 58926 }, { "epoch": 86.60418963616317, "grad_norm": 2.86966872215271, "learning_rate": 1.3350000000000001e-06, "loss": 1.7926, "step": 58947 }, { "epoch": 86.63506063947078, "grad_norm": 3.014176368713379, "learning_rate": 1.3319117647058823e-06, "loss": 1.8434, "step": 58968 }, { "epoch": 86.66593164277839, "grad_norm": 3.3499221801757812, "learning_rate": 1.328823529411765e-06, "loss": 1.9092, "step": 58989 }, { "epoch": 86.696802646086, "grad_norm": 3.142524480819702, "learning_rate": 1.3257352941176471e-06, "loss": 1.9423, "step": 59010 }, { "epoch": 86.72767364939361, "grad_norm": 5.264927387237549, "learning_rate": 1.3226470588235296e-06, "loss": 1.7948, "step": 59031 }, { "epoch": 86.75854465270122, "grad_norm": 2.9360337257385254, "learning_rate": 1.3195588235294117e-06, "loss": 1.977, "step": 59052 }, { "epoch": 86.78941565600881, "grad_norm": 3.2129647731781006, "learning_rate": 1.3164705882352944e-06, "loss": 1.9453, "step": 59073 }, { "epoch": 86.82028665931642, "grad_norm": 3.07582950592041, "learning_rate": 1.3133823529411766e-06, "loss": 2.0039, "step": 59094 }, { "epoch": 86.85115766262403, "grad_norm": 3.377211332321167, "learning_rate": 1.310294117647059e-06, "loss": 1.8155, "step": 59115 }, { "epoch": 86.88202866593164, "grad_norm": 3.0476300716400146, "learning_rate": 1.3072058823529412e-06, "loss": 1.9147, "step": 59136 }, { "epoch": 86.91289966923925, "grad_norm": 3.0012621879577637, "learning_rate": 1.3041176470588238e-06, "loss": 1.9902, "step": 59157 }, { "epoch": 86.94377067254686, "grad_norm": 3.756188154220581, "learning_rate": 1.301029411764706e-06, "loss": 1.8348, "step": 59178 }, { "epoch": 86.97464167585447, "grad_norm": 2.6709325313568115, "learning_rate": 1.2979411764705882e-06, "loss": 1.8793, "step": 59199 }, { "epoch": 87.00441014332966, "grad_norm": 4.436853408813477, "learning_rate": 1.2948529411764708e-06, "loss": 1.8939, "step": 59220 }, { "epoch": 87.03528114663726, "grad_norm": 3.6392900943756104, "learning_rate": 1.291764705882353e-06, "loss": 1.885, "step": 59241 }, { "epoch": 87.06615214994487, "grad_norm": 2.955660343170166, "learning_rate": 1.2886764705882354e-06, "loss": 1.9051, "step": 59262 }, { "epoch": 87.09702315325248, "grad_norm": 4.244918346405029, "learning_rate": 1.2855882352941176e-06, "loss": 1.8528, "step": 59283 }, { "epoch": 87.12789415656009, "grad_norm": 2.917839765548706, "learning_rate": 1.2825000000000002e-06, "loss": 2.002, "step": 59304 }, { "epoch": 87.1587651598677, "grad_norm": 2.505627393722534, "learning_rate": 1.2794117647058824e-06, "loss": 1.9571, "step": 59325 }, { "epoch": 87.18963616317531, "grad_norm": 3.3427155017852783, "learning_rate": 1.2763235294117648e-06, "loss": 2.0119, "step": 59346 }, { "epoch": 87.22050716648292, "grad_norm": 4.4839959144592285, "learning_rate": 1.273235294117647e-06, "loss": 1.8778, "step": 59367 }, { "epoch": 87.25137816979051, "grad_norm": 1.5844393968582153, "learning_rate": 1.2701470588235296e-06, "loss": 2.0445, "step": 59388 }, { "epoch": 87.28224917309812, "grad_norm": 2.6529717445373535, "learning_rate": 1.2670588235294118e-06, "loss": 1.8793, "step": 59409 }, { "epoch": 87.31312017640573, "grad_norm": 2.867917060852051, "learning_rate": 1.2639705882352942e-06, "loss": 1.8533, "step": 59430 }, { "epoch": 87.34399117971334, "grad_norm": 2.572507619857788, "learning_rate": 1.2608823529411764e-06, "loss": 1.9908, "step": 59451 }, { "epoch": 87.37486218302095, "grad_norm": 3.1784677505493164, "learning_rate": 1.257794117647059e-06, "loss": 1.9245, "step": 59472 }, { "epoch": 87.40573318632856, "grad_norm": 3.4458327293395996, "learning_rate": 1.2547058823529412e-06, "loss": 1.8252, "step": 59493 }, { "epoch": 87.43660418963616, "grad_norm": 3.248615026473999, "learning_rate": 1.2516176470588237e-06, "loss": 1.7971, "step": 59514 }, { "epoch": 87.46747519294377, "grad_norm": 2.1555473804473877, "learning_rate": 1.2485294117647058e-06, "loss": 1.9436, "step": 59535 }, { "epoch": 87.49834619625138, "grad_norm": 2.4054548740386963, "learning_rate": 1.2454411764705883e-06, "loss": 1.8625, "step": 59556 }, { "epoch": 87.52921719955899, "grad_norm": 2.799109935760498, "learning_rate": 1.2423529411764707e-06, "loss": 1.8425, "step": 59577 }, { "epoch": 87.5600882028666, "grad_norm": 2.340550422668457, "learning_rate": 1.239264705882353e-06, "loss": 1.8787, "step": 59598 }, { "epoch": 87.5909592061742, "grad_norm": 4.15479850769043, "learning_rate": 1.2361764705882353e-06, "loss": 1.8539, "step": 59619 }, { "epoch": 87.62183020948181, "grad_norm": 2.531327247619629, "learning_rate": 1.2330882352941177e-06, "loss": 1.9289, "step": 59640 }, { "epoch": 87.65270121278941, "grad_norm": 3.2001707553863525, "learning_rate": 1.23e-06, "loss": 1.8317, "step": 59661 }, { "epoch": 87.68357221609702, "grad_norm": 3.42370343208313, "learning_rate": 1.2269117647058825e-06, "loss": 1.8432, "step": 59682 }, { "epoch": 87.71444321940463, "grad_norm": 3.5570826530456543, "learning_rate": 1.223823529411765e-06, "loss": 1.9791, "step": 59703 }, { "epoch": 87.74531422271224, "grad_norm": 2.308673143386841, "learning_rate": 1.220735294117647e-06, "loss": 1.8591, "step": 59724 }, { "epoch": 87.77618522601985, "grad_norm": 4.056532859802246, "learning_rate": 1.2176470588235295e-06, "loss": 1.9075, "step": 59745 }, { "epoch": 87.80705622932746, "grad_norm": 2.8096394538879395, "learning_rate": 1.214558823529412e-06, "loss": 1.8066, "step": 59766 }, { "epoch": 87.83792723263507, "grad_norm": 3.5196266174316406, "learning_rate": 1.2114705882352943e-06, "loss": 1.8267, "step": 59787 }, { "epoch": 87.86879823594266, "grad_norm": 2.0174176692962646, "learning_rate": 1.2083823529411765e-06, "loss": 1.88, "step": 59808 }, { "epoch": 87.89966923925027, "grad_norm": 2.9481005668640137, "learning_rate": 1.205294117647059e-06, "loss": 1.894, "step": 59829 }, { "epoch": 87.93054024255788, "grad_norm": 2.208841562271118, "learning_rate": 1.2022058823529413e-06, "loss": 1.9034, "step": 59850 }, { "epoch": 87.96141124586549, "grad_norm": 3.058025360107422, "learning_rate": 1.1991176470588237e-06, "loss": 1.9668, "step": 59871 }, { "epoch": 87.9922822491731, "grad_norm": 4.263309478759766, "learning_rate": 1.196029411764706e-06, "loss": 1.8648, "step": 59892 }, { "epoch": 88.0220507166483, "grad_norm": 3.096315383911133, "learning_rate": 1.1929411764705883e-06, "loss": 1.7281, "step": 59913 }, { "epoch": 88.0529217199559, "grad_norm": 3.383788824081421, "learning_rate": 1.1898529411764707e-06, "loss": 1.9838, "step": 59934 }, { "epoch": 88.0837927232635, "grad_norm": 2.8319485187530518, "learning_rate": 1.1867647058823532e-06, "loss": 1.8687, "step": 59955 }, { "epoch": 88.11466372657111, "grad_norm": 2.9392781257629395, "learning_rate": 1.1836764705882356e-06, "loss": 1.9511, "step": 59976 }, { "epoch": 88.14553472987872, "grad_norm": 4.267587661743164, "learning_rate": 1.1805882352941178e-06, "loss": 1.9033, "step": 59997 }, { "epoch": 88.17640573318633, "grad_norm": 2.651862859725952, "learning_rate": 1.1775e-06, "loss": 1.8552, "step": 60018 }, { "epoch": 88.20727673649394, "grad_norm": 3.538337469100952, "learning_rate": 1.1744117647058824e-06, "loss": 1.9893, "step": 60039 }, { "epoch": 88.23814773980155, "grad_norm": 3.3270182609558105, "learning_rate": 1.1713235294117648e-06, "loss": 1.7628, "step": 60060 }, { "epoch": 88.26901874310916, "grad_norm": 3.1489109992980957, "learning_rate": 1.1682352941176472e-06, "loss": 1.9585, "step": 60081 }, { "epoch": 88.29988974641675, "grad_norm": 2.824500799179077, "learning_rate": 1.1651470588235294e-06, "loss": 1.8887, "step": 60102 }, { "epoch": 88.33076074972436, "grad_norm": 3.7373387813568115, "learning_rate": 1.1620588235294118e-06, "loss": 1.9219, "step": 60123 }, { "epoch": 88.36163175303197, "grad_norm": 2.9277431964874268, "learning_rate": 1.1591176470588237e-06, "loss": 1.8105, "step": 60144 }, { "epoch": 88.39250275633958, "grad_norm": 2.6352338790893555, "learning_rate": 1.1560294117647059e-06, "loss": 1.816, "step": 60165 }, { "epoch": 88.42337375964719, "grad_norm": 3.3314943313598633, "learning_rate": 1.1529411764705883e-06, "loss": 1.8387, "step": 60186 }, { "epoch": 88.4542447629548, "grad_norm": 3.4140288829803467, "learning_rate": 1.1498529411764707e-06, "loss": 1.9559, "step": 60207 }, { "epoch": 88.48511576626241, "grad_norm": 2.119448184967041, "learning_rate": 1.1467647058823531e-06, "loss": 1.7724, "step": 60228 }, { "epoch": 88.51598676957, "grad_norm": 3.0751090049743652, "learning_rate": 1.1436764705882355e-06, "loss": 1.864, "step": 60249 }, { "epoch": 88.54685777287762, "grad_norm": 2.5504307746887207, "learning_rate": 1.1405882352941177e-06, "loss": 1.8817, "step": 60270 }, { "epoch": 88.57772877618523, "grad_norm": 3.7658724784851074, "learning_rate": 1.1375000000000001e-06, "loss": 1.9351, "step": 60291 }, { "epoch": 88.60859977949283, "grad_norm": 4.007877826690674, "learning_rate": 1.1344117647058825e-06, "loss": 1.9142, "step": 60312 }, { "epoch": 88.63947078280044, "grad_norm": 3.648144245147705, "learning_rate": 1.131323529411765e-06, "loss": 1.8408, "step": 60333 }, { "epoch": 88.67034178610805, "grad_norm": 3.2860212326049805, "learning_rate": 1.1282352941176471e-06, "loss": 1.9493, "step": 60354 }, { "epoch": 88.70121278941565, "grad_norm": 4.458934783935547, "learning_rate": 1.1251470588235295e-06, "loss": 1.9384, "step": 60375 }, { "epoch": 88.73208379272326, "grad_norm": 4.178431034088135, "learning_rate": 1.122058823529412e-06, "loss": 1.8962, "step": 60396 }, { "epoch": 88.76295479603087, "grad_norm": 2.6484079360961914, "learning_rate": 1.1189705882352941e-06, "loss": 1.955, "step": 60417 }, { "epoch": 88.79382579933848, "grad_norm": 3.7903006076812744, "learning_rate": 1.1158823529411765e-06, "loss": 1.9187, "step": 60438 }, { "epoch": 88.82469680264609, "grad_norm": 3.3942325115203857, "learning_rate": 1.112794117647059e-06, "loss": 1.9754, "step": 60459 }, { "epoch": 88.8555678059537, "grad_norm": 3.4611849784851074, "learning_rate": 1.1097058823529412e-06, "loss": 2.0128, "step": 60480 }, { "epoch": 88.8864388092613, "grad_norm": 3.746903896331787, "learning_rate": 1.1066176470588236e-06, "loss": 1.8649, "step": 60501 }, { "epoch": 88.9173098125689, "grad_norm": 3.660487651824951, "learning_rate": 1.103529411764706e-06, "loss": 1.9396, "step": 60522 }, { "epoch": 88.94818081587651, "grad_norm": 5.853093147277832, "learning_rate": 1.1004411764705884e-06, "loss": 1.8903, "step": 60543 }, { "epoch": 88.97905181918412, "grad_norm": 2.717294216156006, "learning_rate": 1.0973529411764706e-06, "loss": 1.8359, "step": 60564 }, { "epoch": 89.00882028665932, "grad_norm": 4.310739994049072, "learning_rate": 1.094264705882353e-06, "loss": 1.7007, "step": 60585 }, { "epoch": 89.03969128996692, "grad_norm": 3.0722875595092773, "learning_rate": 1.0911764705882354e-06, "loss": 1.8967, "step": 60606 }, { "epoch": 89.07056229327453, "grad_norm": 2.93528413772583, "learning_rate": 1.0880882352941178e-06, "loss": 1.9162, "step": 60627 }, { "epoch": 89.10143329658214, "grad_norm": 3.1630377769470215, "learning_rate": 1.085e-06, "loss": 1.9871, "step": 60648 }, { "epoch": 89.13230429988975, "grad_norm": 2.4683408737182617, "learning_rate": 1.0819117647058824e-06, "loss": 1.9919, "step": 60669 }, { "epoch": 89.16317530319735, "grad_norm": 3.1284024715423584, "learning_rate": 1.0788235294117648e-06, "loss": 1.8272, "step": 60690 }, { "epoch": 89.19404630650496, "grad_norm": 2.7137832641601562, "learning_rate": 1.0757352941176472e-06, "loss": 1.8116, "step": 60711 }, { "epoch": 89.22491730981257, "grad_norm": 2.2366697788238525, "learning_rate": 1.0726470588235296e-06, "loss": 1.9729, "step": 60732 }, { "epoch": 89.25578831312018, "grad_norm": 2.5221176147460938, "learning_rate": 1.0695588235294118e-06, "loss": 2.0077, "step": 60753 }, { "epoch": 89.28665931642779, "grad_norm": 2.667752504348755, "learning_rate": 1.0664705882352942e-06, "loss": 2.0037, "step": 60774 }, { "epoch": 89.3175303197354, "grad_norm": 4.105930805206299, "learning_rate": 1.0633823529411766e-06, "loss": 1.9321, "step": 60795 }, { "epoch": 89.34840132304299, "grad_norm": 4.049197196960449, "learning_rate": 1.060294117647059e-06, "loss": 1.8349, "step": 60816 }, { "epoch": 89.3792723263506, "grad_norm": 3.184326410293579, "learning_rate": 1.0572058823529412e-06, "loss": 1.9747, "step": 60837 }, { "epoch": 89.41014332965821, "grad_norm": 3.916524887084961, "learning_rate": 1.0541176470588236e-06, "loss": 1.9874, "step": 60858 }, { "epoch": 89.44101433296582, "grad_norm": 2.034909725189209, "learning_rate": 1.051029411764706e-06, "loss": 1.9547, "step": 60879 }, { "epoch": 89.47188533627343, "grad_norm": 2.3660428524017334, "learning_rate": 1.0479411764705885e-06, "loss": 1.8134, "step": 60900 }, { "epoch": 89.50275633958104, "grad_norm": 2.7458677291870117, "learning_rate": 1.0448529411764706e-06, "loss": 1.8521, "step": 60921 }, { "epoch": 89.53362734288865, "grad_norm": 0.7721191644668579, "learning_rate": 1.041764705882353e-06, "loss": 1.8586, "step": 60942 }, { "epoch": 89.56449834619625, "grad_norm": 2.1065902709960938, "learning_rate": 1.0386764705882353e-06, "loss": 2.0351, "step": 60963 }, { "epoch": 89.59536934950386, "grad_norm": 2.1720170974731445, "learning_rate": 1.0355882352941177e-06, "loss": 1.8632, "step": 60984 }, { "epoch": 89.62624035281146, "grad_norm": 2.277952194213867, "learning_rate": 1.0325e-06, "loss": 1.8818, "step": 61005 }, { "epoch": 89.65711135611907, "grad_norm": 3.0916495323181152, "learning_rate": 1.0294117647058825e-06, "loss": 1.839, "step": 61026 }, { "epoch": 89.68798235942668, "grad_norm": 3.0497400760650635, "learning_rate": 1.0263235294117647e-06, "loss": 2.0071, "step": 61047 }, { "epoch": 89.7188533627343, "grad_norm": 2.637296676635742, "learning_rate": 1.023235294117647e-06, "loss": 1.9049, "step": 61068 }, { "epoch": 89.7497243660419, "grad_norm": 2.3482441902160645, "learning_rate": 1.0201470588235295e-06, "loss": 1.9443, "step": 61089 }, { "epoch": 89.7805953693495, "grad_norm": 3.4600319862365723, "learning_rate": 1.0170588235294119e-06, "loss": 1.8897, "step": 61110 }, { "epoch": 89.81146637265711, "grad_norm": 2.5892529487609863, "learning_rate": 1.013970588235294e-06, "loss": 1.9059, "step": 61131 }, { "epoch": 89.84233737596472, "grad_norm": 1.688792109489441, "learning_rate": 1.0108823529411765e-06, "loss": 1.9089, "step": 61152 }, { "epoch": 89.87320837927233, "grad_norm": 3.6830244064331055, "learning_rate": 1.007794117647059e-06, "loss": 1.8882, "step": 61173 }, { "epoch": 89.90407938257994, "grad_norm": 3.374119281768799, "learning_rate": 1.0047058823529413e-06, "loss": 1.9433, "step": 61194 }, { "epoch": 89.93495038588755, "grad_norm": 2.6888864040374756, "learning_rate": 1.0016176470588235e-06, "loss": 1.8906, "step": 61215 }, { "epoch": 89.96582138919514, "grad_norm": 4.220731735229492, "learning_rate": 9.98529411764706e-07, "loss": 1.8316, "step": 61236 }, { "epoch": 89.99669239250275, "grad_norm": 1.8186290264129639, "learning_rate": 9.954411764705883e-07, "loss": 1.8915, "step": 61257 }, { "epoch": 90.02646085997795, "grad_norm": 3.0741848945617676, "learning_rate": 9.923529411764707e-07, "loss": 1.878, "step": 61278 }, { "epoch": 90.05733186328555, "grad_norm": 3.43520188331604, "learning_rate": 9.892647058823531e-07, "loss": 1.8624, "step": 61299 }, { "epoch": 90.08820286659316, "grad_norm": 4.362806797027588, "learning_rate": 9.861764705882353e-07, "loss": 1.8565, "step": 61320 }, { "epoch": 90.11907386990077, "grad_norm": 3.217050313949585, "learning_rate": 9.830882352941177e-07, "loss": 1.9051, "step": 61341 }, { "epoch": 90.14994487320838, "grad_norm": 3.552097797393799, "learning_rate": 9.800000000000001e-07, "loss": 1.9054, "step": 61362 }, { "epoch": 90.180815876516, "grad_norm": 3.2667763233184814, "learning_rate": 9.769117647058826e-07, "loss": 1.9419, "step": 61383 }, { "epoch": 90.21168687982359, "grad_norm": 2.4101593494415283, "learning_rate": 9.738235294117647e-07, "loss": 1.9958, "step": 61404 }, { "epoch": 90.2425578831312, "grad_norm": 3.4765703678131104, "learning_rate": 9.707352941176472e-07, "loss": 1.8206, "step": 61425 }, { "epoch": 90.27342888643881, "grad_norm": 4.146786689758301, "learning_rate": 9.676470588235296e-07, "loss": 1.8476, "step": 61446 }, { "epoch": 90.30429988974642, "grad_norm": 2.4638280868530273, "learning_rate": 9.64558823529412e-07, "loss": 1.9358, "step": 61467 }, { "epoch": 90.33517089305403, "grad_norm": 2.5709476470947266, "learning_rate": 9.614705882352942e-07, "loss": 1.9626, "step": 61488 }, { "epoch": 90.36604189636164, "grad_norm": 3.712175130844116, "learning_rate": 9.583823529411766e-07, "loss": 1.8778, "step": 61509 }, { "epoch": 90.39691289966925, "grad_norm": 2.9449710845947266, "learning_rate": 9.552941176470588e-07, "loss": 1.9801, "step": 61530 }, { "epoch": 90.42778390297684, "grad_norm": 2.788980722427368, "learning_rate": 9.522058823529412e-07, "loss": 1.7742, "step": 61551 }, { "epoch": 90.45865490628445, "grad_norm": 2.8876864910125732, "learning_rate": 9.491176470588236e-07, "loss": 1.9068, "step": 61572 }, { "epoch": 90.48952590959206, "grad_norm": 2.6118879318237305, "learning_rate": 9.460294117647059e-07, "loss": 1.9116, "step": 61593 }, { "epoch": 90.52039691289967, "grad_norm": 3.445700168609619, "learning_rate": 9.429411764705883e-07, "loss": 1.8696, "step": 61614 }, { "epoch": 90.55126791620728, "grad_norm": 4.046363830566406, "learning_rate": 9.398529411764706e-07, "loss": 1.9238, "step": 61635 }, { "epoch": 90.58213891951489, "grad_norm": 2.464397430419922, "learning_rate": 9.36764705882353e-07, "loss": 1.8541, "step": 61656 }, { "epoch": 90.61300992282249, "grad_norm": 2.7527050971984863, "learning_rate": 9.336764705882353e-07, "loss": 2.1067, "step": 61677 }, { "epoch": 90.6438809261301, "grad_norm": 3.1945807933807373, "learning_rate": 9.305882352941177e-07, "loss": 1.8539, "step": 61698 }, { "epoch": 90.6747519294377, "grad_norm": 2.733415126800537, "learning_rate": 9.275000000000001e-07, "loss": 1.7974, "step": 61719 }, { "epoch": 90.70562293274531, "grad_norm": 1.9787558317184448, "learning_rate": 9.244117647058824e-07, "loss": 1.8947, "step": 61740 }, { "epoch": 90.73649393605292, "grad_norm": 2.8039846420288086, "learning_rate": 9.213235294117648e-07, "loss": 1.8666, "step": 61761 }, { "epoch": 90.76736493936053, "grad_norm": 1.8978792428970337, "learning_rate": 9.182352941176471e-07, "loss": 1.8399, "step": 61782 }, { "epoch": 90.79823594266814, "grad_norm": 3.542335271835327, "learning_rate": 9.151470588235295e-07, "loss": 1.8063, "step": 61803 }, { "epoch": 90.82910694597574, "grad_norm": 4.502211093902588, "learning_rate": 9.120588235294118e-07, "loss": 1.9444, "step": 61824 }, { "epoch": 90.85997794928335, "grad_norm": 3.3800113201141357, "learning_rate": 9.089705882352942e-07, "loss": 1.9673, "step": 61845 }, { "epoch": 90.89084895259096, "grad_norm": 3.33154559135437, "learning_rate": 9.058823529411765e-07, "loss": 1.8628, "step": 61866 }, { "epoch": 90.92171995589857, "grad_norm": 2.8907077312469482, "learning_rate": 9.02794117647059e-07, "loss": 1.8875, "step": 61887 }, { "epoch": 90.95259095920618, "grad_norm": 3.3395302295684814, "learning_rate": 8.997058823529413e-07, "loss": 2.0341, "step": 61908 }, { "epoch": 90.98346196251379, "grad_norm": 2.1065518856048584, "learning_rate": 8.966176470588237e-07, "loss": 1.8381, "step": 61929 }, { "epoch": 91.01323042998898, "grad_norm": 2.965150833129883, "learning_rate": 8.93529411764706e-07, "loss": 1.8495, "step": 61950 }, { "epoch": 91.04410143329659, "grad_norm": 3.7445461750030518, "learning_rate": 8.904411764705884e-07, "loss": 1.9338, "step": 61971 }, { "epoch": 91.07497243660418, "grad_norm": 2.4645745754241943, "learning_rate": 8.873529411764707e-07, "loss": 1.988, "step": 61992 }, { "epoch": 91.1058434399118, "grad_norm": 2.425020933151245, "learning_rate": 8.842647058823531e-07, "loss": 1.9032, "step": 62013 }, { "epoch": 91.1367144432194, "grad_norm": 3.099653482437134, "learning_rate": 8.811764705882353e-07, "loss": 2.0365, "step": 62034 }, { "epoch": 91.16758544652701, "grad_norm": 3.295295000076294, "learning_rate": 8.780882352941177e-07, "loss": 1.8835, "step": 62055 }, { "epoch": 91.19845644983462, "grad_norm": 2.3375332355499268, "learning_rate": 8.75e-07, "loss": 1.9469, "step": 62076 }, { "epoch": 91.22932745314223, "grad_norm": 3.078786611557007, "learning_rate": 8.719117647058824e-07, "loss": 1.7642, "step": 62097 }, { "epoch": 91.26019845644983, "grad_norm": 5.8669233322143555, "learning_rate": 8.688235294117647e-07, "loss": 1.9997, "step": 62118 }, { "epoch": 91.29106945975744, "grad_norm": 2.5775084495544434, "learning_rate": 8.657352941176471e-07, "loss": 1.8626, "step": 62139 }, { "epoch": 91.32194046306505, "grad_norm": 2.7125518321990967, "learning_rate": 8.626470588235294e-07, "loss": 1.7731, "step": 62160 }, { "epoch": 91.35281146637266, "grad_norm": 4.942500591278076, "learning_rate": 8.595588235294118e-07, "loss": 1.9256, "step": 62181 }, { "epoch": 91.38368246968027, "grad_norm": 3.3035075664520264, "learning_rate": 8.564705882352941e-07, "loss": 1.7723, "step": 62202 }, { "epoch": 91.41455347298788, "grad_norm": 2.333933115005493, "learning_rate": 8.533823529411765e-07, "loss": 1.8754, "step": 62223 }, { "epoch": 91.44542447629549, "grad_norm": 2.0019125938415527, "learning_rate": 8.502941176470589e-07, "loss": 1.9955, "step": 62244 }, { "epoch": 91.47629547960308, "grad_norm": 4.517475128173828, "learning_rate": 8.472058823529412e-07, "loss": 1.9052, "step": 62265 }, { "epoch": 91.50716648291069, "grad_norm": 3.5921847820281982, "learning_rate": 8.441176470588236e-07, "loss": 1.8548, "step": 62286 }, { "epoch": 91.5380374862183, "grad_norm": 2.8001370429992676, "learning_rate": 8.410294117647059e-07, "loss": 1.8622, "step": 62307 }, { "epoch": 91.56890848952591, "grad_norm": 2.9733877182006836, "learning_rate": 8.379411764705883e-07, "loss": 1.881, "step": 62328 }, { "epoch": 91.59977949283352, "grad_norm": 3.5342729091644287, "learning_rate": 8.348529411764706e-07, "loss": 1.9623, "step": 62349 }, { "epoch": 91.63065049614113, "grad_norm": 3.232391834259033, "learning_rate": 8.317647058823531e-07, "loss": 1.9425, "step": 62370 }, { "epoch": 91.66152149944874, "grad_norm": 3.4113640785217285, "learning_rate": 8.286764705882354e-07, "loss": 1.7955, "step": 62391 }, { "epoch": 91.69239250275633, "grad_norm": 2.7204277515411377, "learning_rate": 8.255882352941178e-07, "loss": 1.9279, "step": 62412 }, { "epoch": 91.72326350606394, "grad_norm": 2.3471639156341553, "learning_rate": 8.225000000000001e-07, "loss": 1.7866, "step": 62433 }, { "epoch": 91.75413450937155, "grad_norm": 3.409602403640747, "learning_rate": 8.194117647058825e-07, "loss": 1.9658, "step": 62454 }, { "epoch": 91.78500551267916, "grad_norm": 2.2171287536621094, "learning_rate": 8.163235294117648e-07, "loss": 2.0439, "step": 62475 }, { "epoch": 91.81587651598677, "grad_norm": 3.188605546951294, "learning_rate": 8.132352941176472e-07, "loss": 1.8307, "step": 62496 }, { "epoch": 91.84674751929438, "grad_norm": 2.2356672286987305, "learning_rate": 8.101470588235295e-07, "loss": 1.8925, "step": 62517 }, { "epoch": 91.87761852260198, "grad_norm": 2.746654987335205, "learning_rate": 8.070588235294119e-07, "loss": 1.8698, "step": 62538 }, { "epoch": 91.90848952590959, "grad_norm": 3.710447072982788, "learning_rate": 8.039705882352943e-07, "loss": 1.9152, "step": 62559 }, { "epoch": 91.9393605292172, "grad_norm": 3.453009843826294, "learning_rate": 8.008823529411765e-07, "loss": 1.8884, "step": 62580 }, { "epoch": 91.9702315325248, "grad_norm": 3.417304515838623, "learning_rate": 7.977941176470588e-07, "loss": 1.8133, "step": 62601 }, { "epoch": 92.0, "grad_norm": 1.371234655380249, "learning_rate": 7.947058823529412e-07, "loss": 1.8635, "step": 62622 }, { "epoch": 92.03087100330761, "grad_norm": 4.22697639465332, "learning_rate": 7.916176470588235e-07, "loss": 1.8214, "step": 62643 }, { "epoch": 92.06174200661522, "grad_norm": 3.756859540939331, "learning_rate": 7.885294117647059e-07, "loss": 1.8834, "step": 62664 }, { "epoch": 92.09261300992283, "grad_norm": 3.4455883502960205, "learning_rate": 7.854411764705882e-07, "loss": 1.9208, "step": 62685 }, { "epoch": 92.12348401323042, "grad_norm": 3.3572161197662354, "learning_rate": 7.823529411764706e-07, "loss": 1.8894, "step": 62706 }, { "epoch": 92.15435501653803, "grad_norm": 2.2369420528411865, "learning_rate": 7.79264705882353e-07, "loss": 1.8135, "step": 62727 }, { "epoch": 92.18522601984564, "grad_norm": 2.972598075866699, "learning_rate": 7.761764705882353e-07, "loss": 1.8805, "step": 62748 }, { "epoch": 92.21609702315325, "grad_norm": 3.37801194190979, "learning_rate": 7.730882352941177e-07, "loss": 1.9557, "step": 62769 }, { "epoch": 92.24696802646086, "grad_norm": 3.4856269359588623, "learning_rate": 7.7e-07, "loss": 1.9483, "step": 62790 }, { "epoch": 92.27783902976847, "grad_norm": 3.5852904319763184, "learning_rate": 7.669117647058824e-07, "loss": 2.0368, "step": 62811 }, { "epoch": 92.30871003307608, "grad_norm": 3.480229139328003, "learning_rate": 7.638235294117647e-07, "loss": 2.0156, "step": 62832 }, { "epoch": 92.33958103638368, "grad_norm": 2.5389676094055176, "learning_rate": 7.607352941176472e-07, "loss": 1.9678, "step": 62853 }, { "epoch": 92.37045203969129, "grad_norm": 2.4573192596435547, "learning_rate": 7.576470588235295e-07, "loss": 1.8305, "step": 62874 }, { "epoch": 92.4013230429989, "grad_norm": 2.773287773132324, "learning_rate": 7.545588235294119e-07, "loss": 1.8226, "step": 62895 }, { "epoch": 92.4321940463065, "grad_norm": 2.5444586277008057, "learning_rate": 7.514705882352942e-07, "loss": 1.8064, "step": 62916 }, { "epoch": 92.46306504961412, "grad_norm": 3.6736056804656982, "learning_rate": 7.483823529411766e-07, "loss": 1.8094, "step": 62937 }, { "epoch": 92.49393605292173, "grad_norm": 3.994213819503784, "learning_rate": 7.452941176470589e-07, "loss": 1.846, "step": 62958 }, { "epoch": 92.52480705622932, "grad_norm": 4.557860851287842, "learning_rate": 7.422058823529413e-07, "loss": 1.8729, "step": 62979 }, { "epoch": 92.55567805953693, "grad_norm": 2.33266544342041, "learning_rate": 7.391176470588236e-07, "loss": 1.9356, "step": 63000 }, { "epoch": 92.58654906284454, "grad_norm": 2.029332160949707, "learning_rate": 7.36029411764706e-07, "loss": 2.0113, "step": 63021 }, { "epoch": 92.61742006615215, "grad_norm": 3.7209012508392334, "learning_rate": 7.329411764705884e-07, "loss": 1.8427, "step": 63042 }, { "epoch": 92.64829106945976, "grad_norm": 3.4325408935546875, "learning_rate": 7.298529411764707e-07, "loss": 1.866, "step": 63063 }, { "epoch": 92.67916207276737, "grad_norm": 3.297041654586792, "learning_rate": 7.267647058823531e-07, "loss": 1.8871, "step": 63084 }, { "epoch": 92.71003307607498, "grad_norm": 1.443469762802124, "learning_rate": 7.236764705882353e-07, "loss": 1.8765, "step": 63105 }, { "epoch": 92.74090407938257, "grad_norm": 2.6600944995880127, "learning_rate": 7.205882352941176e-07, "loss": 1.8953, "step": 63126 }, { "epoch": 92.77177508269018, "grad_norm": 3.968657970428467, "learning_rate": 7.175e-07, "loss": 1.8694, "step": 63147 }, { "epoch": 92.8026460859978, "grad_norm": 3.3785226345062256, "learning_rate": 7.144117647058823e-07, "loss": 1.7716, "step": 63168 }, { "epoch": 92.8335170893054, "grad_norm": 2.286892890930176, "learning_rate": 7.113235294117647e-07, "loss": 1.8495, "step": 63189 }, { "epoch": 92.86438809261301, "grad_norm": 3.019134759902954, "learning_rate": 7.082352941176471e-07, "loss": 1.944, "step": 63210 }, { "epoch": 92.89525909592062, "grad_norm": 2.8266942501068115, "learning_rate": 7.051470588235294e-07, "loss": 1.958, "step": 63231 }, { "epoch": 92.92613009922823, "grad_norm": 2.332057476043701, "learning_rate": 7.020588235294118e-07, "loss": 1.9432, "step": 63252 }, { "epoch": 92.95700110253583, "grad_norm": 4.516944885253906, "learning_rate": 6.989705882352941e-07, "loss": 1.9615, "step": 63273 }, { "epoch": 92.98787210584344, "grad_norm": 3.0649008750915527, "learning_rate": 6.958823529411765e-07, "loss": 1.9286, "step": 63294 }, { "epoch": 93.01764057331863, "grad_norm": 2.2895352840423584, "learning_rate": 6.927941176470588e-07, "loss": 1.8291, "step": 63315 }, { "epoch": 93.04851157662624, "grad_norm": 4.072527885437012, "learning_rate": 6.897058823529413e-07, "loss": 1.9621, "step": 63336 }, { "epoch": 93.07938257993385, "grad_norm": 2.6200454235076904, "learning_rate": 6.866176470588236e-07, "loss": 1.9301, "step": 63357 }, { "epoch": 93.11025358324146, "grad_norm": 2.490124464035034, "learning_rate": 6.83529411764706e-07, "loss": 1.8936, "step": 63378 }, { "epoch": 93.14112458654907, "grad_norm": 2.105252742767334, "learning_rate": 6.805882352941178e-07, "loss": 1.7668, "step": 63399 }, { "epoch": 93.17199558985666, "grad_norm": 2.6568450927734375, "learning_rate": 6.775000000000001e-07, "loss": 1.8564, "step": 63420 }, { "epoch": 93.20286659316427, "grad_norm": 2.588062047958374, "learning_rate": 6.744117647058825e-07, "loss": 1.9208, "step": 63441 }, { "epoch": 93.23373759647188, "grad_norm": 2.882103204727173, "learning_rate": 6.713235294117648e-07, "loss": 2.0067, "step": 63462 }, { "epoch": 93.26460859977949, "grad_norm": 1.9071693420410156, "learning_rate": 6.682352941176472e-07, "loss": 1.918, "step": 63483 }, { "epoch": 93.2954796030871, "grad_norm": 3.725376844406128, "learning_rate": 6.651470588235295e-07, "loss": 1.8197, "step": 63504 }, { "epoch": 93.32635060639471, "grad_norm": 2.641511917114258, "learning_rate": 6.620588235294118e-07, "loss": 1.947, "step": 63525 }, { "epoch": 93.35722160970232, "grad_norm": 3.8231630325317383, "learning_rate": 6.589705882352941e-07, "loss": 1.9409, "step": 63546 }, { "epoch": 93.38809261300992, "grad_norm": 2.076556921005249, "learning_rate": 6.558823529411765e-07, "loss": 1.8554, "step": 63567 }, { "epoch": 93.41896361631753, "grad_norm": 3.371852397918701, "learning_rate": 6.527941176470588e-07, "loss": 1.8691, "step": 63588 }, { "epoch": 93.44983461962514, "grad_norm": 2.8261594772338867, "learning_rate": 6.497058823529412e-07, "loss": 1.939, "step": 63609 }, { "epoch": 93.48070562293275, "grad_norm": 2.602322578430176, "learning_rate": 6.466176470588235e-07, "loss": 1.9005, "step": 63630 }, { "epoch": 93.51157662624036, "grad_norm": 2.361100196838379, "learning_rate": 6.435294117647059e-07, "loss": 1.9, "step": 63651 }, { "epoch": 93.54244762954796, "grad_norm": 3.249903678894043, "learning_rate": 6.404411764705882e-07, "loss": 1.8239, "step": 63672 }, { "epoch": 93.57331863285557, "grad_norm": 3.4280481338500977, "learning_rate": 6.373529411764706e-07, "loss": 1.8802, "step": 63693 }, { "epoch": 93.60418963616317, "grad_norm": 1.832945466041565, "learning_rate": 6.342647058823529e-07, "loss": 1.7807, "step": 63714 }, { "epoch": 93.63506063947078, "grad_norm": 1.6270742416381836, "learning_rate": 6.311764705882353e-07, "loss": 1.9183, "step": 63735 }, { "epoch": 93.66593164277839, "grad_norm": 3.374030590057373, "learning_rate": 6.280882352941176e-07, "loss": 1.9235, "step": 63756 }, { "epoch": 93.696802646086, "grad_norm": 2.4579567909240723, "learning_rate": 6.25e-07, "loss": 1.8249, "step": 63777 }, { "epoch": 93.72767364939361, "grad_norm": 2.797595739364624, "learning_rate": 6.219117647058825e-07, "loss": 1.9212, "step": 63798 }, { "epoch": 93.75854465270122, "grad_norm": 2.254387140274048, "learning_rate": 6.188235294117648e-07, "loss": 1.8388, "step": 63819 }, { "epoch": 93.78941565600881, "grad_norm": 2.4352400302886963, "learning_rate": 6.157352941176472e-07, "loss": 1.9481, "step": 63840 }, { "epoch": 93.82028665931642, "grad_norm": 1.9348390102386475, "learning_rate": 6.126470588235295e-07, "loss": 1.9661, "step": 63861 }, { "epoch": 93.85115766262403, "grad_norm": 4.020388126373291, "learning_rate": 6.095588235294119e-07, "loss": 1.938, "step": 63882 }, { "epoch": 93.88202866593164, "grad_norm": 2.1344141960144043, "learning_rate": 6.064705882352942e-07, "loss": 1.9065, "step": 63903 }, { "epoch": 93.91289966923925, "grad_norm": 1.9620568752288818, "learning_rate": 6.033823529411765e-07, "loss": 1.9012, "step": 63924 }, { "epoch": 93.94377067254686, "grad_norm": 2.2524168491363525, "learning_rate": 6.002941176470589e-07, "loss": 1.8984, "step": 63945 }, { "epoch": 93.97464167585447, "grad_norm": 2.376659870147705, "learning_rate": 5.972058823529412e-07, "loss": 1.8639, "step": 63966 }, { "epoch": 94.00441014332966, "grad_norm": 2.431605100631714, "learning_rate": 5.941176470588236e-07, "loss": 1.8134, "step": 63987 }, { "epoch": 94.03528114663726, "grad_norm": 2.89320707321167, "learning_rate": 5.910294117647059e-07, "loss": 1.8376, "step": 64008 }, { "epoch": 94.06615214994487, "grad_norm": 3.2655587196350098, "learning_rate": 5.879411764705883e-07, "loss": 1.8752, "step": 64029 }, { "epoch": 94.09702315325248, "grad_norm": 3.482224225997925, "learning_rate": 5.848529411764706e-07, "loss": 1.9564, "step": 64050 }, { "epoch": 94.12789415656009, "grad_norm": 3.240647792816162, "learning_rate": 5.81764705882353e-07, "loss": 1.8805, "step": 64071 }, { "epoch": 94.1587651598677, "grad_norm": 4.104581832885742, "learning_rate": 5.786764705882353e-07, "loss": 1.8743, "step": 64092 }, { "epoch": 94.18963616317531, "grad_norm": 2.6755881309509277, "learning_rate": 5.755882352941177e-07, "loss": 1.9283, "step": 64113 }, { "epoch": 94.22050716648292, "grad_norm": 3.6967930793762207, "learning_rate": 5.725e-07, "loss": 1.9689, "step": 64134 }, { "epoch": 94.25137816979051, "grad_norm": 2.839770793914795, "learning_rate": 5.694117647058824e-07, "loss": 1.8722, "step": 64155 }, { "epoch": 94.28224917309812, "grad_norm": 3.929335117340088, "learning_rate": 5.663235294117647e-07, "loss": 1.9477, "step": 64176 }, { "epoch": 94.31312017640573, "grad_norm": 2.9592323303222656, "learning_rate": 5.63235294117647e-07, "loss": 1.9406, "step": 64197 }, { "epoch": 94.34399117971334, "grad_norm": 3.8742637634277344, "learning_rate": 5.601470588235294e-07, "loss": 1.8052, "step": 64218 }, { "epoch": 94.37486218302095, "grad_norm": 2.5880110263824463, "learning_rate": 5.570588235294117e-07, "loss": 1.8463, "step": 64239 }, { "epoch": 94.40573318632856, "grad_norm": 3.108273506164551, "learning_rate": 5.539705882352942e-07, "loss": 1.881, "step": 64260 }, { "epoch": 94.43660418963616, "grad_norm": 4.440380573272705, "learning_rate": 5.508823529411766e-07, "loss": 1.8392, "step": 64281 }, { "epoch": 94.46747519294377, "grad_norm": 2.682086229324341, "learning_rate": 5.477941176470589e-07, "loss": 1.8056, "step": 64302 }, { "epoch": 94.49834619625138, "grad_norm": 2.107405662536621, "learning_rate": 5.447058823529413e-07, "loss": 1.8922, "step": 64323 }, { "epoch": 94.52921719955899, "grad_norm": 2.959698438644409, "learning_rate": 5.416176470588236e-07, "loss": 1.8737, "step": 64344 }, { "epoch": 94.5600882028666, "grad_norm": 3.5151190757751465, "learning_rate": 5.38529411764706e-07, "loss": 1.8634, "step": 64365 }, { "epoch": 94.5909592061742, "grad_norm": 2.043682336807251, "learning_rate": 5.354411764705883e-07, "loss": 1.9261, "step": 64386 }, { "epoch": 94.62183020948181, "grad_norm": 2.3529465198516846, "learning_rate": 5.323529411764707e-07, "loss": 1.9291, "step": 64407 }, { "epoch": 94.65270121278941, "grad_norm": 4.691298484802246, "learning_rate": 5.29264705882353e-07, "loss": 1.9045, "step": 64428 }, { "epoch": 94.68357221609702, "grad_norm": 2.80434513092041, "learning_rate": 5.261764705882354e-07, "loss": 1.953, "step": 64449 }, { "epoch": 94.71444321940463, "grad_norm": 1.550885558128357, "learning_rate": 5.230882352941177e-07, "loss": 1.8899, "step": 64470 }, { "epoch": 94.74531422271224, "grad_norm": 3.4159553050994873, "learning_rate": 5.2e-07, "loss": 1.8676, "step": 64491 }, { "epoch": 94.77618522601985, "grad_norm": 2.9911859035491943, "learning_rate": 5.169117647058824e-07, "loss": 1.7682, "step": 64512 }, { "epoch": 94.80705622932746, "grad_norm": 3.2267088890075684, "learning_rate": 5.138235294117647e-07, "loss": 1.9535, "step": 64533 }, { "epoch": 94.83792723263507, "grad_norm": 2.6751444339752197, "learning_rate": 5.107352941176471e-07, "loss": 1.852, "step": 64554 }, { "epoch": 94.86879823594266, "grad_norm": 3.393630027770996, "learning_rate": 5.076470588235294e-07, "loss": 1.9178, "step": 64575 }, { "epoch": 94.89966923925027, "grad_norm": 3.4891741275787354, "learning_rate": 5.045588235294118e-07, "loss": 1.9418, "step": 64596 }, { "epoch": 94.93054024255788, "grad_norm": 3.1458282470703125, "learning_rate": 5.014705882352941e-07, "loss": 1.8907, "step": 64617 }, { "epoch": 94.96141124586549, "grad_norm": 1.7886720895767212, "learning_rate": 4.983823529411765e-07, "loss": 1.882, "step": 64638 }, { "epoch": 94.9922822491731, "grad_norm": 1.5296646356582642, "learning_rate": 4.952941176470588e-07, "loss": 1.7593, "step": 64659 }, { "epoch": 95.0220507166483, "grad_norm": 2.502643346786499, "learning_rate": 4.922058823529412e-07, "loss": 1.7942, "step": 64680 }, { "epoch": 95.0529217199559, "grad_norm": 3.305527687072754, "learning_rate": 4.891176470588236e-07, "loss": 1.7936, "step": 64701 }, { "epoch": 95.0837927232635, "grad_norm": 3.817786455154419, "learning_rate": 4.860294117647058e-07, "loss": 1.8839, "step": 64722 }, { "epoch": 95.11466372657111, "grad_norm": 4.2019877433776855, "learning_rate": 4.829411764705883e-07, "loss": 1.9809, "step": 64743 }, { "epoch": 95.14553472987872, "grad_norm": 3.1239802837371826, "learning_rate": 4.798529411764706e-07, "loss": 1.8418, "step": 64764 }, { "epoch": 95.17640573318633, "grad_norm": 2.9181668758392334, "learning_rate": 4.7676470588235296e-07, "loss": 1.8062, "step": 64785 }, { "epoch": 95.20727673649394, "grad_norm": 2.3730788230895996, "learning_rate": 4.736764705882353e-07, "loss": 2.0128, "step": 64806 }, { "epoch": 95.23814773980155, "grad_norm": 3.773725986480713, "learning_rate": 4.7058823529411767e-07, "loss": 1.9088, "step": 64827 }, { "epoch": 95.26901874310916, "grad_norm": 2.862506151199341, "learning_rate": 4.675e-07, "loss": 1.9181, "step": 64848 }, { "epoch": 95.29988974641675, "grad_norm": 4.273544788360596, "learning_rate": 4.644117647058824e-07, "loss": 1.9061, "step": 64869 }, { "epoch": 95.33076074972436, "grad_norm": 4.650204181671143, "learning_rate": 4.6132352941176473e-07, "loss": 1.8185, "step": 64890 }, { "epoch": 95.36163175303197, "grad_norm": 4.004060745239258, "learning_rate": 4.582352941176471e-07, "loss": 1.8265, "step": 64911 }, { "epoch": 95.39250275633958, "grad_norm": 4.521578311920166, "learning_rate": 4.551470588235295e-07, "loss": 1.9327, "step": 64932 }, { "epoch": 95.42337375964719, "grad_norm": 3.0491127967834473, "learning_rate": 4.5205882352941185e-07, "loss": 1.8248, "step": 64953 }, { "epoch": 95.4542447629548, "grad_norm": 4.341372013092041, "learning_rate": 4.489705882352942e-07, "loss": 1.8424, "step": 64974 }, { "epoch": 95.48511576626241, "grad_norm": 3.2945477962493896, "learning_rate": 4.4588235294117645e-07, "loss": 1.9484, "step": 64995 }, { "epoch": 95.51598676957, "grad_norm": 3.2933237552642822, "learning_rate": 4.427941176470588e-07, "loss": 1.8618, "step": 65016 }, { "epoch": 95.54685777287762, "grad_norm": 2.2352800369262695, "learning_rate": 4.397058823529412e-07, "loss": 1.9062, "step": 65037 }, { "epoch": 95.57772877618523, "grad_norm": 2.9700090885162354, "learning_rate": 4.3661764705882357e-07, "loss": 1.8407, "step": 65058 }, { "epoch": 95.60859977949283, "grad_norm": 1.982535719871521, "learning_rate": 4.335294117647059e-07, "loss": 1.8608, "step": 65079 }, { "epoch": 95.63947078280044, "grad_norm": 3.3480589389801025, "learning_rate": 4.304411764705883e-07, "loss": 2.0239, "step": 65100 }, { "epoch": 95.67034178610805, "grad_norm": 3.506640672683716, "learning_rate": 4.2735294117647063e-07, "loss": 1.901, "step": 65121 }, { "epoch": 95.70121278941565, "grad_norm": 3.4647648334503174, "learning_rate": 4.24264705882353e-07, "loss": 1.9044, "step": 65142 }, { "epoch": 95.73208379272326, "grad_norm": 2.884315013885498, "learning_rate": 4.2117647058823534e-07, "loss": 1.8687, "step": 65163 }, { "epoch": 95.76295479603087, "grad_norm": 2.795482635498047, "learning_rate": 4.180882352941177e-07, "loss": 1.8443, "step": 65184 }, { "epoch": 95.79382579933848, "grad_norm": 1.355159878730774, "learning_rate": 4.1500000000000005e-07, "loss": 1.9712, "step": 65205 }, { "epoch": 95.82469680264609, "grad_norm": 2.7582497596740723, "learning_rate": 4.119117647058824e-07, "loss": 1.9544, "step": 65226 }, { "epoch": 95.8555678059537, "grad_norm": 4.07783842086792, "learning_rate": 4.0882352941176476e-07, "loss": 2.0533, "step": 65247 }, { "epoch": 95.8864388092613, "grad_norm": 3.085580587387085, "learning_rate": 4.0573529411764706e-07, "loss": 1.7924, "step": 65268 }, { "epoch": 95.9173098125689, "grad_norm": 2.7694640159606934, "learning_rate": 4.026470588235294e-07, "loss": 1.9036, "step": 65289 }, { "epoch": 95.94818081587651, "grad_norm": 3.760343074798584, "learning_rate": 3.9955882352941177e-07, "loss": 1.8693, "step": 65310 }, { "epoch": 95.97905181918412, "grad_norm": 3.2556777000427246, "learning_rate": 3.964705882352941e-07, "loss": 1.983, "step": 65331 }, { "epoch": 96.00882028665932, "grad_norm": 3.0481033325195312, "learning_rate": 3.933823529411765e-07, "loss": 1.8261, "step": 65352 }, { "epoch": 96.03969128996692, "grad_norm": 2.7367348670959473, "learning_rate": 3.9029411764705883e-07, "loss": 1.9396, "step": 65373 }, { "epoch": 96.07056229327453, "grad_norm": 3.4011168479919434, "learning_rate": 3.872058823529412e-07, "loss": 1.9107, "step": 65394 }, { "epoch": 96.10143329658214, "grad_norm": 2.226846694946289, "learning_rate": 3.8411764705882354e-07, "loss": 1.8861, "step": 65415 }, { "epoch": 96.13230429988975, "grad_norm": 3.729644536972046, "learning_rate": 3.8102941176470595e-07, "loss": 1.8834, "step": 65436 }, { "epoch": 96.16317530319735, "grad_norm": 2.641728401184082, "learning_rate": 3.779411764705883e-07, "loss": 1.9037, "step": 65457 }, { "epoch": 96.19404630650496, "grad_norm": 3.2323360443115234, "learning_rate": 3.7485294117647066e-07, "loss": 1.9087, "step": 65478 }, { "epoch": 96.22491730981257, "grad_norm": 3.271833658218384, "learning_rate": 3.71764705882353e-07, "loss": 1.9324, "step": 65499 }, { "epoch": 96.25578831312018, "grad_norm": 3.2168593406677246, "learning_rate": 3.6867647058823537e-07, "loss": 1.8618, "step": 65520 }, { "epoch": 96.28665931642779, "grad_norm": 2.2375099658966064, "learning_rate": 3.6558823529411767e-07, "loss": 1.7591, "step": 65541 }, { "epoch": 96.3175303197354, "grad_norm": 3.232529640197754, "learning_rate": 3.625e-07, "loss": 1.9363, "step": 65562 }, { "epoch": 96.34840132304299, "grad_norm": 2.4982259273529053, "learning_rate": 3.594117647058824e-07, "loss": 1.9034, "step": 65583 }, { "epoch": 96.3792723263506, "grad_norm": 2.80391001701355, "learning_rate": 3.5632352941176473e-07, "loss": 1.8563, "step": 65604 }, { "epoch": 96.41014332965821, "grad_norm": 2.438184976577759, "learning_rate": 3.532352941176471e-07, "loss": 1.9254, "step": 65625 }, { "epoch": 96.44101433296582, "grad_norm": 2.499802827835083, "learning_rate": 3.5014705882352944e-07, "loss": 2.0085, "step": 65646 }, { "epoch": 96.47188533627343, "grad_norm": 3.8202314376831055, "learning_rate": 3.470588235294118e-07, "loss": 1.9391, "step": 65667 }, { "epoch": 96.50275633958104, "grad_norm": 2.7475433349609375, "learning_rate": 3.4397058823529415e-07, "loss": 2.0113, "step": 65688 }, { "epoch": 96.53362734288865, "grad_norm": 2.2757246494293213, "learning_rate": 3.408823529411765e-07, "loss": 1.8425, "step": 65709 }, { "epoch": 96.56449834619625, "grad_norm": 2.702732801437378, "learning_rate": 3.3779411764705886e-07, "loss": 1.8867, "step": 65730 }, { "epoch": 96.59536934950386, "grad_norm": 3.008012533187866, "learning_rate": 3.347058823529412e-07, "loss": 1.9147, "step": 65751 }, { "epoch": 96.62624035281146, "grad_norm": 2.369469165802002, "learning_rate": 3.3161764705882357e-07, "loss": 1.8489, "step": 65772 }, { "epoch": 96.65711135611907, "grad_norm": 3.7389333248138428, "learning_rate": 3.285294117647059e-07, "loss": 1.8416, "step": 65793 }, { "epoch": 96.68798235942668, "grad_norm": 2.7630550861358643, "learning_rate": 3.254411764705882e-07, "loss": 1.857, "step": 65814 }, { "epoch": 96.7188533627343, "grad_norm": 3.7400219440460205, "learning_rate": 3.223529411764706e-07, "loss": 1.8846, "step": 65835 }, { "epoch": 96.7497243660419, "grad_norm": 3.038851261138916, "learning_rate": 3.1926470588235293e-07, "loss": 1.9197, "step": 65856 }, { "epoch": 96.7805953693495, "grad_norm": 2.3390955924987793, "learning_rate": 3.161764705882353e-07, "loss": 1.7595, "step": 65877 }, { "epoch": 96.81146637265711, "grad_norm": 2.736144542694092, "learning_rate": 3.1308823529411764e-07, "loss": 1.9608, "step": 65898 }, { "epoch": 96.84233737596472, "grad_norm": 3.9220027923583984, "learning_rate": 3.1000000000000005e-07, "loss": 1.8359, "step": 65919 }, { "epoch": 96.87320837927233, "grad_norm": 4.188500881195068, "learning_rate": 3.069117647058824e-07, "loss": 2.0187, "step": 65940 }, { "epoch": 96.90407938257994, "grad_norm": 3.9322383403778076, "learning_rate": 3.0382352941176476e-07, "loss": 1.9801, "step": 65961 }, { "epoch": 96.93495038588755, "grad_norm": 2.4434332847595215, "learning_rate": 3.007352941176471e-07, "loss": 1.8078, "step": 65982 }, { "epoch": 96.96582138919514, "grad_norm": 2.7368276119232178, "learning_rate": 2.976470588235294e-07, "loss": 1.9531, "step": 66003 }, { "epoch": 96.99669239250275, "grad_norm": 2.6663424968719482, "learning_rate": 2.9455882352941177e-07, "loss": 1.9189, "step": 66024 }, { "epoch": 97.02646085997795, "grad_norm": 1.8697237968444824, "learning_rate": 2.914705882352941e-07, "loss": 1.8938, "step": 66045 }, { "epoch": 97.05733186328555, "grad_norm": 3.9901468753814697, "learning_rate": 2.883823529411765e-07, "loss": 1.8912, "step": 66066 }, { "epoch": 97.08820286659316, "grad_norm": 4.484730243682861, "learning_rate": 2.8529411764705883e-07, "loss": 1.8873, "step": 66087 }, { "epoch": 97.11907386990077, "grad_norm": 2.0661067962646484, "learning_rate": 2.822058823529412e-07, "loss": 1.8289, "step": 66108 }, { "epoch": 97.14994487320838, "grad_norm": 3.4118988513946533, "learning_rate": 2.791176470588236e-07, "loss": 1.9745, "step": 66129 }, { "epoch": 97.180815876516, "grad_norm": 1.826501488685608, "learning_rate": 2.760294117647059e-07, "loss": 1.899, "step": 66150 }, { "epoch": 97.21168687982359, "grad_norm": 1.928667664527893, "learning_rate": 2.7294117647058825e-07, "loss": 2.0111, "step": 66171 }, { "epoch": 97.2425578831312, "grad_norm": 3.6506292819976807, "learning_rate": 2.698529411764706e-07, "loss": 1.8744, "step": 66192 }, { "epoch": 97.27342888643881, "grad_norm": 1.2569831609725952, "learning_rate": 2.6676470588235296e-07, "loss": 1.8045, "step": 66213 }, { "epoch": 97.30429988974642, "grad_norm": 3.1078603267669678, "learning_rate": 2.636764705882353e-07, "loss": 1.8508, "step": 66234 }, { "epoch": 97.33517089305403, "grad_norm": 3.5259158611297607, "learning_rate": 2.6058823529411767e-07, "loss": 1.8253, "step": 66255 }, { "epoch": 97.36604189636164, "grad_norm": 3.03926682472229, "learning_rate": 2.575e-07, "loss": 2.0148, "step": 66276 }, { "epoch": 97.39691289966925, "grad_norm": 1.7484277486801147, "learning_rate": 2.544117647058824e-07, "loss": 1.9477, "step": 66297 }, { "epoch": 97.42778390297684, "grad_norm": 3.4173617362976074, "learning_rate": 2.5132352941176473e-07, "loss": 1.8847, "step": 66318 }, { "epoch": 97.45865490628445, "grad_norm": 3.4740829467773438, "learning_rate": 2.482352941176471e-07, "loss": 1.7714, "step": 66339 }, { "epoch": 97.48952590959206, "grad_norm": 1.3984400033950806, "learning_rate": 2.4514705882352944e-07, "loss": 1.8542, "step": 66360 }, { "epoch": 97.52039691289967, "grad_norm": 3.011032819747925, "learning_rate": 2.420588235294118e-07, "loss": 1.8685, "step": 66381 }, { "epoch": 97.55126791620728, "grad_norm": 3.6674866676330566, "learning_rate": 2.389705882352941e-07, "loss": 1.9379, "step": 66402 }, { "epoch": 97.58213891951489, "grad_norm": 2.981379270553589, "learning_rate": 2.3588235294117648e-07, "loss": 1.8728, "step": 66423 }, { "epoch": 97.61300992282249, "grad_norm": 2.7708966732025146, "learning_rate": 2.3279411764705883e-07, "loss": 1.8901, "step": 66444 }, { "epoch": 97.6438809261301, "grad_norm": 2.515983819961548, "learning_rate": 2.2970588235294119e-07, "loss": 1.9991, "step": 66465 }, { "epoch": 97.6747519294377, "grad_norm": 2.6616644859313965, "learning_rate": 2.2661764705882354e-07, "loss": 1.9786, "step": 66486 }, { "epoch": 97.70562293274531, "grad_norm": 2.0133228302001953, "learning_rate": 2.2352941176470592e-07, "loss": 1.8899, "step": 66507 }, { "epoch": 97.73649393605292, "grad_norm": 3.827622175216675, "learning_rate": 2.2044117647058828e-07, "loss": 1.8706, "step": 66528 }, { "epoch": 97.76736493936053, "grad_norm": 2.9072582721710205, "learning_rate": 2.173529411764706e-07, "loss": 1.8259, "step": 66549 }, { "epoch": 97.79823594266814, "grad_norm": 3.2709133625030518, "learning_rate": 2.1426470588235296e-07, "loss": 1.8595, "step": 66570 }, { "epoch": 97.82910694597574, "grad_norm": 2.63814640045166, "learning_rate": 2.1117647058823531e-07, "loss": 1.7129, "step": 66591 }, { "epoch": 97.85997794928335, "grad_norm": 1.6043705940246582, "learning_rate": 2.0808823529411767e-07, "loss": 1.7728, "step": 66612 }, { "epoch": 97.89084895259096, "grad_norm": 3.1980278491973877, "learning_rate": 2.0500000000000002e-07, "loss": 1.8281, "step": 66633 }, { "epoch": 97.92171995589857, "grad_norm": 2.4498841762542725, "learning_rate": 2.0191176470588238e-07, "loss": 1.8693, "step": 66654 }, { "epoch": 97.95259095920618, "grad_norm": 2.182715654373169, "learning_rate": 1.988235294117647e-07, "loss": 1.852, "step": 66675 }, { "epoch": 97.98346196251379, "grad_norm": 4.18843412399292, "learning_rate": 1.9573529411764706e-07, "loss": 1.8656, "step": 66696 }, { "epoch": 98.01323042998898, "grad_norm": 2.9161553382873535, "learning_rate": 1.9264705882352941e-07, "loss": 1.7865, "step": 66717 }, { "epoch": 98.04410143329659, "grad_norm": 2.3912177085876465, "learning_rate": 1.8955882352941177e-07, "loss": 2.0626, "step": 66738 }, { "epoch": 98.07497243660418, "grad_norm": 2.7265071868896484, "learning_rate": 1.8647058823529415e-07, "loss": 1.837, "step": 66759 }, { "epoch": 98.1058434399118, "grad_norm": 1.5563095808029175, "learning_rate": 1.833823529411765e-07, "loss": 2.0461, "step": 66780 }, { "epoch": 98.1367144432194, "grad_norm": 3.3853933811187744, "learning_rate": 1.8029411764705886e-07, "loss": 1.8421, "step": 66801 }, { "epoch": 98.16758544652701, "grad_norm": 3.6052029132843018, "learning_rate": 1.7720588235294119e-07, "loss": 1.88, "step": 66822 }, { "epoch": 98.19845644983462, "grad_norm": 2.846475839614868, "learning_rate": 1.7411764705882354e-07, "loss": 2.0244, "step": 66843 }, { "epoch": 98.22932745314223, "grad_norm": 3.325491428375244, "learning_rate": 1.710294117647059e-07, "loss": 1.9169, "step": 66864 }, { "epoch": 98.26019845644983, "grad_norm": 3.4086034297943115, "learning_rate": 1.6794117647058825e-07, "loss": 1.9343, "step": 66885 }, { "epoch": 98.29106945975744, "grad_norm": 3.277024030685425, "learning_rate": 1.648529411764706e-07, "loss": 1.9389, "step": 66906 }, { "epoch": 98.32194046306505, "grad_norm": 3.266974449157715, "learning_rate": 1.6176470588235296e-07, "loss": 1.8174, "step": 66927 }, { "epoch": 98.35281146637266, "grad_norm": 2.6262834072113037, "learning_rate": 1.586764705882353e-07, "loss": 1.9762, "step": 66948 }, { "epoch": 98.38368246968027, "grad_norm": 3.020660161972046, "learning_rate": 1.5558823529411767e-07, "loss": 1.8188, "step": 66969 }, { "epoch": 98.41455347298788, "grad_norm": 2.9259443283081055, "learning_rate": 1.5250000000000002e-07, "loss": 1.8618, "step": 66990 }, { "epoch": 98.44542447629549, "grad_norm": 2.5139663219451904, "learning_rate": 1.4941176470588238e-07, "loss": 1.9328, "step": 67011 }, { "epoch": 98.47629547960308, "grad_norm": 3.588460922241211, "learning_rate": 1.4632352941176473e-07, "loss": 1.7827, "step": 67032 }, { "epoch": 98.50716648291069, "grad_norm": 7.014211654663086, "learning_rate": 1.4323529411764706e-07, "loss": 1.875, "step": 67053 }, { "epoch": 98.5380374862183, "grad_norm": 3.73766827583313, "learning_rate": 1.4014705882352941e-07, "loss": 1.9766, "step": 67074 }, { "epoch": 98.56890848952591, "grad_norm": 2.5847315788269043, "learning_rate": 1.370588235294118e-07, "loss": 1.9271, "step": 67095 }, { "epoch": 98.59977949283352, "grad_norm": 2.360574722290039, "learning_rate": 1.3397058823529412e-07, "loss": 1.8773, "step": 67116 }, { "epoch": 98.63065049614113, "grad_norm": 3.1040735244750977, "learning_rate": 1.3088235294117648e-07, "loss": 1.8972, "step": 67137 }, { "epoch": 98.66152149944874, "grad_norm": 3.479513645172119, "learning_rate": 1.2779411764705883e-07, "loss": 1.9458, "step": 67158 }, { "epoch": 98.69239250275633, "grad_norm": 2.936136245727539, "learning_rate": 1.2470588235294119e-07, "loss": 1.9111, "step": 67179 }, { "epoch": 98.72326350606394, "grad_norm": 2.9547407627105713, "learning_rate": 1.2161764705882354e-07, "loss": 1.8377, "step": 67200 }, { "epoch": 98.75413450937155, "grad_norm": 2.858610153198242, "learning_rate": 1.185294117647059e-07, "loss": 1.8909, "step": 67221 }, { "epoch": 98.78500551267916, "grad_norm": 2.894165515899658, "learning_rate": 1.1544117647058824e-07, "loss": 1.9364, "step": 67242 }, { "epoch": 98.81587651598677, "grad_norm": 3.0819976329803467, "learning_rate": 1.1235294117647059e-07, "loss": 1.9145, "step": 67263 }, { "epoch": 98.84674751929438, "grad_norm": 5.760529041290283, "learning_rate": 1.0926470588235296e-07, "loss": 1.8207, "step": 67284 }, { "epoch": 98.87761852260198, "grad_norm": 2.4830756187438965, "learning_rate": 1.0617647058823531e-07, "loss": 1.9562, "step": 67305 }, { "epoch": 98.90848952590959, "grad_norm": 4.050549030303955, "learning_rate": 1.0308823529411765e-07, "loss": 1.9238, "step": 67326 }, { "epoch": 98.9393605292172, "grad_norm": 2.8485705852508545, "learning_rate": 1.0000000000000001e-07, "loss": 1.9091, "step": 67347 }, { "epoch": 98.9702315325248, "grad_norm": 4.791774749755859, "learning_rate": 9.691176470588236e-08, "loss": 1.8733, "step": 67368 }, { "epoch": 99.0, "grad_norm": 0.8079424500465393, "learning_rate": 9.38235294117647e-08, "loss": 1.9047, "step": 67389 }, { "epoch": 99.03087100330761, "grad_norm": 4.8668341636657715, "learning_rate": 9.088235294117648e-08, "loss": 1.9134, "step": 67410 }, { "epoch": 99.06174200661522, "grad_norm": 2.9017627239227295, "learning_rate": 8.779411764705883e-08, "loss": 1.817, "step": 67431 }, { "epoch": 99.09261300992283, "grad_norm": 3.3302018642425537, "learning_rate": 8.470588235294117e-08, "loss": 1.8948, "step": 67452 }, { "epoch": 99.12348401323042, "grad_norm": 1.945304274559021, "learning_rate": 8.161764705882354e-08, "loss": 1.8127, "step": 67473 }, { "epoch": 99.15435501653803, "grad_norm": 2.6104207038879395, "learning_rate": 7.85294117647059e-08, "loss": 1.9188, "step": 67494 }, { "epoch": 99.18522601984564, "grad_norm": 3.31247878074646, "learning_rate": 7.544117647058824e-08, "loss": 1.977, "step": 67515 }, { "epoch": 99.21609702315325, "grad_norm": 3.222074508666992, "learning_rate": 7.235294117647059e-08, "loss": 2.0133, "step": 67536 }, { "epoch": 99.24696802646086, "grad_norm": 3.4898641109466553, "learning_rate": 6.926470588235294e-08, "loss": 1.8498, "step": 67557 }, { "epoch": 99.27783902976847, "grad_norm": 3.5139529705047607, "learning_rate": 6.61764705882353e-08, "loss": 1.9193, "step": 67578 }, { "epoch": 99.30871003307608, "grad_norm": 3.4633169174194336, "learning_rate": 6.308823529411765e-08, "loss": 1.8635, "step": 67599 }, { "epoch": 99.33958103638368, "grad_norm": 2.8732082843780518, "learning_rate": 6.000000000000001e-08, "loss": 1.8971, "step": 67620 }, { "epoch": 99.37045203969129, "grad_norm": 2.5320065021514893, "learning_rate": 5.6911764705882356e-08, "loss": 1.7454, "step": 67641 }, { "epoch": 99.4013230429989, "grad_norm": 2.898432493209839, "learning_rate": 5.382352941176472e-08, "loss": 1.9419, "step": 67662 }, { "epoch": 99.4321940463065, "grad_norm": 2.26355242729187, "learning_rate": 5.0735294117647065e-08, "loss": 1.9062, "step": 67683 }, { "epoch": 99.46306504961412, "grad_norm": 2.6402390003204346, "learning_rate": 4.764705882352941e-08, "loss": 1.8198, "step": 67704 }, { "epoch": 99.49393605292173, "grad_norm": 3.2888033390045166, "learning_rate": 4.4558823529411774e-08, "loss": 1.8233, "step": 67725 }, { "epoch": 99.52480705622932, "grad_norm": 2.7816920280456543, "learning_rate": 4.147058823529412e-08, "loss": 1.8039, "step": 67746 }, { "epoch": 99.55567805953693, "grad_norm": 2.800788164138794, "learning_rate": 3.8382352941176476e-08, "loss": 1.902, "step": 67767 }, { "epoch": 99.58654906284454, "grad_norm": 3.3362276554107666, "learning_rate": 3.529411764705883e-08, "loss": 1.975, "step": 67788 }, { "epoch": 99.61742006615215, "grad_norm": 3.037355661392212, "learning_rate": 3.220588235294118e-08, "loss": 1.8967, "step": 67809 }, { "epoch": 99.64829106945976, "grad_norm": 2.9994897842407227, "learning_rate": 2.911764705882353e-08, "loss": 1.8873, "step": 67830 }, { "epoch": 99.67916207276737, "grad_norm": 2.301011085510254, "learning_rate": 2.6029411764705884e-08, "loss": 2.0236, "step": 67851 }, { "epoch": 99.71003307607498, "grad_norm": 3.313206195831299, "learning_rate": 2.294117647058824e-08, "loss": 1.9728, "step": 67872 }, { "epoch": 99.74090407938257, "grad_norm": 3.6507620811462402, "learning_rate": 1.9852941176470586e-08, "loss": 1.8724, "step": 67893 }, { "epoch": 99.77177508269018, "grad_norm": 3.25303316116333, "learning_rate": 1.676470588235294e-08, "loss": 1.9196, "step": 67914 }, { "epoch": 99.8026460859978, "grad_norm": 2.8233695030212402, "learning_rate": 1.3676470588235295e-08, "loss": 1.8605, "step": 67935 }, { "epoch": 99.8335170893054, "grad_norm": 2.4617419242858887, "learning_rate": 1.0588235294117647e-08, "loss": 1.8161, "step": 67956 }, { "epoch": 99.86438809261301, "grad_norm": 3.680828094482422, "learning_rate": 7.500000000000001e-09, "loss": 1.8704, "step": 67977 }, { "epoch": 99.89525909592062, "grad_norm": 3.570162534713745, "learning_rate": 4.411764705882354e-09, "loss": 1.8766, "step": 67998 }, { "epoch": 99.89819919147372, "step": 68000, "total_flos": 3.5659868061024707e+18, "train_loss": 1.3533928022805382, "train_runtime": 51839.0624, "train_samples_per_second": 262.435, "train_steps_per_second": 1.312 } ], "logging_steps": 21, "max_steps": 68000, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 6800, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.5659868061024707e+18, "train_batch_size": 50, "trial_name": null, "trial_params": null }