{ "best_metric": 0.20438289642333984, "best_model_checkpoint": "buki_2/wiki_subset_asr_full/checkpoint-82000", "epoch": 1.0, "eval_steps": 500, "global_step": 82000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.097560975609756e-05, "grad_norm": 21068.8828125, "learning_rate": 1.999959349593496e-05, "loss": 15.9881, "step": 5 }, { "epoch": 0.00012195121951219512, "grad_norm": 13183.3583984375, "learning_rate": 1.999918699186992e-05, "loss": 16.5251, "step": 10 }, { "epoch": 0.00018292682926829268, "grad_norm": 22145.552734375, "learning_rate": 1.999878048780488e-05, "loss": 15.794, "step": 15 }, { "epoch": 0.00024390243902439024, "grad_norm": 17047.796875, "learning_rate": 1.9998373983739838e-05, "loss": 15.146, "step": 20 }, { "epoch": 0.0003048780487804878, "grad_norm": 23351.755859375, "learning_rate": 1.99979674796748e-05, "loss": 13.4116, "step": 25 }, { "epoch": 0.00036585365853658537, "grad_norm": 6266.7216796875, "learning_rate": 1.9997560975609757e-05, "loss": 13.0902, "step": 30 }, { "epoch": 0.0004268292682926829, "grad_norm": 7646.64697265625, "learning_rate": 1.999715447154472e-05, "loss": 12.7272, "step": 35 }, { "epoch": 0.0004878048780487805, "grad_norm": 35587.3203125, "learning_rate": 1.9996747967479677e-05, "loss": 13.6897, "step": 40 }, { "epoch": 0.0005487804878048781, "grad_norm": 27482.837890625, "learning_rate": 1.999634146341464e-05, "loss": 14.3627, "step": 45 }, { "epoch": 0.0006097560975609756, "grad_norm": 5993.9052734375, "learning_rate": 1.9995934959349593e-05, "loss": 12.6045, "step": 50 }, { "epoch": 0.0006707317073170732, "grad_norm": 2555.38134765625, "learning_rate": 1.9995528455284555e-05, "loss": 11.9361, "step": 55 }, { "epoch": 0.0007317073170731707, "grad_norm": 6372.08154296875, "learning_rate": 1.9995121951219513e-05, "loss": 11.4853, "step": 60 }, { "epoch": 0.0007926829268292683, "grad_norm": 9471.5224609375, "learning_rate": 1.9994715447154474e-05, "loss": 11.602, "step": 65 }, { "epoch": 0.0008536585365853659, "grad_norm": 4149.1787109375, "learning_rate": 1.9994308943089432e-05, "loss": 11.9161, "step": 70 }, { "epoch": 0.0009146341463414635, "grad_norm": 5230.70068359375, "learning_rate": 1.9993902439024394e-05, "loss": 11.4582, "step": 75 }, { "epoch": 0.000975609756097561, "grad_norm": 2866.1279296875, "learning_rate": 1.9993495934959352e-05, "loss": 11.0967, "step": 80 }, { "epoch": 0.0010365853658536586, "grad_norm": 22604.216796875, "learning_rate": 1.999308943089431e-05, "loss": 10.8629, "step": 85 }, { "epoch": 0.0010975609756097562, "grad_norm": 3619.362060546875, "learning_rate": 1.9992682926829268e-05, "loss": 10.4588, "step": 90 }, { "epoch": 0.0011585365853658536, "grad_norm": 3852.07080078125, "learning_rate": 1.999227642276423e-05, "loss": 10.397, "step": 95 }, { "epoch": 0.0012195121951219512, "grad_norm": 2039.72314453125, "learning_rate": 1.9991869918699188e-05, "loss": 9.6772, "step": 100 }, { "epoch": 0.0012804878048780488, "grad_norm": 4489.95361328125, "learning_rate": 1.999146341463415e-05, "loss": 9.4468, "step": 105 }, { "epoch": 0.0013414634146341464, "grad_norm": 3193.583251953125, "learning_rate": 1.9991056910569108e-05, "loss": 8.9202, "step": 110 }, { "epoch": 0.0014024390243902438, "grad_norm": 384.22576904296875, "learning_rate": 1.9990650406504066e-05, "loss": 8.5232, "step": 115 }, { "epoch": 0.0014634146341463415, "grad_norm": 974.3699340820312, "learning_rate": 1.9990243902439027e-05, "loss": 7.97, "step": 120 }, { "epoch": 0.001524390243902439, "grad_norm": 262.4773864746094, "learning_rate": 1.9989837398373985e-05, "loss": 7.9609, "step": 125 }, { "epoch": 0.0015853658536585367, "grad_norm": 551.0626831054688, "learning_rate": 1.9989430894308947e-05, "loss": 7.7561, "step": 130 }, { "epoch": 0.001646341463414634, "grad_norm": 1569.535888671875, "learning_rate": 1.9989024390243905e-05, "loss": 8.0695, "step": 135 }, { "epoch": 0.0017073170731707317, "grad_norm": 679.7315673828125, "learning_rate": 1.9988617886178863e-05, "loss": 8.1688, "step": 140 }, { "epoch": 0.0017682926829268293, "grad_norm": 1891.1019287109375, "learning_rate": 1.998821138211382e-05, "loss": 7.2281, "step": 145 }, { "epoch": 0.001829268292682927, "grad_norm": 193.3477020263672, "learning_rate": 1.9987804878048783e-05, "loss": 6.4952, "step": 150 }, { "epoch": 0.0018902439024390243, "grad_norm": 1594.0850830078125, "learning_rate": 1.998739837398374e-05, "loss": 6.7871, "step": 155 }, { "epoch": 0.001951219512195122, "grad_norm": 138.61203002929688, "learning_rate": 1.9986991869918702e-05, "loss": 6.8556, "step": 160 }, { "epoch": 0.0020121951219512196, "grad_norm": 1160.5452880859375, "learning_rate": 1.998658536585366e-05, "loss": 6.2281, "step": 165 }, { "epoch": 0.002073170731707317, "grad_norm": 191.53846740722656, "learning_rate": 1.998617886178862e-05, "loss": 5.7038, "step": 170 }, { "epoch": 0.002134146341463415, "grad_norm": 108.30973815917969, "learning_rate": 1.998577235772358e-05, "loss": 6.0447, "step": 175 }, { "epoch": 0.0021951219512195124, "grad_norm": 71.8901596069336, "learning_rate": 1.9985365853658538e-05, "loss": 4.9741, "step": 180 }, { "epoch": 0.0022560975609756096, "grad_norm": 4633.21875, "learning_rate": 1.9984959349593496e-05, "loss": 4.6081, "step": 185 }, { "epoch": 0.002317073170731707, "grad_norm": 134.48391723632812, "learning_rate": 1.9984552845528458e-05, "loss": 4.2175, "step": 190 }, { "epoch": 0.002378048780487805, "grad_norm": 974.0361328125, "learning_rate": 1.9984146341463416e-05, "loss": 4.3216, "step": 195 }, { "epoch": 0.0024390243902439024, "grad_norm": 24.37929344177246, "learning_rate": 1.9983739837398374e-05, "loss": 4.0565, "step": 200 }, { "epoch": 0.0025, "grad_norm": 19.091672897338867, "learning_rate": 1.9983333333333336e-05, "loss": 3.3813, "step": 205 }, { "epoch": 0.0025609756097560977, "grad_norm": 14.21900749206543, "learning_rate": 1.9982926829268294e-05, "loss": 2.9247, "step": 210 }, { "epoch": 0.0026219512195121953, "grad_norm": 15.175148963928223, "learning_rate": 1.9982520325203255e-05, "loss": 2.559, "step": 215 }, { "epoch": 0.002682926829268293, "grad_norm": 19.285018920898438, "learning_rate": 1.9982113821138213e-05, "loss": 2.3978, "step": 220 }, { "epoch": 0.00274390243902439, "grad_norm": 11.707620620727539, "learning_rate": 1.9981707317073175e-05, "loss": 2.49, "step": 225 }, { "epoch": 0.0028048780487804877, "grad_norm": 14.14188003540039, "learning_rate": 1.998130081300813e-05, "loss": 2.3662, "step": 230 }, { "epoch": 0.0028658536585365853, "grad_norm": 12.624093055725098, "learning_rate": 1.998089430894309e-05, "loss": 1.952, "step": 235 }, { "epoch": 0.002926829268292683, "grad_norm": 9.078788757324219, "learning_rate": 1.998048780487805e-05, "loss": 1.9294, "step": 240 }, { "epoch": 0.0029878048780487805, "grad_norm": 11.254697799682617, "learning_rate": 1.998008130081301e-05, "loss": 1.7116, "step": 245 }, { "epoch": 0.003048780487804878, "grad_norm": 21.097408294677734, "learning_rate": 1.997967479674797e-05, "loss": 1.8528, "step": 250 }, { "epoch": 0.0031097560975609758, "grad_norm": 12.624704360961914, "learning_rate": 1.997926829268293e-05, "loss": 1.6029, "step": 255 }, { "epoch": 0.0031707317073170734, "grad_norm": 7.478496551513672, "learning_rate": 1.997886178861789e-05, "loss": 1.7512, "step": 260 }, { "epoch": 0.0032317073170731706, "grad_norm": 7.3746161460876465, "learning_rate": 1.9978455284552847e-05, "loss": 1.4533, "step": 265 }, { "epoch": 0.003292682926829268, "grad_norm": 8.653033256530762, "learning_rate": 1.9978048780487805e-05, "loss": 1.5479, "step": 270 }, { "epoch": 0.003353658536585366, "grad_norm": 9.171099662780762, "learning_rate": 1.9977642276422766e-05, "loss": 1.5404, "step": 275 }, { "epoch": 0.0034146341463414634, "grad_norm": 6.008787631988525, "learning_rate": 1.9977235772357725e-05, "loss": 1.3035, "step": 280 }, { "epoch": 0.003475609756097561, "grad_norm": 13.289207458496094, "learning_rate": 1.9976829268292686e-05, "loss": 1.2888, "step": 285 }, { "epoch": 0.0035365853658536586, "grad_norm": 8.61522102355957, "learning_rate": 1.9976422764227644e-05, "loss": 1.5146, "step": 290 }, { "epoch": 0.0035975609756097563, "grad_norm": 7.683524131774902, "learning_rate": 1.9976016260162602e-05, "loss": 1.2756, "step": 295 }, { "epoch": 0.003658536585365854, "grad_norm": 8.47628116607666, "learning_rate": 1.9975609756097564e-05, "loss": 1.2619, "step": 300 }, { "epoch": 0.003719512195121951, "grad_norm": 8.44837474822998, "learning_rate": 1.9975203252032522e-05, "loss": 1.19, "step": 305 }, { "epoch": 0.0037804878048780487, "grad_norm": 5.119568824768066, "learning_rate": 1.9974796747967483e-05, "loss": 1.1948, "step": 310 }, { "epoch": 0.0038414634146341463, "grad_norm": 5.391551971435547, "learning_rate": 1.997439024390244e-05, "loss": 1.0786, "step": 315 }, { "epoch": 0.003902439024390244, "grad_norm": 4.793148994445801, "learning_rate": 1.99739837398374e-05, "loss": 1.1841, "step": 320 }, { "epoch": 0.0039634146341463415, "grad_norm": 8.34709358215332, "learning_rate": 1.9973577235772358e-05, "loss": 1.1039, "step": 325 }, { "epoch": 0.004024390243902439, "grad_norm": 8.927913665771484, "learning_rate": 1.997317073170732e-05, "loss": 1.1179, "step": 330 }, { "epoch": 0.004085365853658537, "grad_norm": 7.9588727951049805, "learning_rate": 1.9972764227642278e-05, "loss": 1.1046, "step": 335 }, { "epoch": 0.004146341463414634, "grad_norm": 4.40801477432251, "learning_rate": 1.997235772357724e-05, "loss": 1.0782, "step": 340 }, { "epoch": 0.004207317073170732, "grad_norm": 4.383518695831299, "learning_rate": 1.9971951219512197e-05, "loss": 1.0584, "step": 345 }, { "epoch": 0.00426829268292683, "grad_norm": 6.481703281402588, "learning_rate": 1.9971544715447155e-05, "loss": 1.0083, "step": 350 }, { "epoch": 0.004329268292682927, "grad_norm": 3.731377363204956, "learning_rate": 1.9971138211382113e-05, "loss": 0.9253, "step": 355 }, { "epoch": 0.004390243902439025, "grad_norm": 7.969158172607422, "learning_rate": 1.9970731707317075e-05, "loss": 0.9643, "step": 360 }, { "epoch": 0.004451219512195122, "grad_norm": 5.028504371643066, "learning_rate": 1.9970325203252033e-05, "loss": 1.0664, "step": 365 }, { "epoch": 0.004512195121951219, "grad_norm": 25.73756980895996, "learning_rate": 1.9969918699186995e-05, "loss": 1.128, "step": 370 }, { "epoch": 0.004573170731707317, "grad_norm": 5.379063606262207, "learning_rate": 1.9969512195121953e-05, "loss": 0.934, "step": 375 }, { "epoch": 0.004634146341463414, "grad_norm": 4.35447359085083, "learning_rate": 1.996910569105691e-05, "loss": 1.0062, "step": 380 }, { "epoch": 0.004695121951219512, "grad_norm": 4.954445838928223, "learning_rate": 1.9968699186991872e-05, "loss": 0.9052, "step": 385 }, { "epoch": 0.00475609756097561, "grad_norm": 4.362308502197266, "learning_rate": 1.996829268292683e-05, "loss": 0.9125, "step": 390 }, { "epoch": 0.004817073170731707, "grad_norm": 8.319256782531738, "learning_rate": 1.9967886178861792e-05, "loss": 1.025, "step": 395 }, { "epoch": 0.004878048780487805, "grad_norm": 4.792065143585205, "learning_rate": 1.996747967479675e-05, "loss": 0.8096, "step": 400 }, { "epoch": 0.0049390243902439025, "grad_norm": 5.166938304901123, "learning_rate": 1.996707317073171e-05, "loss": 0.8841, "step": 405 }, { "epoch": 0.005, "grad_norm": 3.6804988384246826, "learning_rate": 1.9966666666666666e-05, "loss": 0.9799, "step": 410 }, { "epoch": 0.005060975609756098, "grad_norm": 4.1193013191223145, "learning_rate": 1.9966260162601628e-05, "loss": 0.9055, "step": 415 }, { "epoch": 0.005121951219512195, "grad_norm": 3.661860227584839, "learning_rate": 1.9965853658536586e-05, "loss": 0.9538, "step": 420 }, { "epoch": 0.005182926829268293, "grad_norm": 5.358866214752197, "learning_rate": 1.9965447154471548e-05, "loss": 0.9133, "step": 425 }, { "epoch": 0.0052439024390243906, "grad_norm": 5.163463592529297, "learning_rate": 1.9965040650406506e-05, "loss": 0.9262, "step": 430 }, { "epoch": 0.005304878048780488, "grad_norm": 4.152734756469727, "learning_rate": 1.9964634146341467e-05, "loss": 0.9435, "step": 435 }, { "epoch": 0.005365853658536586, "grad_norm": 3.4346468448638916, "learning_rate": 1.9964227642276425e-05, "loss": 0.8667, "step": 440 }, { "epoch": 0.0054268292682926825, "grad_norm": 4.065300464630127, "learning_rate": 1.9963821138211383e-05, "loss": 0.9395, "step": 445 }, { "epoch": 0.00548780487804878, "grad_norm": 3.099510669708252, "learning_rate": 1.996341463414634e-05, "loss": 0.948, "step": 450 }, { "epoch": 0.005548780487804878, "grad_norm": 4.1112775802612305, "learning_rate": 1.9963008130081303e-05, "loss": 0.8184, "step": 455 }, { "epoch": 0.005609756097560975, "grad_norm": 3.3547239303588867, "learning_rate": 1.996260162601626e-05, "loss": 0.9234, "step": 460 }, { "epoch": 0.005670731707317073, "grad_norm": 12.462967872619629, "learning_rate": 1.9962195121951223e-05, "loss": 0.9045, "step": 465 }, { "epoch": 0.005731707317073171, "grad_norm": 3.4422123432159424, "learning_rate": 1.996178861788618e-05, "loss": 0.8736, "step": 470 }, { "epoch": 0.005792682926829268, "grad_norm": 2.9281888008117676, "learning_rate": 1.996138211382114e-05, "loss": 0.8314, "step": 475 }, { "epoch": 0.005853658536585366, "grad_norm": 3.342905044555664, "learning_rate": 1.99609756097561e-05, "loss": 0.8575, "step": 480 }, { "epoch": 0.0059146341463414635, "grad_norm": 3.8865346908569336, "learning_rate": 1.996056910569106e-05, "loss": 0.8504, "step": 485 }, { "epoch": 0.005975609756097561, "grad_norm": 4.707225322723389, "learning_rate": 1.996016260162602e-05, "loss": 0.8381, "step": 490 }, { "epoch": 0.006036585365853659, "grad_norm": 3.6919429302215576, "learning_rate": 1.9959756097560978e-05, "loss": 0.8344, "step": 495 }, { "epoch": 0.006097560975609756, "grad_norm": 3.408567190170288, "learning_rate": 1.9959349593495936e-05, "loss": 0.8893, "step": 500 }, { "epoch": 0.006158536585365854, "grad_norm": 3.965311288833618, "learning_rate": 1.9958943089430895e-05, "loss": 0.7381, "step": 505 }, { "epoch": 0.0062195121951219515, "grad_norm": 6.234456539154053, "learning_rate": 1.9958536585365856e-05, "loss": 0.7682, "step": 510 }, { "epoch": 0.006280487804878049, "grad_norm": 6.570810794830322, "learning_rate": 1.9958130081300814e-05, "loss": 0.6985, "step": 515 }, { "epoch": 0.006341463414634147, "grad_norm": 3.131789445877075, "learning_rate": 1.9957723577235776e-05, "loss": 0.7384, "step": 520 }, { "epoch": 0.0064024390243902435, "grad_norm": 3.095741033554077, "learning_rate": 1.9957317073170734e-05, "loss": 0.8986, "step": 525 }, { "epoch": 0.006463414634146341, "grad_norm": 2.233480930328369, "learning_rate": 1.9956910569105692e-05, "loss": 0.7045, "step": 530 }, { "epoch": 0.006524390243902439, "grad_norm": 3.094856023788452, "learning_rate": 1.995650406504065e-05, "loss": 0.7248, "step": 535 }, { "epoch": 0.006585365853658536, "grad_norm": 3.5694737434387207, "learning_rate": 1.995609756097561e-05, "loss": 0.85, "step": 540 }, { "epoch": 0.006646341463414634, "grad_norm": 2.285737991333008, "learning_rate": 1.995569105691057e-05, "loss": 0.6916, "step": 545 }, { "epoch": 0.006707317073170732, "grad_norm": 2.545903205871582, "learning_rate": 1.995528455284553e-05, "loss": 0.6767, "step": 550 }, { "epoch": 0.006768292682926829, "grad_norm": 3.110586166381836, "learning_rate": 1.995487804878049e-05, "loss": 0.7503, "step": 555 }, { "epoch": 0.006829268292682927, "grad_norm": 5.400684356689453, "learning_rate": 1.9954471544715447e-05, "loss": 0.6917, "step": 560 }, { "epoch": 0.0068902439024390244, "grad_norm": 3.03550386428833, "learning_rate": 1.995406504065041e-05, "loss": 0.6971, "step": 565 }, { "epoch": 0.006951219512195122, "grad_norm": 2.5908315181732178, "learning_rate": 1.9953658536585367e-05, "loss": 0.8355, "step": 570 }, { "epoch": 0.00701219512195122, "grad_norm": 2.0064756870269775, "learning_rate": 1.995325203252033e-05, "loss": 0.6634, "step": 575 }, { "epoch": 0.007073170731707317, "grad_norm": 2.6719536781311035, "learning_rate": 1.9952845528455287e-05, "loss": 0.6797, "step": 580 }, { "epoch": 0.007134146341463415, "grad_norm": 2.428192138671875, "learning_rate": 1.9952439024390248e-05, "loss": 0.7201, "step": 585 }, { "epoch": 0.0071951219512195125, "grad_norm": 5.75555944442749, "learning_rate": 1.9952032520325203e-05, "loss": 0.7702, "step": 590 }, { "epoch": 0.00725609756097561, "grad_norm": 2.4313719272613525, "learning_rate": 1.9951626016260165e-05, "loss": 0.6505, "step": 595 }, { "epoch": 0.007317073170731708, "grad_norm": 2.3774871826171875, "learning_rate": 1.9951219512195123e-05, "loss": 0.7602, "step": 600 }, { "epoch": 0.0073780487804878045, "grad_norm": 2.587007761001587, "learning_rate": 1.9950813008130084e-05, "loss": 0.759, "step": 605 }, { "epoch": 0.007439024390243902, "grad_norm": 3.286200523376465, "learning_rate": 1.9950406504065042e-05, "loss": 0.7135, "step": 610 }, { "epoch": 0.0075, "grad_norm": 3.8205623626708984, "learning_rate": 1.9950000000000004e-05, "loss": 0.6645, "step": 615 }, { "epoch": 0.007560975609756097, "grad_norm": 3.630608320236206, "learning_rate": 1.994959349593496e-05, "loss": 0.7439, "step": 620 }, { "epoch": 0.007621951219512195, "grad_norm": 2.1528735160827637, "learning_rate": 1.994918699186992e-05, "loss": 0.6774, "step": 625 }, { "epoch": 0.007682926829268293, "grad_norm": 3.575322151184082, "learning_rate": 1.9948780487804878e-05, "loss": 0.7526, "step": 630 }, { "epoch": 0.00774390243902439, "grad_norm": 2.9441134929656982, "learning_rate": 1.994837398373984e-05, "loss": 0.5481, "step": 635 }, { "epoch": 0.007804878048780488, "grad_norm": 2.5531086921691895, "learning_rate": 1.9947967479674798e-05, "loss": 0.6513, "step": 640 }, { "epoch": 0.007865853658536585, "grad_norm": 3.5466127395629883, "learning_rate": 1.994756097560976e-05, "loss": 0.7033, "step": 645 }, { "epoch": 0.007926829268292683, "grad_norm": 2.169678211212158, "learning_rate": 1.9947154471544717e-05, "loss": 0.6243, "step": 650 }, { "epoch": 0.00798780487804878, "grad_norm": 4.666969299316406, "learning_rate": 1.9946747967479676e-05, "loss": 0.6343, "step": 655 }, { "epoch": 0.008048780487804878, "grad_norm": 2.723241090774536, "learning_rate": 1.9946341463414637e-05, "loss": 0.5955, "step": 660 }, { "epoch": 0.008109756097560976, "grad_norm": 3.09371018409729, "learning_rate": 1.9945934959349595e-05, "loss": 0.6254, "step": 665 }, { "epoch": 0.008170731707317073, "grad_norm": 3.4821057319641113, "learning_rate": 1.9945528455284557e-05, "loss": 0.679, "step": 670 }, { "epoch": 0.008231707317073171, "grad_norm": 4.592081546783447, "learning_rate": 1.9945121951219515e-05, "loss": 0.7204, "step": 675 }, { "epoch": 0.008292682926829269, "grad_norm": 1.9702930450439453, "learning_rate": 1.9944715447154473e-05, "loss": 0.7087, "step": 680 }, { "epoch": 0.008353658536585366, "grad_norm": 2.389256715774536, "learning_rate": 1.994430894308943e-05, "loss": 0.6078, "step": 685 }, { "epoch": 0.008414634146341464, "grad_norm": 3.08144474029541, "learning_rate": 1.9943902439024393e-05, "loss": 0.6223, "step": 690 }, { "epoch": 0.008475609756097562, "grad_norm": 2.7686405181884766, "learning_rate": 1.994349593495935e-05, "loss": 0.62, "step": 695 }, { "epoch": 0.00853658536585366, "grad_norm": 2.985308885574341, "learning_rate": 1.9943089430894312e-05, "loss": 0.6067, "step": 700 }, { "epoch": 0.008597560975609757, "grad_norm": 3.7384800910949707, "learning_rate": 1.994268292682927e-05, "loss": 0.6565, "step": 705 }, { "epoch": 0.008658536585365854, "grad_norm": 2.7462961673736572, "learning_rate": 1.994227642276423e-05, "loss": 0.6409, "step": 710 }, { "epoch": 0.008719512195121952, "grad_norm": 2.485067367553711, "learning_rate": 1.9941869918699187e-05, "loss": 0.5772, "step": 715 }, { "epoch": 0.00878048780487805, "grad_norm": 3.344489336013794, "learning_rate": 1.9941463414634148e-05, "loss": 0.5567, "step": 720 }, { "epoch": 0.008841463414634146, "grad_norm": 2.9682998657226562, "learning_rate": 1.9941056910569106e-05, "loss": 0.7144, "step": 725 }, { "epoch": 0.008902439024390243, "grad_norm": 2.5773751735687256, "learning_rate": 1.9940650406504068e-05, "loss": 0.5482, "step": 730 }, { "epoch": 0.00896341463414634, "grad_norm": 1.9932299852371216, "learning_rate": 1.9940243902439026e-05, "loss": 0.514, "step": 735 }, { "epoch": 0.009024390243902438, "grad_norm": 2.0099592208862305, "learning_rate": 1.9939837398373984e-05, "loss": 0.6366, "step": 740 }, { "epoch": 0.009085365853658536, "grad_norm": 2.5286691188812256, "learning_rate": 1.9939430894308946e-05, "loss": 0.5875, "step": 745 }, { "epoch": 0.009146341463414634, "grad_norm": 2.158231258392334, "learning_rate": 1.9939024390243904e-05, "loss": 0.6492, "step": 750 }, { "epoch": 0.009207317073170731, "grad_norm": 3.1680619716644287, "learning_rate": 1.9938617886178865e-05, "loss": 0.5927, "step": 755 }, { "epoch": 0.009268292682926829, "grad_norm": 2.203378438949585, "learning_rate": 1.9938211382113823e-05, "loss": 0.6036, "step": 760 }, { "epoch": 0.009329268292682926, "grad_norm": 2.713257312774658, "learning_rate": 1.993780487804878e-05, "loss": 0.6164, "step": 765 }, { "epoch": 0.009390243902439024, "grad_norm": 2.772801637649536, "learning_rate": 1.993739837398374e-05, "loss": 0.6192, "step": 770 }, { "epoch": 0.009451219512195122, "grad_norm": 2.427680492401123, "learning_rate": 1.99369918699187e-05, "loss": 0.6351, "step": 775 }, { "epoch": 0.00951219512195122, "grad_norm": 3.3025386333465576, "learning_rate": 1.993658536585366e-05, "loss": 0.5883, "step": 780 }, { "epoch": 0.009573170731707317, "grad_norm": 2.6954727172851562, "learning_rate": 1.993617886178862e-05, "loss": 0.5622, "step": 785 }, { "epoch": 0.009634146341463415, "grad_norm": 2.46427583694458, "learning_rate": 1.993577235772358e-05, "loss": 0.556, "step": 790 }, { "epoch": 0.009695121951219512, "grad_norm": 1367.3411865234375, "learning_rate": 1.993536585365854e-05, "loss": 0.8656, "step": 795 }, { "epoch": 0.00975609756097561, "grad_norm": 2.631274461746216, "learning_rate": 1.9934959349593495e-05, "loss": 0.532, "step": 800 }, { "epoch": 0.009817073170731707, "grad_norm": 3.04709529876709, "learning_rate": 1.9934552845528457e-05, "loss": 0.8174, "step": 805 }, { "epoch": 0.009878048780487805, "grad_norm": 2.221844434738159, "learning_rate": 1.9934146341463415e-05, "loss": 0.5133, "step": 810 }, { "epoch": 0.009939024390243903, "grad_norm": 2.4361939430236816, "learning_rate": 1.9933739837398376e-05, "loss": 0.6033, "step": 815 }, { "epoch": 0.01, "grad_norm": 4.3990325927734375, "learning_rate": 1.9933333333333334e-05, "loss": 0.5177, "step": 820 }, { "epoch": 0.010060975609756098, "grad_norm": 2.5661633014678955, "learning_rate": 1.9932926829268296e-05, "loss": 0.5291, "step": 825 }, { "epoch": 0.010121951219512195, "grad_norm": 2.271019697189331, "learning_rate": 1.9932520325203254e-05, "loss": 0.5089, "step": 830 }, { "epoch": 0.010182926829268293, "grad_norm": 9.637794494628906, "learning_rate": 1.9932113821138212e-05, "loss": 0.6237, "step": 835 }, { "epoch": 0.01024390243902439, "grad_norm": 3.143720865249634, "learning_rate": 1.9931707317073174e-05, "loss": 0.5382, "step": 840 }, { "epoch": 0.010304878048780488, "grad_norm": 2.3360683917999268, "learning_rate": 1.9931300813008132e-05, "loss": 0.5178, "step": 845 }, { "epoch": 0.010365853658536586, "grad_norm": 3.873223304748535, "learning_rate": 1.9930894308943093e-05, "loss": 0.5889, "step": 850 }, { "epoch": 0.010426829268292684, "grad_norm": 1.845794916152954, "learning_rate": 1.993048780487805e-05, "loss": 0.5945, "step": 855 }, { "epoch": 0.010487804878048781, "grad_norm": 1.621500015258789, "learning_rate": 1.993008130081301e-05, "loss": 0.4828, "step": 860 }, { "epoch": 0.010548780487804879, "grad_norm": 2.3296496868133545, "learning_rate": 1.9929674796747968e-05, "loss": 0.5253, "step": 865 }, { "epoch": 0.010609756097560976, "grad_norm": 2.7935407161712646, "learning_rate": 1.992926829268293e-05, "loss": 0.9672, "step": 870 }, { "epoch": 0.010670731707317074, "grad_norm": 1.673964262008667, "learning_rate": 1.9928861788617887e-05, "loss": 0.4989, "step": 875 }, { "epoch": 0.010731707317073172, "grad_norm": 2.336047410964966, "learning_rate": 1.992845528455285e-05, "loss": 0.555, "step": 880 }, { "epoch": 0.010792682926829267, "grad_norm": 2.35268497467041, "learning_rate": 1.9928048780487807e-05, "loss": 0.5497, "step": 885 }, { "epoch": 0.010853658536585365, "grad_norm": 5.4796037673950195, "learning_rate": 1.9927642276422765e-05, "loss": 0.5068, "step": 890 }, { "epoch": 0.010914634146341463, "grad_norm": 2.1404812335968018, "learning_rate": 1.9927235772357723e-05, "loss": 0.4689, "step": 895 }, { "epoch": 0.01097560975609756, "grad_norm": 2.187176465988159, "learning_rate": 1.9926829268292685e-05, "loss": 0.574, "step": 900 }, { "epoch": 0.011036585365853658, "grad_norm": 2.9466705322265625, "learning_rate": 1.9926422764227643e-05, "loss": 0.6708, "step": 905 }, { "epoch": 0.011097560975609756, "grad_norm": 2.305023431777954, "learning_rate": 1.9926016260162604e-05, "loss": 0.5769, "step": 910 }, { "epoch": 0.011158536585365853, "grad_norm": 2.296673536300659, "learning_rate": 1.9925609756097563e-05, "loss": 0.4854, "step": 915 }, { "epoch": 0.01121951219512195, "grad_norm": 3.1287710666656494, "learning_rate": 1.992520325203252e-05, "loss": 0.5434, "step": 920 }, { "epoch": 0.011280487804878048, "grad_norm": 4.033596515655518, "learning_rate": 1.9924796747967482e-05, "loss": 0.6085, "step": 925 }, { "epoch": 0.011341463414634146, "grad_norm": 2.9098668098449707, "learning_rate": 1.992439024390244e-05, "loss": 0.7421, "step": 930 }, { "epoch": 0.011402439024390244, "grad_norm": 2.9150876998901367, "learning_rate": 1.9923983739837402e-05, "loss": 1.233, "step": 935 }, { "epoch": 0.011463414634146341, "grad_norm": 4.874716281890869, "learning_rate": 1.992357723577236e-05, "loss": 1.0199, "step": 940 }, { "epoch": 0.011524390243902439, "grad_norm": 2.8884823322296143, "learning_rate": 1.9923170731707318e-05, "loss": 0.6482, "step": 945 }, { "epoch": 0.011585365853658536, "grad_norm": 4.006091594696045, "learning_rate": 1.9922764227642276e-05, "loss": 0.7782, "step": 950 }, { "epoch": 0.011646341463414634, "grad_norm": 3.9438185691833496, "learning_rate": 1.9922357723577238e-05, "loss": 0.5622, "step": 955 }, { "epoch": 0.011707317073170732, "grad_norm": 5.1293792724609375, "learning_rate": 1.9921951219512196e-05, "loss": 0.5208, "step": 960 }, { "epoch": 0.01176829268292683, "grad_norm": 8.409658432006836, "learning_rate": 1.9921544715447157e-05, "loss": 0.5443, "step": 965 }, { "epoch": 0.011829268292682927, "grad_norm": 9.713091850280762, "learning_rate": 1.9921138211382116e-05, "loss": 0.6364, "step": 970 }, { "epoch": 0.011890243902439025, "grad_norm": 3.2790615558624268, "learning_rate": 1.9920731707317077e-05, "loss": 0.5181, "step": 975 }, { "epoch": 0.011951219512195122, "grad_norm": 3.58280611038208, "learning_rate": 1.9920325203252032e-05, "loss": 0.5244, "step": 980 }, { "epoch": 0.01201219512195122, "grad_norm": 5.477580547332764, "learning_rate": 1.9919918699186993e-05, "loss": 0.5854, "step": 985 }, { "epoch": 0.012073170731707317, "grad_norm": 3.6729865074157715, "learning_rate": 1.991951219512195e-05, "loss": 0.6583, "step": 990 }, { "epoch": 0.012134146341463415, "grad_norm": 3.4799695014953613, "learning_rate": 1.9919105691056913e-05, "loss": 0.5666, "step": 995 }, { "epoch": 0.012195121951219513, "grad_norm": 2.923051357269287, "learning_rate": 1.991869918699187e-05, "loss": 0.5576, "step": 1000 }, { "epoch": 0.01225609756097561, "grad_norm": 9.951852798461914, "learning_rate": 1.9918292682926833e-05, "loss": 0.5386, "step": 1005 }, { "epoch": 0.012317073170731708, "grad_norm": 3.899326801300049, "learning_rate": 1.991788617886179e-05, "loss": 0.7749, "step": 1010 }, { "epoch": 0.012378048780487805, "grad_norm": 280.890625, "learning_rate": 1.991747967479675e-05, "loss": 0.9094, "step": 1015 }, { "epoch": 0.012439024390243903, "grad_norm": 126.46202087402344, "learning_rate": 1.991707317073171e-05, "loss": 0.8312, "step": 1020 }, { "epoch": 0.0125, "grad_norm": 10.540144920349121, "learning_rate": 1.991666666666667e-05, "loss": 0.6427, "step": 1025 }, { "epoch": 0.012560975609756098, "grad_norm": 10.083279609680176, "learning_rate": 1.9916260162601627e-05, "loss": 0.9011, "step": 1030 }, { "epoch": 0.012621951219512196, "grad_norm": 3.1658005714416504, "learning_rate": 1.9915853658536588e-05, "loss": 0.9953, "step": 1035 }, { "epoch": 0.012682926829268294, "grad_norm": 511.3910217285156, "learning_rate": 1.9915447154471546e-05, "loss": 1.0285, "step": 1040 }, { "epoch": 0.01274390243902439, "grad_norm": 1714.57470703125, "learning_rate": 1.9915040650406504e-05, "loss": 0.8472, "step": 1045 }, { "epoch": 0.012804878048780487, "grad_norm": 9.362554550170898, "learning_rate": 1.9914634146341466e-05, "loss": 0.6786, "step": 1050 }, { "epoch": 0.012865853658536585, "grad_norm": 4.48960018157959, "learning_rate": 1.9914227642276424e-05, "loss": 0.6533, "step": 1055 }, { "epoch": 0.012926829268292682, "grad_norm": 2.7999267578125, "learning_rate": 1.9913821138211386e-05, "loss": 0.5919, "step": 1060 }, { "epoch": 0.01298780487804878, "grad_norm": 97.36503601074219, "learning_rate": 1.9913414634146344e-05, "loss": 0.8335, "step": 1065 }, { "epoch": 0.013048780487804877, "grad_norm": 4.055099964141846, "learning_rate": 1.9913008130081302e-05, "loss": 0.6478, "step": 1070 }, { "epoch": 0.013109756097560975, "grad_norm": 3.299813985824585, "learning_rate": 1.991260162601626e-05, "loss": 0.591, "step": 1075 }, { "epoch": 0.013170731707317073, "grad_norm": 191.4732208251953, "learning_rate": 1.991219512195122e-05, "loss": 0.8309, "step": 1080 }, { "epoch": 0.01323170731707317, "grad_norm": 4.406102180480957, "learning_rate": 1.991178861788618e-05, "loss": 0.7032, "step": 1085 }, { "epoch": 0.013292682926829268, "grad_norm": 7.250959873199463, "learning_rate": 1.991138211382114e-05, "loss": 0.5549, "step": 1090 }, { "epoch": 0.013353658536585366, "grad_norm": 4.201699256896973, "learning_rate": 1.99109756097561e-05, "loss": 0.6294, "step": 1095 }, { "epoch": 0.013414634146341463, "grad_norm": 7.042034149169922, "learning_rate": 1.9910569105691057e-05, "loss": 0.6134, "step": 1100 }, { "epoch": 0.01347560975609756, "grad_norm": 2.3506622314453125, "learning_rate": 1.991016260162602e-05, "loss": 0.6306, "step": 1105 }, { "epoch": 0.013536585365853658, "grad_norm": 2.887763261795044, "learning_rate": 1.9909756097560977e-05, "loss": 0.5962, "step": 1110 }, { "epoch": 0.013597560975609756, "grad_norm": 85.25740814208984, "learning_rate": 1.990934959349594e-05, "loss": 0.4954, "step": 1115 }, { "epoch": 0.013658536585365854, "grad_norm": 4.582602500915527, "learning_rate": 1.9908943089430897e-05, "loss": 0.5128, "step": 1120 }, { "epoch": 0.013719512195121951, "grad_norm": 3.270890474319458, "learning_rate": 1.9908536585365855e-05, "loss": 0.5022, "step": 1125 }, { "epoch": 0.013780487804878049, "grad_norm": 5.967759132385254, "learning_rate": 1.9908130081300813e-05, "loss": 0.6069, "step": 1130 }, { "epoch": 0.013841463414634146, "grad_norm": 2.748248338699341, "learning_rate": 1.9907723577235774e-05, "loss": 0.5443, "step": 1135 }, { "epoch": 0.013902439024390244, "grad_norm": 6.1395769119262695, "learning_rate": 1.9907317073170733e-05, "loss": 0.6217, "step": 1140 }, { "epoch": 0.013963414634146342, "grad_norm": 3.2430315017700195, "learning_rate": 1.9906910569105694e-05, "loss": 0.6147, "step": 1145 }, { "epoch": 0.01402439024390244, "grad_norm": 2.1229751110076904, "learning_rate": 1.9906504065040652e-05, "loss": 0.4853, "step": 1150 }, { "epoch": 0.014085365853658537, "grad_norm": 2.509727954864502, "learning_rate": 1.9906097560975614e-05, "loss": 0.4884, "step": 1155 }, { "epoch": 0.014146341463414635, "grad_norm": 4.117963790893555, "learning_rate": 1.990569105691057e-05, "loss": 0.6503, "step": 1160 }, { "epoch": 0.014207317073170732, "grad_norm": 2.211516857147217, "learning_rate": 1.990528455284553e-05, "loss": 0.4768, "step": 1165 }, { "epoch": 0.01426829268292683, "grad_norm": 2.08782958984375, "learning_rate": 1.9904878048780488e-05, "loss": 0.4801, "step": 1170 }, { "epoch": 0.014329268292682927, "grad_norm": 2.5629825592041016, "learning_rate": 1.990447154471545e-05, "loss": 0.4804, "step": 1175 }, { "epoch": 0.014390243902439025, "grad_norm": 3.7542026042938232, "learning_rate": 1.9904065040650408e-05, "loss": 0.5565, "step": 1180 }, { "epoch": 0.014451219512195123, "grad_norm": 3.673804759979248, "learning_rate": 1.990365853658537e-05, "loss": 0.5256, "step": 1185 }, { "epoch": 0.01451219512195122, "grad_norm": 69.72850799560547, "learning_rate": 1.9903252032520327e-05, "loss": 0.4645, "step": 1190 }, { "epoch": 0.014573170731707318, "grad_norm": 2.7645297050476074, "learning_rate": 1.9902845528455286e-05, "loss": 0.4431, "step": 1195 }, { "epoch": 0.014634146341463415, "grad_norm": 2.879667282104492, "learning_rate": 1.9902439024390247e-05, "loss": 0.4938, "step": 1200 }, { "epoch": 0.014695121951219511, "grad_norm": 3.957634925842285, "learning_rate": 1.9902032520325205e-05, "loss": 0.5482, "step": 1205 }, { "epoch": 0.014756097560975609, "grad_norm": 1.898857593536377, "learning_rate": 1.9901626016260163e-05, "loss": 0.5812, "step": 1210 }, { "epoch": 0.014817073170731707, "grad_norm": 854.6612548828125, "learning_rate": 1.9901219512195125e-05, "loss": 0.7492, "step": 1215 }, { "epoch": 0.014878048780487804, "grad_norm": 2.524425745010376, "learning_rate": 1.9900813008130083e-05, "loss": 0.4685, "step": 1220 }, { "epoch": 0.014939024390243902, "grad_norm": 2.2990028858184814, "learning_rate": 1.990040650406504e-05, "loss": 0.4125, "step": 1225 }, { "epoch": 0.015, "grad_norm": 3.8147125244140625, "learning_rate": 1.9900000000000003e-05, "loss": 0.4365, "step": 1230 }, { "epoch": 0.015060975609756097, "grad_norm": 2.6131129264831543, "learning_rate": 1.989959349593496e-05, "loss": 0.5574, "step": 1235 }, { "epoch": 0.015121951219512195, "grad_norm": 3.076523542404175, "learning_rate": 1.9899186991869922e-05, "loss": 0.515, "step": 1240 }, { "epoch": 0.015182926829268292, "grad_norm": 2.53452730178833, "learning_rate": 1.989878048780488e-05, "loss": 0.7506, "step": 1245 }, { "epoch": 0.01524390243902439, "grad_norm": 3.0246591567993164, "learning_rate": 1.989837398373984e-05, "loss": 0.5295, "step": 1250 }, { "epoch": 0.015304878048780488, "grad_norm": 2.361931800842285, "learning_rate": 1.9897967479674797e-05, "loss": 0.4744, "step": 1255 }, { "epoch": 0.015365853658536585, "grad_norm": 3.3887758255004883, "learning_rate": 1.9897560975609758e-05, "loss": 0.4798, "step": 1260 }, { "epoch": 0.015426829268292683, "grad_norm": 2.1466376781463623, "learning_rate": 1.9897154471544716e-05, "loss": 0.4525, "step": 1265 }, { "epoch": 0.01548780487804878, "grad_norm": 2.8498728275299072, "learning_rate": 1.9896747967479678e-05, "loss": 0.524, "step": 1270 }, { "epoch": 0.015548780487804878, "grad_norm": 2.712592840194702, "learning_rate": 1.9896341463414636e-05, "loss": 0.4125, "step": 1275 }, { "epoch": 0.015609756097560976, "grad_norm": 3.1168768405914307, "learning_rate": 1.9895934959349594e-05, "loss": 0.4269, "step": 1280 }, { "epoch": 0.015670731707317073, "grad_norm": 4.155173301696777, "learning_rate": 1.9895528455284556e-05, "loss": 0.4869, "step": 1285 }, { "epoch": 0.01573170731707317, "grad_norm": 3.2339656352996826, "learning_rate": 1.9895121951219514e-05, "loss": 0.5412, "step": 1290 }, { "epoch": 0.01579268292682927, "grad_norm": 1.9910519123077393, "learning_rate": 1.9894715447154472e-05, "loss": 0.4935, "step": 1295 }, { "epoch": 0.015853658536585366, "grad_norm": 2.6041488647460938, "learning_rate": 1.9894308943089433e-05, "loss": 0.3641, "step": 1300 }, { "epoch": 0.015914634146341464, "grad_norm": 2.317199230194092, "learning_rate": 1.989390243902439e-05, "loss": 0.4397, "step": 1305 }, { "epoch": 0.01597560975609756, "grad_norm": 2.8120510578155518, "learning_rate": 1.989349593495935e-05, "loss": 0.4316, "step": 1310 }, { "epoch": 0.01603658536585366, "grad_norm": 2.7145681381225586, "learning_rate": 1.989308943089431e-05, "loss": 0.427, "step": 1315 }, { "epoch": 0.016097560975609757, "grad_norm": 2.621267557144165, "learning_rate": 1.989268292682927e-05, "loss": 0.4637, "step": 1320 }, { "epoch": 0.016158536585365854, "grad_norm": 2.9357340335845947, "learning_rate": 1.989227642276423e-05, "loss": 0.4206, "step": 1325 }, { "epoch": 0.016219512195121952, "grad_norm": 2.063389539718628, "learning_rate": 1.989186991869919e-05, "loss": 0.3837, "step": 1330 }, { "epoch": 0.01628048780487805, "grad_norm": 2.5514392852783203, "learning_rate": 1.989146341463415e-05, "loss": 0.4258, "step": 1335 }, { "epoch": 0.016341463414634147, "grad_norm": 2.6780920028686523, "learning_rate": 1.9891056910569105e-05, "loss": 0.3848, "step": 1340 }, { "epoch": 0.016402439024390245, "grad_norm": 2.7599122524261475, "learning_rate": 1.9890650406504067e-05, "loss": 0.3668, "step": 1345 }, { "epoch": 0.016463414634146342, "grad_norm": 24.007387161254883, "learning_rate": 1.9890243902439025e-05, "loss": 0.4032, "step": 1350 }, { "epoch": 0.01652439024390244, "grad_norm": 2.507702112197876, "learning_rate": 1.9889837398373986e-05, "loss": 0.3407, "step": 1355 }, { "epoch": 0.016585365853658537, "grad_norm": 2.205284833908081, "learning_rate": 1.9889430894308944e-05, "loss": 0.4989, "step": 1360 }, { "epoch": 0.016646341463414635, "grad_norm": 2.213677167892456, "learning_rate": 1.9889024390243906e-05, "loss": 0.444, "step": 1365 }, { "epoch": 0.016707317073170733, "grad_norm": 2.5684657096862793, "learning_rate": 1.9888617886178864e-05, "loss": 0.4544, "step": 1370 }, { "epoch": 0.01676829268292683, "grad_norm": 2.992661714553833, "learning_rate": 1.9888211382113822e-05, "loss": 0.37, "step": 1375 }, { "epoch": 0.016829268292682928, "grad_norm": 1.851231575012207, "learning_rate": 1.9887804878048784e-05, "loss": 0.4174, "step": 1380 }, { "epoch": 0.016890243902439026, "grad_norm": 2.229578971862793, "learning_rate": 1.9887398373983742e-05, "loss": 0.4142, "step": 1385 }, { "epoch": 0.016951219512195123, "grad_norm": 5.71028470993042, "learning_rate": 1.98869918699187e-05, "loss": 0.4548, "step": 1390 }, { "epoch": 0.01701219512195122, "grad_norm": 1.6439564228057861, "learning_rate": 1.988658536585366e-05, "loss": 0.4228, "step": 1395 }, { "epoch": 0.01707317073170732, "grad_norm": 1.8651649951934814, "learning_rate": 1.988617886178862e-05, "loss": 0.4628, "step": 1400 }, { "epoch": 0.017134146341463416, "grad_norm": 2.45046329498291, "learning_rate": 1.9885772357723578e-05, "loss": 0.4054, "step": 1405 }, { "epoch": 0.017195121951219514, "grad_norm": 1.8712360858917236, "learning_rate": 1.988536585365854e-05, "loss": 0.4827, "step": 1410 }, { "epoch": 0.01725609756097561, "grad_norm": 2.917107582092285, "learning_rate": 1.9884959349593497e-05, "loss": 0.408, "step": 1415 }, { "epoch": 0.01731707317073171, "grad_norm": 1.600818395614624, "learning_rate": 1.988455284552846e-05, "loss": 0.3954, "step": 1420 }, { "epoch": 0.017378048780487806, "grad_norm": 3.256509780883789, "learning_rate": 1.9884146341463417e-05, "loss": 0.4914, "step": 1425 }, { "epoch": 0.017439024390243904, "grad_norm": 1.9801170825958252, "learning_rate": 1.9883739837398375e-05, "loss": 0.3985, "step": 1430 }, { "epoch": 0.0175, "grad_norm": 3.3555219173431396, "learning_rate": 1.9883333333333333e-05, "loss": 0.4899, "step": 1435 }, { "epoch": 0.0175609756097561, "grad_norm": 4.2231974601745605, "learning_rate": 1.9882926829268295e-05, "loss": 0.3897, "step": 1440 }, { "epoch": 0.017621951219512193, "grad_norm": 2.3731231689453125, "learning_rate": 1.9882520325203253e-05, "loss": 0.4431, "step": 1445 }, { "epoch": 0.01768292682926829, "grad_norm": 1.5018653869628906, "learning_rate": 1.9882113821138214e-05, "loss": 0.44, "step": 1450 }, { "epoch": 0.01774390243902439, "grad_norm": 2.1633551120758057, "learning_rate": 1.9881707317073173e-05, "loss": 0.5669, "step": 1455 }, { "epoch": 0.017804878048780486, "grad_norm": 3.0822927951812744, "learning_rate": 1.988130081300813e-05, "loss": 0.4058, "step": 1460 }, { "epoch": 0.017865853658536584, "grad_norm": 1.861893653869629, "learning_rate": 1.9880894308943092e-05, "loss": 0.3949, "step": 1465 }, { "epoch": 0.01792682926829268, "grad_norm": 1.6868135929107666, "learning_rate": 1.988048780487805e-05, "loss": 0.3484, "step": 1470 }, { "epoch": 0.01798780487804878, "grad_norm": 1.9727963209152222, "learning_rate": 1.988008130081301e-05, "loss": 0.3736, "step": 1475 }, { "epoch": 0.018048780487804877, "grad_norm": 2.3601901531219482, "learning_rate": 1.987967479674797e-05, "loss": 0.4342, "step": 1480 }, { "epoch": 0.018109756097560974, "grad_norm": 2.3962764739990234, "learning_rate": 1.9879268292682928e-05, "loss": 0.464, "step": 1485 }, { "epoch": 0.018170731707317072, "grad_norm": 2.2687833309173584, "learning_rate": 1.9878861788617886e-05, "loss": 0.4449, "step": 1490 }, { "epoch": 0.01823170731707317, "grad_norm": 2.7495148181915283, "learning_rate": 1.9878455284552848e-05, "loss": 0.407, "step": 1495 }, { "epoch": 0.018292682926829267, "grad_norm": 2.105987787246704, "learning_rate": 1.9878048780487806e-05, "loss": 0.4131, "step": 1500 }, { "epoch": 0.018353658536585365, "grad_norm": 1.6475448608398438, "learning_rate": 1.9877642276422767e-05, "loss": 0.3513, "step": 1505 }, { "epoch": 0.018414634146341462, "grad_norm": 1.9452823400497437, "learning_rate": 1.9877235772357726e-05, "loss": 0.4439, "step": 1510 }, { "epoch": 0.01847560975609756, "grad_norm": 1.742807149887085, "learning_rate": 1.9876829268292687e-05, "loss": 0.3397, "step": 1515 }, { "epoch": 0.018536585365853658, "grad_norm": 1.4280242919921875, "learning_rate": 1.9876422764227642e-05, "loss": 0.3993, "step": 1520 }, { "epoch": 0.018597560975609755, "grad_norm": 1.641538143157959, "learning_rate": 1.9876016260162603e-05, "loss": 0.3814, "step": 1525 }, { "epoch": 0.018658536585365853, "grad_norm": 3.0214498043060303, "learning_rate": 1.987560975609756e-05, "loss": 0.4134, "step": 1530 }, { "epoch": 0.01871951219512195, "grad_norm": 1.442081093788147, "learning_rate": 1.9875203252032523e-05, "loss": 0.3392, "step": 1535 }, { "epoch": 0.018780487804878048, "grad_norm": 2.412766695022583, "learning_rate": 1.987479674796748e-05, "loss": 0.3732, "step": 1540 }, { "epoch": 0.018841463414634146, "grad_norm": 1.7611515522003174, "learning_rate": 1.9874390243902443e-05, "loss": 0.5982, "step": 1545 }, { "epoch": 0.018902439024390243, "grad_norm": 1.5610514879226685, "learning_rate": 1.98739837398374e-05, "loss": 0.407, "step": 1550 }, { "epoch": 0.01896341463414634, "grad_norm": 2.9354045391082764, "learning_rate": 1.987357723577236e-05, "loss": 0.3497, "step": 1555 }, { "epoch": 0.01902439024390244, "grad_norm": 1.96589195728302, "learning_rate": 1.9873170731707317e-05, "loss": 0.3724, "step": 1560 }, { "epoch": 0.019085365853658536, "grad_norm": 1.6364519596099854, "learning_rate": 1.987276422764228e-05, "loss": 0.3494, "step": 1565 }, { "epoch": 0.019146341463414634, "grad_norm": 3.0814321041107178, "learning_rate": 1.9872357723577237e-05, "loss": 0.4438, "step": 1570 }, { "epoch": 0.01920731707317073, "grad_norm": 2.383636474609375, "learning_rate": 1.9871951219512198e-05, "loss": 0.3765, "step": 1575 }, { "epoch": 0.01926829268292683, "grad_norm": 1.54208505153656, "learning_rate": 1.9871544715447156e-05, "loss": 0.3865, "step": 1580 }, { "epoch": 0.019329268292682927, "grad_norm": 3.2068183422088623, "learning_rate": 1.9871138211382114e-05, "loss": 0.3911, "step": 1585 }, { "epoch": 0.019390243902439024, "grad_norm": 10.811330795288086, "learning_rate": 1.9870731707317076e-05, "loss": 0.364, "step": 1590 }, { "epoch": 0.019451219512195122, "grad_norm": 1.7432459592819214, "learning_rate": 1.9870325203252034e-05, "loss": 0.3945, "step": 1595 }, { "epoch": 0.01951219512195122, "grad_norm": 2.361682176589966, "learning_rate": 1.9869918699186996e-05, "loss": 0.3627, "step": 1600 }, { "epoch": 0.019573170731707317, "grad_norm": 1.8541576862335205, "learning_rate": 1.9869512195121954e-05, "loss": 0.2983, "step": 1605 }, { "epoch": 0.019634146341463415, "grad_norm": 1.979897141456604, "learning_rate": 1.9869105691056912e-05, "loss": 0.4035, "step": 1610 }, { "epoch": 0.019695121951219512, "grad_norm": 1.8599116802215576, "learning_rate": 1.986869918699187e-05, "loss": 0.3685, "step": 1615 }, { "epoch": 0.01975609756097561, "grad_norm": 1.3437349796295166, "learning_rate": 1.986829268292683e-05, "loss": 0.3705, "step": 1620 }, { "epoch": 0.019817073170731708, "grad_norm": 2.266080141067505, "learning_rate": 1.986788617886179e-05, "loss": 0.3986, "step": 1625 }, { "epoch": 0.019878048780487805, "grad_norm": 4.637245178222656, "learning_rate": 1.986747967479675e-05, "loss": 0.4133, "step": 1630 }, { "epoch": 0.019939024390243903, "grad_norm": 1.722585916519165, "learning_rate": 1.986707317073171e-05, "loss": 0.338, "step": 1635 }, { "epoch": 0.02, "grad_norm": 3.1807026863098145, "learning_rate": 1.9866666666666667e-05, "loss": 0.4131, "step": 1640 }, { "epoch": 0.020060975609756098, "grad_norm": 1.645983099937439, "learning_rate": 1.986626016260163e-05, "loss": 0.4125, "step": 1645 }, { "epoch": 0.020121951219512196, "grad_norm": 2.1866941452026367, "learning_rate": 1.9865853658536587e-05, "loss": 0.3974, "step": 1650 }, { "epoch": 0.020182926829268293, "grad_norm": 2.782456159591675, "learning_rate": 1.9865447154471545e-05, "loss": 0.4158, "step": 1655 }, { "epoch": 0.02024390243902439, "grad_norm": 1.8909363746643066, "learning_rate": 1.9865040650406507e-05, "loss": 0.5161, "step": 1660 }, { "epoch": 0.02030487804878049, "grad_norm": 2.7323975563049316, "learning_rate": 1.9864634146341465e-05, "loss": 0.4679, "step": 1665 }, { "epoch": 0.020365853658536586, "grad_norm": 3.2016360759735107, "learning_rate": 1.9864227642276423e-05, "loss": 0.4182, "step": 1670 }, { "epoch": 0.020426829268292684, "grad_norm": 2.108607530593872, "learning_rate": 1.9863821138211384e-05, "loss": 0.3497, "step": 1675 }, { "epoch": 0.02048780487804878, "grad_norm": 2.5474777221679688, "learning_rate": 1.9863414634146343e-05, "loss": 0.3435, "step": 1680 }, { "epoch": 0.02054878048780488, "grad_norm": 1.8071507215499878, "learning_rate": 1.9863008130081304e-05, "loss": 0.3388, "step": 1685 }, { "epoch": 0.020609756097560977, "grad_norm": 1.4561892747879028, "learning_rate": 1.9862601626016262e-05, "loss": 0.3061, "step": 1690 }, { "epoch": 0.020670731707317074, "grad_norm": 2.1610546112060547, "learning_rate": 1.9862195121951224e-05, "loss": 0.3948, "step": 1695 }, { "epoch": 0.020731707317073172, "grad_norm": 2.360403299331665, "learning_rate": 1.986178861788618e-05, "loss": 0.3343, "step": 1700 }, { "epoch": 0.02079268292682927, "grad_norm": 1.969667911529541, "learning_rate": 1.986138211382114e-05, "loss": 0.3593, "step": 1705 }, { "epoch": 0.020853658536585367, "grad_norm": 2.6530375480651855, "learning_rate": 1.9860975609756098e-05, "loss": 0.4013, "step": 1710 }, { "epoch": 0.020914634146341465, "grad_norm": 1.928605079650879, "learning_rate": 1.986056910569106e-05, "loss": 0.3513, "step": 1715 }, { "epoch": 0.020975609756097562, "grad_norm": 1.4742770195007324, "learning_rate": 1.9860162601626018e-05, "loss": 0.3213, "step": 1720 }, { "epoch": 0.02103658536585366, "grad_norm": 1.243407964706421, "learning_rate": 1.985975609756098e-05, "loss": 0.4165, "step": 1725 }, { "epoch": 0.021097560975609757, "grad_norm": 1.6812034845352173, "learning_rate": 1.9859349593495937e-05, "loss": 0.328, "step": 1730 }, { "epoch": 0.021158536585365855, "grad_norm": 1.6528271436691284, "learning_rate": 1.9858943089430895e-05, "loss": 0.3054, "step": 1735 }, { "epoch": 0.021219512195121953, "grad_norm": 3.5766243934631348, "learning_rate": 1.9858536585365854e-05, "loss": 0.3979, "step": 1740 }, { "epoch": 0.02128048780487805, "grad_norm": 1.7069156169891357, "learning_rate": 1.9858130081300815e-05, "loss": 0.3902, "step": 1745 }, { "epoch": 0.021341463414634148, "grad_norm": 3.01639461517334, "learning_rate": 1.9857723577235773e-05, "loss": 0.367, "step": 1750 }, { "epoch": 0.021402439024390246, "grad_norm": 2.3239822387695312, "learning_rate": 1.9857317073170735e-05, "loss": 0.3942, "step": 1755 }, { "epoch": 0.021463414634146343, "grad_norm": 1.6038520336151123, "learning_rate": 1.9856910569105693e-05, "loss": 0.3815, "step": 1760 }, { "epoch": 0.021524390243902437, "grad_norm": 3.625142812728882, "learning_rate": 1.985650406504065e-05, "loss": 0.4363, "step": 1765 }, { "epoch": 0.021585365853658535, "grad_norm": 2.1731176376342773, "learning_rate": 1.9856097560975613e-05, "loss": 0.3196, "step": 1770 }, { "epoch": 0.021646341463414633, "grad_norm": 2.0734498500823975, "learning_rate": 1.985569105691057e-05, "loss": 0.2973, "step": 1775 }, { "epoch": 0.02170731707317073, "grad_norm": 2.262763500213623, "learning_rate": 1.9855284552845532e-05, "loss": 0.4341, "step": 1780 }, { "epoch": 0.021768292682926828, "grad_norm": 1.8896268606185913, "learning_rate": 1.985487804878049e-05, "loss": 0.3684, "step": 1785 }, { "epoch": 0.021829268292682925, "grad_norm": 2.3700459003448486, "learning_rate": 1.985447154471545e-05, "loss": 0.2865, "step": 1790 }, { "epoch": 0.021890243902439023, "grad_norm": 14.19785213470459, "learning_rate": 1.9854065040650407e-05, "loss": 0.3951, "step": 1795 }, { "epoch": 0.02195121951219512, "grad_norm": 2.068814754486084, "learning_rate": 1.9853658536585368e-05, "loss": 0.4013, "step": 1800 }, { "epoch": 0.022012195121951218, "grad_norm": 2.4629013538360596, "learning_rate": 1.9853252032520326e-05, "loss": 0.4361, "step": 1805 }, { "epoch": 0.022073170731707316, "grad_norm": 2.108382225036621, "learning_rate": 1.9852845528455288e-05, "loss": 0.2905, "step": 1810 }, { "epoch": 0.022134146341463413, "grad_norm": 1.681330919265747, "learning_rate": 1.9852439024390246e-05, "loss": 0.3719, "step": 1815 }, { "epoch": 0.02219512195121951, "grad_norm": 4.605878829956055, "learning_rate": 1.9852032520325204e-05, "loss": 0.3423, "step": 1820 }, { "epoch": 0.02225609756097561, "grad_norm": 1.451392412185669, "learning_rate": 1.9851626016260162e-05, "loss": 0.3445, "step": 1825 }, { "epoch": 0.022317073170731706, "grad_norm": 2.2115678787231445, "learning_rate": 1.9851219512195124e-05, "loss": 0.3966, "step": 1830 }, { "epoch": 0.022378048780487804, "grad_norm": 1.6269118785858154, "learning_rate": 1.9850813008130082e-05, "loss": 0.3564, "step": 1835 }, { "epoch": 0.0224390243902439, "grad_norm": 2.250580072402954, "learning_rate": 1.9850406504065043e-05, "loss": 0.3099, "step": 1840 }, { "epoch": 0.0225, "grad_norm": 1.3357957601547241, "learning_rate": 1.985e-05, "loss": 0.3322, "step": 1845 }, { "epoch": 0.022560975609756097, "grad_norm": 1.4749305248260498, "learning_rate": 1.984959349593496e-05, "loss": 0.2834, "step": 1850 }, { "epoch": 0.022621951219512194, "grad_norm": 2.1616954803466797, "learning_rate": 1.984918699186992e-05, "loss": 0.3352, "step": 1855 }, { "epoch": 0.022682926829268292, "grad_norm": 2.0848028659820557, "learning_rate": 1.984878048780488e-05, "loss": 0.2629, "step": 1860 }, { "epoch": 0.02274390243902439, "grad_norm": 2.4697282314300537, "learning_rate": 1.984837398373984e-05, "loss": 0.317, "step": 1865 }, { "epoch": 0.022804878048780487, "grad_norm": 2.783734083175659, "learning_rate": 1.98479674796748e-05, "loss": 0.3544, "step": 1870 }, { "epoch": 0.022865853658536585, "grad_norm": 1.5979297161102295, "learning_rate": 1.984756097560976e-05, "loss": 0.3179, "step": 1875 }, { "epoch": 0.022926829268292682, "grad_norm": 1.89378023147583, "learning_rate": 1.9847154471544715e-05, "loss": 0.407, "step": 1880 }, { "epoch": 0.02298780487804878, "grad_norm": 1.8795245885849, "learning_rate": 1.9846747967479677e-05, "loss": 0.4134, "step": 1885 }, { "epoch": 0.023048780487804878, "grad_norm": 1.6853944063186646, "learning_rate": 1.9846341463414635e-05, "loss": 0.426, "step": 1890 }, { "epoch": 0.023109756097560975, "grad_norm": 2.3962371349334717, "learning_rate": 1.9845934959349596e-05, "loss": 0.3599, "step": 1895 }, { "epoch": 0.023170731707317073, "grad_norm": 1.4993773698806763, "learning_rate": 1.9845528455284554e-05, "loss": 0.3652, "step": 1900 }, { "epoch": 0.02323170731707317, "grad_norm": 1.892323613166809, "learning_rate": 1.9845121951219516e-05, "loss": 0.3994, "step": 1905 }, { "epoch": 0.023292682926829268, "grad_norm": 2.0542051792144775, "learning_rate": 1.9844715447154474e-05, "loss": 0.3639, "step": 1910 }, { "epoch": 0.023353658536585366, "grad_norm": 2.236755132675171, "learning_rate": 1.9844308943089432e-05, "loss": 0.3729, "step": 1915 }, { "epoch": 0.023414634146341463, "grad_norm": 2.4409749507904053, "learning_rate": 1.984390243902439e-05, "loss": 0.4149, "step": 1920 }, { "epoch": 0.02347560975609756, "grad_norm": 2.0810697078704834, "learning_rate": 1.9843495934959352e-05, "loss": 0.3368, "step": 1925 }, { "epoch": 0.02353658536585366, "grad_norm": 1.7911003828048706, "learning_rate": 1.984308943089431e-05, "loss": 0.3173, "step": 1930 }, { "epoch": 0.023597560975609756, "grad_norm": 1.4505670070648193, "learning_rate": 1.984268292682927e-05, "loss": 0.342, "step": 1935 }, { "epoch": 0.023658536585365854, "grad_norm": 1.8778597116470337, "learning_rate": 1.984227642276423e-05, "loss": 0.3778, "step": 1940 }, { "epoch": 0.02371951219512195, "grad_norm": 2.0050060749053955, "learning_rate": 1.9841869918699188e-05, "loss": 0.3726, "step": 1945 }, { "epoch": 0.02378048780487805, "grad_norm": 1.74635648727417, "learning_rate": 1.984146341463415e-05, "loss": 0.3686, "step": 1950 }, { "epoch": 0.023841463414634147, "grad_norm": 3.2065083980560303, "learning_rate": 1.9841056910569107e-05, "loss": 0.348, "step": 1955 }, { "epoch": 0.023902439024390244, "grad_norm": 1.7669296264648438, "learning_rate": 1.984065040650407e-05, "loss": 0.3937, "step": 1960 }, { "epoch": 0.023963414634146342, "grad_norm": 1.447015404701233, "learning_rate": 1.9840243902439027e-05, "loss": 0.3241, "step": 1965 }, { "epoch": 0.02402439024390244, "grad_norm": 2.673109531402588, "learning_rate": 1.9839837398373985e-05, "loss": 0.413, "step": 1970 }, { "epoch": 0.024085365853658537, "grad_norm": 1.5264790058135986, "learning_rate": 1.9839430894308943e-05, "loss": 0.273, "step": 1975 }, { "epoch": 0.024146341463414635, "grad_norm": 1.6571930646896362, "learning_rate": 1.9839024390243905e-05, "loss": 0.3115, "step": 1980 }, { "epoch": 0.024207317073170732, "grad_norm": 1.4265893697738647, "learning_rate": 1.9838617886178863e-05, "loss": 0.3242, "step": 1985 }, { "epoch": 0.02426829268292683, "grad_norm": 1.1129344701766968, "learning_rate": 1.9838211382113824e-05, "loss": 0.4004, "step": 1990 }, { "epoch": 0.024329268292682928, "grad_norm": 1.1281957626342773, "learning_rate": 1.9837804878048782e-05, "loss": 0.3508, "step": 1995 }, { "epoch": 0.024390243902439025, "grad_norm": 1.8074418306350708, "learning_rate": 1.983739837398374e-05, "loss": 0.3752, "step": 2000 }, { "epoch": 0.024451219512195123, "grad_norm": 1.3287034034729004, "learning_rate": 1.98369918699187e-05, "loss": 0.3302, "step": 2005 }, { "epoch": 0.02451219512195122, "grad_norm": 1.6599137783050537, "learning_rate": 1.983658536585366e-05, "loss": 0.3146, "step": 2010 }, { "epoch": 0.024573170731707318, "grad_norm": 1.3645389080047607, "learning_rate": 1.983617886178862e-05, "loss": 0.3578, "step": 2015 }, { "epoch": 0.024634146341463416, "grad_norm": 1.622273325920105, "learning_rate": 1.983577235772358e-05, "loss": 0.3382, "step": 2020 }, { "epoch": 0.024695121951219513, "grad_norm": 1.6372417211532593, "learning_rate": 1.9835365853658538e-05, "loss": 0.2807, "step": 2025 }, { "epoch": 0.02475609756097561, "grad_norm": 1.42567777633667, "learning_rate": 1.9834959349593496e-05, "loss": 0.3794, "step": 2030 }, { "epoch": 0.02481707317073171, "grad_norm": 2.4755823612213135, "learning_rate": 1.9834552845528458e-05, "loss": 0.3995, "step": 2035 }, { "epoch": 0.024878048780487806, "grad_norm": 1.4393030405044556, "learning_rate": 1.9834146341463416e-05, "loss": 0.3375, "step": 2040 }, { "epoch": 0.024939024390243904, "grad_norm": 1.2968720197677612, "learning_rate": 1.9833739837398377e-05, "loss": 0.3145, "step": 2045 }, { "epoch": 0.025, "grad_norm": 1.808491587638855, "learning_rate": 1.9833333333333335e-05, "loss": 0.3154, "step": 2050 }, { "epoch": 0.0250609756097561, "grad_norm": 1.457837462425232, "learning_rate": 1.9832926829268297e-05, "loss": 0.3146, "step": 2055 }, { "epoch": 0.025121951219512197, "grad_norm": 1.749190092086792, "learning_rate": 1.983252032520325e-05, "loss": 0.3253, "step": 2060 }, { "epoch": 0.025182926829268294, "grad_norm": 1.508781909942627, "learning_rate": 1.9832113821138213e-05, "loss": 0.3008, "step": 2065 }, { "epoch": 0.025243902439024392, "grad_norm": 1.7403297424316406, "learning_rate": 1.983170731707317e-05, "loss": 0.3864, "step": 2070 }, { "epoch": 0.02530487804878049, "grad_norm": 11.557790756225586, "learning_rate": 1.9831300813008133e-05, "loss": 0.3944, "step": 2075 }, { "epoch": 0.025365853658536587, "grad_norm": 2.543271064758301, "learning_rate": 1.983089430894309e-05, "loss": 0.3014, "step": 2080 }, { "epoch": 0.02542682926829268, "grad_norm": 1.620843529701233, "learning_rate": 1.9830487804878052e-05, "loss": 0.3549, "step": 2085 }, { "epoch": 0.02548780487804878, "grad_norm": 1.4496667385101318, "learning_rate": 1.9830081300813007e-05, "loss": 0.3387, "step": 2090 }, { "epoch": 0.025548780487804876, "grad_norm": 2.5268394947052, "learning_rate": 1.982967479674797e-05, "loss": 0.3568, "step": 2095 }, { "epoch": 0.025609756097560974, "grad_norm": 1.300079345703125, "learning_rate": 1.9829268292682927e-05, "loss": 0.3462, "step": 2100 }, { "epoch": 0.02567073170731707, "grad_norm": 1.648132562637329, "learning_rate": 1.982886178861789e-05, "loss": 0.274, "step": 2105 }, { "epoch": 0.02573170731707317, "grad_norm": 1.3018583059310913, "learning_rate": 1.9828455284552847e-05, "loss": 0.3145, "step": 2110 }, { "epoch": 0.025792682926829267, "grad_norm": 1.6943669319152832, "learning_rate": 1.9828048780487808e-05, "loss": 0.2905, "step": 2115 }, { "epoch": 0.025853658536585365, "grad_norm": 1.8372935056686401, "learning_rate": 1.9827642276422766e-05, "loss": 0.3606, "step": 2120 }, { "epoch": 0.025914634146341462, "grad_norm": 2.335698127746582, "learning_rate": 1.9827235772357724e-05, "loss": 0.3722, "step": 2125 }, { "epoch": 0.02597560975609756, "grad_norm": 1.1661771535873413, "learning_rate": 1.9826829268292686e-05, "loss": 0.3654, "step": 2130 }, { "epoch": 0.026036585365853657, "grad_norm": 2.2837696075439453, "learning_rate": 1.9826422764227644e-05, "loss": 0.3042, "step": 2135 }, { "epoch": 0.026097560975609755, "grad_norm": 2.1103086471557617, "learning_rate": 1.9826016260162605e-05, "loss": 0.3276, "step": 2140 }, { "epoch": 0.026158536585365853, "grad_norm": 2.372265338897705, "learning_rate": 1.9825609756097564e-05, "loss": 0.3233, "step": 2145 }, { "epoch": 0.02621951219512195, "grad_norm": 1.550199031829834, "learning_rate": 1.9825203252032522e-05, "loss": 0.3059, "step": 2150 }, { "epoch": 0.026280487804878048, "grad_norm": 2.341503381729126, "learning_rate": 1.982479674796748e-05, "loss": 0.3578, "step": 2155 }, { "epoch": 0.026341463414634145, "grad_norm": 1.1967891454696655, "learning_rate": 1.982439024390244e-05, "loss": 0.3176, "step": 2160 }, { "epoch": 0.026402439024390243, "grad_norm": 2.1364452838897705, "learning_rate": 1.98239837398374e-05, "loss": 0.2861, "step": 2165 }, { "epoch": 0.02646341463414634, "grad_norm": 3.065868377685547, "learning_rate": 1.982357723577236e-05, "loss": 0.3496, "step": 2170 }, { "epoch": 0.02652439024390244, "grad_norm": 2.088289260864258, "learning_rate": 1.982317073170732e-05, "loss": 0.3666, "step": 2175 }, { "epoch": 0.026585365853658536, "grad_norm": 1.4491729736328125, "learning_rate": 1.9822764227642277e-05, "loss": 0.3267, "step": 2180 }, { "epoch": 0.026646341463414634, "grad_norm": 1.7704631090164185, "learning_rate": 1.9822357723577235e-05, "loss": 0.3315, "step": 2185 }, { "epoch": 0.02670731707317073, "grad_norm": 1.639953374862671, "learning_rate": 1.9821951219512197e-05, "loss": 0.3246, "step": 2190 }, { "epoch": 0.02676829268292683, "grad_norm": 1.4317275285720825, "learning_rate": 1.9821544715447155e-05, "loss": 0.2716, "step": 2195 }, { "epoch": 0.026829268292682926, "grad_norm": 6.066577911376953, "learning_rate": 1.9821138211382117e-05, "loss": 0.3046, "step": 2200 }, { "epoch": 0.026890243902439024, "grad_norm": 1.7193670272827148, "learning_rate": 1.9820731707317075e-05, "loss": 0.2914, "step": 2205 }, { "epoch": 0.02695121951219512, "grad_norm": 1.738088846206665, "learning_rate": 1.9820325203252033e-05, "loss": 0.3634, "step": 2210 }, { "epoch": 0.02701219512195122, "grad_norm": 2.273528575897217, "learning_rate": 1.9819918699186994e-05, "loss": 0.2735, "step": 2215 }, { "epoch": 0.027073170731707317, "grad_norm": 1.868272304534912, "learning_rate": 1.9819512195121952e-05, "loss": 0.2713, "step": 2220 }, { "epoch": 0.027134146341463414, "grad_norm": 2.1973233222961426, "learning_rate": 1.9819105691056914e-05, "loss": 0.3427, "step": 2225 }, { "epoch": 0.027195121951219512, "grad_norm": 1.4685455560684204, "learning_rate": 1.9818699186991872e-05, "loss": 0.2791, "step": 2230 }, { "epoch": 0.02725609756097561, "grad_norm": 2.4450340270996094, "learning_rate": 1.981829268292683e-05, "loss": 0.3275, "step": 2235 }, { "epoch": 0.027317073170731707, "grad_norm": 1.709215760231018, "learning_rate": 1.981788617886179e-05, "loss": 0.3416, "step": 2240 }, { "epoch": 0.027378048780487805, "grad_norm": 5.408648490905762, "learning_rate": 1.981747967479675e-05, "loss": 0.3059, "step": 2245 }, { "epoch": 0.027439024390243903, "grad_norm": 1.4838064908981323, "learning_rate": 1.9817073170731708e-05, "loss": 0.3549, "step": 2250 }, { "epoch": 0.0275, "grad_norm": 1.3638285398483276, "learning_rate": 1.981666666666667e-05, "loss": 0.2999, "step": 2255 }, { "epoch": 0.027560975609756098, "grad_norm": 2.3058745861053467, "learning_rate": 1.9816260162601628e-05, "loss": 0.3038, "step": 2260 }, { "epoch": 0.027621951219512195, "grad_norm": 1.852726697921753, "learning_rate": 1.981585365853659e-05, "loss": 0.2615, "step": 2265 }, { "epoch": 0.027682926829268293, "grad_norm": 1.5893528461456299, "learning_rate": 1.9815447154471544e-05, "loss": 0.3181, "step": 2270 }, { "epoch": 0.02774390243902439, "grad_norm": 1.6254867315292358, "learning_rate": 1.9815040650406505e-05, "loss": 0.3544, "step": 2275 }, { "epoch": 0.027804878048780488, "grad_norm": 1.8871536254882812, "learning_rate": 1.9814634146341464e-05, "loss": 0.3609, "step": 2280 }, { "epoch": 0.027865853658536586, "grad_norm": 1.5980405807495117, "learning_rate": 1.9814227642276425e-05, "loss": 0.3277, "step": 2285 }, { "epoch": 0.027926829268292683, "grad_norm": 1.2380614280700684, "learning_rate": 1.9813821138211383e-05, "loss": 0.3779, "step": 2290 }, { "epoch": 0.02798780487804878, "grad_norm": 1.7316542863845825, "learning_rate": 1.9813414634146345e-05, "loss": 0.2869, "step": 2295 }, { "epoch": 0.02804878048780488, "grad_norm": 1.6094461679458618, "learning_rate": 1.9813008130081303e-05, "loss": 0.2996, "step": 2300 }, { "epoch": 0.028109756097560976, "grad_norm": 2.0489702224731445, "learning_rate": 1.981260162601626e-05, "loss": 0.3213, "step": 2305 }, { "epoch": 0.028170731707317074, "grad_norm": 2.507256269454956, "learning_rate": 1.9812195121951222e-05, "loss": 0.3181, "step": 2310 }, { "epoch": 0.02823170731707317, "grad_norm": 1.538557529449463, "learning_rate": 1.981178861788618e-05, "loss": 0.3062, "step": 2315 }, { "epoch": 0.02829268292682927, "grad_norm": 1.7040328979492188, "learning_rate": 1.9811382113821142e-05, "loss": 0.3387, "step": 2320 }, { "epoch": 0.028353658536585367, "grad_norm": 2.6938652992248535, "learning_rate": 1.98109756097561e-05, "loss": 0.3923, "step": 2325 }, { "epoch": 0.028414634146341464, "grad_norm": 1.5834853649139404, "learning_rate": 1.981056910569106e-05, "loss": 0.2691, "step": 2330 }, { "epoch": 0.028475609756097562, "grad_norm": 1.6365586519241333, "learning_rate": 1.9810162601626016e-05, "loss": 0.353, "step": 2335 }, { "epoch": 0.02853658536585366, "grad_norm": 1.4382035732269287, "learning_rate": 1.9809756097560978e-05, "loss": 0.3905, "step": 2340 }, { "epoch": 0.028597560975609757, "grad_norm": 1.6478017568588257, "learning_rate": 1.9809349593495936e-05, "loss": 0.2781, "step": 2345 }, { "epoch": 0.028658536585365855, "grad_norm": 1.7809884548187256, "learning_rate": 1.9808943089430898e-05, "loss": 0.2936, "step": 2350 }, { "epoch": 0.028719512195121952, "grad_norm": 2.1464486122131348, "learning_rate": 1.9808536585365856e-05, "loss": 0.3781, "step": 2355 }, { "epoch": 0.02878048780487805, "grad_norm": 1.562485694885254, "learning_rate": 1.9808130081300814e-05, "loss": 0.2935, "step": 2360 }, { "epoch": 0.028841463414634148, "grad_norm": 1.4212300777435303, "learning_rate": 1.9807723577235772e-05, "loss": 0.301, "step": 2365 }, { "epoch": 0.028902439024390245, "grad_norm": 1.975572109222412, "learning_rate": 1.9807317073170734e-05, "loss": 0.3017, "step": 2370 }, { "epoch": 0.028963414634146343, "grad_norm": 1.7812938690185547, "learning_rate": 1.980691056910569e-05, "loss": 0.3095, "step": 2375 }, { "epoch": 0.02902439024390244, "grad_norm": 1.2330487966537476, "learning_rate": 1.9806504065040653e-05, "loss": 0.2802, "step": 2380 }, { "epoch": 0.029085365853658538, "grad_norm": 1.5055296421051025, "learning_rate": 1.980609756097561e-05, "loss": 0.3299, "step": 2385 }, { "epoch": 0.029146341463414636, "grad_norm": 1.535119652748108, "learning_rate": 1.980569105691057e-05, "loss": 0.3185, "step": 2390 }, { "epoch": 0.029207317073170733, "grad_norm": 2.1185593605041504, "learning_rate": 1.980528455284553e-05, "loss": 0.3115, "step": 2395 }, { "epoch": 0.02926829268292683, "grad_norm": 3.2208468914031982, "learning_rate": 1.980487804878049e-05, "loss": 0.3389, "step": 2400 }, { "epoch": 0.029329268292682925, "grad_norm": 2.5524203777313232, "learning_rate": 1.980447154471545e-05, "loss": 0.3599, "step": 2405 }, { "epoch": 0.029390243902439023, "grad_norm": 3.5883874893188477, "learning_rate": 1.980406504065041e-05, "loss": 0.3244, "step": 2410 }, { "epoch": 0.02945121951219512, "grad_norm": 2.370281934738159, "learning_rate": 1.9803658536585367e-05, "loss": 0.2849, "step": 2415 }, { "epoch": 0.029512195121951218, "grad_norm": 2.2606961727142334, "learning_rate": 1.9803252032520325e-05, "loss": 0.3241, "step": 2420 }, { "epoch": 0.029573170731707316, "grad_norm": 3.410653829574585, "learning_rate": 1.9802845528455286e-05, "loss": 0.3749, "step": 2425 }, { "epoch": 0.029634146341463413, "grad_norm": 1.2450709342956543, "learning_rate": 1.9802439024390245e-05, "loss": 0.3153, "step": 2430 }, { "epoch": 0.02969512195121951, "grad_norm": 1.8989064693450928, "learning_rate": 1.9802032520325206e-05, "loss": 0.3513, "step": 2435 }, { "epoch": 0.02975609756097561, "grad_norm": 2.1146047115325928, "learning_rate": 1.9801626016260164e-05, "loss": 0.333, "step": 2440 }, { "epoch": 0.029817073170731706, "grad_norm": 1.1622655391693115, "learning_rate": 1.9801219512195126e-05, "loss": 0.2921, "step": 2445 }, { "epoch": 0.029878048780487804, "grad_norm": 2.142533302307129, "learning_rate": 1.980081300813008e-05, "loss": 0.2589, "step": 2450 }, { "epoch": 0.0299390243902439, "grad_norm": 1.401563048362732, "learning_rate": 1.9800406504065042e-05, "loss": 0.2917, "step": 2455 }, { "epoch": 0.03, "grad_norm": 2.1572766304016113, "learning_rate": 1.98e-05, "loss": 0.2797, "step": 2460 }, { "epoch": 0.030060975609756097, "grad_norm": 1.452361822128296, "learning_rate": 1.979959349593496e-05, "loss": 0.3006, "step": 2465 }, { "epoch": 0.030121951219512194, "grad_norm": 0.9127364158630371, "learning_rate": 1.979918699186992e-05, "loss": 0.2707, "step": 2470 }, { "epoch": 0.030182926829268292, "grad_norm": 1.6633731126785278, "learning_rate": 1.979878048780488e-05, "loss": 0.3396, "step": 2475 }, { "epoch": 0.03024390243902439, "grad_norm": 1.7718820571899414, "learning_rate": 1.979837398373984e-05, "loss": 0.3421, "step": 2480 }, { "epoch": 0.030304878048780487, "grad_norm": 1.482239842414856, "learning_rate": 1.9797967479674798e-05, "loss": 0.2481, "step": 2485 }, { "epoch": 0.030365853658536585, "grad_norm": 2.1266353130340576, "learning_rate": 1.979756097560976e-05, "loss": 0.3268, "step": 2490 }, { "epoch": 0.030426829268292682, "grad_norm": 4.3146071434021, "learning_rate": 1.9797154471544717e-05, "loss": 0.2997, "step": 2495 }, { "epoch": 0.03048780487804878, "grad_norm": 1.3198400735855103, "learning_rate": 1.9796747967479675e-05, "loss": 0.3153, "step": 2500 }, { "epoch": 0.030548780487804877, "grad_norm": 1.4832568168640137, "learning_rate": 1.9796341463414637e-05, "loss": 0.2704, "step": 2505 }, { "epoch": 0.030609756097560975, "grad_norm": 1.4266021251678467, "learning_rate": 1.9795934959349595e-05, "loss": 0.3216, "step": 2510 }, { "epoch": 0.030670731707317073, "grad_norm": 1.7110681533813477, "learning_rate": 1.9795528455284553e-05, "loss": 0.3362, "step": 2515 }, { "epoch": 0.03073170731707317, "grad_norm": 1.9088811874389648, "learning_rate": 1.9795121951219515e-05, "loss": 0.3047, "step": 2520 }, { "epoch": 0.030792682926829268, "grad_norm": 1.7321326732635498, "learning_rate": 1.9794715447154473e-05, "loss": 0.2573, "step": 2525 }, { "epoch": 0.030853658536585366, "grad_norm": 2.1622049808502197, "learning_rate": 1.9794308943089434e-05, "loss": 0.2661, "step": 2530 }, { "epoch": 0.030914634146341463, "grad_norm": 1.2591677904129028, "learning_rate": 1.9793902439024392e-05, "loss": 0.2583, "step": 2535 }, { "epoch": 0.03097560975609756, "grad_norm": 1.3078575134277344, "learning_rate": 1.979349593495935e-05, "loss": 0.2366, "step": 2540 }, { "epoch": 0.03103658536585366, "grad_norm": 1.5237821340560913, "learning_rate": 1.979308943089431e-05, "loss": 0.2632, "step": 2545 }, { "epoch": 0.031097560975609756, "grad_norm": 3.916215181350708, "learning_rate": 1.979268292682927e-05, "loss": 0.3386, "step": 2550 }, { "epoch": 0.031158536585365854, "grad_norm": 1.4086676836013794, "learning_rate": 1.979227642276423e-05, "loss": 0.2684, "step": 2555 }, { "epoch": 0.03121951219512195, "grad_norm": 1.42960524559021, "learning_rate": 1.979186991869919e-05, "loss": 0.2553, "step": 2560 }, { "epoch": 0.03128048780487805, "grad_norm": 1.6510446071624756, "learning_rate": 1.9791463414634148e-05, "loss": 0.2985, "step": 2565 }, { "epoch": 0.031341463414634146, "grad_norm": 1.4211738109588623, "learning_rate": 1.9791056910569106e-05, "loss": 0.3536, "step": 2570 }, { "epoch": 0.031402439024390244, "grad_norm": 1.399908423423767, "learning_rate": 1.9790650406504068e-05, "loss": 0.3061, "step": 2575 }, { "epoch": 0.03146341463414634, "grad_norm": 1.5986515283584595, "learning_rate": 1.9790243902439026e-05, "loss": 0.3009, "step": 2580 }, { "epoch": 0.03152439024390244, "grad_norm": 1.1322470903396606, "learning_rate": 1.9789837398373984e-05, "loss": 0.3197, "step": 2585 }, { "epoch": 0.03158536585365854, "grad_norm": 1.499232530593872, "learning_rate": 1.9789430894308945e-05, "loss": 0.36, "step": 2590 }, { "epoch": 0.031646341463414634, "grad_norm": 1.628829836845398, "learning_rate": 1.9789024390243903e-05, "loss": 0.278, "step": 2595 }, { "epoch": 0.03170731707317073, "grad_norm": 2.43593430519104, "learning_rate": 1.978861788617886e-05, "loss": 0.2837, "step": 2600 }, { "epoch": 0.03176829268292683, "grad_norm": 3.040083885192871, "learning_rate": 1.9788211382113823e-05, "loss": 0.3477, "step": 2605 }, { "epoch": 0.03182926829268293, "grad_norm": 1.4534801244735718, "learning_rate": 1.978780487804878e-05, "loss": 0.3012, "step": 2610 }, { "epoch": 0.031890243902439025, "grad_norm": 1.4995661973953247, "learning_rate": 1.9787398373983743e-05, "loss": 0.3459, "step": 2615 }, { "epoch": 0.03195121951219512, "grad_norm": 1.5923829078674316, "learning_rate": 1.97869918699187e-05, "loss": 0.3444, "step": 2620 }, { "epoch": 0.03201219512195122, "grad_norm": 1.1140943765640259, "learning_rate": 1.9786585365853662e-05, "loss": 0.3909, "step": 2625 }, { "epoch": 0.03207317073170732, "grad_norm": 1.5082253217697144, "learning_rate": 1.9786178861788617e-05, "loss": 0.3663, "step": 2630 }, { "epoch": 0.032134146341463415, "grad_norm": 1.1744698286056519, "learning_rate": 1.978577235772358e-05, "loss": 0.2878, "step": 2635 }, { "epoch": 0.03219512195121951, "grad_norm": 2.507054567337036, "learning_rate": 1.9785365853658537e-05, "loss": 0.3025, "step": 2640 }, { "epoch": 0.03225609756097561, "grad_norm": 4.527769088745117, "learning_rate": 1.97849593495935e-05, "loss": 0.335, "step": 2645 }, { "epoch": 0.03231707317073171, "grad_norm": 1.8743221759796143, "learning_rate": 1.9784552845528456e-05, "loss": 0.2679, "step": 2650 }, { "epoch": 0.032378048780487806, "grad_norm": 1.0681841373443604, "learning_rate": 1.9784146341463418e-05, "loss": 0.2734, "step": 2655 }, { "epoch": 0.032439024390243903, "grad_norm": 1.1152361631393433, "learning_rate": 1.9783739837398376e-05, "loss": 0.2727, "step": 2660 }, { "epoch": 0.0325, "grad_norm": 2.656332492828369, "learning_rate": 1.9783333333333334e-05, "loss": 0.3281, "step": 2665 }, { "epoch": 0.0325609756097561, "grad_norm": 2.425618886947632, "learning_rate": 1.9782926829268296e-05, "loss": 0.3511, "step": 2670 }, { "epoch": 0.032621951219512196, "grad_norm": 2.0630085468292236, "learning_rate": 1.9782520325203254e-05, "loss": 0.2953, "step": 2675 }, { "epoch": 0.032682926829268294, "grad_norm": 3.6700422763824463, "learning_rate": 1.9782113821138212e-05, "loss": 0.3047, "step": 2680 }, { "epoch": 0.03274390243902439, "grad_norm": 1.2709426879882812, "learning_rate": 1.9781707317073174e-05, "loss": 0.3357, "step": 2685 }, { "epoch": 0.03280487804878049, "grad_norm": 1.311952829360962, "learning_rate": 1.978130081300813e-05, "loss": 0.2373, "step": 2690 }, { "epoch": 0.03286585365853659, "grad_norm": 1.6436504125595093, "learning_rate": 1.978089430894309e-05, "loss": 0.2783, "step": 2695 }, { "epoch": 0.032926829268292684, "grad_norm": 1.7284051179885864, "learning_rate": 1.978048780487805e-05, "loss": 0.2606, "step": 2700 }, { "epoch": 0.03298780487804878, "grad_norm": 6.002142906188965, "learning_rate": 1.978008130081301e-05, "loss": 0.3096, "step": 2705 }, { "epoch": 0.03304878048780488, "grad_norm": 1.8170934915542603, "learning_rate": 1.977967479674797e-05, "loss": 0.3179, "step": 2710 }, { "epoch": 0.03310975609756098, "grad_norm": 1.361476182937622, "learning_rate": 1.977926829268293e-05, "loss": 0.2778, "step": 2715 }, { "epoch": 0.033170731707317075, "grad_norm": 1.7197540998458862, "learning_rate": 1.9778861788617887e-05, "loss": 0.2715, "step": 2720 }, { "epoch": 0.03323170731707317, "grad_norm": 1.513893961906433, "learning_rate": 1.9778455284552845e-05, "loss": 0.2838, "step": 2725 }, { "epoch": 0.03329268292682927, "grad_norm": 1.4205135107040405, "learning_rate": 1.9778048780487807e-05, "loss": 0.2771, "step": 2730 }, { "epoch": 0.03335365853658537, "grad_norm": 4.940367221832275, "learning_rate": 1.9777642276422765e-05, "loss": 0.3087, "step": 2735 }, { "epoch": 0.033414634146341465, "grad_norm": 1.7769176959991455, "learning_rate": 1.9777235772357726e-05, "loss": 0.2991, "step": 2740 }, { "epoch": 0.03347560975609756, "grad_norm": 1.2543959617614746, "learning_rate": 1.9776829268292685e-05, "loss": 0.327, "step": 2745 }, { "epoch": 0.03353658536585366, "grad_norm": 2.279574394226074, "learning_rate": 1.9776422764227643e-05, "loss": 0.2879, "step": 2750 }, { "epoch": 0.03359756097560976, "grad_norm": 1.4638293981552124, "learning_rate": 1.9776016260162604e-05, "loss": 0.2542, "step": 2755 }, { "epoch": 0.033658536585365856, "grad_norm": 1.6284009218215942, "learning_rate": 1.9775609756097562e-05, "loss": 0.3004, "step": 2760 }, { "epoch": 0.03371951219512195, "grad_norm": 1.8734042644500732, "learning_rate": 1.977520325203252e-05, "loss": 0.3444, "step": 2765 }, { "epoch": 0.03378048780487805, "grad_norm": 1.7191905975341797, "learning_rate": 1.9774796747967482e-05, "loss": 0.2908, "step": 2770 }, { "epoch": 0.03384146341463415, "grad_norm": 2.242094039916992, "learning_rate": 1.977439024390244e-05, "loss": 0.231, "step": 2775 }, { "epoch": 0.033902439024390246, "grad_norm": 1.0014691352844238, "learning_rate": 1.9773983739837398e-05, "loss": 0.29, "step": 2780 }, { "epoch": 0.033963414634146344, "grad_norm": 1.6026345491409302, "learning_rate": 1.977357723577236e-05, "loss": 0.3446, "step": 2785 }, { "epoch": 0.03402439024390244, "grad_norm": 3.5646066665649414, "learning_rate": 1.9773170731707318e-05, "loss": 0.3071, "step": 2790 }, { "epoch": 0.03408536585365854, "grad_norm": 1.5057013034820557, "learning_rate": 1.977276422764228e-05, "loss": 0.2878, "step": 2795 }, { "epoch": 0.03414634146341464, "grad_norm": 1.7845134735107422, "learning_rate": 1.9772357723577238e-05, "loss": 0.2984, "step": 2800 }, { "epoch": 0.034207317073170734, "grad_norm": 1.5153758525848389, "learning_rate": 1.97719512195122e-05, "loss": 0.3377, "step": 2805 }, { "epoch": 0.03426829268292683, "grad_norm": 1.6396652460098267, "learning_rate": 1.9771544715447154e-05, "loss": 0.2656, "step": 2810 }, { "epoch": 0.03432926829268293, "grad_norm": 1.3204140663146973, "learning_rate": 1.9771138211382115e-05, "loss": 0.2857, "step": 2815 }, { "epoch": 0.03439024390243903, "grad_norm": 1.8700807094573975, "learning_rate": 1.9770731707317073e-05, "loss": 0.2694, "step": 2820 }, { "epoch": 0.034451219512195125, "grad_norm": 3.2667832374572754, "learning_rate": 1.9770325203252035e-05, "loss": 0.256, "step": 2825 }, { "epoch": 0.03451219512195122, "grad_norm": 2.285712480545044, "learning_rate": 1.9769918699186993e-05, "loss": 0.3603, "step": 2830 }, { "epoch": 0.03457317073170732, "grad_norm": 1.678667664527893, "learning_rate": 1.9769512195121955e-05, "loss": 0.2823, "step": 2835 }, { "epoch": 0.03463414634146342, "grad_norm": 1.2330306768417358, "learning_rate": 1.9769105691056913e-05, "loss": 0.2751, "step": 2840 }, { "epoch": 0.034695121951219515, "grad_norm": 1.6363780498504639, "learning_rate": 1.976869918699187e-05, "loss": 0.2505, "step": 2845 }, { "epoch": 0.03475609756097561, "grad_norm": 1.6633023023605347, "learning_rate": 1.976829268292683e-05, "loss": 0.3068, "step": 2850 }, { "epoch": 0.03481707317073171, "grad_norm": 1.6157491207122803, "learning_rate": 1.976788617886179e-05, "loss": 0.3759, "step": 2855 }, { "epoch": 0.03487804878048781, "grad_norm": 1.385955572128296, "learning_rate": 1.976747967479675e-05, "loss": 0.2611, "step": 2860 }, { "epoch": 0.034939024390243906, "grad_norm": 2.275216579437256, "learning_rate": 1.976707317073171e-05, "loss": 0.2608, "step": 2865 }, { "epoch": 0.035, "grad_norm": 3.5724594593048096, "learning_rate": 1.9766666666666668e-05, "loss": 0.2358, "step": 2870 }, { "epoch": 0.0350609756097561, "grad_norm": 1.5860645771026611, "learning_rate": 1.9766260162601626e-05, "loss": 0.3318, "step": 2875 }, { "epoch": 0.0351219512195122, "grad_norm": 3.8994505405426025, "learning_rate": 1.9765853658536588e-05, "loss": 0.2715, "step": 2880 }, { "epoch": 0.03518292682926829, "grad_norm": 1.4909405708312988, "learning_rate": 1.9765447154471546e-05, "loss": 0.2694, "step": 2885 }, { "epoch": 0.03524390243902439, "grad_norm": 0.9051448106765747, "learning_rate": 1.9765040650406508e-05, "loss": 0.2462, "step": 2890 }, { "epoch": 0.035304878048780484, "grad_norm": 2.0865252017974854, "learning_rate": 1.9764634146341466e-05, "loss": 0.2869, "step": 2895 }, { "epoch": 0.03536585365853658, "grad_norm": 1.95912504196167, "learning_rate": 1.9764227642276424e-05, "loss": 0.327, "step": 2900 }, { "epoch": 0.03542682926829268, "grad_norm": 1.3116114139556885, "learning_rate": 1.9763821138211382e-05, "loss": 0.275, "step": 2905 }, { "epoch": 0.03548780487804878, "grad_norm": 1.3743834495544434, "learning_rate": 1.9763414634146343e-05, "loss": 0.2559, "step": 2910 }, { "epoch": 0.035548780487804875, "grad_norm": 1.6268800497055054, "learning_rate": 1.97630081300813e-05, "loss": 0.2882, "step": 2915 }, { "epoch": 0.03560975609756097, "grad_norm": 1.6237351894378662, "learning_rate": 1.9762601626016263e-05, "loss": 0.2643, "step": 2920 }, { "epoch": 0.03567073170731707, "grad_norm": 1.3425638675689697, "learning_rate": 1.976219512195122e-05, "loss": 0.2706, "step": 2925 }, { "epoch": 0.03573170731707317, "grad_norm": 1.331849455833435, "learning_rate": 1.976178861788618e-05, "loss": 0.3016, "step": 2930 }, { "epoch": 0.035792682926829265, "grad_norm": 3.134129762649536, "learning_rate": 1.976138211382114e-05, "loss": 0.2825, "step": 2935 }, { "epoch": 0.03585365853658536, "grad_norm": 1.3585219383239746, "learning_rate": 1.97609756097561e-05, "loss": 0.3301, "step": 2940 }, { "epoch": 0.03591463414634146, "grad_norm": 1.442960262298584, "learning_rate": 1.9760569105691057e-05, "loss": 0.343, "step": 2945 }, { "epoch": 0.03597560975609756, "grad_norm": 1.4506455659866333, "learning_rate": 1.976016260162602e-05, "loss": 0.3092, "step": 2950 }, { "epoch": 0.036036585365853656, "grad_norm": 1.1241228580474854, "learning_rate": 1.9759756097560977e-05, "loss": 0.2661, "step": 2955 }, { "epoch": 0.03609756097560975, "grad_norm": 2.861788272857666, "learning_rate": 1.9759349593495935e-05, "loss": 0.3085, "step": 2960 }, { "epoch": 0.03615853658536585, "grad_norm": 1.4915471076965332, "learning_rate": 1.9758943089430896e-05, "loss": 0.2741, "step": 2965 }, { "epoch": 0.03621951219512195, "grad_norm": 1.6313985586166382, "learning_rate": 1.9758536585365855e-05, "loss": 0.3144, "step": 2970 }, { "epoch": 0.036280487804878046, "grad_norm": 1.2205578088760376, "learning_rate": 1.9758130081300816e-05, "loss": 0.3467, "step": 2975 }, { "epoch": 0.036341463414634144, "grad_norm": 2.0074028968811035, "learning_rate": 1.9757723577235774e-05, "loss": 0.2677, "step": 2980 }, { "epoch": 0.03640243902439024, "grad_norm": 1.21519136428833, "learning_rate": 1.9757317073170736e-05, "loss": 0.2724, "step": 2985 }, { "epoch": 0.03646341463414634, "grad_norm": 1.7913119792938232, "learning_rate": 1.975691056910569e-05, "loss": 0.3075, "step": 2990 }, { "epoch": 0.03652439024390244, "grad_norm": 1.8237686157226562, "learning_rate": 1.9756504065040652e-05, "loss": 0.3215, "step": 2995 }, { "epoch": 0.036585365853658534, "grad_norm": 1.4458283185958862, "learning_rate": 1.975609756097561e-05, "loss": 0.2815, "step": 3000 }, { "epoch": 0.03664634146341463, "grad_norm": 1.7522258758544922, "learning_rate": 1.975569105691057e-05, "loss": 0.2972, "step": 3005 }, { "epoch": 0.03670731707317073, "grad_norm": 1.4182950258255005, "learning_rate": 1.975528455284553e-05, "loss": 0.2849, "step": 3010 }, { "epoch": 0.03676829268292683, "grad_norm": 0.9678946733474731, "learning_rate": 1.975487804878049e-05, "loss": 0.3018, "step": 3015 }, { "epoch": 0.036829268292682925, "grad_norm": 1.2528280019760132, "learning_rate": 1.975447154471545e-05, "loss": 0.3052, "step": 3020 }, { "epoch": 0.03689024390243902, "grad_norm": 1.0233339071273804, "learning_rate": 1.9754065040650408e-05, "loss": 0.2899, "step": 3025 }, { "epoch": 0.03695121951219512, "grad_norm": 1.07277250289917, "learning_rate": 1.9753658536585366e-05, "loss": 0.2845, "step": 3030 }, { "epoch": 0.03701219512195122, "grad_norm": 1.3224774599075317, "learning_rate": 1.9753252032520327e-05, "loss": 0.25, "step": 3035 }, { "epoch": 0.037073170731707315, "grad_norm": 1.264643669128418, "learning_rate": 1.9752845528455285e-05, "loss": 0.3254, "step": 3040 }, { "epoch": 0.03713414634146341, "grad_norm": 2.1144678592681885, "learning_rate": 1.9752439024390247e-05, "loss": 0.2861, "step": 3045 }, { "epoch": 0.03719512195121951, "grad_norm": 1.816957712173462, "learning_rate": 1.9752032520325205e-05, "loss": 0.3088, "step": 3050 }, { "epoch": 0.03725609756097561, "grad_norm": 1.3869894742965698, "learning_rate": 1.9751626016260163e-05, "loss": 0.356, "step": 3055 }, { "epoch": 0.037317073170731706, "grad_norm": 4.532183647155762, "learning_rate": 1.9751219512195125e-05, "loss": 0.3031, "step": 3060 }, { "epoch": 0.0373780487804878, "grad_norm": 1.620263695716858, "learning_rate": 1.9750813008130083e-05, "loss": 0.2464, "step": 3065 }, { "epoch": 0.0374390243902439, "grad_norm": 1.227912187576294, "learning_rate": 1.9750406504065044e-05, "loss": 0.32, "step": 3070 }, { "epoch": 0.0375, "grad_norm": 0.9838119745254517, "learning_rate": 1.9750000000000002e-05, "loss": 0.2588, "step": 3075 }, { "epoch": 0.037560975609756096, "grad_norm": 2.407285213470459, "learning_rate": 1.974959349593496e-05, "loss": 0.2255, "step": 3080 }, { "epoch": 0.037621951219512194, "grad_norm": 2.9411561489105225, "learning_rate": 1.974918699186992e-05, "loss": 0.3056, "step": 3085 }, { "epoch": 0.03768292682926829, "grad_norm": 1.6509194374084473, "learning_rate": 1.974878048780488e-05, "loss": 0.2931, "step": 3090 }, { "epoch": 0.03774390243902439, "grad_norm": 1.3063223361968994, "learning_rate": 1.9748373983739838e-05, "loss": 0.2703, "step": 3095 }, { "epoch": 0.03780487804878049, "grad_norm": 2.2693569660186768, "learning_rate": 1.97479674796748e-05, "loss": 0.3387, "step": 3100 }, { "epoch": 0.037865853658536584, "grad_norm": 1.0873849391937256, "learning_rate": 1.9747560975609758e-05, "loss": 0.307, "step": 3105 }, { "epoch": 0.03792682926829268, "grad_norm": 1.8359909057617188, "learning_rate": 1.9747154471544716e-05, "loss": 0.2572, "step": 3110 }, { "epoch": 0.03798780487804878, "grad_norm": 1.561880111694336, "learning_rate": 1.9746747967479674e-05, "loss": 0.2695, "step": 3115 }, { "epoch": 0.03804878048780488, "grad_norm": 2.1435844898223877, "learning_rate": 1.9746341463414636e-05, "loss": 0.293, "step": 3120 }, { "epoch": 0.038109756097560975, "grad_norm": 0.9059951305389404, "learning_rate": 1.9745934959349594e-05, "loss": 0.2909, "step": 3125 }, { "epoch": 0.03817073170731707, "grad_norm": 2.1007184982299805, "learning_rate": 1.9745528455284555e-05, "loss": 0.2698, "step": 3130 }, { "epoch": 0.03823170731707317, "grad_norm": 1.6851527690887451, "learning_rate": 1.9745121951219513e-05, "loss": 0.2613, "step": 3135 }, { "epoch": 0.03829268292682927, "grad_norm": 1.3700697422027588, "learning_rate": 1.974471544715447e-05, "loss": 0.38, "step": 3140 }, { "epoch": 0.038353658536585365, "grad_norm": 1.8010820150375366, "learning_rate": 1.9744308943089433e-05, "loss": 0.2526, "step": 3145 }, { "epoch": 0.03841463414634146, "grad_norm": 1.7411085367202759, "learning_rate": 1.974390243902439e-05, "loss": 0.2959, "step": 3150 }, { "epoch": 0.03847560975609756, "grad_norm": 1.767323613166809, "learning_rate": 1.9743495934959353e-05, "loss": 0.249, "step": 3155 }, { "epoch": 0.03853658536585366, "grad_norm": 1.2130095958709717, "learning_rate": 1.974308943089431e-05, "loss": 0.2518, "step": 3160 }, { "epoch": 0.038597560975609756, "grad_norm": 3.3544414043426514, "learning_rate": 1.9742682926829272e-05, "loss": 0.2868, "step": 3165 }, { "epoch": 0.03865853658536585, "grad_norm": 0.9615225791931152, "learning_rate": 1.9742276422764227e-05, "loss": 0.2812, "step": 3170 }, { "epoch": 0.03871951219512195, "grad_norm": 1.8398113250732422, "learning_rate": 1.974186991869919e-05, "loss": 0.3099, "step": 3175 }, { "epoch": 0.03878048780487805, "grad_norm": 2.554046154022217, "learning_rate": 1.9741463414634147e-05, "loss": 0.2789, "step": 3180 }, { "epoch": 0.038841463414634146, "grad_norm": 1.8626985549926758, "learning_rate": 1.9741056910569108e-05, "loss": 0.2812, "step": 3185 }, { "epoch": 0.038902439024390244, "grad_norm": 1.5064995288848877, "learning_rate": 1.9740650406504066e-05, "loss": 0.3203, "step": 3190 }, { "epoch": 0.03896341463414634, "grad_norm": 1.147133708000183, "learning_rate": 1.9740243902439028e-05, "loss": 0.2873, "step": 3195 }, { "epoch": 0.03902439024390244, "grad_norm": 1.4998210668563843, "learning_rate": 1.9739837398373986e-05, "loss": 0.3125, "step": 3200 }, { "epoch": 0.03908536585365854, "grad_norm": 2.022559642791748, "learning_rate": 1.9739430894308944e-05, "loss": 0.2827, "step": 3205 }, { "epoch": 0.039146341463414634, "grad_norm": 1.4542697668075562, "learning_rate": 1.9739024390243902e-05, "loss": 0.2473, "step": 3210 }, { "epoch": 0.03920731707317073, "grad_norm": 1.4227858781814575, "learning_rate": 1.9738617886178864e-05, "loss": 0.2754, "step": 3215 }, { "epoch": 0.03926829268292683, "grad_norm": 1.3878239393234253, "learning_rate": 1.9738211382113822e-05, "loss": 0.2364, "step": 3220 }, { "epoch": 0.03932926829268293, "grad_norm": 1.171003818511963, "learning_rate": 1.9737804878048783e-05, "loss": 0.2228, "step": 3225 }, { "epoch": 0.039390243902439025, "grad_norm": 1.4294453859329224, "learning_rate": 1.973739837398374e-05, "loss": 0.2092, "step": 3230 }, { "epoch": 0.03945121951219512, "grad_norm": 0.9315080642700195, "learning_rate": 1.97369918699187e-05, "loss": 0.2635, "step": 3235 }, { "epoch": 0.03951219512195122, "grad_norm": 2.4545018672943115, "learning_rate": 1.973658536585366e-05, "loss": 0.3045, "step": 3240 }, { "epoch": 0.03957317073170732, "grad_norm": 2.147418260574341, "learning_rate": 1.973617886178862e-05, "loss": 0.2815, "step": 3245 }, { "epoch": 0.039634146341463415, "grad_norm": 1.1806811094284058, "learning_rate": 1.973577235772358e-05, "loss": 0.2343, "step": 3250 }, { "epoch": 0.03969512195121951, "grad_norm": 1.3423949480056763, "learning_rate": 1.973536585365854e-05, "loss": 0.2598, "step": 3255 }, { "epoch": 0.03975609756097561, "grad_norm": 1.7948455810546875, "learning_rate": 1.9734959349593497e-05, "loss": 0.2911, "step": 3260 }, { "epoch": 0.03981707317073171, "grad_norm": 1.3791828155517578, "learning_rate": 1.9734552845528455e-05, "loss": 0.324, "step": 3265 }, { "epoch": 0.039878048780487806, "grad_norm": 1.2547540664672852, "learning_rate": 1.9734146341463417e-05, "loss": 0.2477, "step": 3270 }, { "epoch": 0.0399390243902439, "grad_norm": 1.4244766235351562, "learning_rate": 1.9733739837398375e-05, "loss": 0.2246, "step": 3275 }, { "epoch": 0.04, "grad_norm": 1.8609158992767334, "learning_rate": 1.9733333333333336e-05, "loss": 0.2301, "step": 3280 }, { "epoch": 0.0400609756097561, "grad_norm": 5.611292839050293, "learning_rate": 1.9732926829268295e-05, "loss": 0.2765, "step": 3285 }, { "epoch": 0.040121951219512196, "grad_norm": 1.127443790435791, "learning_rate": 1.9732520325203253e-05, "loss": 0.2674, "step": 3290 }, { "epoch": 0.040182926829268294, "grad_norm": 2.0358400344848633, "learning_rate": 1.973211382113821e-05, "loss": 0.2274, "step": 3295 }, { "epoch": 0.04024390243902439, "grad_norm": 0.9661023616790771, "learning_rate": 1.9731707317073172e-05, "loss": 0.245, "step": 3300 }, { "epoch": 0.04030487804878049, "grad_norm": 2.2361299991607666, "learning_rate": 1.973130081300813e-05, "loss": 0.2771, "step": 3305 }, { "epoch": 0.040365853658536587, "grad_norm": 1.266716480255127, "learning_rate": 1.9730894308943092e-05, "loss": 0.2769, "step": 3310 }, { "epoch": 0.040426829268292684, "grad_norm": 1.9091328382492065, "learning_rate": 1.973048780487805e-05, "loss": 0.2825, "step": 3315 }, { "epoch": 0.04048780487804878, "grad_norm": 1.0559918880462646, "learning_rate": 1.9730081300813008e-05, "loss": 0.2401, "step": 3320 }, { "epoch": 0.04054878048780488, "grad_norm": 1.0325344800949097, "learning_rate": 1.972967479674797e-05, "loss": 0.2518, "step": 3325 }, { "epoch": 0.04060975609756098, "grad_norm": 0.9880408644676208, "learning_rate": 1.9729268292682928e-05, "loss": 0.3201, "step": 3330 }, { "epoch": 0.040670731707317075, "grad_norm": 2.034712076187134, "learning_rate": 1.972886178861789e-05, "loss": 0.3138, "step": 3335 }, { "epoch": 0.04073170731707317, "grad_norm": 1.4015060663223267, "learning_rate": 1.9728455284552847e-05, "loss": 0.2293, "step": 3340 }, { "epoch": 0.04079268292682927, "grad_norm": 1.7228964567184448, "learning_rate": 1.972804878048781e-05, "loss": 0.2236, "step": 3345 }, { "epoch": 0.04085365853658537, "grad_norm": 1.9399306774139404, "learning_rate": 1.9727642276422764e-05, "loss": 0.2393, "step": 3350 }, { "epoch": 0.040914634146341465, "grad_norm": 1.507493257522583, "learning_rate": 1.9727235772357725e-05, "loss": 0.2777, "step": 3355 }, { "epoch": 0.04097560975609756, "grad_norm": 1.5948997735977173, "learning_rate": 1.9726829268292683e-05, "loss": 0.2602, "step": 3360 }, { "epoch": 0.04103658536585366, "grad_norm": 1.6067414283752441, "learning_rate": 1.9726422764227645e-05, "loss": 0.2688, "step": 3365 }, { "epoch": 0.04109756097560976, "grad_norm": 1.3751131296157837, "learning_rate": 1.9726016260162603e-05, "loss": 0.2336, "step": 3370 }, { "epoch": 0.041158536585365856, "grad_norm": 1.0930860042572021, "learning_rate": 1.9725609756097565e-05, "loss": 0.2265, "step": 3375 }, { "epoch": 0.04121951219512195, "grad_norm": 1.461969017982483, "learning_rate": 1.972520325203252e-05, "loss": 0.2523, "step": 3380 }, { "epoch": 0.04128048780487805, "grad_norm": 1.911274790763855, "learning_rate": 1.972479674796748e-05, "loss": 0.2293, "step": 3385 }, { "epoch": 0.04134146341463415, "grad_norm": 1.336201548576355, "learning_rate": 1.972439024390244e-05, "loss": 0.2559, "step": 3390 }, { "epoch": 0.041402439024390246, "grad_norm": 1.1260591745376587, "learning_rate": 1.97239837398374e-05, "loss": 0.2646, "step": 3395 }, { "epoch": 0.041463414634146344, "grad_norm": 1.205989956855774, "learning_rate": 1.972357723577236e-05, "loss": 0.2867, "step": 3400 }, { "epoch": 0.04152439024390244, "grad_norm": 2.1550018787384033, "learning_rate": 1.972317073170732e-05, "loss": 0.2499, "step": 3405 }, { "epoch": 0.04158536585365854, "grad_norm": 1.8069754838943481, "learning_rate": 1.9722764227642278e-05, "loss": 0.2287, "step": 3410 }, { "epoch": 0.041646341463414636, "grad_norm": 1.4509470462799072, "learning_rate": 1.9722357723577236e-05, "loss": 0.1972, "step": 3415 }, { "epoch": 0.041707317073170734, "grad_norm": 2.1158673763275146, "learning_rate": 1.9721951219512198e-05, "loss": 0.32, "step": 3420 }, { "epoch": 0.04176829268292683, "grad_norm": 1.3500005006790161, "learning_rate": 1.9721544715447156e-05, "loss": 0.2297, "step": 3425 }, { "epoch": 0.04182926829268293, "grad_norm": 2.415308952331543, "learning_rate": 1.9721138211382117e-05, "loss": 0.2603, "step": 3430 }, { "epoch": 0.04189024390243903, "grad_norm": 1.6149944067001343, "learning_rate": 1.9720731707317076e-05, "loss": 0.2714, "step": 3435 }, { "epoch": 0.041951219512195125, "grad_norm": 1.713051438331604, "learning_rate": 1.9720325203252034e-05, "loss": 0.2594, "step": 3440 }, { "epoch": 0.04201219512195122, "grad_norm": 2.102789878845215, "learning_rate": 1.9719918699186992e-05, "loss": 0.2261, "step": 3445 }, { "epoch": 0.04207317073170732, "grad_norm": 1.8485240936279297, "learning_rate": 1.9719512195121953e-05, "loss": 0.2578, "step": 3450 }, { "epoch": 0.04213414634146342, "grad_norm": 2.148484468460083, "learning_rate": 1.971910569105691e-05, "loss": 0.2711, "step": 3455 }, { "epoch": 0.042195121951219515, "grad_norm": 1.2901194095611572, "learning_rate": 1.9718699186991873e-05, "loss": 0.2327, "step": 3460 }, { "epoch": 0.04225609756097561, "grad_norm": 1.1411415338516235, "learning_rate": 1.971829268292683e-05, "loss": 0.2122, "step": 3465 }, { "epoch": 0.04231707317073171, "grad_norm": 1.5819401741027832, "learning_rate": 1.971788617886179e-05, "loss": 0.2909, "step": 3470 }, { "epoch": 0.04237804878048781, "grad_norm": 3.1454555988311768, "learning_rate": 1.9717479674796747e-05, "loss": 0.2927, "step": 3475 }, { "epoch": 0.042439024390243905, "grad_norm": 2.555638313293457, "learning_rate": 1.971707317073171e-05, "loss": 0.2634, "step": 3480 }, { "epoch": 0.0425, "grad_norm": 3.7042527198791504, "learning_rate": 1.9716666666666667e-05, "loss": 0.2722, "step": 3485 }, { "epoch": 0.0425609756097561, "grad_norm": 2.436293840408325, "learning_rate": 1.971626016260163e-05, "loss": 0.501, "step": 3490 }, { "epoch": 0.0426219512195122, "grad_norm": 1.3810029029846191, "learning_rate": 1.9715853658536587e-05, "loss": 0.2122, "step": 3495 }, { "epoch": 0.042682926829268296, "grad_norm": 2.0478670597076416, "learning_rate": 1.9715447154471545e-05, "loss": 0.2316, "step": 3500 }, { "epoch": 0.042743902439024394, "grad_norm": 3.1900289058685303, "learning_rate": 1.9715040650406506e-05, "loss": 0.2951, "step": 3505 }, { "epoch": 0.04280487804878049, "grad_norm": 1.764256477355957, "learning_rate": 1.9714634146341464e-05, "loss": 0.2561, "step": 3510 }, { "epoch": 0.04286585365853659, "grad_norm": 0.9600162506103516, "learning_rate": 1.9714227642276426e-05, "loss": 0.1956, "step": 3515 }, { "epoch": 0.042926829268292686, "grad_norm": 1.3479878902435303, "learning_rate": 1.9713821138211384e-05, "loss": 0.3086, "step": 3520 }, { "epoch": 0.04298780487804878, "grad_norm": 1.1767523288726807, "learning_rate": 1.9713414634146342e-05, "loss": 0.2481, "step": 3525 }, { "epoch": 0.043048780487804875, "grad_norm": 1.7927237749099731, "learning_rate": 1.97130081300813e-05, "loss": 0.2253, "step": 3530 }, { "epoch": 0.04310975609756097, "grad_norm": 1.243427038192749, "learning_rate": 1.9712601626016262e-05, "loss": 0.2226, "step": 3535 }, { "epoch": 0.04317073170731707, "grad_norm": 1.6017086505889893, "learning_rate": 1.971219512195122e-05, "loss": 0.3114, "step": 3540 }, { "epoch": 0.04323170731707317, "grad_norm": 1.0531001091003418, "learning_rate": 1.971178861788618e-05, "loss": 0.2217, "step": 3545 }, { "epoch": 0.043292682926829265, "grad_norm": 1.2158304452896118, "learning_rate": 1.971138211382114e-05, "loss": 0.3575, "step": 3550 }, { "epoch": 0.04335365853658536, "grad_norm": 1.8965427875518799, "learning_rate": 1.97109756097561e-05, "loss": 0.2326, "step": 3555 }, { "epoch": 0.04341463414634146, "grad_norm": 1.931167483329773, "learning_rate": 1.9710569105691056e-05, "loss": 0.2559, "step": 3560 }, { "epoch": 0.04347560975609756, "grad_norm": 1.6638990640640259, "learning_rate": 1.9710162601626017e-05, "loss": 0.2547, "step": 3565 }, { "epoch": 0.043536585365853656, "grad_norm": 1.630695104598999, "learning_rate": 1.9709756097560976e-05, "loss": 0.2032, "step": 3570 }, { "epoch": 0.04359756097560975, "grad_norm": 1.507279396057129, "learning_rate": 1.9709349593495937e-05, "loss": 0.2444, "step": 3575 }, { "epoch": 0.04365853658536585, "grad_norm": 1.9826780557632446, "learning_rate": 1.9708943089430895e-05, "loss": 0.2582, "step": 3580 }, { "epoch": 0.04371951219512195, "grad_norm": 1.2071077823638916, "learning_rate": 1.9708536585365857e-05, "loss": 0.2109, "step": 3585 }, { "epoch": 0.043780487804878046, "grad_norm": 1.3901991844177246, "learning_rate": 1.9708130081300815e-05, "loss": 0.2264, "step": 3590 }, { "epoch": 0.043841463414634144, "grad_norm": 1.1973110437393188, "learning_rate": 1.9707723577235773e-05, "loss": 0.2373, "step": 3595 }, { "epoch": 0.04390243902439024, "grad_norm": 2.518024206161499, "learning_rate": 1.9707317073170734e-05, "loss": 0.2687, "step": 3600 }, { "epoch": 0.04396341463414634, "grad_norm": 1.1205296516418457, "learning_rate": 1.9706910569105693e-05, "loss": 0.2616, "step": 3605 }, { "epoch": 0.044024390243902436, "grad_norm": 2.0777392387390137, "learning_rate": 1.9706504065040654e-05, "loss": 0.2852, "step": 3610 }, { "epoch": 0.044085365853658534, "grad_norm": 1.0991566181182861, "learning_rate": 1.9706097560975612e-05, "loss": 0.2433, "step": 3615 }, { "epoch": 0.04414634146341463, "grad_norm": 1.1478859186172485, "learning_rate": 1.970569105691057e-05, "loss": 0.2866, "step": 3620 }, { "epoch": 0.04420731707317073, "grad_norm": 2.2310431003570557, "learning_rate": 1.970528455284553e-05, "loss": 0.2406, "step": 3625 }, { "epoch": 0.04426829268292683, "grad_norm": 0.966279923915863, "learning_rate": 1.970487804878049e-05, "loss": 0.2719, "step": 3630 }, { "epoch": 0.044329268292682925, "grad_norm": 1.413814902305603, "learning_rate": 1.9704471544715448e-05, "loss": 0.2997, "step": 3635 }, { "epoch": 0.04439024390243902, "grad_norm": 2.1250040531158447, "learning_rate": 1.970406504065041e-05, "loss": 0.2675, "step": 3640 }, { "epoch": 0.04445121951219512, "grad_norm": 1.04999840259552, "learning_rate": 1.9703658536585368e-05, "loss": 0.2856, "step": 3645 }, { "epoch": 0.04451219512195122, "grad_norm": 1.263055682182312, "learning_rate": 1.9703252032520326e-05, "loss": 0.276, "step": 3650 }, { "epoch": 0.044573170731707315, "grad_norm": 1.3512661457061768, "learning_rate": 1.9702845528455284e-05, "loss": 0.2734, "step": 3655 }, { "epoch": 0.04463414634146341, "grad_norm": 1.4468586444854736, "learning_rate": 1.9702439024390246e-05, "loss": 0.1778, "step": 3660 }, { "epoch": 0.04469512195121951, "grad_norm": 0.9187471866607666, "learning_rate": 1.9702032520325204e-05, "loss": 0.2477, "step": 3665 }, { "epoch": 0.04475609756097561, "grad_norm": 1.4598948955535889, "learning_rate": 1.9701626016260165e-05, "loss": 0.314, "step": 3670 }, { "epoch": 0.044817073170731705, "grad_norm": 1.3803448677062988, "learning_rate": 1.9701219512195123e-05, "loss": 0.3245, "step": 3675 }, { "epoch": 0.0448780487804878, "grad_norm": 2.1724603176116943, "learning_rate": 1.970081300813008e-05, "loss": 0.2776, "step": 3680 }, { "epoch": 0.0449390243902439, "grad_norm": 1.3231399059295654, "learning_rate": 1.9700406504065043e-05, "loss": 0.254, "step": 3685 }, { "epoch": 0.045, "grad_norm": 1.120874047279358, "learning_rate": 1.97e-05, "loss": 0.2856, "step": 3690 }, { "epoch": 0.045060975609756096, "grad_norm": 1.1869999170303345, "learning_rate": 1.9699593495934963e-05, "loss": 0.4817, "step": 3695 }, { "epoch": 0.045121951219512194, "grad_norm": 1.0812528133392334, "learning_rate": 1.969918699186992e-05, "loss": 0.2005, "step": 3700 }, { "epoch": 0.04518292682926829, "grad_norm": 1.3757773637771606, "learning_rate": 1.969878048780488e-05, "loss": 0.2544, "step": 3705 }, { "epoch": 0.04524390243902439, "grad_norm": 2.12109112739563, "learning_rate": 1.9698373983739837e-05, "loss": 0.2664, "step": 3710 }, { "epoch": 0.045304878048780486, "grad_norm": 1.315524697303772, "learning_rate": 1.96979674796748e-05, "loss": 0.2201, "step": 3715 }, { "epoch": 0.045365853658536584, "grad_norm": 0.9302523136138916, "learning_rate": 1.9697560975609757e-05, "loss": 0.1939, "step": 3720 }, { "epoch": 0.04542682926829268, "grad_norm": 1.6037238836288452, "learning_rate": 1.9697154471544718e-05, "loss": 0.328, "step": 3725 }, { "epoch": 0.04548780487804878, "grad_norm": 1.296765685081482, "learning_rate": 1.9696747967479676e-05, "loss": 0.2621, "step": 3730 }, { "epoch": 0.04554878048780488, "grad_norm": 1.0164659023284912, "learning_rate": 1.9696341463414638e-05, "loss": 0.2406, "step": 3735 }, { "epoch": 0.045609756097560974, "grad_norm": 1.3567476272583008, "learning_rate": 1.9695934959349593e-05, "loss": 0.2246, "step": 3740 }, { "epoch": 0.04567073170731707, "grad_norm": 1.3615574836730957, "learning_rate": 1.9695528455284554e-05, "loss": 0.2302, "step": 3745 }, { "epoch": 0.04573170731707317, "grad_norm": 1.453104853630066, "learning_rate": 1.9695121951219512e-05, "loss": 0.3939, "step": 3750 }, { "epoch": 0.04579268292682927, "grad_norm": 1.4083442687988281, "learning_rate": 1.9694715447154474e-05, "loss": 0.2403, "step": 3755 }, { "epoch": 0.045853658536585365, "grad_norm": 2.1544687747955322, "learning_rate": 1.9694308943089432e-05, "loss": 0.2647, "step": 3760 }, { "epoch": 0.04591463414634146, "grad_norm": 1.4145537614822388, "learning_rate": 1.9693902439024393e-05, "loss": 0.2137, "step": 3765 }, { "epoch": 0.04597560975609756, "grad_norm": 1.6664584875106812, "learning_rate": 1.969349593495935e-05, "loss": 0.2801, "step": 3770 }, { "epoch": 0.04603658536585366, "grad_norm": 0.9981615543365479, "learning_rate": 1.969308943089431e-05, "loss": 0.2592, "step": 3775 }, { "epoch": 0.046097560975609755, "grad_norm": 1.158081293106079, "learning_rate": 1.969268292682927e-05, "loss": 0.3104, "step": 3780 }, { "epoch": 0.04615853658536585, "grad_norm": 1.2803317308425903, "learning_rate": 1.969227642276423e-05, "loss": 0.2617, "step": 3785 }, { "epoch": 0.04621951219512195, "grad_norm": 0.8333441019058228, "learning_rate": 1.9691869918699187e-05, "loss": 0.2138, "step": 3790 }, { "epoch": 0.04628048780487805, "grad_norm": 1.4562721252441406, "learning_rate": 1.969146341463415e-05, "loss": 0.236, "step": 3795 }, { "epoch": 0.046341463414634146, "grad_norm": 1.1905906200408936, "learning_rate": 1.9691056910569107e-05, "loss": 0.2502, "step": 3800 }, { "epoch": 0.046402439024390243, "grad_norm": 2.0177271366119385, "learning_rate": 1.9690650406504065e-05, "loss": 0.2939, "step": 3805 }, { "epoch": 0.04646341463414634, "grad_norm": 2.867096424102783, "learning_rate": 1.9690243902439027e-05, "loss": 0.2324, "step": 3810 }, { "epoch": 0.04652439024390244, "grad_norm": 1.1952217817306519, "learning_rate": 1.9689837398373985e-05, "loss": 0.2387, "step": 3815 }, { "epoch": 0.046585365853658536, "grad_norm": 1.4758615493774414, "learning_rate": 1.9689430894308946e-05, "loss": 0.2634, "step": 3820 }, { "epoch": 0.046646341463414634, "grad_norm": 1.2616688013076782, "learning_rate": 1.9689024390243904e-05, "loss": 0.2043, "step": 3825 }, { "epoch": 0.04670731707317073, "grad_norm": 1.7249786853790283, "learning_rate": 1.9688617886178863e-05, "loss": 0.2536, "step": 3830 }, { "epoch": 0.04676829268292683, "grad_norm": 1.629881501197815, "learning_rate": 1.968821138211382e-05, "loss": 0.2755, "step": 3835 }, { "epoch": 0.04682926829268293, "grad_norm": 1.0923540592193604, "learning_rate": 1.9687804878048782e-05, "loss": 0.2092, "step": 3840 }, { "epoch": 0.046890243902439024, "grad_norm": 1.4167195558547974, "learning_rate": 1.968739837398374e-05, "loss": 0.2222, "step": 3845 }, { "epoch": 0.04695121951219512, "grad_norm": 1.5079596042633057, "learning_rate": 1.9686991869918702e-05, "loss": 0.2453, "step": 3850 }, { "epoch": 0.04701219512195122, "grad_norm": 0.8960543870925903, "learning_rate": 1.968658536585366e-05, "loss": 0.2334, "step": 3855 }, { "epoch": 0.04707317073170732, "grad_norm": 1.3866350650787354, "learning_rate": 1.9686178861788618e-05, "loss": 0.2521, "step": 3860 }, { "epoch": 0.047134146341463415, "grad_norm": 1.4428964853286743, "learning_rate": 1.968577235772358e-05, "loss": 0.25, "step": 3865 }, { "epoch": 0.04719512195121951, "grad_norm": 1.4500162601470947, "learning_rate": 1.9685365853658538e-05, "loss": 0.217, "step": 3870 }, { "epoch": 0.04725609756097561, "grad_norm": 1.1383851766586304, "learning_rate": 1.96849593495935e-05, "loss": 0.1831, "step": 3875 }, { "epoch": 0.04731707317073171, "grad_norm": 2.2308623790740967, "learning_rate": 1.9684552845528457e-05, "loss": 0.2657, "step": 3880 }, { "epoch": 0.047378048780487805, "grad_norm": 1.0216810703277588, "learning_rate": 1.9684146341463416e-05, "loss": 0.2275, "step": 3885 }, { "epoch": 0.0474390243902439, "grad_norm": 2.5956320762634277, "learning_rate": 1.9683739837398374e-05, "loss": 0.2676, "step": 3890 }, { "epoch": 0.0475, "grad_norm": 1.2947747707366943, "learning_rate": 1.9683333333333335e-05, "loss": 0.2453, "step": 3895 }, { "epoch": 0.0475609756097561, "grad_norm": 2.5522515773773193, "learning_rate": 1.9682926829268293e-05, "loss": 0.2795, "step": 3900 }, { "epoch": 0.047621951219512196, "grad_norm": 3.503042697906494, "learning_rate": 1.9682520325203255e-05, "loss": 0.1931, "step": 3905 }, { "epoch": 0.04768292682926829, "grad_norm": 1.2764548063278198, "learning_rate": 1.9682113821138213e-05, "loss": 0.2313, "step": 3910 }, { "epoch": 0.04774390243902439, "grad_norm": 2.6084980964660645, "learning_rate": 1.9681707317073174e-05, "loss": 0.2775, "step": 3915 }, { "epoch": 0.04780487804878049, "grad_norm": 1.3260385990142822, "learning_rate": 1.968130081300813e-05, "loss": 0.216, "step": 3920 }, { "epoch": 0.047865853658536586, "grad_norm": 2.3357560634613037, "learning_rate": 1.968089430894309e-05, "loss": 0.2332, "step": 3925 }, { "epoch": 0.047926829268292684, "grad_norm": 1.0547586679458618, "learning_rate": 1.968048780487805e-05, "loss": 0.2625, "step": 3930 }, { "epoch": 0.04798780487804878, "grad_norm": 1.3243651390075684, "learning_rate": 1.968008130081301e-05, "loss": 0.259, "step": 3935 }, { "epoch": 0.04804878048780488, "grad_norm": 0.7277045249938965, "learning_rate": 1.967967479674797e-05, "loss": 0.2365, "step": 3940 }, { "epoch": 0.04810975609756098, "grad_norm": 1.790700078010559, "learning_rate": 1.967926829268293e-05, "loss": 0.2489, "step": 3945 }, { "epoch": 0.048170731707317074, "grad_norm": 2.472426176071167, "learning_rate": 1.9678861788617888e-05, "loss": 0.2334, "step": 3950 }, { "epoch": 0.04823170731707317, "grad_norm": 1.5822761058807373, "learning_rate": 1.9678455284552846e-05, "loss": 0.2795, "step": 3955 }, { "epoch": 0.04829268292682927, "grad_norm": 1.8574228286743164, "learning_rate": 1.9678048780487808e-05, "loss": 0.2478, "step": 3960 }, { "epoch": 0.04835365853658537, "grad_norm": 1.128359079360962, "learning_rate": 1.9677642276422766e-05, "loss": 0.2966, "step": 3965 }, { "epoch": 0.048414634146341465, "grad_norm": 1.8196905851364136, "learning_rate": 1.9677235772357724e-05, "loss": 0.2192, "step": 3970 }, { "epoch": 0.04847560975609756, "grad_norm": 1.94816255569458, "learning_rate": 1.9676829268292686e-05, "loss": 0.2312, "step": 3975 }, { "epoch": 0.04853658536585366, "grad_norm": 1.6553144454956055, "learning_rate": 1.9676422764227644e-05, "loss": 0.2158, "step": 3980 }, { "epoch": 0.04859756097560976, "grad_norm": 1.2650960683822632, "learning_rate": 1.9676016260162602e-05, "loss": 0.3012, "step": 3985 }, { "epoch": 0.048658536585365855, "grad_norm": 4.039161682128906, "learning_rate": 1.9675609756097563e-05, "loss": 0.2075, "step": 3990 }, { "epoch": 0.04871951219512195, "grad_norm": 1.3966519832611084, "learning_rate": 1.967520325203252e-05, "loss": 0.2637, "step": 3995 }, { "epoch": 0.04878048780487805, "grad_norm": 3.476414680480957, "learning_rate": 1.9674796747967483e-05, "loss": 0.2265, "step": 4000 }, { "epoch": 0.04884146341463415, "grad_norm": 1.1875008344650269, "learning_rate": 1.967439024390244e-05, "loss": 0.2541, "step": 4005 }, { "epoch": 0.048902439024390246, "grad_norm": 0.8507391214370728, "learning_rate": 1.96739837398374e-05, "loss": 0.2413, "step": 4010 }, { "epoch": 0.04896341463414634, "grad_norm": 1.579936146736145, "learning_rate": 1.9673577235772357e-05, "loss": 0.2473, "step": 4015 }, { "epoch": 0.04902439024390244, "grad_norm": 1.2900090217590332, "learning_rate": 1.967317073170732e-05, "loss": 0.257, "step": 4020 }, { "epoch": 0.04908536585365854, "grad_norm": 1.1765594482421875, "learning_rate": 1.9672764227642277e-05, "loss": 0.2485, "step": 4025 }, { "epoch": 0.049146341463414636, "grad_norm": 1.5569514036178589, "learning_rate": 1.967235772357724e-05, "loss": 0.2122, "step": 4030 }, { "epoch": 0.049207317073170734, "grad_norm": 2.3575613498687744, "learning_rate": 1.9671951219512197e-05, "loss": 0.2031, "step": 4035 }, { "epoch": 0.04926829268292683, "grad_norm": 0.9878997802734375, "learning_rate": 1.9671544715447155e-05, "loss": 0.2233, "step": 4040 }, { "epoch": 0.04932926829268293, "grad_norm": 1.3134490251541138, "learning_rate": 1.9671138211382116e-05, "loss": 0.2442, "step": 4045 }, { "epoch": 0.04939024390243903, "grad_norm": 1.826820969581604, "learning_rate": 1.9670731707317074e-05, "loss": 0.2699, "step": 4050 }, { "epoch": 0.049451219512195124, "grad_norm": 0.8810872435569763, "learning_rate": 1.9670325203252033e-05, "loss": 0.2372, "step": 4055 }, { "epoch": 0.04951219512195122, "grad_norm": 1.3949689865112305, "learning_rate": 1.9669918699186994e-05, "loss": 0.2381, "step": 4060 }, { "epoch": 0.04957317073170732, "grad_norm": 1.9489678144454956, "learning_rate": 1.9669512195121952e-05, "loss": 0.3164, "step": 4065 }, { "epoch": 0.04963414634146342, "grad_norm": 1.0418457984924316, "learning_rate": 1.966910569105691e-05, "loss": 0.3229, "step": 4070 }, { "epoch": 0.049695121951219515, "grad_norm": 1.572194218635559, "learning_rate": 1.9668699186991872e-05, "loss": 0.2341, "step": 4075 }, { "epoch": 0.04975609756097561, "grad_norm": 1.2245063781738281, "learning_rate": 1.966829268292683e-05, "loss": 0.2314, "step": 4080 }, { "epoch": 0.04981707317073171, "grad_norm": 1.0986366271972656, "learning_rate": 1.966788617886179e-05, "loss": 0.2193, "step": 4085 }, { "epoch": 0.04987804878048781, "grad_norm": 2.4941835403442383, "learning_rate": 1.966747967479675e-05, "loss": 0.2795, "step": 4090 }, { "epoch": 0.049939024390243905, "grad_norm": 1.3527085781097412, "learning_rate": 1.966707317073171e-05, "loss": 0.2305, "step": 4095 }, { "epoch": 0.05, "grad_norm": 1.9200513362884521, "learning_rate": 1.9666666666666666e-05, "loss": 0.256, "step": 4100 }, { "epoch": 0.0500609756097561, "grad_norm": 6.465672016143799, "learning_rate": 1.9666260162601627e-05, "loss": 0.27, "step": 4105 }, { "epoch": 0.0501219512195122, "grad_norm": 1.2102750539779663, "learning_rate": 1.9665853658536585e-05, "loss": 0.2395, "step": 4110 }, { "epoch": 0.050182926829268296, "grad_norm": 2.0634782314300537, "learning_rate": 1.9665447154471547e-05, "loss": 0.2655, "step": 4115 }, { "epoch": 0.05024390243902439, "grad_norm": 1.032180666923523, "learning_rate": 1.9665040650406505e-05, "loss": 0.2339, "step": 4120 }, { "epoch": 0.05030487804878049, "grad_norm": 1.4350422620773315, "learning_rate": 1.9664634146341467e-05, "loss": 0.2141, "step": 4125 }, { "epoch": 0.05036585365853659, "grad_norm": 0.9343020915985107, "learning_rate": 1.9664227642276425e-05, "loss": 0.265, "step": 4130 }, { "epoch": 0.050426829268292686, "grad_norm": 1.3290778398513794, "learning_rate": 1.9663821138211383e-05, "loss": 0.2133, "step": 4135 }, { "epoch": 0.050487804878048784, "grad_norm": 1.7133724689483643, "learning_rate": 1.9663414634146344e-05, "loss": 0.2724, "step": 4140 }, { "epoch": 0.05054878048780488, "grad_norm": 1.5100756883621216, "learning_rate": 1.9663008130081303e-05, "loss": 0.1839, "step": 4145 }, { "epoch": 0.05060975609756098, "grad_norm": 1.8340717554092407, "learning_rate": 1.966260162601626e-05, "loss": 0.2694, "step": 4150 }, { "epoch": 0.05067073170731708, "grad_norm": 2.531048536300659, "learning_rate": 1.9662195121951222e-05, "loss": 0.3919, "step": 4155 }, { "epoch": 0.050731707317073174, "grad_norm": 1.6162669658660889, "learning_rate": 1.966178861788618e-05, "loss": 0.291, "step": 4160 }, { "epoch": 0.050792682926829265, "grad_norm": 1.6503560543060303, "learning_rate": 1.966138211382114e-05, "loss": 0.2659, "step": 4165 }, { "epoch": 0.05085365853658536, "grad_norm": 2.7219290733337402, "learning_rate": 1.96609756097561e-05, "loss": 0.3324, "step": 4170 }, { "epoch": 0.05091463414634146, "grad_norm": 1.6249839067459106, "learning_rate": 1.9660569105691058e-05, "loss": 0.2108, "step": 4175 }, { "epoch": 0.05097560975609756, "grad_norm": 1.2155476808547974, "learning_rate": 1.966016260162602e-05, "loss": 0.237, "step": 4180 }, { "epoch": 0.051036585365853655, "grad_norm": 4.36357307434082, "learning_rate": 1.9659756097560978e-05, "loss": 0.2236, "step": 4185 }, { "epoch": 0.05109756097560975, "grad_norm": 1.039552927017212, "learning_rate": 1.9659349593495936e-05, "loss": 0.243, "step": 4190 }, { "epoch": 0.05115853658536585, "grad_norm": 1.820730209350586, "learning_rate": 1.9658943089430894e-05, "loss": 0.2335, "step": 4195 }, { "epoch": 0.05121951219512195, "grad_norm": 1.556298017501831, "learning_rate": 1.9658536585365856e-05, "loss": 0.198, "step": 4200 }, { "epoch": 0.051280487804878046, "grad_norm": 0.7078145742416382, "learning_rate": 1.9658130081300814e-05, "loss": 0.2168, "step": 4205 }, { "epoch": 0.05134146341463414, "grad_norm": 0.9369012117385864, "learning_rate": 1.9657723577235775e-05, "loss": 0.2124, "step": 4210 }, { "epoch": 0.05140243902439024, "grad_norm": 2.0584089756011963, "learning_rate": 1.9657317073170733e-05, "loss": 0.2387, "step": 4215 }, { "epoch": 0.05146341463414634, "grad_norm": 0.7327602505683899, "learning_rate": 1.965691056910569e-05, "loss": 0.2501, "step": 4220 }, { "epoch": 0.051524390243902436, "grad_norm": 1.4021055698394775, "learning_rate": 1.9656504065040653e-05, "loss": 0.228, "step": 4225 }, { "epoch": 0.051585365853658534, "grad_norm": 1.060149908065796, "learning_rate": 1.965609756097561e-05, "loss": 0.1917, "step": 4230 }, { "epoch": 0.05164634146341463, "grad_norm": 1.4477213621139526, "learning_rate": 1.965569105691057e-05, "loss": 0.2299, "step": 4235 }, { "epoch": 0.05170731707317073, "grad_norm": 1.7447720766067505, "learning_rate": 1.965528455284553e-05, "loss": 0.2387, "step": 4240 }, { "epoch": 0.05176829268292683, "grad_norm": 1.0414420366287231, "learning_rate": 1.965487804878049e-05, "loss": 0.2385, "step": 4245 }, { "epoch": 0.051829268292682924, "grad_norm": 1.0516210794448853, "learning_rate": 1.9654471544715447e-05, "loss": 0.2595, "step": 4250 }, { "epoch": 0.05189024390243902, "grad_norm": 1.108665943145752, "learning_rate": 1.965406504065041e-05, "loss": 0.2148, "step": 4255 }, { "epoch": 0.05195121951219512, "grad_norm": 2.21826434135437, "learning_rate": 1.9653658536585367e-05, "loss": 0.2735, "step": 4260 }, { "epoch": 0.05201219512195122, "grad_norm": 1.632172703742981, "learning_rate": 1.9653252032520328e-05, "loss": 0.2315, "step": 4265 }, { "epoch": 0.052073170731707315, "grad_norm": 3.589297294616699, "learning_rate": 1.9652845528455286e-05, "loss": 0.2918, "step": 4270 }, { "epoch": 0.05213414634146341, "grad_norm": 1.7534948587417603, "learning_rate": 1.9652439024390248e-05, "loss": 0.2361, "step": 4275 }, { "epoch": 0.05219512195121951, "grad_norm": 1.0881783962249756, "learning_rate": 1.9652032520325202e-05, "loss": 0.2388, "step": 4280 }, { "epoch": 0.05225609756097561, "grad_norm": 1.5005066394805908, "learning_rate": 1.9651626016260164e-05, "loss": 0.2158, "step": 4285 }, { "epoch": 0.052317073170731705, "grad_norm": 1.5439281463623047, "learning_rate": 1.9651219512195122e-05, "loss": 0.2542, "step": 4290 }, { "epoch": 0.0523780487804878, "grad_norm": 0.7489732503890991, "learning_rate": 1.9650813008130084e-05, "loss": 0.2187, "step": 4295 }, { "epoch": 0.0524390243902439, "grad_norm": 0.8826647996902466, "learning_rate": 1.9650406504065042e-05, "loss": 0.2734, "step": 4300 }, { "epoch": 0.0525, "grad_norm": 0.9829803109169006, "learning_rate": 1.9650000000000003e-05, "loss": 0.2389, "step": 4305 }, { "epoch": 0.052560975609756096, "grad_norm": 0.9993339776992798, "learning_rate": 1.964959349593496e-05, "loss": 0.1776, "step": 4310 }, { "epoch": 0.05262195121951219, "grad_norm": 1.6685571670532227, "learning_rate": 1.964918699186992e-05, "loss": 0.2816, "step": 4315 }, { "epoch": 0.05268292682926829, "grad_norm": 1.6112427711486816, "learning_rate": 1.9648780487804878e-05, "loss": 0.2438, "step": 4320 }, { "epoch": 0.05274390243902439, "grad_norm": 1.5388028621673584, "learning_rate": 1.964837398373984e-05, "loss": 0.1976, "step": 4325 }, { "epoch": 0.052804878048780486, "grad_norm": 14.53398609161377, "learning_rate": 1.9647967479674797e-05, "loss": 0.3526, "step": 4330 }, { "epoch": 0.052865853658536584, "grad_norm": 0.9667468070983887, "learning_rate": 1.964756097560976e-05, "loss": 0.1804, "step": 4335 }, { "epoch": 0.05292682926829268, "grad_norm": 1.2540998458862305, "learning_rate": 1.9647154471544717e-05, "loss": 0.2332, "step": 4340 }, { "epoch": 0.05298780487804878, "grad_norm": 3.1462972164154053, "learning_rate": 1.9646747967479675e-05, "loss": 0.252, "step": 4345 }, { "epoch": 0.05304878048780488, "grad_norm": 1.1336748600006104, "learning_rate": 1.9646341463414637e-05, "loss": 0.2251, "step": 4350 }, { "epoch": 0.053109756097560974, "grad_norm": 1.3508806228637695, "learning_rate": 1.9645934959349595e-05, "loss": 0.2287, "step": 4355 }, { "epoch": 0.05317073170731707, "grad_norm": 1.0721640586853027, "learning_rate": 1.9645528455284556e-05, "loss": 0.2375, "step": 4360 }, { "epoch": 0.05323170731707317, "grad_norm": 1.6466132402420044, "learning_rate": 1.9645121951219514e-05, "loss": 0.2421, "step": 4365 }, { "epoch": 0.05329268292682927, "grad_norm": 2.133709669113159, "learning_rate": 1.9644715447154473e-05, "loss": 0.2726, "step": 4370 }, { "epoch": 0.053353658536585365, "grad_norm": 3.9315407276153564, "learning_rate": 1.964430894308943e-05, "loss": 0.2729, "step": 4375 }, { "epoch": 0.05341463414634146, "grad_norm": 0.9661005735397339, "learning_rate": 1.9643902439024392e-05, "loss": 0.2544, "step": 4380 }, { "epoch": 0.05347560975609756, "grad_norm": 1.5198088884353638, "learning_rate": 1.964349593495935e-05, "loss": 0.3167, "step": 4385 }, { "epoch": 0.05353658536585366, "grad_norm": 1.80367910861969, "learning_rate": 1.9643089430894312e-05, "loss": 0.2091, "step": 4390 }, { "epoch": 0.053597560975609755, "grad_norm": 1.1664215326309204, "learning_rate": 1.964268292682927e-05, "loss": 0.2209, "step": 4395 }, { "epoch": 0.05365853658536585, "grad_norm": 1.4306467771530151, "learning_rate": 1.9642276422764228e-05, "loss": 0.2086, "step": 4400 }, { "epoch": 0.05371951219512195, "grad_norm": 0.9308014512062073, "learning_rate": 1.964186991869919e-05, "loss": 0.2857, "step": 4405 }, { "epoch": 0.05378048780487805, "grad_norm": 1.101882815361023, "learning_rate": 1.9641463414634148e-05, "loss": 0.2082, "step": 4410 }, { "epoch": 0.053841463414634146, "grad_norm": 3.365966558456421, "learning_rate": 1.9641056910569106e-05, "loss": 0.2594, "step": 4415 }, { "epoch": 0.05390243902439024, "grad_norm": 1.6154669523239136, "learning_rate": 1.9640650406504067e-05, "loss": 0.2728, "step": 4420 }, { "epoch": 0.05396341463414634, "grad_norm": 1.615317940711975, "learning_rate": 1.9640243902439025e-05, "loss": 0.2241, "step": 4425 }, { "epoch": 0.05402439024390244, "grad_norm": 2.2047932147979736, "learning_rate": 1.9639837398373984e-05, "loss": 0.3196, "step": 4430 }, { "epoch": 0.054085365853658536, "grad_norm": 0.7325743436813354, "learning_rate": 1.9639430894308945e-05, "loss": 0.2723, "step": 4435 }, { "epoch": 0.054146341463414634, "grad_norm": 1.0123072862625122, "learning_rate": 1.9639024390243903e-05, "loss": 0.183, "step": 4440 }, { "epoch": 0.05420731707317073, "grad_norm": 1.0528634786605835, "learning_rate": 1.9638617886178865e-05, "loss": 0.1868, "step": 4445 }, { "epoch": 0.05426829268292683, "grad_norm": 1.5052120685577393, "learning_rate": 1.9638211382113823e-05, "loss": 0.2942, "step": 4450 }, { "epoch": 0.054329268292682927, "grad_norm": 1.857438087463379, "learning_rate": 1.9637804878048784e-05, "loss": 0.2786, "step": 4455 }, { "epoch": 0.054390243902439024, "grad_norm": 1.2302889823913574, "learning_rate": 1.963739837398374e-05, "loss": 0.2454, "step": 4460 }, { "epoch": 0.05445121951219512, "grad_norm": 1.2602174282073975, "learning_rate": 1.96369918699187e-05, "loss": 0.2277, "step": 4465 }, { "epoch": 0.05451219512195122, "grad_norm": 1.1033580303192139, "learning_rate": 1.963658536585366e-05, "loss": 0.2479, "step": 4470 }, { "epoch": 0.05457317073170732, "grad_norm": 1.9969630241394043, "learning_rate": 1.963617886178862e-05, "loss": 0.2411, "step": 4475 }, { "epoch": 0.054634146341463415, "grad_norm": 0.9353505969047546, "learning_rate": 1.963577235772358e-05, "loss": 0.2606, "step": 4480 }, { "epoch": 0.05469512195121951, "grad_norm": 1.1125423908233643, "learning_rate": 1.963536585365854e-05, "loss": 0.2323, "step": 4485 }, { "epoch": 0.05475609756097561, "grad_norm": 1.7127087116241455, "learning_rate": 1.9634959349593498e-05, "loss": 0.2295, "step": 4490 }, { "epoch": 0.05481707317073171, "grad_norm": 0.7722250819206238, "learning_rate": 1.9634552845528456e-05, "loss": 0.2086, "step": 4495 }, { "epoch": 0.054878048780487805, "grad_norm": 1.1189613342285156, "learning_rate": 1.9634146341463414e-05, "loss": 0.254, "step": 4500 }, { "epoch": 0.0549390243902439, "grad_norm": 1.2985649108886719, "learning_rate": 1.9633739837398376e-05, "loss": 0.2461, "step": 4505 }, { "epoch": 0.055, "grad_norm": 1.8324286937713623, "learning_rate": 1.9633333333333334e-05, "loss": 0.2143, "step": 4510 }, { "epoch": 0.0550609756097561, "grad_norm": 3.3007121086120605, "learning_rate": 1.9632926829268295e-05, "loss": 0.412, "step": 4515 }, { "epoch": 0.055121951219512196, "grad_norm": 1.7374651432037354, "learning_rate": 1.9632520325203254e-05, "loss": 0.2532, "step": 4520 }, { "epoch": 0.05518292682926829, "grad_norm": 1.1008801460266113, "learning_rate": 1.9632113821138212e-05, "loss": 0.2084, "step": 4525 }, { "epoch": 0.05524390243902439, "grad_norm": 2.0207419395446777, "learning_rate": 1.9631707317073173e-05, "loss": 0.2204, "step": 4530 }, { "epoch": 0.05530487804878049, "grad_norm": 2.535259962081909, "learning_rate": 1.963130081300813e-05, "loss": 0.2307, "step": 4535 }, { "epoch": 0.055365853658536586, "grad_norm": 0.9480040073394775, "learning_rate": 1.9630894308943093e-05, "loss": 0.1509, "step": 4540 }, { "epoch": 0.055426829268292684, "grad_norm": 1.3633043766021729, "learning_rate": 1.963048780487805e-05, "loss": 0.2143, "step": 4545 }, { "epoch": 0.05548780487804878, "grad_norm": 1.3932089805603027, "learning_rate": 1.963008130081301e-05, "loss": 0.2375, "step": 4550 }, { "epoch": 0.05554878048780488, "grad_norm": 1.400546669960022, "learning_rate": 1.9629674796747967e-05, "loss": 0.1888, "step": 4555 }, { "epoch": 0.055609756097560976, "grad_norm": 1.4698904752731323, "learning_rate": 1.962926829268293e-05, "loss": 0.2533, "step": 4560 }, { "epoch": 0.055670731707317074, "grad_norm": 1.1127431392669678, "learning_rate": 1.9628861788617887e-05, "loss": 0.2512, "step": 4565 }, { "epoch": 0.05573170731707317, "grad_norm": 1.3375391960144043, "learning_rate": 1.962845528455285e-05, "loss": 0.2531, "step": 4570 }, { "epoch": 0.05579268292682927, "grad_norm": 1.381555438041687, "learning_rate": 1.9628048780487807e-05, "loss": 0.1998, "step": 4575 }, { "epoch": 0.05585365853658537, "grad_norm": 1.3112519979476929, "learning_rate": 1.9627642276422765e-05, "loss": 0.2066, "step": 4580 }, { "epoch": 0.055914634146341465, "grad_norm": 1.2505789995193481, "learning_rate": 1.9627235772357723e-05, "loss": 0.2824, "step": 4585 }, { "epoch": 0.05597560975609756, "grad_norm": 1.2132776975631714, "learning_rate": 1.9626829268292684e-05, "loss": 0.2205, "step": 4590 }, { "epoch": 0.05603658536585366, "grad_norm": 1.6914199590682983, "learning_rate": 1.9626422764227642e-05, "loss": 0.3317, "step": 4595 }, { "epoch": 0.05609756097560976, "grad_norm": 1.5477650165557861, "learning_rate": 1.9626016260162604e-05, "loss": 0.2062, "step": 4600 }, { "epoch": 0.056158536585365855, "grad_norm": 1.4325354099273682, "learning_rate": 1.9625609756097562e-05, "loss": 0.2285, "step": 4605 }, { "epoch": 0.05621951219512195, "grad_norm": 1.723581075668335, "learning_rate": 1.962520325203252e-05, "loss": 0.2706, "step": 4610 }, { "epoch": 0.05628048780487805, "grad_norm": 2.0031163692474365, "learning_rate": 1.9624796747967482e-05, "loss": 0.292, "step": 4615 }, { "epoch": 0.05634146341463415, "grad_norm": 3.3287298679351807, "learning_rate": 1.962439024390244e-05, "loss": 0.272, "step": 4620 }, { "epoch": 0.056402439024390245, "grad_norm": 1.3331035375595093, "learning_rate": 1.96239837398374e-05, "loss": 0.236, "step": 4625 }, { "epoch": 0.05646341463414634, "grad_norm": 2.6280875205993652, "learning_rate": 1.962357723577236e-05, "loss": 0.2525, "step": 4630 }, { "epoch": 0.05652439024390244, "grad_norm": 1.1984144449234009, "learning_rate": 1.962317073170732e-05, "loss": 0.2323, "step": 4635 }, { "epoch": 0.05658536585365854, "grad_norm": 1.6051266193389893, "learning_rate": 1.9622764227642276e-05, "loss": 0.3037, "step": 4640 }, { "epoch": 0.056646341463414636, "grad_norm": 1.3991652727127075, "learning_rate": 1.9622357723577237e-05, "loss": 0.1915, "step": 4645 }, { "epoch": 0.056707317073170734, "grad_norm": 1.2318801879882812, "learning_rate": 1.9621951219512195e-05, "loss": 0.2673, "step": 4650 }, { "epoch": 0.05676829268292683, "grad_norm": 0.6685267686843872, "learning_rate": 1.9621544715447157e-05, "loss": 0.2277, "step": 4655 }, { "epoch": 0.05682926829268293, "grad_norm": 1.0670490264892578, "learning_rate": 1.9621138211382115e-05, "loss": 0.2518, "step": 4660 }, { "epoch": 0.056890243902439026, "grad_norm": 1.3608481884002686, "learning_rate": 1.9620731707317077e-05, "loss": 0.2581, "step": 4665 }, { "epoch": 0.056951219512195124, "grad_norm": 1.6297940015792847, "learning_rate": 1.9620325203252035e-05, "loss": 0.1849, "step": 4670 }, { "epoch": 0.05701219512195122, "grad_norm": 1.0613322257995605, "learning_rate": 1.9619918699186993e-05, "loss": 0.1782, "step": 4675 }, { "epoch": 0.05707317073170732, "grad_norm": 1.650551438331604, "learning_rate": 1.961951219512195e-05, "loss": 0.2463, "step": 4680 }, { "epoch": 0.05713414634146342, "grad_norm": 1.2937287092208862, "learning_rate": 1.9619105691056912e-05, "loss": 0.1801, "step": 4685 }, { "epoch": 0.057195121951219514, "grad_norm": 1.230522871017456, "learning_rate": 1.961869918699187e-05, "loss": 0.2293, "step": 4690 }, { "epoch": 0.05725609756097561, "grad_norm": 1.03861403465271, "learning_rate": 1.9618292682926832e-05, "loss": 0.207, "step": 4695 }, { "epoch": 0.05731707317073171, "grad_norm": 1.4181733131408691, "learning_rate": 1.961788617886179e-05, "loss": 0.2079, "step": 4700 }, { "epoch": 0.05737804878048781, "grad_norm": 1.1432347297668457, "learning_rate": 1.961747967479675e-05, "loss": 0.246, "step": 4705 }, { "epoch": 0.057439024390243905, "grad_norm": 1.0689603090286255, "learning_rate": 1.961707317073171e-05, "loss": 0.2491, "step": 4710 }, { "epoch": 0.0575, "grad_norm": 0.89327073097229, "learning_rate": 1.9616666666666668e-05, "loss": 0.2032, "step": 4715 }, { "epoch": 0.0575609756097561, "grad_norm": 1.068626880645752, "learning_rate": 1.961626016260163e-05, "loss": 0.1911, "step": 4720 }, { "epoch": 0.0576219512195122, "grad_norm": 1.8392201662063599, "learning_rate": 1.9615853658536588e-05, "loss": 0.263, "step": 4725 }, { "epoch": 0.057682926829268295, "grad_norm": 1.2733113765716553, "learning_rate": 1.9615447154471546e-05, "loss": 0.2568, "step": 4730 }, { "epoch": 0.05774390243902439, "grad_norm": 1.063341736793518, "learning_rate": 1.9615040650406504e-05, "loss": 0.2064, "step": 4735 }, { "epoch": 0.05780487804878049, "grad_norm": 1.1022428274154663, "learning_rate": 1.9614634146341465e-05, "loss": 0.262, "step": 4740 }, { "epoch": 0.05786585365853659, "grad_norm": 4.705485820770264, "learning_rate": 1.9614227642276424e-05, "loss": 0.2264, "step": 4745 }, { "epoch": 0.057926829268292686, "grad_norm": 1.6062382459640503, "learning_rate": 1.9613821138211385e-05, "loss": 0.2557, "step": 4750 }, { "epoch": 0.05798780487804878, "grad_norm": 1.2158821821212769, "learning_rate": 1.9613414634146343e-05, "loss": 0.2541, "step": 4755 }, { "epoch": 0.05804878048780488, "grad_norm": 0.955345094203949, "learning_rate": 1.96130081300813e-05, "loss": 0.2562, "step": 4760 }, { "epoch": 0.05810975609756098, "grad_norm": 1.3730204105377197, "learning_rate": 1.961260162601626e-05, "loss": 0.2425, "step": 4765 }, { "epoch": 0.058170731707317076, "grad_norm": 1.2736151218414307, "learning_rate": 1.961219512195122e-05, "loss": 0.2134, "step": 4770 }, { "epoch": 0.058231707317073174, "grad_norm": 1.1940772533416748, "learning_rate": 1.961178861788618e-05, "loss": 0.2058, "step": 4775 }, { "epoch": 0.05829268292682927, "grad_norm": 1.1941248178482056, "learning_rate": 1.961138211382114e-05, "loss": 0.2529, "step": 4780 }, { "epoch": 0.05835365853658537, "grad_norm": 1.5527523756027222, "learning_rate": 1.96109756097561e-05, "loss": 0.2052, "step": 4785 }, { "epoch": 0.05841463414634147, "grad_norm": 3.0079734325408936, "learning_rate": 1.9610569105691057e-05, "loss": 0.2159, "step": 4790 }, { "epoch": 0.058475609756097564, "grad_norm": 1.4518787860870361, "learning_rate": 1.961016260162602e-05, "loss": 0.2223, "step": 4795 }, { "epoch": 0.05853658536585366, "grad_norm": 1.414472222328186, "learning_rate": 1.9609756097560977e-05, "loss": 0.227, "step": 4800 }, { "epoch": 0.05859756097560975, "grad_norm": 1.3681824207305908, "learning_rate": 1.9609349593495938e-05, "loss": 0.2303, "step": 4805 }, { "epoch": 0.05865853658536585, "grad_norm": 1.1832529306411743, "learning_rate": 1.9608943089430896e-05, "loss": 0.2161, "step": 4810 }, { "epoch": 0.05871951219512195, "grad_norm": 0.9079121351242065, "learning_rate": 1.9608536585365858e-05, "loss": 0.1983, "step": 4815 }, { "epoch": 0.058780487804878045, "grad_norm": 1.4259477853775024, "learning_rate": 1.9608130081300812e-05, "loss": 0.2394, "step": 4820 }, { "epoch": 0.05884146341463414, "grad_norm": 2.038018226623535, "learning_rate": 1.9607723577235774e-05, "loss": 0.2605, "step": 4825 }, { "epoch": 0.05890243902439024, "grad_norm": 1.2862237691879272, "learning_rate": 1.9607317073170732e-05, "loss": 0.2704, "step": 4830 }, { "epoch": 0.05896341463414634, "grad_norm": 0.9505934715270996, "learning_rate": 1.9606910569105694e-05, "loss": 0.2046, "step": 4835 }, { "epoch": 0.059024390243902436, "grad_norm": 1.0464810132980347, "learning_rate": 1.9606504065040652e-05, "loss": 0.1877, "step": 4840 }, { "epoch": 0.059085365853658534, "grad_norm": 1.0010478496551514, "learning_rate": 1.9606097560975613e-05, "loss": 0.1848, "step": 4845 }, { "epoch": 0.05914634146341463, "grad_norm": 1.2325624227523804, "learning_rate": 1.9605691056910568e-05, "loss": 0.1902, "step": 4850 }, { "epoch": 0.05920731707317073, "grad_norm": 1.8347747325897217, "learning_rate": 1.960528455284553e-05, "loss": 0.2194, "step": 4855 }, { "epoch": 0.059268292682926826, "grad_norm": 1.4696542024612427, "learning_rate": 1.9604878048780488e-05, "loss": 0.2193, "step": 4860 }, { "epoch": 0.059329268292682924, "grad_norm": 1.997593879699707, "learning_rate": 1.960447154471545e-05, "loss": 0.267, "step": 4865 }, { "epoch": 0.05939024390243902, "grad_norm": 1.1096110343933105, "learning_rate": 1.9604065040650407e-05, "loss": 0.1963, "step": 4870 }, { "epoch": 0.05945121951219512, "grad_norm": 1.5938149690628052, "learning_rate": 1.960365853658537e-05, "loss": 0.2337, "step": 4875 }, { "epoch": 0.05951219512195122, "grad_norm": 1.7135080099105835, "learning_rate": 1.9603252032520327e-05, "loss": 0.2214, "step": 4880 }, { "epoch": 0.059573170731707314, "grad_norm": 1.0033239126205444, "learning_rate": 1.9602845528455285e-05, "loss": 0.1866, "step": 4885 }, { "epoch": 0.05963414634146341, "grad_norm": 0.9923205375671387, "learning_rate": 1.9602439024390247e-05, "loss": 0.2461, "step": 4890 }, { "epoch": 0.05969512195121951, "grad_norm": 0.9894231557846069, "learning_rate": 1.9602032520325205e-05, "loss": 0.2303, "step": 4895 }, { "epoch": 0.05975609756097561, "grad_norm": 0.910393238067627, "learning_rate": 1.9601626016260166e-05, "loss": 0.1957, "step": 4900 }, { "epoch": 0.059817073170731705, "grad_norm": 1.6793098449707031, "learning_rate": 1.9601219512195124e-05, "loss": 0.2327, "step": 4905 }, { "epoch": 0.0598780487804878, "grad_norm": 3.0670626163482666, "learning_rate": 1.9600813008130082e-05, "loss": 0.2147, "step": 4910 }, { "epoch": 0.0599390243902439, "grad_norm": 1.249030351638794, "learning_rate": 1.960040650406504e-05, "loss": 0.244, "step": 4915 }, { "epoch": 0.06, "grad_norm": 1.259766936302185, "learning_rate": 1.9600000000000002e-05, "loss": 0.1819, "step": 4920 }, { "epoch": 0.060060975609756095, "grad_norm": 1.0892661809921265, "learning_rate": 1.959959349593496e-05, "loss": 0.2314, "step": 4925 }, { "epoch": 0.06012195121951219, "grad_norm": 0.8005820512771606, "learning_rate": 1.9599186991869922e-05, "loss": 0.1929, "step": 4930 }, { "epoch": 0.06018292682926829, "grad_norm": 1.7382147312164307, "learning_rate": 1.959878048780488e-05, "loss": 0.1999, "step": 4935 }, { "epoch": 0.06024390243902439, "grad_norm": 3.6355302333831787, "learning_rate": 1.9598373983739838e-05, "loss": 0.203, "step": 4940 }, { "epoch": 0.060304878048780486, "grad_norm": 1.1618363857269287, "learning_rate": 1.9597967479674796e-05, "loss": 0.172, "step": 4945 }, { "epoch": 0.060365853658536583, "grad_norm": 0.9747534394264221, "learning_rate": 1.9597560975609758e-05, "loss": 0.2182, "step": 4950 }, { "epoch": 0.06042682926829268, "grad_norm": 1.5912144184112549, "learning_rate": 1.9597154471544716e-05, "loss": 0.2097, "step": 4955 }, { "epoch": 0.06048780487804878, "grad_norm": 3.2882742881774902, "learning_rate": 1.9596747967479677e-05, "loss": 0.2376, "step": 4960 }, { "epoch": 0.060548780487804876, "grad_norm": 1.1155685186386108, "learning_rate": 1.9596341463414635e-05, "loss": 0.2037, "step": 4965 }, { "epoch": 0.060609756097560974, "grad_norm": 2.495530605316162, "learning_rate": 1.9595934959349594e-05, "loss": 0.1746, "step": 4970 }, { "epoch": 0.06067073170731707, "grad_norm": 0.9517881870269775, "learning_rate": 1.9595528455284555e-05, "loss": 0.1744, "step": 4975 }, { "epoch": 0.06073170731707317, "grad_norm": 1.288662075996399, "learning_rate": 1.9595121951219513e-05, "loss": 0.2269, "step": 4980 }, { "epoch": 0.06079268292682927, "grad_norm": 0.8235918879508972, "learning_rate": 1.9594715447154475e-05, "loss": 0.2069, "step": 4985 }, { "epoch": 0.060853658536585364, "grad_norm": 1.631434679031372, "learning_rate": 1.9594308943089433e-05, "loss": 0.2518, "step": 4990 }, { "epoch": 0.06091463414634146, "grad_norm": 1.3108566999435425, "learning_rate": 1.959390243902439e-05, "loss": 0.1967, "step": 4995 }, { "epoch": 0.06097560975609756, "grad_norm": 2.100707769393921, "learning_rate": 1.959349593495935e-05, "loss": 0.2078, "step": 5000 }, { "epoch": 0.06103658536585366, "grad_norm": 1.192247748374939, "learning_rate": 1.959308943089431e-05, "loss": 0.2081, "step": 5005 }, { "epoch": 0.061097560975609755, "grad_norm": 1.2766265869140625, "learning_rate": 1.959268292682927e-05, "loss": 0.224, "step": 5010 }, { "epoch": 0.06115853658536585, "grad_norm": 1.1273043155670166, "learning_rate": 1.959227642276423e-05, "loss": 0.238, "step": 5015 }, { "epoch": 0.06121951219512195, "grad_norm": 1.0847195386886597, "learning_rate": 1.959186991869919e-05, "loss": 0.2584, "step": 5020 }, { "epoch": 0.06128048780487805, "grad_norm": 2.3644700050354004, "learning_rate": 1.959146341463415e-05, "loss": 0.2812, "step": 5025 }, { "epoch": 0.061341463414634145, "grad_norm": 2.4323039054870605, "learning_rate": 1.9591056910569105e-05, "loss": 0.218, "step": 5030 }, { "epoch": 0.06140243902439024, "grad_norm": 1.0727382898330688, "learning_rate": 1.9590650406504066e-05, "loss": 0.2662, "step": 5035 }, { "epoch": 0.06146341463414634, "grad_norm": 1.0668284893035889, "learning_rate": 1.9590243902439024e-05, "loss": 0.2233, "step": 5040 }, { "epoch": 0.06152439024390244, "grad_norm": 1.0395164489746094, "learning_rate": 1.9589837398373986e-05, "loss": 0.2286, "step": 5045 }, { "epoch": 0.061585365853658536, "grad_norm": 2.3517556190490723, "learning_rate": 1.9589430894308944e-05, "loss": 0.2985, "step": 5050 }, { "epoch": 0.06164634146341463, "grad_norm": 1.4019510746002197, "learning_rate": 1.9589024390243905e-05, "loss": 0.1759, "step": 5055 }, { "epoch": 0.06170731707317073, "grad_norm": 1.3426706790924072, "learning_rate": 1.9588617886178864e-05, "loss": 0.2698, "step": 5060 }, { "epoch": 0.06176829268292683, "grad_norm": 0.8143264055252075, "learning_rate": 1.958821138211382e-05, "loss": 0.1975, "step": 5065 }, { "epoch": 0.061829268292682926, "grad_norm": 0.7507140636444092, "learning_rate": 1.9587804878048783e-05, "loss": 0.2409, "step": 5070 }, { "epoch": 0.061890243902439024, "grad_norm": 1.1944925785064697, "learning_rate": 1.958739837398374e-05, "loss": 0.2096, "step": 5075 }, { "epoch": 0.06195121951219512, "grad_norm": 1.7390371561050415, "learning_rate": 1.9586991869918703e-05, "loss": 0.225, "step": 5080 }, { "epoch": 0.06201219512195122, "grad_norm": 0.9547528624534607, "learning_rate": 1.958658536585366e-05, "loss": 0.2193, "step": 5085 }, { "epoch": 0.06207317073170732, "grad_norm": 1.5979996919631958, "learning_rate": 1.958617886178862e-05, "loss": 0.2363, "step": 5090 }, { "epoch": 0.062134146341463414, "grad_norm": 1.2684543132781982, "learning_rate": 1.9585772357723577e-05, "loss": 0.2241, "step": 5095 }, { "epoch": 0.06219512195121951, "grad_norm": 1.5525020360946655, "learning_rate": 1.958536585365854e-05, "loss": 0.2036, "step": 5100 }, { "epoch": 0.06225609756097561, "grad_norm": 1.6573811769485474, "learning_rate": 1.9584959349593497e-05, "loss": 0.2045, "step": 5105 }, { "epoch": 0.06231707317073171, "grad_norm": 1.0032970905303955, "learning_rate": 1.958455284552846e-05, "loss": 0.2065, "step": 5110 }, { "epoch": 0.062378048780487805, "grad_norm": 2.2483971118927, "learning_rate": 1.9584146341463416e-05, "loss": 0.2607, "step": 5115 }, { "epoch": 0.0624390243902439, "grad_norm": 1.2174166440963745, "learning_rate": 1.9583739837398375e-05, "loss": 0.2486, "step": 5120 }, { "epoch": 0.0625, "grad_norm": 1.2996450662612915, "learning_rate": 1.9583333333333333e-05, "loss": 0.2533, "step": 5125 }, { "epoch": 0.0625609756097561, "grad_norm": 0.8450466990470886, "learning_rate": 1.9582926829268294e-05, "loss": 0.1622, "step": 5130 }, { "epoch": 0.0626219512195122, "grad_norm": 1.0156826972961426, "learning_rate": 1.9582520325203252e-05, "loss": 0.1712, "step": 5135 }, { "epoch": 0.06268292682926829, "grad_norm": 2.207333564758301, "learning_rate": 1.9582113821138214e-05, "loss": 0.2372, "step": 5140 }, { "epoch": 0.06274390243902439, "grad_norm": 1.818793773651123, "learning_rate": 1.9581707317073172e-05, "loss": 0.2135, "step": 5145 }, { "epoch": 0.06280487804878049, "grad_norm": 1.2105324268341064, "learning_rate": 1.9581300813008134e-05, "loss": 0.1869, "step": 5150 }, { "epoch": 0.06286585365853659, "grad_norm": 1.730587363243103, "learning_rate": 1.958089430894309e-05, "loss": 0.2319, "step": 5155 }, { "epoch": 0.06292682926829268, "grad_norm": 1.6790063381195068, "learning_rate": 1.958048780487805e-05, "loss": 0.218, "step": 5160 }, { "epoch": 0.06298780487804878, "grad_norm": 0.8228532671928406, "learning_rate": 1.958008130081301e-05, "loss": 0.25, "step": 5165 }, { "epoch": 0.06304878048780488, "grad_norm": 1.9322009086608887, "learning_rate": 1.957967479674797e-05, "loss": 0.2334, "step": 5170 }, { "epoch": 0.06310975609756098, "grad_norm": 1.0725115537643433, "learning_rate": 1.9579268292682928e-05, "loss": 0.1806, "step": 5175 }, { "epoch": 0.06317073170731707, "grad_norm": 3.7388763427734375, "learning_rate": 1.957886178861789e-05, "loss": 0.2383, "step": 5180 }, { "epoch": 0.06323170731707317, "grad_norm": 1.0313746929168701, "learning_rate": 1.9578455284552847e-05, "loss": 0.2337, "step": 5185 }, { "epoch": 0.06329268292682927, "grad_norm": 1.3790615797042847, "learning_rate": 1.9578048780487805e-05, "loss": 0.1835, "step": 5190 }, { "epoch": 0.06335365853658537, "grad_norm": 1.9141311645507812, "learning_rate": 1.9577642276422767e-05, "loss": 0.2226, "step": 5195 }, { "epoch": 0.06341463414634146, "grad_norm": 0.9679999351501465, "learning_rate": 1.9577235772357725e-05, "loss": 0.1639, "step": 5200 }, { "epoch": 0.06347560975609756, "grad_norm": 1.2093479633331299, "learning_rate": 1.9576829268292687e-05, "loss": 0.1881, "step": 5205 }, { "epoch": 0.06353658536585366, "grad_norm": 1.395514965057373, "learning_rate": 1.9576422764227645e-05, "loss": 0.2441, "step": 5210 }, { "epoch": 0.06359756097560976, "grad_norm": 1.259371280670166, "learning_rate": 1.9576016260162603e-05, "loss": 0.1717, "step": 5215 }, { "epoch": 0.06365853658536585, "grad_norm": 2.2036428451538086, "learning_rate": 1.957560975609756e-05, "loss": 0.1885, "step": 5220 }, { "epoch": 0.06371951219512195, "grad_norm": 2.0729455947875977, "learning_rate": 1.9575203252032522e-05, "loss": 0.1767, "step": 5225 }, { "epoch": 0.06378048780487805, "grad_norm": 2.1832404136657715, "learning_rate": 1.957479674796748e-05, "loss": 0.2779, "step": 5230 }, { "epoch": 0.06384146341463415, "grad_norm": 1.5616875886917114, "learning_rate": 1.9574390243902442e-05, "loss": 0.23, "step": 5235 }, { "epoch": 0.06390243902439025, "grad_norm": 1.3771553039550781, "learning_rate": 1.95739837398374e-05, "loss": 0.1994, "step": 5240 }, { "epoch": 0.06396341463414634, "grad_norm": 2.7726433277130127, "learning_rate": 1.9573577235772358e-05, "loss": 0.2023, "step": 5245 }, { "epoch": 0.06402439024390244, "grad_norm": 1.1668306589126587, "learning_rate": 1.957317073170732e-05, "loss": 0.2168, "step": 5250 }, { "epoch": 0.06408536585365854, "grad_norm": 1.6613585948944092, "learning_rate": 1.9572764227642278e-05, "loss": 0.2113, "step": 5255 }, { "epoch": 0.06414634146341464, "grad_norm": 1.911489725112915, "learning_rate": 1.9572357723577236e-05, "loss": 0.2683, "step": 5260 }, { "epoch": 0.06420731707317073, "grad_norm": 1.5118707418441772, "learning_rate": 1.9571951219512198e-05, "loss": 0.1719, "step": 5265 }, { "epoch": 0.06426829268292683, "grad_norm": 1.486657738685608, "learning_rate": 1.9571544715447156e-05, "loss": 0.2252, "step": 5270 }, { "epoch": 0.06432926829268293, "grad_norm": 1.112485408782959, "learning_rate": 1.9571138211382114e-05, "loss": 0.2039, "step": 5275 }, { "epoch": 0.06439024390243903, "grad_norm": 1.5715131759643555, "learning_rate": 1.9570731707317075e-05, "loss": 0.268, "step": 5280 }, { "epoch": 0.06445121951219512, "grad_norm": 1.066007375717163, "learning_rate": 1.9570325203252033e-05, "loss": 0.1869, "step": 5285 }, { "epoch": 0.06451219512195122, "grad_norm": 1.0511561632156372, "learning_rate": 1.9569918699186995e-05, "loss": 0.2231, "step": 5290 }, { "epoch": 0.06457317073170732, "grad_norm": 0.9038728475570679, "learning_rate": 1.9569512195121953e-05, "loss": 0.2186, "step": 5295 }, { "epoch": 0.06463414634146342, "grad_norm": 1.5041346549987793, "learning_rate": 1.9569105691056915e-05, "loss": 0.2457, "step": 5300 }, { "epoch": 0.06469512195121951, "grad_norm": 1.1565309762954712, "learning_rate": 1.956869918699187e-05, "loss": 0.2114, "step": 5305 }, { "epoch": 0.06475609756097561, "grad_norm": 0.901034951210022, "learning_rate": 1.956829268292683e-05, "loss": 0.2155, "step": 5310 }, { "epoch": 0.06481707317073171, "grad_norm": 2.2072112560272217, "learning_rate": 1.956788617886179e-05, "loss": 0.1813, "step": 5315 }, { "epoch": 0.06487804878048781, "grad_norm": 0.7108058929443359, "learning_rate": 1.956747967479675e-05, "loss": 0.1665, "step": 5320 }, { "epoch": 0.0649390243902439, "grad_norm": 1.147715449333191, "learning_rate": 1.956707317073171e-05, "loss": 0.2216, "step": 5325 }, { "epoch": 0.065, "grad_norm": 0.7830981016159058, "learning_rate": 1.956666666666667e-05, "loss": 0.1368, "step": 5330 }, { "epoch": 0.0650609756097561, "grad_norm": 1.119322657585144, "learning_rate": 1.956626016260163e-05, "loss": 0.1889, "step": 5335 }, { "epoch": 0.0651219512195122, "grad_norm": 1.190929889678955, "learning_rate": 1.9565853658536586e-05, "loss": 0.2479, "step": 5340 }, { "epoch": 0.0651829268292683, "grad_norm": 2.422050952911377, "learning_rate": 1.9565447154471548e-05, "loss": 0.2703, "step": 5345 }, { "epoch": 0.06524390243902439, "grad_norm": 1.0920062065124512, "learning_rate": 1.9565040650406506e-05, "loss": 0.2049, "step": 5350 }, { "epoch": 0.06530487804878049, "grad_norm": 3.5276079177856445, "learning_rate": 1.9564634146341464e-05, "loss": 0.1937, "step": 5355 }, { "epoch": 0.06536585365853659, "grad_norm": 0.7421805262565613, "learning_rate": 1.9564227642276426e-05, "loss": 0.21, "step": 5360 }, { "epoch": 0.06542682926829269, "grad_norm": 1.1463854312896729, "learning_rate": 1.9563821138211384e-05, "loss": 0.2244, "step": 5365 }, { "epoch": 0.06548780487804878, "grad_norm": 2.627061128616333, "learning_rate": 1.9563414634146342e-05, "loss": 0.2563, "step": 5370 }, { "epoch": 0.06554878048780488, "grad_norm": 2.063666820526123, "learning_rate": 1.9563008130081304e-05, "loss": 0.1939, "step": 5375 }, { "epoch": 0.06560975609756098, "grad_norm": 1.057356834411621, "learning_rate": 1.956260162601626e-05, "loss": 0.2086, "step": 5380 }, { "epoch": 0.06567073170731708, "grad_norm": 1.0575493574142456, "learning_rate": 1.9562195121951223e-05, "loss": 0.2013, "step": 5385 }, { "epoch": 0.06573170731707317, "grad_norm": 2.888458490371704, "learning_rate": 1.956178861788618e-05, "loss": 0.2212, "step": 5390 }, { "epoch": 0.06579268292682927, "grad_norm": 1.61579430103302, "learning_rate": 1.956138211382114e-05, "loss": 0.2405, "step": 5395 }, { "epoch": 0.06585365853658537, "grad_norm": 1.5022215843200684, "learning_rate": 1.9560975609756098e-05, "loss": 0.207, "step": 5400 }, { "epoch": 0.06591463414634147, "grad_norm": 2.7562127113342285, "learning_rate": 1.956056910569106e-05, "loss": 0.1938, "step": 5405 }, { "epoch": 0.06597560975609756, "grad_norm": 0.8678697943687439, "learning_rate": 1.9560162601626017e-05, "loss": 0.185, "step": 5410 }, { "epoch": 0.06603658536585366, "grad_norm": 0.8066620826721191, "learning_rate": 1.955975609756098e-05, "loss": 0.1929, "step": 5415 }, { "epoch": 0.06609756097560976, "grad_norm": 1.7248520851135254, "learning_rate": 1.9559349593495937e-05, "loss": 0.239, "step": 5420 }, { "epoch": 0.06615853658536586, "grad_norm": 1.3518664836883545, "learning_rate": 1.9558943089430895e-05, "loss": 0.1833, "step": 5425 }, { "epoch": 0.06621951219512195, "grad_norm": 1.1668587923049927, "learning_rate": 1.9558536585365856e-05, "loss": 0.1713, "step": 5430 }, { "epoch": 0.06628048780487805, "grad_norm": 2.5065066814422607, "learning_rate": 1.9558130081300815e-05, "loss": 0.2009, "step": 5435 }, { "epoch": 0.06634146341463415, "grad_norm": 1.459516167640686, "learning_rate": 1.9557723577235773e-05, "loss": 0.2274, "step": 5440 }, { "epoch": 0.06640243902439025, "grad_norm": 1.3682218790054321, "learning_rate": 1.9557317073170734e-05, "loss": 0.2171, "step": 5445 }, { "epoch": 0.06646341463414634, "grad_norm": 1.4349440336227417, "learning_rate": 1.9556910569105692e-05, "loss": 0.2587, "step": 5450 }, { "epoch": 0.06652439024390244, "grad_norm": 0.9627454876899719, "learning_rate": 1.955650406504065e-05, "loss": 0.2001, "step": 5455 }, { "epoch": 0.06658536585365854, "grad_norm": 0.9537251591682434, "learning_rate": 1.9556097560975612e-05, "loss": 0.2405, "step": 5460 }, { "epoch": 0.06664634146341464, "grad_norm": 1.2162224054336548, "learning_rate": 1.955569105691057e-05, "loss": 0.1984, "step": 5465 }, { "epoch": 0.06670731707317074, "grad_norm": 1.2555063962936401, "learning_rate": 1.955528455284553e-05, "loss": 0.211, "step": 5470 }, { "epoch": 0.06676829268292683, "grad_norm": 1.6012171506881714, "learning_rate": 1.955487804878049e-05, "loss": 0.2214, "step": 5475 }, { "epoch": 0.06682926829268293, "grad_norm": 2.5684635639190674, "learning_rate": 1.955447154471545e-05, "loss": 0.293, "step": 5480 }, { "epoch": 0.06689024390243903, "grad_norm": 1.103837490081787, "learning_rate": 1.9554065040650406e-05, "loss": 0.1783, "step": 5485 }, { "epoch": 0.06695121951219513, "grad_norm": 1.2755159139633179, "learning_rate": 1.9553658536585368e-05, "loss": 0.2188, "step": 5490 }, { "epoch": 0.06701219512195122, "grad_norm": 2.1110167503356934, "learning_rate": 1.9553252032520326e-05, "loss": 0.1876, "step": 5495 }, { "epoch": 0.06707317073170732, "grad_norm": 1.3043817281723022, "learning_rate": 1.9552845528455287e-05, "loss": 0.2421, "step": 5500 }, { "epoch": 0.06713414634146342, "grad_norm": 0.8325398564338684, "learning_rate": 1.9552439024390245e-05, "loss": 0.1835, "step": 5505 }, { "epoch": 0.06719512195121952, "grad_norm": 0.8601069450378418, "learning_rate": 1.9552032520325207e-05, "loss": 0.2128, "step": 5510 }, { "epoch": 0.06725609756097561, "grad_norm": 2.053575277328491, "learning_rate": 1.9551626016260165e-05, "loss": 0.2099, "step": 5515 }, { "epoch": 0.06731707317073171, "grad_norm": 0.881761372089386, "learning_rate": 1.9551219512195123e-05, "loss": 0.1975, "step": 5520 }, { "epoch": 0.06737804878048781, "grad_norm": 1.1939113140106201, "learning_rate": 1.955081300813008e-05, "loss": 0.1802, "step": 5525 }, { "epoch": 0.0674390243902439, "grad_norm": 0.7805212736129761, "learning_rate": 1.9550406504065043e-05, "loss": 0.2273, "step": 5530 }, { "epoch": 0.0675, "grad_norm": 2.305536985397339, "learning_rate": 1.955e-05, "loss": 0.2262, "step": 5535 }, { "epoch": 0.0675609756097561, "grad_norm": 1.7969821691513062, "learning_rate": 1.9549593495934962e-05, "loss": 0.2619, "step": 5540 }, { "epoch": 0.0676219512195122, "grad_norm": 1.752863883972168, "learning_rate": 1.954918699186992e-05, "loss": 0.232, "step": 5545 }, { "epoch": 0.0676829268292683, "grad_norm": 1.1362732648849487, "learning_rate": 1.954878048780488e-05, "loss": 0.1891, "step": 5550 }, { "epoch": 0.0677439024390244, "grad_norm": 1.0300452709197998, "learning_rate": 1.954837398373984e-05, "loss": 0.2153, "step": 5555 }, { "epoch": 0.06780487804878049, "grad_norm": 1.1878330707550049, "learning_rate": 1.9547967479674798e-05, "loss": 0.1949, "step": 5560 }, { "epoch": 0.06786585365853659, "grad_norm": 2.5051944255828857, "learning_rate": 1.954756097560976e-05, "loss": 0.2478, "step": 5565 }, { "epoch": 0.06792682926829269, "grad_norm": 1.1977285146713257, "learning_rate": 1.9547154471544718e-05, "loss": 0.1568, "step": 5570 }, { "epoch": 0.06798780487804879, "grad_norm": 1.8843936920166016, "learning_rate": 1.9546747967479676e-05, "loss": 0.2433, "step": 5575 }, { "epoch": 0.06804878048780488, "grad_norm": 1.2589023113250732, "learning_rate": 1.9546341463414634e-05, "loss": 0.2132, "step": 5580 }, { "epoch": 0.06810975609756098, "grad_norm": 0.8794894218444824, "learning_rate": 1.9545934959349596e-05, "loss": 0.241, "step": 5585 }, { "epoch": 0.06817073170731708, "grad_norm": 1.5196013450622559, "learning_rate": 1.9545528455284554e-05, "loss": 0.2362, "step": 5590 }, { "epoch": 0.06823170731707318, "grad_norm": 1.269914150238037, "learning_rate": 1.9545121951219515e-05, "loss": 0.1863, "step": 5595 }, { "epoch": 0.06829268292682927, "grad_norm": 2.8852956295013428, "learning_rate": 1.9544715447154473e-05, "loss": 0.1619, "step": 5600 }, { "epoch": 0.06835365853658537, "grad_norm": 2.816617727279663, "learning_rate": 1.954430894308943e-05, "loss": 0.1873, "step": 5605 }, { "epoch": 0.06841463414634147, "grad_norm": 0.7460029721260071, "learning_rate": 1.9543902439024393e-05, "loss": 0.1742, "step": 5610 }, { "epoch": 0.06847560975609757, "grad_norm": 0.8367026448249817, "learning_rate": 1.954349593495935e-05, "loss": 0.1727, "step": 5615 }, { "epoch": 0.06853658536585366, "grad_norm": 4.047098636627197, "learning_rate": 1.954308943089431e-05, "loss": 0.2369, "step": 5620 }, { "epoch": 0.06859756097560976, "grad_norm": 2.2547049522399902, "learning_rate": 1.954268292682927e-05, "loss": 0.1999, "step": 5625 }, { "epoch": 0.06865853658536586, "grad_norm": 2.0497725009918213, "learning_rate": 1.954227642276423e-05, "loss": 0.2206, "step": 5630 }, { "epoch": 0.06871951219512196, "grad_norm": 1.3440226316452026, "learning_rate": 1.9541869918699187e-05, "loss": 0.2313, "step": 5635 }, { "epoch": 0.06878048780487805, "grad_norm": 1.4772146940231323, "learning_rate": 1.954146341463415e-05, "loss": 0.1958, "step": 5640 }, { "epoch": 0.06884146341463415, "grad_norm": 2.2378950119018555, "learning_rate": 1.9541056910569107e-05, "loss": 0.2169, "step": 5645 }, { "epoch": 0.06890243902439025, "grad_norm": 1.1364307403564453, "learning_rate": 1.9540650406504068e-05, "loss": 0.1973, "step": 5650 }, { "epoch": 0.06896341463414635, "grad_norm": 1.0244370698928833, "learning_rate": 1.9540243902439026e-05, "loss": 0.1899, "step": 5655 }, { "epoch": 0.06902439024390244, "grad_norm": 1.0516488552093506, "learning_rate": 1.9539837398373988e-05, "loss": 0.178, "step": 5660 }, { "epoch": 0.06908536585365854, "grad_norm": 1.005373239517212, "learning_rate": 1.9539430894308943e-05, "loss": 0.2186, "step": 5665 }, { "epoch": 0.06914634146341464, "grad_norm": 1.6780197620391846, "learning_rate": 1.9539024390243904e-05, "loss": 0.2163, "step": 5670 }, { "epoch": 0.06920731707317074, "grad_norm": 1.117673635482788, "learning_rate": 1.9538617886178862e-05, "loss": 0.1988, "step": 5675 }, { "epoch": 0.06926829268292684, "grad_norm": 1.5734533071517944, "learning_rate": 1.9538211382113824e-05, "loss": 0.2418, "step": 5680 }, { "epoch": 0.06932926829268293, "grad_norm": 1.6757392883300781, "learning_rate": 1.9537804878048782e-05, "loss": 0.2473, "step": 5685 }, { "epoch": 0.06939024390243903, "grad_norm": 1.13664710521698, "learning_rate": 1.9537398373983743e-05, "loss": 0.1922, "step": 5690 }, { "epoch": 0.06945121951219513, "grad_norm": 1.849396824836731, "learning_rate": 1.95369918699187e-05, "loss": 0.2246, "step": 5695 }, { "epoch": 0.06951219512195123, "grad_norm": 0.9028438925743103, "learning_rate": 1.953658536585366e-05, "loss": 0.1918, "step": 5700 }, { "epoch": 0.06957317073170732, "grad_norm": 1.5957348346710205, "learning_rate": 1.9536178861788618e-05, "loss": 0.1868, "step": 5705 }, { "epoch": 0.06963414634146342, "grad_norm": 1.116076946258545, "learning_rate": 1.953577235772358e-05, "loss": 0.1747, "step": 5710 }, { "epoch": 0.06969512195121952, "grad_norm": 1.8066339492797852, "learning_rate": 1.9535365853658538e-05, "loss": 0.2171, "step": 5715 }, { "epoch": 0.06975609756097562, "grad_norm": 0.8785097599029541, "learning_rate": 1.95349593495935e-05, "loss": 0.2231, "step": 5720 }, { "epoch": 0.06981707317073171, "grad_norm": 0.9987844824790955, "learning_rate": 1.9534552845528457e-05, "loss": 0.2178, "step": 5725 }, { "epoch": 0.06987804878048781, "grad_norm": 1.4637370109558105, "learning_rate": 1.9534146341463415e-05, "loss": 0.1949, "step": 5730 }, { "epoch": 0.06993902439024391, "grad_norm": 0.8498075604438782, "learning_rate": 1.9533739837398377e-05, "loss": 0.2435, "step": 5735 }, { "epoch": 0.07, "grad_norm": 1.596023440361023, "learning_rate": 1.9533333333333335e-05, "loss": 0.2012, "step": 5740 }, { "epoch": 0.0700609756097561, "grad_norm": 2.4586005210876465, "learning_rate": 1.9532926829268296e-05, "loss": 0.208, "step": 5745 }, { "epoch": 0.0701219512195122, "grad_norm": 1.0734964609146118, "learning_rate": 1.9532520325203255e-05, "loss": 0.1642, "step": 5750 }, { "epoch": 0.0701829268292683, "grad_norm": 2.0992555618286133, "learning_rate": 1.9532113821138213e-05, "loss": 0.213, "step": 5755 }, { "epoch": 0.0702439024390244, "grad_norm": 1.0996967554092407, "learning_rate": 1.953170731707317e-05, "loss": 0.2323, "step": 5760 }, { "epoch": 0.0703048780487805, "grad_norm": 1.17047917842865, "learning_rate": 1.9531300813008132e-05, "loss": 0.2278, "step": 5765 }, { "epoch": 0.07036585365853658, "grad_norm": 0.9332453012466431, "learning_rate": 1.953089430894309e-05, "loss": 0.2228, "step": 5770 }, { "epoch": 0.07042682926829268, "grad_norm": 0.8464221954345703, "learning_rate": 1.9530487804878052e-05, "loss": 0.1699, "step": 5775 }, { "epoch": 0.07048780487804877, "grad_norm": 0.9029179811477661, "learning_rate": 1.953008130081301e-05, "loss": 0.2091, "step": 5780 }, { "epoch": 0.07054878048780487, "grad_norm": 1.1833710670471191, "learning_rate": 1.9529674796747968e-05, "loss": 0.2202, "step": 5785 }, { "epoch": 0.07060975609756097, "grad_norm": 1.8679014444351196, "learning_rate": 1.9529268292682926e-05, "loss": 0.2164, "step": 5790 }, { "epoch": 0.07067073170731707, "grad_norm": 1.1535508632659912, "learning_rate": 1.9528861788617888e-05, "loss": 0.1967, "step": 5795 }, { "epoch": 0.07073170731707316, "grad_norm": 1.4973281621932983, "learning_rate": 1.9528455284552846e-05, "loss": 0.2071, "step": 5800 }, { "epoch": 0.07079268292682926, "grad_norm": 1.3119977712631226, "learning_rate": 1.9528048780487808e-05, "loss": 0.2199, "step": 5805 }, { "epoch": 0.07085365853658536, "grad_norm": 1.3981627225875854, "learning_rate": 1.9527642276422766e-05, "loss": 0.2346, "step": 5810 }, { "epoch": 0.07091463414634146, "grad_norm": 2.176456928253174, "learning_rate": 1.9527235772357724e-05, "loss": 0.2675, "step": 5815 }, { "epoch": 0.07097560975609755, "grad_norm": 0.9641260504722595, "learning_rate": 1.9526829268292685e-05, "loss": 0.2491, "step": 5820 }, { "epoch": 0.07103658536585365, "grad_norm": 1.2384932041168213, "learning_rate": 1.9526422764227643e-05, "loss": 0.2075, "step": 5825 }, { "epoch": 0.07109756097560975, "grad_norm": 1.2298928499221802, "learning_rate": 1.9526016260162605e-05, "loss": 0.1697, "step": 5830 }, { "epoch": 0.07115853658536585, "grad_norm": 1.541123867034912, "learning_rate": 1.9525609756097563e-05, "loss": 0.2161, "step": 5835 }, { "epoch": 0.07121951219512195, "grad_norm": 1.6871858835220337, "learning_rate": 1.9525203252032525e-05, "loss": 0.1998, "step": 5840 }, { "epoch": 0.07128048780487804, "grad_norm": 0.8602230548858643, "learning_rate": 1.952479674796748e-05, "loss": 0.1761, "step": 5845 }, { "epoch": 0.07134146341463414, "grad_norm": 1.3540884256362915, "learning_rate": 1.952439024390244e-05, "loss": 0.2407, "step": 5850 }, { "epoch": 0.07140243902439024, "grad_norm": 1.3052583932876587, "learning_rate": 1.95239837398374e-05, "loss": 0.2313, "step": 5855 }, { "epoch": 0.07146341463414634, "grad_norm": 0.635140061378479, "learning_rate": 1.952357723577236e-05, "loss": 0.1532, "step": 5860 }, { "epoch": 0.07152439024390243, "grad_norm": 1.207486629486084, "learning_rate": 1.952317073170732e-05, "loss": 0.1679, "step": 5865 }, { "epoch": 0.07158536585365853, "grad_norm": 0.9512946009635925, "learning_rate": 1.952276422764228e-05, "loss": 0.1638, "step": 5870 }, { "epoch": 0.07164634146341463, "grad_norm": 1.427244782447815, "learning_rate": 1.9522357723577238e-05, "loss": 0.1939, "step": 5875 }, { "epoch": 0.07170731707317073, "grad_norm": 1.594229817390442, "learning_rate": 1.9521951219512196e-05, "loss": 0.2528, "step": 5880 }, { "epoch": 0.07176829268292682, "grad_norm": 0.9782878756523132, "learning_rate": 1.9521544715447155e-05, "loss": 0.21, "step": 5885 }, { "epoch": 0.07182926829268292, "grad_norm": 1.2968884706497192, "learning_rate": 1.9521138211382116e-05, "loss": 0.2235, "step": 5890 }, { "epoch": 0.07189024390243902, "grad_norm": 1.3600236177444458, "learning_rate": 1.9520731707317074e-05, "loss": 0.1735, "step": 5895 }, { "epoch": 0.07195121951219512, "grad_norm": 1.1811197996139526, "learning_rate": 1.9520325203252036e-05, "loss": 0.2091, "step": 5900 }, { "epoch": 0.07201219512195121, "grad_norm": 1.0719733238220215, "learning_rate": 1.9519918699186994e-05, "loss": 0.1631, "step": 5905 }, { "epoch": 0.07207317073170731, "grad_norm": 2.301990032196045, "learning_rate": 1.9519512195121952e-05, "loss": 0.2147, "step": 5910 }, { "epoch": 0.07213414634146341, "grad_norm": 1.0787192583084106, "learning_rate": 1.9519105691056913e-05, "loss": 0.167, "step": 5915 }, { "epoch": 0.0721951219512195, "grad_norm": 1.0314098596572876, "learning_rate": 1.951869918699187e-05, "loss": 0.2103, "step": 5920 }, { "epoch": 0.0722560975609756, "grad_norm": 0.9224969744682312, "learning_rate": 1.9518292682926833e-05, "loss": 0.1703, "step": 5925 }, { "epoch": 0.0723170731707317, "grad_norm": 0.9998944401741028, "learning_rate": 1.951788617886179e-05, "loss": 0.1914, "step": 5930 }, { "epoch": 0.0723780487804878, "grad_norm": 1.8903546333312988, "learning_rate": 1.951747967479675e-05, "loss": 0.305, "step": 5935 }, { "epoch": 0.0724390243902439, "grad_norm": 2.6577577590942383, "learning_rate": 1.9517073170731707e-05, "loss": 0.1684, "step": 5940 }, { "epoch": 0.0725, "grad_norm": 0.9777588844299316, "learning_rate": 1.951666666666667e-05, "loss": 0.211, "step": 5945 }, { "epoch": 0.07256097560975609, "grad_norm": 1.8251609802246094, "learning_rate": 1.9516260162601627e-05, "loss": 0.2021, "step": 5950 }, { "epoch": 0.07262195121951219, "grad_norm": 2.7486627101898193, "learning_rate": 1.951585365853659e-05, "loss": 0.2109, "step": 5955 }, { "epoch": 0.07268292682926829, "grad_norm": 1.1087238788604736, "learning_rate": 1.9515447154471547e-05, "loss": 0.1713, "step": 5960 }, { "epoch": 0.07274390243902439, "grad_norm": 1.1485934257507324, "learning_rate": 1.9515040650406505e-05, "loss": 0.2557, "step": 5965 }, { "epoch": 0.07280487804878048, "grad_norm": 0.8549350500106812, "learning_rate": 1.9514634146341463e-05, "loss": 0.1383, "step": 5970 }, { "epoch": 0.07286585365853658, "grad_norm": 1.9807463884353638, "learning_rate": 1.9514227642276425e-05, "loss": 0.2106, "step": 5975 }, { "epoch": 0.07292682926829268, "grad_norm": 1.2951231002807617, "learning_rate": 1.9513821138211383e-05, "loss": 0.183, "step": 5980 }, { "epoch": 0.07298780487804878, "grad_norm": 1.1457866430282593, "learning_rate": 1.9513414634146344e-05, "loss": 0.1731, "step": 5985 }, { "epoch": 0.07304878048780487, "grad_norm": 1.249071717262268, "learning_rate": 1.9513008130081302e-05, "loss": 0.1704, "step": 5990 }, { "epoch": 0.07310975609756097, "grad_norm": 2.223555088043213, "learning_rate": 1.951260162601626e-05, "loss": 0.1564, "step": 5995 }, { "epoch": 0.07317073170731707, "grad_norm": 1.1866366863250732, "learning_rate": 1.9512195121951222e-05, "loss": 0.1987, "step": 6000 }, { "epoch": 0.07323170731707317, "grad_norm": 2.8957812786102295, "learning_rate": 1.951178861788618e-05, "loss": 0.2438, "step": 6005 }, { "epoch": 0.07329268292682926, "grad_norm": 0.7425148487091064, "learning_rate": 1.951138211382114e-05, "loss": 0.1417, "step": 6010 }, { "epoch": 0.07335365853658536, "grad_norm": 1.1451812982559204, "learning_rate": 1.95109756097561e-05, "loss": 0.2041, "step": 6015 }, { "epoch": 0.07341463414634146, "grad_norm": 1.2600020170211792, "learning_rate": 1.951056910569106e-05, "loss": 0.1718, "step": 6020 }, { "epoch": 0.07347560975609756, "grad_norm": 1.3407421112060547, "learning_rate": 1.9510162601626016e-05, "loss": 0.2, "step": 6025 }, { "epoch": 0.07353658536585365, "grad_norm": 1.1161715984344482, "learning_rate": 1.9509756097560977e-05, "loss": 0.2017, "step": 6030 }, { "epoch": 0.07359756097560975, "grad_norm": 1.0937471389770508, "learning_rate": 1.9509349593495936e-05, "loss": 0.1814, "step": 6035 }, { "epoch": 0.07365853658536585, "grad_norm": 0.9002366065979004, "learning_rate": 1.9508943089430897e-05, "loss": 0.1897, "step": 6040 }, { "epoch": 0.07371951219512195, "grad_norm": 0.7867570519447327, "learning_rate": 1.9508536585365855e-05, "loss": 0.2051, "step": 6045 }, { "epoch": 0.07378048780487804, "grad_norm": 1.0601557493209839, "learning_rate": 1.9508130081300817e-05, "loss": 0.2219, "step": 6050 }, { "epoch": 0.07384146341463414, "grad_norm": 1.1567208766937256, "learning_rate": 1.950772357723577e-05, "loss": 0.2064, "step": 6055 }, { "epoch": 0.07390243902439024, "grad_norm": 1.2209492921829224, "learning_rate": 1.9507317073170733e-05, "loss": 0.1903, "step": 6060 }, { "epoch": 0.07396341463414634, "grad_norm": 0.8404496908187866, "learning_rate": 1.950691056910569e-05, "loss": 0.1905, "step": 6065 }, { "epoch": 0.07402439024390244, "grad_norm": 0.7720926403999329, "learning_rate": 1.9506504065040653e-05, "loss": 0.2241, "step": 6070 }, { "epoch": 0.07408536585365853, "grad_norm": 2.3734822273254395, "learning_rate": 1.950609756097561e-05, "loss": 0.1515, "step": 6075 }, { "epoch": 0.07414634146341463, "grad_norm": 0.8367041349411011, "learning_rate": 1.9505691056910572e-05, "loss": 0.157, "step": 6080 }, { "epoch": 0.07420731707317073, "grad_norm": 2.8215014934539795, "learning_rate": 1.950528455284553e-05, "loss": 0.2406, "step": 6085 }, { "epoch": 0.07426829268292683, "grad_norm": 1.593270182609558, "learning_rate": 1.950487804878049e-05, "loss": 0.1874, "step": 6090 }, { "epoch": 0.07432926829268292, "grad_norm": 1.1161048412322998, "learning_rate": 1.950447154471545e-05, "loss": 0.2007, "step": 6095 }, { "epoch": 0.07439024390243902, "grad_norm": 1.228432059288025, "learning_rate": 1.9504065040650408e-05, "loss": 0.1717, "step": 6100 }, { "epoch": 0.07445121951219512, "grad_norm": 2.4750938415527344, "learning_rate": 1.950365853658537e-05, "loss": 0.2353, "step": 6105 }, { "epoch": 0.07451219512195122, "grad_norm": 1.2000079154968262, "learning_rate": 1.9503252032520328e-05, "loss": 0.2043, "step": 6110 }, { "epoch": 0.07457317073170731, "grad_norm": 1.228704810142517, "learning_rate": 1.9502845528455286e-05, "loss": 0.1846, "step": 6115 }, { "epoch": 0.07463414634146341, "grad_norm": 2.3963799476623535, "learning_rate": 1.9502439024390244e-05, "loss": 0.1937, "step": 6120 }, { "epoch": 0.07469512195121951, "grad_norm": 0.7228688597679138, "learning_rate": 1.9502032520325206e-05, "loss": 0.2298, "step": 6125 }, { "epoch": 0.0747560975609756, "grad_norm": 1.3350638151168823, "learning_rate": 1.9501626016260164e-05, "loss": 0.2202, "step": 6130 }, { "epoch": 0.0748170731707317, "grad_norm": 0.9343914985656738, "learning_rate": 1.9501219512195125e-05, "loss": 0.1687, "step": 6135 }, { "epoch": 0.0748780487804878, "grad_norm": 0.8022004961967468, "learning_rate": 1.9500813008130083e-05, "loss": 0.188, "step": 6140 }, { "epoch": 0.0749390243902439, "grad_norm": 1.0067503452301025, "learning_rate": 1.950040650406504e-05, "loss": 0.195, "step": 6145 }, { "epoch": 0.075, "grad_norm": 0.908394455909729, "learning_rate": 1.95e-05, "loss": 0.2264, "step": 6150 }, { "epoch": 0.0750609756097561, "grad_norm": 0.7967893481254578, "learning_rate": 1.949959349593496e-05, "loss": 0.2046, "step": 6155 }, { "epoch": 0.07512195121951219, "grad_norm": 0.8775299787521362, "learning_rate": 1.949918699186992e-05, "loss": 0.2185, "step": 6160 }, { "epoch": 0.07518292682926829, "grad_norm": 2.6643941402435303, "learning_rate": 1.949878048780488e-05, "loss": 0.1966, "step": 6165 }, { "epoch": 0.07524390243902439, "grad_norm": 1.309914231300354, "learning_rate": 1.949837398373984e-05, "loss": 0.1914, "step": 6170 }, { "epoch": 0.07530487804878049, "grad_norm": 1.5475555658340454, "learning_rate": 1.9497967479674797e-05, "loss": 0.1989, "step": 6175 }, { "epoch": 0.07536585365853658, "grad_norm": 0.8922855257987976, "learning_rate": 1.949756097560976e-05, "loss": 0.199, "step": 6180 }, { "epoch": 0.07542682926829268, "grad_norm": 1.0412688255310059, "learning_rate": 1.9497154471544717e-05, "loss": 0.1852, "step": 6185 }, { "epoch": 0.07548780487804878, "grad_norm": 0.7703520655632019, "learning_rate": 1.9496747967479678e-05, "loss": 0.1532, "step": 6190 }, { "epoch": 0.07554878048780488, "grad_norm": 0.7280919551849365, "learning_rate": 1.9496341463414636e-05, "loss": 0.1856, "step": 6195 }, { "epoch": 0.07560975609756097, "grad_norm": 1.1446725130081177, "learning_rate": 1.9495934959349594e-05, "loss": 0.1994, "step": 6200 }, { "epoch": 0.07567073170731707, "grad_norm": 1.1172274351119995, "learning_rate": 1.9495528455284553e-05, "loss": 0.2207, "step": 6205 }, { "epoch": 0.07573170731707317, "grad_norm": 0.9720643758773804, "learning_rate": 1.9495121951219514e-05, "loss": 0.1918, "step": 6210 }, { "epoch": 0.07579268292682927, "grad_norm": 0.9295377731323242, "learning_rate": 1.9494715447154472e-05, "loss": 0.166, "step": 6215 }, { "epoch": 0.07585365853658536, "grad_norm": 1.301262617111206, "learning_rate": 1.9494308943089434e-05, "loss": 0.1571, "step": 6220 }, { "epoch": 0.07591463414634146, "grad_norm": 1.2347996234893799, "learning_rate": 1.9493902439024392e-05, "loss": 0.2249, "step": 6225 }, { "epoch": 0.07597560975609756, "grad_norm": 1.0828520059585571, "learning_rate": 1.9493495934959353e-05, "loss": 0.2279, "step": 6230 }, { "epoch": 0.07603658536585366, "grad_norm": 1.734541416168213, "learning_rate": 1.9493089430894308e-05, "loss": 0.209, "step": 6235 }, { "epoch": 0.07609756097560975, "grad_norm": 1.7527519464492798, "learning_rate": 1.949268292682927e-05, "loss": 0.2127, "step": 6240 }, { "epoch": 0.07615853658536585, "grad_norm": 1.4249022006988525, "learning_rate": 1.9492276422764228e-05, "loss": 0.1662, "step": 6245 }, { "epoch": 0.07621951219512195, "grad_norm": 0.9574075937271118, "learning_rate": 1.949186991869919e-05, "loss": 0.242, "step": 6250 }, { "epoch": 0.07628048780487805, "grad_norm": 0.9937894940376282, "learning_rate": 1.9491463414634147e-05, "loss": 0.1845, "step": 6255 }, { "epoch": 0.07634146341463414, "grad_norm": 1.133631706237793, "learning_rate": 1.949105691056911e-05, "loss": 0.2321, "step": 6260 }, { "epoch": 0.07640243902439024, "grad_norm": 1.1015970706939697, "learning_rate": 1.9490650406504067e-05, "loss": 0.1925, "step": 6265 }, { "epoch": 0.07646341463414634, "grad_norm": 1.3315821886062622, "learning_rate": 1.9490243902439025e-05, "loss": 0.176, "step": 6270 }, { "epoch": 0.07652439024390244, "grad_norm": 1.0007425546646118, "learning_rate": 1.9489837398373987e-05, "loss": 0.186, "step": 6275 }, { "epoch": 0.07658536585365854, "grad_norm": 0.8855968713760376, "learning_rate": 1.9489430894308945e-05, "loss": 0.2198, "step": 6280 }, { "epoch": 0.07664634146341463, "grad_norm": 0.9584681987762451, "learning_rate": 1.9489024390243906e-05, "loss": 0.1895, "step": 6285 }, { "epoch": 0.07670731707317073, "grad_norm": 0.7497948408126831, "learning_rate": 1.9488617886178864e-05, "loss": 0.1858, "step": 6290 }, { "epoch": 0.07676829268292683, "grad_norm": 0.8190793395042419, "learning_rate": 1.9488211382113823e-05, "loss": 0.1602, "step": 6295 }, { "epoch": 0.07682926829268293, "grad_norm": 1.0475132465362549, "learning_rate": 1.948780487804878e-05, "loss": 0.2733, "step": 6300 }, { "epoch": 0.07689024390243902, "grad_norm": 0.7184866070747375, "learning_rate": 1.9487398373983742e-05, "loss": 0.1566, "step": 6305 }, { "epoch": 0.07695121951219512, "grad_norm": 1.2056105136871338, "learning_rate": 1.94869918699187e-05, "loss": 0.1767, "step": 6310 }, { "epoch": 0.07701219512195122, "grad_norm": 1.7630027532577515, "learning_rate": 1.9486585365853662e-05, "loss": 0.1989, "step": 6315 }, { "epoch": 0.07707317073170732, "grad_norm": 1.7822225093841553, "learning_rate": 1.948617886178862e-05, "loss": 0.2307, "step": 6320 }, { "epoch": 0.07713414634146341, "grad_norm": 0.9021450877189636, "learning_rate": 1.9485772357723578e-05, "loss": 0.2488, "step": 6325 }, { "epoch": 0.07719512195121951, "grad_norm": 0.9662003517150879, "learning_rate": 1.9485365853658536e-05, "loss": 0.1709, "step": 6330 }, { "epoch": 0.07725609756097561, "grad_norm": 1.5350160598754883, "learning_rate": 1.9484959349593498e-05, "loss": 0.2344, "step": 6335 }, { "epoch": 0.0773170731707317, "grad_norm": 3.6134750843048096, "learning_rate": 1.9484552845528456e-05, "loss": 0.2052, "step": 6340 }, { "epoch": 0.0773780487804878, "grad_norm": 1.4335066080093384, "learning_rate": 1.9484146341463417e-05, "loss": 0.2532, "step": 6345 }, { "epoch": 0.0774390243902439, "grad_norm": 1.52924382686615, "learning_rate": 1.9483739837398376e-05, "loss": 0.1645, "step": 6350 }, { "epoch": 0.0775, "grad_norm": 1.2885639667510986, "learning_rate": 1.9483333333333334e-05, "loss": 0.1988, "step": 6355 }, { "epoch": 0.0775609756097561, "grad_norm": 0.8089710474014282, "learning_rate": 1.9482926829268295e-05, "loss": 0.1475, "step": 6360 }, { "epoch": 0.0776219512195122, "grad_norm": 1.1269980669021606, "learning_rate": 1.9482520325203253e-05, "loss": 0.1876, "step": 6365 }, { "epoch": 0.07768292682926829, "grad_norm": 1.8169288635253906, "learning_rate": 1.9482113821138215e-05, "loss": 0.2717, "step": 6370 }, { "epoch": 0.07774390243902439, "grad_norm": 1.9684611558914185, "learning_rate": 1.9481707317073173e-05, "loss": 0.1972, "step": 6375 }, { "epoch": 0.07780487804878049, "grad_norm": 1.0382462739944458, "learning_rate": 1.948130081300813e-05, "loss": 0.1768, "step": 6380 }, { "epoch": 0.07786585365853659, "grad_norm": 1.3596293926239014, "learning_rate": 1.948089430894309e-05, "loss": 0.1954, "step": 6385 }, { "epoch": 0.07792682926829268, "grad_norm": 1.1067713499069214, "learning_rate": 1.948048780487805e-05, "loss": 0.2127, "step": 6390 }, { "epoch": 0.07798780487804878, "grad_norm": 0.9609337449073792, "learning_rate": 1.948008130081301e-05, "loss": 0.1892, "step": 6395 }, { "epoch": 0.07804878048780488, "grad_norm": 1.0901367664337158, "learning_rate": 1.947967479674797e-05, "loss": 0.2131, "step": 6400 }, { "epoch": 0.07810975609756098, "grad_norm": 0.6361771821975708, "learning_rate": 1.947926829268293e-05, "loss": 0.165, "step": 6405 }, { "epoch": 0.07817073170731707, "grad_norm": 1.8959449529647827, "learning_rate": 1.947886178861789e-05, "loss": 0.2034, "step": 6410 }, { "epoch": 0.07823170731707317, "grad_norm": 1.062811255455017, "learning_rate": 1.9478455284552845e-05, "loss": 0.2164, "step": 6415 }, { "epoch": 0.07829268292682927, "grad_norm": 1.3320800065994263, "learning_rate": 1.9478048780487806e-05, "loss": 0.2185, "step": 6420 }, { "epoch": 0.07835365853658537, "grad_norm": 2.118178606033325, "learning_rate": 1.9477642276422764e-05, "loss": 0.1886, "step": 6425 }, { "epoch": 0.07841463414634146, "grad_norm": 0.6408555507659912, "learning_rate": 1.9477235772357726e-05, "loss": 0.1986, "step": 6430 }, { "epoch": 0.07847560975609756, "grad_norm": 1.0601708889007568, "learning_rate": 1.9476829268292684e-05, "loss": 0.2079, "step": 6435 }, { "epoch": 0.07853658536585366, "grad_norm": 1.1027470827102661, "learning_rate": 1.9476422764227646e-05, "loss": 0.2018, "step": 6440 }, { "epoch": 0.07859756097560976, "grad_norm": 1.2896451950073242, "learning_rate": 1.9476016260162604e-05, "loss": 0.1906, "step": 6445 }, { "epoch": 0.07865853658536585, "grad_norm": 1.0036288499832153, "learning_rate": 1.9475609756097562e-05, "loss": 0.2135, "step": 6450 }, { "epoch": 0.07871951219512195, "grad_norm": 1.165128231048584, "learning_rate": 1.9475203252032523e-05, "loss": 0.1983, "step": 6455 }, { "epoch": 0.07878048780487805, "grad_norm": 1.110105276107788, "learning_rate": 1.947479674796748e-05, "loss": 0.2045, "step": 6460 }, { "epoch": 0.07884146341463415, "grad_norm": 0.9482144117355347, "learning_rate": 1.947439024390244e-05, "loss": 0.2699, "step": 6465 }, { "epoch": 0.07890243902439024, "grad_norm": 1.096529483795166, "learning_rate": 1.94739837398374e-05, "loss": 0.2305, "step": 6470 }, { "epoch": 0.07896341463414634, "grad_norm": 1.1739745140075684, "learning_rate": 1.947357723577236e-05, "loss": 0.1871, "step": 6475 }, { "epoch": 0.07902439024390244, "grad_norm": 0.8584933280944824, "learning_rate": 1.9473170731707317e-05, "loss": 0.1513, "step": 6480 }, { "epoch": 0.07908536585365854, "grad_norm": 1.041733741760254, "learning_rate": 1.947276422764228e-05, "loss": 0.1527, "step": 6485 }, { "epoch": 0.07914634146341464, "grad_norm": 0.8942210078239441, "learning_rate": 1.9472357723577237e-05, "loss": 0.1804, "step": 6490 }, { "epoch": 0.07920731707317073, "grad_norm": 6.6318840980529785, "learning_rate": 1.94719512195122e-05, "loss": 0.2282, "step": 6495 }, { "epoch": 0.07926829268292683, "grad_norm": 0.798173725605011, "learning_rate": 1.9471544715447157e-05, "loss": 0.2324, "step": 6500 }, { "epoch": 0.07932926829268293, "grad_norm": 2.4716975688934326, "learning_rate": 1.9471138211382115e-05, "loss": 0.178, "step": 6505 }, { "epoch": 0.07939024390243903, "grad_norm": 1.3148781061172485, "learning_rate": 1.9470731707317073e-05, "loss": 0.2574, "step": 6510 }, { "epoch": 0.07945121951219512, "grad_norm": 0.9021301865577698, "learning_rate": 1.9470325203252034e-05, "loss": 0.1509, "step": 6515 }, { "epoch": 0.07951219512195122, "grad_norm": 1.5369223356246948, "learning_rate": 1.9469918699186993e-05, "loss": 0.1523, "step": 6520 }, { "epoch": 0.07957317073170732, "grad_norm": 1.3019096851348877, "learning_rate": 1.9469512195121954e-05, "loss": 0.1843, "step": 6525 }, { "epoch": 0.07963414634146342, "grad_norm": 1.0527772903442383, "learning_rate": 1.9469105691056912e-05, "loss": 0.1494, "step": 6530 }, { "epoch": 0.07969512195121951, "grad_norm": 1.666675329208374, "learning_rate": 1.946869918699187e-05, "loss": 0.2, "step": 6535 }, { "epoch": 0.07975609756097561, "grad_norm": 1.0060925483703613, "learning_rate": 1.9468292682926832e-05, "loss": 0.1741, "step": 6540 }, { "epoch": 0.07981707317073171, "grad_norm": 2.3004753589630127, "learning_rate": 1.946788617886179e-05, "loss": 0.2113, "step": 6545 }, { "epoch": 0.0798780487804878, "grad_norm": 2.2836689949035645, "learning_rate": 1.946747967479675e-05, "loss": 0.1713, "step": 6550 }, { "epoch": 0.0799390243902439, "grad_norm": 1.0601335763931274, "learning_rate": 1.946707317073171e-05, "loss": 0.1455, "step": 6555 }, { "epoch": 0.08, "grad_norm": 1.0653789043426514, "learning_rate": 1.9466666666666668e-05, "loss": 0.1779, "step": 6560 }, { "epoch": 0.0800609756097561, "grad_norm": 1.7088991403579712, "learning_rate": 1.9466260162601626e-05, "loss": 0.2111, "step": 6565 }, { "epoch": 0.0801219512195122, "grad_norm": 1.068859577178955, "learning_rate": 1.9465853658536587e-05, "loss": 0.1784, "step": 6570 }, { "epoch": 0.0801829268292683, "grad_norm": 0.8103311657905579, "learning_rate": 1.9465447154471546e-05, "loss": 0.1547, "step": 6575 }, { "epoch": 0.08024390243902439, "grad_norm": 0.8760961294174194, "learning_rate": 1.9465040650406507e-05, "loss": 0.2568, "step": 6580 }, { "epoch": 0.08030487804878049, "grad_norm": 9.242572784423828, "learning_rate": 1.9464634146341465e-05, "loss": 0.1805, "step": 6585 }, { "epoch": 0.08036585365853659, "grad_norm": 1.787778615951538, "learning_rate": 1.9464227642276427e-05, "loss": 0.2671, "step": 6590 }, { "epoch": 0.08042682926829268, "grad_norm": 5.511713027954102, "learning_rate": 1.946382113821138e-05, "loss": 0.1875, "step": 6595 }, { "epoch": 0.08048780487804878, "grad_norm": 1.5316270589828491, "learning_rate": 1.9463414634146343e-05, "loss": 0.2644, "step": 6600 }, { "epoch": 0.08054878048780488, "grad_norm": 1.1427967548370361, "learning_rate": 1.94630081300813e-05, "loss": 0.2174, "step": 6605 }, { "epoch": 0.08060975609756098, "grad_norm": 0.9195204973220825, "learning_rate": 1.9462601626016263e-05, "loss": 0.1838, "step": 6610 }, { "epoch": 0.08067073170731708, "grad_norm": 0.8655902743339539, "learning_rate": 1.946219512195122e-05, "loss": 0.1725, "step": 6615 }, { "epoch": 0.08073170731707317, "grad_norm": 2.8377420902252197, "learning_rate": 1.9461788617886182e-05, "loss": 0.2317, "step": 6620 }, { "epoch": 0.08079268292682927, "grad_norm": 1.0592936277389526, "learning_rate": 1.946138211382114e-05, "loss": 0.1487, "step": 6625 }, { "epoch": 0.08085365853658537, "grad_norm": 1.6256887912750244, "learning_rate": 1.94609756097561e-05, "loss": 0.1984, "step": 6630 }, { "epoch": 0.08091463414634147, "grad_norm": 1.053801417350769, "learning_rate": 1.946056910569106e-05, "loss": 0.1942, "step": 6635 }, { "epoch": 0.08097560975609756, "grad_norm": 1.8450379371643066, "learning_rate": 1.9460162601626018e-05, "loss": 0.1675, "step": 6640 }, { "epoch": 0.08103658536585366, "grad_norm": 1.7467596530914307, "learning_rate": 1.9459756097560976e-05, "loss": 0.2299, "step": 6645 }, { "epoch": 0.08109756097560976, "grad_norm": 0.8572527766227722, "learning_rate": 1.9459349593495938e-05, "loss": 0.1596, "step": 6650 }, { "epoch": 0.08115853658536586, "grad_norm": 0.6397969722747803, "learning_rate": 1.9458943089430896e-05, "loss": 0.1545, "step": 6655 }, { "epoch": 0.08121951219512195, "grad_norm": 1.405145287513733, "learning_rate": 1.9458536585365854e-05, "loss": 0.1842, "step": 6660 }, { "epoch": 0.08128048780487805, "grad_norm": 0.8951712250709534, "learning_rate": 1.9458130081300816e-05, "loss": 0.1751, "step": 6665 }, { "epoch": 0.08134146341463415, "grad_norm": 2.709502696990967, "learning_rate": 1.9457723577235774e-05, "loss": 0.234, "step": 6670 }, { "epoch": 0.08140243902439025, "grad_norm": 1.8466001749038696, "learning_rate": 1.9457317073170735e-05, "loss": 0.2498, "step": 6675 }, { "epoch": 0.08146341463414634, "grad_norm": 0.7329550981521606, "learning_rate": 1.9456910569105693e-05, "loss": 0.1516, "step": 6680 }, { "epoch": 0.08152439024390244, "grad_norm": 0.7830443978309631, "learning_rate": 1.945650406504065e-05, "loss": 0.1769, "step": 6685 }, { "epoch": 0.08158536585365854, "grad_norm": 0.852921187877655, "learning_rate": 1.945609756097561e-05, "loss": 0.2178, "step": 6690 }, { "epoch": 0.08164634146341464, "grad_norm": 1.4444727897644043, "learning_rate": 1.945569105691057e-05, "loss": 0.2155, "step": 6695 }, { "epoch": 0.08170731707317073, "grad_norm": 1.3333208560943604, "learning_rate": 1.945528455284553e-05, "loss": 0.1749, "step": 6700 }, { "epoch": 0.08176829268292683, "grad_norm": 2.827025890350342, "learning_rate": 1.945487804878049e-05, "loss": 0.2101, "step": 6705 }, { "epoch": 0.08182926829268293, "grad_norm": 0.884382426738739, "learning_rate": 1.945447154471545e-05, "loss": 0.1479, "step": 6710 }, { "epoch": 0.08189024390243903, "grad_norm": 2.426809310913086, "learning_rate": 1.9454065040650407e-05, "loss": 0.1648, "step": 6715 }, { "epoch": 0.08195121951219513, "grad_norm": 0.8838247656822205, "learning_rate": 1.945365853658537e-05, "loss": 0.2079, "step": 6720 }, { "epoch": 0.08201219512195122, "grad_norm": 0.8352567553520203, "learning_rate": 1.9453252032520327e-05, "loss": 0.1655, "step": 6725 }, { "epoch": 0.08207317073170732, "grad_norm": 0.9802960753440857, "learning_rate": 1.9452845528455285e-05, "loss": 0.2005, "step": 6730 }, { "epoch": 0.08213414634146342, "grad_norm": 0.9636924862861633, "learning_rate": 1.9452439024390246e-05, "loss": 0.2096, "step": 6735 }, { "epoch": 0.08219512195121952, "grad_norm": 1.2587701082229614, "learning_rate": 1.9452032520325204e-05, "loss": 0.1684, "step": 6740 }, { "epoch": 0.08225609756097561, "grad_norm": 1.5013996362686157, "learning_rate": 1.9451626016260163e-05, "loss": 0.1825, "step": 6745 }, { "epoch": 0.08231707317073171, "grad_norm": 1.0785996913909912, "learning_rate": 1.9451219512195124e-05, "loss": 0.1969, "step": 6750 }, { "epoch": 0.08237804878048781, "grad_norm": 2.106999158859253, "learning_rate": 1.9450813008130082e-05, "loss": 0.1872, "step": 6755 }, { "epoch": 0.0824390243902439, "grad_norm": 0.7604215741157532, "learning_rate": 1.9450406504065044e-05, "loss": 0.2367, "step": 6760 }, { "epoch": 0.0825, "grad_norm": 2.167527675628662, "learning_rate": 1.9450000000000002e-05, "loss": 0.1634, "step": 6765 }, { "epoch": 0.0825609756097561, "grad_norm": 0.8682030439376831, "learning_rate": 1.9449593495934963e-05, "loss": 0.185, "step": 6770 }, { "epoch": 0.0826219512195122, "grad_norm": 1.4318273067474365, "learning_rate": 1.9449186991869918e-05, "loss": 0.2092, "step": 6775 }, { "epoch": 0.0826829268292683, "grad_norm": 1.0910234451293945, "learning_rate": 1.944878048780488e-05, "loss": 0.1735, "step": 6780 }, { "epoch": 0.0827439024390244, "grad_norm": 0.8231720328330994, "learning_rate": 1.9448373983739838e-05, "loss": 0.1781, "step": 6785 }, { "epoch": 0.08280487804878049, "grad_norm": 0.8391971588134766, "learning_rate": 1.94479674796748e-05, "loss": 0.1684, "step": 6790 }, { "epoch": 0.08286585365853659, "grad_norm": 1.1108360290527344, "learning_rate": 1.9447560975609757e-05, "loss": 0.2009, "step": 6795 }, { "epoch": 0.08292682926829269, "grad_norm": 1.069440484046936, "learning_rate": 1.944715447154472e-05, "loss": 0.1781, "step": 6800 }, { "epoch": 0.08298780487804878, "grad_norm": 1.6032190322875977, "learning_rate": 1.9446747967479677e-05, "loss": 0.1878, "step": 6805 }, { "epoch": 0.08304878048780488, "grad_norm": 1.163903832435608, "learning_rate": 1.9446341463414635e-05, "loss": 0.2546, "step": 6810 }, { "epoch": 0.08310975609756098, "grad_norm": 1.3591874837875366, "learning_rate": 1.9445934959349597e-05, "loss": 0.1943, "step": 6815 }, { "epoch": 0.08317073170731708, "grad_norm": 1.7338767051696777, "learning_rate": 1.9445528455284555e-05, "loss": 0.2249, "step": 6820 }, { "epoch": 0.08323170731707318, "grad_norm": 1.0810542106628418, "learning_rate": 1.9445121951219513e-05, "loss": 0.2038, "step": 6825 }, { "epoch": 0.08329268292682927, "grad_norm": 0.8793007731437683, "learning_rate": 1.9444715447154474e-05, "loss": 0.1567, "step": 6830 }, { "epoch": 0.08335365853658537, "grad_norm": 0.7714931964874268, "learning_rate": 1.9444308943089433e-05, "loss": 0.1329, "step": 6835 }, { "epoch": 0.08341463414634147, "grad_norm": 1.923449993133545, "learning_rate": 1.944390243902439e-05, "loss": 0.1434, "step": 6840 }, { "epoch": 0.08347560975609757, "grad_norm": 0.9571212530136108, "learning_rate": 1.9443495934959352e-05, "loss": 0.1959, "step": 6845 }, { "epoch": 0.08353658536585366, "grad_norm": 3.1785988807678223, "learning_rate": 1.944308943089431e-05, "loss": 0.1716, "step": 6850 }, { "epoch": 0.08359756097560976, "grad_norm": 1.0623259544372559, "learning_rate": 1.9442682926829272e-05, "loss": 0.2033, "step": 6855 }, { "epoch": 0.08365853658536586, "grad_norm": 2.1349499225616455, "learning_rate": 1.944227642276423e-05, "loss": 0.2227, "step": 6860 }, { "epoch": 0.08371951219512196, "grad_norm": 0.8524227738380432, "learning_rate": 1.9441869918699188e-05, "loss": 0.1923, "step": 6865 }, { "epoch": 0.08378048780487805, "grad_norm": 2.5040712356567383, "learning_rate": 1.9441463414634146e-05, "loss": 0.1996, "step": 6870 }, { "epoch": 0.08384146341463415, "grad_norm": 0.8095781803131104, "learning_rate": 1.9441056910569108e-05, "loss": 0.1547, "step": 6875 }, { "epoch": 0.08390243902439025, "grad_norm": 0.828519880771637, "learning_rate": 1.9440650406504066e-05, "loss": 0.1735, "step": 6880 }, { "epoch": 0.08396341463414635, "grad_norm": 1.26097571849823, "learning_rate": 1.9440243902439027e-05, "loss": 0.1844, "step": 6885 }, { "epoch": 0.08402439024390244, "grad_norm": 1.7340044975280762, "learning_rate": 1.9439837398373986e-05, "loss": 0.2012, "step": 6890 }, { "epoch": 0.08408536585365854, "grad_norm": 0.8851368427276611, "learning_rate": 1.9439430894308944e-05, "loss": 0.1438, "step": 6895 }, { "epoch": 0.08414634146341464, "grad_norm": 2.764272451400757, "learning_rate": 1.9439024390243905e-05, "loss": 0.2227, "step": 6900 }, { "epoch": 0.08420731707317074, "grad_norm": 1.7267827987670898, "learning_rate": 1.9438617886178863e-05, "loss": 0.2497, "step": 6905 }, { "epoch": 0.08426829268292683, "grad_norm": 1.04621160030365, "learning_rate": 1.943821138211382e-05, "loss": 0.1999, "step": 6910 }, { "epoch": 0.08432926829268293, "grad_norm": 1.149672508239746, "learning_rate": 1.9437804878048783e-05, "loss": 0.1753, "step": 6915 }, { "epoch": 0.08439024390243903, "grad_norm": 0.9873383641242981, "learning_rate": 1.943739837398374e-05, "loss": 0.1975, "step": 6920 }, { "epoch": 0.08445121951219513, "grad_norm": 0.680482029914856, "learning_rate": 1.94369918699187e-05, "loss": 0.2064, "step": 6925 }, { "epoch": 0.08451219512195123, "grad_norm": 1.357608675956726, "learning_rate": 1.943658536585366e-05, "loss": 0.2336, "step": 6930 }, { "epoch": 0.08457317073170732, "grad_norm": 1.1136524677276611, "learning_rate": 1.943617886178862e-05, "loss": 0.1515, "step": 6935 }, { "epoch": 0.08463414634146342, "grad_norm": 0.5457839369773865, "learning_rate": 1.943577235772358e-05, "loss": 0.2236, "step": 6940 }, { "epoch": 0.08469512195121952, "grad_norm": 1.226995825767517, "learning_rate": 1.943536585365854e-05, "loss": 0.1513, "step": 6945 }, { "epoch": 0.08475609756097562, "grad_norm": 0.9488670825958252, "learning_rate": 1.94349593495935e-05, "loss": 0.1625, "step": 6950 }, { "epoch": 0.08481707317073171, "grad_norm": 1.150247573852539, "learning_rate": 1.9434552845528455e-05, "loss": 0.2336, "step": 6955 }, { "epoch": 0.08487804878048781, "grad_norm": 1.7599718570709229, "learning_rate": 1.9434146341463416e-05, "loss": 0.1721, "step": 6960 }, { "epoch": 0.08493902439024391, "grad_norm": 0.9066707491874695, "learning_rate": 1.9433739837398374e-05, "loss": 0.1908, "step": 6965 }, { "epoch": 0.085, "grad_norm": 1.0383864641189575, "learning_rate": 1.9433333333333336e-05, "loss": 0.1724, "step": 6970 }, { "epoch": 0.0850609756097561, "grad_norm": 1.1590602397918701, "learning_rate": 1.9432926829268294e-05, "loss": 0.2192, "step": 6975 }, { "epoch": 0.0851219512195122, "grad_norm": 1.3828835487365723, "learning_rate": 1.9432520325203256e-05, "loss": 0.1745, "step": 6980 }, { "epoch": 0.0851829268292683, "grad_norm": 0.6503394842147827, "learning_rate": 1.9432113821138214e-05, "loss": 0.151, "step": 6985 }, { "epoch": 0.0852439024390244, "grad_norm": 1.4251254796981812, "learning_rate": 1.9431707317073172e-05, "loss": 0.1942, "step": 6990 }, { "epoch": 0.0853048780487805, "grad_norm": 1.2795817852020264, "learning_rate": 1.943130081300813e-05, "loss": 0.2111, "step": 6995 }, { "epoch": 0.08536585365853659, "grad_norm": 1.3484957218170166, "learning_rate": 1.943089430894309e-05, "loss": 0.1845, "step": 7000 }, { "epoch": 0.08542682926829269, "grad_norm": 1.587546706199646, "learning_rate": 1.943048780487805e-05, "loss": 0.1611, "step": 7005 }, { "epoch": 0.08548780487804879, "grad_norm": 0.9559614658355713, "learning_rate": 1.943008130081301e-05, "loss": 0.1707, "step": 7010 }, { "epoch": 0.08554878048780488, "grad_norm": 1.8392267227172852, "learning_rate": 1.942967479674797e-05, "loss": 0.2452, "step": 7015 }, { "epoch": 0.08560975609756098, "grad_norm": 0.8359034061431885, "learning_rate": 1.9429268292682927e-05, "loss": 0.1741, "step": 7020 }, { "epoch": 0.08567073170731708, "grad_norm": 1.2502855062484741, "learning_rate": 1.942886178861789e-05, "loss": 0.2, "step": 7025 }, { "epoch": 0.08573170731707318, "grad_norm": 1.3272908926010132, "learning_rate": 1.9428455284552847e-05, "loss": 0.2116, "step": 7030 }, { "epoch": 0.08579268292682928, "grad_norm": 2.272817611694336, "learning_rate": 1.942804878048781e-05, "loss": 0.2329, "step": 7035 }, { "epoch": 0.08585365853658537, "grad_norm": 0.8257755637168884, "learning_rate": 1.9427642276422767e-05, "loss": 0.1762, "step": 7040 }, { "epoch": 0.08591463414634147, "grad_norm": 2.1659505367279053, "learning_rate": 1.9427235772357725e-05, "loss": 0.1759, "step": 7045 }, { "epoch": 0.08597560975609755, "grad_norm": 0.8609198331832886, "learning_rate": 1.9426829268292683e-05, "loss": 0.2063, "step": 7050 }, { "epoch": 0.08603658536585365, "grad_norm": 1.3022924661636353, "learning_rate": 1.9426422764227644e-05, "loss": 0.1913, "step": 7055 }, { "epoch": 0.08609756097560975, "grad_norm": 0.7129493355751038, "learning_rate": 1.9426016260162603e-05, "loss": 0.1405, "step": 7060 }, { "epoch": 0.08615853658536585, "grad_norm": 0.7668561935424805, "learning_rate": 1.9425609756097564e-05, "loss": 0.1669, "step": 7065 }, { "epoch": 0.08621951219512194, "grad_norm": 1.1845217943191528, "learning_rate": 1.9425203252032522e-05, "loss": 0.1799, "step": 7070 }, { "epoch": 0.08628048780487804, "grad_norm": 1.167540192604065, "learning_rate": 1.942479674796748e-05, "loss": 0.2156, "step": 7075 }, { "epoch": 0.08634146341463414, "grad_norm": 0.7429106831550598, "learning_rate": 1.9424390243902442e-05, "loss": 0.1519, "step": 7080 }, { "epoch": 0.08640243902439024, "grad_norm": 1.0486732721328735, "learning_rate": 1.94239837398374e-05, "loss": 0.239, "step": 7085 }, { "epoch": 0.08646341463414633, "grad_norm": 1.6273690462112427, "learning_rate": 1.9423577235772358e-05, "loss": 0.166, "step": 7090 }, { "epoch": 0.08652439024390243, "grad_norm": 0.9691095352172852, "learning_rate": 1.942317073170732e-05, "loss": 0.2547, "step": 7095 }, { "epoch": 0.08658536585365853, "grad_norm": 0.9184445142745972, "learning_rate": 1.9422764227642278e-05, "loss": 0.1793, "step": 7100 }, { "epoch": 0.08664634146341463, "grad_norm": 2.317979335784912, "learning_rate": 1.9422357723577236e-05, "loss": 0.2029, "step": 7105 }, { "epoch": 0.08670731707317073, "grad_norm": 1.5476975440979004, "learning_rate": 1.9421951219512197e-05, "loss": 0.2445, "step": 7110 }, { "epoch": 0.08676829268292682, "grad_norm": 0.908139169216156, "learning_rate": 1.9421544715447155e-05, "loss": 0.1314, "step": 7115 }, { "epoch": 0.08682926829268292, "grad_norm": 2.411116361618042, "learning_rate": 1.9421138211382117e-05, "loss": 0.1999, "step": 7120 }, { "epoch": 0.08689024390243902, "grad_norm": 1.2639046907424927, "learning_rate": 1.9420731707317075e-05, "loss": 0.165, "step": 7125 }, { "epoch": 0.08695121951219512, "grad_norm": 1.2630318403244019, "learning_rate": 1.9420325203252037e-05, "loss": 0.1833, "step": 7130 }, { "epoch": 0.08701219512195121, "grad_norm": 0.9501336216926575, "learning_rate": 1.941991869918699e-05, "loss": 0.2369, "step": 7135 }, { "epoch": 0.08707317073170731, "grad_norm": 0.606437087059021, "learning_rate": 1.9419512195121953e-05, "loss": 0.1485, "step": 7140 }, { "epoch": 0.08713414634146341, "grad_norm": 1.0177257061004639, "learning_rate": 1.941910569105691e-05, "loss": 0.1803, "step": 7145 }, { "epoch": 0.0871951219512195, "grad_norm": 0.7530302405357361, "learning_rate": 1.9418699186991873e-05, "loss": 0.1704, "step": 7150 }, { "epoch": 0.0872560975609756, "grad_norm": 1.4764378070831299, "learning_rate": 1.941829268292683e-05, "loss": 0.1545, "step": 7155 }, { "epoch": 0.0873170731707317, "grad_norm": 2.1884090900421143, "learning_rate": 1.9417886178861792e-05, "loss": 0.2156, "step": 7160 }, { "epoch": 0.0873780487804878, "grad_norm": 0.9971258044242859, "learning_rate": 1.941747967479675e-05, "loss": 0.1922, "step": 7165 }, { "epoch": 0.0874390243902439, "grad_norm": 1.0486899614334106, "learning_rate": 1.941707317073171e-05, "loss": 0.1729, "step": 7170 }, { "epoch": 0.0875, "grad_norm": 1.0848677158355713, "learning_rate": 1.9416666666666667e-05, "loss": 0.1733, "step": 7175 }, { "epoch": 0.08756097560975609, "grad_norm": 0.995051383972168, "learning_rate": 1.9416260162601628e-05, "loss": 0.1656, "step": 7180 }, { "epoch": 0.08762195121951219, "grad_norm": 1.0620391368865967, "learning_rate": 1.9415853658536586e-05, "loss": 0.1405, "step": 7185 }, { "epoch": 0.08768292682926829, "grad_norm": 3.4777438640594482, "learning_rate": 1.9415447154471548e-05, "loss": 0.1521, "step": 7190 }, { "epoch": 0.08774390243902438, "grad_norm": 1.6805171966552734, "learning_rate": 1.9415040650406506e-05, "loss": 0.1583, "step": 7195 }, { "epoch": 0.08780487804878048, "grad_norm": 1.1381417512893677, "learning_rate": 1.9414634146341464e-05, "loss": 0.2003, "step": 7200 }, { "epoch": 0.08786585365853658, "grad_norm": 0.9317860007286072, "learning_rate": 1.9414227642276425e-05, "loss": 0.1541, "step": 7205 }, { "epoch": 0.08792682926829268, "grad_norm": 0.7349812984466553, "learning_rate": 1.9413821138211384e-05, "loss": 0.1902, "step": 7210 }, { "epoch": 0.08798780487804878, "grad_norm": 1.5391911268234253, "learning_rate": 1.9413414634146345e-05, "loss": 0.2687, "step": 7215 }, { "epoch": 0.08804878048780487, "grad_norm": 0.8151683807373047, "learning_rate": 1.9413008130081303e-05, "loss": 0.1541, "step": 7220 }, { "epoch": 0.08810975609756097, "grad_norm": 1.0798836946487427, "learning_rate": 1.941260162601626e-05, "loss": 0.1907, "step": 7225 }, { "epoch": 0.08817073170731707, "grad_norm": 1.5233508348464966, "learning_rate": 1.941219512195122e-05, "loss": 0.213, "step": 7230 }, { "epoch": 0.08823170731707317, "grad_norm": 1.1584773063659668, "learning_rate": 1.941178861788618e-05, "loss": 0.2328, "step": 7235 }, { "epoch": 0.08829268292682926, "grad_norm": 1.1300652027130127, "learning_rate": 1.941138211382114e-05, "loss": 0.1747, "step": 7240 }, { "epoch": 0.08835365853658536, "grad_norm": 1.5524941682815552, "learning_rate": 1.94109756097561e-05, "loss": 0.1965, "step": 7245 }, { "epoch": 0.08841463414634146, "grad_norm": 0.8937903642654419, "learning_rate": 1.941056910569106e-05, "loss": 0.1682, "step": 7250 }, { "epoch": 0.08847560975609756, "grad_norm": 1.1921125650405884, "learning_rate": 1.9410162601626017e-05, "loss": 0.1607, "step": 7255 }, { "epoch": 0.08853658536585365, "grad_norm": 2.0416812896728516, "learning_rate": 1.9409756097560975e-05, "loss": 0.2336, "step": 7260 }, { "epoch": 0.08859756097560975, "grad_norm": 1.3744733333587646, "learning_rate": 1.9409349593495937e-05, "loss": 0.1809, "step": 7265 }, { "epoch": 0.08865853658536585, "grad_norm": 0.942028820514679, "learning_rate": 1.9408943089430895e-05, "loss": 0.1896, "step": 7270 }, { "epoch": 0.08871951219512195, "grad_norm": 1.0523154735565186, "learning_rate": 1.9408536585365856e-05, "loss": 0.1769, "step": 7275 }, { "epoch": 0.08878048780487804, "grad_norm": 1.2048817873001099, "learning_rate": 1.9408130081300814e-05, "loss": 0.2347, "step": 7280 }, { "epoch": 0.08884146341463414, "grad_norm": 0.8339489698410034, "learning_rate": 1.9407723577235772e-05, "loss": 0.1779, "step": 7285 }, { "epoch": 0.08890243902439024, "grad_norm": 2.56325364112854, "learning_rate": 1.9407317073170734e-05, "loss": 0.17, "step": 7290 }, { "epoch": 0.08896341463414634, "grad_norm": 1.573581576347351, "learning_rate": 1.9406910569105692e-05, "loss": 0.1716, "step": 7295 }, { "epoch": 0.08902439024390243, "grad_norm": 1.5501142740249634, "learning_rate": 1.9406504065040654e-05, "loss": 0.1549, "step": 7300 }, { "epoch": 0.08908536585365853, "grad_norm": 1.3017897605895996, "learning_rate": 1.9406097560975612e-05, "loss": 0.2299, "step": 7305 }, { "epoch": 0.08914634146341463, "grad_norm": 3.704754114151001, "learning_rate": 1.9405691056910573e-05, "loss": 0.175, "step": 7310 }, { "epoch": 0.08920731707317073, "grad_norm": 0.8603552579879761, "learning_rate": 1.9405284552845528e-05, "loss": 0.1632, "step": 7315 }, { "epoch": 0.08926829268292683, "grad_norm": 1.1470894813537598, "learning_rate": 1.940487804878049e-05, "loss": 0.2286, "step": 7320 }, { "epoch": 0.08932926829268292, "grad_norm": 1.4449764490127563, "learning_rate": 1.9404471544715448e-05, "loss": 0.1824, "step": 7325 }, { "epoch": 0.08939024390243902, "grad_norm": 0.9761942625045776, "learning_rate": 1.940406504065041e-05, "loss": 0.1822, "step": 7330 }, { "epoch": 0.08945121951219512, "grad_norm": 1.8531566858291626, "learning_rate": 1.9403658536585367e-05, "loss": 0.196, "step": 7335 }, { "epoch": 0.08951219512195122, "grad_norm": 1.391287088394165, "learning_rate": 1.940325203252033e-05, "loss": 0.1828, "step": 7340 }, { "epoch": 0.08957317073170731, "grad_norm": 1.827877163887024, "learning_rate": 1.9402845528455284e-05, "loss": 0.1812, "step": 7345 }, { "epoch": 0.08963414634146341, "grad_norm": 1.2254709005355835, "learning_rate": 1.9402439024390245e-05, "loss": 0.2161, "step": 7350 }, { "epoch": 0.08969512195121951, "grad_norm": 1.2553927898406982, "learning_rate": 1.9402032520325203e-05, "loss": 0.2274, "step": 7355 }, { "epoch": 0.0897560975609756, "grad_norm": 0.7229008078575134, "learning_rate": 1.9401626016260165e-05, "loss": 0.1894, "step": 7360 }, { "epoch": 0.0898170731707317, "grad_norm": 4.781049728393555, "learning_rate": 1.9401219512195123e-05, "loss": 0.22, "step": 7365 }, { "epoch": 0.0898780487804878, "grad_norm": 1.2643766403198242, "learning_rate": 1.9400813008130084e-05, "loss": 0.2024, "step": 7370 }, { "epoch": 0.0899390243902439, "grad_norm": 1.6113392114639282, "learning_rate": 1.9400406504065042e-05, "loss": 0.1987, "step": 7375 }, { "epoch": 0.09, "grad_norm": 0.8690415024757385, "learning_rate": 1.94e-05, "loss": 0.1718, "step": 7380 }, { "epoch": 0.0900609756097561, "grad_norm": 0.9921799898147583, "learning_rate": 1.9399593495934962e-05, "loss": 0.1927, "step": 7385 }, { "epoch": 0.09012195121951219, "grad_norm": 1.1174848079681396, "learning_rate": 1.939918699186992e-05, "loss": 0.185, "step": 7390 }, { "epoch": 0.09018292682926829, "grad_norm": 1.1429064273834229, "learning_rate": 1.9398780487804882e-05, "loss": 0.1317, "step": 7395 }, { "epoch": 0.09024390243902439, "grad_norm": 1.4152344465255737, "learning_rate": 1.939837398373984e-05, "loss": 0.2367, "step": 7400 }, { "epoch": 0.09030487804878048, "grad_norm": 1.4048209190368652, "learning_rate": 1.9397967479674798e-05, "loss": 0.167, "step": 7405 }, { "epoch": 0.09036585365853658, "grad_norm": 1.099534273147583, "learning_rate": 1.9397560975609756e-05, "loss": 0.1443, "step": 7410 }, { "epoch": 0.09042682926829268, "grad_norm": 0.7269728779792786, "learning_rate": 1.9397154471544718e-05, "loss": 0.1642, "step": 7415 }, { "epoch": 0.09048780487804878, "grad_norm": 1.1961894035339355, "learning_rate": 1.9396747967479676e-05, "loss": 0.1614, "step": 7420 }, { "epoch": 0.09054878048780488, "grad_norm": 1.117638349533081, "learning_rate": 1.9396341463414637e-05, "loss": 0.1681, "step": 7425 }, { "epoch": 0.09060975609756097, "grad_norm": 1.2035795450210571, "learning_rate": 1.9395934959349595e-05, "loss": 0.1795, "step": 7430 }, { "epoch": 0.09067073170731707, "grad_norm": 1.6589666604995728, "learning_rate": 1.9395528455284554e-05, "loss": 0.2124, "step": 7435 }, { "epoch": 0.09073170731707317, "grad_norm": 1.6115937232971191, "learning_rate": 1.939512195121951e-05, "loss": 0.1646, "step": 7440 }, { "epoch": 0.09079268292682927, "grad_norm": 1.9475899934768677, "learning_rate": 1.9394715447154473e-05, "loss": 0.1786, "step": 7445 }, { "epoch": 0.09085365853658536, "grad_norm": 1.1456960439682007, "learning_rate": 1.939430894308943e-05, "loss": 0.2079, "step": 7450 }, { "epoch": 0.09091463414634146, "grad_norm": 0.9927575588226318, "learning_rate": 1.9393902439024393e-05, "loss": 0.1792, "step": 7455 }, { "epoch": 0.09097560975609756, "grad_norm": 0.8208774924278259, "learning_rate": 1.939349593495935e-05, "loss": 0.1542, "step": 7460 }, { "epoch": 0.09103658536585366, "grad_norm": 0.8590109348297119, "learning_rate": 1.939308943089431e-05, "loss": 0.1572, "step": 7465 }, { "epoch": 0.09109756097560975, "grad_norm": 0.7952497005462646, "learning_rate": 1.939268292682927e-05, "loss": 0.1817, "step": 7470 }, { "epoch": 0.09115853658536585, "grad_norm": 1.2740529775619507, "learning_rate": 1.939227642276423e-05, "loss": 0.1297, "step": 7475 }, { "epoch": 0.09121951219512195, "grad_norm": 0.8292527794837952, "learning_rate": 1.939186991869919e-05, "loss": 0.169, "step": 7480 }, { "epoch": 0.09128048780487805, "grad_norm": 1.2060573101043701, "learning_rate": 1.939146341463415e-05, "loss": 0.2324, "step": 7485 }, { "epoch": 0.09134146341463414, "grad_norm": 1.6729810237884521, "learning_rate": 1.939105691056911e-05, "loss": 0.1844, "step": 7490 }, { "epoch": 0.09140243902439024, "grad_norm": 1.2710801362991333, "learning_rate": 1.9390650406504065e-05, "loss": 0.1565, "step": 7495 }, { "epoch": 0.09146341463414634, "grad_norm": 1.7834229469299316, "learning_rate": 1.9390243902439026e-05, "loss": 0.1935, "step": 7500 }, { "epoch": 0.09152439024390244, "grad_norm": 1.0688104629516602, "learning_rate": 1.9389837398373984e-05, "loss": 0.178, "step": 7505 }, { "epoch": 0.09158536585365853, "grad_norm": 1.008203387260437, "learning_rate": 1.9389430894308946e-05, "loss": 0.1783, "step": 7510 }, { "epoch": 0.09164634146341463, "grad_norm": 1.8291298151016235, "learning_rate": 1.9389024390243904e-05, "loss": 0.154, "step": 7515 }, { "epoch": 0.09170731707317073, "grad_norm": 1.1473432779312134, "learning_rate": 1.9388617886178865e-05, "loss": 0.1992, "step": 7520 }, { "epoch": 0.09176829268292683, "grad_norm": 2.6685070991516113, "learning_rate": 1.938821138211382e-05, "loss": 0.1699, "step": 7525 }, { "epoch": 0.09182926829268293, "grad_norm": 1.3696539402008057, "learning_rate": 1.9387804878048782e-05, "loss": 0.1767, "step": 7530 }, { "epoch": 0.09189024390243902, "grad_norm": 0.909941554069519, "learning_rate": 1.938739837398374e-05, "loss": 0.2481, "step": 7535 }, { "epoch": 0.09195121951219512, "grad_norm": 0.9072628021240234, "learning_rate": 1.93869918699187e-05, "loss": 0.1699, "step": 7540 }, { "epoch": 0.09201219512195122, "grad_norm": 1.2330530881881714, "learning_rate": 1.938658536585366e-05, "loss": 0.187, "step": 7545 }, { "epoch": 0.09207317073170732, "grad_norm": 0.8279716968536377, "learning_rate": 1.938617886178862e-05, "loss": 0.1527, "step": 7550 }, { "epoch": 0.09213414634146341, "grad_norm": 1.398069143295288, "learning_rate": 1.938577235772358e-05, "loss": 0.1586, "step": 7555 }, { "epoch": 0.09219512195121951, "grad_norm": 1.133436918258667, "learning_rate": 1.9385365853658537e-05, "loss": 0.1802, "step": 7560 }, { "epoch": 0.09225609756097561, "grad_norm": 2.826305389404297, "learning_rate": 1.93849593495935e-05, "loss": 0.1618, "step": 7565 }, { "epoch": 0.0923170731707317, "grad_norm": 1.1553645133972168, "learning_rate": 1.9384552845528457e-05, "loss": 0.181, "step": 7570 }, { "epoch": 0.0923780487804878, "grad_norm": 1.2653954029083252, "learning_rate": 1.938414634146342e-05, "loss": 0.1748, "step": 7575 }, { "epoch": 0.0924390243902439, "grad_norm": 1.1670464277267456, "learning_rate": 1.9383739837398377e-05, "loss": 0.1613, "step": 7580 }, { "epoch": 0.0925, "grad_norm": 0.7690752148628235, "learning_rate": 1.9383333333333335e-05, "loss": 0.1994, "step": 7585 }, { "epoch": 0.0925609756097561, "grad_norm": 1.0720735788345337, "learning_rate": 1.9382926829268293e-05, "loss": 0.1973, "step": 7590 }, { "epoch": 0.0926219512195122, "grad_norm": 3.0876595973968506, "learning_rate": 1.9382520325203254e-05, "loss": 0.2229, "step": 7595 }, { "epoch": 0.09268292682926829, "grad_norm": 1.0777519941329956, "learning_rate": 1.9382113821138212e-05, "loss": 0.1621, "step": 7600 }, { "epoch": 0.09274390243902439, "grad_norm": 1.3392125368118286, "learning_rate": 1.9381707317073174e-05, "loss": 0.1471, "step": 7605 }, { "epoch": 0.09280487804878049, "grad_norm": 1.0883965492248535, "learning_rate": 1.9381300813008132e-05, "loss": 0.1358, "step": 7610 }, { "epoch": 0.09286585365853658, "grad_norm": 0.8655027747154236, "learning_rate": 1.938089430894309e-05, "loss": 0.1863, "step": 7615 }, { "epoch": 0.09292682926829268, "grad_norm": 1.3479868173599243, "learning_rate": 1.938048780487805e-05, "loss": 0.1871, "step": 7620 }, { "epoch": 0.09298780487804878, "grad_norm": 0.7552160024642944, "learning_rate": 1.938008130081301e-05, "loss": 0.1816, "step": 7625 }, { "epoch": 0.09304878048780488, "grad_norm": 1.7081749439239502, "learning_rate": 1.9379674796747968e-05, "loss": 0.2197, "step": 7630 }, { "epoch": 0.09310975609756098, "grad_norm": 1.3048789501190186, "learning_rate": 1.937926829268293e-05, "loss": 0.1365, "step": 7635 }, { "epoch": 0.09317073170731707, "grad_norm": 0.9704335927963257, "learning_rate": 1.9378861788617888e-05, "loss": 0.1704, "step": 7640 }, { "epoch": 0.09323170731707317, "grad_norm": 1.556046724319458, "learning_rate": 1.9378455284552846e-05, "loss": 0.1913, "step": 7645 }, { "epoch": 0.09329268292682927, "grad_norm": 1.6491203308105469, "learning_rate": 1.9378048780487807e-05, "loss": 0.1895, "step": 7650 }, { "epoch": 0.09335365853658537, "grad_norm": 0.9348368644714355, "learning_rate": 1.9377642276422765e-05, "loss": 0.1978, "step": 7655 }, { "epoch": 0.09341463414634146, "grad_norm": 2.038728713989258, "learning_rate": 1.9377235772357727e-05, "loss": 0.196, "step": 7660 }, { "epoch": 0.09347560975609756, "grad_norm": 0.9822637438774109, "learning_rate": 1.9376829268292685e-05, "loss": 0.1568, "step": 7665 }, { "epoch": 0.09353658536585366, "grad_norm": 1.4937350749969482, "learning_rate": 1.9376422764227643e-05, "loss": 0.1807, "step": 7670 }, { "epoch": 0.09359756097560976, "grad_norm": 0.819168210029602, "learning_rate": 1.93760162601626e-05, "loss": 0.1737, "step": 7675 }, { "epoch": 0.09365853658536585, "grad_norm": 0.7338165044784546, "learning_rate": 1.9375609756097563e-05, "loss": 0.1579, "step": 7680 }, { "epoch": 0.09371951219512195, "grad_norm": 3.3454113006591797, "learning_rate": 1.937520325203252e-05, "loss": 0.2249, "step": 7685 }, { "epoch": 0.09378048780487805, "grad_norm": 3.146973133087158, "learning_rate": 1.9374796747967482e-05, "loss": 0.1964, "step": 7690 }, { "epoch": 0.09384146341463415, "grad_norm": 1.8214666843414307, "learning_rate": 1.937439024390244e-05, "loss": 0.2081, "step": 7695 }, { "epoch": 0.09390243902439024, "grad_norm": 0.7815614938735962, "learning_rate": 1.9373983739837402e-05, "loss": 0.2394, "step": 7700 }, { "epoch": 0.09396341463414634, "grad_norm": 2.2349092960357666, "learning_rate": 1.9373577235772357e-05, "loss": 0.2024, "step": 7705 }, { "epoch": 0.09402439024390244, "grad_norm": 1.0253353118896484, "learning_rate": 1.937317073170732e-05, "loss": 0.187, "step": 7710 }, { "epoch": 0.09408536585365854, "grad_norm": 1.11607825756073, "learning_rate": 1.9372764227642276e-05, "loss": 0.1987, "step": 7715 }, { "epoch": 0.09414634146341463, "grad_norm": 0.8791502714157104, "learning_rate": 1.9372357723577238e-05, "loss": 0.1632, "step": 7720 }, { "epoch": 0.09420731707317073, "grad_norm": 0.6433708667755127, "learning_rate": 1.9371951219512196e-05, "loss": 0.1695, "step": 7725 }, { "epoch": 0.09426829268292683, "grad_norm": 1.8449991941452026, "learning_rate": 1.9371544715447158e-05, "loss": 0.2136, "step": 7730 }, { "epoch": 0.09432926829268293, "grad_norm": 0.7992122769355774, "learning_rate": 1.9371138211382116e-05, "loss": 0.1828, "step": 7735 }, { "epoch": 0.09439024390243902, "grad_norm": 1.0927984714508057, "learning_rate": 1.9370731707317074e-05, "loss": 0.2029, "step": 7740 }, { "epoch": 0.09445121951219512, "grad_norm": 0.7966647744178772, "learning_rate": 1.9370325203252035e-05, "loss": 0.1561, "step": 7745 }, { "epoch": 0.09451219512195122, "grad_norm": 2.161310911178589, "learning_rate": 1.9369918699186994e-05, "loss": 0.2387, "step": 7750 }, { "epoch": 0.09457317073170732, "grad_norm": 1.6681050062179565, "learning_rate": 1.936951219512195e-05, "loss": 0.1669, "step": 7755 }, { "epoch": 0.09463414634146342, "grad_norm": 0.6949941515922546, "learning_rate": 1.9369105691056913e-05, "loss": 0.1424, "step": 7760 }, { "epoch": 0.09469512195121951, "grad_norm": 0.779653787612915, "learning_rate": 1.936869918699187e-05, "loss": 0.1361, "step": 7765 }, { "epoch": 0.09475609756097561, "grad_norm": 1.143377423286438, "learning_rate": 1.936829268292683e-05, "loss": 0.171, "step": 7770 }, { "epoch": 0.09481707317073171, "grad_norm": 1.080962061882019, "learning_rate": 1.936788617886179e-05, "loss": 0.194, "step": 7775 }, { "epoch": 0.0948780487804878, "grad_norm": 0.947403073310852, "learning_rate": 1.936747967479675e-05, "loss": 0.2018, "step": 7780 }, { "epoch": 0.0949390243902439, "grad_norm": 1.0921902656555176, "learning_rate": 1.936707317073171e-05, "loss": 0.1568, "step": 7785 }, { "epoch": 0.095, "grad_norm": 1.1746323108673096, "learning_rate": 1.936666666666667e-05, "loss": 0.1614, "step": 7790 }, { "epoch": 0.0950609756097561, "grad_norm": 1.1926769018173218, "learning_rate": 1.9366260162601627e-05, "loss": 0.1838, "step": 7795 }, { "epoch": 0.0951219512195122, "grad_norm": 1.3482532501220703, "learning_rate": 1.9365853658536585e-05, "loss": 0.2039, "step": 7800 }, { "epoch": 0.0951829268292683, "grad_norm": 0.9616997241973877, "learning_rate": 1.9365447154471546e-05, "loss": 0.1878, "step": 7805 }, { "epoch": 0.09524390243902439, "grad_norm": 1.1437933444976807, "learning_rate": 1.9365040650406505e-05, "loss": 0.1549, "step": 7810 }, { "epoch": 0.09530487804878049, "grad_norm": 1.3482829332351685, "learning_rate": 1.9364634146341466e-05, "loss": 0.1649, "step": 7815 }, { "epoch": 0.09536585365853659, "grad_norm": 2.2040657997131348, "learning_rate": 1.9364227642276424e-05, "loss": 0.1762, "step": 7820 }, { "epoch": 0.09542682926829268, "grad_norm": 1.5702751874923706, "learning_rate": 1.9363821138211382e-05, "loss": 0.1692, "step": 7825 }, { "epoch": 0.09548780487804878, "grad_norm": 1.0488638877868652, "learning_rate": 1.9363414634146344e-05, "loss": 0.2015, "step": 7830 }, { "epoch": 0.09554878048780488, "grad_norm": 0.9205051064491272, "learning_rate": 1.9363008130081302e-05, "loss": 0.219, "step": 7835 }, { "epoch": 0.09560975609756098, "grad_norm": 0.9876087307929993, "learning_rate": 1.9362601626016264e-05, "loss": 0.2081, "step": 7840 }, { "epoch": 0.09567073170731707, "grad_norm": 0.8471043705940247, "learning_rate": 1.936219512195122e-05, "loss": 0.2064, "step": 7845 }, { "epoch": 0.09573170731707317, "grad_norm": 1.5861706733703613, "learning_rate": 1.936178861788618e-05, "loss": 0.1476, "step": 7850 }, { "epoch": 0.09579268292682927, "grad_norm": 0.7990050315856934, "learning_rate": 1.9361382113821138e-05, "loss": 0.1484, "step": 7855 }, { "epoch": 0.09585365853658537, "grad_norm": 1.1912564039230347, "learning_rate": 1.93609756097561e-05, "loss": 0.1521, "step": 7860 }, { "epoch": 0.09591463414634147, "grad_norm": 1.2760379314422607, "learning_rate": 1.9360569105691058e-05, "loss": 0.2236, "step": 7865 }, { "epoch": 0.09597560975609756, "grad_norm": 1.0990720987319946, "learning_rate": 1.936016260162602e-05, "loss": 0.1545, "step": 7870 }, { "epoch": 0.09603658536585366, "grad_norm": 0.7423365116119385, "learning_rate": 1.9359756097560977e-05, "loss": 0.1601, "step": 7875 }, { "epoch": 0.09609756097560976, "grad_norm": 1.2632232904434204, "learning_rate": 1.935934959349594e-05, "loss": 0.2234, "step": 7880 }, { "epoch": 0.09615853658536586, "grad_norm": 1.5189825296401978, "learning_rate": 1.9358943089430893e-05, "loss": 0.1952, "step": 7885 }, { "epoch": 0.09621951219512195, "grad_norm": 4.8197855949401855, "learning_rate": 1.9358536585365855e-05, "loss": 0.2416, "step": 7890 }, { "epoch": 0.09628048780487805, "grad_norm": 1.2831387519836426, "learning_rate": 1.9358130081300813e-05, "loss": 0.1786, "step": 7895 }, { "epoch": 0.09634146341463415, "grad_norm": 1.6330244541168213, "learning_rate": 1.9357723577235775e-05, "loss": 0.181, "step": 7900 }, { "epoch": 0.09640243902439025, "grad_norm": 1.0629953145980835, "learning_rate": 1.9357317073170733e-05, "loss": 0.1308, "step": 7905 }, { "epoch": 0.09646341463414634, "grad_norm": 1.2038429975509644, "learning_rate": 1.9356910569105694e-05, "loss": 0.1714, "step": 7910 }, { "epoch": 0.09652439024390244, "grad_norm": 0.777143120765686, "learning_rate": 1.9356504065040652e-05, "loss": 0.1361, "step": 7915 }, { "epoch": 0.09658536585365854, "grad_norm": 1.5277256965637207, "learning_rate": 1.935609756097561e-05, "loss": 0.1466, "step": 7920 }, { "epoch": 0.09664634146341464, "grad_norm": 1.4167982339859009, "learning_rate": 1.9355691056910572e-05, "loss": 0.2106, "step": 7925 }, { "epoch": 0.09670731707317073, "grad_norm": 1.647297739982605, "learning_rate": 1.935528455284553e-05, "loss": 0.1567, "step": 7930 }, { "epoch": 0.09676829268292683, "grad_norm": 1.2632981538772583, "learning_rate": 1.9354878048780488e-05, "loss": 0.1819, "step": 7935 }, { "epoch": 0.09682926829268293, "grad_norm": 1.0906479358673096, "learning_rate": 1.935447154471545e-05, "loss": 0.2042, "step": 7940 }, { "epoch": 0.09689024390243903, "grad_norm": 1.3444242477416992, "learning_rate": 1.9354065040650408e-05, "loss": 0.1732, "step": 7945 }, { "epoch": 0.09695121951219512, "grad_norm": 0.4832802414894104, "learning_rate": 1.9353658536585366e-05, "loss": 0.1619, "step": 7950 }, { "epoch": 0.09701219512195122, "grad_norm": 1.0565160512924194, "learning_rate": 1.9353252032520328e-05, "loss": 0.1564, "step": 7955 }, { "epoch": 0.09707317073170732, "grad_norm": 1.6092267036437988, "learning_rate": 1.9352845528455286e-05, "loss": 0.197, "step": 7960 }, { "epoch": 0.09713414634146342, "grad_norm": 1.291834831237793, "learning_rate": 1.9352439024390247e-05, "loss": 0.1484, "step": 7965 }, { "epoch": 0.09719512195121952, "grad_norm": 0.9265182018280029, "learning_rate": 1.9352032520325205e-05, "loss": 0.1603, "step": 7970 }, { "epoch": 0.09725609756097561, "grad_norm": 0.8662686944007874, "learning_rate": 1.9351626016260163e-05, "loss": 0.1759, "step": 7975 }, { "epoch": 0.09731707317073171, "grad_norm": 1.2893961668014526, "learning_rate": 1.935121951219512e-05, "loss": 0.2101, "step": 7980 }, { "epoch": 0.09737804878048781, "grad_norm": 0.8447879552841187, "learning_rate": 1.9350813008130083e-05, "loss": 0.191, "step": 7985 }, { "epoch": 0.0974390243902439, "grad_norm": 2.7169289588928223, "learning_rate": 1.935040650406504e-05, "loss": 0.2088, "step": 7990 }, { "epoch": 0.0975, "grad_norm": 1.1937347650527954, "learning_rate": 1.9350000000000003e-05, "loss": 0.2334, "step": 7995 }, { "epoch": 0.0975609756097561, "grad_norm": 1.1828062534332275, "learning_rate": 1.934959349593496e-05, "loss": 0.1812, "step": 8000 }, { "epoch": 0.0976219512195122, "grad_norm": 2.058128833770752, "learning_rate": 1.934918699186992e-05, "loss": 0.2496, "step": 8005 }, { "epoch": 0.0976829268292683, "grad_norm": 1.5229181051254272, "learning_rate": 1.934878048780488e-05, "loss": 0.1781, "step": 8010 }, { "epoch": 0.0977439024390244, "grad_norm": 0.6514689922332764, "learning_rate": 1.934837398373984e-05, "loss": 0.1549, "step": 8015 }, { "epoch": 0.09780487804878049, "grad_norm": 1.4715770483016968, "learning_rate": 1.9347967479674797e-05, "loss": 0.2341, "step": 8020 }, { "epoch": 0.09786585365853659, "grad_norm": 1.4722532033920288, "learning_rate": 1.934756097560976e-05, "loss": 0.1354, "step": 8025 }, { "epoch": 0.09792682926829269, "grad_norm": 0.8409098386764526, "learning_rate": 1.9347154471544716e-05, "loss": 0.1703, "step": 8030 }, { "epoch": 0.09798780487804878, "grad_norm": 2.6452956199645996, "learning_rate": 1.9346747967479675e-05, "loss": 0.2212, "step": 8035 }, { "epoch": 0.09804878048780488, "grad_norm": 1.0585124492645264, "learning_rate": 1.9346341463414636e-05, "loss": 0.2048, "step": 8040 }, { "epoch": 0.09810975609756098, "grad_norm": 0.6811586022377014, "learning_rate": 1.9345934959349594e-05, "loss": 0.1418, "step": 8045 }, { "epoch": 0.09817073170731708, "grad_norm": 1.6912319660186768, "learning_rate": 1.9345528455284556e-05, "loss": 0.1608, "step": 8050 }, { "epoch": 0.09823170731707317, "grad_norm": 0.9896226525306702, "learning_rate": 1.9345121951219514e-05, "loss": 0.1713, "step": 8055 }, { "epoch": 0.09829268292682927, "grad_norm": 1.4151400327682495, "learning_rate": 1.9344715447154475e-05, "loss": 0.1779, "step": 8060 }, { "epoch": 0.09835365853658537, "grad_norm": 0.9919140934944153, "learning_rate": 1.934430894308943e-05, "loss": 0.1815, "step": 8065 }, { "epoch": 0.09841463414634147, "grad_norm": 0.9729352593421936, "learning_rate": 1.934390243902439e-05, "loss": 0.225, "step": 8070 }, { "epoch": 0.09847560975609757, "grad_norm": 1.1723076105117798, "learning_rate": 1.934349593495935e-05, "loss": 0.2126, "step": 8075 }, { "epoch": 0.09853658536585366, "grad_norm": 1.626451015472412, "learning_rate": 1.934308943089431e-05, "loss": 0.201, "step": 8080 }, { "epoch": 0.09859756097560976, "grad_norm": 0.8878433704376221, "learning_rate": 1.934268292682927e-05, "loss": 0.1704, "step": 8085 }, { "epoch": 0.09865853658536586, "grad_norm": 2.044341564178467, "learning_rate": 1.934227642276423e-05, "loss": 0.1584, "step": 8090 }, { "epoch": 0.09871951219512196, "grad_norm": 1.131752848625183, "learning_rate": 1.934186991869919e-05, "loss": 0.1493, "step": 8095 }, { "epoch": 0.09878048780487805, "grad_norm": 1.697926640510559, "learning_rate": 1.9341463414634147e-05, "loss": 0.199, "step": 8100 }, { "epoch": 0.09884146341463415, "grad_norm": 1.24455726146698, "learning_rate": 1.934105691056911e-05, "loss": 0.1767, "step": 8105 }, { "epoch": 0.09890243902439025, "grad_norm": 0.7118273973464966, "learning_rate": 1.9340650406504067e-05, "loss": 0.1211, "step": 8110 }, { "epoch": 0.09896341463414635, "grad_norm": 0.7993300557136536, "learning_rate": 1.9340243902439025e-05, "loss": 0.1849, "step": 8115 }, { "epoch": 0.09902439024390244, "grad_norm": 1.1427043676376343, "learning_rate": 1.9339837398373986e-05, "loss": 0.1567, "step": 8120 }, { "epoch": 0.09908536585365854, "grad_norm": 1.1484348773956299, "learning_rate": 1.9339430894308945e-05, "loss": 0.1944, "step": 8125 }, { "epoch": 0.09914634146341464, "grad_norm": 1.270400881767273, "learning_rate": 1.9339024390243903e-05, "loss": 0.1768, "step": 8130 }, { "epoch": 0.09920731707317074, "grad_norm": 1.182568073272705, "learning_rate": 1.9338617886178864e-05, "loss": 0.1799, "step": 8135 }, { "epoch": 0.09926829268292683, "grad_norm": 0.7148823738098145, "learning_rate": 1.9338211382113822e-05, "loss": 0.1528, "step": 8140 }, { "epoch": 0.09932926829268293, "grad_norm": 2.3509740829467773, "learning_rate": 1.9337804878048784e-05, "loss": 0.155, "step": 8145 }, { "epoch": 0.09939024390243903, "grad_norm": 3.1165103912353516, "learning_rate": 1.9337398373983742e-05, "loss": 0.2323, "step": 8150 }, { "epoch": 0.09945121951219513, "grad_norm": 0.7981647253036499, "learning_rate": 1.93369918699187e-05, "loss": 0.2051, "step": 8155 }, { "epoch": 0.09951219512195122, "grad_norm": 1.2504748106002808, "learning_rate": 1.9336585365853658e-05, "loss": 0.1837, "step": 8160 }, { "epoch": 0.09957317073170732, "grad_norm": 0.5667440891265869, "learning_rate": 1.933617886178862e-05, "loss": 0.1457, "step": 8165 }, { "epoch": 0.09963414634146342, "grad_norm": 0.9233879446983337, "learning_rate": 1.9335772357723578e-05, "loss": 0.1525, "step": 8170 }, { "epoch": 0.09969512195121952, "grad_norm": 2.9932708740234375, "learning_rate": 1.933536585365854e-05, "loss": 0.2111, "step": 8175 }, { "epoch": 0.09975609756097562, "grad_norm": 0.7017787098884583, "learning_rate": 1.9334959349593498e-05, "loss": 0.1802, "step": 8180 }, { "epoch": 0.09981707317073171, "grad_norm": 1.170357346534729, "learning_rate": 1.9334552845528456e-05, "loss": 0.2199, "step": 8185 }, { "epoch": 0.09987804878048781, "grad_norm": 1.4383565187454224, "learning_rate": 1.9334146341463417e-05, "loss": 0.1922, "step": 8190 }, { "epoch": 0.09993902439024391, "grad_norm": 1.3384788036346436, "learning_rate": 1.9333739837398375e-05, "loss": 0.184, "step": 8195 }, { "epoch": 0.1, "grad_norm": 1.4509910345077515, "learning_rate": 1.9333333333333333e-05, "loss": 0.189, "step": 8200 }, { "epoch": 0.1000609756097561, "grad_norm": 2.3487019538879395, "learning_rate": 1.9332926829268295e-05, "loss": 0.1506, "step": 8205 }, { "epoch": 0.1001219512195122, "grad_norm": 1.857631802558899, "learning_rate": 1.9332520325203253e-05, "loss": 0.1285, "step": 8210 }, { "epoch": 0.1001829268292683, "grad_norm": 2.2482199668884277, "learning_rate": 1.933211382113821e-05, "loss": 0.1476, "step": 8215 }, { "epoch": 0.1002439024390244, "grad_norm": 0.6643203496932983, "learning_rate": 1.9331707317073173e-05, "loss": 0.1524, "step": 8220 }, { "epoch": 0.1003048780487805, "grad_norm": 0.9416831731796265, "learning_rate": 1.933130081300813e-05, "loss": 0.2404, "step": 8225 }, { "epoch": 0.10036585365853659, "grad_norm": 0.8501514196395874, "learning_rate": 1.9330894308943092e-05, "loss": 0.1668, "step": 8230 }, { "epoch": 0.10042682926829269, "grad_norm": 1.2312169075012207, "learning_rate": 1.933048780487805e-05, "loss": 0.1774, "step": 8235 }, { "epoch": 0.10048780487804879, "grad_norm": 0.7320352792739868, "learning_rate": 1.9330081300813012e-05, "loss": 0.1668, "step": 8240 }, { "epoch": 0.10054878048780488, "grad_norm": 1.2827873229980469, "learning_rate": 1.9329674796747967e-05, "loss": 0.2279, "step": 8245 }, { "epoch": 0.10060975609756098, "grad_norm": 1.0146541595458984, "learning_rate": 1.9329268292682928e-05, "loss": 0.1818, "step": 8250 }, { "epoch": 0.10067073170731708, "grad_norm": 1.3306480646133423, "learning_rate": 1.9328861788617886e-05, "loss": 0.1802, "step": 8255 }, { "epoch": 0.10073170731707318, "grad_norm": 1.8337265253067017, "learning_rate": 1.9328455284552848e-05, "loss": 0.1886, "step": 8260 }, { "epoch": 0.10079268292682927, "grad_norm": 0.8092582821846008, "learning_rate": 1.9328048780487806e-05, "loss": 0.1345, "step": 8265 }, { "epoch": 0.10085365853658537, "grad_norm": 0.741933286190033, "learning_rate": 1.9327642276422768e-05, "loss": 0.225, "step": 8270 }, { "epoch": 0.10091463414634147, "grad_norm": 0.9321843385696411, "learning_rate": 1.9327235772357726e-05, "loss": 0.178, "step": 8275 }, { "epoch": 0.10097560975609757, "grad_norm": 1.5674959421157837, "learning_rate": 1.9326829268292684e-05, "loss": 0.2092, "step": 8280 }, { "epoch": 0.10103658536585367, "grad_norm": 1.5933923721313477, "learning_rate": 1.9326422764227642e-05, "loss": 0.177, "step": 8285 }, { "epoch": 0.10109756097560976, "grad_norm": 1.353883147239685, "learning_rate": 1.9326016260162603e-05, "loss": 0.1519, "step": 8290 }, { "epoch": 0.10115853658536586, "grad_norm": 0.837043821811676, "learning_rate": 1.932560975609756e-05, "loss": 0.1961, "step": 8295 }, { "epoch": 0.10121951219512196, "grad_norm": 1.2982869148254395, "learning_rate": 1.9325203252032523e-05, "loss": 0.2109, "step": 8300 }, { "epoch": 0.10128048780487806, "grad_norm": 1.0532466173171997, "learning_rate": 1.932479674796748e-05, "loss": 0.167, "step": 8305 }, { "epoch": 0.10134146341463415, "grad_norm": 0.8449506163597107, "learning_rate": 1.932439024390244e-05, "loss": 0.1564, "step": 8310 }, { "epoch": 0.10140243902439025, "grad_norm": 1.1270772218704224, "learning_rate": 1.93239837398374e-05, "loss": 0.1464, "step": 8315 }, { "epoch": 0.10146341463414635, "grad_norm": 2.1787445545196533, "learning_rate": 1.932357723577236e-05, "loss": 0.2044, "step": 8320 }, { "epoch": 0.10152439024390245, "grad_norm": 1.2869668006896973, "learning_rate": 1.932317073170732e-05, "loss": 0.1429, "step": 8325 }, { "epoch": 0.10158536585365853, "grad_norm": 1.6896917819976807, "learning_rate": 1.932276422764228e-05, "loss": 0.2034, "step": 8330 }, { "epoch": 0.10164634146341463, "grad_norm": 1.5395220518112183, "learning_rate": 1.9322357723577237e-05, "loss": 0.1872, "step": 8335 }, { "epoch": 0.10170731707317072, "grad_norm": 1.3350294828414917, "learning_rate": 1.9321951219512195e-05, "loss": 0.1866, "step": 8340 }, { "epoch": 0.10176829268292682, "grad_norm": 0.9442556500434875, "learning_rate": 1.9321544715447156e-05, "loss": 0.2371, "step": 8345 }, { "epoch": 0.10182926829268292, "grad_norm": 1.4205634593963623, "learning_rate": 1.9321138211382115e-05, "loss": 0.1759, "step": 8350 }, { "epoch": 0.10189024390243902, "grad_norm": 1.5709123611450195, "learning_rate": 1.9320731707317076e-05, "loss": 0.1946, "step": 8355 }, { "epoch": 0.10195121951219512, "grad_norm": 3.1604273319244385, "learning_rate": 1.9320325203252034e-05, "loss": 0.2037, "step": 8360 }, { "epoch": 0.10201219512195121, "grad_norm": 1.0720056295394897, "learning_rate": 1.9319918699186992e-05, "loss": 0.1521, "step": 8365 }, { "epoch": 0.10207317073170731, "grad_norm": 1.0404884815216064, "learning_rate": 1.9319512195121954e-05, "loss": 0.176, "step": 8370 }, { "epoch": 0.10213414634146341, "grad_norm": 1.170656681060791, "learning_rate": 1.9319105691056912e-05, "loss": 0.1888, "step": 8375 }, { "epoch": 0.1021951219512195, "grad_norm": 2.172485589981079, "learning_rate": 1.931869918699187e-05, "loss": 0.1448, "step": 8380 }, { "epoch": 0.1022560975609756, "grad_norm": 1.3070272207260132, "learning_rate": 1.931829268292683e-05, "loss": 0.1812, "step": 8385 }, { "epoch": 0.1023170731707317, "grad_norm": 4.774148464202881, "learning_rate": 1.931788617886179e-05, "loss": 0.2061, "step": 8390 }, { "epoch": 0.1023780487804878, "grad_norm": 0.7937531471252441, "learning_rate": 1.9317479674796748e-05, "loss": 0.1695, "step": 8395 }, { "epoch": 0.1024390243902439, "grad_norm": 0.8101431131362915, "learning_rate": 1.931707317073171e-05, "loss": 0.2079, "step": 8400 }, { "epoch": 0.1025, "grad_norm": 2.354929208755493, "learning_rate": 1.9316666666666668e-05, "loss": 0.1565, "step": 8405 }, { "epoch": 0.10256097560975609, "grad_norm": 1.1941392421722412, "learning_rate": 1.931626016260163e-05, "loss": 0.1476, "step": 8410 }, { "epoch": 0.10262195121951219, "grad_norm": 2.410217761993408, "learning_rate": 1.9315853658536587e-05, "loss": 0.1798, "step": 8415 }, { "epoch": 0.10268292682926829, "grad_norm": 1.794649362564087, "learning_rate": 1.931544715447155e-05, "loss": 0.1981, "step": 8420 }, { "epoch": 0.10274390243902438, "grad_norm": 1.2763200998306274, "learning_rate": 1.9315040650406503e-05, "loss": 0.1472, "step": 8425 }, { "epoch": 0.10280487804878048, "grad_norm": 0.8896083831787109, "learning_rate": 1.9314634146341465e-05, "loss": 0.2188, "step": 8430 }, { "epoch": 0.10286585365853658, "grad_norm": 1.0892860889434814, "learning_rate": 1.9314227642276423e-05, "loss": 0.1948, "step": 8435 }, { "epoch": 0.10292682926829268, "grad_norm": 0.9622324705123901, "learning_rate": 1.9313821138211385e-05, "loss": 0.2129, "step": 8440 }, { "epoch": 0.10298780487804877, "grad_norm": 1.206885814666748, "learning_rate": 1.9313414634146343e-05, "loss": 0.2186, "step": 8445 }, { "epoch": 0.10304878048780487, "grad_norm": 1.8125154972076416, "learning_rate": 1.9313008130081304e-05, "loss": 0.1583, "step": 8450 }, { "epoch": 0.10310975609756097, "grad_norm": 0.9027661085128784, "learning_rate": 1.9312601626016262e-05, "loss": 0.225, "step": 8455 }, { "epoch": 0.10317073170731707, "grad_norm": 0.8383747935295105, "learning_rate": 1.931219512195122e-05, "loss": 0.146, "step": 8460 }, { "epoch": 0.10323170731707317, "grad_norm": 1.1599055528640747, "learning_rate": 1.931178861788618e-05, "loss": 0.181, "step": 8465 }, { "epoch": 0.10329268292682926, "grad_norm": 2.292954683303833, "learning_rate": 1.931138211382114e-05, "loss": 0.194, "step": 8470 }, { "epoch": 0.10335365853658536, "grad_norm": 1.3652169704437256, "learning_rate": 1.9310975609756098e-05, "loss": 0.1666, "step": 8475 }, { "epoch": 0.10341463414634146, "grad_norm": 1.5795091390609741, "learning_rate": 1.931056910569106e-05, "loss": 0.1839, "step": 8480 }, { "epoch": 0.10347560975609756, "grad_norm": 1.1591322422027588, "learning_rate": 1.9310162601626018e-05, "loss": 0.1633, "step": 8485 }, { "epoch": 0.10353658536585365, "grad_norm": 1.0262590646743774, "learning_rate": 1.9309756097560976e-05, "loss": 0.175, "step": 8490 }, { "epoch": 0.10359756097560975, "grad_norm": 1.1059916019439697, "learning_rate": 1.9309349593495938e-05, "loss": 0.1932, "step": 8495 }, { "epoch": 0.10365853658536585, "grad_norm": 0.5884175896644592, "learning_rate": 1.9308943089430896e-05, "loss": 0.156, "step": 8500 }, { "epoch": 0.10371951219512195, "grad_norm": 1.2772022485733032, "learning_rate": 1.9308536585365857e-05, "loss": 0.1909, "step": 8505 }, { "epoch": 0.10378048780487804, "grad_norm": 1.3502764701843262, "learning_rate": 1.9308130081300815e-05, "loss": 0.1708, "step": 8510 }, { "epoch": 0.10384146341463414, "grad_norm": 0.822049081325531, "learning_rate": 1.9307723577235773e-05, "loss": 0.2216, "step": 8515 }, { "epoch": 0.10390243902439024, "grad_norm": 1.2160849571228027, "learning_rate": 1.930731707317073e-05, "loss": 0.2129, "step": 8520 }, { "epoch": 0.10396341463414634, "grad_norm": 1.160956859588623, "learning_rate": 1.9306910569105693e-05, "loss": 0.178, "step": 8525 }, { "epoch": 0.10402439024390243, "grad_norm": 1.1767652034759521, "learning_rate": 1.930650406504065e-05, "loss": 0.1817, "step": 8530 }, { "epoch": 0.10408536585365853, "grad_norm": 4.180246829986572, "learning_rate": 1.9306097560975613e-05, "loss": 0.1645, "step": 8535 }, { "epoch": 0.10414634146341463, "grad_norm": 0.8196483850479126, "learning_rate": 1.930569105691057e-05, "loss": 0.1895, "step": 8540 }, { "epoch": 0.10420731707317073, "grad_norm": 0.8953511714935303, "learning_rate": 1.930528455284553e-05, "loss": 0.1625, "step": 8545 }, { "epoch": 0.10426829268292682, "grad_norm": 0.8600825071334839, "learning_rate": 1.9304878048780487e-05, "loss": 0.1651, "step": 8550 }, { "epoch": 0.10432926829268292, "grad_norm": 1.4229061603546143, "learning_rate": 1.930447154471545e-05, "loss": 0.1498, "step": 8555 }, { "epoch": 0.10439024390243902, "grad_norm": 0.7418032884597778, "learning_rate": 1.9304065040650407e-05, "loss": 0.1667, "step": 8560 }, { "epoch": 0.10445121951219512, "grad_norm": 1.383743166923523, "learning_rate": 1.9303658536585368e-05, "loss": 0.184, "step": 8565 }, { "epoch": 0.10451219512195122, "grad_norm": 0.876448392868042, "learning_rate": 1.9303252032520326e-05, "loss": 0.1468, "step": 8570 }, { "epoch": 0.10457317073170731, "grad_norm": 1.0641039609909058, "learning_rate": 1.9302845528455285e-05, "loss": 0.1663, "step": 8575 }, { "epoch": 0.10463414634146341, "grad_norm": 0.9769371151924133, "learning_rate": 1.9302439024390246e-05, "loss": 0.1893, "step": 8580 }, { "epoch": 0.10469512195121951, "grad_norm": 1.6534487009048462, "learning_rate": 1.9302032520325204e-05, "loss": 0.1749, "step": 8585 }, { "epoch": 0.1047560975609756, "grad_norm": 1.1830685138702393, "learning_rate": 1.9301626016260166e-05, "loss": 0.1508, "step": 8590 }, { "epoch": 0.1048170731707317, "grad_norm": 1.7791130542755127, "learning_rate": 1.9301219512195124e-05, "loss": 0.1755, "step": 8595 }, { "epoch": 0.1048780487804878, "grad_norm": 0.7324893474578857, "learning_rate": 1.9300813008130085e-05, "loss": 0.1516, "step": 8600 }, { "epoch": 0.1049390243902439, "grad_norm": 0.736961841583252, "learning_rate": 1.930040650406504e-05, "loss": 0.1787, "step": 8605 }, { "epoch": 0.105, "grad_norm": 1.6655206680297852, "learning_rate": 1.93e-05, "loss": 0.2071, "step": 8610 }, { "epoch": 0.1050609756097561, "grad_norm": 1.6183959245681763, "learning_rate": 1.929959349593496e-05, "loss": 0.1853, "step": 8615 }, { "epoch": 0.10512195121951219, "grad_norm": 0.7356444597244263, "learning_rate": 1.929918699186992e-05, "loss": 0.1247, "step": 8620 }, { "epoch": 0.10518292682926829, "grad_norm": 1.067097544670105, "learning_rate": 1.929878048780488e-05, "loss": 0.1386, "step": 8625 }, { "epoch": 0.10524390243902439, "grad_norm": 1.3596211671829224, "learning_rate": 1.929837398373984e-05, "loss": 0.1398, "step": 8630 }, { "epoch": 0.10530487804878048, "grad_norm": 1.360726237297058, "learning_rate": 1.92979674796748e-05, "loss": 0.1992, "step": 8635 }, { "epoch": 0.10536585365853658, "grad_norm": 0.8569005131721497, "learning_rate": 1.9297560975609757e-05, "loss": 0.2115, "step": 8640 }, { "epoch": 0.10542682926829268, "grad_norm": 1.398969054222107, "learning_rate": 1.9297154471544715e-05, "loss": 0.2029, "step": 8645 }, { "epoch": 0.10548780487804878, "grad_norm": 1.3199543952941895, "learning_rate": 1.9296747967479677e-05, "loss": 0.2403, "step": 8650 }, { "epoch": 0.10554878048780487, "grad_norm": 0.783778190612793, "learning_rate": 1.9296341463414635e-05, "loss": 0.1412, "step": 8655 }, { "epoch": 0.10560975609756097, "grad_norm": 2.2424519062042236, "learning_rate": 1.9295934959349596e-05, "loss": 0.19, "step": 8660 }, { "epoch": 0.10567073170731707, "grad_norm": 1.1610044240951538, "learning_rate": 1.9295528455284555e-05, "loss": 0.1447, "step": 8665 }, { "epoch": 0.10573170731707317, "grad_norm": 0.9844884276390076, "learning_rate": 1.9295121951219513e-05, "loss": 0.165, "step": 8670 }, { "epoch": 0.10579268292682927, "grad_norm": 1.3523014783859253, "learning_rate": 1.9294715447154474e-05, "loss": 0.2147, "step": 8675 }, { "epoch": 0.10585365853658536, "grad_norm": 1.5648459196090698, "learning_rate": 1.9294308943089432e-05, "loss": 0.214, "step": 8680 }, { "epoch": 0.10591463414634146, "grad_norm": 0.6959861516952515, "learning_rate": 1.9293902439024394e-05, "loss": 0.1175, "step": 8685 }, { "epoch": 0.10597560975609756, "grad_norm": 0.9553279876708984, "learning_rate": 1.9293495934959352e-05, "loss": 0.1678, "step": 8690 }, { "epoch": 0.10603658536585366, "grad_norm": 0.7812435626983643, "learning_rate": 1.929308943089431e-05, "loss": 0.1597, "step": 8695 }, { "epoch": 0.10609756097560975, "grad_norm": 0.6207860708236694, "learning_rate": 1.9292682926829268e-05, "loss": 0.1244, "step": 8700 }, { "epoch": 0.10615853658536585, "grad_norm": 0.9251199960708618, "learning_rate": 1.929227642276423e-05, "loss": 0.164, "step": 8705 }, { "epoch": 0.10621951219512195, "grad_norm": 0.8331239223480225, "learning_rate": 1.9291869918699188e-05, "loss": 0.1625, "step": 8710 }, { "epoch": 0.10628048780487805, "grad_norm": 1.0328744649887085, "learning_rate": 1.929146341463415e-05, "loss": 0.2212, "step": 8715 }, { "epoch": 0.10634146341463414, "grad_norm": 1.5330979824066162, "learning_rate": 1.9291056910569107e-05, "loss": 0.2163, "step": 8720 }, { "epoch": 0.10640243902439024, "grad_norm": 0.6258312463760376, "learning_rate": 1.9290650406504066e-05, "loss": 0.1546, "step": 8725 }, { "epoch": 0.10646341463414634, "grad_norm": 0.7747581601142883, "learning_rate": 1.9290243902439024e-05, "loss": 0.1243, "step": 8730 }, { "epoch": 0.10652439024390244, "grad_norm": 1.085817575454712, "learning_rate": 1.9289837398373985e-05, "loss": 0.201, "step": 8735 }, { "epoch": 0.10658536585365853, "grad_norm": 0.94925856590271, "learning_rate": 1.9289430894308943e-05, "loss": 0.2061, "step": 8740 }, { "epoch": 0.10664634146341463, "grad_norm": 0.8641387820243835, "learning_rate": 1.9289024390243905e-05, "loss": 0.1853, "step": 8745 }, { "epoch": 0.10670731707317073, "grad_norm": 1.145566463470459, "learning_rate": 1.9288617886178863e-05, "loss": 0.1475, "step": 8750 }, { "epoch": 0.10676829268292683, "grad_norm": 0.8672536611557007, "learning_rate": 1.928821138211382e-05, "loss": 0.154, "step": 8755 }, { "epoch": 0.10682926829268292, "grad_norm": 1.0389838218688965, "learning_rate": 1.9287804878048783e-05, "loss": 0.1705, "step": 8760 }, { "epoch": 0.10689024390243902, "grad_norm": 1.0296430587768555, "learning_rate": 1.928739837398374e-05, "loss": 0.192, "step": 8765 }, { "epoch": 0.10695121951219512, "grad_norm": 0.7259446978569031, "learning_rate": 1.9286991869918702e-05, "loss": 0.1593, "step": 8770 }, { "epoch": 0.10701219512195122, "grad_norm": 0.6693785786628723, "learning_rate": 1.928658536585366e-05, "loss": 0.1676, "step": 8775 }, { "epoch": 0.10707317073170732, "grad_norm": 1.423147439956665, "learning_rate": 1.9286178861788622e-05, "loss": 0.1467, "step": 8780 }, { "epoch": 0.10713414634146341, "grad_norm": 1.3678537607192993, "learning_rate": 1.9285772357723577e-05, "loss": 0.167, "step": 8785 }, { "epoch": 0.10719512195121951, "grad_norm": 1.3549765348434448, "learning_rate": 1.9285365853658538e-05, "loss": 0.1653, "step": 8790 }, { "epoch": 0.10725609756097561, "grad_norm": 1.0255529880523682, "learning_rate": 1.9284959349593496e-05, "loss": 0.1572, "step": 8795 }, { "epoch": 0.1073170731707317, "grad_norm": 0.9179916977882385, "learning_rate": 1.9284552845528458e-05, "loss": 0.1602, "step": 8800 }, { "epoch": 0.1073780487804878, "grad_norm": 1.4045932292938232, "learning_rate": 1.9284146341463416e-05, "loss": 0.1779, "step": 8805 }, { "epoch": 0.1074390243902439, "grad_norm": 1.1433515548706055, "learning_rate": 1.9283739837398377e-05, "loss": 0.1907, "step": 8810 }, { "epoch": 0.1075, "grad_norm": 0.8702130317687988, "learning_rate": 1.9283333333333332e-05, "loss": 0.1648, "step": 8815 }, { "epoch": 0.1075609756097561, "grad_norm": 0.8258054256439209, "learning_rate": 1.9282926829268294e-05, "loss": 0.1734, "step": 8820 }, { "epoch": 0.1076219512195122, "grad_norm": 0.8636536002159119, "learning_rate": 1.9282520325203252e-05, "loss": 0.1529, "step": 8825 }, { "epoch": 0.10768292682926829, "grad_norm": 1.9830355644226074, "learning_rate": 1.9282113821138213e-05, "loss": 0.2149, "step": 8830 }, { "epoch": 0.10774390243902439, "grad_norm": 0.8844713568687439, "learning_rate": 1.928170731707317e-05, "loss": 0.2154, "step": 8835 }, { "epoch": 0.10780487804878049, "grad_norm": 0.9279322624206543, "learning_rate": 1.9281300813008133e-05, "loss": 0.2325, "step": 8840 }, { "epoch": 0.10786585365853658, "grad_norm": 1.0966962575912476, "learning_rate": 1.928089430894309e-05, "loss": 0.1659, "step": 8845 }, { "epoch": 0.10792682926829268, "grad_norm": 1.2073185443878174, "learning_rate": 1.928048780487805e-05, "loss": 0.2106, "step": 8850 }, { "epoch": 0.10798780487804878, "grad_norm": 2.0077028274536133, "learning_rate": 1.928008130081301e-05, "loss": 0.2281, "step": 8855 }, { "epoch": 0.10804878048780488, "grad_norm": 1.0286989212036133, "learning_rate": 1.927967479674797e-05, "loss": 0.1599, "step": 8860 }, { "epoch": 0.10810975609756097, "grad_norm": 1.3122693300247192, "learning_rate": 1.927926829268293e-05, "loss": 0.2022, "step": 8865 }, { "epoch": 0.10817073170731707, "grad_norm": 1.1861122846603394, "learning_rate": 1.927886178861789e-05, "loss": 0.1604, "step": 8870 }, { "epoch": 0.10823170731707317, "grad_norm": 0.6823275089263916, "learning_rate": 1.9278455284552847e-05, "loss": 0.1975, "step": 8875 }, { "epoch": 0.10829268292682927, "grad_norm": 0.7629083395004272, "learning_rate": 1.9278048780487805e-05, "loss": 0.1566, "step": 8880 }, { "epoch": 0.10835365853658536, "grad_norm": 1.138498067855835, "learning_rate": 1.9277642276422766e-05, "loss": 0.1923, "step": 8885 }, { "epoch": 0.10841463414634146, "grad_norm": 0.8015488982200623, "learning_rate": 1.9277235772357724e-05, "loss": 0.133, "step": 8890 }, { "epoch": 0.10847560975609756, "grad_norm": 1.7183622121810913, "learning_rate": 1.9276829268292686e-05, "loss": 0.1606, "step": 8895 }, { "epoch": 0.10853658536585366, "grad_norm": 0.8423365950584412, "learning_rate": 1.9276422764227644e-05, "loss": 0.1505, "step": 8900 }, { "epoch": 0.10859756097560976, "grad_norm": 0.9970700144767761, "learning_rate": 1.9276016260162602e-05, "loss": 0.2045, "step": 8905 }, { "epoch": 0.10865853658536585, "grad_norm": 1.1107127666473389, "learning_rate": 1.927560975609756e-05, "loss": 0.1823, "step": 8910 }, { "epoch": 0.10871951219512195, "grad_norm": 0.799592137336731, "learning_rate": 1.9275203252032522e-05, "loss": 0.1217, "step": 8915 }, { "epoch": 0.10878048780487805, "grad_norm": 1.0463813543319702, "learning_rate": 1.927479674796748e-05, "loss": 0.1527, "step": 8920 }, { "epoch": 0.10884146341463415, "grad_norm": 1.609073519706726, "learning_rate": 1.927439024390244e-05, "loss": 0.1722, "step": 8925 }, { "epoch": 0.10890243902439024, "grad_norm": 0.80283522605896, "learning_rate": 1.92739837398374e-05, "loss": 0.2054, "step": 8930 }, { "epoch": 0.10896341463414634, "grad_norm": 0.8078998923301697, "learning_rate": 1.9273577235772358e-05, "loss": 0.1714, "step": 8935 }, { "epoch": 0.10902439024390244, "grad_norm": 1.114029049873352, "learning_rate": 1.927317073170732e-05, "loss": 0.1518, "step": 8940 }, { "epoch": 0.10908536585365854, "grad_norm": 0.8707472681999207, "learning_rate": 1.9272764227642277e-05, "loss": 0.1657, "step": 8945 }, { "epoch": 0.10914634146341463, "grad_norm": 0.9356319308280945, "learning_rate": 1.927235772357724e-05, "loss": 0.1622, "step": 8950 }, { "epoch": 0.10920731707317073, "grad_norm": 1.730858325958252, "learning_rate": 1.9271951219512197e-05, "loss": 0.1851, "step": 8955 }, { "epoch": 0.10926829268292683, "grad_norm": 1.596854329109192, "learning_rate": 1.9271544715447155e-05, "loss": 0.1477, "step": 8960 }, { "epoch": 0.10932926829268293, "grad_norm": 1.6027675867080688, "learning_rate": 1.9271138211382113e-05, "loss": 0.1333, "step": 8965 }, { "epoch": 0.10939024390243902, "grad_norm": 1.1912277936935425, "learning_rate": 1.9270731707317075e-05, "loss": 0.192, "step": 8970 }, { "epoch": 0.10945121951219512, "grad_norm": 0.7355931997299194, "learning_rate": 1.9270325203252033e-05, "loss": 0.218, "step": 8975 }, { "epoch": 0.10951219512195122, "grad_norm": 1.5671069622039795, "learning_rate": 1.9269918699186994e-05, "loss": 0.1821, "step": 8980 }, { "epoch": 0.10957317073170732, "grad_norm": 0.8659321665763855, "learning_rate": 1.9269512195121953e-05, "loss": 0.1548, "step": 8985 }, { "epoch": 0.10963414634146341, "grad_norm": 1.2817368507385254, "learning_rate": 1.9269105691056914e-05, "loss": 0.1566, "step": 8990 }, { "epoch": 0.10969512195121951, "grad_norm": 1.3542884588241577, "learning_rate": 1.926869918699187e-05, "loss": 0.1823, "step": 8995 }, { "epoch": 0.10975609756097561, "grad_norm": 1.1162865161895752, "learning_rate": 1.926829268292683e-05, "loss": 0.1546, "step": 9000 }, { "epoch": 0.10981707317073171, "grad_norm": 0.6664257645606995, "learning_rate": 1.926788617886179e-05, "loss": 0.1558, "step": 9005 }, { "epoch": 0.1098780487804878, "grad_norm": 1.5265239477157593, "learning_rate": 1.926747967479675e-05, "loss": 0.2208, "step": 9010 }, { "epoch": 0.1099390243902439, "grad_norm": 0.8589429259300232, "learning_rate": 1.9267073170731708e-05, "loss": 0.162, "step": 9015 }, { "epoch": 0.11, "grad_norm": 1.3135521411895752, "learning_rate": 1.926666666666667e-05, "loss": 0.1664, "step": 9020 }, { "epoch": 0.1100609756097561, "grad_norm": 0.45262929797172546, "learning_rate": 1.9266260162601628e-05, "loss": 0.1227, "step": 9025 }, { "epoch": 0.1101219512195122, "grad_norm": 0.6080160737037659, "learning_rate": 1.9265853658536586e-05, "loss": 0.1377, "step": 9030 }, { "epoch": 0.1101829268292683, "grad_norm": 0.5458572506904602, "learning_rate": 1.9265447154471547e-05, "loss": 0.1501, "step": 9035 }, { "epoch": 0.11024390243902439, "grad_norm": 1.0341253280639648, "learning_rate": 1.9265040650406506e-05, "loss": 0.1556, "step": 9040 }, { "epoch": 0.11030487804878049, "grad_norm": 1.6516876220703125, "learning_rate": 1.9264634146341467e-05, "loss": 0.1748, "step": 9045 }, { "epoch": 0.11036585365853659, "grad_norm": 1.1548759937286377, "learning_rate": 1.9264227642276425e-05, "loss": 0.2251, "step": 9050 }, { "epoch": 0.11042682926829268, "grad_norm": 0.5800977349281311, "learning_rate": 1.9263821138211383e-05, "loss": 0.1634, "step": 9055 }, { "epoch": 0.11048780487804878, "grad_norm": 0.6799824237823486, "learning_rate": 1.926341463414634e-05, "loss": 0.1283, "step": 9060 }, { "epoch": 0.11054878048780488, "grad_norm": 1.4590709209442139, "learning_rate": 1.9263008130081303e-05, "loss": 0.2015, "step": 9065 }, { "epoch": 0.11060975609756098, "grad_norm": 0.7929201126098633, "learning_rate": 1.926260162601626e-05, "loss": 0.183, "step": 9070 }, { "epoch": 0.11067073170731707, "grad_norm": 0.8352380990982056, "learning_rate": 1.9262195121951223e-05, "loss": 0.1368, "step": 9075 }, { "epoch": 0.11073170731707317, "grad_norm": 1.0157884359359741, "learning_rate": 1.926178861788618e-05, "loss": 0.1746, "step": 9080 }, { "epoch": 0.11079268292682927, "grad_norm": 1.1488680839538574, "learning_rate": 1.926138211382114e-05, "loss": 0.1776, "step": 9085 }, { "epoch": 0.11085365853658537, "grad_norm": 0.9476421475410461, "learning_rate": 1.9260975609756097e-05, "loss": 0.1799, "step": 9090 }, { "epoch": 0.11091463414634146, "grad_norm": 1.375391960144043, "learning_rate": 1.926056910569106e-05, "loss": 0.1861, "step": 9095 }, { "epoch": 0.11097560975609756, "grad_norm": 1.446678638458252, "learning_rate": 1.9260162601626017e-05, "loss": 0.18, "step": 9100 }, { "epoch": 0.11103658536585366, "grad_norm": 1.410568118095398, "learning_rate": 1.9259756097560978e-05, "loss": 0.1781, "step": 9105 }, { "epoch": 0.11109756097560976, "grad_norm": 0.8788667917251587, "learning_rate": 1.9259349593495936e-05, "loss": 0.1382, "step": 9110 }, { "epoch": 0.11115853658536586, "grad_norm": 1.1134891510009766, "learning_rate": 1.9258943089430894e-05, "loss": 0.1928, "step": 9115 }, { "epoch": 0.11121951219512195, "grad_norm": 1.1356358528137207, "learning_rate": 1.9258536585365856e-05, "loss": 0.151, "step": 9120 }, { "epoch": 0.11128048780487805, "grad_norm": 0.7801120281219482, "learning_rate": 1.9258130081300814e-05, "loss": 0.1482, "step": 9125 }, { "epoch": 0.11134146341463415, "grad_norm": 1.1168701648712158, "learning_rate": 1.9257723577235776e-05, "loss": 0.2145, "step": 9130 }, { "epoch": 0.11140243902439025, "grad_norm": 1.1311334371566772, "learning_rate": 1.9257317073170734e-05, "loss": 0.1509, "step": 9135 }, { "epoch": 0.11146341463414634, "grad_norm": 0.9670603275299072, "learning_rate": 1.9256910569105692e-05, "loss": 0.1705, "step": 9140 }, { "epoch": 0.11152439024390244, "grad_norm": 1.1638154983520508, "learning_rate": 1.925650406504065e-05, "loss": 0.1445, "step": 9145 }, { "epoch": 0.11158536585365854, "grad_norm": 1.2155267000198364, "learning_rate": 1.925609756097561e-05, "loss": 0.1626, "step": 9150 }, { "epoch": 0.11164634146341464, "grad_norm": 1.2705389261245728, "learning_rate": 1.925569105691057e-05, "loss": 0.1304, "step": 9155 }, { "epoch": 0.11170731707317073, "grad_norm": 0.9850234389305115, "learning_rate": 1.925528455284553e-05, "loss": 0.1535, "step": 9160 }, { "epoch": 0.11176829268292683, "grad_norm": 1.221476435661316, "learning_rate": 1.925487804878049e-05, "loss": 0.1545, "step": 9165 }, { "epoch": 0.11182926829268293, "grad_norm": 0.8471919894218445, "learning_rate": 1.925447154471545e-05, "loss": 0.1267, "step": 9170 }, { "epoch": 0.11189024390243903, "grad_norm": 1.2075351476669312, "learning_rate": 1.9254065040650406e-05, "loss": 0.1981, "step": 9175 }, { "epoch": 0.11195121951219512, "grad_norm": 1.1085550785064697, "learning_rate": 1.9253658536585367e-05, "loss": 0.1915, "step": 9180 }, { "epoch": 0.11201219512195122, "grad_norm": 1.0129926204681396, "learning_rate": 1.9253252032520325e-05, "loss": 0.1982, "step": 9185 }, { "epoch": 0.11207317073170732, "grad_norm": 1.3361327648162842, "learning_rate": 1.9252845528455287e-05, "loss": 0.1389, "step": 9190 }, { "epoch": 0.11213414634146342, "grad_norm": 0.5420787930488586, "learning_rate": 1.9252439024390245e-05, "loss": 0.1694, "step": 9195 }, { "epoch": 0.11219512195121951, "grad_norm": 1.7046347856521606, "learning_rate": 1.9252032520325206e-05, "loss": 0.1804, "step": 9200 }, { "epoch": 0.11225609756097561, "grad_norm": 1.6856584548950195, "learning_rate": 1.9251626016260164e-05, "loss": 0.2201, "step": 9205 }, { "epoch": 0.11231707317073171, "grad_norm": 1.7795456647872925, "learning_rate": 1.9251219512195123e-05, "loss": 0.1539, "step": 9210 }, { "epoch": 0.11237804878048781, "grad_norm": 1.353980541229248, "learning_rate": 1.9250813008130084e-05, "loss": 0.1903, "step": 9215 }, { "epoch": 0.1124390243902439, "grad_norm": 1.0860706567764282, "learning_rate": 1.9250406504065042e-05, "loss": 0.207, "step": 9220 }, { "epoch": 0.1125, "grad_norm": 0.6116988658905029, "learning_rate": 1.925e-05, "loss": 0.1558, "step": 9225 }, { "epoch": 0.1125609756097561, "grad_norm": 2.031090021133423, "learning_rate": 1.9249593495934962e-05, "loss": 0.1752, "step": 9230 }, { "epoch": 0.1126219512195122, "grad_norm": 1.9379249811172485, "learning_rate": 1.924918699186992e-05, "loss": 0.165, "step": 9235 }, { "epoch": 0.1126829268292683, "grad_norm": 1.2612468004226685, "learning_rate": 1.9248780487804878e-05, "loss": 0.2028, "step": 9240 }, { "epoch": 0.1127439024390244, "grad_norm": 1.2390998601913452, "learning_rate": 1.924837398373984e-05, "loss": 0.1885, "step": 9245 }, { "epoch": 0.11280487804878049, "grad_norm": 0.9816779494285583, "learning_rate": 1.9247967479674798e-05, "loss": 0.155, "step": 9250 }, { "epoch": 0.11286585365853659, "grad_norm": 2.8705174922943115, "learning_rate": 1.924756097560976e-05, "loss": 0.1626, "step": 9255 }, { "epoch": 0.11292682926829269, "grad_norm": 0.8547055125236511, "learning_rate": 1.9247154471544717e-05, "loss": 0.1779, "step": 9260 }, { "epoch": 0.11298780487804878, "grad_norm": 1.6156893968582153, "learning_rate": 1.9246747967479676e-05, "loss": 0.1734, "step": 9265 }, { "epoch": 0.11304878048780488, "grad_norm": 1.0954499244689941, "learning_rate": 1.9246341463414634e-05, "loss": 0.1703, "step": 9270 }, { "epoch": 0.11310975609756098, "grad_norm": 0.9187131524085999, "learning_rate": 1.9245934959349595e-05, "loss": 0.1986, "step": 9275 }, { "epoch": 0.11317073170731708, "grad_norm": 2.2828664779663086, "learning_rate": 1.9245528455284553e-05, "loss": 0.1421, "step": 9280 }, { "epoch": 0.11323170731707317, "grad_norm": 0.9288508892059326, "learning_rate": 1.9245121951219515e-05, "loss": 0.1784, "step": 9285 }, { "epoch": 0.11329268292682927, "grad_norm": 1.2361373901367188, "learning_rate": 1.9244715447154473e-05, "loss": 0.2384, "step": 9290 }, { "epoch": 0.11335365853658537, "grad_norm": 1.2142080068588257, "learning_rate": 1.924430894308943e-05, "loss": 0.1622, "step": 9295 }, { "epoch": 0.11341463414634147, "grad_norm": 0.6966942548751831, "learning_rate": 1.9243902439024393e-05, "loss": 0.1458, "step": 9300 }, { "epoch": 0.11347560975609756, "grad_norm": 0.7362017631530762, "learning_rate": 1.924349593495935e-05, "loss": 0.132, "step": 9305 }, { "epoch": 0.11353658536585366, "grad_norm": 2.5492305755615234, "learning_rate": 1.9243089430894312e-05, "loss": 0.1277, "step": 9310 }, { "epoch": 0.11359756097560976, "grad_norm": 0.9264789819717407, "learning_rate": 1.924268292682927e-05, "loss": 0.1332, "step": 9315 }, { "epoch": 0.11365853658536586, "grad_norm": 1.5585795640945435, "learning_rate": 1.924227642276423e-05, "loss": 0.1654, "step": 9320 }, { "epoch": 0.11371951219512196, "grad_norm": 1.398689866065979, "learning_rate": 1.9241869918699187e-05, "loss": 0.1528, "step": 9325 }, { "epoch": 0.11378048780487805, "grad_norm": 1.13597571849823, "learning_rate": 1.9241463414634148e-05, "loss": 0.1859, "step": 9330 }, { "epoch": 0.11384146341463415, "grad_norm": 1.0258007049560547, "learning_rate": 1.9241056910569106e-05, "loss": 0.1906, "step": 9335 }, { "epoch": 0.11390243902439025, "grad_norm": 0.8160684704780579, "learning_rate": 1.9240650406504068e-05, "loss": 0.1538, "step": 9340 }, { "epoch": 0.11396341463414635, "grad_norm": 0.7495965957641602, "learning_rate": 1.9240243902439026e-05, "loss": 0.1524, "step": 9345 }, { "epoch": 0.11402439024390244, "grad_norm": 0.8050821423530579, "learning_rate": 1.9239837398373987e-05, "loss": 0.1543, "step": 9350 }, { "epoch": 0.11408536585365854, "grad_norm": 0.7495627999305725, "learning_rate": 1.9239430894308942e-05, "loss": 0.1551, "step": 9355 }, { "epoch": 0.11414634146341464, "grad_norm": 0.6620636582374573, "learning_rate": 1.9239024390243904e-05, "loss": 0.1778, "step": 9360 }, { "epoch": 0.11420731707317074, "grad_norm": 0.5495951175689697, "learning_rate": 1.9238617886178862e-05, "loss": 0.1404, "step": 9365 }, { "epoch": 0.11426829268292683, "grad_norm": 0.6994228363037109, "learning_rate": 1.9238211382113823e-05, "loss": 0.1365, "step": 9370 }, { "epoch": 0.11432926829268293, "grad_norm": 1.2902131080627441, "learning_rate": 1.923780487804878e-05, "loss": 0.1868, "step": 9375 }, { "epoch": 0.11439024390243903, "grad_norm": 0.8645362257957458, "learning_rate": 1.9237398373983743e-05, "loss": 0.1493, "step": 9380 }, { "epoch": 0.11445121951219513, "grad_norm": 1.058730959892273, "learning_rate": 1.92369918699187e-05, "loss": 0.1839, "step": 9385 }, { "epoch": 0.11451219512195122, "grad_norm": 0.9249047636985779, "learning_rate": 1.923658536585366e-05, "loss": 0.1852, "step": 9390 }, { "epoch": 0.11457317073170732, "grad_norm": 1.5583750009536743, "learning_rate": 1.923617886178862e-05, "loss": 0.1924, "step": 9395 }, { "epoch": 0.11463414634146342, "grad_norm": 1.1659232378005981, "learning_rate": 1.923577235772358e-05, "loss": 0.1866, "step": 9400 }, { "epoch": 0.11469512195121952, "grad_norm": 1.0865916013717651, "learning_rate": 1.9235365853658537e-05, "loss": 0.1712, "step": 9405 }, { "epoch": 0.11475609756097561, "grad_norm": 1.426133394241333, "learning_rate": 1.92349593495935e-05, "loss": 0.1817, "step": 9410 }, { "epoch": 0.11481707317073171, "grad_norm": 0.7496104836463928, "learning_rate": 1.9234552845528457e-05, "loss": 0.1248, "step": 9415 }, { "epoch": 0.11487804878048781, "grad_norm": 0.7420843243598938, "learning_rate": 1.9234146341463415e-05, "loss": 0.1365, "step": 9420 }, { "epoch": 0.11493902439024391, "grad_norm": 2.3949379920959473, "learning_rate": 1.9233739837398376e-05, "loss": 0.184, "step": 9425 }, { "epoch": 0.115, "grad_norm": 2.213926076889038, "learning_rate": 1.9233333333333334e-05, "loss": 0.1832, "step": 9430 }, { "epoch": 0.1150609756097561, "grad_norm": 1.0588812828063965, "learning_rate": 1.9232926829268296e-05, "loss": 0.1254, "step": 9435 }, { "epoch": 0.1151219512195122, "grad_norm": 0.8332968950271606, "learning_rate": 1.9232520325203254e-05, "loss": 0.1366, "step": 9440 }, { "epoch": 0.1151829268292683, "grad_norm": 1.4554145336151123, "learning_rate": 1.9232113821138212e-05, "loss": 0.1774, "step": 9445 }, { "epoch": 0.1152439024390244, "grad_norm": 0.5378711819648743, "learning_rate": 1.923170731707317e-05, "loss": 0.1704, "step": 9450 }, { "epoch": 0.1153048780487805, "grad_norm": 0.8271157741546631, "learning_rate": 1.9231300813008132e-05, "loss": 0.1644, "step": 9455 }, { "epoch": 0.11536585365853659, "grad_norm": 0.8057090640068054, "learning_rate": 1.923089430894309e-05, "loss": 0.1547, "step": 9460 }, { "epoch": 0.11542682926829269, "grad_norm": 1.7997667789459229, "learning_rate": 1.923048780487805e-05, "loss": 0.1647, "step": 9465 }, { "epoch": 0.11548780487804879, "grad_norm": 0.983284592628479, "learning_rate": 1.923008130081301e-05, "loss": 0.1512, "step": 9470 }, { "epoch": 0.11554878048780488, "grad_norm": 0.7662161588668823, "learning_rate": 1.9229674796747968e-05, "loss": 0.1614, "step": 9475 }, { "epoch": 0.11560975609756098, "grad_norm": 0.774635374546051, "learning_rate": 1.922926829268293e-05, "loss": 0.1837, "step": 9480 }, { "epoch": 0.11567073170731708, "grad_norm": 1.1926846504211426, "learning_rate": 1.9228861788617887e-05, "loss": 0.1802, "step": 9485 }, { "epoch": 0.11573170731707318, "grad_norm": 4.058959484100342, "learning_rate": 1.9228455284552845e-05, "loss": 0.1586, "step": 9490 }, { "epoch": 0.11579268292682927, "grad_norm": 1.0403681993484497, "learning_rate": 1.9228048780487807e-05, "loss": 0.1601, "step": 9495 }, { "epoch": 0.11585365853658537, "grad_norm": 1.0463879108428955, "learning_rate": 1.9227642276422765e-05, "loss": 0.1932, "step": 9500 }, { "epoch": 0.11591463414634147, "grad_norm": 1.2178906202316284, "learning_rate": 1.9227235772357723e-05, "loss": 0.1195, "step": 9505 }, { "epoch": 0.11597560975609757, "grad_norm": 1.0992529392242432, "learning_rate": 1.9226829268292685e-05, "loss": 0.1565, "step": 9510 }, { "epoch": 0.11603658536585366, "grad_norm": 0.8353822231292725, "learning_rate": 1.9226422764227643e-05, "loss": 0.1495, "step": 9515 }, { "epoch": 0.11609756097560976, "grad_norm": 1.7677193880081177, "learning_rate": 1.9226016260162604e-05, "loss": 0.1768, "step": 9520 }, { "epoch": 0.11615853658536586, "grad_norm": 0.8550642132759094, "learning_rate": 1.9225609756097563e-05, "loss": 0.1727, "step": 9525 }, { "epoch": 0.11621951219512196, "grad_norm": 2.6292738914489746, "learning_rate": 1.9225203252032524e-05, "loss": 0.1629, "step": 9530 }, { "epoch": 0.11628048780487805, "grad_norm": 2.1292922496795654, "learning_rate": 1.922479674796748e-05, "loss": 0.1471, "step": 9535 }, { "epoch": 0.11634146341463415, "grad_norm": 1.117588758468628, "learning_rate": 1.922439024390244e-05, "loss": 0.159, "step": 9540 }, { "epoch": 0.11640243902439025, "grad_norm": 0.7675883173942566, "learning_rate": 1.92239837398374e-05, "loss": 0.1757, "step": 9545 }, { "epoch": 0.11646341463414635, "grad_norm": 0.7833041548728943, "learning_rate": 1.922357723577236e-05, "loss": 0.1782, "step": 9550 }, { "epoch": 0.11652439024390245, "grad_norm": 1.265960693359375, "learning_rate": 1.9223170731707318e-05, "loss": 0.1579, "step": 9555 }, { "epoch": 0.11658536585365854, "grad_norm": 0.6683962345123291, "learning_rate": 1.922276422764228e-05, "loss": 0.1836, "step": 9560 }, { "epoch": 0.11664634146341464, "grad_norm": 1.108737587928772, "learning_rate": 1.9222357723577238e-05, "loss": 0.2197, "step": 9565 }, { "epoch": 0.11670731707317074, "grad_norm": 1.2620649337768555, "learning_rate": 1.9221951219512196e-05, "loss": 0.2163, "step": 9570 }, { "epoch": 0.11676829268292684, "grad_norm": 2.32614803314209, "learning_rate": 1.9221544715447157e-05, "loss": 0.2003, "step": 9575 }, { "epoch": 0.11682926829268293, "grad_norm": 1.1280657052993774, "learning_rate": 1.9221138211382116e-05, "loss": 0.1377, "step": 9580 }, { "epoch": 0.11689024390243903, "grad_norm": 0.6862676739692688, "learning_rate": 1.9220731707317074e-05, "loss": 0.1748, "step": 9585 }, { "epoch": 0.11695121951219513, "grad_norm": 0.9936310648918152, "learning_rate": 1.9220325203252035e-05, "loss": 0.1809, "step": 9590 }, { "epoch": 0.11701219512195123, "grad_norm": 1.627987265586853, "learning_rate": 1.9219918699186993e-05, "loss": 0.2077, "step": 9595 }, { "epoch": 0.11707317073170732, "grad_norm": 1.1185994148254395, "learning_rate": 1.921951219512195e-05, "loss": 0.128, "step": 9600 }, { "epoch": 0.11713414634146342, "grad_norm": 0.553956151008606, "learning_rate": 1.9219105691056913e-05, "loss": 0.1332, "step": 9605 }, { "epoch": 0.1171951219512195, "grad_norm": 1.1312634944915771, "learning_rate": 1.921869918699187e-05, "loss": 0.1514, "step": 9610 }, { "epoch": 0.1172560975609756, "grad_norm": 1.0118811130523682, "learning_rate": 1.9218292682926833e-05, "loss": 0.2214, "step": 9615 }, { "epoch": 0.1173170731707317, "grad_norm": 0.9322419166564941, "learning_rate": 1.921788617886179e-05, "loss": 0.1574, "step": 9620 }, { "epoch": 0.1173780487804878, "grad_norm": 1.2998708486557007, "learning_rate": 1.921747967479675e-05, "loss": 0.1601, "step": 9625 }, { "epoch": 0.1174390243902439, "grad_norm": 1.2902777194976807, "learning_rate": 1.9217073170731707e-05, "loss": 0.1625, "step": 9630 }, { "epoch": 0.1175, "grad_norm": 0.7376141548156738, "learning_rate": 1.921666666666667e-05, "loss": 0.1155, "step": 9635 }, { "epoch": 0.11756097560975609, "grad_norm": 1.209019660949707, "learning_rate": 1.9216260162601627e-05, "loss": 0.1625, "step": 9640 }, { "epoch": 0.11762195121951219, "grad_norm": 0.6646397709846497, "learning_rate": 1.9215853658536588e-05, "loss": 0.1626, "step": 9645 }, { "epoch": 0.11768292682926829, "grad_norm": 0.6665369272232056, "learning_rate": 1.9215447154471546e-05, "loss": 0.1459, "step": 9650 }, { "epoch": 0.11774390243902438, "grad_norm": 1.165114164352417, "learning_rate": 1.9215040650406504e-05, "loss": 0.1408, "step": 9655 }, { "epoch": 0.11780487804878048, "grad_norm": 1.3625980615615845, "learning_rate": 1.9214634146341466e-05, "loss": 0.1942, "step": 9660 }, { "epoch": 0.11786585365853658, "grad_norm": 1.095091700553894, "learning_rate": 1.9214227642276424e-05, "loss": 0.2212, "step": 9665 }, { "epoch": 0.11792682926829268, "grad_norm": 0.9553360939025879, "learning_rate": 1.9213821138211382e-05, "loss": 0.1834, "step": 9670 }, { "epoch": 0.11798780487804877, "grad_norm": 1.3045886754989624, "learning_rate": 1.9213414634146344e-05, "loss": 0.1635, "step": 9675 }, { "epoch": 0.11804878048780487, "grad_norm": 0.7815367579460144, "learning_rate": 1.9213008130081302e-05, "loss": 0.1227, "step": 9680 }, { "epoch": 0.11810975609756097, "grad_norm": 1.1501188278198242, "learning_rate": 1.921260162601626e-05, "loss": 0.182, "step": 9685 }, { "epoch": 0.11817073170731707, "grad_norm": 0.9634864330291748, "learning_rate": 1.921219512195122e-05, "loss": 0.1083, "step": 9690 }, { "epoch": 0.11823170731707316, "grad_norm": 1.100071907043457, "learning_rate": 1.921178861788618e-05, "loss": 0.1418, "step": 9695 }, { "epoch": 0.11829268292682926, "grad_norm": 1.7441539764404297, "learning_rate": 1.921138211382114e-05, "loss": 0.1665, "step": 9700 }, { "epoch": 0.11835365853658536, "grad_norm": 1.5725903511047363, "learning_rate": 1.92109756097561e-05, "loss": 0.1285, "step": 9705 }, { "epoch": 0.11841463414634146, "grad_norm": 1.712024450302124, "learning_rate": 1.921056910569106e-05, "loss": 0.2037, "step": 9710 }, { "epoch": 0.11847560975609756, "grad_norm": 1.636780023574829, "learning_rate": 1.9210162601626015e-05, "loss": 0.1674, "step": 9715 }, { "epoch": 0.11853658536585365, "grad_norm": 0.5741573572158813, "learning_rate": 1.9209756097560977e-05, "loss": 0.1656, "step": 9720 }, { "epoch": 0.11859756097560975, "grad_norm": 0.8146753311157227, "learning_rate": 1.9209349593495935e-05, "loss": 0.1288, "step": 9725 }, { "epoch": 0.11865853658536585, "grad_norm": 1.2709983587265015, "learning_rate": 1.9208943089430897e-05, "loss": 0.1572, "step": 9730 }, { "epoch": 0.11871951219512195, "grad_norm": 1.0511256456375122, "learning_rate": 1.9208536585365855e-05, "loss": 0.1607, "step": 9735 }, { "epoch": 0.11878048780487804, "grad_norm": 1.3252195119857788, "learning_rate": 1.9208130081300816e-05, "loss": 0.1675, "step": 9740 }, { "epoch": 0.11884146341463414, "grad_norm": 0.7515648007392883, "learning_rate": 1.9207723577235774e-05, "loss": 0.1334, "step": 9745 }, { "epoch": 0.11890243902439024, "grad_norm": 0.9002703428268433, "learning_rate": 1.9207317073170733e-05, "loss": 0.1347, "step": 9750 }, { "epoch": 0.11896341463414634, "grad_norm": 1.0480787754058838, "learning_rate": 1.920691056910569e-05, "loss": 0.1883, "step": 9755 }, { "epoch": 0.11902439024390243, "grad_norm": 0.9084897637367249, "learning_rate": 1.9206504065040652e-05, "loss": 0.1334, "step": 9760 }, { "epoch": 0.11908536585365853, "grad_norm": 1.0075732469558716, "learning_rate": 1.920609756097561e-05, "loss": 0.1598, "step": 9765 }, { "epoch": 0.11914634146341463, "grad_norm": 1.1331595182418823, "learning_rate": 1.9205691056910572e-05, "loss": 0.177, "step": 9770 }, { "epoch": 0.11920731707317073, "grad_norm": 1.386496901512146, "learning_rate": 1.920528455284553e-05, "loss": 0.1738, "step": 9775 }, { "epoch": 0.11926829268292682, "grad_norm": 1.039937973022461, "learning_rate": 1.9204878048780488e-05, "loss": 0.1336, "step": 9780 }, { "epoch": 0.11932926829268292, "grad_norm": 1.0272276401519775, "learning_rate": 1.920447154471545e-05, "loss": 0.1358, "step": 9785 }, { "epoch": 0.11939024390243902, "grad_norm": 1.1922297477722168, "learning_rate": 1.9204065040650408e-05, "loss": 0.1533, "step": 9790 }, { "epoch": 0.11945121951219512, "grad_norm": 1.08523428440094, "learning_rate": 1.920365853658537e-05, "loss": 0.1266, "step": 9795 }, { "epoch": 0.11951219512195121, "grad_norm": 1.4359171390533447, "learning_rate": 1.9203252032520327e-05, "loss": 0.1877, "step": 9800 }, { "epoch": 0.11957317073170731, "grad_norm": 1.0670727491378784, "learning_rate": 1.9202845528455285e-05, "loss": 0.161, "step": 9805 }, { "epoch": 0.11963414634146341, "grad_norm": 0.9806472063064575, "learning_rate": 1.9202439024390244e-05, "loss": 0.1576, "step": 9810 }, { "epoch": 0.11969512195121951, "grad_norm": 1.597318172454834, "learning_rate": 1.9202032520325205e-05, "loss": 0.1296, "step": 9815 }, { "epoch": 0.1197560975609756, "grad_norm": 0.8495111465454102, "learning_rate": 1.9201626016260163e-05, "loss": 0.1491, "step": 9820 }, { "epoch": 0.1198170731707317, "grad_norm": 0.7970826029777527, "learning_rate": 1.9201219512195125e-05, "loss": 0.1554, "step": 9825 }, { "epoch": 0.1198780487804878, "grad_norm": 0.7113397121429443, "learning_rate": 1.9200813008130083e-05, "loss": 0.1496, "step": 9830 }, { "epoch": 0.1199390243902439, "grad_norm": 3.2669894695281982, "learning_rate": 1.920040650406504e-05, "loss": 0.2009, "step": 9835 }, { "epoch": 0.12, "grad_norm": 0.7762947678565979, "learning_rate": 1.9200000000000003e-05, "loss": 0.1821, "step": 9840 }, { "epoch": 0.1200609756097561, "grad_norm": 0.6924903988838196, "learning_rate": 1.919959349593496e-05, "loss": 0.1656, "step": 9845 }, { "epoch": 0.12012195121951219, "grad_norm": 1.1393182277679443, "learning_rate": 1.919918699186992e-05, "loss": 0.1559, "step": 9850 }, { "epoch": 0.12018292682926829, "grad_norm": 1.5047755241394043, "learning_rate": 1.919878048780488e-05, "loss": 0.1489, "step": 9855 }, { "epoch": 0.12024390243902439, "grad_norm": 0.6970740556716919, "learning_rate": 1.919837398373984e-05, "loss": 0.1581, "step": 9860 }, { "epoch": 0.12030487804878048, "grad_norm": 0.9145111441612244, "learning_rate": 1.9197967479674797e-05, "loss": 0.1437, "step": 9865 }, { "epoch": 0.12036585365853658, "grad_norm": 1.1541664600372314, "learning_rate": 1.9197560975609758e-05, "loss": 0.1697, "step": 9870 }, { "epoch": 0.12042682926829268, "grad_norm": 0.7206994891166687, "learning_rate": 1.9197154471544716e-05, "loss": 0.1314, "step": 9875 }, { "epoch": 0.12048780487804878, "grad_norm": 1.7666105031967163, "learning_rate": 1.9196747967479678e-05, "loss": 0.1769, "step": 9880 }, { "epoch": 0.12054878048780487, "grad_norm": 0.9087777733802795, "learning_rate": 1.9196341463414636e-05, "loss": 0.1642, "step": 9885 }, { "epoch": 0.12060975609756097, "grad_norm": 0.9096185564994812, "learning_rate": 1.9195934959349597e-05, "loss": 0.1308, "step": 9890 }, { "epoch": 0.12067073170731707, "grad_norm": 1.2325761318206787, "learning_rate": 1.9195528455284552e-05, "loss": 0.1703, "step": 9895 }, { "epoch": 0.12073170731707317, "grad_norm": 0.5495175123214722, "learning_rate": 1.9195121951219514e-05, "loss": 0.1442, "step": 9900 }, { "epoch": 0.12079268292682926, "grad_norm": 0.7834795117378235, "learning_rate": 1.9194715447154472e-05, "loss": 0.1153, "step": 9905 }, { "epoch": 0.12085365853658536, "grad_norm": 0.8437790274620056, "learning_rate": 1.9194308943089433e-05, "loss": 0.133, "step": 9910 }, { "epoch": 0.12091463414634146, "grad_norm": 1.3076567649841309, "learning_rate": 1.919390243902439e-05, "loss": 0.1687, "step": 9915 }, { "epoch": 0.12097560975609756, "grad_norm": 1.8106927871704102, "learning_rate": 1.9193495934959353e-05, "loss": 0.1447, "step": 9920 }, { "epoch": 0.12103658536585366, "grad_norm": 1.040403962135315, "learning_rate": 1.919308943089431e-05, "loss": 0.1515, "step": 9925 }, { "epoch": 0.12109756097560975, "grad_norm": 2.135084867477417, "learning_rate": 1.919268292682927e-05, "loss": 0.1949, "step": 9930 }, { "epoch": 0.12115853658536585, "grad_norm": 0.8555440902709961, "learning_rate": 1.9192276422764227e-05, "loss": 0.2012, "step": 9935 }, { "epoch": 0.12121951219512195, "grad_norm": 0.7913194894790649, "learning_rate": 1.919186991869919e-05, "loss": 0.159, "step": 9940 }, { "epoch": 0.12128048780487805, "grad_norm": 0.7257370352745056, "learning_rate": 1.9191463414634147e-05, "loss": 0.1665, "step": 9945 }, { "epoch": 0.12134146341463414, "grad_norm": 1.6387641429901123, "learning_rate": 1.919105691056911e-05, "loss": 0.1628, "step": 9950 }, { "epoch": 0.12140243902439024, "grad_norm": 2.013810634613037, "learning_rate": 1.9190650406504067e-05, "loss": 0.1306, "step": 9955 }, { "epoch": 0.12146341463414634, "grad_norm": 1.0545662641525269, "learning_rate": 1.9190243902439025e-05, "loss": 0.1845, "step": 9960 }, { "epoch": 0.12152439024390244, "grad_norm": 1.9446667432785034, "learning_rate": 1.9189837398373986e-05, "loss": 0.1829, "step": 9965 }, { "epoch": 0.12158536585365853, "grad_norm": 0.8598202466964722, "learning_rate": 1.9189430894308944e-05, "loss": 0.1476, "step": 9970 }, { "epoch": 0.12164634146341463, "grad_norm": 1.3030891418457031, "learning_rate": 1.9189024390243906e-05, "loss": 0.1744, "step": 9975 }, { "epoch": 0.12170731707317073, "grad_norm": 0.7068329453468323, "learning_rate": 1.9188617886178864e-05, "loss": 0.192, "step": 9980 }, { "epoch": 0.12176829268292683, "grad_norm": 1.4354588985443115, "learning_rate": 1.9188211382113822e-05, "loss": 0.1431, "step": 9985 }, { "epoch": 0.12182926829268292, "grad_norm": 1.9480245113372803, "learning_rate": 1.918780487804878e-05, "loss": 0.1539, "step": 9990 }, { "epoch": 0.12189024390243902, "grad_norm": 1.1411842107772827, "learning_rate": 1.9187398373983742e-05, "loss": 0.1199, "step": 9995 }, { "epoch": 0.12195121951219512, "grad_norm": 1.9259512424468994, "learning_rate": 1.91869918699187e-05, "loss": 0.1663, "step": 10000 }, { "epoch": 0.12201219512195122, "grad_norm": 1.005428671836853, "learning_rate": 1.918658536585366e-05, "loss": 0.1528, "step": 10005 }, { "epoch": 0.12207317073170731, "grad_norm": 1.0118119716644287, "learning_rate": 1.918617886178862e-05, "loss": 0.1356, "step": 10010 }, { "epoch": 0.12213414634146341, "grad_norm": 0.7039440274238586, "learning_rate": 1.9185772357723578e-05, "loss": 0.1658, "step": 10015 }, { "epoch": 0.12219512195121951, "grad_norm": 1.4254741668701172, "learning_rate": 1.9185365853658536e-05, "loss": 0.1848, "step": 10020 }, { "epoch": 0.12225609756097561, "grad_norm": 1.2787359952926636, "learning_rate": 1.9184959349593497e-05, "loss": 0.1393, "step": 10025 }, { "epoch": 0.1223170731707317, "grad_norm": 1.649646282196045, "learning_rate": 1.9184552845528455e-05, "loss": 0.1096, "step": 10030 }, { "epoch": 0.1223780487804878, "grad_norm": 0.9531561136245728, "learning_rate": 1.9184146341463417e-05, "loss": 0.1206, "step": 10035 }, { "epoch": 0.1224390243902439, "grad_norm": 0.8565712571144104, "learning_rate": 1.9183739837398375e-05, "loss": 0.1869, "step": 10040 }, { "epoch": 0.1225, "grad_norm": 1.0015963315963745, "learning_rate": 1.9183333333333333e-05, "loss": 0.1722, "step": 10045 }, { "epoch": 0.1225609756097561, "grad_norm": 1.0309637784957886, "learning_rate": 1.9182926829268295e-05, "loss": 0.1403, "step": 10050 }, { "epoch": 0.12262195121951219, "grad_norm": 0.6299694180488586, "learning_rate": 1.9182520325203253e-05, "loss": 0.1077, "step": 10055 }, { "epoch": 0.12268292682926829, "grad_norm": 1.250794529914856, "learning_rate": 1.9182113821138214e-05, "loss": 0.1489, "step": 10060 }, { "epoch": 0.12274390243902439, "grad_norm": 1.1486217975616455, "learning_rate": 1.9181707317073172e-05, "loss": 0.1697, "step": 10065 }, { "epoch": 0.12280487804878049, "grad_norm": 0.9820972681045532, "learning_rate": 1.9181300813008134e-05, "loss": 0.1563, "step": 10070 }, { "epoch": 0.12286585365853658, "grad_norm": 0.8954923152923584, "learning_rate": 1.918089430894309e-05, "loss": 0.1602, "step": 10075 }, { "epoch": 0.12292682926829268, "grad_norm": 1.695294737815857, "learning_rate": 1.918048780487805e-05, "loss": 0.1988, "step": 10080 }, { "epoch": 0.12298780487804878, "grad_norm": 0.49306976795196533, "learning_rate": 1.918008130081301e-05, "loss": 0.0973, "step": 10085 }, { "epoch": 0.12304878048780488, "grad_norm": 0.8403416872024536, "learning_rate": 1.917967479674797e-05, "loss": 0.1638, "step": 10090 }, { "epoch": 0.12310975609756097, "grad_norm": 1.0916284322738647, "learning_rate": 1.9179268292682928e-05, "loss": 0.1346, "step": 10095 }, { "epoch": 0.12317073170731707, "grad_norm": 1.0604722499847412, "learning_rate": 1.917886178861789e-05, "loss": 0.133, "step": 10100 }, { "epoch": 0.12323170731707317, "grad_norm": 0.7880720496177673, "learning_rate": 1.9178455284552848e-05, "loss": 0.1255, "step": 10105 }, { "epoch": 0.12329268292682927, "grad_norm": 0.8124613165855408, "learning_rate": 1.9178048780487806e-05, "loss": 0.125, "step": 10110 }, { "epoch": 0.12335365853658536, "grad_norm": 1.447683334350586, "learning_rate": 1.9177642276422764e-05, "loss": 0.1415, "step": 10115 }, { "epoch": 0.12341463414634146, "grad_norm": 1.0308146476745605, "learning_rate": 1.9177235772357725e-05, "loss": 0.1205, "step": 10120 }, { "epoch": 0.12347560975609756, "grad_norm": 1.187229871749878, "learning_rate": 1.9176829268292684e-05, "loss": 0.1702, "step": 10125 }, { "epoch": 0.12353658536585366, "grad_norm": 0.8793966770172119, "learning_rate": 1.9176422764227645e-05, "loss": 0.1435, "step": 10130 }, { "epoch": 0.12359756097560975, "grad_norm": 3.7249984741210938, "learning_rate": 1.9176016260162603e-05, "loss": 0.1484, "step": 10135 }, { "epoch": 0.12365853658536585, "grad_norm": 1.033384084701538, "learning_rate": 1.917560975609756e-05, "loss": 0.1372, "step": 10140 }, { "epoch": 0.12371951219512195, "grad_norm": 0.8622066974639893, "learning_rate": 1.9175203252032523e-05, "loss": 0.1289, "step": 10145 }, { "epoch": 0.12378048780487805, "grad_norm": 1.0669221878051758, "learning_rate": 1.917479674796748e-05, "loss": 0.1482, "step": 10150 }, { "epoch": 0.12384146341463415, "grad_norm": 1.2532700300216675, "learning_rate": 1.9174390243902442e-05, "loss": 0.2301, "step": 10155 }, { "epoch": 0.12390243902439024, "grad_norm": 1.7242016792297363, "learning_rate": 1.91739837398374e-05, "loss": 0.1313, "step": 10160 }, { "epoch": 0.12396341463414634, "grad_norm": 0.8811997175216675, "learning_rate": 1.917357723577236e-05, "loss": 0.1555, "step": 10165 }, { "epoch": 0.12402439024390244, "grad_norm": 1.607588768005371, "learning_rate": 1.9173170731707317e-05, "loss": 0.1437, "step": 10170 }, { "epoch": 0.12408536585365854, "grad_norm": 1.1553106307983398, "learning_rate": 1.917276422764228e-05, "loss": 0.1699, "step": 10175 }, { "epoch": 0.12414634146341463, "grad_norm": 1.5663138628005981, "learning_rate": 1.9172357723577237e-05, "loss": 0.121, "step": 10180 }, { "epoch": 0.12420731707317073, "grad_norm": 1.6468324661254883, "learning_rate": 1.9171951219512198e-05, "loss": 0.1392, "step": 10185 }, { "epoch": 0.12426829268292683, "grad_norm": 1.263201117515564, "learning_rate": 1.9171544715447156e-05, "loss": 0.1818, "step": 10190 }, { "epoch": 0.12432926829268293, "grad_norm": 1.296183705329895, "learning_rate": 1.9171138211382114e-05, "loss": 0.1871, "step": 10195 }, { "epoch": 0.12439024390243902, "grad_norm": 1.2525001764297485, "learning_rate": 1.9170731707317072e-05, "loss": 0.1714, "step": 10200 }, { "epoch": 0.12445121951219512, "grad_norm": 1.170768141746521, "learning_rate": 1.9170325203252034e-05, "loss": 0.1942, "step": 10205 }, { "epoch": 0.12451219512195122, "grad_norm": 1.2181692123413086, "learning_rate": 1.9169918699186992e-05, "loss": 0.1239, "step": 10210 }, { "epoch": 0.12457317073170732, "grad_norm": 1.3919626474380493, "learning_rate": 1.9169512195121954e-05, "loss": 0.1808, "step": 10215 }, { "epoch": 0.12463414634146341, "grad_norm": 0.9499415755271912, "learning_rate": 1.9169105691056912e-05, "loss": 0.1459, "step": 10220 }, { "epoch": 0.12469512195121951, "grad_norm": 2.7024617195129395, "learning_rate": 1.916869918699187e-05, "loss": 0.167, "step": 10225 }, { "epoch": 0.12475609756097561, "grad_norm": 0.7349246144294739, "learning_rate": 1.916829268292683e-05, "loss": 0.1175, "step": 10230 }, { "epoch": 0.12481707317073171, "grad_norm": 1.3966219425201416, "learning_rate": 1.916788617886179e-05, "loss": 0.1672, "step": 10235 }, { "epoch": 0.1248780487804878, "grad_norm": 1.1337051391601562, "learning_rate": 1.916747967479675e-05, "loss": 0.1542, "step": 10240 }, { "epoch": 0.1249390243902439, "grad_norm": 0.749988317489624, "learning_rate": 1.916707317073171e-05, "loss": 0.1536, "step": 10245 }, { "epoch": 0.125, "grad_norm": 1.083153486251831, "learning_rate": 1.916666666666667e-05, "loss": 0.2289, "step": 10250 }, { "epoch": 0.1250609756097561, "grad_norm": 1.115179181098938, "learning_rate": 1.916626016260163e-05, "loss": 0.1721, "step": 10255 }, { "epoch": 0.1251219512195122, "grad_norm": 0.7409632802009583, "learning_rate": 1.9165853658536587e-05, "loss": 0.1565, "step": 10260 }, { "epoch": 0.1251829268292683, "grad_norm": 1.0313775539398193, "learning_rate": 1.9165447154471545e-05, "loss": 0.1801, "step": 10265 }, { "epoch": 0.1252439024390244, "grad_norm": 0.6872244477272034, "learning_rate": 1.9165040650406507e-05, "loss": 0.1194, "step": 10270 }, { "epoch": 0.1253048780487805, "grad_norm": 2.238865375518799, "learning_rate": 1.9164634146341465e-05, "loss": 0.1951, "step": 10275 }, { "epoch": 0.12536585365853659, "grad_norm": 0.8631643652915955, "learning_rate": 1.9164227642276426e-05, "loss": 0.1226, "step": 10280 }, { "epoch": 0.12542682926829268, "grad_norm": 0.8751355409622192, "learning_rate": 1.9163821138211384e-05, "loss": 0.1435, "step": 10285 }, { "epoch": 0.12548780487804878, "grad_norm": 1.3391947746276855, "learning_rate": 1.9163414634146342e-05, "loss": 0.186, "step": 10290 }, { "epoch": 0.12554878048780488, "grad_norm": 1.386936902999878, "learning_rate": 1.91630081300813e-05, "loss": 0.159, "step": 10295 }, { "epoch": 0.12560975609756098, "grad_norm": 1.3100535869598389, "learning_rate": 1.9162601626016262e-05, "loss": 0.2109, "step": 10300 }, { "epoch": 0.12567073170731707, "grad_norm": 0.7937014102935791, "learning_rate": 1.916219512195122e-05, "loss": 0.1719, "step": 10305 }, { "epoch": 0.12573170731707317, "grad_norm": 1.0335549116134644, "learning_rate": 1.9161788617886182e-05, "loss": 0.1553, "step": 10310 }, { "epoch": 0.12579268292682927, "grad_norm": 1.3720329999923706, "learning_rate": 1.916138211382114e-05, "loss": 0.1666, "step": 10315 }, { "epoch": 0.12585365853658537, "grad_norm": 2.08571195602417, "learning_rate": 1.9160975609756098e-05, "loss": 0.1481, "step": 10320 }, { "epoch": 0.12591463414634146, "grad_norm": 0.8903197050094604, "learning_rate": 1.916056910569106e-05, "loss": 0.1141, "step": 10325 }, { "epoch": 0.12597560975609756, "grad_norm": 0.7533864974975586, "learning_rate": 1.9160162601626018e-05, "loss": 0.1182, "step": 10330 }, { "epoch": 0.12603658536585366, "grad_norm": 1.0900684595108032, "learning_rate": 1.915975609756098e-05, "loss": 0.149, "step": 10335 }, { "epoch": 0.12609756097560976, "grad_norm": 2.159269332885742, "learning_rate": 1.9159349593495937e-05, "loss": 0.1163, "step": 10340 }, { "epoch": 0.12615853658536585, "grad_norm": 0.563373863697052, "learning_rate": 1.9158943089430895e-05, "loss": 0.1202, "step": 10345 }, { "epoch": 0.12621951219512195, "grad_norm": 1.1957991123199463, "learning_rate": 1.9158536585365854e-05, "loss": 0.1427, "step": 10350 }, { "epoch": 0.12628048780487805, "grad_norm": 0.9625672698020935, "learning_rate": 1.9158130081300815e-05, "loss": 0.1501, "step": 10355 }, { "epoch": 0.12634146341463415, "grad_norm": 1.4976874589920044, "learning_rate": 1.9157723577235773e-05, "loss": 0.1748, "step": 10360 }, { "epoch": 0.12640243902439025, "grad_norm": 1.2595634460449219, "learning_rate": 1.9157317073170735e-05, "loss": 0.171, "step": 10365 }, { "epoch": 0.12646341463414634, "grad_norm": 1.041703462600708, "learning_rate": 1.9156910569105693e-05, "loss": 0.1706, "step": 10370 }, { "epoch": 0.12652439024390244, "grad_norm": 1.0045512914657593, "learning_rate": 1.9156504065040654e-05, "loss": 0.1547, "step": 10375 }, { "epoch": 0.12658536585365854, "grad_norm": 0.7224791646003723, "learning_rate": 1.915609756097561e-05, "loss": 0.1784, "step": 10380 }, { "epoch": 0.12664634146341464, "grad_norm": 0.6662632822990417, "learning_rate": 1.915569105691057e-05, "loss": 0.1361, "step": 10385 }, { "epoch": 0.12670731707317073, "grad_norm": 0.7180773615837097, "learning_rate": 1.915528455284553e-05, "loss": 0.1413, "step": 10390 }, { "epoch": 0.12676829268292683, "grad_norm": 0.7920253276824951, "learning_rate": 1.915487804878049e-05, "loss": 0.1243, "step": 10395 }, { "epoch": 0.12682926829268293, "grad_norm": 1.108095407485962, "learning_rate": 1.915447154471545e-05, "loss": 0.1389, "step": 10400 }, { "epoch": 0.12689024390243903, "grad_norm": 2.325070381164551, "learning_rate": 1.915406504065041e-05, "loss": 0.1895, "step": 10405 }, { "epoch": 0.12695121951219512, "grad_norm": 0.6840277910232544, "learning_rate": 1.9153658536585368e-05, "loss": 0.1509, "step": 10410 }, { "epoch": 0.12701219512195122, "grad_norm": 1.0610650777816772, "learning_rate": 1.9153252032520326e-05, "loss": 0.1556, "step": 10415 }, { "epoch": 0.12707317073170732, "grad_norm": 1.1185005903244019, "learning_rate": 1.9152845528455288e-05, "loss": 0.1621, "step": 10420 }, { "epoch": 0.12713414634146342, "grad_norm": 1.1412153244018555, "learning_rate": 1.9152439024390246e-05, "loss": 0.1533, "step": 10425 }, { "epoch": 0.12719512195121951, "grad_norm": 0.7787503600120544, "learning_rate": 1.9152032520325204e-05, "loss": 0.1931, "step": 10430 }, { "epoch": 0.1272560975609756, "grad_norm": 0.5554239749908447, "learning_rate": 1.9151626016260165e-05, "loss": 0.139, "step": 10435 }, { "epoch": 0.1273170731707317, "grad_norm": 0.705259382724762, "learning_rate": 1.9151219512195124e-05, "loss": 0.1116, "step": 10440 }, { "epoch": 0.1273780487804878, "grad_norm": 0.6390609741210938, "learning_rate": 1.915081300813008e-05, "loss": 0.1441, "step": 10445 }, { "epoch": 0.1274390243902439, "grad_norm": 0.8198109865188599, "learning_rate": 1.9150406504065043e-05, "loss": 0.1574, "step": 10450 }, { "epoch": 0.1275, "grad_norm": 0.8316435813903809, "learning_rate": 1.915e-05, "loss": 0.1619, "step": 10455 }, { "epoch": 0.1275609756097561, "grad_norm": 0.6387101411819458, "learning_rate": 1.9149593495934963e-05, "loss": 0.1354, "step": 10460 }, { "epoch": 0.1276219512195122, "grad_norm": 1.6782784461975098, "learning_rate": 1.914918699186992e-05, "loss": 0.1768, "step": 10465 }, { "epoch": 0.1276829268292683, "grad_norm": 0.9658781290054321, "learning_rate": 1.914878048780488e-05, "loss": 0.1754, "step": 10470 }, { "epoch": 0.1277439024390244, "grad_norm": 1.0132472515106201, "learning_rate": 1.9148373983739837e-05, "loss": 0.1492, "step": 10475 }, { "epoch": 0.1278048780487805, "grad_norm": 1.0174206495285034, "learning_rate": 1.91479674796748e-05, "loss": 0.1699, "step": 10480 }, { "epoch": 0.1278658536585366, "grad_norm": 0.7830051779747009, "learning_rate": 1.9147560975609757e-05, "loss": 0.1412, "step": 10485 }, { "epoch": 0.12792682926829269, "grad_norm": 3.0156993865966797, "learning_rate": 1.914715447154472e-05, "loss": 0.1843, "step": 10490 }, { "epoch": 0.12798780487804878, "grad_norm": 0.8819276094436646, "learning_rate": 1.9146747967479676e-05, "loss": 0.1972, "step": 10495 }, { "epoch": 0.12804878048780488, "grad_norm": 1.6802979707717896, "learning_rate": 1.9146341463414635e-05, "loss": 0.1558, "step": 10500 }, { "epoch": 0.12810975609756098, "grad_norm": 0.6888974905014038, "learning_rate": 1.9145934959349596e-05, "loss": 0.1785, "step": 10505 }, { "epoch": 0.12817073170731708, "grad_norm": 0.9790892004966736, "learning_rate": 1.9145528455284554e-05, "loss": 0.1671, "step": 10510 }, { "epoch": 0.12823170731707317, "grad_norm": 0.7859046459197998, "learning_rate": 1.9145121951219516e-05, "loss": 0.1538, "step": 10515 }, { "epoch": 0.12829268292682927, "grad_norm": 1.4511511325836182, "learning_rate": 1.9144715447154474e-05, "loss": 0.2442, "step": 10520 }, { "epoch": 0.12835365853658537, "grad_norm": 1.2015540599822998, "learning_rate": 1.9144308943089432e-05, "loss": 0.1463, "step": 10525 }, { "epoch": 0.12841463414634147, "grad_norm": 0.6995355486869812, "learning_rate": 1.914390243902439e-05, "loss": 0.1165, "step": 10530 }, { "epoch": 0.12847560975609756, "grad_norm": 1.5153592824935913, "learning_rate": 1.914349593495935e-05, "loss": 0.1375, "step": 10535 }, { "epoch": 0.12853658536585366, "grad_norm": 0.6672884821891785, "learning_rate": 1.914308943089431e-05, "loss": 0.1332, "step": 10540 }, { "epoch": 0.12859756097560976, "grad_norm": 0.8956942558288574, "learning_rate": 1.914268292682927e-05, "loss": 0.1383, "step": 10545 }, { "epoch": 0.12865853658536586, "grad_norm": 1.3106480836868286, "learning_rate": 1.914227642276423e-05, "loss": 0.1665, "step": 10550 }, { "epoch": 0.12871951219512195, "grad_norm": 1.7854582071304321, "learning_rate": 1.914186991869919e-05, "loss": 0.1626, "step": 10555 }, { "epoch": 0.12878048780487805, "grad_norm": 1.65248441696167, "learning_rate": 1.9141463414634146e-05, "loss": 0.1341, "step": 10560 }, { "epoch": 0.12884146341463415, "grad_norm": 0.9947753548622131, "learning_rate": 1.9141056910569107e-05, "loss": 0.1635, "step": 10565 }, { "epoch": 0.12890243902439025, "grad_norm": 0.8894158601760864, "learning_rate": 1.9140650406504065e-05, "loss": 0.1501, "step": 10570 }, { "epoch": 0.12896341463414634, "grad_norm": 2.0480141639709473, "learning_rate": 1.9140243902439027e-05, "loss": 0.1541, "step": 10575 }, { "epoch": 0.12902439024390244, "grad_norm": 0.9109150171279907, "learning_rate": 1.9139837398373985e-05, "loss": 0.1593, "step": 10580 }, { "epoch": 0.12908536585365854, "grad_norm": 0.9066460132598877, "learning_rate": 1.9139430894308947e-05, "loss": 0.1608, "step": 10585 }, { "epoch": 0.12914634146341464, "grad_norm": 1.1167951822280884, "learning_rate": 1.9139024390243905e-05, "loss": 0.1527, "step": 10590 }, { "epoch": 0.12920731707317074, "grad_norm": 0.8842816352844238, "learning_rate": 1.9138617886178863e-05, "loss": 0.1331, "step": 10595 }, { "epoch": 0.12926829268292683, "grad_norm": 0.681736171245575, "learning_rate": 1.9138211382113824e-05, "loss": 0.1344, "step": 10600 }, { "epoch": 0.12932926829268293, "grad_norm": 1.9099929332733154, "learning_rate": 1.9137804878048782e-05, "loss": 0.164, "step": 10605 }, { "epoch": 0.12939024390243903, "grad_norm": 1.6013065576553345, "learning_rate": 1.913739837398374e-05, "loss": 0.1738, "step": 10610 }, { "epoch": 0.12945121951219513, "grad_norm": 0.5860087275505066, "learning_rate": 1.9136991869918702e-05, "loss": 0.1372, "step": 10615 }, { "epoch": 0.12951219512195122, "grad_norm": 0.8818345069885254, "learning_rate": 1.913658536585366e-05, "loss": 0.1708, "step": 10620 }, { "epoch": 0.12957317073170732, "grad_norm": 1.052882194519043, "learning_rate": 1.9136178861788618e-05, "loss": 0.1818, "step": 10625 }, { "epoch": 0.12963414634146342, "grad_norm": 0.9600399732589722, "learning_rate": 1.913577235772358e-05, "loss": 0.1779, "step": 10630 }, { "epoch": 0.12969512195121952, "grad_norm": 0.845919132232666, "learning_rate": 1.9135365853658538e-05, "loss": 0.1213, "step": 10635 }, { "epoch": 0.12975609756097561, "grad_norm": 0.7050665616989136, "learning_rate": 1.91349593495935e-05, "loss": 0.1601, "step": 10640 }, { "epoch": 0.1298170731707317, "grad_norm": 1.2808680534362793, "learning_rate": 1.9134552845528458e-05, "loss": 0.1126, "step": 10645 }, { "epoch": 0.1298780487804878, "grad_norm": 1.0461124181747437, "learning_rate": 1.9134146341463416e-05, "loss": 0.1461, "step": 10650 }, { "epoch": 0.1299390243902439, "grad_norm": 0.8436731100082397, "learning_rate": 1.9133739837398374e-05, "loss": 0.1381, "step": 10655 }, { "epoch": 0.13, "grad_norm": 0.7762411236763, "learning_rate": 1.9133333333333335e-05, "loss": 0.1388, "step": 10660 }, { "epoch": 0.1300609756097561, "grad_norm": 1.183056354522705, "learning_rate": 1.9132926829268293e-05, "loss": 0.1816, "step": 10665 }, { "epoch": 0.1301219512195122, "grad_norm": 1.6328012943267822, "learning_rate": 1.9132520325203255e-05, "loss": 0.2373, "step": 10670 }, { "epoch": 0.1301829268292683, "grad_norm": 1.2386658191680908, "learning_rate": 1.9132113821138213e-05, "loss": 0.1553, "step": 10675 }, { "epoch": 0.1302439024390244, "grad_norm": 1.888447880744934, "learning_rate": 1.913170731707317e-05, "loss": 0.169, "step": 10680 }, { "epoch": 0.1303048780487805, "grad_norm": 0.8449842929840088, "learning_rate": 1.9131300813008133e-05, "loss": 0.1284, "step": 10685 }, { "epoch": 0.1303658536585366, "grad_norm": 1.0321383476257324, "learning_rate": 1.913089430894309e-05, "loss": 0.1657, "step": 10690 }, { "epoch": 0.1304268292682927, "grad_norm": 1.4578758478164673, "learning_rate": 1.913048780487805e-05, "loss": 0.1931, "step": 10695 }, { "epoch": 0.13048780487804879, "grad_norm": 0.7786099910736084, "learning_rate": 1.913008130081301e-05, "loss": 0.1578, "step": 10700 }, { "epoch": 0.13054878048780488, "grad_norm": 0.9874414205551147, "learning_rate": 1.912967479674797e-05, "loss": 0.1683, "step": 10705 }, { "epoch": 0.13060975609756098, "grad_norm": 1.4779119491577148, "learning_rate": 1.9129268292682927e-05, "loss": 0.132, "step": 10710 }, { "epoch": 0.13067073170731708, "grad_norm": 0.7067000865936279, "learning_rate": 1.912886178861789e-05, "loss": 0.1238, "step": 10715 }, { "epoch": 0.13073170731707318, "grad_norm": 1.2584254741668701, "learning_rate": 1.9128455284552846e-05, "loss": 0.1398, "step": 10720 }, { "epoch": 0.13079268292682927, "grad_norm": 1.345089316368103, "learning_rate": 1.9128048780487808e-05, "loss": 0.1486, "step": 10725 }, { "epoch": 0.13085365853658537, "grad_norm": 0.933559000492096, "learning_rate": 1.9127642276422766e-05, "loss": 0.1835, "step": 10730 }, { "epoch": 0.13091463414634147, "grad_norm": 1.8101437091827393, "learning_rate": 1.9127235772357728e-05, "loss": 0.1253, "step": 10735 }, { "epoch": 0.13097560975609757, "grad_norm": 0.8987864851951599, "learning_rate": 1.9126829268292682e-05, "loss": 0.1589, "step": 10740 }, { "epoch": 0.13103658536585366, "grad_norm": 0.9523582458496094, "learning_rate": 1.9126422764227644e-05, "loss": 0.165, "step": 10745 }, { "epoch": 0.13109756097560976, "grad_norm": 0.7728905081748962, "learning_rate": 1.9126016260162602e-05, "loss": 0.1886, "step": 10750 }, { "epoch": 0.13115853658536586, "grad_norm": 0.9669042229652405, "learning_rate": 1.9125609756097564e-05, "loss": 0.1143, "step": 10755 }, { "epoch": 0.13121951219512196, "grad_norm": 1.1085083484649658, "learning_rate": 1.912520325203252e-05, "loss": 0.1751, "step": 10760 }, { "epoch": 0.13128048780487805, "grad_norm": 0.7547772526741028, "learning_rate": 1.9124796747967483e-05, "loss": 0.1689, "step": 10765 }, { "epoch": 0.13134146341463415, "grad_norm": 0.9252743721008301, "learning_rate": 1.912439024390244e-05, "loss": 0.1331, "step": 10770 }, { "epoch": 0.13140243902439025, "grad_norm": 0.666464626789093, "learning_rate": 1.91239837398374e-05, "loss": 0.1545, "step": 10775 }, { "epoch": 0.13146341463414635, "grad_norm": 1.0140379667282104, "learning_rate": 1.912357723577236e-05, "loss": 0.1496, "step": 10780 }, { "epoch": 0.13152439024390244, "grad_norm": 1.442001461982727, "learning_rate": 1.912317073170732e-05, "loss": 0.1736, "step": 10785 }, { "epoch": 0.13158536585365854, "grad_norm": 0.9783215522766113, "learning_rate": 1.9122764227642277e-05, "loss": 0.1792, "step": 10790 }, { "epoch": 0.13164634146341464, "grad_norm": 1.2522857189178467, "learning_rate": 1.912235772357724e-05, "loss": 0.1205, "step": 10795 }, { "epoch": 0.13170731707317074, "grad_norm": 0.7530253529548645, "learning_rate": 1.9121951219512197e-05, "loss": 0.1346, "step": 10800 }, { "epoch": 0.13176829268292684, "grad_norm": 1.007722020149231, "learning_rate": 1.9121544715447155e-05, "loss": 0.1177, "step": 10805 }, { "epoch": 0.13182926829268293, "grad_norm": 0.9013819098472595, "learning_rate": 1.9121138211382116e-05, "loss": 0.1394, "step": 10810 }, { "epoch": 0.13189024390243903, "grad_norm": 0.8977115750312805, "learning_rate": 1.9120731707317075e-05, "loss": 0.1257, "step": 10815 }, { "epoch": 0.13195121951219513, "grad_norm": 0.9386659860610962, "learning_rate": 1.9120325203252036e-05, "loss": 0.1098, "step": 10820 }, { "epoch": 0.13201219512195123, "grad_norm": 1.5328254699707031, "learning_rate": 1.9119918699186994e-05, "loss": 0.1454, "step": 10825 }, { "epoch": 0.13207317073170732, "grad_norm": 0.8156777620315552, "learning_rate": 1.9119512195121952e-05, "loss": 0.1688, "step": 10830 }, { "epoch": 0.13213414634146342, "grad_norm": 1.2218629121780396, "learning_rate": 1.911910569105691e-05, "loss": 0.109, "step": 10835 }, { "epoch": 0.13219512195121952, "grad_norm": 1.5158402919769287, "learning_rate": 1.9118699186991872e-05, "loss": 0.1588, "step": 10840 }, { "epoch": 0.13225609756097562, "grad_norm": 0.9685631990432739, "learning_rate": 1.911829268292683e-05, "loss": 0.1442, "step": 10845 }, { "epoch": 0.1323170731707317, "grad_norm": 0.37977850437164307, "learning_rate": 1.911788617886179e-05, "loss": 0.1706, "step": 10850 }, { "epoch": 0.1323780487804878, "grad_norm": 1.2679933309555054, "learning_rate": 1.911747967479675e-05, "loss": 0.1136, "step": 10855 }, { "epoch": 0.1324390243902439, "grad_norm": 0.8460913300514221, "learning_rate": 1.9117073170731708e-05, "loss": 0.1683, "step": 10860 }, { "epoch": 0.1325, "grad_norm": 0.9883261322975159, "learning_rate": 1.911666666666667e-05, "loss": 0.1497, "step": 10865 }, { "epoch": 0.1325609756097561, "grad_norm": 1.027820110321045, "learning_rate": 1.9116260162601628e-05, "loss": 0.1464, "step": 10870 }, { "epoch": 0.1326219512195122, "grad_norm": 0.8243625164031982, "learning_rate": 1.9115853658536586e-05, "loss": 0.1979, "step": 10875 }, { "epoch": 0.1326829268292683, "grad_norm": 1.3645528554916382, "learning_rate": 1.9115447154471547e-05, "loss": 0.1544, "step": 10880 }, { "epoch": 0.1327439024390244, "grad_norm": 0.9943527579307556, "learning_rate": 1.9115040650406505e-05, "loss": 0.1856, "step": 10885 }, { "epoch": 0.1328048780487805, "grad_norm": 3.0473785400390625, "learning_rate": 1.9114634146341463e-05, "loss": 0.1922, "step": 10890 }, { "epoch": 0.1328658536585366, "grad_norm": 0.8008626103401184, "learning_rate": 1.9114227642276425e-05, "loss": 0.1233, "step": 10895 }, { "epoch": 0.1329268292682927, "grad_norm": 0.888524055480957, "learning_rate": 1.9113821138211383e-05, "loss": 0.1261, "step": 10900 }, { "epoch": 0.1329878048780488, "grad_norm": 0.8745492100715637, "learning_rate": 1.9113414634146345e-05, "loss": 0.1633, "step": 10905 }, { "epoch": 0.13304878048780489, "grad_norm": 2.5331060886383057, "learning_rate": 1.9113008130081303e-05, "loss": 0.1686, "step": 10910 }, { "epoch": 0.13310975609756098, "grad_norm": 1.3187364339828491, "learning_rate": 1.9112601626016264e-05, "loss": 0.1543, "step": 10915 }, { "epoch": 0.13317073170731708, "grad_norm": 1.3723148107528687, "learning_rate": 1.911219512195122e-05, "loss": 0.1257, "step": 10920 }, { "epoch": 0.13323170731707318, "grad_norm": 1.0494109392166138, "learning_rate": 1.911178861788618e-05, "loss": 0.1532, "step": 10925 }, { "epoch": 0.13329268292682928, "grad_norm": 1.3135406970977783, "learning_rate": 1.911138211382114e-05, "loss": 0.1375, "step": 10930 }, { "epoch": 0.13335365853658537, "grad_norm": 0.8475697040557861, "learning_rate": 1.91109756097561e-05, "loss": 0.1933, "step": 10935 }, { "epoch": 0.13341463414634147, "grad_norm": 1.315048336982727, "learning_rate": 1.9110569105691058e-05, "loss": 0.1725, "step": 10940 }, { "epoch": 0.13347560975609757, "grad_norm": 1.5489991903305054, "learning_rate": 1.911016260162602e-05, "loss": 0.1926, "step": 10945 }, { "epoch": 0.13353658536585367, "grad_norm": 1.1742221117019653, "learning_rate": 1.9109756097560978e-05, "loss": 0.1561, "step": 10950 }, { "epoch": 0.13359756097560976, "grad_norm": 1.4599988460540771, "learning_rate": 1.9109349593495936e-05, "loss": 0.1521, "step": 10955 }, { "epoch": 0.13365853658536586, "grad_norm": 0.7818409204483032, "learning_rate": 1.9108943089430894e-05, "loss": 0.1308, "step": 10960 }, { "epoch": 0.13371951219512196, "grad_norm": 1.1263856887817383, "learning_rate": 1.9108536585365856e-05, "loss": 0.1668, "step": 10965 }, { "epoch": 0.13378048780487806, "grad_norm": 1.879265308380127, "learning_rate": 1.9108130081300814e-05, "loss": 0.1464, "step": 10970 }, { "epoch": 0.13384146341463415, "grad_norm": 1.00306236743927, "learning_rate": 1.9107723577235775e-05, "loss": 0.1599, "step": 10975 }, { "epoch": 0.13390243902439025, "grad_norm": 2.6946849822998047, "learning_rate": 1.9107317073170733e-05, "loss": 0.154, "step": 10980 }, { "epoch": 0.13396341463414635, "grad_norm": 0.99327152967453, "learning_rate": 1.910691056910569e-05, "loss": 0.1563, "step": 10985 }, { "epoch": 0.13402439024390245, "grad_norm": 1.0015833377838135, "learning_rate": 1.9106504065040653e-05, "loss": 0.1239, "step": 10990 }, { "epoch": 0.13408536585365854, "grad_norm": 0.8003102540969849, "learning_rate": 1.910609756097561e-05, "loss": 0.1852, "step": 10995 }, { "epoch": 0.13414634146341464, "grad_norm": 1.3504561185836792, "learning_rate": 1.9105691056910573e-05, "loss": 0.1759, "step": 11000 }, { "epoch": 0.13420731707317074, "grad_norm": 1.5403937101364136, "learning_rate": 1.910528455284553e-05, "loss": 0.1181, "step": 11005 }, { "epoch": 0.13426829268292684, "grad_norm": 1.3064756393432617, "learning_rate": 1.910487804878049e-05, "loss": 0.1625, "step": 11010 }, { "epoch": 0.13432926829268294, "grad_norm": 1.1797815561294556, "learning_rate": 1.9104471544715447e-05, "loss": 0.147, "step": 11015 }, { "epoch": 0.13439024390243903, "grad_norm": 0.8803371787071228, "learning_rate": 1.910406504065041e-05, "loss": 0.1431, "step": 11020 }, { "epoch": 0.13445121951219513, "grad_norm": 1.0085288286209106, "learning_rate": 1.9103658536585367e-05, "loss": 0.161, "step": 11025 }, { "epoch": 0.13451219512195123, "grad_norm": 1.0112382173538208, "learning_rate": 1.9103252032520328e-05, "loss": 0.1416, "step": 11030 }, { "epoch": 0.13457317073170733, "grad_norm": 0.9374561309814453, "learning_rate": 1.9102845528455286e-05, "loss": 0.1602, "step": 11035 }, { "epoch": 0.13463414634146342, "grad_norm": 1.2674002647399902, "learning_rate": 1.9102439024390245e-05, "loss": 0.1389, "step": 11040 }, { "epoch": 0.13469512195121952, "grad_norm": 0.9354767203330994, "learning_rate": 1.9102032520325206e-05, "loss": 0.1618, "step": 11045 }, { "epoch": 0.13475609756097562, "grad_norm": 0.8396068215370178, "learning_rate": 1.9101626016260164e-05, "loss": 0.1538, "step": 11050 }, { "epoch": 0.13481707317073172, "grad_norm": 1.5528759956359863, "learning_rate": 1.9101219512195122e-05, "loss": 0.1339, "step": 11055 }, { "epoch": 0.1348780487804878, "grad_norm": 2.0237460136413574, "learning_rate": 1.9100813008130084e-05, "loss": 0.1167, "step": 11060 }, { "epoch": 0.1349390243902439, "grad_norm": 0.7833546996116638, "learning_rate": 1.9100406504065042e-05, "loss": 0.1874, "step": 11065 }, { "epoch": 0.135, "grad_norm": 0.7870609164237976, "learning_rate": 1.91e-05, "loss": 0.1724, "step": 11070 }, { "epoch": 0.1350609756097561, "grad_norm": 0.854936957359314, "learning_rate": 1.909959349593496e-05, "loss": 0.1425, "step": 11075 }, { "epoch": 0.1351219512195122, "grad_norm": 1.242335557937622, "learning_rate": 1.909918699186992e-05, "loss": 0.164, "step": 11080 }, { "epoch": 0.1351829268292683, "grad_norm": 1.4935232400894165, "learning_rate": 1.909878048780488e-05, "loss": 0.2041, "step": 11085 }, { "epoch": 0.1352439024390244, "grad_norm": 1.3828461170196533, "learning_rate": 1.909837398373984e-05, "loss": 0.1442, "step": 11090 }, { "epoch": 0.1353048780487805, "grad_norm": 1.8982213735580444, "learning_rate": 1.90979674796748e-05, "loss": 0.1434, "step": 11095 }, { "epoch": 0.1353658536585366, "grad_norm": 1.509214997291565, "learning_rate": 1.9097560975609756e-05, "loss": 0.1437, "step": 11100 }, { "epoch": 0.1354268292682927, "grad_norm": 1.4422310590744019, "learning_rate": 1.9097154471544717e-05, "loss": 0.131, "step": 11105 }, { "epoch": 0.1354878048780488, "grad_norm": 0.7735317349433899, "learning_rate": 1.9096747967479675e-05, "loss": 0.1834, "step": 11110 }, { "epoch": 0.1355487804878049, "grad_norm": 0.6386173963546753, "learning_rate": 1.9096341463414637e-05, "loss": 0.1452, "step": 11115 }, { "epoch": 0.13560975609756099, "grad_norm": 0.4294213652610779, "learning_rate": 1.9095934959349595e-05, "loss": 0.1448, "step": 11120 }, { "epoch": 0.13567073170731708, "grad_norm": 1.3151543140411377, "learning_rate": 1.9095528455284556e-05, "loss": 0.1608, "step": 11125 }, { "epoch": 0.13573170731707318, "grad_norm": 0.9864157438278198, "learning_rate": 1.9095121951219515e-05, "loss": 0.1151, "step": 11130 }, { "epoch": 0.13579268292682928, "grad_norm": 1.367794394493103, "learning_rate": 1.9094715447154473e-05, "loss": 0.1541, "step": 11135 }, { "epoch": 0.13585365853658538, "grad_norm": 0.9037635922431946, "learning_rate": 1.909430894308943e-05, "loss": 0.152, "step": 11140 }, { "epoch": 0.13591463414634147, "grad_norm": 1.6927778720855713, "learning_rate": 1.9093902439024392e-05, "loss": 0.168, "step": 11145 }, { "epoch": 0.13597560975609757, "grad_norm": 2.6796936988830566, "learning_rate": 1.909349593495935e-05, "loss": 0.1882, "step": 11150 }, { "epoch": 0.13603658536585367, "grad_norm": 0.6635127067565918, "learning_rate": 1.9093089430894312e-05, "loss": 0.0966, "step": 11155 }, { "epoch": 0.13609756097560977, "grad_norm": 1.9600557088851929, "learning_rate": 1.909268292682927e-05, "loss": 0.1913, "step": 11160 }, { "epoch": 0.13615853658536586, "grad_norm": 0.9668054580688477, "learning_rate": 1.9092276422764228e-05, "loss": 0.16, "step": 11165 }, { "epoch": 0.13621951219512196, "grad_norm": 1.5088187456130981, "learning_rate": 1.909186991869919e-05, "loss": 0.1484, "step": 11170 }, { "epoch": 0.13628048780487806, "grad_norm": 0.9969660639762878, "learning_rate": 1.9091463414634148e-05, "loss": 0.1411, "step": 11175 }, { "epoch": 0.13634146341463416, "grad_norm": 0.9927273988723755, "learning_rate": 1.909105691056911e-05, "loss": 0.1223, "step": 11180 }, { "epoch": 0.13640243902439025, "grad_norm": 0.8886783123016357, "learning_rate": 1.9090650406504068e-05, "loss": 0.1253, "step": 11185 }, { "epoch": 0.13646341463414635, "grad_norm": 0.8114961981773376, "learning_rate": 1.9090243902439026e-05, "loss": 0.1172, "step": 11190 }, { "epoch": 0.13652439024390245, "grad_norm": 0.6972160935401917, "learning_rate": 1.9089837398373984e-05, "loss": 0.1322, "step": 11195 }, { "epoch": 0.13658536585365855, "grad_norm": 0.832268476486206, "learning_rate": 1.9089430894308945e-05, "loss": 0.1507, "step": 11200 }, { "epoch": 0.13664634146341464, "grad_norm": 0.8865876793861389, "learning_rate": 1.9089024390243903e-05, "loss": 0.1466, "step": 11205 }, { "epoch": 0.13670731707317074, "grad_norm": 0.9256187081336975, "learning_rate": 1.9088617886178865e-05, "loss": 0.0922, "step": 11210 }, { "epoch": 0.13676829268292684, "grad_norm": 0.9624676704406738, "learning_rate": 1.9088211382113823e-05, "loss": 0.1781, "step": 11215 }, { "epoch": 0.13682926829268294, "grad_norm": 0.5644593238830566, "learning_rate": 1.908780487804878e-05, "loss": 0.1791, "step": 11220 }, { "epoch": 0.13689024390243903, "grad_norm": 0.8723300695419312, "learning_rate": 1.908739837398374e-05, "loss": 0.1774, "step": 11225 }, { "epoch": 0.13695121951219513, "grad_norm": 1.0304348468780518, "learning_rate": 1.90869918699187e-05, "loss": 0.1434, "step": 11230 }, { "epoch": 0.13701219512195123, "grad_norm": 1.2194218635559082, "learning_rate": 1.908658536585366e-05, "loss": 0.1735, "step": 11235 }, { "epoch": 0.13707317073170733, "grad_norm": 0.8542727828025818, "learning_rate": 1.908617886178862e-05, "loss": 0.1562, "step": 11240 }, { "epoch": 0.13713414634146343, "grad_norm": 0.9631788730621338, "learning_rate": 1.908577235772358e-05, "loss": 0.1514, "step": 11245 }, { "epoch": 0.13719512195121952, "grad_norm": 1.5331158638000488, "learning_rate": 1.9085365853658537e-05, "loss": 0.1162, "step": 11250 }, { "epoch": 0.13725609756097562, "grad_norm": 0.828045129776001, "learning_rate": 1.9084959349593498e-05, "loss": 0.1156, "step": 11255 }, { "epoch": 0.13731707317073172, "grad_norm": 0.862252414226532, "learning_rate": 1.9084552845528456e-05, "loss": 0.1267, "step": 11260 }, { "epoch": 0.13737804878048782, "grad_norm": 1.1773158311843872, "learning_rate": 1.9084146341463418e-05, "loss": 0.1701, "step": 11265 }, { "epoch": 0.1374390243902439, "grad_norm": 1.3458006381988525, "learning_rate": 1.9083739837398376e-05, "loss": 0.1649, "step": 11270 }, { "epoch": 0.1375, "grad_norm": 1.5847870111465454, "learning_rate": 1.9083333333333338e-05, "loss": 0.2087, "step": 11275 }, { "epoch": 0.1375609756097561, "grad_norm": 1.058824896812439, "learning_rate": 1.9082926829268292e-05, "loss": 0.1556, "step": 11280 }, { "epoch": 0.1376219512195122, "grad_norm": 0.9483102560043335, "learning_rate": 1.9082520325203254e-05, "loss": 0.1728, "step": 11285 }, { "epoch": 0.1376829268292683, "grad_norm": 1.2792162895202637, "learning_rate": 1.9082113821138212e-05, "loss": 0.1295, "step": 11290 }, { "epoch": 0.1377439024390244, "grad_norm": 2.5732247829437256, "learning_rate": 1.9081707317073173e-05, "loss": 0.1556, "step": 11295 }, { "epoch": 0.1378048780487805, "grad_norm": 1.2879705429077148, "learning_rate": 1.908130081300813e-05, "loss": 0.1446, "step": 11300 }, { "epoch": 0.1378658536585366, "grad_norm": 1.0222116708755493, "learning_rate": 1.9080894308943093e-05, "loss": 0.1232, "step": 11305 }, { "epoch": 0.1379268292682927, "grad_norm": 1.2883065938949585, "learning_rate": 1.908048780487805e-05, "loss": 0.1676, "step": 11310 }, { "epoch": 0.1379878048780488, "grad_norm": 1.858301043510437, "learning_rate": 1.908008130081301e-05, "loss": 0.1495, "step": 11315 }, { "epoch": 0.1380487804878049, "grad_norm": 0.7601770162582397, "learning_rate": 1.9079674796747967e-05, "loss": 0.1677, "step": 11320 }, { "epoch": 0.138109756097561, "grad_norm": 1.0422747135162354, "learning_rate": 1.907926829268293e-05, "loss": 0.1337, "step": 11325 }, { "epoch": 0.13817073170731708, "grad_norm": 0.5937492847442627, "learning_rate": 1.9078861788617887e-05, "loss": 0.1914, "step": 11330 }, { "epoch": 0.13823170731707318, "grad_norm": 1.380083441734314, "learning_rate": 1.907845528455285e-05, "loss": 0.1584, "step": 11335 }, { "epoch": 0.13829268292682928, "grad_norm": 1.0353986024856567, "learning_rate": 1.9078048780487807e-05, "loss": 0.1192, "step": 11340 }, { "epoch": 0.13835365853658538, "grad_norm": 1.2740445137023926, "learning_rate": 1.9077642276422765e-05, "loss": 0.1511, "step": 11345 }, { "epoch": 0.13841463414634148, "grad_norm": 1.0570796728134155, "learning_rate": 1.9077235772357726e-05, "loss": 0.1528, "step": 11350 }, { "epoch": 0.13847560975609757, "grad_norm": 1.079655647277832, "learning_rate": 1.9076829268292685e-05, "loss": 0.1519, "step": 11355 }, { "epoch": 0.13853658536585367, "grad_norm": 0.5536625385284424, "learning_rate": 1.9076422764227646e-05, "loss": 0.1466, "step": 11360 }, { "epoch": 0.13859756097560977, "grad_norm": 1.16559898853302, "learning_rate": 1.9076016260162604e-05, "loss": 0.1342, "step": 11365 }, { "epoch": 0.13865853658536587, "grad_norm": 0.7105541825294495, "learning_rate": 1.9075609756097562e-05, "loss": 0.1533, "step": 11370 }, { "epoch": 0.13871951219512196, "grad_norm": 1.7671855688095093, "learning_rate": 1.907520325203252e-05, "loss": 0.1665, "step": 11375 }, { "epoch": 0.13878048780487806, "grad_norm": 1.3448550701141357, "learning_rate": 1.9074796747967482e-05, "loss": 0.155, "step": 11380 }, { "epoch": 0.13884146341463416, "grad_norm": 1.0877857208251953, "learning_rate": 1.907439024390244e-05, "loss": 0.158, "step": 11385 }, { "epoch": 0.13890243902439026, "grad_norm": 1.0554656982421875, "learning_rate": 1.90739837398374e-05, "loss": 0.1543, "step": 11390 }, { "epoch": 0.13896341463414635, "grad_norm": 0.5997368693351746, "learning_rate": 1.907357723577236e-05, "loss": 0.135, "step": 11395 }, { "epoch": 0.13902439024390245, "grad_norm": 0.7745848298072815, "learning_rate": 1.9073170731707318e-05, "loss": 0.1175, "step": 11400 }, { "epoch": 0.13908536585365855, "grad_norm": 1.0664973258972168, "learning_rate": 1.9072764227642276e-05, "loss": 0.1526, "step": 11405 }, { "epoch": 0.13914634146341465, "grad_norm": 0.9529432654380798, "learning_rate": 1.9072357723577237e-05, "loss": 0.134, "step": 11410 }, { "epoch": 0.13920731707317074, "grad_norm": 0.7715851068496704, "learning_rate": 1.9071951219512196e-05, "loss": 0.1935, "step": 11415 }, { "epoch": 0.13926829268292684, "grad_norm": 1.668017029762268, "learning_rate": 1.9071544715447157e-05, "loss": 0.1633, "step": 11420 }, { "epoch": 0.13932926829268294, "grad_norm": 1.4757215976715088, "learning_rate": 1.9071138211382115e-05, "loss": 0.1582, "step": 11425 }, { "epoch": 0.13939024390243904, "grad_norm": 0.7779788374900818, "learning_rate": 1.9070731707317073e-05, "loss": 0.1387, "step": 11430 }, { "epoch": 0.13945121951219513, "grad_norm": 0.7321338057518005, "learning_rate": 1.9070325203252035e-05, "loss": 0.1505, "step": 11435 }, { "epoch": 0.13951219512195123, "grad_norm": 0.5796331763267517, "learning_rate": 1.9069918699186993e-05, "loss": 0.1399, "step": 11440 }, { "epoch": 0.13957317073170733, "grad_norm": 1.5955820083618164, "learning_rate": 1.9069512195121955e-05, "loss": 0.1676, "step": 11445 }, { "epoch": 0.13963414634146343, "grad_norm": 1.5064561367034912, "learning_rate": 1.9069105691056913e-05, "loss": 0.1547, "step": 11450 }, { "epoch": 0.13969512195121953, "grad_norm": 1.4146292209625244, "learning_rate": 1.9068699186991874e-05, "loss": 0.1677, "step": 11455 }, { "epoch": 0.13975609756097562, "grad_norm": 0.6454301476478577, "learning_rate": 1.906829268292683e-05, "loss": 0.1338, "step": 11460 }, { "epoch": 0.13981707317073172, "grad_norm": 0.9715523719787598, "learning_rate": 1.906788617886179e-05, "loss": 0.1516, "step": 11465 }, { "epoch": 0.13987804878048782, "grad_norm": 0.6599202752113342, "learning_rate": 1.906747967479675e-05, "loss": 0.1488, "step": 11470 }, { "epoch": 0.13993902439024392, "grad_norm": 2.2822186946868896, "learning_rate": 1.906707317073171e-05, "loss": 0.1484, "step": 11475 }, { "epoch": 0.14, "grad_norm": 1.2939239740371704, "learning_rate": 1.9066666666666668e-05, "loss": 0.1369, "step": 11480 }, { "epoch": 0.1400609756097561, "grad_norm": 0.4647803008556366, "learning_rate": 1.906626016260163e-05, "loss": 0.1097, "step": 11485 }, { "epoch": 0.1401219512195122, "grad_norm": 0.4765164256095886, "learning_rate": 1.9065853658536584e-05, "loss": 0.1128, "step": 11490 }, { "epoch": 0.1401829268292683, "grad_norm": 1.3102681636810303, "learning_rate": 1.9065447154471546e-05, "loss": 0.1218, "step": 11495 }, { "epoch": 0.1402439024390244, "grad_norm": 0.7656067609786987, "learning_rate": 1.9065040650406504e-05, "loss": 0.1359, "step": 11500 }, { "epoch": 0.1403048780487805, "grad_norm": 1.7419103384017944, "learning_rate": 1.9064634146341466e-05, "loss": 0.1357, "step": 11505 }, { "epoch": 0.1403658536585366, "grad_norm": 0.6040079593658447, "learning_rate": 1.9064227642276424e-05, "loss": 0.1345, "step": 11510 }, { "epoch": 0.1404268292682927, "grad_norm": 1.4700703620910645, "learning_rate": 1.9063821138211385e-05, "loss": 0.1794, "step": 11515 }, { "epoch": 0.1404878048780488, "grad_norm": 1.0364031791687012, "learning_rate": 1.9063414634146343e-05, "loss": 0.1276, "step": 11520 }, { "epoch": 0.1405487804878049, "grad_norm": 1.2157081365585327, "learning_rate": 1.90630081300813e-05, "loss": 0.1555, "step": 11525 }, { "epoch": 0.140609756097561, "grad_norm": 1.0079816579818726, "learning_rate": 1.9062601626016263e-05, "loss": 0.1655, "step": 11530 }, { "epoch": 0.14067073170731706, "grad_norm": 1.515870213508606, "learning_rate": 1.906219512195122e-05, "loss": 0.1513, "step": 11535 }, { "epoch": 0.14073170731707316, "grad_norm": 1.331705927848816, "learning_rate": 1.9061788617886183e-05, "loss": 0.1411, "step": 11540 }, { "epoch": 0.14079268292682925, "grad_norm": 0.9483458995819092, "learning_rate": 1.906138211382114e-05, "loss": 0.1651, "step": 11545 }, { "epoch": 0.14085365853658535, "grad_norm": 0.8469000458717346, "learning_rate": 1.90609756097561e-05, "loss": 0.1489, "step": 11550 }, { "epoch": 0.14091463414634145, "grad_norm": 1.0276108980178833, "learning_rate": 1.9060569105691057e-05, "loss": 0.1346, "step": 11555 }, { "epoch": 0.14097560975609755, "grad_norm": 1.2370073795318604, "learning_rate": 1.906016260162602e-05, "loss": 0.1321, "step": 11560 }, { "epoch": 0.14103658536585365, "grad_norm": 1.398324966430664, "learning_rate": 1.9059756097560977e-05, "loss": 0.1564, "step": 11565 }, { "epoch": 0.14109756097560974, "grad_norm": 1.1302224397659302, "learning_rate": 1.9059349593495938e-05, "loss": 0.1247, "step": 11570 }, { "epoch": 0.14115853658536584, "grad_norm": 1.2421727180480957, "learning_rate": 1.9058943089430896e-05, "loss": 0.1026, "step": 11575 }, { "epoch": 0.14121951219512194, "grad_norm": 1.5492205619812012, "learning_rate": 1.9058536585365854e-05, "loss": 0.1709, "step": 11580 }, { "epoch": 0.14128048780487804, "grad_norm": 1.625483512878418, "learning_rate": 1.9058130081300813e-05, "loss": 0.1557, "step": 11585 }, { "epoch": 0.14134146341463413, "grad_norm": 1.602060079574585, "learning_rate": 1.9057723577235774e-05, "loss": 0.1587, "step": 11590 }, { "epoch": 0.14140243902439023, "grad_norm": 0.8962723016738892, "learning_rate": 1.9057317073170732e-05, "loss": 0.1419, "step": 11595 }, { "epoch": 0.14146341463414633, "grad_norm": 1.0436615943908691, "learning_rate": 1.9056910569105694e-05, "loss": 0.1895, "step": 11600 }, { "epoch": 0.14152439024390243, "grad_norm": 0.5524539947509766, "learning_rate": 1.9056504065040652e-05, "loss": 0.1332, "step": 11605 }, { "epoch": 0.14158536585365852, "grad_norm": 0.8244447708129883, "learning_rate": 1.905609756097561e-05, "loss": 0.124, "step": 11610 }, { "epoch": 0.14164634146341462, "grad_norm": 1.2559629678726196, "learning_rate": 1.905569105691057e-05, "loss": 0.1514, "step": 11615 }, { "epoch": 0.14170731707317072, "grad_norm": 0.8656870126724243, "learning_rate": 1.905528455284553e-05, "loss": 0.1645, "step": 11620 }, { "epoch": 0.14176829268292682, "grad_norm": 0.9264628887176514, "learning_rate": 1.905487804878049e-05, "loss": 0.1571, "step": 11625 }, { "epoch": 0.14182926829268291, "grad_norm": 0.8975374102592468, "learning_rate": 1.905447154471545e-05, "loss": 0.1449, "step": 11630 }, { "epoch": 0.141890243902439, "grad_norm": 0.8111308813095093, "learning_rate": 1.9054065040650407e-05, "loss": 0.149, "step": 11635 }, { "epoch": 0.1419512195121951, "grad_norm": 1.0493931770324707, "learning_rate": 1.9053658536585366e-05, "loss": 0.1357, "step": 11640 }, { "epoch": 0.1420121951219512, "grad_norm": 1.1123614311218262, "learning_rate": 1.9053252032520327e-05, "loss": 0.1278, "step": 11645 }, { "epoch": 0.1420731707317073, "grad_norm": 1.5545755624771118, "learning_rate": 1.9052845528455285e-05, "loss": 0.2108, "step": 11650 }, { "epoch": 0.1421341463414634, "grad_norm": 0.6712729334831238, "learning_rate": 1.9052439024390247e-05, "loss": 0.1601, "step": 11655 }, { "epoch": 0.1421951219512195, "grad_norm": 1.2268295288085938, "learning_rate": 1.9052032520325205e-05, "loss": 0.1365, "step": 11660 }, { "epoch": 0.1422560975609756, "grad_norm": 0.8917575478553772, "learning_rate": 1.9051626016260166e-05, "loss": 0.1435, "step": 11665 }, { "epoch": 0.1423170731707317, "grad_norm": 2.9254305362701416, "learning_rate": 1.905121951219512e-05, "loss": 0.1668, "step": 11670 }, { "epoch": 0.1423780487804878, "grad_norm": 1.175106167793274, "learning_rate": 1.9050813008130083e-05, "loss": 0.1475, "step": 11675 }, { "epoch": 0.1424390243902439, "grad_norm": 0.8116751909255981, "learning_rate": 1.905040650406504e-05, "loss": 0.1448, "step": 11680 }, { "epoch": 0.1425, "grad_norm": 1.6767045259475708, "learning_rate": 1.9050000000000002e-05, "loss": 0.1242, "step": 11685 }, { "epoch": 0.14256097560975609, "grad_norm": 1.2339427471160889, "learning_rate": 1.904959349593496e-05, "loss": 0.15, "step": 11690 }, { "epoch": 0.14262195121951218, "grad_norm": 0.6222286820411682, "learning_rate": 1.9049186991869922e-05, "loss": 0.1356, "step": 11695 }, { "epoch": 0.14268292682926828, "grad_norm": 1.3327571153640747, "learning_rate": 1.904878048780488e-05, "loss": 0.1776, "step": 11700 }, { "epoch": 0.14274390243902438, "grad_norm": 0.7536190748214722, "learning_rate": 1.9048373983739838e-05, "loss": 0.1271, "step": 11705 }, { "epoch": 0.14280487804878048, "grad_norm": 1.5079388618469238, "learning_rate": 1.90479674796748e-05, "loss": 0.1429, "step": 11710 }, { "epoch": 0.14286585365853657, "grad_norm": 1.188483715057373, "learning_rate": 1.9047560975609758e-05, "loss": 0.1531, "step": 11715 }, { "epoch": 0.14292682926829267, "grad_norm": 0.863643229007721, "learning_rate": 1.904715447154472e-05, "loss": 0.1261, "step": 11720 }, { "epoch": 0.14298780487804877, "grad_norm": 0.7934163808822632, "learning_rate": 1.9046747967479677e-05, "loss": 0.1522, "step": 11725 }, { "epoch": 0.14304878048780487, "grad_norm": 1.0669078826904297, "learning_rate": 1.9046341463414636e-05, "loss": 0.1504, "step": 11730 }, { "epoch": 0.14310975609756096, "grad_norm": 0.5693074464797974, "learning_rate": 1.9045934959349594e-05, "loss": 0.1237, "step": 11735 }, { "epoch": 0.14317073170731706, "grad_norm": 0.5489134192466736, "learning_rate": 1.9045528455284555e-05, "loss": 0.1427, "step": 11740 }, { "epoch": 0.14323170731707316, "grad_norm": 0.9734132289886475, "learning_rate": 1.9045121951219513e-05, "loss": 0.1302, "step": 11745 }, { "epoch": 0.14329268292682926, "grad_norm": 1.9709951877593994, "learning_rate": 1.9044715447154475e-05, "loss": 0.1296, "step": 11750 }, { "epoch": 0.14335365853658535, "grad_norm": 0.6345658302307129, "learning_rate": 1.9044308943089433e-05, "loss": 0.1336, "step": 11755 }, { "epoch": 0.14341463414634145, "grad_norm": 0.7254900336265564, "learning_rate": 1.904390243902439e-05, "loss": 0.1283, "step": 11760 }, { "epoch": 0.14347560975609755, "grad_norm": 0.7549057602882385, "learning_rate": 1.904349593495935e-05, "loss": 0.138, "step": 11765 }, { "epoch": 0.14353658536585365, "grad_norm": 1.1991373300552368, "learning_rate": 1.904308943089431e-05, "loss": 0.1571, "step": 11770 }, { "epoch": 0.14359756097560974, "grad_norm": 0.6771124601364136, "learning_rate": 1.904268292682927e-05, "loss": 0.1486, "step": 11775 }, { "epoch": 0.14365853658536584, "grad_norm": 0.9266335368156433, "learning_rate": 1.904227642276423e-05, "loss": 0.1621, "step": 11780 }, { "epoch": 0.14371951219512194, "grad_norm": 1.829957365989685, "learning_rate": 1.904186991869919e-05, "loss": 0.1538, "step": 11785 }, { "epoch": 0.14378048780487804, "grad_norm": 0.7867283225059509, "learning_rate": 1.9041463414634147e-05, "loss": 0.1365, "step": 11790 }, { "epoch": 0.14384146341463414, "grad_norm": 1.3053433895111084, "learning_rate": 1.9041056910569108e-05, "loss": 0.1363, "step": 11795 }, { "epoch": 0.14390243902439023, "grad_norm": 1.1219819784164429, "learning_rate": 1.9040650406504066e-05, "loss": 0.1764, "step": 11800 }, { "epoch": 0.14396341463414633, "grad_norm": 0.9461402297019958, "learning_rate": 1.9040243902439028e-05, "loss": 0.1772, "step": 11805 }, { "epoch": 0.14402439024390243, "grad_norm": 0.8880084753036499, "learning_rate": 1.9039837398373986e-05, "loss": 0.1559, "step": 11810 }, { "epoch": 0.14408536585365853, "grad_norm": 0.8465166091918945, "learning_rate": 1.9039430894308944e-05, "loss": 0.127, "step": 11815 }, { "epoch": 0.14414634146341462, "grad_norm": 0.5464118719100952, "learning_rate": 1.9039024390243902e-05, "loss": 0.1273, "step": 11820 }, { "epoch": 0.14420731707317072, "grad_norm": 0.8865481019020081, "learning_rate": 1.9038617886178864e-05, "loss": 0.1538, "step": 11825 }, { "epoch": 0.14426829268292682, "grad_norm": 0.9286988377571106, "learning_rate": 1.9038211382113822e-05, "loss": 0.1607, "step": 11830 }, { "epoch": 0.14432926829268292, "grad_norm": 0.6315798759460449, "learning_rate": 1.9037804878048783e-05, "loss": 0.1896, "step": 11835 }, { "epoch": 0.144390243902439, "grad_norm": 0.9911891222000122, "learning_rate": 1.903739837398374e-05, "loss": 0.1538, "step": 11840 }, { "epoch": 0.1444512195121951, "grad_norm": 2.015972137451172, "learning_rate": 1.9036991869918703e-05, "loss": 0.1462, "step": 11845 }, { "epoch": 0.1445121951219512, "grad_norm": 0.42497000098228455, "learning_rate": 1.9036585365853658e-05, "loss": 0.1484, "step": 11850 }, { "epoch": 0.1445731707317073, "grad_norm": 2.5547902584075928, "learning_rate": 1.903617886178862e-05, "loss": 0.2263, "step": 11855 }, { "epoch": 0.1446341463414634, "grad_norm": 0.6357424855232239, "learning_rate": 1.9035772357723577e-05, "loss": 0.1302, "step": 11860 }, { "epoch": 0.1446951219512195, "grad_norm": 1.73249351978302, "learning_rate": 1.903536585365854e-05, "loss": 0.1843, "step": 11865 }, { "epoch": 0.1447560975609756, "grad_norm": 1.680095911026001, "learning_rate": 1.9034959349593497e-05, "loss": 0.1072, "step": 11870 }, { "epoch": 0.1448170731707317, "grad_norm": 0.9206677079200745, "learning_rate": 1.903455284552846e-05, "loss": 0.1752, "step": 11875 }, { "epoch": 0.1448780487804878, "grad_norm": 0.8202424645423889, "learning_rate": 1.9034146341463417e-05, "loss": 0.1053, "step": 11880 }, { "epoch": 0.1449390243902439, "grad_norm": 0.7988618016242981, "learning_rate": 1.9033739837398375e-05, "loss": 0.1431, "step": 11885 }, { "epoch": 0.145, "grad_norm": 0.6067061424255371, "learning_rate": 1.9033333333333336e-05, "loss": 0.161, "step": 11890 }, { "epoch": 0.1450609756097561, "grad_norm": 1.3386245965957642, "learning_rate": 1.9032926829268294e-05, "loss": 0.1713, "step": 11895 }, { "epoch": 0.14512195121951219, "grad_norm": 1.047518014907837, "learning_rate": 1.9032520325203253e-05, "loss": 0.1451, "step": 11900 }, { "epoch": 0.14518292682926828, "grad_norm": 1.0997204780578613, "learning_rate": 1.9032113821138214e-05, "loss": 0.1531, "step": 11905 }, { "epoch": 0.14524390243902438, "grad_norm": 0.6522324085235596, "learning_rate": 1.9031707317073172e-05, "loss": 0.1754, "step": 11910 }, { "epoch": 0.14530487804878048, "grad_norm": 0.8350545167922974, "learning_rate": 1.903130081300813e-05, "loss": 0.1713, "step": 11915 }, { "epoch": 0.14536585365853658, "grad_norm": 1.240586757659912, "learning_rate": 1.9030894308943092e-05, "loss": 0.1436, "step": 11920 }, { "epoch": 0.14542682926829267, "grad_norm": 1.7965484857559204, "learning_rate": 1.903048780487805e-05, "loss": 0.1585, "step": 11925 }, { "epoch": 0.14548780487804877, "grad_norm": 0.4929080009460449, "learning_rate": 1.903008130081301e-05, "loss": 0.1538, "step": 11930 }, { "epoch": 0.14554878048780487, "grad_norm": 0.6892387270927429, "learning_rate": 1.902967479674797e-05, "loss": 0.1177, "step": 11935 }, { "epoch": 0.14560975609756097, "grad_norm": 1.031092643737793, "learning_rate": 1.9029268292682928e-05, "loss": 0.1355, "step": 11940 }, { "epoch": 0.14567073170731706, "grad_norm": 0.8364527821540833, "learning_rate": 1.9028861788617886e-05, "loss": 0.1348, "step": 11945 }, { "epoch": 0.14573170731707316, "grad_norm": 1.099591851234436, "learning_rate": 1.9028455284552847e-05, "loss": 0.1469, "step": 11950 }, { "epoch": 0.14579268292682926, "grad_norm": 1.1881994009017944, "learning_rate": 1.9028048780487806e-05, "loss": 0.1627, "step": 11955 }, { "epoch": 0.14585365853658536, "grad_norm": 0.8652241826057434, "learning_rate": 1.9027642276422767e-05, "loss": 0.1355, "step": 11960 }, { "epoch": 0.14591463414634145, "grad_norm": 0.6394898891448975, "learning_rate": 1.9027235772357725e-05, "loss": 0.1456, "step": 11965 }, { "epoch": 0.14597560975609755, "grad_norm": 0.9432870149612427, "learning_rate": 1.9026829268292683e-05, "loss": 0.1073, "step": 11970 }, { "epoch": 0.14603658536585365, "grad_norm": 0.9012168645858765, "learning_rate": 1.9026422764227645e-05, "loss": 0.169, "step": 11975 }, { "epoch": 0.14609756097560975, "grad_norm": 0.7402430176734924, "learning_rate": 1.9026016260162603e-05, "loss": 0.1496, "step": 11980 }, { "epoch": 0.14615853658536584, "grad_norm": 1.9746259450912476, "learning_rate": 1.9025609756097564e-05, "loss": 0.1444, "step": 11985 }, { "epoch": 0.14621951219512194, "grad_norm": 0.8250483274459839, "learning_rate": 1.9025203252032523e-05, "loss": 0.14, "step": 11990 }, { "epoch": 0.14628048780487804, "grad_norm": 0.8817940354347229, "learning_rate": 1.902479674796748e-05, "loss": 0.1446, "step": 11995 }, { "epoch": 0.14634146341463414, "grad_norm": 0.8389832973480225, "learning_rate": 1.902439024390244e-05, "loss": 0.1435, "step": 12000 }, { "epoch": 0.14640243902439024, "grad_norm": 0.7444179654121399, "learning_rate": 1.90239837398374e-05, "loss": 0.1813, "step": 12005 }, { "epoch": 0.14646341463414633, "grad_norm": 1.0541646480560303, "learning_rate": 1.902357723577236e-05, "loss": 0.1278, "step": 12010 }, { "epoch": 0.14652439024390243, "grad_norm": 0.6003871560096741, "learning_rate": 1.902317073170732e-05, "loss": 0.1551, "step": 12015 }, { "epoch": 0.14658536585365853, "grad_norm": 0.8780742883682251, "learning_rate": 1.9022764227642278e-05, "loss": 0.1693, "step": 12020 }, { "epoch": 0.14664634146341463, "grad_norm": 0.7893807291984558, "learning_rate": 1.902235772357724e-05, "loss": 0.1308, "step": 12025 }, { "epoch": 0.14670731707317072, "grad_norm": 0.7035499215126038, "learning_rate": 1.9021951219512194e-05, "loss": 0.1188, "step": 12030 }, { "epoch": 0.14676829268292682, "grad_norm": 0.7234211564064026, "learning_rate": 1.9021544715447156e-05, "loss": 0.1352, "step": 12035 }, { "epoch": 0.14682926829268292, "grad_norm": 2.113936424255371, "learning_rate": 1.9021138211382114e-05, "loss": 0.1665, "step": 12040 }, { "epoch": 0.14689024390243902, "grad_norm": 1.0868991613388062, "learning_rate": 1.9020731707317076e-05, "loss": 0.1518, "step": 12045 }, { "epoch": 0.1469512195121951, "grad_norm": 1.6250005960464478, "learning_rate": 1.9020325203252034e-05, "loss": 0.168, "step": 12050 }, { "epoch": 0.1470121951219512, "grad_norm": 0.8307725191116333, "learning_rate": 1.9019918699186995e-05, "loss": 0.1393, "step": 12055 }, { "epoch": 0.1470731707317073, "grad_norm": 1.2141788005828857, "learning_rate": 1.9019512195121953e-05, "loss": 0.1525, "step": 12060 }, { "epoch": 0.1471341463414634, "grad_norm": 1.5461814403533936, "learning_rate": 1.901910569105691e-05, "loss": 0.161, "step": 12065 }, { "epoch": 0.1471951219512195, "grad_norm": 0.8654522895812988, "learning_rate": 1.9018699186991873e-05, "loss": 0.1736, "step": 12070 }, { "epoch": 0.1472560975609756, "grad_norm": 1.4274709224700928, "learning_rate": 1.901829268292683e-05, "loss": 0.131, "step": 12075 }, { "epoch": 0.1473170731707317, "grad_norm": 1.2484841346740723, "learning_rate": 1.901788617886179e-05, "loss": 0.1611, "step": 12080 }, { "epoch": 0.1473780487804878, "grad_norm": 1.291694164276123, "learning_rate": 1.901747967479675e-05, "loss": 0.1622, "step": 12085 }, { "epoch": 0.1474390243902439, "grad_norm": 1.2914201021194458, "learning_rate": 1.901707317073171e-05, "loss": 0.1378, "step": 12090 }, { "epoch": 0.1475, "grad_norm": 0.8069570660591125, "learning_rate": 1.9016666666666667e-05, "loss": 0.1353, "step": 12095 }, { "epoch": 0.1475609756097561, "grad_norm": 1.3623586893081665, "learning_rate": 1.901626016260163e-05, "loss": 0.1533, "step": 12100 }, { "epoch": 0.1476219512195122, "grad_norm": 0.6343042254447937, "learning_rate": 1.9015853658536587e-05, "loss": 0.1557, "step": 12105 }, { "epoch": 0.14768292682926829, "grad_norm": 1.488243579864502, "learning_rate": 1.9015447154471548e-05, "loss": 0.179, "step": 12110 }, { "epoch": 0.14774390243902438, "grad_norm": 0.618746280670166, "learning_rate": 1.9015040650406506e-05, "loss": 0.1558, "step": 12115 }, { "epoch": 0.14780487804878048, "grad_norm": 0.6387186050415039, "learning_rate": 1.9014634146341464e-05, "loss": 0.1552, "step": 12120 }, { "epoch": 0.14786585365853658, "grad_norm": 0.8747533559799194, "learning_rate": 1.9014227642276423e-05, "loss": 0.1576, "step": 12125 }, { "epoch": 0.14792682926829268, "grad_norm": 0.6743447184562683, "learning_rate": 1.9013821138211384e-05, "loss": 0.1819, "step": 12130 }, { "epoch": 0.14798780487804877, "grad_norm": 0.7466965317726135, "learning_rate": 1.9013414634146342e-05, "loss": 0.0996, "step": 12135 }, { "epoch": 0.14804878048780487, "grad_norm": 0.4893837869167328, "learning_rate": 1.9013008130081304e-05, "loss": 0.1663, "step": 12140 }, { "epoch": 0.14810975609756097, "grad_norm": 1.0239841938018799, "learning_rate": 1.9012601626016262e-05, "loss": 0.1301, "step": 12145 }, { "epoch": 0.14817073170731707, "grad_norm": 0.5996260643005371, "learning_rate": 1.901219512195122e-05, "loss": 0.1372, "step": 12150 }, { "epoch": 0.14823170731707316, "grad_norm": 2.034214496612549, "learning_rate": 1.901178861788618e-05, "loss": 0.1757, "step": 12155 }, { "epoch": 0.14829268292682926, "grad_norm": 0.9066073894500732, "learning_rate": 1.901138211382114e-05, "loss": 0.1035, "step": 12160 }, { "epoch": 0.14835365853658536, "grad_norm": 1.328332781791687, "learning_rate": 1.9010975609756098e-05, "loss": 0.1359, "step": 12165 }, { "epoch": 0.14841463414634146, "grad_norm": 1.9318336248397827, "learning_rate": 1.901056910569106e-05, "loss": 0.1565, "step": 12170 }, { "epoch": 0.14847560975609755, "grad_norm": 0.9658297300338745, "learning_rate": 1.9010162601626017e-05, "loss": 0.1653, "step": 12175 }, { "epoch": 0.14853658536585365, "grad_norm": 0.6175795793533325, "learning_rate": 1.9009756097560975e-05, "loss": 0.1372, "step": 12180 }, { "epoch": 0.14859756097560975, "grad_norm": 0.7163276672363281, "learning_rate": 1.9009349593495937e-05, "loss": 0.1256, "step": 12185 }, { "epoch": 0.14865853658536585, "grad_norm": 0.8204837441444397, "learning_rate": 1.9008943089430895e-05, "loss": 0.2183, "step": 12190 }, { "epoch": 0.14871951219512194, "grad_norm": 1.3736248016357422, "learning_rate": 1.9008536585365857e-05, "loss": 0.1542, "step": 12195 }, { "epoch": 0.14878048780487804, "grad_norm": 0.7935529351234436, "learning_rate": 1.9008130081300815e-05, "loss": 0.145, "step": 12200 }, { "epoch": 0.14884146341463414, "grad_norm": 0.7480982542037964, "learning_rate": 1.9007723577235776e-05, "loss": 0.1531, "step": 12205 }, { "epoch": 0.14890243902439024, "grad_norm": 1.9787036180496216, "learning_rate": 1.900731707317073e-05, "loss": 0.1824, "step": 12210 }, { "epoch": 0.14896341463414633, "grad_norm": 0.8968813419342041, "learning_rate": 1.9006910569105693e-05, "loss": 0.1174, "step": 12215 }, { "epoch": 0.14902439024390243, "grad_norm": 0.6190858483314514, "learning_rate": 1.900650406504065e-05, "loss": 0.1585, "step": 12220 }, { "epoch": 0.14908536585365853, "grad_norm": 0.7310855388641357, "learning_rate": 1.9006097560975612e-05, "loss": 0.1463, "step": 12225 }, { "epoch": 0.14914634146341463, "grad_norm": 1.342889428138733, "learning_rate": 1.900569105691057e-05, "loss": 0.1502, "step": 12230 }, { "epoch": 0.14920731707317073, "grad_norm": 0.7600098848342896, "learning_rate": 1.9005284552845532e-05, "loss": 0.1321, "step": 12235 }, { "epoch": 0.14926829268292682, "grad_norm": 0.6995247602462769, "learning_rate": 1.900487804878049e-05, "loss": 0.1174, "step": 12240 }, { "epoch": 0.14932926829268292, "grad_norm": 1.1532642841339111, "learning_rate": 1.9004471544715448e-05, "loss": 0.1401, "step": 12245 }, { "epoch": 0.14939024390243902, "grad_norm": 0.7702853679656982, "learning_rate": 1.900406504065041e-05, "loss": 0.1131, "step": 12250 }, { "epoch": 0.14945121951219512, "grad_norm": 5.37930965423584, "learning_rate": 1.9003658536585368e-05, "loss": 0.1361, "step": 12255 }, { "epoch": 0.1495121951219512, "grad_norm": 0.9702040553092957, "learning_rate": 1.9003252032520326e-05, "loss": 0.1685, "step": 12260 }, { "epoch": 0.1495731707317073, "grad_norm": 0.7502836585044861, "learning_rate": 1.9002845528455287e-05, "loss": 0.1563, "step": 12265 }, { "epoch": 0.1496341463414634, "grad_norm": 0.908891499042511, "learning_rate": 1.9002439024390246e-05, "loss": 0.1552, "step": 12270 }, { "epoch": 0.1496951219512195, "grad_norm": 0.8910593390464783, "learning_rate": 1.9002032520325204e-05, "loss": 0.1445, "step": 12275 }, { "epoch": 0.1497560975609756, "grad_norm": 1.6462363004684448, "learning_rate": 1.9001626016260165e-05, "loss": 0.1357, "step": 12280 }, { "epoch": 0.1498170731707317, "grad_norm": 1.5105394124984741, "learning_rate": 1.9001219512195123e-05, "loss": 0.1487, "step": 12285 }, { "epoch": 0.1498780487804878, "grad_norm": 0.8014326691627502, "learning_rate": 1.9000813008130085e-05, "loss": 0.134, "step": 12290 }, { "epoch": 0.1499390243902439, "grad_norm": 0.8424179553985596, "learning_rate": 1.9000406504065043e-05, "loss": 0.1347, "step": 12295 }, { "epoch": 0.15, "grad_norm": 1.2989590167999268, "learning_rate": 1.9e-05, "loss": 0.1427, "step": 12300 }, { "epoch": 0.1500609756097561, "grad_norm": 0.6201775670051575, "learning_rate": 1.899959349593496e-05, "loss": 0.1406, "step": 12305 }, { "epoch": 0.1501219512195122, "grad_norm": 0.7931581735610962, "learning_rate": 1.899918699186992e-05, "loss": 0.1177, "step": 12310 }, { "epoch": 0.1501829268292683, "grad_norm": 1.1397730112075806, "learning_rate": 1.899878048780488e-05, "loss": 0.1045, "step": 12315 }, { "epoch": 0.15024390243902438, "grad_norm": 0.7894137501716614, "learning_rate": 1.899837398373984e-05, "loss": 0.1335, "step": 12320 }, { "epoch": 0.15030487804878048, "grad_norm": 0.6977564692497253, "learning_rate": 1.89979674796748e-05, "loss": 0.1312, "step": 12325 }, { "epoch": 0.15036585365853658, "grad_norm": 0.7584391236305237, "learning_rate": 1.8997560975609757e-05, "loss": 0.1411, "step": 12330 }, { "epoch": 0.15042682926829268, "grad_norm": 6.159289836883545, "learning_rate": 1.8997154471544718e-05, "loss": 0.2568, "step": 12335 }, { "epoch": 0.15048780487804878, "grad_norm": 4.293567657470703, "learning_rate": 1.8996747967479676e-05, "loss": 0.2177, "step": 12340 }, { "epoch": 0.15054878048780487, "grad_norm": 1.3003495931625366, "learning_rate": 1.8996341463414634e-05, "loss": 0.204, "step": 12345 }, { "epoch": 0.15060975609756097, "grad_norm": 2.6701557636260986, "learning_rate": 1.8995934959349596e-05, "loss": 0.1458, "step": 12350 }, { "epoch": 0.15067073170731707, "grad_norm": 1.9284571409225464, "learning_rate": 1.8995528455284554e-05, "loss": 0.1569, "step": 12355 }, { "epoch": 0.15073170731707317, "grad_norm": 1.1158956289291382, "learning_rate": 1.8995121951219512e-05, "loss": 0.138, "step": 12360 }, { "epoch": 0.15079268292682926, "grad_norm": 1.0889848470687866, "learning_rate": 1.8994715447154474e-05, "loss": 0.1779, "step": 12365 }, { "epoch": 0.15085365853658536, "grad_norm": 0.9822239279747009, "learning_rate": 1.8994308943089432e-05, "loss": 0.1769, "step": 12370 }, { "epoch": 0.15091463414634146, "grad_norm": 0.9921790361404419, "learning_rate": 1.8993902439024393e-05, "loss": 0.1585, "step": 12375 }, { "epoch": 0.15097560975609756, "grad_norm": 1.1386568546295166, "learning_rate": 1.899349593495935e-05, "loss": 0.1309, "step": 12380 }, { "epoch": 0.15103658536585365, "grad_norm": 1.130845546722412, "learning_rate": 1.8993089430894313e-05, "loss": 0.1554, "step": 12385 }, { "epoch": 0.15109756097560975, "grad_norm": 1.3543388843536377, "learning_rate": 1.8992682926829268e-05, "loss": 0.1724, "step": 12390 }, { "epoch": 0.15115853658536585, "grad_norm": 0.9033803343772888, "learning_rate": 1.899227642276423e-05, "loss": 0.1463, "step": 12395 }, { "epoch": 0.15121951219512195, "grad_norm": 1.046794056892395, "learning_rate": 1.8991869918699187e-05, "loss": 0.1156, "step": 12400 }, { "epoch": 0.15128048780487804, "grad_norm": 1.0339778661727905, "learning_rate": 1.899146341463415e-05, "loss": 0.1083, "step": 12405 }, { "epoch": 0.15134146341463414, "grad_norm": 0.8850510716438293, "learning_rate": 1.8991056910569107e-05, "loss": 0.1547, "step": 12410 }, { "epoch": 0.15140243902439024, "grad_norm": 0.8985923528671265, "learning_rate": 1.899065040650407e-05, "loss": 0.137, "step": 12415 }, { "epoch": 0.15146341463414634, "grad_norm": 0.8743388652801514, "learning_rate": 1.8990243902439027e-05, "loss": 0.1611, "step": 12420 }, { "epoch": 0.15152439024390243, "grad_norm": 1.3483606576919556, "learning_rate": 1.8989837398373985e-05, "loss": 0.1164, "step": 12425 }, { "epoch": 0.15158536585365853, "grad_norm": 1.0697327852249146, "learning_rate": 1.8989430894308943e-05, "loss": 0.1285, "step": 12430 }, { "epoch": 0.15164634146341463, "grad_norm": 0.8343668580055237, "learning_rate": 1.8989024390243904e-05, "loss": 0.1633, "step": 12435 }, { "epoch": 0.15170731707317073, "grad_norm": 0.9923522472381592, "learning_rate": 1.8988617886178863e-05, "loss": 0.1286, "step": 12440 }, { "epoch": 0.15176829268292683, "grad_norm": 1.087173581123352, "learning_rate": 1.8988211382113824e-05, "loss": 0.1441, "step": 12445 }, { "epoch": 0.15182926829268292, "grad_norm": 0.6866230368614197, "learning_rate": 1.8987804878048782e-05, "loss": 0.1274, "step": 12450 }, { "epoch": 0.15189024390243902, "grad_norm": 0.9780888557434082, "learning_rate": 1.898739837398374e-05, "loss": 0.1455, "step": 12455 }, { "epoch": 0.15195121951219512, "grad_norm": 0.9895913600921631, "learning_rate": 1.8986991869918702e-05, "loss": 0.2102, "step": 12460 }, { "epoch": 0.15201219512195122, "grad_norm": 0.9190078377723694, "learning_rate": 1.898658536585366e-05, "loss": 0.1659, "step": 12465 }, { "epoch": 0.1520731707317073, "grad_norm": 0.6292712092399597, "learning_rate": 1.898617886178862e-05, "loss": 0.1581, "step": 12470 }, { "epoch": 0.1521341463414634, "grad_norm": 0.7340027689933777, "learning_rate": 1.898577235772358e-05, "loss": 0.1138, "step": 12475 }, { "epoch": 0.1521951219512195, "grad_norm": 1.7253183126449585, "learning_rate": 1.8985365853658538e-05, "loss": 0.1494, "step": 12480 }, { "epoch": 0.1522560975609756, "grad_norm": 0.673978865146637, "learning_rate": 1.8984959349593496e-05, "loss": 0.135, "step": 12485 }, { "epoch": 0.1523170731707317, "grad_norm": 1.1961432695388794, "learning_rate": 1.8984552845528457e-05, "loss": 0.1545, "step": 12490 }, { "epoch": 0.1523780487804878, "grad_norm": 0.6975015997886658, "learning_rate": 1.8984146341463415e-05, "loss": 0.1263, "step": 12495 }, { "epoch": 0.1524390243902439, "grad_norm": 0.7149331569671631, "learning_rate": 1.8983739837398377e-05, "loss": 0.1538, "step": 12500 }, { "epoch": 0.1525, "grad_norm": 0.8847318291664124, "learning_rate": 1.8983333333333335e-05, "loss": 0.1497, "step": 12505 }, { "epoch": 0.1525609756097561, "grad_norm": 0.5963116884231567, "learning_rate": 1.8982926829268293e-05, "loss": 0.1069, "step": 12510 }, { "epoch": 0.1526219512195122, "grad_norm": 1.8873987197875977, "learning_rate": 1.898252032520325e-05, "loss": 0.1835, "step": 12515 }, { "epoch": 0.1526829268292683, "grad_norm": 1.2294435501098633, "learning_rate": 1.8982113821138213e-05, "loss": 0.1926, "step": 12520 }, { "epoch": 0.1527439024390244, "grad_norm": 1.0112524032592773, "learning_rate": 1.898170731707317e-05, "loss": 0.1712, "step": 12525 }, { "epoch": 0.15280487804878048, "grad_norm": 1.8494057655334473, "learning_rate": 1.8981300813008133e-05, "loss": 0.1695, "step": 12530 }, { "epoch": 0.15286585365853658, "grad_norm": 0.6983107328414917, "learning_rate": 1.898089430894309e-05, "loss": 0.0996, "step": 12535 }, { "epoch": 0.15292682926829268, "grad_norm": 0.4776109755039215, "learning_rate": 1.898048780487805e-05, "loss": 0.1256, "step": 12540 }, { "epoch": 0.15298780487804878, "grad_norm": 0.9626678228378296, "learning_rate": 1.898008130081301e-05, "loss": 0.1167, "step": 12545 }, { "epoch": 0.15304878048780488, "grad_norm": 1.402397871017456, "learning_rate": 1.897967479674797e-05, "loss": 0.1492, "step": 12550 }, { "epoch": 0.15310975609756097, "grad_norm": 0.9918711185455322, "learning_rate": 1.897926829268293e-05, "loss": 0.1806, "step": 12555 }, { "epoch": 0.15317073170731707, "grad_norm": 2.707606077194214, "learning_rate": 1.8978861788617888e-05, "loss": 0.2007, "step": 12560 }, { "epoch": 0.15323170731707317, "grad_norm": 0.7747064232826233, "learning_rate": 1.897845528455285e-05, "loss": 0.1423, "step": 12565 }, { "epoch": 0.15329268292682927, "grad_norm": 1.102044939994812, "learning_rate": 1.8978048780487804e-05, "loss": 0.1669, "step": 12570 }, { "epoch": 0.15335365853658536, "grad_norm": 2.9571118354797363, "learning_rate": 1.8977642276422766e-05, "loss": 0.1156, "step": 12575 }, { "epoch": 0.15341463414634146, "grad_norm": 1.3967859745025635, "learning_rate": 1.8977235772357724e-05, "loss": 0.1149, "step": 12580 }, { "epoch": 0.15347560975609756, "grad_norm": 1.3556920289993286, "learning_rate": 1.8976829268292685e-05, "loss": 0.1721, "step": 12585 }, { "epoch": 0.15353658536585366, "grad_norm": 1.022849678993225, "learning_rate": 1.8976422764227644e-05, "loss": 0.1223, "step": 12590 }, { "epoch": 0.15359756097560975, "grad_norm": 0.8075435757637024, "learning_rate": 1.8976016260162605e-05, "loss": 0.1225, "step": 12595 }, { "epoch": 0.15365853658536585, "grad_norm": 1.0589003562927246, "learning_rate": 1.8975609756097563e-05, "loss": 0.1244, "step": 12600 }, { "epoch": 0.15371951219512195, "grad_norm": 1.2479621171951294, "learning_rate": 1.897520325203252e-05, "loss": 0.1271, "step": 12605 }, { "epoch": 0.15378048780487805, "grad_norm": 1.5305063724517822, "learning_rate": 1.897479674796748e-05, "loss": 0.1454, "step": 12610 }, { "epoch": 0.15384146341463414, "grad_norm": 1.0119333267211914, "learning_rate": 1.897439024390244e-05, "loss": 0.1306, "step": 12615 }, { "epoch": 0.15390243902439024, "grad_norm": 0.6365126967430115, "learning_rate": 1.89739837398374e-05, "loss": 0.1347, "step": 12620 }, { "epoch": 0.15396341463414634, "grad_norm": 0.7509658932685852, "learning_rate": 1.897357723577236e-05, "loss": 0.1473, "step": 12625 }, { "epoch": 0.15402439024390244, "grad_norm": 0.897495687007904, "learning_rate": 1.897317073170732e-05, "loss": 0.1513, "step": 12630 }, { "epoch": 0.15408536585365853, "grad_norm": 0.8533433675765991, "learning_rate": 1.8972764227642277e-05, "loss": 0.1511, "step": 12635 }, { "epoch": 0.15414634146341463, "grad_norm": 0.7084953188896179, "learning_rate": 1.897235772357724e-05, "loss": 0.1744, "step": 12640 }, { "epoch": 0.15420731707317073, "grad_norm": 1.0839818716049194, "learning_rate": 1.8971951219512197e-05, "loss": 0.172, "step": 12645 }, { "epoch": 0.15426829268292683, "grad_norm": 0.9108371138572693, "learning_rate": 1.8971544715447158e-05, "loss": 0.1568, "step": 12650 }, { "epoch": 0.15432926829268293, "grad_norm": 1.0708225965499878, "learning_rate": 1.8971138211382116e-05, "loss": 0.1367, "step": 12655 }, { "epoch": 0.15439024390243902, "grad_norm": 1.1629855632781982, "learning_rate": 1.8970731707317074e-05, "loss": 0.157, "step": 12660 }, { "epoch": 0.15445121951219512, "grad_norm": 1.391153335571289, "learning_rate": 1.8970325203252032e-05, "loss": 0.1412, "step": 12665 }, { "epoch": 0.15451219512195122, "grad_norm": 0.9347018599510193, "learning_rate": 1.8969918699186994e-05, "loss": 0.1588, "step": 12670 }, { "epoch": 0.15457317073170732, "grad_norm": 0.7871151566505432, "learning_rate": 1.8969512195121952e-05, "loss": 0.1216, "step": 12675 }, { "epoch": 0.1546341463414634, "grad_norm": 1.308457612991333, "learning_rate": 1.8969105691056914e-05, "loss": 0.1641, "step": 12680 }, { "epoch": 0.1546951219512195, "grad_norm": 1.9458807706832886, "learning_rate": 1.8968699186991872e-05, "loss": 0.1455, "step": 12685 }, { "epoch": 0.1547560975609756, "grad_norm": 0.5881343483924866, "learning_rate": 1.896829268292683e-05, "loss": 0.1323, "step": 12690 }, { "epoch": 0.1548170731707317, "grad_norm": 1.1304935216903687, "learning_rate": 1.8967886178861788e-05, "loss": 0.1777, "step": 12695 }, { "epoch": 0.1548780487804878, "grad_norm": 0.8843085765838623, "learning_rate": 1.896747967479675e-05, "loss": 0.1099, "step": 12700 }, { "epoch": 0.1549390243902439, "grad_norm": 0.7106626629829407, "learning_rate": 1.8967073170731708e-05, "loss": 0.1513, "step": 12705 }, { "epoch": 0.155, "grad_norm": 0.6314912438392639, "learning_rate": 1.896666666666667e-05, "loss": 0.1672, "step": 12710 }, { "epoch": 0.1550609756097561, "grad_norm": 0.8808926939964294, "learning_rate": 1.8966260162601627e-05, "loss": 0.1031, "step": 12715 }, { "epoch": 0.1551219512195122, "grad_norm": 1.0705333948135376, "learning_rate": 1.8965853658536585e-05, "loss": 0.1571, "step": 12720 }, { "epoch": 0.1551829268292683, "grad_norm": 0.7977230548858643, "learning_rate": 1.8965447154471547e-05, "loss": 0.1313, "step": 12725 }, { "epoch": 0.1552439024390244, "grad_norm": 0.70230633020401, "learning_rate": 1.8965040650406505e-05, "loss": 0.1088, "step": 12730 }, { "epoch": 0.1553048780487805, "grad_norm": 0.8471545577049255, "learning_rate": 1.8964634146341467e-05, "loss": 0.098, "step": 12735 }, { "epoch": 0.15536585365853658, "grad_norm": 0.7303450107574463, "learning_rate": 1.8964227642276425e-05, "loss": 0.1297, "step": 12740 }, { "epoch": 0.15542682926829268, "grad_norm": 1.1362769603729248, "learning_rate": 1.8963821138211386e-05, "loss": 0.1588, "step": 12745 }, { "epoch": 0.15548780487804878, "grad_norm": 0.947360634803772, "learning_rate": 1.896341463414634e-05, "loss": 0.1138, "step": 12750 }, { "epoch": 0.15554878048780488, "grad_norm": 2.049551248550415, "learning_rate": 1.8963008130081302e-05, "loss": 0.1359, "step": 12755 }, { "epoch": 0.15560975609756098, "grad_norm": 0.9416958689689636, "learning_rate": 1.896260162601626e-05, "loss": 0.1788, "step": 12760 }, { "epoch": 0.15567073170731707, "grad_norm": 1.7702205181121826, "learning_rate": 1.8962195121951222e-05, "loss": 0.121, "step": 12765 }, { "epoch": 0.15573170731707317, "grad_norm": 1.1177978515625, "learning_rate": 1.896178861788618e-05, "loss": 0.1699, "step": 12770 }, { "epoch": 0.15579268292682927, "grad_norm": 2.330725908279419, "learning_rate": 1.8961382113821142e-05, "loss": 0.1744, "step": 12775 }, { "epoch": 0.15585365853658537, "grad_norm": 0.6826907992362976, "learning_rate": 1.8960975609756097e-05, "loss": 0.135, "step": 12780 }, { "epoch": 0.15591463414634146, "grad_norm": 0.9844997525215149, "learning_rate": 1.8960569105691058e-05, "loss": 0.1358, "step": 12785 }, { "epoch": 0.15597560975609756, "grad_norm": 1.3024367094039917, "learning_rate": 1.8960162601626016e-05, "loss": 0.1405, "step": 12790 }, { "epoch": 0.15603658536585366, "grad_norm": 1.2854044437408447, "learning_rate": 1.8959756097560978e-05, "loss": 0.14, "step": 12795 }, { "epoch": 0.15609756097560976, "grad_norm": 1.0456730127334595, "learning_rate": 1.8959349593495936e-05, "loss": 0.1138, "step": 12800 }, { "epoch": 0.15615853658536585, "grad_norm": 1.4170961380004883, "learning_rate": 1.8958943089430897e-05, "loss": 0.1413, "step": 12805 }, { "epoch": 0.15621951219512195, "grad_norm": 0.6781554818153381, "learning_rate": 1.8958536585365855e-05, "loss": 0.1204, "step": 12810 }, { "epoch": 0.15628048780487805, "grad_norm": 1.0287489891052246, "learning_rate": 1.8958130081300814e-05, "loss": 0.1176, "step": 12815 }, { "epoch": 0.15634146341463415, "grad_norm": 1.0598092079162598, "learning_rate": 1.8957723577235775e-05, "loss": 0.125, "step": 12820 }, { "epoch": 0.15640243902439024, "grad_norm": 3.3374440670013428, "learning_rate": 1.8957317073170733e-05, "loss": 0.1693, "step": 12825 }, { "epoch": 0.15646341463414634, "grad_norm": 1.2100569009780884, "learning_rate": 1.8956910569105695e-05, "loss": 0.1317, "step": 12830 }, { "epoch": 0.15652439024390244, "grad_norm": 2.6097068786621094, "learning_rate": 1.8956504065040653e-05, "loss": 0.1409, "step": 12835 }, { "epoch": 0.15658536585365854, "grad_norm": 1.3517860174179077, "learning_rate": 1.895609756097561e-05, "loss": 0.1371, "step": 12840 }, { "epoch": 0.15664634146341463, "grad_norm": 0.9453885555267334, "learning_rate": 1.895569105691057e-05, "loss": 0.1608, "step": 12845 }, { "epoch": 0.15670731707317073, "grad_norm": 0.7078816890716553, "learning_rate": 1.895528455284553e-05, "loss": 0.148, "step": 12850 }, { "epoch": 0.15676829268292683, "grad_norm": 0.906618058681488, "learning_rate": 1.895487804878049e-05, "loss": 0.1226, "step": 12855 }, { "epoch": 0.15682926829268293, "grad_norm": 1.0486735105514526, "learning_rate": 1.895447154471545e-05, "loss": 0.1329, "step": 12860 }, { "epoch": 0.15689024390243902, "grad_norm": 0.6985923051834106, "learning_rate": 1.895406504065041e-05, "loss": 0.1247, "step": 12865 }, { "epoch": 0.15695121951219512, "grad_norm": 0.7131737470626831, "learning_rate": 1.8953658536585367e-05, "loss": 0.1081, "step": 12870 }, { "epoch": 0.15701219512195122, "grad_norm": 1.3296765089035034, "learning_rate": 1.8953252032520325e-05, "loss": 0.1432, "step": 12875 }, { "epoch": 0.15707317073170732, "grad_norm": 0.7753835320472717, "learning_rate": 1.8952845528455286e-05, "loss": 0.1514, "step": 12880 }, { "epoch": 0.15713414634146342, "grad_norm": 0.4640006422996521, "learning_rate": 1.8952439024390244e-05, "loss": 0.1045, "step": 12885 }, { "epoch": 0.1571951219512195, "grad_norm": 0.6045703887939453, "learning_rate": 1.8952032520325206e-05, "loss": 0.1177, "step": 12890 }, { "epoch": 0.1572560975609756, "grad_norm": 0.7000444531440735, "learning_rate": 1.8951626016260164e-05, "loss": 0.1528, "step": 12895 }, { "epoch": 0.1573170731707317, "grad_norm": 1.4257206916809082, "learning_rate": 1.8951219512195122e-05, "loss": 0.1347, "step": 12900 }, { "epoch": 0.1573780487804878, "grad_norm": 0.6642007827758789, "learning_rate": 1.8950813008130084e-05, "loss": 0.1085, "step": 12905 }, { "epoch": 0.1574390243902439, "grad_norm": 0.7262999415397644, "learning_rate": 1.8950406504065042e-05, "loss": 0.1405, "step": 12910 }, { "epoch": 0.1575, "grad_norm": 0.8699173927307129, "learning_rate": 1.8950000000000003e-05, "loss": 0.1557, "step": 12915 }, { "epoch": 0.1575609756097561, "grad_norm": 0.5250003933906555, "learning_rate": 1.894959349593496e-05, "loss": 0.1042, "step": 12920 }, { "epoch": 0.1576219512195122, "grad_norm": 0.9237308502197266, "learning_rate": 1.894918699186992e-05, "loss": 0.1217, "step": 12925 }, { "epoch": 0.1576829268292683, "grad_norm": 0.5127700567245483, "learning_rate": 1.8948780487804878e-05, "loss": 0.1171, "step": 12930 }, { "epoch": 0.1577439024390244, "grad_norm": 1.2161211967468262, "learning_rate": 1.894837398373984e-05, "loss": 0.1488, "step": 12935 }, { "epoch": 0.1578048780487805, "grad_norm": 1.0545198917388916, "learning_rate": 1.8947967479674797e-05, "loss": 0.1311, "step": 12940 }, { "epoch": 0.1578658536585366, "grad_norm": 0.7247009873390198, "learning_rate": 1.894756097560976e-05, "loss": 0.1429, "step": 12945 }, { "epoch": 0.15792682926829268, "grad_norm": 0.8051348328590393, "learning_rate": 1.8947154471544717e-05, "loss": 0.149, "step": 12950 }, { "epoch": 0.15798780487804878, "grad_norm": 0.5963977575302124, "learning_rate": 1.894674796747968e-05, "loss": 0.1218, "step": 12955 }, { "epoch": 0.15804878048780488, "grad_norm": 0.5376545190811157, "learning_rate": 1.8946341463414633e-05, "loss": 0.1518, "step": 12960 }, { "epoch": 0.15810975609756098, "grad_norm": 1.2076034545898438, "learning_rate": 1.8945934959349595e-05, "loss": 0.14, "step": 12965 }, { "epoch": 0.15817073170731707, "grad_norm": 0.8331072926521301, "learning_rate": 1.8945528455284553e-05, "loss": 0.1169, "step": 12970 }, { "epoch": 0.15823170731707317, "grad_norm": 1.5735186338424683, "learning_rate": 1.8945121951219514e-05, "loss": 0.143, "step": 12975 }, { "epoch": 0.15829268292682927, "grad_norm": 0.7486391067504883, "learning_rate": 1.8944715447154472e-05, "loss": 0.1517, "step": 12980 }, { "epoch": 0.15835365853658537, "grad_norm": 1.1693551540374756, "learning_rate": 1.8944308943089434e-05, "loss": 0.1094, "step": 12985 }, { "epoch": 0.15841463414634147, "grad_norm": 1.2109265327453613, "learning_rate": 1.8943902439024392e-05, "loss": 0.1645, "step": 12990 }, { "epoch": 0.15847560975609756, "grad_norm": 2.294144868850708, "learning_rate": 1.894349593495935e-05, "loss": 0.1908, "step": 12995 }, { "epoch": 0.15853658536585366, "grad_norm": 1.2558162212371826, "learning_rate": 1.8943089430894312e-05, "loss": 0.1292, "step": 13000 }, { "epoch": 0.15859756097560976, "grad_norm": 0.9323238134384155, "learning_rate": 1.894268292682927e-05, "loss": 0.15, "step": 13005 }, { "epoch": 0.15865853658536586, "grad_norm": 1.2794898748397827, "learning_rate": 1.894227642276423e-05, "loss": 0.104, "step": 13010 }, { "epoch": 0.15871951219512195, "grad_norm": 1.8443965911865234, "learning_rate": 1.894186991869919e-05, "loss": 0.183, "step": 13015 }, { "epoch": 0.15878048780487805, "grad_norm": 0.5453464984893799, "learning_rate": 1.8941463414634148e-05, "loss": 0.1718, "step": 13020 }, { "epoch": 0.15884146341463415, "grad_norm": 0.4266462028026581, "learning_rate": 1.8941056910569106e-05, "loss": 0.1157, "step": 13025 }, { "epoch": 0.15890243902439025, "grad_norm": 0.563491702079773, "learning_rate": 1.8940650406504067e-05, "loss": 0.1043, "step": 13030 }, { "epoch": 0.15896341463414634, "grad_norm": 0.9248059391975403, "learning_rate": 1.8940243902439025e-05, "loss": 0.1605, "step": 13035 }, { "epoch": 0.15902439024390244, "grad_norm": 1.021466851234436, "learning_rate": 1.8939837398373987e-05, "loss": 0.1218, "step": 13040 }, { "epoch": 0.15908536585365854, "grad_norm": 1.3209760189056396, "learning_rate": 1.8939430894308945e-05, "loss": 0.1482, "step": 13045 }, { "epoch": 0.15914634146341464, "grad_norm": 0.6579742431640625, "learning_rate": 1.8939024390243903e-05, "loss": 0.1215, "step": 13050 }, { "epoch": 0.15920731707317073, "grad_norm": 1.3941502571105957, "learning_rate": 1.893861788617886e-05, "loss": 0.1331, "step": 13055 }, { "epoch": 0.15926829268292683, "grad_norm": 0.8434246778488159, "learning_rate": 1.8938211382113823e-05, "loss": 0.1399, "step": 13060 }, { "epoch": 0.15932926829268293, "grad_norm": 1.1947174072265625, "learning_rate": 1.893780487804878e-05, "loss": 0.0989, "step": 13065 }, { "epoch": 0.15939024390243903, "grad_norm": 0.6977143287658691, "learning_rate": 1.8937398373983742e-05, "loss": 0.1411, "step": 13070 }, { "epoch": 0.15945121951219512, "grad_norm": 1.4177491664886475, "learning_rate": 1.89369918699187e-05, "loss": 0.145, "step": 13075 }, { "epoch": 0.15951219512195122, "grad_norm": 2.0955965518951416, "learning_rate": 1.893658536585366e-05, "loss": 0.125, "step": 13080 }, { "epoch": 0.15957317073170732, "grad_norm": 0.7033307552337646, "learning_rate": 1.893617886178862e-05, "loss": 0.1833, "step": 13085 }, { "epoch": 0.15963414634146342, "grad_norm": 0.6356862783432007, "learning_rate": 1.893577235772358e-05, "loss": 0.1415, "step": 13090 }, { "epoch": 0.15969512195121952, "grad_norm": 1.095176100730896, "learning_rate": 1.893536585365854e-05, "loss": 0.1275, "step": 13095 }, { "epoch": 0.1597560975609756, "grad_norm": 1.8691563606262207, "learning_rate": 1.8934959349593498e-05, "loss": 0.1469, "step": 13100 }, { "epoch": 0.1598170731707317, "grad_norm": 1.5668257474899292, "learning_rate": 1.8934552845528456e-05, "loss": 0.2141, "step": 13105 }, { "epoch": 0.1598780487804878, "grad_norm": 1.2109153270721436, "learning_rate": 1.8934146341463414e-05, "loss": 0.1005, "step": 13110 }, { "epoch": 0.1599390243902439, "grad_norm": 0.9152911901473999, "learning_rate": 1.8933739837398376e-05, "loss": 0.1412, "step": 13115 }, { "epoch": 0.16, "grad_norm": 0.9882194995880127, "learning_rate": 1.8933333333333334e-05, "loss": 0.1356, "step": 13120 }, { "epoch": 0.1600609756097561, "grad_norm": 1.791498064994812, "learning_rate": 1.8932926829268295e-05, "loss": 0.1279, "step": 13125 }, { "epoch": 0.1601219512195122, "grad_norm": 0.6175379157066345, "learning_rate": 1.8932520325203254e-05, "loss": 0.1345, "step": 13130 }, { "epoch": 0.1601829268292683, "grad_norm": 0.9405971169471741, "learning_rate": 1.8932113821138215e-05, "loss": 0.132, "step": 13135 }, { "epoch": 0.1602439024390244, "grad_norm": 1.4881373643875122, "learning_rate": 1.893170731707317e-05, "loss": 0.1321, "step": 13140 }, { "epoch": 0.1603048780487805, "grad_norm": 1.141452670097351, "learning_rate": 1.893130081300813e-05, "loss": 0.1349, "step": 13145 }, { "epoch": 0.1603658536585366, "grad_norm": 2.106917381286621, "learning_rate": 1.893089430894309e-05, "loss": 0.1342, "step": 13150 }, { "epoch": 0.1604268292682927, "grad_norm": 2.3721818923950195, "learning_rate": 1.893048780487805e-05, "loss": 0.1372, "step": 13155 }, { "epoch": 0.16048780487804878, "grad_norm": 1.4297195672988892, "learning_rate": 1.893008130081301e-05, "loss": 0.1479, "step": 13160 }, { "epoch": 0.16054878048780488, "grad_norm": 1.102833867073059, "learning_rate": 1.892967479674797e-05, "loss": 0.154, "step": 13165 }, { "epoch": 0.16060975609756098, "grad_norm": 1.0024217367172241, "learning_rate": 1.892926829268293e-05, "loss": 0.1551, "step": 13170 }, { "epoch": 0.16067073170731708, "grad_norm": 1.2070462703704834, "learning_rate": 1.8928861788617887e-05, "loss": 0.1203, "step": 13175 }, { "epoch": 0.16073170731707317, "grad_norm": 1.486716866493225, "learning_rate": 1.892845528455285e-05, "loss": 0.1483, "step": 13180 }, { "epoch": 0.16079268292682927, "grad_norm": 0.7870188355445862, "learning_rate": 1.8928048780487806e-05, "loss": 0.1243, "step": 13185 }, { "epoch": 0.16085365853658537, "grad_norm": 2.291349411010742, "learning_rate": 1.8927642276422765e-05, "loss": 0.1333, "step": 13190 }, { "epoch": 0.16091463414634147, "grad_norm": 0.6224071383476257, "learning_rate": 1.8927235772357726e-05, "loss": 0.107, "step": 13195 }, { "epoch": 0.16097560975609757, "grad_norm": 1.3538016080856323, "learning_rate": 1.8926829268292684e-05, "loss": 0.1761, "step": 13200 }, { "epoch": 0.16103658536585366, "grad_norm": 1.4474977254867554, "learning_rate": 1.8926422764227642e-05, "loss": 0.2117, "step": 13205 }, { "epoch": 0.16109756097560976, "grad_norm": 1.237131118774414, "learning_rate": 1.8926016260162604e-05, "loss": 0.2236, "step": 13210 }, { "epoch": 0.16115853658536586, "grad_norm": 0.7552480101585388, "learning_rate": 1.8925609756097562e-05, "loss": 0.1873, "step": 13215 }, { "epoch": 0.16121951219512196, "grad_norm": 0.9434358477592468, "learning_rate": 1.8925203252032524e-05, "loss": 0.1314, "step": 13220 }, { "epoch": 0.16128048780487805, "grad_norm": 1.2054245471954346, "learning_rate": 1.892479674796748e-05, "loss": 0.1468, "step": 13225 }, { "epoch": 0.16134146341463415, "grad_norm": 3.08451771736145, "learning_rate": 1.892439024390244e-05, "loss": 0.1448, "step": 13230 }, { "epoch": 0.16140243902439025, "grad_norm": 1.56090247631073, "learning_rate": 1.8923983739837398e-05, "loss": 0.1147, "step": 13235 }, { "epoch": 0.16146341463414635, "grad_norm": 0.8474755883216858, "learning_rate": 1.892357723577236e-05, "loss": 0.1119, "step": 13240 }, { "epoch": 0.16152439024390244, "grad_norm": 0.9153479933738708, "learning_rate": 1.8923170731707318e-05, "loss": 0.1524, "step": 13245 }, { "epoch": 0.16158536585365854, "grad_norm": 0.9324643611907959, "learning_rate": 1.892276422764228e-05, "loss": 0.1294, "step": 13250 }, { "epoch": 0.16164634146341464, "grad_norm": 0.6985194087028503, "learning_rate": 1.8922357723577237e-05, "loss": 0.1264, "step": 13255 }, { "epoch": 0.16170731707317074, "grad_norm": 1.1078747510910034, "learning_rate": 1.8921951219512195e-05, "loss": 0.1079, "step": 13260 }, { "epoch": 0.16176829268292683, "grad_norm": 0.6507152318954468, "learning_rate": 1.8921544715447157e-05, "loss": 0.1145, "step": 13265 }, { "epoch": 0.16182926829268293, "grad_norm": 1.1160951852798462, "learning_rate": 1.8921138211382115e-05, "loss": 0.1203, "step": 13270 }, { "epoch": 0.16189024390243903, "grad_norm": 1.0288132429122925, "learning_rate": 1.8920731707317077e-05, "loss": 0.1347, "step": 13275 }, { "epoch": 0.16195121951219513, "grad_norm": 0.5502692461013794, "learning_rate": 1.8920325203252035e-05, "loss": 0.1038, "step": 13280 }, { "epoch": 0.16201219512195122, "grad_norm": 1.2710673809051514, "learning_rate": 1.8919918699186993e-05, "loss": 0.146, "step": 13285 }, { "epoch": 0.16207317073170732, "grad_norm": 1.2059674263000488, "learning_rate": 1.891951219512195e-05, "loss": 0.1427, "step": 13290 }, { "epoch": 0.16213414634146342, "grad_norm": 1.6525366306304932, "learning_rate": 1.8919105691056912e-05, "loss": 0.1411, "step": 13295 }, { "epoch": 0.16219512195121952, "grad_norm": 1.4808005094528198, "learning_rate": 1.891869918699187e-05, "loss": 0.1787, "step": 13300 }, { "epoch": 0.16225609756097562, "grad_norm": 1.3473237752914429, "learning_rate": 1.8918292682926832e-05, "loss": 0.1339, "step": 13305 }, { "epoch": 0.1623170731707317, "grad_norm": 8.328198432922363, "learning_rate": 1.891788617886179e-05, "loss": 0.2043, "step": 13310 }, { "epoch": 0.1623780487804878, "grad_norm": 0.8508250713348389, "learning_rate": 1.891747967479675e-05, "loss": 0.1564, "step": 13315 }, { "epoch": 0.1624390243902439, "grad_norm": 0.8741240501403809, "learning_rate": 1.8917073170731706e-05, "loss": 0.1336, "step": 13320 }, { "epoch": 0.1625, "grad_norm": 1.6258600950241089, "learning_rate": 1.8916666666666668e-05, "loss": 0.1425, "step": 13325 }, { "epoch": 0.1625609756097561, "grad_norm": 0.8591026067733765, "learning_rate": 1.8916260162601626e-05, "loss": 0.1336, "step": 13330 }, { "epoch": 0.1626219512195122, "grad_norm": 0.7769043445587158, "learning_rate": 1.8915853658536588e-05, "loss": 0.1069, "step": 13335 }, { "epoch": 0.1626829268292683, "grad_norm": 1.0072088241577148, "learning_rate": 1.8915447154471546e-05, "loss": 0.1283, "step": 13340 }, { "epoch": 0.1627439024390244, "grad_norm": 0.6037675142288208, "learning_rate": 1.8915040650406507e-05, "loss": 0.1302, "step": 13345 }, { "epoch": 0.1628048780487805, "grad_norm": 1.3543881177902222, "learning_rate": 1.8914634146341465e-05, "loss": 0.1441, "step": 13350 }, { "epoch": 0.1628658536585366, "grad_norm": 0.875728964805603, "learning_rate": 1.8914227642276423e-05, "loss": 0.1445, "step": 13355 }, { "epoch": 0.1629268292682927, "grad_norm": 0.6901640892028809, "learning_rate": 1.8913821138211385e-05, "loss": 0.1304, "step": 13360 }, { "epoch": 0.1629878048780488, "grad_norm": 1.2778682708740234, "learning_rate": 1.8913414634146343e-05, "loss": 0.1252, "step": 13365 }, { "epoch": 0.16304878048780488, "grad_norm": 0.9418996572494507, "learning_rate": 1.89130081300813e-05, "loss": 0.1544, "step": 13370 }, { "epoch": 0.16310975609756098, "grad_norm": 1.0296869277954102, "learning_rate": 1.8912601626016263e-05, "loss": 0.1543, "step": 13375 }, { "epoch": 0.16317073170731708, "grad_norm": 1.2893972396850586, "learning_rate": 1.891219512195122e-05, "loss": 0.1333, "step": 13380 }, { "epoch": 0.16323170731707318, "grad_norm": 1.0298351049423218, "learning_rate": 1.891178861788618e-05, "loss": 0.1323, "step": 13385 }, { "epoch": 0.16329268292682927, "grad_norm": 0.9490212798118591, "learning_rate": 1.891138211382114e-05, "loss": 0.1241, "step": 13390 }, { "epoch": 0.16335365853658537, "grad_norm": 1.3337175846099854, "learning_rate": 1.89109756097561e-05, "loss": 0.1628, "step": 13395 }, { "epoch": 0.16341463414634147, "grad_norm": 0.7957617044448853, "learning_rate": 1.891056910569106e-05, "loss": 0.1466, "step": 13400 }, { "epoch": 0.16347560975609757, "grad_norm": 0.740012526512146, "learning_rate": 1.891016260162602e-05, "loss": 0.1185, "step": 13405 }, { "epoch": 0.16353658536585367, "grad_norm": 0.902097225189209, "learning_rate": 1.8909756097560976e-05, "loss": 0.1232, "step": 13410 }, { "epoch": 0.16359756097560976, "grad_norm": 1.0093317031860352, "learning_rate": 1.8909349593495935e-05, "loss": 0.1477, "step": 13415 }, { "epoch": 0.16365853658536586, "grad_norm": 1.284205675125122, "learning_rate": 1.8908943089430896e-05, "loss": 0.1466, "step": 13420 }, { "epoch": 0.16371951219512196, "grad_norm": 0.5883445143699646, "learning_rate": 1.8908536585365854e-05, "loss": 0.1351, "step": 13425 }, { "epoch": 0.16378048780487806, "grad_norm": 0.6521322131156921, "learning_rate": 1.8908130081300816e-05, "loss": 0.1552, "step": 13430 }, { "epoch": 0.16384146341463415, "grad_norm": 1.6381558179855347, "learning_rate": 1.8907723577235774e-05, "loss": 0.1592, "step": 13435 }, { "epoch": 0.16390243902439025, "grad_norm": 1.075727939605713, "learning_rate": 1.8907317073170732e-05, "loss": 0.1212, "step": 13440 }, { "epoch": 0.16396341463414635, "grad_norm": 2.24771785736084, "learning_rate": 1.8906910569105694e-05, "loss": 0.1132, "step": 13445 }, { "epoch": 0.16402439024390245, "grad_norm": 0.9357935190200806, "learning_rate": 1.890650406504065e-05, "loss": 0.1025, "step": 13450 }, { "epoch": 0.16408536585365854, "grad_norm": 0.7388603687286377, "learning_rate": 1.890609756097561e-05, "loss": 0.1309, "step": 13455 }, { "epoch": 0.16414634146341464, "grad_norm": 0.8270559906959534, "learning_rate": 1.890569105691057e-05, "loss": 0.0982, "step": 13460 }, { "epoch": 0.16420731707317074, "grad_norm": 1.5266183614730835, "learning_rate": 1.890528455284553e-05, "loss": 0.1238, "step": 13465 }, { "epoch": 0.16426829268292684, "grad_norm": 0.5088762044906616, "learning_rate": 1.8904878048780488e-05, "loss": 0.1438, "step": 13470 }, { "epoch": 0.16432926829268293, "grad_norm": 1.5403660535812378, "learning_rate": 1.890447154471545e-05, "loss": 0.1246, "step": 13475 }, { "epoch": 0.16439024390243903, "grad_norm": 1.2220935821533203, "learning_rate": 1.8904065040650407e-05, "loss": 0.1495, "step": 13480 }, { "epoch": 0.16445121951219513, "grad_norm": 0.9634247422218323, "learning_rate": 1.890365853658537e-05, "loss": 0.1485, "step": 13485 }, { "epoch": 0.16451219512195123, "grad_norm": 0.8321747183799744, "learning_rate": 1.8903252032520327e-05, "loss": 0.1292, "step": 13490 }, { "epoch": 0.16457317073170732, "grad_norm": 1.1843692064285278, "learning_rate": 1.890284552845529e-05, "loss": 0.1424, "step": 13495 }, { "epoch": 0.16463414634146342, "grad_norm": 0.8858650326728821, "learning_rate": 1.8902439024390243e-05, "loss": 0.1114, "step": 13500 }, { "epoch": 0.16469512195121952, "grad_norm": 0.9552114009857178, "learning_rate": 1.8902032520325205e-05, "loss": 0.1524, "step": 13505 }, { "epoch": 0.16475609756097562, "grad_norm": 1.4196158647537231, "learning_rate": 1.8901626016260163e-05, "loss": 0.1193, "step": 13510 }, { "epoch": 0.16481707317073171, "grad_norm": 1.1885336637496948, "learning_rate": 1.8901219512195124e-05, "loss": 0.1333, "step": 13515 }, { "epoch": 0.1648780487804878, "grad_norm": 0.9432135820388794, "learning_rate": 1.8900813008130082e-05, "loss": 0.1384, "step": 13520 }, { "epoch": 0.1649390243902439, "grad_norm": 2.1390933990478516, "learning_rate": 1.8900406504065044e-05, "loss": 0.1362, "step": 13525 }, { "epoch": 0.165, "grad_norm": 1.034254789352417, "learning_rate": 1.8900000000000002e-05, "loss": 0.1569, "step": 13530 }, { "epoch": 0.1650609756097561, "grad_norm": 0.7487561702728271, "learning_rate": 1.889959349593496e-05, "loss": 0.1689, "step": 13535 }, { "epoch": 0.1651219512195122, "grad_norm": 0.9801388382911682, "learning_rate": 1.889918699186992e-05, "loss": 0.1265, "step": 13540 }, { "epoch": 0.1651829268292683, "grad_norm": 0.866706907749176, "learning_rate": 1.889878048780488e-05, "loss": 0.1623, "step": 13545 }, { "epoch": 0.1652439024390244, "grad_norm": 1.3408812284469604, "learning_rate": 1.8898373983739838e-05, "loss": 0.1611, "step": 13550 }, { "epoch": 0.1653048780487805, "grad_norm": 0.8147128820419312, "learning_rate": 1.88979674796748e-05, "loss": 0.147, "step": 13555 }, { "epoch": 0.1653658536585366, "grad_norm": 1.4079982042312622, "learning_rate": 1.8897560975609758e-05, "loss": 0.1533, "step": 13560 }, { "epoch": 0.1654268292682927, "grad_norm": 5.763309478759766, "learning_rate": 1.8897154471544716e-05, "loss": 0.1081, "step": 13565 }, { "epoch": 0.1654878048780488, "grad_norm": 1.3876051902770996, "learning_rate": 1.8896747967479677e-05, "loss": 0.1823, "step": 13570 }, { "epoch": 0.1655487804878049, "grad_norm": 1.7990323305130005, "learning_rate": 1.8896341463414635e-05, "loss": 0.1705, "step": 13575 }, { "epoch": 0.16560975609756098, "grad_norm": 1.2322620153427124, "learning_rate": 1.8895934959349597e-05, "loss": 0.1448, "step": 13580 }, { "epoch": 0.16567073170731708, "grad_norm": 0.6594640016555786, "learning_rate": 1.8895528455284555e-05, "loss": 0.1532, "step": 13585 }, { "epoch": 0.16573170731707318, "grad_norm": 1.094244122505188, "learning_rate": 1.8895121951219513e-05, "loss": 0.123, "step": 13590 }, { "epoch": 0.16579268292682928, "grad_norm": 1.1284347772598267, "learning_rate": 1.889471544715447e-05, "loss": 0.1326, "step": 13595 }, { "epoch": 0.16585365853658537, "grad_norm": 1.1767644882202148, "learning_rate": 1.8894308943089433e-05, "loss": 0.1328, "step": 13600 }, { "epoch": 0.16591463414634147, "grad_norm": 0.9603951573371887, "learning_rate": 1.889390243902439e-05, "loss": 0.1528, "step": 13605 }, { "epoch": 0.16597560975609757, "grad_norm": 0.9352082014083862, "learning_rate": 1.8893495934959352e-05, "loss": 0.0906, "step": 13610 }, { "epoch": 0.16603658536585367, "grad_norm": 0.9257400631904602, "learning_rate": 1.889308943089431e-05, "loss": 0.1413, "step": 13615 }, { "epoch": 0.16609756097560976, "grad_norm": 0.5756912231445312, "learning_rate": 1.889268292682927e-05, "loss": 0.1131, "step": 13620 }, { "epoch": 0.16615853658536586, "grad_norm": 0.6824678778648376, "learning_rate": 1.889227642276423e-05, "loss": 0.1159, "step": 13625 }, { "epoch": 0.16621951219512196, "grad_norm": 0.8539133071899414, "learning_rate": 1.8891869918699188e-05, "loss": 0.1648, "step": 13630 }, { "epoch": 0.16628048780487806, "grad_norm": 0.854296088218689, "learning_rate": 1.8891463414634146e-05, "loss": 0.1332, "step": 13635 }, { "epoch": 0.16634146341463416, "grad_norm": 0.825799822807312, "learning_rate": 1.8891056910569108e-05, "loss": 0.1533, "step": 13640 }, { "epoch": 0.16640243902439025, "grad_norm": 0.9084630608558655, "learning_rate": 1.8890650406504066e-05, "loss": 0.1074, "step": 13645 }, { "epoch": 0.16646341463414635, "grad_norm": 0.7895258069038391, "learning_rate": 1.8890243902439024e-05, "loss": 0.1268, "step": 13650 }, { "epoch": 0.16652439024390245, "grad_norm": 0.8608607649803162, "learning_rate": 1.8889837398373986e-05, "loss": 0.1101, "step": 13655 }, { "epoch": 0.16658536585365855, "grad_norm": 0.76783287525177, "learning_rate": 1.8889430894308944e-05, "loss": 0.1018, "step": 13660 }, { "epoch": 0.16664634146341464, "grad_norm": 1.372456669807434, "learning_rate": 1.8889024390243905e-05, "loss": 0.1191, "step": 13665 }, { "epoch": 0.16670731707317074, "grad_norm": 0.9098891615867615, "learning_rate": 1.8888617886178863e-05, "loss": 0.1266, "step": 13670 }, { "epoch": 0.16676829268292684, "grad_norm": 1.0343657732009888, "learning_rate": 1.8888211382113825e-05, "loss": 0.1175, "step": 13675 }, { "epoch": 0.16682926829268294, "grad_norm": 0.818699061870575, "learning_rate": 1.888780487804878e-05, "loss": 0.1312, "step": 13680 }, { "epoch": 0.16689024390243903, "grad_norm": 0.9462467432022095, "learning_rate": 1.888739837398374e-05, "loss": 0.152, "step": 13685 }, { "epoch": 0.16695121951219513, "grad_norm": 1.0134859085083008, "learning_rate": 1.88869918699187e-05, "loss": 0.1201, "step": 13690 }, { "epoch": 0.16701219512195123, "grad_norm": 0.9060114622116089, "learning_rate": 1.888658536585366e-05, "loss": 0.1661, "step": 13695 }, { "epoch": 0.16707317073170733, "grad_norm": 0.7212604284286499, "learning_rate": 1.888617886178862e-05, "loss": 0.1143, "step": 13700 }, { "epoch": 0.16713414634146342, "grad_norm": 1.0323017835617065, "learning_rate": 1.888577235772358e-05, "loss": 0.1342, "step": 13705 }, { "epoch": 0.16719512195121952, "grad_norm": 0.9557307362556458, "learning_rate": 1.888536585365854e-05, "loss": 0.137, "step": 13710 }, { "epoch": 0.16725609756097562, "grad_norm": 1.9343711137771606, "learning_rate": 1.8884959349593497e-05, "loss": 0.1345, "step": 13715 }, { "epoch": 0.16731707317073172, "grad_norm": 0.7754582166671753, "learning_rate": 1.8884552845528455e-05, "loss": 0.1571, "step": 13720 }, { "epoch": 0.16737804878048781, "grad_norm": 0.7546263337135315, "learning_rate": 1.8884146341463416e-05, "loss": 0.1672, "step": 13725 }, { "epoch": 0.1674390243902439, "grad_norm": 1.7612489461898804, "learning_rate": 1.8883739837398375e-05, "loss": 0.1119, "step": 13730 }, { "epoch": 0.1675, "grad_norm": 0.7619767189025879, "learning_rate": 1.8883333333333336e-05, "loss": 0.1295, "step": 13735 }, { "epoch": 0.1675609756097561, "grad_norm": 0.6694514751434326, "learning_rate": 1.8882926829268294e-05, "loss": 0.1503, "step": 13740 }, { "epoch": 0.1676219512195122, "grad_norm": 1.0677661895751953, "learning_rate": 1.8882520325203252e-05, "loss": 0.1816, "step": 13745 }, { "epoch": 0.1676829268292683, "grad_norm": 1.3091603517532349, "learning_rate": 1.8882113821138214e-05, "loss": 0.1107, "step": 13750 }, { "epoch": 0.1677439024390244, "grad_norm": 0.8084039688110352, "learning_rate": 1.8881707317073172e-05, "loss": 0.1516, "step": 13755 }, { "epoch": 0.1678048780487805, "grad_norm": 0.9997568726539612, "learning_rate": 1.8881300813008133e-05, "loss": 0.1647, "step": 13760 }, { "epoch": 0.1678658536585366, "grad_norm": 0.8113890290260315, "learning_rate": 1.888089430894309e-05, "loss": 0.1202, "step": 13765 }, { "epoch": 0.1679268292682927, "grad_norm": 1.4701377153396606, "learning_rate": 1.888048780487805e-05, "loss": 0.1156, "step": 13770 }, { "epoch": 0.1679878048780488, "grad_norm": 0.5517984628677368, "learning_rate": 1.8880081300813008e-05, "loss": 0.1361, "step": 13775 }, { "epoch": 0.1680487804878049, "grad_norm": 0.7939878702163696, "learning_rate": 1.887967479674797e-05, "loss": 0.1236, "step": 13780 }, { "epoch": 0.168109756097561, "grad_norm": 0.8205292224884033, "learning_rate": 1.8879268292682928e-05, "loss": 0.1402, "step": 13785 }, { "epoch": 0.16817073170731708, "grad_norm": 1.4023258686065674, "learning_rate": 1.887886178861789e-05, "loss": 0.1659, "step": 13790 }, { "epoch": 0.16823170731707318, "grad_norm": 1.4215080738067627, "learning_rate": 1.8878455284552847e-05, "loss": 0.2001, "step": 13795 }, { "epoch": 0.16829268292682928, "grad_norm": 1.479682207107544, "learning_rate": 1.8878048780487805e-05, "loss": 0.1407, "step": 13800 }, { "epoch": 0.16835365853658538, "grad_norm": 0.6429924368858337, "learning_rate": 1.8877642276422767e-05, "loss": 0.1098, "step": 13805 }, { "epoch": 0.16841463414634147, "grad_norm": 0.7701138854026794, "learning_rate": 1.8877235772357725e-05, "loss": 0.1176, "step": 13810 }, { "epoch": 0.16847560975609757, "grad_norm": 1.0827113389968872, "learning_rate": 1.8876829268292683e-05, "loss": 0.1561, "step": 13815 }, { "epoch": 0.16853658536585367, "grad_norm": 1.849419355392456, "learning_rate": 1.8876422764227645e-05, "loss": 0.1666, "step": 13820 }, { "epoch": 0.16859756097560977, "grad_norm": 0.802527129650116, "learning_rate": 1.8876016260162603e-05, "loss": 0.1511, "step": 13825 }, { "epoch": 0.16865853658536586, "grad_norm": 0.6862055063247681, "learning_rate": 1.887560975609756e-05, "loss": 0.1005, "step": 13830 }, { "epoch": 0.16871951219512196, "grad_norm": 0.6590109467506409, "learning_rate": 1.8875203252032522e-05, "loss": 0.1034, "step": 13835 }, { "epoch": 0.16878048780487806, "grad_norm": 1.1693536043167114, "learning_rate": 1.887479674796748e-05, "loss": 0.1506, "step": 13840 }, { "epoch": 0.16884146341463416, "grad_norm": 0.672467827796936, "learning_rate": 1.8874390243902442e-05, "loss": 0.1113, "step": 13845 }, { "epoch": 0.16890243902439026, "grad_norm": 1.652408242225647, "learning_rate": 1.88739837398374e-05, "loss": 0.1524, "step": 13850 }, { "epoch": 0.16896341463414635, "grad_norm": 1.3653433322906494, "learning_rate": 1.887357723577236e-05, "loss": 0.1986, "step": 13855 }, { "epoch": 0.16902439024390245, "grad_norm": 0.834989607334137, "learning_rate": 1.8873170731707316e-05, "loss": 0.1078, "step": 13860 }, { "epoch": 0.16908536585365855, "grad_norm": 0.8021888732910156, "learning_rate": 1.8872764227642278e-05, "loss": 0.1669, "step": 13865 }, { "epoch": 0.16914634146341465, "grad_norm": 0.7659728527069092, "learning_rate": 1.8872357723577236e-05, "loss": 0.1229, "step": 13870 }, { "epoch": 0.16920731707317074, "grad_norm": 2.5701487064361572, "learning_rate": 1.8871951219512198e-05, "loss": 0.1989, "step": 13875 }, { "epoch": 0.16926829268292684, "grad_norm": 1.0379847288131714, "learning_rate": 1.8871544715447156e-05, "loss": 0.1595, "step": 13880 }, { "epoch": 0.16932926829268294, "grad_norm": 0.9557657837867737, "learning_rate": 1.8871138211382117e-05, "loss": 0.1279, "step": 13885 }, { "epoch": 0.16939024390243904, "grad_norm": 0.6096798777580261, "learning_rate": 1.8870731707317075e-05, "loss": 0.1523, "step": 13890 }, { "epoch": 0.16945121951219513, "grad_norm": 1.1629571914672852, "learning_rate": 1.8870325203252033e-05, "loss": 0.1409, "step": 13895 }, { "epoch": 0.16951219512195123, "grad_norm": 1.1205164194107056, "learning_rate": 1.886991869918699e-05, "loss": 0.1532, "step": 13900 }, { "epoch": 0.16957317073170733, "grad_norm": 0.8876596093177795, "learning_rate": 1.8869512195121953e-05, "loss": 0.1289, "step": 13905 }, { "epoch": 0.16963414634146343, "grad_norm": 1.1002647876739502, "learning_rate": 1.886910569105691e-05, "loss": 0.1435, "step": 13910 }, { "epoch": 0.16969512195121952, "grad_norm": 1.0666412115097046, "learning_rate": 1.8868699186991873e-05, "loss": 0.1547, "step": 13915 }, { "epoch": 0.16975609756097562, "grad_norm": 1.0355150699615479, "learning_rate": 1.886829268292683e-05, "loss": 0.1465, "step": 13920 }, { "epoch": 0.16981707317073172, "grad_norm": 1.322729468345642, "learning_rate": 1.886788617886179e-05, "loss": 0.2268, "step": 13925 }, { "epoch": 0.16987804878048782, "grad_norm": 0.7735452055931091, "learning_rate": 1.886747967479675e-05, "loss": 0.1403, "step": 13930 }, { "epoch": 0.16993902439024391, "grad_norm": 0.8941671252250671, "learning_rate": 1.886707317073171e-05, "loss": 0.0805, "step": 13935 }, { "epoch": 0.17, "grad_norm": 0.8769257068634033, "learning_rate": 1.886666666666667e-05, "loss": 0.1419, "step": 13940 }, { "epoch": 0.1700609756097561, "grad_norm": 1.300282597541809, "learning_rate": 1.8866260162601628e-05, "loss": 0.137, "step": 13945 }, { "epoch": 0.1701219512195122, "grad_norm": 1.00278902053833, "learning_rate": 1.8865853658536586e-05, "loss": 0.1289, "step": 13950 }, { "epoch": 0.1701829268292683, "grad_norm": 1.2032504081726074, "learning_rate": 1.8865447154471545e-05, "loss": 0.1447, "step": 13955 }, { "epoch": 0.1702439024390244, "grad_norm": 0.8293974995613098, "learning_rate": 1.8865040650406506e-05, "loss": 0.1303, "step": 13960 }, { "epoch": 0.1703048780487805, "grad_norm": 1.0612218379974365, "learning_rate": 1.8864634146341464e-05, "loss": 0.1183, "step": 13965 }, { "epoch": 0.1703658536585366, "grad_norm": 1.3057948350906372, "learning_rate": 1.8864227642276426e-05, "loss": 0.1753, "step": 13970 }, { "epoch": 0.1704268292682927, "grad_norm": 0.927384614944458, "learning_rate": 1.8863821138211384e-05, "loss": 0.1496, "step": 13975 }, { "epoch": 0.1704878048780488, "grad_norm": 0.8540711998939514, "learning_rate": 1.8863414634146342e-05, "loss": 0.1409, "step": 13980 }, { "epoch": 0.1705487804878049, "grad_norm": 0.7390502095222473, "learning_rate": 1.88630081300813e-05, "loss": 0.1638, "step": 13985 }, { "epoch": 0.170609756097561, "grad_norm": 0.6432368159294128, "learning_rate": 1.886260162601626e-05, "loss": 0.1273, "step": 13990 }, { "epoch": 0.17067073170731709, "grad_norm": 3.1724040508270264, "learning_rate": 1.886219512195122e-05, "loss": 0.1358, "step": 13995 }, { "epoch": 0.17073170731707318, "grad_norm": 1.5374784469604492, "learning_rate": 1.886178861788618e-05, "loss": 0.1497, "step": 14000 }, { "epoch": 0.17079268292682928, "grad_norm": 0.5979413390159607, "learning_rate": 1.886138211382114e-05, "loss": 0.2063, "step": 14005 }, { "epoch": 0.17085365853658538, "grad_norm": 0.5081450939178467, "learning_rate": 1.8860975609756097e-05, "loss": 0.1454, "step": 14010 }, { "epoch": 0.17091463414634148, "grad_norm": 1.253543496131897, "learning_rate": 1.886056910569106e-05, "loss": 0.1671, "step": 14015 }, { "epoch": 0.17097560975609757, "grad_norm": 0.8693462014198303, "learning_rate": 1.8860162601626017e-05, "loss": 0.1436, "step": 14020 }, { "epoch": 0.17103658536585367, "grad_norm": 1.5608752965927124, "learning_rate": 1.885975609756098e-05, "loss": 0.189, "step": 14025 }, { "epoch": 0.17109756097560977, "grad_norm": 1.0514172315597534, "learning_rate": 1.8859349593495937e-05, "loss": 0.1337, "step": 14030 }, { "epoch": 0.17115853658536587, "grad_norm": 0.7580191493034363, "learning_rate": 1.8858943089430898e-05, "loss": 0.1263, "step": 14035 }, { "epoch": 0.17121951219512196, "grad_norm": 1.2715116739273071, "learning_rate": 1.8858536585365853e-05, "loss": 0.1381, "step": 14040 }, { "epoch": 0.17128048780487806, "grad_norm": 1.579687476158142, "learning_rate": 1.8858130081300815e-05, "loss": 0.1692, "step": 14045 }, { "epoch": 0.17134146341463416, "grad_norm": 1.0878825187683105, "learning_rate": 1.8857723577235773e-05, "loss": 0.1579, "step": 14050 }, { "epoch": 0.17140243902439026, "grad_norm": 0.8717736005783081, "learning_rate": 1.8857317073170734e-05, "loss": 0.1178, "step": 14055 }, { "epoch": 0.17146341463414635, "grad_norm": 0.6724019050598145, "learning_rate": 1.8856910569105692e-05, "loss": 0.1158, "step": 14060 }, { "epoch": 0.17152439024390245, "grad_norm": 1.1903924942016602, "learning_rate": 1.8856504065040654e-05, "loss": 0.1615, "step": 14065 }, { "epoch": 0.17158536585365855, "grad_norm": 0.6354913115501404, "learning_rate": 1.8856097560975612e-05, "loss": 0.0979, "step": 14070 }, { "epoch": 0.17164634146341465, "grad_norm": 7.729087829589844, "learning_rate": 1.885569105691057e-05, "loss": 0.1617, "step": 14075 }, { "epoch": 0.17170731707317075, "grad_norm": 0.9992459416389465, "learning_rate": 1.8855284552845528e-05, "loss": 0.1573, "step": 14080 }, { "epoch": 0.17176829268292684, "grad_norm": 0.886072039604187, "learning_rate": 1.885487804878049e-05, "loss": 0.1371, "step": 14085 }, { "epoch": 0.17182926829268294, "grad_norm": 0.8676250576972961, "learning_rate": 1.8854471544715448e-05, "loss": 0.1212, "step": 14090 }, { "epoch": 0.171890243902439, "grad_norm": 0.4249236583709717, "learning_rate": 1.885406504065041e-05, "loss": 0.1145, "step": 14095 }, { "epoch": 0.1719512195121951, "grad_norm": 1.4156153202056885, "learning_rate": 1.8853658536585367e-05, "loss": 0.1315, "step": 14100 }, { "epoch": 0.1720121951219512, "grad_norm": 1.1268824338912964, "learning_rate": 1.8853252032520326e-05, "loss": 0.1399, "step": 14105 }, { "epoch": 0.1720731707317073, "grad_norm": 0.9019572734832764, "learning_rate": 1.8852845528455287e-05, "loss": 0.1235, "step": 14110 }, { "epoch": 0.1721341463414634, "grad_norm": 3.0698654651641846, "learning_rate": 1.8852439024390245e-05, "loss": 0.1517, "step": 14115 }, { "epoch": 0.1721951219512195, "grad_norm": 0.42393267154693604, "learning_rate": 1.8852032520325207e-05, "loss": 0.1388, "step": 14120 }, { "epoch": 0.1722560975609756, "grad_norm": 0.7064971923828125, "learning_rate": 1.8851626016260165e-05, "loss": 0.1256, "step": 14125 }, { "epoch": 0.1723170731707317, "grad_norm": 0.8340691924095154, "learning_rate": 1.8851219512195123e-05, "loss": 0.1266, "step": 14130 }, { "epoch": 0.1723780487804878, "grad_norm": 1.8672313690185547, "learning_rate": 1.885081300813008e-05, "loss": 0.1501, "step": 14135 }, { "epoch": 0.1724390243902439, "grad_norm": 1.102048397064209, "learning_rate": 1.8850406504065043e-05, "loss": 0.1236, "step": 14140 }, { "epoch": 0.1725, "grad_norm": 0.6877532601356506, "learning_rate": 1.885e-05, "loss": 0.0986, "step": 14145 }, { "epoch": 0.17256097560975608, "grad_norm": 0.6839805841445923, "learning_rate": 1.8849593495934962e-05, "loss": 0.1319, "step": 14150 }, { "epoch": 0.17262195121951218, "grad_norm": 1.1029503345489502, "learning_rate": 1.884918699186992e-05, "loss": 0.1045, "step": 14155 }, { "epoch": 0.17268292682926828, "grad_norm": 2.763101100921631, "learning_rate": 1.884878048780488e-05, "loss": 0.1284, "step": 14160 }, { "epoch": 0.17274390243902438, "grad_norm": 0.8431391716003418, "learning_rate": 1.8848373983739837e-05, "loss": 0.1274, "step": 14165 }, { "epoch": 0.17280487804878047, "grad_norm": 0.8670928478240967, "learning_rate": 1.8847967479674798e-05, "loss": 0.0963, "step": 14170 }, { "epoch": 0.17286585365853657, "grad_norm": 1.316732406616211, "learning_rate": 1.8847560975609756e-05, "loss": 0.1339, "step": 14175 }, { "epoch": 0.17292682926829267, "grad_norm": 1.159352421760559, "learning_rate": 1.8847154471544718e-05, "loss": 0.1264, "step": 14180 }, { "epoch": 0.17298780487804877, "grad_norm": 1.2489068508148193, "learning_rate": 1.8846747967479676e-05, "loss": 0.1015, "step": 14185 }, { "epoch": 0.17304878048780487, "grad_norm": 0.5851702690124512, "learning_rate": 1.8846341463414634e-05, "loss": 0.1428, "step": 14190 }, { "epoch": 0.17310975609756096, "grad_norm": 0.8501953482627869, "learning_rate": 1.8845934959349596e-05, "loss": 0.1942, "step": 14195 }, { "epoch": 0.17317073170731706, "grad_norm": 0.8492070436477661, "learning_rate": 1.8845528455284554e-05, "loss": 0.1395, "step": 14200 }, { "epoch": 0.17323170731707316, "grad_norm": 0.6069334745407104, "learning_rate": 1.8845121951219515e-05, "loss": 0.1676, "step": 14205 }, { "epoch": 0.17329268292682926, "grad_norm": 1.5087714195251465, "learning_rate": 1.8844715447154473e-05, "loss": 0.1768, "step": 14210 }, { "epoch": 0.17335365853658535, "grad_norm": 0.6729774475097656, "learning_rate": 1.8844308943089435e-05, "loss": 0.1612, "step": 14215 }, { "epoch": 0.17341463414634145, "grad_norm": 0.9930446147918701, "learning_rate": 1.884390243902439e-05, "loss": 0.1467, "step": 14220 }, { "epoch": 0.17347560975609755, "grad_norm": 0.7197012901306152, "learning_rate": 1.884349593495935e-05, "loss": 0.1042, "step": 14225 }, { "epoch": 0.17353658536585365, "grad_norm": 1.0120811462402344, "learning_rate": 1.884308943089431e-05, "loss": 0.1592, "step": 14230 }, { "epoch": 0.17359756097560974, "grad_norm": 0.720747709274292, "learning_rate": 1.884268292682927e-05, "loss": 0.1145, "step": 14235 }, { "epoch": 0.17365853658536584, "grad_norm": 1.1131653785705566, "learning_rate": 1.884227642276423e-05, "loss": 0.1684, "step": 14240 }, { "epoch": 0.17371951219512194, "grad_norm": 1.030226707458496, "learning_rate": 1.884186991869919e-05, "loss": 0.1254, "step": 14245 }, { "epoch": 0.17378048780487804, "grad_norm": 3.770709753036499, "learning_rate": 1.8841463414634145e-05, "loss": 0.1261, "step": 14250 }, { "epoch": 0.17384146341463413, "grad_norm": 0.7765990495681763, "learning_rate": 1.8841056910569107e-05, "loss": 0.1365, "step": 14255 }, { "epoch": 0.17390243902439023, "grad_norm": 1.6066629886627197, "learning_rate": 1.8840650406504065e-05, "loss": 0.1365, "step": 14260 }, { "epoch": 0.17396341463414633, "grad_norm": 0.8521572351455688, "learning_rate": 1.8840243902439026e-05, "loss": 0.176, "step": 14265 }, { "epoch": 0.17402439024390243, "grad_norm": 0.6557872295379639, "learning_rate": 1.8839837398373984e-05, "loss": 0.1391, "step": 14270 }, { "epoch": 0.17408536585365852, "grad_norm": 0.5583435893058777, "learning_rate": 1.8839430894308946e-05, "loss": 0.1067, "step": 14275 }, { "epoch": 0.17414634146341462, "grad_norm": 0.835393488407135, "learning_rate": 1.8839024390243904e-05, "loss": 0.1502, "step": 14280 }, { "epoch": 0.17420731707317072, "grad_norm": 0.9457806944847107, "learning_rate": 1.8838617886178862e-05, "loss": 0.118, "step": 14285 }, { "epoch": 0.17426829268292682, "grad_norm": 0.42813023924827576, "learning_rate": 1.8838211382113824e-05, "loss": 0.1345, "step": 14290 }, { "epoch": 0.17432926829268292, "grad_norm": 0.4336608946323395, "learning_rate": 1.8837804878048782e-05, "loss": 0.0854, "step": 14295 }, { "epoch": 0.174390243902439, "grad_norm": 0.7442188858985901, "learning_rate": 1.8837398373983743e-05, "loss": 0.172, "step": 14300 }, { "epoch": 0.1744512195121951, "grad_norm": 0.8138467073440552, "learning_rate": 1.88369918699187e-05, "loss": 0.1195, "step": 14305 }, { "epoch": 0.1745121951219512, "grad_norm": 0.611066460609436, "learning_rate": 1.883658536585366e-05, "loss": 0.1345, "step": 14310 }, { "epoch": 0.1745731707317073, "grad_norm": 1.0051079988479614, "learning_rate": 1.8836178861788618e-05, "loss": 0.167, "step": 14315 }, { "epoch": 0.1746341463414634, "grad_norm": 1.8122507333755493, "learning_rate": 1.883577235772358e-05, "loss": 0.1366, "step": 14320 }, { "epoch": 0.1746951219512195, "grad_norm": 1.1991358995437622, "learning_rate": 1.8835365853658537e-05, "loss": 0.1466, "step": 14325 }, { "epoch": 0.1747560975609756, "grad_norm": 0.7629988789558411, "learning_rate": 1.88349593495935e-05, "loss": 0.1354, "step": 14330 }, { "epoch": 0.1748170731707317, "grad_norm": 0.8688741326332092, "learning_rate": 1.8834552845528457e-05, "loss": 0.1172, "step": 14335 }, { "epoch": 0.1748780487804878, "grad_norm": 1.5124703645706177, "learning_rate": 1.8834146341463415e-05, "loss": 0.1437, "step": 14340 }, { "epoch": 0.1749390243902439, "grad_norm": 3.200800657272339, "learning_rate": 1.8833739837398373e-05, "loss": 0.1763, "step": 14345 }, { "epoch": 0.175, "grad_norm": 0.8252606391906738, "learning_rate": 1.8833333333333335e-05, "loss": 0.0956, "step": 14350 }, { "epoch": 0.1750609756097561, "grad_norm": 1.9907158613204956, "learning_rate": 1.8832926829268293e-05, "loss": 0.2134, "step": 14355 }, { "epoch": 0.17512195121951218, "grad_norm": 1.172268033027649, "learning_rate": 1.8832520325203254e-05, "loss": 0.2139, "step": 14360 }, { "epoch": 0.17518292682926828, "grad_norm": 1.3111282587051392, "learning_rate": 1.8832113821138213e-05, "loss": 0.1729, "step": 14365 }, { "epoch": 0.17524390243902438, "grad_norm": 1.1992229223251343, "learning_rate": 1.883170731707317e-05, "loss": 0.1567, "step": 14370 }, { "epoch": 0.17530487804878048, "grad_norm": 0.9355487823486328, "learning_rate": 1.8831300813008132e-05, "loss": 0.1573, "step": 14375 }, { "epoch": 0.17536585365853657, "grad_norm": 0.6994307041168213, "learning_rate": 1.883089430894309e-05, "loss": 0.1633, "step": 14380 }, { "epoch": 0.17542682926829267, "grad_norm": 1.1075712442398071, "learning_rate": 1.8830487804878052e-05, "loss": 0.1819, "step": 14385 }, { "epoch": 0.17548780487804877, "grad_norm": 0.7059945464134216, "learning_rate": 1.883008130081301e-05, "loss": 0.1378, "step": 14390 }, { "epoch": 0.17554878048780487, "grad_norm": 0.6016274690628052, "learning_rate": 1.8829674796747968e-05, "loss": 0.0852, "step": 14395 }, { "epoch": 0.17560975609756097, "grad_norm": 0.691436231136322, "learning_rate": 1.8829268292682926e-05, "loss": 0.1569, "step": 14400 }, { "epoch": 0.17567073170731706, "grad_norm": 0.7235100865364075, "learning_rate": 1.8828861788617888e-05, "loss": 0.0902, "step": 14405 }, { "epoch": 0.17573170731707316, "grad_norm": 0.7755398154258728, "learning_rate": 1.8828455284552846e-05, "loss": 0.1034, "step": 14410 }, { "epoch": 0.17579268292682926, "grad_norm": 0.7274044156074524, "learning_rate": 1.8828048780487807e-05, "loss": 0.1266, "step": 14415 }, { "epoch": 0.17585365853658536, "grad_norm": 1.4244658946990967, "learning_rate": 1.8827642276422766e-05, "loss": 0.1799, "step": 14420 }, { "epoch": 0.17591463414634145, "grad_norm": 0.7825751304626465, "learning_rate": 1.8827235772357727e-05, "loss": 0.1741, "step": 14425 }, { "epoch": 0.17597560975609755, "grad_norm": 6.215595722198486, "learning_rate": 1.8826829268292682e-05, "loss": 0.1704, "step": 14430 }, { "epoch": 0.17603658536585365, "grad_norm": 1.0345351696014404, "learning_rate": 1.8826422764227643e-05, "loss": 0.1538, "step": 14435 }, { "epoch": 0.17609756097560975, "grad_norm": 1.4530701637268066, "learning_rate": 1.88260162601626e-05, "loss": 0.1612, "step": 14440 }, { "epoch": 0.17615853658536584, "grad_norm": 0.8144020438194275, "learning_rate": 1.8825609756097563e-05, "loss": 0.1279, "step": 14445 }, { "epoch": 0.17621951219512194, "grad_norm": 0.8939836025238037, "learning_rate": 1.882520325203252e-05, "loss": 0.1469, "step": 14450 }, { "epoch": 0.17628048780487804, "grad_norm": 0.7356492280960083, "learning_rate": 1.8824796747967483e-05, "loss": 0.1268, "step": 14455 }, { "epoch": 0.17634146341463414, "grad_norm": 1.1114063262939453, "learning_rate": 1.882439024390244e-05, "loss": 0.1253, "step": 14460 }, { "epoch": 0.17640243902439023, "grad_norm": 0.9673697352409363, "learning_rate": 1.88239837398374e-05, "loss": 0.1239, "step": 14465 }, { "epoch": 0.17646341463414633, "grad_norm": 0.6595041155815125, "learning_rate": 1.882357723577236e-05, "loss": 0.1273, "step": 14470 }, { "epoch": 0.17652439024390243, "grad_norm": 0.8127418756484985, "learning_rate": 1.882317073170732e-05, "loss": 0.1703, "step": 14475 }, { "epoch": 0.17658536585365853, "grad_norm": 0.6464545130729675, "learning_rate": 1.882276422764228e-05, "loss": 0.1134, "step": 14480 }, { "epoch": 0.17664634146341462, "grad_norm": 1.8251999616622925, "learning_rate": 1.8822357723577238e-05, "loss": 0.1304, "step": 14485 }, { "epoch": 0.17670731707317072, "grad_norm": 1.0903087854385376, "learning_rate": 1.8821951219512196e-05, "loss": 0.1095, "step": 14490 }, { "epoch": 0.17676829268292682, "grad_norm": 1.1356490850448608, "learning_rate": 1.8821544715447154e-05, "loss": 0.1352, "step": 14495 }, { "epoch": 0.17682926829268292, "grad_norm": 0.8971841335296631, "learning_rate": 1.8821138211382116e-05, "loss": 0.1042, "step": 14500 }, { "epoch": 0.17689024390243901, "grad_norm": 1.4333206415176392, "learning_rate": 1.8820731707317074e-05, "loss": 0.108, "step": 14505 }, { "epoch": 0.1769512195121951, "grad_norm": 1.3873419761657715, "learning_rate": 1.8820325203252036e-05, "loss": 0.1508, "step": 14510 }, { "epoch": 0.1770121951219512, "grad_norm": 0.7004482746124268, "learning_rate": 1.8819918699186994e-05, "loss": 0.1199, "step": 14515 }, { "epoch": 0.1770731707317073, "grad_norm": 0.5536118745803833, "learning_rate": 1.8819512195121952e-05, "loss": 0.1336, "step": 14520 }, { "epoch": 0.1771341463414634, "grad_norm": 1.7981619834899902, "learning_rate": 1.881910569105691e-05, "loss": 0.1087, "step": 14525 }, { "epoch": 0.1771951219512195, "grad_norm": 2.0377156734466553, "learning_rate": 1.881869918699187e-05, "loss": 0.1121, "step": 14530 }, { "epoch": 0.1772560975609756, "grad_norm": 0.5908858180046082, "learning_rate": 1.881829268292683e-05, "loss": 0.1333, "step": 14535 }, { "epoch": 0.1773170731707317, "grad_norm": 1.2487646341323853, "learning_rate": 1.881788617886179e-05, "loss": 0.1388, "step": 14540 }, { "epoch": 0.1773780487804878, "grad_norm": 1.1818890571594238, "learning_rate": 1.881747967479675e-05, "loss": 0.1381, "step": 14545 }, { "epoch": 0.1774390243902439, "grad_norm": 1.0577996969223022, "learning_rate": 1.8817073170731707e-05, "loss": 0.1392, "step": 14550 }, { "epoch": 0.1775, "grad_norm": 0.9614656567573547, "learning_rate": 1.881666666666667e-05, "loss": 0.1241, "step": 14555 }, { "epoch": 0.1775609756097561, "grad_norm": 3.396732807159424, "learning_rate": 1.8816260162601627e-05, "loss": 0.0883, "step": 14560 }, { "epoch": 0.1776219512195122, "grad_norm": 0.8860374689102173, "learning_rate": 1.881585365853659e-05, "loss": 0.0889, "step": 14565 }, { "epoch": 0.17768292682926828, "grad_norm": 0.7848336100578308, "learning_rate": 1.8815447154471547e-05, "loss": 0.11, "step": 14570 }, { "epoch": 0.17774390243902438, "grad_norm": 0.9687950611114502, "learning_rate": 1.8815040650406505e-05, "loss": 0.0954, "step": 14575 }, { "epoch": 0.17780487804878048, "grad_norm": 0.7657148838043213, "learning_rate": 1.8814634146341463e-05, "loss": 0.1498, "step": 14580 }, { "epoch": 0.17786585365853658, "grad_norm": 0.8624061346054077, "learning_rate": 1.8814227642276424e-05, "loss": 0.1024, "step": 14585 }, { "epoch": 0.17792682926829267, "grad_norm": 0.9492311477661133, "learning_rate": 1.8813821138211383e-05, "loss": 0.1328, "step": 14590 }, { "epoch": 0.17798780487804877, "grad_norm": 0.865575909614563, "learning_rate": 1.8813414634146344e-05, "loss": 0.1515, "step": 14595 }, { "epoch": 0.17804878048780487, "grad_norm": 1.2329808473587036, "learning_rate": 1.8813008130081302e-05, "loss": 0.1828, "step": 14600 }, { "epoch": 0.17810975609756097, "grad_norm": 2.9033284187316895, "learning_rate": 1.8812601626016264e-05, "loss": 0.1461, "step": 14605 }, { "epoch": 0.17817073170731706, "grad_norm": 0.909584641456604, "learning_rate": 1.881219512195122e-05, "loss": 0.1616, "step": 14610 }, { "epoch": 0.17823170731707316, "grad_norm": 0.8130044937133789, "learning_rate": 1.881178861788618e-05, "loss": 0.1137, "step": 14615 }, { "epoch": 0.17829268292682926, "grad_norm": 1.6365233659744263, "learning_rate": 1.8811382113821138e-05, "loss": 0.1516, "step": 14620 }, { "epoch": 0.17835365853658536, "grad_norm": 0.8075518012046814, "learning_rate": 1.88109756097561e-05, "loss": 0.1144, "step": 14625 }, { "epoch": 0.17841463414634146, "grad_norm": 1.192846417427063, "learning_rate": 1.8810569105691058e-05, "loss": 0.1357, "step": 14630 }, { "epoch": 0.17847560975609755, "grad_norm": 0.7097212672233582, "learning_rate": 1.881016260162602e-05, "loss": 0.1416, "step": 14635 }, { "epoch": 0.17853658536585365, "grad_norm": 0.770262598991394, "learning_rate": 1.8809756097560977e-05, "loss": 0.1081, "step": 14640 }, { "epoch": 0.17859756097560975, "grad_norm": 1.2121307849884033, "learning_rate": 1.8809349593495936e-05, "loss": 0.1079, "step": 14645 }, { "epoch": 0.17865853658536585, "grad_norm": 2.2392852306365967, "learning_rate": 1.8808943089430897e-05, "loss": 0.1287, "step": 14650 }, { "epoch": 0.17871951219512194, "grad_norm": 0.6112468242645264, "learning_rate": 1.8808536585365855e-05, "loss": 0.112, "step": 14655 }, { "epoch": 0.17878048780487804, "grad_norm": 1.2916276454925537, "learning_rate": 1.8808130081300813e-05, "loss": 0.1393, "step": 14660 }, { "epoch": 0.17884146341463414, "grad_norm": 0.9855399131774902, "learning_rate": 1.8807723577235775e-05, "loss": 0.1214, "step": 14665 }, { "epoch": 0.17890243902439024, "grad_norm": 2.7414231300354004, "learning_rate": 1.8807317073170733e-05, "loss": 0.1198, "step": 14670 }, { "epoch": 0.17896341463414633, "grad_norm": 0.822084367275238, "learning_rate": 1.880691056910569e-05, "loss": 0.142, "step": 14675 }, { "epoch": 0.17902439024390243, "grad_norm": 1.3981467485427856, "learning_rate": 1.8806504065040653e-05, "loss": 0.2025, "step": 14680 }, { "epoch": 0.17908536585365853, "grad_norm": 1.0892488956451416, "learning_rate": 1.880609756097561e-05, "loss": 0.1407, "step": 14685 }, { "epoch": 0.17914634146341463, "grad_norm": 1.2538206577301025, "learning_rate": 1.8805691056910572e-05, "loss": 0.1311, "step": 14690 }, { "epoch": 0.17920731707317072, "grad_norm": 0.7349885106086731, "learning_rate": 1.880528455284553e-05, "loss": 0.1095, "step": 14695 }, { "epoch": 0.17926829268292682, "grad_norm": 1.157149314880371, "learning_rate": 1.880487804878049e-05, "loss": 0.1475, "step": 14700 }, { "epoch": 0.17932926829268292, "grad_norm": 1.392356514930725, "learning_rate": 1.8804471544715447e-05, "loss": 0.1396, "step": 14705 }, { "epoch": 0.17939024390243902, "grad_norm": 0.950852632522583, "learning_rate": 1.8804065040650408e-05, "loss": 0.1097, "step": 14710 }, { "epoch": 0.17945121951219511, "grad_norm": 0.7633503675460815, "learning_rate": 1.8803658536585366e-05, "loss": 0.1057, "step": 14715 }, { "epoch": 0.1795121951219512, "grad_norm": 0.9278164505958557, "learning_rate": 1.8803252032520328e-05, "loss": 0.1334, "step": 14720 }, { "epoch": 0.1795731707317073, "grad_norm": 1.3775001764297485, "learning_rate": 1.8802845528455286e-05, "loss": 0.1324, "step": 14725 }, { "epoch": 0.1796341463414634, "grad_norm": 1.9368165731430054, "learning_rate": 1.8802439024390244e-05, "loss": 0.1563, "step": 14730 }, { "epoch": 0.1796951219512195, "grad_norm": 1.1053531169891357, "learning_rate": 1.8802032520325206e-05, "loss": 0.1287, "step": 14735 }, { "epoch": 0.1797560975609756, "grad_norm": 0.5433680415153503, "learning_rate": 1.8801626016260164e-05, "loss": 0.1475, "step": 14740 }, { "epoch": 0.1798170731707317, "grad_norm": 0.6996515989303589, "learning_rate": 1.8801219512195125e-05, "loss": 0.1551, "step": 14745 }, { "epoch": 0.1798780487804878, "grad_norm": 0.8218307495117188, "learning_rate": 1.8800813008130083e-05, "loss": 0.1335, "step": 14750 }, { "epoch": 0.1799390243902439, "grad_norm": 0.6494511365890503, "learning_rate": 1.880040650406504e-05, "loss": 0.1213, "step": 14755 }, { "epoch": 0.18, "grad_norm": 1.4948740005493164, "learning_rate": 1.88e-05, "loss": 0.1304, "step": 14760 }, { "epoch": 0.1800609756097561, "grad_norm": 0.901662290096283, "learning_rate": 1.879959349593496e-05, "loss": 0.1159, "step": 14765 }, { "epoch": 0.1801219512195122, "grad_norm": 0.8233628869056702, "learning_rate": 1.879918699186992e-05, "loss": 0.1109, "step": 14770 }, { "epoch": 0.1801829268292683, "grad_norm": 1.2013977766036987, "learning_rate": 1.879878048780488e-05, "loss": 0.1152, "step": 14775 }, { "epoch": 0.18024390243902438, "grad_norm": 1.18537437915802, "learning_rate": 1.879837398373984e-05, "loss": 0.1396, "step": 14780 }, { "epoch": 0.18030487804878048, "grad_norm": 1.5495400428771973, "learning_rate": 1.87979674796748e-05, "loss": 0.1506, "step": 14785 }, { "epoch": 0.18036585365853658, "grad_norm": 0.8078082203865051, "learning_rate": 1.8797560975609755e-05, "loss": 0.0961, "step": 14790 }, { "epoch": 0.18042682926829268, "grad_norm": 1.285171389579773, "learning_rate": 1.8797154471544717e-05, "loss": 0.1544, "step": 14795 }, { "epoch": 0.18048780487804877, "grad_norm": 0.9159824252128601, "learning_rate": 1.8796747967479675e-05, "loss": 0.1369, "step": 14800 }, { "epoch": 0.18054878048780487, "grad_norm": 0.9149298667907715, "learning_rate": 1.8796341463414636e-05, "loss": 0.1468, "step": 14805 }, { "epoch": 0.18060975609756097, "grad_norm": 1.206947684288025, "learning_rate": 1.8795934959349594e-05, "loss": 0.1174, "step": 14810 }, { "epoch": 0.18067073170731707, "grad_norm": 1.1208999156951904, "learning_rate": 1.8795528455284556e-05, "loss": 0.1185, "step": 14815 }, { "epoch": 0.18073170731707316, "grad_norm": 1.0640835762023926, "learning_rate": 1.8795121951219514e-05, "loss": 0.1214, "step": 14820 }, { "epoch": 0.18079268292682926, "grad_norm": 0.4410882592201233, "learning_rate": 1.8794715447154472e-05, "loss": 0.1118, "step": 14825 }, { "epoch": 0.18085365853658536, "grad_norm": 0.9419755935668945, "learning_rate": 1.8794308943089434e-05, "loss": 0.1205, "step": 14830 }, { "epoch": 0.18091463414634146, "grad_norm": 0.6879866123199463, "learning_rate": 1.8793902439024392e-05, "loss": 0.1365, "step": 14835 }, { "epoch": 0.18097560975609756, "grad_norm": 1.3252363204956055, "learning_rate": 1.879349593495935e-05, "loss": 0.1536, "step": 14840 }, { "epoch": 0.18103658536585365, "grad_norm": 1.6411316394805908, "learning_rate": 1.879308943089431e-05, "loss": 0.1397, "step": 14845 }, { "epoch": 0.18109756097560975, "grad_norm": 0.6786336898803711, "learning_rate": 1.879268292682927e-05, "loss": 0.1068, "step": 14850 }, { "epoch": 0.18115853658536585, "grad_norm": 1.0978389978408813, "learning_rate": 1.8792276422764228e-05, "loss": 0.1161, "step": 14855 }, { "epoch": 0.18121951219512195, "grad_norm": 0.9339907765388489, "learning_rate": 1.879186991869919e-05, "loss": 0.1019, "step": 14860 }, { "epoch": 0.18128048780487804, "grad_norm": 0.7852123379707336, "learning_rate": 1.8791463414634147e-05, "loss": 0.1128, "step": 14865 }, { "epoch": 0.18134146341463414, "grad_norm": 1.1728109121322632, "learning_rate": 1.879105691056911e-05, "loss": 0.0953, "step": 14870 }, { "epoch": 0.18140243902439024, "grad_norm": 0.6432397961616516, "learning_rate": 1.8790650406504067e-05, "loss": 0.1131, "step": 14875 }, { "epoch": 0.18146341463414634, "grad_norm": 0.609931468963623, "learning_rate": 1.8790243902439025e-05, "loss": 0.1416, "step": 14880 }, { "epoch": 0.18152439024390243, "grad_norm": 0.7717467546463013, "learning_rate": 1.8789837398373983e-05, "loss": 0.0976, "step": 14885 }, { "epoch": 0.18158536585365853, "grad_norm": 1.1397817134857178, "learning_rate": 1.8789430894308945e-05, "loss": 0.1342, "step": 14890 }, { "epoch": 0.18164634146341463, "grad_norm": 1.1715301275253296, "learning_rate": 1.8789024390243903e-05, "loss": 0.1565, "step": 14895 }, { "epoch": 0.18170731707317073, "grad_norm": 1.325592279434204, "learning_rate": 1.8788617886178864e-05, "loss": 0.1155, "step": 14900 }, { "epoch": 0.18176829268292682, "grad_norm": 0.6051596403121948, "learning_rate": 1.8788211382113823e-05, "loss": 0.1057, "step": 14905 }, { "epoch": 0.18182926829268292, "grad_norm": 0.8723073601722717, "learning_rate": 1.878780487804878e-05, "loss": 0.1193, "step": 14910 }, { "epoch": 0.18189024390243902, "grad_norm": 0.7697763442993164, "learning_rate": 1.8787398373983742e-05, "loss": 0.0956, "step": 14915 }, { "epoch": 0.18195121951219512, "grad_norm": 0.8019835352897644, "learning_rate": 1.87869918699187e-05, "loss": 0.1335, "step": 14920 }, { "epoch": 0.18201219512195121, "grad_norm": 0.6953920722007751, "learning_rate": 1.878658536585366e-05, "loss": 0.1755, "step": 14925 }, { "epoch": 0.1820731707317073, "grad_norm": 1.6009865999221802, "learning_rate": 1.878617886178862e-05, "loss": 0.1304, "step": 14930 }, { "epoch": 0.1821341463414634, "grad_norm": 0.9836388826370239, "learning_rate": 1.8785772357723578e-05, "loss": 0.0986, "step": 14935 }, { "epoch": 0.1821951219512195, "grad_norm": 0.9306020140647888, "learning_rate": 1.8785365853658536e-05, "loss": 0.139, "step": 14940 }, { "epoch": 0.1822560975609756, "grad_norm": 0.74815833568573, "learning_rate": 1.8784959349593498e-05, "loss": 0.1352, "step": 14945 }, { "epoch": 0.1823170731707317, "grad_norm": 0.6883743405342102, "learning_rate": 1.8784552845528456e-05, "loss": 0.113, "step": 14950 }, { "epoch": 0.1823780487804878, "grad_norm": 0.5464059710502625, "learning_rate": 1.8784146341463417e-05, "loss": 0.1262, "step": 14955 }, { "epoch": 0.1824390243902439, "grad_norm": 0.9086129665374756, "learning_rate": 1.8783739837398376e-05, "loss": 0.128, "step": 14960 }, { "epoch": 0.1825, "grad_norm": 1.1347429752349854, "learning_rate": 1.8783333333333337e-05, "loss": 0.1057, "step": 14965 }, { "epoch": 0.1825609756097561, "grad_norm": 2.2640655040740967, "learning_rate": 1.8782926829268292e-05, "loss": 0.1175, "step": 14970 }, { "epoch": 0.1826219512195122, "grad_norm": 0.8083893656730652, "learning_rate": 1.8782520325203253e-05, "loss": 0.1164, "step": 14975 }, { "epoch": 0.1826829268292683, "grad_norm": 1.2279672622680664, "learning_rate": 1.878211382113821e-05, "loss": 0.1843, "step": 14980 }, { "epoch": 0.18274390243902439, "grad_norm": 0.72563236951828, "learning_rate": 1.8781707317073173e-05, "loss": 0.166, "step": 14985 }, { "epoch": 0.18280487804878048, "grad_norm": 1.177027702331543, "learning_rate": 1.878130081300813e-05, "loss": 0.1318, "step": 14990 }, { "epoch": 0.18286585365853658, "grad_norm": 0.9002001881599426, "learning_rate": 1.8780894308943093e-05, "loss": 0.1243, "step": 14995 }, { "epoch": 0.18292682926829268, "grad_norm": 1.2173001766204834, "learning_rate": 1.878048780487805e-05, "loss": 0.2172, "step": 15000 }, { "epoch": 0.18298780487804878, "grad_norm": 0.89558345079422, "learning_rate": 1.878008130081301e-05, "loss": 0.1414, "step": 15005 }, { "epoch": 0.18304878048780487, "grad_norm": 1.5990458726882935, "learning_rate": 1.877967479674797e-05, "loss": 0.1219, "step": 15010 }, { "epoch": 0.18310975609756097, "grad_norm": 1.0155404806137085, "learning_rate": 1.877926829268293e-05, "loss": 0.1433, "step": 15015 }, { "epoch": 0.18317073170731707, "grad_norm": 1.1727898120880127, "learning_rate": 1.8778861788617887e-05, "loss": 0.1225, "step": 15020 }, { "epoch": 0.18323170731707317, "grad_norm": 1.7733479738235474, "learning_rate": 1.8778455284552848e-05, "loss": 0.1112, "step": 15025 }, { "epoch": 0.18329268292682926, "grad_norm": 0.8207180500030518, "learning_rate": 1.8778048780487806e-05, "loss": 0.1561, "step": 15030 }, { "epoch": 0.18335365853658536, "grad_norm": 0.7517492175102234, "learning_rate": 1.8777642276422764e-05, "loss": 0.1624, "step": 15035 }, { "epoch": 0.18341463414634146, "grad_norm": 0.9526426196098328, "learning_rate": 1.8777235772357726e-05, "loss": 0.1549, "step": 15040 }, { "epoch": 0.18347560975609756, "grad_norm": 1.3100073337554932, "learning_rate": 1.8776829268292684e-05, "loss": 0.1307, "step": 15045 }, { "epoch": 0.18353658536585366, "grad_norm": 3.679835557937622, "learning_rate": 1.8776422764227646e-05, "loss": 0.1545, "step": 15050 }, { "epoch": 0.18359756097560975, "grad_norm": 0.5920159220695496, "learning_rate": 1.8776016260162604e-05, "loss": 0.1075, "step": 15055 }, { "epoch": 0.18365853658536585, "grad_norm": 1.5667592287063599, "learning_rate": 1.8775609756097562e-05, "loss": 0.1266, "step": 15060 }, { "epoch": 0.18371951219512195, "grad_norm": 0.7846003770828247, "learning_rate": 1.877520325203252e-05, "loss": 0.1125, "step": 15065 }, { "epoch": 0.18378048780487805, "grad_norm": 0.8804566860198975, "learning_rate": 1.877479674796748e-05, "loss": 0.1187, "step": 15070 }, { "epoch": 0.18384146341463414, "grad_norm": 1.132201910018921, "learning_rate": 1.877439024390244e-05, "loss": 0.1711, "step": 15075 }, { "epoch": 0.18390243902439024, "grad_norm": 0.865313708782196, "learning_rate": 1.87739837398374e-05, "loss": 0.1601, "step": 15080 }, { "epoch": 0.18396341463414634, "grad_norm": 0.5345932841300964, "learning_rate": 1.877357723577236e-05, "loss": 0.1184, "step": 15085 }, { "epoch": 0.18402439024390244, "grad_norm": 0.9213650226593018, "learning_rate": 1.8773170731707317e-05, "loss": 0.1435, "step": 15090 }, { "epoch": 0.18408536585365853, "grad_norm": 0.6960973739624023, "learning_rate": 1.877276422764228e-05, "loss": 0.0937, "step": 15095 }, { "epoch": 0.18414634146341463, "grad_norm": 0.5611339211463928, "learning_rate": 1.8772357723577237e-05, "loss": 0.1322, "step": 15100 }, { "epoch": 0.18420731707317073, "grad_norm": 1.2643908262252808, "learning_rate": 1.8771951219512195e-05, "loss": 0.1013, "step": 15105 }, { "epoch": 0.18426829268292683, "grad_norm": 1.1490204334259033, "learning_rate": 1.8771544715447157e-05, "loss": 0.1325, "step": 15110 }, { "epoch": 0.18432926829268292, "grad_norm": 0.4652472734451294, "learning_rate": 1.8771138211382115e-05, "loss": 0.0973, "step": 15115 }, { "epoch": 0.18439024390243902, "grad_norm": 0.9746949076652527, "learning_rate": 1.8770731707317073e-05, "loss": 0.1132, "step": 15120 }, { "epoch": 0.18445121951219512, "grad_norm": 0.4908851683139801, "learning_rate": 1.8770325203252034e-05, "loss": 0.0934, "step": 15125 }, { "epoch": 0.18451219512195122, "grad_norm": 0.9611100554466248, "learning_rate": 1.8769918699186993e-05, "loss": 0.1639, "step": 15130 }, { "epoch": 0.18457317073170731, "grad_norm": 0.4748971462249756, "learning_rate": 1.8769512195121954e-05, "loss": 0.0965, "step": 15135 }, { "epoch": 0.1846341463414634, "grad_norm": 0.7517457604408264, "learning_rate": 1.8769105691056912e-05, "loss": 0.1286, "step": 15140 }, { "epoch": 0.1846951219512195, "grad_norm": 1.0628036260604858, "learning_rate": 1.8768699186991874e-05, "loss": 0.1192, "step": 15145 }, { "epoch": 0.1847560975609756, "grad_norm": 1.2663803100585938, "learning_rate": 1.876829268292683e-05, "loss": 0.1606, "step": 15150 }, { "epoch": 0.1848170731707317, "grad_norm": 0.4455028772354126, "learning_rate": 1.876788617886179e-05, "loss": 0.131, "step": 15155 }, { "epoch": 0.1848780487804878, "grad_norm": 1.3844950199127197, "learning_rate": 1.8767479674796748e-05, "loss": 0.1372, "step": 15160 }, { "epoch": 0.1849390243902439, "grad_norm": 0.6037270426750183, "learning_rate": 1.876707317073171e-05, "loss": 0.1464, "step": 15165 }, { "epoch": 0.185, "grad_norm": 0.5499837398529053, "learning_rate": 1.8766666666666668e-05, "loss": 0.1239, "step": 15170 }, { "epoch": 0.1850609756097561, "grad_norm": 1.56475031375885, "learning_rate": 1.876626016260163e-05, "loss": 0.1062, "step": 15175 }, { "epoch": 0.1851219512195122, "grad_norm": 0.7649907469749451, "learning_rate": 1.8765853658536587e-05, "loss": 0.1137, "step": 15180 }, { "epoch": 0.1851829268292683, "grad_norm": 0.6503553986549377, "learning_rate": 1.8765447154471545e-05, "loss": 0.109, "step": 15185 }, { "epoch": 0.1852439024390244, "grad_norm": 0.850182831287384, "learning_rate": 1.8765040650406504e-05, "loss": 0.1553, "step": 15190 }, { "epoch": 0.18530487804878049, "grad_norm": 1.1230740547180176, "learning_rate": 1.8764634146341465e-05, "loss": 0.1663, "step": 15195 }, { "epoch": 0.18536585365853658, "grad_norm": 0.9971576929092407, "learning_rate": 1.8764227642276423e-05, "loss": 0.1758, "step": 15200 }, { "epoch": 0.18542682926829268, "grad_norm": 0.9780313372612, "learning_rate": 1.8763821138211385e-05, "loss": 0.14, "step": 15205 }, { "epoch": 0.18548780487804878, "grad_norm": 0.881074070930481, "learning_rate": 1.8763414634146343e-05, "loss": 0.1321, "step": 15210 }, { "epoch": 0.18554878048780488, "grad_norm": 0.6680750250816345, "learning_rate": 1.87630081300813e-05, "loss": 0.1305, "step": 15215 }, { "epoch": 0.18560975609756097, "grad_norm": 0.8657398223876953, "learning_rate": 1.8762601626016263e-05, "loss": 0.123, "step": 15220 }, { "epoch": 0.18567073170731707, "grad_norm": 0.8962168097496033, "learning_rate": 1.876219512195122e-05, "loss": 0.1285, "step": 15225 }, { "epoch": 0.18573170731707317, "grad_norm": 0.6683583855628967, "learning_rate": 1.8761788617886182e-05, "loss": 0.1338, "step": 15230 }, { "epoch": 0.18579268292682927, "grad_norm": 2.186574935913086, "learning_rate": 1.876138211382114e-05, "loss": 0.1182, "step": 15235 }, { "epoch": 0.18585365853658536, "grad_norm": 1.1309444904327393, "learning_rate": 1.87609756097561e-05, "loss": 0.1642, "step": 15240 }, { "epoch": 0.18591463414634146, "grad_norm": 1.0992861986160278, "learning_rate": 1.8760569105691057e-05, "loss": 0.145, "step": 15245 }, { "epoch": 0.18597560975609756, "grad_norm": 0.8383403420448303, "learning_rate": 1.8760162601626018e-05, "loss": 0.1272, "step": 15250 }, { "epoch": 0.18603658536585366, "grad_norm": 2.1173269748687744, "learning_rate": 1.8759756097560976e-05, "loss": 0.1447, "step": 15255 }, { "epoch": 0.18609756097560975, "grad_norm": 1.1250395774841309, "learning_rate": 1.8759349593495938e-05, "loss": 0.1483, "step": 15260 }, { "epoch": 0.18615853658536585, "grad_norm": 0.8254222273826599, "learning_rate": 1.8758943089430896e-05, "loss": 0.1248, "step": 15265 }, { "epoch": 0.18621951219512195, "grad_norm": 0.9580786824226379, "learning_rate": 1.8758536585365854e-05, "loss": 0.1754, "step": 15270 }, { "epoch": 0.18628048780487805, "grad_norm": 1.2395068407058716, "learning_rate": 1.8758130081300815e-05, "loss": 0.1382, "step": 15275 }, { "epoch": 0.18634146341463415, "grad_norm": 0.6281396746635437, "learning_rate": 1.8757723577235774e-05, "loss": 0.1779, "step": 15280 }, { "epoch": 0.18640243902439024, "grad_norm": 0.8345173597335815, "learning_rate": 1.8757317073170732e-05, "loss": 0.143, "step": 15285 }, { "epoch": 0.18646341463414634, "grad_norm": 1.1134617328643799, "learning_rate": 1.8756910569105693e-05, "loss": 0.125, "step": 15290 }, { "epoch": 0.18652439024390244, "grad_norm": 0.5292543172836304, "learning_rate": 1.875650406504065e-05, "loss": 0.089, "step": 15295 }, { "epoch": 0.18658536585365854, "grad_norm": 0.7817606925964355, "learning_rate": 1.875609756097561e-05, "loss": 0.1249, "step": 15300 }, { "epoch": 0.18664634146341463, "grad_norm": 0.8578419089317322, "learning_rate": 1.875569105691057e-05, "loss": 0.164, "step": 15305 }, { "epoch": 0.18670731707317073, "grad_norm": 1.1060367822647095, "learning_rate": 1.875528455284553e-05, "loss": 0.126, "step": 15310 }, { "epoch": 0.18676829268292683, "grad_norm": 0.6658641695976257, "learning_rate": 1.875487804878049e-05, "loss": 0.1361, "step": 15315 }, { "epoch": 0.18682926829268293, "grad_norm": 1.4252227544784546, "learning_rate": 1.875447154471545e-05, "loss": 0.1146, "step": 15320 }, { "epoch": 0.18689024390243902, "grad_norm": 1.2821091413497925, "learning_rate": 1.875406504065041e-05, "loss": 0.1463, "step": 15325 }, { "epoch": 0.18695121951219512, "grad_norm": 1.2421197891235352, "learning_rate": 1.8753658536585365e-05, "loss": 0.1181, "step": 15330 }, { "epoch": 0.18701219512195122, "grad_norm": 1.625556468963623, "learning_rate": 1.8753252032520327e-05, "loss": 0.1423, "step": 15335 }, { "epoch": 0.18707317073170732, "grad_norm": 0.6170310974121094, "learning_rate": 1.8752845528455285e-05, "loss": 0.1078, "step": 15340 }, { "epoch": 0.18713414634146341, "grad_norm": 1.0520069599151611, "learning_rate": 1.8752439024390246e-05, "loss": 0.1375, "step": 15345 }, { "epoch": 0.1871951219512195, "grad_norm": 1.0425282716751099, "learning_rate": 1.8752032520325204e-05, "loss": 0.1786, "step": 15350 }, { "epoch": 0.1872560975609756, "grad_norm": 0.9232288599014282, "learning_rate": 1.8751626016260166e-05, "loss": 0.1425, "step": 15355 }, { "epoch": 0.1873170731707317, "grad_norm": 0.6100293397903442, "learning_rate": 1.8751219512195124e-05, "loss": 0.1257, "step": 15360 }, { "epoch": 0.1873780487804878, "grad_norm": 1.1658467054367065, "learning_rate": 1.8750813008130082e-05, "loss": 0.1047, "step": 15365 }, { "epoch": 0.1874390243902439, "grad_norm": 0.4785616993904114, "learning_rate": 1.875040650406504e-05, "loss": 0.101, "step": 15370 }, { "epoch": 0.1875, "grad_norm": 1.2673221826553345, "learning_rate": 1.8750000000000002e-05, "loss": 0.1485, "step": 15375 }, { "epoch": 0.1875609756097561, "grad_norm": 0.4268704354763031, "learning_rate": 1.874959349593496e-05, "loss": 0.1321, "step": 15380 }, { "epoch": 0.1876219512195122, "grad_norm": 0.8162161111831665, "learning_rate": 1.874918699186992e-05, "loss": 0.1363, "step": 15385 }, { "epoch": 0.1876829268292683, "grad_norm": 0.837999701499939, "learning_rate": 1.874878048780488e-05, "loss": 0.1313, "step": 15390 }, { "epoch": 0.1877439024390244, "grad_norm": 0.5374805331230164, "learning_rate": 1.8748373983739838e-05, "loss": 0.1818, "step": 15395 }, { "epoch": 0.1878048780487805, "grad_norm": 0.6572932004928589, "learning_rate": 1.87479674796748e-05, "loss": 0.1242, "step": 15400 }, { "epoch": 0.18786585365853659, "grad_norm": 1.556609034538269, "learning_rate": 1.8747560975609757e-05, "loss": 0.127, "step": 15405 }, { "epoch": 0.18792682926829268, "grad_norm": 2.9653584957122803, "learning_rate": 1.874715447154472e-05, "loss": 0.1609, "step": 15410 }, { "epoch": 0.18798780487804878, "grad_norm": 0.6897343397140503, "learning_rate": 1.8746747967479677e-05, "loss": 0.1142, "step": 15415 }, { "epoch": 0.18804878048780488, "grad_norm": 0.6953224539756775, "learning_rate": 1.874634146341464e-05, "loss": 0.1084, "step": 15420 }, { "epoch": 0.18810975609756098, "grad_norm": 1.1148831844329834, "learning_rate": 1.8745934959349593e-05, "loss": 0.1362, "step": 15425 }, { "epoch": 0.18817073170731707, "grad_norm": 0.8687397837638855, "learning_rate": 1.8745528455284555e-05, "loss": 0.099, "step": 15430 }, { "epoch": 0.18823170731707317, "grad_norm": 0.576452374458313, "learning_rate": 1.8745121951219513e-05, "loss": 0.1048, "step": 15435 }, { "epoch": 0.18829268292682927, "grad_norm": 1.5720994472503662, "learning_rate": 1.8744715447154474e-05, "loss": 0.1257, "step": 15440 }, { "epoch": 0.18835365853658537, "grad_norm": 1.4781014919281006, "learning_rate": 1.8744308943089432e-05, "loss": 0.1144, "step": 15445 }, { "epoch": 0.18841463414634146, "grad_norm": 0.7886883616447449, "learning_rate": 1.8743902439024394e-05, "loss": 0.1201, "step": 15450 }, { "epoch": 0.18847560975609756, "grad_norm": 1.2891919612884521, "learning_rate": 1.874349593495935e-05, "loss": 0.1288, "step": 15455 }, { "epoch": 0.18853658536585366, "grad_norm": 0.97318035364151, "learning_rate": 1.874308943089431e-05, "loss": 0.1563, "step": 15460 }, { "epoch": 0.18859756097560976, "grad_norm": 0.5811742544174194, "learning_rate": 1.874268292682927e-05, "loss": 0.129, "step": 15465 }, { "epoch": 0.18865853658536585, "grad_norm": 1.141693353652954, "learning_rate": 1.874227642276423e-05, "loss": 0.1541, "step": 15470 }, { "epoch": 0.18871951219512195, "grad_norm": 0.9606093764305115, "learning_rate": 1.8741869918699188e-05, "loss": 0.1408, "step": 15475 }, { "epoch": 0.18878048780487805, "grad_norm": 0.7951839566230774, "learning_rate": 1.874146341463415e-05, "loss": 0.1151, "step": 15480 }, { "epoch": 0.18884146341463415, "grad_norm": 0.9514472484588623, "learning_rate": 1.8741056910569108e-05, "loss": 0.1473, "step": 15485 }, { "epoch": 0.18890243902439025, "grad_norm": 1.4560004472732544, "learning_rate": 1.8740650406504066e-05, "loss": 0.1612, "step": 15490 }, { "epoch": 0.18896341463414634, "grad_norm": 1.0653643608093262, "learning_rate": 1.8740243902439027e-05, "loss": 0.1243, "step": 15495 }, { "epoch": 0.18902439024390244, "grad_norm": 0.6277773976325989, "learning_rate": 1.8739837398373985e-05, "loss": 0.1227, "step": 15500 }, { "epoch": 0.18908536585365854, "grad_norm": 1.7648998498916626, "learning_rate": 1.8739430894308947e-05, "loss": 0.1123, "step": 15505 }, { "epoch": 0.18914634146341464, "grad_norm": 1.0190236568450928, "learning_rate": 1.8739024390243905e-05, "loss": 0.1051, "step": 15510 }, { "epoch": 0.18920731707317073, "grad_norm": 0.8820492029190063, "learning_rate": 1.8738617886178863e-05, "loss": 0.1252, "step": 15515 }, { "epoch": 0.18926829268292683, "grad_norm": 1.2625775337219238, "learning_rate": 1.873821138211382e-05, "loss": 0.1146, "step": 15520 }, { "epoch": 0.18932926829268293, "grad_norm": 1.0363985300064087, "learning_rate": 1.8737804878048783e-05, "loss": 0.1619, "step": 15525 }, { "epoch": 0.18939024390243903, "grad_norm": 1.236391544342041, "learning_rate": 1.873739837398374e-05, "loss": 0.1476, "step": 15530 }, { "epoch": 0.18945121951219512, "grad_norm": 2.7822482585906982, "learning_rate": 1.8736991869918702e-05, "loss": 0.1207, "step": 15535 }, { "epoch": 0.18951219512195122, "grad_norm": 0.9926546216011047, "learning_rate": 1.873658536585366e-05, "loss": 0.155, "step": 15540 }, { "epoch": 0.18957317073170732, "grad_norm": 2.1276612281799316, "learning_rate": 1.873617886178862e-05, "loss": 0.1161, "step": 15545 }, { "epoch": 0.18963414634146342, "grad_norm": 0.6919605135917664, "learning_rate": 1.8735772357723577e-05, "loss": 0.1215, "step": 15550 }, { "epoch": 0.18969512195121951, "grad_norm": 0.8685270547866821, "learning_rate": 1.873536585365854e-05, "loss": 0.1191, "step": 15555 }, { "epoch": 0.1897560975609756, "grad_norm": 0.4744563102722168, "learning_rate": 1.8734959349593497e-05, "loss": 0.1163, "step": 15560 }, { "epoch": 0.1898170731707317, "grad_norm": 0.9611732959747314, "learning_rate": 1.8734552845528458e-05, "loss": 0.1712, "step": 15565 }, { "epoch": 0.1898780487804878, "grad_norm": 0.7757052183151245, "learning_rate": 1.8734146341463416e-05, "loss": 0.1323, "step": 15570 }, { "epoch": 0.1899390243902439, "grad_norm": 1.898692011833191, "learning_rate": 1.8733739837398374e-05, "loss": 0.1308, "step": 15575 }, { "epoch": 0.19, "grad_norm": 0.9378382563591003, "learning_rate": 1.8733333333333336e-05, "loss": 0.1244, "step": 15580 }, { "epoch": 0.1900609756097561, "grad_norm": 1.4612438678741455, "learning_rate": 1.8732926829268294e-05, "loss": 0.1534, "step": 15585 }, { "epoch": 0.1901219512195122, "grad_norm": 0.6527574062347412, "learning_rate": 1.8732520325203255e-05, "loss": 0.1152, "step": 15590 }, { "epoch": 0.1901829268292683, "grad_norm": 0.7491434216499329, "learning_rate": 1.8732113821138214e-05, "loss": 0.1249, "step": 15595 }, { "epoch": 0.1902439024390244, "grad_norm": 0.8258016705513, "learning_rate": 1.8731707317073172e-05, "loss": 0.0997, "step": 15600 }, { "epoch": 0.1903048780487805, "grad_norm": 1.5573108196258545, "learning_rate": 1.873130081300813e-05, "loss": 0.1477, "step": 15605 }, { "epoch": 0.1903658536585366, "grad_norm": 1.1544972658157349, "learning_rate": 1.873089430894309e-05, "loss": 0.1665, "step": 15610 }, { "epoch": 0.19042682926829269, "grad_norm": 0.5659142732620239, "learning_rate": 1.873048780487805e-05, "loss": 0.1044, "step": 15615 }, { "epoch": 0.19048780487804878, "grad_norm": 0.6929096579551697, "learning_rate": 1.873008130081301e-05, "loss": 0.1133, "step": 15620 }, { "epoch": 0.19054878048780488, "grad_norm": 0.5748105049133301, "learning_rate": 1.872967479674797e-05, "loss": 0.1559, "step": 15625 }, { "epoch": 0.19060975609756098, "grad_norm": 1.0181642770767212, "learning_rate": 1.872926829268293e-05, "loss": 0.1321, "step": 15630 }, { "epoch": 0.19067073170731708, "grad_norm": 0.4163096249103546, "learning_rate": 1.8728861788617885e-05, "loss": 0.0904, "step": 15635 }, { "epoch": 0.19073170731707317, "grad_norm": 0.7924644351005554, "learning_rate": 1.8728455284552847e-05, "loss": 0.1284, "step": 15640 }, { "epoch": 0.19079268292682927, "grad_norm": 0.8138747811317444, "learning_rate": 1.8728048780487805e-05, "loss": 0.1036, "step": 15645 }, { "epoch": 0.19085365853658537, "grad_norm": 1.2324392795562744, "learning_rate": 1.8727642276422767e-05, "loss": 0.1408, "step": 15650 }, { "epoch": 0.19091463414634147, "grad_norm": 0.631024181842804, "learning_rate": 1.8727235772357725e-05, "loss": 0.1177, "step": 15655 }, { "epoch": 0.19097560975609756, "grad_norm": 0.6091323494911194, "learning_rate": 1.8726829268292686e-05, "loss": 0.1105, "step": 15660 }, { "epoch": 0.19103658536585366, "grad_norm": 0.698303759098053, "learning_rate": 1.8726422764227644e-05, "loss": 0.1257, "step": 15665 }, { "epoch": 0.19109756097560976, "grad_norm": 0.758811354637146, "learning_rate": 1.8726016260162602e-05, "loss": 0.0907, "step": 15670 }, { "epoch": 0.19115853658536586, "grad_norm": 1.8508621454238892, "learning_rate": 1.8725609756097564e-05, "loss": 0.1338, "step": 15675 }, { "epoch": 0.19121951219512195, "grad_norm": 0.5626893639564514, "learning_rate": 1.8725203252032522e-05, "loss": 0.0782, "step": 15680 }, { "epoch": 0.19128048780487805, "grad_norm": 0.6967982053756714, "learning_rate": 1.8724796747967484e-05, "loss": 0.116, "step": 15685 }, { "epoch": 0.19134146341463415, "grad_norm": 0.9046899080276489, "learning_rate": 1.8724390243902442e-05, "loss": 0.1393, "step": 15690 }, { "epoch": 0.19140243902439025, "grad_norm": 0.9686523079872131, "learning_rate": 1.87239837398374e-05, "loss": 0.0986, "step": 15695 }, { "epoch": 0.19146341463414634, "grad_norm": 1.0623539686203003, "learning_rate": 1.8723577235772358e-05, "loss": 0.14, "step": 15700 }, { "epoch": 0.19152439024390244, "grad_norm": 0.9324368834495544, "learning_rate": 1.872317073170732e-05, "loss": 0.112, "step": 15705 }, { "epoch": 0.19158536585365854, "grad_norm": 0.9498658776283264, "learning_rate": 1.8722764227642278e-05, "loss": 0.1151, "step": 15710 }, { "epoch": 0.19164634146341464, "grad_norm": 0.8343539237976074, "learning_rate": 1.872235772357724e-05, "loss": 0.1017, "step": 15715 }, { "epoch": 0.19170731707317074, "grad_norm": 4.135124206542969, "learning_rate": 1.8721951219512197e-05, "loss": 0.1523, "step": 15720 }, { "epoch": 0.19176829268292683, "grad_norm": 0.5937507748603821, "learning_rate": 1.8721544715447155e-05, "loss": 0.1357, "step": 15725 }, { "epoch": 0.19182926829268293, "grad_norm": 0.6207179427146912, "learning_rate": 1.8721138211382114e-05, "loss": 0.0843, "step": 15730 }, { "epoch": 0.19189024390243903, "grad_norm": 1.456554889678955, "learning_rate": 1.8720731707317075e-05, "loss": 0.1439, "step": 15735 }, { "epoch": 0.19195121951219513, "grad_norm": 1.0118483304977417, "learning_rate": 1.8720325203252033e-05, "loss": 0.1646, "step": 15740 }, { "epoch": 0.19201219512195122, "grad_norm": 0.9179525971412659, "learning_rate": 1.8719918699186995e-05, "loss": 0.1238, "step": 15745 }, { "epoch": 0.19207317073170732, "grad_norm": 1.1633535623550415, "learning_rate": 1.8719512195121953e-05, "loss": 0.111, "step": 15750 }, { "epoch": 0.19213414634146342, "grad_norm": 0.7308100461959839, "learning_rate": 1.871910569105691e-05, "loss": 0.1026, "step": 15755 }, { "epoch": 0.19219512195121952, "grad_norm": 0.9733361601829529, "learning_rate": 1.8718699186991872e-05, "loss": 0.1023, "step": 15760 }, { "epoch": 0.19225609756097561, "grad_norm": 0.844598114490509, "learning_rate": 1.871829268292683e-05, "loss": 0.125, "step": 15765 }, { "epoch": 0.1923170731707317, "grad_norm": 0.7520092725753784, "learning_rate": 1.8717886178861792e-05, "loss": 0.1416, "step": 15770 }, { "epoch": 0.1923780487804878, "grad_norm": 0.7251460552215576, "learning_rate": 1.871747967479675e-05, "loss": 0.1576, "step": 15775 }, { "epoch": 0.1924390243902439, "grad_norm": 1.1152739524841309, "learning_rate": 1.871707317073171e-05, "loss": 0.1534, "step": 15780 }, { "epoch": 0.1925, "grad_norm": 2.02298641204834, "learning_rate": 1.8716666666666666e-05, "loss": 0.1516, "step": 15785 }, { "epoch": 0.1925609756097561, "grad_norm": 0.7740207314491272, "learning_rate": 1.8716260162601628e-05, "loss": 0.1227, "step": 15790 }, { "epoch": 0.1926219512195122, "grad_norm": 0.9587293863296509, "learning_rate": 1.8715853658536586e-05, "loss": 0.1558, "step": 15795 }, { "epoch": 0.1926829268292683, "grad_norm": 0.49991270899772644, "learning_rate": 1.8715447154471548e-05, "loss": 0.1152, "step": 15800 }, { "epoch": 0.1927439024390244, "grad_norm": 0.769814670085907, "learning_rate": 1.8715040650406506e-05, "loss": 0.0902, "step": 15805 }, { "epoch": 0.1928048780487805, "grad_norm": 0.8685892820358276, "learning_rate": 1.8714634146341467e-05, "loss": 0.1172, "step": 15810 }, { "epoch": 0.1928658536585366, "grad_norm": 1.6203659772872925, "learning_rate": 1.8714227642276422e-05, "loss": 0.1355, "step": 15815 }, { "epoch": 0.1929268292682927, "grad_norm": 0.6062468886375427, "learning_rate": 1.8713821138211384e-05, "loss": 0.1358, "step": 15820 }, { "epoch": 0.19298780487804879, "grad_norm": 0.9053739905357361, "learning_rate": 1.871341463414634e-05, "loss": 0.1201, "step": 15825 }, { "epoch": 0.19304878048780488, "grad_norm": 4.124471664428711, "learning_rate": 1.8713008130081303e-05, "loss": 0.184, "step": 15830 }, { "epoch": 0.19310975609756098, "grad_norm": 0.9251591563224792, "learning_rate": 1.871260162601626e-05, "loss": 0.1262, "step": 15835 }, { "epoch": 0.19317073170731708, "grad_norm": 1.2305233478546143, "learning_rate": 1.8712195121951223e-05, "loss": 0.1394, "step": 15840 }, { "epoch": 0.19323170731707318, "grad_norm": 0.5410534143447876, "learning_rate": 1.871178861788618e-05, "loss": 0.1216, "step": 15845 }, { "epoch": 0.19329268292682927, "grad_norm": 0.7569400668144226, "learning_rate": 1.871138211382114e-05, "loss": 0.1389, "step": 15850 }, { "epoch": 0.19335365853658537, "grad_norm": 1.1131595373153687, "learning_rate": 1.87109756097561e-05, "loss": 0.1371, "step": 15855 }, { "epoch": 0.19341463414634147, "grad_norm": 1.0768799781799316, "learning_rate": 1.871056910569106e-05, "loss": 0.091, "step": 15860 }, { "epoch": 0.19347560975609757, "grad_norm": 1.94143545627594, "learning_rate": 1.8710162601626017e-05, "loss": 0.1268, "step": 15865 }, { "epoch": 0.19353658536585366, "grad_norm": 1.4464620351791382, "learning_rate": 1.870975609756098e-05, "loss": 0.1662, "step": 15870 }, { "epoch": 0.19359756097560976, "grad_norm": 0.9173235893249512, "learning_rate": 1.8709349593495936e-05, "loss": 0.1551, "step": 15875 }, { "epoch": 0.19365853658536586, "grad_norm": 0.7688374519348145, "learning_rate": 1.8708943089430895e-05, "loss": 0.1363, "step": 15880 }, { "epoch": 0.19371951219512196, "grad_norm": 0.5420752167701721, "learning_rate": 1.8708536585365856e-05, "loss": 0.1348, "step": 15885 }, { "epoch": 0.19378048780487805, "grad_norm": 2.0510361194610596, "learning_rate": 1.8708130081300814e-05, "loss": 0.1667, "step": 15890 }, { "epoch": 0.19384146341463415, "grad_norm": 0.8918609023094177, "learning_rate": 1.8707723577235776e-05, "loss": 0.1507, "step": 15895 }, { "epoch": 0.19390243902439025, "grad_norm": 1.3735053539276123, "learning_rate": 1.8707317073170734e-05, "loss": 0.1801, "step": 15900 }, { "epoch": 0.19396341463414635, "grad_norm": 0.700235903263092, "learning_rate": 1.8706910569105692e-05, "loss": 0.1048, "step": 15905 }, { "epoch": 0.19402439024390244, "grad_norm": 1.038394570350647, "learning_rate": 1.870650406504065e-05, "loss": 0.1321, "step": 15910 }, { "epoch": 0.19408536585365854, "grad_norm": 0.6041248440742493, "learning_rate": 1.870609756097561e-05, "loss": 0.1204, "step": 15915 }, { "epoch": 0.19414634146341464, "grad_norm": 0.7878177762031555, "learning_rate": 1.870569105691057e-05, "loss": 0.095, "step": 15920 }, { "epoch": 0.19420731707317074, "grad_norm": 1.3477270603179932, "learning_rate": 1.870528455284553e-05, "loss": 0.1291, "step": 15925 }, { "epoch": 0.19426829268292684, "grad_norm": 0.8183048963546753, "learning_rate": 1.870487804878049e-05, "loss": 0.1123, "step": 15930 }, { "epoch": 0.19432926829268293, "grad_norm": 0.8030425906181335, "learning_rate": 1.8704471544715448e-05, "loss": 0.1287, "step": 15935 }, { "epoch": 0.19439024390243903, "grad_norm": 1.3455414772033691, "learning_rate": 1.870406504065041e-05, "loss": 0.1487, "step": 15940 }, { "epoch": 0.19445121951219513, "grad_norm": 1.0266810655593872, "learning_rate": 1.8703658536585367e-05, "loss": 0.1581, "step": 15945 }, { "epoch": 0.19451219512195123, "grad_norm": 1.7932833433151245, "learning_rate": 1.870325203252033e-05, "loss": 0.1474, "step": 15950 }, { "epoch": 0.19457317073170732, "grad_norm": 0.7719590067863464, "learning_rate": 1.8702845528455287e-05, "loss": 0.1344, "step": 15955 }, { "epoch": 0.19463414634146342, "grad_norm": 1.0736353397369385, "learning_rate": 1.8702439024390245e-05, "loss": 0.1401, "step": 15960 }, { "epoch": 0.19469512195121952, "grad_norm": 0.9592297673225403, "learning_rate": 1.8702032520325203e-05, "loss": 0.1143, "step": 15965 }, { "epoch": 0.19475609756097562, "grad_norm": 0.8233340382575989, "learning_rate": 1.8701626016260165e-05, "loss": 0.1173, "step": 15970 }, { "epoch": 0.1948170731707317, "grad_norm": 0.8360081315040588, "learning_rate": 1.8701219512195123e-05, "loss": 0.1606, "step": 15975 }, { "epoch": 0.1948780487804878, "grad_norm": 0.5525704026222229, "learning_rate": 1.8700813008130084e-05, "loss": 0.1094, "step": 15980 }, { "epoch": 0.1949390243902439, "grad_norm": 0.5857849717140198, "learning_rate": 1.8700406504065042e-05, "loss": 0.1335, "step": 15985 }, { "epoch": 0.195, "grad_norm": 0.6540132761001587, "learning_rate": 1.8700000000000004e-05, "loss": 0.1434, "step": 15990 }, { "epoch": 0.1950609756097561, "grad_norm": 2.9029009342193604, "learning_rate": 1.869959349593496e-05, "loss": 0.1488, "step": 15995 }, { "epoch": 0.1951219512195122, "grad_norm": 0.5233898758888245, "learning_rate": 1.869918699186992e-05, "loss": 0.0929, "step": 16000 }, { "epoch": 0.1951829268292683, "grad_norm": 0.9528737664222717, "learning_rate": 1.8698780487804878e-05, "loss": 0.1112, "step": 16005 }, { "epoch": 0.1952439024390244, "grad_norm": 0.9588451981544495, "learning_rate": 1.869837398373984e-05, "loss": 0.151, "step": 16010 }, { "epoch": 0.1953048780487805, "grad_norm": 0.7508789896965027, "learning_rate": 1.8697967479674798e-05, "loss": 0.1152, "step": 16015 }, { "epoch": 0.1953658536585366, "grad_norm": 0.9750108122825623, "learning_rate": 1.869756097560976e-05, "loss": 0.1127, "step": 16020 }, { "epoch": 0.1954268292682927, "grad_norm": 1.1605072021484375, "learning_rate": 1.8697154471544718e-05, "loss": 0.1214, "step": 16025 }, { "epoch": 0.1954878048780488, "grad_norm": 0.9646077156066895, "learning_rate": 1.8696747967479676e-05, "loss": 0.1292, "step": 16030 }, { "epoch": 0.19554878048780489, "grad_norm": 1.745242714881897, "learning_rate": 1.8696341463414637e-05, "loss": 0.1954, "step": 16035 }, { "epoch": 0.19560975609756098, "grad_norm": 2.0059988498687744, "learning_rate": 1.8695934959349595e-05, "loss": 0.1141, "step": 16040 }, { "epoch": 0.19567073170731708, "grad_norm": 1.2179384231567383, "learning_rate": 1.8695528455284553e-05, "loss": 0.1191, "step": 16045 }, { "epoch": 0.19573170731707318, "grad_norm": 0.7733786106109619, "learning_rate": 1.8695121951219515e-05, "loss": 0.1336, "step": 16050 }, { "epoch": 0.19579268292682928, "grad_norm": 0.7637738585472107, "learning_rate": 1.8694715447154473e-05, "loss": 0.1128, "step": 16055 }, { "epoch": 0.19585365853658537, "grad_norm": 1.1047354936599731, "learning_rate": 1.869430894308943e-05, "loss": 0.1941, "step": 16060 }, { "epoch": 0.19591463414634147, "grad_norm": 2.1141269207000732, "learning_rate": 1.8693902439024393e-05, "loss": 0.1106, "step": 16065 }, { "epoch": 0.19597560975609757, "grad_norm": 0.7982511520385742, "learning_rate": 1.869349593495935e-05, "loss": 0.1637, "step": 16070 }, { "epoch": 0.19603658536585367, "grad_norm": 0.6782484650611877, "learning_rate": 1.8693089430894312e-05, "loss": 0.1162, "step": 16075 }, { "epoch": 0.19609756097560976, "grad_norm": 0.7972254157066345, "learning_rate": 1.869268292682927e-05, "loss": 0.1072, "step": 16080 }, { "epoch": 0.19615853658536586, "grad_norm": 0.8586052656173706, "learning_rate": 1.869227642276423e-05, "loss": 0.13, "step": 16085 }, { "epoch": 0.19621951219512196, "grad_norm": 1.3623749017715454, "learning_rate": 1.8691869918699187e-05, "loss": 0.1827, "step": 16090 }, { "epoch": 0.19628048780487806, "grad_norm": 1.1473585367202759, "learning_rate": 1.869146341463415e-05, "loss": 0.138, "step": 16095 }, { "epoch": 0.19634146341463415, "grad_norm": 0.6550276279449463, "learning_rate": 1.8691056910569106e-05, "loss": 0.1343, "step": 16100 }, { "epoch": 0.19640243902439025, "grad_norm": 0.882783830165863, "learning_rate": 1.8690650406504068e-05, "loss": 0.1312, "step": 16105 }, { "epoch": 0.19646341463414635, "grad_norm": 1.4432843923568726, "learning_rate": 1.8690243902439026e-05, "loss": 0.1296, "step": 16110 }, { "epoch": 0.19652439024390245, "grad_norm": 0.9042450785636902, "learning_rate": 1.8689837398373984e-05, "loss": 0.1248, "step": 16115 }, { "epoch": 0.19658536585365854, "grad_norm": 0.4495181739330292, "learning_rate": 1.8689430894308946e-05, "loss": 0.1005, "step": 16120 }, { "epoch": 0.19664634146341464, "grad_norm": 0.925576388835907, "learning_rate": 1.8689024390243904e-05, "loss": 0.1183, "step": 16125 }, { "epoch": 0.19670731707317074, "grad_norm": 0.5571459531784058, "learning_rate": 1.8688617886178862e-05, "loss": 0.0823, "step": 16130 }, { "epoch": 0.19676829268292684, "grad_norm": 0.5986247062683105, "learning_rate": 1.8688211382113824e-05, "loss": 0.0942, "step": 16135 }, { "epoch": 0.19682926829268294, "grad_norm": 0.7317216396331787, "learning_rate": 1.868780487804878e-05, "loss": 0.0808, "step": 16140 }, { "epoch": 0.19689024390243903, "grad_norm": 0.9550277590751648, "learning_rate": 1.868739837398374e-05, "loss": 0.1867, "step": 16145 }, { "epoch": 0.19695121951219513, "grad_norm": 1.4204481840133667, "learning_rate": 1.86869918699187e-05, "loss": 0.1504, "step": 16150 }, { "epoch": 0.19701219512195123, "grad_norm": 1.0450518131256104, "learning_rate": 1.868658536585366e-05, "loss": 0.1094, "step": 16155 }, { "epoch": 0.19707317073170733, "grad_norm": 0.6803722381591797, "learning_rate": 1.868617886178862e-05, "loss": 0.1101, "step": 16160 }, { "epoch": 0.19713414634146342, "grad_norm": 1.1909786462783813, "learning_rate": 1.868577235772358e-05, "loss": 0.135, "step": 16165 }, { "epoch": 0.19719512195121952, "grad_norm": 1.0265182256698608, "learning_rate": 1.868536585365854e-05, "loss": 0.1165, "step": 16170 }, { "epoch": 0.19725609756097562, "grad_norm": 1.2693864107131958, "learning_rate": 1.8684959349593495e-05, "loss": 0.1558, "step": 16175 }, { "epoch": 0.19731707317073172, "grad_norm": 1.8081616163253784, "learning_rate": 1.8684552845528457e-05, "loss": 0.1382, "step": 16180 }, { "epoch": 0.1973780487804878, "grad_norm": 0.7373142838478088, "learning_rate": 1.8684146341463415e-05, "loss": 0.0991, "step": 16185 }, { "epoch": 0.1974390243902439, "grad_norm": 1.0676655769348145, "learning_rate": 1.8683739837398376e-05, "loss": 0.1315, "step": 16190 }, { "epoch": 0.1975, "grad_norm": 1.7419319152832031, "learning_rate": 1.8683333333333335e-05, "loss": 0.117, "step": 16195 }, { "epoch": 0.1975609756097561, "grad_norm": 0.44499221444129944, "learning_rate": 1.8682926829268296e-05, "loss": 0.0996, "step": 16200 }, { "epoch": 0.1976219512195122, "grad_norm": 0.7688623666763306, "learning_rate": 1.8682520325203254e-05, "loss": 0.1313, "step": 16205 }, { "epoch": 0.1976829268292683, "grad_norm": 0.9870346784591675, "learning_rate": 1.8682113821138212e-05, "loss": 0.1142, "step": 16210 }, { "epoch": 0.1977439024390244, "grad_norm": 0.7531616687774658, "learning_rate": 1.8681707317073174e-05, "loss": 0.0934, "step": 16215 }, { "epoch": 0.1978048780487805, "grad_norm": 0.5862661600112915, "learning_rate": 1.8681300813008132e-05, "loss": 0.1215, "step": 16220 }, { "epoch": 0.1978658536585366, "grad_norm": 0.8270121812820435, "learning_rate": 1.868089430894309e-05, "loss": 0.1506, "step": 16225 }, { "epoch": 0.1979268292682927, "grad_norm": 0.5313813090324402, "learning_rate": 1.868048780487805e-05, "loss": 0.127, "step": 16230 }, { "epoch": 0.1979878048780488, "grad_norm": 1.0496470928192139, "learning_rate": 1.868008130081301e-05, "loss": 0.1296, "step": 16235 }, { "epoch": 0.1980487804878049, "grad_norm": 1.0026593208312988, "learning_rate": 1.8679674796747968e-05, "loss": 0.1091, "step": 16240 }, { "epoch": 0.19810975609756099, "grad_norm": 0.8550696969032288, "learning_rate": 1.867926829268293e-05, "loss": 0.0995, "step": 16245 }, { "epoch": 0.19817073170731708, "grad_norm": 0.7255894541740417, "learning_rate": 1.8678861788617888e-05, "loss": 0.1365, "step": 16250 }, { "epoch": 0.19823170731707318, "grad_norm": 1.0004204511642456, "learning_rate": 1.867845528455285e-05, "loss": 0.1016, "step": 16255 }, { "epoch": 0.19829268292682928, "grad_norm": 0.6764278411865234, "learning_rate": 1.8678048780487807e-05, "loss": 0.0959, "step": 16260 }, { "epoch": 0.19835365853658538, "grad_norm": 1.5064542293548584, "learning_rate": 1.8677642276422765e-05, "loss": 0.1075, "step": 16265 }, { "epoch": 0.19841463414634147, "grad_norm": 0.876924455165863, "learning_rate": 1.8677235772357723e-05, "loss": 0.1168, "step": 16270 }, { "epoch": 0.19847560975609757, "grad_norm": 0.789215624332428, "learning_rate": 1.8676829268292685e-05, "loss": 0.1298, "step": 16275 }, { "epoch": 0.19853658536585367, "grad_norm": 0.6364191174507141, "learning_rate": 1.8676422764227643e-05, "loss": 0.1447, "step": 16280 }, { "epoch": 0.19859756097560977, "grad_norm": 1.1746559143066406, "learning_rate": 1.8676016260162605e-05, "loss": 0.1155, "step": 16285 }, { "epoch": 0.19865853658536586, "grad_norm": 1.996239423751831, "learning_rate": 1.8675609756097563e-05, "loss": 0.1112, "step": 16290 }, { "epoch": 0.19871951219512196, "grad_norm": 0.7578063011169434, "learning_rate": 1.867520325203252e-05, "loss": 0.1091, "step": 16295 }, { "epoch": 0.19878048780487806, "grad_norm": 0.7784888744354248, "learning_rate": 1.8674796747967482e-05, "loss": 0.1424, "step": 16300 }, { "epoch": 0.19884146341463416, "grad_norm": 0.6087601780891418, "learning_rate": 1.867439024390244e-05, "loss": 0.0877, "step": 16305 }, { "epoch": 0.19890243902439025, "grad_norm": 1.0299739837646484, "learning_rate": 1.86739837398374e-05, "loss": 0.1423, "step": 16310 }, { "epoch": 0.19896341463414635, "grad_norm": 1.0329656600952148, "learning_rate": 1.867357723577236e-05, "loss": 0.1136, "step": 16315 }, { "epoch": 0.19902439024390245, "grad_norm": 0.8169140815734863, "learning_rate": 1.8673170731707318e-05, "loss": 0.1441, "step": 16320 }, { "epoch": 0.19908536585365855, "grad_norm": 0.614185094833374, "learning_rate": 1.8672764227642276e-05, "loss": 0.1027, "step": 16325 }, { "epoch": 0.19914634146341464, "grad_norm": 1.2635070085525513, "learning_rate": 1.8672357723577238e-05, "loss": 0.1125, "step": 16330 }, { "epoch": 0.19920731707317074, "grad_norm": 0.5382969975471497, "learning_rate": 1.8671951219512196e-05, "loss": 0.1095, "step": 16335 }, { "epoch": 0.19926829268292684, "grad_norm": 0.7164344787597656, "learning_rate": 1.8671544715447158e-05, "loss": 0.1337, "step": 16340 }, { "epoch": 0.19932926829268294, "grad_norm": 2.6105775833129883, "learning_rate": 1.8671138211382116e-05, "loss": 0.1334, "step": 16345 }, { "epoch": 0.19939024390243903, "grad_norm": 1.5935423374176025, "learning_rate": 1.8670731707317077e-05, "loss": 0.1148, "step": 16350 }, { "epoch": 0.19945121951219513, "grad_norm": 0.7436128258705139, "learning_rate": 1.8670325203252032e-05, "loss": 0.1393, "step": 16355 }, { "epoch": 0.19951219512195123, "grad_norm": 1.3971374034881592, "learning_rate": 1.8669918699186993e-05, "loss": 0.1529, "step": 16360 }, { "epoch": 0.19957317073170733, "grad_norm": 1.2316898107528687, "learning_rate": 1.866951219512195e-05, "loss": 0.1252, "step": 16365 }, { "epoch": 0.19963414634146343, "grad_norm": 0.6928900480270386, "learning_rate": 1.8669105691056913e-05, "loss": 0.0896, "step": 16370 }, { "epoch": 0.19969512195121952, "grad_norm": 0.6717300415039062, "learning_rate": 1.866869918699187e-05, "loss": 0.1152, "step": 16375 }, { "epoch": 0.19975609756097562, "grad_norm": 0.4226863384246826, "learning_rate": 1.8668292682926833e-05, "loss": 0.0837, "step": 16380 }, { "epoch": 0.19981707317073172, "grad_norm": 0.7316256761550903, "learning_rate": 1.866788617886179e-05, "loss": 0.1473, "step": 16385 }, { "epoch": 0.19987804878048782, "grad_norm": 0.47067663073539734, "learning_rate": 1.866747967479675e-05, "loss": 0.1299, "step": 16390 }, { "epoch": 0.1999390243902439, "grad_norm": 0.5630431175231934, "learning_rate": 1.8667073170731707e-05, "loss": 0.0984, "step": 16395 }, { "epoch": 0.2, "grad_norm": 1.4991754293441772, "learning_rate": 1.866666666666667e-05, "loss": 0.1177, "step": 16400 }, { "epoch": 0.2000609756097561, "grad_norm": 1.0265916585922241, "learning_rate": 1.8666260162601627e-05, "loss": 0.1421, "step": 16405 }, { "epoch": 0.2001219512195122, "grad_norm": 1.0383590459823608, "learning_rate": 1.8665853658536588e-05, "loss": 0.1208, "step": 16410 }, { "epoch": 0.2001829268292683, "grad_norm": 0.8332270383834839, "learning_rate": 1.8665447154471546e-05, "loss": 0.143, "step": 16415 }, { "epoch": 0.2002439024390244, "grad_norm": 0.6138107776641846, "learning_rate": 1.8665040650406505e-05, "loss": 0.1115, "step": 16420 }, { "epoch": 0.2003048780487805, "grad_norm": 0.9605457782745361, "learning_rate": 1.8664634146341466e-05, "loss": 0.1004, "step": 16425 }, { "epoch": 0.2003658536585366, "grad_norm": 0.7828406095504761, "learning_rate": 1.8664227642276424e-05, "loss": 0.1093, "step": 16430 }, { "epoch": 0.2004268292682927, "grad_norm": 0.945743203163147, "learning_rate": 1.8663821138211386e-05, "loss": 0.1593, "step": 16435 }, { "epoch": 0.2004878048780488, "grad_norm": 1.1849623918533325, "learning_rate": 1.8663414634146344e-05, "loss": 0.1114, "step": 16440 }, { "epoch": 0.2005487804878049, "grad_norm": 0.886402428150177, "learning_rate": 1.8663008130081302e-05, "loss": 0.0976, "step": 16445 }, { "epoch": 0.200609756097561, "grad_norm": 0.6126216053962708, "learning_rate": 1.866260162601626e-05, "loss": 0.1027, "step": 16450 }, { "epoch": 0.20067073170731708, "grad_norm": 0.8839738965034485, "learning_rate": 1.866219512195122e-05, "loss": 0.1225, "step": 16455 }, { "epoch": 0.20073170731707318, "grad_norm": 0.9865520000457764, "learning_rate": 1.866178861788618e-05, "loss": 0.1327, "step": 16460 }, { "epoch": 0.20079268292682928, "grad_norm": 0.9164289236068726, "learning_rate": 1.866138211382114e-05, "loss": 0.1172, "step": 16465 }, { "epoch": 0.20085365853658538, "grad_norm": 1.3280541896820068, "learning_rate": 1.86609756097561e-05, "loss": 0.1183, "step": 16470 }, { "epoch": 0.20091463414634148, "grad_norm": 0.923740804195404, "learning_rate": 1.8660569105691058e-05, "loss": 0.1265, "step": 16475 }, { "epoch": 0.20097560975609757, "grad_norm": 1.8658192157745361, "learning_rate": 1.866016260162602e-05, "loss": 0.1261, "step": 16480 }, { "epoch": 0.20103658536585367, "grad_norm": 0.8668442368507385, "learning_rate": 1.8659756097560977e-05, "loss": 0.1309, "step": 16485 }, { "epoch": 0.20109756097560977, "grad_norm": 0.5960710644721985, "learning_rate": 1.8659349593495935e-05, "loss": 0.1228, "step": 16490 }, { "epoch": 0.20115853658536587, "grad_norm": 1.4717708826065063, "learning_rate": 1.8658943089430897e-05, "loss": 0.1838, "step": 16495 }, { "epoch": 0.20121951219512196, "grad_norm": 1.1824140548706055, "learning_rate": 1.8658536585365855e-05, "loss": 0.1322, "step": 16500 }, { "epoch": 0.20128048780487806, "grad_norm": 1.0861510038375854, "learning_rate": 1.8658130081300813e-05, "loss": 0.1239, "step": 16505 }, { "epoch": 0.20134146341463416, "grad_norm": 0.8934125900268555, "learning_rate": 1.8657723577235775e-05, "loss": 0.095, "step": 16510 }, { "epoch": 0.20140243902439026, "grad_norm": 1.124630331993103, "learning_rate": 1.8657317073170733e-05, "loss": 0.1187, "step": 16515 }, { "epoch": 0.20146341463414635, "grad_norm": 0.42107629776000977, "learning_rate": 1.8656910569105694e-05, "loss": 0.1146, "step": 16520 }, { "epoch": 0.20152439024390245, "grad_norm": 1.1815626621246338, "learning_rate": 1.8656504065040652e-05, "loss": 0.1623, "step": 16525 }, { "epoch": 0.20158536585365855, "grad_norm": 0.5515421628952026, "learning_rate": 1.8656097560975614e-05, "loss": 0.0834, "step": 16530 }, { "epoch": 0.20164634146341465, "grad_norm": 1.7516461610794067, "learning_rate": 1.865569105691057e-05, "loss": 0.1117, "step": 16535 }, { "epoch": 0.20170731707317074, "grad_norm": 0.806381344795227, "learning_rate": 1.865528455284553e-05, "loss": 0.1113, "step": 16540 }, { "epoch": 0.20176829268292684, "grad_norm": 1.7859598398208618, "learning_rate": 1.8654878048780488e-05, "loss": 0.1152, "step": 16545 }, { "epoch": 0.20182926829268294, "grad_norm": 0.5944957137107849, "learning_rate": 1.865447154471545e-05, "loss": 0.1179, "step": 16550 }, { "epoch": 0.20189024390243904, "grad_norm": 0.7346733212471008, "learning_rate": 1.8654065040650408e-05, "loss": 0.2039, "step": 16555 }, { "epoch": 0.20195121951219513, "grad_norm": 1.024718999862671, "learning_rate": 1.865365853658537e-05, "loss": 0.1304, "step": 16560 }, { "epoch": 0.20201219512195123, "grad_norm": 0.5871094465255737, "learning_rate": 1.8653252032520328e-05, "loss": 0.1221, "step": 16565 }, { "epoch": 0.20207317073170733, "grad_norm": 1.0503829717636108, "learning_rate": 1.8652845528455286e-05, "loss": 0.1483, "step": 16570 }, { "epoch": 0.20213414634146343, "grad_norm": 0.8165857195854187, "learning_rate": 1.8652439024390244e-05, "loss": 0.1055, "step": 16575 }, { "epoch": 0.20219512195121953, "grad_norm": 1.1464481353759766, "learning_rate": 1.8652032520325205e-05, "loss": 0.1707, "step": 16580 }, { "epoch": 0.20225609756097562, "grad_norm": 0.8394029140472412, "learning_rate": 1.8651626016260163e-05, "loss": 0.1233, "step": 16585 }, { "epoch": 0.20231707317073172, "grad_norm": 0.7373074889183044, "learning_rate": 1.8651219512195125e-05, "loss": 0.1196, "step": 16590 }, { "epoch": 0.20237804878048782, "grad_norm": 0.8667508363723755, "learning_rate": 1.8650813008130083e-05, "loss": 0.1112, "step": 16595 }, { "epoch": 0.20243902439024392, "grad_norm": 0.5447790622711182, "learning_rate": 1.865040650406504e-05, "loss": 0.1324, "step": 16600 }, { "epoch": 0.2025, "grad_norm": 2.0988669395446777, "learning_rate": 1.8650000000000003e-05, "loss": 0.1544, "step": 16605 }, { "epoch": 0.2025609756097561, "grad_norm": 1.0324945449829102, "learning_rate": 1.864959349593496e-05, "loss": 0.124, "step": 16610 }, { "epoch": 0.2026219512195122, "grad_norm": 1.2526984214782715, "learning_rate": 1.8649186991869922e-05, "loss": 0.1668, "step": 16615 }, { "epoch": 0.2026829268292683, "grad_norm": 0.7098159193992615, "learning_rate": 1.864878048780488e-05, "loss": 0.1022, "step": 16620 }, { "epoch": 0.2027439024390244, "grad_norm": 0.9291409254074097, "learning_rate": 1.864837398373984e-05, "loss": 0.0949, "step": 16625 }, { "epoch": 0.2028048780487805, "grad_norm": 0.5801483988761902, "learning_rate": 1.8647967479674797e-05, "loss": 0.0864, "step": 16630 }, { "epoch": 0.2028658536585366, "grad_norm": 0.6534504890441895, "learning_rate": 1.8647560975609758e-05, "loss": 0.0853, "step": 16635 }, { "epoch": 0.2029268292682927, "grad_norm": 1.6556684970855713, "learning_rate": 1.8647154471544716e-05, "loss": 0.1019, "step": 16640 }, { "epoch": 0.2029878048780488, "grad_norm": 0.8088613152503967, "learning_rate": 1.8646747967479678e-05, "loss": 0.1046, "step": 16645 }, { "epoch": 0.2030487804878049, "grad_norm": 0.6531838774681091, "learning_rate": 1.8646341463414636e-05, "loss": 0.1099, "step": 16650 }, { "epoch": 0.203109756097561, "grad_norm": 1.203211784362793, "learning_rate": 1.8645934959349594e-05, "loss": 0.0974, "step": 16655 }, { "epoch": 0.20317073170731706, "grad_norm": 1.0170199871063232, "learning_rate": 1.8645528455284552e-05, "loss": 0.1145, "step": 16660 }, { "epoch": 0.20323170731707316, "grad_norm": 0.9212628602981567, "learning_rate": 1.8645121951219514e-05, "loss": 0.1472, "step": 16665 }, { "epoch": 0.20329268292682925, "grad_norm": 0.5918509364128113, "learning_rate": 1.8644715447154472e-05, "loss": 0.1096, "step": 16670 }, { "epoch": 0.20335365853658535, "grad_norm": 4.351781845092773, "learning_rate": 1.8644308943089433e-05, "loss": 0.1115, "step": 16675 }, { "epoch": 0.20341463414634145, "grad_norm": 0.6195643544197083, "learning_rate": 1.864390243902439e-05, "loss": 0.1358, "step": 16680 }, { "epoch": 0.20347560975609755, "grad_norm": 0.5812374353408813, "learning_rate": 1.864349593495935e-05, "loss": 0.1065, "step": 16685 }, { "epoch": 0.20353658536585365, "grad_norm": 0.7671797275543213, "learning_rate": 1.864308943089431e-05, "loss": 0.1196, "step": 16690 }, { "epoch": 0.20359756097560974, "grad_norm": 0.7715076804161072, "learning_rate": 1.864268292682927e-05, "loss": 0.1278, "step": 16695 }, { "epoch": 0.20365853658536584, "grad_norm": 0.8225272297859192, "learning_rate": 1.864227642276423e-05, "loss": 0.0966, "step": 16700 }, { "epoch": 0.20371951219512194, "grad_norm": 0.7368301749229431, "learning_rate": 1.864186991869919e-05, "loss": 0.2015, "step": 16705 }, { "epoch": 0.20378048780487804, "grad_norm": 1.161617636680603, "learning_rate": 1.864146341463415e-05, "loss": 0.1619, "step": 16710 }, { "epoch": 0.20384146341463413, "grad_norm": 1.0989221334457397, "learning_rate": 1.8641056910569105e-05, "loss": 0.1582, "step": 16715 }, { "epoch": 0.20390243902439023, "grad_norm": 0.4643488824367523, "learning_rate": 1.8640650406504067e-05, "loss": 0.0978, "step": 16720 }, { "epoch": 0.20396341463414633, "grad_norm": 1.116754412651062, "learning_rate": 1.8640243902439025e-05, "loss": 0.1202, "step": 16725 }, { "epoch": 0.20402439024390243, "grad_norm": 0.5773060917854309, "learning_rate": 1.8639837398373986e-05, "loss": 0.1011, "step": 16730 }, { "epoch": 0.20408536585365852, "grad_norm": 0.9777873158454895, "learning_rate": 1.8639430894308945e-05, "loss": 0.1576, "step": 16735 }, { "epoch": 0.20414634146341462, "grad_norm": 1.3390589952468872, "learning_rate": 1.8639024390243906e-05, "loss": 0.2102, "step": 16740 }, { "epoch": 0.20420731707317072, "grad_norm": 0.9487841129302979, "learning_rate": 1.8638617886178864e-05, "loss": 0.1386, "step": 16745 }, { "epoch": 0.20426829268292682, "grad_norm": 0.8458355665206909, "learning_rate": 1.8638211382113822e-05, "loss": 0.1094, "step": 16750 }, { "epoch": 0.20432926829268291, "grad_norm": 0.9450898170471191, "learning_rate": 1.863780487804878e-05, "loss": 0.1535, "step": 16755 }, { "epoch": 0.204390243902439, "grad_norm": 1.3944141864776611, "learning_rate": 1.8637398373983742e-05, "loss": 0.1142, "step": 16760 }, { "epoch": 0.2044512195121951, "grad_norm": 0.6936134099960327, "learning_rate": 1.86369918699187e-05, "loss": 0.1196, "step": 16765 }, { "epoch": 0.2045121951219512, "grad_norm": 0.5709937214851379, "learning_rate": 1.863658536585366e-05, "loss": 0.1163, "step": 16770 }, { "epoch": 0.2045731707317073, "grad_norm": 0.9127474427223206, "learning_rate": 1.863617886178862e-05, "loss": 0.0856, "step": 16775 }, { "epoch": 0.2046341463414634, "grad_norm": 2.0252790451049805, "learning_rate": 1.8635772357723578e-05, "loss": 0.1114, "step": 16780 }, { "epoch": 0.2046951219512195, "grad_norm": 0.894923210144043, "learning_rate": 1.863536585365854e-05, "loss": 0.1178, "step": 16785 }, { "epoch": 0.2047560975609756, "grad_norm": 0.6971614956855774, "learning_rate": 1.8634959349593497e-05, "loss": 0.134, "step": 16790 }, { "epoch": 0.2048170731707317, "grad_norm": 0.85821133852005, "learning_rate": 1.863455284552846e-05, "loss": 0.0905, "step": 16795 }, { "epoch": 0.2048780487804878, "grad_norm": 0.8999843597412109, "learning_rate": 1.8634146341463417e-05, "loss": 0.1074, "step": 16800 }, { "epoch": 0.2049390243902439, "grad_norm": 0.5444599986076355, "learning_rate": 1.8633739837398375e-05, "loss": 0.1066, "step": 16805 }, { "epoch": 0.205, "grad_norm": 0.8537779450416565, "learning_rate": 1.8633333333333333e-05, "loss": 0.1175, "step": 16810 }, { "epoch": 0.20506097560975609, "grad_norm": 1.7015409469604492, "learning_rate": 1.8632926829268295e-05, "loss": 0.1046, "step": 16815 }, { "epoch": 0.20512195121951218, "grad_norm": 0.5776312947273254, "learning_rate": 1.8632520325203253e-05, "loss": 0.0895, "step": 16820 }, { "epoch": 0.20518292682926828, "grad_norm": 0.8313401341438293, "learning_rate": 1.8632113821138215e-05, "loss": 0.142, "step": 16825 }, { "epoch": 0.20524390243902438, "grad_norm": 2.5980024337768555, "learning_rate": 1.8631707317073173e-05, "loss": 0.1578, "step": 16830 }, { "epoch": 0.20530487804878048, "grad_norm": 3.20855975151062, "learning_rate": 1.863130081300813e-05, "loss": 0.1365, "step": 16835 }, { "epoch": 0.20536585365853657, "grad_norm": 0.9043994545936584, "learning_rate": 1.863089430894309e-05, "loss": 0.1048, "step": 16840 }, { "epoch": 0.20542682926829267, "grad_norm": 0.6061489582061768, "learning_rate": 1.863048780487805e-05, "loss": 0.1266, "step": 16845 }, { "epoch": 0.20548780487804877, "grad_norm": 0.8818576335906982, "learning_rate": 1.863008130081301e-05, "loss": 0.0986, "step": 16850 }, { "epoch": 0.20554878048780487, "grad_norm": 1.0026803016662598, "learning_rate": 1.862967479674797e-05, "loss": 0.1062, "step": 16855 }, { "epoch": 0.20560975609756096, "grad_norm": 0.8791608810424805, "learning_rate": 1.8629268292682928e-05, "loss": 0.1586, "step": 16860 }, { "epoch": 0.20567073170731706, "grad_norm": 0.6816546320915222, "learning_rate": 1.8628861788617886e-05, "loss": 0.1253, "step": 16865 }, { "epoch": 0.20573170731707316, "grad_norm": 1.168676495552063, "learning_rate": 1.8628455284552848e-05, "loss": 0.1189, "step": 16870 }, { "epoch": 0.20579268292682926, "grad_norm": 0.6440593004226685, "learning_rate": 1.8628048780487806e-05, "loss": 0.1277, "step": 16875 }, { "epoch": 0.20585365853658535, "grad_norm": 0.6709810495376587, "learning_rate": 1.8627642276422767e-05, "loss": 0.116, "step": 16880 }, { "epoch": 0.20591463414634145, "grad_norm": 1.0120638608932495, "learning_rate": 1.8627235772357726e-05, "loss": 0.1498, "step": 16885 }, { "epoch": 0.20597560975609755, "grad_norm": 0.6131817698478699, "learning_rate": 1.8626829268292687e-05, "loss": 0.1374, "step": 16890 }, { "epoch": 0.20603658536585365, "grad_norm": 0.7054675221443176, "learning_rate": 1.8626422764227642e-05, "loss": 0.1323, "step": 16895 }, { "epoch": 0.20609756097560974, "grad_norm": 0.5601779818534851, "learning_rate": 1.8626016260162603e-05, "loss": 0.1821, "step": 16900 }, { "epoch": 0.20615853658536584, "grad_norm": 0.4604112505912781, "learning_rate": 1.862560975609756e-05, "loss": 0.1232, "step": 16905 }, { "epoch": 0.20621951219512194, "grad_norm": 1.4281333684921265, "learning_rate": 1.8625203252032523e-05, "loss": 0.0996, "step": 16910 }, { "epoch": 0.20628048780487804, "grad_norm": 1.038624882698059, "learning_rate": 1.862479674796748e-05, "loss": 0.1399, "step": 16915 }, { "epoch": 0.20634146341463414, "grad_norm": 0.9003133177757263, "learning_rate": 1.8624390243902443e-05, "loss": 0.1118, "step": 16920 }, { "epoch": 0.20640243902439023, "grad_norm": 1.7296268939971924, "learning_rate": 1.8623983739837397e-05, "loss": 0.1725, "step": 16925 }, { "epoch": 0.20646341463414633, "grad_norm": 1.3984813690185547, "learning_rate": 1.862357723577236e-05, "loss": 0.1087, "step": 16930 }, { "epoch": 0.20652439024390243, "grad_norm": 1.0155935287475586, "learning_rate": 1.8623170731707317e-05, "loss": 0.1309, "step": 16935 }, { "epoch": 0.20658536585365853, "grad_norm": 0.833202600479126, "learning_rate": 1.862276422764228e-05, "loss": 0.13, "step": 16940 }, { "epoch": 0.20664634146341462, "grad_norm": 0.7118765711784363, "learning_rate": 1.8622357723577237e-05, "loss": 0.1235, "step": 16945 }, { "epoch": 0.20670731707317072, "grad_norm": 1.0436598062515259, "learning_rate": 1.8621951219512198e-05, "loss": 0.1252, "step": 16950 }, { "epoch": 0.20676829268292682, "grad_norm": 0.6805412173271179, "learning_rate": 1.8621544715447156e-05, "loss": 0.0894, "step": 16955 }, { "epoch": 0.20682926829268292, "grad_norm": 7.981970310211182, "learning_rate": 1.8621138211382114e-05, "loss": 0.1458, "step": 16960 }, { "epoch": 0.206890243902439, "grad_norm": 1.3698196411132812, "learning_rate": 1.8620731707317076e-05, "loss": 0.0912, "step": 16965 }, { "epoch": 0.2069512195121951, "grad_norm": 2.8907997608184814, "learning_rate": 1.8620325203252034e-05, "loss": 0.1473, "step": 16970 }, { "epoch": 0.2070121951219512, "grad_norm": 0.870098888874054, "learning_rate": 1.8619918699186996e-05, "loss": 0.1109, "step": 16975 }, { "epoch": 0.2070731707317073, "grad_norm": 1.191535234451294, "learning_rate": 1.8619512195121954e-05, "loss": 0.1481, "step": 16980 }, { "epoch": 0.2071341463414634, "grad_norm": 0.9525921940803528, "learning_rate": 1.8619105691056912e-05, "loss": 0.0929, "step": 16985 }, { "epoch": 0.2071951219512195, "grad_norm": 0.9839493036270142, "learning_rate": 1.861869918699187e-05, "loss": 0.1445, "step": 16990 }, { "epoch": 0.2072560975609756, "grad_norm": 0.6688716411590576, "learning_rate": 1.861829268292683e-05, "loss": 0.1193, "step": 16995 }, { "epoch": 0.2073170731707317, "grad_norm": 1.1768394708633423, "learning_rate": 1.861788617886179e-05, "loss": 0.1587, "step": 17000 }, { "epoch": 0.2073780487804878, "grad_norm": 0.5572740435600281, "learning_rate": 1.861747967479675e-05, "loss": 0.0913, "step": 17005 }, { "epoch": 0.2074390243902439, "grad_norm": 0.9226147532463074, "learning_rate": 1.861707317073171e-05, "loss": 0.1311, "step": 17010 }, { "epoch": 0.2075, "grad_norm": 2.5892841815948486, "learning_rate": 1.8616666666666667e-05, "loss": 0.1131, "step": 17015 }, { "epoch": 0.2075609756097561, "grad_norm": 1.1892633438110352, "learning_rate": 1.8616260162601626e-05, "loss": 0.1163, "step": 17020 }, { "epoch": 0.20762195121951219, "grad_norm": 1.0575215816497803, "learning_rate": 1.8615853658536587e-05, "loss": 0.1162, "step": 17025 }, { "epoch": 0.20768292682926828, "grad_norm": 4.09636926651001, "learning_rate": 1.8615447154471545e-05, "loss": 0.1097, "step": 17030 }, { "epoch": 0.20774390243902438, "grad_norm": 0.8867756724357605, "learning_rate": 1.8615040650406507e-05, "loss": 0.0883, "step": 17035 }, { "epoch": 0.20780487804878048, "grad_norm": 0.4957659840583801, "learning_rate": 1.8614634146341465e-05, "loss": 0.0901, "step": 17040 }, { "epoch": 0.20786585365853658, "grad_norm": 0.6446238160133362, "learning_rate": 1.8614227642276423e-05, "loss": 0.1319, "step": 17045 }, { "epoch": 0.20792682926829267, "grad_norm": 0.6078333258628845, "learning_rate": 1.8613821138211384e-05, "loss": 0.1191, "step": 17050 }, { "epoch": 0.20798780487804877, "grad_norm": 6.556800365447998, "learning_rate": 1.8613414634146343e-05, "loss": 0.1374, "step": 17055 }, { "epoch": 0.20804878048780487, "grad_norm": 0.5991682410240173, "learning_rate": 1.8613008130081304e-05, "loss": 0.1013, "step": 17060 }, { "epoch": 0.20810975609756097, "grad_norm": 0.9294754266738892, "learning_rate": 1.8612601626016262e-05, "loss": 0.1078, "step": 17065 }, { "epoch": 0.20817073170731706, "grad_norm": 0.7615802884101868, "learning_rate": 1.861219512195122e-05, "loss": 0.1362, "step": 17070 }, { "epoch": 0.20823170731707316, "grad_norm": 0.7293683290481567, "learning_rate": 1.861178861788618e-05, "loss": 0.1012, "step": 17075 }, { "epoch": 0.20829268292682926, "grad_norm": 0.8201871514320374, "learning_rate": 1.861138211382114e-05, "loss": 0.1554, "step": 17080 }, { "epoch": 0.20835365853658536, "grad_norm": 0.8634776473045349, "learning_rate": 1.8610975609756098e-05, "loss": 0.1076, "step": 17085 }, { "epoch": 0.20841463414634145, "grad_norm": 0.5180187225341797, "learning_rate": 1.861056910569106e-05, "loss": 0.1265, "step": 17090 }, { "epoch": 0.20847560975609755, "grad_norm": 0.665908694267273, "learning_rate": 1.8610162601626018e-05, "loss": 0.1106, "step": 17095 }, { "epoch": 0.20853658536585365, "grad_norm": 0.8339003324508667, "learning_rate": 1.860975609756098e-05, "loss": 0.1006, "step": 17100 }, { "epoch": 0.20859756097560975, "grad_norm": 1.3457916975021362, "learning_rate": 1.8609349593495934e-05, "loss": 0.1218, "step": 17105 }, { "epoch": 0.20865853658536584, "grad_norm": 0.7511897087097168, "learning_rate": 1.8608943089430896e-05, "loss": 0.1255, "step": 17110 }, { "epoch": 0.20871951219512194, "grad_norm": 0.9159162044525146, "learning_rate": 1.8608536585365854e-05, "loss": 0.1253, "step": 17115 }, { "epoch": 0.20878048780487804, "grad_norm": 0.9993377327919006, "learning_rate": 1.8608130081300815e-05, "loss": 0.1215, "step": 17120 }, { "epoch": 0.20884146341463414, "grad_norm": 0.746184229850769, "learning_rate": 1.8607723577235773e-05, "loss": 0.1175, "step": 17125 }, { "epoch": 0.20890243902439024, "grad_norm": 0.7395259141921997, "learning_rate": 1.8607317073170735e-05, "loss": 0.101, "step": 17130 }, { "epoch": 0.20896341463414633, "grad_norm": 0.9616599082946777, "learning_rate": 1.8606910569105693e-05, "loss": 0.1153, "step": 17135 }, { "epoch": 0.20902439024390243, "grad_norm": 0.4690238833427429, "learning_rate": 1.860650406504065e-05, "loss": 0.1076, "step": 17140 }, { "epoch": 0.20908536585365853, "grad_norm": 1.0518850088119507, "learning_rate": 1.8606097560975613e-05, "loss": 0.1195, "step": 17145 }, { "epoch": 0.20914634146341463, "grad_norm": 0.5859444737434387, "learning_rate": 1.860569105691057e-05, "loss": 0.1535, "step": 17150 }, { "epoch": 0.20920731707317072, "grad_norm": 1.5632237195968628, "learning_rate": 1.8605284552845532e-05, "loss": 0.1167, "step": 17155 }, { "epoch": 0.20926829268292682, "grad_norm": 0.9065248966217041, "learning_rate": 1.860487804878049e-05, "loss": 0.1012, "step": 17160 }, { "epoch": 0.20932926829268292, "grad_norm": 0.43909990787506104, "learning_rate": 1.860447154471545e-05, "loss": 0.1014, "step": 17165 }, { "epoch": 0.20939024390243902, "grad_norm": 0.8929809331893921, "learning_rate": 1.8604065040650407e-05, "loss": 0.1122, "step": 17170 }, { "epoch": 0.2094512195121951, "grad_norm": 0.7582813501358032, "learning_rate": 1.8603658536585368e-05, "loss": 0.1262, "step": 17175 }, { "epoch": 0.2095121951219512, "grad_norm": 0.5795328617095947, "learning_rate": 1.8603252032520326e-05, "loss": 0.1167, "step": 17180 }, { "epoch": 0.2095731707317073, "grad_norm": 0.9619212746620178, "learning_rate": 1.8602845528455288e-05, "loss": 0.0839, "step": 17185 }, { "epoch": 0.2096341463414634, "grad_norm": 0.61222904920578, "learning_rate": 1.8602439024390246e-05, "loss": 0.1387, "step": 17190 }, { "epoch": 0.2096951219512195, "grad_norm": 1.1988765001296997, "learning_rate": 1.8602032520325204e-05, "loss": 0.1091, "step": 17195 }, { "epoch": 0.2097560975609756, "grad_norm": 0.9556024670600891, "learning_rate": 1.8601626016260162e-05, "loss": 0.1274, "step": 17200 }, { "epoch": 0.2098170731707317, "grad_norm": 1.3554338216781616, "learning_rate": 1.8601219512195124e-05, "loss": 0.1166, "step": 17205 }, { "epoch": 0.2098780487804878, "grad_norm": 0.531072735786438, "learning_rate": 1.8600813008130082e-05, "loss": 0.1213, "step": 17210 }, { "epoch": 0.2099390243902439, "grad_norm": 1.7347427606582642, "learning_rate": 1.8600406504065043e-05, "loss": 0.1106, "step": 17215 }, { "epoch": 0.21, "grad_norm": 0.7788551449775696, "learning_rate": 1.86e-05, "loss": 0.1037, "step": 17220 }, { "epoch": 0.2100609756097561, "grad_norm": 1.4390567541122437, "learning_rate": 1.859959349593496e-05, "loss": 0.0968, "step": 17225 }, { "epoch": 0.2101219512195122, "grad_norm": 1.0305250883102417, "learning_rate": 1.859918699186992e-05, "loss": 0.0921, "step": 17230 }, { "epoch": 0.21018292682926829, "grad_norm": 0.8083258867263794, "learning_rate": 1.859878048780488e-05, "loss": 0.1259, "step": 17235 }, { "epoch": 0.21024390243902438, "grad_norm": 0.936842143535614, "learning_rate": 1.859837398373984e-05, "loss": 0.1155, "step": 17240 }, { "epoch": 0.21030487804878048, "grad_norm": 0.99141526222229, "learning_rate": 1.85979674796748e-05, "loss": 0.1236, "step": 17245 }, { "epoch": 0.21036585365853658, "grad_norm": 1.2374941110610962, "learning_rate": 1.8597560975609757e-05, "loss": 0.1393, "step": 17250 }, { "epoch": 0.21042682926829268, "grad_norm": 0.6558562517166138, "learning_rate": 1.8597154471544715e-05, "loss": 0.1287, "step": 17255 }, { "epoch": 0.21048780487804877, "grad_norm": 2.0403239727020264, "learning_rate": 1.8596747967479677e-05, "loss": 0.1176, "step": 17260 }, { "epoch": 0.21054878048780487, "grad_norm": 1.8828489780426025, "learning_rate": 1.8596341463414635e-05, "loss": 0.1527, "step": 17265 }, { "epoch": 0.21060975609756097, "grad_norm": 1.015587329864502, "learning_rate": 1.8595934959349596e-05, "loss": 0.1217, "step": 17270 }, { "epoch": 0.21067073170731707, "grad_norm": 0.8607341647148132, "learning_rate": 1.8595528455284554e-05, "loss": 0.1319, "step": 17275 }, { "epoch": 0.21073170731707316, "grad_norm": 1.0904042720794678, "learning_rate": 1.8595121951219516e-05, "loss": 0.1028, "step": 17280 }, { "epoch": 0.21079268292682926, "grad_norm": 0.8644242882728577, "learning_rate": 1.859471544715447e-05, "loss": 0.1099, "step": 17285 }, { "epoch": 0.21085365853658536, "grad_norm": 0.6811634302139282, "learning_rate": 1.8594308943089432e-05, "loss": 0.0949, "step": 17290 }, { "epoch": 0.21091463414634146, "grad_norm": 1.2700437307357788, "learning_rate": 1.859390243902439e-05, "loss": 0.0876, "step": 17295 }, { "epoch": 0.21097560975609755, "grad_norm": 0.9061005115509033, "learning_rate": 1.8593495934959352e-05, "loss": 0.1267, "step": 17300 }, { "epoch": 0.21103658536585365, "grad_norm": 1.201905608177185, "learning_rate": 1.859308943089431e-05, "loss": 0.112, "step": 17305 }, { "epoch": 0.21109756097560975, "grad_norm": 0.7844575643539429, "learning_rate": 1.859268292682927e-05, "loss": 0.1731, "step": 17310 }, { "epoch": 0.21115853658536585, "grad_norm": 1.109991192817688, "learning_rate": 1.859227642276423e-05, "loss": 0.134, "step": 17315 }, { "epoch": 0.21121951219512194, "grad_norm": 1.5258066654205322, "learning_rate": 1.8591869918699188e-05, "loss": 0.0932, "step": 17320 }, { "epoch": 0.21128048780487804, "grad_norm": 0.7273459434509277, "learning_rate": 1.859146341463415e-05, "loss": 0.1148, "step": 17325 }, { "epoch": 0.21134146341463414, "grad_norm": 0.9091761708259583, "learning_rate": 1.8591056910569107e-05, "loss": 0.1225, "step": 17330 }, { "epoch": 0.21140243902439024, "grad_norm": 0.713960587978363, "learning_rate": 1.8590650406504066e-05, "loss": 0.1142, "step": 17335 }, { "epoch": 0.21146341463414633, "grad_norm": 0.7039732336997986, "learning_rate": 1.8590243902439027e-05, "loss": 0.1138, "step": 17340 }, { "epoch": 0.21152439024390243, "grad_norm": 1.2024883031845093, "learning_rate": 1.8589837398373985e-05, "loss": 0.1262, "step": 17345 }, { "epoch": 0.21158536585365853, "grad_norm": 0.6170496940612793, "learning_rate": 1.8589430894308943e-05, "loss": 0.0824, "step": 17350 }, { "epoch": 0.21164634146341463, "grad_norm": 0.8578436374664307, "learning_rate": 1.8589024390243905e-05, "loss": 0.0978, "step": 17355 }, { "epoch": 0.21170731707317073, "grad_norm": 0.5103603005409241, "learning_rate": 1.8588617886178863e-05, "loss": 0.1439, "step": 17360 }, { "epoch": 0.21176829268292682, "grad_norm": 1.310073733329773, "learning_rate": 1.8588211382113824e-05, "loss": 0.1209, "step": 17365 }, { "epoch": 0.21182926829268292, "grad_norm": 0.6847240328788757, "learning_rate": 1.8587804878048783e-05, "loss": 0.1097, "step": 17370 }, { "epoch": 0.21189024390243902, "grad_norm": 0.8230190277099609, "learning_rate": 1.858739837398374e-05, "loss": 0.122, "step": 17375 }, { "epoch": 0.21195121951219512, "grad_norm": 2.318385601043701, "learning_rate": 1.85869918699187e-05, "loss": 0.1374, "step": 17380 }, { "epoch": 0.2120121951219512, "grad_norm": 0.6926048994064331, "learning_rate": 1.858658536585366e-05, "loss": 0.1181, "step": 17385 }, { "epoch": 0.2120731707317073, "grad_norm": 2.0059401988983154, "learning_rate": 1.858617886178862e-05, "loss": 0.2436, "step": 17390 }, { "epoch": 0.2121341463414634, "grad_norm": 0.9634150862693787, "learning_rate": 1.858577235772358e-05, "loss": 0.1412, "step": 17395 }, { "epoch": 0.2121951219512195, "grad_norm": 0.7527242302894592, "learning_rate": 1.8585365853658538e-05, "loss": 0.1132, "step": 17400 }, { "epoch": 0.2122560975609756, "grad_norm": 1.0216306447982788, "learning_rate": 1.8584959349593496e-05, "loss": 0.1092, "step": 17405 }, { "epoch": 0.2123170731707317, "grad_norm": 0.6941344738006592, "learning_rate": 1.8584552845528458e-05, "loss": 0.1043, "step": 17410 }, { "epoch": 0.2123780487804878, "grad_norm": 0.9338459372520447, "learning_rate": 1.8584146341463416e-05, "loss": 0.1229, "step": 17415 }, { "epoch": 0.2124390243902439, "grad_norm": 0.8181416988372803, "learning_rate": 1.8583739837398377e-05, "loss": 0.1155, "step": 17420 }, { "epoch": 0.2125, "grad_norm": 0.84511399269104, "learning_rate": 1.8583333333333336e-05, "loss": 0.1399, "step": 17425 }, { "epoch": 0.2125609756097561, "grad_norm": 1.1546550989151, "learning_rate": 1.8582926829268294e-05, "loss": 0.1092, "step": 17430 }, { "epoch": 0.2126219512195122, "grad_norm": 0.9614156484603882, "learning_rate": 1.8582520325203252e-05, "loss": 0.1091, "step": 17435 }, { "epoch": 0.2126829268292683, "grad_norm": 0.7009599208831787, "learning_rate": 1.8582113821138213e-05, "loss": 0.1031, "step": 17440 }, { "epoch": 0.21274390243902438, "grad_norm": 0.5638642311096191, "learning_rate": 1.858170731707317e-05, "loss": 0.1025, "step": 17445 }, { "epoch": 0.21280487804878048, "grad_norm": 0.9994840621948242, "learning_rate": 1.8581300813008133e-05, "loss": 0.1352, "step": 17450 }, { "epoch": 0.21286585365853658, "grad_norm": 0.37466591596603394, "learning_rate": 1.858089430894309e-05, "loss": 0.0907, "step": 17455 }, { "epoch": 0.21292682926829268, "grad_norm": 0.5775498747825623, "learning_rate": 1.8580487804878053e-05, "loss": 0.1202, "step": 17460 }, { "epoch": 0.21298780487804878, "grad_norm": 0.8514886498451233, "learning_rate": 1.8580081300813007e-05, "loss": 0.1023, "step": 17465 }, { "epoch": 0.21304878048780487, "grad_norm": 0.8611307144165039, "learning_rate": 1.857967479674797e-05, "loss": 0.1171, "step": 17470 }, { "epoch": 0.21310975609756097, "grad_norm": 0.8538729548454285, "learning_rate": 1.8579268292682927e-05, "loss": 0.1219, "step": 17475 }, { "epoch": 0.21317073170731707, "grad_norm": 1.0551962852478027, "learning_rate": 1.857886178861789e-05, "loss": 0.1014, "step": 17480 }, { "epoch": 0.21323170731707317, "grad_norm": 0.5275969505310059, "learning_rate": 1.8578455284552847e-05, "loss": 0.0819, "step": 17485 }, { "epoch": 0.21329268292682926, "grad_norm": 0.6341127753257751, "learning_rate": 1.8578048780487808e-05, "loss": 0.0879, "step": 17490 }, { "epoch": 0.21335365853658536, "grad_norm": 2.3200385570526123, "learning_rate": 1.8577642276422766e-05, "loss": 0.1059, "step": 17495 }, { "epoch": 0.21341463414634146, "grad_norm": 0.8976805210113525, "learning_rate": 1.8577235772357724e-05, "loss": 0.1254, "step": 17500 }, { "epoch": 0.21347560975609756, "grad_norm": 0.7684794068336487, "learning_rate": 1.8576829268292686e-05, "loss": 0.097, "step": 17505 }, { "epoch": 0.21353658536585365, "grad_norm": 0.5824428200721741, "learning_rate": 1.8576422764227644e-05, "loss": 0.0946, "step": 17510 }, { "epoch": 0.21359756097560975, "grad_norm": 0.7452521324157715, "learning_rate": 1.8576016260162602e-05, "loss": 0.0967, "step": 17515 }, { "epoch": 0.21365853658536585, "grad_norm": 0.7870813608169556, "learning_rate": 1.8575609756097564e-05, "loss": 0.1106, "step": 17520 }, { "epoch": 0.21371951219512195, "grad_norm": 0.8471992611885071, "learning_rate": 1.8575203252032522e-05, "loss": 0.115, "step": 17525 }, { "epoch": 0.21378048780487804, "grad_norm": 0.5672903060913086, "learning_rate": 1.857479674796748e-05, "loss": 0.1124, "step": 17530 }, { "epoch": 0.21384146341463414, "grad_norm": 0.46028104424476624, "learning_rate": 1.857439024390244e-05, "loss": 0.1077, "step": 17535 }, { "epoch": 0.21390243902439024, "grad_norm": 1.9935694932937622, "learning_rate": 1.85739837398374e-05, "loss": 0.1054, "step": 17540 }, { "epoch": 0.21396341463414634, "grad_norm": 0.912311851978302, "learning_rate": 1.857357723577236e-05, "loss": 0.0999, "step": 17545 }, { "epoch": 0.21402439024390243, "grad_norm": 0.6329790949821472, "learning_rate": 1.857317073170732e-05, "loss": 0.0974, "step": 17550 }, { "epoch": 0.21408536585365853, "grad_norm": 1.144784927368164, "learning_rate": 1.8572764227642277e-05, "loss": 0.122, "step": 17555 }, { "epoch": 0.21414634146341463, "grad_norm": 1.6684695482254028, "learning_rate": 1.8572357723577235e-05, "loss": 0.1207, "step": 17560 }, { "epoch": 0.21420731707317073, "grad_norm": 0.9670343995094299, "learning_rate": 1.8571951219512197e-05, "loss": 0.1128, "step": 17565 }, { "epoch": 0.21426829268292683, "grad_norm": 0.8871945142745972, "learning_rate": 1.8571544715447155e-05, "loss": 0.1173, "step": 17570 }, { "epoch": 0.21432926829268292, "grad_norm": 0.6913628578186035, "learning_rate": 1.8571138211382117e-05, "loss": 0.1326, "step": 17575 }, { "epoch": 0.21439024390243902, "grad_norm": 1.4240281581878662, "learning_rate": 1.8570731707317075e-05, "loss": 0.1507, "step": 17580 }, { "epoch": 0.21445121951219512, "grad_norm": 1.3392609357833862, "learning_rate": 1.8570325203252033e-05, "loss": 0.1311, "step": 17585 }, { "epoch": 0.21451219512195122, "grad_norm": 0.8771713376045227, "learning_rate": 1.8569918699186994e-05, "loss": 0.093, "step": 17590 }, { "epoch": 0.2145731707317073, "grad_norm": 0.7847951650619507, "learning_rate": 1.8569512195121953e-05, "loss": 0.1288, "step": 17595 }, { "epoch": 0.2146341463414634, "grad_norm": 0.8222593069076538, "learning_rate": 1.856910569105691e-05, "loss": 0.1016, "step": 17600 }, { "epoch": 0.2146951219512195, "grad_norm": 1.2582112550735474, "learning_rate": 1.8568699186991872e-05, "loss": 0.124, "step": 17605 }, { "epoch": 0.2147560975609756, "grad_norm": 0.8029533624649048, "learning_rate": 1.856829268292683e-05, "loss": 0.1368, "step": 17610 }, { "epoch": 0.2148170731707317, "grad_norm": 1.3237552642822266, "learning_rate": 1.856788617886179e-05, "loss": 0.0952, "step": 17615 }, { "epoch": 0.2148780487804878, "grad_norm": 0.5167667269706726, "learning_rate": 1.856747967479675e-05, "loss": 0.1081, "step": 17620 }, { "epoch": 0.2149390243902439, "grad_norm": 1.1514670848846436, "learning_rate": 1.8567073170731708e-05, "loss": 0.1176, "step": 17625 }, { "epoch": 0.215, "grad_norm": 1.3830585479736328, "learning_rate": 1.856666666666667e-05, "loss": 0.1876, "step": 17630 }, { "epoch": 0.2150609756097561, "grad_norm": 0.539752185344696, "learning_rate": 1.8566260162601628e-05, "loss": 0.082, "step": 17635 }, { "epoch": 0.2151219512195122, "grad_norm": 1.0028849840164185, "learning_rate": 1.856585365853659e-05, "loss": 0.1134, "step": 17640 }, { "epoch": 0.2151829268292683, "grad_norm": 0.7138253450393677, "learning_rate": 1.8565447154471544e-05, "loss": 0.1003, "step": 17645 }, { "epoch": 0.2152439024390244, "grad_norm": 0.620090126991272, "learning_rate": 1.8565040650406506e-05, "loss": 0.0905, "step": 17650 }, { "epoch": 0.21530487804878048, "grad_norm": 0.6835846900939941, "learning_rate": 1.8564634146341464e-05, "loss": 0.0855, "step": 17655 }, { "epoch": 0.21536585365853658, "grad_norm": 2.0737104415893555, "learning_rate": 1.8564227642276425e-05, "loss": 0.1316, "step": 17660 }, { "epoch": 0.21542682926829268, "grad_norm": 0.7796688675880432, "learning_rate": 1.8563821138211383e-05, "loss": 0.1223, "step": 17665 }, { "epoch": 0.21548780487804878, "grad_norm": 1.0672249794006348, "learning_rate": 1.8563414634146345e-05, "loss": 0.1088, "step": 17670 }, { "epoch": 0.21554878048780488, "grad_norm": 1.193770408630371, "learning_rate": 1.8563008130081303e-05, "loss": 0.1123, "step": 17675 }, { "epoch": 0.21560975609756097, "grad_norm": 1.5589401721954346, "learning_rate": 1.856260162601626e-05, "loss": 0.1087, "step": 17680 }, { "epoch": 0.21567073170731707, "grad_norm": 1.739466667175293, "learning_rate": 1.856219512195122e-05, "loss": 0.1236, "step": 17685 }, { "epoch": 0.21573170731707317, "grad_norm": 0.6526850461959839, "learning_rate": 1.856178861788618e-05, "loss": 0.1168, "step": 17690 }, { "epoch": 0.21579268292682927, "grad_norm": 1.199857473373413, "learning_rate": 1.856138211382114e-05, "loss": 0.1533, "step": 17695 }, { "epoch": 0.21585365853658536, "grad_norm": 0.8068118095397949, "learning_rate": 1.85609756097561e-05, "loss": 0.1041, "step": 17700 }, { "epoch": 0.21591463414634146, "grad_norm": 1.0958292484283447, "learning_rate": 1.856056910569106e-05, "loss": 0.111, "step": 17705 }, { "epoch": 0.21597560975609756, "grad_norm": 1.3625564575195312, "learning_rate": 1.8560162601626017e-05, "loss": 0.1162, "step": 17710 }, { "epoch": 0.21603658536585366, "grad_norm": 1.4715099334716797, "learning_rate": 1.8559756097560978e-05, "loss": 0.1746, "step": 17715 }, { "epoch": 0.21609756097560975, "grad_norm": 0.8012833595275879, "learning_rate": 1.8559349593495936e-05, "loss": 0.158, "step": 17720 }, { "epoch": 0.21615853658536585, "grad_norm": 0.5704032182693481, "learning_rate": 1.8558943089430898e-05, "loss": 0.1092, "step": 17725 }, { "epoch": 0.21621951219512195, "grad_norm": 0.9971446394920349, "learning_rate": 1.8558536585365856e-05, "loss": 0.1153, "step": 17730 }, { "epoch": 0.21628048780487805, "grad_norm": 0.5358450412750244, "learning_rate": 1.8558130081300814e-05, "loss": 0.1312, "step": 17735 }, { "epoch": 0.21634146341463414, "grad_norm": 9.582178115844727, "learning_rate": 1.8557723577235772e-05, "loss": 0.1349, "step": 17740 }, { "epoch": 0.21640243902439024, "grad_norm": 1.5897867679595947, "learning_rate": 1.8557317073170734e-05, "loss": 0.1285, "step": 17745 }, { "epoch": 0.21646341463414634, "grad_norm": 1.0329091548919678, "learning_rate": 1.8556910569105692e-05, "loss": 0.1296, "step": 17750 }, { "epoch": 0.21652439024390244, "grad_norm": 0.7896464467048645, "learning_rate": 1.8556504065040653e-05, "loss": 0.129, "step": 17755 }, { "epoch": 0.21658536585365853, "grad_norm": 0.8020153045654297, "learning_rate": 1.855609756097561e-05, "loss": 0.1371, "step": 17760 }, { "epoch": 0.21664634146341463, "grad_norm": 0.6009228229522705, "learning_rate": 1.855569105691057e-05, "loss": 0.1063, "step": 17765 }, { "epoch": 0.21670731707317073, "grad_norm": 0.5687796473503113, "learning_rate": 1.855528455284553e-05, "loss": 0.098, "step": 17770 }, { "epoch": 0.21676829268292683, "grad_norm": 0.4952316880226135, "learning_rate": 1.855487804878049e-05, "loss": 0.1231, "step": 17775 }, { "epoch": 0.21682926829268293, "grad_norm": 0.5848963856697083, "learning_rate": 1.8554471544715447e-05, "loss": 0.1176, "step": 17780 }, { "epoch": 0.21689024390243902, "grad_norm": 1.7021007537841797, "learning_rate": 1.855406504065041e-05, "loss": 0.1061, "step": 17785 }, { "epoch": 0.21695121951219512, "grad_norm": 0.6210697293281555, "learning_rate": 1.8553658536585367e-05, "loss": 0.0814, "step": 17790 }, { "epoch": 0.21701219512195122, "grad_norm": 0.8020801544189453, "learning_rate": 1.8553252032520325e-05, "loss": 0.1112, "step": 17795 }, { "epoch": 0.21707317073170732, "grad_norm": 1.0316202640533447, "learning_rate": 1.8552845528455287e-05, "loss": 0.1183, "step": 17800 }, { "epoch": 0.2171341463414634, "grad_norm": 1.2223379611968994, "learning_rate": 1.8552439024390245e-05, "loss": 0.1529, "step": 17805 }, { "epoch": 0.2171951219512195, "grad_norm": 1.0802184343338013, "learning_rate": 1.8552032520325206e-05, "loss": 0.1383, "step": 17810 }, { "epoch": 0.2172560975609756, "grad_norm": 0.8276926279067993, "learning_rate": 1.8551626016260164e-05, "loss": 0.1369, "step": 17815 }, { "epoch": 0.2173170731707317, "grad_norm": 0.9556810855865479, "learning_rate": 1.8551219512195126e-05, "loss": 0.0762, "step": 17820 }, { "epoch": 0.2173780487804878, "grad_norm": 0.728021502494812, "learning_rate": 1.855081300813008e-05, "loss": 0.0933, "step": 17825 }, { "epoch": 0.2174390243902439, "grad_norm": 0.5016475915908813, "learning_rate": 1.8550406504065042e-05, "loss": 0.0988, "step": 17830 }, { "epoch": 0.2175, "grad_norm": 0.615200400352478, "learning_rate": 1.855e-05, "loss": 0.1365, "step": 17835 }, { "epoch": 0.2175609756097561, "grad_norm": 0.4586281180381775, "learning_rate": 1.8549593495934962e-05, "loss": 0.0996, "step": 17840 }, { "epoch": 0.2176219512195122, "grad_norm": 0.870453417301178, "learning_rate": 1.854918699186992e-05, "loss": 0.1223, "step": 17845 }, { "epoch": 0.2176829268292683, "grad_norm": 1.339739203453064, "learning_rate": 1.854878048780488e-05, "loss": 0.1285, "step": 17850 }, { "epoch": 0.2177439024390244, "grad_norm": 1.0838536024093628, "learning_rate": 1.854837398373984e-05, "loss": 0.1026, "step": 17855 }, { "epoch": 0.2178048780487805, "grad_norm": 1.529125690460205, "learning_rate": 1.8547967479674798e-05, "loss": 0.1674, "step": 17860 }, { "epoch": 0.21786585365853658, "grad_norm": 0.6734142899513245, "learning_rate": 1.8547560975609756e-05, "loss": 0.134, "step": 17865 }, { "epoch": 0.21792682926829268, "grad_norm": 0.8856792449951172, "learning_rate": 1.8547154471544717e-05, "loss": 0.1193, "step": 17870 }, { "epoch": 0.21798780487804878, "grad_norm": 0.7307069897651672, "learning_rate": 1.8546747967479675e-05, "loss": 0.0828, "step": 17875 }, { "epoch": 0.21804878048780488, "grad_norm": 0.6762153506278992, "learning_rate": 1.8546341463414637e-05, "loss": 0.1361, "step": 17880 }, { "epoch": 0.21810975609756098, "grad_norm": 0.6547636985778809, "learning_rate": 1.8545934959349595e-05, "loss": 0.0843, "step": 17885 }, { "epoch": 0.21817073170731707, "grad_norm": 0.6992166042327881, "learning_rate": 1.8545528455284553e-05, "loss": 0.1088, "step": 17890 }, { "epoch": 0.21823170731707317, "grad_norm": 0.5709012150764465, "learning_rate": 1.8545121951219515e-05, "loss": 0.078, "step": 17895 }, { "epoch": 0.21829268292682927, "grad_norm": 0.7042290568351746, "learning_rate": 1.8544715447154473e-05, "loss": 0.1073, "step": 17900 }, { "epoch": 0.21835365853658537, "grad_norm": 0.6211020946502686, "learning_rate": 1.8544308943089434e-05, "loss": 0.152, "step": 17905 }, { "epoch": 0.21841463414634146, "grad_norm": 0.5790967345237732, "learning_rate": 1.8543902439024393e-05, "loss": 0.0969, "step": 17910 }, { "epoch": 0.21847560975609756, "grad_norm": 1.1536164283752441, "learning_rate": 1.854349593495935e-05, "loss": 0.1247, "step": 17915 }, { "epoch": 0.21853658536585366, "grad_norm": 0.8289320468902588, "learning_rate": 1.854308943089431e-05, "loss": 0.094, "step": 17920 }, { "epoch": 0.21859756097560976, "grad_norm": 1.3156592845916748, "learning_rate": 1.854268292682927e-05, "loss": 0.136, "step": 17925 }, { "epoch": 0.21865853658536585, "grad_norm": 1.1504398584365845, "learning_rate": 1.854227642276423e-05, "loss": 0.1434, "step": 17930 }, { "epoch": 0.21871951219512195, "grad_norm": 1.257907509803772, "learning_rate": 1.854186991869919e-05, "loss": 0.1482, "step": 17935 }, { "epoch": 0.21878048780487805, "grad_norm": 0.9934775233268738, "learning_rate": 1.8541463414634148e-05, "loss": 0.1432, "step": 17940 }, { "epoch": 0.21884146341463415, "grad_norm": 0.8831115365028381, "learning_rate": 1.8541056910569106e-05, "loss": 0.1268, "step": 17945 }, { "epoch": 0.21890243902439024, "grad_norm": 2.983616590499878, "learning_rate": 1.8540650406504064e-05, "loss": 0.0905, "step": 17950 }, { "epoch": 0.21896341463414634, "grad_norm": 0.9478410482406616, "learning_rate": 1.8540243902439026e-05, "loss": 0.1209, "step": 17955 }, { "epoch": 0.21902439024390244, "grad_norm": 0.7340155243873596, "learning_rate": 1.8539837398373984e-05, "loss": 0.0883, "step": 17960 }, { "epoch": 0.21908536585365854, "grad_norm": 1.15873122215271, "learning_rate": 1.8539430894308945e-05, "loss": 0.1102, "step": 17965 }, { "epoch": 0.21914634146341463, "grad_norm": 0.846132755279541, "learning_rate": 1.8539024390243904e-05, "loss": 0.1099, "step": 17970 }, { "epoch": 0.21920731707317073, "grad_norm": 0.7948772311210632, "learning_rate": 1.8538617886178862e-05, "loss": 0.1026, "step": 17975 }, { "epoch": 0.21926829268292683, "grad_norm": 0.6289716362953186, "learning_rate": 1.8538211382113823e-05, "loss": 0.1394, "step": 17980 }, { "epoch": 0.21932926829268293, "grad_norm": 0.851736843585968, "learning_rate": 1.853780487804878e-05, "loss": 0.1464, "step": 17985 }, { "epoch": 0.21939024390243902, "grad_norm": 0.652935266494751, "learning_rate": 1.8537398373983743e-05, "loss": 0.126, "step": 17990 }, { "epoch": 0.21945121951219512, "grad_norm": 0.785912036895752, "learning_rate": 1.85369918699187e-05, "loss": 0.1361, "step": 17995 }, { "epoch": 0.21951219512195122, "grad_norm": 1.6493664979934692, "learning_rate": 1.8536585365853663e-05, "loss": 0.1276, "step": 18000 }, { "epoch": 0.21957317073170732, "grad_norm": 1.2319122552871704, "learning_rate": 1.8536178861788617e-05, "loss": 0.123, "step": 18005 }, { "epoch": 0.21963414634146342, "grad_norm": 0.5673662424087524, "learning_rate": 1.853577235772358e-05, "loss": 0.1121, "step": 18010 }, { "epoch": 0.2196951219512195, "grad_norm": 0.841016411781311, "learning_rate": 1.8535365853658537e-05, "loss": 0.0933, "step": 18015 }, { "epoch": 0.2197560975609756, "grad_norm": 0.6461983919143677, "learning_rate": 1.85349593495935e-05, "loss": 0.101, "step": 18020 }, { "epoch": 0.2198170731707317, "grad_norm": 1.2678700685501099, "learning_rate": 1.8534552845528457e-05, "loss": 0.1015, "step": 18025 }, { "epoch": 0.2198780487804878, "grad_norm": 0.8573291897773743, "learning_rate": 1.8534146341463418e-05, "loss": 0.0918, "step": 18030 }, { "epoch": 0.2199390243902439, "grad_norm": 0.45371562242507935, "learning_rate": 1.8533739837398376e-05, "loss": 0.118, "step": 18035 }, { "epoch": 0.22, "grad_norm": 0.8094324469566345, "learning_rate": 1.8533333333333334e-05, "loss": 0.1125, "step": 18040 }, { "epoch": 0.2200609756097561, "grad_norm": 0.6501901745796204, "learning_rate": 1.8532926829268292e-05, "loss": 0.1197, "step": 18045 }, { "epoch": 0.2201219512195122, "grad_norm": 0.5088422894477844, "learning_rate": 1.8532520325203254e-05, "loss": 0.0988, "step": 18050 }, { "epoch": 0.2201829268292683, "grad_norm": 1.0311105251312256, "learning_rate": 1.8532113821138212e-05, "loss": 0.1396, "step": 18055 }, { "epoch": 0.2202439024390244, "grad_norm": 1.5653049945831299, "learning_rate": 1.8531707317073174e-05, "loss": 0.1644, "step": 18060 }, { "epoch": 0.2203048780487805, "grad_norm": 0.7922583222389221, "learning_rate": 1.8531300813008132e-05, "loss": 0.1399, "step": 18065 }, { "epoch": 0.2203658536585366, "grad_norm": 2.260122060775757, "learning_rate": 1.853089430894309e-05, "loss": 0.1235, "step": 18070 }, { "epoch": 0.22042682926829268, "grad_norm": 0.6740506887435913, "learning_rate": 1.853048780487805e-05, "loss": 0.0935, "step": 18075 }, { "epoch": 0.22048780487804878, "grad_norm": 0.6606444120407104, "learning_rate": 1.853008130081301e-05, "loss": 0.1374, "step": 18080 }, { "epoch": 0.22054878048780488, "grad_norm": 0.6783804297447205, "learning_rate": 1.852967479674797e-05, "loss": 0.1291, "step": 18085 }, { "epoch": 0.22060975609756098, "grad_norm": 2.130450487136841, "learning_rate": 1.852926829268293e-05, "loss": 0.1187, "step": 18090 }, { "epoch": 0.22067073170731707, "grad_norm": 1.0464811325073242, "learning_rate": 1.8528861788617887e-05, "loss": 0.1352, "step": 18095 }, { "epoch": 0.22073170731707317, "grad_norm": 1.141851782798767, "learning_rate": 1.8528455284552845e-05, "loss": 0.1127, "step": 18100 }, { "epoch": 0.22079268292682927, "grad_norm": 0.9818429946899414, "learning_rate": 1.8528048780487807e-05, "loss": 0.1049, "step": 18105 }, { "epoch": 0.22085365853658537, "grad_norm": 0.8297792077064514, "learning_rate": 1.8527642276422765e-05, "loss": 0.1037, "step": 18110 }, { "epoch": 0.22091463414634147, "grad_norm": 0.7099448442459106, "learning_rate": 1.8527235772357727e-05, "loss": 0.0975, "step": 18115 }, { "epoch": 0.22097560975609756, "grad_norm": 0.8057444095611572, "learning_rate": 1.8526829268292685e-05, "loss": 0.1245, "step": 18120 }, { "epoch": 0.22103658536585366, "grad_norm": 1.3645519018173218, "learning_rate": 1.8526422764227643e-05, "loss": 0.1506, "step": 18125 }, { "epoch": 0.22109756097560976, "grad_norm": 1.4379690885543823, "learning_rate": 1.85260162601626e-05, "loss": 0.131, "step": 18130 }, { "epoch": 0.22115853658536586, "grad_norm": 1.1469347476959229, "learning_rate": 1.8525609756097562e-05, "loss": 0.1475, "step": 18135 }, { "epoch": 0.22121951219512195, "grad_norm": 1.1484417915344238, "learning_rate": 1.852520325203252e-05, "loss": 0.1555, "step": 18140 }, { "epoch": 0.22128048780487805, "grad_norm": 0.5680811405181885, "learning_rate": 1.8524796747967482e-05, "loss": 0.1245, "step": 18145 }, { "epoch": 0.22134146341463415, "grad_norm": 0.9517232775688171, "learning_rate": 1.852439024390244e-05, "loss": 0.0977, "step": 18150 }, { "epoch": 0.22140243902439025, "grad_norm": 0.8358845710754395, "learning_rate": 1.85239837398374e-05, "loss": 0.0912, "step": 18155 }, { "epoch": 0.22146341463414634, "grad_norm": 0.7291794419288635, "learning_rate": 1.852357723577236e-05, "loss": 0.113, "step": 18160 }, { "epoch": 0.22152439024390244, "grad_norm": 0.521431028842926, "learning_rate": 1.8523170731707318e-05, "loss": 0.1147, "step": 18165 }, { "epoch": 0.22158536585365854, "grad_norm": 0.6717706918716431, "learning_rate": 1.852276422764228e-05, "loss": 0.1117, "step": 18170 }, { "epoch": 0.22164634146341464, "grad_norm": 0.7941173911094666, "learning_rate": 1.8522357723577238e-05, "loss": 0.0975, "step": 18175 }, { "epoch": 0.22170731707317073, "grad_norm": 0.6425157189369202, "learning_rate": 1.85219512195122e-05, "loss": 0.1216, "step": 18180 }, { "epoch": 0.22176829268292683, "grad_norm": 0.9231148362159729, "learning_rate": 1.8521544715447154e-05, "loss": 0.1175, "step": 18185 }, { "epoch": 0.22182926829268293, "grad_norm": 0.9536622166633606, "learning_rate": 1.8521138211382115e-05, "loss": 0.0866, "step": 18190 }, { "epoch": 0.22189024390243903, "grad_norm": 0.9295595288276672, "learning_rate": 1.8520731707317074e-05, "loss": 0.0936, "step": 18195 }, { "epoch": 0.22195121951219512, "grad_norm": 0.3984490931034088, "learning_rate": 1.8520325203252035e-05, "loss": 0.0733, "step": 18200 }, { "epoch": 0.22201219512195122, "grad_norm": 0.8170921802520752, "learning_rate": 1.8519918699186993e-05, "loss": 0.1127, "step": 18205 }, { "epoch": 0.22207317073170732, "grad_norm": 0.5473208427429199, "learning_rate": 1.8519512195121955e-05, "loss": 0.0877, "step": 18210 }, { "epoch": 0.22213414634146342, "grad_norm": 0.735471248626709, "learning_rate": 1.851910569105691e-05, "loss": 0.1105, "step": 18215 }, { "epoch": 0.22219512195121952, "grad_norm": 0.8182423710823059, "learning_rate": 1.851869918699187e-05, "loss": 0.1116, "step": 18220 }, { "epoch": 0.2222560975609756, "grad_norm": 1.0201301574707031, "learning_rate": 1.851829268292683e-05, "loss": 0.146, "step": 18225 }, { "epoch": 0.2223170731707317, "grad_norm": 1.008037805557251, "learning_rate": 1.851788617886179e-05, "loss": 0.1442, "step": 18230 }, { "epoch": 0.2223780487804878, "grad_norm": 0.6411516070365906, "learning_rate": 1.851747967479675e-05, "loss": 0.1109, "step": 18235 }, { "epoch": 0.2224390243902439, "grad_norm": 1.1694488525390625, "learning_rate": 1.851707317073171e-05, "loss": 0.1147, "step": 18240 }, { "epoch": 0.2225, "grad_norm": 0.771411657333374, "learning_rate": 1.851666666666667e-05, "loss": 0.1159, "step": 18245 }, { "epoch": 0.2225609756097561, "grad_norm": 1.1034042835235596, "learning_rate": 1.8516260162601627e-05, "loss": 0.1071, "step": 18250 }, { "epoch": 0.2226219512195122, "grad_norm": 0.4836752712726593, "learning_rate": 1.8515853658536588e-05, "loss": 0.1077, "step": 18255 }, { "epoch": 0.2226829268292683, "grad_norm": 1.3456753492355347, "learning_rate": 1.8515447154471546e-05, "loss": 0.1041, "step": 18260 }, { "epoch": 0.2227439024390244, "grad_norm": 1.2953208684921265, "learning_rate": 1.8515040650406508e-05, "loss": 0.1115, "step": 18265 }, { "epoch": 0.2228048780487805, "grad_norm": 1.0194065570831299, "learning_rate": 1.8514634146341466e-05, "loss": 0.1193, "step": 18270 }, { "epoch": 0.2228658536585366, "grad_norm": 0.8142927885055542, "learning_rate": 1.8514227642276424e-05, "loss": 0.1, "step": 18275 }, { "epoch": 0.2229268292682927, "grad_norm": 0.6245907545089722, "learning_rate": 1.8513821138211382e-05, "loss": 0.1618, "step": 18280 }, { "epoch": 0.22298780487804878, "grad_norm": 0.703472375869751, "learning_rate": 1.8513414634146344e-05, "loss": 0.1377, "step": 18285 }, { "epoch": 0.22304878048780488, "grad_norm": 1.01594078540802, "learning_rate": 1.8513008130081302e-05, "loss": 0.1133, "step": 18290 }, { "epoch": 0.22310975609756098, "grad_norm": 0.6865525841712952, "learning_rate": 1.8512601626016263e-05, "loss": 0.092, "step": 18295 }, { "epoch": 0.22317073170731708, "grad_norm": 1.0602823495864868, "learning_rate": 1.851219512195122e-05, "loss": 0.108, "step": 18300 }, { "epoch": 0.22323170731707317, "grad_norm": 0.8955696225166321, "learning_rate": 1.851178861788618e-05, "loss": 0.099, "step": 18305 }, { "epoch": 0.22329268292682927, "grad_norm": 0.6118608713150024, "learning_rate": 1.8511382113821138e-05, "loss": 0.0983, "step": 18310 }, { "epoch": 0.22335365853658537, "grad_norm": 0.6518279314041138, "learning_rate": 1.85109756097561e-05, "loss": 0.0954, "step": 18315 }, { "epoch": 0.22341463414634147, "grad_norm": 1.3504191637039185, "learning_rate": 1.8510569105691057e-05, "loss": 0.0917, "step": 18320 }, { "epoch": 0.22347560975609757, "grad_norm": 0.8512226343154907, "learning_rate": 1.851016260162602e-05, "loss": 0.1152, "step": 18325 }, { "epoch": 0.22353658536585366, "grad_norm": 0.9490904211997986, "learning_rate": 1.8509756097560977e-05, "loss": 0.132, "step": 18330 }, { "epoch": 0.22359756097560976, "grad_norm": 0.854185938835144, "learning_rate": 1.8509349593495935e-05, "loss": 0.0992, "step": 18335 }, { "epoch": 0.22365853658536586, "grad_norm": 0.9842422008514404, "learning_rate": 1.8508943089430897e-05, "loss": 0.0904, "step": 18340 }, { "epoch": 0.22371951219512196, "grad_norm": 0.7965452075004578, "learning_rate": 1.8508536585365855e-05, "loss": 0.0817, "step": 18345 }, { "epoch": 0.22378048780487805, "grad_norm": 0.4040917158126831, "learning_rate": 1.8508130081300816e-05, "loss": 0.1066, "step": 18350 }, { "epoch": 0.22384146341463415, "grad_norm": 0.9421716928482056, "learning_rate": 1.8507723577235774e-05, "loss": 0.1154, "step": 18355 }, { "epoch": 0.22390243902439025, "grad_norm": 0.6525715589523315, "learning_rate": 1.8507317073170732e-05, "loss": 0.088, "step": 18360 }, { "epoch": 0.22396341463414635, "grad_norm": 0.8354149460792542, "learning_rate": 1.850691056910569e-05, "loss": 0.1127, "step": 18365 }, { "epoch": 0.22402439024390244, "grad_norm": 0.821648895740509, "learning_rate": 1.8506504065040652e-05, "loss": 0.0935, "step": 18370 }, { "epoch": 0.22408536585365854, "grad_norm": 0.8117032647132874, "learning_rate": 1.850609756097561e-05, "loss": 0.1027, "step": 18375 }, { "epoch": 0.22414634146341464, "grad_norm": 0.7418868541717529, "learning_rate": 1.8505691056910572e-05, "loss": 0.1508, "step": 18380 }, { "epoch": 0.22420731707317074, "grad_norm": 0.6127222776412964, "learning_rate": 1.850528455284553e-05, "loss": 0.1121, "step": 18385 }, { "epoch": 0.22426829268292683, "grad_norm": 0.8262706995010376, "learning_rate": 1.850487804878049e-05, "loss": 0.1438, "step": 18390 }, { "epoch": 0.22432926829268293, "grad_norm": 0.9001903533935547, "learning_rate": 1.8504471544715446e-05, "loss": 0.1, "step": 18395 }, { "epoch": 0.22439024390243903, "grad_norm": 1.1750346422195435, "learning_rate": 1.8504065040650408e-05, "loss": 0.1739, "step": 18400 }, { "epoch": 0.22445121951219513, "grad_norm": 2.3189949989318848, "learning_rate": 1.8503658536585366e-05, "loss": 0.0968, "step": 18405 }, { "epoch": 0.22451219512195122, "grad_norm": 0.6573166847229004, "learning_rate": 1.8503252032520327e-05, "loss": 0.0988, "step": 18410 }, { "epoch": 0.22457317073170732, "grad_norm": 0.7589108347892761, "learning_rate": 1.8502845528455285e-05, "loss": 0.1107, "step": 18415 }, { "epoch": 0.22463414634146342, "grad_norm": 1.2210280895233154, "learning_rate": 1.8502439024390247e-05, "loss": 0.1387, "step": 18420 }, { "epoch": 0.22469512195121952, "grad_norm": 1.467637300491333, "learning_rate": 1.8502032520325205e-05, "loss": 0.0985, "step": 18425 }, { "epoch": 0.22475609756097562, "grad_norm": 0.7020396590232849, "learning_rate": 1.8501626016260163e-05, "loss": 0.1502, "step": 18430 }, { "epoch": 0.2248170731707317, "grad_norm": 0.6876308917999268, "learning_rate": 1.8501219512195125e-05, "loss": 0.0954, "step": 18435 }, { "epoch": 0.2248780487804878, "grad_norm": 0.9120550751686096, "learning_rate": 1.8500813008130083e-05, "loss": 0.0961, "step": 18440 }, { "epoch": 0.2249390243902439, "grad_norm": 1.0052953958511353, "learning_rate": 1.8500406504065044e-05, "loss": 0.1167, "step": 18445 }, { "epoch": 0.225, "grad_norm": 1.1842340230941772, "learning_rate": 1.8500000000000002e-05, "loss": 0.1106, "step": 18450 }, { "epoch": 0.2250609756097561, "grad_norm": 0.726174533367157, "learning_rate": 1.849959349593496e-05, "loss": 0.1039, "step": 18455 }, { "epoch": 0.2251219512195122, "grad_norm": 0.980161726474762, "learning_rate": 1.849918699186992e-05, "loss": 0.1536, "step": 18460 }, { "epoch": 0.2251829268292683, "grad_norm": 2.151209592819214, "learning_rate": 1.849878048780488e-05, "loss": 0.0946, "step": 18465 }, { "epoch": 0.2252439024390244, "grad_norm": 1.2246164083480835, "learning_rate": 1.849837398373984e-05, "loss": 0.1253, "step": 18470 }, { "epoch": 0.2253048780487805, "grad_norm": 0.9144680500030518, "learning_rate": 1.84979674796748e-05, "loss": 0.1487, "step": 18475 }, { "epoch": 0.2253658536585366, "grad_norm": 1.3282735347747803, "learning_rate": 1.8497560975609758e-05, "loss": 0.1834, "step": 18480 }, { "epoch": 0.2254268292682927, "grad_norm": 0.8320720195770264, "learning_rate": 1.8497154471544716e-05, "loss": 0.0888, "step": 18485 }, { "epoch": 0.2254878048780488, "grad_norm": 0.7964045405387878, "learning_rate": 1.8496747967479674e-05, "loss": 0.1322, "step": 18490 }, { "epoch": 0.22554878048780488, "grad_norm": 2.1709256172180176, "learning_rate": 1.8496341463414636e-05, "loss": 0.133, "step": 18495 }, { "epoch": 0.22560975609756098, "grad_norm": 1.2790700197219849, "learning_rate": 1.8495934959349594e-05, "loss": 0.0902, "step": 18500 }, { "epoch": 0.22567073170731708, "grad_norm": 2.060621500015259, "learning_rate": 1.8495528455284555e-05, "loss": 0.1233, "step": 18505 }, { "epoch": 0.22573170731707318, "grad_norm": 0.7738952040672302, "learning_rate": 1.8495121951219514e-05, "loss": 0.1298, "step": 18510 }, { "epoch": 0.22579268292682927, "grad_norm": 1.4944820404052734, "learning_rate": 1.849471544715447e-05, "loss": 0.1277, "step": 18515 }, { "epoch": 0.22585365853658537, "grad_norm": 0.5279708504676819, "learning_rate": 1.8494308943089433e-05, "loss": 0.1, "step": 18520 }, { "epoch": 0.22591463414634147, "grad_norm": 0.5551148653030396, "learning_rate": 1.849390243902439e-05, "loss": 0.2058, "step": 18525 }, { "epoch": 0.22597560975609757, "grad_norm": 0.7709470987319946, "learning_rate": 1.8493495934959353e-05, "loss": 0.1204, "step": 18530 }, { "epoch": 0.22603658536585367, "grad_norm": 0.9020312428474426, "learning_rate": 1.849308943089431e-05, "loss": 0.1418, "step": 18535 }, { "epoch": 0.22609756097560976, "grad_norm": 0.8408060073852539, "learning_rate": 1.849268292682927e-05, "loss": 0.095, "step": 18540 }, { "epoch": 0.22615853658536586, "grad_norm": 0.7750778794288635, "learning_rate": 1.8492276422764227e-05, "loss": 0.1037, "step": 18545 }, { "epoch": 0.22621951219512196, "grad_norm": 0.9009009003639221, "learning_rate": 1.849186991869919e-05, "loss": 0.0725, "step": 18550 }, { "epoch": 0.22628048780487806, "grad_norm": 0.5729185342788696, "learning_rate": 1.8491463414634147e-05, "loss": 0.1205, "step": 18555 }, { "epoch": 0.22634146341463415, "grad_norm": 0.5281723141670227, "learning_rate": 1.849105691056911e-05, "loss": 0.121, "step": 18560 }, { "epoch": 0.22640243902439025, "grad_norm": 1.7148009538650513, "learning_rate": 1.8490650406504066e-05, "loss": 0.112, "step": 18565 }, { "epoch": 0.22646341463414635, "grad_norm": 26.088764190673828, "learning_rate": 1.8490243902439028e-05, "loss": 0.1375, "step": 18570 }, { "epoch": 0.22652439024390245, "grad_norm": 1.485224962234497, "learning_rate": 1.8489837398373983e-05, "loss": 0.15, "step": 18575 }, { "epoch": 0.22658536585365854, "grad_norm": 1.586064100265503, "learning_rate": 1.8489430894308944e-05, "loss": 0.1312, "step": 18580 }, { "epoch": 0.22664634146341464, "grad_norm": 0.8674637079238892, "learning_rate": 1.8489024390243902e-05, "loss": 0.0984, "step": 18585 }, { "epoch": 0.22670731707317074, "grad_norm": 1.3971772193908691, "learning_rate": 1.8488617886178864e-05, "loss": 0.1384, "step": 18590 }, { "epoch": 0.22676829268292684, "grad_norm": 0.9371227622032166, "learning_rate": 1.8488211382113822e-05, "loss": 0.1045, "step": 18595 }, { "epoch": 0.22682926829268293, "grad_norm": 1.1090720891952515, "learning_rate": 1.8487804878048784e-05, "loss": 0.1136, "step": 18600 }, { "epoch": 0.22689024390243903, "grad_norm": 0.7114262580871582, "learning_rate": 1.848739837398374e-05, "loss": 0.0937, "step": 18605 }, { "epoch": 0.22695121951219513, "grad_norm": 0.8935422301292419, "learning_rate": 1.84869918699187e-05, "loss": 0.1399, "step": 18610 }, { "epoch": 0.22701219512195123, "grad_norm": 0.6833059787750244, "learning_rate": 1.848658536585366e-05, "loss": 0.0872, "step": 18615 }, { "epoch": 0.22707317073170732, "grad_norm": 0.9558311700820923, "learning_rate": 1.848617886178862e-05, "loss": 0.1337, "step": 18620 }, { "epoch": 0.22713414634146342, "grad_norm": 0.9313580989837646, "learning_rate": 1.8485772357723578e-05, "loss": 0.1304, "step": 18625 }, { "epoch": 0.22719512195121952, "grad_norm": 0.9183863401412964, "learning_rate": 1.848536585365854e-05, "loss": 0.1189, "step": 18630 }, { "epoch": 0.22725609756097562, "grad_norm": 0.6458277106285095, "learning_rate": 1.8484959349593497e-05, "loss": 0.1196, "step": 18635 }, { "epoch": 0.22731707317073171, "grad_norm": 0.7128133773803711, "learning_rate": 1.8484552845528455e-05, "loss": 0.128, "step": 18640 }, { "epoch": 0.2273780487804878, "grad_norm": 1.1764084100723267, "learning_rate": 1.8484146341463417e-05, "loss": 0.1073, "step": 18645 }, { "epoch": 0.2274390243902439, "grad_norm": 1.3666048049926758, "learning_rate": 1.8483739837398375e-05, "loss": 0.1222, "step": 18650 }, { "epoch": 0.2275, "grad_norm": 1.0774155855178833, "learning_rate": 1.8483333333333337e-05, "loss": 0.0913, "step": 18655 }, { "epoch": 0.2275609756097561, "grad_norm": 1.0324656963348389, "learning_rate": 1.8482926829268295e-05, "loss": 0.1184, "step": 18660 }, { "epoch": 0.2276219512195122, "grad_norm": 1.2680305242538452, "learning_rate": 1.8482520325203253e-05, "loss": 0.1082, "step": 18665 }, { "epoch": 0.2276829268292683, "grad_norm": 0.7217169404029846, "learning_rate": 1.848211382113821e-05, "loss": 0.0928, "step": 18670 }, { "epoch": 0.2277439024390244, "grad_norm": 1.1544150114059448, "learning_rate": 1.8481707317073172e-05, "loss": 0.1349, "step": 18675 }, { "epoch": 0.2278048780487805, "grad_norm": 1.3701436519622803, "learning_rate": 1.848130081300813e-05, "loss": 0.103, "step": 18680 }, { "epoch": 0.2278658536585366, "grad_norm": 1.011635184288025, "learning_rate": 1.8480894308943092e-05, "loss": 0.1025, "step": 18685 }, { "epoch": 0.2279268292682927, "grad_norm": 1.1354913711547852, "learning_rate": 1.848048780487805e-05, "loss": 0.0899, "step": 18690 }, { "epoch": 0.2279878048780488, "grad_norm": 2.4104645252227783, "learning_rate": 1.8480081300813008e-05, "loss": 0.1295, "step": 18695 }, { "epoch": 0.2280487804878049, "grad_norm": 0.7045374512672424, "learning_rate": 1.847967479674797e-05, "loss": 0.1127, "step": 18700 }, { "epoch": 0.22810975609756098, "grad_norm": 0.49157464504241943, "learning_rate": 1.8479268292682928e-05, "loss": 0.1009, "step": 18705 }, { "epoch": 0.22817073170731708, "grad_norm": 0.8404134511947632, "learning_rate": 1.847886178861789e-05, "loss": 0.1305, "step": 18710 }, { "epoch": 0.22823170731707318, "grad_norm": 0.7496171593666077, "learning_rate": 1.8478455284552848e-05, "loss": 0.0985, "step": 18715 }, { "epoch": 0.22829268292682928, "grad_norm": 1.895638108253479, "learning_rate": 1.8478048780487806e-05, "loss": 0.0946, "step": 18720 }, { "epoch": 0.22835365853658537, "grad_norm": 1.694387435913086, "learning_rate": 1.8477642276422764e-05, "loss": 0.1615, "step": 18725 }, { "epoch": 0.22841463414634147, "grad_norm": 0.6485394835472107, "learning_rate": 1.8477235772357725e-05, "loss": 0.1004, "step": 18730 }, { "epoch": 0.22847560975609757, "grad_norm": 0.7597106099128723, "learning_rate": 1.8476829268292683e-05, "loss": 0.0936, "step": 18735 }, { "epoch": 0.22853658536585367, "grad_norm": 0.7109811305999756, "learning_rate": 1.8476422764227645e-05, "loss": 0.1239, "step": 18740 }, { "epoch": 0.22859756097560976, "grad_norm": 1.022241234779358, "learning_rate": 1.8476016260162603e-05, "loss": 0.1068, "step": 18745 }, { "epoch": 0.22865853658536586, "grad_norm": 0.5989212393760681, "learning_rate": 1.8475609756097565e-05, "loss": 0.1555, "step": 18750 }, { "epoch": 0.22871951219512196, "grad_norm": 0.9674970507621765, "learning_rate": 1.847520325203252e-05, "loss": 0.1516, "step": 18755 }, { "epoch": 0.22878048780487806, "grad_norm": 0.5042538046836853, "learning_rate": 1.847479674796748e-05, "loss": 0.1053, "step": 18760 }, { "epoch": 0.22884146341463416, "grad_norm": 1.1791527271270752, "learning_rate": 1.847439024390244e-05, "loss": 0.1547, "step": 18765 }, { "epoch": 0.22890243902439025, "grad_norm": 0.8661876916885376, "learning_rate": 1.84739837398374e-05, "loss": 0.1345, "step": 18770 }, { "epoch": 0.22896341463414635, "grad_norm": 0.9057254791259766, "learning_rate": 1.847357723577236e-05, "loss": 0.1165, "step": 18775 }, { "epoch": 0.22902439024390245, "grad_norm": 2.091707229614258, "learning_rate": 1.847317073170732e-05, "loss": 0.1125, "step": 18780 }, { "epoch": 0.22908536585365855, "grad_norm": 0.6951819658279419, "learning_rate": 1.847276422764228e-05, "loss": 0.1071, "step": 18785 }, { "epoch": 0.22914634146341464, "grad_norm": 1.0328655242919922, "learning_rate": 1.8472357723577236e-05, "loss": 0.1455, "step": 18790 }, { "epoch": 0.22920731707317074, "grad_norm": 1.399575114250183, "learning_rate": 1.8471951219512198e-05, "loss": 0.1475, "step": 18795 }, { "epoch": 0.22926829268292684, "grad_norm": 1.3356398344039917, "learning_rate": 1.8471544715447156e-05, "loss": 0.1464, "step": 18800 }, { "epoch": 0.22932926829268294, "grad_norm": 0.788667619228363, "learning_rate": 1.8471138211382114e-05, "loss": 0.1004, "step": 18805 }, { "epoch": 0.22939024390243903, "grad_norm": 0.9361981153488159, "learning_rate": 1.8470731707317076e-05, "loss": 0.097, "step": 18810 }, { "epoch": 0.22945121951219513, "grad_norm": 1.475494623184204, "learning_rate": 1.8470325203252034e-05, "loss": 0.1577, "step": 18815 }, { "epoch": 0.22951219512195123, "grad_norm": 0.7811160087585449, "learning_rate": 1.8469918699186992e-05, "loss": 0.1185, "step": 18820 }, { "epoch": 0.22957317073170733, "grad_norm": 1.1353615522384644, "learning_rate": 1.8469512195121954e-05, "loss": 0.1043, "step": 18825 }, { "epoch": 0.22963414634146342, "grad_norm": 0.8314191699028015, "learning_rate": 1.846910569105691e-05, "loss": 0.1058, "step": 18830 }, { "epoch": 0.22969512195121952, "grad_norm": 2.2107954025268555, "learning_rate": 1.8468699186991873e-05, "loss": 0.1224, "step": 18835 }, { "epoch": 0.22975609756097562, "grad_norm": 0.9326886534690857, "learning_rate": 1.846829268292683e-05, "loss": 0.1132, "step": 18840 }, { "epoch": 0.22981707317073172, "grad_norm": 0.5735052824020386, "learning_rate": 1.846788617886179e-05, "loss": 0.1059, "step": 18845 }, { "epoch": 0.22987804878048781, "grad_norm": 1.1006969213485718, "learning_rate": 1.8467479674796748e-05, "loss": 0.1479, "step": 18850 }, { "epoch": 0.2299390243902439, "grad_norm": 0.8006986975669861, "learning_rate": 1.846707317073171e-05, "loss": 0.1369, "step": 18855 }, { "epoch": 0.23, "grad_norm": 1.351372480392456, "learning_rate": 1.8466666666666667e-05, "loss": 0.1168, "step": 18860 }, { "epoch": 0.2300609756097561, "grad_norm": 0.9617125391960144, "learning_rate": 1.846626016260163e-05, "loss": 0.1031, "step": 18865 }, { "epoch": 0.2301219512195122, "grad_norm": 0.6870741844177246, "learning_rate": 1.8465853658536587e-05, "loss": 0.1232, "step": 18870 }, { "epoch": 0.2301829268292683, "grad_norm": 0.8810122013092041, "learning_rate": 1.8465447154471545e-05, "loss": 0.1413, "step": 18875 }, { "epoch": 0.2302439024390244, "grad_norm": 0.4025011360645294, "learning_rate": 1.8465040650406506e-05, "loss": 0.1047, "step": 18880 }, { "epoch": 0.2303048780487805, "grad_norm": 1.2057932615280151, "learning_rate": 1.8464634146341465e-05, "loss": 0.1038, "step": 18885 }, { "epoch": 0.2303658536585366, "grad_norm": 0.6756693124771118, "learning_rate": 1.8464227642276423e-05, "loss": 0.1646, "step": 18890 }, { "epoch": 0.2304268292682927, "grad_norm": 0.7431323528289795, "learning_rate": 1.8463821138211384e-05, "loss": 0.1098, "step": 18895 }, { "epoch": 0.2304878048780488, "grad_norm": 0.4915427267551422, "learning_rate": 1.8463414634146342e-05, "loss": 0.0953, "step": 18900 }, { "epoch": 0.2305487804878049, "grad_norm": 1.1414906978607178, "learning_rate": 1.84630081300813e-05, "loss": 0.1156, "step": 18905 }, { "epoch": 0.230609756097561, "grad_norm": 1.2705832719802856, "learning_rate": 1.8462601626016262e-05, "loss": 0.1253, "step": 18910 }, { "epoch": 0.23067073170731708, "grad_norm": 0.9795524477958679, "learning_rate": 1.846219512195122e-05, "loss": 0.1055, "step": 18915 }, { "epoch": 0.23073170731707318, "grad_norm": 1.3857672214508057, "learning_rate": 1.846178861788618e-05, "loss": 0.1129, "step": 18920 }, { "epoch": 0.23079268292682928, "grad_norm": 1.154821753501892, "learning_rate": 1.846138211382114e-05, "loss": 0.103, "step": 18925 }, { "epoch": 0.23085365853658538, "grad_norm": 1.4968931674957275, "learning_rate": 1.84609756097561e-05, "loss": 0.0892, "step": 18930 }, { "epoch": 0.23091463414634147, "grad_norm": 0.86323481798172, "learning_rate": 1.8460569105691056e-05, "loss": 0.1198, "step": 18935 }, { "epoch": 0.23097560975609757, "grad_norm": 0.7147438526153564, "learning_rate": 1.8460162601626018e-05, "loss": 0.1485, "step": 18940 }, { "epoch": 0.23103658536585367, "grad_norm": 0.8609045147895813, "learning_rate": 1.8459756097560976e-05, "loss": 0.1266, "step": 18945 }, { "epoch": 0.23109756097560977, "grad_norm": 0.6810513734817505, "learning_rate": 1.8459349593495937e-05, "loss": 0.1133, "step": 18950 }, { "epoch": 0.23115853658536586, "grad_norm": 0.9267876148223877, "learning_rate": 1.8458943089430895e-05, "loss": 0.1173, "step": 18955 }, { "epoch": 0.23121951219512196, "grad_norm": 0.7384090423583984, "learning_rate": 1.8458536585365857e-05, "loss": 0.1464, "step": 18960 }, { "epoch": 0.23128048780487806, "grad_norm": 0.8730599880218506, "learning_rate": 1.8458130081300815e-05, "loss": 0.1257, "step": 18965 }, { "epoch": 0.23134146341463416, "grad_norm": 0.9207214117050171, "learning_rate": 1.8457723577235773e-05, "loss": 0.1233, "step": 18970 }, { "epoch": 0.23140243902439026, "grad_norm": 1.0552372932434082, "learning_rate": 1.8457317073170735e-05, "loss": 0.1015, "step": 18975 }, { "epoch": 0.23146341463414635, "grad_norm": 1.1030614376068115, "learning_rate": 1.8456910569105693e-05, "loss": 0.1166, "step": 18980 }, { "epoch": 0.23152439024390245, "grad_norm": 1.0581563711166382, "learning_rate": 1.845650406504065e-05, "loss": 0.1149, "step": 18985 }, { "epoch": 0.23158536585365855, "grad_norm": 1.01129150390625, "learning_rate": 1.8456097560975612e-05, "loss": 0.1179, "step": 18990 }, { "epoch": 0.23164634146341465, "grad_norm": 0.6948027610778809, "learning_rate": 1.845569105691057e-05, "loss": 0.1076, "step": 18995 }, { "epoch": 0.23170731707317074, "grad_norm": 0.626914918422699, "learning_rate": 1.845528455284553e-05, "loss": 0.1119, "step": 19000 }, { "epoch": 0.23176829268292684, "grad_norm": 0.7368124723434448, "learning_rate": 1.845487804878049e-05, "loss": 0.108, "step": 19005 }, { "epoch": 0.23182926829268294, "grad_norm": 0.5848020911216736, "learning_rate": 1.8454471544715448e-05, "loss": 0.1313, "step": 19010 }, { "epoch": 0.23189024390243904, "grad_norm": 0.617242693901062, "learning_rate": 1.845406504065041e-05, "loss": 0.0952, "step": 19015 }, { "epoch": 0.23195121951219513, "grad_norm": 0.8372710943222046, "learning_rate": 1.8453658536585368e-05, "loss": 0.1085, "step": 19020 }, { "epoch": 0.23201219512195123, "grad_norm": 0.8353250026702881, "learning_rate": 1.8453252032520326e-05, "loss": 0.0912, "step": 19025 }, { "epoch": 0.23207317073170733, "grad_norm": 0.6828811168670654, "learning_rate": 1.8452845528455284e-05, "loss": 0.1061, "step": 19030 }, { "epoch": 0.23213414634146343, "grad_norm": 1.1701253652572632, "learning_rate": 1.8452439024390246e-05, "loss": 0.1566, "step": 19035 }, { "epoch": 0.23219512195121952, "grad_norm": 0.7022183537483215, "learning_rate": 1.8452032520325204e-05, "loss": 0.1552, "step": 19040 }, { "epoch": 0.23225609756097562, "grad_norm": 1.1283702850341797, "learning_rate": 1.8451626016260165e-05, "loss": 0.1069, "step": 19045 }, { "epoch": 0.23231707317073172, "grad_norm": 0.8791771531105042, "learning_rate": 1.8451219512195123e-05, "loss": 0.1468, "step": 19050 }, { "epoch": 0.23237804878048782, "grad_norm": 0.8569319844245911, "learning_rate": 1.845081300813008e-05, "loss": 0.1381, "step": 19055 }, { "epoch": 0.23243902439024391, "grad_norm": 0.785261869430542, "learning_rate": 1.8450406504065043e-05, "loss": 0.1269, "step": 19060 }, { "epoch": 0.2325, "grad_norm": 0.5572063326835632, "learning_rate": 1.845e-05, "loss": 0.0964, "step": 19065 }, { "epoch": 0.2325609756097561, "grad_norm": 0.6916512846946716, "learning_rate": 1.844959349593496e-05, "loss": 0.1124, "step": 19070 }, { "epoch": 0.2326219512195122, "grad_norm": 0.7414398193359375, "learning_rate": 1.844918699186992e-05, "loss": 0.0919, "step": 19075 }, { "epoch": 0.2326829268292683, "grad_norm": 0.9995231032371521, "learning_rate": 1.844878048780488e-05, "loss": 0.0964, "step": 19080 }, { "epoch": 0.2327439024390244, "grad_norm": 1.9533883333206177, "learning_rate": 1.8448373983739837e-05, "loss": 0.0954, "step": 19085 }, { "epoch": 0.2328048780487805, "grad_norm": 0.7388507127761841, "learning_rate": 1.84479674796748e-05, "loss": 0.1031, "step": 19090 }, { "epoch": 0.2328658536585366, "grad_norm": 1.524481177330017, "learning_rate": 1.8447560975609757e-05, "loss": 0.1495, "step": 19095 }, { "epoch": 0.2329268292682927, "grad_norm": 0.3961547315120697, "learning_rate": 1.8447154471544718e-05, "loss": 0.0971, "step": 19100 }, { "epoch": 0.2329878048780488, "grad_norm": 0.6310913562774658, "learning_rate": 1.8446747967479676e-05, "loss": 0.096, "step": 19105 }, { "epoch": 0.2330487804878049, "grad_norm": 2.6832854747772217, "learning_rate": 1.8446341463414638e-05, "loss": 0.0923, "step": 19110 }, { "epoch": 0.233109756097561, "grad_norm": 1.0678596496582031, "learning_rate": 1.8445934959349593e-05, "loss": 0.0992, "step": 19115 }, { "epoch": 0.23317073170731709, "grad_norm": 1.5528632402420044, "learning_rate": 1.8445528455284554e-05, "loss": 0.1225, "step": 19120 }, { "epoch": 0.23323170731707318, "grad_norm": 0.7144783139228821, "learning_rate": 1.8445121951219512e-05, "loss": 0.1221, "step": 19125 }, { "epoch": 0.23329268292682928, "grad_norm": 0.6123691201210022, "learning_rate": 1.8444715447154474e-05, "loss": 0.1248, "step": 19130 }, { "epoch": 0.23335365853658538, "grad_norm": 0.5261977910995483, "learning_rate": 1.8444308943089432e-05, "loss": 0.0877, "step": 19135 }, { "epoch": 0.23341463414634148, "grad_norm": 0.7222962379455566, "learning_rate": 1.8443902439024393e-05, "loss": 0.082, "step": 19140 }, { "epoch": 0.23347560975609757, "grad_norm": 0.8640010356903076, "learning_rate": 1.844349593495935e-05, "loss": 0.1176, "step": 19145 }, { "epoch": 0.23353658536585367, "grad_norm": 1.1146975755691528, "learning_rate": 1.844308943089431e-05, "loss": 0.1455, "step": 19150 }, { "epoch": 0.23359756097560977, "grad_norm": 0.7515689134597778, "learning_rate": 1.8442682926829268e-05, "loss": 0.1323, "step": 19155 }, { "epoch": 0.23365853658536587, "grad_norm": 1.233927845954895, "learning_rate": 1.844227642276423e-05, "loss": 0.0932, "step": 19160 }, { "epoch": 0.23371951219512196, "grad_norm": 0.8299046158790588, "learning_rate": 1.8441869918699188e-05, "loss": 0.1329, "step": 19165 }, { "epoch": 0.23378048780487806, "grad_norm": 0.7776274681091309, "learning_rate": 1.844146341463415e-05, "loss": 0.0861, "step": 19170 }, { "epoch": 0.23384146341463416, "grad_norm": 0.4416499435901642, "learning_rate": 1.8441056910569107e-05, "loss": 0.1158, "step": 19175 }, { "epoch": 0.23390243902439026, "grad_norm": 1.754860520362854, "learning_rate": 1.8440650406504065e-05, "loss": 0.1574, "step": 19180 }, { "epoch": 0.23396341463414635, "grad_norm": 0.8930497765541077, "learning_rate": 1.8440243902439027e-05, "loss": 0.1351, "step": 19185 }, { "epoch": 0.23402439024390245, "grad_norm": 0.6737975478172302, "learning_rate": 1.8439837398373985e-05, "loss": 0.0917, "step": 19190 }, { "epoch": 0.23408536585365855, "grad_norm": 0.7141461968421936, "learning_rate": 1.8439430894308946e-05, "loss": 0.1167, "step": 19195 }, { "epoch": 0.23414634146341465, "grad_norm": 0.5109255909919739, "learning_rate": 1.8439024390243905e-05, "loss": 0.1102, "step": 19200 }, { "epoch": 0.23420731707317075, "grad_norm": 0.7825330495834351, "learning_rate": 1.8438617886178863e-05, "loss": 0.1216, "step": 19205 }, { "epoch": 0.23426829268292684, "grad_norm": 0.8264016509056091, "learning_rate": 1.843821138211382e-05, "loss": 0.1179, "step": 19210 }, { "epoch": 0.23432926829268294, "grad_norm": 2.30155611038208, "learning_rate": 1.8437804878048782e-05, "loss": 0.1908, "step": 19215 }, { "epoch": 0.234390243902439, "grad_norm": 1.3034043312072754, "learning_rate": 1.843739837398374e-05, "loss": 0.1027, "step": 19220 }, { "epoch": 0.2344512195121951, "grad_norm": 0.5776201486587524, "learning_rate": 1.8436991869918702e-05, "loss": 0.1054, "step": 19225 }, { "epoch": 0.2345121951219512, "grad_norm": 1.2075449228286743, "learning_rate": 1.843658536585366e-05, "loss": 0.0901, "step": 19230 }, { "epoch": 0.2345731707317073, "grad_norm": 1.3910537958145142, "learning_rate": 1.8436178861788618e-05, "loss": 0.0997, "step": 19235 }, { "epoch": 0.2346341463414634, "grad_norm": 0.4940122067928314, "learning_rate": 1.843577235772358e-05, "loss": 0.1041, "step": 19240 }, { "epoch": 0.2346951219512195, "grad_norm": 0.7433070540428162, "learning_rate": 1.8435365853658538e-05, "loss": 0.1086, "step": 19245 }, { "epoch": 0.2347560975609756, "grad_norm": 1.1288373470306396, "learning_rate": 1.8434959349593496e-05, "loss": 0.1282, "step": 19250 }, { "epoch": 0.2348170731707317, "grad_norm": 0.9070663452148438, "learning_rate": 1.8434552845528458e-05, "loss": 0.0992, "step": 19255 }, { "epoch": 0.2348780487804878, "grad_norm": 0.9138774871826172, "learning_rate": 1.8434146341463416e-05, "loss": 0.1164, "step": 19260 }, { "epoch": 0.2349390243902439, "grad_norm": 1.1262402534484863, "learning_rate": 1.8433739837398374e-05, "loss": 0.1095, "step": 19265 }, { "epoch": 0.235, "grad_norm": 0.6929694414138794, "learning_rate": 1.8433333333333335e-05, "loss": 0.1074, "step": 19270 }, { "epoch": 0.23506097560975608, "grad_norm": 0.5878432393074036, "learning_rate": 1.8432926829268293e-05, "loss": 0.1067, "step": 19275 }, { "epoch": 0.23512195121951218, "grad_norm": 0.7421726584434509, "learning_rate": 1.8432520325203255e-05, "loss": 0.1358, "step": 19280 }, { "epoch": 0.23518292682926828, "grad_norm": 0.9538961052894592, "learning_rate": 1.8432113821138213e-05, "loss": 0.1149, "step": 19285 }, { "epoch": 0.23524390243902438, "grad_norm": 2.5428731441497803, "learning_rate": 1.8431707317073175e-05, "loss": 0.1171, "step": 19290 }, { "epoch": 0.23530487804878047, "grad_norm": 0.8917735815048218, "learning_rate": 1.843130081300813e-05, "loss": 0.117, "step": 19295 }, { "epoch": 0.23536585365853657, "grad_norm": 0.919899046421051, "learning_rate": 1.843089430894309e-05, "loss": 0.1011, "step": 19300 }, { "epoch": 0.23542682926829267, "grad_norm": 1.279532551765442, "learning_rate": 1.843048780487805e-05, "loss": 0.1408, "step": 19305 }, { "epoch": 0.23548780487804877, "grad_norm": 0.6514133214950562, "learning_rate": 1.843008130081301e-05, "loss": 0.093, "step": 19310 }, { "epoch": 0.23554878048780487, "grad_norm": 0.8061856627464294, "learning_rate": 1.842967479674797e-05, "loss": 0.1033, "step": 19315 }, { "epoch": 0.23560975609756096, "grad_norm": 0.5396153926849365, "learning_rate": 1.842926829268293e-05, "loss": 0.0987, "step": 19320 }, { "epoch": 0.23567073170731706, "grad_norm": 0.9270543456077576, "learning_rate": 1.8428861788617888e-05, "loss": 0.1198, "step": 19325 }, { "epoch": 0.23573170731707316, "grad_norm": 2.564185619354248, "learning_rate": 1.8428455284552846e-05, "loss": 0.147, "step": 19330 }, { "epoch": 0.23579268292682926, "grad_norm": 3.412029981613159, "learning_rate": 1.8428048780487804e-05, "loss": 0.0976, "step": 19335 }, { "epoch": 0.23585365853658535, "grad_norm": 0.7509846687316895, "learning_rate": 1.8427642276422766e-05, "loss": 0.1155, "step": 19340 }, { "epoch": 0.23591463414634145, "grad_norm": 0.8363279104232788, "learning_rate": 1.8427235772357724e-05, "loss": 0.0961, "step": 19345 }, { "epoch": 0.23597560975609755, "grad_norm": 0.9025886654853821, "learning_rate": 1.8426829268292686e-05, "loss": 0.0957, "step": 19350 }, { "epoch": 0.23603658536585365, "grad_norm": 0.5870231986045837, "learning_rate": 1.8426422764227644e-05, "loss": 0.1238, "step": 19355 }, { "epoch": 0.23609756097560974, "grad_norm": 0.719616711139679, "learning_rate": 1.8426016260162602e-05, "loss": 0.1506, "step": 19360 }, { "epoch": 0.23615853658536584, "grad_norm": 1.0052157640457153, "learning_rate": 1.8425609756097563e-05, "loss": 0.1059, "step": 19365 }, { "epoch": 0.23621951219512194, "grad_norm": 0.6658673882484436, "learning_rate": 1.842520325203252e-05, "loss": 0.1502, "step": 19370 }, { "epoch": 0.23628048780487804, "grad_norm": 0.9689531922340393, "learning_rate": 1.8424796747967483e-05, "loss": 0.1499, "step": 19375 }, { "epoch": 0.23634146341463413, "grad_norm": 0.3780660629272461, "learning_rate": 1.842439024390244e-05, "loss": 0.1172, "step": 19380 }, { "epoch": 0.23640243902439023, "grad_norm": 0.77325439453125, "learning_rate": 1.84239837398374e-05, "loss": 0.0949, "step": 19385 }, { "epoch": 0.23646341463414633, "grad_norm": 0.8151955008506775, "learning_rate": 1.8423577235772357e-05, "loss": 0.0987, "step": 19390 }, { "epoch": 0.23652439024390243, "grad_norm": 0.8376067280769348, "learning_rate": 1.842317073170732e-05, "loss": 0.0849, "step": 19395 }, { "epoch": 0.23658536585365852, "grad_norm": 0.5963724255561829, "learning_rate": 1.8422764227642277e-05, "loss": 0.1298, "step": 19400 }, { "epoch": 0.23664634146341462, "grad_norm": 0.8586075305938721, "learning_rate": 1.842235772357724e-05, "loss": 0.1035, "step": 19405 }, { "epoch": 0.23670731707317072, "grad_norm": 0.4313242435455322, "learning_rate": 1.8421951219512197e-05, "loss": 0.1177, "step": 19410 }, { "epoch": 0.23676829268292682, "grad_norm": 0.5677335262298584, "learning_rate": 1.8421544715447155e-05, "loss": 0.1052, "step": 19415 }, { "epoch": 0.23682926829268292, "grad_norm": 0.9549955725669861, "learning_rate": 1.8421138211382113e-05, "loss": 0.1006, "step": 19420 }, { "epoch": 0.236890243902439, "grad_norm": 1.2759374380111694, "learning_rate": 1.8420731707317075e-05, "loss": 0.1023, "step": 19425 }, { "epoch": 0.2369512195121951, "grad_norm": 0.666492760181427, "learning_rate": 1.8420325203252033e-05, "loss": 0.0867, "step": 19430 }, { "epoch": 0.2370121951219512, "grad_norm": 0.6513785123825073, "learning_rate": 1.8419918699186994e-05, "loss": 0.1051, "step": 19435 }, { "epoch": 0.2370731707317073, "grad_norm": 0.9938097596168518, "learning_rate": 1.8419512195121952e-05, "loss": 0.1013, "step": 19440 }, { "epoch": 0.2371341463414634, "grad_norm": 1.1259061098098755, "learning_rate": 1.841910569105691e-05, "loss": 0.1572, "step": 19445 }, { "epoch": 0.2371951219512195, "grad_norm": 0.8724405169487, "learning_rate": 1.8418699186991872e-05, "loss": 0.0961, "step": 19450 }, { "epoch": 0.2372560975609756, "grad_norm": 0.8614699244499207, "learning_rate": 1.841829268292683e-05, "loss": 0.0966, "step": 19455 }, { "epoch": 0.2373170731707317, "grad_norm": 1.6339685916900635, "learning_rate": 1.841788617886179e-05, "loss": 0.103, "step": 19460 }, { "epoch": 0.2373780487804878, "grad_norm": 0.6205081343650818, "learning_rate": 1.841747967479675e-05, "loss": 0.1179, "step": 19465 }, { "epoch": 0.2374390243902439, "grad_norm": 0.6109262704849243, "learning_rate": 1.841707317073171e-05, "loss": 0.1173, "step": 19470 }, { "epoch": 0.2375, "grad_norm": 0.7288913726806641, "learning_rate": 1.8416666666666666e-05, "loss": 0.1026, "step": 19475 }, { "epoch": 0.2375609756097561, "grad_norm": 0.7935600876808167, "learning_rate": 1.8416260162601627e-05, "loss": 0.1812, "step": 19480 }, { "epoch": 0.23762195121951218, "grad_norm": 0.736230731010437, "learning_rate": 1.8415853658536586e-05, "loss": 0.0869, "step": 19485 }, { "epoch": 0.23768292682926828, "grad_norm": 0.9151814579963684, "learning_rate": 1.8415447154471547e-05, "loss": 0.1257, "step": 19490 }, { "epoch": 0.23774390243902438, "grad_norm": 1.1008055210113525, "learning_rate": 1.8415040650406505e-05, "loss": 0.0927, "step": 19495 }, { "epoch": 0.23780487804878048, "grad_norm": 0.46400800347328186, "learning_rate": 1.8414634146341467e-05, "loss": 0.1193, "step": 19500 }, { "epoch": 0.23786585365853657, "grad_norm": 2.7715084552764893, "learning_rate": 1.8414227642276425e-05, "loss": 0.1079, "step": 19505 }, { "epoch": 0.23792682926829267, "grad_norm": 0.6177197098731995, "learning_rate": 1.8413821138211383e-05, "loss": 0.1349, "step": 19510 }, { "epoch": 0.23798780487804877, "grad_norm": 0.8798009753227234, "learning_rate": 1.841341463414634e-05, "loss": 0.1117, "step": 19515 }, { "epoch": 0.23804878048780487, "grad_norm": 0.8285876512527466, "learning_rate": 1.8413008130081303e-05, "loss": 0.1408, "step": 19520 }, { "epoch": 0.23810975609756097, "grad_norm": 0.5428522229194641, "learning_rate": 1.841260162601626e-05, "loss": 0.1099, "step": 19525 }, { "epoch": 0.23817073170731706, "grad_norm": 0.5355010032653809, "learning_rate": 1.8412195121951222e-05, "loss": 0.114, "step": 19530 }, { "epoch": 0.23823170731707316, "grad_norm": 0.6563096642494202, "learning_rate": 1.841178861788618e-05, "loss": 0.1332, "step": 19535 }, { "epoch": 0.23829268292682926, "grad_norm": 1.1489239931106567, "learning_rate": 1.841138211382114e-05, "loss": 0.0879, "step": 19540 }, { "epoch": 0.23835365853658536, "grad_norm": 2.6248648166656494, "learning_rate": 1.84109756097561e-05, "loss": 0.1047, "step": 19545 }, { "epoch": 0.23841463414634145, "grad_norm": 1.1419962644577026, "learning_rate": 1.8410569105691058e-05, "loss": 0.1117, "step": 19550 }, { "epoch": 0.23847560975609755, "grad_norm": 0.5060521960258484, "learning_rate": 1.841016260162602e-05, "loss": 0.11, "step": 19555 }, { "epoch": 0.23853658536585365, "grad_norm": 1.3006048202514648, "learning_rate": 1.8409756097560978e-05, "loss": 0.1218, "step": 19560 }, { "epoch": 0.23859756097560975, "grad_norm": 1.1591614484786987, "learning_rate": 1.8409349593495936e-05, "loss": 0.1221, "step": 19565 }, { "epoch": 0.23865853658536584, "grad_norm": 0.6246023178100586, "learning_rate": 1.8408943089430894e-05, "loss": 0.1251, "step": 19570 }, { "epoch": 0.23871951219512194, "grad_norm": 0.7212145924568176, "learning_rate": 1.8408536585365856e-05, "loss": 0.1425, "step": 19575 }, { "epoch": 0.23878048780487804, "grad_norm": 1.4211612939834595, "learning_rate": 1.8408130081300814e-05, "loss": 0.1454, "step": 19580 }, { "epoch": 0.23884146341463414, "grad_norm": 0.8296123147010803, "learning_rate": 1.8407723577235775e-05, "loss": 0.1166, "step": 19585 }, { "epoch": 0.23890243902439023, "grad_norm": 1.2191113233566284, "learning_rate": 1.8407317073170733e-05, "loss": 0.1209, "step": 19590 }, { "epoch": 0.23896341463414633, "grad_norm": 0.6446125507354736, "learning_rate": 1.840691056910569e-05, "loss": 0.1089, "step": 19595 }, { "epoch": 0.23902439024390243, "grad_norm": 0.9547168016433716, "learning_rate": 1.840650406504065e-05, "loss": 0.141, "step": 19600 }, { "epoch": 0.23908536585365853, "grad_norm": 1.1084885597229004, "learning_rate": 1.840609756097561e-05, "loss": 0.1353, "step": 19605 }, { "epoch": 0.23914634146341462, "grad_norm": 0.9702252149581909, "learning_rate": 1.840569105691057e-05, "loss": 0.1326, "step": 19610 }, { "epoch": 0.23920731707317072, "grad_norm": 0.5291832685470581, "learning_rate": 1.840528455284553e-05, "loss": 0.1071, "step": 19615 }, { "epoch": 0.23926829268292682, "grad_norm": 0.4091024696826935, "learning_rate": 1.840487804878049e-05, "loss": 0.0716, "step": 19620 }, { "epoch": 0.23932926829268292, "grad_norm": 0.9648524522781372, "learning_rate": 1.8404471544715447e-05, "loss": 0.1308, "step": 19625 }, { "epoch": 0.23939024390243901, "grad_norm": 0.9130369424819946, "learning_rate": 1.840406504065041e-05, "loss": 0.1087, "step": 19630 }, { "epoch": 0.2394512195121951, "grad_norm": 0.8563045263290405, "learning_rate": 1.8403658536585367e-05, "loss": 0.0942, "step": 19635 }, { "epoch": 0.2395121951219512, "grad_norm": 0.49637308716773987, "learning_rate": 1.8403252032520328e-05, "loss": 0.1045, "step": 19640 }, { "epoch": 0.2395731707317073, "grad_norm": 0.8834463953971863, "learning_rate": 1.8402845528455286e-05, "loss": 0.0878, "step": 19645 }, { "epoch": 0.2396341463414634, "grad_norm": 0.9668340682983398, "learning_rate": 1.8402439024390248e-05, "loss": 0.1162, "step": 19650 }, { "epoch": 0.2396951219512195, "grad_norm": 1.045034408569336, "learning_rate": 1.8402032520325203e-05, "loss": 0.1405, "step": 19655 }, { "epoch": 0.2397560975609756, "grad_norm": 0.7062855958938599, "learning_rate": 1.8401626016260164e-05, "loss": 0.0929, "step": 19660 }, { "epoch": 0.2398170731707317, "grad_norm": 0.49305933713912964, "learning_rate": 1.8401219512195122e-05, "loss": 0.1082, "step": 19665 }, { "epoch": 0.2398780487804878, "grad_norm": 0.8833307027816772, "learning_rate": 1.8400813008130084e-05, "loss": 0.1349, "step": 19670 }, { "epoch": 0.2399390243902439, "grad_norm": 0.74134761095047, "learning_rate": 1.8400406504065042e-05, "loss": 0.1116, "step": 19675 }, { "epoch": 0.24, "grad_norm": 0.5588046312332153, "learning_rate": 1.8400000000000003e-05, "loss": 0.1445, "step": 19680 }, { "epoch": 0.2400609756097561, "grad_norm": 0.6560701727867126, "learning_rate": 1.8399593495934958e-05, "loss": 0.1521, "step": 19685 }, { "epoch": 0.2401219512195122, "grad_norm": 0.8334582448005676, "learning_rate": 1.839918699186992e-05, "loss": 0.098, "step": 19690 }, { "epoch": 0.24018292682926828, "grad_norm": 1.1831458806991577, "learning_rate": 1.8398780487804878e-05, "loss": 0.0988, "step": 19695 }, { "epoch": 0.24024390243902438, "grad_norm": 0.5296885371208191, "learning_rate": 1.839837398373984e-05, "loss": 0.1172, "step": 19700 }, { "epoch": 0.24030487804878048, "grad_norm": 1.0890777111053467, "learning_rate": 1.8397967479674797e-05, "loss": 0.1471, "step": 19705 }, { "epoch": 0.24036585365853658, "grad_norm": 0.5681262016296387, "learning_rate": 1.839756097560976e-05, "loss": 0.0791, "step": 19710 }, { "epoch": 0.24042682926829267, "grad_norm": 0.7075493335723877, "learning_rate": 1.8397154471544717e-05, "loss": 0.0994, "step": 19715 }, { "epoch": 0.24048780487804877, "grad_norm": 0.9125233888626099, "learning_rate": 1.8396747967479675e-05, "loss": 0.1012, "step": 19720 }, { "epoch": 0.24054878048780487, "grad_norm": 0.6630572080612183, "learning_rate": 1.8396341463414637e-05, "loss": 0.0632, "step": 19725 }, { "epoch": 0.24060975609756097, "grad_norm": 1.02983820438385, "learning_rate": 1.8395934959349595e-05, "loss": 0.133, "step": 19730 }, { "epoch": 0.24067073170731706, "grad_norm": 0.9965811967849731, "learning_rate": 1.8395528455284556e-05, "loss": 0.163, "step": 19735 }, { "epoch": 0.24073170731707316, "grad_norm": 0.7773765325546265, "learning_rate": 1.8395121951219514e-05, "loss": 0.0826, "step": 19740 }, { "epoch": 0.24079268292682926, "grad_norm": 0.9751251339912415, "learning_rate": 1.8394715447154473e-05, "loss": 0.123, "step": 19745 }, { "epoch": 0.24085365853658536, "grad_norm": 0.4814213216304779, "learning_rate": 1.839430894308943e-05, "loss": 0.0843, "step": 19750 }, { "epoch": 0.24091463414634146, "grad_norm": 1.0153411626815796, "learning_rate": 1.8393902439024392e-05, "loss": 0.1328, "step": 19755 }, { "epoch": 0.24097560975609755, "grad_norm": 1.04469633102417, "learning_rate": 1.839349593495935e-05, "loss": 0.1073, "step": 19760 }, { "epoch": 0.24103658536585365, "grad_norm": 0.9469773173332214, "learning_rate": 1.8393089430894312e-05, "loss": 0.1393, "step": 19765 }, { "epoch": 0.24109756097560975, "grad_norm": 1.4789609909057617, "learning_rate": 1.839268292682927e-05, "loss": 0.1274, "step": 19770 }, { "epoch": 0.24115853658536585, "grad_norm": 0.5693922638893127, "learning_rate": 1.8392276422764228e-05, "loss": 0.0919, "step": 19775 }, { "epoch": 0.24121951219512194, "grad_norm": 0.5283398032188416, "learning_rate": 1.8391869918699186e-05, "loss": 0.1271, "step": 19780 }, { "epoch": 0.24128048780487804, "grad_norm": 1.4721604585647583, "learning_rate": 1.8391463414634148e-05, "loss": 0.1155, "step": 19785 }, { "epoch": 0.24134146341463414, "grad_norm": 1.2164937257766724, "learning_rate": 1.8391056910569106e-05, "loss": 0.131, "step": 19790 }, { "epoch": 0.24140243902439024, "grad_norm": 0.8745148777961731, "learning_rate": 1.8390650406504067e-05, "loss": 0.1289, "step": 19795 }, { "epoch": 0.24146341463414633, "grad_norm": 0.9247004389762878, "learning_rate": 1.8390243902439026e-05, "loss": 0.0874, "step": 19800 }, { "epoch": 0.24152439024390243, "grad_norm": 0.7978163957595825, "learning_rate": 1.8389837398373984e-05, "loss": 0.109, "step": 19805 }, { "epoch": 0.24158536585365853, "grad_norm": 0.7343635559082031, "learning_rate": 1.8389430894308945e-05, "loss": 0.1118, "step": 19810 }, { "epoch": 0.24164634146341463, "grad_norm": 0.6914651989936829, "learning_rate": 1.8389024390243903e-05, "loss": 0.1184, "step": 19815 }, { "epoch": 0.24170731707317072, "grad_norm": 0.49652570486068726, "learning_rate": 1.8388617886178865e-05, "loss": 0.0782, "step": 19820 }, { "epoch": 0.24176829268292682, "grad_norm": 0.6481441855430603, "learning_rate": 1.8388211382113823e-05, "loss": 0.1063, "step": 19825 }, { "epoch": 0.24182926829268292, "grad_norm": 0.9988501071929932, "learning_rate": 1.838780487804878e-05, "loss": 0.1071, "step": 19830 }, { "epoch": 0.24189024390243902, "grad_norm": 0.8737985491752625, "learning_rate": 1.838739837398374e-05, "loss": 0.117, "step": 19835 }, { "epoch": 0.24195121951219511, "grad_norm": 0.8475887179374695, "learning_rate": 1.83869918699187e-05, "loss": 0.1375, "step": 19840 }, { "epoch": 0.2420121951219512, "grad_norm": 1.7166435718536377, "learning_rate": 1.838658536585366e-05, "loss": 0.1082, "step": 19845 }, { "epoch": 0.2420731707317073, "grad_norm": 1.3484946489334106, "learning_rate": 1.838617886178862e-05, "loss": 0.1125, "step": 19850 }, { "epoch": 0.2421341463414634, "grad_norm": 0.7805280089378357, "learning_rate": 1.838577235772358e-05, "loss": 0.0922, "step": 19855 }, { "epoch": 0.2421951219512195, "grad_norm": 0.906917154788971, "learning_rate": 1.838536585365854e-05, "loss": 0.12, "step": 19860 }, { "epoch": 0.2422560975609756, "grad_norm": 0.9523913860321045, "learning_rate": 1.8384959349593495e-05, "loss": 0.1068, "step": 19865 }, { "epoch": 0.2423170731707317, "grad_norm": 0.4773968458175659, "learning_rate": 1.8384552845528456e-05, "loss": 0.1277, "step": 19870 }, { "epoch": 0.2423780487804878, "grad_norm": 0.7324653267860413, "learning_rate": 1.8384146341463414e-05, "loss": 0.0905, "step": 19875 }, { "epoch": 0.2424390243902439, "grad_norm": 1.676094651222229, "learning_rate": 1.8383739837398376e-05, "loss": 0.1358, "step": 19880 }, { "epoch": 0.2425, "grad_norm": 1.257089614868164, "learning_rate": 1.8383333333333334e-05, "loss": 0.1366, "step": 19885 }, { "epoch": 0.2425609756097561, "grad_norm": 0.6634739637374878, "learning_rate": 1.8382926829268296e-05, "loss": 0.1211, "step": 19890 }, { "epoch": 0.2426219512195122, "grad_norm": 0.6513113379478455, "learning_rate": 1.8382520325203254e-05, "loss": 0.1131, "step": 19895 }, { "epoch": 0.2426829268292683, "grad_norm": 1.2191215753555298, "learning_rate": 1.8382113821138212e-05, "loss": 0.1391, "step": 19900 }, { "epoch": 0.24274390243902438, "grad_norm": 0.8565521240234375, "learning_rate": 1.8381707317073173e-05, "loss": 0.0713, "step": 19905 }, { "epoch": 0.24280487804878048, "grad_norm": 0.8898703455924988, "learning_rate": 1.838130081300813e-05, "loss": 0.12, "step": 19910 }, { "epoch": 0.24286585365853658, "grad_norm": 0.5544729828834534, "learning_rate": 1.8380894308943093e-05, "loss": 0.1209, "step": 19915 }, { "epoch": 0.24292682926829268, "grad_norm": 0.562701404094696, "learning_rate": 1.838048780487805e-05, "loss": 0.1205, "step": 19920 }, { "epoch": 0.24298780487804877, "grad_norm": 0.7250763773918152, "learning_rate": 1.838008130081301e-05, "loss": 0.1182, "step": 19925 }, { "epoch": 0.24304878048780487, "grad_norm": 0.8468043804168701, "learning_rate": 1.8379674796747967e-05, "loss": 0.1002, "step": 19930 }, { "epoch": 0.24310975609756097, "grad_norm": 1.079758882522583, "learning_rate": 1.837926829268293e-05, "loss": 0.1223, "step": 19935 }, { "epoch": 0.24317073170731707, "grad_norm": 1.5157129764556885, "learning_rate": 1.8378861788617887e-05, "loss": 0.1632, "step": 19940 }, { "epoch": 0.24323170731707316, "grad_norm": 0.6492918133735657, "learning_rate": 1.837845528455285e-05, "loss": 0.0794, "step": 19945 }, { "epoch": 0.24329268292682926, "grad_norm": 2.708115577697754, "learning_rate": 1.8378048780487807e-05, "loss": 0.1326, "step": 19950 }, { "epoch": 0.24335365853658536, "grad_norm": 1.0590559244155884, "learning_rate": 1.8377642276422765e-05, "loss": 0.15, "step": 19955 }, { "epoch": 0.24341463414634146, "grad_norm": 0.6821926832199097, "learning_rate": 1.8377235772357723e-05, "loss": 0.1214, "step": 19960 }, { "epoch": 0.24347560975609756, "grad_norm": 0.7036481499671936, "learning_rate": 1.8376829268292684e-05, "loss": 0.1199, "step": 19965 }, { "epoch": 0.24353658536585365, "grad_norm": 0.7056852579116821, "learning_rate": 1.8376422764227643e-05, "loss": 0.1281, "step": 19970 }, { "epoch": 0.24359756097560975, "grad_norm": 0.5229293704032898, "learning_rate": 1.8376016260162604e-05, "loss": 0.1066, "step": 19975 }, { "epoch": 0.24365853658536585, "grad_norm": 0.666157066822052, "learning_rate": 1.8375609756097562e-05, "loss": 0.0873, "step": 19980 }, { "epoch": 0.24371951219512195, "grad_norm": 0.702277660369873, "learning_rate": 1.837520325203252e-05, "loss": 0.1235, "step": 19985 }, { "epoch": 0.24378048780487804, "grad_norm": 2.43914794921875, "learning_rate": 1.8374796747967482e-05, "loss": 0.0918, "step": 19990 }, { "epoch": 0.24384146341463414, "grad_norm": 2.014373779296875, "learning_rate": 1.837439024390244e-05, "loss": 0.1176, "step": 19995 }, { "epoch": 0.24390243902439024, "grad_norm": 0.9826141595840454, "learning_rate": 1.83739837398374e-05, "loss": 0.0924, "step": 20000 }, { "epoch": 0.24396341463414634, "grad_norm": 1.675162672996521, "learning_rate": 1.837357723577236e-05, "loss": 0.1385, "step": 20005 }, { "epoch": 0.24402439024390243, "grad_norm": 0.7271140813827515, "learning_rate": 1.8373170731707318e-05, "loss": 0.1392, "step": 20010 }, { "epoch": 0.24408536585365853, "grad_norm": 2.14137864112854, "learning_rate": 1.8372764227642276e-05, "loss": 0.1042, "step": 20015 }, { "epoch": 0.24414634146341463, "grad_norm": 0.6470587253570557, "learning_rate": 1.8372357723577237e-05, "loss": 0.1216, "step": 20020 }, { "epoch": 0.24420731707317073, "grad_norm": 0.7252209186553955, "learning_rate": 1.8371951219512196e-05, "loss": 0.1433, "step": 20025 }, { "epoch": 0.24426829268292682, "grad_norm": 0.5470742583274841, "learning_rate": 1.8371544715447157e-05, "loss": 0.0994, "step": 20030 }, { "epoch": 0.24432926829268292, "grad_norm": 1.5065081119537354, "learning_rate": 1.8371138211382115e-05, "loss": 0.1211, "step": 20035 }, { "epoch": 0.24439024390243902, "grad_norm": 0.6514719128608704, "learning_rate": 1.8370731707317077e-05, "loss": 0.0858, "step": 20040 }, { "epoch": 0.24445121951219512, "grad_norm": 0.6752686500549316, "learning_rate": 1.837032520325203e-05, "loss": 0.0842, "step": 20045 }, { "epoch": 0.24451219512195121, "grad_norm": 0.6431912183761597, "learning_rate": 1.8369918699186993e-05, "loss": 0.1228, "step": 20050 }, { "epoch": 0.2445731707317073, "grad_norm": 0.8377525806427002, "learning_rate": 1.836951219512195e-05, "loss": 0.1208, "step": 20055 }, { "epoch": 0.2446341463414634, "grad_norm": 1.4628112316131592, "learning_rate": 1.8369105691056913e-05, "loss": 0.1014, "step": 20060 }, { "epoch": 0.2446951219512195, "grad_norm": 0.8998684287071228, "learning_rate": 1.836869918699187e-05, "loss": 0.1118, "step": 20065 }, { "epoch": 0.2447560975609756, "grad_norm": 1.0605921745300293, "learning_rate": 1.8368292682926832e-05, "loss": 0.1389, "step": 20070 }, { "epoch": 0.2448170731707317, "grad_norm": 0.8287965655326843, "learning_rate": 1.836788617886179e-05, "loss": 0.0937, "step": 20075 }, { "epoch": 0.2448780487804878, "grad_norm": 0.7137671709060669, "learning_rate": 1.836747967479675e-05, "loss": 0.0949, "step": 20080 }, { "epoch": 0.2449390243902439, "grad_norm": 0.9497233629226685, "learning_rate": 1.836707317073171e-05, "loss": 0.1104, "step": 20085 }, { "epoch": 0.245, "grad_norm": 0.6782681941986084, "learning_rate": 1.8366666666666668e-05, "loss": 0.1046, "step": 20090 }, { "epoch": 0.2450609756097561, "grad_norm": 0.7980475425720215, "learning_rate": 1.8366260162601626e-05, "loss": 0.1228, "step": 20095 }, { "epoch": 0.2451219512195122, "grad_norm": 0.9100505709648132, "learning_rate": 1.8365853658536588e-05, "loss": 0.1071, "step": 20100 }, { "epoch": 0.2451829268292683, "grad_norm": 0.7972321510314941, "learning_rate": 1.8365447154471546e-05, "loss": 0.0889, "step": 20105 }, { "epoch": 0.24524390243902439, "grad_norm": 1.1110498905181885, "learning_rate": 1.8365040650406504e-05, "loss": 0.1245, "step": 20110 }, { "epoch": 0.24530487804878048, "grad_norm": 1.6006619930267334, "learning_rate": 1.8364634146341466e-05, "loss": 0.1149, "step": 20115 }, { "epoch": 0.24536585365853658, "grad_norm": 0.808143138885498, "learning_rate": 1.8364227642276424e-05, "loss": 0.0988, "step": 20120 }, { "epoch": 0.24542682926829268, "grad_norm": 1.448883295059204, "learning_rate": 1.8363821138211385e-05, "loss": 0.0907, "step": 20125 }, { "epoch": 0.24548780487804878, "grad_norm": 0.6010729074478149, "learning_rate": 1.8363414634146343e-05, "loss": 0.1122, "step": 20130 }, { "epoch": 0.24554878048780487, "grad_norm": 0.8481057286262512, "learning_rate": 1.83630081300813e-05, "loss": 0.1573, "step": 20135 }, { "epoch": 0.24560975609756097, "grad_norm": 1.205922245979309, "learning_rate": 1.836260162601626e-05, "loss": 0.0881, "step": 20140 }, { "epoch": 0.24567073170731707, "grad_norm": 1.064149260520935, "learning_rate": 1.836219512195122e-05, "loss": 0.0993, "step": 20145 }, { "epoch": 0.24573170731707317, "grad_norm": 0.6785114407539368, "learning_rate": 1.836178861788618e-05, "loss": 0.1075, "step": 20150 }, { "epoch": 0.24579268292682926, "grad_norm": 2.5077285766601562, "learning_rate": 1.836138211382114e-05, "loss": 0.0904, "step": 20155 }, { "epoch": 0.24585365853658536, "grad_norm": 0.6170461773872375, "learning_rate": 1.83609756097561e-05, "loss": 0.1129, "step": 20160 }, { "epoch": 0.24591463414634146, "grad_norm": 1.8427791595458984, "learning_rate": 1.8360569105691057e-05, "loss": 0.1216, "step": 20165 }, { "epoch": 0.24597560975609756, "grad_norm": 0.9472465515136719, "learning_rate": 1.836016260162602e-05, "loss": 0.0899, "step": 20170 }, { "epoch": 0.24603658536585366, "grad_norm": 0.708288848400116, "learning_rate": 1.8359756097560977e-05, "loss": 0.0987, "step": 20175 }, { "epoch": 0.24609756097560975, "grad_norm": 0.9716005325317383, "learning_rate": 1.8359349593495938e-05, "loss": 0.0978, "step": 20180 }, { "epoch": 0.24615853658536585, "grad_norm": 0.8207941651344299, "learning_rate": 1.8358943089430896e-05, "loss": 0.1231, "step": 20185 }, { "epoch": 0.24621951219512195, "grad_norm": 0.5283291339874268, "learning_rate": 1.8358536585365854e-05, "loss": 0.1063, "step": 20190 }, { "epoch": 0.24628048780487805, "grad_norm": 0.7391260266304016, "learning_rate": 1.8358130081300813e-05, "loss": 0.1213, "step": 20195 }, { "epoch": 0.24634146341463414, "grad_norm": 1.2885798215866089, "learning_rate": 1.8357723577235774e-05, "loss": 0.1542, "step": 20200 }, { "epoch": 0.24640243902439024, "grad_norm": 1.0327517986297607, "learning_rate": 1.8357317073170732e-05, "loss": 0.1231, "step": 20205 }, { "epoch": 0.24646341463414634, "grad_norm": 0.3808314800262451, "learning_rate": 1.8356910569105694e-05, "loss": 0.088, "step": 20210 }, { "epoch": 0.24652439024390244, "grad_norm": 1.0535640716552734, "learning_rate": 1.8356504065040652e-05, "loss": 0.0927, "step": 20215 }, { "epoch": 0.24658536585365853, "grad_norm": 2.9461545944213867, "learning_rate": 1.8356097560975613e-05, "loss": 0.1158, "step": 20220 }, { "epoch": 0.24664634146341463, "grad_norm": 0.6997600197792053, "learning_rate": 1.8355691056910568e-05, "loss": 0.1017, "step": 20225 }, { "epoch": 0.24670731707317073, "grad_norm": 0.6922504305839539, "learning_rate": 1.835528455284553e-05, "loss": 0.0956, "step": 20230 }, { "epoch": 0.24676829268292683, "grad_norm": 1.0909035205841064, "learning_rate": 1.8354878048780488e-05, "loss": 0.1304, "step": 20235 }, { "epoch": 0.24682926829268292, "grad_norm": 0.875038743019104, "learning_rate": 1.835447154471545e-05, "loss": 0.1097, "step": 20240 }, { "epoch": 0.24689024390243902, "grad_norm": 0.6770815253257751, "learning_rate": 1.8354065040650407e-05, "loss": 0.1159, "step": 20245 }, { "epoch": 0.24695121951219512, "grad_norm": 0.5781827569007874, "learning_rate": 1.835365853658537e-05, "loss": 0.1306, "step": 20250 }, { "epoch": 0.24701219512195122, "grad_norm": 0.6226261258125305, "learning_rate": 1.8353252032520327e-05, "loss": 0.0861, "step": 20255 }, { "epoch": 0.24707317073170731, "grad_norm": 0.6128671169281006, "learning_rate": 1.8352845528455285e-05, "loss": 0.0932, "step": 20260 }, { "epoch": 0.2471341463414634, "grad_norm": 0.5510351657867432, "learning_rate": 1.8352439024390247e-05, "loss": 0.0804, "step": 20265 }, { "epoch": 0.2471951219512195, "grad_norm": 1.6325242519378662, "learning_rate": 1.8352032520325205e-05, "loss": 0.1051, "step": 20270 }, { "epoch": 0.2472560975609756, "grad_norm": 0.7032472491264343, "learning_rate": 1.8351626016260163e-05, "loss": 0.1468, "step": 20275 }, { "epoch": 0.2473170731707317, "grad_norm": 1.0582213401794434, "learning_rate": 1.8351219512195124e-05, "loss": 0.1035, "step": 20280 }, { "epoch": 0.2473780487804878, "grad_norm": 0.9600726366043091, "learning_rate": 1.8350813008130083e-05, "loss": 0.1249, "step": 20285 }, { "epoch": 0.2474390243902439, "grad_norm": 1.5082619190216064, "learning_rate": 1.835040650406504e-05, "loss": 0.148, "step": 20290 }, { "epoch": 0.2475, "grad_norm": 0.8389173746109009, "learning_rate": 1.8350000000000002e-05, "loss": 0.0743, "step": 20295 }, { "epoch": 0.2475609756097561, "grad_norm": 0.8909085988998413, "learning_rate": 1.834959349593496e-05, "loss": 0.1824, "step": 20300 }, { "epoch": 0.2476219512195122, "grad_norm": 1.0907809734344482, "learning_rate": 1.8349186991869922e-05, "loss": 0.1216, "step": 20305 }, { "epoch": 0.2476829268292683, "grad_norm": 1.4113669395446777, "learning_rate": 1.834878048780488e-05, "loss": 0.1237, "step": 20310 }, { "epoch": 0.2477439024390244, "grad_norm": 1.6600221395492554, "learning_rate": 1.8348373983739838e-05, "loss": 0.1036, "step": 20315 }, { "epoch": 0.24780487804878049, "grad_norm": 0.5938466787338257, "learning_rate": 1.8347967479674796e-05, "loss": 0.1405, "step": 20320 }, { "epoch": 0.24786585365853658, "grad_norm": 0.6616277694702148, "learning_rate": 1.8347560975609758e-05, "loss": 0.1139, "step": 20325 }, { "epoch": 0.24792682926829268, "grad_norm": 0.8619281649589539, "learning_rate": 1.8347154471544716e-05, "loss": 0.1282, "step": 20330 }, { "epoch": 0.24798780487804878, "grad_norm": 2.068143606185913, "learning_rate": 1.8346747967479677e-05, "loss": 0.1224, "step": 20335 }, { "epoch": 0.24804878048780488, "grad_norm": 0.7916794419288635, "learning_rate": 1.8346341463414636e-05, "loss": 0.1301, "step": 20340 }, { "epoch": 0.24810975609756097, "grad_norm": 0.7723604440689087, "learning_rate": 1.8345934959349594e-05, "loss": 0.1031, "step": 20345 }, { "epoch": 0.24817073170731707, "grad_norm": 0.65543133020401, "learning_rate": 1.8345528455284555e-05, "loss": 0.0705, "step": 20350 }, { "epoch": 0.24823170731707317, "grad_norm": 0.4698091745376587, "learning_rate": 1.8345121951219513e-05, "loss": 0.1094, "step": 20355 }, { "epoch": 0.24829268292682927, "grad_norm": 0.5582970976829529, "learning_rate": 1.834471544715447e-05, "loss": 0.0884, "step": 20360 }, { "epoch": 0.24835365853658536, "grad_norm": 0.6166773438453674, "learning_rate": 1.8344308943089433e-05, "loss": 0.1061, "step": 20365 }, { "epoch": 0.24841463414634146, "grad_norm": 0.799598217010498, "learning_rate": 1.834390243902439e-05, "loss": 0.1058, "step": 20370 }, { "epoch": 0.24847560975609756, "grad_norm": 0.3781428933143616, "learning_rate": 1.834349593495935e-05, "loss": 0.1461, "step": 20375 }, { "epoch": 0.24853658536585366, "grad_norm": 2.4333291053771973, "learning_rate": 1.834308943089431e-05, "loss": 0.0951, "step": 20380 }, { "epoch": 0.24859756097560975, "grad_norm": 0.7929046154022217, "learning_rate": 1.834268292682927e-05, "loss": 0.1195, "step": 20385 }, { "epoch": 0.24865853658536585, "grad_norm": 0.9552169442176819, "learning_rate": 1.834227642276423e-05, "loss": 0.0779, "step": 20390 }, { "epoch": 0.24871951219512195, "grad_norm": 1.4947172403335571, "learning_rate": 1.834186991869919e-05, "loss": 0.1143, "step": 20395 }, { "epoch": 0.24878048780487805, "grad_norm": 1.717294692993164, "learning_rate": 1.834146341463415e-05, "loss": 0.1275, "step": 20400 }, { "epoch": 0.24884146341463415, "grad_norm": 0.7380844950675964, "learning_rate": 1.8341056910569105e-05, "loss": 0.0955, "step": 20405 }, { "epoch": 0.24890243902439024, "grad_norm": 1.0363255739212036, "learning_rate": 1.8340650406504066e-05, "loss": 0.1206, "step": 20410 }, { "epoch": 0.24896341463414634, "grad_norm": 1.8475267887115479, "learning_rate": 1.8340243902439024e-05, "loss": 0.1126, "step": 20415 }, { "epoch": 0.24902439024390244, "grad_norm": 1.3947949409484863, "learning_rate": 1.8339837398373986e-05, "loss": 0.1292, "step": 20420 }, { "epoch": 0.24908536585365854, "grad_norm": 0.6856445670127869, "learning_rate": 1.8339430894308944e-05, "loss": 0.1628, "step": 20425 }, { "epoch": 0.24914634146341463, "grad_norm": 0.5629368424415588, "learning_rate": 1.8339024390243906e-05, "loss": 0.0921, "step": 20430 }, { "epoch": 0.24920731707317073, "grad_norm": 1.1619292497634888, "learning_rate": 1.8338617886178864e-05, "loss": 0.1228, "step": 20435 }, { "epoch": 0.24926829268292683, "grad_norm": 0.9820244908332825, "learning_rate": 1.8338211382113822e-05, "loss": 0.1081, "step": 20440 }, { "epoch": 0.24932926829268293, "grad_norm": 0.547597348690033, "learning_rate": 1.8337804878048783e-05, "loss": 0.0966, "step": 20445 }, { "epoch": 0.24939024390243902, "grad_norm": 0.5105242729187012, "learning_rate": 1.833739837398374e-05, "loss": 0.0821, "step": 20450 }, { "epoch": 0.24945121951219512, "grad_norm": 0.47059378027915955, "learning_rate": 1.83369918699187e-05, "loss": 0.1035, "step": 20455 }, { "epoch": 0.24951219512195122, "grad_norm": 0.9411457777023315, "learning_rate": 1.833658536585366e-05, "loss": 0.0984, "step": 20460 }, { "epoch": 0.24957317073170732, "grad_norm": 1.0045485496520996, "learning_rate": 1.833617886178862e-05, "loss": 0.0939, "step": 20465 }, { "epoch": 0.24963414634146341, "grad_norm": 0.6005604863166809, "learning_rate": 1.8335772357723577e-05, "loss": 0.1177, "step": 20470 }, { "epoch": 0.2496951219512195, "grad_norm": 0.7531325817108154, "learning_rate": 1.833536585365854e-05, "loss": 0.0935, "step": 20475 }, { "epoch": 0.2497560975609756, "grad_norm": 0.9508736729621887, "learning_rate": 1.8334959349593497e-05, "loss": 0.1457, "step": 20480 }, { "epoch": 0.2498170731707317, "grad_norm": 0.8269770741462708, "learning_rate": 1.833455284552846e-05, "loss": 0.1129, "step": 20485 }, { "epoch": 0.2498780487804878, "grad_norm": 1.0564541816711426, "learning_rate": 1.8334146341463417e-05, "loss": 0.1161, "step": 20490 }, { "epoch": 0.2499390243902439, "grad_norm": 0.36440590023994446, "learning_rate": 1.8333739837398375e-05, "loss": 0.0872, "step": 20495 }, { "epoch": 0.25, "grad_norm": 0.7272793650627136, "learning_rate": 1.8333333333333333e-05, "loss": 0.1185, "step": 20500 }, { "epoch": 0.2500609756097561, "grad_norm": 0.7987051010131836, "learning_rate": 1.8332926829268294e-05, "loss": 0.1132, "step": 20505 }, { "epoch": 0.2501219512195122, "grad_norm": 0.6369150280952454, "learning_rate": 1.8332520325203253e-05, "loss": 0.081, "step": 20510 }, { "epoch": 0.2501829268292683, "grad_norm": 2.6577911376953125, "learning_rate": 1.8332113821138214e-05, "loss": 0.095, "step": 20515 }, { "epoch": 0.2502439024390244, "grad_norm": 1.1946067810058594, "learning_rate": 1.8331707317073172e-05, "loss": 0.1393, "step": 20520 }, { "epoch": 0.2503048780487805, "grad_norm": 0.8232536911964417, "learning_rate": 1.8331300813008134e-05, "loss": 0.1248, "step": 20525 }, { "epoch": 0.2503658536585366, "grad_norm": 0.8173861503601074, "learning_rate": 1.8330894308943092e-05, "loss": 0.0902, "step": 20530 }, { "epoch": 0.2504268292682927, "grad_norm": 0.8154990077018738, "learning_rate": 1.833048780487805e-05, "loss": 0.1168, "step": 20535 }, { "epoch": 0.2504878048780488, "grad_norm": 1.0786564350128174, "learning_rate": 1.8330081300813008e-05, "loss": 0.1302, "step": 20540 }, { "epoch": 0.2505487804878049, "grad_norm": 1.5087125301361084, "learning_rate": 1.832967479674797e-05, "loss": 0.0888, "step": 20545 }, { "epoch": 0.250609756097561, "grad_norm": 1.3946491479873657, "learning_rate": 1.8329268292682928e-05, "loss": 0.1281, "step": 20550 }, { "epoch": 0.2506707317073171, "grad_norm": 0.9513848423957825, "learning_rate": 1.832886178861789e-05, "loss": 0.0911, "step": 20555 }, { "epoch": 0.25073170731707317, "grad_norm": 1.3192265033721924, "learning_rate": 1.8328455284552847e-05, "loss": 0.1132, "step": 20560 }, { "epoch": 0.25079268292682927, "grad_norm": 0.7687125205993652, "learning_rate": 1.8328048780487805e-05, "loss": 0.1066, "step": 20565 }, { "epoch": 0.25085365853658537, "grad_norm": 1.0081125497817993, "learning_rate": 1.8327642276422767e-05, "loss": 0.1173, "step": 20570 }, { "epoch": 0.25091463414634146, "grad_norm": 1.154812216758728, "learning_rate": 1.8327235772357725e-05, "loss": 0.1294, "step": 20575 }, { "epoch": 0.25097560975609756, "grad_norm": 1.6275660991668701, "learning_rate": 1.8326829268292687e-05, "loss": 0.1066, "step": 20580 }, { "epoch": 0.25103658536585366, "grad_norm": 1.1071953773498535, "learning_rate": 1.8326422764227645e-05, "loss": 0.1405, "step": 20585 }, { "epoch": 0.25109756097560976, "grad_norm": 0.9377224445343018, "learning_rate": 1.8326016260162603e-05, "loss": 0.0914, "step": 20590 }, { "epoch": 0.25115853658536585, "grad_norm": 0.5393361449241638, "learning_rate": 1.832560975609756e-05, "loss": 0.0955, "step": 20595 }, { "epoch": 0.25121951219512195, "grad_norm": 0.9339272975921631, "learning_rate": 1.8325203252032523e-05, "loss": 0.088, "step": 20600 }, { "epoch": 0.25128048780487805, "grad_norm": 0.8134381771087646, "learning_rate": 1.832479674796748e-05, "loss": 0.0879, "step": 20605 }, { "epoch": 0.25134146341463415, "grad_norm": 0.42500007152557373, "learning_rate": 1.8324390243902442e-05, "loss": 0.0937, "step": 20610 }, { "epoch": 0.25140243902439025, "grad_norm": 1.1392110586166382, "learning_rate": 1.83239837398374e-05, "loss": 0.1327, "step": 20615 }, { "epoch": 0.25146341463414634, "grad_norm": 0.48446136713027954, "learning_rate": 1.832357723577236e-05, "loss": 0.103, "step": 20620 }, { "epoch": 0.25152439024390244, "grad_norm": 1.0454264879226685, "learning_rate": 1.8323170731707317e-05, "loss": 0.112, "step": 20625 }, { "epoch": 0.25158536585365854, "grad_norm": 2.108790874481201, "learning_rate": 1.8322764227642278e-05, "loss": 0.1201, "step": 20630 }, { "epoch": 0.25164634146341464, "grad_norm": 0.40104082226753235, "learning_rate": 1.8322357723577236e-05, "loss": 0.0939, "step": 20635 }, { "epoch": 0.25170731707317073, "grad_norm": 0.664239764213562, "learning_rate": 1.8321951219512198e-05, "loss": 0.1019, "step": 20640 }, { "epoch": 0.25176829268292683, "grad_norm": 0.7798515558242798, "learning_rate": 1.8321544715447156e-05, "loss": 0.0934, "step": 20645 }, { "epoch": 0.25182926829268293, "grad_norm": 0.5062527060508728, "learning_rate": 1.8321138211382114e-05, "loss": 0.0932, "step": 20650 }, { "epoch": 0.251890243902439, "grad_norm": 0.6499829292297363, "learning_rate": 1.8320731707317075e-05, "loss": 0.1105, "step": 20655 }, { "epoch": 0.2519512195121951, "grad_norm": 1.1307963132858276, "learning_rate": 1.8320325203252034e-05, "loss": 0.1255, "step": 20660 }, { "epoch": 0.2520121951219512, "grad_norm": 0.8647443056106567, "learning_rate": 1.8319918699186995e-05, "loss": 0.1099, "step": 20665 }, { "epoch": 0.2520731707317073, "grad_norm": 0.42845961451530457, "learning_rate": 1.8319512195121953e-05, "loss": 0.1334, "step": 20670 }, { "epoch": 0.2521341463414634, "grad_norm": 0.7899541258811951, "learning_rate": 1.8319105691056915e-05, "loss": 0.1222, "step": 20675 }, { "epoch": 0.2521951219512195, "grad_norm": 0.6836950778961182, "learning_rate": 1.831869918699187e-05, "loss": 0.1435, "step": 20680 }, { "epoch": 0.2522560975609756, "grad_norm": 0.5172653198242188, "learning_rate": 1.831829268292683e-05, "loss": 0.1256, "step": 20685 }, { "epoch": 0.2523170731707317, "grad_norm": 0.7727364301681519, "learning_rate": 1.831788617886179e-05, "loss": 0.1361, "step": 20690 }, { "epoch": 0.2523780487804878, "grad_norm": 0.6268903613090515, "learning_rate": 1.831747967479675e-05, "loss": 0.1103, "step": 20695 }, { "epoch": 0.2524390243902439, "grad_norm": 0.6228120923042297, "learning_rate": 1.831707317073171e-05, "loss": 0.0965, "step": 20700 }, { "epoch": 0.2525, "grad_norm": 1.345947504043579, "learning_rate": 1.831666666666667e-05, "loss": 0.0803, "step": 20705 }, { "epoch": 0.2525609756097561, "grad_norm": 1.8554861545562744, "learning_rate": 1.831626016260163e-05, "loss": 0.1077, "step": 20710 }, { "epoch": 0.2526219512195122, "grad_norm": 1.1892426013946533, "learning_rate": 1.8315853658536587e-05, "loss": 0.117, "step": 20715 }, { "epoch": 0.2526829268292683, "grad_norm": 1.0261051654815674, "learning_rate": 1.8315447154471545e-05, "loss": 0.1354, "step": 20720 }, { "epoch": 0.2527439024390244, "grad_norm": 2.6578621864318848, "learning_rate": 1.8315040650406506e-05, "loss": 0.1082, "step": 20725 }, { "epoch": 0.2528048780487805, "grad_norm": 0.4897502362728119, "learning_rate": 1.8314634146341464e-05, "loss": 0.1161, "step": 20730 }, { "epoch": 0.2528658536585366, "grad_norm": 0.7510533928871155, "learning_rate": 1.8314227642276426e-05, "loss": 0.129, "step": 20735 }, { "epoch": 0.2529268292682927, "grad_norm": 0.8873136043548584, "learning_rate": 1.8313821138211384e-05, "loss": 0.1003, "step": 20740 }, { "epoch": 0.2529878048780488, "grad_norm": 0.5221030116081238, "learning_rate": 1.8313414634146342e-05, "loss": 0.098, "step": 20745 }, { "epoch": 0.2530487804878049, "grad_norm": 1.4310799837112427, "learning_rate": 1.8313008130081304e-05, "loss": 0.1074, "step": 20750 }, { "epoch": 0.253109756097561, "grad_norm": 0.783821165561676, "learning_rate": 1.8312601626016262e-05, "loss": 0.0937, "step": 20755 }, { "epoch": 0.2531707317073171, "grad_norm": 0.5523737072944641, "learning_rate": 1.8312195121951223e-05, "loss": 0.076, "step": 20760 }, { "epoch": 0.2532317073170732, "grad_norm": 0.45234042406082153, "learning_rate": 1.831178861788618e-05, "loss": 0.1161, "step": 20765 }, { "epoch": 0.25329268292682927, "grad_norm": 0.8988186120986938, "learning_rate": 1.831138211382114e-05, "loss": 0.1268, "step": 20770 }, { "epoch": 0.25335365853658537, "grad_norm": 0.6251803040504456, "learning_rate": 1.8310975609756098e-05, "loss": 0.0847, "step": 20775 }, { "epoch": 0.25341463414634147, "grad_norm": 0.602788507938385, "learning_rate": 1.831056910569106e-05, "loss": 0.1284, "step": 20780 }, { "epoch": 0.25347560975609756, "grad_norm": 0.8381361365318298, "learning_rate": 1.8310162601626017e-05, "loss": 0.1074, "step": 20785 }, { "epoch": 0.25353658536585366, "grad_norm": 0.7470796704292297, "learning_rate": 1.830975609756098e-05, "loss": 0.1182, "step": 20790 }, { "epoch": 0.25359756097560976, "grad_norm": 0.6465372443199158, "learning_rate": 1.8309349593495937e-05, "loss": 0.1153, "step": 20795 }, { "epoch": 0.25365853658536586, "grad_norm": 1.0372117757797241, "learning_rate": 1.8308943089430895e-05, "loss": 0.124, "step": 20800 }, { "epoch": 0.25371951219512195, "grad_norm": 0.5639897584915161, "learning_rate": 1.8308536585365853e-05, "loss": 0.107, "step": 20805 }, { "epoch": 0.25378048780487805, "grad_norm": 0.6265100240707397, "learning_rate": 1.8308130081300815e-05, "loss": 0.0863, "step": 20810 }, { "epoch": 0.25384146341463415, "grad_norm": 0.7157870531082153, "learning_rate": 1.8307723577235773e-05, "loss": 0.1154, "step": 20815 }, { "epoch": 0.25390243902439025, "grad_norm": 1.0833358764648438, "learning_rate": 1.8307317073170734e-05, "loss": 0.1091, "step": 20820 }, { "epoch": 0.25396341463414634, "grad_norm": 0.7790127396583557, "learning_rate": 1.8306910569105692e-05, "loss": 0.0959, "step": 20825 }, { "epoch": 0.25402439024390244, "grad_norm": 0.6472858190536499, "learning_rate": 1.830650406504065e-05, "loss": 0.1254, "step": 20830 }, { "epoch": 0.25408536585365854, "grad_norm": 1.0980594158172607, "learning_rate": 1.8306097560975612e-05, "loss": 0.0987, "step": 20835 }, { "epoch": 0.25414634146341464, "grad_norm": 0.4819900095462799, "learning_rate": 1.830569105691057e-05, "loss": 0.0932, "step": 20840 }, { "epoch": 0.25420731707317074, "grad_norm": 2.4996535778045654, "learning_rate": 1.8305284552845532e-05, "loss": 0.1063, "step": 20845 }, { "epoch": 0.25426829268292683, "grad_norm": 0.7690636515617371, "learning_rate": 1.830487804878049e-05, "loss": 0.108, "step": 20850 }, { "epoch": 0.25432926829268293, "grad_norm": 1.1360365152359009, "learning_rate": 1.830447154471545e-05, "loss": 0.1212, "step": 20855 }, { "epoch": 0.25439024390243903, "grad_norm": 0.6760964393615723, "learning_rate": 1.8304065040650406e-05, "loss": 0.0962, "step": 20860 }, { "epoch": 0.2544512195121951, "grad_norm": 0.6247652769088745, "learning_rate": 1.8303658536585368e-05, "loss": 0.1015, "step": 20865 }, { "epoch": 0.2545121951219512, "grad_norm": 0.8542356491088867, "learning_rate": 1.8303252032520326e-05, "loss": 0.1069, "step": 20870 }, { "epoch": 0.2545731707317073, "grad_norm": 0.5966123342514038, "learning_rate": 1.8302845528455287e-05, "loss": 0.1105, "step": 20875 }, { "epoch": 0.2546341463414634, "grad_norm": 0.4113093316555023, "learning_rate": 1.8302439024390245e-05, "loss": 0.0763, "step": 20880 }, { "epoch": 0.2546951219512195, "grad_norm": 0.8143666982650757, "learning_rate": 1.8302032520325207e-05, "loss": 0.1156, "step": 20885 }, { "epoch": 0.2547560975609756, "grad_norm": 0.8393425345420837, "learning_rate": 1.830162601626016e-05, "loss": 0.0971, "step": 20890 }, { "epoch": 0.2548170731707317, "grad_norm": 0.6140120625495911, "learning_rate": 1.8301219512195123e-05, "loss": 0.1518, "step": 20895 }, { "epoch": 0.2548780487804878, "grad_norm": 1.6612757444381714, "learning_rate": 1.830081300813008e-05, "loss": 0.1281, "step": 20900 }, { "epoch": 0.2549390243902439, "grad_norm": 0.7546074986457825, "learning_rate": 1.8300406504065043e-05, "loss": 0.1096, "step": 20905 }, { "epoch": 0.255, "grad_norm": 0.8094547390937805, "learning_rate": 1.83e-05, "loss": 0.1171, "step": 20910 }, { "epoch": 0.2550609756097561, "grad_norm": 0.5962897539138794, "learning_rate": 1.8299593495934962e-05, "loss": 0.1073, "step": 20915 }, { "epoch": 0.2551219512195122, "grad_norm": 0.4772201180458069, "learning_rate": 1.829918699186992e-05, "loss": 0.0869, "step": 20920 }, { "epoch": 0.2551829268292683, "grad_norm": 1.9414597749710083, "learning_rate": 1.829878048780488e-05, "loss": 0.1336, "step": 20925 }, { "epoch": 0.2552439024390244, "grad_norm": 0.8804829120635986, "learning_rate": 1.829837398373984e-05, "loss": 0.1116, "step": 20930 }, { "epoch": 0.2553048780487805, "grad_norm": 0.7981428503990173, "learning_rate": 1.82979674796748e-05, "loss": 0.091, "step": 20935 }, { "epoch": 0.2553658536585366, "grad_norm": 0.79123854637146, "learning_rate": 1.829756097560976e-05, "loss": 0.1193, "step": 20940 }, { "epoch": 0.2554268292682927, "grad_norm": 0.44929051399230957, "learning_rate": 1.8297154471544718e-05, "loss": 0.0794, "step": 20945 }, { "epoch": 0.2554878048780488, "grad_norm": 0.8025960922241211, "learning_rate": 1.8296747967479676e-05, "loss": 0.0924, "step": 20950 }, { "epoch": 0.2555487804878049, "grad_norm": 0.7462122440338135, "learning_rate": 1.8296341463414634e-05, "loss": 0.0832, "step": 20955 }, { "epoch": 0.255609756097561, "grad_norm": 1.2193979024887085, "learning_rate": 1.8295934959349596e-05, "loss": 0.0812, "step": 20960 }, { "epoch": 0.2556707317073171, "grad_norm": 0.6061300039291382, "learning_rate": 1.8295528455284554e-05, "loss": 0.0733, "step": 20965 }, { "epoch": 0.2557317073170732, "grad_norm": 0.6722778677940369, "learning_rate": 1.8295121951219515e-05, "loss": 0.1165, "step": 20970 }, { "epoch": 0.2557926829268293, "grad_norm": 0.7515337467193604, "learning_rate": 1.8294715447154474e-05, "loss": 0.174, "step": 20975 }, { "epoch": 0.25585365853658537, "grad_norm": 1.6161787509918213, "learning_rate": 1.8294308943089432e-05, "loss": 0.105, "step": 20980 }, { "epoch": 0.25591463414634147, "grad_norm": 0.9827760457992554, "learning_rate": 1.829390243902439e-05, "loss": 0.0683, "step": 20985 }, { "epoch": 0.25597560975609757, "grad_norm": 0.7291193604469299, "learning_rate": 1.829349593495935e-05, "loss": 0.1384, "step": 20990 }, { "epoch": 0.25603658536585366, "grad_norm": 0.6937498450279236, "learning_rate": 1.829308943089431e-05, "loss": 0.1142, "step": 20995 }, { "epoch": 0.25609756097560976, "grad_norm": 0.6653767824172974, "learning_rate": 1.829268292682927e-05, "loss": 0.0997, "step": 21000 }, { "epoch": 0.25615853658536586, "grad_norm": 0.7682312726974487, "learning_rate": 1.829227642276423e-05, "loss": 0.0995, "step": 21005 }, { "epoch": 0.25621951219512196, "grad_norm": 0.7431970834732056, "learning_rate": 1.8291869918699187e-05, "loss": 0.0777, "step": 21010 }, { "epoch": 0.25628048780487805, "grad_norm": 0.49396613240242004, "learning_rate": 1.829146341463415e-05, "loss": 0.0836, "step": 21015 }, { "epoch": 0.25634146341463415, "grad_norm": 0.735817015171051, "learning_rate": 1.8291056910569107e-05, "loss": 0.1175, "step": 21020 }, { "epoch": 0.25640243902439025, "grad_norm": 0.8551995754241943, "learning_rate": 1.829065040650407e-05, "loss": 0.1273, "step": 21025 }, { "epoch": 0.25646341463414635, "grad_norm": 0.8020477890968323, "learning_rate": 1.8290243902439027e-05, "loss": 0.1114, "step": 21030 }, { "epoch": 0.25652439024390244, "grad_norm": 0.6259887218475342, "learning_rate": 1.8289837398373985e-05, "loss": 0.1532, "step": 21035 }, { "epoch": 0.25658536585365854, "grad_norm": 0.5985107421875, "learning_rate": 1.8289430894308943e-05, "loss": 0.0974, "step": 21040 }, { "epoch": 0.25664634146341464, "grad_norm": 0.8603403568267822, "learning_rate": 1.8289024390243904e-05, "loss": 0.1482, "step": 21045 }, { "epoch": 0.25670731707317074, "grad_norm": 1.1139781475067139, "learning_rate": 1.8288617886178862e-05, "loss": 0.095, "step": 21050 }, { "epoch": 0.25676829268292684, "grad_norm": 0.6336944699287415, "learning_rate": 1.8288211382113824e-05, "loss": 0.1051, "step": 21055 }, { "epoch": 0.25682926829268293, "grad_norm": 1.7662416696548462, "learning_rate": 1.8287804878048782e-05, "loss": 0.1617, "step": 21060 }, { "epoch": 0.25689024390243903, "grad_norm": 0.8249997496604919, "learning_rate": 1.8287398373983744e-05, "loss": 0.1096, "step": 21065 }, { "epoch": 0.25695121951219513, "grad_norm": 1.4293041229248047, "learning_rate": 1.82869918699187e-05, "loss": 0.1053, "step": 21070 }, { "epoch": 0.2570121951219512, "grad_norm": 1.8600432872772217, "learning_rate": 1.828658536585366e-05, "loss": 0.0953, "step": 21075 }, { "epoch": 0.2570731707317073, "grad_norm": 0.8386691212654114, "learning_rate": 1.8286178861788618e-05, "loss": 0.1184, "step": 21080 }, { "epoch": 0.2571341463414634, "grad_norm": 0.5838348269462585, "learning_rate": 1.828577235772358e-05, "loss": 0.088, "step": 21085 }, { "epoch": 0.2571951219512195, "grad_norm": 0.949819028377533, "learning_rate": 1.8285365853658538e-05, "loss": 0.1302, "step": 21090 }, { "epoch": 0.2572560975609756, "grad_norm": 1.1302465200424194, "learning_rate": 1.82849593495935e-05, "loss": 0.1102, "step": 21095 }, { "epoch": 0.2573170731707317, "grad_norm": 1.126086950302124, "learning_rate": 1.8284552845528457e-05, "loss": 0.1046, "step": 21100 }, { "epoch": 0.2573780487804878, "grad_norm": 1.1592137813568115, "learning_rate": 1.8284146341463415e-05, "loss": 0.099, "step": 21105 }, { "epoch": 0.2574390243902439, "grad_norm": 0.6968955397605896, "learning_rate": 1.8283739837398377e-05, "loss": 0.1193, "step": 21110 }, { "epoch": 0.2575, "grad_norm": 0.5756753087043762, "learning_rate": 1.8283333333333335e-05, "loss": 0.0929, "step": 21115 }, { "epoch": 0.2575609756097561, "grad_norm": 1.0099979639053345, "learning_rate": 1.8282926829268297e-05, "loss": 0.1118, "step": 21120 }, { "epoch": 0.2576219512195122, "grad_norm": 0.6892082095146179, "learning_rate": 1.8282520325203255e-05, "loss": 0.1112, "step": 21125 }, { "epoch": 0.2576829268292683, "grad_norm": 0.7763335108757019, "learning_rate": 1.8282113821138213e-05, "loss": 0.1361, "step": 21130 }, { "epoch": 0.2577439024390244, "grad_norm": 2.3329174518585205, "learning_rate": 1.828170731707317e-05, "loss": 0.1188, "step": 21135 }, { "epoch": 0.2578048780487805, "grad_norm": 0.6853740811347961, "learning_rate": 1.8281300813008132e-05, "loss": 0.111, "step": 21140 }, { "epoch": 0.2578658536585366, "grad_norm": 0.6943448185920715, "learning_rate": 1.828089430894309e-05, "loss": 0.1111, "step": 21145 }, { "epoch": 0.2579268292682927, "grad_norm": 0.8970130681991577, "learning_rate": 1.8280487804878052e-05, "loss": 0.0922, "step": 21150 }, { "epoch": 0.2579878048780488, "grad_norm": 2.342100143432617, "learning_rate": 1.828008130081301e-05, "loss": 0.1177, "step": 21155 }, { "epoch": 0.2580487804878049, "grad_norm": 0.9493321776390076, "learning_rate": 1.827967479674797e-05, "loss": 0.1064, "step": 21160 }, { "epoch": 0.258109756097561, "grad_norm": 0.7156021595001221, "learning_rate": 1.8279268292682926e-05, "loss": 0.1024, "step": 21165 }, { "epoch": 0.2581707317073171, "grad_norm": 2.0359416007995605, "learning_rate": 1.8278861788617888e-05, "loss": 0.1312, "step": 21170 }, { "epoch": 0.2582317073170732, "grad_norm": 0.9993500113487244, "learning_rate": 1.8278455284552846e-05, "loss": 0.1027, "step": 21175 }, { "epoch": 0.2582926829268293, "grad_norm": 0.8766124248504639, "learning_rate": 1.8278048780487808e-05, "loss": 0.0672, "step": 21180 }, { "epoch": 0.2583536585365854, "grad_norm": 0.8100348114967346, "learning_rate": 1.8277642276422766e-05, "loss": 0.1031, "step": 21185 }, { "epoch": 0.25841463414634147, "grad_norm": 1.072261929512024, "learning_rate": 1.8277235772357724e-05, "loss": 0.0897, "step": 21190 }, { "epoch": 0.25847560975609757, "grad_norm": 0.8035169839859009, "learning_rate": 1.8276829268292685e-05, "loss": 0.1294, "step": 21195 }, { "epoch": 0.25853658536585367, "grad_norm": 0.6442155241966248, "learning_rate": 1.8276422764227644e-05, "loss": 0.1194, "step": 21200 }, { "epoch": 0.25859756097560976, "grad_norm": 0.5451095104217529, "learning_rate": 1.8276016260162605e-05, "loss": 0.0936, "step": 21205 }, { "epoch": 0.25865853658536586, "grad_norm": 1.816610336303711, "learning_rate": 1.8275609756097563e-05, "loss": 0.1455, "step": 21210 }, { "epoch": 0.25871951219512196, "grad_norm": 1.3245348930358887, "learning_rate": 1.827520325203252e-05, "loss": 0.0957, "step": 21215 }, { "epoch": 0.25878048780487806, "grad_norm": 0.8436298370361328, "learning_rate": 1.827479674796748e-05, "loss": 0.0972, "step": 21220 }, { "epoch": 0.25884146341463415, "grad_norm": 0.5616514086723328, "learning_rate": 1.827439024390244e-05, "loss": 0.1053, "step": 21225 }, { "epoch": 0.25890243902439025, "grad_norm": 1.1722224950790405, "learning_rate": 1.82739837398374e-05, "loss": 0.1162, "step": 21230 }, { "epoch": 0.25896341463414635, "grad_norm": 0.6069326996803284, "learning_rate": 1.827357723577236e-05, "loss": 0.0828, "step": 21235 }, { "epoch": 0.25902439024390245, "grad_norm": 0.5447514057159424, "learning_rate": 1.827317073170732e-05, "loss": 0.108, "step": 21240 }, { "epoch": 0.25908536585365854, "grad_norm": 0.6322326064109802, "learning_rate": 1.827276422764228e-05, "loss": 0.0732, "step": 21245 }, { "epoch": 0.25914634146341464, "grad_norm": 0.8367429971694946, "learning_rate": 1.8272357723577235e-05, "loss": 0.125, "step": 21250 }, { "epoch": 0.25920731707317074, "grad_norm": 1.069018006324768, "learning_rate": 1.8271951219512196e-05, "loss": 0.0965, "step": 21255 }, { "epoch": 0.25926829268292684, "grad_norm": 1.199084997177124, "learning_rate": 1.8271544715447155e-05, "loss": 0.1187, "step": 21260 }, { "epoch": 0.25932926829268294, "grad_norm": 0.7646649479866028, "learning_rate": 1.8271138211382116e-05, "loss": 0.0972, "step": 21265 }, { "epoch": 0.25939024390243903, "grad_norm": 1.0380319356918335, "learning_rate": 1.8270731707317074e-05, "loss": 0.1316, "step": 21270 }, { "epoch": 0.25945121951219513, "grad_norm": 1.2883363962173462, "learning_rate": 1.8270325203252036e-05, "loss": 0.102, "step": 21275 }, { "epoch": 0.25951219512195123, "grad_norm": 1.5100945234298706, "learning_rate": 1.8269918699186994e-05, "loss": 0.1023, "step": 21280 }, { "epoch": 0.2595731707317073, "grad_norm": 0.6690894365310669, "learning_rate": 1.8269512195121952e-05, "loss": 0.1019, "step": 21285 }, { "epoch": 0.2596341463414634, "grad_norm": 1.4517239332199097, "learning_rate": 1.8269105691056914e-05, "loss": 0.1152, "step": 21290 }, { "epoch": 0.2596951219512195, "grad_norm": 0.461357057094574, "learning_rate": 1.826869918699187e-05, "loss": 0.1021, "step": 21295 }, { "epoch": 0.2597560975609756, "grad_norm": 0.6307805180549622, "learning_rate": 1.826829268292683e-05, "loss": 0.1097, "step": 21300 }, { "epoch": 0.2598170731707317, "grad_norm": 0.6144849061965942, "learning_rate": 1.826788617886179e-05, "loss": 0.0965, "step": 21305 }, { "epoch": 0.2598780487804878, "grad_norm": 0.5748528242111206, "learning_rate": 1.826747967479675e-05, "loss": 0.0876, "step": 21310 }, { "epoch": 0.2599390243902439, "grad_norm": 1.4375017881393433, "learning_rate": 1.8267073170731708e-05, "loss": 0.1298, "step": 21315 }, { "epoch": 0.26, "grad_norm": 1.2634724378585815, "learning_rate": 1.826666666666667e-05, "loss": 0.0743, "step": 21320 }, { "epoch": 0.2600609756097561, "grad_norm": 0.6723089814186096, "learning_rate": 1.8266260162601627e-05, "loss": 0.0987, "step": 21325 }, { "epoch": 0.2601219512195122, "grad_norm": 0.6628326177597046, "learning_rate": 1.826585365853659e-05, "loss": 0.0999, "step": 21330 }, { "epoch": 0.2601829268292683, "grad_norm": 0.7850484251976013, "learning_rate": 1.8265447154471547e-05, "loss": 0.0866, "step": 21335 }, { "epoch": 0.2602439024390244, "grad_norm": 0.8547763824462891, "learning_rate": 1.8265040650406505e-05, "loss": 0.1146, "step": 21340 }, { "epoch": 0.2603048780487805, "grad_norm": 0.4003717601299286, "learning_rate": 1.8264634146341463e-05, "loss": 0.0786, "step": 21345 }, { "epoch": 0.2603658536585366, "grad_norm": 0.5270789861679077, "learning_rate": 1.8264227642276425e-05, "loss": 0.0844, "step": 21350 }, { "epoch": 0.2604268292682927, "grad_norm": 1.048532485961914, "learning_rate": 1.8263821138211383e-05, "loss": 0.1213, "step": 21355 }, { "epoch": 0.2604878048780488, "grad_norm": 1.2091319561004639, "learning_rate": 1.8263414634146344e-05, "loss": 0.1377, "step": 21360 }, { "epoch": 0.2605487804878049, "grad_norm": 1.0639199018478394, "learning_rate": 1.8263008130081302e-05, "loss": 0.1081, "step": 21365 }, { "epoch": 0.260609756097561, "grad_norm": 1.2883151769638062, "learning_rate": 1.826260162601626e-05, "loss": 0.1145, "step": 21370 }, { "epoch": 0.2606707317073171, "grad_norm": 0.47076764702796936, "learning_rate": 1.8262195121951222e-05, "loss": 0.0901, "step": 21375 }, { "epoch": 0.2607317073170732, "grad_norm": 0.8786696791648865, "learning_rate": 1.826178861788618e-05, "loss": 0.1054, "step": 21380 }, { "epoch": 0.2607926829268293, "grad_norm": 0.7738630771636963, "learning_rate": 1.826138211382114e-05, "loss": 0.1222, "step": 21385 }, { "epoch": 0.2608536585365854, "grad_norm": 0.8856220841407776, "learning_rate": 1.82609756097561e-05, "loss": 0.1092, "step": 21390 }, { "epoch": 0.2609146341463415, "grad_norm": 0.5615638494491577, "learning_rate": 1.8260569105691058e-05, "loss": 0.0893, "step": 21395 }, { "epoch": 0.26097560975609757, "grad_norm": 1.4872561693191528, "learning_rate": 1.8260162601626016e-05, "loss": 0.1124, "step": 21400 }, { "epoch": 0.26103658536585367, "grad_norm": 0.6470630168914795, "learning_rate": 1.8259756097560978e-05, "loss": 0.1367, "step": 21405 }, { "epoch": 0.26109756097560977, "grad_norm": 1.1218334436416626, "learning_rate": 1.8259349593495936e-05, "loss": 0.1275, "step": 21410 }, { "epoch": 0.26115853658536586, "grad_norm": 1.1402593851089478, "learning_rate": 1.8258943089430897e-05, "loss": 0.1268, "step": 21415 }, { "epoch": 0.26121951219512196, "grad_norm": 0.8944131135940552, "learning_rate": 1.8258536585365855e-05, "loss": 0.1579, "step": 21420 }, { "epoch": 0.26128048780487806, "grad_norm": 0.8671507835388184, "learning_rate": 1.8258130081300817e-05, "loss": 0.1203, "step": 21425 }, { "epoch": 0.26134146341463416, "grad_norm": 1.245693564414978, "learning_rate": 1.825772357723577e-05, "loss": 0.1496, "step": 21430 }, { "epoch": 0.26140243902439025, "grad_norm": 0.9342502355575562, "learning_rate": 1.8257317073170733e-05, "loss": 0.0961, "step": 21435 }, { "epoch": 0.26146341463414635, "grad_norm": 0.635078489780426, "learning_rate": 1.825691056910569e-05, "loss": 0.1138, "step": 21440 }, { "epoch": 0.26152439024390245, "grad_norm": 1.1828595399856567, "learning_rate": 1.8256504065040653e-05, "loss": 0.0851, "step": 21445 }, { "epoch": 0.26158536585365855, "grad_norm": 2.9134652614593506, "learning_rate": 1.825609756097561e-05, "loss": 0.1462, "step": 21450 }, { "epoch": 0.26164634146341464, "grad_norm": 0.7633832097053528, "learning_rate": 1.8255691056910572e-05, "loss": 0.1061, "step": 21455 }, { "epoch": 0.26170731707317074, "grad_norm": 0.7549310326576233, "learning_rate": 1.825528455284553e-05, "loss": 0.0977, "step": 21460 }, { "epoch": 0.26176829268292684, "grad_norm": 1.2771751880645752, "learning_rate": 1.825487804878049e-05, "loss": 0.1381, "step": 21465 }, { "epoch": 0.26182926829268294, "grad_norm": 0.9179058074951172, "learning_rate": 1.825447154471545e-05, "loss": 0.0849, "step": 21470 }, { "epoch": 0.26189024390243903, "grad_norm": 0.7824744582176208, "learning_rate": 1.825406504065041e-05, "loss": 0.128, "step": 21475 }, { "epoch": 0.26195121951219513, "grad_norm": 0.32622095942497253, "learning_rate": 1.8253658536585366e-05, "loss": 0.0867, "step": 21480 }, { "epoch": 0.26201219512195123, "grad_norm": 1.7646501064300537, "learning_rate": 1.8253252032520328e-05, "loss": 0.1251, "step": 21485 }, { "epoch": 0.26207317073170733, "grad_norm": 0.9687061905860901, "learning_rate": 1.8252845528455286e-05, "loss": 0.1101, "step": 21490 }, { "epoch": 0.2621341463414634, "grad_norm": 0.914915919303894, "learning_rate": 1.8252439024390244e-05, "loss": 0.1157, "step": 21495 }, { "epoch": 0.2621951219512195, "grad_norm": 0.7152172327041626, "learning_rate": 1.8252032520325206e-05, "loss": 0.0916, "step": 21500 }, { "epoch": 0.2622560975609756, "grad_norm": 1.1718295812606812, "learning_rate": 1.8251626016260164e-05, "loss": 0.1365, "step": 21505 }, { "epoch": 0.2623170731707317, "grad_norm": 0.8903157114982605, "learning_rate": 1.8251219512195125e-05, "loss": 0.1065, "step": 21510 }, { "epoch": 0.2623780487804878, "grad_norm": 1.0308942794799805, "learning_rate": 1.8250813008130084e-05, "loss": 0.1207, "step": 21515 }, { "epoch": 0.2624390243902439, "grad_norm": 0.6934080123901367, "learning_rate": 1.825040650406504e-05, "loss": 0.1193, "step": 21520 }, { "epoch": 0.2625, "grad_norm": 0.6976850032806396, "learning_rate": 1.825e-05, "loss": 0.117, "step": 21525 }, { "epoch": 0.2625609756097561, "grad_norm": 0.6998504996299744, "learning_rate": 1.824959349593496e-05, "loss": 0.1014, "step": 21530 }, { "epoch": 0.2626219512195122, "grad_norm": 0.541669487953186, "learning_rate": 1.824918699186992e-05, "loss": 0.1036, "step": 21535 }, { "epoch": 0.2626829268292683, "grad_norm": 0.5481747984886169, "learning_rate": 1.824878048780488e-05, "loss": 0.0972, "step": 21540 }, { "epoch": 0.2627439024390244, "grad_norm": 0.6440427899360657, "learning_rate": 1.824837398373984e-05, "loss": 0.1136, "step": 21545 }, { "epoch": 0.2628048780487805, "grad_norm": 1.996464490890503, "learning_rate": 1.8247967479674797e-05, "loss": 0.1182, "step": 21550 }, { "epoch": 0.2628658536585366, "grad_norm": 0.9138861298561096, "learning_rate": 1.824756097560976e-05, "loss": 0.1013, "step": 21555 }, { "epoch": 0.2629268292682927, "grad_norm": 0.8752086758613586, "learning_rate": 1.8247154471544717e-05, "loss": 0.0903, "step": 21560 }, { "epoch": 0.2629878048780488, "grad_norm": 1.2748206853866577, "learning_rate": 1.8246747967479675e-05, "loss": 0.1146, "step": 21565 }, { "epoch": 0.2630487804878049, "grad_norm": 0.9128847718238831, "learning_rate": 1.8246341463414636e-05, "loss": 0.1249, "step": 21570 }, { "epoch": 0.263109756097561, "grad_norm": 0.7345057725906372, "learning_rate": 1.8245934959349595e-05, "loss": 0.1062, "step": 21575 }, { "epoch": 0.2631707317073171, "grad_norm": 0.5454924702644348, "learning_rate": 1.8245528455284553e-05, "loss": 0.0923, "step": 21580 }, { "epoch": 0.2632317073170732, "grad_norm": 2.0997862815856934, "learning_rate": 1.8245121951219514e-05, "loss": 0.0834, "step": 21585 }, { "epoch": 0.2632926829268293, "grad_norm": 1.2972947359085083, "learning_rate": 1.8244715447154472e-05, "loss": 0.1274, "step": 21590 }, { "epoch": 0.2633536585365854, "grad_norm": 0.7025289535522461, "learning_rate": 1.8244308943089434e-05, "loss": 0.0866, "step": 21595 }, { "epoch": 0.2634146341463415, "grad_norm": 0.7914589047431946, "learning_rate": 1.8243902439024392e-05, "loss": 0.1122, "step": 21600 }, { "epoch": 0.2634756097560976, "grad_norm": 0.6870167255401611, "learning_rate": 1.8243495934959354e-05, "loss": 0.1548, "step": 21605 }, { "epoch": 0.26353658536585367, "grad_norm": 3.222979784011841, "learning_rate": 1.8243089430894308e-05, "loss": 0.1237, "step": 21610 }, { "epoch": 0.26359756097560977, "grad_norm": 0.4802011549472809, "learning_rate": 1.824268292682927e-05, "loss": 0.137, "step": 21615 }, { "epoch": 0.26365853658536587, "grad_norm": 0.6281077861785889, "learning_rate": 1.8242276422764228e-05, "loss": 0.1122, "step": 21620 }, { "epoch": 0.26371951219512196, "grad_norm": 0.6068404316902161, "learning_rate": 1.824186991869919e-05, "loss": 0.1355, "step": 21625 }, { "epoch": 0.26378048780487806, "grad_norm": 0.755450963973999, "learning_rate": 1.8241463414634148e-05, "loss": 0.1068, "step": 21630 }, { "epoch": 0.26384146341463416, "grad_norm": 0.7469238638877869, "learning_rate": 1.824105691056911e-05, "loss": 0.1212, "step": 21635 }, { "epoch": 0.26390243902439026, "grad_norm": 0.8837869763374329, "learning_rate": 1.8240650406504067e-05, "loss": 0.1249, "step": 21640 }, { "epoch": 0.26396341463414635, "grad_norm": 0.5383014678955078, "learning_rate": 1.8240243902439025e-05, "loss": 0.1353, "step": 21645 }, { "epoch": 0.26402439024390245, "grad_norm": 0.8394599556922913, "learning_rate": 1.8239837398373987e-05, "loss": 0.1065, "step": 21650 }, { "epoch": 0.26408536585365855, "grad_norm": 2.1822757720947266, "learning_rate": 1.8239430894308945e-05, "loss": 0.0957, "step": 21655 }, { "epoch": 0.26414634146341465, "grad_norm": 0.44125625491142273, "learning_rate": 1.8239024390243903e-05, "loss": 0.0844, "step": 21660 }, { "epoch": 0.26420731707317074, "grad_norm": 0.5183900594711304, "learning_rate": 1.8238617886178865e-05, "loss": 0.0929, "step": 21665 }, { "epoch": 0.26426829268292684, "grad_norm": 1.0286061763763428, "learning_rate": 1.8238211382113823e-05, "loss": 0.1031, "step": 21670 }, { "epoch": 0.26432926829268294, "grad_norm": 0.5351669192314148, "learning_rate": 1.823780487804878e-05, "loss": 0.1197, "step": 21675 }, { "epoch": 0.26439024390243904, "grad_norm": 1.2119296789169312, "learning_rate": 1.8237398373983742e-05, "loss": 0.1039, "step": 21680 }, { "epoch": 0.26445121951219513, "grad_norm": 2.3700404167175293, "learning_rate": 1.82369918699187e-05, "loss": 0.1289, "step": 21685 }, { "epoch": 0.26451219512195123, "grad_norm": 0.9421448707580566, "learning_rate": 1.8236585365853662e-05, "loss": 0.126, "step": 21690 }, { "epoch": 0.26457317073170733, "grad_norm": 0.5445656180381775, "learning_rate": 1.823617886178862e-05, "loss": 0.0821, "step": 21695 }, { "epoch": 0.2646341463414634, "grad_norm": 0.693848192691803, "learning_rate": 1.8235772357723578e-05, "loss": 0.093, "step": 21700 }, { "epoch": 0.2646951219512195, "grad_norm": 0.9307454228401184, "learning_rate": 1.8235365853658536e-05, "loss": 0.0909, "step": 21705 }, { "epoch": 0.2647560975609756, "grad_norm": 0.469245970249176, "learning_rate": 1.8234959349593498e-05, "loss": 0.0992, "step": 21710 }, { "epoch": 0.2648170731707317, "grad_norm": 0.74325031042099, "learning_rate": 1.8234552845528456e-05, "loss": 0.1535, "step": 21715 }, { "epoch": 0.2648780487804878, "grad_norm": 0.8928621411323547, "learning_rate": 1.8234146341463418e-05, "loss": 0.1216, "step": 21720 }, { "epoch": 0.2649390243902439, "grad_norm": 0.6069973707199097, "learning_rate": 1.8233739837398376e-05, "loss": 0.0823, "step": 21725 }, { "epoch": 0.265, "grad_norm": 0.2062089741230011, "learning_rate": 1.8233333333333334e-05, "loss": 0.0698, "step": 21730 }, { "epoch": 0.2650609756097561, "grad_norm": 1.116214632987976, "learning_rate": 1.8232926829268295e-05, "loss": 0.105, "step": 21735 }, { "epoch": 0.2651219512195122, "grad_norm": 1.3659694194793701, "learning_rate": 1.8232520325203253e-05, "loss": 0.0856, "step": 21740 }, { "epoch": 0.2651829268292683, "grad_norm": 1.5156643390655518, "learning_rate": 1.823211382113821e-05, "loss": 0.0776, "step": 21745 }, { "epoch": 0.2652439024390244, "grad_norm": 0.8425150513648987, "learning_rate": 1.8231707317073173e-05, "loss": 0.1004, "step": 21750 }, { "epoch": 0.2653048780487805, "grad_norm": 0.6407515406608582, "learning_rate": 1.823130081300813e-05, "loss": 0.0925, "step": 21755 }, { "epoch": 0.2653658536585366, "grad_norm": 1.1325331926345825, "learning_rate": 1.823089430894309e-05, "loss": 0.1371, "step": 21760 }, { "epoch": 0.2654268292682927, "grad_norm": 0.7197691798210144, "learning_rate": 1.823048780487805e-05, "loss": 0.1153, "step": 21765 }, { "epoch": 0.2654878048780488, "grad_norm": 0.5052659511566162, "learning_rate": 1.823008130081301e-05, "loss": 0.0851, "step": 21770 }, { "epoch": 0.2655487804878049, "grad_norm": 0.5365696549415588, "learning_rate": 1.822967479674797e-05, "loss": 0.1173, "step": 21775 }, { "epoch": 0.265609756097561, "grad_norm": 0.9470045566558838, "learning_rate": 1.822926829268293e-05, "loss": 0.1033, "step": 21780 }, { "epoch": 0.2656707317073171, "grad_norm": 1.441342830657959, "learning_rate": 1.822886178861789e-05, "loss": 0.0901, "step": 21785 }, { "epoch": 0.2657317073170732, "grad_norm": 0.5843059420585632, "learning_rate": 1.8228455284552845e-05, "loss": 0.0907, "step": 21790 }, { "epoch": 0.2657926829268293, "grad_norm": 0.5528467893600464, "learning_rate": 1.8228048780487806e-05, "loss": 0.1236, "step": 21795 }, { "epoch": 0.2658536585365854, "grad_norm": 1.3395733833312988, "learning_rate": 1.8227642276422765e-05, "loss": 0.114, "step": 21800 }, { "epoch": 0.2659146341463415, "grad_norm": 1.164257287979126, "learning_rate": 1.8227235772357726e-05, "loss": 0.1186, "step": 21805 }, { "epoch": 0.2659756097560976, "grad_norm": 0.5037341117858887, "learning_rate": 1.8226829268292684e-05, "loss": 0.0542, "step": 21810 }, { "epoch": 0.2660365853658537, "grad_norm": 0.8987348079681396, "learning_rate": 1.8226422764227646e-05, "loss": 0.0808, "step": 21815 }, { "epoch": 0.26609756097560977, "grad_norm": 0.7039507031440735, "learning_rate": 1.8226016260162604e-05, "loss": 0.1019, "step": 21820 }, { "epoch": 0.26615853658536587, "grad_norm": 0.5895410776138306, "learning_rate": 1.8225609756097562e-05, "loss": 0.0941, "step": 21825 }, { "epoch": 0.26621951219512197, "grad_norm": 0.5183216333389282, "learning_rate": 1.822520325203252e-05, "loss": 0.1242, "step": 21830 }, { "epoch": 0.26628048780487806, "grad_norm": 0.9369369745254517, "learning_rate": 1.822479674796748e-05, "loss": 0.1183, "step": 21835 }, { "epoch": 0.26634146341463416, "grad_norm": 0.7221801280975342, "learning_rate": 1.822439024390244e-05, "loss": 0.1087, "step": 21840 }, { "epoch": 0.26640243902439026, "grad_norm": 1.078384280204773, "learning_rate": 1.82239837398374e-05, "loss": 0.1102, "step": 21845 }, { "epoch": 0.26646341463414636, "grad_norm": 0.6235749125480652, "learning_rate": 1.822357723577236e-05, "loss": 0.1046, "step": 21850 }, { "epoch": 0.26652439024390245, "grad_norm": 1.3265234231948853, "learning_rate": 1.8223170731707317e-05, "loss": 0.1151, "step": 21855 }, { "epoch": 0.26658536585365855, "grad_norm": 0.7081900238990784, "learning_rate": 1.822276422764228e-05, "loss": 0.0945, "step": 21860 }, { "epoch": 0.26664634146341465, "grad_norm": 0.7118531465530396, "learning_rate": 1.8222357723577237e-05, "loss": 0.1134, "step": 21865 }, { "epoch": 0.26670731707317075, "grad_norm": 0.6960749626159668, "learning_rate": 1.82219512195122e-05, "loss": 0.1158, "step": 21870 }, { "epoch": 0.26676829268292684, "grad_norm": 0.6487362384796143, "learning_rate": 1.8221544715447157e-05, "loss": 0.1406, "step": 21875 }, { "epoch": 0.26682926829268294, "grad_norm": 0.543585479259491, "learning_rate": 1.8221138211382115e-05, "loss": 0.0991, "step": 21880 }, { "epoch": 0.26689024390243904, "grad_norm": 0.6146721839904785, "learning_rate": 1.8220731707317073e-05, "loss": 0.1008, "step": 21885 }, { "epoch": 0.26695121951219514, "grad_norm": 0.8775612711906433, "learning_rate": 1.8220325203252035e-05, "loss": 0.0982, "step": 21890 }, { "epoch": 0.26701219512195123, "grad_norm": 0.6964243054389954, "learning_rate": 1.8219918699186993e-05, "loss": 0.1198, "step": 21895 }, { "epoch": 0.26707317073170733, "grad_norm": 1.1344367265701294, "learning_rate": 1.8219512195121954e-05, "loss": 0.1154, "step": 21900 }, { "epoch": 0.26713414634146343, "grad_norm": 1.1702404022216797, "learning_rate": 1.8219105691056912e-05, "loss": 0.1331, "step": 21905 }, { "epoch": 0.2671951219512195, "grad_norm": 1.393397569656372, "learning_rate": 1.821869918699187e-05, "loss": 0.1188, "step": 21910 }, { "epoch": 0.2672560975609756, "grad_norm": 0.9563729763031006, "learning_rate": 1.8218292682926832e-05, "loss": 0.118, "step": 21915 }, { "epoch": 0.2673170731707317, "grad_norm": 0.7505750060081482, "learning_rate": 1.821788617886179e-05, "loss": 0.0942, "step": 21920 }, { "epoch": 0.2673780487804878, "grad_norm": 0.8354902267456055, "learning_rate": 1.8217479674796748e-05, "loss": 0.0821, "step": 21925 }, { "epoch": 0.2674390243902439, "grad_norm": 0.807578980922699, "learning_rate": 1.821707317073171e-05, "loss": 0.1154, "step": 21930 }, { "epoch": 0.2675, "grad_norm": 0.6531137824058533, "learning_rate": 1.8216666666666668e-05, "loss": 0.1146, "step": 21935 }, { "epoch": 0.2675609756097561, "grad_norm": 0.6461347341537476, "learning_rate": 1.8216260162601626e-05, "loss": 0.0821, "step": 21940 }, { "epoch": 0.2676219512195122, "grad_norm": 1.5737898349761963, "learning_rate": 1.8215853658536588e-05, "loss": 0.1062, "step": 21945 }, { "epoch": 0.2676829268292683, "grad_norm": 0.6094998121261597, "learning_rate": 1.8215447154471546e-05, "loss": 0.1057, "step": 21950 }, { "epoch": 0.2677439024390244, "grad_norm": 0.3867991864681244, "learning_rate": 1.8215040650406507e-05, "loss": 0.1008, "step": 21955 }, { "epoch": 0.2678048780487805, "grad_norm": 0.677525520324707, "learning_rate": 1.8214634146341465e-05, "loss": 0.089, "step": 21960 }, { "epoch": 0.2678658536585366, "grad_norm": 0.998650074005127, "learning_rate": 1.8214227642276427e-05, "loss": 0.1012, "step": 21965 }, { "epoch": 0.2679268292682927, "grad_norm": 1.1238603591918945, "learning_rate": 1.821382113821138e-05, "loss": 0.0886, "step": 21970 }, { "epoch": 0.2679878048780488, "grad_norm": 1.6062231063842773, "learning_rate": 1.8213414634146343e-05, "loss": 0.1214, "step": 21975 }, { "epoch": 0.2680487804878049, "grad_norm": 0.6082218885421753, "learning_rate": 1.82130081300813e-05, "loss": 0.1, "step": 21980 }, { "epoch": 0.268109756097561, "grad_norm": 1.4354121685028076, "learning_rate": 1.8212601626016263e-05, "loss": 0.119, "step": 21985 }, { "epoch": 0.2681707317073171, "grad_norm": 0.5093905329704285, "learning_rate": 1.821219512195122e-05, "loss": 0.1178, "step": 21990 }, { "epoch": 0.2682317073170732, "grad_norm": 0.7431560158729553, "learning_rate": 1.8211788617886182e-05, "loss": 0.1227, "step": 21995 }, { "epoch": 0.2682926829268293, "grad_norm": 0.5748498439788818, "learning_rate": 1.821138211382114e-05, "loss": 0.0751, "step": 22000 }, { "epoch": 0.2683536585365854, "grad_norm": 0.7551609873771667, "learning_rate": 1.82109756097561e-05, "loss": 0.102, "step": 22005 }, { "epoch": 0.2684146341463415, "grad_norm": 1.1938250064849854, "learning_rate": 1.8210569105691057e-05, "loss": 0.0805, "step": 22010 }, { "epoch": 0.2684756097560976, "grad_norm": 0.4796825349330902, "learning_rate": 1.8210162601626018e-05, "loss": 0.0928, "step": 22015 }, { "epoch": 0.2685365853658537, "grad_norm": 0.7312179803848267, "learning_rate": 1.8209756097560976e-05, "loss": 0.1479, "step": 22020 }, { "epoch": 0.2685975609756098, "grad_norm": 0.9806240200996399, "learning_rate": 1.8209349593495938e-05, "loss": 0.1448, "step": 22025 }, { "epoch": 0.26865853658536587, "grad_norm": 1.0630414485931396, "learning_rate": 1.8208943089430896e-05, "loss": 0.1153, "step": 22030 }, { "epoch": 0.26871951219512197, "grad_norm": 0.4158112704753876, "learning_rate": 1.8208536585365854e-05, "loss": 0.0907, "step": 22035 }, { "epoch": 0.26878048780487807, "grad_norm": 0.8806180953979492, "learning_rate": 1.8208130081300816e-05, "loss": 0.1004, "step": 22040 }, { "epoch": 0.26884146341463416, "grad_norm": 0.8425145745277405, "learning_rate": 1.8207723577235774e-05, "loss": 0.1225, "step": 22045 }, { "epoch": 0.26890243902439026, "grad_norm": 0.6808643937110901, "learning_rate": 1.8207317073170735e-05, "loss": 0.0979, "step": 22050 }, { "epoch": 0.26896341463414636, "grad_norm": 0.7258196473121643, "learning_rate": 1.8206910569105693e-05, "loss": 0.1078, "step": 22055 }, { "epoch": 0.26902439024390246, "grad_norm": 1.0234431028366089, "learning_rate": 1.820650406504065e-05, "loss": 0.1876, "step": 22060 }, { "epoch": 0.26908536585365855, "grad_norm": 0.7633170485496521, "learning_rate": 1.820609756097561e-05, "loss": 0.1078, "step": 22065 }, { "epoch": 0.26914634146341465, "grad_norm": 1.240821361541748, "learning_rate": 1.820569105691057e-05, "loss": 0.1149, "step": 22070 }, { "epoch": 0.26920731707317075, "grad_norm": 0.7845515012741089, "learning_rate": 1.820528455284553e-05, "loss": 0.1062, "step": 22075 }, { "epoch": 0.26926829268292685, "grad_norm": 0.611852765083313, "learning_rate": 1.820487804878049e-05, "loss": 0.0658, "step": 22080 }, { "epoch": 0.26932926829268294, "grad_norm": 0.8700068593025208, "learning_rate": 1.820447154471545e-05, "loss": 0.0814, "step": 22085 }, { "epoch": 0.26939024390243904, "grad_norm": 0.42622843384742737, "learning_rate": 1.8204065040650407e-05, "loss": 0.1169, "step": 22090 }, { "epoch": 0.26945121951219514, "grad_norm": 1.4071892499923706, "learning_rate": 1.8203658536585365e-05, "loss": 0.0857, "step": 22095 }, { "epoch": 0.26951219512195124, "grad_norm": 0.8873327374458313, "learning_rate": 1.8203252032520327e-05, "loss": 0.0836, "step": 22100 }, { "epoch": 0.26957317073170733, "grad_norm": 1.366124153137207, "learning_rate": 1.8202845528455285e-05, "loss": 0.1203, "step": 22105 }, { "epoch": 0.26963414634146343, "grad_norm": 3.77459454536438, "learning_rate": 1.8202439024390246e-05, "loss": 0.1041, "step": 22110 }, { "epoch": 0.26969512195121953, "grad_norm": 1.2165627479553223, "learning_rate": 1.8202032520325205e-05, "loss": 0.081, "step": 22115 }, { "epoch": 0.2697560975609756, "grad_norm": 0.8212229609489441, "learning_rate": 1.8201626016260163e-05, "loss": 0.1137, "step": 22120 }, { "epoch": 0.2698170731707317, "grad_norm": 0.6767081618309021, "learning_rate": 1.8201219512195124e-05, "loss": 0.096, "step": 22125 }, { "epoch": 0.2698780487804878, "grad_norm": 0.5007617473602295, "learning_rate": 1.8200813008130082e-05, "loss": 0.0826, "step": 22130 }, { "epoch": 0.2699390243902439, "grad_norm": 0.7244983911514282, "learning_rate": 1.8200406504065044e-05, "loss": 0.1681, "step": 22135 }, { "epoch": 0.27, "grad_norm": 2.7269022464752197, "learning_rate": 1.8200000000000002e-05, "loss": 0.0897, "step": 22140 }, { "epoch": 0.2700609756097561, "grad_norm": 0.4850207567214966, "learning_rate": 1.8199593495934963e-05, "loss": 0.0877, "step": 22145 }, { "epoch": 0.2701219512195122, "grad_norm": 0.5730026960372925, "learning_rate": 1.8199186991869918e-05, "loss": 0.0925, "step": 22150 }, { "epoch": 0.2701829268292683, "grad_norm": 0.543437659740448, "learning_rate": 1.819878048780488e-05, "loss": 0.1165, "step": 22155 }, { "epoch": 0.2702439024390244, "grad_norm": 0.5473348498344421, "learning_rate": 1.8198373983739838e-05, "loss": 0.1147, "step": 22160 }, { "epoch": 0.2703048780487805, "grad_norm": 0.6364433169364929, "learning_rate": 1.81979674796748e-05, "loss": 0.0832, "step": 22165 }, { "epoch": 0.2703658536585366, "grad_norm": 1.1863124370574951, "learning_rate": 1.8197560975609757e-05, "loss": 0.1451, "step": 22170 }, { "epoch": 0.2704268292682927, "grad_norm": 0.5942081809043884, "learning_rate": 1.819715447154472e-05, "loss": 0.1104, "step": 22175 }, { "epoch": 0.2704878048780488, "grad_norm": 0.573028564453125, "learning_rate": 1.8196747967479677e-05, "loss": 0.1054, "step": 22180 }, { "epoch": 0.2705487804878049, "grad_norm": 17.638992309570312, "learning_rate": 1.8196341463414635e-05, "loss": 0.1731, "step": 22185 }, { "epoch": 0.270609756097561, "grad_norm": 0.7342460751533508, "learning_rate": 1.8195934959349593e-05, "loss": 0.0992, "step": 22190 }, { "epoch": 0.2706707317073171, "grad_norm": 0.8655105233192444, "learning_rate": 1.8195528455284555e-05, "loss": 0.0977, "step": 22195 }, { "epoch": 0.2707317073170732, "grad_norm": 0.842759907245636, "learning_rate": 1.8195121951219513e-05, "loss": 0.1049, "step": 22200 }, { "epoch": 0.2707926829268293, "grad_norm": 0.8764829039573669, "learning_rate": 1.8194715447154475e-05, "loss": 0.106, "step": 22205 }, { "epoch": 0.2708536585365854, "grad_norm": 0.7786609530448914, "learning_rate": 1.8194308943089433e-05, "loss": 0.149, "step": 22210 }, { "epoch": 0.2709146341463415, "grad_norm": 1.2409875392913818, "learning_rate": 1.819390243902439e-05, "loss": 0.1076, "step": 22215 }, { "epoch": 0.2709756097560976, "grad_norm": 0.9224186539649963, "learning_rate": 1.8193495934959352e-05, "loss": 0.113, "step": 22220 }, { "epoch": 0.2710365853658537, "grad_norm": 0.34640833735466003, "learning_rate": 1.819308943089431e-05, "loss": 0.0693, "step": 22225 }, { "epoch": 0.2710975609756098, "grad_norm": 1.2343984842300415, "learning_rate": 1.8192682926829272e-05, "loss": 0.126, "step": 22230 }, { "epoch": 0.2711585365853659, "grad_norm": 1.4584856033325195, "learning_rate": 1.819227642276423e-05, "loss": 0.132, "step": 22235 }, { "epoch": 0.27121951219512197, "grad_norm": 0.6414416432380676, "learning_rate": 1.8191869918699188e-05, "loss": 0.1391, "step": 22240 }, { "epoch": 0.27128048780487807, "grad_norm": 0.5736677646636963, "learning_rate": 1.8191463414634146e-05, "loss": 0.111, "step": 22245 }, { "epoch": 0.27134146341463417, "grad_norm": 0.732333242893219, "learning_rate": 1.8191056910569108e-05, "loss": 0.0805, "step": 22250 }, { "epoch": 0.27140243902439026, "grad_norm": 0.8919379711151123, "learning_rate": 1.8190650406504066e-05, "loss": 0.1382, "step": 22255 }, { "epoch": 0.27146341463414636, "grad_norm": 0.6439031362533569, "learning_rate": 1.8190243902439027e-05, "loss": 0.107, "step": 22260 }, { "epoch": 0.27152439024390246, "grad_norm": 1.075624942779541, "learning_rate": 1.8189837398373986e-05, "loss": 0.1157, "step": 22265 }, { "epoch": 0.27158536585365856, "grad_norm": 0.6650589108467102, "learning_rate": 1.8189430894308944e-05, "loss": 0.0871, "step": 22270 }, { "epoch": 0.27164634146341465, "grad_norm": 0.9232680201530457, "learning_rate": 1.8189024390243902e-05, "loss": 0.1128, "step": 22275 }, { "epoch": 0.27170731707317075, "grad_norm": 0.8200042247772217, "learning_rate": 1.8188617886178863e-05, "loss": 0.0853, "step": 22280 }, { "epoch": 0.27176829268292685, "grad_norm": 2.051741123199463, "learning_rate": 1.818821138211382e-05, "loss": 0.0991, "step": 22285 }, { "epoch": 0.27182926829268295, "grad_norm": 0.6616097092628479, "learning_rate": 1.8187804878048783e-05, "loss": 0.1145, "step": 22290 }, { "epoch": 0.27189024390243904, "grad_norm": 0.5624186396598816, "learning_rate": 1.818739837398374e-05, "loss": 0.0959, "step": 22295 }, { "epoch": 0.27195121951219514, "grad_norm": 0.5866892337799072, "learning_rate": 1.81869918699187e-05, "loss": 0.0852, "step": 22300 }, { "epoch": 0.27201219512195124, "grad_norm": 0.9101602435112, "learning_rate": 1.818658536585366e-05, "loss": 0.1215, "step": 22305 }, { "epoch": 0.27207317073170734, "grad_norm": 0.5357284545898438, "learning_rate": 1.818617886178862e-05, "loss": 0.1057, "step": 22310 }, { "epoch": 0.27213414634146343, "grad_norm": 0.9261302947998047, "learning_rate": 1.818577235772358e-05, "loss": 0.0815, "step": 22315 }, { "epoch": 0.27219512195121953, "grad_norm": 0.4440728724002838, "learning_rate": 1.818536585365854e-05, "loss": 0.0805, "step": 22320 }, { "epoch": 0.27225609756097563, "grad_norm": 1.6472004652023315, "learning_rate": 1.81849593495935e-05, "loss": 0.1114, "step": 22325 }, { "epoch": 0.2723170731707317, "grad_norm": 1.1614257097244263, "learning_rate": 1.8184552845528455e-05, "loss": 0.1391, "step": 22330 }, { "epoch": 0.2723780487804878, "grad_norm": 0.751933217048645, "learning_rate": 1.8184146341463416e-05, "loss": 0.1214, "step": 22335 }, { "epoch": 0.2724390243902439, "grad_norm": 0.8319097757339478, "learning_rate": 1.8183739837398374e-05, "loss": 0.0997, "step": 22340 }, { "epoch": 0.2725, "grad_norm": 1.222110629081726, "learning_rate": 1.8183333333333336e-05, "loss": 0.1038, "step": 22345 }, { "epoch": 0.2725609756097561, "grad_norm": 1.4729838371276855, "learning_rate": 1.8182926829268294e-05, "loss": 0.1277, "step": 22350 }, { "epoch": 0.2726219512195122, "grad_norm": 0.7006365656852722, "learning_rate": 1.8182520325203256e-05, "loss": 0.0874, "step": 22355 }, { "epoch": 0.2726829268292683, "grad_norm": 0.7917077541351318, "learning_rate": 1.818211382113821e-05, "loss": 0.1235, "step": 22360 }, { "epoch": 0.2727439024390244, "grad_norm": 0.8779376149177551, "learning_rate": 1.8181707317073172e-05, "loss": 0.1358, "step": 22365 }, { "epoch": 0.2728048780487805, "grad_norm": 0.7205256819725037, "learning_rate": 1.818130081300813e-05, "loss": 0.0914, "step": 22370 }, { "epoch": 0.2728658536585366, "grad_norm": 0.5612586736679077, "learning_rate": 1.818089430894309e-05, "loss": 0.0944, "step": 22375 }, { "epoch": 0.2729268292682927, "grad_norm": 0.5119507312774658, "learning_rate": 1.818048780487805e-05, "loss": 0.0788, "step": 22380 }, { "epoch": 0.2729878048780488, "grad_norm": 2.951415777206421, "learning_rate": 1.818008130081301e-05, "loss": 0.1271, "step": 22385 }, { "epoch": 0.2730487804878049, "grad_norm": 0.7240999937057495, "learning_rate": 1.817967479674797e-05, "loss": 0.0927, "step": 22390 }, { "epoch": 0.273109756097561, "grad_norm": 0.8860188722610474, "learning_rate": 1.8179268292682927e-05, "loss": 0.1029, "step": 22395 }, { "epoch": 0.2731707317073171, "grad_norm": 0.5149354338645935, "learning_rate": 1.817886178861789e-05, "loss": 0.1059, "step": 22400 }, { "epoch": 0.2732317073170732, "grad_norm": 0.7547417879104614, "learning_rate": 1.8178455284552847e-05, "loss": 0.1104, "step": 22405 }, { "epoch": 0.2732926829268293, "grad_norm": 1.1872892379760742, "learning_rate": 1.817804878048781e-05, "loss": 0.123, "step": 22410 }, { "epoch": 0.2733536585365854, "grad_norm": 1.2691097259521484, "learning_rate": 1.8177642276422767e-05, "loss": 0.1073, "step": 22415 }, { "epoch": 0.2734146341463415, "grad_norm": 0.7418218851089478, "learning_rate": 1.8177235772357725e-05, "loss": 0.1134, "step": 22420 }, { "epoch": 0.2734756097560976, "grad_norm": 0.6631916761398315, "learning_rate": 1.8176829268292683e-05, "loss": 0.08, "step": 22425 }, { "epoch": 0.2735365853658537, "grad_norm": 0.8189491033554077, "learning_rate": 1.8176422764227644e-05, "loss": 0.1007, "step": 22430 }, { "epoch": 0.2735975609756098, "grad_norm": 0.5179533362388611, "learning_rate": 1.8176016260162603e-05, "loss": 0.0934, "step": 22435 }, { "epoch": 0.2736585365853659, "grad_norm": 0.8300091624259949, "learning_rate": 1.8175609756097564e-05, "loss": 0.1181, "step": 22440 }, { "epoch": 0.273719512195122, "grad_norm": 1.2227908372879028, "learning_rate": 1.8175203252032522e-05, "loss": 0.1116, "step": 22445 }, { "epoch": 0.27378048780487807, "grad_norm": 0.6994615793228149, "learning_rate": 1.817479674796748e-05, "loss": 0.1187, "step": 22450 }, { "epoch": 0.27384146341463417, "grad_norm": 0.6264452934265137, "learning_rate": 1.817439024390244e-05, "loss": 0.1349, "step": 22455 }, { "epoch": 0.27390243902439027, "grad_norm": 0.8019965887069702, "learning_rate": 1.81739837398374e-05, "loss": 0.0957, "step": 22460 }, { "epoch": 0.27396341463414636, "grad_norm": 0.8693569898605347, "learning_rate": 1.8173577235772358e-05, "loss": 0.1172, "step": 22465 }, { "epoch": 0.27402439024390246, "grad_norm": 1.5825610160827637, "learning_rate": 1.817317073170732e-05, "loss": 0.108, "step": 22470 }, { "epoch": 0.27408536585365856, "grad_norm": 0.6410928964614868, "learning_rate": 1.8172764227642278e-05, "loss": 0.145, "step": 22475 }, { "epoch": 0.27414634146341466, "grad_norm": 0.6250145435333252, "learning_rate": 1.8172357723577236e-05, "loss": 0.0956, "step": 22480 }, { "epoch": 0.27420731707317075, "grad_norm": 1.479358196258545, "learning_rate": 1.8171951219512197e-05, "loss": 0.1188, "step": 22485 }, { "epoch": 0.27426829268292685, "grad_norm": 0.9135880470275879, "learning_rate": 1.8171544715447156e-05, "loss": 0.0904, "step": 22490 }, { "epoch": 0.27432926829268295, "grad_norm": 0.6576033234596252, "learning_rate": 1.8171138211382117e-05, "loss": 0.0822, "step": 22495 }, { "epoch": 0.27439024390243905, "grad_norm": 0.9535195231437683, "learning_rate": 1.8170731707317075e-05, "loss": 0.1124, "step": 22500 }, { "epoch": 0.27445121951219514, "grad_norm": 0.7304551005363464, "learning_rate": 1.8170325203252033e-05, "loss": 0.0953, "step": 22505 }, { "epoch": 0.27451219512195124, "grad_norm": 0.8747267127037048, "learning_rate": 1.816991869918699e-05, "loss": 0.1129, "step": 22510 }, { "epoch": 0.27457317073170734, "grad_norm": 2.026184320449829, "learning_rate": 1.8169512195121953e-05, "loss": 0.1048, "step": 22515 }, { "epoch": 0.27463414634146344, "grad_norm": 1.7452044486999512, "learning_rate": 1.816910569105691e-05, "loss": 0.0957, "step": 22520 }, { "epoch": 0.27469512195121953, "grad_norm": 1.3877085447311401, "learning_rate": 1.8168699186991873e-05, "loss": 0.1106, "step": 22525 }, { "epoch": 0.27475609756097563, "grad_norm": 0.9641222953796387, "learning_rate": 1.816829268292683e-05, "loss": 0.0846, "step": 22530 }, { "epoch": 0.27481707317073173, "grad_norm": 0.6627850532531738, "learning_rate": 1.8167886178861792e-05, "loss": 0.1675, "step": 22535 }, { "epoch": 0.2748780487804878, "grad_norm": 0.5005890130996704, "learning_rate": 1.8167479674796747e-05, "loss": 0.1172, "step": 22540 }, { "epoch": 0.2749390243902439, "grad_norm": 1.3872158527374268, "learning_rate": 1.816707317073171e-05, "loss": 0.0894, "step": 22545 }, { "epoch": 0.275, "grad_norm": 0.6793129444122314, "learning_rate": 1.8166666666666667e-05, "loss": 0.1031, "step": 22550 }, { "epoch": 0.2750609756097561, "grad_norm": 0.9085506200790405, "learning_rate": 1.8166260162601628e-05, "loss": 0.0891, "step": 22555 }, { "epoch": 0.2751219512195122, "grad_norm": 0.8300836682319641, "learning_rate": 1.8165853658536586e-05, "loss": 0.1247, "step": 22560 }, { "epoch": 0.2751829268292683, "grad_norm": 2.9250245094299316, "learning_rate": 1.8165447154471548e-05, "loss": 0.0939, "step": 22565 }, { "epoch": 0.2752439024390244, "grad_norm": 0.6620883345603943, "learning_rate": 1.8165040650406506e-05, "loss": 0.0969, "step": 22570 }, { "epoch": 0.2753048780487805, "grad_norm": 1.1356853246688843, "learning_rate": 1.8164634146341464e-05, "loss": 0.12, "step": 22575 }, { "epoch": 0.2753658536585366, "grad_norm": 0.7383288145065308, "learning_rate": 1.8164227642276426e-05, "loss": 0.1065, "step": 22580 }, { "epoch": 0.2754268292682927, "grad_norm": 0.8828492760658264, "learning_rate": 1.8163821138211384e-05, "loss": 0.0931, "step": 22585 }, { "epoch": 0.2754878048780488, "grad_norm": 1.4161076545715332, "learning_rate": 1.8163414634146345e-05, "loss": 0.1149, "step": 22590 }, { "epoch": 0.2755487804878049, "grad_norm": 0.955790102481842, "learning_rate": 1.8163008130081303e-05, "loss": 0.114, "step": 22595 }, { "epoch": 0.275609756097561, "grad_norm": 0.6605807542800903, "learning_rate": 1.816260162601626e-05, "loss": 0.1373, "step": 22600 }, { "epoch": 0.2756707317073171, "grad_norm": 0.44116878509521484, "learning_rate": 1.816219512195122e-05, "loss": 0.0851, "step": 22605 }, { "epoch": 0.2757317073170732, "grad_norm": 0.7846062183380127, "learning_rate": 1.816178861788618e-05, "loss": 0.1302, "step": 22610 }, { "epoch": 0.2757926829268293, "grad_norm": 0.8493690490722656, "learning_rate": 1.816138211382114e-05, "loss": 0.094, "step": 22615 }, { "epoch": 0.2758536585365854, "grad_norm": 0.5424134731292725, "learning_rate": 1.81609756097561e-05, "loss": 0.0938, "step": 22620 }, { "epoch": 0.2759146341463415, "grad_norm": 0.582248330116272, "learning_rate": 1.816056910569106e-05, "loss": 0.0993, "step": 22625 }, { "epoch": 0.2759756097560976, "grad_norm": 1.6534290313720703, "learning_rate": 1.8160162601626017e-05, "loss": 0.0972, "step": 22630 }, { "epoch": 0.2760365853658537, "grad_norm": 0.7516693472862244, "learning_rate": 1.8159756097560975e-05, "loss": 0.0902, "step": 22635 }, { "epoch": 0.2760975609756098, "grad_norm": 0.7912512421607971, "learning_rate": 1.8159349593495937e-05, "loss": 0.1, "step": 22640 }, { "epoch": 0.2761585365853659, "grad_norm": 0.993596076965332, "learning_rate": 1.8158943089430895e-05, "loss": 0.1103, "step": 22645 }, { "epoch": 0.276219512195122, "grad_norm": 0.36386868357658386, "learning_rate": 1.8158536585365856e-05, "loss": 0.1376, "step": 22650 }, { "epoch": 0.27628048780487807, "grad_norm": 0.7677507996559143, "learning_rate": 1.8158130081300814e-05, "loss": 0.0904, "step": 22655 }, { "epoch": 0.27634146341463417, "grad_norm": 0.6470456719398499, "learning_rate": 1.8157723577235773e-05, "loss": 0.1068, "step": 22660 }, { "epoch": 0.27640243902439027, "grad_norm": 0.7734121084213257, "learning_rate": 1.8157317073170734e-05, "loss": 0.0968, "step": 22665 }, { "epoch": 0.27646341463414636, "grad_norm": 2.101776123046875, "learning_rate": 1.8156910569105692e-05, "loss": 0.1478, "step": 22670 }, { "epoch": 0.27652439024390246, "grad_norm": 4.796646595001221, "learning_rate": 1.8156504065040654e-05, "loss": 0.0841, "step": 22675 }, { "epoch": 0.27658536585365856, "grad_norm": 0.6858411431312561, "learning_rate": 1.8156097560975612e-05, "loss": 0.1212, "step": 22680 }, { "epoch": 0.27664634146341466, "grad_norm": 0.8975194692611694, "learning_rate": 1.815569105691057e-05, "loss": 0.0807, "step": 22685 }, { "epoch": 0.27670731707317076, "grad_norm": 0.7807413935661316, "learning_rate": 1.8155284552845528e-05, "loss": 0.1068, "step": 22690 }, { "epoch": 0.27676829268292685, "grad_norm": 0.742057740688324, "learning_rate": 1.815487804878049e-05, "loss": 0.109, "step": 22695 }, { "epoch": 0.27682926829268295, "grad_norm": 0.6996167898178101, "learning_rate": 1.8154471544715448e-05, "loss": 0.0852, "step": 22700 }, { "epoch": 0.27689024390243905, "grad_norm": 0.49011141061782837, "learning_rate": 1.815406504065041e-05, "loss": 0.1387, "step": 22705 }, { "epoch": 0.27695121951219515, "grad_norm": 0.5782917141914368, "learning_rate": 1.8153658536585367e-05, "loss": 0.0983, "step": 22710 }, { "epoch": 0.27701219512195124, "grad_norm": 0.3712138831615448, "learning_rate": 1.815325203252033e-05, "loss": 0.1107, "step": 22715 }, { "epoch": 0.27707317073170734, "grad_norm": 0.5453213453292847, "learning_rate": 1.8152845528455284e-05, "loss": 0.0881, "step": 22720 }, { "epoch": 0.27713414634146344, "grad_norm": 0.7756600975990295, "learning_rate": 1.8152439024390245e-05, "loss": 0.0829, "step": 22725 }, { "epoch": 0.27719512195121954, "grad_norm": 0.6491608023643494, "learning_rate": 1.8152032520325203e-05, "loss": 0.0931, "step": 22730 }, { "epoch": 0.27725609756097563, "grad_norm": 0.6919453740119934, "learning_rate": 1.8151626016260165e-05, "loss": 0.0747, "step": 22735 }, { "epoch": 0.27731707317073173, "grad_norm": 0.6986748576164246, "learning_rate": 1.8151219512195123e-05, "loss": 0.1035, "step": 22740 }, { "epoch": 0.27737804878048783, "grad_norm": 0.6031416058540344, "learning_rate": 1.8150813008130084e-05, "loss": 0.1131, "step": 22745 }, { "epoch": 0.2774390243902439, "grad_norm": 0.9413408041000366, "learning_rate": 1.8150406504065043e-05, "loss": 0.1139, "step": 22750 }, { "epoch": 0.2775, "grad_norm": 0.7467391490936279, "learning_rate": 1.815e-05, "loss": 0.1498, "step": 22755 }, { "epoch": 0.2775609756097561, "grad_norm": 0.6367422938346863, "learning_rate": 1.8149593495934962e-05, "loss": 0.0733, "step": 22760 }, { "epoch": 0.2776219512195122, "grad_norm": 0.6444888710975647, "learning_rate": 1.814918699186992e-05, "loss": 0.1244, "step": 22765 }, { "epoch": 0.2776829268292683, "grad_norm": 0.9674644470214844, "learning_rate": 1.814878048780488e-05, "loss": 0.0842, "step": 22770 }, { "epoch": 0.2777439024390244, "grad_norm": 0.7610085606575012, "learning_rate": 1.814837398373984e-05, "loss": 0.0763, "step": 22775 }, { "epoch": 0.2778048780487805, "grad_norm": 1.4923266172409058, "learning_rate": 1.8147967479674798e-05, "loss": 0.0987, "step": 22780 }, { "epoch": 0.2778658536585366, "grad_norm": 0.8032163381576538, "learning_rate": 1.8147560975609756e-05, "loss": 0.1308, "step": 22785 }, { "epoch": 0.2779268292682927, "grad_norm": 1.1107147932052612, "learning_rate": 1.8147154471544718e-05, "loss": 0.0756, "step": 22790 }, { "epoch": 0.2779878048780488, "grad_norm": 0.7607312202453613, "learning_rate": 1.8146747967479676e-05, "loss": 0.0886, "step": 22795 }, { "epoch": 0.2780487804878049, "grad_norm": 0.5539988875389099, "learning_rate": 1.8146341463414637e-05, "loss": 0.0997, "step": 22800 }, { "epoch": 0.278109756097561, "grad_norm": 0.6245870590209961, "learning_rate": 1.8145934959349596e-05, "loss": 0.1072, "step": 22805 }, { "epoch": 0.2781707317073171, "grad_norm": 0.8058712482452393, "learning_rate": 1.8145528455284554e-05, "loss": 0.1345, "step": 22810 }, { "epoch": 0.2782317073170732, "grad_norm": 0.9330877065658569, "learning_rate": 1.8145121951219512e-05, "loss": 0.1027, "step": 22815 }, { "epoch": 0.2782926829268293, "grad_norm": 0.9826483726501465, "learning_rate": 1.8144715447154473e-05, "loss": 0.0746, "step": 22820 }, { "epoch": 0.2783536585365854, "grad_norm": 1.068574070930481, "learning_rate": 1.814430894308943e-05, "loss": 0.0861, "step": 22825 }, { "epoch": 0.2784146341463415, "grad_norm": 0.935968816280365, "learning_rate": 1.8143902439024393e-05, "loss": 0.0908, "step": 22830 }, { "epoch": 0.2784756097560976, "grad_norm": 0.6071560382843018, "learning_rate": 1.814349593495935e-05, "loss": 0.0953, "step": 22835 }, { "epoch": 0.2785365853658537, "grad_norm": 2.1149513721466064, "learning_rate": 1.814308943089431e-05, "loss": 0.1183, "step": 22840 }, { "epoch": 0.2785975609756098, "grad_norm": 0.5597639083862305, "learning_rate": 1.814268292682927e-05, "loss": 0.1168, "step": 22845 }, { "epoch": 0.2786585365853659, "grad_norm": 0.6597872376441956, "learning_rate": 1.814227642276423e-05, "loss": 0.076, "step": 22850 }, { "epoch": 0.278719512195122, "grad_norm": 0.45821377635002136, "learning_rate": 1.8141869918699187e-05, "loss": 0.07, "step": 22855 }, { "epoch": 0.2787804878048781, "grad_norm": 1.4713177680969238, "learning_rate": 1.814146341463415e-05, "loss": 0.108, "step": 22860 }, { "epoch": 0.27884146341463417, "grad_norm": 0.768419086933136, "learning_rate": 1.8141056910569107e-05, "loss": 0.1053, "step": 22865 }, { "epoch": 0.27890243902439027, "grad_norm": 2.907478094100952, "learning_rate": 1.8140650406504065e-05, "loss": 0.1116, "step": 22870 }, { "epoch": 0.27896341463414637, "grad_norm": 1.5697561502456665, "learning_rate": 1.8140243902439026e-05, "loss": 0.0963, "step": 22875 }, { "epoch": 0.27902439024390246, "grad_norm": 0.8387352228164673, "learning_rate": 1.8139837398373984e-05, "loss": 0.0812, "step": 22880 }, { "epoch": 0.27908536585365856, "grad_norm": 0.6995304822921753, "learning_rate": 1.8139430894308946e-05, "loss": 0.0965, "step": 22885 }, { "epoch": 0.27914634146341466, "grad_norm": 0.9757401347160339, "learning_rate": 1.8139024390243904e-05, "loss": 0.0901, "step": 22890 }, { "epoch": 0.27920731707317076, "grad_norm": 1.5547419786453247, "learning_rate": 1.8138617886178866e-05, "loss": 0.1184, "step": 22895 }, { "epoch": 0.27926829268292686, "grad_norm": 0.7793885469436646, "learning_rate": 1.813821138211382e-05, "loss": 0.11, "step": 22900 }, { "epoch": 0.27932926829268295, "grad_norm": 0.8738606572151184, "learning_rate": 1.8137804878048782e-05, "loss": 0.0939, "step": 22905 }, { "epoch": 0.27939024390243905, "grad_norm": 0.9382155537605286, "learning_rate": 1.813739837398374e-05, "loss": 0.0809, "step": 22910 }, { "epoch": 0.27945121951219515, "grad_norm": 0.7535094022750854, "learning_rate": 1.81369918699187e-05, "loss": 0.1171, "step": 22915 }, { "epoch": 0.27951219512195125, "grad_norm": 0.6347646117210388, "learning_rate": 1.813658536585366e-05, "loss": 0.1478, "step": 22920 }, { "epoch": 0.27957317073170734, "grad_norm": 0.6576077938079834, "learning_rate": 1.813617886178862e-05, "loss": 0.1137, "step": 22925 }, { "epoch": 0.27963414634146344, "grad_norm": 0.49847060441970825, "learning_rate": 1.813577235772358e-05, "loss": 0.0996, "step": 22930 }, { "epoch": 0.27969512195121954, "grad_norm": 0.6591958999633789, "learning_rate": 1.8135365853658537e-05, "loss": 0.0844, "step": 22935 }, { "epoch": 0.27975609756097564, "grad_norm": 0.7095420360565186, "learning_rate": 1.81349593495935e-05, "loss": 0.0942, "step": 22940 }, { "epoch": 0.27981707317073173, "grad_norm": 0.5858049392700195, "learning_rate": 1.8134552845528457e-05, "loss": 0.0887, "step": 22945 }, { "epoch": 0.27987804878048783, "grad_norm": 0.6069574356079102, "learning_rate": 1.8134146341463415e-05, "loss": 0.0883, "step": 22950 }, { "epoch": 0.27993902439024393, "grad_norm": 1.1377155780792236, "learning_rate": 1.8133739837398377e-05, "loss": 0.1111, "step": 22955 }, { "epoch": 0.28, "grad_norm": 1.2626184225082397, "learning_rate": 1.8133333333333335e-05, "loss": 0.1291, "step": 22960 }, { "epoch": 0.2800609756097561, "grad_norm": 0.5204707384109497, "learning_rate": 1.8132926829268293e-05, "loss": 0.0837, "step": 22965 }, { "epoch": 0.2801219512195122, "grad_norm": 0.6275805830955505, "learning_rate": 1.8132520325203254e-05, "loss": 0.0995, "step": 22970 }, { "epoch": 0.2801829268292683, "grad_norm": 0.8268606066703796, "learning_rate": 1.8132113821138213e-05, "loss": 0.0909, "step": 22975 }, { "epoch": 0.2802439024390244, "grad_norm": 0.4030440151691437, "learning_rate": 1.8131707317073174e-05, "loss": 0.0525, "step": 22980 }, { "epoch": 0.2803048780487805, "grad_norm": 0.5468896627426147, "learning_rate": 1.8131300813008132e-05, "loss": 0.1046, "step": 22985 }, { "epoch": 0.2803658536585366, "grad_norm": 0.9139882326126099, "learning_rate": 1.813089430894309e-05, "loss": 0.1325, "step": 22990 }, { "epoch": 0.2804268292682927, "grad_norm": 1.1090589761734009, "learning_rate": 1.813048780487805e-05, "loss": 0.1176, "step": 22995 }, { "epoch": 0.2804878048780488, "grad_norm": 0.4632267355918884, "learning_rate": 1.813008130081301e-05, "loss": 0.0721, "step": 23000 }, { "epoch": 0.2805487804878049, "grad_norm": 0.871014416217804, "learning_rate": 1.8129674796747968e-05, "loss": 0.0882, "step": 23005 }, { "epoch": 0.280609756097561, "grad_norm": 1.2787734270095825, "learning_rate": 1.812926829268293e-05, "loss": 0.1072, "step": 23010 }, { "epoch": 0.2806707317073171, "grad_norm": 0.8821853995323181, "learning_rate": 1.8128861788617888e-05, "loss": 0.1269, "step": 23015 }, { "epoch": 0.2807317073170732, "grad_norm": 0.6251704692840576, "learning_rate": 1.8128455284552846e-05, "loss": 0.0806, "step": 23020 }, { "epoch": 0.2807926829268293, "grad_norm": 0.5318490862846375, "learning_rate": 1.8128048780487807e-05, "loss": 0.1163, "step": 23025 }, { "epoch": 0.2808536585365854, "grad_norm": 0.9968268275260925, "learning_rate": 1.8127642276422765e-05, "loss": 0.1354, "step": 23030 }, { "epoch": 0.2809146341463415, "grad_norm": 0.6613959670066833, "learning_rate": 1.8127235772357724e-05, "loss": 0.0637, "step": 23035 }, { "epoch": 0.2809756097560976, "grad_norm": 0.8908623456954956, "learning_rate": 1.8126829268292685e-05, "loss": 0.0965, "step": 23040 }, { "epoch": 0.2810365853658537, "grad_norm": 0.9821550250053406, "learning_rate": 1.8126422764227643e-05, "loss": 0.0782, "step": 23045 }, { "epoch": 0.2810975609756098, "grad_norm": 0.7236118316650391, "learning_rate": 1.81260162601626e-05, "loss": 0.0913, "step": 23050 }, { "epoch": 0.2811585365853659, "grad_norm": 0.6895120143890381, "learning_rate": 1.8125609756097563e-05, "loss": 0.1065, "step": 23055 }, { "epoch": 0.281219512195122, "grad_norm": 0.6857832074165344, "learning_rate": 1.812520325203252e-05, "loss": 0.0681, "step": 23060 }, { "epoch": 0.281280487804878, "grad_norm": 1.368226170539856, "learning_rate": 1.8124796747967483e-05, "loss": 0.1121, "step": 23065 }, { "epoch": 0.2813414634146341, "grad_norm": 1.2696943283081055, "learning_rate": 1.812439024390244e-05, "loss": 0.0941, "step": 23070 }, { "epoch": 0.2814024390243902, "grad_norm": 0.7416566014289856, "learning_rate": 1.8123983739837402e-05, "loss": 0.1084, "step": 23075 }, { "epoch": 0.2814634146341463, "grad_norm": 1.040823221206665, "learning_rate": 1.8123577235772357e-05, "loss": 0.0717, "step": 23080 }, { "epoch": 0.2815243902439024, "grad_norm": 0.3456338942050934, "learning_rate": 1.812317073170732e-05, "loss": 0.0899, "step": 23085 }, { "epoch": 0.2815853658536585, "grad_norm": 0.6233701705932617, "learning_rate": 1.8122764227642277e-05, "loss": 0.1084, "step": 23090 }, { "epoch": 0.2816463414634146, "grad_norm": 0.7572261691093445, "learning_rate": 1.8122357723577238e-05, "loss": 0.1014, "step": 23095 }, { "epoch": 0.2817073170731707, "grad_norm": 0.6060342192649841, "learning_rate": 1.8121951219512196e-05, "loss": 0.096, "step": 23100 }, { "epoch": 0.2817682926829268, "grad_norm": 0.5057064294815063, "learning_rate": 1.8121544715447158e-05, "loss": 0.107, "step": 23105 }, { "epoch": 0.2818292682926829, "grad_norm": 0.6351178288459778, "learning_rate": 1.8121138211382116e-05, "loss": 0.0907, "step": 23110 }, { "epoch": 0.281890243902439, "grad_norm": 0.6972416043281555, "learning_rate": 1.8120731707317074e-05, "loss": 0.1076, "step": 23115 }, { "epoch": 0.2819512195121951, "grad_norm": 0.5079487562179565, "learning_rate": 1.8120325203252032e-05, "loss": 0.0851, "step": 23120 }, { "epoch": 0.2820121951219512, "grad_norm": 0.9655565619468689, "learning_rate": 1.8119918699186994e-05, "loss": 0.1221, "step": 23125 }, { "epoch": 0.2820731707317073, "grad_norm": 0.5980589985847473, "learning_rate": 1.8119512195121952e-05, "loss": 0.0901, "step": 23130 }, { "epoch": 0.2821341463414634, "grad_norm": 1.614680290222168, "learning_rate": 1.8119105691056913e-05, "loss": 0.0889, "step": 23135 }, { "epoch": 0.2821951219512195, "grad_norm": 0.5049274563789368, "learning_rate": 1.811869918699187e-05, "loss": 0.0945, "step": 23140 }, { "epoch": 0.2822560975609756, "grad_norm": 1.1081045866012573, "learning_rate": 1.811829268292683e-05, "loss": 0.1433, "step": 23145 }, { "epoch": 0.2823170731707317, "grad_norm": 0.6795579195022583, "learning_rate": 1.811788617886179e-05, "loss": 0.0919, "step": 23150 }, { "epoch": 0.2823780487804878, "grad_norm": 0.9612908959388733, "learning_rate": 1.811747967479675e-05, "loss": 0.1096, "step": 23155 }, { "epoch": 0.2824390243902439, "grad_norm": 0.9496840238571167, "learning_rate": 1.811707317073171e-05, "loss": 0.0966, "step": 23160 }, { "epoch": 0.2825, "grad_norm": 0.5777833461761475, "learning_rate": 1.811666666666667e-05, "loss": 0.1032, "step": 23165 }, { "epoch": 0.28256097560975607, "grad_norm": 0.45295482873916626, "learning_rate": 1.8116260162601627e-05, "loss": 0.0755, "step": 23170 }, { "epoch": 0.28262195121951217, "grad_norm": 0.7000933289527893, "learning_rate": 1.8115853658536585e-05, "loss": 0.1119, "step": 23175 }, { "epoch": 0.28268292682926827, "grad_norm": 0.7992066144943237, "learning_rate": 1.8115447154471547e-05, "loss": 0.0966, "step": 23180 }, { "epoch": 0.28274390243902436, "grad_norm": 1.0682555437088013, "learning_rate": 1.8115040650406505e-05, "loss": 0.138, "step": 23185 }, { "epoch": 0.28280487804878046, "grad_norm": 3.2474119663238525, "learning_rate": 1.8114634146341466e-05, "loss": 0.0867, "step": 23190 }, { "epoch": 0.28286585365853656, "grad_norm": 0.5546090006828308, "learning_rate": 1.8114227642276424e-05, "loss": 0.0765, "step": 23195 }, { "epoch": 0.28292682926829266, "grad_norm": 0.733884871006012, "learning_rate": 1.8113821138211382e-05, "loss": 0.1015, "step": 23200 }, { "epoch": 0.28298780487804875, "grad_norm": 0.7784020304679871, "learning_rate": 1.8113414634146344e-05, "loss": 0.0894, "step": 23205 }, { "epoch": 0.28304878048780485, "grad_norm": 1.3034089803695679, "learning_rate": 1.8113008130081302e-05, "loss": 0.091, "step": 23210 }, { "epoch": 0.28310975609756095, "grad_norm": 1.3545262813568115, "learning_rate": 1.811260162601626e-05, "loss": 0.1544, "step": 23215 }, { "epoch": 0.28317073170731705, "grad_norm": 0.506375789642334, "learning_rate": 1.8112195121951222e-05, "loss": 0.0916, "step": 23220 }, { "epoch": 0.28323170731707314, "grad_norm": 0.7165188789367676, "learning_rate": 1.811178861788618e-05, "loss": 0.1549, "step": 23225 }, { "epoch": 0.28329268292682924, "grad_norm": 3.3096506595611572, "learning_rate": 1.8111382113821138e-05, "loss": 0.1663, "step": 23230 }, { "epoch": 0.28335365853658534, "grad_norm": 0.48658108711242676, "learning_rate": 1.81109756097561e-05, "loss": 0.0814, "step": 23235 }, { "epoch": 0.28341463414634144, "grad_norm": 0.7340248823165894, "learning_rate": 1.8110569105691058e-05, "loss": 0.0958, "step": 23240 }, { "epoch": 0.28347560975609754, "grad_norm": 0.8615539073944092, "learning_rate": 1.811016260162602e-05, "loss": 0.0736, "step": 23245 }, { "epoch": 0.28353658536585363, "grad_norm": 0.8508347272872925, "learning_rate": 1.8109756097560977e-05, "loss": 0.0897, "step": 23250 }, { "epoch": 0.28359756097560973, "grad_norm": 1.6616367101669312, "learning_rate": 1.810934959349594e-05, "loss": 0.1018, "step": 23255 }, { "epoch": 0.28365853658536583, "grad_norm": 0.849183201789856, "learning_rate": 1.8108943089430894e-05, "loss": 0.128, "step": 23260 }, { "epoch": 0.2837195121951219, "grad_norm": 0.7623000741004944, "learning_rate": 1.8108536585365855e-05, "loss": 0.1117, "step": 23265 }, { "epoch": 0.283780487804878, "grad_norm": 0.8334658145904541, "learning_rate": 1.8108130081300813e-05, "loss": 0.0904, "step": 23270 }, { "epoch": 0.2838414634146341, "grad_norm": 0.6950445175170898, "learning_rate": 1.8107723577235775e-05, "loss": 0.0834, "step": 23275 }, { "epoch": 0.2839024390243902, "grad_norm": 0.5303332805633545, "learning_rate": 1.8107317073170733e-05, "loss": 0.095, "step": 23280 }, { "epoch": 0.2839634146341463, "grad_norm": 0.5030911564826965, "learning_rate": 1.8106910569105694e-05, "loss": 0.0727, "step": 23285 }, { "epoch": 0.2840243902439024, "grad_norm": 0.5023378133773804, "learning_rate": 1.8106504065040653e-05, "loss": 0.0841, "step": 23290 }, { "epoch": 0.2840853658536585, "grad_norm": 0.6529755592346191, "learning_rate": 1.810609756097561e-05, "loss": 0.1709, "step": 23295 }, { "epoch": 0.2841463414634146, "grad_norm": 0.9686474204063416, "learning_rate": 1.810569105691057e-05, "loss": 0.1006, "step": 23300 }, { "epoch": 0.2842073170731707, "grad_norm": 0.4214935302734375, "learning_rate": 1.810528455284553e-05, "loss": 0.0959, "step": 23305 }, { "epoch": 0.2842682926829268, "grad_norm": 0.8115726709365845, "learning_rate": 1.810487804878049e-05, "loss": 0.0943, "step": 23310 }, { "epoch": 0.2843292682926829, "grad_norm": 0.5703238844871521, "learning_rate": 1.810447154471545e-05, "loss": 0.1104, "step": 23315 }, { "epoch": 0.284390243902439, "grad_norm": 0.8040010333061218, "learning_rate": 1.8104065040650408e-05, "loss": 0.0987, "step": 23320 }, { "epoch": 0.2844512195121951, "grad_norm": 1.0567383766174316, "learning_rate": 1.8103658536585366e-05, "loss": 0.1193, "step": 23325 }, { "epoch": 0.2845121951219512, "grad_norm": 0.8182514905929565, "learning_rate": 1.8103252032520328e-05, "loss": 0.0787, "step": 23330 }, { "epoch": 0.2845731707317073, "grad_norm": 0.5909022092819214, "learning_rate": 1.8102845528455286e-05, "loss": 0.0756, "step": 23335 }, { "epoch": 0.2846341463414634, "grad_norm": 0.5748682618141174, "learning_rate": 1.8102439024390247e-05, "loss": 0.129, "step": 23340 }, { "epoch": 0.2846951219512195, "grad_norm": 0.7197617888450623, "learning_rate": 1.8102032520325205e-05, "loss": 0.0992, "step": 23345 }, { "epoch": 0.2847560975609756, "grad_norm": 2.7649643421173096, "learning_rate": 1.8101626016260164e-05, "loss": 0.0941, "step": 23350 }, { "epoch": 0.2848170731707317, "grad_norm": 1.360388159751892, "learning_rate": 1.8101219512195122e-05, "loss": 0.115, "step": 23355 }, { "epoch": 0.2848780487804878, "grad_norm": 0.6173604726791382, "learning_rate": 1.8100813008130083e-05, "loss": 0.0994, "step": 23360 }, { "epoch": 0.2849390243902439, "grad_norm": 0.5307409763336182, "learning_rate": 1.810040650406504e-05, "loss": 0.0735, "step": 23365 }, { "epoch": 0.285, "grad_norm": 1.3571302890777588, "learning_rate": 1.8100000000000003e-05, "loss": 0.0874, "step": 23370 }, { "epoch": 0.2850609756097561, "grad_norm": 0.5572518706321716, "learning_rate": 1.809959349593496e-05, "loss": 0.0905, "step": 23375 }, { "epoch": 0.28512195121951217, "grad_norm": 0.7133477926254272, "learning_rate": 1.809918699186992e-05, "loss": 0.123, "step": 23380 }, { "epoch": 0.28518292682926827, "grad_norm": 0.7893471717834473, "learning_rate": 1.8098780487804877e-05, "loss": 0.0832, "step": 23385 }, { "epoch": 0.28524390243902437, "grad_norm": 0.8991892337799072, "learning_rate": 1.809837398373984e-05, "loss": 0.0859, "step": 23390 }, { "epoch": 0.28530487804878046, "grad_norm": 0.9236337542533875, "learning_rate": 1.8097967479674797e-05, "loss": 0.095, "step": 23395 }, { "epoch": 0.28536585365853656, "grad_norm": 1.5032871961593628, "learning_rate": 1.809756097560976e-05, "loss": 0.0693, "step": 23400 }, { "epoch": 0.28542682926829266, "grad_norm": 0.7687724232673645, "learning_rate": 1.8097154471544717e-05, "loss": 0.0999, "step": 23405 }, { "epoch": 0.28548780487804876, "grad_norm": 0.7995852828025818, "learning_rate": 1.8096747967479675e-05, "loss": 0.0841, "step": 23410 }, { "epoch": 0.28554878048780485, "grad_norm": 0.7624332904815674, "learning_rate": 1.8096341463414636e-05, "loss": 0.0934, "step": 23415 }, { "epoch": 0.28560975609756095, "grad_norm": 0.5476480722427368, "learning_rate": 1.8095934959349594e-05, "loss": 0.0911, "step": 23420 }, { "epoch": 0.28567073170731705, "grad_norm": 1.5811285972595215, "learning_rate": 1.8095528455284556e-05, "loss": 0.1152, "step": 23425 }, { "epoch": 0.28573170731707315, "grad_norm": 1.0776655673980713, "learning_rate": 1.8095121951219514e-05, "loss": 0.1439, "step": 23430 }, { "epoch": 0.28579268292682924, "grad_norm": 0.8716649413108826, "learning_rate": 1.8094715447154475e-05, "loss": 0.0889, "step": 23435 }, { "epoch": 0.28585365853658534, "grad_norm": 0.47866329550743103, "learning_rate": 1.809430894308943e-05, "loss": 0.0881, "step": 23440 }, { "epoch": 0.28591463414634144, "grad_norm": 1.8481523990631104, "learning_rate": 1.8093902439024392e-05, "loss": 0.0764, "step": 23445 }, { "epoch": 0.28597560975609754, "grad_norm": 0.6101568341255188, "learning_rate": 1.809349593495935e-05, "loss": 0.0899, "step": 23450 }, { "epoch": 0.28603658536585364, "grad_norm": 1.1010866165161133, "learning_rate": 1.809308943089431e-05, "loss": 0.1119, "step": 23455 }, { "epoch": 0.28609756097560973, "grad_norm": 0.6610820293426514, "learning_rate": 1.809268292682927e-05, "loss": 0.0979, "step": 23460 }, { "epoch": 0.28615853658536583, "grad_norm": 0.7667832374572754, "learning_rate": 1.809227642276423e-05, "loss": 0.072, "step": 23465 }, { "epoch": 0.28621951219512193, "grad_norm": 0.4440269470214844, "learning_rate": 1.809186991869919e-05, "loss": 0.1612, "step": 23470 }, { "epoch": 0.286280487804878, "grad_norm": 0.8142979145050049, "learning_rate": 1.8091463414634147e-05, "loss": 0.09, "step": 23475 }, { "epoch": 0.2863414634146341, "grad_norm": 1.091964602470398, "learning_rate": 1.8091056910569105e-05, "loss": 0.0901, "step": 23480 }, { "epoch": 0.2864024390243902, "grad_norm": 0.6489288806915283, "learning_rate": 1.8090650406504067e-05, "loss": 0.133, "step": 23485 }, { "epoch": 0.2864634146341463, "grad_norm": 0.6791048049926758, "learning_rate": 1.8090243902439025e-05, "loss": 0.1024, "step": 23490 }, { "epoch": 0.2865243902439024, "grad_norm": 0.7346852421760559, "learning_rate": 1.8089837398373987e-05, "loss": 0.0724, "step": 23495 }, { "epoch": 0.2865853658536585, "grad_norm": 0.6063575744628906, "learning_rate": 1.8089430894308945e-05, "loss": 0.0921, "step": 23500 }, { "epoch": 0.2866463414634146, "grad_norm": 0.5881946682929993, "learning_rate": 1.8089024390243903e-05, "loss": 0.146, "step": 23505 }, { "epoch": 0.2867073170731707, "grad_norm": 0.7459958791732788, "learning_rate": 1.8088617886178864e-05, "loss": 0.085, "step": 23510 }, { "epoch": 0.2867682926829268, "grad_norm": 0.7517551779747009, "learning_rate": 1.8088211382113822e-05, "loss": 0.1064, "step": 23515 }, { "epoch": 0.2868292682926829, "grad_norm": 0.7810508608818054, "learning_rate": 1.8087804878048784e-05, "loss": 0.0957, "step": 23520 }, { "epoch": 0.286890243902439, "grad_norm": 0.7775281071662903, "learning_rate": 1.8087398373983742e-05, "loss": 0.1045, "step": 23525 }, { "epoch": 0.2869512195121951, "grad_norm": 0.7801337838172913, "learning_rate": 1.80869918699187e-05, "loss": 0.0871, "step": 23530 }, { "epoch": 0.2870121951219512, "grad_norm": 1.0614192485809326, "learning_rate": 1.808658536585366e-05, "loss": 0.1082, "step": 23535 }, { "epoch": 0.2870731707317073, "grad_norm": 1.3051789999008179, "learning_rate": 1.808617886178862e-05, "loss": 0.1078, "step": 23540 }, { "epoch": 0.2871341463414634, "grad_norm": 0.5469325184822083, "learning_rate": 1.8085772357723578e-05, "loss": 0.0938, "step": 23545 }, { "epoch": 0.2871951219512195, "grad_norm": 0.4986129403114319, "learning_rate": 1.808536585365854e-05, "loss": 0.11, "step": 23550 }, { "epoch": 0.2872560975609756, "grad_norm": 0.7646386027336121, "learning_rate": 1.8084959349593498e-05, "loss": 0.1009, "step": 23555 }, { "epoch": 0.2873170731707317, "grad_norm": 0.6584143042564392, "learning_rate": 1.8084552845528456e-05, "loss": 0.0957, "step": 23560 }, { "epoch": 0.2873780487804878, "grad_norm": 0.7469095587730408, "learning_rate": 1.8084146341463414e-05, "loss": 0.0865, "step": 23565 }, { "epoch": 0.2874390243902439, "grad_norm": 0.5921775698661804, "learning_rate": 1.8083739837398375e-05, "loss": 0.083, "step": 23570 }, { "epoch": 0.2875, "grad_norm": 0.8462940454483032, "learning_rate": 1.8083333333333334e-05, "loss": 0.1036, "step": 23575 }, { "epoch": 0.2875609756097561, "grad_norm": 0.6523867845535278, "learning_rate": 1.8082926829268295e-05, "loss": 0.0837, "step": 23580 }, { "epoch": 0.2876219512195122, "grad_norm": 1.1129509210586548, "learning_rate": 1.8082520325203253e-05, "loss": 0.0999, "step": 23585 }, { "epoch": 0.28768292682926827, "grad_norm": 1.5894371271133423, "learning_rate": 1.808211382113821e-05, "loss": 0.1441, "step": 23590 }, { "epoch": 0.28774390243902437, "grad_norm": 1.2243247032165527, "learning_rate": 1.8081707317073173e-05, "loss": 0.1427, "step": 23595 }, { "epoch": 0.28780487804878047, "grad_norm": 0.4559231698513031, "learning_rate": 1.808130081300813e-05, "loss": 0.1059, "step": 23600 }, { "epoch": 0.28786585365853656, "grad_norm": 0.5821393132209778, "learning_rate": 1.8080894308943092e-05, "loss": 0.0801, "step": 23605 }, { "epoch": 0.28792682926829266, "grad_norm": 0.6582613587379456, "learning_rate": 1.808048780487805e-05, "loss": 0.0665, "step": 23610 }, { "epoch": 0.28798780487804876, "grad_norm": 0.5622848868370056, "learning_rate": 1.8080081300813012e-05, "loss": 0.1226, "step": 23615 }, { "epoch": 0.28804878048780486, "grad_norm": 1.0040581226348877, "learning_rate": 1.8079674796747967e-05, "loss": 0.188, "step": 23620 }, { "epoch": 0.28810975609756095, "grad_norm": 0.8270630240440369, "learning_rate": 1.807926829268293e-05, "loss": 0.0653, "step": 23625 }, { "epoch": 0.28817073170731705, "grad_norm": 0.7496480345726013, "learning_rate": 1.8078861788617887e-05, "loss": 0.1016, "step": 23630 }, { "epoch": 0.28823170731707315, "grad_norm": 0.6865078210830688, "learning_rate": 1.8078455284552848e-05, "loss": 0.1195, "step": 23635 }, { "epoch": 0.28829268292682925, "grad_norm": 0.6999219655990601, "learning_rate": 1.8078048780487806e-05, "loss": 0.0738, "step": 23640 }, { "epoch": 0.28835365853658534, "grad_norm": 0.46605661511421204, "learning_rate": 1.8077642276422768e-05, "loss": 0.136, "step": 23645 }, { "epoch": 0.28841463414634144, "grad_norm": 0.8106605410575867, "learning_rate": 1.8077235772357722e-05, "loss": 0.1107, "step": 23650 }, { "epoch": 0.28847560975609754, "grad_norm": 0.6417269706726074, "learning_rate": 1.8076829268292684e-05, "loss": 0.0832, "step": 23655 }, { "epoch": 0.28853658536585364, "grad_norm": 0.5471162796020508, "learning_rate": 1.8076422764227642e-05, "loss": 0.0708, "step": 23660 }, { "epoch": 0.28859756097560973, "grad_norm": 1.2187422513961792, "learning_rate": 1.8076016260162604e-05, "loss": 0.0987, "step": 23665 }, { "epoch": 0.28865853658536583, "grad_norm": 0.7224910855293274, "learning_rate": 1.8075609756097562e-05, "loss": 0.1122, "step": 23670 }, { "epoch": 0.28871951219512193, "grad_norm": 0.9240429401397705, "learning_rate": 1.8075203252032523e-05, "loss": 0.1386, "step": 23675 }, { "epoch": 0.288780487804878, "grad_norm": 0.5165841579437256, "learning_rate": 1.807479674796748e-05, "loss": 0.1008, "step": 23680 }, { "epoch": 0.2888414634146341, "grad_norm": 0.9308422803878784, "learning_rate": 1.807439024390244e-05, "loss": 0.0656, "step": 23685 }, { "epoch": 0.2889024390243902, "grad_norm": 0.4394772946834564, "learning_rate": 1.80739837398374e-05, "loss": 0.0699, "step": 23690 }, { "epoch": 0.2889634146341463, "grad_norm": 0.5562050342559814, "learning_rate": 1.807357723577236e-05, "loss": 0.0682, "step": 23695 }, { "epoch": 0.2890243902439024, "grad_norm": 1.6585010290145874, "learning_rate": 1.807317073170732e-05, "loss": 0.114, "step": 23700 }, { "epoch": 0.2890853658536585, "grad_norm": 0.6219250559806824, "learning_rate": 1.807276422764228e-05, "loss": 0.0958, "step": 23705 }, { "epoch": 0.2891463414634146, "grad_norm": 0.40148046612739563, "learning_rate": 1.8072357723577237e-05, "loss": 0.0602, "step": 23710 }, { "epoch": 0.2892073170731707, "grad_norm": 1.015023112297058, "learning_rate": 1.8071951219512195e-05, "loss": 0.1273, "step": 23715 }, { "epoch": 0.2892682926829268, "grad_norm": 1.2403080463409424, "learning_rate": 1.8071544715447157e-05, "loss": 0.1284, "step": 23720 }, { "epoch": 0.2893292682926829, "grad_norm": 0.4703630805015564, "learning_rate": 1.8071138211382115e-05, "loss": 0.0966, "step": 23725 }, { "epoch": 0.289390243902439, "grad_norm": 2.684485673904419, "learning_rate": 1.8070731707317076e-05, "loss": 0.0939, "step": 23730 }, { "epoch": 0.2894512195121951, "grad_norm": 0.7418169975280762, "learning_rate": 1.8070325203252034e-05, "loss": 0.0999, "step": 23735 }, { "epoch": 0.2895121951219512, "grad_norm": 1.1289485692977905, "learning_rate": 1.8069918699186992e-05, "loss": 0.0988, "step": 23740 }, { "epoch": 0.2895731707317073, "grad_norm": 0.5752054452896118, "learning_rate": 1.806951219512195e-05, "loss": 0.1204, "step": 23745 }, { "epoch": 0.2896341463414634, "grad_norm": 0.5227100253105164, "learning_rate": 1.8069105691056912e-05, "loss": 0.1125, "step": 23750 }, { "epoch": 0.2896951219512195, "grad_norm": 0.6457528471946716, "learning_rate": 1.806869918699187e-05, "loss": 0.0798, "step": 23755 }, { "epoch": 0.2897560975609756, "grad_norm": 0.6450744867324829, "learning_rate": 1.8068292682926832e-05, "loss": 0.1061, "step": 23760 }, { "epoch": 0.2898170731707317, "grad_norm": 0.5799591541290283, "learning_rate": 1.806788617886179e-05, "loss": 0.1018, "step": 23765 }, { "epoch": 0.2898780487804878, "grad_norm": 0.4095749855041504, "learning_rate": 1.8067479674796748e-05, "loss": 0.0544, "step": 23770 }, { "epoch": 0.2899390243902439, "grad_norm": 0.807472288608551, "learning_rate": 1.806707317073171e-05, "loss": 0.1058, "step": 23775 }, { "epoch": 0.29, "grad_norm": 1.7027643918991089, "learning_rate": 1.8066666666666668e-05, "loss": 0.1073, "step": 23780 }, { "epoch": 0.2900609756097561, "grad_norm": 0.46079516410827637, "learning_rate": 1.806626016260163e-05, "loss": 0.0899, "step": 23785 }, { "epoch": 0.2901219512195122, "grad_norm": 0.9163893461227417, "learning_rate": 1.8065853658536587e-05, "loss": 0.1042, "step": 23790 }, { "epoch": 0.2901829268292683, "grad_norm": 0.828636884689331, "learning_rate": 1.8065447154471545e-05, "loss": 0.0813, "step": 23795 }, { "epoch": 0.29024390243902437, "grad_norm": 1.056448221206665, "learning_rate": 1.8065040650406504e-05, "loss": 0.1117, "step": 23800 }, { "epoch": 0.29030487804878047, "grad_norm": 1.2249749898910522, "learning_rate": 1.8064634146341465e-05, "loss": 0.1289, "step": 23805 }, { "epoch": 0.29036585365853657, "grad_norm": 0.714675784111023, "learning_rate": 1.8064227642276423e-05, "loss": 0.1349, "step": 23810 }, { "epoch": 0.29042682926829266, "grad_norm": 0.8360585570335388, "learning_rate": 1.8063821138211385e-05, "loss": 0.1184, "step": 23815 }, { "epoch": 0.29048780487804876, "grad_norm": 0.6237064599990845, "learning_rate": 1.8063414634146343e-05, "loss": 0.0867, "step": 23820 }, { "epoch": 0.29054878048780486, "grad_norm": 0.7672204375267029, "learning_rate": 1.8063008130081304e-05, "loss": 0.1047, "step": 23825 }, { "epoch": 0.29060975609756096, "grad_norm": 0.9152666926383972, "learning_rate": 1.806260162601626e-05, "loss": 0.0892, "step": 23830 }, { "epoch": 0.29067073170731705, "grad_norm": 0.4708854854106903, "learning_rate": 1.806219512195122e-05, "loss": 0.1036, "step": 23835 }, { "epoch": 0.29073170731707315, "grad_norm": 0.4491136074066162, "learning_rate": 1.806178861788618e-05, "loss": 0.0818, "step": 23840 }, { "epoch": 0.29079268292682925, "grad_norm": 0.5965561270713806, "learning_rate": 1.806138211382114e-05, "loss": 0.1089, "step": 23845 }, { "epoch": 0.29085365853658535, "grad_norm": 0.5075373649597168, "learning_rate": 1.80609756097561e-05, "loss": 0.0804, "step": 23850 }, { "epoch": 0.29091463414634144, "grad_norm": 0.6836814880371094, "learning_rate": 1.806056910569106e-05, "loss": 0.102, "step": 23855 }, { "epoch": 0.29097560975609754, "grad_norm": 0.9657642245292664, "learning_rate": 1.8060162601626018e-05, "loss": 0.0877, "step": 23860 }, { "epoch": 0.29103658536585364, "grad_norm": 1.0852206945419312, "learning_rate": 1.8059756097560976e-05, "loss": 0.1323, "step": 23865 }, { "epoch": 0.29109756097560974, "grad_norm": 1.273506999015808, "learning_rate": 1.8059349593495938e-05, "loss": 0.121, "step": 23870 }, { "epoch": 0.29115853658536583, "grad_norm": 0.9311147928237915, "learning_rate": 1.8058943089430896e-05, "loss": 0.07, "step": 23875 }, { "epoch": 0.29121951219512193, "grad_norm": 0.7992591261863708, "learning_rate": 1.8058536585365857e-05, "loss": 0.1099, "step": 23880 }, { "epoch": 0.29128048780487803, "grad_norm": 0.8417666554450989, "learning_rate": 1.8058130081300815e-05, "loss": 0.1178, "step": 23885 }, { "epoch": 0.2913414634146341, "grad_norm": 0.5241949558258057, "learning_rate": 1.8057723577235774e-05, "loss": 0.1064, "step": 23890 }, { "epoch": 0.2914024390243902, "grad_norm": 0.9006667733192444, "learning_rate": 1.805731707317073e-05, "loss": 0.0896, "step": 23895 }, { "epoch": 0.2914634146341463, "grad_norm": 1.7333499193191528, "learning_rate": 1.8056910569105693e-05, "loss": 0.0918, "step": 23900 }, { "epoch": 0.2915243902439024, "grad_norm": 0.5381545424461365, "learning_rate": 1.805650406504065e-05, "loss": 0.1007, "step": 23905 }, { "epoch": 0.2915853658536585, "grad_norm": 0.7401214241981506, "learning_rate": 1.8056097560975613e-05, "loss": 0.1171, "step": 23910 }, { "epoch": 0.2916463414634146, "grad_norm": 0.8082101345062256, "learning_rate": 1.805569105691057e-05, "loss": 0.0855, "step": 23915 }, { "epoch": 0.2917073170731707, "grad_norm": 0.6090333461761475, "learning_rate": 1.805528455284553e-05, "loss": 0.085, "step": 23920 }, { "epoch": 0.2917682926829268, "grad_norm": 1.5124872922897339, "learning_rate": 1.8054878048780487e-05, "loss": 0.1424, "step": 23925 }, { "epoch": 0.2918292682926829, "grad_norm": 0.7374436259269714, "learning_rate": 1.805447154471545e-05, "loss": 0.0721, "step": 23930 }, { "epoch": 0.291890243902439, "grad_norm": 0.7592292428016663, "learning_rate": 1.8054065040650407e-05, "loss": 0.1001, "step": 23935 }, { "epoch": 0.2919512195121951, "grad_norm": 0.8873343467712402, "learning_rate": 1.805365853658537e-05, "loss": 0.1106, "step": 23940 }, { "epoch": 0.2920121951219512, "grad_norm": 0.5311099886894226, "learning_rate": 1.8053252032520326e-05, "loss": 0.0592, "step": 23945 }, { "epoch": 0.2920731707317073, "grad_norm": 1.5207643508911133, "learning_rate": 1.8052845528455285e-05, "loss": 0.0816, "step": 23950 }, { "epoch": 0.2921341463414634, "grad_norm": 0.5946427583694458, "learning_rate": 1.8052439024390246e-05, "loss": 0.1182, "step": 23955 }, { "epoch": 0.2921951219512195, "grad_norm": 0.6252833008766174, "learning_rate": 1.8052032520325204e-05, "loss": 0.0876, "step": 23960 }, { "epoch": 0.2922560975609756, "grad_norm": 0.58301842212677, "learning_rate": 1.8051626016260166e-05, "loss": 0.1111, "step": 23965 }, { "epoch": 0.2923170731707317, "grad_norm": 0.4069274365901947, "learning_rate": 1.8051219512195124e-05, "loss": 0.0776, "step": 23970 }, { "epoch": 0.2923780487804878, "grad_norm": 2.0827715396881104, "learning_rate": 1.8050813008130082e-05, "loss": 0.133, "step": 23975 }, { "epoch": 0.2924390243902439, "grad_norm": 0.46829289197921753, "learning_rate": 1.805040650406504e-05, "loss": 0.0983, "step": 23980 }, { "epoch": 0.2925, "grad_norm": 0.8372810482978821, "learning_rate": 1.805e-05, "loss": 0.0889, "step": 23985 }, { "epoch": 0.2925609756097561, "grad_norm": 0.8359677195549011, "learning_rate": 1.804959349593496e-05, "loss": 0.0992, "step": 23990 }, { "epoch": 0.2926219512195122, "grad_norm": 0.725649356842041, "learning_rate": 1.804918699186992e-05, "loss": 0.1208, "step": 23995 }, { "epoch": 0.2926829268292683, "grad_norm": 0.4809744358062744, "learning_rate": 1.804878048780488e-05, "loss": 0.0709, "step": 24000 }, { "epoch": 0.2927439024390244, "grad_norm": 0.7812973260879517, "learning_rate": 1.804837398373984e-05, "loss": 0.1035, "step": 24005 }, { "epoch": 0.29280487804878047, "grad_norm": 1.319092035293579, "learning_rate": 1.8047967479674796e-05, "loss": 0.112, "step": 24010 }, { "epoch": 0.29286585365853657, "grad_norm": 0.9565643072128296, "learning_rate": 1.8047560975609757e-05, "loss": 0.1037, "step": 24015 }, { "epoch": 0.29292682926829267, "grad_norm": 0.5473825931549072, "learning_rate": 1.8047154471544715e-05, "loss": 0.1161, "step": 24020 }, { "epoch": 0.29298780487804876, "grad_norm": 0.4506221115589142, "learning_rate": 1.8046747967479677e-05, "loss": 0.0841, "step": 24025 }, { "epoch": 0.29304878048780486, "grad_norm": 0.41428086161613464, "learning_rate": 1.8046341463414635e-05, "loss": 0.0704, "step": 24030 }, { "epoch": 0.29310975609756096, "grad_norm": 0.4420340955257416, "learning_rate": 1.8045934959349597e-05, "loss": 0.1136, "step": 24035 }, { "epoch": 0.29317073170731706, "grad_norm": 0.6658501029014587, "learning_rate": 1.8045528455284555e-05, "loss": 0.1347, "step": 24040 }, { "epoch": 0.29323170731707315, "grad_norm": 1.8631107807159424, "learning_rate": 1.8045121951219513e-05, "loss": 0.0888, "step": 24045 }, { "epoch": 0.29329268292682925, "grad_norm": 0.4820840656757355, "learning_rate": 1.8044715447154474e-05, "loss": 0.1056, "step": 24050 }, { "epoch": 0.29335365853658535, "grad_norm": 0.37511125206947327, "learning_rate": 1.8044308943089432e-05, "loss": 0.1241, "step": 24055 }, { "epoch": 0.29341463414634145, "grad_norm": 0.6501897573471069, "learning_rate": 1.804390243902439e-05, "loss": 0.0645, "step": 24060 }, { "epoch": 0.29347560975609754, "grad_norm": 0.6551280617713928, "learning_rate": 1.8043495934959352e-05, "loss": 0.0566, "step": 24065 }, { "epoch": 0.29353658536585364, "grad_norm": 0.885513186454773, "learning_rate": 1.804308943089431e-05, "loss": 0.0956, "step": 24070 }, { "epoch": 0.29359756097560974, "grad_norm": 0.40251559019088745, "learning_rate": 1.8042682926829268e-05, "loss": 0.0786, "step": 24075 }, { "epoch": 0.29365853658536584, "grad_norm": 0.6329763531684875, "learning_rate": 1.804227642276423e-05, "loss": 0.114, "step": 24080 }, { "epoch": 0.29371951219512193, "grad_norm": 0.6266744136810303, "learning_rate": 1.8041869918699188e-05, "loss": 0.0812, "step": 24085 }, { "epoch": 0.29378048780487803, "grad_norm": 0.9967004060745239, "learning_rate": 1.804146341463415e-05, "loss": 0.1083, "step": 24090 }, { "epoch": 0.29384146341463413, "grad_norm": 1.0501548051834106, "learning_rate": 1.8041056910569108e-05, "loss": 0.1119, "step": 24095 }, { "epoch": 0.2939024390243902, "grad_norm": 0.6295802593231201, "learning_rate": 1.8040650406504066e-05, "loss": 0.0969, "step": 24100 }, { "epoch": 0.2939634146341463, "grad_norm": 1.6386594772338867, "learning_rate": 1.8040243902439024e-05, "loss": 0.1214, "step": 24105 }, { "epoch": 0.2940243902439024, "grad_norm": 0.8223456740379333, "learning_rate": 1.8039837398373985e-05, "loss": 0.0841, "step": 24110 }, { "epoch": 0.2940853658536585, "grad_norm": 0.7978095412254333, "learning_rate": 1.8039430894308943e-05, "loss": 0.0889, "step": 24115 }, { "epoch": 0.2941463414634146, "grad_norm": 0.7778245210647583, "learning_rate": 1.8039024390243905e-05, "loss": 0.1009, "step": 24120 }, { "epoch": 0.2942073170731707, "grad_norm": 0.7532222867012024, "learning_rate": 1.8038617886178863e-05, "loss": 0.0838, "step": 24125 }, { "epoch": 0.2942682926829268, "grad_norm": 0.7978196144104004, "learning_rate": 1.803821138211382e-05, "loss": 0.1004, "step": 24130 }, { "epoch": 0.2943292682926829, "grad_norm": 0.5176752209663391, "learning_rate": 1.8037804878048783e-05, "loss": 0.0679, "step": 24135 }, { "epoch": 0.294390243902439, "grad_norm": 0.7527872920036316, "learning_rate": 1.803739837398374e-05, "loss": 0.0995, "step": 24140 }, { "epoch": 0.2944512195121951, "grad_norm": 1.5424474477767944, "learning_rate": 1.8036991869918702e-05, "loss": 0.1014, "step": 24145 }, { "epoch": 0.2945121951219512, "grad_norm": 0.9028512835502625, "learning_rate": 1.803658536585366e-05, "loss": 0.0982, "step": 24150 }, { "epoch": 0.2945731707317073, "grad_norm": 0.6874858736991882, "learning_rate": 1.803617886178862e-05, "loss": 0.0894, "step": 24155 }, { "epoch": 0.2946341463414634, "grad_norm": 0.8140509128570557, "learning_rate": 1.8035772357723577e-05, "loss": 0.1065, "step": 24160 }, { "epoch": 0.2946951219512195, "grad_norm": 0.7367640137672424, "learning_rate": 1.803536585365854e-05, "loss": 0.0829, "step": 24165 }, { "epoch": 0.2947560975609756, "grad_norm": 0.7157819271087646, "learning_rate": 1.8034959349593496e-05, "loss": 0.0975, "step": 24170 }, { "epoch": 0.2948170731707317, "grad_norm": 1.0224583148956299, "learning_rate": 1.8034552845528458e-05, "loss": 0.07, "step": 24175 }, { "epoch": 0.2948780487804878, "grad_norm": 0.6344313025474548, "learning_rate": 1.8034146341463416e-05, "loss": 0.1133, "step": 24180 }, { "epoch": 0.2949390243902439, "grad_norm": 0.6836916208267212, "learning_rate": 1.8033739837398378e-05, "loss": 0.1469, "step": 24185 }, { "epoch": 0.295, "grad_norm": 0.8468672633171082, "learning_rate": 1.8033333333333332e-05, "loss": 0.105, "step": 24190 }, { "epoch": 0.2950609756097561, "grad_norm": 1.1764341592788696, "learning_rate": 1.8032926829268294e-05, "loss": 0.1084, "step": 24195 }, { "epoch": 0.2951219512195122, "grad_norm": 0.9691236615180969, "learning_rate": 1.8032520325203252e-05, "loss": 0.0882, "step": 24200 }, { "epoch": 0.2951829268292683, "grad_norm": 0.6962425112724304, "learning_rate": 1.8032113821138214e-05, "loss": 0.1036, "step": 24205 }, { "epoch": 0.2952439024390244, "grad_norm": 1.0107381343841553, "learning_rate": 1.803170731707317e-05, "loss": 0.1215, "step": 24210 }, { "epoch": 0.2953048780487805, "grad_norm": 0.9178775548934937, "learning_rate": 1.8031300813008133e-05, "loss": 0.0935, "step": 24215 }, { "epoch": 0.29536585365853657, "grad_norm": 0.754271388053894, "learning_rate": 1.803089430894309e-05, "loss": 0.1088, "step": 24220 }, { "epoch": 0.29542682926829267, "grad_norm": 0.9163649082183838, "learning_rate": 1.803048780487805e-05, "loss": 0.1638, "step": 24225 }, { "epoch": 0.29548780487804877, "grad_norm": 2.8563263416290283, "learning_rate": 1.803008130081301e-05, "loss": 0.1389, "step": 24230 }, { "epoch": 0.29554878048780486, "grad_norm": 0.6221686601638794, "learning_rate": 1.802967479674797e-05, "loss": 0.0908, "step": 24235 }, { "epoch": 0.29560975609756096, "grad_norm": 0.6589136123657227, "learning_rate": 1.8029268292682927e-05, "loss": 0.0712, "step": 24240 }, { "epoch": 0.29567073170731706, "grad_norm": 0.4686875343322754, "learning_rate": 1.802886178861789e-05, "loss": 0.0997, "step": 24245 }, { "epoch": 0.29573170731707316, "grad_norm": 0.506942629814148, "learning_rate": 1.8028455284552847e-05, "loss": 0.0823, "step": 24250 }, { "epoch": 0.29579268292682925, "grad_norm": 0.5874627828598022, "learning_rate": 1.8028048780487805e-05, "loss": 0.0921, "step": 24255 }, { "epoch": 0.29585365853658535, "grad_norm": 0.6757133603096008, "learning_rate": 1.8027642276422766e-05, "loss": 0.1202, "step": 24260 }, { "epoch": 0.29591463414634145, "grad_norm": 0.7176432013511658, "learning_rate": 1.8027235772357725e-05, "loss": 0.0913, "step": 24265 }, { "epoch": 0.29597560975609755, "grad_norm": 0.7543314695358276, "learning_rate": 1.8026829268292686e-05, "loss": 0.0902, "step": 24270 }, { "epoch": 0.29603658536585364, "grad_norm": 0.7534452676773071, "learning_rate": 1.8026422764227644e-05, "loss": 0.1076, "step": 24275 }, { "epoch": 0.29609756097560974, "grad_norm": 0.8083582520484924, "learning_rate": 1.8026016260162602e-05, "loss": 0.0974, "step": 24280 }, { "epoch": 0.29615853658536584, "grad_norm": 0.8755842447280884, "learning_rate": 1.802560975609756e-05, "loss": 0.1, "step": 24285 }, { "epoch": 0.29621951219512194, "grad_norm": 0.4498870074748993, "learning_rate": 1.8025203252032522e-05, "loss": 0.0625, "step": 24290 }, { "epoch": 0.29628048780487803, "grad_norm": 0.7974644303321838, "learning_rate": 1.802479674796748e-05, "loss": 0.1325, "step": 24295 }, { "epoch": 0.29634146341463413, "grad_norm": 2.3550450801849365, "learning_rate": 1.802439024390244e-05, "loss": 0.1012, "step": 24300 }, { "epoch": 0.29640243902439023, "grad_norm": 0.9023445248603821, "learning_rate": 1.80239837398374e-05, "loss": 0.093, "step": 24305 }, { "epoch": 0.2964634146341463, "grad_norm": 2.5665998458862305, "learning_rate": 1.8023577235772358e-05, "loss": 0.0842, "step": 24310 }, { "epoch": 0.2965243902439024, "grad_norm": 1.4538482427597046, "learning_rate": 1.802317073170732e-05, "loss": 0.1, "step": 24315 }, { "epoch": 0.2965853658536585, "grad_norm": 0.7885525226593018, "learning_rate": 1.8022764227642278e-05, "loss": 0.0772, "step": 24320 }, { "epoch": 0.2966463414634146, "grad_norm": 0.6630643606185913, "learning_rate": 1.8022357723577236e-05, "loss": 0.0987, "step": 24325 }, { "epoch": 0.2967073170731707, "grad_norm": 0.32061460614204407, "learning_rate": 1.8021951219512197e-05, "loss": 0.0571, "step": 24330 }, { "epoch": 0.2967682926829268, "grad_norm": 0.8455332517623901, "learning_rate": 1.8021544715447155e-05, "loss": 0.1036, "step": 24335 }, { "epoch": 0.2968292682926829, "grad_norm": 0.4419381022453308, "learning_rate": 1.8021138211382113e-05, "loss": 0.111, "step": 24340 }, { "epoch": 0.296890243902439, "grad_norm": 0.6154779195785522, "learning_rate": 1.8020731707317075e-05, "loss": 0.085, "step": 24345 }, { "epoch": 0.2969512195121951, "grad_norm": 0.9135154485702515, "learning_rate": 1.8020325203252033e-05, "loss": 0.1409, "step": 24350 }, { "epoch": 0.2970121951219512, "grad_norm": 0.4935090243816376, "learning_rate": 1.8019918699186995e-05, "loss": 0.0707, "step": 24355 }, { "epoch": 0.2970731707317073, "grad_norm": 0.7502822875976562, "learning_rate": 1.8019512195121953e-05, "loss": 0.1124, "step": 24360 }, { "epoch": 0.2971341463414634, "grad_norm": 0.8328391909599304, "learning_rate": 1.8019105691056914e-05, "loss": 0.1045, "step": 24365 }, { "epoch": 0.2971951219512195, "grad_norm": 0.8969546556472778, "learning_rate": 1.801869918699187e-05, "loss": 0.1315, "step": 24370 }, { "epoch": 0.2972560975609756, "grad_norm": 0.7243276238441467, "learning_rate": 1.801829268292683e-05, "loss": 0.068, "step": 24375 }, { "epoch": 0.2973170731707317, "grad_norm": 0.7694684863090515, "learning_rate": 1.801788617886179e-05, "loss": 0.1165, "step": 24380 }, { "epoch": 0.2973780487804878, "grad_norm": 0.7281848192214966, "learning_rate": 1.801747967479675e-05, "loss": 0.0942, "step": 24385 }, { "epoch": 0.2974390243902439, "grad_norm": 0.7745161652565002, "learning_rate": 1.8017073170731708e-05, "loss": 0.0751, "step": 24390 }, { "epoch": 0.2975, "grad_norm": 0.7831854224205017, "learning_rate": 1.801666666666667e-05, "loss": 0.1941, "step": 24395 }, { "epoch": 0.2975609756097561, "grad_norm": 4.414144515991211, "learning_rate": 1.8016260162601628e-05, "loss": 0.1112, "step": 24400 }, { "epoch": 0.2976219512195122, "grad_norm": 1.2597529888153076, "learning_rate": 1.8015853658536586e-05, "loss": 0.0903, "step": 24405 }, { "epoch": 0.2976829268292683, "grad_norm": 0.9825866222381592, "learning_rate": 1.8015447154471548e-05, "loss": 0.1115, "step": 24410 }, { "epoch": 0.2977439024390244, "grad_norm": 0.9517324566841125, "learning_rate": 1.8015040650406506e-05, "loss": 0.0564, "step": 24415 }, { "epoch": 0.2978048780487805, "grad_norm": 1.3672899007797241, "learning_rate": 1.8014634146341464e-05, "loss": 0.0827, "step": 24420 }, { "epoch": 0.2978658536585366, "grad_norm": 1.2565057277679443, "learning_rate": 1.8014227642276425e-05, "loss": 0.0919, "step": 24425 }, { "epoch": 0.29792682926829267, "grad_norm": 0.7667832970619202, "learning_rate": 1.8013821138211383e-05, "loss": 0.0965, "step": 24430 }, { "epoch": 0.29798780487804877, "grad_norm": 1.7485077381134033, "learning_rate": 1.801341463414634e-05, "loss": 0.0684, "step": 24435 }, { "epoch": 0.29804878048780487, "grad_norm": 0.5094343423843384, "learning_rate": 1.8013008130081303e-05, "loss": 0.0875, "step": 24440 }, { "epoch": 0.29810975609756096, "grad_norm": 1.0916569232940674, "learning_rate": 1.801260162601626e-05, "loss": 0.1233, "step": 24445 }, { "epoch": 0.29817073170731706, "grad_norm": 1.1402572393417358, "learning_rate": 1.8012195121951223e-05, "loss": 0.103, "step": 24450 }, { "epoch": 0.29823170731707316, "grad_norm": 0.7227899432182312, "learning_rate": 1.801178861788618e-05, "loss": 0.056, "step": 24455 }, { "epoch": 0.29829268292682926, "grad_norm": 0.4510228931903839, "learning_rate": 1.801138211382114e-05, "loss": 0.1165, "step": 24460 }, { "epoch": 0.29835365853658535, "grad_norm": 0.9395787715911865, "learning_rate": 1.8010975609756097e-05, "loss": 0.1284, "step": 24465 }, { "epoch": 0.29841463414634145, "grad_norm": 0.9090480208396912, "learning_rate": 1.801056910569106e-05, "loss": 0.1022, "step": 24470 }, { "epoch": 0.29847560975609755, "grad_norm": 0.5769307017326355, "learning_rate": 1.8010162601626017e-05, "loss": 0.0916, "step": 24475 }, { "epoch": 0.29853658536585365, "grad_norm": 0.9765657186508179, "learning_rate": 1.8009756097560978e-05, "loss": 0.0719, "step": 24480 }, { "epoch": 0.29859756097560974, "grad_norm": 1.2370246648788452, "learning_rate": 1.8009349593495936e-05, "loss": 0.1003, "step": 24485 }, { "epoch": 0.29865853658536584, "grad_norm": 1.66849684715271, "learning_rate": 1.8008943089430895e-05, "loss": 0.0962, "step": 24490 }, { "epoch": 0.29871951219512194, "grad_norm": 0.9648709893226624, "learning_rate": 1.8008536585365856e-05, "loss": 0.0947, "step": 24495 }, { "epoch": 0.29878048780487804, "grad_norm": 0.3437891900539398, "learning_rate": 1.8008130081300814e-05, "loss": 0.0955, "step": 24500 }, { "epoch": 0.29884146341463413, "grad_norm": 1.0251429080963135, "learning_rate": 1.8007723577235772e-05, "loss": 0.0717, "step": 24505 }, { "epoch": 0.29890243902439023, "grad_norm": 1.2005641460418701, "learning_rate": 1.8007317073170734e-05, "loss": 0.0753, "step": 24510 }, { "epoch": 0.29896341463414633, "grad_norm": 1.2894163131713867, "learning_rate": 1.8006910569105692e-05, "loss": 0.1375, "step": 24515 }, { "epoch": 0.2990243902439024, "grad_norm": 0.789330005645752, "learning_rate": 1.800650406504065e-05, "loss": 0.1252, "step": 24520 }, { "epoch": 0.2990853658536585, "grad_norm": 0.759242594242096, "learning_rate": 1.800609756097561e-05, "loss": 0.09, "step": 24525 }, { "epoch": 0.2991463414634146, "grad_norm": 0.7601686716079712, "learning_rate": 1.800569105691057e-05, "loss": 0.1218, "step": 24530 }, { "epoch": 0.2992073170731707, "grad_norm": 0.4920297861099243, "learning_rate": 1.800528455284553e-05, "loss": 0.0692, "step": 24535 }, { "epoch": 0.2992682926829268, "grad_norm": 0.4807659983634949, "learning_rate": 1.800487804878049e-05, "loss": 0.0953, "step": 24540 }, { "epoch": 0.2993292682926829, "grad_norm": 0.37338441610336304, "learning_rate": 1.800447154471545e-05, "loss": 0.0628, "step": 24545 }, { "epoch": 0.299390243902439, "grad_norm": 7.272066593170166, "learning_rate": 1.8004065040650406e-05, "loss": 0.1177, "step": 24550 }, { "epoch": 0.2994512195121951, "grad_norm": 0.44751492142677307, "learning_rate": 1.8003658536585367e-05, "loss": 0.0687, "step": 24555 }, { "epoch": 0.2995121951219512, "grad_norm": 0.7983130216598511, "learning_rate": 1.8003252032520325e-05, "loss": 0.1485, "step": 24560 }, { "epoch": 0.2995731707317073, "grad_norm": 0.8811423778533936, "learning_rate": 1.8002845528455287e-05, "loss": 0.1405, "step": 24565 }, { "epoch": 0.2996341463414634, "grad_norm": 0.9670649766921997, "learning_rate": 1.8002439024390245e-05, "loss": 0.0968, "step": 24570 }, { "epoch": 0.2996951219512195, "grad_norm": 1.1480803489685059, "learning_rate": 1.8002032520325206e-05, "loss": 0.0991, "step": 24575 }, { "epoch": 0.2997560975609756, "grad_norm": 0.8515452146530151, "learning_rate": 1.8001626016260165e-05, "loss": 0.1328, "step": 24580 }, { "epoch": 0.2998170731707317, "grad_norm": 0.4001106917858124, "learning_rate": 1.8001219512195123e-05, "loss": 0.0932, "step": 24585 }, { "epoch": 0.2998780487804878, "grad_norm": 3.129207134246826, "learning_rate": 1.800081300813008e-05, "loss": 0.1208, "step": 24590 }, { "epoch": 0.2999390243902439, "grad_norm": 0.6129314303398132, "learning_rate": 1.8000406504065042e-05, "loss": 0.1002, "step": 24595 }, { "epoch": 0.3, "grad_norm": 1.3309601545333862, "learning_rate": 1.8e-05, "loss": 0.1325, "step": 24600 }, { "epoch": 0.3000609756097561, "grad_norm": 0.6370049715042114, "learning_rate": 1.7999593495934962e-05, "loss": 0.1115, "step": 24605 }, { "epoch": 0.3001219512195122, "grad_norm": 0.704267680644989, "learning_rate": 1.799918699186992e-05, "loss": 0.0997, "step": 24610 }, { "epoch": 0.3001829268292683, "grad_norm": 0.7125275135040283, "learning_rate": 1.7998780487804878e-05, "loss": 0.0953, "step": 24615 }, { "epoch": 0.3002439024390244, "grad_norm": 1.0206245183944702, "learning_rate": 1.799837398373984e-05, "loss": 0.1179, "step": 24620 }, { "epoch": 0.3003048780487805, "grad_norm": 0.505033016204834, "learning_rate": 1.7997967479674798e-05, "loss": 0.0895, "step": 24625 }, { "epoch": 0.3003658536585366, "grad_norm": 0.909750759601593, "learning_rate": 1.799756097560976e-05, "loss": 0.0765, "step": 24630 }, { "epoch": 0.30042682926829267, "grad_norm": 0.7982273697853088, "learning_rate": 1.7997154471544718e-05, "loss": 0.0946, "step": 24635 }, { "epoch": 0.30048780487804877, "grad_norm": 0.5678682327270508, "learning_rate": 1.7996747967479676e-05, "loss": 0.0916, "step": 24640 }, { "epoch": 0.30054878048780487, "grad_norm": 0.7010470032691956, "learning_rate": 1.7996341463414634e-05, "loss": 0.1025, "step": 24645 }, { "epoch": 0.30060975609756097, "grad_norm": 1.2711102962493896, "learning_rate": 1.7995934959349595e-05, "loss": 0.1511, "step": 24650 }, { "epoch": 0.30067073170731706, "grad_norm": 0.9296652674674988, "learning_rate": 1.7995528455284553e-05, "loss": 0.0906, "step": 24655 }, { "epoch": 0.30073170731707316, "grad_norm": 1.1617170572280884, "learning_rate": 1.7995121951219515e-05, "loss": 0.0958, "step": 24660 }, { "epoch": 0.30079268292682926, "grad_norm": 0.595786988735199, "learning_rate": 1.7994715447154473e-05, "loss": 0.092, "step": 24665 }, { "epoch": 0.30085365853658536, "grad_norm": 0.7176058292388916, "learning_rate": 1.799430894308943e-05, "loss": 0.1004, "step": 24670 }, { "epoch": 0.30091463414634145, "grad_norm": 0.5632061958312988, "learning_rate": 1.7993902439024393e-05, "loss": 0.1034, "step": 24675 }, { "epoch": 0.30097560975609755, "grad_norm": 0.7690414786338806, "learning_rate": 1.799349593495935e-05, "loss": 0.1222, "step": 24680 }, { "epoch": 0.30103658536585365, "grad_norm": 0.8533008098602295, "learning_rate": 1.799308943089431e-05, "loss": 0.0937, "step": 24685 }, { "epoch": 0.30109756097560975, "grad_norm": 0.6302305459976196, "learning_rate": 1.799268292682927e-05, "loss": 0.0845, "step": 24690 }, { "epoch": 0.30115853658536584, "grad_norm": 1.917203664779663, "learning_rate": 1.799227642276423e-05, "loss": 0.1204, "step": 24695 }, { "epoch": 0.30121951219512194, "grad_norm": 0.7441902160644531, "learning_rate": 1.7991869918699187e-05, "loss": 0.0916, "step": 24700 }, { "epoch": 0.30128048780487804, "grad_norm": 0.9683950543403625, "learning_rate": 1.7991463414634148e-05, "loss": 0.1336, "step": 24705 }, { "epoch": 0.30134146341463414, "grad_norm": 0.4419766068458557, "learning_rate": 1.7991056910569106e-05, "loss": 0.072, "step": 24710 }, { "epoch": 0.30140243902439023, "grad_norm": 0.5799437165260315, "learning_rate": 1.7990650406504068e-05, "loss": 0.0823, "step": 24715 }, { "epoch": 0.30146341463414633, "grad_norm": 0.4051893353462219, "learning_rate": 1.7990243902439026e-05, "loss": 0.1126, "step": 24720 }, { "epoch": 0.30152439024390243, "grad_norm": 0.6556628942489624, "learning_rate": 1.7989837398373988e-05, "loss": 0.0587, "step": 24725 }, { "epoch": 0.3015853658536585, "grad_norm": 0.7373011708259583, "learning_rate": 1.7989430894308942e-05, "loss": 0.074, "step": 24730 }, { "epoch": 0.3016463414634146, "grad_norm": 0.5485449433326721, "learning_rate": 1.7989024390243904e-05, "loss": 0.1023, "step": 24735 }, { "epoch": 0.3017073170731707, "grad_norm": 1.006079912185669, "learning_rate": 1.7988617886178862e-05, "loss": 0.1255, "step": 24740 }, { "epoch": 0.3017682926829268, "grad_norm": 1.0719380378723145, "learning_rate": 1.7988211382113823e-05, "loss": 0.1502, "step": 24745 }, { "epoch": 0.3018292682926829, "grad_norm": 1.0581512451171875, "learning_rate": 1.798780487804878e-05, "loss": 0.1016, "step": 24750 }, { "epoch": 0.301890243902439, "grad_norm": 0.7895620465278625, "learning_rate": 1.7987398373983743e-05, "loss": 0.1108, "step": 24755 }, { "epoch": 0.3019512195121951, "grad_norm": 0.8725540041923523, "learning_rate": 1.79869918699187e-05, "loss": 0.1056, "step": 24760 }, { "epoch": 0.3020121951219512, "grad_norm": 0.6097989678382874, "learning_rate": 1.798658536585366e-05, "loss": 0.09, "step": 24765 }, { "epoch": 0.3020731707317073, "grad_norm": 0.6846265196800232, "learning_rate": 1.7986178861788617e-05, "loss": 0.0952, "step": 24770 }, { "epoch": 0.3021341463414634, "grad_norm": 0.6436591744422913, "learning_rate": 1.798577235772358e-05, "loss": 0.1144, "step": 24775 }, { "epoch": 0.3021951219512195, "grad_norm": 0.5886649489402771, "learning_rate": 1.7985365853658537e-05, "loss": 0.1107, "step": 24780 }, { "epoch": 0.3022560975609756, "grad_norm": 0.7869573831558228, "learning_rate": 1.79849593495935e-05, "loss": 0.0823, "step": 24785 }, { "epoch": 0.3023170731707317, "grad_norm": 0.8480908274650574, "learning_rate": 1.7984552845528457e-05, "loss": 0.0821, "step": 24790 }, { "epoch": 0.3023780487804878, "grad_norm": 0.7234013080596924, "learning_rate": 1.7984146341463415e-05, "loss": 0.1149, "step": 24795 }, { "epoch": 0.3024390243902439, "grad_norm": 0.5785788297653198, "learning_rate": 1.7983739837398376e-05, "loss": 0.1089, "step": 24800 }, { "epoch": 0.3025, "grad_norm": 1.0295852422714233, "learning_rate": 1.7983333333333335e-05, "loss": 0.1025, "step": 24805 }, { "epoch": 0.3025609756097561, "grad_norm": 0.6060652732849121, "learning_rate": 1.7982926829268296e-05, "loss": 0.0724, "step": 24810 }, { "epoch": 0.3026219512195122, "grad_norm": 0.4393749535083771, "learning_rate": 1.7982520325203254e-05, "loss": 0.0838, "step": 24815 }, { "epoch": 0.3026829268292683, "grad_norm": 0.7246796488761902, "learning_rate": 1.7982113821138212e-05, "loss": 0.081, "step": 24820 }, { "epoch": 0.3027439024390244, "grad_norm": 1.5515291690826416, "learning_rate": 1.798170731707317e-05, "loss": 0.1127, "step": 24825 }, { "epoch": 0.3028048780487805, "grad_norm": 0.5433135628700256, "learning_rate": 1.7981300813008132e-05, "loss": 0.1007, "step": 24830 }, { "epoch": 0.3028658536585366, "grad_norm": 0.6199949383735657, "learning_rate": 1.798089430894309e-05, "loss": 0.0721, "step": 24835 }, { "epoch": 0.3029268292682927, "grad_norm": 0.8999477624893188, "learning_rate": 1.798048780487805e-05, "loss": 0.1059, "step": 24840 }, { "epoch": 0.30298780487804877, "grad_norm": 1.0154906511306763, "learning_rate": 1.798008130081301e-05, "loss": 0.1301, "step": 24845 }, { "epoch": 0.30304878048780487, "grad_norm": 0.629626452922821, "learning_rate": 1.7979674796747968e-05, "loss": 0.0666, "step": 24850 }, { "epoch": 0.30310975609756097, "grad_norm": 1.5276288986206055, "learning_rate": 1.7979268292682926e-05, "loss": 0.1065, "step": 24855 }, { "epoch": 0.30317073170731706, "grad_norm": 1.1448370218276978, "learning_rate": 1.7978861788617887e-05, "loss": 0.0864, "step": 24860 }, { "epoch": 0.30323170731707316, "grad_norm": 0.8160944581031799, "learning_rate": 1.7978455284552846e-05, "loss": 0.0615, "step": 24865 }, { "epoch": 0.30329268292682926, "grad_norm": 0.6545900106430054, "learning_rate": 1.7978048780487807e-05, "loss": 0.066, "step": 24870 }, { "epoch": 0.30335365853658536, "grad_norm": 0.5571230053901672, "learning_rate": 1.7977642276422765e-05, "loss": 0.0842, "step": 24875 }, { "epoch": 0.30341463414634146, "grad_norm": 1.0862038135528564, "learning_rate": 1.7977235772357723e-05, "loss": 0.0877, "step": 24880 }, { "epoch": 0.30347560975609755, "grad_norm": 0.9086474776268005, "learning_rate": 1.7976829268292685e-05, "loss": 0.1184, "step": 24885 }, { "epoch": 0.30353658536585365, "grad_norm": 1.2642738819122314, "learning_rate": 1.7976422764227643e-05, "loss": 0.0873, "step": 24890 }, { "epoch": 0.30359756097560975, "grad_norm": 0.9466332197189331, "learning_rate": 1.7976016260162605e-05, "loss": 0.0948, "step": 24895 }, { "epoch": 0.30365853658536585, "grad_norm": 0.3781762421131134, "learning_rate": 1.7975609756097563e-05, "loss": 0.0864, "step": 24900 }, { "epoch": 0.30371951219512194, "grad_norm": 0.6753196120262146, "learning_rate": 1.7975203252032524e-05, "loss": 0.1037, "step": 24905 }, { "epoch": 0.30378048780487804, "grad_norm": 1.3419963121414185, "learning_rate": 1.797479674796748e-05, "loss": 0.1517, "step": 24910 }, { "epoch": 0.30384146341463414, "grad_norm": 1.2746700048446655, "learning_rate": 1.797439024390244e-05, "loss": 0.1113, "step": 24915 }, { "epoch": 0.30390243902439024, "grad_norm": 0.5153169631958008, "learning_rate": 1.79739837398374e-05, "loss": 0.0969, "step": 24920 }, { "epoch": 0.30396341463414633, "grad_norm": 1.6199523210525513, "learning_rate": 1.797357723577236e-05, "loss": 0.1013, "step": 24925 }, { "epoch": 0.30402439024390243, "grad_norm": 0.5651237964630127, "learning_rate": 1.7973170731707318e-05, "loss": 0.091, "step": 24930 }, { "epoch": 0.30408536585365853, "grad_norm": 1.0343635082244873, "learning_rate": 1.797276422764228e-05, "loss": 0.115, "step": 24935 }, { "epoch": 0.3041463414634146, "grad_norm": 0.30320674180984497, "learning_rate": 1.7972357723577238e-05, "loss": 0.0885, "step": 24940 }, { "epoch": 0.3042073170731707, "grad_norm": 1.0292317867279053, "learning_rate": 1.7971951219512196e-05, "loss": 0.1189, "step": 24945 }, { "epoch": 0.3042682926829268, "grad_norm": 0.33829617500305176, "learning_rate": 1.7971544715447154e-05, "loss": 0.0942, "step": 24950 }, { "epoch": 0.3043292682926829, "grad_norm": 0.6332433223724365, "learning_rate": 1.7971138211382116e-05, "loss": 0.101, "step": 24955 }, { "epoch": 0.304390243902439, "grad_norm": 1.754387378692627, "learning_rate": 1.7970731707317074e-05, "loss": 0.0876, "step": 24960 }, { "epoch": 0.3044512195121951, "grad_norm": 1.6369824409484863, "learning_rate": 1.7970325203252035e-05, "loss": 0.0847, "step": 24965 }, { "epoch": 0.3045121951219512, "grad_norm": 0.5493191480636597, "learning_rate": 1.7969918699186993e-05, "loss": 0.064, "step": 24970 }, { "epoch": 0.3045731707317073, "grad_norm": 1.0533931255340576, "learning_rate": 1.796951219512195e-05, "loss": 0.1281, "step": 24975 }, { "epoch": 0.3046341463414634, "grad_norm": 0.7733713984489441, "learning_rate": 1.7969105691056913e-05, "loss": 0.1114, "step": 24980 }, { "epoch": 0.3046951219512195, "grad_norm": 1.5960794687271118, "learning_rate": 1.796869918699187e-05, "loss": 0.108, "step": 24985 }, { "epoch": 0.3047560975609756, "grad_norm": 0.9254554510116577, "learning_rate": 1.7968292682926833e-05, "loss": 0.1125, "step": 24990 }, { "epoch": 0.3048170731707317, "grad_norm": 0.6994877457618713, "learning_rate": 1.796788617886179e-05, "loss": 0.0717, "step": 24995 }, { "epoch": 0.3048780487804878, "grad_norm": 0.5883218050003052, "learning_rate": 1.796747967479675e-05, "loss": 0.0986, "step": 25000 }, { "epoch": 0.3049390243902439, "grad_norm": 0.5347636342048645, "learning_rate": 1.7967073170731707e-05, "loss": 0.0995, "step": 25005 }, { "epoch": 0.305, "grad_norm": 0.6058582663536072, "learning_rate": 1.796666666666667e-05, "loss": 0.0949, "step": 25010 }, { "epoch": 0.3050609756097561, "grad_norm": 0.5267131924629211, "learning_rate": 1.7966260162601627e-05, "loss": 0.1003, "step": 25015 }, { "epoch": 0.3051219512195122, "grad_norm": 0.7956111431121826, "learning_rate": 1.7965853658536588e-05, "loss": 0.1056, "step": 25020 }, { "epoch": 0.3051829268292683, "grad_norm": 1.2769514322280884, "learning_rate": 1.7965447154471546e-05, "loss": 0.0929, "step": 25025 }, { "epoch": 0.3052439024390244, "grad_norm": 2.0834100246429443, "learning_rate": 1.7965040650406504e-05, "loss": 0.0866, "step": 25030 }, { "epoch": 0.3053048780487805, "grad_norm": 0.4827106297016144, "learning_rate": 1.7964634146341463e-05, "loss": 0.0755, "step": 25035 }, { "epoch": 0.3053658536585366, "grad_norm": 0.7055217623710632, "learning_rate": 1.7964227642276424e-05, "loss": 0.1305, "step": 25040 }, { "epoch": 0.3054268292682927, "grad_norm": 0.710249125957489, "learning_rate": 1.7963821138211382e-05, "loss": 0.0987, "step": 25045 }, { "epoch": 0.3054878048780488, "grad_norm": 0.5624908804893494, "learning_rate": 1.7963414634146344e-05, "loss": 0.1026, "step": 25050 }, { "epoch": 0.30554878048780487, "grad_norm": 1.5365288257598877, "learning_rate": 1.7963008130081302e-05, "loss": 0.1004, "step": 25055 }, { "epoch": 0.30560975609756097, "grad_norm": 0.9948895573616028, "learning_rate": 1.796260162601626e-05, "loss": 0.1072, "step": 25060 }, { "epoch": 0.30567073170731707, "grad_norm": 0.6090547442436218, "learning_rate": 1.796219512195122e-05, "loss": 0.1101, "step": 25065 }, { "epoch": 0.30573170731707316, "grad_norm": 0.42964500188827515, "learning_rate": 1.796178861788618e-05, "loss": 0.1022, "step": 25070 }, { "epoch": 0.30579268292682926, "grad_norm": 0.5540544390678406, "learning_rate": 1.796138211382114e-05, "loss": 0.0809, "step": 25075 }, { "epoch": 0.30585365853658536, "grad_norm": 0.7438393831253052, "learning_rate": 1.79609756097561e-05, "loss": 0.0659, "step": 25080 }, { "epoch": 0.30591463414634146, "grad_norm": 0.7372099161148071, "learning_rate": 1.796056910569106e-05, "loss": 0.094, "step": 25085 }, { "epoch": 0.30597560975609756, "grad_norm": 0.5932486057281494, "learning_rate": 1.7960162601626016e-05, "loss": 0.0857, "step": 25090 }, { "epoch": 0.30603658536585365, "grad_norm": 0.6761767268180847, "learning_rate": 1.7959756097560977e-05, "loss": 0.0823, "step": 25095 }, { "epoch": 0.30609756097560975, "grad_norm": 0.9810454249382019, "learning_rate": 1.7959349593495935e-05, "loss": 0.1054, "step": 25100 }, { "epoch": 0.30615853658536585, "grad_norm": 0.6198567152023315, "learning_rate": 1.7958943089430897e-05, "loss": 0.0924, "step": 25105 }, { "epoch": 0.30621951219512195, "grad_norm": 0.6085283756256104, "learning_rate": 1.7958536585365855e-05, "loss": 0.1092, "step": 25110 }, { "epoch": 0.30628048780487804, "grad_norm": 0.8009218573570251, "learning_rate": 1.7958130081300816e-05, "loss": 0.0728, "step": 25115 }, { "epoch": 0.30634146341463414, "grad_norm": 0.8588747978210449, "learning_rate": 1.795772357723577e-05, "loss": 0.0982, "step": 25120 }, { "epoch": 0.30640243902439024, "grad_norm": 0.7315743565559387, "learning_rate": 1.7957317073170733e-05, "loss": 0.08, "step": 25125 }, { "epoch": 0.30646341463414634, "grad_norm": 0.708351731300354, "learning_rate": 1.795691056910569e-05, "loss": 0.0914, "step": 25130 }, { "epoch": 0.30652439024390243, "grad_norm": 0.9189640283584595, "learning_rate": 1.7956504065040652e-05, "loss": 0.0798, "step": 25135 }, { "epoch": 0.30658536585365853, "grad_norm": 0.5644587278366089, "learning_rate": 1.795609756097561e-05, "loss": 0.09, "step": 25140 }, { "epoch": 0.30664634146341463, "grad_norm": 0.6750010251998901, "learning_rate": 1.7955691056910572e-05, "loss": 0.0759, "step": 25145 }, { "epoch": 0.3067073170731707, "grad_norm": 0.8228035569190979, "learning_rate": 1.795528455284553e-05, "loss": 0.1068, "step": 25150 }, { "epoch": 0.3067682926829268, "grad_norm": 0.8454434275627136, "learning_rate": 1.7954878048780488e-05, "loss": 0.0944, "step": 25155 }, { "epoch": 0.3068292682926829, "grad_norm": 2.147026300430298, "learning_rate": 1.795447154471545e-05, "loss": 0.1025, "step": 25160 }, { "epoch": 0.306890243902439, "grad_norm": 0.5627323985099792, "learning_rate": 1.7954065040650408e-05, "loss": 0.1017, "step": 25165 }, { "epoch": 0.3069512195121951, "grad_norm": 0.4253903925418854, "learning_rate": 1.795365853658537e-05, "loss": 0.0711, "step": 25170 }, { "epoch": 0.3070121951219512, "grad_norm": 0.9453485012054443, "learning_rate": 1.7953252032520327e-05, "loss": 0.1334, "step": 25175 }, { "epoch": 0.3070731707317073, "grad_norm": 0.9171939492225647, "learning_rate": 1.7952845528455286e-05, "loss": 0.1036, "step": 25180 }, { "epoch": 0.3071341463414634, "grad_norm": 0.879901647567749, "learning_rate": 1.7952439024390244e-05, "loss": 0.1501, "step": 25185 }, { "epoch": 0.3071951219512195, "grad_norm": 1.4935072660446167, "learning_rate": 1.7952032520325205e-05, "loss": 0.1028, "step": 25190 }, { "epoch": 0.3072560975609756, "grad_norm": 0.8878475427627563, "learning_rate": 1.7951626016260163e-05, "loss": 0.1156, "step": 25195 }, { "epoch": 0.3073170731707317, "grad_norm": 1.192169189453125, "learning_rate": 1.7951219512195125e-05, "loss": 0.0847, "step": 25200 }, { "epoch": 0.3073780487804878, "grad_norm": 0.6904860138893127, "learning_rate": 1.7950813008130083e-05, "loss": 0.1102, "step": 25205 }, { "epoch": 0.3074390243902439, "grad_norm": 0.921312689781189, "learning_rate": 1.795040650406504e-05, "loss": 0.0968, "step": 25210 }, { "epoch": 0.3075, "grad_norm": 1.0860774517059326, "learning_rate": 1.795e-05, "loss": 0.0844, "step": 25215 }, { "epoch": 0.3075609756097561, "grad_norm": 1.2137528657913208, "learning_rate": 1.794959349593496e-05, "loss": 0.0925, "step": 25220 }, { "epoch": 0.3076219512195122, "grad_norm": 1.7653062343597412, "learning_rate": 1.794918699186992e-05, "loss": 0.1287, "step": 25225 }, { "epoch": 0.3076829268292683, "grad_norm": 0.9395143389701843, "learning_rate": 1.794878048780488e-05, "loss": 0.0896, "step": 25230 }, { "epoch": 0.3077439024390244, "grad_norm": 0.6243391633033752, "learning_rate": 1.794837398373984e-05, "loss": 0.1365, "step": 25235 }, { "epoch": 0.3078048780487805, "grad_norm": 0.7776655554771423, "learning_rate": 1.7947967479674797e-05, "loss": 0.0988, "step": 25240 }, { "epoch": 0.3078658536585366, "grad_norm": 0.6154784560203552, "learning_rate": 1.7947560975609758e-05, "loss": 0.1461, "step": 25245 }, { "epoch": 0.3079268292682927, "grad_norm": 0.9313341379165649, "learning_rate": 1.7947154471544716e-05, "loss": 0.1133, "step": 25250 }, { "epoch": 0.3079878048780488, "grad_norm": 0.8004664182662964, "learning_rate": 1.7946747967479678e-05, "loss": 0.1019, "step": 25255 }, { "epoch": 0.3080487804878049, "grad_norm": 0.590525209903717, "learning_rate": 1.7946341463414636e-05, "loss": 0.1148, "step": 25260 }, { "epoch": 0.30810975609756097, "grad_norm": 0.5661469101905823, "learning_rate": 1.7945934959349594e-05, "loss": 0.1075, "step": 25265 }, { "epoch": 0.30817073170731707, "grad_norm": 0.5778814554214478, "learning_rate": 1.7945528455284552e-05, "loss": 0.1169, "step": 25270 }, { "epoch": 0.30823170731707317, "grad_norm": 0.5380128622055054, "learning_rate": 1.7945121951219514e-05, "loss": 0.0911, "step": 25275 }, { "epoch": 0.30829268292682926, "grad_norm": 0.521809995174408, "learning_rate": 1.7944715447154472e-05, "loss": 0.0954, "step": 25280 }, { "epoch": 0.30835365853658536, "grad_norm": 0.564824640750885, "learning_rate": 1.7944308943089433e-05, "loss": 0.0618, "step": 25285 }, { "epoch": 0.30841463414634146, "grad_norm": 0.47394561767578125, "learning_rate": 1.794390243902439e-05, "loss": 0.1357, "step": 25290 }, { "epoch": 0.30847560975609756, "grad_norm": 0.8129609227180481, "learning_rate": 1.7943495934959353e-05, "loss": 0.1394, "step": 25295 }, { "epoch": 0.30853658536585366, "grad_norm": 0.4359045624732971, "learning_rate": 1.7943089430894308e-05, "loss": 0.0917, "step": 25300 }, { "epoch": 0.30859756097560975, "grad_norm": 0.8030720353126526, "learning_rate": 1.794268292682927e-05, "loss": 0.0843, "step": 25305 }, { "epoch": 0.30865853658536585, "grad_norm": 0.5281481742858887, "learning_rate": 1.7942276422764227e-05, "loss": 0.1062, "step": 25310 }, { "epoch": 0.30871951219512195, "grad_norm": 0.6003342866897583, "learning_rate": 1.794186991869919e-05, "loss": 0.104, "step": 25315 }, { "epoch": 0.30878048780487805, "grad_norm": 0.9865038990974426, "learning_rate": 1.7941463414634147e-05, "loss": 0.1034, "step": 25320 }, { "epoch": 0.30884146341463414, "grad_norm": 1.1190464496612549, "learning_rate": 1.794105691056911e-05, "loss": 0.0892, "step": 25325 }, { "epoch": 0.30890243902439024, "grad_norm": 1.710004448890686, "learning_rate": 1.7940650406504067e-05, "loss": 0.0748, "step": 25330 }, { "epoch": 0.30896341463414634, "grad_norm": 0.8063488006591797, "learning_rate": 1.7940243902439025e-05, "loss": 0.0855, "step": 25335 }, { "epoch": 0.30902439024390244, "grad_norm": 0.7350655198097229, "learning_rate": 1.7939837398373986e-05, "loss": 0.1005, "step": 25340 }, { "epoch": 0.30908536585365853, "grad_norm": 0.5370075702667236, "learning_rate": 1.7939430894308944e-05, "loss": 0.1011, "step": 25345 }, { "epoch": 0.30914634146341463, "grad_norm": 1.0720446109771729, "learning_rate": 1.7939024390243906e-05, "loss": 0.1194, "step": 25350 }, { "epoch": 0.30920731707317073, "grad_norm": 1.3697526454925537, "learning_rate": 1.7938617886178864e-05, "loss": 0.1234, "step": 25355 }, { "epoch": 0.3092682926829268, "grad_norm": 0.7407501935958862, "learning_rate": 1.7938211382113822e-05, "loss": 0.1261, "step": 25360 }, { "epoch": 0.3093292682926829, "grad_norm": 1.0389715433120728, "learning_rate": 1.793780487804878e-05, "loss": 0.1156, "step": 25365 }, { "epoch": 0.309390243902439, "grad_norm": 0.5174434185028076, "learning_rate": 1.7937398373983742e-05, "loss": 0.1191, "step": 25370 }, { "epoch": 0.3094512195121951, "grad_norm": 0.8560402989387512, "learning_rate": 1.79369918699187e-05, "loss": 0.0603, "step": 25375 }, { "epoch": 0.3095121951219512, "grad_norm": 0.9416540861129761, "learning_rate": 1.793658536585366e-05, "loss": 0.0947, "step": 25380 }, { "epoch": 0.3095731707317073, "grad_norm": 0.8295131921768188, "learning_rate": 1.793617886178862e-05, "loss": 0.0996, "step": 25385 }, { "epoch": 0.3096341463414634, "grad_norm": 1.1068850755691528, "learning_rate": 1.7935772357723578e-05, "loss": 0.0955, "step": 25390 }, { "epoch": 0.3096951219512195, "grad_norm": 0.6909409761428833, "learning_rate": 1.7935365853658536e-05, "loss": 0.0847, "step": 25395 }, { "epoch": 0.3097560975609756, "grad_norm": 0.6027936339378357, "learning_rate": 1.7934959349593497e-05, "loss": 0.092, "step": 25400 }, { "epoch": 0.3098170731707317, "grad_norm": 0.6891223788261414, "learning_rate": 1.7934552845528456e-05, "loss": 0.1371, "step": 25405 }, { "epoch": 0.3098780487804878, "grad_norm": 0.4574551284313202, "learning_rate": 1.7934146341463417e-05, "loss": 0.1415, "step": 25410 }, { "epoch": 0.3099390243902439, "grad_norm": 3.1948108673095703, "learning_rate": 1.7933739837398375e-05, "loss": 0.0961, "step": 25415 }, { "epoch": 0.31, "grad_norm": 0.8437933921813965, "learning_rate": 1.7933333333333333e-05, "loss": 0.0853, "step": 25420 }, { "epoch": 0.3100609756097561, "grad_norm": 1.726540446281433, "learning_rate": 1.7932926829268295e-05, "loss": 0.1238, "step": 25425 }, { "epoch": 0.3101219512195122, "grad_norm": 0.3781537413597107, "learning_rate": 1.7932520325203253e-05, "loss": 0.0811, "step": 25430 }, { "epoch": 0.3101829268292683, "grad_norm": 0.6126453876495361, "learning_rate": 1.7932113821138214e-05, "loss": 0.0967, "step": 25435 }, { "epoch": 0.3102439024390244, "grad_norm": 0.7285207509994507, "learning_rate": 1.7931707317073173e-05, "loss": 0.0839, "step": 25440 }, { "epoch": 0.3103048780487805, "grad_norm": 0.9309988021850586, "learning_rate": 1.793130081300813e-05, "loss": 0.1139, "step": 25445 }, { "epoch": 0.3103658536585366, "grad_norm": 0.8619852662086487, "learning_rate": 1.793089430894309e-05, "loss": 0.0958, "step": 25450 }, { "epoch": 0.3104268292682927, "grad_norm": 0.442036509513855, "learning_rate": 1.793048780487805e-05, "loss": 0.1239, "step": 25455 }, { "epoch": 0.3104878048780488, "grad_norm": 0.33166971802711487, "learning_rate": 1.793008130081301e-05, "loss": 0.0665, "step": 25460 }, { "epoch": 0.3105487804878049, "grad_norm": 0.7922097444534302, "learning_rate": 1.792967479674797e-05, "loss": 0.101, "step": 25465 }, { "epoch": 0.310609756097561, "grad_norm": 1.1835204362869263, "learning_rate": 1.7929268292682928e-05, "loss": 0.0909, "step": 25470 }, { "epoch": 0.31067073170731707, "grad_norm": 0.6323553323745728, "learning_rate": 1.792886178861789e-05, "loss": 0.1021, "step": 25475 }, { "epoch": 0.31073170731707317, "grad_norm": 0.7577944397926331, "learning_rate": 1.7928455284552844e-05, "loss": 0.1069, "step": 25480 }, { "epoch": 0.31079268292682927, "grad_norm": 0.5869084596633911, "learning_rate": 1.7928048780487806e-05, "loss": 0.0822, "step": 25485 }, { "epoch": 0.31085365853658536, "grad_norm": 0.9133856892585754, "learning_rate": 1.7927642276422764e-05, "loss": 0.0808, "step": 25490 }, { "epoch": 0.31091463414634146, "grad_norm": 0.5655822157859802, "learning_rate": 1.7927235772357726e-05, "loss": 0.1097, "step": 25495 }, { "epoch": 0.31097560975609756, "grad_norm": 1.1582599878311157, "learning_rate": 1.7926829268292684e-05, "loss": 0.0936, "step": 25500 }, { "epoch": 0.31103658536585366, "grad_norm": 0.6416016221046448, "learning_rate": 1.7926422764227645e-05, "loss": 0.1052, "step": 25505 }, { "epoch": 0.31109756097560975, "grad_norm": 0.8996376395225525, "learning_rate": 1.7926016260162603e-05, "loss": 0.1035, "step": 25510 }, { "epoch": 0.31115853658536585, "grad_norm": 1.666978359222412, "learning_rate": 1.792560975609756e-05, "loss": 0.0773, "step": 25515 }, { "epoch": 0.31121951219512195, "grad_norm": 0.7893341779708862, "learning_rate": 1.7925203252032523e-05, "loss": 0.0939, "step": 25520 }, { "epoch": 0.31128048780487805, "grad_norm": 1.1327178478240967, "learning_rate": 1.792479674796748e-05, "loss": 0.0893, "step": 25525 }, { "epoch": 0.31134146341463415, "grad_norm": 0.6517656445503235, "learning_rate": 1.792439024390244e-05, "loss": 0.1183, "step": 25530 }, { "epoch": 0.31140243902439024, "grad_norm": 0.5156926512718201, "learning_rate": 1.79239837398374e-05, "loss": 0.0724, "step": 25535 }, { "epoch": 0.31146341463414634, "grad_norm": 0.9076527953147888, "learning_rate": 1.792357723577236e-05, "loss": 0.0764, "step": 25540 }, { "epoch": 0.31152439024390244, "grad_norm": 1.19591224193573, "learning_rate": 1.7923170731707317e-05, "loss": 0.1002, "step": 25545 }, { "epoch": 0.31158536585365854, "grad_norm": 0.898245632648468, "learning_rate": 1.792276422764228e-05, "loss": 0.0916, "step": 25550 }, { "epoch": 0.31164634146341463, "grad_norm": 0.8804939389228821, "learning_rate": 1.7922357723577237e-05, "loss": 0.089, "step": 25555 }, { "epoch": 0.31170731707317073, "grad_norm": 0.9551852941513062, "learning_rate": 1.7921951219512198e-05, "loss": 0.1313, "step": 25560 }, { "epoch": 0.31176829268292683, "grad_norm": 0.8400362133979797, "learning_rate": 1.7921544715447156e-05, "loss": 0.0835, "step": 25565 }, { "epoch": 0.3118292682926829, "grad_norm": 0.7561677098274231, "learning_rate": 1.7921138211382114e-05, "loss": 0.0884, "step": 25570 }, { "epoch": 0.311890243902439, "grad_norm": 0.6414942741394043, "learning_rate": 1.7920731707317073e-05, "loss": 0.1004, "step": 25575 }, { "epoch": 0.3119512195121951, "grad_norm": 0.5314074158668518, "learning_rate": 1.7920325203252034e-05, "loss": 0.0827, "step": 25580 }, { "epoch": 0.3120121951219512, "grad_norm": 0.4783538579940796, "learning_rate": 1.7919918699186992e-05, "loss": 0.09, "step": 25585 }, { "epoch": 0.3120731707317073, "grad_norm": 0.6032774448394775, "learning_rate": 1.7919512195121954e-05, "loss": 0.0801, "step": 25590 }, { "epoch": 0.3121341463414634, "grad_norm": 0.5932353734970093, "learning_rate": 1.7919105691056912e-05, "loss": 0.0774, "step": 25595 }, { "epoch": 0.3121951219512195, "grad_norm": 0.9179865121841431, "learning_rate": 1.791869918699187e-05, "loss": 0.0995, "step": 25600 }, { "epoch": 0.3122560975609756, "grad_norm": 0.7638202905654907, "learning_rate": 1.791829268292683e-05, "loss": 0.0982, "step": 25605 }, { "epoch": 0.3123170731707317, "grad_norm": 1.3640066385269165, "learning_rate": 1.791788617886179e-05, "loss": 0.1037, "step": 25610 }, { "epoch": 0.3123780487804878, "grad_norm": 0.38436174392700195, "learning_rate": 1.791747967479675e-05, "loss": 0.073, "step": 25615 }, { "epoch": 0.3124390243902439, "grad_norm": 0.6272493004798889, "learning_rate": 1.791707317073171e-05, "loss": 0.1286, "step": 25620 }, { "epoch": 0.3125, "grad_norm": 1.462668776512146, "learning_rate": 1.7916666666666667e-05, "loss": 0.156, "step": 25625 }, { "epoch": 0.3125609756097561, "grad_norm": 0.386807918548584, "learning_rate": 1.791626016260163e-05, "loss": 0.0964, "step": 25630 }, { "epoch": 0.3126219512195122, "grad_norm": 0.6356425881385803, "learning_rate": 1.7915853658536587e-05, "loss": 0.0766, "step": 25635 }, { "epoch": 0.3126829268292683, "grad_norm": 0.6506827473640442, "learning_rate": 1.7915447154471545e-05, "loss": 0.0775, "step": 25640 }, { "epoch": 0.3127439024390244, "grad_norm": 0.6375480890274048, "learning_rate": 1.7915040650406507e-05, "loss": 0.1235, "step": 25645 }, { "epoch": 0.3128048780487805, "grad_norm": 0.7629703283309937, "learning_rate": 1.7914634146341465e-05, "loss": 0.0639, "step": 25650 }, { "epoch": 0.3128658536585366, "grad_norm": 0.9279126524925232, "learning_rate": 1.7914227642276426e-05, "loss": 0.0934, "step": 25655 }, { "epoch": 0.3129268292682927, "grad_norm": 0.5777198076248169, "learning_rate": 1.7913821138211384e-05, "loss": 0.0803, "step": 25660 }, { "epoch": 0.3129878048780488, "grad_norm": 0.6123753786087036, "learning_rate": 1.7913414634146343e-05, "loss": 0.0797, "step": 25665 }, { "epoch": 0.3130487804878049, "grad_norm": 1.803646206855774, "learning_rate": 1.79130081300813e-05, "loss": 0.1119, "step": 25670 }, { "epoch": 0.313109756097561, "grad_norm": 0.5897849202156067, "learning_rate": 1.7912601626016262e-05, "loss": 0.09, "step": 25675 }, { "epoch": 0.3131707317073171, "grad_norm": 1.4564015865325928, "learning_rate": 1.791219512195122e-05, "loss": 0.1139, "step": 25680 }, { "epoch": 0.31323170731707317, "grad_norm": 0.724287211894989, "learning_rate": 1.7911788617886182e-05, "loss": 0.0943, "step": 25685 }, { "epoch": 0.31329268292682927, "grad_norm": 0.6562267541885376, "learning_rate": 1.791138211382114e-05, "loss": 0.0995, "step": 25690 }, { "epoch": 0.31335365853658537, "grad_norm": 0.537874698638916, "learning_rate": 1.7910975609756098e-05, "loss": 0.0972, "step": 25695 }, { "epoch": 0.31341463414634146, "grad_norm": 0.35089996457099915, "learning_rate": 1.791056910569106e-05, "loss": 0.099, "step": 25700 }, { "epoch": 0.31347560975609756, "grad_norm": 0.6782856583595276, "learning_rate": 1.7910162601626018e-05, "loss": 0.0988, "step": 25705 }, { "epoch": 0.31353658536585366, "grad_norm": 0.6607651710510254, "learning_rate": 1.7909756097560976e-05, "loss": 0.1195, "step": 25710 }, { "epoch": 0.31359756097560976, "grad_norm": 0.8702970147132874, "learning_rate": 1.7909349593495937e-05, "loss": 0.102, "step": 25715 }, { "epoch": 0.31365853658536585, "grad_norm": 2.274078130722046, "learning_rate": 1.7908943089430895e-05, "loss": 0.1005, "step": 25720 }, { "epoch": 0.31371951219512195, "grad_norm": 0.7499318718910217, "learning_rate": 1.7908536585365854e-05, "loss": 0.1089, "step": 25725 }, { "epoch": 0.31378048780487805, "grad_norm": 0.8018688559532166, "learning_rate": 1.7908130081300815e-05, "loss": 0.0912, "step": 25730 }, { "epoch": 0.31384146341463415, "grad_norm": 1.0183563232421875, "learning_rate": 1.7907723577235773e-05, "loss": 0.1399, "step": 25735 }, { "epoch": 0.31390243902439025, "grad_norm": 0.3495124280452728, "learning_rate": 1.7907317073170735e-05, "loss": 0.0655, "step": 25740 }, { "epoch": 0.31396341463414634, "grad_norm": 0.585869550704956, "learning_rate": 1.7906910569105693e-05, "loss": 0.0985, "step": 25745 }, { "epoch": 0.31402439024390244, "grad_norm": 1.2869417667388916, "learning_rate": 1.7906504065040654e-05, "loss": 0.1103, "step": 25750 }, { "epoch": 0.31408536585365854, "grad_norm": 0.5567024350166321, "learning_rate": 1.790609756097561e-05, "loss": 0.0579, "step": 25755 }, { "epoch": 0.31414634146341464, "grad_norm": 0.6195141673088074, "learning_rate": 1.790569105691057e-05, "loss": 0.0999, "step": 25760 }, { "epoch": 0.31420731707317073, "grad_norm": 1.1195942163467407, "learning_rate": 1.790528455284553e-05, "loss": 0.0984, "step": 25765 }, { "epoch": 0.31426829268292683, "grad_norm": 0.5230628848075867, "learning_rate": 1.790487804878049e-05, "loss": 0.0721, "step": 25770 }, { "epoch": 0.31432926829268293, "grad_norm": 1.2236887216567993, "learning_rate": 1.790447154471545e-05, "loss": 0.0958, "step": 25775 }, { "epoch": 0.314390243902439, "grad_norm": 0.8967905044555664, "learning_rate": 1.790406504065041e-05, "loss": 0.0935, "step": 25780 }, { "epoch": 0.3144512195121951, "grad_norm": 0.7343030571937561, "learning_rate": 1.7903658536585368e-05, "loss": 0.0714, "step": 25785 }, { "epoch": 0.3145121951219512, "grad_norm": 0.6418164372444153, "learning_rate": 1.7903252032520326e-05, "loss": 0.0776, "step": 25790 }, { "epoch": 0.3145731707317073, "grad_norm": 0.6092793345451355, "learning_rate": 1.7902845528455284e-05, "loss": 0.1066, "step": 25795 }, { "epoch": 0.3146341463414634, "grad_norm": 1.6636136770248413, "learning_rate": 1.7902439024390246e-05, "loss": 0.1014, "step": 25800 }, { "epoch": 0.3146951219512195, "grad_norm": 0.8832182288169861, "learning_rate": 1.7902032520325204e-05, "loss": 0.1008, "step": 25805 }, { "epoch": 0.3147560975609756, "grad_norm": 0.6380699276924133, "learning_rate": 1.7901626016260166e-05, "loss": 0.0722, "step": 25810 }, { "epoch": 0.3148170731707317, "grad_norm": 0.670781135559082, "learning_rate": 1.7901219512195124e-05, "loss": 0.0929, "step": 25815 }, { "epoch": 0.3148780487804878, "grad_norm": 0.737648606300354, "learning_rate": 1.7900813008130082e-05, "loss": 0.0661, "step": 25820 }, { "epoch": 0.3149390243902439, "grad_norm": 1.014365553855896, "learning_rate": 1.7900406504065043e-05, "loss": 0.0692, "step": 25825 }, { "epoch": 0.315, "grad_norm": 0.9042952060699463, "learning_rate": 1.79e-05, "loss": 0.0963, "step": 25830 }, { "epoch": 0.3150609756097561, "grad_norm": 0.7117342948913574, "learning_rate": 1.7899593495934963e-05, "loss": 0.1117, "step": 25835 }, { "epoch": 0.3151219512195122, "grad_norm": 1.1822353601455688, "learning_rate": 1.789918699186992e-05, "loss": 0.0946, "step": 25840 }, { "epoch": 0.3151829268292683, "grad_norm": 1.7626203298568726, "learning_rate": 1.789878048780488e-05, "loss": 0.0801, "step": 25845 }, { "epoch": 0.3152439024390244, "grad_norm": 0.8392936587333679, "learning_rate": 1.7898373983739837e-05, "loss": 0.0775, "step": 25850 }, { "epoch": 0.3153048780487805, "grad_norm": 0.9054762721061707, "learning_rate": 1.78979674796748e-05, "loss": 0.0772, "step": 25855 }, { "epoch": 0.3153658536585366, "grad_norm": 1.6882760524749756, "learning_rate": 1.7897560975609757e-05, "loss": 0.0734, "step": 25860 }, { "epoch": 0.3154268292682927, "grad_norm": 0.5453959703445435, "learning_rate": 1.789715447154472e-05, "loss": 0.095, "step": 25865 }, { "epoch": 0.3154878048780488, "grad_norm": 0.7704363465309143, "learning_rate": 1.7896747967479677e-05, "loss": 0.1141, "step": 25870 }, { "epoch": 0.3155487804878049, "grad_norm": 0.45479145646095276, "learning_rate": 1.7896341463414635e-05, "loss": 0.0717, "step": 25875 }, { "epoch": 0.315609756097561, "grad_norm": 0.6982016563415527, "learning_rate": 1.7895934959349596e-05, "loss": 0.0923, "step": 25880 }, { "epoch": 0.3156707317073171, "grad_norm": 0.7159556746482849, "learning_rate": 1.7895528455284554e-05, "loss": 0.0817, "step": 25885 }, { "epoch": 0.3157317073170732, "grad_norm": 1.0344910621643066, "learning_rate": 1.7895121951219512e-05, "loss": 0.1319, "step": 25890 }, { "epoch": 0.31579268292682927, "grad_norm": 0.6147760152816772, "learning_rate": 1.7894715447154474e-05, "loss": 0.0697, "step": 25895 }, { "epoch": 0.31585365853658537, "grad_norm": 0.5049393773078918, "learning_rate": 1.7894308943089432e-05, "loss": 0.0931, "step": 25900 }, { "epoch": 0.31591463414634147, "grad_norm": 0.9980780482292175, "learning_rate": 1.789390243902439e-05, "loss": 0.102, "step": 25905 }, { "epoch": 0.31597560975609756, "grad_norm": 1.4830667972564697, "learning_rate": 1.7893495934959352e-05, "loss": 0.1027, "step": 25910 }, { "epoch": 0.31603658536585366, "grad_norm": 0.5822528600692749, "learning_rate": 1.789308943089431e-05, "loss": 0.0908, "step": 25915 }, { "epoch": 0.31609756097560976, "grad_norm": 0.7168684005737305, "learning_rate": 1.789268292682927e-05, "loss": 0.0752, "step": 25920 }, { "epoch": 0.31615853658536586, "grad_norm": 1.1546939611434937, "learning_rate": 1.789227642276423e-05, "loss": 0.0873, "step": 25925 }, { "epoch": 0.31621951219512195, "grad_norm": 0.7547097206115723, "learning_rate": 1.789186991869919e-05, "loss": 0.0816, "step": 25930 }, { "epoch": 0.31628048780487805, "grad_norm": 0.8176161050796509, "learning_rate": 1.7891463414634146e-05, "loss": 0.1013, "step": 25935 }, { "epoch": 0.31634146341463415, "grad_norm": 1.1651012897491455, "learning_rate": 1.7891056910569107e-05, "loss": 0.0895, "step": 25940 }, { "epoch": 0.31640243902439025, "grad_norm": 0.4827936589717865, "learning_rate": 1.7890650406504065e-05, "loss": 0.0852, "step": 25945 }, { "epoch": 0.31646341463414634, "grad_norm": 0.9478985667228699, "learning_rate": 1.7890243902439027e-05, "loss": 0.1184, "step": 25950 }, { "epoch": 0.31652439024390244, "grad_norm": 0.6368520855903625, "learning_rate": 1.7889837398373985e-05, "loss": 0.0868, "step": 25955 }, { "epoch": 0.31658536585365854, "grad_norm": 0.6748923063278198, "learning_rate": 1.7889430894308947e-05, "loss": 0.0833, "step": 25960 }, { "epoch": 0.31664634146341464, "grad_norm": 1.1856101751327515, "learning_rate": 1.7889024390243905e-05, "loss": 0.0818, "step": 25965 }, { "epoch": 0.31670731707317074, "grad_norm": 0.4426804482936859, "learning_rate": 1.7888617886178863e-05, "loss": 0.1028, "step": 25970 }, { "epoch": 0.31676829268292683, "grad_norm": 0.8295605182647705, "learning_rate": 1.788821138211382e-05, "loss": 0.082, "step": 25975 }, { "epoch": 0.31682926829268293, "grad_norm": 0.625573992729187, "learning_rate": 1.7887804878048783e-05, "loss": 0.1012, "step": 25980 }, { "epoch": 0.31689024390243903, "grad_norm": 0.668928861618042, "learning_rate": 1.788739837398374e-05, "loss": 0.083, "step": 25985 }, { "epoch": 0.3169512195121951, "grad_norm": 0.6636958122253418, "learning_rate": 1.7886991869918702e-05, "loss": 0.0707, "step": 25990 }, { "epoch": 0.3170121951219512, "grad_norm": 1.707715630531311, "learning_rate": 1.788658536585366e-05, "loss": 0.1143, "step": 25995 }, { "epoch": 0.3170731707317073, "grad_norm": 0.505387008190155, "learning_rate": 1.788617886178862e-05, "loss": 0.0607, "step": 26000 }, { "epoch": 0.3171341463414634, "grad_norm": 0.7208840847015381, "learning_rate": 1.788577235772358e-05, "loss": 0.0821, "step": 26005 }, { "epoch": 0.3171951219512195, "grad_norm": 0.815665066242218, "learning_rate": 1.7885365853658538e-05, "loss": 0.0866, "step": 26010 }, { "epoch": 0.3172560975609756, "grad_norm": 0.91570645570755, "learning_rate": 1.78849593495935e-05, "loss": 0.0925, "step": 26015 }, { "epoch": 0.3173170731707317, "grad_norm": 0.8160760402679443, "learning_rate": 1.7884552845528458e-05, "loss": 0.0837, "step": 26020 }, { "epoch": 0.3173780487804878, "grad_norm": 1.3983290195465088, "learning_rate": 1.7884146341463416e-05, "loss": 0.1114, "step": 26025 }, { "epoch": 0.3174390243902439, "grad_norm": 0.8673959970474243, "learning_rate": 1.7883739837398374e-05, "loss": 0.1158, "step": 26030 }, { "epoch": 0.3175, "grad_norm": 1.239957571029663, "learning_rate": 1.7883333333333335e-05, "loss": 0.0986, "step": 26035 }, { "epoch": 0.3175609756097561, "grad_norm": 0.9157105088233948, "learning_rate": 1.7882926829268294e-05, "loss": 0.0866, "step": 26040 }, { "epoch": 0.3176219512195122, "grad_norm": 1.1750680208206177, "learning_rate": 1.7882520325203255e-05, "loss": 0.0834, "step": 26045 }, { "epoch": 0.3176829268292683, "grad_norm": 0.48896151781082153, "learning_rate": 1.7882113821138213e-05, "loss": 0.0989, "step": 26050 }, { "epoch": 0.3177439024390244, "grad_norm": 0.7184221744537354, "learning_rate": 1.788170731707317e-05, "loss": 0.1181, "step": 26055 }, { "epoch": 0.3178048780487805, "grad_norm": 0.7983250021934509, "learning_rate": 1.788130081300813e-05, "loss": 0.0974, "step": 26060 }, { "epoch": 0.3178658536585366, "grad_norm": 0.4563792943954468, "learning_rate": 1.788089430894309e-05, "loss": 0.0748, "step": 26065 }, { "epoch": 0.3179268292682927, "grad_norm": 0.7288339734077454, "learning_rate": 1.788048780487805e-05, "loss": 0.1137, "step": 26070 }, { "epoch": 0.3179878048780488, "grad_norm": 0.5534617304801941, "learning_rate": 1.788008130081301e-05, "loss": 0.0992, "step": 26075 }, { "epoch": 0.3180487804878049, "grad_norm": 0.7975156903266907, "learning_rate": 1.787967479674797e-05, "loss": 0.0934, "step": 26080 }, { "epoch": 0.318109756097561, "grad_norm": 1.0522953271865845, "learning_rate": 1.7879268292682927e-05, "loss": 0.0843, "step": 26085 }, { "epoch": 0.3181707317073171, "grad_norm": 0.6007835268974304, "learning_rate": 1.787886178861789e-05, "loss": 0.0689, "step": 26090 }, { "epoch": 0.3182317073170732, "grad_norm": 0.4476730525493622, "learning_rate": 1.7878455284552847e-05, "loss": 0.0961, "step": 26095 }, { "epoch": 0.3182926829268293, "grad_norm": 1.2189595699310303, "learning_rate": 1.7878048780487808e-05, "loss": 0.0957, "step": 26100 }, { "epoch": 0.31835365853658537, "grad_norm": 0.8024600148200989, "learning_rate": 1.7877642276422766e-05, "loss": 0.0974, "step": 26105 }, { "epoch": 0.31841463414634147, "grad_norm": 0.6105810403823853, "learning_rate": 1.7877235772357728e-05, "loss": 0.0856, "step": 26110 }, { "epoch": 0.31847560975609757, "grad_norm": 0.6575590372085571, "learning_rate": 1.7876829268292682e-05, "loss": 0.087, "step": 26115 }, { "epoch": 0.31853658536585366, "grad_norm": 0.7006092667579651, "learning_rate": 1.7876422764227644e-05, "loss": 0.0859, "step": 26120 }, { "epoch": 0.31859756097560976, "grad_norm": 2.0084564685821533, "learning_rate": 1.7876016260162602e-05, "loss": 0.1013, "step": 26125 }, { "epoch": 0.31865853658536586, "grad_norm": 0.6838669180870056, "learning_rate": 1.7875609756097564e-05, "loss": 0.0859, "step": 26130 }, { "epoch": 0.31871951219512196, "grad_norm": 0.8866444230079651, "learning_rate": 1.7875203252032522e-05, "loss": 0.0967, "step": 26135 }, { "epoch": 0.31878048780487805, "grad_norm": 0.9110703468322754, "learning_rate": 1.7874796747967483e-05, "loss": 0.0845, "step": 26140 }, { "epoch": 0.31884146341463415, "grad_norm": 0.4984695017337799, "learning_rate": 1.787439024390244e-05, "loss": 0.124, "step": 26145 }, { "epoch": 0.31890243902439025, "grad_norm": 0.5721395015716553, "learning_rate": 1.78739837398374e-05, "loss": 0.0796, "step": 26150 }, { "epoch": 0.31896341463414635, "grad_norm": 0.6780869960784912, "learning_rate": 1.7873577235772358e-05, "loss": 0.0959, "step": 26155 }, { "epoch": 0.31902439024390244, "grad_norm": 0.7299462556838989, "learning_rate": 1.787317073170732e-05, "loss": 0.0949, "step": 26160 }, { "epoch": 0.31908536585365854, "grad_norm": 0.7904325127601624, "learning_rate": 1.7872764227642277e-05, "loss": 0.1182, "step": 26165 }, { "epoch": 0.31914634146341464, "grad_norm": 1.281327247619629, "learning_rate": 1.787235772357724e-05, "loss": 0.1118, "step": 26170 }, { "epoch": 0.31920731707317074, "grad_norm": 1.1552634239196777, "learning_rate": 1.7871951219512197e-05, "loss": 0.1008, "step": 26175 }, { "epoch": 0.31926829268292684, "grad_norm": 0.923843502998352, "learning_rate": 1.7871544715447155e-05, "loss": 0.1, "step": 26180 }, { "epoch": 0.31932926829268293, "grad_norm": 0.27244365215301514, "learning_rate": 1.7871138211382117e-05, "loss": 0.0889, "step": 26185 }, { "epoch": 0.31939024390243903, "grad_norm": 1.0582420825958252, "learning_rate": 1.7870731707317075e-05, "loss": 0.0733, "step": 26190 }, { "epoch": 0.31945121951219513, "grad_norm": 1.2597699165344238, "learning_rate": 1.7870325203252036e-05, "loss": 0.1031, "step": 26195 }, { "epoch": 0.3195121951219512, "grad_norm": 1.0445178747177124, "learning_rate": 1.7869918699186994e-05, "loss": 0.0884, "step": 26200 }, { "epoch": 0.3195731707317073, "grad_norm": 0.7300552725791931, "learning_rate": 1.7869512195121952e-05, "loss": 0.1055, "step": 26205 }, { "epoch": 0.3196341463414634, "grad_norm": 0.5642772912979126, "learning_rate": 1.786910569105691e-05, "loss": 0.0819, "step": 26210 }, { "epoch": 0.3196951219512195, "grad_norm": 0.8192737698554993, "learning_rate": 1.7868699186991872e-05, "loss": 0.0779, "step": 26215 }, { "epoch": 0.3197560975609756, "grad_norm": 1.0467358827590942, "learning_rate": 1.786829268292683e-05, "loss": 0.0729, "step": 26220 }, { "epoch": 0.3198170731707317, "grad_norm": 0.7078177332878113, "learning_rate": 1.7867886178861792e-05, "loss": 0.0824, "step": 26225 }, { "epoch": 0.3198780487804878, "grad_norm": 0.8613936901092529, "learning_rate": 1.786747967479675e-05, "loss": 0.1166, "step": 26230 }, { "epoch": 0.3199390243902439, "grad_norm": 1.3504419326782227, "learning_rate": 1.7867073170731708e-05, "loss": 0.1027, "step": 26235 }, { "epoch": 0.32, "grad_norm": 0.7298266887664795, "learning_rate": 1.7866666666666666e-05, "loss": 0.0836, "step": 26240 }, { "epoch": 0.3200609756097561, "grad_norm": 1.0694732666015625, "learning_rate": 1.7866260162601628e-05, "loss": 0.1037, "step": 26245 }, { "epoch": 0.3201219512195122, "grad_norm": 0.6104955077171326, "learning_rate": 1.7865853658536586e-05, "loss": 0.1021, "step": 26250 }, { "epoch": 0.3201829268292683, "grad_norm": 0.6707643866539001, "learning_rate": 1.7865447154471547e-05, "loss": 0.1043, "step": 26255 }, { "epoch": 0.3202439024390244, "grad_norm": 0.41915661096572876, "learning_rate": 1.7865040650406505e-05, "loss": 0.095, "step": 26260 }, { "epoch": 0.3203048780487805, "grad_norm": 1.0333058834075928, "learning_rate": 1.7864634146341464e-05, "loss": 0.0828, "step": 26265 }, { "epoch": 0.3203658536585366, "grad_norm": 0.9117358922958374, "learning_rate": 1.7864227642276425e-05, "loss": 0.1281, "step": 26270 }, { "epoch": 0.3204268292682927, "grad_norm": 0.9073169231414795, "learning_rate": 1.7863821138211383e-05, "loss": 0.1161, "step": 26275 }, { "epoch": 0.3204878048780488, "grad_norm": 1.0139479637145996, "learning_rate": 1.7863414634146345e-05, "loss": 0.1195, "step": 26280 }, { "epoch": 0.3205487804878049, "grad_norm": 0.6771650910377502, "learning_rate": 1.7863008130081303e-05, "loss": 0.0736, "step": 26285 }, { "epoch": 0.320609756097561, "grad_norm": 0.4584307074546814, "learning_rate": 1.7862601626016264e-05, "loss": 0.0845, "step": 26290 }, { "epoch": 0.3206707317073171, "grad_norm": 0.4975202679634094, "learning_rate": 1.786219512195122e-05, "loss": 0.0999, "step": 26295 }, { "epoch": 0.3207317073170732, "grad_norm": 0.4058050811290741, "learning_rate": 1.786178861788618e-05, "loss": 0.0867, "step": 26300 }, { "epoch": 0.3207926829268293, "grad_norm": 0.5411711931228638, "learning_rate": 1.786138211382114e-05, "loss": 0.0798, "step": 26305 }, { "epoch": 0.3208536585365854, "grad_norm": 0.5476614832878113, "learning_rate": 1.78609756097561e-05, "loss": 0.081, "step": 26310 }, { "epoch": 0.32091463414634147, "grad_norm": 0.7675421833992004, "learning_rate": 1.786056910569106e-05, "loss": 0.0773, "step": 26315 }, { "epoch": 0.32097560975609757, "grad_norm": 0.7636832594871521, "learning_rate": 1.786016260162602e-05, "loss": 0.1499, "step": 26320 }, { "epoch": 0.32103658536585367, "grad_norm": 0.5361010432243347, "learning_rate": 1.7859756097560975e-05, "loss": 0.0814, "step": 26325 }, { "epoch": 0.32109756097560976, "grad_norm": 0.6921032071113586, "learning_rate": 1.7859349593495936e-05, "loss": 0.1077, "step": 26330 }, { "epoch": 0.32115853658536586, "grad_norm": 0.8209506273269653, "learning_rate": 1.7858943089430894e-05, "loss": 0.0743, "step": 26335 }, { "epoch": 0.32121951219512196, "grad_norm": 1.3395389318466187, "learning_rate": 1.7858536585365856e-05, "loss": 0.122, "step": 26340 }, { "epoch": 0.32128048780487806, "grad_norm": 0.7603794932365417, "learning_rate": 1.7858130081300814e-05, "loss": 0.0837, "step": 26345 }, { "epoch": 0.32134146341463415, "grad_norm": 1.0070829391479492, "learning_rate": 1.7857723577235775e-05, "loss": 0.0893, "step": 26350 }, { "epoch": 0.32140243902439025, "grad_norm": 1.0553019046783447, "learning_rate": 1.7857317073170734e-05, "loss": 0.0759, "step": 26355 }, { "epoch": 0.32146341463414635, "grad_norm": 1.2078310251235962, "learning_rate": 1.7856910569105692e-05, "loss": 0.1001, "step": 26360 }, { "epoch": 0.32152439024390245, "grad_norm": 0.4528052508831024, "learning_rate": 1.7856504065040653e-05, "loss": 0.0802, "step": 26365 }, { "epoch": 0.32158536585365854, "grad_norm": 0.9172999262809753, "learning_rate": 1.785609756097561e-05, "loss": 0.1182, "step": 26370 }, { "epoch": 0.32164634146341464, "grad_norm": 0.8195086717605591, "learning_rate": 1.7855691056910573e-05, "loss": 0.1019, "step": 26375 }, { "epoch": 0.32170731707317074, "grad_norm": 0.7452704310417175, "learning_rate": 1.785528455284553e-05, "loss": 0.0925, "step": 26380 }, { "epoch": 0.32176829268292684, "grad_norm": 0.7382028698921204, "learning_rate": 1.785487804878049e-05, "loss": 0.0698, "step": 26385 }, { "epoch": 0.32182926829268294, "grad_norm": 1.4364607334136963, "learning_rate": 1.7854471544715447e-05, "loss": 0.0819, "step": 26390 }, { "epoch": 0.32189024390243903, "grad_norm": 0.5889428853988647, "learning_rate": 1.785406504065041e-05, "loss": 0.09, "step": 26395 }, { "epoch": 0.32195121951219513, "grad_norm": 1.3153057098388672, "learning_rate": 1.7853658536585367e-05, "loss": 0.1133, "step": 26400 }, { "epoch": 0.32201219512195123, "grad_norm": 1.339476466178894, "learning_rate": 1.785325203252033e-05, "loss": 0.1217, "step": 26405 }, { "epoch": 0.3220731707317073, "grad_norm": 0.4167296588420868, "learning_rate": 1.7852845528455287e-05, "loss": 0.0747, "step": 26410 }, { "epoch": 0.3221341463414634, "grad_norm": 1.2865132093429565, "learning_rate": 1.7852439024390245e-05, "loss": 0.0873, "step": 26415 }, { "epoch": 0.3221951219512195, "grad_norm": 1.1981329917907715, "learning_rate": 1.7852032520325203e-05, "loss": 0.1206, "step": 26420 }, { "epoch": 0.3222560975609756, "grad_norm": 1.096604824066162, "learning_rate": 1.7851626016260164e-05, "loss": 0.0821, "step": 26425 }, { "epoch": 0.3223170731707317, "grad_norm": 0.4988473355770111, "learning_rate": 1.7851219512195122e-05, "loss": 0.1116, "step": 26430 }, { "epoch": 0.3223780487804878, "grad_norm": 2.3228280544281006, "learning_rate": 1.7850813008130084e-05, "loss": 0.0769, "step": 26435 }, { "epoch": 0.3224390243902439, "grad_norm": 0.8340215086936951, "learning_rate": 1.7850406504065042e-05, "loss": 0.102, "step": 26440 }, { "epoch": 0.3225, "grad_norm": 0.6817731261253357, "learning_rate": 1.785e-05, "loss": 0.0901, "step": 26445 }, { "epoch": 0.3225609756097561, "grad_norm": 1.448581576347351, "learning_rate": 1.7849593495934962e-05, "loss": 0.0917, "step": 26450 }, { "epoch": 0.3226219512195122, "grad_norm": 0.3927624523639679, "learning_rate": 1.784918699186992e-05, "loss": 0.0785, "step": 26455 }, { "epoch": 0.3226829268292683, "grad_norm": 1.208155632019043, "learning_rate": 1.784878048780488e-05, "loss": 0.118, "step": 26460 }, { "epoch": 0.3227439024390244, "grad_norm": 0.9342003464698792, "learning_rate": 1.784837398373984e-05, "loss": 0.0741, "step": 26465 }, { "epoch": 0.3228048780487805, "grad_norm": 0.8102266788482666, "learning_rate": 1.7847967479674798e-05, "loss": 0.083, "step": 26470 }, { "epoch": 0.3228658536585366, "grad_norm": 0.5111275911331177, "learning_rate": 1.7847560975609756e-05, "loss": 0.0707, "step": 26475 }, { "epoch": 0.3229268292682927, "grad_norm": 0.7482141256332397, "learning_rate": 1.7847154471544717e-05, "loss": 0.0848, "step": 26480 }, { "epoch": 0.3229878048780488, "grad_norm": 0.9110016822814941, "learning_rate": 1.7846747967479675e-05, "loss": 0.1148, "step": 26485 }, { "epoch": 0.3230487804878049, "grad_norm": 0.9245263934135437, "learning_rate": 1.7846341463414637e-05, "loss": 0.1009, "step": 26490 }, { "epoch": 0.323109756097561, "grad_norm": 0.7516400218009949, "learning_rate": 1.7845934959349595e-05, "loss": 0.0799, "step": 26495 }, { "epoch": 0.3231707317073171, "grad_norm": 1.1584385633468628, "learning_rate": 1.7845528455284557e-05, "loss": 0.068, "step": 26500 }, { "epoch": 0.3232317073170732, "grad_norm": 1.2910012006759644, "learning_rate": 1.784512195121951e-05, "loss": 0.0957, "step": 26505 }, { "epoch": 0.3232926829268293, "grad_norm": 0.609703004360199, "learning_rate": 1.7844715447154473e-05, "loss": 0.0621, "step": 26510 }, { "epoch": 0.3233536585365854, "grad_norm": 0.7453812956809998, "learning_rate": 1.784430894308943e-05, "loss": 0.0967, "step": 26515 }, { "epoch": 0.3234146341463415, "grad_norm": 0.6470577716827393, "learning_rate": 1.7843902439024392e-05, "loss": 0.0867, "step": 26520 }, { "epoch": 0.32347560975609757, "grad_norm": 1.4681566953659058, "learning_rate": 1.784349593495935e-05, "loss": 0.0778, "step": 26525 }, { "epoch": 0.32353658536585367, "grad_norm": 0.5316154956817627, "learning_rate": 1.7843089430894312e-05, "loss": 0.0579, "step": 26530 }, { "epoch": 0.32359756097560977, "grad_norm": 0.546100914478302, "learning_rate": 1.784268292682927e-05, "loss": 0.0697, "step": 26535 }, { "epoch": 0.32365853658536586, "grad_norm": 0.8787719011306763, "learning_rate": 1.784227642276423e-05, "loss": 0.0877, "step": 26540 }, { "epoch": 0.32371951219512196, "grad_norm": 2.268620014190674, "learning_rate": 1.784186991869919e-05, "loss": 0.1164, "step": 26545 }, { "epoch": 0.32378048780487806, "grad_norm": 0.642701268196106, "learning_rate": 1.7841463414634148e-05, "loss": 0.1298, "step": 26550 }, { "epoch": 0.32384146341463416, "grad_norm": 0.7685015797615051, "learning_rate": 1.784105691056911e-05, "loss": 0.119, "step": 26555 }, { "epoch": 0.32390243902439025, "grad_norm": 0.8673955202102661, "learning_rate": 1.7840650406504068e-05, "loss": 0.1546, "step": 26560 }, { "epoch": 0.32396341463414635, "grad_norm": 1.2930617332458496, "learning_rate": 1.7840243902439026e-05, "loss": 0.1128, "step": 26565 }, { "epoch": 0.32402439024390245, "grad_norm": 0.9146379232406616, "learning_rate": 1.7839837398373984e-05, "loss": 0.1185, "step": 26570 }, { "epoch": 0.32408536585365855, "grad_norm": 0.7501164674758911, "learning_rate": 1.7839430894308945e-05, "loss": 0.0974, "step": 26575 }, { "epoch": 0.32414634146341464, "grad_norm": 0.6566312313079834, "learning_rate": 1.7839024390243904e-05, "loss": 0.0799, "step": 26580 }, { "epoch": 0.32420731707317074, "grad_norm": 0.8549196720123291, "learning_rate": 1.7838617886178865e-05, "loss": 0.0847, "step": 26585 }, { "epoch": 0.32426829268292684, "grad_norm": 0.551754355430603, "learning_rate": 1.7838211382113823e-05, "loss": 0.0727, "step": 26590 }, { "epoch": 0.32432926829268294, "grad_norm": 0.45616182684898376, "learning_rate": 1.783780487804878e-05, "loss": 0.0785, "step": 26595 }, { "epoch": 0.32439024390243903, "grad_norm": 0.8769172430038452, "learning_rate": 1.783739837398374e-05, "loss": 0.0897, "step": 26600 }, { "epoch": 0.32445121951219513, "grad_norm": 0.797808825969696, "learning_rate": 1.78369918699187e-05, "loss": 0.0944, "step": 26605 }, { "epoch": 0.32451219512195123, "grad_norm": 1.280269980430603, "learning_rate": 1.783658536585366e-05, "loss": 0.0964, "step": 26610 }, { "epoch": 0.32457317073170733, "grad_norm": 0.5888692736625671, "learning_rate": 1.783617886178862e-05, "loss": 0.0889, "step": 26615 }, { "epoch": 0.3246341463414634, "grad_norm": 0.9523584246635437, "learning_rate": 1.783577235772358e-05, "loss": 0.1026, "step": 26620 }, { "epoch": 0.3246951219512195, "grad_norm": 0.8013091087341309, "learning_rate": 1.7835365853658537e-05, "loss": 0.0929, "step": 26625 }, { "epoch": 0.3247560975609756, "grad_norm": 1.6515452861785889, "learning_rate": 1.78349593495935e-05, "loss": 0.0881, "step": 26630 }, { "epoch": 0.3248170731707317, "grad_norm": 0.7776917219161987, "learning_rate": 1.7834552845528456e-05, "loss": 0.1255, "step": 26635 }, { "epoch": 0.3248780487804878, "grad_norm": 1.4743152856826782, "learning_rate": 1.7834146341463418e-05, "loss": 0.1198, "step": 26640 }, { "epoch": 0.3249390243902439, "grad_norm": 0.6057190299034119, "learning_rate": 1.7833739837398376e-05, "loss": 0.0645, "step": 26645 }, { "epoch": 0.325, "grad_norm": 0.6580764055252075, "learning_rate": 1.7833333333333334e-05, "loss": 0.1287, "step": 26650 }, { "epoch": 0.3250609756097561, "grad_norm": 0.7081537842750549, "learning_rate": 1.7832926829268292e-05, "loss": 0.1429, "step": 26655 }, { "epoch": 0.3251219512195122, "grad_norm": 0.4923563003540039, "learning_rate": 1.7832520325203254e-05, "loss": 0.0856, "step": 26660 }, { "epoch": 0.3251829268292683, "grad_norm": 5.0751729011535645, "learning_rate": 1.7832113821138212e-05, "loss": 0.1188, "step": 26665 }, { "epoch": 0.3252439024390244, "grad_norm": 0.6901417970657349, "learning_rate": 1.7831707317073174e-05, "loss": 0.1008, "step": 26670 }, { "epoch": 0.3253048780487805, "grad_norm": 0.5508871674537659, "learning_rate": 1.783130081300813e-05, "loss": 0.0939, "step": 26675 }, { "epoch": 0.3253658536585366, "grad_norm": 0.6932079195976257, "learning_rate": 1.7830894308943093e-05, "loss": 0.0757, "step": 26680 }, { "epoch": 0.3254268292682927, "grad_norm": 0.47404035925865173, "learning_rate": 1.7830487804878048e-05, "loss": 0.0805, "step": 26685 }, { "epoch": 0.3254878048780488, "grad_norm": 2.669825553894043, "learning_rate": 1.783008130081301e-05, "loss": 0.1318, "step": 26690 }, { "epoch": 0.3255487804878049, "grad_norm": 0.5248145461082458, "learning_rate": 1.7829674796747968e-05, "loss": 0.086, "step": 26695 }, { "epoch": 0.325609756097561, "grad_norm": 1.2055988311767578, "learning_rate": 1.782926829268293e-05, "loss": 0.0882, "step": 26700 }, { "epoch": 0.3256707317073171, "grad_norm": 1.7178627252578735, "learning_rate": 1.7828861788617887e-05, "loss": 0.0963, "step": 26705 }, { "epoch": 0.3257317073170732, "grad_norm": 1.5583558082580566, "learning_rate": 1.782845528455285e-05, "loss": 0.1272, "step": 26710 }, { "epoch": 0.3257926829268293, "grad_norm": 0.9070881009101868, "learning_rate": 1.7828048780487807e-05, "loss": 0.0958, "step": 26715 }, { "epoch": 0.3258536585365854, "grad_norm": 1.4691208600997925, "learning_rate": 1.7827642276422765e-05, "loss": 0.0702, "step": 26720 }, { "epoch": 0.3259146341463415, "grad_norm": 0.5905617475509644, "learning_rate": 1.7827235772357726e-05, "loss": 0.0797, "step": 26725 }, { "epoch": 0.3259756097560976, "grad_norm": 0.4171326160430908, "learning_rate": 1.7826829268292685e-05, "loss": 0.1063, "step": 26730 }, { "epoch": 0.32603658536585367, "grad_norm": 1.2453488111495972, "learning_rate": 1.7826422764227643e-05, "loss": 0.0935, "step": 26735 }, { "epoch": 0.32609756097560977, "grad_norm": 0.7890763282775879, "learning_rate": 1.7826016260162604e-05, "loss": 0.0746, "step": 26740 }, { "epoch": 0.32615853658536587, "grad_norm": 1.0142940282821655, "learning_rate": 1.7825609756097562e-05, "loss": 0.1035, "step": 26745 }, { "epoch": 0.32621951219512196, "grad_norm": 0.9233768582344055, "learning_rate": 1.782520325203252e-05, "loss": 0.1004, "step": 26750 }, { "epoch": 0.32628048780487806, "grad_norm": 0.9990519285202026, "learning_rate": 1.7824796747967482e-05, "loss": 0.1181, "step": 26755 }, { "epoch": 0.32634146341463416, "grad_norm": 0.4958943724632263, "learning_rate": 1.782439024390244e-05, "loss": 0.0792, "step": 26760 }, { "epoch": 0.32640243902439026, "grad_norm": 0.7520243525505066, "learning_rate": 1.78239837398374e-05, "loss": 0.0799, "step": 26765 }, { "epoch": 0.32646341463414635, "grad_norm": 0.7507062554359436, "learning_rate": 1.782357723577236e-05, "loss": 0.0923, "step": 26770 }, { "epoch": 0.32652439024390245, "grad_norm": 1.1748607158660889, "learning_rate": 1.7823170731707318e-05, "loss": 0.0889, "step": 26775 }, { "epoch": 0.32658536585365855, "grad_norm": 0.8339112401008606, "learning_rate": 1.7822764227642276e-05, "loss": 0.0998, "step": 26780 }, { "epoch": 0.32664634146341465, "grad_norm": 0.7596907019615173, "learning_rate": 1.7822357723577238e-05, "loss": 0.0907, "step": 26785 }, { "epoch": 0.32670731707317074, "grad_norm": 0.9850394129753113, "learning_rate": 1.7821951219512196e-05, "loss": 0.0912, "step": 26790 }, { "epoch": 0.32676829268292684, "grad_norm": 1.1506776809692383, "learning_rate": 1.7821544715447157e-05, "loss": 0.0902, "step": 26795 }, { "epoch": 0.32682926829268294, "grad_norm": 0.8849437832832336, "learning_rate": 1.7821138211382115e-05, "loss": 0.0931, "step": 26800 }, { "epoch": 0.32689024390243904, "grad_norm": 0.621671736240387, "learning_rate": 1.7820731707317073e-05, "loss": 0.0821, "step": 26805 }, { "epoch": 0.32695121951219513, "grad_norm": 0.802450954914093, "learning_rate": 1.7820325203252035e-05, "loss": 0.0683, "step": 26810 }, { "epoch": 0.32701219512195123, "grad_norm": 0.8359159231185913, "learning_rate": 1.7819918699186993e-05, "loss": 0.1144, "step": 26815 }, { "epoch": 0.32707317073170733, "grad_norm": 0.7763128280639648, "learning_rate": 1.7819512195121955e-05, "loss": 0.0975, "step": 26820 }, { "epoch": 0.3271341463414634, "grad_norm": 0.49353209137916565, "learning_rate": 1.7819105691056913e-05, "loss": 0.0902, "step": 26825 }, { "epoch": 0.3271951219512195, "grad_norm": 0.7148305177688599, "learning_rate": 1.781869918699187e-05, "loss": 0.1169, "step": 26830 }, { "epoch": 0.3272560975609756, "grad_norm": 0.967762291431427, "learning_rate": 1.781829268292683e-05, "loss": 0.13, "step": 26835 }, { "epoch": 0.3273170731707317, "grad_norm": 1.1986180543899536, "learning_rate": 1.781788617886179e-05, "loss": 0.1216, "step": 26840 }, { "epoch": 0.3273780487804878, "grad_norm": 0.8414387106895447, "learning_rate": 1.781747967479675e-05, "loss": 0.0969, "step": 26845 }, { "epoch": 0.3274390243902439, "grad_norm": 0.6940974593162537, "learning_rate": 1.781707317073171e-05, "loss": 0.1362, "step": 26850 }, { "epoch": 0.3275, "grad_norm": 0.6404038071632385, "learning_rate": 1.781666666666667e-05, "loss": 0.1178, "step": 26855 }, { "epoch": 0.3275609756097561, "grad_norm": 3.2661702632904053, "learning_rate": 1.781626016260163e-05, "loss": 0.0893, "step": 26860 }, { "epoch": 0.3276219512195122, "grad_norm": 0.6284893155097961, "learning_rate": 1.7815853658536585e-05, "loss": 0.0893, "step": 26865 }, { "epoch": 0.3276829268292683, "grad_norm": 0.9940066337585449, "learning_rate": 1.7815447154471546e-05, "loss": 0.0973, "step": 26870 }, { "epoch": 0.3277439024390244, "grad_norm": 0.9721570611000061, "learning_rate": 1.7815040650406504e-05, "loss": 0.0875, "step": 26875 }, { "epoch": 0.3278048780487805, "grad_norm": 0.44038695096969604, "learning_rate": 1.7814634146341466e-05, "loss": 0.0828, "step": 26880 }, { "epoch": 0.3278658536585366, "grad_norm": 0.9469091892242432, "learning_rate": 1.7814227642276424e-05, "loss": 0.116, "step": 26885 }, { "epoch": 0.3279268292682927, "grad_norm": 0.7736899852752686, "learning_rate": 1.7813821138211385e-05, "loss": 0.0716, "step": 26890 }, { "epoch": 0.3279878048780488, "grad_norm": 0.9253453016281128, "learning_rate": 1.7813414634146343e-05, "loss": 0.0994, "step": 26895 }, { "epoch": 0.3280487804878049, "grad_norm": 0.5776302218437195, "learning_rate": 1.78130081300813e-05, "loss": 0.1087, "step": 26900 }, { "epoch": 0.328109756097561, "grad_norm": 0.7047860622406006, "learning_rate": 1.7812601626016263e-05, "loss": 0.1286, "step": 26905 }, { "epoch": 0.3281707317073171, "grad_norm": 0.6168393492698669, "learning_rate": 1.781219512195122e-05, "loss": 0.085, "step": 26910 }, { "epoch": 0.3282317073170732, "grad_norm": 0.46268922090530396, "learning_rate": 1.781178861788618e-05, "loss": 0.0822, "step": 26915 }, { "epoch": 0.3282926829268293, "grad_norm": 0.7673438191413879, "learning_rate": 1.781138211382114e-05, "loss": 0.0835, "step": 26920 }, { "epoch": 0.3283536585365854, "grad_norm": 0.6559993624687195, "learning_rate": 1.78109756097561e-05, "loss": 0.0969, "step": 26925 }, { "epoch": 0.3284146341463415, "grad_norm": 0.6198636889457703, "learning_rate": 1.7810569105691057e-05, "loss": 0.0784, "step": 26930 }, { "epoch": 0.3284756097560976, "grad_norm": 0.5115612149238586, "learning_rate": 1.781016260162602e-05, "loss": 0.1081, "step": 26935 }, { "epoch": 0.3285365853658537, "grad_norm": 0.5539209842681885, "learning_rate": 1.7809756097560977e-05, "loss": 0.1157, "step": 26940 }, { "epoch": 0.32859756097560977, "grad_norm": 0.9656920433044434, "learning_rate": 1.780934959349594e-05, "loss": 0.1088, "step": 26945 }, { "epoch": 0.32865853658536587, "grad_norm": 1.1382601261138916, "learning_rate": 1.7808943089430896e-05, "loss": 0.1001, "step": 26950 }, { "epoch": 0.32871951219512197, "grad_norm": 0.7808770537376404, "learning_rate": 1.7808536585365855e-05, "loss": 0.0832, "step": 26955 }, { "epoch": 0.32878048780487806, "grad_norm": 0.5188739895820618, "learning_rate": 1.7808130081300813e-05, "loss": 0.0572, "step": 26960 }, { "epoch": 0.32884146341463416, "grad_norm": 0.8580635190010071, "learning_rate": 1.7807723577235774e-05, "loss": 0.1069, "step": 26965 }, { "epoch": 0.32890243902439026, "grad_norm": 0.7976492047309875, "learning_rate": 1.7807317073170732e-05, "loss": 0.1023, "step": 26970 }, { "epoch": 0.32896341463414636, "grad_norm": 0.6402424573898315, "learning_rate": 1.7806910569105694e-05, "loss": 0.0873, "step": 26975 }, { "epoch": 0.32902439024390245, "grad_norm": 0.4350910186767578, "learning_rate": 1.7806504065040652e-05, "loss": 0.0606, "step": 26980 }, { "epoch": 0.32908536585365855, "grad_norm": 0.4752679467201233, "learning_rate": 1.780609756097561e-05, "loss": 0.088, "step": 26985 }, { "epoch": 0.32914634146341465, "grad_norm": 0.7927698493003845, "learning_rate": 1.780569105691057e-05, "loss": 0.0824, "step": 26990 }, { "epoch": 0.32920731707317075, "grad_norm": 1.1315439939498901, "learning_rate": 1.780528455284553e-05, "loss": 0.0756, "step": 26995 }, { "epoch": 0.32926829268292684, "grad_norm": 0.7190942764282227, "learning_rate": 1.7804878048780488e-05, "loss": 0.0906, "step": 27000 }, { "epoch": 0.32932926829268294, "grad_norm": 0.7367425560951233, "learning_rate": 1.780447154471545e-05, "loss": 0.0913, "step": 27005 }, { "epoch": 0.32939024390243904, "grad_norm": 0.451196551322937, "learning_rate": 1.7804065040650408e-05, "loss": 0.0677, "step": 27010 }, { "epoch": 0.32945121951219514, "grad_norm": 0.5235524773597717, "learning_rate": 1.7803658536585366e-05, "loss": 0.1197, "step": 27015 }, { "epoch": 0.32951219512195123, "grad_norm": 0.8057446479797363, "learning_rate": 1.7803252032520327e-05, "loss": 0.1084, "step": 27020 }, { "epoch": 0.32957317073170733, "grad_norm": 0.6578790545463562, "learning_rate": 1.7802845528455285e-05, "loss": 0.0807, "step": 27025 }, { "epoch": 0.32963414634146343, "grad_norm": 0.8405277132987976, "learning_rate": 1.7802439024390247e-05, "loss": 0.0963, "step": 27030 }, { "epoch": 0.3296951219512195, "grad_norm": 0.5782690644264221, "learning_rate": 1.7802032520325205e-05, "loss": 0.0935, "step": 27035 }, { "epoch": 0.3297560975609756, "grad_norm": 0.8173016309738159, "learning_rate": 1.7801626016260166e-05, "loss": 0.0704, "step": 27040 }, { "epoch": 0.3298170731707317, "grad_norm": 1.3135483264923096, "learning_rate": 1.780121951219512e-05, "loss": 0.097, "step": 27045 }, { "epoch": 0.3298780487804878, "grad_norm": 0.33596521615982056, "learning_rate": 1.7800813008130083e-05, "loss": 0.0759, "step": 27050 }, { "epoch": 0.3299390243902439, "grad_norm": 0.7716559767723083, "learning_rate": 1.780040650406504e-05, "loss": 0.1122, "step": 27055 }, { "epoch": 0.33, "grad_norm": 0.8466148972511292, "learning_rate": 1.7800000000000002e-05, "loss": 0.0642, "step": 27060 }, { "epoch": 0.3300609756097561, "grad_norm": 0.4343003034591675, "learning_rate": 1.779959349593496e-05, "loss": 0.0838, "step": 27065 }, { "epoch": 0.3301219512195122, "grad_norm": 0.7178182005882263, "learning_rate": 1.7799186991869922e-05, "loss": 0.0974, "step": 27070 }, { "epoch": 0.3301829268292683, "grad_norm": 0.5851691961288452, "learning_rate": 1.779878048780488e-05, "loss": 0.0948, "step": 27075 }, { "epoch": 0.3302439024390244, "grad_norm": 0.5285549163818359, "learning_rate": 1.7798373983739838e-05, "loss": 0.0935, "step": 27080 }, { "epoch": 0.3303048780487805, "grad_norm": 0.6195381283760071, "learning_rate": 1.77979674796748e-05, "loss": 0.0676, "step": 27085 }, { "epoch": 0.3303658536585366, "grad_norm": 0.7376084327697754, "learning_rate": 1.7797560975609758e-05, "loss": 0.0676, "step": 27090 }, { "epoch": 0.3304268292682927, "grad_norm": 0.484072744846344, "learning_rate": 1.7797154471544716e-05, "loss": 0.0742, "step": 27095 }, { "epoch": 0.3304878048780488, "grad_norm": 0.5724899172782898, "learning_rate": 1.7796747967479678e-05, "loss": 0.0648, "step": 27100 }, { "epoch": 0.3305487804878049, "grad_norm": 0.7431255578994751, "learning_rate": 1.7796341463414636e-05, "loss": 0.1098, "step": 27105 }, { "epoch": 0.330609756097561, "grad_norm": 0.9387930631637573, "learning_rate": 1.7795934959349594e-05, "loss": 0.0873, "step": 27110 }, { "epoch": 0.3306707317073171, "grad_norm": 0.6878333687782288, "learning_rate": 1.7795528455284555e-05, "loss": 0.0571, "step": 27115 }, { "epoch": 0.3307317073170732, "grad_norm": 2.363861560821533, "learning_rate": 1.7795121951219513e-05, "loss": 0.1106, "step": 27120 }, { "epoch": 0.3307926829268293, "grad_norm": 1.2126532793045044, "learning_rate": 1.7794715447154475e-05, "loss": 0.0817, "step": 27125 }, { "epoch": 0.3308536585365854, "grad_norm": 0.6348757743835449, "learning_rate": 1.7794308943089433e-05, "loss": 0.0825, "step": 27130 }, { "epoch": 0.3309146341463415, "grad_norm": 0.5658402442932129, "learning_rate": 1.779390243902439e-05, "loss": 0.1091, "step": 27135 }, { "epoch": 0.3309756097560976, "grad_norm": 0.7857617139816284, "learning_rate": 1.779349593495935e-05, "loss": 0.0812, "step": 27140 }, { "epoch": 0.3310365853658537, "grad_norm": 1.4914219379425049, "learning_rate": 1.779308943089431e-05, "loss": 0.1066, "step": 27145 }, { "epoch": 0.3310975609756098, "grad_norm": 0.8307737708091736, "learning_rate": 1.779268292682927e-05, "loss": 0.1173, "step": 27150 }, { "epoch": 0.33115853658536587, "grad_norm": 0.9340168237686157, "learning_rate": 1.779227642276423e-05, "loss": 0.0797, "step": 27155 }, { "epoch": 0.33121951219512197, "grad_norm": 0.7752034664154053, "learning_rate": 1.779186991869919e-05, "loss": 0.1075, "step": 27160 }, { "epoch": 0.33128048780487807, "grad_norm": 0.6024896502494812, "learning_rate": 1.7791463414634147e-05, "loss": 0.0931, "step": 27165 }, { "epoch": 0.33134146341463416, "grad_norm": 0.832426130771637, "learning_rate": 1.7791056910569108e-05, "loss": 0.1245, "step": 27170 }, { "epoch": 0.33140243902439026, "grad_norm": 0.40068212151527405, "learning_rate": 1.7790650406504066e-05, "loss": 0.0803, "step": 27175 }, { "epoch": 0.33146341463414636, "grad_norm": 0.5018469095230103, "learning_rate": 1.7790243902439025e-05, "loss": 0.0581, "step": 27180 }, { "epoch": 0.33152439024390246, "grad_norm": 0.9245477914810181, "learning_rate": 1.7789837398373986e-05, "loss": 0.07, "step": 27185 }, { "epoch": 0.33158536585365855, "grad_norm": 0.645334780216217, "learning_rate": 1.7789430894308944e-05, "loss": 0.1047, "step": 27190 }, { "epoch": 0.33164634146341465, "grad_norm": 0.7052721381187439, "learning_rate": 1.7789024390243902e-05, "loss": 0.1278, "step": 27195 }, { "epoch": 0.33170731707317075, "grad_norm": 0.957838773727417, "learning_rate": 1.7788617886178864e-05, "loss": 0.083, "step": 27200 }, { "epoch": 0.33176829268292685, "grad_norm": 0.7203360795974731, "learning_rate": 1.7788211382113822e-05, "loss": 0.0866, "step": 27205 }, { "epoch": 0.33182926829268294, "grad_norm": 0.9172192215919495, "learning_rate": 1.7787804878048783e-05, "loss": 0.0849, "step": 27210 }, { "epoch": 0.33189024390243904, "grad_norm": 0.7476688027381897, "learning_rate": 1.778739837398374e-05, "loss": 0.1245, "step": 27215 }, { "epoch": 0.33195121951219514, "grad_norm": 0.44735240936279297, "learning_rate": 1.7786991869918703e-05, "loss": 0.0624, "step": 27220 }, { "epoch": 0.33201219512195124, "grad_norm": 0.558775782585144, "learning_rate": 1.7786585365853658e-05, "loss": 0.0945, "step": 27225 }, { "epoch": 0.33207317073170733, "grad_norm": 0.8341525793075562, "learning_rate": 1.778617886178862e-05, "loss": 0.0776, "step": 27230 }, { "epoch": 0.33213414634146343, "grad_norm": 0.6449737548828125, "learning_rate": 1.7785772357723577e-05, "loss": 0.1201, "step": 27235 }, { "epoch": 0.33219512195121953, "grad_norm": 0.5158229470252991, "learning_rate": 1.778536585365854e-05, "loss": 0.1258, "step": 27240 }, { "epoch": 0.3322560975609756, "grad_norm": 0.6598734855651855, "learning_rate": 1.7784959349593497e-05, "loss": 0.0977, "step": 27245 }, { "epoch": 0.3323170731707317, "grad_norm": 0.7871481776237488, "learning_rate": 1.778455284552846e-05, "loss": 0.0969, "step": 27250 }, { "epoch": 0.3323780487804878, "grad_norm": 0.8009424805641174, "learning_rate": 1.7784146341463417e-05, "loss": 0.1117, "step": 27255 }, { "epoch": 0.3324390243902439, "grad_norm": 0.7681779861450195, "learning_rate": 1.7783739837398375e-05, "loss": 0.0754, "step": 27260 }, { "epoch": 0.3325, "grad_norm": 1.038262963294983, "learning_rate": 1.7783333333333333e-05, "loss": 0.065, "step": 27265 }, { "epoch": 0.3325609756097561, "grad_norm": 0.8526999354362488, "learning_rate": 1.7782926829268295e-05, "loss": 0.1218, "step": 27270 }, { "epoch": 0.3326219512195122, "grad_norm": 1.0177011489868164, "learning_rate": 1.7782520325203253e-05, "loss": 0.0715, "step": 27275 }, { "epoch": 0.3326829268292683, "grad_norm": 0.7060079574584961, "learning_rate": 1.7782113821138214e-05, "loss": 0.1085, "step": 27280 }, { "epoch": 0.3327439024390244, "grad_norm": 0.6510595083236694, "learning_rate": 1.7781707317073172e-05, "loss": 0.0983, "step": 27285 }, { "epoch": 0.3328048780487805, "grad_norm": 0.7424899935722351, "learning_rate": 1.778130081300813e-05, "loss": 0.074, "step": 27290 }, { "epoch": 0.3328658536585366, "grad_norm": 1.1136062145233154, "learning_rate": 1.7780894308943092e-05, "loss": 0.064, "step": 27295 }, { "epoch": 0.3329268292682927, "grad_norm": 0.6143843531608582, "learning_rate": 1.778048780487805e-05, "loss": 0.0712, "step": 27300 }, { "epoch": 0.3329878048780488, "grad_norm": 1.3397891521453857, "learning_rate": 1.778008130081301e-05, "loss": 0.0911, "step": 27305 }, { "epoch": 0.3330487804878049, "grad_norm": 1.0814374685287476, "learning_rate": 1.777967479674797e-05, "loss": 0.0632, "step": 27310 }, { "epoch": 0.333109756097561, "grad_norm": 0.6885519623756409, "learning_rate": 1.7779268292682928e-05, "loss": 0.0734, "step": 27315 }, { "epoch": 0.3331707317073171, "grad_norm": 0.723943293094635, "learning_rate": 1.7778861788617886e-05, "loss": 0.0914, "step": 27320 }, { "epoch": 0.3332317073170732, "grad_norm": 2.189967632293701, "learning_rate": 1.7778455284552848e-05, "loss": 0.1105, "step": 27325 }, { "epoch": 0.3332926829268293, "grad_norm": 0.7832708954811096, "learning_rate": 1.7778048780487806e-05, "loss": 0.1175, "step": 27330 }, { "epoch": 0.3333536585365854, "grad_norm": 0.830267071723938, "learning_rate": 1.7777642276422767e-05, "loss": 0.0886, "step": 27335 }, { "epoch": 0.3334146341463415, "grad_norm": 0.9369329214096069, "learning_rate": 1.7777235772357725e-05, "loss": 0.0678, "step": 27340 }, { "epoch": 0.3334756097560976, "grad_norm": 0.8601483702659607, "learning_rate": 1.7776829268292683e-05, "loss": 0.1043, "step": 27345 }, { "epoch": 0.3335365853658537, "grad_norm": 0.8297780752182007, "learning_rate": 1.7776422764227645e-05, "loss": 0.0756, "step": 27350 }, { "epoch": 0.3335975609756098, "grad_norm": 0.7231934070587158, "learning_rate": 1.7776016260162603e-05, "loss": 0.0578, "step": 27355 }, { "epoch": 0.3336585365853659, "grad_norm": 1.1399587392807007, "learning_rate": 1.777560975609756e-05, "loss": 0.1231, "step": 27360 }, { "epoch": 0.33371951219512197, "grad_norm": 0.4487929344177246, "learning_rate": 1.7775203252032523e-05, "loss": 0.0592, "step": 27365 }, { "epoch": 0.33378048780487807, "grad_norm": 0.8142231702804565, "learning_rate": 1.777479674796748e-05, "loss": 0.0778, "step": 27370 }, { "epoch": 0.33384146341463417, "grad_norm": 0.6342450976371765, "learning_rate": 1.777439024390244e-05, "loss": 0.0746, "step": 27375 }, { "epoch": 0.33390243902439026, "grad_norm": 1.2753469944000244, "learning_rate": 1.77739837398374e-05, "loss": 0.1142, "step": 27380 }, { "epoch": 0.33396341463414636, "grad_norm": 1.6507889032363892, "learning_rate": 1.777357723577236e-05, "loss": 0.1152, "step": 27385 }, { "epoch": 0.33402439024390246, "grad_norm": 0.9134240746498108, "learning_rate": 1.777317073170732e-05, "loss": 0.0858, "step": 27390 }, { "epoch": 0.33408536585365856, "grad_norm": 0.4864073395729065, "learning_rate": 1.7772764227642278e-05, "loss": 0.1126, "step": 27395 }, { "epoch": 0.33414634146341465, "grad_norm": 0.5947949290275574, "learning_rate": 1.777235772357724e-05, "loss": 0.0822, "step": 27400 }, { "epoch": 0.33420731707317075, "grad_norm": 0.48931100964546204, "learning_rate": 1.7771951219512194e-05, "loss": 0.1002, "step": 27405 }, { "epoch": 0.33426829268292685, "grad_norm": 0.4702785015106201, "learning_rate": 1.7771544715447156e-05, "loss": 0.0819, "step": 27410 }, { "epoch": 0.33432926829268295, "grad_norm": 1.0442310571670532, "learning_rate": 1.7771138211382114e-05, "loss": 0.0662, "step": 27415 }, { "epoch": 0.33439024390243904, "grad_norm": 0.32201695442199707, "learning_rate": 1.7770731707317076e-05, "loss": 0.0698, "step": 27420 }, { "epoch": 0.33445121951219514, "grad_norm": 0.5852963328361511, "learning_rate": 1.7770325203252034e-05, "loss": 0.1221, "step": 27425 }, { "epoch": 0.33451219512195124, "grad_norm": 0.33982816338539124, "learning_rate": 1.7769918699186995e-05, "loss": 0.1369, "step": 27430 }, { "epoch": 0.33457317073170734, "grad_norm": 1.1236947774887085, "learning_rate": 1.7769512195121953e-05, "loss": 0.083, "step": 27435 }, { "epoch": 0.33463414634146343, "grad_norm": 0.42331188917160034, "learning_rate": 1.776910569105691e-05, "loss": 0.0701, "step": 27440 }, { "epoch": 0.33469512195121953, "grad_norm": 1.2639806270599365, "learning_rate": 1.776869918699187e-05, "loss": 0.1023, "step": 27445 }, { "epoch": 0.33475609756097563, "grad_norm": 0.3665170967578888, "learning_rate": 1.776829268292683e-05, "loss": 0.0861, "step": 27450 }, { "epoch": 0.3348170731707317, "grad_norm": 0.3611066937446594, "learning_rate": 1.776788617886179e-05, "loss": 0.0793, "step": 27455 }, { "epoch": 0.3348780487804878, "grad_norm": 0.974450409412384, "learning_rate": 1.776747967479675e-05, "loss": 0.0704, "step": 27460 }, { "epoch": 0.3349390243902439, "grad_norm": 0.590744137763977, "learning_rate": 1.776707317073171e-05, "loss": 0.0985, "step": 27465 }, { "epoch": 0.335, "grad_norm": 0.9410637617111206, "learning_rate": 1.7766666666666667e-05, "loss": 0.0808, "step": 27470 }, { "epoch": 0.3350609756097561, "grad_norm": 0.5582932829856873, "learning_rate": 1.776626016260163e-05, "loss": 0.0994, "step": 27475 }, { "epoch": 0.3351219512195122, "grad_norm": 2.5363950729370117, "learning_rate": 1.7765853658536587e-05, "loss": 0.1007, "step": 27480 }, { "epoch": 0.3351829268292683, "grad_norm": 0.6320449709892273, "learning_rate": 1.7765447154471548e-05, "loss": 0.095, "step": 27485 }, { "epoch": 0.3352439024390244, "grad_norm": 0.7264741659164429, "learning_rate": 1.7765040650406506e-05, "loss": 0.1073, "step": 27490 }, { "epoch": 0.3353048780487805, "grad_norm": 0.37326738238334656, "learning_rate": 1.7764634146341465e-05, "loss": 0.0867, "step": 27495 }, { "epoch": 0.3353658536585366, "grad_norm": 0.5281678438186646, "learning_rate": 1.7764227642276423e-05, "loss": 0.0752, "step": 27500 }, { "epoch": 0.3354268292682927, "grad_norm": 0.829963207244873, "learning_rate": 1.7763821138211384e-05, "loss": 0.0807, "step": 27505 }, { "epoch": 0.3354878048780488, "grad_norm": 0.6851325631141663, "learning_rate": 1.7763414634146342e-05, "loss": 0.0767, "step": 27510 }, { "epoch": 0.3355487804878049, "grad_norm": 1.4266165494918823, "learning_rate": 1.7763008130081304e-05, "loss": 0.1509, "step": 27515 }, { "epoch": 0.335609756097561, "grad_norm": 0.956007719039917, "learning_rate": 1.7762601626016262e-05, "loss": 0.1334, "step": 27520 }, { "epoch": 0.3356707317073171, "grad_norm": 1.6140817403793335, "learning_rate": 1.776219512195122e-05, "loss": 0.0871, "step": 27525 }, { "epoch": 0.3357317073170732, "grad_norm": 0.8044207096099854, "learning_rate": 1.7761788617886178e-05, "loss": 0.0804, "step": 27530 }, { "epoch": 0.3357926829268293, "grad_norm": 1.5957080125808716, "learning_rate": 1.776138211382114e-05, "loss": 0.0809, "step": 27535 }, { "epoch": 0.3358536585365854, "grad_norm": 0.6158223748207092, "learning_rate": 1.7760975609756098e-05, "loss": 0.0933, "step": 27540 }, { "epoch": 0.3359146341463415, "grad_norm": 1.2020624876022339, "learning_rate": 1.776056910569106e-05, "loss": 0.1195, "step": 27545 }, { "epoch": 0.3359756097560976, "grad_norm": 1.443310260772705, "learning_rate": 1.7760162601626017e-05, "loss": 0.1045, "step": 27550 }, { "epoch": 0.3360365853658537, "grad_norm": 1.2524782419204712, "learning_rate": 1.7759756097560976e-05, "loss": 0.091, "step": 27555 }, { "epoch": 0.3360975609756098, "grad_norm": 0.5671781301498413, "learning_rate": 1.7759349593495937e-05, "loss": 0.0708, "step": 27560 }, { "epoch": 0.3361585365853659, "grad_norm": 0.4912901818752289, "learning_rate": 1.7758943089430895e-05, "loss": 0.099, "step": 27565 }, { "epoch": 0.336219512195122, "grad_norm": 0.9526167511940002, "learning_rate": 1.7758536585365857e-05, "loss": 0.1209, "step": 27570 }, { "epoch": 0.33628048780487807, "grad_norm": 0.8402352333068848, "learning_rate": 1.7758130081300815e-05, "loss": 0.0923, "step": 27575 }, { "epoch": 0.33634146341463417, "grad_norm": 0.6769715547561646, "learning_rate": 1.7757723577235776e-05, "loss": 0.0851, "step": 27580 }, { "epoch": 0.33640243902439027, "grad_norm": 0.6196650266647339, "learning_rate": 1.775731707317073e-05, "loss": 0.0799, "step": 27585 }, { "epoch": 0.33646341463414636, "grad_norm": 1.0459562540054321, "learning_rate": 1.7756910569105693e-05, "loss": 0.0887, "step": 27590 }, { "epoch": 0.33652439024390246, "grad_norm": 0.8498438000679016, "learning_rate": 1.775650406504065e-05, "loss": 0.0969, "step": 27595 }, { "epoch": 0.33658536585365856, "grad_norm": 0.41164276003837585, "learning_rate": 1.7756097560975612e-05, "loss": 0.0737, "step": 27600 }, { "epoch": 0.33664634146341466, "grad_norm": 0.6274867653846741, "learning_rate": 1.775569105691057e-05, "loss": 0.1098, "step": 27605 }, { "epoch": 0.33670731707317075, "grad_norm": 0.6432013511657715, "learning_rate": 1.7755284552845532e-05, "loss": 0.0879, "step": 27610 }, { "epoch": 0.33676829268292685, "grad_norm": 0.6050404906272888, "learning_rate": 1.7754878048780487e-05, "loss": 0.078, "step": 27615 }, { "epoch": 0.33682926829268295, "grad_norm": 2.445493698120117, "learning_rate": 1.7754471544715448e-05, "loss": 0.0614, "step": 27620 }, { "epoch": 0.33689024390243905, "grad_norm": 0.38280755281448364, "learning_rate": 1.7754065040650406e-05, "loss": 0.0926, "step": 27625 }, { "epoch": 0.33695121951219514, "grad_norm": 0.7370227575302124, "learning_rate": 1.7753658536585368e-05, "loss": 0.0934, "step": 27630 }, { "epoch": 0.33701219512195124, "grad_norm": 0.8655751943588257, "learning_rate": 1.7753252032520326e-05, "loss": 0.1325, "step": 27635 }, { "epoch": 0.33707317073170734, "grad_norm": 0.4422271251678467, "learning_rate": 1.7752845528455287e-05, "loss": 0.0876, "step": 27640 }, { "epoch": 0.33713414634146344, "grad_norm": 0.9036253094673157, "learning_rate": 1.7752439024390246e-05, "loss": 0.0825, "step": 27645 }, { "epoch": 0.33719512195121953, "grad_norm": 0.7793129086494446, "learning_rate": 1.7752032520325204e-05, "loss": 0.0771, "step": 27650 }, { "epoch": 0.33725609756097563, "grad_norm": 0.7828892469406128, "learning_rate": 1.7751626016260165e-05, "loss": 0.1139, "step": 27655 }, { "epoch": 0.33731707317073173, "grad_norm": 0.7670634984970093, "learning_rate": 1.7751219512195123e-05, "loss": 0.0718, "step": 27660 }, { "epoch": 0.3373780487804878, "grad_norm": 1.4736729860305786, "learning_rate": 1.7750813008130085e-05, "loss": 0.1062, "step": 27665 }, { "epoch": 0.3374390243902439, "grad_norm": 0.7063939571380615, "learning_rate": 1.7750406504065043e-05, "loss": 0.0647, "step": 27670 }, { "epoch": 0.3375, "grad_norm": 0.5648718476295471, "learning_rate": 1.775e-05, "loss": 0.0868, "step": 27675 }, { "epoch": 0.3375609756097561, "grad_norm": 1.0616230964660645, "learning_rate": 1.774959349593496e-05, "loss": 0.0927, "step": 27680 }, { "epoch": 0.3376219512195122, "grad_norm": 0.6929195523262024, "learning_rate": 1.774918699186992e-05, "loss": 0.1079, "step": 27685 }, { "epoch": 0.3376829268292683, "grad_norm": 1.282559871673584, "learning_rate": 1.774878048780488e-05, "loss": 0.1074, "step": 27690 }, { "epoch": 0.3377439024390244, "grad_norm": 0.6121384501457214, "learning_rate": 1.774837398373984e-05, "loss": 0.0723, "step": 27695 }, { "epoch": 0.3378048780487805, "grad_norm": 2.6153676509857178, "learning_rate": 1.77479674796748e-05, "loss": 0.0854, "step": 27700 }, { "epoch": 0.3378658536585366, "grad_norm": 1.1016638278961182, "learning_rate": 1.7747560975609757e-05, "loss": 0.083, "step": 27705 }, { "epoch": 0.3379268292682927, "grad_norm": 0.7262780666351318, "learning_rate": 1.7747154471544715e-05, "loss": 0.0737, "step": 27710 }, { "epoch": 0.3379878048780488, "grad_norm": 0.9050475358963013, "learning_rate": 1.7746747967479676e-05, "loss": 0.1009, "step": 27715 }, { "epoch": 0.3380487804878049, "grad_norm": 0.5558261275291443, "learning_rate": 1.7746341463414634e-05, "loss": 0.1116, "step": 27720 }, { "epoch": 0.338109756097561, "grad_norm": 0.8509467244148254, "learning_rate": 1.7745934959349596e-05, "loss": 0.0926, "step": 27725 }, { "epoch": 0.3381707317073171, "grad_norm": 2.232665777206421, "learning_rate": 1.7745528455284554e-05, "loss": 0.1283, "step": 27730 }, { "epoch": 0.3382317073170732, "grad_norm": 0.8759376406669617, "learning_rate": 1.7745121951219512e-05, "loss": 0.0734, "step": 27735 }, { "epoch": 0.3382926829268293, "grad_norm": 1.044783353805542, "learning_rate": 1.7744715447154474e-05, "loss": 0.1125, "step": 27740 }, { "epoch": 0.3383536585365854, "grad_norm": 0.9373496174812317, "learning_rate": 1.7744308943089432e-05, "loss": 0.1144, "step": 27745 }, { "epoch": 0.3384146341463415, "grad_norm": 0.4787384569644928, "learning_rate": 1.7743902439024393e-05, "loss": 0.094, "step": 27750 }, { "epoch": 0.3384756097560976, "grad_norm": 0.5267041921615601, "learning_rate": 1.774349593495935e-05, "loss": 0.0991, "step": 27755 }, { "epoch": 0.3385365853658537, "grad_norm": 0.9895188808441162, "learning_rate": 1.7743089430894313e-05, "loss": 0.1172, "step": 27760 }, { "epoch": 0.3385975609756098, "grad_norm": 1.0546585321426392, "learning_rate": 1.7742682926829268e-05, "loss": 0.0836, "step": 27765 }, { "epoch": 0.3386585365853659, "grad_norm": 0.6880872249603271, "learning_rate": 1.774227642276423e-05, "loss": 0.1003, "step": 27770 }, { "epoch": 0.338719512195122, "grad_norm": 0.778566837310791, "learning_rate": 1.7741869918699187e-05, "loss": 0.1218, "step": 27775 }, { "epoch": 0.33878048780487807, "grad_norm": 0.8332103490829468, "learning_rate": 1.774146341463415e-05, "loss": 0.0866, "step": 27780 }, { "epoch": 0.33884146341463417, "grad_norm": 5.0687031745910645, "learning_rate": 1.7741056910569107e-05, "loss": 0.0942, "step": 27785 }, { "epoch": 0.33890243902439027, "grad_norm": 1.2765657901763916, "learning_rate": 1.774065040650407e-05, "loss": 0.0998, "step": 27790 }, { "epoch": 0.33896341463414636, "grad_norm": 0.47887036204338074, "learning_rate": 1.7740243902439023e-05, "loss": 0.093, "step": 27795 }, { "epoch": 0.33902439024390246, "grad_norm": 0.6007306575775146, "learning_rate": 1.7739837398373985e-05, "loss": 0.0934, "step": 27800 }, { "epoch": 0.33908536585365856, "grad_norm": 0.9866592288017273, "learning_rate": 1.7739430894308943e-05, "loss": 0.0728, "step": 27805 }, { "epoch": 0.33914634146341466, "grad_norm": 0.49089717864990234, "learning_rate": 1.7739024390243904e-05, "loss": 0.0858, "step": 27810 }, { "epoch": 0.33920731707317076, "grad_norm": 1.0383610725402832, "learning_rate": 1.7738617886178863e-05, "loss": 0.1145, "step": 27815 }, { "epoch": 0.33926829268292685, "grad_norm": 0.7296965718269348, "learning_rate": 1.7738211382113824e-05, "loss": 0.0989, "step": 27820 }, { "epoch": 0.33932926829268295, "grad_norm": 0.9092379212379456, "learning_rate": 1.7737804878048782e-05, "loss": 0.0812, "step": 27825 }, { "epoch": 0.33939024390243905, "grad_norm": 1.0218262672424316, "learning_rate": 1.773739837398374e-05, "loss": 0.0721, "step": 27830 }, { "epoch": 0.33945121951219515, "grad_norm": 0.8466712832450867, "learning_rate": 1.7736991869918702e-05, "loss": 0.0665, "step": 27835 }, { "epoch": 0.33951219512195124, "grad_norm": 0.6676509380340576, "learning_rate": 1.773658536585366e-05, "loss": 0.0744, "step": 27840 }, { "epoch": 0.33957317073170734, "grad_norm": 0.8049501776695251, "learning_rate": 1.773617886178862e-05, "loss": 0.0767, "step": 27845 }, { "epoch": 0.33963414634146344, "grad_norm": 1.0522907972335815, "learning_rate": 1.773577235772358e-05, "loss": 0.1121, "step": 27850 }, { "epoch": 0.33969512195121954, "grad_norm": 1.921647310256958, "learning_rate": 1.7735365853658538e-05, "loss": 0.1172, "step": 27855 }, { "epoch": 0.33975609756097563, "grad_norm": 1.6720904111862183, "learning_rate": 1.7734959349593496e-05, "loss": 0.0919, "step": 27860 }, { "epoch": 0.33981707317073173, "grad_norm": 0.5749544501304626, "learning_rate": 1.7734552845528457e-05, "loss": 0.0822, "step": 27865 }, { "epoch": 0.33987804878048783, "grad_norm": 1.3932361602783203, "learning_rate": 1.7734146341463416e-05, "loss": 0.0814, "step": 27870 }, { "epoch": 0.3399390243902439, "grad_norm": 0.5896546840667725, "learning_rate": 1.7733739837398377e-05, "loss": 0.079, "step": 27875 }, { "epoch": 0.34, "grad_norm": 0.6591050624847412, "learning_rate": 1.7733333333333335e-05, "loss": 0.089, "step": 27880 }, { "epoch": 0.3400609756097561, "grad_norm": 0.40604713559150696, "learning_rate": 1.7732926829268293e-05, "loss": 0.061, "step": 27885 }, { "epoch": 0.3401219512195122, "grad_norm": 0.7830857634544373, "learning_rate": 1.773252032520325e-05, "loss": 0.084, "step": 27890 }, { "epoch": 0.3401829268292683, "grad_norm": 0.6762913465499878, "learning_rate": 1.7732113821138213e-05, "loss": 0.0517, "step": 27895 }, { "epoch": 0.3402439024390244, "grad_norm": 0.5653595924377441, "learning_rate": 1.773170731707317e-05, "loss": 0.0721, "step": 27900 }, { "epoch": 0.3403048780487805, "grad_norm": 0.39051100611686707, "learning_rate": 1.7731300813008133e-05, "loss": 0.1169, "step": 27905 }, { "epoch": 0.3403658536585366, "grad_norm": 0.8756635785102844, "learning_rate": 1.773089430894309e-05, "loss": 0.0685, "step": 27910 }, { "epoch": 0.3404268292682927, "grad_norm": 7.489052772521973, "learning_rate": 1.773048780487805e-05, "loss": 0.0983, "step": 27915 }, { "epoch": 0.3404878048780488, "grad_norm": 0.7326460480690002, "learning_rate": 1.773008130081301e-05, "loss": 0.0748, "step": 27920 }, { "epoch": 0.3405487804878049, "grad_norm": 0.390239417552948, "learning_rate": 1.772967479674797e-05, "loss": 0.0898, "step": 27925 }, { "epoch": 0.340609756097561, "grad_norm": 1.660915732383728, "learning_rate": 1.772926829268293e-05, "loss": 0.0824, "step": 27930 }, { "epoch": 0.3406707317073171, "grad_norm": 0.9058966040611267, "learning_rate": 1.7728861788617888e-05, "loss": 0.0863, "step": 27935 }, { "epoch": 0.3407317073170732, "grad_norm": 0.5213035345077515, "learning_rate": 1.7728455284552846e-05, "loss": 0.0749, "step": 27940 }, { "epoch": 0.3407926829268293, "grad_norm": 0.7636004686355591, "learning_rate": 1.7728048780487804e-05, "loss": 0.0886, "step": 27945 }, { "epoch": 0.3408536585365854, "grad_norm": 0.6132323145866394, "learning_rate": 1.7727642276422766e-05, "loss": 0.08, "step": 27950 }, { "epoch": 0.3409146341463415, "grad_norm": 0.76457279920578, "learning_rate": 1.7727235772357724e-05, "loss": 0.1151, "step": 27955 }, { "epoch": 0.3409756097560976, "grad_norm": 0.627835214138031, "learning_rate": 1.7726829268292686e-05, "loss": 0.1037, "step": 27960 }, { "epoch": 0.3410365853658537, "grad_norm": 0.5114283561706543, "learning_rate": 1.7726422764227644e-05, "loss": 0.1171, "step": 27965 }, { "epoch": 0.3410975609756098, "grad_norm": 0.8783074021339417, "learning_rate": 1.7726016260162605e-05, "loss": 0.0916, "step": 27970 }, { "epoch": 0.3411585365853659, "grad_norm": 1.1727526187896729, "learning_rate": 1.772560975609756e-05, "loss": 0.0914, "step": 27975 }, { "epoch": 0.341219512195122, "grad_norm": 2.7007150650024414, "learning_rate": 1.772520325203252e-05, "loss": 0.1008, "step": 27980 }, { "epoch": 0.3412804878048781, "grad_norm": 0.5447763800621033, "learning_rate": 1.772479674796748e-05, "loss": 0.0672, "step": 27985 }, { "epoch": 0.34134146341463417, "grad_norm": 1.2615855932235718, "learning_rate": 1.772439024390244e-05, "loss": 0.0819, "step": 27990 }, { "epoch": 0.34140243902439027, "grad_norm": 0.6222813725471497, "learning_rate": 1.77239837398374e-05, "loss": 0.0885, "step": 27995 }, { "epoch": 0.34146341463414637, "grad_norm": 0.7638168931007385, "learning_rate": 1.772357723577236e-05, "loss": 0.125, "step": 28000 }, { "epoch": 0.34152439024390246, "grad_norm": 0.5844576358795166, "learning_rate": 1.772317073170732e-05, "loss": 0.117, "step": 28005 }, { "epoch": 0.34158536585365856, "grad_norm": 0.4213484823703766, "learning_rate": 1.7722764227642277e-05, "loss": 0.0976, "step": 28010 }, { "epoch": 0.34164634146341466, "grad_norm": 0.8670722842216492, "learning_rate": 1.772235772357724e-05, "loss": 0.0947, "step": 28015 }, { "epoch": 0.34170731707317076, "grad_norm": 0.5533954501152039, "learning_rate": 1.7721951219512197e-05, "loss": 0.087, "step": 28020 }, { "epoch": 0.34176829268292686, "grad_norm": 1.1094874143600464, "learning_rate": 1.7721544715447155e-05, "loss": 0.1284, "step": 28025 }, { "epoch": 0.34182926829268295, "grad_norm": 0.6310766935348511, "learning_rate": 1.7721138211382116e-05, "loss": 0.0646, "step": 28030 }, { "epoch": 0.34189024390243905, "grad_norm": 0.8735092282295227, "learning_rate": 1.7720731707317074e-05, "loss": 0.1079, "step": 28035 }, { "epoch": 0.34195121951219515, "grad_norm": 0.529106080532074, "learning_rate": 1.7720325203252033e-05, "loss": 0.1233, "step": 28040 }, { "epoch": 0.34201219512195125, "grad_norm": 0.7916815876960754, "learning_rate": 1.7719918699186994e-05, "loss": 0.0751, "step": 28045 }, { "epoch": 0.34207317073170734, "grad_norm": 0.97873854637146, "learning_rate": 1.7719512195121952e-05, "loss": 0.1222, "step": 28050 }, { "epoch": 0.34213414634146344, "grad_norm": 0.7721719741821289, "learning_rate": 1.7719105691056914e-05, "loss": 0.0657, "step": 28055 }, { "epoch": 0.34219512195121954, "grad_norm": 1.104569673538208, "learning_rate": 1.7718699186991872e-05, "loss": 0.0893, "step": 28060 }, { "epoch": 0.34225609756097564, "grad_norm": 0.7686376571655273, "learning_rate": 1.771829268292683e-05, "loss": 0.1248, "step": 28065 }, { "epoch": 0.34231707317073173, "grad_norm": 0.6778883934020996, "learning_rate": 1.7717886178861788e-05, "loss": 0.0799, "step": 28070 }, { "epoch": 0.34237804878048783, "grad_norm": 0.5634225606918335, "learning_rate": 1.771747967479675e-05, "loss": 0.0817, "step": 28075 }, { "epoch": 0.34243902439024393, "grad_norm": 0.9482403993606567, "learning_rate": 1.7717073170731708e-05, "loss": 0.0775, "step": 28080 }, { "epoch": 0.3425, "grad_norm": 0.5881661772727966, "learning_rate": 1.771666666666667e-05, "loss": 0.1098, "step": 28085 }, { "epoch": 0.3425609756097561, "grad_norm": 0.7236573696136475, "learning_rate": 1.7716260162601627e-05, "loss": 0.0945, "step": 28090 }, { "epoch": 0.3426219512195122, "grad_norm": 0.459062784910202, "learning_rate": 1.7715853658536586e-05, "loss": 0.0687, "step": 28095 }, { "epoch": 0.3426829268292683, "grad_norm": 0.8723305463790894, "learning_rate": 1.7715447154471547e-05, "loss": 0.082, "step": 28100 }, { "epoch": 0.3427439024390244, "grad_norm": 1.012069582939148, "learning_rate": 1.7715040650406505e-05, "loss": 0.0599, "step": 28105 }, { "epoch": 0.3428048780487805, "grad_norm": 1.1763314008712769, "learning_rate": 1.7714634146341467e-05, "loss": 0.1041, "step": 28110 }, { "epoch": 0.3428658536585366, "grad_norm": 0.5558013916015625, "learning_rate": 1.7714227642276425e-05, "loss": 0.0609, "step": 28115 }, { "epoch": 0.3429268292682927, "grad_norm": 0.8393757939338684, "learning_rate": 1.7713821138211383e-05, "loss": 0.0787, "step": 28120 }, { "epoch": 0.3429878048780488, "grad_norm": 1.1455211639404297, "learning_rate": 1.771341463414634e-05, "loss": 0.1264, "step": 28125 }, { "epoch": 0.3430487804878049, "grad_norm": 0.6605457663536072, "learning_rate": 1.7713008130081303e-05, "loss": 0.0835, "step": 28130 }, { "epoch": 0.343109756097561, "grad_norm": 0.6255935430526733, "learning_rate": 1.771260162601626e-05, "loss": 0.1024, "step": 28135 }, { "epoch": 0.3431707317073171, "grad_norm": 1.2511452436447144, "learning_rate": 1.7712195121951222e-05, "loss": 0.0956, "step": 28140 }, { "epoch": 0.3432317073170732, "grad_norm": 0.7973867058753967, "learning_rate": 1.771178861788618e-05, "loss": 0.0992, "step": 28145 }, { "epoch": 0.3432926829268293, "grad_norm": 0.603909432888031, "learning_rate": 1.7711382113821142e-05, "loss": 0.1035, "step": 28150 }, { "epoch": 0.3433536585365854, "grad_norm": 0.9084871411323547, "learning_rate": 1.7710975609756097e-05, "loss": 0.0925, "step": 28155 }, { "epoch": 0.3434146341463415, "grad_norm": 1.58333420753479, "learning_rate": 1.7710569105691058e-05, "loss": 0.1384, "step": 28160 }, { "epoch": 0.3434756097560976, "grad_norm": 1.1064099073410034, "learning_rate": 1.7710162601626016e-05, "loss": 0.1103, "step": 28165 }, { "epoch": 0.3435365853658537, "grad_norm": 0.7303411364555359, "learning_rate": 1.7709756097560978e-05, "loss": 0.1268, "step": 28170 }, { "epoch": 0.3435975609756098, "grad_norm": 0.48635628819465637, "learning_rate": 1.7709349593495936e-05, "loss": 0.0953, "step": 28175 }, { "epoch": 0.3436585365853659, "grad_norm": 0.5440241694450378, "learning_rate": 1.7708943089430897e-05, "loss": 0.0953, "step": 28180 }, { "epoch": 0.343719512195122, "grad_norm": 0.8062171339988708, "learning_rate": 1.7708536585365856e-05, "loss": 0.0697, "step": 28185 }, { "epoch": 0.343780487804878, "grad_norm": 0.48378241062164307, "learning_rate": 1.7708130081300814e-05, "loss": 0.0691, "step": 28190 }, { "epoch": 0.3438414634146341, "grad_norm": 0.6084084510803223, "learning_rate": 1.7707723577235775e-05, "loss": 0.0819, "step": 28195 }, { "epoch": 0.3439024390243902, "grad_norm": 0.530217170715332, "learning_rate": 1.7707317073170733e-05, "loss": 0.0934, "step": 28200 }, { "epoch": 0.3439634146341463, "grad_norm": 0.6063303351402283, "learning_rate": 1.770691056910569e-05, "loss": 0.092, "step": 28205 }, { "epoch": 0.3440243902439024, "grad_norm": 0.5490875244140625, "learning_rate": 1.7706504065040653e-05, "loss": 0.0813, "step": 28210 }, { "epoch": 0.3440853658536585, "grad_norm": 0.5010896921157837, "learning_rate": 1.770609756097561e-05, "loss": 0.0777, "step": 28215 }, { "epoch": 0.3441463414634146, "grad_norm": 1.4960938692092896, "learning_rate": 1.770569105691057e-05, "loss": 0.1291, "step": 28220 }, { "epoch": 0.3442073170731707, "grad_norm": 1.364043951034546, "learning_rate": 1.770528455284553e-05, "loss": 0.0729, "step": 28225 }, { "epoch": 0.3442682926829268, "grad_norm": 0.49311569333076477, "learning_rate": 1.770487804878049e-05, "loss": 0.0695, "step": 28230 }, { "epoch": 0.3443292682926829, "grad_norm": 0.5579979419708252, "learning_rate": 1.770447154471545e-05, "loss": 0.095, "step": 28235 }, { "epoch": 0.344390243902439, "grad_norm": 0.869498610496521, "learning_rate": 1.770406504065041e-05, "loss": 0.0949, "step": 28240 }, { "epoch": 0.3444512195121951, "grad_norm": 0.9105575084686279, "learning_rate": 1.7703658536585367e-05, "loss": 0.0799, "step": 28245 }, { "epoch": 0.3445121951219512, "grad_norm": 0.771848738193512, "learning_rate": 1.7703252032520325e-05, "loss": 0.1102, "step": 28250 }, { "epoch": 0.3445731707317073, "grad_norm": 0.7649298906326294, "learning_rate": 1.7702845528455286e-05, "loss": 0.0633, "step": 28255 }, { "epoch": 0.3446341463414634, "grad_norm": 0.4198262393474579, "learning_rate": 1.7702439024390244e-05, "loss": 0.0638, "step": 28260 }, { "epoch": 0.3446951219512195, "grad_norm": 0.6870149970054626, "learning_rate": 1.7702032520325206e-05, "loss": 0.0947, "step": 28265 }, { "epoch": 0.3447560975609756, "grad_norm": 1.1510779857635498, "learning_rate": 1.7701626016260164e-05, "loss": 0.0809, "step": 28270 }, { "epoch": 0.3448170731707317, "grad_norm": 1.283565878868103, "learning_rate": 1.7701219512195122e-05, "loss": 0.0867, "step": 28275 }, { "epoch": 0.3448780487804878, "grad_norm": 0.9584017992019653, "learning_rate": 1.7700813008130084e-05, "loss": 0.078, "step": 28280 }, { "epoch": 0.3449390243902439, "grad_norm": 0.7895310521125793, "learning_rate": 1.7700406504065042e-05, "loss": 0.0995, "step": 28285 }, { "epoch": 0.345, "grad_norm": 0.6404832601547241, "learning_rate": 1.77e-05, "loss": 0.0886, "step": 28290 }, { "epoch": 0.34506097560975607, "grad_norm": 0.3963833451271057, "learning_rate": 1.769959349593496e-05, "loss": 0.0673, "step": 28295 }, { "epoch": 0.34512195121951217, "grad_norm": 0.6589000821113586, "learning_rate": 1.769918699186992e-05, "loss": 0.1149, "step": 28300 }, { "epoch": 0.34518292682926827, "grad_norm": 1.1309481859207153, "learning_rate": 1.7698780487804878e-05, "loss": 0.1073, "step": 28305 }, { "epoch": 0.34524390243902436, "grad_norm": 0.7490445375442505, "learning_rate": 1.769837398373984e-05, "loss": 0.0845, "step": 28310 }, { "epoch": 0.34530487804878046, "grad_norm": 0.8335891962051392, "learning_rate": 1.7697967479674797e-05, "loss": 0.104, "step": 28315 }, { "epoch": 0.34536585365853656, "grad_norm": 0.7134566903114319, "learning_rate": 1.769756097560976e-05, "loss": 0.0776, "step": 28320 }, { "epoch": 0.34542682926829266, "grad_norm": 0.6964024901390076, "learning_rate": 1.7697154471544717e-05, "loss": 0.12, "step": 28325 }, { "epoch": 0.34548780487804875, "grad_norm": 0.7161528468132019, "learning_rate": 1.769674796747968e-05, "loss": 0.0742, "step": 28330 }, { "epoch": 0.34554878048780485, "grad_norm": 1.0020090341567993, "learning_rate": 1.7696341463414633e-05, "loss": 0.1104, "step": 28335 }, { "epoch": 0.34560975609756095, "grad_norm": 0.6033317446708679, "learning_rate": 1.7695934959349595e-05, "loss": 0.087, "step": 28340 }, { "epoch": 0.34567073170731705, "grad_norm": 1.9201489686965942, "learning_rate": 1.7695528455284553e-05, "loss": 0.1069, "step": 28345 }, { "epoch": 0.34573170731707314, "grad_norm": 0.66215580701828, "learning_rate": 1.7695121951219514e-05, "loss": 0.1089, "step": 28350 }, { "epoch": 0.34579268292682924, "grad_norm": 0.6898285746574402, "learning_rate": 1.7694715447154473e-05, "loss": 0.0896, "step": 28355 }, { "epoch": 0.34585365853658534, "grad_norm": 0.47195544838905334, "learning_rate": 1.7694308943089434e-05, "loss": 0.0815, "step": 28360 }, { "epoch": 0.34591463414634144, "grad_norm": 0.6970357894897461, "learning_rate": 1.7693902439024392e-05, "loss": 0.1076, "step": 28365 }, { "epoch": 0.34597560975609754, "grad_norm": 0.8254712224006653, "learning_rate": 1.769349593495935e-05, "loss": 0.0822, "step": 28370 }, { "epoch": 0.34603658536585363, "grad_norm": 0.9274221658706665, "learning_rate": 1.7693089430894312e-05, "loss": 0.1178, "step": 28375 }, { "epoch": 0.34609756097560973, "grad_norm": 0.6568260788917542, "learning_rate": 1.769268292682927e-05, "loss": 0.094, "step": 28380 }, { "epoch": 0.34615853658536583, "grad_norm": 0.38990160822868347, "learning_rate": 1.7692276422764228e-05, "loss": 0.066, "step": 28385 }, { "epoch": 0.3462195121951219, "grad_norm": 0.6593577265739441, "learning_rate": 1.769186991869919e-05, "loss": 0.1316, "step": 28390 }, { "epoch": 0.346280487804878, "grad_norm": 0.8017472624778748, "learning_rate": 1.7691463414634148e-05, "loss": 0.091, "step": 28395 }, { "epoch": 0.3463414634146341, "grad_norm": 0.7763928771018982, "learning_rate": 1.7691056910569106e-05, "loss": 0.1441, "step": 28400 }, { "epoch": 0.3464024390243902, "grad_norm": 0.49823567271232605, "learning_rate": 1.7690650406504067e-05, "loss": 0.0757, "step": 28405 }, { "epoch": 0.3464634146341463, "grad_norm": 0.836531400680542, "learning_rate": 1.7690243902439025e-05, "loss": 0.114, "step": 28410 }, { "epoch": 0.3465243902439024, "grad_norm": 1.1287236213684082, "learning_rate": 1.7689837398373987e-05, "loss": 0.104, "step": 28415 }, { "epoch": 0.3465853658536585, "grad_norm": 0.6518543362617493, "learning_rate": 1.7689430894308945e-05, "loss": 0.1061, "step": 28420 }, { "epoch": 0.3466463414634146, "grad_norm": 0.576859712600708, "learning_rate": 1.7689024390243903e-05, "loss": 0.085, "step": 28425 }, { "epoch": 0.3467073170731707, "grad_norm": 0.5922055244445801, "learning_rate": 1.768861788617886e-05, "loss": 0.1405, "step": 28430 }, { "epoch": 0.3467682926829268, "grad_norm": 0.7701317071914673, "learning_rate": 1.7688211382113823e-05, "loss": 0.0753, "step": 28435 }, { "epoch": 0.3468292682926829, "grad_norm": 0.6035706400871277, "learning_rate": 1.768780487804878e-05, "loss": 0.0693, "step": 28440 }, { "epoch": 0.346890243902439, "grad_norm": 0.5728000998497009, "learning_rate": 1.7687398373983743e-05, "loss": 0.0766, "step": 28445 }, { "epoch": 0.3469512195121951, "grad_norm": 0.558555543422699, "learning_rate": 1.76869918699187e-05, "loss": 0.082, "step": 28450 }, { "epoch": 0.3470121951219512, "grad_norm": 0.7230772376060486, "learning_rate": 1.768658536585366e-05, "loss": 0.0691, "step": 28455 }, { "epoch": 0.3470731707317073, "grad_norm": 3.203193426132202, "learning_rate": 1.768617886178862e-05, "loss": 0.0842, "step": 28460 }, { "epoch": 0.3471341463414634, "grad_norm": 0.41479507088661194, "learning_rate": 1.768577235772358e-05, "loss": 0.0606, "step": 28465 }, { "epoch": 0.3471951219512195, "grad_norm": 0.49252480268478394, "learning_rate": 1.7685365853658537e-05, "loss": 0.0693, "step": 28470 }, { "epoch": 0.3472560975609756, "grad_norm": 0.666436493396759, "learning_rate": 1.7684959349593498e-05, "loss": 0.0819, "step": 28475 }, { "epoch": 0.3473170731707317, "grad_norm": 1.7462668418884277, "learning_rate": 1.7684552845528456e-05, "loss": 0.1255, "step": 28480 }, { "epoch": 0.3473780487804878, "grad_norm": 1.0872942209243774, "learning_rate": 1.7684146341463414e-05, "loss": 0.0707, "step": 28485 }, { "epoch": 0.3474390243902439, "grad_norm": 0.6393460631370544, "learning_rate": 1.7683739837398376e-05, "loss": 0.0567, "step": 28490 }, { "epoch": 0.3475, "grad_norm": 1.1212176084518433, "learning_rate": 1.7683333333333334e-05, "loss": 0.0934, "step": 28495 }, { "epoch": 0.3475609756097561, "grad_norm": 0.7125098705291748, "learning_rate": 1.7682926829268296e-05, "loss": 0.0933, "step": 28500 }, { "epoch": 0.34762195121951217, "grad_norm": 0.5496459603309631, "learning_rate": 1.7682520325203254e-05, "loss": 0.0646, "step": 28505 }, { "epoch": 0.34768292682926827, "grad_norm": 1.0349783897399902, "learning_rate": 1.7682113821138215e-05, "loss": 0.0879, "step": 28510 }, { "epoch": 0.34774390243902437, "grad_norm": 0.3881677985191345, "learning_rate": 1.768170731707317e-05, "loss": 0.0782, "step": 28515 }, { "epoch": 0.34780487804878046, "grad_norm": 0.9937903881072998, "learning_rate": 1.768130081300813e-05, "loss": 0.0888, "step": 28520 }, { "epoch": 0.34786585365853656, "grad_norm": 0.8998527526855469, "learning_rate": 1.768089430894309e-05, "loss": 0.0858, "step": 28525 }, { "epoch": 0.34792682926829266, "grad_norm": 0.7800886034965515, "learning_rate": 1.768048780487805e-05, "loss": 0.0624, "step": 28530 }, { "epoch": 0.34798780487804876, "grad_norm": 0.872780978679657, "learning_rate": 1.768008130081301e-05, "loss": 0.0825, "step": 28535 }, { "epoch": 0.34804878048780485, "grad_norm": 0.45218774676322937, "learning_rate": 1.767967479674797e-05, "loss": 0.0862, "step": 28540 }, { "epoch": 0.34810975609756095, "grad_norm": 1.242513656616211, "learning_rate": 1.767926829268293e-05, "loss": 0.0862, "step": 28545 }, { "epoch": 0.34817073170731705, "grad_norm": 0.9201893210411072, "learning_rate": 1.7678861788617887e-05, "loss": 0.0886, "step": 28550 }, { "epoch": 0.34823170731707315, "grad_norm": 0.7068232297897339, "learning_rate": 1.7678455284552845e-05, "loss": 0.0681, "step": 28555 }, { "epoch": 0.34829268292682924, "grad_norm": 0.7424414157867432, "learning_rate": 1.7678048780487807e-05, "loss": 0.0633, "step": 28560 }, { "epoch": 0.34835365853658534, "grad_norm": 0.4922287166118622, "learning_rate": 1.7677642276422765e-05, "loss": 0.058, "step": 28565 }, { "epoch": 0.34841463414634144, "grad_norm": 0.6111693382263184, "learning_rate": 1.7677235772357726e-05, "loss": 0.1406, "step": 28570 }, { "epoch": 0.34847560975609754, "grad_norm": 0.9920800924301147, "learning_rate": 1.7676829268292684e-05, "loss": 0.1233, "step": 28575 }, { "epoch": 0.34853658536585364, "grad_norm": 0.4479232728481293, "learning_rate": 1.7676422764227642e-05, "loss": 0.0662, "step": 28580 }, { "epoch": 0.34859756097560973, "grad_norm": 0.6840654015541077, "learning_rate": 1.7676016260162604e-05, "loss": 0.0739, "step": 28585 }, { "epoch": 0.34865853658536583, "grad_norm": 0.2698759436607361, "learning_rate": 1.7675609756097562e-05, "loss": 0.101, "step": 28590 }, { "epoch": 0.34871951219512193, "grad_norm": 0.805444061756134, "learning_rate": 1.7675203252032524e-05, "loss": 0.099, "step": 28595 }, { "epoch": 0.348780487804878, "grad_norm": 1.5713075399398804, "learning_rate": 1.7674796747967482e-05, "loss": 0.1486, "step": 28600 }, { "epoch": 0.3488414634146341, "grad_norm": 0.681243360042572, "learning_rate": 1.767439024390244e-05, "loss": 0.0797, "step": 28605 }, { "epoch": 0.3489024390243902, "grad_norm": 1.1491317749023438, "learning_rate": 1.7673983739837398e-05, "loss": 0.0818, "step": 28610 }, { "epoch": 0.3489634146341463, "grad_norm": 0.3963698148727417, "learning_rate": 1.767357723577236e-05, "loss": 0.0713, "step": 28615 }, { "epoch": 0.3490243902439024, "grad_norm": 0.6360049247741699, "learning_rate": 1.7673170731707318e-05, "loss": 0.1007, "step": 28620 }, { "epoch": 0.3490853658536585, "grad_norm": 0.7738233804702759, "learning_rate": 1.767276422764228e-05, "loss": 0.0758, "step": 28625 }, { "epoch": 0.3491463414634146, "grad_norm": 0.5548813343048096, "learning_rate": 1.7672357723577237e-05, "loss": 0.0838, "step": 28630 }, { "epoch": 0.3492073170731707, "grad_norm": 0.3519914448261261, "learning_rate": 1.7671951219512195e-05, "loss": 0.0888, "step": 28635 }, { "epoch": 0.3492682926829268, "grad_norm": 1.308089017868042, "learning_rate": 1.7671544715447157e-05, "loss": 0.1064, "step": 28640 }, { "epoch": 0.3493292682926829, "grad_norm": 0.6262319087982178, "learning_rate": 1.7671138211382115e-05, "loss": 0.0963, "step": 28645 }, { "epoch": 0.349390243902439, "grad_norm": 0.6693112254142761, "learning_rate": 1.7670731707317073e-05, "loss": 0.1074, "step": 28650 }, { "epoch": 0.3494512195121951, "grad_norm": 0.7315513491630554, "learning_rate": 1.7670325203252035e-05, "loss": 0.0537, "step": 28655 }, { "epoch": 0.3495121951219512, "grad_norm": 0.5615357160568237, "learning_rate": 1.7669918699186993e-05, "loss": 0.0736, "step": 28660 }, { "epoch": 0.3495731707317073, "grad_norm": 0.5126487612724304, "learning_rate": 1.766951219512195e-05, "loss": 0.0772, "step": 28665 }, { "epoch": 0.3496341463414634, "grad_norm": 0.5108616352081299, "learning_rate": 1.7669105691056913e-05, "loss": 0.0853, "step": 28670 }, { "epoch": 0.3496951219512195, "grad_norm": 1.3523306846618652, "learning_rate": 1.766869918699187e-05, "loss": 0.0918, "step": 28675 }, { "epoch": 0.3497560975609756, "grad_norm": 0.7725014686584473, "learning_rate": 1.7668292682926832e-05, "loss": 0.0826, "step": 28680 }, { "epoch": 0.3498170731707317, "grad_norm": 0.7960441708564758, "learning_rate": 1.766788617886179e-05, "loss": 0.0671, "step": 28685 }, { "epoch": 0.3498780487804878, "grad_norm": 0.49852246046066284, "learning_rate": 1.7667479674796752e-05, "loss": 0.0654, "step": 28690 }, { "epoch": 0.3499390243902439, "grad_norm": 0.4366828501224518, "learning_rate": 1.7667073170731707e-05, "loss": 0.1122, "step": 28695 }, { "epoch": 0.35, "grad_norm": 0.933332085609436, "learning_rate": 1.7666666666666668e-05, "loss": 0.0918, "step": 28700 }, { "epoch": 0.3500609756097561, "grad_norm": 0.41787612438201904, "learning_rate": 1.7666260162601626e-05, "loss": 0.0697, "step": 28705 }, { "epoch": 0.3501219512195122, "grad_norm": 0.6665401458740234, "learning_rate": 1.7665853658536588e-05, "loss": 0.0715, "step": 28710 }, { "epoch": 0.35018292682926827, "grad_norm": 0.9188157916069031, "learning_rate": 1.7665447154471546e-05, "loss": 0.0894, "step": 28715 }, { "epoch": 0.35024390243902437, "grad_norm": 0.5611040592193604, "learning_rate": 1.7665040650406507e-05, "loss": 0.0885, "step": 28720 }, { "epoch": 0.35030487804878047, "grad_norm": 0.6212282776832581, "learning_rate": 1.7664634146341465e-05, "loss": 0.0752, "step": 28725 }, { "epoch": 0.35036585365853656, "grad_norm": 0.47383713722229004, "learning_rate": 1.7664227642276424e-05, "loss": 0.0644, "step": 28730 }, { "epoch": 0.35042682926829266, "grad_norm": 0.34849196672439575, "learning_rate": 1.7663821138211382e-05, "loss": 0.0749, "step": 28735 }, { "epoch": 0.35048780487804876, "grad_norm": 1.0365619659423828, "learning_rate": 1.7663414634146343e-05, "loss": 0.0816, "step": 28740 }, { "epoch": 0.35054878048780486, "grad_norm": 1.0845128297805786, "learning_rate": 1.76630081300813e-05, "loss": 0.1069, "step": 28745 }, { "epoch": 0.35060975609756095, "grad_norm": 0.8769921660423279, "learning_rate": 1.7662601626016263e-05, "loss": 0.0897, "step": 28750 }, { "epoch": 0.35067073170731705, "grad_norm": 0.3957565724849701, "learning_rate": 1.766219512195122e-05, "loss": 0.0863, "step": 28755 }, { "epoch": 0.35073170731707315, "grad_norm": 1.894196629524231, "learning_rate": 1.766178861788618e-05, "loss": 0.1289, "step": 28760 }, { "epoch": 0.35079268292682925, "grad_norm": 0.46962741017341614, "learning_rate": 1.766138211382114e-05, "loss": 0.0772, "step": 28765 }, { "epoch": 0.35085365853658534, "grad_norm": 0.7639976143836975, "learning_rate": 1.76609756097561e-05, "loss": 0.0917, "step": 28770 }, { "epoch": 0.35091463414634144, "grad_norm": 0.49298691749572754, "learning_rate": 1.766056910569106e-05, "loss": 0.0814, "step": 28775 }, { "epoch": 0.35097560975609754, "grad_norm": 0.6847543716430664, "learning_rate": 1.766016260162602e-05, "loss": 0.0863, "step": 28780 }, { "epoch": 0.35103658536585364, "grad_norm": 0.5050485134124756, "learning_rate": 1.7659756097560977e-05, "loss": 0.0667, "step": 28785 }, { "epoch": 0.35109756097560973, "grad_norm": 1.1616110801696777, "learning_rate": 1.7659349593495935e-05, "loss": 0.1096, "step": 28790 }, { "epoch": 0.35115853658536583, "grad_norm": 1.0235778093338013, "learning_rate": 1.7658943089430896e-05, "loss": 0.1129, "step": 28795 }, { "epoch": 0.35121951219512193, "grad_norm": 0.6921976804733276, "learning_rate": 1.7658536585365854e-05, "loss": 0.0592, "step": 28800 }, { "epoch": 0.351280487804878, "grad_norm": 1.6478866338729858, "learning_rate": 1.7658130081300816e-05, "loss": 0.1136, "step": 28805 }, { "epoch": 0.3513414634146341, "grad_norm": 1.0044044256210327, "learning_rate": 1.7657723577235774e-05, "loss": 0.1107, "step": 28810 }, { "epoch": 0.3514024390243902, "grad_norm": 0.5795383453369141, "learning_rate": 1.7657317073170732e-05, "loss": 0.0669, "step": 28815 }, { "epoch": 0.3514634146341463, "grad_norm": 1.3154137134552002, "learning_rate": 1.765691056910569e-05, "loss": 0.1333, "step": 28820 }, { "epoch": 0.3515243902439024, "grad_norm": 0.6003052592277527, "learning_rate": 1.7656504065040652e-05, "loss": 0.0758, "step": 28825 }, { "epoch": 0.3515853658536585, "grad_norm": 1.2167338132858276, "learning_rate": 1.765609756097561e-05, "loss": 0.0816, "step": 28830 }, { "epoch": 0.3516463414634146, "grad_norm": 0.5441177487373352, "learning_rate": 1.765569105691057e-05, "loss": 0.0997, "step": 28835 }, { "epoch": 0.3517073170731707, "grad_norm": 1.0784080028533936, "learning_rate": 1.765528455284553e-05, "loss": 0.0851, "step": 28840 }, { "epoch": 0.3517682926829268, "grad_norm": 1.7498199939727783, "learning_rate": 1.7654878048780488e-05, "loss": 0.148, "step": 28845 }, { "epoch": 0.3518292682926829, "grad_norm": 0.649256706237793, "learning_rate": 1.765447154471545e-05, "loss": 0.0699, "step": 28850 }, { "epoch": 0.351890243902439, "grad_norm": 0.6534109711647034, "learning_rate": 1.7654065040650407e-05, "loss": 0.0853, "step": 28855 }, { "epoch": 0.3519512195121951, "grad_norm": 0.8121623396873474, "learning_rate": 1.765365853658537e-05, "loss": 0.0828, "step": 28860 }, { "epoch": 0.3520121951219512, "grad_norm": 0.8965640068054199, "learning_rate": 1.7653252032520327e-05, "loss": 0.0729, "step": 28865 }, { "epoch": 0.3520731707317073, "grad_norm": 0.24057583510875702, "learning_rate": 1.765284552845529e-05, "loss": 0.0821, "step": 28870 }, { "epoch": 0.3521341463414634, "grad_norm": 1.2705715894699097, "learning_rate": 1.7652439024390243e-05, "loss": 0.0731, "step": 28875 }, { "epoch": 0.3521951219512195, "grad_norm": 0.3387828767299652, "learning_rate": 1.7652032520325205e-05, "loss": 0.063, "step": 28880 }, { "epoch": 0.3522560975609756, "grad_norm": 0.5669721364974976, "learning_rate": 1.7651626016260163e-05, "loss": 0.1255, "step": 28885 }, { "epoch": 0.3523170731707317, "grad_norm": 0.5900489687919617, "learning_rate": 1.7651219512195124e-05, "loss": 0.1058, "step": 28890 }, { "epoch": 0.3523780487804878, "grad_norm": 0.6698443293571472, "learning_rate": 1.7650813008130082e-05, "loss": 0.0962, "step": 28895 }, { "epoch": 0.3524390243902439, "grad_norm": 0.9527319073677063, "learning_rate": 1.7650406504065044e-05, "loss": 0.1202, "step": 28900 }, { "epoch": 0.3525, "grad_norm": 0.6664603352546692, "learning_rate": 1.7650000000000002e-05, "loss": 0.1055, "step": 28905 }, { "epoch": 0.3525609756097561, "grad_norm": 0.6196241974830627, "learning_rate": 1.764959349593496e-05, "loss": 0.0969, "step": 28910 }, { "epoch": 0.3526219512195122, "grad_norm": 0.8961361050605774, "learning_rate": 1.764918699186992e-05, "loss": 0.0529, "step": 28915 }, { "epoch": 0.3526829268292683, "grad_norm": 1.0820080041885376, "learning_rate": 1.764878048780488e-05, "loss": 0.1284, "step": 28920 }, { "epoch": 0.35274390243902437, "grad_norm": 1.4819884300231934, "learning_rate": 1.7648373983739838e-05, "loss": 0.0859, "step": 28925 }, { "epoch": 0.35280487804878047, "grad_norm": 0.6414717435836792, "learning_rate": 1.76479674796748e-05, "loss": 0.1201, "step": 28930 }, { "epoch": 0.35286585365853657, "grad_norm": 1.1522027254104614, "learning_rate": 1.7647560975609758e-05, "loss": 0.0896, "step": 28935 }, { "epoch": 0.35292682926829266, "grad_norm": 0.9944082498550415, "learning_rate": 1.7647154471544716e-05, "loss": 0.1133, "step": 28940 }, { "epoch": 0.35298780487804876, "grad_norm": 1.0650641918182373, "learning_rate": 1.7646747967479677e-05, "loss": 0.1075, "step": 28945 }, { "epoch": 0.35304878048780486, "grad_norm": 0.4983694851398468, "learning_rate": 1.7646341463414635e-05, "loss": 0.0782, "step": 28950 }, { "epoch": 0.35310975609756096, "grad_norm": 0.9587984085083008, "learning_rate": 1.7645934959349597e-05, "loss": 0.1315, "step": 28955 }, { "epoch": 0.35317073170731705, "grad_norm": 0.5111104249954224, "learning_rate": 1.7645528455284555e-05, "loss": 0.0669, "step": 28960 }, { "epoch": 0.35323170731707315, "grad_norm": 0.42578181624412537, "learning_rate": 1.7645121951219513e-05, "loss": 0.06, "step": 28965 }, { "epoch": 0.35329268292682925, "grad_norm": 0.7845242023468018, "learning_rate": 1.764471544715447e-05, "loss": 0.0806, "step": 28970 }, { "epoch": 0.35335365853658535, "grad_norm": 0.5562754273414612, "learning_rate": 1.7644308943089433e-05, "loss": 0.0634, "step": 28975 }, { "epoch": 0.35341463414634144, "grad_norm": 1.738672137260437, "learning_rate": 1.764390243902439e-05, "loss": 0.0974, "step": 28980 }, { "epoch": 0.35347560975609754, "grad_norm": 0.5604798793792725, "learning_rate": 1.7643495934959352e-05, "loss": 0.0834, "step": 28985 }, { "epoch": 0.35353658536585364, "grad_norm": 1.1224149465560913, "learning_rate": 1.764308943089431e-05, "loss": 0.0728, "step": 28990 }, { "epoch": 0.35359756097560974, "grad_norm": 1.9273464679718018, "learning_rate": 1.764268292682927e-05, "loss": 0.0982, "step": 28995 }, { "epoch": 0.35365853658536583, "grad_norm": 0.6884089112281799, "learning_rate": 1.7642276422764227e-05, "loss": 0.1264, "step": 29000 }, { "epoch": 0.35371951219512193, "grad_norm": 0.9250227212905884, "learning_rate": 1.764186991869919e-05, "loss": 0.1065, "step": 29005 }, { "epoch": 0.35378048780487803, "grad_norm": 0.6325300335884094, "learning_rate": 1.7641463414634147e-05, "loss": 0.0767, "step": 29010 }, { "epoch": 0.3538414634146341, "grad_norm": 0.6864094138145447, "learning_rate": 1.7641056910569108e-05, "loss": 0.0806, "step": 29015 }, { "epoch": 0.3539024390243902, "grad_norm": 0.6047999858856201, "learning_rate": 1.7640650406504066e-05, "loss": 0.0848, "step": 29020 }, { "epoch": 0.3539634146341463, "grad_norm": 1.2494920492172241, "learning_rate": 1.7640243902439024e-05, "loss": 0.0771, "step": 29025 }, { "epoch": 0.3540243902439024, "grad_norm": 0.7357124090194702, "learning_rate": 1.7639837398373986e-05, "loss": 0.0918, "step": 29030 }, { "epoch": 0.3540853658536585, "grad_norm": 0.800187349319458, "learning_rate": 1.7639430894308944e-05, "loss": 0.064, "step": 29035 }, { "epoch": 0.3541463414634146, "grad_norm": 0.6391848921775818, "learning_rate": 1.7639024390243905e-05, "loss": 0.0827, "step": 29040 }, { "epoch": 0.3542073170731707, "grad_norm": 0.9983208179473877, "learning_rate": 1.7638617886178864e-05, "loss": 0.0788, "step": 29045 }, { "epoch": 0.3542682926829268, "grad_norm": 0.6235866546630859, "learning_rate": 1.7638211382113825e-05, "loss": 0.0776, "step": 29050 }, { "epoch": 0.3543292682926829, "grad_norm": 1.0569900274276733, "learning_rate": 1.763780487804878e-05, "loss": 0.1036, "step": 29055 }, { "epoch": 0.354390243902439, "grad_norm": 1.833494782447815, "learning_rate": 1.763739837398374e-05, "loss": 0.1143, "step": 29060 }, { "epoch": 0.3544512195121951, "grad_norm": 0.8270975351333618, "learning_rate": 1.76369918699187e-05, "loss": 0.1076, "step": 29065 }, { "epoch": 0.3545121951219512, "grad_norm": 0.3744848668575287, "learning_rate": 1.763658536585366e-05, "loss": 0.0646, "step": 29070 }, { "epoch": 0.3545731707317073, "grad_norm": 0.716827929019928, "learning_rate": 1.763617886178862e-05, "loss": 0.0675, "step": 29075 }, { "epoch": 0.3546341463414634, "grad_norm": 0.6142649054527283, "learning_rate": 1.763577235772358e-05, "loss": 0.1363, "step": 29080 }, { "epoch": 0.3546951219512195, "grad_norm": 0.6095758080482483, "learning_rate": 1.7635365853658535e-05, "loss": 0.1033, "step": 29085 }, { "epoch": 0.3547560975609756, "grad_norm": 0.6562283635139465, "learning_rate": 1.7634959349593497e-05, "loss": 0.094, "step": 29090 }, { "epoch": 0.3548170731707317, "grad_norm": 1.065402865409851, "learning_rate": 1.7634552845528455e-05, "loss": 0.0982, "step": 29095 }, { "epoch": 0.3548780487804878, "grad_norm": 1.4640700817108154, "learning_rate": 1.7634146341463417e-05, "loss": 0.112, "step": 29100 }, { "epoch": 0.3549390243902439, "grad_norm": 1.1475772857666016, "learning_rate": 1.7633739837398375e-05, "loss": 0.0776, "step": 29105 }, { "epoch": 0.355, "grad_norm": 0.8282821178436279, "learning_rate": 1.7633333333333336e-05, "loss": 0.0794, "step": 29110 }, { "epoch": 0.3550609756097561, "grad_norm": 0.6640866994857788, "learning_rate": 1.7632926829268294e-05, "loss": 0.066, "step": 29115 }, { "epoch": 0.3551219512195122, "grad_norm": 1.126533031463623, "learning_rate": 1.7632520325203252e-05, "loss": 0.0881, "step": 29120 }, { "epoch": 0.3551829268292683, "grad_norm": 0.943587064743042, "learning_rate": 1.7632113821138214e-05, "loss": 0.117, "step": 29125 }, { "epoch": 0.3552439024390244, "grad_norm": 1.0253396034240723, "learning_rate": 1.7631707317073172e-05, "loss": 0.0809, "step": 29130 }, { "epoch": 0.35530487804878047, "grad_norm": 2.4378693103790283, "learning_rate": 1.7631300813008134e-05, "loss": 0.1421, "step": 29135 }, { "epoch": 0.35536585365853657, "grad_norm": 1.904947280883789, "learning_rate": 1.7630894308943092e-05, "loss": 0.0752, "step": 29140 }, { "epoch": 0.35542682926829267, "grad_norm": 0.6833040118217468, "learning_rate": 1.763048780487805e-05, "loss": 0.0915, "step": 29145 }, { "epoch": 0.35548780487804876, "grad_norm": 0.9512860178947449, "learning_rate": 1.7630081300813008e-05, "loss": 0.0773, "step": 29150 }, { "epoch": 0.35554878048780486, "grad_norm": 0.5248151421546936, "learning_rate": 1.762967479674797e-05, "loss": 0.0771, "step": 29155 }, { "epoch": 0.35560975609756096, "grad_norm": 0.7360613346099854, "learning_rate": 1.7629268292682928e-05, "loss": 0.0908, "step": 29160 }, { "epoch": 0.35567073170731706, "grad_norm": 0.7256472706794739, "learning_rate": 1.762886178861789e-05, "loss": 0.0968, "step": 29165 }, { "epoch": 0.35573170731707315, "grad_norm": 1.0691235065460205, "learning_rate": 1.7628455284552847e-05, "loss": 0.0674, "step": 29170 }, { "epoch": 0.35579268292682925, "grad_norm": 2.076845169067383, "learning_rate": 1.7628048780487805e-05, "loss": 0.0872, "step": 29175 }, { "epoch": 0.35585365853658535, "grad_norm": 0.5649680495262146, "learning_rate": 1.7627642276422764e-05, "loss": 0.0711, "step": 29180 }, { "epoch": 0.35591463414634145, "grad_norm": 2.1108269691467285, "learning_rate": 1.7627235772357725e-05, "loss": 0.122, "step": 29185 }, { "epoch": 0.35597560975609754, "grad_norm": 0.4686466455459595, "learning_rate": 1.7626829268292683e-05, "loss": 0.0784, "step": 29190 }, { "epoch": 0.35603658536585364, "grad_norm": 0.7833307981491089, "learning_rate": 1.7626422764227645e-05, "loss": 0.0653, "step": 29195 }, { "epoch": 0.35609756097560974, "grad_norm": 0.745834231376648, "learning_rate": 1.7626016260162603e-05, "loss": 0.0825, "step": 29200 }, { "epoch": 0.35615853658536584, "grad_norm": 0.592595100402832, "learning_rate": 1.762560975609756e-05, "loss": 0.1073, "step": 29205 }, { "epoch": 0.35621951219512193, "grad_norm": 0.8702337145805359, "learning_rate": 1.7625203252032522e-05, "loss": 0.0885, "step": 29210 }, { "epoch": 0.35628048780487803, "grad_norm": 0.773823082447052, "learning_rate": 1.762479674796748e-05, "loss": 0.092, "step": 29215 }, { "epoch": 0.35634146341463413, "grad_norm": 0.5377382040023804, "learning_rate": 1.7624390243902442e-05, "loss": 0.0663, "step": 29220 }, { "epoch": 0.3564024390243902, "grad_norm": 0.6494249701499939, "learning_rate": 1.76239837398374e-05, "loss": 0.0689, "step": 29225 }, { "epoch": 0.3564634146341463, "grad_norm": 1.2331148386001587, "learning_rate": 1.762357723577236e-05, "loss": 0.07, "step": 29230 }, { "epoch": 0.3565243902439024, "grad_norm": 0.43728870153427124, "learning_rate": 1.7623170731707316e-05, "loss": 0.0763, "step": 29235 }, { "epoch": 0.3565853658536585, "grad_norm": 1.5439196825027466, "learning_rate": 1.7622764227642278e-05, "loss": 0.0921, "step": 29240 }, { "epoch": 0.3566463414634146, "grad_norm": 0.5314050912857056, "learning_rate": 1.7622357723577236e-05, "loss": 0.0996, "step": 29245 }, { "epoch": 0.3567073170731707, "grad_norm": 0.5380570888519287, "learning_rate": 1.7621951219512198e-05, "loss": 0.0833, "step": 29250 }, { "epoch": 0.3567682926829268, "grad_norm": 0.7558435797691345, "learning_rate": 1.7621544715447156e-05, "loss": 0.1169, "step": 29255 }, { "epoch": 0.3568292682926829, "grad_norm": 1.2022491693496704, "learning_rate": 1.7621138211382117e-05, "loss": 0.0946, "step": 29260 }, { "epoch": 0.356890243902439, "grad_norm": 0.472410649061203, "learning_rate": 1.7620731707317072e-05, "loss": 0.0706, "step": 29265 }, { "epoch": 0.3569512195121951, "grad_norm": 2.5853168964385986, "learning_rate": 1.7620325203252034e-05, "loss": 0.085, "step": 29270 }, { "epoch": 0.3570121951219512, "grad_norm": 1.5760295391082764, "learning_rate": 1.761991869918699e-05, "loss": 0.0687, "step": 29275 }, { "epoch": 0.3570731707317073, "grad_norm": 0.46107685565948486, "learning_rate": 1.7619512195121953e-05, "loss": 0.0788, "step": 29280 }, { "epoch": 0.3571341463414634, "grad_norm": 1.0199263095855713, "learning_rate": 1.761910569105691e-05, "loss": 0.0941, "step": 29285 }, { "epoch": 0.3571951219512195, "grad_norm": 2.419502019882202, "learning_rate": 1.7618699186991873e-05, "loss": 0.0795, "step": 29290 }, { "epoch": 0.3572560975609756, "grad_norm": 0.8853631615638733, "learning_rate": 1.761829268292683e-05, "loss": 0.0912, "step": 29295 }, { "epoch": 0.3573170731707317, "grad_norm": 0.7718690037727356, "learning_rate": 1.761788617886179e-05, "loss": 0.1055, "step": 29300 }, { "epoch": 0.3573780487804878, "grad_norm": 0.7920199632644653, "learning_rate": 1.761747967479675e-05, "loss": 0.0878, "step": 29305 }, { "epoch": 0.3574390243902439, "grad_norm": 0.42137375473976135, "learning_rate": 1.761707317073171e-05, "loss": 0.0609, "step": 29310 }, { "epoch": 0.3575, "grad_norm": 0.5015414953231812, "learning_rate": 1.761666666666667e-05, "loss": 0.0796, "step": 29315 }, { "epoch": 0.3575609756097561, "grad_norm": 0.6728832125663757, "learning_rate": 1.761626016260163e-05, "loss": 0.1123, "step": 29320 }, { "epoch": 0.3576219512195122, "grad_norm": 1.219109058380127, "learning_rate": 1.7615853658536586e-05, "loss": 0.083, "step": 29325 }, { "epoch": 0.3576829268292683, "grad_norm": 0.8419551849365234, "learning_rate": 1.7615447154471545e-05, "loss": 0.0911, "step": 29330 }, { "epoch": 0.3577439024390244, "grad_norm": 0.6078317165374756, "learning_rate": 1.7615040650406506e-05, "loss": 0.1154, "step": 29335 }, { "epoch": 0.3578048780487805, "grad_norm": 2.306553363800049, "learning_rate": 1.7614634146341464e-05, "loss": 0.0663, "step": 29340 }, { "epoch": 0.35786585365853657, "grad_norm": 0.9258933067321777, "learning_rate": 1.7614227642276426e-05, "loss": 0.0659, "step": 29345 }, { "epoch": 0.35792682926829267, "grad_norm": 0.5463452935218811, "learning_rate": 1.7613821138211384e-05, "loss": 0.0812, "step": 29350 }, { "epoch": 0.35798780487804877, "grad_norm": 0.9106860160827637, "learning_rate": 1.7613414634146342e-05, "loss": 0.0778, "step": 29355 }, { "epoch": 0.35804878048780486, "grad_norm": 0.8852298855781555, "learning_rate": 1.76130081300813e-05, "loss": 0.0945, "step": 29360 }, { "epoch": 0.35810975609756096, "grad_norm": 0.6019974946975708, "learning_rate": 1.761260162601626e-05, "loss": 0.07, "step": 29365 }, { "epoch": 0.35817073170731706, "grad_norm": 0.7020797729492188, "learning_rate": 1.761219512195122e-05, "loss": 0.0849, "step": 29370 }, { "epoch": 0.35823170731707316, "grad_norm": 0.7070627808570862, "learning_rate": 1.761178861788618e-05, "loss": 0.0932, "step": 29375 }, { "epoch": 0.35829268292682925, "grad_norm": 1.9758470058441162, "learning_rate": 1.761138211382114e-05, "loss": 0.066, "step": 29380 }, { "epoch": 0.35835365853658535, "grad_norm": 0.4999789893627167, "learning_rate": 1.7610975609756098e-05, "loss": 0.0637, "step": 29385 }, { "epoch": 0.35841463414634145, "grad_norm": 0.7166750431060791, "learning_rate": 1.761056910569106e-05, "loss": 0.0899, "step": 29390 }, { "epoch": 0.35847560975609755, "grad_norm": 0.5930504202842712, "learning_rate": 1.7610162601626017e-05, "loss": 0.0833, "step": 29395 }, { "epoch": 0.35853658536585364, "grad_norm": 0.805559515953064, "learning_rate": 1.760975609756098e-05, "loss": 0.0934, "step": 29400 }, { "epoch": 0.35859756097560974, "grad_norm": 0.4117913246154785, "learning_rate": 1.7609349593495937e-05, "loss": 0.0804, "step": 29405 }, { "epoch": 0.35865853658536584, "grad_norm": 0.9869350790977478, "learning_rate": 1.7608943089430895e-05, "loss": 0.132, "step": 29410 }, { "epoch": 0.35871951219512194, "grad_norm": 1.8279626369476318, "learning_rate": 1.7608536585365853e-05, "loss": 0.1313, "step": 29415 }, { "epoch": 0.35878048780487803, "grad_norm": 1.1283491849899292, "learning_rate": 1.7608130081300815e-05, "loss": 0.1238, "step": 29420 }, { "epoch": 0.35884146341463413, "grad_norm": 1.187443494796753, "learning_rate": 1.7607723577235773e-05, "loss": 0.1125, "step": 29425 }, { "epoch": 0.35890243902439023, "grad_norm": 1.0214524269104004, "learning_rate": 1.7607317073170734e-05, "loss": 0.0988, "step": 29430 }, { "epoch": 0.3589634146341463, "grad_norm": 1.0423734188079834, "learning_rate": 1.7606910569105692e-05, "loss": 0.1378, "step": 29435 }, { "epoch": 0.3590243902439024, "grad_norm": 0.4234176576137543, "learning_rate": 1.7606504065040654e-05, "loss": 0.0907, "step": 29440 }, { "epoch": 0.3590853658536585, "grad_norm": 0.40562427043914795, "learning_rate": 1.760609756097561e-05, "loss": 0.0723, "step": 29445 }, { "epoch": 0.3591463414634146, "grad_norm": 1.0544418096542358, "learning_rate": 1.760569105691057e-05, "loss": 0.0762, "step": 29450 }, { "epoch": 0.3592073170731707, "grad_norm": 1.1170068979263306, "learning_rate": 1.7605284552845528e-05, "loss": 0.0627, "step": 29455 }, { "epoch": 0.3592682926829268, "grad_norm": 1.1400858163833618, "learning_rate": 1.760487804878049e-05, "loss": 0.1248, "step": 29460 }, { "epoch": 0.3593292682926829, "grad_norm": 0.642216145992279, "learning_rate": 1.7604471544715448e-05, "loss": 0.09, "step": 29465 }, { "epoch": 0.359390243902439, "grad_norm": 0.37510916590690613, "learning_rate": 1.760406504065041e-05, "loss": 0.0684, "step": 29470 }, { "epoch": 0.3594512195121951, "grad_norm": 0.5273992419242859, "learning_rate": 1.7603658536585368e-05, "loss": 0.0951, "step": 29475 }, { "epoch": 0.3595121951219512, "grad_norm": 1.0640112161636353, "learning_rate": 1.7603252032520326e-05, "loss": 0.0962, "step": 29480 }, { "epoch": 0.3595731707317073, "grad_norm": 0.6810095310211182, "learning_rate": 1.7602845528455287e-05, "loss": 0.1125, "step": 29485 }, { "epoch": 0.3596341463414634, "grad_norm": 0.6000245213508606, "learning_rate": 1.7602439024390245e-05, "loss": 0.0673, "step": 29490 }, { "epoch": 0.3596951219512195, "grad_norm": 0.9031434059143066, "learning_rate": 1.7602032520325203e-05, "loss": 0.0918, "step": 29495 }, { "epoch": 0.3597560975609756, "grad_norm": 1.0842833518981934, "learning_rate": 1.7601626016260165e-05, "loss": 0.0949, "step": 29500 }, { "epoch": 0.3598170731707317, "grad_norm": 0.6183855533599854, "learning_rate": 1.7601219512195123e-05, "loss": 0.0772, "step": 29505 }, { "epoch": 0.3598780487804878, "grad_norm": 0.4789494276046753, "learning_rate": 1.760081300813008e-05, "loss": 0.0776, "step": 29510 }, { "epoch": 0.3599390243902439, "grad_norm": 1.5469187498092651, "learning_rate": 1.7600406504065043e-05, "loss": 0.0908, "step": 29515 }, { "epoch": 0.36, "grad_norm": 0.3786306381225586, "learning_rate": 1.76e-05, "loss": 0.071, "step": 29520 }, { "epoch": 0.3600609756097561, "grad_norm": 0.6027656197547913, "learning_rate": 1.7599593495934962e-05, "loss": 0.0692, "step": 29525 }, { "epoch": 0.3601219512195122, "grad_norm": 0.9757677316665649, "learning_rate": 1.759918699186992e-05, "loss": 0.1034, "step": 29530 }, { "epoch": 0.3601829268292683, "grad_norm": 0.3647875487804413, "learning_rate": 1.759878048780488e-05, "loss": 0.0888, "step": 29535 }, { "epoch": 0.3602439024390244, "grad_norm": 0.7140775918960571, "learning_rate": 1.7598373983739837e-05, "loss": 0.0878, "step": 29540 }, { "epoch": 0.3603048780487805, "grad_norm": 0.925758957862854, "learning_rate": 1.75979674796748e-05, "loss": 0.0716, "step": 29545 }, { "epoch": 0.3603658536585366, "grad_norm": 0.6936009526252747, "learning_rate": 1.7597560975609756e-05, "loss": 0.0777, "step": 29550 }, { "epoch": 0.36042682926829267, "grad_norm": 0.6142281889915466, "learning_rate": 1.7597154471544718e-05, "loss": 0.1085, "step": 29555 }, { "epoch": 0.36048780487804877, "grad_norm": 0.767113983631134, "learning_rate": 1.7596747967479676e-05, "loss": 0.0814, "step": 29560 }, { "epoch": 0.36054878048780487, "grad_norm": 0.604967474937439, "learning_rate": 1.7596341463414634e-05, "loss": 0.1266, "step": 29565 }, { "epoch": 0.36060975609756096, "grad_norm": 0.455549031496048, "learning_rate": 1.7595934959349596e-05, "loss": 0.0613, "step": 29570 }, { "epoch": 0.36067073170731706, "grad_norm": 0.8110606074333191, "learning_rate": 1.7595528455284554e-05, "loss": 0.0981, "step": 29575 }, { "epoch": 0.36073170731707316, "grad_norm": 1.4528093338012695, "learning_rate": 1.7595121951219515e-05, "loss": 0.0616, "step": 29580 }, { "epoch": 0.36079268292682926, "grad_norm": 0.5610430240631104, "learning_rate": 1.7594715447154473e-05, "loss": 0.1161, "step": 29585 }, { "epoch": 0.36085365853658535, "grad_norm": 0.5136227607727051, "learning_rate": 1.759430894308943e-05, "loss": 0.0821, "step": 29590 }, { "epoch": 0.36091463414634145, "grad_norm": 0.7806739211082458, "learning_rate": 1.759390243902439e-05, "loss": 0.1032, "step": 29595 }, { "epoch": 0.36097560975609755, "grad_norm": 0.6609517335891724, "learning_rate": 1.759349593495935e-05, "loss": 0.0747, "step": 29600 }, { "epoch": 0.36103658536585365, "grad_norm": 0.7760024070739746, "learning_rate": 1.759308943089431e-05, "loss": 0.1121, "step": 29605 }, { "epoch": 0.36109756097560974, "grad_norm": 0.8884302973747253, "learning_rate": 1.759268292682927e-05, "loss": 0.0845, "step": 29610 }, { "epoch": 0.36115853658536584, "grad_norm": 0.7715704441070557, "learning_rate": 1.759227642276423e-05, "loss": 0.0993, "step": 29615 }, { "epoch": 0.36121951219512194, "grad_norm": 0.4908503592014313, "learning_rate": 1.759186991869919e-05, "loss": 0.0614, "step": 29620 }, { "epoch": 0.36128048780487804, "grad_norm": 0.7137747406959534, "learning_rate": 1.7591463414634145e-05, "loss": 0.0932, "step": 29625 }, { "epoch": 0.36134146341463413, "grad_norm": 0.45238032937049866, "learning_rate": 1.7591056910569107e-05, "loss": 0.0597, "step": 29630 }, { "epoch": 0.36140243902439023, "grad_norm": 1.3180732727050781, "learning_rate": 1.7590650406504065e-05, "loss": 0.1209, "step": 29635 }, { "epoch": 0.36146341463414633, "grad_norm": 1.2150778770446777, "learning_rate": 1.7590243902439026e-05, "loss": 0.096, "step": 29640 }, { "epoch": 0.3615243902439024, "grad_norm": 0.73076993227005, "learning_rate": 1.7589837398373985e-05, "loss": 0.0774, "step": 29645 }, { "epoch": 0.3615853658536585, "grad_norm": 1.3920668363571167, "learning_rate": 1.7589430894308946e-05, "loss": 0.1309, "step": 29650 }, { "epoch": 0.3616463414634146, "grad_norm": 0.5005106329917908, "learning_rate": 1.7589024390243904e-05, "loss": 0.0834, "step": 29655 }, { "epoch": 0.3617073170731707, "grad_norm": 0.6237210631370544, "learning_rate": 1.7588617886178862e-05, "loss": 0.0977, "step": 29660 }, { "epoch": 0.3617682926829268, "grad_norm": 1.3011837005615234, "learning_rate": 1.7588211382113824e-05, "loss": 0.0835, "step": 29665 }, { "epoch": 0.3618292682926829, "grad_norm": 1.4463239908218384, "learning_rate": 1.7587804878048782e-05, "loss": 0.0798, "step": 29670 }, { "epoch": 0.361890243902439, "grad_norm": 0.7300428748130798, "learning_rate": 1.758739837398374e-05, "loss": 0.0936, "step": 29675 }, { "epoch": 0.3619512195121951, "grad_norm": 0.5869334936141968, "learning_rate": 1.75869918699187e-05, "loss": 0.073, "step": 29680 }, { "epoch": 0.3620121951219512, "grad_norm": 0.5556456446647644, "learning_rate": 1.758658536585366e-05, "loss": 0.0548, "step": 29685 }, { "epoch": 0.3620731707317073, "grad_norm": 0.6952130794525146, "learning_rate": 1.7586178861788618e-05, "loss": 0.0733, "step": 29690 }, { "epoch": 0.3621341463414634, "grad_norm": 0.4911842346191406, "learning_rate": 1.758577235772358e-05, "loss": 0.063, "step": 29695 }, { "epoch": 0.3621951219512195, "grad_norm": 0.6824892163276672, "learning_rate": 1.7585365853658538e-05, "loss": 0.0805, "step": 29700 }, { "epoch": 0.3622560975609756, "grad_norm": 1.1942154169082642, "learning_rate": 1.75849593495935e-05, "loss": 0.0806, "step": 29705 }, { "epoch": 0.3623170731707317, "grad_norm": 0.886663019657135, "learning_rate": 1.7584552845528457e-05, "loss": 0.0632, "step": 29710 }, { "epoch": 0.3623780487804878, "grad_norm": 1.0874392986297607, "learning_rate": 1.7584146341463415e-05, "loss": 0.0787, "step": 29715 }, { "epoch": 0.3624390243902439, "grad_norm": 0.9908435940742493, "learning_rate": 1.7583739837398373e-05, "loss": 0.0969, "step": 29720 }, { "epoch": 0.3625, "grad_norm": 0.47689175605773926, "learning_rate": 1.7583333333333335e-05, "loss": 0.082, "step": 29725 }, { "epoch": 0.3625609756097561, "grad_norm": 0.697091817855835, "learning_rate": 1.7582926829268293e-05, "loss": 0.0886, "step": 29730 }, { "epoch": 0.3626219512195122, "grad_norm": 0.5312690138816833, "learning_rate": 1.7582520325203255e-05, "loss": 0.0603, "step": 29735 }, { "epoch": 0.3626829268292683, "grad_norm": 0.8691236972808838, "learning_rate": 1.7582113821138213e-05, "loss": 0.1098, "step": 29740 }, { "epoch": 0.3627439024390244, "grad_norm": 0.7530317306518555, "learning_rate": 1.758170731707317e-05, "loss": 0.0902, "step": 29745 }, { "epoch": 0.3628048780487805, "grad_norm": 0.4226112961769104, "learning_rate": 1.7581300813008132e-05, "loss": 0.0708, "step": 29750 }, { "epoch": 0.3628658536585366, "grad_norm": 0.55781090259552, "learning_rate": 1.758089430894309e-05, "loss": 0.0724, "step": 29755 }, { "epoch": 0.36292682926829267, "grad_norm": 0.7802090048789978, "learning_rate": 1.758048780487805e-05, "loss": 0.0749, "step": 29760 }, { "epoch": 0.36298780487804877, "grad_norm": 2.8390555381774902, "learning_rate": 1.758008130081301e-05, "loss": 0.0851, "step": 29765 }, { "epoch": 0.36304878048780487, "grad_norm": 0.8004312515258789, "learning_rate": 1.7579674796747968e-05, "loss": 0.1019, "step": 29770 }, { "epoch": 0.36310975609756097, "grad_norm": 0.8333723545074463, "learning_rate": 1.7579268292682926e-05, "loss": 0.091, "step": 29775 }, { "epoch": 0.36317073170731706, "grad_norm": 1.2168389558792114, "learning_rate": 1.7578861788617888e-05, "loss": 0.0868, "step": 29780 }, { "epoch": 0.36323170731707316, "grad_norm": 0.7153030633926392, "learning_rate": 1.7578455284552846e-05, "loss": 0.0719, "step": 29785 }, { "epoch": 0.36329268292682926, "grad_norm": 0.6156339049339294, "learning_rate": 1.7578048780487808e-05, "loss": 0.0791, "step": 29790 }, { "epoch": 0.36335365853658536, "grad_norm": 0.9814005494117737, "learning_rate": 1.7577642276422766e-05, "loss": 0.1025, "step": 29795 }, { "epoch": 0.36341463414634145, "grad_norm": 0.5406140089035034, "learning_rate": 1.7577235772357727e-05, "loss": 0.1152, "step": 29800 }, { "epoch": 0.36347560975609755, "grad_norm": 0.7069156765937805, "learning_rate": 1.7576829268292682e-05, "loss": 0.0719, "step": 29805 }, { "epoch": 0.36353658536585365, "grad_norm": 0.6623396277427673, "learning_rate": 1.7576422764227643e-05, "loss": 0.1168, "step": 29810 }, { "epoch": 0.36359756097560975, "grad_norm": 0.4126134514808655, "learning_rate": 1.75760162601626e-05, "loss": 0.0568, "step": 29815 }, { "epoch": 0.36365853658536584, "grad_norm": 1.5303288698196411, "learning_rate": 1.7575609756097563e-05, "loss": 0.1282, "step": 29820 }, { "epoch": 0.36371951219512194, "grad_norm": 0.3786599934101105, "learning_rate": 1.757520325203252e-05, "loss": 0.0635, "step": 29825 }, { "epoch": 0.36378048780487804, "grad_norm": 0.8164366483688354, "learning_rate": 1.7574796747967483e-05, "loss": 0.0836, "step": 29830 }, { "epoch": 0.36384146341463414, "grad_norm": 0.569938600063324, "learning_rate": 1.757439024390244e-05, "loss": 0.1077, "step": 29835 }, { "epoch": 0.36390243902439023, "grad_norm": 1.0000635385513306, "learning_rate": 1.75739837398374e-05, "loss": 0.0513, "step": 29840 }, { "epoch": 0.36396341463414633, "grad_norm": 0.7865660190582275, "learning_rate": 1.757357723577236e-05, "loss": 0.0898, "step": 29845 }, { "epoch": 0.36402439024390243, "grad_norm": 1.0572586059570312, "learning_rate": 1.757317073170732e-05, "loss": 0.1153, "step": 29850 }, { "epoch": 0.3640853658536585, "grad_norm": 0.9514036178588867, "learning_rate": 1.7572764227642277e-05, "loss": 0.0648, "step": 29855 }, { "epoch": 0.3641463414634146, "grad_norm": 0.5032879710197449, "learning_rate": 1.7572357723577238e-05, "loss": 0.068, "step": 29860 }, { "epoch": 0.3642073170731707, "grad_norm": 0.5005930662155151, "learning_rate": 1.7571951219512196e-05, "loss": 0.0871, "step": 29865 }, { "epoch": 0.3642682926829268, "grad_norm": 1.009871244430542, "learning_rate": 1.7571544715447155e-05, "loss": 0.0864, "step": 29870 }, { "epoch": 0.3643292682926829, "grad_norm": 1.1102898120880127, "learning_rate": 1.7571138211382116e-05, "loss": 0.1214, "step": 29875 }, { "epoch": 0.364390243902439, "grad_norm": 1.1919901371002197, "learning_rate": 1.7570731707317074e-05, "loss": 0.1182, "step": 29880 }, { "epoch": 0.3644512195121951, "grad_norm": 0.806484043598175, "learning_rate": 1.7570325203252036e-05, "loss": 0.0756, "step": 29885 }, { "epoch": 0.3645121951219512, "grad_norm": 0.7579559683799744, "learning_rate": 1.7569918699186994e-05, "loss": 0.0635, "step": 29890 }, { "epoch": 0.3645731707317073, "grad_norm": 0.5473137497901917, "learning_rate": 1.7569512195121952e-05, "loss": 0.0722, "step": 29895 }, { "epoch": 0.3646341463414634, "grad_norm": 1.2867635488510132, "learning_rate": 1.756910569105691e-05, "loss": 0.0901, "step": 29900 }, { "epoch": 0.3646951219512195, "grad_norm": 0.8127625584602356, "learning_rate": 1.756869918699187e-05, "loss": 0.102, "step": 29905 }, { "epoch": 0.3647560975609756, "grad_norm": 0.5106444954872131, "learning_rate": 1.756829268292683e-05, "loss": 0.0814, "step": 29910 }, { "epoch": 0.3648170731707317, "grad_norm": 0.4682904779911041, "learning_rate": 1.756788617886179e-05, "loss": 0.0948, "step": 29915 }, { "epoch": 0.3648780487804878, "grad_norm": 0.34893059730529785, "learning_rate": 1.756747967479675e-05, "loss": 0.1163, "step": 29920 }, { "epoch": 0.3649390243902439, "grad_norm": 0.6619811058044434, "learning_rate": 1.7567073170731707e-05, "loss": 0.0475, "step": 29925 }, { "epoch": 0.365, "grad_norm": 0.4392693340778351, "learning_rate": 1.756666666666667e-05, "loss": 0.1363, "step": 29930 }, { "epoch": 0.3650609756097561, "grad_norm": 0.7011871933937073, "learning_rate": 1.7566260162601627e-05, "loss": 0.1107, "step": 29935 }, { "epoch": 0.3651219512195122, "grad_norm": 0.9088201522827148, "learning_rate": 1.7565853658536585e-05, "loss": 0.0775, "step": 29940 }, { "epoch": 0.3651829268292683, "grad_norm": 0.46091723442077637, "learning_rate": 1.7565447154471547e-05, "loss": 0.0824, "step": 29945 }, { "epoch": 0.3652439024390244, "grad_norm": 0.6342057585716248, "learning_rate": 1.7565040650406505e-05, "loss": 0.0738, "step": 29950 }, { "epoch": 0.3653048780487805, "grad_norm": 1.004392385482788, "learning_rate": 1.7564634146341463e-05, "loss": 0.1115, "step": 29955 }, { "epoch": 0.3653658536585366, "grad_norm": 0.8263815641403198, "learning_rate": 1.7564227642276425e-05, "loss": 0.1266, "step": 29960 }, { "epoch": 0.3654268292682927, "grad_norm": 0.8411457538604736, "learning_rate": 1.7563821138211383e-05, "loss": 0.0945, "step": 29965 }, { "epoch": 0.36548780487804877, "grad_norm": 0.36736544966697693, "learning_rate": 1.7563414634146344e-05, "loss": 0.0829, "step": 29970 }, { "epoch": 0.36554878048780487, "grad_norm": 0.47764891386032104, "learning_rate": 1.7563008130081302e-05, "loss": 0.0769, "step": 29975 }, { "epoch": 0.36560975609756097, "grad_norm": 0.8166375756263733, "learning_rate": 1.7562601626016264e-05, "loss": 0.1026, "step": 29980 }, { "epoch": 0.36567073170731706, "grad_norm": 1.0113061666488647, "learning_rate": 1.756219512195122e-05, "loss": 0.1146, "step": 29985 }, { "epoch": 0.36573170731707316, "grad_norm": 0.7731139659881592, "learning_rate": 1.756178861788618e-05, "loss": 0.0614, "step": 29990 }, { "epoch": 0.36579268292682926, "grad_norm": 0.6693348288536072, "learning_rate": 1.7561382113821138e-05, "loss": 0.0727, "step": 29995 }, { "epoch": 0.36585365853658536, "grad_norm": 0.39279094338417053, "learning_rate": 1.75609756097561e-05, "loss": 0.0981, "step": 30000 }, { "epoch": 0.36591463414634146, "grad_norm": 0.7602019309997559, "learning_rate": 1.7560569105691058e-05, "loss": 0.0666, "step": 30005 }, { "epoch": 0.36597560975609755, "grad_norm": 1.249340295791626, "learning_rate": 1.756016260162602e-05, "loss": 0.0864, "step": 30010 }, { "epoch": 0.36603658536585365, "grad_norm": 0.6088346242904663, "learning_rate": 1.7559756097560978e-05, "loss": 0.0925, "step": 30015 }, { "epoch": 0.36609756097560975, "grad_norm": 0.7471178770065308, "learning_rate": 1.7559349593495936e-05, "loss": 0.0748, "step": 30020 }, { "epoch": 0.36615853658536585, "grad_norm": 0.49182817339897156, "learning_rate": 1.7558943089430894e-05, "loss": 0.071, "step": 30025 }, { "epoch": 0.36621951219512194, "grad_norm": 1.4244325160980225, "learning_rate": 1.7558536585365855e-05, "loss": 0.1021, "step": 30030 }, { "epoch": 0.36628048780487804, "grad_norm": 0.5857463479042053, "learning_rate": 1.7558130081300813e-05, "loss": 0.0926, "step": 30035 }, { "epoch": 0.36634146341463414, "grad_norm": 0.6935665011405945, "learning_rate": 1.7557723577235775e-05, "loss": 0.0958, "step": 30040 }, { "epoch": 0.36640243902439024, "grad_norm": 1.0397231578826904, "learning_rate": 1.7557317073170733e-05, "loss": 0.1108, "step": 30045 }, { "epoch": 0.36646341463414633, "grad_norm": 0.4159699082374573, "learning_rate": 1.755691056910569e-05, "loss": 0.0861, "step": 30050 }, { "epoch": 0.36652439024390243, "grad_norm": 1.1473472118377686, "learning_rate": 1.7556504065040653e-05, "loss": 0.1079, "step": 30055 }, { "epoch": 0.36658536585365853, "grad_norm": 0.6681628227233887, "learning_rate": 1.755609756097561e-05, "loss": 0.0966, "step": 30060 }, { "epoch": 0.3666463414634146, "grad_norm": 0.6256979703903198, "learning_rate": 1.7555691056910572e-05, "loss": 0.0983, "step": 30065 }, { "epoch": 0.3667073170731707, "grad_norm": 0.31624531745910645, "learning_rate": 1.755528455284553e-05, "loss": 0.0916, "step": 30070 }, { "epoch": 0.3667682926829268, "grad_norm": 0.36730214953422546, "learning_rate": 1.755487804878049e-05, "loss": 0.101, "step": 30075 }, { "epoch": 0.3668292682926829, "grad_norm": 1.9576358795166016, "learning_rate": 1.7554471544715447e-05, "loss": 0.0735, "step": 30080 }, { "epoch": 0.366890243902439, "grad_norm": 0.4353998303413391, "learning_rate": 1.7554065040650408e-05, "loss": 0.0531, "step": 30085 }, { "epoch": 0.3669512195121951, "grad_norm": 0.5937501788139343, "learning_rate": 1.7553658536585366e-05, "loss": 0.081, "step": 30090 }, { "epoch": 0.3670121951219512, "grad_norm": 1.1214381456375122, "learning_rate": 1.7553252032520328e-05, "loss": 0.1079, "step": 30095 }, { "epoch": 0.3670731707317073, "grad_norm": 0.4701562523841858, "learning_rate": 1.7552845528455286e-05, "loss": 0.0852, "step": 30100 }, { "epoch": 0.3671341463414634, "grad_norm": 0.7050830721855164, "learning_rate": 1.7552439024390244e-05, "loss": 0.068, "step": 30105 }, { "epoch": 0.3671951219512195, "grad_norm": 0.8363161683082581, "learning_rate": 1.7552032520325206e-05, "loss": 0.0772, "step": 30110 }, { "epoch": 0.3672560975609756, "grad_norm": 1.5536648035049438, "learning_rate": 1.7551626016260164e-05, "loss": 0.0995, "step": 30115 }, { "epoch": 0.3673170731707317, "grad_norm": 0.7186662554740906, "learning_rate": 1.7551219512195122e-05, "loss": 0.1029, "step": 30120 }, { "epoch": 0.3673780487804878, "grad_norm": 0.5688480138778687, "learning_rate": 1.7550813008130083e-05, "loss": 0.1243, "step": 30125 }, { "epoch": 0.3674390243902439, "grad_norm": 0.8296209573745728, "learning_rate": 1.755040650406504e-05, "loss": 0.0963, "step": 30130 }, { "epoch": 0.3675, "grad_norm": 1.0031239986419678, "learning_rate": 1.755e-05, "loss": 0.1046, "step": 30135 }, { "epoch": 0.3675609756097561, "grad_norm": 0.6108735799789429, "learning_rate": 1.754959349593496e-05, "loss": 0.0893, "step": 30140 }, { "epoch": 0.3676219512195122, "grad_norm": 1.0336321592330933, "learning_rate": 1.754918699186992e-05, "loss": 0.0759, "step": 30145 }, { "epoch": 0.3676829268292683, "grad_norm": 0.805243194103241, "learning_rate": 1.754878048780488e-05, "loss": 0.0868, "step": 30150 }, { "epoch": 0.3677439024390244, "grad_norm": 0.5882993340492249, "learning_rate": 1.754837398373984e-05, "loss": 0.0805, "step": 30155 }, { "epoch": 0.3678048780487805, "grad_norm": 0.9966295957565308, "learning_rate": 1.75479674796748e-05, "loss": 0.0853, "step": 30160 }, { "epoch": 0.3678658536585366, "grad_norm": 0.492072194814682, "learning_rate": 1.7547560975609755e-05, "loss": 0.0705, "step": 30165 }, { "epoch": 0.3679268292682927, "grad_norm": 0.8289055824279785, "learning_rate": 1.7547154471544717e-05, "loss": 0.0878, "step": 30170 }, { "epoch": 0.3679878048780488, "grad_norm": 0.6019206047058105, "learning_rate": 1.7546747967479675e-05, "loss": 0.0764, "step": 30175 }, { "epoch": 0.36804878048780487, "grad_norm": 0.47810256481170654, "learning_rate": 1.7546341463414636e-05, "loss": 0.1223, "step": 30180 }, { "epoch": 0.36810975609756097, "grad_norm": 0.6794698238372803, "learning_rate": 1.7545934959349595e-05, "loss": 0.0775, "step": 30185 }, { "epoch": 0.36817073170731707, "grad_norm": 0.5868363380432129, "learning_rate": 1.7545528455284556e-05, "loss": 0.0789, "step": 30190 }, { "epoch": 0.36823170731707316, "grad_norm": 0.47292736172676086, "learning_rate": 1.7545121951219514e-05, "loss": 0.0748, "step": 30195 }, { "epoch": 0.36829268292682926, "grad_norm": 0.8282769918441772, "learning_rate": 1.7544715447154472e-05, "loss": 0.0886, "step": 30200 }, { "epoch": 0.36835365853658536, "grad_norm": 0.2944045066833496, "learning_rate": 1.754430894308943e-05, "loss": 0.0628, "step": 30205 }, { "epoch": 0.36841463414634146, "grad_norm": 0.57232666015625, "learning_rate": 1.7543902439024392e-05, "loss": 0.114, "step": 30210 }, { "epoch": 0.36847560975609756, "grad_norm": 0.8305485844612122, "learning_rate": 1.754349593495935e-05, "loss": 0.0923, "step": 30215 }, { "epoch": 0.36853658536585365, "grad_norm": 0.9765099883079529, "learning_rate": 1.754308943089431e-05, "loss": 0.1157, "step": 30220 }, { "epoch": 0.36859756097560975, "grad_norm": 0.7637824416160583, "learning_rate": 1.754268292682927e-05, "loss": 0.0672, "step": 30225 }, { "epoch": 0.36865853658536585, "grad_norm": 0.9483838677406311, "learning_rate": 1.7542276422764228e-05, "loss": 0.1182, "step": 30230 }, { "epoch": 0.36871951219512195, "grad_norm": 1.3134143352508545, "learning_rate": 1.754186991869919e-05, "loss": 0.0874, "step": 30235 }, { "epoch": 0.36878048780487804, "grad_norm": 0.5507292747497559, "learning_rate": 1.7541463414634147e-05, "loss": 0.061, "step": 30240 }, { "epoch": 0.36884146341463414, "grad_norm": 0.5763779878616333, "learning_rate": 1.754105691056911e-05, "loss": 0.0807, "step": 30245 }, { "epoch": 0.36890243902439024, "grad_norm": 1.1420338153839111, "learning_rate": 1.7540650406504067e-05, "loss": 0.0983, "step": 30250 }, { "epoch": 0.36896341463414634, "grad_norm": 0.94781094789505, "learning_rate": 1.7540243902439025e-05, "loss": 0.0718, "step": 30255 }, { "epoch": 0.36902439024390243, "grad_norm": 0.6033045649528503, "learning_rate": 1.7539837398373983e-05, "loss": 0.0576, "step": 30260 }, { "epoch": 0.36908536585365853, "grad_norm": 0.9699821472167969, "learning_rate": 1.7539430894308945e-05, "loss": 0.1042, "step": 30265 }, { "epoch": 0.36914634146341463, "grad_norm": 1.1833709478378296, "learning_rate": 1.7539024390243903e-05, "loss": 0.082, "step": 30270 }, { "epoch": 0.3692073170731707, "grad_norm": 0.5103182792663574, "learning_rate": 1.7538617886178865e-05, "loss": 0.051, "step": 30275 }, { "epoch": 0.3692682926829268, "grad_norm": 0.9329075813293457, "learning_rate": 1.7538211382113823e-05, "loss": 0.0998, "step": 30280 }, { "epoch": 0.3693292682926829, "grad_norm": 1.40779709815979, "learning_rate": 1.753780487804878e-05, "loss": 0.0666, "step": 30285 }, { "epoch": 0.369390243902439, "grad_norm": 0.9471327066421509, "learning_rate": 1.753739837398374e-05, "loss": 0.1114, "step": 30290 }, { "epoch": 0.3694512195121951, "grad_norm": 0.7167308330535889, "learning_rate": 1.75369918699187e-05, "loss": 0.0961, "step": 30295 }, { "epoch": 0.3695121951219512, "grad_norm": 0.6385030746459961, "learning_rate": 1.753658536585366e-05, "loss": 0.1068, "step": 30300 }, { "epoch": 0.3695731707317073, "grad_norm": 0.9691615700721741, "learning_rate": 1.753617886178862e-05, "loss": 0.1099, "step": 30305 }, { "epoch": 0.3696341463414634, "grad_norm": 0.7482491731643677, "learning_rate": 1.7535772357723578e-05, "loss": 0.1196, "step": 30310 }, { "epoch": 0.3696951219512195, "grad_norm": 0.5783777236938477, "learning_rate": 1.7535365853658536e-05, "loss": 0.0754, "step": 30315 }, { "epoch": 0.3697560975609756, "grad_norm": 1.232903242111206, "learning_rate": 1.7534959349593498e-05, "loss": 0.0704, "step": 30320 }, { "epoch": 0.3698170731707317, "grad_norm": 0.4406417906284332, "learning_rate": 1.7534552845528456e-05, "loss": 0.0707, "step": 30325 }, { "epoch": 0.3698780487804878, "grad_norm": 1.4072834253311157, "learning_rate": 1.7534146341463417e-05, "loss": 0.0662, "step": 30330 }, { "epoch": 0.3699390243902439, "grad_norm": 0.6415424942970276, "learning_rate": 1.7533739837398376e-05, "loss": 0.101, "step": 30335 }, { "epoch": 0.37, "grad_norm": 0.8743340969085693, "learning_rate": 1.7533333333333337e-05, "loss": 0.07, "step": 30340 }, { "epoch": 0.3700609756097561, "grad_norm": 0.6475741267204285, "learning_rate": 1.7532926829268292e-05, "loss": 0.0706, "step": 30345 }, { "epoch": 0.3701219512195122, "grad_norm": 0.7469758987426758, "learning_rate": 1.7532520325203253e-05, "loss": 0.0801, "step": 30350 }, { "epoch": 0.3701829268292683, "grad_norm": 0.728601336479187, "learning_rate": 1.753211382113821e-05, "loss": 0.1132, "step": 30355 }, { "epoch": 0.3702439024390244, "grad_norm": 0.8206874132156372, "learning_rate": 1.7531707317073173e-05, "loss": 0.0718, "step": 30360 }, { "epoch": 0.3703048780487805, "grad_norm": 1.0440654754638672, "learning_rate": 1.753130081300813e-05, "loss": 0.0868, "step": 30365 }, { "epoch": 0.3703658536585366, "grad_norm": 0.6667378544807434, "learning_rate": 1.7530894308943093e-05, "loss": 0.0772, "step": 30370 }, { "epoch": 0.3704268292682927, "grad_norm": 0.729558527469635, "learning_rate": 1.753048780487805e-05, "loss": 0.0669, "step": 30375 }, { "epoch": 0.3704878048780488, "grad_norm": 0.5382461547851562, "learning_rate": 1.753008130081301e-05, "loss": 0.0784, "step": 30380 }, { "epoch": 0.3705487804878049, "grad_norm": 0.8166353106498718, "learning_rate": 1.7529674796747967e-05, "loss": 0.1031, "step": 30385 }, { "epoch": 0.37060975609756097, "grad_norm": 0.5976784229278564, "learning_rate": 1.752926829268293e-05, "loss": 0.0832, "step": 30390 }, { "epoch": 0.37067073170731707, "grad_norm": 0.32014477252960205, "learning_rate": 1.7528861788617887e-05, "loss": 0.0383, "step": 30395 }, { "epoch": 0.37073170731707317, "grad_norm": 0.7876566648483276, "learning_rate": 1.7528455284552848e-05, "loss": 0.0942, "step": 30400 }, { "epoch": 0.37079268292682926, "grad_norm": 0.745760977268219, "learning_rate": 1.7528048780487806e-05, "loss": 0.0802, "step": 30405 }, { "epoch": 0.37085365853658536, "grad_norm": 0.9541615843772888, "learning_rate": 1.7527642276422764e-05, "loss": 0.0873, "step": 30410 }, { "epoch": 0.37091463414634146, "grad_norm": 0.8877930641174316, "learning_rate": 1.7527235772357726e-05, "loss": 0.0995, "step": 30415 }, { "epoch": 0.37097560975609756, "grad_norm": 1.4230841398239136, "learning_rate": 1.7526829268292684e-05, "loss": 0.0761, "step": 30420 }, { "epoch": 0.37103658536585366, "grad_norm": 0.749900758266449, "learning_rate": 1.7526422764227646e-05, "loss": 0.1045, "step": 30425 }, { "epoch": 0.37109756097560975, "grad_norm": 0.4487017095088959, "learning_rate": 1.7526016260162604e-05, "loss": 0.0636, "step": 30430 }, { "epoch": 0.37115853658536585, "grad_norm": 0.5585723519325256, "learning_rate": 1.7525609756097562e-05, "loss": 0.0742, "step": 30435 }, { "epoch": 0.37121951219512195, "grad_norm": 0.6694631576538086, "learning_rate": 1.752520325203252e-05, "loss": 0.0971, "step": 30440 }, { "epoch": 0.37128048780487805, "grad_norm": 0.9160507321357727, "learning_rate": 1.752479674796748e-05, "loss": 0.0833, "step": 30445 }, { "epoch": 0.37134146341463414, "grad_norm": 0.6871713399887085, "learning_rate": 1.752439024390244e-05, "loss": 0.0761, "step": 30450 }, { "epoch": 0.37140243902439024, "grad_norm": 0.6443040370941162, "learning_rate": 1.75239837398374e-05, "loss": 0.057, "step": 30455 }, { "epoch": 0.37146341463414634, "grad_norm": 0.5218862891197205, "learning_rate": 1.752357723577236e-05, "loss": 0.0711, "step": 30460 }, { "epoch": 0.37152439024390244, "grad_norm": 2.257261037826538, "learning_rate": 1.7523170731707317e-05, "loss": 0.0679, "step": 30465 }, { "epoch": 0.37158536585365853, "grad_norm": 0.6087045669555664, "learning_rate": 1.7522764227642276e-05, "loss": 0.1166, "step": 30470 }, { "epoch": 0.37164634146341463, "grad_norm": 0.7400410771369934, "learning_rate": 1.7522357723577237e-05, "loss": 0.1057, "step": 30475 }, { "epoch": 0.37170731707317073, "grad_norm": 0.5416387319564819, "learning_rate": 1.7521951219512195e-05, "loss": 0.061, "step": 30480 }, { "epoch": 0.3717682926829268, "grad_norm": 0.5959441661834717, "learning_rate": 1.7521544715447157e-05, "loss": 0.0627, "step": 30485 }, { "epoch": 0.3718292682926829, "grad_norm": 0.5152537226676941, "learning_rate": 1.7521138211382115e-05, "loss": 0.124, "step": 30490 }, { "epoch": 0.371890243902439, "grad_norm": 0.7746345400810242, "learning_rate": 1.7520731707317073e-05, "loss": 0.076, "step": 30495 }, { "epoch": 0.3719512195121951, "grad_norm": 1.0509183406829834, "learning_rate": 1.7520325203252034e-05, "loss": 0.0611, "step": 30500 }, { "epoch": 0.3720121951219512, "grad_norm": 1.0209077596664429, "learning_rate": 1.7519918699186993e-05, "loss": 0.0701, "step": 30505 }, { "epoch": 0.3720731707317073, "grad_norm": 0.7004044055938721, "learning_rate": 1.7519512195121954e-05, "loss": 0.0593, "step": 30510 }, { "epoch": 0.3721341463414634, "grad_norm": 0.5737271904945374, "learning_rate": 1.7519105691056912e-05, "loss": 0.0995, "step": 30515 }, { "epoch": 0.3721951219512195, "grad_norm": 0.8712477684020996, "learning_rate": 1.7518699186991874e-05, "loss": 0.0555, "step": 30520 }, { "epoch": 0.3722560975609756, "grad_norm": 1.393729567527771, "learning_rate": 1.751829268292683e-05, "loss": 0.0843, "step": 30525 }, { "epoch": 0.3723170731707317, "grad_norm": 0.9448316693305969, "learning_rate": 1.751788617886179e-05, "loss": 0.0779, "step": 30530 }, { "epoch": 0.3723780487804878, "grad_norm": 0.45851805806159973, "learning_rate": 1.7517479674796748e-05, "loss": 0.0516, "step": 30535 }, { "epoch": 0.3724390243902439, "grad_norm": 0.7763647437095642, "learning_rate": 1.751707317073171e-05, "loss": 0.0883, "step": 30540 }, { "epoch": 0.3725, "grad_norm": 0.670448362827301, "learning_rate": 1.7516666666666668e-05, "loss": 0.0836, "step": 30545 }, { "epoch": 0.3725609756097561, "grad_norm": 0.5714314579963684, "learning_rate": 1.751626016260163e-05, "loss": 0.0887, "step": 30550 }, { "epoch": 0.3726219512195122, "grad_norm": 0.6205483675003052, "learning_rate": 1.7515853658536584e-05, "loss": 0.0692, "step": 30555 }, { "epoch": 0.3726829268292683, "grad_norm": 0.7200717329978943, "learning_rate": 1.7515447154471546e-05, "loss": 0.0712, "step": 30560 }, { "epoch": 0.3727439024390244, "grad_norm": 1.1611862182617188, "learning_rate": 1.7515040650406504e-05, "loss": 0.0881, "step": 30565 }, { "epoch": 0.3728048780487805, "grad_norm": 0.7801093459129333, "learning_rate": 1.7514634146341465e-05, "loss": 0.1037, "step": 30570 }, { "epoch": 0.3728658536585366, "grad_norm": 1.0053277015686035, "learning_rate": 1.7514227642276423e-05, "loss": 0.107, "step": 30575 }, { "epoch": 0.3729268292682927, "grad_norm": 1.4841705560684204, "learning_rate": 1.7513821138211385e-05, "loss": 0.1004, "step": 30580 }, { "epoch": 0.3729878048780488, "grad_norm": 1.241424798965454, "learning_rate": 1.7513414634146343e-05, "loss": 0.1091, "step": 30585 }, { "epoch": 0.3730487804878049, "grad_norm": 0.550373911857605, "learning_rate": 1.75130081300813e-05, "loss": 0.0876, "step": 30590 }, { "epoch": 0.373109756097561, "grad_norm": 0.6764078140258789, "learning_rate": 1.7512601626016263e-05, "loss": 0.059, "step": 30595 }, { "epoch": 0.37317073170731707, "grad_norm": 0.5209805369377136, "learning_rate": 1.751219512195122e-05, "loss": 0.0762, "step": 30600 }, { "epoch": 0.37323170731707317, "grad_norm": 0.6946054100990295, "learning_rate": 1.7511788617886182e-05, "loss": 0.0651, "step": 30605 }, { "epoch": 0.37329268292682927, "grad_norm": 1.0604450702667236, "learning_rate": 1.751138211382114e-05, "loss": 0.109, "step": 30610 }, { "epoch": 0.37335365853658536, "grad_norm": 0.646570086479187, "learning_rate": 1.75109756097561e-05, "loss": 0.0931, "step": 30615 }, { "epoch": 0.37341463414634146, "grad_norm": 0.5414503216743469, "learning_rate": 1.7510569105691057e-05, "loss": 0.1222, "step": 30620 }, { "epoch": 0.37347560975609756, "grad_norm": 0.9289389848709106, "learning_rate": 1.7510162601626018e-05, "loss": 0.1196, "step": 30625 }, { "epoch": 0.37353658536585366, "grad_norm": 0.6882215738296509, "learning_rate": 1.7509756097560976e-05, "loss": 0.088, "step": 30630 }, { "epoch": 0.37359756097560975, "grad_norm": 0.39413779973983765, "learning_rate": 1.7509349593495938e-05, "loss": 0.0769, "step": 30635 }, { "epoch": 0.37365853658536585, "grad_norm": 1.5508062839508057, "learning_rate": 1.7508943089430896e-05, "loss": 0.1172, "step": 30640 }, { "epoch": 0.37371951219512195, "grad_norm": 0.5098499655723572, "learning_rate": 1.7508536585365854e-05, "loss": 0.0703, "step": 30645 }, { "epoch": 0.37378048780487805, "grad_norm": 1.6603546142578125, "learning_rate": 1.7508130081300812e-05, "loss": 0.1216, "step": 30650 }, { "epoch": 0.37384146341463415, "grad_norm": 0.4549078345298767, "learning_rate": 1.7507723577235774e-05, "loss": 0.0959, "step": 30655 }, { "epoch": 0.37390243902439024, "grad_norm": 0.6907938122749329, "learning_rate": 1.7507317073170732e-05, "loss": 0.07, "step": 30660 }, { "epoch": 0.37396341463414634, "grad_norm": 1.498429775238037, "learning_rate": 1.7506910569105693e-05, "loss": 0.0605, "step": 30665 }, { "epoch": 0.37402439024390244, "grad_norm": 0.8249791264533997, "learning_rate": 1.750650406504065e-05, "loss": 0.0782, "step": 30670 }, { "epoch": 0.37408536585365854, "grad_norm": 0.664903461933136, "learning_rate": 1.750609756097561e-05, "loss": 0.0849, "step": 30675 }, { "epoch": 0.37414634146341463, "grad_norm": 0.583801805973053, "learning_rate": 1.750569105691057e-05, "loss": 0.0592, "step": 30680 }, { "epoch": 0.37420731707317073, "grad_norm": 0.9527473449707031, "learning_rate": 1.750528455284553e-05, "loss": 0.0725, "step": 30685 }, { "epoch": 0.37426829268292683, "grad_norm": 0.7935606241226196, "learning_rate": 1.750487804878049e-05, "loss": 0.0834, "step": 30690 }, { "epoch": 0.3743292682926829, "grad_norm": 0.5379757881164551, "learning_rate": 1.750447154471545e-05, "loss": 0.0594, "step": 30695 }, { "epoch": 0.374390243902439, "grad_norm": 0.7273757457733154, "learning_rate": 1.7504065040650407e-05, "loss": 0.0631, "step": 30700 }, { "epoch": 0.3744512195121951, "grad_norm": 0.6516879200935364, "learning_rate": 1.7503658536585365e-05, "loss": 0.0843, "step": 30705 }, { "epoch": 0.3745121951219512, "grad_norm": 0.6960341334342957, "learning_rate": 1.7503252032520327e-05, "loss": 0.0997, "step": 30710 }, { "epoch": 0.3745731707317073, "grad_norm": 0.6749480962753296, "learning_rate": 1.7502845528455285e-05, "loss": 0.1072, "step": 30715 }, { "epoch": 0.3746341463414634, "grad_norm": 0.6429999470710754, "learning_rate": 1.7502439024390246e-05, "loss": 0.1122, "step": 30720 }, { "epoch": 0.3746951219512195, "grad_norm": 0.44865477085113525, "learning_rate": 1.7502032520325204e-05, "loss": 0.0862, "step": 30725 }, { "epoch": 0.3747560975609756, "grad_norm": 0.32224294543266296, "learning_rate": 1.7501626016260166e-05, "loss": 0.0722, "step": 30730 }, { "epoch": 0.3748170731707317, "grad_norm": 0.945420503616333, "learning_rate": 1.750121951219512e-05, "loss": 0.0677, "step": 30735 }, { "epoch": 0.3748780487804878, "grad_norm": 1.0791387557983398, "learning_rate": 1.7500813008130082e-05, "loss": 0.0889, "step": 30740 }, { "epoch": 0.3749390243902439, "grad_norm": 0.43797802925109863, "learning_rate": 1.750040650406504e-05, "loss": 0.0445, "step": 30745 }, { "epoch": 0.375, "grad_norm": 0.6796743273735046, "learning_rate": 1.7500000000000002e-05, "loss": 0.0739, "step": 30750 }, { "epoch": 0.3750609756097561, "grad_norm": 0.7689542770385742, "learning_rate": 1.749959349593496e-05, "loss": 0.0889, "step": 30755 }, { "epoch": 0.3751219512195122, "grad_norm": 0.6236075758934021, "learning_rate": 1.749918699186992e-05, "loss": 0.0974, "step": 30760 }, { "epoch": 0.3751829268292683, "grad_norm": 0.5384557843208313, "learning_rate": 1.749878048780488e-05, "loss": 0.0777, "step": 30765 }, { "epoch": 0.3752439024390244, "grad_norm": 0.6269022822380066, "learning_rate": 1.7498373983739838e-05, "loss": 0.0815, "step": 30770 }, { "epoch": 0.3753048780487805, "grad_norm": 0.3508501350879669, "learning_rate": 1.74979674796748e-05, "loss": 0.0595, "step": 30775 }, { "epoch": 0.3753658536585366, "grad_norm": 1.0513378381729126, "learning_rate": 1.7497560975609757e-05, "loss": 0.0607, "step": 30780 }, { "epoch": 0.3754268292682927, "grad_norm": 0.7624375820159912, "learning_rate": 1.749715447154472e-05, "loss": 0.0661, "step": 30785 }, { "epoch": 0.3754878048780488, "grad_norm": 1.3525389432907104, "learning_rate": 1.7496747967479677e-05, "loss": 0.0933, "step": 30790 }, { "epoch": 0.3755487804878049, "grad_norm": 1.0665968656539917, "learning_rate": 1.7496341463414635e-05, "loss": 0.0593, "step": 30795 }, { "epoch": 0.375609756097561, "grad_norm": 0.7614911198616028, "learning_rate": 1.7495934959349593e-05, "loss": 0.1126, "step": 30800 }, { "epoch": 0.3756707317073171, "grad_norm": 1.016255497932434, "learning_rate": 1.7495528455284555e-05, "loss": 0.0593, "step": 30805 }, { "epoch": 0.37573170731707317, "grad_norm": 1.567721962928772, "learning_rate": 1.7495121951219513e-05, "loss": 0.0959, "step": 30810 }, { "epoch": 0.37579268292682927, "grad_norm": 1.2485017776489258, "learning_rate": 1.7494715447154474e-05, "loss": 0.0965, "step": 30815 }, { "epoch": 0.37585365853658537, "grad_norm": 0.4880216121673584, "learning_rate": 1.7494308943089433e-05, "loss": 0.1485, "step": 30820 }, { "epoch": 0.37591463414634146, "grad_norm": 7.183369159698486, "learning_rate": 1.7493902439024394e-05, "loss": 0.1021, "step": 30825 }, { "epoch": 0.37597560975609756, "grad_norm": 0.9945500493049622, "learning_rate": 1.749349593495935e-05, "loss": 0.0945, "step": 30830 }, { "epoch": 0.37603658536585366, "grad_norm": 0.498677134513855, "learning_rate": 1.749308943089431e-05, "loss": 0.0556, "step": 30835 }, { "epoch": 0.37609756097560976, "grad_norm": 0.5747068524360657, "learning_rate": 1.749268292682927e-05, "loss": 0.06, "step": 30840 }, { "epoch": 0.37615853658536585, "grad_norm": 0.43958809971809387, "learning_rate": 1.749227642276423e-05, "loss": 0.1018, "step": 30845 }, { "epoch": 0.37621951219512195, "grad_norm": 1.198586106300354, "learning_rate": 1.7491869918699188e-05, "loss": 0.0792, "step": 30850 }, { "epoch": 0.37628048780487805, "grad_norm": 0.7888531684875488, "learning_rate": 1.749146341463415e-05, "loss": 0.0841, "step": 30855 }, { "epoch": 0.37634146341463415, "grad_norm": 0.6172260046005249, "learning_rate": 1.7491056910569108e-05, "loss": 0.0848, "step": 30860 }, { "epoch": 0.37640243902439025, "grad_norm": 0.5614373087882996, "learning_rate": 1.7490650406504066e-05, "loss": 0.096, "step": 30865 }, { "epoch": 0.37646341463414634, "grad_norm": 0.6405167579650879, "learning_rate": 1.7490243902439027e-05, "loss": 0.0713, "step": 30870 }, { "epoch": 0.37652439024390244, "grad_norm": 1.379088044166565, "learning_rate": 1.7489837398373986e-05, "loss": 0.1186, "step": 30875 }, { "epoch": 0.37658536585365854, "grad_norm": 0.8673443794250488, "learning_rate": 1.7489430894308944e-05, "loss": 0.1113, "step": 30880 }, { "epoch": 0.37664634146341464, "grad_norm": 0.5226500034332275, "learning_rate": 1.7489024390243905e-05, "loss": 0.0601, "step": 30885 }, { "epoch": 0.37670731707317073, "grad_norm": 0.7920647859573364, "learning_rate": 1.7488617886178863e-05, "loss": 0.0939, "step": 30890 }, { "epoch": 0.37676829268292683, "grad_norm": 0.4801909923553467, "learning_rate": 1.748821138211382e-05, "loss": 0.0693, "step": 30895 }, { "epoch": 0.37682926829268293, "grad_norm": 0.5817773342132568, "learning_rate": 1.7487804878048783e-05, "loss": 0.0943, "step": 30900 }, { "epoch": 0.376890243902439, "grad_norm": 0.3559514582157135, "learning_rate": 1.748739837398374e-05, "loss": 0.0643, "step": 30905 }, { "epoch": 0.3769512195121951, "grad_norm": 0.5505872368812561, "learning_rate": 1.7486991869918703e-05, "loss": 0.0708, "step": 30910 }, { "epoch": 0.3770121951219512, "grad_norm": 0.5708461403846741, "learning_rate": 1.748658536585366e-05, "loss": 0.08, "step": 30915 }, { "epoch": 0.3770731707317073, "grad_norm": 0.807878315448761, "learning_rate": 1.748617886178862e-05, "loss": 0.0969, "step": 30920 }, { "epoch": 0.3771341463414634, "grad_norm": 0.42293381690979004, "learning_rate": 1.7485772357723577e-05, "loss": 0.0571, "step": 30925 }, { "epoch": 0.3771951219512195, "grad_norm": 0.577863872051239, "learning_rate": 1.748536585365854e-05, "loss": 0.1033, "step": 30930 }, { "epoch": 0.3772560975609756, "grad_norm": 0.5705013871192932, "learning_rate": 1.7484959349593497e-05, "loss": 0.056, "step": 30935 }, { "epoch": 0.3773170731707317, "grad_norm": 1.0113987922668457, "learning_rate": 1.7484552845528458e-05, "loss": 0.0933, "step": 30940 }, { "epoch": 0.3773780487804878, "grad_norm": 0.555583655834198, "learning_rate": 1.7484146341463416e-05, "loss": 0.0862, "step": 30945 }, { "epoch": 0.3774390243902439, "grad_norm": 0.8259071111679077, "learning_rate": 1.7483739837398374e-05, "loss": 0.0988, "step": 30950 }, { "epoch": 0.3775, "grad_norm": 0.5792637467384338, "learning_rate": 1.7483333333333336e-05, "loss": 0.0801, "step": 30955 }, { "epoch": 0.3775609756097561, "grad_norm": 2.409956216812134, "learning_rate": 1.7482926829268294e-05, "loss": 0.0781, "step": 30960 }, { "epoch": 0.3776219512195122, "grad_norm": 1.1101304292678833, "learning_rate": 1.7482520325203252e-05, "loss": 0.0865, "step": 30965 }, { "epoch": 0.3776829268292683, "grad_norm": 0.5184553861618042, "learning_rate": 1.7482113821138214e-05, "loss": 0.0916, "step": 30970 }, { "epoch": 0.3777439024390244, "grad_norm": 0.38480374217033386, "learning_rate": 1.7481707317073172e-05, "loss": 0.0528, "step": 30975 }, { "epoch": 0.3778048780487805, "grad_norm": 0.617098331451416, "learning_rate": 1.748130081300813e-05, "loss": 0.0719, "step": 30980 }, { "epoch": 0.3778658536585366, "grad_norm": 0.7491835951805115, "learning_rate": 1.748089430894309e-05, "loss": 0.069, "step": 30985 }, { "epoch": 0.3779268292682927, "grad_norm": 0.5387417674064636, "learning_rate": 1.748048780487805e-05, "loss": 0.0515, "step": 30990 }, { "epoch": 0.3779878048780488, "grad_norm": 0.746248185634613, "learning_rate": 1.748008130081301e-05, "loss": 0.1022, "step": 30995 }, { "epoch": 0.3780487804878049, "grad_norm": 0.5646330714225769, "learning_rate": 1.747967479674797e-05, "loss": 0.0831, "step": 31000 }, { "epoch": 0.378109756097561, "grad_norm": 0.4529075026512146, "learning_rate": 1.747926829268293e-05, "loss": 0.0963, "step": 31005 }, { "epoch": 0.3781707317073171, "grad_norm": 0.5975787043571472, "learning_rate": 1.7478861788617885e-05, "loss": 0.0609, "step": 31010 }, { "epoch": 0.3782317073170732, "grad_norm": 1.9724860191345215, "learning_rate": 1.7478455284552847e-05, "loss": 0.0854, "step": 31015 }, { "epoch": 0.37829268292682927, "grad_norm": 0.7673558592796326, "learning_rate": 1.7478048780487805e-05, "loss": 0.0739, "step": 31020 }, { "epoch": 0.37835365853658537, "grad_norm": 0.5467187166213989, "learning_rate": 1.7477642276422767e-05, "loss": 0.0765, "step": 31025 }, { "epoch": 0.37841463414634147, "grad_norm": 0.845853328704834, "learning_rate": 1.7477235772357725e-05, "loss": 0.0844, "step": 31030 }, { "epoch": 0.37847560975609756, "grad_norm": 0.5090993642807007, "learning_rate": 1.7476829268292686e-05, "loss": 0.053, "step": 31035 }, { "epoch": 0.37853658536585366, "grad_norm": 0.564690351486206, "learning_rate": 1.7476422764227644e-05, "loss": 0.0846, "step": 31040 }, { "epoch": 0.37859756097560976, "grad_norm": 0.37740710377693176, "learning_rate": 1.7476016260162603e-05, "loss": 0.0545, "step": 31045 }, { "epoch": 0.37865853658536586, "grad_norm": 0.534563422203064, "learning_rate": 1.7475609756097564e-05, "loss": 0.0634, "step": 31050 }, { "epoch": 0.37871951219512195, "grad_norm": 0.6151005625724792, "learning_rate": 1.7475203252032522e-05, "loss": 0.0758, "step": 31055 }, { "epoch": 0.37878048780487805, "grad_norm": 0.7663751244544983, "learning_rate": 1.747479674796748e-05, "loss": 0.0889, "step": 31060 }, { "epoch": 0.37884146341463415, "grad_norm": 0.7955190539360046, "learning_rate": 1.7474390243902442e-05, "loss": 0.0848, "step": 31065 }, { "epoch": 0.37890243902439025, "grad_norm": 0.6138256192207336, "learning_rate": 1.74739837398374e-05, "loss": 0.0696, "step": 31070 }, { "epoch": 0.37896341463414634, "grad_norm": 0.890373706817627, "learning_rate": 1.7473577235772358e-05, "loss": 0.0551, "step": 31075 }, { "epoch": 0.37902439024390244, "grad_norm": 0.4345153868198395, "learning_rate": 1.747317073170732e-05, "loss": 0.0732, "step": 31080 }, { "epoch": 0.37908536585365854, "grad_norm": 0.801672101020813, "learning_rate": 1.7472764227642278e-05, "loss": 0.0847, "step": 31085 }, { "epoch": 0.37914634146341464, "grad_norm": 0.902989387512207, "learning_rate": 1.747235772357724e-05, "loss": 0.0937, "step": 31090 }, { "epoch": 0.37920731707317074, "grad_norm": 0.7093743681907654, "learning_rate": 1.7471951219512197e-05, "loss": 0.0664, "step": 31095 }, { "epoch": 0.37926829268292683, "grad_norm": 2.1638524532318115, "learning_rate": 1.7471544715447155e-05, "loss": 0.113, "step": 31100 }, { "epoch": 0.37932926829268293, "grad_norm": 0.5425564050674438, "learning_rate": 1.7471138211382114e-05, "loss": 0.0821, "step": 31105 }, { "epoch": 0.37939024390243903, "grad_norm": 1.1302779912948608, "learning_rate": 1.7470731707317075e-05, "loss": 0.0878, "step": 31110 }, { "epoch": 0.3794512195121951, "grad_norm": 0.5097149014472961, "learning_rate": 1.7470325203252033e-05, "loss": 0.0809, "step": 31115 }, { "epoch": 0.3795121951219512, "grad_norm": 0.8484435081481934, "learning_rate": 1.7469918699186995e-05, "loss": 0.0881, "step": 31120 }, { "epoch": 0.3795731707317073, "grad_norm": 0.4524773955345154, "learning_rate": 1.7469512195121953e-05, "loss": 0.0901, "step": 31125 }, { "epoch": 0.3796341463414634, "grad_norm": 0.954031229019165, "learning_rate": 1.746910569105691e-05, "loss": 0.1019, "step": 31130 }, { "epoch": 0.3796951219512195, "grad_norm": 0.6010549068450928, "learning_rate": 1.7468699186991873e-05, "loss": 0.061, "step": 31135 }, { "epoch": 0.3797560975609756, "grad_norm": 0.9661627411842346, "learning_rate": 1.746829268292683e-05, "loss": 0.1106, "step": 31140 }, { "epoch": 0.3798170731707317, "grad_norm": 0.6921281814575195, "learning_rate": 1.746788617886179e-05, "loss": 0.0781, "step": 31145 }, { "epoch": 0.3798780487804878, "grad_norm": 0.9224145412445068, "learning_rate": 1.746747967479675e-05, "loss": 0.1157, "step": 31150 }, { "epoch": 0.3799390243902439, "grad_norm": 1.930169939994812, "learning_rate": 1.746707317073171e-05, "loss": 0.0517, "step": 31155 }, { "epoch": 0.38, "grad_norm": 0.4809591770172119, "learning_rate": 1.7466666666666667e-05, "loss": 0.0763, "step": 31160 }, { "epoch": 0.3800609756097561, "grad_norm": 0.6575741171836853, "learning_rate": 1.7466260162601628e-05, "loss": 0.0799, "step": 31165 }, { "epoch": 0.3801219512195122, "grad_norm": 0.4826349914073944, "learning_rate": 1.7465853658536586e-05, "loss": 0.0684, "step": 31170 }, { "epoch": 0.3801829268292683, "grad_norm": 0.7267991900444031, "learning_rate": 1.7465447154471548e-05, "loss": 0.1439, "step": 31175 }, { "epoch": 0.3802439024390244, "grad_norm": 0.7345913052558899, "learning_rate": 1.7465040650406506e-05, "loss": 0.0792, "step": 31180 }, { "epoch": 0.3803048780487805, "grad_norm": 0.4479052424430847, "learning_rate": 1.7464634146341467e-05, "loss": 0.0727, "step": 31185 }, { "epoch": 0.3803658536585366, "grad_norm": 0.5216819047927856, "learning_rate": 1.7464227642276422e-05, "loss": 0.0597, "step": 31190 }, { "epoch": 0.3804268292682927, "grad_norm": 0.7147831916809082, "learning_rate": 1.7463821138211384e-05, "loss": 0.0556, "step": 31195 }, { "epoch": 0.3804878048780488, "grad_norm": 1.0139532089233398, "learning_rate": 1.7463414634146342e-05, "loss": 0.0944, "step": 31200 }, { "epoch": 0.3805487804878049, "grad_norm": 0.9325046539306641, "learning_rate": 1.7463008130081303e-05, "loss": 0.0894, "step": 31205 }, { "epoch": 0.380609756097561, "grad_norm": 0.5001238584518433, "learning_rate": 1.746260162601626e-05, "loss": 0.0685, "step": 31210 }, { "epoch": 0.3806707317073171, "grad_norm": 0.8252271413803101, "learning_rate": 1.7462195121951223e-05, "loss": 0.0948, "step": 31215 }, { "epoch": 0.3807317073170732, "grad_norm": 0.6670200228691101, "learning_rate": 1.746178861788618e-05, "loss": 0.0781, "step": 31220 }, { "epoch": 0.3807926829268293, "grad_norm": 0.5524796843528748, "learning_rate": 1.746138211382114e-05, "loss": 0.0956, "step": 31225 }, { "epoch": 0.38085365853658537, "grad_norm": 1.1213364601135254, "learning_rate": 1.7460975609756097e-05, "loss": 0.1273, "step": 31230 }, { "epoch": 0.38091463414634147, "grad_norm": 0.6694385409355164, "learning_rate": 1.746056910569106e-05, "loss": 0.1314, "step": 31235 }, { "epoch": 0.38097560975609757, "grad_norm": 2.105727195739746, "learning_rate": 1.7460162601626017e-05, "loss": 0.0798, "step": 31240 }, { "epoch": 0.38103658536585366, "grad_norm": 1.0484620332717896, "learning_rate": 1.745975609756098e-05, "loss": 0.0894, "step": 31245 }, { "epoch": 0.38109756097560976, "grad_norm": 0.6364086866378784, "learning_rate": 1.7459349593495937e-05, "loss": 0.0862, "step": 31250 }, { "epoch": 0.38115853658536586, "grad_norm": 0.6073570847511292, "learning_rate": 1.7458943089430895e-05, "loss": 0.0521, "step": 31255 }, { "epoch": 0.38121951219512196, "grad_norm": 0.7451820373535156, "learning_rate": 1.7458536585365856e-05, "loss": 0.1078, "step": 31260 }, { "epoch": 0.38128048780487805, "grad_norm": 0.8024625778198242, "learning_rate": 1.7458130081300814e-05, "loss": 0.0765, "step": 31265 }, { "epoch": 0.38134146341463415, "grad_norm": 0.9246528744697571, "learning_rate": 1.7457723577235776e-05, "loss": 0.0896, "step": 31270 }, { "epoch": 0.38140243902439025, "grad_norm": 0.7252423167228699, "learning_rate": 1.7457317073170734e-05, "loss": 0.0696, "step": 31275 }, { "epoch": 0.38146341463414635, "grad_norm": 0.44933098554611206, "learning_rate": 1.7456910569105692e-05, "loss": 0.0676, "step": 31280 }, { "epoch": 0.38152439024390244, "grad_norm": 1.34077787399292, "learning_rate": 1.745650406504065e-05, "loss": 0.0712, "step": 31285 }, { "epoch": 0.38158536585365854, "grad_norm": 0.46583807468414307, "learning_rate": 1.7456097560975612e-05, "loss": 0.0739, "step": 31290 }, { "epoch": 0.38164634146341464, "grad_norm": 1.042755365371704, "learning_rate": 1.745569105691057e-05, "loss": 0.1006, "step": 31295 }, { "epoch": 0.38170731707317074, "grad_norm": 1.0887681245803833, "learning_rate": 1.745528455284553e-05, "loss": 0.0938, "step": 31300 }, { "epoch": 0.38176829268292684, "grad_norm": 0.941126823425293, "learning_rate": 1.745487804878049e-05, "loss": 0.0795, "step": 31305 }, { "epoch": 0.38182926829268293, "grad_norm": 0.7714206576347351, "learning_rate": 1.7454471544715448e-05, "loss": 0.0717, "step": 31310 }, { "epoch": 0.38189024390243903, "grad_norm": 0.8598034977912903, "learning_rate": 1.745406504065041e-05, "loss": 0.0661, "step": 31315 }, { "epoch": 0.38195121951219513, "grad_norm": 0.4877062737941742, "learning_rate": 1.7453658536585367e-05, "loss": 0.0626, "step": 31320 }, { "epoch": 0.3820121951219512, "grad_norm": 0.7714248895645142, "learning_rate": 1.7453252032520325e-05, "loss": 0.0589, "step": 31325 }, { "epoch": 0.3820731707317073, "grad_norm": 0.4141876697540283, "learning_rate": 1.7452845528455287e-05, "loss": 0.0446, "step": 31330 }, { "epoch": 0.3821341463414634, "grad_norm": 0.9380843639373779, "learning_rate": 1.7452439024390245e-05, "loss": 0.1051, "step": 31335 }, { "epoch": 0.3821951219512195, "grad_norm": 0.5981557369232178, "learning_rate": 1.7452032520325203e-05, "loss": 0.0903, "step": 31340 }, { "epoch": 0.3822560975609756, "grad_norm": 0.7805915474891663, "learning_rate": 1.7451626016260165e-05, "loss": 0.0872, "step": 31345 }, { "epoch": 0.3823170731707317, "grad_norm": 0.933959424495697, "learning_rate": 1.7451219512195123e-05, "loss": 0.0937, "step": 31350 }, { "epoch": 0.3823780487804878, "grad_norm": 0.4566448926925659, "learning_rate": 1.7450813008130084e-05, "loss": 0.1083, "step": 31355 }, { "epoch": 0.3824390243902439, "grad_norm": 0.6238496899604797, "learning_rate": 1.7450406504065043e-05, "loss": 0.0863, "step": 31360 }, { "epoch": 0.3825, "grad_norm": 0.6455135345458984, "learning_rate": 1.7450000000000004e-05, "loss": 0.0841, "step": 31365 }, { "epoch": 0.3825609756097561, "grad_norm": 0.6478356122970581, "learning_rate": 1.744959349593496e-05, "loss": 0.1009, "step": 31370 }, { "epoch": 0.3826219512195122, "grad_norm": 0.8548789620399475, "learning_rate": 1.744918699186992e-05, "loss": 0.0856, "step": 31375 }, { "epoch": 0.3826829268292683, "grad_norm": 0.7515632510185242, "learning_rate": 1.744878048780488e-05, "loss": 0.083, "step": 31380 }, { "epoch": 0.3827439024390244, "grad_norm": 1.0695204734802246, "learning_rate": 1.744837398373984e-05, "loss": 0.0709, "step": 31385 }, { "epoch": 0.3828048780487805, "grad_norm": 0.7764055728912354, "learning_rate": 1.7447967479674798e-05, "loss": 0.0854, "step": 31390 }, { "epoch": 0.3828658536585366, "grad_norm": 0.4995531141757965, "learning_rate": 1.744756097560976e-05, "loss": 0.0687, "step": 31395 }, { "epoch": 0.3829268292682927, "grad_norm": 0.8927136063575745, "learning_rate": 1.7447154471544718e-05, "loss": 0.1028, "step": 31400 }, { "epoch": 0.3829878048780488, "grad_norm": 0.45691853761672974, "learning_rate": 1.7446747967479676e-05, "loss": 0.0605, "step": 31405 }, { "epoch": 0.3830487804878049, "grad_norm": 0.7801370620727539, "learning_rate": 1.7446341463414634e-05, "loss": 0.0934, "step": 31410 }, { "epoch": 0.383109756097561, "grad_norm": 0.590628981590271, "learning_rate": 1.7445934959349595e-05, "loss": 0.1146, "step": 31415 }, { "epoch": 0.3831707317073171, "grad_norm": 0.6735996603965759, "learning_rate": 1.7445528455284554e-05, "loss": 0.0587, "step": 31420 }, { "epoch": 0.3832317073170732, "grad_norm": 0.62644362449646, "learning_rate": 1.7445121951219515e-05, "loss": 0.0712, "step": 31425 }, { "epoch": 0.3832926829268293, "grad_norm": 0.8155155181884766, "learning_rate": 1.7444715447154473e-05, "loss": 0.0749, "step": 31430 }, { "epoch": 0.3833536585365854, "grad_norm": 1.4726872444152832, "learning_rate": 1.744430894308943e-05, "loss": 0.0607, "step": 31435 }, { "epoch": 0.38341463414634147, "grad_norm": 1.7362490892410278, "learning_rate": 1.7443902439024393e-05, "loss": 0.0942, "step": 31440 }, { "epoch": 0.38347560975609757, "grad_norm": 0.6124587059020996, "learning_rate": 1.744349593495935e-05, "loss": 0.062, "step": 31445 }, { "epoch": 0.38353658536585367, "grad_norm": 0.8436253666877747, "learning_rate": 1.7443089430894313e-05, "loss": 0.0814, "step": 31450 }, { "epoch": 0.38359756097560976, "grad_norm": 0.6804364323616028, "learning_rate": 1.744268292682927e-05, "loss": 0.0798, "step": 31455 }, { "epoch": 0.38365853658536586, "grad_norm": 0.9160698056221008, "learning_rate": 1.744227642276423e-05, "loss": 0.0743, "step": 31460 }, { "epoch": 0.38371951219512196, "grad_norm": 0.9345948100090027, "learning_rate": 1.7441869918699187e-05, "loss": 0.0775, "step": 31465 }, { "epoch": 0.38378048780487806, "grad_norm": 0.7838872075080872, "learning_rate": 1.744146341463415e-05, "loss": 0.0953, "step": 31470 }, { "epoch": 0.38384146341463415, "grad_norm": 0.5560423731803894, "learning_rate": 1.7441056910569107e-05, "loss": 0.0895, "step": 31475 }, { "epoch": 0.38390243902439025, "grad_norm": 0.9441549777984619, "learning_rate": 1.7440650406504068e-05, "loss": 0.1081, "step": 31480 }, { "epoch": 0.38396341463414635, "grad_norm": 0.6195293068885803, "learning_rate": 1.7440243902439026e-05, "loss": 0.0813, "step": 31485 }, { "epoch": 0.38402439024390245, "grad_norm": 0.6163132190704346, "learning_rate": 1.7439837398373984e-05, "loss": 0.0798, "step": 31490 }, { "epoch": 0.38408536585365854, "grad_norm": 1.2786881923675537, "learning_rate": 1.7439430894308942e-05, "loss": 0.0672, "step": 31495 }, { "epoch": 0.38414634146341464, "grad_norm": 1.120011806488037, "learning_rate": 1.7439024390243904e-05, "loss": 0.0989, "step": 31500 }, { "epoch": 0.38420731707317074, "grad_norm": 0.4533947706222534, "learning_rate": 1.7438617886178862e-05, "loss": 0.0737, "step": 31505 }, { "epoch": 0.38426829268292684, "grad_norm": 0.4758807420730591, "learning_rate": 1.7438211382113824e-05, "loss": 0.0498, "step": 31510 }, { "epoch": 0.38432926829268294, "grad_norm": 0.6891293525695801, "learning_rate": 1.7437804878048782e-05, "loss": 0.1064, "step": 31515 }, { "epoch": 0.38439024390243903, "grad_norm": 0.45022472739219666, "learning_rate": 1.743739837398374e-05, "loss": 0.0865, "step": 31520 }, { "epoch": 0.38445121951219513, "grad_norm": 1.3910367488861084, "learning_rate": 1.74369918699187e-05, "loss": 0.1015, "step": 31525 }, { "epoch": 0.38451219512195123, "grad_norm": 0.7963515520095825, "learning_rate": 1.743658536585366e-05, "loss": 0.0757, "step": 31530 }, { "epoch": 0.3845731707317073, "grad_norm": 0.6028834581375122, "learning_rate": 1.743617886178862e-05, "loss": 0.1103, "step": 31535 }, { "epoch": 0.3846341463414634, "grad_norm": 0.7270348072052002, "learning_rate": 1.743577235772358e-05, "loss": 0.1122, "step": 31540 }, { "epoch": 0.3846951219512195, "grad_norm": 0.5392693877220154, "learning_rate": 1.743536585365854e-05, "loss": 0.1114, "step": 31545 }, { "epoch": 0.3847560975609756, "grad_norm": 0.7281160354614258, "learning_rate": 1.7434959349593495e-05, "loss": 0.1205, "step": 31550 }, { "epoch": 0.3848170731707317, "grad_norm": 1.1510534286499023, "learning_rate": 1.7434552845528457e-05, "loss": 0.0651, "step": 31555 }, { "epoch": 0.3848780487804878, "grad_norm": 0.5730453133583069, "learning_rate": 1.7434146341463415e-05, "loss": 0.0611, "step": 31560 }, { "epoch": 0.3849390243902439, "grad_norm": 1.4058222770690918, "learning_rate": 1.7433739837398377e-05, "loss": 0.0786, "step": 31565 }, { "epoch": 0.385, "grad_norm": 1.7846007347106934, "learning_rate": 1.7433333333333335e-05, "loss": 0.084, "step": 31570 }, { "epoch": 0.3850609756097561, "grad_norm": 0.5567824840545654, "learning_rate": 1.7432926829268296e-05, "loss": 0.05, "step": 31575 }, { "epoch": 0.3851219512195122, "grad_norm": 0.7959249019622803, "learning_rate": 1.7432520325203254e-05, "loss": 0.0747, "step": 31580 }, { "epoch": 0.3851829268292683, "grad_norm": 0.8284597396850586, "learning_rate": 1.7432113821138212e-05, "loss": 0.0735, "step": 31585 }, { "epoch": 0.3852439024390244, "grad_norm": 0.37219125032424927, "learning_rate": 1.743170731707317e-05, "loss": 0.0692, "step": 31590 }, { "epoch": 0.3853048780487805, "grad_norm": 7.639634132385254, "learning_rate": 1.7431300813008132e-05, "loss": 0.1198, "step": 31595 }, { "epoch": 0.3853658536585366, "grad_norm": 0.49986881017684937, "learning_rate": 1.743089430894309e-05, "loss": 0.0811, "step": 31600 }, { "epoch": 0.3854268292682927, "grad_norm": 0.9286925196647644, "learning_rate": 1.7430487804878052e-05, "loss": 0.0927, "step": 31605 }, { "epoch": 0.3854878048780488, "grad_norm": 1.524964451789856, "learning_rate": 1.743008130081301e-05, "loss": 0.0984, "step": 31610 }, { "epoch": 0.3855487804878049, "grad_norm": 0.5020866990089417, "learning_rate": 1.7429674796747968e-05, "loss": 0.1124, "step": 31615 }, { "epoch": 0.385609756097561, "grad_norm": 0.692038357257843, "learning_rate": 1.742926829268293e-05, "loss": 0.0849, "step": 31620 }, { "epoch": 0.3856707317073171, "grad_norm": 0.5628652572631836, "learning_rate": 1.7428861788617888e-05, "loss": 0.0696, "step": 31625 }, { "epoch": 0.3857317073170732, "grad_norm": 1.1830352544784546, "learning_rate": 1.742845528455285e-05, "loss": 0.0706, "step": 31630 }, { "epoch": 0.3857926829268293, "grad_norm": 0.7636201977729797, "learning_rate": 1.7428048780487807e-05, "loss": 0.0976, "step": 31635 }, { "epoch": 0.3858536585365854, "grad_norm": 0.5384325385093689, "learning_rate": 1.7427642276422765e-05, "loss": 0.1053, "step": 31640 }, { "epoch": 0.3859146341463415, "grad_norm": 0.502019464969635, "learning_rate": 1.7427235772357724e-05, "loss": 0.0735, "step": 31645 }, { "epoch": 0.38597560975609757, "grad_norm": 0.6414085626602173, "learning_rate": 1.7426829268292685e-05, "loss": 0.0658, "step": 31650 }, { "epoch": 0.38603658536585367, "grad_norm": 0.6209005117416382, "learning_rate": 1.7426422764227643e-05, "loss": 0.0431, "step": 31655 }, { "epoch": 0.38609756097560977, "grad_norm": 0.8818086385726929, "learning_rate": 1.7426016260162605e-05, "loss": 0.1085, "step": 31660 }, { "epoch": 0.38615853658536586, "grad_norm": 0.5118094086647034, "learning_rate": 1.7425609756097563e-05, "loss": 0.0733, "step": 31665 }, { "epoch": 0.38621951219512196, "grad_norm": 0.46461668610572815, "learning_rate": 1.742520325203252e-05, "loss": 0.0638, "step": 31670 }, { "epoch": 0.38628048780487806, "grad_norm": 0.4098633825778961, "learning_rate": 1.742479674796748e-05, "loss": 0.0658, "step": 31675 }, { "epoch": 0.38634146341463416, "grad_norm": 1.2067687511444092, "learning_rate": 1.742439024390244e-05, "loss": 0.091, "step": 31680 }, { "epoch": 0.38640243902439025, "grad_norm": 0.43904587626457214, "learning_rate": 1.74239837398374e-05, "loss": 0.0886, "step": 31685 }, { "epoch": 0.38646341463414635, "grad_norm": 0.5758240818977356, "learning_rate": 1.742357723577236e-05, "loss": 0.1039, "step": 31690 }, { "epoch": 0.38652439024390245, "grad_norm": 0.47687214612960815, "learning_rate": 1.742317073170732e-05, "loss": 0.0673, "step": 31695 }, { "epoch": 0.38658536585365855, "grad_norm": 0.5338634848594666, "learning_rate": 1.7422764227642277e-05, "loss": 0.0754, "step": 31700 }, { "epoch": 0.38664634146341464, "grad_norm": 0.7752748131752014, "learning_rate": 1.7422357723577238e-05, "loss": 0.068, "step": 31705 }, { "epoch": 0.38670731707317074, "grad_norm": 0.626963198184967, "learning_rate": 1.7421951219512196e-05, "loss": 0.0769, "step": 31710 }, { "epoch": 0.38676829268292684, "grad_norm": 0.6973700523376465, "learning_rate": 1.7421544715447158e-05, "loss": 0.1039, "step": 31715 }, { "epoch": 0.38682926829268294, "grad_norm": 1.1311097145080566, "learning_rate": 1.7421138211382116e-05, "loss": 0.1121, "step": 31720 }, { "epoch": 0.38689024390243903, "grad_norm": 0.6432369351387024, "learning_rate": 1.7420731707317077e-05, "loss": 0.0754, "step": 31725 }, { "epoch": 0.38695121951219513, "grad_norm": 0.9501160383224487, "learning_rate": 1.7420325203252032e-05, "loss": 0.0698, "step": 31730 }, { "epoch": 0.38701219512195123, "grad_norm": 0.8548575639724731, "learning_rate": 1.7419918699186994e-05, "loss": 0.0646, "step": 31735 }, { "epoch": 0.38707317073170733, "grad_norm": 1.1077872514724731, "learning_rate": 1.7419512195121952e-05, "loss": 0.1082, "step": 31740 }, { "epoch": 0.3871341463414634, "grad_norm": 0.7324093580245972, "learning_rate": 1.7419105691056913e-05, "loss": 0.0883, "step": 31745 }, { "epoch": 0.3871951219512195, "grad_norm": 0.7361893653869629, "learning_rate": 1.741869918699187e-05, "loss": 0.0915, "step": 31750 }, { "epoch": 0.3872560975609756, "grad_norm": 0.7622988224029541, "learning_rate": 1.7418292682926833e-05, "loss": 0.0943, "step": 31755 }, { "epoch": 0.3873170731707317, "grad_norm": 0.8000344634056091, "learning_rate": 1.7417886178861788e-05, "loss": 0.0811, "step": 31760 }, { "epoch": 0.3873780487804878, "grad_norm": 0.6136391162872314, "learning_rate": 1.741747967479675e-05, "loss": 0.0812, "step": 31765 }, { "epoch": 0.3874390243902439, "grad_norm": 0.8337301015853882, "learning_rate": 1.7417073170731707e-05, "loss": 0.0894, "step": 31770 }, { "epoch": 0.3875, "grad_norm": 1.170972228050232, "learning_rate": 1.741666666666667e-05, "loss": 0.085, "step": 31775 }, { "epoch": 0.3875609756097561, "grad_norm": 0.4645863175392151, "learning_rate": 1.7416260162601627e-05, "loss": 0.0828, "step": 31780 }, { "epoch": 0.3876219512195122, "grad_norm": 0.7132540941238403, "learning_rate": 1.741585365853659e-05, "loss": 0.065, "step": 31785 }, { "epoch": 0.3876829268292683, "grad_norm": 0.49823492765426636, "learning_rate": 1.7415447154471547e-05, "loss": 0.0573, "step": 31790 }, { "epoch": 0.3877439024390244, "grad_norm": 1.2947347164154053, "learning_rate": 1.7415040650406505e-05, "loss": 0.0837, "step": 31795 }, { "epoch": 0.3878048780487805, "grad_norm": 1.0845367908477783, "learning_rate": 1.7414634146341466e-05, "loss": 0.0923, "step": 31800 }, { "epoch": 0.3878658536585366, "grad_norm": 1.2134735584259033, "learning_rate": 1.7414227642276424e-05, "loss": 0.1045, "step": 31805 }, { "epoch": 0.3879268292682927, "grad_norm": 2.6237828731536865, "learning_rate": 1.7413821138211386e-05, "loss": 0.0758, "step": 31810 }, { "epoch": 0.3879878048780488, "grad_norm": 0.44648632407188416, "learning_rate": 1.7413414634146344e-05, "loss": 0.0606, "step": 31815 }, { "epoch": 0.3880487804878049, "grad_norm": 0.5529406666755676, "learning_rate": 1.7413008130081302e-05, "loss": 0.0744, "step": 31820 }, { "epoch": 0.388109756097561, "grad_norm": 0.6026673913002014, "learning_rate": 1.741260162601626e-05, "loss": 0.0866, "step": 31825 }, { "epoch": 0.3881707317073171, "grad_norm": 1.2001982927322388, "learning_rate": 1.7412195121951222e-05, "loss": 0.0977, "step": 31830 }, { "epoch": 0.3882317073170732, "grad_norm": 0.625917375087738, "learning_rate": 1.741178861788618e-05, "loss": 0.0862, "step": 31835 }, { "epoch": 0.3882926829268293, "grad_norm": 1.001815915107727, "learning_rate": 1.741138211382114e-05, "loss": 0.0669, "step": 31840 }, { "epoch": 0.3883536585365854, "grad_norm": 0.608554482460022, "learning_rate": 1.74109756097561e-05, "loss": 0.0759, "step": 31845 }, { "epoch": 0.3884146341463415, "grad_norm": 0.9118643999099731, "learning_rate": 1.7410569105691058e-05, "loss": 0.0567, "step": 31850 }, { "epoch": 0.3884756097560976, "grad_norm": 0.7677791714668274, "learning_rate": 1.7410162601626016e-05, "loss": 0.0647, "step": 31855 }, { "epoch": 0.38853658536585367, "grad_norm": 0.4422415792942047, "learning_rate": 1.7409756097560977e-05, "loss": 0.0926, "step": 31860 }, { "epoch": 0.38859756097560977, "grad_norm": 0.660839319229126, "learning_rate": 1.7409349593495935e-05, "loss": 0.0922, "step": 31865 }, { "epoch": 0.38865853658536587, "grad_norm": 0.9508630633354187, "learning_rate": 1.7408943089430897e-05, "loss": 0.0818, "step": 31870 }, { "epoch": 0.38871951219512196, "grad_norm": 0.5999927520751953, "learning_rate": 1.7408536585365855e-05, "loss": 0.0711, "step": 31875 }, { "epoch": 0.38878048780487806, "grad_norm": 0.8776038289070129, "learning_rate": 1.7408130081300813e-05, "loss": 0.0851, "step": 31880 }, { "epoch": 0.38884146341463416, "grad_norm": 1.1325033903121948, "learning_rate": 1.7407723577235775e-05, "loss": 0.0832, "step": 31885 }, { "epoch": 0.38890243902439026, "grad_norm": 0.6125277876853943, "learning_rate": 1.7407317073170733e-05, "loss": 0.1001, "step": 31890 }, { "epoch": 0.38896341463414635, "grad_norm": 0.9116683006286621, "learning_rate": 1.7406910569105694e-05, "loss": 0.0661, "step": 31895 }, { "epoch": 0.38902439024390245, "grad_norm": 0.6859461069107056, "learning_rate": 1.7406504065040652e-05, "loss": 0.052, "step": 31900 }, { "epoch": 0.38908536585365855, "grad_norm": 0.41091424226760864, "learning_rate": 1.740609756097561e-05, "loss": 0.0538, "step": 31905 }, { "epoch": 0.38914634146341465, "grad_norm": 0.5095799565315247, "learning_rate": 1.740569105691057e-05, "loss": 0.0596, "step": 31910 }, { "epoch": 0.38920731707317074, "grad_norm": 0.6139116883277893, "learning_rate": 1.740528455284553e-05, "loss": 0.0529, "step": 31915 }, { "epoch": 0.38926829268292684, "grad_norm": 0.7913233637809753, "learning_rate": 1.740487804878049e-05, "loss": 0.0877, "step": 31920 }, { "epoch": 0.38932926829268294, "grad_norm": 0.47251856327056885, "learning_rate": 1.740447154471545e-05, "loss": 0.0877, "step": 31925 }, { "epoch": 0.38939024390243904, "grad_norm": 0.362050324678421, "learning_rate": 1.7404065040650408e-05, "loss": 0.0921, "step": 31930 }, { "epoch": 0.38945121951219513, "grad_norm": 0.9722241759300232, "learning_rate": 1.740365853658537e-05, "loss": 0.0786, "step": 31935 }, { "epoch": 0.38951219512195123, "grad_norm": 0.5825849771499634, "learning_rate": 1.7403252032520324e-05, "loss": 0.0617, "step": 31940 }, { "epoch": 0.38957317073170733, "grad_norm": 0.7870461940765381, "learning_rate": 1.7402845528455286e-05, "loss": 0.111, "step": 31945 }, { "epoch": 0.3896341463414634, "grad_norm": 2.0037178993225098, "learning_rate": 1.7402439024390244e-05, "loss": 0.0887, "step": 31950 }, { "epoch": 0.3896951219512195, "grad_norm": 0.6571166515350342, "learning_rate": 1.7402032520325205e-05, "loss": 0.1062, "step": 31955 }, { "epoch": 0.3897560975609756, "grad_norm": 1.0303559303283691, "learning_rate": 1.7401626016260164e-05, "loss": 0.0713, "step": 31960 }, { "epoch": 0.3898170731707317, "grad_norm": 1.1047272682189941, "learning_rate": 1.7401219512195125e-05, "loss": 0.1205, "step": 31965 }, { "epoch": 0.3898780487804878, "grad_norm": 1.4785957336425781, "learning_rate": 1.7400813008130083e-05, "loss": 0.0696, "step": 31970 }, { "epoch": 0.3899390243902439, "grad_norm": 0.5467790365219116, "learning_rate": 1.740040650406504e-05, "loss": 0.0754, "step": 31975 }, { "epoch": 0.39, "grad_norm": 2.125115156173706, "learning_rate": 1.7400000000000003e-05, "loss": 0.0591, "step": 31980 }, { "epoch": 0.3900609756097561, "grad_norm": 0.7482975721359253, "learning_rate": 1.739959349593496e-05, "loss": 0.0759, "step": 31985 }, { "epoch": 0.3901219512195122, "grad_norm": 0.6164506077766418, "learning_rate": 1.7399186991869922e-05, "loss": 0.0906, "step": 31990 }, { "epoch": 0.3901829268292683, "grad_norm": 0.42388972640037537, "learning_rate": 1.739878048780488e-05, "loss": 0.0659, "step": 31995 }, { "epoch": 0.3902439024390244, "grad_norm": 0.9815794229507446, "learning_rate": 1.739837398373984e-05, "loss": 0.072, "step": 32000 }, { "epoch": 0.3903048780487805, "grad_norm": 1.3812787532806396, "learning_rate": 1.7397967479674797e-05, "loss": 0.0728, "step": 32005 }, { "epoch": 0.3903658536585366, "grad_norm": 0.7200108170509338, "learning_rate": 1.739756097560976e-05, "loss": 0.0718, "step": 32010 }, { "epoch": 0.3904268292682927, "grad_norm": 0.6295621395111084, "learning_rate": 1.7397154471544716e-05, "loss": 0.0627, "step": 32015 }, { "epoch": 0.3904878048780488, "grad_norm": 1.2545552253723145, "learning_rate": 1.7396747967479678e-05, "loss": 0.1229, "step": 32020 }, { "epoch": 0.3905487804878049, "grad_norm": 0.9039099216461182, "learning_rate": 1.7396341463414636e-05, "loss": 0.0819, "step": 32025 }, { "epoch": 0.390609756097561, "grad_norm": 0.6546598672866821, "learning_rate": 1.7395934959349594e-05, "loss": 0.0699, "step": 32030 }, { "epoch": 0.3906707317073171, "grad_norm": 0.44954875111579895, "learning_rate": 1.7395528455284552e-05, "loss": 0.0855, "step": 32035 }, { "epoch": 0.3907317073170732, "grad_norm": 0.4025489389896393, "learning_rate": 1.7395121951219514e-05, "loss": 0.0652, "step": 32040 }, { "epoch": 0.3907926829268293, "grad_norm": 0.6416559815406799, "learning_rate": 1.7394715447154472e-05, "loss": 0.0967, "step": 32045 }, { "epoch": 0.3908536585365854, "grad_norm": 0.7127465009689331, "learning_rate": 1.7394308943089434e-05, "loss": 0.0618, "step": 32050 }, { "epoch": 0.3909146341463415, "grad_norm": 0.6710628867149353, "learning_rate": 1.739390243902439e-05, "loss": 0.0674, "step": 32055 }, { "epoch": 0.3909756097560976, "grad_norm": 0.7810453176498413, "learning_rate": 1.739349593495935e-05, "loss": 0.0693, "step": 32060 }, { "epoch": 0.3910365853658537, "grad_norm": 0.8598901033401489, "learning_rate": 1.739308943089431e-05, "loss": 0.0591, "step": 32065 }, { "epoch": 0.39109756097560977, "grad_norm": 0.6309653520584106, "learning_rate": 1.739268292682927e-05, "loss": 0.0621, "step": 32070 }, { "epoch": 0.39115853658536587, "grad_norm": 0.45041534304618835, "learning_rate": 1.739227642276423e-05, "loss": 0.0667, "step": 32075 }, { "epoch": 0.39121951219512197, "grad_norm": 4.281717777252197, "learning_rate": 1.739186991869919e-05, "loss": 0.0903, "step": 32080 }, { "epoch": 0.39128048780487806, "grad_norm": 1.6004695892333984, "learning_rate": 1.7391463414634147e-05, "loss": 0.0743, "step": 32085 }, { "epoch": 0.39134146341463416, "grad_norm": 0.876018762588501, "learning_rate": 1.7391056910569105e-05, "loss": 0.1221, "step": 32090 }, { "epoch": 0.39140243902439026, "grad_norm": 0.6399013996124268, "learning_rate": 1.7390650406504067e-05, "loss": 0.0745, "step": 32095 }, { "epoch": 0.39146341463414636, "grad_norm": 0.4551844894886017, "learning_rate": 1.7390243902439025e-05, "loss": 0.0859, "step": 32100 }, { "epoch": 0.39152439024390245, "grad_norm": 0.6042444705963135, "learning_rate": 1.7389837398373986e-05, "loss": 0.0852, "step": 32105 }, { "epoch": 0.39158536585365855, "grad_norm": 0.6986109018325806, "learning_rate": 1.7389430894308945e-05, "loss": 0.076, "step": 32110 }, { "epoch": 0.39164634146341465, "grad_norm": 1.4408131837844849, "learning_rate": 1.7389024390243906e-05, "loss": 0.123, "step": 32115 }, { "epoch": 0.39170731707317075, "grad_norm": 0.5344316363334656, "learning_rate": 1.738861788617886e-05, "loss": 0.0663, "step": 32120 }, { "epoch": 0.39176829268292684, "grad_norm": 0.8175415992736816, "learning_rate": 1.7388211382113822e-05, "loss": 0.1347, "step": 32125 }, { "epoch": 0.39182926829268294, "grad_norm": 0.690942645072937, "learning_rate": 1.738780487804878e-05, "loss": 0.0768, "step": 32130 }, { "epoch": 0.39189024390243904, "grad_norm": 1.1165351867675781, "learning_rate": 1.7387398373983742e-05, "loss": 0.0797, "step": 32135 }, { "epoch": 0.39195121951219514, "grad_norm": 0.5352873206138611, "learning_rate": 1.73869918699187e-05, "loss": 0.0815, "step": 32140 }, { "epoch": 0.39201219512195123, "grad_norm": 0.9748357534408569, "learning_rate": 1.738658536585366e-05, "loss": 0.1107, "step": 32145 }, { "epoch": 0.39207317073170733, "grad_norm": 1.3880771398544312, "learning_rate": 1.738617886178862e-05, "loss": 0.0677, "step": 32150 }, { "epoch": 0.39213414634146343, "grad_norm": 0.630460262298584, "learning_rate": 1.7385772357723578e-05, "loss": 0.0772, "step": 32155 }, { "epoch": 0.3921951219512195, "grad_norm": 0.46902555227279663, "learning_rate": 1.738536585365854e-05, "loss": 0.07, "step": 32160 }, { "epoch": 0.3922560975609756, "grad_norm": 0.7781217098236084, "learning_rate": 1.7384959349593498e-05, "loss": 0.12, "step": 32165 }, { "epoch": 0.3923170731707317, "grad_norm": 0.9349903464317322, "learning_rate": 1.7384552845528456e-05, "loss": 0.098, "step": 32170 }, { "epoch": 0.3923780487804878, "grad_norm": 0.7051296234130859, "learning_rate": 1.7384146341463417e-05, "loss": 0.0658, "step": 32175 }, { "epoch": 0.3924390243902439, "grad_norm": 1.5441997051239014, "learning_rate": 1.7383739837398375e-05, "loss": 0.0713, "step": 32180 }, { "epoch": 0.3925, "grad_norm": 0.5220882296562195, "learning_rate": 1.7383333333333333e-05, "loss": 0.0903, "step": 32185 }, { "epoch": 0.3925609756097561, "grad_norm": 0.9648686647415161, "learning_rate": 1.7382926829268295e-05, "loss": 0.0744, "step": 32190 }, { "epoch": 0.3926219512195122, "grad_norm": 0.8295511603355408, "learning_rate": 1.7382520325203253e-05, "loss": 0.0701, "step": 32195 }, { "epoch": 0.3926829268292683, "grad_norm": 0.6858791708946228, "learning_rate": 1.7382113821138215e-05, "loss": 0.0868, "step": 32200 }, { "epoch": 0.3927439024390244, "grad_norm": 0.6120128035545349, "learning_rate": 1.7381707317073173e-05, "loss": 0.074, "step": 32205 }, { "epoch": 0.3928048780487805, "grad_norm": 0.8189988136291504, "learning_rate": 1.738130081300813e-05, "loss": 0.0862, "step": 32210 }, { "epoch": 0.3928658536585366, "grad_norm": 0.6938765048980713, "learning_rate": 1.738089430894309e-05, "loss": 0.1008, "step": 32215 }, { "epoch": 0.3929268292682927, "grad_norm": 0.9052638411521912, "learning_rate": 1.738048780487805e-05, "loss": 0.0814, "step": 32220 }, { "epoch": 0.3929878048780488, "grad_norm": 0.821090817451477, "learning_rate": 1.738008130081301e-05, "loss": 0.1097, "step": 32225 }, { "epoch": 0.3930487804878049, "grad_norm": 0.5426235198974609, "learning_rate": 1.737967479674797e-05, "loss": 0.0643, "step": 32230 }, { "epoch": 0.393109756097561, "grad_norm": 0.8436091542243958, "learning_rate": 1.737926829268293e-05, "loss": 0.0798, "step": 32235 }, { "epoch": 0.3931707317073171, "grad_norm": 0.7972288131713867, "learning_rate": 1.7378861788617886e-05, "loss": 0.0697, "step": 32240 }, { "epoch": 0.3932317073170732, "grad_norm": 0.6274355053901672, "learning_rate": 1.7378455284552848e-05, "loss": 0.0976, "step": 32245 }, { "epoch": 0.3932926829268293, "grad_norm": 0.8110036849975586, "learning_rate": 1.7378048780487806e-05, "loss": 0.0523, "step": 32250 }, { "epoch": 0.3933536585365854, "grad_norm": 0.9900693297386169, "learning_rate": 1.7377642276422768e-05, "loss": 0.0611, "step": 32255 }, { "epoch": 0.3934146341463415, "grad_norm": 0.8384750485420227, "learning_rate": 1.7377235772357726e-05, "loss": 0.0739, "step": 32260 }, { "epoch": 0.3934756097560976, "grad_norm": 0.6868626475334167, "learning_rate": 1.7376829268292684e-05, "loss": 0.0658, "step": 32265 }, { "epoch": 0.3935365853658537, "grad_norm": 1.2792398929595947, "learning_rate": 1.7376422764227642e-05, "loss": 0.0545, "step": 32270 }, { "epoch": 0.3935975609756098, "grad_norm": 0.7470294833183289, "learning_rate": 1.7376016260162603e-05, "loss": 0.0814, "step": 32275 }, { "epoch": 0.39365853658536587, "grad_norm": 0.36625441908836365, "learning_rate": 1.737560975609756e-05, "loss": 0.0763, "step": 32280 }, { "epoch": 0.39371951219512197, "grad_norm": 0.9494574069976807, "learning_rate": 1.7375203252032523e-05, "loss": 0.1031, "step": 32285 }, { "epoch": 0.39378048780487807, "grad_norm": 0.5994823575019836, "learning_rate": 1.737479674796748e-05, "loss": 0.0856, "step": 32290 }, { "epoch": 0.39384146341463416, "grad_norm": 0.6576153635978699, "learning_rate": 1.7374390243902443e-05, "loss": 0.0993, "step": 32295 }, { "epoch": 0.39390243902439026, "grad_norm": 0.5395680665969849, "learning_rate": 1.7373983739837398e-05, "loss": 0.0809, "step": 32300 }, { "epoch": 0.39396341463414636, "grad_norm": 0.8588739633560181, "learning_rate": 1.737357723577236e-05, "loss": 0.099, "step": 32305 }, { "epoch": 0.39402439024390246, "grad_norm": 0.5731980204582214, "learning_rate": 1.7373170731707317e-05, "loss": 0.0531, "step": 32310 }, { "epoch": 0.39408536585365855, "grad_norm": 0.5805833339691162, "learning_rate": 1.737276422764228e-05, "loss": 0.0963, "step": 32315 }, { "epoch": 0.39414634146341465, "grad_norm": 1.1486625671386719, "learning_rate": 1.7372357723577237e-05, "loss": 0.0955, "step": 32320 }, { "epoch": 0.39420731707317075, "grad_norm": 1.010900855064392, "learning_rate": 1.73719512195122e-05, "loss": 0.0744, "step": 32325 }, { "epoch": 0.39426829268292685, "grad_norm": 0.6267419457435608, "learning_rate": 1.7371544715447156e-05, "loss": 0.0691, "step": 32330 }, { "epoch": 0.39432926829268294, "grad_norm": 0.9156337976455688, "learning_rate": 1.7371138211382115e-05, "loss": 0.0762, "step": 32335 }, { "epoch": 0.39439024390243904, "grad_norm": 1.3961660861968994, "learning_rate": 1.7370731707317076e-05, "loss": 0.0836, "step": 32340 }, { "epoch": 0.39445121951219514, "grad_norm": 1.1636674404144287, "learning_rate": 1.7370325203252034e-05, "loss": 0.1, "step": 32345 }, { "epoch": 0.39451219512195124, "grad_norm": 0.4024312496185303, "learning_rate": 1.7369918699186992e-05, "loss": 0.0608, "step": 32350 }, { "epoch": 0.39457317073170733, "grad_norm": 0.6423847675323486, "learning_rate": 1.7369512195121954e-05, "loss": 0.0855, "step": 32355 }, { "epoch": 0.39463414634146343, "grad_norm": 0.6511117815971375, "learning_rate": 1.7369105691056912e-05, "loss": 0.0489, "step": 32360 }, { "epoch": 0.39469512195121953, "grad_norm": 0.6658669710159302, "learning_rate": 1.736869918699187e-05, "loss": 0.0545, "step": 32365 }, { "epoch": 0.3947560975609756, "grad_norm": 1.2603681087493896, "learning_rate": 1.736829268292683e-05, "loss": 0.0818, "step": 32370 }, { "epoch": 0.3948170731707317, "grad_norm": 0.5668669939041138, "learning_rate": 1.736788617886179e-05, "loss": 0.0775, "step": 32375 }, { "epoch": 0.3948780487804878, "grad_norm": 1.3392988443374634, "learning_rate": 1.736747967479675e-05, "loss": 0.0907, "step": 32380 }, { "epoch": 0.3949390243902439, "grad_norm": 0.574027419090271, "learning_rate": 1.736707317073171e-05, "loss": 0.0693, "step": 32385 }, { "epoch": 0.395, "grad_norm": 1.192718267440796, "learning_rate": 1.7366666666666668e-05, "loss": 0.0819, "step": 32390 }, { "epoch": 0.3950609756097561, "grad_norm": 0.47581109404563904, "learning_rate": 1.7366260162601626e-05, "loss": 0.0653, "step": 32395 }, { "epoch": 0.3951219512195122, "grad_norm": 0.925046980381012, "learning_rate": 1.7365853658536587e-05, "loss": 0.0796, "step": 32400 }, { "epoch": 0.3951829268292683, "grad_norm": 0.6842674016952515, "learning_rate": 1.7365447154471545e-05, "loss": 0.0819, "step": 32405 }, { "epoch": 0.3952439024390244, "grad_norm": 1.1623640060424805, "learning_rate": 1.7365040650406507e-05, "loss": 0.061, "step": 32410 }, { "epoch": 0.3953048780487805, "grad_norm": 0.3790771961212158, "learning_rate": 1.7364634146341465e-05, "loss": 0.067, "step": 32415 }, { "epoch": 0.3953658536585366, "grad_norm": 0.8355051279067993, "learning_rate": 1.7364227642276423e-05, "loss": 0.0664, "step": 32420 }, { "epoch": 0.3954268292682927, "grad_norm": 0.499169260263443, "learning_rate": 1.7363821138211385e-05, "loss": 0.0432, "step": 32425 }, { "epoch": 0.3954878048780488, "grad_norm": 0.5339771509170532, "learning_rate": 1.7363414634146343e-05, "loss": 0.0596, "step": 32430 }, { "epoch": 0.3955487804878049, "grad_norm": 0.5539813041687012, "learning_rate": 1.73630081300813e-05, "loss": 0.0719, "step": 32435 }, { "epoch": 0.395609756097561, "grad_norm": 0.7561915516853333, "learning_rate": 1.7362601626016262e-05, "loss": 0.0876, "step": 32440 }, { "epoch": 0.3956707317073171, "grad_norm": 0.5935922861099243, "learning_rate": 1.736219512195122e-05, "loss": 0.0699, "step": 32445 }, { "epoch": 0.3957317073170732, "grad_norm": 0.8597303032875061, "learning_rate": 1.736178861788618e-05, "loss": 0.0783, "step": 32450 }, { "epoch": 0.3957926829268293, "grad_norm": 0.8028575778007507, "learning_rate": 1.736138211382114e-05, "loss": 0.0742, "step": 32455 }, { "epoch": 0.3958536585365854, "grad_norm": 0.8849906325340271, "learning_rate": 1.7360975609756098e-05, "loss": 0.0767, "step": 32460 }, { "epoch": 0.3959146341463415, "grad_norm": 0.47077158093452454, "learning_rate": 1.736056910569106e-05, "loss": 0.0804, "step": 32465 }, { "epoch": 0.3959756097560976, "grad_norm": 0.9465569257736206, "learning_rate": 1.7360162601626018e-05, "loss": 0.1147, "step": 32470 }, { "epoch": 0.3960365853658537, "grad_norm": 0.617671012878418, "learning_rate": 1.735975609756098e-05, "loss": 0.104, "step": 32475 }, { "epoch": 0.3960975609756098, "grad_norm": 0.5493221282958984, "learning_rate": 1.7359349593495934e-05, "loss": 0.1111, "step": 32480 }, { "epoch": 0.3961585365853659, "grad_norm": 0.5496509075164795, "learning_rate": 1.7358943089430896e-05, "loss": 0.1196, "step": 32485 }, { "epoch": 0.39621951219512197, "grad_norm": 0.9734542369842529, "learning_rate": 1.7358536585365854e-05, "loss": 0.1203, "step": 32490 }, { "epoch": 0.39628048780487807, "grad_norm": 0.8053534030914307, "learning_rate": 1.7358130081300815e-05, "loss": 0.0716, "step": 32495 }, { "epoch": 0.39634146341463417, "grad_norm": 0.49177247285842896, "learning_rate": 1.7357723577235773e-05, "loss": 0.0751, "step": 32500 }, { "epoch": 0.39640243902439026, "grad_norm": 1.261271595954895, "learning_rate": 1.7357317073170735e-05, "loss": 0.0853, "step": 32505 }, { "epoch": 0.39646341463414636, "grad_norm": 0.8647883534431458, "learning_rate": 1.7356910569105693e-05, "loss": 0.0823, "step": 32510 }, { "epoch": 0.39652439024390246, "grad_norm": 0.6883294582366943, "learning_rate": 1.735650406504065e-05, "loss": 0.089, "step": 32515 }, { "epoch": 0.39658536585365856, "grad_norm": 0.5414827466011047, "learning_rate": 1.7356097560975613e-05, "loss": 0.0691, "step": 32520 }, { "epoch": 0.39664634146341465, "grad_norm": 0.6197112202644348, "learning_rate": 1.735569105691057e-05, "loss": 0.0701, "step": 32525 }, { "epoch": 0.39670731707317075, "grad_norm": 0.5517252683639526, "learning_rate": 1.735528455284553e-05, "loss": 0.0787, "step": 32530 }, { "epoch": 0.39676829268292685, "grad_norm": 0.7867411971092224, "learning_rate": 1.735487804878049e-05, "loss": 0.1343, "step": 32535 }, { "epoch": 0.39682926829268295, "grad_norm": 0.6435123085975647, "learning_rate": 1.735447154471545e-05, "loss": 0.0726, "step": 32540 }, { "epoch": 0.39689024390243904, "grad_norm": 0.8115198612213135, "learning_rate": 1.7354065040650407e-05, "loss": 0.0655, "step": 32545 }, { "epoch": 0.39695121951219514, "grad_norm": 0.6596391201019287, "learning_rate": 1.7353658536585368e-05, "loss": 0.0777, "step": 32550 }, { "epoch": 0.39701219512195124, "grad_norm": 0.8103286623954773, "learning_rate": 1.7353252032520326e-05, "loss": 0.0781, "step": 32555 }, { "epoch": 0.39707317073170734, "grad_norm": 0.46980392932891846, "learning_rate": 1.7352845528455288e-05, "loss": 0.0582, "step": 32560 }, { "epoch": 0.39713414634146343, "grad_norm": 0.7283908128738403, "learning_rate": 1.7352439024390246e-05, "loss": 0.0919, "step": 32565 }, { "epoch": 0.39719512195121953, "grad_norm": 1.1971311569213867, "learning_rate": 1.7352032520325204e-05, "loss": 0.0654, "step": 32570 }, { "epoch": 0.39725609756097563, "grad_norm": 0.7275471091270447, "learning_rate": 1.7351626016260162e-05, "loss": 0.0943, "step": 32575 }, { "epoch": 0.3973170731707317, "grad_norm": 0.7581620216369629, "learning_rate": 1.7351219512195124e-05, "loss": 0.0639, "step": 32580 }, { "epoch": 0.3973780487804878, "grad_norm": 0.4213206171989441, "learning_rate": 1.7350813008130082e-05, "loss": 0.0697, "step": 32585 }, { "epoch": 0.3974390243902439, "grad_norm": 0.8950481414794922, "learning_rate": 1.7350406504065043e-05, "loss": 0.0623, "step": 32590 }, { "epoch": 0.3975, "grad_norm": 0.8503230214118958, "learning_rate": 1.735e-05, "loss": 0.0607, "step": 32595 }, { "epoch": 0.3975609756097561, "grad_norm": 0.5725491046905518, "learning_rate": 1.734959349593496e-05, "loss": 0.0903, "step": 32600 }, { "epoch": 0.3976219512195122, "grad_norm": 0.5545499920845032, "learning_rate": 1.734918699186992e-05, "loss": 0.0752, "step": 32605 }, { "epoch": 0.3976829268292683, "grad_norm": 0.9925257563591003, "learning_rate": 1.734878048780488e-05, "loss": 0.1093, "step": 32610 }, { "epoch": 0.3977439024390244, "grad_norm": 0.7169824242591858, "learning_rate": 1.7348373983739837e-05, "loss": 0.0676, "step": 32615 }, { "epoch": 0.3978048780487805, "grad_norm": 0.7542220950126648, "learning_rate": 1.73479674796748e-05, "loss": 0.0918, "step": 32620 }, { "epoch": 0.3978658536585366, "grad_norm": 0.45816314220428467, "learning_rate": 1.7347560975609757e-05, "loss": 0.071, "step": 32625 }, { "epoch": 0.3979268292682927, "grad_norm": 0.6169711351394653, "learning_rate": 1.7347154471544715e-05, "loss": 0.0564, "step": 32630 }, { "epoch": 0.3979878048780488, "grad_norm": 0.645895779132843, "learning_rate": 1.7346747967479677e-05, "loss": 0.0721, "step": 32635 }, { "epoch": 0.3980487804878049, "grad_norm": 1.082602858543396, "learning_rate": 1.7346341463414635e-05, "loss": 0.0769, "step": 32640 }, { "epoch": 0.398109756097561, "grad_norm": 0.6130085587501526, "learning_rate": 1.7345934959349596e-05, "loss": 0.0998, "step": 32645 }, { "epoch": 0.3981707317073171, "grad_norm": 0.5374337434768677, "learning_rate": 1.7345528455284555e-05, "loss": 0.0729, "step": 32650 }, { "epoch": 0.3982317073170732, "grad_norm": 0.43729838728904724, "learning_rate": 1.7345121951219516e-05, "loss": 0.0616, "step": 32655 }, { "epoch": 0.3982926829268293, "grad_norm": 0.4180046021938324, "learning_rate": 1.734471544715447e-05, "loss": 0.0569, "step": 32660 }, { "epoch": 0.3983536585365854, "grad_norm": 0.7809275388717651, "learning_rate": 1.7344308943089432e-05, "loss": 0.0775, "step": 32665 }, { "epoch": 0.3984146341463415, "grad_norm": 0.5768985152244568, "learning_rate": 1.734390243902439e-05, "loss": 0.0953, "step": 32670 }, { "epoch": 0.3984756097560976, "grad_norm": 0.7230982780456543, "learning_rate": 1.7343495934959352e-05, "loss": 0.0975, "step": 32675 }, { "epoch": 0.3985365853658537, "grad_norm": 0.5192663073539734, "learning_rate": 1.734308943089431e-05, "loss": 0.0575, "step": 32680 }, { "epoch": 0.3985975609756098, "grad_norm": 0.523134171962738, "learning_rate": 1.734268292682927e-05, "loss": 0.0792, "step": 32685 }, { "epoch": 0.3986585365853659, "grad_norm": 0.7362083792686462, "learning_rate": 1.734227642276423e-05, "loss": 0.0644, "step": 32690 }, { "epoch": 0.398719512195122, "grad_norm": 0.9261457324028015, "learning_rate": 1.7341869918699188e-05, "loss": 0.0661, "step": 32695 }, { "epoch": 0.39878048780487807, "grad_norm": 0.6242994666099548, "learning_rate": 1.7341463414634146e-05, "loss": 0.0662, "step": 32700 }, { "epoch": 0.39884146341463417, "grad_norm": 0.5980279445648193, "learning_rate": 1.7341056910569108e-05, "loss": 0.0667, "step": 32705 }, { "epoch": 0.39890243902439027, "grad_norm": 1.24030339717865, "learning_rate": 1.7340650406504066e-05, "loss": 0.067, "step": 32710 }, { "epoch": 0.39896341463414636, "grad_norm": 0.7016456723213196, "learning_rate": 1.7340243902439027e-05, "loss": 0.1048, "step": 32715 }, { "epoch": 0.39902439024390246, "grad_norm": 0.6532663106918335, "learning_rate": 1.7339837398373985e-05, "loss": 0.0698, "step": 32720 }, { "epoch": 0.39908536585365856, "grad_norm": 0.6789445877075195, "learning_rate": 1.7339430894308943e-05, "loss": 0.0668, "step": 32725 }, { "epoch": 0.39914634146341466, "grad_norm": 0.41300755739212036, "learning_rate": 1.7339024390243905e-05, "loss": 0.0756, "step": 32730 }, { "epoch": 0.39920731707317075, "grad_norm": 0.8431628346443176, "learning_rate": 1.7338617886178863e-05, "loss": 0.0683, "step": 32735 }, { "epoch": 0.39926829268292685, "grad_norm": 0.5639294981956482, "learning_rate": 1.7338211382113825e-05, "loss": 0.07, "step": 32740 }, { "epoch": 0.39932926829268295, "grad_norm": 1.0226695537567139, "learning_rate": 1.7337804878048783e-05, "loss": 0.0818, "step": 32745 }, { "epoch": 0.39939024390243905, "grad_norm": 0.8728288412094116, "learning_rate": 1.733739837398374e-05, "loss": 0.0753, "step": 32750 }, { "epoch": 0.39945121951219514, "grad_norm": 0.45674562454223633, "learning_rate": 1.73369918699187e-05, "loss": 0.1002, "step": 32755 }, { "epoch": 0.39951219512195124, "grad_norm": 0.556033730506897, "learning_rate": 1.733658536585366e-05, "loss": 0.0778, "step": 32760 }, { "epoch": 0.39957317073170734, "grad_norm": 1.4391260147094727, "learning_rate": 1.733617886178862e-05, "loss": 0.072, "step": 32765 }, { "epoch": 0.39963414634146344, "grad_norm": 0.5442115068435669, "learning_rate": 1.733577235772358e-05, "loss": 0.06, "step": 32770 }, { "epoch": 0.39969512195121953, "grad_norm": 0.4989195764064789, "learning_rate": 1.7335365853658538e-05, "loss": 0.0903, "step": 32775 }, { "epoch": 0.39975609756097563, "grad_norm": 0.5686408877372742, "learning_rate": 1.7334959349593496e-05, "loss": 0.0539, "step": 32780 }, { "epoch": 0.39981707317073173, "grad_norm": 0.7328367233276367, "learning_rate": 1.7334552845528454e-05, "loss": 0.0539, "step": 32785 }, { "epoch": 0.3998780487804878, "grad_norm": 0.6407731771469116, "learning_rate": 1.7334146341463416e-05, "loss": 0.0856, "step": 32790 }, { "epoch": 0.3999390243902439, "grad_norm": 1.8545688390731812, "learning_rate": 1.7333739837398374e-05, "loss": 0.0837, "step": 32795 }, { "epoch": 0.4, "grad_norm": 0.7958287596702576, "learning_rate": 1.7333333333333336e-05, "loss": 0.0892, "step": 32800 }, { "epoch": 0.4000609756097561, "grad_norm": 0.545572817325592, "learning_rate": 1.7332926829268294e-05, "loss": 0.0786, "step": 32805 }, { "epoch": 0.4001219512195122, "grad_norm": 0.8646335005760193, "learning_rate": 1.7332520325203252e-05, "loss": 0.0814, "step": 32810 }, { "epoch": 0.4001829268292683, "grad_norm": 0.4022528827190399, "learning_rate": 1.7332113821138213e-05, "loss": 0.0617, "step": 32815 }, { "epoch": 0.4002439024390244, "grad_norm": 0.5819014310836792, "learning_rate": 1.733170731707317e-05, "loss": 0.0972, "step": 32820 }, { "epoch": 0.4003048780487805, "grad_norm": 0.8585871458053589, "learning_rate": 1.7331300813008133e-05, "loss": 0.0589, "step": 32825 }, { "epoch": 0.4003658536585366, "grad_norm": 1.0780665874481201, "learning_rate": 1.733089430894309e-05, "loss": 0.0947, "step": 32830 }, { "epoch": 0.4004268292682927, "grad_norm": 0.7609168291091919, "learning_rate": 1.7330487804878053e-05, "loss": 0.0671, "step": 32835 }, { "epoch": 0.4004878048780488, "grad_norm": 0.5734328627586365, "learning_rate": 1.7330081300813007e-05, "loss": 0.0926, "step": 32840 }, { "epoch": 0.4005487804878049, "grad_norm": 0.8221361637115479, "learning_rate": 1.732967479674797e-05, "loss": 0.0843, "step": 32845 }, { "epoch": 0.400609756097561, "grad_norm": 0.3502409756183624, "learning_rate": 1.7329268292682927e-05, "loss": 0.0869, "step": 32850 }, { "epoch": 0.4006707317073171, "grad_norm": 0.41693758964538574, "learning_rate": 1.732886178861789e-05, "loss": 0.0676, "step": 32855 }, { "epoch": 0.4007317073170732, "grad_norm": 0.6662185788154602, "learning_rate": 1.7328455284552847e-05, "loss": 0.0821, "step": 32860 }, { "epoch": 0.4007926829268293, "grad_norm": 0.6329765319824219, "learning_rate": 1.7328048780487808e-05, "loss": 0.0811, "step": 32865 }, { "epoch": 0.4008536585365854, "grad_norm": 0.9025928378105164, "learning_rate": 1.7327642276422766e-05, "loss": 0.0946, "step": 32870 }, { "epoch": 0.4009146341463415, "grad_norm": 0.8724666237831116, "learning_rate": 1.7327235772357725e-05, "loss": 0.0734, "step": 32875 }, { "epoch": 0.4009756097560976, "grad_norm": 0.4766683280467987, "learning_rate": 1.7326829268292683e-05, "loss": 0.0867, "step": 32880 }, { "epoch": 0.4010365853658537, "grad_norm": 1.2221629619598389, "learning_rate": 1.7326422764227644e-05, "loss": 0.0854, "step": 32885 }, { "epoch": 0.4010975609756098, "grad_norm": 0.6348473429679871, "learning_rate": 1.7326016260162602e-05, "loss": 0.0729, "step": 32890 }, { "epoch": 0.4011585365853659, "grad_norm": 1.2055877447128296, "learning_rate": 1.7325609756097564e-05, "loss": 0.1023, "step": 32895 }, { "epoch": 0.401219512195122, "grad_norm": 0.852263867855072, "learning_rate": 1.7325203252032522e-05, "loss": 0.0707, "step": 32900 }, { "epoch": 0.40128048780487807, "grad_norm": 0.7250267863273621, "learning_rate": 1.732479674796748e-05, "loss": 0.0812, "step": 32905 }, { "epoch": 0.40134146341463417, "grad_norm": 0.7211191654205322, "learning_rate": 1.732439024390244e-05, "loss": 0.0783, "step": 32910 }, { "epoch": 0.40140243902439027, "grad_norm": 1.2961268424987793, "learning_rate": 1.73239837398374e-05, "loss": 0.1159, "step": 32915 }, { "epoch": 0.40146341463414636, "grad_norm": 0.5585896968841553, "learning_rate": 1.732357723577236e-05, "loss": 0.0646, "step": 32920 }, { "epoch": 0.40152439024390246, "grad_norm": 2.339130163192749, "learning_rate": 1.732317073170732e-05, "loss": 0.1048, "step": 32925 }, { "epoch": 0.40158536585365856, "grad_norm": 0.5493901371955872, "learning_rate": 1.7322764227642277e-05, "loss": 0.066, "step": 32930 }, { "epoch": 0.40164634146341466, "grad_norm": 0.48407381772994995, "learning_rate": 1.7322357723577236e-05, "loss": 0.1002, "step": 32935 }, { "epoch": 0.40170731707317076, "grad_norm": 0.46850821375846863, "learning_rate": 1.7321951219512197e-05, "loss": 0.1127, "step": 32940 }, { "epoch": 0.40176829268292685, "grad_norm": 1.1518876552581787, "learning_rate": 1.7321544715447155e-05, "loss": 0.075, "step": 32945 }, { "epoch": 0.40182926829268295, "grad_norm": 1.0556374788284302, "learning_rate": 1.7321138211382117e-05, "loss": 0.1157, "step": 32950 }, { "epoch": 0.40189024390243905, "grad_norm": 0.5961774587631226, "learning_rate": 1.7320731707317075e-05, "loss": 0.0516, "step": 32955 }, { "epoch": 0.40195121951219515, "grad_norm": 1.4346777200698853, "learning_rate": 1.7320325203252033e-05, "loss": 0.082, "step": 32960 }, { "epoch": 0.40201219512195124, "grad_norm": 0.7427476048469543, "learning_rate": 1.731991869918699e-05, "loss": 0.0681, "step": 32965 }, { "epoch": 0.40207317073170734, "grad_norm": 1.2666840553283691, "learning_rate": 1.7319512195121953e-05, "loss": 0.0862, "step": 32970 }, { "epoch": 0.40213414634146344, "grad_norm": 0.8096301555633545, "learning_rate": 1.731910569105691e-05, "loss": 0.1121, "step": 32975 }, { "epoch": 0.40219512195121954, "grad_norm": 0.5713385343551636, "learning_rate": 1.7318699186991872e-05, "loss": 0.0488, "step": 32980 }, { "epoch": 0.40225609756097563, "grad_norm": 0.5117807984352112, "learning_rate": 1.731829268292683e-05, "loss": 0.1062, "step": 32985 }, { "epoch": 0.40231707317073173, "grad_norm": 1.6052626371383667, "learning_rate": 1.731788617886179e-05, "loss": 0.1065, "step": 32990 }, { "epoch": 0.40237804878048783, "grad_norm": 0.7457941770553589, "learning_rate": 1.731747967479675e-05, "loss": 0.0856, "step": 32995 }, { "epoch": 0.4024390243902439, "grad_norm": 0.5602076053619385, "learning_rate": 1.7317073170731708e-05, "loss": 0.0584, "step": 33000 }, { "epoch": 0.4025, "grad_norm": 0.49353447556495667, "learning_rate": 1.731666666666667e-05, "loss": 0.1127, "step": 33005 }, { "epoch": 0.4025609756097561, "grad_norm": 0.8946630954742432, "learning_rate": 1.7316260162601628e-05, "loss": 0.0859, "step": 33010 }, { "epoch": 0.4026219512195122, "grad_norm": 0.47066715359687805, "learning_rate": 1.731585365853659e-05, "loss": 0.0547, "step": 33015 }, { "epoch": 0.4026829268292683, "grad_norm": 0.9367210865020752, "learning_rate": 1.7315447154471544e-05, "loss": 0.0807, "step": 33020 }, { "epoch": 0.4027439024390244, "grad_norm": 0.6175686120986938, "learning_rate": 1.7315040650406506e-05, "loss": 0.0575, "step": 33025 }, { "epoch": 0.4028048780487805, "grad_norm": 0.6703305840492249, "learning_rate": 1.7314634146341464e-05, "loss": 0.0909, "step": 33030 }, { "epoch": 0.4028658536585366, "grad_norm": 1.057142734527588, "learning_rate": 1.7314227642276425e-05, "loss": 0.1021, "step": 33035 }, { "epoch": 0.4029268292682927, "grad_norm": 0.4582759439945221, "learning_rate": 1.7313821138211383e-05, "loss": 0.1339, "step": 33040 }, { "epoch": 0.4029878048780488, "grad_norm": 0.8261247277259827, "learning_rate": 1.7313414634146345e-05, "loss": 0.0897, "step": 33045 }, { "epoch": 0.4030487804878049, "grad_norm": 1.3029574155807495, "learning_rate": 1.73130081300813e-05, "loss": 0.1053, "step": 33050 }, { "epoch": 0.403109756097561, "grad_norm": 2.648730754852295, "learning_rate": 1.731260162601626e-05, "loss": 0.0619, "step": 33055 }, { "epoch": 0.4031707317073171, "grad_norm": 1.3607761859893799, "learning_rate": 1.731219512195122e-05, "loss": 0.0648, "step": 33060 }, { "epoch": 0.4032317073170732, "grad_norm": 0.4777655601501465, "learning_rate": 1.731178861788618e-05, "loss": 0.0594, "step": 33065 }, { "epoch": 0.4032926829268293, "grad_norm": 0.9320659637451172, "learning_rate": 1.731138211382114e-05, "loss": 0.0932, "step": 33070 }, { "epoch": 0.4033536585365854, "grad_norm": 0.5713777542114258, "learning_rate": 1.73109756097561e-05, "loss": 0.0603, "step": 33075 }, { "epoch": 0.4034146341463415, "grad_norm": 0.535554826259613, "learning_rate": 1.731056910569106e-05, "loss": 0.1004, "step": 33080 }, { "epoch": 0.4034756097560976, "grad_norm": 0.6011165976524353, "learning_rate": 1.7310162601626017e-05, "loss": 0.0761, "step": 33085 }, { "epoch": 0.4035365853658537, "grad_norm": 0.6709082126617432, "learning_rate": 1.7309756097560978e-05, "loss": 0.0793, "step": 33090 }, { "epoch": 0.4035975609756098, "grad_norm": 0.49825188517570496, "learning_rate": 1.7309349593495936e-05, "loss": 0.0977, "step": 33095 }, { "epoch": 0.4036585365853659, "grad_norm": 0.4382503628730774, "learning_rate": 1.7308943089430898e-05, "loss": 0.0802, "step": 33100 }, { "epoch": 0.403719512195122, "grad_norm": 0.543374240398407, "learning_rate": 1.7308536585365856e-05, "loss": 0.09, "step": 33105 }, { "epoch": 0.4037804878048781, "grad_norm": 1.1634057760238647, "learning_rate": 1.7308130081300814e-05, "loss": 0.1082, "step": 33110 }, { "epoch": 0.40384146341463417, "grad_norm": 0.9197266697883606, "learning_rate": 1.7307723577235772e-05, "loss": 0.0719, "step": 33115 }, { "epoch": 0.40390243902439027, "grad_norm": 1.953238844871521, "learning_rate": 1.7307317073170734e-05, "loss": 0.0756, "step": 33120 }, { "epoch": 0.40396341463414637, "grad_norm": 0.6185066103935242, "learning_rate": 1.7306910569105692e-05, "loss": 0.0993, "step": 33125 }, { "epoch": 0.40402439024390246, "grad_norm": 0.667745053768158, "learning_rate": 1.7306504065040653e-05, "loss": 0.1087, "step": 33130 }, { "epoch": 0.40408536585365856, "grad_norm": 0.5377657413482666, "learning_rate": 1.730609756097561e-05, "loss": 0.082, "step": 33135 }, { "epoch": 0.40414634146341466, "grad_norm": 0.9267685413360596, "learning_rate": 1.730569105691057e-05, "loss": 0.0478, "step": 33140 }, { "epoch": 0.40420731707317076, "grad_norm": 0.6589311957359314, "learning_rate": 1.7305284552845528e-05, "loss": 0.0713, "step": 33145 }, { "epoch": 0.40426829268292686, "grad_norm": 0.5511266589164734, "learning_rate": 1.730487804878049e-05, "loss": 0.0565, "step": 33150 }, { "epoch": 0.40432926829268295, "grad_norm": 0.5244184732437134, "learning_rate": 1.7304471544715447e-05, "loss": 0.0938, "step": 33155 }, { "epoch": 0.40439024390243905, "grad_norm": 1.190529704093933, "learning_rate": 1.730406504065041e-05, "loss": 0.0685, "step": 33160 }, { "epoch": 0.40445121951219515, "grad_norm": 0.588539183139801, "learning_rate": 1.7303658536585367e-05, "loss": 0.0729, "step": 33165 }, { "epoch": 0.40451219512195125, "grad_norm": 0.42208239436149597, "learning_rate": 1.7303252032520325e-05, "loss": 0.0817, "step": 33170 }, { "epoch": 0.40457317073170734, "grad_norm": 0.5112527012825012, "learning_rate": 1.7302845528455287e-05, "loss": 0.0491, "step": 33175 }, { "epoch": 0.40463414634146344, "grad_norm": 0.5120124220848083, "learning_rate": 1.7302439024390245e-05, "loss": 0.0839, "step": 33180 }, { "epoch": 0.40469512195121954, "grad_norm": 0.8810582756996155, "learning_rate": 1.7302032520325206e-05, "loss": 0.0834, "step": 33185 }, { "epoch": 0.40475609756097564, "grad_norm": 0.4289785921573639, "learning_rate": 1.7301626016260164e-05, "loss": 0.0594, "step": 33190 }, { "epoch": 0.40481707317073173, "grad_norm": 1.264824390411377, "learning_rate": 1.7301219512195123e-05, "loss": 0.0815, "step": 33195 }, { "epoch": 0.40487804878048783, "grad_norm": 0.3142927587032318, "learning_rate": 1.730081300813008e-05, "loss": 0.0632, "step": 33200 }, { "epoch": 0.40493902439024393, "grad_norm": 0.714739203453064, "learning_rate": 1.7300406504065042e-05, "loss": 0.1116, "step": 33205 }, { "epoch": 0.405, "grad_norm": 0.8260610699653625, "learning_rate": 1.73e-05, "loss": 0.0822, "step": 33210 }, { "epoch": 0.4050609756097561, "grad_norm": 0.47510233521461487, "learning_rate": 1.7299593495934962e-05, "loss": 0.0707, "step": 33215 }, { "epoch": 0.4051219512195122, "grad_norm": 0.8898782730102539, "learning_rate": 1.729918699186992e-05, "loss": 0.091, "step": 33220 }, { "epoch": 0.4051829268292683, "grad_norm": 1.5361452102661133, "learning_rate": 1.729878048780488e-05, "loss": 0.1236, "step": 33225 }, { "epoch": 0.4052439024390244, "grad_norm": 0.57980877161026, "learning_rate": 1.7298373983739836e-05, "loss": 0.1016, "step": 33230 }, { "epoch": 0.4053048780487805, "grad_norm": 1.0166677236557007, "learning_rate": 1.7297967479674798e-05, "loss": 0.1129, "step": 33235 }, { "epoch": 0.4053658536585366, "grad_norm": 1.0754942893981934, "learning_rate": 1.7297560975609756e-05, "loss": 0.075, "step": 33240 }, { "epoch": 0.4054268292682927, "grad_norm": 3.2012641429901123, "learning_rate": 1.7297154471544717e-05, "loss": 0.068, "step": 33245 }, { "epoch": 0.4054878048780488, "grad_norm": 0.40412241220474243, "learning_rate": 1.7296747967479676e-05, "loss": 0.0719, "step": 33250 }, { "epoch": 0.4055487804878049, "grad_norm": 0.49157506227493286, "learning_rate": 1.7296341463414637e-05, "loss": 0.081, "step": 33255 }, { "epoch": 0.405609756097561, "grad_norm": 0.7015171051025391, "learning_rate": 1.7295934959349595e-05, "loss": 0.0852, "step": 33260 }, { "epoch": 0.4056707317073171, "grad_norm": 0.26484382152557373, "learning_rate": 1.7295528455284553e-05, "loss": 0.0628, "step": 33265 }, { "epoch": 0.4057317073170732, "grad_norm": 0.7472266554832458, "learning_rate": 1.7295121951219515e-05, "loss": 0.0797, "step": 33270 }, { "epoch": 0.4057926829268293, "grad_norm": 0.46443215012550354, "learning_rate": 1.7294715447154473e-05, "loss": 0.0777, "step": 33275 }, { "epoch": 0.4058536585365854, "grad_norm": 0.552558958530426, "learning_rate": 1.7294308943089434e-05, "loss": 0.0935, "step": 33280 }, { "epoch": 0.4059146341463415, "grad_norm": 0.516065239906311, "learning_rate": 1.7293902439024393e-05, "loss": 0.0603, "step": 33285 }, { "epoch": 0.4059756097560976, "grad_norm": 1.047438621520996, "learning_rate": 1.729349593495935e-05, "loss": 0.0868, "step": 33290 }, { "epoch": 0.4060365853658537, "grad_norm": 0.5172978043556213, "learning_rate": 1.729308943089431e-05, "loss": 0.0866, "step": 33295 }, { "epoch": 0.4060975609756098, "grad_norm": 0.6383371949195862, "learning_rate": 1.729268292682927e-05, "loss": 0.1206, "step": 33300 }, { "epoch": 0.4061585365853659, "grad_norm": 0.5051931738853455, "learning_rate": 1.729227642276423e-05, "loss": 0.0723, "step": 33305 }, { "epoch": 0.406219512195122, "grad_norm": 0.9144957661628723, "learning_rate": 1.729186991869919e-05, "loss": 0.0919, "step": 33310 }, { "epoch": 0.406280487804878, "grad_norm": 1.1059173345565796, "learning_rate": 1.7291463414634148e-05, "loss": 0.0837, "step": 33315 }, { "epoch": 0.4063414634146341, "grad_norm": 0.7310409545898438, "learning_rate": 1.7291056910569106e-05, "loss": 0.0458, "step": 33320 }, { "epoch": 0.4064024390243902, "grad_norm": 0.8633487820625305, "learning_rate": 1.7290650406504064e-05, "loss": 0.0662, "step": 33325 }, { "epoch": 0.4064634146341463, "grad_norm": 0.5769644975662231, "learning_rate": 1.7290243902439026e-05, "loss": 0.092, "step": 33330 }, { "epoch": 0.4065243902439024, "grad_norm": 0.66208815574646, "learning_rate": 1.7289837398373984e-05, "loss": 0.07, "step": 33335 }, { "epoch": 0.4065853658536585, "grad_norm": 0.5728813409805298, "learning_rate": 1.7289430894308946e-05, "loss": 0.0761, "step": 33340 }, { "epoch": 0.4066463414634146, "grad_norm": 0.6666140556335449, "learning_rate": 1.7289024390243904e-05, "loss": 0.0782, "step": 33345 }, { "epoch": 0.4067073170731707, "grad_norm": 1.186848521232605, "learning_rate": 1.7288617886178862e-05, "loss": 0.0489, "step": 33350 }, { "epoch": 0.4067682926829268, "grad_norm": 0.7716383934020996, "learning_rate": 1.7288211382113823e-05, "loss": 0.0804, "step": 33355 }, { "epoch": 0.4068292682926829, "grad_norm": 0.4926796555519104, "learning_rate": 1.728780487804878e-05, "loss": 0.0532, "step": 33360 }, { "epoch": 0.406890243902439, "grad_norm": 0.6538439393043518, "learning_rate": 1.7287398373983743e-05, "loss": 0.0735, "step": 33365 }, { "epoch": 0.4069512195121951, "grad_norm": 1.341504693031311, "learning_rate": 1.72869918699187e-05, "loss": 0.1106, "step": 33370 }, { "epoch": 0.4070121951219512, "grad_norm": 0.699651837348938, "learning_rate": 1.728658536585366e-05, "loss": 0.09, "step": 33375 }, { "epoch": 0.4070731707317073, "grad_norm": 0.7668724656105042, "learning_rate": 1.7286178861788617e-05, "loss": 0.069, "step": 33380 }, { "epoch": 0.4071341463414634, "grad_norm": 0.38804852962493896, "learning_rate": 1.728577235772358e-05, "loss": 0.0809, "step": 33385 }, { "epoch": 0.4071951219512195, "grad_norm": 0.6465084552764893, "learning_rate": 1.7285365853658537e-05, "loss": 0.0684, "step": 33390 }, { "epoch": 0.4072560975609756, "grad_norm": 0.608100950717926, "learning_rate": 1.72849593495935e-05, "loss": 0.1038, "step": 33395 }, { "epoch": 0.4073170731707317, "grad_norm": 0.8288813233375549, "learning_rate": 1.7284552845528457e-05, "loss": 0.0657, "step": 33400 }, { "epoch": 0.4073780487804878, "grad_norm": 0.5561397671699524, "learning_rate": 1.7284146341463418e-05, "loss": 0.1091, "step": 33405 }, { "epoch": 0.4074390243902439, "grad_norm": 0.5576291084289551, "learning_rate": 1.7283739837398373e-05, "loss": 0.0931, "step": 33410 }, { "epoch": 0.4075, "grad_norm": 0.5004430413246155, "learning_rate": 1.7283333333333334e-05, "loss": 0.0913, "step": 33415 }, { "epoch": 0.40756097560975607, "grad_norm": 1.0415719747543335, "learning_rate": 1.7282926829268293e-05, "loss": 0.0788, "step": 33420 }, { "epoch": 0.40762195121951217, "grad_norm": 0.6433011293411255, "learning_rate": 1.7282520325203254e-05, "loss": 0.0799, "step": 33425 }, { "epoch": 0.40768292682926827, "grad_norm": 0.6960341930389404, "learning_rate": 1.7282113821138212e-05, "loss": 0.0795, "step": 33430 }, { "epoch": 0.40774390243902436, "grad_norm": 0.5021090507507324, "learning_rate": 1.7281707317073174e-05, "loss": 0.0501, "step": 33435 }, { "epoch": 0.40780487804878046, "grad_norm": 1.2041724920272827, "learning_rate": 1.7281300813008132e-05, "loss": 0.0691, "step": 33440 }, { "epoch": 0.40786585365853656, "grad_norm": 0.5047111511230469, "learning_rate": 1.728089430894309e-05, "loss": 0.0616, "step": 33445 }, { "epoch": 0.40792682926829266, "grad_norm": 1.2315891981124878, "learning_rate": 1.728048780487805e-05, "loss": 0.0761, "step": 33450 }, { "epoch": 0.40798780487804875, "grad_norm": 0.40379834175109863, "learning_rate": 1.728008130081301e-05, "loss": 0.0665, "step": 33455 }, { "epoch": 0.40804878048780485, "grad_norm": 0.9576863646507263, "learning_rate": 1.7279674796747968e-05, "loss": 0.0742, "step": 33460 }, { "epoch": 0.40810975609756095, "grad_norm": 0.5043803453445435, "learning_rate": 1.727926829268293e-05, "loss": 0.0932, "step": 33465 }, { "epoch": 0.40817073170731705, "grad_norm": 0.818576991558075, "learning_rate": 1.7278861788617887e-05, "loss": 0.0904, "step": 33470 }, { "epoch": 0.40823170731707314, "grad_norm": 0.7107399106025696, "learning_rate": 1.7278455284552846e-05, "loss": 0.0745, "step": 33475 }, { "epoch": 0.40829268292682924, "grad_norm": 0.4486411213874817, "learning_rate": 1.7278048780487807e-05, "loss": 0.0991, "step": 33480 }, { "epoch": 0.40835365853658534, "grad_norm": 1.0402508974075317, "learning_rate": 1.7277642276422765e-05, "loss": 0.1034, "step": 33485 }, { "epoch": 0.40841463414634144, "grad_norm": 0.6294761896133423, "learning_rate": 1.7277235772357727e-05, "loss": 0.0492, "step": 33490 }, { "epoch": 0.40847560975609754, "grad_norm": 2.34535551071167, "learning_rate": 1.7276829268292685e-05, "loss": 0.0891, "step": 33495 }, { "epoch": 0.40853658536585363, "grad_norm": 0.5274110436439514, "learning_rate": 1.7276422764227643e-05, "loss": 0.0528, "step": 33500 }, { "epoch": 0.40859756097560973, "grad_norm": 0.72989821434021, "learning_rate": 1.72760162601626e-05, "loss": 0.0909, "step": 33505 }, { "epoch": 0.40865853658536583, "grad_norm": 0.635838508605957, "learning_rate": 1.7275609756097563e-05, "loss": 0.0682, "step": 33510 }, { "epoch": 0.4087195121951219, "grad_norm": 0.7790481448173523, "learning_rate": 1.727520325203252e-05, "loss": 0.0918, "step": 33515 }, { "epoch": 0.408780487804878, "grad_norm": 0.8059723377227783, "learning_rate": 1.7274796747967482e-05, "loss": 0.1211, "step": 33520 }, { "epoch": 0.4088414634146341, "grad_norm": 0.7775880098342896, "learning_rate": 1.727439024390244e-05, "loss": 0.0669, "step": 33525 }, { "epoch": 0.4089024390243902, "grad_norm": 0.5439852476119995, "learning_rate": 1.72739837398374e-05, "loss": 0.0626, "step": 33530 }, { "epoch": 0.4089634146341463, "grad_norm": 0.8022865653038025, "learning_rate": 1.727357723577236e-05, "loss": 0.0844, "step": 33535 }, { "epoch": 0.4090243902439024, "grad_norm": 0.36317217350006104, "learning_rate": 1.7273170731707318e-05, "loss": 0.0735, "step": 33540 }, { "epoch": 0.4090853658536585, "grad_norm": 0.9380449652671814, "learning_rate": 1.727276422764228e-05, "loss": 0.1497, "step": 33545 }, { "epoch": 0.4091463414634146, "grad_norm": 0.8773309588432312, "learning_rate": 1.7272357723577238e-05, "loss": 0.1001, "step": 33550 }, { "epoch": 0.4092073170731707, "grad_norm": 0.4887436628341675, "learning_rate": 1.7271951219512196e-05, "loss": 0.0689, "step": 33555 }, { "epoch": 0.4092682926829268, "grad_norm": 0.6134903430938721, "learning_rate": 1.7271544715447154e-05, "loss": 0.0908, "step": 33560 }, { "epoch": 0.4093292682926829, "grad_norm": 1.0705229043960571, "learning_rate": 1.7271138211382116e-05, "loss": 0.0998, "step": 33565 }, { "epoch": 0.409390243902439, "grad_norm": 0.5853023529052734, "learning_rate": 1.7270731707317074e-05, "loss": 0.0598, "step": 33570 }, { "epoch": 0.4094512195121951, "grad_norm": 0.5069100260734558, "learning_rate": 1.7270325203252035e-05, "loss": 0.0574, "step": 33575 }, { "epoch": 0.4095121951219512, "grad_norm": 0.7397735118865967, "learning_rate": 1.7269918699186993e-05, "loss": 0.0685, "step": 33580 }, { "epoch": 0.4095731707317073, "grad_norm": 0.6026504635810852, "learning_rate": 1.7269512195121955e-05, "loss": 0.0694, "step": 33585 }, { "epoch": 0.4096341463414634, "grad_norm": 0.5276340246200562, "learning_rate": 1.726910569105691e-05, "loss": 0.0503, "step": 33590 }, { "epoch": 0.4096951219512195, "grad_norm": 0.69249027967453, "learning_rate": 1.726869918699187e-05, "loss": 0.0768, "step": 33595 }, { "epoch": 0.4097560975609756, "grad_norm": 0.7338582873344421, "learning_rate": 1.726829268292683e-05, "loss": 0.0535, "step": 33600 }, { "epoch": 0.4098170731707317, "grad_norm": 0.9429292678833008, "learning_rate": 1.726788617886179e-05, "loss": 0.0518, "step": 33605 }, { "epoch": 0.4098780487804878, "grad_norm": 0.550512969493866, "learning_rate": 1.726747967479675e-05, "loss": 0.0926, "step": 33610 }, { "epoch": 0.4099390243902439, "grad_norm": 0.6637189984321594, "learning_rate": 1.726707317073171e-05, "loss": 0.077, "step": 33615 }, { "epoch": 0.41, "grad_norm": 0.7636752128601074, "learning_rate": 1.726666666666667e-05, "loss": 0.1025, "step": 33620 }, { "epoch": 0.4100609756097561, "grad_norm": 0.6678932309150696, "learning_rate": 1.7266260162601627e-05, "loss": 0.0797, "step": 33625 }, { "epoch": 0.41012195121951217, "grad_norm": 0.5363587737083435, "learning_rate": 1.7265853658536588e-05, "loss": 0.06, "step": 33630 }, { "epoch": 0.41018292682926827, "grad_norm": 0.7088111639022827, "learning_rate": 1.7265447154471546e-05, "loss": 0.0944, "step": 33635 }, { "epoch": 0.41024390243902437, "grad_norm": 0.8066406846046448, "learning_rate": 1.7265040650406504e-05, "loss": 0.0816, "step": 33640 }, { "epoch": 0.41030487804878046, "grad_norm": 0.6574663519859314, "learning_rate": 1.7264634146341466e-05, "loss": 0.0678, "step": 33645 }, { "epoch": 0.41036585365853656, "grad_norm": 1.0144922733306885, "learning_rate": 1.7264227642276424e-05, "loss": 0.1047, "step": 33650 }, { "epoch": 0.41042682926829266, "grad_norm": 0.6854569315910339, "learning_rate": 1.7263821138211382e-05, "loss": 0.0696, "step": 33655 }, { "epoch": 0.41048780487804876, "grad_norm": 0.7946650981903076, "learning_rate": 1.7263414634146344e-05, "loss": 0.0711, "step": 33660 }, { "epoch": 0.41054878048780485, "grad_norm": 1.091707706451416, "learning_rate": 1.7263008130081302e-05, "loss": 0.1219, "step": 33665 }, { "epoch": 0.41060975609756095, "grad_norm": 0.47485482692718506, "learning_rate": 1.7262601626016263e-05, "loss": 0.0668, "step": 33670 }, { "epoch": 0.41067073170731705, "grad_norm": 0.7352733612060547, "learning_rate": 1.726219512195122e-05, "loss": 0.068, "step": 33675 }, { "epoch": 0.41073170731707315, "grad_norm": 0.8382393717765808, "learning_rate": 1.726178861788618e-05, "loss": 0.1032, "step": 33680 }, { "epoch": 0.41079268292682924, "grad_norm": 0.5150150060653687, "learning_rate": 1.7261382113821138e-05, "loss": 0.0883, "step": 33685 }, { "epoch": 0.41085365853658534, "grad_norm": 0.6898802518844604, "learning_rate": 1.72609756097561e-05, "loss": 0.0835, "step": 33690 }, { "epoch": 0.41091463414634144, "grad_norm": 0.6992207765579224, "learning_rate": 1.7260569105691057e-05, "loss": 0.0854, "step": 33695 }, { "epoch": 0.41097560975609754, "grad_norm": 1.1994355916976929, "learning_rate": 1.726016260162602e-05, "loss": 0.0725, "step": 33700 }, { "epoch": 0.41103658536585364, "grad_norm": 0.5886557102203369, "learning_rate": 1.7259756097560977e-05, "loss": 0.0741, "step": 33705 }, { "epoch": 0.41109756097560973, "grad_norm": 0.7368704080581665, "learning_rate": 1.7259349593495935e-05, "loss": 0.0832, "step": 33710 }, { "epoch": 0.41115853658536583, "grad_norm": 0.7547730803489685, "learning_rate": 1.7258943089430897e-05, "loss": 0.0841, "step": 33715 }, { "epoch": 0.41121951219512193, "grad_norm": 0.48514512181282043, "learning_rate": 1.7258536585365855e-05, "loss": 0.0945, "step": 33720 }, { "epoch": 0.411280487804878, "grad_norm": 1.0408234596252441, "learning_rate": 1.7258130081300813e-05, "loss": 0.1116, "step": 33725 }, { "epoch": 0.4113414634146341, "grad_norm": 0.817808985710144, "learning_rate": 1.7257723577235774e-05, "loss": 0.1268, "step": 33730 }, { "epoch": 0.4114024390243902, "grad_norm": 0.6662054657936096, "learning_rate": 1.7257317073170733e-05, "loss": 0.072, "step": 33735 }, { "epoch": 0.4114634146341463, "grad_norm": 0.7728133797645569, "learning_rate": 1.725691056910569e-05, "loss": 0.07, "step": 33740 }, { "epoch": 0.4115243902439024, "grad_norm": 0.5612142086029053, "learning_rate": 1.7256504065040652e-05, "loss": 0.0761, "step": 33745 }, { "epoch": 0.4115853658536585, "grad_norm": 0.5532336831092834, "learning_rate": 1.725609756097561e-05, "loss": 0.0806, "step": 33750 }, { "epoch": 0.4116463414634146, "grad_norm": 0.7597324252128601, "learning_rate": 1.7255691056910572e-05, "loss": 0.1037, "step": 33755 }, { "epoch": 0.4117073170731707, "grad_norm": 5.6344170570373535, "learning_rate": 1.725528455284553e-05, "loss": 0.0725, "step": 33760 }, { "epoch": 0.4117682926829268, "grad_norm": 0.835698664188385, "learning_rate": 1.725487804878049e-05, "loss": 0.1107, "step": 33765 }, { "epoch": 0.4118292682926829, "grad_norm": 1.5944766998291016, "learning_rate": 1.7254471544715446e-05, "loss": 0.1059, "step": 33770 }, { "epoch": 0.411890243902439, "grad_norm": 0.3353375196456909, "learning_rate": 1.7254065040650408e-05, "loss": 0.1023, "step": 33775 }, { "epoch": 0.4119512195121951, "grad_norm": 1.1588174104690552, "learning_rate": 1.7253658536585366e-05, "loss": 0.0681, "step": 33780 }, { "epoch": 0.4120121951219512, "grad_norm": 0.4416610300540924, "learning_rate": 1.7253252032520327e-05, "loss": 0.0452, "step": 33785 }, { "epoch": 0.4120731707317073, "grad_norm": 0.6488233804702759, "learning_rate": 1.7252845528455285e-05, "loss": 0.103, "step": 33790 }, { "epoch": 0.4121341463414634, "grad_norm": 0.5150464177131653, "learning_rate": 1.7252439024390247e-05, "loss": 0.0683, "step": 33795 }, { "epoch": 0.4121951219512195, "grad_norm": 0.5164918899536133, "learning_rate": 1.7252032520325205e-05, "loss": 0.1136, "step": 33800 }, { "epoch": 0.4122560975609756, "grad_norm": 0.7627536654472351, "learning_rate": 1.7251626016260163e-05, "loss": 0.0503, "step": 33805 }, { "epoch": 0.4123170731707317, "grad_norm": 0.5518519878387451, "learning_rate": 1.7251219512195125e-05, "loss": 0.0673, "step": 33810 }, { "epoch": 0.4123780487804878, "grad_norm": 0.5149959325790405, "learning_rate": 1.7250813008130083e-05, "loss": 0.076, "step": 33815 }, { "epoch": 0.4124390243902439, "grad_norm": 0.6254158616065979, "learning_rate": 1.725040650406504e-05, "loss": 0.0634, "step": 33820 }, { "epoch": 0.4125, "grad_norm": 1.3400392532348633, "learning_rate": 1.7250000000000003e-05, "loss": 0.0552, "step": 33825 }, { "epoch": 0.4125609756097561, "grad_norm": 0.512737512588501, "learning_rate": 1.724959349593496e-05, "loss": 0.0976, "step": 33830 }, { "epoch": 0.4126219512195122, "grad_norm": 0.5535145401954651, "learning_rate": 1.724918699186992e-05, "loss": 0.0721, "step": 33835 }, { "epoch": 0.41268292682926827, "grad_norm": 0.25145602226257324, "learning_rate": 1.724878048780488e-05, "loss": 0.0596, "step": 33840 }, { "epoch": 0.41274390243902437, "grad_norm": 0.5794246792793274, "learning_rate": 1.724837398373984e-05, "loss": 0.0585, "step": 33845 }, { "epoch": 0.41280487804878047, "grad_norm": 0.6444226503372192, "learning_rate": 1.72479674796748e-05, "loss": 0.0647, "step": 33850 }, { "epoch": 0.41286585365853656, "grad_norm": 0.9661206603050232, "learning_rate": 1.7247560975609758e-05, "loss": 0.113, "step": 33855 }, { "epoch": 0.41292682926829266, "grad_norm": 0.6042037010192871, "learning_rate": 1.7247154471544716e-05, "loss": 0.115, "step": 33860 }, { "epoch": 0.41298780487804876, "grad_norm": 3.6394197940826416, "learning_rate": 1.7246747967479674e-05, "loss": 0.074, "step": 33865 }, { "epoch": 0.41304878048780486, "grad_norm": 1.948388695716858, "learning_rate": 1.7246341463414636e-05, "loss": 0.0736, "step": 33870 }, { "epoch": 0.41310975609756095, "grad_norm": 0.7907043695449829, "learning_rate": 1.7245934959349594e-05, "loss": 0.0712, "step": 33875 }, { "epoch": 0.41317073170731705, "grad_norm": 0.7792668342590332, "learning_rate": 1.7245528455284556e-05, "loss": 0.0984, "step": 33880 }, { "epoch": 0.41323170731707315, "grad_norm": 0.8261346220970154, "learning_rate": 1.7245121951219514e-05, "loss": 0.0824, "step": 33885 }, { "epoch": 0.41329268292682925, "grad_norm": 0.5629845261573792, "learning_rate": 1.7244715447154472e-05, "loss": 0.0784, "step": 33890 }, { "epoch": 0.41335365853658534, "grad_norm": 0.6269645690917969, "learning_rate": 1.7244308943089433e-05, "loss": 0.0624, "step": 33895 }, { "epoch": 0.41341463414634144, "grad_norm": 0.6733598709106445, "learning_rate": 1.724390243902439e-05, "loss": 0.0797, "step": 33900 }, { "epoch": 0.41347560975609754, "grad_norm": 0.6649341583251953, "learning_rate": 1.724349593495935e-05, "loss": 0.0856, "step": 33905 }, { "epoch": 0.41353658536585364, "grad_norm": 0.5843542218208313, "learning_rate": 1.724308943089431e-05, "loss": 0.0785, "step": 33910 }, { "epoch": 0.41359756097560973, "grad_norm": 0.43129780888557434, "learning_rate": 1.724268292682927e-05, "loss": 0.0478, "step": 33915 }, { "epoch": 0.41365853658536583, "grad_norm": 0.6749055981636047, "learning_rate": 1.7242276422764227e-05, "loss": 0.0717, "step": 33920 }, { "epoch": 0.41371951219512193, "grad_norm": 0.6021131277084351, "learning_rate": 1.724186991869919e-05, "loss": 0.0528, "step": 33925 }, { "epoch": 0.413780487804878, "grad_norm": 0.5152398943901062, "learning_rate": 1.7241463414634147e-05, "loss": 0.0598, "step": 33930 }, { "epoch": 0.4138414634146341, "grad_norm": 1.03791081905365, "learning_rate": 1.724105691056911e-05, "loss": 0.114, "step": 33935 }, { "epoch": 0.4139024390243902, "grad_norm": 0.7358627915382385, "learning_rate": 1.7240650406504067e-05, "loss": 0.0605, "step": 33940 }, { "epoch": 0.4139634146341463, "grad_norm": 0.38020315766334534, "learning_rate": 1.7240243902439028e-05, "loss": 0.0897, "step": 33945 }, { "epoch": 0.4140243902439024, "grad_norm": 1.0979293584823608, "learning_rate": 1.7239837398373983e-05, "loss": 0.0753, "step": 33950 }, { "epoch": 0.4140853658536585, "grad_norm": 1.5676404237747192, "learning_rate": 1.7239430894308944e-05, "loss": 0.1027, "step": 33955 }, { "epoch": 0.4141463414634146, "grad_norm": 0.5891348719596863, "learning_rate": 1.7239024390243902e-05, "loss": 0.0571, "step": 33960 }, { "epoch": 0.4142073170731707, "grad_norm": 1.5031877756118774, "learning_rate": 1.7238617886178864e-05, "loss": 0.075, "step": 33965 }, { "epoch": 0.4142682926829268, "grad_norm": 0.3968318998813629, "learning_rate": 1.7238211382113822e-05, "loss": 0.0621, "step": 33970 }, { "epoch": 0.4143292682926829, "grad_norm": 2.1287841796875, "learning_rate": 1.7237804878048784e-05, "loss": 0.0924, "step": 33975 }, { "epoch": 0.414390243902439, "grad_norm": 0.5111666321754456, "learning_rate": 1.7237398373983742e-05, "loss": 0.056, "step": 33980 }, { "epoch": 0.4144512195121951, "grad_norm": 0.44631174206733704, "learning_rate": 1.72369918699187e-05, "loss": 0.0618, "step": 33985 }, { "epoch": 0.4145121951219512, "grad_norm": 0.6474784016609192, "learning_rate": 1.7236585365853658e-05, "loss": 0.0749, "step": 33990 }, { "epoch": 0.4145731707317073, "grad_norm": 0.4714117646217346, "learning_rate": 1.723617886178862e-05, "loss": 0.1021, "step": 33995 }, { "epoch": 0.4146341463414634, "grad_norm": 0.7480970025062561, "learning_rate": 1.7235772357723578e-05, "loss": 0.0725, "step": 34000 }, { "epoch": 0.4146951219512195, "grad_norm": 0.6937733888626099, "learning_rate": 1.723536585365854e-05, "loss": 0.0684, "step": 34005 }, { "epoch": 0.4147560975609756, "grad_norm": 0.5142033696174622, "learning_rate": 1.7234959349593497e-05, "loss": 0.0613, "step": 34010 }, { "epoch": 0.4148170731707317, "grad_norm": 0.9897076487541199, "learning_rate": 1.7234552845528455e-05, "loss": 0.0478, "step": 34015 }, { "epoch": 0.4148780487804878, "grad_norm": 0.590419352054596, "learning_rate": 1.7234146341463417e-05, "loss": 0.0703, "step": 34020 }, { "epoch": 0.4149390243902439, "grad_norm": 1.0980859994888306, "learning_rate": 1.7233739837398375e-05, "loss": 0.0855, "step": 34025 }, { "epoch": 0.415, "grad_norm": 0.7149935960769653, "learning_rate": 1.7233333333333337e-05, "loss": 0.0707, "step": 34030 }, { "epoch": 0.4150609756097561, "grad_norm": 0.8168052434921265, "learning_rate": 1.7232926829268295e-05, "loss": 0.1055, "step": 34035 }, { "epoch": 0.4151219512195122, "grad_norm": 0.41226446628570557, "learning_rate": 1.7232520325203253e-05, "loss": 0.0602, "step": 34040 }, { "epoch": 0.4151829268292683, "grad_norm": 0.6157629489898682, "learning_rate": 1.723211382113821e-05, "loss": 0.084, "step": 34045 }, { "epoch": 0.41524390243902437, "grad_norm": 0.24352134764194489, "learning_rate": 1.7231707317073173e-05, "loss": 0.0658, "step": 34050 }, { "epoch": 0.41530487804878047, "grad_norm": 0.5640297532081604, "learning_rate": 1.723130081300813e-05, "loss": 0.0898, "step": 34055 }, { "epoch": 0.41536585365853657, "grad_norm": 0.8486080765724182, "learning_rate": 1.7230894308943092e-05, "loss": 0.0813, "step": 34060 }, { "epoch": 0.41542682926829266, "grad_norm": 0.4593299329280853, "learning_rate": 1.723048780487805e-05, "loss": 0.0641, "step": 34065 }, { "epoch": 0.41548780487804876, "grad_norm": 1.1184953451156616, "learning_rate": 1.723008130081301e-05, "loss": 0.1235, "step": 34070 }, { "epoch": 0.41554878048780486, "grad_norm": 0.5782505869865417, "learning_rate": 1.722967479674797e-05, "loss": 0.078, "step": 34075 }, { "epoch": 0.41560975609756096, "grad_norm": 0.5790871381759644, "learning_rate": 1.7229268292682928e-05, "loss": 0.0698, "step": 34080 }, { "epoch": 0.41567073170731705, "grad_norm": 0.8051581382751465, "learning_rate": 1.7228861788617886e-05, "loss": 0.0667, "step": 34085 }, { "epoch": 0.41573170731707315, "grad_norm": 0.49593713879585266, "learning_rate": 1.7228455284552848e-05, "loss": 0.0869, "step": 34090 }, { "epoch": 0.41579268292682925, "grad_norm": 0.7090794444084167, "learning_rate": 1.7228048780487806e-05, "loss": 0.0975, "step": 34095 }, { "epoch": 0.41585365853658535, "grad_norm": 0.6019740104675293, "learning_rate": 1.7227642276422764e-05, "loss": 0.0872, "step": 34100 }, { "epoch": 0.41591463414634144, "grad_norm": 0.7611421346664429, "learning_rate": 1.7227235772357725e-05, "loss": 0.0595, "step": 34105 }, { "epoch": 0.41597560975609754, "grad_norm": 0.796192467212677, "learning_rate": 1.7226829268292684e-05, "loss": 0.0799, "step": 34110 }, { "epoch": 0.41603658536585364, "grad_norm": 0.9988760352134705, "learning_rate": 1.7226422764227645e-05, "loss": 0.106, "step": 34115 }, { "epoch": 0.41609756097560974, "grad_norm": 0.9303354620933533, "learning_rate": 1.7226016260162603e-05, "loss": 0.0679, "step": 34120 }, { "epoch": 0.41615853658536583, "grad_norm": 0.8456496000289917, "learning_rate": 1.7225609756097565e-05, "loss": 0.0984, "step": 34125 }, { "epoch": 0.41621951219512193, "grad_norm": 0.8913552761077881, "learning_rate": 1.722520325203252e-05, "loss": 0.0741, "step": 34130 }, { "epoch": 0.41628048780487803, "grad_norm": 0.4536431133747101, "learning_rate": 1.722479674796748e-05, "loss": 0.054, "step": 34135 }, { "epoch": 0.4163414634146341, "grad_norm": 2.194385528564453, "learning_rate": 1.722439024390244e-05, "loss": 0.0781, "step": 34140 }, { "epoch": 0.4164024390243902, "grad_norm": 0.4309386909008026, "learning_rate": 1.72239837398374e-05, "loss": 0.061, "step": 34145 }, { "epoch": 0.4164634146341463, "grad_norm": 2.4137072563171387, "learning_rate": 1.722357723577236e-05, "loss": 0.0893, "step": 34150 }, { "epoch": 0.4165243902439024, "grad_norm": 0.5579776763916016, "learning_rate": 1.722317073170732e-05, "loss": 0.063, "step": 34155 }, { "epoch": 0.4165853658536585, "grad_norm": 0.9879333972930908, "learning_rate": 1.722276422764228e-05, "loss": 0.0775, "step": 34160 }, { "epoch": 0.4166463414634146, "grad_norm": 0.6807560324668884, "learning_rate": 1.7222357723577237e-05, "loss": 0.0723, "step": 34165 }, { "epoch": 0.4167073170731707, "grad_norm": 0.9164482951164246, "learning_rate": 1.7221951219512195e-05, "loss": 0.0797, "step": 34170 }, { "epoch": 0.4167682926829268, "grad_norm": 0.6412872076034546, "learning_rate": 1.7221544715447156e-05, "loss": 0.0725, "step": 34175 }, { "epoch": 0.4168292682926829, "grad_norm": 1.4943290948867798, "learning_rate": 1.7221138211382114e-05, "loss": 0.0709, "step": 34180 }, { "epoch": 0.416890243902439, "grad_norm": 0.6060147881507874, "learning_rate": 1.7220731707317076e-05, "loss": 0.1039, "step": 34185 }, { "epoch": 0.4169512195121951, "grad_norm": 0.6433326005935669, "learning_rate": 1.7220325203252034e-05, "loss": 0.0703, "step": 34190 }, { "epoch": 0.4170121951219512, "grad_norm": 0.4827021062374115, "learning_rate": 1.7219918699186992e-05, "loss": 0.1286, "step": 34195 }, { "epoch": 0.4170731707317073, "grad_norm": 0.6853476166725159, "learning_rate": 1.7219512195121954e-05, "loss": 0.075, "step": 34200 }, { "epoch": 0.4171341463414634, "grad_norm": 0.46429553627967834, "learning_rate": 1.7219105691056912e-05, "loss": 0.0708, "step": 34205 }, { "epoch": 0.4171951219512195, "grad_norm": 0.9186665415763855, "learning_rate": 1.7218699186991873e-05, "loss": 0.0938, "step": 34210 }, { "epoch": 0.4172560975609756, "grad_norm": 0.9437711238861084, "learning_rate": 1.721829268292683e-05, "loss": 0.1011, "step": 34215 }, { "epoch": 0.4173170731707317, "grad_norm": 0.6954092383384705, "learning_rate": 1.721788617886179e-05, "loss": 0.0758, "step": 34220 }, { "epoch": 0.4173780487804878, "grad_norm": 0.7216512560844421, "learning_rate": 1.7217479674796748e-05, "loss": 0.0887, "step": 34225 }, { "epoch": 0.4174390243902439, "grad_norm": 0.6860384345054626, "learning_rate": 1.721707317073171e-05, "loss": 0.0801, "step": 34230 }, { "epoch": 0.4175, "grad_norm": 0.7005438804626465, "learning_rate": 1.7216666666666667e-05, "loss": 0.0534, "step": 34235 }, { "epoch": 0.4175609756097561, "grad_norm": 0.31027501821517944, "learning_rate": 1.721626016260163e-05, "loss": 0.0772, "step": 34240 }, { "epoch": 0.4176219512195122, "grad_norm": 0.3756175935268402, "learning_rate": 1.7215853658536587e-05, "loss": 0.0776, "step": 34245 }, { "epoch": 0.4176829268292683, "grad_norm": 0.808934211730957, "learning_rate": 1.7215447154471545e-05, "loss": 0.0665, "step": 34250 }, { "epoch": 0.4177439024390244, "grad_norm": 1.101189374923706, "learning_rate": 1.7215040650406503e-05, "loss": 0.0938, "step": 34255 }, { "epoch": 0.41780487804878047, "grad_norm": 0.5713682174682617, "learning_rate": 1.7214634146341465e-05, "loss": 0.0942, "step": 34260 }, { "epoch": 0.41786585365853657, "grad_norm": 0.6575173735618591, "learning_rate": 1.7214227642276423e-05, "loss": 0.0873, "step": 34265 }, { "epoch": 0.41792682926829267, "grad_norm": 0.696456789970398, "learning_rate": 1.7213821138211384e-05, "loss": 0.0771, "step": 34270 }, { "epoch": 0.41798780487804876, "grad_norm": 1.6031324863433838, "learning_rate": 1.7213414634146342e-05, "loss": 0.1138, "step": 34275 }, { "epoch": 0.41804878048780486, "grad_norm": 0.46469226479530334, "learning_rate": 1.72130081300813e-05, "loss": 0.0747, "step": 34280 }, { "epoch": 0.41810975609756096, "grad_norm": 0.6964375376701355, "learning_rate": 1.7212601626016262e-05, "loss": 0.0804, "step": 34285 }, { "epoch": 0.41817073170731706, "grad_norm": 0.6229690909385681, "learning_rate": 1.721219512195122e-05, "loss": 0.0704, "step": 34290 }, { "epoch": 0.41823170731707315, "grad_norm": 0.8342777490615845, "learning_rate": 1.7211788617886182e-05, "loss": 0.084, "step": 34295 }, { "epoch": 0.41829268292682925, "grad_norm": 0.56179279088974, "learning_rate": 1.721138211382114e-05, "loss": 0.0848, "step": 34300 }, { "epoch": 0.41835365853658535, "grad_norm": 0.454949289560318, "learning_rate": 1.72109756097561e-05, "loss": 0.0676, "step": 34305 }, { "epoch": 0.41841463414634145, "grad_norm": 1.5961036682128906, "learning_rate": 1.7210569105691056e-05, "loss": 0.0786, "step": 34310 }, { "epoch": 0.41847560975609754, "grad_norm": 0.3479662239551544, "learning_rate": 1.7210162601626018e-05, "loss": 0.1153, "step": 34315 }, { "epoch": 0.41853658536585364, "grad_norm": 0.7783010601997375, "learning_rate": 1.7209756097560976e-05, "loss": 0.0696, "step": 34320 }, { "epoch": 0.41859756097560974, "grad_norm": 1.3183776140213013, "learning_rate": 1.7209349593495937e-05, "loss": 0.0772, "step": 34325 }, { "epoch": 0.41865853658536584, "grad_norm": 0.44099777936935425, "learning_rate": 1.7208943089430895e-05, "loss": 0.0899, "step": 34330 }, { "epoch": 0.41871951219512193, "grad_norm": 1.0177900791168213, "learning_rate": 1.7208536585365857e-05, "loss": 0.0932, "step": 34335 }, { "epoch": 0.41878048780487803, "grad_norm": 0.6013968586921692, "learning_rate": 1.7208130081300815e-05, "loss": 0.1033, "step": 34340 }, { "epoch": 0.41884146341463413, "grad_norm": 2.1667778491973877, "learning_rate": 1.7207723577235773e-05, "loss": 0.1075, "step": 34345 }, { "epoch": 0.4189024390243902, "grad_norm": 0.5236873030662537, "learning_rate": 1.720731707317073e-05, "loss": 0.0747, "step": 34350 }, { "epoch": 0.4189634146341463, "grad_norm": 0.8657574653625488, "learning_rate": 1.7206910569105693e-05, "loss": 0.0909, "step": 34355 }, { "epoch": 0.4190243902439024, "grad_norm": 0.7896395325660706, "learning_rate": 1.720650406504065e-05, "loss": 0.077, "step": 34360 }, { "epoch": 0.4190853658536585, "grad_norm": 1.5436574220657349, "learning_rate": 1.7206097560975612e-05, "loss": 0.1002, "step": 34365 }, { "epoch": 0.4191463414634146, "grad_norm": 0.3721030056476593, "learning_rate": 1.720569105691057e-05, "loss": 0.0986, "step": 34370 }, { "epoch": 0.4192073170731707, "grad_norm": 0.9596165418624878, "learning_rate": 1.720528455284553e-05, "loss": 0.0804, "step": 34375 }, { "epoch": 0.4192682926829268, "grad_norm": 0.6491621732711792, "learning_rate": 1.720487804878049e-05, "loss": 0.0915, "step": 34380 }, { "epoch": 0.4193292682926829, "grad_norm": 0.8694333434104919, "learning_rate": 1.720447154471545e-05, "loss": 0.0798, "step": 34385 }, { "epoch": 0.419390243902439, "grad_norm": 0.7398212552070618, "learning_rate": 1.720406504065041e-05, "loss": 0.0566, "step": 34390 }, { "epoch": 0.4194512195121951, "grad_norm": 0.5236810445785522, "learning_rate": 1.7203658536585368e-05, "loss": 0.1103, "step": 34395 }, { "epoch": 0.4195121951219512, "grad_norm": 0.5717010498046875, "learning_rate": 1.7203252032520326e-05, "loss": 0.0535, "step": 34400 }, { "epoch": 0.4195731707317073, "grad_norm": 0.666022777557373, "learning_rate": 1.7202845528455284e-05, "loss": 0.0622, "step": 34405 }, { "epoch": 0.4196341463414634, "grad_norm": 0.4323486089706421, "learning_rate": 1.7202439024390246e-05, "loss": 0.0587, "step": 34410 }, { "epoch": 0.4196951219512195, "grad_norm": 0.7759457230567932, "learning_rate": 1.7202032520325204e-05, "loss": 0.0717, "step": 34415 }, { "epoch": 0.4197560975609756, "grad_norm": 0.9200199842453003, "learning_rate": 1.7201626016260165e-05, "loss": 0.0739, "step": 34420 }, { "epoch": 0.4198170731707317, "grad_norm": 0.41078588366508484, "learning_rate": 1.7201219512195124e-05, "loss": 0.0771, "step": 34425 }, { "epoch": 0.4198780487804878, "grad_norm": 0.7804189324378967, "learning_rate": 1.7200813008130082e-05, "loss": 0.0641, "step": 34430 }, { "epoch": 0.4199390243902439, "grad_norm": 0.7665279507637024, "learning_rate": 1.720040650406504e-05, "loss": 0.0935, "step": 34435 }, { "epoch": 0.42, "grad_norm": 0.6112057566642761, "learning_rate": 1.72e-05, "loss": 0.0878, "step": 34440 }, { "epoch": 0.4200609756097561, "grad_norm": 1.0916454792022705, "learning_rate": 1.719959349593496e-05, "loss": 0.0926, "step": 34445 }, { "epoch": 0.4201219512195122, "grad_norm": 0.818073570728302, "learning_rate": 1.719918699186992e-05, "loss": 0.0602, "step": 34450 }, { "epoch": 0.4201829268292683, "grad_norm": 2.0746238231658936, "learning_rate": 1.719878048780488e-05, "loss": 0.0745, "step": 34455 }, { "epoch": 0.4202439024390244, "grad_norm": 0.9019832015037537, "learning_rate": 1.7198373983739837e-05, "loss": 0.0826, "step": 34460 }, { "epoch": 0.4203048780487805, "grad_norm": 1.5627310276031494, "learning_rate": 1.71979674796748e-05, "loss": 0.0713, "step": 34465 }, { "epoch": 0.42036585365853657, "grad_norm": 0.5516285300254822, "learning_rate": 1.7197560975609757e-05, "loss": 0.0588, "step": 34470 }, { "epoch": 0.42042682926829267, "grad_norm": 0.6362646818161011, "learning_rate": 1.719715447154472e-05, "loss": 0.0987, "step": 34475 }, { "epoch": 0.42048780487804877, "grad_norm": 0.617476761341095, "learning_rate": 1.7196747967479677e-05, "loss": 0.0929, "step": 34480 }, { "epoch": 0.42054878048780486, "grad_norm": 0.787614107131958, "learning_rate": 1.7196341463414638e-05, "loss": 0.0715, "step": 34485 }, { "epoch": 0.42060975609756096, "grad_norm": 0.6692150831222534, "learning_rate": 1.7195934959349593e-05, "loss": 0.0603, "step": 34490 }, { "epoch": 0.42067073170731706, "grad_norm": 1.0261479616165161, "learning_rate": 1.7195528455284554e-05, "loss": 0.066, "step": 34495 }, { "epoch": 0.42073170731707316, "grad_norm": 0.5286292433738708, "learning_rate": 1.7195121951219512e-05, "loss": 0.0833, "step": 34500 }, { "epoch": 0.42079268292682925, "grad_norm": 0.3942515552043915, "learning_rate": 1.7194715447154474e-05, "loss": 0.0884, "step": 34505 }, { "epoch": 0.42085365853658535, "grad_norm": 0.3863806128501892, "learning_rate": 1.7194308943089432e-05, "loss": 0.0512, "step": 34510 }, { "epoch": 0.42091463414634145, "grad_norm": 0.49560314416885376, "learning_rate": 1.7193902439024394e-05, "loss": 0.0967, "step": 34515 }, { "epoch": 0.42097560975609755, "grad_norm": 0.4817596971988678, "learning_rate": 1.719349593495935e-05, "loss": 0.0702, "step": 34520 }, { "epoch": 0.42103658536585364, "grad_norm": 1.6904789209365845, "learning_rate": 1.719308943089431e-05, "loss": 0.124, "step": 34525 }, { "epoch": 0.42109756097560974, "grad_norm": 1.0788480043411255, "learning_rate": 1.7192682926829268e-05, "loss": 0.1216, "step": 34530 }, { "epoch": 0.42115853658536584, "grad_norm": 0.9887358546257019, "learning_rate": 1.719227642276423e-05, "loss": 0.1126, "step": 34535 }, { "epoch": 0.42121951219512194, "grad_norm": 0.9150195717811584, "learning_rate": 1.7191869918699188e-05, "loss": 0.0746, "step": 34540 }, { "epoch": 0.42128048780487803, "grad_norm": 0.6468271017074585, "learning_rate": 1.719146341463415e-05, "loss": 0.0623, "step": 34545 }, { "epoch": 0.42134146341463413, "grad_norm": 0.9489278197288513, "learning_rate": 1.7191056910569107e-05, "loss": 0.0768, "step": 34550 }, { "epoch": 0.42140243902439023, "grad_norm": 0.42245668172836304, "learning_rate": 1.7190650406504065e-05, "loss": 0.0681, "step": 34555 }, { "epoch": 0.4214634146341463, "grad_norm": 0.37050655484199524, "learning_rate": 1.7190243902439027e-05, "loss": 0.06, "step": 34560 }, { "epoch": 0.4215243902439024, "grad_norm": 0.6945165991783142, "learning_rate": 1.7189837398373985e-05, "loss": 0.067, "step": 34565 }, { "epoch": 0.4215853658536585, "grad_norm": 0.5205278992652893, "learning_rate": 1.7189430894308947e-05, "loss": 0.0844, "step": 34570 }, { "epoch": 0.4216463414634146, "grad_norm": 0.5277308821678162, "learning_rate": 1.7189024390243905e-05, "loss": 0.0541, "step": 34575 }, { "epoch": 0.4217073170731707, "grad_norm": 0.8149511814117432, "learning_rate": 1.7188617886178863e-05, "loss": 0.0627, "step": 34580 }, { "epoch": 0.4217682926829268, "grad_norm": 0.8468625545501709, "learning_rate": 1.718821138211382e-05, "loss": 0.0787, "step": 34585 }, { "epoch": 0.4218292682926829, "grad_norm": 0.5843992233276367, "learning_rate": 1.7187804878048782e-05, "loss": 0.0728, "step": 34590 }, { "epoch": 0.421890243902439, "grad_norm": 0.5004850625991821, "learning_rate": 1.718739837398374e-05, "loss": 0.0735, "step": 34595 }, { "epoch": 0.4219512195121951, "grad_norm": 0.31655043363571167, "learning_rate": 1.7186991869918702e-05, "loss": 0.0694, "step": 34600 }, { "epoch": 0.4220121951219512, "grad_norm": 0.7053194642066956, "learning_rate": 1.718658536585366e-05, "loss": 0.0684, "step": 34605 }, { "epoch": 0.4220731707317073, "grad_norm": 1.1181519031524658, "learning_rate": 1.718617886178862e-05, "loss": 0.1361, "step": 34610 }, { "epoch": 0.4221341463414634, "grad_norm": 0.4007021486759186, "learning_rate": 1.7185772357723576e-05, "loss": 0.065, "step": 34615 }, { "epoch": 0.4221951219512195, "grad_norm": 1.0073686838150024, "learning_rate": 1.7185365853658538e-05, "loss": 0.0631, "step": 34620 }, { "epoch": 0.4222560975609756, "grad_norm": 0.5240823030471802, "learning_rate": 1.7184959349593496e-05, "loss": 0.0597, "step": 34625 }, { "epoch": 0.4223170731707317, "grad_norm": 0.31652477383613586, "learning_rate": 1.7184552845528458e-05, "loss": 0.0813, "step": 34630 }, { "epoch": 0.4223780487804878, "grad_norm": 0.38275346159935, "learning_rate": 1.7184146341463416e-05, "loss": 0.0638, "step": 34635 }, { "epoch": 0.4224390243902439, "grad_norm": 0.5375688672065735, "learning_rate": 1.7183739837398374e-05, "loss": 0.0714, "step": 34640 }, { "epoch": 0.4225, "grad_norm": 0.6669514775276184, "learning_rate": 1.7183333333333335e-05, "loss": 0.0637, "step": 34645 }, { "epoch": 0.4225609756097561, "grad_norm": 1.2253586053848267, "learning_rate": 1.7182926829268294e-05, "loss": 0.1038, "step": 34650 }, { "epoch": 0.4226219512195122, "grad_norm": 0.5510749816894531, "learning_rate": 1.7182520325203255e-05, "loss": 0.1189, "step": 34655 }, { "epoch": 0.4226829268292683, "grad_norm": 1.1716628074645996, "learning_rate": 1.7182113821138213e-05, "loss": 0.0663, "step": 34660 }, { "epoch": 0.4227439024390244, "grad_norm": 0.4448736608028412, "learning_rate": 1.718170731707317e-05, "loss": 0.0684, "step": 34665 }, { "epoch": 0.4228048780487805, "grad_norm": 0.759037971496582, "learning_rate": 1.718130081300813e-05, "loss": 0.0875, "step": 34670 }, { "epoch": 0.4228658536585366, "grad_norm": 0.6588261723518372, "learning_rate": 1.718089430894309e-05, "loss": 0.093, "step": 34675 }, { "epoch": 0.42292682926829267, "grad_norm": 0.7976267337799072, "learning_rate": 1.718048780487805e-05, "loss": 0.0737, "step": 34680 }, { "epoch": 0.42298780487804877, "grad_norm": 0.6966115236282349, "learning_rate": 1.718008130081301e-05, "loss": 0.0656, "step": 34685 }, { "epoch": 0.42304878048780487, "grad_norm": 1.3535499572753906, "learning_rate": 1.717967479674797e-05, "loss": 0.0829, "step": 34690 }, { "epoch": 0.42310975609756096, "grad_norm": 0.6562657356262207, "learning_rate": 1.717926829268293e-05, "loss": 0.0923, "step": 34695 }, { "epoch": 0.42317073170731706, "grad_norm": 0.44598546624183655, "learning_rate": 1.7178861788617885e-05, "loss": 0.0569, "step": 34700 }, { "epoch": 0.42323170731707316, "grad_norm": 0.6745259761810303, "learning_rate": 1.7178455284552846e-05, "loss": 0.0913, "step": 34705 }, { "epoch": 0.42329268292682926, "grad_norm": 0.3951910436153412, "learning_rate": 1.7178048780487805e-05, "loss": 0.0674, "step": 34710 }, { "epoch": 0.42335365853658535, "grad_norm": 0.4433436691761017, "learning_rate": 1.7177642276422766e-05, "loss": 0.0614, "step": 34715 }, { "epoch": 0.42341463414634145, "grad_norm": 5.94297456741333, "learning_rate": 1.7177235772357724e-05, "loss": 0.0826, "step": 34720 }, { "epoch": 0.42347560975609755, "grad_norm": 1.1659375429153442, "learning_rate": 1.7176829268292686e-05, "loss": 0.081, "step": 34725 }, { "epoch": 0.42353658536585365, "grad_norm": 0.7078565359115601, "learning_rate": 1.7176422764227644e-05, "loss": 0.1068, "step": 34730 }, { "epoch": 0.42359756097560974, "grad_norm": 0.35297438502311707, "learning_rate": 1.7176016260162602e-05, "loss": 0.0548, "step": 34735 }, { "epoch": 0.42365853658536584, "grad_norm": 0.6478835940361023, "learning_rate": 1.7175609756097564e-05, "loss": 0.0659, "step": 34740 }, { "epoch": 0.42371951219512194, "grad_norm": 0.5683725476264954, "learning_rate": 1.717520325203252e-05, "loss": 0.0648, "step": 34745 }, { "epoch": 0.42378048780487804, "grad_norm": 0.702181875705719, "learning_rate": 1.7174796747967483e-05, "loss": 0.0794, "step": 34750 }, { "epoch": 0.42384146341463413, "grad_norm": 0.5394810438156128, "learning_rate": 1.717439024390244e-05, "loss": 0.0778, "step": 34755 }, { "epoch": 0.42390243902439023, "grad_norm": 1.5647735595703125, "learning_rate": 1.71739837398374e-05, "loss": 0.0718, "step": 34760 }, { "epoch": 0.42396341463414633, "grad_norm": 0.5597065091133118, "learning_rate": 1.7173577235772358e-05, "loss": 0.0733, "step": 34765 }, { "epoch": 0.4240243902439024, "grad_norm": 1.0252869129180908, "learning_rate": 1.717317073170732e-05, "loss": 0.1003, "step": 34770 }, { "epoch": 0.4240853658536585, "grad_norm": 0.8113433718681335, "learning_rate": 1.7172764227642277e-05, "loss": 0.0768, "step": 34775 }, { "epoch": 0.4241463414634146, "grad_norm": 0.5090694427490234, "learning_rate": 1.717235772357724e-05, "loss": 0.0461, "step": 34780 }, { "epoch": 0.4242073170731707, "grad_norm": 1.4662697315216064, "learning_rate": 1.7171951219512197e-05, "loss": 0.0732, "step": 34785 }, { "epoch": 0.4242682926829268, "grad_norm": 1.501316785812378, "learning_rate": 1.7171544715447155e-05, "loss": 0.0696, "step": 34790 }, { "epoch": 0.4243292682926829, "grad_norm": 0.40140965580940247, "learning_rate": 1.7171138211382113e-05, "loss": 0.0582, "step": 34795 }, { "epoch": 0.424390243902439, "grad_norm": 0.7413012981414795, "learning_rate": 1.7170731707317075e-05, "loss": 0.0475, "step": 34800 }, { "epoch": 0.4244512195121951, "grad_norm": 0.8722193837165833, "learning_rate": 1.7170325203252033e-05, "loss": 0.0542, "step": 34805 }, { "epoch": 0.4245121951219512, "grad_norm": 0.581346333026886, "learning_rate": 1.7169918699186994e-05, "loss": 0.0691, "step": 34810 }, { "epoch": 0.4245731707317073, "grad_norm": 0.5750762224197388, "learning_rate": 1.7169512195121952e-05, "loss": 0.1045, "step": 34815 }, { "epoch": 0.4246341463414634, "grad_norm": 0.6655908226966858, "learning_rate": 1.716910569105691e-05, "loss": 0.0897, "step": 34820 }, { "epoch": 0.4246951219512195, "grad_norm": 0.5499227046966553, "learning_rate": 1.7168699186991872e-05, "loss": 0.0594, "step": 34825 }, { "epoch": 0.4247560975609756, "grad_norm": 0.5403394103050232, "learning_rate": 1.716829268292683e-05, "loss": 0.0981, "step": 34830 }, { "epoch": 0.4248170731707317, "grad_norm": 2.7715487480163574, "learning_rate": 1.716788617886179e-05, "loss": 0.1031, "step": 34835 }, { "epoch": 0.4248780487804878, "grad_norm": 0.4289516508579254, "learning_rate": 1.716747967479675e-05, "loss": 0.0707, "step": 34840 }, { "epoch": 0.4249390243902439, "grad_norm": 0.7713654637336731, "learning_rate": 1.7167073170731708e-05, "loss": 0.0924, "step": 34845 }, { "epoch": 0.425, "grad_norm": 0.5038262009620667, "learning_rate": 1.7166666666666666e-05, "loss": 0.0644, "step": 34850 }, { "epoch": 0.4250609756097561, "grad_norm": 0.7421058416366577, "learning_rate": 1.7166260162601628e-05, "loss": 0.0743, "step": 34855 }, { "epoch": 0.4251219512195122, "grad_norm": 4.061269283294678, "learning_rate": 1.7165853658536586e-05, "loss": 0.073, "step": 34860 }, { "epoch": 0.4251829268292683, "grad_norm": 0.6582024097442627, "learning_rate": 1.7165447154471547e-05, "loss": 0.0877, "step": 34865 }, { "epoch": 0.4252439024390244, "grad_norm": 0.7482523322105408, "learning_rate": 1.7165040650406505e-05, "loss": 0.0606, "step": 34870 }, { "epoch": 0.4253048780487805, "grad_norm": 0.6597718000411987, "learning_rate": 1.7164634146341467e-05, "loss": 0.0677, "step": 34875 }, { "epoch": 0.4253658536585366, "grad_norm": 0.7950534224510193, "learning_rate": 1.716422764227642e-05, "loss": 0.0802, "step": 34880 }, { "epoch": 0.42542682926829267, "grad_norm": 2.13803768157959, "learning_rate": 1.7163821138211383e-05, "loss": 0.0842, "step": 34885 }, { "epoch": 0.42548780487804877, "grad_norm": 1.9756516218185425, "learning_rate": 1.716341463414634e-05, "loss": 0.0861, "step": 34890 }, { "epoch": 0.42554878048780487, "grad_norm": 1.354567289352417, "learning_rate": 1.7163008130081303e-05, "loss": 0.0773, "step": 34895 }, { "epoch": 0.42560975609756097, "grad_norm": 1.0594981908798218, "learning_rate": 1.716260162601626e-05, "loss": 0.0586, "step": 34900 }, { "epoch": 0.42567073170731706, "grad_norm": 0.8559247851371765, "learning_rate": 1.7162195121951222e-05, "loss": 0.0636, "step": 34905 }, { "epoch": 0.42573170731707316, "grad_norm": 0.5188690423965454, "learning_rate": 1.716178861788618e-05, "loss": 0.0919, "step": 34910 }, { "epoch": 0.42579268292682926, "grad_norm": 1.829810619354248, "learning_rate": 1.716138211382114e-05, "loss": 0.0959, "step": 34915 }, { "epoch": 0.42585365853658536, "grad_norm": 0.5450523495674133, "learning_rate": 1.71609756097561e-05, "loss": 0.07, "step": 34920 }, { "epoch": 0.42591463414634145, "grad_norm": 1.0781505107879639, "learning_rate": 1.716056910569106e-05, "loss": 0.0561, "step": 34925 }, { "epoch": 0.42597560975609755, "grad_norm": 0.7176346778869629, "learning_rate": 1.7160162601626016e-05, "loss": 0.1014, "step": 34930 }, { "epoch": 0.42603658536585365, "grad_norm": 0.42088329792022705, "learning_rate": 1.7159756097560978e-05, "loss": 0.0706, "step": 34935 }, { "epoch": 0.42609756097560975, "grad_norm": 0.9312780499458313, "learning_rate": 1.7159349593495936e-05, "loss": 0.0656, "step": 34940 }, { "epoch": 0.42615853658536584, "grad_norm": 2.879854917526245, "learning_rate": 1.7158943089430894e-05, "loss": 0.082, "step": 34945 }, { "epoch": 0.42621951219512194, "grad_norm": 0.9402745962142944, "learning_rate": 1.7158536585365856e-05, "loss": 0.0893, "step": 34950 }, { "epoch": 0.42628048780487804, "grad_norm": 0.7892346978187561, "learning_rate": 1.7158130081300814e-05, "loss": 0.0764, "step": 34955 }, { "epoch": 0.42634146341463414, "grad_norm": 0.8350266218185425, "learning_rate": 1.7157723577235775e-05, "loss": 0.0997, "step": 34960 }, { "epoch": 0.42640243902439023, "grad_norm": 0.7587133049964905, "learning_rate": 1.7157317073170733e-05, "loss": 0.0585, "step": 34965 }, { "epoch": 0.42646341463414633, "grad_norm": 0.41223523020744324, "learning_rate": 1.715691056910569e-05, "loss": 0.0781, "step": 34970 }, { "epoch": 0.42652439024390243, "grad_norm": 0.5583617687225342, "learning_rate": 1.715650406504065e-05, "loss": 0.0943, "step": 34975 }, { "epoch": 0.4265853658536585, "grad_norm": 1.5598030090332031, "learning_rate": 1.715609756097561e-05, "loss": 0.0829, "step": 34980 }, { "epoch": 0.4266463414634146, "grad_norm": 0.8401039242744446, "learning_rate": 1.715569105691057e-05, "loss": 0.0602, "step": 34985 }, { "epoch": 0.4267073170731707, "grad_norm": 0.36176392436027527, "learning_rate": 1.715528455284553e-05, "loss": 0.0598, "step": 34990 }, { "epoch": 0.4267682926829268, "grad_norm": 0.547584056854248, "learning_rate": 1.715487804878049e-05, "loss": 0.0785, "step": 34995 }, { "epoch": 0.4268292682926829, "grad_norm": 1.1155872344970703, "learning_rate": 1.7154471544715447e-05, "loss": 0.0654, "step": 35000 }, { "epoch": 0.426890243902439, "grad_norm": 0.6957824230194092, "learning_rate": 1.715406504065041e-05, "loss": 0.0777, "step": 35005 }, { "epoch": 0.4269512195121951, "grad_norm": 0.6371099948883057, "learning_rate": 1.7153658536585367e-05, "loss": 0.0494, "step": 35010 }, { "epoch": 0.4270121951219512, "grad_norm": 1.055639386177063, "learning_rate": 1.715325203252033e-05, "loss": 0.0818, "step": 35015 }, { "epoch": 0.4270731707317073, "grad_norm": 1.487727403640747, "learning_rate": 1.7152845528455286e-05, "loss": 0.0881, "step": 35020 }, { "epoch": 0.4271341463414634, "grad_norm": 0.7026081681251526, "learning_rate": 1.7152439024390245e-05, "loss": 0.0712, "step": 35025 }, { "epoch": 0.4271951219512195, "grad_norm": 0.656732439994812, "learning_rate": 1.7152032520325203e-05, "loss": 0.0936, "step": 35030 }, { "epoch": 0.4272560975609756, "grad_norm": 0.7466926574707031, "learning_rate": 1.7151626016260164e-05, "loss": 0.1051, "step": 35035 }, { "epoch": 0.4273170731707317, "grad_norm": 0.6336780786514282, "learning_rate": 1.7151219512195122e-05, "loss": 0.0722, "step": 35040 }, { "epoch": 0.4273780487804878, "grad_norm": 1.5883525609970093, "learning_rate": 1.7150813008130084e-05, "loss": 0.074, "step": 35045 }, { "epoch": 0.4274390243902439, "grad_norm": 1.3245707750320435, "learning_rate": 1.7150406504065042e-05, "loss": 0.0958, "step": 35050 }, { "epoch": 0.4275, "grad_norm": 0.5411216020584106, "learning_rate": 1.7150000000000004e-05, "loss": 0.0777, "step": 35055 }, { "epoch": 0.4275609756097561, "grad_norm": 0.7531687021255493, "learning_rate": 1.7149593495934958e-05, "loss": 0.0741, "step": 35060 }, { "epoch": 0.4276219512195122, "grad_norm": 0.471295028924942, "learning_rate": 1.714918699186992e-05, "loss": 0.0619, "step": 35065 }, { "epoch": 0.4276829268292683, "grad_norm": 6.331643104553223, "learning_rate": 1.7148780487804878e-05, "loss": 0.1106, "step": 35070 }, { "epoch": 0.4277439024390244, "grad_norm": 0.3466043174266815, "learning_rate": 1.714837398373984e-05, "loss": 0.0731, "step": 35075 }, { "epoch": 0.4278048780487805, "grad_norm": 0.8062158226966858, "learning_rate": 1.7147967479674798e-05, "loss": 0.0517, "step": 35080 }, { "epoch": 0.4278658536585366, "grad_norm": 0.42183470726013184, "learning_rate": 1.714756097560976e-05, "loss": 0.0607, "step": 35085 }, { "epoch": 0.4279268292682927, "grad_norm": 0.9668928980827332, "learning_rate": 1.7147154471544717e-05, "loss": 0.0643, "step": 35090 }, { "epoch": 0.42798780487804877, "grad_norm": 1.95649254322052, "learning_rate": 1.7146747967479675e-05, "loss": 0.0694, "step": 35095 }, { "epoch": 0.42804878048780487, "grad_norm": 0.8384796977043152, "learning_rate": 1.7146341463414637e-05, "loss": 0.0759, "step": 35100 }, { "epoch": 0.42810975609756097, "grad_norm": 0.5470051765441895, "learning_rate": 1.7145934959349595e-05, "loss": 0.0633, "step": 35105 }, { "epoch": 0.42817073170731706, "grad_norm": 0.6796663403511047, "learning_rate": 1.7145528455284553e-05, "loss": 0.0862, "step": 35110 }, { "epoch": 0.42823170731707316, "grad_norm": 1.485403060913086, "learning_rate": 1.7145121951219515e-05, "loss": 0.0607, "step": 35115 }, { "epoch": 0.42829268292682926, "grad_norm": 0.4785074293613434, "learning_rate": 1.7144715447154473e-05, "loss": 0.0768, "step": 35120 }, { "epoch": 0.42835365853658536, "grad_norm": 0.5343958735466003, "learning_rate": 1.714430894308943e-05, "loss": 0.1373, "step": 35125 }, { "epoch": 0.42841463414634146, "grad_norm": 0.7592976093292236, "learning_rate": 1.7143902439024392e-05, "loss": 0.0674, "step": 35130 }, { "epoch": 0.42847560975609755, "grad_norm": 0.5611681342124939, "learning_rate": 1.714349593495935e-05, "loss": 0.0507, "step": 35135 }, { "epoch": 0.42853658536585365, "grad_norm": 0.5960752964019775, "learning_rate": 1.7143089430894312e-05, "loss": 0.0588, "step": 35140 }, { "epoch": 0.42859756097560975, "grad_norm": 0.5944955348968506, "learning_rate": 1.714268292682927e-05, "loss": 0.0762, "step": 35145 }, { "epoch": 0.42865853658536585, "grad_norm": 1.0510603189468384, "learning_rate": 1.7142276422764228e-05, "loss": 0.0735, "step": 35150 }, { "epoch": 0.42871951219512194, "grad_norm": 0.7597401142120361, "learning_rate": 1.7141869918699186e-05, "loss": 0.099, "step": 35155 }, { "epoch": 0.42878048780487804, "grad_norm": 0.517327606678009, "learning_rate": 1.7141463414634148e-05, "loss": 0.1002, "step": 35160 }, { "epoch": 0.42884146341463414, "grad_norm": 0.6704753637313843, "learning_rate": 1.7141056910569106e-05, "loss": 0.0577, "step": 35165 }, { "epoch": 0.42890243902439024, "grad_norm": 0.8724973201751709, "learning_rate": 1.7140650406504068e-05, "loss": 0.0675, "step": 35170 }, { "epoch": 0.42896341463414633, "grad_norm": 0.7275646924972534, "learning_rate": 1.7140243902439026e-05, "loss": 0.0801, "step": 35175 }, { "epoch": 0.42902439024390243, "grad_norm": 0.43977421522140503, "learning_rate": 1.7139837398373984e-05, "loss": 0.0568, "step": 35180 }, { "epoch": 0.42908536585365853, "grad_norm": 2.507962703704834, "learning_rate": 1.7139430894308945e-05, "loss": 0.0811, "step": 35185 }, { "epoch": 0.4291463414634146, "grad_norm": 0.36435869336128235, "learning_rate": 1.7139024390243903e-05, "loss": 0.0647, "step": 35190 }, { "epoch": 0.4292073170731707, "grad_norm": 1.295720100402832, "learning_rate": 1.713861788617886e-05, "loss": 0.1096, "step": 35195 }, { "epoch": 0.4292682926829268, "grad_norm": 1.0914794206619263, "learning_rate": 1.7138211382113823e-05, "loss": 0.0809, "step": 35200 }, { "epoch": 0.4293292682926829, "grad_norm": 0.8271869421005249, "learning_rate": 1.713780487804878e-05, "loss": 0.0657, "step": 35205 }, { "epoch": 0.429390243902439, "grad_norm": 0.3986794352531433, "learning_rate": 1.713739837398374e-05, "loss": 0.0758, "step": 35210 }, { "epoch": 0.4294512195121951, "grad_norm": 0.6871868371963501, "learning_rate": 1.71369918699187e-05, "loss": 0.1067, "step": 35215 }, { "epoch": 0.4295121951219512, "grad_norm": 0.7512972950935364, "learning_rate": 1.713658536585366e-05, "loss": 0.0637, "step": 35220 }, { "epoch": 0.4295731707317073, "grad_norm": 0.8363794684410095, "learning_rate": 1.713617886178862e-05, "loss": 0.0901, "step": 35225 }, { "epoch": 0.4296341463414634, "grad_norm": 1.0017555952072144, "learning_rate": 1.713577235772358e-05, "loss": 0.0639, "step": 35230 }, { "epoch": 0.4296951219512195, "grad_norm": 0.9543589353561401, "learning_rate": 1.713536585365854e-05, "loss": 0.0571, "step": 35235 }, { "epoch": 0.4297560975609756, "grad_norm": 0.7551397681236267, "learning_rate": 1.7134959349593495e-05, "loss": 0.0732, "step": 35240 }, { "epoch": 0.4298170731707317, "grad_norm": 0.6685467958450317, "learning_rate": 1.7134552845528456e-05, "loss": 0.0578, "step": 35245 }, { "epoch": 0.4298780487804878, "grad_norm": 0.4476267397403717, "learning_rate": 1.7134146341463415e-05, "loss": 0.0525, "step": 35250 }, { "epoch": 0.4299390243902439, "grad_norm": 0.49193283915519714, "learning_rate": 1.7133739837398376e-05, "loss": 0.0582, "step": 35255 }, { "epoch": 0.43, "grad_norm": 0.7630869746208191, "learning_rate": 1.7133333333333334e-05, "loss": 0.1028, "step": 35260 }, { "epoch": 0.4300609756097561, "grad_norm": 0.6771966218948364, "learning_rate": 1.7132926829268296e-05, "loss": 0.0706, "step": 35265 }, { "epoch": 0.4301219512195122, "grad_norm": 0.7131479382514954, "learning_rate": 1.7132520325203254e-05, "loss": 0.0767, "step": 35270 }, { "epoch": 0.4301829268292683, "grad_norm": 0.5224115252494812, "learning_rate": 1.7132113821138212e-05, "loss": 0.0907, "step": 35275 }, { "epoch": 0.4302439024390244, "grad_norm": 0.6657238006591797, "learning_rate": 1.7131707317073173e-05, "loss": 0.0519, "step": 35280 }, { "epoch": 0.4303048780487805, "grad_norm": 0.4915866255760193, "learning_rate": 1.713130081300813e-05, "loss": 0.1553, "step": 35285 }, { "epoch": 0.4303658536585366, "grad_norm": 0.8377329111099243, "learning_rate": 1.713089430894309e-05, "loss": 0.0523, "step": 35290 }, { "epoch": 0.4304268292682927, "grad_norm": 0.5552785992622375, "learning_rate": 1.713048780487805e-05, "loss": 0.0621, "step": 35295 }, { "epoch": 0.4304878048780488, "grad_norm": 0.43775391578674316, "learning_rate": 1.713008130081301e-05, "loss": 0.0573, "step": 35300 }, { "epoch": 0.43054878048780487, "grad_norm": 0.5950252413749695, "learning_rate": 1.7129674796747967e-05, "loss": 0.0583, "step": 35305 }, { "epoch": 0.43060975609756097, "grad_norm": 0.6641182899475098, "learning_rate": 1.712926829268293e-05, "loss": 0.0889, "step": 35310 }, { "epoch": 0.43067073170731707, "grad_norm": 1.0105183124542236, "learning_rate": 1.7128861788617887e-05, "loss": 0.0766, "step": 35315 }, { "epoch": 0.43073170731707316, "grad_norm": 1.0650092363357544, "learning_rate": 1.712845528455285e-05, "loss": 0.1434, "step": 35320 }, { "epoch": 0.43079268292682926, "grad_norm": 0.7543355226516724, "learning_rate": 1.7128048780487807e-05, "loss": 0.053, "step": 35325 }, { "epoch": 0.43085365853658536, "grad_norm": 0.6427677273750305, "learning_rate": 1.7127642276422765e-05, "loss": 0.0639, "step": 35330 }, { "epoch": 0.43091463414634146, "grad_norm": 0.67454594373703, "learning_rate": 1.7127235772357723e-05, "loss": 0.08, "step": 35335 }, { "epoch": 0.43097560975609756, "grad_norm": 0.545463502407074, "learning_rate": 1.7126829268292685e-05, "loss": 0.0952, "step": 35340 }, { "epoch": 0.43103658536585365, "grad_norm": 0.5913627743721008, "learning_rate": 1.7126422764227643e-05, "loss": 0.0922, "step": 35345 }, { "epoch": 0.43109756097560975, "grad_norm": 3.4300384521484375, "learning_rate": 1.7126016260162604e-05, "loss": 0.0428, "step": 35350 }, { "epoch": 0.43115853658536585, "grad_norm": 0.8147683143615723, "learning_rate": 1.7125609756097562e-05, "loss": 0.0688, "step": 35355 }, { "epoch": 0.43121951219512195, "grad_norm": 0.46012943983078003, "learning_rate": 1.712520325203252e-05, "loss": 0.0651, "step": 35360 }, { "epoch": 0.43128048780487804, "grad_norm": 0.4623728096485138, "learning_rate": 1.7124796747967482e-05, "loss": 0.064, "step": 35365 }, { "epoch": 0.43134146341463414, "grad_norm": 0.7948299646377563, "learning_rate": 1.712439024390244e-05, "loss": 0.0769, "step": 35370 }, { "epoch": 0.43140243902439024, "grad_norm": 0.8337740898132324, "learning_rate": 1.7123983739837398e-05, "loss": 0.0791, "step": 35375 }, { "epoch": 0.43146341463414634, "grad_norm": 0.9620833396911621, "learning_rate": 1.712357723577236e-05, "loss": 0.0851, "step": 35380 }, { "epoch": 0.43152439024390243, "grad_norm": 0.737662136554718, "learning_rate": 1.7123170731707318e-05, "loss": 0.0826, "step": 35385 }, { "epoch": 0.43158536585365853, "grad_norm": 0.7485867738723755, "learning_rate": 1.7122764227642276e-05, "loss": 0.0628, "step": 35390 }, { "epoch": 0.43164634146341463, "grad_norm": 1.2489978075027466, "learning_rate": 1.7122357723577238e-05, "loss": 0.0592, "step": 35395 }, { "epoch": 0.4317073170731707, "grad_norm": 1.528437852859497, "learning_rate": 1.7121951219512196e-05, "loss": 0.0492, "step": 35400 }, { "epoch": 0.4317682926829268, "grad_norm": 0.765636682510376, "learning_rate": 1.7121544715447157e-05, "loss": 0.1029, "step": 35405 }, { "epoch": 0.4318292682926829, "grad_norm": 0.4840254485607147, "learning_rate": 1.7121138211382115e-05, "loss": 0.0744, "step": 35410 }, { "epoch": 0.431890243902439, "grad_norm": 0.3993075489997864, "learning_rate": 1.7120731707317077e-05, "loss": 0.0701, "step": 35415 }, { "epoch": 0.4319512195121951, "grad_norm": 0.41529354453086853, "learning_rate": 1.712032520325203e-05, "loss": 0.0712, "step": 35420 }, { "epoch": 0.4320121951219512, "grad_norm": 0.6350646615028381, "learning_rate": 1.7119918699186993e-05, "loss": 0.0641, "step": 35425 }, { "epoch": 0.4320731707317073, "grad_norm": 1.5172673463821411, "learning_rate": 1.711951219512195e-05, "loss": 0.0592, "step": 35430 }, { "epoch": 0.4321341463414634, "grad_norm": 0.6132611036300659, "learning_rate": 1.7119105691056913e-05, "loss": 0.0691, "step": 35435 }, { "epoch": 0.4321951219512195, "grad_norm": 0.48784908652305603, "learning_rate": 1.711869918699187e-05, "loss": 0.076, "step": 35440 }, { "epoch": 0.4322560975609756, "grad_norm": 1.225315809249878, "learning_rate": 1.7118292682926832e-05, "loss": 0.0624, "step": 35445 }, { "epoch": 0.4323170731707317, "grad_norm": 0.8036149740219116, "learning_rate": 1.711788617886179e-05, "loss": 0.0797, "step": 35450 }, { "epoch": 0.4323780487804878, "grad_norm": 0.6478409171104431, "learning_rate": 1.711747967479675e-05, "loss": 0.1016, "step": 35455 }, { "epoch": 0.4324390243902439, "grad_norm": 0.5745152831077576, "learning_rate": 1.7117073170731707e-05, "loss": 0.0946, "step": 35460 }, { "epoch": 0.4325, "grad_norm": 0.6802618503570557, "learning_rate": 1.7116666666666668e-05, "loss": 0.0573, "step": 35465 }, { "epoch": 0.4325609756097561, "grad_norm": 1.0629007816314697, "learning_rate": 1.7116260162601626e-05, "loss": 0.0747, "step": 35470 }, { "epoch": 0.4326219512195122, "grad_norm": 2.0053834915161133, "learning_rate": 1.7115853658536588e-05, "loss": 0.1152, "step": 35475 }, { "epoch": 0.4326829268292683, "grad_norm": 0.812238872051239, "learning_rate": 1.7115447154471546e-05, "loss": 0.087, "step": 35480 }, { "epoch": 0.4327439024390244, "grad_norm": 0.6700170636177063, "learning_rate": 1.7115040650406504e-05, "loss": 0.0556, "step": 35485 }, { "epoch": 0.4328048780487805, "grad_norm": 0.3772892653942108, "learning_rate": 1.7114634146341466e-05, "loss": 0.0862, "step": 35490 }, { "epoch": 0.4328658536585366, "grad_norm": 1.5687506198883057, "learning_rate": 1.7114227642276424e-05, "loss": 0.0662, "step": 35495 }, { "epoch": 0.4329268292682927, "grad_norm": 0.5809995532035828, "learning_rate": 1.7113821138211385e-05, "loss": 0.0709, "step": 35500 }, { "epoch": 0.4329878048780488, "grad_norm": 0.4913104176521301, "learning_rate": 1.7113414634146343e-05, "loss": 0.083, "step": 35505 }, { "epoch": 0.4330487804878049, "grad_norm": 0.587648868560791, "learning_rate": 1.71130081300813e-05, "loss": 0.0586, "step": 35510 }, { "epoch": 0.43310975609756097, "grad_norm": 0.6123248338699341, "learning_rate": 1.711260162601626e-05, "loss": 0.091, "step": 35515 }, { "epoch": 0.43317073170731707, "grad_norm": 0.7109094262123108, "learning_rate": 1.711219512195122e-05, "loss": 0.0921, "step": 35520 }, { "epoch": 0.43323170731707317, "grad_norm": 0.21392391622066498, "learning_rate": 1.711178861788618e-05, "loss": 0.059, "step": 35525 }, { "epoch": 0.43329268292682926, "grad_norm": 0.7614787220954895, "learning_rate": 1.711138211382114e-05, "loss": 0.0855, "step": 35530 }, { "epoch": 0.43335365853658536, "grad_norm": 0.503362238407135, "learning_rate": 1.71109756097561e-05, "loss": 0.0726, "step": 35535 }, { "epoch": 0.43341463414634146, "grad_norm": 0.7296110987663269, "learning_rate": 1.7110569105691057e-05, "loss": 0.0865, "step": 35540 }, { "epoch": 0.43347560975609756, "grad_norm": 0.7877447009086609, "learning_rate": 1.711016260162602e-05, "loss": 0.0674, "step": 35545 }, { "epoch": 0.43353658536585366, "grad_norm": 0.6961396932601929, "learning_rate": 1.7109756097560977e-05, "loss": 0.0695, "step": 35550 }, { "epoch": 0.43359756097560975, "grad_norm": 0.8488333821296692, "learning_rate": 1.7109349593495935e-05, "loss": 0.0769, "step": 35555 }, { "epoch": 0.43365853658536585, "grad_norm": 0.7671354413032532, "learning_rate": 1.7108943089430896e-05, "loss": 0.0957, "step": 35560 }, { "epoch": 0.43371951219512195, "grad_norm": 0.35725948214530945, "learning_rate": 1.7108536585365855e-05, "loss": 0.0578, "step": 35565 }, { "epoch": 0.43378048780487805, "grad_norm": 0.750481128692627, "learning_rate": 1.7108130081300813e-05, "loss": 0.0678, "step": 35570 }, { "epoch": 0.43384146341463414, "grad_norm": 0.46806636452674866, "learning_rate": 1.7107723577235774e-05, "loss": 0.0786, "step": 35575 }, { "epoch": 0.43390243902439024, "grad_norm": 1.2955560684204102, "learning_rate": 1.7107317073170732e-05, "loss": 0.1112, "step": 35580 }, { "epoch": 0.43396341463414634, "grad_norm": 1.0564332008361816, "learning_rate": 1.7106910569105694e-05, "loss": 0.0968, "step": 35585 }, { "epoch": 0.43402439024390244, "grad_norm": 0.6726091504096985, "learning_rate": 1.7106504065040652e-05, "loss": 0.098, "step": 35590 }, { "epoch": 0.43408536585365853, "grad_norm": 0.40883854031562805, "learning_rate": 1.7106097560975613e-05, "loss": 0.0582, "step": 35595 }, { "epoch": 0.43414634146341463, "grad_norm": 0.9158802628517151, "learning_rate": 1.7105691056910568e-05, "loss": 0.0479, "step": 35600 }, { "epoch": 0.43420731707317073, "grad_norm": 0.3121374547481537, "learning_rate": 1.710528455284553e-05, "loss": 0.0817, "step": 35605 }, { "epoch": 0.4342682926829268, "grad_norm": 4.732793807983398, "learning_rate": 1.7104878048780488e-05, "loss": 0.0803, "step": 35610 }, { "epoch": 0.4343292682926829, "grad_norm": 1.1728121042251587, "learning_rate": 1.710447154471545e-05, "loss": 0.0996, "step": 35615 }, { "epoch": 0.434390243902439, "grad_norm": 0.8576055765151978, "learning_rate": 1.7104065040650407e-05, "loss": 0.0734, "step": 35620 }, { "epoch": 0.4344512195121951, "grad_norm": 0.5072518587112427, "learning_rate": 1.710365853658537e-05, "loss": 0.0384, "step": 35625 }, { "epoch": 0.4345121951219512, "grad_norm": 0.3518926799297333, "learning_rate": 1.7103252032520327e-05, "loss": 0.0553, "step": 35630 }, { "epoch": 0.4345731707317073, "grad_norm": 0.7997722029685974, "learning_rate": 1.7102845528455285e-05, "loss": 0.1083, "step": 35635 }, { "epoch": 0.4346341463414634, "grad_norm": 0.6284251809120178, "learning_rate": 1.7102439024390243e-05, "loss": 0.054, "step": 35640 }, { "epoch": 0.4346951219512195, "grad_norm": 0.8145087361335754, "learning_rate": 1.7102032520325205e-05, "loss": 0.0647, "step": 35645 }, { "epoch": 0.4347560975609756, "grad_norm": 0.7654451727867126, "learning_rate": 1.7101626016260163e-05, "loss": 0.0624, "step": 35650 }, { "epoch": 0.4348170731707317, "grad_norm": 0.641532301902771, "learning_rate": 1.7101219512195125e-05, "loss": 0.0759, "step": 35655 }, { "epoch": 0.4348780487804878, "grad_norm": 0.743370771408081, "learning_rate": 1.7100813008130083e-05, "loss": 0.0869, "step": 35660 }, { "epoch": 0.4349390243902439, "grad_norm": 0.6852868795394897, "learning_rate": 1.710040650406504e-05, "loss": 0.0672, "step": 35665 }, { "epoch": 0.435, "grad_norm": 3.1281793117523193, "learning_rate": 1.7100000000000002e-05, "loss": 0.1196, "step": 35670 }, { "epoch": 0.4350609756097561, "grad_norm": 0.6112357974052429, "learning_rate": 1.709959349593496e-05, "loss": 0.0496, "step": 35675 }, { "epoch": 0.4351219512195122, "grad_norm": 0.6697712540626526, "learning_rate": 1.7099186991869922e-05, "loss": 0.0562, "step": 35680 }, { "epoch": 0.4351829268292683, "grad_norm": 0.9863848686218262, "learning_rate": 1.709878048780488e-05, "loss": 0.1002, "step": 35685 }, { "epoch": 0.4352439024390244, "grad_norm": 0.9794920086860657, "learning_rate": 1.7098373983739838e-05, "loss": 0.0703, "step": 35690 }, { "epoch": 0.4353048780487805, "grad_norm": 0.8122637271881104, "learning_rate": 1.7097967479674796e-05, "loss": 0.0765, "step": 35695 }, { "epoch": 0.4353658536585366, "grad_norm": 1.664888620376587, "learning_rate": 1.7097560975609758e-05, "loss": 0.1142, "step": 35700 }, { "epoch": 0.4354268292682927, "grad_norm": 0.5574924945831299, "learning_rate": 1.7097154471544716e-05, "loss": 0.07, "step": 35705 }, { "epoch": 0.4354878048780488, "grad_norm": 0.3320721983909607, "learning_rate": 1.7096747967479677e-05, "loss": 0.0674, "step": 35710 }, { "epoch": 0.4355487804878049, "grad_norm": 0.7078917026519775, "learning_rate": 1.7096341463414636e-05, "loss": 0.0989, "step": 35715 }, { "epoch": 0.435609756097561, "grad_norm": 0.8180993795394897, "learning_rate": 1.7095934959349594e-05, "loss": 0.0485, "step": 35720 }, { "epoch": 0.43567073170731707, "grad_norm": 0.6974643468856812, "learning_rate": 1.7095528455284552e-05, "loss": 0.0441, "step": 35725 }, { "epoch": 0.43573170731707317, "grad_norm": 0.8260399103164673, "learning_rate": 1.7095121951219513e-05, "loss": 0.0869, "step": 35730 }, { "epoch": 0.43579268292682927, "grad_norm": 0.6486302018165588, "learning_rate": 1.709471544715447e-05, "loss": 0.0746, "step": 35735 }, { "epoch": 0.43585365853658536, "grad_norm": 0.6674265265464783, "learning_rate": 1.7094308943089433e-05, "loss": 0.0945, "step": 35740 }, { "epoch": 0.43591463414634146, "grad_norm": 0.4557279646396637, "learning_rate": 1.709390243902439e-05, "loss": 0.0581, "step": 35745 }, { "epoch": 0.43597560975609756, "grad_norm": 0.5449943542480469, "learning_rate": 1.709349593495935e-05, "loss": 0.0831, "step": 35750 }, { "epoch": 0.43603658536585366, "grad_norm": 0.47766777873039246, "learning_rate": 1.709308943089431e-05, "loss": 0.0733, "step": 35755 }, { "epoch": 0.43609756097560975, "grad_norm": 0.9207019805908203, "learning_rate": 1.709268292682927e-05, "loss": 0.086, "step": 35760 }, { "epoch": 0.43615853658536585, "grad_norm": 1.7407969236373901, "learning_rate": 1.709227642276423e-05, "loss": 0.0853, "step": 35765 }, { "epoch": 0.43621951219512195, "grad_norm": 0.47521084547042847, "learning_rate": 1.709186991869919e-05, "loss": 0.0909, "step": 35770 }, { "epoch": 0.43628048780487805, "grad_norm": 0.7804234623908997, "learning_rate": 1.709146341463415e-05, "loss": 0.0713, "step": 35775 }, { "epoch": 0.43634146341463415, "grad_norm": 1.2680953741073608, "learning_rate": 1.7091056910569105e-05, "loss": 0.1, "step": 35780 }, { "epoch": 0.43640243902439024, "grad_norm": 0.3298013508319855, "learning_rate": 1.7090650406504066e-05, "loss": 0.0947, "step": 35785 }, { "epoch": 0.43646341463414634, "grad_norm": 0.543258786201477, "learning_rate": 1.7090243902439024e-05, "loss": 0.0757, "step": 35790 }, { "epoch": 0.43652439024390244, "grad_norm": 0.7061731815338135, "learning_rate": 1.7089837398373986e-05, "loss": 0.0705, "step": 35795 }, { "epoch": 0.43658536585365854, "grad_norm": 1.3905247449874878, "learning_rate": 1.7089430894308944e-05, "loss": 0.0847, "step": 35800 }, { "epoch": 0.43664634146341463, "grad_norm": 0.5250621438026428, "learning_rate": 1.7089024390243906e-05, "loss": 0.0767, "step": 35805 }, { "epoch": 0.43670731707317073, "grad_norm": 0.7486526966094971, "learning_rate": 1.7088617886178864e-05, "loss": 0.0546, "step": 35810 }, { "epoch": 0.43676829268292683, "grad_norm": 1.3489307165145874, "learning_rate": 1.7088211382113822e-05, "loss": 0.083, "step": 35815 }, { "epoch": 0.4368292682926829, "grad_norm": 0.6994431614875793, "learning_rate": 1.708780487804878e-05, "loss": 0.0586, "step": 35820 }, { "epoch": 0.436890243902439, "grad_norm": 0.8810197710990906, "learning_rate": 1.708739837398374e-05, "loss": 0.0684, "step": 35825 }, { "epoch": 0.4369512195121951, "grad_norm": 0.520323634147644, "learning_rate": 1.70869918699187e-05, "loss": 0.0711, "step": 35830 }, { "epoch": 0.4370121951219512, "grad_norm": 0.46364328265190125, "learning_rate": 1.708658536585366e-05, "loss": 0.0676, "step": 35835 }, { "epoch": 0.4370731707317073, "grad_norm": 0.6066880226135254, "learning_rate": 1.708617886178862e-05, "loss": 0.12, "step": 35840 }, { "epoch": 0.4371341463414634, "grad_norm": 0.3277915418148041, "learning_rate": 1.7085772357723577e-05, "loss": 0.0748, "step": 35845 }, { "epoch": 0.4371951219512195, "grad_norm": 1.8166368007659912, "learning_rate": 1.708536585365854e-05, "loss": 0.0512, "step": 35850 }, { "epoch": 0.4372560975609756, "grad_norm": 0.6232921481132507, "learning_rate": 1.7084959349593497e-05, "loss": 0.0875, "step": 35855 }, { "epoch": 0.4373170731707317, "grad_norm": 0.42910128831863403, "learning_rate": 1.708455284552846e-05, "loss": 0.0535, "step": 35860 }, { "epoch": 0.4373780487804878, "grad_norm": 0.7480011582374573, "learning_rate": 1.7084146341463417e-05, "loss": 0.0777, "step": 35865 }, { "epoch": 0.4374390243902439, "grad_norm": 0.4739478826522827, "learning_rate": 1.7083739837398375e-05, "loss": 0.0561, "step": 35870 }, { "epoch": 0.4375, "grad_norm": 0.9476837515830994, "learning_rate": 1.7083333333333333e-05, "loss": 0.0836, "step": 35875 }, { "epoch": 0.4375609756097561, "grad_norm": 0.3851914703845978, "learning_rate": 1.7082926829268294e-05, "loss": 0.0793, "step": 35880 }, { "epoch": 0.4376219512195122, "grad_norm": 0.68084317445755, "learning_rate": 1.7082520325203253e-05, "loss": 0.0556, "step": 35885 }, { "epoch": 0.4376829268292683, "grad_norm": 0.5039776563644409, "learning_rate": 1.7082113821138214e-05, "loss": 0.0438, "step": 35890 }, { "epoch": 0.4377439024390244, "grad_norm": 0.6330218315124512, "learning_rate": 1.7081707317073172e-05, "loss": 0.0736, "step": 35895 }, { "epoch": 0.4378048780487805, "grad_norm": 0.5600091218948364, "learning_rate": 1.7081300813008134e-05, "loss": 0.1032, "step": 35900 }, { "epoch": 0.4378658536585366, "grad_norm": 0.8565411567687988, "learning_rate": 1.708089430894309e-05, "loss": 0.0674, "step": 35905 }, { "epoch": 0.4379268292682927, "grad_norm": 0.5305419564247131, "learning_rate": 1.708048780487805e-05, "loss": 0.0807, "step": 35910 }, { "epoch": 0.4379878048780488, "grad_norm": 1.1354819536209106, "learning_rate": 1.7080081300813008e-05, "loss": 0.077, "step": 35915 }, { "epoch": 0.4380487804878049, "grad_norm": 1.6316399574279785, "learning_rate": 1.707967479674797e-05, "loss": 0.0867, "step": 35920 }, { "epoch": 0.438109756097561, "grad_norm": 0.7263688445091248, "learning_rate": 1.7079268292682928e-05, "loss": 0.0765, "step": 35925 }, { "epoch": 0.4381707317073171, "grad_norm": 0.541940450668335, "learning_rate": 1.707886178861789e-05, "loss": 0.0495, "step": 35930 }, { "epoch": 0.43823170731707317, "grad_norm": 0.7377004027366638, "learning_rate": 1.7078455284552847e-05, "loss": 0.0737, "step": 35935 }, { "epoch": 0.43829268292682927, "grad_norm": 0.6378033757209778, "learning_rate": 1.7078048780487806e-05, "loss": 0.0786, "step": 35940 }, { "epoch": 0.43835365853658537, "grad_norm": 0.6222402453422546, "learning_rate": 1.7077642276422767e-05, "loss": 0.0715, "step": 35945 }, { "epoch": 0.43841463414634146, "grad_norm": 0.6615198254585266, "learning_rate": 1.7077235772357725e-05, "loss": 0.0675, "step": 35950 }, { "epoch": 0.43847560975609756, "grad_norm": 0.5116525292396545, "learning_rate": 1.7076829268292687e-05, "loss": 0.0519, "step": 35955 }, { "epoch": 0.43853658536585366, "grad_norm": 0.9769070148468018, "learning_rate": 1.7076422764227645e-05, "loss": 0.0565, "step": 35960 }, { "epoch": 0.43859756097560976, "grad_norm": 1.25221586227417, "learning_rate": 1.7076016260162603e-05, "loss": 0.0688, "step": 35965 }, { "epoch": 0.43865853658536585, "grad_norm": 0.9248893857002258, "learning_rate": 1.707560975609756e-05, "loss": 0.0621, "step": 35970 }, { "epoch": 0.43871951219512195, "grad_norm": 0.8338456153869629, "learning_rate": 1.7075203252032523e-05, "loss": 0.0817, "step": 35975 }, { "epoch": 0.43878048780487805, "grad_norm": 0.6007012128829956, "learning_rate": 1.707479674796748e-05, "loss": 0.0765, "step": 35980 }, { "epoch": 0.43884146341463415, "grad_norm": 0.8937963843345642, "learning_rate": 1.7074390243902442e-05, "loss": 0.1159, "step": 35985 }, { "epoch": 0.43890243902439025, "grad_norm": 0.5198522210121155, "learning_rate": 1.70739837398374e-05, "loss": 0.0685, "step": 35990 }, { "epoch": 0.43896341463414634, "grad_norm": 0.7094073295593262, "learning_rate": 1.707357723577236e-05, "loss": 0.0606, "step": 35995 }, { "epoch": 0.43902439024390244, "grad_norm": 0.7140373587608337, "learning_rate": 1.7073170731707317e-05, "loss": 0.0714, "step": 36000 }, { "epoch": 0.43908536585365854, "grad_norm": 1.2565659284591675, "learning_rate": 1.7072764227642278e-05, "loss": 0.0743, "step": 36005 }, { "epoch": 0.43914634146341464, "grad_norm": 0.4030868113040924, "learning_rate": 1.7072357723577236e-05, "loss": 0.0636, "step": 36010 }, { "epoch": 0.43920731707317073, "grad_norm": 0.6046048998832703, "learning_rate": 1.7071951219512198e-05, "loss": 0.073, "step": 36015 }, { "epoch": 0.43926829268292683, "grad_norm": 1.100398302078247, "learning_rate": 1.7071544715447156e-05, "loss": 0.0769, "step": 36020 }, { "epoch": 0.43932926829268293, "grad_norm": 2.5668349266052246, "learning_rate": 1.7071138211382114e-05, "loss": 0.0567, "step": 36025 }, { "epoch": 0.439390243902439, "grad_norm": 0.7276167273521423, "learning_rate": 1.7070731707317076e-05, "loss": 0.0645, "step": 36030 }, { "epoch": 0.4394512195121951, "grad_norm": 0.42098015546798706, "learning_rate": 1.7070325203252034e-05, "loss": 0.0956, "step": 36035 }, { "epoch": 0.4395121951219512, "grad_norm": 0.541138768196106, "learning_rate": 1.7069918699186995e-05, "loss": 0.0734, "step": 36040 }, { "epoch": 0.4395731707317073, "grad_norm": 0.8523568511009216, "learning_rate": 1.7069512195121953e-05, "loss": 0.0857, "step": 36045 }, { "epoch": 0.4396341463414634, "grad_norm": 0.2746676802635193, "learning_rate": 1.706910569105691e-05, "loss": 0.0648, "step": 36050 }, { "epoch": 0.4396951219512195, "grad_norm": 1.6565359830856323, "learning_rate": 1.706869918699187e-05, "loss": 0.0732, "step": 36055 }, { "epoch": 0.4397560975609756, "grad_norm": 0.4599347412586212, "learning_rate": 1.706829268292683e-05, "loss": 0.0735, "step": 36060 }, { "epoch": 0.4398170731707317, "grad_norm": 0.7276666760444641, "learning_rate": 1.706788617886179e-05, "loss": 0.0562, "step": 36065 }, { "epoch": 0.4398780487804878, "grad_norm": 0.900056779384613, "learning_rate": 1.706747967479675e-05, "loss": 0.0878, "step": 36070 }, { "epoch": 0.4399390243902439, "grad_norm": 0.5552443861961365, "learning_rate": 1.706707317073171e-05, "loss": 0.0772, "step": 36075 }, { "epoch": 0.44, "grad_norm": 0.5770247578620911, "learning_rate": 1.706666666666667e-05, "loss": 0.0715, "step": 36080 }, { "epoch": 0.4400609756097561, "grad_norm": 1.0142236948013306, "learning_rate": 1.7066260162601625e-05, "loss": 0.052, "step": 36085 }, { "epoch": 0.4401219512195122, "grad_norm": 0.6920826435089111, "learning_rate": 1.7065853658536587e-05, "loss": 0.0541, "step": 36090 }, { "epoch": 0.4401829268292683, "grad_norm": 0.6679529547691345, "learning_rate": 1.7065447154471545e-05, "loss": 0.0807, "step": 36095 }, { "epoch": 0.4402439024390244, "grad_norm": 0.6953527331352234, "learning_rate": 1.7065040650406506e-05, "loss": 0.0794, "step": 36100 }, { "epoch": 0.4403048780487805, "grad_norm": 0.6658872365951538, "learning_rate": 1.7064634146341464e-05, "loss": 0.0661, "step": 36105 }, { "epoch": 0.4403658536585366, "grad_norm": 0.5548734068870544, "learning_rate": 1.7064227642276426e-05, "loss": 0.0648, "step": 36110 }, { "epoch": 0.4404268292682927, "grad_norm": 0.9913046956062317, "learning_rate": 1.7063821138211384e-05, "loss": 0.093, "step": 36115 }, { "epoch": 0.4404878048780488, "grad_norm": 0.6098795533180237, "learning_rate": 1.7063414634146342e-05, "loss": 0.0806, "step": 36120 }, { "epoch": 0.4405487804878049, "grad_norm": 0.7472824454307556, "learning_rate": 1.7063008130081304e-05, "loss": 0.0845, "step": 36125 }, { "epoch": 0.440609756097561, "grad_norm": 0.5362735986709595, "learning_rate": 1.7062601626016262e-05, "loss": 0.0658, "step": 36130 }, { "epoch": 0.4406707317073171, "grad_norm": 0.6050108075141907, "learning_rate": 1.706219512195122e-05, "loss": 0.0843, "step": 36135 }, { "epoch": 0.4407317073170732, "grad_norm": 0.5838358998298645, "learning_rate": 1.706178861788618e-05, "loss": 0.0591, "step": 36140 }, { "epoch": 0.44079268292682927, "grad_norm": 0.3401547968387604, "learning_rate": 1.706138211382114e-05, "loss": 0.0515, "step": 36145 }, { "epoch": 0.44085365853658537, "grad_norm": 0.5003753304481506, "learning_rate": 1.7060975609756098e-05, "loss": 0.063, "step": 36150 }, { "epoch": 0.44091463414634147, "grad_norm": 0.36848077178001404, "learning_rate": 1.706056910569106e-05, "loss": 0.062, "step": 36155 }, { "epoch": 0.44097560975609756, "grad_norm": 0.5392122864723206, "learning_rate": 1.7060162601626017e-05, "loss": 0.0925, "step": 36160 }, { "epoch": 0.44103658536585366, "grad_norm": 0.4743572771549225, "learning_rate": 1.705975609756098e-05, "loss": 0.053, "step": 36165 }, { "epoch": 0.44109756097560976, "grad_norm": 0.49836644530296326, "learning_rate": 1.7059349593495937e-05, "loss": 0.0572, "step": 36170 }, { "epoch": 0.44115853658536586, "grad_norm": 0.8309173583984375, "learning_rate": 1.7058943089430895e-05, "loss": 0.0682, "step": 36175 }, { "epoch": 0.44121951219512195, "grad_norm": 1.0356234312057495, "learning_rate": 1.7058536585365853e-05, "loss": 0.0958, "step": 36180 }, { "epoch": 0.44128048780487805, "grad_norm": 0.48030754923820496, "learning_rate": 1.7058130081300815e-05, "loss": 0.0651, "step": 36185 }, { "epoch": 0.44134146341463415, "grad_norm": 0.4465831518173218, "learning_rate": 1.7057723577235773e-05, "loss": 0.0953, "step": 36190 }, { "epoch": 0.44140243902439025, "grad_norm": 0.5938810110092163, "learning_rate": 1.7057317073170734e-05, "loss": 0.0784, "step": 36195 }, { "epoch": 0.44146341463414634, "grad_norm": 0.4875081777572632, "learning_rate": 1.7056910569105693e-05, "loss": 0.0752, "step": 36200 }, { "epoch": 0.44152439024390244, "grad_norm": 0.29917895793914795, "learning_rate": 1.705650406504065e-05, "loss": 0.0705, "step": 36205 }, { "epoch": 0.44158536585365854, "grad_norm": 0.5457311868667603, "learning_rate": 1.7056097560975612e-05, "loss": 0.0613, "step": 36210 }, { "epoch": 0.44164634146341464, "grad_norm": 0.8500871658325195, "learning_rate": 1.705569105691057e-05, "loss": 0.0697, "step": 36215 }, { "epoch": 0.44170731707317074, "grad_norm": 0.9881105422973633, "learning_rate": 1.7055284552845532e-05, "loss": 0.0792, "step": 36220 }, { "epoch": 0.44176829268292683, "grad_norm": 0.7029264569282532, "learning_rate": 1.705487804878049e-05, "loss": 0.1013, "step": 36225 }, { "epoch": 0.44182926829268293, "grad_norm": 0.3290034830570221, "learning_rate": 1.7054471544715448e-05, "loss": 0.063, "step": 36230 }, { "epoch": 0.44189024390243903, "grad_norm": 0.9238442778587341, "learning_rate": 1.7054065040650406e-05, "loss": 0.0731, "step": 36235 }, { "epoch": 0.4419512195121951, "grad_norm": 0.497609943151474, "learning_rate": 1.7053658536585368e-05, "loss": 0.0543, "step": 36240 }, { "epoch": 0.4420121951219512, "grad_norm": 0.5060192346572876, "learning_rate": 1.7053252032520326e-05, "loss": 0.0872, "step": 36245 }, { "epoch": 0.4420731707317073, "grad_norm": 0.41237902641296387, "learning_rate": 1.7052845528455287e-05, "loss": 0.0542, "step": 36250 }, { "epoch": 0.4421341463414634, "grad_norm": 0.667043149471283, "learning_rate": 1.7052439024390246e-05, "loss": 0.0752, "step": 36255 }, { "epoch": 0.4421951219512195, "grad_norm": 0.4933285415172577, "learning_rate": 1.7052032520325207e-05, "loss": 0.0607, "step": 36260 }, { "epoch": 0.4422560975609756, "grad_norm": 1.1327104568481445, "learning_rate": 1.7051626016260162e-05, "loss": 0.1147, "step": 36265 }, { "epoch": 0.4423170731707317, "grad_norm": 1.2892839908599854, "learning_rate": 1.7051219512195123e-05, "loss": 0.0569, "step": 36270 }, { "epoch": 0.4423780487804878, "grad_norm": 0.47883790731430054, "learning_rate": 1.705081300813008e-05, "loss": 0.0773, "step": 36275 }, { "epoch": 0.4424390243902439, "grad_norm": 0.7256497740745544, "learning_rate": 1.7050406504065043e-05, "loss": 0.0636, "step": 36280 }, { "epoch": 0.4425, "grad_norm": 0.5771304965019226, "learning_rate": 1.705e-05, "loss": 0.0533, "step": 36285 }, { "epoch": 0.4425609756097561, "grad_norm": 0.7808871865272522, "learning_rate": 1.7049593495934963e-05, "loss": 0.0808, "step": 36290 }, { "epoch": 0.4426219512195122, "grad_norm": 3.153629779815674, "learning_rate": 1.704918699186992e-05, "loss": 0.0651, "step": 36295 }, { "epoch": 0.4426829268292683, "grad_norm": 0.5421701073646545, "learning_rate": 1.704878048780488e-05, "loss": 0.0545, "step": 36300 }, { "epoch": 0.4427439024390244, "grad_norm": 0.6886086463928223, "learning_rate": 1.704837398373984e-05, "loss": 0.103, "step": 36305 }, { "epoch": 0.4428048780487805, "grad_norm": 0.45371097326278687, "learning_rate": 1.70479674796748e-05, "loss": 0.0567, "step": 36310 }, { "epoch": 0.4428658536585366, "grad_norm": 0.5339520573616028, "learning_rate": 1.7047560975609757e-05, "loss": 0.0749, "step": 36315 }, { "epoch": 0.4429268292682927, "grad_norm": 1.0977823734283447, "learning_rate": 1.7047154471544718e-05, "loss": 0.053, "step": 36320 }, { "epoch": 0.4429878048780488, "grad_norm": 0.6196001768112183, "learning_rate": 1.7046747967479676e-05, "loss": 0.0509, "step": 36325 }, { "epoch": 0.4430487804878049, "grad_norm": 1.4967626333236694, "learning_rate": 1.7046341463414634e-05, "loss": 0.0584, "step": 36330 }, { "epoch": 0.443109756097561, "grad_norm": 0.6227107048034668, "learning_rate": 1.7045934959349596e-05, "loss": 0.0731, "step": 36335 }, { "epoch": 0.4431707317073171, "grad_norm": 0.6501548290252686, "learning_rate": 1.7045528455284554e-05, "loss": 0.0541, "step": 36340 }, { "epoch": 0.4432317073170732, "grad_norm": 0.38478532433509827, "learning_rate": 1.7045121951219516e-05, "loss": 0.0502, "step": 36345 }, { "epoch": 0.4432926829268293, "grad_norm": 0.820773720741272, "learning_rate": 1.7044715447154474e-05, "loss": 0.0661, "step": 36350 }, { "epoch": 0.44335365853658537, "grad_norm": 0.611589252948761, "learning_rate": 1.7044308943089432e-05, "loss": 0.0755, "step": 36355 }, { "epoch": 0.44341463414634147, "grad_norm": 1.2261830568313599, "learning_rate": 1.704390243902439e-05, "loss": 0.0846, "step": 36360 }, { "epoch": 0.44347560975609757, "grad_norm": 0.5800412893295288, "learning_rate": 1.704349593495935e-05, "loss": 0.0726, "step": 36365 }, { "epoch": 0.44353658536585366, "grad_norm": 0.6894996762275696, "learning_rate": 1.704308943089431e-05, "loss": 0.0547, "step": 36370 }, { "epoch": 0.44359756097560976, "grad_norm": 0.9302759170532227, "learning_rate": 1.704268292682927e-05, "loss": 0.0751, "step": 36375 }, { "epoch": 0.44365853658536586, "grad_norm": 1.3426846265792847, "learning_rate": 1.704227642276423e-05, "loss": 0.0547, "step": 36380 }, { "epoch": 0.44371951219512196, "grad_norm": 0.8101917505264282, "learning_rate": 1.7041869918699187e-05, "loss": 0.0663, "step": 36385 }, { "epoch": 0.44378048780487805, "grad_norm": 0.6383995413780212, "learning_rate": 1.704146341463415e-05, "loss": 0.1001, "step": 36390 }, { "epoch": 0.44384146341463415, "grad_norm": 0.5660257935523987, "learning_rate": 1.7041056910569107e-05, "loss": 0.0582, "step": 36395 }, { "epoch": 0.44390243902439025, "grad_norm": 0.6394444108009338, "learning_rate": 1.7040650406504065e-05, "loss": 0.0667, "step": 36400 }, { "epoch": 0.44396341463414635, "grad_norm": 0.5421498417854309, "learning_rate": 1.7040243902439027e-05, "loss": 0.0731, "step": 36405 }, { "epoch": 0.44402439024390244, "grad_norm": 0.4372345209121704, "learning_rate": 1.7039837398373985e-05, "loss": 0.05, "step": 36410 }, { "epoch": 0.44408536585365854, "grad_norm": 0.3141763508319855, "learning_rate": 1.7039430894308943e-05, "loss": 0.0584, "step": 36415 }, { "epoch": 0.44414634146341464, "grad_norm": 0.5669702291488647, "learning_rate": 1.7039024390243904e-05, "loss": 0.0902, "step": 36420 }, { "epoch": 0.44420731707317074, "grad_norm": 0.4683316946029663, "learning_rate": 1.7038617886178863e-05, "loss": 0.0768, "step": 36425 }, { "epoch": 0.44426829268292684, "grad_norm": 0.3936874568462372, "learning_rate": 1.7038211382113824e-05, "loss": 0.0579, "step": 36430 }, { "epoch": 0.44432926829268293, "grad_norm": 0.8783864378929138, "learning_rate": 1.7037804878048782e-05, "loss": 0.0579, "step": 36435 }, { "epoch": 0.44439024390243903, "grad_norm": 0.5702916979789734, "learning_rate": 1.7037398373983744e-05, "loss": 0.0703, "step": 36440 }, { "epoch": 0.44445121951219513, "grad_norm": 1.181577444076538, "learning_rate": 1.70369918699187e-05, "loss": 0.0719, "step": 36445 }, { "epoch": 0.4445121951219512, "grad_norm": 0.588562548160553, "learning_rate": 1.703658536585366e-05, "loss": 0.0736, "step": 36450 }, { "epoch": 0.4445731707317073, "grad_norm": 1.3555331230163574, "learning_rate": 1.7036178861788618e-05, "loss": 0.0735, "step": 36455 }, { "epoch": 0.4446341463414634, "grad_norm": 0.37264081835746765, "learning_rate": 1.703577235772358e-05, "loss": 0.0564, "step": 36460 }, { "epoch": 0.4446951219512195, "grad_norm": 0.8418822884559631, "learning_rate": 1.7035365853658538e-05, "loss": 0.083, "step": 36465 }, { "epoch": 0.4447560975609756, "grad_norm": 1.7025824785232544, "learning_rate": 1.70349593495935e-05, "loss": 0.1054, "step": 36470 }, { "epoch": 0.4448170731707317, "grad_norm": 0.6718758344650269, "learning_rate": 1.7034552845528457e-05, "loss": 0.0805, "step": 36475 }, { "epoch": 0.4448780487804878, "grad_norm": 0.3652399778366089, "learning_rate": 1.7034146341463415e-05, "loss": 0.1094, "step": 36480 }, { "epoch": 0.4449390243902439, "grad_norm": 0.4092150926589966, "learning_rate": 1.7033739837398377e-05, "loss": 0.0834, "step": 36485 }, { "epoch": 0.445, "grad_norm": 0.6876766681671143, "learning_rate": 1.7033333333333335e-05, "loss": 0.0597, "step": 36490 }, { "epoch": 0.4450609756097561, "grad_norm": 1.1247247457504272, "learning_rate": 1.7032926829268293e-05, "loss": 0.1275, "step": 36495 }, { "epoch": 0.4451219512195122, "grad_norm": 0.6420807242393494, "learning_rate": 1.7032520325203255e-05, "loss": 0.0552, "step": 36500 }, { "epoch": 0.4451829268292683, "grad_norm": 0.7427501082420349, "learning_rate": 1.7032113821138213e-05, "loss": 0.0588, "step": 36505 }, { "epoch": 0.4452439024390244, "grad_norm": 1.129719853401184, "learning_rate": 1.703170731707317e-05, "loss": 0.0746, "step": 36510 }, { "epoch": 0.4453048780487805, "grad_norm": 1.1021900177001953, "learning_rate": 1.7031300813008133e-05, "loss": 0.0984, "step": 36515 }, { "epoch": 0.4453658536585366, "grad_norm": 1.1007355451583862, "learning_rate": 1.703089430894309e-05, "loss": 0.0934, "step": 36520 }, { "epoch": 0.4454268292682927, "grad_norm": 0.6049818396568298, "learning_rate": 1.7030487804878052e-05, "loss": 0.0555, "step": 36525 }, { "epoch": 0.4454878048780488, "grad_norm": 0.7588112354278564, "learning_rate": 1.703008130081301e-05, "loss": 0.097, "step": 36530 }, { "epoch": 0.4455487804878049, "grad_norm": 1.7259016036987305, "learning_rate": 1.702967479674797e-05, "loss": 0.1412, "step": 36535 }, { "epoch": 0.445609756097561, "grad_norm": 0.7358414530754089, "learning_rate": 1.7029268292682927e-05, "loss": 0.0576, "step": 36540 }, { "epoch": 0.4456707317073171, "grad_norm": 0.6692744493484497, "learning_rate": 1.7028861788617888e-05, "loss": 0.0952, "step": 36545 }, { "epoch": 0.4457317073170732, "grad_norm": 1.6343424320220947, "learning_rate": 1.7028455284552846e-05, "loss": 0.1089, "step": 36550 }, { "epoch": 0.4457926829268293, "grad_norm": 0.6006283164024353, "learning_rate": 1.7028048780487808e-05, "loss": 0.0751, "step": 36555 }, { "epoch": 0.4458536585365854, "grad_norm": 0.6737003326416016, "learning_rate": 1.7027642276422766e-05, "loss": 0.0604, "step": 36560 }, { "epoch": 0.44591463414634147, "grad_norm": 0.44140762090682983, "learning_rate": 1.7027235772357724e-05, "loss": 0.0598, "step": 36565 }, { "epoch": 0.44597560975609757, "grad_norm": 0.6680864691734314, "learning_rate": 1.7026829268292686e-05, "loss": 0.0775, "step": 36570 }, { "epoch": 0.44603658536585367, "grad_norm": 0.6642770767211914, "learning_rate": 1.7026422764227644e-05, "loss": 0.0557, "step": 36575 }, { "epoch": 0.44609756097560976, "grad_norm": 0.43359097838401794, "learning_rate": 1.7026016260162602e-05, "loss": 0.1077, "step": 36580 }, { "epoch": 0.44615853658536586, "grad_norm": 0.6473411321640015, "learning_rate": 1.7025609756097563e-05, "loss": 0.0664, "step": 36585 }, { "epoch": 0.44621951219512196, "grad_norm": 0.5010930895805359, "learning_rate": 1.702520325203252e-05, "loss": 0.0777, "step": 36590 }, { "epoch": 0.44628048780487806, "grad_norm": 1.027877926826477, "learning_rate": 1.702479674796748e-05, "loss": 0.0764, "step": 36595 }, { "epoch": 0.44634146341463415, "grad_norm": 0.6258888244628906, "learning_rate": 1.702439024390244e-05, "loss": 0.076, "step": 36600 }, { "epoch": 0.44640243902439025, "grad_norm": 0.6621519327163696, "learning_rate": 1.70239837398374e-05, "loss": 0.0749, "step": 36605 }, { "epoch": 0.44646341463414635, "grad_norm": 0.4965662956237793, "learning_rate": 1.702357723577236e-05, "loss": 0.0775, "step": 36610 }, { "epoch": 0.44652439024390245, "grad_norm": 1.2931363582611084, "learning_rate": 1.702317073170732e-05, "loss": 0.0922, "step": 36615 }, { "epoch": 0.44658536585365854, "grad_norm": 0.5994082689285278, "learning_rate": 1.702276422764228e-05, "loss": 0.0974, "step": 36620 }, { "epoch": 0.44664634146341464, "grad_norm": 0.839279294013977, "learning_rate": 1.7022357723577235e-05, "loss": 0.1005, "step": 36625 }, { "epoch": 0.44670731707317074, "grad_norm": 0.8394683003425598, "learning_rate": 1.7021951219512197e-05, "loss": 0.0847, "step": 36630 }, { "epoch": 0.44676829268292684, "grad_norm": 0.6636582612991333, "learning_rate": 1.7021544715447155e-05, "loss": 0.1066, "step": 36635 }, { "epoch": 0.44682926829268294, "grad_norm": 0.7340683341026306, "learning_rate": 1.7021138211382116e-05, "loss": 0.1035, "step": 36640 }, { "epoch": 0.44689024390243903, "grad_norm": 0.762836217880249, "learning_rate": 1.7020731707317074e-05, "loss": 0.0542, "step": 36645 }, { "epoch": 0.44695121951219513, "grad_norm": 0.8116294145584106, "learning_rate": 1.7020325203252036e-05, "loss": 0.0709, "step": 36650 }, { "epoch": 0.44701219512195123, "grad_norm": 0.5109180808067322, "learning_rate": 1.7019918699186994e-05, "loss": 0.0743, "step": 36655 }, { "epoch": 0.4470731707317073, "grad_norm": 0.8650820255279541, "learning_rate": 1.7019512195121952e-05, "loss": 0.0602, "step": 36660 }, { "epoch": 0.4471341463414634, "grad_norm": 0.6422354578971863, "learning_rate": 1.701910569105691e-05, "loss": 0.0769, "step": 36665 }, { "epoch": 0.4471951219512195, "grad_norm": 1.4796078205108643, "learning_rate": 1.7018699186991872e-05, "loss": 0.1055, "step": 36670 }, { "epoch": 0.4472560975609756, "grad_norm": 0.40296873450279236, "learning_rate": 1.701829268292683e-05, "loss": 0.06, "step": 36675 }, { "epoch": 0.4473170731707317, "grad_norm": 0.7186958193778992, "learning_rate": 1.701788617886179e-05, "loss": 0.067, "step": 36680 }, { "epoch": 0.4473780487804878, "grad_norm": 0.6735469698905945, "learning_rate": 1.701747967479675e-05, "loss": 0.0895, "step": 36685 }, { "epoch": 0.4474390243902439, "grad_norm": 0.527321994304657, "learning_rate": 1.7017073170731708e-05, "loss": 0.0601, "step": 36690 }, { "epoch": 0.4475, "grad_norm": 0.651568591594696, "learning_rate": 1.701666666666667e-05, "loss": 0.0887, "step": 36695 }, { "epoch": 0.4475609756097561, "grad_norm": 0.5251446962356567, "learning_rate": 1.7016260162601627e-05, "loss": 0.0725, "step": 36700 }, { "epoch": 0.4476219512195122, "grad_norm": 1.1795421838760376, "learning_rate": 1.701585365853659e-05, "loss": 0.1028, "step": 36705 }, { "epoch": 0.4476829268292683, "grad_norm": 0.7152026891708374, "learning_rate": 1.7015447154471547e-05, "loss": 0.067, "step": 36710 }, { "epoch": 0.4477439024390244, "grad_norm": 0.5443284511566162, "learning_rate": 1.7015040650406505e-05, "loss": 0.0553, "step": 36715 }, { "epoch": 0.4478048780487805, "grad_norm": 0.5671129822731018, "learning_rate": 1.7014634146341463e-05, "loss": 0.0571, "step": 36720 }, { "epoch": 0.4478658536585366, "grad_norm": 0.6295211315155029, "learning_rate": 1.7014227642276425e-05, "loss": 0.0586, "step": 36725 }, { "epoch": 0.4479268292682927, "grad_norm": 0.6995916962623596, "learning_rate": 1.7013821138211383e-05, "loss": 0.0606, "step": 36730 }, { "epoch": 0.4479878048780488, "grad_norm": 0.41824692487716675, "learning_rate": 1.7013414634146344e-05, "loss": 0.1051, "step": 36735 }, { "epoch": 0.4480487804878049, "grad_norm": 0.4919564425945282, "learning_rate": 1.7013008130081303e-05, "loss": 0.054, "step": 36740 }, { "epoch": 0.448109756097561, "grad_norm": 1.2350496053695679, "learning_rate": 1.701260162601626e-05, "loss": 0.1036, "step": 36745 }, { "epoch": 0.4481707317073171, "grad_norm": 0.6745553612709045, "learning_rate": 1.7012195121951222e-05, "loss": 0.0907, "step": 36750 }, { "epoch": 0.4482317073170732, "grad_norm": 0.9563162922859192, "learning_rate": 1.701178861788618e-05, "loss": 0.0937, "step": 36755 }, { "epoch": 0.4482926829268293, "grad_norm": 1.2918392419815063, "learning_rate": 1.701138211382114e-05, "loss": 0.1202, "step": 36760 }, { "epoch": 0.4483536585365854, "grad_norm": 0.6439908742904663, "learning_rate": 1.70109756097561e-05, "loss": 0.0547, "step": 36765 }, { "epoch": 0.4484146341463415, "grad_norm": 0.7374848127365112, "learning_rate": 1.7010569105691058e-05, "loss": 0.0843, "step": 36770 }, { "epoch": 0.44847560975609757, "grad_norm": 0.6351014375686646, "learning_rate": 1.7010162601626016e-05, "loss": 0.0607, "step": 36775 }, { "epoch": 0.44853658536585367, "grad_norm": 0.7675440311431885, "learning_rate": 1.7009756097560978e-05, "loss": 0.0661, "step": 36780 }, { "epoch": 0.44859756097560977, "grad_norm": 0.48432159423828125, "learning_rate": 1.7009349593495936e-05, "loss": 0.06, "step": 36785 }, { "epoch": 0.44865853658536586, "grad_norm": 0.5353570580482483, "learning_rate": 1.7008943089430897e-05, "loss": 0.1019, "step": 36790 }, { "epoch": 0.44871951219512196, "grad_norm": 0.4192681610584259, "learning_rate": 1.7008536585365855e-05, "loss": 0.0688, "step": 36795 }, { "epoch": 0.44878048780487806, "grad_norm": 0.676344096660614, "learning_rate": 1.7008130081300817e-05, "loss": 0.078, "step": 36800 }, { "epoch": 0.44884146341463416, "grad_norm": 0.4366949498653412, "learning_rate": 1.7007723577235772e-05, "loss": 0.066, "step": 36805 }, { "epoch": 0.44890243902439025, "grad_norm": 0.7314777970314026, "learning_rate": 1.7007317073170733e-05, "loss": 0.0788, "step": 36810 }, { "epoch": 0.44896341463414635, "grad_norm": 0.37465700507164, "learning_rate": 1.700691056910569e-05, "loss": 0.0724, "step": 36815 }, { "epoch": 0.44902439024390245, "grad_norm": 0.636229932308197, "learning_rate": 1.7006504065040653e-05, "loss": 0.058, "step": 36820 }, { "epoch": 0.44908536585365855, "grad_norm": 0.6792402267456055, "learning_rate": 1.700609756097561e-05, "loss": 0.0728, "step": 36825 }, { "epoch": 0.44914634146341464, "grad_norm": 0.44988760352134705, "learning_rate": 1.7005691056910573e-05, "loss": 0.0947, "step": 36830 }, { "epoch": 0.44920731707317074, "grad_norm": 0.7390608191490173, "learning_rate": 1.700528455284553e-05, "loss": 0.0571, "step": 36835 }, { "epoch": 0.44926829268292684, "grad_norm": 0.4103816747665405, "learning_rate": 1.700487804878049e-05, "loss": 0.0809, "step": 36840 }, { "epoch": 0.44932926829268294, "grad_norm": 0.3584911525249481, "learning_rate": 1.7004471544715447e-05, "loss": 0.0644, "step": 36845 }, { "epoch": 0.44939024390243903, "grad_norm": 0.6106853485107422, "learning_rate": 1.700406504065041e-05, "loss": 0.0596, "step": 36850 }, { "epoch": 0.44945121951219513, "grad_norm": 0.4055512547492981, "learning_rate": 1.7003658536585367e-05, "loss": 0.0513, "step": 36855 }, { "epoch": 0.44951219512195123, "grad_norm": 0.9125880599021912, "learning_rate": 1.7003252032520328e-05, "loss": 0.0643, "step": 36860 }, { "epoch": 0.44957317073170733, "grad_norm": 0.5792575478553772, "learning_rate": 1.7002845528455286e-05, "loss": 0.0686, "step": 36865 }, { "epoch": 0.4496341463414634, "grad_norm": 0.9759629368782043, "learning_rate": 1.7002439024390244e-05, "loss": 0.0895, "step": 36870 }, { "epoch": 0.4496951219512195, "grad_norm": 0.6396292448043823, "learning_rate": 1.7002032520325206e-05, "loss": 0.0538, "step": 36875 }, { "epoch": 0.4497560975609756, "grad_norm": 0.6012701392173767, "learning_rate": 1.7001626016260164e-05, "loss": 0.0538, "step": 36880 }, { "epoch": 0.4498170731707317, "grad_norm": 1.6378811597824097, "learning_rate": 1.7001219512195125e-05, "loss": 0.1072, "step": 36885 }, { "epoch": 0.4498780487804878, "grad_norm": 0.3717746436595917, "learning_rate": 1.7000813008130084e-05, "loss": 0.0632, "step": 36890 }, { "epoch": 0.4499390243902439, "grad_norm": 0.6489755511283875, "learning_rate": 1.7000406504065042e-05, "loss": 0.073, "step": 36895 }, { "epoch": 0.45, "grad_norm": 1.0854052305221558, "learning_rate": 1.7e-05, "loss": 0.0983, "step": 36900 }, { "epoch": 0.4500609756097561, "grad_norm": 0.9434260129928589, "learning_rate": 1.699959349593496e-05, "loss": 0.0664, "step": 36905 }, { "epoch": 0.4501219512195122, "grad_norm": 1.5755994319915771, "learning_rate": 1.699918699186992e-05, "loss": 0.0922, "step": 36910 }, { "epoch": 0.4501829268292683, "grad_norm": 0.46685171127319336, "learning_rate": 1.699878048780488e-05, "loss": 0.0666, "step": 36915 }, { "epoch": 0.4502439024390244, "grad_norm": 0.5453656315803528, "learning_rate": 1.699837398373984e-05, "loss": 0.0742, "step": 36920 }, { "epoch": 0.4503048780487805, "grad_norm": 0.36758410930633545, "learning_rate": 1.6997967479674797e-05, "loss": 0.0407, "step": 36925 }, { "epoch": 0.4503658536585366, "grad_norm": 0.534597635269165, "learning_rate": 1.6997560975609755e-05, "loss": 0.0636, "step": 36930 }, { "epoch": 0.4504268292682927, "grad_norm": 0.6090515851974487, "learning_rate": 1.6997154471544717e-05, "loss": 0.0628, "step": 36935 }, { "epoch": 0.4504878048780488, "grad_norm": 0.8613111972808838, "learning_rate": 1.6996747967479675e-05, "loss": 0.064, "step": 36940 }, { "epoch": 0.4505487804878049, "grad_norm": 0.8973210453987122, "learning_rate": 1.6996341463414637e-05, "loss": 0.0608, "step": 36945 }, { "epoch": 0.450609756097561, "grad_norm": 0.8611679673194885, "learning_rate": 1.6995934959349595e-05, "loss": 0.1148, "step": 36950 }, { "epoch": 0.4506707317073171, "grad_norm": 0.546547532081604, "learning_rate": 1.6995528455284553e-05, "loss": 0.0733, "step": 36955 }, { "epoch": 0.4507317073170732, "grad_norm": 0.46002599596977234, "learning_rate": 1.6995121951219514e-05, "loss": 0.0585, "step": 36960 }, { "epoch": 0.4507926829268293, "grad_norm": 0.6621313691139221, "learning_rate": 1.6994715447154472e-05, "loss": 0.0608, "step": 36965 }, { "epoch": 0.4508536585365854, "grad_norm": 0.5096427202224731, "learning_rate": 1.6994308943089434e-05, "loss": 0.063, "step": 36970 }, { "epoch": 0.4509146341463415, "grad_norm": 1.096215844154358, "learning_rate": 1.6993902439024392e-05, "loss": 0.0608, "step": 36975 }, { "epoch": 0.4509756097560976, "grad_norm": 0.8286496996879578, "learning_rate": 1.6993495934959354e-05, "loss": 0.1006, "step": 36980 }, { "epoch": 0.45103658536585367, "grad_norm": 0.6842164397239685, "learning_rate": 1.699308943089431e-05, "loss": 0.0512, "step": 36985 }, { "epoch": 0.45109756097560977, "grad_norm": 1.087410569190979, "learning_rate": 1.699268292682927e-05, "loss": 0.1027, "step": 36990 }, { "epoch": 0.45115853658536587, "grad_norm": 0.7002478837966919, "learning_rate": 1.6992276422764228e-05, "loss": 0.067, "step": 36995 }, { "epoch": 0.45121951219512196, "grad_norm": 1.131776213645935, "learning_rate": 1.699186991869919e-05, "loss": 0.107, "step": 37000 }, { "epoch": 0.45128048780487806, "grad_norm": 0.6577567458152771, "learning_rate": 1.6991463414634148e-05, "loss": 0.0481, "step": 37005 }, { "epoch": 0.45134146341463416, "grad_norm": 0.8128474950790405, "learning_rate": 1.699105691056911e-05, "loss": 0.0655, "step": 37010 }, { "epoch": 0.45140243902439026, "grad_norm": 0.5340396165847778, "learning_rate": 1.6990650406504067e-05, "loss": 0.1347, "step": 37015 }, { "epoch": 0.45146341463414635, "grad_norm": 1.4213300943374634, "learning_rate": 1.6990243902439025e-05, "loss": 0.1, "step": 37020 }, { "epoch": 0.45152439024390245, "grad_norm": 0.48293131589889526, "learning_rate": 1.6989837398373984e-05, "loss": 0.1083, "step": 37025 }, { "epoch": 0.45158536585365855, "grad_norm": 0.5759886503219604, "learning_rate": 1.6989430894308945e-05, "loss": 0.0939, "step": 37030 }, { "epoch": 0.45164634146341465, "grad_norm": 1.223362922668457, "learning_rate": 1.6989024390243903e-05, "loss": 0.0895, "step": 37035 }, { "epoch": 0.45170731707317074, "grad_norm": 0.5763206481933594, "learning_rate": 1.6988617886178865e-05, "loss": 0.0812, "step": 37040 }, { "epoch": 0.45176829268292684, "grad_norm": 0.5198390483856201, "learning_rate": 1.6988211382113823e-05, "loss": 0.074, "step": 37045 }, { "epoch": 0.45182926829268294, "grad_norm": 0.4321015179157257, "learning_rate": 1.698780487804878e-05, "loss": 0.0694, "step": 37050 }, { "epoch": 0.45189024390243904, "grad_norm": 1.3694663047790527, "learning_rate": 1.6987398373983742e-05, "loss": 0.0663, "step": 37055 }, { "epoch": 0.45195121951219513, "grad_norm": 0.838801920413971, "learning_rate": 1.69869918699187e-05, "loss": 0.0873, "step": 37060 }, { "epoch": 0.45201219512195123, "grad_norm": 1.0825982093811035, "learning_rate": 1.6986585365853662e-05, "loss": 0.1268, "step": 37065 }, { "epoch": 0.45207317073170733, "grad_norm": 0.7277013659477234, "learning_rate": 1.698617886178862e-05, "loss": 0.0958, "step": 37070 }, { "epoch": 0.4521341463414634, "grad_norm": 0.3622809052467346, "learning_rate": 1.698577235772358e-05, "loss": 0.0501, "step": 37075 }, { "epoch": 0.4521951219512195, "grad_norm": 0.6056419014930725, "learning_rate": 1.6985365853658537e-05, "loss": 0.0772, "step": 37080 }, { "epoch": 0.4522560975609756, "grad_norm": 0.3904167115688324, "learning_rate": 1.6984959349593498e-05, "loss": 0.0893, "step": 37085 }, { "epoch": 0.4523170731707317, "grad_norm": 0.5632226467132568, "learning_rate": 1.6984552845528456e-05, "loss": 0.0594, "step": 37090 }, { "epoch": 0.4523780487804878, "grad_norm": 0.42634525895118713, "learning_rate": 1.6984146341463418e-05, "loss": 0.0807, "step": 37095 }, { "epoch": 0.4524390243902439, "grad_norm": 0.4845125675201416, "learning_rate": 1.6983739837398376e-05, "loss": 0.0776, "step": 37100 }, { "epoch": 0.4525, "grad_norm": 0.7645912170410156, "learning_rate": 1.6983333333333334e-05, "loss": 0.0775, "step": 37105 }, { "epoch": 0.4525609756097561, "grad_norm": 0.4851836860179901, "learning_rate": 1.6982926829268292e-05, "loss": 0.0757, "step": 37110 }, { "epoch": 0.4526219512195122, "grad_norm": 0.5726795792579651, "learning_rate": 1.6982520325203254e-05, "loss": 0.0861, "step": 37115 }, { "epoch": 0.4526829268292683, "grad_norm": 0.5754434466362, "learning_rate": 1.6982113821138212e-05, "loss": 0.0651, "step": 37120 }, { "epoch": 0.4527439024390244, "grad_norm": 0.39468124508857727, "learning_rate": 1.6981707317073173e-05, "loss": 0.0554, "step": 37125 }, { "epoch": 0.4528048780487805, "grad_norm": 0.6521946787834167, "learning_rate": 1.698130081300813e-05, "loss": 0.0684, "step": 37130 }, { "epoch": 0.4528658536585366, "grad_norm": 0.9801324605941772, "learning_rate": 1.698089430894309e-05, "loss": 0.0886, "step": 37135 }, { "epoch": 0.4529268292682927, "grad_norm": 1.8653879165649414, "learning_rate": 1.698048780487805e-05, "loss": 0.1034, "step": 37140 }, { "epoch": 0.4529878048780488, "grad_norm": 0.28479161858558655, "learning_rate": 1.698008130081301e-05, "loss": 0.0511, "step": 37145 }, { "epoch": 0.4530487804878049, "grad_norm": 0.7189282774925232, "learning_rate": 1.697967479674797e-05, "loss": 0.085, "step": 37150 }, { "epoch": 0.453109756097561, "grad_norm": 0.555538535118103, "learning_rate": 1.697926829268293e-05, "loss": 0.0919, "step": 37155 }, { "epoch": 0.4531707317073171, "grad_norm": 0.4258648753166199, "learning_rate": 1.697886178861789e-05, "loss": 0.0947, "step": 37160 }, { "epoch": 0.4532317073170732, "grad_norm": 0.696995735168457, "learning_rate": 1.6978455284552845e-05, "loss": 0.0658, "step": 37165 }, { "epoch": 0.4532926829268293, "grad_norm": 0.7018718719482422, "learning_rate": 1.6978048780487807e-05, "loss": 0.0673, "step": 37170 }, { "epoch": 0.4533536585365854, "grad_norm": 0.6219207644462585, "learning_rate": 1.6977642276422765e-05, "loss": 0.0602, "step": 37175 }, { "epoch": 0.4534146341463415, "grad_norm": 0.3757302463054657, "learning_rate": 1.6977235772357726e-05, "loss": 0.0461, "step": 37180 }, { "epoch": 0.4534756097560976, "grad_norm": 0.4374311864376068, "learning_rate": 1.6976829268292684e-05, "loss": 0.0499, "step": 37185 }, { "epoch": 0.4535365853658537, "grad_norm": 0.6419468522071838, "learning_rate": 1.6976422764227646e-05, "loss": 0.0692, "step": 37190 }, { "epoch": 0.45359756097560977, "grad_norm": 1.069269061088562, "learning_rate": 1.69760162601626e-05, "loss": 0.1093, "step": 37195 }, { "epoch": 0.45365853658536587, "grad_norm": 0.46437129378318787, "learning_rate": 1.6975609756097562e-05, "loss": 0.0669, "step": 37200 }, { "epoch": 0.45371951219512197, "grad_norm": 0.8154667019844055, "learning_rate": 1.697520325203252e-05, "loss": 0.1134, "step": 37205 }, { "epoch": 0.45378048780487806, "grad_norm": 0.8949741721153259, "learning_rate": 1.6974796747967482e-05, "loss": 0.0957, "step": 37210 }, { "epoch": 0.45384146341463416, "grad_norm": 1.3636115789413452, "learning_rate": 1.697439024390244e-05, "loss": 0.0886, "step": 37215 }, { "epoch": 0.45390243902439026, "grad_norm": 0.7185683846473694, "learning_rate": 1.69739837398374e-05, "loss": 0.087, "step": 37220 }, { "epoch": 0.45396341463414636, "grad_norm": 0.6948537230491638, "learning_rate": 1.697357723577236e-05, "loss": 0.0677, "step": 37225 }, { "epoch": 0.45402439024390245, "grad_norm": 0.8444908857345581, "learning_rate": 1.6973170731707318e-05, "loss": 0.0662, "step": 37230 }, { "epoch": 0.45408536585365855, "grad_norm": 0.3900904655456543, "learning_rate": 1.697276422764228e-05, "loss": 0.0519, "step": 37235 }, { "epoch": 0.45414634146341465, "grad_norm": 0.6500153541564941, "learning_rate": 1.6972357723577237e-05, "loss": 0.0537, "step": 37240 }, { "epoch": 0.45420731707317075, "grad_norm": 0.6891303658485413, "learning_rate": 1.69719512195122e-05, "loss": 0.0786, "step": 37245 }, { "epoch": 0.45426829268292684, "grad_norm": 0.5835050344467163, "learning_rate": 1.6971544715447157e-05, "loss": 0.0638, "step": 37250 }, { "epoch": 0.45432926829268294, "grad_norm": 0.5135204195976257, "learning_rate": 1.6971138211382115e-05, "loss": 0.097, "step": 37255 }, { "epoch": 0.45439024390243904, "grad_norm": 0.7670146822929382, "learning_rate": 1.6970731707317073e-05, "loss": 0.0645, "step": 37260 }, { "epoch": 0.45445121951219514, "grad_norm": 0.7317219376564026, "learning_rate": 1.6970325203252035e-05, "loss": 0.0979, "step": 37265 }, { "epoch": 0.45451219512195123, "grad_norm": 0.8896968960762024, "learning_rate": 1.6969918699186993e-05, "loss": 0.0542, "step": 37270 }, { "epoch": 0.45457317073170733, "grad_norm": 0.6295434832572937, "learning_rate": 1.6969512195121954e-05, "loss": 0.0799, "step": 37275 }, { "epoch": 0.45463414634146343, "grad_norm": 1.6329585313796997, "learning_rate": 1.6969105691056912e-05, "loss": 0.0691, "step": 37280 }, { "epoch": 0.4546951219512195, "grad_norm": 0.5401318073272705, "learning_rate": 1.696869918699187e-05, "loss": 0.0476, "step": 37285 }, { "epoch": 0.4547560975609756, "grad_norm": 0.9834790229797363, "learning_rate": 1.696829268292683e-05, "loss": 0.0739, "step": 37290 }, { "epoch": 0.4548170731707317, "grad_norm": 0.8276093006134033, "learning_rate": 1.696788617886179e-05, "loss": 0.0885, "step": 37295 }, { "epoch": 0.4548780487804878, "grad_norm": 0.5002177953720093, "learning_rate": 1.696747967479675e-05, "loss": 0.0724, "step": 37300 }, { "epoch": 0.4549390243902439, "grad_norm": 0.9468427896499634, "learning_rate": 1.696707317073171e-05, "loss": 0.0724, "step": 37305 }, { "epoch": 0.455, "grad_norm": 0.8834798336029053, "learning_rate": 1.6966666666666668e-05, "loss": 0.0536, "step": 37310 }, { "epoch": 0.4550609756097561, "grad_norm": 0.5315609574317932, "learning_rate": 1.6966260162601626e-05, "loss": 0.077, "step": 37315 }, { "epoch": 0.4551219512195122, "grad_norm": 0.6137266755104065, "learning_rate": 1.6965853658536588e-05, "loss": 0.0505, "step": 37320 }, { "epoch": 0.4551829268292683, "grad_norm": 0.7105460166931152, "learning_rate": 1.6965447154471546e-05, "loss": 0.0692, "step": 37325 }, { "epoch": 0.4552439024390244, "grad_norm": 0.8199962973594666, "learning_rate": 1.6965040650406507e-05, "loss": 0.0797, "step": 37330 }, { "epoch": 0.4553048780487805, "grad_norm": 0.7426071763038635, "learning_rate": 1.6964634146341465e-05, "loss": 0.0804, "step": 37335 }, { "epoch": 0.4553658536585366, "grad_norm": 0.5482418537139893, "learning_rate": 1.6964227642276424e-05, "loss": 0.0541, "step": 37340 }, { "epoch": 0.4554268292682927, "grad_norm": 0.58001708984375, "learning_rate": 1.696382113821138e-05, "loss": 0.0549, "step": 37345 }, { "epoch": 0.4554878048780488, "grad_norm": 0.9877895712852478, "learning_rate": 1.6963414634146343e-05, "loss": 0.0617, "step": 37350 }, { "epoch": 0.4555487804878049, "grad_norm": 0.4153137505054474, "learning_rate": 1.69630081300813e-05, "loss": 0.0371, "step": 37355 }, { "epoch": 0.455609756097561, "grad_norm": 0.7970685362815857, "learning_rate": 1.6962601626016263e-05, "loss": 0.073, "step": 37360 }, { "epoch": 0.4556707317073171, "grad_norm": 0.8455252647399902, "learning_rate": 1.696219512195122e-05, "loss": 0.0674, "step": 37365 }, { "epoch": 0.4557317073170732, "grad_norm": 0.9718667268753052, "learning_rate": 1.6961788617886182e-05, "loss": 0.0646, "step": 37370 }, { "epoch": 0.4557926829268293, "grad_norm": 0.7411321997642517, "learning_rate": 1.6961382113821137e-05, "loss": 0.073, "step": 37375 }, { "epoch": 0.4558536585365854, "grad_norm": 0.40691328048706055, "learning_rate": 1.69609756097561e-05, "loss": 0.0509, "step": 37380 }, { "epoch": 0.4559146341463415, "grad_norm": 0.6355515718460083, "learning_rate": 1.6960569105691057e-05, "loss": 0.0793, "step": 37385 }, { "epoch": 0.4559756097560976, "grad_norm": 0.591215193271637, "learning_rate": 1.696016260162602e-05, "loss": 0.0686, "step": 37390 }, { "epoch": 0.4560365853658537, "grad_norm": 3.8123409748077393, "learning_rate": 1.6959756097560976e-05, "loss": 0.0548, "step": 37395 }, { "epoch": 0.4560975609756098, "grad_norm": 0.6483091115951538, "learning_rate": 1.6959349593495938e-05, "loss": 0.0838, "step": 37400 }, { "epoch": 0.45615853658536587, "grad_norm": 0.2825222909450531, "learning_rate": 1.6958943089430896e-05, "loss": 0.051, "step": 37405 }, { "epoch": 0.45621951219512197, "grad_norm": 0.562929093837738, "learning_rate": 1.6958536585365854e-05, "loss": 0.0906, "step": 37410 }, { "epoch": 0.45628048780487807, "grad_norm": 0.6405023336410522, "learning_rate": 1.6958130081300816e-05, "loss": 0.0777, "step": 37415 }, { "epoch": 0.45634146341463416, "grad_norm": 0.34285545349121094, "learning_rate": 1.6957723577235774e-05, "loss": 0.0542, "step": 37420 }, { "epoch": 0.45640243902439026, "grad_norm": 0.7270329594612122, "learning_rate": 1.6957317073170735e-05, "loss": 0.0519, "step": 37425 }, { "epoch": 0.45646341463414636, "grad_norm": 0.7096496224403381, "learning_rate": 1.6956910569105694e-05, "loss": 0.0797, "step": 37430 }, { "epoch": 0.45652439024390246, "grad_norm": 0.8417941927909851, "learning_rate": 1.695650406504065e-05, "loss": 0.0816, "step": 37435 }, { "epoch": 0.45658536585365855, "grad_norm": 1.1899534463882446, "learning_rate": 1.695609756097561e-05, "loss": 0.0915, "step": 37440 }, { "epoch": 0.45664634146341465, "grad_norm": 0.7910978198051453, "learning_rate": 1.695569105691057e-05, "loss": 0.065, "step": 37445 }, { "epoch": 0.45670731707317075, "grad_norm": 0.5942491888999939, "learning_rate": 1.695528455284553e-05, "loss": 0.0552, "step": 37450 }, { "epoch": 0.45676829268292685, "grad_norm": 0.7915257215499878, "learning_rate": 1.695487804878049e-05, "loss": 0.085, "step": 37455 }, { "epoch": 0.45682926829268294, "grad_norm": 0.648326575756073, "learning_rate": 1.695447154471545e-05, "loss": 0.0708, "step": 37460 }, { "epoch": 0.45689024390243904, "grad_norm": 0.72865891456604, "learning_rate": 1.6954065040650407e-05, "loss": 0.081, "step": 37465 }, { "epoch": 0.45695121951219514, "grad_norm": 0.4295421242713928, "learning_rate": 1.6953658536585365e-05, "loss": 0.1112, "step": 37470 }, { "epoch": 0.45701219512195124, "grad_norm": 0.5731421709060669, "learning_rate": 1.6953252032520327e-05, "loss": 0.0884, "step": 37475 }, { "epoch": 0.45707317073170733, "grad_norm": 1.5970423221588135, "learning_rate": 1.6952845528455285e-05, "loss": 0.1128, "step": 37480 }, { "epoch": 0.45713414634146343, "grad_norm": 0.7008277773857117, "learning_rate": 1.6952439024390246e-05, "loss": 0.0913, "step": 37485 }, { "epoch": 0.45719512195121953, "grad_norm": 0.711906373500824, "learning_rate": 1.6952032520325205e-05, "loss": 0.0681, "step": 37490 }, { "epoch": 0.4572560975609756, "grad_norm": 0.5358969569206238, "learning_rate": 1.6951626016260163e-05, "loss": 0.1002, "step": 37495 }, { "epoch": 0.4573170731707317, "grad_norm": 0.8713160157203674, "learning_rate": 1.6951219512195124e-05, "loss": 0.0743, "step": 37500 }, { "epoch": 0.4573780487804878, "grad_norm": 0.5612603425979614, "learning_rate": 1.6950813008130082e-05, "loss": 0.0434, "step": 37505 }, { "epoch": 0.4574390243902439, "grad_norm": 1.2034083604812622, "learning_rate": 1.6950406504065044e-05, "loss": 0.0957, "step": 37510 }, { "epoch": 0.4575, "grad_norm": 0.5800874829292297, "learning_rate": 1.6950000000000002e-05, "loss": 0.0765, "step": 37515 }, { "epoch": 0.4575609756097561, "grad_norm": 0.4375183880329132, "learning_rate": 1.694959349593496e-05, "loss": 0.0598, "step": 37520 }, { "epoch": 0.4576219512195122, "grad_norm": 0.8339414000511169, "learning_rate": 1.6949186991869918e-05, "loss": 0.0729, "step": 37525 }, { "epoch": 0.4576829268292683, "grad_norm": 0.8061226010322571, "learning_rate": 1.694878048780488e-05, "loss": 0.0581, "step": 37530 }, { "epoch": 0.4577439024390244, "grad_norm": 1.2630456686019897, "learning_rate": 1.6948373983739838e-05, "loss": 0.0549, "step": 37535 }, { "epoch": 0.4578048780487805, "grad_norm": 0.7294186353683472, "learning_rate": 1.69479674796748e-05, "loss": 0.1195, "step": 37540 }, { "epoch": 0.4578658536585366, "grad_norm": 1.0742604732513428, "learning_rate": 1.6947560975609758e-05, "loss": 0.0611, "step": 37545 }, { "epoch": 0.4579268292682927, "grad_norm": 0.6049199104309082, "learning_rate": 1.694715447154472e-05, "loss": 0.0648, "step": 37550 }, { "epoch": 0.4579878048780488, "grad_norm": 0.6232295036315918, "learning_rate": 1.6946747967479674e-05, "loss": 0.0907, "step": 37555 }, { "epoch": 0.4580487804878049, "grad_norm": 0.6970646381378174, "learning_rate": 1.6946341463414635e-05, "loss": 0.0683, "step": 37560 }, { "epoch": 0.458109756097561, "grad_norm": 0.8534400463104248, "learning_rate": 1.6945934959349593e-05, "loss": 0.0763, "step": 37565 }, { "epoch": 0.4581707317073171, "grad_norm": 1.0792839527130127, "learning_rate": 1.6945528455284555e-05, "loss": 0.11, "step": 37570 }, { "epoch": 0.4582317073170732, "grad_norm": 0.35182321071624756, "learning_rate": 1.6945121951219513e-05, "loss": 0.0547, "step": 37575 }, { "epoch": 0.4582926829268293, "grad_norm": 0.5346949100494385, "learning_rate": 1.6944715447154475e-05, "loss": 0.0917, "step": 37580 }, { "epoch": 0.4583536585365854, "grad_norm": 0.8524258732795715, "learning_rate": 1.6944308943089433e-05, "loss": 0.0771, "step": 37585 }, { "epoch": 0.4584146341463415, "grad_norm": 0.4496673047542572, "learning_rate": 1.694390243902439e-05, "loss": 0.068, "step": 37590 }, { "epoch": 0.4584756097560976, "grad_norm": 0.4087676703929901, "learning_rate": 1.6943495934959352e-05, "loss": 0.0848, "step": 37595 }, { "epoch": 0.4585365853658537, "grad_norm": 0.7930429577827454, "learning_rate": 1.694308943089431e-05, "loss": 0.054, "step": 37600 }, { "epoch": 0.4585975609756098, "grad_norm": 0.7168368697166443, "learning_rate": 1.694268292682927e-05, "loss": 0.0589, "step": 37605 }, { "epoch": 0.4586585365853659, "grad_norm": 1.2419590950012207, "learning_rate": 1.694227642276423e-05, "loss": 0.0707, "step": 37610 }, { "epoch": 0.45871951219512197, "grad_norm": 0.7476951479911804, "learning_rate": 1.694186991869919e-05, "loss": 0.0777, "step": 37615 }, { "epoch": 0.45878048780487807, "grad_norm": 0.6081401109695435, "learning_rate": 1.6941463414634146e-05, "loss": 0.0684, "step": 37620 }, { "epoch": 0.45884146341463417, "grad_norm": 1.4594119787216187, "learning_rate": 1.6941056910569108e-05, "loss": 0.0682, "step": 37625 }, { "epoch": 0.45890243902439026, "grad_norm": 0.6109505891799927, "learning_rate": 1.6940650406504066e-05, "loss": 0.1142, "step": 37630 }, { "epoch": 0.45896341463414636, "grad_norm": 0.8284942507743835, "learning_rate": 1.6940243902439028e-05, "loss": 0.0404, "step": 37635 }, { "epoch": 0.45902439024390246, "grad_norm": 0.6771957278251648, "learning_rate": 1.6939837398373986e-05, "loss": 0.0703, "step": 37640 }, { "epoch": 0.45908536585365856, "grad_norm": 0.6057401299476624, "learning_rate": 1.6939430894308944e-05, "loss": 0.0865, "step": 37645 }, { "epoch": 0.45914634146341465, "grad_norm": 0.6051211357116699, "learning_rate": 1.6939024390243902e-05, "loss": 0.0904, "step": 37650 }, { "epoch": 0.45920731707317075, "grad_norm": 0.6302400827407837, "learning_rate": 1.6938617886178863e-05, "loss": 0.0718, "step": 37655 }, { "epoch": 0.45926829268292685, "grad_norm": 1.1522787809371948, "learning_rate": 1.693821138211382e-05, "loss": 0.067, "step": 37660 }, { "epoch": 0.45932926829268295, "grad_norm": 1.3222218751907349, "learning_rate": 1.6937804878048783e-05, "loss": 0.1454, "step": 37665 }, { "epoch": 0.45939024390243904, "grad_norm": 0.664240837097168, "learning_rate": 1.693739837398374e-05, "loss": 0.0573, "step": 37670 }, { "epoch": 0.45945121951219514, "grad_norm": 0.41852685809135437, "learning_rate": 1.69369918699187e-05, "loss": 0.0643, "step": 37675 }, { "epoch": 0.45951219512195124, "grad_norm": 0.39127227663993835, "learning_rate": 1.693658536585366e-05, "loss": 0.0437, "step": 37680 }, { "epoch": 0.45957317073170734, "grad_norm": 0.747862696647644, "learning_rate": 1.693617886178862e-05, "loss": 0.0649, "step": 37685 }, { "epoch": 0.45963414634146343, "grad_norm": 2.0432636737823486, "learning_rate": 1.693577235772358e-05, "loss": 0.0877, "step": 37690 }, { "epoch": 0.45969512195121953, "grad_norm": 1.082505464553833, "learning_rate": 1.693536585365854e-05, "loss": 0.0672, "step": 37695 }, { "epoch": 0.45975609756097563, "grad_norm": 0.6023675203323364, "learning_rate": 1.6934959349593497e-05, "loss": 0.1145, "step": 37700 }, { "epoch": 0.4598170731707317, "grad_norm": 0.9775375723838806, "learning_rate": 1.6934552845528455e-05, "loss": 0.0716, "step": 37705 }, { "epoch": 0.4598780487804878, "grad_norm": 0.6423290967941284, "learning_rate": 1.6934146341463416e-05, "loss": 0.0793, "step": 37710 }, { "epoch": 0.4599390243902439, "grad_norm": 0.47072142362594604, "learning_rate": 1.6933739837398375e-05, "loss": 0.1574, "step": 37715 }, { "epoch": 0.46, "grad_norm": 0.45964089035987854, "learning_rate": 1.6933333333333336e-05, "loss": 0.0845, "step": 37720 }, { "epoch": 0.4600609756097561, "grad_norm": 0.6885509490966797, "learning_rate": 1.6932926829268294e-05, "loss": 0.0856, "step": 37725 }, { "epoch": 0.4601219512195122, "grad_norm": 0.9661829471588135, "learning_rate": 1.6932520325203256e-05, "loss": 0.0889, "step": 37730 }, { "epoch": 0.4601829268292683, "grad_norm": 1.082426905632019, "learning_rate": 1.693211382113821e-05, "loss": 0.0817, "step": 37735 }, { "epoch": 0.4602439024390244, "grad_norm": 0.6516963839530945, "learning_rate": 1.6931707317073172e-05, "loss": 0.0598, "step": 37740 }, { "epoch": 0.4603048780487805, "grad_norm": 2.7872908115386963, "learning_rate": 1.693130081300813e-05, "loss": 0.1021, "step": 37745 }, { "epoch": 0.4603658536585366, "grad_norm": 0.5413069725036621, "learning_rate": 1.693089430894309e-05, "loss": 0.0744, "step": 37750 }, { "epoch": 0.4604268292682927, "grad_norm": 1.2794280052185059, "learning_rate": 1.693048780487805e-05, "loss": 0.0808, "step": 37755 }, { "epoch": 0.4604878048780488, "grad_norm": 0.6494271755218506, "learning_rate": 1.693008130081301e-05, "loss": 0.0534, "step": 37760 }, { "epoch": 0.4605487804878049, "grad_norm": 0.5440472364425659, "learning_rate": 1.692967479674797e-05, "loss": 0.0794, "step": 37765 }, { "epoch": 0.460609756097561, "grad_norm": 0.4083232283592224, "learning_rate": 1.6929268292682928e-05, "loss": 0.0617, "step": 37770 }, { "epoch": 0.4606707317073171, "grad_norm": 0.6690700650215149, "learning_rate": 1.692886178861789e-05, "loss": 0.0637, "step": 37775 }, { "epoch": 0.4607317073170732, "grad_norm": 0.8434821963310242, "learning_rate": 1.6928455284552847e-05, "loss": 0.0463, "step": 37780 }, { "epoch": 0.4607926829268293, "grad_norm": 0.37206852436065674, "learning_rate": 1.6928048780487805e-05, "loss": 0.0688, "step": 37785 }, { "epoch": 0.4608536585365854, "grad_norm": 0.576443612575531, "learning_rate": 1.6927642276422767e-05, "loss": 0.056, "step": 37790 }, { "epoch": 0.4609146341463415, "grad_norm": 1.1820006370544434, "learning_rate": 1.6927235772357725e-05, "loss": 0.0599, "step": 37795 }, { "epoch": 0.4609756097560976, "grad_norm": 0.41722816228866577, "learning_rate": 1.6926829268292683e-05, "loss": 0.0754, "step": 37800 }, { "epoch": 0.4610365853658537, "grad_norm": 0.645774781703949, "learning_rate": 1.6926422764227645e-05, "loss": 0.0663, "step": 37805 }, { "epoch": 0.4610975609756098, "grad_norm": 0.727634608745575, "learning_rate": 1.6926016260162603e-05, "loss": 0.1142, "step": 37810 }, { "epoch": 0.4611585365853659, "grad_norm": 0.53895503282547, "learning_rate": 1.6925609756097564e-05, "loss": 0.0635, "step": 37815 }, { "epoch": 0.461219512195122, "grad_norm": 0.5430802702903748, "learning_rate": 1.6925203252032522e-05, "loss": 0.0602, "step": 37820 }, { "epoch": 0.46128048780487807, "grad_norm": 0.3316783905029297, "learning_rate": 1.692479674796748e-05, "loss": 0.0588, "step": 37825 }, { "epoch": 0.46134146341463417, "grad_norm": 0.4974891245365143, "learning_rate": 1.692439024390244e-05, "loss": 0.0762, "step": 37830 }, { "epoch": 0.46140243902439027, "grad_norm": 0.8117896914482117, "learning_rate": 1.69239837398374e-05, "loss": 0.0687, "step": 37835 }, { "epoch": 0.46146341463414636, "grad_norm": 1.5605634450912476, "learning_rate": 1.6923577235772358e-05, "loss": 0.0566, "step": 37840 }, { "epoch": 0.46152439024390246, "grad_norm": 0.4325653314590454, "learning_rate": 1.692317073170732e-05, "loss": 0.0724, "step": 37845 }, { "epoch": 0.46158536585365856, "grad_norm": 0.8692393898963928, "learning_rate": 1.6922764227642278e-05, "loss": 0.0746, "step": 37850 }, { "epoch": 0.46164634146341466, "grad_norm": 0.8359400629997253, "learning_rate": 1.6922357723577236e-05, "loss": 0.0412, "step": 37855 }, { "epoch": 0.46170731707317075, "grad_norm": 0.7679814100265503, "learning_rate": 1.6921951219512198e-05, "loss": 0.0542, "step": 37860 }, { "epoch": 0.46176829268292685, "grad_norm": 0.5493312478065491, "learning_rate": 1.6921544715447156e-05, "loss": 0.0605, "step": 37865 }, { "epoch": 0.46182926829268295, "grad_norm": 0.6060841679573059, "learning_rate": 1.6921138211382114e-05, "loss": 0.0496, "step": 37870 }, { "epoch": 0.46189024390243905, "grad_norm": 0.650307834148407, "learning_rate": 1.6920731707317075e-05, "loss": 0.0778, "step": 37875 }, { "epoch": 0.46195121951219514, "grad_norm": 0.31958624720573425, "learning_rate": 1.6920325203252033e-05, "loss": 0.0728, "step": 37880 }, { "epoch": 0.46201219512195124, "grad_norm": 0.5389629006385803, "learning_rate": 1.691991869918699e-05, "loss": 0.0493, "step": 37885 }, { "epoch": 0.46207317073170734, "grad_norm": 1.4260456562042236, "learning_rate": 1.6919512195121953e-05, "loss": 0.0904, "step": 37890 }, { "epoch": 0.46213414634146344, "grad_norm": 0.6138371229171753, "learning_rate": 1.691910569105691e-05, "loss": 0.0498, "step": 37895 }, { "epoch": 0.46219512195121953, "grad_norm": 0.9889875054359436, "learning_rate": 1.6918699186991873e-05, "loss": 0.0756, "step": 37900 }, { "epoch": 0.46225609756097563, "grad_norm": 0.851651668548584, "learning_rate": 1.691829268292683e-05, "loss": 0.0774, "step": 37905 }, { "epoch": 0.46231707317073173, "grad_norm": 2.7299060821533203, "learning_rate": 1.6917886178861792e-05, "loss": 0.0804, "step": 37910 }, { "epoch": 0.4623780487804878, "grad_norm": 1.4483728408813477, "learning_rate": 1.6917479674796747e-05, "loss": 0.0816, "step": 37915 }, { "epoch": 0.4624390243902439, "grad_norm": 1.6423156261444092, "learning_rate": 1.691707317073171e-05, "loss": 0.0816, "step": 37920 }, { "epoch": 0.4625, "grad_norm": 0.8110464215278625, "learning_rate": 1.6916666666666667e-05, "loss": 0.0628, "step": 37925 }, { "epoch": 0.4625609756097561, "grad_norm": 0.8300049304962158, "learning_rate": 1.6916260162601628e-05, "loss": 0.0723, "step": 37930 }, { "epoch": 0.4626219512195122, "grad_norm": 0.6174187660217285, "learning_rate": 1.6915853658536586e-05, "loss": 0.083, "step": 37935 }, { "epoch": 0.4626829268292683, "grad_norm": 0.9229624271392822, "learning_rate": 1.6915447154471548e-05, "loss": 0.1532, "step": 37940 }, { "epoch": 0.4627439024390244, "grad_norm": 0.45876452326774597, "learning_rate": 1.6915040650406506e-05, "loss": 0.0728, "step": 37945 }, { "epoch": 0.4628048780487805, "grad_norm": 1.2834599018096924, "learning_rate": 1.6914634146341464e-05, "loss": 0.1013, "step": 37950 }, { "epoch": 0.4628658536585366, "grad_norm": 0.9409292340278625, "learning_rate": 1.6914227642276422e-05, "loss": 0.0599, "step": 37955 }, { "epoch": 0.4629268292682927, "grad_norm": 0.633436381816864, "learning_rate": 1.6913821138211384e-05, "loss": 0.0492, "step": 37960 }, { "epoch": 0.4629878048780488, "grad_norm": 0.5368553996086121, "learning_rate": 1.6913414634146342e-05, "loss": 0.075, "step": 37965 }, { "epoch": 0.4630487804878049, "grad_norm": 1.0588854551315308, "learning_rate": 1.6913008130081303e-05, "loss": 0.0632, "step": 37970 }, { "epoch": 0.463109756097561, "grad_norm": 1.5955779552459717, "learning_rate": 1.691260162601626e-05, "loss": 0.0783, "step": 37975 }, { "epoch": 0.4631707317073171, "grad_norm": 0.8707399368286133, "learning_rate": 1.691219512195122e-05, "loss": 0.0741, "step": 37980 }, { "epoch": 0.4632317073170732, "grad_norm": 0.784467875957489, "learning_rate": 1.691178861788618e-05, "loss": 0.0898, "step": 37985 }, { "epoch": 0.4632926829268293, "grad_norm": 0.5327526330947876, "learning_rate": 1.691138211382114e-05, "loss": 0.078, "step": 37990 }, { "epoch": 0.4633536585365854, "grad_norm": 0.7262579798698425, "learning_rate": 1.69109756097561e-05, "loss": 0.0942, "step": 37995 }, { "epoch": 0.4634146341463415, "grad_norm": 2.844515800476074, "learning_rate": 1.691056910569106e-05, "loss": 0.0911, "step": 38000 }, { "epoch": 0.4634756097560976, "grad_norm": 0.5579215884208679, "learning_rate": 1.6910162601626017e-05, "loss": 0.1083, "step": 38005 }, { "epoch": 0.4635365853658537, "grad_norm": 0.4953715205192566, "learning_rate": 1.6909756097560975e-05, "loss": 0.0802, "step": 38010 }, { "epoch": 0.4635975609756098, "grad_norm": 0.7269048094749451, "learning_rate": 1.6909349593495937e-05, "loss": 0.0591, "step": 38015 }, { "epoch": 0.4636585365853659, "grad_norm": 0.5200045108795166, "learning_rate": 1.6908943089430895e-05, "loss": 0.0777, "step": 38020 }, { "epoch": 0.463719512195122, "grad_norm": 0.7813510298728943, "learning_rate": 1.6908536585365856e-05, "loss": 0.0796, "step": 38025 }, { "epoch": 0.46378048780487807, "grad_norm": 0.8366590142250061, "learning_rate": 1.6908130081300815e-05, "loss": 0.0661, "step": 38030 }, { "epoch": 0.46384146341463417, "grad_norm": 0.626876175403595, "learning_rate": 1.6907723577235773e-05, "loss": 0.1043, "step": 38035 }, { "epoch": 0.46390243902439027, "grad_norm": 0.42957648634910583, "learning_rate": 1.6907317073170734e-05, "loss": 0.08, "step": 38040 }, { "epoch": 0.46396341463414636, "grad_norm": 0.7348048686981201, "learning_rate": 1.6906910569105692e-05, "loss": 0.0895, "step": 38045 }, { "epoch": 0.46402439024390246, "grad_norm": 1.2360637187957764, "learning_rate": 1.690650406504065e-05, "loss": 0.0723, "step": 38050 }, { "epoch": 0.46408536585365856, "grad_norm": 0.5969371199607849, "learning_rate": 1.6906097560975612e-05, "loss": 0.0773, "step": 38055 }, { "epoch": 0.46414634146341466, "grad_norm": 0.5089079737663269, "learning_rate": 1.690569105691057e-05, "loss": 0.0824, "step": 38060 }, { "epoch": 0.46420731707317076, "grad_norm": 0.9982383251190186, "learning_rate": 1.6905284552845528e-05, "loss": 0.0746, "step": 38065 }, { "epoch": 0.46426829268292685, "grad_norm": 0.6275423765182495, "learning_rate": 1.690487804878049e-05, "loss": 0.0814, "step": 38070 }, { "epoch": 0.46432926829268295, "grad_norm": 0.4895952045917511, "learning_rate": 1.6904471544715448e-05, "loss": 0.0711, "step": 38075 }, { "epoch": 0.46439024390243905, "grad_norm": 0.662360429763794, "learning_rate": 1.690406504065041e-05, "loss": 0.0709, "step": 38080 }, { "epoch": 0.46445121951219515, "grad_norm": 1.4986398220062256, "learning_rate": 1.6903658536585368e-05, "loss": 0.067, "step": 38085 }, { "epoch": 0.46451219512195124, "grad_norm": 1.2765698432922363, "learning_rate": 1.690325203252033e-05, "loss": 0.0882, "step": 38090 }, { "epoch": 0.46457317073170734, "grad_norm": 0.6212641596794128, "learning_rate": 1.6902845528455284e-05, "loss": 0.0597, "step": 38095 }, { "epoch": 0.46463414634146344, "grad_norm": 0.4401925504207611, "learning_rate": 1.6902439024390245e-05, "loss": 0.0679, "step": 38100 }, { "epoch": 0.46469512195121954, "grad_norm": 1.579677939414978, "learning_rate": 1.6902032520325203e-05, "loss": 0.0671, "step": 38105 }, { "epoch": 0.46475609756097563, "grad_norm": 0.6850824356079102, "learning_rate": 1.6901626016260165e-05, "loss": 0.0696, "step": 38110 }, { "epoch": 0.46481707317073173, "grad_norm": 1.3168799877166748, "learning_rate": 1.6901219512195123e-05, "loss": 0.0605, "step": 38115 }, { "epoch": 0.46487804878048783, "grad_norm": 0.9454400539398193, "learning_rate": 1.6900813008130085e-05, "loss": 0.0768, "step": 38120 }, { "epoch": 0.4649390243902439, "grad_norm": 1.713748574256897, "learning_rate": 1.6900406504065043e-05, "loss": 0.0541, "step": 38125 }, { "epoch": 0.465, "grad_norm": 1.3861963748931885, "learning_rate": 1.69e-05, "loss": 0.0956, "step": 38130 }, { "epoch": 0.4650609756097561, "grad_norm": 0.8684269785881042, "learning_rate": 1.689959349593496e-05, "loss": 0.079, "step": 38135 }, { "epoch": 0.4651219512195122, "grad_norm": 1.1408025026321411, "learning_rate": 1.689918699186992e-05, "loss": 0.0769, "step": 38140 }, { "epoch": 0.4651829268292683, "grad_norm": 0.7841545343399048, "learning_rate": 1.689878048780488e-05, "loss": 0.0568, "step": 38145 }, { "epoch": 0.4652439024390244, "grad_norm": 0.7963852882385254, "learning_rate": 1.689837398373984e-05, "loss": 0.0639, "step": 38150 }, { "epoch": 0.4653048780487805, "grad_norm": 0.7048251628875732, "learning_rate": 1.6897967479674798e-05, "loss": 0.0813, "step": 38155 }, { "epoch": 0.4653658536585366, "grad_norm": 0.9752597212791443, "learning_rate": 1.6897560975609756e-05, "loss": 0.077, "step": 38160 }, { "epoch": 0.4654268292682927, "grad_norm": 0.6906474232673645, "learning_rate": 1.6897154471544718e-05, "loss": 0.0544, "step": 38165 }, { "epoch": 0.4654878048780488, "grad_norm": 0.8753570318222046, "learning_rate": 1.6896747967479676e-05, "loss": 0.0617, "step": 38170 }, { "epoch": 0.4655487804878049, "grad_norm": 0.4445796012878418, "learning_rate": 1.6896341463414638e-05, "loss": 0.0683, "step": 38175 }, { "epoch": 0.465609756097561, "grad_norm": 0.8628290891647339, "learning_rate": 1.6895934959349596e-05, "loss": 0.0512, "step": 38180 }, { "epoch": 0.4656707317073171, "grad_norm": 1.0533220767974854, "learning_rate": 1.6895528455284554e-05, "loss": 0.0727, "step": 38185 }, { "epoch": 0.4657317073170732, "grad_norm": 0.6090416312217712, "learning_rate": 1.6895121951219512e-05, "loss": 0.071, "step": 38190 }, { "epoch": 0.4657926829268293, "grad_norm": 0.8277850151062012, "learning_rate": 1.6894715447154473e-05, "loss": 0.068, "step": 38195 }, { "epoch": 0.4658536585365854, "grad_norm": 0.7723069787025452, "learning_rate": 1.689430894308943e-05, "loss": 0.0677, "step": 38200 }, { "epoch": 0.4659146341463415, "grad_norm": 0.717130184173584, "learning_rate": 1.6893902439024393e-05, "loss": 0.0734, "step": 38205 }, { "epoch": 0.4659756097560976, "grad_norm": 1.2740206718444824, "learning_rate": 1.689349593495935e-05, "loss": 0.0694, "step": 38210 }, { "epoch": 0.4660365853658537, "grad_norm": 0.6533278822898865, "learning_rate": 1.689308943089431e-05, "loss": 0.0801, "step": 38215 }, { "epoch": 0.4660975609756098, "grad_norm": 0.9782609343528748, "learning_rate": 1.6892682926829267e-05, "loss": 0.0807, "step": 38220 }, { "epoch": 0.4661585365853659, "grad_norm": 0.47205063700675964, "learning_rate": 1.689227642276423e-05, "loss": 0.0697, "step": 38225 }, { "epoch": 0.466219512195122, "grad_norm": 1.0341194868087769, "learning_rate": 1.6891869918699187e-05, "loss": 0.0935, "step": 38230 }, { "epoch": 0.4662804878048781, "grad_norm": 0.9425680041313171, "learning_rate": 1.689146341463415e-05, "loss": 0.0943, "step": 38235 }, { "epoch": 0.46634146341463417, "grad_norm": 0.7869579195976257, "learning_rate": 1.6891056910569107e-05, "loss": 0.0704, "step": 38240 }, { "epoch": 0.46640243902439027, "grad_norm": 0.8034073710441589, "learning_rate": 1.6890650406504065e-05, "loss": 0.0941, "step": 38245 }, { "epoch": 0.46646341463414637, "grad_norm": 0.6627644300460815, "learning_rate": 1.6890243902439026e-05, "loss": 0.0723, "step": 38250 }, { "epoch": 0.46652439024390246, "grad_norm": 0.4257717728614807, "learning_rate": 1.6889837398373985e-05, "loss": 0.0718, "step": 38255 }, { "epoch": 0.46658536585365856, "grad_norm": 0.4992537200450897, "learning_rate": 1.6889430894308946e-05, "loss": 0.0889, "step": 38260 }, { "epoch": 0.46664634146341466, "grad_norm": 0.7705093622207642, "learning_rate": 1.6889024390243904e-05, "loss": 0.1034, "step": 38265 }, { "epoch": 0.46670731707317076, "grad_norm": 1.200723648071289, "learning_rate": 1.6888617886178866e-05, "loss": 0.0775, "step": 38270 }, { "epoch": 0.46676829268292686, "grad_norm": 0.9591783881187439, "learning_rate": 1.688821138211382e-05, "loss": 0.1019, "step": 38275 }, { "epoch": 0.46682926829268295, "grad_norm": 1.1385877132415771, "learning_rate": 1.6887804878048782e-05, "loss": 0.0648, "step": 38280 }, { "epoch": 0.46689024390243905, "grad_norm": 0.4917045533657074, "learning_rate": 1.688739837398374e-05, "loss": 0.0884, "step": 38285 }, { "epoch": 0.46695121951219515, "grad_norm": 0.42471185326576233, "learning_rate": 1.68869918699187e-05, "loss": 0.0772, "step": 38290 }, { "epoch": 0.46701219512195125, "grad_norm": 0.41165632009506226, "learning_rate": 1.688658536585366e-05, "loss": 0.0577, "step": 38295 }, { "epoch": 0.46707317073170734, "grad_norm": 0.8115670084953308, "learning_rate": 1.688617886178862e-05, "loss": 0.0817, "step": 38300 }, { "epoch": 0.46713414634146344, "grad_norm": 0.4462200105190277, "learning_rate": 1.688577235772358e-05, "loss": 0.079, "step": 38305 }, { "epoch": 0.46719512195121954, "grad_norm": 0.7908489108085632, "learning_rate": 1.6885365853658537e-05, "loss": 0.0473, "step": 38310 }, { "epoch": 0.46725609756097564, "grad_norm": 0.586388111114502, "learning_rate": 1.6884959349593496e-05, "loss": 0.0755, "step": 38315 }, { "epoch": 0.46731707317073173, "grad_norm": 0.37606537342071533, "learning_rate": 1.6884552845528457e-05, "loss": 0.0678, "step": 38320 }, { "epoch": 0.46737804878048783, "grad_norm": 0.8550944328308105, "learning_rate": 1.6884146341463415e-05, "loss": 0.0945, "step": 38325 }, { "epoch": 0.46743902439024393, "grad_norm": 1.4417641162872314, "learning_rate": 1.6883739837398377e-05, "loss": 0.0597, "step": 38330 }, { "epoch": 0.4675, "grad_norm": 0.6117563247680664, "learning_rate": 1.6883333333333335e-05, "loss": 0.068, "step": 38335 }, { "epoch": 0.4675609756097561, "grad_norm": 1.7836811542510986, "learning_rate": 1.6882926829268293e-05, "loss": 0.075, "step": 38340 }, { "epoch": 0.4676219512195122, "grad_norm": 1.0891691446304321, "learning_rate": 1.6882520325203255e-05, "loss": 0.0846, "step": 38345 }, { "epoch": 0.4676829268292683, "grad_norm": 0.41029658913612366, "learning_rate": 1.6882113821138213e-05, "loss": 0.0538, "step": 38350 }, { "epoch": 0.4677439024390244, "grad_norm": 0.553130567073822, "learning_rate": 1.6881707317073174e-05, "loss": 0.0556, "step": 38355 }, { "epoch": 0.4678048780487805, "grad_norm": 0.47551459074020386, "learning_rate": 1.6881300813008132e-05, "loss": 0.0528, "step": 38360 }, { "epoch": 0.4678658536585366, "grad_norm": 0.748847484588623, "learning_rate": 1.688089430894309e-05, "loss": 0.1064, "step": 38365 }, { "epoch": 0.4679268292682927, "grad_norm": 0.9306067824363708, "learning_rate": 1.688048780487805e-05, "loss": 0.0421, "step": 38370 }, { "epoch": 0.4679878048780488, "grad_norm": 0.3914094865322113, "learning_rate": 1.688008130081301e-05, "loss": 0.1164, "step": 38375 }, { "epoch": 0.4680487804878049, "grad_norm": 0.748823881149292, "learning_rate": 1.6879674796747968e-05, "loss": 0.0723, "step": 38380 }, { "epoch": 0.468109756097561, "grad_norm": 1.2282577753067017, "learning_rate": 1.687926829268293e-05, "loss": 0.0381, "step": 38385 }, { "epoch": 0.4681707317073171, "grad_norm": 0.6841312646865845, "learning_rate": 1.6878861788617888e-05, "loss": 0.081, "step": 38390 }, { "epoch": 0.4682317073170732, "grad_norm": 1.7663240432739258, "learning_rate": 1.6878455284552846e-05, "loss": 0.0675, "step": 38395 }, { "epoch": 0.4682926829268293, "grad_norm": 0.7794210314750671, "learning_rate": 1.6878048780487804e-05, "loss": 0.0741, "step": 38400 }, { "epoch": 0.4683536585365854, "grad_norm": 0.7804881930351257, "learning_rate": 1.6877642276422766e-05, "loss": 0.0489, "step": 38405 }, { "epoch": 0.4684146341463415, "grad_norm": 0.5216922163963318, "learning_rate": 1.6877235772357724e-05, "loss": 0.0679, "step": 38410 }, { "epoch": 0.4684756097560976, "grad_norm": 0.6004688739776611, "learning_rate": 1.6876829268292685e-05, "loss": 0.0744, "step": 38415 }, { "epoch": 0.4685365853658537, "grad_norm": 0.8386684656143188, "learning_rate": 1.6876422764227643e-05, "loss": 0.099, "step": 38420 }, { "epoch": 0.4685975609756098, "grad_norm": 0.5710147023200989, "learning_rate": 1.68760162601626e-05, "loss": 0.0867, "step": 38425 }, { "epoch": 0.4686585365853659, "grad_norm": 0.5002787709236145, "learning_rate": 1.6875609756097563e-05, "loss": 0.0866, "step": 38430 }, { "epoch": 0.468719512195122, "grad_norm": 0.6129156947135925, "learning_rate": 1.687520325203252e-05, "loss": 0.0678, "step": 38435 }, { "epoch": 0.468780487804878, "grad_norm": 0.721462607383728, "learning_rate": 1.6874796747967483e-05, "loss": 0.081, "step": 38440 }, { "epoch": 0.4688414634146341, "grad_norm": 0.3786182701587677, "learning_rate": 1.687439024390244e-05, "loss": 0.0811, "step": 38445 }, { "epoch": 0.4689024390243902, "grad_norm": 0.7618556618690491, "learning_rate": 1.6873983739837402e-05, "loss": 0.0738, "step": 38450 }, { "epoch": 0.4689634146341463, "grad_norm": 0.5822233557701111, "learning_rate": 1.6873577235772357e-05, "loss": 0.0891, "step": 38455 }, { "epoch": 0.4690243902439024, "grad_norm": 0.6508752703666687, "learning_rate": 1.687317073170732e-05, "loss": 0.0519, "step": 38460 }, { "epoch": 0.4690853658536585, "grad_norm": 0.676948070526123, "learning_rate": 1.6872764227642277e-05, "loss": 0.0743, "step": 38465 }, { "epoch": 0.4691463414634146, "grad_norm": 0.8350304365158081, "learning_rate": 1.6872357723577238e-05, "loss": 0.0819, "step": 38470 }, { "epoch": 0.4692073170731707, "grad_norm": 0.5378406047821045, "learning_rate": 1.6871951219512196e-05, "loss": 0.0663, "step": 38475 }, { "epoch": 0.4692682926829268, "grad_norm": 0.9900537133216858, "learning_rate": 1.6871544715447158e-05, "loss": 0.0895, "step": 38480 }, { "epoch": 0.4693292682926829, "grad_norm": 1.8466094732284546, "learning_rate": 1.6871138211382113e-05, "loss": 0.0645, "step": 38485 }, { "epoch": 0.469390243902439, "grad_norm": 1.0575485229492188, "learning_rate": 1.6870731707317074e-05, "loss": 0.0836, "step": 38490 }, { "epoch": 0.4694512195121951, "grad_norm": 0.43648475408554077, "learning_rate": 1.6870325203252032e-05, "loss": 0.0399, "step": 38495 }, { "epoch": 0.4695121951219512, "grad_norm": 1.0887306928634644, "learning_rate": 1.6869918699186994e-05, "loss": 0.0956, "step": 38500 }, { "epoch": 0.4695731707317073, "grad_norm": 0.9212208986282349, "learning_rate": 1.6869512195121952e-05, "loss": 0.0817, "step": 38505 }, { "epoch": 0.4696341463414634, "grad_norm": 3.1602203845977783, "learning_rate": 1.6869105691056913e-05, "loss": 0.0947, "step": 38510 }, { "epoch": 0.4696951219512195, "grad_norm": 0.6898728609085083, "learning_rate": 1.686869918699187e-05, "loss": 0.0933, "step": 38515 }, { "epoch": 0.4697560975609756, "grad_norm": 0.753862738609314, "learning_rate": 1.686829268292683e-05, "loss": 0.0739, "step": 38520 }, { "epoch": 0.4698170731707317, "grad_norm": 0.797595202922821, "learning_rate": 1.686788617886179e-05, "loss": 0.0605, "step": 38525 }, { "epoch": 0.4698780487804878, "grad_norm": 0.4682733714580536, "learning_rate": 1.686747967479675e-05, "loss": 0.1062, "step": 38530 }, { "epoch": 0.4699390243902439, "grad_norm": 0.6301458477973938, "learning_rate": 1.686707317073171e-05, "loss": 0.0831, "step": 38535 }, { "epoch": 0.47, "grad_norm": 0.8181076645851135, "learning_rate": 1.686666666666667e-05, "loss": 0.0666, "step": 38540 }, { "epoch": 0.47006097560975607, "grad_norm": 0.7261807918548584, "learning_rate": 1.6866260162601627e-05, "loss": 0.0824, "step": 38545 }, { "epoch": 0.47012195121951217, "grad_norm": 0.5709417462348938, "learning_rate": 1.6865853658536585e-05, "loss": 0.0558, "step": 38550 }, { "epoch": 0.47018292682926827, "grad_norm": 0.8391415476799011, "learning_rate": 1.6865447154471547e-05, "loss": 0.1123, "step": 38555 }, { "epoch": 0.47024390243902436, "grad_norm": 0.6477771997451782, "learning_rate": 1.6865040650406505e-05, "loss": 0.0971, "step": 38560 }, { "epoch": 0.47030487804878046, "grad_norm": 1.2071585655212402, "learning_rate": 1.6864634146341466e-05, "loss": 0.0803, "step": 38565 }, { "epoch": 0.47036585365853656, "grad_norm": 0.5816293358802795, "learning_rate": 1.6864227642276424e-05, "loss": 0.1012, "step": 38570 }, { "epoch": 0.47042682926829266, "grad_norm": 0.5707952976226807, "learning_rate": 1.6863821138211383e-05, "loss": 0.09, "step": 38575 }, { "epoch": 0.47048780487804875, "grad_norm": 1.2265907526016235, "learning_rate": 1.686341463414634e-05, "loss": 0.0935, "step": 38580 }, { "epoch": 0.47054878048780485, "grad_norm": 0.4772493243217468, "learning_rate": 1.6863008130081302e-05, "loss": 0.0729, "step": 38585 }, { "epoch": 0.47060975609756095, "grad_norm": 0.8876482248306274, "learning_rate": 1.686260162601626e-05, "loss": 0.102, "step": 38590 }, { "epoch": 0.47067073170731705, "grad_norm": 0.5494601130485535, "learning_rate": 1.6862195121951222e-05, "loss": 0.1044, "step": 38595 }, { "epoch": 0.47073170731707314, "grad_norm": 0.802378237247467, "learning_rate": 1.686178861788618e-05, "loss": 0.1151, "step": 38600 }, { "epoch": 0.47079268292682924, "grad_norm": 2.1810219287872314, "learning_rate": 1.6861382113821138e-05, "loss": 0.0754, "step": 38605 }, { "epoch": 0.47085365853658534, "grad_norm": 0.5120707750320435, "learning_rate": 1.68609756097561e-05, "loss": 0.09, "step": 38610 }, { "epoch": 0.47091463414634144, "grad_norm": 0.9749104976654053, "learning_rate": 1.6860569105691058e-05, "loss": 0.0758, "step": 38615 }, { "epoch": 0.47097560975609754, "grad_norm": 0.7988747358322144, "learning_rate": 1.686016260162602e-05, "loss": 0.1174, "step": 38620 }, { "epoch": 0.47103658536585363, "grad_norm": 0.8617120981216431, "learning_rate": 1.6859756097560977e-05, "loss": 0.0618, "step": 38625 }, { "epoch": 0.47109756097560973, "grad_norm": 1.7528847455978394, "learning_rate": 1.6859349593495936e-05, "loss": 0.107, "step": 38630 }, { "epoch": 0.47115853658536583, "grad_norm": 0.5633090734481812, "learning_rate": 1.6858943089430894e-05, "loss": 0.0479, "step": 38635 }, { "epoch": 0.4712195121951219, "grad_norm": 0.703255832195282, "learning_rate": 1.6858536585365855e-05, "loss": 0.1874, "step": 38640 }, { "epoch": 0.471280487804878, "grad_norm": 0.8967924118041992, "learning_rate": 1.6858130081300813e-05, "loss": 0.0717, "step": 38645 }, { "epoch": 0.4713414634146341, "grad_norm": 1.6770168542861938, "learning_rate": 1.6857723577235775e-05, "loss": 0.1117, "step": 38650 }, { "epoch": 0.4714024390243902, "grad_norm": 0.7827868461608887, "learning_rate": 1.6857317073170733e-05, "loss": 0.0741, "step": 38655 }, { "epoch": 0.4714634146341463, "grad_norm": 0.5051152110099792, "learning_rate": 1.6856910569105694e-05, "loss": 0.0688, "step": 38660 }, { "epoch": 0.4715243902439024, "grad_norm": 0.6615888476371765, "learning_rate": 1.685650406504065e-05, "loss": 0.0897, "step": 38665 }, { "epoch": 0.4715853658536585, "grad_norm": 0.6016706228256226, "learning_rate": 1.685609756097561e-05, "loss": 0.0812, "step": 38670 }, { "epoch": 0.4716463414634146, "grad_norm": 0.63141268491745, "learning_rate": 1.685569105691057e-05, "loss": 0.0705, "step": 38675 }, { "epoch": 0.4717073170731707, "grad_norm": 0.6027154326438904, "learning_rate": 1.685528455284553e-05, "loss": 0.0546, "step": 38680 }, { "epoch": 0.4717682926829268, "grad_norm": 0.757229208946228, "learning_rate": 1.685487804878049e-05, "loss": 0.0844, "step": 38685 }, { "epoch": 0.4718292682926829, "grad_norm": 0.7977131009101868, "learning_rate": 1.685447154471545e-05, "loss": 0.0822, "step": 38690 }, { "epoch": 0.471890243902439, "grad_norm": 0.6142010688781738, "learning_rate": 1.6854065040650408e-05, "loss": 0.055, "step": 38695 }, { "epoch": 0.4719512195121951, "grad_norm": 0.8917977213859558, "learning_rate": 1.6853658536585366e-05, "loss": 0.0919, "step": 38700 }, { "epoch": 0.4720121951219512, "grad_norm": 0.5865721702575684, "learning_rate": 1.6853252032520328e-05, "loss": 0.0466, "step": 38705 }, { "epoch": 0.4720731707317073, "grad_norm": 0.6330307722091675, "learning_rate": 1.6852845528455286e-05, "loss": 0.072, "step": 38710 }, { "epoch": 0.4721341463414634, "grad_norm": 0.8449771404266357, "learning_rate": 1.6852439024390247e-05, "loss": 0.0933, "step": 38715 }, { "epoch": 0.4721951219512195, "grad_norm": 0.6385810375213623, "learning_rate": 1.6852032520325206e-05, "loss": 0.0584, "step": 38720 }, { "epoch": 0.4722560975609756, "grad_norm": 0.6439815759658813, "learning_rate": 1.6851626016260164e-05, "loss": 0.0522, "step": 38725 }, { "epoch": 0.4723170731707317, "grad_norm": 0.5531466603279114, "learning_rate": 1.6851219512195122e-05, "loss": 0.061, "step": 38730 }, { "epoch": 0.4723780487804878, "grad_norm": 0.6147937178611755, "learning_rate": 1.6850813008130083e-05, "loss": 0.0712, "step": 38735 }, { "epoch": 0.4724390243902439, "grad_norm": 0.6458390355110168, "learning_rate": 1.685040650406504e-05, "loss": 0.0937, "step": 38740 }, { "epoch": 0.4725, "grad_norm": 0.6409558653831482, "learning_rate": 1.6850000000000003e-05, "loss": 0.0765, "step": 38745 }, { "epoch": 0.4725609756097561, "grad_norm": 0.7313627004623413, "learning_rate": 1.684959349593496e-05, "loss": 0.0863, "step": 38750 }, { "epoch": 0.47262195121951217, "grad_norm": 0.6937010884284973, "learning_rate": 1.684918699186992e-05, "loss": 0.0995, "step": 38755 }, { "epoch": 0.47268292682926827, "grad_norm": 1.2061536312103271, "learning_rate": 1.6848780487804877e-05, "loss": 0.0833, "step": 38760 }, { "epoch": 0.47274390243902437, "grad_norm": 0.5363749861717224, "learning_rate": 1.684837398373984e-05, "loss": 0.054, "step": 38765 }, { "epoch": 0.47280487804878046, "grad_norm": 0.6691007614135742, "learning_rate": 1.6847967479674797e-05, "loss": 0.0746, "step": 38770 }, { "epoch": 0.47286585365853656, "grad_norm": 0.3961232304573059, "learning_rate": 1.684756097560976e-05, "loss": 0.062, "step": 38775 }, { "epoch": 0.47292682926829266, "grad_norm": 0.523360013961792, "learning_rate": 1.6847154471544717e-05, "loss": 0.0745, "step": 38780 }, { "epoch": 0.47298780487804876, "grad_norm": 0.693792462348938, "learning_rate": 1.6846747967479675e-05, "loss": 0.0734, "step": 38785 }, { "epoch": 0.47304878048780485, "grad_norm": 0.36197277903556824, "learning_rate": 1.6846341463414636e-05, "loss": 0.0471, "step": 38790 }, { "epoch": 0.47310975609756095, "grad_norm": 0.5700895190238953, "learning_rate": 1.6845934959349594e-05, "loss": 0.0643, "step": 38795 }, { "epoch": 0.47317073170731705, "grad_norm": 0.45432159304618835, "learning_rate": 1.6845528455284556e-05, "loss": 0.0731, "step": 38800 }, { "epoch": 0.47323170731707315, "grad_norm": 0.39395663142204285, "learning_rate": 1.6845121951219514e-05, "loss": 0.0592, "step": 38805 }, { "epoch": 0.47329268292682924, "grad_norm": 0.6390069723129272, "learning_rate": 1.6844715447154472e-05, "loss": 0.1407, "step": 38810 }, { "epoch": 0.47335365853658534, "grad_norm": 1.1368921995162964, "learning_rate": 1.684430894308943e-05, "loss": 0.0739, "step": 38815 }, { "epoch": 0.47341463414634144, "grad_norm": 0.6927469372749329, "learning_rate": 1.6843902439024392e-05, "loss": 0.065, "step": 38820 }, { "epoch": 0.47347560975609754, "grad_norm": 1.6718735694885254, "learning_rate": 1.684349593495935e-05, "loss": 0.0668, "step": 38825 }, { "epoch": 0.47353658536585364, "grad_norm": 1.156337022781372, "learning_rate": 1.684308943089431e-05, "loss": 0.0582, "step": 38830 }, { "epoch": 0.47359756097560973, "grad_norm": 0.5918495059013367, "learning_rate": 1.684268292682927e-05, "loss": 0.0784, "step": 38835 }, { "epoch": 0.47365853658536583, "grad_norm": 0.7340388894081116, "learning_rate": 1.684227642276423e-05, "loss": 0.0766, "step": 38840 }, { "epoch": 0.47371951219512193, "grad_norm": 1.6767617464065552, "learning_rate": 1.6841869918699186e-05, "loss": 0.0805, "step": 38845 }, { "epoch": 0.473780487804878, "grad_norm": 3.1992604732513428, "learning_rate": 1.6841463414634147e-05, "loss": 0.1102, "step": 38850 }, { "epoch": 0.4738414634146341, "grad_norm": 0.5922507643699646, "learning_rate": 1.6841056910569106e-05, "loss": 0.0486, "step": 38855 }, { "epoch": 0.4739024390243902, "grad_norm": 0.5619462728500366, "learning_rate": 1.6840650406504067e-05, "loss": 0.0745, "step": 38860 }, { "epoch": 0.4739634146341463, "grad_norm": 0.6480033993721008, "learning_rate": 1.6840243902439025e-05, "loss": 0.0769, "step": 38865 }, { "epoch": 0.4740243902439024, "grad_norm": 0.6316478252410889, "learning_rate": 1.6839837398373987e-05, "loss": 0.0771, "step": 38870 }, { "epoch": 0.4740853658536585, "grad_norm": 0.8377074003219604, "learning_rate": 1.6839430894308945e-05, "loss": 0.086, "step": 38875 }, { "epoch": 0.4741463414634146, "grad_norm": 1.2354333400726318, "learning_rate": 1.6839024390243903e-05, "loss": 0.1028, "step": 38880 }, { "epoch": 0.4742073170731707, "grad_norm": 0.6447466611862183, "learning_rate": 1.6838617886178864e-05, "loss": 0.0657, "step": 38885 }, { "epoch": 0.4742682926829268, "grad_norm": 1.1609535217285156, "learning_rate": 1.6838211382113823e-05, "loss": 0.1013, "step": 38890 }, { "epoch": 0.4743292682926829, "grad_norm": 0.5989165306091309, "learning_rate": 1.683780487804878e-05, "loss": 0.066, "step": 38895 }, { "epoch": 0.474390243902439, "grad_norm": 1.265057921409607, "learning_rate": 1.6837398373983742e-05, "loss": 0.0619, "step": 38900 }, { "epoch": 0.4744512195121951, "grad_norm": 0.5837271213531494, "learning_rate": 1.68369918699187e-05, "loss": 0.0657, "step": 38905 }, { "epoch": 0.4745121951219512, "grad_norm": 0.6145418286323547, "learning_rate": 1.683658536585366e-05, "loss": 0.0634, "step": 38910 }, { "epoch": 0.4745731707317073, "grad_norm": 0.7208627462387085, "learning_rate": 1.683617886178862e-05, "loss": 0.058, "step": 38915 }, { "epoch": 0.4746341463414634, "grad_norm": 0.822142481803894, "learning_rate": 1.6835772357723578e-05, "loss": 0.1028, "step": 38920 }, { "epoch": 0.4746951219512195, "grad_norm": 0.5169662833213806, "learning_rate": 1.683536585365854e-05, "loss": 0.0509, "step": 38925 }, { "epoch": 0.4747560975609756, "grad_norm": 0.5423587560653687, "learning_rate": 1.6834959349593498e-05, "loss": 0.0774, "step": 38930 }, { "epoch": 0.4748170731707317, "grad_norm": 0.5326518416404724, "learning_rate": 1.6834552845528456e-05, "loss": 0.0712, "step": 38935 }, { "epoch": 0.4748780487804878, "grad_norm": 0.8027861714363098, "learning_rate": 1.6834146341463414e-05, "loss": 0.0663, "step": 38940 }, { "epoch": 0.4749390243902439, "grad_norm": 0.47700318694114685, "learning_rate": 1.6833739837398376e-05, "loss": 0.0509, "step": 38945 }, { "epoch": 0.475, "grad_norm": 0.6142335534095764, "learning_rate": 1.6833333333333334e-05, "loss": 0.0399, "step": 38950 }, { "epoch": 0.4750609756097561, "grad_norm": 0.9605167508125305, "learning_rate": 1.6832926829268295e-05, "loss": 0.0825, "step": 38955 }, { "epoch": 0.4751219512195122, "grad_norm": 0.43244311213493347, "learning_rate": 1.6832520325203253e-05, "loss": 0.0399, "step": 38960 }, { "epoch": 0.47518292682926827, "grad_norm": 0.6593560576438904, "learning_rate": 1.683211382113821e-05, "loss": 0.0695, "step": 38965 }, { "epoch": 0.47524390243902437, "grad_norm": 0.4578743278980255, "learning_rate": 1.6831707317073173e-05, "loss": 0.0682, "step": 38970 }, { "epoch": 0.47530487804878047, "grad_norm": 1.0405117273330688, "learning_rate": 1.683130081300813e-05, "loss": 0.0795, "step": 38975 }, { "epoch": 0.47536585365853656, "grad_norm": 0.815833568572998, "learning_rate": 1.6830894308943093e-05, "loss": 0.092, "step": 38980 }, { "epoch": 0.47542682926829266, "grad_norm": 0.7236223816871643, "learning_rate": 1.683048780487805e-05, "loss": 0.0653, "step": 38985 }, { "epoch": 0.47548780487804876, "grad_norm": 0.7492196559906006, "learning_rate": 1.683008130081301e-05, "loss": 0.0553, "step": 38990 }, { "epoch": 0.47554878048780486, "grad_norm": 0.6463847160339355, "learning_rate": 1.6829674796747967e-05, "loss": 0.0602, "step": 38995 }, { "epoch": 0.47560975609756095, "grad_norm": 0.5358498692512512, "learning_rate": 1.682926829268293e-05, "loss": 0.0816, "step": 39000 }, { "epoch": 0.47567073170731705, "grad_norm": 0.8071694374084473, "learning_rate": 1.6828861788617887e-05, "loss": 0.0574, "step": 39005 }, { "epoch": 0.47573170731707315, "grad_norm": 0.8112804889678955, "learning_rate": 1.6828455284552848e-05, "loss": 0.0523, "step": 39010 }, { "epoch": 0.47579268292682925, "grad_norm": 0.9066358804702759, "learning_rate": 1.6828048780487806e-05, "loss": 0.0666, "step": 39015 }, { "epoch": 0.47585365853658534, "grad_norm": 0.6004869937896729, "learning_rate": 1.6827642276422768e-05, "loss": 0.0518, "step": 39020 }, { "epoch": 0.47591463414634144, "grad_norm": 0.7874798774719238, "learning_rate": 1.6827235772357723e-05, "loss": 0.0706, "step": 39025 }, { "epoch": 0.47597560975609754, "grad_norm": 0.35991179943084717, "learning_rate": 1.6826829268292684e-05, "loss": 0.0914, "step": 39030 }, { "epoch": 0.47603658536585364, "grad_norm": 0.796030580997467, "learning_rate": 1.6826422764227642e-05, "loss": 0.0811, "step": 39035 }, { "epoch": 0.47609756097560973, "grad_norm": 0.5482980608940125, "learning_rate": 1.6826016260162604e-05, "loss": 0.0628, "step": 39040 }, { "epoch": 0.47615853658536583, "grad_norm": 0.8962993025779724, "learning_rate": 1.6825609756097562e-05, "loss": 0.0983, "step": 39045 }, { "epoch": 0.47621951219512193, "grad_norm": 1.9686578512191772, "learning_rate": 1.6825203252032523e-05, "loss": 0.0697, "step": 39050 }, { "epoch": 0.476280487804878, "grad_norm": 0.5315919518470764, "learning_rate": 1.682479674796748e-05, "loss": 0.0843, "step": 39055 }, { "epoch": 0.4763414634146341, "grad_norm": 2.12320876121521, "learning_rate": 1.682439024390244e-05, "loss": 0.0754, "step": 39060 }, { "epoch": 0.4764024390243902, "grad_norm": 0.6981797814369202, "learning_rate": 1.68239837398374e-05, "loss": 0.0845, "step": 39065 }, { "epoch": 0.4764634146341463, "grad_norm": 1.1808996200561523, "learning_rate": 1.682357723577236e-05, "loss": 0.071, "step": 39070 }, { "epoch": 0.4765243902439024, "grad_norm": 1.0532277822494507, "learning_rate": 1.6823170731707317e-05, "loss": 0.0842, "step": 39075 }, { "epoch": 0.4765853658536585, "grad_norm": 0.7786932587623596, "learning_rate": 1.682276422764228e-05, "loss": 0.0767, "step": 39080 }, { "epoch": 0.4766463414634146, "grad_norm": 0.7805188298225403, "learning_rate": 1.6822357723577237e-05, "loss": 0.0577, "step": 39085 }, { "epoch": 0.4767073170731707, "grad_norm": 0.6302024126052856, "learning_rate": 1.6821951219512195e-05, "loss": 0.0626, "step": 39090 }, { "epoch": 0.4767682926829268, "grad_norm": 0.6126129031181335, "learning_rate": 1.6821544715447157e-05, "loss": 0.0804, "step": 39095 }, { "epoch": 0.4768292682926829, "grad_norm": 0.8714261651039124, "learning_rate": 1.6821138211382115e-05, "loss": 0.0486, "step": 39100 }, { "epoch": 0.476890243902439, "grad_norm": 0.8827810883522034, "learning_rate": 1.6820731707317076e-05, "loss": 0.0849, "step": 39105 }, { "epoch": 0.4769512195121951, "grad_norm": 0.8858593106269836, "learning_rate": 1.6820325203252034e-05, "loss": 0.1037, "step": 39110 }, { "epoch": 0.4770121951219512, "grad_norm": 0.3563464879989624, "learning_rate": 1.6819918699186993e-05, "loss": 0.0861, "step": 39115 }, { "epoch": 0.4770731707317073, "grad_norm": 0.7493749856948853, "learning_rate": 1.681951219512195e-05, "loss": 0.0655, "step": 39120 }, { "epoch": 0.4771341463414634, "grad_norm": 0.7272239327430725, "learning_rate": 1.6819105691056912e-05, "loss": 0.1032, "step": 39125 }, { "epoch": 0.4771951219512195, "grad_norm": 0.6856333613395691, "learning_rate": 1.681869918699187e-05, "loss": 0.0613, "step": 39130 }, { "epoch": 0.4772560975609756, "grad_norm": 0.7293714284896851, "learning_rate": 1.6818292682926832e-05, "loss": 0.0815, "step": 39135 }, { "epoch": 0.4773170731707317, "grad_norm": 0.6077646017074585, "learning_rate": 1.681788617886179e-05, "loss": 0.0509, "step": 39140 }, { "epoch": 0.4773780487804878, "grad_norm": 0.7737790942192078, "learning_rate": 1.6817479674796748e-05, "loss": 0.0504, "step": 39145 }, { "epoch": 0.4774390243902439, "grad_norm": 0.42623910307884216, "learning_rate": 1.681707317073171e-05, "loss": 0.0623, "step": 39150 }, { "epoch": 0.4775, "grad_norm": 1.4727596044540405, "learning_rate": 1.6816666666666668e-05, "loss": 0.1025, "step": 39155 }, { "epoch": 0.4775609756097561, "grad_norm": 0.834017813205719, "learning_rate": 1.6816260162601626e-05, "loss": 0.0888, "step": 39160 }, { "epoch": 0.4776219512195122, "grad_norm": 0.7692001461982727, "learning_rate": 1.6815853658536587e-05, "loss": 0.0667, "step": 39165 }, { "epoch": 0.4776829268292683, "grad_norm": 0.5576643347740173, "learning_rate": 1.6815447154471545e-05, "loss": 0.0925, "step": 39170 }, { "epoch": 0.47774390243902437, "grad_norm": 1.2556976079940796, "learning_rate": 1.6815040650406504e-05, "loss": 0.11, "step": 39175 }, { "epoch": 0.47780487804878047, "grad_norm": 0.39279794692993164, "learning_rate": 1.6814634146341465e-05, "loss": 0.0523, "step": 39180 }, { "epoch": 0.47786585365853657, "grad_norm": 0.7149326205253601, "learning_rate": 1.6814227642276423e-05, "loss": 0.0691, "step": 39185 }, { "epoch": 0.47792682926829266, "grad_norm": 0.6698909997940063, "learning_rate": 1.6813821138211385e-05, "loss": 0.0388, "step": 39190 }, { "epoch": 0.47798780487804876, "grad_norm": 0.823431670665741, "learning_rate": 1.6813414634146343e-05, "loss": 0.0715, "step": 39195 }, { "epoch": 0.47804878048780486, "grad_norm": 0.6469442248344421, "learning_rate": 1.6813008130081304e-05, "loss": 0.1001, "step": 39200 }, { "epoch": 0.47810975609756096, "grad_norm": 0.8103423714637756, "learning_rate": 1.681260162601626e-05, "loss": 0.0696, "step": 39205 }, { "epoch": 0.47817073170731705, "grad_norm": 1.2371001243591309, "learning_rate": 1.681219512195122e-05, "loss": 0.057, "step": 39210 }, { "epoch": 0.47823170731707315, "grad_norm": 0.5765079855918884, "learning_rate": 1.681178861788618e-05, "loss": 0.0707, "step": 39215 }, { "epoch": 0.47829268292682925, "grad_norm": 0.6217642426490784, "learning_rate": 1.681138211382114e-05, "loss": 0.0622, "step": 39220 }, { "epoch": 0.47835365853658535, "grad_norm": 0.4617185592651367, "learning_rate": 1.68109756097561e-05, "loss": 0.0783, "step": 39225 }, { "epoch": 0.47841463414634144, "grad_norm": 1.1368138790130615, "learning_rate": 1.681056910569106e-05, "loss": 0.0835, "step": 39230 }, { "epoch": 0.47847560975609754, "grad_norm": 0.6688132286071777, "learning_rate": 1.6810162601626018e-05, "loss": 0.0778, "step": 39235 }, { "epoch": 0.47853658536585364, "grad_norm": 0.9886794090270996, "learning_rate": 1.6809756097560976e-05, "loss": 0.0719, "step": 39240 }, { "epoch": 0.47859756097560974, "grad_norm": 0.6413474082946777, "learning_rate": 1.6809349593495938e-05, "loss": 0.0664, "step": 39245 }, { "epoch": 0.47865853658536583, "grad_norm": 0.9948187470436096, "learning_rate": 1.6808943089430896e-05, "loss": 0.0842, "step": 39250 }, { "epoch": 0.47871951219512193, "grad_norm": 0.44654926657676697, "learning_rate": 1.6808536585365854e-05, "loss": 0.0633, "step": 39255 }, { "epoch": 0.47878048780487803, "grad_norm": 0.6824544072151184, "learning_rate": 1.6808130081300816e-05, "loss": 0.0804, "step": 39260 }, { "epoch": 0.4788414634146341, "grad_norm": 0.9263792037963867, "learning_rate": 1.6807723577235774e-05, "loss": 0.0676, "step": 39265 }, { "epoch": 0.4789024390243902, "grad_norm": 0.7396017909049988, "learning_rate": 1.6807317073170732e-05, "loss": 0.0601, "step": 39270 }, { "epoch": 0.4789634146341463, "grad_norm": 0.6450471878051758, "learning_rate": 1.6806910569105693e-05, "loss": 0.0499, "step": 39275 }, { "epoch": 0.4790243902439024, "grad_norm": 0.5420567393302917, "learning_rate": 1.680650406504065e-05, "loss": 0.0397, "step": 39280 }, { "epoch": 0.4790853658536585, "grad_norm": 0.6097075939178467, "learning_rate": 1.6806097560975613e-05, "loss": 0.0794, "step": 39285 }, { "epoch": 0.4791463414634146, "grad_norm": 0.578911542892456, "learning_rate": 1.680569105691057e-05, "loss": 0.0499, "step": 39290 }, { "epoch": 0.4792073170731707, "grad_norm": 0.4290085732936859, "learning_rate": 1.680528455284553e-05, "loss": 0.0928, "step": 39295 }, { "epoch": 0.4792682926829268, "grad_norm": 1.5472921133041382, "learning_rate": 1.6804878048780487e-05, "loss": 0.0639, "step": 39300 }, { "epoch": 0.4793292682926829, "grad_norm": 0.6389114260673523, "learning_rate": 1.680447154471545e-05, "loss": 0.0605, "step": 39305 }, { "epoch": 0.479390243902439, "grad_norm": 0.7864159941673279, "learning_rate": 1.6804065040650407e-05, "loss": 0.0988, "step": 39310 }, { "epoch": 0.4794512195121951, "grad_norm": 0.6350827217102051, "learning_rate": 1.680365853658537e-05, "loss": 0.07, "step": 39315 }, { "epoch": 0.4795121951219512, "grad_norm": 0.8457200527191162, "learning_rate": 1.6803252032520327e-05, "loss": 0.0667, "step": 39320 }, { "epoch": 0.4795731707317073, "grad_norm": 0.5222476124763489, "learning_rate": 1.6802845528455285e-05, "loss": 0.0596, "step": 39325 }, { "epoch": 0.4796341463414634, "grad_norm": 0.744776725769043, "learning_rate": 1.6802439024390246e-05, "loss": 0.0508, "step": 39330 }, { "epoch": 0.4796951219512195, "grad_norm": 0.5561964511871338, "learning_rate": 1.6802032520325204e-05, "loss": 0.0598, "step": 39335 }, { "epoch": 0.4797560975609756, "grad_norm": 0.6577591896057129, "learning_rate": 1.6801626016260162e-05, "loss": 0.0929, "step": 39340 }, { "epoch": 0.4798170731707317, "grad_norm": 0.446695476770401, "learning_rate": 1.6801219512195124e-05, "loss": 0.0456, "step": 39345 }, { "epoch": 0.4798780487804878, "grad_norm": 1.0295591354370117, "learning_rate": 1.6800813008130082e-05, "loss": 0.0706, "step": 39350 }, { "epoch": 0.4799390243902439, "grad_norm": 0.5872135162353516, "learning_rate": 1.680040650406504e-05, "loss": 0.0663, "step": 39355 }, { "epoch": 0.48, "grad_norm": 0.4362730085849762, "learning_rate": 1.6800000000000002e-05, "loss": 0.072, "step": 39360 }, { "epoch": 0.4800609756097561, "grad_norm": 0.8053463697433472, "learning_rate": 1.679959349593496e-05, "loss": 0.0602, "step": 39365 }, { "epoch": 0.4801219512195122, "grad_norm": 0.5321105122566223, "learning_rate": 1.679918699186992e-05, "loss": 0.0691, "step": 39370 }, { "epoch": 0.4801829268292683, "grad_norm": 0.9158873558044434, "learning_rate": 1.679878048780488e-05, "loss": 0.1057, "step": 39375 }, { "epoch": 0.4802439024390244, "grad_norm": 1.8134753704071045, "learning_rate": 1.679837398373984e-05, "loss": 0.0793, "step": 39380 }, { "epoch": 0.48030487804878047, "grad_norm": 0.3901197910308838, "learning_rate": 1.6797967479674796e-05, "loss": 0.0484, "step": 39385 }, { "epoch": 0.48036585365853657, "grad_norm": 0.606448233127594, "learning_rate": 1.6797560975609757e-05, "loss": 0.0929, "step": 39390 }, { "epoch": 0.48042682926829267, "grad_norm": 0.41739901900291443, "learning_rate": 1.6797154471544715e-05, "loss": 0.0373, "step": 39395 }, { "epoch": 0.48048780487804876, "grad_norm": 0.9484834671020508, "learning_rate": 1.6796747967479677e-05, "loss": 0.0969, "step": 39400 }, { "epoch": 0.48054878048780486, "grad_norm": 0.540041446685791, "learning_rate": 1.6796341463414635e-05, "loss": 0.0607, "step": 39405 }, { "epoch": 0.48060975609756096, "grad_norm": 0.9498264789581299, "learning_rate": 1.6795934959349597e-05, "loss": 0.078, "step": 39410 }, { "epoch": 0.48067073170731706, "grad_norm": 0.5766417980194092, "learning_rate": 1.6795528455284555e-05, "loss": 0.0357, "step": 39415 }, { "epoch": 0.48073170731707315, "grad_norm": 0.5172134637832642, "learning_rate": 1.6795121951219513e-05, "loss": 0.0662, "step": 39420 }, { "epoch": 0.48079268292682925, "grad_norm": 3.6907012462615967, "learning_rate": 1.679471544715447e-05, "loss": 0.0561, "step": 39425 }, { "epoch": 0.48085365853658535, "grad_norm": 0.7532747387886047, "learning_rate": 1.6794308943089433e-05, "loss": 0.06, "step": 39430 }, { "epoch": 0.48091463414634145, "grad_norm": 1.1566253900527954, "learning_rate": 1.679390243902439e-05, "loss": 0.1391, "step": 39435 }, { "epoch": 0.48097560975609754, "grad_norm": 0.7502309679985046, "learning_rate": 1.6793495934959352e-05, "loss": 0.06, "step": 39440 }, { "epoch": 0.48103658536585364, "grad_norm": 0.612549364566803, "learning_rate": 1.679308943089431e-05, "loss": 0.0746, "step": 39445 }, { "epoch": 0.48109756097560974, "grad_norm": 0.46314918994903564, "learning_rate": 1.679268292682927e-05, "loss": 0.0636, "step": 39450 }, { "epoch": 0.48115853658536584, "grad_norm": 0.7584161758422852, "learning_rate": 1.679227642276423e-05, "loss": 0.0553, "step": 39455 }, { "epoch": 0.48121951219512193, "grad_norm": 1.3109939098358154, "learning_rate": 1.6791869918699188e-05, "loss": 0.0531, "step": 39460 }, { "epoch": 0.48128048780487803, "grad_norm": 1.544838309288025, "learning_rate": 1.679146341463415e-05, "loss": 0.0782, "step": 39465 }, { "epoch": 0.48134146341463413, "grad_norm": 0.9649627208709717, "learning_rate": 1.6791056910569108e-05, "loss": 0.0855, "step": 39470 }, { "epoch": 0.4814024390243902, "grad_norm": 0.5333092212677002, "learning_rate": 1.6790650406504066e-05, "loss": 0.0674, "step": 39475 }, { "epoch": 0.4814634146341463, "grad_norm": 0.39013615250587463, "learning_rate": 1.6790243902439024e-05, "loss": 0.0714, "step": 39480 }, { "epoch": 0.4815243902439024, "grad_norm": 0.7752887606620789, "learning_rate": 1.6789837398373985e-05, "loss": 0.0895, "step": 39485 }, { "epoch": 0.4815853658536585, "grad_norm": 0.9264171719551086, "learning_rate": 1.6789430894308944e-05, "loss": 0.1157, "step": 39490 }, { "epoch": 0.4816463414634146, "grad_norm": 0.924967885017395, "learning_rate": 1.6789024390243905e-05, "loss": 0.0701, "step": 39495 }, { "epoch": 0.4817073170731707, "grad_norm": 0.5715927481651306, "learning_rate": 1.6788617886178863e-05, "loss": 0.0994, "step": 39500 }, { "epoch": 0.4817682926829268, "grad_norm": 0.5168226957321167, "learning_rate": 1.678821138211382e-05, "loss": 0.0648, "step": 39505 }, { "epoch": 0.4818292682926829, "grad_norm": 1.0137099027633667, "learning_rate": 1.6787804878048783e-05, "loss": 0.077, "step": 39510 }, { "epoch": 0.481890243902439, "grad_norm": 0.5178496837615967, "learning_rate": 1.678739837398374e-05, "loss": 0.0477, "step": 39515 }, { "epoch": 0.4819512195121951, "grad_norm": 0.6903519630432129, "learning_rate": 1.67869918699187e-05, "loss": 0.0821, "step": 39520 }, { "epoch": 0.4820121951219512, "grad_norm": 0.7998219132423401, "learning_rate": 1.678658536585366e-05, "loss": 0.0681, "step": 39525 }, { "epoch": 0.4820731707317073, "grad_norm": 0.49741634726524353, "learning_rate": 1.678617886178862e-05, "loss": 0.066, "step": 39530 }, { "epoch": 0.4821341463414634, "grad_norm": 0.46401509642601013, "learning_rate": 1.6785772357723577e-05, "loss": 0.0799, "step": 39535 }, { "epoch": 0.4821951219512195, "grad_norm": 0.7855303287506104, "learning_rate": 1.678536585365854e-05, "loss": 0.0732, "step": 39540 }, { "epoch": 0.4822560975609756, "grad_norm": 0.8064094185829163, "learning_rate": 1.6784959349593497e-05, "loss": 0.0652, "step": 39545 }, { "epoch": 0.4823170731707317, "grad_norm": 0.8821930885314941, "learning_rate": 1.6784552845528458e-05, "loss": 0.0829, "step": 39550 }, { "epoch": 0.4823780487804878, "grad_norm": 0.6438621282577515, "learning_rate": 1.6784146341463416e-05, "loss": 0.0687, "step": 39555 }, { "epoch": 0.4824390243902439, "grad_norm": 0.44908252358436584, "learning_rate": 1.6783739837398378e-05, "loss": 0.0542, "step": 39560 }, { "epoch": 0.4825, "grad_norm": 0.6588733196258545, "learning_rate": 1.6783333333333332e-05, "loss": 0.0729, "step": 39565 }, { "epoch": 0.4825609756097561, "grad_norm": 0.3867413401603699, "learning_rate": 1.6782926829268294e-05, "loss": 0.1038, "step": 39570 }, { "epoch": 0.4826219512195122, "grad_norm": 0.4601377248764038, "learning_rate": 1.6782520325203252e-05, "loss": 0.0965, "step": 39575 }, { "epoch": 0.4826829268292683, "grad_norm": 1.085147738456726, "learning_rate": 1.6782113821138214e-05, "loss": 0.077, "step": 39580 }, { "epoch": 0.4827439024390244, "grad_norm": 0.5417104363441467, "learning_rate": 1.6781707317073172e-05, "loss": 0.096, "step": 39585 }, { "epoch": 0.4828048780487805, "grad_norm": 0.5844689011573792, "learning_rate": 1.6781300813008133e-05, "loss": 0.07, "step": 39590 }, { "epoch": 0.48286585365853657, "grad_norm": 0.6497510075569153, "learning_rate": 1.678089430894309e-05, "loss": 0.0856, "step": 39595 }, { "epoch": 0.48292682926829267, "grad_norm": 0.9711969494819641, "learning_rate": 1.678048780487805e-05, "loss": 0.0905, "step": 39600 }, { "epoch": 0.48298780487804877, "grad_norm": 0.5580465197563171, "learning_rate": 1.6780081300813008e-05, "loss": 0.0502, "step": 39605 }, { "epoch": 0.48304878048780486, "grad_norm": 0.28676244616508484, "learning_rate": 1.677967479674797e-05, "loss": 0.0429, "step": 39610 }, { "epoch": 0.48310975609756096, "grad_norm": 0.6579490303993225, "learning_rate": 1.6779268292682927e-05, "loss": 0.0631, "step": 39615 }, { "epoch": 0.48317073170731706, "grad_norm": 0.43119052052497864, "learning_rate": 1.677886178861789e-05, "loss": 0.1375, "step": 39620 }, { "epoch": 0.48323170731707316, "grad_norm": 0.6824659109115601, "learning_rate": 1.6778455284552847e-05, "loss": 0.0655, "step": 39625 }, { "epoch": 0.48329268292682925, "grad_norm": 0.7743238210678101, "learning_rate": 1.6778048780487805e-05, "loss": 0.0833, "step": 39630 }, { "epoch": 0.48335365853658535, "grad_norm": 0.7118445038795471, "learning_rate": 1.6777642276422767e-05, "loss": 0.1214, "step": 39635 }, { "epoch": 0.48341463414634145, "grad_norm": 0.44165825843811035, "learning_rate": 1.6777235772357725e-05, "loss": 0.0644, "step": 39640 }, { "epoch": 0.48347560975609755, "grad_norm": 0.3210378587245941, "learning_rate": 1.6776829268292686e-05, "loss": 0.0544, "step": 39645 }, { "epoch": 0.48353658536585364, "grad_norm": 0.597262442111969, "learning_rate": 1.6776422764227644e-05, "loss": 0.0992, "step": 39650 }, { "epoch": 0.48359756097560974, "grad_norm": 0.4783686697483063, "learning_rate": 1.6776016260162602e-05, "loss": 0.0652, "step": 39655 }, { "epoch": 0.48365853658536584, "grad_norm": 0.648004949092865, "learning_rate": 1.677560975609756e-05, "loss": 0.0621, "step": 39660 }, { "epoch": 0.48371951219512194, "grad_norm": 0.5750685334205627, "learning_rate": 1.6775203252032522e-05, "loss": 0.0655, "step": 39665 }, { "epoch": 0.48378048780487803, "grad_norm": 0.4556049108505249, "learning_rate": 1.677479674796748e-05, "loss": 0.0512, "step": 39670 }, { "epoch": 0.48384146341463413, "grad_norm": 0.9463493824005127, "learning_rate": 1.6774390243902442e-05, "loss": 0.0932, "step": 39675 }, { "epoch": 0.48390243902439023, "grad_norm": 0.3841193616390228, "learning_rate": 1.67739837398374e-05, "loss": 0.0692, "step": 39680 }, { "epoch": 0.4839634146341463, "grad_norm": 0.5912373661994934, "learning_rate": 1.6773577235772358e-05, "loss": 0.0843, "step": 39685 }, { "epoch": 0.4840243902439024, "grad_norm": 0.6854207515716553, "learning_rate": 1.6773170731707316e-05, "loss": 0.043, "step": 39690 }, { "epoch": 0.4840853658536585, "grad_norm": 0.8858678936958313, "learning_rate": 1.6772764227642278e-05, "loss": 0.0593, "step": 39695 }, { "epoch": 0.4841463414634146, "grad_norm": 0.41507747769355774, "learning_rate": 1.6772357723577236e-05, "loss": 0.071, "step": 39700 }, { "epoch": 0.4842073170731707, "grad_norm": 0.9344741702079773, "learning_rate": 1.6771951219512197e-05, "loss": 0.0663, "step": 39705 }, { "epoch": 0.4842682926829268, "grad_norm": 0.5449419617652893, "learning_rate": 1.6771544715447155e-05, "loss": 0.0986, "step": 39710 }, { "epoch": 0.4843292682926829, "grad_norm": 0.915926456451416, "learning_rate": 1.6771138211382114e-05, "loss": 0.0822, "step": 39715 }, { "epoch": 0.484390243902439, "grad_norm": 1.6245980262756348, "learning_rate": 1.6770731707317075e-05, "loss": 0.0594, "step": 39720 }, { "epoch": 0.4844512195121951, "grad_norm": 0.9742429852485657, "learning_rate": 1.6770325203252033e-05, "loss": 0.0861, "step": 39725 }, { "epoch": 0.4845121951219512, "grad_norm": 0.6534091830253601, "learning_rate": 1.6769918699186995e-05, "loss": 0.1072, "step": 39730 }, { "epoch": 0.4845731707317073, "grad_norm": 0.677140474319458, "learning_rate": 1.6769512195121953e-05, "loss": 0.0802, "step": 39735 }, { "epoch": 0.4846341463414634, "grad_norm": 0.5627055764198303, "learning_rate": 1.6769105691056914e-05, "loss": 0.0454, "step": 39740 }, { "epoch": 0.4846951219512195, "grad_norm": 0.43604177236557007, "learning_rate": 1.676869918699187e-05, "loss": 0.0565, "step": 39745 }, { "epoch": 0.4847560975609756, "grad_norm": 1.644087314605713, "learning_rate": 1.676829268292683e-05, "loss": 0.0565, "step": 39750 }, { "epoch": 0.4848170731707317, "grad_norm": 0.6853446364402771, "learning_rate": 1.676788617886179e-05, "loss": 0.0506, "step": 39755 }, { "epoch": 0.4848780487804878, "grad_norm": 0.46456730365753174, "learning_rate": 1.676747967479675e-05, "loss": 0.0733, "step": 39760 }, { "epoch": 0.4849390243902439, "grad_norm": 0.7335997819900513, "learning_rate": 1.676707317073171e-05, "loss": 0.0618, "step": 39765 }, { "epoch": 0.485, "grad_norm": 0.5572277903556824, "learning_rate": 1.676666666666667e-05, "loss": 0.0473, "step": 39770 }, { "epoch": 0.4850609756097561, "grad_norm": 0.47008657455444336, "learning_rate": 1.6766260162601628e-05, "loss": 0.0588, "step": 39775 }, { "epoch": 0.4851219512195122, "grad_norm": 1.083018183708191, "learning_rate": 1.6765853658536586e-05, "loss": 0.1246, "step": 39780 }, { "epoch": 0.4851829268292683, "grad_norm": 0.6758444905281067, "learning_rate": 1.6765447154471544e-05, "loss": 0.0729, "step": 39785 }, { "epoch": 0.4852439024390244, "grad_norm": 1.1218026876449585, "learning_rate": 1.6765040650406506e-05, "loss": 0.0683, "step": 39790 }, { "epoch": 0.4853048780487805, "grad_norm": 1.3303546905517578, "learning_rate": 1.6764634146341464e-05, "loss": 0.0892, "step": 39795 }, { "epoch": 0.4853658536585366, "grad_norm": 0.6044437885284424, "learning_rate": 1.6764227642276425e-05, "loss": 0.0764, "step": 39800 }, { "epoch": 0.48542682926829267, "grad_norm": 0.5900937914848328, "learning_rate": 1.6763821138211384e-05, "loss": 0.0656, "step": 39805 }, { "epoch": 0.48548780487804877, "grad_norm": 0.7401590347290039, "learning_rate": 1.676341463414634e-05, "loss": 0.0755, "step": 39810 }, { "epoch": 0.48554878048780487, "grad_norm": 0.557574987411499, "learning_rate": 1.6763008130081303e-05, "loss": 0.0348, "step": 39815 }, { "epoch": 0.48560975609756096, "grad_norm": 0.737940788269043, "learning_rate": 1.676260162601626e-05, "loss": 0.0676, "step": 39820 }, { "epoch": 0.48567073170731706, "grad_norm": 0.501391589641571, "learning_rate": 1.6762195121951223e-05, "loss": 0.0588, "step": 39825 }, { "epoch": 0.48573170731707316, "grad_norm": 0.45642971992492676, "learning_rate": 1.676178861788618e-05, "loss": 0.0725, "step": 39830 }, { "epoch": 0.48579268292682926, "grad_norm": 0.4889744222164154, "learning_rate": 1.676138211382114e-05, "loss": 0.0562, "step": 39835 }, { "epoch": 0.48585365853658535, "grad_norm": 1.2115126848220825, "learning_rate": 1.6760975609756097e-05, "loss": 0.1073, "step": 39840 }, { "epoch": 0.48591463414634145, "grad_norm": 1.092827558517456, "learning_rate": 1.676056910569106e-05, "loss": 0.0628, "step": 39845 }, { "epoch": 0.48597560975609755, "grad_norm": 0.8531093597412109, "learning_rate": 1.6760162601626017e-05, "loss": 0.0558, "step": 39850 }, { "epoch": 0.48603658536585365, "grad_norm": 0.9080551862716675, "learning_rate": 1.675975609756098e-05, "loss": 0.0837, "step": 39855 }, { "epoch": 0.48609756097560974, "grad_norm": 0.5314196944236755, "learning_rate": 1.6759349593495937e-05, "loss": 0.0672, "step": 39860 }, { "epoch": 0.48615853658536584, "grad_norm": 1.0096527338027954, "learning_rate": 1.6758943089430895e-05, "loss": 0.0944, "step": 39865 }, { "epoch": 0.48621951219512194, "grad_norm": 0.7441115379333496, "learning_rate": 1.6758536585365853e-05, "loss": 0.0989, "step": 39870 }, { "epoch": 0.48628048780487804, "grad_norm": 0.6651779413223267, "learning_rate": 1.6758130081300814e-05, "loss": 0.0613, "step": 39875 }, { "epoch": 0.48634146341463413, "grad_norm": 0.7686874866485596, "learning_rate": 1.6757723577235772e-05, "loss": 0.0607, "step": 39880 }, { "epoch": 0.48640243902439023, "grad_norm": 0.5615690350532532, "learning_rate": 1.6757317073170734e-05, "loss": 0.0773, "step": 39885 }, { "epoch": 0.48646341463414633, "grad_norm": 0.47762367129325867, "learning_rate": 1.6756910569105692e-05, "loss": 0.0859, "step": 39890 }, { "epoch": 0.4865243902439024, "grad_norm": 1.3853135108947754, "learning_rate": 1.675650406504065e-05, "loss": 0.065, "step": 39895 }, { "epoch": 0.4865853658536585, "grad_norm": 0.850695788860321, "learning_rate": 1.6756097560975612e-05, "loss": 0.106, "step": 39900 }, { "epoch": 0.4866463414634146, "grad_norm": 0.41530415415763855, "learning_rate": 1.675569105691057e-05, "loss": 0.0872, "step": 39905 }, { "epoch": 0.4867073170731707, "grad_norm": 0.44458696246147156, "learning_rate": 1.675528455284553e-05, "loss": 0.0475, "step": 39910 }, { "epoch": 0.4867682926829268, "grad_norm": 0.9863851070404053, "learning_rate": 1.675487804878049e-05, "loss": 0.0592, "step": 39915 }, { "epoch": 0.4868292682926829, "grad_norm": 0.3891470730304718, "learning_rate": 1.675447154471545e-05, "loss": 0.0501, "step": 39920 }, { "epoch": 0.486890243902439, "grad_norm": 0.5116896033287048, "learning_rate": 1.6754065040650406e-05, "loss": 0.0523, "step": 39925 }, { "epoch": 0.4869512195121951, "grad_norm": 0.29565244913101196, "learning_rate": 1.6753658536585367e-05, "loss": 0.0794, "step": 39930 }, { "epoch": 0.4870121951219512, "grad_norm": 0.45644012093544006, "learning_rate": 1.6753252032520325e-05, "loss": 0.0599, "step": 39935 }, { "epoch": 0.4870731707317073, "grad_norm": 0.7242844104766846, "learning_rate": 1.6752845528455287e-05, "loss": 0.0649, "step": 39940 }, { "epoch": 0.4871341463414634, "grad_norm": 0.9219374060630798, "learning_rate": 1.6752439024390245e-05, "loss": 0.07, "step": 39945 }, { "epoch": 0.4871951219512195, "grad_norm": 0.5078620910644531, "learning_rate": 1.6752032520325207e-05, "loss": 0.0608, "step": 39950 }, { "epoch": 0.4872560975609756, "grad_norm": 0.7907736897468567, "learning_rate": 1.675162601626016e-05, "loss": 0.049, "step": 39955 }, { "epoch": 0.4873170731707317, "grad_norm": 0.4965362846851349, "learning_rate": 1.6751219512195123e-05, "loss": 0.0686, "step": 39960 }, { "epoch": 0.4873780487804878, "grad_norm": 0.9263454079627991, "learning_rate": 1.675081300813008e-05, "loss": 0.0759, "step": 39965 }, { "epoch": 0.4874390243902439, "grad_norm": 0.8733353018760681, "learning_rate": 1.6750406504065042e-05, "loss": 0.076, "step": 39970 }, { "epoch": 0.4875, "grad_norm": 0.4780609607696533, "learning_rate": 1.675e-05, "loss": 0.0628, "step": 39975 }, { "epoch": 0.4875609756097561, "grad_norm": 0.7385201454162598, "learning_rate": 1.6749593495934962e-05, "loss": 0.0704, "step": 39980 }, { "epoch": 0.4876219512195122, "grad_norm": 0.739360511302948, "learning_rate": 1.674918699186992e-05, "loss": 0.0674, "step": 39985 }, { "epoch": 0.4876829268292683, "grad_norm": 0.6927400231361389, "learning_rate": 1.674878048780488e-05, "loss": 0.0603, "step": 39990 }, { "epoch": 0.4877439024390244, "grad_norm": 0.43306031823158264, "learning_rate": 1.674837398373984e-05, "loss": 0.0423, "step": 39995 }, { "epoch": 0.4878048780487805, "grad_norm": 0.4406674802303314, "learning_rate": 1.6747967479674798e-05, "loss": 0.0595, "step": 40000 }, { "epoch": 0.4878658536585366, "grad_norm": 0.7451022863388062, "learning_rate": 1.674756097560976e-05, "loss": 0.0848, "step": 40005 }, { "epoch": 0.48792682926829267, "grad_norm": 0.741168737411499, "learning_rate": 1.6747154471544718e-05, "loss": 0.0686, "step": 40010 }, { "epoch": 0.48798780487804877, "grad_norm": 0.766273558139801, "learning_rate": 1.6746747967479676e-05, "loss": 0.077, "step": 40015 }, { "epoch": 0.48804878048780487, "grad_norm": 0.8379135727882385, "learning_rate": 1.6746341463414634e-05, "loss": 0.0854, "step": 40020 }, { "epoch": 0.48810975609756097, "grad_norm": 0.46924933791160583, "learning_rate": 1.6745934959349595e-05, "loss": 0.0507, "step": 40025 }, { "epoch": 0.48817073170731706, "grad_norm": 0.36912813782691956, "learning_rate": 1.6745528455284554e-05, "loss": 0.058, "step": 40030 }, { "epoch": 0.48823170731707316, "grad_norm": 0.3155374825000763, "learning_rate": 1.6745121951219515e-05, "loss": 0.0653, "step": 40035 }, { "epoch": 0.48829268292682926, "grad_norm": 0.4271853566169739, "learning_rate": 1.6744715447154473e-05, "loss": 0.046, "step": 40040 }, { "epoch": 0.48835365853658536, "grad_norm": 0.9094362854957581, "learning_rate": 1.674430894308943e-05, "loss": 0.1147, "step": 40045 }, { "epoch": 0.48841463414634145, "grad_norm": 0.6938343644142151, "learning_rate": 1.674390243902439e-05, "loss": 0.0716, "step": 40050 }, { "epoch": 0.48847560975609755, "grad_norm": 0.6098489761352539, "learning_rate": 1.674349593495935e-05, "loss": 0.0971, "step": 40055 }, { "epoch": 0.48853658536585365, "grad_norm": 0.6204248666763306, "learning_rate": 1.674308943089431e-05, "loss": 0.0657, "step": 40060 }, { "epoch": 0.48859756097560975, "grad_norm": 0.5261270403862, "learning_rate": 1.674268292682927e-05, "loss": 0.065, "step": 40065 }, { "epoch": 0.48865853658536584, "grad_norm": 0.6913158297538757, "learning_rate": 1.674227642276423e-05, "loss": 0.0644, "step": 40070 }, { "epoch": 0.48871951219512194, "grad_norm": 0.5382262468338013, "learning_rate": 1.6741869918699187e-05, "loss": 0.0731, "step": 40075 }, { "epoch": 0.48878048780487804, "grad_norm": 0.9735081195831299, "learning_rate": 1.674146341463415e-05, "loss": 0.0565, "step": 40080 }, { "epoch": 0.48884146341463414, "grad_norm": 0.4986855387687683, "learning_rate": 1.6741056910569106e-05, "loss": 0.074, "step": 40085 }, { "epoch": 0.48890243902439023, "grad_norm": 0.5039525032043457, "learning_rate": 1.6740650406504068e-05, "loss": 0.0622, "step": 40090 }, { "epoch": 0.48896341463414633, "grad_norm": 0.6742830276489258, "learning_rate": 1.6740243902439026e-05, "loss": 0.0666, "step": 40095 }, { "epoch": 0.48902439024390243, "grad_norm": 0.649237334728241, "learning_rate": 1.6739837398373984e-05, "loss": 0.0437, "step": 40100 }, { "epoch": 0.4890853658536585, "grad_norm": 0.5780737996101379, "learning_rate": 1.6739430894308942e-05, "loss": 0.0639, "step": 40105 }, { "epoch": 0.4891463414634146, "grad_norm": 0.4811689257621765, "learning_rate": 1.6739024390243904e-05, "loss": 0.0555, "step": 40110 }, { "epoch": 0.4892073170731707, "grad_norm": 0.8475826978683472, "learning_rate": 1.6738617886178862e-05, "loss": 0.1103, "step": 40115 }, { "epoch": 0.4892682926829268, "grad_norm": 1.1645013093948364, "learning_rate": 1.6738211382113824e-05, "loss": 0.0855, "step": 40120 }, { "epoch": 0.4893292682926829, "grad_norm": 0.44055503606796265, "learning_rate": 1.673780487804878e-05, "loss": 0.0818, "step": 40125 }, { "epoch": 0.489390243902439, "grad_norm": 0.5831366181373596, "learning_rate": 1.6737398373983743e-05, "loss": 0.0847, "step": 40130 }, { "epoch": 0.4894512195121951, "grad_norm": 0.530968189239502, "learning_rate": 1.6736991869918698e-05, "loss": 0.0627, "step": 40135 }, { "epoch": 0.4895121951219512, "grad_norm": 0.3679642081260681, "learning_rate": 1.673658536585366e-05, "loss": 0.0614, "step": 40140 }, { "epoch": 0.4895731707317073, "grad_norm": 0.6144741773605347, "learning_rate": 1.6736178861788618e-05, "loss": 0.0568, "step": 40145 }, { "epoch": 0.4896341463414634, "grad_norm": 0.7032760977745056, "learning_rate": 1.673577235772358e-05, "loss": 0.0767, "step": 40150 }, { "epoch": 0.4896951219512195, "grad_norm": 0.8108775615692139, "learning_rate": 1.6735365853658537e-05, "loss": 0.1052, "step": 40155 }, { "epoch": 0.4897560975609756, "grad_norm": 0.735165536403656, "learning_rate": 1.67349593495935e-05, "loss": 0.0747, "step": 40160 }, { "epoch": 0.4898170731707317, "grad_norm": 0.7063118815422058, "learning_rate": 1.6734552845528457e-05, "loss": 0.0759, "step": 40165 }, { "epoch": 0.4898780487804878, "grad_norm": 0.5947168469429016, "learning_rate": 1.6734146341463415e-05, "loss": 0.0646, "step": 40170 }, { "epoch": 0.4899390243902439, "grad_norm": 0.7199665904045105, "learning_rate": 1.6733739837398376e-05, "loss": 0.0969, "step": 40175 }, { "epoch": 0.49, "grad_norm": 0.36094731092453003, "learning_rate": 1.6733333333333335e-05, "loss": 0.0371, "step": 40180 }, { "epoch": 0.4900609756097561, "grad_norm": 0.8287649154663086, "learning_rate": 1.6732926829268296e-05, "loss": 0.0795, "step": 40185 }, { "epoch": 0.4901219512195122, "grad_norm": 0.4429745376110077, "learning_rate": 1.6732520325203254e-05, "loss": 0.0461, "step": 40190 }, { "epoch": 0.4901829268292683, "grad_norm": 0.44547852873802185, "learning_rate": 1.6732113821138212e-05, "loss": 0.0622, "step": 40195 }, { "epoch": 0.4902439024390244, "grad_norm": 2.6027235984802246, "learning_rate": 1.673170731707317e-05, "loss": 0.0697, "step": 40200 }, { "epoch": 0.4903048780487805, "grad_norm": 0.6504713296890259, "learning_rate": 1.6731300813008132e-05, "loss": 0.0693, "step": 40205 }, { "epoch": 0.4903658536585366, "grad_norm": 0.7652722001075745, "learning_rate": 1.673089430894309e-05, "loss": 0.0747, "step": 40210 }, { "epoch": 0.4904268292682927, "grad_norm": 0.62162846326828, "learning_rate": 1.673048780487805e-05, "loss": 0.0956, "step": 40215 }, { "epoch": 0.49048780487804877, "grad_norm": 0.33755698800086975, "learning_rate": 1.673008130081301e-05, "loss": 0.0637, "step": 40220 }, { "epoch": 0.49054878048780487, "grad_norm": 0.48558515310287476, "learning_rate": 1.6729674796747968e-05, "loss": 0.0568, "step": 40225 }, { "epoch": 0.49060975609756097, "grad_norm": 1.1841961145401, "learning_rate": 1.6729268292682926e-05, "loss": 0.0637, "step": 40230 }, { "epoch": 0.49067073170731706, "grad_norm": 0.6436945199966431, "learning_rate": 1.6728861788617888e-05, "loss": 0.0904, "step": 40235 }, { "epoch": 0.49073170731707316, "grad_norm": 1.2850462198257446, "learning_rate": 1.6728455284552846e-05, "loss": 0.0979, "step": 40240 }, { "epoch": 0.49079268292682926, "grad_norm": 0.5027874112129211, "learning_rate": 1.6728048780487807e-05, "loss": 0.0548, "step": 40245 }, { "epoch": 0.49085365853658536, "grad_norm": 0.5644302368164062, "learning_rate": 1.6727642276422765e-05, "loss": 0.0871, "step": 40250 }, { "epoch": 0.49091463414634146, "grad_norm": 0.4237077236175537, "learning_rate": 1.6727235772357723e-05, "loss": 0.0417, "step": 40255 }, { "epoch": 0.49097560975609755, "grad_norm": 0.8219500780105591, "learning_rate": 1.6726829268292685e-05, "loss": 0.0736, "step": 40260 }, { "epoch": 0.49103658536585365, "grad_norm": 0.4148324131965637, "learning_rate": 1.6726422764227643e-05, "loss": 0.0554, "step": 40265 }, { "epoch": 0.49109756097560975, "grad_norm": 0.35015907883644104, "learning_rate": 1.6726016260162605e-05, "loss": 0.0833, "step": 40270 }, { "epoch": 0.49115853658536585, "grad_norm": 0.5803981423377991, "learning_rate": 1.6725609756097563e-05, "loss": 0.0617, "step": 40275 }, { "epoch": 0.49121951219512194, "grad_norm": 0.811258852481842, "learning_rate": 1.672520325203252e-05, "loss": 0.0529, "step": 40280 }, { "epoch": 0.49128048780487804, "grad_norm": 0.6094431281089783, "learning_rate": 1.672479674796748e-05, "loss": 0.0461, "step": 40285 }, { "epoch": 0.49134146341463414, "grad_norm": 0.5769004225730896, "learning_rate": 1.672439024390244e-05, "loss": 0.0514, "step": 40290 }, { "epoch": 0.49140243902439024, "grad_norm": 0.6423501968383789, "learning_rate": 1.67239837398374e-05, "loss": 0.091, "step": 40295 }, { "epoch": 0.49146341463414633, "grad_norm": 0.8812501430511475, "learning_rate": 1.672357723577236e-05, "loss": 0.0766, "step": 40300 }, { "epoch": 0.49152439024390243, "grad_norm": 1.324550747871399, "learning_rate": 1.672317073170732e-05, "loss": 0.0675, "step": 40305 }, { "epoch": 0.49158536585365853, "grad_norm": 0.5184176564216614, "learning_rate": 1.672276422764228e-05, "loss": 0.0668, "step": 40310 }, { "epoch": 0.4916463414634146, "grad_norm": 0.41496723890304565, "learning_rate": 1.6722357723577235e-05, "loss": 0.075, "step": 40315 }, { "epoch": 0.4917073170731707, "grad_norm": 0.4775954484939575, "learning_rate": 1.6721951219512196e-05, "loss": 0.0389, "step": 40320 }, { "epoch": 0.4917682926829268, "grad_norm": 0.4632340967655182, "learning_rate": 1.6721544715447154e-05, "loss": 0.0534, "step": 40325 }, { "epoch": 0.4918292682926829, "grad_norm": 0.5520492196083069, "learning_rate": 1.6721138211382116e-05, "loss": 0.0768, "step": 40330 }, { "epoch": 0.491890243902439, "grad_norm": 0.6653776168823242, "learning_rate": 1.6720731707317074e-05, "loss": 0.0751, "step": 40335 }, { "epoch": 0.4919512195121951, "grad_norm": 1.366645097732544, "learning_rate": 1.6720325203252035e-05, "loss": 0.0521, "step": 40340 }, { "epoch": 0.4920121951219512, "grad_norm": 0.3740113377571106, "learning_rate": 1.6719918699186993e-05, "loss": 0.08, "step": 40345 }, { "epoch": 0.4920731707317073, "grad_norm": 0.2616269886493683, "learning_rate": 1.671951219512195e-05, "loss": 0.0631, "step": 40350 }, { "epoch": 0.4921341463414634, "grad_norm": 0.5336800813674927, "learning_rate": 1.6719105691056913e-05, "loss": 0.0644, "step": 40355 }, { "epoch": 0.4921951219512195, "grad_norm": 0.8458450436592102, "learning_rate": 1.671869918699187e-05, "loss": 0.0541, "step": 40360 }, { "epoch": 0.4922560975609756, "grad_norm": 0.7365257143974304, "learning_rate": 1.671829268292683e-05, "loss": 0.0977, "step": 40365 }, { "epoch": 0.4923170731707317, "grad_norm": 0.5468148589134216, "learning_rate": 1.671788617886179e-05, "loss": 0.0753, "step": 40370 }, { "epoch": 0.4923780487804878, "grad_norm": 0.5442343950271606, "learning_rate": 1.671747967479675e-05, "loss": 0.0574, "step": 40375 }, { "epoch": 0.4924390243902439, "grad_norm": 0.5551802515983582, "learning_rate": 1.6717073170731707e-05, "loss": 0.0634, "step": 40380 }, { "epoch": 0.4925, "grad_norm": 0.5283834338188171, "learning_rate": 1.671666666666667e-05, "loss": 0.0662, "step": 40385 }, { "epoch": 0.4925609756097561, "grad_norm": 0.21134905517101288, "learning_rate": 1.6716260162601627e-05, "loss": 0.04, "step": 40390 }, { "epoch": 0.4926219512195122, "grad_norm": 0.74716717004776, "learning_rate": 1.671585365853659e-05, "loss": 0.0785, "step": 40395 }, { "epoch": 0.4926829268292683, "grad_norm": 6.9035420417785645, "learning_rate": 1.6715447154471546e-05, "loss": 0.0661, "step": 40400 }, { "epoch": 0.4927439024390244, "grad_norm": 0.4292612075805664, "learning_rate": 1.6715040650406505e-05, "loss": 0.043, "step": 40405 }, { "epoch": 0.4928048780487805, "grad_norm": 3.3372044563293457, "learning_rate": 1.6714634146341463e-05, "loss": 0.082, "step": 40410 }, { "epoch": 0.4928658536585366, "grad_norm": 0.7194771766662598, "learning_rate": 1.6714227642276424e-05, "loss": 0.0782, "step": 40415 }, { "epoch": 0.4929268292682927, "grad_norm": 1.4086662530899048, "learning_rate": 1.6713821138211382e-05, "loss": 0.0601, "step": 40420 }, { "epoch": 0.4929878048780488, "grad_norm": 0.4508928656578064, "learning_rate": 1.6713414634146344e-05, "loss": 0.0762, "step": 40425 }, { "epoch": 0.49304878048780487, "grad_norm": 0.5730611085891724, "learning_rate": 1.6713008130081302e-05, "loss": 0.058, "step": 40430 }, { "epoch": 0.49310975609756097, "grad_norm": 0.6040274500846863, "learning_rate": 1.671260162601626e-05, "loss": 0.0576, "step": 40435 }, { "epoch": 0.49317073170731707, "grad_norm": 0.48318541049957275, "learning_rate": 1.671219512195122e-05, "loss": 0.0929, "step": 40440 }, { "epoch": 0.49323170731707316, "grad_norm": 0.7683741450309753, "learning_rate": 1.671178861788618e-05, "loss": 0.0814, "step": 40445 }, { "epoch": 0.49329268292682926, "grad_norm": 0.6868038177490234, "learning_rate": 1.671138211382114e-05, "loss": 0.0523, "step": 40450 }, { "epoch": 0.49335365853658536, "grad_norm": 0.7806829214096069, "learning_rate": 1.67109756097561e-05, "loss": 0.0822, "step": 40455 }, { "epoch": 0.49341463414634146, "grad_norm": 1.6959302425384521, "learning_rate": 1.6710569105691058e-05, "loss": 0.0864, "step": 40460 }, { "epoch": 0.49347560975609756, "grad_norm": 0.4689207971096039, "learning_rate": 1.6710162601626016e-05, "loss": 0.0579, "step": 40465 }, { "epoch": 0.49353658536585365, "grad_norm": 0.6067582368850708, "learning_rate": 1.6709756097560977e-05, "loss": 0.0414, "step": 40470 }, { "epoch": 0.49359756097560975, "grad_norm": 0.6361820697784424, "learning_rate": 1.6709349593495935e-05, "loss": 0.0492, "step": 40475 }, { "epoch": 0.49365853658536585, "grad_norm": 0.7064982652664185, "learning_rate": 1.6708943089430897e-05, "loss": 0.0676, "step": 40480 }, { "epoch": 0.49371951219512195, "grad_norm": 0.9313439130783081, "learning_rate": 1.6708536585365855e-05, "loss": 0.0684, "step": 40485 }, { "epoch": 0.49378048780487804, "grad_norm": 0.5828027129173279, "learning_rate": 1.6708130081300816e-05, "loss": 0.0927, "step": 40490 }, { "epoch": 0.49384146341463414, "grad_norm": 0.9214760661125183, "learning_rate": 1.670772357723577e-05, "loss": 0.0778, "step": 40495 }, { "epoch": 0.49390243902439024, "grad_norm": 0.5791222453117371, "learning_rate": 1.6707317073170733e-05, "loss": 0.0928, "step": 40500 }, { "epoch": 0.49396341463414634, "grad_norm": 0.8290738463401794, "learning_rate": 1.670691056910569e-05, "loss": 0.0699, "step": 40505 }, { "epoch": 0.49402439024390243, "grad_norm": 1.3436362743377686, "learning_rate": 1.6706504065040652e-05, "loss": 0.0884, "step": 40510 }, { "epoch": 0.49408536585365853, "grad_norm": 0.7602244019508362, "learning_rate": 1.670609756097561e-05, "loss": 0.083, "step": 40515 }, { "epoch": 0.49414634146341463, "grad_norm": 0.41113710403442383, "learning_rate": 1.6705691056910572e-05, "loss": 0.0747, "step": 40520 }, { "epoch": 0.4942073170731707, "grad_norm": 0.7747411131858826, "learning_rate": 1.670528455284553e-05, "loss": 0.0621, "step": 40525 }, { "epoch": 0.4942682926829268, "grad_norm": 0.47967764735221863, "learning_rate": 1.6704878048780488e-05, "loss": 0.0587, "step": 40530 }, { "epoch": 0.4943292682926829, "grad_norm": 0.4826558530330658, "learning_rate": 1.670447154471545e-05, "loss": 0.0866, "step": 40535 }, { "epoch": 0.494390243902439, "grad_norm": 0.8204166293144226, "learning_rate": 1.6704065040650408e-05, "loss": 0.0813, "step": 40540 }, { "epoch": 0.4944512195121951, "grad_norm": 0.8429030776023865, "learning_rate": 1.6703658536585366e-05, "loss": 0.0584, "step": 40545 }, { "epoch": 0.4945121951219512, "grad_norm": 0.3773357570171356, "learning_rate": 1.6703252032520328e-05, "loss": 0.0682, "step": 40550 }, { "epoch": 0.4945731707317073, "grad_norm": 1.0211899280548096, "learning_rate": 1.6702845528455286e-05, "loss": 0.0774, "step": 40555 }, { "epoch": 0.4946341463414634, "grad_norm": 0.9325540065765381, "learning_rate": 1.6702439024390244e-05, "loss": 0.0792, "step": 40560 }, { "epoch": 0.4946951219512195, "grad_norm": 0.8005002737045288, "learning_rate": 1.6702032520325205e-05, "loss": 0.0883, "step": 40565 }, { "epoch": 0.4947560975609756, "grad_norm": 0.6689811944961548, "learning_rate": 1.6701626016260163e-05, "loss": 0.1218, "step": 40570 }, { "epoch": 0.4948170731707317, "grad_norm": 0.6911836266517639, "learning_rate": 1.6701219512195125e-05, "loss": 0.0917, "step": 40575 }, { "epoch": 0.4948780487804878, "grad_norm": 0.46756404638290405, "learning_rate": 1.6700813008130083e-05, "loss": 0.0869, "step": 40580 }, { "epoch": 0.4949390243902439, "grad_norm": 0.9425208568572998, "learning_rate": 1.670040650406504e-05, "loss": 0.0854, "step": 40585 }, { "epoch": 0.495, "grad_norm": 1.014233112335205, "learning_rate": 1.67e-05, "loss": 0.0566, "step": 40590 }, { "epoch": 0.4950609756097561, "grad_norm": 0.293304443359375, "learning_rate": 1.669959349593496e-05, "loss": 0.0711, "step": 40595 }, { "epoch": 0.4951219512195122, "grad_norm": 1.2862995862960815, "learning_rate": 1.669918699186992e-05, "loss": 0.087, "step": 40600 }, { "epoch": 0.4951829268292683, "grad_norm": 0.963205873966217, "learning_rate": 1.669878048780488e-05, "loss": 0.094, "step": 40605 }, { "epoch": 0.4952439024390244, "grad_norm": 0.5441936254501343, "learning_rate": 1.669837398373984e-05, "loss": 0.0737, "step": 40610 }, { "epoch": 0.4953048780487805, "grad_norm": 0.8511312007904053, "learning_rate": 1.6697967479674797e-05, "loss": 0.0568, "step": 40615 }, { "epoch": 0.4953658536585366, "grad_norm": 1.9842767715454102, "learning_rate": 1.6697560975609758e-05, "loss": 0.106, "step": 40620 }, { "epoch": 0.4954268292682927, "grad_norm": 0.6493752598762512, "learning_rate": 1.6697154471544716e-05, "loss": 0.0535, "step": 40625 }, { "epoch": 0.4954878048780488, "grad_norm": 0.5148288607597351, "learning_rate": 1.6696747967479675e-05, "loss": 0.0695, "step": 40630 }, { "epoch": 0.4955487804878049, "grad_norm": 1.389815092086792, "learning_rate": 1.6696341463414636e-05, "loss": 0.0856, "step": 40635 }, { "epoch": 0.49560975609756097, "grad_norm": 0.8282593488693237, "learning_rate": 1.6695934959349594e-05, "loss": 0.1046, "step": 40640 }, { "epoch": 0.49567073170731707, "grad_norm": 0.8305265307426453, "learning_rate": 1.6695528455284552e-05, "loss": 0.064, "step": 40645 }, { "epoch": 0.49573170731707317, "grad_norm": 0.5078139901161194, "learning_rate": 1.6695121951219514e-05, "loss": 0.0669, "step": 40650 }, { "epoch": 0.49579268292682926, "grad_norm": 0.5305932760238647, "learning_rate": 1.6694715447154472e-05, "loss": 0.0756, "step": 40655 }, { "epoch": 0.49585365853658536, "grad_norm": 0.6521833539009094, "learning_rate": 1.6694308943089433e-05, "loss": 0.1106, "step": 40660 }, { "epoch": 0.49591463414634146, "grad_norm": 0.3934141993522644, "learning_rate": 1.669390243902439e-05, "loss": 0.0828, "step": 40665 }, { "epoch": 0.49597560975609756, "grad_norm": 0.9421596527099609, "learning_rate": 1.6693495934959353e-05, "loss": 0.0856, "step": 40670 }, { "epoch": 0.49603658536585366, "grad_norm": 0.7230616807937622, "learning_rate": 1.6693089430894308e-05, "loss": 0.0541, "step": 40675 }, { "epoch": 0.49609756097560975, "grad_norm": 0.5098695158958435, "learning_rate": 1.669268292682927e-05, "loss": 0.0918, "step": 40680 }, { "epoch": 0.49615853658536585, "grad_norm": 0.9798615574836731, "learning_rate": 1.6692276422764227e-05, "loss": 0.0491, "step": 40685 }, { "epoch": 0.49621951219512195, "grad_norm": 0.35043928027153015, "learning_rate": 1.669186991869919e-05, "loss": 0.0591, "step": 40690 }, { "epoch": 0.49628048780487805, "grad_norm": 0.5480707287788391, "learning_rate": 1.6691463414634147e-05, "loss": 0.0853, "step": 40695 }, { "epoch": 0.49634146341463414, "grad_norm": 0.6987600922584534, "learning_rate": 1.669105691056911e-05, "loss": 0.1015, "step": 40700 }, { "epoch": 0.49640243902439024, "grad_norm": 0.2938224673271179, "learning_rate": 1.6690650406504067e-05, "loss": 0.0905, "step": 40705 }, { "epoch": 0.49646341463414634, "grad_norm": 0.7496236562728882, "learning_rate": 1.6690243902439025e-05, "loss": 0.0792, "step": 40710 }, { "epoch": 0.49652439024390244, "grad_norm": 0.5119480490684509, "learning_rate": 1.6689837398373986e-05, "loss": 0.0892, "step": 40715 }, { "epoch": 0.49658536585365853, "grad_norm": 0.6685272455215454, "learning_rate": 1.6689430894308945e-05, "loss": 0.0575, "step": 40720 }, { "epoch": 0.49664634146341463, "grad_norm": 0.7267563939094543, "learning_rate": 1.6689024390243903e-05, "loss": 0.0973, "step": 40725 }, { "epoch": 0.49670731707317073, "grad_norm": 0.3513592779636383, "learning_rate": 1.6688617886178864e-05, "loss": 0.061, "step": 40730 }, { "epoch": 0.4967682926829268, "grad_norm": 0.5424047708511353, "learning_rate": 1.6688211382113822e-05, "loss": 0.0583, "step": 40735 }, { "epoch": 0.4968292682926829, "grad_norm": 0.8335878252983093, "learning_rate": 1.668780487804878e-05, "loss": 0.0551, "step": 40740 }, { "epoch": 0.496890243902439, "grad_norm": 0.40576350688934326, "learning_rate": 1.6687398373983742e-05, "loss": 0.0623, "step": 40745 }, { "epoch": 0.4969512195121951, "grad_norm": 0.7793824076652527, "learning_rate": 1.66869918699187e-05, "loss": 0.0779, "step": 40750 }, { "epoch": 0.4970121951219512, "grad_norm": 0.973307728767395, "learning_rate": 1.668658536585366e-05, "loss": 0.0697, "step": 40755 }, { "epoch": 0.4970731707317073, "grad_norm": 0.752021312713623, "learning_rate": 1.668617886178862e-05, "loss": 0.0583, "step": 40760 }, { "epoch": 0.4971341463414634, "grad_norm": 0.45145106315612793, "learning_rate": 1.6685772357723578e-05, "loss": 0.0697, "step": 40765 }, { "epoch": 0.4971951219512195, "grad_norm": 0.7595183849334717, "learning_rate": 1.6685365853658536e-05, "loss": 0.0758, "step": 40770 }, { "epoch": 0.4972560975609756, "grad_norm": 0.9246714115142822, "learning_rate": 1.6684959349593498e-05, "loss": 0.0782, "step": 40775 }, { "epoch": 0.4973170731707317, "grad_norm": 0.39007043838500977, "learning_rate": 1.6684552845528456e-05, "loss": 0.0803, "step": 40780 }, { "epoch": 0.4973780487804878, "grad_norm": 1.0408514738082886, "learning_rate": 1.6684146341463417e-05, "loss": 0.0609, "step": 40785 }, { "epoch": 0.4974390243902439, "grad_norm": 0.5153638124465942, "learning_rate": 1.6683739837398375e-05, "loss": 0.0945, "step": 40790 }, { "epoch": 0.4975, "grad_norm": 0.5872447490692139, "learning_rate": 1.6683333333333333e-05, "loss": 0.0449, "step": 40795 }, { "epoch": 0.4975609756097561, "grad_norm": 0.7464507818222046, "learning_rate": 1.6682926829268295e-05, "loss": 0.06, "step": 40800 }, { "epoch": 0.4976219512195122, "grad_norm": 0.43057242035865784, "learning_rate": 1.6682520325203253e-05, "loss": 0.0698, "step": 40805 }, { "epoch": 0.4976829268292683, "grad_norm": 0.778374969959259, "learning_rate": 1.668211382113821e-05, "loss": 0.1304, "step": 40810 }, { "epoch": 0.4977439024390244, "grad_norm": 1.5446484088897705, "learning_rate": 1.6681707317073173e-05, "loss": 0.0608, "step": 40815 }, { "epoch": 0.4978048780487805, "grad_norm": 0.63277268409729, "learning_rate": 1.668130081300813e-05, "loss": 0.0693, "step": 40820 }, { "epoch": 0.4978658536585366, "grad_norm": 0.5700511932373047, "learning_rate": 1.668089430894309e-05, "loss": 0.0577, "step": 40825 }, { "epoch": 0.4979268292682927, "grad_norm": 0.6186534762382507, "learning_rate": 1.668048780487805e-05, "loss": 0.0732, "step": 40830 }, { "epoch": 0.4979878048780488, "grad_norm": 0.7371650338172913, "learning_rate": 1.668008130081301e-05, "loss": 0.1367, "step": 40835 }, { "epoch": 0.4980487804878049, "grad_norm": 0.504916250705719, "learning_rate": 1.667967479674797e-05, "loss": 0.0827, "step": 40840 }, { "epoch": 0.498109756097561, "grad_norm": 0.40194466710090637, "learning_rate": 1.6679268292682928e-05, "loss": 0.0609, "step": 40845 }, { "epoch": 0.49817073170731707, "grad_norm": 2.133108139038086, "learning_rate": 1.667886178861789e-05, "loss": 0.0737, "step": 40850 }, { "epoch": 0.49823170731707317, "grad_norm": 0.7695972919464111, "learning_rate": 1.6678455284552844e-05, "loss": 0.0635, "step": 40855 }, { "epoch": 0.49829268292682927, "grad_norm": 0.767894983291626, "learning_rate": 1.6678048780487806e-05, "loss": 0.0632, "step": 40860 }, { "epoch": 0.49835365853658536, "grad_norm": 0.5077142715454102, "learning_rate": 1.6677642276422764e-05, "loss": 0.0717, "step": 40865 }, { "epoch": 0.49841463414634146, "grad_norm": 0.530084490776062, "learning_rate": 1.6677235772357726e-05, "loss": 0.083, "step": 40870 }, { "epoch": 0.49847560975609756, "grad_norm": 0.32228678464889526, "learning_rate": 1.6676829268292684e-05, "loss": 0.0482, "step": 40875 }, { "epoch": 0.49853658536585366, "grad_norm": 0.6611551642417908, "learning_rate": 1.6676422764227645e-05, "loss": 0.0806, "step": 40880 }, { "epoch": 0.49859756097560975, "grad_norm": 1.3335611820220947, "learning_rate": 1.6676016260162603e-05, "loss": 0.0582, "step": 40885 }, { "epoch": 0.49865853658536585, "grad_norm": 1.0733963251113892, "learning_rate": 1.667560975609756e-05, "loss": 0.0491, "step": 40890 }, { "epoch": 0.49871951219512195, "grad_norm": 1.0495457649230957, "learning_rate": 1.667520325203252e-05, "loss": 0.0839, "step": 40895 }, { "epoch": 0.49878048780487805, "grad_norm": 0.3539698123931885, "learning_rate": 1.667479674796748e-05, "loss": 0.0986, "step": 40900 }, { "epoch": 0.49884146341463415, "grad_norm": 0.3335977792739868, "learning_rate": 1.667439024390244e-05, "loss": 0.0607, "step": 40905 }, { "epoch": 0.49890243902439024, "grad_norm": 0.6590673327445984, "learning_rate": 1.66739837398374e-05, "loss": 0.0694, "step": 40910 }, { "epoch": 0.49896341463414634, "grad_norm": 0.9684322476387024, "learning_rate": 1.667357723577236e-05, "loss": 0.0876, "step": 40915 }, { "epoch": 0.49902439024390244, "grad_norm": 0.8694398999214172, "learning_rate": 1.6673170731707317e-05, "loss": 0.0834, "step": 40920 }, { "epoch": 0.49908536585365854, "grad_norm": 0.4251508116722107, "learning_rate": 1.667276422764228e-05, "loss": 0.0524, "step": 40925 }, { "epoch": 0.49914634146341463, "grad_norm": 0.4052133560180664, "learning_rate": 1.6672357723577237e-05, "loss": 0.0675, "step": 40930 }, { "epoch": 0.49920731707317073, "grad_norm": 0.5893931984901428, "learning_rate": 1.6671951219512198e-05, "loss": 0.0568, "step": 40935 }, { "epoch": 0.49926829268292683, "grad_norm": 0.6117910146713257, "learning_rate": 1.6671544715447156e-05, "loss": 0.0674, "step": 40940 }, { "epoch": 0.4993292682926829, "grad_norm": 1.325364589691162, "learning_rate": 1.6671138211382115e-05, "loss": 0.0739, "step": 40945 }, { "epoch": 0.499390243902439, "grad_norm": 0.4380321204662323, "learning_rate": 1.6670731707317073e-05, "loss": 0.0687, "step": 40950 }, { "epoch": 0.4994512195121951, "grad_norm": 1.1140329837799072, "learning_rate": 1.6670325203252034e-05, "loss": 0.0516, "step": 40955 }, { "epoch": 0.4995121951219512, "grad_norm": 0.6106957793235779, "learning_rate": 1.6669918699186992e-05, "loss": 0.084, "step": 40960 }, { "epoch": 0.4995731707317073, "grad_norm": 0.8633924126625061, "learning_rate": 1.6669512195121954e-05, "loss": 0.0821, "step": 40965 }, { "epoch": 0.4996341463414634, "grad_norm": 0.45201602578163147, "learning_rate": 1.6669105691056912e-05, "loss": 0.0533, "step": 40970 }, { "epoch": 0.4996951219512195, "grad_norm": 0.376871794462204, "learning_rate": 1.666869918699187e-05, "loss": 0.0647, "step": 40975 }, { "epoch": 0.4997560975609756, "grad_norm": 0.9629954099655151, "learning_rate": 1.666829268292683e-05, "loss": 0.1011, "step": 40980 }, { "epoch": 0.4998170731707317, "grad_norm": 1.0563316345214844, "learning_rate": 1.666788617886179e-05, "loss": 0.0864, "step": 40985 }, { "epoch": 0.4998780487804878, "grad_norm": 0.609881579875946, "learning_rate": 1.6667479674796748e-05, "loss": 0.0498, "step": 40990 }, { "epoch": 0.4999390243902439, "grad_norm": 0.49890702962875366, "learning_rate": 1.666707317073171e-05, "loss": 0.0361, "step": 40995 }, { "epoch": 0.5, "grad_norm": 0.4290303885936737, "learning_rate": 1.6666666666666667e-05, "loss": 0.0451, "step": 41000 }, { "epoch": 0.500060975609756, "grad_norm": 0.6446478366851807, "learning_rate": 1.666626016260163e-05, "loss": 0.0684, "step": 41005 }, { "epoch": 0.5001219512195122, "grad_norm": 0.39346763491630554, "learning_rate": 1.6665853658536587e-05, "loss": 0.042, "step": 41010 }, { "epoch": 0.5001829268292682, "grad_norm": 0.4361616373062134, "learning_rate": 1.6665447154471545e-05, "loss": 0.0955, "step": 41015 }, { "epoch": 0.5002439024390244, "grad_norm": 0.8088859915733337, "learning_rate": 1.6665040650406507e-05, "loss": 0.0626, "step": 41020 }, { "epoch": 0.5003048780487804, "grad_norm": 2.1286535263061523, "learning_rate": 1.6664634146341465e-05, "loss": 0.0629, "step": 41025 }, { "epoch": 0.5003658536585366, "grad_norm": 0.7800548672676086, "learning_rate": 1.6664227642276426e-05, "loss": 0.1035, "step": 41030 }, { "epoch": 0.5004268292682926, "grad_norm": 0.43841636180877686, "learning_rate": 1.6663821138211385e-05, "loss": 0.0363, "step": 41035 }, { "epoch": 0.5004878048780488, "grad_norm": 1.1760005950927734, "learning_rate": 1.6663414634146343e-05, "loss": 0.0834, "step": 41040 }, { "epoch": 0.5005487804878048, "grad_norm": 0.5117418169975281, "learning_rate": 1.66630081300813e-05, "loss": 0.0717, "step": 41045 }, { "epoch": 0.500609756097561, "grad_norm": 0.7727751135826111, "learning_rate": 1.6662601626016262e-05, "loss": 0.0813, "step": 41050 }, { "epoch": 0.500670731707317, "grad_norm": 0.5149505734443665, "learning_rate": 1.666219512195122e-05, "loss": 0.087, "step": 41055 }, { "epoch": 0.5007317073170732, "grad_norm": 0.8686973452568054, "learning_rate": 1.6661788617886182e-05, "loss": 0.0967, "step": 41060 }, { "epoch": 0.5007926829268292, "grad_norm": 0.5463046431541443, "learning_rate": 1.666138211382114e-05, "loss": 0.0693, "step": 41065 }, { "epoch": 0.5008536585365854, "grad_norm": 0.5179818272590637, "learning_rate": 1.6660975609756098e-05, "loss": 0.0614, "step": 41070 }, { "epoch": 0.5009146341463414, "grad_norm": 4.192155361175537, "learning_rate": 1.6660569105691056e-05, "loss": 0.075, "step": 41075 }, { "epoch": 0.5009756097560976, "grad_norm": 0.815808117389679, "learning_rate": 1.6660162601626018e-05, "loss": 0.0567, "step": 41080 }, { "epoch": 0.5010365853658536, "grad_norm": 0.6752005815505981, "learning_rate": 1.6659756097560976e-05, "loss": 0.101, "step": 41085 }, { "epoch": 0.5010975609756098, "grad_norm": 0.48968061804771423, "learning_rate": 1.6659349593495937e-05, "loss": 0.0621, "step": 41090 }, { "epoch": 0.5011585365853658, "grad_norm": 0.6651204228401184, "learning_rate": 1.6658943089430896e-05, "loss": 0.0572, "step": 41095 }, { "epoch": 0.501219512195122, "grad_norm": 0.5313819050788879, "learning_rate": 1.6658536585365854e-05, "loss": 0.059, "step": 41100 }, { "epoch": 0.501280487804878, "grad_norm": 0.6782267689704895, "learning_rate": 1.6658130081300815e-05, "loss": 0.0438, "step": 41105 }, { "epoch": 0.5013414634146341, "grad_norm": 0.8051092624664307, "learning_rate": 1.6657723577235773e-05, "loss": 0.0752, "step": 41110 }, { "epoch": 0.5014024390243902, "grad_norm": 0.6932505965232849, "learning_rate": 1.6657317073170735e-05, "loss": 0.0976, "step": 41115 }, { "epoch": 0.5014634146341463, "grad_norm": 0.5411792993545532, "learning_rate": 1.6656910569105693e-05, "loss": 0.0612, "step": 41120 }, { "epoch": 0.5015243902439024, "grad_norm": 0.7541526556015015, "learning_rate": 1.6656504065040655e-05, "loss": 0.0567, "step": 41125 }, { "epoch": 0.5015853658536585, "grad_norm": 0.7101311683654785, "learning_rate": 1.665609756097561e-05, "loss": 0.0785, "step": 41130 }, { "epoch": 0.5016463414634146, "grad_norm": 0.5424279570579529, "learning_rate": 1.665569105691057e-05, "loss": 0.0831, "step": 41135 }, { "epoch": 0.5017073170731707, "grad_norm": 0.48810967803001404, "learning_rate": 1.665528455284553e-05, "loss": 0.0609, "step": 41140 }, { "epoch": 0.5017682926829268, "grad_norm": 0.6085863709449768, "learning_rate": 1.665487804878049e-05, "loss": 0.0729, "step": 41145 }, { "epoch": 0.5018292682926829, "grad_norm": 1.0492141246795654, "learning_rate": 1.665447154471545e-05, "loss": 0.0549, "step": 41150 }, { "epoch": 0.501890243902439, "grad_norm": 0.7767472863197327, "learning_rate": 1.665406504065041e-05, "loss": 0.0698, "step": 41155 }, { "epoch": 0.5019512195121951, "grad_norm": 0.6405445337295532, "learning_rate": 1.6653658536585365e-05, "loss": 0.0375, "step": 41160 }, { "epoch": 0.5020121951219512, "grad_norm": 0.8385785818099976, "learning_rate": 1.6653252032520326e-05, "loss": 0.0427, "step": 41165 }, { "epoch": 0.5020731707317073, "grad_norm": 0.4810633659362793, "learning_rate": 1.6652845528455284e-05, "loss": 0.0731, "step": 41170 }, { "epoch": 0.5021341463414634, "grad_norm": 3.95995831489563, "learning_rate": 1.6652439024390246e-05, "loss": 0.0874, "step": 41175 }, { "epoch": 0.5021951219512195, "grad_norm": 0.29739484190940857, "learning_rate": 1.6652032520325204e-05, "loss": 0.0654, "step": 41180 }, { "epoch": 0.5022560975609756, "grad_norm": 0.9811228513717651, "learning_rate": 1.6651626016260166e-05, "loss": 0.0824, "step": 41185 }, { "epoch": 0.5023170731707317, "grad_norm": 0.6579861044883728, "learning_rate": 1.6651219512195124e-05, "loss": 0.1044, "step": 41190 }, { "epoch": 0.5023780487804878, "grad_norm": 1.0083692073822021, "learning_rate": 1.6650813008130082e-05, "loss": 0.0573, "step": 41195 }, { "epoch": 0.5024390243902439, "grad_norm": 0.5903338193893433, "learning_rate": 1.6650406504065043e-05, "loss": 0.0601, "step": 41200 }, { "epoch": 0.5025, "grad_norm": 0.999758780002594, "learning_rate": 1.665e-05, "loss": 0.051, "step": 41205 }, { "epoch": 0.5025609756097561, "grad_norm": 0.5213822722434998, "learning_rate": 1.6649593495934963e-05, "loss": 0.0791, "step": 41210 }, { "epoch": 0.5026219512195121, "grad_norm": 0.6080747842788696, "learning_rate": 1.664918699186992e-05, "loss": 0.0667, "step": 41215 }, { "epoch": 0.5026829268292683, "grad_norm": 0.36960819363594055, "learning_rate": 1.664878048780488e-05, "loss": 0.0502, "step": 41220 }, { "epoch": 0.5027439024390243, "grad_norm": 0.6987136602401733, "learning_rate": 1.6648373983739837e-05, "loss": 0.0869, "step": 41225 }, { "epoch": 0.5028048780487805, "grad_norm": 0.38721656799316406, "learning_rate": 1.66479674796748e-05, "loss": 0.0808, "step": 41230 }, { "epoch": 0.5028658536585365, "grad_norm": 0.8067484498023987, "learning_rate": 1.6647560975609757e-05, "loss": 0.1099, "step": 41235 }, { "epoch": 0.5029268292682927, "grad_norm": 0.6968461275100708, "learning_rate": 1.664715447154472e-05, "loss": 0.0704, "step": 41240 }, { "epoch": 0.5029878048780487, "grad_norm": 1.0960620641708374, "learning_rate": 1.6646747967479677e-05, "loss": 0.0865, "step": 41245 }, { "epoch": 0.5030487804878049, "grad_norm": 2.147622585296631, "learning_rate": 1.6646341463414635e-05, "loss": 0.0701, "step": 41250 }, { "epoch": 0.5031097560975609, "grad_norm": 0.8963888883590698, "learning_rate": 1.6645934959349593e-05, "loss": 0.0491, "step": 41255 }, { "epoch": 0.5031707317073171, "grad_norm": 0.3477191627025604, "learning_rate": 1.6645528455284554e-05, "loss": 0.0772, "step": 41260 }, { "epoch": 0.5032317073170731, "grad_norm": 0.7663754820823669, "learning_rate": 1.6645121951219513e-05, "loss": 0.058, "step": 41265 }, { "epoch": 0.5032926829268293, "grad_norm": 0.3540864884853363, "learning_rate": 1.6644715447154474e-05, "loss": 0.0508, "step": 41270 }, { "epoch": 0.5033536585365853, "grad_norm": 0.6262661218643188, "learning_rate": 1.6644308943089432e-05, "loss": 0.0795, "step": 41275 }, { "epoch": 0.5034146341463415, "grad_norm": 0.36535969376564026, "learning_rate": 1.664390243902439e-05, "loss": 0.0423, "step": 41280 }, { "epoch": 0.5034756097560975, "grad_norm": 0.5662842392921448, "learning_rate": 1.6643495934959352e-05, "loss": 0.0686, "step": 41285 }, { "epoch": 0.5035365853658537, "grad_norm": 0.9670826196670532, "learning_rate": 1.664308943089431e-05, "loss": 0.0754, "step": 41290 }, { "epoch": 0.5035975609756097, "grad_norm": 0.6811863780021667, "learning_rate": 1.664268292682927e-05, "loss": 0.1061, "step": 41295 }, { "epoch": 0.5036585365853659, "grad_norm": 0.5744580626487732, "learning_rate": 1.664227642276423e-05, "loss": 0.0571, "step": 41300 }, { "epoch": 0.5037195121951219, "grad_norm": 0.9857569932937622, "learning_rate": 1.6641869918699188e-05, "loss": 0.082, "step": 41305 }, { "epoch": 0.503780487804878, "grad_norm": 0.5536259412765503, "learning_rate": 1.6641463414634146e-05, "loss": 0.0683, "step": 41310 }, { "epoch": 0.5038414634146341, "grad_norm": 0.30299389362335205, "learning_rate": 1.6641056910569107e-05, "loss": 0.0737, "step": 41315 }, { "epoch": 0.5039024390243902, "grad_norm": 0.448758065700531, "learning_rate": 1.6640650406504066e-05, "loss": 0.0786, "step": 41320 }, { "epoch": 0.5039634146341463, "grad_norm": 0.4958294630050659, "learning_rate": 1.6640243902439027e-05, "loss": 0.1096, "step": 41325 }, { "epoch": 0.5040243902439024, "grad_norm": 0.7251121401786804, "learning_rate": 1.6639837398373985e-05, "loss": 0.0672, "step": 41330 }, { "epoch": 0.5040853658536585, "grad_norm": 1.1900025606155396, "learning_rate": 1.6639430894308947e-05, "loss": 0.0646, "step": 41335 }, { "epoch": 0.5041463414634146, "grad_norm": 0.4528842270374298, "learning_rate": 1.66390243902439e-05, "loss": 0.0469, "step": 41340 }, { "epoch": 0.5042073170731707, "grad_norm": 0.7840995788574219, "learning_rate": 1.6638617886178863e-05, "loss": 0.0597, "step": 41345 }, { "epoch": 0.5042682926829268, "grad_norm": 0.5812646150588989, "learning_rate": 1.663821138211382e-05, "loss": 0.0583, "step": 41350 }, { "epoch": 0.5043292682926829, "grad_norm": 0.8051973581314087, "learning_rate": 1.6637804878048783e-05, "loss": 0.0931, "step": 41355 }, { "epoch": 0.504390243902439, "grad_norm": 0.7165017127990723, "learning_rate": 1.663739837398374e-05, "loss": 0.1039, "step": 41360 }, { "epoch": 0.5044512195121951, "grad_norm": 0.5486645698547363, "learning_rate": 1.6636991869918702e-05, "loss": 0.0496, "step": 41365 }, { "epoch": 0.5045121951219512, "grad_norm": 0.6384528875350952, "learning_rate": 1.663658536585366e-05, "loss": 0.0704, "step": 41370 }, { "epoch": 0.5045731707317073, "grad_norm": 0.6254171133041382, "learning_rate": 1.663617886178862e-05, "loss": 0.0603, "step": 41375 }, { "epoch": 0.5046341463414634, "grad_norm": 1.1116679906845093, "learning_rate": 1.663577235772358e-05, "loss": 0.0719, "step": 41380 }, { "epoch": 0.5046951219512195, "grad_norm": 0.7776381373405457, "learning_rate": 1.6635365853658538e-05, "loss": 0.0369, "step": 41385 }, { "epoch": 0.5047560975609756, "grad_norm": 0.8585925698280334, "learning_rate": 1.66349593495935e-05, "loss": 0.0755, "step": 41390 }, { "epoch": 0.5048170731707317, "grad_norm": 0.5524944067001343, "learning_rate": 1.6634552845528458e-05, "loss": 0.0765, "step": 41395 }, { "epoch": 0.5048780487804878, "grad_norm": 0.5360748767852783, "learning_rate": 1.6634146341463416e-05, "loss": 0.0598, "step": 41400 }, { "epoch": 0.5049390243902439, "grad_norm": 0.5114712119102478, "learning_rate": 1.6633739837398374e-05, "loss": 0.0715, "step": 41405 }, { "epoch": 0.505, "grad_norm": 0.719463586807251, "learning_rate": 1.6633333333333336e-05, "loss": 0.0751, "step": 41410 }, { "epoch": 0.505060975609756, "grad_norm": 0.432214617729187, "learning_rate": 1.6632926829268294e-05, "loss": 0.0503, "step": 41415 }, { "epoch": 0.5051219512195122, "grad_norm": 1.179665207862854, "learning_rate": 1.6632520325203255e-05, "loss": 0.085, "step": 41420 }, { "epoch": 0.5051829268292682, "grad_norm": 0.3330470025539398, "learning_rate": 1.6632113821138213e-05, "loss": 0.0576, "step": 41425 }, { "epoch": 0.5052439024390244, "grad_norm": 0.5965701341629028, "learning_rate": 1.663170731707317e-05, "loss": 0.082, "step": 41430 }, { "epoch": 0.5053048780487804, "grad_norm": 0.5422409176826477, "learning_rate": 1.663130081300813e-05, "loss": 0.0613, "step": 41435 }, { "epoch": 0.5053658536585366, "grad_norm": 0.34726208448410034, "learning_rate": 1.663089430894309e-05, "loss": 0.0779, "step": 41440 }, { "epoch": 0.5054268292682926, "grad_norm": 0.7938610911369324, "learning_rate": 1.663048780487805e-05, "loss": 0.0807, "step": 41445 }, { "epoch": 0.5054878048780488, "grad_norm": 0.5896340012550354, "learning_rate": 1.663008130081301e-05, "loss": 0.0612, "step": 41450 }, { "epoch": 0.5055487804878048, "grad_norm": 0.42128410935401917, "learning_rate": 1.662967479674797e-05, "loss": 0.1005, "step": 41455 }, { "epoch": 0.505609756097561, "grad_norm": 0.600800096988678, "learning_rate": 1.6629268292682927e-05, "loss": 0.0724, "step": 41460 }, { "epoch": 0.505670731707317, "grad_norm": 1.3729937076568604, "learning_rate": 1.662886178861789e-05, "loss": 0.0886, "step": 41465 }, { "epoch": 0.5057317073170732, "grad_norm": 0.4168824255466461, "learning_rate": 1.6628455284552847e-05, "loss": 0.0928, "step": 41470 }, { "epoch": 0.5057926829268292, "grad_norm": 0.6853724122047424, "learning_rate": 1.6628048780487808e-05, "loss": 0.0666, "step": 41475 }, { "epoch": 0.5058536585365854, "grad_norm": 0.42237988114356995, "learning_rate": 1.6627642276422766e-05, "loss": 0.0761, "step": 41480 }, { "epoch": 0.5059146341463414, "grad_norm": 0.4911814332008362, "learning_rate": 1.6627235772357724e-05, "loss": 0.06, "step": 41485 }, { "epoch": 0.5059756097560976, "grad_norm": 0.7243801951408386, "learning_rate": 1.6626829268292683e-05, "loss": 0.0548, "step": 41490 }, { "epoch": 0.5060365853658536, "grad_norm": 0.7215788960456848, "learning_rate": 1.6626422764227644e-05, "loss": 0.072, "step": 41495 }, { "epoch": 0.5060975609756098, "grad_norm": 0.4008924067020416, "learning_rate": 1.6626016260162602e-05, "loss": 0.0583, "step": 41500 }, { "epoch": 0.5061585365853658, "grad_norm": 0.6060939431190491, "learning_rate": 1.6625609756097564e-05, "loss": 0.0648, "step": 41505 }, { "epoch": 0.506219512195122, "grad_norm": 1.721359372138977, "learning_rate": 1.6625203252032522e-05, "loss": 0.0877, "step": 41510 }, { "epoch": 0.506280487804878, "grad_norm": 1.4889274835586548, "learning_rate": 1.6624796747967483e-05, "loss": 0.0427, "step": 41515 }, { "epoch": 0.5063414634146342, "grad_norm": 0.729904294013977, "learning_rate": 1.6624390243902438e-05, "loss": 0.064, "step": 41520 }, { "epoch": 0.5064024390243902, "grad_norm": 0.8916338086128235, "learning_rate": 1.66239837398374e-05, "loss": 0.0574, "step": 41525 }, { "epoch": 0.5064634146341463, "grad_norm": 1.253117561340332, "learning_rate": 1.6623577235772358e-05, "loss": 0.0629, "step": 41530 }, { "epoch": 0.5065243902439024, "grad_norm": 0.6027287244796753, "learning_rate": 1.662317073170732e-05, "loss": 0.0966, "step": 41535 }, { "epoch": 0.5065853658536585, "grad_norm": 0.7328367829322815, "learning_rate": 1.6622764227642277e-05, "loss": 0.067, "step": 41540 }, { "epoch": 0.5066463414634146, "grad_norm": 0.541625440120697, "learning_rate": 1.662235772357724e-05, "loss": 0.0926, "step": 41545 }, { "epoch": 0.5067073170731707, "grad_norm": 0.4602893888950348, "learning_rate": 1.6621951219512197e-05, "loss": 0.0627, "step": 41550 }, { "epoch": 0.5067682926829268, "grad_norm": 0.47631698846817017, "learning_rate": 1.6621544715447155e-05, "loss": 0.0725, "step": 41555 }, { "epoch": 0.5068292682926829, "grad_norm": 0.8600790500640869, "learning_rate": 1.6621138211382117e-05, "loss": 0.0866, "step": 41560 }, { "epoch": 0.506890243902439, "grad_norm": 1.138753890991211, "learning_rate": 1.6620731707317075e-05, "loss": 0.0869, "step": 41565 }, { "epoch": 0.5069512195121951, "grad_norm": 0.4089665412902832, "learning_rate": 1.6620325203252033e-05, "loss": 0.0596, "step": 41570 }, { "epoch": 0.5070121951219512, "grad_norm": 0.68991619348526, "learning_rate": 1.6619918699186994e-05, "loss": 0.0574, "step": 41575 }, { "epoch": 0.5070731707317073, "grad_norm": 0.9743543863296509, "learning_rate": 1.6619512195121953e-05, "loss": 0.0656, "step": 41580 }, { "epoch": 0.5071341463414634, "grad_norm": 0.5677157640457153, "learning_rate": 1.661910569105691e-05, "loss": 0.0502, "step": 41585 }, { "epoch": 0.5071951219512195, "grad_norm": 0.8770663142204285, "learning_rate": 1.6618699186991872e-05, "loss": 0.0601, "step": 41590 }, { "epoch": 0.5072560975609756, "grad_norm": 0.6087868213653564, "learning_rate": 1.661829268292683e-05, "loss": 0.0504, "step": 41595 }, { "epoch": 0.5073170731707317, "grad_norm": 0.8504387736320496, "learning_rate": 1.6617886178861792e-05, "loss": 0.0531, "step": 41600 }, { "epoch": 0.5073780487804878, "grad_norm": 0.6577669382095337, "learning_rate": 1.661747967479675e-05, "loss": 0.0957, "step": 41605 }, { "epoch": 0.5074390243902439, "grad_norm": 0.537520706653595, "learning_rate": 1.6617073170731708e-05, "loss": 0.0402, "step": 41610 }, { "epoch": 0.5075, "grad_norm": 0.649164617061615, "learning_rate": 1.6616666666666666e-05, "loss": 0.0518, "step": 41615 }, { "epoch": 0.5075609756097561, "grad_norm": 0.8748321533203125, "learning_rate": 1.6616260162601628e-05, "loss": 0.055, "step": 41620 }, { "epoch": 0.5076219512195121, "grad_norm": 0.6294457912445068, "learning_rate": 1.6615853658536586e-05, "loss": 0.0606, "step": 41625 }, { "epoch": 0.5076829268292683, "grad_norm": 0.8291483521461487, "learning_rate": 1.6615447154471547e-05, "loss": 0.0589, "step": 41630 }, { "epoch": 0.5077439024390243, "grad_norm": 0.4585660696029663, "learning_rate": 1.6615040650406506e-05, "loss": 0.0518, "step": 41635 }, { "epoch": 0.5078048780487805, "grad_norm": 0.6893411874771118, "learning_rate": 1.6614634146341464e-05, "loss": 0.078, "step": 41640 }, { "epoch": 0.5078658536585365, "grad_norm": 0.8765332102775574, "learning_rate": 1.6614227642276425e-05, "loss": 0.0703, "step": 41645 }, { "epoch": 0.5079268292682927, "grad_norm": 0.5887928009033203, "learning_rate": 1.6613821138211383e-05, "loss": 0.0558, "step": 41650 }, { "epoch": 0.5079878048780487, "grad_norm": 0.5647588968276978, "learning_rate": 1.6613414634146345e-05, "loss": 0.0461, "step": 41655 }, { "epoch": 0.5080487804878049, "grad_norm": 0.4135199785232544, "learning_rate": 1.6613008130081303e-05, "loss": 0.0383, "step": 41660 }, { "epoch": 0.5081097560975609, "grad_norm": 0.42031699419021606, "learning_rate": 1.661260162601626e-05, "loss": 0.0723, "step": 41665 }, { "epoch": 0.5081707317073171, "grad_norm": 0.41200917959213257, "learning_rate": 1.661219512195122e-05, "loss": 0.0764, "step": 41670 }, { "epoch": 0.5082317073170731, "grad_norm": 0.8643887042999268, "learning_rate": 1.661178861788618e-05, "loss": 0.0766, "step": 41675 }, { "epoch": 0.5082926829268293, "grad_norm": 0.2781254053115845, "learning_rate": 1.661138211382114e-05, "loss": 0.1225, "step": 41680 }, { "epoch": 0.5083536585365853, "grad_norm": 0.33967843651771545, "learning_rate": 1.66109756097561e-05, "loss": 0.0669, "step": 41685 }, { "epoch": 0.5084146341463415, "grad_norm": 0.4346765875816345, "learning_rate": 1.661056910569106e-05, "loss": 0.0429, "step": 41690 }, { "epoch": 0.5084756097560975, "grad_norm": 0.5987429618835449, "learning_rate": 1.661016260162602e-05, "loss": 0.0575, "step": 41695 }, { "epoch": 0.5085365853658537, "grad_norm": 0.6531281471252441, "learning_rate": 1.6609756097560975e-05, "loss": 0.0702, "step": 41700 }, { "epoch": 0.5085975609756097, "grad_norm": 1.183536410331726, "learning_rate": 1.6609349593495936e-05, "loss": 0.1031, "step": 41705 }, { "epoch": 0.5086585365853659, "grad_norm": 0.6263442635536194, "learning_rate": 1.6608943089430894e-05, "loss": 0.083, "step": 41710 }, { "epoch": 0.5087195121951219, "grad_norm": 0.7086445093154907, "learning_rate": 1.6608536585365856e-05, "loss": 0.0803, "step": 41715 }, { "epoch": 0.5087804878048781, "grad_norm": 0.39466172456741333, "learning_rate": 1.6608130081300814e-05, "loss": 0.0827, "step": 41720 }, { "epoch": 0.5088414634146341, "grad_norm": 0.5032965540885925, "learning_rate": 1.6607723577235776e-05, "loss": 0.0592, "step": 41725 }, { "epoch": 0.5089024390243903, "grad_norm": 0.7506973147392273, "learning_rate": 1.6607317073170734e-05, "loss": 0.0915, "step": 41730 }, { "epoch": 0.5089634146341463, "grad_norm": 3.488673686981201, "learning_rate": 1.6606910569105692e-05, "loss": 0.0532, "step": 41735 }, { "epoch": 0.5090243902439024, "grad_norm": 0.9580563902854919, "learning_rate": 1.6606504065040653e-05, "loss": 0.0641, "step": 41740 }, { "epoch": 0.5090853658536585, "grad_norm": 0.7064599394798279, "learning_rate": 1.660609756097561e-05, "loss": 0.0548, "step": 41745 }, { "epoch": 0.5091463414634146, "grad_norm": 0.7132721543312073, "learning_rate": 1.660569105691057e-05, "loss": 0.0664, "step": 41750 }, { "epoch": 0.5092073170731707, "grad_norm": 0.7552917003631592, "learning_rate": 1.660528455284553e-05, "loss": 0.0692, "step": 41755 }, { "epoch": 0.5092682926829268, "grad_norm": 0.9029223322868347, "learning_rate": 1.660487804878049e-05, "loss": 0.0689, "step": 41760 }, { "epoch": 0.5093292682926829, "grad_norm": 0.884052038192749, "learning_rate": 1.6604471544715447e-05, "loss": 0.0653, "step": 41765 }, { "epoch": 0.509390243902439, "grad_norm": 0.5634772777557373, "learning_rate": 1.660406504065041e-05, "loss": 0.0606, "step": 41770 }, { "epoch": 0.5094512195121951, "grad_norm": 0.693662703037262, "learning_rate": 1.6603658536585367e-05, "loss": 0.0724, "step": 41775 }, { "epoch": 0.5095121951219512, "grad_norm": 0.510761022567749, "learning_rate": 1.660325203252033e-05, "loss": 0.053, "step": 41780 }, { "epoch": 0.5095731707317073, "grad_norm": 2.6148550510406494, "learning_rate": 1.6602845528455287e-05, "loss": 0.0723, "step": 41785 }, { "epoch": 0.5096341463414634, "grad_norm": 0.5645589828491211, "learning_rate": 1.6602439024390245e-05, "loss": 0.075, "step": 41790 }, { "epoch": 0.5096951219512195, "grad_norm": 0.7261945605278015, "learning_rate": 1.6602032520325203e-05, "loss": 0.066, "step": 41795 }, { "epoch": 0.5097560975609756, "grad_norm": 0.8711902499198914, "learning_rate": 1.6601626016260164e-05, "loss": 0.0592, "step": 41800 }, { "epoch": 0.5098170731707317, "grad_norm": 0.6892122030258179, "learning_rate": 1.6601219512195123e-05, "loss": 0.1111, "step": 41805 }, { "epoch": 0.5098780487804878, "grad_norm": 0.4238922595977783, "learning_rate": 1.6600813008130084e-05, "loss": 0.0874, "step": 41810 }, { "epoch": 0.5099390243902439, "grad_norm": 1.2365928888320923, "learning_rate": 1.6600406504065042e-05, "loss": 0.0716, "step": 41815 }, { "epoch": 0.51, "grad_norm": 0.4175623655319214, "learning_rate": 1.66e-05, "loss": 0.075, "step": 41820 }, { "epoch": 0.510060975609756, "grad_norm": 0.6319650411605835, "learning_rate": 1.6599593495934962e-05, "loss": 0.0472, "step": 41825 }, { "epoch": 0.5101219512195122, "grad_norm": 0.8719133138656616, "learning_rate": 1.659918699186992e-05, "loss": 0.0578, "step": 41830 }, { "epoch": 0.5101829268292682, "grad_norm": 0.6995454430580139, "learning_rate": 1.6598780487804878e-05, "loss": 0.0745, "step": 41835 }, { "epoch": 0.5102439024390244, "grad_norm": 0.6938385963439941, "learning_rate": 1.659837398373984e-05, "loss": 0.0685, "step": 41840 }, { "epoch": 0.5103048780487804, "grad_norm": 0.5278089046478271, "learning_rate": 1.6597967479674798e-05, "loss": 0.067, "step": 41845 }, { "epoch": 0.5103658536585366, "grad_norm": 0.34152668714523315, "learning_rate": 1.6597560975609756e-05, "loss": 0.0493, "step": 41850 }, { "epoch": 0.5104268292682926, "grad_norm": 0.6083194017410278, "learning_rate": 1.6597154471544717e-05, "loss": 0.0793, "step": 41855 }, { "epoch": 0.5104878048780488, "grad_norm": 0.6539497971534729, "learning_rate": 1.6596747967479675e-05, "loss": 0.0782, "step": 41860 }, { "epoch": 0.5105487804878048, "grad_norm": 0.6303013563156128, "learning_rate": 1.6596341463414637e-05, "loss": 0.1154, "step": 41865 }, { "epoch": 0.510609756097561, "grad_norm": 0.5647299289703369, "learning_rate": 1.6595934959349595e-05, "loss": 0.064, "step": 41870 }, { "epoch": 0.510670731707317, "grad_norm": 0.41319799423217773, "learning_rate": 1.6595528455284557e-05, "loss": 0.0486, "step": 41875 }, { "epoch": 0.5107317073170732, "grad_norm": 1.1107027530670166, "learning_rate": 1.659512195121951e-05, "loss": 0.0696, "step": 41880 }, { "epoch": 0.5107926829268292, "grad_norm": 0.9330357909202576, "learning_rate": 1.6594715447154473e-05, "loss": 0.0794, "step": 41885 }, { "epoch": 0.5108536585365854, "grad_norm": 0.7140757441520691, "learning_rate": 1.659430894308943e-05, "loss": 0.0788, "step": 41890 }, { "epoch": 0.5109146341463414, "grad_norm": 0.31072551012039185, "learning_rate": 1.6593902439024393e-05, "loss": 0.0577, "step": 41895 }, { "epoch": 0.5109756097560976, "grad_norm": 0.9381476044654846, "learning_rate": 1.659349593495935e-05, "loss": 0.0653, "step": 41900 }, { "epoch": 0.5110365853658536, "grad_norm": 0.365024209022522, "learning_rate": 1.6593089430894312e-05, "loss": 0.0431, "step": 41905 }, { "epoch": 0.5110975609756098, "grad_norm": 0.6421962380409241, "learning_rate": 1.659268292682927e-05, "loss": 0.0476, "step": 41910 }, { "epoch": 0.5111585365853658, "grad_norm": 0.8550094962120056, "learning_rate": 1.659227642276423e-05, "loss": 0.0785, "step": 41915 }, { "epoch": 0.511219512195122, "grad_norm": 0.8012333512306213, "learning_rate": 1.659186991869919e-05, "loss": 0.0475, "step": 41920 }, { "epoch": 0.511280487804878, "grad_norm": 0.8064114451408386, "learning_rate": 1.6591463414634148e-05, "loss": 0.0764, "step": 41925 }, { "epoch": 0.5113414634146342, "grad_norm": 0.4469958543777466, "learning_rate": 1.6591056910569106e-05, "loss": 0.0569, "step": 41930 }, { "epoch": 0.5114024390243902, "grad_norm": 0.8555260300636292, "learning_rate": 1.6590650406504068e-05, "loss": 0.0616, "step": 41935 }, { "epoch": 0.5114634146341464, "grad_norm": 1.003027319908142, "learning_rate": 1.6590243902439026e-05, "loss": 0.0781, "step": 41940 }, { "epoch": 0.5115243902439024, "grad_norm": 0.6097857356071472, "learning_rate": 1.6589837398373984e-05, "loss": 0.0605, "step": 41945 }, { "epoch": 0.5115853658536585, "grad_norm": 1.257228970527649, "learning_rate": 1.6589430894308946e-05, "loss": 0.0799, "step": 41950 }, { "epoch": 0.5116463414634146, "grad_norm": 4.604062080383301, "learning_rate": 1.6589024390243904e-05, "loss": 0.0617, "step": 41955 }, { "epoch": 0.5117073170731707, "grad_norm": 0.7231483459472656, "learning_rate": 1.6588617886178865e-05, "loss": 0.0857, "step": 41960 }, { "epoch": 0.5117682926829268, "grad_norm": 1.237036108970642, "learning_rate": 1.6588211382113823e-05, "loss": 0.0691, "step": 41965 }, { "epoch": 0.5118292682926829, "grad_norm": 0.7218025922775269, "learning_rate": 1.658780487804878e-05, "loss": 0.0365, "step": 41970 }, { "epoch": 0.511890243902439, "grad_norm": 0.5498093366622925, "learning_rate": 1.658739837398374e-05, "loss": 0.0489, "step": 41975 }, { "epoch": 0.5119512195121951, "grad_norm": 0.6915355920791626, "learning_rate": 1.65869918699187e-05, "loss": 0.0867, "step": 41980 }, { "epoch": 0.5120121951219512, "grad_norm": 0.6150110363960266, "learning_rate": 1.658658536585366e-05, "loss": 0.0883, "step": 41985 }, { "epoch": 0.5120731707317073, "grad_norm": 1.2547791004180908, "learning_rate": 1.658617886178862e-05, "loss": 0.088, "step": 41990 }, { "epoch": 0.5121341463414634, "grad_norm": 0.37310606241226196, "learning_rate": 1.658577235772358e-05, "loss": 0.0335, "step": 41995 }, { "epoch": 0.5121951219512195, "grad_norm": 0.5498539805412292, "learning_rate": 1.6585365853658537e-05, "loss": 0.0626, "step": 42000 }, { "epoch": 0.5122560975609756, "grad_norm": 0.8336356282234192, "learning_rate": 1.65849593495935e-05, "loss": 0.0546, "step": 42005 }, { "epoch": 0.5123170731707317, "grad_norm": 0.682270348072052, "learning_rate": 1.6584552845528457e-05, "loss": 0.0677, "step": 42010 }, { "epoch": 0.5123780487804878, "grad_norm": 0.424254447221756, "learning_rate": 1.6584146341463415e-05, "loss": 0.0957, "step": 42015 }, { "epoch": 0.5124390243902439, "grad_norm": 0.7735625505447388, "learning_rate": 1.6583739837398376e-05, "loss": 0.0648, "step": 42020 }, { "epoch": 0.5125, "grad_norm": 0.6607657670974731, "learning_rate": 1.6583333333333334e-05, "loss": 0.066, "step": 42025 }, { "epoch": 0.5125609756097561, "grad_norm": 1.1088916063308716, "learning_rate": 1.6582926829268292e-05, "loss": 0.0726, "step": 42030 }, { "epoch": 0.5126219512195122, "grad_norm": 0.8877241015434265, "learning_rate": 1.6582520325203254e-05, "loss": 0.0669, "step": 42035 }, { "epoch": 0.5126829268292683, "grad_norm": 0.39323174953460693, "learning_rate": 1.6582113821138212e-05, "loss": 0.0767, "step": 42040 }, { "epoch": 0.5127439024390243, "grad_norm": 0.5377587080001831, "learning_rate": 1.6581707317073174e-05, "loss": 0.0754, "step": 42045 }, { "epoch": 0.5128048780487805, "grad_norm": 0.4473234713077545, "learning_rate": 1.6581300813008132e-05, "loss": 0.0622, "step": 42050 }, { "epoch": 0.5128658536585365, "grad_norm": 0.5706641674041748, "learning_rate": 1.6580894308943093e-05, "loss": 0.0716, "step": 42055 }, { "epoch": 0.5129268292682927, "grad_norm": 1.9952616691589355, "learning_rate": 1.6580487804878048e-05, "loss": 0.0624, "step": 42060 }, { "epoch": 0.5129878048780487, "grad_norm": 0.4561997354030609, "learning_rate": 1.658008130081301e-05, "loss": 0.0597, "step": 42065 }, { "epoch": 0.5130487804878049, "grad_norm": 0.6121014952659607, "learning_rate": 1.6579674796747968e-05, "loss": 0.0535, "step": 42070 }, { "epoch": 0.5131097560975609, "grad_norm": 0.4057575464248657, "learning_rate": 1.657926829268293e-05, "loss": 0.0762, "step": 42075 }, { "epoch": 0.5131707317073171, "grad_norm": 0.6811978816986084, "learning_rate": 1.6578861788617887e-05, "loss": 0.0665, "step": 42080 }, { "epoch": 0.5132317073170731, "grad_norm": 0.5905300974845886, "learning_rate": 1.657845528455285e-05, "loss": 0.0824, "step": 42085 }, { "epoch": 0.5132926829268293, "grad_norm": 0.8771873116493225, "learning_rate": 1.6578048780487807e-05, "loss": 0.0961, "step": 42090 }, { "epoch": 0.5133536585365853, "grad_norm": 1.254164457321167, "learning_rate": 1.6577642276422765e-05, "loss": 0.05, "step": 42095 }, { "epoch": 0.5134146341463415, "grad_norm": 0.5033044219017029, "learning_rate": 1.6577235772357723e-05, "loss": 0.0422, "step": 42100 }, { "epoch": 0.5134756097560975, "grad_norm": 0.40141940116882324, "learning_rate": 1.6576829268292685e-05, "loss": 0.063, "step": 42105 }, { "epoch": 0.5135365853658537, "grad_norm": 1.642143964767456, "learning_rate": 1.6576422764227643e-05, "loss": 0.0716, "step": 42110 }, { "epoch": 0.5135975609756097, "grad_norm": 0.8499727845191956, "learning_rate": 1.6576016260162604e-05, "loss": 0.0812, "step": 42115 }, { "epoch": 0.5136585365853659, "grad_norm": 0.529181957244873, "learning_rate": 1.6575609756097563e-05, "loss": 0.0688, "step": 42120 }, { "epoch": 0.5137195121951219, "grad_norm": 0.7061687111854553, "learning_rate": 1.657520325203252e-05, "loss": 0.0672, "step": 42125 }, { "epoch": 0.5137804878048781, "grad_norm": 1.837317705154419, "learning_rate": 1.6574796747967482e-05, "loss": 0.0685, "step": 42130 }, { "epoch": 0.5138414634146341, "grad_norm": 0.4307575821876526, "learning_rate": 1.657439024390244e-05, "loss": 0.0678, "step": 42135 }, { "epoch": 0.5139024390243903, "grad_norm": 0.6123636960983276, "learning_rate": 1.6573983739837402e-05, "loss": 0.0512, "step": 42140 }, { "epoch": 0.5139634146341463, "grad_norm": 0.5818728804588318, "learning_rate": 1.657357723577236e-05, "loss": 0.0983, "step": 42145 }, { "epoch": 0.5140243902439025, "grad_norm": 1.5371119976043701, "learning_rate": 1.6573170731707318e-05, "loss": 0.049, "step": 42150 }, { "epoch": 0.5140853658536585, "grad_norm": 0.6770710945129395, "learning_rate": 1.6572764227642276e-05, "loss": 0.0477, "step": 42155 }, { "epoch": 0.5141463414634146, "grad_norm": 0.43413442373275757, "learning_rate": 1.6572357723577238e-05, "loss": 0.0558, "step": 42160 }, { "epoch": 0.5142073170731707, "grad_norm": 0.7265070676803589, "learning_rate": 1.6571951219512196e-05, "loss": 0.0591, "step": 42165 }, { "epoch": 0.5142682926829268, "grad_norm": 0.4868791103363037, "learning_rate": 1.6571544715447157e-05, "loss": 0.0636, "step": 42170 }, { "epoch": 0.5143292682926829, "grad_norm": 0.9522831439971924, "learning_rate": 1.6571138211382115e-05, "loss": 0.0562, "step": 42175 }, { "epoch": 0.514390243902439, "grad_norm": 0.7712357640266418, "learning_rate": 1.6570731707317074e-05, "loss": 0.0644, "step": 42180 }, { "epoch": 0.5144512195121951, "grad_norm": 1.0330458879470825, "learning_rate": 1.6570325203252035e-05, "loss": 0.0561, "step": 42185 }, { "epoch": 0.5145121951219512, "grad_norm": 0.6934689283370972, "learning_rate": 1.6569918699186993e-05, "loss": 0.0541, "step": 42190 }, { "epoch": 0.5145731707317073, "grad_norm": 2.581052541732788, "learning_rate": 1.656951219512195e-05, "loss": 0.0919, "step": 42195 }, { "epoch": 0.5146341463414634, "grad_norm": 0.5278347134590149, "learning_rate": 1.6569105691056913e-05, "loss": 0.0798, "step": 42200 }, { "epoch": 0.5146951219512195, "grad_norm": 1.0666379928588867, "learning_rate": 1.656869918699187e-05, "loss": 0.0656, "step": 42205 }, { "epoch": 0.5147560975609756, "grad_norm": 0.9430740475654602, "learning_rate": 1.656829268292683e-05, "loss": 0.0949, "step": 42210 }, { "epoch": 0.5148170731707317, "grad_norm": 2.722888708114624, "learning_rate": 1.656788617886179e-05, "loss": 0.0864, "step": 42215 }, { "epoch": 0.5148780487804878, "grad_norm": 1.0341050624847412, "learning_rate": 1.656747967479675e-05, "loss": 0.077, "step": 42220 }, { "epoch": 0.5149390243902439, "grad_norm": 1.1874619722366333, "learning_rate": 1.656707317073171e-05, "loss": 0.0547, "step": 42225 }, { "epoch": 0.515, "grad_norm": 0.5503732562065125, "learning_rate": 1.656666666666667e-05, "loss": 0.0546, "step": 42230 }, { "epoch": 0.515060975609756, "grad_norm": 0.4354923367500305, "learning_rate": 1.656626016260163e-05, "loss": 0.0559, "step": 42235 }, { "epoch": 0.5151219512195122, "grad_norm": 0.3638727068901062, "learning_rate": 1.6565853658536585e-05, "loss": 0.057, "step": 42240 }, { "epoch": 0.5151829268292683, "grad_norm": 0.5623024702072144, "learning_rate": 1.6565447154471546e-05, "loss": 0.0707, "step": 42245 }, { "epoch": 0.5152439024390244, "grad_norm": 0.6932303309440613, "learning_rate": 1.6565040650406504e-05, "loss": 0.0597, "step": 42250 }, { "epoch": 0.5153048780487804, "grad_norm": 0.7744956016540527, "learning_rate": 1.6564634146341466e-05, "loss": 0.1107, "step": 42255 }, { "epoch": 0.5153658536585366, "grad_norm": 1.858762264251709, "learning_rate": 1.6564227642276424e-05, "loss": 0.0749, "step": 42260 }, { "epoch": 0.5154268292682926, "grad_norm": 1.0051707029342651, "learning_rate": 1.6563821138211385e-05, "loss": 0.0448, "step": 42265 }, { "epoch": 0.5154878048780488, "grad_norm": 1.6632708311080933, "learning_rate": 1.6563414634146344e-05, "loss": 0.071, "step": 42270 }, { "epoch": 0.5155487804878048, "grad_norm": 1.267120122909546, "learning_rate": 1.6563008130081302e-05, "loss": 0.0798, "step": 42275 }, { "epoch": 0.515609756097561, "grad_norm": 1.4578239917755127, "learning_rate": 1.656260162601626e-05, "loss": 0.0761, "step": 42280 }, { "epoch": 0.515670731707317, "grad_norm": 0.29574042558670044, "learning_rate": 1.656219512195122e-05, "loss": 0.0754, "step": 42285 }, { "epoch": 0.5157317073170732, "grad_norm": 0.6582397222518921, "learning_rate": 1.656178861788618e-05, "loss": 0.0597, "step": 42290 }, { "epoch": 0.5157926829268292, "grad_norm": 0.32617685198783875, "learning_rate": 1.656138211382114e-05, "loss": 0.0767, "step": 42295 }, { "epoch": 0.5158536585365854, "grad_norm": 0.9738189578056335, "learning_rate": 1.65609756097561e-05, "loss": 0.0619, "step": 42300 }, { "epoch": 0.5159146341463414, "grad_norm": 0.5647790431976318, "learning_rate": 1.6560569105691057e-05, "loss": 0.043, "step": 42305 }, { "epoch": 0.5159756097560976, "grad_norm": 0.7667173147201538, "learning_rate": 1.656016260162602e-05, "loss": 0.0533, "step": 42310 }, { "epoch": 0.5160365853658536, "grad_norm": 1.3668326139450073, "learning_rate": 1.6559756097560977e-05, "loss": 0.0817, "step": 42315 }, { "epoch": 0.5160975609756098, "grad_norm": 0.4839946925640106, "learning_rate": 1.655934959349594e-05, "loss": 0.0842, "step": 42320 }, { "epoch": 0.5161585365853658, "grad_norm": 0.9295654296875, "learning_rate": 1.6558943089430897e-05, "loss": 0.0633, "step": 42325 }, { "epoch": 0.516219512195122, "grad_norm": 0.7695014476776123, "learning_rate": 1.6558536585365855e-05, "loss": 0.0573, "step": 42330 }, { "epoch": 0.516280487804878, "grad_norm": 1.548233151435852, "learning_rate": 1.6558130081300813e-05, "loss": 0.0747, "step": 42335 }, { "epoch": 0.5163414634146342, "grad_norm": 0.9067152142524719, "learning_rate": 1.6557723577235774e-05, "loss": 0.0789, "step": 42340 }, { "epoch": 0.5164024390243902, "grad_norm": 0.6626863479614258, "learning_rate": 1.6557317073170732e-05, "loss": 0.0856, "step": 42345 }, { "epoch": 0.5164634146341464, "grad_norm": 1.059216856956482, "learning_rate": 1.6556910569105694e-05, "loss": 0.1012, "step": 42350 }, { "epoch": 0.5165243902439024, "grad_norm": 0.813052237033844, "learning_rate": 1.6556504065040652e-05, "loss": 0.0983, "step": 42355 }, { "epoch": 0.5165853658536586, "grad_norm": 0.2705915570259094, "learning_rate": 1.655609756097561e-05, "loss": 0.0429, "step": 42360 }, { "epoch": 0.5166463414634146, "grad_norm": 0.661828875541687, "learning_rate": 1.655569105691057e-05, "loss": 0.063, "step": 42365 }, { "epoch": 0.5167073170731707, "grad_norm": 0.6312505006790161, "learning_rate": 1.655528455284553e-05, "loss": 0.0585, "step": 42370 }, { "epoch": 0.5167682926829268, "grad_norm": 0.7480800747871399, "learning_rate": 1.6554878048780488e-05, "loss": 0.096, "step": 42375 }, { "epoch": 0.5168292682926829, "grad_norm": 0.6113834977149963, "learning_rate": 1.655447154471545e-05, "loss": 0.0611, "step": 42380 }, { "epoch": 0.516890243902439, "grad_norm": 0.5796951651573181, "learning_rate": 1.6554065040650408e-05, "loss": 0.0806, "step": 42385 }, { "epoch": 0.5169512195121951, "grad_norm": 1.4900453090667725, "learning_rate": 1.6553658536585366e-05, "loss": 0.0764, "step": 42390 }, { "epoch": 0.5170121951219512, "grad_norm": 0.48089897632598877, "learning_rate": 1.6553252032520327e-05, "loss": 0.0341, "step": 42395 }, { "epoch": 0.5170731707317073, "grad_norm": 1.1162824630737305, "learning_rate": 1.6552845528455285e-05, "loss": 0.0449, "step": 42400 }, { "epoch": 0.5171341463414634, "grad_norm": 1.367745280265808, "learning_rate": 1.6552439024390247e-05, "loss": 0.0599, "step": 42405 }, { "epoch": 0.5171951219512195, "grad_norm": 0.792407751083374, "learning_rate": 1.6552032520325205e-05, "loss": 0.0751, "step": 42410 }, { "epoch": 0.5172560975609756, "grad_norm": 1.0217552185058594, "learning_rate": 1.6551626016260167e-05, "loss": 0.1009, "step": 42415 }, { "epoch": 0.5173170731707317, "grad_norm": 0.6704054474830627, "learning_rate": 1.655121951219512e-05, "loss": 0.0531, "step": 42420 }, { "epoch": 0.5173780487804878, "grad_norm": 0.8264194130897522, "learning_rate": 1.6550813008130083e-05, "loss": 0.0661, "step": 42425 }, { "epoch": 0.5174390243902439, "grad_norm": 0.5499744415283203, "learning_rate": 1.655040650406504e-05, "loss": 0.0505, "step": 42430 }, { "epoch": 0.5175, "grad_norm": 0.8280108571052551, "learning_rate": 1.6550000000000002e-05, "loss": 0.0627, "step": 42435 }, { "epoch": 0.5175609756097561, "grad_norm": 3.1869137287139893, "learning_rate": 1.654959349593496e-05, "loss": 0.1021, "step": 42440 }, { "epoch": 0.5176219512195122, "grad_norm": 0.28233906626701355, "learning_rate": 1.6549186991869922e-05, "loss": 0.0555, "step": 42445 }, { "epoch": 0.5176829268292683, "grad_norm": 0.41708385944366455, "learning_rate": 1.654878048780488e-05, "loss": 0.0654, "step": 42450 }, { "epoch": 0.5177439024390244, "grad_norm": 0.6343851685523987, "learning_rate": 1.654837398373984e-05, "loss": 0.0486, "step": 42455 }, { "epoch": 0.5178048780487805, "grad_norm": 0.5521255731582642, "learning_rate": 1.6547967479674797e-05, "loss": 0.088, "step": 42460 }, { "epoch": 0.5178658536585365, "grad_norm": 0.6408767700195312, "learning_rate": 1.6547560975609758e-05, "loss": 0.0502, "step": 42465 }, { "epoch": 0.5179268292682927, "grad_norm": 0.4120924770832062, "learning_rate": 1.6547154471544716e-05, "loss": 0.0442, "step": 42470 }, { "epoch": 0.5179878048780487, "grad_norm": 0.4681524634361267, "learning_rate": 1.6546747967479678e-05, "loss": 0.0751, "step": 42475 }, { "epoch": 0.5180487804878049, "grad_norm": 0.6179767847061157, "learning_rate": 1.6546341463414636e-05, "loss": 0.0524, "step": 42480 }, { "epoch": 0.5181097560975609, "grad_norm": 0.853474497795105, "learning_rate": 1.6545934959349594e-05, "loss": 0.0654, "step": 42485 }, { "epoch": 0.5181707317073171, "grad_norm": 0.9236553907394409, "learning_rate": 1.6545528455284555e-05, "loss": 0.0511, "step": 42490 }, { "epoch": 0.5182317073170731, "grad_norm": 0.6832364201545715, "learning_rate": 1.6545121951219514e-05, "loss": 0.0698, "step": 42495 }, { "epoch": 0.5182926829268293, "grad_norm": 0.5562902688980103, "learning_rate": 1.6544715447154475e-05, "loss": 0.0933, "step": 42500 }, { "epoch": 0.5183536585365853, "grad_norm": 0.7238534688949585, "learning_rate": 1.6544308943089433e-05, "loss": 0.0608, "step": 42505 }, { "epoch": 0.5184146341463415, "grad_norm": 0.9071009159088135, "learning_rate": 1.654390243902439e-05, "loss": 0.0757, "step": 42510 }, { "epoch": 0.5184756097560975, "grad_norm": 0.6553727388381958, "learning_rate": 1.654349593495935e-05, "loss": 0.0519, "step": 42515 }, { "epoch": 0.5185365853658537, "grad_norm": 0.6158544421195984, "learning_rate": 1.654308943089431e-05, "loss": 0.0621, "step": 42520 }, { "epoch": 0.5185975609756097, "grad_norm": 1.3014376163482666, "learning_rate": 1.654268292682927e-05, "loss": 0.0522, "step": 42525 }, { "epoch": 0.5186585365853659, "grad_norm": 1.2611243724822998, "learning_rate": 1.654227642276423e-05, "loss": 0.0921, "step": 42530 }, { "epoch": 0.5187195121951219, "grad_norm": 0.859193742275238, "learning_rate": 1.654186991869919e-05, "loss": 0.0606, "step": 42535 }, { "epoch": 0.5187804878048781, "grad_norm": 0.4188627004623413, "learning_rate": 1.6541463414634147e-05, "loss": 0.0438, "step": 42540 }, { "epoch": 0.5188414634146341, "grad_norm": 1.4525493383407593, "learning_rate": 1.6541056910569105e-05, "loss": 0.0926, "step": 42545 }, { "epoch": 0.5189024390243903, "grad_norm": 0.8224895000457764, "learning_rate": 1.6540650406504067e-05, "loss": 0.0945, "step": 42550 }, { "epoch": 0.5189634146341463, "grad_norm": 1.2820546627044678, "learning_rate": 1.6540243902439025e-05, "loss": 0.0999, "step": 42555 }, { "epoch": 0.5190243902439025, "grad_norm": 0.6514593958854675, "learning_rate": 1.6539837398373986e-05, "loss": 0.0772, "step": 42560 }, { "epoch": 0.5190853658536585, "grad_norm": 0.8635206818580627, "learning_rate": 1.6539430894308944e-05, "loss": 0.089, "step": 42565 }, { "epoch": 0.5191463414634147, "grad_norm": 0.685551643371582, "learning_rate": 1.6539024390243902e-05, "loss": 0.066, "step": 42570 }, { "epoch": 0.5192073170731707, "grad_norm": 0.38484516739845276, "learning_rate": 1.6538617886178864e-05, "loss": 0.0542, "step": 42575 }, { "epoch": 0.5192682926829268, "grad_norm": 0.3925882875919342, "learning_rate": 1.6538211382113822e-05, "loss": 0.0839, "step": 42580 }, { "epoch": 0.5193292682926829, "grad_norm": 0.8235354423522949, "learning_rate": 1.6537804878048784e-05, "loss": 0.0516, "step": 42585 }, { "epoch": 0.519390243902439, "grad_norm": 0.502479612827301, "learning_rate": 1.6537398373983742e-05, "loss": 0.0519, "step": 42590 }, { "epoch": 0.5194512195121951, "grad_norm": 0.9600520730018616, "learning_rate": 1.6536991869918703e-05, "loss": 0.0722, "step": 42595 }, { "epoch": 0.5195121951219512, "grad_norm": 0.8907895088195801, "learning_rate": 1.6536585365853658e-05, "loss": 0.0628, "step": 42600 }, { "epoch": 0.5195731707317073, "grad_norm": 2.8647167682647705, "learning_rate": 1.653617886178862e-05, "loss": 0.0598, "step": 42605 }, { "epoch": 0.5196341463414634, "grad_norm": 0.6488530039787292, "learning_rate": 1.6535772357723578e-05, "loss": 0.0704, "step": 42610 }, { "epoch": 0.5196951219512195, "grad_norm": 0.6044294238090515, "learning_rate": 1.653536585365854e-05, "loss": 0.0476, "step": 42615 }, { "epoch": 0.5197560975609756, "grad_norm": 0.7026792764663696, "learning_rate": 1.6534959349593497e-05, "loss": 0.0697, "step": 42620 }, { "epoch": 0.5198170731707317, "grad_norm": 0.799055278301239, "learning_rate": 1.653455284552846e-05, "loss": 0.0883, "step": 42625 }, { "epoch": 0.5198780487804878, "grad_norm": 0.5877866148948669, "learning_rate": 1.6534146341463414e-05, "loss": 0.0734, "step": 42630 }, { "epoch": 0.5199390243902439, "grad_norm": 0.6059884428977966, "learning_rate": 1.6533739837398375e-05, "loss": 0.0504, "step": 42635 }, { "epoch": 0.52, "grad_norm": 0.6867831349372864, "learning_rate": 1.6533333333333333e-05, "loss": 0.0778, "step": 42640 }, { "epoch": 0.5200609756097561, "grad_norm": 0.42562082409858704, "learning_rate": 1.6532926829268295e-05, "loss": 0.068, "step": 42645 }, { "epoch": 0.5201219512195122, "grad_norm": 1.1345776319503784, "learning_rate": 1.6532520325203253e-05, "loss": 0.0972, "step": 42650 }, { "epoch": 0.5201829268292683, "grad_norm": 0.7102606892585754, "learning_rate": 1.6532113821138214e-05, "loss": 0.0532, "step": 42655 }, { "epoch": 0.5202439024390244, "grad_norm": 0.7282924652099609, "learning_rate": 1.6531707317073172e-05, "loss": 0.057, "step": 42660 }, { "epoch": 0.5203048780487805, "grad_norm": 0.78034907579422, "learning_rate": 1.653130081300813e-05, "loss": 0.0757, "step": 42665 }, { "epoch": 0.5203658536585366, "grad_norm": 1.023579478263855, "learning_rate": 1.6530894308943092e-05, "loss": 0.0849, "step": 42670 }, { "epoch": 0.5204268292682926, "grad_norm": 0.5267450213432312, "learning_rate": 1.653048780487805e-05, "loss": 0.0919, "step": 42675 }, { "epoch": 0.5204878048780488, "grad_norm": 0.6715111136436462, "learning_rate": 1.6530081300813012e-05, "loss": 0.0491, "step": 42680 }, { "epoch": 0.5205487804878048, "grad_norm": 1.0060256719589233, "learning_rate": 1.652967479674797e-05, "loss": 0.0815, "step": 42685 }, { "epoch": 0.520609756097561, "grad_norm": 0.5802152156829834, "learning_rate": 1.6529268292682928e-05, "loss": 0.0869, "step": 42690 }, { "epoch": 0.520670731707317, "grad_norm": 0.5585908889770508, "learning_rate": 1.6528861788617886e-05, "loss": 0.0529, "step": 42695 }, { "epoch": 0.5207317073170732, "grad_norm": 1.2202191352844238, "learning_rate": 1.6528455284552848e-05, "loss": 0.1331, "step": 42700 }, { "epoch": 0.5207926829268292, "grad_norm": 0.68610680103302, "learning_rate": 1.6528048780487806e-05, "loss": 0.0422, "step": 42705 }, { "epoch": 0.5208536585365854, "grad_norm": 1.046149492263794, "learning_rate": 1.6527642276422767e-05, "loss": 0.052, "step": 42710 }, { "epoch": 0.5209146341463414, "grad_norm": 0.8829363584518433, "learning_rate": 1.6527235772357725e-05, "loss": 0.065, "step": 42715 }, { "epoch": 0.5209756097560976, "grad_norm": 0.5656507015228271, "learning_rate": 1.6526829268292684e-05, "loss": 0.0665, "step": 42720 }, { "epoch": 0.5210365853658536, "grad_norm": 1.0758459568023682, "learning_rate": 1.652642276422764e-05, "loss": 0.0538, "step": 42725 }, { "epoch": 0.5210975609756098, "grad_norm": 0.5020009875297546, "learning_rate": 1.6526016260162603e-05, "loss": 0.0906, "step": 42730 }, { "epoch": 0.5211585365853658, "grad_norm": 0.8041163682937622, "learning_rate": 1.652560975609756e-05, "loss": 0.1001, "step": 42735 }, { "epoch": 0.521219512195122, "grad_norm": 0.4047401547431946, "learning_rate": 1.6525203252032523e-05, "loss": 0.0426, "step": 42740 }, { "epoch": 0.521280487804878, "grad_norm": 0.5643680095672607, "learning_rate": 1.652479674796748e-05, "loss": 0.085, "step": 42745 }, { "epoch": 0.5213414634146342, "grad_norm": 0.6161405444145203, "learning_rate": 1.652439024390244e-05, "loss": 0.0482, "step": 42750 }, { "epoch": 0.5214024390243902, "grad_norm": 0.6261500120162964, "learning_rate": 1.65239837398374e-05, "loss": 0.0707, "step": 42755 }, { "epoch": 0.5214634146341464, "grad_norm": 0.7695078253746033, "learning_rate": 1.652357723577236e-05, "loss": 0.0621, "step": 42760 }, { "epoch": 0.5215243902439024, "grad_norm": 1.0809688568115234, "learning_rate": 1.652317073170732e-05, "loss": 0.088, "step": 42765 }, { "epoch": 0.5215853658536586, "grad_norm": 0.8744638562202454, "learning_rate": 1.652276422764228e-05, "loss": 0.0655, "step": 42770 }, { "epoch": 0.5216463414634146, "grad_norm": 0.7474693059921265, "learning_rate": 1.6522357723577236e-05, "loss": 0.083, "step": 42775 }, { "epoch": 0.5217073170731708, "grad_norm": 0.346910297870636, "learning_rate": 1.6521951219512195e-05, "loss": 0.0596, "step": 42780 }, { "epoch": 0.5217682926829268, "grad_norm": 0.5942515730857849, "learning_rate": 1.6521544715447156e-05, "loss": 0.0613, "step": 42785 }, { "epoch": 0.521829268292683, "grad_norm": 0.40849485993385315, "learning_rate": 1.6521138211382114e-05, "loss": 0.059, "step": 42790 }, { "epoch": 0.521890243902439, "grad_norm": 0.765293538570404, "learning_rate": 1.6520731707317076e-05, "loss": 0.0572, "step": 42795 }, { "epoch": 0.5219512195121951, "grad_norm": 0.5992496013641357, "learning_rate": 1.6520325203252034e-05, "loss": 0.0749, "step": 42800 }, { "epoch": 0.5220121951219512, "grad_norm": 0.9593546390533447, "learning_rate": 1.6519918699186995e-05, "loss": 0.0732, "step": 42805 }, { "epoch": 0.5220731707317073, "grad_norm": 0.5654363632202148, "learning_rate": 1.651951219512195e-05, "loss": 0.0635, "step": 42810 }, { "epoch": 0.5221341463414634, "grad_norm": 0.6574358344078064, "learning_rate": 1.651910569105691e-05, "loss": 0.0593, "step": 42815 }, { "epoch": 0.5221951219512195, "grad_norm": 0.6057453155517578, "learning_rate": 1.651869918699187e-05, "loss": 0.0505, "step": 42820 }, { "epoch": 0.5222560975609756, "grad_norm": 2.4091744422912598, "learning_rate": 1.651829268292683e-05, "loss": 0.0839, "step": 42825 }, { "epoch": 0.5223170731707317, "grad_norm": 0.5215833187103271, "learning_rate": 1.651788617886179e-05, "loss": 0.0639, "step": 42830 }, { "epoch": 0.5223780487804878, "grad_norm": 1.3310832977294922, "learning_rate": 1.651747967479675e-05, "loss": 0.0758, "step": 42835 }, { "epoch": 0.5224390243902439, "grad_norm": 2.2339706420898438, "learning_rate": 1.651707317073171e-05, "loss": 0.0635, "step": 42840 }, { "epoch": 0.5225, "grad_norm": 0.5939825773239136, "learning_rate": 1.6516666666666667e-05, "loss": 0.0981, "step": 42845 }, { "epoch": 0.5225609756097561, "grad_norm": 0.765026867389679, "learning_rate": 1.651626016260163e-05, "loss": 0.0659, "step": 42850 }, { "epoch": 0.5226219512195122, "grad_norm": 0.6124969124794006, "learning_rate": 1.6515853658536587e-05, "loss": 0.0889, "step": 42855 }, { "epoch": 0.5226829268292683, "grad_norm": 0.6197335124015808, "learning_rate": 1.6515447154471545e-05, "loss": 0.0864, "step": 42860 }, { "epoch": 0.5227439024390244, "grad_norm": 0.8079968690872192, "learning_rate": 1.6515040650406506e-05, "loss": 0.0635, "step": 42865 }, { "epoch": 0.5228048780487805, "grad_norm": 0.5544997453689575, "learning_rate": 1.6514634146341465e-05, "loss": 0.0877, "step": 42870 }, { "epoch": 0.5228658536585366, "grad_norm": 0.4822181165218353, "learning_rate": 1.6514227642276423e-05, "loss": 0.0581, "step": 42875 }, { "epoch": 0.5229268292682927, "grad_norm": 1.282533049583435, "learning_rate": 1.6513821138211384e-05, "loss": 0.0665, "step": 42880 }, { "epoch": 0.5229878048780487, "grad_norm": 0.8407026529312134, "learning_rate": 1.6513414634146342e-05, "loss": 0.0964, "step": 42885 }, { "epoch": 0.5230487804878049, "grad_norm": 0.560130774974823, "learning_rate": 1.6513008130081304e-05, "loss": 0.092, "step": 42890 }, { "epoch": 0.5231097560975609, "grad_norm": 0.9610886573791504, "learning_rate": 1.6512601626016262e-05, "loss": 0.0456, "step": 42895 }, { "epoch": 0.5231707317073171, "grad_norm": 0.4997268617153168, "learning_rate": 1.651219512195122e-05, "loss": 0.0857, "step": 42900 }, { "epoch": 0.5232317073170731, "grad_norm": 0.5080631971359253, "learning_rate": 1.6511788617886178e-05, "loss": 0.091, "step": 42905 }, { "epoch": 0.5232926829268293, "grad_norm": 2.523298978805542, "learning_rate": 1.651138211382114e-05, "loss": 0.0753, "step": 42910 }, { "epoch": 0.5233536585365853, "grad_norm": 0.4707251191139221, "learning_rate": 1.6510975609756098e-05, "loss": 0.0571, "step": 42915 }, { "epoch": 0.5234146341463415, "grad_norm": 0.6686190962791443, "learning_rate": 1.651056910569106e-05, "loss": 0.0402, "step": 42920 }, { "epoch": 0.5234756097560975, "grad_norm": 0.628066897392273, "learning_rate": 1.6510162601626018e-05, "loss": 0.0657, "step": 42925 }, { "epoch": 0.5235365853658537, "grad_norm": 0.2880648970603943, "learning_rate": 1.6509756097560976e-05, "loss": 0.0833, "step": 42930 }, { "epoch": 0.5235975609756097, "grad_norm": 0.5809152126312256, "learning_rate": 1.6509349593495937e-05, "loss": 0.0632, "step": 42935 }, { "epoch": 0.5236585365853659, "grad_norm": 1.7596261501312256, "learning_rate": 1.6508943089430895e-05, "loss": 0.0694, "step": 42940 }, { "epoch": 0.5237195121951219, "grad_norm": 1.1397627592086792, "learning_rate": 1.6508536585365857e-05, "loss": 0.0892, "step": 42945 }, { "epoch": 0.5237804878048781, "grad_norm": 0.5523167252540588, "learning_rate": 1.6508130081300815e-05, "loss": 0.065, "step": 42950 }, { "epoch": 0.5238414634146341, "grad_norm": 0.44386792182922363, "learning_rate": 1.6507723577235773e-05, "loss": 0.0488, "step": 42955 }, { "epoch": 0.5239024390243903, "grad_norm": 0.5184454321861267, "learning_rate": 1.650731707317073e-05, "loss": 0.0859, "step": 42960 }, { "epoch": 0.5239634146341463, "grad_norm": 0.8504844903945923, "learning_rate": 1.6506910569105693e-05, "loss": 0.1091, "step": 42965 }, { "epoch": 0.5240243902439025, "grad_norm": 0.6140804886817932, "learning_rate": 1.650650406504065e-05, "loss": 0.0348, "step": 42970 }, { "epoch": 0.5240853658536585, "grad_norm": 0.6543145179748535, "learning_rate": 1.6506097560975612e-05, "loss": 0.0699, "step": 42975 }, { "epoch": 0.5241463414634147, "grad_norm": 1.441278100013733, "learning_rate": 1.650569105691057e-05, "loss": 0.0598, "step": 42980 }, { "epoch": 0.5242073170731707, "grad_norm": 0.5687981843948364, "learning_rate": 1.6505284552845532e-05, "loss": 0.0588, "step": 42985 }, { "epoch": 0.5242682926829269, "grad_norm": 0.8542706966400146, "learning_rate": 1.6504878048780487e-05, "loss": 0.0608, "step": 42990 }, { "epoch": 0.5243292682926829, "grad_norm": 0.576076090335846, "learning_rate": 1.650447154471545e-05, "loss": 0.0731, "step": 42995 }, { "epoch": 0.524390243902439, "grad_norm": 0.6614680290222168, "learning_rate": 1.6504065040650406e-05, "loss": 0.0723, "step": 43000 }, { "epoch": 0.5244512195121951, "grad_norm": 0.542726993560791, "learning_rate": 1.6503658536585368e-05, "loss": 0.0548, "step": 43005 }, { "epoch": 0.5245121951219512, "grad_norm": 0.5787425637245178, "learning_rate": 1.6503252032520326e-05, "loss": 0.0763, "step": 43010 }, { "epoch": 0.5245731707317073, "grad_norm": 1.4176998138427734, "learning_rate": 1.6502845528455288e-05, "loss": 0.0583, "step": 43015 }, { "epoch": 0.5246341463414634, "grad_norm": 0.43239474296569824, "learning_rate": 1.6502439024390246e-05, "loss": 0.0613, "step": 43020 }, { "epoch": 0.5246951219512195, "grad_norm": 0.6145423054695129, "learning_rate": 1.6502032520325204e-05, "loss": 0.0829, "step": 43025 }, { "epoch": 0.5247560975609756, "grad_norm": 0.42233380675315857, "learning_rate": 1.6501626016260165e-05, "loss": 0.0643, "step": 43030 }, { "epoch": 0.5248170731707317, "grad_norm": 0.4112658202648163, "learning_rate": 1.6501219512195123e-05, "loss": 0.0767, "step": 43035 }, { "epoch": 0.5248780487804878, "grad_norm": 1.157707691192627, "learning_rate": 1.650081300813008e-05, "loss": 0.0444, "step": 43040 }, { "epoch": 0.5249390243902439, "grad_norm": 0.6210154294967651, "learning_rate": 1.6500406504065043e-05, "loss": 0.061, "step": 43045 }, { "epoch": 0.525, "grad_norm": 1.0014830827713013, "learning_rate": 1.65e-05, "loss": 0.0747, "step": 43050 }, { "epoch": 0.5250609756097561, "grad_norm": 0.5527933239936829, "learning_rate": 1.649959349593496e-05, "loss": 0.0537, "step": 43055 }, { "epoch": 0.5251219512195122, "grad_norm": 0.5302569270133972, "learning_rate": 1.649918699186992e-05, "loss": 0.0533, "step": 43060 }, { "epoch": 0.5251829268292683, "grad_norm": 2.31906795501709, "learning_rate": 1.649878048780488e-05, "loss": 0.0904, "step": 43065 }, { "epoch": 0.5252439024390244, "grad_norm": 0.4279942214488983, "learning_rate": 1.649837398373984e-05, "loss": 0.053, "step": 43070 }, { "epoch": 0.5253048780487805, "grad_norm": 1.1016089916229248, "learning_rate": 1.64979674796748e-05, "loss": 0.0588, "step": 43075 }, { "epoch": 0.5253658536585366, "grad_norm": 0.4306219518184662, "learning_rate": 1.6497560975609757e-05, "loss": 0.0337, "step": 43080 }, { "epoch": 0.5254268292682926, "grad_norm": 0.6557444334030151, "learning_rate": 1.6497154471544715e-05, "loss": 0.0785, "step": 43085 }, { "epoch": 0.5254878048780488, "grad_norm": 0.8731124997138977, "learning_rate": 1.6496747967479676e-05, "loss": 0.0485, "step": 43090 }, { "epoch": 0.5255487804878048, "grad_norm": 1.0763477087020874, "learning_rate": 1.6496341463414635e-05, "loss": 0.0905, "step": 43095 }, { "epoch": 0.525609756097561, "grad_norm": 0.5431528687477112, "learning_rate": 1.6495934959349596e-05, "loss": 0.0428, "step": 43100 }, { "epoch": 0.525670731707317, "grad_norm": 0.7954915165901184, "learning_rate": 1.6495528455284554e-05, "loss": 0.0666, "step": 43105 }, { "epoch": 0.5257317073170732, "grad_norm": 0.9414682984352112, "learning_rate": 1.6495121951219512e-05, "loss": 0.0824, "step": 43110 }, { "epoch": 0.5257926829268292, "grad_norm": 0.44931867718696594, "learning_rate": 1.6494715447154474e-05, "loss": 0.0415, "step": 43115 }, { "epoch": 0.5258536585365854, "grad_norm": 0.7227495908737183, "learning_rate": 1.6494308943089432e-05, "loss": 0.0515, "step": 43120 }, { "epoch": 0.5259146341463414, "grad_norm": 1.9220342636108398, "learning_rate": 1.649390243902439e-05, "loss": 0.0903, "step": 43125 }, { "epoch": 0.5259756097560976, "grad_norm": 0.6179423332214355, "learning_rate": 1.649349593495935e-05, "loss": 0.073, "step": 43130 }, { "epoch": 0.5260365853658536, "grad_norm": 1.0474194288253784, "learning_rate": 1.649308943089431e-05, "loss": 0.0455, "step": 43135 }, { "epoch": 0.5260975609756098, "grad_norm": 0.6836456656455994, "learning_rate": 1.6492682926829268e-05, "loss": 0.0391, "step": 43140 }, { "epoch": 0.5261585365853658, "grad_norm": 0.8476439714431763, "learning_rate": 1.649227642276423e-05, "loss": 0.0533, "step": 43145 }, { "epoch": 0.526219512195122, "grad_norm": 0.7234227657318115, "learning_rate": 1.6491869918699188e-05, "loss": 0.0355, "step": 43150 }, { "epoch": 0.526280487804878, "grad_norm": 0.4154992997646332, "learning_rate": 1.649146341463415e-05, "loss": 0.0622, "step": 43155 }, { "epoch": 0.5263414634146342, "grad_norm": 0.3387569487094879, "learning_rate": 1.6491056910569107e-05, "loss": 0.0577, "step": 43160 }, { "epoch": 0.5264024390243902, "grad_norm": 0.6737422347068787, "learning_rate": 1.649065040650407e-05, "loss": 0.0514, "step": 43165 }, { "epoch": 0.5264634146341464, "grad_norm": 0.39025139808654785, "learning_rate": 1.6490243902439023e-05, "loss": 0.0601, "step": 43170 }, { "epoch": 0.5265243902439024, "grad_norm": 0.5254591703414917, "learning_rate": 1.6489837398373985e-05, "loss": 0.0755, "step": 43175 }, { "epoch": 0.5265853658536586, "grad_norm": 0.6216243505477905, "learning_rate": 1.6489430894308943e-05, "loss": 0.1078, "step": 43180 }, { "epoch": 0.5266463414634146, "grad_norm": 0.8964706659317017, "learning_rate": 1.6489024390243905e-05, "loss": 0.0748, "step": 43185 }, { "epoch": 0.5267073170731708, "grad_norm": 4.202348709106445, "learning_rate": 1.6488617886178863e-05, "loss": 0.078, "step": 43190 }, { "epoch": 0.5267682926829268, "grad_norm": 0.6259968876838684, "learning_rate": 1.6488211382113824e-05, "loss": 0.0324, "step": 43195 }, { "epoch": 0.526829268292683, "grad_norm": 0.6431142091751099, "learning_rate": 1.6487804878048782e-05, "loss": 0.069, "step": 43200 }, { "epoch": 0.526890243902439, "grad_norm": 0.40381017327308655, "learning_rate": 1.648739837398374e-05, "loss": 0.07, "step": 43205 }, { "epoch": 0.5269512195121951, "grad_norm": 0.9717616438865662, "learning_rate": 1.6486991869918702e-05, "loss": 0.0716, "step": 43210 }, { "epoch": 0.5270121951219512, "grad_norm": 0.697197675704956, "learning_rate": 1.648658536585366e-05, "loss": 0.0547, "step": 43215 }, { "epoch": 0.5270731707317073, "grad_norm": 1.1681641340255737, "learning_rate": 1.6486178861788618e-05, "loss": 0.0722, "step": 43220 }, { "epoch": 0.5271341463414634, "grad_norm": 0.7920675277709961, "learning_rate": 1.648577235772358e-05, "loss": 0.0556, "step": 43225 }, { "epoch": 0.5271951219512195, "grad_norm": 0.5679035782814026, "learning_rate": 1.6485365853658538e-05, "loss": 0.0597, "step": 43230 }, { "epoch": 0.5272560975609756, "grad_norm": 1.1409138441085815, "learning_rate": 1.6484959349593496e-05, "loss": 0.071, "step": 43235 }, { "epoch": 0.5273170731707317, "grad_norm": 0.2871566414833069, "learning_rate": 1.6484552845528458e-05, "loss": 0.0478, "step": 43240 }, { "epoch": 0.5273780487804878, "grad_norm": 0.569977879524231, "learning_rate": 1.6484146341463416e-05, "loss": 0.0954, "step": 43245 }, { "epoch": 0.5274390243902439, "grad_norm": 0.8273871541023254, "learning_rate": 1.6483739837398377e-05, "loss": 0.0903, "step": 43250 }, { "epoch": 0.5275, "grad_norm": 0.5955567359924316, "learning_rate": 1.6483333333333335e-05, "loss": 0.0555, "step": 43255 }, { "epoch": 0.5275609756097561, "grad_norm": 0.738493025302887, "learning_rate": 1.6482926829268293e-05, "loss": 0.0988, "step": 43260 }, { "epoch": 0.5276219512195122, "grad_norm": 0.6972469091415405, "learning_rate": 1.648252032520325e-05, "loss": 0.0509, "step": 43265 }, { "epoch": 0.5276829268292683, "grad_norm": 0.3975447714328766, "learning_rate": 1.6482113821138213e-05, "loss": 0.0652, "step": 43270 }, { "epoch": 0.5277439024390244, "grad_norm": 0.4616803824901581, "learning_rate": 1.648170731707317e-05, "loss": 0.0736, "step": 43275 }, { "epoch": 0.5278048780487805, "grad_norm": 0.7396820187568665, "learning_rate": 1.6481300813008133e-05, "loss": 0.072, "step": 43280 }, { "epoch": 0.5278658536585366, "grad_norm": 0.941534161567688, "learning_rate": 1.648089430894309e-05, "loss": 0.0612, "step": 43285 }, { "epoch": 0.5279268292682927, "grad_norm": 0.5277559161186218, "learning_rate": 1.648048780487805e-05, "loss": 0.0551, "step": 43290 }, { "epoch": 0.5279878048780487, "grad_norm": 0.9824479818344116, "learning_rate": 1.648008130081301e-05, "loss": 0.0769, "step": 43295 }, { "epoch": 0.5280487804878049, "grad_norm": 0.6781249046325684, "learning_rate": 1.647967479674797e-05, "loss": 0.0899, "step": 43300 }, { "epoch": 0.528109756097561, "grad_norm": 0.7921421527862549, "learning_rate": 1.6479268292682927e-05, "loss": 0.0561, "step": 43305 }, { "epoch": 0.5281707317073171, "grad_norm": 0.5898027420043945, "learning_rate": 1.6478861788617888e-05, "loss": 0.0712, "step": 43310 }, { "epoch": 0.5282317073170731, "grad_norm": 0.5313420295715332, "learning_rate": 1.6478455284552846e-05, "loss": 0.0488, "step": 43315 }, { "epoch": 0.5282926829268293, "grad_norm": 0.9486960768699646, "learning_rate": 1.6478048780487805e-05, "loss": 0.0936, "step": 43320 }, { "epoch": 0.5283536585365853, "grad_norm": 0.6457530856132507, "learning_rate": 1.6477642276422766e-05, "loss": 0.0971, "step": 43325 }, { "epoch": 0.5284146341463415, "grad_norm": 0.6167246103286743, "learning_rate": 1.6477235772357724e-05, "loss": 0.0466, "step": 43330 }, { "epoch": 0.5284756097560975, "grad_norm": 0.49313920736312866, "learning_rate": 1.6476829268292686e-05, "loss": 0.0474, "step": 43335 }, { "epoch": 0.5285365853658537, "grad_norm": 3.87241530418396, "learning_rate": 1.6476422764227644e-05, "loss": 0.0532, "step": 43340 }, { "epoch": 0.5285975609756097, "grad_norm": 0.5586433410644531, "learning_rate": 1.6476016260162605e-05, "loss": 0.0596, "step": 43345 }, { "epoch": 0.5286585365853659, "grad_norm": 0.3901444971561432, "learning_rate": 1.647560975609756e-05, "loss": 0.0741, "step": 43350 }, { "epoch": 0.5287195121951219, "grad_norm": 0.5774693489074707, "learning_rate": 1.647520325203252e-05, "loss": 0.0855, "step": 43355 }, { "epoch": 0.5287804878048781, "grad_norm": 0.2747998833656311, "learning_rate": 1.647479674796748e-05, "loss": 0.0434, "step": 43360 }, { "epoch": 0.5288414634146341, "grad_norm": 0.6087244153022766, "learning_rate": 1.647439024390244e-05, "loss": 0.0916, "step": 43365 }, { "epoch": 0.5289024390243903, "grad_norm": 0.9011507034301758, "learning_rate": 1.64739837398374e-05, "loss": 0.0742, "step": 43370 }, { "epoch": 0.5289634146341463, "grad_norm": 0.2920655310153961, "learning_rate": 1.647357723577236e-05, "loss": 0.0853, "step": 43375 }, { "epoch": 0.5290243902439025, "grad_norm": 0.5251546502113342, "learning_rate": 1.647317073170732e-05, "loss": 0.0771, "step": 43380 }, { "epoch": 0.5290853658536585, "grad_norm": 0.47944748401641846, "learning_rate": 1.6472764227642277e-05, "loss": 0.0516, "step": 43385 }, { "epoch": 0.5291463414634147, "grad_norm": 0.49831321835517883, "learning_rate": 1.6472357723577235e-05, "loss": 0.064, "step": 43390 }, { "epoch": 0.5292073170731707, "grad_norm": 0.33700957894325256, "learning_rate": 1.6471951219512197e-05, "loss": 0.1039, "step": 43395 }, { "epoch": 0.5292682926829269, "grad_norm": 0.6921716928482056, "learning_rate": 1.6471544715447155e-05, "loss": 0.0776, "step": 43400 }, { "epoch": 0.5293292682926829, "grad_norm": 0.7781722545623779, "learning_rate": 1.6471138211382116e-05, "loss": 0.0643, "step": 43405 }, { "epoch": 0.529390243902439, "grad_norm": 0.5564192533493042, "learning_rate": 1.6470731707317075e-05, "loss": 0.0511, "step": 43410 }, { "epoch": 0.5294512195121951, "grad_norm": 0.5703247785568237, "learning_rate": 1.6470325203252033e-05, "loss": 0.0751, "step": 43415 }, { "epoch": 0.5295121951219512, "grad_norm": 0.8428677916526794, "learning_rate": 1.6469918699186994e-05, "loss": 0.0711, "step": 43420 }, { "epoch": 0.5295731707317073, "grad_norm": 0.5848685503005981, "learning_rate": 1.6469512195121952e-05, "loss": 0.084, "step": 43425 }, { "epoch": 0.5296341463414634, "grad_norm": 0.3998331129550934, "learning_rate": 1.6469105691056914e-05, "loss": 0.0431, "step": 43430 }, { "epoch": 0.5296951219512195, "grad_norm": 0.5586367249488831, "learning_rate": 1.6468699186991872e-05, "loss": 0.0633, "step": 43435 }, { "epoch": 0.5297560975609756, "grad_norm": 0.42754384875297546, "learning_rate": 1.646829268292683e-05, "loss": 0.0476, "step": 43440 }, { "epoch": 0.5298170731707317, "grad_norm": 0.43536296486854553, "learning_rate": 1.6467886178861788e-05, "loss": 0.0533, "step": 43445 }, { "epoch": 0.5298780487804878, "grad_norm": 0.6501032710075378, "learning_rate": 1.646747967479675e-05, "loss": 0.0744, "step": 43450 }, { "epoch": 0.5299390243902439, "grad_norm": 0.6600316166877747, "learning_rate": 1.6467073170731708e-05, "loss": 0.0637, "step": 43455 }, { "epoch": 0.53, "grad_norm": 0.518643319606781, "learning_rate": 1.646666666666667e-05, "loss": 0.0532, "step": 43460 }, { "epoch": 0.5300609756097561, "grad_norm": 0.4102579951286316, "learning_rate": 1.6466260162601628e-05, "loss": 0.0626, "step": 43465 }, { "epoch": 0.5301219512195122, "grad_norm": 1.5618244409561157, "learning_rate": 1.6465853658536586e-05, "loss": 0.0866, "step": 43470 }, { "epoch": 0.5301829268292683, "grad_norm": 1.9577077627182007, "learning_rate": 1.6465447154471547e-05, "loss": 0.0656, "step": 43475 }, { "epoch": 0.5302439024390244, "grad_norm": 0.8108940124511719, "learning_rate": 1.6465040650406505e-05, "loss": 0.0488, "step": 43480 }, { "epoch": 0.5303048780487805, "grad_norm": 3.389352798461914, "learning_rate": 1.6464634146341463e-05, "loss": 0.0606, "step": 43485 }, { "epoch": 0.5303658536585366, "grad_norm": 0.5544785261154175, "learning_rate": 1.6464227642276425e-05, "loss": 0.049, "step": 43490 }, { "epoch": 0.5304268292682927, "grad_norm": 0.43368005752563477, "learning_rate": 1.6463821138211383e-05, "loss": 0.0541, "step": 43495 }, { "epoch": 0.5304878048780488, "grad_norm": 0.2913186550140381, "learning_rate": 1.646341463414634e-05, "loss": 0.0713, "step": 43500 }, { "epoch": 0.5305487804878048, "grad_norm": 1.0243546962738037, "learning_rate": 1.6463008130081303e-05, "loss": 0.0672, "step": 43505 }, { "epoch": 0.530609756097561, "grad_norm": 0.6716928482055664, "learning_rate": 1.646260162601626e-05, "loss": 0.073, "step": 43510 }, { "epoch": 0.530670731707317, "grad_norm": 1.420491337776184, "learning_rate": 1.6462195121951222e-05, "loss": 0.0526, "step": 43515 }, { "epoch": 0.5307317073170732, "grad_norm": 0.47760847210884094, "learning_rate": 1.646178861788618e-05, "loss": 0.0667, "step": 43520 }, { "epoch": 0.5307926829268292, "grad_norm": 0.5167302489280701, "learning_rate": 1.6461382113821142e-05, "loss": 0.0695, "step": 43525 }, { "epoch": 0.5308536585365854, "grad_norm": 0.8571386337280273, "learning_rate": 1.6460975609756097e-05, "loss": 0.0744, "step": 43530 }, { "epoch": 0.5309146341463414, "grad_norm": 0.3114028573036194, "learning_rate": 1.6460569105691058e-05, "loss": 0.0694, "step": 43535 }, { "epoch": 0.5309756097560976, "grad_norm": 0.32163524627685547, "learning_rate": 1.6460162601626016e-05, "loss": 0.0366, "step": 43540 }, { "epoch": 0.5310365853658536, "grad_norm": 0.820239245891571, "learning_rate": 1.6459756097560978e-05, "loss": 0.0735, "step": 43545 }, { "epoch": 0.5310975609756098, "grad_norm": 0.6619043350219727, "learning_rate": 1.6459349593495936e-05, "loss": 0.0897, "step": 43550 }, { "epoch": 0.5311585365853658, "grad_norm": 1.9674407243728638, "learning_rate": 1.6458943089430898e-05, "loss": 0.0723, "step": 43555 }, { "epoch": 0.531219512195122, "grad_norm": 0.8358851671218872, "learning_rate": 1.6458536585365856e-05, "loss": 0.045, "step": 43560 }, { "epoch": 0.531280487804878, "grad_norm": 0.45518946647644043, "learning_rate": 1.6458130081300814e-05, "loss": 0.0551, "step": 43565 }, { "epoch": 0.5313414634146342, "grad_norm": 0.697383463382721, "learning_rate": 1.6457723577235772e-05, "loss": 0.0479, "step": 43570 }, { "epoch": 0.5314024390243902, "grad_norm": 0.54714035987854, "learning_rate": 1.6457317073170733e-05, "loss": 0.068, "step": 43575 }, { "epoch": 0.5314634146341464, "grad_norm": 0.6018269658088684, "learning_rate": 1.645691056910569e-05, "loss": 0.0347, "step": 43580 }, { "epoch": 0.5315243902439024, "grad_norm": 0.7131730318069458, "learning_rate": 1.6456504065040653e-05, "loss": 0.0609, "step": 43585 }, { "epoch": 0.5315853658536586, "grad_norm": 0.7187107801437378, "learning_rate": 1.645609756097561e-05, "loss": 0.0425, "step": 43590 }, { "epoch": 0.5316463414634146, "grad_norm": 0.7312042117118835, "learning_rate": 1.645569105691057e-05, "loss": 0.0463, "step": 43595 }, { "epoch": 0.5317073170731708, "grad_norm": 0.5982590317726135, "learning_rate": 1.645528455284553e-05, "loss": 0.0418, "step": 43600 }, { "epoch": 0.5317682926829268, "grad_norm": 0.5354866981506348, "learning_rate": 1.645487804878049e-05, "loss": 0.0723, "step": 43605 }, { "epoch": 0.531829268292683, "grad_norm": 0.5487141609191895, "learning_rate": 1.645447154471545e-05, "loss": 0.0661, "step": 43610 }, { "epoch": 0.531890243902439, "grad_norm": 0.42666545510292053, "learning_rate": 1.645406504065041e-05, "loss": 0.0492, "step": 43615 }, { "epoch": 0.5319512195121952, "grad_norm": 0.7771586179733276, "learning_rate": 1.6453658536585367e-05, "loss": 0.0481, "step": 43620 }, { "epoch": 0.5320121951219512, "grad_norm": 0.899631142616272, "learning_rate": 1.6453252032520325e-05, "loss": 0.0715, "step": 43625 }, { "epoch": 0.5320731707317073, "grad_norm": 1.3103113174438477, "learning_rate": 1.6452845528455286e-05, "loss": 0.0615, "step": 43630 }, { "epoch": 0.5321341463414634, "grad_norm": 0.3702307343482971, "learning_rate": 1.6452439024390245e-05, "loss": 0.0362, "step": 43635 }, { "epoch": 0.5321951219512195, "grad_norm": 1.0897343158721924, "learning_rate": 1.6452032520325206e-05, "loss": 0.0967, "step": 43640 }, { "epoch": 0.5322560975609756, "grad_norm": 0.4823050796985626, "learning_rate": 1.6451626016260164e-05, "loss": 0.0609, "step": 43645 }, { "epoch": 0.5323170731707317, "grad_norm": 0.5613113045692444, "learning_rate": 1.6451219512195122e-05, "loss": 0.0618, "step": 43650 }, { "epoch": 0.5323780487804878, "grad_norm": 0.5111325979232788, "learning_rate": 1.645081300813008e-05, "loss": 0.0655, "step": 43655 }, { "epoch": 0.5324390243902439, "grad_norm": 0.699876606464386, "learning_rate": 1.6450406504065042e-05, "loss": 0.0649, "step": 43660 }, { "epoch": 0.5325, "grad_norm": 0.7896037101745605, "learning_rate": 1.645e-05, "loss": 0.0387, "step": 43665 }, { "epoch": 0.5325609756097561, "grad_norm": 0.7392635345458984, "learning_rate": 1.644959349593496e-05, "loss": 0.073, "step": 43670 }, { "epoch": 0.5326219512195122, "grad_norm": 0.599904477596283, "learning_rate": 1.644918699186992e-05, "loss": 0.099, "step": 43675 }, { "epoch": 0.5326829268292683, "grad_norm": 0.9647597074508667, "learning_rate": 1.6448780487804878e-05, "loss": 0.0799, "step": 43680 }, { "epoch": 0.5327439024390244, "grad_norm": 0.39605024456977844, "learning_rate": 1.644837398373984e-05, "loss": 0.0655, "step": 43685 }, { "epoch": 0.5328048780487805, "grad_norm": 0.8879417777061462, "learning_rate": 1.6447967479674797e-05, "loss": 0.0671, "step": 43690 }, { "epoch": 0.5328658536585366, "grad_norm": 0.4846009314060211, "learning_rate": 1.644756097560976e-05, "loss": 0.0472, "step": 43695 }, { "epoch": 0.5329268292682927, "grad_norm": 0.42556527256965637, "learning_rate": 1.6447154471544717e-05, "loss": 0.0657, "step": 43700 }, { "epoch": 0.5329878048780488, "grad_norm": 0.7408475875854492, "learning_rate": 1.644674796747968e-05, "loss": 0.0636, "step": 43705 }, { "epoch": 0.5330487804878049, "grad_norm": 0.6844901442527771, "learning_rate": 1.6446341463414633e-05, "loss": 0.0536, "step": 43710 }, { "epoch": 0.533109756097561, "grad_norm": 0.5794614553451538, "learning_rate": 1.6445934959349595e-05, "loss": 0.0582, "step": 43715 }, { "epoch": 0.5331707317073171, "grad_norm": 0.769241452217102, "learning_rate": 1.6445528455284553e-05, "loss": 0.0818, "step": 43720 }, { "epoch": 0.5332317073170731, "grad_norm": 1.9656339883804321, "learning_rate": 1.6445121951219515e-05, "loss": 0.0841, "step": 43725 }, { "epoch": 0.5332926829268293, "grad_norm": 0.708909273147583, "learning_rate": 1.6444715447154473e-05, "loss": 0.0604, "step": 43730 }, { "epoch": 0.5333536585365853, "grad_norm": 0.8603413701057434, "learning_rate": 1.6444308943089434e-05, "loss": 0.0499, "step": 43735 }, { "epoch": 0.5334146341463415, "grad_norm": 0.726778507232666, "learning_rate": 1.6443902439024392e-05, "loss": 0.0808, "step": 43740 }, { "epoch": 0.5334756097560975, "grad_norm": 0.49752289056777954, "learning_rate": 1.644349593495935e-05, "loss": 0.08, "step": 43745 }, { "epoch": 0.5335365853658537, "grad_norm": 0.7311280965805054, "learning_rate": 1.644308943089431e-05, "loss": 0.0784, "step": 43750 }, { "epoch": 0.5335975609756097, "grad_norm": 0.7840813994407654, "learning_rate": 1.644268292682927e-05, "loss": 0.0671, "step": 43755 }, { "epoch": 0.5336585365853659, "grad_norm": 0.5492636561393738, "learning_rate": 1.6442276422764228e-05, "loss": 0.0572, "step": 43760 }, { "epoch": 0.5337195121951219, "grad_norm": 0.8632778525352478, "learning_rate": 1.644186991869919e-05, "loss": 0.0918, "step": 43765 }, { "epoch": 0.5337804878048781, "grad_norm": 1.3747612237930298, "learning_rate": 1.6441463414634148e-05, "loss": 0.0563, "step": 43770 }, { "epoch": 0.5338414634146341, "grad_norm": 0.7946979999542236, "learning_rate": 1.6441056910569106e-05, "loss": 0.0621, "step": 43775 }, { "epoch": 0.5339024390243903, "grad_norm": 0.37904325127601624, "learning_rate": 1.6440650406504067e-05, "loss": 0.0502, "step": 43780 }, { "epoch": 0.5339634146341463, "grad_norm": 0.3457595109939575, "learning_rate": 1.6440243902439026e-05, "loss": 0.058, "step": 43785 }, { "epoch": 0.5340243902439025, "grad_norm": 0.7874916195869446, "learning_rate": 1.6439837398373987e-05, "loss": 0.0515, "step": 43790 }, { "epoch": 0.5340853658536585, "grad_norm": 0.8293921947479248, "learning_rate": 1.6439430894308945e-05, "loss": 0.0944, "step": 43795 }, { "epoch": 0.5341463414634147, "grad_norm": 0.684783935546875, "learning_rate": 1.6439024390243903e-05, "loss": 0.0417, "step": 43800 }, { "epoch": 0.5342073170731707, "grad_norm": 0.7984248995780945, "learning_rate": 1.643861788617886e-05, "loss": 0.069, "step": 43805 }, { "epoch": 0.5342682926829269, "grad_norm": 0.47281980514526367, "learning_rate": 1.6438211382113823e-05, "loss": 0.0459, "step": 43810 }, { "epoch": 0.5343292682926829, "grad_norm": 0.46909448504447937, "learning_rate": 1.643780487804878e-05, "loss": 0.0436, "step": 43815 }, { "epoch": 0.534390243902439, "grad_norm": 0.3234117031097412, "learning_rate": 1.6437398373983743e-05, "loss": 0.063, "step": 43820 }, { "epoch": 0.5344512195121951, "grad_norm": 0.6764292120933533, "learning_rate": 1.64369918699187e-05, "loss": 0.0958, "step": 43825 }, { "epoch": 0.5345121951219513, "grad_norm": 0.6044519543647766, "learning_rate": 1.643658536585366e-05, "loss": 0.0604, "step": 43830 }, { "epoch": 0.5345731707317073, "grad_norm": 0.6484162211418152, "learning_rate": 1.6436178861788617e-05, "loss": 0.081, "step": 43835 }, { "epoch": 0.5346341463414634, "grad_norm": 0.5122577548027039, "learning_rate": 1.643577235772358e-05, "loss": 0.0688, "step": 43840 }, { "epoch": 0.5346951219512195, "grad_norm": 0.5555042624473572, "learning_rate": 1.6435365853658537e-05, "loss": 0.0745, "step": 43845 }, { "epoch": 0.5347560975609756, "grad_norm": 1.1191200017929077, "learning_rate": 1.6434959349593498e-05, "loss": 0.0771, "step": 43850 }, { "epoch": 0.5348170731707317, "grad_norm": 0.6564549803733826, "learning_rate": 1.6434552845528456e-05, "loss": 0.0576, "step": 43855 }, { "epoch": 0.5348780487804878, "grad_norm": 0.6976713538169861, "learning_rate": 1.6434146341463414e-05, "loss": 0.0618, "step": 43860 }, { "epoch": 0.5349390243902439, "grad_norm": 1.2734979391098022, "learning_rate": 1.6433739837398376e-05, "loss": 0.083, "step": 43865 }, { "epoch": 0.535, "grad_norm": 0.48306477069854736, "learning_rate": 1.6433333333333334e-05, "loss": 0.0507, "step": 43870 }, { "epoch": 0.5350609756097561, "grad_norm": 0.433958500623703, "learning_rate": 1.6432926829268296e-05, "loss": 0.0654, "step": 43875 }, { "epoch": 0.5351219512195122, "grad_norm": 0.4533328413963318, "learning_rate": 1.6432520325203254e-05, "loss": 0.0515, "step": 43880 }, { "epoch": 0.5351829268292683, "grad_norm": 0.5278892517089844, "learning_rate": 1.6432113821138215e-05, "loss": 0.0466, "step": 43885 }, { "epoch": 0.5352439024390244, "grad_norm": 1.2563201189041138, "learning_rate": 1.643170731707317e-05, "loss": 0.0947, "step": 43890 }, { "epoch": 0.5353048780487805, "grad_norm": 0.46957990527153015, "learning_rate": 1.643130081300813e-05, "loss": 0.0665, "step": 43895 }, { "epoch": 0.5353658536585366, "grad_norm": 0.3982454240322113, "learning_rate": 1.643089430894309e-05, "loss": 0.0916, "step": 43900 }, { "epoch": 0.5354268292682927, "grad_norm": 0.8952479362487793, "learning_rate": 1.643048780487805e-05, "loss": 0.0695, "step": 43905 }, { "epoch": 0.5354878048780488, "grad_norm": 0.6432874202728271, "learning_rate": 1.643008130081301e-05, "loss": 0.0809, "step": 43910 }, { "epoch": 0.5355487804878049, "grad_norm": 0.36607998609542847, "learning_rate": 1.642967479674797e-05, "loss": 0.0409, "step": 43915 }, { "epoch": 0.535609756097561, "grad_norm": 0.5023044347763062, "learning_rate": 1.6429268292682926e-05, "loss": 0.0643, "step": 43920 }, { "epoch": 0.535670731707317, "grad_norm": 1.2071876525878906, "learning_rate": 1.6428861788617887e-05, "loss": 0.0651, "step": 43925 }, { "epoch": 0.5357317073170732, "grad_norm": 0.5064055919647217, "learning_rate": 1.6428455284552845e-05, "loss": 0.0555, "step": 43930 }, { "epoch": 0.5357926829268292, "grad_norm": 0.6580193638801575, "learning_rate": 1.6428048780487807e-05, "loss": 0.0676, "step": 43935 }, { "epoch": 0.5358536585365854, "grad_norm": 0.5841782093048096, "learning_rate": 1.6427642276422765e-05, "loss": 0.0848, "step": 43940 }, { "epoch": 0.5359146341463414, "grad_norm": 0.9488111138343811, "learning_rate": 1.6427235772357726e-05, "loss": 0.0665, "step": 43945 }, { "epoch": 0.5359756097560976, "grad_norm": 0.38455960154533386, "learning_rate": 1.6426829268292684e-05, "loss": 0.0463, "step": 43950 }, { "epoch": 0.5360365853658536, "grad_norm": 0.5070599913597107, "learning_rate": 1.6426422764227643e-05, "loss": 0.0846, "step": 43955 }, { "epoch": 0.5360975609756098, "grad_norm": 1.3678029775619507, "learning_rate": 1.6426016260162604e-05, "loss": 0.0957, "step": 43960 }, { "epoch": 0.5361585365853658, "grad_norm": 0.589384913444519, "learning_rate": 1.6425609756097562e-05, "loss": 0.0391, "step": 43965 }, { "epoch": 0.536219512195122, "grad_norm": 0.40347200632095337, "learning_rate": 1.6425203252032524e-05, "loss": 0.0523, "step": 43970 }, { "epoch": 0.536280487804878, "grad_norm": 0.576222836971283, "learning_rate": 1.6424796747967482e-05, "loss": 0.0561, "step": 43975 }, { "epoch": 0.5363414634146342, "grad_norm": 0.9132544994354248, "learning_rate": 1.642439024390244e-05, "loss": 0.0466, "step": 43980 }, { "epoch": 0.5364024390243902, "grad_norm": 0.5895966291427612, "learning_rate": 1.6423983739837398e-05, "loss": 0.0527, "step": 43985 }, { "epoch": 0.5364634146341464, "grad_norm": 0.45305678248405457, "learning_rate": 1.642357723577236e-05, "loss": 0.0511, "step": 43990 }, { "epoch": 0.5365243902439024, "grad_norm": 0.730884313583374, "learning_rate": 1.6423170731707318e-05, "loss": 0.0651, "step": 43995 }, { "epoch": 0.5365853658536586, "grad_norm": 0.5910282731056213, "learning_rate": 1.642276422764228e-05, "loss": 0.0462, "step": 44000 }, { "epoch": 0.5366463414634146, "grad_norm": 0.43741992115974426, "learning_rate": 1.6422357723577237e-05, "loss": 0.0476, "step": 44005 }, { "epoch": 0.5367073170731708, "grad_norm": 0.8047780990600586, "learning_rate": 1.6421951219512196e-05, "loss": 0.0451, "step": 44010 }, { "epoch": 0.5367682926829268, "grad_norm": 0.545612096786499, "learning_rate": 1.6421544715447154e-05, "loss": 0.0644, "step": 44015 }, { "epoch": 0.536829268292683, "grad_norm": 0.730888307094574, "learning_rate": 1.6421138211382115e-05, "loss": 0.0597, "step": 44020 }, { "epoch": 0.536890243902439, "grad_norm": 0.4231419563293457, "learning_rate": 1.6420731707317073e-05, "loss": 0.0694, "step": 44025 }, { "epoch": 0.5369512195121952, "grad_norm": 0.7957352995872498, "learning_rate": 1.6420325203252035e-05, "loss": 0.0739, "step": 44030 }, { "epoch": 0.5370121951219512, "grad_norm": 1.1548432111740112, "learning_rate": 1.6419918699186993e-05, "loss": 0.0585, "step": 44035 }, { "epoch": 0.5370731707317074, "grad_norm": 0.6518212556838989, "learning_rate": 1.641951219512195e-05, "loss": 0.0601, "step": 44040 }, { "epoch": 0.5371341463414634, "grad_norm": 0.7438656687736511, "learning_rate": 1.6419105691056913e-05, "loss": 0.0521, "step": 44045 }, { "epoch": 0.5371951219512195, "grad_norm": 0.56672203540802, "learning_rate": 1.641869918699187e-05, "loss": 0.0829, "step": 44050 }, { "epoch": 0.5372560975609756, "grad_norm": 1.19472074508667, "learning_rate": 1.6418292682926832e-05, "loss": 0.1219, "step": 44055 }, { "epoch": 0.5373170731707317, "grad_norm": 0.6363011598587036, "learning_rate": 1.641788617886179e-05, "loss": 0.0532, "step": 44060 }, { "epoch": 0.5373780487804878, "grad_norm": 0.3899357318878174, "learning_rate": 1.641747967479675e-05, "loss": 0.0395, "step": 44065 }, { "epoch": 0.5374390243902439, "grad_norm": 0.5453868508338928, "learning_rate": 1.6417073170731707e-05, "loss": 0.0869, "step": 44070 }, { "epoch": 0.5375, "grad_norm": 0.5401907563209534, "learning_rate": 1.6416666666666668e-05, "loss": 0.0795, "step": 44075 }, { "epoch": 0.5375609756097561, "grad_norm": 2.7552754878997803, "learning_rate": 1.6416260162601626e-05, "loss": 0.0639, "step": 44080 }, { "epoch": 0.5376219512195122, "grad_norm": 0.48786357045173645, "learning_rate": 1.6415853658536588e-05, "loss": 0.0697, "step": 44085 }, { "epoch": 0.5376829268292683, "grad_norm": 0.6743966937065125, "learning_rate": 1.6415447154471546e-05, "loss": 0.0701, "step": 44090 }, { "epoch": 0.5377439024390244, "grad_norm": 0.7756728529930115, "learning_rate": 1.6415040650406507e-05, "loss": 0.0673, "step": 44095 }, { "epoch": 0.5378048780487805, "grad_norm": 0.41726651787757874, "learning_rate": 1.6414634146341462e-05, "loss": 0.0597, "step": 44100 }, { "epoch": 0.5378658536585366, "grad_norm": 0.977504312992096, "learning_rate": 1.6414227642276424e-05, "loss": 0.0481, "step": 44105 }, { "epoch": 0.5379268292682927, "grad_norm": 0.5460652709007263, "learning_rate": 1.6413821138211382e-05, "loss": 0.0541, "step": 44110 }, { "epoch": 0.5379878048780488, "grad_norm": 1.0746716260910034, "learning_rate": 1.6413414634146343e-05, "loss": 0.0517, "step": 44115 }, { "epoch": 0.5380487804878049, "grad_norm": 0.5638279914855957, "learning_rate": 1.64130081300813e-05, "loss": 0.0753, "step": 44120 }, { "epoch": 0.538109756097561, "grad_norm": 0.8192041516304016, "learning_rate": 1.6412601626016263e-05, "loss": 0.0661, "step": 44125 }, { "epoch": 0.5381707317073171, "grad_norm": 0.7634279131889343, "learning_rate": 1.641219512195122e-05, "loss": 0.044, "step": 44130 }, { "epoch": 0.5382317073170731, "grad_norm": 0.9215003252029419, "learning_rate": 1.641178861788618e-05, "loss": 0.0986, "step": 44135 }, { "epoch": 0.5382926829268293, "grad_norm": 0.19776059687137604, "learning_rate": 1.641138211382114e-05, "loss": 0.0641, "step": 44140 }, { "epoch": 0.5383536585365853, "grad_norm": 6.690819263458252, "learning_rate": 1.64109756097561e-05, "loss": 0.1011, "step": 44145 }, { "epoch": 0.5384146341463415, "grad_norm": 0.776862621307373, "learning_rate": 1.641056910569106e-05, "loss": 0.0466, "step": 44150 }, { "epoch": 0.5384756097560975, "grad_norm": 0.40566036105155945, "learning_rate": 1.641016260162602e-05, "loss": 0.0839, "step": 44155 }, { "epoch": 0.5385365853658537, "grad_norm": 0.6582437753677368, "learning_rate": 1.6409756097560977e-05, "loss": 0.0636, "step": 44160 }, { "epoch": 0.5385975609756097, "grad_norm": 0.44507351517677307, "learning_rate": 1.6409349593495935e-05, "loss": 0.0609, "step": 44165 }, { "epoch": 0.5386585365853659, "grad_norm": 0.4876091182231903, "learning_rate": 1.6408943089430896e-05, "loss": 0.0363, "step": 44170 }, { "epoch": 0.5387195121951219, "grad_norm": 0.6286451816558838, "learning_rate": 1.6408536585365854e-05, "loss": 0.0663, "step": 44175 }, { "epoch": 0.5387804878048781, "grad_norm": 0.6256114840507507, "learning_rate": 1.6408130081300816e-05, "loss": 0.0639, "step": 44180 }, { "epoch": 0.5388414634146341, "grad_norm": 0.6998592615127563, "learning_rate": 1.6407723577235774e-05, "loss": 0.0774, "step": 44185 }, { "epoch": 0.5389024390243903, "grad_norm": 0.5278944373130798, "learning_rate": 1.6407317073170732e-05, "loss": 0.0579, "step": 44190 }, { "epoch": 0.5389634146341463, "grad_norm": 0.6087114214897156, "learning_rate": 1.640691056910569e-05, "loss": 0.0514, "step": 44195 }, { "epoch": 0.5390243902439025, "grad_norm": 0.32693514227867126, "learning_rate": 1.6406504065040652e-05, "loss": 0.0513, "step": 44200 }, { "epoch": 0.5390853658536585, "grad_norm": 0.35610345005989075, "learning_rate": 1.640609756097561e-05, "loss": 0.0452, "step": 44205 }, { "epoch": 0.5391463414634147, "grad_norm": 1.1525412797927856, "learning_rate": 1.640569105691057e-05, "loss": 0.0839, "step": 44210 }, { "epoch": 0.5392073170731707, "grad_norm": 0.753018319606781, "learning_rate": 1.640528455284553e-05, "loss": 0.064, "step": 44215 }, { "epoch": 0.5392682926829269, "grad_norm": 0.7495919466018677, "learning_rate": 1.6404878048780488e-05, "loss": 0.0756, "step": 44220 }, { "epoch": 0.5393292682926829, "grad_norm": 0.8478795289993286, "learning_rate": 1.640447154471545e-05, "loss": 0.077, "step": 44225 }, { "epoch": 0.5393902439024391, "grad_norm": 0.46836620569229126, "learning_rate": 1.6404065040650407e-05, "loss": 0.0758, "step": 44230 }, { "epoch": 0.5394512195121951, "grad_norm": 0.36348289251327515, "learning_rate": 1.640365853658537e-05, "loss": 0.0542, "step": 44235 }, { "epoch": 0.5395121951219513, "grad_norm": 0.5702893733978271, "learning_rate": 1.6403252032520327e-05, "loss": 0.0497, "step": 44240 }, { "epoch": 0.5395731707317073, "grad_norm": 0.8738579750061035, "learning_rate": 1.6402845528455285e-05, "loss": 0.1, "step": 44245 }, { "epoch": 0.5396341463414634, "grad_norm": 0.31907007098197937, "learning_rate": 1.6402439024390243e-05, "loss": 0.0551, "step": 44250 }, { "epoch": 0.5396951219512195, "grad_norm": 0.516572892665863, "learning_rate": 1.6402032520325205e-05, "loss": 0.0766, "step": 44255 }, { "epoch": 0.5397560975609756, "grad_norm": 2.618537187576294, "learning_rate": 1.6401626016260163e-05, "loss": 0.0614, "step": 44260 }, { "epoch": 0.5398170731707317, "grad_norm": 0.6089523434638977, "learning_rate": 1.6401219512195124e-05, "loss": 0.0523, "step": 44265 }, { "epoch": 0.5398780487804878, "grad_norm": 0.6311536431312561, "learning_rate": 1.6400813008130083e-05, "loss": 0.0628, "step": 44270 }, { "epoch": 0.5399390243902439, "grad_norm": 0.6452584266662598, "learning_rate": 1.6400406504065044e-05, "loss": 0.0923, "step": 44275 }, { "epoch": 0.54, "grad_norm": 0.41934695839881897, "learning_rate": 1.64e-05, "loss": 0.0437, "step": 44280 }, { "epoch": 0.5400609756097561, "grad_norm": 0.9592723846435547, "learning_rate": 1.639959349593496e-05, "loss": 0.0905, "step": 44285 }, { "epoch": 0.5401219512195122, "grad_norm": 0.6030157804489136, "learning_rate": 1.639918699186992e-05, "loss": 0.043, "step": 44290 }, { "epoch": 0.5401829268292683, "grad_norm": 0.7191997170448303, "learning_rate": 1.639878048780488e-05, "loss": 0.078, "step": 44295 }, { "epoch": 0.5402439024390244, "grad_norm": 1.5371845960617065, "learning_rate": 1.6398373983739838e-05, "loss": 0.0792, "step": 44300 }, { "epoch": 0.5403048780487805, "grad_norm": 0.8490865230560303, "learning_rate": 1.63979674796748e-05, "loss": 0.0539, "step": 44305 }, { "epoch": 0.5403658536585366, "grad_norm": 0.5587763786315918, "learning_rate": 1.6397560975609758e-05, "loss": 0.054, "step": 44310 }, { "epoch": 0.5404268292682927, "grad_norm": 0.45372194051742554, "learning_rate": 1.6397154471544716e-05, "loss": 0.043, "step": 44315 }, { "epoch": 0.5404878048780488, "grad_norm": 5.819982051849365, "learning_rate": 1.6396747967479677e-05, "loss": 0.0746, "step": 44320 }, { "epoch": 0.5405487804878049, "grad_norm": 0.45180320739746094, "learning_rate": 1.6396341463414636e-05, "loss": 0.0672, "step": 44325 }, { "epoch": 0.540609756097561, "grad_norm": 0.5871164798736572, "learning_rate": 1.6395934959349594e-05, "loss": 0.0788, "step": 44330 }, { "epoch": 0.540670731707317, "grad_norm": 0.618920087814331, "learning_rate": 1.6395528455284555e-05, "loss": 0.0591, "step": 44335 }, { "epoch": 0.5407317073170732, "grad_norm": 0.6643579006195068, "learning_rate": 1.6395121951219513e-05, "loss": 0.0613, "step": 44340 }, { "epoch": 0.5407926829268292, "grad_norm": 0.7924401164054871, "learning_rate": 1.639471544715447e-05, "loss": 0.0584, "step": 44345 }, { "epoch": 0.5408536585365854, "grad_norm": 0.8628490567207336, "learning_rate": 1.6394308943089433e-05, "loss": 0.0516, "step": 44350 }, { "epoch": 0.5409146341463414, "grad_norm": 0.605998158454895, "learning_rate": 1.639390243902439e-05, "loss": 0.1083, "step": 44355 }, { "epoch": 0.5409756097560976, "grad_norm": 0.6471216082572937, "learning_rate": 1.6393495934959353e-05, "loss": 0.0698, "step": 44360 }, { "epoch": 0.5410365853658536, "grad_norm": 0.5423495173454285, "learning_rate": 1.639308943089431e-05, "loss": 0.0432, "step": 44365 }, { "epoch": 0.5410975609756098, "grad_norm": 0.2245909869670868, "learning_rate": 1.639268292682927e-05, "loss": 0.055, "step": 44370 }, { "epoch": 0.5411585365853658, "grad_norm": 1.0526463985443115, "learning_rate": 1.6392276422764227e-05, "loss": 0.0932, "step": 44375 }, { "epoch": 0.541219512195122, "grad_norm": 0.673409640789032, "learning_rate": 1.639186991869919e-05, "loss": 0.068, "step": 44380 }, { "epoch": 0.541280487804878, "grad_norm": 0.44707322120666504, "learning_rate": 1.6391463414634147e-05, "loss": 0.0455, "step": 44385 }, { "epoch": 0.5413414634146342, "grad_norm": 1.0662938356399536, "learning_rate": 1.6391056910569108e-05, "loss": 0.0914, "step": 44390 }, { "epoch": 0.5414024390243902, "grad_norm": 0.39952540397644043, "learning_rate": 1.6390650406504066e-05, "loss": 0.0552, "step": 44395 }, { "epoch": 0.5414634146341464, "grad_norm": 0.5852524638175964, "learning_rate": 1.6390243902439024e-05, "loss": 0.0748, "step": 44400 }, { "epoch": 0.5415243902439024, "grad_norm": 0.36354175209999084, "learning_rate": 1.6389837398373986e-05, "loss": 0.1085, "step": 44405 }, { "epoch": 0.5415853658536586, "grad_norm": 0.7875609397888184, "learning_rate": 1.6389430894308944e-05, "loss": 0.0891, "step": 44410 }, { "epoch": 0.5416463414634146, "grad_norm": 0.29047518968582153, "learning_rate": 1.6389024390243906e-05, "loss": 0.0694, "step": 44415 }, { "epoch": 0.5417073170731708, "grad_norm": 2.015204429626465, "learning_rate": 1.6388617886178864e-05, "loss": 0.072, "step": 44420 }, { "epoch": 0.5417682926829268, "grad_norm": 0.4496409296989441, "learning_rate": 1.6388211382113822e-05, "loss": 0.0649, "step": 44425 }, { "epoch": 0.541829268292683, "grad_norm": 0.990135133266449, "learning_rate": 1.638780487804878e-05, "loss": 0.0805, "step": 44430 }, { "epoch": 0.541890243902439, "grad_norm": 0.8058432936668396, "learning_rate": 1.638739837398374e-05, "loss": 0.0862, "step": 44435 }, { "epoch": 0.5419512195121952, "grad_norm": 0.47672706842422485, "learning_rate": 1.63869918699187e-05, "loss": 0.0524, "step": 44440 }, { "epoch": 0.5420121951219512, "grad_norm": 0.6481702923774719, "learning_rate": 1.638658536585366e-05, "loss": 0.0547, "step": 44445 }, { "epoch": 0.5420731707317074, "grad_norm": 0.59629225730896, "learning_rate": 1.638617886178862e-05, "loss": 0.0617, "step": 44450 }, { "epoch": 0.5421341463414634, "grad_norm": 1.0591013431549072, "learning_rate": 1.638577235772358e-05, "loss": 0.0666, "step": 44455 }, { "epoch": 0.5421951219512195, "grad_norm": 0.9631003737449646, "learning_rate": 1.6385365853658535e-05, "loss": 0.06, "step": 44460 }, { "epoch": 0.5422560975609756, "grad_norm": 1.378190279006958, "learning_rate": 1.6384959349593497e-05, "loss": 0.0573, "step": 44465 }, { "epoch": 0.5423170731707317, "grad_norm": 0.8385246396064758, "learning_rate": 1.6384552845528455e-05, "loss": 0.097, "step": 44470 }, { "epoch": 0.5423780487804878, "grad_norm": 0.47346919775009155, "learning_rate": 1.6384146341463417e-05, "loss": 0.0494, "step": 44475 }, { "epoch": 0.5424390243902439, "grad_norm": 0.6198470592498779, "learning_rate": 1.6383739837398375e-05, "loss": 0.067, "step": 44480 }, { "epoch": 0.5425, "grad_norm": 0.6452968120574951, "learning_rate": 1.6383333333333336e-05, "loss": 0.0777, "step": 44485 }, { "epoch": 0.5425609756097561, "grad_norm": 0.6967403888702393, "learning_rate": 1.6382926829268294e-05, "loss": 0.0745, "step": 44490 }, { "epoch": 0.5426219512195122, "grad_norm": 0.7466505169868469, "learning_rate": 1.6382520325203253e-05, "loss": 0.073, "step": 44495 }, { "epoch": 0.5426829268292683, "grad_norm": 0.8048936128616333, "learning_rate": 1.6382113821138214e-05, "loss": 0.0941, "step": 44500 }, { "epoch": 0.5427439024390244, "grad_norm": 1.0431987047195435, "learning_rate": 1.6381707317073172e-05, "loss": 0.0674, "step": 44505 }, { "epoch": 0.5428048780487805, "grad_norm": 0.6181329488754272, "learning_rate": 1.638130081300813e-05, "loss": 0.0934, "step": 44510 }, { "epoch": 0.5428658536585366, "grad_norm": 0.7687761783599854, "learning_rate": 1.6380894308943092e-05, "loss": 0.0531, "step": 44515 }, { "epoch": 0.5429268292682927, "grad_norm": 0.40886926651000977, "learning_rate": 1.638048780487805e-05, "loss": 0.069, "step": 44520 }, { "epoch": 0.5429878048780488, "grad_norm": 0.7626078724861145, "learning_rate": 1.6380081300813008e-05, "loss": 0.0987, "step": 44525 }, { "epoch": 0.5430487804878049, "grad_norm": 0.518992006778717, "learning_rate": 1.637967479674797e-05, "loss": 0.0532, "step": 44530 }, { "epoch": 0.543109756097561, "grad_norm": 1.104120135307312, "learning_rate": 1.6379268292682928e-05, "loss": 0.0608, "step": 44535 }, { "epoch": 0.5431707317073171, "grad_norm": 0.4133797883987427, "learning_rate": 1.637886178861789e-05, "loss": 0.0495, "step": 44540 }, { "epoch": 0.5432317073170732, "grad_norm": 0.5836000442504883, "learning_rate": 1.6378455284552847e-05, "loss": 0.0888, "step": 44545 }, { "epoch": 0.5432926829268293, "grad_norm": 0.7386889457702637, "learning_rate": 1.6378048780487805e-05, "loss": 0.0553, "step": 44550 }, { "epoch": 0.5433536585365853, "grad_norm": 0.5865590572357178, "learning_rate": 1.6377642276422764e-05, "loss": 0.0509, "step": 44555 }, { "epoch": 0.5434146341463415, "grad_norm": 0.21847228705883026, "learning_rate": 1.6377235772357725e-05, "loss": 0.0464, "step": 44560 }, { "epoch": 0.5434756097560975, "grad_norm": 0.7733657360076904, "learning_rate": 1.6376829268292683e-05, "loss": 0.0488, "step": 44565 }, { "epoch": 0.5435365853658537, "grad_norm": 0.24086976051330566, "learning_rate": 1.6376422764227645e-05, "loss": 0.0462, "step": 44570 }, { "epoch": 0.5435975609756097, "grad_norm": 0.8924050331115723, "learning_rate": 1.6376016260162603e-05, "loss": 0.0525, "step": 44575 }, { "epoch": 0.5436585365853659, "grad_norm": 0.6523410677909851, "learning_rate": 1.637560975609756e-05, "loss": 0.0488, "step": 44580 }, { "epoch": 0.5437195121951219, "grad_norm": 1.0840719938278198, "learning_rate": 1.6375203252032523e-05, "loss": 0.0584, "step": 44585 }, { "epoch": 0.5437804878048781, "grad_norm": 0.5719249844551086, "learning_rate": 1.637479674796748e-05, "loss": 0.0562, "step": 44590 }, { "epoch": 0.5438414634146341, "grad_norm": 0.6787821054458618, "learning_rate": 1.637439024390244e-05, "loss": 0.0716, "step": 44595 }, { "epoch": 0.5439024390243903, "grad_norm": 0.4514586627483368, "learning_rate": 1.63739837398374e-05, "loss": 0.0499, "step": 44600 }, { "epoch": 0.5439634146341463, "grad_norm": 0.511603832244873, "learning_rate": 1.637357723577236e-05, "loss": 0.0453, "step": 44605 }, { "epoch": 0.5440243902439025, "grad_norm": 0.741224467754364, "learning_rate": 1.6373170731707317e-05, "loss": 0.0689, "step": 44610 }, { "epoch": 0.5440853658536585, "grad_norm": 0.39659950137138367, "learning_rate": 1.6372764227642278e-05, "loss": 0.0657, "step": 44615 }, { "epoch": 0.5441463414634147, "grad_norm": 0.591657280921936, "learning_rate": 1.6372357723577236e-05, "loss": 0.0808, "step": 44620 }, { "epoch": 0.5442073170731707, "grad_norm": 1.7492471933364868, "learning_rate": 1.6371951219512198e-05, "loss": 0.0847, "step": 44625 }, { "epoch": 0.5442682926829269, "grad_norm": 0.40708082914352417, "learning_rate": 1.6371544715447156e-05, "loss": 0.0705, "step": 44630 }, { "epoch": 0.5443292682926829, "grad_norm": 0.4433607757091522, "learning_rate": 1.6371138211382117e-05, "loss": 0.0344, "step": 44635 }, { "epoch": 0.5443902439024391, "grad_norm": 1.1006600856781006, "learning_rate": 1.6370731707317072e-05, "loss": 0.081, "step": 44640 }, { "epoch": 0.5444512195121951, "grad_norm": 0.7153940796852112, "learning_rate": 1.6370325203252034e-05, "loss": 0.0445, "step": 44645 }, { "epoch": 0.5445121951219513, "grad_norm": 0.7975109815597534, "learning_rate": 1.6369918699186992e-05, "loss": 0.0567, "step": 44650 }, { "epoch": 0.5445731707317073, "grad_norm": 0.8837456107139587, "learning_rate": 1.6369512195121953e-05, "loss": 0.0837, "step": 44655 }, { "epoch": 0.5446341463414635, "grad_norm": 0.4841192066669464, "learning_rate": 1.636910569105691e-05, "loss": 0.0581, "step": 44660 }, { "epoch": 0.5446951219512195, "grad_norm": 0.6928524374961853, "learning_rate": 1.6368699186991873e-05, "loss": 0.0706, "step": 44665 }, { "epoch": 0.5447560975609756, "grad_norm": 0.7061970829963684, "learning_rate": 1.636829268292683e-05, "loss": 0.0657, "step": 44670 }, { "epoch": 0.5448170731707317, "grad_norm": 0.6983273029327393, "learning_rate": 1.636788617886179e-05, "loss": 0.0838, "step": 44675 }, { "epoch": 0.5448780487804878, "grad_norm": 0.5028803944587708, "learning_rate": 1.636747967479675e-05, "loss": 0.0597, "step": 44680 }, { "epoch": 0.5449390243902439, "grad_norm": 0.9716575741767883, "learning_rate": 1.636707317073171e-05, "loss": 0.1064, "step": 44685 }, { "epoch": 0.545, "grad_norm": 0.610450267791748, "learning_rate": 1.6366666666666667e-05, "loss": 0.0675, "step": 44690 }, { "epoch": 0.5450609756097561, "grad_norm": 1.5286731719970703, "learning_rate": 1.636626016260163e-05, "loss": 0.068, "step": 44695 }, { "epoch": 0.5451219512195122, "grad_norm": 0.39462029933929443, "learning_rate": 1.6365853658536587e-05, "loss": 0.0298, "step": 44700 }, { "epoch": 0.5451829268292683, "grad_norm": 0.40310025215148926, "learning_rate": 1.6365447154471545e-05, "loss": 0.0617, "step": 44705 }, { "epoch": 0.5452439024390244, "grad_norm": 0.5617519617080688, "learning_rate": 1.6365040650406506e-05, "loss": 0.0774, "step": 44710 }, { "epoch": 0.5453048780487805, "grad_norm": 0.7802795767784119, "learning_rate": 1.6364634146341464e-05, "loss": 0.0596, "step": 44715 }, { "epoch": 0.5453658536585366, "grad_norm": 0.5039435625076294, "learning_rate": 1.6364227642276426e-05, "loss": 0.0664, "step": 44720 }, { "epoch": 0.5454268292682927, "grad_norm": 0.5365234613418579, "learning_rate": 1.6363821138211384e-05, "loss": 0.0365, "step": 44725 }, { "epoch": 0.5454878048780488, "grad_norm": 0.524268627166748, "learning_rate": 1.6363414634146342e-05, "loss": 0.06, "step": 44730 }, { "epoch": 0.5455487804878049, "grad_norm": 0.8985359072685242, "learning_rate": 1.63630081300813e-05, "loss": 0.0801, "step": 44735 }, { "epoch": 0.545609756097561, "grad_norm": 0.25580117106437683, "learning_rate": 1.6362601626016262e-05, "loss": 0.0725, "step": 44740 }, { "epoch": 0.5456707317073171, "grad_norm": 0.5537813901901245, "learning_rate": 1.636219512195122e-05, "loss": 0.0608, "step": 44745 }, { "epoch": 0.5457317073170732, "grad_norm": 0.6241812705993652, "learning_rate": 1.636178861788618e-05, "loss": 0.0725, "step": 44750 }, { "epoch": 0.5457926829268293, "grad_norm": 0.472327321767807, "learning_rate": 1.636138211382114e-05, "loss": 0.0493, "step": 44755 }, { "epoch": 0.5458536585365854, "grad_norm": 0.5451228618621826, "learning_rate": 1.6360975609756098e-05, "loss": 0.0646, "step": 44760 }, { "epoch": 0.5459146341463414, "grad_norm": 0.8374300003051758, "learning_rate": 1.636056910569106e-05, "loss": 0.0685, "step": 44765 }, { "epoch": 0.5459756097560976, "grad_norm": 0.6262324452400208, "learning_rate": 1.6360162601626017e-05, "loss": 0.0854, "step": 44770 }, { "epoch": 0.5460365853658536, "grad_norm": 0.6182318329811096, "learning_rate": 1.6359756097560975e-05, "loss": 0.0687, "step": 44775 }, { "epoch": 0.5460975609756098, "grad_norm": 0.954224169254303, "learning_rate": 1.6359349593495937e-05, "loss": 0.0636, "step": 44780 }, { "epoch": 0.5461585365853658, "grad_norm": 0.8143828511238098, "learning_rate": 1.6358943089430895e-05, "loss": 0.0598, "step": 44785 }, { "epoch": 0.546219512195122, "grad_norm": 1.0079575777053833, "learning_rate": 1.6358536585365853e-05, "loss": 0.0872, "step": 44790 }, { "epoch": 0.546280487804878, "grad_norm": 0.5937708020210266, "learning_rate": 1.6358130081300815e-05, "loss": 0.0597, "step": 44795 }, { "epoch": 0.5463414634146342, "grad_norm": 0.35675889253616333, "learning_rate": 1.6357723577235773e-05, "loss": 0.0438, "step": 44800 }, { "epoch": 0.5464024390243902, "grad_norm": 0.7374950647354126, "learning_rate": 1.6357317073170734e-05, "loss": 0.0643, "step": 44805 }, { "epoch": 0.5464634146341464, "grad_norm": 0.48453330993652344, "learning_rate": 1.6356910569105693e-05, "loss": 0.0681, "step": 44810 }, { "epoch": 0.5465243902439024, "grad_norm": 1.0317031145095825, "learning_rate": 1.6356504065040654e-05, "loss": 0.0812, "step": 44815 }, { "epoch": 0.5465853658536586, "grad_norm": 0.8293786644935608, "learning_rate": 1.635609756097561e-05, "loss": 0.0822, "step": 44820 }, { "epoch": 0.5466463414634146, "grad_norm": 0.7705687880516052, "learning_rate": 1.635569105691057e-05, "loss": 0.0749, "step": 44825 }, { "epoch": 0.5467073170731708, "grad_norm": 0.351448118686676, "learning_rate": 1.635528455284553e-05, "loss": 0.054, "step": 44830 }, { "epoch": 0.5467682926829268, "grad_norm": 2.465939521789551, "learning_rate": 1.635487804878049e-05, "loss": 0.0648, "step": 44835 }, { "epoch": 0.546829268292683, "grad_norm": 0.572819709777832, "learning_rate": 1.6354471544715448e-05, "loss": 0.0529, "step": 44840 }, { "epoch": 0.546890243902439, "grad_norm": 0.8430984020233154, "learning_rate": 1.635406504065041e-05, "loss": 0.0486, "step": 44845 }, { "epoch": 0.5469512195121952, "grad_norm": 1.7006648778915405, "learning_rate": 1.6353658536585368e-05, "loss": 0.0485, "step": 44850 }, { "epoch": 0.5470121951219512, "grad_norm": 0.29564833641052246, "learning_rate": 1.6353252032520326e-05, "loss": 0.0739, "step": 44855 }, { "epoch": 0.5470731707317074, "grad_norm": 0.5937559008598328, "learning_rate": 1.6352845528455284e-05, "loss": 0.0555, "step": 44860 }, { "epoch": 0.5471341463414634, "grad_norm": 1.346206784248352, "learning_rate": 1.6352439024390245e-05, "loss": 0.0837, "step": 44865 }, { "epoch": 0.5471951219512196, "grad_norm": 1.124227523803711, "learning_rate": 1.6352032520325204e-05, "loss": 0.0733, "step": 44870 }, { "epoch": 0.5472560975609756, "grad_norm": 0.8849115371704102, "learning_rate": 1.6351626016260165e-05, "loss": 0.0504, "step": 44875 }, { "epoch": 0.5473170731707317, "grad_norm": 0.3393051028251648, "learning_rate": 1.6351219512195123e-05, "loss": 0.0692, "step": 44880 }, { "epoch": 0.5473780487804878, "grad_norm": 1.3214094638824463, "learning_rate": 1.635081300813008e-05, "loss": 0.0505, "step": 44885 }, { "epoch": 0.547439024390244, "grad_norm": 1.0430296659469604, "learning_rate": 1.6350406504065043e-05, "loss": 0.0951, "step": 44890 }, { "epoch": 0.5475, "grad_norm": 0.9796311855316162, "learning_rate": 1.635e-05, "loss": 0.0568, "step": 44895 }, { "epoch": 0.5475609756097561, "grad_norm": 0.6010939478874207, "learning_rate": 1.6349593495934963e-05, "loss": 0.0788, "step": 44900 }, { "epoch": 0.5476219512195122, "grad_norm": 0.6951375603675842, "learning_rate": 1.634918699186992e-05, "loss": 0.0643, "step": 44905 }, { "epoch": 0.5476829268292683, "grad_norm": 5.754316806793213, "learning_rate": 1.634878048780488e-05, "loss": 0.0746, "step": 44910 }, { "epoch": 0.5477439024390244, "grad_norm": 0.4638902544975281, "learning_rate": 1.6348373983739837e-05, "loss": 0.0943, "step": 44915 }, { "epoch": 0.5478048780487805, "grad_norm": 0.20170029997825623, "learning_rate": 1.63479674796748e-05, "loss": 0.0718, "step": 44920 }, { "epoch": 0.5478658536585366, "grad_norm": 0.2916235625743866, "learning_rate": 1.6347560975609757e-05, "loss": 0.0446, "step": 44925 }, { "epoch": 0.5479268292682927, "grad_norm": 0.9861912131309509, "learning_rate": 1.6347154471544718e-05, "loss": 0.0637, "step": 44930 }, { "epoch": 0.5479878048780488, "grad_norm": 0.7126482725143433, "learning_rate": 1.6346747967479676e-05, "loss": 0.0468, "step": 44935 }, { "epoch": 0.5480487804878049, "grad_norm": 0.6348497867584229, "learning_rate": 1.6346341463414634e-05, "loss": 0.056, "step": 44940 }, { "epoch": 0.548109756097561, "grad_norm": 0.4894762933254242, "learning_rate": 1.6345934959349596e-05, "loss": 0.0402, "step": 44945 }, { "epoch": 0.5481707317073171, "grad_norm": 0.7821049094200134, "learning_rate": 1.6345528455284554e-05, "loss": 0.0806, "step": 44950 }, { "epoch": 0.5482317073170732, "grad_norm": 0.3378177583217621, "learning_rate": 1.6345121951219512e-05, "loss": 0.0812, "step": 44955 }, { "epoch": 0.5482926829268293, "grad_norm": 0.3485749065876007, "learning_rate": 1.6344715447154474e-05, "loss": 0.0673, "step": 44960 }, { "epoch": 0.5483536585365854, "grad_norm": 0.6033196449279785, "learning_rate": 1.6344308943089432e-05, "loss": 0.0737, "step": 44965 }, { "epoch": 0.5484146341463415, "grad_norm": 0.6798579096794128, "learning_rate": 1.634390243902439e-05, "loss": 0.0545, "step": 44970 }, { "epoch": 0.5484756097560975, "grad_norm": 0.7551172971725464, "learning_rate": 1.634349593495935e-05, "loss": 0.0559, "step": 44975 }, { "epoch": 0.5485365853658537, "grad_norm": 0.39966872334480286, "learning_rate": 1.634308943089431e-05, "loss": 0.0664, "step": 44980 }, { "epoch": 0.5485975609756097, "grad_norm": 0.5112246870994568, "learning_rate": 1.634268292682927e-05, "loss": 0.0706, "step": 44985 }, { "epoch": 0.5486585365853659, "grad_norm": 0.43480584025382996, "learning_rate": 1.634227642276423e-05, "loss": 0.0567, "step": 44990 }, { "epoch": 0.5487195121951219, "grad_norm": 0.5887946486473083, "learning_rate": 1.634186991869919e-05, "loss": 0.0704, "step": 44995 }, { "epoch": 0.5487804878048781, "grad_norm": 0.43783822655677795, "learning_rate": 1.6341463414634145e-05, "loss": 0.0443, "step": 45000 }, { "epoch": 0.5488414634146341, "grad_norm": 0.536811351776123, "learning_rate": 1.6341056910569107e-05, "loss": 0.0966, "step": 45005 }, { "epoch": 0.5489024390243903, "grad_norm": 0.9291617274284363, "learning_rate": 1.6340650406504065e-05, "loss": 0.0666, "step": 45010 }, { "epoch": 0.5489634146341463, "grad_norm": 0.5019082427024841, "learning_rate": 1.6340243902439027e-05, "loss": 0.0745, "step": 45015 }, { "epoch": 0.5490243902439025, "grad_norm": 0.4942786693572998, "learning_rate": 1.6339837398373985e-05, "loss": 0.0555, "step": 45020 }, { "epoch": 0.5490853658536585, "grad_norm": 0.5291066765785217, "learning_rate": 1.6339430894308946e-05, "loss": 0.0544, "step": 45025 }, { "epoch": 0.5491463414634147, "grad_norm": 1.942535638809204, "learning_rate": 1.6339024390243904e-05, "loss": 0.0504, "step": 45030 }, { "epoch": 0.5492073170731707, "grad_norm": 0.5904961824417114, "learning_rate": 1.6338617886178862e-05, "loss": 0.0484, "step": 45035 }, { "epoch": 0.5492682926829269, "grad_norm": 0.3442895710468292, "learning_rate": 1.633821138211382e-05, "loss": 0.0482, "step": 45040 }, { "epoch": 0.5493292682926829, "grad_norm": 0.7764165997505188, "learning_rate": 1.6337804878048782e-05, "loss": 0.0541, "step": 45045 }, { "epoch": 0.5493902439024391, "grad_norm": 1.173438549041748, "learning_rate": 1.633739837398374e-05, "loss": 0.0805, "step": 45050 }, { "epoch": 0.5494512195121951, "grad_norm": 0.8740223050117493, "learning_rate": 1.6336991869918702e-05, "loss": 0.0708, "step": 45055 }, { "epoch": 0.5495121951219513, "grad_norm": 0.6513749957084656, "learning_rate": 1.633658536585366e-05, "loss": 0.0706, "step": 45060 }, { "epoch": 0.5495731707317073, "grad_norm": 1.766876459121704, "learning_rate": 1.6336178861788618e-05, "loss": 0.0507, "step": 45065 }, { "epoch": 0.5496341463414635, "grad_norm": 0.5295689702033997, "learning_rate": 1.633577235772358e-05, "loss": 0.0427, "step": 45070 }, { "epoch": 0.5496951219512195, "grad_norm": 0.567011296749115, "learning_rate": 1.6335365853658538e-05, "loss": 0.0613, "step": 45075 }, { "epoch": 0.5497560975609757, "grad_norm": 0.9596667885780334, "learning_rate": 1.63349593495935e-05, "loss": 0.0858, "step": 45080 }, { "epoch": 0.5498170731707317, "grad_norm": 1.824244737625122, "learning_rate": 1.6334552845528457e-05, "loss": 0.0948, "step": 45085 }, { "epoch": 0.5498780487804878, "grad_norm": 0.687252938747406, "learning_rate": 1.6334146341463415e-05, "loss": 0.0694, "step": 45090 }, { "epoch": 0.5499390243902439, "grad_norm": 0.6638022065162659, "learning_rate": 1.6333739837398374e-05, "loss": 0.0728, "step": 45095 }, { "epoch": 0.55, "grad_norm": 0.3732277750968933, "learning_rate": 1.6333333333333335e-05, "loss": 0.0671, "step": 45100 }, { "epoch": 0.5500609756097561, "grad_norm": 0.3007539212703705, "learning_rate": 1.6332926829268293e-05, "loss": 0.0484, "step": 45105 }, { "epoch": 0.5501219512195122, "grad_norm": 0.6450028419494629, "learning_rate": 1.6332520325203255e-05, "loss": 0.0437, "step": 45110 }, { "epoch": 0.5501829268292683, "grad_norm": 1.2394317388534546, "learning_rate": 1.6332113821138213e-05, "loss": 0.0692, "step": 45115 }, { "epoch": 0.5502439024390244, "grad_norm": 0.917730450630188, "learning_rate": 1.633170731707317e-05, "loss": 0.0548, "step": 45120 }, { "epoch": 0.5503048780487805, "grad_norm": 0.5711794495582581, "learning_rate": 1.633130081300813e-05, "loss": 0.0996, "step": 45125 }, { "epoch": 0.5503658536585366, "grad_norm": 0.8424723148345947, "learning_rate": 1.633089430894309e-05, "loss": 0.0721, "step": 45130 }, { "epoch": 0.5504268292682927, "grad_norm": 0.5561450123786926, "learning_rate": 1.633048780487805e-05, "loss": 0.0483, "step": 45135 }, { "epoch": 0.5504878048780488, "grad_norm": 0.4119623899459839, "learning_rate": 1.633008130081301e-05, "loss": 0.0485, "step": 45140 }, { "epoch": 0.5505487804878049, "grad_norm": 0.9740258455276489, "learning_rate": 1.632967479674797e-05, "loss": 0.1354, "step": 45145 }, { "epoch": 0.550609756097561, "grad_norm": 0.7849630117416382, "learning_rate": 1.6329268292682927e-05, "loss": 0.0926, "step": 45150 }, { "epoch": 0.5506707317073171, "grad_norm": 0.6844348311424255, "learning_rate": 1.6328861788617888e-05, "loss": 0.0706, "step": 45155 }, { "epoch": 0.5507317073170732, "grad_norm": 0.5132800340652466, "learning_rate": 1.6328455284552846e-05, "loss": 0.0477, "step": 45160 }, { "epoch": 0.5507926829268293, "grad_norm": 0.6245512366294861, "learning_rate": 1.6328048780487808e-05, "loss": 0.0719, "step": 45165 }, { "epoch": 0.5508536585365854, "grad_norm": 0.9473905563354492, "learning_rate": 1.6327642276422766e-05, "loss": 0.0479, "step": 45170 }, { "epoch": 0.5509146341463415, "grad_norm": 0.4240623712539673, "learning_rate": 1.6327235772357727e-05, "loss": 0.0784, "step": 45175 }, { "epoch": 0.5509756097560976, "grad_norm": 1.0644160509109497, "learning_rate": 1.6326829268292682e-05, "loss": 0.0473, "step": 45180 }, { "epoch": 0.5510365853658536, "grad_norm": 0.6927003264427185, "learning_rate": 1.6326422764227644e-05, "loss": 0.0836, "step": 45185 }, { "epoch": 0.5510975609756098, "grad_norm": 0.581325113773346, "learning_rate": 1.63260162601626e-05, "loss": 0.0408, "step": 45190 }, { "epoch": 0.5511585365853658, "grad_norm": 1.322129487991333, "learning_rate": 1.6325609756097563e-05, "loss": 0.0465, "step": 45195 }, { "epoch": 0.551219512195122, "grad_norm": 0.2660747766494751, "learning_rate": 1.632520325203252e-05, "loss": 0.0568, "step": 45200 }, { "epoch": 0.551280487804878, "grad_norm": 0.7927122116088867, "learning_rate": 1.6324796747967483e-05, "loss": 0.0564, "step": 45205 }, { "epoch": 0.5513414634146342, "grad_norm": 1.1150615215301514, "learning_rate": 1.632439024390244e-05, "loss": 0.0625, "step": 45210 }, { "epoch": 0.5514024390243902, "grad_norm": 0.6359114646911621, "learning_rate": 1.63239837398374e-05, "loss": 0.0654, "step": 45215 }, { "epoch": 0.5514634146341464, "grad_norm": 0.6407079696655273, "learning_rate": 1.6323577235772357e-05, "loss": 0.0535, "step": 45220 }, { "epoch": 0.5515243902439024, "grad_norm": 0.7731449604034424, "learning_rate": 1.632317073170732e-05, "loss": 0.0759, "step": 45225 }, { "epoch": 0.5515853658536586, "grad_norm": 0.1813511699438095, "learning_rate": 1.6322764227642277e-05, "loss": 0.0351, "step": 45230 }, { "epoch": 0.5516463414634146, "grad_norm": 1.3404513597488403, "learning_rate": 1.632235772357724e-05, "loss": 0.0734, "step": 45235 }, { "epoch": 0.5517073170731708, "grad_norm": 0.49922195076942444, "learning_rate": 1.6321951219512197e-05, "loss": 0.0649, "step": 45240 }, { "epoch": 0.5517682926829268, "grad_norm": 1.0631932020187378, "learning_rate": 1.6321544715447155e-05, "loss": 0.0745, "step": 45245 }, { "epoch": 0.551829268292683, "grad_norm": 0.23442332446575165, "learning_rate": 1.6321138211382116e-05, "loss": 0.0536, "step": 45250 }, { "epoch": 0.551890243902439, "grad_norm": 2.5766446590423584, "learning_rate": 1.6320731707317074e-05, "loss": 0.0374, "step": 45255 }, { "epoch": 0.5519512195121952, "grad_norm": 1.5099890232086182, "learning_rate": 1.6320325203252036e-05, "loss": 0.0747, "step": 45260 }, { "epoch": 0.5520121951219512, "grad_norm": 1.582713007926941, "learning_rate": 1.6319918699186994e-05, "loss": 0.0666, "step": 45265 }, { "epoch": 0.5520731707317074, "grad_norm": 0.35823819041252136, "learning_rate": 1.6319512195121952e-05, "loss": 0.0502, "step": 45270 }, { "epoch": 0.5521341463414634, "grad_norm": 0.43054088950157166, "learning_rate": 1.631910569105691e-05, "loss": 0.0461, "step": 45275 }, { "epoch": 0.5521951219512196, "grad_norm": 0.45586177706718445, "learning_rate": 1.6318699186991872e-05, "loss": 0.0352, "step": 45280 }, { "epoch": 0.5522560975609756, "grad_norm": 0.8572638034820557, "learning_rate": 1.631829268292683e-05, "loss": 0.0611, "step": 45285 }, { "epoch": 0.5523170731707318, "grad_norm": 0.6781869530677795, "learning_rate": 1.631788617886179e-05, "loss": 0.052, "step": 45290 }, { "epoch": 0.5523780487804878, "grad_norm": 0.6295416355133057, "learning_rate": 1.631747967479675e-05, "loss": 0.088, "step": 45295 }, { "epoch": 0.552439024390244, "grad_norm": 0.5935406684875488, "learning_rate": 1.6317073170731708e-05, "loss": 0.0526, "step": 45300 }, { "epoch": 0.5525, "grad_norm": 0.34943947196006775, "learning_rate": 1.6316666666666666e-05, "loss": 0.0521, "step": 45305 }, { "epoch": 0.5525609756097561, "grad_norm": 0.24685831367969513, "learning_rate": 1.6316260162601627e-05, "loss": 0.0356, "step": 45310 }, { "epoch": 0.5526219512195122, "grad_norm": 0.6324394345283508, "learning_rate": 1.6315853658536585e-05, "loss": 0.0468, "step": 45315 }, { "epoch": 0.5526829268292683, "grad_norm": 0.5435112714767456, "learning_rate": 1.6315447154471547e-05, "loss": 0.0578, "step": 45320 }, { "epoch": 0.5527439024390244, "grad_norm": 0.638103187084198, "learning_rate": 1.6315040650406505e-05, "loss": 0.0687, "step": 45325 }, { "epoch": 0.5528048780487805, "grad_norm": 0.9755632877349854, "learning_rate": 1.6314634146341463e-05, "loss": 0.1049, "step": 45330 }, { "epoch": 0.5528658536585366, "grad_norm": 0.7170780301094055, "learning_rate": 1.6314227642276425e-05, "loss": 0.0818, "step": 45335 }, { "epoch": 0.5529268292682927, "grad_norm": 0.7340415120124817, "learning_rate": 1.6313821138211383e-05, "loss": 0.0668, "step": 45340 }, { "epoch": 0.5529878048780488, "grad_norm": 0.49409037828445435, "learning_rate": 1.6313414634146344e-05, "loss": 0.0542, "step": 45345 }, { "epoch": 0.5530487804878049, "grad_norm": 0.5823016166687012, "learning_rate": 1.6313008130081302e-05, "loss": 0.0438, "step": 45350 }, { "epoch": 0.553109756097561, "grad_norm": 1.273830771446228, "learning_rate": 1.6312601626016264e-05, "loss": 0.0626, "step": 45355 }, { "epoch": 0.5531707317073171, "grad_norm": 0.3582485318183899, "learning_rate": 1.631219512195122e-05, "loss": 0.0715, "step": 45360 }, { "epoch": 0.5532317073170732, "grad_norm": 0.32325780391693115, "learning_rate": 1.631178861788618e-05, "loss": 0.0548, "step": 45365 }, { "epoch": 0.5532926829268293, "grad_norm": 0.9559658765792847, "learning_rate": 1.631138211382114e-05, "loss": 0.0871, "step": 45370 }, { "epoch": 0.5533536585365854, "grad_norm": 0.6001525521278381, "learning_rate": 1.63109756097561e-05, "loss": 0.0662, "step": 45375 }, { "epoch": 0.5534146341463415, "grad_norm": 0.6680582165718079, "learning_rate": 1.6310569105691058e-05, "loss": 0.0763, "step": 45380 }, { "epoch": 0.5534756097560976, "grad_norm": 0.6855241060256958, "learning_rate": 1.631016260162602e-05, "loss": 0.0743, "step": 45385 }, { "epoch": 0.5535365853658537, "grad_norm": 0.5007503032684326, "learning_rate": 1.6309756097560974e-05, "loss": 0.0541, "step": 45390 }, { "epoch": 0.5535975609756097, "grad_norm": 0.5270557403564453, "learning_rate": 1.6309349593495936e-05, "loss": 0.0542, "step": 45395 }, { "epoch": 0.5536585365853659, "grad_norm": 0.5886595249176025, "learning_rate": 1.6308943089430894e-05, "loss": 0.0716, "step": 45400 }, { "epoch": 0.5537195121951219, "grad_norm": 0.9383558630943298, "learning_rate": 1.6308536585365855e-05, "loss": 0.0629, "step": 45405 }, { "epoch": 0.5537804878048781, "grad_norm": 0.7700381278991699, "learning_rate": 1.6308130081300814e-05, "loss": 0.0738, "step": 45410 }, { "epoch": 0.5538414634146341, "grad_norm": 0.6772908568382263, "learning_rate": 1.6307723577235775e-05, "loss": 0.0512, "step": 45415 }, { "epoch": 0.5539024390243903, "grad_norm": 0.4805298149585724, "learning_rate": 1.6307317073170733e-05, "loss": 0.0459, "step": 45420 }, { "epoch": 0.5539634146341463, "grad_norm": 0.3375612795352936, "learning_rate": 1.630691056910569e-05, "loss": 0.0693, "step": 45425 }, { "epoch": 0.5540243902439025, "grad_norm": 1.0448073148727417, "learning_rate": 1.6306504065040653e-05, "loss": 0.0507, "step": 45430 }, { "epoch": 0.5540853658536585, "grad_norm": 0.7184239029884338, "learning_rate": 1.630609756097561e-05, "loss": 0.0624, "step": 45435 }, { "epoch": 0.5541463414634147, "grad_norm": 0.3451814651489258, "learning_rate": 1.6305691056910572e-05, "loss": 0.0553, "step": 45440 }, { "epoch": 0.5542073170731707, "grad_norm": 0.34352684020996094, "learning_rate": 1.630528455284553e-05, "loss": 0.082, "step": 45445 }, { "epoch": 0.5542682926829269, "grad_norm": 0.5196601152420044, "learning_rate": 1.630487804878049e-05, "loss": 0.0783, "step": 45450 }, { "epoch": 0.5543292682926829, "grad_norm": 1.5843104124069214, "learning_rate": 1.6304471544715447e-05, "loss": 0.0882, "step": 45455 }, { "epoch": 0.5543902439024391, "grad_norm": 0.6889663338661194, "learning_rate": 1.630406504065041e-05, "loss": 0.0697, "step": 45460 }, { "epoch": 0.5544512195121951, "grad_norm": 0.574287474155426, "learning_rate": 1.6303658536585366e-05, "loss": 0.0474, "step": 45465 }, { "epoch": 0.5545121951219513, "grad_norm": 0.5069257616996765, "learning_rate": 1.6303252032520328e-05, "loss": 0.0376, "step": 45470 }, { "epoch": 0.5545731707317073, "grad_norm": 0.4583156406879425, "learning_rate": 1.6302845528455286e-05, "loss": 0.0637, "step": 45475 }, { "epoch": 0.5546341463414635, "grad_norm": 0.3389723598957062, "learning_rate": 1.6302439024390244e-05, "loss": 0.0607, "step": 45480 }, { "epoch": 0.5546951219512195, "grad_norm": 0.29950785636901855, "learning_rate": 1.6302032520325202e-05, "loss": 0.0582, "step": 45485 }, { "epoch": 0.5547560975609757, "grad_norm": 1.4129233360290527, "learning_rate": 1.6301626016260164e-05, "loss": 0.0388, "step": 45490 }, { "epoch": 0.5548170731707317, "grad_norm": 1.2053563594818115, "learning_rate": 1.6301219512195122e-05, "loss": 0.06, "step": 45495 }, { "epoch": 0.5548780487804879, "grad_norm": 0.30343514680862427, "learning_rate": 1.6300813008130084e-05, "loss": 0.0642, "step": 45500 }, { "epoch": 0.5549390243902439, "grad_norm": 0.6494382619857788, "learning_rate": 1.630040650406504e-05, "loss": 0.0765, "step": 45505 }, { "epoch": 0.555, "grad_norm": 0.5988620519638062, "learning_rate": 1.63e-05, "loss": 0.0816, "step": 45510 }, { "epoch": 0.5550609756097561, "grad_norm": 0.7223269939422607, "learning_rate": 1.629959349593496e-05, "loss": 0.0577, "step": 45515 }, { "epoch": 0.5551219512195122, "grad_norm": 0.6415673494338989, "learning_rate": 1.629918699186992e-05, "loss": 0.0984, "step": 45520 }, { "epoch": 0.5551829268292683, "grad_norm": 0.7758684754371643, "learning_rate": 1.629878048780488e-05, "loss": 0.0709, "step": 45525 }, { "epoch": 0.5552439024390244, "grad_norm": 0.7608162760734558, "learning_rate": 1.629837398373984e-05, "loss": 0.0741, "step": 45530 }, { "epoch": 0.5553048780487805, "grad_norm": 0.53471440076828, "learning_rate": 1.6297967479674797e-05, "loss": 0.0684, "step": 45535 }, { "epoch": 0.5553658536585366, "grad_norm": 0.6042473912239075, "learning_rate": 1.6297560975609755e-05, "loss": 0.1047, "step": 45540 }, { "epoch": 0.5554268292682927, "grad_norm": 0.7897319793701172, "learning_rate": 1.6297154471544717e-05, "loss": 0.061, "step": 45545 }, { "epoch": 0.5554878048780488, "grad_norm": 0.791610062122345, "learning_rate": 1.6296747967479675e-05, "loss": 0.0686, "step": 45550 }, { "epoch": 0.5555487804878049, "grad_norm": 0.5049106478691101, "learning_rate": 1.6296341463414636e-05, "loss": 0.0643, "step": 45555 }, { "epoch": 0.555609756097561, "grad_norm": 0.43676242232322693, "learning_rate": 1.6295934959349595e-05, "loss": 0.0513, "step": 45560 }, { "epoch": 0.5556707317073171, "grad_norm": 1.0072054862976074, "learning_rate": 1.6295528455284556e-05, "loss": 0.0391, "step": 45565 }, { "epoch": 0.5557317073170732, "grad_norm": 0.7571812868118286, "learning_rate": 1.629512195121951e-05, "loss": 0.048, "step": 45570 }, { "epoch": 0.5557926829268293, "grad_norm": 0.5388613343238831, "learning_rate": 1.6294715447154472e-05, "loss": 0.092, "step": 45575 }, { "epoch": 0.5558536585365854, "grad_norm": 0.3897816240787506, "learning_rate": 1.629430894308943e-05, "loss": 0.0593, "step": 45580 }, { "epoch": 0.5559146341463415, "grad_norm": 0.9453306794166565, "learning_rate": 1.6293902439024392e-05, "loss": 0.0975, "step": 45585 }, { "epoch": 0.5559756097560976, "grad_norm": 0.5239495038986206, "learning_rate": 1.629349593495935e-05, "loss": 0.0524, "step": 45590 }, { "epoch": 0.5560365853658537, "grad_norm": 0.7713943123817444, "learning_rate": 1.629308943089431e-05, "loss": 0.0855, "step": 45595 }, { "epoch": 0.5560975609756098, "grad_norm": 1.273267388343811, "learning_rate": 1.629268292682927e-05, "loss": 0.0741, "step": 45600 }, { "epoch": 0.5561585365853658, "grad_norm": 0.5872595310211182, "learning_rate": 1.6292276422764228e-05, "loss": 0.0551, "step": 45605 }, { "epoch": 0.556219512195122, "grad_norm": 0.9662926197052002, "learning_rate": 1.629186991869919e-05, "loss": 0.0608, "step": 45610 }, { "epoch": 0.556280487804878, "grad_norm": 0.877586841583252, "learning_rate": 1.6291463414634148e-05, "loss": 0.0933, "step": 45615 }, { "epoch": 0.5563414634146342, "grad_norm": 0.8772621154785156, "learning_rate": 1.629105691056911e-05, "loss": 0.0748, "step": 45620 }, { "epoch": 0.5564024390243902, "grad_norm": 1.9930888414382935, "learning_rate": 1.6290650406504067e-05, "loss": 0.0537, "step": 45625 }, { "epoch": 0.5564634146341464, "grad_norm": 0.4689330458641052, "learning_rate": 1.6290243902439025e-05, "loss": 0.0488, "step": 45630 }, { "epoch": 0.5565243902439024, "grad_norm": 0.5655806064605713, "learning_rate": 1.6289837398373983e-05, "loss": 0.0691, "step": 45635 }, { "epoch": 0.5565853658536586, "grad_norm": 0.9221594929695129, "learning_rate": 1.6289430894308945e-05, "loss": 0.0674, "step": 45640 }, { "epoch": 0.5566463414634146, "grad_norm": 0.5302000045776367, "learning_rate": 1.6289024390243903e-05, "loss": 0.0421, "step": 45645 }, { "epoch": 0.5567073170731708, "grad_norm": 0.5270559191703796, "learning_rate": 1.6288617886178865e-05, "loss": 0.0547, "step": 45650 }, { "epoch": 0.5567682926829268, "grad_norm": 0.41569048166275024, "learning_rate": 1.6288211382113823e-05, "loss": 0.0519, "step": 45655 }, { "epoch": 0.556829268292683, "grad_norm": 0.4404853880405426, "learning_rate": 1.628780487804878e-05, "loss": 0.0455, "step": 45660 }, { "epoch": 0.556890243902439, "grad_norm": 0.8290920257568359, "learning_rate": 1.628739837398374e-05, "loss": 0.07, "step": 45665 }, { "epoch": 0.5569512195121952, "grad_norm": 0.37204527854919434, "learning_rate": 1.62869918699187e-05, "loss": 0.0555, "step": 45670 }, { "epoch": 0.5570121951219512, "grad_norm": 0.47348934412002563, "learning_rate": 1.628658536585366e-05, "loss": 0.0584, "step": 45675 }, { "epoch": 0.5570731707317074, "grad_norm": 0.4786236882209778, "learning_rate": 1.628617886178862e-05, "loss": 0.0768, "step": 45680 }, { "epoch": 0.5571341463414634, "grad_norm": 0.9816714525222778, "learning_rate": 1.628577235772358e-05, "loss": 0.0656, "step": 45685 }, { "epoch": 0.5571951219512196, "grad_norm": 0.613588273525238, "learning_rate": 1.6285365853658536e-05, "loss": 0.0908, "step": 45690 }, { "epoch": 0.5572560975609756, "grad_norm": 0.6724909543991089, "learning_rate": 1.6284959349593498e-05, "loss": 0.0459, "step": 45695 }, { "epoch": 0.5573170731707318, "grad_norm": 0.7457009553909302, "learning_rate": 1.6284552845528456e-05, "loss": 0.0552, "step": 45700 }, { "epoch": 0.5573780487804878, "grad_norm": 0.5305752754211426, "learning_rate": 1.6284146341463418e-05, "loss": 0.0977, "step": 45705 }, { "epoch": 0.557439024390244, "grad_norm": 0.7042616009712219, "learning_rate": 1.6283739837398376e-05, "loss": 0.0773, "step": 45710 }, { "epoch": 0.5575, "grad_norm": 0.7970417737960815, "learning_rate": 1.6283333333333334e-05, "loss": 0.0656, "step": 45715 }, { "epoch": 0.5575609756097561, "grad_norm": 0.6617358922958374, "learning_rate": 1.6282926829268292e-05, "loss": 0.0638, "step": 45720 }, { "epoch": 0.5576219512195122, "grad_norm": 0.9096104502677917, "learning_rate": 1.6282520325203253e-05, "loss": 0.0407, "step": 45725 }, { "epoch": 0.5576829268292683, "grad_norm": 0.5231999158859253, "learning_rate": 1.628211382113821e-05, "loss": 0.0541, "step": 45730 }, { "epoch": 0.5577439024390244, "grad_norm": 0.6099848747253418, "learning_rate": 1.6281707317073173e-05, "loss": 0.0923, "step": 45735 }, { "epoch": 0.5578048780487805, "grad_norm": 0.6163778901100159, "learning_rate": 1.628130081300813e-05, "loss": 0.1014, "step": 45740 }, { "epoch": 0.5578658536585366, "grad_norm": 0.5751792192459106, "learning_rate": 1.6280894308943093e-05, "loss": 0.0749, "step": 45745 }, { "epoch": 0.5579268292682927, "grad_norm": 0.4244912564754486, "learning_rate": 1.6280487804878048e-05, "loss": 0.0573, "step": 45750 }, { "epoch": 0.5579878048780488, "grad_norm": 0.37129727005958557, "learning_rate": 1.628008130081301e-05, "loss": 0.0331, "step": 45755 }, { "epoch": 0.5580487804878049, "grad_norm": 0.48104965686798096, "learning_rate": 1.6279674796747967e-05, "loss": 0.0749, "step": 45760 }, { "epoch": 0.558109756097561, "grad_norm": 0.5416915416717529, "learning_rate": 1.627926829268293e-05, "loss": 0.0784, "step": 45765 }, { "epoch": 0.5581707317073171, "grad_norm": 0.3019922077655792, "learning_rate": 1.6278861788617887e-05, "loss": 0.0612, "step": 45770 }, { "epoch": 0.5582317073170732, "grad_norm": 0.6280457377433777, "learning_rate": 1.627845528455285e-05, "loss": 0.0827, "step": 45775 }, { "epoch": 0.5582926829268293, "grad_norm": 0.5800514817237854, "learning_rate": 1.6278048780487806e-05, "loss": 0.0757, "step": 45780 }, { "epoch": 0.5583536585365854, "grad_norm": 2.7487289905548096, "learning_rate": 1.6277642276422765e-05, "loss": 0.0838, "step": 45785 }, { "epoch": 0.5584146341463415, "grad_norm": 0.18887774646282196, "learning_rate": 1.6277235772357726e-05, "loss": 0.0459, "step": 45790 }, { "epoch": 0.5584756097560976, "grad_norm": 0.547473132610321, "learning_rate": 1.6276829268292684e-05, "loss": 0.0468, "step": 45795 }, { "epoch": 0.5585365853658537, "grad_norm": 0.7453975677490234, "learning_rate": 1.6276422764227642e-05, "loss": 0.066, "step": 45800 }, { "epoch": 0.5585975609756098, "grad_norm": 0.6434091329574585, "learning_rate": 1.6276016260162604e-05, "loss": 0.0596, "step": 45805 }, { "epoch": 0.5586585365853659, "grad_norm": 0.5830356478691101, "learning_rate": 1.6275609756097562e-05, "loss": 0.0862, "step": 45810 }, { "epoch": 0.558719512195122, "grad_norm": 0.8459729552268982, "learning_rate": 1.627520325203252e-05, "loss": 0.0951, "step": 45815 }, { "epoch": 0.5587804878048781, "grad_norm": 1.3085309267044067, "learning_rate": 1.627479674796748e-05, "loss": 0.061, "step": 45820 }, { "epoch": 0.5588414634146341, "grad_norm": 0.8238160014152527, "learning_rate": 1.627439024390244e-05, "loss": 0.0703, "step": 45825 }, { "epoch": 0.5589024390243903, "grad_norm": 0.6873769164085388, "learning_rate": 1.62739837398374e-05, "loss": 0.0492, "step": 45830 }, { "epoch": 0.5589634146341463, "grad_norm": 0.3580169677734375, "learning_rate": 1.627357723577236e-05, "loss": 0.0507, "step": 45835 }, { "epoch": 0.5590243902439025, "grad_norm": 0.6322861313819885, "learning_rate": 1.6273170731707318e-05, "loss": 0.0632, "step": 45840 }, { "epoch": 0.5590853658536585, "grad_norm": 0.4567103087902069, "learning_rate": 1.6272764227642276e-05, "loss": 0.062, "step": 45845 }, { "epoch": 0.5591463414634147, "grad_norm": 0.5281275510787964, "learning_rate": 1.6272357723577237e-05, "loss": 0.0501, "step": 45850 }, { "epoch": 0.5592073170731707, "grad_norm": 0.39153796434402466, "learning_rate": 1.6271951219512195e-05, "loss": 0.0687, "step": 45855 }, { "epoch": 0.5592682926829269, "grad_norm": 0.7657297253608704, "learning_rate": 1.6271544715447157e-05, "loss": 0.0601, "step": 45860 }, { "epoch": 0.5593292682926829, "grad_norm": 0.4635101854801178, "learning_rate": 1.6271138211382115e-05, "loss": 0.0485, "step": 45865 }, { "epoch": 0.5593902439024391, "grad_norm": 1.1336796283721924, "learning_rate": 1.6270731707317073e-05, "loss": 0.0526, "step": 45870 }, { "epoch": 0.5594512195121951, "grad_norm": 0.7220088243484497, "learning_rate": 1.6270325203252035e-05, "loss": 0.0596, "step": 45875 }, { "epoch": 0.5595121951219513, "grad_norm": 0.5069087743759155, "learning_rate": 1.6269918699186993e-05, "loss": 0.0581, "step": 45880 }, { "epoch": 0.5595731707317073, "grad_norm": 0.7745506763458252, "learning_rate": 1.6269512195121954e-05, "loss": 0.074, "step": 45885 }, { "epoch": 0.5596341463414635, "grad_norm": 2.863992691040039, "learning_rate": 1.6269105691056912e-05, "loss": 0.0801, "step": 45890 }, { "epoch": 0.5596951219512195, "grad_norm": 0.5825469493865967, "learning_rate": 1.626869918699187e-05, "loss": 0.0647, "step": 45895 }, { "epoch": 0.5597560975609757, "grad_norm": 0.6314193606376648, "learning_rate": 1.626829268292683e-05, "loss": 0.0647, "step": 45900 }, { "epoch": 0.5598170731707317, "grad_norm": 0.9729384779930115, "learning_rate": 1.626788617886179e-05, "loss": 0.0884, "step": 45905 }, { "epoch": 0.5598780487804879, "grad_norm": 0.9857239723205566, "learning_rate": 1.6267479674796748e-05, "loss": 0.0866, "step": 45910 }, { "epoch": 0.5599390243902439, "grad_norm": 0.7579402327537537, "learning_rate": 1.626707317073171e-05, "loss": 0.0859, "step": 45915 }, { "epoch": 0.56, "grad_norm": 0.3642420768737793, "learning_rate": 1.6266666666666668e-05, "loss": 0.0474, "step": 45920 }, { "epoch": 0.5600609756097561, "grad_norm": 0.43019428849220276, "learning_rate": 1.626626016260163e-05, "loss": 0.0861, "step": 45925 }, { "epoch": 0.5601219512195122, "grad_norm": 1.7172166109085083, "learning_rate": 1.6265853658536584e-05, "loss": 0.1023, "step": 45930 }, { "epoch": 0.5601829268292683, "grad_norm": 0.6111389398574829, "learning_rate": 1.6265447154471546e-05, "loss": 0.0594, "step": 45935 }, { "epoch": 0.5602439024390244, "grad_norm": 0.39640751481056213, "learning_rate": 1.6265040650406504e-05, "loss": 0.0517, "step": 45940 }, { "epoch": 0.5603048780487805, "grad_norm": 0.5801132321357727, "learning_rate": 1.6264634146341465e-05, "loss": 0.063, "step": 45945 }, { "epoch": 0.5603658536585366, "grad_norm": 1.4639972448349, "learning_rate": 1.6264227642276423e-05, "loss": 0.0762, "step": 45950 }, { "epoch": 0.5604268292682927, "grad_norm": 0.6410001516342163, "learning_rate": 1.6263821138211385e-05, "loss": 0.0468, "step": 45955 }, { "epoch": 0.5604878048780488, "grad_norm": 0.7253291010856628, "learning_rate": 1.6263414634146343e-05, "loss": 0.1037, "step": 45960 }, { "epoch": 0.5605487804878049, "grad_norm": 0.7313802242279053, "learning_rate": 1.62630081300813e-05, "loss": 0.0516, "step": 45965 }, { "epoch": 0.560609756097561, "grad_norm": 0.8684633374214172, "learning_rate": 1.6262601626016263e-05, "loss": 0.0676, "step": 45970 }, { "epoch": 0.5606707317073171, "grad_norm": 0.8215538263320923, "learning_rate": 1.626219512195122e-05, "loss": 0.0883, "step": 45975 }, { "epoch": 0.5607317073170732, "grad_norm": 0.552221417427063, "learning_rate": 1.626178861788618e-05, "loss": 0.0803, "step": 45980 }, { "epoch": 0.5607926829268293, "grad_norm": 1.761271595954895, "learning_rate": 1.626138211382114e-05, "loss": 0.0536, "step": 45985 }, { "epoch": 0.5608536585365854, "grad_norm": 0.5058152079582214, "learning_rate": 1.62609756097561e-05, "loss": 0.0408, "step": 45990 }, { "epoch": 0.5609146341463415, "grad_norm": 0.549679160118103, "learning_rate": 1.6260569105691057e-05, "loss": 0.085, "step": 45995 }, { "epoch": 0.5609756097560976, "grad_norm": 0.5032292008399963, "learning_rate": 1.6260162601626018e-05, "loss": 0.0685, "step": 46000 }, { "epoch": 0.5610365853658537, "grad_norm": 0.5859928727149963, "learning_rate": 1.6259756097560976e-05, "loss": 0.0778, "step": 46005 }, { "epoch": 0.5610975609756098, "grad_norm": 0.6189988255500793, "learning_rate": 1.6259349593495938e-05, "loss": 0.0574, "step": 46010 }, { "epoch": 0.5611585365853659, "grad_norm": 0.4432143270969391, "learning_rate": 1.6258943089430896e-05, "loss": 0.0569, "step": 46015 }, { "epoch": 0.561219512195122, "grad_norm": 0.5049028992652893, "learning_rate": 1.6258536585365854e-05, "loss": 0.0651, "step": 46020 }, { "epoch": 0.561280487804878, "grad_norm": 0.817966639995575, "learning_rate": 1.6258130081300812e-05, "loss": 0.0718, "step": 46025 }, { "epoch": 0.5613414634146342, "grad_norm": 1.6627357006072998, "learning_rate": 1.6257723577235774e-05, "loss": 0.0947, "step": 46030 }, { "epoch": 0.5614024390243902, "grad_norm": 0.8367448449134827, "learning_rate": 1.6257317073170732e-05, "loss": 0.0765, "step": 46035 }, { "epoch": 0.5614634146341464, "grad_norm": 0.48514100909233093, "learning_rate": 1.6256910569105693e-05, "loss": 0.0501, "step": 46040 }, { "epoch": 0.5615243902439024, "grad_norm": 0.7674830555915833, "learning_rate": 1.625650406504065e-05, "loss": 0.0542, "step": 46045 }, { "epoch": 0.5615853658536586, "grad_norm": 0.4493933320045471, "learning_rate": 1.625609756097561e-05, "loss": 0.0658, "step": 46050 }, { "epoch": 0.5616463414634146, "grad_norm": 0.4490422308444977, "learning_rate": 1.625569105691057e-05, "loss": 0.0359, "step": 46055 }, { "epoch": 0.5617073170731708, "grad_norm": 0.328049898147583, "learning_rate": 1.625528455284553e-05, "loss": 0.0456, "step": 46060 }, { "epoch": 0.5617682926829268, "grad_norm": 0.46166282892227173, "learning_rate": 1.6254878048780487e-05, "loss": 0.029, "step": 46065 }, { "epoch": 0.561829268292683, "grad_norm": 0.5723497271537781, "learning_rate": 1.625447154471545e-05, "loss": 0.0512, "step": 46070 }, { "epoch": 0.561890243902439, "grad_norm": 1.579472303390503, "learning_rate": 1.6254065040650407e-05, "loss": 0.0689, "step": 46075 }, { "epoch": 0.5619512195121952, "grad_norm": 0.851283848285675, "learning_rate": 1.6253658536585365e-05, "loss": 0.0368, "step": 46080 }, { "epoch": 0.5620121951219512, "grad_norm": 0.6368318200111389, "learning_rate": 1.6253252032520327e-05, "loss": 0.0431, "step": 46085 }, { "epoch": 0.5620731707317074, "grad_norm": 0.4950317442417145, "learning_rate": 1.6252845528455285e-05, "loss": 0.0831, "step": 46090 }, { "epoch": 0.5621341463414634, "grad_norm": 0.839548647403717, "learning_rate": 1.6252439024390246e-05, "loss": 0.0382, "step": 46095 }, { "epoch": 0.5621951219512196, "grad_norm": 0.7131622433662415, "learning_rate": 1.6252032520325205e-05, "loss": 0.0764, "step": 46100 }, { "epoch": 0.5622560975609756, "grad_norm": 0.5021420121192932, "learning_rate": 1.6251626016260166e-05, "loss": 0.0735, "step": 46105 }, { "epoch": 0.5623170731707318, "grad_norm": 1.0523607730865479, "learning_rate": 1.625121951219512e-05, "loss": 0.0479, "step": 46110 }, { "epoch": 0.5623780487804878, "grad_norm": 0.6486049890518188, "learning_rate": 1.6250813008130082e-05, "loss": 0.0555, "step": 46115 }, { "epoch": 0.562439024390244, "grad_norm": 0.9497095346450806, "learning_rate": 1.625040650406504e-05, "loss": 0.0652, "step": 46120 }, { "epoch": 0.5625, "grad_norm": 0.7836289405822754, "learning_rate": 1.6250000000000002e-05, "loss": 0.07, "step": 46125 }, { "epoch": 0.562560975609756, "grad_norm": 0.5912632346153259, "learning_rate": 1.624959349593496e-05, "loss": 0.0723, "step": 46130 }, { "epoch": 0.5626219512195122, "grad_norm": 0.5621532201766968, "learning_rate": 1.624918699186992e-05, "loss": 0.0603, "step": 46135 }, { "epoch": 0.5626829268292682, "grad_norm": 0.5586718916893005, "learning_rate": 1.624878048780488e-05, "loss": 0.0513, "step": 46140 }, { "epoch": 0.5627439024390244, "grad_norm": 0.7109769582748413, "learning_rate": 1.6248373983739838e-05, "loss": 0.0499, "step": 46145 }, { "epoch": 0.5628048780487804, "grad_norm": 0.5465186238288879, "learning_rate": 1.62479674796748e-05, "loss": 0.0604, "step": 46150 }, { "epoch": 0.5628658536585366, "grad_norm": 0.7551673054695129, "learning_rate": 1.6247560975609758e-05, "loss": 0.1058, "step": 46155 }, { "epoch": 0.5629268292682926, "grad_norm": 0.9673438668251038, "learning_rate": 1.6247154471544716e-05, "loss": 0.06, "step": 46160 }, { "epoch": 0.5629878048780488, "grad_norm": 0.358980655670166, "learning_rate": 1.6246747967479677e-05, "loss": 0.0342, "step": 46165 }, { "epoch": 0.5630487804878048, "grad_norm": 1.0135890245437622, "learning_rate": 1.6246341463414635e-05, "loss": 0.0794, "step": 46170 }, { "epoch": 0.563109756097561, "grad_norm": 0.7921364307403564, "learning_rate": 1.6245934959349593e-05, "loss": 0.1006, "step": 46175 }, { "epoch": 0.563170731707317, "grad_norm": 0.699504017829895, "learning_rate": 1.6245528455284555e-05, "loss": 0.0652, "step": 46180 }, { "epoch": 0.5632317073170732, "grad_norm": 0.7694947719573975, "learning_rate": 1.6245121951219513e-05, "loss": 0.0673, "step": 46185 }, { "epoch": 0.5632926829268292, "grad_norm": 1.1917829513549805, "learning_rate": 1.6244715447154475e-05, "loss": 0.0908, "step": 46190 }, { "epoch": 0.5633536585365854, "grad_norm": 0.8619586229324341, "learning_rate": 1.6244308943089433e-05, "loss": 0.0559, "step": 46195 }, { "epoch": 0.5634146341463414, "grad_norm": 0.32421064376831055, "learning_rate": 1.6243902439024394e-05, "loss": 0.0892, "step": 46200 }, { "epoch": 0.5634756097560976, "grad_norm": 0.4973379671573639, "learning_rate": 1.624349593495935e-05, "loss": 0.0477, "step": 46205 }, { "epoch": 0.5635365853658536, "grad_norm": 0.869947612285614, "learning_rate": 1.624308943089431e-05, "loss": 0.0591, "step": 46210 }, { "epoch": 0.5635975609756098, "grad_norm": 0.7829959988594055, "learning_rate": 1.624268292682927e-05, "loss": 0.0842, "step": 46215 }, { "epoch": 0.5636585365853658, "grad_norm": 0.499945729970932, "learning_rate": 1.624227642276423e-05, "loss": 0.0495, "step": 46220 }, { "epoch": 0.563719512195122, "grad_norm": 1.0353610515594482, "learning_rate": 1.6241869918699188e-05, "loss": 0.0868, "step": 46225 }, { "epoch": 0.563780487804878, "grad_norm": 0.5141001343727112, "learning_rate": 1.624146341463415e-05, "loss": 0.0583, "step": 46230 }, { "epoch": 0.5638414634146341, "grad_norm": 0.6234771013259888, "learning_rate": 1.6241056910569108e-05, "loss": 0.0788, "step": 46235 }, { "epoch": 0.5639024390243902, "grad_norm": 0.5787761807441711, "learning_rate": 1.6240650406504066e-05, "loss": 0.0441, "step": 46240 }, { "epoch": 0.5639634146341463, "grad_norm": 0.7176288962364197, "learning_rate": 1.6240243902439024e-05, "loss": 0.0704, "step": 46245 }, { "epoch": 0.5640243902439024, "grad_norm": 0.5336787700653076, "learning_rate": 1.6239837398373986e-05, "loss": 0.0661, "step": 46250 }, { "epoch": 0.5640853658536585, "grad_norm": 0.8223406672477722, "learning_rate": 1.6239430894308944e-05, "loss": 0.0684, "step": 46255 }, { "epoch": 0.5641463414634146, "grad_norm": 0.5917921662330627, "learning_rate": 1.6239024390243905e-05, "loss": 0.0638, "step": 46260 }, { "epoch": 0.5642073170731707, "grad_norm": 3.959562063217163, "learning_rate": 1.6238617886178863e-05, "loss": 0.0552, "step": 46265 }, { "epoch": 0.5642682926829268, "grad_norm": 0.9098039269447327, "learning_rate": 1.623821138211382e-05, "loss": 0.0656, "step": 46270 }, { "epoch": 0.5643292682926829, "grad_norm": 0.4847547709941864, "learning_rate": 1.6237804878048783e-05, "loss": 0.0681, "step": 46275 }, { "epoch": 0.564390243902439, "grad_norm": 0.44274812936782837, "learning_rate": 1.623739837398374e-05, "loss": 0.0879, "step": 46280 }, { "epoch": 0.5644512195121951, "grad_norm": 0.8138731718063354, "learning_rate": 1.6236991869918703e-05, "loss": 0.0405, "step": 46285 }, { "epoch": 0.5645121951219512, "grad_norm": 0.9465177655220032, "learning_rate": 1.623658536585366e-05, "loss": 0.0723, "step": 46290 }, { "epoch": 0.5645731707317073, "grad_norm": 0.6495517492294312, "learning_rate": 1.623617886178862e-05, "loss": 0.06, "step": 46295 }, { "epoch": 0.5646341463414634, "grad_norm": 0.5141324400901794, "learning_rate": 1.6235772357723577e-05, "loss": 0.0571, "step": 46300 }, { "epoch": 0.5646951219512195, "grad_norm": 0.3265562951564789, "learning_rate": 1.623536585365854e-05, "loss": 0.0445, "step": 46305 }, { "epoch": 0.5647560975609756, "grad_norm": 0.4803733825683594, "learning_rate": 1.6234959349593497e-05, "loss": 0.0753, "step": 46310 }, { "epoch": 0.5648170731707317, "grad_norm": 0.7171514630317688, "learning_rate": 1.6234552845528458e-05, "loss": 0.0788, "step": 46315 }, { "epoch": 0.5648780487804878, "grad_norm": 0.3584858775138855, "learning_rate": 1.6234146341463416e-05, "loss": 0.0474, "step": 46320 }, { "epoch": 0.5649390243902439, "grad_norm": 0.6886089444160461, "learning_rate": 1.6233739837398375e-05, "loss": 0.0533, "step": 46325 }, { "epoch": 0.565, "grad_norm": 0.835648238658905, "learning_rate": 1.6233333333333333e-05, "loss": 0.0741, "step": 46330 }, { "epoch": 0.5650609756097561, "grad_norm": 0.3378276824951172, "learning_rate": 1.6232926829268294e-05, "loss": 0.0533, "step": 46335 }, { "epoch": 0.5651219512195121, "grad_norm": 0.8606967926025391, "learning_rate": 1.6232520325203252e-05, "loss": 0.0598, "step": 46340 }, { "epoch": 0.5651829268292683, "grad_norm": 0.7493751049041748, "learning_rate": 1.6232113821138214e-05, "loss": 0.0605, "step": 46345 }, { "epoch": 0.5652439024390243, "grad_norm": 0.750160276889801, "learning_rate": 1.6231707317073172e-05, "loss": 0.0586, "step": 46350 }, { "epoch": 0.5653048780487805, "grad_norm": 0.7318666577339172, "learning_rate": 1.623130081300813e-05, "loss": 0.0579, "step": 46355 }, { "epoch": 0.5653658536585365, "grad_norm": 0.8270606398582458, "learning_rate": 1.623089430894309e-05, "loss": 0.0618, "step": 46360 }, { "epoch": 0.5654268292682927, "grad_norm": 0.6919379830360413, "learning_rate": 1.623048780487805e-05, "loss": 0.0615, "step": 46365 }, { "epoch": 0.5654878048780487, "grad_norm": 0.624613344669342, "learning_rate": 1.623008130081301e-05, "loss": 0.0639, "step": 46370 }, { "epoch": 0.5655487804878049, "grad_norm": 1.0418680906295776, "learning_rate": 1.622967479674797e-05, "loss": 0.0527, "step": 46375 }, { "epoch": 0.5656097560975609, "grad_norm": 0.5035647749900818, "learning_rate": 1.622926829268293e-05, "loss": 0.0516, "step": 46380 }, { "epoch": 0.5656707317073171, "grad_norm": 0.7489742040634155, "learning_rate": 1.6228861788617886e-05, "loss": 0.0677, "step": 46385 }, { "epoch": 0.5657317073170731, "grad_norm": 0.339000940322876, "learning_rate": 1.6228455284552847e-05, "loss": 0.0555, "step": 46390 }, { "epoch": 0.5657926829268293, "grad_norm": 0.38156673312187195, "learning_rate": 1.6228048780487805e-05, "loss": 0.0485, "step": 46395 }, { "epoch": 0.5658536585365853, "grad_norm": 0.5495629906654358, "learning_rate": 1.6227642276422767e-05, "loss": 0.0874, "step": 46400 }, { "epoch": 0.5659146341463415, "grad_norm": 0.3488875925540924, "learning_rate": 1.6227235772357725e-05, "loss": 0.0646, "step": 46405 }, { "epoch": 0.5659756097560975, "grad_norm": 0.46291065216064453, "learning_rate": 1.6226829268292686e-05, "loss": 0.0602, "step": 46410 }, { "epoch": 0.5660365853658537, "grad_norm": 1.0262925624847412, "learning_rate": 1.6226422764227645e-05, "loss": 0.065, "step": 46415 }, { "epoch": 0.5660975609756097, "grad_norm": 0.32907965779304504, "learning_rate": 1.6226016260162603e-05, "loss": 0.0444, "step": 46420 }, { "epoch": 0.5661585365853659, "grad_norm": 0.5677279829978943, "learning_rate": 1.622560975609756e-05, "loss": 0.0715, "step": 46425 }, { "epoch": 0.5662195121951219, "grad_norm": 1.5626211166381836, "learning_rate": 1.6225203252032522e-05, "loss": 0.1221, "step": 46430 }, { "epoch": 0.566280487804878, "grad_norm": 0.4705090820789337, "learning_rate": 1.622479674796748e-05, "loss": 0.0682, "step": 46435 }, { "epoch": 0.5663414634146341, "grad_norm": 0.33730995655059814, "learning_rate": 1.6224390243902442e-05, "loss": 0.0374, "step": 46440 }, { "epoch": 0.5664024390243902, "grad_norm": 0.43791040778160095, "learning_rate": 1.62239837398374e-05, "loss": 0.0381, "step": 46445 }, { "epoch": 0.5664634146341463, "grad_norm": 0.6783211827278137, "learning_rate": 1.6223577235772358e-05, "loss": 0.0478, "step": 46450 }, { "epoch": 0.5665243902439024, "grad_norm": 0.5973264575004578, "learning_rate": 1.622317073170732e-05, "loss": 0.0751, "step": 46455 }, { "epoch": 0.5665853658536585, "grad_norm": 0.7640355825424194, "learning_rate": 1.6222764227642278e-05, "loss": 0.049, "step": 46460 }, { "epoch": 0.5666463414634146, "grad_norm": 0.4075293242931366, "learning_rate": 1.622235772357724e-05, "loss": 0.0761, "step": 46465 }, { "epoch": 0.5667073170731707, "grad_norm": 0.9817152619361877, "learning_rate": 1.6221951219512197e-05, "loss": 0.1045, "step": 46470 }, { "epoch": 0.5667682926829268, "grad_norm": 0.8725124597549438, "learning_rate": 1.6221544715447156e-05, "loss": 0.0742, "step": 46475 }, { "epoch": 0.5668292682926829, "grad_norm": 0.37610480189323425, "learning_rate": 1.6221138211382114e-05, "loss": 0.0579, "step": 46480 }, { "epoch": 0.566890243902439, "grad_norm": 0.5434421300888062, "learning_rate": 1.6220731707317075e-05, "loss": 0.0755, "step": 46485 }, { "epoch": 0.5669512195121951, "grad_norm": 0.5856258869171143, "learning_rate": 1.6220325203252033e-05, "loss": 0.1508, "step": 46490 }, { "epoch": 0.5670121951219512, "grad_norm": 0.6187644600868225, "learning_rate": 1.6219918699186995e-05, "loss": 0.0651, "step": 46495 }, { "epoch": 0.5670731707317073, "grad_norm": 0.8793842196464539, "learning_rate": 1.6219512195121953e-05, "loss": 0.0861, "step": 46500 }, { "epoch": 0.5671341463414634, "grad_norm": 0.5501372814178467, "learning_rate": 1.621910569105691e-05, "loss": 0.0452, "step": 46505 }, { "epoch": 0.5671951219512195, "grad_norm": 0.5464043021202087, "learning_rate": 1.621869918699187e-05, "loss": 0.0532, "step": 46510 }, { "epoch": 0.5672560975609756, "grad_norm": 0.5630136132240295, "learning_rate": 1.621829268292683e-05, "loss": 0.0422, "step": 46515 }, { "epoch": 0.5673170731707317, "grad_norm": 0.6729056239128113, "learning_rate": 1.621788617886179e-05, "loss": 0.0642, "step": 46520 }, { "epoch": 0.5673780487804878, "grad_norm": 1.543023705482483, "learning_rate": 1.621747967479675e-05, "loss": 0.058, "step": 46525 }, { "epoch": 0.5674390243902439, "grad_norm": 0.7801323533058167, "learning_rate": 1.621707317073171e-05, "loss": 0.05, "step": 46530 }, { "epoch": 0.5675, "grad_norm": 0.6845968961715698, "learning_rate": 1.6216666666666667e-05, "loss": 0.072, "step": 46535 }, { "epoch": 0.567560975609756, "grad_norm": 0.7672803401947021, "learning_rate": 1.6216260162601628e-05, "loss": 0.0347, "step": 46540 }, { "epoch": 0.5676219512195122, "grad_norm": 0.6717341542243958, "learning_rate": 1.6215853658536586e-05, "loss": 0.0657, "step": 46545 }, { "epoch": 0.5676829268292682, "grad_norm": 1.0791738033294678, "learning_rate": 1.6215447154471548e-05, "loss": 0.0795, "step": 46550 }, { "epoch": 0.5677439024390244, "grad_norm": 0.6991225481033325, "learning_rate": 1.6215040650406506e-05, "loss": 0.076, "step": 46555 }, { "epoch": 0.5678048780487804, "grad_norm": 0.5062735080718994, "learning_rate": 1.6214634146341467e-05, "loss": 0.0479, "step": 46560 }, { "epoch": 0.5678658536585366, "grad_norm": 0.7243692278862, "learning_rate": 1.6214227642276422e-05, "loss": 0.0596, "step": 46565 }, { "epoch": 0.5679268292682926, "grad_norm": 0.889141321182251, "learning_rate": 1.6213821138211384e-05, "loss": 0.0703, "step": 46570 }, { "epoch": 0.5679878048780488, "grad_norm": 0.5330141186714172, "learning_rate": 1.6213414634146342e-05, "loss": 0.0372, "step": 46575 }, { "epoch": 0.5680487804878048, "grad_norm": 0.6314013004302979, "learning_rate": 1.6213008130081303e-05, "loss": 0.0684, "step": 46580 }, { "epoch": 0.568109756097561, "grad_norm": 0.4405069649219513, "learning_rate": 1.621260162601626e-05, "loss": 0.0567, "step": 46585 }, { "epoch": 0.568170731707317, "grad_norm": 0.42277655005455017, "learning_rate": 1.6212195121951223e-05, "loss": 0.0602, "step": 46590 }, { "epoch": 0.5682317073170732, "grad_norm": 0.8414978981018066, "learning_rate": 1.6211788617886178e-05, "loss": 0.0629, "step": 46595 }, { "epoch": 0.5682926829268292, "grad_norm": 0.5961487293243408, "learning_rate": 1.621138211382114e-05, "loss": 0.0549, "step": 46600 }, { "epoch": 0.5683536585365854, "grad_norm": 0.5431594252586365, "learning_rate": 1.6210975609756097e-05, "loss": 0.0434, "step": 46605 }, { "epoch": 0.5684146341463414, "grad_norm": 0.7230547070503235, "learning_rate": 1.621056910569106e-05, "loss": 0.0339, "step": 46610 }, { "epoch": 0.5684756097560976, "grad_norm": 0.6605516672134399, "learning_rate": 1.6210162601626017e-05, "loss": 0.0652, "step": 46615 }, { "epoch": 0.5685365853658536, "grad_norm": 0.855634868144989, "learning_rate": 1.620975609756098e-05, "loss": 0.0809, "step": 46620 }, { "epoch": 0.5685975609756098, "grad_norm": 0.8241590261459351, "learning_rate": 1.6209349593495937e-05, "loss": 0.0527, "step": 46625 }, { "epoch": 0.5686585365853658, "grad_norm": 0.48594674468040466, "learning_rate": 1.6208943089430895e-05, "loss": 0.0549, "step": 46630 }, { "epoch": 0.568719512195122, "grad_norm": 1.1033916473388672, "learning_rate": 1.6208536585365856e-05, "loss": 0.0372, "step": 46635 }, { "epoch": 0.568780487804878, "grad_norm": 0.3769097626209259, "learning_rate": 1.6208130081300814e-05, "loss": 0.0767, "step": 46640 }, { "epoch": 0.5688414634146342, "grad_norm": 0.4434499144554138, "learning_rate": 1.6207723577235776e-05, "loss": 0.0734, "step": 46645 }, { "epoch": 0.5689024390243902, "grad_norm": 0.6241351366043091, "learning_rate": 1.6207317073170734e-05, "loss": 0.0723, "step": 46650 }, { "epoch": 0.5689634146341463, "grad_norm": 0.6667167544364929, "learning_rate": 1.6206910569105692e-05, "loss": 0.0599, "step": 46655 }, { "epoch": 0.5690243902439024, "grad_norm": 0.5177663564682007, "learning_rate": 1.620650406504065e-05, "loss": 0.064, "step": 46660 }, { "epoch": 0.5690853658536585, "grad_norm": 0.6105569005012512, "learning_rate": 1.6206097560975612e-05, "loss": 0.0703, "step": 46665 }, { "epoch": 0.5691463414634146, "grad_norm": 0.22907839715480804, "learning_rate": 1.620569105691057e-05, "loss": 0.0527, "step": 46670 }, { "epoch": 0.5692073170731707, "grad_norm": 1.1269562244415283, "learning_rate": 1.620528455284553e-05, "loss": 0.0912, "step": 46675 }, { "epoch": 0.5692682926829268, "grad_norm": 0.6071921586990356, "learning_rate": 1.620487804878049e-05, "loss": 0.0677, "step": 46680 }, { "epoch": 0.5693292682926829, "grad_norm": 0.8176866769790649, "learning_rate": 1.6204471544715448e-05, "loss": 0.0723, "step": 46685 }, { "epoch": 0.569390243902439, "grad_norm": 1.2686842679977417, "learning_rate": 1.6204065040650406e-05, "loss": 0.0766, "step": 46690 }, { "epoch": 0.5694512195121951, "grad_norm": 0.6743295788764954, "learning_rate": 1.6203658536585367e-05, "loss": 0.0856, "step": 46695 }, { "epoch": 0.5695121951219512, "grad_norm": 0.5458331108093262, "learning_rate": 1.6203252032520326e-05, "loss": 0.0568, "step": 46700 }, { "epoch": 0.5695731707317073, "grad_norm": 0.3916226327419281, "learning_rate": 1.6202845528455287e-05, "loss": 0.0607, "step": 46705 }, { "epoch": 0.5696341463414634, "grad_norm": 0.6963006258010864, "learning_rate": 1.6202439024390245e-05, "loss": 0.0669, "step": 46710 }, { "epoch": 0.5696951219512195, "grad_norm": 0.8694283962249756, "learning_rate": 1.6202032520325203e-05, "loss": 0.0554, "step": 46715 }, { "epoch": 0.5697560975609756, "grad_norm": 1.5667743682861328, "learning_rate": 1.6201626016260165e-05, "loss": 0.0732, "step": 46720 }, { "epoch": 0.5698170731707317, "grad_norm": 1.3840669393539429, "learning_rate": 1.6201219512195123e-05, "loss": 0.0651, "step": 46725 }, { "epoch": 0.5698780487804878, "grad_norm": 0.8132798075675964, "learning_rate": 1.6200813008130084e-05, "loss": 0.0828, "step": 46730 }, { "epoch": 0.5699390243902439, "grad_norm": 0.5428960919380188, "learning_rate": 1.6200406504065043e-05, "loss": 0.0906, "step": 46735 }, { "epoch": 0.57, "grad_norm": 0.42732393741607666, "learning_rate": 1.62e-05, "loss": 0.0841, "step": 46740 }, { "epoch": 0.5700609756097561, "grad_norm": 0.506205677986145, "learning_rate": 1.619959349593496e-05, "loss": 0.0689, "step": 46745 }, { "epoch": 0.5701219512195121, "grad_norm": 0.53523188829422, "learning_rate": 1.619918699186992e-05, "loss": 0.0883, "step": 46750 }, { "epoch": 0.5701829268292683, "grad_norm": 0.2615257203578949, "learning_rate": 1.619878048780488e-05, "loss": 0.0697, "step": 46755 }, { "epoch": 0.5702439024390243, "grad_norm": 0.482719361782074, "learning_rate": 1.619837398373984e-05, "loss": 0.0961, "step": 46760 }, { "epoch": 0.5703048780487805, "grad_norm": 0.6272078156471252, "learning_rate": 1.6197967479674798e-05, "loss": 0.0687, "step": 46765 }, { "epoch": 0.5703658536585365, "grad_norm": 1.069467544555664, "learning_rate": 1.619756097560976e-05, "loss": 0.0739, "step": 46770 }, { "epoch": 0.5704268292682927, "grad_norm": 0.2985164523124695, "learning_rate": 1.6197154471544714e-05, "loss": 0.066, "step": 46775 }, { "epoch": 0.5704878048780487, "grad_norm": 2.5866620540618896, "learning_rate": 1.6196747967479676e-05, "loss": 0.0796, "step": 46780 }, { "epoch": 0.5705487804878049, "grad_norm": 1.166625738143921, "learning_rate": 1.6196341463414634e-05, "loss": 0.0481, "step": 46785 }, { "epoch": 0.5706097560975609, "grad_norm": 0.5783154368400574, "learning_rate": 1.6195934959349596e-05, "loss": 0.0733, "step": 46790 }, { "epoch": 0.5706707317073171, "grad_norm": 0.3303678631782532, "learning_rate": 1.6195528455284554e-05, "loss": 0.0486, "step": 46795 }, { "epoch": 0.5707317073170731, "grad_norm": 0.6944186687469482, "learning_rate": 1.6195121951219515e-05, "loss": 0.0711, "step": 46800 }, { "epoch": 0.5707926829268293, "grad_norm": 0.2359035760164261, "learning_rate": 1.6194715447154473e-05, "loss": 0.0395, "step": 46805 }, { "epoch": 0.5708536585365853, "grad_norm": 0.3999520242214203, "learning_rate": 1.619430894308943e-05, "loss": 0.0482, "step": 46810 }, { "epoch": 0.5709146341463415, "grad_norm": 1.072447657585144, "learning_rate": 1.6193902439024393e-05, "loss": 0.0895, "step": 46815 }, { "epoch": 0.5709756097560975, "grad_norm": 0.49334418773651123, "learning_rate": 1.619349593495935e-05, "loss": 0.0424, "step": 46820 }, { "epoch": 0.5710365853658537, "grad_norm": 0.6052070260047913, "learning_rate": 1.6193089430894313e-05, "loss": 0.0737, "step": 46825 }, { "epoch": 0.5710975609756097, "grad_norm": 0.38122448325157166, "learning_rate": 1.619268292682927e-05, "loss": 0.0584, "step": 46830 }, { "epoch": 0.5711585365853659, "grad_norm": 0.48452892899513245, "learning_rate": 1.619227642276423e-05, "loss": 0.0393, "step": 46835 }, { "epoch": 0.5712195121951219, "grad_norm": 0.3694811165332794, "learning_rate": 1.6191869918699187e-05, "loss": 0.0535, "step": 46840 }, { "epoch": 0.5712804878048781, "grad_norm": 0.2676626443862915, "learning_rate": 1.619146341463415e-05, "loss": 0.0576, "step": 46845 }, { "epoch": 0.5713414634146341, "grad_norm": 0.6204358339309692, "learning_rate": 1.6191056910569107e-05, "loss": 0.0728, "step": 46850 }, { "epoch": 0.5714024390243903, "grad_norm": 0.46896129846572876, "learning_rate": 1.6190650406504068e-05, "loss": 0.0728, "step": 46855 }, { "epoch": 0.5714634146341463, "grad_norm": 0.5035759806632996, "learning_rate": 1.6190243902439026e-05, "loss": 0.0419, "step": 46860 }, { "epoch": 0.5715243902439024, "grad_norm": 0.40417400002479553, "learning_rate": 1.6189837398373984e-05, "loss": 0.0663, "step": 46865 }, { "epoch": 0.5715853658536585, "grad_norm": 0.44705289602279663, "learning_rate": 1.6189430894308943e-05, "loss": 0.0692, "step": 46870 }, { "epoch": 0.5716463414634146, "grad_norm": 0.9299153685569763, "learning_rate": 1.6189024390243904e-05, "loss": 0.1133, "step": 46875 }, { "epoch": 0.5717073170731707, "grad_norm": 0.6362873315811157, "learning_rate": 1.6188617886178862e-05, "loss": 0.0898, "step": 46880 }, { "epoch": 0.5717682926829268, "grad_norm": 1.0179765224456787, "learning_rate": 1.6188211382113824e-05, "loss": 0.065, "step": 46885 }, { "epoch": 0.5718292682926829, "grad_norm": 0.4880782663822174, "learning_rate": 1.6187804878048782e-05, "loss": 0.0462, "step": 46890 }, { "epoch": 0.571890243902439, "grad_norm": 0.838541567325592, "learning_rate": 1.618739837398374e-05, "loss": 0.0682, "step": 46895 }, { "epoch": 0.5719512195121951, "grad_norm": 2.4435384273529053, "learning_rate": 1.61869918699187e-05, "loss": 0.0757, "step": 46900 }, { "epoch": 0.5720121951219512, "grad_norm": 1.2834943532943726, "learning_rate": 1.618658536585366e-05, "loss": 0.0812, "step": 46905 }, { "epoch": 0.5720731707317073, "grad_norm": 0.4419019818305969, "learning_rate": 1.618617886178862e-05, "loss": 0.0676, "step": 46910 }, { "epoch": 0.5721341463414634, "grad_norm": 1.985564947128296, "learning_rate": 1.618577235772358e-05, "loss": 0.054, "step": 46915 }, { "epoch": 0.5721951219512195, "grad_norm": 0.799213707447052, "learning_rate": 1.6185365853658537e-05, "loss": 0.0714, "step": 46920 }, { "epoch": 0.5722560975609756, "grad_norm": 0.4324338436126709, "learning_rate": 1.6184959349593496e-05, "loss": 0.0516, "step": 46925 }, { "epoch": 0.5723170731707317, "grad_norm": 0.5937097668647766, "learning_rate": 1.6184552845528457e-05, "loss": 0.042, "step": 46930 }, { "epoch": 0.5723780487804878, "grad_norm": 0.684406578540802, "learning_rate": 1.6184146341463415e-05, "loss": 0.0637, "step": 46935 }, { "epoch": 0.5724390243902439, "grad_norm": 0.42273247241973877, "learning_rate": 1.6183739837398377e-05, "loss": 0.0371, "step": 46940 }, { "epoch": 0.5725, "grad_norm": 0.4722040891647339, "learning_rate": 1.6183333333333335e-05, "loss": 0.098, "step": 46945 }, { "epoch": 0.572560975609756, "grad_norm": 0.8966520428657532, "learning_rate": 1.6182926829268296e-05, "loss": 0.0765, "step": 46950 }, { "epoch": 0.5726219512195122, "grad_norm": 0.6609185934066772, "learning_rate": 1.618252032520325e-05, "loss": 0.0646, "step": 46955 }, { "epoch": 0.5726829268292682, "grad_norm": 0.5980469584465027, "learning_rate": 1.6182113821138213e-05, "loss": 0.0729, "step": 46960 }, { "epoch": 0.5727439024390244, "grad_norm": 0.3269139230251312, "learning_rate": 1.618170731707317e-05, "loss": 0.0404, "step": 46965 }, { "epoch": 0.5728048780487804, "grad_norm": 0.717917799949646, "learning_rate": 1.6181300813008132e-05, "loss": 0.0809, "step": 46970 }, { "epoch": 0.5728658536585366, "grad_norm": 0.6764428019523621, "learning_rate": 1.618089430894309e-05, "loss": 0.0494, "step": 46975 }, { "epoch": 0.5729268292682926, "grad_norm": 0.6344205141067505, "learning_rate": 1.6180487804878052e-05, "loss": 0.0731, "step": 46980 }, { "epoch": 0.5729878048780488, "grad_norm": 1.145436406135559, "learning_rate": 1.618008130081301e-05, "loss": 0.0796, "step": 46985 }, { "epoch": 0.5730487804878048, "grad_norm": 0.5155028104782104, "learning_rate": 1.6179674796747968e-05, "loss": 0.046, "step": 46990 }, { "epoch": 0.573109756097561, "grad_norm": 0.6278061270713806, "learning_rate": 1.617926829268293e-05, "loss": 0.083, "step": 46995 }, { "epoch": 0.573170731707317, "grad_norm": 0.6949190497398376, "learning_rate": 1.6178861788617888e-05, "loss": 0.0784, "step": 47000 }, { "epoch": 0.5732317073170732, "grad_norm": 0.579298198223114, "learning_rate": 1.6178455284552846e-05, "loss": 0.0378, "step": 47005 }, { "epoch": 0.5732926829268292, "grad_norm": 1.597343921661377, "learning_rate": 1.6178048780487807e-05, "loss": 0.062, "step": 47010 }, { "epoch": 0.5733536585365854, "grad_norm": 0.5448532104492188, "learning_rate": 1.6177642276422766e-05, "loss": 0.0541, "step": 47015 }, { "epoch": 0.5734146341463414, "grad_norm": 0.5836458206176758, "learning_rate": 1.6177235772357724e-05, "loss": 0.0514, "step": 47020 }, { "epoch": 0.5734756097560976, "grad_norm": 0.5971692204475403, "learning_rate": 1.6176829268292685e-05, "loss": 0.0507, "step": 47025 }, { "epoch": 0.5735365853658536, "grad_norm": 0.5695919990539551, "learning_rate": 1.6176422764227643e-05, "loss": 0.0519, "step": 47030 }, { "epoch": 0.5735975609756098, "grad_norm": 0.6221720576286316, "learning_rate": 1.6176016260162605e-05, "loss": 0.0523, "step": 47035 }, { "epoch": 0.5736585365853658, "grad_norm": 0.7485078573226929, "learning_rate": 1.6175609756097563e-05, "loss": 0.0523, "step": 47040 }, { "epoch": 0.573719512195122, "grad_norm": 1.9661825895309448, "learning_rate": 1.617520325203252e-05, "loss": 0.0314, "step": 47045 }, { "epoch": 0.573780487804878, "grad_norm": 1.102503776550293, "learning_rate": 1.617479674796748e-05, "loss": 0.0675, "step": 47050 }, { "epoch": 0.5738414634146342, "grad_norm": 0.8938082456588745, "learning_rate": 1.617439024390244e-05, "loss": 0.0642, "step": 47055 }, { "epoch": 0.5739024390243902, "grad_norm": 0.7129875421524048, "learning_rate": 1.61739837398374e-05, "loss": 0.0739, "step": 47060 }, { "epoch": 0.5739634146341464, "grad_norm": 0.6173868179321289, "learning_rate": 1.617357723577236e-05, "loss": 0.052, "step": 47065 }, { "epoch": 0.5740243902439024, "grad_norm": 0.9588358402252197, "learning_rate": 1.617317073170732e-05, "loss": 0.0591, "step": 47070 }, { "epoch": 0.5740853658536585, "grad_norm": 0.48155876994132996, "learning_rate": 1.6172764227642277e-05, "loss": 0.0479, "step": 47075 }, { "epoch": 0.5741463414634146, "grad_norm": 0.45327600836753845, "learning_rate": 1.6172357723577238e-05, "loss": 0.0598, "step": 47080 }, { "epoch": 0.5742073170731707, "grad_norm": 0.5273836255073547, "learning_rate": 1.6171951219512196e-05, "loss": 0.0646, "step": 47085 }, { "epoch": 0.5742682926829268, "grad_norm": 0.8720250725746155, "learning_rate": 1.6171544715447158e-05, "loss": 0.0669, "step": 47090 }, { "epoch": 0.5743292682926829, "grad_norm": 0.476990669965744, "learning_rate": 1.6171138211382116e-05, "loss": 0.0556, "step": 47095 }, { "epoch": 0.574390243902439, "grad_norm": 0.7191054224967957, "learning_rate": 1.6170731707317074e-05, "loss": 0.0558, "step": 47100 }, { "epoch": 0.5744512195121951, "grad_norm": 0.6796718239784241, "learning_rate": 1.6170325203252032e-05, "loss": 0.0987, "step": 47105 }, { "epoch": 0.5745121951219512, "grad_norm": 0.5466949939727783, "learning_rate": 1.6169918699186994e-05, "loss": 0.0629, "step": 47110 }, { "epoch": 0.5745731707317073, "grad_norm": 0.38039204478263855, "learning_rate": 1.6169512195121952e-05, "loss": 0.0639, "step": 47115 }, { "epoch": 0.5746341463414634, "grad_norm": 0.49193286895751953, "learning_rate": 1.6169105691056913e-05, "loss": 0.0631, "step": 47120 }, { "epoch": 0.5746951219512195, "grad_norm": 0.3886277377605438, "learning_rate": 1.616869918699187e-05, "loss": 0.06, "step": 47125 }, { "epoch": 0.5747560975609756, "grad_norm": 0.612252950668335, "learning_rate": 1.6168292682926833e-05, "loss": 0.0559, "step": 47130 }, { "epoch": 0.5748170731707317, "grad_norm": 0.5637312531471252, "learning_rate": 1.6167886178861788e-05, "loss": 0.0467, "step": 47135 }, { "epoch": 0.5748780487804878, "grad_norm": 0.515587568283081, "learning_rate": 1.616747967479675e-05, "loss": 0.0491, "step": 47140 }, { "epoch": 0.5749390243902439, "grad_norm": 0.5112798810005188, "learning_rate": 1.6167073170731707e-05, "loss": 0.1116, "step": 47145 }, { "epoch": 0.575, "grad_norm": 1.234284520149231, "learning_rate": 1.616666666666667e-05, "loss": 0.0563, "step": 47150 }, { "epoch": 0.5750609756097561, "grad_norm": 1.4165081977844238, "learning_rate": 1.6166260162601627e-05, "loss": 0.042, "step": 47155 }, { "epoch": 0.5751219512195122, "grad_norm": 0.4863666594028473, "learning_rate": 1.616585365853659e-05, "loss": 0.0475, "step": 47160 }, { "epoch": 0.5751829268292683, "grad_norm": 1.6606072187423706, "learning_rate": 1.6165447154471547e-05, "loss": 0.0719, "step": 47165 }, { "epoch": 0.5752439024390243, "grad_norm": 0.7125630378723145, "learning_rate": 1.6165040650406505e-05, "loss": 0.0518, "step": 47170 }, { "epoch": 0.5753048780487805, "grad_norm": 0.4550245404243469, "learning_rate": 1.6164634146341466e-05, "loss": 0.0382, "step": 47175 }, { "epoch": 0.5753658536585365, "grad_norm": 1.2472525835037231, "learning_rate": 1.6164227642276424e-05, "loss": 0.0729, "step": 47180 }, { "epoch": 0.5754268292682927, "grad_norm": 1.207711100578308, "learning_rate": 1.6163821138211383e-05, "loss": 0.126, "step": 47185 }, { "epoch": 0.5754878048780487, "grad_norm": 0.8137549757957458, "learning_rate": 1.6163414634146344e-05, "loss": 0.0389, "step": 47190 }, { "epoch": 0.5755487804878049, "grad_norm": 0.5269941091537476, "learning_rate": 1.6163008130081302e-05, "loss": 0.0475, "step": 47195 }, { "epoch": 0.5756097560975609, "grad_norm": 0.5382208228111267, "learning_rate": 1.616260162601626e-05, "loss": 0.0424, "step": 47200 }, { "epoch": 0.5756707317073171, "grad_norm": 2.1028666496276855, "learning_rate": 1.6162195121951222e-05, "loss": 0.0448, "step": 47205 }, { "epoch": 0.5757317073170731, "grad_norm": 0.7553303837776184, "learning_rate": 1.616178861788618e-05, "loss": 0.0786, "step": 47210 }, { "epoch": 0.5757926829268293, "grad_norm": 0.3131011128425598, "learning_rate": 1.616138211382114e-05, "loss": 0.0471, "step": 47215 }, { "epoch": 0.5758536585365853, "grad_norm": 0.4622001349925995, "learning_rate": 1.61609756097561e-05, "loss": 0.0547, "step": 47220 }, { "epoch": 0.5759146341463415, "grad_norm": 2.509631872177124, "learning_rate": 1.6160569105691058e-05, "loss": 0.1206, "step": 47225 }, { "epoch": 0.5759756097560975, "grad_norm": 0.4237704873085022, "learning_rate": 1.6160162601626016e-05, "loss": 0.0357, "step": 47230 }, { "epoch": 0.5760365853658537, "grad_norm": 0.5067047476768494, "learning_rate": 1.6159756097560977e-05, "loss": 0.074, "step": 47235 }, { "epoch": 0.5760975609756097, "grad_norm": 0.5493873357772827, "learning_rate": 1.6159349593495935e-05, "loss": 0.0369, "step": 47240 }, { "epoch": 0.5761585365853659, "grad_norm": 1.2978321313858032, "learning_rate": 1.6158943089430897e-05, "loss": 0.0557, "step": 47245 }, { "epoch": 0.5762195121951219, "grad_norm": 1.1229499578475952, "learning_rate": 1.6158536585365855e-05, "loss": 0.1031, "step": 47250 }, { "epoch": 0.5762804878048781, "grad_norm": 1.4450863599777222, "learning_rate": 1.6158130081300813e-05, "loss": 0.0714, "step": 47255 }, { "epoch": 0.5763414634146341, "grad_norm": 0.4241943657398224, "learning_rate": 1.6157723577235775e-05, "loss": 0.0425, "step": 47260 }, { "epoch": 0.5764024390243903, "grad_norm": 0.32990285754203796, "learning_rate": 1.6157317073170733e-05, "loss": 0.0528, "step": 47265 }, { "epoch": 0.5764634146341463, "grad_norm": 0.7211061120033264, "learning_rate": 1.615691056910569e-05, "loss": 0.0695, "step": 47270 }, { "epoch": 0.5765243902439025, "grad_norm": 0.3804111182689667, "learning_rate": 1.6156504065040653e-05, "loss": 0.034, "step": 47275 }, { "epoch": 0.5765853658536585, "grad_norm": 0.6468015313148499, "learning_rate": 1.615609756097561e-05, "loss": 0.0356, "step": 47280 }, { "epoch": 0.5766463414634146, "grad_norm": 0.9688219428062439, "learning_rate": 1.615569105691057e-05, "loss": 0.0879, "step": 47285 }, { "epoch": 0.5767073170731707, "grad_norm": 0.34450528025627136, "learning_rate": 1.615528455284553e-05, "loss": 0.0778, "step": 47290 }, { "epoch": 0.5767682926829268, "grad_norm": 0.5472132563591003, "learning_rate": 1.615487804878049e-05, "loss": 0.0447, "step": 47295 }, { "epoch": 0.5768292682926829, "grad_norm": 0.6426844000816345, "learning_rate": 1.615447154471545e-05, "loss": 0.0844, "step": 47300 }, { "epoch": 0.576890243902439, "grad_norm": 0.5745211243629456, "learning_rate": 1.6154065040650408e-05, "loss": 0.0689, "step": 47305 }, { "epoch": 0.5769512195121951, "grad_norm": 0.7855350971221924, "learning_rate": 1.615365853658537e-05, "loss": 0.0516, "step": 47310 }, { "epoch": 0.5770121951219512, "grad_norm": 2.5444650650024414, "learning_rate": 1.6153252032520324e-05, "loss": 0.0626, "step": 47315 }, { "epoch": 0.5770731707317073, "grad_norm": 1.991953730583191, "learning_rate": 1.6152845528455286e-05, "loss": 0.0709, "step": 47320 }, { "epoch": 0.5771341463414634, "grad_norm": 0.8552135229110718, "learning_rate": 1.6152439024390244e-05, "loss": 0.0613, "step": 47325 }, { "epoch": 0.5771951219512195, "grad_norm": 1.3807873725891113, "learning_rate": 1.6152032520325206e-05, "loss": 0.0854, "step": 47330 }, { "epoch": 0.5772560975609756, "grad_norm": 1.1502560377120972, "learning_rate": 1.6151626016260164e-05, "loss": 0.0604, "step": 47335 }, { "epoch": 0.5773170731707317, "grad_norm": 0.9433528780937195, "learning_rate": 1.6151219512195125e-05, "loss": 0.0522, "step": 47340 }, { "epoch": 0.5773780487804878, "grad_norm": 0.5838854908943176, "learning_rate": 1.6150813008130083e-05, "loss": 0.0783, "step": 47345 }, { "epoch": 0.5774390243902439, "grad_norm": 3.8685004711151123, "learning_rate": 1.615040650406504e-05, "loss": 0.0537, "step": 47350 }, { "epoch": 0.5775, "grad_norm": 0.42609328031539917, "learning_rate": 1.6150000000000003e-05, "loss": 0.0784, "step": 47355 }, { "epoch": 0.577560975609756, "grad_norm": 0.8861054182052612, "learning_rate": 1.614959349593496e-05, "loss": 0.078, "step": 47360 }, { "epoch": 0.5776219512195122, "grad_norm": 0.6793922185897827, "learning_rate": 1.614918699186992e-05, "loss": 0.085, "step": 47365 }, { "epoch": 0.5776829268292683, "grad_norm": 0.40439942479133606, "learning_rate": 1.614878048780488e-05, "loss": 0.0593, "step": 47370 }, { "epoch": 0.5777439024390244, "grad_norm": 0.554714560508728, "learning_rate": 1.614837398373984e-05, "loss": 0.0791, "step": 47375 }, { "epoch": 0.5778048780487804, "grad_norm": 0.7218592762947083, "learning_rate": 1.6147967479674797e-05, "loss": 0.0706, "step": 47380 }, { "epoch": 0.5778658536585366, "grad_norm": 0.7045944333076477, "learning_rate": 1.614756097560976e-05, "loss": 0.0464, "step": 47385 }, { "epoch": 0.5779268292682926, "grad_norm": 0.787667989730835, "learning_rate": 1.6147154471544717e-05, "loss": 0.0442, "step": 47390 }, { "epoch": 0.5779878048780488, "grad_norm": 0.4226425290107727, "learning_rate": 1.6146747967479678e-05, "loss": 0.0741, "step": 47395 }, { "epoch": 0.5780487804878048, "grad_norm": 0.2868553698062897, "learning_rate": 1.6146341463414636e-05, "loss": 0.0424, "step": 47400 }, { "epoch": 0.578109756097561, "grad_norm": 0.868835985660553, "learning_rate": 1.6145934959349594e-05, "loss": 0.0406, "step": 47405 }, { "epoch": 0.578170731707317, "grad_norm": 0.747961699962616, "learning_rate": 1.6145528455284552e-05, "loss": 0.0674, "step": 47410 }, { "epoch": 0.5782317073170732, "grad_norm": 0.9179307818412781, "learning_rate": 1.6145121951219514e-05, "loss": 0.034, "step": 47415 }, { "epoch": 0.5782926829268292, "grad_norm": 0.32662808895111084, "learning_rate": 1.6144715447154472e-05, "loss": 0.0585, "step": 47420 }, { "epoch": 0.5783536585365854, "grad_norm": 1.1410850286483765, "learning_rate": 1.6144308943089434e-05, "loss": 0.0549, "step": 47425 }, { "epoch": 0.5784146341463414, "grad_norm": 0.607648491859436, "learning_rate": 1.6143902439024392e-05, "loss": 0.0605, "step": 47430 }, { "epoch": 0.5784756097560976, "grad_norm": 0.7652329802513123, "learning_rate": 1.614349593495935e-05, "loss": 0.044, "step": 47435 }, { "epoch": 0.5785365853658536, "grad_norm": 0.42779630422592163, "learning_rate": 1.614308943089431e-05, "loss": 0.0593, "step": 47440 }, { "epoch": 0.5785975609756098, "grad_norm": 0.5271528959274292, "learning_rate": 1.614268292682927e-05, "loss": 0.0324, "step": 47445 }, { "epoch": 0.5786585365853658, "grad_norm": 0.6049367785453796, "learning_rate": 1.6142276422764228e-05, "loss": 0.0354, "step": 47450 }, { "epoch": 0.578719512195122, "grad_norm": 0.4543626606464386, "learning_rate": 1.614186991869919e-05, "loss": 0.0813, "step": 47455 }, { "epoch": 0.578780487804878, "grad_norm": 0.3066190779209137, "learning_rate": 1.6141463414634147e-05, "loss": 0.0314, "step": 47460 }, { "epoch": 0.5788414634146342, "grad_norm": 0.6496961116790771, "learning_rate": 1.6141056910569105e-05, "loss": 0.0559, "step": 47465 }, { "epoch": 0.5789024390243902, "grad_norm": 0.4917561113834381, "learning_rate": 1.6140650406504067e-05, "loss": 0.0655, "step": 47470 }, { "epoch": 0.5789634146341464, "grad_norm": 0.9151979088783264, "learning_rate": 1.6140243902439025e-05, "loss": 0.053, "step": 47475 }, { "epoch": 0.5790243902439024, "grad_norm": 0.8762719035148621, "learning_rate": 1.6139837398373987e-05, "loss": 0.0688, "step": 47480 }, { "epoch": 0.5790853658536586, "grad_norm": 0.4288671016693115, "learning_rate": 1.6139430894308945e-05, "loss": 0.0617, "step": 47485 }, { "epoch": 0.5791463414634146, "grad_norm": 2.0505993366241455, "learning_rate": 1.6139024390243906e-05, "loss": 0.094, "step": 47490 }, { "epoch": 0.5792073170731707, "grad_norm": 1.2095855474472046, "learning_rate": 1.613861788617886e-05, "loss": 0.0586, "step": 47495 }, { "epoch": 0.5792682926829268, "grad_norm": 0.6707280874252319, "learning_rate": 1.6138211382113823e-05, "loss": 0.0921, "step": 47500 }, { "epoch": 0.5793292682926829, "grad_norm": 0.4890541732311249, "learning_rate": 1.613780487804878e-05, "loss": 0.1131, "step": 47505 }, { "epoch": 0.579390243902439, "grad_norm": 0.6391751170158386, "learning_rate": 1.6137398373983742e-05, "loss": 0.0796, "step": 47510 }, { "epoch": 0.5794512195121951, "grad_norm": 0.3922938406467438, "learning_rate": 1.61369918699187e-05, "loss": 0.0578, "step": 47515 }, { "epoch": 0.5795121951219512, "grad_norm": 0.6047605276107788, "learning_rate": 1.6136585365853662e-05, "loss": 0.0515, "step": 47520 }, { "epoch": 0.5795731707317073, "grad_norm": 0.4559445083141327, "learning_rate": 1.613617886178862e-05, "loss": 0.0613, "step": 47525 }, { "epoch": 0.5796341463414634, "grad_norm": 0.39477187395095825, "learning_rate": 1.6135772357723578e-05, "loss": 0.0437, "step": 47530 }, { "epoch": 0.5796951219512195, "grad_norm": 0.4664771258831024, "learning_rate": 1.6135365853658536e-05, "loss": 0.0368, "step": 47535 }, { "epoch": 0.5797560975609756, "grad_norm": 0.8726472854614258, "learning_rate": 1.6134959349593498e-05, "loss": 0.0478, "step": 47540 }, { "epoch": 0.5798170731707317, "grad_norm": 0.5351272821426392, "learning_rate": 1.6134552845528456e-05, "loss": 0.0838, "step": 47545 }, { "epoch": 0.5798780487804878, "grad_norm": 0.649048388004303, "learning_rate": 1.6134146341463417e-05, "loss": 0.0487, "step": 47550 }, { "epoch": 0.5799390243902439, "grad_norm": 1.5466744899749756, "learning_rate": 1.6133739837398375e-05, "loss": 0.0471, "step": 47555 }, { "epoch": 0.58, "grad_norm": 0.4926346242427826, "learning_rate": 1.6133333333333334e-05, "loss": 0.08, "step": 47560 }, { "epoch": 0.5800609756097561, "grad_norm": 0.6336090564727783, "learning_rate": 1.6132926829268295e-05, "loss": 0.0759, "step": 47565 }, { "epoch": 0.5801219512195122, "grad_norm": 0.39870065450668335, "learning_rate": 1.6132520325203253e-05, "loss": 0.0494, "step": 47570 }, { "epoch": 0.5801829268292683, "grad_norm": 3.1969316005706787, "learning_rate": 1.6132113821138215e-05, "loss": 0.0607, "step": 47575 }, { "epoch": 0.5802439024390244, "grad_norm": 0.5776350498199463, "learning_rate": 1.6131707317073173e-05, "loss": 0.0455, "step": 47580 }, { "epoch": 0.5803048780487805, "grad_norm": 0.3367469608783722, "learning_rate": 1.613130081300813e-05, "loss": 0.0509, "step": 47585 }, { "epoch": 0.5803658536585365, "grad_norm": 2.4585585594177246, "learning_rate": 1.613089430894309e-05, "loss": 0.0568, "step": 47590 }, { "epoch": 0.5804268292682927, "grad_norm": 0.8803650140762329, "learning_rate": 1.613048780487805e-05, "loss": 0.0607, "step": 47595 }, { "epoch": 0.5804878048780487, "grad_norm": 0.8274554014205933, "learning_rate": 1.613008130081301e-05, "loss": 0.0487, "step": 47600 }, { "epoch": 0.5805487804878049, "grad_norm": 0.8233482837677002, "learning_rate": 1.612967479674797e-05, "loss": 0.0597, "step": 47605 }, { "epoch": 0.5806097560975609, "grad_norm": 0.6223686337471008, "learning_rate": 1.612926829268293e-05, "loss": 0.0571, "step": 47610 }, { "epoch": 0.5806707317073171, "grad_norm": 0.5871958136558533, "learning_rate": 1.6128861788617887e-05, "loss": 0.0547, "step": 47615 }, { "epoch": 0.5807317073170731, "grad_norm": 1.5255050659179688, "learning_rate": 1.6128455284552845e-05, "loss": 0.0781, "step": 47620 }, { "epoch": 0.5807926829268293, "grad_norm": 0.39343157410621643, "learning_rate": 1.6128048780487806e-05, "loss": 0.1069, "step": 47625 }, { "epoch": 0.5808536585365853, "grad_norm": 0.5747738480567932, "learning_rate": 1.6127642276422764e-05, "loss": 0.0684, "step": 47630 }, { "epoch": 0.5809146341463415, "grad_norm": 0.8211041688919067, "learning_rate": 1.6127235772357726e-05, "loss": 0.0526, "step": 47635 }, { "epoch": 0.5809756097560975, "grad_norm": 0.6097192168235779, "learning_rate": 1.6126829268292684e-05, "loss": 0.0494, "step": 47640 }, { "epoch": 0.5810365853658537, "grad_norm": 0.8609230518341064, "learning_rate": 1.6126422764227642e-05, "loss": 0.0834, "step": 47645 }, { "epoch": 0.5810975609756097, "grad_norm": 0.5500087141990662, "learning_rate": 1.6126016260162604e-05, "loss": 0.0682, "step": 47650 }, { "epoch": 0.5811585365853659, "grad_norm": 0.18849852681159973, "learning_rate": 1.6125609756097562e-05, "loss": 0.068, "step": 47655 }, { "epoch": 0.5812195121951219, "grad_norm": 0.33857306838035583, "learning_rate": 1.6125203252032523e-05, "loss": 0.0471, "step": 47660 }, { "epoch": 0.5812804878048781, "grad_norm": 0.6329569816589355, "learning_rate": 1.612479674796748e-05, "loss": 0.0763, "step": 47665 }, { "epoch": 0.5813414634146341, "grad_norm": 1.0612601041793823, "learning_rate": 1.6124390243902443e-05, "loss": 0.0541, "step": 47670 }, { "epoch": 0.5814024390243903, "grad_norm": 0.7219384908676147, "learning_rate": 1.6123983739837398e-05, "loss": 0.0788, "step": 47675 }, { "epoch": 0.5814634146341463, "grad_norm": 0.5857816338539124, "learning_rate": 1.612357723577236e-05, "loss": 0.0556, "step": 47680 }, { "epoch": 0.5815243902439025, "grad_norm": 0.8864293694496155, "learning_rate": 1.6123170731707317e-05, "loss": 0.0801, "step": 47685 }, { "epoch": 0.5815853658536585, "grad_norm": 1.2081514596939087, "learning_rate": 1.612276422764228e-05, "loss": 0.0659, "step": 47690 }, { "epoch": 0.5816463414634147, "grad_norm": 0.8065913915634155, "learning_rate": 1.6122357723577237e-05, "loss": 0.0585, "step": 47695 }, { "epoch": 0.5817073170731707, "grad_norm": 0.5698899030685425, "learning_rate": 1.61219512195122e-05, "loss": 0.0468, "step": 47700 }, { "epoch": 0.5817682926829268, "grad_norm": 0.6384047865867615, "learning_rate": 1.6121544715447157e-05, "loss": 0.0498, "step": 47705 }, { "epoch": 0.5818292682926829, "grad_norm": 0.6527827978134155, "learning_rate": 1.6121138211382115e-05, "loss": 0.0724, "step": 47710 }, { "epoch": 0.581890243902439, "grad_norm": 2.325164318084717, "learning_rate": 1.6120731707317073e-05, "loss": 0.0549, "step": 47715 }, { "epoch": 0.5819512195121951, "grad_norm": 1.213568091392517, "learning_rate": 1.6120325203252034e-05, "loss": 0.0697, "step": 47720 }, { "epoch": 0.5820121951219512, "grad_norm": 0.4322051703929901, "learning_rate": 1.6119918699186992e-05, "loss": 0.05, "step": 47725 }, { "epoch": 0.5820731707317073, "grad_norm": 0.3450734615325928, "learning_rate": 1.6119512195121954e-05, "loss": 0.03, "step": 47730 }, { "epoch": 0.5821341463414634, "grad_norm": 0.9382518529891968, "learning_rate": 1.6119105691056912e-05, "loss": 0.0846, "step": 47735 }, { "epoch": 0.5821951219512195, "grad_norm": 0.6001650094985962, "learning_rate": 1.611869918699187e-05, "loss": 0.0637, "step": 47740 }, { "epoch": 0.5822560975609756, "grad_norm": 0.3128964900970459, "learning_rate": 1.6118292682926832e-05, "loss": 0.0842, "step": 47745 }, { "epoch": 0.5823170731707317, "grad_norm": 2.35028338432312, "learning_rate": 1.611788617886179e-05, "loss": 0.0724, "step": 47750 }, { "epoch": 0.5823780487804878, "grad_norm": 0.3406590223312378, "learning_rate": 1.611747967479675e-05, "loss": 0.0445, "step": 47755 }, { "epoch": 0.5824390243902439, "grad_norm": 0.6899134516716003, "learning_rate": 1.611707317073171e-05, "loss": 0.0534, "step": 47760 }, { "epoch": 0.5825, "grad_norm": 0.5696789026260376, "learning_rate": 1.6116666666666668e-05, "loss": 0.0874, "step": 47765 }, { "epoch": 0.5825609756097561, "grad_norm": 0.5298784971237183, "learning_rate": 1.6116260162601626e-05, "loss": 0.0636, "step": 47770 }, { "epoch": 0.5826219512195122, "grad_norm": 0.5049415826797485, "learning_rate": 1.6115853658536587e-05, "loss": 0.0349, "step": 47775 }, { "epoch": 0.5826829268292683, "grad_norm": 0.7977088093757629, "learning_rate": 1.6115447154471545e-05, "loss": 0.0597, "step": 47780 }, { "epoch": 0.5827439024390244, "grad_norm": 1.0145022869110107, "learning_rate": 1.6115040650406507e-05, "loss": 0.0887, "step": 47785 }, { "epoch": 0.5828048780487805, "grad_norm": 0.5060285925865173, "learning_rate": 1.6114634146341465e-05, "loss": 0.0741, "step": 47790 }, { "epoch": 0.5828658536585366, "grad_norm": 0.698655366897583, "learning_rate": 1.6114227642276423e-05, "loss": 0.0384, "step": 47795 }, { "epoch": 0.5829268292682926, "grad_norm": 0.7116857171058655, "learning_rate": 1.611382113821138e-05, "loss": 0.0561, "step": 47800 }, { "epoch": 0.5829878048780488, "grad_norm": 0.5312204957008362, "learning_rate": 1.6113414634146343e-05, "loss": 0.065, "step": 47805 }, { "epoch": 0.5830487804878048, "grad_norm": 0.3634481132030487, "learning_rate": 1.61130081300813e-05, "loss": 0.0551, "step": 47810 }, { "epoch": 0.583109756097561, "grad_norm": 0.40025392174720764, "learning_rate": 1.6112601626016262e-05, "loss": 0.0484, "step": 47815 }, { "epoch": 0.583170731707317, "grad_norm": 0.6593084931373596, "learning_rate": 1.611219512195122e-05, "loss": 0.0489, "step": 47820 }, { "epoch": 0.5832317073170732, "grad_norm": 0.6205593943595886, "learning_rate": 1.611178861788618e-05, "loss": 0.0478, "step": 47825 }, { "epoch": 0.5832926829268292, "grad_norm": 0.5579416155815125, "learning_rate": 1.611138211382114e-05, "loss": 0.068, "step": 47830 }, { "epoch": 0.5833536585365854, "grad_norm": 0.40806740522384644, "learning_rate": 1.61109756097561e-05, "loss": 0.0541, "step": 47835 }, { "epoch": 0.5834146341463414, "grad_norm": 1.2230610847473145, "learning_rate": 1.611056910569106e-05, "loss": 0.0918, "step": 47840 }, { "epoch": 0.5834756097560976, "grad_norm": 0.5619804859161377, "learning_rate": 1.6110162601626018e-05, "loss": 0.0475, "step": 47845 }, { "epoch": 0.5835365853658536, "grad_norm": 0.5362014174461365, "learning_rate": 1.610975609756098e-05, "loss": 0.0437, "step": 47850 }, { "epoch": 0.5835975609756098, "grad_norm": 0.5556354522705078, "learning_rate": 1.6109349593495934e-05, "loss": 0.0441, "step": 47855 }, { "epoch": 0.5836585365853658, "grad_norm": 0.8204360604286194, "learning_rate": 1.6108943089430896e-05, "loss": 0.045, "step": 47860 }, { "epoch": 0.583719512195122, "grad_norm": 0.7506065368652344, "learning_rate": 1.6108536585365854e-05, "loss": 0.0677, "step": 47865 }, { "epoch": 0.583780487804878, "grad_norm": 0.5339619517326355, "learning_rate": 1.6108130081300815e-05, "loss": 0.0546, "step": 47870 }, { "epoch": 0.5838414634146342, "grad_norm": 0.36548757553100586, "learning_rate": 1.6107723577235774e-05, "loss": 0.0473, "step": 47875 }, { "epoch": 0.5839024390243902, "grad_norm": 0.6960799694061279, "learning_rate": 1.6107317073170735e-05, "loss": 0.0494, "step": 47880 }, { "epoch": 0.5839634146341464, "grad_norm": 2.076977014541626, "learning_rate": 1.610691056910569e-05, "loss": 0.0596, "step": 47885 }, { "epoch": 0.5840243902439024, "grad_norm": 0.5183258056640625, "learning_rate": 1.610650406504065e-05, "loss": 0.1021, "step": 47890 }, { "epoch": 0.5840853658536586, "grad_norm": 0.4801431894302368, "learning_rate": 1.610609756097561e-05, "loss": 0.0563, "step": 47895 }, { "epoch": 0.5841463414634146, "grad_norm": 0.9085931181907654, "learning_rate": 1.610569105691057e-05, "loss": 0.0739, "step": 47900 }, { "epoch": 0.5842073170731708, "grad_norm": 0.5905459523200989, "learning_rate": 1.610528455284553e-05, "loss": 0.0568, "step": 47905 }, { "epoch": 0.5842682926829268, "grad_norm": 0.35662662982940674, "learning_rate": 1.610487804878049e-05, "loss": 0.0617, "step": 47910 }, { "epoch": 0.584329268292683, "grad_norm": 1.4314374923706055, "learning_rate": 1.610447154471545e-05, "loss": 0.0803, "step": 47915 }, { "epoch": 0.584390243902439, "grad_norm": 0.5242948532104492, "learning_rate": 1.6104065040650407e-05, "loss": 0.0613, "step": 47920 }, { "epoch": 0.5844512195121951, "grad_norm": 1.0068659782409668, "learning_rate": 1.610365853658537e-05, "loss": 0.064, "step": 47925 }, { "epoch": 0.5845121951219512, "grad_norm": 0.5738316178321838, "learning_rate": 1.6103252032520327e-05, "loss": 0.0472, "step": 47930 }, { "epoch": 0.5845731707317073, "grad_norm": 0.4896714985370636, "learning_rate": 1.6102845528455288e-05, "loss": 0.0513, "step": 47935 }, { "epoch": 0.5846341463414634, "grad_norm": 0.44557875394821167, "learning_rate": 1.6102439024390246e-05, "loss": 0.0522, "step": 47940 }, { "epoch": 0.5846951219512195, "grad_norm": 1.380980134010315, "learning_rate": 1.6102032520325204e-05, "loss": 0.0517, "step": 47945 }, { "epoch": 0.5847560975609756, "grad_norm": 0.5295771956443787, "learning_rate": 1.6101626016260162e-05, "loss": 0.0496, "step": 47950 }, { "epoch": 0.5848170731707317, "grad_norm": 0.7307811975479126, "learning_rate": 1.6101219512195124e-05, "loss": 0.0987, "step": 47955 }, { "epoch": 0.5848780487804878, "grad_norm": 0.40293946862220764, "learning_rate": 1.6100813008130082e-05, "loss": 0.0592, "step": 47960 }, { "epoch": 0.5849390243902439, "grad_norm": 0.3040475845336914, "learning_rate": 1.6100406504065044e-05, "loss": 0.0533, "step": 47965 }, { "epoch": 0.585, "grad_norm": 0.5994628071784973, "learning_rate": 1.6100000000000002e-05, "loss": 0.0748, "step": 47970 }, { "epoch": 0.5850609756097561, "grad_norm": 0.570482075214386, "learning_rate": 1.609959349593496e-05, "loss": 0.0353, "step": 47975 }, { "epoch": 0.5851219512195122, "grad_norm": 0.6119306683540344, "learning_rate": 1.6099186991869918e-05, "loss": 0.0563, "step": 47980 }, { "epoch": 0.5851829268292683, "grad_norm": 0.563244104385376, "learning_rate": 1.609878048780488e-05, "loss": 0.0474, "step": 47985 }, { "epoch": 0.5852439024390244, "grad_norm": 1.0373034477233887, "learning_rate": 1.6098373983739838e-05, "loss": 0.0543, "step": 47990 }, { "epoch": 0.5853048780487805, "grad_norm": 0.8966333866119385, "learning_rate": 1.60979674796748e-05, "loss": 0.0578, "step": 47995 }, { "epoch": 0.5853658536585366, "grad_norm": 0.5057427287101746, "learning_rate": 1.6097560975609757e-05, "loss": 0.0693, "step": 48000 }, { "epoch": 0.5854268292682927, "grad_norm": 0.7995458841323853, "learning_rate": 1.6097154471544715e-05, "loss": 0.0806, "step": 48005 }, { "epoch": 0.5854878048780487, "grad_norm": 0.8751376867294312, "learning_rate": 1.6096747967479677e-05, "loss": 0.0738, "step": 48010 }, { "epoch": 0.5855487804878049, "grad_norm": 1.01472806930542, "learning_rate": 1.6096341463414635e-05, "loss": 0.1223, "step": 48015 }, { "epoch": 0.5856097560975609, "grad_norm": 1.1526739597320557, "learning_rate": 1.6095934959349597e-05, "loss": 0.0697, "step": 48020 }, { "epoch": 0.5856707317073171, "grad_norm": 0.7632270455360413, "learning_rate": 1.6095528455284555e-05, "loss": 0.0516, "step": 48025 }, { "epoch": 0.5857317073170731, "grad_norm": 0.46740224957466125, "learning_rate": 1.6095121951219513e-05, "loss": 0.0497, "step": 48030 }, { "epoch": 0.5857926829268293, "grad_norm": 0.2979002594947815, "learning_rate": 1.609471544715447e-05, "loss": 0.0564, "step": 48035 }, { "epoch": 0.5858536585365853, "grad_norm": 0.40898871421813965, "learning_rate": 1.6094308943089432e-05, "loss": 0.0402, "step": 48040 }, { "epoch": 0.5859146341463415, "grad_norm": 0.554216742515564, "learning_rate": 1.609390243902439e-05, "loss": 0.0622, "step": 48045 }, { "epoch": 0.5859756097560975, "grad_norm": 0.7146037817001343, "learning_rate": 1.6093495934959352e-05, "loss": 0.0492, "step": 48050 }, { "epoch": 0.5860365853658537, "grad_norm": 0.6425127983093262, "learning_rate": 1.609308943089431e-05, "loss": 0.0559, "step": 48055 }, { "epoch": 0.5860975609756097, "grad_norm": 0.7010741233825684, "learning_rate": 1.6092682926829272e-05, "loss": 0.0769, "step": 48060 }, { "epoch": 0.5861585365853659, "grad_norm": 0.5296300053596497, "learning_rate": 1.6092276422764226e-05, "loss": 0.0562, "step": 48065 }, { "epoch": 0.5862195121951219, "grad_norm": 0.37825077772140503, "learning_rate": 1.6091869918699188e-05, "loss": 0.0609, "step": 48070 }, { "epoch": 0.5862804878048781, "grad_norm": 1.042059063911438, "learning_rate": 1.6091463414634146e-05, "loss": 0.0571, "step": 48075 }, { "epoch": 0.5863414634146341, "grad_norm": 0.9792384505271912, "learning_rate": 1.6091056910569108e-05, "loss": 0.1113, "step": 48080 }, { "epoch": 0.5864024390243903, "grad_norm": 2.242321491241455, "learning_rate": 1.6090650406504066e-05, "loss": 0.0619, "step": 48085 }, { "epoch": 0.5864634146341463, "grad_norm": 0.9509377479553223, "learning_rate": 1.6090243902439027e-05, "loss": 0.0582, "step": 48090 }, { "epoch": 0.5865243902439025, "grad_norm": 0.597680389881134, "learning_rate": 1.6089837398373985e-05, "loss": 0.0699, "step": 48095 }, { "epoch": 0.5865853658536585, "grad_norm": 0.6825587153434753, "learning_rate": 1.6089430894308944e-05, "loss": 0.067, "step": 48100 }, { "epoch": 0.5866463414634147, "grad_norm": 0.6774792075157166, "learning_rate": 1.6089024390243905e-05, "loss": 0.0567, "step": 48105 }, { "epoch": 0.5867073170731707, "grad_norm": 0.6521201133728027, "learning_rate": 1.6088617886178863e-05, "loss": 0.0573, "step": 48110 }, { "epoch": 0.5867682926829269, "grad_norm": 1.8274985551834106, "learning_rate": 1.6088211382113825e-05, "loss": 0.0775, "step": 48115 }, { "epoch": 0.5868292682926829, "grad_norm": 0.5816879272460938, "learning_rate": 1.6087804878048783e-05, "loss": 0.0455, "step": 48120 }, { "epoch": 0.586890243902439, "grad_norm": 0.8789560198783875, "learning_rate": 1.608739837398374e-05, "loss": 0.0573, "step": 48125 }, { "epoch": 0.5869512195121951, "grad_norm": 0.9191910028457642, "learning_rate": 1.60869918699187e-05, "loss": 0.0733, "step": 48130 }, { "epoch": 0.5870121951219512, "grad_norm": 0.5155048370361328, "learning_rate": 1.608658536585366e-05, "loss": 0.043, "step": 48135 }, { "epoch": 0.5870731707317073, "grad_norm": 0.8308166265487671, "learning_rate": 1.608617886178862e-05, "loss": 0.0516, "step": 48140 }, { "epoch": 0.5871341463414634, "grad_norm": 0.8960465788841248, "learning_rate": 1.608577235772358e-05, "loss": 0.0303, "step": 48145 }, { "epoch": 0.5871951219512195, "grad_norm": 0.29164379835128784, "learning_rate": 1.608536585365854e-05, "loss": 0.0464, "step": 48150 }, { "epoch": 0.5872560975609756, "grad_norm": 1.144701361656189, "learning_rate": 1.6084959349593496e-05, "loss": 0.0734, "step": 48155 }, { "epoch": 0.5873170731707317, "grad_norm": 0.3880434036254883, "learning_rate": 1.6084552845528455e-05, "loss": 0.0418, "step": 48160 }, { "epoch": 0.5873780487804878, "grad_norm": 0.8780815601348877, "learning_rate": 1.6084146341463416e-05, "loss": 0.0488, "step": 48165 }, { "epoch": 0.5874390243902439, "grad_norm": 0.5113297700881958, "learning_rate": 1.6083739837398374e-05, "loss": 0.049, "step": 48170 }, { "epoch": 0.5875, "grad_norm": 0.6226685047149658, "learning_rate": 1.6083333333333336e-05, "loss": 0.0535, "step": 48175 }, { "epoch": 0.5875609756097561, "grad_norm": 0.5692471265792847, "learning_rate": 1.6082926829268294e-05, "loss": 0.1057, "step": 48180 }, { "epoch": 0.5876219512195122, "grad_norm": 0.6323742270469666, "learning_rate": 1.6082520325203252e-05, "loss": 0.0605, "step": 48185 }, { "epoch": 0.5876829268292683, "grad_norm": 1.0154943466186523, "learning_rate": 1.6082113821138214e-05, "loss": 0.061, "step": 48190 }, { "epoch": 0.5877439024390244, "grad_norm": 0.42918339371681213, "learning_rate": 1.608170731707317e-05, "loss": 0.0475, "step": 48195 }, { "epoch": 0.5878048780487805, "grad_norm": 0.5814401507377625, "learning_rate": 1.6081300813008133e-05, "loss": 0.0549, "step": 48200 }, { "epoch": 0.5878658536585366, "grad_norm": 0.4824898838996887, "learning_rate": 1.608089430894309e-05, "loss": 0.0415, "step": 48205 }, { "epoch": 0.5879268292682926, "grad_norm": 0.4033622741699219, "learning_rate": 1.608048780487805e-05, "loss": 0.0856, "step": 48210 }, { "epoch": 0.5879878048780488, "grad_norm": 0.5912396907806396, "learning_rate": 1.6080081300813008e-05, "loss": 0.0554, "step": 48215 }, { "epoch": 0.5880487804878048, "grad_norm": 2.2275002002716064, "learning_rate": 1.607967479674797e-05, "loss": 0.1115, "step": 48220 }, { "epoch": 0.588109756097561, "grad_norm": 1.221104621887207, "learning_rate": 1.6079268292682927e-05, "loss": 0.064, "step": 48225 }, { "epoch": 0.588170731707317, "grad_norm": 0.8658686876296997, "learning_rate": 1.607886178861789e-05, "loss": 0.0661, "step": 48230 }, { "epoch": 0.5882317073170732, "grad_norm": 0.33059072494506836, "learning_rate": 1.6078455284552847e-05, "loss": 0.046, "step": 48235 }, { "epoch": 0.5882926829268292, "grad_norm": 0.3953807055950165, "learning_rate": 1.607804878048781e-05, "loss": 0.0391, "step": 48240 }, { "epoch": 0.5883536585365854, "grad_norm": 0.6277611255645752, "learning_rate": 1.6077642276422763e-05, "loss": 0.072, "step": 48245 }, { "epoch": 0.5884146341463414, "grad_norm": 0.539500892162323, "learning_rate": 1.6077235772357725e-05, "loss": 0.0517, "step": 48250 }, { "epoch": 0.5884756097560976, "grad_norm": 0.6657134890556335, "learning_rate": 1.6076829268292683e-05, "loss": 0.0788, "step": 48255 }, { "epoch": 0.5885365853658536, "grad_norm": 0.5324991345405579, "learning_rate": 1.6076422764227644e-05, "loss": 0.0594, "step": 48260 }, { "epoch": 0.5885975609756098, "grad_norm": 1.1345082521438599, "learning_rate": 1.6076016260162602e-05, "loss": 0.0534, "step": 48265 }, { "epoch": 0.5886585365853658, "grad_norm": 1.051157832145691, "learning_rate": 1.6075609756097564e-05, "loss": 0.0402, "step": 48270 }, { "epoch": 0.588719512195122, "grad_norm": 0.6230154633522034, "learning_rate": 1.6075203252032522e-05, "loss": 0.0502, "step": 48275 }, { "epoch": 0.588780487804878, "grad_norm": 0.49182653427124023, "learning_rate": 1.607479674796748e-05, "loss": 0.0512, "step": 48280 }, { "epoch": 0.5888414634146342, "grad_norm": 0.7746874690055847, "learning_rate": 1.607439024390244e-05, "loss": 0.0439, "step": 48285 }, { "epoch": 0.5889024390243902, "grad_norm": 0.5177356600761414, "learning_rate": 1.60739837398374e-05, "loss": 0.0633, "step": 48290 }, { "epoch": 0.5889634146341464, "grad_norm": 0.4640432596206665, "learning_rate": 1.6073577235772358e-05, "loss": 0.0461, "step": 48295 }, { "epoch": 0.5890243902439024, "grad_norm": 0.482534795999527, "learning_rate": 1.607317073170732e-05, "loss": 0.036, "step": 48300 }, { "epoch": 0.5890853658536586, "grad_norm": 0.579176664352417, "learning_rate": 1.6072764227642278e-05, "loss": 0.0554, "step": 48305 }, { "epoch": 0.5891463414634146, "grad_norm": 0.7875317335128784, "learning_rate": 1.6072357723577236e-05, "loss": 0.0748, "step": 48310 }, { "epoch": 0.5892073170731708, "grad_norm": 0.5621259212493896, "learning_rate": 1.6071951219512197e-05, "loss": 0.0754, "step": 48315 }, { "epoch": 0.5892682926829268, "grad_norm": 0.8761435747146606, "learning_rate": 1.6071544715447155e-05, "loss": 0.0624, "step": 48320 }, { "epoch": 0.589329268292683, "grad_norm": 0.5549413561820984, "learning_rate": 1.6071138211382117e-05, "loss": 0.0404, "step": 48325 }, { "epoch": 0.589390243902439, "grad_norm": 1.1822164058685303, "learning_rate": 1.6070731707317075e-05, "loss": 0.0772, "step": 48330 }, { "epoch": 0.5894512195121951, "grad_norm": 0.5151129364967346, "learning_rate": 1.6070325203252033e-05, "loss": 0.0356, "step": 48335 }, { "epoch": 0.5895121951219512, "grad_norm": 0.5661312341690063, "learning_rate": 1.606991869918699e-05, "loss": 0.0589, "step": 48340 }, { "epoch": 0.5895731707317073, "grad_norm": 1.8486602306365967, "learning_rate": 1.6069512195121953e-05, "loss": 0.0789, "step": 48345 }, { "epoch": 0.5896341463414634, "grad_norm": 0.39269229769706726, "learning_rate": 1.606910569105691e-05, "loss": 0.078, "step": 48350 }, { "epoch": 0.5896951219512195, "grad_norm": 0.35268741846084595, "learning_rate": 1.6068699186991872e-05, "loss": 0.0422, "step": 48355 }, { "epoch": 0.5897560975609756, "grad_norm": 0.7140374779701233, "learning_rate": 1.606829268292683e-05, "loss": 0.0997, "step": 48360 }, { "epoch": 0.5898170731707317, "grad_norm": 0.67100590467453, "learning_rate": 1.606788617886179e-05, "loss": 0.0677, "step": 48365 }, { "epoch": 0.5898780487804878, "grad_norm": 0.7191419005393982, "learning_rate": 1.606747967479675e-05, "loss": 0.0679, "step": 48370 }, { "epoch": 0.5899390243902439, "grad_norm": 0.66743004322052, "learning_rate": 1.606707317073171e-05, "loss": 0.0582, "step": 48375 }, { "epoch": 0.59, "grad_norm": 0.4445701837539673, "learning_rate": 1.606666666666667e-05, "loss": 0.0601, "step": 48380 }, { "epoch": 0.5900609756097561, "grad_norm": 0.5856867432594299, "learning_rate": 1.6066260162601628e-05, "loss": 0.0572, "step": 48385 }, { "epoch": 0.5901219512195122, "grad_norm": 0.6902433037757874, "learning_rate": 1.6065853658536586e-05, "loss": 0.0757, "step": 48390 }, { "epoch": 0.5901829268292683, "grad_norm": 1.8582245111465454, "learning_rate": 1.6065447154471544e-05, "loss": 0.0594, "step": 48395 }, { "epoch": 0.5902439024390244, "grad_norm": 0.6316116452217102, "learning_rate": 1.6065040650406506e-05, "loss": 0.0467, "step": 48400 }, { "epoch": 0.5903048780487805, "grad_norm": 0.8439412713050842, "learning_rate": 1.6064634146341464e-05, "loss": 0.0592, "step": 48405 }, { "epoch": 0.5903658536585366, "grad_norm": 0.30193793773651123, "learning_rate": 1.6064227642276425e-05, "loss": 0.054, "step": 48410 }, { "epoch": 0.5904268292682927, "grad_norm": 0.3135235011577606, "learning_rate": 1.6063821138211383e-05, "loss": 0.0561, "step": 48415 }, { "epoch": 0.5904878048780487, "grad_norm": 0.8053435683250427, "learning_rate": 1.6063414634146345e-05, "loss": 0.0409, "step": 48420 }, { "epoch": 0.5905487804878049, "grad_norm": 0.560474157333374, "learning_rate": 1.60630081300813e-05, "loss": 0.0599, "step": 48425 }, { "epoch": 0.590609756097561, "grad_norm": 0.5735857486724854, "learning_rate": 1.606260162601626e-05, "loss": 0.0763, "step": 48430 }, { "epoch": 0.5906707317073171, "grad_norm": 0.805637776851654, "learning_rate": 1.606219512195122e-05, "loss": 0.0633, "step": 48435 }, { "epoch": 0.5907317073170731, "grad_norm": 1.9104465246200562, "learning_rate": 1.606178861788618e-05, "loss": 0.0735, "step": 48440 }, { "epoch": 0.5907926829268293, "grad_norm": 1.3734123706817627, "learning_rate": 1.606138211382114e-05, "loss": 0.0547, "step": 48445 }, { "epoch": 0.5908536585365853, "grad_norm": 0.4408809542655945, "learning_rate": 1.60609756097561e-05, "loss": 0.0936, "step": 48450 }, { "epoch": 0.5909146341463415, "grad_norm": 0.2918468415737152, "learning_rate": 1.606056910569106e-05, "loss": 0.0542, "step": 48455 }, { "epoch": 0.5909756097560975, "grad_norm": 1.2003065347671509, "learning_rate": 1.6060162601626017e-05, "loss": 0.0517, "step": 48460 }, { "epoch": 0.5910365853658537, "grad_norm": 0.6057142019271851, "learning_rate": 1.605975609756098e-05, "loss": 0.0678, "step": 48465 }, { "epoch": 0.5910975609756097, "grad_norm": 0.42341265082359314, "learning_rate": 1.6059349593495936e-05, "loss": 0.0507, "step": 48470 }, { "epoch": 0.5911585365853659, "grad_norm": 0.5203136801719666, "learning_rate": 1.6058943089430895e-05, "loss": 0.1183, "step": 48475 }, { "epoch": 0.5912195121951219, "grad_norm": 0.5666742920875549, "learning_rate": 1.6058536585365856e-05, "loss": 0.0597, "step": 48480 }, { "epoch": 0.5912804878048781, "grad_norm": 0.9524986743927002, "learning_rate": 1.6058130081300814e-05, "loss": 0.0574, "step": 48485 }, { "epoch": 0.5913414634146341, "grad_norm": 0.8419502973556519, "learning_rate": 1.6057723577235772e-05, "loss": 0.0849, "step": 48490 }, { "epoch": 0.5914024390243903, "grad_norm": 0.7298226356506348, "learning_rate": 1.6057317073170734e-05, "loss": 0.0505, "step": 48495 }, { "epoch": 0.5914634146341463, "grad_norm": 0.4278179705142975, "learning_rate": 1.6056910569105692e-05, "loss": 0.0656, "step": 48500 }, { "epoch": 0.5915243902439025, "grad_norm": 0.6002413034439087, "learning_rate": 1.6056504065040654e-05, "loss": 0.0423, "step": 48505 }, { "epoch": 0.5915853658536585, "grad_norm": 0.807154655456543, "learning_rate": 1.605609756097561e-05, "loss": 0.0455, "step": 48510 }, { "epoch": 0.5916463414634147, "grad_norm": 0.4385775327682495, "learning_rate": 1.605569105691057e-05, "loss": 0.0641, "step": 48515 }, { "epoch": 0.5917073170731707, "grad_norm": 0.7575348615646362, "learning_rate": 1.6055284552845528e-05, "loss": 0.0549, "step": 48520 }, { "epoch": 0.5917682926829269, "grad_norm": 1.1339788436889648, "learning_rate": 1.605487804878049e-05, "loss": 0.081, "step": 48525 }, { "epoch": 0.5918292682926829, "grad_norm": 0.5232312083244324, "learning_rate": 1.6054471544715448e-05, "loss": 0.0805, "step": 48530 }, { "epoch": 0.591890243902439, "grad_norm": 0.8535528182983398, "learning_rate": 1.605406504065041e-05, "loss": 0.0646, "step": 48535 }, { "epoch": 0.5919512195121951, "grad_norm": 0.5729955434799194, "learning_rate": 1.6053658536585367e-05, "loss": 0.0665, "step": 48540 }, { "epoch": 0.5920121951219512, "grad_norm": 0.3052961826324463, "learning_rate": 1.6053252032520325e-05, "loss": 0.0754, "step": 48545 }, { "epoch": 0.5920731707317073, "grad_norm": 0.9315754771232605, "learning_rate": 1.6052845528455287e-05, "loss": 0.081, "step": 48550 }, { "epoch": 0.5921341463414634, "grad_norm": 1.630705714225769, "learning_rate": 1.6052439024390245e-05, "loss": 0.0739, "step": 48555 }, { "epoch": 0.5921951219512195, "grad_norm": 0.7960805892944336, "learning_rate": 1.6052032520325203e-05, "loss": 0.0602, "step": 48560 }, { "epoch": 0.5922560975609756, "grad_norm": 0.796987771987915, "learning_rate": 1.6051626016260165e-05, "loss": 0.0468, "step": 48565 }, { "epoch": 0.5923170731707317, "grad_norm": 0.7668876647949219, "learning_rate": 1.6051219512195123e-05, "loss": 0.0758, "step": 48570 }, { "epoch": 0.5923780487804878, "grad_norm": 1.0594534873962402, "learning_rate": 1.605081300813008e-05, "loss": 0.1045, "step": 48575 }, { "epoch": 0.5924390243902439, "grad_norm": 0.5112868547439575, "learning_rate": 1.6050406504065042e-05, "loss": 0.0732, "step": 48580 }, { "epoch": 0.5925, "grad_norm": 0.5735151171684265, "learning_rate": 1.605e-05, "loss": 0.0507, "step": 48585 }, { "epoch": 0.5925609756097561, "grad_norm": 0.8644328117370605, "learning_rate": 1.6049593495934962e-05, "loss": 0.0623, "step": 48590 }, { "epoch": 0.5926219512195122, "grad_norm": 0.345737099647522, "learning_rate": 1.604918699186992e-05, "loss": 0.0395, "step": 48595 }, { "epoch": 0.5926829268292683, "grad_norm": 0.6100075840950012, "learning_rate": 1.604878048780488e-05, "loss": 0.0657, "step": 48600 }, { "epoch": 0.5927439024390244, "grad_norm": 0.6405154466629028, "learning_rate": 1.6048373983739836e-05, "loss": 0.0722, "step": 48605 }, { "epoch": 0.5928048780487805, "grad_norm": 3.2983028888702393, "learning_rate": 1.6047967479674798e-05, "loss": 0.0475, "step": 48610 }, { "epoch": 0.5928658536585366, "grad_norm": 0.34321048855781555, "learning_rate": 1.6047560975609756e-05, "loss": 0.037, "step": 48615 }, { "epoch": 0.5929268292682927, "grad_norm": 0.48065271973609924, "learning_rate": 1.6047154471544718e-05, "loss": 0.043, "step": 48620 }, { "epoch": 0.5929878048780488, "grad_norm": 0.6902177333831787, "learning_rate": 1.6046747967479676e-05, "loss": 0.0531, "step": 48625 }, { "epoch": 0.5930487804878048, "grad_norm": 0.44917991757392883, "learning_rate": 1.6046341463414637e-05, "loss": 0.0499, "step": 48630 }, { "epoch": 0.593109756097561, "grad_norm": 0.5151479840278625, "learning_rate": 1.6045934959349595e-05, "loss": 0.0476, "step": 48635 }, { "epoch": 0.593170731707317, "grad_norm": 0.5108947157859802, "learning_rate": 1.6045528455284553e-05, "loss": 0.088, "step": 48640 }, { "epoch": 0.5932317073170732, "grad_norm": 0.39483267068862915, "learning_rate": 1.6045121951219515e-05, "loss": 0.1235, "step": 48645 }, { "epoch": 0.5932926829268292, "grad_norm": 0.7792182564735413, "learning_rate": 1.6044715447154473e-05, "loss": 0.0618, "step": 48650 }, { "epoch": 0.5933536585365854, "grad_norm": 0.7918635010719299, "learning_rate": 1.604430894308943e-05, "loss": 0.0735, "step": 48655 }, { "epoch": 0.5934146341463414, "grad_norm": 1.8654696941375732, "learning_rate": 1.6043902439024393e-05, "loss": 0.057, "step": 48660 }, { "epoch": 0.5934756097560976, "grad_norm": 0.4947991669178009, "learning_rate": 1.604349593495935e-05, "loss": 0.051, "step": 48665 }, { "epoch": 0.5935365853658536, "grad_norm": 0.640735924243927, "learning_rate": 1.604308943089431e-05, "loss": 0.0767, "step": 48670 }, { "epoch": 0.5935975609756098, "grad_norm": 0.5800995230674744, "learning_rate": 1.604268292682927e-05, "loss": 0.082, "step": 48675 }, { "epoch": 0.5936585365853658, "grad_norm": 0.6552729606628418, "learning_rate": 1.604227642276423e-05, "loss": 0.0549, "step": 48680 }, { "epoch": 0.593719512195122, "grad_norm": 0.35024815797805786, "learning_rate": 1.604186991869919e-05, "loss": 0.0446, "step": 48685 }, { "epoch": 0.593780487804878, "grad_norm": 0.5638360381126404, "learning_rate": 1.6041463414634148e-05, "loss": 0.0825, "step": 48690 }, { "epoch": 0.5938414634146342, "grad_norm": 0.7076980471611023, "learning_rate": 1.6041056910569106e-05, "loss": 0.056, "step": 48695 }, { "epoch": 0.5939024390243902, "grad_norm": 0.5858131051063538, "learning_rate": 1.6040650406504065e-05, "loss": 0.1245, "step": 48700 }, { "epoch": 0.5939634146341464, "grad_norm": 0.6987958550453186, "learning_rate": 1.6040243902439026e-05, "loss": 0.0654, "step": 48705 }, { "epoch": 0.5940243902439024, "grad_norm": 0.3888203203678131, "learning_rate": 1.6039837398373984e-05, "loss": 0.0563, "step": 48710 }, { "epoch": 0.5940853658536586, "grad_norm": 0.9805439710617065, "learning_rate": 1.6039430894308946e-05, "loss": 0.0684, "step": 48715 }, { "epoch": 0.5941463414634146, "grad_norm": 0.843852162361145, "learning_rate": 1.6039024390243904e-05, "loss": 0.0645, "step": 48720 }, { "epoch": 0.5942073170731708, "grad_norm": 0.471431702375412, "learning_rate": 1.6038617886178862e-05, "loss": 0.0318, "step": 48725 }, { "epoch": 0.5942682926829268, "grad_norm": 0.49896347522735596, "learning_rate": 1.6038211382113823e-05, "loss": 0.0663, "step": 48730 }, { "epoch": 0.594329268292683, "grad_norm": 1.0372287034988403, "learning_rate": 1.603780487804878e-05, "loss": 0.0592, "step": 48735 }, { "epoch": 0.594390243902439, "grad_norm": 0.30301451683044434, "learning_rate": 1.603739837398374e-05, "loss": 0.0523, "step": 48740 }, { "epoch": 0.5944512195121952, "grad_norm": 0.7070521116256714, "learning_rate": 1.60369918699187e-05, "loss": 0.0633, "step": 48745 }, { "epoch": 0.5945121951219512, "grad_norm": 1.0107308626174927, "learning_rate": 1.603658536585366e-05, "loss": 0.0563, "step": 48750 }, { "epoch": 0.5945731707317073, "grad_norm": 1.058458685874939, "learning_rate": 1.6036178861788617e-05, "loss": 0.0436, "step": 48755 }, { "epoch": 0.5946341463414634, "grad_norm": 0.5278228521347046, "learning_rate": 1.603577235772358e-05, "loss": 0.0701, "step": 48760 }, { "epoch": 0.5946951219512195, "grad_norm": 0.9825612306594849, "learning_rate": 1.6035365853658537e-05, "loss": 0.0802, "step": 48765 }, { "epoch": 0.5947560975609756, "grad_norm": 0.6368780732154846, "learning_rate": 1.60349593495935e-05, "loss": 0.053, "step": 48770 }, { "epoch": 0.5948170731707317, "grad_norm": 2.9189600944519043, "learning_rate": 1.6034552845528457e-05, "loss": 0.0673, "step": 48775 }, { "epoch": 0.5948780487804878, "grad_norm": 0.8018097877502441, "learning_rate": 1.6034146341463418e-05, "loss": 0.0607, "step": 48780 }, { "epoch": 0.5949390243902439, "grad_norm": 0.781410813331604, "learning_rate": 1.6033739837398373e-05, "loss": 0.0703, "step": 48785 }, { "epoch": 0.595, "grad_norm": 0.5352227091789246, "learning_rate": 1.6033333333333335e-05, "loss": 0.103, "step": 48790 }, { "epoch": 0.5950609756097561, "grad_norm": 1.0598056316375732, "learning_rate": 1.6032926829268293e-05, "loss": 0.046, "step": 48795 }, { "epoch": 0.5951219512195122, "grad_norm": 0.560280978679657, "learning_rate": 1.6032520325203254e-05, "loss": 0.0921, "step": 48800 }, { "epoch": 0.5951829268292683, "grad_norm": 0.43130236864089966, "learning_rate": 1.6032113821138212e-05, "loss": 0.0641, "step": 48805 }, { "epoch": 0.5952439024390244, "grad_norm": 0.5761204957962036, "learning_rate": 1.6031707317073174e-05, "loss": 0.0357, "step": 48810 }, { "epoch": 0.5953048780487805, "grad_norm": 2.489872932434082, "learning_rate": 1.6031300813008132e-05, "loss": 0.0854, "step": 48815 }, { "epoch": 0.5953658536585366, "grad_norm": 0.5952872037887573, "learning_rate": 1.603089430894309e-05, "loss": 0.0464, "step": 48820 }, { "epoch": 0.5954268292682927, "grad_norm": 0.7315719723701477, "learning_rate": 1.6030487804878048e-05, "loss": 0.0725, "step": 48825 }, { "epoch": 0.5954878048780488, "grad_norm": 0.5451887845993042, "learning_rate": 1.603008130081301e-05, "loss": 0.0483, "step": 48830 }, { "epoch": 0.5955487804878049, "grad_norm": 0.43796679377555847, "learning_rate": 1.6029674796747968e-05, "loss": 0.0563, "step": 48835 }, { "epoch": 0.595609756097561, "grad_norm": 1.01730477809906, "learning_rate": 1.602926829268293e-05, "loss": 0.0805, "step": 48840 }, { "epoch": 0.5956707317073171, "grad_norm": 0.42186540365219116, "learning_rate": 1.6028861788617888e-05, "loss": 0.0591, "step": 48845 }, { "epoch": 0.5957317073170731, "grad_norm": 0.9115816950798035, "learning_rate": 1.6028455284552846e-05, "loss": 0.1045, "step": 48850 }, { "epoch": 0.5957926829268293, "grad_norm": 0.9737034440040588, "learning_rate": 1.6028048780487807e-05, "loss": 0.0886, "step": 48855 }, { "epoch": 0.5958536585365853, "grad_norm": 1.6937049627304077, "learning_rate": 1.6027642276422765e-05, "loss": 0.0931, "step": 48860 }, { "epoch": 0.5959146341463415, "grad_norm": 0.2303994745016098, "learning_rate": 1.6027235772357727e-05, "loss": 0.0363, "step": 48865 }, { "epoch": 0.5959756097560975, "grad_norm": 0.6135345101356506, "learning_rate": 1.6026829268292685e-05, "loss": 0.0492, "step": 48870 }, { "epoch": 0.5960365853658537, "grad_norm": 0.6098511219024658, "learning_rate": 1.6026422764227643e-05, "loss": 0.0588, "step": 48875 }, { "epoch": 0.5960975609756097, "grad_norm": 0.3382907211780548, "learning_rate": 1.60260162601626e-05, "loss": 0.0413, "step": 48880 }, { "epoch": 0.5961585365853659, "grad_norm": 1.1777030229568481, "learning_rate": 1.6025609756097563e-05, "loss": 0.0634, "step": 48885 }, { "epoch": 0.5962195121951219, "grad_norm": 1.0951297283172607, "learning_rate": 1.602520325203252e-05, "loss": 0.087, "step": 48890 }, { "epoch": 0.5962804878048781, "grad_norm": 1.032755970954895, "learning_rate": 1.6024796747967482e-05, "loss": 0.0514, "step": 48895 }, { "epoch": 0.5963414634146341, "grad_norm": 0.5144119262695312, "learning_rate": 1.602439024390244e-05, "loss": 0.0599, "step": 48900 }, { "epoch": 0.5964024390243903, "grad_norm": 0.6725450754165649, "learning_rate": 1.60239837398374e-05, "loss": 0.0749, "step": 48905 }, { "epoch": 0.5964634146341463, "grad_norm": 0.5197233557701111, "learning_rate": 1.602357723577236e-05, "loss": 0.0577, "step": 48910 }, { "epoch": 0.5965243902439025, "grad_norm": 0.9761242866516113, "learning_rate": 1.6023170731707318e-05, "loss": 0.0575, "step": 48915 }, { "epoch": 0.5965853658536585, "grad_norm": 0.5094760656356812, "learning_rate": 1.6022764227642276e-05, "loss": 0.0551, "step": 48920 }, { "epoch": 0.5966463414634147, "grad_norm": 0.4939594566822052, "learning_rate": 1.6022357723577238e-05, "loss": 0.0407, "step": 48925 }, { "epoch": 0.5967073170731707, "grad_norm": 0.6335648894309998, "learning_rate": 1.6021951219512196e-05, "loss": 0.0677, "step": 48930 }, { "epoch": 0.5967682926829269, "grad_norm": 0.44461265206336975, "learning_rate": 1.6021544715447154e-05, "loss": 0.0592, "step": 48935 }, { "epoch": 0.5968292682926829, "grad_norm": 0.4157602787017822, "learning_rate": 1.6021138211382116e-05, "loss": 0.084, "step": 48940 }, { "epoch": 0.596890243902439, "grad_norm": 0.5516278147697449, "learning_rate": 1.6020731707317074e-05, "loss": 0.0512, "step": 48945 }, { "epoch": 0.5969512195121951, "grad_norm": 0.4957999289035797, "learning_rate": 1.6020325203252035e-05, "loss": 0.0699, "step": 48950 }, { "epoch": 0.5970121951219513, "grad_norm": 0.6610738039016724, "learning_rate": 1.6019918699186993e-05, "loss": 0.0721, "step": 48955 }, { "epoch": 0.5970731707317073, "grad_norm": 0.3980748951435089, "learning_rate": 1.6019512195121955e-05, "loss": 0.0661, "step": 48960 }, { "epoch": 0.5971341463414634, "grad_norm": 0.7367031574249268, "learning_rate": 1.601910569105691e-05, "loss": 0.1001, "step": 48965 }, { "epoch": 0.5971951219512195, "grad_norm": 0.3933224380016327, "learning_rate": 1.601869918699187e-05, "loss": 0.0388, "step": 48970 }, { "epoch": 0.5972560975609756, "grad_norm": 0.8373439908027649, "learning_rate": 1.601829268292683e-05, "loss": 0.0637, "step": 48975 }, { "epoch": 0.5973170731707317, "grad_norm": 0.5723550319671631, "learning_rate": 1.601788617886179e-05, "loss": 0.0572, "step": 48980 }, { "epoch": 0.5973780487804878, "grad_norm": 0.5208141803741455, "learning_rate": 1.601747967479675e-05, "loss": 0.0496, "step": 48985 }, { "epoch": 0.5974390243902439, "grad_norm": 0.715293824672699, "learning_rate": 1.601707317073171e-05, "loss": 0.0511, "step": 48990 }, { "epoch": 0.5975, "grad_norm": 0.3417988121509552, "learning_rate": 1.601666666666667e-05, "loss": 0.04, "step": 48995 }, { "epoch": 0.5975609756097561, "grad_norm": 0.6626218557357788, "learning_rate": 1.6016260162601627e-05, "loss": 0.0522, "step": 49000 }, { "epoch": 0.5976219512195122, "grad_norm": 0.742646336555481, "learning_rate": 1.6015853658536585e-05, "loss": 0.0399, "step": 49005 }, { "epoch": 0.5976829268292683, "grad_norm": 0.7463676929473877, "learning_rate": 1.6015447154471546e-05, "loss": 0.0436, "step": 49010 }, { "epoch": 0.5977439024390244, "grad_norm": 1.1540049314498901, "learning_rate": 1.6015040650406505e-05, "loss": 0.0637, "step": 49015 }, { "epoch": 0.5978048780487805, "grad_norm": 0.4953073263168335, "learning_rate": 1.6014634146341466e-05, "loss": 0.0672, "step": 49020 }, { "epoch": 0.5978658536585366, "grad_norm": 0.3837294280529022, "learning_rate": 1.6014227642276424e-05, "loss": 0.0445, "step": 49025 }, { "epoch": 0.5979268292682927, "grad_norm": 1.2447195053100586, "learning_rate": 1.6013821138211382e-05, "loss": 0.0524, "step": 49030 }, { "epoch": 0.5979878048780488, "grad_norm": 0.7563178539276123, "learning_rate": 1.6013414634146344e-05, "loss": 0.0749, "step": 49035 }, { "epoch": 0.5980487804878049, "grad_norm": 0.5687196254730225, "learning_rate": 1.6013008130081302e-05, "loss": 0.0704, "step": 49040 }, { "epoch": 0.598109756097561, "grad_norm": 0.4831640124320984, "learning_rate": 1.6012601626016263e-05, "loss": 0.0698, "step": 49045 }, { "epoch": 0.598170731707317, "grad_norm": 0.28563010692596436, "learning_rate": 1.601219512195122e-05, "loss": 0.0514, "step": 49050 }, { "epoch": 0.5982317073170732, "grad_norm": 0.6918767094612122, "learning_rate": 1.601178861788618e-05, "loss": 0.0566, "step": 49055 }, { "epoch": 0.5982926829268292, "grad_norm": 0.8849194049835205, "learning_rate": 1.6011382113821138e-05, "loss": 0.0872, "step": 49060 }, { "epoch": 0.5983536585365854, "grad_norm": 0.9609554409980774, "learning_rate": 1.60109756097561e-05, "loss": 0.0708, "step": 49065 }, { "epoch": 0.5984146341463414, "grad_norm": 0.5565183162689209, "learning_rate": 1.6010569105691057e-05, "loss": 0.05, "step": 49070 }, { "epoch": 0.5984756097560976, "grad_norm": 0.4057214558124542, "learning_rate": 1.601016260162602e-05, "loss": 0.0555, "step": 49075 }, { "epoch": 0.5985365853658536, "grad_norm": 0.6454868912696838, "learning_rate": 1.6009756097560977e-05, "loss": 0.0525, "step": 49080 }, { "epoch": 0.5985975609756098, "grad_norm": 4.885850429534912, "learning_rate": 1.6009349593495935e-05, "loss": 0.0744, "step": 49085 }, { "epoch": 0.5986585365853658, "grad_norm": 0.46726110577583313, "learning_rate": 1.6008943089430893e-05, "loss": 0.049, "step": 49090 }, { "epoch": 0.598719512195122, "grad_norm": 0.6318813562393188, "learning_rate": 1.6008536585365855e-05, "loss": 0.1092, "step": 49095 }, { "epoch": 0.598780487804878, "grad_norm": 0.6853229999542236, "learning_rate": 1.6008130081300813e-05, "loss": 0.0647, "step": 49100 }, { "epoch": 0.5988414634146342, "grad_norm": 0.6779026389122009, "learning_rate": 1.6007723577235775e-05, "loss": 0.0885, "step": 49105 }, { "epoch": 0.5989024390243902, "grad_norm": 0.6437020897865295, "learning_rate": 1.6007317073170733e-05, "loss": 0.0473, "step": 49110 }, { "epoch": 0.5989634146341464, "grad_norm": 0.45369192957878113, "learning_rate": 1.600691056910569e-05, "loss": 0.0641, "step": 49115 }, { "epoch": 0.5990243902439024, "grad_norm": 0.5520605444908142, "learning_rate": 1.6006504065040652e-05, "loss": 0.0394, "step": 49120 }, { "epoch": 0.5990853658536586, "grad_norm": 0.8457346558570862, "learning_rate": 1.600609756097561e-05, "loss": 0.0571, "step": 49125 }, { "epoch": 0.5991463414634146, "grad_norm": 0.5926246047019958, "learning_rate": 1.6005691056910572e-05, "loss": 0.0418, "step": 49130 }, { "epoch": 0.5992073170731708, "grad_norm": 0.8739585876464844, "learning_rate": 1.600528455284553e-05, "loss": 0.0991, "step": 49135 }, { "epoch": 0.5992682926829268, "grad_norm": 0.40278175473213196, "learning_rate": 1.600487804878049e-05, "loss": 0.0624, "step": 49140 }, { "epoch": 0.599329268292683, "grad_norm": 0.7093003988265991, "learning_rate": 1.6004471544715446e-05, "loss": 0.0623, "step": 49145 }, { "epoch": 0.599390243902439, "grad_norm": 0.7351135611534119, "learning_rate": 1.6004065040650408e-05, "loss": 0.0682, "step": 49150 }, { "epoch": 0.5994512195121952, "grad_norm": 0.46435049176216125, "learning_rate": 1.6003658536585366e-05, "loss": 0.0645, "step": 49155 }, { "epoch": 0.5995121951219512, "grad_norm": 0.44281429052352905, "learning_rate": 1.6003252032520327e-05, "loss": 0.0508, "step": 49160 }, { "epoch": 0.5995731707317074, "grad_norm": 0.7854346632957458, "learning_rate": 1.6002845528455286e-05, "loss": 0.058, "step": 49165 }, { "epoch": 0.5996341463414634, "grad_norm": 0.6005803346633911, "learning_rate": 1.6002439024390247e-05, "loss": 0.0897, "step": 49170 }, { "epoch": 0.5996951219512195, "grad_norm": 1.0527029037475586, "learning_rate": 1.6002032520325205e-05, "loss": 0.0974, "step": 49175 }, { "epoch": 0.5997560975609756, "grad_norm": 0.6912350654602051, "learning_rate": 1.6001626016260163e-05, "loss": 0.0643, "step": 49180 }, { "epoch": 0.5998170731707317, "grad_norm": 0.7537935972213745, "learning_rate": 1.600121951219512e-05, "loss": 0.0581, "step": 49185 }, { "epoch": 0.5998780487804878, "grad_norm": 0.5832063555717468, "learning_rate": 1.6000813008130083e-05, "loss": 0.0451, "step": 49190 }, { "epoch": 0.5999390243902439, "grad_norm": 0.2551247775554657, "learning_rate": 1.600040650406504e-05, "loss": 0.039, "step": 49195 }, { "epoch": 0.6, "grad_norm": 0.6656825542449951, "learning_rate": 1.6000000000000003e-05, "loss": 0.0562, "step": 49200 }, { "epoch": 0.6000609756097561, "grad_norm": 0.8063854575157166, "learning_rate": 1.599959349593496e-05, "loss": 0.0676, "step": 49205 }, { "epoch": 0.6001219512195122, "grad_norm": 0.5777647495269775, "learning_rate": 1.599918699186992e-05, "loss": 0.0624, "step": 49210 }, { "epoch": 0.6001829268292683, "grad_norm": 0.660950779914856, "learning_rate": 1.599878048780488e-05, "loss": 0.0539, "step": 49215 }, { "epoch": 0.6002439024390244, "grad_norm": 0.42906901240348816, "learning_rate": 1.599837398373984e-05, "loss": 0.036, "step": 49220 }, { "epoch": 0.6003048780487805, "grad_norm": 0.9699274301528931, "learning_rate": 1.59979674796748e-05, "loss": 0.0781, "step": 49225 }, { "epoch": 0.6003658536585366, "grad_norm": 0.47162747383117676, "learning_rate": 1.5997560975609758e-05, "loss": 0.0398, "step": 49230 }, { "epoch": 0.6004268292682927, "grad_norm": 1.2970811128616333, "learning_rate": 1.5997154471544716e-05, "loss": 0.077, "step": 49235 }, { "epoch": 0.6004878048780488, "grad_norm": 0.36453863978385925, "learning_rate": 1.5996747967479674e-05, "loss": 0.0505, "step": 49240 }, { "epoch": 0.6005487804878049, "grad_norm": 1.732340931892395, "learning_rate": 1.5996341463414636e-05, "loss": 0.0911, "step": 49245 }, { "epoch": 0.600609756097561, "grad_norm": 0.3582489490509033, "learning_rate": 1.5995934959349594e-05, "loss": 0.0274, "step": 49250 }, { "epoch": 0.6006707317073171, "grad_norm": 0.8098758459091187, "learning_rate": 1.5995528455284556e-05, "loss": 0.0619, "step": 49255 }, { "epoch": 0.6007317073170731, "grad_norm": 0.6804807782173157, "learning_rate": 1.5995121951219514e-05, "loss": 0.06, "step": 49260 }, { "epoch": 0.6007926829268293, "grad_norm": 0.4938293993473053, "learning_rate": 1.5994715447154472e-05, "loss": 0.0365, "step": 49265 }, { "epoch": 0.6008536585365853, "grad_norm": 0.5659195780754089, "learning_rate": 1.599430894308943e-05, "loss": 0.0868, "step": 49270 }, { "epoch": 0.6009146341463415, "grad_norm": 0.5559924840927124, "learning_rate": 1.599390243902439e-05, "loss": 0.0466, "step": 49275 }, { "epoch": 0.6009756097560975, "grad_norm": 0.7335701584815979, "learning_rate": 1.599349593495935e-05, "loss": 0.0608, "step": 49280 }, { "epoch": 0.6010365853658537, "grad_norm": 0.5086330771446228, "learning_rate": 1.599308943089431e-05, "loss": 0.0587, "step": 49285 }, { "epoch": 0.6010975609756097, "grad_norm": 0.511818528175354, "learning_rate": 1.599268292682927e-05, "loss": 0.0658, "step": 49290 }, { "epoch": 0.6011585365853659, "grad_norm": 1.117271065711975, "learning_rate": 1.5992276422764227e-05, "loss": 0.0472, "step": 49295 }, { "epoch": 0.6012195121951219, "grad_norm": 0.6942295432090759, "learning_rate": 1.599186991869919e-05, "loss": 0.0372, "step": 49300 }, { "epoch": 0.6012804878048781, "grad_norm": 0.6257158517837524, "learning_rate": 1.5991463414634147e-05, "loss": 0.0455, "step": 49305 }, { "epoch": 0.6013414634146341, "grad_norm": 0.6730245351791382, "learning_rate": 1.599105691056911e-05, "loss": 0.0569, "step": 49310 }, { "epoch": 0.6014024390243903, "grad_norm": 0.31479188799858093, "learning_rate": 1.5990650406504067e-05, "loss": 0.0541, "step": 49315 }, { "epoch": 0.6014634146341463, "grad_norm": 0.35640764236450195, "learning_rate": 1.5990243902439028e-05, "loss": 0.0706, "step": 49320 }, { "epoch": 0.6015243902439025, "grad_norm": 1.2131328582763672, "learning_rate": 1.5989837398373983e-05, "loss": 0.0646, "step": 49325 }, { "epoch": 0.6015853658536585, "grad_norm": 0.33549511432647705, "learning_rate": 1.5989430894308944e-05, "loss": 0.045, "step": 49330 }, { "epoch": 0.6016463414634147, "grad_norm": 0.2172485888004303, "learning_rate": 1.5989024390243903e-05, "loss": 0.0555, "step": 49335 }, { "epoch": 0.6017073170731707, "grad_norm": 0.5249984860420227, "learning_rate": 1.5988617886178864e-05, "loss": 0.0527, "step": 49340 }, { "epoch": 0.6017682926829269, "grad_norm": 0.32392364740371704, "learning_rate": 1.5988211382113822e-05, "loss": 0.0539, "step": 49345 }, { "epoch": 0.6018292682926829, "grad_norm": 0.5344512462615967, "learning_rate": 1.5987804878048784e-05, "loss": 0.0815, "step": 49350 }, { "epoch": 0.6018902439024391, "grad_norm": 1.5945097208023071, "learning_rate": 1.598739837398374e-05, "loss": 0.0513, "step": 49355 }, { "epoch": 0.6019512195121951, "grad_norm": 0.6944020390510559, "learning_rate": 1.59869918699187e-05, "loss": 0.0678, "step": 49360 }, { "epoch": 0.6020121951219513, "grad_norm": 0.46979963779449463, "learning_rate": 1.5986585365853658e-05, "loss": 0.0478, "step": 49365 }, { "epoch": 0.6020731707317073, "grad_norm": 0.5522761344909668, "learning_rate": 1.598617886178862e-05, "loss": 0.0366, "step": 49370 }, { "epoch": 0.6021341463414634, "grad_norm": 0.5405214428901672, "learning_rate": 1.5985772357723578e-05, "loss": 0.0426, "step": 49375 }, { "epoch": 0.6021951219512195, "grad_norm": 0.5135572552680969, "learning_rate": 1.598536585365854e-05, "loss": 0.0484, "step": 49380 }, { "epoch": 0.6022560975609756, "grad_norm": 0.43207499384880066, "learning_rate": 1.5984959349593497e-05, "loss": 0.0929, "step": 49385 }, { "epoch": 0.6023170731707317, "grad_norm": 0.6990634202957153, "learning_rate": 1.5984552845528456e-05, "loss": 0.0706, "step": 49390 }, { "epoch": 0.6023780487804878, "grad_norm": 0.818621814250946, "learning_rate": 1.5984146341463417e-05, "loss": 0.0595, "step": 49395 }, { "epoch": 0.6024390243902439, "grad_norm": 0.7861534953117371, "learning_rate": 1.5983739837398375e-05, "loss": 0.0714, "step": 49400 }, { "epoch": 0.6025, "grad_norm": 0.3484865128993988, "learning_rate": 1.5983333333333337e-05, "loss": 0.0445, "step": 49405 }, { "epoch": 0.6025609756097561, "grad_norm": 0.6779099106788635, "learning_rate": 1.5982926829268295e-05, "loss": 0.056, "step": 49410 }, { "epoch": 0.6026219512195122, "grad_norm": 1.1287074089050293, "learning_rate": 1.5982520325203253e-05, "loss": 0.0486, "step": 49415 }, { "epoch": 0.6026829268292683, "grad_norm": 0.3939531147480011, "learning_rate": 1.598211382113821e-05, "loss": 0.0666, "step": 49420 }, { "epoch": 0.6027439024390244, "grad_norm": 0.9620727300643921, "learning_rate": 1.5981707317073173e-05, "loss": 0.0861, "step": 49425 }, { "epoch": 0.6028048780487805, "grad_norm": 0.5481862425804138, "learning_rate": 1.598130081300813e-05, "loss": 0.0333, "step": 49430 }, { "epoch": 0.6028658536585366, "grad_norm": 0.30723056197166443, "learning_rate": 1.5980894308943092e-05, "loss": 0.0696, "step": 49435 }, { "epoch": 0.6029268292682927, "grad_norm": 0.4311351180076599, "learning_rate": 1.598048780487805e-05, "loss": 0.0552, "step": 49440 }, { "epoch": 0.6029878048780488, "grad_norm": 1.922585368156433, "learning_rate": 1.598008130081301e-05, "loss": 0.0927, "step": 49445 }, { "epoch": 0.6030487804878049, "grad_norm": 0.7124865055084229, "learning_rate": 1.5979674796747967e-05, "loss": 0.0541, "step": 49450 }, { "epoch": 0.603109756097561, "grad_norm": 1.4723539352416992, "learning_rate": 1.5979268292682928e-05, "loss": 0.0493, "step": 49455 }, { "epoch": 0.603170731707317, "grad_norm": 0.2665839195251465, "learning_rate": 1.5978861788617886e-05, "loss": 0.043, "step": 49460 }, { "epoch": 0.6032317073170732, "grad_norm": 0.46541690826416016, "learning_rate": 1.5978455284552848e-05, "loss": 0.0402, "step": 49465 }, { "epoch": 0.6032926829268292, "grad_norm": 0.4404451549053192, "learning_rate": 1.5978048780487806e-05, "loss": 0.06, "step": 49470 }, { "epoch": 0.6033536585365854, "grad_norm": 0.3429829776287079, "learning_rate": 1.5977642276422764e-05, "loss": 0.0611, "step": 49475 }, { "epoch": 0.6034146341463414, "grad_norm": 0.5930238962173462, "learning_rate": 1.5977235772357726e-05, "loss": 0.0546, "step": 49480 }, { "epoch": 0.6034756097560976, "grad_norm": 0.665841817855835, "learning_rate": 1.5976829268292684e-05, "loss": 0.0534, "step": 49485 }, { "epoch": 0.6035365853658536, "grad_norm": 0.5036768317222595, "learning_rate": 1.5976422764227645e-05, "loss": 0.0691, "step": 49490 }, { "epoch": 0.6035975609756098, "grad_norm": 0.5161897540092468, "learning_rate": 1.5976016260162603e-05, "loss": 0.0588, "step": 49495 }, { "epoch": 0.6036585365853658, "grad_norm": 0.3967624604701996, "learning_rate": 1.597560975609756e-05, "loss": 0.085, "step": 49500 }, { "epoch": 0.603719512195122, "grad_norm": 0.6039108633995056, "learning_rate": 1.597520325203252e-05, "loss": 0.0754, "step": 49505 }, { "epoch": 0.603780487804878, "grad_norm": 0.6135165095329285, "learning_rate": 1.597479674796748e-05, "loss": 0.0441, "step": 49510 }, { "epoch": 0.6038414634146342, "grad_norm": 1.1056256294250488, "learning_rate": 1.597439024390244e-05, "loss": 0.077, "step": 49515 }, { "epoch": 0.6039024390243902, "grad_norm": 0.4856681525707245, "learning_rate": 1.59739837398374e-05, "loss": 0.0423, "step": 49520 }, { "epoch": 0.6039634146341464, "grad_norm": 0.6725283265113831, "learning_rate": 1.597357723577236e-05, "loss": 0.0422, "step": 49525 }, { "epoch": 0.6040243902439024, "grad_norm": 0.7631614804267883, "learning_rate": 1.597317073170732e-05, "loss": 0.0638, "step": 49530 }, { "epoch": 0.6040853658536586, "grad_norm": 0.540625810623169, "learning_rate": 1.5972764227642275e-05, "loss": 0.0578, "step": 49535 }, { "epoch": 0.6041463414634146, "grad_norm": 1.3447256088256836, "learning_rate": 1.5972357723577237e-05, "loss": 0.0769, "step": 49540 }, { "epoch": 0.6042073170731708, "grad_norm": 0.651303768157959, "learning_rate": 1.5971951219512195e-05, "loss": 0.0504, "step": 49545 }, { "epoch": 0.6042682926829268, "grad_norm": 0.8882793188095093, "learning_rate": 1.5971544715447156e-05, "loss": 0.0623, "step": 49550 }, { "epoch": 0.604329268292683, "grad_norm": 0.4325842559337616, "learning_rate": 1.5971138211382114e-05, "loss": 0.0773, "step": 49555 }, { "epoch": 0.604390243902439, "grad_norm": 0.9806326627731323, "learning_rate": 1.5970731707317076e-05, "loss": 0.0652, "step": 49560 }, { "epoch": 0.6044512195121952, "grad_norm": 0.638909637928009, "learning_rate": 1.5970325203252034e-05, "loss": 0.042, "step": 49565 }, { "epoch": 0.6045121951219512, "grad_norm": 0.6800819039344788, "learning_rate": 1.5969918699186992e-05, "loss": 0.0377, "step": 49570 }, { "epoch": 0.6045731707317074, "grad_norm": 0.8747519254684448, "learning_rate": 1.5969512195121954e-05, "loss": 0.0601, "step": 49575 }, { "epoch": 0.6046341463414634, "grad_norm": 0.6113836169242859, "learning_rate": 1.5969105691056912e-05, "loss": 0.0497, "step": 49580 }, { "epoch": 0.6046951219512195, "grad_norm": 0.5790824294090271, "learning_rate": 1.5968699186991873e-05, "loss": 0.0608, "step": 49585 }, { "epoch": 0.6047560975609756, "grad_norm": 1.8510656356811523, "learning_rate": 1.596829268292683e-05, "loss": 0.0715, "step": 49590 }, { "epoch": 0.6048170731707317, "grad_norm": 0.9923216700553894, "learning_rate": 1.596788617886179e-05, "loss": 0.0748, "step": 49595 }, { "epoch": 0.6048780487804878, "grad_norm": 0.4595911502838135, "learning_rate": 1.5967479674796748e-05, "loss": 0.0717, "step": 49600 }, { "epoch": 0.6049390243902439, "grad_norm": 0.45042669773101807, "learning_rate": 1.596707317073171e-05, "loss": 0.0422, "step": 49605 }, { "epoch": 0.605, "grad_norm": 1.0155537128448486, "learning_rate": 1.5966666666666667e-05, "loss": 0.0679, "step": 49610 }, { "epoch": 0.6050609756097561, "grad_norm": 0.6532230973243713, "learning_rate": 1.596626016260163e-05, "loss": 0.0669, "step": 49615 }, { "epoch": 0.6051219512195122, "grad_norm": 0.4798767864704132, "learning_rate": 1.5965853658536587e-05, "loss": 0.0482, "step": 49620 }, { "epoch": 0.6051829268292683, "grad_norm": 1.97609281539917, "learning_rate": 1.5965447154471545e-05, "loss": 0.0617, "step": 49625 }, { "epoch": 0.6052439024390244, "grad_norm": 0.43668583035469055, "learning_rate": 1.5965040650406503e-05, "loss": 0.0356, "step": 49630 }, { "epoch": 0.6053048780487805, "grad_norm": 0.3990020453929901, "learning_rate": 1.5964634146341465e-05, "loss": 0.0525, "step": 49635 }, { "epoch": 0.6053658536585366, "grad_norm": 0.43934366106987, "learning_rate": 1.5964227642276423e-05, "loss": 0.0437, "step": 49640 }, { "epoch": 0.6054268292682927, "grad_norm": 0.3164374828338623, "learning_rate": 1.5963821138211384e-05, "loss": 0.0352, "step": 49645 }, { "epoch": 0.6054878048780488, "grad_norm": 0.5510721206665039, "learning_rate": 1.5963414634146343e-05, "loss": 0.0373, "step": 49650 }, { "epoch": 0.6055487804878049, "grad_norm": 0.8305870294570923, "learning_rate": 1.59630081300813e-05, "loss": 0.1034, "step": 49655 }, { "epoch": 0.605609756097561, "grad_norm": 1.2395371198654175, "learning_rate": 1.5962601626016262e-05, "loss": 0.051, "step": 49660 }, { "epoch": 0.6056707317073171, "grad_norm": 0.9378559589385986, "learning_rate": 1.596219512195122e-05, "loss": 0.0569, "step": 49665 }, { "epoch": 0.6057317073170732, "grad_norm": 3.025466203689575, "learning_rate": 1.5961788617886182e-05, "loss": 0.0557, "step": 49670 }, { "epoch": 0.6057926829268293, "grad_norm": 0.5649651885032654, "learning_rate": 1.596138211382114e-05, "loss": 0.0618, "step": 49675 }, { "epoch": 0.6058536585365853, "grad_norm": 0.6813316345214844, "learning_rate": 1.5960975609756098e-05, "loss": 0.0397, "step": 49680 }, { "epoch": 0.6059146341463415, "grad_norm": 0.7627005577087402, "learning_rate": 1.5960569105691056e-05, "loss": 0.0431, "step": 49685 }, { "epoch": 0.6059756097560975, "grad_norm": 0.46639975905418396, "learning_rate": 1.5960162601626018e-05, "loss": 0.0549, "step": 49690 }, { "epoch": 0.6060365853658537, "grad_norm": 0.5014061331748962, "learning_rate": 1.5959756097560976e-05, "loss": 0.052, "step": 49695 }, { "epoch": 0.6060975609756097, "grad_norm": 0.9318220019340515, "learning_rate": 1.5959349593495937e-05, "loss": 0.0576, "step": 49700 }, { "epoch": 0.6061585365853659, "grad_norm": 0.7210832238197327, "learning_rate": 1.5958943089430896e-05, "loss": 0.0488, "step": 49705 }, { "epoch": 0.6062195121951219, "grad_norm": 1.472612738609314, "learning_rate": 1.5958536585365857e-05, "loss": 0.0863, "step": 49710 }, { "epoch": 0.6062804878048781, "grad_norm": 0.3925257623195648, "learning_rate": 1.5958130081300812e-05, "loss": 0.0493, "step": 49715 }, { "epoch": 0.6063414634146341, "grad_norm": 0.497825026512146, "learning_rate": 1.5957723577235773e-05, "loss": 0.0596, "step": 49720 }, { "epoch": 0.6064024390243903, "grad_norm": 1.1136525869369507, "learning_rate": 1.595731707317073e-05, "loss": 0.064, "step": 49725 }, { "epoch": 0.6064634146341463, "grad_norm": 1.2055143117904663, "learning_rate": 1.5956910569105693e-05, "loss": 0.0623, "step": 49730 }, { "epoch": 0.6065243902439025, "grad_norm": 0.7456831336021423, "learning_rate": 1.595650406504065e-05, "loss": 0.1161, "step": 49735 }, { "epoch": 0.6065853658536585, "grad_norm": 1.3787906169891357, "learning_rate": 1.5956097560975613e-05, "loss": 0.0544, "step": 49740 }, { "epoch": 0.6066463414634147, "grad_norm": 0.3997933864593506, "learning_rate": 1.595569105691057e-05, "loss": 0.0446, "step": 49745 }, { "epoch": 0.6067073170731707, "grad_norm": 0.486468642950058, "learning_rate": 1.595528455284553e-05, "loss": 0.05, "step": 49750 }, { "epoch": 0.6067682926829269, "grad_norm": 0.5617851614952087, "learning_rate": 1.595487804878049e-05, "loss": 0.057, "step": 49755 }, { "epoch": 0.6068292682926829, "grad_norm": 0.4232238531112671, "learning_rate": 1.595447154471545e-05, "loss": 0.0472, "step": 49760 }, { "epoch": 0.6068902439024391, "grad_norm": 0.4060414135456085, "learning_rate": 1.5954065040650407e-05, "loss": 0.0497, "step": 49765 }, { "epoch": 0.6069512195121951, "grad_norm": 0.6561418175697327, "learning_rate": 1.5953658536585368e-05, "loss": 0.0746, "step": 49770 }, { "epoch": 0.6070121951219513, "grad_norm": 0.7998118996620178, "learning_rate": 1.5953252032520326e-05, "loss": 0.0447, "step": 49775 }, { "epoch": 0.6070731707317073, "grad_norm": 0.22069662809371948, "learning_rate": 1.5952845528455284e-05, "loss": 0.0666, "step": 49780 }, { "epoch": 0.6071341463414635, "grad_norm": 0.4697723984718323, "learning_rate": 1.5952439024390246e-05, "loss": 0.0301, "step": 49785 }, { "epoch": 0.6071951219512195, "grad_norm": 0.7035556435585022, "learning_rate": 1.5952032520325204e-05, "loss": 0.0729, "step": 49790 }, { "epoch": 0.6072560975609756, "grad_norm": 0.2500624358654022, "learning_rate": 1.5951626016260166e-05, "loss": 0.092, "step": 49795 }, { "epoch": 0.6073170731707317, "grad_norm": 1.3592168092727661, "learning_rate": 1.5951219512195124e-05, "loss": 0.0506, "step": 49800 }, { "epoch": 0.6073780487804878, "grad_norm": 0.49559029936790466, "learning_rate": 1.5950813008130082e-05, "loss": 0.0595, "step": 49805 }, { "epoch": 0.6074390243902439, "grad_norm": 0.7237815856933594, "learning_rate": 1.595040650406504e-05, "loss": 0.0828, "step": 49810 }, { "epoch": 0.6075, "grad_norm": 0.4531661570072174, "learning_rate": 1.595e-05, "loss": 0.0469, "step": 49815 }, { "epoch": 0.6075609756097561, "grad_norm": 1.5482616424560547, "learning_rate": 1.594959349593496e-05, "loss": 0.0443, "step": 49820 }, { "epoch": 0.6076219512195122, "grad_norm": 0.5503880381584167, "learning_rate": 1.594918699186992e-05, "loss": 0.0696, "step": 49825 }, { "epoch": 0.6076829268292683, "grad_norm": 0.6746926307678223, "learning_rate": 1.594878048780488e-05, "loss": 0.0354, "step": 49830 }, { "epoch": 0.6077439024390244, "grad_norm": 0.4515356421470642, "learning_rate": 1.5948373983739837e-05, "loss": 0.0457, "step": 49835 }, { "epoch": 0.6078048780487805, "grad_norm": 1.0517094135284424, "learning_rate": 1.59479674796748e-05, "loss": 0.0962, "step": 49840 }, { "epoch": 0.6078658536585366, "grad_norm": 1.153850793838501, "learning_rate": 1.5947560975609757e-05, "loss": 0.0603, "step": 49845 }, { "epoch": 0.6079268292682927, "grad_norm": 0.7549678683280945, "learning_rate": 1.594715447154472e-05, "loss": 0.0472, "step": 49850 }, { "epoch": 0.6079878048780488, "grad_norm": 0.36487096548080444, "learning_rate": 1.5946747967479677e-05, "loss": 0.0338, "step": 49855 }, { "epoch": 0.6080487804878049, "grad_norm": 0.6720055341720581, "learning_rate": 1.5946341463414635e-05, "loss": 0.0707, "step": 49860 }, { "epoch": 0.608109756097561, "grad_norm": 0.7955607175827026, "learning_rate": 1.5945934959349593e-05, "loss": 0.0431, "step": 49865 }, { "epoch": 0.6081707317073171, "grad_norm": 1.0821250677108765, "learning_rate": 1.5945528455284554e-05, "loss": 0.0454, "step": 49870 }, { "epoch": 0.6082317073170732, "grad_norm": 0.39110586047172546, "learning_rate": 1.5945121951219513e-05, "loss": 0.0537, "step": 49875 }, { "epoch": 0.6082926829268293, "grad_norm": 0.8191549181938171, "learning_rate": 1.5944715447154474e-05, "loss": 0.0692, "step": 49880 }, { "epoch": 0.6083536585365854, "grad_norm": 0.49079591035842896, "learning_rate": 1.5944308943089432e-05, "loss": 0.034, "step": 49885 }, { "epoch": 0.6084146341463414, "grad_norm": 0.5657211542129517, "learning_rate": 1.5943902439024394e-05, "loss": 0.0463, "step": 49890 }, { "epoch": 0.6084756097560976, "grad_norm": 0.6975683569908142, "learning_rate": 1.594349593495935e-05, "loss": 0.0935, "step": 49895 }, { "epoch": 0.6085365853658536, "grad_norm": 0.4158153533935547, "learning_rate": 1.594308943089431e-05, "loss": 0.0939, "step": 49900 }, { "epoch": 0.6085975609756098, "grad_norm": 1.170384168624878, "learning_rate": 1.5942682926829268e-05, "loss": 0.0481, "step": 49905 }, { "epoch": 0.6086585365853658, "grad_norm": 0.9778106808662415, "learning_rate": 1.594227642276423e-05, "loss": 0.1039, "step": 49910 }, { "epoch": 0.608719512195122, "grad_norm": 0.6065651774406433, "learning_rate": 1.5941869918699188e-05, "loss": 0.0884, "step": 49915 }, { "epoch": 0.608780487804878, "grad_norm": 1.320890188217163, "learning_rate": 1.594146341463415e-05, "loss": 0.0517, "step": 49920 }, { "epoch": 0.6088414634146342, "grad_norm": 0.661834180355072, "learning_rate": 1.5941056910569107e-05, "loss": 0.0485, "step": 49925 }, { "epoch": 0.6089024390243902, "grad_norm": 0.673568606376648, "learning_rate": 1.5940650406504065e-05, "loss": 0.0758, "step": 49930 }, { "epoch": 0.6089634146341464, "grad_norm": 0.5812423229217529, "learning_rate": 1.5940243902439027e-05, "loss": 0.0542, "step": 49935 }, { "epoch": 0.6090243902439024, "grad_norm": 3.268329381942749, "learning_rate": 1.5939837398373985e-05, "loss": 0.0374, "step": 49940 }, { "epoch": 0.6090853658536586, "grad_norm": 0.7873890399932861, "learning_rate": 1.5939430894308943e-05, "loss": 0.0902, "step": 49945 }, { "epoch": 0.6091463414634146, "grad_norm": 0.5834203958511353, "learning_rate": 1.5939024390243905e-05, "loss": 0.0733, "step": 49950 }, { "epoch": 0.6092073170731708, "grad_norm": 0.6643914580345154, "learning_rate": 1.5938617886178863e-05, "loss": 0.0586, "step": 49955 }, { "epoch": 0.6092682926829268, "grad_norm": 0.4635215103626251, "learning_rate": 1.593821138211382e-05, "loss": 0.0701, "step": 49960 }, { "epoch": 0.609329268292683, "grad_norm": 0.7561255693435669, "learning_rate": 1.5937804878048783e-05, "loss": 0.0692, "step": 49965 }, { "epoch": 0.609390243902439, "grad_norm": 0.6654828190803528, "learning_rate": 1.593739837398374e-05, "loss": 0.0537, "step": 49970 }, { "epoch": 0.6094512195121952, "grad_norm": 0.614927351474762, "learning_rate": 1.5936991869918702e-05, "loss": 0.0429, "step": 49975 }, { "epoch": 0.6095121951219512, "grad_norm": 1.076291799545288, "learning_rate": 1.593658536585366e-05, "loss": 0.0685, "step": 49980 }, { "epoch": 0.6095731707317074, "grad_norm": 0.37843266129493713, "learning_rate": 1.593617886178862e-05, "loss": 0.0509, "step": 49985 }, { "epoch": 0.6096341463414634, "grad_norm": 0.8965615034103394, "learning_rate": 1.5935772357723577e-05, "loss": 0.0549, "step": 49990 }, { "epoch": 0.6096951219512196, "grad_norm": 0.4129561185836792, "learning_rate": 1.5935365853658538e-05, "loss": 0.0498, "step": 49995 }, { "epoch": 0.6097560975609756, "grad_norm": 0.5040059089660645, "learning_rate": 1.5934959349593496e-05, "loss": 0.0394, "step": 50000 }, { "epoch": 0.6098170731707317, "grad_norm": 0.763795793056488, "learning_rate": 1.5934552845528458e-05, "loss": 0.0494, "step": 50005 }, { "epoch": 0.6098780487804878, "grad_norm": 1.3881733417510986, "learning_rate": 1.5934146341463416e-05, "loss": 0.0957, "step": 50010 }, { "epoch": 0.609939024390244, "grad_norm": 0.3939204514026642, "learning_rate": 1.5933739837398374e-05, "loss": 0.0492, "step": 50015 }, { "epoch": 0.61, "grad_norm": 2.8262646198272705, "learning_rate": 1.5933333333333336e-05, "loss": 0.0532, "step": 50020 }, { "epoch": 0.6100609756097561, "grad_norm": 0.7056580781936646, "learning_rate": 1.5932926829268294e-05, "loss": 0.03, "step": 50025 }, { "epoch": 0.6101219512195122, "grad_norm": 0.7436551451683044, "learning_rate": 1.5932520325203252e-05, "loss": 0.0593, "step": 50030 }, { "epoch": 0.6101829268292683, "grad_norm": 0.46655556559562683, "learning_rate": 1.5932113821138213e-05, "loss": 0.047, "step": 50035 }, { "epoch": 0.6102439024390244, "grad_norm": 0.532065212726593, "learning_rate": 1.593170731707317e-05, "loss": 0.0434, "step": 50040 }, { "epoch": 0.6103048780487805, "grad_norm": 0.39582929015159607, "learning_rate": 1.593130081300813e-05, "loss": 0.0551, "step": 50045 }, { "epoch": 0.6103658536585366, "grad_norm": 0.53291916847229, "learning_rate": 1.593089430894309e-05, "loss": 0.0348, "step": 50050 }, { "epoch": 0.6104268292682927, "grad_norm": 0.6986936330795288, "learning_rate": 1.593048780487805e-05, "loss": 0.0828, "step": 50055 }, { "epoch": 0.6104878048780488, "grad_norm": 0.6086515188217163, "learning_rate": 1.593008130081301e-05, "loss": 0.0852, "step": 50060 }, { "epoch": 0.6105487804878049, "grad_norm": 1.3478573560714722, "learning_rate": 1.592967479674797e-05, "loss": 0.1003, "step": 50065 }, { "epoch": 0.610609756097561, "grad_norm": 0.6226933598518372, "learning_rate": 1.592926829268293e-05, "loss": 0.0377, "step": 50070 }, { "epoch": 0.6106707317073171, "grad_norm": 0.5629236102104187, "learning_rate": 1.5928861788617885e-05, "loss": 0.036, "step": 50075 }, { "epoch": 0.6107317073170732, "grad_norm": 1.1415503025054932, "learning_rate": 1.5928455284552847e-05, "loss": 0.0742, "step": 50080 }, { "epoch": 0.6107926829268293, "grad_norm": 0.43691375851631165, "learning_rate": 1.5928048780487805e-05, "loss": 0.0457, "step": 50085 }, { "epoch": 0.6108536585365854, "grad_norm": 0.5174343585968018, "learning_rate": 1.5927642276422766e-05, "loss": 0.0606, "step": 50090 }, { "epoch": 0.6109146341463415, "grad_norm": 0.5014451146125793, "learning_rate": 1.5927235772357724e-05, "loss": 0.0648, "step": 50095 }, { "epoch": 0.6109756097560975, "grad_norm": 0.5846392512321472, "learning_rate": 1.5926829268292686e-05, "loss": 0.0523, "step": 50100 }, { "epoch": 0.6110365853658537, "grad_norm": 0.743499219417572, "learning_rate": 1.5926422764227644e-05, "loss": 0.0386, "step": 50105 }, { "epoch": 0.6110975609756097, "grad_norm": 0.29597046971321106, "learning_rate": 1.5926016260162602e-05, "loss": 0.0774, "step": 50110 }, { "epoch": 0.6111585365853659, "grad_norm": 0.7931515574455261, "learning_rate": 1.5925609756097564e-05, "loss": 0.0523, "step": 50115 }, { "epoch": 0.6112195121951219, "grad_norm": 0.6704874634742737, "learning_rate": 1.5925203252032522e-05, "loss": 0.039, "step": 50120 }, { "epoch": 0.6112804878048781, "grad_norm": 0.9795109629631042, "learning_rate": 1.592479674796748e-05, "loss": 0.0403, "step": 50125 }, { "epoch": 0.6113414634146341, "grad_norm": 0.5072560906410217, "learning_rate": 1.592439024390244e-05, "loss": 0.0557, "step": 50130 }, { "epoch": 0.6114024390243903, "grad_norm": 0.33431190252304077, "learning_rate": 1.59239837398374e-05, "loss": 0.0328, "step": 50135 }, { "epoch": 0.6114634146341463, "grad_norm": 0.6928117871284485, "learning_rate": 1.5923577235772358e-05, "loss": 0.0442, "step": 50140 }, { "epoch": 0.6115243902439025, "grad_norm": 0.6060734391212463, "learning_rate": 1.592317073170732e-05, "loss": 0.0412, "step": 50145 }, { "epoch": 0.6115853658536585, "grad_norm": 0.47825831174850464, "learning_rate": 1.5922764227642277e-05, "loss": 0.0596, "step": 50150 }, { "epoch": 0.6116463414634147, "grad_norm": 0.4756704270839691, "learning_rate": 1.592235772357724e-05, "loss": 0.0618, "step": 50155 }, { "epoch": 0.6117073170731707, "grad_norm": 0.6034573912620544, "learning_rate": 1.5921951219512197e-05, "loss": 0.061, "step": 50160 }, { "epoch": 0.6117682926829269, "grad_norm": 0.7384078502655029, "learning_rate": 1.5921544715447155e-05, "loss": 0.0478, "step": 50165 }, { "epoch": 0.6118292682926829, "grad_norm": 0.490579217672348, "learning_rate": 1.5921138211382113e-05, "loss": 0.0634, "step": 50170 }, { "epoch": 0.6118902439024391, "grad_norm": 0.4816758334636688, "learning_rate": 1.5920731707317075e-05, "loss": 0.0322, "step": 50175 }, { "epoch": 0.6119512195121951, "grad_norm": 0.8928786516189575, "learning_rate": 1.5920325203252033e-05, "loss": 0.0805, "step": 50180 }, { "epoch": 0.6120121951219513, "grad_norm": 0.35443490743637085, "learning_rate": 1.5919918699186994e-05, "loss": 0.053, "step": 50185 }, { "epoch": 0.6120731707317073, "grad_norm": 0.4866275489330292, "learning_rate": 1.5919512195121953e-05, "loss": 0.0735, "step": 50190 }, { "epoch": 0.6121341463414635, "grad_norm": 0.5850792527198792, "learning_rate": 1.591910569105691e-05, "loss": 0.0588, "step": 50195 }, { "epoch": 0.6121951219512195, "grad_norm": 0.7189211249351501, "learning_rate": 1.5918699186991872e-05, "loss": 0.0509, "step": 50200 }, { "epoch": 0.6122560975609757, "grad_norm": 0.2451583743095398, "learning_rate": 1.591829268292683e-05, "loss": 0.0614, "step": 50205 }, { "epoch": 0.6123170731707317, "grad_norm": 0.5780845284461975, "learning_rate": 1.591788617886179e-05, "loss": 0.0483, "step": 50210 }, { "epoch": 0.6123780487804878, "grad_norm": 0.3728148937225342, "learning_rate": 1.591747967479675e-05, "loss": 0.05, "step": 50215 }, { "epoch": 0.6124390243902439, "grad_norm": 0.5359419584274292, "learning_rate": 1.5917073170731708e-05, "loss": 0.0546, "step": 50220 }, { "epoch": 0.6125, "grad_norm": 0.6888025999069214, "learning_rate": 1.5916666666666666e-05, "loss": 0.0597, "step": 50225 }, { "epoch": 0.6125609756097561, "grad_norm": 0.7180156707763672, "learning_rate": 1.5916260162601628e-05, "loss": 0.0494, "step": 50230 }, { "epoch": 0.6126219512195122, "grad_norm": 0.6680036187171936, "learning_rate": 1.5915853658536586e-05, "loss": 0.055, "step": 50235 }, { "epoch": 0.6126829268292683, "grad_norm": 0.33610105514526367, "learning_rate": 1.5915447154471547e-05, "loss": 0.0408, "step": 50240 }, { "epoch": 0.6127439024390244, "grad_norm": 1.228402853012085, "learning_rate": 1.5915040650406505e-05, "loss": 0.0361, "step": 50245 }, { "epoch": 0.6128048780487805, "grad_norm": 0.7627298831939697, "learning_rate": 1.5914634146341467e-05, "loss": 0.0442, "step": 50250 }, { "epoch": 0.6128658536585366, "grad_norm": 1.3008201122283936, "learning_rate": 1.5914227642276422e-05, "loss": 0.0618, "step": 50255 }, { "epoch": 0.6129268292682927, "grad_norm": 1.2020384073257446, "learning_rate": 1.5913821138211383e-05, "loss": 0.0415, "step": 50260 }, { "epoch": 0.6129878048780488, "grad_norm": 0.45153167843818665, "learning_rate": 1.591341463414634e-05, "loss": 0.0395, "step": 50265 }, { "epoch": 0.6130487804878049, "grad_norm": 0.6475288271903992, "learning_rate": 1.5913008130081303e-05, "loss": 0.0713, "step": 50270 }, { "epoch": 0.613109756097561, "grad_norm": 0.4172656536102295, "learning_rate": 1.591260162601626e-05, "loss": 0.0779, "step": 50275 }, { "epoch": 0.6131707317073171, "grad_norm": 0.6765043139457703, "learning_rate": 1.5912195121951223e-05, "loss": 0.0517, "step": 50280 }, { "epoch": 0.6132317073170732, "grad_norm": 0.3332276940345764, "learning_rate": 1.591178861788618e-05, "loss": 0.0346, "step": 50285 }, { "epoch": 0.6132926829268293, "grad_norm": 0.7016549706459045, "learning_rate": 1.591138211382114e-05, "loss": 0.0962, "step": 50290 }, { "epoch": 0.6133536585365854, "grad_norm": 2.511922597885132, "learning_rate": 1.5910975609756097e-05, "loss": 0.083, "step": 50295 }, { "epoch": 0.6134146341463415, "grad_norm": 2.098423719406128, "learning_rate": 1.591056910569106e-05, "loss": 0.0708, "step": 50300 }, { "epoch": 0.6134756097560976, "grad_norm": 0.249197319149971, "learning_rate": 1.5910162601626017e-05, "loss": 0.0429, "step": 50305 }, { "epoch": 0.6135365853658536, "grad_norm": 0.7793335318565369, "learning_rate": 1.5909756097560978e-05, "loss": 0.0524, "step": 50310 }, { "epoch": 0.6135975609756098, "grad_norm": 1.226517677307129, "learning_rate": 1.5909349593495936e-05, "loss": 0.072, "step": 50315 }, { "epoch": 0.6136585365853658, "grad_norm": 0.5815937519073486, "learning_rate": 1.5908943089430894e-05, "loss": 0.0788, "step": 50320 }, { "epoch": 0.613719512195122, "grad_norm": 0.6818864941596985, "learning_rate": 1.5908536585365856e-05, "loss": 0.0634, "step": 50325 }, { "epoch": 0.613780487804878, "grad_norm": 0.9158244132995605, "learning_rate": 1.5908130081300814e-05, "loss": 0.0357, "step": 50330 }, { "epoch": 0.6138414634146342, "grad_norm": 0.39569294452667236, "learning_rate": 1.5907723577235775e-05, "loss": 0.044, "step": 50335 }, { "epoch": 0.6139024390243902, "grad_norm": 0.41002631187438965, "learning_rate": 1.5907317073170734e-05, "loss": 0.073, "step": 50340 }, { "epoch": 0.6139634146341464, "grad_norm": 0.570654571056366, "learning_rate": 1.5906910569105692e-05, "loss": 0.0413, "step": 50345 }, { "epoch": 0.6140243902439024, "grad_norm": 0.7749238014221191, "learning_rate": 1.590650406504065e-05, "loss": 0.1053, "step": 50350 }, { "epoch": 0.6140853658536586, "grad_norm": 0.4688718318939209, "learning_rate": 1.590609756097561e-05, "loss": 0.0569, "step": 50355 }, { "epoch": 0.6141463414634146, "grad_norm": 0.8546618223190308, "learning_rate": 1.590569105691057e-05, "loss": 0.0483, "step": 50360 }, { "epoch": 0.6142073170731708, "grad_norm": 1.4627617597579956, "learning_rate": 1.590528455284553e-05, "loss": 0.0555, "step": 50365 }, { "epoch": 0.6142682926829268, "grad_norm": 0.4046865403652191, "learning_rate": 1.590487804878049e-05, "loss": 0.0543, "step": 50370 }, { "epoch": 0.614329268292683, "grad_norm": 0.6676700711250305, "learning_rate": 1.5904471544715447e-05, "loss": 0.0699, "step": 50375 }, { "epoch": 0.614390243902439, "grad_norm": 0.6281530261039734, "learning_rate": 1.590406504065041e-05, "loss": 0.0724, "step": 50380 }, { "epoch": 0.6144512195121952, "grad_norm": 0.9290546178817749, "learning_rate": 1.5903658536585367e-05, "loss": 0.0403, "step": 50385 }, { "epoch": 0.6145121951219512, "grad_norm": 0.603352427482605, "learning_rate": 1.5903252032520325e-05, "loss": 0.0614, "step": 50390 }, { "epoch": 0.6145731707317074, "grad_norm": 0.7899636030197144, "learning_rate": 1.5902845528455287e-05, "loss": 0.0696, "step": 50395 }, { "epoch": 0.6146341463414634, "grad_norm": 0.588805615901947, "learning_rate": 1.5902439024390245e-05, "loss": 0.0766, "step": 50400 }, { "epoch": 0.6146951219512196, "grad_norm": 0.6131505370140076, "learning_rate": 1.5902032520325203e-05, "loss": 0.0631, "step": 50405 }, { "epoch": 0.6147560975609756, "grad_norm": 0.8478023409843445, "learning_rate": 1.5901626016260164e-05, "loss": 0.0633, "step": 50410 }, { "epoch": 0.6148170731707318, "grad_norm": 0.42343226075172424, "learning_rate": 1.5901219512195122e-05, "loss": 0.0573, "step": 50415 }, { "epoch": 0.6148780487804878, "grad_norm": 0.5312348008155823, "learning_rate": 1.5900813008130084e-05, "loss": 0.0514, "step": 50420 }, { "epoch": 0.614939024390244, "grad_norm": 0.8821095824241638, "learning_rate": 1.5900406504065042e-05, "loss": 0.0803, "step": 50425 }, { "epoch": 0.615, "grad_norm": 0.6324908137321472, "learning_rate": 1.5900000000000004e-05, "loss": 0.0668, "step": 50430 }, { "epoch": 0.6150609756097561, "grad_norm": 1.10772705078125, "learning_rate": 1.589959349593496e-05, "loss": 0.0526, "step": 50435 }, { "epoch": 0.6151219512195122, "grad_norm": 0.700156033039093, "learning_rate": 1.589918699186992e-05, "loss": 0.0653, "step": 50440 }, { "epoch": 0.6151829268292683, "grad_norm": 0.8560873866081238, "learning_rate": 1.5898780487804878e-05, "loss": 0.0568, "step": 50445 }, { "epoch": 0.6152439024390244, "grad_norm": 0.5663385987281799, "learning_rate": 1.589837398373984e-05, "loss": 0.0768, "step": 50450 }, { "epoch": 0.6153048780487805, "grad_norm": 0.46671751141548157, "learning_rate": 1.5897967479674798e-05, "loss": 0.0611, "step": 50455 }, { "epoch": 0.6153658536585366, "grad_norm": 0.7484610080718994, "learning_rate": 1.589756097560976e-05, "loss": 0.0537, "step": 50460 }, { "epoch": 0.6154268292682927, "grad_norm": 0.7627029418945312, "learning_rate": 1.5897154471544717e-05, "loss": 0.0655, "step": 50465 }, { "epoch": 0.6154878048780488, "grad_norm": 0.743095338344574, "learning_rate": 1.5896747967479675e-05, "loss": 0.0513, "step": 50470 }, { "epoch": 0.6155487804878049, "grad_norm": 0.7608117461204529, "learning_rate": 1.5896341463414634e-05, "loss": 0.0647, "step": 50475 }, { "epoch": 0.615609756097561, "grad_norm": 0.3902095854282379, "learning_rate": 1.5895934959349595e-05, "loss": 0.0449, "step": 50480 }, { "epoch": 0.6156707317073171, "grad_norm": 0.4469914436340332, "learning_rate": 1.5895528455284553e-05, "loss": 0.046, "step": 50485 }, { "epoch": 0.6157317073170732, "grad_norm": 0.7225015163421631, "learning_rate": 1.5895121951219515e-05, "loss": 0.0569, "step": 50490 }, { "epoch": 0.6157926829268293, "grad_norm": 0.8269250392913818, "learning_rate": 1.5894715447154473e-05, "loss": 0.0458, "step": 50495 }, { "epoch": 0.6158536585365854, "grad_norm": 0.3567674458026886, "learning_rate": 1.589430894308943e-05, "loss": 0.0721, "step": 50500 }, { "epoch": 0.6159146341463415, "grad_norm": 0.31989625096321106, "learning_rate": 1.5893902439024392e-05, "loss": 0.0528, "step": 50505 }, { "epoch": 0.6159756097560976, "grad_norm": 1.549585223197937, "learning_rate": 1.589349593495935e-05, "loss": 0.0745, "step": 50510 }, { "epoch": 0.6160365853658537, "grad_norm": 0.5238818526268005, "learning_rate": 1.5893089430894312e-05, "loss": 0.0659, "step": 50515 }, { "epoch": 0.6160975609756097, "grad_norm": 0.3894820809364319, "learning_rate": 1.589268292682927e-05, "loss": 0.0475, "step": 50520 }, { "epoch": 0.6161585365853659, "grad_norm": 0.9221206307411194, "learning_rate": 1.589227642276423e-05, "loss": 0.0516, "step": 50525 }, { "epoch": 0.6162195121951219, "grad_norm": 0.35079124569892883, "learning_rate": 1.5891869918699187e-05, "loss": 0.0592, "step": 50530 }, { "epoch": 0.6162804878048781, "grad_norm": 0.6399556994438171, "learning_rate": 1.5891463414634148e-05, "loss": 0.0785, "step": 50535 }, { "epoch": 0.6163414634146341, "grad_norm": 0.6489596962928772, "learning_rate": 1.5891056910569106e-05, "loss": 0.0665, "step": 50540 }, { "epoch": 0.6164024390243903, "grad_norm": 0.7414842844009399, "learning_rate": 1.5890650406504068e-05, "loss": 0.0789, "step": 50545 }, { "epoch": 0.6164634146341463, "grad_norm": 0.5984098315238953, "learning_rate": 1.5890243902439026e-05, "loss": 0.0917, "step": 50550 }, { "epoch": 0.6165243902439025, "grad_norm": 0.589030385017395, "learning_rate": 1.5889837398373984e-05, "loss": 0.0647, "step": 50555 }, { "epoch": 0.6165853658536585, "grad_norm": 0.7509602904319763, "learning_rate": 1.5889430894308942e-05, "loss": 0.0909, "step": 50560 }, { "epoch": 0.6166463414634147, "grad_norm": 0.3782747983932495, "learning_rate": 1.5889024390243904e-05, "loss": 0.0833, "step": 50565 }, { "epoch": 0.6167073170731707, "grad_norm": 0.6740153431892395, "learning_rate": 1.588861788617886e-05, "loss": 0.0627, "step": 50570 }, { "epoch": 0.6167682926829269, "grad_norm": 1.3289639949798584, "learning_rate": 1.5888211382113823e-05, "loss": 0.0697, "step": 50575 }, { "epoch": 0.6168292682926829, "grad_norm": 0.6306785345077515, "learning_rate": 1.588780487804878e-05, "loss": 0.056, "step": 50580 }, { "epoch": 0.6168902439024391, "grad_norm": 1.7170952558517456, "learning_rate": 1.588739837398374e-05, "loss": 0.061, "step": 50585 }, { "epoch": 0.6169512195121951, "grad_norm": 0.38363611698150635, "learning_rate": 1.58869918699187e-05, "loss": 0.0398, "step": 50590 }, { "epoch": 0.6170121951219513, "grad_norm": 0.45102617144584656, "learning_rate": 1.588658536585366e-05, "loss": 0.0555, "step": 50595 }, { "epoch": 0.6170731707317073, "grad_norm": 0.2771667242050171, "learning_rate": 1.588617886178862e-05, "loss": 0.0607, "step": 50600 }, { "epoch": 0.6171341463414635, "grad_norm": 0.5738914608955383, "learning_rate": 1.588577235772358e-05, "loss": 0.0809, "step": 50605 }, { "epoch": 0.6171951219512195, "grad_norm": 0.9141131639480591, "learning_rate": 1.588536585365854e-05, "loss": 0.0541, "step": 50610 }, { "epoch": 0.6172560975609757, "grad_norm": 0.3923912048339844, "learning_rate": 1.5884959349593495e-05, "loss": 0.06, "step": 50615 }, { "epoch": 0.6173170731707317, "grad_norm": 0.5722646117210388, "learning_rate": 1.5884552845528457e-05, "loss": 0.0853, "step": 50620 }, { "epoch": 0.6173780487804879, "grad_norm": 0.5652303099632263, "learning_rate": 1.5884146341463415e-05, "loss": 0.0919, "step": 50625 }, { "epoch": 0.6174390243902439, "grad_norm": 0.5387600064277649, "learning_rate": 1.5883739837398376e-05, "loss": 0.0469, "step": 50630 }, { "epoch": 0.6175, "grad_norm": 0.8316919207572937, "learning_rate": 1.5883333333333334e-05, "loss": 0.0589, "step": 50635 }, { "epoch": 0.6175609756097561, "grad_norm": 1.7384034395217896, "learning_rate": 1.5882926829268296e-05, "loss": 0.1135, "step": 50640 }, { "epoch": 0.6176219512195122, "grad_norm": 0.3616279065608978, "learning_rate": 1.5882520325203254e-05, "loss": 0.0415, "step": 50645 }, { "epoch": 0.6176829268292683, "grad_norm": 0.3658026158809662, "learning_rate": 1.5882113821138212e-05, "loss": 0.0515, "step": 50650 }, { "epoch": 0.6177439024390244, "grad_norm": 0.4737865626811981, "learning_rate": 1.588170731707317e-05, "loss": 0.0387, "step": 50655 }, { "epoch": 0.6178048780487805, "grad_norm": 0.7141883373260498, "learning_rate": 1.5881300813008132e-05, "loss": 0.0583, "step": 50660 }, { "epoch": 0.6178658536585366, "grad_norm": 1.0432074069976807, "learning_rate": 1.588089430894309e-05, "loss": 0.0277, "step": 50665 }, { "epoch": 0.6179268292682927, "grad_norm": 0.5814614295959473, "learning_rate": 1.588048780487805e-05, "loss": 0.0472, "step": 50670 }, { "epoch": 0.6179878048780488, "grad_norm": 2.6111881732940674, "learning_rate": 1.588008130081301e-05, "loss": 0.0454, "step": 50675 }, { "epoch": 0.6180487804878049, "grad_norm": 0.24153974652290344, "learning_rate": 1.5879674796747968e-05, "loss": 0.0647, "step": 50680 }, { "epoch": 0.618109756097561, "grad_norm": 0.8878328204154968, "learning_rate": 1.587926829268293e-05, "loss": 0.0424, "step": 50685 }, { "epoch": 0.6181707317073171, "grad_norm": 1.326135516166687, "learning_rate": 1.5878861788617887e-05, "loss": 0.0525, "step": 50690 }, { "epoch": 0.6182317073170732, "grad_norm": 0.40340912342071533, "learning_rate": 1.587845528455285e-05, "loss": 0.0503, "step": 50695 }, { "epoch": 0.6182926829268293, "grad_norm": 0.4875170588493347, "learning_rate": 1.5878048780487807e-05, "loss": 0.0529, "step": 50700 }, { "epoch": 0.6183536585365854, "grad_norm": 0.47313961386680603, "learning_rate": 1.5877642276422765e-05, "loss": 0.0327, "step": 50705 }, { "epoch": 0.6184146341463415, "grad_norm": 1.2792757749557495, "learning_rate": 1.5877235772357723e-05, "loss": 0.0412, "step": 50710 }, { "epoch": 0.6184756097560976, "grad_norm": 0.5267988443374634, "learning_rate": 1.5876829268292685e-05, "loss": 0.0599, "step": 50715 }, { "epoch": 0.6185365853658537, "grad_norm": 0.4558558166027069, "learning_rate": 1.5876422764227643e-05, "loss": 0.0497, "step": 50720 }, { "epoch": 0.6185975609756098, "grad_norm": 0.7778871059417725, "learning_rate": 1.5876016260162604e-05, "loss": 0.0634, "step": 50725 }, { "epoch": 0.6186585365853658, "grad_norm": 1.0514270067214966, "learning_rate": 1.5875609756097562e-05, "loss": 0.0902, "step": 50730 }, { "epoch": 0.618719512195122, "grad_norm": 0.8262121677398682, "learning_rate": 1.587520325203252e-05, "loss": 0.0478, "step": 50735 }, { "epoch": 0.618780487804878, "grad_norm": 0.2847713232040405, "learning_rate": 1.587479674796748e-05, "loss": 0.0447, "step": 50740 }, { "epoch": 0.6188414634146342, "grad_norm": 0.4472767114639282, "learning_rate": 1.587439024390244e-05, "loss": 0.0383, "step": 50745 }, { "epoch": 0.6189024390243902, "grad_norm": 0.5981627702713013, "learning_rate": 1.58739837398374e-05, "loss": 0.0571, "step": 50750 }, { "epoch": 0.6189634146341464, "grad_norm": 0.30212733149528503, "learning_rate": 1.587357723577236e-05, "loss": 0.0616, "step": 50755 }, { "epoch": 0.6190243902439024, "grad_norm": 0.840714156627655, "learning_rate": 1.5873170731707318e-05, "loss": 0.0972, "step": 50760 }, { "epoch": 0.6190853658536586, "grad_norm": 0.4776332378387451, "learning_rate": 1.5872764227642276e-05, "loss": 0.0654, "step": 50765 }, { "epoch": 0.6191463414634146, "grad_norm": 2.2335987091064453, "learning_rate": 1.5872357723577238e-05, "loss": 0.0711, "step": 50770 }, { "epoch": 0.6192073170731708, "grad_norm": 0.8852382898330688, "learning_rate": 1.5871951219512196e-05, "loss": 0.075, "step": 50775 }, { "epoch": 0.6192682926829268, "grad_norm": 1.7578758001327515, "learning_rate": 1.5871544715447157e-05, "loss": 0.0773, "step": 50780 }, { "epoch": 0.619329268292683, "grad_norm": 0.8597213625907898, "learning_rate": 1.5871138211382115e-05, "loss": 0.0407, "step": 50785 }, { "epoch": 0.619390243902439, "grad_norm": 0.9650235176086426, "learning_rate": 1.5870731707317077e-05, "loss": 0.0572, "step": 50790 }, { "epoch": 0.6194512195121952, "grad_norm": 1.239160180091858, "learning_rate": 1.587032520325203e-05, "loss": 0.0672, "step": 50795 }, { "epoch": 0.6195121951219512, "grad_norm": 0.4928343594074249, "learning_rate": 1.5869918699186993e-05, "loss": 0.0509, "step": 50800 }, { "epoch": 0.6195731707317074, "grad_norm": 0.5618331432342529, "learning_rate": 1.586951219512195e-05, "loss": 0.0386, "step": 50805 }, { "epoch": 0.6196341463414634, "grad_norm": 0.9187359809875488, "learning_rate": 1.5869105691056913e-05, "loss": 0.102, "step": 50810 }, { "epoch": 0.6196951219512196, "grad_norm": 0.5652737617492676, "learning_rate": 1.586869918699187e-05, "loss": 0.0391, "step": 50815 }, { "epoch": 0.6197560975609756, "grad_norm": 0.11084393411874771, "learning_rate": 1.5868292682926832e-05, "loss": 0.0385, "step": 50820 }, { "epoch": 0.6198170731707318, "grad_norm": 0.8984005451202393, "learning_rate": 1.5867886178861787e-05, "loss": 0.0444, "step": 50825 }, { "epoch": 0.6198780487804878, "grad_norm": 1.1609361171722412, "learning_rate": 1.586747967479675e-05, "loss": 0.0782, "step": 50830 }, { "epoch": 0.619939024390244, "grad_norm": 0.7756954431533813, "learning_rate": 1.5867073170731707e-05, "loss": 0.0561, "step": 50835 }, { "epoch": 0.62, "grad_norm": 1.2051787376403809, "learning_rate": 1.586666666666667e-05, "loss": 0.0415, "step": 50840 }, { "epoch": 0.6200609756097561, "grad_norm": 0.3708247244358063, "learning_rate": 1.5866260162601626e-05, "loss": 0.0441, "step": 50845 }, { "epoch": 0.6201219512195122, "grad_norm": 0.604224681854248, "learning_rate": 1.5865853658536588e-05, "loss": 0.0916, "step": 50850 }, { "epoch": 0.6201829268292683, "grad_norm": 0.4054770767688751, "learning_rate": 1.5865447154471546e-05, "loss": 0.0644, "step": 50855 }, { "epoch": 0.6202439024390244, "grad_norm": 0.3281572759151459, "learning_rate": 1.5865040650406504e-05, "loss": 0.0432, "step": 50860 }, { "epoch": 0.6203048780487805, "grad_norm": 0.8936297297477722, "learning_rate": 1.5864634146341466e-05, "loss": 0.0631, "step": 50865 }, { "epoch": 0.6203658536585366, "grad_norm": 0.4481598734855652, "learning_rate": 1.5864227642276424e-05, "loss": 0.0637, "step": 50870 }, { "epoch": 0.6204268292682927, "grad_norm": 0.745871365070343, "learning_rate": 1.5863821138211385e-05, "loss": 0.0623, "step": 50875 }, { "epoch": 0.6204878048780488, "grad_norm": 0.6751934885978699, "learning_rate": 1.5863414634146344e-05, "loss": 0.0359, "step": 50880 }, { "epoch": 0.6205487804878049, "grad_norm": 1.1605573892593384, "learning_rate": 1.58630081300813e-05, "loss": 0.0737, "step": 50885 }, { "epoch": 0.620609756097561, "grad_norm": 0.34212151169776917, "learning_rate": 1.586260162601626e-05, "loss": 0.0549, "step": 50890 }, { "epoch": 0.6206707317073171, "grad_norm": 0.45506176352500916, "learning_rate": 1.586219512195122e-05, "loss": 0.0525, "step": 50895 }, { "epoch": 0.6207317073170732, "grad_norm": 0.5790585279464722, "learning_rate": 1.586178861788618e-05, "loss": 0.0488, "step": 50900 }, { "epoch": 0.6207926829268293, "grad_norm": 0.4786946177482605, "learning_rate": 1.586138211382114e-05, "loss": 0.0786, "step": 50905 }, { "epoch": 0.6208536585365854, "grad_norm": 1.46084463596344, "learning_rate": 1.58609756097561e-05, "loss": 0.0889, "step": 50910 }, { "epoch": 0.6209146341463415, "grad_norm": 1.0837117433547974, "learning_rate": 1.5860569105691057e-05, "loss": 0.0812, "step": 50915 }, { "epoch": 0.6209756097560976, "grad_norm": 0.5934547185897827, "learning_rate": 1.5860162601626015e-05, "loss": 0.0404, "step": 50920 }, { "epoch": 0.6210365853658537, "grad_norm": 0.9461514949798584, "learning_rate": 1.5859756097560977e-05, "loss": 0.0918, "step": 50925 }, { "epoch": 0.6210975609756098, "grad_norm": 0.3724219501018524, "learning_rate": 1.5859349593495935e-05, "loss": 0.0481, "step": 50930 }, { "epoch": 0.6211585365853659, "grad_norm": 0.45849499106407166, "learning_rate": 1.5858943089430896e-05, "loss": 0.0716, "step": 50935 }, { "epoch": 0.621219512195122, "grad_norm": 0.6432141065597534, "learning_rate": 1.5858536585365855e-05, "loss": 0.0481, "step": 50940 }, { "epoch": 0.6212804878048781, "grad_norm": 0.50065016746521, "learning_rate": 1.5858130081300813e-05, "loss": 0.0436, "step": 50945 }, { "epoch": 0.6213414634146341, "grad_norm": 0.554257869720459, "learning_rate": 1.5857723577235774e-05, "loss": 0.0472, "step": 50950 }, { "epoch": 0.6214024390243903, "grad_norm": 0.762237548828125, "learning_rate": 1.5857317073170732e-05, "loss": 0.0504, "step": 50955 }, { "epoch": 0.6214634146341463, "grad_norm": 0.3594945967197418, "learning_rate": 1.5856910569105694e-05, "loss": 0.0471, "step": 50960 }, { "epoch": 0.6215243902439025, "grad_norm": 0.2853200137615204, "learning_rate": 1.5856504065040652e-05, "loss": 0.0306, "step": 50965 }, { "epoch": 0.6215853658536585, "grad_norm": 0.7181641459465027, "learning_rate": 1.585609756097561e-05, "loss": 0.0597, "step": 50970 }, { "epoch": 0.6216463414634147, "grad_norm": 1.0478795766830444, "learning_rate": 1.5855691056910568e-05, "loss": 0.0332, "step": 50975 }, { "epoch": 0.6217073170731707, "grad_norm": 0.7956925630569458, "learning_rate": 1.585528455284553e-05, "loss": 0.056, "step": 50980 }, { "epoch": 0.6217682926829269, "grad_norm": 0.42748454213142395, "learning_rate": 1.5854878048780488e-05, "loss": 0.0541, "step": 50985 }, { "epoch": 0.6218292682926829, "grad_norm": 0.514942467212677, "learning_rate": 1.585447154471545e-05, "loss": 0.0725, "step": 50990 }, { "epoch": 0.6218902439024391, "grad_norm": 2.785611867904663, "learning_rate": 1.5854065040650408e-05, "loss": 0.1131, "step": 50995 }, { "epoch": 0.6219512195121951, "grad_norm": 0.5087760090827942, "learning_rate": 1.585365853658537e-05, "loss": 0.0485, "step": 51000 }, { "epoch": 0.6220121951219513, "grad_norm": 0.8199634552001953, "learning_rate": 1.5853252032520324e-05, "loss": 0.0328, "step": 51005 }, { "epoch": 0.6220731707317073, "grad_norm": 1.7886980772018433, "learning_rate": 1.5852845528455285e-05, "loss": 0.0851, "step": 51010 }, { "epoch": 0.6221341463414635, "grad_norm": 0.6936803460121155, "learning_rate": 1.5852439024390243e-05, "loss": 0.0664, "step": 51015 }, { "epoch": 0.6221951219512195, "grad_norm": 0.6430166959762573, "learning_rate": 1.5852032520325205e-05, "loss": 0.0444, "step": 51020 }, { "epoch": 0.6222560975609757, "grad_norm": 1.3658831119537354, "learning_rate": 1.5851626016260163e-05, "loss": 0.1031, "step": 51025 }, { "epoch": 0.6223170731707317, "grad_norm": 0.718666672706604, "learning_rate": 1.5851219512195125e-05, "loss": 0.0979, "step": 51030 }, { "epoch": 0.6223780487804879, "grad_norm": 0.42000797390937805, "learning_rate": 1.5850813008130083e-05, "loss": 0.0455, "step": 51035 }, { "epoch": 0.6224390243902439, "grad_norm": 0.530476987361908, "learning_rate": 1.585040650406504e-05, "loss": 0.0422, "step": 51040 }, { "epoch": 0.6225, "grad_norm": 0.604789674282074, "learning_rate": 1.5850000000000002e-05, "loss": 0.0607, "step": 51045 }, { "epoch": 0.6225609756097561, "grad_norm": 1.5462065935134888, "learning_rate": 1.584959349593496e-05, "loss": 0.068, "step": 51050 }, { "epoch": 0.6226219512195122, "grad_norm": 0.6168437004089355, "learning_rate": 1.5849186991869922e-05, "loss": 0.0446, "step": 51055 }, { "epoch": 0.6226829268292683, "grad_norm": 5.689208507537842, "learning_rate": 1.584878048780488e-05, "loss": 0.077, "step": 51060 }, { "epoch": 0.6227439024390244, "grad_norm": 0.4673529267311096, "learning_rate": 1.5848373983739838e-05, "loss": 0.0496, "step": 51065 }, { "epoch": 0.6228048780487805, "grad_norm": 1.3635380268096924, "learning_rate": 1.5847967479674796e-05, "loss": 0.0934, "step": 51070 }, { "epoch": 0.6228658536585366, "grad_norm": 0.6213750243186951, "learning_rate": 1.5847560975609758e-05, "loss": 0.0506, "step": 51075 }, { "epoch": 0.6229268292682927, "grad_norm": 0.6858056783676147, "learning_rate": 1.5847154471544716e-05, "loss": 0.0466, "step": 51080 }, { "epoch": 0.6229878048780488, "grad_norm": 0.5670365691184998, "learning_rate": 1.5846747967479678e-05, "loss": 0.0565, "step": 51085 }, { "epoch": 0.6230487804878049, "grad_norm": 0.6904605627059937, "learning_rate": 1.5846341463414636e-05, "loss": 0.064, "step": 51090 }, { "epoch": 0.623109756097561, "grad_norm": 0.7728422284126282, "learning_rate": 1.5845934959349594e-05, "loss": 0.035, "step": 51095 }, { "epoch": 0.6231707317073171, "grad_norm": 0.9789934754371643, "learning_rate": 1.5845528455284552e-05, "loss": 0.0531, "step": 51100 }, { "epoch": 0.6232317073170732, "grad_norm": 0.7025412321090698, "learning_rate": 1.5845121951219513e-05, "loss": 0.07, "step": 51105 }, { "epoch": 0.6232926829268293, "grad_norm": 0.2982912063598633, "learning_rate": 1.584471544715447e-05, "loss": 0.0384, "step": 51110 }, { "epoch": 0.6233536585365854, "grad_norm": 0.6524956226348877, "learning_rate": 1.5844308943089433e-05, "loss": 0.0503, "step": 51115 }, { "epoch": 0.6234146341463415, "grad_norm": 0.4612398147583008, "learning_rate": 1.584390243902439e-05, "loss": 0.0522, "step": 51120 }, { "epoch": 0.6234756097560976, "grad_norm": 1.0538254976272583, "learning_rate": 1.584349593495935e-05, "loss": 0.0809, "step": 51125 }, { "epoch": 0.6235365853658537, "grad_norm": 0.6229097247123718, "learning_rate": 1.584308943089431e-05, "loss": 0.0737, "step": 51130 }, { "epoch": 0.6235975609756098, "grad_norm": 0.33383530378341675, "learning_rate": 1.584268292682927e-05, "loss": 0.0672, "step": 51135 }, { "epoch": 0.6236585365853659, "grad_norm": 0.5218626856803894, "learning_rate": 1.584227642276423e-05, "loss": 0.0622, "step": 51140 }, { "epoch": 0.623719512195122, "grad_norm": 0.4690628945827484, "learning_rate": 1.584186991869919e-05, "loss": 0.0527, "step": 51145 }, { "epoch": 0.623780487804878, "grad_norm": 1.0817909240722656, "learning_rate": 1.5841463414634147e-05, "loss": 0.0708, "step": 51150 }, { "epoch": 0.6238414634146342, "grad_norm": 0.6387261152267456, "learning_rate": 1.5841056910569105e-05, "loss": 0.0375, "step": 51155 }, { "epoch": 0.6239024390243902, "grad_norm": 0.43522846698760986, "learning_rate": 1.5840650406504066e-05, "loss": 0.0293, "step": 51160 }, { "epoch": 0.6239634146341464, "grad_norm": 0.4752395451068878, "learning_rate": 1.5840243902439025e-05, "loss": 0.0736, "step": 51165 }, { "epoch": 0.6240243902439024, "grad_norm": 0.44771161675453186, "learning_rate": 1.5839837398373986e-05, "loss": 0.0661, "step": 51170 }, { "epoch": 0.6240853658536586, "grad_norm": 0.39551228284835815, "learning_rate": 1.5839430894308944e-05, "loss": 0.0328, "step": 51175 }, { "epoch": 0.6241463414634146, "grad_norm": 0.5943437218666077, "learning_rate": 1.5839024390243906e-05, "loss": 0.0508, "step": 51180 }, { "epoch": 0.6242073170731708, "grad_norm": 0.5598412752151489, "learning_rate": 1.583861788617886e-05, "loss": 0.1002, "step": 51185 }, { "epoch": 0.6242682926829268, "grad_norm": 0.4607948660850525, "learning_rate": 1.5838211382113822e-05, "loss": 0.0532, "step": 51190 }, { "epoch": 0.624329268292683, "grad_norm": 0.6861257553100586, "learning_rate": 1.583780487804878e-05, "loss": 0.0753, "step": 51195 }, { "epoch": 0.624390243902439, "grad_norm": 0.4143601059913635, "learning_rate": 1.583739837398374e-05, "loss": 0.0553, "step": 51200 }, { "epoch": 0.6244512195121952, "grad_norm": 0.5961870551109314, "learning_rate": 1.58369918699187e-05, "loss": 0.0498, "step": 51205 }, { "epoch": 0.6245121951219512, "grad_norm": 0.3518352508544922, "learning_rate": 1.583658536585366e-05, "loss": 0.0248, "step": 51210 }, { "epoch": 0.6245731707317074, "grad_norm": 0.48826950788497925, "learning_rate": 1.583617886178862e-05, "loss": 0.0934, "step": 51215 }, { "epoch": 0.6246341463414634, "grad_norm": 0.7962267398834229, "learning_rate": 1.5835772357723578e-05, "loss": 0.064, "step": 51220 }, { "epoch": 0.6246951219512196, "grad_norm": 0.8793610334396362, "learning_rate": 1.583536585365854e-05, "loss": 0.0624, "step": 51225 }, { "epoch": 0.6247560975609756, "grad_norm": 0.713211715221405, "learning_rate": 1.5834959349593497e-05, "loss": 0.0674, "step": 51230 }, { "epoch": 0.6248170731707318, "grad_norm": 0.9752624034881592, "learning_rate": 1.5834552845528455e-05, "loss": 0.0598, "step": 51235 }, { "epoch": 0.6248780487804878, "grad_norm": 2.7639293670654297, "learning_rate": 1.5834146341463417e-05, "loss": 0.0557, "step": 51240 }, { "epoch": 0.624939024390244, "grad_norm": 0.5033963918685913, "learning_rate": 1.5833739837398375e-05, "loss": 0.0777, "step": 51245 }, { "epoch": 0.625, "grad_norm": 0.5495057702064514, "learning_rate": 1.5833333333333333e-05, "loss": 0.0638, "step": 51250 }, { "epoch": 0.625060975609756, "grad_norm": 0.644294261932373, "learning_rate": 1.5832926829268295e-05, "loss": 0.1135, "step": 51255 }, { "epoch": 0.6251219512195122, "grad_norm": 0.40100687742233276, "learning_rate": 1.5832520325203253e-05, "loss": 0.0317, "step": 51260 }, { "epoch": 0.6251829268292682, "grad_norm": 0.4582951068878174, "learning_rate": 1.5832113821138214e-05, "loss": 0.0633, "step": 51265 }, { "epoch": 0.6252439024390244, "grad_norm": 0.5217788815498352, "learning_rate": 1.5831707317073172e-05, "loss": 0.0436, "step": 51270 }, { "epoch": 0.6253048780487804, "grad_norm": 0.5833429098129272, "learning_rate": 1.5831300813008134e-05, "loss": 0.0669, "step": 51275 }, { "epoch": 0.6253658536585366, "grad_norm": 2.5237948894500732, "learning_rate": 1.583089430894309e-05, "loss": 0.0543, "step": 51280 }, { "epoch": 0.6254268292682926, "grad_norm": 0.3888452649116516, "learning_rate": 1.583048780487805e-05, "loss": 0.053, "step": 51285 }, { "epoch": 0.6254878048780488, "grad_norm": 0.2225743979215622, "learning_rate": 1.5830081300813008e-05, "loss": 0.0307, "step": 51290 }, { "epoch": 0.6255487804878048, "grad_norm": 0.7893209457397461, "learning_rate": 1.582967479674797e-05, "loss": 0.0484, "step": 51295 }, { "epoch": 0.625609756097561, "grad_norm": 0.9715831875801086, "learning_rate": 1.5829268292682928e-05, "loss": 0.0611, "step": 51300 }, { "epoch": 0.625670731707317, "grad_norm": 0.2588845193386078, "learning_rate": 1.582886178861789e-05, "loss": 0.0754, "step": 51305 }, { "epoch": 0.6257317073170732, "grad_norm": 0.5906289219856262, "learning_rate": 1.5828455284552848e-05, "loss": 0.0625, "step": 51310 }, { "epoch": 0.6257926829268292, "grad_norm": 0.5508614182472229, "learning_rate": 1.5828048780487806e-05, "loss": 0.0553, "step": 51315 }, { "epoch": 0.6258536585365854, "grad_norm": 0.35620683431625366, "learning_rate": 1.5827642276422767e-05, "loss": 0.071, "step": 51320 }, { "epoch": 0.6259146341463414, "grad_norm": 1.2063084840774536, "learning_rate": 1.5827235772357725e-05, "loss": 0.0456, "step": 51325 }, { "epoch": 0.6259756097560976, "grad_norm": 0.7307806611061096, "learning_rate": 1.5826829268292683e-05, "loss": 0.0835, "step": 51330 }, { "epoch": 0.6260365853658536, "grad_norm": 0.6491826176643372, "learning_rate": 1.5826422764227645e-05, "loss": 0.0547, "step": 51335 }, { "epoch": 0.6260975609756098, "grad_norm": 2.2210848331451416, "learning_rate": 1.5826016260162603e-05, "loss": 0.096, "step": 51340 }, { "epoch": 0.6261585365853658, "grad_norm": 0.48436301946640015, "learning_rate": 1.582560975609756e-05, "loss": 0.0469, "step": 51345 }, { "epoch": 0.626219512195122, "grad_norm": 0.7983561754226685, "learning_rate": 1.5825203252032523e-05, "loss": 0.0525, "step": 51350 }, { "epoch": 0.626280487804878, "grad_norm": 0.48256218433380127, "learning_rate": 1.582479674796748e-05, "loss": 0.0666, "step": 51355 }, { "epoch": 0.6263414634146341, "grad_norm": 0.45098623633384705, "learning_rate": 1.5824390243902442e-05, "loss": 0.0486, "step": 51360 }, { "epoch": 0.6264024390243902, "grad_norm": 0.8707193732261658, "learning_rate": 1.58239837398374e-05, "loss": 0.0592, "step": 51365 }, { "epoch": 0.6264634146341463, "grad_norm": 0.5734107494354248, "learning_rate": 1.582357723577236e-05, "loss": 0.0574, "step": 51370 }, { "epoch": 0.6265243902439024, "grad_norm": 0.42407041788101196, "learning_rate": 1.5823170731707317e-05, "loss": 0.0577, "step": 51375 }, { "epoch": 0.6265853658536585, "grad_norm": 0.834796667098999, "learning_rate": 1.5822764227642278e-05, "loss": 0.0637, "step": 51380 }, { "epoch": 0.6266463414634146, "grad_norm": 0.3349872827529907, "learning_rate": 1.5822357723577236e-05, "loss": 0.0615, "step": 51385 }, { "epoch": 0.6267073170731707, "grad_norm": 0.6611170768737793, "learning_rate": 1.5821951219512198e-05, "loss": 0.0603, "step": 51390 }, { "epoch": 0.6267682926829268, "grad_norm": 0.4827544093132019, "learning_rate": 1.5821544715447156e-05, "loss": 0.0725, "step": 51395 }, { "epoch": 0.6268292682926829, "grad_norm": 1.8580907583236694, "learning_rate": 1.5821138211382114e-05, "loss": 0.0574, "step": 51400 }, { "epoch": 0.626890243902439, "grad_norm": 0.4285849928855896, "learning_rate": 1.5820731707317076e-05, "loss": 0.0447, "step": 51405 }, { "epoch": 0.6269512195121951, "grad_norm": 0.8086888194084167, "learning_rate": 1.5820325203252034e-05, "loss": 0.0492, "step": 51410 }, { "epoch": 0.6270121951219512, "grad_norm": 1.2788147926330566, "learning_rate": 1.5819918699186992e-05, "loss": 0.0571, "step": 51415 }, { "epoch": 0.6270731707317073, "grad_norm": 0.8182400465011597, "learning_rate": 1.5819512195121953e-05, "loss": 0.0396, "step": 51420 }, { "epoch": 0.6271341463414634, "grad_norm": 0.48351848125457764, "learning_rate": 1.581910569105691e-05, "loss": 0.0493, "step": 51425 }, { "epoch": 0.6271951219512195, "grad_norm": 1.0165965557098389, "learning_rate": 1.581869918699187e-05, "loss": 0.0506, "step": 51430 }, { "epoch": 0.6272560975609756, "grad_norm": 0.7266161441802979, "learning_rate": 1.581829268292683e-05, "loss": 0.0452, "step": 51435 }, { "epoch": 0.6273170731707317, "grad_norm": 0.6977927684783936, "learning_rate": 1.581788617886179e-05, "loss": 0.0726, "step": 51440 }, { "epoch": 0.6273780487804878, "grad_norm": 0.547184944152832, "learning_rate": 1.581747967479675e-05, "loss": 0.0372, "step": 51445 }, { "epoch": 0.6274390243902439, "grad_norm": 0.7965373992919922, "learning_rate": 1.581707317073171e-05, "loss": 0.0367, "step": 51450 }, { "epoch": 0.6275, "grad_norm": 1.5905213356018066, "learning_rate": 1.581666666666667e-05, "loss": 0.0472, "step": 51455 }, { "epoch": 0.6275609756097561, "grad_norm": 0.67973393201828, "learning_rate": 1.5816260162601625e-05, "loss": 0.058, "step": 51460 }, { "epoch": 0.6276219512195121, "grad_norm": 0.19050893187522888, "learning_rate": 1.5815853658536587e-05, "loss": 0.0676, "step": 51465 }, { "epoch": 0.6276829268292683, "grad_norm": 1.1325453519821167, "learning_rate": 1.5815447154471545e-05, "loss": 0.0892, "step": 51470 }, { "epoch": 0.6277439024390243, "grad_norm": 1.3419172763824463, "learning_rate": 1.5815040650406506e-05, "loss": 0.0616, "step": 51475 }, { "epoch": 0.6278048780487805, "grad_norm": 0.5864340662956238, "learning_rate": 1.5814634146341465e-05, "loss": 0.0755, "step": 51480 }, { "epoch": 0.6278658536585365, "grad_norm": 0.6001323461532593, "learning_rate": 1.5814227642276426e-05, "loss": 0.049, "step": 51485 }, { "epoch": 0.6279268292682927, "grad_norm": 1.0834418535232544, "learning_rate": 1.5813821138211384e-05, "loss": 0.064, "step": 51490 }, { "epoch": 0.6279878048780487, "grad_norm": 0.5987242460250854, "learning_rate": 1.5813414634146342e-05, "loss": 0.0813, "step": 51495 }, { "epoch": 0.6280487804878049, "grad_norm": 0.5561903715133667, "learning_rate": 1.58130081300813e-05, "loss": 0.0896, "step": 51500 }, { "epoch": 0.6281097560975609, "grad_norm": 0.7949808239936829, "learning_rate": 1.5812601626016262e-05, "loss": 0.0436, "step": 51505 }, { "epoch": 0.6281707317073171, "grad_norm": 2.0790655612945557, "learning_rate": 1.581219512195122e-05, "loss": 0.0584, "step": 51510 }, { "epoch": 0.6282317073170731, "grad_norm": 0.49395546317100525, "learning_rate": 1.581178861788618e-05, "loss": 0.0692, "step": 51515 }, { "epoch": 0.6282926829268293, "grad_norm": 0.3750903904438019, "learning_rate": 1.581138211382114e-05, "loss": 0.0443, "step": 51520 }, { "epoch": 0.6283536585365853, "grad_norm": 0.37224310636520386, "learning_rate": 1.5810975609756098e-05, "loss": 0.1018, "step": 51525 }, { "epoch": 0.6284146341463415, "grad_norm": 0.8120748996734619, "learning_rate": 1.581056910569106e-05, "loss": 0.0644, "step": 51530 }, { "epoch": 0.6284756097560975, "grad_norm": 0.5180724263191223, "learning_rate": 1.5810162601626018e-05, "loss": 0.0453, "step": 51535 }, { "epoch": 0.6285365853658537, "grad_norm": 0.7202616333961487, "learning_rate": 1.580975609756098e-05, "loss": 0.0829, "step": 51540 }, { "epoch": 0.6285975609756097, "grad_norm": 0.5181768536567688, "learning_rate": 1.5809349593495937e-05, "loss": 0.0419, "step": 51545 }, { "epoch": 0.6286585365853659, "grad_norm": 0.47457483410835266, "learning_rate": 1.5808943089430895e-05, "loss": 0.0427, "step": 51550 }, { "epoch": 0.6287195121951219, "grad_norm": 0.5702396035194397, "learning_rate": 1.5808536585365853e-05, "loss": 0.0488, "step": 51555 }, { "epoch": 0.628780487804878, "grad_norm": 0.4660048186779022, "learning_rate": 1.5808130081300815e-05, "loss": 0.0357, "step": 51560 }, { "epoch": 0.6288414634146341, "grad_norm": 0.6331266164779663, "learning_rate": 1.5807723577235773e-05, "loss": 0.0697, "step": 51565 }, { "epoch": 0.6289024390243902, "grad_norm": 0.6293376088142395, "learning_rate": 1.5807317073170735e-05, "loss": 0.0458, "step": 51570 }, { "epoch": 0.6289634146341463, "grad_norm": 1.0226565599441528, "learning_rate": 1.5806910569105693e-05, "loss": 0.0541, "step": 51575 }, { "epoch": 0.6290243902439024, "grad_norm": 0.381717711687088, "learning_rate": 1.580650406504065e-05, "loss": 0.0888, "step": 51580 }, { "epoch": 0.6290853658536585, "grad_norm": 0.3555644452571869, "learning_rate": 1.5806097560975612e-05, "loss": 0.0333, "step": 51585 }, { "epoch": 0.6291463414634146, "grad_norm": 0.45811524987220764, "learning_rate": 1.580569105691057e-05, "loss": 0.0362, "step": 51590 }, { "epoch": 0.6292073170731707, "grad_norm": 0.7204080820083618, "learning_rate": 1.580528455284553e-05, "loss": 0.0656, "step": 51595 }, { "epoch": 0.6292682926829268, "grad_norm": 0.36605167388916016, "learning_rate": 1.580487804878049e-05, "loss": 0.0665, "step": 51600 }, { "epoch": 0.6293292682926829, "grad_norm": 0.6647655367851257, "learning_rate": 1.5804471544715448e-05, "loss": 0.1008, "step": 51605 }, { "epoch": 0.629390243902439, "grad_norm": 0.4383344054222107, "learning_rate": 1.5804065040650406e-05, "loss": 0.0589, "step": 51610 }, { "epoch": 0.6294512195121951, "grad_norm": 0.6294122934341431, "learning_rate": 1.5803658536585368e-05, "loss": 0.0629, "step": 51615 }, { "epoch": 0.6295121951219512, "grad_norm": 1.0832160711288452, "learning_rate": 1.5803252032520326e-05, "loss": 0.0512, "step": 51620 }, { "epoch": 0.6295731707317073, "grad_norm": 0.40739893913269043, "learning_rate": 1.5802845528455288e-05, "loss": 0.0563, "step": 51625 }, { "epoch": 0.6296341463414634, "grad_norm": 0.8790942430496216, "learning_rate": 1.5802439024390246e-05, "loss": 0.0913, "step": 51630 }, { "epoch": 0.6296951219512195, "grad_norm": 0.5189204812049866, "learning_rate": 1.5802032520325207e-05, "loss": 0.0579, "step": 51635 }, { "epoch": 0.6297560975609756, "grad_norm": 0.6976790428161621, "learning_rate": 1.5801626016260162e-05, "loss": 0.0322, "step": 51640 }, { "epoch": 0.6298170731707317, "grad_norm": 0.4931376278400421, "learning_rate": 1.5801219512195123e-05, "loss": 0.0336, "step": 51645 }, { "epoch": 0.6298780487804878, "grad_norm": 0.4402543902397156, "learning_rate": 1.580081300813008e-05, "loss": 0.0497, "step": 51650 }, { "epoch": 0.6299390243902439, "grad_norm": 1.9099773168563843, "learning_rate": 1.5800406504065043e-05, "loss": 0.0571, "step": 51655 }, { "epoch": 0.63, "grad_norm": 0.6881694793701172, "learning_rate": 1.58e-05, "loss": 0.0303, "step": 51660 }, { "epoch": 0.630060975609756, "grad_norm": 0.6897917985916138, "learning_rate": 1.5799593495934963e-05, "loss": 0.0511, "step": 51665 }, { "epoch": 0.6301219512195122, "grad_norm": 0.569526731967926, "learning_rate": 1.579918699186992e-05, "loss": 0.0615, "step": 51670 }, { "epoch": 0.6301829268292682, "grad_norm": 0.6833353638648987, "learning_rate": 1.579878048780488e-05, "loss": 0.1032, "step": 51675 }, { "epoch": 0.6302439024390244, "grad_norm": 0.5160160660743713, "learning_rate": 1.5798373983739837e-05, "loss": 0.057, "step": 51680 }, { "epoch": 0.6303048780487804, "grad_norm": 0.6522173881530762, "learning_rate": 1.57979674796748e-05, "loss": 0.0424, "step": 51685 }, { "epoch": 0.6303658536585366, "grad_norm": 0.7726425528526306, "learning_rate": 1.5797560975609757e-05, "loss": 0.0532, "step": 51690 }, { "epoch": 0.6304268292682926, "grad_norm": 0.4286158084869385, "learning_rate": 1.5797154471544718e-05, "loss": 0.0714, "step": 51695 }, { "epoch": 0.6304878048780488, "grad_norm": 0.24573814868927002, "learning_rate": 1.5796747967479676e-05, "loss": 0.0376, "step": 51700 }, { "epoch": 0.6305487804878048, "grad_norm": 0.9586881995201111, "learning_rate": 1.5796341463414635e-05, "loss": 0.0607, "step": 51705 }, { "epoch": 0.630609756097561, "grad_norm": 0.6276853084564209, "learning_rate": 1.5795934959349596e-05, "loss": 0.067, "step": 51710 }, { "epoch": 0.630670731707317, "grad_norm": 0.42022377252578735, "learning_rate": 1.5795528455284554e-05, "loss": 0.0559, "step": 51715 }, { "epoch": 0.6307317073170732, "grad_norm": 0.24430303275585175, "learning_rate": 1.5795121951219516e-05, "loss": 0.0538, "step": 51720 }, { "epoch": 0.6307926829268292, "grad_norm": 0.26848095655441284, "learning_rate": 1.5794715447154474e-05, "loss": 0.0209, "step": 51725 }, { "epoch": 0.6308536585365854, "grad_norm": 0.50261390209198, "learning_rate": 1.5794308943089432e-05, "loss": 0.0462, "step": 51730 }, { "epoch": 0.6309146341463414, "grad_norm": 0.7928640842437744, "learning_rate": 1.579390243902439e-05, "loss": 0.069, "step": 51735 }, { "epoch": 0.6309756097560976, "grad_norm": 0.7089446783065796, "learning_rate": 1.579349593495935e-05, "loss": 0.0316, "step": 51740 }, { "epoch": 0.6310365853658536, "grad_norm": 0.6814815402030945, "learning_rate": 1.579308943089431e-05, "loss": 0.0559, "step": 51745 }, { "epoch": 0.6310975609756098, "grad_norm": 0.9050204157829285, "learning_rate": 1.579268292682927e-05, "loss": 0.0643, "step": 51750 }, { "epoch": 0.6311585365853658, "grad_norm": 0.4366830885410309, "learning_rate": 1.579227642276423e-05, "loss": 0.0626, "step": 51755 }, { "epoch": 0.631219512195122, "grad_norm": 0.36170583963394165, "learning_rate": 1.5791869918699187e-05, "loss": 0.0459, "step": 51760 }, { "epoch": 0.631280487804878, "grad_norm": 0.5711038708686829, "learning_rate": 1.5791463414634146e-05, "loss": 0.0459, "step": 51765 }, { "epoch": 0.6313414634146342, "grad_norm": 0.7372620701789856, "learning_rate": 1.5791056910569107e-05, "loss": 0.0376, "step": 51770 }, { "epoch": 0.6314024390243902, "grad_norm": 0.7211443781852722, "learning_rate": 1.5790650406504065e-05, "loss": 0.0415, "step": 51775 }, { "epoch": 0.6314634146341463, "grad_norm": 1.3904144763946533, "learning_rate": 1.5790243902439027e-05, "loss": 0.0558, "step": 51780 }, { "epoch": 0.6315243902439024, "grad_norm": 0.7125741839408875, "learning_rate": 1.5789837398373985e-05, "loss": 0.0624, "step": 51785 }, { "epoch": 0.6315853658536585, "grad_norm": 0.5581843852996826, "learning_rate": 1.5789430894308943e-05, "loss": 0.0692, "step": 51790 }, { "epoch": 0.6316463414634146, "grad_norm": 0.7746146321296692, "learning_rate": 1.5789024390243905e-05, "loss": 0.0532, "step": 51795 }, { "epoch": 0.6317073170731707, "grad_norm": 0.4589618742465973, "learning_rate": 1.5788617886178863e-05, "loss": 0.081, "step": 51800 }, { "epoch": 0.6317682926829268, "grad_norm": 1.364481806755066, "learning_rate": 1.5788211382113824e-05, "loss": 0.0583, "step": 51805 }, { "epoch": 0.6318292682926829, "grad_norm": 0.4534529447555542, "learning_rate": 1.5787804878048782e-05, "loss": 0.0591, "step": 51810 }, { "epoch": 0.631890243902439, "grad_norm": 0.691205620765686, "learning_rate": 1.5787398373983744e-05, "loss": 0.0587, "step": 51815 }, { "epoch": 0.6319512195121951, "grad_norm": 0.6295628547668457, "learning_rate": 1.57869918699187e-05, "loss": 0.0555, "step": 51820 }, { "epoch": 0.6320121951219512, "grad_norm": 0.8357506394386292, "learning_rate": 1.578658536585366e-05, "loss": 0.0574, "step": 51825 }, { "epoch": 0.6320731707317073, "grad_norm": 0.49726244807243347, "learning_rate": 1.5786178861788618e-05, "loss": 0.0526, "step": 51830 }, { "epoch": 0.6321341463414634, "grad_norm": 0.9728850722312927, "learning_rate": 1.578577235772358e-05, "loss": 0.0916, "step": 51835 }, { "epoch": 0.6321951219512195, "grad_norm": 0.5276816487312317, "learning_rate": 1.5785365853658538e-05, "loss": 0.044, "step": 51840 }, { "epoch": 0.6322560975609756, "grad_norm": 0.413974404335022, "learning_rate": 1.57849593495935e-05, "loss": 0.0874, "step": 51845 }, { "epoch": 0.6323170731707317, "grad_norm": 0.7973225116729736, "learning_rate": 1.5784552845528457e-05, "loss": 0.0553, "step": 51850 }, { "epoch": 0.6323780487804878, "grad_norm": 0.5813515186309814, "learning_rate": 1.5784146341463416e-05, "loss": 0.0676, "step": 51855 }, { "epoch": 0.6324390243902439, "grad_norm": 1.2127100229263306, "learning_rate": 1.5783739837398374e-05, "loss": 0.0629, "step": 51860 }, { "epoch": 0.6325, "grad_norm": 1.2496484518051147, "learning_rate": 1.5783333333333335e-05, "loss": 0.0658, "step": 51865 }, { "epoch": 0.6325609756097561, "grad_norm": 0.635417103767395, "learning_rate": 1.5782926829268293e-05, "loss": 0.0564, "step": 51870 }, { "epoch": 0.6326219512195121, "grad_norm": 0.5902237296104431, "learning_rate": 1.5782520325203255e-05, "loss": 0.0745, "step": 51875 }, { "epoch": 0.6326829268292683, "grad_norm": 0.605434000492096, "learning_rate": 1.5782113821138213e-05, "loss": 0.0583, "step": 51880 }, { "epoch": 0.6327439024390243, "grad_norm": 0.4460940659046173, "learning_rate": 1.578170731707317e-05, "loss": 0.0635, "step": 51885 }, { "epoch": 0.6328048780487805, "grad_norm": 1.2456218004226685, "learning_rate": 1.5781300813008133e-05, "loss": 0.1205, "step": 51890 }, { "epoch": 0.6328658536585365, "grad_norm": 0.797735869884491, "learning_rate": 1.578089430894309e-05, "loss": 0.0396, "step": 51895 }, { "epoch": 0.6329268292682927, "grad_norm": 0.7238325476646423, "learning_rate": 1.5780487804878052e-05, "loss": 0.0617, "step": 51900 }, { "epoch": 0.6329878048780487, "grad_norm": 0.5328547954559326, "learning_rate": 1.578008130081301e-05, "loss": 0.046, "step": 51905 }, { "epoch": 0.6330487804878049, "grad_norm": 0.5107985734939575, "learning_rate": 1.577967479674797e-05, "loss": 0.0339, "step": 51910 }, { "epoch": 0.6331097560975609, "grad_norm": 0.2985881268978119, "learning_rate": 1.5779268292682927e-05, "loss": 0.0578, "step": 51915 }, { "epoch": 0.6331707317073171, "grad_norm": 0.4409712255001068, "learning_rate": 1.5778861788617888e-05, "loss": 0.0874, "step": 51920 }, { "epoch": 0.6332317073170731, "grad_norm": 0.5902196764945984, "learning_rate": 1.5778455284552846e-05, "loss": 0.0549, "step": 51925 }, { "epoch": 0.6332926829268293, "grad_norm": 0.5543815493583679, "learning_rate": 1.5778048780487808e-05, "loss": 0.0558, "step": 51930 }, { "epoch": 0.6333536585365853, "grad_norm": 0.5519509315490723, "learning_rate": 1.5777642276422766e-05, "loss": 0.0817, "step": 51935 }, { "epoch": 0.6334146341463415, "grad_norm": 0.36759039759635925, "learning_rate": 1.5777235772357724e-05, "loss": 0.0584, "step": 51940 }, { "epoch": 0.6334756097560975, "grad_norm": 0.6644532680511475, "learning_rate": 1.5776829268292682e-05, "loss": 0.0462, "step": 51945 }, { "epoch": 0.6335365853658537, "grad_norm": 0.48873063921928406, "learning_rate": 1.5776422764227644e-05, "loss": 0.0359, "step": 51950 }, { "epoch": 0.6335975609756097, "grad_norm": 0.4328446090221405, "learning_rate": 1.5776016260162602e-05, "loss": 0.048, "step": 51955 }, { "epoch": 0.6336585365853659, "grad_norm": 0.7094089984893799, "learning_rate": 1.5775609756097563e-05, "loss": 0.0599, "step": 51960 }, { "epoch": 0.6337195121951219, "grad_norm": 0.324537456035614, "learning_rate": 1.577520325203252e-05, "loss": 0.0538, "step": 51965 }, { "epoch": 0.6337804878048781, "grad_norm": 0.32511672377586365, "learning_rate": 1.577479674796748e-05, "loss": 0.0503, "step": 51970 }, { "epoch": 0.6338414634146341, "grad_norm": 0.47402432560920715, "learning_rate": 1.577439024390244e-05, "loss": 0.0688, "step": 51975 }, { "epoch": 0.6339024390243903, "grad_norm": 4.471446514129639, "learning_rate": 1.57739837398374e-05, "loss": 0.0689, "step": 51980 }, { "epoch": 0.6339634146341463, "grad_norm": 1.8898564577102661, "learning_rate": 1.577357723577236e-05, "loss": 0.1441, "step": 51985 }, { "epoch": 0.6340243902439024, "grad_norm": 0.4285926818847656, "learning_rate": 1.577317073170732e-05, "loss": 0.0588, "step": 51990 }, { "epoch": 0.6340853658536585, "grad_norm": 0.9561662077903748, "learning_rate": 1.577276422764228e-05, "loss": 0.054, "step": 51995 }, { "epoch": 0.6341463414634146, "grad_norm": 1.111185073852539, "learning_rate": 1.5772357723577235e-05, "loss": 0.03, "step": 52000 }, { "epoch": 0.6342073170731707, "grad_norm": 1.1024683713912964, "learning_rate": 1.5771951219512197e-05, "loss": 0.069, "step": 52005 }, { "epoch": 0.6342682926829268, "grad_norm": 0.6598932147026062, "learning_rate": 1.5771544715447155e-05, "loss": 0.1259, "step": 52010 }, { "epoch": 0.6343292682926829, "grad_norm": 0.6919661164283752, "learning_rate": 1.5771138211382116e-05, "loss": 0.0436, "step": 52015 }, { "epoch": 0.634390243902439, "grad_norm": 0.46781009435653687, "learning_rate": 1.5770731707317074e-05, "loss": 0.0349, "step": 52020 }, { "epoch": 0.6344512195121951, "grad_norm": 0.3930807411670685, "learning_rate": 1.5770325203252036e-05, "loss": 0.041, "step": 52025 }, { "epoch": 0.6345121951219512, "grad_norm": 0.8268004655838013, "learning_rate": 1.576991869918699e-05, "loss": 0.0879, "step": 52030 }, { "epoch": 0.6345731707317073, "grad_norm": 0.21520331501960754, "learning_rate": 1.5769512195121952e-05, "loss": 0.083, "step": 52035 }, { "epoch": 0.6346341463414634, "grad_norm": 0.5142253637313843, "learning_rate": 1.576910569105691e-05, "loss": 0.0595, "step": 52040 }, { "epoch": 0.6346951219512195, "grad_norm": 0.7492406368255615, "learning_rate": 1.5768699186991872e-05, "loss": 0.0475, "step": 52045 }, { "epoch": 0.6347560975609756, "grad_norm": 0.4106321632862091, "learning_rate": 1.576829268292683e-05, "loss": 0.0414, "step": 52050 }, { "epoch": 0.6348170731707317, "grad_norm": 0.6808724403381348, "learning_rate": 1.576788617886179e-05, "loss": 0.0347, "step": 52055 }, { "epoch": 0.6348780487804878, "grad_norm": 0.6728154420852661, "learning_rate": 1.576747967479675e-05, "loss": 0.0521, "step": 52060 }, { "epoch": 0.6349390243902439, "grad_norm": 0.7501539587974548, "learning_rate": 1.5767073170731708e-05, "loss": 0.0699, "step": 52065 }, { "epoch": 0.635, "grad_norm": 0.8446828722953796, "learning_rate": 1.576666666666667e-05, "loss": 0.0469, "step": 52070 }, { "epoch": 0.635060975609756, "grad_norm": 0.6568710207939148, "learning_rate": 1.5766260162601627e-05, "loss": 0.0669, "step": 52075 }, { "epoch": 0.6351219512195122, "grad_norm": 0.5487616062164307, "learning_rate": 1.576585365853659e-05, "loss": 0.0588, "step": 52080 }, { "epoch": 0.6351829268292682, "grad_norm": 0.4025402069091797, "learning_rate": 1.5765447154471547e-05, "loss": 0.0471, "step": 52085 }, { "epoch": 0.6352439024390244, "grad_norm": 2.123063087463379, "learning_rate": 1.5765040650406505e-05, "loss": 0.0567, "step": 52090 }, { "epoch": 0.6353048780487804, "grad_norm": 0.6942024827003479, "learning_rate": 1.5764634146341463e-05, "loss": 0.0542, "step": 52095 }, { "epoch": 0.6353658536585366, "grad_norm": 0.8224510550498962, "learning_rate": 1.5764227642276425e-05, "loss": 0.0621, "step": 52100 }, { "epoch": 0.6354268292682926, "grad_norm": 0.7756822109222412, "learning_rate": 1.5763821138211383e-05, "loss": 0.0738, "step": 52105 }, { "epoch": 0.6354878048780488, "grad_norm": 1.8038125038146973, "learning_rate": 1.5763414634146344e-05, "loss": 0.0535, "step": 52110 }, { "epoch": 0.6355487804878048, "grad_norm": 0.6905266642570496, "learning_rate": 1.5763008130081303e-05, "loss": 0.0592, "step": 52115 }, { "epoch": 0.635609756097561, "grad_norm": 0.5515515208244324, "learning_rate": 1.576260162601626e-05, "loss": 0.0539, "step": 52120 }, { "epoch": 0.635670731707317, "grad_norm": 0.6013280153274536, "learning_rate": 1.576219512195122e-05, "loss": 0.0487, "step": 52125 }, { "epoch": 0.6357317073170732, "grad_norm": 0.8055009841918945, "learning_rate": 1.576178861788618e-05, "loss": 0.0735, "step": 52130 }, { "epoch": 0.6357926829268292, "grad_norm": 0.5661836266517639, "learning_rate": 1.576138211382114e-05, "loss": 0.062, "step": 52135 }, { "epoch": 0.6358536585365854, "grad_norm": 0.5967926383018494, "learning_rate": 1.57609756097561e-05, "loss": 0.0548, "step": 52140 }, { "epoch": 0.6359146341463414, "grad_norm": 0.9059422612190247, "learning_rate": 1.5760569105691058e-05, "loss": 0.0822, "step": 52145 }, { "epoch": 0.6359756097560976, "grad_norm": 1.0280474424362183, "learning_rate": 1.5760162601626016e-05, "loss": 0.0486, "step": 52150 }, { "epoch": 0.6360365853658536, "grad_norm": 1.062554121017456, "learning_rate": 1.5759756097560978e-05, "loss": 0.1149, "step": 52155 }, { "epoch": 0.6360975609756098, "grad_norm": 0.40946465730667114, "learning_rate": 1.5759349593495936e-05, "loss": 0.0478, "step": 52160 }, { "epoch": 0.6361585365853658, "grad_norm": 1.229675531387329, "learning_rate": 1.5758943089430897e-05, "loss": 0.0432, "step": 52165 }, { "epoch": 0.636219512195122, "grad_norm": 1.0814385414123535, "learning_rate": 1.5758536585365856e-05, "loss": 0.0439, "step": 52170 }, { "epoch": 0.636280487804878, "grad_norm": 0.9488474130630493, "learning_rate": 1.5758130081300814e-05, "loss": 0.0482, "step": 52175 }, { "epoch": 0.6363414634146342, "grad_norm": 1.0995378494262695, "learning_rate": 1.5757723577235772e-05, "loss": 0.0558, "step": 52180 }, { "epoch": 0.6364024390243902, "grad_norm": 0.511390209197998, "learning_rate": 1.5757317073170733e-05, "loss": 0.061, "step": 52185 }, { "epoch": 0.6364634146341464, "grad_norm": 0.6138637661933899, "learning_rate": 1.575691056910569e-05, "loss": 0.028, "step": 52190 }, { "epoch": 0.6365243902439024, "grad_norm": 0.5948364734649658, "learning_rate": 1.5756504065040653e-05, "loss": 0.0721, "step": 52195 }, { "epoch": 0.6365853658536585, "grad_norm": 0.8037196397781372, "learning_rate": 1.575609756097561e-05, "loss": 0.0508, "step": 52200 }, { "epoch": 0.6366463414634146, "grad_norm": 1.1649550199508667, "learning_rate": 1.5755691056910573e-05, "loss": 0.0643, "step": 52205 }, { "epoch": 0.6367073170731707, "grad_norm": 0.6177260875701904, "learning_rate": 1.5755284552845527e-05, "loss": 0.0499, "step": 52210 }, { "epoch": 0.6367682926829268, "grad_norm": 0.6206122636795044, "learning_rate": 1.575487804878049e-05, "loss": 0.0886, "step": 52215 }, { "epoch": 0.6368292682926829, "grad_norm": 0.32305148243904114, "learning_rate": 1.5754471544715447e-05, "loss": 0.1256, "step": 52220 }, { "epoch": 0.636890243902439, "grad_norm": 0.40012603998184204, "learning_rate": 1.575406504065041e-05, "loss": 0.0499, "step": 52225 }, { "epoch": 0.6369512195121951, "grad_norm": 0.44989871978759766, "learning_rate": 1.5753658536585367e-05, "loss": 0.0628, "step": 52230 }, { "epoch": 0.6370121951219512, "grad_norm": 0.4617525637149811, "learning_rate": 1.5753252032520328e-05, "loss": 0.0541, "step": 52235 }, { "epoch": 0.6370731707317073, "grad_norm": 0.33646899461746216, "learning_rate": 1.5752845528455286e-05, "loss": 0.0472, "step": 52240 }, { "epoch": 0.6371341463414634, "grad_norm": 0.6105819344520569, "learning_rate": 1.5752439024390244e-05, "loss": 0.042, "step": 52245 }, { "epoch": 0.6371951219512195, "grad_norm": 0.537363588809967, "learning_rate": 1.5752032520325206e-05, "loss": 0.0708, "step": 52250 }, { "epoch": 0.6372560975609756, "grad_norm": 0.7605543732643127, "learning_rate": 1.5751626016260164e-05, "loss": 0.0567, "step": 52255 }, { "epoch": 0.6373170731707317, "grad_norm": 0.6774919033050537, "learning_rate": 1.5751219512195126e-05, "loss": 0.0821, "step": 52260 }, { "epoch": 0.6373780487804878, "grad_norm": 0.5619297623634338, "learning_rate": 1.5750813008130084e-05, "loss": 0.0432, "step": 52265 }, { "epoch": 0.6374390243902439, "grad_norm": 0.7162809371948242, "learning_rate": 1.5750406504065042e-05, "loss": 0.0535, "step": 52270 }, { "epoch": 0.6375, "grad_norm": 0.8527796268463135, "learning_rate": 1.575e-05, "loss": 0.0695, "step": 52275 }, { "epoch": 0.6375609756097561, "grad_norm": 0.7817921042442322, "learning_rate": 1.574959349593496e-05, "loss": 0.0371, "step": 52280 }, { "epoch": 0.6376219512195122, "grad_norm": 1.0318528413772583, "learning_rate": 1.574918699186992e-05, "loss": 0.0821, "step": 52285 }, { "epoch": 0.6376829268292683, "grad_norm": 0.6909475326538086, "learning_rate": 1.574878048780488e-05, "loss": 0.0449, "step": 52290 }, { "epoch": 0.6377439024390243, "grad_norm": 0.7698456048965454, "learning_rate": 1.574837398373984e-05, "loss": 0.0498, "step": 52295 }, { "epoch": 0.6378048780487805, "grad_norm": 0.4247948229312897, "learning_rate": 1.5747967479674797e-05, "loss": 0.0519, "step": 52300 }, { "epoch": 0.6378658536585365, "grad_norm": 0.5402144193649292, "learning_rate": 1.5747560975609756e-05, "loss": 0.0599, "step": 52305 }, { "epoch": 0.6379268292682927, "grad_norm": 0.3829822242259979, "learning_rate": 1.5747154471544717e-05, "loss": 0.0445, "step": 52310 }, { "epoch": 0.6379878048780487, "grad_norm": 0.3502434194087982, "learning_rate": 1.5746747967479675e-05, "loss": 0.0579, "step": 52315 }, { "epoch": 0.6380487804878049, "grad_norm": 1.3291654586791992, "learning_rate": 1.5746341463414637e-05, "loss": 0.0682, "step": 52320 }, { "epoch": 0.6381097560975609, "grad_norm": 0.8796606063842773, "learning_rate": 1.5745934959349595e-05, "loss": 0.0749, "step": 52325 }, { "epoch": 0.6381707317073171, "grad_norm": 0.5520923137664795, "learning_rate": 1.5745528455284553e-05, "loss": 0.0423, "step": 52330 }, { "epoch": 0.6382317073170731, "grad_norm": 1.0182901620864868, "learning_rate": 1.5745121951219514e-05, "loss": 0.0802, "step": 52335 }, { "epoch": 0.6382926829268293, "grad_norm": 0.5651127696037292, "learning_rate": 1.5744715447154473e-05, "loss": 0.0607, "step": 52340 }, { "epoch": 0.6383536585365853, "grad_norm": 0.37660709023475647, "learning_rate": 1.5744308943089434e-05, "loss": 0.0364, "step": 52345 }, { "epoch": 0.6384146341463415, "grad_norm": 0.4835268557071686, "learning_rate": 1.5743902439024392e-05, "loss": 0.0422, "step": 52350 }, { "epoch": 0.6384756097560975, "grad_norm": 0.8255877494812012, "learning_rate": 1.574349593495935e-05, "loss": 0.0584, "step": 52355 }, { "epoch": 0.6385365853658537, "grad_norm": 0.5622118711471558, "learning_rate": 1.574308943089431e-05, "loss": 0.0419, "step": 52360 }, { "epoch": 0.6385975609756097, "grad_norm": 0.837853193283081, "learning_rate": 1.574268292682927e-05, "loss": 0.0596, "step": 52365 }, { "epoch": 0.6386585365853659, "grad_norm": 1.1616472005844116, "learning_rate": 1.5742276422764228e-05, "loss": 0.0611, "step": 52370 }, { "epoch": 0.6387195121951219, "grad_norm": 0.43719205260276794, "learning_rate": 1.574186991869919e-05, "loss": 0.0661, "step": 52375 }, { "epoch": 0.6387804878048781, "grad_norm": 0.7308034896850586, "learning_rate": 1.5741463414634148e-05, "loss": 0.0547, "step": 52380 }, { "epoch": 0.6388414634146341, "grad_norm": 1.3019499778747559, "learning_rate": 1.574105691056911e-05, "loss": 0.0548, "step": 52385 }, { "epoch": 0.6389024390243903, "grad_norm": 0.5007910132408142, "learning_rate": 1.5740650406504064e-05, "loss": 0.0501, "step": 52390 }, { "epoch": 0.6389634146341463, "grad_norm": 0.6680266261100769, "learning_rate": 1.5740243902439026e-05, "loss": 0.0622, "step": 52395 }, { "epoch": 0.6390243902439025, "grad_norm": 0.575204074382782, "learning_rate": 1.5739837398373984e-05, "loss": 0.0464, "step": 52400 }, { "epoch": 0.6390853658536585, "grad_norm": 0.8868882060050964, "learning_rate": 1.5739430894308945e-05, "loss": 0.05, "step": 52405 }, { "epoch": 0.6391463414634146, "grad_norm": 0.5595817565917969, "learning_rate": 1.5739024390243903e-05, "loss": 0.0452, "step": 52410 }, { "epoch": 0.6392073170731707, "grad_norm": 0.4931274354457855, "learning_rate": 1.5738617886178865e-05, "loss": 0.0761, "step": 52415 }, { "epoch": 0.6392682926829268, "grad_norm": 1.0717740058898926, "learning_rate": 1.5738211382113823e-05, "loss": 0.0529, "step": 52420 }, { "epoch": 0.6393292682926829, "grad_norm": 0.3770878314971924, "learning_rate": 1.573780487804878e-05, "loss": 0.0391, "step": 52425 }, { "epoch": 0.639390243902439, "grad_norm": 1.1139289140701294, "learning_rate": 1.5737398373983743e-05, "loss": 0.0499, "step": 52430 }, { "epoch": 0.6394512195121951, "grad_norm": 0.6391211748123169, "learning_rate": 1.57369918699187e-05, "loss": 0.0835, "step": 52435 }, { "epoch": 0.6395121951219512, "grad_norm": 0.30452078580856323, "learning_rate": 1.573658536585366e-05, "loss": 0.0415, "step": 52440 }, { "epoch": 0.6395731707317073, "grad_norm": 0.38746535778045654, "learning_rate": 1.573617886178862e-05, "loss": 0.0391, "step": 52445 }, { "epoch": 0.6396341463414634, "grad_norm": 1.4138224124908447, "learning_rate": 1.573577235772358e-05, "loss": 0.0566, "step": 52450 }, { "epoch": 0.6396951219512195, "grad_norm": 0.7600956559181213, "learning_rate": 1.5735365853658537e-05, "loss": 0.061, "step": 52455 }, { "epoch": 0.6397560975609756, "grad_norm": 1.2809118032455444, "learning_rate": 1.5734959349593498e-05, "loss": 0.0433, "step": 52460 }, { "epoch": 0.6398170731707317, "grad_norm": 0.5376901030540466, "learning_rate": 1.5734552845528456e-05, "loss": 0.0589, "step": 52465 }, { "epoch": 0.6398780487804878, "grad_norm": 0.6766744256019592, "learning_rate": 1.5734146341463418e-05, "loss": 0.0652, "step": 52470 }, { "epoch": 0.6399390243902439, "grad_norm": 0.7924968004226685, "learning_rate": 1.5733739837398376e-05, "loss": 0.0494, "step": 52475 }, { "epoch": 0.64, "grad_norm": 0.8448028564453125, "learning_rate": 1.5733333333333334e-05, "loss": 0.0435, "step": 52480 }, { "epoch": 0.640060975609756, "grad_norm": 0.7081792950630188, "learning_rate": 1.5732926829268292e-05, "loss": 0.0426, "step": 52485 }, { "epoch": 0.6401219512195122, "grad_norm": 0.5232865810394287, "learning_rate": 1.5732520325203254e-05, "loss": 0.038, "step": 52490 }, { "epoch": 0.6401829268292683, "grad_norm": 0.30157676339149475, "learning_rate": 1.5732113821138212e-05, "loss": 0.0361, "step": 52495 }, { "epoch": 0.6402439024390244, "grad_norm": 0.3974984884262085, "learning_rate": 1.5731707317073173e-05, "loss": 0.0477, "step": 52500 }, { "epoch": 0.6403048780487804, "grad_norm": 0.17461349070072174, "learning_rate": 1.573130081300813e-05, "loss": 0.106, "step": 52505 }, { "epoch": 0.6403658536585366, "grad_norm": 0.5500295162200928, "learning_rate": 1.573089430894309e-05, "loss": 0.0453, "step": 52510 }, { "epoch": 0.6404268292682926, "grad_norm": 0.36207741498947144, "learning_rate": 1.573048780487805e-05, "loss": 0.0742, "step": 52515 }, { "epoch": 0.6404878048780488, "grad_norm": 0.20403996109962463, "learning_rate": 1.573008130081301e-05, "loss": 0.0343, "step": 52520 }, { "epoch": 0.6405487804878048, "grad_norm": 0.6774410009384155, "learning_rate": 1.572967479674797e-05, "loss": 0.0788, "step": 52525 }, { "epoch": 0.640609756097561, "grad_norm": 0.7826061844825745, "learning_rate": 1.572926829268293e-05, "loss": 0.0516, "step": 52530 }, { "epoch": 0.640670731707317, "grad_norm": 0.3150169849395752, "learning_rate": 1.5728861788617887e-05, "loss": 0.0547, "step": 52535 }, { "epoch": 0.6407317073170732, "grad_norm": 0.49614545702934265, "learning_rate": 1.5728455284552845e-05, "loss": 0.0486, "step": 52540 }, { "epoch": 0.6407926829268292, "grad_norm": 0.5075682401657104, "learning_rate": 1.5728048780487807e-05, "loss": 0.0565, "step": 52545 }, { "epoch": 0.6408536585365854, "grad_norm": 0.7309346199035645, "learning_rate": 1.5727642276422765e-05, "loss": 0.0641, "step": 52550 }, { "epoch": 0.6409146341463414, "grad_norm": 0.41517120599746704, "learning_rate": 1.5727235772357726e-05, "loss": 0.0517, "step": 52555 }, { "epoch": 0.6409756097560976, "grad_norm": 0.9330521821975708, "learning_rate": 1.5726829268292684e-05, "loss": 0.0634, "step": 52560 }, { "epoch": 0.6410365853658536, "grad_norm": 0.3860127329826355, "learning_rate": 1.5726422764227646e-05, "loss": 0.0336, "step": 52565 }, { "epoch": 0.6410975609756098, "grad_norm": 0.38349449634552, "learning_rate": 1.57260162601626e-05, "loss": 0.0345, "step": 52570 }, { "epoch": 0.6411585365853658, "grad_norm": 0.488099604845047, "learning_rate": 1.5725609756097562e-05, "loss": 0.0567, "step": 52575 }, { "epoch": 0.641219512195122, "grad_norm": 0.8540875911712646, "learning_rate": 1.572520325203252e-05, "loss": 0.0597, "step": 52580 }, { "epoch": 0.641280487804878, "grad_norm": 0.9493308663368225, "learning_rate": 1.5724796747967482e-05, "loss": 0.0542, "step": 52585 }, { "epoch": 0.6413414634146342, "grad_norm": 0.5517862439155579, "learning_rate": 1.572439024390244e-05, "loss": 0.05, "step": 52590 }, { "epoch": 0.6414024390243902, "grad_norm": 0.7101044654846191, "learning_rate": 1.57239837398374e-05, "loss": 0.0573, "step": 52595 }, { "epoch": 0.6414634146341464, "grad_norm": 0.6758528351783752, "learning_rate": 1.572357723577236e-05, "loss": 0.0616, "step": 52600 }, { "epoch": 0.6415243902439024, "grad_norm": 0.6602768898010254, "learning_rate": 1.5723170731707318e-05, "loss": 0.0574, "step": 52605 }, { "epoch": 0.6415853658536586, "grad_norm": 0.5617366433143616, "learning_rate": 1.572276422764228e-05, "loss": 0.0688, "step": 52610 }, { "epoch": 0.6416463414634146, "grad_norm": 1.2003329992294312, "learning_rate": 1.5722357723577237e-05, "loss": 0.0663, "step": 52615 }, { "epoch": 0.6417073170731707, "grad_norm": 0.5547628402709961, "learning_rate": 1.5721951219512195e-05, "loss": 0.0389, "step": 52620 }, { "epoch": 0.6417682926829268, "grad_norm": 0.8572755455970764, "learning_rate": 1.5721544715447157e-05, "loss": 0.0538, "step": 52625 }, { "epoch": 0.6418292682926829, "grad_norm": 0.3912191390991211, "learning_rate": 1.5721138211382115e-05, "loss": 0.0306, "step": 52630 }, { "epoch": 0.641890243902439, "grad_norm": 0.8679468631744385, "learning_rate": 1.5720731707317073e-05, "loss": 0.0619, "step": 52635 }, { "epoch": 0.6419512195121951, "grad_norm": 0.6867663860321045, "learning_rate": 1.5720325203252035e-05, "loss": 0.0985, "step": 52640 }, { "epoch": 0.6420121951219512, "grad_norm": 0.38094714283943176, "learning_rate": 1.5719918699186993e-05, "loss": 0.0324, "step": 52645 }, { "epoch": 0.6420731707317073, "grad_norm": 0.48373159766197205, "learning_rate": 1.5719512195121954e-05, "loss": 0.0341, "step": 52650 }, { "epoch": 0.6421341463414634, "grad_norm": 0.3116636276245117, "learning_rate": 1.5719105691056913e-05, "loss": 0.0494, "step": 52655 }, { "epoch": 0.6421951219512195, "grad_norm": 0.7790637612342834, "learning_rate": 1.571869918699187e-05, "loss": 0.0702, "step": 52660 }, { "epoch": 0.6422560975609756, "grad_norm": 0.30641862750053406, "learning_rate": 1.571829268292683e-05, "loss": 0.0638, "step": 52665 }, { "epoch": 0.6423170731707317, "grad_norm": 0.6870653629302979, "learning_rate": 1.571788617886179e-05, "loss": 0.0413, "step": 52670 }, { "epoch": 0.6423780487804878, "grad_norm": 0.8371071815490723, "learning_rate": 1.571747967479675e-05, "loss": 0.054, "step": 52675 }, { "epoch": 0.6424390243902439, "grad_norm": 0.34022945165634155, "learning_rate": 1.571707317073171e-05, "loss": 0.0413, "step": 52680 }, { "epoch": 0.6425, "grad_norm": 1.1959457397460938, "learning_rate": 1.5716666666666668e-05, "loss": 0.0496, "step": 52685 }, { "epoch": 0.6425609756097561, "grad_norm": 0.5830761790275574, "learning_rate": 1.5716260162601626e-05, "loss": 0.0905, "step": 52690 }, { "epoch": 0.6426219512195122, "grad_norm": 1.3316963911056519, "learning_rate": 1.5715853658536588e-05, "loss": 0.035, "step": 52695 }, { "epoch": 0.6426829268292683, "grad_norm": 2.126237630844116, "learning_rate": 1.5715447154471546e-05, "loss": 0.0976, "step": 52700 }, { "epoch": 0.6427439024390244, "grad_norm": 0.9236969351768494, "learning_rate": 1.5715040650406504e-05, "loss": 0.05, "step": 52705 }, { "epoch": 0.6428048780487805, "grad_norm": 0.534554123878479, "learning_rate": 1.5714634146341466e-05, "loss": 0.0518, "step": 52710 }, { "epoch": 0.6428658536585365, "grad_norm": 0.33817732334136963, "learning_rate": 1.5714227642276424e-05, "loss": 0.0513, "step": 52715 }, { "epoch": 0.6429268292682927, "grad_norm": 1.3856544494628906, "learning_rate": 1.5713821138211382e-05, "loss": 0.0441, "step": 52720 }, { "epoch": 0.6429878048780487, "grad_norm": 1.053725242614746, "learning_rate": 1.5713414634146343e-05, "loss": 0.0636, "step": 52725 }, { "epoch": 0.6430487804878049, "grad_norm": 0.4773673713207245, "learning_rate": 1.57130081300813e-05, "loss": 0.062, "step": 52730 }, { "epoch": 0.6431097560975609, "grad_norm": 0.6579285264015198, "learning_rate": 1.5712601626016263e-05, "loss": 0.0611, "step": 52735 }, { "epoch": 0.6431707317073171, "grad_norm": 0.5246295928955078, "learning_rate": 1.571219512195122e-05, "loss": 0.0437, "step": 52740 }, { "epoch": 0.6432317073170731, "grad_norm": 0.49238818883895874, "learning_rate": 1.5711788617886183e-05, "loss": 0.0591, "step": 52745 }, { "epoch": 0.6432926829268293, "grad_norm": 1.1419364213943481, "learning_rate": 1.5711382113821137e-05, "loss": 0.0428, "step": 52750 }, { "epoch": 0.6433536585365853, "grad_norm": 0.7191225290298462, "learning_rate": 1.57109756097561e-05, "loss": 0.0768, "step": 52755 }, { "epoch": 0.6434146341463415, "grad_norm": 0.49822092056274414, "learning_rate": 1.5710569105691057e-05, "loss": 0.0451, "step": 52760 }, { "epoch": 0.6434756097560975, "grad_norm": 0.5812797546386719, "learning_rate": 1.571016260162602e-05, "loss": 0.0606, "step": 52765 }, { "epoch": 0.6435365853658537, "grad_norm": 1.0119061470031738, "learning_rate": 1.5709756097560977e-05, "loss": 0.0553, "step": 52770 }, { "epoch": 0.6435975609756097, "grad_norm": 0.3998834192752838, "learning_rate": 1.5709349593495938e-05, "loss": 0.0396, "step": 52775 }, { "epoch": 0.6436585365853659, "grad_norm": 0.4707384705543518, "learning_rate": 1.5708943089430896e-05, "loss": 0.0736, "step": 52780 }, { "epoch": 0.6437195121951219, "grad_norm": 0.3493259847164154, "learning_rate": 1.5708536585365854e-05, "loss": 0.0615, "step": 52785 }, { "epoch": 0.6437804878048781, "grad_norm": 0.5517481565475464, "learning_rate": 1.5708130081300812e-05, "loss": 0.0609, "step": 52790 }, { "epoch": 0.6438414634146341, "grad_norm": 0.6856993436813354, "learning_rate": 1.5707723577235774e-05, "loss": 0.0663, "step": 52795 }, { "epoch": 0.6439024390243903, "grad_norm": 0.2700776755809784, "learning_rate": 1.5707317073170732e-05, "loss": 0.0535, "step": 52800 }, { "epoch": 0.6439634146341463, "grad_norm": 1.2210954427719116, "learning_rate": 1.5706910569105694e-05, "loss": 0.0604, "step": 52805 }, { "epoch": 0.6440243902439025, "grad_norm": 0.42824336886405945, "learning_rate": 1.5706504065040652e-05, "loss": 0.0647, "step": 52810 }, { "epoch": 0.6440853658536585, "grad_norm": 0.6429732441902161, "learning_rate": 1.570609756097561e-05, "loss": 0.0646, "step": 52815 }, { "epoch": 0.6441463414634147, "grad_norm": 0.7366405129432678, "learning_rate": 1.570569105691057e-05, "loss": 0.0759, "step": 52820 }, { "epoch": 0.6442073170731707, "grad_norm": 0.9895055890083313, "learning_rate": 1.570528455284553e-05, "loss": 0.0375, "step": 52825 }, { "epoch": 0.6442682926829268, "grad_norm": 0.3907622694969177, "learning_rate": 1.570487804878049e-05, "loss": 0.0395, "step": 52830 }, { "epoch": 0.6443292682926829, "grad_norm": 0.40596649050712585, "learning_rate": 1.570447154471545e-05, "loss": 0.0578, "step": 52835 }, { "epoch": 0.644390243902439, "grad_norm": 0.5116276741027832, "learning_rate": 1.5704065040650407e-05, "loss": 0.0895, "step": 52840 }, { "epoch": 0.6444512195121951, "grad_norm": 0.5221868753433228, "learning_rate": 1.5703658536585365e-05, "loss": 0.0637, "step": 52845 }, { "epoch": 0.6445121951219512, "grad_norm": 0.6399680972099304, "learning_rate": 1.5703252032520327e-05, "loss": 0.053, "step": 52850 }, { "epoch": 0.6445731707317073, "grad_norm": 0.6763759851455688, "learning_rate": 1.5702845528455285e-05, "loss": 0.0819, "step": 52855 }, { "epoch": 0.6446341463414634, "grad_norm": 0.4403519928455353, "learning_rate": 1.5702439024390247e-05, "loss": 0.0566, "step": 52860 }, { "epoch": 0.6446951219512195, "grad_norm": 0.5595464110374451, "learning_rate": 1.5702032520325205e-05, "loss": 0.0606, "step": 52865 }, { "epoch": 0.6447560975609756, "grad_norm": 0.8140273094177246, "learning_rate": 1.5701626016260163e-05, "loss": 0.0497, "step": 52870 }, { "epoch": 0.6448170731707317, "grad_norm": 0.4453791379928589, "learning_rate": 1.5701219512195124e-05, "loss": 0.0425, "step": 52875 }, { "epoch": 0.6448780487804878, "grad_norm": 0.519852876663208, "learning_rate": 1.5700813008130083e-05, "loss": 0.0447, "step": 52880 }, { "epoch": 0.6449390243902439, "grad_norm": 1.0409506559371948, "learning_rate": 1.570040650406504e-05, "loss": 0.0782, "step": 52885 }, { "epoch": 0.645, "grad_norm": 0.4949965476989746, "learning_rate": 1.5700000000000002e-05, "loss": 0.036, "step": 52890 }, { "epoch": 0.6450609756097561, "grad_norm": 0.8678846955299377, "learning_rate": 1.569959349593496e-05, "loss": 0.0322, "step": 52895 }, { "epoch": 0.6451219512195122, "grad_norm": 0.6753831505775452, "learning_rate": 1.569918699186992e-05, "loss": 0.0435, "step": 52900 }, { "epoch": 0.6451829268292683, "grad_norm": 0.7327056527137756, "learning_rate": 1.569878048780488e-05, "loss": 0.0545, "step": 52905 }, { "epoch": 0.6452439024390244, "grad_norm": 0.4068179428577423, "learning_rate": 1.5698373983739838e-05, "loss": 0.0607, "step": 52910 }, { "epoch": 0.6453048780487805, "grad_norm": 0.5003565549850464, "learning_rate": 1.56979674796748e-05, "loss": 0.0524, "step": 52915 }, { "epoch": 0.6453658536585366, "grad_norm": 0.6088694334030151, "learning_rate": 1.5697560975609758e-05, "loss": 0.0375, "step": 52920 }, { "epoch": 0.6454268292682926, "grad_norm": 0.27196723222732544, "learning_rate": 1.569715447154472e-05, "loss": 0.0413, "step": 52925 }, { "epoch": 0.6454878048780488, "grad_norm": 0.7326484322547913, "learning_rate": 1.5696747967479674e-05, "loss": 0.1102, "step": 52930 }, { "epoch": 0.6455487804878048, "grad_norm": 0.5608034133911133, "learning_rate": 1.5696341463414635e-05, "loss": 0.0413, "step": 52935 }, { "epoch": 0.645609756097561, "grad_norm": 0.4379764795303345, "learning_rate": 1.5695934959349594e-05, "loss": 0.0442, "step": 52940 }, { "epoch": 0.645670731707317, "grad_norm": 0.5036921501159668, "learning_rate": 1.5695528455284555e-05, "loss": 0.0581, "step": 52945 }, { "epoch": 0.6457317073170732, "grad_norm": 0.6767338514328003, "learning_rate": 1.5695121951219513e-05, "loss": 0.0701, "step": 52950 }, { "epoch": 0.6457926829268292, "grad_norm": 0.340035617351532, "learning_rate": 1.5694715447154475e-05, "loss": 0.0709, "step": 52955 }, { "epoch": 0.6458536585365854, "grad_norm": 1.0168613195419312, "learning_rate": 1.5694308943089433e-05, "loss": 0.0627, "step": 52960 }, { "epoch": 0.6459146341463414, "grad_norm": 0.5216514468193054, "learning_rate": 1.569390243902439e-05, "loss": 0.0663, "step": 52965 }, { "epoch": 0.6459756097560976, "grad_norm": 0.8866180777549744, "learning_rate": 1.569349593495935e-05, "loss": 0.0593, "step": 52970 }, { "epoch": 0.6460365853658536, "grad_norm": 0.4719635844230652, "learning_rate": 1.569308943089431e-05, "loss": 0.0354, "step": 52975 }, { "epoch": 0.6460975609756098, "grad_norm": 0.44722554087638855, "learning_rate": 1.569268292682927e-05, "loss": 0.0505, "step": 52980 }, { "epoch": 0.6461585365853658, "grad_norm": 0.7655324935913086, "learning_rate": 1.569227642276423e-05, "loss": 0.0477, "step": 52985 }, { "epoch": 0.646219512195122, "grad_norm": 1.0417463779449463, "learning_rate": 1.569186991869919e-05, "loss": 0.0625, "step": 52990 }, { "epoch": 0.646280487804878, "grad_norm": 0.6763619184494019, "learning_rate": 1.5691463414634147e-05, "loss": 0.0632, "step": 52995 }, { "epoch": 0.6463414634146342, "grad_norm": 1.8013194799423218, "learning_rate": 1.5691056910569108e-05, "loss": 0.0411, "step": 53000 }, { "epoch": 0.6464024390243902, "grad_norm": 0.6191869974136353, "learning_rate": 1.5690650406504066e-05, "loss": 0.0696, "step": 53005 }, { "epoch": 0.6464634146341464, "grad_norm": 0.9720760583877563, "learning_rate": 1.5690243902439028e-05, "loss": 0.052, "step": 53010 }, { "epoch": 0.6465243902439024, "grad_norm": 0.5882980227470398, "learning_rate": 1.5689837398373986e-05, "loss": 0.0314, "step": 53015 }, { "epoch": 0.6465853658536586, "grad_norm": 0.5039535164833069, "learning_rate": 1.5689430894308944e-05, "loss": 0.0754, "step": 53020 }, { "epoch": 0.6466463414634146, "grad_norm": 1.5235852003097534, "learning_rate": 1.5689024390243902e-05, "loss": 0.0708, "step": 53025 }, { "epoch": 0.6467073170731708, "grad_norm": 0.6399940848350525, "learning_rate": 1.5688617886178864e-05, "loss": 0.0558, "step": 53030 }, { "epoch": 0.6467682926829268, "grad_norm": 0.8776236176490784, "learning_rate": 1.5688211382113822e-05, "loss": 0.0693, "step": 53035 }, { "epoch": 0.646829268292683, "grad_norm": 0.996551513671875, "learning_rate": 1.5687804878048783e-05, "loss": 0.0391, "step": 53040 }, { "epoch": 0.646890243902439, "grad_norm": 0.7754421830177307, "learning_rate": 1.568739837398374e-05, "loss": 0.049, "step": 53045 }, { "epoch": 0.6469512195121951, "grad_norm": 0.5131524801254272, "learning_rate": 1.56869918699187e-05, "loss": 0.0661, "step": 53050 }, { "epoch": 0.6470121951219512, "grad_norm": 0.4755413830280304, "learning_rate": 1.5686585365853658e-05, "loss": 0.0445, "step": 53055 }, { "epoch": 0.6470731707317073, "grad_norm": 1.4110106229782104, "learning_rate": 1.568617886178862e-05, "loss": 0.0761, "step": 53060 }, { "epoch": 0.6471341463414634, "grad_norm": 1.379179835319519, "learning_rate": 1.5685772357723577e-05, "loss": 0.0399, "step": 53065 }, { "epoch": 0.6471951219512195, "grad_norm": 0.6360856890678406, "learning_rate": 1.568536585365854e-05, "loss": 0.0597, "step": 53070 }, { "epoch": 0.6472560975609756, "grad_norm": 0.8473181128501892, "learning_rate": 1.5684959349593497e-05, "loss": 0.0483, "step": 53075 }, { "epoch": 0.6473170731707317, "grad_norm": 1.185805320739746, "learning_rate": 1.5684552845528455e-05, "loss": 0.059, "step": 53080 }, { "epoch": 0.6473780487804878, "grad_norm": 0.5155017375946045, "learning_rate": 1.5684146341463417e-05, "loss": 0.0796, "step": 53085 }, { "epoch": 0.6474390243902439, "grad_norm": 0.39858514070510864, "learning_rate": 1.5683739837398375e-05, "loss": 0.0295, "step": 53090 }, { "epoch": 0.6475, "grad_norm": 0.39117738604545593, "learning_rate": 1.5683333333333336e-05, "loss": 0.0455, "step": 53095 }, { "epoch": 0.6475609756097561, "grad_norm": 0.35064154863357544, "learning_rate": 1.5682926829268294e-05, "loss": 0.0743, "step": 53100 }, { "epoch": 0.6476219512195122, "grad_norm": 0.5268722772598267, "learning_rate": 1.5682520325203256e-05, "loss": 0.0471, "step": 53105 }, { "epoch": 0.6476829268292683, "grad_norm": 0.2641597092151642, "learning_rate": 1.568211382113821e-05, "loss": 0.0653, "step": 53110 }, { "epoch": 0.6477439024390244, "grad_norm": 1.2096349000930786, "learning_rate": 1.5681707317073172e-05, "loss": 0.0611, "step": 53115 }, { "epoch": 0.6478048780487805, "grad_norm": 0.3764690160751343, "learning_rate": 1.568130081300813e-05, "loss": 0.0529, "step": 53120 }, { "epoch": 0.6478658536585366, "grad_norm": 0.5438969135284424, "learning_rate": 1.5680894308943092e-05, "loss": 0.0539, "step": 53125 }, { "epoch": 0.6479268292682927, "grad_norm": 0.42224588990211487, "learning_rate": 1.568048780487805e-05, "loss": 0.0401, "step": 53130 }, { "epoch": 0.6479878048780487, "grad_norm": 0.5987532138824463, "learning_rate": 1.568008130081301e-05, "loss": 0.0552, "step": 53135 }, { "epoch": 0.6480487804878049, "grad_norm": 1.1826133728027344, "learning_rate": 1.567967479674797e-05, "loss": 0.0654, "step": 53140 }, { "epoch": 0.6481097560975609, "grad_norm": 0.5186549425125122, "learning_rate": 1.5679268292682928e-05, "loss": 0.0468, "step": 53145 }, { "epoch": 0.6481707317073171, "grad_norm": 0.42767640948295593, "learning_rate": 1.5678861788617886e-05, "loss": 0.0493, "step": 53150 }, { "epoch": 0.6482317073170731, "grad_norm": 0.6637784838676453, "learning_rate": 1.5678455284552847e-05, "loss": 0.0456, "step": 53155 }, { "epoch": 0.6482926829268293, "grad_norm": 0.9761999249458313, "learning_rate": 1.5678048780487805e-05, "loss": 0.0525, "step": 53160 }, { "epoch": 0.6483536585365853, "grad_norm": 0.6611744165420532, "learning_rate": 1.5677642276422767e-05, "loss": 0.071, "step": 53165 }, { "epoch": 0.6484146341463415, "grad_norm": 0.47354191541671753, "learning_rate": 1.5677235772357725e-05, "loss": 0.0545, "step": 53170 }, { "epoch": 0.6484756097560975, "grad_norm": 0.6669202446937561, "learning_rate": 1.5676829268292683e-05, "loss": 0.0834, "step": 53175 }, { "epoch": 0.6485365853658537, "grad_norm": 0.932447612285614, "learning_rate": 1.5676422764227645e-05, "loss": 0.0765, "step": 53180 }, { "epoch": 0.6485975609756097, "grad_norm": 0.7443825006484985, "learning_rate": 1.5676016260162603e-05, "loss": 0.0513, "step": 53185 }, { "epoch": 0.6486585365853659, "grad_norm": 0.5049402713775635, "learning_rate": 1.5675609756097564e-05, "loss": 0.0369, "step": 53190 }, { "epoch": 0.6487195121951219, "grad_norm": 0.6508355140686035, "learning_rate": 1.5675203252032522e-05, "loss": 0.0801, "step": 53195 }, { "epoch": 0.6487804878048781, "grad_norm": 0.5788078308105469, "learning_rate": 1.567479674796748e-05, "loss": 0.0868, "step": 53200 }, { "epoch": 0.6488414634146341, "grad_norm": 0.44510021805763245, "learning_rate": 1.567439024390244e-05, "loss": 0.0491, "step": 53205 }, { "epoch": 0.6489024390243903, "grad_norm": 0.47877392172813416, "learning_rate": 1.56739837398374e-05, "loss": 0.0908, "step": 53210 }, { "epoch": 0.6489634146341463, "grad_norm": 0.5414971709251404, "learning_rate": 1.567357723577236e-05, "loss": 0.077, "step": 53215 }, { "epoch": 0.6490243902439025, "grad_norm": 0.34193575382232666, "learning_rate": 1.567317073170732e-05, "loss": 0.0472, "step": 53220 }, { "epoch": 0.6490853658536585, "grad_norm": 0.41491612792015076, "learning_rate": 1.5672764227642278e-05, "loss": 0.0679, "step": 53225 }, { "epoch": 0.6491463414634147, "grad_norm": 0.3725599944591522, "learning_rate": 1.5672357723577236e-05, "loss": 0.0613, "step": 53230 }, { "epoch": 0.6492073170731707, "grad_norm": 0.7225468158721924, "learning_rate": 1.5671951219512194e-05, "loss": 0.0915, "step": 53235 }, { "epoch": 0.6492682926829269, "grad_norm": 1.1216880083084106, "learning_rate": 1.5671544715447156e-05, "loss": 0.0701, "step": 53240 }, { "epoch": 0.6493292682926829, "grad_norm": 0.6243299245834351, "learning_rate": 1.5671138211382114e-05, "loss": 0.0477, "step": 53245 }, { "epoch": 0.649390243902439, "grad_norm": 0.7441946268081665, "learning_rate": 1.5670731707317075e-05, "loss": 0.048, "step": 53250 }, { "epoch": 0.6494512195121951, "grad_norm": 0.47777020931243896, "learning_rate": 1.5670325203252034e-05, "loss": 0.0456, "step": 53255 }, { "epoch": 0.6495121951219512, "grad_norm": 0.3926624655723572, "learning_rate": 1.566991869918699e-05, "loss": 0.0559, "step": 53260 }, { "epoch": 0.6495731707317073, "grad_norm": 0.46545544266700745, "learning_rate": 1.5669512195121953e-05, "loss": 0.0379, "step": 53265 }, { "epoch": 0.6496341463414634, "grad_norm": 0.4676022231578827, "learning_rate": 1.566910569105691e-05, "loss": 0.0558, "step": 53270 }, { "epoch": 0.6496951219512195, "grad_norm": 0.34498995542526245, "learning_rate": 1.5668699186991873e-05, "loss": 0.0562, "step": 53275 }, { "epoch": 0.6497560975609756, "grad_norm": 0.4815455973148346, "learning_rate": 1.566829268292683e-05, "loss": 0.0511, "step": 53280 }, { "epoch": 0.6498170731707317, "grad_norm": 0.6487461924552917, "learning_rate": 1.5667886178861792e-05, "loss": 0.0681, "step": 53285 }, { "epoch": 0.6498780487804878, "grad_norm": 0.6555004119873047, "learning_rate": 1.5667479674796747e-05, "loss": 0.0638, "step": 53290 }, { "epoch": 0.6499390243902439, "grad_norm": 0.45938801765441895, "learning_rate": 1.566707317073171e-05, "loss": 0.0366, "step": 53295 }, { "epoch": 0.65, "grad_norm": 0.6041575074195862, "learning_rate": 1.5666666666666667e-05, "loss": 0.0498, "step": 53300 }, { "epoch": 0.6500609756097561, "grad_norm": 0.5474935173988342, "learning_rate": 1.566626016260163e-05, "loss": 0.0703, "step": 53305 }, { "epoch": 0.6501219512195122, "grad_norm": 0.5560622215270996, "learning_rate": 1.5665853658536587e-05, "loss": 0.0634, "step": 53310 }, { "epoch": 0.6501829268292683, "grad_norm": 0.7975541353225708, "learning_rate": 1.5665447154471548e-05, "loss": 0.0739, "step": 53315 }, { "epoch": 0.6502439024390244, "grad_norm": 0.6805651783943176, "learning_rate": 1.5665040650406503e-05, "loss": 0.0581, "step": 53320 }, { "epoch": 0.6503048780487805, "grad_norm": 0.4528098702430725, "learning_rate": 1.5664634146341464e-05, "loss": 0.0623, "step": 53325 }, { "epoch": 0.6503658536585366, "grad_norm": 0.5639137029647827, "learning_rate": 1.5664227642276422e-05, "loss": 0.0429, "step": 53330 }, { "epoch": 0.6504268292682926, "grad_norm": 0.5775586366653442, "learning_rate": 1.5663821138211384e-05, "loss": 0.057, "step": 53335 }, { "epoch": 0.6504878048780488, "grad_norm": 0.8439855575561523, "learning_rate": 1.5663414634146342e-05, "loss": 0.0706, "step": 53340 }, { "epoch": 0.6505487804878048, "grad_norm": 0.7112733125686646, "learning_rate": 1.5663008130081304e-05, "loss": 0.0443, "step": 53345 }, { "epoch": 0.650609756097561, "grad_norm": 0.4798944592475891, "learning_rate": 1.5662601626016262e-05, "loss": 0.0778, "step": 53350 }, { "epoch": 0.650670731707317, "grad_norm": 0.7934032678604126, "learning_rate": 1.566219512195122e-05, "loss": 0.062, "step": 53355 }, { "epoch": 0.6507317073170732, "grad_norm": 0.4671638309955597, "learning_rate": 1.566178861788618e-05, "loss": 0.0654, "step": 53360 }, { "epoch": 0.6507926829268292, "grad_norm": 0.31390923261642456, "learning_rate": 1.566138211382114e-05, "loss": 0.0601, "step": 53365 }, { "epoch": 0.6508536585365854, "grad_norm": 0.7938957810401917, "learning_rate": 1.56609756097561e-05, "loss": 0.064, "step": 53370 }, { "epoch": 0.6509146341463414, "grad_norm": 0.9760705828666687, "learning_rate": 1.566056910569106e-05, "loss": 0.0369, "step": 53375 }, { "epoch": 0.6509756097560976, "grad_norm": 0.5659485459327698, "learning_rate": 1.5660162601626017e-05, "loss": 0.0461, "step": 53380 }, { "epoch": 0.6510365853658536, "grad_norm": 0.20923791825771332, "learning_rate": 1.5659756097560975e-05, "loss": 0.0613, "step": 53385 }, { "epoch": 0.6510975609756098, "grad_norm": 1.1103260517120361, "learning_rate": 1.5659349593495937e-05, "loss": 0.0812, "step": 53390 }, { "epoch": 0.6511585365853658, "grad_norm": 0.6549415588378906, "learning_rate": 1.5658943089430895e-05, "loss": 0.0654, "step": 53395 }, { "epoch": 0.651219512195122, "grad_norm": 0.7619310617446899, "learning_rate": 1.5658536585365857e-05, "loss": 0.0571, "step": 53400 }, { "epoch": 0.651280487804878, "grad_norm": 0.9498670697212219, "learning_rate": 1.5658130081300815e-05, "loss": 0.0679, "step": 53405 }, { "epoch": 0.6513414634146342, "grad_norm": 0.4796530604362488, "learning_rate": 1.5657723577235773e-05, "loss": 0.0504, "step": 53410 }, { "epoch": 0.6514024390243902, "grad_norm": 0.29505470395088196, "learning_rate": 1.565731707317073e-05, "loss": 0.036, "step": 53415 }, { "epoch": 0.6514634146341464, "grad_norm": 0.9273099303245544, "learning_rate": 1.5656910569105692e-05, "loss": 0.0465, "step": 53420 }, { "epoch": 0.6515243902439024, "grad_norm": 0.6885269284248352, "learning_rate": 1.565650406504065e-05, "loss": 0.0582, "step": 53425 }, { "epoch": 0.6515853658536586, "grad_norm": 0.6065188646316528, "learning_rate": 1.5656097560975612e-05, "loss": 0.0585, "step": 53430 }, { "epoch": 0.6516463414634146, "grad_norm": 0.41502678394317627, "learning_rate": 1.565569105691057e-05, "loss": 0.0774, "step": 53435 }, { "epoch": 0.6517073170731708, "grad_norm": 0.5482349395751953, "learning_rate": 1.565528455284553e-05, "loss": 0.0417, "step": 53440 }, { "epoch": 0.6517682926829268, "grad_norm": 0.10484378039836884, "learning_rate": 1.565487804878049e-05, "loss": 0.0259, "step": 53445 }, { "epoch": 0.651829268292683, "grad_norm": 0.7196028232574463, "learning_rate": 1.5654471544715448e-05, "loss": 0.0724, "step": 53450 }, { "epoch": 0.651890243902439, "grad_norm": 1.5759645700454712, "learning_rate": 1.565406504065041e-05, "loss": 0.0338, "step": 53455 }, { "epoch": 0.6519512195121951, "grad_norm": 0.23553155362606049, "learning_rate": 1.5653658536585368e-05, "loss": 0.042, "step": 53460 }, { "epoch": 0.6520121951219512, "grad_norm": 1.005803108215332, "learning_rate": 1.5653252032520326e-05, "loss": 0.0497, "step": 53465 }, { "epoch": 0.6520731707317073, "grad_norm": 0.8024439811706543, "learning_rate": 1.5652845528455284e-05, "loss": 0.0551, "step": 53470 }, { "epoch": 0.6521341463414634, "grad_norm": 1.6834185123443604, "learning_rate": 1.5652439024390245e-05, "loss": 0.0665, "step": 53475 }, { "epoch": 0.6521951219512195, "grad_norm": 0.5442401766777039, "learning_rate": 1.5652032520325204e-05, "loss": 0.1079, "step": 53480 }, { "epoch": 0.6522560975609756, "grad_norm": 0.4040186405181885, "learning_rate": 1.5651626016260165e-05, "loss": 0.0547, "step": 53485 }, { "epoch": 0.6523170731707317, "grad_norm": 0.6243391633033752, "learning_rate": 1.5651219512195123e-05, "loss": 0.0646, "step": 53490 }, { "epoch": 0.6523780487804878, "grad_norm": 0.23644588887691498, "learning_rate": 1.5650813008130085e-05, "loss": 0.0383, "step": 53495 }, { "epoch": 0.6524390243902439, "grad_norm": 0.4502129852771759, "learning_rate": 1.565040650406504e-05, "loss": 0.0474, "step": 53500 }, { "epoch": 0.6525, "grad_norm": 0.6671398282051086, "learning_rate": 1.565e-05, "loss": 0.0402, "step": 53505 }, { "epoch": 0.6525609756097561, "grad_norm": 0.47142505645751953, "learning_rate": 1.564959349593496e-05, "loss": 0.0443, "step": 53510 }, { "epoch": 0.6526219512195122, "grad_norm": 0.6729686260223389, "learning_rate": 1.564918699186992e-05, "loss": 0.0404, "step": 53515 }, { "epoch": 0.6526829268292683, "grad_norm": 0.2694218158721924, "learning_rate": 1.564878048780488e-05, "loss": 0.0652, "step": 53520 }, { "epoch": 0.6527439024390244, "grad_norm": 0.7494012713432312, "learning_rate": 1.564837398373984e-05, "loss": 0.0482, "step": 53525 }, { "epoch": 0.6528048780487805, "grad_norm": 0.33763885498046875, "learning_rate": 1.56479674796748e-05, "loss": 0.0342, "step": 53530 }, { "epoch": 0.6528658536585366, "grad_norm": 0.7835627198219299, "learning_rate": 1.5647560975609756e-05, "loss": 0.0472, "step": 53535 }, { "epoch": 0.6529268292682927, "grad_norm": 1.0477181673049927, "learning_rate": 1.5647154471544718e-05, "loss": 0.1231, "step": 53540 }, { "epoch": 0.6529878048780487, "grad_norm": 0.5157834887504578, "learning_rate": 1.5646747967479676e-05, "loss": 0.067, "step": 53545 }, { "epoch": 0.6530487804878049, "grad_norm": 0.7686808109283447, "learning_rate": 1.5646341463414638e-05, "loss": 0.0748, "step": 53550 }, { "epoch": 0.653109756097561, "grad_norm": 0.5071441531181335, "learning_rate": 1.5645934959349596e-05, "loss": 0.0387, "step": 53555 }, { "epoch": 0.6531707317073171, "grad_norm": 1.3156330585479736, "learning_rate": 1.5645528455284554e-05, "loss": 0.0787, "step": 53560 }, { "epoch": 0.6532317073170731, "grad_norm": 1.3492199182510376, "learning_rate": 1.5645121951219512e-05, "loss": 0.0678, "step": 53565 }, { "epoch": 0.6532926829268293, "grad_norm": 7.793096542358398, "learning_rate": 1.5644715447154474e-05, "loss": 0.0975, "step": 53570 }, { "epoch": 0.6533536585365853, "grad_norm": 0.4335111677646637, "learning_rate": 1.564430894308943e-05, "loss": 0.0449, "step": 53575 }, { "epoch": 0.6534146341463415, "grad_norm": 0.7539817690849304, "learning_rate": 1.5643902439024393e-05, "loss": 0.0699, "step": 53580 }, { "epoch": 0.6534756097560975, "grad_norm": 0.3041202425956726, "learning_rate": 1.564349593495935e-05, "loss": 0.0317, "step": 53585 }, { "epoch": 0.6535365853658537, "grad_norm": 0.2925401031970978, "learning_rate": 1.564308943089431e-05, "loss": 0.095, "step": 53590 }, { "epoch": 0.6535975609756097, "grad_norm": 0.4960578382015228, "learning_rate": 1.5642682926829268e-05, "loss": 0.0528, "step": 53595 }, { "epoch": 0.6536585365853659, "grad_norm": 1.4741135835647583, "learning_rate": 1.564227642276423e-05, "loss": 0.0468, "step": 53600 }, { "epoch": 0.6537195121951219, "grad_norm": 0.478507399559021, "learning_rate": 1.5641869918699187e-05, "loss": 0.0454, "step": 53605 }, { "epoch": 0.6537804878048781, "grad_norm": 0.698228657245636, "learning_rate": 1.564146341463415e-05, "loss": 0.0685, "step": 53610 }, { "epoch": 0.6538414634146341, "grad_norm": 0.26015153527259827, "learning_rate": 1.5641056910569107e-05, "loss": 0.0424, "step": 53615 }, { "epoch": 0.6539024390243903, "grad_norm": 0.416440486907959, "learning_rate": 1.5640650406504065e-05, "loss": 0.0616, "step": 53620 }, { "epoch": 0.6539634146341463, "grad_norm": 0.4228372871875763, "learning_rate": 1.5640243902439026e-05, "loss": 0.046, "step": 53625 }, { "epoch": 0.6540243902439025, "grad_norm": 0.6259408593177795, "learning_rate": 1.5639837398373985e-05, "loss": 0.0631, "step": 53630 }, { "epoch": 0.6540853658536585, "grad_norm": 0.7229248285293579, "learning_rate": 1.5639430894308946e-05, "loss": 0.0814, "step": 53635 }, { "epoch": 0.6541463414634147, "grad_norm": 0.642014741897583, "learning_rate": 1.5639024390243904e-05, "loss": 0.0405, "step": 53640 }, { "epoch": 0.6542073170731707, "grad_norm": 0.35074731707572937, "learning_rate": 1.5638617886178862e-05, "loss": 0.067, "step": 53645 }, { "epoch": 0.6542682926829269, "grad_norm": 0.7919930219650269, "learning_rate": 1.563821138211382e-05, "loss": 0.0706, "step": 53650 }, { "epoch": 0.6543292682926829, "grad_norm": 0.7045904994010925, "learning_rate": 1.5637804878048782e-05, "loss": 0.0503, "step": 53655 }, { "epoch": 0.654390243902439, "grad_norm": 0.5501507520675659, "learning_rate": 1.563739837398374e-05, "loss": 0.0832, "step": 53660 }, { "epoch": 0.6544512195121951, "grad_norm": 0.6853204965591431, "learning_rate": 1.56369918699187e-05, "loss": 0.0668, "step": 53665 }, { "epoch": 0.6545121951219512, "grad_norm": 0.5678572654724121, "learning_rate": 1.563658536585366e-05, "loss": 0.0335, "step": 53670 }, { "epoch": 0.6545731707317073, "grad_norm": 0.6030398011207581, "learning_rate": 1.563617886178862e-05, "loss": 0.08, "step": 53675 }, { "epoch": 0.6546341463414634, "grad_norm": 0.8199453949928284, "learning_rate": 1.5635772357723576e-05, "loss": 0.0363, "step": 53680 }, { "epoch": 0.6546951219512195, "grad_norm": 0.7262534499168396, "learning_rate": 1.5635365853658538e-05, "loss": 0.0653, "step": 53685 }, { "epoch": 0.6547560975609756, "grad_norm": 0.43467646837234497, "learning_rate": 1.5634959349593496e-05, "loss": 0.0484, "step": 53690 }, { "epoch": 0.6548170731707317, "grad_norm": 0.38085460662841797, "learning_rate": 1.5634552845528457e-05, "loss": 0.0574, "step": 53695 }, { "epoch": 0.6548780487804878, "grad_norm": 0.4422358274459839, "learning_rate": 1.5634146341463415e-05, "loss": 0.0578, "step": 53700 }, { "epoch": 0.6549390243902439, "grad_norm": 1.0197423696517944, "learning_rate": 1.5633739837398377e-05, "loss": 0.0324, "step": 53705 }, { "epoch": 0.655, "grad_norm": 0.5552980899810791, "learning_rate": 1.5633333333333335e-05, "loss": 0.0325, "step": 53710 }, { "epoch": 0.6550609756097561, "grad_norm": 0.3426799476146698, "learning_rate": 1.5632926829268293e-05, "loss": 0.054, "step": 53715 }, { "epoch": 0.6551219512195122, "grad_norm": 0.38574373722076416, "learning_rate": 1.5632520325203255e-05, "loss": 0.0521, "step": 53720 }, { "epoch": 0.6551829268292683, "grad_norm": 0.5057094097137451, "learning_rate": 1.5632113821138213e-05, "loss": 0.0488, "step": 53725 }, { "epoch": 0.6552439024390244, "grad_norm": 1.1246657371520996, "learning_rate": 1.563170731707317e-05, "loss": 0.0506, "step": 53730 }, { "epoch": 0.6553048780487805, "grad_norm": 0.6355334520339966, "learning_rate": 1.5631300813008132e-05, "loss": 0.043, "step": 53735 }, { "epoch": 0.6553658536585366, "grad_norm": 0.5186790823936462, "learning_rate": 1.563089430894309e-05, "loss": 0.0399, "step": 53740 }, { "epoch": 0.6554268292682927, "grad_norm": 0.9522067904472351, "learning_rate": 1.563048780487805e-05, "loss": 0.0576, "step": 53745 }, { "epoch": 0.6554878048780488, "grad_norm": 0.3820675015449524, "learning_rate": 1.563008130081301e-05, "loss": 0.06, "step": 53750 }, { "epoch": 0.6555487804878048, "grad_norm": 0.47116193175315857, "learning_rate": 1.5629674796747968e-05, "loss": 0.0477, "step": 53755 }, { "epoch": 0.655609756097561, "grad_norm": 0.6081336736679077, "learning_rate": 1.562926829268293e-05, "loss": 0.0681, "step": 53760 }, { "epoch": 0.655670731707317, "grad_norm": 0.825851559638977, "learning_rate": 1.5628861788617888e-05, "loss": 0.0552, "step": 53765 }, { "epoch": 0.6557317073170732, "grad_norm": 0.7577455639839172, "learning_rate": 1.5628455284552846e-05, "loss": 0.0524, "step": 53770 }, { "epoch": 0.6557926829268292, "grad_norm": 0.5077236294746399, "learning_rate": 1.5628048780487804e-05, "loss": 0.0398, "step": 53775 }, { "epoch": 0.6558536585365854, "grad_norm": 1.3246970176696777, "learning_rate": 1.5627642276422766e-05, "loss": 0.0695, "step": 53780 }, { "epoch": 0.6559146341463414, "grad_norm": 0.7224046587944031, "learning_rate": 1.5627235772357724e-05, "loss": 0.062, "step": 53785 }, { "epoch": 0.6559756097560976, "grad_norm": 0.2968739867210388, "learning_rate": 1.5626829268292685e-05, "loss": 0.0444, "step": 53790 }, { "epoch": 0.6560365853658536, "grad_norm": 0.6168208718299866, "learning_rate": 1.5626422764227643e-05, "loss": 0.053, "step": 53795 }, { "epoch": 0.6560975609756098, "grad_norm": 0.8109990954399109, "learning_rate": 1.56260162601626e-05, "loss": 0.0368, "step": 53800 }, { "epoch": 0.6561585365853658, "grad_norm": 0.6874208450317383, "learning_rate": 1.5625609756097563e-05, "loss": 0.0302, "step": 53805 }, { "epoch": 0.656219512195122, "grad_norm": 0.5717962384223938, "learning_rate": 1.562520325203252e-05, "loss": 0.0732, "step": 53810 }, { "epoch": 0.656280487804878, "grad_norm": 0.5833133459091187, "learning_rate": 1.5624796747967483e-05, "loss": 0.0669, "step": 53815 }, { "epoch": 0.6563414634146342, "grad_norm": 0.462544322013855, "learning_rate": 1.562439024390244e-05, "loss": 0.0396, "step": 53820 }, { "epoch": 0.6564024390243902, "grad_norm": 0.8396281599998474, "learning_rate": 1.56239837398374e-05, "loss": 0.0456, "step": 53825 }, { "epoch": 0.6564634146341464, "grad_norm": 0.8219467997550964, "learning_rate": 1.5623577235772357e-05, "loss": 0.0454, "step": 53830 }, { "epoch": 0.6565243902439024, "grad_norm": 0.7914717197418213, "learning_rate": 1.562317073170732e-05, "loss": 0.0454, "step": 53835 }, { "epoch": 0.6565853658536586, "grad_norm": 0.7410051226615906, "learning_rate": 1.5622764227642277e-05, "loss": 0.0382, "step": 53840 }, { "epoch": 0.6566463414634146, "grad_norm": 2.2547547817230225, "learning_rate": 1.562235772357724e-05, "loss": 0.0769, "step": 53845 }, { "epoch": 0.6567073170731708, "grad_norm": 0.6306477189064026, "learning_rate": 1.5621951219512196e-05, "loss": 0.0456, "step": 53850 }, { "epoch": 0.6567682926829268, "grad_norm": 0.1587187796831131, "learning_rate": 1.5621544715447158e-05, "loss": 0.0737, "step": 53855 }, { "epoch": 0.656829268292683, "grad_norm": 0.7352002859115601, "learning_rate": 1.5621138211382113e-05, "loss": 0.058, "step": 53860 }, { "epoch": 0.656890243902439, "grad_norm": 0.5792887806892395, "learning_rate": 1.5620731707317074e-05, "loss": 0.0694, "step": 53865 }, { "epoch": 0.6569512195121952, "grad_norm": 1.1176210641860962, "learning_rate": 1.5620325203252032e-05, "loss": 0.0426, "step": 53870 }, { "epoch": 0.6570121951219512, "grad_norm": 0.4776792824268341, "learning_rate": 1.5619918699186994e-05, "loss": 0.0519, "step": 53875 }, { "epoch": 0.6570731707317073, "grad_norm": 0.7124341130256653, "learning_rate": 1.5619512195121952e-05, "loss": 0.0664, "step": 53880 }, { "epoch": 0.6571341463414634, "grad_norm": 0.27851811051368713, "learning_rate": 1.5619105691056914e-05, "loss": 0.0347, "step": 53885 }, { "epoch": 0.6571951219512195, "grad_norm": 2.5650787353515625, "learning_rate": 1.561869918699187e-05, "loss": 0.0664, "step": 53890 }, { "epoch": 0.6572560975609756, "grad_norm": 0.5833896398544312, "learning_rate": 1.561829268292683e-05, "loss": 0.0434, "step": 53895 }, { "epoch": 0.6573170731707317, "grad_norm": 0.43671202659606934, "learning_rate": 1.561788617886179e-05, "loss": 0.0535, "step": 53900 }, { "epoch": 0.6573780487804878, "grad_norm": 0.6067839860916138, "learning_rate": 1.561747967479675e-05, "loss": 0.0927, "step": 53905 }, { "epoch": 0.6574390243902439, "grad_norm": 0.5726041793823242, "learning_rate": 1.5617073170731708e-05, "loss": 0.0712, "step": 53910 }, { "epoch": 0.6575, "grad_norm": 1.0446226596832275, "learning_rate": 1.561666666666667e-05, "loss": 0.0776, "step": 53915 }, { "epoch": 0.6575609756097561, "grad_norm": 0.47694826126098633, "learning_rate": 1.5616260162601627e-05, "loss": 0.0718, "step": 53920 }, { "epoch": 0.6576219512195122, "grad_norm": 2.0740115642547607, "learning_rate": 1.5615853658536585e-05, "loss": 0.0533, "step": 53925 }, { "epoch": 0.6576829268292683, "grad_norm": 0.49320322275161743, "learning_rate": 1.5615447154471547e-05, "loss": 0.0514, "step": 53930 }, { "epoch": 0.6577439024390244, "grad_norm": 0.25944289565086365, "learning_rate": 1.5615040650406505e-05, "loss": 0.0413, "step": 53935 }, { "epoch": 0.6578048780487805, "grad_norm": 0.2975078821182251, "learning_rate": 1.5614634146341466e-05, "loss": 0.0254, "step": 53940 }, { "epoch": 0.6578658536585366, "grad_norm": 0.25749441981315613, "learning_rate": 1.5614227642276425e-05, "loss": 0.0295, "step": 53945 }, { "epoch": 0.6579268292682927, "grad_norm": 0.9518334865570068, "learning_rate": 1.5613821138211383e-05, "loss": 0.0657, "step": 53950 }, { "epoch": 0.6579878048780488, "grad_norm": 1.400471806526184, "learning_rate": 1.561341463414634e-05, "loss": 0.0614, "step": 53955 }, { "epoch": 0.6580487804878049, "grad_norm": 0.44906508922576904, "learning_rate": 1.5613008130081302e-05, "loss": 0.0496, "step": 53960 }, { "epoch": 0.658109756097561, "grad_norm": 0.7223321795463562, "learning_rate": 1.561260162601626e-05, "loss": 0.0452, "step": 53965 }, { "epoch": 0.6581707317073171, "grad_norm": 0.6560736894607544, "learning_rate": 1.5612195121951222e-05, "loss": 0.0623, "step": 53970 }, { "epoch": 0.6582317073170731, "grad_norm": 0.924930989742279, "learning_rate": 1.561178861788618e-05, "loss": 0.0751, "step": 53975 }, { "epoch": 0.6582926829268293, "grad_norm": 0.8334946632385254, "learning_rate": 1.5611382113821138e-05, "loss": 0.0546, "step": 53980 }, { "epoch": 0.6583536585365853, "grad_norm": 1.1486139297485352, "learning_rate": 1.56109756097561e-05, "loss": 0.0775, "step": 53985 }, { "epoch": 0.6584146341463415, "grad_norm": 0.5240584015846252, "learning_rate": 1.5610569105691058e-05, "loss": 0.0535, "step": 53990 }, { "epoch": 0.6584756097560975, "grad_norm": 0.7481920123100281, "learning_rate": 1.5610162601626016e-05, "loss": 0.0448, "step": 53995 }, { "epoch": 0.6585365853658537, "grad_norm": 0.6103417873382568, "learning_rate": 1.5609756097560978e-05, "loss": 0.0392, "step": 54000 }, { "epoch": 0.6585975609756097, "grad_norm": 0.5869116187095642, "learning_rate": 1.5609349593495936e-05, "loss": 0.0391, "step": 54005 }, { "epoch": 0.6586585365853659, "grad_norm": 0.39169546961784363, "learning_rate": 1.5608943089430894e-05, "loss": 0.0535, "step": 54010 }, { "epoch": 0.6587195121951219, "grad_norm": 0.3174735903739929, "learning_rate": 1.5608536585365855e-05, "loss": 0.0283, "step": 54015 }, { "epoch": 0.6587804878048781, "grad_norm": 0.6621841192245483, "learning_rate": 1.5608130081300813e-05, "loss": 0.0488, "step": 54020 }, { "epoch": 0.6588414634146341, "grad_norm": 1.2884527444839478, "learning_rate": 1.5607723577235775e-05, "loss": 0.0431, "step": 54025 }, { "epoch": 0.6589024390243903, "grad_norm": 0.16608881950378418, "learning_rate": 1.5607317073170733e-05, "loss": 0.0297, "step": 54030 }, { "epoch": 0.6589634146341463, "grad_norm": 0.5357347726821899, "learning_rate": 1.5606910569105695e-05, "loss": 0.0411, "step": 54035 }, { "epoch": 0.6590243902439025, "grad_norm": 0.863267183303833, "learning_rate": 1.560650406504065e-05, "loss": 0.0849, "step": 54040 }, { "epoch": 0.6590853658536585, "grad_norm": 0.540785014629364, "learning_rate": 1.560609756097561e-05, "loss": 0.0465, "step": 54045 }, { "epoch": 0.6591463414634147, "grad_norm": 0.7796725034713745, "learning_rate": 1.560569105691057e-05, "loss": 0.0682, "step": 54050 }, { "epoch": 0.6592073170731707, "grad_norm": 0.41137489676475525, "learning_rate": 1.560528455284553e-05, "loss": 0.0456, "step": 54055 }, { "epoch": 0.6592682926829269, "grad_norm": 0.5571985244750977, "learning_rate": 1.560487804878049e-05, "loss": 0.057, "step": 54060 }, { "epoch": 0.6593292682926829, "grad_norm": 0.3705780506134033, "learning_rate": 1.560447154471545e-05, "loss": 0.0492, "step": 54065 }, { "epoch": 0.659390243902439, "grad_norm": 1.177132248878479, "learning_rate": 1.5604065040650408e-05, "loss": 0.0887, "step": 54070 }, { "epoch": 0.6594512195121951, "grad_norm": 0.41149938106536865, "learning_rate": 1.5603658536585366e-05, "loss": 0.0403, "step": 54075 }, { "epoch": 0.6595121951219513, "grad_norm": 0.505867600440979, "learning_rate": 1.5603252032520328e-05, "loss": 0.0378, "step": 54080 }, { "epoch": 0.6595731707317073, "grad_norm": 0.5520470142364502, "learning_rate": 1.5602845528455286e-05, "loss": 0.0491, "step": 54085 }, { "epoch": 0.6596341463414634, "grad_norm": 2.0214314460754395, "learning_rate": 1.5602439024390244e-05, "loss": 0.0375, "step": 54090 }, { "epoch": 0.6596951219512195, "grad_norm": 0.5935317873954773, "learning_rate": 1.5602032520325206e-05, "loss": 0.0436, "step": 54095 }, { "epoch": 0.6597560975609756, "grad_norm": 0.7512615323066711, "learning_rate": 1.5601626016260164e-05, "loss": 0.061, "step": 54100 }, { "epoch": 0.6598170731707317, "grad_norm": 0.6436501741409302, "learning_rate": 1.5601219512195122e-05, "loss": 0.0907, "step": 54105 }, { "epoch": 0.6598780487804878, "grad_norm": 0.7877206802368164, "learning_rate": 1.5600813008130083e-05, "loss": 0.0742, "step": 54110 }, { "epoch": 0.6599390243902439, "grad_norm": 0.7416594624519348, "learning_rate": 1.560040650406504e-05, "loss": 0.0537, "step": 54115 }, { "epoch": 0.66, "grad_norm": 0.6109095811843872, "learning_rate": 1.5600000000000003e-05, "loss": 0.0295, "step": 54120 }, { "epoch": 0.6600609756097561, "grad_norm": 0.655674934387207, "learning_rate": 1.559959349593496e-05, "loss": 0.038, "step": 54125 }, { "epoch": 0.6601219512195122, "grad_norm": 0.5542863011360168, "learning_rate": 1.559918699186992e-05, "loss": 0.055, "step": 54130 }, { "epoch": 0.6601829268292683, "grad_norm": 0.4317356050014496, "learning_rate": 1.5598780487804877e-05, "loss": 0.0659, "step": 54135 }, { "epoch": 0.6602439024390244, "grad_norm": 0.4784597158432007, "learning_rate": 1.559837398373984e-05, "loss": 0.0445, "step": 54140 }, { "epoch": 0.6603048780487805, "grad_norm": 0.5701896548271179, "learning_rate": 1.5597967479674797e-05, "loss": 0.0379, "step": 54145 }, { "epoch": 0.6603658536585366, "grad_norm": 1.4327232837677002, "learning_rate": 1.559756097560976e-05, "loss": 0.0823, "step": 54150 }, { "epoch": 0.6604268292682927, "grad_norm": 0.7403857707977295, "learning_rate": 1.5597154471544717e-05, "loss": 0.1222, "step": 54155 }, { "epoch": 0.6604878048780488, "grad_norm": 0.8955525159835815, "learning_rate": 1.5596747967479675e-05, "loss": 0.0623, "step": 54160 }, { "epoch": 0.6605487804878049, "grad_norm": 0.6435827612876892, "learning_rate": 1.5596341463414636e-05, "loss": 0.0526, "step": 54165 }, { "epoch": 0.660609756097561, "grad_norm": 0.8625323176383972, "learning_rate": 1.5595934959349595e-05, "loss": 0.0498, "step": 54170 }, { "epoch": 0.660670731707317, "grad_norm": 1.7698445320129395, "learning_rate": 1.5595528455284553e-05, "loss": 0.0746, "step": 54175 }, { "epoch": 0.6607317073170732, "grad_norm": 0.7033355832099915, "learning_rate": 1.5595121951219514e-05, "loss": 0.0889, "step": 54180 }, { "epoch": 0.6607926829268292, "grad_norm": 0.6830714344978333, "learning_rate": 1.5594715447154472e-05, "loss": 0.047, "step": 54185 }, { "epoch": 0.6608536585365854, "grad_norm": 0.7159563302993774, "learning_rate": 1.559430894308943e-05, "loss": 0.0472, "step": 54190 }, { "epoch": 0.6609146341463414, "grad_norm": 0.6140584945678711, "learning_rate": 1.5593902439024392e-05, "loss": 0.0599, "step": 54195 }, { "epoch": 0.6609756097560976, "grad_norm": 0.6598969101905823, "learning_rate": 1.559349593495935e-05, "loss": 0.0393, "step": 54200 }, { "epoch": 0.6610365853658536, "grad_norm": 3.0835344791412354, "learning_rate": 1.559308943089431e-05, "loss": 0.0354, "step": 54205 }, { "epoch": 0.6610975609756098, "grad_norm": 0.469430536031723, "learning_rate": 1.559268292682927e-05, "loss": 0.0606, "step": 54210 }, { "epoch": 0.6611585365853658, "grad_norm": 0.3929600715637207, "learning_rate": 1.559227642276423e-05, "loss": 0.0484, "step": 54215 }, { "epoch": 0.661219512195122, "grad_norm": 0.6548556089401245, "learning_rate": 1.5591869918699186e-05, "loss": 0.0399, "step": 54220 }, { "epoch": 0.661280487804878, "grad_norm": 0.4812262952327728, "learning_rate": 1.5591463414634148e-05, "loss": 0.0565, "step": 54225 }, { "epoch": 0.6613414634146342, "grad_norm": 0.4727938771247864, "learning_rate": 1.5591056910569106e-05, "loss": 0.0791, "step": 54230 }, { "epoch": 0.6614024390243902, "grad_norm": 0.5196911096572876, "learning_rate": 1.5590650406504067e-05, "loss": 0.0525, "step": 54235 }, { "epoch": 0.6614634146341464, "grad_norm": 0.9724487066268921, "learning_rate": 1.5590243902439025e-05, "loss": 0.0586, "step": 54240 }, { "epoch": 0.6615243902439024, "grad_norm": 0.2959631681442261, "learning_rate": 1.5589837398373987e-05, "loss": 0.0682, "step": 54245 }, { "epoch": 0.6615853658536586, "grad_norm": 0.4276915490627289, "learning_rate": 1.5589430894308945e-05, "loss": 0.0349, "step": 54250 }, { "epoch": 0.6616463414634146, "grad_norm": 2.146013021469116, "learning_rate": 1.5589024390243903e-05, "loss": 0.0403, "step": 54255 }, { "epoch": 0.6617073170731708, "grad_norm": 0.6352362036705017, "learning_rate": 1.558861788617886e-05, "loss": 0.0644, "step": 54260 }, { "epoch": 0.6617682926829268, "grad_norm": 0.8740677833557129, "learning_rate": 1.5588211382113823e-05, "loss": 0.066, "step": 54265 }, { "epoch": 0.661829268292683, "grad_norm": 0.48409879207611084, "learning_rate": 1.558780487804878e-05, "loss": 0.0465, "step": 54270 }, { "epoch": 0.661890243902439, "grad_norm": 0.860629141330719, "learning_rate": 1.5587398373983742e-05, "loss": 0.1268, "step": 54275 }, { "epoch": 0.6619512195121952, "grad_norm": 0.3233412802219391, "learning_rate": 1.55869918699187e-05, "loss": 0.0698, "step": 54280 }, { "epoch": 0.6620121951219512, "grad_norm": 0.47105419635772705, "learning_rate": 1.558658536585366e-05, "loss": 0.0592, "step": 54285 }, { "epoch": 0.6620731707317074, "grad_norm": 1.629585862159729, "learning_rate": 1.558617886178862e-05, "loss": 0.0873, "step": 54290 }, { "epoch": 0.6621341463414634, "grad_norm": 0.6299334764480591, "learning_rate": 1.5585772357723578e-05, "loss": 0.034, "step": 54295 }, { "epoch": 0.6621951219512195, "grad_norm": 0.3980330228805542, "learning_rate": 1.558536585365854e-05, "loss": 0.0605, "step": 54300 }, { "epoch": 0.6622560975609756, "grad_norm": 0.5361245274543762, "learning_rate": 1.5584959349593498e-05, "loss": 0.0305, "step": 54305 }, { "epoch": 0.6623170731707317, "grad_norm": 1.4944523572921753, "learning_rate": 1.5584552845528456e-05, "loss": 0.0509, "step": 54310 }, { "epoch": 0.6623780487804878, "grad_norm": 0.7090960741043091, "learning_rate": 1.5584146341463414e-05, "loss": 0.073, "step": 54315 }, { "epoch": 0.6624390243902439, "grad_norm": 0.5687544941902161, "learning_rate": 1.5583739837398376e-05, "loss": 0.0453, "step": 54320 }, { "epoch": 0.6625, "grad_norm": 0.43472301959991455, "learning_rate": 1.5583333333333334e-05, "loss": 0.096, "step": 54325 }, { "epoch": 0.6625609756097561, "grad_norm": 0.5340570211410522, "learning_rate": 1.5582926829268295e-05, "loss": 0.0419, "step": 54330 }, { "epoch": 0.6626219512195122, "grad_norm": 0.25658905506134033, "learning_rate": 1.5582520325203253e-05, "loss": 0.0423, "step": 54335 }, { "epoch": 0.6626829268292683, "grad_norm": 0.8001937866210938, "learning_rate": 1.558211382113821e-05, "loss": 0.0526, "step": 54340 }, { "epoch": 0.6627439024390244, "grad_norm": 0.6524427533149719, "learning_rate": 1.5581707317073173e-05, "loss": 0.0562, "step": 54345 }, { "epoch": 0.6628048780487805, "grad_norm": 0.455189049243927, "learning_rate": 1.558130081300813e-05, "loss": 0.0578, "step": 54350 }, { "epoch": 0.6628658536585366, "grad_norm": 0.4815024435520172, "learning_rate": 1.558089430894309e-05, "loss": 0.0333, "step": 54355 }, { "epoch": 0.6629268292682927, "grad_norm": 4.34987735748291, "learning_rate": 1.558048780487805e-05, "loss": 0.0454, "step": 54360 }, { "epoch": 0.6629878048780488, "grad_norm": 0.6865564584732056, "learning_rate": 1.558008130081301e-05, "loss": 0.0647, "step": 54365 }, { "epoch": 0.6630487804878049, "grad_norm": 0.38543394207954407, "learning_rate": 1.5579674796747967e-05, "loss": 0.0486, "step": 54370 }, { "epoch": 0.663109756097561, "grad_norm": 0.29257968068122864, "learning_rate": 1.557926829268293e-05, "loss": 0.065, "step": 54375 }, { "epoch": 0.6631707317073171, "grad_norm": 0.4126802384853363, "learning_rate": 1.5578861788617887e-05, "loss": 0.054, "step": 54380 }, { "epoch": 0.6632317073170731, "grad_norm": 1.2235684394836426, "learning_rate": 1.5578455284552848e-05, "loss": 0.0829, "step": 54385 }, { "epoch": 0.6632926829268293, "grad_norm": 0.5081592798233032, "learning_rate": 1.5578048780487806e-05, "loss": 0.0777, "step": 54390 }, { "epoch": 0.6633536585365853, "grad_norm": 0.636073648929596, "learning_rate": 1.5577642276422768e-05, "loss": 0.0614, "step": 54395 }, { "epoch": 0.6634146341463415, "grad_norm": 0.5710843205451965, "learning_rate": 1.5577235772357723e-05, "loss": 0.0563, "step": 54400 }, { "epoch": 0.6634756097560975, "grad_norm": 0.5829821825027466, "learning_rate": 1.5576829268292684e-05, "loss": 0.0541, "step": 54405 }, { "epoch": 0.6635365853658537, "grad_norm": 0.3754585087299347, "learning_rate": 1.5576422764227642e-05, "loss": 0.0487, "step": 54410 }, { "epoch": 0.6635975609756097, "grad_norm": 0.8475643396377563, "learning_rate": 1.5576016260162604e-05, "loss": 0.0852, "step": 54415 }, { "epoch": 0.6636585365853659, "grad_norm": 0.7253557443618774, "learning_rate": 1.5575609756097562e-05, "loss": 0.0448, "step": 54420 }, { "epoch": 0.6637195121951219, "grad_norm": 0.6544117331504822, "learning_rate": 1.5575203252032523e-05, "loss": 0.0609, "step": 54425 }, { "epoch": 0.6637804878048781, "grad_norm": 0.7056897878646851, "learning_rate": 1.557479674796748e-05, "loss": 0.0515, "step": 54430 }, { "epoch": 0.6638414634146341, "grad_norm": 0.7845697999000549, "learning_rate": 1.557439024390244e-05, "loss": 0.0414, "step": 54435 }, { "epoch": 0.6639024390243903, "grad_norm": 0.4618726372718811, "learning_rate": 1.5573983739837398e-05, "loss": 0.0472, "step": 54440 }, { "epoch": 0.6639634146341463, "grad_norm": 0.7386574745178223, "learning_rate": 1.557357723577236e-05, "loss": 0.0707, "step": 54445 }, { "epoch": 0.6640243902439025, "grad_norm": 0.3731212317943573, "learning_rate": 1.5573170731707317e-05, "loss": 0.0617, "step": 54450 }, { "epoch": 0.6640853658536585, "grad_norm": 2.7386045455932617, "learning_rate": 1.557276422764228e-05, "loss": 0.0706, "step": 54455 }, { "epoch": 0.6641463414634147, "grad_norm": 0.36970844864845276, "learning_rate": 1.5572357723577237e-05, "loss": 0.0547, "step": 54460 }, { "epoch": 0.6642073170731707, "grad_norm": 0.585273265838623, "learning_rate": 1.5571951219512195e-05, "loss": 0.0504, "step": 54465 }, { "epoch": 0.6642682926829269, "grad_norm": 0.22420983016490936, "learning_rate": 1.5571544715447157e-05, "loss": 0.045, "step": 54470 }, { "epoch": 0.6643292682926829, "grad_norm": 0.6357674598693848, "learning_rate": 1.5571138211382115e-05, "loss": 0.0759, "step": 54475 }, { "epoch": 0.6643902439024391, "grad_norm": 0.5286045670509338, "learning_rate": 1.5570731707317076e-05, "loss": 0.0748, "step": 54480 }, { "epoch": 0.6644512195121951, "grad_norm": 1.1465381383895874, "learning_rate": 1.5570325203252035e-05, "loss": 0.044, "step": 54485 }, { "epoch": 0.6645121951219513, "grad_norm": 0.554481029510498, "learning_rate": 1.5569918699186993e-05, "loss": 0.0422, "step": 54490 }, { "epoch": 0.6645731707317073, "grad_norm": 0.23373165726661682, "learning_rate": 1.556951219512195e-05, "loss": 0.0531, "step": 54495 }, { "epoch": 0.6646341463414634, "grad_norm": 1.5139000415802002, "learning_rate": 1.5569105691056912e-05, "loss": 0.0702, "step": 54500 }, { "epoch": 0.6646951219512195, "grad_norm": 2.8483309745788574, "learning_rate": 1.556869918699187e-05, "loss": 0.0509, "step": 54505 }, { "epoch": 0.6647560975609756, "grad_norm": 0.6967572569847107, "learning_rate": 1.5568292682926832e-05, "loss": 0.0594, "step": 54510 }, { "epoch": 0.6648170731707317, "grad_norm": 0.7981558442115784, "learning_rate": 1.556788617886179e-05, "loss": 0.055, "step": 54515 }, { "epoch": 0.6648780487804878, "grad_norm": 1.4191340208053589, "learning_rate": 1.5567479674796748e-05, "loss": 0.0729, "step": 54520 }, { "epoch": 0.6649390243902439, "grad_norm": 0.6294922828674316, "learning_rate": 1.5567073170731706e-05, "loss": 0.0722, "step": 54525 }, { "epoch": 0.665, "grad_norm": 0.2921954393386841, "learning_rate": 1.5566666666666668e-05, "loss": 0.0313, "step": 54530 }, { "epoch": 0.6650609756097561, "grad_norm": 0.6856077313423157, "learning_rate": 1.5566260162601626e-05, "loss": 0.0583, "step": 54535 }, { "epoch": 0.6651219512195122, "grad_norm": 0.5735582709312439, "learning_rate": 1.5565853658536587e-05, "loss": 0.0783, "step": 54540 }, { "epoch": 0.6651829268292683, "grad_norm": 0.8637853264808655, "learning_rate": 1.5565447154471546e-05, "loss": 0.0539, "step": 54545 }, { "epoch": 0.6652439024390244, "grad_norm": 0.4582096338272095, "learning_rate": 1.5565040650406504e-05, "loss": 0.0468, "step": 54550 }, { "epoch": 0.6653048780487805, "grad_norm": 0.8877779841423035, "learning_rate": 1.5564634146341465e-05, "loss": 0.0587, "step": 54555 }, { "epoch": 0.6653658536585366, "grad_norm": 0.35017386078834534, "learning_rate": 1.5564227642276423e-05, "loss": 0.032, "step": 54560 }, { "epoch": 0.6654268292682927, "grad_norm": 0.5196928381919861, "learning_rate": 1.5563821138211385e-05, "loss": 0.0449, "step": 54565 }, { "epoch": 0.6654878048780488, "grad_norm": 0.675914466381073, "learning_rate": 1.5563414634146343e-05, "loss": 0.0589, "step": 54570 }, { "epoch": 0.6655487804878049, "grad_norm": 1.5397257804870605, "learning_rate": 1.5563008130081305e-05, "loss": 0.0672, "step": 54575 }, { "epoch": 0.665609756097561, "grad_norm": 0.6953961849212646, "learning_rate": 1.556260162601626e-05, "loss": 0.0507, "step": 54580 }, { "epoch": 0.665670731707317, "grad_norm": 0.7308576703071594, "learning_rate": 1.556219512195122e-05, "loss": 0.0448, "step": 54585 }, { "epoch": 0.6657317073170732, "grad_norm": 0.4197361469268799, "learning_rate": 1.556178861788618e-05, "loss": 0.0491, "step": 54590 }, { "epoch": 0.6657926829268292, "grad_norm": 0.3043724298477173, "learning_rate": 1.556138211382114e-05, "loss": 0.0346, "step": 54595 }, { "epoch": 0.6658536585365854, "grad_norm": 0.7378056645393372, "learning_rate": 1.55609756097561e-05, "loss": 0.044, "step": 54600 }, { "epoch": 0.6659146341463414, "grad_norm": 1.0555576086044312, "learning_rate": 1.556056910569106e-05, "loss": 0.0308, "step": 54605 }, { "epoch": 0.6659756097560976, "grad_norm": 1.697434663772583, "learning_rate": 1.5560162601626018e-05, "loss": 0.0448, "step": 54610 }, { "epoch": 0.6660365853658536, "grad_norm": 0.3442808985710144, "learning_rate": 1.5559756097560976e-05, "loss": 0.0682, "step": 54615 }, { "epoch": 0.6660975609756098, "grad_norm": 0.21884185075759888, "learning_rate": 1.5559349593495934e-05, "loss": 0.0599, "step": 54620 }, { "epoch": 0.6661585365853658, "grad_norm": 1.001953125, "learning_rate": 1.5558943089430896e-05, "loss": 0.0619, "step": 54625 }, { "epoch": 0.666219512195122, "grad_norm": 0.535204291343689, "learning_rate": 1.5558536585365854e-05, "loss": 0.0362, "step": 54630 }, { "epoch": 0.666280487804878, "grad_norm": 0.4564361572265625, "learning_rate": 1.5558130081300816e-05, "loss": 0.0431, "step": 54635 }, { "epoch": 0.6663414634146342, "grad_norm": 0.4586341083049774, "learning_rate": 1.5557723577235774e-05, "loss": 0.029, "step": 54640 }, { "epoch": 0.6664024390243902, "grad_norm": 0.521961510181427, "learning_rate": 1.5557317073170732e-05, "loss": 0.0492, "step": 54645 }, { "epoch": 0.6664634146341464, "grad_norm": 0.6199505925178528, "learning_rate": 1.5556910569105693e-05, "loss": 0.0925, "step": 54650 }, { "epoch": 0.6665243902439024, "grad_norm": 0.3252430260181427, "learning_rate": 1.555650406504065e-05, "loss": 0.0411, "step": 54655 }, { "epoch": 0.6665853658536586, "grad_norm": 0.5899860262870789, "learning_rate": 1.5556097560975613e-05, "loss": 0.0504, "step": 54660 }, { "epoch": 0.6666463414634146, "grad_norm": 0.6321107745170593, "learning_rate": 1.555569105691057e-05, "loss": 0.0511, "step": 54665 }, { "epoch": 0.6667073170731708, "grad_norm": 0.4008752405643463, "learning_rate": 1.555528455284553e-05, "loss": 0.0554, "step": 54670 }, { "epoch": 0.6667682926829268, "grad_norm": 0.571216881275177, "learning_rate": 1.5554878048780487e-05, "loss": 0.0496, "step": 54675 }, { "epoch": 0.666829268292683, "grad_norm": 0.9215250611305237, "learning_rate": 1.555447154471545e-05, "loss": 0.0562, "step": 54680 }, { "epoch": 0.666890243902439, "grad_norm": 1.7579025030136108, "learning_rate": 1.5554065040650407e-05, "loss": 0.0437, "step": 54685 }, { "epoch": 0.6669512195121952, "grad_norm": 1.6803191900253296, "learning_rate": 1.555365853658537e-05, "loss": 0.0671, "step": 54690 }, { "epoch": 0.6670121951219512, "grad_norm": 1.1375397443771362, "learning_rate": 1.5553252032520327e-05, "loss": 0.0556, "step": 54695 }, { "epoch": 0.6670731707317074, "grad_norm": 0.43154817819595337, "learning_rate": 1.5552845528455285e-05, "loss": 0.0491, "step": 54700 }, { "epoch": 0.6671341463414634, "grad_norm": 0.7225391864776611, "learning_rate": 1.5552439024390243e-05, "loss": 0.0629, "step": 54705 }, { "epoch": 0.6671951219512195, "grad_norm": 0.5768401026725769, "learning_rate": 1.5552032520325204e-05, "loss": 0.0369, "step": 54710 }, { "epoch": 0.6672560975609756, "grad_norm": 0.737353503704071, "learning_rate": 1.5551626016260163e-05, "loss": 0.0359, "step": 54715 }, { "epoch": 0.6673170731707317, "grad_norm": 0.672290027141571, "learning_rate": 1.5551219512195124e-05, "loss": 0.0734, "step": 54720 }, { "epoch": 0.6673780487804878, "grad_norm": 0.8042905330657959, "learning_rate": 1.5550813008130082e-05, "loss": 0.0679, "step": 54725 }, { "epoch": 0.6674390243902439, "grad_norm": 0.6462570428848267, "learning_rate": 1.555040650406504e-05, "loss": 0.0311, "step": 54730 }, { "epoch": 0.6675, "grad_norm": 0.4620782732963562, "learning_rate": 1.5550000000000002e-05, "loss": 0.07, "step": 54735 }, { "epoch": 0.6675609756097561, "grad_norm": 0.41989865899086, "learning_rate": 1.554959349593496e-05, "loss": 0.0559, "step": 54740 }, { "epoch": 0.6676219512195122, "grad_norm": 0.6730478405952454, "learning_rate": 1.554918699186992e-05, "loss": 0.0797, "step": 54745 }, { "epoch": 0.6676829268292683, "grad_norm": 0.9819113612174988, "learning_rate": 1.554878048780488e-05, "loss": 0.0574, "step": 54750 }, { "epoch": 0.6677439024390244, "grad_norm": 0.4763883948326111, "learning_rate": 1.554837398373984e-05, "loss": 0.0659, "step": 54755 }, { "epoch": 0.6678048780487805, "grad_norm": 0.6515898108482361, "learning_rate": 1.5547967479674796e-05, "loss": 0.0438, "step": 54760 }, { "epoch": 0.6678658536585366, "grad_norm": 0.6127358675003052, "learning_rate": 1.5547560975609757e-05, "loss": 0.0607, "step": 54765 }, { "epoch": 0.6679268292682927, "grad_norm": 0.4565301537513733, "learning_rate": 1.5547154471544716e-05, "loss": 0.0478, "step": 54770 }, { "epoch": 0.6679878048780488, "grad_norm": 0.976131021976471, "learning_rate": 1.5546747967479677e-05, "loss": 0.0461, "step": 54775 }, { "epoch": 0.6680487804878049, "grad_norm": 0.9622756242752075, "learning_rate": 1.5546341463414635e-05, "loss": 0.0538, "step": 54780 }, { "epoch": 0.668109756097561, "grad_norm": 0.609056830406189, "learning_rate": 1.5545934959349597e-05, "loss": 0.0315, "step": 54785 }, { "epoch": 0.6681707317073171, "grad_norm": 0.6599568724632263, "learning_rate": 1.554552845528455e-05, "loss": 0.0356, "step": 54790 }, { "epoch": 0.6682317073170732, "grad_norm": 0.7666727304458618, "learning_rate": 1.5545121951219513e-05, "loss": 0.0707, "step": 54795 }, { "epoch": 0.6682926829268293, "grad_norm": 0.684594988822937, "learning_rate": 1.554471544715447e-05, "loss": 0.0638, "step": 54800 }, { "epoch": 0.6683536585365853, "grad_norm": 0.5181661248207092, "learning_rate": 1.5544308943089433e-05, "loss": 0.0625, "step": 54805 }, { "epoch": 0.6684146341463415, "grad_norm": 0.5036812424659729, "learning_rate": 1.554390243902439e-05, "loss": 0.0475, "step": 54810 }, { "epoch": 0.6684756097560975, "grad_norm": 0.4090081751346588, "learning_rate": 1.5543495934959352e-05, "loss": 0.0536, "step": 54815 }, { "epoch": 0.6685365853658537, "grad_norm": 0.5405166149139404, "learning_rate": 1.554308943089431e-05, "loss": 0.0567, "step": 54820 }, { "epoch": 0.6685975609756097, "grad_norm": 0.7065130472183228, "learning_rate": 1.554268292682927e-05, "loss": 0.0509, "step": 54825 }, { "epoch": 0.6686585365853659, "grad_norm": 0.9629552364349365, "learning_rate": 1.554227642276423e-05, "loss": 0.042, "step": 54830 }, { "epoch": 0.6687195121951219, "grad_norm": 0.8706586956977844, "learning_rate": 1.5541869918699188e-05, "loss": 0.0423, "step": 54835 }, { "epoch": 0.6687804878048781, "grad_norm": 0.37541529536247253, "learning_rate": 1.554146341463415e-05, "loss": 0.068, "step": 54840 }, { "epoch": 0.6688414634146341, "grad_norm": 0.6277011632919312, "learning_rate": 1.5541056910569108e-05, "loss": 0.068, "step": 54845 }, { "epoch": 0.6689024390243903, "grad_norm": 0.3688236474990845, "learning_rate": 1.5540650406504066e-05, "loss": 0.0337, "step": 54850 }, { "epoch": 0.6689634146341463, "grad_norm": 1.3013801574707031, "learning_rate": 1.5540243902439024e-05, "loss": 0.0703, "step": 54855 }, { "epoch": 0.6690243902439025, "grad_norm": 0.3830525279045105, "learning_rate": 1.5539837398373986e-05, "loss": 0.0584, "step": 54860 }, { "epoch": 0.6690853658536585, "grad_norm": 0.6437033414840698, "learning_rate": 1.5539430894308944e-05, "loss": 0.0544, "step": 54865 }, { "epoch": 0.6691463414634147, "grad_norm": 0.7346513271331787, "learning_rate": 1.5539024390243905e-05, "loss": 0.0486, "step": 54870 }, { "epoch": 0.6692073170731707, "grad_norm": 0.7257310748100281, "learning_rate": 1.5538617886178863e-05, "loss": 0.0463, "step": 54875 }, { "epoch": 0.6692682926829269, "grad_norm": 0.3467261493206024, "learning_rate": 1.553821138211382e-05, "loss": 0.0697, "step": 54880 }, { "epoch": 0.6693292682926829, "grad_norm": 0.49449795484542847, "learning_rate": 1.553780487804878e-05, "loss": 0.038, "step": 54885 }, { "epoch": 0.6693902439024391, "grad_norm": 0.4689040184020996, "learning_rate": 1.553739837398374e-05, "loss": 0.0504, "step": 54890 }, { "epoch": 0.6694512195121951, "grad_norm": 0.6229603290557861, "learning_rate": 1.55369918699187e-05, "loss": 0.0451, "step": 54895 }, { "epoch": 0.6695121951219513, "grad_norm": 1.2487256526947021, "learning_rate": 1.553658536585366e-05, "loss": 0.0761, "step": 54900 }, { "epoch": 0.6695731707317073, "grad_norm": 0.5710994601249695, "learning_rate": 1.553617886178862e-05, "loss": 0.0562, "step": 54905 }, { "epoch": 0.6696341463414635, "grad_norm": 0.8027651309967041, "learning_rate": 1.5535772357723577e-05, "loss": 0.0665, "step": 54910 }, { "epoch": 0.6696951219512195, "grad_norm": 2.9050209522247314, "learning_rate": 1.553536585365854e-05, "loss": 0.0525, "step": 54915 }, { "epoch": 0.6697560975609756, "grad_norm": 0.5459489226341248, "learning_rate": 1.5534959349593497e-05, "loss": 0.0441, "step": 54920 }, { "epoch": 0.6698170731707317, "grad_norm": 0.8664422035217285, "learning_rate": 1.5534552845528458e-05, "loss": 0.0613, "step": 54925 }, { "epoch": 0.6698780487804878, "grad_norm": 0.5028940439224243, "learning_rate": 1.5534146341463416e-05, "loss": 0.0822, "step": 54930 }, { "epoch": 0.6699390243902439, "grad_norm": 0.3963005542755127, "learning_rate": 1.5533739837398374e-05, "loss": 0.0495, "step": 54935 }, { "epoch": 0.67, "grad_norm": 0.3219781517982483, "learning_rate": 1.5533333333333333e-05, "loss": 0.0346, "step": 54940 }, { "epoch": 0.6700609756097561, "grad_norm": 0.5992798805236816, "learning_rate": 1.5532926829268294e-05, "loss": 0.0442, "step": 54945 }, { "epoch": 0.6701219512195122, "grad_norm": 0.6529350280761719, "learning_rate": 1.5532520325203252e-05, "loss": 0.0547, "step": 54950 }, { "epoch": 0.6701829268292683, "grad_norm": 0.6129862070083618, "learning_rate": 1.5532113821138214e-05, "loss": 0.0588, "step": 54955 }, { "epoch": 0.6702439024390244, "grad_norm": 0.6334772706031799, "learning_rate": 1.5531707317073172e-05, "loss": 0.0545, "step": 54960 }, { "epoch": 0.6703048780487805, "grad_norm": 0.4174460172653198, "learning_rate": 1.5531300813008133e-05, "loss": 0.0619, "step": 54965 }, { "epoch": 0.6703658536585366, "grad_norm": 0.44775035977363586, "learning_rate": 1.5530894308943088e-05, "loss": 0.055, "step": 54970 }, { "epoch": 0.6704268292682927, "grad_norm": 0.39179664850234985, "learning_rate": 1.553048780487805e-05, "loss": 0.1147, "step": 54975 }, { "epoch": 0.6704878048780488, "grad_norm": 0.6302482485771179, "learning_rate": 1.5530081300813008e-05, "loss": 0.0491, "step": 54980 }, { "epoch": 0.6705487804878049, "grad_norm": 0.4746313691139221, "learning_rate": 1.552967479674797e-05, "loss": 0.0391, "step": 54985 }, { "epoch": 0.670609756097561, "grad_norm": 0.8810680508613586, "learning_rate": 1.5529268292682927e-05, "loss": 0.0472, "step": 54990 }, { "epoch": 0.6706707317073171, "grad_norm": 1.0130376815795898, "learning_rate": 1.552886178861789e-05, "loss": 0.053, "step": 54995 }, { "epoch": 0.6707317073170732, "grad_norm": 1.254568099975586, "learning_rate": 1.5528455284552847e-05, "loss": 0.0386, "step": 55000 }, { "epoch": 0.6707926829268293, "grad_norm": 0.3881100118160248, "learning_rate": 1.5528048780487805e-05, "loss": 0.0538, "step": 55005 }, { "epoch": 0.6708536585365854, "grad_norm": 1.7901235818862915, "learning_rate": 1.5527642276422767e-05, "loss": 0.0452, "step": 55010 }, { "epoch": 0.6709146341463414, "grad_norm": 0.5405375957489014, "learning_rate": 1.5527235772357725e-05, "loss": 0.0572, "step": 55015 }, { "epoch": 0.6709756097560976, "grad_norm": 0.8877797722816467, "learning_rate": 1.5526829268292686e-05, "loss": 0.0614, "step": 55020 }, { "epoch": 0.6710365853658536, "grad_norm": 0.6779857277870178, "learning_rate": 1.5526422764227644e-05, "loss": 0.0658, "step": 55025 }, { "epoch": 0.6710975609756098, "grad_norm": 1.570659875869751, "learning_rate": 1.5526016260162603e-05, "loss": 0.056, "step": 55030 }, { "epoch": 0.6711585365853658, "grad_norm": 0.5024916529655457, "learning_rate": 1.552560975609756e-05, "loss": 0.07, "step": 55035 }, { "epoch": 0.671219512195122, "grad_norm": 0.893085777759552, "learning_rate": 1.5525203252032522e-05, "loss": 0.0682, "step": 55040 }, { "epoch": 0.671280487804878, "grad_norm": 1.0620123147964478, "learning_rate": 1.552479674796748e-05, "loss": 0.0466, "step": 55045 }, { "epoch": 0.6713414634146342, "grad_norm": 0.3696717917919159, "learning_rate": 1.5524390243902442e-05, "loss": 0.0634, "step": 55050 }, { "epoch": 0.6714024390243902, "grad_norm": 0.63874351978302, "learning_rate": 1.55239837398374e-05, "loss": 0.0419, "step": 55055 }, { "epoch": 0.6714634146341464, "grad_norm": 0.531804084777832, "learning_rate": 1.5523577235772358e-05, "loss": 0.0421, "step": 55060 }, { "epoch": 0.6715243902439024, "grad_norm": 0.49608683586120605, "learning_rate": 1.5523170731707316e-05, "loss": 0.0843, "step": 55065 }, { "epoch": 0.6715853658536586, "grad_norm": 0.5325556993484497, "learning_rate": 1.5522764227642278e-05, "loss": 0.0498, "step": 55070 }, { "epoch": 0.6716463414634146, "grad_norm": 0.7904624342918396, "learning_rate": 1.5522357723577236e-05, "loss": 0.045, "step": 55075 }, { "epoch": 0.6717073170731708, "grad_norm": 0.5557531118392944, "learning_rate": 1.5521951219512197e-05, "loss": 0.0416, "step": 55080 }, { "epoch": 0.6717682926829268, "grad_norm": 0.8925643563270569, "learning_rate": 1.5521544715447156e-05, "loss": 0.0676, "step": 55085 }, { "epoch": 0.671829268292683, "grad_norm": 0.6793676614761353, "learning_rate": 1.5521138211382114e-05, "loss": 0.0526, "step": 55090 }, { "epoch": 0.671890243902439, "grad_norm": 0.4193180501461029, "learning_rate": 1.5520731707317075e-05, "loss": 0.0351, "step": 55095 }, { "epoch": 0.6719512195121952, "grad_norm": 0.9071393013000488, "learning_rate": 1.5520325203252033e-05, "loss": 0.0535, "step": 55100 }, { "epoch": 0.6720121951219512, "grad_norm": 0.4029931128025055, "learning_rate": 1.5519918699186995e-05, "loss": 0.0524, "step": 55105 }, { "epoch": 0.6720731707317074, "grad_norm": 0.8998318910598755, "learning_rate": 1.5519512195121953e-05, "loss": 0.0574, "step": 55110 }, { "epoch": 0.6721341463414634, "grad_norm": 0.6501692533493042, "learning_rate": 1.551910569105691e-05, "loss": 0.0461, "step": 55115 }, { "epoch": 0.6721951219512196, "grad_norm": 0.48421674966812134, "learning_rate": 1.551869918699187e-05, "loss": 0.0461, "step": 55120 }, { "epoch": 0.6722560975609756, "grad_norm": 0.3750430643558502, "learning_rate": 1.551829268292683e-05, "loss": 0.0503, "step": 55125 }, { "epoch": 0.6723170731707317, "grad_norm": 1.4169371128082275, "learning_rate": 1.551788617886179e-05, "loss": 0.058, "step": 55130 }, { "epoch": 0.6723780487804878, "grad_norm": 0.7318253517150879, "learning_rate": 1.551747967479675e-05, "loss": 0.057, "step": 55135 }, { "epoch": 0.672439024390244, "grad_norm": 0.43502068519592285, "learning_rate": 1.551707317073171e-05, "loss": 0.0664, "step": 55140 }, { "epoch": 0.6725, "grad_norm": 0.625858724117279, "learning_rate": 1.551666666666667e-05, "loss": 0.0533, "step": 55145 }, { "epoch": 0.6725609756097561, "grad_norm": 0.6305286884307861, "learning_rate": 1.5516260162601625e-05, "loss": 0.0774, "step": 55150 }, { "epoch": 0.6726219512195122, "grad_norm": 0.5378586649894714, "learning_rate": 1.5515853658536586e-05, "loss": 0.0515, "step": 55155 }, { "epoch": 0.6726829268292683, "grad_norm": 0.7881559133529663, "learning_rate": 1.5515447154471544e-05, "loss": 0.049, "step": 55160 }, { "epoch": 0.6727439024390244, "grad_norm": 0.3938237130641937, "learning_rate": 1.5515040650406506e-05, "loss": 0.0707, "step": 55165 }, { "epoch": 0.6728048780487805, "grad_norm": 0.47540682554244995, "learning_rate": 1.5514634146341464e-05, "loss": 0.0566, "step": 55170 }, { "epoch": 0.6728658536585366, "grad_norm": 0.8767701983451843, "learning_rate": 1.5514227642276426e-05, "loss": 0.0684, "step": 55175 }, { "epoch": 0.6729268292682927, "grad_norm": 0.5931371450424194, "learning_rate": 1.5513821138211384e-05, "loss": 0.0725, "step": 55180 }, { "epoch": 0.6729878048780488, "grad_norm": 0.5726631283760071, "learning_rate": 1.5513414634146342e-05, "loss": 0.0601, "step": 55185 }, { "epoch": 0.6730487804878049, "grad_norm": 0.11693308502435684, "learning_rate": 1.5513008130081303e-05, "loss": 0.0421, "step": 55190 }, { "epoch": 0.673109756097561, "grad_norm": 0.3862452507019043, "learning_rate": 1.551260162601626e-05, "loss": 0.0417, "step": 55195 }, { "epoch": 0.6731707317073171, "grad_norm": 0.6529622077941895, "learning_rate": 1.551219512195122e-05, "loss": 0.0466, "step": 55200 }, { "epoch": 0.6732317073170732, "grad_norm": 0.32353511452674866, "learning_rate": 1.551178861788618e-05, "loss": 0.0436, "step": 55205 }, { "epoch": 0.6732926829268293, "grad_norm": 0.6088429689407349, "learning_rate": 1.551138211382114e-05, "loss": 0.0624, "step": 55210 }, { "epoch": 0.6733536585365854, "grad_norm": 1.283272385597229, "learning_rate": 1.5510975609756097e-05, "loss": 0.0689, "step": 55215 }, { "epoch": 0.6734146341463415, "grad_norm": 0.17863093316555023, "learning_rate": 1.551056910569106e-05, "loss": 0.0567, "step": 55220 }, { "epoch": 0.6734756097560975, "grad_norm": 0.5943081974983215, "learning_rate": 1.5510162601626017e-05, "loss": 0.0587, "step": 55225 }, { "epoch": 0.6735365853658537, "grad_norm": 0.4248928725719452, "learning_rate": 1.550975609756098e-05, "loss": 0.0552, "step": 55230 }, { "epoch": 0.6735975609756097, "grad_norm": 0.5114649534225464, "learning_rate": 1.5509349593495937e-05, "loss": 0.0454, "step": 55235 }, { "epoch": 0.6736585365853659, "grad_norm": 0.5269162654876709, "learning_rate": 1.5508943089430895e-05, "loss": 0.0451, "step": 55240 }, { "epoch": 0.6737195121951219, "grad_norm": 0.23805645108222961, "learning_rate": 1.5508536585365853e-05, "loss": 0.035, "step": 55245 }, { "epoch": 0.6737804878048781, "grad_norm": 0.4186471104621887, "learning_rate": 1.5508130081300814e-05, "loss": 0.0537, "step": 55250 }, { "epoch": 0.6738414634146341, "grad_norm": 0.6907790899276733, "learning_rate": 1.5507723577235773e-05, "loss": 0.0451, "step": 55255 }, { "epoch": 0.6739024390243903, "grad_norm": 0.7140840888023376, "learning_rate": 1.5507317073170734e-05, "loss": 0.0411, "step": 55260 }, { "epoch": 0.6739634146341463, "grad_norm": 0.5437630414962769, "learning_rate": 1.5506910569105692e-05, "loss": 0.034, "step": 55265 }, { "epoch": 0.6740243902439025, "grad_norm": 1.0942506790161133, "learning_rate": 1.550650406504065e-05, "loss": 0.0663, "step": 55270 }, { "epoch": 0.6740853658536585, "grad_norm": 0.36937612295150757, "learning_rate": 1.5506097560975612e-05, "loss": 0.0411, "step": 55275 }, { "epoch": 0.6741463414634147, "grad_norm": 0.657821536064148, "learning_rate": 1.550569105691057e-05, "loss": 0.0534, "step": 55280 }, { "epoch": 0.6742073170731707, "grad_norm": 0.6901212930679321, "learning_rate": 1.550528455284553e-05, "loss": 0.0838, "step": 55285 }, { "epoch": 0.6742682926829269, "grad_norm": 0.3121962249279022, "learning_rate": 1.550487804878049e-05, "loss": 0.0289, "step": 55290 }, { "epoch": 0.6743292682926829, "grad_norm": 0.5205137729644775, "learning_rate": 1.5504471544715448e-05, "loss": 0.0875, "step": 55295 }, { "epoch": 0.6743902439024391, "grad_norm": 1.3474198579788208, "learning_rate": 1.5504065040650406e-05, "loss": 0.0697, "step": 55300 }, { "epoch": 0.6744512195121951, "grad_norm": 0.41602644324302673, "learning_rate": 1.5503658536585367e-05, "loss": 0.0448, "step": 55305 }, { "epoch": 0.6745121951219513, "grad_norm": 0.5348101258277893, "learning_rate": 1.5503252032520325e-05, "loss": 0.0549, "step": 55310 }, { "epoch": 0.6745731707317073, "grad_norm": 0.3604995012283325, "learning_rate": 1.5502845528455287e-05, "loss": 0.042, "step": 55315 }, { "epoch": 0.6746341463414635, "grad_norm": 0.47744953632354736, "learning_rate": 1.5502439024390245e-05, "loss": 0.038, "step": 55320 }, { "epoch": 0.6746951219512195, "grad_norm": 0.6088336706161499, "learning_rate": 1.5502032520325207e-05, "loss": 0.0612, "step": 55325 }, { "epoch": 0.6747560975609757, "grad_norm": 0.544082522392273, "learning_rate": 1.550162601626016e-05, "loss": 0.0442, "step": 55330 }, { "epoch": 0.6748170731707317, "grad_norm": 0.6118254065513611, "learning_rate": 1.5501219512195123e-05, "loss": 0.0442, "step": 55335 }, { "epoch": 0.6748780487804878, "grad_norm": 0.6745907068252563, "learning_rate": 1.550081300813008e-05, "loss": 0.046, "step": 55340 }, { "epoch": 0.6749390243902439, "grad_norm": 0.8579854369163513, "learning_rate": 1.5500406504065043e-05, "loss": 0.0639, "step": 55345 }, { "epoch": 0.675, "grad_norm": 0.28364869952201843, "learning_rate": 1.55e-05, "loss": 0.0394, "step": 55350 }, { "epoch": 0.6750609756097561, "grad_norm": 0.5403698682785034, "learning_rate": 1.5499593495934962e-05, "loss": 0.0552, "step": 55355 }, { "epoch": 0.6751219512195122, "grad_norm": 0.6209880113601685, "learning_rate": 1.549918699186992e-05, "loss": 0.0502, "step": 55360 }, { "epoch": 0.6751829268292683, "grad_norm": 0.521246075630188, "learning_rate": 1.549878048780488e-05, "loss": 0.0412, "step": 55365 }, { "epoch": 0.6752439024390244, "grad_norm": 0.6776789426803589, "learning_rate": 1.549837398373984e-05, "loss": 0.0491, "step": 55370 }, { "epoch": 0.6753048780487805, "grad_norm": 0.6936069130897522, "learning_rate": 1.5497967479674798e-05, "loss": 0.0703, "step": 55375 }, { "epoch": 0.6753658536585366, "grad_norm": 1.1364457607269287, "learning_rate": 1.5497560975609756e-05, "loss": 0.0482, "step": 55380 }, { "epoch": 0.6754268292682927, "grad_norm": 0.6228162050247192, "learning_rate": 1.5497154471544718e-05, "loss": 0.0266, "step": 55385 }, { "epoch": 0.6754878048780488, "grad_norm": 0.46804723143577576, "learning_rate": 1.5496747967479676e-05, "loss": 0.0429, "step": 55390 }, { "epoch": 0.6755487804878049, "grad_norm": 0.2695215344429016, "learning_rate": 1.5496341463414634e-05, "loss": 0.0314, "step": 55395 }, { "epoch": 0.675609756097561, "grad_norm": 0.7241783738136292, "learning_rate": 1.5495934959349596e-05, "loss": 0.0545, "step": 55400 }, { "epoch": 0.6756707317073171, "grad_norm": 0.5985259413719177, "learning_rate": 1.5495528455284554e-05, "loss": 0.0366, "step": 55405 }, { "epoch": 0.6757317073170732, "grad_norm": 0.7602781057357788, "learning_rate": 1.5495121951219515e-05, "loss": 0.0502, "step": 55410 }, { "epoch": 0.6757926829268293, "grad_norm": 0.6641671061515808, "learning_rate": 1.5494715447154473e-05, "loss": 0.0767, "step": 55415 }, { "epoch": 0.6758536585365854, "grad_norm": 0.25624024868011475, "learning_rate": 1.549430894308943e-05, "loss": 0.0325, "step": 55420 }, { "epoch": 0.6759146341463415, "grad_norm": 0.3230082094669342, "learning_rate": 1.549390243902439e-05, "loss": 0.048, "step": 55425 }, { "epoch": 0.6759756097560976, "grad_norm": 1.198527216911316, "learning_rate": 1.549349593495935e-05, "loss": 0.062, "step": 55430 }, { "epoch": 0.6760365853658536, "grad_norm": 0.68063884973526, "learning_rate": 1.549308943089431e-05, "loss": 0.0461, "step": 55435 }, { "epoch": 0.6760975609756098, "grad_norm": 1.4207340478897095, "learning_rate": 1.549268292682927e-05, "loss": 0.0753, "step": 55440 }, { "epoch": 0.6761585365853658, "grad_norm": 0.5541383028030396, "learning_rate": 1.549227642276423e-05, "loss": 0.0661, "step": 55445 }, { "epoch": 0.676219512195122, "grad_norm": 1.3317002058029175, "learning_rate": 1.5491869918699187e-05, "loss": 0.0374, "step": 55450 }, { "epoch": 0.676280487804878, "grad_norm": 0.8501515984535217, "learning_rate": 1.549146341463415e-05, "loss": 0.0763, "step": 55455 }, { "epoch": 0.6763414634146342, "grad_norm": 0.5003390312194824, "learning_rate": 1.5491056910569107e-05, "loss": 0.0447, "step": 55460 }, { "epoch": 0.6764024390243902, "grad_norm": 0.3376293480396271, "learning_rate": 1.5490650406504065e-05, "loss": 0.0412, "step": 55465 }, { "epoch": 0.6764634146341464, "grad_norm": 0.4232717752456665, "learning_rate": 1.5490243902439026e-05, "loss": 0.0361, "step": 55470 }, { "epoch": 0.6765243902439024, "grad_norm": 0.889833927154541, "learning_rate": 1.5489837398373984e-05, "loss": 0.0595, "step": 55475 }, { "epoch": 0.6765853658536586, "grad_norm": 0.048058707267045975, "learning_rate": 1.5489430894308942e-05, "loss": 0.0675, "step": 55480 }, { "epoch": 0.6766463414634146, "grad_norm": 0.45834797620773315, "learning_rate": 1.5489024390243904e-05, "loss": 0.047, "step": 55485 }, { "epoch": 0.6767073170731708, "grad_norm": 0.6397189497947693, "learning_rate": 1.5488617886178862e-05, "loss": 0.0565, "step": 55490 }, { "epoch": 0.6767682926829268, "grad_norm": 0.5315234661102295, "learning_rate": 1.5488211382113824e-05, "loss": 0.0488, "step": 55495 }, { "epoch": 0.676829268292683, "grad_norm": 0.5368245244026184, "learning_rate": 1.5487804878048782e-05, "loss": 0.026, "step": 55500 }, { "epoch": 0.676890243902439, "grad_norm": 0.3366205096244812, "learning_rate": 1.5487398373983743e-05, "loss": 0.056, "step": 55505 }, { "epoch": 0.6769512195121952, "grad_norm": 0.7360542416572571, "learning_rate": 1.5486991869918698e-05, "loss": 0.0829, "step": 55510 }, { "epoch": 0.6770121951219512, "grad_norm": 0.6378380060195923, "learning_rate": 1.548658536585366e-05, "loss": 0.0578, "step": 55515 }, { "epoch": 0.6770731707317074, "grad_norm": 0.8210205435752869, "learning_rate": 1.5486178861788618e-05, "loss": 0.0661, "step": 55520 }, { "epoch": 0.6771341463414634, "grad_norm": 0.6612043976783752, "learning_rate": 1.548577235772358e-05, "loss": 0.0686, "step": 55525 }, { "epoch": 0.6771951219512196, "grad_norm": 1.7470941543579102, "learning_rate": 1.5485365853658537e-05, "loss": 0.0839, "step": 55530 }, { "epoch": 0.6772560975609756, "grad_norm": 0.23122966289520264, "learning_rate": 1.54849593495935e-05, "loss": 0.0514, "step": 55535 }, { "epoch": 0.6773170731707318, "grad_norm": 0.7308301329612732, "learning_rate": 1.5484552845528457e-05, "loss": 0.0417, "step": 55540 }, { "epoch": 0.6773780487804878, "grad_norm": 0.4802641272544861, "learning_rate": 1.5484146341463415e-05, "loss": 0.0423, "step": 55545 }, { "epoch": 0.677439024390244, "grad_norm": 1.8732424974441528, "learning_rate": 1.5483739837398377e-05, "loss": 0.0609, "step": 55550 }, { "epoch": 0.6775, "grad_norm": 0.6456295251846313, "learning_rate": 1.5483333333333335e-05, "loss": 0.0446, "step": 55555 }, { "epoch": 0.6775609756097561, "grad_norm": 0.8144656419754028, "learning_rate": 1.5482926829268293e-05, "loss": 0.0705, "step": 55560 }, { "epoch": 0.6776219512195122, "grad_norm": 0.44065985083580017, "learning_rate": 1.5482520325203254e-05, "loss": 0.0519, "step": 55565 }, { "epoch": 0.6776829268292683, "grad_norm": 0.7815595269203186, "learning_rate": 1.5482113821138213e-05, "loss": 0.0554, "step": 55570 }, { "epoch": 0.6777439024390244, "grad_norm": 0.8480566740036011, "learning_rate": 1.548170731707317e-05, "loss": 0.0391, "step": 55575 }, { "epoch": 0.6778048780487805, "grad_norm": 0.5203797817230225, "learning_rate": 1.5481300813008132e-05, "loss": 0.0407, "step": 55580 }, { "epoch": 0.6778658536585366, "grad_norm": 1.00516676902771, "learning_rate": 1.548089430894309e-05, "loss": 0.0447, "step": 55585 }, { "epoch": 0.6779268292682927, "grad_norm": 0.8567388653755188, "learning_rate": 1.5480487804878052e-05, "loss": 0.0647, "step": 55590 }, { "epoch": 0.6779878048780488, "grad_norm": 0.4677681028842926, "learning_rate": 1.548008130081301e-05, "loss": 0.0635, "step": 55595 }, { "epoch": 0.6780487804878049, "grad_norm": 0.7455037832260132, "learning_rate": 1.5479674796747968e-05, "loss": 0.038, "step": 55600 }, { "epoch": 0.678109756097561, "grad_norm": 0.3384244740009308, "learning_rate": 1.5479268292682926e-05, "loss": 0.0374, "step": 55605 }, { "epoch": 0.6781707317073171, "grad_norm": 0.5250794887542725, "learning_rate": 1.5478861788617888e-05, "loss": 0.0559, "step": 55610 }, { "epoch": 0.6782317073170732, "grad_norm": 0.9186752438545227, "learning_rate": 1.5478455284552846e-05, "loss": 0.0748, "step": 55615 }, { "epoch": 0.6782926829268293, "grad_norm": 0.7571911811828613, "learning_rate": 1.5478048780487807e-05, "loss": 0.0489, "step": 55620 }, { "epoch": 0.6783536585365854, "grad_norm": 0.6105281710624695, "learning_rate": 1.5477642276422765e-05, "loss": 0.031, "step": 55625 }, { "epoch": 0.6784146341463415, "grad_norm": 1.063389778137207, "learning_rate": 1.5477235772357724e-05, "loss": 0.0665, "step": 55630 }, { "epoch": 0.6784756097560976, "grad_norm": 0.8130277991294861, "learning_rate": 1.5476829268292685e-05, "loss": 0.0974, "step": 55635 }, { "epoch": 0.6785365853658537, "grad_norm": 0.588136613368988, "learning_rate": 1.5476422764227643e-05, "loss": 0.0517, "step": 55640 }, { "epoch": 0.6785975609756097, "grad_norm": 0.3515569567680359, "learning_rate": 1.54760162601626e-05, "loss": 0.0361, "step": 55645 }, { "epoch": 0.6786585365853659, "grad_norm": 0.8763172626495361, "learning_rate": 1.5475609756097563e-05, "loss": 0.0362, "step": 55650 }, { "epoch": 0.6787195121951219, "grad_norm": 0.4258829355239868, "learning_rate": 1.547520325203252e-05, "loss": 0.0457, "step": 55655 }, { "epoch": 0.6787804878048781, "grad_norm": 0.6876534223556519, "learning_rate": 1.547479674796748e-05, "loss": 0.035, "step": 55660 }, { "epoch": 0.6788414634146341, "grad_norm": 0.33228322863578796, "learning_rate": 1.547439024390244e-05, "loss": 0.0912, "step": 55665 }, { "epoch": 0.6789024390243903, "grad_norm": 0.38644281029701233, "learning_rate": 1.54739837398374e-05, "loss": 0.0372, "step": 55670 }, { "epoch": 0.6789634146341463, "grad_norm": 4.891824722290039, "learning_rate": 1.547357723577236e-05, "loss": 0.0353, "step": 55675 }, { "epoch": 0.6790243902439025, "grad_norm": 0.6747990250587463, "learning_rate": 1.547317073170732e-05, "loss": 0.0428, "step": 55680 }, { "epoch": 0.6790853658536585, "grad_norm": 0.4779793620109558, "learning_rate": 1.547276422764228e-05, "loss": 0.0836, "step": 55685 }, { "epoch": 0.6791463414634147, "grad_norm": 0.2349461019039154, "learning_rate": 1.5472357723577235e-05, "loss": 0.0615, "step": 55690 }, { "epoch": 0.6792073170731707, "grad_norm": 1.0986576080322266, "learning_rate": 1.5471951219512196e-05, "loss": 0.0659, "step": 55695 }, { "epoch": 0.6792682926829269, "grad_norm": 0.4930296242237091, "learning_rate": 1.5471544715447154e-05, "loss": 0.0911, "step": 55700 }, { "epoch": 0.6793292682926829, "grad_norm": 0.7686082124710083, "learning_rate": 1.5471138211382116e-05, "loss": 0.0609, "step": 55705 }, { "epoch": 0.6793902439024391, "grad_norm": 0.872590959072113, "learning_rate": 1.5470731707317074e-05, "loss": 0.0576, "step": 55710 }, { "epoch": 0.6794512195121951, "grad_norm": 0.4938685894012451, "learning_rate": 1.5470325203252035e-05, "loss": 0.045, "step": 55715 }, { "epoch": 0.6795121951219513, "grad_norm": 0.8273755311965942, "learning_rate": 1.5469918699186994e-05, "loss": 0.0313, "step": 55720 }, { "epoch": 0.6795731707317073, "grad_norm": 0.8108633756637573, "learning_rate": 1.5469512195121952e-05, "loss": 0.0471, "step": 55725 }, { "epoch": 0.6796341463414635, "grad_norm": 0.9313157796859741, "learning_rate": 1.546910569105691e-05, "loss": 0.0662, "step": 55730 }, { "epoch": 0.6796951219512195, "grad_norm": 0.5930149555206299, "learning_rate": 1.546869918699187e-05, "loss": 0.0742, "step": 55735 }, { "epoch": 0.6797560975609757, "grad_norm": 0.46054473519325256, "learning_rate": 1.546829268292683e-05, "loss": 0.0479, "step": 55740 }, { "epoch": 0.6798170731707317, "grad_norm": 0.7606982588768005, "learning_rate": 1.546788617886179e-05, "loss": 0.0541, "step": 55745 }, { "epoch": 0.6798780487804879, "grad_norm": 0.19932658970355988, "learning_rate": 1.546747967479675e-05, "loss": 0.045, "step": 55750 }, { "epoch": 0.6799390243902439, "grad_norm": 0.38925448060035706, "learning_rate": 1.5467073170731707e-05, "loss": 0.0437, "step": 55755 }, { "epoch": 0.68, "grad_norm": 0.3491824269294739, "learning_rate": 1.546666666666667e-05, "loss": 0.0584, "step": 55760 }, { "epoch": 0.6800609756097561, "grad_norm": 0.8568703532218933, "learning_rate": 1.5466260162601627e-05, "loss": 0.0974, "step": 55765 }, { "epoch": 0.6801219512195122, "grad_norm": 0.41116246581077576, "learning_rate": 1.546585365853659e-05, "loss": 0.0527, "step": 55770 }, { "epoch": 0.6801829268292683, "grad_norm": 1.3394761085510254, "learning_rate": 1.5465447154471547e-05, "loss": 0.032, "step": 55775 }, { "epoch": 0.6802439024390244, "grad_norm": 0.5011661052703857, "learning_rate": 1.5465040650406505e-05, "loss": 0.0564, "step": 55780 }, { "epoch": 0.6803048780487805, "grad_norm": 0.3359606862068176, "learning_rate": 1.5464634146341463e-05, "loss": 0.0312, "step": 55785 }, { "epoch": 0.6803658536585366, "grad_norm": 0.3722427785396576, "learning_rate": 1.5464227642276424e-05, "loss": 0.0402, "step": 55790 }, { "epoch": 0.6804268292682927, "grad_norm": 0.7832983136177063, "learning_rate": 1.5463821138211382e-05, "loss": 0.0747, "step": 55795 }, { "epoch": 0.6804878048780488, "grad_norm": 0.2924219071865082, "learning_rate": 1.5463414634146344e-05, "loss": 0.0708, "step": 55800 }, { "epoch": 0.6805487804878049, "grad_norm": 0.6071767807006836, "learning_rate": 1.5463008130081302e-05, "loss": 0.0441, "step": 55805 }, { "epoch": 0.680609756097561, "grad_norm": 0.7803589105606079, "learning_rate": 1.546260162601626e-05, "loss": 0.0632, "step": 55810 }, { "epoch": 0.6806707317073171, "grad_norm": 0.6317304372787476, "learning_rate": 1.5462195121951222e-05, "loss": 0.0587, "step": 55815 }, { "epoch": 0.6807317073170732, "grad_norm": 2.3244271278381348, "learning_rate": 1.546178861788618e-05, "loss": 0.0597, "step": 55820 }, { "epoch": 0.6807926829268293, "grad_norm": 0.49421846866607666, "learning_rate": 1.5461382113821138e-05, "loss": 0.0875, "step": 55825 }, { "epoch": 0.6808536585365854, "grad_norm": 0.7084107995033264, "learning_rate": 1.54609756097561e-05, "loss": 0.0534, "step": 55830 }, { "epoch": 0.6809146341463415, "grad_norm": 0.9770923256874084, "learning_rate": 1.5460569105691058e-05, "loss": 0.0524, "step": 55835 }, { "epoch": 0.6809756097560976, "grad_norm": 0.5799130797386169, "learning_rate": 1.5460162601626016e-05, "loss": 0.0345, "step": 55840 }, { "epoch": 0.6810365853658537, "grad_norm": 0.7862533330917358, "learning_rate": 1.5459756097560977e-05, "loss": 0.0687, "step": 55845 }, { "epoch": 0.6810975609756098, "grad_norm": 0.23647861182689667, "learning_rate": 1.5459349593495935e-05, "loss": 0.0561, "step": 55850 }, { "epoch": 0.6811585365853658, "grad_norm": 0.37170425057411194, "learning_rate": 1.5458943089430897e-05, "loss": 0.0374, "step": 55855 }, { "epoch": 0.681219512195122, "grad_norm": 1.7276687622070312, "learning_rate": 1.5458536585365855e-05, "loss": 0.0458, "step": 55860 }, { "epoch": 0.681280487804878, "grad_norm": 0.31795212626457214, "learning_rate": 1.5458130081300817e-05, "loss": 0.0516, "step": 55865 }, { "epoch": 0.6813414634146342, "grad_norm": 1.704053521156311, "learning_rate": 1.545772357723577e-05, "loss": 0.0517, "step": 55870 }, { "epoch": 0.6814024390243902, "grad_norm": 0.5213932394981384, "learning_rate": 1.5457317073170733e-05, "loss": 0.0536, "step": 55875 }, { "epoch": 0.6814634146341464, "grad_norm": 0.9292669296264648, "learning_rate": 1.545691056910569e-05, "loss": 0.0991, "step": 55880 }, { "epoch": 0.6815243902439024, "grad_norm": 0.5774633288383484, "learning_rate": 1.5456504065040652e-05, "loss": 0.0216, "step": 55885 }, { "epoch": 0.6815853658536586, "grad_norm": 0.7058150768280029, "learning_rate": 1.545609756097561e-05, "loss": 0.0618, "step": 55890 }, { "epoch": 0.6816463414634146, "grad_norm": 1.055862307548523, "learning_rate": 1.5455691056910572e-05, "loss": 0.0376, "step": 55895 }, { "epoch": 0.6817073170731708, "grad_norm": 0.5219771862030029, "learning_rate": 1.545528455284553e-05, "loss": 0.0524, "step": 55900 }, { "epoch": 0.6817682926829268, "grad_norm": 0.5147498250007629, "learning_rate": 1.545487804878049e-05, "loss": 0.0553, "step": 55905 }, { "epoch": 0.681829268292683, "grad_norm": 0.4685986638069153, "learning_rate": 1.5454471544715447e-05, "loss": 0.047, "step": 55910 }, { "epoch": 0.681890243902439, "grad_norm": 1.029052495956421, "learning_rate": 1.5454065040650408e-05, "loss": 0.0491, "step": 55915 }, { "epoch": 0.6819512195121952, "grad_norm": 0.4158909320831299, "learning_rate": 1.5453658536585366e-05, "loss": 0.0712, "step": 55920 }, { "epoch": 0.6820121951219512, "grad_norm": 0.8651280403137207, "learning_rate": 1.5453252032520328e-05, "loss": 0.0332, "step": 55925 }, { "epoch": 0.6820731707317074, "grad_norm": 1.003105640411377, "learning_rate": 1.5452845528455286e-05, "loss": 0.0544, "step": 55930 }, { "epoch": 0.6821341463414634, "grad_norm": 0.6553061604499817, "learning_rate": 1.5452439024390244e-05, "loss": 0.0681, "step": 55935 }, { "epoch": 0.6821951219512196, "grad_norm": 0.5561458468437195, "learning_rate": 1.5452032520325205e-05, "loss": 0.0368, "step": 55940 }, { "epoch": 0.6822560975609756, "grad_norm": 0.5777511596679688, "learning_rate": 1.5451626016260164e-05, "loss": 0.0439, "step": 55945 }, { "epoch": 0.6823170731707318, "grad_norm": 0.5938765406608582, "learning_rate": 1.5451219512195125e-05, "loss": 0.0593, "step": 55950 }, { "epoch": 0.6823780487804878, "grad_norm": 3.7108869552612305, "learning_rate": 1.5450813008130083e-05, "loss": 0.0728, "step": 55955 }, { "epoch": 0.682439024390244, "grad_norm": 0.6380319595336914, "learning_rate": 1.545040650406504e-05, "loss": 0.0435, "step": 55960 }, { "epoch": 0.6825, "grad_norm": 0.5378063917160034, "learning_rate": 1.545e-05, "loss": 0.053, "step": 55965 }, { "epoch": 0.6825609756097561, "grad_norm": 0.8562400341033936, "learning_rate": 1.544959349593496e-05, "loss": 0.0706, "step": 55970 }, { "epoch": 0.6826219512195122, "grad_norm": 0.8188322186470032, "learning_rate": 1.544918699186992e-05, "loss": 0.0488, "step": 55975 }, { "epoch": 0.6826829268292683, "grad_norm": 0.8911980986595154, "learning_rate": 1.544878048780488e-05, "loss": 0.0994, "step": 55980 }, { "epoch": 0.6827439024390244, "grad_norm": 0.4342714548110962, "learning_rate": 1.544837398373984e-05, "loss": 0.0561, "step": 55985 }, { "epoch": 0.6828048780487805, "grad_norm": 0.4522043466567993, "learning_rate": 1.5447967479674797e-05, "loss": 0.0394, "step": 55990 }, { "epoch": 0.6828658536585366, "grad_norm": 0.4544353485107422, "learning_rate": 1.5447560975609755e-05, "loss": 0.0433, "step": 55995 }, { "epoch": 0.6829268292682927, "grad_norm": 0.8767626881599426, "learning_rate": 1.5447154471544717e-05, "loss": 0.0432, "step": 56000 }, { "epoch": 0.6829878048780488, "grad_norm": 0.08959826827049255, "learning_rate": 1.5446747967479675e-05, "loss": 0.042, "step": 56005 }, { "epoch": 0.6830487804878049, "grad_norm": 0.5757294297218323, "learning_rate": 1.5446341463414636e-05, "loss": 0.0507, "step": 56010 }, { "epoch": 0.683109756097561, "grad_norm": 0.6502960920333862, "learning_rate": 1.5445934959349594e-05, "loss": 0.0511, "step": 56015 }, { "epoch": 0.6831707317073171, "grad_norm": 0.42206525802612305, "learning_rate": 1.5445528455284552e-05, "loss": 0.0842, "step": 56020 }, { "epoch": 0.6832317073170732, "grad_norm": 0.5376248955726624, "learning_rate": 1.5445121951219514e-05, "loss": 0.0503, "step": 56025 }, { "epoch": 0.6832926829268293, "grad_norm": 1.266400694847107, "learning_rate": 1.5444715447154472e-05, "loss": 0.0708, "step": 56030 }, { "epoch": 0.6833536585365854, "grad_norm": 0.5303771495819092, "learning_rate": 1.5444308943089434e-05, "loss": 0.0612, "step": 56035 }, { "epoch": 0.6834146341463415, "grad_norm": 0.9202718734741211, "learning_rate": 1.5443902439024392e-05, "loss": 0.0749, "step": 56040 }, { "epoch": 0.6834756097560976, "grad_norm": 0.3244510293006897, "learning_rate": 1.5443495934959353e-05, "loss": 0.0508, "step": 56045 }, { "epoch": 0.6835365853658537, "grad_norm": 1.2722092866897583, "learning_rate": 1.5443089430894308e-05, "loss": 0.0657, "step": 56050 }, { "epoch": 0.6835975609756098, "grad_norm": 0.7387871146202087, "learning_rate": 1.544268292682927e-05, "loss": 0.0443, "step": 56055 }, { "epoch": 0.6836585365853659, "grad_norm": 1.0780411958694458, "learning_rate": 1.5442276422764228e-05, "loss": 0.084, "step": 56060 }, { "epoch": 0.683719512195122, "grad_norm": 0.6702929139137268, "learning_rate": 1.544186991869919e-05, "loss": 0.054, "step": 56065 }, { "epoch": 0.6837804878048781, "grad_norm": 0.6100445985794067, "learning_rate": 1.5441463414634147e-05, "loss": 0.0587, "step": 56070 }, { "epoch": 0.6838414634146341, "grad_norm": 0.6396365761756897, "learning_rate": 1.544105691056911e-05, "loss": 0.0526, "step": 56075 }, { "epoch": 0.6839024390243903, "grad_norm": 0.7898017764091492, "learning_rate": 1.5440650406504067e-05, "loss": 0.0423, "step": 56080 }, { "epoch": 0.6839634146341463, "grad_norm": 0.5122058987617493, "learning_rate": 1.5440243902439025e-05, "loss": 0.0591, "step": 56085 }, { "epoch": 0.6840243902439025, "grad_norm": 0.5495998859405518, "learning_rate": 1.5439837398373983e-05, "loss": 0.0684, "step": 56090 }, { "epoch": 0.6840853658536585, "grad_norm": 1.209438443183899, "learning_rate": 1.5439430894308945e-05, "loss": 0.0525, "step": 56095 }, { "epoch": 0.6841463414634147, "grad_norm": 1.1278222799301147, "learning_rate": 1.5439024390243903e-05, "loss": 0.0534, "step": 56100 }, { "epoch": 0.6842073170731707, "grad_norm": 0.399262934923172, "learning_rate": 1.5438617886178864e-05, "loss": 0.0703, "step": 56105 }, { "epoch": 0.6842682926829269, "grad_norm": 0.38690030574798584, "learning_rate": 1.5438211382113822e-05, "loss": 0.0624, "step": 56110 }, { "epoch": 0.6843292682926829, "grad_norm": 1.2506898641586304, "learning_rate": 1.543780487804878e-05, "loss": 0.0594, "step": 56115 }, { "epoch": 0.6843902439024391, "grad_norm": 0.6685858368873596, "learning_rate": 1.5437398373983742e-05, "loss": 0.0328, "step": 56120 }, { "epoch": 0.6844512195121951, "grad_norm": 0.4257426857948303, "learning_rate": 1.54369918699187e-05, "loss": 0.05, "step": 56125 }, { "epoch": 0.6845121951219513, "grad_norm": 0.8018927574157715, "learning_rate": 1.5436585365853662e-05, "loss": 0.0465, "step": 56130 }, { "epoch": 0.6845731707317073, "grad_norm": 0.6653513312339783, "learning_rate": 1.543617886178862e-05, "loss": 0.0559, "step": 56135 }, { "epoch": 0.6846341463414635, "grad_norm": 0.3952728509902954, "learning_rate": 1.5435772357723578e-05, "loss": 0.0396, "step": 56140 }, { "epoch": 0.6846951219512195, "grad_norm": 0.6931394338607788, "learning_rate": 1.5435365853658536e-05, "loss": 0.04, "step": 56145 }, { "epoch": 0.6847560975609757, "grad_norm": 0.6435275077819824, "learning_rate": 1.5434959349593498e-05, "loss": 0.048, "step": 56150 }, { "epoch": 0.6848170731707317, "grad_norm": 0.8038323521614075, "learning_rate": 1.5434552845528456e-05, "loss": 0.0565, "step": 56155 }, { "epoch": 0.6848780487804879, "grad_norm": 0.6400131583213806, "learning_rate": 1.5434146341463417e-05, "loss": 0.0581, "step": 56160 }, { "epoch": 0.6849390243902439, "grad_norm": 0.847627580165863, "learning_rate": 1.5433739837398375e-05, "loss": 0.0441, "step": 56165 }, { "epoch": 0.685, "grad_norm": 0.8237854838371277, "learning_rate": 1.5433333333333334e-05, "loss": 0.0667, "step": 56170 }, { "epoch": 0.6850609756097561, "grad_norm": 0.8035299777984619, "learning_rate": 1.543292682926829e-05, "loss": 0.0436, "step": 56175 }, { "epoch": 0.6851219512195122, "grad_norm": 0.5988188982009888, "learning_rate": 1.5432520325203253e-05, "loss": 0.0547, "step": 56180 }, { "epoch": 0.6851829268292683, "grad_norm": 0.8294949531555176, "learning_rate": 1.543211382113821e-05, "loss": 0.1007, "step": 56185 }, { "epoch": 0.6852439024390244, "grad_norm": 0.435513436794281, "learning_rate": 1.5431707317073173e-05, "loss": 0.0789, "step": 56190 }, { "epoch": 0.6853048780487805, "grad_norm": 0.35848820209503174, "learning_rate": 1.543130081300813e-05, "loss": 0.0463, "step": 56195 }, { "epoch": 0.6853658536585366, "grad_norm": 0.6093842387199402, "learning_rate": 1.543089430894309e-05, "loss": 0.044, "step": 56200 }, { "epoch": 0.6854268292682927, "grad_norm": 0.5814189910888672, "learning_rate": 1.543048780487805e-05, "loss": 0.0455, "step": 56205 }, { "epoch": 0.6854878048780488, "grad_norm": 0.4246880114078522, "learning_rate": 1.543008130081301e-05, "loss": 0.0668, "step": 56210 }, { "epoch": 0.6855487804878049, "grad_norm": 0.7325282096862793, "learning_rate": 1.542967479674797e-05, "loss": 0.0709, "step": 56215 }, { "epoch": 0.685609756097561, "grad_norm": 0.7384207844734192, "learning_rate": 1.542926829268293e-05, "loss": 0.0457, "step": 56220 }, { "epoch": 0.6856707317073171, "grad_norm": 0.6925190091133118, "learning_rate": 1.542886178861789e-05, "loss": 0.0348, "step": 56225 }, { "epoch": 0.6857317073170732, "grad_norm": 0.538969874382019, "learning_rate": 1.5428455284552845e-05, "loss": 0.0673, "step": 56230 }, { "epoch": 0.6857926829268293, "grad_norm": 0.5328741073608398, "learning_rate": 1.5428048780487806e-05, "loss": 0.0701, "step": 56235 }, { "epoch": 0.6858536585365854, "grad_norm": 0.6430550813674927, "learning_rate": 1.5427642276422764e-05, "loss": 0.0503, "step": 56240 }, { "epoch": 0.6859146341463415, "grad_norm": 0.7175551652908325, "learning_rate": 1.5427235772357726e-05, "loss": 0.0528, "step": 56245 }, { "epoch": 0.6859756097560976, "grad_norm": 0.39235493540763855, "learning_rate": 1.5426829268292684e-05, "loss": 0.0714, "step": 56250 }, { "epoch": 0.6860365853658537, "grad_norm": 0.8833501935005188, "learning_rate": 1.5426422764227645e-05, "loss": 0.0653, "step": 56255 }, { "epoch": 0.6860975609756098, "grad_norm": 0.4107014238834381, "learning_rate": 1.54260162601626e-05, "loss": 0.0471, "step": 56260 }, { "epoch": 0.6861585365853659, "grad_norm": 0.5729978084564209, "learning_rate": 1.542560975609756e-05, "loss": 0.0494, "step": 56265 }, { "epoch": 0.686219512195122, "grad_norm": 0.6027621626853943, "learning_rate": 1.542520325203252e-05, "loss": 0.0596, "step": 56270 }, { "epoch": 0.686280487804878, "grad_norm": 0.45529472827911377, "learning_rate": 1.542479674796748e-05, "loss": 0.0552, "step": 56275 }, { "epoch": 0.6863414634146342, "grad_norm": 0.6716496348381042, "learning_rate": 1.542439024390244e-05, "loss": 0.0456, "step": 56280 }, { "epoch": 0.6864024390243902, "grad_norm": 0.3098636269569397, "learning_rate": 1.54239837398374e-05, "loss": 0.0553, "step": 56285 }, { "epoch": 0.6864634146341464, "grad_norm": 0.2602708041667938, "learning_rate": 1.542357723577236e-05, "loss": 0.069, "step": 56290 }, { "epoch": 0.6865243902439024, "grad_norm": 0.643634021282196, "learning_rate": 1.5423170731707317e-05, "loss": 0.0607, "step": 56295 }, { "epoch": 0.6865853658536586, "grad_norm": 0.7615916728973389, "learning_rate": 1.542276422764228e-05, "loss": 0.0744, "step": 56300 }, { "epoch": 0.6866463414634146, "grad_norm": 0.6890140175819397, "learning_rate": 1.5422357723577237e-05, "loss": 0.0785, "step": 56305 }, { "epoch": 0.6867073170731708, "grad_norm": 0.7050434350967407, "learning_rate": 1.54219512195122e-05, "loss": 0.0735, "step": 56310 }, { "epoch": 0.6867682926829268, "grad_norm": 0.4993581175804138, "learning_rate": 1.5421544715447156e-05, "loss": 0.0384, "step": 56315 }, { "epoch": 0.686829268292683, "grad_norm": 0.4525268077850342, "learning_rate": 1.5421138211382115e-05, "loss": 0.0475, "step": 56320 }, { "epoch": 0.686890243902439, "grad_norm": 0.6198607087135315, "learning_rate": 1.5420731707317073e-05, "loss": 0.0576, "step": 56325 }, { "epoch": 0.6869512195121952, "grad_norm": 0.48589053750038147, "learning_rate": 1.5420325203252034e-05, "loss": 0.0577, "step": 56330 }, { "epoch": 0.6870121951219512, "grad_norm": 0.6833314895629883, "learning_rate": 1.5419918699186992e-05, "loss": 0.0513, "step": 56335 }, { "epoch": 0.6870731707317074, "grad_norm": 0.33875784277915955, "learning_rate": 1.5419512195121954e-05, "loss": 0.0396, "step": 56340 }, { "epoch": 0.6871341463414634, "grad_norm": 0.3349759578704834, "learning_rate": 1.5419105691056912e-05, "loss": 0.0588, "step": 56345 }, { "epoch": 0.6871951219512196, "grad_norm": 0.5719079375267029, "learning_rate": 1.541869918699187e-05, "loss": 0.0458, "step": 56350 }, { "epoch": 0.6872560975609756, "grad_norm": 1.2695825099945068, "learning_rate": 1.5418292682926828e-05, "loss": 0.0505, "step": 56355 }, { "epoch": 0.6873170731707318, "grad_norm": 0.43248602747917175, "learning_rate": 1.541788617886179e-05, "loss": 0.0361, "step": 56360 }, { "epoch": 0.6873780487804878, "grad_norm": 1.6481397151947021, "learning_rate": 1.5417479674796748e-05, "loss": 0.0672, "step": 56365 }, { "epoch": 0.687439024390244, "grad_norm": 0.6847885251045227, "learning_rate": 1.541707317073171e-05, "loss": 0.0474, "step": 56370 }, { "epoch": 0.6875, "grad_norm": 0.8657257556915283, "learning_rate": 1.5416666666666668e-05, "loss": 0.0537, "step": 56375 }, { "epoch": 0.687560975609756, "grad_norm": 0.6666794419288635, "learning_rate": 1.541626016260163e-05, "loss": 0.0473, "step": 56380 }, { "epoch": 0.6876219512195122, "grad_norm": 0.57561194896698, "learning_rate": 1.5415853658536587e-05, "loss": 0.0612, "step": 56385 }, { "epoch": 0.6876829268292682, "grad_norm": 0.4949016273021698, "learning_rate": 1.5415447154471545e-05, "loss": 0.0638, "step": 56390 }, { "epoch": 0.6877439024390244, "grad_norm": 0.7599443197250366, "learning_rate": 1.5415040650406507e-05, "loss": 0.06, "step": 56395 }, { "epoch": 0.6878048780487804, "grad_norm": 0.6582603454589844, "learning_rate": 1.5414634146341465e-05, "loss": 0.0312, "step": 56400 }, { "epoch": 0.6878658536585366, "grad_norm": 0.32472822070121765, "learning_rate": 1.5414227642276423e-05, "loss": 0.0378, "step": 56405 }, { "epoch": 0.6879268292682926, "grad_norm": 0.4515957534313202, "learning_rate": 1.5413821138211385e-05, "loss": 0.0407, "step": 56410 }, { "epoch": 0.6879878048780488, "grad_norm": 0.6209232211112976, "learning_rate": 1.5413414634146343e-05, "loss": 0.0539, "step": 56415 }, { "epoch": 0.6880487804878048, "grad_norm": 0.39058607816696167, "learning_rate": 1.54130081300813e-05, "loss": 0.0454, "step": 56420 }, { "epoch": 0.688109756097561, "grad_norm": 0.8377881646156311, "learning_rate": 1.5412601626016262e-05, "loss": 0.0561, "step": 56425 }, { "epoch": 0.688170731707317, "grad_norm": 0.4267892837524414, "learning_rate": 1.541219512195122e-05, "loss": 0.0472, "step": 56430 }, { "epoch": 0.6882317073170732, "grad_norm": 0.42251843214035034, "learning_rate": 1.5411788617886182e-05, "loss": 0.0702, "step": 56435 }, { "epoch": 0.6882926829268292, "grad_norm": 0.5918501019477844, "learning_rate": 1.541138211382114e-05, "loss": 0.0552, "step": 56440 }, { "epoch": 0.6883536585365854, "grad_norm": 0.5415797233581543, "learning_rate": 1.5410975609756098e-05, "loss": 0.0417, "step": 56445 }, { "epoch": 0.6884146341463414, "grad_norm": 0.8598237633705139, "learning_rate": 1.5410569105691056e-05, "loss": 0.0457, "step": 56450 }, { "epoch": 0.6884756097560976, "grad_norm": 1.2561246156692505, "learning_rate": 1.5410162601626018e-05, "loss": 0.0851, "step": 56455 }, { "epoch": 0.6885365853658536, "grad_norm": 0.611246645450592, "learning_rate": 1.5409756097560976e-05, "loss": 0.0257, "step": 56460 }, { "epoch": 0.6885975609756098, "grad_norm": 0.32878342270851135, "learning_rate": 1.5409349593495938e-05, "loss": 0.0411, "step": 56465 }, { "epoch": 0.6886585365853658, "grad_norm": 1.6364083290100098, "learning_rate": 1.5408943089430896e-05, "loss": 0.0508, "step": 56470 }, { "epoch": 0.688719512195122, "grad_norm": 0.5755699276924133, "learning_rate": 1.5408536585365854e-05, "loss": 0.0462, "step": 56475 }, { "epoch": 0.688780487804878, "grad_norm": 2.1641736030578613, "learning_rate": 1.5408130081300815e-05, "loss": 0.0489, "step": 56480 }, { "epoch": 0.6888414634146341, "grad_norm": 2.0019822120666504, "learning_rate": 1.5407723577235773e-05, "loss": 0.0683, "step": 56485 }, { "epoch": 0.6889024390243902, "grad_norm": 0.5862534642219543, "learning_rate": 1.5407317073170735e-05, "loss": 0.0426, "step": 56490 }, { "epoch": 0.6889634146341463, "grad_norm": 0.5714043378829956, "learning_rate": 1.5406910569105693e-05, "loss": 0.0481, "step": 56495 }, { "epoch": 0.6890243902439024, "grad_norm": 0.5210985541343689, "learning_rate": 1.540650406504065e-05, "loss": 0.0575, "step": 56500 }, { "epoch": 0.6890853658536585, "grad_norm": 0.44759896397590637, "learning_rate": 1.540609756097561e-05, "loss": 0.0463, "step": 56505 }, { "epoch": 0.6891463414634146, "grad_norm": 0.5852794051170349, "learning_rate": 1.540569105691057e-05, "loss": 0.0889, "step": 56510 }, { "epoch": 0.6892073170731707, "grad_norm": 0.7417523264884949, "learning_rate": 1.540528455284553e-05, "loss": 0.0516, "step": 56515 }, { "epoch": 0.6892682926829268, "grad_norm": 0.2640395760536194, "learning_rate": 1.540487804878049e-05, "loss": 0.0355, "step": 56520 }, { "epoch": 0.6893292682926829, "grad_norm": 0.44242802262306213, "learning_rate": 1.540447154471545e-05, "loss": 0.067, "step": 56525 }, { "epoch": 0.689390243902439, "grad_norm": 0.46406883001327515, "learning_rate": 1.540406504065041e-05, "loss": 0.0463, "step": 56530 }, { "epoch": 0.6894512195121951, "grad_norm": 0.6541008353233337, "learning_rate": 1.5403658536585365e-05, "loss": 0.0366, "step": 56535 }, { "epoch": 0.6895121951219512, "grad_norm": 0.4794606566429138, "learning_rate": 1.5403252032520326e-05, "loss": 0.0642, "step": 56540 }, { "epoch": 0.6895731707317073, "grad_norm": 0.39643222093582153, "learning_rate": 1.5402845528455285e-05, "loss": 0.0632, "step": 56545 }, { "epoch": 0.6896341463414634, "grad_norm": 3.1653287410736084, "learning_rate": 1.5402439024390246e-05, "loss": 0.0408, "step": 56550 }, { "epoch": 0.6896951219512195, "grad_norm": 0.7721990942955017, "learning_rate": 1.5402032520325204e-05, "loss": 0.0501, "step": 56555 }, { "epoch": 0.6897560975609756, "grad_norm": 0.5926294326782227, "learning_rate": 1.5401626016260166e-05, "loss": 0.0997, "step": 56560 }, { "epoch": 0.6898170731707317, "grad_norm": 0.4718305468559265, "learning_rate": 1.5401219512195124e-05, "loss": 0.0444, "step": 56565 }, { "epoch": 0.6898780487804878, "grad_norm": 0.20435559749603271, "learning_rate": 1.5400813008130082e-05, "loss": 0.0436, "step": 56570 }, { "epoch": 0.6899390243902439, "grad_norm": 0.8224514126777649, "learning_rate": 1.5400406504065044e-05, "loss": 0.0599, "step": 56575 }, { "epoch": 0.69, "grad_norm": 0.24634389579296112, "learning_rate": 1.54e-05, "loss": 0.0983, "step": 56580 }, { "epoch": 0.6900609756097561, "grad_norm": 0.4070776700973511, "learning_rate": 1.539959349593496e-05, "loss": 0.0297, "step": 56585 }, { "epoch": 0.6901219512195121, "grad_norm": 1.3336408138275146, "learning_rate": 1.539918699186992e-05, "loss": 0.0457, "step": 56590 }, { "epoch": 0.6901829268292683, "grad_norm": 1.75282621383667, "learning_rate": 1.539878048780488e-05, "loss": 0.0767, "step": 56595 }, { "epoch": 0.6902439024390243, "grad_norm": 0.9369651675224304, "learning_rate": 1.5398373983739838e-05, "loss": 0.0477, "step": 56600 }, { "epoch": 0.6903048780487805, "grad_norm": 0.6201918721199036, "learning_rate": 1.53979674796748e-05, "loss": 0.0524, "step": 56605 }, { "epoch": 0.6903658536585365, "grad_norm": 0.7673997282981873, "learning_rate": 1.5397560975609757e-05, "loss": 0.032, "step": 56610 }, { "epoch": 0.6904268292682927, "grad_norm": 1.7335675954818726, "learning_rate": 1.539715447154472e-05, "loss": 0.0605, "step": 56615 }, { "epoch": 0.6904878048780487, "grad_norm": 0.4860728979110718, "learning_rate": 1.5396747967479677e-05, "loss": 0.0391, "step": 56620 }, { "epoch": 0.6905487804878049, "grad_norm": 0.555911123752594, "learning_rate": 1.5396341463414635e-05, "loss": 0.0496, "step": 56625 }, { "epoch": 0.6906097560975609, "grad_norm": 2.274428129196167, "learning_rate": 1.5395934959349593e-05, "loss": 0.0734, "step": 56630 }, { "epoch": 0.6906707317073171, "grad_norm": 0.7730064988136292, "learning_rate": 1.5395528455284555e-05, "loss": 0.0421, "step": 56635 }, { "epoch": 0.6907317073170731, "grad_norm": 0.7666866779327393, "learning_rate": 1.5395121951219513e-05, "loss": 0.0596, "step": 56640 }, { "epoch": 0.6907926829268293, "grad_norm": 0.5223142504692078, "learning_rate": 1.5394715447154474e-05, "loss": 0.0369, "step": 56645 }, { "epoch": 0.6908536585365853, "grad_norm": 0.45303767919540405, "learning_rate": 1.5394308943089432e-05, "loss": 0.0372, "step": 56650 }, { "epoch": 0.6909146341463415, "grad_norm": 0.3748726546764374, "learning_rate": 1.539390243902439e-05, "loss": 0.081, "step": 56655 }, { "epoch": 0.6909756097560975, "grad_norm": 0.5360308289527893, "learning_rate": 1.5393495934959352e-05, "loss": 0.0336, "step": 56660 }, { "epoch": 0.6910365853658537, "grad_norm": 0.7839644551277161, "learning_rate": 1.539308943089431e-05, "loss": 0.0609, "step": 56665 }, { "epoch": 0.6910975609756097, "grad_norm": 0.3598841428756714, "learning_rate": 1.5392682926829268e-05, "loss": 0.064, "step": 56670 }, { "epoch": 0.6911585365853659, "grad_norm": 1.8380519151687622, "learning_rate": 1.539227642276423e-05, "loss": 0.0907, "step": 56675 }, { "epoch": 0.6912195121951219, "grad_norm": 0.8694721460342407, "learning_rate": 1.5391869918699188e-05, "loss": 0.0362, "step": 56680 }, { "epoch": 0.691280487804878, "grad_norm": 0.6189211010932922, "learning_rate": 1.5391463414634146e-05, "loss": 0.0443, "step": 56685 }, { "epoch": 0.6913414634146341, "grad_norm": 0.9538939595222473, "learning_rate": 1.5391056910569108e-05, "loss": 0.047, "step": 56690 }, { "epoch": 0.6914024390243902, "grad_norm": 0.5804983377456665, "learning_rate": 1.5390650406504066e-05, "loss": 0.0518, "step": 56695 }, { "epoch": 0.6914634146341463, "grad_norm": 0.6388075351715088, "learning_rate": 1.5390243902439027e-05, "loss": 0.0658, "step": 56700 }, { "epoch": 0.6915243902439024, "grad_norm": 0.6172615885734558, "learning_rate": 1.5389837398373985e-05, "loss": 0.0392, "step": 56705 }, { "epoch": 0.6915853658536585, "grad_norm": 0.6420317888259888, "learning_rate": 1.5389430894308947e-05, "loss": 0.093, "step": 56710 }, { "epoch": 0.6916463414634146, "grad_norm": 0.5592117309570312, "learning_rate": 1.53890243902439e-05, "loss": 0.0908, "step": 56715 }, { "epoch": 0.6917073170731707, "grad_norm": 0.42437341809272766, "learning_rate": 1.5388617886178863e-05, "loss": 0.0598, "step": 56720 }, { "epoch": 0.6917682926829268, "grad_norm": 0.7228121161460876, "learning_rate": 1.538821138211382e-05, "loss": 0.038, "step": 56725 }, { "epoch": 0.6918292682926829, "grad_norm": 0.629754364490509, "learning_rate": 1.5387804878048783e-05, "loss": 0.0431, "step": 56730 }, { "epoch": 0.691890243902439, "grad_norm": 0.760002851486206, "learning_rate": 1.538739837398374e-05, "loss": 0.0927, "step": 56735 }, { "epoch": 0.6919512195121951, "grad_norm": 0.2731725871562958, "learning_rate": 1.5386991869918702e-05, "loss": 0.0466, "step": 56740 }, { "epoch": 0.6920121951219512, "grad_norm": 0.7686340808868408, "learning_rate": 1.538658536585366e-05, "loss": 0.044, "step": 56745 }, { "epoch": 0.6920731707317073, "grad_norm": 0.8002969622612, "learning_rate": 1.538617886178862e-05, "loss": 0.0668, "step": 56750 }, { "epoch": 0.6921341463414634, "grad_norm": 9.436389923095703, "learning_rate": 1.538577235772358e-05, "loss": 0.0684, "step": 56755 }, { "epoch": 0.6921951219512195, "grad_norm": 0.6843568682670593, "learning_rate": 1.5385365853658538e-05, "loss": 0.0632, "step": 56760 }, { "epoch": 0.6922560975609756, "grad_norm": 0.40205198526382446, "learning_rate": 1.5384959349593496e-05, "loss": 0.0394, "step": 56765 }, { "epoch": 0.6923170731707317, "grad_norm": 0.7503101825714111, "learning_rate": 1.5384552845528458e-05, "loss": 0.0454, "step": 56770 }, { "epoch": 0.6923780487804878, "grad_norm": 0.7293717861175537, "learning_rate": 1.5384146341463416e-05, "loss": 0.071, "step": 56775 }, { "epoch": 0.6924390243902439, "grad_norm": 0.4751601219177246, "learning_rate": 1.5383739837398374e-05, "loss": 0.0214, "step": 56780 }, { "epoch": 0.6925, "grad_norm": 0.3715428113937378, "learning_rate": 1.5383333333333336e-05, "loss": 0.0337, "step": 56785 }, { "epoch": 0.692560975609756, "grad_norm": 0.38800254464149475, "learning_rate": 1.5382926829268294e-05, "loss": 0.0321, "step": 56790 }, { "epoch": 0.6926219512195122, "grad_norm": 0.7946329712867737, "learning_rate": 1.5382520325203255e-05, "loss": 0.051, "step": 56795 }, { "epoch": 0.6926829268292682, "grad_norm": 0.6110531091690063, "learning_rate": 1.5382113821138213e-05, "loss": 0.0312, "step": 56800 }, { "epoch": 0.6927439024390244, "grad_norm": 0.4169575870037079, "learning_rate": 1.538170731707317e-05, "loss": 0.0587, "step": 56805 }, { "epoch": 0.6928048780487804, "grad_norm": 1.0102862119674683, "learning_rate": 1.538130081300813e-05, "loss": 0.0383, "step": 56810 }, { "epoch": 0.6928658536585366, "grad_norm": 0.7169650197029114, "learning_rate": 1.538089430894309e-05, "loss": 0.0459, "step": 56815 }, { "epoch": 0.6929268292682926, "grad_norm": 0.469190776348114, "learning_rate": 1.538048780487805e-05, "loss": 0.0441, "step": 56820 }, { "epoch": 0.6929878048780488, "grad_norm": 0.6216407418251038, "learning_rate": 1.538008130081301e-05, "loss": 0.0284, "step": 56825 }, { "epoch": 0.6930487804878048, "grad_norm": 0.6835328936576843, "learning_rate": 1.537967479674797e-05, "loss": 0.054, "step": 56830 }, { "epoch": 0.693109756097561, "grad_norm": 1.8134021759033203, "learning_rate": 1.5379268292682927e-05, "loss": 0.0642, "step": 56835 }, { "epoch": 0.693170731707317, "grad_norm": 0.48587653040885925, "learning_rate": 1.537886178861789e-05, "loss": 0.0486, "step": 56840 }, { "epoch": 0.6932317073170732, "grad_norm": 0.344858318567276, "learning_rate": 1.5378455284552847e-05, "loss": 0.0503, "step": 56845 }, { "epoch": 0.6932926829268292, "grad_norm": 0.4782141447067261, "learning_rate": 1.5378048780487805e-05, "loss": 0.0684, "step": 56850 }, { "epoch": 0.6933536585365854, "grad_norm": 0.49139833450317383, "learning_rate": 1.5377642276422766e-05, "loss": 0.0417, "step": 56855 }, { "epoch": 0.6934146341463414, "grad_norm": 0.7079091668128967, "learning_rate": 1.5377235772357725e-05, "loss": 0.0736, "step": 56860 }, { "epoch": 0.6934756097560976, "grad_norm": 0.40599605441093445, "learning_rate": 1.5376829268292683e-05, "loss": 0.032, "step": 56865 }, { "epoch": 0.6935365853658536, "grad_norm": 0.7443686127662659, "learning_rate": 1.5376422764227644e-05, "loss": 0.0693, "step": 56870 }, { "epoch": 0.6935975609756098, "grad_norm": 1.5950138568878174, "learning_rate": 1.5376016260162602e-05, "loss": 0.0768, "step": 56875 }, { "epoch": 0.6936585365853658, "grad_norm": 1.0449588298797607, "learning_rate": 1.5375609756097564e-05, "loss": 0.0492, "step": 56880 }, { "epoch": 0.693719512195122, "grad_norm": 0.6394138932228088, "learning_rate": 1.5375203252032522e-05, "loss": 0.0503, "step": 56885 }, { "epoch": 0.693780487804878, "grad_norm": 0.3172077238559723, "learning_rate": 1.5374796747967483e-05, "loss": 0.0425, "step": 56890 }, { "epoch": 0.6938414634146342, "grad_norm": 1.2951451539993286, "learning_rate": 1.5374390243902438e-05, "loss": 0.066, "step": 56895 }, { "epoch": 0.6939024390243902, "grad_norm": 0.49982577562332153, "learning_rate": 1.53739837398374e-05, "loss": 0.0367, "step": 56900 }, { "epoch": 0.6939634146341463, "grad_norm": 0.6755853295326233, "learning_rate": 1.5373577235772358e-05, "loss": 0.0539, "step": 56905 }, { "epoch": 0.6940243902439024, "grad_norm": 0.7666548490524292, "learning_rate": 1.537317073170732e-05, "loss": 0.0438, "step": 56910 }, { "epoch": 0.6940853658536585, "grad_norm": 0.5570253729820251, "learning_rate": 1.5372764227642278e-05, "loss": 0.0574, "step": 56915 }, { "epoch": 0.6941463414634146, "grad_norm": 0.37315869331359863, "learning_rate": 1.537235772357724e-05, "loss": 0.0682, "step": 56920 }, { "epoch": 0.6942073170731707, "grad_norm": 0.6100192070007324, "learning_rate": 1.5371951219512197e-05, "loss": 0.036, "step": 56925 }, { "epoch": 0.6942682926829268, "grad_norm": 0.3895310163497925, "learning_rate": 1.5371544715447155e-05, "loss": 0.0409, "step": 56930 }, { "epoch": 0.6943292682926829, "grad_norm": 0.5161440372467041, "learning_rate": 1.5371138211382113e-05, "loss": 0.0543, "step": 56935 }, { "epoch": 0.694390243902439, "grad_norm": 1.055464506149292, "learning_rate": 1.5370731707317075e-05, "loss": 0.0645, "step": 56940 }, { "epoch": 0.6944512195121951, "grad_norm": 1.4282402992248535, "learning_rate": 1.5370325203252033e-05, "loss": 0.0533, "step": 56945 }, { "epoch": 0.6945121951219512, "grad_norm": 0.7939806580543518, "learning_rate": 1.5369918699186995e-05, "loss": 0.0464, "step": 56950 }, { "epoch": 0.6945731707317073, "grad_norm": 0.6300477981567383, "learning_rate": 1.5369512195121953e-05, "loss": 0.0386, "step": 56955 }, { "epoch": 0.6946341463414634, "grad_norm": 0.5496143698692322, "learning_rate": 1.536910569105691e-05, "loss": 0.0643, "step": 56960 }, { "epoch": 0.6946951219512195, "grad_norm": 0.7665300369262695, "learning_rate": 1.5368699186991872e-05, "loss": 0.0609, "step": 56965 }, { "epoch": 0.6947560975609756, "grad_norm": 0.9505210518836975, "learning_rate": 1.536829268292683e-05, "loss": 0.0434, "step": 56970 }, { "epoch": 0.6948170731707317, "grad_norm": 0.7724857926368713, "learning_rate": 1.5367886178861792e-05, "loss": 0.0533, "step": 56975 }, { "epoch": 0.6948780487804878, "grad_norm": 0.39097273349761963, "learning_rate": 1.536747967479675e-05, "loss": 0.0683, "step": 56980 }, { "epoch": 0.6949390243902439, "grad_norm": 0.7567038536071777, "learning_rate": 1.5367073170731708e-05, "loss": 0.0465, "step": 56985 }, { "epoch": 0.695, "grad_norm": 0.5335462689399719, "learning_rate": 1.5366666666666666e-05, "loss": 0.0533, "step": 56990 }, { "epoch": 0.6950609756097561, "grad_norm": 0.2890171706676483, "learning_rate": 1.5366260162601628e-05, "loss": 0.0376, "step": 56995 }, { "epoch": 0.6951219512195121, "grad_norm": 0.5833016037940979, "learning_rate": 1.5365853658536586e-05, "loss": 0.0415, "step": 57000 }, { "epoch": 0.6951829268292683, "grad_norm": 0.5153684616088867, "learning_rate": 1.5365447154471548e-05, "loss": 0.0551, "step": 57005 }, { "epoch": 0.6952439024390243, "grad_norm": 0.32924285531044006, "learning_rate": 1.5365040650406506e-05, "loss": 0.0436, "step": 57010 }, { "epoch": 0.6953048780487805, "grad_norm": 0.6932429075241089, "learning_rate": 1.5364634146341464e-05, "loss": 0.0479, "step": 57015 }, { "epoch": 0.6953658536585365, "grad_norm": 0.6479541063308716, "learning_rate": 1.5364227642276425e-05, "loss": 0.0739, "step": 57020 }, { "epoch": 0.6954268292682927, "grad_norm": 0.4516626298427582, "learning_rate": 1.5363821138211383e-05, "loss": 0.0592, "step": 57025 }, { "epoch": 0.6954878048780487, "grad_norm": 0.353373259305954, "learning_rate": 1.536341463414634e-05, "loss": 0.0469, "step": 57030 }, { "epoch": 0.6955487804878049, "grad_norm": 0.4651568531990051, "learning_rate": 1.5363008130081303e-05, "loss": 0.0536, "step": 57035 }, { "epoch": 0.6956097560975609, "grad_norm": 0.897962212562561, "learning_rate": 1.536260162601626e-05, "loss": 0.0612, "step": 57040 }, { "epoch": 0.6956707317073171, "grad_norm": 0.9332265257835388, "learning_rate": 1.536219512195122e-05, "loss": 0.0384, "step": 57045 }, { "epoch": 0.6957317073170731, "grad_norm": 1.0151790380477905, "learning_rate": 1.536178861788618e-05, "loss": 0.0708, "step": 57050 }, { "epoch": 0.6957926829268293, "grad_norm": 0.3846840262413025, "learning_rate": 1.536138211382114e-05, "loss": 0.0333, "step": 57055 }, { "epoch": 0.6958536585365853, "grad_norm": 0.5112980604171753, "learning_rate": 1.53609756097561e-05, "loss": 0.0765, "step": 57060 }, { "epoch": 0.6959146341463415, "grad_norm": 0.6592729091644287, "learning_rate": 1.536056910569106e-05, "loss": 0.0604, "step": 57065 }, { "epoch": 0.6959756097560975, "grad_norm": 0.4460526704788208, "learning_rate": 1.536016260162602e-05, "loss": 0.0654, "step": 57070 }, { "epoch": 0.6960365853658537, "grad_norm": 0.695631206035614, "learning_rate": 1.5359756097560975e-05, "loss": 0.067, "step": 57075 }, { "epoch": 0.6960975609756097, "grad_norm": 0.7724176049232483, "learning_rate": 1.5359349593495936e-05, "loss": 0.0562, "step": 57080 }, { "epoch": 0.6961585365853659, "grad_norm": 0.9255682826042175, "learning_rate": 1.5358943089430895e-05, "loss": 0.03, "step": 57085 }, { "epoch": 0.6962195121951219, "grad_norm": 0.4414721131324768, "learning_rate": 1.5358536585365856e-05, "loss": 0.0492, "step": 57090 }, { "epoch": 0.6962804878048781, "grad_norm": 0.2537657618522644, "learning_rate": 1.5358130081300814e-05, "loss": 0.0449, "step": 57095 }, { "epoch": 0.6963414634146341, "grad_norm": 0.5944857597351074, "learning_rate": 1.5357723577235776e-05, "loss": 0.0928, "step": 57100 }, { "epoch": 0.6964024390243903, "grad_norm": 1.3592627048492432, "learning_rate": 1.5357317073170734e-05, "loss": 0.0558, "step": 57105 }, { "epoch": 0.6964634146341463, "grad_norm": 0.8143442273139954, "learning_rate": 1.5356910569105692e-05, "loss": 0.0645, "step": 57110 }, { "epoch": 0.6965243902439024, "grad_norm": 0.5215885043144226, "learning_rate": 1.535650406504065e-05, "loss": 0.0563, "step": 57115 }, { "epoch": 0.6965853658536585, "grad_norm": 2.291883707046509, "learning_rate": 1.535609756097561e-05, "loss": 0.0518, "step": 57120 }, { "epoch": 0.6966463414634146, "grad_norm": 0.4649859070777893, "learning_rate": 1.535569105691057e-05, "loss": 0.0525, "step": 57125 }, { "epoch": 0.6967073170731707, "grad_norm": 0.8220220804214478, "learning_rate": 1.535528455284553e-05, "loss": 0.0874, "step": 57130 }, { "epoch": 0.6967682926829268, "grad_norm": 0.3040795624256134, "learning_rate": 1.535487804878049e-05, "loss": 0.0695, "step": 57135 }, { "epoch": 0.6968292682926829, "grad_norm": 0.6100425124168396, "learning_rate": 1.5354471544715447e-05, "loss": 0.1055, "step": 57140 }, { "epoch": 0.696890243902439, "grad_norm": 0.9800770878791809, "learning_rate": 1.535406504065041e-05, "loss": 0.0543, "step": 57145 }, { "epoch": 0.6969512195121951, "grad_norm": 0.4311114251613617, "learning_rate": 1.5353658536585367e-05, "loss": 0.0328, "step": 57150 }, { "epoch": 0.6970121951219512, "grad_norm": 0.6237263083457947, "learning_rate": 1.535325203252033e-05, "loss": 0.0442, "step": 57155 }, { "epoch": 0.6970731707317073, "grad_norm": 0.5237985253334045, "learning_rate": 1.5352845528455287e-05, "loss": 0.0702, "step": 57160 }, { "epoch": 0.6971341463414634, "grad_norm": 1.759078025817871, "learning_rate": 1.5352439024390245e-05, "loss": 0.045, "step": 57165 }, { "epoch": 0.6971951219512195, "grad_norm": 0.59012371301651, "learning_rate": 1.5352032520325203e-05, "loss": 0.0501, "step": 57170 }, { "epoch": 0.6972560975609756, "grad_norm": 1.7377746105194092, "learning_rate": 1.5351626016260165e-05, "loss": 0.0379, "step": 57175 }, { "epoch": 0.6973170731707317, "grad_norm": 0.5845605134963989, "learning_rate": 1.5351219512195123e-05, "loss": 0.0455, "step": 57180 }, { "epoch": 0.6973780487804878, "grad_norm": 1.2437489032745361, "learning_rate": 1.5350813008130084e-05, "loss": 0.0665, "step": 57185 }, { "epoch": 0.6974390243902439, "grad_norm": 0.522064745426178, "learning_rate": 1.5350406504065042e-05, "loss": 0.0484, "step": 57190 }, { "epoch": 0.6975, "grad_norm": 0.5626155138015747, "learning_rate": 1.535e-05, "loss": 0.0397, "step": 57195 }, { "epoch": 0.697560975609756, "grad_norm": 0.8956312537193298, "learning_rate": 1.534959349593496e-05, "loss": 0.0573, "step": 57200 }, { "epoch": 0.6976219512195122, "grad_norm": 0.5329025387763977, "learning_rate": 1.534918699186992e-05, "loss": 0.0401, "step": 57205 }, { "epoch": 0.6976829268292682, "grad_norm": 0.5087875723838806, "learning_rate": 1.5348780487804878e-05, "loss": 0.0331, "step": 57210 }, { "epoch": 0.6977439024390244, "grad_norm": 0.5063304305076599, "learning_rate": 1.534837398373984e-05, "loss": 0.0407, "step": 57215 }, { "epoch": 0.6978048780487804, "grad_norm": 0.19030235707759857, "learning_rate": 1.5347967479674798e-05, "loss": 0.0231, "step": 57220 }, { "epoch": 0.6978658536585366, "grad_norm": 0.6884145736694336, "learning_rate": 1.5347560975609756e-05, "loss": 0.0426, "step": 57225 }, { "epoch": 0.6979268292682926, "grad_norm": 0.4307136535644531, "learning_rate": 1.5347154471544717e-05, "loss": 0.0637, "step": 57230 }, { "epoch": 0.6979878048780488, "grad_norm": 0.7514874339103699, "learning_rate": 1.5346747967479676e-05, "loss": 0.0662, "step": 57235 }, { "epoch": 0.6980487804878048, "grad_norm": 0.42368003726005554, "learning_rate": 1.5346341463414637e-05, "loss": 0.0439, "step": 57240 }, { "epoch": 0.698109756097561, "grad_norm": 0.5286861658096313, "learning_rate": 1.5345934959349595e-05, "loss": 0.0669, "step": 57245 }, { "epoch": 0.698170731707317, "grad_norm": 0.41141530871391296, "learning_rate": 1.5345528455284557e-05, "loss": 0.0563, "step": 57250 }, { "epoch": 0.6982317073170732, "grad_norm": 0.20495487749576569, "learning_rate": 1.534512195121951e-05, "loss": 0.0468, "step": 57255 }, { "epoch": 0.6982926829268292, "grad_norm": 0.8627120852470398, "learning_rate": 1.5344715447154473e-05, "loss": 0.0554, "step": 57260 }, { "epoch": 0.6983536585365854, "grad_norm": 0.7135831117630005, "learning_rate": 1.534430894308943e-05, "loss": 0.042, "step": 57265 }, { "epoch": 0.6984146341463414, "grad_norm": 0.4468061327934265, "learning_rate": 1.5343902439024393e-05, "loss": 0.0262, "step": 57270 }, { "epoch": 0.6984756097560976, "grad_norm": 0.6122196316719055, "learning_rate": 1.534349593495935e-05, "loss": 0.0606, "step": 57275 }, { "epoch": 0.6985365853658536, "grad_norm": 0.5721719264984131, "learning_rate": 1.5343089430894312e-05, "loss": 0.077, "step": 57280 }, { "epoch": 0.6985975609756098, "grad_norm": 0.4860312044620514, "learning_rate": 1.534268292682927e-05, "loss": 0.0485, "step": 57285 }, { "epoch": 0.6986585365853658, "grad_norm": 0.5666173100471497, "learning_rate": 1.534227642276423e-05, "loss": 0.0571, "step": 57290 }, { "epoch": 0.698719512195122, "grad_norm": 0.495434433221817, "learning_rate": 1.5341869918699187e-05, "loss": 0.0368, "step": 57295 }, { "epoch": 0.698780487804878, "grad_norm": 1.4000282287597656, "learning_rate": 1.5341463414634148e-05, "loss": 0.032, "step": 57300 }, { "epoch": 0.6988414634146342, "grad_norm": 0.47277581691741943, "learning_rate": 1.5341056910569106e-05, "loss": 0.0367, "step": 57305 }, { "epoch": 0.6989024390243902, "grad_norm": 1.031412959098816, "learning_rate": 1.5340650406504068e-05, "loss": 0.0315, "step": 57310 }, { "epoch": 0.6989634146341464, "grad_norm": 0.5494604706764221, "learning_rate": 1.5340243902439026e-05, "loss": 0.0479, "step": 57315 }, { "epoch": 0.6990243902439024, "grad_norm": 0.5336493253707886, "learning_rate": 1.5339837398373984e-05, "loss": 0.0407, "step": 57320 }, { "epoch": 0.6990853658536585, "grad_norm": 0.46453115344047546, "learning_rate": 1.5339430894308946e-05, "loss": 0.0419, "step": 57325 }, { "epoch": 0.6991463414634146, "grad_norm": 0.9025481939315796, "learning_rate": 1.5339024390243904e-05, "loss": 0.046, "step": 57330 }, { "epoch": 0.6992073170731707, "grad_norm": 0.542140543460846, "learning_rate": 1.5338617886178865e-05, "loss": 0.0426, "step": 57335 }, { "epoch": 0.6992682926829268, "grad_norm": 1.3638103008270264, "learning_rate": 1.5338211382113823e-05, "loss": 0.0881, "step": 57340 }, { "epoch": 0.6993292682926829, "grad_norm": 0.4088969826698303, "learning_rate": 1.533780487804878e-05, "loss": 0.0653, "step": 57345 }, { "epoch": 0.699390243902439, "grad_norm": 0.7808190584182739, "learning_rate": 1.533739837398374e-05, "loss": 0.0664, "step": 57350 }, { "epoch": 0.6994512195121951, "grad_norm": 1.345369577407837, "learning_rate": 1.53369918699187e-05, "loss": 0.059, "step": 57355 }, { "epoch": 0.6995121951219512, "grad_norm": 0.7030069828033447, "learning_rate": 1.533658536585366e-05, "loss": 0.0474, "step": 57360 }, { "epoch": 0.6995731707317073, "grad_norm": 0.6648716926574707, "learning_rate": 1.533617886178862e-05, "loss": 0.0749, "step": 57365 }, { "epoch": 0.6996341463414634, "grad_norm": 0.6494221687316895, "learning_rate": 1.533577235772358e-05, "loss": 0.071, "step": 57370 }, { "epoch": 0.6996951219512195, "grad_norm": 0.39155158400535583, "learning_rate": 1.5335365853658537e-05, "loss": 0.0623, "step": 57375 }, { "epoch": 0.6997560975609756, "grad_norm": 1.0666314363479614, "learning_rate": 1.5334959349593495e-05, "loss": 0.0463, "step": 57380 }, { "epoch": 0.6998170731707317, "grad_norm": 0.7214162349700928, "learning_rate": 1.5334552845528457e-05, "loss": 0.0711, "step": 57385 }, { "epoch": 0.6998780487804878, "grad_norm": 0.295890212059021, "learning_rate": 1.5334146341463415e-05, "loss": 0.0256, "step": 57390 }, { "epoch": 0.6999390243902439, "grad_norm": 0.42285799980163574, "learning_rate": 1.5333739837398376e-05, "loss": 0.0887, "step": 57395 }, { "epoch": 0.7, "grad_norm": 0.5227261185646057, "learning_rate": 1.5333333333333334e-05, "loss": 0.0801, "step": 57400 }, { "epoch": 0.7000609756097561, "grad_norm": 0.4095860421657562, "learning_rate": 1.5332926829268293e-05, "loss": 0.0585, "step": 57405 }, { "epoch": 0.7001219512195122, "grad_norm": 0.38283178210258484, "learning_rate": 1.5332520325203254e-05, "loss": 0.0509, "step": 57410 }, { "epoch": 0.7001829268292683, "grad_norm": 0.6246838569641113, "learning_rate": 1.5332113821138212e-05, "loss": 0.0386, "step": 57415 }, { "epoch": 0.7002439024390243, "grad_norm": 0.49832427501678467, "learning_rate": 1.5331707317073174e-05, "loss": 0.0417, "step": 57420 }, { "epoch": 0.7003048780487805, "grad_norm": 0.3950454890727997, "learning_rate": 1.5331300813008132e-05, "loss": 0.0687, "step": 57425 }, { "epoch": 0.7003658536585365, "grad_norm": 0.24071337282657623, "learning_rate": 1.5330894308943093e-05, "loss": 0.0328, "step": 57430 }, { "epoch": 0.7004268292682927, "grad_norm": 1.64695405960083, "learning_rate": 1.5330487804878048e-05, "loss": 0.096, "step": 57435 }, { "epoch": 0.7004878048780487, "grad_norm": 1.2331347465515137, "learning_rate": 1.533008130081301e-05, "loss": 0.0599, "step": 57440 }, { "epoch": 0.7005487804878049, "grad_norm": 0.7031156420707703, "learning_rate": 1.5329674796747968e-05, "loss": 0.0373, "step": 57445 }, { "epoch": 0.7006097560975609, "grad_norm": 1.3800112009048462, "learning_rate": 1.532926829268293e-05, "loss": 0.0793, "step": 57450 }, { "epoch": 0.7006707317073171, "grad_norm": 0.4467492401599884, "learning_rate": 1.5328861788617887e-05, "loss": 0.0602, "step": 57455 }, { "epoch": 0.7007317073170731, "grad_norm": 0.6243218779563904, "learning_rate": 1.532845528455285e-05, "loss": 0.0599, "step": 57460 }, { "epoch": 0.7007926829268293, "grad_norm": 0.7560437321662903, "learning_rate": 1.5328048780487804e-05, "loss": 0.0423, "step": 57465 }, { "epoch": 0.7008536585365853, "grad_norm": 0.8905957341194153, "learning_rate": 1.5327642276422765e-05, "loss": 0.0642, "step": 57470 }, { "epoch": 0.7009146341463415, "grad_norm": 0.3437472879886627, "learning_rate": 1.5327235772357723e-05, "loss": 0.0407, "step": 57475 }, { "epoch": 0.7009756097560975, "grad_norm": 0.2945975661277771, "learning_rate": 1.5326829268292685e-05, "loss": 0.0724, "step": 57480 }, { "epoch": 0.7010365853658537, "grad_norm": 0.4534766376018524, "learning_rate": 1.5326422764227643e-05, "loss": 0.032, "step": 57485 }, { "epoch": 0.7010975609756097, "grad_norm": 0.42088356614112854, "learning_rate": 1.5326016260162604e-05, "loss": 0.035, "step": 57490 }, { "epoch": 0.7011585365853659, "grad_norm": 1.0683706998825073, "learning_rate": 1.5325609756097563e-05, "loss": 0.0751, "step": 57495 }, { "epoch": 0.7012195121951219, "grad_norm": 0.5614152550697327, "learning_rate": 1.532520325203252e-05, "loss": 0.0513, "step": 57500 }, { "epoch": 0.7012804878048781, "grad_norm": 0.25552722811698914, "learning_rate": 1.5324796747967482e-05, "loss": 0.0407, "step": 57505 }, { "epoch": 0.7013414634146341, "grad_norm": 0.5222150683403015, "learning_rate": 1.532439024390244e-05, "loss": 0.0418, "step": 57510 }, { "epoch": 0.7014024390243903, "grad_norm": 1.2503297328948975, "learning_rate": 1.5323983739837402e-05, "loss": 0.0574, "step": 57515 }, { "epoch": 0.7014634146341463, "grad_norm": 0.36976948380470276, "learning_rate": 1.532357723577236e-05, "loss": 0.0544, "step": 57520 }, { "epoch": 0.7015243902439025, "grad_norm": 0.47155869007110596, "learning_rate": 1.5323170731707318e-05, "loss": 0.0504, "step": 57525 }, { "epoch": 0.7015853658536585, "grad_norm": 0.4897140860557556, "learning_rate": 1.5322764227642276e-05, "loss": 0.0598, "step": 57530 }, { "epoch": 0.7016463414634146, "grad_norm": 0.533989667892456, "learning_rate": 1.5322357723577238e-05, "loss": 0.0414, "step": 57535 }, { "epoch": 0.7017073170731707, "grad_norm": 0.6195166707038879, "learning_rate": 1.5321951219512196e-05, "loss": 0.0465, "step": 57540 }, { "epoch": 0.7017682926829268, "grad_norm": 0.4731225073337555, "learning_rate": 1.5321544715447157e-05, "loss": 0.0424, "step": 57545 }, { "epoch": 0.7018292682926829, "grad_norm": 1.3011000156402588, "learning_rate": 1.5321138211382116e-05, "loss": 0.0579, "step": 57550 }, { "epoch": 0.701890243902439, "grad_norm": 0.41903236508369446, "learning_rate": 1.5320731707317074e-05, "loss": 0.0515, "step": 57555 }, { "epoch": 0.7019512195121951, "grad_norm": 0.9111706614494324, "learning_rate": 1.5320325203252032e-05, "loss": 0.071, "step": 57560 }, { "epoch": 0.7020121951219512, "grad_norm": 0.6073790192604065, "learning_rate": 1.5319918699186993e-05, "loss": 0.0453, "step": 57565 }, { "epoch": 0.7020731707317073, "grad_norm": 1.5027151107788086, "learning_rate": 1.531951219512195e-05, "loss": 0.0458, "step": 57570 }, { "epoch": 0.7021341463414634, "grad_norm": 2.2168517112731934, "learning_rate": 1.5319105691056913e-05, "loss": 0.0895, "step": 57575 }, { "epoch": 0.7021951219512195, "grad_norm": 0.3926970362663269, "learning_rate": 1.531869918699187e-05, "loss": 0.0357, "step": 57580 }, { "epoch": 0.7022560975609756, "grad_norm": 0.4186280071735382, "learning_rate": 1.531829268292683e-05, "loss": 0.0371, "step": 57585 }, { "epoch": 0.7023170731707317, "grad_norm": 0.9106911420822144, "learning_rate": 1.531788617886179e-05, "loss": 0.0325, "step": 57590 }, { "epoch": 0.7023780487804878, "grad_norm": 0.5316398739814758, "learning_rate": 1.531747967479675e-05, "loss": 0.0455, "step": 57595 }, { "epoch": 0.7024390243902439, "grad_norm": 0.688742995262146, "learning_rate": 1.531707317073171e-05, "loss": 0.0587, "step": 57600 }, { "epoch": 0.7025, "grad_norm": 0.7831417322158813, "learning_rate": 1.531666666666667e-05, "loss": 0.0546, "step": 57605 }, { "epoch": 0.702560975609756, "grad_norm": 0.4072728753089905, "learning_rate": 1.5316260162601627e-05, "loss": 0.0294, "step": 57610 }, { "epoch": 0.7026219512195122, "grad_norm": 0.8149619102478027, "learning_rate": 1.5315853658536585e-05, "loss": 0.0806, "step": 57615 }, { "epoch": 0.7026829268292683, "grad_norm": 0.9252163767814636, "learning_rate": 1.5315447154471546e-05, "loss": 0.0408, "step": 57620 }, { "epoch": 0.7027439024390244, "grad_norm": 1.0354359149932861, "learning_rate": 1.5315040650406504e-05, "loss": 0.1128, "step": 57625 }, { "epoch": 0.7028048780487804, "grad_norm": 0.3341919481754303, "learning_rate": 1.5314634146341466e-05, "loss": 0.0759, "step": 57630 }, { "epoch": 0.7028658536585366, "grad_norm": 0.5225874781608582, "learning_rate": 1.5314227642276424e-05, "loss": 0.0477, "step": 57635 }, { "epoch": 0.7029268292682926, "grad_norm": 0.7812848687171936, "learning_rate": 1.5313821138211386e-05, "loss": 0.0802, "step": 57640 }, { "epoch": 0.7029878048780488, "grad_norm": 0.5747586488723755, "learning_rate": 1.531341463414634e-05, "loss": 0.0485, "step": 57645 }, { "epoch": 0.7030487804878048, "grad_norm": 0.7246751189231873, "learning_rate": 1.5313008130081302e-05, "loss": 0.0521, "step": 57650 }, { "epoch": 0.703109756097561, "grad_norm": 1.3258492946624756, "learning_rate": 1.531260162601626e-05, "loss": 0.0411, "step": 57655 }, { "epoch": 0.703170731707317, "grad_norm": 0.3983481824398041, "learning_rate": 1.531219512195122e-05, "loss": 0.0583, "step": 57660 }, { "epoch": 0.7032317073170732, "grad_norm": 0.5518993139266968, "learning_rate": 1.531178861788618e-05, "loss": 0.0533, "step": 57665 }, { "epoch": 0.7032926829268292, "grad_norm": 0.7521728873252869, "learning_rate": 1.531138211382114e-05, "loss": 0.0464, "step": 57670 }, { "epoch": 0.7033536585365854, "grad_norm": 0.7910817861557007, "learning_rate": 1.53109756097561e-05, "loss": 0.0572, "step": 57675 }, { "epoch": 0.7034146341463414, "grad_norm": 0.4235917627811432, "learning_rate": 1.5310569105691057e-05, "loss": 0.0597, "step": 57680 }, { "epoch": 0.7034756097560976, "grad_norm": 1.375669002532959, "learning_rate": 1.531016260162602e-05, "loss": 0.0434, "step": 57685 }, { "epoch": 0.7035365853658536, "grad_norm": 0.4336198568344116, "learning_rate": 1.5309756097560977e-05, "loss": 0.0476, "step": 57690 }, { "epoch": 0.7035975609756098, "grad_norm": 0.750280499458313, "learning_rate": 1.530934959349594e-05, "loss": 0.0608, "step": 57695 }, { "epoch": 0.7036585365853658, "grad_norm": 0.5319986343383789, "learning_rate": 1.5308943089430897e-05, "loss": 0.0985, "step": 57700 }, { "epoch": 0.703719512195122, "grad_norm": 0.23892897367477417, "learning_rate": 1.5308536585365855e-05, "loss": 0.0615, "step": 57705 }, { "epoch": 0.703780487804878, "grad_norm": 0.40668871998786926, "learning_rate": 1.5308130081300813e-05, "loss": 0.0425, "step": 57710 }, { "epoch": 0.7038414634146342, "grad_norm": 0.5136374235153198, "learning_rate": 1.5307723577235774e-05, "loss": 0.0395, "step": 57715 }, { "epoch": 0.7039024390243902, "grad_norm": 0.3733982741832733, "learning_rate": 1.5307317073170733e-05, "loss": 0.05, "step": 57720 }, { "epoch": 0.7039634146341464, "grad_norm": 0.3617236018180847, "learning_rate": 1.5306910569105694e-05, "loss": 0.0495, "step": 57725 }, { "epoch": 0.7040243902439024, "grad_norm": 0.4851081669330597, "learning_rate": 1.5306504065040652e-05, "loss": 0.0515, "step": 57730 }, { "epoch": 0.7040853658536586, "grad_norm": 0.3549160659313202, "learning_rate": 1.530609756097561e-05, "loss": 0.0363, "step": 57735 }, { "epoch": 0.7041463414634146, "grad_norm": 0.7341301441192627, "learning_rate": 1.530569105691057e-05, "loss": 0.0568, "step": 57740 }, { "epoch": 0.7042073170731707, "grad_norm": 1.1294831037521362, "learning_rate": 1.530528455284553e-05, "loss": 0.0355, "step": 57745 }, { "epoch": 0.7042682926829268, "grad_norm": 0.5179868340492249, "learning_rate": 1.5304878048780488e-05, "loss": 0.0385, "step": 57750 }, { "epoch": 0.7043292682926829, "grad_norm": 0.6479395031929016, "learning_rate": 1.530447154471545e-05, "loss": 0.0441, "step": 57755 }, { "epoch": 0.704390243902439, "grad_norm": 0.3931514620780945, "learning_rate": 1.5304065040650408e-05, "loss": 0.045, "step": 57760 }, { "epoch": 0.7044512195121951, "grad_norm": 0.8405985236167908, "learning_rate": 1.5303658536585366e-05, "loss": 0.0457, "step": 57765 }, { "epoch": 0.7045121951219512, "grad_norm": 0.3820911645889282, "learning_rate": 1.5303252032520327e-05, "loss": 0.0409, "step": 57770 }, { "epoch": 0.7045731707317073, "grad_norm": 0.35801729559898376, "learning_rate": 1.5302845528455286e-05, "loss": 0.0323, "step": 57775 }, { "epoch": 0.7046341463414634, "grad_norm": 0.377434641122818, "learning_rate": 1.5302439024390247e-05, "loss": 0.0428, "step": 57780 }, { "epoch": 0.7046951219512195, "grad_norm": 0.5978137254714966, "learning_rate": 1.5302032520325205e-05, "loss": 0.0511, "step": 57785 }, { "epoch": 0.7047560975609756, "grad_norm": 0.3524564206600189, "learning_rate": 1.5301626016260163e-05, "loss": 0.0716, "step": 57790 }, { "epoch": 0.7048170731707317, "grad_norm": 0.7377248406410217, "learning_rate": 1.530121951219512e-05, "loss": 0.0583, "step": 57795 }, { "epoch": 0.7048780487804878, "grad_norm": 0.40298736095428467, "learning_rate": 1.5300813008130083e-05, "loss": 0.0526, "step": 57800 }, { "epoch": 0.7049390243902439, "grad_norm": 0.8541671633720398, "learning_rate": 1.530040650406504e-05, "loss": 0.0514, "step": 57805 }, { "epoch": 0.705, "grad_norm": 0.5440132021903992, "learning_rate": 1.5300000000000003e-05, "loss": 0.0887, "step": 57810 }, { "epoch": 0.7050609756097561, "grad_norm": 0.6048829555511475, "learning_rate": 1.529959349593496e-05, "loss": 0.0561, "step": 57815 }, { "epoch": 0.7051219512195122, "grad_norm": 1.2308809757232666, "learning_rate": 1.5299186991869922e-05, "loss": 0.0838, "step": 57820 }, { "epoch": 0.7051829268292683, "grad_norm": 0.5617921352386475, "learning_rate": 1.5298780487804877e-05, "loss": 0.0432, "step": 57825 }, { "epoch": 0.7052439024390244, "grad_norm": 0.35915541648864746, "learning_rate": 1.529837398373984e-05, "loss": 0.0683, "step": 57830 }, { "epoch": 0.7053048780487805, "grad_norm": 0.46434444189071655, "learning_rate": 1.5297967479674797e-05, "loss": 0.0462, "step": 57835 }, { "epoch": 0.7053658536585365, "grad_norm": 0.6908575892448425, "learning_rate": 1.5297560975609758e-05, "loss": 0.0575, "step": 57840 }, { "epoch": 0.7054268292682927, "grad_norm": 0.9145290851593018, "learning_rate": 1.5297154471544716e-05, "loss": 0.0468, "step": 57845 }, { "epoch": 0.7054878048780487, "grad_norm": 0.4659666419029236, "learning_rate": 1.5296747967479678e-05, "loss": 0.0664, "step": 57850 }, { "epoch": 0.7055487804878049, "grad_norm": 0.5405831336975098, "learning_rate": 1.5296341463414636e-05, "loss": 0.0499, "step": 57855 }, { "epoch": 0.7056097560975609, "grad_norm": 0.5433193445205688, "learning_rate": 1.5295934959349594e-05, "loss": 0.0655, "step": 57860 }, { "epoch": 0.7056707317073171, "grad_norm": 0.7209814786911011, "learning_rate": 1.5295528455284556e-05, "loss": 0.065, "step": 57865 }, { "epoch": 0.7057317073170731, "grad_norm": 0.5895641446113586, "learning_rate": 1.5295121951219514e-05, "loss": 0.0386, "step": 57870 }, { "epoch": 0.7057926829268293, "grad_norm": 0.7836705446243286, "learning_rate": 1.5294715447154472e-05, "loss": 0.0412, "step": 57875 }, { "epoch": 0.7058536585365853, "grad_norm": 0.3693181276321411, "learning_rate": 1.5294308943089433e-05, "loss": 0.0486, "step": 57880 }, { "epoch": 0.7059146341463415, "grad_norm": 0.7474376559257507, "learning_rate": 1.529390243902439e-05, "loss": 0.0691, "step": 57885 }, { "epoch": 0.7059756097560975, "grad_norm": 0.7823330760002136, "learning_rate": 1.529349593495935e-05, "loss": 0.0443, "step": 57890 }, { "epoch": 0.7060365853658537, "grad_norm": 0.20734405517578125, "learning_rate": 1.529308943089431e-05, "loss": 0.0483, "step": 57895 }, { "epoch": 0.7060975609756097, "grad_norm": 1.6945141553878784, "learning_rate": 1.529268292682927e-05, "loss": 0.0558, "step": 57900 }, { "epoch": 0.7061585365853659, "grad_norm": 0.6253566741943359, "learning_rate": 1.529227642276423e-05, "loss": 0.044, "step": 57905 }, { "epoch": 0.7062195121951219, "grad_norm": 1.533850908279419, "learning_rate": 1.529186991869919e-05, "loss": 0.0351, "step": 57910 }, { "epoch": 0.7062804878048781, "grad_norm": 0.7994462847709656, "learning_rate": 1.5291463414634147e-05, "loss": 0.0635, "step": 57915 }, { "epoch": 0.7063414634146341, "grad_norm": 0.5775812268257141, "learning_rate": 1.5291056910569105e-05, "loss": 0.0637, "step": 57920 }, { "epoch": 0.7064024390243903, "grad_norm": 0.3971288502216339, "learning_rate": 1.5290650406504067e-05, "loss": 0.0668, "step": 57925 }, { "epoch": 0.7064634146341463, "grad_norm": 0.9666011929512024, "learning_rate": 1.5290243902439025e-05, "loss": 0.0436, "step": 57930 }, { "epoch": 0.7065243902439025, "grad_norm": 0.601520836353302, "learning_rate": 1.5289837398373986e-05, "loss": 0.0393, "step": 57935 }, { "epoch": 0.7065853658536585, "grad_norm": 0.8635389804840088, "learning_rate": 1.5289430894308944e-05, "loss": 0.0692, "step": 57940 }, { "epoch": 0.7066463414634147, "grad_norm": 0.8638709783554077, "learning_rate": 1.5289024390243903e-05, "loss": 0.0532, "step": 57945 }, { "epoch": 0.7067073170731707, "grad_norm": 0.7420406937599182, "learning_rate": 1.5288617886178864e-05, "loss": 0.0952, "step": 57950 }, { "epoch": 0.7067682926829268, "grad_norm": 0.5706419944763184, "learning_rate": 1.5288211382113822e-05, "loss": 0.0348, "step": 57955 }, { "epoch": 0.7068292682926829, "grad_norm": 0.3914712071418762, "learning_rate": 1.528780487804878e-05, "loss": 0.048, "step": 57960 }, { "epoch": 0.706890243902439, "grad_norm": 1.0473108291625977, "learning_rate": 1.5287398373983742e-05, "loss": 0.0471, "step": 57965 }, { "epoch": 0.7069512195121951, "grad_norm": 0.6220370531082153, "learning_rate": 1.52869918699187e-05, "loss": 0.0331, "step": 57970 }, { "epoch": 0.7070121951219512, "grad_norm": 0.44676223397254944, "learning_rate": 1.5286585365853658e-05, "loss": 0.0521, "step": 57975 }, { "epoch": 0.7070731707317073, "grad_norm": 0.5564998984336853, "learning_rate": 1.528617886178862e-05, "loss": 0.0756, "step": 57980 }, { "epoch": 0.7071341463414634, "grad_norm": 0.7578416466712952, "learning_rate": 1.5285772357723578e-05, "loss": 0.0386, "step": 57985 }, { "epoch": 0.7071951219512195, "grad_norm": 0.47511669993400574, "learning_rate": 1.528536585365854e-05, "loss": 0.0246, "step": 57990 }, { "epoch": 0.7072560975609756, "grad_norm": 0.563766598701477, "learning_rate": 1.5284959349593497e-05, "loss": 0.0325, "step": 57995 }, { "epoch": 0.7073170731707317, "grad_norm": 0.19232438504695892, "learning_rate": 1.528455284552846e-05, "loss": 0.0289, "step": 58000 }, { "epoch": 0.7073780487804878, "grad_norm": 0.9597378969192505, "learning_rate": 1.5284146341463414e-05, "loss": 0.0969, "step": 58005 }, { "epoch": 0.7074390243902439, "grad_norm": 0.4109092950820923, "learning_rate": 1.5283739837398375e-05, "loss": 0.069, "step": 58010 }, { "epoch": 0.7075, "grad_norm": 1.1846234798431396, "learning_rate": 1.5283333333333333e-05, "loss": 0.0489, "step": 58015 }, { "epoch": 0.7075609756097561, "grad_norm": 0.9287011027336121, "learning_rate": 1.5282926829268295e-05, "loss": 0.038, "step": 58020 }, { "epoch": 0.7076219512195122, "grad_norm": 0.5735839605331421, "learning_rate": 1.5282520325203253e-05, "loss": 0.0558, "step": 58025 }, { "epoch": 0.7076829268292683, "grad_norm": 0.7777151465415955, "learning_rate": 1.5282113821138214e-05, "loss": 0.0453, "step": 58030 }, { "epoch": 0.7077439024390244, "grad_norm": 0.49306637048721313, "learning_rate": 1.5281707317073173e-05, "loss": 0.0411, "step": 58035 }, { "epoch": 0.7078048780487805, "grad_norm": 0.7574170231819153, "learning_rate": 1.528130081300813e-05, "loss": 0.0377, "step": 58040 }, { "epoch": 0.7078658536585366, "grad_norm": 3.36069393157959, "learning_rate": 1.5280894308943092e-05, "loss": 0.0517, "step": 58045 }, { "epoch": 0.7079268292682926, "grad_norm": 0.6673798561096191, "learning_rate": 1.528048780487805e-05, "loss": 0.0737, "step": 58050 }, { "epoch": 0.7079878048780488, "grad_norm": 0.7645888924598694, "learning_rate": 1.528008130081301e-05, "loss": 0.0301, "step": 58055 }, { "epoch": 0.7080487804878048, "grad_norm": 0.7165402770042419, "learning_rate": 1.527967479674797e-05, "loss": 0.0807, "step": 58060 }, { "epoch": 0.708109756097561, "grad_norm": 0.2466534525156021, "learning_rate": 1.5279268292682928e-05, "loss": 0.1054, "step": 58065 }, { "epoch": 0.708170731707317, "grad_norm": 1.0932446718215942, "learning_rate": 1.5278861788617886e-05, "loss": 0.0559, "step": 58070 }, { "epoch": 0.7082317073170732, "grad_norm": 0.5910635590553284, "learning_rate": 1.5278455284552848e-05, "loss": 0.073, "step": 58075 }, { "epoch": 0.7082926829268292, "grad_norm": 0.3646998405456543, "learning_rate": 1.5278048780487806e-05, "loss": 0.0444, "step": 58080 }, { "epoch": 0.7083536585365854, "grad_norm": 0.5543521046638489, "learning_rate": 1.5277642276422767e-05, "loss": 0.0605, "step": 58085 }, { "epoch": 0.7084146341463414, "grad_norm": 0.8126434087753296, "learning_rate": 1.5277235772357726e-05, "loss": 0.0444, "step": 58090 }, { "epoch": 0.7084756097560976, "grad_norm": 0.44939061999320984, "learning_rate": 1.5276829268292684e-05, "loss": 0.0356, "step": 58095 }, { "epoch": 0.7085365853658536, "grad_norm": 0.6157025694847107, "learning_rate": 1.5276422764227642e-05, "loss": 0.0541, "step": 58100 }, { "epoch": 0.7085975609756098, "grad_norm": 0.7699638605117798, "learning_rate": 1.5276016260162603e-05, "loss": 0.0641, "step": 58105 }, { "epoch": 0.7086585365853658, "grad_norm": 0.5827643871307373, "learning_rate": 1.527560975609756e-05, "loss": 0.0535, "step": 58110 }, { "epoch": 0.708719512195122, "grad_norm": 3.464294672012329, "learning_rate": 1.5275203252032523e-05, "loss": 0.0664, "step": 58115 }, { "epoch": 0.708780487804878, "grad_norm": 0.3468438982963562, "learning_rate": 1.527479674796748e-05, "loss": 0.0378, "step": 58120 }, { "epoch": 0.7088414634146342, "grad_norm": 0.6809291243553162, "learning_rate": 1.527439024390244e-05, "loss": 0.0509, "step": 58125 }, { "epoch": 0.7089024390243902, "grad_norm": 0.7152318954467773, "learning_rate": 1.52739837398374e-05, "loss": 0.0711, "step": 58130 }, { "epoch": 0.7089634146341464, "grad_norm": 0.6923726201057434, "learning_rate": 1.527357723577236e-05, "loss": 0.0591, "step": 58135 }, { "epoch": 0.7090243902439024, "grad_norm": 0.5671471357345581, "learning_rate": 1.5273170731707317e-05, "loss": 0.0752, "step": 58140 }, { "epoch": 0.7090853658536586, "grad_norm": 1.3681215047836304, "learning_rate": 1.527276422764228e-05, "loss": 0.0481, "step": 58145 }, { "epoch": 0.7091463414634146, "grad_norm": 0.619255781173706, "learning_rate": 1.5272357723577237e-05, "loss": 0.0503, "step": 58150 }, { "epoch": 0.7092073170731708, "grad_norm": 0.9876366257667542, "learning_rate": 1.5271951219512195e-05, "loss": 0.0487, "step": 58155 }, { "epoch": 0.7092682926829268, "grad_norm": 0.19146157801151276, "learning_rate": 1.5271544715447156e-05, "loss": 0.0548, "step": 58160 }, { "epoch": 0.709329268292683, "grad_norm": 0.2769346535205841, "learning_rate": 1.5271138211382114e-05, "loss": 0.074, "step": 58165 }, { "epoch": 0.709390243902439, "grad_norm": 0.6186902523040771, "learning_rate": 1.5270731707317076e-05, "loss": 0.0368, "step": 58170 }, { "epoch": 0.7094512195121951, "grad_norm": 0.8473516702651978, "learning_rate": 1.5270325203252034e-05, "loss": 0.031, "step": 58175 }, { "epoch": 0.7095121951219512, "grad_norm": 0.7813392877578735, "learning_rate": 1.5269918699186996e-05, "loss": 0.0567, "step": 58180 }, { "epoch": 0.7095731707317073, "grad_norm": 0.8971875905990601, "learning_rate": 1.526951219512195e-05, "loss": 0.0849, "step": 58185 }, { "epoch": 0.7096341463414634, "grad_norm": 0.7471171617507935, "learning_rate": 1.5269105691056912e-05, "loss": 0.0581, "step": 58190 }, { "epoch": 0.7096951219512195, "grad_norm": 0.4755299389362335, "learning_rate": 1.526869918699187e-05, "loss": 0.0651, "step": 58195 }, { "epoch": 0.7097560975609756, "grad_norm": 0.4909364581108093, "learning_rate": 1.526829268292683e-05, "loss": 0.0725, "step": 58200 }, { "epoch": 0.7098170731707317, "grad_norm": 0.37171775102615356, "learning_rate": 1.526788617886179e-05, "loss": 0.0595, "step": 58205 }, { "epoch": 0.7098780487804878, "grad_norm": 0.5159468054771423, "learning_rate": 1.526747967479675e-05, "loss": 0.0464, "step": 58210 }, { "epoch": 0.7099390243902439, "grad_norm": 0.3727129399776459, "learning_rate": 1.526707317073171e-05, "loss": 0.04, "step": 58215 }, { "epoch": 0.71, "grad_norm": 0.3790886104106903, "learning_rate": 1.5266666666666667e-05, "loss": 0.0299, "step": 58220 }, { "epoch": 0.7100609756097561, "grad_norm": 0.5983748435974121, "learning_rate": 1.5266260162601625e-05, "loss": 0.0524, "step": 58225 }, { "epoch": 0.7101219512195122, "grad_norm": 0.718372642993927, "learning_rate": 1.5265853658536587e-05, "loss": 0.0546, "step": 58230 }, { "epoch": 0.7101829268292683, "grad_norm": 0.5408749580383301, "learning_rate": 1.5265447154471545e-05, "loss": 0.0533, "step": 58235 }, { "epoch": 0.7102439024390244, "grad_norm": 0.6230071187019348, "learning_rate": 1.5265040650406507e-05, "loss": 0.0497, "step": 58240 }, { "epoch": 0.7103048780487805, "grad_norm": 0.6713536977767944, "learning_rate": 1.5264634146341465e-05, "loss": 0.0374, "step": 58245 }, { "epoch": 0.7103658536585366, "grad_norm": 0.6598650813102722, "learning_rate": 1.5264227642276423e-05, "loss": 0.0544, "step": 58250 }, { "epoch": 0.7104268292682927, "grad_norm": 0.2656041383743286, "learning_rate": 1.5263821138211384e-05, "loss": 0.0315, "step": 58255 }, { "epoch": 0.7104878048780487, "grad_norm": 0.752615213394165, "learning_rate": 1.5263414634146343e-05, "loss": 0.0695, "step": 58260 }, { "epoch": 0.7105487804878049, "grad_norm": 0.30214637517929077, "learning_rate": 1.5263008130081304e-05, "loss": 0.0594, "step": 58265 }, { "epoch": 0.7106097560975609, "grad_norm": 0.500688910484314, "learning_rate": 1.5262601626016262e-05, "loss": 0.0343, "step": 58270 }, { "epoch": 0.7106707317073171, "grad_norm": 0.6009209752082825, "learning_rate": 1.526219512195122e-05, "loss": 0.069, "step": 58275 }, { "epoch": 0.7107317073170731, "grad_norm": 0.349536269903183, "learning_rate": 1.526178861788618e-05, "loss": 0.0257, "step": 58280 }, { "epoch": 0.7107926829268293, "grad_norm": 0.8435274958610535, "learning_rate": 1.526138211382114e-05, "loss": 0.0679, "step": 58285 }, { "epoch": 0.7108536585365853, "grad_norm": 0.8026602864265442, "learning_rate": 1.5260975609756098e-05, "loss": 0.0517, "step": 58290 }, { "epoch": 0.7109146341463415, "grad_norm": 1.1854878664016724, "learning_rate": 1.526056910569106e-05, "loss": 0.0362, "step": 58295 }, { "epoch": 0.7109756097560975, "grad_norm": 1.121617078781128, "learning_rate": 1.5260162601626018e-05, "loss": 0.0338, "step": 58300 }, { "epoch": 0.7110365853658537, "grad_norm": 0.9014957547187805, "learning_rate": 1.5259756097560976e-05, "loss": 0.0714, "step": 58305 }, { "epoch": 0.7110975609756097, "grad_norm": 0.37561678886413574, "learning_rate": 1.5259349593495937e-05, "loss": 0.0372, "step": 58310 }, { "epoch": 0.7111585365853659, "grad_norm": 0.25683993101119995, "learning_rate": 1.5258943089430895e-05, "loss": 0.0284, "step": 58315 }, { "epoch": 0.7112195121951219, "grad_norm": 0.5886292457580566, "learning_rate": 1.5258536585365855e-05, "loss": 0.0523, "step": 58320 }, { "epoch": 0.7112804878048781, "grad_norm": 1.4732955694198608, "learning_rate": 1.5258130081300815e-05, "loss": 0.0324, "step": 58325 }, { "epoch": 0.7113414634146341, "grad_norm": 0.4253116548061371, "learning_rate": 1.5257723577235775e-05, "loss": 0.0484, "step": 58330 }, { "epoch": 0.7114024390243903, "grad_norm": 0.6204745769500732, "learning_rate": 1.5257317073170731e-05, "loss": 0.0477, "step": 58335 }, { "epoch": 0.7114634146341463, "grad_norm": 0.57743239402771, "learning_rate": 1.5256910569105691e-05, "loss": 0.0476, "step": 58340 }, { "epoch": 0.7115243902439025, "grad_norm": 1.3746737241744995, "learning_rate": 1.5256504065040651e-05, "loss": 0.059, "step": 58345 }, { "epoch": 0.7115853658536585, "grad_norm": 0.967806339263916, "learning_rate": 1.525609756097561e-05, "loss": 0.0302, "step": 58350 }, { "epoch": 0.7116463414634147, "grad_norm": 0.24789464473724365, "learning_rate": 1.525569105691057e-05, "loss": 0.0379, "step": 58355 }, { "epoch": 0.7117073170731707, "grad_norm": 0.4738484025001526, "learning_rate": 1.525528455284553e-05, "loss": 0.0779, "step": 58360 }, { "epoch": 0.7117682926829269, "grad_norm": 0.8351383805274963, "learning_rate": 1.5254878048780489e-05, "loss": 0.0532, "step": 58365 }, { "epoch": 0.7118292682926829, "grad_norm": 1.371980905532837, "learning_rate": 1.5254471544715448e-05, "loss": 0.0419, "step": 58370 }, { "epoch": 0.711890243902439, "grad_norm": 0.7274569869041443, "learning_rate": 1.5254065040650408e-05, "loss": 0.0554, "step": 58375 }, { "epoch": 0.7119512195121951, "grad_norm": 0.7066439986228943, "learning_rate": 1.5253658536585368e-05, "loss": 0.0864, "step": 58380 }, { "epoch": 0.7120121951219512, "grad_norm": 0.6161946058273315, "learning_rate": 1.5253252032520328e-05, "loss": 0.0466, "step": 58385 }, { "epoch": 0.7120731707317073, "grad_norm": 0.39219802618026733, "learning_rate": 1.5252845528455286e-05, "loss": 0.039, "step": 58390 }, { "epoch": 0.7121341463414634, "grad_norm": 0.4130873680114746, "learning_rate": 1.5252439024390244e-05, "loss": 0.0482, "step": 58395 }, { "epoch": 0.7121951219512195, "grad_norm": 0.5932705998420715, "learning_rate": 1.5252032520325204e-05, "loss": 0.0299, "step": 58400 }, { "epoch": 0.7122560975609756, "grad_norm": 0.6025171279907227, "learning_rate": 1.5251626016260164e-05, "loss": 0.0385, "step": 58405 }, { "epoch": 0.7123170731707317, "grad_norm": 1.4318931102752686, "learning_rate": 1.5251219512195124e-05, "loss": 0.0354, "step": 58410 }, { "epoch": 0.7123780487804878, "grad_norm": 0.8181411027908325, "learning_rate": 1.5250813008130083e-05, "loss": 0.0663, "step": 58415 }, { "epoch": 0.7124390243902439, "grad_norm": 0.8084062933921814, "learning_rate": 1.5250406504065043e-05, "loss": 0.0707, "step": 58420 }, { "epoch": 0.7125, "grad_norm": 0.4132123291492462, "learning_rate": 1.525e-05, "loss": 0.025, "step": 58425 }, { "epoch": 0.7125609756097561, "grad_norm": 0.6765090227127075, "learning_rate": 1.524959349593496e-05, "loss": 0.0515, "step": 58430 }, { "epoch": 0.7126219512195122, "grad_norm": 0.5461101531982422, "learning_rate": 1.524918699186992e-05, "loss": 0.0305, "step": 58435 }, { "epoch": 0.7126829268292683, "grad_norm": 0.6520529389381409, "learning_rate": 1.5248780487804879e-05, "loss": 0.0517, "step": 58440 }, { "epoch": 0.7127439024390244, "grad_norm": 0.2766263484954834, "learning_rate": 1.5248373983739839e-05, "loss": 0.0462, "step": 58445 }, { "epoch": 0.7128048780487805, "grad_norm": 1.275371789932251, "learning_rate": 1.5247967479674799e-05, "loss": 0.0618, "step": 58450 }, { "epoch": 0.7128658536585366, "grad_norm": 0.5690521597862244, "learning_rate": 1.5247560975609757e-05, "loss": 0.0479, "step": 58455 }, { "epoch": 0.7129268292682926, "grad_norm": 1.2046769857406616, "learning_rate": 1.5247154471544717e-05, "loss": 0.0407, "step": 58460 }, { "epoch": 0.7129878048780488, "grad_norm": 0.2657300531864166, "learning_rate": 1.5246747967479677e-05, "loss": 0.0571, "step": 58465 }, { "epoch": 0.7130487804878048, "grad_norm": 1.1468762159347534, "learning_rate": 1.5246341463414636e-05, "loss": 0.0625, "step": 58470 }, { "epoch": 0.713109756097561, "grad_norm": 1.1242365837097168, "learning_rate": 1.5245934959349595e-05, "loss": 0.0608, "step": 58475 }, { "epoch": 0.713170731707317, "grad_norm": 0.6691293716430664, "learning_rate": 1.5245528455284554e-05, "loss": 0.0554, "step": 58480 }, { "epoch": 0.7132317073170732, "grad_norm": 0.26057741045951843, "learning_rate": 1.5245121951219512e-05, "loss": 0.0488, "step": 58485 }, { "epoch": 0.7132926829268292, "grad_norm": 0.6716863512992859, "learning_rate": 1.5244715447154472e-05, "loss": 0.0904, "step": 58490 }, { "epoch": 0.7133536585365854, "grad_norm": 0.4649914801120758, "learning_rate": 1.5244308943089432e-05, "loss": 0.0523, "step": 58495 }, { "epoch": 0.7134146341463414, "grad_norm": 0.6155428290367126, "learning_rate": 1.5243902439024392e-05, "loss": 0.0628, "step": 58500 }, { "epoch": 0.7134756097560976, "grad_norm": 0.7529675960540771, "learning_rate": 1.5243495934959352e-05, "loss": 0.0376, "step": 58505 }, { "epoch": 0.7135365853658536, "grad_norm": 0.41269004344940186, "learning_rate": 1.5243089430894312e-05, "loss": 0.0525, "step": 58510 }, { "epoch": 0.7135975609756098, "grad_norm": 0.4720990061759949, "learning_rate": 1.5242682926829268e-05, "loss": 0.0584, "step": 58515 }, { "epoch": 0.7136585365853658, "grad_norm": 0.9218209981918335, "learning_rate": 1.5242276422764228e-05, "loss": 0.0405, "step": 58520 }, { "epoch": 0.713719512195122, "grad_norm": 0.5824093222618103, "learning_rate": 1.5241869918699188e-05, "loss": 0.0235, "step": 58525 }, { "epoch": 0.713780487804878, "grad_norm": 0.605594277381897, "learning_rate": 1.5241463414634147e-05, "loss": 0.0714, "step": 58530 }, { "epoch": 0.7138414634146342, "grad_norm": 0.8718422651290894, "learning_rate": 1.5241056910569107e-05, "loss": 0.0623, "step": 58535 }, { "epoch": 0.7139024390243902, "grad_norm": 1.0540059804916382, "learning_rate": 1.5240650406504067e-05, "loss": 0.0507, "step": 58540 }, { "epoch": 0.7139634146341464, "grad_norm": 0.5308212041854858, "learning_rate": 1.5240243902439025e-05, "loss": 0.037, "step": 58545 }, { "epoch": 0.7140243902439024, "grad_norm": 0.3473261594772339, "learning_rate": 1.5239837398373985e-05, "loss": 0.0439, "step": 58550 }, { "epoch": 0.7140853658536586, "grad_norm": 0.3454378843307495, "learning_rate": 1.5239430894308945e-05, "loss": 0.0468, "step": 58555 }, { "epoch": 0.7141463414634146, "grad_norm": 0.6173310279846191, "learning_rate": 1.5239024390243905e-05, "loss": 0.034, "step": 58560 }, { "epoch": 0.7142073170731708, "grad_norm": 0.7161439061164856, "learning_rate": 1.5238617886178863e-05, "loss": 0.0319, "step": 58565 }, { "epoch": 0.7142682926829268, "grad_norm": 0.43613651394844055, "learning_rate": 1.5238211382113823e-05, "loss": 0.0405, "step": 58570 }, { "epoch": 0.714329268292683, "grad_norm": 0.4053315818309784, "learning_rate": 1.523780487804878e-05, "loss": 0.0428, "step": 58575 }, { "epoch": 0.714390243902439, "grad_norm": 0.4354694187641144, "learning_rate": 1.523739837398374e-05, "loss": 0.0312, "step": 58580 }, { "epoch": 0.7144512195121951, "grad_norm": 0.467206746339798, "learning_rate": 1.52369918699187e-05, "loss": 0.0778, "step": 58585 }, { "epoch": 0.7145121951219512, "grad_norm": 0.7635148167610168, "learning_rate": 1.523658536585366e-05, "loss": 0.0684, "step": 58590 }, { "epoch": 0.7145731707317073, "grad_norm": 0.6507049798965454, "learning_rate": 1.523617886178862e-05, "loss": 0.051, "step": 58595 }, { "epoch": 0.7146341463414634, "grad_norm": 0.6620525121688843, "learning_rate": 1.523577235772358e-05, "loss": 0.0659, "step": 58600 }, { "epoch": 0.7146951219512195, "grad_norm": 0.6159461736679077, "learning_rate": 1.5235365853658536e-05, "loss": 0.0496, "step": 58605 }, { "epoch": 0.7147560975609756, "grad_norm": 0.4602160155773163, "learning_rate": 1.5234959349593496e-05, "loss": 0.0538, "step": 58610 }, { "epoch": 0.7148170731707317, "grad_norm": 0.5900185108184814, "learning_rate": 1.5234552845528456e-05, "loss": 0.041, "step": 58615 }, { "epoch": 0.7148780487804878, "grad_norm": 0.2496480941772461, "learning_rate": 1.5234146341463416e-05, "loss": 0.0369, "step": 58620 }, { "epoch": 0.7149390243902439, "grad_norm": 0.8387345671653748, "learning_rate": 1.5233739837398376e-05, "loss": 0.0435, "step": 58625 }, { "epoch": 0.715, "grad_norm": 0.9333280324935913, "learning_rate": 1.5233333333333335e-05, "loss": 0.0478, "step": 58630 }, { "epoch": 0.7150609756097561, "grad_norm": 0.4248013496398926, "learning_rate": 1.5232926829268294e-05, "loss": 0.0411, "step": 58635 }, { "epoch": 0.7151219512195122, "grad_norm": 0.7999098896980286, "learning_rate": 1.5232520325203253e-05, "loss": 0.0819, "step": 58640 }, { "epoch": 0.7151829268292683, "grad_norm": 0.6149290800094604, "learning_rate": 1.5232113821138213e-05, "loss": 0.0509, "step": 58645 }, { "epoch": 0.7152439024390244, "grad_norm": 1.9589358568191528, "learning_rate": 1.5231707317073173e-05, "loss": 0.0316, "step": 58650 }, { "epoch": 0.7153048780487805, "grad_norm": 0.6069384813308716, "learning_rate": 1.5231300813008131e-05, "loss": 0.0548, "step": 58655 }, { "epoch": 0.7153658536585366, "grad_norm": 0.5509705543518066, "learning_rate": 1.5230894308943091e-05, "loss": 0.0656, "step": 58660 }, { "epoch": 0.7154268292682927, "grad_norm": 1.0171091556549072, "learning_rate": 1.5230487804878049e-05, "loss": 0.0773, "step": 58665 }, { "epoch": 0.7154878048780487, "grad_norm": 0.23748986423015594, "learning_rate": 1.5230081300813009e-05, "loss": 0.0374, "step": 58670 }, { "epoch": 0.7155487804878049, "grad_norm": 0.42520269751548767, "learning_rate": 1.5229674796747969e-05, "loss": 0.0497, "step": 58675 }, { "epoch": 0.715609756097561, "grad_norm": 0.5448294878005981, "learning_rate": 1.5229268292682929e-05, "loss": 0.0465, "step": 58680 }, { "epoch": 0.7156707317073171, "grad_norm": 0.5075141787528992, "learning_rate": 1.5228861788617888e-05, "loss": 0.0668, "step": 58685 }, { "epoch": 0.7157317073170731, "grad_norm": 0.7570412158966064, "learning_rate": 1.5228455284552848e-05, "loss": 0.0517, "step": 58690 }, { "epoch": 0.7157926829268293, "grad_norm": 0.6504715085029602, "learning_rate": 1.5228048780487805e-05, "loss": 0.0471, "step": 58695 }, { "epoch": 0.7158536585365853, "grad_norm": 0.47576218843460083, "learning_rate": 1.5227642276422764e-05, "loss": 0.0286, "step": 58700 }, { "epoch": 0.7159146341463415, "grad_norm": 0.5331642031669617, "learning_rate": 1.5227235772357724e-05, "loss": 0.0371, "step": 58705 }, { "epoch": 0.7159756097560975, "grad_norm": 0.8510209321975708, "learning_rate": 1.5226829268292684e-05, "loss": 0.0513, "step": 58710 }, { "epoch": 0.7160365853658537, "grad_norm": 0.320919394493103, "learning_rate": 1.5226422764227644e-05, "loss": 0.0342, "step": 58715 }, { "epoch": 0.7160975609756097, "grad_norm": 0.3855234682559967, "learning_rate": 1.5226016260162604e-05, "loss": 0.039, "step": 58720 }, { "epoch": 0.7161585365853659, "grad_norm": 0.8193671703338623, "learning_rate": 1.5225609756097562e-05, "loss": 0.0936, "step": 58725 }, { "epoch": 0.7162195121951219, "grad_norm": 0.37687042355537415, "learning_rate": 1.5225203252032522e-05, "loss": 0.0609, "step": 58730 }, { "epoch": 0.7162804878048781, "grad_norm": 0.3694254159927368, "learning_rate": 1.5224796747967482e-05, "loss": 0.0506, "step": 58735 }, { "epoch": 0.7163414634146341, "grad_norm": 0.3279152810573578, "learning_rate": 1.522439024390244e-05, "loss": 0.0665, "step": 58740 }, { "epoch": 0.7164024390243903, "grad_norm": 0.7961452603340149, "learning_rate": 1.52239837398374e-05, "loss": 0.0253, "step": 58745 }, { "epoch": 0.7164634146341463, "grad_norm": 0.6249093413352966, "learning_rate": 1.522357723577236e-05, "loss": 0.0493, "step": 58750 }, { "epoch": 0.7165243902439025, "grad_norm": 0.5242794752120972, "learning_rate": 1.5223170731707317e-05, "loss": 0.0414, "step": 58755 }, { "epoch": 0.7165853658536585, "grad_norm": 0.49154677987098694, "learning_rate": 1.5222764227642277e-05, "loss": 0.0317, "step": 58760 }, { "epoch": 0.7166463414634147, "grad_norm": 0.9972173571586609, "learning_rate": 1.5222357723577237e-05, "loss": 0.0706, "step": 58765 }, { "epoch": 0.7167073170731707, "grad_norm": 0.8207975029945374, "learning_rate": 1.5221951219512197e-05, "loss": 0.0586, "step": 58770 }, { "epoch": 0.7167682926829269, "grad_norm": 0.5123898983001709, "learning_rate": 1.5221544715447157e-05, "loss": 0.0658, "step": 58775 }, { "epoch": 0.7168292682926829, "grad_norm": 0.5183807015419006, "learning_rate": 1.5221138211382117e-05, "loss": 0.0662, "step": 58780 }, { "epoch": 0.716890243902439, "grad_norm": 0.6873598098754883, "learning_rate": 1.5220731707317073e-05, "loss": 0.037, "step": 58785 }, { "epoch": 0.7169512195121951, "grad_norm": 0.6775029301643372, "learning_rate": 1.5220325203252033e-05, "loss": 0.0535, "step": 58790 }, { "epoch": 0.7170121951219512, "grad_norm": 1.6105459928512573, "learning_rate": 1.5219918699186993e-05, "loss": 0.0452, "step": 58795 }, { "epoch": 0.7170731707317073, "grad_norm": 0.40728768706321716, "learning_rate": 1.5219512195121952e-05, "loss": 0.0346, "step": 58800 }, { "epoch": 0.7171341463414634, "grad_norm": 0.8633731603622437, "learning_rate": 1.5219105691056912e-05, "loss": 0.0426, "step": 58805 }, { "epoch": 0.7171951219512195, "grad_norm": 0.5576572418212891, "learning_rate": 1.5218699186991872e-05, "loss": 0.0342, "step": 58810 }, { "epoch": 0.7172560975609756, "grad_norm": 1.4218186140060425, "learning_rate": 1.521829268292683e-05, "loss": 0.0578, "step": 58815 }, { "epoch": 0.7173170731707317, "grad_norm": 1.13609778881073, "learning_rate": 1.521788617886179e-05, "loss": 0.0463, "step": 58820 }, { "epoch": 0.7173780487804878, "grad_norm": 0.6352666020393372, "learning_rate": 1.521747967479675e-05, "loss": 0.0476, "step": 58825 }, { "epoch": 0.7174390243902439, "grad_norm": 0.996203601360321, "learning_rate": 1.5217073170731708e-05, "loss": 0.0465, "step": 58830 }, { "epoch": 0.7175, "grad_norm": 0.49889782071113586, "learning_rate": 1.5216666666666668e-05, "loss": 0.0391, "step": 58835 }, { "epoch": 0.7175609756097561, "grad_norm": 0.5414856672286987, "learning_rate": 1.5216260162601628e-05, "loss": 0.0409, "step": 58840 }, { "epoch": 0.7176219512195122, "grad_norm": 0.7340548038482666, "learning_rate": 1.5215853658536586e-05, "loss": 0.0476, "step": 58845 }, { "epoch": 0.7176829268292683, "grad_norm": 0.38459908962249756, "learning_rate": 1.5215447154471546e-05, "loss": 0.0542, "step": 58850 }, { "epoch": 0.7177439024390244, "grad_norm": 0.34784257411956787, "learning_rate": 1.5215040650406505e-05, "loss": 0.0357, "step": 58855 }, { "epoch": 0.7178048780487805, "grad_norm": 0.4500046670436859, "learning_rate": 1.5214634146341465e-05, "loss": 0.0498, "step": 58860 }, { "epoch": 0.7178658536585366, "grad_norm": 0.49383747577667236, "learning_rate": 1.5214227642276425e-05, "loss": 0.0429, "step": 58865 }, { "epoch": 0.7179268292682927, "grad_norm": 0.4295665919780731, "learning_rate": 1.5213821138211385e-05, "loss": 0.0528, "step": 58870 }, { "epoch": 0.7179878048780488, "grad_norm": 0.3962697982788086, "learning_rate": 1.5213414634146341e-05, "loss": 0.0454, "step": 58875 }, { "epoch": 0.7180487804878048, "grad_norm": 0.7742010354995728, "learning_rate": 1.5213008130081301e-05, "loss": 0.0532, "step": 58880 }, { "epoch": 0.718109756097561, "grad_norm": 0.6811779141426086, "learning_rate": 1.5212601626016261e-05, "loss": 0.0759, "step": 58885 }, { "epoch": 0.718170731707317, "grad_norm": 2.541686773300171, "learning_rate": 1.521219512195122e-05, "loss": 0.0658, "step": 58890 }, { "epoch": 0.7182317073170732, "grad_norm": 0.4931827783584595, "learning_rate": 1.521178861788618e-05, "loss": 0.0375, "step": 58895 }, { "epoch": 0.7182926829268292, "grad_norm": 0.5884103178977966, "learning_rate": 1.521138211382114e-05, "loss": 0.0523, "step": 58900 }, { "epoch": 0.7183536585365854, "grad_norm": 0.2463380992412567, "learning_rate": 1.5210975609756099e-05, "loss": 0.0461, "step": 58905 }, { "epoch": 0.7184146341463414, "grad_norm": 0.646586537361145, "learning_rate": 1.5210569105691058e-05, "loss": 0.0513, "step": 58910 }, { "epoch": 0.7184756097560976, "grad_norm": 0.5104955434799194, "learning_rate": 1.5210162601626018e-05, "loss": 0.0583, "step": 58915 }, { "epoch": 0.7185365853658536, "grad_norm": 0.39717066287994385, "learning_rate": 1.5209756097560976e-05, "loss": 0.0426, "step": 58920 }, { "epoch": 0.7185975609756098, "grad_norm": 0.7371383905410767, "learning_rate": 1.5209349593495936e-05, "loss": 0.0923, "step": 58925 }, { "epoch": 0.7186585365853658, "grad_norm": 0.4750470519065857, "learning_rate": 1.5208943089430896e-05, "loss": 0.0547, "step": 58930 }, { "epoch": 0.718719512195122, "grad_norm": 0.2674699127674103, "learning_rate": 1.5208536585365854e-05, "loss": 0.0705, "step": 58935 }, { "epoch": 0.718780487804878, "grad_norm": 0.6425970196723938, "learning_rate": 1.5208130081300814e-05, "loss": 0.0478, "step": 58940 }, { "epoch": 0.7188414634146342, "grad_norm": 0.9027143716812134, "learning_rate": 1.5207723577235774e-05, "loss": 0.0569, "step": 58945 }, { "epoch": 0.7189024390243902, "grad_norm": 0.7827481627464294, "learning_rate": 1.5207317073170734e-05, "loss": 0.0454, "step": 58950 }, { "epoch": 0.7189634146341464, "grad_norm": 0.41891229152679443, "learning_rate": 1.5206910569105693e-05, "loss": 0.0708, "step": 58955 }, { "epoch": 0.7190243902439024, "grad_norm": 1.2091343402862549, "learning_rate": 1.5206504065040653e-05, "loss": 0.0582, "step": 58960 }, { "epoch": 0.7190853658536586, "grad_norm": 0.7328230142593384, "learning_rate": 1.520609756097561e-05, "loss": 0.0313, "step": 58965 }, { "epoch": 0.7191463414634146, "grad_norm": 0.6683298945426941, "learning_rate": 1.520569105691057e-05, "loss": 0.0736, "step": 58970 }, { "epoch": 0.7192073170731708, "grad_norm": 0.2051735669374466, "learning_rate": 1.520528455284553e-05, "loss": 0.0316, "step": 58975 }, { "epoch": 0.7192682926829268, "grad_norm": 0.5860222578048706, "learning_rate": 1.5204878048780489e-05, "loss": 0.0414, "step": 58980 }, { "epoch": 0.719329268292683, "grad_norm": 0.8498989939689636, "learning_rate": 1.5204471544715449e-05, "loss": 0.0861, "step": 58985 }, { "epoch": 0.719390243902439, "grad_norm": 0.5143371820449829, "learning_rate": 1.5204065040650409e-05, "loss": 0.0442, "step": 58990 }, { "epoch": 0.7194512195121952, "grad_norm": 0.3576582372188568, "learning_rate": 1.5203658536585367e-05, "loss": 0.1217, "step": 58995 }, { "epoch": 0.7195121951219512, "grad_norm": 0.4467257857322693, "learning_rate": 1.5203252032520327e-05, "loss": 0.0398, "step": 59000 }, { "epoch": 0.7195731707317073, "grad_norm": 0.6655374765396118, "learning_rate": 1.5202845528455285e-05, "loss": 0.0426, "step": 59005 }, { "epoch": 0.7196341463414634, "grad_norm": 0.95088130235672, "learning_rate": 1.5202439024390245e-05, "loss": 0.0872, "step": 59010 }, { "epoch": 0.7196951219512195, "grad_norm": 0.3506580591201782, "learning_rate": 1.5202032520325204e-05, "loss": 0.047, "step": 59015 }, { "epoch": 0.7197560975609756, "grad_norm": 0.6750876903533936, "learning_rate": 1.5201626016260164e-05, "loss": 0.0726, "step": 59020 }, { "epoch": 0.7198170731707317, "grad_norm": 0.46828585863113403, "learning_rate": 1.5201219512195122e-05, "loss": 0.0581, "step": 59025 }, { "epoch": 0.7198780487804878, "grad_norm": 1.0052683353424072, "learning_rate": 1.5200813008130082e-05, "loss": 0.0707, "step": 59030 }, { "epoch": 0.7199390243902439, "grad_norm": 0.8530440926551819, "learning_rate": 1.5200406504065042e-05, "loss": 0.0575, "step": 59035 }, { "epoch": 0.72, "grad_norm": 0.5836221575737, "learning_rate": 1.5200000000000002e-05, "loss": 0.0562, "step": 59040 }, { "epoch": 0.7200609756097561, "grad_norm": 0.481795072555542, "learning_rate": 1.5199593495934962e-05, "loss": 0.0224, "step": 59045 }, { "epoch": 0.7201219512195122, "grad_norm": 0.5664159059524536, "learning_rate": 1.5199186991869921e-05, "loss": 0.0693, "step": 59050 }, { "epoch": 0.7201829268292683, "grad_norm": 0.7098496556282043, "learning_rate": 1.5198780487804878e-05, "loss": 0.0717, "step": 59055 }, { "epoch": 0.7202439024390244, "grad_norm": 0.40929877758026123, "learning_rate": 1.5198373983739838e-05, "loss": 0.0434, "step": 59060 }, { "epoch": 0.7203048780487805, "grad_norm": 0.8406588435173035, "learning_rate": 1.5197967479674798e-05, "loss": 0.0439, "step": 59065 }, { "epoch": 0.7203658536585366, "grad_norm": 0.8065304756164551, "learning_rate": 1.5197560975609757e-05, "loss": 0.0303, "step": 59070 }, { "epoch": 0.7204268292682927, "grad_norm": 1.0701525211334229, "learning_rate": 1.5197154471544717e-05, "loss": 0.0574, "step": 59075 }, { "epoch": 0.7204878048780488, "grad_norm": 0.47154200077056885, "learning_rate": 1.5196747967479677e-05, "loss": 0.0417, "step": 59080 }, { "epoch": 0.7205487804878049, "grad_norm": 0.6229434013366699, "learning_rate": 1.5196341463414635e-05, "loss": 0.0389, "step": 59085 }, { "epoch": 0.720609756097561, "grad_norm": 0.6738966703414917, "learning_rate": 1.5195934959349595e-05, "loss": 0.0486, "step": 59090 }, { "epoch": 0.7206707317073171, "grad_norm": 0.33930253982543945, "learning_rate": 1.5195528455284553e-05, "loss": 0.0411, "step": 59095 }, { "epoch": 0.7207317073170731, "grad_norm": 0.47479885816574097, "learning_rate": 1.5195121951219513e-05, "loss": 0.0343, "step": 59100 }, { "epoch": 0.7207926829268293, "grad_norm": 0.3939705789089203, "learning_rate": 1.5194715447154473e-05, "loss": 0.0536, "step": 59105 }, { "epoch": 0.7208536585365853, "grad_norm": 0.3855718672275543, "learning_rate": 1.5194308943089433e-05, "loss": 0.0388, "step": 59110 }, { "epoch": 0.7209146341463415, "grad_norm": 0.7837618589401245, "learning_rate": 1.519390243902439e-05, "loss": 0.05, "step": 59115 }, { "epoch": 0.7209756097560975, "grad_norm": 0.5317056179046631, "learning_rate": 1.519349593495935e-05, "loss": 0.0662, "step": 59120 }, { "epoch": 0.7210365853658537, "grad_norm": 0.2973199486732483, "learning_rate": 1.519308943089431e-05, "loss": 0.0402, "step": 59125 }, { "epoch": 0.7210975609756097, "grad_norm": 0.2925114333629608, "learning_rate": 1.519268292682927e-05, "loss": 0.0514, "step": 59130 }, { "epoch": 0.7211585365853659, "grad_norm": 1.0453615188598633, "learning_rate": 1.519227642276423e-05, "loss": 0.0761, "step": 59135 }, { "epoch": 0.7212195121951219, "grad_norm": 0.6003177165985107, "learning_rate": 1.519186991869919e-05, "loss": 0.0632, "step": 59140 }, { "epoch": 0.7212804878048781, "grad_norm": 3.226128578186035, "learning_rate": 1.5191463414634146e-05, "loss": 0.0387, "step": 59145 }, { "epoch": 0.7213414634146341, "grad_norm": 0.39064502716064453, "learning_rate": 1.5191056910569106e-05, "loss": 0.055, "step": 59150 }, { "epoch": 0.7214024390243903, "grad_norm": 0.4696657657623291, "learning_rate": 1.5190650406504066e-05, "loss": 0.029, "step": 59155 }, { "epoch": 0.7214634146341463, "grad_norm": 0.6053594946861267, "learning_rate": 1.5190243902439026e-05, "loss": 0.0451, "step": 59160 }, { "epoch": 0.7215243902439025, "grad_norm": 0.39406368136405945, "learning_rate": 1.5189837398373986e-05, "loss": 0.0376, "step": 59165 }, { "epoch": 0.7215853658536585, "grad_norm": 0.35987305641174316, "learning_rate": 1.5189430894308945e-05, "loss": 0.0271, "step": 59170 }, { "epoch": 0.7216463414634147, "grad_norm": 0.32566604018211365, "learning_rate": 1.5189024390243903e-05, "loss": 0.0423, "step": 59175 }, { "epoch": 0.7217073170731707, "grad_norm": 0.9292811155319214, "learning_rate": 1.5188617886178862e-05, "loss": 0.0495, "step": 59180 }, { "epoch": 0.7217682926829269, "grad_norm": 0.567888617515564, "learning_rate": 1.5188211382113821e-05, "loss": 0.0348, "step": 59185 }, { "epoch": 0.7218292682926829, "grad_norm": 1.2184919118881226, "learning_rate": 1.5187804878048781e-05, "loss": 0.0612, "step": 59190 }, { "epoch": 0.721890243902439, "grad_norm": 0.32998427748680115, "learning_rate": 1.5187398373983741e-05, "loss": 0.0436, "step": 59195 }, { "epoch": 0.7219512195121951, "grad_norm": 0.7898442149162292, "learning_rate": 1.5186991869918701e-05, "loss": 0.0513, "step": 59200 }, { "epoch": 0.7220121951219513, "grad_norm": 0.3666365444660187, "learning_rate": 1.5186585365853659e-05, "loss": 0.059, "step": 59205 }, { "epoch": 0.7220731707317073, "grad_norm": 0.7015789151191711, "learning_rate": 1.5186178861788619e-05, "loss": 0.0941, "step": 59210 }, { "epoch": 0.7221341463414634, "grad_norm": 0.8318663239479065, "learning_rate": 1.5185772357723579e-05, "loss": 0.0668, "step": 59215 }, { "epoch": 0.7221951219512195, "grad_norm": 0.6823463439941406, "learning_rate": 1.5185365853658538e-05, "loss": 0.0785, "step": 59220 }, { "epoch": 0.7222560975609756, "grad_norm": 0.5931389331817627, "learning_rate": 1.5184959349593498e-05, "loss": 0.0344, "step": 59225 }, { "epoch": 0.7223170731707317, "grad_norm": 1.0147348642349243, "learning_rate": 1.5184552845528458e-05, "loss": 0.0766, "step": 59230 }, { "epoch": 0.7223780487804878, "grad_norm": 0.6548213958740234, "learning_rate": 1.5184146341463415e-05, "loss": 0.0751, "step": 59235 }, { "epoch": 0.7224390243902439, "grad_norm": 0.6004734635353088, "learning_rate": 1.5183739837398374e-05, "loss": 0.0476, "step": 59240 }, { "epoch": 0.7225, "grad_norm": 0.5939689874649048, "learning_rate": 1.5183333333333334e-05, "loss": 0.046, "step": 59245 }, { "epoch": 0.7225609756097561, "grad_norm": 0.6763742566108704, "learning_rate": 1.5182926829268294e-05, "loss": 0.0516, "step": 59250 }, { "epoch": 0.7226219512195122, "grad_norm": 0.5170800089836121, "learning_rate": 1.5182520325203254e-05, "loss": 0.042, "step": 59255 }, { "epoch": 0.7226829268292683, "grad_norm": 0.45676374435424805, "learning_rate": 1.5182113821138214e-05, "loss": 0.0495, "step": 59260 }, { "epoch": 0.7227439024390244, "grad_norm": 0.547653079032898, "learning_rate": 1.5181707317073172e-05, "loss": 0.0378, "step": 59265 }, { "epoch": 0.7228048780487805, "grad_norm": 0.6010538935661316, "learning_rate": 1.518130081300813e-05, "loss": 0.0924, "step": 59270 }, { "epoch": 0.7228658536585366, "grad_norm": 1.0347081422805786, "learning_rate": 1.518089430894309e-05, "loss": 0.0717, "step": 59275 }, { "epoch": 0.7229268292682927, "grad_norm": 0.30152812600135803, "learning_rate": 1.518048780487805e-05, "loss": 0.0441, "step": 59280 }, { "epoch": 0.7229878048780488, "grad_norm": 0.7000176310539246, "learning_rate": 1.518008130081301e-05, "loss": 0.0456, "step": 59285 }, { "epoch": 0.7230487804878049, "grad_norm": 1.6360937356948853, "learning_rate": 1.517967479674797e-05, "loss": 0.0511, "step": 59290 }, { "epoch": 0.723109756097561, "grad_norm": 0.37185153365135193, "learning_rate": 1.5179268292682927e-05, "loss": 0.0366, "step": 59295 }, { "epoch": 0.723170731707317, "grad_norm": 0.20905785262584686, "learning_rate": 1.5178861788617887e-05, "loss": 0.0399, "step": 59300 }, { "epoch": 0.7232317073170732, "grad_norm": 0.5849061012268066, "learning_rate": 1.5178455284552847e-05, "loss": 0.0327, "step": 59305 }, { "epoch": 0.7232926829268292, "grad_norm": 0.4182285964488983, "learning_rate": 1.5178048780487807e-05, "loss": 0.0315, "step": 59310 }, { "epoch": 0.7233536585365854, "grad_norm": 0.8246873617172241, "learning_rate": 1.5177642276422767e-05, "loss": 0.0628, "step": 59315 }, { "epoch": 0.7234146341463414, "grad_norm": 0.3885965049266815, "learning_rate": 1.5177235772357726e-05, "loss": 0.0597, "step": 59320 }, { "epoch": 0.7234756097560976, "grad_norm": 0.8387806415557861, "learning_rate": 1.5176829268292683e-05, "loss": 0.0553, "step": 59325 }, { "epoch": 0.7235365853658536, "grad_norm": 0.41666314005851746, "learning_rate": 1.5176422764227643e-05, "loss": 0.054, "step": 59330 }, { "epoch": 0.7235975609756098, "grad_norm": 0.637004017829895, "learning_rate": 1.5176016260162603e-05, "loss": 0.0585, "step": 59335 }, { "epoch": 0.7236585365853658, "grad_norm": 0.5210660099983215, "learning_rate": 1.5175609756097562e-05, "loss": 0.0334, "step": 59340 }, { "epoch": 0.723719512195122, "grad_norm": 1.1562206745147705, "learning_rate": 1.5175203252032522e-05, "loss": 0.0453, "step": 59345 }, { "epoch": 0.723780487804878, "grad_norm": 0.9025272130966187, "learning_rate": 1.5174796747967482e-05, "loss": 0.0534, "step": 59350 }, { "epoch": 0.7238414634146342, "grad_norm": 1.849757432937622, "learning_rate": 1.517439024390244e-05, "loss": 0.0627, "step": 59355 }, { "epoch": 0.7239024390243902, "grad_norm": 0.4665376842021942, "learning_rate": 1.5173983739837398e-05, "loss": 0.061, "step": 59360 }, { "epoch": 0.7239634146341464, "grad_norm": 0.6146699786186218, "learning_rate": 1.5173577235772358e-05, "loss": 0.0674, "step": 59365 }, { "epoch": 0.7240243902439024, "grad_norm": 1.4811480045318604, "learning_rate": 1.5173170731707318e-05, "loss": 0.0537, "step": 59370 }, { "epoch": 0.7240853658536586, "grad_norm": 0.8054728507995605, "learning_rate": 1.5172764227642278e-05, "loss": 0.0319, "step": 59375 }, { "epoch": 0.7241463414634146, "grad_norm": 0.30511730909347534, "learning_rate": 1.5172357723577238e-05, "loss": 0.0399, "step": 59380 }, { "epoch": 0.7242073170731708, "grad_norm": 0.7079250812530518, "learning_rate": 1.5171951219512196e-05, "loss": 0.0531, "step": 59385 }, { "epoch": 0.7242682926829268, "grad_norm": 0.7311213612556458, "learning_rate": 1.5171544715447155e-05, "loss": 0.0464, "step": 59390 }, { "epoch": 0.724329268292683, "grad_norm": 0.4737755358219147, "learning_rate": 1.5171138211382115e-05, "loss": 0.0372, "step": 59395 }, { "epoch": 0.724390243902439, "grad_norm": 0.7535629868507385, "learning_rate": 1.5170731707317075e-05, "loss": 0.069, "step": 59400 }, { "epoch": 0.7244512195121952, "grad_norm": 0.6654428243637085, "learning_rate": 1.5170325203252035e-05, "loss": 0.0702, "step": 59405 }, { "epoch": 0.7245121951219512, "grad_norm": 0.6348434090614319, "learning_rate": 1.5169918699186995e-05, "loss": 0.0346, "step": 59410 }, { "epoch": 0.7245731707317074, "grad_norm": 0.420528769493103, "learning_rate": 1.5169512195121951e-05, "loss": 0.0494, "step": 59415 }, { "epoch": 0.7246341463414634, "grad_norm": 0.6654349565505981, "learning_rate": 1.5169105691056911e-05, "loss": 0.0769, "step": 59420 }, { "epoch": 0.7246951219512195, "grad_norm": 0.4785408675670624, "learning_rate": 1.5168699186991871e-05, "loss": 0.0602, "step": 59425 }, { "epoch": 0.7247560975609756, "grad_norm": 0.4996342957019806, "learning_rate": 1.516829268292683e-05, "loss": 0.0369, "step": 59430 }, { "epoch": 0.7248170731707317, "grad_norm": 0.49356839060783386, "learning_rate": 1.516788617886179e-05, "loss": 0.0427, "step": 59435 }, { "epoch": 0.7248780487804878, "grad_norm": 0.3716033101081848, "learning_rate": 1.516747967479675e-05, "loss": 0.0445, "step": 59440 }, { "epoch": 0.7249390243902439, "grad_norm": 0.7592645287513733, "learning_rate": 1.5167073170731707e-05, "loss": 0.0783, "step": 59445 }, { "epoch": 0.725, "grad_norm": 1.0647200345993042, "learning_rate": 1.5166666666666667e-05, "loss": 0.0472, "step": 59450 }, { "epoch": 0.7250609756097561, "grad_norm": 1.5930441617965698, "learning_rate": 1.5166260162601626e-05, "loss": 0.1254, "step": 59455 }, { "epoch": 0.7251219512195122, "grad_norm": 0.5332093238830566, "learning_rate": 1.5165853658536586e-05, "loss": 0.0538, "step": 59460 }, { "epoch": 0.7251829268292683, "grad_norm": 1.1022840738296509, "learning_rate": 1.5165447154471546e-05, "loss": 0.0668, "step": 59465 }, { "epoch": 0.7252439024390244, "grad_norm": 0.4391648769378662, "learning_rate": 1.5165040650406506e-05, "loss": 0.0377, "step": 59470 }, { "epoch": 0.7253048780487805, "grad_norm": 0.8334220051765442, "learning_rate": 1.5164634146341464e-05, "loss": 0.0517, "step": 59475 }, { "epoch": 0.7253658536585366, "grad_norm": 0.6662537455558777, "learning_rate": 1.5164227642276424e-05, "loss": 0.0359, "step": 59480 }, { "epoch": 0.7254268292682927, "grad_norm": 0.8750185370445251, "learning_rate": 1.5163821138211384e-05, "loss": 0.0337, "step": 59485 }, { "epoch": 0.7254878048780488, "grad_norm": 0.7264096736907959, "learning_rate": 1.5163414634146343e-05, "loss": 0.0535, "step": 59490 }, { "epoch": 0.7255487804878049, "grad_norm": 0.335223913192749, "learning_rate": 1.5163008130081303e-05, "loss": 0.0455, "step": 59495 }, { "epoch": 0.725609756097561, "grad_norm": 0.4883430302143097, "learning_rate": 1.5162601626016263e-05, "loss": 0.0424, "step": 59500 }, { "epoch": 0.7256707317073171, "grad_norm": 0.1439770758152008, "learning_rate": 1.516219512195122e-05, "loss": 0.0352, "step": 59505 }, { "epoch": 0.7257317073170731, "grad_norm": 0.36235493421554565, "learning_rate": 1.516178861788618e-05, "loss": 0.0552, "step": 59510 }, { "epoch": 0.7257926829268293, "grad_norm": 0.705185055732727, "learning_rate": 1.516138211382114e-05, "loss": 0.051, "step": 59515 }, { "epoch": 0.7258536585365853, "grad_norm": 0.6938318014144897, "learning_rate": 1.5160975609756099e-05, "loss": 0.0369, "step": 59520 }, { "epoch": 0.7259146341463415, "grad_norm": 0.7742658257484436, "learning_rate": 1.5160569105691059e-05, "loss": 0.0457, "step": 59525 }, { "epoch": 0.7259756097560975, "grad_norm": 0.6388732194900513, "learning_rate": 1.5160162601626019e-05, "loss": 0.0409, "step": 59530 }, { "epoch": 0.7260365853658537, "grad_norm": 0.5113915801048279, "learning_rate": 1.5159756097560975e-05, "loss": 0.0267, "step": 59535 }, { "epoch": 0.7260975609756097, "grad_norm": 0.48245418071746826, "learning_rate": 1.5159349593495935e-05, "loss": 0.0498, "step": 59540 }, { "epoch": 0.7261585365853659, "grad_norm": 0.44649410247802734, "learning_rate": 1.5158943089430895e-05, "loss": 0.0304, "step": 59545 }, { "epoch": 0.7262195121951219, "grad_norm": 0.598351240158081, "learning_rate": 1.5158536585365855e-05, "loss": 0.0442, "step": 59550 }, { "epoch": 0.7262804878048781, "grad_norm": 0.8986321687698364, "learning_rate": 1.5158130081300814e-05, "loss": 0.0251, "step": 59555 }, { "epoch": 0.7263414634146341, "grad_norm": 0.44585856795310974, "learning_rate": 1.5157723577235774e-05, "loss": 0.0508, "step": 59560 }, { "epoch": 0.7264024390243903, "grad_norm": 0.5265321135520935, "learning_rate": 1.5157317073170732e-05, "loss": 0.0623, "step": 59565 }, { "epoch": 0.7264634146341463, "grad_norm": 0.37035465240478516, "learning_rate": 1.5156910569105692e-05, "loss": 0.0368, "step": 59570 }, { "epoch": 0.7265243902439025, "grad_norm": 0.8890979886054993, "learning_rate": 1.5156504065040652e-05, "loss": 0.0467, "step": 59575 }, { "epoch": 0.7265853658536585, "grad_norm": 0.2983866333961487, "learning_rate": 1.5156097560975612e-05, "loss": 0.059, "step": 59580 }, { "epoch": 0.7266463414634147, "grad_norm": 0.4137813448905945, "learning_rate": 1.5155691056910572e-05, "loss": 0.0396, "step": 59585 }, { "epoch": 0.7267073170731707, "grad_norm": 0.2694135904312134, "learning_rate": 1.515528455284553e-05, "loss": 0.052, "step": 59590 }, { "epoch": 0.7267682926829269, "grad_norm": 0.288961797952652, "learning_rate": 1.5154878048780488e-05, "loss": 0.042, "step": 59595 }, { "epoch": 0.7268292682926829, "grad_norm": 0.4646974802017212, "learning_rate": 1.5154471544715448e-05, "loss": 0.0376, "step": 59600 }, { "epoch": 0.7268902439024391, "grad_norm": 0.4906809628009796, "learning_rate": 1.5154065040650408e-05, "loss": 0.039, "step": 59605 }, { "epoch": 0.7269512195121951, "grad_norm": 1.1425483226776123, "learning_rate": 1.5153658536585367e-05, "loss": 0.08, "step": 59610 }, { "epoch": 0.7270121951219513, "grad_norm": 0.5689645409584045, "learning_rate": 1.5153252032520327e-05, "loss": 0.0325, "step": 59615 }, { "epoch": 0.7270731707317073, "grad_norm": 0.5011364221572876, "learning_rate": 1.5152845528455287e-05, "loss": 0.043, "step": 59620 }, { "epoch": 0.7271341463414634, "grad_norm": 0.711561381816864, "learning_rate": 1.5152439024390243e-05, "loss": 0.0498, "step": 59625 }, { "epoch": 0.7271951219512195, "grad_norm": 0.7381824254989624, "learning_rate": 1.5152032520325203e-05, "loss": 0.0469, "step": 59630 }, { "epoch": 0.7272560975609756, "grad_norm": 1.008176326751709, "learning_rate": 1.5151626016260163e-05, "loss": 0.0379, "step": 59635 }, { "epoch": 0.7273170731707317, "grad_norm": 0.40175607800483704, "learning_rate": 1.5151219512195123e-05, "loss": 0.0526, "step": 59640 }, { "epoch": 0.7273780487804878, "grad_norm": 0.5139753222465515, "learning_rate": 1.5150813008130083e-05, "loss": 0.0628, "step": 59645 }, { "epoch": 0.7274390243902439, "grad_norm": 0.7112306952476501, "learning_rate": 1.5150406504065043e-05, "loss": 0.0461, "step": 59650 }, { "epoch": 0.7275, "grad_norm": 0.38980311155319214, "learning_rate": 1.515e-05, "loss": 0.0609, "step": 59655 }, { "epoch": 0.7275609756097561, "grad_norm": 0.40160825848579407, "learning_rate": 1.514959349593496e-05, "loss": 0.0558, "step": 59660 }, { "epoch": 0.7276219512195122, "grad_norm": 1.223807454109192, "learning_rate": 1.514918699186992e-05, "loss": 0.0742, "step": 59665 }, { "epoch": 0.7276829268292683, "grad_norm": 0.5982843041419983, "learning_rate": 1.514878048780488e-05, "loss": 0.0455, "step": 59670 }, { "epoch": 0.7277439024390244, "grad_norm": 0.3825203776359558, "learning_rate": 1.514837398373984e-05, "loss": 0.0579, "step": 59675 }, { "epoch": 0.7278048780487805, "grad_norm": 0.48201829195022583, "learning_rate": 1.5147967479674798e-05, "loss": 0.0298, "step": 59680 }, { "epoch": 0.7278658536585366, "grad_norm": 0.9608160853385925, "learning_rate": 1.5147560975609756e-05, "loss": 0.0602, "step": 59685 }, { "epoch": 0.7279268292682927, "grad_norm": 0.8991045355796814, "learning_rate": 1.5147154471544716e-05, "loss": 0.0501, "step": 59690 }, { "epoch": 0.7279878048780488, "grad_norm": 1.3550726175308228, "learning_rate": 1.5146747967479676e-05, "loss": 0.0442, "step": 59695 }, { "epoch": 0.7280487804878049, "grad_norm": 0.5915167331695557, "learning_rate": 1.5146341463414636e-05, "loss": 0.0734, "step": 59700 }, { "epoch": 0.728109756097561, "grad_norm": 0.44747602939605713, "learning_rate": 1.5145934959349595e-05, "loss": 0.0601, "step": 59705 }, { "epoch": 0.728170731707317, "grad_norm": 1.137912392616272, "learning_rate": 1.5145528455284555e-05, "loss": 0.0741, "step": 59710 }, { "epoch": 0.7282317073170732, "grad_norm": 0.8616936206817627, "learning_rate": 1.5145121951219512e-05, "loss": 0.0489, "step": 59715 }, { "epoch": 0.7282926829268292, "grad_norm": 0.5296972393989563, "learning_rate": 1.5144715447154472e-05, "loss": 0.0666, "step": 59720 }, { "epoch": 0.7283536585365854, "grad_norm": 0.7754879593849182, "learning_rate": 1.5144308943089431e-05, "loss": 0.0445, "step": 59725 }, { "epoch": 0.7284146341463414, "grad_norm": 0.46979057788848877, "learning_rate": 1.5143902439024391e-05, "loss": 0.05, "step": 59730 }, { "epoch": 0.7284756097560976, "grad_norm": 0.5545221567153931, "learning_rate": 1.5143495934959351e-05, "loss": 0.0301, "step": 59735 }, { "epoch": 0.7285365853658536, "grad_norm": 0.5178272724151611, "learning_rate": 1.514308943089431e-05, "loss": 0.0525, "step": 59740 }, { "epoch": 0.7285975609756098, "grad_norm": 0.5596044659614563, "learning_rate": 1.5142682926829269e-05, "loss": 0.0819, "step": 59745 }, { "epoch": 0.7286585365853658, "grad_norm": 0.509559154510498, "learning_rate": 1.5142276422764229e-05, "loss": 0.0418, "step": 59750 }, { "epoch": 0.728719512195122, "grad_norm": 0.448017954826355, "learning_rate": 1.5141869918699189e-05, "loss": 0.0621, "step": 59755 }, { "epoch": 0.728780487804878, "grad_norm": 1.1842235326766968, "learning_rate": 1.5141463414634148e-05, "loss": 0.0543, "step": 59760 }, { "epoch": 0.7288414634146342, "grad_norm": 0.3038966953754425, "learning_rate": 1.5141056910569108e-05, "loss": 0.0283, "step": 59765 }, { "epoch": 0.7289024390243902, "grad_norm": 0.4435560405254364, "learning_rate": 1.5140650406504066e-05, "loss": 0.0469, "step": 59770 }, { "epoch": 0.7289634146341464, "grad_norm": 1.170333743095398, "learning_rate": 1.5140243902439025e-05, "loss": 0.0765, "step": 59775 }, { "epoch": 0.7290243902439024, "grad_norm": 0.5227828621864319, "learning_rate": 1.5139837398373984e-05, "loss": 0.0661, "step": 59780 }, { "epoch": 0.7290853658536586, "grad_norm": 0.8861423134803772, "learning_rate": 1.5139430894308944e-05, "loss": 0.0705, "step": 59785 }, { "epoch": 0.7291463414634146, "grad_norm": 0.2782936096191406, "learning_rate": 1.5139024390243904e-05, "loss": 0.0274, "step": 59790 }, { "epoch": 0.7292073170731708, "grad_norm": 0.5371922254562378, "learning_rate": 1.5138617886178864e-05, "loss": 0.0406, "step": 59795 }, { "epoch": 0.7292682926829268, "grad_norm": 0.5357728600502014, "learning_rate": 1.5138211382113824e-05, "loss": 0.0324, "step": 59800 }, { "epoch": 0.729329268292683, "grad_norm": 0.46024763584136963, "learning_rate": 1.513780487804878e-05, "loss": 0.073, "step": 59805 }, { "epoch": 0.729390243902439, "grad_norm": 0.21374471485614777, "learning_rate": 1.513739837398374e-05, "loss": 0.0732, "step": 59810 }, { "epoch": 0.7294512195121952, "grad_norm": 0.8780454397201538, "learning_rate": 1.51369918699187e-05, "loss": 0.0355, "step": 59815 }, { "epoch": 0.7295121951219512, "grad_norm": 1.0712625980377197, "learning_rate": 1.513658536585366e-05, "loss": 0.0495, "step": 59820 }, { "epoch": 0.7295731707317074, "grad_norm": 0.946969747543335, "learning_rate": 1.513617886178862e-05, "loss": 0.0558, "step": 59825 }, { "epoch": 0.7296341463414634, "grad_norm": 0.9887312650680542, "learning_rate": 1.513577235772358e-05, "loss": 0.0551, "step": 59830 }, { "epoch": 0.7296951219512195, "grad_norm": 0.414446622133255, "learning_rate": 1.5135365853658537e-05, "loss": 0.0311, "step": 59835 }, { "epoch": 0.7297560975609756, "grad_norm": 0.21862629055976868, "learning_rate": 1.5134959349593497e-05, "loss": 0.0558, "step": 59840 }, { "epoch": 0.7298170731707317, "grad_norm": 0.7856946587562561, "learning_rate": 1.5134552845528457e-05, "loss": 0.0952, "step": 59845 }, { "epoch": 0.7298780487804878, "grad_norm": 0.7111946940422058, "learning_rate": 1.5134146341463417e-05, "loss": 0.0512, "step": 59850 }, { "epoch": 0.7299390243902439, "grad_norm": 0.7040101885795593, "learning_rate": 1.5133739837398375e-05, "loss": 0.0645, "step": 59855 }, { "epoch": 0.73, "grad_norm": 0.6163043975830078, "learning_rate": 1.5133333333333335e-05, "loss": 0.0967, "step": 59860 }, { "epoch": 0.7300609756097561, "grad_norm": 0.22094368934631348, "learning_rate": 1.5132926829268293e-05, "loss": 0.0482, "step": 59865 }, { "epoch": 0.7301219512195122, "grad_norm": 1.4129127264022827, "learning_rate": 1.5132520325203253e-05, "loss": 0.074, "step": 59870 }, { "epoch": 0.7301829268292683, "grad_norm": 0.4753117263317108, "learning_rate": 1.5132113821138212e-05, "loss": 0.0689, "step": 59875 }, { "epoch": 0.7302439024390244, "grad_norm": 0.5389938950538635, "learning_rate": 1.5131707317073172e-05, "loss": 0.029, "step": 59880 }, { "epoch": 0.7303048780487805, "grad_norm": 0.8042580485343933, "learning_rate": 1.5131300813008132e-05, "loss": 0.0576, "step": 59885 }, { "epoch": 0.7303658536585366, "grad_norm": 0.5299185514450073, "learning_rate": 1.5130894308943092e-05, "loss": 0.0627, "step": 59890 }, { "epoch": 0.7304268292682927, "grad_norm": 0.42868003249168396, "learning_rate": 1.5130487804878048e-05, "loss": 0.0563, "step": 59895 }, { "epoch": 0.7304878048780488, "grad_norm": 0.632546603679657, "learning_rate": 1.5130081300813008e-05, "loss": 0.0353, "step": 59900 }, { "epoch": 0.7305487804878049, "grad_norm": 0.7814731001853943, "learning_rate": 1.5129674796747968e-05, "loss": 0.0619, "step": 59905 }, { "epoch": 0.730609756097561, "grad_norm": 0.48904526233673096, "learning_rate": 1.5129268292682928e-05, "loss": 0.0585, "step": 59910 }, { "epoch": 0.7306707317073171, "grad_norm": 0.588312566280365, "learning_rate": 1.5128861788617888e-05, "loss": 0.0663, "step": 59915 }, { "epoch": 0.7307317073170732, "grad_norm": 2.520172357559204, "learning_rate": 1.5128455284552847e-05, "loss": 0.0571, "step": 59920 }, { "epoch": 0.7307926829268293, "grad_norm": 0.85695481300354, "learning_rate": 1.5128048780487806e-05, "loss": 0.0416, "step": 59925 }, { "epoch": 0.7308536585365853, "grad_norm": 1.1951987743377686, "learning_rate": 1.5127642276422765e-05, "loss": 0.0573, "step": 59930 }, { "epoch": 0.7309146341463415, "grad_norm": 0.5739145874977112, "learning_rate": 1.5127235772357725e-05, "loss": 0.0459, "step": 59935 }, { "epoch": 0.7309756097560975, "grad_norm": 1.057410717010498, "learning_rate": 1.5126829268292685e-05, "loss": 0.0439, "step": 59940 }, { "epoch": 0.7310365853658537, "grad_norm": 0.516007125377655, "learning_rate": 1.5126422764227643e-05, "loss": 0.0398, "step": 59945 }, { "epoch": 0.7310975609756097, "grad_norm": 0.41481325030326843, "learning_rate": 1.5126016260162603e-05, "loss": 0.055, "step": 59950 }, { "epoch": 0.7311585365853659, "grad_norm": 0.8830232620239258, "learning_rate": 1.5125609756097561e-05, "loss": 0.0655, "step": 59955 }, { "epoch": 0.7312195121951219, "grad_norm": 0.529410183429718, "learning_rate": 1.5125203252032521e-05, "loss": 0.0716, "step": 59960 }, { "epoch": 0.7312804878048781, "grad_norm": 0.7816810607910156, "learning_rate": 1.512479674796748e-05, "loss": 0.0465, "step": 59965 }, { "epoch": 0.7313414634146341, "grad_norm": 0.4109395444393158, "learning_rate": 1.512439024390244e-05, "loss": 0.0413, "step": 59970 }, { "epoch": 0.7314024390243903, "grad_norm": 0.23015780746936798, "learning_rate": 1.51239837398374e-05, "loss": 0.0316, "step": 59975 }, { "epoch": 0.7314634146341463, "grad_norm": 0.7351930737495422, "learning_rate": 1.512357723577236e-05, "loss": 0.0626, "step": 59980 }, { "epoch": 0.7315243902439025, "grad_norm": 0.8781457543373108, "learning_rate": 1.5123170731707317e-05, "loss": 0.0528, "step": 59985 }, { "epoch": 0.7315853658536585, "grad_norm": 0.5253448486328125, "learning_rate": 1.5122764227642277e-05, "loss": 0.0697, "step": 59990 }, { "epoch": 0.7316463414634147, "grad_norm": 0.4748174250125885, "learning_rate": 1.5122357723577236e-05, "loss": 0.0723, "step": 59995 }, { "epoch": 0.7317073170731707, "grad_norm": 0.6017549633979797, "learning_rate": 1.5121951219512196e-05, "loss": 0.0417, "step": 60000 }, { "epoch": 0.7317682926829269, "grad_norm": 0.4539473056793213, "learning_rate": 1.5121544715447156e-05, "loss": 0.0427, "step": 60005 }, { "epoch": 0.7318292682926829, "grad_norm": 0.6114795804023743, "learning_rate": 1.5121138211382116e-05, "loss": 0.0365, "step": 60010 }, { "epoch": 0.7318902439024391, "grad_norm": 0.43203848600387573, "learning_rate": 1.5120731707317074e-05, "loss": 0.0363, "step": 60015 }, { "epoch": 0.7319512195121951, "grad_norm": 0.39704179763793945, "learning_rate": 1.5120325203252034e-05, "loss": 0.0667, "step": 60020 }, { "epoch": 0.7320121951219513, "grad_norm": 0.8974835276603699, "learning_rate": 1.5119918699186994e-05, "loss": 0.0427, "step": 60025 }, { "epoch": 0.7320731707317073, "grad_norm": 0.6367088556289673, "learning_rate": 1.5119512195121953e-05, "loss": 0.0455, "step": 60030 }, { "epoch": 0.7321341463414635, "grad_norm": 0.45341813564300537, "learning_rate": 1.5119105691056912e-05, "loss": 0.0419, "step": 60035 }, { "epoch": 0.7321951219512195, "grad_norm": 0.5811808705329895, "learning_rate": 1.5118699186991871e-05, "loss": 0.0456, "step": 60040 }, { "epoch": 0.7322560975609756, "grad_norm": 2.0330348014831543, "learning_rate": 1.511829268292683e-05, "loss": 0.0837, "step": 60045 }, { "epoch": 0.7323170731707317, "grad_norm": 0.43904197216033936, "learning_rate": 1.511788617886179e-05, "loss": 0.0415, "step": 60050 }, { "epoch": 0.7323780487804878, "grad_norm": 0.5089238882064819, "learning_rate": 1.5117479674796749e-05, "loss": 0.0706, "step": 60055 }, { "epoch": 0.7324390243902439, "grad_norm": 0.7979545593261719, "learning_rate": 1.5117073170731709e-05, "loss": 0.0363, "step": 60060 }, { "epoch": 0.7325, "grad_norm": 0.5289433002471924, "learning_rate": 1.5116666666666669e-05, "loss": 0.0599, "step": 60065 }, { "epoch": 0.7325609756097561, "grad_norm": 0.496977299451828, "learning_rate": 1.5116260162601629e-05, "loss": 0.0437, "step": 60070 }, { "epoch": 0.7326219512195122, "grad_norm": 1.7973902225494385, "learning_rate": 1.5115853658536585e-05, "loss": 0.0615, "step": 60075 }, { "epoch": 0.7326829268292683, "grad_norm": 0.20394614338874817, "learning_rate": 1.5115447154471545e-05, "loss": 0.0439, "step": 60080 }, { "epoch": 0.7327439024390244, "grad_norm": 0.916852593421936, "learning_rate": 1.5115040650406505e-05, "loss": 0.03, "step": 60085 }, { "epoch": 0.7328048780487805, "grad_norm": 0.48095592856407166, "learning_rate": 1.5114634146341464e-05, "loss": 0.0559, "step": 60090 }, { "epoch": 0.7328658536585366, "grad_norm": 0.6825483441352844, "learning_rate": 1.5114227642276424e-05, "loss": 0.0402, "step": 60095 }, { "epoch": 0.7329268292682927, "grad_norm": 0.4658859670162201, "learning_rate": 1.5113821138211384e-05, "loss": 0.0511, "step": 60100 }, { "epoch": 0.7329878048780488, "grad_norm": 0.9522426128387451, "learning_rate": 1.5113414634146342e-05, "loss": 0.054, "step": 60105 }, { "epoch": 0.7330487804878049, "grad_norm": 0.6275352239608765, "learning_rate": 1.5113008130081302e-05, "loss": 0.0702, "step": 60110 }, { "epoch": 0.733109756097561, "grad_norm": 0.27475571632385254, "learning_rate": 1.5112601626016262e-05, "loss": 0.0452, "step": 60115 }, { "epoch": 0.7331707317073171, "grad_norm": 0.7566314339637756, "learning_rate": 1.511219512195122e-05, "loss": 0.059, "step": 60120 }, { "epoch": 0.7332317073170732, "grad_norm": 0.3724593222141266, "learning_rate": 1.511178861788618e-05, "loss": 0.0443, "step": 60125 }, { "epoch": 0.7332926829268293, "grad_norm": 0.9474923610687256, "learning_rate": 1.511138211382114e-05, "loss": 0.035, "step": 60130 }, { "epoch": 0.7333536585365854, "grad_norm": 0.29579973220825195, "learning_rate": 1.5110975609756098e-05, "loss": 0.0398, "step": 60135 }, { "epoch": 0.7334146341463414, "grad_norm": 0.6110530495643616, "learning_rate": 1.5110569105691058e-05, "loss": 0.0285, "step": 60140 }, { "epoch": 0.7334756097560976, "grad_norm": 0.7275960445404053, "learning_rate": 1.5110162601626017e-05, "loss": 0.0346, "step": 60145 }, { "epoch": 0.7335365853658536, "grad_norm": 0.5076414942741394, "learning_rate": 1.5109756097560977e-05, "loss": 0.0893, "step": 60150 }, { "epoch": 0.7335975609756098, "grad_norm": 0.25526466965675354, "learning_rate": 1.5109349593495937e-05, "loss": 0.0318, "step": 60155 }, { "epoch": 0.7336585365853658, "grad_norm": 0.43843385577201843, "learning_rate": 1.5108943089430897e-05, "loss": 0.0726, "step": 60160 }, { "epoch": 0.733719512195122, "grad_norm": 1.1312097311019897, "learning_rate": 1.5108536585365853e-05, "loss": 0.0709, "step": 60165 }, { "epoch": 0.733780487804878, "grad_norm": 0.6031925082206726, "learning_rate": 1.5108130081300813e-05, "loss": 0.0647, "step": 60170 }, { "epoch": 0.7338414634146342, "grad_norm": 0.5060375332832336, "learning_rate": 1.5107723577235773e-05, "loss": 0.0322, "step": 60175 }, { "epoch": 0.7339024390243902, "grad_norm": 0.6516721248626709, "learning_rate": 1.5107317073170733e-05, "loss": 0.0571, "step": 60180 }, { "epoch": 0.7339634146341464, "grad_norm": 0.8657956123352051, "learning_rate": 1.5106910569105693e-05, "loss": 0.0541, "step": 60185 }, { "epoch": 0.7340243902439024, "grad_norm": 0.3288765847682953, "learning_rate": 1.5106504065040652e-05, "loss": 0.0391, "step": 60190 }, { "epoch": 0.7340853658536586, "grad_norm": 0.8141983151435852, "learning_rate": 1.510609756097561e-05, "loss": 0.0524, "step": 60195 }, { "epoch": 0.7341463414634146, "grad_norm": 0.6358160376548767, "learning_rate": 1.510569105691057e-05, "loss": 0.0608, "step": 60200 }, { "epoch": 0.7342073170731708, "grad_norm": 0.9079976677894592, "learning_rate": 1.510528455284553e-05, "loss": 0.079, "step": 60205 }, { "epoch": 0.7342682926829268, "grad_norm": 0.4628996253013611, "learning_rate": 1.5104878048780488e-05, "loss": 0.0551, "step": 60210 }, { "epoch": 0.734329268292683, "grad_norm": 0.4305439591407776, "learning_rate": 1.5104471544715448e-05, "loss": 0.0335, "step": 60215 }, { "epoch": 0.734390243902439, "grad_norm": 0.7398480176925659, "learning_rate": 1.5104065040650408e-05, "loss": 0.0857, "step": 60220 }, { "epoch": 0.7344512195121952, "grad_norm": 0.8283888697624207, "learning_rate": 1.5103658536585366e-05, "loss": 0.0454, "step": 60225 }, { "epoch": 0.7345121951219512, "grad_norm": 0.6445744037628174, "learning_rate": 1.5103252032520326e-05, "loss": 0.0741, "step": 60230 }, { "epoch": 0.7345731707317074, "grad_norm": 1.4792978763580322, "learning_rate": 1.5102845528455286e-05, "loss": 0.0771, "step": 60235 }, { "epoch": 0.7346341463414634, "grad_norm": 0.4059527516365051, "learning_rate": 1.5102439024390246e-05, "loss": 0.0305, "step": 60240 }, { "epoch": 0.7346951219512196, "grad_norm": 0.3277018666267395, "learning_rate": 1.5102032520325205e-05, "loss": 0.062, "step": 60245 }, { "epoch": 0.7347560975609756, "grad_norm": 0.34562110900878906, "learning_rate": 1.5101626016260165e-05, "loss": 0.0524, "step": 60250 }, { "epoch": 0.7348170731707317, "grad_norm": 0.4166242778301239, "learning_rate": 1.5101219512195122e-05, "loss": 0.0394, "step": 60255 }, { "epoch": 0.7348780487804878, "grad_norm": 0.7022594809532166, "learning_rate": 1.5100813008130081e-05, "loss": 0.0526, "step": 60260 }, { "epoch": 0.734939024390244, "grad_norm": 0.31841036677360535, "learning_rate": 1.5100406504065041e-05, "loss": 0.0541, "step": 60265 }, { "epoch": 0.735, "grad_norm": 0.3790665864944458, "learning_rate": 1.5100000000000001e-05, "loss": 0.042, "step": 60270 }, { "epoch": 0.7350609756097561, "grad_norm": 0.6284255981445312, "learning_rate": 1.5099593495934961e-05, "loss": 0.0459, "step": 60275 }, { "epoch": 0.7351219512195122, "grad_norm": 0.07711119949817657, "learning_rate": 1.509918699186992e-05, "loss": 0.0358, "step": 60280 }, { "epoch": 0.7351829268292683, "grad_norm": 0.5362619757652283, "learning_rate": 1.5098780487804879e-05, "loss": 0.0607, "step": 60285 }, { "epoch": 0.7352439024390244, "grad_norm": 0.9438992738723755, "learning_rate": 1.5098373983739839e-05, "loss": 0.0726, "step": 60290 }, { "epoch": 0.7353048780487805, "grad_norm": 0.8365448713302612, "learning_rate": 1.5097967479674799e-05, "loss": 0.0582, "step": 60295 }, { "epoch": 0.7353658536585366, "grad_norm": 1.1014848947525024, "learning_rate": 1.5097560975609757e-05, "loss": 0.0545, "step": 60300 }, { "epoch": 0.7354268292682927, "grad_norm": 0.75492924451828, "learning_rate": 1.5097154471544716e-05, "loss": 0.0584, "step": 60305 }, { "epoch": 0.7354878048780488, "grad_norm": 0.44024476408958435, "learning_rate": 1.5096747967479676e-05, "loss": 0.0409, "step": 60310 }, { "epoch": 0.7355487804878049, "grad_norm": 0.2519010603427887, "learning_rate": 1.5096341463414634e-05, "loss": 0.0662, "step": 60315 }, { "epoch": 0.735609756097561, "grad_norm": 0.6579095721244812, "learning_rate": 1.5095934959349594e-05, "loss": 0.0627, "step": 60320 }, { "epoch": 0.7356707317073171, "grad_norm": 0.6579564809799194, "learning_rate": 1.5095528455284554e-05, "loss": 0.0372, "step": 60325 }, { "epoch": 0.7357317073170732, "grad_norm": 0.5971996784210205, "learning_rate": 1.5095121951219514e-05, "loss": 0.043, "step": 60330 }, { "epoch": 0.7357926829268293, "grad_norm": 0.530009388923645, "learning_rate": 1.5094715447154474e-05, "loss": 0.0577, "step": 60335 }, { "epoch": 0.7358536585365854, "grad_norm": 0.8557780385017395, "learning_rate": 1.5094308943089434e-05, "loss": 0.0435, "step": 60340 }, { "epoch": 0.7359146341463415, "grad_norm": 0.9487393498420715, "learning_rate": 1.509390243902439e-05, "loss": 0.054, "step": 60345 }, { "epoch": 0.7359756097560975, "grad_norm": 0.4564712345600128, "learning_rate": 1.509349593495935e-05, "loss": 0.0545, "step": 60350 }, { "epoch": 0.7360365853658537, "grad_norm": 0.6712835431098938, "learning_rate": 1.509308943089431e-05, "loss": 0.0433, "step": 60355 }, { "epoch": 0.7360975609756097, "grad_norm": 0.7521679997444153, "learning_rate": 1.509268292682927e-05, "loss": 0.0534, "step": 60360 }, { "epoch": 0.7361585365853659, "grad_norm": 0.09317880868911743, "learning_rate": 1.509227642276423e-05, "loss": 0.0935, "step": 60365 }, { "epoch": 0.7362195121951219, "grad_norm": 1.0111435651779175, "learning_rate": 1.5091869918699189e-05, "loss": 0.0805, "step": 60370 }, { "epoch": 0.7362804878048781, "grad_norm": 1.6329275369644165, "learning_rate": 1.5091463414634147e-05, "loss": 0.0542, "step": 60375 }, { "epoch": 0.7363414634146341, "grad_norm": 0.41992640495300293, "learning_rate": 1.5091056910569107e-05, "loss": 0.045, "step": 60380 }, { "epoch": 0.7364024390243903, "grad_norm": 3.4703118801116943, "learning_rate": 1.5090650406504065e-05, "loss": 0.0447, "step": 60385 }, { "epoch": 0.7364634146341463, "grad_norm": 0.43096768856048584, "learning_rate": 1.5090243902439025e-05, "loss": 0.0463, "step": 60390 }, { "epoch": 0.7365243902439025, "grad_norm": 0.4099157154560089, "learning_rate": 1.5089837398373985e-05, "loss": 0.0367, "step": 60395 }, { "epoch": 0.7365853658536585, "grad_norm": 0.4659578502178192, "learning_rate": 1.5089430894308945e-05, "loss": 0.0238, "step": 60400 }, { "epoch": 0.7366463414634147, "grad_norm": 0.4675697982311249, "learning_rate": 1.5089024390243903e-05, "loss": 0.0444, "step": 60405 }, { "epoch": 0.7367073170731707, "grad_norm": 0.3419570028781891, "learning_rate": 1.5088617886178863e-05, "loss": 0.0773, "step": 60410 }, { "epoch": 0.7367682926829269, "grad_norm": 0.7248333096504211, "learning_rate": 1.5088211382113822e-05, "loss": 0.0502, "step": 60415 }, { "epoch": 0.7368292682926829, "grad_norm": 0.5007110834121704, "learning_rate": 1.5087804878048782e-05, "loss": 0.0794, "step": 60420 }, { "epoch": 0.7368902439024391, "grad_norm": 0.6141241788864136, "learning_rate": 1.5087398373983742e-05, "loss": 0.0444, "step": 60425 }, { "epoch": 0.7369512195121951, "grad_norm": 0.4316711723804474, "learning_rate": 1.5086991869918702e-05, "loss": 0.0322, "step": 60430 }, { "epoch": 0.7370121951219513, "grad_norm": 0.594980776309967, "learning_rate": 1.5086585365853658e-05, "loss": 0.0388, "step": 60435 }, { "epoch": 0.7370731707317073, "grad_norm": 0.29356321692466736, "learning_rate": 1.5086178861788618e-05, "loss": 0.0334, "step": 60440 }, { "epoch": 0.7371341463414635, "grad_norm": 0.8240951299667358, "learning_rate": 1.5085772357723578e-05, "loss": 0.0664, "step": 60445 }, { "epoch": 0.7371951219512195, "grad_norm": 0.6292615532875061, "learning_rate": 1.5085365853658538e-05, "loss": 0.0651, "step": 60450 }, { "epoch": 0.7372560975609757, "grad_norm": 0.7521641254425049, "learning_rate": 1.5084959349593498e-05, "loss": 0.0706, "step": 60455 }, { "epoch": 0.7373170731707317, "grad_norm": 0.4998258352279663, "learning_rate": 1.5084552845528457e-05, "loss": 0.0382, "step": 60460 }, { "epoch": 0.7373780487804878, "grad_norm": 1.171341061592102, "learning_rate": 1.5084146341463416e-05, "loss": 0.0523, "step": 60465 }, { "epoch": 0.7374390243902439, "grad_norm": 0.4768943190574646, "learning_rate": 1.5083739837398375e-05, "loss": 0.0679, "step": 60470 }, { "epoch": 0.7375, "grad_norm": 1.2575918436050415, "learning_rate": 1.5083333333333333e-05, "loss": 0.0686, "step": 60475 }, { "epoch": 0.7375609756097561, "grad_norm": 0.48276859521865845, "learning_rate": 1.5082926829268293e-05, "loss": 0.0664, "step": 60480 }, { "epoch": 0.7376219512195122, "grad_norm": 0.7161464691162109, "learning_rate": 1.5082520325203253e-05, "loss": 0.0539, "step": 60485 }, { "epoch": 0.7376829268292683, "grad_norm": 0.5775840282440186, "learning_rate": 1.5082113821138213e-05, "loss": 0.0441, "step": 60490 }, { "epoch": 0.7377439024390244, "grad_norm": 4.89200496673584, "learning_rate": 1.5081707317073171e-05, "loss": 0.0799, "step": 60495 }, { "epoch": 0.7378048780487805, "grad_norm": 0.8107233643531799, "learning_rate": 1.5081300813008131e-05, "loss": 0.0497, "step": 60500 }, { "epoch": 0.7378658536585366, "grad_norm": 0.4913540780544281, "learning_rate": 1.508089430894309e-05, "loss": 0.0239, "step": 60505 }, { "epoch": 0.7379268292682927, "grad_norm": 0.5561004877090454, "learning_rate": 1.508048780487805e-05, "loss": 0.0307, "step": 60510 }, { "epoch": 0.7379878048780488, "grad_norm": 0.7255659103393555, "learning_rate": 1.508008130081301e-05, "loss": 0.0513, "step": 60515 }, { "epoch": 0.7380487804878049, "grad_norm": 0.7687853574752808, "learning_rate": 1.507967479674797e-05, "loss": 0.0625, "step": 60520 }, { "epoch": 0.738109756097561, "grad_norm": 0.39013150334358215, "learning_rate": 1.5079268292682927e-05, "loss": 0.0366, "step": 60525 }, { "epoch": 0.7381707317073171, "grad_norm": 0.9404376149177551, "learning_rate": 1.5078861788617886e-05, "loss": 0.0298, "step": 60530 }, { "epoch": 0.7382317073170732, "grad_norm": 1.2575851678848267, "learning_rate": 1.5078455284552846e-05, "loss": 0.0895, "step": 60535 }, { "epoch": 0.7382926829268293, "grad_norm": 1.636404037475586, "learning_rate": 1.5078048780487806e-05, "loss": 0.0464, "step": 60540 }, { "epoch": 0.7383536585365854, "grad_norm": 1.7834051847457886, "learning_rate": 1.5077642276422766e-05, "loss": 0.0607, "step": 60545 }, { "epoch": 0.7384146341463415, "grad_norm": 0.4413072466850281, "learning_rate": 1.5077235772357726e-05, "loss": 0.0608, "step": 60550 }, { "epoch": 0.7384756097560976, "grad_norm": 0.2564527988433838, "learning_rate": 1.5076829268292684e-05, "loss": 0.0403, "step": 60555 }, { "epoch": 0.7385365853658536, "grad_norm": 0.7583011984825134, "learning_rate": 1.5076422764227644e-05, "loss": 0.0408, "step": 60560 }, { "epoch": 0.7385975609756098, "grad_norm": 0.6242010593414307, "learning_rate": 1.5076016260162602e-05, "loss": 0.0843, "step": 60565 }, { "epoch": 0.7386585365853658, "grad_norm": 0.5318142771720886, "learning_rate": 1.5075609756097562e-05, "loss": 0.0436, "step": 60570 }, { "epoch": 0.738719512195122, "grad_norm": 0.7112646698951721, "learning_rate": 1.5075203252032521e-05, "loss": 0.0464, "step": 60575 }, { "epoch": 0.738780487804878, "grad_norm": 0.6730861067771912, "learning_rate": 1.5074796747967481e-05, "loss": 0.0426, "step": 60580 }, { "epoch": 0.7388414634146342, "grad_norm": 2.316169023513794, "learning_rate": 1.507439024390244e-05, "loss": 0.0628, "step": 60585 }, { "epoch": 0.7389024390243902, "grad_norm": 0.4996494948863983, "learning_rate": 1.50739837398374e-05, "loss": 0.0969, "step": 60590 }, { "epoch": 0.7389634146341464, "grad_norm": 0.4273131787776947, "learning_rate": 1.5073577235772359e-05, "loss": 0.0428, "step": 60595 }, { "epoch": 0.7390243902439024, "grad_norm": 1.223828673362732, "learning_rate": 1.5073170731707319e-05, "loss": 0.0719, "step": 60600 }, { "epoch": 0.7390853658536586, "grad_norm": 0.630829393863678, "learning_rate": 1.5072764227642279e-05, "loss": 0.0649, "step": 60605 }, { "epoch": 0.7391463414634146, "grad_norm": 0.6799604892730713, "learning_rate": 1.5072357723577239e-05, "loss": 0.0391, "step": 60610 }, { "epoch": 0.7392073170731708, "grad_norm": 0.3926233947277069, "learning_rate": 1.5071951219512195e-05, "loss": 0.0347, "step": 60615 }, { "epoch": 0.7392682926829268, "grad_norm": 0.3844188451766968, "learning_rate": 1.5071544715447155e-05, "loss": 0.029, "step": 60620 }, { "epoch": 0.739329268292683, "grad_norm": 0.9268214106559753, "learning_rate": 1.5071138211382115e-05, "loss": 0.0567, "step": 60625 }, { "epoch": 0.739390243902439, "grad_norm": 0.3962133526802063, "learning_rate": 1.5070731707317074e-05, "loss": 0.0291, "step": 60630 }, { "epoch": 0.7394512195121952, "grad_norm": 0.42182740569114685, "learning_rate": 1.5070325203252034e-05, "loss": 0.0581, "step": 60635 }, { "epoch": 0.7395121951219512, "grad_norm": 0.5051921010017395, "learning_rate": 1.5069918699186994e-05, "loss": 0.0652, "step": 60640 }, { "epoch": 0.7395731707317074, "grad_norm": 0.705308735370636, "learning_rate": 1.5069512195121952e-05, "loss": 0.0318, "step": 60645 }, { "epoch": 0.7396341463414634, "grad_norm": 0.5815883278846741, "learning_rate": 1.506910569105691e-05, "loss": 0.0628, "step": 60650 }, { "epoch": 0.7396951219512196, "grad_norm": 0.7257498502731323, "learning_rate": 1.506869918699187e-05, "loss": 0.0426, "step": 60655 }, { "epoch": 0.7397560975609756, "grad_norm": 0.6172401905059814, "learning_rate": 1.506829268292683e-05, "loss": 0.0277, "step": 60660 }, { "epoch": 0.7398170731707318, "grad_norm": 1.6933666467666626, "learning_rate": 1.506788617886179e-05, "loss": 0.05, "step": 60665 }, { "epoch": 0.7398780487804878, "grad_norm": 0.7172496914863586, "learning_rate": 1.506747967479675e-05, "loss": 0.0386, "step": 60670 }, { "epoch": 0.739939024390244, "grad_norm": 0.7058910727500916, "learning_rate": 1.5067073170731708e-05, "loss": 0.0627, "step": 60675 }, { "epoch": 0.74, "grad_norm": 0.8793541193008423, "learning_rate": 1.5066666666666668e-05, "loss": 0.0348, "step": 60680 }, { "epoch": 0.7400609756097561, "grad_norm": 0.5036634802818298, "learning_rate": 1.5066260162601627e-05, "loss": 0.0527, "step": 60685 }, { "epoch": 0.7401219512195122, "grad_norm": 1.070233702659607, "learning_rate": 1.5065853658536587e-05, "loss": 0.0465, "step": 60690 }, { "epoch": 0.7401829268292683, "grad_norm": 0.477660596370697, "learning_rate": 1.5065447154471547e-05, "loss": 0.0519, "step": 60695 }, { "epoch": 0.7402439024390244, "grad_norm": 0.7138940691947937, "learning_rate": 1.5065040650406507e-05, "loss": 0.0473, "step": 60700 }, { "epoch": 0.7403048780487805, "grad_norm": 0.45628076791763306, "learning_rate": 1.5064634146341463e-05, "loss": 0.0366, "step": 60705 }, { "epoch": 0.7403658536585366, "grad_norm": 1.1368870735168457, "learning_rate": 1.5064227642276423e-05, "loss": 0.0425, "step": 60710 }, { "epoch": 0.7404268292682927, "grad_norm": 0.5045281052589417, "learning_rate": 1.5063821138211383e-05, "loss": 0.0349, "step": 60715 }, { "epoch": 0.7404878048780488, "grad_norm": 0.853219211101532, "learning_rate": 1.5063414634146343e-05, "loss": 0.0536, "step": 60720 }, { "epoch": 0.7405487804878049, "grad_norm": 0.4945669174194336, "learning_rate": 1.5063008130081303e-05, "loss": 0.0286, "step": 60725 }, { "epoch": 0.740609756097561, "grad_norm": 0.5396649241447449, "learning_rate": 1.5062601626016262e-05, "loss": 0.0474, "step": 60730 }, { "epoch": 0.7406707317073171, "grad_norm": 0.861651599407196, "learning_rate": 1.506219512195122e-05, "loss": 0.0546, "step": 60735 }, { "epoch": 0.7407317073170732, "grad_norm": 0.7005044221878052, "learning_rate": 1.5061788617886179e-05, "loss": 0.0438, "step": 60740 }, { "epoch": 0.7407926829268293, "grad_norm": 1.2766687870025635, "learning_rate": 1.5061382113821138e-05, "loss": 0.0315, "step": 60745 }, { "epoch": 0.7408536585365854, "grad_norm": 3.17665433883667, "learning_rate": 1.5060975609756098e-05, "loss": 0.0658, "step": 60750 }, { "epoch": 0.7409146341463415, "grad_norm": 0.6512954235076904, "learning_rate": 1.5060569105691058e-05, "loss": 0.0649, "step": 60755 }, { "epoch": 0.7409756097560976, "grad_norm": 0.5200318098068237, "learning_rate": 1.5060162601626018e-05, "loss": 0.057, "step": 60760 }, { "epoch": 0.7410365853658537, "grad_norm": 0.4116164743900299, "learning_rate": 1.5059756097560976e-05, "loss": 0.0309, "step": 60765 }, { "epoch": 0.7410975609756097, "grad_norm": 0.9063881635665894, "learning_rate": 1.5059349593495936e-05, "loss": 0.0552, "step": 60770 }, { "epoch": 0.7411585365853659, "grad_norm": 1.00752592086792, "learning_rate": 1.5058943089430896e-05, "loss": 0.0679, "step": 60775 }, { "epoch": 0.7412195121951219, "grad_norm": 0.6764259338378906, "learning_rate": 1.5058536585365856e-05, "loss": 0.0697, "step": 60780 }, { "epoch": 0.7412804878048781, "grad_norm": 0.3551720380783081, "learning_rate": 1.5058130081300815e-05, "loss": 0.0383, "step": 60785 }, { "epoch": 0.7413414634146341, "grad_norm": 0.3578733205795288, "learning_rate": 1.5057723577235775e-05, "loss": 0.0367, "step": 60790 }, { "epoch": 0.7414024390243903, "grad_norm": 0.6361296772956848, "learning_rate": 1.5057317073170732e-05, "loss": 0.0587, "step": 60795 }, { "epoch": 0.7414634146341463, "grad_norm": 0.5504994988441467, "learning_rate": 1.5056910569105691e-05, "loss": 0.0462, "step": 60800 }, { "epoch": 0.7415243902439025, "grad_norm": 0.21117274463176727, "learning_rate": 1.5056504065040651e-05, "loss": 0.0321, "step": 60805 }, { "epoch": 0.7415853658536585, "grad_norm": 0.609859049320221, "learning_rate": 1.5056097560975611e-05, "loss": 0.0376, "step": 60810 }, { "epoch": 0.7416463414634147, "grad_norm": 0.8246015906333923, "learning_rate": 1.5055691056910571e-05, "loss": 0.0575, "step": 60815 }, { "epoch": 0.7417073170731707, "grad_norm": 0.27862855792045593, "learning_rate": 1.505528455284553e-05, "loss": 0.0492, "step": 60820 }, { "epoch": 0.7417682926829269, "grad_norm": 0.33982619643211365, "learning_rate": 1.5054878048780489e-05, "loss": 0.037, "step": 60825 }, { "epoch": 0.7418292682926829, "grad_norm": 0.9746114611625671, "learning_rate": 1.5054471544715447e-05, "loss": 0.0792, "step": 60830 }, { "epoch": 0.7418902439024391, "grad_norm": 1.2165322303771973, "learning_rate": 1.5054065040650407e-05, "loss": 0.0981, "step": 60835 }, { "epoch": 0.7419512195121951, "grad_norm": 0.6340640187263489, "learning_rate": 1.5053658536585367e-05, "loss": 0.0542, "step": 60840 }, { "epoch": 0.7420121951219513, "grad_norm": 0.30515211820602417, "learning_rate": 1.5053252032520326e-05, "loss": 0.024, "step": 60845 }, { "epoch": 0.7420731707317073, "grad_norm": 0.4655514359474182, "learning_rate": 1.5052845528455286e-05, "loss": 0.0393, "step": 60850 }, { "epoch": 0.7421341463414635, "grad_norm": 2.875760793685913, "learning_rate": 1.5052439024390244e-05, "loss": 0.0767, "step": 60855 }, { "epoch": 0.7421951219512195, "grad_norm": 2.9487597942352295, "learning_rate": 1.5052032520325204e-05, "loss": 0.0683, "step": 60860 }, { "epoch": 0.7422560975609757, "grad_norm": 0.7067957520484924, "learning_rate": 1.5051626016260164e-05, "loss": 0.0542, "step": 60865 }, { "epoch": 0.7423170731707317, "grad_norm": 0.45764628052711487, "learning_rate": 1.5051219512195124e-05, "loss": 0.056, "step": 60870 }, { "epoch": 0.7423780487804879, "grad_norm": 0.588446855545044, "learning_rate": 1.5050813008130084e-05, "loss": 0.0302, "step": 60875 }, { "epoch": 0.7424390243902439, "grad_norm": 0.34325316548347473, "learning_rate": 1.5050406504065043e-05, "loss": 0.0645, "step": 60880 }, { "epoch": 0.7425, "grad_norm": 0.46953481435775757, "learning_rate": 1.505e-05, "loss": 0.0349, "step": 60885 }, { "epoch": 0.7425609756097561, "grad_norm": 0.6272725462913513, "learning_rate": 1.504959349593496e-05, "loss": 0.0405, "step": 60890 }, { "epoch": 0.7426219512195122, "grad_norm": 0.6453782320022583, "learning_rate": 1.504918699186992e-05, "loss": 0.039, "step": 60895 }, { "epoch": 0.7426829268292683, "grad_norm": 0.5405220985412598, "learning_rate": 1.504878048780488e-05, "loss": 0.0242, "step": 60900 }, { "epoch": 0.7427439024390244, "grad_norm": 0.5660209655761719, "learning_rate": 1.504837398373984e-05, "loss": 0.0591, "step": 60905 }, { "epoch": 0.7428048780487805, "grad_norm": 0.7163698077201843, "learning_rate": 1.5047967479674799e-05, "loss": 0.0431, "step": 60910 }, { "epoch": 0.7428658536585366, "grad_norm": 0.8116719126701355, "learning_rate": 1.5047560975609755e-05, "loss": 0.0401, "step": 60915 }, { "epoch": 0.7429268292682927, "grad_norm": 0.5284734964370728, "learning_rate": 1.5047154471544715e-05, "loss": 0.052, "step": 60920 }, { "epoch": 0.7429878048780488, "grad_norm": 0.3389112055301666, "learning_rate": 1.5046747967479675e-05, "loss": 0.0328, "step": 60925 }, { "epoch": 0.7430487804878049, "grad_norm": 0.4750974476337433, "learning_rate": 1.5046341463414635e-05, "loss": 0.0555, "step": 60930 }, { "epoch": 0.743109756097561, "grad_norm": 0.6218082904815674, "learning_rate": 1.5045934959349595e-05, "loss": 0.0828, "step": 60935 }, { "epoch": 0.7431707317073171, "grad_norm": 0.4841158092021942, "learning_rate": 1.5045528455284555e-05, "loss": 0.0328, "step": 60940 }, { "epoch": 0.7432317073170732, "grad_norm": 1.030957579612732, "learning_rate": 1.5045121951219513e-05, "loss": 0.0494, "step": 60945 }, { "epoch": 0.7432926829268293, "grad_norm": 0.9461770057678223, "learning_rate": 1.5044715447154473e-05, "loss": 0.044, "step": 60950 }, { "epoch": 0.7433536585365854, "grad_norm": 0.7062508463859558, "learning_rate": 1.5044308943089432e-05, "loss": 0.0426, "step": 60955 }, { "epoch": 0.7434146341463415, "grad_norm": 0.6050611138343811, "learning_rate": 1.5043902439024392e-05, "loss": 0.0334, "step": 60960 }, { "epoch": 0.7434756097560976, "grad_norm": 0.6565405130386353, "learning_rate": 1.5043495934959352e-05, "loss": 0.0434, "step": 60965 }, { "epoch": 0.7435365853658537, "grad_norm": 0.4618701934814453, "learning_rate": 1.5043089430894312e-05, "loss": 0.0264, "step": 60970 }, { "epoch": 0.7435975609756098, "grad_norm": 1.0438865423202515, "learning_rate": 1.5042682926829268e-05, "loss": 0.0662, "step": 60975 }, { "epoch": 0.7436585365853658, "grad_norm": 0.8301246762275696, "learning_rate": 1.5042276422764228e-05, "loss": 0.0791, "step": 60980 }, { "epoch": 0.743719512195122, "grad_norm": 0.6442254185676575, "learning_rate": 1.5041869918699188e-05, "loss": 0.0198, "step": 60985 }, { "epoch": 0.743780487804878, "grad_norm": 0.5085339546203613, "learning_rate": 1.5041463414634148e-05, "loss": 0.0423, "step": 60990 }, { "epoch": 0.7438414634146342, "grad_norm": 0.44331666827201843, "learning_rate": 1.5041056910569108e-05, "loss": 0.0398, "step": 60995 }, { "epoch": 0.7439024390243902, "grad_norm": 0.41350412368774414, "learning_rate": 1.5040650406504067e-05, "loss": 0.0441, "step": 61000 }, { "epoch": 0.7439634146341464, "grad_norm": 0.905491828918457, "learning_rate": 1.5040243902439024e-05, "loss": 0.0633, "step": 61005 }, { "epoch": 0.7440243902439024, "grad_norm": 0.4515131711959839, "learning_rate": 1.5039837398373984e-05, "loss": 0.0291, "step": 61010 }, { "epoch": 0.7440853658536586, "grad_norm": 0.4909326136112213, "learning_rate": 1.5039430894308943e-05, "loss": 0.0364, "step": 61015 }, { "epoch": 0.7441463414634146, "grad_norm": 0.39724284410476685, "learning_rate": 1.5039024390243903e-05, "loss": 0.035, "step": 61020 }, { "epoch": 0.7442073170731708, "grad_norm": 0.40510258078575134, "learning_rate": 1.5038617886178863e-05, "loss": 0.0477, "step": 61025 }, { "epoch": 0.7442682926829268, "grad_norm": 0.5130869150161743, "learning_rate": 1.5038211382113823e-05, "loss": 0.0387, "step": 61030 }, { "epoch": 0.744329268292683, "grad_norm": 0.7884624004364014, "learning_rate": 1.5037804878048781e-05, "loss": 0.0519, "step": 61035 }, { "epoch": 0.744390243902439, "grad_norm": 0.7462712526321411, "learning_rate": 1.503739837398374e-05, "loss": 0.0367, "step": 61040 }, { "epoch": 0.7444512195121952, "grad_norm": 0.5917069315910339, "learning_rate": 1.50369918699187e-05, "loss": 0.0598, "step": 61045 }, { "epoch": 0.7445121951219512, "grad_norm": 0.40276336669921875, "learning_rate": 1.503658536585366e-05, "loss": 0.0506, "step": 61050 }, { "epoch": 0.7445731707317074, "grad_norm": 1.3042205572128296, "learning_rate": 1.503617886178862e-05, "loss": 0.0485, "step": 61055 }, { "epoch": 0.7446341463414634, "grad_norm": 0.4406975507736206, "learning_rate": 1.5035772357723578e-05, "loss": 0.0619, "step": 61060 }, { "epoch": 0.7446951219512196, "grad_norm": 0.9368277192115784, "learning_rate": 1.5035365853658537e-05, "loss": 0.044, "step": 61065 }, { "epoch": 0.7447560975609756, "grad_norm": 0.31402820348739624, "learning_rate": 1.5034959349593496e-05, "loss": 0.0501, "step": 61070 }, { "epoch": 0.7448170731707318, "grad_norm": 0.44748619198799133, "learning_rate": 1.5034552845528456e-05, "loss": 0.0397, "step": 61075 }, { "epoch": 0.7448780487804878, "grad_norm": 0.47109556198120117, "learning_rate": 1.5034146341463416e-05, "loss": 0.0451, "step": 61080 }, { "epoch": 0.744939024390244, "grad_norm": 0.3913363516330719, "learning_rate": 1.5033739837398376e-05, "loss": 0.045, "step": 61085 }, { "epoch": 0.745, "grad_norm": 0.5347936749458313, "learning_rate": 1.5033333333333336e-05, "loss": 0.0314, "step": 61090 }, { "epoch": 0.7450609756097561, "grad_norm": 0.6488891839981079, "learning_rate": 1.5032926829268292e-05, "loss": 0.0423, "step": 61095 }, { "epoch": 0.7451219512195122, "grad_norm": 0.9573101997375488, "learning_rate": 1.5032520325203252e-05, "loss": 0.0616, "step": 61100 }, { "epoch": 0.7451829268292683, "grad_norm": 0.7263631224632263, "learning_rate": 1.5032113821138212e-05, "loss": 0.0645, "step": 61105 }, { "epoch": 0.7452439024390244, "grad_norm": 1.1807466745376587, "learning_rate": 1.5031707317073172e-05, "loss": 0.0483, "step": 61110 }, { "epoch": 0.7453048780487805, "grad_norm": 0.6058799624443054, "learning_rate": 1.5031300813008131e-05, "loss": 0.0534, "step": 61115 }, { "epoch": 0.7453658536585366, "grad_norm": 0.5904449820518494, "learning_rate": 1.5030894308943091e-05, "loss": 0.0698, "step": 61120 }, { "epoch": 0.7454268292682927, "grad_norm": 1.272606611251831, "learning_rate": 1.503048780487805e-05, "loss": 0.0516, "step": 61125 }, { "epoch": 0.7454878048780488, "grad_norm": 0.39469456672668457, "learning_rate": 1.5030081300813009e-05, "loss": 0.0474, "step": 61130 }, { "epoch": 0.7455487804878049, "grad_norm": 0.7920179963111877, "learning_rate": 1.5029674796747969e-05, "loss": 0.0544, "step": 61135 }, { "epoch": 0.745609756097561, "grad_norm": 0.7364455461502075, "learning_rate": 1.5029268292682929e-05, "loss": 0.0367, "step": 61140 }, { "epoch": 0.7456707317073171, "grad_norm": 0.6182591915130615, "learning_rate": 1.5028861788617889e-05, "loss": 0.0479, "step": 61145 }, { "epoch": 0.7457317073170732, "grad_norm": 0.8994031548500061, "learning_rate": 1.5028455284552847e-05, "loss": 0.0292, "step": 61150 }, { "epoch": 0.7457926829268293, "grad_norm": 0.4316498339176178, "learning_rate": 1.5028048780487805e-05, "loss": 0.0774, "step": 61155 }, { "epoch": 0.7458536585365854, "grad_norm": 0.34289655089378357, "learning_rate": 1.5027642276422765e-05, "loss": 0.0356, "step": 61160 }, { "epoch": 0.7459146341463415, "grad_norm": 0.5936660170555115, "learning_rate": 1.5027235772357725e-05, "loss": 0.1327, "step": 61165 }, { "epoch": 0.7459756097560976, "grad_norm": 0.9419959783554077, "learning_rate": 1.5026829268292684e-05, "loss": 0.0755, "step": 61170 }, { "epoch": 0.7460365853658537, "grad_norm": 0.5117760300636292, "learning_rate": 1.5026422764227644e-05, "loss": 0.0369, "step": 61175 }, { "epoch": 0.7460975609756098, "grad_norm": 0.9195433855056763, "learning_rate": 1.5026016260162604e-05, "loss": 0.098, "step": 61180 }, { "epoch": 0.7461585365853659, "grad_norm": 0.36066263914108276, "learning_rate": 1.502560975609756e-05, "loss": 0.029, "step": 61185 }, { "epoch": 0.746219512195122, "grad_norm": 0.6469669938087463, "learning_rate": 1.502520325203252e-05, "loss": 0.0695, "step": 61190 }, { "epoch": 0.7462804878048781, "grad_norm": 0.7689950466156006, "learning_rate": 1.502479674796748e-05, "loss": 0.0538, "step": 61195 }, { "epoch": 0.7463414634146341, "grad_norm": 0.6739314198493958, "learning_rate": 1.502439024390244e-05, "loss": 0.0526, "step": 61200 }, { "epoch": 0.7464024390243903, "grad_norm": 0.7494024634361267, "learning_rate": 1.50239837398374e-05, "loss": 0.0395, "step": 61205 }, { "epoch": 0.7464634146341463, "grad_norm": 0.6741158366203308, "learning_rate": 1.502357723577236e-05, "loss": 0.0406, "step": 61210 }, { "epoch": 0.7465243902439025, "grad_norm": 0.6708456873893738, "learning_rate": 1.5023170731707318e-05, "loss": 0.0402, "step": 61215 }, { "epoch": 0.7465853658536585, "grad_norm": 0.7191471457481384, "learning_rate": 1.5022764227642277e-05, "loss": 0.0619, "step": 61220 }, { "epoch": 0.7466463414634147, "grad_norm": 0.8214389681816101, "learning_rate": 1.5022357723577237e-05, "loss": 0.0622, "step": 61225 }, { "epoch": 0.7467073170731707, "grad_norm": 0.6060205698013306, "learning_rate": 1.5021951219512197e-05, "loss": 0.0417, "step": 61230 }, { "epoch": 0.7467682926829269, "grad_norm": 0.5293692946434021, "learning_rate": 1.5021544715447157e-05, "loss": 0.0784, "step": 61235 }, { "epoch": 0.7468292682926829, "grad_norm": 0.643653154373169, "learning_rate": 1.5021138211382115e-05, "loss": 0.0531, "step": 61240 }, { "epoch": 0.7468902439024391, "grad_norm": 0.20701053738594055, "learning_rate": 1.5020731707317073e-05, "loss": 0.0364, "step": 61245 }, { "epoch": 0.7469512195121951, "grad_norm": 0.5660070776939392, "learning_rate": 1.5020325203252033e-05, "loss": 0.0616, "step": 61250 }, { "epoch": 0.7470121951219513, "grad_norm": 0.582302451133728, "learning_rate": 1.5019918699186993e-05, "loss": 0.053, "step": 61255 }, { "epoch": 0.7470731707317073, "grad_norm": 0.5794059038162231, "learning_rate": 1.5019512195121953e-05, "loss": 0.0785, "step": 61260 }, { "epoch": 0.7471341463414635, "grad_norm": 0.5554816722869873, "learning_rate": 1.5019105691056912e-05, "loss": 0.0524, "step": 61265 }, { "epoch": 0.7471951219512195, "grad_norm": 0.26600152254104614, "learning_rate": 1.5018699186991872e-05, "loss": 0.0226, "step": 61270 }, { "epoch": 0.7472560975609757, "grad_norm": 0.554753303527832, "learning_rate": 1.5018292682926829e-05, "loss": 0.0406, "step": 61275 }, { "epoch": 0.7473170731707317, "grad_norm": 0.552532970905304, "learning_rate": 1.5017886178861789e-05, "loss": 0.0835, "step": 61280 }, { "epoch": 0.7473780487804879, "grad_norm": 0.9798924326896667, "learning_rate": 1.5017479674796748e-05, "loss": 0.0315, "step": 61285 }, { "epoch": 0.7474390243902439, "grad_norm": 0.6129953861236572, "learning_rate": 1.5017073170731708e-05, "loss": 0.0628, "step": 61290 }, { "epoch": 0.7475, "grad_norm": 0.5239832401275635, "learning_rate": 1.5016666666666668e-05, "loss": 0.0333, "step": 61295 }, { "epoch": 0.7475609756097561, "grad_norm": 1.0606894493103027, "learning_rate": 1.5016260162601628e-05, "loss": 0.0679, "step": 61300 }, { "epoch": 0.7476219512195122, "grad_norm": 0.47593459486961365, "learning_rate": 1.5015853658536586e-05, "loss": 0.0739, "step": 61305 }, { "epoch": 0.7476829268292683, "grad_norm": 0.8766483664512634, "learning_rate": 1.5015447154471546e-05, "loss": 0.0638, "step": 61310 }, { "epoch": 0.7477439024390244, "grad_norm": 0.6252970695495605, "learning_rate": 1.5015040650406506e-05, "loss": 0.0435, "step": 61315 }, { "epoch": 0.7478048780487805, "grad_norm": 0.42174315452575684, "learning_rate": 1.5014634146341465e-05, "loss": 0.066, "step": 61320 }, { "epoch": 0.7478658536585366, "grad_norm": 0.22066666185855865, "learning_rate": 1.5014227642276424e-05, "loss": 0.0331, "step": 61325 }, { "epoch": 0.7479268292682927, "grad_norm": 0.7060166597366333, "learning_rate": 1.5013821138211383e-05, "loss": 0.0896, "step": 61330 }, { "epoch": 0.7479878048780488, "grad_norm": 1.9243525266647339, "learning_rate": 1.5013414634146342e-05, "loss": 0.0383, "step": 61335 }, { "epoch": 0.7480487804878049, "grad_norm": 0.7037860155105591, "learning_rate": 1.5013008130081301e-05, "loss": 0.0763, "step": 61340 }, { "epoch": 0.748109756097561, "grad_norm": 0.35192927718162537, "learning_rate": 1.5012601626016261e-05, "loss": 0.0302, "step": 61345 }, { "epoch": 0.7481707317073171, "grad_norm": 0.7376490831375122, "learning_rate": 1.5012195121951221e-05, "loss": 0.0527, "step": 61350 }, { "epoch": 0.7482317073170732, "grad_norm": 0.4023764431476593, "learning_rate": 1.501178861788618e-05, "loss": 0.0542, "step": 61355 }, { "epoch": 0.7482926829268293, "grad_norm": 0.5408884286880493, "learning_rate": 1.501138211382114e-05, "loss": 0.0241, "step": 61360 }, { "epoch": 0.7483536585365854, "grad_norm": 0.3163203001022339, "learning_rate": 1.5010975609756097e-05, "loss": 0.0455, "step": 61365 }, { "epoch": 0.7484146341463415, "grad_norm": 0.41362401843070984, "learning_rate": 1.5010569105691057e-05, "loss": 0.0471, "step": 61370 }, { "epoch": 0.7484756097560976, "grad_norm": 0.4955834150314331, "learning_rate": 1.5010162601626017e-05, "loss": 0.0438, "step": 61375 }, { "epoch": 0.7485365853658537, "grad_norm": 0.7349846363067627, "learning_rate": 1.5009756097560977e-05, "loss": 0.0382, "step": 61380 }, { "epoch": 0.7485975609756098, "grad_norm": 0.41048571467399597, "learning_rate": 1.5009349593495936e-05, "loss": 0.0292, "step": 61385 }, { "epoch": 0.7486585365853659, "grad_norm": 0.7327640652656555, "learning_rate": 1.5008943089430896e-05, "loss": 0.0333, "step": 61390 }, { "epoch": 0.748719512195122, "grad_norm": 0.42307326197624207, "learning_rate": 1.5008536585365854e-05, "loss": 0.0463, "step": 61395 }, { "epoch": 0.748780487804878, "grad_norm": 1.3178800344467163, "learning_rate": 1.5008130081300814e-05, "loss": 0.0603, "step": 61400 }, { "epoch": 0.7488414634146342, "grad_norm": 0.32424309849739075, "learning_rate": 1.5007723577235774e-05, "loss": 0.0549, "step": 61405 }, { "epoch": 0.7489024390243902, "grad_norm": 0.6144091486930847, "learning_rate": 1.5007317073170734e-05, "loss": 0.0353, "step": 61410 }, { "epoch": 0.7489634146341464, "grad_norm": 0.5871288776397705, "learning_rate": 1.5006910569105692e-05, "loss": 0.0571, "step": 61415 }, { "epoch": 0.7490243902439024, "grad_norm": 0.6766170263290405, "learning_rate": 1.5006504065040652e-05, "loss": 0.0732, "step": 61420 }, { "epoch": 0.7490853658536586, "grad_norm": 0.5110051035881042, "learning_rate": 1.500609756097561e-05, "loss": 0.0538, "step": 61425 }, { "epoch": 0.7491463414634146, "grad_norm": 0.22002044320106506, "learning_rate": 1.500569105691057e-05, "loss": 0.0511, "step": 61430 }, { "epoch": 0.7492073170731708, "grad_norm": 0.49470430612564087, "learning_rate": 1.500528455284553e-05, "loss": 0.0607, "step": 61435 }, { "epoch": 0.7492682926829268, "grad_norm": 0.7408860325813293, "learning_rate": 1.500487804878049e-05, "loss": 0.0902, "step": 61440 }, { "epoch": 0.749329268292683, "grad_norm": 0.7696970105171204, "learning_rate": 1.5004471544715449e-05, "loss": 0.0782, "step": 61445 }, { "epoch": 0.749390243902439, "grad_norm": 0.32003912329673767, "learning_rate": 1.5004065040650409e-05, "loss": 0.021, "step": 61450 }, { "epoch": 0.7494512195121952, "grad_norm": 1.8799382448196411, "learning_rate": 1.5003658536585365e-05, "loss": 0.0275, "step": 61455 }, { "epoch": 0.7495121951219512, "grad_norm": 0.3021450340747833, "learning_rate": 1.5003252032520325e-05, "loss": 0.0777, "step": 61460 }, { "epoch": 0.7495731707317074, "grad_norm": 0.8574672937393188, "learning_rate": 1.5002845528455285e-05, "loss": 0.0771, "step": 61465 }, { "epoch": 0.7496341463414634, "grad_norm": 0.5347720980644226, "learning_rate": 1.5002439024390245e-05, "loss": 0.0477, "step": 61470 }, { "epoch": 0.7496951219512196, "grad_norm": 0.27414220571517944, "learning_rate": 1.5002032520325205e-05, "loss": 0.0518, "step": 61475 }, { "epoch": 0.7497560975609756, "grad_norm": 0.5052251219749451, "learning_rate": 1.5001626016260164e-05, "loss": 0.0489, "step": 61480 }, { "epoch": 0.7498170731707318, "grad_norm": 2.2990167140960693, "learning_rate": 1.5001219512195123e-05, "loss": 0.0626, "step": 61485 }, { "epoch": 0.7498780487804878, "grad_norm": 0.47654640674591064, "learning_rate": 1.5000813008130082e-05, "loss": 0.0416, "step": 61490 }, { "epoch": 0.749939024390244, "grad_norm": 0.6238940954208374, "learning_rate": 1.5000406504065042e-05, "loss": 0.0303, "step": 61495 }, { "epoch": 0.75, "grad_norm": 0.43531137704849243, "learning_rate": 1.5000000000000002e-05, "loss": 0.0426, "step": 61500 }, { "epoch": 0.750060975609756, "grad_norm": 0.48950403928756714, "learning_rate": 1.499959349593496e-05, "loss": 0.0363, "step": 61505 }, { "epoch": 0.7501219512195122, "grad_norm": 0.6891270875930786, "learning_rate": 1.499918699186992e-05, "loss": 0.0272, "step": 61510 }, { "epoch": 0.7501829268292682, "grad_norm": 2.4790563583374023, "learning_rate": 1.499878048780488e-05, "loss": 0.0442, "step": 61515 }, { "epoch": 0.7502439024390244, "grad_norm": 0.39949116110801697, "learning_rate": 1.4998373983739838e-05, "loss": 0.0418, "step": 61520 }, { "epoch": 0.7503048780487804, "grad_norm": 1.329392433166504, "learning_rate": 1.4997967479674798e-05, "loss": 0.0277, "step": 61525 }, { "epoch": 0.7503658536585366, "grad_norm": 0.2474239319562912, "learning_rate": 1.4997560975609758e-05, "loss": 0.0295, "step": 61530 }, { "epoch": 0.7504268292682926, "grad_norm": 0.6004372239112854, "learning_rate": 1.4997154471544717e-05, "loss": 0.0446, "step": 61535 }, { "epoch": 0.7504878048780488, "grad_norm": 0.20545931160449982, "learning_rate": 1.4996747967479677e-05, "loss": 0.0456, "step": 61540 }, { "epoch": 0.7505487804878048, "grad_norm": 1.0117888450622559, "learning_rate": 1.4996341463414637e-05, "loss": 0.0349, "step": 61545 }, { "epoch": 0.750609756097561, "grad_norm": 1.9032751321792603, "learning_rate": 1.4995934959349594e-05, "loss": 0.0689, "step": 61550 }, { "epoch": 0.750670731707317, "grad_norm": 0.6483751535415649, "learning_rate": 1.4995528455284553e-05, "loss": 0.0317, "step": 61555 }, { "epoch": 0.7507317073170732, "grad_norm": 0.40603527426719666, "learning_rate": 1.4995121951219513e-05, "loss": 0.036, "step": 61560 }, { "epoch": 0.7507926829268292, "grad_norm": 0.8180233836174011, "learning_rate": 1.4994715447154473e-05, "loss": 0.0265, "step": 61565 }, { "epoch": 0.7508536585365854, "grad_norm": 0.6432204246520996, "learning_rate": 1.4994308943089433e-05, "loss": 0.0488, "step": 61570 }, { "epoch": 0.7509146341463414, "grad_norm": 0.5446057915687561, "learning_rate": 1.4993902439024393e-05, "loss": 0.0359, "step": 61575 }, { "epoch": 0.7509756097560976, "grad_norm": 0.4486738443374634, "learning_rate": 1.499349593495935e-05, "loss": 0.0255, "step": 61580 }, { "epoch": 0.7510365853658536, "grad_norm": 0.8467264175415039, "learning_rate": 1.499308943089431e-05, "loss": 0.0426, "step": 61585 }, { "epoch": 0.7510975609756098, "grad_norm": 0.6790978312492371, "learning_rate": 1.4992682926829269e-05, "loss": 0.0572, "step": 61590 }, { "epoch": 0.7511585365853658, "grad_norm": 0.25043779611587524, "learning_rate": 1.4992276422764229e-05, "loss": 0.033, "step": 61595 }, { "epoch": 0.751219512195122, "grad_norm": 0.676249623298645, "learning_rate": 1.4991869918699188e-05, "loss": 0.0387, "step": 61600 }, { "epoch": 0.751280487804878, "grad_norm": 0.42682087421417236, "learning_rate": 1.4991463414634148e-05, "loss": 0.0402, "step": 61605 }, { "epoch": 0.7513414634146341, "grad_norm": 0.7976491451263428, "learning_rate": 1.4991056910569106e-05, "loss": 0.0934, "step": 61610 }, { "epoch": 0.7514024390243902, "grad_norm": 0.3729218542575836, "learning_rate": 1.4990650406504066e-05, "loss": 0.0625, "step": 61615 }, { "epoch": 0.7514634146341463, "grad_norm": 0.5793118476867676, "learning_rate": 1.4990243902439026e-05, "loss": 0.0403, "step": 61620 }, { "epoch": 0.7515243902439024, "grad_norm": 0.5987051129341125, "learning_rate": 1.4989837398373986e-05, "loss": 0.0819, "step": 61625 }, { "epoch": 0.7515853658536585, "grad_norm": 1.1670793294906616, "learning_rate": 1.4989430894308946e-05, "loss": 0.0356, "step": 61630 }, { "epoch": 0.7516463414634146, "grad_norm": 0.4612627923488617, "learning_rate": 1.4989024390243905e-05, "loss": 0.0514, "step": 61635 }, { "epoch": 0.7517073170731707, "grad_norm": 0.5626233816146851, "learning_rate": 1.4988617886178862e-05, "loss": 0.0371, "step": 61640 }, { "epoch": 0.7517682926829268, "grad_norm": 0.8312311768531799, "learning_rate": 1.4988211382113822e-05, "loss": 0.0684, "step": 61645 }, { "epoch": 0.7518292682926829, "grad_norm": 0.708502471446991, "learning_rate": 1.4987804878048781e-05, "loss": 0.065, "step": 61650 }, { "epoch": 0.751890243902439, "grad_norm": 0.3627442419528961, "learning_rate": 1.4987398373983741e-05, "loss": 0.043, "step": 61655 }, { "epoch": 0.7519512195121951, "grad_norm": 0.5564314126968384, "learning_rate": 1.4986991869918701e-05, "loss": 0.0419, "step": 61660 }, { "epoch": 0.7520121951219512, "grad_norm": 0.2991914749145508, "learning_rate": 1.4986585365853661e-05, "loss": 0.0299, "step": 61665 }, { "epoch": 0.7520731707317073, "grad_norm": 0.5053291916847229, "learning_rate": 1.4986178861788619e-05, "loss": 0.0373, "step": 61670 }, { "epoch": 0.7521341463414634, "grad_norm": 18.071788787841797, "learning_rate": 1.4985772357723579e-05, "loss": 0.0306, "step": 61675 }, { "epoch": 0.7521951219512195, "grad_norm": 0.411227822303772, "learning_rate": 1.4985365853658537e-05, "loss": 0.0605, "step": 61680 }, { "epoch": 0.7522560975609756, "grad_norm": 0.42325925827026367, "learning_rate": 1.4984959349593497e-05, "loss": 0.033, "step": 61685 }, { "epoch": 0.7523170731707317, "grad_norm": 0.4663982093334198, "learning_rate": 1.4984552845528457e-05, "loss": 0.0303, "step": 61690 }, { "epoch": 0.7523780487804878, "grad_norm": 0.764674961566925, "learning_rate": 1.4984146341463416e-05, "loss": 0.0629, "step": 61695 }, { "epoch": 0.7524390243902439, "grad_norm": 2.000450849533081, "learning_rate": 1.4983739837398375e-05, "loss": 0.0797, "step": 61700 }, { "epoch": 0.7525, "grad_norm": 0.49093860387802124, "learning_rate": 1.4983333333333334e-05, "loss": 0.0423, "step": 61705 }, { "epoch": 0.7525609756097561, "grad_norm": 0.3324417173862457, "learning_rate": 1.4982926829268294e-05, "loss": 0.0301, "step": 61710 }, { "epoch": 0.7526219512195121, "grad_norm": 0.5343775749206543, "learning_rate": 1.4982520325203254e-05, "loss": 0.0482, "step": 61715 }, { "epoch": 0.7526829268292683, "grad_norm": 0.5277614593505859, "learning_rate": 1.4982113821138214e-05, "loss": 0.0383, "step": 61720 }, { "epoch": 0.7527439024390243, "grad_norm": 0.6025612950325012, "learning_rate": 1.4981707317073174e-05, "loss": 0.0546, "step": 61725 }, { "epoch": 0.7528048780487805, "grad_norm": 0.5285347104072571, "learning_rate": 1.498130081300813e-05, "loss": 0.0712, "step": 61730 }, { "epoch": 0.7528658536585365, "grad_norm": 0.8143836259841919, "learning_rate": 1.498089430894309e-05, "loss": 0.0588, "step": 61735 }, { "epoch": 0.7529268292682927, "grad_norm": 0.3656423091888428, "learning_rate": 1.498048780487805e-05, "loss": 0.0541, "step": 61740 }, { "epoch": 0.7529878048780487, "grad_norm": 0.20993386209011078, "learning_rate": 1.498008130081301e-05, "loss": 0.0378, "step": 61745 }, { "epoch": 0.7530487804878049, "grad_norm": 0.7083049416542053, "learning_rate": 1.497967479674797e-05, "loss": 0.0442, "step": 61750 }, { "epoch": 0.7531097560975609, "grad_norm": 2.513488292694092, "learning_rate": 1.497926829268293e-05, "loss": 0.0435, "step": 61755 }, { "epoch": 0.7531707317073171, "grad_norm": 0.447257399559021, "learning_rate": 1.4978861788617887e-05, "loss": 0.0508, "step": 61760 }, { "epoch": 0.7532317073170731, "grad_norm": 0.8096327185630798, "learning_rate": 1.4978455284552846e-05, "loss": 0.084, "step": 61765 }, { "epoch": 0.7532926829268293, "grad_norm": 2.051825523376465, "learning_rate": 1.4978048780487805e-05, "loss": 0.0614, "step": 61770 }, { "epoch": 0.7533536585365853, "grad_norm": 0.6521630883216858, "learning_rate": 1.4977642276422765e-05, "loss": 0.0366, "step": 61775 }, { "epoch": 0.7534146341463415, "grad_norm": 0.6201331615447998, "learning_rate": 1.4977235772357725e-05, "loss": 0.026, "step": 61780 }, { "epoch": 0.7534756097560975, "grad_norm": 0.41844555735588074, "learning_rate": 1.4976829268292685e-05, "loss": 0.0445, "step": 61785 }, { "epoch": 0.7535365853658537, "grad_norm": 0.3856906592845917, "learning_rate": 1.4976422764227643e-05, "loss": 0.0375, "step": 61790 }, { "epoch": 0.7535975609756097, "grad_norm": 0.922428548336029, "learning_rate": 1.4976016260162603e-05, "loss": 0.0508, "step": 61795 }, { "epoch": 0.7536585365853659, "grad_norm": 1.7403442859649658, "learning_rate": 1.4975609756097563e-05, "loss": 0.0607, "step": 61800 }, { "epoch": 0.7537195121951219, "grad_norm": 0.40830090641975403, "learning_rate": 1.4975203252032522e-05, "loss": 0.0485, "step": 61805 }, { "epoch": 0.753780487804878, "grad_norm": 0.2746625244617462, "learning_rate": 1.4974796747967482e-05, "loss": 0.0367, "step": 61810 }, { "epoch": 0.7538414634146341, "grad_norm": 0.5062665939331055, "learning_rate": 1.4974390243902442e-05, "loss": 0.0451, "step": 61815 }, { "epoch": 0.7539024390243902, "grad_norm": 0.5524939298629761, "learning_rate": 1.4973983739837398e-05, "loss": 0.0747, "step": 61820 }, { "epoch": 0.7539634146341463, "grad_norm": 0.6348423361778259, "learning_rate": 1.4973577235772358e-05, "loss": 0.0473, "step": 61825 }, { "epoch": 0.7540243902439024, "grad_norm": 0.6498029232025146, "learning_rate": 1.4973170731707318e-05, "loss": 0.0521, "step": 61830 }, { "epoch": 0.7540853658536585, "grad_norm": 0.6641112565994263, "learning_rate": 1.4972764227642278e-05, "loss": 0.0348, "step": 61835 }, { "epoch": 0.7541463414634146, "grad_norm": 0.47719278931617737, "learning_rate": 1.4972357723577238e-05, "loss": 0.0566, "step": 61840 }, { "epoch": 0.7542073170731707, "grad_norm": 0.4283703863620758, "learning_rate": 1.4971951219512198e-05, "loss": 0.0503, "step": 61845 }, { "epoch": 0.7542682926829268, "grad_norm": 0.259207546710968, "learning_rate": 1.4971544715447156e-05, "loss": 0.0545, "step": 61850 }, { "epoch": 0.7543292682926829, "grad_norm": 1.3761705160140991, "learning_rate": 1.4971138211382114e-05, "loss": 0.0684, "step": 61855 }, { "epoch": 0.754390243902439, "grad_norm": 0.5683761239051819, "learning_rate": 1.4970731707317074e-05, "loss": 0.04, "step": 61860 }, { "epoch": 0.7544512195121951, "grad_norm": 0.5208830237388611, "learning_rate": 1.4970325203252033e-05, "loss": 0.051, "step": 61865 }, { "epoch": 0.7545121951219512, "grad_norm": 0.6473052501678467, "learning_rate": 1.4969918699186993e-05, "loss": 0.0242, "step": 61870 }, { "epoch": 0.7545731707317073, "grad_norm": 0.49880650639533997, "learning_rate": 1.4969512195121953e-05, "loss": 0.0494, "step": 61875 }, { "epoch": 0.7546341463414634, "grad_norm": 0.17061100900173187, "learning_rate": 1.4969105691056911e-05, "loss": 0.0713, "step": 61880 }, { "epoch": 0.7546951219512195, "grad_norm": 0.6709364652633667, "learning_rate": 1.4968699186991871e-05, "loss": 0.0777, "step": 61885 }, { "epoch": 0.7547560975609756, "grad_norm": 1.2564443349838257, "learning_rate": 1.4968292682926831e-05, "loss": 0.0316, "step": 61890 }, { "epoch": 0.7548170731707317, "grad_norm": 2.38118052482605, "learning_rate": 1.496788617886179e-05, "loss": 0.0699, "step": 61895 }, { "epoch": 0.7548780487804878, "grad_norm": 0.6507753133773804, "learning_rate": 1.496747967479675e-05, "loss": 0.0445, "step": 61900 }, { "epoch": 0.7549390243902439, "grad_norm": 1.0785887241363525, "learning_rate": 1.496707317073171e-05, "loss": 0.0846, "step": 61905 }, { "epoch": 0.755, "grad_norm": 0.5776293873786926, "learning_rate": 1.4966666666666667e-05, "loss": 0.0411, "step": 61910 }, { "epoch": 0.755060975609756, "grad_norm": 0.4725535213947296, "learning_rate": 1.4966260162601627e-05, "loss": 0.0305, "step": 61915 }, { "epoch": 0.7551219512195122, "grad_norm": 0.35281726717948914, "learning_rate": 1.4965853658536586e-05, "loss": 0.043, "step": 61920 }, { "epoch": 0.7551829268292682, "grad_norm": 0.3127172589302063, "learning_rate": 1.4965447154471546e-05, "loss": 0.0191, "step": 61925 }, { "epoch": 0.7552439024390244, "grad_norm": 0.3931052088737488, "learning_rate": 1.4965040650406506e-05, "loss": 0.0456, "step": 61930 }, { "epoch": 0.7553048780487804, "grad_norm": 0.7872169613838196, "learning_rate": 1.4964634146341466e-05, "loss": 0.0475, "step": 61935 }, { "epoch": 0.7553658536585366, "grad_norm": 0.6067027449607849, "learning_rate": 1.4964227642276424e-05, "loss": 0.0438, "step": 61940 }, { "epoch": 0.7554268292682926, "grad_norm": 0.5521798729896545, "learning_rate": 1.4963821138211382e-05, "loss": 0.0578, "step": 61945 }, { "epoch": 0.7554878048780488, "grad_norm": 0.42580410838127136, "learning_rate": 1.4963414634146342e-05, "loss": 0.0426, "step": 61950 }, { "epoch": 0.7555487804878048, "grad_norm": 1.753104567527771, "learning_rate": 1.4963008130081302e-05, "loss": 0.0586, "step": 61955 }, { "epoch": 0.755609756097561, "grad_norm": 0.6254223585128784, "learning_rate": 1.4962601626016262e-05, "loss": 0.0671, "step": 61960 }, { "epoch": 0.755670731707317, "grad_norm": 0.7036014199256897, "learning_rate": 1.4962195121951221e-05, "loss": 0.063, "step": 61965 }, { "epoch": 0.7557317073170732, "grad_norm": 0.7359310388565063, "learning_rate": 1.496178861788618e-05, "loss": 0.0559, "step": 61970 }, { "epoch": 0.7557926829268292, "grad_norm": 0.4970061182975769, "learning_rate": 1.496138211382114e-05, "loss": 0.0271, "step": 61975 }, { "epoch": 0.7558536585365854, "grad_norm": 0.4130187928676605, "learning_rate": 1.49609756097561e-05, "loss": 0.0281, "step": 61980 }, { "epoch": 0.7559146341463414, "grad_norm": 0.4903941750526428, "learning_rate": 1.4960569105691059e-05, "loss": 0.0513, "step": 61985 }, { "epoch": 0.7559756097560976, "grad_norm": 0.22892530262470245, "learning_rate": 1.4960162601626019e-05, "loss": 0.0483, "step": 61990 }, { "epoch": 0.7560365853658536, "grad_norm": 0.48442205786705017, "learning_rate": 1.4959756097560979e-05, "loss": 0.0343, "step": 61995 }, { "epoch": 0.7560975609756098, "grad_norm": 1.1899040937423706, "learning_rate": 1.4959349593495935e-05, "loss": 0.0538, "step": 62000 }, { "epoch": 0.7561585365853658, "grad_norm": 0.34729641675949097, "learning_rate": 1.4958943089430895e-05, "loss": 0.0421, "step": 62005 }, { "epoch": 0.756219512195122, "grad_norm": 0.5350177884101868, "learning_rate": 1.4958536585365855e-05, "loss": 0.0414, "step": 62010 }, { "epoch": 0.756280487804878, "grad_norm": 0.48975932598114014, "learning_rate": 1.4958130081300815e-05, "loss": 0.0488, "step": 62015 }, { "epoch": 0.7563414634146342, "grad_norm": 0.6639660000801086, "learning_rate": 1.4957723577235774e-05, "loss": 0.0841, "step": 62020 }, { "epoch": 0.7564024390243902, "grad_norm": 1.1531181335449219, "learning_rate": 1.4957317073170734e-05, "loss": 0.0534, "step": 62025 }, { "epoch": 0.7564634146341463, "grad_norm": 0.5639946460723877, "learning_rate": 1.495691056910569e-05, "loss": 0.028, "step": 62030 }, { "epoch": 0.7565243902439024, "grad_norm": 0.833483099937439, "learning_rate": 1.495650406504065e-05, "loss": 0.0261, "step": 62035 }, { "epoch": 0.7565853658536585, "grad_norm": 0.3535436689853668, "learning_rate": 1.495609756097561e-05, "loss": 0.0587, "step": 62040 }, { "epoch": 0.7566463414634146, "grad_norm": 0.5261027216911316, "learning_rate": 1.495569105691057e-05, "loss": 0.0488, "step": 62045 }, { "epoch": 0.7567073170731707, "grad_norm": 0.473803848028183, "learning_rate": 1.495528455284553e-05, "loss": 0.0346, "step": 62050 }, { "epoch": 0.7567682926829268, "grad_norm": 0.18744628131389618, "learning_rate": 1.495487804878049e-05, "loss": 0.0376, "step": 62055 }, { "epoch": 0.7568292682926829, "grad_norm": 0.7596760392189026, "learning_rate": 1.4954471544715448e-05, "loss": 0.0407, "step": 62060 }, { "epoch": 0.756890243902439, "grad_norm": 0.24472074210643768, "learning_rate": 1.4954065040650408e-05, "loss": 0.0417, "step": 62065 }, { "epoch": 0.7569512195121951, "grad_norm": 0.4756261706352234, "learning_rate": 1.4953658536585368e-05, "loss": 0.0472, "step": 62070 }, { "epoch": 0.7570121951219512, "grad_norm": 0.4370433986186981, "learning_rate": 1.4953252032520327e-05, "loss": 0.0431, "step": 62075 }, { "epoch": 0.7570731707317073, "grad_norm": 0.874406099319458, "learning_rate": 1.4952845528455287e-05, "loss": 0.0334, "step": 62080 }, { "epoch": 0.7571341463414634, "grad_norm": 0.42815130949020386, "learning_rate": 1.4952439024390247e-05, "loss": 0.0443, "step": 62085 }, { "epoch": 0.7571951219512195, "grad_norm": 0.6586098074913025, "learning_rate": 1.4952032520325203e-05, "loss": 0.0542, "step": 62090 }, { "epoch": 0.7572560975609756, "grad_norm": 0.6600761413574219, "learning_rate": 1.4951626016260163e-05, "loss": 0.0339, "step": 62095 }, { "epoch": 0.7573170731707317, "grad_norm": 0.6841395497322083, "learning_rate": 1.4951219512195123e-05, "loss": 0.0583, "step": 62100 }, { "epoch": 0.7573780487804878, "grad_norm": 0.5170304775238037, "learning_rate": 1.4950813008130083e-05, "loss": 0.0573, "step": 62105 }, { "epoch": 0.7574390243902439, "grad_norm": 0.6467059254646301, "learning_rate": 1.4950406504065043e-05, "loss": 0.0359, "step": 62110 }, { "epoch": 0.7575, "grad_norm": 0.6709374785423279, "learning_rate": 1.4950000000000003e-05, "loss": 0.0396, "step": 62115 }, { "epoch": 0.7575609756097561, "grad_norm": 0.797626256942749, "learning_rate": 1.4949593495934959e-05, "loss": 0.0514, "step": 62120 }, { "epoch": 0.7576219512195121, "grad_norm": 0.7680907249450684, "learning_rate": 1.4949186991869919e-05, "loss": 0.0323, "step": 62125 }, { "epoch": 0.7576829268292683, "grad_norm": 0.7742587924003601, "learning_rate": 1.4948780487804879e-05, "loss": 0.0481, "step": 62130 }, { "epoch": 0.7577439024390243, "grad_norm": 0.40993738174438477, "learning_rate": 1.4948373983739838e-05, "loss": 0.0363, "step": 62135 }, { "epoch": 0.7578048780487805, "grad_norm": 0.3946307301521301, "learning_rate": 1.4947967479674798e-05, "loss": 0.0408, "step": 62140 }, { "epoch": 0.7578658536585365, "grad_norm": 0.3292323350906372, "learning_rate": 1.4947560975609758e-05, "loss": 0.035, "step": 62145 }, { "epoch": 0.7579268292682927, "grad_norm": 0.4113517999649048, "learning_rate": 1.4947154471544716e-05, "loss": 0.0366, "step": 62150 }, { "epoch": 0.7579878048780487, "grad_norm": 0.32366684079170227, "learning_rate": 1.4946747967479676e-05, "loss": 0.0252, "step": 62155 }, { "epoch": 0.7580487804878049, "grad_norm": 0.4933719038963318, "learning_rate": 1.4946341463414636e-05, "loss": 0.0585, "step": 62160 }, { "epoch": 0.7581097560975609, "grad_norm": 0.4542912542819977, "learning_rate": 1.4945934959349596e-05, "loss": 0.0448, "step": 62165 }, { "epoch": 0.7581707317073171, "grad_norm": 0.5350919961929321, "learning_rate": 1.4945528455284556e-05, "loss": 0.0431, "step": 62170 }, { "epoch": 0.7582317073170731, "grad_norm": 0.8059453964233398, "learning_rate": 1.4945121951219514e-05, "loss": 0.0483, "step": 62175 }, { "epoch": 0.7582926829268293, "grad_norm": 0.4588286876678467, "learning_rate": 1.4944715447154472e-05, "loss": 0.0408, "step": 62180 }, { "epoch": 0.7583536585365853, "grad_norm": 0.5802088379859924, "learning_rate": 1.4944308943089432e-05, "loss": 0.0547, "step": 62185 }, { "epoch": 0.7584146341463415, "grad_norm": 0.2915925979614258, "learning_rate": 1.4943902439024391e-05, "loss": 0.0558, "step": 62190 }, { "epoch": 0.7584756097560975, "grad_norm": 0.6482955813407898, "learning_rate": 1.4943495934959351e-05, "loss": 0.0475, "step": 62195 }, { "epoch": 0.7585365853658537, "grad_norm": 0.8687958121299744, "learning_rate": 1.4943089430894311e-05, "loss": 0.0482, "step": 62200 }, { "epoch": 0.7585975609756097, "grad_norm": 0.35225215554237366, "learning_rate": 1.4942682926829271e-05, "loss": 0.0399, "step": 62205 }, { "epoch": 0.7586585365853659, "grad_norm": 0.471621036529541, "learning_rate": 1.4942276422764227e-05, "loss": 0.0434, "step": 62210 }, { "epoch": 0.7587195121951219, "grad_norm": 1.0233427286148071, "learning_rate": 1.4941869918699187e-05, "loss": 0.0901, "step": 62215 }, { "epoch": 0.7587804878048781, "grad_norm": 0.4014762341976166, "learning_rate": 1.4941463414634147e-05, "loss": 0.0394, "step": 62220 }, { "epoch": 0.7588414634146341, "grad_norm": 0.9564347863197327, "learning_rate": 1.4941056910569107e-05, "loss": 0.0375, "step": 62225 }, { "epoch": 0.7589024390243903, "grad_norm": 0.7281647324562073, "learning_rate": 1.4940650406504067e-05, "loss": 0.0501, "step": 62230 }, { "epoch": 0.7589634146341463, "grad_norm": 0.9187254309654236, "learning_rate": 1.4940243902439026e-05, "loss": 0.0583, "step": 62235 }, { "epoch": 0.7590243902439024, "grad_norm": 0.46761131286621094, "learning_rate": 1.4939837398373985e-05, "loss": 0.0887, "step": 62240 }, { "epoch": 0.7590853658536585, "grad_norm": 0.9314888119697571, "learning_rate": 1.4939430894308944e-05, "loss": 0.0465, "step": 62245 }, { "epoch": 0.7591463414634146, "grad_norm": 0.39705759286880493, "learning_rate": 1.4939024390243904e-05, "loss": 0.0372, "step": 62250 }, { "epoch": 0.7592073170731707, "grad_norm": 0.29435765743255615, "learning_rate": 1.4938617886178864e-05, "loss": 0.0383, "step": 62255 }, { "epoch": 0.7592682926829268, "grad_norm": 0.530044674873352, "learning_rate": 1.4938211382113824e-05, "loss": 0.0354, "step": 62260 }, { "epoch": 0.7593292682926829, "grad_norm": 0.47287416458129883, "learning_rate": 1.4937804878048782e-05, "loss": 0.0479, "step": 62265 }, { "epoch": 0.759390243902439, "grad_norm": 1.0391929149627686, "learning_rate": 1.493739837398374e-05, "loss": 0.0543, "step": 62270 }, { "epoch": 0.7594512195121951, "grad_norm": 0.5132818818092346, "learning_rate": 1.49369918699187e-05, "loss": 0.0426, "step": 62275 }, { "epoch": 0.7595121951219512, "grad_norm": 0.3345285654067993, "learning_rate": 1.493658536585366e-05, "loss": 0.0435, "step": 62280 }, { "epoch": 0.7595731707317073, "grad_norm": 0.3911544680595398, "learning_rate": 1.493617886178862e-05, "loss": 0.046, "step": 62285 }, { "epoch": 0.7596341463414634, "grad_norm": 0.6553930044174194, "learning_rate": 1.493577235772358e-05, "loss": 0.0258, "step": 62290 }, { "epoch": 0.7596951219512195, "grad_norm": 0.6050282716751099, "learning_rate": 1.493536585365854e-05, "loss": 0.0446, "step": 62295 }, { "epoch": 0.7597560975609756, "grad_norm": 0.7464756965637207, "learning_rate": 1.4934959349593496e-05, "loss": 0.0328, "step": 62300 }, { "epoch": 0.7598170731707317, "grad_norm": 0.7396736741065979, "learning_rate": 1.4934552845528455e-05, "loss": 0.0461, "step": 62305 }, { "epoch": 0.7598780487804878, "grad_norm": 0.31987693905830383, "learning_rate": 1.4934146341463415e-05, "loss": 0.0366, "step": 62310 }, { "epoch": 0.7599390243902439, "grad_norm": 0.7949370741844177, "learning_rate": 1.4933739837398375e-05, "loss": 0.0505, "step": 62315 }, { "epoch": 0.76, "grad_norm": 0.6255664229393005, "learning_rate": 1.4933333333333335e-05, "loss": 0.0748, "step": 62320 }, { "epoch": 0.760060975609756, "grad_norm": 3.2124075889587402, "learning_rate": 1.4932926829268295e-05, "loss": 0.0398, "step": 62325 }, { "epoch": 0.7601219512195122, "grad_norm": 0.6049442291259766, "learning_rate": 1.4932520325203253e-05, "loss": 0.0511, "step": 62330 }, { "epoch": 0.7601829268292682, "grad_norm": 0.3813832402229309, "learning_rate": 1.4932113821138213e-05, "loss": 0.0658, "step": 62335 }, { "epoch": 0.7602439024390244, "grad_norm": 0.5796113014221191, "learning_rate": 1.4931707317073173e-05, "loss": 0.0721, "step": 62340 }, { "epoch": 0.7603048780487804, "grad_norm": 0.6124700903892517, "learning_rate": 1.4931300813008132e-05, "loss": 0.0575, "step": 62345 }, { "epoch": 0.7603658536585366, "grad_norm": 0.3131304383277893, "learning_rate": 1.4930894308943092e-05, "loss": 0.025, "step": 62350 }, { "epoch": 0.7604268292682926, "grad_norm": 0.40259814262390137, "learning_rate": 1.493048780487805e-05, "loss": 0.0378, "step": 62355 }, { "epoch": 0.7604878048780488, "grad_norm": 0.48562607169151306, "learning_rate": 1.4930081300813008e-05, "loss": 0.057, "step": 62360 }, { "epoch": 0.7605487804878048, "grad_norm": 0.503461480140686, "learning_rate": 1.4929674796747968e-05, "loss": 0.0274, "step": 62365 }, { "epoch": 0.760609756097561, "grad_norm": 0.7275007367134094, "learning_rate": 1.4929268292682928e-05, "loss": 0.0396, "step": 62370 }, { "epoch": 0.760670731707317, "grad_norm": 1.1085633039474487, "learning_rate": 1.4928861788617888e-05, "loss": 0.0283, "step": 62375 }, { "epoch": 0.7607317073170732, "grad_norm": 1.1323764324188232, "learning_rate": 1.4928455284552848e-05, "loss": 0.0827, "step": 62380 }, { "epoch": 0.7607926829268292, "grad_norm": 0.5015708804130554, "learning_rate": 1.4928048780487808e-05, "loss": 0.0465, "step": 62385 }, { "epoch": 0.7608536585365854, "grad_norm": 0.552634060382843, "learning_rate": 1.4927642276422764e-05, "loss": 0.0425, "step": 62390 }, { "epoch": 0.7609146341463414, "grad_norm": 0.8560396432876587, "learning_rate": 1.4927235772357724e-05, "loss": 0.0391, "step": 62395 }, { "epoch": 0.7609756097560976, "grad_norm": 2.6726627349853516, "learning_rate": 1.4926829268292684e-05, "loss": 0.061, "step": 62400 }, { "epoch": 0.7610365853658536, "grad_norm": 0.7751275897026062, "learning_rate": 1.4926422764227643e-05, "loss": 0.067, "step": 62405 }, { "epoch": 0.7610975609756098, "grad_norm": 0.6483821272850037, "learning_rate": 1.4926016260162603e-05, "loss": 0.0371, "step": 62410 }, { "epoch": 0.7611585365853658, "grad_norm": 0.4288331866264343, "learning_rate": 1.4925609756097563e-05, "loss": 0.0308, "step": 62415 }, { "epoch": 0.761219512195122, "grad_norm": 0.3638356328010559, "learning_rate": 1.4925203252032521e-05, "loss": 0.045, "step": 62420 }, { "epoch": 0.761280487804878, "grad_norm": 0.7672831416130066, "learning_rate": 1.4924796747967481e-05, "loss": 0.0412, "step": 62425 }, { "epoch": 0.7613414634146342, "grad_norm": 0.3857088088989258, "learning_rate": 1.492439024390244e-05, "loss": 0.0527, "step": 62430 }, { "epoch": 0.7614024390243902, "grad_norm": 0.36656689643859863, "learning_rate": 1.49239837398374e-05, "loss": 0.0359, "step": 62435 }, { "epoch": 0.7614634146341464, "grad_norm": 0.42615851759910583, "learning_rate": 1.4923577235772359e-05, "loss": 0.0426, "step": 62440 }, { "epoch": 0.7615243902439024, "grad_norm": 0.4692004919052124, "learning_rate": 1.4923170731707319e-05, "loss": 0.0471, "step": 62445 }, { "epoch": 0.7615853658536585, "grad_norm": 0.6080459356307983, "learning_rate": 1.4922764227642277e-05, "loss": 0.0537, "step": 62450 }, { "epoch": 0.7616463414634146, "grad_norm": 0.45933446288108826, "learning_rate": 1.4922357723577237e-05, "loss": 0.0539, "step": 62455 }, { "epoch": 0.7617073170731707, "grad_norm": 0.4438643753528595, "learning_rate": 1.4921951219512196e-05, "loss": 0.0495, "step": 62460 }, { "epoch": 0.7617682926829268, "grad_norm": 0.17397813498973846, "learning_rate": 1.4921544715447156e-05, "loss": 0.031, "step": 62465 }, { "epoch": 0.7618292682926829, "grad_norm": 0.37187451124191284, "learning_rate": 1.4921138211382116e-05, "loss": 0.0632, "step": 62470 }, { "epoch": 0.761890243902439, "grad_norm": 0.5604380965232849, "learning_rate": 1.4920731707317076e-05, "loss": 0.0479, "step": 62475 }, { "epoch": 0.7619512195121951, "grad_norm": 2.3386857509613037, "learning_rate": 1.4920325203252032e-05, "loss": 0.0825, "step": 62480 }, { "epoch": 0.7620121951219512, "grad_norm": 0.593342661857605, "learning_rate": 1.4919918699186992e-05, "loss": 0.0608, "step": 62485 }, { "epoch": 0.7620731707317073, "grad_norm": 0.42459636926651, "learning_rate": 1.4919512195121952e-05, "loss": 0.0419, "step": 62490 }, { "epoch": 0.7621341463414634, "grad_norm": 0.40923479199409485, "learning_rate": 1.4919105691056912e-05, "loss": 0.0604, "step": 62495 }, { "epoch": 0.7621951219512195, "grad_norm": 1.6339768171310425, "learning_rate": 1.4918699186991872e-05, "loss": 0.0551, "step": 62500 }, { "epoch": 0.7622560975609756, "grad_norm": 0.8082873821258545, "learning_rate": 1.4918292682926831e-05, "loss": 0.0565, "step": 62505 }, { "epoch": 0.7623170731707317, "grad_norm": 0.4096842408180237, "learning_rate": 1.491788617886179e-05, "loss": 0.0381, "step": 62510 }, { "epoch": 0.7623780487804878, "grad_norm": 0.7903647422790527, "learning_rate": 1.491747967479675e-05, "loss": 0.0559, "step": 62515 }, { "epoch": 0.7624390243902439, "grad_norm": 0.7450940012931824, "learning_rate": 1.4917073170731709e-05, "loss": 0.0812, "step": 62520 }, { "epoch": 0.7625, "grad_norm": 0.3699072003364563, "learning_rate": 1.4916666666666669e-05, "loss": 0.029, "step": 62525 }, { "epoch": 0.7625609756097561, "grad_norm": 0.592655599117279, "learning_rate": 1.4916260162601627e-05, "loss": 0.0439, "step": 62530 }, { "epoch": 0.7626219512195122, "grad_norm": 0.4572813808917999, "learning_rate": 1.4915853658536587e-05, "loss": 0.0793, "step": 62535 }, { "epoch": 0.7626829268292683, "grad_norm": 0.7595090270042419, "learning_rate": 1.4915447154471545e-05, "loss": 0.0487, "step": 62540 }, { "epoch": 0.7627439024390243, "grad_norm": 1.2888190746307373, "learning_rate": 1.4915040650406505e-05, "loss": 0.0603, "step": 62545 }, { "epoch": 0.7628048780487805, "grad_norm": 0.7470592856407166, "learning_rate": 1.4914634146341465e-05, "loss": 0.0346, "step": 62550 }, { "epoch": 0.7628658536585365, "grad_norm": 0.3113545775413513, "learning_rate": 1.4914227642276425e-05, "loss": 0.0557, "step": 62555 }, { "epoch": 0.7629268292682927, "grad_norm": 0.5178604125976562, "learning_rate": 1.4913821138211384e-05, "loss": 0.0393, "step": 62560 }, { "epoch": 0.7629878048780487, "grad_norm": 0.48481684923171997, "learning_rate": 1.4913414634146344e-05, "loss": 0.0832, "step": 62565 }, { "epoch": 0.7630487804878049, "grad_norm": 0.1198301687836647, "learning_rate": 1.49130081300813e-05, "loss": 0.0513, "step": 62570 }, { "epoch": 0.7631097560975609, "grad_norm": 0.6866130232810974, "learning_rate": 1.491260162601626e-05, "loss": 0.0596, "step": 62575 }, { "epoch": 0.7631707317073171, "grad_norm": 0.5589277148246765, "learning_rate": 1.491219512195122e-05, "loss": 0.0633, "step": 62580 }, { "epoch": 0.7632317073170731, "grad_norm": 0.3406486213207245, "learning_rate": 1.491178861788618e-05, "loss": 0.08, "step": 62585 }, { "epoch": 0.7632926829268293, "grad_norm": 0.6640004515647888, "learning_rate": 1.491138211382114e-05, "loss": 0.0776, "step": 62590 }, { "epoch": 0.7633536585365853, "grad_norm": 0.43398746848106384, "learning_rate": 1.49109756097561e-05, "loss": 0.0523, "step": 62595 }, { "epoch": 0.7634146341463415, "grad_norm": 0.655427873134613, "learning_rate": 1.4910569105691058e-05, "loss": 0.0677, "step": 62600 }, { "epoch": 0.7634756097560975, "grad_norm": 0.6041429042816162, "learning_rate": 1.4910162601626018e-05, "loss": 0.0601, "step": 62605 }, { "epoch": 0.7635365853658537, "grad_norm": 0.49577245116233826, "learning_rate": 1.4909756097560977e-05, "loss": 0.0555, "step": 62610 }, { "epoch": 0.7635975609756097, "grad_norm": 0.7387702465057373, "learning_rate": 1.4909349593495937e-05, "loss": 0.0763, "step": 62615 }, { "epoch": 0.7636585365853659, "grad_norm": 1.180081844329834, "learning_rate": 1.4908943089430895e-05, "loss": 0.032, "step": 62620 }, { "epoch": 0.7637195121951219, "grad_norm": 0.7613048553466797, "learning_rate": 1.4908536585365855e-05, "loss": 0.0814, "step": 62625 }, { "epoch": 0.7637804878048781, "grad_norm": 0.352714866399765, "learning_rate": 1.4908130081300813e-05, "loss": 0.0433, "step": 62630 }, { "epoch": 0.7638414634146341, "grad_norm": 0.5270062685012817, "learning_rate": 1.4907723577235773e-05, "loss": 0.063, "step": 62635 }, { "epoch": 0.7639024390243903, "grad_norm": 0.6263976097106934, "learning_rate": 1.4907317073170733e-05, "loss": 0.0523, "step": 62640 }, { "epoch": 0.7639634146341463, "grad_norm": 0.5367357730865479, "learning_rate": 1.4906910569105693e-05, "loss": 0.0458, "step": 62645 }, { "epoch": 0.7640243902439025, "grad_norm": 0.07370169460773468, "learning_rate": 1.4906504065040653e-05, "loss": 0.0328, "step": 62650 }, { "epoch": 0.7640853658536585, "grad_norm": 0.8231689929962158, "learning_rate": 1.4906097560975612e-05, "loss": 0.0409, "step": 62655 }, { "epoch": 0.7641463414634146, "grad_norm": 0.5499638915061951, "learning_rate": 1.4905691056910569e-05, "loss": 0.0496, "step": 62660 }, { "epoch": 0.7642073170731707, "grad_norm": 0.8669933676719666, "learning_rate": 1.4905284552845529e-05, "loss": 0.0685, "step": 62665 }, { "epoch": 0.7642682926829268, "grad_norm": 0.21018685400485992, "learning_rate": 1.4904878048780489e-05, "loss": 0.0368, "step": 62670 }, { "epoch": 0.7643292682926829, "grad_norm": 0.7033156156539917, "learning_rate": 1.4904471544715448e-05, "loss": 0.0325, "step": 62675 }, { "epoch": 0.764390243902439, "grad_norm": 0.6464289426803589, "learning_rate": 1.4904065040650408e-05, "loss": 0.0492, "step": 62680 }, { "epoch": 0.7644512195121951, "grad_norm": 0.7780504822731018, "learning_rate": 1.4903658536585368e-05, "loss": 0.0602, "step": 62685 }, { "epoch": 0.7645121951219512, "grad_norm": 0.5103010535240173, "learning_rate": 1.4903252032520326e-05, "loss": 0.053, "step": 62690 }, { "epoch": 0.7645731707317073, "grad_norm": 0.9252300262451172, "learning_rate": 1.4902845528455286e-05, "loss": 0.045, "step": 62695 }, { "epoch": 0.7646341463414634, "grad_norm": 0.5600923895835876, "learning_rate": 1.4902439024390246e-05, "loss": 0.054, "step": 62700 }, { "epoch": 0.7646951219512195, "grad_norm": 0.925140917301178, "learning_rate": 1.4902032520325204e-05, "loss": 0.0656, "step": 62705 }, { "epoch": 0.7647560975609756, "grad_norm": 0.4401192367076874, "learning_rate": 1.4901626016260164e-05, "loss": 0.0515, "step": 62710 }, { "epoch": 0.7648170731707317, "grad_norm": 0.7007513642311096, "learning_rate": 1.4901219512195124e-05, "loss": 0.0624, "step": 62715 }, { "epoch": 0.7648780487804878, "grad_norm": 0.8846796751022339, "learning_rate": 1.4900813008130082e-05, "loss": 0.0462, "step": 62720 }, { "epoch": 0.7649390243902439, "grad_norm": 3.2846336364746094, "learning_rate": 1.4900406504065042e-05, "loss": 0.0594, "step": 62725 }, { "epoch": 0.765, "grad_norm": 1.3275102376937866, "learning_rate": 1.4900000000000001e-05, "loss": 0.0264, "step": 62730 }, { "epoch": 0.765060975609756, "grad_norm": 0.6905763745307922, "learning_rate": 1.4899593495934961e-05, "loss": 0.0405, "step": 62735 }, { "epoch": 0.7651219512195122, "grad_norm": 1.2534040212631226, "learning_rate": 1.4899186991869921e-05, "loss": 0.0451, "step": 62740 }, { "epoch": 0.7651829268292683, "grad_norm": 0.7040232419967651, "learning_rate": 1.489878048780488e-05, "loss": 0.056, "step": 62745 }, { "epoch": 0.7652439024390244, "grad_norm": 0.33498892188072205, "learning_rate": 1.4898373983739837e-05, "loss": 0.0643, "step": 62750 }, { "epoch": 0.7653048780487804, "grad_norm": 0.34635651111602783, "learning_rate": 1.4897967479674797e-05, "loss": 0.0508, "step": 62755 }, { "epoch": 0.7653658536585366, "grad_norm": 0.758186936378479, "learning_rate": 1.4897560975609757e-05, "loss": 0.051, "step": 62760 }, { "epoch": 0.7654268292682926, "grad_norm": 0.41508346796035767, "learning_rate": 1.4897154471544717e-05, "loss": 0.0518, "step": 62765 }, { "epoch": 0.7654878048780488, "grad_norm": 1.3297829627990723, "learning_rate": 1.4896747967479677e-05, "loss": 0.0501, "step": 62770 }, { "epoch": 0.7655487804878048, "grad_norm": 0.8196362853050232, "learning_rate": 1.4896341463414636e-05, "loss": 0.0438, "step": 62775 }, { "epoch": 0.765609756097561, "grad_norm": 0.32595229148864746, "learning_rate": 1.4895934959349594e-05, "loss": 0.0463, "step": 62780 }, { "epoch": 0.765670731707317, "grad_norm": 0.7035735845565796, "learning_rate": 1.4895528455284554e-05, "loss": 0.0351, "step": 62785 }, { "epoch": 0.7657317073170732, "grad_norm": 0.3489680588245392, "learning_rate": 1.4895121951219514e-05, "loss": 0.0321, "step": 62790 }, { "epoch": 0.7657926829268292, "grad_norm": 0.9499736428260803, "learning_rate": 1.4894715447154472e-05, "loss": 0.0576, "step": 62795 }, { "epoch": 0.7658536585365854, "grad_norm": 0.35541489720344543, "learning_rate": 1.4894308943089432e-05, "loss": 0.0463, "step": 62800 }, { "epoch": 0.7659146341463414, "grad_norm": 0.5599582195281982, "learning_rate": 1.4893902439024392e-05, "loss": 0.0398, "step": 62805 }, { "epoch": 0.7659756097560976, "grad_norm": 0.9923323392868042, "learning_rate": 1.489349593495935e-05, "loss": 0.0799, "step": 62810 }, { "epoch": 0.7660365853658536, "grad_norm": 0.6865273118019104, "learning_rate": 1.489308943089431e-05, "loss": 0.0396, "step": 62815 }, { "epoch": 0.7660975609756098, "grad_norm": 0.838472306728363, "learning_rate": 1.489268292682927e-05, "loss": 0.0781, "step": 62820 }, { "epoch": 0.7661585365853658, "grad_norm": 0.24092116951942444, "learning_rate": 1.489227642276423e-05, "loss": 0.0341, "step": 62825 }, { "epoch": 0.766219512195122, "grad_norm": 0.9661651849746704, "learning_rate": 1.489186991869919e-05, "loss": 0.0513, "step": 62830 }, { "epoch": 0.766280487804878, "grad_norm": 0.3399171531200409, "learning_rate": 1.4891463414634149e-05, "loss": 0.0482, "step": 62835 }, { "epoch": 0.7663414634146342, "grad_norm": 0.5017029643058777, "learning_rate": 1.4891056910569106e-05, "loss": 0.0685, "step": 62840 }, { "epoch": 0.7664024390243902, "grad_norm": 0.6427594423294067, "learning_rate": 1.4890650406504065e-05, "loss": 0.0404, "step": 62845 }, { "epoch": 0.7664634146341464, "grad_norm": 0.8101062178611755, "learning_rate": 1.4890243902439025e-05, "loss": 0.0423, "step": 62850 }, { "epoch": 0.7665243902439024, "grad_norm": 0.34601446986198425, "learning_rate": 1.4889837398373985e-05, "loss": 0.0561, "step": 62855 }, { "epoch": 0.7665853658536586, "grad_norm": 0.5030015110969543, "learning_rate": 1.4889430894308945e-05, "loss": 0.0548, "step": 62860 }, { "epoch": 0.7666463414634146, "grad_norm": 0.573540449142456, "learning_rate": 1.4889024390243905e-05, "loss": 0.056, "step": 62865 }, { "epoch": 0.7667073170731707, "grad_norm": 0.8341323137283325, "learning_rate": 1.4888617886178863e-05, "loss": 0.0446, "step": 62870 }, { "epoch": 0.7667682926829268, "grad_norm": 0.9675928354263306, "learning_rate": 1.4888211382113823e-05, "loss": 0.0275, "step": 62875 }, { "epoch": 0.7668292682926829, "grad_norm": 0.40967339277267456, "learning_rate": 1.4887804878048782e-05, "loss": 0.0245, "step": 62880 }, { "epoch": 0.766890243902439, "grad_norm": 0.540197491645813, "learning_rate": 1.488739837398374e-05, "loss": 0.0254, "step": 62885 }, { "epoch": 0.7669512195121951, "grad_norm": 1.0807627439498901, "learning_rate": 1.48869918699187e-05, "loss": 0.0658, "step": 62890 }, { "epoch": 0.7670121951219512, "grad_norm": 0.2593153119087219, "learning_rate": 1.488658536585366e-05, "loss": 0.0546, "step": 62895 }, { "epoch": 0.7670731707317073, "grad_norm": 1.0102014541625977, "learning_rate": 1.4886178861788618e-05, "loss": 0.0554, "step": 62900 }, { "epoch": 0.7671341463414634, "grad_norm": 1.3686457872390747, "learning_rate": 1.4885772357723578e-05, "loss": 0.0312, "step": 62905 }, { "epoch": 0.7671951219512195, "grad_norm": 0.6053133606910706, "learning_rate": 1.4885365853658538e-05, "loss": 0.0526, "step": 62910 }, { "epoch": 0.7672560975609756, "grad_norm": 0.29011964797973633, "learning_rate": 1.4884959349593498e-05, "loss": 0.0632, "step": 62915 }, { "epoch": 0.7673170731707317, "grad_norm": 0.5343635082244873, "learning_rate": 1.4884552845528458e-05, "loss": 0.0728, "step": 62920 }, { "epoch": 0.7673780487804878, "grad_norm": 0.47871869802474976, "learning_rate": 1.4884146341463417e-05, "loss": 0.0468, "step": 62925 }, { "epoch": 0.7674390243902439, "grad_norm": 0.530673623085022, "learning_rate": 1.4883739837398374e-05, "loss": 0.0538, "step": 62930 }, { "epoch": 0.7675, "grad_norm": 0.38779550790786743, "learning_rate": 1.4883333333333334e-05, "loss": 0.0531, "step": 62935 }, { "epoch": 0.7675609756097561, "grad_norm": 0.6295021176338196, "learning_rate": 1.4882926829268294e-05, "loss": 0.0677, "step": 62940 }, { "epoch": 0.7676219512195122, "grad_norm": 0.6031951308250427, "learning_rate": 1.4882520325203253e-05, "loss": 0.0533, "step": 62945 }, { "epoch": 0.7676829268292683, "grad_norm": 0.4577987492084503, "learning_rate": 1.4882113821138213e-05, "loss": 0.0809, "step": 62950 }, { "epoch": 0.7677439024390244, "grad_norm": 0.3486398756504059, "learning_rate": 1.4881707317073173e-05, "loss": 0.0378, "step": 62955 }, { "epoch": 0.7678048780487805, "grad_norm": 0.599384605884552, "learning_rate": 1.4881300813008131e-05, "loss": 0.0491, "step": 62960 }, { "epoch": 0.7678658536585365, "grad_norm": 0.5502505898475647, "learning_rate": 1.4880894308943091e-05, "loss": 0.0496, "step": 62965 }, { "epoch": 0.7679268292682927, "grad_norm": 0.29606297612190247, "learning_rate": 1.4880487804878049e-05, "loss": 0.0445, "step": 62970 }, { "epoch": 0.7679878048780487, "grad_norm": 0.3429534435272217, "learning_rate": 1.4880081300813009e-05, "loss": 0.0368, "step": 62975 }, { "epoch": 0.7680487804878049, "grad_norm": 0.3193437159061432, "learning_rate": 1.4879674796747969e-05, "loss": 0.0418, "step": 62980 }, { "epoch": 0.7681097560975609, "grad_norm": 0.530366063117981, "learning_rate": 1.4879268292682929e-05, "loss": 0.0315, "step": 62985 }, { "epoch": 0.7681707317073171, "grad_norm": 1.0836292505264282, "learning_rate": 1.4878861788617887e-05, "loss": 0.0463, "step": 62990 }, { "epoch": 0.7682317073170731, "grad_norm": 1.6306143999099731, "learning_rate": 1.4878455284552846e-05, "loss": 0.0675, "step": 62995 }, { "epoch": 0.7682926829268293, "grad_norm": 0.9655398726463318, "learning_rate": 1.4878048780487806e-05, "loss": 0.0567, "step": 63000 }, { "epoch": 0.7683536585365853, "grad_norm": 0.5947127938270569, "learning_rate": 1.4877642276422766e-05, "loss": 0.0442, "step": 63005 }, { "epoch": 0.7684146341463415, "grad_norm": 0.7491656541824341, "learning_rate": 1.4877235772357726e-05, "loss": 0.0334, "step": 63010 }, { "epoch": 0.7684756097560975, "grad_norm": 1.1900748014450073, "learning_rate": 1.4876829268292686e-05, "loss": 0.0549, "step": 63015 }, { "epoch": 0.7685365853658537, "grad_norm": 1.8166515827178955, "learning_rate": 1.4876422764227642e-05, "loss": 0.0468, "step": 63020 }, { "epoch": 0.7685975609756097, "grad_norm": 1.0294557809829712, "learning_rate": 1.4876016260162602e-05, "loss": 0.0393, "step": 63025 }, { "epoch": 0.7686585365853659, "grad_norm": 1.4956634044647217, "learning_rate": 1.4875609756097562e-05, "loss": 0.039, "step": 63030 }, { "epoch": 0.7687195121951219, "grad_norm": 0.5398257374763489, "learning_rate": 1.4875203252032522e-05, "loss": 0.0314, "step": 63035 }, { "epoch": 0.7687804878048781, "grad_norm": 0.3935823142528534, "learning_rate": 1.4874796747967481e-05, "loss": 0.0242, "step": 63040 }, { "epoch": 0.7688414634146341, "grad_norm": 0.23485414683818817, "learning_rate": 1.4874390243902441e-05, "loss": 0.0461, "step": 63045 }, { "epoch": 0.7689024390243903, "grad_norm": 0.3491121232509613, "learning_rate": 1.48739837398374e-05, "loss": 0.0415, "step": 63050 }, { "epoch": 0.7689634146341463, "grad_norm": 0.2938198447227478, "learning_rate": 1.487357723577236e-05, "loss": 0.0364, "step": 63055 }, { "epoch": 0.7690243902439025, "grad_norm": 0.5651171803474426, "learning_rate": 1.4873170731707317e-05, "loss": 0.0233, "step": 63060 }, { "epoch": 0.7690853658536585, "grad_norm": 0.6624968647956848, "learning_rate": 1.4872764227642277e-05, "loss": 0.1049, "step": 63065 }, { "epoch": 0.7691463414634147, "grad_norm": 0.503475546836853, "learning_rate": 1.4872357723577237e-05, "loss": 0.0741, "step": 63070 }, { "epoch": 0.7692073170731707, "grad_norm": 0.5545364022254944, "learning_rate": 1.4871951219512197e-05, "loss": 0.0586, "step": 63075 }, { "epoch": 0.7692682926829268, "grad_norm": 0.5194771885871887, "learning_rate": 1.4871544715447155e-05, "loss": 0.0285, "step": 63080 }, { "epoch": 0.7693292682926829, "grad_norm": 1.03691565990448, "learning_rate": 1.4871138211382115e-05, "loss": 0.0712, "step": 63085 }, { "epoch": 0.769390243902439, "grad_norm": 0.9771384000778198, "learning_rate": 1.4870731707317075e-05, "loss": 0.0356, "step": 63090 }, { "epoch": 0.7694512195121951, "grad_norm": 0.47745251655578613, "learning_rate": 1.4870325203252034e-05, "loss": 0.0281, "step": 63095 }, { "epoch": 0.7695121951219512, "grad_norm": 0.5570241808891296, "learning_rate": 1.4869918699186994e-05, "loss": 0.0411, "step": 63100 }, { "epoch": 0.7695731707317073, "grad_norm": 0.48366284370422363, "learning_rate": 1.4869512195121954e-05, "loss": 0.0729, "step": 63105 }, { "epoch": 0.7696341463414634, "grad_norm": 0.9992164373397827, "learning_rate": 1.486910569105691e-05, "loss": 0.0598, "step": 63110 }, { "epoch": 0.7696951219512195, "grad_norm": 0.8634548187255859, "learning_rate": 1.486869918699187e-05, "loss": 0.0435, "step": 63115 }, { "epoch": 0.7697560975609756, "grad_norm": 0.6126424074172974, "learning_rate": 1.486829268292683e-05, "loss": 0.0293, "step": 63120 }, { "epoch": 0.7698170731707317, "grad_norm": 0.6842405796051025, "learning_rate": 1.486788617886179e-05, "loss": 0.0506, "step": 63125 }, { "epoch": 0.7698780487804878, "grad_norm": 0.4220902919769287, "learning_rate": 1.486747967479675e-05, "loss": 0.0275, "step": 63130 }, { "epoch": 0.7699390243902439, "grad_norm": 0.6730918288230896, "learning_rate": 1.486707317073171e-05, "loss": 0.0476, "step": 63135 }, { "epoch": 0.77, "grad_norm": 0.5845146775245667, "learning_rate": 1.4866666666666668e-05, "loss": 0.0762, "step": 63140 }, { "epoch": 0.7700609756097561, "grad_norm": 0.36380282044410706, "learning_rate": 1.4866260162601628e-05, "loss": 0.0403, "step": 63145 }, { "epoch": 0.7701219512195122, "grad_norm": 0.9295154213905334, "learning_rate": 1.4865853658536586e-05, "loss": 0.0816, "step": 63150 }, { "epoch": 0.7701829268292683, "grad_norm": 0.5239707827568054, "learning_rate": 1.4865447154471546e-05, "loss": 0.065, "step": 63155 }, { "epoch": 0.7702439024390244, "grad_norm": 0.5560888648033142, "learning_rate": 1.4865040650406505e-05, "loss": 0.0498, "step": 63160 }, { "epoch": 0.7703048780487805, "grad_norm": 0.9321379661560059, "learning_rate": 1.4864634146341465e-05, "loss": 0.0649, "step": 63165 }, { "epoch": 0.7703658536585366, "grad_norm": 1.0137939453125, "learning_rate": 1.4864227642276423e-05, "loss": 0.0565, "step": 63170 }, { "epoch": 0.7704268292682926, "grad_norm": 0.6812419295310974, "learning_rate": 1.4863821138211383e-05, "loss": 0.0694, "step": 63175 }, { "epoch": 0.7704878048780488, "grad_norm": 0.3182806074619293, "learning_rate": 1.4863414634146343e-05, "loss": 0.0574, "step": 63180 }, { "epoch": 0.7705487804878048, "grad_norm": 0.7115845084190369, "learning_rate": 1.4863008130081303e-05, "loss": 0.0477, "step": 63185 }, { "epoch": 0.770609756097561, "grad_norm": 0.5589912533760071, "learning_rate": 1.4862601626016263e-05, "loss": 0.0606, "step": 63190 }, { "epoch": 0.770670731707317, "grad_norm": 1.5153286457061768, "learning_rate": 1.4862195121951222e-05, "loss": 0.0828, "step": 63195 }, { "epoch": 0.7707317073170732, "grad_norm": 0.7116414308547974, "learning_rate": 1.4861788617886179e-05, "loss": 0.074, "step": 63200 }, { "epoch": 0.7707926829268292, "grad_norm": 0.8524643182754517, "learning_rate": 1.4861382113821139e-05, "loss": 0.0365, "step": 63205 }, { "epoch": 0.7708536585365854, "grad_norm": 0.5558856725692749, "learning_rate": 1.4860975609756098e-05, "loss": 0.0396, "step": 63210 }, { "epoch": 0.7709146341463414, "grad_norm": 0.38449355959892273, "learning_rate": 1.4860569105691058e-05, "loss": 0.0327, "step": 63215 }, { "epoch": 0.7709756097560976, "grad_norm": 0.6794901490211487, "learning_rate": 1.4860162601626018e-05, "loss": 0.0471, "step": 63220 }, { "epoch": 0.7710365853658536, "grad_norm": 0.8440011143684387, "learning_rate": 1.4859756097560978e-05, "loss": 0.065, "step": 63225 }, { "epoch": 0.7710975609756098, "grad_norm": 0.34460902214050293, "learning_rate": 1.4859349593495936e-05, "loss": 0.0299, "step": 63230 }, { "epoch": 0.7711585365853658, "grad_norm": 0.643135666847229, "learning_rate": 1.4858943089430894e-05, "loss": 0.0551, "step": 63235 }, { "epoch": 0.771219512195122, "grad_norm": 0.23260323703289032, "learning_rate": 1.4858536585365854e-05, "loss": 0.0354, "step": 63240 }, { "epoch": 0.771280487804878, "grad_norm": 0.45751988887786865, "learning_rate": 1.4858130081300814e-05, "loss": 0.0356, "step": 63245 }, { "epoch": 0.7713414634146342, "grad_norm": 0.8571345806121826, "learning_rate": 1.4857723577235774e-05, "loss": 0.0666, "step": 63250 }, { "epoch": 0.7714024390243902, "grad_norm": 2.627981185913086, "learning_rate": 1.4857317073170733e-05, "loss": 0.0664, "step": 63255 }, { "epoch": 0.7714634146341464, "grad_norm": 0.5944331288337708, "learning_rate": 1.4856910569105692e-05, "loss": 0.043, "step": 63260 }, { "epoch": 0.7715243902439024, "grad_norm": 0.7311446666717529, "learning_rate": 1.4856504065040651e-05, "loss": 0.0538, "step": 63265 }, { "epoch": 0.7715853658536586, "grad_norm": 0.4784960448741913, "learning_rate": 1.4856097560975611e-05, "loss": 0.0466, "step": 63270 }, { "epoch": 0.7716463414634146, "grad_norm": 0.8062016367912292, "learning_rate": 1.4855691056910571e-05, "loss": 0.0516, "step": 63275 }, { "epoch": 0.7717073170731708, "grad_norm": 0.764170229434967, "learning_rate": 1.4855284552845531e-05, "loss": 0.0381, "step": 63280 }, { "epoch": 0.7717682926829268, "grad_norm": 0.5692154169082642, "learning_rate": 1.485487804878049e-05, "loss": 0.0373, "step": 63285 }, { "epoch": 0.771829268292683, "grad_norm": 1.3742609024047852, "learning_rate": 1.4854471544715447e-05, "loss": 0.0533, "step": 63290 }, { "epoch": 0.771890243902439, "grad_norm": 0.3286594748497009, "learning_rate": 1.4854065040650407e-05, "loss": 0.046, "step": 63295 }, { "epoch": 0.7719512195121951, "grad_norm": 1.2681106328964233, "learning_rate": 1.4853658536585367e-05, "loss": 0.0657, "step": 63300 }, { "epoch": 0.7720121951219512, "grad_norm": 0.23318275809288025, "learning_rate": 1.4853252032520327e-05, "loss": 0.0717, "step": 63305 }, { "epoch": 0.7720731707317073, "grad_norm": 0.8027530312538147, "learning_rate": 1.4852845528455286e-05, "loss": 0.0495, "step": 63310 }, { "epoch": 0.7721341463414634, "grad_norm": 0.5621241331100464, "learning_rate": 1.4852439024390246e-05, "loss": 0.0331, "step": 63315 }, { "epoch": 0.7721951219512195, "grad_norm": 0.7503142356872559, "learning_rate": 1.4852032520325204e-05, "loss": 0.0371, "step": 63320 }, { "epoch": 0.7722560975609756, "grad_norm": 0.5770863890647888, "learning_rate": 1.4851626016260163e-05, "loss": 0.0545, "step": 63325 }, { "epoch": 0.7723170731707317, "grad_norm": 0.530434787273407, "learning_rate": 1.4851219512195122e-05, "loss": 0.0609, "step": 63330 }, { "epoch": 0.7723780487804878, "grad_norm": 1.4151191711425781, "learning_rate": 1.4850813008130082e-05, "loss": 0.0313, "step": 63335 }, { "epoch": 0.7724390243902439, "grad_norm": 0.3792288303375244, "learning_rate": 1.4850406504065042e-05, "loss": 0.0444, "step": 63340 }, { "epoch": 0.7725, "grad_norm": 0.5245273113250732, "learning_rate": 1.4850000000000002e-05, "loss": 0.0462, "step": 63345 }, { "epoch": 0.7725609756097561, "grad_norm": 0.6309052109718323, "learning_rate": 1.484959349593496e-05, "loss": 0.0397, "step": 63350 }, { "epoch": 0.7726219512195122, "grad_norm": 0.18097203969955444, "learning_rate": 1.484918699186992e-05, "loss": 0.0406, "step": 63355 }, { "epoch": 0.7726829268292683, "grad_norm": 0.7300744652748108, "learning_rate": 1.484878048780488e-05, "loss": 0.0484, "step": 63360 }, { "epoch": 0.7727439024390244, "grad_norm": 0.5862962007522583, "learning_rate": 1.484837398373984e-05, "loss": 0.0998, "step": 63365 }, { "epoch": 0.7728048780487805, "grad_norm": 0.37606269121170044, "learning_rate": 1.48479674796748e-05, "loss": 0.0633, "step": 63370 }, { "epoch": 0.7728658536585366, "grad_norm": 0.9665565490722656, "learning_rate": 1.4847560975609759e-05, "loss": 0.0654, "step": 63375 }, { "epoch": 0.7729268292682927, "grad_norm": 1.226948857307434, "learning_rate": 1.4847154471544715e-05, "loss": 0.0707, "step": 63380 }, { "epoch": 0.7729878048780487, "grad_norm": 0.6709516048431396, "learning_rate": 1.4846747967479675e-05, "loss": 0.0475, "step": 63385 }, { "epoch": 0.7730487804878049, "grad_norm": 0.625213623046875, "learning_rate": 1.4846341463414635e-05, "loss": 0.039, "step": 63390 }, { "epoch": 0.7731097560975609, "grad_norm": 0.7118530869483948, "learning_rate": 1.4845934959349595e-05, "loss": 0.0286, "step": 63395 }, { "epoch": 0.7731707317073171, "grad_norm": 0.32795315980911255, "learning_rate": 1.4845528455284555e-05, "loss": 0.0436, "step": 63400 }, { "epoch": 0.7732317073170731, "grad_norm": 0.8758181929588318, "learning_rate": 1.4845121951219515e-05, "loss": 0.0404, "step": 63405 }, { "epoch": 0.7732926829268293, "grad_norm": 3.5412871837615967, "learning_rate": 1.4844715447154473e-05, "loss": 0.0427, "step": 63410 }, { "epoch": 0.7733536585365853, "grad_norm": 0.5928984880447388, "learning_rate": 1.4844308943089431e-05, "loss": 0.046, "step": 63415 }, { "epoch": 0.7734146341463415, "grad_norm": 0.716495931148529, "learning_rate": 1.484390243902439e-05, "loss": 0.0674, "step": 63420 }, { "epoch": 0.7734756097560975, "grad_norm": 0.5802934169769287, "learning_rate": 1.484349593495935e-05, "loss": 0.0587, "step": 63425 }, { "epoch": 0.7735365853658537, "grad_norm": 0.5461342334747314, "learning_rate": 1.484308943089431e-05, "loss": 0.053, "step": 63430 }, { "epoch": 0.7735975609756097, "grad_norm": 0.08476750552654266, "learning_rate": 1.484268292682927e-05, "loss": 0.0241, "step": 63435 }, { "epoch": 0.7736585365853659, "grad_norm": 0.481576532125473, "learning_rate": 1.4842276422764228e-05, "loss": 0.0617, "step": 63440 }, { "epoch": 0.7737195121951219, "grad_norm": 0.34648293256759644, "learning_rate": 1.4841869918699188e-05, "loss": 0.0739, "step": 63445 }, { "epoch": 0.7737804878048781, "grad_norm": 0.5899317860603333, "learning_rate": 1.4841463414634148e-05, "loss": 0.0438, "step": 63450 }, { "epoch": 0.7738414634146341, "grad_norm": 0.4777681529521942, "learning_rate": 1.4841056910569108e-05, "loss": 0.0435, "step": 63455 }, { "epoch": 0.7739024390243903, "grad_norm": 0.41398629546165466, "learning_rate": 1.4840650406504068e-05, "loss": 0.0348, "step": 63460 }, { "epoch": 0.7739634146341463, "grad_norm": 0.5557594895362854, "learning_rate": 1.4840243902439027e-05, "loss": 0.043, "step": 63465 }, { "epoch": 0.7740243902439025, "grad_norm": 0.7684390544891357, "learning_rate": 1.4839837398373984e-05, "loss": 0.0424, "step": 63470 }, { "epoch": 0.7740853658536585, "grad_norm": 0.39593979716300964, "learning_rate": 1.4839430894308944e-05, "loss": 0.0362, "step": 63475 }, { "epoch": 0.7741463414634147, "grad_norm": 0.38352274894714355, "learning_rate": 1.4839024390243903e-05, "loss": 0.0384, "step": 63480 }, { "epoch": 0.7742073170731707, "grad_norm": 0.5350298285484314, "learning_rate": 1.4838617886178863e-05, "loss": 0.0506, "step": 63485 }, { "epoch": 0.7742682926829269, "grad_norm": 0.5734763741493225, "learning_rate": 1.4838211382113823e-05, "loss": 0.064, "step": 63490 }, { "epoch": 0.7743292682926829, "grad_norm": 0.3721170425415039, "learning_rate": 1.4837804878048783e-05, "loss": 0.0308, "step": 63495 }, { "epoch": 0.774390243902439, "grad_norm": 0.41035401821136475, "learning_rate": 1.483739837398374e-05, "loss": 0.043, "step": 63500 }, { "epoch": 0.7744512195121951, "grad_norm": 0.34042373299598694, "learning_rate": 1.48369918699187e-05, "loss": 0.0321, "step": 63505 }, { "epoch": 0.7745121951219512, "grad_norm": 0.36376792192459106, "learning_rate": 1.4836585365853659e-05, "loss": 0.044, "step": 63510 }, { "epoch": 0.7745731707317073, "grad_norm": 0.4440363943576813, "learning_rate": 1.4836178861788619e-05, "loss": 0.0404, "step": 63515 }, { "epoch": 0.7746341463414634, "grad_norm": 0.4372484087944031, "learning_rate": 1.4835772357723579e-05, "loss": 0.0317, "step": 63520 }, { "epoch": 0.7746951219512195, "grad_norm": 0.9808812141418457, "learning_rate": 1.4835365853658538e-05, "loss": 0.055, "step": 63525 }, { "epoch": 0.7747560975609756, "grad_norm": 0.42185598611831665, "learning_rate": 1.4834959349593497e-05, "loss": 0.0468, "step": 63530 }, { "epoch": 0.7748170731707317, "grad_norm": 0.5520546436309814, "learning_rate": 1.4834552845528456e-05, "loss": 0.0545, "step": 63535 }, { "epoch": 0.7748780487804878, "grad_norm": 0.5088343024253845, "learning_rate": 1.4834146341463416e-05, "loss": 0.046, "step": 63540 }, { "epoch": 0.7749390243902439, "grad_norm": 0.43959638476371765, "learning_rate": 1.4833739837398376e-05, "loss": 0.0365, "step": 63545 }, { "epoch": 0.775, "grad_norm": 0.27038562297821045, "learning_rate": 1.4833333333333336e-05, "loss": 0.0274, "step": 63550 }, { "epoch": 0.7750609756097561, "grad_norm": 0.6420866847038269, "learning_rate": 1.4832926829268296e-05, "loss": 0.039, "step": 63555 }, { "epoch": 0.7751219512195122, "grad_norm": 0.34320640563964844, "learning_rate": 1.4832520325203252e-05, "loss": 0.0332, "step": 63560 }, { "epoch": 0.7751829268292683, "grad_norm": 1.0155689716339111, "learning_rate": 1.4832113821138212e-05, "loss": 0.0401, "step": 63565 }, { "epoch": 0.7752439024390244, "grad_norm": 0.49304813146591187, "learning_rate": 1.4831707317073172e-05, "loss": 0.0447, "step": 63570 }, { "epoch": 0.7753048780487805, "grad_norm": 0.8896938562393188, "learning_rate": 1.4831300813008132e-05, "loss": 0.0429, "step": 63575 }, { "epoch": 0.7753658536585366, "grad_norm": 0.425847589969635, "learning_rate": 1.4830894308943091e-05, "loss": 0.0469, "step": 63580 }, { "epoch": 0.7754268292682926, "grad_norm": 0.6069380044937134, "learning_rate": 1.4830487804878051e-05, "loss": 0.0711, "step": 63585 }, { "epoch": 0.7754878048780488, "grad_norm": 1.329607367515564, "learning_rate": 1.4830081300813008e-05, "loss": 0.0477, "step": 63590 }, { "epoch": 0.7755487804878048, "grad_norm": 0.3837648332118988, "learning_rate": 1.4829674796747967e-05, "loss": 0.0356, "step": 63595 }, { "epoch": 0.775609756097561, "grad_norm": 2.17498779296875, "learning_rate": 1.4829268292682927e-05, "loss": 0.0437, "step": 63600 }, { "epoch": 0.775670731707317, "grad_norm": 0.6008740067481995, "learning_rate": 1.4828861788617887e-05, "loss": 0.0259, "step": 63605 }, { "epoch": 0.7757317073170732, "grad_norm": 2.113502025604248, "learning_rate": 1.4828455284552847e-05, "loss": 0.0361, "step": 63610 }, { "epoch": 0.7757926829268292, "grad_norm": 0.385675847530365, "learning_rate": 1.4828048780487807e-05, "loss": 0.0363, "step": 63615 }, { "epoch": 0.7758536585365854, "grad_norm": 1.5631483793258667, "learning_rate": 1.4827642276422765e-05, "loss": 0.0679, "step": 63620 }, { "epoch": 0.7759146341463414, "grad_norm": 0.6228050589561462, "learning_rate": 1.4827235772357725e-05, "loss": 0.029, "step": 63625 }, { "epoch": 0.7759756097560976, "grad_norm": 0.45351964235305786, "learning_rate": 1.4826829268292685e-05, "loss": 0.0337, "step": 63630 }, { "epoch": 0.7760365853658536, "grad_norm": 0.7999324798583984, "learning_rate": 1.4826422764227644e-05, "loss": 0.086, "step": 63635 }, { "epoch": 0.7760975609756098, "grad_norm": 0.6955640316009521, "learning_rate": 1.4826016260162604e-05, "loss": 0.0272, "step": 63640 }, { "epoch": 0.7761585365853658, "grad_norm": 0.8050079345703125, "learning_rate": 1.4825609756097562e-05, "loss": 0.0469, "step": 63645 }, { "epoch": 0.776219512195122, "grad_norm": 0.8146944046020508, "learning_rate": 1.482520325203252e-05, "loss": 0.0886, "step": 63650 }, { "epoch": 0.776280487804878, "grad_norm": 0.419130802154541, "learning_rate": 1.482479674796748e-05, "loss": 0.0388, "step": 63655 }, { "epoch": 0.7763414634146342, "grad_norm": 0.5736701488494873, "learning_rate": 1.482439024390244e-05, "loss": 0.0367, "step": 63660 }, { "epoch": 0.7764024390243902, "grad_norm": 0.8088759779930115, "learning_rate": 1.48239837398374e-05, "loss": 0.0516, "step": 63665 }, { "epoch": 0.7764634146341464, "grad_norm": 1.1867271661758423, "learning_rate": 1.482357723577236e-05, "loss": 0.0667, "step": 63670 }, { "epoch": 0.7765243902439024, "grad_norm": 0.4295313358306885, "learning_rate": 1.482317073170732e-05, "loss": 0.0496, "step": 63675 }, { "epoch": 0.7765853658536586, "grad_norm": 0.6041161417961121, "learning_rate": 1.4822764227642276e-05, "loss": 0.0679, "step": 63680 }, { "epoch": 0.7766463414634146, "grad_norm": 0.457567423582077, "learning_rate": 1.4822357723577236e-05, "loss": 0.0289, "step": 63685 }, { "epoch": 0.7767073170731708, "grad_norm": 0.5080853700637817, "learning_rate": 1.4821951219512196e-05, "loss": 0.0259, "step": 63690 }, { "epoch": 0.7767682926829268, "grad_norm": 0.5748100876808167, "learning_rate": 1.4821544715447155e-05, "loss": 0.0553, "step": 63695 }, { "epoch": 0.776829268292683, "grad_norm": 1.1860237121582031, "learning_rate": 1.4821138211382115e-05, "loss": 0.0607, "step": 63700 }, { "epoch": 0.776890243902439, "grad_norm": 0.45821353793144226, "learning_rate": 1.4820731707317075e-05, "loss": 0.0395, "step": 63705 }, { "epoch": 0.7769512195121951, "grad_norm": 0.2535533010959625, "learning_rate": 1.4820325203252033e-05, "loss": 0.0406, "step": 63710 }, { "epoch": 0.7770121951219512, "grad_norm": 0.8325862288475037, "learning_rate": 1.4819918699186993e-05, "loss": 0.0577, "step": 63715 }, { "epoch": 0.7770731707317073, "grad_norm": 0.41477862000465393, "learning_rate": 1.4819512195121953e-05, "loss": 0.0611, "step": 63720 }, { "epoch": 0.7771341463414634, "grad_norm": 0.4110777974128723, "learning_rate": 1.4819105691056913e-05, "loss": 0.0673, "step": 63725 }, { "epoch": 0.7771951219512195, "grad_norm": 0.1848241537809372, "learning_rate": 1.4818699186991873e-05, "loss": 0.0395, "step": 63730 }, { "epoch": 0.7772560975609756, "grad_norm": 0.5049353241920471, "learning_rate": 1.481829268292683e-05, "loss": 0.0458, "step": 63735 }, { "epoch": 0.7773170731707317, "grad_norm": 1.3311184644699097, "learning_rate": 1.4817886178861789e-05, "loss": 0.0656, "step": 63740 }, { "epoch": 0.7773780487804878, "grad_norm": 0.6829493641853333, "learning_rate": 1.4817479674796749e-05, "loss": 0.0364, "step": 63745 }, { "epoch": 0.7774390243902439, "grad_norm": 0.5432010293006897, "learning_rate": 1.4817073170731708e-05, "loss": 0.0355, "step": 63750 }, { "epoch": 0.7775, "grad_norm": 0.993681788444519, "learning_rate": 1.4816666666666668e-05, "loss": 0.0627, "step": 63755 }, { "epoch": 0.7775609756097561, "grad_norm": 0.48685672879219055, "learning_rate": 1.4816260162601628e-05, "loss": 0.0266, "step": 63760 }, { "epoch": 0.7776219512195122, "grad_norm": 0.31109243631362915, "learning_rate": 1.4815853658536588e-05, "loss": 0.0484, "step": 63765 }, { "epoch": 0.7776829268292683, "grad_norm": 2.1963436603546143, "learning_rate": 1.4815447154471544e-05, "loss": 0.0406, "step": 63770 }, { "epoch": 0.7777439024390244, "grad_norm": 0.659705400466919, "learning_rate": 1.4815040650406504e-05, "loss": 0.0535, "step": 63775 }, { "epoch": 0.7778048780487805, "grad_norm": 0.6722060441970825, "learning_rate": 1.4814634146341464e-05, "loss": 0.0594, "step": 63780 }, { "epoch": 0.7778658536585366, "grad_norm": 0.4335981011390686, "learning_rate": 1.4814227642276424e-05, "loss": 0.0385, "step": 63785 }, { "epoch": 0.7779268292682927, "grad_norm": 1.0860178470611572, "learning_rate": 1.4813821138211384e-05, "loss": 0.0859, "step": 63790 }, { "epoch": 0.7779878048780487, "grad_norm": 0.465966135263443, "learning_rate": 1.4813414634146343e-05, "loss": 0.053, "step": 63795 }, { "epoch": 0.7780487804878049, "grad_norm": 0.8442513942718506, "learning_rate": 1.4813008130081302e-05, "loss": 0.0465, "step": 63800 }, { "epoch": 0.778109756097561, "grad_norm": 0.4217986464500427, "learning_rate": 1.4812601626016261e-05, "loss": 0.0327, "step": 63805 }, { "epoch": 0.7781707317073171, "grad_norm": 0.6097002625465393, "learning_rate": 1.4812195121951221e-05, "loss": 0.0609, "step": 63810 }, { "epoch": 0.7782317073170731, "grad_norm": 0.6114808320999146, "learning_rate": 1.4811788617886181e-05, "loss": 0.0356, "step": 63815 }, { "epoch": 0.7782926829268293, "grad_norm": 0.47720104455947876, "learning_rate": 1.481138211382114e-05, "loss": 0.0914, "step": 63820 }, { "epoch": 0.7783536585365853, "grad_norm": 0.5403311848640442, "learning_rate": 1.4810975609756099e-05, "loss": 0.0309, "step": 63825 }, { "epoch": 0.7784146341463415, "grad_norm": 0.6503979563713074, "learning_rate": 1.4810569105691057e-05, "loss": 0.056, "step": 63830 }, { "epoch": 0.7784756097560975, "grad_norm": 0.9489801526069641, "learning_rate": 1.4810162601626017e-05, "loss": 0.0551, "step": 63835 }, { "epoch": 0.7785365853658537, "grad_norm": 0.2057526856660843, "learning_rate": 1.4809756097560977e-05, "loss": 0.043, "step": 63840 }, { "epoch": 0.7785975609756097, "grad_norm": 1.1193008422851562, "learning_rate": 1.4809349593495937e-05, "loss": 0.0581, "step": 63845 }, { "epoch": 0.7786585365853659, "grad_norm": 0.599567174911499, "learning_rate": 1.4808943089430896e-05, "loss": 0.083, "step": 63850 }, { "epoch": 0.7787195121951219, "grad_norm": 0.4766567051410675, "learning_rate": 1.4808536585365856e-05, "loss": 0.0339, "step": 63855 }, { "epoch": 0.7787804878048781, "grad_norm": 0.30215001106262207, "learning_rate": 1.4808130081300813e-05, "loss": 0.0476, "step": 63860 }, { "epoch": 0.7788414634146341, "grad_norm": 0.6538419723510742, "learning_rate": 1.4807723577235772e-05, "loss": 0.0426, "step": 63865 }, { "epoch": 0.7789024390243903, "grad_norm": 0.6994954347610474, "learning_rate": 1.4807317073170732e-05, "loss": 0.047, "step": 63870 }, { "epoch": 0.7789634146341463, "grad_norm": 0.6861437559127808, "learning_rate": 1.4806910569105692e-05, "loss": 0.05, "step": 63875 }, { "epoch": 0.7790243902439025, "grad_norm": 0.8213483691215515, "learning_rate": 1.4806504065040652e-05, "loss": 0.0527, "step": 63880 }, { "epoch": 0.7790853658536585, "grad_norm": 0.8103446960449219, "learning_rate": 1.4806097560975612e-05, "loss": 0.0663, "step": 63885 }, { "epoch": 0.7791463414634147, "grad_norm": 0.4235619902610779, "learning_rate": 1.480569105691057e-05, "loss": 0.0373, "step": 63890 }, { "epoch": 0.7792073170731707, "grad_norm": 0.8386130928993225, "learning_rate": 1.480528455284553e-05, "loss": 0.1049, "step": 63895 }, { "epoch": 0.7792682926829269, "grad_norm": 0.4474402368068695, "learning_rate": 1.480487804878049e-05, "loss": 0.0561, "step": 63900 }, { "epoch": 0.7793292682926829, "grad_norm": 0.43085578083992004, "learning_rate": 1.480447154471545e-05, "loss": 0.0395, "step": 63905 }, { "epoch": 0.779390243902439, "grad_norm": 0.4218841791152954, "learning_rate": 1.4804065040650407e-05, "loss": 0.0442, "step": 63910 }, { "epoch": 0.7794512195121951, "grad_norm": 0.6769243478775024, "learning_rate": 1.4803658536585367e-05, "loss": 0.0494, "step": 63915 }, { "epoch": 0.7795121951219512, "grad_norm": 0.5442792773246765, "learning_rate": 1.4803252032520325e-05, "loss": 0.0437, "step": 63920 }, { "epoch": 0.7795731707317073, "grad_norm": 0.5460478663444519, "learning_rate": 1.4802845528455285e-05, "loss": 0.0583, "step": 63925 }, { "epoch": 0.7796341463414634, "grad_norm": 0.7634674906730652, "learning_rate": 1.4802439024390245e-05, "loss": 0.0449, "step": 63930 }, { "epoch": 0.7796951219512195, "grad_norm": 0.6083731055259705, "learning_rate": 1.4802032520325205e-05, "loss": 0.0309, "step": 63935 }, { "epoch": 0.7797560975609756, "grad_norm": 0.4484919011592865, "learning_rate": 1.4801626016260165e-05, "loss": 0.0763, "step": 63940 }, { "epoch": 0.7798170731707317, "grad_norm": 0.40465790033340454, "learning_rate": 1.4801219512195125e-05, "loss": 0.0503, "step": 63945 }, { "epoch": 0.7798780487804878, "grad_norm": 0.503860354423523, "learning_rate": 1.4800813008130081e-05, "loss": 0.0544, "step": 63950 }, { "epoch": 0.7799390243902439, "grad_norm": 1.4368817806243896, "learning_rate": 1.480040650406504e-05, "loss": 0.0834, "step": 63955 }, { "epoch": 0.78, "grad_norm": 0.4728098511695862, "learning_rate": 1.48e-05, "loss": 0.059, "step": 63960 }, { "epoch": 0.7800609756097561, "grad_norm": 1.0383886098861694, "learning_rate": 1.479959349593496e-05, "loss": 0.0385, "step": 63965 }, { "epoch": 0.7801219512195122, "grad_norm": 0.33956030011177063, "learning_rate": 1.479918699186992e-05, "loss": 0.0429, "step": 63970 }, { "epoch": 0.7801829268292683, "grad_norm": 0.6827002167701721, "learning_rate": 1.479878048780488e-05, "loss": 0.0479, "step": 63975 }, { "epoch": 0.7802439024390244, "grad_norm": 0.4437152147293091, "learning_rate": 1.4798373983739838e-05, "loss": 0.0259, "step": 63980 }, { "epoch": 0.7803048780487805, "grad_norm": 2.5738587379455566, "learning_rate": 1.4797967479674798e-05, "loss": 0.0574, "step": 63985 }, { "epoch": 0.7803658536585366, "grad_norm": 0.4542979300022125, "learning_rate": 1.4797560975609758e-05, "loss": 0.028, "step": 63990 }, { "epoch": 0.7804268292682927, "grad_norm": 0.5122165083885193, "learning_rate": 1.4797154471544718e-05, "loss": 0.0432, "step": 63995 }, { "epoch": 0.7804878048780488, "grad_norm": 0.37740081548690796, "learning_rate": 1.4796747967479676e-05, "loss": 0.082, "step": 64000 }, { "epoch": 0.7805487804878048, "grad_norm": 0.5544943809509277, "learning_rate": 1.4796341463414636e-05, "loss": 0.0463, "step": 64005 }, { "epoch": 0.780609756097561, "grad_norm": 0.7136656045913696, "learning_rate": 1.4795934959349594e-05, "loss": 0.0573, "step": 64010 }, { "epoch": 0.780670731707317, "grad_norm": 1.419433355331421, "learning_rate": 1.4795528455284554e-05, "loss": 0.0389, "step": 64015 }, { "epoch": 0.7807317073170732, "grad_norm": 0.8512787222862244, "learning_rate": 1.4795121951219513e-05, "loss": 0.0372, "step": 64020 }, { "epoch": 0.7807926829268292, "grad_norm": 0.22496873140335083, "learning_rate": 1.4794715447154473e-05, "loss": 0.0329, "step": 64025 }, { "epoch": 0.7808536585365854, "grad_norm": 1.2491744756698608, "learning_rate": 1.4794308943089433e-05, "loss": 0.0469, "step": 64030 }, { "epoch": 0.7809146341463414, "grad_norm": 0.3921971321105957, "learning_rate": 1.4793902439024393e-05, "loss": 0.0596, "step": 64035 }, { "epoch": 0.7809756097560976, "grad_norm": 1.167541742324829, "learning_rate": 1.479349593495935e-05, "loss": 0.0506, "step": 64040 }, { "epoch": 0.7810365853658536, "grad_norm": 0.7610942125320435, "learning_rate": 1.4793089430894309e-05, "loss": 0.0278, "step": 64045 }, { "epoch": 0.7810975609756098, "grad_norm": 0.639011561870575, "learning_rate": 1.4792682926829269e-05, "loss": 0.0629, "step": 64050 }, { "epoch": 0.7811585365853658, "grad_norm": 0.3101983666419983, "learning_rate": 1.4792276422764229e-05, "loss": 0.0278, "step": 64055 }, { "epoch": 0.781219512195122, "grad_norm": 0.7594683766365051, "learning_rate": 1.4791869918699189e-05, "loss": 0.1026, "step": 64060 }, { "epoch": 0.781280487804878, "grad_norm": 0.29263389110565186, "learning_rate": 1.4791463414634148e-05, "loss": 0.0451, "step": 64065 }, { "epoch": 0.7813414634146342, "grad_norm": 0.4369043707847595, "learning_rate": 1.4791056910569107e-05, "loss": 0.0258, "step": 64070 }, { "epoch": 0.7814024390243902, "grad_norm": 0.7511140704154968, "learning_rate": 1.4790650406504066e-05, "loss": 0.0492, "step": 64075 }, { "epoch": 0.7814634146341464, "grad_norm": 0.5660197734832764, "learning_rate": 1.4790243902439026e-05, "loss": 0.0548, "step": 64080 }, { "epoch": 0.7815243902439024, "grad_norm": 1.1027106046676636, "learning_rate": 1.4789837398373984e-05, "loss": 0.046, "step": 64085 }, { "epoch": 0.7815853658536586, "grad_norm": 0.4739224910736084, "learning_rate": 1.4789430894308944e-05, "loss": 0.0334, "step": 64090 }, { "epoch": 0.7816463414634146, "grad_norm": 0.41004234552383423, "learning_rate": 1.4789024390243904e-05, "loss": 0.0367, "step": 64095 }, { "epoch": 0.7817073170731708, "grad_norm": 0.5631633400917053, "learning_rate": 1.4788617886178862e-05, "loss": 0.0525, "step": 64100 }, { "epoch": 0.7817682926829268, "grad_norm": 0.33292070031166077, "learning_rate": 1.4788211382113822e-05, "loss": 0.0414, "step": 64105 }, { "epoch": 0.781829268292683, "grad_norm": 0.5439856648445129, "learning_rate": 1.4787804878048782e-05, "loss": 0.0516, "step": 64110 }, { "epoch": 0.781890243902439, "grad_norm": 0.3444030284881592, "learning_rate": 1.4787398373983742e-05, "loss": 0.0323, "step": 64115 }, { "epoch": 0.7819512195121952, "grad_norm": 1.7313518524169922, "learning_rate": 1.4786991869918701e-05, "loss": 0.0694, "step": 64120 }, { "epoch": 0.7820121951219512, "grad_norm": 0.8388106822967529, "learning_rate": 1.4786585365853661e-05, "loss": 0.042, "step": 64125 }, { "epoch": 0.7820731707317073, "grad_norm": 1.4298605918884277, "learning_rate": 1.4786178861788618e-05, "loss": 0.0731, "step": 64130 }, { "epoch": 0.7821341463414634, "grad_norm": 0.767032265663147, "learning_rate": 1.4785772357723577e-05, "loss": 0.0584, "step": 64135 }, { "epoch": 0.7821951219512195, "grad_norm": 0.4096050560474396, "learning_rate": 1.4785365853658537e-05, "loss": 0.052, "step": 64140 }, { "epoch": 0.7822560975609756, "grad_norm": 0.6690654158592224, "learning_rate": 1.4784959349593497e-05, "loss": 0.0677, "step": 64145 }, { "epoch": 0.7823170731707317, "grad_norm": 0.5332813858985901, "learning_rate": 1.4784552845528457e-05, "loss": 0.0611, "step": 64150 }, { "epoch": 0.7823780487804878, "grad_norm": 0.3212718069553375, "learning_rate": 1.4784146341463417e-05, "loss": 0.048, "step": 64155 }, { "epoch": 0.7824390243902439, "grad_norm": 0.3736465573310852, "learning_rate": 1.4783739837398375e-05, "loss": 0.0212, "step": 64160 }, { "epoch": 0.7825, "grad_norm": 0.691430389881134, "learning_rate": 1.4783333333333335e-05, "loss": 0.0522, "step": 64165 }, { "epoch": 0.7825609756097561, "grad_norm": 0.48082637786865234, "learning_rate": 1.4782926829268294e-05, "loss": 0.0396, "step": 64170 }, { "epoch": 0.7826219512195122, "grad_norm": 1.1383137702941895, "learning_rate": 1.4782520325203253e-05, "loss": 0.0356, "step": 64175 }, { "epoch": 0.7826829268292683, "grad_norm": 0.6897025108337402, "learning_rate": 1.4782113821138212e-05, "loss": 0.0615, "step": 64180 }, { "epoch": 0.7827439024390244, "grad_norm": 1.5676032304763794, "learning_rate": 1.4781707317073172e-05, "loss": 0.0468, "step": 64185 }, { "epoch": 0.7828048780487805, "grad_norm": 3.3837337493896484, "learning_rate": 1.478130081300813e-05, "loss": 0.0407, "step": 64190 }, { "epoch": 0.7828658536585366, "grad_norm": 0.4323488473892212, "learning_rate": 1.478089430894309e-05, "loss": 0.0393, "step": 64195 }, { "epoch": 0.7829268292682927, "grad_norm": 0.9712342619895935, "learning_rate": 1.478048780487805e-05, "loss": 0.0688, "step": 64200 }, { "epoch": 0.7829878048780488, "grad_norm": 0.7894065976142883, "learning_rate": 1.478008130081301e-05, "loss": 0.0868, "step": 64205 }, { "epoch": 0.7830487804878049, "grad_norm": 0.8664339780807495, "learning_rate": 1.477967479674797e-05, "loss": 0.0496, "step": 64210 }, { "epoch": 0.783109756097561, "grad_norm": 0.5353739261627197, "learning_rate": 1.477926829268293e-05, "loss": 0.09, "step": 64215 }, { "epoch": 0.7831707317073171, "grad_norm": 0.3813663125038147, "learning_rate": 1.4778861788617886e-05, "loss": 0.0309, "step": 64220 }, { "epoch": 0.7832317073170731, "grad_norm": 0.7290534377098083, "learning_rate": 1.4778455284552846e-05, "loss": 0.0304, "step": 64225 }, { "epoch": 0.7832926829268293, "grad_norm": 0.9959058165550232, "learning_rate": 1.4778048780487806e-05, "loss": 0.0422, "step": 64230 }, { "epoch": 0.7833536585365853, "grad_norm": 0.7706992626190186, "learning_rate": 1.4777642276422765e-05, "loss": 0.0358, "step": 64235 }, { "epoch": 0.7834146341463415, "grad_norm": 0.5338863730430603, "learning_rate": 1.4777235772357725e-05, "loss": 0.0429, "step": 64240 }, { "epoch": 0.7834756097560975, "grad_norm": 1.2133831977844238, "learning_rate": 1.4776829268292685e-05, "loss": 0.084, "step": 64245 }, { "epoch": 0.7835365853658537, "grad_norm": 0.9250991344451904, "learning_rate": 1.4776422764227643e-05, "loss": 0.0504, "step": 64250 }, { "epoch": 0.7835975609756097, "grad_norm": 0.5708081722259521, "learning_rate": 1.4776016260162603e-05, "loss": 0.0496, "step": 64255 }, { "epoch": 0.7836585365853659, "grad_norm": 0.702628493309021, "learning_rate": 1.4775609756097563e-05, "loss": 0.0506, "step": 64260 }, { "epoch": 0.7837195121951219, "grad_norm": 0.29521629214286804, "learning_rate": 1.4775203252032521e-05, "loss": 0.0259, "step": 64265 }, { "epoch": 0.7837804878048781, "grad_norm": 0.21206718683242798, "learning_rate": 1.477479674796748e-05, "loss": 0.0371, "step": 64270 }, { "epoch": 0.7838414634146341, "grad_norm": 0.9687151908874512, "learning_rate": 1.477439024390244e-05, "loss": 0.0802, "step": 64275 }, { "epoch": 0.7839024390243903, "grad_norm": 0.7801573276519775, "learning_rate": 1.4773983739837399e-05, "loss": 0.0392, "step": 64280 }, { "epoch": 0.7839634146341463, "grad_norm": 0.24985909461975098, "learning_rate": 1.4773577235772359e-05, "loss": 0.0245, "step": 64285 }, { "epoch": 0.7840243902439025, "grad_norm": 1.1473362445831299, "learning_rate": 1.4773170731707318e-05, "loss": 0.0721, "step": 64290 }, { "epoch": 0.7840853658536585, "grad_norm": 0.5917689800262451, "learning_rate": 1.4772764227642278e-05, "loss": 0.0475, "step": 64295 }, { "epoch": 0.7841463414634147, "grad_norm": 0.2974187433719635, "learning_rate": 1.4772357723577238e-05, "loss": 0.034, "step": 64300 }, { "epoch": 0.7842073170731707, "grad_norm": 0.38723084330558777, "learning_rate": 1.4771951219512198e-05, "loss": 0.0489, "step": 64305 }, { "epoch": 0.7842682926829269, "grad_norm": 0.48815596103668213, "learning_rate": 1.4771544715447154e-05, "loss": 0.029, "step": 64310 }, { "epoch": 0.7843292682926829, "grad_norm": 0.6204236149787903, "learning_rate": 1.4771138211382114e-05, "loss": 0.0343, "step": 64315 }, { "epoch": 0.784390243902439, "grad_norm": 0.6051305532455444, "learning_rate": 1.4770731707317074e-05, "loss": 0.0438, "step": 64320 }, { "epoch": 0.7844512195121951, "grad_norm": 0.3897434175014496, "learning_rate": 1.4770325203252034e-05, "loss": 0.0635, "step": 64325 }, { "epoch": 0.7845121951219513, "grad_norm": 0.5846503973007202, "learning_rate": 1.4769918699186994e-05, "loss": 0.0512, "step": 64330 }, { "epoch": 0.7845731707317073, "grad_norm": 0.8290796875953674, "learning_rate": 1.4769512195121953e-05, "loss": 0.0493, "step": 64335 }, { "epoch": 0.7846341463414634, "grad_norm": 0.4561467170715332, "learning_rate": 1.4769105691056911e-05, "loss": 0.0531, "step": 64340 }, { "epoch": 0.7846951219512195, "grad_norm": 0.4706307351589203, "learning_rate": 1.4768699186991871e-05, "loss": 0.0451, "step": 64345 }, { "epoch": 0.7847560975609756, "grad_norm": 0.4540483057498932, "learning_rate": 1.476829268292683e-05, "loss": 0.0416, "step": 64350 }, { "epoch": 0.7848170731707317, "grad_norm": 0.6528122425079346, "learning_rate": 1.476788617886179e-05, "loss": 0.0365, "step": 64355 }, { "epoch": 0.7848780487804878, "grad_norm": 0.9775636196136475, "learning_rate": 1.4767479674796749e-05, "loss": 0.0478, "step": 64360 }, { "epoch": 0.7849390243902439, "grad_norm": 0.7050915956497192, "learning_rate": 1.4767073170731709e-05, "loss": 0.0447, "step": 64365 }, { "epoch": 0.785, "grad_norm": 1.1222683191299438, "learning_rate": 1.4766666666666667e-05, "loss": 0.1137, "step": 64370 }, { "epoch": 0.7850609756097561, "grad_norm": 3.0853779315948486, "learning_rate": 1.4766260162601627e-05, "loss": 0.0576, "step": 64375 }, { "epoch": 0.7851219512195122, "grad_norm": 0.28915804624557495, "learning_rate": 1.4765853658536587e-05, "loss": 0.043, "step": 64380 }, { "epoch": 0.7851829268292683, "grad_norm": 0.7047111988067627, "learning_rate": 1.4765447154471546e-05, "loss": 0.0536, "step": 64385 }, { "epoch": 0.7852439024390244, "grad_norm": 0.7526270151138306, "learning_rate": 1.4765040650406506e-05, "loss": 0.0663, "step": 64390 }, { "epoch": 0.7853048780487805, "grad_norm": 0.6665706634521484, "learning_rate": 1.4764634146341466e-05, "loss": 0.0463, "step": 64395 }, { "epoch": 0.7853658536585366, "grad_norm": 0.5894805192947388, "learning_rate": 1.4764227642276423e-05, "loss": 0.0595, "step": 64400 }, { "epoch": 0.7854268292682927, "grad_norm": 0.3503147065639496, "learning_rate": 1.4763821138211382e-05, "loss": 0.0579, "step": 64405 }, { "epoch": 0.7854878048780488, "grad_norm": 0.25801214575767517, "learning_rate": 1.4763414634146342e-05, "loss": 0.0499, "step": 64410 }, { "epoch": 0.7855487804878049, "grad_norm": 0.8081841468811035, "learning_rate": 1.4763008130081302e-05, "loss": 0.0301, "step": 64415 }, { "epoch": 0.785609756097561, "grad_norm": 0.6705858707427979, "learning_rate": 1.4762601626016262e-05, "loss": 0.0507, "step": 64420 }, { "epoch": 0.785670731707317, "grad_norm": 0.5939160585403442, "learning_rate": 1.4762195121951222e-05, "loss": 0.0593, "step": 64425 }, { "epoch": 0.7857317073170732, "grad_norm": 0.656147837638855, "learning_rate": 1.476178861788618e-05, "loss": 0.0349, "step": 64430 }, { "epoch": 0.7857926829268292, "grad_norm": 0.4896070957183838, "learning_rate": 1.476138211382114e-05, "loss": 0.0456, "step": 64435 }, { "epoch": 0.7858536585365854, "grad_norm": 0.5463581085205078, "learning_rate": 1.4760975609756098e-05, "loss": 0.0616, "step": 64440 }, { "epoch": 0.7859146341463414, "grad_norm": 0.3708178400993347, "learning_rate": 1.4760569105691058e-05, "loss": 0.0429, "step": 64445 }, { "epoch": 0.7859756097560976, "grad_norm": 0.7496522068977356, "learning_rate": 1.4760162601626017e-05, "loss": 0.0631, "step": 64450 }, { "epoch": 0.7860365853658536, "grad_norm": 2.703685998916626, "learning_rate": 1.4759756097560977e-05, "loss": 0.0568, "step": 64455 }, { "epoch": 0.7860975609756098, "grad_norm": 0.6966729164123535, "learning_rate": 1.4759349593495935e-05, "loss": 0.0333, "step": 64460 }, { "epoch": 0.7861585365853658, "grad_norm": 1.6947096586227417, "learning_rate": 1.4758943089430895e-05, "loss": 0.052, "step": 64465 }, { "epoch": 0.786219512195122, "grad_norm": 0.6936659812927246, "learning_rate": 1.4758536585365855e-05, "loss": 0.0431, "step": 64470 }, { "epoch": 0.786280487804878, "grad_norm": 0.6114353537559509, "learning_rate": 1.4758130081300815e-05, "loss": 0.0524, "step": 64475 }, { "epoch": 0.7863414634146342, "grad_norm": 0.41747167706489563, "learning_rate": 1.4757723577235775e-05, "loss": 0.0354, "step": 64480 }, { "epoch": 0.7864024390243902, "grad_norm": 0.6328956484794617, "learning_rate": 1.4757317073170734e-05, "loss": 0.0466, "step": 64485 }, { "epoch": 0.7864634146341464, "grad_norm": 0.33019915223121643, "learning_rate": 1.4756910569105691e-05, "loss": 0.049, "step": 64490 }, { "epoch": 0.7865243902439024, "grad_norm": 0.7786057591438293, "learning_rate": 1.475650406504065e-05, "loss": 0.0423, "step": 64495 }, { "epoch": 0.7865853658536586, "grad_norm": 0.38535210490226746, "learning_rate": 1.475609756097561e-05, "loss": 0.0442, "step": 64500 }, { "epoch": 0.7866463414634146, "grad_norm": 0.6374151110649109, "learning_rate": 1.475569105691057e-05, "loss": 0.0643, "step": 64505 }, { "epoch": 0.7867073170731708, "grad_norm": 0.9497826099395752, "learning_rate": 1.475528455284553e-05, "loss": 0.0527, "step": 64510 }, { "epoch": 0.7867682926829268, "grad_norm": 0.9140124917030334, "learning_rate": 1.475487804878049e-05, "loss": 0.0588, "step": 64515 }, { "epoch": 0.786829268292683, "grad_norm": 0.5747582912445068, "learning_rate": 1.4754471544715448e-05, "loss": 0.0463, "step": 64520 }, { "epoch": 0.786890243902439, "grad_norm": 0.6655888557434082, "learning_rate": 1.4754065040650408e-05, "loss": 0.04, "step": 64525 }, { "epoch": 0.7869512195121952, "grad_norm": 0.4744865298271179, "learning_rate": 1.4753658536585366e-05, "loss": 0.0326, "step": 64530 }, { "epoch": 0.7870121951219512, "grad_norm": 0.274505078792572, "learning_rate": 1.4753252032520326e-05, "loss": 0.0281, "step": 64535 }, { "epoch": 0.7870731707317074, "grad_norm": 0.781217634677887, "learning_rate": 1.4752845528455286e-05, "loss": 0.0568, "step": 64540 }, { "epoch": 0.7871341463414634, "grad_norm": 0.26352736353874207, "learning_rate": 1.4752439024390246e-05, "loss": 0.0476, "step": 64545 }, { "epoch": 0.7871951219512195, "grad_norm": 0.2691512703895569, "learning_rate": 1.4752032520325204e-05, "loss": 0.0441, "step": 64550 }, { "epoch": 0.7872560975609756, "grad_norm": 0.8247342705726624, "learning_rate": 1.4751626016260163e-05, "loss": 0.0408, "step": 64555 }, { "epoch": 0.7873170731707317, "grad_norm": 0.9091023206710815, "learning_rate": 1.4751219512195123e-05, "loss": 0.0985, "step": 64560 }, { "epoch": 0.7873780487804878, "grad_norm": 0.46966856718063354, "learning_rate": 1.4750813008130083e-05, "loss": 0.0405, "step": 64565 }, { "epoch": 0.7874390243902439, "grad_norm": 0.7501252889633179, "learning_rate": 1.4750406504065043e-05, "loss": 0.0409, "step": 64570 }, { "epoch": 0.7875, "grad_norm": 0.4937984347343445, "learning_rate": 1.4750000000000003e-05, "loss": 0.0296, "step": 64575 }, { "epoch": 0.7875609756097561, "grad_norm": 0.6190078854560852, "learning_rate": 1.474959349593496e-05, "loss": 0.0336, "step": 64580 }, { "epoch": 0.7876219512195122, "grad_norm": 0.9789820909500122, "learning_rate": 1.4749186991869919e-05, "loss": 0.0491, "step": 64585 }, { "epoch": 0.7876829268292683, "grad_norm": 0.5135878920555115, "learning_rate": 1.4748780487804879e-05, "loss": 0.0242, "step": 64590 }, { "epoch": 0.7877439024390244, "grad_norm": 1.3180328607559204, "learning_rate": 1.4748373983739839e-05, "loss": 0.077, "step": 64595 }, { "epoch": 0.7878048780487805, "grad_norm": 1.0449367761611938, "learning_rate": 1.4747967479674798e-05, "loss": 0.044, "step": 64600 }, { "epoch": 0.7878658536585366, "grad_norm": 0.4600965976715088, "learning_rate": 1.4747560975609758e-05, "loss": 0.0407, "step": 64605 }, { "epoch": 0.7879268292682927, "grad_norm": 0.527977705001831, "learning_rate": 1.4747154471544716e-05, "loss": 0.0607, "step": 64610 }, { "epoch": 0.7879878048780488, "grad_norm": 0.9993494749069214, "learning_rate": 1.4746747967479675e-05, "loss": 0.0855, "step": 64615 }, { "epoch": 0.7880487804878049, "grad_norm": 0.5272454619407654, "learning_rate": 1.4746341463414634e-05, "loss": 0.0684, "step": 64620 }, { "epoch": 0.788109756097561, "grad_norm": 0.45482441782951355, "learning_rate": 1.4745934959349594e-05, "loss": 0.0494, "step": 64625 }, { "epoch": 0.7881707317073171, "grad_norm": 1.1872531175613403, "learning_rate": 1.4745528455284554e-05, "loss": 0.0822, "step": 64630 }, { "epoch": 0.7882317073170731, "grad_norm": 0.13692530989646912, "learning_rate": 1.4745121951219514e-05, "loss": 0.0352, "step": 64635 }, { "epoch": 0.7882926829268293, "grad_norm": 0.3734234571456909, "learning_rate": 1.4744715447154472e-05, "loss": 0.0561, "step": 64640 }, { "epoch": 0.7883536585365853, "grad_norm": 0.37015724182128906, "learning_rate": 1.4744308943089432e-05, "loss": 0.0238, "step": 64645 }, { "epoch": 0.7884146341463415, "grad_norm": 0.41131624579429626, "learning_rate": 1.4743902439024392e-05, "loss": 0.027, "step": 64650 }, { "epoch": 0.7884756097560975, "grad_norm": 0.9302641153335571, "learning_rate": 1.4743495934959351e-05, "loss": 0.0423, "step": 64655 }, { "epoch": 0.7885365853658537, "grad_norm": 0.5347848534584045, "learning_rate": 1.4743089430894311e-05, "loss": 0.0571, "step": 64660 }, { "epoch": 0.7885975609756097, "grad_norm": 0.3705124855041504, "learning_rate": 1.4742682926829271e-05, "loss": 0.0684, "step": 64665 }, { "epoch": 0.7886585365853659, "grad_norm": 0.5719287395477295, "learning_rate": 1.4742276422764228e-05, "loss": 0.0452, "step": 64670 }, { "epoch": 0.7887195121951219, "grad_norm": 0.7784188389778137, "learning_rate": 1.4741869918699187e-05, "loss": 0.0579, "step": 64675 }, { "epoch": 0.7887804878048781, "grad_norm": 0.6149826645851135, "learning_rate": 1.4741463414634147e-05, "loss": 0.0319, "step": 64680 }, { "epoch": 0.7888414634146341, "grad_norm": 0.8937076330184937, "learning_rate": 1.4741056910569107e-05, "loss": 0.0394, "step": 64685 }, { "epoch": 0.7889024390243903, "grad_norm": 1.232418179512024, "learning_rate": 1.4740650406504067e-05, "loss": 0.0616, "step": 64690 }, { "epoch": 0.7889634146341463, "grad_norm": 0.8392858505249023, "learning_rate": 1.4740243902439027e-05, "loss": 0.0286, "step": 64695 }, { "epoch": 0.7890243902439025, "grad_norm": 0.5034302473068237, "learning_rate": 1.4739837398373985e-05, "loss": 0.049, "step": 64700 }, { "epoch": 0.7890853658536585, "grad_norm": 0.553144097328186, "learning_rate": 1.4739430894308943e-05, "loss": 0.0542, "step": 64705 }, { "epoch": 0.7891463414634147, "grad_norm": 0.23210297524929047, "learning_rate": 1.4739024390243903e-05, "loss": 0.0553, "step": 64710 }, { "epoch": 0.7892073170731707, "grad_norm": 0.4099755585193634, "learning_rate": 1.4738617886178863e-05, "loss": 0.1038, "step": 64715 }, { "epoch": 0.7892682926829269, "grad_norm": 0.8555989861488342, "learning_rate": 1.4738211382113822e-05, "loss": 0.0628, "step": 64720 }, { "epoch": 0.7893292682926829, "grad_norm": 0.712973415851593, "learning_rate": 1.4737804878048782e-05, "loss": 0.0545, "step": 64725 }, { "epoch": 0.7893902439024391, "grad_norm": 0.6334938406944275, "learning_rate": 1.473739837398374e-05, "loss": 0.0642, "step": 64730 }, { "epoch": 0.7894512195121951, "grad_norm": 0.719527542591095, "learning_rate": 1.47369918699187e-05, "loss": 0.0408, "step": 64735 }, { "epoch": 0.7895121951219513, "grad_norm": 0.5604293942451477, "learning_rate": 1.473658536585366e-05, "loss": 0.0365, "step": 64740 }, { "epoch": 0.7895731707317073, "grad_norm": 0.9181734323501587, "learning_rate": 1.473617886178862e-05, "loss": 0.0983, "step": 64745 }, { "epoch": 0.7896341463414634, "grad_norm": 0.49837419390678406, "learning_rate": 1.473577235772358e-05, "loss": 0.0676, "step": 64750 }, { "epoch": 0.7896951219512195, "grad_norm": 0.3866126835346222, "learning_rate": 1.473536585365854e-05, "loss": 0.0502, "step": 64755 }, { "epoch": 0.7897560975609756, "grad_norm": 0.34033942222595215, "learning_rate": 1.4734959349593496e-05, "loss": 0.0301, "step": 64760 }, { "epoch": 0.7898170731707317, "grad_norm": 0.7281615734100342, "learning_rate": 1.4734552845528456e-05, "loss": 0.0395, "step": 64765 }, { "epoch": 0.7898780487804878, "grad_norm": 0.7192507386207581, "learning_rate": 1.4734146341463415e-05, "loss": 0.0554, "step": 64770 }, { "epoch": 0.7899390243902439, "grad_norm": 0.5172268152236938, "learning_rate": 1.4733739837398375e-05, "loss": 0.0354, "step": 64775 }, { "epoch": 0.79, "grad_norm": 0.5530074834823608, "learning_rate": 1.4733333333333335e-05, "loss": 0.0717, "step": 64780 }, { "epoch": 0.7900609756097561, "grad_norm": 0.3288326561450958, "learning_rate": 1.4732926829268295e-05, "loss": 0.0378, "step": 64785 }, { "epoch": 0.7901219512195122, "grad_norm": 0.5450852513313293, "learning_rate": 1.4732520325203253e-05, "loss": 0.0296, "step": 64790 }, { "epoch": 0.7901829268292683, "grad_norm": 0.8919211030006409, "learning_rate": 1.4732113821138211e-05, "loss": 0.0425, "step": 64795 }, { "epoch": 0.7902439024390244, "grad_norm": 0.6022881269454956, "learning_rate": 1.4731707317073171e-05, "loss": 0.0478, "step": 64800 }, { "epoch": 0.7903048780487805, "grad_norm": 0.8919049501419067, "learning_rate": 1.4731300813008131e-05, "loss": 0.0396, "step": 64805 }, { "epoch": 0.7903658536585366, "grad_norm": 0.4718140959739685, "learning_rate": 1.473089430894309e-05, "loss": 0.0323, "step": 64810 }, { "epoch": 0.7904268292682927, "grad_norm": 0.653387188911438, "learning_rate": 1.473048780487805e-05, "loss": 0.0484, "step": 64815 }, { "epoch": 0.7904878048780488, "grad_norm": 0.6120955348014832, "learning_rate": 1.4730081300813009e-05, "loss": 0.0438, "step": 64820 }, { "epoch": 0.7905487804878049, "grad_norm": 2.6619794368743896, "learning_rate": 1.4729674796747968e-05, "loss": 0.034, "step": 64825 }, { "epoch": 0.790609756097561, "grad_norm": 1.0106306076049805, "learning_rate": 1.4729268292682928e-05, "loss": 0.0302, "step": 64830 }, { "epoch": 0.790670731707317, "grad_norm": 0.6117687821388245, "learning_rate": 1.4728861788617888e-05, "loss": 0.0558, "step": 64835 }, { "epoch": 0.7907317073170732, "grad_norm": 0.4615057706832886, "learning_rate": 1.4728455284552848e-05, "loss": 0.0612, "step": 64840 }, { "epoch": 0.7907926829268292, "grad_norm": 0.9314630627632141, "learning_rate": 1.4728048780487808e-05, "loss": 0.0528, "step": 64845 }, { "epoch": 0.7908536585365854, "grad_norm": 0.4100378751754761, "learning_rate": 1.4727642276422764e-05, "loss": 0.0469, "step": 64850 }, { "epoch": 0.7909146341463414, "grad_norm": 0.5145370960235596, "learning_rate": 1.4727235772357724e-05, "loss": 0.059, "step": 64855 }, { "epoch": 0.7909756097560976, "grad_norm": 0.6005622148513794, "learning_rate": 1.4726829268292684e-05, "loss": 0.038, "step": 64860 }, { "epoch": 0.7910365853658536, "grad_norm": 0.41984009742736816, "learning_rate": 1.4726422764227644e-05, "loss": 0.0611, "step": 64865 }, { "epoch": 0.7910975609756098, "grad_norm": 0.6076722741127014, "learning_rate": 1.4726016260162603e-05, "loss": 0.0395, "step": 64870 }, { "epoch": 0.7911585365853658, "grad_norm": 1.6772809028625488, "learning_rate": 1.4725609756097563e-05, "loss": 0.1126, "step": 64875 }, { "epoch": 0.791219512195122, "grad_norm": 0.3335016071796417, "learning_rate": 1.472520325203252e-05, "loss": 0.027, "step": 64880 }, { "epoch": 0.791280487804878, "grad_norm": 0.7413287162780762, "learning_rate": 1.472479674796748e-05, "loss": 0.024, "step": 64885 }, { "epoch": 0.7913414634146342, "grad_norm": 0.36370736360549927, "learning_rate": 1.472439024390244e-05, "loss": 0.0359, "step": 64890 }, { "epoch": 0.7914024390243902, "grad_norm": 0.7955176830291748, "learning_rate": 1.47239837398374e-05, "loss": 0.0726, "step": 64895 }, { "epoch": 0.7914634146341464, "grad_norm": 0.8014788031578064, "learning_rate": 1.4723577235772359e-05, "loss": 0.0425, "step": 64900 }, { "epoch": 0.7915243902439024, "grad_norm": 0.43127548694610596, "learning_rate": 1.4723170731707319e-05, "loss": 0.0294, "step": 64905 }, { "epoch": 0.7915853658536586, "grad_norm": 2.3369319438934326, "learning_rate": 1.4722764227642277e-05, "loss": 0.0732, "step": 64910 }, { "epoch": 0.7916463414634146, "grad_norm": 0.6814964413642883, "learning_rate": 1.4722357723577237e-05, "loss": 0.0551, "step": 64915 }, { "epoch": 0.7917073170731708, "grad_norm": 0.8971332311630249, "learning_rate": 1.4721951219512197e-05, "loss": 0.0453, "step": 64920 }, { "epoch": 0.7917682926829268, "grad_norm": 1.2139307260513306, "learning_rate": 1.4721544715447156e-05, "loss": 0.051, "step": 64925 }, { "epoch": 0.791829268292683, "grad_norm": 0.3987502157688141, "learning_rate": 1.4721138211382116e-05, "loss": 0.0281, "step": 64930 }, { "epoch": 0.791890243902439, "grad_norm": 0.7091889977455139, "learning_rate": 1.4720731707317076e-05, "loss": 0.062, "step": 64935 }, { "epoch": 0.7919512195121952, "grad_norm": 0.6721524000167847, "learning_rate": 1.4720325203252032e-05, "loss": 0.0677, "step": 64940 }, { "epoch": 0.7920121951219512, "grad_norm": 0.28066614270210266, "learning_rate": 1.4719918699186992e-05, "loss": 0.0567, "step": 64945 }, { "epoch": 0.7920731707317074, "grad_norm": 0.8653939962387085, "learning_rate": 1.4719512195121952e-05, "loss": 0.0781, "step": 64950 }, { "epoch": 0.7921341463414634, "grad_norm": 0.7759164571762085, "learning_rate": 1.4719105691056912e-05, "loss": 0.0592, "step": 64955 }, { "epoch": 0.7921951219512195, "grad_norm": 0.4719640612602234, "learning_rate": 1.4718699186991872e-05, "loss": 0.0291, "step": 64960 }, { "epoch": 0.7922560975609756, "grad_norm": 0.3291495740413666, "learning_rate": 1.4718292682926832e-05, "loss": 0.0562, "step": 64965 }, { "epoch": 0.7923170731707317, "grad_norm": 0.5371624231338501, "learning_rate": 1.4717886178861788e-05, "loss": 0.0698, "step": 64970 }, { "epoch": 0.7923780487804878, "grad_norm": 0.7658769488334656, "learning_rate": 1.4717479674796748e-05, "loss": 0.0404, "step": 64975 }, { "epoch": 0.7924390243902439, "grad_norm": 0.48271405696868896, "learning_rate": 1.4717073170731708e-05, "loss": 0.0436, "step": 64980 }, { "epoch": 0.7925, "grad_norm": 0.7286911606788635, "learning_rate": 1.4716666666666668e-05, "loss": 0.0526, "step": 64985 }, { "epoch": 0.7925609756097561, "grad_norm": 0.45961254835128784, "learning_rate": 1.4716260162601627e-05, "loss": 0.0764, "step": 64990 }, { "epoch": 0.7926219512195122, "grad_norm": 3.10003662109375, "learning_rate": 1.4715853658536587e-05, "loss": 0.0759, "step": 64995 }, { "epoch": 0.7926829268292683, "grad_norm": 1.1418790817260742, "learning_rate": 1.4715447154471545e-05, "loss": 0.0325, "step": 65000 }, { "epoch": 0.7927439024390244, "grad_norm": 0.555860698223114, "learning_rate": 1.4715040650406505e-05, "loss": 0.0284, "step": 65005 }, { "epoch": 0.7928048780487805, "grad_norm": 0.5408211350440979, "learning_rate": 1.4714634146341465e-05, "loss": 0.0694, "step": 65010 }, { "epoch": 0.7928658536585366, "grad_norm": 0.5305262207984924, "learning_rate": 1.4714227642276425e-05, "loss": 0.0561, "step": 65015 }, { "epoch": 0.7929268292682927, "grad_norm": 0.5737125277519226, "learning_rate": 1.4713821138211385e-05, "loss": 0.0314, "step": 65020 }, { "epoch": 0.7929878048780488, "grad_norm": 0.7200427651405334, "learning_rate": 1.4713414634146343e-05, "loss": 0.0635, "step": 65025 }, { "epoch": 0.7930487804878049, "grad_norm": 0.3387983441352844, "learning_rate": 1.47130081300813e-05, "loss": 0.0384, "step": 65030 }, { "epoch": 0.793109756097561, "grad_norm": 0.3579745888710022, "learning_rate": 1.471260162601626e-05, "loss": 0.0298, "step": 65035 }, { "epoch": 0.7931707317073171, "grad_norm": 0.4954962134361267, "learning_rate": 1.471219512195122e-05, "loss": 0.0527, "step": 65040 }, { "epoch": 0.7932317073170732, "grad_norm": 0.44065314531326294, "learning_rate": 1.471178861788618e-05, "loss": 0.0317, "step": 65045 }, { "epoch": 0.7932926829268293, "grad_norm": 0.4078432321548462, "learning_rate": 1.471138211382114e-05, "loss": 0.0455, "step": 65050 }, { "epoch": 0.7933536585365853, "grad_norm": 0.48274850845336914, "learning_rate": 1.47109756097561e-05, "loss": 0.0639, "step": 65055 }, { "epoch": 0.7934146341463415, "grad_norm": 0.7146080732345581, "learning_rate": 1.4710569105691056e-05, "loss": 0.027, "step": 65060 }, { "epoch": 0.7934756097560975, "grad_norm": 0.5272269248962402, "learning_rate": 1.4710162601626016e-05, "loss": 0.0373, "step": 65065 }, { "epoch": 0.7935365853658537, "grad_norm": 0.9307964444160461, "learning_rate": 1.4709756097560976e-05, "loss": 0.0774, "step": 65070 }, { "epoch": 0.7935975609756097, "grad_norm": 0.35146626830101013, "learning_rate": 1.4709349593495936e-05, "loss": 0.0245, "step": 65075 }, { "epoch": 0.7936585365853659, "grad_norm": 0.4677543640136719, "learning_rate": 1.4708943089430896e-05, "loss": 0.0324, "step": 65080 }, { "epoch": 0.7937195121951219, "grad_norm": 0.5517566800117493, "learning_rate": 1.4708536585365855e-05, "loss": 0.0348, "step": 65085 }, { "epoch": 0.7937804878048781, "grad_norm": 1.427802324295044, "learning_rate": 1.4708130081300814e-05, "loss": 0.0388, "step": 65090 }, { "epoch": 0.7938414634146341, "grad_norm": 0.48929381370544434, "learning_rate": 1.4707723577235773e-05, "loss": 0.0355, "step": 65095 }, { "epoch": 0.7939024390243903, "grad_norm": 1.2531380653381348, "learning_rate": 1.4707317073170733e-05, "loss": 0.0465, "step": 65100 }, { "epoch": 0.7939634146341463, "grad_norm": 0.3753295838832855, "learning_rate": 1.4706910569105693e-05, "loss": 0.032, "step": 65105 }, { "epoch": 0.7940243902439025, "grad_norm": 0.7800383567810059, "learning_rate": 1.4706504065040653e-05, "loss": 0.0591, "step": 65110 }, { "epoch": 0.7940853658536585, "grad_norm": 0.6208650469779968, "learning_rate": 1.4706097560975611e-05, "loss": 0.0498, "step": 65115 }, { "epoch": 0.7941463414634147, "grad_norm": 0.30722248554229736, "learning_rate": 1.4705691056910569e-05, "loss": 0.0311, "step": 65120 }, { "epoch": 0.7942073170731707, "grad_norm": 0.6047577261924744, "learning_rate": 1.4705284552845529e-05, "loss": 0.0258, "step": 65125 }, { "epoch": 0.7942682926829269, "grad_norm": 1.5202836990356445, "learning_rate": 1.4704878048780489e-05, "loss": 0.0621, "step": 65130 }, { "epoch": 0.7943292682926829, "grad_norm": 0.24030360579490662, "learning_rate": 1.4704471544715449e-05, "loss": 0.0332, "step": 65135 }, { "epoch": 0.7943902439024391, "grad_norm": 0.47473978996276855, "learning_rate": 1.4704065040650408e-05, "loss": 0.051, "step": 65140 }, { "epoch": 0.7944512195121951, "grad_norm": 0.44707751274108887, "learning_rate": 1.4703658536585368e-05, "loss": 0.04, "step": 65145 }, { "epoch": 0.7945121951219513, "grad_norm": 0.301388680934906, "learning_rate": 1.4703252032520325e-05, "loss": 0.0448, "step": 65150 }, { "epoch": 0.7945731707317073, "grad_norm": 0.612463116645813, "learning_rate": 1.4702845528455284e-05, "loss": 0.0427, "step": 65155 }, { "epoch": 0.7946341463414635, "grad_norm": 0.50084388256073, "learning_rate": 1.4702439024390244e-05, "loss": 0.0283, "step": 65160 }, { "epoch": 0.7946951219512195, "grad_norm": 0.776711642742157, "learning_rate": 1.4702032520325204e-05, "loss": 0.0565, "step": 65165 }, { "epoch": 0.7947560975609756, "grad_norm": 0.3025863766670227, "learning_rate": 1.4701626016260164e-05, "loss": 0.0222, "step": 65170 }, { "epoch": 0.7948170731707317, "grad_norm": 1.063854455947876, "learning_rate": 1.4701219512195124e-05, "loss": 0.0671, "step": 65175 }, { "epoch": 0.7948780487804878, "grad_norm": 0.9824146628379822, "learning_rate": 1.4700813008130082e-05, "loss": 0.0639, "step": 65180 }, { "epoch": 0.7949390243902439, "grad_norm": 0.35989147424697876, "learning_rate": 1.4700406504065042e-05, "loss": 0.0695, "step": 65185 }, { "epoch": 0.795, "grad_norm": 0.5229333639144897, "learning_rate": 1.4700000000000002e-05, "loss": 0.0408, "step": 65190 }, { "epoch": 0.7950609756097561, "grad_norm": 0.2930617928504944, "learning_rate": 1.4699593495934961e-05, "loss": 0.0625, "step": 65195 }, { "epoch": 0.7951219512195122, "grad_norm": 0.2940807342529297, "learning_rate": 1.4699186991869921e-05, "loss": 0.0647, "step": 65200 }, { "epoch": 0.7951829268292683, "grad_norm": 0.6525278091430664, "learning_rate": 1.469878048780488e-05, "loss": 0.0576, "step": 65205 }, { "epoch": 0.7952439024390244, "grad_norm": 0.43714380264282227, "learning_rate": 1.4698373983739837e-05, "loss": 0.0568, "step": 65210 }, { "epoch": 0.7953048780487805, "grad_norm": 0.5099974870681763, "learning_rate": 1.4697967479674797e-05, "loss": 0.0458, "step": 65215 }, { "epoch": 0.7953658536585366, "grad_norm": 0.43721163272857666, "learning_rate": 1.4697560975609757e-05, "loss": 0.0629, "step": 65220 }, { "epoch": 0.7954268292682927, "grad_norm": 0.39971891045570374, "learning_rate": 1.4697154471544717e-05, "loss": 0.0429, "step": 65225 }, { "epoch": 0.7954878048780488, "grad_norm": 0.3463577926158905, "learning_rate": 1.4696747967479677e-05, "loss": 0.0383, "step": 65230 }, { "epoch": 0.7955487804878049, "grad_norm": 0.7276536226272583, "learning_rate": 1.4696341463414637e-05, "loss": 0.0729, "step": 65235 }, { "epoch": 0.795609756097561, "grad_norm": 0.5208073854446411, "learning_rate": 1.4695934959349593e-05, "loss": 0.0261, "step": 65240 }, { "epoch": 0.7956707317073171, "grad_norm": 0.3796223998069763, "learning_rate": 1.4695528455284553e-05, "loss": 0.0337, "step": 65245 }, { "epoch": 0.7957317073170732, "grad_norm": 0.37461939454078674, "learning_rate": 1.4695121951219513e-05, "loss": 0.0423, "step": 65250 }, { "epoch": 0.7957926829268293, "grad_norm": 0.8813255429267883, "learning_rate": 1.4694715447154472e-05, "loss": 0.0393, "step": 65255 }, { "epoch": 0.7958536585365854, "grad_norm": 0.5839626789093018, "learning_rate": 1.4694308943089432e-05, "loss": 0.041, "step": 65260 }, { "epoch": 0.7959146341463414, "grad_norm": 1.0173579454421997, "learning_rate": 1.4693902439024392e-05, "loss": 0.0361, "step": 65265 }, { "epoch": 0.7959756097560976, "grad_norm": 0.3499844968318939, "learning_rate": 1.469349593495935e-05, "loss": 0.0456, "step": 65270 }, { "epoch": 0.7960365853658536, "grad_norm": 0.5374818444252014, "learning_rate": 1.469308943089431e-05, "loss": 0.0445, "step": 65275 }, { "epoch": 0.7960975609756098, "grad_norm": 0.8033055663108826, "learning_rate": 1.469268292682927e-05, "loss": 0.0294, "step": 65280 }, { "epoch": 0.7961585365853658, "grad_norm": 0.8029568791389465, "learning_rate": 1.469227642276423e-05, "loss": 0.0421, "step": 65285 }, { "epoch": 0.796219512195122, "grad_norm": 0.7668588161468506, "learning_rate": 1.4691869918699188e-05, "loss": 0.0499, "step": 65290 }, { "epoch": 0.796280487804878, "grad_norm": 0.4684561491012573, "learning_rate": 1.4691463414634148e-05, "loss": 0.0418, "step": 65295 }, { "epoch": 0.7963414634146342, "grad_norm": 0.48318514227867126, "learning_rate": 1.4691056910569106e-05, "loss": 0.0277, "step": 65300 }, { "epoch": 0.7964024390243902, "grad_norm": 0.5388874411582947, "learning_rate": 1.4690650406504066e-05, "loss": 0.045, "step": 65305 }, { "epoch": 0.7964634146341464, "grad_norm": 0.6641628742218018, "learning_rate": 1.4690243902439025e-05, "loss": 0.0331, "step": 65310 }, { "epoch": 0.7965243902439024, "grad_norm": 0.8729920387268066, "learning_rate": 1.4689837398373985e-05, "loss": 0.0524, "step": 65315 }, { "epoch": 0.7965853658536586, "grad_norm": 0.8766315579414368, "learning_rate": 1.4689430894308945e-05, "loss": 0.0665, "step": 65320 }, { "epoch": 0.7966463414634146, "grad_norm": 0.3895626366138458, "learning_rate": 1.4689024390243905e-05, "loss": 0.0426, "step": 65325 }, { "epoch": 0.7967073170731708, "grad_norm": 0.7049990296363831, "learning_rate": 1.4688617886178861e-05, "loss": 0.0668, "step": 65330 }, { "epoch": 0.7967682926829268, "grad_norm": 1.011674165725708, "learning_rate": 1.4688211382113821e-05, "loss": 0.0406, "step": 65335 }, { "epoch": 0.796829268292683, "grad_norm": 0.5969281792640686, "learning_rate": 1.4687804878048781e-05, "loss": 0.045, "step": 65340 }, { "epoch": 0.796890243902439, "grad_norm": 0.3753010332584381, "learning_rate": 1.468739837398374e-05, "loss": 0.0426, "step": 65345 }, { "epoch": 0.7969512195121952, "grad_norm": 0.9075905084609985, "learning_rate": 1.46869918699187e-05, "loss": 0.0469, "step": 65350 }, { "epoch": 0.7970121951219512, "grad_norm": 0.3592613935470581, "learning_rate": 1.468658536585366e-05, "loss": 0.0288, "step": 65355 }, { "epoch": 0.7970731707317074, "grad_norm": 0.7586759924888611, "learning_rate": 1.4686178861788619e-05, "loss": 0.0454, "step": 65360 }, { "epoch": 0.7971341463414634, "grad_norm": 0.7593233585357666, "learning_rate": 1.4685772357723578e-05, "loss": 0.0639, "step": 65365 }, { "epoch": 0.7971951219512196, "grad_norm": 0.8056750893592834, "learning_rate": 1.4685365853658538e-05, "loss": 0.0464, "step": 65370 }, { "epoch": 0.7972560975609756, "grad_norm": 0.5565040111541748, "learning_rate": 1.4684959349593498e-05, "loss": 0.0288, "step": 65375 }, { "epoch": 0.7973170731707317, "grad_norm": 1.9785012006759644, "learning_rate": 1.4684552845528456e-05, "loss": 0.046, "step": 65380 }, { "epoch": 0.7973780487804878, "grad_norm": 0.5292317271232605, "learning_rate": 1.4684146341463416e-05, "loss": 0.0505, "step": 65385 }, { "epoch": 0.797439024390244, "grad_norm": 0.5901139974594116, "learning_rate": 1.4683739837398374e-05, "loss": 0.0589, "step": 65390 }, { "epoch": 0.7975, "grad_norm": 0.762746274471283, "learning_rate": 1.4683333333333334e-05, "loss": 0.0602, "step": 65395 }, { "epoch": 0.7975609756097561, "grad_norm": 0.6366841793060303, "learning_rate": 1.4682926829268294e-05, "loss": 0.0689, "step": 65400 }, { "epoch": 0.7976219512195122, "grad_norm": 0.536224901676178, "learning_rate": 1.4682520325203254e-05, "loss": 0.0731, "step": 65405 }, { "epoch": 0.7976829268292683, "grad_norm": 0.2916334867477417, "learning_rate": 1.4682113821138213e-05, "loss": 0.0377, "step": 65410 }, { "epoch": 0.7977439024390244, "grad_norm": 1.0780977010726929, "learning_rate": 1.4681707317073173e-05, "loss": 0.0423, "step": 65415 }, { "epoch": 0.7978048780487805, "grad_norm": 0.3299233019351959, "learning_rate": 1.468130081300813e-05, "loss": 0.0401, "step": 65420 }, { "epoch": 0.7978658536585366, "grad_norm": 0.3047165274620056, "learning_rate": 1.468089430894309e-05, "loss": 0.0456, "step": 65425 }, { "epoch": 0.7979268292682927, "grad_norm": 0.21378552913665771, "learning_rate": 1.468048780487805e-05, "loss": 0.0266, "step": 65430 }, { "epoch": 0.7979878048780488, "grad_norm": 0.3457554876804352, "learning_rate": 1.4680081300813009e-05, "loss": 0.0235, "step": 65435 }, { "epoch": 0.7980487804878049, "grad_norm": 1.4850374460220337, "learning_rate": 1.4679674796747969e-05, "loss": 0.1027, "step": 65440 }, { "epoch": 0.798109756097561, "grad_norm": 0.5692199468612671, "learning_rate": 1.4679268292682929e-05, "loss": 0.033, "step": 65445 }, { "epoch": 0.7981707317073171, "grad_norm": 1.3829925060272217, "learning_rate": 1.4678861788617887e-05, "loss": 0.0262, "step": 65450 }, { "epoch": 0.7982317073170732, "grad_norm": 0.3980167806148529, "learning_rate": 1.4678455284552847e-05, "loss": 0.0467, "step": 65455 }, { "epoch": 0.7982926829268293, "grad_norm": 0.6182351112365723, "learning_rate": 1.4678048780487807e-05, "loss": 0.0699, "step": 65460 }, { "epoch": 0.7983536585365854, "grad_norm": 0.44582000374794006, "learning_rate": 1.4677642276422766e-05, "loss": 0.0476, "step": 65465 }, { "epoch": 0.7984146341463415, "grad_norm": 1.2439813613891602, "learning_rate": 1.4677235772357724e-05, "loss": 0.045, "step": 65470 }, { "epoch": 0.7984756097560975, "grad_norm": 1.0803591012954712, "learning_rate": 1.4676829268292684e-05, "loss": 0.0635, "step": 65475 }, { "epoch": 0.7985365853658537, "grad_norm": 0.2533400356769562, "learning_rate": 1.4676422764227642e-05, "loss": 0.0419, "step": 65480 }, { "epoch": 0.7985975609756097, "grad_norm": 0.7998397946357727, "learning_rate": 1.4676016260162602e-05, "loss": 0.0585, "step": 65485 }, { "epoch": 0.7986585365853659, "grad_norm": 0.4086199700832367, "learning_rate": 1.4675609756097562e-05, "loss": 0.0424, "step": 65490 }, { "epoch": 0.7987195121951219, "grad_norm": 0.7364935874938965, "learning_rate": 1.4675203252032522e-05, "loss": 0.0596, "step": 65495 }, { "epoch": 0.7987804878048781, "grad_norm": 0.5223264694213867, "learning_rate": 1.4674796747967482e-05, "loss": 0.0307, "step": 65500 }, { "epoch": 0.7988414634146341, "grad_norm": 0.4966912269592285, "learning_rate": 1.4674390243902442e-05, "loss": 0.061, "step": 65505 }, { "epoch": 0.7989024390243903, "grad_norm": 0.4295866787433624, "learning_rate": 1.4673983739837398e-05, "loss": 0.0447, "step": 65510 }, { "epoch": 0.7989634146341463, "grad_norm": 1.0539699792861938, "learning_rate": 1.4673577235772358e-05, "loss": 0.0958, "step": 65515 }, { "epoch": 0.7990243902439025, "grad_norm": 0.6017675399780273, "learning_rate": 1.4673170731707318e-05, "loss": 0.027, "step": 65520 }, { "epoch": 0.7990853658536585, "grad_norm": 0.6196990609169006, "learning_rate": 1.4672764227642277e-05, "loss": 0.0749, "step": 65525 }, { "epoch": 0.7991463414634147, "grad_norm": 0.7506252527236938, "learning_rate": 1.4672357723577237e-05, "loss": 0.0293, "step": 65530 }, { "epoch": 0.7992073170731707, "grad_norm": 1.1036349534988403, "learning_rate": 1.4671951219512197e-05, "loss": 0.0592, "step": 65535 }, { "epoch": 0.7992682926829269, "grad_norm": 1.1218421459197998, "learning_rate": 1.4671544715447155e-05, "loss": 0.0823, "step": 65540 }, { "epoch": 0.7993292682926829, "grad_norm": 1.253508448600769, "learning_rate": 1.4671138211382115e-05, "loss": 0.0606, "step": 65545 }, { "epoch": 0.7993902439024391, "grad_norm": 0.6766008734703064, "learning_rate": 1.4670731707317075e-05, "loss": 0.0565, "step": 65550 }, { "epoch": 0.7994512195121951, "grad_norm": 0.34895390272140503, "learning_rate": 1.4670325203252033e-05, "loss": 0.0466, "step": 65555 }, { "epoch": 0.7995121951219513, "grad_norm": 1.3886154890060425, "learning_rate": 1.4669918699186993e-05, "loss": 0.1089, "step": 65560 }, { "epoch": 0.7995731707317073, "grad_norm": 0.5095632672309875, "learning_rate": 1.4669512195121953e-05, "loss": 0.0307, "step": 65565 }, { "epoch": 0.7996341463414635, "grad_norm": 0.5215538740158081, "learning_rate": 1.466910569105691e-05, "loss": 0.0412, "step": 65570 }, { "epoch": 0.7996951219512195, "grad_norm": 0.5515062808990479, "learning_rate": 1.466869918699187e-05, "loss": 0.0301, "step": 65575 }, { "epoch": 0.7997560975609757, "grad_norm": 0.5203027129173279, "learning_rate": 1.466829268292683e-05, "loss": 0.0393, "step": 65580 }, { "epoch": 0.7998170731707317, "grad_norm": 0.46315035223960876, "learning_rate": 1.466788617886179e-05, "loss": 0.0389, "step": 65585 }, { "epoch": 0.7998780487804878, "grad_norm": 0.5518621802330017, "learning_rate": 1.466747967479675e-05, "loss": 0.0449, "step": 65590 }, { "epoch": 0.7999390243902439, "grad_norm": 0.7958507537841797, "learning_rate": 1.466707317073171e-05, "loss": 0.0634, "step": 65595 }, { "epoch": 0.8, "grad_norm": 0.3151974380016327, "learning_rate": 1.4666666666666666e-05, "loss": 0.0339, "step": 65600 }, { "epoch": 0.8000609756097561, "grad_norm": 0.5654678344726562, "learning_rate": 1.4666260162601626e-05, "loss": 0.0276, "step": 65605 }, { "epoch": 0.8001219512195122, "grad_norm": 0.42452144622802734, "learning_rate": 1.4665853658536586e-05, "loss": 0.0432, "step": 65610 }, { "epoch": 0.8001829268292683, "grad_norm": 0.747145414352417, "learning_rate": 1.4665447154471546e-05, "loss": 0.0585, "step": 65615 }, { "epoch": 0.8002439024390244, "grad_norm": 0.2346794754266739, "learning_rate": 1.4665040650406506e-05, "loss": 0.0591, "step": 65620 }, { "epoch": 0.8003048780487805, "grad_norm": 0.29046565294265747, "learning_rate": 1.4664634146341465e-05, "loss": 0.0291, "step": 65625 }, { "epoch": 0.8003658536585366, "grad_norm": 0.5917673707008362, "learning_rate": 1.4664227642276424e-05, "loss": 0.0613, "step": 65630 }, { "epoch": 0.8004268292682927, "grad_norm": 1.019566297531128, "learning_rate": 1.4663821138211383e-05, "loss": 0.0468, "step": 65635 }, { "epoch": 0.8004878048780488, "grad_norm": 0.6094812154769897, "learning_rate": 1.4663414634146343e-05, "loss": 0.0551, "step": 65640 }, { "epoch": 0.8005487804878049, "grad_norm": 0.46643465757369995, "learning_rate": 1.4663008130081301e-05, "loss": 0.0494, "step": 65645 }, { "epoch": 0.800609756097561, "grad_norm": 0.3968815505504608, "learning_rate": 1.4662601626016261e-05, "loss": 0.0333, "step": 65650 }, { "epoch": 0.8006707317073171, "grad_norm": 0.5803295969963074, "learning_rate": 1.4662195121951221e-05, "loss": 0.0322, "step": 65655 }, { "epoch": 0.8007317073170732, "grad_norm": 0.9759855270385742, "learning_rate": 1.4661788617886179e-05, "loss": 0.0414, "step": 65660 }, { "epoch": 0.8007926829268293, "grad_norm": 0.5501409769058228, "learning_rate": 1.4661382113821139e-05, "loss": 0.0593, "step": 65665 }, { "epoch": 0.8008536585365854, "grad_norm": 1.8334498405456543, "learning_rate": 1.4660975609756099e-05, "loss": 0.0966, "step": 65670 }, { "epoch": 0.8009146341463415, "grad_norm": 0.6653315424919128, "learning_rate": 1.4660569105691059e-05, "loss": 0.1106, "step": 65675 }, { "epoch": 0.8009756097560976, "grad_norm": 0.5263994932174683, "learning_rate": 1.4660162601626018e-05, "loss": 0.052, "step": 65680 }, { "epoch": 0.8010365853658536, "grad_norm": 0.4905274510383606, "learning_rate": 1.4659756097560978e-05, "loss": 0.0433, "step": 65685 }, { "epoch": 0.8010975609756098, "grad_norm": 1.3847845792770386, "learning_rate": 1.4659349593495935e-05, "loss": 0.0411, "step": 65690 }, { "epoch": 0.8011585365853658, "grad_norm": 1.0247801542282104, "learning_rate": 1.4658943089430894e-05, "loss": 0.0543, "step": 65695 }, { "epoch": 0.801219512195122, "grad_norm": 0.4752891957759857, "learning_rate": 1.4658536585365854e-05, "loss": 0.0351, "step": 65700 }, { "epoch": 0.801280487804878, "grad_norm": 0.8470171689987183, "learning_rate": 1.4658130081300814e-05, "loss": 0.0423, "step": 65705 }, { "epoch": 0.8013414634146342, "grad_norm": 1.1154083013534546, "learning_rate": 1.4657723577235774e-05, "loss": 0.0385, "step": 65710 }, { "epoch": 0.8014024390243902, "grad_norm": 0.8229905962944031, "learning_rate": 1.4657317073170734e-05, "loss": 0.0814, "step": 65715 }, { "epoch": 0.8014634146341464, "grad_norm": 0.6682421565055847, "learning_rate": 1.4656910569105692e-05, "loss": 0.0439, "step": 65720 }, { "epoch": 0.8015243902439024, "grad_norm": 0.5466671586036682, "learning_rate": 1.4656504065040652e-05, "loss": 0.033, "step": 65725 }, { "epoch": 0.8015853658536586, "grad_norm": 0.5709746479988098, "learning_rate": 1.4656097560975611e-05, "loss": 0.0954, "step": 65730 }, { "epoch": 0.8016463414634146, "grad_norm": 0.4156721234321594, "learning_rate": 1.465569105691057e-05, "loss": 0.0412, "step": 65735 }, { "epoch": 0.8017073170731708, "grad_norm": 2.613255023956299, "learning_rate": 1.465528455284553e-05, "loss": 0.0423, "step": 65740 }, { "epoch": 0.8017682926829268, "grad_norm": 0.7050459980964661, "learning_rate": 1.465487804878049e-05, "loss": 0.0355, "step": 65745 }, { "epoch": 0.801829268292683, "grad_norm": 0.6244153380393982, "learning_rate": 1.4654471544715447e-05, "loss": 0.048, "step": 65750 }, { "epoch": 0.801890243902439, "grad_norm": 0.328133761882782, "learning_rate": 1.4654065040650407e-05, "loss": 0.0507, "step": 65755 }, { "epoch": 0.8019512195121952, "grad_norm": 0.5872785449028015, "learning_rate": 1.4653658536585367e-05, "loss": 0.0254, "step": 65760 }, { "epoch": 0.8020121951219512, "grad_norm": 0.7133810520172119, "learning_rate": 1.4653252032520327e-05, "loss": 0.0412, "step": 65765 }, { "epoch": 0.8020731707317074, "grad_norm": 0.34000399708747864, "learning_rate": 1.4652845528455287e-05, "loss": 0.0351, "step": 65770 }, { "epoch": 0.8021341463414634, "grad_norm": 0.3043091297149658, "learning_rate": 1.4652439024390246e-05, "loss": 0.0677, "step": 65775 }, { "epoch": 0.8021951219512196, "grad_norm": 0.3277774751186371, "learning_rate": 1.4652032520325203e-05, "loss": 0.0536, "step": 65780 }, { "epoch": 0.8022560975609756, "grad_norm": 0.30710089206695557, "learning_rate": 1.4651626016260163e-05, "loss": 0.067, "step": 65785 }, { "epoch": 0.8023170731707318, "grad_norm": 0.8877986669540405, "learning_rate": 1.4651219512195123e-05, "loss": 0.0341, "step": 65790 }, { "epoch": 0.8023780487804878, "grad_norm": 0.4720756411552429, "learning_rate": 1.4650813008130082e-05, "loss": 0.0478, "step": 65795 }, { "epoch": 0.802439024390244, "grad_norm": 0.3918658494949341, "learning_rate": 1.4650406504065042e-05, "loss": 0.0271, "step": 65800 }, { "epoch": 0.8025, "grad_norm": 0.7630936503410339, "learning_rate": 1.4650000000000002e-05, "loss": 0.0307, "step": 65805 }, { "epoch": 0.8025609756097561, "grad_norm": 0.5981448888778687, "learning_rate": 1.464959349593496e-05, "loss": 0.0595, "step": 65810 }, { "epoch": 0.8026219512195122, "grad_norm": 0.27002036571502686, "learning_rate": 1.464918699186992e-05, "loss": 0.0297, "step": 65815 }, { "epoch": 0.8026829268292683, "grad_norm": 0.6491511464118958, "learning_rate": 1.4648780487804878e-05, "loss": 0.0671, "step": 65820 }, { "epoch": 0.8027439024390244, "grad_norm": 0.5202068090438843, "learning_rate": 1.4648373983739838e-05, "loss": 0.0444, "step": 65825 }, { "epoch": 0.8028048780487805, "grad_norm": 0.5086449384689331, "learning_rate": 1.4647967479674798e-05, "loss": 0.0988, "step": 65830 }, { "epoch": 0.8028658536585366, "grad_norm": 0.5921338200569153, "learning_rate": 1.4647560975609758e-05, "loss": 0.0442, "step": 65835 }, { "epoch": 0.8029268292682927, "grad_norm": 0.37313538789749146, "learning_rate": 1.4647154471544716e-05, "loss": 0.0524, "step": 65840 }, { "epoch": 0.8029878048780488, "grad_norm": 0.48170599341392517, "learning_rate": 1.4646747967479676e-05, "loss": 0.0345, "step": 65845 }, { "epoch": 0.8030487804878049, "grad_norm": 0.738639235496521, "learning_rate": 1.4646341463414635e-05, "loss": 0.0414, "step": 65850 }, { "epoch": 0.803109756097561, "grad_norm": 0.690413773059845, "learning_rate": 1.4645934959349595e-05, "loss": 0.0516, "step": 65855 }, { "epoch": 0.8031707317073171, "grad_norm": 0.643150269985199, "learning_rate": 1.4645528455284555e-05, "loss": 0.0503, "step": 65860 }, { "epoch": 0.8032317073170732, "grad_norm": 0.9407252669334412, "learning_rate": 1.4645121951219515e-05, "loss": 0.0662, "step": 65865 }, { "epoch": 0.8032926829268293, "grad_norm": 1.7847980260849, "learning_rate": 1.4644715447154471e-05, "loss": 0.0462, "step": 65870 }, { "epoch": 0.8033536585365854, "grad_norm": 0.31830430030822754, "learning_rate": 1.4644308943089431e-05, "loss": 0.0268, "step": 65875 }, { "epoch": 0.8034146341463415, "grad_norm": 0.3843947649002075, "learning_rate": 1.4643902439024391e-05, "loss": 0.0887, "step": 65880 }, { "epoch": 0.8034756097560976, "grad_norm": 0.5503847002983093, "learning_rate": 1.464349593495935e-05, "loss": 0.0346, "step": 65885 }, { "epoch": 0.8035365853658537, "grad_norm": 0.4795004725456238, "learning_rate": 1.464308943089431e-05, "loss": 0.0411, "step": 65890 }, { "epoch": 0.8035975609756097, "grad_norm": 0.42743992805480957, "learning_rate": 1.464268292682927e-05, "loss": 0.0422, "step": 65895 }, { "epoch": 0.8036585365853659, "grad_norm": 0.5661632418632507, "learning_rate": 1.4642276422764228e-05, "loss": 0.0454, "step": 65900 }, { "epoch": 0.8037195121951219, "grad_norm": 1.132593035697937, "learning_rate": 1.4641869918699188e-05, "loss": 0.101, "step": 65905 }, { "epoch": 0.8037804878048781, "grad_norm": 0.4887681305408478, "learning_rate": 1.4641463414634146e-05, "loss": 0.0556, "step": 65910 }, { "epoch": 0.8038414634146341, "grad_norm": 13.17729377746582, "learning_rate": 1.4641056910569106e-05, "loss": 0.0333, "step": 65915 }, { "epoch": 0.8039024390243903, "grad_norm": 0.4759039878845215, "learning_rate": 1.4640650406504066e-05, "loss": 0.0284, "step": 65920 }, { "epoch": 0.8039634146341463, "grad_norm": 0.5693449974060059, "learning_rate": 1.4640243902439026e-05, "loss": 0.0627, "step": 65925 }, { "epoch": 0.8040243902439025, "grad_norm": 0.20327359437942505, "learning_rate": 1.4639837398373984e-05, "loss": 0.0321, "step": 65930 }, { "epoch": 0.8040853658536585, "grad_norm": 2.2931814193725586, "learning_rate": 1.4639430894308944e-05, "loss": 0.0474, "step": 65935 }, { "epoch": 0.8041463414634147, "grad_norm": 0.5343404412269592, "learning_rate": 1.4639024390243904e-05, "loss": 0.052, "step": 65940 }, { "epoch": 0.8042073170731707, "grad_norm": 0.5722362399101257, "learning_rate": 1.4638617886178863e-05, "loss": 0.097, "step": 65945 }, { "epoch": 0.8042682926829269, "grad_norm": 0.28325530886650085, "learning_rate": 1.4638211382113823e-05, "loss": 0.0347, "step": 65950 }, { "epoch": 0.8043292682926829, "grad_norm": 0.7126153707504272, "learning_rate": 1.4637804878048783e-05, "loss": 0.0449, "step": 65955 }, { "epoch": 0.8043902439024391, "grad_norm": 0.5210660099983215, "learning_rate": 1.463739837398374e-05, "loss": 0.0369, "step": 65960 }, { "epoch": 0.8044512195121951, "grad_norm": 0.6615588665008545, "learning_rate": 1.46369918699187e-05, "loss": 0.0316, "step": 65965 }, { "epoch": 0.8045121951219513, "grad_norm": 0.7975603938102722, "learning_rate": 1.463658536585366e-05, "loss": 0.058, "step": 65970 }, { "epoch": 0.8045731707317073, "grad_norm": 0.4426361918449402, "learning_rate": 1.4636178861788619e-05, "loss": 0.0367, "step": 65975 }, { "epoch": 0.8046341463414635, "grad_norm": 2.6967873573303223, "learning_rate": 1.4635772357723579e-05, "loss": 0.1151, "step": 65980 }, { "epoch": 0.8046951219512195, "grad_norm": 0.4249173104763031, "learning_rate": 1.4635365853658539e-05, "loss": 0.0391, "step": 65985 }, { "epoch": 0.8047560975609757, "grad_norm": 0.7066946625709534, "learning_rate": 1.4634959349593497e-05, "loss": 0.0533, "step": 65990 }, { "epoch": 0.8048170731707317, "grad_norm": 0.4038335084915161, "learning_rate": 1.4634552845528457e-05, "loss": 0.058, "step": 65995 }, { "epoch": 0.8048780487804879, "grad_norm": 0.47058549523353577, "learning_rate": 1.4634146341463415e-05, "loss": 0.0319, "step": 66000 }, { "epoch": 0.8049390243902439, "grad_norm": 1.2062965631484985, "learning_rate": 1.4633739837398375e-05, "loss": 0.0419, "step": 66005 }, { "epoch": 0.805, "grad_norm": 0.3382284939289093, "learning_rate": 1.4633333333333334e-05, "loss": 0.0291, "step": 66010 }, { "epoch": 0.8050609756097561, "grad_norm": 0.2885899841785431, "learning_rate": 1.4632926829268294e-05, "loss": 0.0574, "step": 66015 }, { "epoch": 0.8051219512195122, "grad_norm": 0.3540145456790924, "learning_rate": 1.4632520325203252e-05, "loss": 0.0279, "step": 66020 }, { "epoch": 0.8051829268292683, "grad_norm": 0.5844317078590393, "learning_rate": 1.4632113821138212e-05, "loss": 0.039, "step": 66025 }, { "epoch": 0.8052439024390244, "grad_norm": 0.38974910974502563, "learning_rate": 1.4631707317073172e-05, "loss": 0.0317, "step": 66030 }, { "epoch": 0.8053048780487805, "grad_norm": 1.4851810932159424, "learning_rate": 1.4631300813008132e-05, "loss": 0.0515, "step": 66035 }, { "epoch": 0.8053658536585366, "grad_norm": 0.45885464549064636, "learning_rate": 1.4630894308943092e-05, "loss": 0.043, "step": 66040 }, { "epoch": 0.8054268292682927, "grad_norm": 1.254120111465454, "learning_rate": 1.4630487804878051e-05, "loss": 0.0445, "step": 66045 }, { "epoch": 0.8054878048780488, "grad_norm": 0.536173939704895, "learning_rate": 1.4630081300813008e-05, "loss": 0.0751, "step": 66050 }, { "epoch": 0.8055487804878049, "grad_norm": 0.5113347768783569, "learning_rate": 1.4629674796747968e-05, "loss": 0.0479, "step": 66055 }, { "epoch": 0.805609756097561, "grad_norm": 0.46341484785079956, "learning_rate": 1.4629268292682928e-05, "loss": 0.0584, "step": 66060 }, { "epoch": 0.8056707317073171, "grad_norm": 0.3906468451023102, "learning_rate": 1.4628861788617887e-05, "loss": 0.0643, "step": 66065 }, { "epoch": 0.8057317073170732, "grad_norm": 0.44529402256011963, "learning_rate": 1.4628455284552847e-05, "loss": 0.0318, "step": 66070 }, { "epoch": 0.8057926829268293, "grad_norm": 0.8612979650497437, "learning_rate": 1.4628048780487807e-05, "loss": 0.0405, "step": 66075 }, { "epoch": 0.8058536585365854, "grad_norm": 0.8870943188667297, "learning_rate": 1.4627642276422765e-05, "loss": 0.0585, "step": 66080 }, { "epoch": 0.8059146341463415, "grad_norm": 1.0860339403152466, "learning_rate": 1.4627235772357723e-05, "loss": 0.0554, "step": 66085 }, { "epoch": 0.8059756097560976, "grad_norm": 0.38172250986099243, "learning_rate": 1.4626829268292683e-05, "loss": 0.0519, "step": 66090 }, { "epoch": 0.8060365853658537, "grad_norm": 0.4524138271808624, "learning_rate": 1.4626422764227643e-05, "loss": 0.0399, "step": 66095 }, { "epoch": 0.8060975609756098, "grad_norm": 0.364091157913208, "learning_rate": 1.4626016260162603e-05, "loss": 0.0562, "step": 66100 }, { "epoch": 0.8061585365853658, "grad_norm": 0.6302241683006287, "learning_rate": 1.4625609756097563e-05, "loss": 0.0502, "step": 66105 }, { "epoch": 0.806219512195122, "grad_norm": 0.5167445540428162, "learning_rate": 1.462520325203252e-05, "loss": 0.0648, "step": 66110 }, { "epoch": 0.806280487804878, "grad_norm": 1.9229031801223755, "learning_rate": 1.462479674796748e-05, "loss": 0.0258, "step": 66115 }, { "epoch": 0.8063414634146342, "grad_norm": 0.8767083883285522, "learning_rate": 1.462439024390244e-05, "loss": 0.0384, "step": 66120 }, { "epoch": 0.8064024390243902, "grad_norm": 0.4577749967575073, "learning_rate": 1.46239837398374e-05, "loss": 0.0733, "step": 66125 }, { "epoch": 0.8064634146341464, "grad_norm": 0.7161833643913269, "learning_rate": 1.462357723577236e-05, "loss": 0.0271, "step": 66130 }, { "epoch": 0.8065243902439024, "grad_norm": 1.0758576393127441, "learning_rate": 1.462317073170732e-05, "loss": 0.0759, "step": 66135 }, { "epoch": 0.8065853658536586, "grad_norm": 1.056033730506897, "learning_rate": 1.4622764227642276e-05, "loss": 0.0648, "step": 66140 }, { "epoch": 0.8066463414634146, "grad_norm": 1.3787356615066528, "learning_rate": 1.4622357723577236e-05, "loss": 0.0376, "step": 66145 }, { "epoch": 0.8067073170731708, "grad_norm": 0.7881515622138977, "learning_rate": 1.4621951219512196e-05, "loss": 0.0482, "step": 66150 }, { "epoch": 0.8067682926829268, "grad_norm": 1.0030437707901, "learning_rate": 1.4621544715447156e-05, "loss": 0.0396, "step": 66155 }, { "epoch": 0.806829268292683, "grad_norm": 0.6824822425842285, "learning_rate": 1.4621138211382116e-05, "loss": 0.0249, "step": 66160 }, { "epoch": 0.806890243902439, "grad_norm": 0.45309701561927795, "learning_rate": 1.4620731707317075e-05, "loss": 0.0188, "step": 66165 }, { "epoch": 0.8069512195121952, "grad_norm": 0.2906780242919922, "learning_rate": 1.4620325203252033e-05, "loss": 0.0252, "step": 66170 }, { "epoch": 0.8070121951219512, "grad_norm": 0.18388375639915466, "learning_rate": 1.4619918699186992e-05, "loss": 0.045, "step": 66175 }, { "epoch": 0.8070731707317074, "grad_norm": 0.7938902378082275, "learning_rate": 1.4619512195121951e-05, "loss": 0.0435, "step": 66180 }, { "epoch": 0.8071341463414634, "grad_norm": 0.8068766593933105, "learning_rate": 1.4619105691056911e-05, "loss": 0.0417, "step": 66185 }, { "epoch": 0.8071951219512196, "grad_norm": 0.6564091444015503, "learning_rate": 1.4618699186991871e-05, "loss": 0.0473, "step": 66190 }, { "epoch": 0.8072560975609756, "grad_norm": 0.4246974289417267, "learning_rate": 1.4618292682926831e-05, "loss": 0.0542, "step": 66195 }, { "epoch": 0.8073170731707318, "grad_norm": 0.45694345235824585, "learning_rate": 1.4617886178861789e-05, "loss": 0.0913, "step": 66200 }, { "epoch": 0.8073780487804878, "grad_norm": 0.2985375225543976, "learning_rate": 1.4617479674796749e-05, "loss": 0.0416, "step": 66205 }, { "epoch": 0.807439024390244, "grad_norm": 0.7512791752815247, "learning_rate": 1.4617073170731709e-05, "loss": 0.0439, "step": 66210 }, { "epoch": 0.8075, "grad_norm": 0.5642675757408142, "learning_rate": 1.4616666666666668e-05, "loss": 0.0361, "step": 66215 }, { "epoch": 0.8075609756097561, "grad_norm": 0.289528489112854, "learning_rate": 1.4616260162601628e-05, "loss": 0.0325, "step": 66220 }, { "epoch": 0.8076219512195122, "grad_norm": 0.7485630512237549, "learning_rate": 1.4615853658536588e-05, "loss": 0.0376, "step": 66225 }, { "epoch": 0.8076829268292683, "grad_norm": 0.48278915882110596, "learning_rate": 1.4615447154471545e-05, "loss": 0.0549, "step": 66230 }, { "epoch": 0.8077439024390244, "grad_norm": 0.6238645315170288, "learning_rate": 1.4615040650406504e-05, "loss": 0.0665, "step": 66235 }, { "epoch": 0.8078048780487805, "grad_norm": 0.833349883556366, "learning_rate": 1.4614634146341464e-05, "loss": 0.0438, "step": 66240 }, { "epoch": 0.8078658536585366, "grad_norm": 0.8424351215362549, "learning_rate": 1.4614227642276424e-05, "loss": 0.0858, "step": 66245 }, { "epoch": 0.8079268292682927, "grad_norm": 0.526323676109314, "learning_rate": 1.4613821138211384e-05, "loss": 0.0281, "step": 66250 }, { "epoch": 0.8079878048780488, "grad_norm": 0.8595454096794128, "learning_rate": 1.4613414634146344e-05, "loss": 0.0358, "step": 66255 }, { "epoch": 0.8080487804878049, "grad_norm": 0.6186687350273132, "learning_rate": 1.46130081300813e-05, "loss": 0.038, "step": 66260 }, { "epoch": 0.808109756097561, "grad_norm": 0.281055212020874, "learning_rate": 1.461260162601626e-05, "loss": 0.0212, "step": 66265 }, { "epoch": 0.8081707317073171, "grad_norm": 0.9782273769378662, "learning_rate": 1.461219512195122e-05, "loss": 0.0388, "step": 66270 }, { "epoch": 0.8082317073170732, "grad_norm": 0.7644299864768982, "learning_rate": 1.461178861788618e-05, "loss": 0.0365, "step": 66275 }, { "epoch": 0.8082926829268293, "grad_norm": 0.3912017047405243, "learning_rate": 1.461138211382114e-05, "loss": 0.0347, "step": 66280 }, { "epoch": 0.8083536585365854, "grad_norm": 0.26961594820022583, "learning_rate": 1.46109756097561e-05, "loss": 0.0547, "step": 66285 }, { "epoch": 0.8084146341463415, "grad_norm": 0.5033563375473022, "learning_rate": 1.4610569105691057e-05, "loss": 0.0452, "step": 66290 }, { "epoch": 0.8084756097560976, "grad_norm": 0.8197075724601746, "learning_rate": 1.4610162601626017e-05, "loss": 0.0522, "step": 66295 }, { "epoch": 0.8085365853658537, "grad_norm": 1.0000064373016357, "learning_rate": 1.4609756097560977e-05, "loss": 0.0499, "step": 66300 }, { "epoch": 0.8085975609756098, "grad_norm": 0.7444723844528198, "learning_rate": 1.4609349593495937e-05, "loss": 0.0406, "step": 66305 }, { "epoch": 0.8086585365853659, "grad_norm": 0.6729173064231873, "learning_rate": 1.4608943089430897e-05, "loss": 0.0409, "step": 66310 }, { "epoch": 0.808719512195122, "grad_norm": 0.28573715686798096, "learning_rate": 1.4608536585365856e-05, "loss": 0.0386, "step": 66315 }, { "epoch": 0.8087804878048781, "grad_norm": 0.7477862238883972, "learning_rate": 1.4608130081300813e-05, "loss": 0.0615, "step": 66320 }, { "epoch": 0.8088414634146341, "grad_norm": 0.639771044254303, "learning_rate": 1.4607723577235773e-05, "loss": 0.0343, "step": 66325 }, { "epoch": 0.8089024390243903, "grad_norm": 0.3381498157978058, "learning_rate": 1.4607317073170733e-05, "loss": 0.022, "step": 66330 }, { "epoch": 0.8089634146341463, "grad_norm": 0.4133051037788391, "learning_rate": 1.4606910569105692e-05, "loss": 0.059, "step": 66335 }, { "epoch": 0.8090243902439025, "grad_norm": 0.44286370277404785, "learning_rate": 1.4606504065040652e-05, "loss": 0.0342, "step": 66340 }, { "epoch": 0.8090853658536585, "grad_norm": 0.7046345472335815, "learning_rate": 1.4606097560975612e-05, "loss": 0.0777, "step": 66345 }, { "epoch": 0.8091463414634147, "grad_norm": 0.5356307625770569, "learning_rate": 1.4605691056910568e-05, "loss": 0.0324, "step": 66350 }, { "epoch": 0.8092073170731707, "grad_norm": 0.38517773151397705, "learning_rate": 1.4605284552845528e-05, "loss": 0.0406, "step": 66355 }, { "epoch": 0.8092682926829269, "grad_norm": 0.8789002895355225, "learning_rate": 1.4604878048780488e-05, "loss": 0.0532, "step": 66360 }, { "epoch": 0.8093292682926829, "grad_norm": 0.4960280954837799, "learning_rate": 1.4604471544715448e-05, "loss": 0.0459, "step": 66365 }, { "epoch": 0.8093902439024391, "grad_norm": 0.7875783443450928, "learning_rate": 1.4604065040650408e-05, "loss": 0.0454, "step": 66370 }, { "epoch": 0.8094512195121951, "grad_norm": 0.6837928295135498, "learning_rate": 1.4603658536585368e-05, "loss": 0.0814, "step": 66375 }, { "epoch": 0.8095121951219513, "grad_norm": 0.7451743483543396, "learning_rate": 1.4603252032520326e-05, "loss": 0.0303, "step": 66380 }, { "epoch": 0.8095731707317073, "grad_norm": 0.8205732107162476, "learning_rate": 1.4602845528455285e-05, "loss": 0.033, "step": 66385 }, { "epoch": 0.8096341463414635, "grad_norm": 0.8259363770484924, "learning_rate": 1.4602439024390245e-05, "loss": 0.0469, "step": 66390 }, { "epoch": 0.8096951219512195, "grad_norm": 0.5665404200553894, "learning_rate": 1.4602032520325205e-05, "loss": 0.0332, "step": 66395 }, { "epoch": 0.8097560975609757, "grad_norm": 0.163301020860672, "learning_rate": 1.4601626016260165e-05, "loss": 0.056, "step": 66400 }, { "epoch": 0.8098170731707317, "grad_norm": 0.7839200496673584, "learning_rate": 1.4601219512195125e-05, "loss": 0.0585, "step": 66405 }, { "epoch": 0.8098780487804879, "grad_norm": 0.591420590877533, "learning_rate": 1.4600813008130081e-05, "loss": 0.0414, "step": 66410 }, { "epoch": 0.8099390243902439, "grad_norm": 1.565302848815918, "learning_rate": 1.4600406504065041e-05, "loss": 0.0473, "step": 66415 }, { "epoch": 0.81, "grad_norm": 0.6761687994003296, "learning_rate": 1.46e-05, "loss": 0.0567, "step": 66420 }, { "epoch": 0.8100609756097561, "grad_norm": 0.9310659766197205, "learning_rate": 1.459959349593496e-05, "loss": 0.0319, "step": 66425 }, { "epoch": 0.8101219512195122, "grad_norm": 1.7371331453323364, "learning_rate": 1.459918699186992e-05, "loss": 0.0525, "step": 66430 }, { "epoch": 0.8101829268292683, "grad_norm": 0.656531810760498, "learning_rate": 1.459878048780488e-05, "loss": 0.0636, "step": 66435 }, { "epoch": 0.8102439024390244, "grad_norm": 0.4339110851287842, "learning_rate": 1.4598373983739837e-05, "loss": 0.022, "step": 66440 }, { "epoch": 0.8103048780487805, "grad_norm": 0.6485439538955688, "learning_rate": 1.4597967479674797e-05, "loss": 0.0337, "step": 66445 }, { "epoch": 0.8103658536585366, "grad_norm": 0.6168124675750732, "learning_rate": 1.4597560975609756e-05, "loss": 0.0694, "step": 66450 }, { "epoch": 0.8104268292682927, "grad_norm": 1.953871488571167, "learning_rate": 1.4597154471544716e-05, "loss": 0.0506, "step": 66455 }, { "epoch": 0.8104878048780488, "grad_norm": 0.4697958827018738, "learning_rate": 1.4596747967479676e-05, "loss": 0.0345, "step": 66460 }, { "epoch": 0.8105487804878049, "grad_norm": 0.3344472348690033, "learning_rate": 1.4596341463414636e-05, "loss": 0.0479, "step": 66465 }, { "epoch": 0.810609756097561, "grad_norm": 0.47309941053390503, "learning_rate": 1.4595934959349594e-05, "loss": 0.0598, "step": 66470 }, { "epoch": 0.8106707317073171, "grad_norm": 0.849938690662384, "learning_rate": 1.4595528455284554e-05, "loss": 0.0471, "step": 66475 }, { "epoch": 0.8107317073170732, "grad_norm": 0.6297580599784851, "learning_rate": 1.4595121951219514e-05, "loss": 0.055, "step": 66480 }, { "epoch": 0.8107926829268293, "grad_norm": 0.9668779969215393, "learning_rate": 1.4594715447154473e-05, "loss": 0.0415, "step": 66485 }, { "epoch": 0.8108536585365854, "grad_norm": 0.5298168659210205, "learning_rate": 1.4594308943089433e-05, "loss": 0.053, "step": 66490 }, { "epoch": 0.8109146341463415, "grad_norm": 0.9660903215408325, "learning_rate": 1.4593902439024391e-05, "loss": 0.0598, "step": 66495 }, { "epoch": 0.8109756097560976, "grad_norm": 0.5958839654922485, "learning_rate": 1.459349593495935e-05, "loss": 0.0538, "step": 66500 }, { "epoch": 0.8110365853658537, "grad_norm": 0.42675310373306274, "learning_rate": 1.459308943089431e-05, "loss": 0.0465, "step": 66505 }, { "epoch": 0.8110975609756098, "grad_norm": 0.6282041668891907, "learning_rate": 1.4592682926829269e-05, "loss": 0.0357, "step": 66510 }, { "epoch": 0.8111585365853659, "grad_norm": 0.7849520444869995, "learning_rate": 1.4592276422764229e-05, "loss": 0.0493, "step": 66515 }, { "epoch": 0.811219512195122, "grad_norm": 0.6669273376464844, "learning_rate": 1.4591869918699189e-05, "loss": 0.0462, "step": 66520 }, { "epoch": 0.811280487804878, "grad_norm": 0.7334781885147095, "learning_rate": 1.4591463414634149e-05, "loss": 0.0461, "step": 66525 }, { "epoch": 0.8113414634146342, "grad_norm": 0.8932009339332581, "learning_rate": 1.4591056910569105e-05, "loss": 0.0442, "step": 66530 }, { "epoch": 0.8114024390243902, "grad_norm": 0.6605110764503479, "learning_rate": 1.4590650406504065e-05, "loss": 0.0194, "step": 66535 }, { "epoch": 0.8114634146341464, "grad_norm": 1.0563198328018188, "learning_rate": 1.4590243902439025e-05, "loss": 0.068, "step": 66540 }, { "epoch": 0.8115243902439024, "grad_norm": 0.7396688461303711, "learning_rate": 1.4589837398373985e-05, "loss": 0.0449, "step": 66545 }, { "epoch": 0.8115853658536586, "grad_norm": 0.5983324646949768, "learning_rate": 1.4589430894308944e-05, "loss": 0.0462, "step": 66550 }, { "epoch": 0.8116463414634146, "grad_norm": 0.39725643396377563, "learning_rate": 1.4589024390243904e-05, "loss": 0.0454, "step": 66555 }, { "epoch": 0.8117073170731708, "grad_norm": 0.22511732578277588, "learning_rate": 1.4588617886178862e-05, "loss": 0.0267, "step": 66560 }, { "epoch": 0.8117682926829268, "grad_norm": 0.690859317779541, "learning_rate": 1.4588211382113822e-05, "loss": 0.0372, "step": 66565 }, { "epoch": 0.811829268292683, "grad_norm": 0.5459917783737183, "learning_rate": 1.4587804878048782e-05, "loss": 0.0445, "step": 66570 }, { "epoch": 0.811890243902439, "grad_norm": 0.3156536817550659, "learning_rate": 1.4587398373983742e-05, "loss": 0.0558, "step": 66575 }, { "epoch": 0.8119512195121952, "grad_norm": 0.4738140404224396, "learning_rate": 1.4586991869918702e-05, "loss": 0.0494, "step": 66580 }, { "epoch": 0.8120121951219512, "grad_norm": 0.4415194094181061, "learning_rate": 1.458658536585366e-05, "loss": 0.0569, "step": 66585 }, { "epoch": 0.8120731707317074, "grad_norm": 0.6679831147193909, "learning_rate": 1.4586178861788618e-05, "loss": 0.0369, "step": 66590 }, { "epoch": 0.8121341463414634, "grad_norm": 0.5710659027099609, "learning_rate": 1.4585772357723578e-05, "loss": 0.0377, "step": 66595 }, { "epoch": 0.8121951219512196, "grad_norm": 0.36972689628601074, "learning_rate": 1.4585365853658537e-05, "loss": 0.0521, "step": 66600 }, { "epoch": 0.8122560975609756, "grad_norm": 0.49283555150032043, "learning_rate": 1.4584959349593497e-05, "loss": 0.0342, "step": 66605 }, { "epoch": 0.8123170731707318, "grad_norm": 0.8419270515441895, "learning_rate": 1.4584552845528457e-05, "loss": 0.0374, "step": 66610 }, { "epoch": 0.8123780487804878, "grad_norm": 0.6724631190299988, "learning_rate": 1.4584146341463417e-05, "loss": 0.0843, "step": 66615 }, { "epoch": 0.812439024390244, "grad_norm": 0.6795777678489685, "learning_rate": 1.4583739837398373e-05, "loss": 0.0292, "step": 66620 }, { "epoch": 0.8125, "grad_norm": 0.40474918484687805, "learning_rate": 1.4583333333333333e-05, "loss": 0.0287, "step": 66625 }, { "epoch": 0.812560975609756, "grad_norm": 0.316886305809021, "learning_rate": 1.4582926829268293e-05, "loss": 0.0501, "step": 66630 }, { "epoch": 0.8126219512195122, "grad_norm": 0.41602760553359985, "learning_rate": 1.4582520325203253e-05, "loss": 0.0412, "step": 66635 }, { "epoch": 0.8126829268292682, "grad_norm": 0.5117303133010864, "learning_rate": 1.4582113821138213e-05, "loss": 0.0664, "step": 66640 }, { "epoch": 0.8127439024390244, "grad_norm": 0.46860358119010925, "learning_rate": 1.4581707317073172e-05, "loss": 0.0383, "step": 66645 }, { "epoch": 0.8128048780487804, "grad_norm": 0.5818555355072021, "learning_rate": 1.4581300813008132e-05, "loss": 0.0456, "step": 66650 }, { "epoch": 0.8128658536585366, "grad_norm": 0.44634580612182617, "learning_rate": 1.458089430894309e-05, "loss": 0.049, "step": 66655 }, { "epoch": 0.8129268292682926, "grad_norm": 0.4427603781223297, "learning_rate": 1.458048780487805e-05, "loss": 0.0438, "step": 66660 }, { "epoch": 0.8129878048780488, "grad_norm": 0.6574809551239014, "learning_rate": 1.458008130081301e-05, "loss": 0.0455, "step": 66665 }, { "epoch": 0.8130487804878048, "grad_norm": 0.9247046709060669, "learning_rate": 1.4579674796747968e-05, "loss": 0.0643, "step": 66670 }, { "epoch": 0.813109756097561, "grad_norm": 0.38034942746162415, "learning_rate": 1.4579268292682928e-05, "loss": 0.0336, "step": 66675 }, { "epoch": 0.813170731707317, "grad_norm": 0.58064866065979, "learning_rate": 1.4578861788617888e-05, "loss": 0.0578, "step": 66680 }, { "epoch": 0.8132317073170732, "grad_norm": 0.5297273993492126, "learning_rate": 1.4578455284552846e-05, "loss": 0.0339, "step": 66685 }, { "epoch": 0.8132926829268292, "grad_norm": 0.33205536007881165, "learning_rate": 1.4578048780487806e-05, "loss": 0.0417, "step": 66690 }, { "epoch": 0.8133536585365854, "grad_norm": 1.2062093019485474, "learning_rate": 1.4577642276422766e-05, "loss": 0.0528, "step": 66695 }, { "epoch": 0.8134146341463414, "grad_norm": 1.755556344985962, "learning_rate": 1.4577235772357725e-05, "loss": 0.0637, "step": 66700 }, { "epoch": 0.8134756097560976, "grad_norm": 0.6588472723960876, "learning_rate": 1.4576829268292685e-05, "loss": 0.062, "step": 66705 }, { "epoch": 0.8135365853658536, "grad_norm": 1.1389284133911133, "learning_rate": 1.4576422764227645e-05, "loss": 0.0543, "step": 66710 }, { "epoch": 0.8135975609756098, "grad_norm": 0.7105712890625, "learning_rate": 1.4576016260162602e-05, "loss": 0.0693, "step": 66715 }, { "epoch": 0.8136585365853658, "grad_norm": 0.47798237204551697, "learning_rate": 1.4575609756097561e-05, "loss": 0.0552, "step": 66720 }, { "epoch": 0.813719512195122, "grad_norm": 0.6647099852561951, "learning_rate": 1.4575203252032521e-05, "loss": 0.0403, "step": 66725 }, { "epoch": 0.813780487804878, "grad_norm": 1.9598572254180908, "learning_rate": 1.4574796747967481e-05, "loss": 0.0463, "step": 66730 }, { "epoch": 0.8138414634146341, "grad_norm": 0.3973994553089142, "learning_rate": 1.457439024390244e-05, "loss": 0.0334, "step": 66735 }, { "epoch": 0.8139024390243902, "grad_norm": 0.9845095872879028, "learning_rate": 1.45739837398374e-05, "loss": 0.0336, "step": 66740 }, { "epoch": 0.8139634146341463, "grad_norm": 1.5572583675384521, "learning_rate": 1.4573577235772359e-05, "loss": 0.0528, "step": 66745 }, { "epoch": 0.8140243902439024, "grad_norm": 1.1596283912658691, "learning_rate": 1.4573170731707319e-05, "loss": 0.0406, "step": 66750 }, { "epoch": 0.8140853658536585, "grad_norm": 0.8977047204971313, "learning_rate": 1.4572764227642278e-05, "loss": 0.0742, "step": 66755 }, { "epoch": 0.8141463414634146, "grad_norm": 0.6850315928459167, "learning_rate": 1.4572357723577237e-05, "loss": 0.0527, "step": 66760 }, { "epoch": 0.8142073170731707, "grad_norm": 0.4961150288581848, "learning_rate": 1.4571951219512196e-05, "loss": 0.0492, "step": 66765 }, { "epoch": 0.8142682926829268, "grad_norm": 1.7663511037826538, "learning_rate": 1.4571544715447156e-05, "loss": 0.0511, "step": 66770 }, { "epoch": 0.8143292682926829, "grad_norm": 1.6700658798217773, "learning_rate": 1.4571138211382114e-05, "loss": 0.0312, "step": 66775 }, { "epoch": 0.814390243902439, "grad_norm": 0.47208577394485474, "learning_rate": 1.4570731707317074e-05, "loss": 0.0769, "step": 66780 }, { "epoch": 0.8144512195121951, "grad_norm": 0.5672895312309265, "learning_rate": 1.4570325203252034e-05, "loss": 0.0552, "step": 66785 }, { "epoch": 0.8145121951219512, "grad_norm": 1.1313095092773438, "learning_rate": 1.4569918699186994e-05, "loss": 0.0403, "step": 66790 }, { "epoch": 0.8145731707317073, "grad_norm": 0.590449869632721, "learning_rate": 1.4569512195121954e-05, "loss": 0.0652, "step": 66795 }, { "epoch": 0.8146341463414634, "grad_norm": 0.6426428556442261, "learning_rate": 1.4569105691056913e-05, "loss": 0.0411, "step": 66800 }, { "epoch": 0.8146951219512195, "grad_norm": 0.511817991733551, "learning_rate": 1.456869918699187e-05, "loss": 0.035, "step": 66805 }, { "epoch": 0.8147560975609756, "grad_norm": 0.5316745638847351, "learning_rate": 1.456829268292683e-05, "loss": 0.0437, "step": 66810 }, { "epoch": 0.8148170731707317, "grad_norm": 0.569727897644043, "learning_rate": 1.456788617886179e-05, "loss": 0.0634, "step": 66815 }, { "epoch": 0.8148780487804878, "grad_norm": 0.6512453556060791, "learning_rate": 1.456747967479675e-05, "loss": 0.066, "step": 66820 }, { "epoch": 0.8149390243902439, "grad_norm": 0.6319032311439514, "learning_rate": 1.4567073170731709e-05, "loss": 0.0487, "step": 66825 }, { "epoch": 0.815, "grad_norm": 0.33486148715019226, "learning_rate": 1.4566666666666669e-05, "loss": 0.0481, "step": 66830 }, { "epoch": 0.8150609756097561, "grad_norm": 0.499176561832428, "learning_rate": 1.4566260162601627e-05, "loss": 0.0367, "step": 66835 }, { "epoch": 0.8151219512195121, "grad_norm": 0.35410234332084656, "learning_rate": 1.4565853658536587e-05, "loss": 0.0322, "step": 66840 }, { "epoch": 0.8151829268292683, "grad_norm": 0.5889647006988525, "learning_rate": 1.4565447154471547e-05, "loss": 0.0294, "step": 66845 }, { "epoch": 0.8152439024390243, "grad_norm": 1.1891909837722778, "learning_rate": 1.4565040650406505e-05, "loss": 0.0597, "step": 66850 }, { "epoch": 0.8153048780487805, "grad_norm": 0.3380904197692871, "learning_rate": 1.4564634146341465e-05, "loss": 0.0425, "step": 66855 }, { "epoch": 0.8153658536585365, "grad_norm": 0.7798374891281128, "learning_rate": 1.4564227642276424e-05, "loss": 0.0367, "step": 66860 }, { "epoch": 0.8154268292682927, "grad_norm": 2.2586398124694824, "learning_rate": 1.4563821138211383e-05, "loss": 0.0556, "step": 66865 }, { "epoch": 0.8154878048780487, "grad_norm": 1.2080401182174683, "learning_rate": 1.4563414634146342e-05, "loss": 0.0415, "step": 66870 }, { "epoch": 0.8155487804878049, "grad_norm": 0.5661988854408264, "learning_rate": 1.4563008130081302e-05, "loss": 0.0237, "step": 66875 }, { "epoch": 0.8156097560975609, "grad_norm": 1.1885610818862915, "learning_rate": 1.4562601626016262e-05, "loss": 0.0195, "step": 66880 }, { "epoch": 0.8156707317073171, "grad_norm": 0.3858417570590973, "learning_rate": 1.4562195121951222e-05, "loss": 0.047, "step": 66885 }, { "epoch": 0.8157317073170731, "grad_norm": 0.4473848342895508, "learning_rate": 1.4561788617886182e-05, "loss": 0.0551, "step": 66890 }, { "epoch": 0.8157926829268293, "grad_norm": 0.5143095254898071, "learning_rate": 1.4561382113821138e-05, "loss": 0.0469, "step": 66895 }, { "epoch": 0.8158536585365853, "grad_norm": 0.8574416637420654, "learning_rate": 1.4560975609756098e-05, "loss": 0.0485, "step": 66900 }, { "epoch": 0.8159146341463415, "grad_norm": 1.4406801462173462, "learning_rate": 1.4560569105691058e-05, "loss": 0.0458, "step": 66905 }, { "epoch": 0.8159756097560975, "grad_norm": 1.6739532947540283, "learning_rate": 1.4560162601626018e-05, "loss": 0.0514, "step": 66910 }, { "epoch": 0.8160365853658537, "grad_norm": 0.6748563647270203, "learning_rate": 1.4559756097560977e-05, "loss": 0.0406, "step": 66915 }, { "epoch": 0.8160975609756097, "grad_norm": 0.4292305111885071, "learning_rate": 1.4559349593495937e-05, "loss": 0.0254, "step": 66920 }, { "epoch": 0.8161585365853659, "grad_norm": 1.103473424911499, "learning_rate": 1.4558943089430895e-05, "loss": 0.0426, "step": 66925 }, { "epoch": 0.8162195121951219, "grad_norm": 0.5752432346343994, "learning_rate": 1.4558536585365855e-05, "loss": 0.0225, "step": 66930 }, { "epoch": 0.816280487804878, "grad_norm": 0.5586874485015869, "learning_rate": 1.4558130081300813e-05, "loss": 0.0585, "step": 66935 }, { "epoch": 0.8163414634146341, "grad_norm": 0.6633952856063843, "learning_rate": 1.4557723577235773e-05, "loss": 0.0518, "step": 66940 }, { "epoch": 0.8164024390243902, "grad_norm": 0.4055575430393219, "learning_rate": 1.4557317073170733e-05, "loss": 0.0446, "step": 66945 }, { "epoch": 0.8164634146341463, "grad_norm": 0.3956933319568634, "learning_rate": 1.4556910569105693e-05, "loss": 0.0327, "step": 66950 }, { "epoch": 0.8165243902439024, "grad_norm": 0.5247949361801147, "learning_rate": 1.4556504065040651e-05, "loss": 0.0322, "step": 66955 }, { "epoch": 0.8165853658536585, "grad_norm": 0.45670151710510254, "learning_rate": 1.455609756097561e-05, "loss": 0.0263, "step": 66960 }, { "epoch": 0.8166463414634146, "grad_norm": 0.4953528642654419, "learning_rate": 1.455569105691057e-05, "loss": 0.0322, "step": 66965 }, { "epoch": 0.8167073170731707, "grad_norm": 0.5942116379737854, "learning_rate": 1.455528455284553e-05, "loss": 0.0686, "step": 66970 }, { "epoch": 0.8167682926829268, "grad_norm": 0.5094163417816162, "learning_rate": 1.455487804878049e-05, "loss": 0.0361, "step": 66975 }, { "epoch": 0.8168292682926829, "grad_norm": 1.1463122367858887, "learning_rate": 1.455447154471545e-05, "loss": 0.036, "step": 66980 }, { "epoch": 0.816890243902439, "grad_norm": 0.272541880607605, "learning_rate": 1.4554065040650406e-05, "loss": 0.0566, "step": 66985 }, { "epoch": 0.8169512195121951, "grad_norm": 0.8313632011413574, "learning_rate": 1.4553658536585366e-05, "loss": 0.03, "step": 66990 }, { "epoch": 0.8170121951219512, "grad_norm": 0.4483087658882141, "learning_rate": 1.4553252032520326e-05, "loss": 0.082, "step": 66995 }, { "epoch": 0.8170731707317073, "grad_norm": 0.5304388403892517, "learning_rate": 1.4552845528455286e-05, "loss": 0.0189, "step": 67000 }, { "epoch": 0.8171341463414634, "grad_norm": 0.5037078857421875, "learning_rate": 1.4552439024390246e-05, "loss": 0.063, "step": 67005 }, { "epoch": 0.8171951219512195, "grad_norm": 0.6103984117507935, "learning_rate": 1.4552032520325206e-05, "loss": 0.0419, "step": 67010 }, { "epoch": 0.8172560975609756, "grad_norm": 1.2149454355239868, "learning_rate": 1.4551626016260164e-05, "loss": 0.0304, "step": 67015 }, { "epoch": 0.8173170731707317, "grad_norm": 0.814245343208313, "learning_rate": 1.4551219512195124e-05, "loss": 0.0489, "step": 67020 }, { "epoch": 0.8173780487804878, "grad_norm": 0.6603949666023254, "learning_rate": 1.4550813008130082e-05, "loss": 0.0394, "step": 67025 }, { "epoch": 0.8174390243902439, "grad_norm": 0.33538636565208435, "learning_rate": 1.4550406504065041e-05, "loss": 0.0361, "step": 67030 }, { "epoch": 0.8175, "grad_norm": 0.6004590392112732, "learning_rate": 1.4550000000000001e-05, "loss": 0.0571, "step": 67035 }, { "epoch": 0.817560975609756, "grad_norm": 0.4582892954349518, "learning_rate": 1.4549593495934961e-05, "loss": 0.0367, "step": 67040 }, { "epoch": 0.8176219512195122, "grad_norm": 1.1394609212875366, "learning_rate": 1.454918699186992e-05, "loss": 0.0476, "step": 67045 }, { "epoch": 0.8176829268292682, "grad_norm": 0.3759736716747284, "learning_rate": 1.4548780487804879e-05, "loss": 0.0624, "step": 67050 }, { "epoch": 0.8177439024390244, "grad_norm": 0.6705651879310608, "learning_rate": 1.4548373983739839e-05, "loss": 0.0407, "step": 67055 }, { "epoch": 0.8178048780487804, "grad_norm": 0.6469178795814514, "learning_rate": 1.4547967479674799e-05, "loss": 0.0397, "step": 67060 }, { "epoch": 0.8178658536585366, "grad_norm": 0.31945309042930603, "learning_rate": 1.4547560975609759e-05, "loss": 0.0248, "step": 67065 }, { "epoch": 0.8179268292682926, "grad_norm": 0.34800708293914795, "learning_rate": 1.4547154471544718e-05, "loss": 0.0502, "step": 67070 }, { "epoch": 0.8179878048780488, "grad_norm": 0.2866726219654083, "learning_rate": 1.4546747967479675e-05, "loss": 0.0457, "step": 67075 }, { "epoch": 0.8180487804878048, "grad_norm": 0.32326316833496094, "learning_rate": 1.4546341463414635e-05, "loss": 0.0217, "step": 67080 }, { "epoch": 0.818109756097561, "grad_norm": 0.5001170635223389, "learning_rate": 1.4545934959349594e-05, "loss": 0.0443, "step": 67085 }, { "epoch": 0.818170731707317, "grad_norm": 0.4515351951122284, "learning_rate": 1.4545528455284554e-05, "loss": 0.0355, "step": 67090 }, { "epoch": 0.8182317073170732, "grad_norm": 0.4836730360984802, "learning_rate": 1.4545121951219514e-05, "loss": 0.0468, "step": 67095 }, { "epoch": 0.8182926829268292, "grad_norm": 0.5083650350570679, "learning_rate": 1.4544715447154474e-05, "loss": 0.0587, "step": 67100 }, { "epoch": 0.8183536585365854, "grad_norm": 0.5180169343948364, "learning_rate": 1.4544308943089432e-05, "loss": 0.0288, "step": 67105 }, { "epoch": 0.8184146341463414, "grad_norm": 0.5149913430213928, "learning_rate": 1.4543902439024392e-05, "loss": 0.0353, "step": 67110 }, { "epoch": 0.8184756097560976, "grad_norm": 0.49806445837020874, "learning_rate": 1.454349593495935e-05, "loss": 0.0841, "step": 67115 }, { "epoch": 0.8185365853658536, "grad_norm": 0.5141265392303467, "learning_rate": 1.454308943089431e-05, "loss": 0.0731, "step": 67120 }, { "epoch": 0.8185975609756098, "grad_norm": 0.5369608402252197, "learning_rate": 1.454268292682927e-05, "loss": 0.0473, "step": 67125 }, { "epoch": 0.8186585365853658, "grad_norm": 0.8144778609275818, "learning_rate": 1.454227642276423e-05, "loss": 0.0588, "step": 67130 }, { "epoch": 0.818719512195122, "grad_norm": 0.7145209908485413, "learning_rate": 1.4541869918699188e-05, "loss": 0.0514, "step": 67135 }, { "epoch": 0.818780487804878, "grad_norm": 0.38784870505332947, "learning_rate": 1.4541463414634147e-05, "loss": 0.0182, "step": 67140 }, { "epoch": 0.8188414634146342, "grad_norm": 0.6416090130805969, "learning_rate": 1.4541056910569107e-05, "loss": 0.034, "step": 67145 }, { "epoch": 0.8189024390243902, "grad_norm": 0.5675408840179443, "learning_rate": 1.4540650406504067e-05, "loss": 0.0438, "step": 67150 }, { "epoch": 0.8189634146341463, "grad_norm": 0.3287261128425598, "learning_rate": 1.4540243902439027e-05, "loss": 0.0312, "step": 67155 }, { "epoch": 0.8190243902439024, "grad_norm": 0.7620291709899902, "learning_rate": 1.4539837398373987e-05, "loss": 0.0607, "step": 67160 }, { "epoch": 0.8190853658536585, "grad_norm": 0.6855103969573975, "learning_rate": 1.4539430894308943e-05, "loss": 0.0358, "step": 67165 }, { "epoch": 0.8191463414634146, "grad_norm": 0.5154224634170532, "learning_rate": 1.4539024390243903e-05, "loss": 0.0292, "step": 67170 }, { "epoch": 0.8192073170731707, "grad_norm": 0.6846491098403931, "learning_rate": 1.4538617886178863e-05, "loss": 0.0524, "step": 67175 }, { "epoch": 0.8192682926829268, "grad_norm": 0.5927026271820068, "learning_rate": 1.4538211382113823e-05, "loss": 0.0231, "step": 67180 }, { "epoch": 0.8193292682926829, "grad_norm": 0.1922018676996231, "learning_rate": 1.4537804878048782e-05, "loss": 0.0413, "step": 67185 }, { "epoch": 0.819390243902439, "grad_norm": 0.568954348564148, "learning_rate": 1.4537398373983742e-05, "loss": 0.0602, "step": 67190 }, { "epoch": 0.8194512195121951, "grad_norm": 0.29338720440864563, "learning_rate": 1.45369918699187e-05, "loss": 0.0429, "step": 67195 }, { "epoch": 0.8195121951219512, "grad_norm": 0.598950982093811, "learning_rate": 1.4536585365853658e-05, "loss": 0.0492, "step": 67200 }, { "epoch": 0.8195731707317073, "grad_norm": 0.6424103379249573, "learning_rate": 1.4536178861788618e-05, "loss": 0.0281, "step": 67205 }, { "epoch": 0.8196341463414634, "grad_norm": 0.6958038210868835, "learning_rate": 1.4535772357723578e-05, "loss": 0.0388, "step": 67210 }, { "epoch": 0.8196951219512195, "grad_norm": 0.6383441090583801, "learning_rate": 1.4535365853658538e-05, "loss": 0.0485, "step": 67215 }, { "epoch": 0.8197560975609756, "grad_norm": 0.7114503979682922, "learning_rate": 1.4534959349593498e-05, "loss": 0.0463, "step": 67220 }, { "epoch": 0.8198170731707317, "grad_norm": 0.6856278777122498, "learning_rate": 1.4534552845528456e-05, "loss": 0.0405, "step": 67225 }, { "epoch": 0.8198780487804878, "grad_norm": 1.2104618549346924, "learning_rate": 1.4534146341463416e-05, "loss": 0.0453, "step": 67230 }, { "epoch": 0.8199390243902439, "grad_norm": 3.263195276260376, "learning_rate": 1.4533739837398376e-05, "loss": 0.0588, "step": 67235 }, { "epoch": 0.82, "grad_norm": 0.6495903730392456, "learning_rate": 1.4533333333333335e-05, "loss": 0.0322, "step": 67240 }, { "epoch": 0.8200609756097561, "grad_norm": 0.39621004462242126, "learning_rate": 1.4532926829268295e-05, "loss": 0.0595, "step": 67245 }, { "epoch": 0.8201219512195121, "grad_norm": 0.6880597472190857, "learning_rate": 1.4532520325203255e-05, "loss": 0.07, "step": 67250 }, { "epoch": 0.8201829268292683, "grad_norm": 0.6402738690376282, "learning_rate": 1.4532113821138211e-05, "loss": 0.05, "step": 67255 }, { "epoch": 0.8202439024390243, "grad_norm": 1.0397701263427734, "learning_rate": 1.4531707317073171e-05, "loss": 0.0561, "step": 67260 }, { "epoch": 0.8203048780487805, "grad_norm": 0.28503769636154175, "learning_rate": 1.4531300813008131e-05, "loss": 0.04, "step": 67265 }, { "epoch": 0.8203658536585365, "grad_norm": 0.4540914297103882, "learning_rate": 1.4530894308943091e-05, "loss": 0.028, "step": 67270 }, { "epoch": 0.8204268292682927, "grad_norm": 0.38599616289138794, "learning_rate": 1.453048780487805e-05, "loss": 0.0448, "step": 67275 }, { "epoch": 0.8204878048780487, "grad_norm": 0.5573568940162659, "learning_rate": 1.453008130081301e-05, "loss": 0.0428, "step": 67280 }, { "epoch": 0.8205487804878049, "grad_norm": 0.3803195655345917, "learning_rate": 1.4529674796747969e-05, "loss": 0.0872, "step": 67285 }, { "epoch": 0.8206097560975609, "grad_norm": 0.061632439494132996, "learning_rate": 1.4529268292682927e-05, "loss": 0.0561, "step": 67290 }, { "epoch": 0.8206707317073171, "grad_norm": 0.7163195610046387, "learning_rate": 1.4528861788617887e-05, "loss": 0.0543, "step": 67295 }, { "epoch": 0.8207317073170731, "grad_norm": 0.24439603090286255, "learning_rate": 1.4528455284552846e-05, "loss": 0.0327, "step": 67300 }, { "epoch": 0.8207926829268293, "grad_norm": 0.8267403244972229, "learning_rate": 1.4528048780487806e-05, "loss": 0.0471, "step": 67305 }, { "epoch": 0.8208536585365853, "grad_norm": 0.7791479229927063, "learning_rate": 1.4527642276422766e-05, "loss": 0.0611, "step": 67310 }, { "epoch": 0.8209146341463415, "grad_norm": 0.7973626255989075, "learning_rate": 1.4527235772357724e-05, "loss": 0.055, "step": 67315 }, { "epoch": 0.8209756097560975, "grad_norm": 0.4356311559677124, "learning_rate": 1.4526829268292684e-05, "loss": 0.0487, "step": 67320 }, { "epoch": 0.8210365853658537, "grad_norm": 0.370471715927124, "learning_rate": 1.4526422764227644e-05, "loss": 0.0304, "step": 67325 }, { "epoch": 0.8210975609756097, "grad_norm": 0.6931551694869995, "learning_rate": 1.4526016260162604e-05, "loss": 0.0626, "step": 67330 }, { "epoch": 0.8211585365853659, "grad_norm": 0.7977729439735413, "learning_rate": 1.4525609756097564e-05, "loss": 0.0428, "step": 67335 }, { "epoch": 0.8212195121951219, "grad_norm": 0.3795621693134308, "learning_rate": 1.4525203252032523e-05, "loss": 0.0387, "step": 67340 }, { "epoch": 0.8212804878048781, "grad_norm": 0.3424663543701172, "learning_rate": 1.452479674796748e-05, "loss": 0.065, "step": 67345 }, { "epoch": 0.8213414634146341, "grad_norm": 0.8174306154251099, "learning_rate": 1.452439024390244e-05, "loss": 0.0537, "step": 67350 }, { "epoch": 0.8214024390243903, "grad_norm": 0.5130813121795654, "learning_rate": 1.45239837398374e-05, "loss": 0.0447, "step": 67355 }, { "epoch": 0.8214634146341463, "grad_norm": 0.5668690204620361, "learning_rate": 1.452357723577236e-05, "loss": 0.072, "step": 67360 }, { "epoch": 0.8215243902439024, "grad_norm": 0.5687738656997681, "learning_rate": 1.4523170731707319e-05, "loss": 0.0707, "step": 67365 }, { "epoch": 0.8215853658536585, "grad_norm": 0.628495454788208, "learning_rate": 1.4522764227642279e-05, "loss": 0.0414, "step": 67370 }, { "epoch": 0.8216463414634146, "grad_norm": 0.33268603682518005, "learning_rate": 1.4522357723577237e-05, "loss": 0.0358, "step": 67375 }, { "epoch": 0.8217073170731707, "grad_norm": 0.48710328340530396, "learning_rate": 1.4521951219512195e-05, "loss": 0.0359, "step": 67380 }, { "epoch": 0.8217682926829268, "grad_norm": 0.8314007520675659, "learning_rate": 1.4521544715447155e-05, "loss": 0.0674, "step": 67385 }, { "epoch": 0.8218292682926829, "grad_norm": 0.7730041742324829, "learning_rate": 1.4521138211382115e-05, "loss": 0.0886, "step": 67390 }, { "epoch": 0.821890243902439, "grad_norm": 2.958289384841919, "learning_rate": 1.4520731707317075e-05, "loss": 0.0489, "step": 67395 }, { "epoch": 0.8219512195121951, "grad_norm": 0.4590059816837311, "learning_rate": 1.4520325203252034e-05, "loss": 0.0305, "step": 67400 }, { "epoch": 0.8220121951219512, "grad_norm": 0.4939560294151306, "learning_rate": 1.4519918699186993e-05, "loss": 0.0541, "step": 67405 }, { "epoch": 0.8220731707317073, "grad_norm": 0.4084422290325165, "learning_rate": 1.4519512195121952e-05, "loss": 0.0228, "step": 67410 }, { "epoch": 0.8221341463414634, "grad_norm": 1.0279933214187622, "learning_rate": 1.4519105691056912e-05, "loss": 0.0446, "step": 67415 }, { "epoch": 0.8221951219512195, "grad_norm": 0.7757772207260132, "learning_rate": 1.4518699186991872e-05, "loss": 0.0667, "step": 67420 }, { "epoch": 0.8222560975609756, "grad_norm": 0.6612668037414551, "learning_rate": 1.4518292682926832e-05, "loss": 0.0625, "step": 67425 }, { "epoch": 0.8223170731707317, "grad_norm": 0.7219733595848083, "learning_rate": 1.4517886178861792e-05, "loss": 0.0572, "step": 67430 }, { "epoch": 0.8223780487804878, "grad_norm": 0.5037736296653748, "learning_rate": 1.4517479674796748e-05, "loss": 0.0448, "step": 67435 }, { "epoch": 0.8224390243902439, "grad_norm": 0.5424696207046509, "learning_rate": 1.4517073170731708e-05, "loss": 0.0387, "step": 67440 }, { "epoch": 0.8225, "grad_norm": 0.6173847913742065, "learning_rate": 1.4516666666666668e-05, "loss": 0.048, "step": 67445 }, { "epoch": 0.822560975609756, "grad_norm": 0.5189070701599121, "learning_rate": 1.4516260162601628e-05, "loss": 0.0357, "step": 67450 }, { "epoch": 0.8226219512195122, "grad_norm": 0.20122474431991577, "learning_rate": 1.4515853658536587e-05, "loss": 0.0362, "step": 67455 }, { "epoch": 0.8226829268292682, "grad_norm": 0.3682582676410675, "learning_rate": 1.4515447154471547e-05, "loss": 0.0374, "step": 67460 }, { "epoch": 0.8227439024390244, "grad_norm": 0.39280810952186584, "learning_rate": 1.4515040650406504e-05, "loss": 0.0384, "step": 67465 }, { "epoch": 0.8228048780487804, "grad_norm": 0.35996881127357483, "learning_rate": 1.4514634146341463e-05, "loss": 0.0445, "step": 67470 }, { "epoch": 0.8228658536585366, "grad_norm": 0.582392156124115, "learning_rate": 1.4514227642276423e-05, "loss": 0.0405, "step": 67475 }, { "epoch": 0.8229268292682926, "grad_norm": 0.7949399352073669, "learning_rate": 1.4513821138211383e-05, "loss": 0.0594, "step": 67480 }, { "epoch": 0.8229878048780488, "grad_norm": 0.8536871671676636, "learning_rate": 1.4513414634146343e-05, "loss": 0.0416, "step": 67485 }, { "epoch": 0.8230487804878048, "grad_norm": 0.4720844626426697, "learning_rate": 1.4513008130081303e-05, "loss": 0.0475, "step": 67490 }, { "epoch": 0.823109756097561, "grad_norm": 0.4888164699077606, "learning_rate": 1.4512601626016261e-05, "loss": 0.0549, "step": 67495 }, { "epoch": 0.823170731707317, "grad_norm": 0.7477284669876099, "learning_rate": 1.451219512195122e-05, "loss": 0.0208, "step": 67500 }, { "epoch": 0.8232317073170732, "grad_norm": 0.7466558218002319, "learning_rate": 1.451178861788618e-05, "loss": 0.0868, "step": 67505 }, { "epoch": 0.8232926829268292, "grad_norm": 0.38953328132629395, "learning_rate": 1.451138211382114e-05, "loss": 0.054, "step": 67510 }, { "epoch": 0.8233536585365854, "grad_norm": 0.40057334303855896, "learning_rate": 1.45109756097561e-05, "loss": 0.0284, "step": 67515 }, { "epoch": 0.8234146341463414, "grad_norm": 0.4009574055671692, "learning_rate": 1.451056910569106e-05, "loss": 0.0287, "step": 67520 }, { "epoch": 0.8234756097560976, "grad_norm": 0.45172685384750366, "learning_rate": 1.4510162601626016e-05, "loss": 0.0238, "step": 67525 }, { "epoch": 0.8235365853658536, "grad_norm": 1.2000993490219116, "learning_rate": 1.4509756097560976e-05, "loss": 0.068, "step": 67530 }, { "epoch": 0.8235975609756098, "grad_norm": 0.5710775852203369, "learning_rate": 1.4509349593495936e-05, "loss": 0.0456, "step": 67535 }, { "epoch": 0.8236585365853658, "grad_norm": 0.5004799962043762, "learning_rate": 1.4508943089430896e-05, "loss": 0.0256, "step": 67540 }, { "epoch": 0.823719512195122, "grad_norm": 0.29721036553382874, "learning_rate": 1.4508536585365856e-05, "loss": 0.0241, "step": 67545 }, { "epoch": 0.823780487804878, "grad_norm": 0.6166170239448547, "learning_rate": 1.4508130081300816e-05, "loss": 0.0778, "step": 67550 }, { "epoch": 0.8238414634146342, "grad_norm": 0.8569822311401367, "learning_rate": 1.4507723577235772e-05, "loss": 0.0404, "step": 67555 }, { "epoch": 0.8239024390243902, "grad_norm": 0.749638020992279, "learning_rate": 1.4507317073170732e-05, "loss": 0.0564, "step": 67560 }, { "epoch": 0.8239634146341464, "grad_norm": 0.502711296081543, "learning_rate": 1.4506910569105692e-05, "loss": 0.046, "step": 67565 }, { "epoch": 0.8240243902439024, "grad_norm": 0.6119204759597778, "learning_rate": 1.4506504065040651e-05, "loss": 0.0442, "step": 67570 }, { "epoch": 0.8240853658536585, "grad_norm": 0.3694421947002411, "learning_rate": 1.4506097560975611e-05, "loss": 0.0548, "step": 67575 }, { "epoch": 0.8241463414634146, "grad_norm": 0.3973526358604431, "learning_rate": 1.4505691056910571e-05, "loss": 0.0201, "step": 67580 }, { "epoch": 0.8242073170731707, "grad_norm": 0.5089683532714844, "learning_rate": 1.450528455284553e-05, "loss": 0.0676, "step": 67585 }, { "epoch": 0.8242682926829268, "grad_norm": 0.6813347935676575, "learning_rate": 1.4504878048780489e-05, "loss": 0.036, "step": 67590 }, { "epoch": 0.8243292682926829, "grad_norm": 0.5443387031555176, "learning_rate": 1.4504471544715449e-05, "loss": 0.0334, "step": 67595 }, { "epoch": 0.824390243902439, "grad_norm": 0.42287540435791016, "learning_rate": 1.4504065040650409e-05, "loss": 0.0283, "step": 67600 }, { "epoch": 0.8244512195121951, "grad_norm": 0.8263571858406067, "learning_rate": 1.4503658536585368e-05, "loss": 0.0452, "step": 67605 }, { "epoch": 0.8245121951219512, "grad_norm": 0.5229496955871582, "learning_rate": 1.4503252032520327e-05, "loss": 0.0697, "step": 67610 }, { "epoch": 0.8245731707317073, "grad_norm": 0.18880708515644073, "learning_rate": 1.4502845528455285e-05, "loss": 0.0272, "step": 67615 }, { "epoch": 0.8246341463414634, "grad_norm": 0.5006006956100464, "learning_rate": 1.4502439024390245e-05, "loss": 0.0533, "step": 67620 }, { "epoch": 0.8246951219512195, "grad_norm": 0.3691574037075043, "learning_rate": 1.4502032520325204e-05, "loss": 0.0985, "step": 67625 }, { "epoch": 0.8247560975609756, "grad_norm": 0.38934287428855896, "learning_rate": 1.4501626016260164e-05, "loss": 0.0299, "step": 67630 }, { "epoch": 0.8248170731707317, "grad_norm": 0.7523852586746216, "learning_rate": 1.4501219512195124e-05, "loss": 0.0529, "step": 67635 }, { "epoch": 0.8248780487804878, "grad_norm": 2.5104775428771973, "learning_rate": 1.4500813008130084e-05, "loss": 0.0713, "step": 67640 }, { "epoch": 0.8249390243902439, "grad_norm": 0.29327893257141113, "learning_rate": 1.450040650406504e-05, "loss": 0.0167, "step": 67645 }, { "epoch": 0.825, "grad_norm": 0.5173285007476807, "learning_rate": 1.45e-05, "loss": 0.0338, "step": 67650 }, { "epoch": 0.8250609756097561, "grad_norm": 0.46614980697631836, "learning_rate": 1.449959349593496e-05, "loss": 0.0665, "step": 67655 }, { "epoch": 0.8251219512195122, "grad_norm": 0.42517781257629395, "learning_rate": 1.449918699186992e-05, "loss": 0.0386, "step": 67660 }, { "epoch": 0.8251829268292683, "grad_norm": 0.3174229860305786, "learning_rate": 1.449878048780488e-05, "loss": 0.0414, "step": 67665 }, { "epoch": 0.8252439024390243, "grad_norm": 0.46775689721107483, "learning_rate": 1.449837398373984e-05, "loss": 0.0451, "step": 67670 }, { "epoch": 0.8253048780487805, "grad_norm": 0.28505897521972656, "learning_rate": 1.4497967479674797e-05, "loss": 0.0362, "step": 67675 }, { "epoch": 0.8253658536585365, "grad_norm": 0.7743140459060669, "learning_rate": 1.4497560975609757e-05, "loss": 0.0656, "step": 67680 }, { "epoch": 0.8254268292682927, "grad_norm": 0.45882928371429443, "learning_rate": 1.4497154471544717e-05, "loss": 0.0444, "step": 67685 }, { "epoch": 0.8254878048780487, "grad_norm": 0.5537647008895874, "learning_rate": 1.4496747967479677e-05, "loss": 0.0446, "step": 67690 }, { "epoch": 0.8255487804878049, "grad_norm": 0.3600698709487915, "learning_rate": 1.4496341463414637e-05, "loss": 0.0811, "step": 67695 }, { "epoch": 0.8256097560975609, "grad_norm": 2.114668130874634, "learning_rate": 1.4495934959349595e-05, "loss": 0.0969, "step": 67700 }, { "epoch": 0.8256707317073171, "grad_norm": 0.3801169693470001, "learning_rate": 1.4495528455284553e-05, "loss": 0.0486, "step": 67705 }, { "epoch": 0.8257317073170731, "grad_norm": 0.6833316087722778, "learning_rate": 1.4495121951219513e-05, "loss": 0.0468, "step": 67710 }, { "epoch": 0.8257926829268293, "grad_norm": 0.5814684629440308, "learning_rate": 1.4494715447154473e-05, "loss": 0.0573, "step": 67715 }, { "epoch": 0.8258536585365853, "grad_norm": 0.8333043456077576, "learning_rate": 1.4494308943089433e-05, "loss": 0.0797, "step": 67720 }, { "epoch": 0.8259146341463415, "grad_norm": 0.35711756348609924, "learning_rate": 1.4493902439024392e-05, "loss": 0.0487, "step": 67725 }, { "epoch": 0.8259756097560975, "grad_norm": 0.6069551110267639, "learning_rate": 1.4493495934959352e-05, "loss": 0.0516, "step": 67730 }, { "epoch": 0.8260365853658537, "grad_norm": 0.6606289744377136, "learning_rate": 1.4493089430894309e-05, "loss": 0.0336, "step": 67735 }, { "epoch": 0.8260975609756097, "grad_norm": 0.5527471303939819, "learning_rate": 1.4492682926829268e-05, "loss": 0.0803, "step": 67740 }, { "epoch": 0.8261585365853659, "grad_norm": 1.5939395427703857, "learning_rate": 1.4492276422764228e-05, "loss": 0.0445, "step": 67745 }, { "epoch": 0.8262195121951219, "grad_norm": 0.31320416927337646, "learning_rate": 1.4491869918699188e-05, "loss": 0.0198, "step": 67750 }, { "epoch": 0.8262804878048781, "grad_norm": 0.8768658638000488, "learning_rate": 1.4491463414634148e-05, "loss": 0.0423, "step": 67755 }, { "epoch": 0.8263414634146341, "grad_norm": 0.4834521412849426, "learning_rate": 1.4491056910569108e-05, "loss": 0.0298, "step": 67760 }, { "epoch": 0.8264024390243903, "grad_norm": 0.5042605400085449, "learning_rate": 1.4490650406504066e-05, "loss": 0.0536, "step": 67765 }, { "epoch": 0.8264634146341463, "grad_norm": 0.7701900601387024, "learning_rate": 1.4490243902439026e-05, "loss": 0.0493, "step": 67770 }, { "epoch": 0.8265243902439025, "grad_norm": 0.6370798945426941, "learning_rate": 1.4489837398373985e-05, "loss": 0.0862, "step": 67775 }, { "epoch": 0.8265853658536585, "grad_norm": 0.49508723616600037, "learning_rate": 1.4489430894308945e-05, "loss": 0.0478, "step": 67780 }, { "epoch": 0.8266463414634146, "grad_norm": 0.46744489669799805, "learning_rate": 1.4489024390243905e-05, "loss": 0.0309, "step": 67785 }, { "epoch": 0.8267073170731707, "grad_norm": 2.93135666847229, "learning_rate": 1.4488617886178863e-05, "loss": 0.0773, "step": 67790 }, { "epoch": 0.8267682926829268, "grad_norm": 0.706182062625885, "learning_rate": 1.4488211382113821e-05, "loss": 0.0863, "step": 67795 }, { "epoch": 0.8268292682926829, "grad_norm": 0.5681421756744385, "learning_rate": 1.4487804878048781e-05, "loss": 0.0344, "step": 67800 }, { "epoch": 0.826890243902439, "grad_norm": 0.785733699798584, "learning_rate": 1.4487398373983741e-05, "loss": 0.039, "step": 67805 }, { "epoch": 0.8269512195121951, "grad_norm": 0.42310333251953125, "learning_rate": 1.44869918699187e-05, "loss": 0.0369, "step": 67810 }, { "epoch": 0.8270121951219512, "grad_norm": 0.662833571434021, "learning_rate": 1.448658536585366e-05, "loss": 0.0388, "step": 67815 }, { "epoch": 0.8270731707317073, "grad_norm": 5.8845624923706055, "learning_rate": 1.448617886178862e-05, "loss": 0.052, "step": 67820 }, { "epoch": 0.8271341463414634, "grad_norm": 0.4812846779823303, "learning_rate": 1.4485772357723577e-05, "loss": 0.0429, "step": 67825 }, { "epoch": 0.8271951219512195, "grad_norm": 0.5390410423278809, "learning_rate": 1.4485365853658537e-05, "loss": 0.0383, "step": 67830 }, { "epoch": 0.8272560975609756, "grad_norm": 0.9928528070449829, "learning_rate": 1.4484959349593497e-05, "loss": 0.0395, "step": 67835 }, { "epoch": 0.8273170731707317, "grad_norm": 0.42922574281692505, "learning_rate": 1.4484552845528456e-05, "loss": 0.0414, "step": 67840 }, { "epoch": 0.8273780487804878, "grad_norm": 0.23720549046993256, "learning_rate": 1.4484146341463416e-05, "loss": 0.0397, "step": 67845 }, { "epoch": 0.8274390243902439, "grad_norm": 1.2720057964324951, "learning_rate": 1.4483739837398376e-05, "loss": 0.0448, "step": 67850 }, { "epoch": 0.8275, "grad_norm": 0.5044589638710022, "learning_rate": 1.4483333333333334e-05, "loss": 0.0638, "step": 67855 }, { "epoch": 0.827560975609756, "grad_norm": 0.33048194646835327, "learning_rate": 1.4482926829268294e-05, "loss": 0.0334, "step": 67860 }, { "epoch": 0.8276219512195122, "grad_norm": 0.3152589499950409, "learning_rate": 1.4482520325203254e-05, "loss": 0.0691, "step": 67865 }, { "epoch": 0.8276829268292683, "grad_norm": 1.0814895629882812, "learning_rate": 1.4482113821138214e-05, "loss": 0.0833, "step": 67870 }, { "epoch": 0.8277439024390244, "grad_norm": 0.527152955532074, "learning_rate": 1.4481707317073172e-05, "loss": 0.03, "step": 67875 }, { "epoch": 0.8278048780487804, "grad_norm": 0.8764349818229675, "learning_rate": 1.4481300813008132e-05, "loss": 0.0452, "step": 67880 }, { "epoch": 0.8278658536585366, "grad_norm": 0.6060100197792053, "learning_rate": 1.448089430894309e-05, "loss": 0.0415, "step": 67885 }, { "epoch": 0.8279268292682926, "grad_norm": 0.31490853428840637, "learning_rate": 1.448048780487805e-05, "loss": 0.0679, "step": 67890 }, { "epoch": 0.8279878048780488, "grad_norm": 1.0555769205093384, "learning_rate": 1.448008130081301e-05, "loss": 0.0736, "step": 67895 }, { "epoch": 0.8280487804878048, "grad_norm": 0.43083494901657104, "learning_rate": 1.4479674796747969e-05, "loss": 0.0238, "step": 67900 }, { "epoch": 0.828109756097561, "grad_norm": 0.5011531114578247, "learning_rate": 1.4479268292682929e-05, "loss": 0.0412, "step": 67905 }, { "epoch": 0.828170731707317, "grad_norm": 1.1708625555038452, "learning_rate": 1.4478861788617889e-05, "loss": 0.0421, "step": 67910 }, { "epoch": 0.8282317073170732, "grad_norm": 0.7127252221107483, "learning_rate": 1.4478455284552845e-05, "loss": 0.0242, "step": 67915 }, { "epoch": 0.8282926829268292, "grad_norm": 0.7051812410354614, "learning_rate": 1.4478048780487805e-05, "loss": 0.0684, "step": 67920 }, { "epoch": 0.8283536585365854, "grad_norm": 0.5893136262893677, "learning_rate": 1.4477642276422765e-05, "loss": 0.0366, "step": 67925 }, { "epoch": 0.8284146341463414, "grad_norm": 0.184235081076622, "learning_rate": 1.4477235772357725e-05, "loss": 0.0271, "step": 67930 }, { "epoch": 0.8284756097560976, "grad_norm": 0.21939389407634735, "learning_rate": 1.4476829268292685e-05, "loss": 0.0473, "step": 67935 }, { "epoch": 0.8285365853658536, "grad_norm": 1.2703993320465088, "learning_rate": 1.4476422764227644e-05, "loss": 0.0598, "step": 67940 }, { "epoch": 0.8285975609756098, "grad_norm": 0.7214164137840271, "learning_rate": 1.4476016260162602e-05, "loss": 0.0609, "step": 67945 }, { "epoch": 0.8286585365853658, "grad_norm": 0.8094984292984009, "learning_rate": 1.4475609756097562e-05, "loss": 0.0455, "step": 67950 }, { "epoch": 0.828719512195122, "grad_norm": 2.052952289581299, "learning_rate": 1.4475203252032522e-05, "loss": 0.0676, "step": 67955 }, { "epoch": 0.828780487804878, "grad_norm": 0.43602439761161804, "learning_rate": 1.4474796747967482e-05, "loss": 0.034, "step": 67960 }, { "epoch": 0.8288414634146342, "grad_norm": 0.3318029046058655, "learning_rate": 1.447439024390244e-05, "loss": 0.0463, "step": 67965 }, { "epoch": 0.8289024390243902, "grad_norm": 0.5593394637107849, "learning_rate": 1.44739837398374e-05, "loss": 0.0599, "step": 67970 }, { "epoch": 0.8289634146341464, "grad_norm": 0.5532708764076233, "learning_rate": 1.4473577235772358e-05, "loss": 0.0472, "step": 67975 }, { "epoch": 0.8290243902439024, "grad_norm": 0.5985625386238098, "learning_rate": 1.4473170731707318e-05, "loss": 0.0462, "step": 67980 }, { "epoch": 0.8290853658536586, "grad_norm": 0.3397943079471588, "learning_rate": 1.4472764227642278e-05, "loss": 0.0331, "step": 67985 }, { "epoch": 0.8291463414634146, "grad_norm": 0.5356846451759338, "learning_rate": 1.4472357723577237e-05, "loss": 0.0594, "step": 67990 }, { "epoch": 0.8292073170731707, "grad_norm": 0.47131916880607605, "learning_rate": 1.4471951219512197e-05, "loss": 0.0477, "step": 67995 }, { "epoch": 0.8292682926829268, "grad_norm": 0.3243774473667145, "learning_rate": 1.4471544715447157e-05, "loss": 0.0277, "step": 68000 }, { "epoch": 0.8293292682926829, "grad_norm": 0.40223410725593567, "learning_rate": 1.4471138211382114e-05, "loss": 0.072, "step": 68005 }, { "epoch": 0.829390243902439, "grad_norm": 0.5194897055625916, "learning_rate": 1.4470731707317073e-05, "loss": 0.0337, "step": 68010 }, { "epoch": 0.8294512195121951, "grad_norm": 0.28569790720939636, "learning_rate": 1.4470325203252033e-05, "loss": 0.036, "step": 68015 }, { "epoch": 0.8295121951219512, "grad_norm": 0.6006851196289062, "learning_rate": 1.4469918699186993e-05, "loss": 0.0306, "step": 68020 }, { "epoch": 0.8295731707317073, "grad_norm": 0.5358741283416748, "learning_rate": 1.4469512195121953e-05, "loss": 0.0341, "step": 68025 }, { "epoch": 0.8296341463414634, "grad_norm": 0.24363607168197632, "learning_rate": 1.4469105691056913e-05, "loss": 0.0673, "step": 68030 }, { "epoch": 0.8296951219512195, "grad_norm": 0.4292347729206085, "learning_rate": 1.446869918699187e-05, "loss": 0.0334, "step": 68035 }, { "epoch": 0.8297560975609756, "grad_norm": 0.2856557071208954, "learning_rate": 1.446829268292683e-05, "loss": 0.0301, "step": 68040 }, { "epoch": 0.8298170731707317, "grad_norm": 1.2326997518539429, "learning_rate": 1.446788617886179e-05, "loss": 0.0364, "step": 68045 }, { "epoch": 0.8298780487804878, "grad_norm": 0.20092444121837616, "learning_rate": 1.446747967479675e-05, "loss": 0.033, "step": 68050 }, { "epoch": 0.8299390243902439, "grad_norm": 0.3076639175415039, "learning_rate": 1.4467073170731708e-05, "loss": 0.0335, "step": 68055 }, { "epoch": 0.83, "grad_norm": 0.47521212697029114, "learning_rate": 1.4466666666666668e-05, "loss": 0.0274, "step": 68060 }, { "epoch": 0.8300609756097561, "grad_norm": 0.736122727394104, "learning_rate": 1.4466260162601626e-05, "loss": 0.0447, "step": 68065 }, { "epoch": 0.8301219512195122, "grad_norm": 0.434671014547348, "learning_rate": 1.4465853658536586e-05, "loss": 0.029, "step": 68070 }, { "epoch": 0.8301829268292683, "grad_norm": 0.6232606768608093, "learning_rate": 1.4465447154471546e-05, "loss": 0.0448, "step": 68075 }, { "epoch": 0.8302439024390244, "grad_norm": 0.32635462284088135, "learning_rate": 1.4465040650406506e-05, "loss": 0.0508, "step": 68080 }, { "epoch": 0.8303048780487805, "grad_norm": 0.5751934051513672, "learning_rate": 1.4464634146341466e-05, "loss": 0.0515, "step": 68085 }, { "epoch": 0.8303658536585365, "grad_norm": 0.6767314672470093, "learning_rate": 1.4464227642276425e-05, "loss": 0.1261, "step": 68090 }, { "epoch": 0.8304268292682927, "grad_norm": 0.2600909173488617, "learning_rate": 1.4463821138211382e-05, "loss": 0.0367, "step": 68095 }, { "epoch": 0.8304878048780487, "grad_norm": 0.4955098330974579, "learning_rate": 1.4463414634146342e-05, "loss": 0.0481, "step": 68100 }, { "epoch": 0.8305487804878049, "grad_norm": 0.62772536277771, "learning_rate": 1.4463008130081302e-05, "loss": 0.0418, "step": 68105 }, { "epoch": 0.8306097560975609, "grad_norm": 0.6246349811553955, "learning_rate": 1.4462601626016261e-05, "loss": 0.068, "step": 68110 }, { "epoch": 0.8306707317073171, "grad_norm": 0.21611599624156952, "learning_rate": 1.4462195121951221e-05, "loss": 0.0576, "step": 68115 }, { "epoch": 0.8307317073170731, "grad_norm": 0.3785347044467926, "learning_rate": 1.4461788617886181e-05, "loss": 0.0278, "step": 68120 }, { "epoch": 0.8307926829268293, "grad_norm": 0.24477456510066986, "learning_rate": 1.4461382113821139e-05, "loss": 0.0206, "step": 68125 }, { "epoch": 0.8308536585365853, "grad_norm": 0.5583072900772095, "learning_rate": 1.4460975609756099e-05, "loss": 0.0349, "step": 68130 }, { "epoch": 0.8309146341463415, "grad_norm": 0.680679976940155, "learning_rate": 1.4460569105691059e-05, "loss": 0.0255, "step": 68135 }, { "epoch": 0.8309756097560975, "grad_norm": 0.5002020597457886, "learning_rate": 1.4460162601626017e-05, "loss": 0.0577, "step": 68140 }, { "epoch": 0.8310365853658537, "grad_norm": 0.5796048045158386, "learning_rate": 1.4459756097560977e-05, "loss": 0.0296, "step": 68145 }, { "epoch": 0.8310975609756097, "grad_norm": 0.32736340165138245, "learning_rate": 1.4459349593495937e-05, "loss": 0.0558, "step": 68150 }, { "epoch": 0.8311585365853659, "grad_norm": 0.3128516972064972, "learning_rate": 1.4458943089430895e-05, "loss": 0.036, "step": 68155 }, { "epoch": 0.8312195121951219, "grad_norm": 0.8512588739395142, "learning_rate": 1.4458536585365854e-05, "loss": 0.0309, "step": 68160 }, { "epoch": 0.8312804878048781, "grad_norm": 1.824290156364441, "learning_rate": 1.4458130081300814e-05, "loss": 0.0552, "step": 68165 }, { "epoch": 0.8313414634146341, "grad_norm": 0.6899076104164124, "learning_rate": 1.4457723577235774e-05, "loss": 0.051, "step": 68170 }, { "epoch": 0.8314024390243903, "grad_norm": 0.754668116569519, "learning_rate": 1.4457317073170734e-05, "loss": 0.043, "step": 68175 }, { "epoch": 0.8314634146341463, "grad_norm": 0.5393843054771423, "learning_rate": 1.4456910569105694e-05, "loss": 0.0444, "step": 68180 }, { "epoch": 0.8315243902439025, "grad_norm": 0.368900865316391, "learning_rate": 1.445650406504065e-05, "loss": 0.0435, "step": 68185 }, { "epoch": 0.8315853658536585, "grad_norm": 0.30259940028190613, "learning_rate": 1.445609756097561e-05, "loss": 0.0465, "step": 68190 }, { "epoch": 0.8316463414634147, "grad_norm": 0.4899739623069763, "learning_rate": 1.445569105691057e-05, "loss": 0.0385, "step": 68195 }, { "epoch": 0.8317073170731707, "grad_norm": 0.4448663294315338, "learning_rate": 1.445528455284553e-05, "loss": 0.0469, "step": 68200 }, { "epoch": 0.8317682926829268, "grad_norm": 0.4880223274230957, "learning_rate": 1.445487804878049e-05, "loss": 0.0314, "step": 68205 }, { "epoch": 0.8318292682926829, "grad_norm": 0.43392932415008545, "learning_rate": 1.445447154471545e-05, "loss": 0.0434, "step": 68210 }, { "epoch": 0.831890243902439, "grad_norm": 0.3537827730178833, "learning_rate": 1.4454065040650407e-05, "loss": 0.0732, "step": 68215 }, { "epoch": 0.8319512195121951, "grad_norm": 3.3306803703308105, "learning_rate": 1.4453658536585367e-05, "loss": 0.0303, "step": 68220 }, { "epoch": 0.8320121951219512, "grad_norm": 0.5677869915962219, "learning_rate": 1.4453252032520327e-05, "loss": 0.0564, "step": 68225 }, { "epoch": 0.8320731707317073, "grad_norm": 0.5112507343292236, "learning_rate": 1.4452845528455285e-05, "loss": 0.0417, "step": 68230 }, { "epoch": 0.8321341463414634, "grad_norm": 0.7099888324737549, "learning_rate": 1.4452439024390245e-05, "loss": 0.0454, "step": 68235 }, { "epoch": 0.8321951219512195, "grad_norm": 0.37233707308769226, "learning_rate": 1.4452032520325205e-05, "loss": 0.0206, "step": 68240 }, { "epoch": 0.8322560975609756, "grad_norm": 1.774687647819519, "learning_rate": 1.4451626016260163e-05, "loss": 0.0406, "step": 68245 }, { "epoch": 0.8323170731707317, "grad_norm": 0.6783661842346191, "learning_rate": 1.4451219512195123e-05, "loss": 0.0219, "step": 68250 }, { "epoch": 0.8323780487804878, "grad_norm": 0.5248519778251648, "learning_rate": 1.4450813008130083e-05, "loss": 0.05, "step": 68255 }, { "epoch": 0.8324390243902439, "grad_norm": 0.3121396005153656, "learning_rate": 1.4450406504065042e-05, "loss": 0.054, "step": 68260 }, { "epoch": 0.8325, "grad_norm": 0.5778226852416992, "learning_rate": 1.4450000000000002e-05, "loss": 0.077, "step": 68265 }, { "epoch": 0.8325609756097561, "grad_norm": 1.4297173023223877, "learning_rate": 1.4449593495934962e-05, "loss": 0.0524, "step": 68270 }, { "epoch": 0.8326219512195122, "grad_norm": 0.401877760887146, "learning_rate": 1.4449186991869919e-05, "loss": 0.0442, "step": 68275 }, { "epoch": 0.8326829268292683, "grad_norm": 0.8746810555458069, "learning_rate": 1.4448780487804878e-05, "loss": 0.0421, "step": 68280 }, { "epoch": 0.8327439024390244, "grad_norm": 0.6620638966560364, "learning_rate": 1.4448373983739838e-05, "loss": 0.0822, "step": 68285 }, { "epoch": 0.8328048780487805, "grad_norm": 0.4100746512413025, "learning_rate": 1.4447967479674798e-05, "loss": 0.0359, "step": 68290 }, { "epoch": 0.8328658536585366, "grad_norm": 1.6342464685440063, "learning_rate": 1.4447560975609758e-05, "loss": 0.0976, "step": 68295 }, { "epoch": 0.8329268292682926, "grad_norm": 0.48937028646469116, "learning_rate": 1.4447154471544718e-05, "loss": 0.0678, "step": 68300 }, { "epoch": 0.8329878048780488, "grad_norm": 0.512010395526886, "learning_rate": 1.4446747967479676e-05, "loss": 0.0514, "step": 68305 }, { "epoch": 0.8330487804878048, "grad_norm": 0.9720756411552429, "learning_rate": 1.4446341463414636e-05, "loss": 0.0429, "step": 68310 }, { "epoch": 0.833109756097561, "grad_norm": 0.4315011203289032, "learning_rate": 1.4445934959349595e-05, "loss": 0.0388, "step": 68315 }, { "epoch": 0.833170731707317, "grad_norm": 0.5866732001304626, "learning_rate": 1.4445528455284554e-05, "loss": 0.0583, "step": 68320 }, { "epoch": 0.8332317073170732, "grad_norm": 0.18384571373462677, "learning_rate": 1.4445121951219513e-05, "loss": 0.0377, "step": 68325 }, { "epoch": 0.8332926829268292, "grad_norm": 0.39925363659858704, "learning_rate": 1.4444715447154473e-05, "loss": 0.0316, "step": 68330 }, { "epoch": 0.8333536585365854, "grad_norm": 0.7803840637207031, "learning_rate": 1.4444308943089431e-05, "loss": 0.0642, "step": 68335 }, { "epoch": 0.8334146341463414, "grad_norm": 1.5234510898590088, "learning_rate": 1.4443902439024391e-05, "loss": 0.0338, "step": 68340 }, { "epoch": 0.8334756097560976, "grad_norm": 0.5442762970924377, "learning_rate": 1.4443495934959351e-05, "loss": 0.0547, "step": 68345 }, { "epoch": 0.8335365853658536, "grad_norm": 0.3393392562866211, "learning_rate": 1.444308943089431e-05, "loss": 0.0496, "step": 68350 }, { "epoch": 0.8335975609756098, "grad_norm": 0.5185678601264954, "learning_rate": 1.444268292682927e-05, "loss": 0.0377, "step": 68355 }, { "epoch": 0.8336585365853658, "grad_norm": 0.35516810417175293, "learning_rate": 1.444227642276423e-05, "loss": 0.0331, "step": 68360 }, { "epoch": 0.833719512195122, "grad_norm": 0.36902186274528503, "learning_rate": 1.4441869918699187e-05, "loss": 0.0609, "step": 68365 }, { "epoch": 0.833780487804878, "grad_norm": 0.42310991883277893, "learning_rate": 1.4441463414634147e-05, "loss": 0.0433, "step": 68370 }, { "epoch": 0.8338414634146342, "grad_norm": 0.5812153816223145, "learning_rate": 1.4441056910569106e-05, "loss": 0.0446, "step": 68375 }, { "epoch": 0.8339024390243902, "grad_norm": 0.5401787757873535, "learning_rate": 1.4440650406504066e-05, "loss": 0.0452, "step": 68380 }, { "epoch": 0.8339634146341464, "grad_norm": 1.1431350708007812, "learning_rate": 1.4440243902439026e-05, "loss": 0.0496, "step": 68385 }, { "epoch": 0.8340243902439024, "grad_norm": 0.6985152363777161, "learning_rate": 1.4439837398373986e-05, "loss": 0.0784, "step": 68390 }, { "epoch": 0.8340853658536586, "grad_norm": 0.8099421858787537, "learning_rate": 1.4439430894308944e-05, "loss": 0.0458, "step": 68395 }, { "epoch": 0.8341463414634146, "grad_norm": 0.48207536339759827, "learning_rate": 1.4439024390243904e-05, "loss": 0.0295, "step": 68400 }, { "epoch": 0.8342073170731708, "grad_norm": 2.2953526973724365, "learning_rate": 1.4438617886178862e-05, "loss": 0.0412, "step": 68405 }, { "epoch": 0.8342682926829268, "grad_norm": 0.562386155128479, "learning_rate": 1.4438211382113822e-05, "loss": 0.064, "step": 68410 }, { "epoch": 0.834329268292683, "grad_norm": 0.7868521213531494, "learning_rate": 1.4437804878048782e-05, "loss": 0.0838, "step": 68415 }, { "epoch": 0.834390243902439, "grad_norm": 0.6015167236328125, "learning_rate": 1.4437398373983741e-05, "loss": 0.0444, "step": 68420 }, { "epoch": 0.8344512195121951, "grad_norm": 0.5457426905632019, "learning_rate": 1.44369918699187e-05, "loss": 0.0303, "step": 68425 }, { "epoch": 0.8345121951219512, "grad_norm": 0.6646619439125061, "learning_rate": 1.443658536585366e-05, "loss": 0.0306, "step": 68430 }, { "epoch": 0.8345731707317073, "grad_norm": 0.3635112941265106, "learning_rate": 1.443617886178862e-05, "loss": 0.0388, "step": 68435 }, { "epoch": 0.8346341463414634, "grad_norm": 0.3783465623855591, "learning_rate": 1.4435772357723579e-05, "loss": 0.0312, "step": 68440 }, { "epoch": 0.8346951219512195, "grad_norm": 0.5388421416282654, "learning_rate": 1.4435365853658539e-05, "loss": 0.0343, "step": 68445 }, { "epoch": 0.8347560975609756, "grad_norm": 0.4700915217399597, "learning_rate": 1.4434959349593499e-05, "loss": 0.0837, "step": 68450 }, { "epoch": 0.8348170731707317, "grad_norm": 0.7068333029747009, "learning_rate": 1.4434552845528455e-05, "loss": 0.0466, "step": 68455 }, { "epoch": 0.8348780487804878, "grad_norm": 1.4455662965774536, "learning_rate": 1.4434146341463415e-05, "loss": 0.0647, "step": 68460 }, { "epoch": 0.8349390243902439, "grad_norm": 0.8345768451690674, "learning_rate": 1.4433739837398375e-05, "loss": 0.0482, "step": 68465 }, { "epoch": 0.835, "grad_norm": 1.7027043104171753, "learning_rate": 1.4433333333333335e-05, "loss": 0.0497, "step": 68470 }, { "epoch": 0.8350609756097561, "grad_norm": 0.6606848835945129, "learning_rate": 1.4432926829268294e-05, "loss": 0.0471, "step": 68475 }, { "epoch": 0.8351219512195122, "grad_norm": 0.6135147213935852, "learning_rate": 1.4432520325203254e-05, "loss": 0.0392, "step": 68480 }, { "epoch": 0.8351829268292683, "grad_norm": 0.9483045339584351, "learning_rate": 1.4432113821138212e-05, "loss": 0.0432, "step": 68485 }, { "epoch": 0.8352439024390244, "grad_norm": 0.39348772168159485, "learning_rate": 1.4431707317073172e-05, "loss": 0.0361, "step": 68490 }, { "epoch": 0.8353048780487805, "grad_norm": 0.42137521505355835, "learning_rate": 1.443130081300813e-05, "loss": 0.0477, "step": 68495 }, { "epoch": 0.8353658536585366, "grad_norm": 0.39804747700691223, "learning_rate": 1.443089430894309e-05, "loss": 0.0378, "step": 68500 }, { "epoch": 0.8354268292682927, "grad_norm": 0.5096694231033325, "learning_rate": 1.443048780487805e-05, "loss": 0.0645, "step": 68505 }, { "epoch": 0.8354878048780487, "grad_norm": 0.4857456684112549, "learning_rate": 1.443008130081301e-05, "loss": 0.05, "step": 68510 }, { "epoch": 0.8355487804878049, "grad_norm": 0.6194814443588257, "learning_rate": 1.4429674796747968e-05, "loss": 0.0414, "step": 68515 }, { "epoch": 0.8356097560975609, "grad_norm": 0.2830771207809448, "learning_rate": 1.4429268292682928e-05, "loss": 0.0467, "step": 68520 }, { "epoch": 0.8356707317073171, "grad_norm": 0.5419443249702454, "learning_rate": 1.4428861788617888e-05, "loss": 0.0279, "step": 68525 }, { "epoch": 0.8357317073170731, "grad_norm": 0.4266595244407654, "learning_rate": 1.4428455284552847e-05, "loss": 0.0315, "step": 68530 }, { "epoch": 0.8357926829268293, "grad_norm": 1.148694396018982, "learning_rate": 1.4428048780487807e-05, "loss": 0.0475, "step": 68535 }, { "epoch": 0.8358536585365853, "grad_norm": 0.5348846912384033, "learning_rate": 1.4427642276422767e-05, "loss": 0.0462, "step": 68540 }, { "epoch": 0.8359146341463415, "grad_norm": 0.5638325214385986, "learning_rate": 1.4427235772357723e-05, "loss": 0.0381, "step": 68545 }, { "epoch": 0.8359756097560975, "grad_norm": 0.5149259567260742, "learning_rate": 1.4426829268292683e-05, "loss": 0.04, "step": 68550 }, { "epoch": 0.8360365853658537, "grad_norm": 0.9477027058601379, "learning_rate": 1.4426422764227643e-05, "loss": 0.0305, "step": 68555 }, { "epoch": 0.8360975609756097, "grad_norm": 0.6091075539588928, "learning_rate": 1.4426016260162603e-05, "loss": 0.0464, "step": 68560 }, { "epoch": 0.8361585365853659, "grad_norm": 0.5699810981750488, "learning_rate": 1.4425609756097563e-05, "loss": 0.0349, "step": 68565 }, { "epoch": 0.8362195121951219, "grad_norm": 0.6868264079093933, "learning_rate": 1.4425203252032523e-05, "loss": 0.0403, "step": 68570 }, { "epoch": 0.8362804878048781, "grad_norm": 0.6667306423187256, "learning_rate": 1.442479674796748e-05, "loss": 0.0357, "step": 68575 }, { "epoch": 0.8363414634146341, "grad_norm": 0.3132162094116211, "learning_rate": 1.442439024390244e-05, "loss": 0.0211, "step": 68580 }, { "epoch": 0.8364024390243903, "grad_norm": 1.3466706275939941, "learning_rate": 1.4423983739837399e-05, "loss": 0.0656, "step": 68585 }, { "epoch": 0.8364634146341463, "grad_norm": 0.2908531427383423, "learning_rate": 1.4423577235772358e-05, "loss": 0.0435, "step": 68590 }, { "epoch": 0.8365243902439025, "grad_norm": 1.8891462087631226, "learning_rate": 1.4423170731707318e-05, "loss": 0.0397, "step": 68595 }, { "epoch": 0.8365853658536585, "grad_norm": 0.31516706943511963, "learning_rate": 1.4422764227642278e-05, "loss": 0.0317, "step": 68600 }, { "epoch": 0.8366463414634147, "grad_norm": 0.6600328683853149, "learning_rate": 1.4422357723577236e-05, "loss": 0.0668, "step": 68605 }, { "epoch": 0.8367073170731707, "grad_norm": 0.37688419222831726, "learning_rate": 1.4421951219512196e-05, "loss": 0.0439, "step": 68610 }, { "epoch": 0.8367682926829269, "grad_norm": 0.805486261844635, "learning_rate": 1.4421544715447156e-05, "loss": 0.061, "step": 68615 }, { "epoch": 0.8368292682926829, "grad_norm": 0.48254093527793884, "learning_rate": 1.4421138211382116e-05, "loss": 0.0381, "step": 68620 }, { "epoch": 0.836890243902439, "grad_norm": 0.4245980978012085, "learning_rate": 1.4420731707317076e-05, "loss": 0.0627, "step": 68625 }, { "epoch": 0.8369512195121951, "grad_norm": 0.4318089783191681, "learning_rate": 1.4420325203252035e-05, "loss": 0.0274, "step": 68630 }, { "epoch": 0.8370121951219512, "grad_norm": 0.5818338394165039, "learning_rate": 1.4419918699186992e-05, "loss": 0.0752, "step": 68635 }, { "epoch": 0.8370731707317073, "grad_norm": 1.2431751489639282, "learning_rate": 1.4419512195121952e-05, "loss": 0.0495, "step": 68640 }, { "epoch": 0.8371341463414634, "grad_norm": 0.6598655581474304, "learning_rate": 1.4419105691056911e-05, "loss": 0.0423, "step": 68645 }, { "epoch": 0.8371951219512195, "grad_norm": 0.696359395980835, "learning_rate": 1.4418699186991871e-05, "loss": 0.0657, "step": 68650 }, { "epoch": 0.8372560975609756, "grad_norm": 0.22570012509822845, "learning_rate": 1.4418292682926831e-05, "loss": 0.0612, "step": 68655 }, { "epoch": 0.8373170731707317, "grad_norm": 0.59493088722229, "learning_rate": 1.4417886178861791e-05, "loss": 0.0528, "step": 68660 }, { "epoch": 0.8373780487804878, "grad_norm": 0.6894636154174805, "learning_rate": 1.4417479674796749e-05, "loss": 0.072, "step": 68665 }, { "epoch": 0.8374390243902439, "grad_norm": 0.5932570695877075, "learning_rate": 1.4417073170731707e-05, "loss": 0.0469, "step": 68670 }, { "epoch": 0.8375, "grad_norm": 0.5337029695510864, "learning_rate": 1.4416666666666667e-05, "loss": 0.0329, "step": 68675 }, { "epoch": 0.8375609756097561, "grad_norm": 1.8003376722335815, "learning_rate": 1.4416260162601627e-05, "loss": 0.1067, "step": 68680 }, { "epoch": 0.8376219512195122, "grad_norm": 4.512035369873047, "learning_rate": 1.4415853658536587e-05, "loss": 0.0382, "step": 68685 }, { "epoch": 0.8376829268292683, "grad_norm": 0.7068005800247192, "learning_rate": 1.4415447154471546e-05, "loss": 0.0612, "step": 68690 }, { "epoch": 0.8377439024390244, "grad_norm": 0.7029521465301514, "learning_rate": 1.4415040650406505e-05, "loss": 0.0473, "step": 68695 }, { "epoch": 0.8378048780487805, "grad_norm": 0.6292438507080078, "learning_rate": 1.4414634146341464e-05, "loss": 0.0321, "step": 68700 }, { "epoch": 0.8378658536585366, "grad_norm": 0.3339005708694458, "learning_rate": 1.4414227642276424e-05, "loss": 0.0284, "step": 68705 }, { "epoch": 0.8379268292682926, "grad_norm": 0.48440954089164734, "learning_rate": 1.4413821138211384e-05, "loss": 0.0487, "step": 68710 }, { "epoch": 0.8379878048780488, "grad_norm": 0.4175848662853241, "learning_rate": 1.4413414634146344e-05, "loss": 0.0527, "step": 68715 }, { "epoch": 0.8380487804878048, "grad_norm": 0.4471876919269562, "learning_rate": 1.4413008130081304e-05, "loss": 0.0496, "step": 68720 }, { "epoch": 0.838109756097561, "grad_norm": 0.9567237496376038, "learning_rate": 1.441260162601626e-05, "loss": 0.0417, "step": 68725 }, { "epoch": 0.838170731707317, "grad_norm": 0.35882800817489624, "learning_rate": 1.441219512195122e-05, "loss": 0.0515, "step": 68730 }, { "epoch": 0.8382317073170732, "grad_norm": 0.9133695363998413, "learning_rate": 1.441178861788618e-05, "loss": 0.0678, "step": 68735 }, { "epoch": 0.8382926829268292, "grad_norm": 0.9670981168746948, "learning_rate": 1.441138211382114e-05, "loss": 0.0739, "step": 68740 }, { "epoch": 0.8383536585365854, "grad_norm": 0.6630988717079163, "learning_rate": 1.44109756097561e-05, "loss": 0.0427, "step": 68745 }, { "epoch": 0.8384146341463414, "grad_norm": 1.5944703817367554, "learning_rate": 1.441056910569106e-05, "loss": 0.0378, "step": 68750 }, { "epoch": 0.8384756097560976, "grad_norm": 0.7136611938476562, "learning_rate": 1.4410162601626017e-05, "loss": 0.0453, "step": 68755 }, { "epoch": 0.8385365853658536, "grad_norm": 0.7002214193344116, "learning_rate": 1.4409756097560975e-05, "loss": 0.0245, "step": 68760 }, { "epoch": 0.8385975609756098, "grad_norm": 0.3437080681324005, "learning_rate": 1.4409349593495935e-05, "loss": 0.0355, "step": 68765 }, { "epoch": 0.8386585365853658, "grad_norm": 0.7882000803947449, "learning_rate": 1.4408943089430895e-05, "loss": 0.0449, "step": 68770 }, { "epoch": 0.838719512195122, "grad_norm": 1.6615628004074097, "learning_rate": 1.4408536585365855e-05, "loss": 0.0674, "step": 68775 }, { "epoch": 0.838780487804878, "grad_norm": 0.5646579265594482, "learning_rate": 1.4408130081300815e-05, "loss": 0.0481, "step": 68780 }, { "epoch": 0.8388414634146342, "grad_norm": 2.0705318450927734, "learning_rate": 1.4407723577235773e-05, "loss": 0.0481, "step": 68785 }, { "epoch": 0.8389024390243902, "grad_norm": 0.5449237823486328, "learning_rate": 1.4407317073170733e-05, "loss": 0.0411, "step": 68790 }, { "epoch": 0.8389634146341464, "grad_norm": 0.34065377712249756, "learning_rate": 1.4406910569105693e-05, "loss": 0.0436, "step": 68795 }, { "epoch": 0.8390243902439024, "grad_norm": 0.4180257022380829, "learning_rate": 1.4406504065040652e-05, "loss": 0.0373, "step": 68800 }, { "epoch": 0.8390853658536586, "grad_norm": 0.4446347951889038, "learning_rate": 1.4406097560975612e-05, "loss": 0.0736, "step": 68805 }, { "epoch": 0.8391463414634146, "grad_norm": 0.5791149139404297, "learning_rate": 1.4405691056910572e-05, "loss": 0.0379, "step": 68810 }, { "epoch": 0.8392073170731708, "grad_norm": 0.43414703011512756, "learning_rate": 1.4405284552845528e-05, "loss": 0.04, "step": 68815 }, { "epoch": 0.8392682926829268, "grad_norm": 0.673466145992279, "learning_rate": 1.4404878048780488e-05, "loss": 0.0375, "step": 68820 }, { "epoch": 0.839329268292683, "grad_norm": 0.42465928196907043, "learning_rate": 1.4404471544715448e-05, "loss": 0.0302, "step": 68825 }, { "epoch": 0.839390243902439, "grad_norm": 0.53815096616745, "learning_rate": 1.4404065040650408e-05, "loss": 0.0423, "step": 68830 }, { "epoch": 0.8394512195121951, "grad_norm": 0.6525099873542786, "learning_rate": 1.4403658536585368e-05, "loss": 0.0419, "step": 68835 }, { "epoch": 0.8395121951219512, "grad_norm": 0.46264490485191345, "learning_rate": 1.4403252032520328e-05, "loss": 0.0453, "step": 68840 }, { "epoch": 0.8395731707317073, "grad_norm": 0.7522948980331421, "learning_rate": 1.4402845528455284e-05, "loss": 0.043, "step": 68845 }, { "epoch": 0.8396341463414634, "grad_norm": 0.4902370870113373, "learning_rate": 1.4402439024390244e-05, "loss": 0.0575, "step": 68850 }, { "epoch": 0.8396951219512195, "grad_norm": 0.8475025296211243, "learning_rate": 1.4402032520325204e-05, "loss": 0.0727, "step": 68855 }, { "epoch": 0.8397560975609756, "grad_norm": 0.5902956128120422, "learning_rate": 1.4401626016260163e-05, "loss": 0.0371, "step": 68860 }, { "epoch": 0.8398170731707317, "grad_norm": 0.4963100552558899, "learning_rate": 1.4401219512195123e-05, "loss": 0.0551, "step": 68865 }, { "epoch": 0.8398780487804878, "grad_norm": 0.7450962066650391, "learning_rate": 1.4400813008130083e-05, "loss": 0.0793, "step": 68870 }, { "epoch": 0.8399390243902439, "grad_norm": 0.7207644581794739, "learning_rate": 1.4400406504065041e-05, "loss": 0.0551, "step": 68875 }, { "epoch": 0.84, "grad_norm": 0.4535031318664551, "learning_rate": 1.4400000000000001e-05, "loss": 0.0277, "step": 68880 }, { "epoch": 0.8400609756097561, "grad_norm": 0.27042269706726074, "learning_rate": 1.4399593495934961e-05, "loss": 0.0359, "step": 68885 }, { "epoch": 0.8401219512195122, "grad_norm": 0.6650047302246094, "learning_rate": 1.439918699186992e-05, "loss": 0.03, "step": 68890 }, { "epoch": 0.8401829268292683, "grad_norm": 0.3603511452674866, "learning_rate": 1.439878048780488e-05, "loss": 0.0305, "step": 68895 }, { "epoch": 0.8402439024390244, "grad_norm": 0.605145275592804, "learning_rate": 1.439837398373984e-05, "loss": 0.0449, "step": 68900 }, { "epoch": 0.8403048780487805, "grad_norm": 2.0366570949554443, "learning_rate": 1.4397967479674797e-05, "loss": 0.0556, "step": 68905 }, { "epoch": 0.8403658536585366, "grad_norm": 0.6230202913284302, "learning_rate": 1.4397560975609757e-05, "loss": 0.0416, "step": 68910 }, { "epoch": 0.8404268292682927, "grad_norm": 0.2624397277832031, "learning_rate": 1.4397154471544716e-05, "loss": 0.0254, "step": 68915 }, { "epoch": 0.8404878048780487, "grad_norm": 0.4886144697666168, "learning_rate": 1.4396747967479676e-05, "loss": 0.0681, "step": 68920 }, { "epoch": 0.8405487804878049, "grad_norm": 0.4783265292644501, "learning_rate": 1.4396341463414636e-05, "loss": 0.0671, "step": 68925 }, { "epoch": 0.840609756097561, "grad_norm": 0.4301152527332306, "learning_rate": 1.4395934959349596e-05, "loss": 0.0275, "step": 68930 }, { "epoch": 0.8406707317073171, "grad_norm": 0.31728190183639526, "learning_rate": 1.4395528455284552e-05, "loss": 0.0403, "step": 68935 }, { "epoch": 0.8407317073170731, "grad_norm": 1.3727213144302368, "learning_rate": 1.4395121951219512e-05, "loss": 0.0414, "step": 68940 }, { "epoch": 0.8407926829268293, "grad_norm": 0.7238288521766663, "learning_rate": 1.4394715447154472e-05, "loss": 0.0446, "step": 68945 }, { "epoch": 0.8408536585365853, "grad_norm": 0.45840951800346375, "learning_rate": 1.4394308943089432e-05, "loss": 0.0381, "step": 68950 }, { "epoch": 0.8409146341463415, "grad_norm": 0.5773155689239502, "learning_rate": 1.4393902439024392e-05, "loss": 0.0509, "step": 68955 }, { "epoch": 0.8409756097560975, "grad_norm": 0.33235809206962585, "learning_rate": 1.4393495934959351e-05, "loss": 0.0777, "step": 68960 }, { "epoch": 0.8410365853658537, "grad_norm": 0.677914023399353, "learning_rate": 1.439308943089431e-05, "loss": 0.0347, "step": 68965 }, { "epoch": 0.8410975609756097, "grad_norm": 0.37627390027046204, "learning_rate": 1.439268292682927e-05, "loss": 0.0565, "step": 68970 }, { "epoch": 0.8411585365853659, "grad_norm": 0.5697197318077087, "learning_rate": 1.439227642276423e-05, "loss": 0.0396, "step": 68975 }, { "epoch": 0.8412195121951219, "grad_norm": 0.9586141705513, "learning_rate": 1.4391869918699189e-05, "loss": 0.0736, "step": 68980 }, { "epoch": 0.8412804878048781, "grad_norm": 0.44256067276000977, "learning_rate": 1.4391463414634149e-05, "loss": 0.0461, "step": 68985 }, { "epoch": 0.8413414634146341, "grad_norm": 1.9784176349639893, "learning_rate": 1.4391056910569109e-05, "loss": 0.0491, "step": 68990 }, { "epoch": 0.8414024390243903, "grad_norm": 0.6822493672370911, "learning_rate": 1.4390650406504065e-05, "loss": 0.0401, "step": 68995 }, { "epoch": 0.8414634146341463, "grad_norm": 0.4556739330291748, "learning_rate": 1.4390243902439025e-05, "loss": 0.0518, "step": 69000 }, { "epoch": 0.8415243902439025, "grad_norm": 0.7926552295684814, "learning_rate": 1.4389837398373985e-05, "loss": 0.0461, "step": 69005 }, { "epoch": 0.8415853658536585, "grad_norm": 0.40361279249191284, "learning_rate": 1.4389430894308945e-05, "loss": 0.0394, "step": 69010 }, { "epoch": 0.8416463414634147, "grad_norm": 0.4567972421646118, "learning_rate": 1.4389024390243904e-05, "loss": 0.0238, "step": 69015 }, { "epoch": 0.8417073170731707, "grad_norm": 0.5564677119255066, "learning_rate": 1.4388617886178864e-05, "loss": 0.0328, "step": 69020 }, { "epoch": 0.8417682926829269, "grad_norm": 0.8513609766960144, "learning_rate": 1.438821138211382e-05, "loss": 0.0672, "step": 69025 }, { "epoch": 0.8418292682926829, "grad_norm": 0.6386876702308655, "learning_rate": 1.438780487804878e-05, "loss": 0.0258, "step": 69030 }, { "epoch": 0.841890243902439, "grad_norm": 0.5746679306030273, "learning_rate": 1.438739837398374e-05, "loss": 0.0676, "step": 69035 }, { "epoch": 0.8419512195121951, "grad_norm": 0.8415563702583313, "learning_rate": 1.43869918699187e-05, "loss": 0.0441, "step": 69040 }, { "epoch": 0.8420121951219512, "grad_norm": 0.3755054771900177, "learning_rate": 1.438658536585366e-05, "loss": 0.0264, "step": 69045 }, { "epoch": 0.8420731707317073, "grad_norm": 0.8504658341407776, "learning_rate": 1.438617886178862e-05, "loss": 0.0361, "step": 69050 }, { "epoch": 0.8421341463414634, "grad_norm": 0.5373388528823853, "learning_rate": 1.4385772357723578e-05, "loss": 0.0467, "step": 69055 }, { "epoch": 0.8421951219512195, "grad_norm": 0.8084474802017212, "learning_rate": 1.4385365853658538e-05, "loss": 0.0503, "step": 69060 }, { "epoch": 0.8422560975609756, "grad_norm": 0.8251451253890991, "learning_rate": 1.4384959349593498e-05, "loss": 0.0501, "step": 69065 }, { "epoch": 0.8423170731707317, "grad_norm": 1.366377592086792, "learning_rate": 1.4384552845528457e-05, "loss": 0.0451, "step": 69070 }, { "epoch": 0.8423780487804878, "grad_norm": 0.7892324328422546, "learning_rate": 1.4384146341463417e-05, "loss": 0.0409, "step": 69075 }, { "epoch": 0.8424390243902439, "grad_norm": 0.6335516571998596, "learning_rate": 1.4383739837398375e-05, "loss": 0.0551, "step": 69080 }, { "epoch": 0.8425, "grad_norm": 0.411885142326355, "learning_rate": 1.4383333333333333e-05, "loss": 0.0532, "step": 69085 }, { "epoch": 0.8425609756097561, "grad_norm": 0.46901166439056396, "learning_rate": 1.4382926829268293e-05, "loss": 0.0502, "step": 69090 }, { "epoch": 0.8426219512195122, "grad_norm": 0.2937055230140686, "learning_rate": 1.4382520325203253e-05, "loss": 0.0376, "step": 69095 }, { "epoch": 0.8426829268292683, "grad_norm": 0.36449870467185974, "learning_rate": 1.4382113821138213e-05, "loss": 0.041, "step": 69100 }, { "epoch": 0.8427439024390244, "grad_norm": 0.5908575654029846, "learning_rate": 1.4381707317073173e-05, "loss": 0.0371, "step": 69105 }, { "epoch": 0.8428048780487805, "grad_norm": 0.4307720363140106, "learning_rate": 1.4381300813008133e-05, "loss": 0.0764, "step": 69110 }, { "epoch": 0.8428658536585366, "grad_norm": 0.3888591229915619, "learning_rate": 1.4380894308943089e-05, "loss": 0.0421, "step": 69115 }, { "epoch": 0.8429268292682927, "grad_norm": 0.5321288704872131, "learning_rate": 1.4380487804878049e-05, "loss": 0.0387, "step": 69120 }, { "epoch": 0.8429878048780488, "grad_norm": 0.4726233184337616, "learning_rate": 1.4380081300813009e-05, "loss": 0.0368, "step": 69125 }, { "epoch": 0.8430487804878048, "grad_norm": 0.5049808621406555, "learning_rate": 1.4379674796747968e-05, "loss": 0.0424, "step": 69130 }, { "epoch": 0.843109756097561, "grad_norm": 0.6313337683677673, "learning_rate": 1.4379268292682928e-05, "loss": 0.0471, "step": 69135 }, { "epoch": 0.843170731707317, "grad_norm": 0.2735004425048828, "learning_rate": 1.4378861788617888e-05, "loss": 0.026, "step": 69140 }, { "epoch": 0.8432317073170732, "grad_norm": 0.3945689797401428, "learning_rate": 1.4378455284552846e-05, "loss": 0.0474, "step": 69145 }, { "epoch": 0.8432926829268292, "grad_norm": 0.5455473065376282, "learning_rate": 1.4378048780487806e-05, "loss": 0.0407, "step": 69150 }, { "epoch": 0.8433536585365854, "grad_norm": 0.5740822553634644, "learning_rate": 1.4377642276422766e-05, "loss": 0.0368, "step": 69155 }, { "epoch": 0.8434146341463414, "grad_norm": 0.8150089979171753, "learning_rate": 1.4377235772357726e-05, "loss": 0.0457, "step": 69160 }, { "epoch": 0.8434756097560976, "grad_norm": 0.8389765024185181, "learning_rate": 1.4376829268292685e-05, "loss": 0.0538, "step": 69165 }, { "epoch": 0.8435365853658536, "grad_norm": 0.4674333333969116, "learning_rate": 1.4376422764227644e-05, "loss": 0.0387, "step": 69170 }, { "epoch": 0.8435975609756098, "grad_norm": 0.33287855982780457, "learning_rate": 1.4376016260162602e-05, "loss": 0.0216, "step": 69175 }, { "epoch": 0.8436585365853658, "grad_norm": 0.5510727167129517, "learning_rate": 1.4375609756097562e-05, "loss": 0.0405, "step": 69180 }, { "epoch": 0.843719512195122, "grad_norm": 0.6437557935714722, "learning_rate": 1.4375203252032521e-05, "loss": 0.033, "step": 69185 }, { "epoch": 0.843780487804878, "grad_norm": 0.7331780791282654, "learning_rate": 1.4374796747967481e-05, "loss": 0.0286, "step": 69190 }, { "epoch": 0.8438414634146342, "grad_norm": 0.3838605284690857, "learning_rate": 1.4374390243902441e-05, "loss": 0.0348, "step": 69195 }, { "epoch": 0.8439024390243902, "grad_norm": 0.7682926654815674, "learning_rate": 1.43739837398374e-05, "loss": 0.0434, "step": 69200 }, { "epoch": 0.8439634146341464, "grad_norm": 0.5059729218482971, "learning_rate": 1.4373577235772357e-05, "loss": 0.0314, "step": 69205 }, { "epoch": 0.8440243902439024, "grad_norm": 0.3317699432373047, "learning_rate": 1.4373170731707317e-05, "loss": 0.0379, "step": 69210 }, { "epoch": 0.8440853658536586, "grad_norm": 0.4594379961490631, "learning_rate": 1.4372764227642277e-05, "loss": 0.0449, "step": 69215 }, { "epoch": 0.8441463414634146, "grad_norm": 1.422196388244629, "learning_rate": 1.4372357723577237e-05, "loss": 0.0559, "step": 69220 }, { "epoch": 0.8442073170731708, "grad_norm": 2.661673069000244, "learning_rate": 1.4371951219512197e-05, "loss": 0.072, "step": 69225 }, { "epoch": 0.8442682926829268, "grad_norm": 0.833634614944458, "learning_rate": 1.4371544715447156e-05, "loss": 0.0319, "step": 69230 }, { "epoch": 0.844329268292683, "grad_norm": 0.30097848176956177, "learning_rate": 1.4371138211382115e-05, "loss": 0.0373, "step": 69235 }, { "epoch": 0.844390243902439, "grad_norm": 0.53182452917099, "learning_rate": 1.4370731707317074e-05, "loss": 0.051, "step": 69240 }, { "epoch": 0.8444512195121952, "grad_norm": 0.8086622357368469, "learning_rate": 1.4370325203252034e-05, "loss": 0.0632, "step": 69245 }, { "epoch": 0.8445121951219512, "grad_norm": 0.4792203903198242, "learning_rate": 1.4369918699186994e-05, "loss": 0.035, "step": 69250 }, { "epoch": 0.8445731707317073, "grad_norm": 0.4225824177265167, "learning_rate": 1.4369512195121952e-05, "loss": 0.033, "step": 69255 }, { "epoch": 0.8446341463414634, "grad_norm": 0.39849522709846497, "learning_rate": 1.4369105691056912e-05, "loss": 0.0236, "step": 69260 }, { "epoch": 0.8446951219512195, "grad_norm": 0.648776650428772, "learning_rate": 1.436869918699187e-05, "loss": 0.0401, "step": 69265 }, { "epoch": 0.8447560975609756, "grad_norm": 0.32570880651474, "learning_rate": 1.436829268292683e-05, "loss": 0.0605, "step": 69270 }, { "epoch": 0.8448170731707317, "grad_norm": 0.9102585911750793, "learning_rate": 1.436788617886179e-05, "loss": 0.0437, "step": 69275 }, { "epoch": 0.8448780487804878, "grad_norm": 0.47518032789230347, "learning_rate": 1.436747967479675e-05, "loss": 0.0782, "step": 69280 }, { "epoch": 0.8449390243902439, "grad_norm": 0.633209764957428, "learning_rate": 1.436707317073171e-05, "loss": 0.045, "step": 69285 }, { "epoch": 0.845, "grad_norm": 0.4745110869407654, "learning_rate": 1.436666666666667e-05, "loss": 0.0366, "step": 69290 }, { "epoch": 0.8450609756097561, "grad_norm": 0.4942936897277832, "learning_rate": 1.4366260162601626e-05, "loss": 0.0379, "step": 69295 }, { "epoch": 0.8451219512195122, "grad_norm": 0.7253304123878479, "learning_rate": 1.4365853658536585e-05, "loss": 0.0308, "step": 69300 }, { "epoch": 0.8451829268292683, "grad_norm": 0.7757853865623474, "learning_rate": 1.4365447154471545e-05, "loss": 0.0254, "step": 69305 }, { "epoch": 0.8452439024390244, "grad_norm": 1.0897066593170166, "learning_rate": 1.4365040650406505e-05, "loss": 0.036, "step": 69310 }, { "epoch": 0.8453048780487805, "grad_norm": 0.5426456332206726, "learning_rate": 1.4364634146341465e-05, "loss": 0.0709, "step": 69315 }, { "epoch": 0.8453658536585366, "grad_norm": 0.5568783283233643, "learning_rate": 1.4364227642276425e-05, "loss": 0.0377, "step": 69320 }, { "epoch": 0.8454268292682927, "grad_norm": 0.5540744662284851, "learning_rate": 1.4363821138211383e-05, "loss": 0.0592, "step": 69325 }, { "epoch": 0.8454878048780488, "grad_norm": 0.5746530294418335, "learning_rate": 1.4363414634146343e-05, "loss": 0.0845, "step": 69330 }, { "epoch": 0.8455487804878049, "grad_norm": 0.6693692803382874, "learning_rate": 1.4363008130081302e-05, "loss": 0.0547, "step": 69335 }, { "epoch": 0.845609756097561, "grad_norm": 0.2714707553386688, "learning_rate": 1.4362601626016262e-05, "loss": 0.0655, "step": 69340 }, { "epoch": 0.8456707317073171, "grad_norm": 0.6603233814239502, "learning_rate": 1.436219512195122e-05, "loss": 0.0493, "step": 69345 }, { "epoch": 0.8457317073170731, "grad_norm": 0.6531426906585693, "learning_rate": 1.436178861788618e-05, "loss": 0.0304, "step": 69350 }, { "epoch": 0.8457926829268293, "grad_norm": 0.3230782449245453, "learning_rate": 1.4361382113821138e-05, "loss": 0.0402, "step": 69355 }, { "epoch": 0.8458536585365853, "grad_norm": 0.68714439868927, "learning_rate": 1.4360975609756098e-05, "loss": 0.0735, "step": 69360 }, { "epoch": 0.8459146341463415, "grad_norm": 0.8056955933570862, "learning_rate": 1.4360569105691058e-05, "loss": 0.0639, "step": 69365 }, { "epoch": 0.8459756097560975, "grad_norm": 0.5367429852485657, "learning_rate": 1.4360162601626018e-05, "loss": 0.036, "step": 69370 }, { "epoch": 0.8460365853658537, "grad_norm": 1.4984326362609863, "learning_rate": 1.4359756097560978e-05, "loss": 0.0793, "step": 69375 }, { "epoch": 0.8460975609756097, "grad_norm": 0.2909967303276062, "learning_rate": 1.4359349593495937e-05, "loss": 0.0959, "step": 69380 }, { "epoch": 0.8461585365853659, "grad_norm": 0.5285242795944214, "learning_rate": 1.4358943089430894e-05, "loss": 0.0773, "step": 69385 }, { "epoch": 0.8462195121951219, "grad_norm": 0.3594607412815094, "learning_rate": 1.4358536585365854e-05, "loss": 0.0533, "step": 69390 }, { "epoch": 0.8462804878048781, "grad_norm": 0.5794939398765564, "learning_rate": 1.4358130081300814e-05, "loss": 0.0318, "step": 69395 }, { "epoch": 0.8463414634146341, "grad_norm": 0.7262267470359802, "learning_rate": 1.4357723577235773e-05, "loss": 0.0356, "step": 69400 }, { "epoch": 0.8464024390243903, "grad_norm": 0.49432164430618286, "learning_rate": 1.4357317073170733e-05, "loss": 0.0323, "step": 69405 }, { "epoch": 0.8464634146341463, "grad_norm": 0.48252204060554504, "learning_rate": 1.4356910569105693e-05, "loss": 0.0441, "step": 69410 }, { "epoch": 0.8465243902439025, "grad_norm": 0.43027496337890625, "learning_rate": 1.4356504065040651e-05, "loss": 0.0419, "step": 69415 }, { "epoch": 0.8465853658536585, "grad_norm": 0.9548021554946899, "learning_rate": 1.4356097560975611e-05, "loss": 0.0326, "step": 69420 }, { "epoch": 0.8466463414634147, "grad_norm": 1.2191790342330933, "learning_rate": 1.435569105691057e-05, "loss": 0.0284, "step": 69425 }, { "epoch": 0.8467073170731707, "grad_norm": 1.7883491516113281, "learning_rate": 1.435528455284553e-05, "loss": 0.0667, "step": 69430 }, { "epoch": 0.8467682926829269, "grad_norm": 0.4812500476837158, "learning_rate": 1.4354878048780489e-05, "loss": 0.0516, "step": 69435 }, { "epoch": 0.8468292682926829, "grad_norm": 0.42611944675445557, "learning_rate": 1.4354471544715449e-05, "loss": 0.0374, "step": 69440 }, { "epoch": 0.846890243902439, "grad_norm": 0.6075436472892761, "learning_rate": 1.4354065040650407e-05, "loss": 0.0607, "step": 69445 }, { "epoch": 0.8469512195121951, "grad_norm": 0.631912887096405, "learning_rate": 1.4353658536585367e-05, "loss": 0.0341, "step": 69450 }, { "epoch": 0.8470121951219513, "grad_norm": 0.506743848323822, "learning_rate": 1.4353252032520326e-05, "loss": 0.0426, "step": 69455 }, { "epoch": 0.8470731707317073, "grad_norm": 0.8464556336402893, "learning_rate": 1.4352845528455286e-05, "loss": 0.0433, "step": 69460 }, { "epoch": 0.8471341463414634, "grad_norm": 0.9546468257904053, "learning_rate": 1.4352439024390246e-05, "loss": 0.0426, "step": 69465 }, { "epoch": 0.8471951219512195, "grad_norm": 0.5095991492271423, "learning_rate": 1.4352032520325206e-05, "loss": 0.0435, "step": 69470 }, { "epoch": 0.8472560975609756, "grad_norm": 0.6204387545585632, "learning_rate": 1.4351626016260162e-05, "loss": 0.0548, "step": 69475 }, { "epoch": 0.8473170731707317, "grad_norm": 0.4345191419124603, "learning_rate": 1.4351219512195122e-05, "loss": 0.0619, "step": 69480 }, { "epoch": 0.8473780487804878, "grad_norm": 0.7808007001876831, "learning_rate": 1.4350813008130082e-05, "loss": 0.0477, "step": 69485 }, { "epoch": 0.8474390243902439, "grad_norm": 1.3175442218780518, "learning_rate": 1.4350406504065042e-05, "loss": 0.0465, "step": 69490 }, { "epoch": 0.8475, "grad_norm": 0.1469305455684662, "learning_rate": 1.4350000000000002e-05, "loss": 0.0289, "step": 69495 }, { "epoch": 0.8475609756097561, "grad_norm": 0.6817193031311035, "learning_rate": 1.4349593495934961e-05, "loss": 0.0234, "step": 69500 }, { "epoch": 0.8476219512195122, "grad_norm": 0.37676748633384705, "learning_rate": 1.434918699186992e-05, "loss": 0.0407, "step": 69505 }, { "epoch": 0.8476829268292683, "grad_norm": 0.4807104766368866, "learning_rate": 1.434878048780488e-05, "loss": 0.0413, "step": 69510 }, { "epoch": 0.8477439024390244, "grad_norm": 0.18036742508411407, "learning_rate": 1.4348373983739839e-05, "loss": 0.0519, "step": 69515 }, { "epoch": 0.8478048780487805, "grad_norm": 0.5350101590156555, "learning_rate": 1.4347967479674797e-05, "loss": 0.0436, "step": 69520 }, { "epoch": 0.8478658536585366, "grad_norm": 0.9213530421257019, "learning_rate": 1.4347560975609757e-05, "loss": 0.0434, "step": 69525 }, { "epoch": 0.8479268292682927, "grad_norm": 0.3945353329181671, "learning_rate": 1.4347154471544717e-05, "loss": 0.0407, "step": 69530 }, { "epoch": 0.8479878048780488, "grad_norm": 1.0826129913330078, "learning_rate": 1.4346747967479675e-05, "loss": 0.038, "step": 69535 }, { "epoch": 0.8480487804878049, "grad_norm": 1.0560061931610107, "learning_rate": 1.4346341463414635e-05, "loss": 0.0374, "step": 69540 }, { "epoch": 0.848109756097561, "grad_norm": 0.7507020235061646, "learning_rate": 1.4345934959349595e-05, "loss": 0.0633, "step": 69545 }, { "epoch": 0.848170731707317, "grad_norm": 0.4399053156375885, "learning_rate": 1.4345528455284554e-05, "loss": 0.0249, "step": 69550 }, { "epoch": 0.8482317073170732, "grad_norm": 0.640076756477356, "learning_rate": 1.4345121951219514e-05, "loss": 0.0355, "step": 69555 }, { "epoch": 0.8482926829268292, "grad_norm": 0.5025220513343811, "learning_rate": 1.4344715447154474e-05, "loss": 0.0219, "step": 69560 }, { "epoch": 0.8483536585365854, "grad_norm": 0.606945276260376, "learning_rate": 1.434430894308943e-05, "loss": 0.0408, "step": 69565 }, { "epoch": 0.8484146341463414, "grad_norm": 0.5843665599822998, "learning_rate": 1.434390243902439e-05, "loss": 0.0356, "step": 69570 }, { "epoch": 0.8484756097560976, "grad_norm": 0.5662742257118225, "learning_rate": 1.434349593495935e-05, "loss": 0.043, "step": 69575 }, { "epoch": 0.8485365853658536, "grad_norm": 0.3507862985134125, "learning_rate": 1.434308943089431e-05, "loss": 0.0671, "step": 69580 }, { "epoch": 0.8485975609756098, "grad_norm": 0.7490506768226624, "learning_rate": 1.434268292682927e-05, "loss": 0.0544, "step": 69585 }, { "epoch": 0.8486585365853658, "grad_norm": 0.8022969961166382, "learning_rate": 1.434227642276423e-05, "loss": 0.061, "step": 69590 }, { "epoch": 0.848719512195122, "grad_norm": 1.0884114503860474, "learning_rate": 1.4341869918699188e-05, "loss": 0.0397, "step": 69595 }, { "epoch": 0.848780487804878, "grad_norm": 0.3895591199398041, "learning_rate": 1.4341463414634148e-05, "loss": 0.0306, "step": 69600 }, { "epoch": 0.8488414634146342, "grad_norm": 0.9655464887619019, "learning_rate": 1.4341056910569107e-05, "loss": 0.0485, "step": 69605 }, { "epoch": 0.8489024390243902, "grad_norm": 0.3947978913784027, "learning_rate": 1.4340650406504066e-05, "loss": 0.044, "step": 69610 }, { "epoch": 0.8489634146341464, "grad_norm": 0.5488362908363342, "learning_rate": 1.4340243902439025e-05, "loss": 0.0431, "step": 69615 }, { "epoch": 0.8490243902439024, "grad_norm": 0.4392903745174408, "learning_rate": 1.4339837398373985e-05, "loss": 0.0544, "step": 69620 }, { "epoch": 0.8490853658536586, "grad_norm": 0.6607663631439209, "learning_rate": 1.4339430894308943e-05, "loss": 0.0471, "step": 69625 }, { "epoch": 0.8491463414634146, "grad_norm": 0.450591117143631, "learning_rate": 1.4339024390243903e-05, "loss": 0.0431, "step": 69630 }, { "epoch": 0.8492073170731708, "grad_norm": 0.4531943202018738, "learning_rate": 1.4338617886178863e-05, "loss": 0.0418, "step": 69635 }, { "epoch": 0.8492682926829268, "grad_norm": 0.31224626302719116, "learning_rate": 1.4338211382113823e-05, "loss": 0.0417, "step": 69640 }, { "epoch": 0.849329268292683, "grad_norm": 0.8225442171096802, "learning_rate": 1.4337804878048783e-05, "loss": 0.0419, "step": 69645 }, { "epoch": 0.849390243902439, "grad_norm": 1.2253913879394531, "learning_rate": 1.4337398373983742e-05, "loss": 0.0642, "step": 69650 }, { "epoch": 0.8494512195121952, "grad_norm": 0.5624138116836548, "learning_rate": 1.4336991869918699e-05, "loss": 0.0332, "step": 69655 }, { "epoch": 0.8495121951219512, "grad_norm": 1.1844109296798706, "learning_rate": 1.4336585365853659e-05, "loss": 0.0574, "step": 69660 }, { "epoch": 0.8495731707317074, "grad_norm": 1.3803989887237549, "learning_rate": 1.4336178861788619e-05, "loss": 0.0491, "step": 69665 }, { "epoch": 0.8496341463414634, "grad_norm": 0.4944506585597992, "learning_rate": 1.4335772357723578e-05, "loss": 0.0505, "step": 69670 }, { "epoch": 0.8496951219512195, "grad_norm": 0.6862828731536865, "learning_rate": 1.4335365853658538e-05, "loss": 0.0515, "step": 69675 }, { "epoch": 0.8497560975609756, "grad_norm": 1.3830233812332153, "learning_rate": 1.4334959349593498e-05, "loss": 0.0464, "step": 69680 }, { "epoch": 0.8498170731707317, "grad_norm": 0.2836829423904419, "learning_rate": 1.4334552845528456e-05, "loss": 0.0589, "step": 69685 }, { "epoch": 0.8498780487804878, "grad_norm": 0.4348633587360382, "learning_rate": 1.4334146341463416e-05, "loss": 0.0412, "step": 69690 }, { "epoch": 0.8499390243902439, "grad_norm": 0.8824028968811035, "learning_rate": 1.4333739837398376e-05, "loss": 0.0446, "step": 69695 }, { "epoch": 0.85, "grad_norm": 0.680603563785553, "learning_rate": 1.4333333333333334e-05, "loss": 0.0391, "step": 69700 }, { "epoch": 0.8500609756097561, "grad_norm": 0.4722888171672821, "learning_rate": 1.4332926829268294e-05, "loss": 0.0301, "step": 69705 }, { "epoch": 0.8501219512195122, "grad_norm": 0.5852919816970825, "learning_rate": 1.4332520325203254e-05, "loss": 0.0373, "step": 69710 }, { "epoch": 0.8501829268292683, "grad_norm": 0.7831968665122986, "learning_rate": 1.4332113821138212e-05, "loss": 0.0562, "step": 69715 }, { "epoch": 0.8502439024390244, "grad_norm": 0.8742902874946594, "learning_rate": 1.4331707317073171e-05, "loss": 0.0527, "step": 69720 }, { "epoch": 0.8503048780487805, "grad_norm": 0.6223249435424805, "learning_rate": 1.4331300813008131e-05, "loss": 0.0715, "step": 69725 }, { "epoch": 0.8503658536585366, "grad_norm": 0.756350576877594, "learning_rate": 1.4330894308943091e-05, "loss": 0.0526, "step": 69730 }, { "epoch": 0.8504268292682927, "grad_norm": 1.2307524681091309, "learning_rate": 1.4330487804878051e-05, "loss": 0.0445, "step": 69735 }, { "epoch": 0.8504878048780488, "grad_norm": 0.5161592364311218, "learning_rate": 1.433008130081301e-05, "loss": 0.0431, "step": 69740 }, { "epoch": 0.8505487804878049, "grad_norm": 0.3829428255558014, "learning_rate": 1.4329674796747967e-05, "loss": 0.0529, "step": 69745 }, { "epoch": 0.850609756097561, "grad_norm": 0.3587190806865692, "learning_rate": 1.4329268292682927e-05, "loss": 0.0398, "step": 69750 }, { "epoch": 0.8506707317073171, "grad_norm": 0.7025007605552673, "learning_rate": 1.4328861788617887e-05, "loss": 0.052, "step": 69755 }, { "epoch": 0.8507317073170731, "grad_norm": 1.0516679286956787, "learning_rate": 1.4328455284552847e-05, "loss": 0.0476, "step": 69760 }, { "epoch": 0.8507926829268293, "grad_norm": 0.8247101902961731, "learning_rate": 1.4328048780487806e-05, "loss": 0.0352, "step": 69765 }, { "epoch": 0.8508536585365853, "grad_norm": 0.50807785987854, "learning_rate": 1.4327642276422766e-05, "loss": 0.0642, "step": 69770 }, { "epoch": 0.8509146341463415, "grad_norm": 1.341861367225647, "learning_rate": 1.4327235772357724e-05, "loss": 0.0657, "step": 69775 }, { "epoch": 0.8509756097560975, "grad_norm": 0.333861380815506, "learning_rate": 1.4326829268292684e-05, "loss": 0.0174, "step": 69780 }, { "epoch": 0.8510365853658537, "grad_norm": 0.5383665561676025, "learning_rate": 1.4326422764227642e-05, "loss": 0.1081, "step": 69785 }, { "epoch": 0.8510975609756097, "grad_norm": 0.40597081184387207, "learning_rate": 1.4326016260162602e-05, "loss": 0.0449, "step": 69790 }, { "epoch": 0.8511585365853659, "grad_norm": 0.28296443819999695, "learning_rate": 1.4325609756097562e-05, "loss": 0.0537, "step": 69795 }, { "epoch": 0.8512195121951219, "grad_norm": 0.45970818400382996, "learning_rate": 1.4325203252032522e-05, "loss": 0.0467, "step": 69800 }, { "epoch": 0.8512804878048781, "grad_norm": 0.20828844606876373, "learning_rate": 1.432479674796748e-05, "loss": 0.0282, "step": 69805 }, { "epoch": 0.8513414634146341, "grad_norm": 0.7091397047042847, "learning_rate": 1.432439024390244e-05, "loss": 0.0631, "step": 69810 }, { "epoch": 0.8514024390243903, "grad_norm": 0.3070434033870697, "learning_rate": 1.43239837398374e-05, "loss": 0.0577, "step": 69815 }, { "epoch": 0.8514634146341463, "grad_norm": 0.43532589077949524, "learning_rate": 1.432357723577236e-05, "loss": 0.0598, "step": 69820 }, { "epoch": 0.8515243902439025, "grad_norm": 0.5631083250045776, "learning_rate": 1.432317073170732e-05, "loss": 0.0519, "step": 69825 }, { "epoch": 0.8515853658536585, "grad_norm": 0.2969953119754791, "learning_rate": 1.4322764227642279e-05, "loss": 0.0414, "step": 69830 }, { "epoch": 0.8516463414634147, "grad_norm": 0.29536136984825134, "learning_rate": 1.4322357723577236e-05, "loss": 0.0548, "step": 69835 }, { "epoch": 0.8517073170731707, "grad_norm": 0.5620254874229431, "learning_rate": 1.4321951219512195e-05, "loss": 0.0398, "step": 69840 }, { "epoch": 0.8517682926829269, "grad_norm": 0.6492855548858643, "learning_rate": 1.4321544715447155e-05, "loss": 0.046, "step": 69845 }, { "epoch": 0.8518292682926829, "grad_norm": 0.6273573040962219, "learning_rate": 1.4321138211382115e-05, "loss": 0.0287, "step": 69850 }, { "epoch": 0.8518902439024391, "grad_norm": 0.38876691460609436, "learning_rate": 1.4320731707317075e-05, "loss": 0.0401, "step": 69855 }, { "epoch": 0.8519512195121951, "grad_norm": 0.44079074263572693, "learning_rate": 1.4320325203252035e-05, "loss": 0.0444, "step": 69860 }, { "epoch": 0.8520121951219513, "grad_norm": 0.2704660892486572, "learning_rate": 1.4319918699186993e-05, "loss": 0.0569, "step": 69865 }, { "epoch": 0.8520731707317073, "grad_norm": 0.7536828517913818, "learning_rate": 1.4319512195121953e-05, "loss": 0.0345, "step": 69870 }, { "epoch": 0.8521341463414634, "grad_norm": 0.8094749450683594, "learning_rate": 1.431910569105691e-05, "loss": 0.0331, "step": 69875 }, { "epoch": 0.8521951219512195, "grad_norm": 0.40137696266174316, "learning_rate": 1.431869918699187e-05, "loss": 0.0406, "step": 69880 }, { "epoch": 0.8522560975609756, "grad_norm": 0.619390606880188, "learning_rate": 1.431829268292683e-05, "loss": 0.0417, "step": 69885 }, { "epoch": 0.8523170731707317, "grad_norm": 0.6582727432250977, "learning_rate": 1.431788617886179e-05, "loss": 0.0334, "step": 69890 }, { "epoch": 0.8523780487804878, "grad_norm": 0.5856502652168274, "learning_rate": 1.4317479674796748e-05, "loss": 0.0615, "step": 69895 }, { "epoch": 0.8524390243902439, "grad_norm": 0.571165144443512, "learning_rate": 1.4317073170731708e-05, "loss": 0.0583, "step": 69900 }, { "epoch": 0.8525, "grad_norm": 0.6943836808204651, "learning_rate": 1.4316666666666668e-05, "loss": 0.0369, "step": 69905 }, { "epoch": 0.8525609756097561, "grad_norm": 0.8579022884368896, "learning_rate": 1.4316260162601628e-05, "loss": 0.0503, "step": 69910 }, { "epoch": 0.8526219512195122, "grad_norm": 0.4118516147136688, "learning_rate": 1.4315853658536588e-05, "loss": 0.0375, "step": 69915 }, { "epoch": 0.8526829268292683, "grad_norm": 0.364071249961853, "learning_rate": 1.4315447154471547e-05, "loss": 0.0163, "step": 69920 }, { "epoch": 0.8527439024390244, "grad_norm": 0.4606457054615021, "learning_rate": 1.4315040650406504e-05, "loss": 0.0248, "step": 69925 }, { "epoch": 0.8528048780487805, "grad_norm": 0.4366825520992279, "learning_rate": 1.4314634146341464e-05, "loss": 0.034, "step": 69930 }, { "epoch": 0.8528658536585366, "grad_norm": 0.3453900218009949, "learning_rate": 1.4314227642276423e-05, "loss": 0.0324, "step": 69935 }, { "epoch": 0.8529268292682927, "grad_norm": 0.5180660486221313, "learning_rate": 1.4313821138211383e-05, "loss": 0.0603, "step": 69940 }, { "epoch": 0.8529878048780488, "grad_norm": 0.9020816683769226, "learning_rate": 1.4313414634146343e-05, "loss": 0.056, "step": 69945 }, { "epoch": 0.8530487804878049, "grad_norm": 0.8188281059265137, "learning_rate": 1.4313008130081303e-05, "loss": 0.0317, "step": 69950 }, { "epoch": 0.853109756097561, "grad_norm": 2.29257869720459, "learning_rate": 1.4312601626016261e-05, "loss": 0.0471, "step": 69955 }, { "epoch": 0.853170731707317, "grad_norm": 0.5565334558486938, "learning_rate": 1.4312195121951221e-05, "loss": 0.0357, "step": 69960 }, { "epoch": 0.8532317073170732, "grad_norm": 0.8918346166610718, "learning_rate": 1.4311788617886179e-05, "loss": 0.0767, "step": 69965 }, { "epoch": 0.8532926829268292, "grad_norm": 0.42253053188323975, "learning_rate": 1.4311382113821139e-05, "loss": 0.0475, "step": 69970 }, { "epoch": 0.8533536585365854, "grad_norm": 0.6872632503509521, "learning_rate": 1.4310975609756099e-05, "loss": 0.031, "step": 69975 }, { "epoch": 0.8534146341463414, "grad_norm": 0.5189197659492493, "learning_rate": 1.4310569105691058e-05, "loss": 0.0591, "step": 69980 }, { "epoch": 0.8534756097560976, "grad_norm": 0.34216392040252686, "learning_rate": 1.4310162601626017e-05, "loss": 0.0424, "step": 69985 }, { "epoch": 0.8535365853658536, "grad_norm": 0.550422191619873, "learning_rate": 1.4309756097560976e-05, "loss": 0.042, "step": 69990 }, { "epoch": 0.8535975609756098, "grad_norm": 0.3038140535354614, "learning_rate": 1.4309349593495936e-05, "loss": 0.0506, "step": 69995 }, { "epoch": 0.8536585365853658, "grad_norm": 0.27637264132499695, "learning_rate": 1.4308943089430896e-05, "loss": 0.0315, "step": 70000 }, { "epoch": 0.853719512195122, "grad_norm": 1.281497597694397, "learning_rate": 1.4308536585365856e-05, "loss": 0.0605, "step": 70005 }, { "epoch": 0.853780487804878, "grad_norm": 0.5030134320259094, "learning_rate": 1.4308130081300816e-05, "loss": 0.0739, "step": 70010 }, { "epoch": 0.8538414634146342, "grad_norm": 0.6939111948013306, "learning_rate": 1.4307723577235772e-05, "loss": 0.0388, "step": 70015 }, { "epoch": 0.8539024390243902, "grad_norm": 0.3746405243873596, "learning_rate": 1.4307317073170732e-05, "loss": 0.0252, "step": 70020 }, { "epoch": 0.8539634146341464, "grad_norm": 1.100699782371521, "learning_rate": 1.4306910569105692e-05, "loss": 0.0743, "step": 70025 }, { "epoch": 0.8540243902439024, "grad_norm": 0.18123307824134827, "learning_rate": 1.4306504065040652e-05, "loss": 0.0429, "step": 70030 }, { "epoch": 0.8540853658536586, "grad_norm": 0.5074217915534973, "learning_rate": 1.4306097560975611e-05, "loss": 0.0299, "step": 70035 }, { "epoch": 0.8541463414634146, "grad_norm": 0.656085193157196, "learning_rate": 1.4305691056910571e-05, "loss": 0.0466, "step": 70040 }, { "epoch": 0.8542073170731708, "grad_norm": 0.41477030515670776, "learning_rate": 1.430528455284553e-05, "loss": 0.0354, "step": 70045 }, { "epoch": 0.8542682926829268, "grad_norm": 0.6422396302223206, "learning_rate": 1.4304878048780488e-05, "loss": 0.0641, "step": 70050 }, { "epoch": 0.854329268292683, "grad_norm": 0.3867385685443878, "learning_rate": 1.4304471544715447e-05, "loss": 0.0413, "step": 70055 }, { "epoch": 0.854390243902439, "grad_norm": 0.684565544128418, "learning_rate": 1.4304065040650407e-05, "loss": 0.0491, "step": 70060 }, { "epoch": 0.8544512195121952, "grad_norm": 0.71746826171875, "learning_rate": 1.4303658536585367e-05, "loss": 0.0575, "step": 70065 }, { "epoch": 0.8545121951219512, "grad_norm": 0.26460134983062744, "learning_rate": 1.4303252032520327e-05, "loss": 0.0441, "step": 70070 }, { "epoch": 0.8545731707317074, "grad_norm": 0.5037738084793091, "learning_rate": 1.4302845528455285e-05, "loss": 0.0422, "step": 70075 }, { "epoch": 0.8546341463414634, "grad_norm": 0.7940125465393066, "learning_rate": 1.4302439024390245e-05, "loss": 0.0621, "step": 70080 }, { "epoch": 0.8546951219512195, "grad_norm": 0.5837357640266418, "learning_rate": 1.4302032520325205e-05, "loss": 0.0539, "step": 70085 }, { "epoch": 0.8547560975609756, "grad_norm": 0.9169703125953674, "learning_rate": 1.4301626016260164e-05, "loss": 0.0747, "step": 70090 }, { "epoch": 0.8548170731707317, "grad_norm": 0.7119457721710205, "learning_rate": 1.4301219512195124e-05, "loss": 0.0345, "step": 70095 }, { "epoch": 0.8548780487804878, "grad_norm": 0.7434288859367371, "learning_rate": 1.4300813008130084e-05, "loss": 0.0669, "step": 70100 }, { "epoch": 0.8549390243902439, "grad_norm": 0.6781180500984192, "learning_rate": 1.430040650406504e-05, "loss": 0.0475, "step": 70105 }, { "epoch": 0.855, "grad_norm": 0.6097806692123413, "learning_rate": 1.43e-05, "loss": 0.0346, "step": 70110 }, { "epoch": 0.8550609756097561, "grad_norm": 2.558046817779541, "learning_rate": 1.429959349593496e-05, "loss": 0.0436, "step": 70115 }, { "epoch": 0.8551219512195122, "grad_norm": 0.5632429122924805, "learning_rate": 1.429918699186992e-05, "loss": 0.0413, "step": 70120 }, { "epoch": 0.8551829268292683, "grad_norm": 1.521787405014038, "learning_rate": 1.429878048780488e-05, "loss": 0.0364, "step": 70125 }, { "epoch": 0.8552439024390244, "grad_norm": 0.4897366762161255, "learning_rate": 1.429837398373984e-05, "loss": 0.0489, "step": 70130 }, { "epoch": 0.8553048780487805, "grad_norm": 0.6578853130340576, "learning_rate": 1.4297967479674798e-05, "loss": 0.0825, "step": 70135 }, { "epoch": 0.8553658536585366, "grad_norm": 0.9070673584938049, "learning_rate": 1.4297560975609756e-05, "loss": 0.0403, "step": 70140 }, { "epoch": 0.8554268292682927, "grad_norm": 0.3231043517589569, "learning_rate": 1.4297154471544716e-05, "loss": 0.0304, "step": 70145 }, { "epoch": 0.8554878048780488, "grad_norm": 0.3789363503456116, "learning_rate": 1.4296747967479675e-05, "loss": 0.0245, "step": 70150 }, { "epoch": 0.8555487804878049, "grad_norm": 0.5164662003517151, "learning_rate": 1.4296341463414635e-05, "loss": 0.0426, "step": 70155 }, { "epoch": 0.855609756097561, "grad_norm": 0.4792010188102722, "learning_rate": 1.4295934959349595e-05, "loss": 0.0329, "step": 70160 }, { "epoch": 0.8556707317073171, "grad_norm": 0.5453652739524841, "learning_rate": 1.4295528455284553e-05, "loss": 0.0308, "step": 70165 }, { "epoch": 0.8557317073170732, "grad_norm": 0.5331518650054932, "learning_rate": 1.4295121951219513e-05, "loss": 0.0394, "step": 70170 }, { "epoch": 0.8557926829268293, "grad_norm": 0.8618170022964478, "learning_rate": 1.4294715447154473e-05, "loss": 0.0332, "step": 70175 }, { "epoch": 0.8558536585365853, "grad_norm": 0.49231046438217163, "learning_rate": 1.4294308943089433e-05, "loss": 0.0391, "step": 70180 }, { "epoch": 0.8559146341463415, "grad_norm": 0.8264417052268982, "learning_rate": 1.4293902439024393e-05, "loss": 0.0232, "step": 70185 }, { "epoch": 0.8559756097560975, "grad_norm": 0.45477741956710815, "learning_rate": 1.4293495934959352e-05, "loss": 0.0324, "step": 70190 }, { "epoch": 0.8560365853658537, "grad_norm": 0.6306946873664856, "learning_rate": 1.4293089430894309e-05, "loss": 0.0888, "step": 70195 }, { "epoch": 0.8560975609756097, "grad_norm": 0.21404995024204254, "learning_rate": 1.4292682926829269e-05, "loss": 0.031, "step": 70200 }, { "epoch": 0.8561585365853659, "grad_norm": 0.5416638255119324, "learning_rate": 1.4292276422764228e-05, "loss": 0.0363, "step": 70205 }, { "epoch": 0.8562195121951219, "grad_norm": 0.3488333523273468, "learning_rate": 1.4291869918699188e-05, "loss": 0.0199, "step": 70210 }, { "epoch": 0.8562804878048781, "grad_norm": 0.2992381453514099, "learning_rate": 1.4291463414634148e-05, "loss": 0.023, "step": 70215 }, { "epoch": 0.8563414634146341, "grad_norm": 0.2883402705192566, "learning_rate": 1.4291056910569108e-05, "loss": 0.0153, "step": 70220 }, { "epoch": 0.8564024390243903, "grad_norm": 0.3513756990432739, "learning_rate": 1.4290650406504066e-05, "loss": 0.029, "step": 70225 }, { "epoch": 0.8564634146341463, "grad_norm": 0.534028947353363, "learning_rate": 1.4290243902439024e-05, "loss": 0.0436, "step": 70230 }, { "epoch": 0.8565243902439025, "grad_norm": 0.5038643479347229, "learning_rate": 1.4289837398373984e-05, "loss": 0.0733, "step": 70235 }, { "epoch": 0.8565853658536585, "grad_norm": 2.9543850421905518, "learning_rate": 1.4289430894308944e-05, "loss": 0.0205, "step": 70240 }, { "epoch": 0.8566463414634147, "grad_norm": 1.2759642601013184, "learning_rate": 1.4289024390243904e-05, "loss": 0.0442, "step": 70245 }, { "epoch": 0.8567073170731707, "grad_norm": 0.52683025598526, "learning_rate": 1.4288617886178863e-05, "loss": 0.0532, "step": 70250 }, { "epoch": 0.8567682926829269, "grad_norm": 1.0762591361999512, "learning_rate": 1.4288211382113822e-05, "loss": 0.0379, "step": 70255 }, { "epoch": 0.8568292682926829, "grad_norm": 0.9584445953369141, "learning_rate": 1.4287804878048781e-05, "loss": 0.0457, "step": 70260 }, { "epoch": 0.8568902439024391, "grad_norm": 0.44429904222488403, "learning_rate": 1.4287398373983741e-05, "loss": 0.0251, "step": 70265 }, { "epoch": 0.8569512195121951, "grad_norm": 0.2929709255695343, "learning_rate": 1.4286991869918701e-05, "loss": 0.0371, "step": 70270 }, { "epoch": 0.8570121951219513, "grad_norm": 0.7628397345542908, "learning_rate": 1.4286585365853661e-05, "loss": 0.078, "step": 70275 }, { "epoch": 0.8570731707317073, "grad_norm": 0.5115139484405518, "learning_rate": 1.428617886178862e-05, "loss": 0.039, "step": 70280 }, { "epoch": 0.8571341463414635, "grad_norm": 0.5193437933921814, "learning_rate": 1.4285772357723577e-05, "loss": 0.0616, "step": 70285 }, { "epoch": 0.8571951219512195, "grad_norm": 0.4059655964374542, "learning_rate": 1.4285365853658537e-05, "loss": 0.0301, "step": 70290 }, { "epoch": 0.8572560975609756, "grad_norm": 0.736612856388092, "learning_rate": 1.4284959349593497e-05, "loss": 0.0232, "step": 70295 }, { "epoch": 0.8573170731707317, "grad_norm": 0.8151488304138184, "learning_rate": 1.4284552845528457e-05, "loss": 0.0292, "step": 70300 }, { "epoch": 0.8573780487804878, "grad_norm": 0.9303203821182251, "learning_rate": 1.4284146341463416e-05, "loss": 0.0369, "step": 70305 }, { "epoch": 0.8574390243902439, "grad_norm": 0.5296903252601624, "learning_rate": 1.4283739837398376e-05, "loss": 0.023, "step": 70310 }, { "epoch": 0.8575, "grad_norm": 0.6691786646842957, "learning_rate": 1.4283333333333333e-05, "loss": 0.0348, "step": 70315 }, { "epoch": 0.8575609756097561, "grad_norm": 0.6569558382034302, "learning_rate": 1.4282926829268292e-05, "loss": 0.0272, "step": 70320 }, { "epoch": 0.8576219512195122, "grad_norm": 0.8298605680465698, "learning_rate": 1.4282520325203252e-05, "loss": 0.0582, "step": 70325 }, { "epoch": 0.8576829268292683, "grad_norm": 0.6621079444885254, "learning_rate": 1.4282113821138212e-05, "loss": 0.071, "step": 70330 }, { "epoch": 0.8577439024390244, "grad_norm": 0.9931389689445496, "learning_rate": 1.4281707317073172e-05, "loss": 0.0361, "step": 70335 }, { "epoch": 0.8578048780487805, "grad_norm": 0.3378628194332123, "learning_rate": 1.4281300813008132e-05, "loss": 0.0364, "step": 70340 }, { "epoch": 0.8578658536585366, "grad_norm": 0.4631951153278351, "learning_rate": 1.428089430894309e-05, "loss": 0.058, "step": 70345 }, { "epoch": 0.8579268292682927, "grad_norm": 0.6351652145385742, "learning_rate": 1.428048780487805e-05, "loss": 0.0496, "step": 70350 }, { "epoch": 0.8579878048780488, "grad_norm": 1.0604451894760132, "learning_rate": 1.428008130081301e-05, "loss": 0.0472, "step": 70355 }, { "epoch": 0.8580487804878049, "grad_norm": 0.6565051674842834, "learning_rate": 1.427967479674797e-05, "loss": 0.0227, "step": 70360 }, { "epoch": 0.858109756097561, "grad_norm": 0.5019060373306274, "learning_rate": 1.427926829268293e-05, "loss": 0.051, "step": 70365 }, { "epoch": 0.8581707317073171, "grad_norm": 0.4234968423843384, "learning_rate": 1.4278861788617889e-05, "loss": 0.0548, "step": 70370 }, { "epoch": 0.8582317073170732, "grad_norm": 0.613871157169342, "learning_rate": 1.4278455284552845e-05, "loss": 0.0477, "step": 70375 }, { "epoch": 0.8582926829268293, "grad_norm": 0.6529686450958252, "learning_rate": 1.4278048780487805e-05, "loss": 0.0421, "step": 70380 }, { "epoch": 0.8583536585365854, "grad_norm": 0.09889470040798187, "learning_rate": 1.4277642276422765e-05, "loss": 0.0164, "step": 70385 }, { "epoch": 0.8584146341463414, "grad_norm": 0.4348519742488861, "learning_rate": 1.4277235772357725e-05, "loss": 0.0434, "step": 70390 }, { "epoch": 0.8584756097560976, "grad_norm": 0.4304799437522888, "learning_rate": 1.4276829268292685e-05, "loss": 0.0328, "step": 70395 }, { "epoch": 0.8585365853658536, "grad_norm": 0.35860154032707214, "learning_rate": 1.4276422764227645e-05, "loss": 0.0426, "step": 70400 }, { "epoch": 0.8585975609756098, "grad_norm": 0.765582263469696, "learning_rate": 1.4276016260162601e-05, "loss": 0.0311, "step": 70405 }, { "epoch": 0.8586585365853658, "grad_norm": 0.8108653426170349, "learning_rate": 1.427560975609756e-05, "loss": 0.0791, "step": 70410 }, { "epoch": 0.858719512195122, "grad_norm": 0.8033909797668457, "learning_rate": 1.427520325203252e-05, "loss": 0.0585, "step": 70415 }, { "epoch": 0.858780487804878, "grad_norm": 0.44880881905555725, "learning_rate": 1.427479674796748e-05, "loss": 0.0421, "step": 70420 }, { "epoch": 0.8588414634146342, "grad_norm": 0.3880351483821869, "learning_rate": 1.427439024390244e-05, "loss": 0.0345, "step": 70425 }, { "epoch": 0.8589024390243902, "grad_norm": 0.5172824859619141, "learning_rate": 1.42739837398374e-05, "loss": 0.0688, "step": 70430 }, { "epoch": 0.8589634146341464, "grad_norm": 0.2658209800720215, "learning_rate": 1.4273577235772358e-05, "loss": 0.0377, "step": 70435 }, { "epoch": 0.8590243902439024, "grad_norm": 0.5366972088813782, "learning_rate": 1.4273170731707318e-05, "loss": 0.0337, "step": 70440 }, { "epoch": 0.8590853658536586, "grad_norm": 0.8421708345413208, "learning_rate": 1.4272764227642278e-05, "loss": 0.0613, "step": 70445 }, { "epoch": 0.8591463414634146, "grad_norm": 0.709073543548584, "learning_rate": 1.4272357723577238e-05, "loss": 0.0335, "step": 70450 }, { "epoch": 0.8592073170731708, "grad_norm": 0.6295568943023682, "learning_rate": 1.4271951219512198e-05, "loss": 0.0434, "step": 70455 }, { "epoch": 0.8592682926829268, "grad_norm": 0.9692814350128174, "learning_rate": 1.4271544715447156e-05, "loss": 0.0341, "step": 70460 }, { "epoch": 0.859329268292683, "grad_norm": 0.39846330881118774, "learning_rate": 1.4271138211382114e-05, "loss": 0.0329, "step": 70465 }, { "epoch": 0.859390243902439, "grad_norm": 1.0492596626281738, "learning_rate": 1.4270731707317074e-05, "loss": 0.0445, "step": 70470 }, { "epoch": 0.8594512195121952, "grad_norm": 0.43585941195487976, "learning_rate": 1.4270325203252033e-05, "loss": 0.0586, "step": 70475 }, { "epoch": 0.8595121951219512, "grad_norm": 0.40204182267189026, "learning_rate": 1.4269918699186993e-05, "loss": 0.0434, "step": 70480 }, { "epoch": 0.8595731707317074, "grad_norm": 0.9974672794342041, "learning_rate": 1.4269512195121953e-05, "loss": 0.0367, "step": 70485 }, { "epoch": 0.8596341463414634, "grad_norm": 0.6530646681785583, "learning_rate": 1.4269105691056913e-05, "loss": 0.0265, "step": 70490 }, { "epoch": 0.8596951219512196, "grad_norm": 0.6462556719779968, "learning_rate": 1.426869918699187e-05, "loss": 0.0505, "step": 70495 }, { "epoch": 0.8597560975609756, "grad_norm": 0.8703033328056335, "learning_rate": 1.4268292682926829e-05, "loss": 0.0597, "step": 70500 }, { "epoch": 0.8598170731707317, "grad_norm": 0.7201969027519226, "learning_rate": 1.4267886178861789e-05, "loss": 0.0588, "step": 70505 }, { "epoch": 0.8598780487804878, "grad_norm": 0.2833408713340759, "learning_rate": 1.4267479674796749e-05, "loss": 0.0359, "step": 70510 }, { "epoch": 0.859939024390244, "grad_norm": 0.46607932448387146, "learning_rate": 1.4267073170731709e-05, "loss": 0.0455, "step": 70515 }, { "epoch": 0.86, "grad_norm": 0.3651503920555115, "learning_rate": 1.4266666666666668e-05, "loss": 0.0401, "step": 70520 }, { "epoch": 0.8600609756097561, "grad_norm": 0.7327007055282593, "learning_rate": 1.4266260162601627e-05, "loss": 0.0584, "step": 70525 }, { "epoch": 0.8601219512195122, "grad_norm": 0.5906372666358948, "learning_rate": 1.4265853658536586e-05, "loss": 0.042, "step": 70530 }, { "epoch": 0.8601829268292683, "grad_norm": 0.6638488173484802, "learning_rate": 1.4265447154471546e-05, "loss": 0.0364, "step": 70535 }, { "epoch": 0.8602439024390244, "grad_norm": 0.3655446767807007, "learning_rate": 1.4265040650406506e-05, "loss": 0.0434, "step": 70540 }, { "epoch": 0.8603048780487805, "grad_norm": 0.45281004905700684, "learning_rate": 1.4264634146341466e-05, "loss": 0.0549, "step": 70545 }, { "epoch": 0.8603658536585366, "grad_norm": 0.7125005125999451, "learning_rate": 1.4264227642276424e-05, "loss": 0.0888, "step": 70550 }, { "epoch": 0.8604268292682927, "grad_norm": 1.1011465787887573, "learning_rate": 1.4263821138211382e-05, "loss": 0.0319, "step": 70555 }, { "epoch": 0.8604878048780488, "grad_norm": 0.6460021734237671, "learning_rate": 1.4263414634146342e-05, "loss": 0.0432, "step": 70560 }, { "epoch": 0.8605487804878049, "grad_norm": 0.4278900921344757, "learning_rate": 1.4263008130081302e-05, "loss": 0.0423, "step": 70565 }, { "epoch": 0.860609756097561, "grad_norm": 0.33967459201812744, "learning_rate": 1.4262601626016262e-05, "loss": 0.0276, "step": 70570 }, { "epoch": 0.8606707317073171, "grad_norm": 0.5099325180053711, "learning_rate": 1.4262195121951221e-05, "loss": 0.0425, "step": 70575 }, { "epoch": 0.8607317073170732, "grad_norm": 6.4642229080200195, "learning_rate": 1.4261788617886181e-05, "loss": 0.058, "step": 70580 }, { "epoch": 0.8607926829268293, "grad_norm": 0.9505370855331421, "learning_rate": 1.4261382113821138e-05, "loss": 0.0433, "step": 70585 }, { "epoch": 0.8608536585365854, "grad_norm": 0.4856606721878052, "learning_rate": 1.4260975609756097e-05, "loss": 0.034, "step": 70590 }, { "epoch": 0.8609146341463415, "grad_norm": 1.5065371990203857, "learning_rate": 1.4260569105691057e-05, "loss": 0.0479, "step": 70595 }, { "epoch": 0.8609756097560975, "grad_norm": 0.7438085675239563, "learning_rate": 1.4260162601626017e-05, "loss": 0.0245, "step": 70600 }, { "epoch": 0.8610365853658537, "grad_norm": 0.7408375144004822, "learning_rate": 1.4259756097560977e-05, "loss": 0.0404, "step": 70605 }, { "epoch": 0.8610975609756097, "grad_norm": 0.6355658769607544, "learning_rate": 1.4259349593495937e-05, "loss": 0.0356, "step": 70610 }, { "epoch": 0.8611585365853659, "grad_norm": 1.2946065664291382, "learning_rate": 1.4258943089430895e-05, "loss": 0.0573, "step": 70615 }, { "epoch": 0.8612195121951219, "grad_norm": 1.2650781869888306, "learning_rate": 1.4258536585365855e-05, "loss": 0.0578, "step": 70620 }, { "epoch": 0.8612804878048781, "grad_norm": 0.09275467693805695, "learning_rate": 1.4258130081300815e-05, "loss": 0.0265, "step": 70625 }, { "epoch": 0.8613414634146341, "grad_norm": 0.6782172918319702, "learning_rate": 1.4257723577235774e-05, "loss": 0.0813, "step": 70630 }, { "epoch": 0.8614024390243903, "grad_norm": 1.610084891319275, "learning_rate": 1.4257317073170734e-05, "loss": 0.0442, "step": 70635 }, { "epoch": 0.8614634146341463, "grad_norm": 0.6925851702690125, "learning_rate": 1.4256910569105692e-05, "loss": 0.0516, "step": 70640 }, { "epoch": 0.8615243902439025, "grad_norm": 0.5904974937438965, "learning_rate": 1.425650406504065e-05, "loss": 0.0493, "step": 70645 }, { "epoch": 0.8615853658536585, "grad_norm": 0.3746712803840637, "learning_rate": 1.425609756097561e-05, "loss": 0.0543, "step": 70650 }, { "epoch": 0.8616463414634147, "grad_norm": 0.9636844396591187, "learning_rate": 1.425569105691057e-05, "loss": 0.0483, "step": 70655 }, { "epoch": 0.8617073170731707, "grad_norm": 0.5982451438903809, "learning_rate": 1.425528455284553e-05, "loss": 0.0586, "step": 70660 }, { "epoch": 0.8617682926829269, "grad_norm": 1.128434419631958, "learning_rate": 1.425487804878049e-05, "loss": 0.0681, "step": 70665 }, { "epoch": 0.8618292682926829, "grad_norm": 0.457938551902771, "learning_rate": 1.425447154471545e-05, "loss": 0.0389, "step": 70670 }, { "epoch": 0.8618902439024391, "grad_norm": 0.1558266282081604, "learning_rate": 1.4254065040650406e-05, "loss": 0.0158, "step": 70675 }, { "epoch": 0.8619512195121951, "grad_norm": 0.7133550047874451, "learning_rate": 1.4253658536585366e-05, "loss": 0.0487, "step": 70680 }, { "epoch": 0.8620121951219513, "grad_norm": 1.1531106233596802, "learning_rate": 1.4253252032520326e-05, "loss": 0.0323, "step": 70685 }, { "epoch": 0.8620731707317073, "grad_norm": 0.6047443151473999, "learning_rate": 1.4252845528455285e-05, "loss": 0.0667, "step": 70690 }, { "epoch": 0.8621341463414635, "grad_norm": 0.4379163384437561, "learning_rate": 1.4252439024390245e-05, "loss": 0.0261, "step": 70695 }, { "epoch": 0.8621951219512195, "grad_norm": 0.2870040237903595, "learning_rate": 1.4252032520325205e-05, "loss": 0.0391, "step": 70700 }, { "epoch": 0.8622560975609757, "grad_norm": 0.4212421476840973, "learning_rate": 1.4251626016260163e-05, "loss": 0.0366, "step": 70705 }, { "epoch": 0.8623170731707317, "grad_norm": 0.7773711085319519, "learning_rate": 1.4251219512195123e-05, "loss": 0.0487, "step": 70710 }, { "epoch": 0.8623780487804878, "grad_norm": 0.4072738289833069, "learning_rate": 1.4250813008130083e-05, "loss": 0.0209, "step": 70715 }, { "epoch": 0.8624390243902439, "grad_norm": 1.0355815887451172, "learning_rate": 1.4250406504065043e-05, "loss": 0.0997, "step": 70720 }, { "epoch": 0.8625, "grad_norm": 0.630531907081604, "learning_rate": 1.425e-05, "loss": 0.0525, "step": 70725 }, { "epoch": 0.8625609756097561, "grad_norm": 0.2892431914806366, "learning_rate": 1.424959349593496e-05, "loss": 0.0427, "step": 70730 }, { "epoch": 0.8626219512195122, "grad_norm": 0.8006160855293274, "learning_rate": 1.4249186991869919e-05, "loss": 0.0558, "step": 70735 }, { "epoch": 0.8626829268292683, "grad_norm": 0.7744449377059937, "learning_rate": 1.4248780487804879e-05, "loss": 0.039, "step": 70740 }, { "epoch": 0.8627439024390244, "grad_norm": 0.24260252714157104, "learning_rate": 1.4248373983739838e-05, "loss": 0.0206, "step": 70745 }, { "epoch": 0.8628048780487805, "grad_norm": 0.576324462890625, "learning_rate": 1.4247967479674798e-05, "loss": 0.0535, "step": 70750 }, { "epoch": 0.8628658536585366, "grad_norm": 4.156123638153076, "learning_rate": 1.4247560975609758e-05, "loss": 0.035, "step": 70755 }, { "epoch": 0.8629268292682927, "grad_norm": 0.9422901272773743, "learning_rate": 1.4247154471544718e-05, "loss": 0.0359, "step": 70760 }, { "epoch": 0.8629878048780488, "grad_norm": 0.4898309111595154, "learning_rate": 1.4246747967479674e-05, "loss": 0.0624, "step": 70765 }, { "epoch": 0.8630487804878049, "grad_norm": 0.7721284031867981, "learning_rate": 1.4246341463414634e-05, "loss": 0.039, "step": 70770 }, { "epoch": 0.863109756097561, "grad_norm": 0.6511331796646118, "learning_rate": 1.4245934959349594e-05, "loss": 0.0607, "step": 70775 }, { "epoch": 0.8631707317073171, "grad_norm": 0.5484586954116821, "learning_rate": 1.4245528455284554e-05, "loss": 0.031, "step": 70780 }, { "epoch": 0.8632317073170732, "grad_norm": 0.6355984210968018, "learning_rate": 1.4245121951219514e-05, "loss": 0.0333, "step": 70785 }, { "epoch": 0.8632926829268293, "grad_norm": 0.2831588685512543, "learning_rate": 1.4244715447154473e-05, "loss": 0.0458, "step": 70790 }, { "epoch": 0.8633536585365854, "grad_norm": 1.0301865339279175, "learning_rate": 1.4244308943089432e-05, "loss": 0.039, "step": 70795 }, { "epoch": 0.8634146341463415, "grad_norm": 0.25866448879241943, "learning_rate": 1.4243902439024391e-05, "loss": 0.027, "step": 70800 }, { "epoch": 0.8634756097560976, "grad_norm": 0.703509509563446, "learning_rate": 1.4243495934959351e-05, "loss": 0.0435, "step": 70805 }, { "epoch": 0.8635365853658536, "grad_norm": 0.7001673579216003, "learning_rate": 1.4243089430894311e-05, "loss": 0.0391, "step": 70810 }, { "epoch": 0.8635975609756098, "grad_norm": 0.6953734755516052, "learning_rate": 1.4242682926829269e-05, "loss": 0.0351, "step": 70815 }, { "epoch": 0.8636585365853658, "grad_norm": 0.7311550974845886, "learning_rate": 1.4242276422764229e-05, "loss": 0.0421, "step": 70820 }, { "epoch": 0.863719512195122, "grad_norm": 0.5173485279083252, "learning_rate": 1.4241869918699187e-05, "loss": 0.0337, "step": 70825 }, { "epoch": 0.863780487804878, "grad_norm": 0.7379241585731506, "learning_rate": 1.4241463414634147e-05, "loss": 0.0802, "step": 70830 }, { "epoch": 0.8638414634146342, "grad_norm": 1.5007493495941162, "learning_rate": 1.4241056910569107e-05, "loss": 0.0414, "step": 70835 }, { "epoch": 0.8639024390243902, "grad_norm": 0.6719593405723572, "learning_rate": 1.4240650406504067e-05, "loss": 0.0252, "step": 70840 }, { "epoch": 0.8639634146341464, "grad_norm": 0.5898699760437012, "learning_rate": 1.4240243902439026e-05, "loss": 0.0397, "step": 70845 }, { "epoch": 0.8640243902439024, "grad_norm": 0.36796993017196655, "learning_rate": 1.4239837398373986e-05, "loss": 0.0412, "step": 70850 }, { "epoch": 0.8640853658536586, "grad_norm": 0.7946882247924805, "learning_rate": 1.4239430894308943e-05, "loss": 0.0402, "step": 70855 }, { "epoch": 0.8641463414634146, "grad_norm": 0.46210914850234985, "learning_rate": 1.4239024390243902e-05, "loss": 0.0343, "step": 70860 }, { "epoch": 0.8642073170731708, "grad_norm": 0.8009138703346252, "learning_rate": 1.4238617886178862e-05, "loss": 0.0406, "step": 70865 }, { "epoch": 0.8642682926829268, "grad_norm": 0.28532326221466064, "learning_rate": 1.4238211382113822e-05, "loss": 0.0411, "step": 70870 }, { "epoch": 0.864329268292683, "grad_norm": 0.4123368263244629, "learning_rate": 1.4237804878048782e-05, "loss": 0.0539, "step": 70875 }, { "epoch": 0.864390243902439, "grad_norm": 0.5872098207473755, "learning_rate": 1.4237398373983742e-05, "loss": 0.0453, "step": 70880 }, { "epoch": 0.8644512195121952, "grad_norm": 0.553480863571167, "learning_rate": 1.42369918699187e-05, "loss": 0.0929, "step": 70885 }, { "epoch": 0.8645121951219512, "grad_norm": 2.089123487472534, "learning_rate": 1.423658536585366e-05, "loss": 0.0719, "step": 70890 }, { "epoch": 0.8645731707317074, "grad_norm": 1.3487907648086548, "learning_rate": 1.423617886178862e-05, "loss": 0.0686, "step": 70895 }, { "epoch": 0.8646341463414634, "grad_norm": 0.8863824009895325, "learning_rate": 1.423577235772358e-05, "loss": 0.0405, "step": 70900 }, { "epoch": 0.8646951219512196, "grad_norm": 0.45668742060661316, "learning_rate": 1.4235365853658537e-05, "loss": 0.0178, "step": 70905 }, { "epoch": 0.8647560975609756, "grad_norm": 0.5495673418045044, "learning_rate": 1.4234959349593497e-05, "loss": 0.0316, "step": 70910 }, { "epoch": 0.8648170731707318, "grad_norm": 0.554470956325531, "learning_rate": 1.4234552845528455e-05, "loss": 0.0374, "step": 70915 }, { "epoch": 0.8648780487804878, "grad_norm": 0.5484219193458557, "learning_rate": 1.4234146341463415e-05, "loss": 0.0547, "step": 70920 }, { "epoch": 0.864939024390244, "grad_norm": 0.16870389878749847, "learning_rate": 1.4233739837398375e-05, "loss": 0.0268, "step": 70925 }, { "epoch": 0.865, "grad_norm": 0.5603146553039551, "learning_rate": 1.4233333333333335e-05, "loss": 0.032, "step": 70930 }, { "epoch": 0.8650609756097561, "grad_norm": 0.27064335346221924, "learning_rate": 1.4232926829268295e-05, "loss": 0.0346, "step": 70935 }, { "epoch": 0.8651219512195122, "grad_norm": 0.8970967531204224, "learning_rate": 1.4232520325203254e-05, "loss": 0.0749, "step": 70940 }, { "epoch": 0.8651829268292683, "grad_norm": 0.4762994050979614, "learning_rate": 1.4232113821138211e-05, "loss": 0.0441, "step": 70945 }, { "epoch": 0.8652439024390244, "grad_norm": 0.33043673634529114, "learning_rate": 1.423170731707317e-05, "loss": 0.0418, "step": 70950 }, { "epoch": 0.8653048780487805, "grad_norm": 0.6494238376617432, "learning_rate": 1.423130081300813e-05, "loss": 0.0318, "step": 70955 }, { "epoch": 0.8653658536585366, "grad_norm": 0.24611909687519073, "learning_rate": 1.423089430894309e-05, "loss": 0.0436, "step": 70960 }, { "epoch": 0.8654268292682927, "grad_norm": 0.551288366317749, "learning_rate": 1.423048780487805e-05, "loss": 0.0298, "step": 70965 }, { "epoch": 0.8654878048780488, "grad_norm": 0.2282932549715042, "learning_rate": 1.423008130081301e-05, "loss": 0.0253, "step": 70970 }, { "epoch": 0.8655487804878049, "grad_norm": 0.7560519576072693, "learning_rate": 1.4229674796747968e-05, "loss": 0.0478, "step": 70975 }, { "epoch": 0.865609756097561, "grad_norm": 0.5495349168777466, "learning_rate": 1.4229268292682928e-05, "loss": 0.044, "step": 70980 }, { "epoch": 0.8656707317073171, "grad_norm": 0.5426865816116333, "learning_rate": 1.4228861788617888e-05, "loss": 0.0435, "step": 70985 }, { "epoch": 0.8657317073170732, "grad_norm": 0.54750657081604, "learning_rate": 1.4228455284552846e-05, "loss": 0.0551, "step": 70990 }, { "epoch": 0.8657926829268293, "grad_norm": 0.7305434942245483, "learning_rate": 1.4228048780487806e-05, "loss": 0.0488, "step": 70995 }, { "epoch": 0.8658536585365854, "grad_norm": 0.6547286510467529, "learning_rate": 1.4227642276422766e-05, "loss": 0.1014, "step": 71000 }, { "epoch": 0.8659146341463415, "grad_norm": 0.6888526678085327, "learning_rate": 1.4227235772357724e-05, "loss": 0.0591, "step": 71005 }, { "epoch": 0.8659756097560976, "grad_norm": 0.4961766302585602, "learning_rate": 1.4226829268292684e-05, "loss": 0.0349, "step": 71010 }, { "epoch": 0.8660365853658537, "grad_norm": 0.6400858759880066, "learning_rate": 1.4226422764227643e-05, "loss": 0.04, "step": 71015 }, { "epoch": 0.8660975609756097, "grad_norm": 0.4150281250476837, "learning_rate": 1.4226016260162603e-05, "loss": 0.0468, "step": 71020 }, { "epoch": 0.8661585365853659, "grad_norm": 0.5357706546783447, "learning_rate": 1.4225609756097563e-05, "loss": 0.0367, "step": 71025 }, { "epoch": 0.8662195121951219, "grad_norm": 0.7204429507255554, "learning_rate": 1.4225203252032523e-05, "loss": 0.0296, "step": 71030 }, { "epoch": 0.8662804878048781, "grad_norm": 0.4559723436832428, "learning_rate": 1.422479674796748e-05, "loss": 0.0272, "step": 71035 }, { "epoch": 0.8663414634146341, "grad_norm": 0.418390154838562, "learning_rate": 1.4224390243902439e-05, "loss": 0.0544, "step": 71040 }, { "epoch": 0.8664024390243903, "grad_norm": 0.6431319117546082, "learning_rate": 1.4223983739837399e-05, "loss": 0.0343, "step": 71045 }, { "epoch": 0.8664634146341463, "grad_norm": 0.7744481563568115, "learning_rate": 1.4223577235772359e-05, "loss": 0.044, "step": 71050 }, { "epoch": 0.8665243902439025, "grad_norm": 0.8160578012466431, "learning_rate": 1.4223170731707319e-05, "loss": 0.0337, "step": 71055 }, { "epoch": 0.8665853658536585, "grad_norm": 0.7416093945503235, "learning_rate": 1.4222764227642278e-05, "loss": 0.042, "step": 71060 }, { "epoch": 0.8666463414634147, "grad_norm": 1.0738930702209473, "learning_rate": 1.4222357723577236e-05, "loss": 0.0771, "step": 71065 }, { "epoch": 0.8667073170731707, "grad_norm": 2.0604164600372314, "learning_rate": 1.4221951219512196e-05, "loss": 0.0525, "step": 71070 }, { "epoch": 0.8667682926829269, "grad_norm": 0.37940600514411926, "learning_rate": 1.4221544715447156e-05, "loss": 0.0366, "step": 71075 }, { "epoch": 0.8668292682926829, "grad_norm": 1.427567720413208, "learning_rate": 1.4221138211382114e-05, "loss": 0.0243, "step": 71080 }, { "epoch": 0.8668902439024391, "grad_norm": 1.2587764263153076, "learning_rate": 1.4220731707317074e-05, "loss": 0.0469, "step": 71085 }, { "epoch": 0.8669512195121951, "grad_norm": 1.0339107513427734, "learning_rate": 1.4220325203252034e-05, "loss": 0.0407, "step": 71090 }, { "epoch": 0.8670121951219513, "grad_norm": 0.7883397340774536, "learning_rate": 1.4219918699186992e-05, "loss": 0.0639, "step": 71095 }, { "epoch": 0.8670731707317073, "grad_norm": 0.5653404593467712, "learning_rate": 1.4219512195121952e-05, "loss": 0.057, "step": 71100 }, { "epoch": 0.8671341463414635, "grad_norm": 0.4478118121623993, "learning_rate": 1.4219105691056912e-05, "loss": 0.065, "step": 71105 }, { "epoch": 0.8671951219512195, "grad_norm": 0.6344748735427856, "learning_rate": 1.4218699186991871e-05, "loss": 0.0383, "step": 71110 }, { "epoch": 0.8672560975609757, "grad_norm": 0.3665538728237152, "learning_rate": 1.4218292682926831e-05, "loss": 0.0336, "step": 71115 }, { "epoch": 0.8673170731707317, "grad_norm": 1.2955501079559326, "learning_rate": 1.4217886178861791e-05, "loss": 0.0384, "step": 71120 }, { "epoch": 0.8673780487804879, "grad_norm": 1.023817539215088, "learning_rate": 1.4217479674796748e-05, "loss": 0.0411, "step": 71125 }, { "epoch": 0.8674390243902439, "grad_norm": 0.7256795167922974, "learning_rate": 1.4217073170731707e-05, "loss": 0.0749, "step": 71130 }, { "epoch": 0.8675, "grad_norm": 0.6548454761505127, "learning_rate": 1.4216666666666667e-05, "loss": 0.0483, "step": 71135 }, { "epoch": 0.8675609756097561, "grad_norm": 1.6647019386291504, "learning_rate": 1.4216260162601627e-05, "loss": 0.0336, "step": 71140 }, { "epoch": 0.8676219512195122, "grad_norm": 0.3256058394908905, "learning_rate": 1.4215853658536587e-05, "loss": 0.0433, "step": 71145 }, { "epoch": 0.8676829268292683, "grad_norm": 0.6788148283958435, "learning_rate": 1.4215447154471547e-05, "loss": 0.0376, "step": 71150 }, { "epoch": 0.8677439024390244, "grad_norm": 0.4663415849208832, "learning_rate": 1.4215040650406505e-05, "loss": 0.0336, "step": 71155 }, { "epoch": 0.8678048780487805, "grad_norm": 0.9245310425758362, "learning_rate": 1.4214634146341465e-05, "loss": 0.0446, "step": 71160 }, { "epoch": 0.8678658536585366, "grad_norm": 0.5552200675010681, "learning_rate": 1.4214227642276424e-05, "loss": 0.0513, "step": 71165 }, { "epoch": 0.8679268292682927, "grad_norm": 0.5624041557312012, "learning_rate": 1.4213821138211383e-05, "loss": 0.027, "step": 71170 }, { "epoch": 0.8679878048780488, "grad_norm": 0.8237650990486145, "learning_rate": 1.4213414634146342e-05, "loss": 0.0482, "step": 71175 }, { "epoch": 0.8680487804878049, "grad_norm": 3.0092079639434814, "learning_rate": 1.4213008130081302e-05, "loss": 0.0396, "step": 71180 }, { "epoch": 0.868109756097561, "grad_norm": 0.22536173462867737, "learning_rate": 1.421260162601626e-05, "loss": 0.0293, "step": 71185 }, { "epoch": 0.8681707317073171, "grad_norm": 0.5449653267860413, "learning_rate": 1.421219512195122e-05, "loss": 0.0336, "step": 71190 }, { "epoch": 0.8682317073170732, "grad_norm": 0.4975484013557434, "learning_rate": 1.421178861788618e-05, "loss": 0.0221, "step": 71195 }, { "epoch": 0.8682926829268293, "grad_norm": 0.8038354516029358, "learning_rate": 1.421138211382114e-05, "loss": 0.0564, "step": 71200 }, { "epoch": 0.8683536585365854, "grad_norm": 0.5522905588150024, "learning_rate": 1.42109756097561e-05, "loss": 0.0338, "step": 71205 }, { "epoch": 0.8684146341463415, "grad_norm": 0.9026093482971191, "learning_rate": 1.421056910569106e-05, "loss": 0.057, "step": 71210 }, { "epoch": 0.8684756097560976, "grad_norm": 0.5633116364479065, "learning_rate": 1.4210162601626016e-05, "loss": 0.05, "step": 71215 }, { "epoch": 0.8685365853658537, "grad_norm": 0.6048529148101807, "learning_rate": 1.4209756097560976e-05, "loss": 0.0574, "step": 71220 }, { "epoch": 0.8685975609756098, "grad_norm": 0.542881965637207, "learning_rate": 1.4209349593495936e-05, "loss": 0.0603, "step": 71225 }, { "epoch": 0.8686585365853658, "grad_norm": 0.5321353077888489, "learning_rate": 1.4208943089430895e-05, "loss": 0.0697, "step": 71230 }, { "epoch": 0.868719512195122, "grad_norm": 0.5846667289733887, "learning_rate": 1.4208536585365855e-05, "loss": 0.0431, "step": 71235 }, { "epoch": 0.868780487804878, "grad_norm": 0.7066788673400879, "learning_rate": 1.4208130081300815e-05, "loss": 0.0349, "step": 71240 }, { "epoch": 0.8688414634146342, "grad_norm": 0.5471241474151611, "learning_rate": 1.4207723577235773e-05, "loss": 0.0373, "step": 71245 }, { "epoch": 0.8689024390243902, "grad_norm": 0.6611534357070923, "learning_rate": 1.4207317073170733e-05, "loss": 0.0472, "step": 71250 }, { "epoch": 0.8689634146341464, "grad_norm": 0.684169352054596, "learning_rate": 1.4206910569105691e-05, "loss": 0.0455, "step": 71255 }, { "epoch": 0.8690243902439024, "grad_norm": 0.41587406396865845, "learning_rate": 1.4206504065040651e-05, "loss": 0.0371, "step": 71260 }, { "epoch": 0.8690853658536586, "grad_norm": 0.3699265420436859, "learning_rate": 1.420609756097561e-05, "loss": 0.0252, "step": 71265 }, { "epoch": 0.8691463414634146, "grad_norm": 1.1478767395019531, "learning_rate": 1.420569105691057e-05, "loss": 0.0561, "step": 71270 }, { "epoch": 0.8692073170731708, "grad_norm": 0.34344232082366943, "learning_rate": 1.4205284552845529e-05, "loss": 0.0353, "step": 71275 }, { "epoch": 0.8692682926829268, "grad_norm": 0.5046690106391907, "learning_rate": 1.4204878048780488e-05, "loss": 0.046, "step": 71280 }, { "epoch": 0.869329268292683, "grad_norm": 0.4941377639770508, "learning_rate": 1.4204471544715448e-05, "loss": 0.0527, "step": 71285 }, { "epoch": 0.869390243902439, "grad_norm": 0.25575706362724304, "learning_rate": 1.4204065040650408e-05, "loss": 0.0284, "step": 71290 }, { "epoch": 0.8694512195121952, "grad_norm": 0.572702944278717, "learning_rate": 1.4203658536585368e-05, "loss": 0.0249, "step": 71295 }, { "epoch": 0.8695121951219512, "grad_norm": 0.22197555005550385, "learning_rate": 1.4203252032520328e-05, "loss": 0.0402, "step": 71300 }, { "epoch": 0.8695731707317074, "grad_norm": 0.4887290596961975, "learning_rate": 1.4202845528455284e-05, "loss": 0.034, "step": 71305 }, { "epoch": 0.8696341463414634, "grad_norm": 0.5518994927406311, "learning_rate": 1.4202439024390244e-05, "loss": 0.0572, "step": 71310 }, { "epoch": 0.8696951219512196, "grad_norm": 0.7557265162467957, "learning_rate": 1.4202032520325204e-05, "loss": 0.058, "step": 71315 }, { "epoch": 0.8697560975609756, "grad_norm": 0.7239245176315308, "learning_rate": 1.4201626016260164e-05, "loss": 0.0332, "step": 71320 }, { "epoch": 0.8698170731707318, "grad_norm": 0.6011078357696533, "learning_rate": 1.4201219512195123e-05, "loss": 0.0397, "step": 71325 }, { "epoch": 0.8698780487804878, "grad_norm": 0.7952292561531067, "learning_rate": 1.4200813008130083e-05, "loss": 0.0434, "step": 71330 }, { "epoch": 0.869939024390244, "grad_norm": 0.41019299626350403, "learning_rate": 1.4200406504065041e-05, "loss": 0.0665, "step": 71335 }, { "epoch": 0.87, "grad_norm": 1.059374213218689, "learning_rate": 1.4200000000000001e-05, "loss": 0.0536, "step": 71340 }, { "epoch": 0.8700609756097561, "grad_norm": 0.8226652145385742, "learning_rate": 1.419959349593496e-05, "loss": 0.0671, "step": 71345 }, { "epoch": 0.8701219512195122, "grad_norm": 0.44075095653533936, "learning_rate": 1.419918699186992e-05, "loss": 0.0609, "step": 71350 }, { "epoch": 0.8701829268292683, "grad_norm": 0.41488978266716003, "learning_rate": 1.4198780487804879e-05, "loss": 0.0419, "step": 71355 }, { "epoch": 0.8702439024390244, "grad_norm": 0.6101725697517395, "learning_rate": 1.4198373983739839e-05, "loss": 0.044, "step": 71360 }, { "epoch": 0.8703048780487805, "grad_norm": 0.3130820095539093, "learning_rate": 1.4197967479674797e-05, "loss": 0.0278, "step": 71365 }, { "epoch": 0.8703658536585366, "grad_norm": 0.3012191355228424, "learning_rate": 1.4197560975609757e-05, "loss": 0.0283, "step": 71370 }, { "epoch": 0.8704268292682927, "grad_norm": 0.8222558498382568, "learning_rate": 1.4197154471544717e-05, "loss": 0.0645, "step": 71375 }, { "epoch": 0.8704878048780488, "grad_norm": 0.2952412962913513, "learning_rate": 1.4196747967479676e-05, "loss": 0.036, "step": 71380 }, { "epoch": 0.8705487804878049, "grad_norm": 0.34498849511146545, "learning_rate": 1.4196341463414636e-05, "loss": 0.0536, "step": 71385 }, { "epoch": 0.870609756097561, "grad_norm": 1.0809379816055298, "learning_rate": 1.4195934959349596e-05, "loss": 0.0557, "step": 71390 }, { "epoch": 0.8706707317073171, "grad_norm": 0.5129916667938232, "learning_rate": 1.4195528455284553e-05, "loss": 0.0397, "step": 71395 }, { "epoch": 0.8707317073170732, "grad_norm": 0.6994798183441162, "learning_rate": 1.4195121951219512e-05, "loss": 0.0625, "step": 71400 }, { "epoch": 0.8707926829268293, "grad_norm": 0.5999664068222046, "learning_rate": 1.4194715447154472e-05, "loss": 0.0299, "step": 71405 }, { "epoch": 0.8708536585365854, "grad_norm": 0.5165858864784241, "learning_rate": 1.4194308943089432e-05, "loss": 0.0465, "step": 71410 }, { "epoch": 0.8709146341463415, "grad_norm": 0.44342851638793945, "learning_rate": 1.4193902439024392e-05, "loss": 0.0475, "step": 71415 }, { "epoch": 0.8709756097560976, "grad_norm": 0.5762441754341125, "learning_rate": 1.4193495934959352e-05, "loss": 0.0282, "step": 71420 }, { "epoch": 0.8710365853658537, "grad_norm": 0.5614624619483948, "learning_rate": 1.419308943089431e-05, "loss": 0.0288, "step": 71425 }, { "epoch": 0.8710975609756098, "grad_norm": 0.3242199122905731, "learning_rate": 1.4192682926829268e-05, "loss": 0.0535, "step": 71430 }, { "epoch": 0.8711585365853659, "grad_norm": 0.8553449511528015, "learning_rate": 1.4192276422764228e-05, "loss": 0.0514, "step": 71435 }, { "epoch": 0.871219512195122, "grad_norm": 0.4351901113986969, "learning_rate": 1.4191869918699188e-05, "loss": 0.0386, "step": 71440 }, { "epoch": 0.8712804878048781, "grad_norm": 0.45650357007980347, "learning_rate": 1.4191463414634147e-05, "loss": 0.0291, "step": 71445 }, { "epoch": 0.8713414634146341, "grad_norm": 0.5155431628227234, "learning_rate": 1.4191056910569107e-05, "loss": 0.0657, "step": 71450 }, { "epoch": 0.8714024390243903, "grad_norm": 0.750981330871582, "learning_rate": 1.4190650406504065e-05, "loss": 0.034, "step": 71455 }, { "epoch": 0.8714634146341463, "grad_norm": 0.45802542567253113, "learning_rate": 1.4190243902439025e-05, "loss": 0.0244, "step": 71460 }, { "epoch": 0.8715243902439025, "grad_norm": 0.441937118768692, "learning_rate": 1.4189837398373985e-05, "loss": 0.027, "step": 71465 }, { "epoch": 0.8715853658536585, "grad_norm": 1.1383421421051025, "learning_rate": 1.4189430894308945e-05, "loss": 0.0232, "step": 71470 }, { "epoch": 0.8716463414634147, "grad_norm": 0.6661239862442017, "learning_rate": 1.4189024390243905e-05, "loss": 0.0391, "step": 71475 }, { "epoch": 0.8717073170731707, "grad_norm": 0.9840467572212219, "learning_rate": 1.4188617886178864e-05, "loss": 0.0463, "step": 71480 }, { "epoch": 0.8717682926829269, "grad_norm": 0.4895095229148865, "learning_rate": 1.4188211382113821e-05, "loss": 0.0348, "step": 71485 }, { "epoch": 0.8718292682926829, "grad_norm": 0.5858736634254456, "learning_rate": 1.418780487804878e-05, "loss": 0.0277, "step": 71490 }, { "epoch": 0.8718902439024391, "grad_norm": 0.8093296885490417, "learning_rate": 1.418739837398374e-05, "loss": 0.0395, "step": 71495 }, { "epoch": 0.8719512195121951, "grad_norm": 0.3755158483982086, "learning_rate": 1.41869918699187e-05, "loss": 0.0286, "step": 71500 }, { "epoch": 0.8720121951219513, "grad_norm": 0.3927185535430908, "learning_rate": 1.418658536585366e-05, "loss": 0.0532, "step": 71505 }, { "epoch": 0.8720731707317073, "grad_norm": 0.32091107964515686, "learning_rate": 1.418617886178862e-05, "loss": 0.047, "step": 71510 }, { "epoch": 0.8721341463414635, "grad_norm": 0.7665942311286926, "learning_rate": 1.4185772357723578e-05, "loss": 0.0413, "step": 71515 }, { "epoch": 0.8721951219512195, "grad_norm": 0.8284850716590881, "learning_rate": 1.4185365853658536e-05, "loss": 0.0275, "step": 71520 }, { "epoch": 0.8722560975609757, "grad_norm": 0.3921688199043274, "learning_rate": 1.4184959349593496e-05, "loss": 0.0251, "step": 71525 }, { "epoch": 0.8723170731707317, "grad_norm": 0.5141410827636719, "learning_rate": 1.4184552845528456e-05, "loss": 0.0249, "step": 71530 }, { "epoch": 0.8723780487804879, "grad_norm": 0.3370819687843323, "learning_rate": 1.4184146341463416e-05, "loss": 0.0298, "step": 71535 }, { "epoch": 0.8724390243902439, "grad_norm": 0.786512017250061, "learning_rate": 1.4183739837398375e-05, "loss": 0.037, "step": 71540 }, { "epoch": 0.8725, "grad_norm": 0.5572032928466797, "learning_rate": 1.4183333333333334e-05, "loss": 0.0382, "step": 71545 }, { "epoch": 0.8725609756097561, "grad_norm": 1.3585366010665894, "learning_rate": 1.4182926829268293e-05, "loss": 0.0302, "step": 71550 }, { "epoch": 0.8726219512195122, "grad_norm": 0.3695542514324188, "learning_rate": 1.4182520325203253e-05, "loss": 0.0311, "step": 71555 }, { "epoch": 0.8726829268292683, "grad_norm": 0.5582023859024048, "learning_rate": 1.4182113821138213e-05, "loss": 0.0416, "step": 71560 }, { "epoch": 0.8727439024390244, "grad_norm": 0.5309286117553711, "learning_rate": 1.4181707317073173e-05, "loss": 0.0546, "step": 71565 }, { "epoch": 0.8728048780487805, "grad_norm": 0.35293614864349365, "learning_rate": 1.4181300813008133e-05, "loss": 0.0685, "step": 71570 }, { "epoch": 0.8728658536585366, "grad_norm": 0.7402794361114502, "learning_rate": 1.418089430894309e-05, "loss": 0.0484, "step": 71575 }, { "epoch": 0.8729268292682927, "grad_norm": 0.9215801954269409, "learning_rate": 1.4180487804878049e-05, "loss": 0.0738, "step": 71580 }, { "epoch": 0.8729878048780488, "grad_norm": 0.49107465147972107, "learning_rate": 1.4180081300813009e-05, "loss": 0.0374, "step": 71585 }, { "epoch": 0.8730487804878049, "grad_norm": 0.5759534239768982, "learning_rate": 1.4179674796747969e-05, "loss": 0.0156, "step": 71590 }, { "epoch": 0.873109756097561, "grad_norm": 0.4230654537677765, "learning_rate": 1.4179268292682928e-05, "loss": 0.0565, "step": 71595 }, { "epoch": 0.8731707317073171, "grad_norm": 0.2170930802822113, "learning_rate": 1.4178861788617888e-05, "loss": 0.0327, "step": 71600 }, { "epoch": 0.8732317073170732, "grad_norm": 0.3225599527359009, "learning_rate": 1.4178455284552846e-05, "loss": 0.0355, "step": 71605 }, { "epoch": 0.8732926829268293, "grad_norm": 0.48960232734680176, "learning_rate": 1.4178048780487805e-05, "loss": 0.0478, "step": 71610 }, { "epoch": 0.8733536585365854, "grad_norm": 0.8119661211967468, "learning_rate": 1.4177642276422764e-05, "loss": 0.0251, "step": 71615 }, { "epoch": 0.8734146341463415, "grad_norm": 0.4372199773788452, "learning_rate": 1.4177235772357724e-05, "loss": 0.0413, "step": 71620 }, { "epoch": 0.8734756097560976, "grad_norm": 0.3435642719268799, "learning_rate": 1.4176829268292684e-05, "loss": 0.0434, "step": 71625 }, { "epoch": 0.8735365853658537, "grad_norm": 0.48198041319847107, "learning_rate": 1.4176422764227644e-05, "loss": 0.0287, "step": 71630 }, { "epoch": 0.8735975609756098, "grad_norm": 0.9317657947540283, "learning_rate": 1.4176016260162602e-05, "loss": 0.0684, "step": 71635 }, { "epoch": 0.8736585365853659, "grad_norm": 0.5392155647277832, "learning_rate": 1.4175609756097562e-05, "loss": 0.0561, "step": 71640 }, { "epoch": 0.873719512195122, "grad_norm": 0.9253454804420471, "learning_rate": 1.4175203252032522e-05, "loss": 0.0662, "step": 71645 }, { "epoch": 0.873780487804878, "grad_norm": 0.3947533369064331, "learning_rate": 1.4174796747967481e-05, "loss": 0.0294, "step": 71650 }, { "epoch": 0.8738414634146342, "grad_norm": 0.6129354238510132, "learning_rate": 1.4174390243902441e-05, "loss": 0.0821, "step": 71655 }, { "epoch": 0.8739024390243902, "grad_norm": 3.2792561054229736, "learning_rate": 1.4173983739837401e-05, "loss": 0.0539, "step": 71660 }, { "epoch": 0.8739634146341464, "grad_norm": 0.596408486366272, "learning_rate": 1.4173577235772357e-05, "loss": 0.0331, "step": 71665 }, { "epoch": 0.8740243902439024, "grad_norm": 0.9966567754745483, "learning_rate": 1.4173170731707317e-05, "loss": 0.0474, "step": 71670 }, { "epoch": 0.8740853658536586, "grad_norm": 2.293355703353882, "learning_rate": 1.4172764227642277e-05, "loss": 0.0301, "step": 71675 }, { "epoch": 0.8741463414634146, "grad_norm": 1.078218936920166, "learning_rate": 1.4172357723577237e-05, "loss": 0.0306, "step": 71680 }, { "epoch": 0.8742073170731708, "grad_norm": 0.531618058681488, "learning_rate": 1.4171951219512197e-05, "loss": 0.0374, "step": 71685 }, { "epoch": 0.8742682926829268, "grad_norm": 0.46054625511169434, "learning_rate": 1.4171544715447157e-05, "loss": 0.0407, "step": 71690 }, { "epoch": 0.874329268292683, "grad_norm": 0.3542558252811432, "learning_rate": 1.4171138211382113e-05, "loss": 0.0199, "step": 71695 }, { "epoch": 0.874390243902439, "grad_norm": 0.7165883779525757, "learning_rate": 1.4170731707317073e-05, "loss": 0.09, "step": 71700 }, { "epoch": 0.8744512195121952, "grad_norm": 1.1451131105422974, "learning_rate": 1.4170325203252033e-05, "loss": 0.0672, "step": 71705 }, { "epoch": 0.8745121951219512, "grad_norm": 0.7487224340438843, "learning_rate": 1.4169918699186992e-05, "loss": 0.0549, "step": 71710 }, { "epoch": 0.8745731707317074, "grad_norm": 0.31793320178985596, "learning_rate": 1.4169512195121952e-05, "loss": 0.0394, "step": 71715 }, { "epoch": 0.8746341463414634, "grad_norm": 0.8015451431274414, "learning_rate": 1.4169105691056912e-05, "loss": 0.047, "step": 71720 }, { "epoch": 0.8746951219512196, "grad_norm": 0.680081307888031, "learning_rate": 1.416869918699187e-05, "loss": 0.0389, "step": 71725 }, { "epoch": 0.8747560975609756, "grad_norm": 0.6031713485717773, "learning_rate": 1.416829268292683e-05, "loss": 0.0468, "step": 71730 }, { "epoch": 0.8748170731707318, "grad_norm": 0.18147605657577515, "learning_rate": 1.416788617886179e-05, "loss": 0.0256, "step": 71735 }, { "epoch": 0.8748780487804878, "grad_norm": 0.5798052549362183, "learning_rate": 1.416747967479675e-05, "loss": 0.0532, "step": 71740 }, { "epoch": 0.874939024390244, "grad_norm": 0.7474591135978699, "learning_rate": 1.416707317073171e-05, "loss": 0.0394, "step": 71745 }, { "epoch": 0.875, "grad_norm": 0.6208704710006714, "learning_rate": 1.416666666666667e-05, "loss": 0.0435, "step": 71750 }, { "epoch": 0.875060975609756, "grad_norm": 0.4249170124530792, "learning_rate": 1.4166260162601628e-05, "loss": 0.0582, "step": 71755 }, { "epoch": 0.8751219512195122, "grad_norm": 0.8192146420478821, "learning_rate": 1.4165853658536586e-05, "loss": 0.0298, "step": 71760 }, { "epoch": 0.8751829268292682, "grad_norm": 0.4726419746875763, "learning_rate": 1.4165447154471545e-05, "loss": 0.0501, "step": 71765 }, { "epoch": 0.8752439024390244, "grad_norm": 0.6685755848884583, "learning_rate": 1.4165040650406505e-05, "loss": 0.0299, "step": 71770 }, { "epoch": 0.8753048780487804, "grad_norm": 0.5498518943786621, "learning_rate": 1.4164634146341465e-05, "loss": 0.0316, "step": 71775 }, { "epoch": 0.8753658536585366, "grad_norm": 0.5045130252838135, "learning_rate": 1.4164227642276425e-05, "loss": 0.0376, "step": 71780 }, { "epoch": 0.8754268292682926, "grad_norm": 0.3502849340438843, "learning_rate": 1.4163821138211385e-05, "loss": 0.0392, "step": 71785 }, { "epoch": 0.8754878048780488, "grad_norm": 0.2799016535282135, "learning_rate": 1.4163414634146341e-05, "loss": 0.0336, "step": 71790 }, { "epoch": 0.8755487804878048, "grad_norm": 0.782688319683075, "learning_rate": 1.4163008130081301e-05, "loss": 0.0758, "step": 71795 }, { "epoch": 0.875609756097561, "grad_norm": 0.611290693283081, "learning_rate": 1.416260162601626e-05, "loss": 0.0412, "step": 71800 }, { "epoch": 0.875670731707317, "grad_norm": 0.562268853187561, "learning_rate": 1.416219512195122e-05, "loss": 0.0699, "step": 71805 }, { "epoch": 0.8757317073170732, "grad_norm": 0.28720995783805847, "learning_rate": 1.416178861788618e-05, "loss": 0.0451, "step": 71810 }, { "epoch": 0.8757926829268292, "grad_norm": 0.4879446029663086, "learning_rate": 1.416138211382114e-05, "loss": 0.0377, "step": 71815 }, { "epoch": 0.8758536585365854, "grad_norm": 0.6181243062019348, "learning_rate": 1.4160975609756098e-05, "loss": 0.0313, "step": 71820 }, { "epoch": 0.8759146341463414, "grad_norm": 0.25491049885749817, "learning_rate": 1.4160569105691058e-05, "loss": 0.1059, "step": 71825 }, { "epoch": 0.8759756097560976, "grad_norm": 0.2878071069717407, "learning_rate": 1.4160162601626018e-05, "loss": 0.0439, "step": 71830 }, { "epoch": 0.8760365853658536, "grad_norm": 0.5522472858428955, "learning_rate": 1.4159756097560978e-05, "loss": 0.027, "step": 71835 }, { "epoch": 0.8760975609756098, "grad_norm": 0.33880457282066345, "learning_rate": 1.4159349593495936e-05, "loss": 0.0316, "step": 71840 }, { "epoch": 0.8761585365853658, "grad_norm": 0.8901528120040894, "learning_rate": 1.4158943089430896e-05, "loss": 0.0531, "step": 71845 }, { "epoch": 0.876219512195122, "grad_norm": 0.5333328247070312, "learning_rate": 1.4158536585365854e-05, "loss": 0.0328, "step": 71850 }, { "epoch": 0.876280487804878, "grad_norm": 0.555419921875, "learning_rate": 1.4158130081300814e-05, "loss": 0.0268, "step": 71855 }, { "epoch": 0.8763414634146341, "grad_norm": 0.7977216839790344, "learning_rate": 1.4157723577235774e-05, "loss": 0.0266, "step": 71860 }, { "epoch": 0.8764024390243902, "grad_norm": 0.5169394016265869, "learning_rate": 1.4157317073170733e-05, "loss": 0.0252, "step": 71865 }, { "epoch": 0.8764634146341463, "grad_norm": 0.7400259375572205, "learning_rate": 1.4156910569105693e-05, "loss": 0.0392, "step": 71870 }, { "epoch": 0.8765243902439024, "grad_norm": 0.6343756318092346, "learning_rate": 1.4156504065040653e-05, "loss": 0.0618, "step": 71875 }, { "epoch": 0.8765853658536585, "grad_norm": 0.4140315353870392, "learning_rate": 1.415609756097561e-05, "loss": 0.0439, "step": 71880 }, { "epoch": 0.8766463414634146, "grad_norm": 0.7722370624542236, "learning_rate": 1.415569105691057e-05, "loss": 0.0537, "step": 71885 }, { "epoch": 0.8767073170731707, "grad_norm": 0.5234901309013367, "learning_rate": 1.4155284552845529e-05, "loss": 0.0489, "step": 71890 }, { "epoch": 0.8767682926829268, "grad_norm": 0.49995777010917664, "learning_rate": 1.4154878048780489e-05, "loss": 0.0318, "step": 71895 }, { "epoch": 0.8768292682926829, "grad_norm": 0.711386501789093, "learning_rate": 1.4154471544715449e-05, "loss": 0.05, "step": 71900 }, { "epoch": 0.876890243902439, "grad_norm": 0.5627413988113403, "learning_rate": 1.4154065040650409e-05, "loss": 0.0503, "step": 71905 }, { "epoch": 0.8769512195121951, "grad_norm": 0.40238866209983826, "learning_rate": 1.4153658536585367e-05, "loss": 0.0332, "step": 71910 }, { "epoch": 0.8770121951219512, "grad_norm": 0.5388128757476807, "learning_rate": 1.4153252032520327e-05, "loss": 0.0344, "step": 71915 }, { "epoch": 0.8770731707317073, "grad_norm": 0.3550748825073242, "learning_rate": 1.4152845528455286e-05, "loss": 0.0312, "step": 71920 }, { "epoch": 0.8771341463414634, "grad_norm": 0.5460125803947449, "learning_rate": 1.4152439024390246e-05, "loss": 0.0687, "step": 71925 }, { "epoch": 0.8771951219512195, "grad_norm": 1.2542510032653809, "learning_rate": 1.4152032520325204e-05, "loss": 0.0487, "step": 71930 }, { "epoch": 0.8772560975609756, "grad_norm": 0.5584765672683716, "learning_rate": 1.4151626016260164e-05, "loss": 0.0359, "step": 71935 }, { "epoch": 0.8773170731707317, "grad_norm": 0.306846022605896, "learning_rate": 1.4151219512195122e-05, "loss": 0.0493, "step": 71940 }, { "epoch": 0.8773780487804878, "grad_norm": 0.5858107805252075, "learning_rate": 1.4150813008130082e-05, "loss": 0.0408, "step": 71945 }, { "epoch": 0.8774390243902439, "grad_norm": 0.5284878015518188, "learning_rate": 1.4150406504065042e-05, "loss": 0.0378, "step": 71950 }, { "epoch": 0.8775, "grad_norm": 0.4404347836971283, "learning_rate": 1.4150000000000002e-05, "loss": 0.0392, "step": 71955 }, { "epoch": 0.8775609756097561, "grad_norm": 0.686059296131134, "learning_rate": 1.4149593495934962e-05, "loss": 0.0501, "step": 71960 }, { "epoch": 0.8776219512195121, "grad_norm": 0.28864341974258423, "learning_rate": 1.4149186991869921e-05, "loss": 0.0287, "step": 71965 }, { "epoch": 0.8776829268292683, "grad_norm": 0.501521110534668, "learning_rate": 1.4148780487804878e-05, "loss": 0.0415, "step": 71970 }, { "epoch": 0.8777439024390243, "grad_norm": 0.4077812731266022, "learning_rate": 1.4148373983739838e-05, "loss": 0.0567, "step": 71975 }, { "epoch": 0.8778048780487805, "grad_norm": 0.6339690089225769, "learning_rate": 1.4147967479674797e-05, "loss": 0.0387, "step": 71980 }, { "epoch": 0.8778658536585365, "grad_norm": 0.6786708235740662, "learning_rate": 1.4147560975609757e-05, "loss": 0.0794, "step": 71985 }, { "epoch": 0.8779268292682927, "grad_norm": 0.5005685091018677, "learning_rate": 1.4147154471544717e-05, "loss": 0.0326, "step": 71990 }, { "epoch": 0.8779878048780487, "grad_norm": 0.30415740609169006, "learning_rate": 1.4146747967479677e-05, "loss": 0.0234, "step": 71995 }, { "epoch": 0.8780487804878049, "grad_norm": 0.37127017974853516, "learning_rate": 1.4146341463414635e-05, "loss": 0.0237, "step": 72000 }, { "epoch": 0.8781097560975609, "grad_norm": 0.4493976831436157, "learning_rate": 1.4145934959349595e-05, "loss": 0.0374, "step": 72005 }, { "epoch": 0.8781707317073171, "grad_norm": 0.33958715200424194, "learning_rate": 1.4145528455284555e-05, "loss": 0.0547, "step": 72010 }, { "epoch": 0.8782317073170731, "grad_norm": 0.26224374771118164, "learning_rate": 1.4145121951219515e-05, "loss": 0.0419, "step": 72015 }, { "epoch": 0.8782926829268293, "grad_norm": 1.0056251287460327, "learning_rate": 1.4144715447154473e-05, "loss": 0.0498, "step": 72020 }, { "epoch": 0.8783536585365853, "grad_norm": 0.2776903212070465, "learning_rate": 1.4144308943089432e-05, "loss": 0.0577, "step": 72025 }, { "epoch": 0.8784146341463415, "grad_norm": 0.13973098993301392, "learning_rate": 1.414390243902439e-05, "loss": 0.0555, "step": 72030 }, { "epoch": 0.8784756097560975, "grad_norm": 1.3569695949554443, "learning_rate": 1.414349593495935e-05, "loss": 0.0583, "step": 72035 }, { "epoch": 0.8785365853658537, "grad_norm": 0.654111385345459, "learning_rate": 1.414308943089431e-05, "loss": 0.0276, "step": 72040 }, { "epoch": 0.8785975609756097, "grad_norm": 0.7167918682098389, "learning_rate": 1.414268292682927e-05, "loss": 0.0576, "step": 72045 }, { "epoch": 0.8786585365853659, "grad_norm": 0.7485944032669067, "learning_rate": 1.414227642276423e-05, "loss": 0.027, "step": 72050 }, { "epoch": 0.8787195121951219, "grad_norm": 1.3486008644104004, "learning_rate": 1.414186991869919e-05, "loss": 0.0476, "step": 72055 }, { "epoch": 0.878780487804878, "grad_norm": 0.4118322432041168, "learning_rate": 1.4141463414634146e-05, "loss": 0.0575, "step": 72060 }, { "epoch": 0.8788414634146341, "grad_norm": 0.45339640974998474, "learning_rate": 1.4141056910569106e-05, "loss": 0.0631, "step": 72065 }, { "epoch": 0.8789024390243902, "grad_norm": 0.43723228573799133, "learning_rate": 1.4140650406504066e-05, "loss": 0.0429, "step": 72070 }, { "epoch": 0.8789634146341463, "grad_norm": 0.16354764997959137, "learning_rate": 1.4140243902439026e-05, "loss": 0.1366, "step": 72075 }, { "epoch": 0.8790243902439024, "grad_norm": 0.5410050749778748, "learning_rate": 1.4139837398373985e-05, "loss": 0.035, "step": 72080 }, { "epoch": 0.8790853658536585, "grad_norm": 0.29127979278564453, "learning_rate": 1.4139430894308945e-05, "loss": 0.0413, "step": 72085 }, { "epoch": 0.8791463414634146, "grad_norm": 0.28282788395881653, "learning_rate": 1.4139024390243903e-05, "loss": 0.0273, "step": 72090 }, { "epoch": 0.8792073170731707, "grad_norm": 0.533086895942688, "learning_rate": 1.4138617886178863e-05, "loss": 0.0487, "step": 72095 }, { "epoch": 0.8792682926829268, "grad_norm": 0.42789286375045776, "learning_rate": 1.4138211382113823e-05, "loss": 0.0487, "step": 72100 }, { "epoch": 0.8793292682926829, "grad_norm": 0.5056027770042419, "learning_rate": 1.4137804878048781e-05, "loss": 0.0673, "step": 72105 }, { "epoch": 0.879390243902439, "grad_norm": 1.5614700317382812, "learning_rate": 1.4137398373983741e-05, "loss": 0.0441, "step": 72110 }, { "epoch": 0.8794512195121951, "grad_norm": 1.388767957687378, "learning_rate": 1.41369918699187e-05, "loss": 0.052, "step": 72115 }, { "epoch": 0.8795121951219512, "grad_norm": 0.28723078966140747, "learning_rate": 1.4136585365853659e-05, "loss": 0.02, "step": 72120 }, { "epoch": 0.8795731707317073, "grad_norm": 0.47207051515579224, "learning_rate": 1.4136178861788619e-05, "loss": 0.0579, "step": 72125 }, { "epoch": 0.8796341463414634, "grad_norm": 0.7009961009025574, "learning_rate": 1.4135772357723579e-05, "loss": 0.021, "step": 72130 }, { "epoch": 0.8796951219512195, "grad_norm": 0.8665297627449036, "learning_rate": 1.4135365853658538e-05, "loss": 0.0525, "step": 72135 }, { "epoch": 0.8797560975609756, "grad_norm": 0.6912084817886353, "learning_rate": 1.4134959349593498e-05, "loss": 0.0429, "step": 72140 }, { "epoch": 0.8798170731707317, "grad_norm": 0.4199455678462982, "learning_rate": 1.4134552845528458e-05, "loss": 0.0498, "step": 72145 }, { "epoch": 0.8798780487804878, "grad_norm": 0.4640718102455139, "learning_rate": 1.4134146341463414e-05, "loss": 0.053, "step": 72150 }, { "epoch": 0.8799390243902439, "grad_norm": 0.21063435077667236, "learning_rate": 1.4133739837398374e-05, "loss": 0.0198, "step": 72155 }, { "epoch": 0.88, "grad_norm": 0.4028433561325073, "learning_rate": 1.4133333333333334e-05, "loss": 0.021, "step": 72160 }, { "epoch": 0.880060975609756, "grad_norm": 0.7540515661239624, "learning_rate": 1.4132926829268294e-05, "loss": 0.0641, "step": 72165 }, { "epoch": 0.8801219512195122, "grad_norm": 0.6717495322227478, "learning_rate": 1.4132520325203254e-05, "loss": 0.0753, "step": 72170 }, { "epoch": 0.8801829268292682, "grad_norm": 0.45365065336227417, "learning_rate": 1.4132113821138214e-05, "loss": 0.058, "step": 72175 }, { "epoch": 0.8802439024390244, "grad_norm": 0.37941470742225647, "learning_rate": 1.4131707317073172e-05, "loss": 0.044, "step": 72180 }, { "epoch": 0.8803048780487804, "grad_norm": 0.40566450357437134, "learning_rate": 1.4131300813008132e-05, "loss": 0.0311, "step": 72185 }, { "epoch": 0.8803658536585366, "grad_norm": 0.597789466381073, "learning_rate": 1.4130894308943091e-05, "loss": 0.041, "step": 72190 }, { "epoch": 0.8804268292682926, "grad_norm": 0.5069324970245361, "learning_rate": 1.413048780487805e-05, "loss": 0.0784, "step": 72195 }, { "epoch": 0.8804878048780488, "grad_norm": 0.3637939989566803, "learning_rate": 1.413008130081301e-05, "loss": 0.0429, "step": 72200 }, { "epoch": 0.8805487804878048, "grad_norm": 0.9683631062507629, "learning_rate": 1.4129674796747969e-05, "loss": 0.0513, "step": 72205 }, { "epoch": 0.880609756097561, "grad_norm": 1.1530290842056274, "learning_rate": 1.4129268292682927e-05, "loss": 0.0925, "step": 72210 }, { "epoch": 0.880670731707317, "grad_norm": 0.5336560606956482, "learning_rate": 1.4128861788617887e-05, "loss": 0.0593, "step": 72215 }, { "epoch": 0.8807317073170732, "grad_norm": 0.7154411673545837, "learning_rate": 1.4128455284552847e-05, "loss": 0.0454, "step": 72220 }, { "epoch": 0.8807926829268292, "grad_norm": 0.5978644490242004, "learning_rate": 1.4128048780487807e-05, "loss": 0.0451, "step": 72225 }, { "epoch": 0.8808536585365854, "grad_norm": 0.566597580909729, "learning_rate": 1.4127642276422767e-05, "loss": 0.023, "step": 72230 }, { "epoch": 0.8809146341463414, "grad_norm": 0.38245296478271484, "learning_rate": 1.4127235772357726e-05, "loss": 0.0236, "step": 72235 }, { "epoch": 0.8809756097560976, "grad_norm": 0.6551957130432129, "learning_rate": 1.4126829268292683e-05, "loss": 0.0365, "step": 72240 }, { "epoch": 0.8810365853658536, "grad_norm": 1.2151039838790894, "learning_rate": 1.4126422764227643e-05, "loss": 0.0447, "step": 72245 }, { "epoch": 0.8810975609756098, "grad_norm": 0.7582557201385498, "learning_rate": 1.4126016260162602e-05, "loss": 0.0799, "step": 72250 }, { "epoch": 0.8811585365853658, "grad_norm": 0.6339250802993774, "learning_rate": 1.4125609756097562e-05, "loss": 0.0204, "step": 72255 }, { "epoch": 0.881219512195122, "grad_norm": 0.6944722533226013, "learning_rate": 1.4125203252032522e-05, "loss": 0.0384, "step": 72260 }, { "epoch": 0.881280487804878, "grad_norm": 0.9677171111106873, "learning_rate": 1.4124796747967482e-05, "loss": 0.0364, "step": 72265 }, { "epoch": 0.8813414634146342, "grad_norm": 0.23880930244922638, "learning_rate": 1.412439024390244e-05, "loss": 0.0212, "step": 72270 }, { "epoch": 0.8814024390243902, "grad_norm": 0.3186109960079193, "learning_rate": 1.41239837398374e-05, "loss": 0.0586, "step": 72275 }, { "epoch": 0.8814634146341463, "grad_norm": 0.35574108362197876, "learning_rate": 1.412357723577236e-05, "loss": 0.0518, "step": 72280 }, { "epoch": 0.8815243902439024, "grad_norm": 0.5974717140197754, "learning_rate": 1.4123170731707318e-05, "loss": 0.0563, "step": 72285 }, { "epoch": 0.8815853658536585, "grad_norm": 0.325002521276474, "learning_rate": 1.4122764227642278e-05, "loss": 0.0336, "step": 72290 }, { "epoch": 0.8816463414634146, "grad_norm": 0.6706408262252808, "learning_rate": 1.4122357723577237e-05, "loss": 0.0611, "step": 72295 }, { "epoch": 0.8817073170731707, "grad_norm": 0.3853383958339691, "learning_rate": 1.4121951219512196e-05, "loss": 0.0548, "step": 72300 }, { "epoch": 0.8817682926829268, "grad_norm": 0.3428049683570862, "learning_rate": 1.4121544715447155e-05, "loss": 0.0513, "step": 72305 }, { "epoch": 0.8818292682926829, "grad_norm": 0.43518567085266113, "learning_rate": 1.4121138211382115e-05, "loss": 0.0453, "step": 72310 }, { "epoch": 0.881890243902439, "grad_norm": 0.5396626591682434, "learning_rate": 1.4120731707317075e-05, "loss": 0.0577, "step": 72315 }, { "epoch": 0.8819512195121951, "grad_norm": 0.6089015007019043, "learning_rate": 1.4120325203252035e-05, "loss": 0.0323, "step": 72320 }, { "epoch": 0.8820121951219512, "grad_norm": 0.778779149055481, "learning_rate": 1.4119918699186995e-05, "loss": 0.0372, "step": 72325 }, { "epoch": 0.8820731707317073, "grad_norm": 0.46645963191986084, "learning_rate": 1.4119512195121951e-05, "loss": 0.0344, "step": 72330 }, { "epoch": 0.8821341463414634, "grad_norm": 0.5140206217765808, "learning_rate": 1.4119105691056911e-05, "loss": 0.055, "step": 72335 }, { "epoch": 0.8821951219512195, "grad_norm": 0.6581587791442871, "learning_rate": 1.411869918699187e-05, "loss": 0.055, "step": 72340 }, { "epoch": 0.8822560975609756, "grad_norm": 0.3595852553844452, "learning_rate": 1.411829268292683e-05, "loss": 0.0285, "step": 72345 }, { "epoch": 0.8823170731707317, "grad_norm": 0.23313969373703003, "learning_rate": 1.411788617886179e-05, "loss": 0.0563, "step": 72350 }, { "epoch": 0.8823780487804878, "grad_norm": 0.7327799201011658, "learning_rate": 1.411747967479675e-05, "loss": 0.0491, "step": 72355 }, { "epoch": 0.8824390243902439, "grad_norm": 0.5751803517341614, "learning_rate": 1.4117073170731708e-05, "loss": 0.0582, "step": 72360 }, { "epoch": 0.8825, "grad_norm": 0.5065327882766724, "learning_rate": 1.4116666666666668e-05, "loss": 0.0541, "step": 72365 }, { "epoch": 0.8825609756097561, "grad_norm": 0.7735816836357117, "learning_rate": 1.4116260162601626e-05, "loss": 0.0412, "step": 72370 }, { "epoch": 0.8826219512195121, "grad_norm": 0.6818752884864807, "learning_rate": 1.4115853658536586e-05, "loss": 0.0339, "step": 72375 }, { "epoch": 0.8826829268292683, "grad_norm": 0.6045874357223511, "learning_rate": 1.4115447154471546e-05, "loss": 0.0621, "step": 72380 }, { "epoch": 0.8827439024390243, "grad_norm": 0.38175609707832336, "learning_rate": 1.4115040650406506e-05, "loss": 0.0246, "step": 72385 }, { "epoch": 0.8828048780487805, "grad_norm": 0.3813464343547821, "learning_rate": 1.4114634146341464e-05, "loss": 0.0314, "step": 72390 }, { "epoch": 0.8828658536585365, "grad_norm": 0.40636271238327026, "learning_rate": 1.4114227642276424e-05, "loss": 0.0389, "step": 72395 }, { "epoch": 0.8829268292682927, "grad_norm": 0.5212200880050659, "learning_rate": 1.4113821138211384e-05, "loss": 0.0428, "step": 72400 }, { "epoch": 0.8829878048780487, "grad_norm": 1.401283621788025, "learning_rate": 1.4113414634146343e-05, "loss": 0.0306, "step": 72405 }, { "epoch": 0.8830487804878049, "grad_norm": 0.622747004032135, "learning_rate": 1.4113008130081303e-05, "loss": 0.0229, "step": 72410 }, { "epoch": 0.8831097560975609, "grad_norm": 0.525530219078064, "learning_rate": 1.4112601626016263e-05, "loss": 0.0356, "step": 72415 }, { "epoch": 0.8831707317073171, "grad_norm": 0.09513784199953079, "learning_rate": 1.411219512195122e-05, "loss": 0.0502, "step": 72420 }, { "epoch": 0.8832317073170731, "grad_norm": 0.5093193650245667, "learning_rate": 1.411178861788618e-05, "loss": 0.0505, "step": 72425 }, { "epoch": 0.8832926829268293, "grad_norm": 1.0248723030090332, "learning_rate": 1.4111382113821139e-05, "loss": 0.0657, "step": 72430 }, { "epoch": 0.8833536585365853, "grad_norm": 0.5187270641326904, "learning_rate": 1.4110975609756099e-05, "loss": 0.0277, "step": 72435 }, { "epoch": 0.8834146341463415, "grad_norm": 0.3615376055240631, "learning_rate": 1.4110569105691059e-05, "loss": 0.0226, "step": 72440 }, { "epoch": 0.8834756097560975, "grad_norm": 0.6499277353286743, "learning_rate": 1.4110162601626019e-05, "loss": 0.0408, "step": 72445 }, { "epoch": 0.8835365853658537, "grad_norm": 0.7790379524230957, "learning_rate": 1.4109756097560977e-05, "loss": 0.0663, "step": 72450 }, { "epoch": 0.8835975609756097, "grad_norm": 0.523985743522644, "learning_rate": 1.4109349593495936e-05, "loss": 0.0287, "step": 72455 }, { "epoch": 0.8836585365853659, "grad_norm": 0.9870331883430481, "learning_rate": 1.4108943089430895e-05, "loss": 0.0482, "step": 72460 }, { "epoch": 0.8837195121951219, "grad_norm": 0.6230271458625793, "learning_rate": 1.4108536585365854e-05, "loss": 0.0679, "step": 72465 }, { "epoch": 0.8837804878048781, "grad_norm": 0.4355553686618805, "learning_rate": 1.4108130081300814e-05, "loss": 0.0406, "step": 72470 }, { "epoch": 0.8838414634146341, "grad_norm": 0.32798656821250916, "learning_rate": 1.4107723577235774e-05, "loss": 0.0498, "step": 72475 }, { "epoch": 0.8839024390243903, "grad_norm": 1.8312742710113525, "learning_rate": 1.4107317073170732e-05, "loss": 0.0313, "step": 72480 }, { "epoch": 0.8839634146341463, "grad_norm": 0.28688305616378784, "learning_rate": 1.4106910569105692e-05, "loss": 0.0401, "step": 72485 }, { "epoch": 0.8840243902439024, "grad_norm": 0.45983943343162537, "learning_rate": 1.4106504065040652e-05, "loss": 0.0397, "step": 72490 }, { "epoch": 0.8840853658536585, "grad_norm": 0.6212489008903503, "learning_rate": 1.4106097560975612e-05, "loss": 0.071, "step": 72495 }, { "epoch": 0.8841463414634146, "grad_norm": 0.6101621389389038, "learning_rate": 1.4105691056910571e-05, "loss": 0.025, "step": 72500 }, { "epoch": 0.8842073170731707, "grad_norm": 1.012520432472229, "learning_rate": 1.4105284552845531e-05, "loss": 0.0461, "step": 72505 }, { "epoch": 0.8842682926829268, "grad_norm": 0.4843772053718567, "learning_rate": 1.4104878048780488e-05, "loss": 0.0401, "step": 72510 }, { "epoch": 0.8843292682926829, "grad_norm": 1.1966419219970703, "learning_rate": 1.4104471544715448e-05, "loss": 0.0572, "step": 72515 }, { "epoch": 0.884390243902439, "grad_norm": 0.7266497015953064, "learning_rate": 1.4104065040650407e-05, "loss": 0.0359, "step": 72520 }, { "epoch": 0.8844512195121951, "grad_norm": 0.571025013923645, "learning_rate": 1.4103658536585367e-05, "loss": 0.0312, "step": 72525 }, { "epoch": 0.8845121951219512, "grad_norm": 0.701327383518219, "learning_rate": 1.4103252032520327e-05, "loss": 0.0591, "step": 72530 }, { "epoch": 0.8845731707317073, "grad_norm": 0.38968122005462646, "learning_rate": 1.4102845528455287e-05, "loss": 0.0718, "step": 72535 }, { "epoch": 0.8846341463414634, "grad_norm": 0.2321140319108963, "learning_rate": 1.4102439024390245e-05, "loss": 0.1047, "step": 72540 }, { "epoch": 0.8846951219512195, "grad_norm": 0.30122098326683044, "learning_rate": 1.4102032520325205e-05, "loss": 0.0267, "step": 72545 }, { "epoch": 0.8847560975609756, "grad_norm": 0.39274564385414124, "learning_rate": 1.4101626016260163e-05, "loss": 0.0399, "step": 72550 }, { "epoch": 0.8848170731707317, "grad_norm": 0.6012138724327087, "learning_rate": 1.4101219512195123e-05, "loss": 0.0519, "step": 72555 }, { "epoch": 0.8848780487804878, "grad_norm": 0.7128737568855286, "learning_rate": 1.4100813008130083e-05, "loss": 0.0507, "step": 72560 }, { "epoch": 0.8849390243902439, "grad_norm": 0.47112560272216797, "learning_rate": 1.4100406504065042e-05, "loss": 0.039, "step": 72565 }, { "epoch": 0.885, "grad_norm": 0.7850009202957153, "learning_rate": 1.41e-05, "loss": 0.0409, "step": 72570 }, { "epoch": 0.885060975609756, "grad_norm": 0.42852675914764404, "learning_rate": 1.409959349593496e-05, "loss": 0.0268, "step": 72575 }, { "epoch": 0.8851219512195122, "grad_norm": 0.5961447954177856, "learning_rate": 1.409918699186992e-05, "loss": 0.0401, "step": 72580 }, { "epoch": 0.8851829268292682, "grad_norm": 0.23669102787971497, "learning_rate": 1.409878048780488e-05, "loss": 0.0275, "step": 72585 }, { "epoch": 0.8852439024390244, "grad_norm": 0.31837159395217896, "learning_rate": 1.409837398373984e-05, "loss": 0.0311, "step": 72590 }, { "epoch": 0.8853048780487804, "grad_norm": 0.8601019382476807, "learning_rate": 1.40979674796748e-05, "loss": 0.086, "step": 72595 }, { "epoch": 0.8853658536585366, "grad_norm": 1.0186928510665894, "learning_rate": 1.4097560975609756e-05, "loss": 0.0627, "step": 72600 }, { "epoch": 0.8854268292682926, "grad_norm": 1.5953024625778198, "learning_rate": 1.4097154471544716e-05, "loss": 0.0581, "step": 72605 }, { "epoch": 0.8854878048780488, "grad_norm": 1.0937480926513672, "learning_rate": 1.4096747967479676e-05, "loss": 0.0589, "step": 72610 }, { "epoch": 0.8855487804878048, "grad_norm": 0.6803282499313354, "learning_rate": 1.4096341463414636e-05, "loss": 0.0515, "step": 72615 }, { "epoch": 0.885609756097561, "grad_norm": 1.7943854331970215, "learning_rate": 1.4095934959349595e-05, "loss": 0.0409, "step": 72620 }, { "epoch": 0.885670731707317, "grad_norm": 0.525753378868103, "learning_rate": 1.4095528455284555e-05, "loss": 0.0321, "step": 72625 }, { "epoch": 0.8857317073170732, "grad_norm": 0.749732255935669, "learning_rate": 1.4095121951219513e-05, "loss": 0.0377, "step": 72630 }, { "epoch": 0.8857926829268292, "grad_norm": 0.5876030921936035, "learning_rate": 1.4094715447154471e-05, "loss": 0.0377, "step": 72635 }, { "epoch": 0.8858536585365854, "grad_norm": 1.080741047859192, "learning_rate": 1.4094308943089431e-05, "loss": 0.0353, "step": 72640 }, { "epoch": 0.8859146341463414, "grad_norm": 0.5975728034973145, "learning_rate": 1.4093902439024391e-05, "loss": 0.0458, "step": 72645 }, { "epoch": 0.8859756097560976, "grad_norm": 0.6256012320518494, "learning_rate": 1.4093495934959351e-05, "loss": 0.0481, "step": 72650 }, { "epoch": 0.8860365853658536, "grad_norm": 0.43999165296554565, "learning_rate": 1.409308943089431e-05, "loss": 0.0356, "step": 72655 }, { "epoch": 0.8860975609756098, "grad_norm": 0.5216357111930847, "learning_rate": 1.4092682926829269e-05, "loss": 0.0294, "step": 72660 }, { "epoch": 0.8861585365853658, "grad_norm": 0.37394455075263977, "learning_rate": 1.4092276422764229e-05, "loss": 0.0326, "step": 72665 }, { "epoch": 0.886219512195122, "grad_norm": 0.5465267896652222, "learning_rate": 1.4091869918699188e-05, "loss": 0.0435, "step": 72670 }, { "epoch": 0.886280487804878, "grad_norm": 0.3150981366634369, "learning_rate": 1.4091463414634148e-05, "loss": 0.0414, "step": 72675 }, { "epoch": 0.8863414634146342, "grad_norm": 0.5322985053062439, "learning_rate": 1.4091056910569108e-05, "loss": 0.0365, "step": 72680 }, { "epoch": 0.8864024390243902, "grad_norm": 0.42262810468673706, "learning_rate": 1.4090650406504068e-05, "loss": 0.0478, "step": 72685 }, { "epoch": 0.8864634146341464, "grad_norm": 0.6643384099006653, "learning_rate": 1.4090243902439024e-05, "loss": 0.0508, "step": 72690 }, { "epoch": 0.8865243902439024, "grad_norm": 0.5752224326133728, "learning_rate": 1.4089837398373984e-05, "loss": 0.0637, "step": 72695 }, { "epoch": 0.8865853658536585, "grad_norm": 0.7423221468925476, "learning_rate": 1.4089430894308944e-05, "loss": 0.0319, "step": 72700 }, { "epoch": 0.8866463414634146, "grad_norm": 0.6061269044876099, "learning_rate": 1.4089024390243904e-05, "loss": 0.0514, "step": 72705 }, { "epoch": 0.8867073170731707, "grad_norm": 1.2603189945220947, "learning_rate": 1.4088617886178864e-05, "loss": 0.0472, "step": 72710 }, { "epoch": 0.8867682926829268, "grad_norm": 0.6086381673812866, "learning_rate": 1.4088211382113823e-05, "loss": 0.0424, "step": 72715 }, { "epoch": 0.8868292682926829, "grad_norm": 0.42813464999198914, "learning_rate": 1.4087804878048782e-05, "loss": 0.0457, "step": 72720 }, { "epoch": 0.886890243902439, "grad_norm": 0.8115578293800354, "learning_rate": 1.408739837398374e-05, "loss": 0.0407, "step": 72725 }, { "epoch": 0.8869512195121951, "grad_norm": 0.4409499764442444, "learning_rate": 1.40869918699187e-05, "loss": 0.0518, "step": 72730 }, { "epoch": 0.8870121951219512, "grad_norm": 0.6957923173904419, "learning_rate": 1.408658536585366e-05, "loss": 0.0504, "step": 72735 }, { "epoch": 0.8870731707317073, "grad_norm": 0.6165318489074707, "learning_rate": 1.408617886178862e-05, "loss": 0.0451, "step": 72740 }, { "epoch": 0.8871341463414634, "grad_norm": 0.7715339660644531, "learning_rate": 1.4085772357723579e-05, "loss": 0.0383, "step": 72745 }, { "epoch": 0.8871951219512195, "grad_norm": 0.5062179565429688, "learning_rate": 1.4085365853658537e-05, "loss": 0.0486, "step": 72750 }, { "epoch": 0.8872560975609756, "grad_norm": 2.7391281127929688, "learning_rate": 1.4084959349593497e-05, "loss": 0.0752, "step": 72755 }, { "epoch": 0.8873170731707317, "grad_norm": 0.4426858723163605, "learning_rate": 1.4084552845528457e-05, "loss": 0.0429, "step": 72760 }, { "epoch": 0.8873780487804878, "grad_norm": 0.31952130794525146, "learning_rate": 1.4084146341463417e-05, "loss": 0.0413, "step": 72765 }, { "epoch": 0.8874390243902439, "grad_norm": 0.5149855017662048, "learning_rate": 1.4083739837398376e-05, "loss": 0.0447, "step": 72770 }, { "epoch": 0.8875, "grad_norm": 1.1054604053497314, "learning_rate": 1.4083333333333336e-05, "loss": 0.0511, "step": 72775 }, { "epoch": 0.8875609756097561, "grad_norm": 0.4419623017311096, "learning_rate": 1.4082926829268293e-05, "loss": 0.044, "step": 72780 }, { "epoch": 0.8876219512195122, "grad_norm": 0.5903491377830505, "learning_rate": 1.4082520325203253e-05, "loss": 0.0572, "step": 72785 }, { "epoch": 0.8876829268292683, "grad_norm": 0.7025286555290222, "learning_rate": 1.4082113821138212e-05, "loss": 0.0457, "step": 72790 }, { "epoch": 0.8877439024390243, "grad_norm": 0.3480384349822998, "learning_rate": 1.4081707317073172e-05, "loss": 0.0431, "step": 72795 }, { "epoch": 0.8878048780487805, "grad_norm": 0.6313233375549316, "learning_rate": 1.4081300813008132e-05, "loss": 0.0298, "step": 72800 }, { "epoch": 0.8878658536585365, "grad_norm": 0.867081344127655, "learning_rate": 1.4080894308943092e-05, "loss": 0.0334, "step": 72805 }, { "epoch": 0.8879268292682927, "grad_norm": 1.2090511322021484, "learning_rate": 1.408048780487805e-05, "loss": 0.0612, "step": 72810 }, { "epoch": 0.8879878048780487, "grad_norm": 0.16738799214363098, "learning_rate": 1.4080081300813008e-05, "loss": 0.0302, "step": 72815 }, { "epoch": 0.8880487804878049, "grad_norm": 0.45663243532180786, "learning_rate": 1.4079674796747968e-05, "loss": 0.0323, "step": 72820 }, { "epoch": 0.8881097560975609, "grad_norm": 0.49084246158599854, "learning_rate": 1.4079268292682928e-05, "loss": 0.0186, "step": 72825 }, { "epoch": 0.8881707317073171, "grad_norm": 0.49955055117607117, "learning_rate": 1.4078861788617888e-05, "loss": 0.0371, "step": 72830 }, { "epoch": 0.8882317073170731, "grad_norm": 0.5347467064857483, "learning_rate": 1.4078455284552847e-05, "loss": 0.0244, "step": 72835 }, { "epoch": 0.8882926829268293, "grad_norm": 0.30966904759407043, "learning_rate": 1.4078048780487805e-05, "loss": 0.0617, "step": 72840 }, { "epoch": 0.8883536585365853, "grad_norm": 0.6799771785736084, "learning_rate": 1.4077642276422765e-05, "loss": 0.0491, "step": 72845 }, { "epoch": 0.8884146341463415, "grad_norm": 0.2459830492734909, "learning_rate": 1.4077235772357725e-05, "loss": 0.0364, "step": 72850 }, { "epoch": 0.8884756097560975, "grad_norm": 0.9558047652244568, "learning_rate": 1.4076829268292685e-05, "loss": 0.0781, "step": 72855 }, { "epoch": 0.8885365853658537, "grad_norm": 0.48452281951904297, "learning_rate": 1.4076422764227645e-05, "loss": 0.0598, "step": 72860 }, { "epoch": 0.8885975609756097, "grad_norm": 0.8876370787620544, "learning_rate": 1.4076016260162605e-05, "loss": 0.0517, "step": 72865 }, { "epoch": 0.8886585365853659, "grad_norm": 0.4647754430770874, "learning_rate": 1.4075609756097561e-05, "loss": 0.0577, "step": 72870 }, { "epoch": 0.8887195121951219, "grad_norm": 0.4199875295162201, "learning_rate": 1.4075203252032521e-05, "loss": 0.0389, "step": 72875 }, { "epoch": 0.8887804878048781, "grad_norm": 0.7431939840316772, "learning_rate": 1.407479674796748e-05, "loss": 0.1047, "step": 72880 }, { "epoch": 0.8888414634146341, "grad_norm": 0.37986478209495544, "learning_rate": 1.407439024390244e-05, "loss": 0.0424, "step": 72885 }, { "epoch": 0.8889024390243903, "grad_norm": 0.6850677132606506, "learning_rate": 1.40739837398374e-05, "loss": 0.0601, "step": 72890 }, { "epoch": 0.8889634146341463, "grad_norm": 1.3731592893600464, "learning_rate": 1.407357723577236e-05, "loss": 0.0719, "step": 72895 }, { "epoch": 0.8890243902439025, "grad_norm": 0.32218119502067566, "learning_rate": 1.4073170731707317e-05, "loss": 0.0413, "step": 72900 }, { "epoch": 0.8890853658536585, "grad_norm": 0.6437419652938843, "learning_rate": 1.4072764227642276e-05, "loss": 0.0413, "step": 72905 }, { "epoch": 0.8891463414634146, "grad_norm": 0.21297237277030945, "learning_rate": 1.4072357723577236e-05, "loss": 0.0287, "step": 72910 }, { "epoch": 0.8892073170731707, "grad_norm": 0.43808260560035706, "learning_rate": 1.4071951219512196e-05, "loss": 0.0384, "step": 72915 }, { "epoch": 0.8892682926829268, "grad_norm": 0.5844728350639343, "learning_rate": 1.4071544715447156e-05, "loss": 0.0365, "step": 72920 }, { "epoch": 0.8893292682926829, "grad_norm": 0.5264991521835327, "learning_rate": 1.4071138211382116e-05, "loss": 0.0326, "step": 72925 }, { "epoch": 0.889390243902439, "grad_norm": 0.49608007073402405, "learning_rate": 1.4070731707317074e-05, "loss": 0.0405, "step": 72930 }, { "epoch": 0.8894512195121951, "grad_norm": 0.9407493472099304, "learning_rate": 1.4070325203252034e-05, "loss": 0.0401, "step": 72935 }, { "epoch": 0.8895121951219512, "grad_norm": 0.19729788601398468, "learning_rate": 1.4069918699186993e-05, "loss": 0.0341, "step": 72940 }, { "epoch": 0.8895731707317073, "grad_norm": 0.6469299793243408, "learning_rate": 1.4069512195121953e-05, "loss": 0.0572, "step": 72945 }, { "epoch": 0.8896341463414634, "grad_norm": 0.3457988202571869, "learning_rate": 1.4069105691056913e-05, "loss": 0.0425, "step": 72950 }, { "epoch": 0.8896951219512195, "grad_norm": 0.5459713935852051, "learning_rate": 1.4068699186991873e-05, "loss": 0.0334, "step": 72955 }, { "epoch": 0.8897560975609756, "grad_norm": 0.6293070912361145, "learning_rate": 1.406829268292683e-05, "loss": 0.0379, "step": 72960 }, { "epoch": 0.8898170731707317, "grad_norm": 0.8293498158454895, "learning_rate": 1.406788617886179e-05, "loss": 0.0731, "step": 72965 }, { "epoch": 0.8898780487804878, "grad_norm": 0.6051873564720154, "learning_rate": 1.4067479674796749e-05, "loss": 0.0432, "step": 72970 }, { "epoch": 0.8899390243902439, "grad_norm": 0.6318880915641785, "learning_rate": 1.4067073170731709e-05, "loss": 0.0294, "step": 72975 }, { "epoch": 0.89, "grad_norm": 0.3816411793231964, "learning_rate": 1.4066666666666669e-05, "loss": 0.0408, "step": 72980 }, { "epoch": 0.890060975609756, "grad_norm": 22.978965759277344, "learning_rate": 1.4066260162601628e-05, "loss": 0.0386, "step": 72985 }, { "epoch": 0.8901219512195122, "grad_norm": 0.7214639782905579, "learning_rate": 1.4065853658536585e-05, "loss": 0.0664, "step": 72990 }, { "epoch": 0.8901829268292683, "grad_norm": 0.6974252462387085, "learning_rate": 1.4065447154471545e-05, "loss": 0.0257, "step": 72995 }, { "epoch": 0.8902439024390244, "grad_norm": 0.42962008714675903, "learning_rate": 1.4065040650406505e-05, "loss": 0.0802, "step": 73000 }, { "epoch": 0.8903048780487804, "grad_norm": 0.44783902168273926, "learning_rate": 1.4064634146341464e-05, "loss": 0.0514, "step": 73005 }, { "epoch": 0.8903658536585366, "grad_norm": 0.3989277780056, "learning_rate": 1.4064227642276424e-05, "loss": 0.0511, "step": 73010 }, { "epoch": 0.8904268292682926, "grad_norm": 1.0088931322097778, "learning_rate": 1.4063821138211384e-05, "loss": 0.0473, "step": 73015 }, { "epoch": 0.8904878048780488, "grad_norm": 0.5360299944877625, "learning_rate": 1.4063414634146342e-05, "loss": 0.0469, "step": 73020 }, { "epoch": 0.8905487804878048, "grad_norm": 0.4578700661659241, "learning_rate": 1.4063008130081302e-05, "loss": 0.0364, "step": 73025 }, { "epoch": 0.890609756097561, "grad_norm": 0.5714344382286072, "learning_rate": 1.4062601626016262e-05, "loss": 0.0602, "step": 73030 }, { "epoch": 0.890670731707317, "grad_norm": 0.4439414441585541, "learning_rate": 1.4062195121951222e-05, "loss": 0.0225, "step": 73035 }, { "epoch": 0.8907317073170732, "grad_norm": 0.2885790467262268, "learning_rate": 1.4061788617886181e-05, "loss": 0.0179, "step": 73040 }, { "epoch": 0.8907926829268292, "grad_norm": 0.6242091059684753, "learning_rate": 1.406138211382114e-05, "loss": 0.0383, "step": 73045 }, { "epoch": 0.8908536585365854, "grad_norm": 0.4378567039966583, "learning_rate": 1.4060975609756098e-05, "loss": 0.0502, "step": 73050 }, { "epoch": 0.8909146341463414, "grad_norm": 0.1294260025024414, "learning_rate": 1.4060569105691057e-05, "loss": 0.0512, "step": 73055 }, { "epoch": 0.8909756097560976, "grad_norm": 0.7334704399108887, "learning_rate": 1.4060162601626017e-05, "loss": 0.0615, "step": 73060 }, { "epoch": 0.8910365853658536, "grad_norm": 0.06539170444011688, "learning_rate": 1.4059756097560977e-05, "loss": 0.0365, "step": 73065 }, { "epoch": 0.8910975609756098, "grad_norm": 1.2118313312530518, "learning_rate": 1.4059349593495937e-05, "loss": 0.0361, "step": 73070 }, { "epoch": 0.8911585365853658, "grad_norm": 0.4912000596523285, "learning_rate": 1.4058943089430897e-05, "loss": 0.0391, "step": 73075 }, { "epoch": 0.891219512195122, "grad_norm": 0.9659811854362488, "learning_rate": 1.4058536585365853e-05, "loss": 0.0598, "step": 73080 }, { "epoch": 0.891280487804878, "grad_norm": 0.5505623817443848, "learning_rate": 1.4058130081300813e-05, "loss": 0.0468, "step": 73085 }, { "epoch": 0.8913414634146342, "grad_norm": 0.22346127033233643, "learning_rate": 1.4057723577235773e-05, "loss": 0.0326, "step": 73090 }, { "epoch": 0.8914024390243902, "grad_norm": 1.5079185962677002, "learning_rate": 1.4057317073170733e-05, "loss": 0.0467, "step": 73095 }, { "epoch": 0.8914634146341464, "grad_norm": 0.9724406599998474, "learning_rate": 1.4056910569105693e-05, "loss": 0.0502, "step": 73100 }, { "epoch": 0.8915243902439024, "grad_norm": 0.3173692226409912, "learning_rate": 1.4056504065040652e-05, "loss": 0.0377, "step": 73105 }, { "epoch": 0.8915853658536586, "grad_norm": 0.7575974464416504, "learning_rate": 1.405609756097561e-05, "loss": 0.044, "step": 73110 }, { "epoch": 0.8916463414634146, "grad_norm": 0.48974940180778503, "learning_rate": 1.405569105691057e-05, "loss": 0.0419, "step": 73115 }, { "epoch": 0.8917073170731707, "grad_norm": 0.7471808791160583, "learning_rate": 1.405528455284553e-05, "loss": 0.0404, "step": 73120 }, { "epoch": 0.8917682926829268, "grad_norm": 0.766118586063385, "learning_rate": 1.405487804878049e-05, "loss": 0.0734, "step": 73125 }, { "epoch": 0.8918292682926829, "grad_norm": 1.1496505737304688, "learning_rate": 1.405447154471545e-05, "loss": 0.0449, "step": 73130 }, { "epoch": 0.891890243902439, "grad_norm": 0.49377351999282837, "learning_rate": 1.4054065040650408e-05, "loss": 0.041, "step": 73135 }, { "epoch": 0.8919512195121951, "grad_norm": 0.3637543022632599, "learning_rate": 1.4053658536585366e-05, "loss": 0.0265, "step": 73140 }, { "epoch": 0.8920121951219512, "grad_norm": 0.4608251452445984, "learning_rate": 1.4053252032520326e-05, "loss": 0.0308, "step": 73145 }, { "epoch": 0.8920731707317073, "grad_norm": 0.6495087146759033, "learning_rate": 1.4052845528455286e-05, "loss": 0.0345, "step": 73150 }, { "epoch": 0.8921341463414634, "grad_norm": 0.5793495774269104, "learning_rate": 1.4052439024390245e-05, "loss": 0.0213, "step": 73155 }, { "epoch": 0.8921951219512195, "grad_norm": 0.3597155511379242, "learning_rate": 1.4052032520325205e-05, "loss": 0.0297, "step": 73160 }, { "epoch": 0.8922560975609756, "grad_norm": 2.8790841102600098, "learning_rate": 1.4051626016260165e-05, "loss": 0.0872, "step": 73165 }, { "epoch": 0.8923170731707317, "grad_norm": 0.4921470284461975, "learning_rate": 1.4051219512195122e-05, "loss": 0.0342, "step": 73170 }, { "epoch": 0.8923780487804878, "grad_norm": 0.3438687026500702, "learning_rate": 1.4050813008130081e-05, "loss": 0.0445, "step": 73175 }, { "epoch": 0.8924390243902439, "grad_norm": 0.6579373478889465, "learning_rate": 1.4050406504065041e-05, "loss": 0.0581, "step": 73180 }, { "epoch": 0.8925, "grad_norm": 0.6483650207519531, "learning_rate": 1.4050000000000001e-05, "loss": 0.0707, "step": 73185 }, { "epoch": 0.8925609756097561, "grad_norm": 1.1987783908843994, "learning_rate": 1.404959349593496e-05, "loss": 0.0549, "step": 73190 }, { "epoch": 0.8926219512195122, "grad_norm": 0.5645003318786621, "learning_rate": 1.404918699186992e-05, "loss": 0.0372, "step": 73195 }, { "epoch": 0.8926829268292683, "grad_norm": 0.523052453994751, "learning_rate": 1.4048780487804879e-05, "loss": 0.0414, "step": 73200 }, { "epoch": 0.8927439024390244, "grad_norm": 0.6824104189872742, "learning_rate": 1.4048373983739839e-05, "loss": 0.0401, "step": 73205 }, { "epoch": 0.8928048780487805, "grad_norm": 0.5046728849411011, "learning_rate": 1.4047967479674798e-05, "loss": 0.0353, "step": 73210 }, { "epoch": 0.8928658536585365, "grad_norm": 0.6542367935180664, "learning_rate": 1.4047560975609758e-05, "loss": 0.0353, "step": 73215 }, { "epoch": 0.8929268292682927, "grad_norm": 0.6385696530342102, "learning_rate": 1.4047154471544718e-05, "loss": 0.0261, "step": 73220 }, { "epoch": 0.8929878048780487, "grad_norm": 0.4732264578342438, "learning_rate": 1.4046747967479676e-05, "loss": 0.0443, "step": 73225 }, { "epoch": 0.8930487804878049, "grad_norm": 0.6734127402305603, "learning_rate": 1.4046341463414634e-05, "loss": 0.0438, "step": 73230 }, { "epoch": 0.8931097560975609, "grad_norm": 0.7237747311592102, "learning_rate": 1.4045934959349594e-05, "loss": 0.0313, "step": 73235 }, { "epoch": 0.8931707317073171, "grad_norm": 0.7434213757514954, "learning_rate": 1.4045528455284554e-05, "loss": 0.0591, "step": 73240 }, { "epoch": 0.8932317073170731, "grad_norm": 0.8833279609680176, "learning_rate": 1.4045121951219514e-05, "loss": 0.0364, "step": 73245 }, { "epoch": 0.8932926829268293, "grad_norm": 0.26849278807640076, "learning_rate": 1.4044715447154474e-05, "loss": 0.0395, "step": 73250 }, { "epoch": 0.8933536585365853, "grad_norm": 0.5219739079475403, "learning_rate": 1.4044308943089433e-05, "loss": 0.038, "step": 73255 }, { "epoch": 0.8934146341463415, "grad_norm": 0.6826415657997131, "learning_rate": 1.404390243902439e-05, "loss": 0.0599, "step": 73260 }, { "epoch": 0.8934756097560975, "grad_norm": 0.41688376665115356, "learning_rate": 1.404349593495935e-05, "loss": 0.0865, "step": 73265 }, { "epoch": 0.8935365853658537, "grad_norm": 0.43002739548683167, "learning_rate": 1.404308943089431e-05, "loss": 0.0321, "step": 73270 }, { "epoch": 0.8935975609756097, "grad_norm": 0.36585500836372375, "learning_rate": 1.404268292682927e-05, "loss": 0.0394, "step": 73275 }, { "epoch": 0.8936585365853659, "grad_norm": 0.5839827060699463, "learning_rate": 1.4042276422764229e-05, "loss": 0.0402, "step": 73280 }, { "epoch": 0.8937195121951219, "grad_norm": 0.560547947883606, "learning_rate": 1.4041869918699189e-05, "loss": 0.0412, "step": 73285 }, { "epoch": 0.8937804878048781, "grad_norm": 1.0853885412216187, "learning_rate": 1.4041463414634147e-05, "loss": 0.0752, "step": 73290 }, { "epoch": 0.8938414634146341, "grad_norm": 0.41616731882095337, "learning_rate": 1.4041056910569107e-05, "loss": 0.045, "step": 73295 }, { "epoch": 0.8939024390243903, "grad_norm": 0.7637137770652771, "learning_rate": 1.4040650406504067e-05, "loss": 0.0196, "step": 73300 }, { "epoch": 0.8939634146341463, "grad_norm": 0.25157228112220764, "learning_rate": 1.4040243902439027e-05, "loss": 0.0637, "step": 73305 }, { "epoch": 0.8940243902439025, "grad_norm": 0.5060285925865173, "learning_rate": 1.4039837398373985e-05, "loss": 0.0604, "step": 73310 }, { "epoch": 0.8940853658536585, "grad_norm": 0.6452106833457947, "learning_rate": 1.4039430894308945e-05, "loss": 0.0533, "step": 73315 }, { "epoch": 0.8941463414634147, "grad_norm": 0.474676251411438, "learning_rate": 1.4039024390243903e-05, "loss": 0.042, "step": 73320 }, { "epoch": 0.8942073170731707, "grad_norm": 1.392723798751831, "learning_rate": 1.4038617886178862e-05, "loss": 0.0654, "step": 73325 }, { "epoch": 0.8942682926829268, "grad_norm": 0.6877263784408569, "learning_rate": 1.4038211382113822e-05, "loss": 0.0384, "step": 73330 }, { "epoch": 0.8943292682926829, "grad_norm": 0.2345750629901886, "learning_rate": 1.4037804878048782e-05, "loss": 0.0279, "step": 73335 }, { "epoch": 0.894390243902439, "grad_norm": 0.8220568895339966, "learning_rate": 1.4037398373983742e-05, "loss": 0.0302, "step": 73340 }, { "epoch": 0.8944512195121951, "grad_norm": 0.46164047718048096, "learning_rate": 1.4036991869918702e-05, "loss": 0.0329, "step": 73345 }, { "epoch": 0.8945121951219512, "grad_norm": 0.810988187789917, "learning_rate": 1.4036585365853658e-05, "loss": 0.0478, "step": 73350 }, { "epoch": 0.8945731707317073, "grad_norm": 0.7826861143112183, "learning_rate": 1.4036178861788618e-05, "loss": 0.0365, "step": 73355 }, { "epoch": 0.8946341463414634, "grad_norm": 0.6146560907363892, "learning_rate": 1.4035772357723578e-05, "loss": 0.0257, "step": 73360 }, { "epoch": 0.8946951219512195, "grad_norm": 0.410959929227829, "learning_rate": 1.4035365853658538e-05, "loss": 0.0464, "step": 73365 }, { "epoch": 0.8947560975609756, "grad_norm": 0.39766445755958557, "learning_rate": 1.4034959349593497e-05, "loss": 0.048, "step": 73370 }, { "epoch": 0.8948170731707317, "grad_norm": 1.0682549476623535, "learning_rate": 1.4034552845528457e-05, "loss": 0.0436, "step": 73375 }, { "epoch": 0.8948780487804878, "grad_norm": 0.5212699174880981, "learning_rate": 1.4034146341463415e-05, "loss": 0.0344, "step": 73380 }, { "epoch": 0.8949390243902439, "grad_norm": 0.431718647480011, "learning_rate": 1.4033739837398375e-05, "loss": 0.0357, "step": 73385 }, { "epoch": 0.895, "grad_norm": 0.3870064914226532, "learning_rate": 1.4033333333333335e-05, "loss": 0.0495, "step": 73390 }, { "epoch": 0.8950609756097561, "grad_norm": 0.6137146353721619, "learning_rate": 1.4032926829268295e-05, "loss": 0.0625, "step": 73395 }, { "epoch": 0.8951219512195122, "grad_norm": 0.4360557198524475, "learning_rate": 1.4032520325203253e-05, "loss": 0.0293, "step": 73400 }, { "epoch": 0.8951829268292683, "grad_norm": 0.8303175568580627, "learning_rate": 1.4032113821138213e-05, "loss": 0.0535, "step": 73405 }, { "epoch": 0.8952439024390244, "grad_norm": 0.6925923228263855, "learning_rate": 1.4031707317073171e-05, "loss": 0.0441, "step": 73410 }, { "epoch": 0.8953048780487805, "grad_norm": 0.31595122814178467, "learning_rate": 1.403130081300813e-05, "loss": 0.0345, "step": 73415 }, { "epoch": 0.8953658536585366, "grad_norm": 0.5824403166770935, "learning_rate": 1.403089430894309e-05, "loss": 0.0345, "step": 73420 }, { "epoch": 0.8954268292682926, "grad_norm": 0.44405096769332886, "learning_rate": 1.403048780487805e-05, "loss": 0.0269, "step": 73425 }, { "epoch": 0.8954878048780488, "grad_norm": 2.079685688018799, "learning_rate": 1.403008130081301e-05, "loss": 0.0591, "step": 73430 }, { "epoch": 0.8955487804878048, "grad_norm": 0.2784847319126129, "learning_rate": 1.402967479674797e-05, "loss": 0.0392, "step": 73435 }, { "epoch": 0.895609756097561, "grad_norm": 0.7283955216407776, "learning_rate": 1.4029268292682927e-05, "loss": 0.0441, "step": 73440 }, { "epoch": 0.895670731707317, "grad_norm": 0.33864864706993103, "learning_rate": 1.4028861788617886e-05, "loss": 0.032, "step": 73445 }, { "epoch": 0.8957317073170732, "grad_norm": 2.333923101425171, "learning_rate": 1.4028455284552846e-05, "loss": 0.0235, "step": 73450 }, { "epoch": 0.8957926829268292, "grad_norm": 0.8711426258087158, "learning_rate": 1.4028048780487806e-05, "loss": 0.0496, "step": 73455 }, { "epoch": 0.8958536585365854, "grad_norm": 0.5710893869400024, "learning_rate": 1.4027642276422766e-05, "loss": 0.0661, "step": 73460 }, { "epoch": 0.8959146341463414, "grad_norm": 0.5228899717330933, "learning_rate": 1.4027235772357726e-05, "loss": 0.033, "step": 73465 }, { "epoch": 0.8959756097560976, "grad_norm": 0.3690122365951538, "learning_rate": 1.4026829268292684e-05, "loss": 0.0339, "step": 73470 }, { "epoch": 0.8960365853658536, "grad_norm": 0.19567328691482544, "learning_rate": 1.4026422764227644e-05, "loss": 0.0464, "step": 73475 }, { "epoch": 0.8960975609756098, "grad_norm": 0.12834487855434418, "learning_rate": 1.4026016260162603e-05, "loss": 0.0223, "step": 73480 }, { "epoch": 0.8961585365853658, "grad_norm": 0.9256407022476196, "learning_rate": 1.4025609756097563e-05, "loss": 0.0463, "step": 73485 }, { "epoch": 0.896219512195122, "grad_norm": 0.3490161895751953, "learning_rate": 1.4025203252032521e-05, "loss": 0.0238, "step": 73490 }, { "epoch": 0.896280487804878, "grad_norm": 0.48327043652534485, "learning_rate": 1.4024796747967481e-05, "loss": 0.0442, "step": 73495 }, { "epoch": 0.8963414634146342, "grad_norm": 0.4305306673049927, "learning_rate": 1.402439024390244e-05, "loss": 0.0326, "step": 73500 }, { "epoch": 0.8964024390243902, "grad_norm": 0.6924322843551636, "learning_rate": 1.4023983739837399e-05, "loss": 0.031, "step": 73505 }, { "epoch": 0.8964634146341464, "grad_norm": 0.5953752398490906, "learning_rate": 1.4023577235772359e-05, "loss": 0.0245, "step": 73510 }, { "epoch": 0.8965243902439024, "grad_norm": 0.5543162822723389, "learning_rate": 1.4023170731707319e-05, "loss": 0.0331, "step": 73515 }, { "epoch": 0.8965853658536586, "grad_norm": 0.6026347875595093, "learning_rate": 1.4022764227642279e-05, "loss": 0.0342, "step": 73520 }, { "epoch": 0.8966463414634146, "grad_norm": 0.3793988823890686, "learning_rate": 1.4022357723577238e-05, "loss": 0.0395, "step": 73525 }, { "epoch": 0.8967073170731708, "grad_norm": 0.665805995464325, "learning_rate": 1.4021951219512195e-05, "loss": 0.036, "step": 73530 }, { "epoch": 0.8967682926829268, "grad_norm": 0.6014606356620789, "learning_rate": 1.4021544715447155e-05, "loss": 0.0791, "step": 73535 }, { "epoch": 0.896829268292683, "grad_norm": 0.4106595516204834, "learning_rate": 1.4021138211382114e-05, "loss": 0.024, "step": 73540 }, { "epoch": 0.896890243902439, "grad_norm": 1.2928149700164795, "learning_rate": 1.4020731707317074e-05, "loss": 0.0461, "step": 73545 }, { "epoch": 0.8969512195121951, "grad_norm": 0.6123837232589722, "learning_rate": 1.4020325203252034e-05, "loss": 0.0319, "step": 73550 }, { "epoch": 0.8970121951219512, "grad_norm": 0.5063599944114685, "learning_rate": 1.4019918699186994e-05, "loss": 0.0338, "step": 73555 }, { "epoch": 0.8970731707317073, "grad_norm": 1.0151634216308594, "learning_rate": 1.4019512195121952e-05, "loss": 0.028, "step": 73560 }, { "epoch": 0.8971341463414634, "grad_norm": 0.24745644629001617, "learning_rate": 1.4019105691056912e-05, "loss": 0.0594, "step": 73565 }, { "epoch": 0.8971951219512195, "grad_norm": 0.7869508862495422, "learning_rate": 1.4018699186991872e-05, "loss": 0.0303, "step": 73570 }, { "epoch": 0.8972560975609756, "grad_norm": 1.8005634546279907, "learning_rate": 1.401829268292683e-05, "loss": 0.039, "step": 73575 }, { "epoch": 0.8973170731707317, "grad_norm": 0.525568425655365, "learning_rate": 1.401788617886179e-05, "loss": 0.0399, "step": 73580 }, { "epoch": 0.8973780487804878, "grad_norm": 0.5875566005706787, "learning_rate": 1.401747967479675e-05, "loss": 0.0475, "step": 73585 }, { "epoch": 0.8974390243902439, "grad_norm": 0.7602859139442444, "learning_rate": 1.4017073170731708e-05, "loss": 0.0659, "step": 73590 }, { "epoch": 0.8975, "grad_norm": 0.5484591126441956, "learning_rate": 1.4016666666666667e-05, "loss": 0.0386, "step": 73595 }, { "epoch": 0.8975609756097561, "grad_norm": 0.7241572141647339, "learning_rate": 1.4016260162601627e-05, "loss": 0.0655, "step": 73600 }, { "epoch": 0.8976219512195122, "grad_norm": 1.4169448614120483, "learning_rate": 1.4015853658536587e-05, "loss": 0.0694, "step": 73605 }, { "epoch": 0.8976829268292683, "grad_norm": 0.7478677034378052, "learning_rate": 1.4015447154471547e-05, "loss": 0.0471, "step": 73610 }, { "epoch": 0.8977439024390244, "grad_norm": 0.8738951086997986, "learning_rate": 1.4015040650406507e-05, "loss": 0.0572, "step": 73615 }, { "epoch": 0.8978048780487805, "grad_norm": 0.8002816438674927, "learning_rate": 1.4014634146341463e-05, "loss": 0.0385, "step": 73620 }, { "epoch": 0.8978658536585366, "grad_norm": 0.9026579260826111, "learning_rate": 1.4014227642276423e-05, "loss": 0.0445, "step": 73625 }, { "epoch": 0.8979268292682927, "grad_norm": 0.805097222328186, "learning_rate": 1.4013821138211383e-05, "loss": 0.0375, "step": 73630 }, { "epoch": 0.8979878048780487, "grad_norm": 0.8633623123168945, "learning_rate": 1.4013414634146343e-05, "loss": 0.0936, "step": 73635 }, { "epoch": 0.8980487804878049, "grad_norm": 0.43376919627189636, "learning_rate": 1.4013008130081302e-05, "loss": 0.0351, "step": 73640 }, { "epoch": 0.8981097560975609, "grad_norm": 0.6607679724693298, "learning_rate": 1.4012601626016262e-05, "loss": 0.0349, "step": 73645 }, { "epoch": 0.8981707317073171, "grad_norm": 1.1696442365646362, "learning_rate": 1.401219512195122e-05, "loss": 0.0298, "step": 73650 }, { "epoch": 0.8982317073170731, "grad_norm": 0.6701169013977051, "learning_rate": 1.401178861788618e-05, "loss": 0.0326, "step": 73655 }, { "epoch": 0.8982926829268293, "grad_norm": 0.3861851394176483, "learning_rate": 1.401138211382114e-05, "loss": 0.0434, "step": 73660 }, { "epoch": 0.8983536585365853, "grad_norm": 0.5305500626564026, "learning_rate": 1.4010975609756098e-05, "loss": 0.0457, "step": 73665 }, { "epoch": 0.8984146341463415, "grad_norm": 0.510554850101471, "learning_rate": 1.4010569105691058e-05, "loss": 0.0287, "step": 73670 }, { "epoch": 0.8984756097560975, "grad_norm": 0.4170764982700348, "learning_rate": 1.4010162601626018e-05, "loss": 0.0182, "step": 73675 }, { "epoch": 0.8985365853658537, "grad_norm": 1.5558563470840454, "learning_rate": 1.4009756097560976e-05, "loss": 0.0417, "step": 73680 }, { "epoch": 0.8985975609756097, "grad_norm": 0.7042909264564514, "learning_rate": 1.4009349593495936e-05, "loss": 0.0447, "step": 73685 }, { "epoch": 0.8986585365853659, "grad_norm": 1.0769500732421875, "learning_rate": 1.4008943089430896e-05, "loss": 0.0363, "step": 73690 }, { "epoch": 0.8987195121951219, "grad_norm": 0.5413071513175964, "learning_rate": 1.4008536585365855e-05, "loss": 0.0297, "step": 73695 }, { "epoch": 0.8987804878048781, "grad_norm": 1.0761890411376953, "learning_rate": 1.4008130081300815e-05, "loss": 0.0669, "step": 73700 }, { "epoch": 0.8988414634146341, "grad_norm": 0.4336468279361725, "learning_rate": 1.4007723577235775e-05, "loss": 0.0258, "step": 73705 }, { "epoch": 0.8989024390243903, "grad_norm": 0.4667252004146576, "learning_rate": 1.4007317073170731e-05, "loss": 0.0399, "step": 73710 }, { "epoch": 0.8989634146341463, "grad_norm": 0.6291453838348389, "learning_rate": 1.4006910569105691e-05, "loss": 0.0337, "step": 73715 }, { "epoch": 0.8990243902439025, "grad_norm": 0.869439423084259, "learning_rate": 1.4006504065040651e-05, "loss": 0.0387, "step": 73720 }, { "epoch": 0.8990853658536585, "grad_norm": 0.6954829692840576, "learning_rate": 1.4006097560975611e-05, "loss": 0.0637, "step": 73725 }, { "epoch": 0.8991463414634147, "grad_norm": 0.3632781207561493, "learning_rate": 1.400569105691057e-05, "loss": 0.0301, "step": 73730 }, { "epoch": 0.8992073170731707, "grad_norm": 0.25030404329299927, "learning_rate": 1.400528455284553e-05, "loss": 0.0391, "step": 73735 }, { "epoch": 0.8992682926829269, "grad_norm": 0.44898521900177, "learning_rate": 1.4004878048780489e-05, "loss": 0.0407, "step": 73740 }, { "epoch": 0.8993292682926829, "grad_norm": 1.252434492111206, "learning_rate": 1.4004471544715449e-05, "loss": 0.0552, "step": 73745 }, { "epoch": 0.899390243902439, "grad_norm": 0.45718759298324585, "learning_rate": 1.4004065040650408e-05, "loss": 0.0767, "step": 73750 }, { "epoch": 0.8994512195121951, "grad_norm": 0.607151448726654, "learning_rate": 1.4003658536585366e-05, "loss": 0.0625, "step": 73755 }, { "epoch": 0.8995121951219512, "grad_norm": 0.4917740821838379, "learning_rate": 1.4003252032520326e-05, "loss": 0.0389, "step": 73760 }, { "epoch": 0.8995731707317073, "grad_norm": 0.8529196977615356, "learning_rate": 1.4002845528455286e-05, "loss": 0.0701, "step": 73765 }, { "epoch": 0.8996341463414634, "grad_norm": 0.5246853828430176, "learning_rate": 1.4002439024390244e-05, "loss": 0.0363, "step": 73770 }, { "epoch": 0.8996951219512195, "grad_norm": 0.5318347215652466, "learning_rate": 1.4002032520325204e-05, "loss": 0.049, "step": 73775 }, { "epoch": 0.8997560975609756, "grad_norm": 0.49949148297309875, "learning_rate": 1.4001626016260164e-05, "loss": 0.0414, "step": 73780 }, { "epoch": 0.8998170731707317, "grad_norm": 0.8405700325965881, "learning_rate": 1.4001219512195124e-05, "loss": 0.0222, "step": 73785 }, { "epoch": 0.8998780487804878, "grad_norm": 0.39495494961738586, "learning_rate": 1.4000813008130084e-05, "loss": 0.0427, "step": 73790 }, { "epoch": 0.8999390243902439, "grad_norm": 0.7085874080657959, "learning_rate": 1.4000406504065043e-05, "loss": 0.0389, "step": 73795 }, { "epoch": 0.9, "grad_norm": 0.47487467527389526, "learning_rate": 1.4e-05, "loss": 0.0829, "step": 73800 }, { "epoch": 0.9000609756097561, "grad_norm": 0.5690054893493652, "learning_rate": 1.399959349593496e-05, "loss": 0.0446, "step": 73805 }, { "epoch": 0.9001219512195122, "grad_norm": 0.6833183169364929, "learning_rate": 1.399918699186992e-05, "loss": 0.0839, "step": 73810 }, { "epoch": 0.9001829268292683, "grad_norm": 0.7652893662452698, "learning_rate": 1.399878048780488e-05, "loss": 0.0459, "step": 73815 }, { "epoch": 0.9002439024390244, "grad_norm": 0.42402738332748413, "learning_rate": 1.3998373983739839e-05, "loss": 0.0436, "step": 73820 }, { "epoch": 0.9003048780487805, "grad_norm": 0.3205939531326294, "learning_rate": 1.3997967479674799e-05, "loss": 0.0467, "step": 73825 }, { "epoch": 0.9003658536585366, "grad_norm": 0.6398777961730957, "learning_rate": 1.3997560975609757e-05, "loss": 0.074, "step": 73830 }, { "epoch": 0.9004268292682926, "grad_norm": 0.3749595582485199, "learning_rate": 1.3997154471544717e-05, "loss": 0.04, "step": 73835 }, { "epoch": 0.9004878048780488, "grad_norm": 0.40685078501701355, "learning_rate": 1.3996747967479675e-05, "loss": 0.0602, "step": 73840 }, { "epoch": 0.9005487804878048, "grad_norm": 0.48561325669288635, "learning_rate": 1.3996341463414635e-05, "loss": 0.0633, "step": 73845 }, { "epoch": 0.900609756097561, "grad_norm": 0.2834535837173462, "learning_rate": 1.3995934959349595e-05, "loss": 0.0349, "step": 73850 }, { "epoch": 0.900670731707317, "grad_norm": 0.7930008769035339, "learning_rate": 1.3995528455284554e-05, "loss": 0.0435, "step": 73855 }, { "epoch": 0.9007317073170732, "grad_norm": 0.87520432472229, "learning_rate": 1.3995121951219513e-05, "loss": 0.0418, "step": 73860 }, { "epoch": 0.9007926829268292, "grad_norm": 0.10259121656417847, "learning_rate": 1.3994715447154472e-05, "loss": 0.055, "step": 73865 }, { "epoch": 0.9008536585365854, "grad_norm": 0.35625797510147095, "learning_rate": 1.3994308943089432e-05, "loss": 0.054, "step": 73870 }, { "epoch": 0.9009146341463414, "grad_norm": 0.5190841555595398, "learning_rate": 1.3993902439024392e-05, "loss": 0.0388, "step": 73875 }, { "epoch": 0.9009756097560976, "grad_norm": 0.7062758207321167, "learning_rate": 1.3993495934959352e-05, "loss": 0.0353, "step": 73880 }, { "epoch": 0.9010365853658536, "grad_norm": 0.9611294269561768, "learning_rate": 1.3993089430894312e-05, "loss": 0.0322, "step": 73885 }, { "epoch": 0.9010975609756098, "grad_norm": 0.29155904054641724, "learning_rate": 1.3992682926829268e-05, "loss": 0.0209, "step": 73890 }, { "epoch": 0.9011585365853658, "grad_norm": 0.5243695974349976, "learning_rate": 1.3992276422764228e-05, "loss": 0.0397, "step": 73895 }, { "epoch": 0.901219512195122, "grad_norm": 0.8787371516227722, "learning_rate": 1.3991869918699188e-05, "loss": 0.0374, "step": 73900 }, { "epoch": 0.901280487804878, "grad_norm": 0.7972330451011658, "learning_rate": 1.3991463414634148e-05, "loss": 0.0546, "step": 73905 }, { "epoch": 0.9013414634146342, "grad_norm": 0.9586586356163025, "learning_rate": 1.3991056910569107e-05, "loss": 0.0443, "step": 73910 }, { "epoch": 0.9014024390243902, "grad_norm": 1.2534008026123047, "learning_rate": 1.3990650406504067e-05, "loss": 0.0438, "step": 73915 }, { "epoch": 0.9014634146341464, "grad_norm": 0.37925249338150024, "learning_rate": 1.3990243902439025e-05, "loss": 0.037, "step": 73920 }, { "epoch": 0.9015243902439024, "grad_norm": 0.43724167346954346, "learning_rate": 1.3989837398373985e-05, "loss": 0.0389, "step": 73925 }, { "epoch": 0.9015853658536586, "grad_norm": 0.5966171026229858, "learning_rate": 1.3989430894308943e-05, "loss": 0.0435, "step": 73930 }, { "epoch": 0.9016463414634146, "grad_norm": 0.5412560701370239, "learning_rate": 1.3989024390243903e-05, "loss": 0.0526, "step": 73935 }, { "epoch": 0.9017073170731708, "grad_norm": 0.6846276521682739, "learning_rate": 1.3988617886178863e-05, "loss": 0.0521, "step": 73940 }, { "epoch": 0.9017682926829268, "grad_norm": 0.5186317563056946, "learning_rate": 1.3988211382113823e-05, "loss": 0.0386, "step": 73945 }, { "epoch": 0.901829268292683, "grad_norm": 0.3318001627922058, "learning_rate": 1.3987804878048781e-05, "loss": 0.0243, "step": 73950 }, { "epoch": 0.901890243902439, "grad_norm": 0.4686674475669861, "learning_rate": 1.398739837398374e-05, "loss": 0.0363, "step": 73955 }, { "epoch": 0.9019512195121951, "grad_norm": 0.5875336527824402, "learning_rate": 1.39869918699187e-05, "loss": 0.0167, "step": 73960 }, { "epoch": 0.9020121951219512, "grad_norm": 0.337181955575943, "learning_rate": 1.398658536585366e-05, "loss": 0.0423, "step": 73965 }, { "epoch": 0.9020731707317073, "grad_norm": 0.6394808292388916, "learning_rate": 1.398617886178862e-05, "loss": 0.036, "step": 73970 }, { "epoch": 0.9021341463414634, "grad_norm": 0.6428699493408203, "learning_rate": 1.398577235772358e-05, "loss": 0.0367, "step": 73975 }, { "epoch": 0.9021951219512195, "grad_norm": 0.9669381380081177, "learning_rate": 1.3985365853658536e-05, "loss": 0.0421, "step": 73980 }, { "epoch": 0.9022560975609756, "grad_norm": 0.35432934761047363, "learning_rate": 1.3984959349593496e-05, "loss": 0.0235, "step": 73985 }, { "epoch": 0.9023170731707317, "grad_norm": 0.7050709128379822, "learning_rate": 1.3984552845528456e-05, "loss": 0.0372, "step": 73990 }, { "epoch": 0.9023780487804878, "grad_norm": 0.45081785321235657, "learning_rate": 1.3984146341463416e-05, "loss": 0.0411, "step": 73995 }, { "epoch": 0.9024390243902439, "grad_norm": 0.7966261506080627, "learning_rate": 1.3983739837398376e-05, "loss": 0.0354, "step": 74000 }, { "epoch": 0.9025, "grad_norm": 0.3951266407966614, "learning_rate": 1.3983333333333336e-05, "loss": 0.0553, "step": 74005 }, { "epoch": 0.9025609756097561, "grad_norm": 0.40217357873916626, "learning_rate": 1.3982926829268294e-05, "loss": 0.0205, "step": 74010 }, { "epoch": 0.9026219512195122, "grad_norm": 0.8695189356803894, "learning_rate": 1.3982520325203252e-05, "loss": 0.0435, "step": 74015 }, { "epoch": 0.9026829268292683, "grad_norm": 0.9803825616836548, "learning_rate": 1.3982113821138212e-05, "loss": 0.0439, "step": 74020 }, { "epoch": 0.9027439024390244, "grad_norm": 0.7190921902656555, "learning_rate": 1.3981707317073171e-05, "loss": 0.063, "step": 74025 }, { "epoch": 0.9028048780487805, "grad_norm": 1.0880578756332397, "learning_rate": 1.3981300813008131e-05, "loss": 0.0384, "step": 74030 }, { "epoch": 0.9028658536585366, "grad_norm": 0.5164849162101746, "learning_rate": 1.3980894308943091e-05, "loss": 0.0374, "step": 74035 }, { "epoch": 0.9029268292682927, "grad_norm": 0.3292008638381958, "learning_rate": 1.398048780487805e-05, "loss": 0.0435, "step": 74040 }, { "epoch": 0.9029878048780487, "grad_norm": 0.7534511685371399, "learning_rate": 1.3980081300813009e-05, "loss": 0.031, "step": 74045 }, { "epoch": 0.9030487804878049, "grad_norm": 0.2631228566169739, "learning_rate": 1.3979674796747969e-05, "loss": 0.0536, "step": 74050 }, { "epoch": 0.903109756097561, "grad_norm": 0.4518856108188629, "learning_rate": 1.3979268292682929e-05, "loss": 0.0559, "step": 74055 }, { "epoch": 0.9031707317073171, "grad_norm": 0.35399842262268066, "learning_rate": 1.3978861788617888e-05, "loss": 0.0511, "step": 74060 }, { "epoch": 0.9032317073170731, "grad_norm": 0.2772262990474701, "learning_rate": 1.3978455284552848e-05, "loss": 0.0399, "step": 74065 }, { "epoch": 0.9032926829268293, "grad_norm": 0.36760473251342773, "learning_rate": 1.3978048780487805e-05, "loss": 0.0255, "step": 74070 }, { "epoch": 0.9033536585365853, "grad_norm": 0.8244776725769043, "learning_rate": 1.3977642276422765e-05, "loss": 0.0727, "step": 74075 }, { "epoch": 0.9034146341463415, "grad_norm": 0.5640941858291626, "learning_rate": 1.3977235772357724e-05, "loss": 0.0765, "step": 74080 }, { "epoch": 0.9034756097560975, "grad_norm": 0.659868061542511, "learning_rate": 1.3976829268292684e-05, "loss": 0.0366, "step": 74085 }, { "epoch": 0.9035365853658537, "grad_norm": 0.7673010230064392, "learning_rate": 1.3976422764227644e-05, "loss": 0.055, "step": 74090 }, { "epoch": 0.9035975609756097, "grad_norm": 0.4671836197376251, "learning_rate": 1.3976016260162604e-05, "loss": 0.048, "step": 74095 }, { "epoch": 0.9036585365853659, "grad_norm": 0.6503079533576965, "learning_rate": 1.3975609756097562e-05, "loss": 0.0419, "step": 74100 }, { "epoch": 0.9037195121951219, "grad_norm": 0.7886067032814026, "learning_rate": 1.397520325203252e-05, "loss": 0.0586, "step": 74105 }, { "epoch": 0.9037804878048781, "grad_norm": 0.414977103471756, "learning_rate": 1.397479674796748e-05, "loss": 0.0238, "step": 74110 }, { "epoch": 0.9038414634146341, "grad_norm": 0.32761698961257935, "learning_rate": 1.397439024390244e-05, "loss": 0.0251, "step": 74115 }, { "epoch": 0.9039024390243903, "grad_norm": 0.46942976117134094, "learning_rate": 1.39739837398374e-05, "loss": 0.0438, "step": 74120 }, { "epoch": 0.9039634146341463, "grad_norm": 0.8851733207702637, "learning_rate": 1.397357723577236e-05, "loss": 0.0273, "step": 74125 }, { "epoch": 0.9040243902439025, "grad_norm": 0.7074211835861206, "learning_rate": 1.3973170731707318e-05, "loss": 0.0374, "step": 74130 }, { "epoch": 0.9040853658536585, "grad_norm": 0.583152174949646, "learning_rate": 1.3972764227642277e-05, "loss": 0.0704, "step": 74135 }, { "epoch": 0.9041463414634147, "grad_norm": 0.4547881484031677, "learning_rate": 1.3972357723577237e-05, "loss": 0.0416, "step": 74140 }, { "epoch": 0.9042073170731707, "grad_norm": 0.3302749991416931, "learning_rate": 1.3971951219512197e-05, "loss": 0.0411, "step": 74145 }, { "epoch": 0.9042682926829269, "grad_norm": 0.4166024327278137, "learning_rate": 1.3971544715447157e-05, "loss": 0.032, "step": 74150 }, { "epoch": 0.9043292682926829, "grad_norm": 1.669331431388855, "learning_rate": 1.3971138211382117e-05, "loss": 0.0632, "step": 74155 }, { "epoch": 0.904390243902439, "grad_norm": 0.8765796422958374, "learning_rate": 1.3970731707317073e-05, "loss": 0.0442, "step": 74160 }, { "epoch": 0.9044512195121951, "grad_norm": 0.6186010241508484, "learning_rate": 1.3970325203252033e-05, "loss": 0.0575, "step": 74165 }, { "epoch": 0.9045121951219512, "grad_norm": 0.6245002746582031, "learning_rate": 1.3969918699186993e-05, "loss": 0.0735, "step": 74170 }, { "epoch": 0.9045731707317073, "grad_norm": 2.152083158493042, "learning_rate": 1.3969512195121953e-05, "loss": 0.0455, "step": 74175 }, { "epoch": 0.9046341463414634, "grad_norm": 0.577947199344635, "learning_rate": 1.3969105691056912e-05, "loss": 0.0287, "step": 74180 }, { "epoch": 0.9046951219512195, "grad_norm": 0.41874271631240845, "learning_rate": 1.3968699186991872e-05, "loss": 0.0462, "step": 74185 }, { "epoch": 0.9047560975609756, "grad_norm": 0.2179928869009018, "learning_rate": 1.396829268292683e-05, "loss": 0.0616, "step": 74190 }, { "epoch": 0.9048170731707317, "grad_norm": 0.5512980222702026, "learning_rate": 1.3967886178861788e-05, "loss": 0.0326, "step": 74195 }, { "epoch": 0.9048780487804878, "grad_norm": 1.4825873374938965, "learning_rate": 1.3967479674796748e-05, "loss": 0.0334, "step": 74200 }, { "epoch": 0.9049390243902439, "grad_norm": 0.7061110734939575, "learning_rate": 1.3967073170731708e-05, "loss": 0.0388, "step": 74205 }, { "epoch": 0.905, "grad_norm": 1.203081727027893, "learning_rate": 1.3966666666666668e-05, "loss": 0.0568, "step": 74210 }, { "epoch": 0.9050609756097561, "grad_norm": 0.6468844413757324, "learning_rate": 1.3966260162601628e-05, "loss": 0.0377, "step": 74215 }, { "epoch": 0.9051219512195122, "grad_norm": 0.39397409558296204, "learning_rate": 1.3965853658536586e-05, "loss": 0.0232, "step": 74220 }, { "epoch": 0.9051829268292683, "grad_norm": 1.0184531211853027, "learning_rate": 1.3965447154471546e-05, "loss": 0.0372, "step": 74225 }, { "epoch": 0.9052439024390244, "grad_norm": 0.25008347630500793, "learning_rate": 1.3965040650406505e-05, "loss": 0.0276, "step": 74230 }, { "epoch": 0.9053048780487805, "grad_norm": 0.26369500160217285, "learning_rate": 1.3964634146341465e-05, "loss": 0.0329, "step": 74235 }, { "epoch": 0.9053658536585366, "grad_norm": 1.0550650358200073, "learning_rate": 1.3964227642276425e-05, "loss": 0.041, "step": 74240 }, { "epoch": 0.9054268292682927, "grad_norm": 1.4272019863128662, "learning_rate": 1.3963821138211385e-05, "loss": 0.0565, "step": 74245 }, { "epoch": 0.9054878048780488, "grad_norm": 0.3682514429092407, "learning_rate": 1.3963414634146341e-05, "loss": 0.0312, "step": 74250 }, { "epoch": 0.9055487804878048, "grad_norm": 0.15415476262569427, "learning_rate": 1.3963008130081301e-05, "loss": 0.0333, "step": 74255 }, { "epoch": 0.905609756097561, "grad_norm": 1.2895439863204956, "learning_rate": 1.3962601626016261e-05, "loss": 0.0544, "step": 74260 }, { "epoch": 0.905670731707317, "grad_norm": 0.3801785111427307, "learning_rate": 1.3962195121951221e-05, "loss": 0.0202, "step": 74265 }, { "epoch": 0.9057317073170732, "grad_norm": 1.3433858156204224, "learning_rate": 1.396178861788618e-05, "loss": 0.0639, "step": 74270 }, { "epoch": 0.9057926829268292, "grad_norm": 0.668205976486206, "learning_rate": 1.396138211382114e-05, "loss": 0.0385, "step": 74275 }, { "epoch": 0.9058536585365854, "grad_norm": 0.4709759056568146, "learning_rate": 1.3960975609756097e-05, "loss": 0.0262, "step": 74280 }, { "epoch": 0.9059146341463414, "grad_norm": 0.47878551483154297, "learning_rate": 1.3960569105691057e-05, "loss": 0.0648, "step": 74285 }, { "epoch": 0.9059756097560976, "grad_norm": 0.36843615770339966, "learning_rate": 1.3960162601626017e-05, "loss": 0.0407, "step": 74290 }, { "epoch": 0.9060365853658536, "grad_norm": 0.6401308178901672, "learning_rate": 1.3959756097560976e-05, "loss": 0.0312, "step": 74295 }, { "epoch": 0.9060975609756098, "grad_norm": 0.4631319046020508, "learning_rate": 1.3959349593495936e-05, "loss": 0.0446, "step": 74300 }, { "epoch": 0.9061585365853658, "grad_norm": 0.39247819781303406, "learning_rate": 1.3958943089430896e-05, "loss": 0.0611, "step": 74305 }, { "epoch": 0.906219512195122, "grad_norm": 1.3676910400390625, "learning_rate": 1.3958536585365854e-05, "loss": 0.1061, "step": 74310 }, { "epoch": 0.906280487804878, "grad_norm": 0.4186742603778839, "learning_rate": 1.3958130081300814e-05, "loss": 0.0298, "step": 74315 }, { "epoch": 0.9063414634146342, "grad_norm": 0.39888182282447815, "learning_rate": 1.3957723577235774e-05, "loss": 0.027, "step": 74320 }, { "epoch": 0.9064024390243902, "grad_norm": 0.7642017006874084, "learning_rate": 1.3957317073170734e-05, "loss": 0.0639, "step": 74325 }, { "epoch": 0.9064634146341464, "grad_norm": 0.43747812509536743, "learning_rate": 1.3956910569105693e-05, "loss": 0.0505, "step": 74330 }, { "epoch": 0.9065243902439024, "grad_norm": 0.4844730794429779, "learning_rate": 1.3956504065040653e-05, "loss": 0.0608, "step": 74335 }, { "epoch": 0.9065853658536586, "grad_norm": 0.3628363609313965, "learning_rate": 1.395609756097561e-05, "loss": 0.0417, "step": 74340 }, { "epoch": 0.9066463414634146, "grad_norm": 0.9797781109809875, "learning_rate": 1.395569105691057e-05, "loss": 0.0543, "step": 74345 }, { "epoch": 0.9067073170731708, "grad_norm": 0.5483920574188232, "learning_rate": 1.395528455284553e-05, "loss": 0.0377, "step": 74350 }, { "epoch": 0.9067682926829268, "grad_norm": 1.0766154527664185, "learning_rate": 1.395487804878049e-05, "loss": 0.0519, "step": 74355 }, { "epoch": 0.906829268292683, "grad_norm": 2.847134828567505, "learning_rate": 1.3954471544715449e-05, "loss": 0.0422, "step": 74360 }, { "epoch": 0.906890243902439, "grad_norm": 0.7536612749099731, "learning_rate": 1.3954065040650409e-05, "loss": 0.0394, "step": 74365 }, { "epoch": 0.9069512195121952, "grad_norm": 0.555245041847229, "learning_rate": 1.3953658536585365e-05, "loss": 0.0262, "step": 74370 }, { "epoch": 0.9070121951219512, "grad_norm": 1.1168426275253296, "learning_rate": 1.3953252032520325e-05, "loss": 0.0632, "step": 74375 }, { "epoch": 0.9070731707317073, "grad_norm": 0.579408586025238, "learning_rate": 1.3952845528455285e-05, "loss": 0.027, "step": 74380 }, { "epoch": 0.9071341463414634, "grad_norm": 0.4735781252384186, "learning_rate": 1.3952439024390245e-05, "loss": 0.0503, "step": 74385 }, { "epoch": 0.9071951219512195, "grad_norm": 0.44828617572784424, "learning_rate": 1.3952032520325205e-05, "loss": 0.0454, "step": 74390 }, { "epoch": 0.9072560975609756, "grad_norm": 0.4432210624217987, "learning_rate": 1.3951626016260164e-05, "loss": 0.0464, "step": 74395 }, { "epoch": 0.9073170731707317, "grad_norm": 0.35152897238731384, "learning_rate": 1.3951219512195122e-05, "loss": 0.058, "step": 74400 }, { "epoch": 0.9073780487804878, "grad_norm": 0.7509186863899231, "learning_rate": 1.3950813008130082e-05, "loss": 0.045, "step": 74405 }, { "epoch": 0.9074390243902439, "grad_norm": 0.31413891911506653, "learning_rate": 1.3950406504065042e-05, "loss": 0.0288, "step": 74410 }, { "epoch": 0.9075, "grad_norm": 0.8013456463813782, "learning_rate": 1.3950000000000002e-05, "loss": 0.0388, "step": 74415 }, { "epoch": 0.9075609756097561, "grad_norm": 0.7108612060546875, "learning_rate": 1.3949593495934962e-05, "loss": 0.0855, "step": 74420 }, { "epoch": 0.9076219512195122, "grad_norm": 0.8680824041366577, "learning_rate": 1.394918699186992e-05, "loss": 0.0748, "step": 74425 }, { "epoch": 0.9076829268292683, "grad_norm": 0.5431670546531677, "learning_rate": 1.3948780487804878e-05, "loss": 0.0462, "step": 74430 }, { "epoch": 0.9077439024390244, "grad_norm": 0.45947733521461487, "learning_rate": 1.3948373983739838e-05, "loss": 0.0682, "step": 74435 }, { "epoch": 0.9078048780487805, "grad_norm": 0.7988178133964539, "learning_rate": 1.3947967479674798e-05, "loss": 0.0322, "step": 74440 }, { "epoch": 0.9078658536585366, "grad_norm": 0.4970459043979645, "learning_rate": 1.3947560975609758e-05, "loss": 0.0733, "step": 74445 }, { "epoch": 0.9079268292682927, "grad_norm": 2.7518649101257324, "learning_rate": 1.3947154471544717e-05, "loss": 0.0441, "step": 74450 }, { "epoch": 0.9079878048780488, "grad_norm": 0.9559610486030579, "learning_rate": 1.3946747967479677e-05, "loss": 0.0481, "step": 74455 }, { "epoch": 0.9080487804878049, "grad_norm": 0.6560311317443848, "learning_rate": 1.3946341463414634e-05, "loss": 0.0405, "step": 74460 }, { "epoch": 0.908109756097561, "grad_norm": 0.4899899363517761, "learning_rate": 1.3945934959349593e-05, "loss": 0.0295, "step": 74465 }, { "epoch": 0.9081707317073171, "grad_norm": 0.5681875944137573, "learning_rate": 1.3945528455284553e-05, "loss": 0.0434, "step": 74470 }, { "epoch": 0.9082317073170731, "grad_norm": 0.6475340723991394, "learning_rate": 1.3945121951219513e-05, "loss": 0.0428, "step": 74475 }, { "epoch": 0.9082926829268293, "grad_norm": 0.4442823827266693, "learning_rate": 1.3944715447154473e-05, "loss": 0.0645, "step": 74480 }, { "epoch": 0.9083536585365853, "grad_norm": 0.3676958978176117, "learning_rate": 1.3944308943089433e-05, "loss": 0.0321, "step": 74485 }, { "epoch": 0.9084146341463415, "grad_norm": 0.48075875639915466, "learning_rate": 1.394390243902439e-05, "loss": 0.0286, "step": 74490 }, { "epoch": 0.9084756097560975, "grad_norm": 0.5154358744621277, "learning_rate": 1.394349593495935e-05, "loss": 0.0431, "step": 74495 }, { "epoch": 0.9085365853658537, "grad_norm": 0.3879747986793518, "learning_rate": 1.394308943089431e-05, "loss": 0.0594, "step": 74500 }, { "epoch": 0.9085975609756097, "grad_norm": 0.4880562722682953, "learning_rate": 1.394268292682927e-05, "loss": 0.0402, "step": 74505 }, { "epoch": 0.9086585365853659, "grad_norm": 0.401743084192276, "learning_rate": 1.394227642276423e-05, "loss": 0.0231, "step": 74510 }, { "epoch": 0.9087195121951219, "grad_norm": 0.4120534062385559, "learning_rate": 1.3941869918699188e-05, "loss": 0.0236, "step": 74515 }, { "epoch": 0.9087804878048781, "grad_norm": 0.7635689377784729, "learning_rate": 1.3941463414634146e-05, "loss": 0.038, "step": 74520 }, { "epoch": 0.9088414634146341, "grad_norm": 3.3044943809509277, "learning_rate": 1.3941056910569106e-05, "loss": 0.0566, "step": 74525 }, { "epoch": 0.9089024390243903, "grad_norm": 0.7121680378913879, "learning_rate": 1.3940650406504066e-05, "loss": 0.0658, "step": 74530 }, { "epoch": 0.9089634146341463, "grad_norm": 0.5529080033302307, "learning_rate": 1.3940243902439026e-05, "loss": 0.031, "step": 74535 }, { "epoch": 0.9090243902439025, "grad_norm": 0.6853414177894592, "learning_rate": 1.3939837398373986e-05, "loss": 0.026, "step": 74540 }, { "epoch": 0.9090853658536585, "grad_norm": 0.25460439920425415, "learning_rate": 1.3939430894308945e-05, "loss": 0.054, "step": 74545 }, { "epoch": 0.9091463414634147, "grad_norm": 0.4818212389945984, "learning_rate": 1.3939024390243902e-05, "loss": 0.0343, "step": 74550 }, { "epoch": 0.9092073170731707, "grad_norm": 0.8153778910636902, "learning_rate": 1.3938617886178862e-05, "loss": 0.0229, "step": 74555 }, { "epoch": 0.9092682926829269, "grad_norm": 0.42859959602355957, "learning_rate": 1.3938211382113822e-05, "loss": 0.0404, "step": 74560 }, { "epoch": 0.9093292682926829, "grad_norm": 1.5319100618362427, "learning_rate": 1.3937804878048781e-05, "loss": 0.0285, "step": 74565 }, { "epoch": 0.909390243902439, "grad_norm": 0.6170706748962402, "learning_rate": 1.3937398373983741e-05, "loss": 0.0449, "step": 74570 }, { "epoch": 0.9094512195121951, "grad_norm": 0.4471866488456726, "learning_rate": 1.3936991869918701e-05, "loss": 0.0407, "step": 74575 }, { "epoch": 0.9095121951219513, "grad_norm": 0.8288413882255554, "learning_rate": 1.3936585365853659e-05, "loss": 0.0423, "step": 74580 }, { "epoch": 0.9095731707317073, "grad_norm": 0.9586229920387268, "learning_rate": 1.3936178861788619e-05, "loss": 0.0325, "step": 74585 }, { "epoch": 0.9096341463414634, "grad_norm": 0.7409289479255676, "learning_rate": 1.3935772357723579e-05, "loss": 0.0268, "step": 74590 }, { "epoch": 0.9096951219512195, "grad_norm": 0.602778971195221, "learning_rate": 1.3935365853658539e-05, "loss": 0.0991, "step": 74595 }, { "epoch": 0.9097560975609756, "grad_norm": 0.3458399772644043, "learning_rate": 1.3934959349593498e-05, "loss": 0.0327, "step": 74600 }, { "epoch": 0.9098170731707317, "grad_norm": 0.8292653560638428, "learning_rate": 1.3934552845528457e-05, "loss": 0.0742, "step": 74605 }, { "epoch": 0.9098780487804878, "grad_norm": 0.6535548567771912, "learning_rate": 1.3934146341463415e-05, "loss": 0.0324, "step": 74610 }, { "epoch": 0.9099390243902439, "grad_norm": 0.5369475483894348, "learning_rate": 1.3933739837398375e-05, "loss": 0.0348, "step": 74615 }, { "epoch": 0.91, "grad_norm": 0.6323140263557434, "learning_rate": 1.3933333333333334e-05, "loss": 0.0527, "step": 74620 }, { "epoch": 0.9100609756097561, "grad_norm": 0.6668592691421509, "learning_rate": 1.3932926829268294e-05, "loss": 0.0309, "step": 74625 }, { "epoch": 0.9101219512195122, "grad_norm": 0.19624674320220947, "learning_rate": 1.3932520325203254e-05, "loss": 0.0217, "step": 74630 }, { "epoch": 0.9101829268292683, "grad_norm": 0.5834667086601257, "learning_rate": 1.3932113821138214e-05, "loss": 0.0445, "step": 74635 }, { "epoch": 0.9102439024390244, "grad_norm": 0.5505427122116089, "learning_rate": 1.393170731707317e-05, "loss": 0.0554, "step": 74640 }, { "epoch": 0.9103048780487805, "grad_norm": 0.8378329873085022, "learning_rate": 1.393130081300813e-05, "loss": 0.0479, "step": 74645 }, { "epoch": 0.9103658536585366, "grad_norm": 0.8066174983978271, "learning_rate": 1.393089430894309e-05, "loss": 0.0628, "step": 74650 }, { "epoch": 0.9104268292682927, "grad_norm": 0.43924659490585327, "learning_rate": 1.393048780487805e-05, "loss": 0.0717, "step": 74655 }, { "epoch": 0.9104878048780488, "grad_norm": 0.565590500831604, "learning_rate": 1.393008130081301e-05, "loss": 0.0678, "step": 74660 }, { "epoch": 0.9105487804878049, "grad_norm": 0.5866805911064148, "learning_rate": 1.392967479674797e-05, "loss": 0.0284, "step": 74665 }, { "epoch": 0.910609756097561, "grad_norm": 0.8942592740058899, "learning_rate": 1.3929268292682927e-05, "loss": 0.0657, "step": 74670 }, { "epoch": 0.910670731707317, "grad_norm": 0.4890969693660736, "learning_rate": 1.3928861788617887e-05, "loss": 0.0418, "step": 74675 }, { "epoch": 0.9107317073170732, "grad_norm": 0.23520120978355408, "learning_rate": 1.3928455284552847e-05, "loss": 0.0395, "step": 74680 }, { "epoch": 0.9107926829268292, "grad_norm": 0.46869656443595886, "learning_rate": 1.3928048780487807e-05, "loss": 0.0256, "step": 74685 }, { "epoch": 0.9108536585365854, "grad_norm": 3.4936270713806152, "learning_rate": 1.3927642276422765e-05, "loss": 0.0313, "step": 74690 }, { "epoch": 0.9109146341463414, "grad_norm": 0.5006424188613892, "learning_rate": 1.3927235772357725e-05, "loss": 0.0347, "step": 74695 }, { "epoch": 0.9109756097560976, "grad_norm": 2.985729694366455, "learning_rate": 1.3926829268292683e-05, "loss": 0.0621, "step": 74700 }, { "epoch": 0.9110365853658536, "grad_norm": 0.3169321119785309, "learning_rate": 1.3926422764227643e-05, "loss": 0.0445, "step": 74705 }, { "epoch": 0.9110975609756098, "grad_norm": 0.32100993394851685, "learning_rate": 1.3926016260162603e-05, "loss": 0.0431, "step": 74710 }, { "epoch": 0.9111585365853658, "grad_norm": 0.38017702102661133, "learning_rate": 1.3925609756097562e-05, "loss": 0.0424, "step": 74715 }, { "epoch": 0.911219512195122, "grad_norm": 2.588996171951294, "learning_rate": 1.3925203252032522e-05, "loss": 0.0468, "step": 74720 }, { "epoch": 0.911280487804878, "grad_norm": 0.12208358943462372, "learning_rate": 1.3924796747967482e-05, "loss": 0.0402, "step": 74725 }, { "epoch": 0.9113414634146342, "grad_norm": 0.6466497778892517, "learning_rate": 1.3924390243902439e-05, "loss": 0.0881, "step": 74730 }, { "epoch": 0.9114024390243902, "grad_norm": 0.5895903706550598, "learning_rate": 1.3923983739837398e-05, "loss": 0.0722, "step": 74735 }, { "epoch": 0.9114634146341464, "grad_norm": 0.20874008536338806, "learning_rate": 1.3923577235772358e-05, "loss": 0.0306, "step": 74740 }, { "epoch": 0.9115243902439024, "grad_norm": 0.5575474500656128, "learning_rate": 1.3923170731707318e-05, "loss": 0.0416, "step": 74745 }, { "epoch": 0.9115853658536586, "grad_norm": 0.3890538513660431, "learning_rate": 1.3922764227642278e-05, "loss": 0.0424, "step": 74750 }, { "epoch": 0.9116463414634146, "grad_norm": 0.6124933362007141, "learning_rate": 1.3922357723577238e-05, "loss": 0.0352, "step": 74755 }, { "epoch": 0.9117073170731708, "grad_norm": 0.3386143147945404, "learning_rate": 1.3921951219512196e-05, "loss": 0.032, "step": 74760 }, { "epoch": 0.9117682926829268, "grad_norm": 0.7699998021125793, "learning_rate": 1.3921544715447156e-05, "loss": 0.03, "step": 74765 }, { "epoch": 0.911829268292683, "grad_norm": 0.5073805451393127, "learning_rate": 1.3921138211382115e-05, "loss": 0.0479, "step": 74770 }, { "epoch": 0.911890243902439, "grad_norm": 0.6796051859855652, "learning_rate": 1.3920731707317075e-05, "loss": 0.0466, "step": 74775 }, { "epoch": 0.9119512195121952, "grad_norm": 0.9972568154335022, "learning_rate": 1.3920325203252033e-05, "loss": 0.0515, "step": 74780 }, { "epoch": 0.9120121951219512, "grad_norm": 0.37384575605392456, "learning_rate": 1.3919918699186993e-05, "loss": 0.0634, "step": 74785 }, { "epoch": 0.9120731707317074, "grad_norm": 0.6496354341506958, "learning_rate": 1.3919512195121951e-05, "loss": 0.0488, "step": 74790 }, { "epoch": 0.9121341463414634, "grad_norm": 0.48796433210372925, "learning_rate": 1.3919105691056911e-05, "loss": 0.0256, "step": 74795 }, { "epoch": 0.9121951219512195, "grad_norm": 0.5557217597961426, "learning_rate": 1.3918699186991871e-05, "loss": 0.0269, "step": 74800 }, { "epoch": 0.9122560975609756, "grad_norm": 0.7309368252754211, "learning_rate": 1.391829268292683e-05, "loss": 0.0652, "step": 74805 }, { "epoch": 0.9123170731707317, "grad_norm": 0.6502672433853149, "learning_rate": 1.391788617886179e-05, "loss": 0.0469, "step": 74810 }, { "epoch": 0.9123780487804878, "grad_norm": 0.53605055809021, "learning_rate": 1.391747967479675e-05, "loss": 0.0452, "step": 74815 }, { "epoch": 0.9124390243902439, "grad_norm": 0.6421862244606018, "learning_rate": 1.3917073170731707e-05, "loss": 0.0925, "step": 74820 }, { "epoch": 0.9125, "grad_norm": 0.6334125995635986, "learning_rate": 1.3916666666666667e-05, "loss": 0.0406, "step": 74825 }, { "epoch": 0.9125609756097561, "grad_norm": 0.7295766472816467, "learning_rate": 1.3916260162601627e-05, "loss": 0.0322, "step": 74830 }, { "epoch": 0.9126219512195122, "grad_norm": 0.9815036058425903, "learning_rate": 1.3915853658536586e-05, "loss": 0.0414, "step": 74835 }, { "epoch": 0.9126829268292683, "grad_norm": 0.43255293369293213, "learning_rate": 1.3915447154471546e-05, "loss": 0.0239, "step": 74840 }, { "epoch": 0.9127439024390244, "grad_norm": 0.17752087116241455, "learning_rate": 1.3915040650406506e-05, "loss": 0.0387, "step": 74845 }, { "epoch": 0.9128048780487805, "grad_norm": 0.6869625449180603, "learning_rate": 1.3914634146341464e-05, "loss": 0.0283, "step": 74850 }, { "epoch": 0.9128658536585366, "grad_norm": 0.5948669910430908, "learning_rate": 1.3914227642276424e-05, "loss": 0.0299, "step": 74855 }, { "epoch": 0.9129268292682927, "grad_norm": 0.37148550152778625, "learning_rate": 1.3913821138211384e-05, "loss": 0.0369, "step": 74860 }, { "epoch": 0.9129878048780488, "grad_norm": 0.7710943222045898, "learning_rate": 1.3913414634146344e-05, "loss": 0.0623, "step": 74865 }, { "epoch": 0.9130487804878049, "grad_norm": 0.4189792573451996, "learning_rate": 1.3913008130081302e-05, "loss": 0.0189, "step": 74870 }, { "epoch": 0.913109756097561, "grad_norm": 0.38054701685905457, "learning_rate": 1.3912601626016262e-05, "loss": 0.0856, "step": 74875 }, { "epoch": 0.9131707317073171, "grad_norm": 0.7865190505981445, "learning_rate": 1.391219512195122e-05, "loss": 0.0272, "step": 74880 }, { "epoch": 0.9132317073170731, "grad_norm": 0.42003700137138367, "learning_rate": 1.391178861788618e-05, "loss": 0.0479, "step": 74885 }, { "epoch": 0.9132926829268293, "grad_norm": 0.6573950052261353, "learning_rate": 1.391138211382114e-05, "loss": 0.0351, "step": 74890 }, { "epoch": 0.9133536585365853, "grad_norm": 0.9864131212234497, "learning_rate": 1.3910975609756099e-05, "loss": 0.0365, "step": 74895 }, { "epoch": 0.9134146341463415, "grad_norm": 0.928669810295105, "learning_rate": 1.3910569105691059e-05, "loss": 0.0445, "step": 74900 }, { "epoch": 0.9134756097560975, "grad_norm": 1.1215821504592896, "learning_rate": 1.3910162601626019e-05, "loss": 0.0383, "step": 74905 }, { "epoch": 0.9135365853658537, "grad_norm": 0.6759467720985413, "learning_rate": 1.3909756097560975e-05, "loss": 0.0393, "step": 74910 }, { "epoch": 0.9135975609756097, "grad_norm": 0.6751623153686523, "learning_rate": 1.3909349593495935e-05, "loss": 0.0497, "step": 74915 }, { "epoch": 0.9136585365853659, "grad_norm": 0.8935059905052185, "learning_rate": 1.3908943089430895e-05, "loss": 0.0386, "step": 74920 }, { "epoch": 0.9137195121951219, "grad_norm": 0.5823222994804382, "learning_rate": 1.3908536585365855e-05, "loss": 0.0295, "step": 74925 }, { "epoch": 0.9137804878048781, "grad_norm": 0.8204264640808105, "learning_rate": 1.3908130081300814e-05, "loss": 0.0663, "step": 74930 }, { "epoch": 0.9138414634146341, "grad_norm": 0.5754966735839844, "learning_rate": 1.3907723577235774e-05, "loss": 0.0356, "step": 74935 }, { "epoch": 0.9139024390243903, "grad_norm": 0.5828487277030945, "learning_rate": 1.3907317073170732e-05, "loss": 0.0322, "step": 74940 }, { "epoch": 0.9139634146341463, "grad_norm": 0.6139683127403259, "learning_rate": 1.3906910569105692e-05, "loss": 0.0323, "step": 74945 }, { "epoch": 0.9140243902439025, "grad_norm": 0.12010151147842407, "learning_rate": 1.3906504065040652e-05, "loss": 0.0422, "step": 74950 }, { "epoch": 0.9140853658536585, "grad_norm": 0.6280186176300049, "learning_rate": 1.390609756097561e-05, "loss": 0.0315, "step": 74955 }, { "epoch": 0.9141463414634147, "grad_norm": 0.43644610047340393, "learning_rate": 1.390569105691057e-05, "loss": 0.0302, "step": 74960 }, { "epoch": 0.9142073170731707, "grad_norm": 0.5526338815689087, "learning_rate": 1.390528455284553e-05, "loss": 0.0927, "step": 74965 }, { "epoch": 0.9142682926829269, "grad_norm": 0.751435399055481, "learning_rate": 1.3904878048780488e-05, "loss": 0.075, "step": 74970 }, { "epoch": 0.9143292682926829, "grad_norm": 0.4218984544277191, "learning_rate": 1.3904471544715448e-05, "loss": 0.0362, "step": 74975 }, { "epoch": 0.9143902439024391, "grad_norm": 0.28597524762153625, "learning_rate": 1.3904065040650408e-05, "loss": 0.0576, "step": 74980 }, { "epoch": 0.9144512195121951, "grad_norm": 0.20335006713867188, "learning_rate": 1.3903658536585367e-05, "loss": 0.0312, "step": 74985 }, { "epoch": 0.9145121951219513, "grad_norm": 0.6151728630065918, "learning_rate": 1.3903252032520327e-05, "loss": 0.0315, "step": 74990 }, { "epoch": 0.9145731707317073, "grad_norm": 1.5889111757278442, "learning_rate": 1.3902845528455287e-05, "loss": 0.0701, "step": 74995 }, { "epoch": 0.9146341463414634, "grad_norm": 0.4456506073474884, "learning_rate": 1.3902439024390244e-05, "loss": 0.0673, "step": 75000 }, { "epoch": 0.9146951219512195, "grad_norm": 0.5438336730003357, "learning_rate": 1.3902032520325203e-05, "loss": 0.0499, "step": 75005 }, { "epoch": 0.9147560975609756, "grad_norm": 0.5254265069961548, "learning_rate": 1.3901626016260163e-05, "loss": 0.0408, "step": 75010 }, { "epoch": 0.9148170731707317, "grad_norm": 0.5150269269943237, "learning_rate": 1.3901219512195123e-05, "loss": 0.043, "step": 75015 }, { "epoch": 0.9148780487804878, "grad_norm": 0.5026569962501526, "learning_rate": 1.3900813008130083e-05, "loss": 0.0492, "step": 75020 }, { "epoch": 0.9149390243902439, "grad_norm": 0.5889995694160461, "learning_rate": 1.3900406504065043e-05, "loss": 0.0302, "step": 75025 }, { "epoch": 0.915, "grad_norm": 0.8049201965332031, "learning_rate": 1.39e-05, "loss": 0.0241, "step": 75030 }, { "epoch": 0.9150609756097561, "grad_norm": 0.6617100238800049, "learning_rate": 1.389959349593496e-05, "loss": 0.0453, "step": 75035 }, { "epoch": 0.9151219512195122, "grad_norm": 0.5602442026138306, "learning_rate": 1.389918699186992e-05, "loss": 0.0339, "step": 75040 }, { "epoch": 0.9151829268292683, "grad_norm": 0.4409942030906677, "learning_rate": 1.3898780487804879e-05, "loss": 0.0384, "step": 75045 }, { "epoch": 0.9152439024390244, "grad_norm": 1.0167783498764038, "learning_rate": 1.3898373983739838e-05, "loss": 0.0803, "step": 75050 }, { "epoch": 0.9153048780487805, "grad_norm": 0.5754734873771667, "learning_rate": 1.3897967479674798e-05, "loss": 0.0271, "step": 75055 }, { "epoch": 0.9153658536585366, "grad_norm": 0.5226901769638062, "learning_rate": 1.3897560975609756e-05, "loss": 0.0381, "step": 75060 }, { "epoch": 0.9154268292682927, "grad_norm": 1.183834433555603, "learning_rate": 1.3897154471544716e-05, "loss": 0.0382, "step": 75065 }, { "epoch": 0.9154878048780488, "grad_norm": 0.6470215916633606, "learning_rate": 1.3896747967479676e-05, "loss": 0.0235, "step": 75070 }, { "epoch": 0.9155487804878049, "grad_norm": 0.6697620749473572, "learning_rate": 1.3896341463414636e-05, "loss": 0.0292, "step": 75075 }, { "epoch": 0.915609756097561, "grad_norm": 0.3799585700035095, "learning_rate": 1.3895934959349596e-05, "loss": 0.0242, "step": 75080 }, { "epoch": 0.915670731707317, "grad_norm": 0.41248515248298645, "learning_rate": 1.3895528455284555e-05, "loss": 0.0284, "step": 75085 }, { "epoch": 0.9157317073170732, "grad_norm": 0.30830249190330505, "learning_rate": 1.3895121951219512e-05, "loss": 0.0307, "step": 75090 }, { "epoch": 0.9157926829268292, "grad_norm": 0.6185872554779053, "learning_rate": 1.3894715447154472e-05, "loss": 0.0525, "step": 75095 }, { "epoch": 0.9158536585365854, "grad_norm": 0.4498116075992584, "learning_rate": 1.3894308943089431e-05, "loss": 0.0642, "step": 75100 }, { "epoch": 0.9159146341463414, "grad_norm": 0.7483223676681519, "learning_rate": 1.3893902439024391e-05, "loss": 0.032, "step": 75105 }, { "epoch": 0.9159756097560976, "grad_norm": 0.639930009841919, "learning_rate": 1.3893495934959351e-05, "loss": 0.0399, "step": 75110 }, { "epoch": 0.9160365853658536, "grad_norm": 0.4527500569820404, "learning_rate": 1.3893089430894311e-05, "loss": 0.0293, "step": 75115 }, { "epoch": 0.9160975609756098, "grad_norm": 0.7376760840415955, "learning_rate": 1.3892682926829269e-05, "loss": 0.0257, "step": 75120 }, { "epoch": 0.9161585365853658, "grad_norm": 0.5287989377975464, "learning_rate": 1.3892276422764229e-05, "loss": 0.0448, "step": 75125 }, { "epoch": 0.916219512195122, "grad_norm": 0.4103694558143616, "learning_rate": 1.3891869918699189e-05, "loss": 0.0599, "step": 75130 }, { "epoch": 0.916280487804878, "grad_norm": 0.25707677006721497, "learning_rate": 1.3891463414634147e-05, "loss": 0.0207, "step": 75135 }, { "epoch": 0.9163414634146342, "grad_norm": 0.4855436086654663, "learning_rate": 1.3891056910569107e-05, "loss": 0.034, "step": 75140 }, { "epoch": 0.9164024390243902, "grad_norm": 0.5421827435493469, "learning_rate": 1.3890650406504066e-05, "loss": 0.0357, "step": 75145 }, { "epoch": 0.9164634146341464, "grad_norm": 2.013209581375122, "learning_rate": 1.3890243902439025e-05, "loss": 0.084, "step": 75150 }, { "epoch": 0.9165243902439024, "grad_norm": 0.6905697584152222, "learning_rate": 1.3889837398373984e-05, "loss": 0.0448, "step": 75155 }, { "epoch": 0.9165853658536586, "grad_norm": 0.779214084148407, "learning_rate": 1.3889430894308944e-05, "loss": 0.0278, "step": 75160 }, { "epoch": 0.9166463414634146, "grad_norm": 0.44435805082321167, "learning_rate": 1.3889024390243904e-05, "loss": 0.0369, "step": 75165 }, { "epoch": 0.9167073170731708, "grad_norm": 0.4197620451450348, "learning_rate": 1.3888617886178864e-05, "loss": 0.0161, "step": 75170 }, { "epoch": 0.9167682926829268, "grad_norm": 0.7223200798034668, "learning_rate": 1.3888211382113824e-05, "loss": 0.062, "step": 75175 }, { "epoch": 0.916829268292683, "grad_norm": 0.6950539350509644, "learning_rate": 1.388780487804878e-05, "loss": 0.0358, "step": 75180 }, { "epoch": 0.916890243902439, "grad_norm": 0.6791081428527832, "learning_rate": 1.388739837398374e-05, "loss": 0.0521, "step": 75185 }, { "epoch": 0.9169512195121952, "grad_norm": 0.5583440065383911, "learning_rate": 1.38869918699187e-05, "loss": 0.0338, "step": 75190 }, { "epoch": 0.9170121951219512, "grad_norm": 0.577832043170929, "learning_rate": 1.388658536585366e-05, "loss": 0.07, "step": 75195 }, { "epoch": 0.9170731707317074, "grad_norm": 1.170247197151184, "learning_rate": 1.388617886178862e-05, "loss": 0.048, "step": 75200 }, { "epoch": 0.9171341463414634, "grad_norm": 0.6299519538879395, "learning_rate": 1.388577235772358e-05, "loss": 0.0315, "step": 75205 }, { "epoch": 0.9171951219512195, "grad_norm": 0.46467238664627075, "learning_rate": 1.3885365853658537e-05, "loss": 0.0409, "step": 75210 }, { "epoch": 0.9172560975609756, "grad_norm": 0.40765100717544556, "learning_rate": 1.3884959349593497e-05, "loss": 0.0633, "step": 75215 }, { "epoch": 0.9173170731707317, "grad_norm": 0.6116068363189697, "learning_rate": 1.3884552845528455e-05, "loss": 0.0534, "step": 75220 }, { "epoch": 0.9173780487804878, "grad_norm": 0.6081845164299011, "learning_rate": 1.3884146341463415e-05, "loss": 0.0391, "step": 75225 }, { "epoch": 0.9174390243902439, "grad_norm": 0.6438771486282349, "learning_rate": 1.3883739837398375e-05, "loss": 0.0384, "step": 75230 }, { "epoch": 0.9175, "grad_norm": 0.794844925403595, "learning_rate": 1.3883333333333335e-05, "loss": 0.0366, "step": 75235 }, { "epoch": 0.9175609756097561, "grad_norm": 0.35877811908721924, "learning_rate": 1.3882926829268293e-05, "loss": 0.0283, "step": 75240 }, { "epoch": 0.9176219512195122, "grad_norm": 0.39226457476615906, "learning_rate": 1.3882520325203253e-05, "loss": 0.0474, "step": 75245 }, { "epoch": 0.9176829268292683, "grad_norm": 0.21657255291938782, "learning_rate": 1.3882113821138213e-05, "loss": 0.0363, "step": 75250 }, { "epoch": 0.9177439024390244, "grad_norm": 1.037171483039856, "learning_rate": 1.3881707317073172e-05, "loss": 0.0379, "step": 75255 }, { "epoch": 0.9178048780487805, "grad_norm": 0.48755598068237305, "learning_rate": 1.3881300813008132e-05, "loss": 0.0649, "step": 75260 }, { "epoch": 0.9178658536585366, "grad_norm": 0.5307029485702515, "learning_rate": 1.3880894308943092e-05, "loss": 0.0321, "step": 75265 }, { "epoch": 0.9179268292682927, "grad_norm": 2.8739445209503174, "learning_rate": 1.3880487804878048e-05, "loss": 0.0357, "step": 75270 }, { "epoch": 0.9179878048780488, "grad_norm": 0.7874382734298706, "learning_rate": 1.3880081300813008e-05, "loss": 0.0289, "step": 75275 }, { "epoch": 0.9180487804878049, "grad_norm": 0.48671597242355347, "learning_rate": 1.3879674796747968e-05, "loss": 0.084, "step": 75280 }, { "epoch": 0.918109756097561, "grad_norm": 0.5136896371841431, "learning_rate": 1.3879268292682928e-05, "loss": 0.0472, "step": 75285 }, { "epoch": 0.9181707317073171, "grad_norm": 0.514692485332489, "learning_rate": 1.3878861788617888e-05, "loss": 0.0461, "step": 75290 }, { "epoch": 0.9182317073170732, "grad_norm": 0.633367657661438, "learning_rate": 1.3878455284552848e-05, "loss": 0.0495, "step": 75295 }, { "epoch": 0.9182926829268293, "grad_norm": 0.4361334443092346, "learning_rate": 1.3878048780487806e-05, "loss": 0.0368, "step": 75300 }, { "epoch": 0.9183536585365853, "grad_norm": 1.1969184875488281, "learning_rate": 1.3877642276422766e-05, "loss": 0.0298, "step": 75305 }, { "epoch": 0.9184146341463415, "grad_norm": 0.567974865436554, "learning_rate": 1.3877235772357724e-05, "loss": 0.0543, "step": 75310 }, { "epoch": 0.9184756097560975, "grad_norm": 0.4257846176624298, "learning_rate": 1.3876829268292683e-05, "loss": 0.041, "step": 75315 }, { "epoch": 0.9185365853658537, "grad_norm": 0.25354668498039246, "learning_rate": 1.3876422764227643e-05, "loss": 0.0502, "step": 75320 }, { "epoch": 0.9185975609756097, "grad_norm": 0.6287432312965393, "learning_rate": 1.3876016260162603e-05, "loss": 0.0455, "step": 75325 }, { "epoch": 0.9186585365853659, "grad_norm": 0.8263911008834839, "learning_rate": 1.3875609756097561e-05, "loss": 0.0436, "step": 75330 }, { "epoch": 0.9187195121951219, "grad_norm": 0.3641578257083893, "learning_rate": 1.3875203252032521e-05, "loss": 0.0387, "step": 75335 }, { "epoch": 0.9187804878048781, "grad_norm": 0.6168946623802185, "learning_rate": 1.3874796747967481e-05, "loss": 0.04, "step": 75340 }, { "epoch": 0.9188414634146341, "grad_norm": 0.5137982964515686, "learning_rate": 1.387439024390244e-05, "loss": 0.0415, "step": 75345 }, { "epoch": 0.9189024390243903, "grad_norm": 1.414259672164917, "learning_rate": 1.38739837398374e-05, "loss": 0.0421, "step": 75350 }, { "epoch": 0.9189634146341463, "grad_norm": 0.7775096893310547, "learning_rate": 1.387357723577236e-05, "loss": 0.0382, "step": 75355 }, { "epoch": 0.9190243902439025, "grad_norm": 1.71388578414917, "learning_rate": 1.3873170731707317e-05, "loss": 0.0499, "step": 75360 }, { "epoch": 0.9190853658536585, "grad_norm": 0.6057710647583008, "learning_rate": 1.3872764227642277e-05, "loss": 0.0406, "step": 75365 }, { "epoch": 0.9191463414634147, "grad_norm": 0.1871398240327835, "learning_rate": 1.3872357723577236e-05, "loss": 0.0491, "step": 75370 }, { "epoch": 0.9192073170731707, "grad_norm": 0.36157870292663574, "learning_rate": 1.3871951219512196e-05, "loss": 0.0324, "step": 75375 }, { "epoch": 0.9192682926829269, "grad_norm": 0.6942043304443359, "learning_rate": 1.3871544715447156e-05, "loss": 0.049, "step": 75380 }, { "epoch": 0.9193292682926829, "grad_norm": 0.5020427107810974, "learning_rate": 1.3871138211382116e-05, "loss": 0.0464, "step": 75385 }, { "epoch": 0.9193902439024391, "grad_norm": 0.27469339966773987, "learning_rate": 1.3870731707317074e-05, "loss": 0.0451, "step": 75390 }, { "epoch": 0.9194512195121951, "grad_norm": 0.557522177696228, "learning_rate": 1.3870325203252034e-05, "loss": 0.0233, "step": 75395 }, { "epoch": 0.9195121951219513, "grad_norm": 0.28099676966667175, "learning_rate": 1.3869918699186992e-05, "loss": 0.0233, "step": 75400 }, { "epoch": 0.9195731707317073, "grad_norm": 0.3476189970970154, "learning_rate": 1.3869512195121952e-05, "loss": 0.0448, "step": 75405 }, { "epoch": 0.9196341463414635, "grad_norm": 0.4822208881378174, "learning_rate": 1.3869105691056912e-05, "loss": 0.0277, "step": 75410 }, { "epoch": 0.9196951219512195, "grad_norm": 0.3103999197483063, "learning_rate": 1.3868699186991871e-05, "loss": 0.0359, "step": 75415 }, { "epoch": 0.9197560975609756, "grad_norm": 0.5870921611785889, "learning_rate": 1.386829268292683e-05, "loss": 0.026, "step": 75420 }, { "epoch": 0.9198170731707317, "grad_norm": 0.8694222569465637, "learning_rate": 1.386788617886179e-05, "loss": 0.0652, "step": 75425 }, { "epoch": 0.9198780487804878, "grad_norm": 0.41232722997665405, "learning_rate": 1.386747967479675e-05, "loss": 0.0399, "step": 75430 }, { "epoch": 0.9199390243902439, "grad_norm": 0.687651515007019, "learning_rate": 1.3867073170731709e-05, "loss": 0.0397, "step": 75435 }, { "epoch": 0.92, "grad_norm": 1.983423113822937, "learning_rate": 1.3866666666666669e-05, "loss": 0.0383, "step": 75440 }, { "epoch": 0.9200609756097561, "grad_norm": 1.1758217811584473, "learning_rate": 1.3866260162601629e-05, "loss": 0.0427, "step": 75445 }, { "epoch": 0.9201219512195122, "grad_norm": 0.8592514991760254, "learning_rate": 1.3865853658536585e-05, "loss": 0.0475, "step": 75450 }, { "epoch": 0.9201829268292683, "grad_norm": 0.549405574798584, "learning_rate": 1.3865447154471545e-05, "loss": 0.0389, "step": 75455 }, { "epoch": 0.9202439024390244, "grad_norm": 0.31140848994255066, "learning_rate": 1.3865040650406505e-05, "loss": 0.028, "step": 75460 }, { "epoch": 0.9203048780487805, "grad_norm": 0.6095442175865173, "learning_rate": 1.3864634146341465e-05, "loss": 0.0603, "step": 75465 }, { "epoch": 0.9203658536585366, "grad_norm": 0.5842000246047974, "learning_rate": 1.3864227642276424e-05, "loss": 0.0409, "step": 75470 }, { "epoch": 0.9204268292682927, "grad_norm": 0.34103885293006897, "learning_rate": 1.3863821138211384e-05, "loss": 0.0576, "step": 75475 }, { "epoch": 0.9204878048780488, "grad_norm": 0.4836711287498474, "learning_rate": 1.3863414634146342e-05, "loss": 0.0389, "step": 75480 }, { "epoch": 0.9205487804878049, "grad_norm": 0.9855769276618958, "learning_rate": 1.38630081300813e-05, "loss": 0.0565, "step": 75485 }, { "epoch": 0.920609756097561, "grad_norm": 0.8666356801986694, "learning_rate": 1.386260162601626e-05, "loss": 0.0325, "step": 75490 }, { "epoch": 0.9206707317073171, "grad_norm": 0.9813256859779358, "learning_rate": 1.386219512195122e-05, "loss": 0.0449, "step": 75495 }, { "epoch": 0.9207317073170732, "grad_norm": 0.6311010718345642, "learning_rate": 1.386178861788618e-05, "loss": 0.0267, "step": 75500 }, { "epoch": 0.9207926829268293, "grad_norm": 0.34300750494003296, "learning_rate": 1.386138211382114e-05, "loss": 0.0225, "step": 75505 }, { "epoch": 0.9208536585365854, "grad_norm": 0.40465664863586426, "learning_rate": 1.3860975609756098e-05, "loss": 0.0212, "step": 75510 }, { "epoch": 0.9209146341463414, "grad_norm": 0.23016594350337982, "learning_rate": 1.3860569105691058e-05, "loss": 0.0707, "step": 75515 }, { "epoch": 0.9209756097560976, "grad_norm": 0.9008458852767944, "learning_rate": 1.3860162601626018e-05, "loss": 0.0349, "step": 75520 }, { "epoch": 0.9210365853658536, "grad_norm": 0.5297664999961853, "learning_rate": 1.3859756097560977e-05, "loss": 0.0498, "step": 75525 }, { "epoch": 0.9210975609756098, "grad_norm": 0.5535913705825806, "learning_rate": 1.3859349593495937e-05, "loss": 0.0499, "step": 75530 }, { "epoch": 0.9211585365853658, "grad_norm": 0.5571811199188232, "learning_rate": 1.3858943089430897e-05, "loss": 0.0332, "step": 75535 }, { "epoch": 0.921219512195122, "grad_norm": 0.679162859916687, "learning_rate": 1.3858536585365853e-05, "loss": 0.0441, "step": 75540 }, { "epoch": 0.921280487804878, "grad_norm": 0.3933880031108856, "learning_rate": 1.3858130081300813e-05, "loss": 0.0487, "step": 75545 }, { "epoch": 0.9213414634146342, "grad_norm": 0.38265514373779297, "learning_rate": 1.3857723577235773e-05, "loss": 0.044, "step": 75550 }, { "epoch": 0.9214024390243902, "grad_norm": 0.7045801877975464, "learning_rate": 1.3857317073170733e-05, "loss": 0.0265, "step": 75555 }, { "epoch": 0.9214634146341464, "grad_norm": 0.45801690220832825, "learning_rate": 1.3856910569105693e-05, "loss": 0.0403, "step": 75560 }, { "epoch": 0.9215243902439024, "grad_norm": 0.8060317039489746, "learning_rate": 1.3856504065040653e-05, "loss": 0.0336, "step": 75565 }, { "epoch": 0.9215853658536586, "grad_norm": 1.0267130136489868, "learning_rate": 1.385609756097561e-05, "loss": 0.0377, "step": 75570 }, { "epoch": 0.9216463414634146, "grad_norm": 0.3859793543815613, "learning_rate": 1.3855691056910569e-05, "loss": 0.0484, "step": 75575 }, { "epoch": 0.9217073170731708, "grad_norm": 0.5375170111656189, "learning_rate": 1.3855284552845529e-05, "loss": 0.0546, "step": 75580 }, { "epoch": 0.9217682926829268, "grad_norm": 0.7333572506904602, "learning_rate": 1.3854878048780488e-05, "loss": 0.0207, "step": 75585 }, { "epoch": 0.921829268292683, "grad_norm": 0.4686834216117859, "learning_rate": 1.3854471544715448e-05, "loss": 0.0435, "step": 75590 }, { "epoch": 0.921890243902439, "grad_norm": 1.5393415689468384, "learning_rate": 1.3854065040650408e-05, "loss": 0.039, "step": 75595 }, { "epoch": 0.9219512195121952, "grad_norm": 0.4781336188316345, "learning_rate": 1.3853658536585366e-05, "loss": 0.0333, "step": 75600 }, { "epoch": 0.9220121951219512, "grad_norm": 0.9120156764984131, "learning_rate": 1.3853252032520326e-05, "loss": 0.0509, "step": 75605 }, { "epoch": 0.9220731707317074, "grad_norm": 0.530695915222168, "learning_rate": 1.3852845528455286e-05, "loss": 0.0267, "step": 75610 }, { "epoch": 0.9221341463414634, "grad_norm": 0.8108497262001038, "learning_rate": 1.3852439024390246e-05, "loss": 0.0495, "step": 75615 }, { "epoch": 0.9221951219512196, "grad_norm": 0.8386637568473816, "learning_rate": 1.3852032520325206e-05, "loss": 0.06, "step": 75620 }, { "epoch": 0.9222560975609756, "grad_norm": 0.5995069742202759, "learning_rate": 1.3851626016260165e-05, "loss": 0.0437, "step": 75625 }, { "epoch": 0.9223170731707317, "grad_norm": 1.0373430252075195, "learning_rate": 1.3851219512195122e-05, "loss": 0.0383, "step": 75630 }, { "epoch": 0.9223780487804878, "grad_norm": 0.9250533580780029, "learning_rate": 1.3850813008130082e-05, "loss": 0.0292, "step": 75635 }, { "epoch": 0.922439024390244, "grad_norm": 0.6795925498008728, "learning_rate": 1.3850406504065041e-05, "loss": 0.0574, "step": 75640 }, { "epoch": 0.9225, "grad_norm": 0.8286501169204712, "learning_rate": 1.3850000000000001e-05, "loss": 0.043, "step": 75645 }, { "epoch": 0.9225609756097561, "grad_norm": 0.6663788557052612, "learning_rate": 1.3849593495934961e-05, "loss": 0.0276, "step": 75650 }, { "epoch": 0.9226219512195122, "grad_norm": 0.35324856638908386, "learning_rate": 1.3849186991869921e-05, "loss": 0.0198, "step": 75655 }, { "epoch": 0.9226829268292683, "grad_norm": 0.36985716223716736, "learning_rate": 1.3848780487804879e-05, "loss": 0.0383, "step": 75660 }, { "epoch": 0.9227439024390244, "grad_norm": 0.39318153262138367, "learning_rate": 1.3848373983739837e-05, "loss": 0.0462, "step": 75665 }, { "epoch": 0.9228048780487805, "grad_norm": 0.29887551069259644, "learning_rate": 1.3847967479674797e-05, "loss": 0.048, "step": 75670 }, { "epoch": 0.9228658536585366, "grad_norm": 2.291551351547241, "learning_rate": 1.3847560975609757e-05, "loss": 0.056, "step": 75675 }, { "epoch": 0.9229268292682927, "grad_norm": 0.469570130109787, "learning_rate": 1.3847154471544717e-05, "loss": 0.0473, "step": 75680 }, { "epoch": 0.9229878048780488, "grad_norm": 0.3530357778072357, "learning_rate": 1.3846747967479676e-05, "loss": 0.0547, "step": 75685 }, { "epoch": 0.9230487804878049, "grad_norm": 0.42339932918548584, "learning_rate": 1.3846341463414635e-05, "loss": 0.0242, "step": 75690 }, { "epoch": 0.923109756097561, "grad_norm": 1.251155138015747, "learning_rate": 1.3845934959349594e-05, "loss": 0.058, "step": 75695 }, { "epoch": 0.9231707317073171, "grad_norm": 0.7247166037559509, "learning_rate": 1.3845528455284554e-05, "loss": 0.043, "step": 75700 }, { "epoch": 0.9232317073170732, "grad_norm": 0.3476827144622803, "learning_rate": 1.3845121951219514e-05, "loss": 0.0381, "step": 75705 }, { "epoch": 0.9232926829268293, "grad_norm": 0.631549596786499, "learning_rate": 1.3844715447154474e-05, "loss": 0.0265, "step": 75710 }, { "epoch": 0.9233536585365854, "grad_norm": 0.34037983417510986, "learning_rate": 1.3844308943089434e-05, "loss": 0.0414, "step": 75715 }, { "epoch": 0.9234146341463415, "grad_norm": 0.7913196682929993, "learning_rate": 1.384390243902439e-05, "loss": 0.0353, "step": 75720 }, { "epoch": 0.9234756097560975, "grad_norm": 0.3466692864894867, "learning_rate": 1.384349593495935e-05, "loss": 0.0388, "step": 75725 }, { "epoch": 0.9235365853658537, "grad_norm": 0.6647277474403381, "learning_rate": 1.384308943089431e-05, "loss": 0.0396, "step": 75730 }, { "epoch": 0.9235975609756097, "grad_norm": 0.8140819668769836, "learning_rate": 1.384268292682927e-05, "loss": 0.0573, "step": 75735 }, { "epoch": 0.9236585365853659, "grad_norm": 0.8743356466293335, "learning_rate": 1.384227642276423e-05, "loss": 0.0241, "step": 75740 }, { "epoch": 0.9237195121951219, "grad_norm": 0.60419762134552, "learning_rate": 1.384186991869919e-05, "loss": 0.0356, "step": 75745 }, { "epoch": 0.9237804878048781, "grad_norm": 0.3283499479293823, "learning_rate": 1.3841463414634146e-05, "loss": 0.0347, "step": 75750 }, { "epoch": 0.9238414634146341, "grad_norm": 0.5976677536964417, "learning_rate": 1.3841056910569105e-05, "loss": 0.0218, "step": 75755 }, { "epoch": 0.9239024390243903, "grad_norm": 0.6068081259727478, "learning_rate": 1.3840650406504065e-05, "loss": 0.0429, "step": 75760 }, { "epoch": 0.9239634146341463, "grad_norm": 0.4579721987247467, "learning_rate": 1.3840243902439025e-05, "loss": 0.046, "step": 75765 }, { "epoch": 0.9240243902439025, "grad_norm": 0.6119675636291504, "learning_rate": 1.3839837398373985e-05, "loss": 0.0226, "step": 75770 }, { "epoch": 0.9240853658536585, "grad_norm": 0.978355884552002, "learning_rate": 1.3839430894308945e-05, "loss": 0.0337, "step": 75775 }, { "epoch": 0.9241463414634147, "grad_norm": 0.48007732629776, "learning_rate": 1.3839024390243903e-05, "loss": 0.0256, "step": 75780 }, { "epoch": 0.9242073170731707, "grad_norm": 1.257470965385437, "learning_rate": 1.3838617886178863e-05, "loss": 0.0747, "step": 75785 }, { "epoch": 0.9242682926829269, "grad_norm": 0.561923623085022, "learning_rate": 1.3838211382113823e-05, "loss": 0.0846, "step": 75790 }, { "epoch": 0.9243292682926829, "grad_norm": 0.4242507517337799, "learning_rate": 1.3837804878048782e-05, "loss": 0.0304, "step": 75795 }, { "epoch": 0.9243902439024391, "grad_norm": 0.2209036499261856, "learning_rate": 1.3837398373983742e-05, "loss": 0.0208, "step": 75800 }, { "epoch": 0.9244512195121951, "grad_norm": 0.5026652216911316, "learning_rate": 1.3836991869918702e-05, "loss": 0.0846, "step": 75805 }, { "epoch": 0.9245121951219513, "grad_norm": 0.4568135738372803, "learning_rate": 1.3836585365853658e-05, "loss": 0.0437, "step": 75810 }, { "epoch": 0.9245731707317073, "grad_norm": 0.33448901772499084, "learning_rate": 1.3836178861788618e-05, "loss": 0.0388, "step": 75815 }, { "epoch": 0.9246341463414635, "grad_norm": 0.9635494351387024, "learning_rate": 1.3835772357723578e-05, "loss": 0.047, "step": 75820 }, { "epoch": 0.9246951219512195, "grad_norm": 0.5851154327392578, "learning_rate": 1.3835365853658538e-05, "loss": 0.0181, "step": 75825 }, { "epoch": 0.9247560975609757, "grad_norm": 0.4571705162525177, "learning_rate": 1.3834959349593498e-05, "loss": 0.0467, "step": 75830 }, { "epoch": 0.9248170731707317, "grad_norm": 0.43643462657928467, "learning_rate": 1.3834552845528458e-05, "loss": 0.0486, "step": 75835 }, { "epoch": 0.9248780487804878, "grad_norm": 1.0040802955627441, "learning_rate": 1.3834146341463414e-05, "loss": 0.0536, "step": 75840 }, { "epoch": 0.9249390243902439, "grad_norm": 0.6850164532661438, "learning_rate": 1.3833739837398374e-05, "loss": 0.0549, "step": 75845 }, { "epoch": 0.925, "grad_norm": 0.40959542989730835, "learning_rate": 1.3833333333333334e-05, "loss": 0.0222, "step": 75850 }, { "epoch": 0.9250609756097561, "grad_norm": 0.4588019847869873, "learning_rate": 1.3832926829268293e-05, "loss": 0.056, "step": 75855 }, { "epoch": 0.9251219512195122, "grad_norm": 0.7088348269462585, "learning_rate": 1.3832520325203253e-05, "loss": 0.0497, "step": 75860 }, { "epoch": 0.9251829268292683, "grad_norm": 0.620692789554596, "learning_rate": 1.3832113821138213e-05, "loss": 0.0378, "step": 75865 }, { "epoch": 0.9252439024390244, "grad_norm": 0.3739624321460724, "learning_rate": 1.3831707317073171e-05, "loss": 0.0239, "step": 75870 }, { "epoch": 0.9253048780487805, "grad_norm": 0.44184231758117676, "learning_rate": 1.3831300813008131e-05, "loss": 0.0282, "step": 75875 }, { "epoch": 0.9253658536585366, "grad_norm": 0.733032763004303, "learning_rate": 1.383089430894309e-05, "loss": 0.0503, "step": 75880 }, { "epoch": 0.9254268292682927, "grad_norm": 3.9248104095458984, "learning_rate": 1.383048780487805e-05, "loss": 0.0258, "step": 75885 }, { "epoch": 0.9254878048780488, "grad_norm": 0.27628079056739807, "learning_rate": 1.383008130081301e-05, "loss": 0.0409, "step": 75890 }, { "epoch": 0.9255487804878049, "grad_norm": 0.5726205706596375, "learning_rate": 1.3829674796747969e-05, "loss": 0.0417, "step": 75895 }, { "epoch": 0.925609756097561, "grad_norm": 0.6730140447616577, "learning_rate": 1.3829268292682927e-05, "loss": 0.032, "step": 75900 }, { "epoch": 0.9256707317073171, "grad_norm": 0.52579265832901, "learning_rate": 1.3828861788617887e-05, "loss": 0.0654, "step": 75905 }, { "epoch": 0.9257317073170732, "grad_norm": 0.7193120121955872, "learning_rate": 1.3828455284552846e-05, "loss": 0.0554, "step": 75910 }, { "epoch": 0.9257926829268293, "grad_norm": 0.6794463396072388, "learning_rate": 1.3828048780487806e-05, "loss": 0.065, "step": 75915 }, { "epoch": 0.9258536585365854, "grad_norm": 0.32312315702438354, "learning_rate": 1.3827642276422766e-05, "loss": 0.0562, "step": 75920 }, { "epoch": 0.9259146341463415, "grad_norm": 0.3535279631614685, "learning_rate": 1.3827235772357726e-05, "loss": 0.0276, "step": 75925 }, { "epoch": 0.9259756097560976, "grad_norm": 0.1845298558473587, "learning_rate": 1.3826829268292682e-05, "loss": 0.0371, "step": 75930 }, { "epoch": 0.9260365853658536, "grad_norm": 0.5093113780021667, "learning_rate": 1.3826422764227642e-05, "loss": 0.0368, "step": 75935 }, { "epoch": 0.9260975609756098, "grad_norm": 1.2318978309631348, "learning_rate": 1.3826016260162602e-05, "loss": 0.0369, "step": 75940 }, { "epoch": 0.9261585365853658, "grad_norm": 0.7394647002220154, "learning_rate": 1.3825609756097562e-05, "loss": 0.0865, "step": 75945 }, { "epoch": 0.926219512195122, "grad_norm": 0.37189456820487976, "learning_rate": 1.3825203252032522e-05, "loss": 0.0362, "step": 75950 }, { "epoch": 0.926280487804878, "grad_norm": 0.4509885609149933, "learning_rate": 1.3824796747967481e-05, "loss": 0.0379, "step": 75955 }, { "epoch": 0.9263414634146342, "grad_norm": 0.43430644273757935, "learning_rate": 1.382439024390244e-05, "loss": 0.0578, "step": 75960 }, { "epoch": 0.9264024390243902, "grad_norm": 0.8190406560897827, "learning_rate": 1.38239837398374e-05, "loss": 0.0451, "step": 75965 }, { "epoch": 0.9264634146341464, "grad_norm": 0.152433842420578, "learning_rate": 1.3823577235772359e-05, "loss": 0.0291, "step": 75970 }, { "epoch": 0.9265243902439024, "grad_norm": 0.5742034912109375, "learning_rate": 1.3823170731707319e-05, "loss": 0.031, "step": 75975 }, { "epoch": 0.9265853658536586, "grad_norm": 0.5142594575881958, "learning_rate": 1.3822764227642279e-05, "loss": 0.0377, "step": 75980 }, { "epoch": 0.9266463414634146, "grad_norm": 0.4749523401260376, "learning_rate": 1.3822357723577237e-05, "loss": 0.0949, "step": 75985 }, { "epoch": 0.9267073170731708, "grad_norm": 0.5868616700172424, "learning_rate": 1.3821951219512195e-05, "loss": 0.0339, "step": 75990 }, { "epoch": 0.9267682926829268, "grad_norm": 0.40050211548805237, "learning_rate": 1.3821544715447155e-05, "loss": 0.0302, "step": 75995 }, { "epoch": 0.926829268292683, "grad_norm": 0.5366073846817017, "learning_rate": 1.3821138211382115e-05, "loss": 0.0274, "step": 76000 }, { "epoch": 0.926890243902439, "grad_norm": 0.19185784459114075, "learning_rate": 1.3820731707317075e-05, "loss": 0.0335, "step": 76005 }, { "epoch": 0.9269512195121952, "grad_norm": 0.7671518325805664, "learning_rate": 1.3820325203252034e-05, "loss": 0.0367, "step": 76010 }, { "epoch": 0.9270121951219512, "grad_norm": 0.8358582258224487, "learning_rate": 1.3819918699186994e-05, "loss": 0.0667, "step": 76015 }, { "epoch": 0.9270731707317074, "grad_norm": 0.429777592420578, "learning_rate": 1.381951219512195e-05, "loss": 0.0386, "step": 76020 }, { "epoch": 0.9271341463414634, "grad_norm": 0.42262622714042664, "learning_rate": 1.381910569105691e-05, "loss": 0.0677, "step": 76025 }, { "epoch": 0.9271951219512196, "grad_norm": 0.36855462193489075, "learning_rate": 1.381869918699187e-05, "loss": 0.0728, "step": 76030 }, { "epoch": 0.9272560975609756, "grad_norm": 0.5603023171424866, "learning_rate": 1.381829268292683e-05, "loss": 0.0229, "step": 76035 }, { "epoch": 0.9273170731707318, "grad_norm": 0.7656720280647278, "learning_rate": 1.381788617886179e-05, "loss": 0.0901, "step": 76040 }, { "epoch": 0.9273780487804878, "grad_norm": 0.3522584140300751, "learning_rate": 1.381747967479675e-05, "loss": 0.0345, "step": 76045 }, { "epoch": 0.927439024390244, "grad_norm": 0.5233808159828186, "learning_rate": 1.3817073170731708e-05, "loss": 0.028, "step": 76050 }, { "epoch": 0.9275, "grad_norm": 0.3322969973087311, "learning_rate": 1.3816666666666668e-05, "loss": 0.0228, "step": 76055 }, { "epoch": 0.9275609756097561, "grad_norm": 0.6919727921485901, "learning_rate": 1.3816260162601627e-05, "loss": 0.0368, "step": 76060 }, { "epoch": 0.9276219512195122, "grad_norm": 0.32681846618652344, "learning_rate": 1.3815853658536587e-05, "loss": 0.0327, "step": 76065 }, { "epoch": 0.9276829268292683, "grad_norm": 0.1346009373664856, "learning_rate": 1.3815447154471547e-05, "loss": 0.0308, "step": 76070 }, { "epoch": 0.9277439024390244, "grad_norm": 0.5235316157341003, "learning_rate": 1.3815040650406505e-05, "loss": 0.0416, "step": 76075 }, { "epoch": 0.9278048780487805, "grad_norm": 0.45751821994781494, "learning_rate": 1.3814634146341463e-05, "loss": 0.0214, "step": 76080 }, { "epoch": 0.9278658536585366, "grad_norm": 0.7384775280952454, "learning_rate": 1.3814227642276423e-05, "loss": 0.0231, "step": 76085 }, { "epoch": 0.9279268292682927, "grad_norm": 0.3474341034889221, "learning_rate": 1.3813821138211383e-05, "loss": 0.0381, "step": 76090 }, { "epoch": 0.9279878048780488, "grad_norm": 0.3694309890270233, "learning_rate": 1.3813414634146343e-05, "loss": 0.041, "step": 76095 }, { "epoch": 0.9280487804878049, "grad_norm": 0.6592804193496704, "learning_rate": 1.3813008130081303e-05, "loss": 0.0369, "step": 76100 }, { "epoch": 0.928109756097561, "grad_norm": 0.7664508819580078, "learning_rate": 1.3812601626016262e-05, "loss": 0.0296, "step": 76105 }, { "epoch": 0.9281707317073171, "grad_norm": 0.6623284816741943, "learning_rate": 1.3812195121951219e-05, "loss": 0.0476, "step": 76110 }, { "epoch": 0.9282317073170732, "grad_norm": 0.6203131079673767, "learning_rate": 1.3811788617886179e-05, "loss": 0.0447, "step": 76115 }, { "epoch": 0.9282926829268293, "grad_norm": 0.4017268419265747, "learning_rate": 1.3811382113821139e-05, "loss": 0.0269, "step": 76120 }, { "epoch": 0.9283536585365854, "grad_norm": 0.5199465751647949, "learning_rate": 1.3810975609756098e-05, "loss": 0.031, "step": 76125 }, { "epoch": 0.9284146341463415, "grad_norm": 0.9674933552742004, "learning_rate": 1.3810569105691058e-05, "loss": 0.0358, "step": 76130 }, { "epoch": 0.9284756097560976, "grad_norm": 0.477788507938385, "learning_rate": 1.3810162601626018e-05, "loss": 0.0602, "step": 76135 }, { "epoch": 0.9285365853658537, "grad_norm": 0.1952485293149948, "learning_rate": 1.3809756097560976e-05, "loss": 0.0347, "step": 76140 }, { "epoch": 0.9285975609756097, "grad_norm": 1.7283533811569214, "learning_rate": 1.3809349593495936e-05, "loss": 0.0787, "step": 76145 }, { "epoch": 0.9286585365853659, "grad_norm": 0.6290341019630432, "learning_rate": 1.3808943089430896e-05, "loss": 0.0509, "step": 76150 }, { "epoch": 0.9287195121951219, "grad_norm": 0.4264102578163147, "learning_rate": 1.3808536585365856e-05, "loss": 0.0594, "step": 76155 }, { "epoch": 0.9287804878048781, "grad_norm": 0.471725195646286, "learning_rate": 1.3808130081300814e-05, "loss": 0.0331, "step": 76160 }, { "epoch": 0.9288414634146341, "grad_norm": 0.8646591901779175, "learning_rate": 1.3807723577235774e-05, "loss": 0.0288, "step": 76165 }, { "epoch": 0.9289024390243903, "grad_norm": 1.0412806272506714, "learning_rate": 1.3807317073170732e-05, "loss": 0.0384, "step": 76170 }, { "epoch": 0.9289634146341463, "grad_norm": 0.40461671352386475, "learning_rate": 1.3806910569105692e-05, "loss": 0.0661, "step": 76175 }, { "epoch": 0.9290243902439025, "grad_norm": 0.4169674813747406, "learning_rate": 1.3806504065040651e-05, "loss": 0.0363, "step": 76180 }, { "epoch": 0.9290853658536585, "grad_norm": 0.5350891351699829, "learning_rate": 1.3806097560975611e-05, "loss": 0.0273, "step": 76185 }, { "epoch": 0.9291463414634147, "grad_norm": 0.8037227392196655, "learning_rate": 1.3805691056910571e-05, "loss": 0.0747, "step": 76190 }, { "epoch": 0.9292073170731707, "grad_norm": 0.08540602773427963, "learning_rate": 1.380528455284553e-05, "loss": 0.0297, "step": 76195 }, { "epoch": 0.9292682926829269, "grad_norm": 0.3569529354572296, "learning_rate": 1.3804878048780487e-05, "loss": 0.0156, "step": 76200 }, { "epoch": 0.9293292682926829, "grad_norm": 0.5519914627075195, "learning_rate": 1.3804471544715447e-05, "loss": 0.0258, "step": 76205 }, { "epoch": 0.9293902439024391, "grad_norm": 0.5955744981765747, "learning_rate": 1.3804065040650407e-05, "loss": 0.0309, "step": 76210 }, { "epoch": 0.9294512195121951, "grad_norm": 0.36472710967063904, "learning_rate": 1.3803658536585367e-05, "loss": 0.026, "step": 76215 }, { "epoch": 0.9295121951219513, "grad_norm": 0.3799985647201538, "learning_rate": 1.3803252032520327e-05, "loss": 0.0305, "step": 76220 }, { "epoch": 0.9295731707317073, "grad_norm": 0.4064345061779022, "learning_rate": 1.3802845528455286e-05, "loss": 0.0186, "step": 76225 }, { "epoch": 0.9296341463414635, "grad_norm": 0.4862210750579834, "learning_rate": 1.3802439024390244e-05, "loss": 0.0425, "step": 76230 }, { "epoch": 0.9296951219512195, "grad_norm": 1.1605132818222046, "learning_rate": 1.3802032520325204e-05, "loss": 0.0414, "step": 76235 }, { "epoch": 0.9297560975609757, "grad_norm": 0.6431221961975098, "learning_rate": 1.3801626016260164e-05, "loss": 0.0374, "step": 76240 }, { "epoch": 0.9298170731707317, "grad_norm": 0.7102773189544678, "learning_rate": 1.3801219512195124e-05, "loss": 0.0249, "step": 76245 }, { "epoch": 0.9298780487804879, "grad_norm": 0.4189704358577728, "learning_rate": 1.3800813008130082e-05, "loss": 0.0198, "step": 76250 }, { "epoch": 0.9299390243902439, "grad_norm": 0.162499338388443, "learning_rate": 1.3800406504065042e-05, "loss": 0.0341, "step": 76255 }, { "epoch": 0.93, "grad_norm": 0.847577691078186, "learning_rate": 1.38e-05, "loss": 0.0414, "step": 76260 }, { "epoch": 0.9300609756097561, "grad_norm": 0.309407114982605, "learning_rate": 1.379959349593496e-05, "loss": 0.0434, "step": 76265 }, { "epoch": 0.9301219512195122, "grad_norm": 0.4687265157699585, "learning_rate": 1.379918699186992e-05, "loss": 0.034, "step": 76270 }, { "epoch": 0.9301829268292683, "grad_norm": 0.4542620778083801, "learning_rate": 1.379878048780488e-05, "loss": 0.0339, "step": 76275 }, { "epoch": 0.9302439024390244, "grad_norm": 0.5468671917915344, "learning_rate": 1.379837398373984e-05, "loss": 0.0239, "step": 76280 }, { "epoch": 0.9303048780487805, "grad_norm": 0.765420138835907, "learning_rate": 1.3797967479674799e-05, "loss": 0.03, "step": 76285 }, { "epoch": 0.9303658536585366, "grad_norm": 0.17028045654296875, "learning_rate": 1.3797560975609756e-05, "loss": 0.0432, "step": 76290 }, { "epoch": 0.9304268292682927, "grad_norm": 0.4863396883010864, "learning_rate": 1.3797154471544715e-05, "loss": 0.0311, "step": 76295 }, { "epoch": 0.9304878048780488, "grad_norm": 0.5236272215843201, "learning_rate": 1.3796747967479675e-05, "loss": 0.0398, "step": 76300 }, { "epoch": 0.9305487804878049, "grad_norm": 0.47314831614494324, "learning_rate": 1.3796341463414635e-05, "loss": 0.0347, "step": 76305 }, { "epoch": 0.930609756097561, "grad_norm": 0.8669286370277405, "learning_rate": 1.3795934959349595e-05, "loss": 0.0518, "step": 76310 }, { "epoch": 0.9306707317073171, "grad_norm": 0.925751805305481, "learning_rate": 1.3795528455284555e-05, "loss": 0.0278, "step": 76315 }, { "epoch": 0.9307317073170732, "grad_norm": 0.3825482130050659, "learning_rate": 1.3795121951219513e-05, "loss": 0.0416, "step": 76320 }, { "epoch": 0.9307926829268293, "grad_norm": 0.6622867584228516, "learning_rate": 1.3794715447154473e-05, "loss": 0.0631, "step": 76325 }, { "epoch": 0.9308536585365854, "grad_norm": 0.310522198677063, "learning_rate": 1.3794308943089432e-05, "loss": 0.0415, "step": 76330 }, { "epoch": 0.9309146341463415, "grad_norm": 0.5223378539085388, "learning_rate": 1.3793902439024392e-05, "loss": 0.0327, "step": 76335 }, { "epoch": 0.9309756097560976, "grad_norm": 1.6242194175720215, "learning_rate": 1.379349593495935e-05, "loss": 0.0748, "step": 76340 }, { "epoch": 0.9310365853658537, "grad_norm": 0.5020418167114258, "learning_rate": 1.379308943089431e-05, "loss": 0.0624, "step": 76345 }, { "epoch": 0.9310975609756098, "grad_norm": 0.4992745518684387, "learning_rate": 1.3792682926829268e-05, "loss": 0.0345, "step": 76350 }, { "epoch": 0.9311585365853658, "grad_norm": 0.25193318724632263, "learning_rate": 1.3792276422764228e-05, "loss": 0.0488, "step": 76355 }, { "epoch": 0.931219512195122, "grad_norm": 0.8316398859024048, "learning_rate": 1.3791869918699188e-05, "loss": 0.0403, "step": 76360 }, { "epoch": 0.931280487804878, "grad_norm": 0.5014843344688416, "learning_rate": 1.3791463414634148e-05, "loss": 0.0465, "step": 76365 }, { "epoch": 0.9313414634146342, "grad_norm": 0.3442976474761963, "learning_rate": 1.3791056910569108e-05, "loss": 0.0265, "step": 76370 }, { "epoch": 0.9314024390243902, "grad_norm": 0.30356889963150024, "learning_rate": 1.3790650406504067e-05, "loss": 0.0495, "step": 76375 }, { "epoch": 0.9314634146341464, "grad_norm": 0.6077677607536316, "learning_rate": 1.3790243902439024e-05, "loss": 0.0622, "step": 76380 }, { "epoch": 0.9315243902439024, "grad_norm": 0.8072797656059265, "learning_rate": 1.3789837398373984e-05, "loss": 0.0613, "step": 76385 }, { "epoch": 0.9315853658536586, "grad_norm": 0.5776717066764832, "learning_rate": 1.3789430894308944e-05, "loss": 0.0481, "step": 76390 }, { "epoch": 0.9316463414634146, "grad_norm": 0.5965726375579834, "learning_rate": 1.3789024390243903e-05, "loss": 0.0281, "step": 76395 }, { "epoch": 0.9317073170731708, "grad_norm": 0.5421350002288818, "learning_rate": 1.3788617886178863e-05, "loss": 0.0267, "step": 76400 }, { "epoch": 0.9317682926829268, "grad_norm": 0.38432368636131287, "learning_rate": 1.3788211382113823e-05, "loss": 0.0318, "step": 76405 }, { "epoch": 0.931829268292683, "grad_norm": 0.5183431506156921, "learning_rate": 1.3787804878048781e-05, "loss": 0.0472, "step": 76410 }, { "epoch": 0.931890243902439, "grad_norm": 1.1059454679489136, "learning_rate": 1.3787398373983741e-05, "loss": 0.0338, "step": 76415 }, { "epoch": 0.9319512195121952, "grad_norm": 0.4385720491409302, "learning_rate": 1.37869918699187e-05, "loss": 0.044, "step": 76420 }, { "epoch": 0.9320121951219512, "grad_norm": 0.8429983258247375, "learning_rate": 1.3786585365853659e-05, "loss": 0.0458, "step": 76425 }, { "epoch": 0.9320731707317074, "grad_norm": 0.5572999119758606, "learning_rate": 1.3786178861788619e-05, "loss": 0.0332, "step": 76430 }, { "epoch": 0.9321341463414634, "grad_norm": 0.7781212329864502, "learning_rate": 1.3785772357723579e-05, "loss": 0.0394, "step": 76435 }, { "epoch": 0.9321951219512196, "grad_norm": 0.6936108469963074, "learning_rate": 1.3785365853658537e-05, "loss": 0.055, "step": 76440 }, { "epoch": 0.9322560975609756, "grad_norm": 0.6850804686546326, "learning_rate": 1.3784959349593496e-05, "loss": 0.051, "step": 76445 }, { "epoch": 0.9323170731707318, "grad_norm": 0.952987551689148, "learning_rate": 1.3784552845528456e-05, "loss": 0.0652, "step": 76450 }, { "epoch": 0.9323780487804878, "grad_norm": 0.31956803798675537, "learning_rate": 1.3784146341463416e-05, "loss": 0.0361, "step": 76455 }, { "epoch": 0.932439024390244, "grad_norm": 0.27409395575523376, "learning_rate": 1.3783739837398376e-05, "loss": 0.0515, "step": 76460 }, { "epoch": 0.9325, "grad_norm": 0.5124744772911072, "learning_rate": 1.3783333333333336e-05, "loss": 0.0567, "step": 76465 }, { "epoch": 0.9325609756097561, "grad_norm": 1.581260085105896, "learning_rate": 1.3782926829268292e-05, "loss": 0.1179, "step": 76470 }, { "epoch": 0.9326219512195122, "grad_norm": 0.5702218413352966, "learning_rate": 1.3782520325203252e-05, "loss": 0.0263, "step": 76475 }, { "epoch": 0.9326829268292683, "grad_norm": 0.7867305874824524, "learning_rate": 1.3782113821138212e-05, "loss": 0.063, "step": 76480 }, { "epoch": 0.9327439024390244, "grad_norm": 0.6755234599113464, "learning_rate": 1.3781707317073172e-05, "loss": 0.0419, "step": 76485 }, { "epoch": 0.9328048780487805, "grad_norm": 0.527057409286499, "learning_rate": 1.3781300813008131e-05, "loss": 0.061, "step": 76490 }, { "epoch": 0.9328658536585366, "grad_norm": 0.3191658854484558, "learning_rate": 1.3780894308943091e-05, "loss": 0.0189, "step": 76495 }, { "epoch": 0.9329268292682927, "grad_norm": 0.30190765857696533, "learning_rate": 1.378048780487805e-05, "loss": 0.0274, "step": 76500 }, { "epoch": 0.9329878048780488, "grad_norm": 0.29478350281715393, "learning_rate": 1.378008130081301e-05, "loss": 0.0687, "step": 76505 }, { "epoch": 0.9330487804878049, "grad_norm": 0.5131024122238159, "learning_rate": 1.3779674796747969e-05, "loss": 0.0603, "step": 76510 }, { "epoch": 0.933109756097561, "grad_norm": 0.5515013933181763, "learning_rate": 1.3779268292682927e-05, "loss": 0.0804, "step": 76515 }, { "epoch": 0.9331707317073171, "grad_norm": 0.5088929533958435, "learning_rate": 1.3778861788617887e-05, "loss": 0.0332, "step": 76520 }, { "epoch": 0.9332317073170732, "grad_norm": 0.38133811950683594, "learning_rate": 1.3778455284552847e-05, "loss": 0.0566, "step": 76525 }, { "epoch": 0.9332926829268293, "grad_norm": 0.4226362407207489, "learning_rate": 1.3778048780487805e-05, "loss": 0.0311, "step": 76530 }, { "epoch": 0.9333536585365854, "grad_norm": 0.48286929726600647, "learning_rate": 1.3777642276422765e-05, "loss": 0.0372, "step": 76535 }, { "epoch": 0.9334146341463415, "grad_norm": 0.7421911954879761, "learning_rate": 1.3777235772357725e-05, "loss": 0.0329, "step": 76540 }, { "epoch": 0.9334756097560976, "grad_norm": 0.47563183307647705, "learning_rate": 1.3776829268292684e-05, "loss": 0.0363, "step": 76545 }, { "epoch": 0.9335365853658537, "grad_norm": 0.645386815071106, "learning_rate": 1.3776422764227644e-05, "loss": 0.0411, "step": 76550 }, { "epoch": 0.9335975609756098, "grad_norm": 0.6209370493888855, "learning_rate": 1.3776016260162604e-05, "loss": 0.048, "step": 76555 }, { "epoch": 0.9336585365853659, "grad_norm": 1.126238226890564, "learning_rate": 1.377560975609756e-05, "loss": 0.0292, "step": 76560 }, { "epoch": 0.933719512195122, "grad_norm": 0.664205014705658, "learning_rate": 1.377520325203252e-05, "loss": 0.0327, "step": 76565 }, { "epoch": 0.9337804878048781, "grad_norm": 0.3154158592224121, "learning_rate": 1.377479674796748e-05, "loss": 0.0199, "step": 76570 }, { "epoch": 0.9338414634146341, "grad_norm": 0.2894608676433563, "learning_rate": 1.377439024390244e-05, "loss": 0.0481, "step": 76575 }, { "epoch": 0.9339024390243903, "grad_norm": 0.37404686212539673, "learning_rate": 1.37739837398374e-05, "loss": 0.0185, "step": 76580 }, { "epoch": 0.9339634146341463, "grad_norm": 0.23403768241405487, "learning_rate": 1.377357723577236e-05, "loss": 0.0593, "step": 76585 }, { "epoch": 0.9340243902439025, "grad_norm": 0.8017391562461853, "learning_rate": 1.3773170731707318e-05, "loss": 0.0332, "step": 76590 }, { "epoch": 0.9340853658536585, "grad_norm": 0.7447320818901062, "learning_rate": 1.3772764227642278e-05, "loss": 0.0467, "step": 76595 }, { "epoch": 0.9341463414634147, "grad_norm": 0.5523407459259033, "learning_rate": 1.3772357723577236e-05, "loss": 0.0578, "step": 76600 }, { "epoch": 0.9342073170731707, "grad_norm": 0.8466123938560486, "learning_rate": 1.3771951219512196e-05, "loss": 0.0479, "step": 76605 }, { "epoch": 0.9342682926829269, "grad_norm": 0.7843273878097534, "learning_rate": 1.3771544715447155e-05, "loss": 0.04, "step": 76610 }, { "epoch": 0.9343292682926829, "grad_norm": 0.4715442955493927, "learning_rate": 1.3771138211382115e-05, "loss": 0.0443, "step": 76615 }, { "epoch": 0.9343902439024391, "grad_norm": 0.3994670808315277, "learning_rate": 1.3770731707317073e-05, "loss": 0.0198, "step": 76620 }, { "epoch": 0.9344512195121951, "grad_norm": 0.7145517468452454, "learning_rate": 1.3770325203252033e-05, "loss": 0.0362, "step": 76625 }, { "epoch": 0.9345121951219513, "grad_norm": 0.4514956474304199, "learning_rate": 1.3769918699186993e-05, "loss": 0.0372, "step": 76630 }, { "epoch": 0.9345731707317073, "grad_norm": 0.6602538824081421, "learning_rate": 1.3769512195121953e-05, "loss": 0.0354, "step": 76635 }, { "epoch": 0.9346341463414635, "grad_norm": 0.6811226010322571, "learning_rate": 1.3769105691056913e-05, "loss": 0.0313, "step": 76640 }, { "epoch": 0.9346951219512195, "grad_norm": 0.4049362242221832, "learning_rate": 1.3768699186991872e-05, "loss": 0.0609, "step": 76645 }, { "epoch": 0.9347560975609757, "grad_norm": 0.14553427696228027, "learning_rate": 1.3768292682926829e-05, "loss": 0.0443, "step": 76650 }, { "epoch": 0.9348170731707317, "grad_norm": 0.7693884968757629, "learning_rate": 1.3767886178861789e-05, "loss": 0.0337, "step": 76655 }, { "epoch": 0.9348780487804879, "grad_norm": 0.31390810012817383, "learning_rate": 1.3767479674796748e-05, "loss": 0.0373, "step": 76660 }, { "epoch": 0.9349390243902439, "grad_norm": 0.4732571244239807, "learning_rate": 1.3767073170731708e-05, "loss": 0.0655, "step": 76665 }, { "epoch": 0.935, "grad_norm": 0.7757952213287354, "learning_rate": 1.3766666666666668e-05, "loss": 0.0941, "step": 76670 }, { "epoch": 0.9350609756097561, "grad_norm": 0.2725866436958313, "learning_rate": 1.3766260162601628e-05, "loss": 0.0605, "step": 76675 }, { "epoch": 0.9351219512195122, "grad_norm": 0.5662636160850525, "learning_rate": 1.3765853658536586e-05, "loss": 0.0374, "step": 76680 }, { "epoch": 0.9351829268292683, "grad_norm": 0.4495216906070709, "learning_rate": 1.3765447154471546e-05, "loss": 0.0369, "step": 76685 }, { "epoch": 0.9352439024390244, "grad_norm": 0.5970337986946106, "learning_rate": 1.3765040650406504e-05, "loss": 0.0372, "step": 76690 }, { "epoch": 0.9353048780487805, "grad_norm": 0.41200128197669983, "learning_rate": 1.3764634146341464e-05, "loss": 0.0405, "step": 76695 }, { "epoch": 0.9353658536585366, "grad_norm": 1.2143727540969849, "learning_rate": 1.3764227642276424e-05, "loss": 0.0465, "step": 76700 }, { "epoch": 0.9354268292682927, "grad_norm": 0.5902882814407349, "learning_rate": 1.3763821138211383e-05, "loss": 0.0361, "step": 76705 }, { "epoch": 0.9354878048780488, "grad_norm": 0.7791754007339478, "learning_rate": 1.3763414634146342e-05, "loss": 0.0324, "step": 76710 }, { "epoch": 0.9355487804878049, "grad_norm": 1.2724488973617554, "learning_rate": 1.3763008130081301e-05, "loss": 0.0645, "step": 76715 }, { "epoch": 0.935609756097561, "grad_norm": 2.710651159286499, "learning_rate": 1.3762601626016261e-05, "loss": 0.0189, "step": 76720 }, { "epoch": 0.9356707317073171, "grad_norm": 0.4204164445400238, "learning_rate": 1.3762195121951221e-05, "loss": 0.0319, "step": 76725 }, { "epoch": 0.9357317073170732, "grad_norm": 0.4931308925151825, "learning_rate": 1.3761788617886181e-05, "loss": 0.0865, "step": 76730 }, { "epoch": 0.9357926829268293, "grad_norm": 0.37422800064086914, "learning_rate": 1.376138211382114e-05, "loss": 0.0494, "step": 76735 }, { "epoch": 0.9358536585365854, "grad_norm": 0.865009605884552, "learning_rate": 1.3760975609756097e-05, "loss": 0.0322, "step": 76740 }, { "epoch": 0.9359146341463415, "grad_norm": 0.7285118699073792, "learning_rate": 1.3760569105691057e-05, "loss": 0.0568, "step": 76745 }, { "epoch": 0.9359756097560976, "grad_norm": 0.2664221227169037, "learning_rate": 1.3760162601626017e-05, "loss": 0.0422, "step": 76750 }, { "epoch": 0.9360365853658537, "grad_norm": 0.4887503683567047, "learning_rate": 1.3759756097560977e-05, "loss": 0.0428, "step": 76755 }, { "epoch": 0.9360975609756098, "grad_norm": 0.23690126836299896, "learning_rate": 1.3759349593495936e-05, "loss": 0.0281, "step": 76760 }, { "epoch": 0.9361585365853659, "grad_norm": 0.2945602536201477, "learning_rate": 1.3758943089430896e-05, "loss": 0.0199, "step": 76765 }, { "epoch": 0.936219512195122, "grad_norm": 0.8921151161193848, "learning_rate": 1.3758536585365854e-05, "loss": 0.0585, "step": 76770 }, { "epoch": 0.936280487804878, "grad_norm": 0.3371957838535309, "learning_rate": 1.3758130081300814e-05, "loss": 0.0251, "step": 76775 }, { "epoch": 0.9363414634146342, "grad_norm": 1.2259292602539062, "learning_rate": 1.3757723577235772e-05, "loss": 0.0401, "step": 76780 }, { "epoch": 0.9364024390243902, "grad_norm": 0.5578867793083191, "learning_rate": 1.3757317073170732e-05, "loss": 0.0281, "step": 76785 }, { "epoch": 0.9364634146341464, "grad_norm": 0.6763214468955994, "learning_rate": 1.3756910569105692e-05, "loss": 0.0519, "step": 76790 }, { "epoch": 0.9365243902439024, "grad_norm": 0.7676724195480347, "learning_rate": 1.3756504065040652e-05, "loss": 0.0268, "step": 76795 }, { "epoch": 0.9365853658536586, "grad_norm": 0.48884326219558716, "learning_rate": 1.375609756097561e-05, "loss": 0.0498, "step": 76800 }, { "epoch": 0.9366463414634146, "grad_norm": 0.4557172656059265, "learning_rate": 1.375569105691057e-05, "loss": 0.0585, "step": 76805 }, { "epoch": 0.9367073170731708, "grad_norm": 0.828629195690155, "learning_rate": 1.375528455284553e-05, "loss": 0.0583, "step": 76810 }, { "epoch": 0.9367682926829268, "grad_norm": 0.466439425945282, "learning_rate": 1.375487804878049e-05, "loss": 0.0306, "step": 76815 }, { "epoch": 0.936829268292683, "grad_norm": 0.8348448276519775, "learning_rate": 1.375447154471545e-05, "loss": 0.0321, "step": 76820 }, { "epoch": 0.936890243902439, "grad_norm": 0.8011490702629089, "learning_rate": 1.3754065040650409e-05, "loss": 0.0488, "step": 76825 }, { "epoch": 0.9369512195121952, "grad_norm": 0.5077983140945435, "learning_rate": 1.3753658536585365e-05, "loss": 0.0731, "step": 76830 }, { "epoch": 0.9370121951219512, "grad_norm": 0.625403881072998, "learning_rate": 1.3753252032520325e-05, "loss": 0.0512, "step": 76835 }, { "epoch": 0.9370731707317074, "grad_norm": 0.2640562057495117, "learning_rate": 1.3752845528455285e-05, "loss": 0.0216, "step": 76840 }, { "epoch": 0.9371341463414634, "grad_norm": 1.0016578435897827, "learning_rate": 1.3752439024390245e-05, "loss": 0.0434, "step": 76845 }, { "epoch": 0.9371951219512196, "grad_norm": 0.3887660503387451, "learning_rate": 1.3752032520325205e-05, "loss": 0.0556, "step": 76850 }, { "epoch": 0.9372560975609756, "grad_norm": 0.28904685378074646, "learning_rate": 1.3751626016260165e-05, "loss": 0.0566, "step": 76855 }, { "epoch": 0.9373170731707318, "grad_norm": 0.36595603823661804, "learning_rate": 1.3751219512195123e-05, "loss": 0.0262, "step": 76860 }, { "epoch": 0.9373780487804878, "grad_norm": 0.6539575457572937, "learning_rate": 1.375081300813008e-05, "loss": 0.0379, "step": 76865 }, { "epoch": 0.937439024390244, "grad_norm": 0.34503883123397827, "learning_rate": 1.375040650406504e-05, "loss": 0.0174, "step": 76870 }, { "epoch": 0.9375, "grad_norm": 0.6896288394927979, "learning_rate": 1.375e-05, "loss": 0.0285, "step": 76875 }, { "epoch": 0.937560975609756, "grad_norm": 0.4552663564682007, "learning_rate": 1.374959349593496e-05, "loss": 0.0574, "step": 76880 }, { "epoch": 0.9376219512195122, "grad_norm": 1.0470799207687378, "learning_rate": 1.374918699186992e-05, "loss": 0.0437, "step": 76885 }, { "epoch": 0.9376829268292682, "grad_norm": 0.46809715032577515, "learning_rate": 1.374878048780488e-05, "loss": 0.0331, "step": 76890 }, { "epoch": 0.9377439024390244, "grad_norm": 0.7364147901535034, "learning_rate": 1.3748373983739838e-05, "loss": 0.0425, "step": 76895 }, { "epoch": 0.9378048780487804, "grad_norm": 0.536257803440094, "learning_rate": 1.3747967479674798e-05, "loss": 0.0488, "step": 76900 }, { "epoch": 0.9378658536585366, "grad_norm": 1.2714370489120483, "learning_rate": 1.3747560975609758e-05, "loss": 0.0876, "step": 76905 }, { "epoch": 0.9379268292682926, "grad_norm": 1.5483070611953735, "learning_rate": 1.3747154471544718e-05, "loss": 0.071, "step": 76910 }, { "epoch": 0.9379878048780488, "grad_norm": 0.7789644598960876, "learning_rate": 1.3746747967479677e-05, "loss": 0.0268, "step": 76915 }, { "epoch": 0.9380487804878048, "grad_norm": 0.300990492105484, "learning_rate": 1.3746341463414637e-05, "loss": 0.0241, "step": 76920 }, { "epoch": 0.938109756097561, "grad_norm": 0.6880024671554565, "learning_rate": 1.3745934959349594e-05, "loss": 0.0297, "step": 76925 }, { "epoch": 0.938170731707317, "grad_norm": 0.45579794049263, "learning_rate": 1.3745528455284553e-05, "loss": 0.0362, "step": 76930 }, { "epoch": 0.9382317073170732, "grad_norm": 0.3770677447319031, "learning_rate": 1.3745121951219513e-05, "loss": 0.0364, "step": 76935 }, { "epoch": 0.9382926829268292, "grad_norm": 0.6375514268875122, "learning_rate": 1.3744715447154473e-05, "loss": 0.0428, "step": 76940 }, { "epoch": 0.9383536585365854, "grad_norm": 0.3999820351600647, "learning_rate": 1.3744308943089433e-05, "loss": 0.0353, "step": 76945 }, { "epoch": 0.9384146341463414, "grad_norm": 0.8099133968353271, "learning_rate": 1.3743902439024393e-05, "loss": 0.0449, "step": 76950 }, { "epoch": 0.9384756097560976, "grad_norm": 0.3517715632915497, "learning_rate": 1.374349593495935e-05, "loss": 0.0244, "step": 76955 }, { "epoch": 0.9385365853658536, "grad_norm": 0.18215405941009521, "learning_rate": 1.3743089430894309e-05, "loss": 0.0275, "step": 76960 }, { "epoch": 0.9385975609756098, "grad_norm": 0.7774873375892639, "learning_rate": 1.3742682926829269e-05, "loss": 0.0303, "step": 76965 }, { "epoch": 0.9386585365853658, "grad_norm": 0.7447283864021301, "learning_rate": 1.3742276422764229e-05, "loss": 0.0369, "step": 76970 }, { "epoch": 0.938719512195122, "grad_norm": 0.6248683929443359, "learning_rate": 1.3741869918699188e-05, "loss": 0.0301, "step": 76975 }, { "epoch": 0.938780487804878, "grad_norm": 0.4085192084312439, "learning_rate": 1.3741463414634148e-05, "loss": 0.0805, "step": 76980 }, { "epoch": 0.9388414634146341, "grad_norm": 2.791717052459717, "learning_rate": 1.3741056910569106e-05, "loss": 0.0811, "step": 76985 }, { "epoch": 0.9389024390243902, "grad_norm": 1.5183134078979492, "learning_rate": 1.3740650406504066e-05, "loss": 0.0649, "step": 76990 }, { "epoch": 0.9389634146341463, "grad_norm": 0.46928438544273376, "learning_rate": 1.3740243902439026e-05, "loss": 0.0549, "step": 76995 }, { "epoch": 0.9390243902439024, "grad_norm": 1.1256132125854492, "learning_rate": 1.3739837398373986e-05, "loss": 0.0422, "step": 77000 }, { "epoch": 0.9390853658536585, "grad_norm": 0.428078293800354, "learning_rate": 1.3739430894308946e-05, "loss": 0.037, "step": 77005 }, { "epoch": 0.9391463414634146, "grad_norm": 0.6036169528961182, "learning_rate": 1.3739024390243904e-05, "loss": 0.0348, "step": 77010 }, { "epoch": 0.9392073170731707, "grad_norm": 0.7168169021606445, "learning_rate": 1.3738617886178862e-05, "loss": 0.0503, "step": 77015 }, { "epoch": 0.9392682926829268, "grad_norm": 0.37894701957702637, "learning_rate": 1.3738211382113822e-05, "loss": 0.0276, "step": 77020 }, { "epoch": 0.9393292682926829, "grad_norm": 0.5320449471473694, "learning_rate": 1.3737804878048782e-05, "loss": 0.035, "step": 77025 }, { "epoch": 0.939390243902439, "grad_norm": 0.615029513835907, "learning_rate": 1.3737398373983741e-05, "loss": 0.0416, "step": 77030 }, { "epoch": 0.9394512195121951, "grad_norm": 0.5798778533935547, "learning_rate": 1.3736991869918701e-05, "loss": 0.051, "step": 77035 }, { "epoch": 0.9395121951219512, "grad_norm": 0.5650972723960876, "learning_rate": 1.3736585365853661e-05, "loss": 0.0286, "step": 77040 }, { "epoch": 0.9395731707317073, "grad_norm": 0.803024172782898, "learning_rate": 1.3736178861788617e-05, "loss": 0.0485, "step": 77045 }, { "epoch": 0.9396341463414634, "grad_norm": 0.6285877823829651, "learning_rate": 1.3735772357723577e-05, "loss": 0.047, "step": 77050 }, { "epoch": 0.9396951219512195, "grad_norm": 0.6216349601745605, "learning_rate": 1.3735365853658537e-05, "loss": 0.0493, "step": 77055 }, { "epoch": 0.9397560975609756, "grad_norm": 0.37704020738601685, "learning_rate": 1.3734959349593497e-05, "loss": 0.0203, "step": 77060 }, { "epoch": 0.9398170731707317, "grad_norm": 0.5138107538223267, "learning_rate": 1.3734552845528457e-05, "loss": 0.035, "step": 77065 }, { "epoch": 0.9398780487804878, "grad_norm": 0.559916079044342, "learning_rate": 1.3734146341463417e-05, "loss": 0.0219, "step": 77070 }, { "epoch": 0.9399390243902439, "grad_norm": 0.49754172563552856, "learning_rate": 1.3733739837398375e-05, "loss": 0.0359, "step": 77075 }, { "epoch": 0.94, "grad_norm": 0.21273916959762573, "learning_rate": 1.3733333333333335e-05, "loss": 0.0436, "step": 77080 }, { "epoch": 0.9400609756097561, "grad_norm": 0.5760965347290039, "learning_rate": 1.3732926829268294e-05, "loss": 0.0806, "step": 77085 }, { "epoch": 0.9401219512195121, "grad_norm": 0.48397716879844666, "learning_rate": 1.3732520325203254e-05, "loss": 0.0662, "step": 77090 }, { "epoch": 0.9401829268292683, "grad_norm": 0.6712400913238525, "learning_rate": 1.3732113821138214e-05, "loss": 0.0424, "step": 77095 }, { "epoch": 0.9402439024390243, "grad_norm": 0.44533199071884155, "learning_rate": 1.3731707317073172e-05, "loss": 0.0277, "step": 77100 }, { "epoch": 0.9403048780487805, "grad_norm": 0.4891093373298645, "learning_rate": 1.373130081300813e-05, "loss": 0.0442, "step": 77105 }, { "epoch": 0.9403658536585365, "grad_norm": 0.6036306619644165, "learning_rate": 1.373089430894309e-05, "loss": 0.0325, "step": 77110 }, { "epoch": 0.9404268292682927, "grad_norm": 0.3857201337814331, "learning_rate": 1.373048780487805e-05, "loss": 0.0393, "step": 77115 }, { "epoch": 0.9404878048780487, "grad_norm": 0.618482232093811, "learning_rate": 1.373008130081301e-05, "loss": 0.0523, "step": 77120 }, { "epoch": 0.9405487804878049, "grad_norm": 0.15678094327449799, "learning_rate": 1.372967479674797e-05, "loss": 0.0238, "step": 77125 }, { "epoch": 0.9406097560975609, "grad_norm": 0.9553641676902771, "learning_rate": 1.372926829268293e-05, "loss": 0.0364, "step": 77130 }, { "epoch": 0.9406707317073171, "grad_norm": 1.1557904481887817, "learning_rate": 1.3728861788617886e-05, "loss": 0.0342, "step": 77135 }, { "epoch": 0.9407317073170731, "grad_norm": 1.5982916355133057, "learning_rate": 1.3728455284552846e-05, "loss": 0.0531, "step": 77140 }, { "epoch": 0.9407926829268293, "grad_norm": 0.8499220013618469, "learning_rate": 1.3728048780487805e-05, "loss": 0.0344, "step": 77145 }, { "epoch": 0.9408536585365853, "grad_norm": 0.5973464250564575, "learning_rate": 1.3727642276422765e-05, "loss": 0.0651, "step": 77150 }, { "epoch": 0.9409146341463415, "grad_norm": 0.6277034878730774, "learning_rate": 1.3727235772357725e-05, "loss": 0.0366, "step": 77155 }, { "epoch": 0.9409756097560975, "grad_norm": 0.43179798126220703, "learning_rate": 1.3726829268292685e-05, "loss": 0.0708, "step": 77160 }, { "epoch": 0.9410365853658537, "grad_norm": 0.9989341497421265, "learning_rate": 1.3726422764227643e-05, "loss": 0.0597, "step": 77165 }, { "epoch": 0.9410975609756097, "grad_norm": 0.6615029573440552, "learning_rate": 1.3726016260162603e-05, "loss": 0.0524, "step": 77170 }, { "epoch": 0.9411585365853659, "grad_norm": 0.3301929533481598, "learning_rate": 1.3725609756097563e-05, "loss": 0.0556, "step": 77175 }, { "epoch": 0.9412195121951219, "grad_norm": 0.31029582023620605, "learning_rate": 1.3725203252032523e-05, "loss": 0.0613, "step": 77180 }, { "epoch": 0.941280487804878, "grad_norm": 0.22863063216209412, "learning_rate": 1.3724796747967482e-05, "loss": 0.0393, "step": 77185 }, { "epoch": 0.9413414634146341, "grad_norm": 0.43395280838012695, "learning_rate": 1.372439024390244e-05, "loss": 0.0441, "step": 77190 }, { "epoch": 0.9414024390243902, "grad_norm": 0.48593035340309143, "learning_rate": 1.3723983739837399e-05, "loss": 0.0482, "step": 77195 }, { "epoch": 0.9414634146341463, "grad_norm": 0.42897161841392517, "learning_rate": 1.3723577235772358e-05, "loss": 0.0212, "step": 77200 }, { "epoch": 0.9415243902439024, "grad_norm": 1.160215973854065, "learning_rate": 1.3723170731707318e-05, "loss": 0.0688, "step": 77205 }, { "epoch": 0.9415853658536585, "grad_norm": 1.0195591449737549, "learning_rate": 1.3722764227642278e-05, "loss": 0.0372, "step": 77210 }, { "epoch": 0.9416463414634146, "grad_norm": 0.565837025642395, "learning_rate": 1.3722357723577238e-05, "loss": 0.0299, "step": 77215 }, { "epoch": 0.9417073170731707, "grad_norm": 0.26697689294815063, "learning_rate": 1.3721951219512198e-05, "loss": 0.0093, "step": 77220 }, { "epoch": 0.9417682926829268, "grad_norm": 0.607787013053894, "learning_rate": 1.3721544715447154e-05, "loss": 0.0606, "step": 77225 }, { "epoch": 0.9418292682926829, "grad_norm": 0.7865710258483887, "learning_rate": 1.3721138211382114e-05, "loss": 0.0391, "step": 77230 }, { "epoch": 0.941890243902439, "grad_norm": 0.6988195776939392, "learning_rate": 1.3720731707317074e-05, "loss": 0.0382, "step": 77235 }, { "epoch": 0.9419512195121951, "grad_norm": 0.27666333317756653, "learning_rate": 1.3720325203252034e-05, "loss": 0.0214, "step": 77240 }, { "epoch": 0.9420121951219512, "grad_norm": 0.3563079833984375, "learning_rate": 1.3719918699186993e-05, "loss": 0.0472, "step": 77245 }, { "epoch": 0.9420731707317073, "grad_norm": 0.5725104808807373, "learning_rate": 1.3719512195121953e-05, "loss": 0.0462, "step": 77250 }, { "epoch": 0.9421341463414634, "grad_norm": 0.7017048597335815, "learning_rate": 1.3719105691056911e-05, "loss": 0.0481, "step": 77255 }, { "epoch": 0.9421951219512195, "grad_norm": 0.6154289245605469, "learning_rate": 1.3718699186991871e-05, "loss": 0.031, "step": 77260 }, { "epoch": 0.9422560975609756, "grad_norm": 1.0364434719085693, "learning_rate": 1.3718292682926831e-05, "loss": 0.0273, "step": 77265 }, { "epoch": 0.9423170731707317, "grad_norm": 0.44612735509872437, "learning_rate": 1.371788617886179e-05, "loss": 0.039, "step": 77270 }, { "epoch": 0.9423780487804878, "grad_norm": 0.4578578770160675, "learning_rate": 1.3717479674796749e-05, "loss": 0.044, "step": 77275 }, { "epoch": 0.9424390243902439, "grad_norm": 0.3792925179004669, "learning_rate": 1.3717073170731709e-05, "loss": 0.0357, "step": 77280 }, { "epoch": 0.9425, "grad_norm": 0.38299620151519775, "learning_rate": 1.3716666666666667e-05, "loss": 0.0272, "step": 77285 }, { "epoch": 0.942560975609756, "grad_norm": 0.6277565360069275, "learning_rate": 1.3716260162601627e-05, "loss": 0.0216, "step": 77290 }, { "epoch": 0.9426219512195122, "grad_norm": 0.5627005696296692, "learning_rate": 1.3715853658536587e-05, "loss": 0.0435, "step": 77295 }, { "epoch": 0.9426829268292682, "grad_norm": 0.4989834427833557, "learning_rate": 1.3715447154471546e-05, "loss": 0.0335, "step": 77300 }, { "epoch": 0.9427439024390244, "grad_norm": 0.4422870874404907, "learning_rate": 1.3715040650406506e-05, "loss": 0.0636, "step": 77305 }, { "epoch": 0.9428048780487804, "grad_norm": 0.7724015712738037, "learning_rate": 1.3714634146341466e-05, "loss": 0.0399, "step": 77310 }, { "epoch": 0.9428658536585366, "grad_norm": 0.4331042170524597, "learning_rate": 1.3714227642276422e-05, "loss": 0.0464, "step": 77315 }, { "epoch": 0.9429268292682926, "grad_norm": 0.467741459608078, "learning_rate": 1.3713821138211382e-05, "loss": 0.0596, "step": 77320 }, { "epoch": 0.9429878048780488, "grad_norm": 0.5336962342262268, "learning_rate": 1.3713414634146342e-05, "loss": 0.0424, "step": 77325 }, { "epoch": 0.9430487804878048, "grad_norm": 0.318939745426178, "learning_rate": 1.3713008130081302e-05, "loss": 0.0508, "step": 77330 }, { "epoch": 0.943109756097561, "grad_norm": 0.3605339527130127, "learning_rate": 1.3712601626016262e-05, "loss": 0.0296, "step": 77335 }, { "epoch": 0.943170731707317, "grad_norm": 0.5295592546463013, "learning_rate": 1.3712195121951222e-05, "loss": 0.0352, "step": 77340 }, { "epoch": 0.9432317073170732, "grad_norm": 0.5441219806671143, "learning_rate": 1.371178861788618e-05, "loss": 0.0279, "step": 77345 }, { "epoch": 0.9432926829268292, "grad_norm": 0.4883471727371216, "learning_rate": 1.371138211382114e-05, "loss": 0.0337, "step": 77350 }, { "epoch": 0.9433536585365854, "grad_norm": 0.4323548674583435, "learning_rate": 1.37109756097561e-05, "loss": 0.0388, "step": 77355 }, { "epoch": 0.9434146341463414, "grad_norm": 0.4002116322517395, "learning_rate": 1.371056910569106e-05, "loss": 0.0681, "step": 77360 }, { "epoch": 0.9434756097560976, "grad_norm": 0.3449181616306305, "learning_rate": 1.3710162601626017e-05, "loss": 0.053, "step": 77365 }, { "epoch": 0.9435365853658536, "grad_norm": 0.9642330408096313, "learning_rate": 1.3709756097560977e-05, "loss": 0.0913, "step": 77370 }, { "epoch": 0.9435975609756098, "grad_norm": 0.5543972253799438, "learning_rate": 1.3709349593495935e-05, "loss": 0.0423, "step": 77375 }, { "epoch": 0.9436585365853658, "grad_norm": 0.8369798064231873, "learning_rate": 1.3708943089430895e-05, "loss": 0.1134, "step": 77380 }, { "epoch": 0.943719512195122, "grad_norm": 0.6715936064720154, "learning_rate": 1.3708536585365855e-05, "loss": 0.0655, "step": 77385 }, { "epoch": 0.943780487804878, "grad_norm": 0.49988651275634766, "learning_rate": 1.3708130081300815e-05, "loss": 0.0475, "step": 77390 }, { "epoch": 0.9438414634146342, "grad_norm": 0.5174569487571716, "learning_rate": 1.3707723577235775e-05, "loss": 0.0564, "step": 77395 }, { "epoch": 0.9439024390243902, "grad_norm": 0.803361177444458, "learning_rate": 1.3707317073170734e-05, "loss": 0.0315, "step": 77400 }, { "epoch": 0.9439634146341463, "grad_norm": 0.6380859017372131, "learning_rate": 1.370691056910569e-05, "loss": 0.0781, "step": 77405 }, { "epoch": 0.9440243902439024, "grad_norm": 0.8797647953033447, "learning_rate": 1.370650406504065e-05, "loss": 0.0318, "step": 77410 }, { "epoch": 0.9440853658536585, "grad_norm": 1.2391902208328247, "learning_rate": 1.370609756097561e-05, "loss": 0.0438, "step": 77415 }, { "epoch": 0.9441463414634146, "grad_norm": 0.6202788949012756, "learning_rate": 1.370569105691057e-05, "loss": 0.0499, "step": 77420 }, { "epoch": 0.9442073170731707, "grad_norm": 0.6328511238098145, "learning_rate": 1.370528455284553e-05, "loss": 0.0374, "step": 77425 }, { "epoch": 0.9442682926829268, "grad_norm": 1.320801854133606, "learning_rate": 1.370487804878049e-05, "loss": 0.0236, "step": 77430 }, { "epoch": 0.9443292682926829, "grad_norm": 0.4424143433570862, "learning_rate": 1.3704471544715448e-05, "loss": 0.0417, "step": 77435 }, { "epoch": 0.944390243902439, "grad_norm": 0.3559405505657196, "learning_rate": 1.3704065040650408e-05, "loss": 0.0385, "step": 77440 }, { "epoch": 0.9444512195121951, "grad_norm": 0.5386531949043274, "learning_rate": 1.3703658536585368e-05, "loss": 0.0246, "step": 77445 }, { "epoch": 0.9445121951219512, "grad_norm": 1.1709966659545898, "learning_rate": 1.3703252032520327e-05, "loss": 0.0345, "step": 77450 }, { "epoch": 0.9445731707317073, "grad_norm": 0.4622446894645691, "learning_rate": 1.3702845528455286e-05, "loss": 0.031, "step": 77455 }, { "epoch": 0.9446341463414634, "grad_norm": 0.4901471734046936, "learning_rate": 1.3702439024390245e-05, "loss": 0.0447, "step": 77460 }, { "epoch": 0.9446951219512195, "grad_norm": 0.9721518158912659, "learning_rate": 1.3702032520325204e-05, "loss": 0.0225, "step": 77465 }, { "epoch": 0.9447560975609756, "grad_norm": 0.7575930953025818, "learning_rate": 1.3701626016260163e-05, "loss": 0.0291, "step": 77470 }, { "epoch": 0.9448170731707317, "grad_norm": 0.48336854577064514, "learning_rate": 1.3701219512195123e-05, "loss": 0.0369, "step": 77475 }, { "epoch": 0.9448780487804878, "grad_norm": 0.850414514541626, "learning_rate": 1.3700813008130083e-05, "loss": 0.0297, "step": 77480 }, { "epoch": 0.9449390243902439, "grad_norm": 0.9281995296478271, "learning_rate": 1.3700406504065043e-05, "loss": 0.0458, "step": 77485 }, { "epoch": 0.945, "grad_norm": 0.6266590356826782, "learning_rate": 1.3700000000000003e-05, "loss": 0.0366, "step": 77490 }, { "epoch": 0.9450609756097561, "grad_norm": 0.5592871308326721, "learning_rate": 1.3699593495934959e-05, "loss": 0.0249, "step": 77495 }, { "epoch": 0.9451219512195121, "grad_norm": 0.5918963551521301, "learning_rate": 1.3699186991869919e-05, "loss": 0.0426, "step": 77500 }, { "epoch": 0.9451829268292683, "grad_norm": 0.36303144693374634, "learning_rate": 1.3698780487804879e-05, "loss": 0.0333, "step": 77505 }, { "epoch": 0.9452439024390243, "grad_norm": 0.963438093662262, "learning_rate": 1.3698373983739839e-05, "loss": 0.0185, "step": 77510 }, { "epoch": 0.9453048780487805, "grad_norm": 0.7702344655990601, "learning_rate": 1.3697967479674798e-05, "loss": 0.0304, "step": 77515 }, { "epoch": 0.9453658536585365, "grad_norm": 0.25871777534484863, "learning_rate": 1.3697560975609758e-05, "loss": 0.0721, "step": 77520 }, { "epoch": 0.9454268292682927, "grad_norm": 0.5118154883384705, "learning_rate": 1.3697154471544716e-05, "loss": 0.0737, "step": 77525 }, { "epoch": 0.9454878048780487, "grad_norm": 0.4134267270565033, "learning_rate": 1.3696747967479676e-05, "loss": 0.0299, "step": 77530 }, { "epoch": 0.9455487804878049, "grad_norm": 0.6854175925254822, "learning_rate": 1.3696341463414636e-05, "loss": 0.0513, "step": 77535 }, { "epoch": 0.9456097560975609, "grad_norm": 0.3269648849964142, "learning_rate": 1.3695934959349594e-05, "loss": 0.027, "step": 77540 }, { "epoch": 0.9456707317073171, "grad_norm": 0.48170891404151917, "learning_rate": 1.3695528455284554e-05, "loss": 0.0333, "step": 77545 }, { "epoch": 0.9457317073170731, "grad_norm": 0.6260095834732056, "learning_rate": 1.3695121951219514e-05, "loss": 0.0723, "step": 77550 }, { "epoch": 0.9457926829268293, "grad_norm": 1.8393605947494507, "learning_rate": 1.3694715447154472e-05, "loss": 0.0472, "step": 77555 }, { "epoch": 0.9458536585365853, "grad_norm": 0.6714160442352295, "learning_rate": 1.3694308943089432e-05, "loss": 0.0307, "step": 77560 }, { "epoch": 0.9459146341463415, "grad_norm": 0.46975913643836975, "learning_rate": 1.3693902439024392e-05, "loss": 0.0351, "step": 77565 }, { "epoch": 0.9459756097560975, "grad_norm": 1.229382872581482, "learning_rate": 1.3693495934959351e-05, "loss": 0.051, "step": 77570 }, { "epoch": 0.9460365853658537, "grad_norm": 0.2439512312412262, "learning_rate": 1.3693089430894311e-05, "loss": 0.0239, "step": 77575 }, { "epoch": 0.9460975609756097, "grad_norm": 0.2916572093963623, "learning_rate": 1.3692682926829271e-05, "loss": 0.0252, "step": 77580 }, { "epoch": 0.9461585365853659, "grad_norm": 0.7893183827400208, "learning_rate": 1.3692276422764227e-05, "loss": 0.0245, "step": 77585 }, { "epoch": 0.9462195121951219, "grad_norm": 0.3783522844314575, "learning_rate": 1.3691869918699187e-05, "loss": 0.0361, "step": 77590 }, { "epoch": 0.9462804878048781, "grad_norm": 0.77939772605896, "learning_rate": 1.3691463414634147e-05, "loss": 0.0329, "step": 77595 }, { "epoch": 0.9463414634146341, "grad_norm": 2.135716676712036, "learning_rate": 1.3691056910569107e-05, "loss": 0.0364, "step": 77600 }, { "epoch": 0.9464024390243903, "grad_norm": 0.4850812256336212, "learning_rate": 1.3690650406504067e-05, "loss": 0.0405, "step": 77605 }, { "epoch": 0.9464634146341463, "grad_norm": 0.5900667309761047, "learning_rate": 1.3690243902439027e-05, "loss": 0.0324, "step": 77610 }, { "epoch": 0.9465243902439024, "grad_norm": 0.398664265871048, "learning_rate": 1.3689837398373985e-05, "loss": 0.0202, "step": 77615 }, { "epoch": 0.9465853658536585, "grad_norm": 0.39848288893699646, "learning_rate": 1.3689430894308944e-05, "loss": 0.0494, "step": 77620 }, { "epoch": 0.9466463414634146, "grad_norm": 0.4453342854976654, "learning_rate": 1.3689024390243904e-05, "loss": 0.0818, "step": 77625 }, { "epoch": 0.9467073170731707, "grad_norm": 0.3046935200691223, "learning_rate": 1.3688617886178862e-05, "loss": 0.0412, "step": 77630 }, { "epoch": 0.9467682926829268, "grad_norm": 0.3673369288444519, "learning_rate": 1.3688211382113822e-05, "loss": 0.0425, "step": 77635 }, { "epoch": 0.9468292682926829, "grad_norm": 0.5550208687782288, "learning_rate": 1.3687804878048782e-05, "loss": 0.042, "step": 77640 }, { "epoch": 0.946890243902439, "grad_norm": 0.42734041810035706, "learning_rate": 1.368739837398374e-05, "loss": 0.0395, "step": 77645 }, { "epoch": 0.9469512195121951, "grad_norm": 0.2983894348144531, "learning_rate": 1.36869918699187e-05, "loss": 0.0576, "step": 77650 }, { "epoch": 0.9470121951219512, "grad_norm": 0.969237208366394, "learning_rate": 1.368658536585366e-05, "loss": 0.048, "step": 77655 }, { "epoch": 0.9470731707317073, "grad_norm": 0.9686558842658997, "learning_rate": 1.368617886178862e-05, "loss": 0.0539, "step": 77660 }, { "epoch": 0.9471341463414634, "grad_norm": 0.9503363966941833, "learning_rate": 1.368577235772358e-05, "loss": 0.0292, "step": 77665 }, { "epoch": 0.9471951219512195, "grad_norm": 0.30877405405044556, "learning_rate": 1.368536585365854e-05, "loss": 0.0339, "step": 77670 }, { "epoch": 0.9472560975609756, "grad_norm": 0.2901299297809601, "learning_rate": 1.3684959349593496e-05, "loss": 0.0543, "step": 77675 }, { "epoch": 0.9473170731707317, "grad_norm": 0.7101953625679016, "learning_rate": 1.3684552845528456e-05, "loss": 0.033, "step": 77680 }, { "epoch": 0.9473780487804878, "grad_norm": 1.2285674810409546, "learning_rate": 1.3684146341463415e-05, "loss": 0.0571, "step": 77685 }, { "epoch": 0.9474390243902439, "grad_norm": 0.28952911496162415, "learning_rate": 1.3683739837398375e-05, "loss": 0.0262, "step": 77690 }, { "epoch": 0.9475, "grad_norm": 0.5008050203323364, "learning_rate": 1.3683333333333335e-05, "loss": 0.079, "step": 77695 }, { "epoch": 0.947560975609756, "grad_norm": 0.29656508564949036, "learning_rate": 1.3682926829268295e-05, "loss": 0.0311, "step": 77700 }, { "epoch": 0.9476219512195122, "grad_norm": 0.22276633977890015, "learning_rate": 1.3682520325203253e-05, "loss": 0.0375, "step": 77705 }, { "epoch": 0.9476829268292682, "grad_norm": 0.4397987723350525, "learning_rate": 1.3682113821138213e-05, "loss": 0.0356, "step": 77710 }, { "epoch": 0.9477439024390244, "grad_norm": 0.6771247982978821, "learning_rate": 1.3681707317073173e-05, "loss": 0.0464, "step": 77715 }, { "epoch": 0.9478048780487804, "grad_norm": 0.49750351905822754, "learning_rate": 1.368130081300813e-05, "loss": 0.029, "step": 77720 }, { "epoch": 0.9478658536585366, "grad_norm": 0.489070862531662, "learning_rate": 1.368089430894309e-05, "loss": 0.0435, "step": 77725 }, { "epoch": 0.9479268292682926, "grad_norm": 0.8023384213447571, "learning_rate": 1.368048780487805e-05, "loss": 0.0515, "step": 77730 }, { "epoch": 0.9479878048780488, "grad_norm": 1.2632713317871094, "learning_rate": 1.3680081300813009e-05, "loss": 0.0756, "step": 77735 }, { "epoch": 0.9480487804878048, "grad_norm": 0.6538295745849609, "learning_rate": 1.3679674796747968e-05, "loss": 0.0416, "step": 77740 }, { "epoch": 0.948109756097561, "grad_norm": 0.5892119407653809, "learning_rate": 1.3679268292682928e-05, "loss": 0.0289, "step": 77745 }, { "epoch": 0.948170731707317, "grad_norm": 0.8832639455795288, "learning_rate": 1.3678861788617888e-05, "loss": 0.0427, "step": 77750 }, { "epoch": 0.9482317073170732, "grad_norm": 0.8067852854728699, "learning_rate": 1.3678455284552848e-05, "loss": 0.0674, "step": 77755 }, { "epoch": 0.9482926829268292, "grad_norm": 0.44628798961639404, "learning_rate": 1.3678048780487808e-05, "loss": 0.0495, "step": 77760 }, { "epoch": 0.9483536585365854, "grad_norm": 0.48732298612594604, "learning_rate": 1.3677642276422764e-05, "loss": 0.0394, "step": 77765 }, { "epoch": 0.9484146341463414, "grad_norm": 0.8324607610702515, "learning_rate": 1.3677235772357724e-05, "loss": 0.0705, "step": 77770 }, { "epoch": 0.9484756097560976, "grad_norm": 0.3064589202404022, "learning_rate": 1.3676829268292684e-05, "loss": 0.0419, "step": 77775 }, { "epoch": 0.9485365853658536, "grad_norm": 0.39618027210235596, "learning_rate": 1.3676422764227644e-05, "loss": 0.0377, "step": 77780 }, { "epoch": 0.9485975609756098, "grad_norm": 0.5182409882545471, "learning_rate": 1.3676016260162603e-05, "loss": 0.0546, "step": 77785 }, { "epoch": 0.9486585365853658, "grad_norm": 0.5352696776390076, "learning_rate": 1.3675609756097563e-05, "loss": 0.0491, "step": 77790 }, { "epoch": 0.948719512195122, "grad_norm": 0.5209038853645325, "learning_rate": 1.3675203252032521e-05, "loss": 0.0564, "step": 77795 }, { "epoch": 0.948780487804878, "grad_norm": 1.5486615896224976, "learning_rate": 1.3674796747967481e-05, "loss": 0.0564, "step": 77800 }, { "epoch": 0.9488414634146342, "grad_norm": 0.72899329662323, "learning_rate": 1.367439024390244e-05, "loss": 0.0261, "step": 77805 }, { "epoch": 0.9489024390243902, "grad_norm": 1.5020489692687988, "learning_rate": 1.3673983739837399e-05, "loss": 0.0443, "step": 77810 }, { "epoch": 0.9489634146341464, "grad_norm": 0.571611225605011, "learning_rate": 1.3673577235772359e-05, "loss": 0.0394, "step": 77815 }, { "epoch": 0.9490243902439024, "grad_norm": 0.7497014999389648, "learning_rate": 1.3673170731707319e-05, "loss": 0.0477, "step": 77820 }, { "epoch": 0.9490853658536585, "grad_norm": 1.0454267263412476, "learning_rate": 1.3672764227642277e-05, "loss": 0.0454, "step": 77825 }, { "epoch": 0.9491463414634146, "grad_norm": 0.522104024887085, "learning_rate": 1.3672357723577237e-05, "loss": 0.0359, "step": 77830 }, { "epoch": 0.9492073170731707, "grad_norm": 0.5459507703781128, "learning_rate": 1.3671951219512196e-05, "loss": 0.0217, "step": 77835 }, { "epoch": 0.9492682926829268, "grad_norm": 0.5251327753067017, "learning_rate": 1.3671544715447156e-05, "loss": 0.0387, "step": 77840 }, { "epoch": 0.9493292682926829, "grad_norm": 0.29594072699546814, "learning_rate": 1.3671138211382116e-05, "loss": 0.0566, "step": 77845 }, { "epoch": 0.949390243902439, "grad_norm": 1.4963109493255615, "learning_rate": 1.3670731707317076e-05, "loss": 0.0389, "step": 77850 }, { "epoch": 0.9494512195121951, "grad_norm": 0.6640253663063049, "learning_rate": 1.3670325203252032e-05, "loss": 0.0739, "step": 77855 }, { "epoch": 0.9495121951219512, "grad_norm": 0.44517460465431213, "learning_rate": 1.3669918699186992e-05, "loss": 0.0668, "step": 77860 }, { "epoch": 0.9495731707317073, "grad_norm": 0.4657399356365204, "learning_rate": 1.3669512195121952e-05, "loss": 0.0452, "step": 77865 }, { "epoch": 0.9496341463414634, "grad_norm": 0.5772992968559265, "learning_rate": 1.3669105691056912e-05, "loss": 0.0545, "step": 77870 }, { "epoch": 0.9496951219512195, "grad_norm": 0.4041910469532013, "learning_rate": 1.3668699186991872e-05, "loss": 0.0454, "step": 77875 }, { "epoch": 0.9497560975609756, "grad_norm": 0.8904819488525391, "learning_rate": 1.3668292682926831e-05, "loss": 0.061, "step": 77880 }, { "epoch": 0.9498170731707317, "grad_norm": 0.3799656629562378, "learning_rate": 1.366788617886179e-05, "loss": 0.0429, "step": 77885 }, { "epoch": 0.9498780487804878, "grad_norm": 0.5162075757980347, "learning_rate": 1.366747967479675e-05, "loss": 0.0641, "step": 77890 }, { "epoch": 0.9499390243902439, "grad_norm": 0.5739046335220337, "learning_rate": 1.3667073170731708e-05, "loss": 0.0299, "step": 77895 }, { "epoch": 0.95, "grad_norm": 1.4185149669647217, "learning_rate": 1.3666666666666667e-05, "loss": 0.0722, "step": 77900 }, { "epoch": 0.9500609756097561, "grad_norm": 0.39873191714286804, "learning_rate": 1.3666260162601627e-05, "loss": 0.0206, "step": 77905 }, { "epoch": 0.9501219512195122, "grad_norm": 0.37288063764572144, "learning_rate": 1.3665853658536587e-05, "loss": 0.0261, "step": 77910 }, { "epoch": 0.9501829268292683, "grad_norm": 0.5834012627601624, "learning_rate": 1.3665447154471545e-05, "loss": 0.0336, "step": 77915 }, { "epoch": 0.9502439024390243, "grad_norm": 0.838467001914978, "learning_rate": 1.3665040650406505e-05, "loss": 0.0374, "step": 77920 }, { "epoch": 0.9503048780487805, "grad_norm": 1.4704054594039917, "learning_rate": 1.3664634146341465e-05, "loss": 0.0296, "step": 77925 }, { "epoch": 0.9503658536585365, "grad_norm": 0.4325650930404663, "learning_rate": 1.3664227642276425e-05, "loss": 0.0396, "step": 77930 }, { "epoch": 0.9504268292682927, "grad_norm": 0.5000041723251343, "learning_rate": 1.3663821138211384e-05, "loss": 0.0413, "step": 77935 }, { "epoch": 0.9504878048780487, "grad_norm": 0.5089552402496338, "learning_rate": 1.3663414634146344e-05, "loss": 0.0361, "step": 77940 }, { "epoch": 0.9505487804878049, "grad_norm": 0.2194814234972, "learning_rate": 1.36630081300813e-05, "loss": 0.031, "step": 77945 }, { "epoch": 0.9506097560975609, "grad_norm": 0.23399056494235992, "learning_rate": 1.366260162601626e-05, "loss": 0.0156, "step": 77950 }, { "epoch": 0.9506707317073171, "grad_norm": 0.5462061762809753, "learning_rate": 1.366219512195122e-05, "loss": 0.0571, "step": 77955 }, { "epoch": 0.9507317073170731, "grad_norm": 0.5261184573173523, "learning_rate": 1.366178861788618e-05, "loss": 0.0525, "step": 77960 }, { "epoch": 0.9507926829268293, "grad_norm": 1.1235684156417847, "learning_rate": 1.366138211382114e-05, "loss": 0.0267, "step": 77965 }, { "epoch": 0.9508536585365853, "grad_norm": 0.6233047842979431, "learning_rate": 1.36609756097561e-05, "loss": 0.038, "step": 77970 }, { "epoch": 0.9509146341463415, "grad_norm": 2.2644619941711426, "learning_rate": 1.3660569105691058e-05, "loss": 0.0486, "step": 77975 }, { "epoch": 0.9509756097560975, "grad_norm": 0.4000096321105957, "learning_rate": 1.3660162601626018e-05, "loss": 0.0215, "step": 77980 }, { "epoch": 0.9510365853658537, "grad_norm": 0.5610741376876831, "learning_rate": 1.3659756097560976e-05, "loss": 0.0493, "step": 77985 }, { "epoch": 0.9510975609756097, "grad_norm": 0.3099621832370758, "learning_rate": 1.3659349593495936e-05, "loss": 0.0545, "step": 77990 }, { "epoch": 0.9511585365853659, "grad_norm": 0.46692416071891785, "learning_rate": 1.3658943089430896e-05, "loss": 0.0228, "step": 77995 }, { "epoch": 0.9512195121951219, "grad_norm": 0.8137999176979065, "learning_rate": 1.3658536585365855e-05, "loss": 0.0389, "step": 78000 }, { "epoch": 0.9512804878048781, "grad_norm": 0.633217453956604, "learning_rate": 1.3658130081300813e-05, "loss": 0.0524, "step": 78005 }, { "epoch": 0.9513414634146341, "grad_norm": 1.0570416450500488, "learning_rate": 1.3657723577235773e-05, "loss": 0.035, "step": 78010 }, { "epoch": 0.9514024390243903, "grad_norm": 0.39733538031578064, "learning_rate": 1.3657317073170733e-05, "loss": 0.038, "step": 78015 }, { "epoch": 0.9514634146341463, "grad_norm": 0.9108917117118835, "learning_rate": 1.3656910569105693e-05, "loss": 0.0738, "step": 78020 }, { "epoch": 0.9515243902439025, "grad_norm": 0.8150069117546082, "learning_rate": 1.3656504065040653e-05, "loss": 0.047, "step": 78025 }, { "epoch": 0.9515853658536585, "grad_norm": 0.5691747069358826, "learning_rate": 1.3656097560975613e-05, "loss": 0.0443, "step": 78030 }, { "epoch": 0.9516463414634146, "grad_norm": 0.6778827905654907, "learning_rate": 1.3655691056910569e-05, "loss": 0.0728, "step": 78035 }, { "epoch": 0.9517073170731707, "grad_norm": 0.4728584587574005, "learning_rate": 1.3655284552845529e-05, "loss": 0.0516, "step": 78040 }, { "epoch": 0.9517682926829268, "grad_norm": 0.3873042166233063, "learning_rate": 1.3654878048780489e-05, "loss": 0.0337, "step": 78045 }, { "epoch": 0.9518292682926829, "grad_norm": 0.4351317286491394, "learning_rate": 1.3654471544715448e-05, "loss": 0.0278, "step": 78050 }, { "epoch": 0.951890243902439, "grad_norm": 0.6784636378288269, "learning_rate": 1.3654065040650408e-05, "loss": 0.0465, "step": 78055 }, { "epoch": 0.9519512195121951, "grad_norm": 1.1857112646102905, "learning_rate": 1.3653658536585368e-05, "loss": 0.0348, "step": 78060 }, { "epoch": 0.9520121951219512, "grad_norm": 0.4377410411834717, "learning_rate": 1.3653252032520326e-05, "loss": 0.0182, "step": 78065 }, { "epoch": 0.9520731707317073, "grad_norm": 0.4651162624359131, "learning_rate": 1.3652845528455284e-05, "loss": 0.0305, "step": 78070 }, { "epoch": 0.9521341463414634, "grad_norm": 0.437176376581192, "learning_rate": 1.3652439024390244e-05, "loss": 0.0351, "step": 78075 }, { "epoch": 0.9521951219512195, "grad_norm": 0.6574258208274841, "learning_rate": 1.3652032520325204e-05, "loss": 0.0519, "step": 78080 }, { "epoch": 0.9522560975609756, "grad_norm": 0.3443472683429718, "learning_rate": 1.3651626016260164e-05, "loss": 0.0444, "step": 78085 }, { "epoch": 0.9523170731707317, "grad_norm": 0.25967034697532654, "learning_rate": 1.3651219512195124e-05, "loss": 0.0361, "step": 78090 }, { "epoch": 0.9523780487804878, "grad_norm": 0.5768436193466187, "learning_rate": 1.3650813008130082e-05, "loss": 0.0308, "step": 78095 }, { "epoch": 0.9524390243902439, "grad_norm": 1.5311261415481567, "learning_rate": 1.3650406504065042e-05, "loss": 0.0444, "step": 78100 }, { "epoch": 0.9525, "grad_norm": 0.7280199527740479, "learning_rate": 1.3650000000000001e-05, "loss": 0.0476, "step": 78105 }, { "epoch": 0.952560975609756, "grad_norm": 0.874588668346405, "learning_rate": 1.3649593495934961e-05, "loss": 0.0722, "step": 78110 }, { "epoch": 0.9526219512195122, "grad_norm": 0.655960738658905, "learning_rate": 1.3649186991869921e-05, "loss": 0.0279, "step": 78115 }, { "epoch": 0.9526829268292683, "grad_norm": 0.48853856325149536, "learning_rate": 1.3648780487804881e-05, "loss": 0.0395, "step": 78120 }, { "epoch": 0.9527439024390244, "grad_norm": 1.9551177024841309, "learning_rate": 1.3648373983739837e-05, "loss": 0.0346, "step": 78125 }, { "epoch": 0.9528048780487804, "grad_norm": 0.9106545448303223, "learning_rate": 1.3647967479674797e-05, "loss": 0.0484, "step": 78130 }, { "epoch": 0.9528658536585366, "grad_norm": 1.7885884046554565, "learning_rate": 1.3647560975609757e-05, "loss": 0.0523, "step": 78135 }, { "epoch": 0.9529268292682926, "grad_norm": 0.5547187924385071, "learning_rate": 1.3647154471544717e-05, "loss": 0.0419, "step": 78140 }, { "epoch": 0.9529878048780488, "grad_norm": 0.7371909618377686, "learning_rate": 1.3646747967479677e-05, "loss": 0.039, "step": 78145 }, { "epoch": 0.9530487804878048, "grad_norm": 0.5438973307609558, "learning_rate": 1.3646341463414636e-05, "loss": 0.0356, "step": 78150 }, { "epoch": 0.953109756097561, "grad_norm": 0.93754643201828, "learning_rate": 1.3645934959349595e-05, "loss": 0.0324, "step": 78155 }, { "epoch": 0.953170731707317, "grad_norm": 0.29077431559562683, "learning_rate": 1.3645528455284553e-05, "loss": 0.0337, "step": 78160 }, { "epoch": 0.9532317073170732, "grad_norm": 0.29237186908721924, "learning_rate": 1.3645121951219513e-05, "loss": 0.0241, "step": 78165 }, { "epoch": 0.9532926829268292, "grad_norm": 0.3931988775730133, "learning_rate": 1.3644715447154472e-05, "loss": 0.0379, "step": 78170 }, { "epoch": 0.9533536585365854, "grad_norm": 0.3612883687019348, "learning_rate": 1.3644308943089432e-05, "loss": 0.0406, "step": 78175 }, { "epoch": 0.9534146341463414, "grad_norm": 0.3271612823009491, "learning_rate": 1.3643902439024392e-05, "loss": 0.035, "step": 78180 }, { "epoch": 0.9534756097560976, "grad_norm": 0.8607773780822754, "learning_rate": 1.364349593495935e-05, "loss": 0.0592, "step": 78185 }, { "epoch": 0.9535365853658536, "grad_norm": 0.6426246762275696, "learning_rate": 1.364308943089431e-05, "loss": 0.2403, "step": 78190 }, { "epoch": 0.9535975609756098, "grad_norm": 0.36040258407592773, "learning_rate": 1.364268292682927e-05, "loss": 0.0656, "step": 78195 }, { "epoch": 0.9536585365853658, "grad_norm": 1.2484673261642456, "learning_rate": 1.364227642276423e-05, "loss": 0.0358, "step": 78200 }, { "epoch": 0.953719512195122, "grad_norm": 0.6154747605323792, "learning_rate": 1.364186991869919e-05, "loss": 0.0416, "step": 78205 }, { "epoch": 0.953780487804878, "grad_norm": 0.5430963635444641, "learning_rate": 1.364146341463415e-05, "loss": 0.0698, "step": 78210 }, { "epoch": 0.9538414634146342, "grad_norm": 0.1737893521785736, "learning_rate": 1.3641056910569106e-05, "loss": 0.0134, "step": 78215 }, { "epoch": 0.9539024390243902, "grad_norm": 0.6317397356033325, "learning_rate": 1.3640650406504065e-05, "loss": 0.0459, "step": 78220 }, { "epoch": 0.9539634146341464, "grad_norm": 0.43410447239875793, "learning_rate": 1.3640243902439025e-05, "loss": 0.0545, "step": 78225 }, { "epoch": 0.9540243902439024, "grad_norm": 0.49196529388427734, "learning_rate": 1.3639837398373985e-05, "loss": 0.0552, "step": 78230 }, { "epoch": 0.9540853658536586, "grad_norm": 0.6577086448669434, "learning_rate": 1.3639430894308945e-05, "loss": 0.0438, "step": 78235 }, { "epoch": 0.9541463414634146, "grad_norm": 0.4130421280860901, "learning_rate": 1.3639024390243905e-05, "loss": 0.0198, "step": 78240 }, { "epoch": 0.9542073170731707, "grad_norm": 2.0074963569641113, "learning_rate": 1.3638617886178863e-05, "loss": 0.0612, "step": 78245 }, { "epoch": 0.9542682926829268, "grad_norm": 0.5176106095314026, "learning_rate": 1.3638211382113821e-05, "loss": 0.0182, "step": 78250 }, { "epoch": 0.9543292682926829, "grad_norm": 0.7824530005455017, "learning_rate": 1.3637804878048781e-05, "loss": 0.0423, "step": 78255 }, { "epoch": 0.954390243902439, "grad_norm": 0.33592885732650757, "learning_rate": 1.363739837398374e-05, "loss": 0.0212, "step": 78260 }, { "epoch": 0.9544512195121951, "grad_norm": 0.18841300904750824, "learning_rate": 1.36369918699187e-05, "loss": 0.0173, "step": 78265 }, { "epoch": 0.9545121951219512, "grad_norm": 0.30040857195854187, "learning_rate": 1.363658536585366e-05, "loss": 0.0341, "step": 78270 }, { "epoch": 0.9545731707317073, "grad_norm": 0.5818082690238953, "learning_rate": 1.3636178861788618e-05, "loss": 0.0262, "step": 78275 }, { "epoch": 0.9546341463414634, "grad_norm": 0.4514407813549042, "learning_rate": 1.3635772357723578e-05, "loss": 0.0646, "step": 78280 }, { "epoch": 0.9546951219512195, "grad_norm": 0.274882435798645, "learning_rate": 1.3635365853658538e-05, "loss": 0.0297, "step": 78285 }, { "epoch": 0.9547560975609756, "grad_norm": 0.35225191712379456, "learning_rate": 1.3634959349593498e-05, "loss": 0.0269, "step": 78290 }, { "epoch": 0.9548170731707317, "grad_norm": 0.47901859879493713, "learning_rate": 1.3634552845528458e-05, "loss": 0.0231, "step": 78295 }, { "epoch": 0.9548780487804878, "grad_norm": 0.7152422070503235, "learning_rate": 1.3634146341463418e-05, "loss": 0.0295, "step": 78300 }, { "epoch": 0.9549390243902439, "grad_norm": 0.6149860620498657, "learning_rate": 1.3633739837398374e-05, "loss": 0.037, "step": 78305 }, { "epoch": 0.955, "grad_norm": 0.4674786627292633, "learning_rate": 1.3633333333333334e-05, "loss": 0.0378, "step": 78310 }, { "epoch": 0.9550609756097561, "grad_norm": 0.3280130624771118, "learning_rate": 1.3632926829268294e-05, "loss": 0.0305, "step": 78315 }, { "epoch": 0.9551219512195122, "grad_norm": 0.578759491443634, "learning_rate": 1.3632520325203253e-05, "loss": 0.0251, "step": 78320 }, { "epoch": 0.9551829268292683, "grad_norm": 0.32572466135025024, "learning_rate": 1.3632113821138213e-05, "loss": 0.0301, "step": 78325 }, { "epoch": 0.9552439024390244, "grad_norm": 1.3407386541366577, "learning_rate": 1.3631707317073173e-05, "loss": 0.0781, "step": 78330 }, { "epoch": 0.9553048780487805, "grad_norm": 0.6047645211219788, "learning_rate": 1.363130081300813e-05, "loss": 0.0486, "step": 78335 }, { "epoch": 0.9553658536585365, "grad_norm": 0.4719371795654297, "learning_rate": 1.363089430894309e-05, "loss": 0.0412, "step": 78340 }, { "epoch": 0.9554268292682927, "grad_norm": 0.9303117394447327, "learning_rate": 1.363048780487805e-05, "loss": 0.0777, "step": 78345 }, { "epoch": 0.9554878048780487, "grad_norm": 1.2164322137832642, "learning_rate": 1.3630081300813009e-05, "loss": 0.0381, "step": 78350 }, { "epoch": 0.9555487804878049, "grad_norm": 0.476598858833313, "learning_rate": 1.3629674796747969e-05, "loss": 0.0532, "step": 78355 }, { "epoch": 0.9556097560975609, "grad_norm": 1.2809995412826538, "learning_rate": 1.3629268292682929e-05, "loss": 0.0911, "step": 78360 }, { "epoch": 0.9556707317073171, "grad_norm": 0.5639530420303345, "learning_rate": 1.3628861788617887e-05, "loss": 0.0501, "step": 78365 }, { "epoch": 0.9557317073170731, "grad_norm": 0.5363977551460266, "learning_rate": 1.3628455284552847e-05, "loss": 0.0273, "step": 78370 }, { "epoch": 0.9557926829268293, "grad_norm": 0.5009174942970276, "learning_rate": 1.3628048780487806e-05, "loss": 0.0325, "step": 78375 }, { "epoch": 0.9558536585365853, "grad_norm": 0.5306530594825745, "learning_rate": 1.3627642276422766e-05, "loss": 0.0698, "step": 78380 }, { "epoch": 0.9559146341463415, "grad_norm": 0.6611799597740173, "learning_rate": 1.3627235772357726e-05, "loss": 0.0441, "step": 78385 }, { "epoch": 0.9559756097560975, "grad_norm": 0.3343203365802765, "learning_rate": 1.3626829268292686e-05, "loss": 0.0262, "step": 78390 }, { "epoch": 0.9560365853658537, "grad_norm": 0.29541581869125366, "learning_rate": 1.3626422764227642e-05, "loss": 0.0336, "step": 78395 }, { "epoch": 0.9560975609756097, "grad_norm": 0.8820011615753174, "learning_rate": 1.3626016260162602e-05, "loss": 0.048, "step": 78400 }, { "epoch": 0.9561585365853659, "grad_norm": 0.8007094860076904, "learning_rate": 1.3625609756097562e-05, "loss": 0.0464, "step": 78405 }, { "epoch": 0.9562195121951219, "grad_norm": 0.4816732108592987, "learning_rate": 1.3625203252032522e-05, "loss": 0.0366, "step": 78410 }, { "epoch": 0.9562804878048781, "grad_norm": 0.4271865785121918, "learning_rate": 1.3624796747967482e-05, "loss": 0.0319, "step": 78415 }, { "epoch": 0.9563414634146341, "grad_norm": 0.6362743377685547, "learning_rate": 1.3624390243902441e-05, "loss": 0.1034, "step": 78420 }, { "epoch": 0.9564024390243903, "grad_norm": 0.4604356586933136, "learning_rate": 1.3623983739837398e-05, "loss": 0.0327, "step": 78425 }, { "epoch": 0.9564634146341463, "grad_norm": 0.357835590839386, "learning_rate": 1.3623577235772358e-05, "loss": 0.0146, "step": 78430 }, { "epoch": 0.9565243902439025, "grad_norm": 0.5493260622024536, "learning_rate": 1.3623170731707317e-05, "loss": 0.0251, "step": 78435 }, { "epoch": 0.9565853658536585, "grad_norm": 0.5199363827705383, "learning_rate": 1.3622764227642277e-05, "loss": 0.0433, "step": 78440 }, { "epoch": 0.9566463414634147, "grad_norm": 0.35515445470809937, "learning_rate": 1.3622357723577237e-05, "loss": 0.1108, "step": 78445 }, { "epoch": 0.9567073170731707, "grad_norm": 0.30851060152053833, "learning_rate": 1.3621951219512197e-05, "loss": 0.0168, "step": 78450 }, { "epoch": 0.9567682926829268, "grad_norm": 0.3244292140007019, "learning_rate": 1.3621544715447155e-05, "loss": 0.0322, "step": 78455 }, { "epoch": 0.9568292682926829, "grad_norm": 0.6564730405807495, "learning_rate": 1.3621138211382115e-05, "loss": 0.038, "step": 78460 }, { "epoch": 0.956890243902439, "grad_norm": 0.2738577425479889, "learning_rate": 1.3620731707317075e-05, "loss": 0.0362, "step": 78465 }, { "epoch": 0.9569512195121951, "grad_norm": 0.37367984652519226, "learning_rate": 1.3620325203252035e-05, "loss": 0.0699, "step": 78470 }, { "epoch": 0.9570121951219512, "grad_norm": 0.4231763780117035, "learning_rate": 1.3619918699186994e-05, "loss": 0.0339, "step": 78475 }, { "epoch": 0.9570731707317073, "grad_norm": 0.6607648730278015, "learning_rate": 1.3619512195121953e-05, "loss": 0.0298, "step": 78480 }, { "epoch": 0.9571341463414634, "grad_norm": 0.2601023316383362, "learning_rate": 1.361910569105691e-05, "loss": 0.0326, "step": 78485 }, { "epoch": 0.9571951219512195, "grad_norm": 0.4879853427410126, "learning_rate": 1.361869918699187e-05, "loss": 0.0314, "step": 78490 }, { "epoch": 0.9572560975609756, "grad_norm": 0.230246439576149, "learning_rate": 1.361829268292683e-05, "loss": 0.0332, "step": 78495 }, { "epoch": 0.9573170731707317, "grad_norm": 0.8005183935165405, "learning_rate": 1.361788617886179e-05, "loss": 0.0506, "step": 78500 }, { "epoch": 0.9573780487804878, "grad_norm": 2.468062162399292, "learning_rate": 1.361747967479675e-05, "loss": 0.0627, "step": 78505 }, { "epoch": 0.9574390243902439, "grad_norm": 1.0922707319259644, "learning_rate": 1.361707317073171e-05, "loss": 0.0337, "step": 78510 }, { "epoch": 0.9575, "grad_norm": 0.516727864742279, "learning_rate": 1.3616666666666666e-05, "loss": 0.0576, "step": 78515 }, { "epoch": 0.9575609756097561, "grad_norm": 0.3716101050376892, "learning_rate": 1.3616260162601626e-05, "loss": 0.0249, "step": 78520 }, { "epoch": 0.9576219512195122, "grad_norm": 0.2565458118915558, "learning_rate": 1.3615853658536586e-05, "loss": 0.0321, "step": 78525 }, { "epoch": 0.9576829268292683, "grad_norm": 0.3299415707588196, "learning_rate": 1.3615447154471546e-05, "loss": 0.0522, "step": 78530 }, { "epoch": 0.9577439024390244, "grad_norm": 0.3607569634914398, "learning_rate": 1.3615040650406505e-05, "loss": 0.0646, "step": 78535 }, { "epoch": 0.9578048780487805, "grad_norm": 0.7530606389045715, "learning_rate": 1.3614634146341465e-05, "loss": 0.0383, "step": 78540 }, { "epoch": 0.9578658536585366, "grad_norm": 0.2673322856426239, "learning_rate": 1.3614227642276423e-05, "loss": 0.0297, "step": 78545 }, { "epoch": 0.9579268292682926, "grad_norm": 0.9319897294044495, "learning_rate": 1.3613821138211383e-05, "loss": 0.0347, "step": 78550 }, { "epoch": 0.9579878048780488, "grad_norm": 0.5650102496147156, "learning_rate": 1.3613414634146343e-05, "loss": 0.0377, "step": 78555 }, { "epoch": 0.9580487804878048, "grad_norm": 1.3260855674743652, "learning_rate": 1.3613008130081303e-05, "loss": 0.0388, "step": 78560 }, { "epoch": 0.958109756097561, "grad_norm": 0.5793141722679138, "learning_rate": 1.3612601626016263e-05, "loss": 0.0382, "step": 78565 }, { "epoch": 0.958170731707317, "grad_norm": 0.4141295850276947, "learning_rate": 1.361219512195122e-05, "loss": 0.0304, "step": 78570 }, { "epoch": 0.9582317073170732, "grad_norm": 0.5399338603019714, "learning_rate": 1.3611788617886179e-05, "loss": 0.0421, "step": 78575 }, { "epoch": 0.9582926829268292, "grad_norm": 1.0548824071884155, "learning_rate": 1.3611382113821139e-05, "loss": 0.0537, "step": 78580 }, { "epoch": 0.9583536585365854, "grad_norm": 0.6202613711357117, "learning_rate": 1.3610975609756099e-05, "loss": 0.0405, "step": 78585 }, { "epoch": 0.9584146341463414, "grad_norm": 0.39278358221054077, "learning_rate": 1.3610569105691058e-05, "loss": 0.027, "step": 78590 }, { "epoch": 0.9584756097560976, "grad_norm": 0.694172739982605, "learning_rate": 1.3610162601626018e-05, "loss": 0.0144, "step": 78595 }, { "epoch": 0.9585365853658536, "grad_norm": 0.6203023195266724, "learning_rate": 1.3609756097560978e-05, "loss": 0.0357, "step": 78600 }, { "epoch": 0.9585975609756098, "grad_norm": 1.03528892993927, "learning_rate": 1.3609349593495934e-05, "loss": 0.0384, "step": 78605 }, { "epoch": 0.9586585365853658, "grad_norm": 0.3351523280143738, "learning_rate": 1.3608943089430894e-05, "loss": 0.0337, "step": 78610 }, { "epoch": 0.958719512195122, "grad_norm": 0.4290855824947357, "learning_rate": 1.3608536585365854e-05, "loss": 0.0442, "step": 78615 }, { "epoch": 0.958780487804878, "grad_norm": 0.3938213288784027, "learning_rate": 1.3608130081300814e-05, "loss": 0.0301, "step": 78620 }, { "epoch": 0.9588414634146342, "grad_norm": 2.2613213062286377, "learning_rate": 1.3607723577235774e-05, "loss": 0.045, "step": 78625 }, { "epoch": 0.9589024390243902, "grad_norm": 0.504765510559082, "learning_rate": 1.3607317073170734e-05, "loss": 0.0757, "step": 78630 }, { "epoch": 0.9589634146341464, "grad_norm": 0.6509226560592651, "learning_rate": 1.3606910569105692e-05, "loss": 0.0246, "step": 78635 }, { "epoch": 0.9590243902439024, "grad_norm": 1.4850273132324219, "learning_rate": 1.3606504065040652e-05, "loss": 0.0289, "step": 78640 }, { "epoch": 0.9590853658536586, "grad_norm": 0.4822026193141937, "learning_rate": 1.3606097560975611e-05, "loss": 0.0722, "step": 78645 }, { "epoch": 0.9591463414634146, "grad_norm": 0.6170072555541992, "learning_rate": 1.3605691056910571e-05, "loss": 0.0292, "step": 78650 }, { "epoch": 0.9592073170731708, "grad_norm": 0.4646832048892975, "learning_rate": 1.3605284552845531e-05, "loss": 0.0559, "step": 78655 }, { "epoch": 0.9592682926829268, "grad_norm": 0.7606871724128723, "learning_rate": 1.3604878048780489e-05, "loss": 0.0458, "step": 78660 }, { "epoch": 0.959329268292683, "grad_norm": 0.3827000558376312, "learning_rate": 1.3604471544715447e-05, "loss": 0.0162, "step": 78665 }, { "epoch": 0.959390243902439, "grad_norm": 0.6681001782417297, "learning_rate": 1.3604065040650407e-05, "loss": 0.0235, "step": 78670 }, { "epoch": 0.9594512195121951, "grad_norm": 0.7013302445411682, "learning_rate": 1.3603658536585367e-05, "loss": 0.0337, "step": 78675 }, { "epoch": 0.9595121951219512, "grad_norm": 0.8522825241088867, "learning_rate": 1.3603252032520327e-05, "loss": 0.0516, "step": 78680 }, { "epoch": 0.9595731707317073, "grad_norm": 0.6834943890571594, "learning_rate": 1.3602845528455287e-05, "loss": 0.0428, "step": 78685 }, { "epoch": 0.9596341463414634, "grad_norm": 0.9250059127807617, "learning_rate": 1.3602439024390246e-05, "loss": 0.0207, "step": 78690 }, { "epoch": 0.9596951219512195, "grad_norm": 0.6753360629081726, "learning_rate": 1.3602032520325203e-05, "loss": 0.0252, "step": 78695 }, { "epoch": 0.9597560975609756, "grad_norm": 0.2675575613975525, "learning_rate": 1.3601626016260163e-05, "loss": 0.0277, "step": 78700 }, { "epoch": 0.9598170731707317, "grad_norm": 0.6490853428840637, "learning_rate": 1.3601219512195122e-05, "loss": 0.0344, "step": 78705 }, { "epoch": 0.9598780487804878, "grad_norm": 0.8207244277000427, "learning_rate": 1.3600813008130082e-05, "loss": 0.054, "step": 78710 }, { "epoch": 0.9599390243902439, "grad_norm": 0.4963778257369995, "learning_rate": 1.3600406504065042e-05, "loss": 0.0623, "step": 78715 }, { "epoch": 0.96, "grad_norm": 0.766783595085144, "learning_rate": 1.3600000000000002e-05, "loss": 0.0321, "step": 78720 }, { "epoch": 0.9600609756097561, "grad_norm": 0.4407590627670288, "learning_rate": 1.359959349593496e-05, "loss": 0.0434, "step": 78725 }, { "epoch": 0.9601219512195122, "grad_norm": 0.28684625029563904, "learning_rate": 1.359918699186992e-05, "loss": 0.0332, "step": 78730 }, { "epoch": 0.9601829268292683, "grad_norm": 0.7155224680900574, "learning_rate": 1.359878048780488e-05, "loss": 0.0437, "step": 78735 }, { "epoch": 0.9602439024390244, "grad_norm": 0.3993743658065796, "learning_rate": 1.359837398373984e-05, "loss": 0.0276, "step": 78740 }, { "epoch": 0.9603048780487805, "grad_norm": 1.0656392574310303, "learning_rate": 1.3597967479674798e-05, "loss": 0.0424, "step": 78745 }, { "epoch": 0.9603658536585366, "grad_norm": 0.6386241912841797, "learning_rate": 1.3597560975609757e-05, "loss": 0.057, "step": 78750 }, { "epoch": 0.9604268292682927, "grad_norm": 1.0418446063995361, "learning_rate": 1.3597154471544716e-05, "loss": 0.073, "step": 78755 }, { "epoch": 0.9604878048780487, "grad_norm": 0.3820689916610718, "learning_rate": 1.3596747967479675e-05, "loss": 0.0276, "step": 78760 }, { "epoch": 0.9605487804878049, "grad_norm": 0.3715401887893677, "learning_rate": 1.3596341463414635e-05, "loss": 0.0281, "step": 78765 }, { "epoch": 0.9606097560975609, "grad_norm": 0.4235002398490906, "learning_rate": 1.3595934959349595e-05, "loss": 0.0326, "step": 78770 }, { "epoch": 0.9606707317073171, "grad_norm": 0.6580033898353577, "learning_rate": 1.3595528455284555e-05, "loss": 0.0427, "step": 78775 }, { "epoch": 0.9607317073170731, "grad_norm": 0.7395763993263245, "learning_rate": 1.3595121951219515e-05, "loss": 0.0424, "step": 78780 }, { "epoch": 0.9607926829268293, "grad_norm": 0.4307340681552887, "learning_rate": 1.3594715447154471e-05, "loss": 0.0278, "step": 78785 }, { "epoch": 0.9608536585365853, "grad_norm": 0.31044942140579224, "learning_rate": 1.3594308943089431e-05, "loss": 0.0282, "step": 78790 }, { "epoch": 0.9609146341463415, "grad_norm": 1.366593360900879, "learning_rate": 1.359390243902439e-05, "loss": 0.0361, "step": 78795 }, { "epoch": 0.9609756097560975, "grad_norm": 0.5615267753601074, "learning_rate": 1.359349593495935e-05, "loss": 0.0378, "step": 78800 }, { "epoch": 0.9610365853658537, "grad_norm": 0.32927772402763367, "learning_rate": 1.359308943089431e-05, "loss": 0.0341, "step": 78805 }, { "epoch": 0.9610975609756097, "grad_norm": 0.2569752037525177, "learning_rate": 1.359268292682927e-05, "loss": 0.0264, "step": 78810 }, { "epoch": 0.9611585365853659, "grad_norm": 0.5782620906829834, "learning_rate": 1.3592276422764228e-05, "loss": 0.0202, "step": 78815 }, { "epoch": 0.9612195121951219, "grad_norm": 0.2159072756767273, "learning_rate": 1.3591869918699188e-05, "loss": 0.0289, "step": 78820 }, { "epoch": 0.9612804878048781, "grad_norm": 0.49828311800956726, "learning_rate": 1.3591463414634148e-05, "loss": 0.0273, "step": 78825 }, { "epoch": 0.9613414634146341, "grad_norm": 0.4526404142379761, "learning_rate": 1.3591056910569108e-05, "loss": 0.0632, "step": 78830 }, { "epoch": 0.9614024390243903, "grad_norm": 0.2647649049758911, "learning_rate": 1.3590650406504066e-05, "loss": 0.0244, "step": 78835 }, { "epoch": 0.9614634146341463, "grad_norm": 0.6730732917785645, "learning_rate": 1.3590243902439026e-05, "loss": 0.0451, "step": 78840 }, { "epoch": 0.9615243902439025, "grad_norm": 0.19335873425006866, "learning_rate": 1.3589837398373984e-05, "loss": 0.0273, "step": 78845 }, { "epoch": 0.9615853658536585, "grad_norm": 1.022687315940857, "learning_rate": 1.3589430894308944e-05, "loss": 0.0286, "step": 78850 }, { "epoch": 0.9616463414634147, "grad_norm": 0.9608629941940308, "learning_rate": 1.3589024390243904e-05, "loss": 0.0303, "step": 78855 }, { "epoch": 0.9617073170731707, "grad_norm": 0.5349416136741638, "learning_rate": 1.3588617886178863e-05, "loss": 0.0461, "step": 78860 }, { "epoch": 0.9617682926829269, "grad_norm": 0.8811460733413696, "learning_rate": 1.3588211382113823e-05, "loss": 0.0294, "step": 78865 }, { "epoch": 0.9618292682926829, "grad_norm": 0.7117552161216736, "learning_rate": 1.3587804878048783e-05, "loss": 0.0373, "step": 78870 }, { "epoch": 0.961890243902439, "grad_norm": 0.567345917224884, "learning_rate": 1.358739837398374e-05, "loss": 0.0506, "step": 78875 }, { "epoch": 0.9619512195121951, "grad_norm": 0.5957660675048828, "learning_rate": 1.35869918699187e-05, "loss": 0.02, "step": 78880 }, { "epoch": 0.9620121951219512, "grad_norm": 0.1242995485663414, "learning_rate": 1.3586585365853659e-05, "loss": 0.0475, "step": 78885 }, { "epoch": 0.9620731707317073, "grad_norm": 0.2906372845172882, "learning_rate": 1.3586178861788619e-05, "loss": 0.0398, "step": 78890 }, { "epoch": 0.9621341463414634, "grad_norm": 0.511680543422699, "learning_rate": 1.3585772357723579e-05, "loss": 0.0365, "step": 78895 }, { "epoch": 0.9621951219512195, "grad_norm": 0.8107993602752686, "learning_rate": 1.3585365853658539e-05, "loss": 0.0394, "step": 78900 }, { "epoch": 0.9622560975609756, "grad_norm": 0.32455974817276, "learning_rate": 1.3584959349593497e-05, "loss": 0.028, "step": 78905 }, { "epoch": 0.9623170731707317, "grad_norm": 0.541478157043457, "learning_rate": 1.3584552845528457e-05, "loss": 0.0305, "step": 78910 }, { "epoch": 0.9623780487804878, "grad_norm": 1.0092326402664185, "learning_rate": 1.3584146341463416e-05, "loss": 0.0508, "step": 78915 }, { "epoch": 0.9624390243902439, "grad_norm": 0.5743931531906128, "learning_rate": 1.3583739837398376e-05, "loss": 0.0344, "step": 78920 }, { "epoch": 0.9625, "grad_norm": 0.47535303235054016, "learning_rate": 1.3583333333333334e-05, "loss": 0.0441, "step": 78925 }, { "epoch": 0.9625609756097561, "grad_norm": 0.4325786232948303, "learning_rate": 1.3582926829268294e-05, "loss": 0.0384, "step": 78930 }, { "epoch": 0.9626219512195122, "grad_norm": 0.6156640648841858, "learning_rate": 1.3582520325203252e-05, "loss": 0.0435, "step": 78935 }, { "epoch": 0.9626829268292683, "grad_norm": 0.4325966238975525, "learning_rate": 1.3582113821138212e-05, "loss": 0.051, "step": 78940 }, { "epoch": 0.9627439024390244, "grad_norm": 0.6410079598426819, "learning_rate": 1.3581707317073172e-05, "loss": 0.0282, "step": 78945 }, { "epoch": 0.9628048780487805, "grad_norm": 1.025132656097412, "learning_rate": 1.3581300813008132e-05, "loss": 0.0296, "step": 78950 }, { "epoch": 0.9628658536585366, "grad_norm": 0.51177579164505, "learning_rate": 1.3580894308943092e-05, "loss": 0.0415, "step": 78955 }, { "epoch": 0.9629268292682926, "grad_norm": 0.6711846590042114, "learning_rate": 1.3580487804878051e-05, "loss": 0.0327, "step": 78960 }, { "epoch": 0.9629878048780488, "grad_norm": 1.7547087669372559, "learning_rate": 1.3580081300813008e-05, "loss": 0.0304, "step": 78965 }, { "epoch": 0.9630487804878048, "grad_norm": 3.4496090412139893, "learning_rate": 1.3579674796747968e-05, "loss": 0.0668, "step": 78970 }, { "epoch": 0.963109756097561, "grad_norm": 1.8420273065567017, "learning_rate": 1.3579268292682927e-05, "loss": 0.0292, "step": 78975 }, { "epoch": 0.963170731707317, "grad_norm": 0.4048767685890198, "learning_rate": 1.3578861788617887e-05, "loss": 0.045, "step": 78980 }, { "epoch": 0.9632317073170732, "grad_norm": 0.6540517807006836, "learning_rate": 1.3578455284552847e-05, "loss": 0.0503, "step": 78985 }, { "epoch": 0.9632926829268292, "grad_norm": 0.36931028962135315, "learning_rate": 1.3578048780487807e-05, "loss": 0.0384, "step": 78990 }, { "epoch": 0.9633536585365854, "grad_norm": 0.7502772212028503, "learning_rate": 1.3577642276422765e-05, "loss": 0.0347, "step": 78995 }, { "epoch": 0.9634146341463414, "grad_norm": 0.32129326462745667, "learning_rate": 1.3577235772357725e-05, "loss": 0.047, "step": 79000 }, { "epoch": 0.9634756097560976, "grad_norm": 1.329059362411499, "learning_rate": 1.3576829268292685e-05, "loss": 0.0364, "step": 79005 }, { "epoch": 0.9635365853658536, "grad_norm": 0.1886812448501587, "learning_rate": 1.3576422764227643e-05, "loss": 0.0236, "step": 79010 }, { "epoch": 0.9635975609756098, "grad_norm": 0.3354533612728119, "learning_rate": 1.3576016260162603e-05, "loss": 0.0262, "step": 79015 }, { "epoch": 0.9636585365853658, "grad_norm": 0.3242012858390808, "learning_rate": 1.3575609756097562e-05, "loss": 0.0387, "step": 79020 }, { "epoch": 0.963719512195122, "grad_norm": 0.6855171918869019, "learning_rate": 1.357520325203252e-05, "loss": 0.0524, "step": 79025 }, { "epoch": 0.963780487804878, "grad_norm": 0.5808914303779602, "learning_rate": 1.357479674796748e-05, "loss": 0.0369, "step": 79030 }, { "epoch": 0.9638414634146342, "grad_norm": 0.4401531219482422, "learning_rate": 1.357439024390244e-05, "loss": 0.0644, "step": 79035 }, { "epoch": 0.9639024390243902, "grad_norm": 0.32404765486717224, "learning_rate": 1.35739837398374e-05, "loss": 0.0481, "step": 79040 }, { "epoch": 0.9639634146341464, "grad_norm": 0.4214348793029785, "learning_rate": 1.357357723577236e-05, "loss": 0.0578, "step": 79045 }, { "epoch": 0.9640243902439024, "grad_norm": 0.40662920475006104, "learning_rate": 1.357317073170732e-05, "loss": 0.0317, "step": 79050 }, { "epoch": 0.9640853658536586, "grad_norm": 0.37066200375556946, "learning_rate": 1.3572764227642276e-05, "loss": 0.0393, "step": 79055 }, { "epoch": 0.9641463414634146, "grad_norm": 0.49991822242736816, "learning_rate": 1.3572357723577236e-05, "loss": 0.0272, "step": 79060 }, { "epoch": 0.9642073170731708, "grad_norm": 0.35515058040618896, "learning_rate": 1.3571951219512196e-05, "loss": 0.0356, "step": 79065 }, { "epoch": 0.9642682926829268, "grad_norm": 0.502199113368988, "learning_rate": 1.3571544715447156e-05, "loss": 0.0284, "step": 79070 }, { "epoch": 0.964329268292683, "grad_norm": 0.5843703746795654, "learning_rate": 1.3571138211382115e-05, "loss": 0.0359, "step": 79075 }, { "epoch": 0.964390243902439, "grad_norm": 0.4208025634288788, "learning_rate": 1.3570731707317075e-05, "loss": 0.0247, "step": 79080 }, { "epoch": 0.9644512195121951, "grad_norm": 0.8100994229316711, "learning_rate": 1.3570325203252033e-05, "loss": 0.046, "step": 79085 }, { "epoch": 0.9645121951219512, "grad_norm": 1.5513488054275513, "learning_rate": 1.3569918699186993e-05, "loss": 0.0319, "step": 79090 }, { "epoch": 0.9645731707317073, "grad_norm": 0.7049974799156189, "learning_rate": 1.3569512195121953e-05, "loss": 0.0497, "step": 79095 }, { "epoch": 0.9646341463414634, "grad_norm": 0.4257819950580597, "learning_rate": 1.3569105691056911e-05, "loss": 0.032, "step": 79100 }, { "epoch": 0.9646951219512195, "grad_norm": 0.4175812304019928, "learning_rate": 1.3568699186991871e-05, "loss": 0.063, "step": 79105 }, { "epoch": 0.9647560975609756, "grad_norm": 0.8315497636795044, "learning_rate": 1.356829268292683e-05, "loss": 0.0587, "step": 79110 }, { "epoch": 0.9648170731707317, "grad_norm": 0.5510340929031372, "learning_rate": 1.3567886178861789e-05, "loss": 0.0305, "step": 79115 }, { "epoch": 0.9648780487804878, "grad_norm": 0.2509614825248718, "learning_rate": 1.3567479674796749e-05, "loss": 0.0317, "step": 79120 }, { "epoch": 0.9649390243902439, "grad_norm": 0.6148128509521484, "learning_rate": 1.3567073170731709e-05, "loss": 0.0409, "step": 79125 }, { "epoch": 0.965, "grad_norm": 0.6861087679862976, "learning_rate": 1.3566666666666668e-05, "loss": 0.0532, "step": 79130 }, { "epoch": 0.9650609756097561, "grad_norm": 0.40898647904396057, "learning_rate": 1.3566260162601628e-05, "loss": 0.0181, "step": 79135 }, { "epoch": 0.9651219512195122, "grad_norm": 0.36204975843429565, "learning_rate": 1.3565853658536588e-05, "loss": 0.0192, "step": 79140 }, { "epoch": 0.9651829268292683, "grad_norm": 0.6798118352890015, "learning_rate": 1.3565447154471544e-05, "loss": 0.0439, "step": 79145 }, { "epoch": 0.9652439024390244, "grad_norm": 0.43963494896888733, "learning_rate": 1.3565040650406504e-05, "loss": 0.0279, "step": 79150 }, { "epoch": 0.9653048780487805, "grad_norm": 1.4659453630447388, "learning_rate": 1.3564634146341464e-05, "loss": 0.0703, "step": 79155 }, { "epoch": 0.9653658536585366, "grad_norm": 0.41148191690444946, "learning_rate": 1.3564227642276424e-05, "loss": 0.0428, "step": 79160 }, { "epoch": 0.9654268292682927, "grad_norm": 0.5137409567832947, "learning_rate": 1.3563821138211384e-05, "loss": 0.0427, "step": 79165 }, { "epoch": 0.9654878048780487, "grad_norm": 0.22441640496253967, "learning_rate": 1.3563414634146344e-05, "loss": 0.0276, "step": 79170 }, { "epoch": 0.9655487804878049, "grad_norm": 0.38357892632484436, "learning_rate": 1.3563008130081302e-05, "loss": 0.0498, "step": 79175 }, { "epoch": 0.965609756097561, "grad_norm": 0.7130944728851318, "learning_rate": 1.3562601626016261e-05, "loss": 0.0767, "step": 79180 }, { "epoch": 0.9656707317073171, "grad_norm": 1.2052689790725708, "learning_rate": 1.356219512195122e-05, "loss": 0.0588, "step": 79185 }, { "epoch": 0.9657317073170731, "grad_norm": 0.687640905380249, "learning_rate": 1.356178861788618e-05, "loss": 0.048, "step": 79190 }, { "epoch": 0.9657926829268293, "grad_norm": 0.4241260290145874, "learning_rate": 1.356138211382114e-05, "loss": 0.0356, "step": 79195 }, { "epoch": 0.9658536585365853, "grad_norm": 0.447348028421402, "learning_rate": 1.3560975609756099e-05, "loss": 0.0647, "step": 79200 }, { "epoch": 0.9659146341463415, "grad_norm": 0.429005891084671, "learning_rate": 1.3560569105691057e-05, "loss": 0.0236, "step": 79205 }, { "epoch": 0.9659756097560975, "grad_norm": 0.23323093354701996, "learning_rate": 1.3560162601626017e-05, "loss": 0.0181, "step": 79210 }, { "epoch": 0.9660365853658537, "grad_norm": 0.22838689386844635, "learning_rate": 1.3559756097560977e-05, "loss": 0.0211, "step": 79215 }, { "epoch": 0.9660975609756097, "grad_norm": 0.524541974067688, "learning_rate": 1.3559349593495937e-05, "loss": 0.0223, "step": 79220 }, { "epoch": 0.9661585365853659, "grad_norm": 0.41775763034820557, "learning_rate": 1.3558943089430896e-05, "loss": 0.0147, "step": 79225 }, { "epoch": 0.9662195121951219, "grad_norm": 0.5376932621002197, "learning_rate": 1.3558536585365856e-05, "loss": 0.0256, "step": 79230 }, { "epoch": 0.9662804878048781, "grad_norm": 0.4271497130393982, "learning_rate": 1.3558130081300813e-05, "loss": 0.0427, "step": 79235 }, { "epoch": 0.9663414634146341, "grad_norm": 0.6027699708938599, "learning_rate": 1.3557723577235773e-05, "loss": 0.0658, "step": 79240 }, { "epoch": 0.9664024390243903, "grad_norm": 0.3085726797580719, "learning_rate": 1.3557317073170732e-05, "loss": 0.059, "step": 79245 }, { "epoch": 0.9664634146341463, "grad_norm": 0.533311128616333, "learning_rate": 1.3556910569105692e-05, "loss": 0.0251, "step": 79250 }, { "epoch": 0.9665243902439025, "grad_norm": 0.3572448194026947, "learning_rate": 1.3556504065040652e-05, "loss": 0.0321, "step": 79255 }, { "epoch": 0.9665853658536585, "grad_norm": 0.593450665473938, "learning_rate": 1.3556097560975612e-05, "loss": 0.06, "step": 79260 }, { "epoch": 0.9666463414634147, "grad_norm": 1.6865829229354858, "learning_rate": 1.355569105691057e-05, "loss": 0.0521, "step": 79265 }, { "epoch": 0.9667073170731707, "grad_norm": 0.6509189605712891, "learning_rate": 1.355528455284553e-05, "loss": 0.0504, "step": 79270 }, { "epoch": 0.9667682926829269, "grad_norm": 0.6891764402389526, "learning_rate": 1.3554878048780488e-05, "loss": 0.0559, "step": 79275 }, { "epoch": 0.9668292682926829, "grad_norm": 0.4412417411804199, "learning_rate": 1.3554471544715448e-05, "loss": 0.0546, "step": 79280 }, { "epoch": 0.966890243902439, "grad_norm": 0.7726796269416809, "learning_rate": 1.3554065040650408e-05, "loss": 0.045, "step": 79285 }, { "epoch": 0.9669512195121951, "grad_norm": 0.5551275014877319, "learning_rate": 1.3553658536585367e-05, "loss": 0.0439, "step": 79290 }, { "epoch": 0.9670121951219512, "grad_norm": 0.8375838994979858, "learning_rate": 1.3553252032520326e-05, "loss": 0.0324, "step": 79295 }, { "epoch": 0.9670731707317073, "grad_norm": 0.4976601004600525, "learning_rate": 1.3552845528455285e-05, "loss": 0.037, "step": 79300 }, { "epoch": 0.9671341463414634, "grad_norm": 0.3783376216888428, "learning_rate": 1.3552439024390245e-05, "loss": 0.0441, "step": 79305 }, { "epoch": 0.9671951219512195, "grad_norm": 0.16685692965984344, "learning_rate": 1.3552032520325205e-05, "loss": 0.0355, "step": 79310 }, { "epoch": 0.9672560975609756, "grad_norm": 0.6146019697189331, "learning_rate": 1.3551626016260165e-05, "loss": 0.0472, "step": 79315 }, { "epoch": 0.9673170731707317, "grad_norm": 0.5861569046974182, "learning_rate": 1.3551219512195125e-05, "loss": 0.0241, "step": 79320 }, { "epoch": 0.9673780487804878, "grad_norm": 0.5851652026176453, "learning_rate": 1.3550813008130081e-05, "loss": 0.042, "step": 79325 }, { "epoch": 0.9674390243902439, "grad_norm": 0.7099328637123108, "learning_rate": 1.3550406504065041e-05, "loss": 0.0429, "step": 79330 }, { "epoch": 0.9675, "grad_norm": 0.28875094652175903, "learning_rate": 1.355e-05, "loss": 0.0402, "step": 79335 }, { "epoch": 0.9675609756097561, "grad_norm": 0.46124765276908875, "learning_rate": 1.354959349593496e-05, "loss": 0.036, "step": 79340 }, { "epoch": 0.9676219512195122, "grad_norm": 0.47582077980041504, "learning_rate": 1.354918699186992e-05, "loss": 0.0786, "step": 79345 }, { "epoch": 0.9676829268292683, "grad_norm": 0.6777867078781128, "learning_rate": 1.354878048780488e-05, "loss": 0.0916, "step": 79350 }, { "epoch": 0.9677439024390244, "grad_norm": 0.2775273621082306, "learning_rate": 1.3548373983739838e-05, "loss": 0.0364, "step": 79355 }, { "epoch": 0.9678048780487805, "grad_norm": 0.3831324875354767, "learning_rate": 1.3547967479674798e-05, "loss": 0.0489, "step": 79360 }, { "epoch": 0.9678658536585366, "grad_norm": 0.5064945220947266, "learning_rate": 1.3547560975609756e-05, "loss": 0.0388, "step": 79365 }, { "epoch": 0.9679268292682927, "grad_norm": 3.8372082710266113, "learning_rate": 1.3547154471544716e-05, "loss": 0.0553, "step": 79370 }, { "epoch": 0.9679878048780488, "grad_norm": 0.19154265522956848, "learning_rate": 1.3546747967479676e-05, "loss": 0.0364, "step": 79375 }, { "epoch": 0.9680487804878048, "grad_norm": 0.5484025478363037, "learning_rate": 1.3546341463414636e-05, "loss": 0.0392, "step": 79380 }, { "epoch": 0.968109756097561, "grad_norm": 1.0188021659851074, "learning_rate": 1.3545934959349594e-05, "loss": 0.0824, "step": 79385 }, { "epoch": 0.968170731707317, "grad_norm": 0.5810921788215637, "learning_rate": 1.3545528455284554e-05, "loss": 0.0491, "step": 79390 }, { "epoch": 0.9682317073170732, "grad_norm": 0.569084644317627, "learning_rate": 1.3545121951219513e-05, "loss": 0.0423, "step": 79395 }, { "epoch": 0.9682926829268292, "grad_norm": 0.36307400465011597, "learning_rate": 1.3544715447154473e-05, "loss": 0.0195, "step": 79400 }, { "epoch": 0.9683536585365854, "grad_norm": 0.5700241923332214, "learning_rate": 1.3544308943089433e-05, "loss": 0.0684, "step": 79405 }, { "epoch": 0.9684146341463414, "grad_norm": 0.43586549162864685, "learning_rate": 1.3543902439024393e-05, "loss": 0.0346, "step": 79410 }, { "epoch": 0.9684756097560976, "grad_norm": 0.4076455533504486, "learning_rate": 1.354349593495935e-05, "loss": 0.0385, "step": 79415 }, { "epoch": 0.9685365853658536, "grad_norm": 0.42413026094436646, "learning_rate": 1.354308943089431e-05, "loss": 0.0206, "step": 79420 }, { "epoch": 0.9685975609756098, "grad_norm": 0.3767073154449463, "learning_rate": 1.3542682926829269e-05, "loss": 0.0318, "step": 79425 }, { "epoch": 0.9686585365853658, "grad_norm": 0.533348023891449, "learning_rate": 1.3542276422764229e-05, "loss": 0.0291, "step": 79430 }, { "epoch": 0.968719512195122, "grad_norm": 0.27560779452323914, "learning_rate": 1.3541869918699189e-05, "loss": 0.0322, "step": 79435 }, { "epoch": 0.968780487804878, "grad_norm": 0.7892652750015259, "learning_rate": 1.3541463414634148e-05, "loss": 0.0691, "step": 79440 }, { "epoch": 0.9688414634146342, "grad_norm": 0.6452347636222839, "learning_rate": 1.3541056910569107e-05, "loss": 0.0626, "step": 79445 }, { "epoch": 0.9689024390243902, "grad_norm": 0.5786581635475159, "learning_rate": 1.3540650406504065e-05, "loss": 0.0422, "step": 79450 }, { "epoch": 0.9689634146341464, "grad_norm": 0.4727212190628052, "learning_rate": 1.3540243902439025e-05, "loss": 0.0401, "step": 79455 }, { "epoch": 0.9690243902439024, "grad_norm": 0.33648204803466797, "learning_rate": 1.3539837398373984e-05, "loss": 0.0235, "step": 79460 }, { "epoch": 0.9690853658536586, "grad_norm": 0.9410169720649719, "learning_rate": 1.3539430894308944e-05, "loss": 0.0578, "step": 79465 }, { "epoch": 0.9691463414634146, "grad_norm": 0.934244692325592, "learning_rate": 1.3539024390243904e-05, "loss": 0.0556, "step": 79470 }, { "epoch": 0.9692073170731708, "grad_norm": 0.6823033094406128, "learning_rate": 1.3538617886178862e-05, "loss": 0.0457, "step": 79475 }, { "epoch": 0.9692682926829268, "grad_norm": 0.25971874594688416, "learning_rate": 1.3538211382113822e-05, "loss": 0.0297, "step": 79480 }, { "epoch": 0.969329268292683, "grad_norm": 0.19903039932250977, "learning_rate": 1.3537804878048782e-05, "loss": 0.0347, "step": 79485 }, { "epoch": 0.969390243902439, "grad_norm": 0.5129262804985046, "learning_rate": 1.3537398373983742e-05, "loss": 0.0412, "step": 79490 }, { "epoch": 0.9694512195121952, "grad_norm": 0.683707594871521, "learning_rate": 1.3536991869918701e-05, "loss": 0.0577, "step": 79495 }, { "epoch": 0.9695121951219512, "grad_norm": 0.7454473972320557, "learning_rate": 1.3536585365853661e-05, "loss": 0.0615, "step": 79500 }, { "epoch": 0.9695731707317073, "grad_norm": 0.7351858615875244, "learning_rate": 1.3536178861788618e-05, "loss": 0.0334, "step": 79505 }, { "epoch": 0.9696341463414634, "grad_norm": 1.051858901977539, "learning_rate": 1.3535772357723578e-05, "loss": 0.0718, "step": 79510 }, { "epoch": 0.9696951219512195, "grad_norm": 0.9907976388931274, "learning_rate": 1.3535365853658537e-05, "loss": 0.0248, "step": 79515 }, { "epoch": 0.9697560975609756, "grad_norm": 0.3948708772659302, "learning_rate": 1.3534959349593497e-05, "loss": 0.0297, "step": 79520 }, { "epoch": 0.9698170731707317, "grad_norm": 0.9549596309661865, "learning_rate": 1.3534552845528457e-05, "loss": 0.0353, "step": 79525 }, { "epoch": 0.9698780487804878, "grad_norm": 0.6910462975502014, "learning_rate": 1.3534146341463417e-05, "loss": 0.0404, "step": 79530 }, { "epoch": 0.9699390243902439, "grad_norm": 0.636650025844574, "learning_rate": 1.3533739837398375e-05, "loss": 0.037, "step": 79535 }, { "epoch": 0.97, "grad_norm": 0.7691659331321716, "learning_rate": 1.3533333333333333e-05, "loss": 0.0302, "step": 79540 }, { "epoch": 0.9700609756097561, "grad_norm": 0.6568459868431091, "learning_rate": 1.3532926829268293e-05, "loss": 0.0575, "step": 79545 }, { "epoch": 0.9701219512195122, "grad_norm": 0.7304339408874512, "learning_rate": 1.3532520325203253e-05, "loss": 0.0359, "step": 79550 }, { "epoch": 0.9701829268292683, "grad_norm": 0.393074095249176, "learning_rate": 1.3532113821138213e-05, "loss": 0.0427, "step": 79555 }, { "epoch": 0.9702439024390244, "grad_norm": 0.6573325991630554, "learning_rate": 1.3531707317073172e-05, "loss": 0.0253, "step": 79560 }, { "epoch": 0.9703048780487805, "grad_norm": 0.5413575172424316, "learning_rate": 1.353130081300813e-05, "loss": 0.0454, "step": 79565 }, { "epoch": 0.9703658536585366, "grad_norm": 0.43099087476730347, "learning_rate": 1.353089430894309e-05, "loss": 0.0412, "step": 79570 }, { "epoch": 0.9704268292682927, "grad_norm": 0.24537676572799683, "learning_rate": 1.353048780487805e-05, "loss": 0.0618, "step": 79575 }, { "epoch": 0.9704878048780488, "grad_norm": 0.28330472111701965, "learning_rate": 1.353008130081301e-05, "loss": 0.0439, "step": 79580 }, { "epoch": 0.9705487804878049, "grad_norm": 0.4927966892719269, "learning_rate": 1.352967479674797e-05, "loss": 0.0385, "step": 79585 }, { "epoch": 0.970609756097561, "grad_norm": 0.6201769709587097, "learning_rate": 1.352926829268293e-05, "loss": 0.074, "step": 79590 }, { "epoch": 0.9706707317073171, "grad_norm": 0.4088303744792938, "learning_rate": 1.3528861788617886e-05, "loss": 0.0514, "step": 79595 }, { "epoch": 0.9707317073170731, "grad_norm": 0.5780530571937561, "learning_rate": 1.3528455284552846e-05, "loss": 0.046, "step": 79600 }, { "epoch": 0.9707926829268293, "grad_norm": 0.5180376172065735, "learning_rate": 1.3528048780487806e-05, "loss": 0.0617, "step": 79605 }, { "epoch": 0.9708536585365853, "grad_norm": 1.1849912405014038, "learning_rate": 1.3527642276422765e-05, "loss": 0.0762, "step": 79610 }, { "epoch": 0.9709146341463415, "grad_norm": 0.8616588711738586, "learning_rate": 1.3527235772357725e-05, "loss": 0.0732, "step": 79615 }, { "epoch": 0.9709756097560975, "grad_norm": 0.8348750472068787, "learning_rate": 1.3526829268292685e-05, "loss": 0.0614, "step": 79620 }, { "epoch": 0.9710365853658537, "grad_norm": 0.46632006764411926, "learning_rate": 1.3526422764227643e-05, "loss": 0.0391, "step": 79625 }, { "epoch": 0.9710975609756097, "grad_norm": 0.2772141396999359, "learning_rate": 1.3526016260162601e-05, "loss": 0.0345, "step": 79630 }, { "epoch": 0.9711585365853659, "grad_norm": 0.25138720870018005, "learning_rate": 1.3525609756097561e-05, "loss": 0.0183, "step": 79635 }, { "epoch": 0.9712195121951219, "grad_norm": 0.5001352429389954, "learning_rate": 1.3525203252032521e-05, "loss": 0.0377, "step": 79640 }, { "epoch": 0.9712804878048781, "grad_norm": 0.47380509972572327, "learning_rate": 1.3524796747967481e-05, "loss": 0.0145, "step": 79645 }, { "epoch": 0.9713414634146341, "grad_norm": 0.6238980889320374, "learning_rate": 1.352439024390244e-05, "loss": 0.0395, "step": 79650 }, { "epoch": 0.9714024390243903, "grad_norm": 2.123264789581299, "learning_rate": 1.3523983739837399e-05, "loss": 0.0516, "step": 79655 }, { "epoch": 0.9714634146341463, "grad_norm": 0.3818240761756897, "learning_rate": 1.3523577235772359e-05, "loss": 0.0349, "step": 79660 }, { "epoch": 0.9715243902439025, "grad_norm": 0.4871208369731903, "learning_rate": 1.3523170731707318e-05, "loss": 0.0278, "step": 79665 }, { "epoch": 0.9715853658536585, "grad_norm": 0.24756400287151337, "learning_rate": 1.3522764227642278e-05, "loss": 0.0155, "step": 79670 }, { "epoch": 0.9716463414634147, "grad_norm": 0.5142735838890076, "learning_rate": 1.3522357723577238e-05, "loss": 0.0403, "step": 79675 }, { "epoch": 0.9717073170731707, "grad_norm": 0.5025383830070496, "learning_rate": 1.3521951219512198e-05, "loss": 0.0345, "step": 79680 }, { "epoch": 0.9717682926829269, "grad_norm": 0.6722396612167358, "learning_rate": 1.3521544715447154e-05, "loss": 0.0347, "step": 79685 }, { "epoch": 0.9718292682926829, "grad_norm": 0.26203757524490356, "learning_rate": 1.3521138211382114e-05, "loss": 0.0192, "step": 79690 }, { "epoch": 0.971890243902439, "grad_norm": 0.8319216966629028, "learning_rate": 1.3520731707317074e-05, "loss": 0.0291, "step": 79695 }, { "epoch": 0.9719512195121951, "grad_norm": 0.6014253497123718, "learning_rate": 1.3520325203252034e-05, "loss": 0.0387, "step": 79700 }, { "epoch": 0.9720121951219513, "grad_norm": 0.6872104406356812, "learning_rate": 1.3519918699186994e-05, "loss": 0.0338, "step": 79705 }, { "epoch": 0.9720731707317073, "grad_norm": 0.7936789393424988, "learning_rate": 1.3519512195121953e-05, "loss": 0.0421, "step": 79710 }, { "epoch": 0.9721341463414634, "grad_norm": 0.5556071996688843, "learning_rate": 1.351910569105691e-05, "loss": 0.0358, "step": 79715 }, { "epoch": 0.9721951219512195, "grad_norm": 0.5237188935279846, "learning_rate": 1.351869918699187e-05, "loss": 0.0444, "step": 79720 }, { "epoch": 0.9722560975609756, "grad_norm": 0.29840049147605896, "learning_rate": 1.351829268292683e-05, "loss": 0.0312, "step": 79725 }, { "epoch": 0.9723170731707317, "grad_norm": 1.799986720085144, "learning_rate": 1.351788617886179e-05, "loss": 0.0437, "step": 79730 }, { "epoch": 0.9723780487804878, "grad_norm": 0.5052334070205688, "learning_rate": 1.351747967479675e-05, "loss": 0.0281, "step": 79735 }, { "epoch": 0.9724390243902439, "grad_norm": 0.727799117565155, "learning_rate": 1.3517073170731709e-05, "loss": 0.0488, "step": 79740 }, { "epoch": 0.9725, "grad_norm": 1.6882761716842651, "learning_rate": 1.3516666666666667e-05, "loss": 0.0256, "step": 79745 }, { "epoch": 0.9725609756097561, "grad_norm": 0.40513697266578674, "learning_rate": 1.3516260162601627e-05, "loss": 0.0336, "step": 79750 }, { "epoch": 0.9726219512195122, "grad_norm": 0.24654637277126312, "learning_rate": 1.3515853658536587e-05, "loss": 0.0383, "step": 79755 }, { "epoch": 0.9726829268292683, "grad_norm": 1.5561052560806274, "learning_rate": 1.3515447154471547e-05, "loss": 0.0214, "step": 79760 }, { "epoch": 0.9727439024390244, "grad_norm": 0.40039777755737305, "learning_rate": 1.3515040650406506e-05, "loss": 0.0177, "step": 79765 }, { "epoch": 0.9728048780487805, "grad_norm": 0.6196580529212952, "learning_rate": 1.3514634146341466e-05, "loss": 0.0333, "step": 79770 }, { "epoch": 0.9728658536585366, "grad_norm": 0.7239316701889038, "learning_rate": 1.3514227642276423e-05, "loss": 0.0538, "step": 79775 }, { "epoch": 0.9729268292682927, "grad_norm": 0.4737304449081421, "learning_rate": 1.3513821138211382e-05, "loss": 0.0387, "step": 79780 }, { "epoch": 0.9729878048780488, "grad_norm": 1.1134402751922607, "learning_rate": 1.3513414634146342e-05, "loss": 0.0605, "step": 79785 }, { "epoch": 0.9730487804878049, "grad_norm": 0.6143026947975159, "learning_rate": 1.3513008130081302e-05, "loss": 0.0202, "step": 79790 }, { "epoch": 0.973109756097561, "grad_norm": 0.6956985592842102, "learning_rate": 1.3512601626016262e-05, "loss": 0.0418, "step": 79795 }, { "epoch": 0.973170731707317, "grad_norm": 0.19240562617778778, "learning_rate": 1.3512195121951222e-05, "loss": 0.0216, "step": 79800 }, { "epoch": 0.9732317073170732, "grad_norm": 1.1235086917877197, "learning_rate": 1.3511788617886178e-05, "loss": 0.0585, "step": 79805 }, { "epoch": 0.9732926829268292, "grad_norm": 0.42092350125312805, "learning_rate": 1.3511382113821138e-05, "loss": 0.0258, "step": 79810 }, { "epoch": 0.9733536585365854, "grad_norm": 0.7993826866149902, "learning_rate": 1.3510975609756098e-05, "loss": 0.0281, "step": 79815 }, { "epoch": 0.9734146341463414, "grad_norm": 0.3526008129119873, "learning_rate": 1.3510569105691058e-05, "loss": 0.0418, "step": 79820 }, { "epoch": 0.9734756097560976, "grad_norm": 0.3559902310371399, "learning_rate": 1.3510162601626018e-05, "loss": 0.0254, "step": 79825 }, { "epoch": 0.9735365853658536, "grad_norm": 0.754090428352356, "learning_rate": 1.3509756097560977e-05, "loss": 0.0411, "step": 79830 }, { "epoch": 0.9735975609756098, "grad_norm": 0.6835768818855286, "learning_rate": 1.3509349593495935e-05, "loss": 0.0264, "step": 79835 }, { "epoch": 0.9736585365853658, "grad_norm": 0.38676917552948, "learning_rate": 1.3508943089430895e-05, "loss": 0.0248, "step": 79840 }, { "epoch": 0.973719512195122, "grad_norm": 0.4472232162952423, "learning_rate": 1.3508536585365855e-05, "loss": 0.0191, "step": 79845 }, { "epoch": 0.973780487804878, "grad_norm": 4.052104949951172, "learning_rate": 1.3508130081300815e-05, "loss": 0.0505, "step": 79850 }, { "epoch": 0.9738414634146342, "grad_norm": 1.0783361196517944, "learning_rate": 1.3507723577235775e-05, "loss": 0.0461, "step": 79855 }, { "epoch": 0.9739024390243902, "grad_norm": 0.33227506279945374, "learning_rate": 1.3507317073170733e-05, "loss": 0.0489, "step": 79860 }, { "epoch": 0.9739634146341464, "grad_norm": 1.1372696161270142, "learning_rate": 1.3506910569105691e-05, "loss": 0.0254, "step": 79865 }, { "epoch": 0.9740243902439024, "grad_norm": 0.39844006299972534, "learning_rate": 1.350650406504065e-05, "loss": 0.0362, "step": 79870 }, { "epoch": 0.9740853658536586, "grad_norm": 0.1334337294101715, "learning_rate": 1.350609756097561e-05, "loss": 0.0364, "step": 79875 }, { "epoch": 0.9741463414634146, "grad_norm": 0.4118150770664215, "learning_rate": 1.350569105691057e-05, "loss": 0.0433, "step": 79880 }, { "epoch": 0.9742073170731708, "grad_norm": 0.3346664607524872, "learning_rate": 1.350528455284553e-05, "loss": 0.0214, "step": 79885 }, { "epoch": 0.9742682926829268, "grad_norm": 0.4537787437438965, "learning_rate": 1.350487804878049e-05, "loss": 0.0482, "step": 79890 }, { "epoch": 0.974329268292683, "grad_norm": 0.3260839283466339, "learning_rate": 1.3504471544715447e-05, "loss": 0.0308, "step": 79895 }, { "epoch": 0.974390243902439, "grad_norm": 0.6994398236274719, "learning_rate": 1.3504065040650406e-05, "loss": 0.0442, "step": 79900 }, { "epoch": 0.9744512195121952, "grad_norm": 1.011873483657837, "learning_rate": 1.3503658536585366e-05, "loss": 0.0476, "step": 79905 }, { "epoch": 0.9745121951219512, "grad_norm": 0.2869718372821808, "learning_rate": 1.3503252032520326e-05, "loss": 0.0244, "step": 79910 }, { "epoch": 0.9745731707317074, "grad_norm": 0.43317070603370667, "learning_rate": 1.3502845528455286e-05, "loss": 0.0246, "step": 79915 }, { "epoch": 0.9746341463414634, "grad_norm": 0.44269993901252747, "learning_rate": 1.3502439024390246e-05, "loss": 0.0429, "step": 79920 }, { "epoch": 0.9746951219512195, "grad_norm": 0.9438230991363525, "learning_rate": 1.3502032520325204e-05, "loss": 0.0194, "step": 79925 }, { "epoch": 0.9747560975609756, "grad_norm": 0.5942339897155762, "learning_rate": 1.3501626016260164e-05, "loss": 0.0342, "step": 79930 }, { "epoch": 0.9748170731707317, "grad_norm": 0.19741655886173248, "learning_rate": 1.3501219512195123e-05, "loss": 0.0306, "step": 79935 }, { "epoch": 0.9748780487804878, "grad_norm": 0.5300714373588562, "learning_rate": 1.3500813008130083e-05, "loss": 0.0607, "step": 79940 }, { "epoch": 0.9749390243902439, "grad_norm": 0.6819072365760803, "learning_rate": 1.3500406504065043e-05, "loss": 0.0345, "step": 79945 }, { "epoch": 0.975, "grad_norm": 0.6984114050865173, "learning_rate": 1.3500000000000001e-05, "loss": 0.0462, "step": 79950 }, { "epoch": 0.9750609756097561, "grad_norm": 1.3908690214157104, "learning_rate": 1.349959349593496e-05, "loss": 0.0519, "step": 79955 }, { "epoch": 0.9751219512195122, "grad_norm": 0.4571073353290558, "learning_rate": 1.3499186991869919e-05, "loss": 0.027, "step": 79960 }, { "epoch": 0.9751829268292683, "grad_norm": 0.8560102581977844, "learning_rate": 1.3498780487804879e-05, "loss": 0.0387, "step": 79965 }, { "epoch": 0.9752439024390244, "grad_norm": 0.5780693292617798, "learning_rate": 1.3498373983739839e-05, "loss": 0.0411, "step": 79970 }, { "epoch": 0.9753048780487805, "grad_norm": 0.809361457824707, "learning_rate": 1.3497967479674799e-05, "loss": 0.044, "step": 79975 }, { "epoch": 0.9753658536585366, "grad_norm": 0.3940410017967224, "learning_rate": 1.3497560975609758e-05, "loss": 0.0281, "step": 79980 }, { "epoch": 0.9754268292682927, "grad_norm": 0.5997437834739685, "learning_rate": 1.3497154471544715e-05, "loss": 0.0195, "step": 79985 }, { "epoch": 0.9754878048780488, "grad_norm": 0.43781787157058716, "learning_rate": 1.3496747967479675e-05, "loss": 0.0695, "step": 79990 }, { "epoch": 0.9755487804878049, "grad_norm": 0.6733816266059875, "learning_rate": 1.3496341463414635e-05, "loss": 0.0353, "step": 79995 }, { "epoch": 0.975609756097561, "grad_norm": 0.8297646045684814, "learning_rate": 1.3495934959349594e-05, "loss": 0.0529, "step": 80000 }, { "epoch": 0.9756707317073171, "grad_norm": 1.0444037914276123, "learning_rate": 1.3495528455284554e-05, "loss": 0.0431, "step": 80005 }, { "epoch": 0.9757317073170731, "grad_norm": 0.5879418253898621, "learning_rate": 1.3495121951219514e-05, "loss": 0.0249, "step": 80010 }, { "epoch": 0.9757926829268293, "grad_norm": 0.46496525406837463, "learning_rate": 1.3494715447154472e-05, "loss": 0.0358, "step": 80015 }, { "epoch": 0.9758536585365853, "grad_norm": 0.46125227212905884, "learning_rate": 1.3494308943089432e-05, "loss": 0.034, "step": 80020 }, { "epoch": 0.9759146341463415, "grad_norm": 0.514070451259613, "learning_rate": 1.3493902439024392e-05, "loss": 0.0328, "step": 80025 }, { "epoch": 0.9759756097560975, "grad_norm": 0.7171222567558289, "learning_rate": 1.3493495934959352e-05, "loss": 0.0318, "step": 80030 }, { "epoch": 0.9760365853658537, "grad_norm": 0.5432957410812378, "learning_rate": 1.3493089430894311e-05, "loss": 0.0408, "step": 80035 }, { "epoch": 0.9760975609756097, "grad_norm": 0.6653685569763184, "learning_rate": 1.349268292682927e-05, "loss": 0.0344, "step": 80040 }, { "epoch": 0.9761585365853659, "grad_norm": 0.26009315252304077, "learning_rate": 1.3492276422764228e-05, "loss": 0.0456, "step": 80045 }, { "epoch": 0.9762195121951219, "grad_norm": 0.33840668201446533, "learning_rate": 1.3491869918699187e-05, "loss": 0.038, "step": 80050 }, { "epoch": 0.9762804878048781, "grad_norm": 0.6884384155273438, "learning_rate": 1.3491463414634147e-05, "loss": 0.0423, "step": 80055 }, { "epoch": 0.9763414634146341, "grad_norm": 0.6043477654457092, "learning_rate": 1.3491056910569107e-05, "loss": 0.0282, "step": 80060 }, { "epoch": 0.9764024390243903, "grad_norm": 0.39046794176101685, "learning_rate": 1.3490650406504067e-05, "loss": 0.034, "step": 80065 }, { "epoch": 0.9764634146341463, "grad_norm": 0.7286660671234131, "learning_rate": 1.3490243902439027e-05, "loss": 0.0508, "step": 80070 }, { "epoch": 0.9765243902439025, "grad_norm": 0.46619245409965515, "learning_rate": 1.3489837398373983e-05, "loss": 0.0271, "step": 80075 }, { "epoch": 0.9765853658536585, "grad_norm": 0.39671987295150757, "learning_rate": 1.3489430894308943e-05, "loss": 0.0471, "step": 80080 }, { "epoch": 0.9766463414634147, "grad_norm": 0.764156699180603, "learning_rate": 1.3489024390243903e-05, "loss": 0.0625, "step": 80085 }, { "epoch": 0.9767073170731707, "grad_norm": 0.41305044293403625, "learning_rate": 1.3488617886178863e-05, "loss": 0.0313, "step": 80090 }, { "epoch": 0.9767682926829269, "grad_norm": 0.22423222661018372, "learning_rate": 1.3488211382113822e-05, "loss": 0.0389, "step": 80095 }, { "epoch": 0.9768292682926829, "grad_norm": 0.42461130023002625, "learning_rate": 1.3487804878048782e-05, "loss": 0.0366, "step": 80100 }, { "epoch": 0.9768902439024391, "grad_norm": 0.3771257698535919, "learning_rate": 1.348739837398374e-05, "loss": 0.04, "step": 80105 }, { "epoch": 0.9769512195121951, "grad_norm": 0.8852300047874451, "learning_rate": 1.34869918699187e-05, "loss": 0.0659, "step": 80110 }, { "epoch": 0.9770121951219513, "grad_norm": 0.4194970726966858, "learning_rate": 1.348658536585366e-05, "loss": 0.0334, "step": 80115 }, { "epoch": 0.9770731707317073, "grad_norm": 1.0341428518295288, "learning_rate": 1.348617886178862e-05, "loss": 0.0185, "step": 80120 }, { "epoch": 0.9771341463414634, "grad_norm": 0.2879771888256073, "learning_rate": 1.3485772357723578e-05, "loss": 0.0267, "step": 80125 }, { "epoch": 0.9771951219512195, "grad_norm": 0.2979922294616699, "learning_rate": 1.3485365853658538e-05, "loss": 0.0465, "step": 80130 }, { "epoch": 0.9772560975609756, "grad_norm": 0.46433278918266296, "learning_rate": 1.3484959349593496e-05, "loss": 0.063, "step": 80135 }, { "epoch": 0.9773170731707317, "grad_norm": 0.9099870324134827, "learning_rate": 1.3484552845528456e-05, "loss": 0.0734, "step": 80140 }, { "epoch": 0.9773780487804878, "grad_norm": 0.6106629371643066, "learning_rate": 1.3484146341463416e-05, "loss": 0.0469, "step": 80145 }, { "epoch": 0.9774390243902439, "grad_norm": 0.37134915590286255, "learning_rate": 1.3483739837398375e-05, "loss": 0.0634, "step": 80150 }, { "epoch": 0.9775, "grad_norm": 0.2200310379266739, "learning_rate": 1.3483333333333335e-05, "loss": 0.0403, "step": 80155 }, { "epoch": 0.9775609756097561, "grad_norm": 0.35713598132133484, "learning_rate": 1.3482926829268295e-05, "loss": 0.0557, "step": 80160 }, { "epoch": 0.9776219512195122, "grad_norm": 0.3686189651489258, "learning_rate": 1.3482520325203252e-05, "loss": 0.0406, "step": 80165 }, { "epoch": 0.9776829268292683, "grad_norm": 0.37709176540374756, "learning_rate": 1.3482113821138211e-05, "loss": 0.0331, "step": 80170 }, { "epoch": 0.9777439024390244, "grad_norm": 0.3489660322666168, "learning_rate": 1.3481707317073171e-05, "loss": 0.0698, "step": 80175 }, { "epoch": 0.9778048780487805, "grad_norm": 0.2748264968395233, "learning_rate": 1.3481300813008131e-05, "loss": 0.0186, "step": 80180 }, { "epoch": 0.9778658536585366, "grad_norm": 0.7549915909767151, "learning_rate": 1.348089430894309e-05, "loss": 0.0464, "step": 80185 }, { "epoch": 0.9779268292682927, "grad_norm": 0.2625490128993988, "learning_rate": 1.348048780487805e-05, "loss": 0.034, "step": 80190 }, { "epoch": 0.9779878048780488, "grad_norm": 0.3452760577201843, "learning_rate": 1.3480081300813009e-05, "loss": 0.0265, "step": 80195 }, { "epoch": 0.9780487804878049, "grad_norm": 0.9166218638420105, "learning_rate": 1.3479674796747969e-05, "loss": 0.0211, "step": 80200 }, { "epoch": 0.978109756097561, "grad_norm": 0.7009000778198242, "learning_rate": 1.3479268292682928e-05, "loss": 0.0287, "step": 80205 }, { "epoch": 0.978170731707317, "grad_norm": 1.117314338684082, "learning_rate": 1.3478861788617888e-05, "loss": 0.0505, "step": 80210 }, { "epoch": 0.9782317073170732, "grad_norm": 0.445828378200531, "learning_rate": 1.3478455284552846e-05, "loss": 0.0467, "step": 80215 }, { "epoch": 0.9782926829268292, "grad_norm": 0.5552980899810791, "learning_rate": 1.3478048780487806e-05, "loss": 0.0312, "step": 80220 }, { "epoch": 0.9783536585365854, "grad_norm": 1.0494904518127441, "learning_rate": 1.3477642276422764e-05, "loss": 0.0504, "step": 80225 }, { "epoch": 0.9784146341463414, "grad_norm": 1.7133677005767822, "learning_rate": 1.3477235772357724e-05, "loss": 0.0396, "step": 80230 }, { "epoch": 0.9784756097560976, "grad_norm": 0.9640681147575378, "learning_rate": 1.3476829268292684e-05, "loss": 0.0732, "step": 80235 }, { "epoch": 0.9785365853658536, "grad_norm": 0.3998231291770935, "learning_rate": 1.3476422764227644e-05, "loss": 0.0249, "step": 80240 }, { "epoch": 0.9785975609756098, "grad_norm": 0.9902573823928833, "learning_rate": 1.3476016260162604e-05, "loss": 0.0547, "step": 80245 }, { "epoch": 0.9786585365853658, "grad_norm": 0.24953517317771912, "learning_rate": 1.3475609756097563e-05, "loss": 0.024, "step": 80250 }, { "epoch": 0.978719512195122, "grad_norm": 0.19085891544818878, "learning_rate": 1.347520325203252e-05, "loss": 0.0327, "step": 80255 }, { "epoch": 0.978780487804878, "grad_norm": 0.4616525173187256, "learning_rate": 1.347479674796748e-05, "loss": 0.027, "step": 80260 }, { "epoch": 0.9788414634146342, "grad_norm": 0.46828582882881165, "learning_rate": 1.347439024390244e-05, "loss": 0.0339, "step": 80265 }, { "epoch": 0.9789024390243902, "grad_norm": 0.20978006720542908, "learning_rate": 1.34739837398374e-05, "loss": 0.0304, "step": 80270 }, { "epoch": 0.9789634146341464, "grad_norm": 0.5719335079193115, "learning_rate": 1.3473577235772359e-05, "loss": 0.0347, "step": 80275 }, { "epoch": 0.9790243902439024, "grad_norm": 0.4321424663066864, "learning_rate": 1.3473170731707319e-05, "loss": 0.0476, "step": 80280 }, { "epoch": 0.9790853658536586, "grad_norm": 0.6853199005126953, "learning_rate": 1.3472764227642277e-05, "loss": 0.0591, "step": 80285 }, { "epoch": 0.9791463414634146, "grad_norm": 0.3371563255786896, "learning_rate": 1.3472357723577237e-05, "loss": 0.0312, "step": 80290 }, { "epoch": 0.9792073170731708, "grad_norm": 0.9905474781990051, "learning_rate": 1.3471951219512197e-05, "loss": 0.0729, "step": 80295 }, { "epoch": 0.9792682926829268, "grad_norm": 0.4744340777397156, "learning_rate": 1.3471544715447157e-05, "loss": 0.0232, "step": 80300 }, { "epoch": 0.979329268292683, "grad_norm": 0.3655346632003784, "learning_rate": 1.3471138211382115e-05, "loss": 0.0411, "step": 80305 }, { "epoch": 0.979390243902439, "grad_norm": 0.23548032343387604, "learning_rate": 1.3470731707317074e-05, "loss": 0.0271, "step": 80310 }, { "epoch": 0.9794512195121952, "grad_norm": 0.5258365273475647, "learning_rate": 1.3470325203252033e-05, "loss": 0.0636, "step": 80315 }, { "epoch": 0.9795121951219512, "grad_norm": 0.3704403042793274, "learning_rate": 1.3469918699186992e-05, "loss": 0.0288, "step": 80320 }, { "epoch": 0.9795731707317074, "grad_norm": 1.2072029113769531, "learning_rate": 1.3469512195121952e-05, "loss": 0.0394, "step": 80325 }, { "epoch": 0.9796341463414634, "grad_norm": 0.47715821862220764, "learning_rate": 1.3469105691056912e-05, "loss": 0.0269, "step": 80330 }, { "epoch": 0.9796951219512195, "grad_norm": 0.7472102642059326, "learning_rate": 1.3468699186991872e-05, "loss": 0.052, "step": 80335 }, { "epoch": 0.9797560975609756, "grad_norm": 0.6150704026222229, "learning_rate": 1.3468292682926832e-05, "loss": 0.0369, "step": 80340 }, { "epoch": 0.9798170731707317, "grad_norm": 0.7751224040985107, "learning_rate": 1.3467886178861788e-05, "loss": 0.0465, "step": 80345 }, { "epoch": 0.9798780487804878, "grad_norm": 0.9485374093055725, "learning_rate": 1.3467479674796748e-05, "loss": 0.0684, "step": 80350 }, { "epoch": 0.9799390243902439, "grad_norm": 0.4413447678089142, "learning_rate": 1.3467073170731708e-05, "loss": 0.0282, "step": 80355 }, { "epoch": 0.98, "grad_norm": 0.508675217628479, "learning_rate": 1.3466666666666668e-05, "loss": 0.0793, "step": 80360 }, { "epoch": 0.9800609756097561, "grad_norm": 0.35716432332992554, "learning_rate": 1.3466260162601627e-05, "loss": 0.0187, "step": 80365 }, { "epoch": 0.9801219512195122, "grad_norm": 0.6308485865592957, "learning_rate": 1.3465853658536587e-05, "loss": 0.0497, "step": 80370 }, { "epoch": 0.9801829268292683, "grad_norm": 0.6144315004348755, "learning_rate": 1.3465447154471545e-05, "loss": 0.0333, "step": 80375 }, { "epoch": 0.9802439024390244, "grad_norm": 0.477811336517334, "learning_rate": 1.3465040650406505e-05, "loss": 0.0222, "step": 80380 }, { "epoch": 0.9803048780487805, "grad_norm": 0.6855757832527161, "learning_rate": 1.3464634146341465e-05, "loss": 0.0491, "step": 80385 }, { "epoch": 0.9803658536585366, "grad_norm": 0.2033296376466751, "learning_rate": 1.3464227642276423e-05, "loss": 0.0247, "step": 80390 }, { "epoch": 0.9804268292682927, "grad_norm": 0.4149395227432251, "learning_rate": 1.3463821138211383e-05, "loss": 0.0311, "step": 80395 }, { "epoch": 0.9804878048780488, "grad_norm": 0.568572461605072, "learning_rate": 1.3463414634146343e-05, "loss": 0.0211, "step": 80400 }, { "epoch": 0.9805487804878049, "grad_norm": 0.5319584012031555, "learning_rate": 1.3463008130081301e-05, "loss": 0.033, "step": 80405 }, { "epoch": 0.980609756097561, "grad_norm": 0.5128005146980286, "learning_rate": 1.346260162601626e-05, "loss": 0.065, "step": 80410 }, { "epoch": 0.9806707317073171, "grad_norm": 0.5123218894004822, "learning_rate": 1.346219512195122e-05, "loss": 0.0316, "step": 80415 }, { "epoch": 0.9807317073170732, "grad_norm": 0.61735600233078, "learning_rate": 1.346178861788618e-05, "loss": 0.0298, "step": 80420 }, { "epoch": 0.9807926829268293, "grad_norm": 0.22792309522628784, "learning_rate": 1.346138211382114e-05, "loss": 0.0226, "step": 80425 }, { "epoch": 0.9808536585365853, "grad_norm": 0.6182034015655518, "learning_rate": 1.34609756097561e-05, "loss": 0.0334, "step": 80430 }, { "epoch": 0.9809146341463415, "grad_norm": 0.2816452980041504, "learning_rate": 1.3460569105691056e-05, "loss": 0.0268, "step": 80435 }, { "epoch": 0.9809756097560975, "grad_norm": 0.3976912796497345, "learning_rate": 1.3460162601626016e-05, "loss": 0.0381, "step": 80440 }, { "epoch": 0.9810365853658537, "grad_norm": 0.45747196674346924, "learning_rate": 1.3459756097560976e-05, "loss": 0.0279, "step": 80445 }, { "epoch": 0.9810975609756097, "grad_norm": 1.8724122047424316, "learning_rate": 1.3459349593495936e-05, "loss": 0.0336, "step": 80450 }, { "epoch": 0.9811585365853659, "grad_norm": 0.4514313340187073, "learning_rate": 1.3458943089430896e-05, "loss": 0.0252, "step": 80455 }, { "epoch": 0.9812195121951219, "grad_norm": 0.4264068305492401, "learning_rate": 1.3458536585365856e-05, "loss": 0.0487, "step": 80460 }, { "epoch": 0.9812804878048781, "grad_norm": 1.6332646608352661, "learning_rate": 1.3458130081300814e-05, "loss": 0.0595, "step": 80465 }, { "epoch": 0.9813414634146341, "grad_norm": 0.3942190408706665, "learning_rate": 1.3457723577235774e-05, "loss": 0.0927, "step": 80470 }, { "epoch": 0.9814024390243903, "grad_norm": 0.42137423157691956, "learning_rate": 1.3457317073170733e-05, "loss": 0.0345, "step": 80475 }, { "epoch": 0.9814634146341463, "grad_norm": 0.5897141098976135, "learning_rate": 1.3456910569105691e-05, "loss": 0.0343, "step": 80480 }, { "epoch": 0.9815243902439025, "grad_norm": 0.4245627522468567, "learning_rate": 1.3456504065040651e-05, "loss": 0.0312, "step": 80485 }, { "epoch": 0.9815853658536585, "grad_norm": 0.4502132534980774, "learning_rate": 1.3456097560975611e-05, "loss": 0.0538, "step": 80490 }, { "epoch": 0.9816463414634147, "grad_norm": 0.6066680550575256, "learning_rate": 1.345569105691057e-05, "loss": 0.0463, "step": 80495 }, { "epoch": 0.9817073170731707, "grad_norm": 0.3987971544265747, "learning_rate": 1.3455284552845529e-05, "loss": 0.0211, "step": 80500 }, { "epoch": 0.9817682926829269, "grad_norm": 1.4505330324172974, "learning_rate": 1.3454878048780489e-05, "loss": 0.048, "step": 80505 }, { "epoch": 0.9818292682926829, "grad_norm": 0.4055105447769165, "learning_rate": 1.3454471544715449e-05, "loss": 0.0341, "step": 80510 }, { "epoch": 0.9818902439024391, "grad_norm": 0.5364035367965698, "learning_rate": 1.3454065040650409e-05, "loss": 0.0455, "step": 80515 }, { "epoch": 0.9819512195121951, "grad_norm": 0.6535040140151978, "learning_rate": 1.3453658536585368e-05, "loss": 0.044, "step": 80520 }, { "epoch": 0.9820121951219513, "grad_norm": 0.3217219412326813, "learning_rate": 1.3453252032520325e-05, "loss": 0.0253, "step": 80525 }, { "epoch": 0.9820731707317073, "grad_norm": 0.31323039531707764, "learning_rate": 1.3452845528455285e-05, "loss": 0.0219, "step": 80530 }, { "epoch": 0.9821341463414635, "grad_norm": 0.32578137516975403, "learning_rate": 1.3452439024390244e-05, "loss": 0.0236, "step": 80535 }, { "epoch": 0.9821951219512195, "grad_norm": 0.3044661283493042, "learning_rate": 1.3452032520325204e-05, "loss": 0.0451, "step": 80540 }, { "epoch": 0.9822560975609756, "grad_norm": 0.1784725934267044, "learning_rate": 1.3451626016260164e-05, "loss": 0.0335, "step": 80545 }, { "epoch": 0.9823170731707317, "grad_norm": 0.5358237028121948, "learning_rate": 1.3451219512195124e-05, "loss": 0.0377, "step": 80550 }, { "epoch": 0.9823780487804878, "grad_norm": 1.0649710893630981, "learning_rate": 1.3450813008130082e-05, "loss": 0.0838, "step": 80555 }, { "epoch": 0.9824390243902439, "grad_norm": 0.3642670214176178, "learning_rate": 1.3450406504065042e-05, "loss": 0.0319, "step": 80560 }, { "epoch": 0.9825, "grad_norm": 0.7294529676437378, "learning_rate": 1.3450000000000002e-05, "loss": 0.0948, "step": 80565 }, { "epoch": 0.9825609756097561, "grad_norm": 0.38891497254371643, "learning_rate": 1.344959349593496e-05, "loss": 0.0758, "step": 80570 }, { "epoch": 0.9826219512195122, "grad_norm": 0.3879598379135132, "learning_rate": 1.344918699186992e-05, "loss": 0.0226, "step": 80575 }, { "epoch": 0.9826829268292683, "grad_norm": 0.49412280321121216, "learning_rate": 1.344878048780488e-05, "loss": 0.0465, "step": 80580 }, { "epoch": 0.9827439024390244, "grad_norm": 0.6021062731742859, "learning_rate": 1.3448373983739838e-05, "loss": 0.0238, "step": 80585 }, { "epoch": 0.9828048780487805, "grad_norm": 0.5909583568572998, "learning_rate": 1.3447967479674797e-05, "loss": 0.0235, "step": 80590 }, { "epoch": 0.9828658536585366, "grad_norm": 0.4206027090549469, "learning_rate": 1.3447560975609757e-05, "loss": 0.0592, "step": 80595 }, { "epoch": 0.9829268292682927, "grad_norm": 0.3844945430755615, "learning_rate": 1.3447154471544717e-05, "loss": 0.032, "step": 80600 }, { "epoch": 0.9829878048780488, "grad_norm": 0.40303826332092285, "learning_rate": 1.3446747967479677e-05, "loss": 0.0294, "step": 80605 }, { "epoch": 0.9830487804878049, "grad_norm": 0.7283486127853394, "learning_rate": 1.3446341463414637e-05, "loss": 0.0404, "step": 80610 }, { "epoch": 0.983109756097561, "grad_norm": 0.28158318996429443, "learning_rate": 1.3445934959349593e-05, "loss": 0.0208, "step": 80615 }, { "epoch": 0.9831707317073171, "grad_norm": 0.5147814154624939, "learning_rate": 1.3445528455284553e-05, "loss": 0.0383, "step": 80620 }, { "epoch": 0.9832317073170732, "grad_norm": 0.43919646739959717, "learning_rate": 1.3445121951219513e-05, "loss": 0.0355, "step": 80625 }, { "epoch": 0.9832926829268293, "grad_norm": 0.6328622698783875, "learning_rate": 1.3444715447154473e-05, "loss": 0.0536, "step": 80630 }, { "epoch": 0.9833536585365854, "grad_norm": 0.3942162096500397, "learning_rate": 1.3444308943089432e-05, "loss": 0.0336, "step": 80635 }, { "epoch": 0.9834146341463414, "grad_norm": 1.139389157295227, "learning_rate": 1.3443902439024392e-05, "loss": 0.0439, "step": 80640 }, { "epoch": 0.9834756097560976, "grad_norm": 0.2421509176492691, "learning_rate": 1.344349593495935e-05, "loss": 0.0307, "step": 80645 }, { "epoch": 0.9835365853658536, "grad_norm": 16.699018478393555, "learning_rate": 1.344308943089431e-05, "loss": 0.1354, "step": 80650 }, { "epoch": 0.9835975609756098, "grad_norm": 0.5035185813903809, "learning_rate": 1.3442682926829268e-05, "loss": 0.065, "step": 80655 }, { "epoch": 0.9836585365853658, "grad_norm": 0.8782334327697754, "learning_rate": 1.3442276422764228e-05, "loss": 0.0439, "step": 80660 }, { "epoch": 0.983719512195122, "grad_norm": 0.5877414345741272, "learning_rate": 1.3441869918699188e-05, "loss": 0.0536, "step": 80665 }, { "epoch": 0.983780487804878, "grad_norm": 0.23213396966457367, "learning_rate": 1.3441463414634148e-05, "loss": 0.0384, "step": 80670 }, { "epoch": 0.9838414634146342, "grad_norm": 0.5375064015388489, "learning_rate": 1.3441056910569106e-05, "loss": 0.0254, "step": 80675 }, { "epoch": 0.9839024390243902, "grad_norm": 0.4037702977657318, "learning_rate": 1.3440650406504066e-05, "loss": 0.0552, "step": 80680 }, { "epoch": 0.9839634146341464, "grad_norm": 0.4145432412624359, "learning_rate": 1.3440243902439026e-05, "loss": 0.0221, "step": 80685 }, { "epoch": 0.9840243902439024, "grad_norm": 0.5854960083961487, "learning_rate": 1.3439837398373985e-05, "loss": 0.0482, "step": 80690 }, { "epoch": 0.9840853658536586, "grad_norm": 0.6840453147888184, "learning_rate": 1.3439430894308945e-05, "loss": 0.0445, "step": 80695 }, { "epoch": 0.9841463414634146, "grad_norm": 0.3582191467285156, "learning_rate": 1.3439024390243905e-05, "loss": 0.0328, "step": 80700 }, { "epoch": 0.9842073170731708, "grad_norm": 0.34245049953460693, "learning_rate": 1.3438617886178861e-05, "loss": 0.0586, "step": 80705 }, { "epoch": 0.9842682926829268, "grad_norm": 0.9841567873954773, "learning_rate": 1.3438211382113821e-05, "loss": 0.0441, "step": 80710 }, { "epoch": 0.984329268292683, "grad_norm": 1.0497331619262695, "learning_rate": 1.3437804878048781e-05, "loss": 0.0412, "step": 80715 }, { "epoch": 0.984390243902439, "grad_norm": 0.32629284262657166, "learning_rate": 1.3437398373983741e-05, "loss": 0.0212, "step": 80720 }, { "epoch": 0.9844512195121952, "grad_norm": 0.8137565851211548, "learning_rate": 1.34369918699187e-05, "loss": 0.0457, "step": 80725 }, { "epoch": 0.9845121951219512, "grad_norm": 0.7234307527542114, "learning_rate": 1.343658536585366e-05, "loss": 0.0312, "step": 80730 }, { "epoch": 0.9845731707317074, "grad_norm": 0.4438128173351288, "learning_rate": 1.3436178861788619e-05, "loss": 0.0338, "step": 80735 }, { "epoch": 0.9846341463414634, "grad_norm": 0.59373939037323, "learning_rate": 1.3435772357723578e-05, "loss": 0.0211, "step": 80740 }, { "epoch": 0.9846951219512196, "grad_norm": 0.44269949197769165, "learning_rate": 1.3435365853658537e-05, "loss": 0.0515, "step": 80745 }, { "epoch": 0.9847560975609756, "grad_norm": 0.6165912747383118, "learning_rate": 1.3434959349593496e-05, "loss": 0.0522, "step": 80750 }, { "epoch": 0.9848170731707317, "grad_norm": 0.32440271973609924, "learning_rate": 1.3434552845528456e-05, "loss": 0.0631, "step": 80755 }, { "epoch": 0.9848780487804878, "grad_norm": 0.36568304896354675, "learning_rate": 1.3434146341463416e-05, "loss": 0.04, "step": 80760 }, { "epoch": 0.984939024390244, "grad_norm": 0.6886784434318542, "learning_rate": 1.3433739837398374e-05, "loss": 0.0218, "step": 80765 }, { "epoch": 0.985, "grad_norm": 0.7221159934997559, "learning_rate": 1.3433333333333334e-05, "loss": 0.0468, "step": 80770 }, { "epoch": 0.9850609756097561, "grad_norm": 0.37984874844551086, "learning_rate": 1.3432926829268294e-05, "loss": 0.04, "step": 80775 }, { "epoch": 0.9851219512195122, "grad_norm": 0.2619747817516327, "learning_rate": 1.3432520325203254e-05, "loss": 0.0252, "step": 80780 }, { "epoch": 0.9851829268292683, "grad_norm": 0.588935911655426, "learning_rate": 1.3432113821138213e-05, "loss": 0.0257, "step": 80785 }, { "epoch": 0.9852439024390244, "grad_norm": 0.6661096811294556, "learning_rate": 1.3431707317073173e-05, "loss": 0.0963, "step": 80790 }, { "epoch": 0.9853048780487805, "grad_norm": 0.3484633266925812, "learning_rate": 1.343130081300813e-05, "loss": 0.0272, "step": 80795 }, { "epoch": 0.9853658536585366, "grad_norm": 0.6479007005691528, "learning_rate": 1.343089430894309e-05, "loss": 0.0261, "step": 80800 }, { "epoch": 0.9854268292682927, "grad_norm": 0.1807195246219635, "learning_rate": 1.343048780487805e-05, "loss": 0.0497, "step": 80805 }, { "epoch": 0.9854878048780488, "grad_norm": 0.44882190227508545, "learning_rate": 1.343008130081301e-05, "loss": 0.025, "step": 80810 }, { "epoch": 0.9855487804878049, "grad_norm": 0.39292481541633606, "learning_rate": 1.3429674796747969e-05, "loss": 0.0276, "step": 80815 }, { "epoch": 0.985609756097561, "grad_norm": 0.640207827091217, "learning_rate": 1.3429268292682929e-05, "loss": 0.0407, "step": 80820 }, { "epoch": 0.9856707317073171, "grad_norm": 0.19222603738307953, "learning_rate": 1.3428861788617887e-05, "loss": 0.0293, "step": 80825 }, { "epoch": 0.9857317073170732, "grad_norm": 0.6302841305732727, "learning_rate": 1.3428455284552847e-05, "loss": 0.0783, "step": 80830 }, { "epoch": 0.9857926829268293, "grad_norm": 0.5435642600059509, "learning_rate": 1.3428048780487805e-05, "loss": 0.0491, "step": 80835 }, { "epoch": 0.9858536585365854, "grad_norm": 0.4592556357383728, "learning_rate": 1.3427642276422765e-05, "loss": 0.0321, "step": 80840 }, { "epoch": 0.9859146341463415, "grad_norm": 1.0287632942199707, "learning_rate": 1.3427235772357725e-05, "loss": 0.0554, "step": 80845 }, { "epoch": 0.9859756097560975, "grad_norm": 0.2135477364063263, "learning_rate": 1.3426829268292684e-05, "loss": 0.0347, "step": 80850 }, { "epoch": 0.9860365853658537, "grad_norm": 1.0309215784072876, "learning_rate": 1.3426422764227643e-05, "loss": 0.0234, "step": 80855 }, { "epoch": 0.9860975609756097, "grad_norm": 0.40850573778152466, "learning_rate": 1.3426016260162602e-05, "loss": 0.0226, "step": 80860 }, { "epoch": 0.9861585365853659, "grad_norm": 0.49936848878860474, "learning_rate": 1.3425609756097562e-05, "loss": 0.0564, "step": 80865 }, { "epoch": 0.9862195121951219, "grad_norm": 0.6065976619720459, "learning_rate": 1.3425203252032522e-05, "loss": 0.0415, "step": 80870 }, { "epoch": 0.9862804878048781, "grad_norm": 0.6314600706100464, "learning_rate": 1.3424796747967482e-05, "loss": 0.0302, "step": 80875 }, { "epoch": 0.9863414634146341, "grad_norm": 0.5775068402290344, "learning_rate": 1.3424390243902442e-05, "loss": 0.0526, "step": 80880 }, { "epoch": 0.9864024390243903, "grad_norm": 0.6888573169708252, "learning_rate": 1.3423983739837398e-05, "loss": 0.0414, "step": 80885 }, { "epoch": 0.9864634146341463, "grad_norm": 0.3150046169757843, "learning_rate": 1.3423577235772358e-05, "loss": 0.0297, "step": 80890 }, { "epoch": 0.9865243902439025, "grad_norm": 0.1347483992576599, "learning_rate": 1.3423170731707318e-05, "loss": 0.0591, "step": 80895 }, { "epoch": 0.9865853658536585, "grad_norm": 0.44299471378326416, "learning_rate": 1.3422764227642278e-05, "loss": 0.0207, "step": 80900 }, { "epoch": 0.9866463414634147, "grad_norm": 1.1418248414993286, "learning_rate": 1.3422357723577237e-05, "loss": 0.0349, "step": 80905 }, { "epoch": 0.9867073170731707, "grad_norm": 0.4235994517803192, "learning_rate": 1.3421951219512197e-05, "loss": 0.03, "step": 80910 }, { "epoch": 0.9867682926829269, "grad_norm": 0.4947211742401123, "learning_rate": 1.3421544715447155e-05, "loss": 0.0301, "step": 80915 }, { "epoch": 0.9868292682926829, "grad_norm": 0.9433019757270813, "learning_rate": 1.3421138211382113e-05, "loss": 0.0427, "step": 80920 }, { "epoch": 0.9868902439024391, "grad_norm": 0.19916082918643951, "learning_rate": 1.3420731707317073e-05, "loss": 0.0341, "step": 80925 }, { "epoch": 0.9869512195121951, "grad_norm": 0.6900160908699036, "learning_rate": 1.3420325203252033e-05, "loss": 0.055, "step": 80930 }, { "epoch": 0.9870121951219513, "grad_norm": 2.190619468688965, "learning_rate": 1.3419918699186993e-05, "loss": 0.0783, "step": 80935 }, { "epoch": 0.9870731707317073, "grad_norm": 0.5264178514480591, "learning_rate": 1.3419512195121953e-05, "loss": 0.0427, "step": 80940 }, { "epoch": 0.9871341463414635, "grad_norm": 0.377984881401062, "learning_rate": 1.3419105691056911e-05, "loss": 0.0344, "step": 80945 }, { "epoch": 0.9871951219512195, "grad_norm": 0.12000156939029694, "learning_rate": 1.341869918699187e-05, "loss": 0.0306, "step": 80950 }, { "epoch": 0.9872560975609757, "grad_norm": 0.3663996160030365, "learning_rate": 1.341829268292683e-05, "loss": 0.0275, "step": 80955 }, { "epoch": 0.9873170731707317, "grad_norm": 0.7066659331321716, "learning_rate": 1.341788617886179e-05, "loss": 0.048, "step": 80960 }, { "epoch": 0.9873780487804878, "grad_norm": 0.1704041212797165, "learning_rate": 1.341747967479675e-05, "loss": 0.0375, "step": 80965 }, { "epoch": 0.9874390243902439, "grad_norm": 0.4802391231060028, "learning_rate": 1.341707317073171e-05, "loss": 0.0376, "step": 80970 }, { "epoch": 0.9875, "grad_norm": 0.3241598904132843, "learning_rate": 1.3416666666666666e-05, "loss": 0.0212, "step": 80975 }, { "epoch": 0.9875609756097561, "grad_norm": 0.774709939956665, "learning_rate": 1.3416260162601626e-05, "loss": 0.0599, "step": 80980 }, { "epoch": 0.9876219512195122, "grad_norm": 0.678419291973114, "learning_rate": 1.3415853658536586e-05, "loss": 0.0231, "step": 80985 }, { "epoch": 0.9876829268292683, "grad_norm": 0.29880279302597046, "learning_rate": 1.3415447154471546e-05, "loss": 0.0258, "step": 80990 }, { "epoch": 0.9877439024390244, "grad_norm": 0.49149200320243835, "learning_rate": 1.3415040650406506e-05, "loss": 0.0499, "step": 80995 }, { "epoch": 0.9878048780487805, "grad_norm": 0.4863725006580353, "learning_rate": 1.3414634146341466e-05, "loss": 0.0447, "step": 81000 }, { "epoch": 0.9878658536585366, "grad_norm": 1.1937397718429565, "learning_rate": 1.3414227642276424e-05, "loss": 0.071, "step": 81005 }, { "epoch": 0.9879268292682927, "grad_norm": 0.5035638213157654, "learning_rate": 1.3413821138211382e-05, "loss": 0.1072, "step": 81010 }, { "epoch": 0.9879878048780488, "grad_norm": 0.6522389650344849, "learning_rate": 1.3413414634146342e-05, "loss": 0.0342, "step": 81015 }, { "epoch": 0.9880487804878049, "grad_norm": 1.4333947896957397, "learning_rate": 1.3413008130081301e-05, "loss": 0.0181, "step": 81020 }, { "epoch": 0.988109756097561, "grad_norm": 0.42007365822792053, "learning_rate": 1.3412601626016261e-05, "loss": 0.0375, "step": 81025 }, { "epoch": 0.9881707317073171, "grad_norm": 0.240243598818779, "learning_rate": 1.3412195121951221e-05, "loss": 0.0309, "step": 81030 }, { "epoch": 0.9882317073170732, "grad_norm": 0.10954838246107101, "learning_rate": 1.341178861788618e-05, "loss": 0.0443, "step": 81035 }, { "epoch": 0.9882926829268293, "grad_norm": 0.4647369980812073, "learning_rate": 1.3411382113821139e-05, "loss": 0.0506, "step": 81040 }, { "epoch": 0.9883536585365854, "grad_norm": 0.5197798609733582, "learning_rate": 1.3410975609756099e-05, "loss": 0.0307, "step": 81045 }, { "epoch": 0.9884146341463415, "grad_norm": 0.5135193467140198, "learning_rate": 1.3410569105691059e-05, "loss": 0.0455, "step": 81050 }, { "epoch": 0.9884756097560976, "grad_norm": 0.5251172184944153, "learning_rate": 1.3410162601626018e-05, "loss": 0.0142, "step": 81055 }, { "epoch": 0.9885365853658536, "grad_norm": 0.608483612537384, "learning_rate": 1.3409756097560978e-05, "loss": 0.0468, "step": 81060 }, { "epoch": 0.9885975609756098, "grad_norm": 0.657873272895813, "learning_rate": 1.3409349593495935e-05, "loss": 0.0397, "step": 81065 }, { "epoch": 0.9886585365853658, "grad_norm": 0.7865094542503357, "learning_rate": 1.3408943089430895e-05, "loss": 0.087, "step": 81070 }, { "epoch": 0.988719512195122, "grad_norm": 0.611779510974884, "learning_rate": 1.3408536585365854e-05, "loss": 0.0305, "step": 81075 }, { "epoch": 0.988780487804878, "grad_norm": 0.9092410206794739, "learning_rate": 1.3408130081300814e-05, "loss": 0.0579, "step": 81080 }, { "epoch": 0.9888414634146342, "grad_norm": 0.7593660950660706, "learning_rate": 1.3407723577235774e-05, "loss": 0.061, "step": 81085 }, { "epoch": 0.9889024390243902, "grad_norm": 0.6333224177360535, "learning_rate": 1.3407317073170734e-05, "loss": 0.0507, "step": 81090 }, { "epoch": 0.9889634146341464, "grad_norm": 0.40401846170425415, "learning_rate": 1.3406910569105692e-05, "loss": 0.0386, "step": 81095 }, { "epoch": 0.9890243902439024, "grad_norm": 0.5352569818496704, "learning_rate": 1.340650406504065e-05, "loss": 0.0638, "step": 81100 }, { "epoch": 0.9890853658536586, "grad_norm": 0.4975651800632477, "learning_rate": 1.340609756097561e-05, "loss": 0.0321, "step": 81105 }, { "epoch": 0.9891463414634146, "grad_norm": 0.630702018737793, "learning_rate": 1.340569105691057e-05, "loss": 0.0319, "step": 81110 }, { "epoch": 0.9892073170731708, "grad_norm": 0.1253466159105301, "learning_rate": 1.340528455284553e-05, "loss": 0.0433, "step": 81115 }, { "epoch": 0.9892682926829268, "grad_norm": 0.4456680715084076, "learning_rate": 1.340487804878049e-05, "loss": 0.0397, "step": 81120 }, { "epoch": 0.989329268292683, "grad_norm": 0.7103415131568909, "learning_rate": 1.3404471544715447e-05, "loss": 0.0336, "step": 81125 }, { "epoch": 0.989390243902439, "grad_norm": 0.6195449233055115, "learning_rate": 1.3404065040650407e-05, "loss": 0.0269, "step": 81130 }, { "epoch": 0.9894512195121952, "grad_norm": 0.6768099665641785, "learning_rate": 1.3403658536585367e-05, "loss": 0.0306, "step": 81135 }, { "epoch": 0.9895121951219512, "grad_norm": 0.4890460669994354, "learning_rate": 1.3403252032520327e-05, "loss": 0.0359, "step": 81140 }, { "epoch": 0.9895731707317074, "grad_norm": 0.6445631384849548, "learning_rate": 1.3402845528455287e-05, "loss": 0.0423, "step": 81145 }, { "epoch": 0.9896341463414634, "grad_norm": 0.6696475148200989, "learning_rate": 1.3402439024390247e-05, "loss": 0.0318, "step": 81150 }, { "epoch": 0.9896951219512196, "grad_norm": 0.6710439324378967, "learning_rate": 1.3402032520325203e-05, "loss": 0.0463, "step": 81155 }, { "epoch": 0.9897560975609756, "grad_norm": 1.1386237144470215, "learning_rate": 1.3401626016260163e-05, "loss": 0.0487, "step": 81160 }, { "epoch": 0.9898170731707318, "grad_norm": 0.562735915184021, "learning_rate": 1.3401219512195123e-05, "loss": 0.0324, "step": 81165 }, { "epoch": 0.9898780487804878, "grad_norm": 0.482363224029541, "learning_rate": 1.3400813008130083e-05, "loss": 0.0246, "step": 81170 }, { "epoch": 0.989939024390244, "grad_norm": 0.5564454197883606, "learning_rate": 1.3400406504065042e-05, "loss": 0.0355, "step": 81175 }, { "epoch": 0.99, "grad_norm": 0.29520976543426514, "learning_rate": 1.3400000000000002e-05, "loss": 0.0402, "step": 81180 }, { "epoch": 0.9900609756097561, "grad_norm": 0.6012548208236694, "learning_rate": 1.3399593495934959e-05, "loss": 0.0296, "step": 81185 }, { "epoch": 0.9901219512195122, "grad_norm": 0.38730981945991516, "learning_rate": 1.3399186991869918e-05, "loss": 0.0406, "step": 81190 }, { "epoch": 0.9901829268292683, "grad_norm": 1.4460867643356323, "learning_rate": 1.3398780487804878e-05, "loss": 0.0228, "step": 81195 }, { "epoch": 0.9902439024390244, "grad_norm": 0.7251154780387878, "learning_rate": 1.3398373983739838e-05, "loss": 0.0826, "step": 81200 }, { "epoch": 0.9903048780487805, "grad_norm": 0.3454187512397766, "learning_rate": 1.3397967479674798e-05, "loss": 0.0346, "step": 81205 }, { "epoch": 0.9903658536585366, "grad_norm": 0.7962641716003418, "learning_rate": 1.3397560975609758e-05, "loss": 0.0338, "step": 81210 }, { "epoch": 0.9904268292682927, "grad_norm": 0.6420829892158508, "learning_rate": 1.3397154471544716e-05, "loss": 0.0294, "step": 81215 }, { "epoch": 0.9904878048780488, "grad_norm": 0.24987074732780457, "learning_rate": 1.3396747967479676e-05, "loss": 0.0236, "step": 81220 }, { "epoch": 0.9905487804878049, "grad_norm": 0.5421448349952698, "learning_rate": 1.3396341463414635e-05, "loss": 0.0507, "step": 81225 }, { "epoch": 0.990609756097561, "grad_norm": 0.6497499942779541, "learning_rate": 1.3395934959349595e-05, "loss": 0.046, "step": 81230 }, { "epoch": 0.9906707317073171, "grad_norm": 0.39820051193237305, "learning_rate": 1.3395528455284555e-05, "loss": 0.0346, "step": 81235 }, { "epoch": 0.9907317073170732, "grad_norm": 0.409743070602417, "learning_rate": 1.3395121951219515e-05, "loss": 0.0425, "step": 81240 }, { "epoch": 0.9907926829268293, "grad_norm": 0.683716893196106, "learning_rate": 1.3394715447154471e-05, "loss": 0.0336, "step": 81245 }, { "epoch": 0.9908536585365854, "grad_norm": 0.5783455967903137, "learning_rate": 1.3394308943089431e-05, "loss": 0.0354, "step": 81250 }, { "epoch": 0.9909146341463415, "grad_norm": 0.285770058631897, "learning_rate": 1.3393902439024391e-05, "loss": 0.0447, "step": 81255 }, { "epoch": 0.9909756097560976, "grad_norm": 0.7199292778968811, "learning_rate": 1.339349593495935e-05, "loss": 0.0218, "step": 81260 }, { "epoch": 0.9910365853658537, "grad_norm": 0.5968541502952576, "learning_rate": 1.339308943089431e-05, "loss": 0.035, "step": 81265 }, { "epoch": 0.9910975609756097, "grad_norm": 0.3379609286785126, "learning_rate": 1.339268292682927e-05, "loss": 0.0349, "step": 81270 }, { "epoch": 0.9911585365853659, "grad_norm": 0.7665074467658997, "learning_rate": 1.3392276422764227e-05, "loss": 0.0425, "step": 81275 }, { "epoch": 0.9912195121951219, "grad_norm": 0.4722321331501007, "learning_rate": 1.3391869918699187e-05, "loss": 0.0494, "step": 81280 }, { "epoch": 0.9912804878048781, "grad_norm": 0.5359424352645874, "learning_rate": 1.3391463414634147e-05, "loss": 0.0387, "step": 81285 }, { "epoch": 0.9913414634146341, "grad_norm": 0.7697651982307434, "learning_rate": 1.3391056910569106e-05, "loss": 0.0374, "step": 81290 }, { "epoch": 0.9914024390243903, "grad_norm": 0.6618819236755371, "learning_rate": 1.3390650406504066e-05, "loss": 0.0227, "step": 81295 }, { "epoch": 0.9914634146341463, "grad_norm": 0.8098912835121155, "learning_rate": 1.3390243902439026e-05, "loss": 0.0245, "step": 81300 }, { "epoch": 0.9915243902439025, "grad_norm": 0.5337647199630737, "learning_rate": 1.3389837398373984e-05, "loss": 0.0303, "step": 81305 }, { "epoch": 0.9915853658536585, "grad_norm": 0.2054836004972458, "learning_rate": 1.3389430894308944e-05, "loss": 0.0334, "step": 81310 }, { "epoch": 0.9916463414634147, "grad_norm": 0.20911675691604614, "learning_rate": 1.3389024390243904e-05, "loss": 0.0224, "step": 81315 }, { "epoch": 0.9917073170731707, "grad_norm": 1.864645004272461, "learning_rate": 1.3388617886178864e-05, "loss": 0.0612, "step": 81320 }, { "epoch": 0.9917682926829269, "grad_norm": 0.42515674233436584, "learning_rate": 1.3388211382113823e-05, "loss": 0.039, "step": 81325 }, { "epoch": 0.9918292682926829, "grad_norm": 2.745023488998413, "learning_rate": 1.3387804878048782e-05, "loss": 0.0793, "step": 81330 }, { "epoch": 0.9918902439024391, "grad_norm": 0.3443699777126312, "learning_rate": 1.338739837398374e-05, "loss": 0.0259, "step": 81335 }, { "epoch": 0.9919512195121951, "grad_norm": 0.2923997938632965, "learning_rate": 1.33869918699187e-05, "loss": 0.0267, "step": 81340 }, { "epoch": 0.9920121951219513, "grad_norm": 0.9909288883209229, "learning_rate": 1.338658536585366e-05, "loss": 0.0447, "step": 81345 }, { "epoch": 0.9920731707317073, "grad_norm": 0.5840235352516174, "learning_rate": 1.3386178861788619e-05, "loss": 0.0313, "step": 81350 }, { "epoch": 0.9921341463414635, "grad_norm": 0.8500227928161621, "learning_rate": 1.3385772357723579e-05, "loss": 0.0467, "step": 81355 }, { "epoch": 0.9921951219512195, "grad_norm": 0.25470495223999023, "learning_rate": 1.3385365853658539e-05, "loss": 0.0496, "step": 81360 }, { "epoch": 0.9922560975609757, "grad_norm": 0.4980029761791229, "learning_rate": 1.3384959349593495e-05, "loss": 0.033, "step": 81365 }, { "epoch": 0.9923170731707317, "grad_norm": 0.7478543519973755, "learning_rate": 1.3384552845528455e-05, "loss": 0.0303, "step": 81370 }, { "epoch": 0.9923780487804879, "grad_norm": 0.24341773986816406, "learning_rate": 1.3384146341463415e-05, "loss": 0.0176, "step": 81375 }, { "epoch": 0.9924390243902439, "grad_norm": 0.4024169445037842, "learning_rate": 1.3383739837398375e-05, "loss": 0.0261, "step": 81380 }, { "epoch": 0.9925, "grad_norm": 0.6760255098342896, "learning_rate": 1.3383333333333335e-05, "loss": 0.0293, "step": 81385 }, { "epoch": 0.9925609756097561, "grad_norm": 0.26822128891944885, "learning_rate": 1.3382926829268294e-05, "loss": 0.0299, "step": 81390 }, { "epoch": 0.9926219512195122, "grad_norm": 0.32176658511161804, "learning_rate": 1.3382520325203252e-05, "loss": 0.0479, "step": 81395 }, { "epoch": 0.9926829268292683, "grad_norm": 0.7827244997024536, "learning_rate": 1.3382113821138212e-05, "loss": 0.0269, "step": 81400 }, { "epoch": 0.9927439024390244, "grad_norm": 1.2240710258483887, "learning_rate": 1.3381707317073172e-05, "loss": 0.0477, "step": 81405 }, { "epoch": 0.9928048780487805, "grad_norm": 0.38446876406669617, "learning_rate": 1.3381300813008132e-05, "loss": 0.0251, "step": 81410 }, { "epoch": 0.9928658536585366, "grad_norm": 0.5036425590515137, "learning_rate": 1.3380894308943092e-05, "loss": 0.073, "step": 81415 }, { "epoch": 0.9929268292682927, "grad_norm": 0.5427605509757996, "learning_rate": 1.338048780487805e-05, "loss": 0.05, "step": 81420 }, { "epoch": 0.9929878048780488, "grad_norm": 0.7093369364738464, "learning_rate": 1.3380081300813008e-05, "loss": 0.0382, "step": 81425 }, { "epoch": 0.9930487804878049, "grad_norm": 0.47775381803512573, "learning_rate": 1.3379674796747968e-05, "loss": 0.0495, "step": 81430 }, { "epoch": 0.993109756097561, "grad_norm": 0.2293032854795456, "learning_rate": 1.3379268292682928e-05, "loss": 0.0306, "step": 81435 }, { "epoch": 0.9931707317073171, "grad_norm": 0.4053148627281189, "learning_rate": 1.3378861788617887e-05, "loss": 0.019, "step": 81440 }, { "epoch": 0.9932317073170732, "grad_norm": 0.5673492550849915, "learning_rate": 1.3378455284552847e-05, "loss": 0.0248, "step": 81445 }, { "epoch": 0.9932926829268293, "grad_norm": 0.4000227749347687, "learning_rate": 1.3378048780487807e-05, "loss": 0.0444, "step": 81450 }, { "epoch": 0.9933536585365854, "grad_norm": 0.4813506603240967, "learning_rate": 1.3377642276422764e-05, "loss": 0.0441, "step": 81455 }, { "epoch": 0.9934146341463415, "grad_norm": 0.5283697247505188, "learning_rate": 1.3377235772357723e-05, "loss": 0.0591, "step": 81460 }, { "epoch": 0.9934756097560976, "grad_norm": 1.0107040405273438, "learning_rate": 1.3376829268292683e-05, "loss": 0.0361, "step": 81465 }, { "epoch": 0.9935365853658537, "grad_norm": 0.4291567802429199, "learning_rate": 1.3376422764227643e-05, "loss": 0.063, "step": 81470 }, { "epoch": 0.9935975609756098, "grad_norm": 0.25258684158325195, "learning_rate": 1.3376016260162603e-05, "loss": 0.0361, "step": 81475 }, { "epoch": 0.9936585365853658, "grad_norm": 0.348518967628479, "learning_rate": 1.3375609756097563e-05, "loss": 0.0705, "step": 81480 }, { "epoch": 0.993719512195122, "grad_norm": 0.21296429634094238, "learning_rate": 1.337520325203252e-05, "loss": 0.0435, "step": 81485 }, { "epoch": 0.993780487804878, "grad_norm": 0.5425792336463928, "learning_rate": 1.337479674796748e-05, "loss": 0.027, "step": 81490 }, { "epoch": 0.9938414634146342, "grad_norm": 0.4521120488643646, "learning_rate": 1.337439024390244e-05, "loss": 0.0358, "step": 81495 }, { "epoch": 0.9939024390243902, "grad_norm": 1.1094392538070679, "learning_rate": 1.33739837398374e-05, "loss": 0.0805, "step": 81500 }, { "epoch": 0.9939634146341464, "grad_norm": 0.6550334095954895, "learning_rate": 1.337357723577236e-05, "loss": 0.0413, "step": 81505 }, { "epoch": 0.9940243902439024, "grad_norm": 0.6735662221908569, "learning_rate": 1.3373170731707318e-05, "loss": 0.0406, "step": 81510 }, { "epoch": 0.9940853658536586, "grad_norm": 0.32777535915374756, "learning_rate": 1.3372764227642276e-05, "loss": 0.0506, "step": 81515 }, { "epoch": 0.9941463414634146, "grad_norm": 0.14540287852287292, "learning_rate": 1.3372357723577236e-05, "loss": 0.0264, "step": 81520 }, { "epoch": 0.9942073170731708, "grad_norm": 0.7958234548568726, "learning_rate": 1.3371951219512196e-05, "loss": 0.0613, "step": 81525 }, { "epoch": 0.9942682926829268, "grad_norm": 0.29520905017852783, "learning_rate": 1.3371544715447156e-05, "loss": 0.021, "step": 81530 }, { "epoch": 0.994329268292683, "grad_norm": 0.6268213391304016, "learning_rate": 1.3371138211382116e-05, "loss": 0.0356, "step": 81535 }, { "epoch": 0.994390243902439, "grad_norm": 0.33559057116508484, "learning_rate": 1.3370731707317075e-05, "loss": 0.0391, "step": 81540 }, { "epoch": 0.9944512195121952, "grad_norm": 0.45914319157600403, "learning_rate": 1.3370325203252032e-05, "loss": 0.024, "step": 81545 }, { "epoch": 0.9945121951219512, "grad_norm": 0.24844728410243988, "learning_rate": 1.3369918699186992e-05, "loss": 0.0261, "step": 81550 }, { "epoch": 0.9945731707317074, "grad_norm": 0.11652537435293198, "learning_rate": 1.3369512195121952e-05, "loss": 0.0238, "step": 81555 }, { "epoch": 0.9946341463414634, "grad_norm": 0.6972858905792236, "learning_rate": 1.3369105691056911e-05, "loss": 0.0545, "step": 81560 }, { "epoch": 0.9946951219512196, "grad_norm": 0.4403286576271057, "learning_rate": 1.3368699186991871e-05, "loss": 0.0305, "step": 81565 }, { "epoch": 0.9947560975609756, "grad_norm": 0.6568981409072876, "learning_rate": 1.3368292682926831e-05, "loss": 0.07, "step": 81570 }, { "epoch": 0.9948170731707318, "grad_norm": 1.3159945011138916, "learning_rate": 1.3367886178861789e-05, "loss": 0.0552, "step": 81575 }, { "epoch": 0.9948780487804878, "grad_norm": 0.6159153580665588, "learning_rate": 1.3367479674796749e-05, "loss": 0.0681, "step": 81580 }, { "epoch": 0.994939024390244, "grad_norm": 0.36219972372055054, "learning_rate": 1.3367073170731709e-05, "loss": 0.0277, "step": 81585 }, { "epoch": 0.995, "grad_norm": 0.5029848217964172, "learning_rate": 1.3366666666666669e-05, "loss": 0.0694, "step": 81590 }, { "epoch": 0.9950609756097561, "grad_norm": 0.3062666058540344, "learning_rate": 1.3366260162601627e-05, "loss": 0.0467, "step": 81595 }, { "epoch": 0.9951219512195122, "grad_norm": 0.635311484336853, "learning_rate": 1.3365853658536587e-05, "loss": 0.0711, "step": 81600 }, { "epoch": 0.9951829268292683, "grad_norm": 0.6310042142868042, "learning_rate": 1.3365447154471545e-05, "loss": 0.0353, "step": 81605 }, { "epoch": 0.9952439024390244, "grad_norm": 0.5034379363059998, "learning_rate": 1.3365040650406504e-05, "loss": 0.0289, "step": 81610 }, { "epoch": 0.9953048780487805, "grad_norm": 0.34343409538269043, "learning_rate": 1.3364634146341464e-05, "loss": 0.0361, "step": 81615 }, { "epoch": 0.9953658536585366, "grad_norm": 0.3149370849132538, "learning_rate": 1.3364227642276424e-05, "loss": 0.0559, "step": 81620 }, { "epoch": 0.9954268292682927, "grad_norm": 0.4322220981121063, "learning_rate": 1.3363821138211384e-05, "loss": 0.0251, "step": 81625 }, { "epoch": 0.9954878048780488, "grad_norm": 0.7319861054420471, "learning_rate": 1.3363414634146344e-05, "loss": 0.0277, "step": 81630 }, { "epoch": 0.9955487804878049, "grad_norm": 0.48107054829597473, "learning_rate": 1.33630081300813e-05, "loss": 0.0525, "step": 81635 }, { "epoch": 0.995609756097561, "grad_norm": 0.18132750689983368, "learning_rate": 1.336260162601626e-05, "loss": 0.0132, "step": 81640 }, { "epoch": 0.9956707317073171, "grad_norm": 0.3553664982318878, "learning_rate": 1.336219512195122e-05, "loss": 0.0399, "step": 81645 }, { "epoch": 0.9957317073170732, "grad_norm": 0.8732177019119263, "learning_rate": 1.336178861788618e-05, "loss": 0.0449, "step": 81650 }, { "epoch": 0.9957926829268293, "grad_norm": 0.7025429010391235, "learning_rate": 1.336138211382114e-05, "loss": 0.0423, "step": 81655 }, { "epoch": 0.9958536585365854, "grad_norm": 0.38135427236557007, "learning_rate": 1.33609756097561e-05, "loss": 0.0346, "step": 81660 }, { "epoch": 0.9959146341463415, "grad_norm": 0.7047069072723389, "learning_rate": 1.3360569105691057e-05, "loss": 0.0427, "step": 81665 }, { "epoch": 0.9959756097560976, "grad_norm": 0.5379379987716675, "learning_rate": 1.3360162601626017e-05, "loss": 0.0415, "step": 81670 }, { "epoch": 0.9960365853658537, "grad_norm": 0.2506932020187378, "learning_rate": 1.3359756097560977e-05, "loss": 0.0396, "step": 81675 }, { "epoch": 0.9960975609756098, "grad_norm": 0.36117956042289734, "learning_rate": 1.3359349593495937e-05, "loss": 0.0495, "step": 81680 }, { "epoch": 0.9961585365853659, "grad_norm": 1.057213306427002, "learning_rate": 1.3358943089430895e-05, "loss": 0.037, "step": 81685 }, { "epoch": 0.996219512195122, "grad_norm": 0.6155073046684265, "learning_rate": 1.3358536585365855e-05, "loss": 0.0596, "step": 81690 }, { "epoch": 0.9962804878048781, "grad_norm": 0.0738711729645729, "learning_rate": 1.3358130081300813e-05, "loss": 0.0354, "step": 81695 }, { "epoch": 0.9963414634146341, "grad_norm": 0.72386634349823, "learning_rate": 1.3357723577235773e-05, "loss": 0.0539, "step": 81700 }, { "epoch": 0.9964024390243903, "grad_norm": 0.8076116442680359, "learning_rate": 1.3357317073170733e-05, "loss": 0.0365, "step": 81705 }, { "epoch": 0.9964634146341463, "grad_norm": 0.7523083090782166, "learning_rate": 1.3356910569105692e-05, "loss": 0.0741, "step": 81710 }, { "epoch": 0.9965243902439025, "grad_norm": 0.6807393431663513, "learning_rate": 1.3356504065040652e-05, "loss": 0.0355, "step": 81715 }, { "epoch": 0.9965853658536585, "grad_norm": 0.687554657459259, "learning_rate": 1.3356097560975612e-05, "loss": 0.0755, "step": 81720 }, { "epoch": 0.9966463414634147, "grad_norm": 0.663661539554596, "learning_rate": 1.3355691056910569e-05, "loss": 0.0296, "step": 81725 }, { "epoch": 0.9967073170731707, "grad_norm": 0.45094913244247437, "learning_rate": 1.3355284552845528e-05, "loss": 0.0223, "step": 81730 }, { "epoch": 0.9967682926829269, "grad_norm": 0.630315899848938, "learning_rate": 1.3354878048780488e-05, "loss": 0.0257, "step": 81735 }, { "epoch": 0.9968292682926829, "grad_norm": 0.6912282705307007, "learning_rate": 1.3354471544715448e-05, "loss": 0.0235, "step": 81740 }, { "epoch": 0.9968902439024391, "grad_norm": 0.6530377268791199, "learning_rate": 1.3354065040650408e-05, "loss": 0.0468, "step": 81745 }, { "epoch": 0.9969512195121951, "grad_norm": 1.4276670217514038, "learning_rate": 1.3353658536585368e-05, "loss": 0.0556, "step": 81750 }, { "epoch": 0.9970121951219513, "grad_norm": 0.3268297016620636, "learning_rate": 1.3353252032520326e-05, "loss": 0.0189, "step": 81755 }, { "epoch": 0.9970731707317073, "grad_norm": 0.5384563207626343, "learning_rate": 1.3352845528455286e-05, "loss": 0.0599, "step": 81760 }, { "epoch": 0.9971341463414635, "grad_norm": 0.5899960398674011, "learning_rate": 1.3352439024390245e-05, "loss": 0.0221, "step": 81765 }, { "epoch": 0.9971951219512195, "grad_norm": 0.647114098072052, "learning_rate": 1.3352032520325204e-05, "loss": 0.0513, "step": 81770 }, { "epoch": 0.9972560975609757, "grad_norm": 2.0115952491760254, "learning_rate": 1.3351626016260163e-05, "loss": 0.0472, "step": 81775 }, { "epoch": 0.9973170731707317, "grad_norm": 3.4862024784088135, "learning_rate": 1.3351219512195123e-05, "loss": 0.0406, "step": 81780 }, { "epoch": 0.9973780487804879, "grad_norm": 1.0577770471572876, "learning_rate": 1.3350813008130081e-05, "loss": 0.0373, "step": 81785 }, { "epoch": 0.9974390243902439, "grad_norm": 0.32273924350738525, "learning_rate": 1.3350406504065041e-05, "loss": 0.0559, "step": 81790 }, { "epoch": 0.9975, "grad_norm": 0.5310530066490173, "learning_rate": 1.3350000000000001e-05, "loss": 0.0279, "step": 81795 }, { "epoch": 0.9975609756097561, "grad_norm": 0.20810696482658386, "learning_rate": 1.334959349593496e-05, "loss": 0.0433, "step": 81800 }, { "epoch": 0.9976219512195122, "grad_norm": 0.19763395190238953, "learning_rate": 1.334918699186992e-05, "loss": 0.0206, "step": 81805 }, { "epoch": 0.9976829268292683, "grad_norm": 0.5006973147392273, "learning_rate": 1.334878048780488e-05, "loss": 0.0298, "step": 81810 }, { "epoch": 0.9977439024390244, "grad_norm": 1.112681269645691, "learning_rate": 1.3348373983739837e-05, "loss": 0.0456, "step": 81815 }, { "epoch": 0.9978048780487805, "grad_norm": 0.22833362221717834, "learning_rate": 1.3347967479674797e-05, "loss": 0.0533, "step": 81820 }, { "epoch": 0.9978658536585366, "grad_norm": 0.30721354484558105, "learning_rate": 1.3347560975609756e-05, "loss": 0.0178, "step": 81825 }, { "epoch": 0.9979268292682927, "grad_norm": 0.6880255341529846, "learning_rate": 1.3347154471544716e-05, "loss": 0.024, "step": 81830 }, { "epoch": 0.9979878048780488, "grad_norm": 0.6495354175567627, "learning_rate": 1.3346747967479676e-05, "loss": 0.0526, "step": 81835 }, { "epoch": 0.9980487804878049, "grad_norm": 1.0689798593521118, "learning_rate": 1.3346341463414636e-05, "loss": 0.0671, "step": 81840 }, { "epoch": 0.998109756097561, "grad_norm": 0.7096878886222839, "learning_rate": 1.3345934959349594e-05, "loss": 0.056, "step": 81845 }, { "epoch": 0.9981707317073171, "grad_norm": 1.597687005996704, "learning_rate": 1.3345528455284554e-05, "loss": 0.0328, "step": 81850 }, { "epoch": 0.9982317073170732, "grad_norm": 0.8948290348052979, "learning_rate": 1.3345121951219514e-05, "loss": 0.0669, "step": 81855 }, { "epoch": 0.9982926829268293, "grad_norm": 0.42767712473869324, "learning_rate": 1.3344715447154472e-05, "loss": 0.0264, "step": 81860 }, { "epoch": 0.9983536585365854, "grad_norm": 0.7417709231376648, "learning_rate": 1.3344308943089432e-05, "loss": 0.0323, "step": 81865 }, { "epoch": 0.9984146341463415, "grad_norm": 0.1909000128507614, "learning_rate": 1.3343902439024391e-05, "loss": 0.0201, "step": 81870 }, { "epoch": 0.9984756097560976, "grad_norm": 0.2652723491191864, "learning_rate": 1.334349593495935e-05, "loss": 0.0256, "step": 81875 }, { "epoch": 0.9985365853658537, "grad_norm": 0.4894905686378479, "learning_rate": 1.334308943089431e-05, "loss": 0.0399, "step": 81880 }, { "epoch": 0.9985975609756098, "grad_norm": 1.167328119277954, "learning_rate": 1.334268292682927e-05, "loss": 0.0407, "step": 81885 }, { "epoch": 0.9986585365853659, "grad_norm": 1.1951305866241455, "learning_rate": 1.3342276422764229e-05, "loss": 0.0601, "step": 81890 }, { "epoch": 0.998719512195122, "grad_norm": 0.6389905214309692, "learning_rate": 1.3341869918699189e-05, "loss": 0.0458, "step": 81895 }, { "epoch": 0.998780487804878, "grad_norm": 0.7169416546821594, "learning_rate": 1.3341463414634149e-05, "loss": 0.0409, "step": 81900 }, { "epoch": 0.9988414634146342, "grad_norm": 0.3931267261505127, "learning_rate": 1.3341056910569105e-05, "loss": 0.0261, "step": 81905 }, { "epoch": 0.9989024390243902, "grad_norm": 0.33579838275909424, "learning_rate": 1.3340650406504065e-05, "loss": 0.033, "step": 81910 }, { "epoch": 0.9989634146341464, "grad_norm": 0.7774484157562256, "learning_rate": 1.3340243902439025e-05, "loss": 0.0481, "step": 81915 }, { "epoch": 0.9990243902439024, "grad_norm": 0.7785080075263977, "learning_rate": 1.3339837398373985e-05, "loss": 0.0407, "step": 81920 }, { "epoch": 0.9990853658536586, "grad_norm": 0.4008927345275879, "learning_rate": 1.3339430894308944e-05, "loss": 0.0427, "step": 81925 }, { "epoch": 0.9991463414634146, "grad_norm": 0.42606326937675476, "learning_rate": 1.3339024390243904e-05, "loss": 0.0783, "step": 81930 }, { "epoch": 0.9992073170731708, "grad_norm": 0.23923224210739136, "learning_rate": 1.3338617886178862e-05, "loss": 0.0186, "step": 81935 }, { "epoch": 0.9992682926829268, "grad_norm": 0.8506394624710083, "learning_rate": 1.3338211382113822e-05, "loss": 0.0394, "step": 81940 }, { "epoch": 0.999329268292683, "grad_norm": 0.9602214694023132, "learning_rate": 1.3337804878048782e-05, "loss": 0.0375, "step": 81945 }, { "epoch": 0.999390243902439, "grad_norm": 0.3650228977203369, "learning_rate": 1.333739837398374e-05, "loss": 0.0289, "step": 81950 }, { "epoch": 0.9994512195121952, "grad_norm": 0.6778674721717834, "learning_rate": 1.33369918699187e-05, "loss": 0.0365, "step": 81955 }, { "epoch": 0.9995121951219512, "grad_norm": 0.6902416944503784, "learning_rate": 1.333658536585366e-05, "loss": 0.0561, "step": 81960 }, { "epoch": 0.9995731707317074, "grad_norm": 0.5933471322059631, "learning_rate": 1.3336178861788618e-05, "loss": 0.0275, "step": 81965 }, { "epoch": 0.9996341463414634, "grad_norm": 0.44846388697624207, "learning_rate": 1.3335772357723578e-05, "loss": 0.0206, "step": 81970 }, { "epoch": 0.9996951219512196, "grad_norm": 0.725297212600708, "learning_rate": 1.3335365853658538e-05, "loss": 0.0427, "step": 81975 }, { "epoch": 0.9997560975609756, "grad_norm": 0.6950681805610657, "learning_rate": 1.3334959349593497e-05, "loss": 0.049, "step": 81980 }, { "epoch": 0.9998170731707318, "grad_norm": 1.0724985599517822, "learning_rate": 1.3334552845528457e-05, "loss": 0.0531, "step": 81985 }, { "epoch": 0.9998780487804878, "grad_norm": 0.9090952277183533, "learning_rate": 1.3334146341463417e-05, "loss": 0.0352, "step": 81990 }, { "epoch": 0.999939024390244, "grad_norm": 0.7096337676048279, "learning_rate": 1.3333739837398373e-05, "loss": 0.0535, "step": 81995 }, { "epoch": 1.0, "grad_norm": 0.18459957838058472, "learning_rate": 1.3333333333333333e-05, "loss": 0.0202, "step": 82000 }, { "epoch": 1.0, "eval_loss": 0.20438289642333984, "eval_runtime": 2655.9492, "eval_samples_per_second": 47.064, "eval_steps_per_second": 5.883, "step": 82000 } ], "logging_steps": 5, "max_steps": 246000, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.93287131201536e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }