{ "best_metric": null, "best_model_checkpoint": null, "epoch": 24.896265560165975, "eval_steps": 500, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008298755186721991, "grad_norm": null, "learning_rate": 2e-05, "loss": 4.438, "step": 1 }, { "epoch": 0.0016597510373443983, "grad_norm": null, "learning_rate": 2e-05, "loss": 2.7802, "step": 2 }, { "epoch": 0.0024896265560165973, "grad_norm": 2.8055419921875, "learning_rate": 1.9999668049792534e-05, "loss": 3.1013, "step": 3 }, { "epoch": 0.0033195020746887966, "grad_norm": 4.739878177642822, "learning_rate": 1.9999336099585066e-05, "loss": 3.5187, "step": 4 }, { "epoch": 0.004149377593360996, "grad_norm": 5.71291446685791, "learning_rate": 1.9999004149377595e-05, "loss": 3.6288, "step": 5 }, { "epoch": 0.004979253112033195, "grad_norm": 4.758028984069824, "learning_rate": 1.9998672199170127e-05, "loss": 3.6327, "step": 6 }, { "epoch": 0.005809128630705394, "grad_norm": 2.4280595779418945, "learning_rate": 1.999834024896266e-05, "loss": 2.1611, "step": 7 }, { "epoch": 0.006639004149377593, "grad_norm": 3.7824857234954834, "learning_rate": 1.999800829875519e-05, "loss": 2.6463, "step": 8 }, { "epoch": 0.007468879668049793, "grad_norm": 3.9658751487731934, "learning_rate": 1.999767634854772e-05, "loss": 3.4621, "step": 9 }, { "epoch": 0.008298755186721992, "grad_norm": 5.018659591674805, "learning_rate": 1.9997344398340252e-05, "loss": 3.6079, "step": 10 }, { "epoch": 0.009128630705394191, "grad_norm": 4.934898853302002, "learning_rate": 1.999701244813278e-05, "loss": 2.8582, "step": 11 }, { "epoch": 0.00995850622406639, "grad_norm": 5.521002769470215, "learning_rate": 1.9996680497925313e-05, "loss": 3.1373, "step": 12 }, { "epoch": 0.010788381742738589, "grad_norm": 6.408207416534424, "learning_rate": 1.9996348547717845e-05, "loss": 4.3224, "step": 13 }, { "epoch": 0.011618257261410789, "grad_norm": 4.0805206298828125, 
"learning_rate": 1.9996016597510373e-05, "loss": 2.8426, "step": 14 }, { "epoch": 0.012448132780082987, "grad_norm": 4.6477484703063965, "learning_rate": 1.9995684647302905e-05, "loss": 3.4745, "step": 15 }, { "epoch": 0.013278008298755186, "grad_norm": 5.83081579208374, "learning_rate": 1.9995352697095438e-05, "loss": 3.2408, "step": 16 }, { "epoch": 0.014107883817427386, "grad_norm": 4.970170021057129, "learning_rate": 1.9995020746887966e-05, "loss": 3.121, "step": 17 }, { "epoch": 0.014937759336099586, "grad_norm": 4.323098659515381, "learning_rate": 1.99946887966805e-05, "loss": 2.8191, "step": 18 }, { "epoch": 0.015767634854771784, "grad_norm": 4.7327799797058105, "learning_rate": 1.999435684647303e-05, "loss": 3.365, "step": 19 }, { "epoch": 0.016597510373443983, "grad_norm": 3.181143283843994, "learning_rate": 1.9994024896265563e-05, "loss": 2.6976, "step": 20 }, { "epoch": 0.017427385892116183, "grad_norm": 4.719732761383057, "learning_rate": 1.999369294605809e-05, "loss": 3.1719, "step": 21 }, { "epoch": 0.018257261410788383, "grad_norm": 6.204226493835449, "learning_rate": 1.9993360995850623e-05, "loss": 3.4642, "step": 22 }, { "epoch": 0.019087136929460582, "grad_norm": 3.0302820205688477, "learning_rate": 1.9993029045643156e-05, "loss": 1.8101, "step": 23 }, { "epoch": 0.01991701244813278, "grad_norm": 4.1779465675354, "learning_rate": 1.9992697095435688e-05, "loss": 2.9551, "step": 24 }, { "epoch": 0.02074688796680498, "grad_norm": 4.637296199798584, "learning_rate": 1.9992365145228216e-05, "loss": 2.4445, "step": 25 }, { "epoch": 0.021576763485477178, "grad_norm": 4.140933036804199, "learning_rate": 1.999203319502075e-05, "loss": 2.6725, "step": 26 }, { "epoch": 0.022406639004149378, "grad_norm": null, "learning_rate": 1.999203319502075e-05, "loss": 3.5596, "step": 27 }, { "epoch": 0.023236514522821577, "grad_norm": 5.132777214050293, "learning_rate": 1.999170124481328e-05, "loss": 2.3707, "step": 28 }, { "epoch": 0.024066390041493777, "grad_norm": 
7.892861843109131, "learning_rate": 1.9991369294605813e-05, "loss": 3.9838, "step": 29 }, { "epoch": 0.024896265560165973, "grad_norm": 11.268702507019043, "learning_rate": 1.999103734439834e-05, "loss": 2.924, "step": 30 }, { "epoch": 0.025726141078838173, "grad_norm": 3.3129234313964844, "learning_rate": 1.9990705394190874e-05, "loss": 2.0094, "step": 31 }, { "epoch": 0.026556016597510373, "grad_norm": 8.532336235046387, "learning_rate": 1.9990373443983406e-05, "loss": 2.7557, "step": 32 }, { "epoch": 0.027385892116182572, "grad_norm": 5.4265313148498535, "learning_rate": 1.9990041493775934e-05, "loss": 2.8659, "step": 33 }, { "epoch": 0.028215767634854772, "grad_norm": 6.699700832366943, "learning_rate": 1.9989709543568466e-05, "loss": 2.4746, "step": 34 }, { "epoch": 0.029045643153526972, "grad_norm": 4.7542243003845215, "learning_rate": 1.9989377593360995e-05, "loss": 2.6018, "step": 35 }, { "epoch": 0.02987551867219917, "grad_norm": 4.0629425048828125, "learning_rate": 1.9989045643153527e-05, "loss": 2.6742, "step": 36 }, { "epoch": 0.030705394190871368, "grad_norm": 7.385385990142822, "learning_rate": 1.998871369294606e-05, "loss": 3.1315, "step": 37 }, { "epoch": 0.03153526970954357, "grad_norm": 5.426710605621338, "learning_rate": 1.998838174273859e-05, "loss": 2.5375, "step": 38 }, { "epoch": 0.03236514522821577, "grad_norm": 11.000356674194336, "learning_rate": 1.998804979253112e-05, "loss": 3.4459, "step": 39 }, { "epoch": 0.03319502074688797, "grad_norm": 8.161194801330566, "learning_rate": 1.9987717842323652e-05, "loss": 2.8874, "step": 40 }, { "epoch": 0.03402489626556016, "grad_norm": 8.697426795959473, "learning_rate": 1.9987385892116184e-05, "loss": 2.3664, "step": 41 }, { "epoch": 0.034854771784232366, "grad_norm": 20.174734115600586, "learning_rate": 1.9987053941908717e-05, "loss": 3.2438, "step": 42 }, { "epoch": 0.03568464730290456, "grad_norm": 10.48316764831543, "learning_rate": 1.9986721991701245e-05, "loss": 3.4882, "step": 43 }, { 
"epoch": 0.036514522821576766, "grad_norm": 7.954885482788086, "learning_rate": 1.9986390041493777e-05, "loss": 2.5935, "step": 44 }, { "epoch": 0.03734439834024896, "grad_norm": 10.628199577331543, "learning_rate": 1.998605809128631e-05, "loss": 3.3418, "step": 45 }, { "epoch": 0.038174273858921165, "grad_norm": 7.746209144592285, "learning_rate": 1.998572614107884e-05, "loss": 2.4324, "step": 46 }, { "epoch": 0.03900414937759336, "grad_norm": 6.404862880706787, "learning_rate": 1.998539419087137e-05, "loss": 2.8026, "step": 47 }, { "epoch": 0.03983402489626556, "grad_norm": 10.133047103881836, "learning_rate": 1.9985062240663902e-05, "loss": 3.5225, "step": 48 }, { "epoch": 0.04066390041493776, "grad_norm": 6.768952369689941, "learning_rate": 1.9984730290456435e-05, "loss": 2.4584, "step": 49 }, { "epoch": 0.04149377593360996, "grad_norm": 10.809011459350586, "learning_rate": 1.9984398340248967e-05, "loss": 2.9003, "step": 50 }, { "epoch": 0.04232365145228216, "grad_norm": 13.164773941040039, "learning_rate": 1.9984066390041495e-05, "loss": 2.8724, "step": 51 }, { "epoch": 0.043153526970954356, "grad_norm": 4.188051223754883, "learning_rate": 1.9983734439834027e-05, "loss": 2.2148, "step": 52 }, { "epoch": 0.04398340248962656, "grad_norm": 6.970693111419678, "learning_rate": 1.9983402489626556e-05, "loss": 2.3503, "step": 53 }, { "epoch": 0.044813278008298756, "grad_norm": 7.918090343475342, "learning_rate": 1.9983070539419088e-05, "loss": 2.4995, "step": 54 }, { "epoch": 0.04564315352697095, "grad_norm": 9.855175018310547, "learning_rate": 1.998273858921162e-05, "loss": 2.5236, "step": 55 }, { "epoch": 0.046473029045643155, "grad_norm": 6.014580726623535, "learning_rate": 1.998240663900415e-05, "loss": 2.1542, "step": 56 }, { "epoch": 0.04730290456431535, "grad_norm": 7.328947067260742, "learning_rate": 1.998207468879668e-05, "loss": 2.6489, "step": 57 }, { "epoch": 0.048132780082987554, "grad_norm": 9.107131004333496, "learning_rate": 1.9981742738589213e-05, 
"loss": 2.559, "step": 58 }, { "epoch": 0.04896265560165975, "grad_norm": 3.688723087310791, "learning_rate": 1.9981410788381745e-05, "loss": 1.7674, "step": 59 }, { "epoch": 0.04979253112033195, "grad_norm": 6.811691761016846, "learning_rate": 1.9981078838174274e-05, "loss": 2.1288, "step": 60 }, { "epoch": 0.05062240663900415, "grad_norm": 5.053576946258545, "learning_rate": 1.9980746887966806e-05, "loss": 1.8744, "step": 61 }, { "epoch": 0.051452282157676346, "grad_norm": 6.623212814331055, "learning_rate": 1.998041493775934e-05, "loss": 1.8102, "step": 62 }, { "epoch": 0.05228215767634855, "grad_norm": 8.690807342529297, "learning_rate": 1.998008298755187e-05, "loss": 2.2465, "step": 63 }, { "epoch": 0.053112033195020746, "grad_norm": 10.452146530151367, "learning_rate": 1.99797510373444e-05, "loss": 2.6647, "step": 64 }, { "epoch": 0.05394190871369295, "grad_norm": 5.793288707733154, "learning_rate": 1.997941908713693e-05, "loss": 1.5447, "step": 65 }, { "epoch": 0.054771784232365145, "grad_norm": 8.479602813720703, "learning_rate": 1.9979087136929463e-05, "loss": 3.0203, "step": 66 }, { "epoch": 0.05560165975103734, "grad_norm": 9.427205085754395, "learning_rate": 1.9978755186721996e-05, "loss": 2.7403, "step": 67 }, { "epoch": 0.056431535269709544, "grad_norm": 12.148172378540039, "learning_rate": 1.9978423236514524e-05, "loss": 3.2964, "step": 68 }, { "epoch": 0.05726141078838174, "grad_norm": 10.205883026123047, "learning_rate": 1.9978091286307056e-05, "loss": 2.5866, "step": 69 }, { "epoch": 0.058091286307053944, "grad_norm": 6.960912227630615, "learning_rate": 1.997775933609959e-05, "loss": 2.4715, "step": 70 }, { "epoch": 0.05892116182572614, "grad_norm": 8.27250862121582, "learning_rate": 1.9977427385892117e-05, "loss": 2.1081, "step": 71 }, { "epoch": 0.05975103734439834, "grad_norm": 17.181142807006836, "learning_rate": 1.997709543568465e-05, "loss": 2.0039, "step": 72 }, { "epoch": 0.06058091286307054, "grad_norm": 6.659110069274902, 
"learning_rate": 1.9976763485477178e-05, "loss": 2.7739, "step": 73 }, { "epoch": 0.061410788381742736, "grad_norm": 11.762589454650879, "learning_rate": 1.997643153526971e-05, "loss": 2.635, "step": 74 }, { "epoch": 0.06224066390041494, "grad_norm": 14.24266242980957, "learning_rate": 1.9976099585062242e-05, "loss": 2.3992, "step": 75 }, { "epoch": 0.06307053941908713, "grad_norm": 10.986048698425293, "learning_rate": 1.997576763485477e-05, "loss": 2.2773, "step": 76 }, { "epoch": 0.06390041493775933, "grad_norm": 10.79555892944336, "learning_rate": 1.9975435684647303e-05, "loss": 2.0013, "step": 77 }, { "epoch": 0.06473029045643154, "grad_norm": 9.084288597106934, "learning_rate": 1.9975103734439835e-05, "loss": 2.5334, "step": 78 }, { "epoch": 0.06556016597510374, "grad_norm": 9.437190055847168, "learning_rate": 1.9974771784232367e-05, "loss": 2.341, "step": 79 }, { "epoch": 0.06639004149377593, "grad_norm": 5.838840484619141, "learning_rate": 1.9974439834024896e-05, "loss": 1.8882, "step": 80 }, { "epoch": 0.06721991701244813, "grad_norm": null, "learning_rate": 1.9974439834024896e-05, "loss": 2.1929, "step": 81 }, { "epoch": 0.06804979253112033, "grad_norm": 11.496440887451172, "learning_rate": 1.9974107883817428e-05, "loss": 2.3558, "step": 82 }, { "epoch": 0.06887966804979254, "grad_norm": 8.687644004821777, "learning_rate": 1.997377593360996e-05, "loss": 2.3281, "step": 83 }, { "epoch": 0.06970954356846473, "grad_norm": 16.702177047729492, "learning_rate": 1.9973443983402492e-05, "loss": 2.3158, "step": 84 }, { "epoch": 0.07053941908713693, "grad_norm": 8.99643611907959, "learning_rate": 1.9973112033195024e-05, "loss": 1.7171, "step": 85 }, { "epoch": 0.07136929460580912, "grad_norm": 10.340606689453125, "learning_rate": 1.9972780082987553e-05, "loss": 2.9184, "step": 86 }, { "epoch": 0.07219917012448132, "grad_norm": 16.338909149169922, "learning_rate": 1.9972448132780085e-05, "loss": 2.0109, "step": 87 }, { "epoch": 0.07302904564315353, "grad_norm": 
15.070080757141113, "learning_rate": 1.9972116182572617e-05, "loss": 3.131, "step": 88 }, { "epoch": 0.07385892116182573, "grad_norm": 29.919939041137695, "learning_rate": 1.997178423236515e-05, "loss": 2.6566, "step": 89 }, { "epoch": 0.07468879668049792, "grad_norm": 11.811314582824707, "learning_rate": 1.9971452282157678e-05, "loss": 2.2707, "step": 90 }, { "epoch": 0.07551867219917012, "grad_norm": 11.234865188598633, "learning_rate": 1.997112033195021e-05, "loss": 2.3183, "step": 91 }, { "epoch": 0.07634854771784233, "grad_norm": 5.611378192901611, "learning_rate": 1.997078838174274e-05, "loss": 1.8266, "step": 92 }, { "epoch": 0.07717842323651453, "grad_norm": 11.409871101379395, "learning_rate": 1.997045643153527e-05, "loss": 2.1099, "step": 93 }, { "epoch": 0.07800829875518672, "grad_norm": 14.931343078613281, "learning_rate": 1.9970124481327803e-05, "loss": 3.0408, "step": 94 }, { "epoch": 0.07883817427385892, "grad_norm": 13.530205726623535, "learning_rate": 1.9969792531120332e-05, "loss": 2.0524, "step": 95 }, { "epoch": 0.07966804979253111, "grad_norm": 10.057723045349121, "learning_rate": 1.9969460580912864e-05, "loss": 2.6261, "step": 96 }, { "epoch": 0.08049792531120332, "grad_norm": 10.64914321899414, "learning_rate": 1.9969128630705396e-05, "loss": 2.431, "step": 97 }, { "epoch": 0.08132780082987552, "grad_norm": 6.524570465087891, "learning_rate": 1.9968796680497925e-05, "loss": 1.8908, "step": 98 }, { "epoch": 0.08215767634854772, "grad_norm": 19.114322662353516, "learning_rate": 1.9968464730290457e-05, "loss": 2.6673, "step": 99 }, { "epoch": 0.08298755186721991, "grad_norm": 10.354879379272461, "learning_rate": 1.996813278008299e-05, "loss": 2.7377, "step": 100 }, { "epoch": 0.08381742738589211, "grad_norm": 8.883151054382324, "learning_rate": 1.996780082987552e-05, "loss": 2.1357, "step": 101 }, { "epoch": 0.08464730290456432, "grad_norm": 13.590032577514648, "learning_rate": 1.996746887966805e-05, "loss": 2.3366, "step": 102 }, { "epoch": 
0.08547717842323652, "grad_norm": 7.95885705947876, "learning_rate": 1.9967136929460582e-05, "loss": 2.3212, "step": 103 }, { "epoch": 0.08630705394190871, "grad_norm": 9.607882499694824, "learning_rate": 1.9966804979253114e-05, "loss": 1.611, "step": 104 }, { "epoch": 0.08713692946058091, "grad_norm": 5.797218322753906, "learning_rate": 1.9966473029045646e-05, "loss": 1.7986, "step": 105 }, { "epoch": 0.08796680497925312, "grad_norm": 6.384740829467773, "learning_rate": 1.9966141078838175e-05, "loss": 2.2716, "step": 106 }, { "epoch": 0.08879668049792531, "grad_norm": 7.59683084487915, "learning_rate": 1.9965809128630707e-05, "loss": 1.5948, "step": 107 }, { "epoch": 0.08962655601659751, "grad_norm": 8.958853721618652, "learning_rate": 1.996547717842324e-05, "loss": 2.5645, "step": 108 }, { "epoch": 0.09045643153526971, "grad_norm": 8.285823822021484, "learning_rate": 1.996514522821577e-05, "loss": 3.0129, "step": 109 }, { "epoch": 0.0912863070539419, "grad_norm": 11.840156555175781, "learning_rate": 1.99648132780083e-05, "loss": 2.3739, "step": 110 }, { "epoch": 0.09211618257261411, "grad_norm": 5.685758590698242, "learning_rate": 1.9964481327800832e-05, "loss": 2.5257, "step": 111 }, { "epoch": 0.09294605809128631, "grad_norm": 6.591436386108398, "learning_rate": 1.9964149377593364e-05, "loss": 2.2736, "step": 112 }, { "epoch": 0.0937759336099585, "grad_norm": 9.022760391235352, "learning_rate": 1.9963817427385893e-05, "loss": 2.2562, "step": 113 }, { "epoch": 0.0946058091286307, "grad_norm": 8.462565422058105, "learning_rate": 1.9963485477178425e-05, "loss": 1.8674, "step": 114 }, { "epoch": 0.0954356846473029, "grad_norm": 12.722848892211914, "learning_rate": 1.9963153526970954e-05, "loss": 2.1155, "step": 115 }, { "epoch": 0.09626556016597511, "grad_norm": 10.177489280700684, "learning_rate": 1.9962821576763486e-05, "loss": 1.9779, "step": 116 }, { "epoch": 0.0970954356846473, "grad_norm": 14.792176246643066, "learning_rate": 1.9962489626556018e-05, "loss": 
2.5546, "step": 117 }, { "epoch": 0.0979253112033195, "grad_norm": 8.576160430908203, "learning_rate": 1.996215767634855e-05, "loss": 2.434, "step": 118 }, { "epoch": 0.0987551867219917, "grad_norm": 6.974018096923828, "learning_rate": 1.996182572614108e-05, "loss": 2.2004, "step": 119 }, { "epoch": 0.0995850622406639, "grad_norm": 8.307764053344727, "learning_rate": 1.996149377593361e-05, "loss": 2.087, "step": 120 }, { "epoch": 0.1004149377593361, "grad_norm": 5.7631025314331055, "learning_rate": 1.9961161825726143e-05, "loss": 1.5241, "step": 121 }, { "epoch": 0.1012448132780083, "grad_norm": 7.102991580963135, "learning_rate": 1.9960829875518675e-05, "loss": 2.0972, "step": 122 }, { "epoch": 0.1020746887966805, "grad_norm": 8.783501625061035, "learning_rate": 1.9960497925311204e-05, "loss": 2.4027, "step": 123 }, { "epoch": 0.10290456431535269, "grad_norm": 7.967042446136475, "learning_rate": 1.9960165975103736e-05, "loss": 1.7956, "step": 124 }, { "epoch": 0.1037344398340249, "grad_norm": 13.175877571105957, "learning_rate": 1.9959834024896268e-05, "loss": 2.1222, "step": 125 }, { "epoch": 0.1045643153526971, "grad_norm": 11.574004173278809, "learning_rate": 1.99595020746888e-05, "loss": 2.2748, "step": 126 }, { "epoch": 0.1053941908713693, "grad_norm": 4.713891506195068, "learning_rate": 1.995917012448133e-05, "loss": 1.278, "step": 127 }, { "epoch": 0.10622406639004149, "grad_norm": 10.678647994995117, "learning_rate": 1.995883817427386e-05, "loss": 2.356, "step": 128 }, { "epoch": 0.10705394190871369, "grad_norm": 7.1327595710754395, "learning_rate": 1.9958506224066393e-05, "loss": 1.6851, "step": 129 }, { "epoch": 0.1078838174273859, "grad_norm": 6.220240116119385, "learning_rate": 1.9958174273858922e-05, "loss": 2.0569, "step": 130 }, { "epoch": 0.1087136929460581, "grad_norm": 8.67856216430664, "learning_rate": 1.9957842323651454e-05, "loss": 1.8691, "step": 131 }, { "epoch": 0.10954356846473029, "grad_norm": 4.676248073577881, "learning_rate": 
1.9957510373443986e-05, "loss": 1.484, "step": 132 }, { "epoch": 0.11037344398340249, "grad_norm": 7.425815105438232, "learning_rate": 1.9957178423236515e-05, "loss": 2.1437, "step": 133 }, { "epoch": 0.11120331950207468, "grad_norm": 10.510612487792969, "learning_rate": 1.9956846473029047e-05, "loss": 1.5717, "step": 134 }, { "epoch": 0.11203319502074689, "grad_norm": 9.169523239135742, "learning_rate": 1.995651452282158e-05, "loss": 2.2657, "step": 135 }, { "epoch": 0.11286307053941909, "grad_norm": 7.721034049987793, "learning_rate": 1.9956182572614108e-05, "loss": 2.2077, "step": 136 }, { "epoch": 0.11369294605809128, "grad_norm": 9.134894371032715, "learning_rate": 1.995585062240664e-05, "loss": 2.2787, "step": 137 }, { "epoch": 0.11452282157676348, "grad_norm": 10.286320686340332, "learning_rate": 1.9955518672199172e-05, "loss": 2.7669, "step": 138 }, { "epoch": 0.11535269709543569, "grad_norm": 6.977259159088135, "learning_rate": 1.9955186721991704e-05, "loss": 1.7641, "step": 139 }, { "epoch": 0.11618257261410789, "grad_norm": 6.465917110443115, "learning_rate": 1.9954854771784233e-05, "loss": 1.5937, "step": 140 }, { "epoch": 0.11701244813278008, "grad_norm": 7.880545139312744, "learning_rate": 1.9954522821576765e-05, "loss": 1.7577, "step": 141 }, { "epoch": 0.11784232365145228, "grad_norm": 9.635154724121094, "learning_rate": 1.9954190871369297e-05, "loss": 2.41, "step": 142 }, { "epoch": 0.11867219917012448, "grad_norm": 13.164551734924316, "learning_rate": 1.995385892116183e-05, "loss": 1.999, "step": 143 }, { "epoch": 0.11950207468879669, "grad_norm": 9.794902801513672, "learning_rate": 1.9953526970954358e-05, "loss": 1.7057, "step": 144 }, { "epoch": 0.12033195020746888, "grad_norm": 7.929394721984863, "learning_rate": 1.995319502074689e-05, "loss": 2.0518, "step": 145 }, { "epoch": 0.12116182572614108, "grad_norm": 8.544232368469238, "learning_rate": 1.9952863070539422e-05, "loss": 2.5474, "step": 146 }, { "epoch": 0.12199170124481327, "grad_norm": 
5.953860759735107, "learning_rate": 1.9952531120331954e-05, "loss": 2.0104, "step": 147 }, { "epoch": 0.12282157676348547, "grad_norm": 12.995792388916016, "learning_rate": 1.9952199170124483e-05, "loss": 1.8219, "step": 148 }, { "epoch": 0.12365145228215768, "grad_norm": 4.448729991912842, "learning_rate": 1.9951867219917015e-05, "loss": 1.4387, "step": 149 }, { "epoch": 0.12448132780082988, "grad_norm": 6.048081398010254, "learning_rate": 1.9951535269709547e-05, "loss": 1.9145, "step": 150 }, { "epoch": 0.12531120331950207, "grad_norm": 6.079620838165283, "learning_rate": 1.9951203319502076e-05, "loss": 1.5149, "step": 151 }, { "epoch": 0.12614107883817427, "grad_norm": 9.201626777648926, "learning_rate": 1.9950871369294608e-05, "loss": 1.5422, "step": 152 }, { "epoch": 0.12697095435684647, "grad_norm": 13.805401802062988, "learning_rate": 1.9950539419087137e-05, "loss": 2.0316, "step": 153 }, { "epoch": 0.12780082987551866, "grad_norm": 13.520840644836426, "learning_rate": 1.995020746887967e-05, "loss": 1.9758, "step": 154 }, { "epoch": 0.12863070539419086, "grad_norm": 8.48114013671875, "learning_rate": 1.99498755186722e-05, "loss": 1.8878, "step": 155 }, { "epoch": 0.12946058091286308, "grad_norm": 11.503006935119629, "learning_rate": 1.994954356846473e-05, "loss": 2.5037, "step": 156 }, { "epoch": 0.13029045643153528, "grad_norm": 13.774365425109863, "learning_rate": 1.994921161825726e-05, "loss": 2.5114, "step": 157 }, { "epoch": 0.13112033195020747, "grad_norm": 11.80081844329834, "learning_rate": 1.9948879668049794e-05, "loss": 2.7875, "step": 158 }, { "epoch": 0.13195020746887967, "grad_norm": 10.773965835571289, "learning_rate": 1.9948547717842326e-05, "loss": 1.9843, "step": 159 }, { "epoch": 0.13278008298755187, "grad_norm": 14.357955932617188, "learning_rate": 1.9948215767634855e-05, "loss": 1.9232, "step": 160 }, { "epoch": 0.13360995850622406, "grad_norm": 4.998948574066162, "learning_rate": 1.9947883817427387e-05, "loss": 1.563, "step": 161 }, { 
"epoch": 0.13443983402489626, "grad_norm": 7.8385725021362305, "learning_rate": 1.994755186721992e-05, "loss": 2.0963, "step": 162 }, { "epoch": 0.13526970954356846, "grad_norm": 12.935052871704102, "learning_rate": 1.994721991701245e-05, "loss": 2.2704, "step": 163 }, { "epoch": 0.13609958506224065, "grad_norm": 7.446180820465088, "learning_rate": 1.9946887966804983e-05, "loss": 2.2667, "step": 164 }, { "epoch": 0.13692946058091288, "grad_norm": 9.82408332824707, "learning_rate": 1.9946556016597512e-05, "loss": 1.7222, "step": 165 }, { "epoch": 0.13775933609958507, "grad_norm": 9.074694633483887, "learning_rate": 1.9946224066390044e-05, "loss": 1.8997, "step": 166 }, { "epoch": 0.13858921161825727, "grad_norm": 8.734217643737793, "learning_rate": 1.9945892116182576e-05, "loss": 1.8576, "step": 167 }, { "epoch": 0.13941908713692946, "grad_norm": 13.096667289733887, "learning_rate": 1.9945560165975108e-05, "loss": 2.0711, "step": 168 }, { "epoch": 0.14024896265560166, "grad_norm": 9.062167167663574, "learning_rate": 1.9945228215767637e-05, "loss": 2.2493, "step": 169 }, { "epoch": 0.14107883817427386, "grad_norm": 8.470439910888672, "learning_rate": 1.994489626556017e-05, "loss": 2.4233, "step": 170 }, { "epoch": 0.14190871369294605, "grad_norm": 27.831579208374023, "learning_rate": 1.9944564315352698e-05, "loss": 1.9196, "step": 171 }, { "epoch": 0.14273858921161825, "grad_norm": 12.173110961914062, "learning_rate": 1.994423236514523e-05, "loss": 1.749, "step": 172 }, { "epoch": 0.14356846473029045, "grad_norm": 11.141422271728516, "learning_rate": 1.9943900414937762e-05, "loss": 1.5731, "step": 173 }, { "epoch": 0.14439834024896264, "grad_norm": 10.837520599365234, "learning_rate": 1.994356846473029e-05, "loss": 2.5507, "step": 174 }, { "epoch": 0.14522821576763487, "grad_norm": 16.37676429748535, "learning_rate": 1.9943236514522823e-05, "loss": 2.6086, "step": 175 }, { "epoch": 0.14605809128630706, "grad_norm": 11.117220878601074, "learning_rate": 
1.9942904564315355e-05, "loss": 2.1426, "step": 176 }, { "epoch": 0.14688796680497926, "grad_norm": 8.186538696289062, "learning_rate": 1.9942572614107883e-05, "loss": 1.3714, "step": 177 }, { "epoch": 0.14771784232365145, "grad_norm": 11.090169906616211, "learning_rate": 1.9942240663900416e-05, "loss": 2.751, "step": 178 }, { "epoch": 0.14854771784232365, "grad_norm": 7.188792705535889, "learning_rate": 1.9941908713692948e-05, "loss": 1.852, "step": 179 }, { "epoch": 0.14937759336099585, "grad_norm": 19.580026626586914, "learning_rate": 1.994157676348548e-05, "loss": 2.4711, "step": 180 }, { "epoch": 0.15020746887966804, "grad_norm": 10.350895881652832, "learning_rate": 1.994124481327801e-05, "loss": 2.317, "step": 181 }, { "epoch": 0.15103734439834024, "grad_norm": 7.561884880065918, "learning_rate": 1.994091286307054e-05, "loss": 2.3865, "step": 182 }, { "epoch": 0.15186721991701244, "grad_norm": 7.210818767547607, "learning_rate": 1.9940580912863073e-05, "loss": 1.6291, "step": 183 }, { "epoch": 0.15269709543568466, "grad_norm": 8.779282569885254, "learning_rate": 1.9940248962655605e-05, "loss": 1.5874, "step": 184 }, { "epoch": 0.15352697095435686, "grad_norm": 10.743186950683594, "learning_rate": 1.9939917012448134e-05, "loss": 2.142, "step": 185 }, { "epoch": 0.15435684647302905, "grad_norm": 10.731431007385254, "learning_rate": 1.9939585062240666e-05, "loss": 1.8054, "step": 186 }, { "epoch": 0.15518672199170125, "grad_norm": 5.9684367179870605, "learning_rate": 1.9939253112033198e-05, "loss": 1.9317, "step": 187 }, { "epoch": 0.15601659751037344, "grad_norm": 7.415689945220947, "learning_rate": 1.993892116182573e-05, "loss": 1.8014, "step": 188 }, { "epoch": 0.15684647302904564, "grad_norm": 6.5677618980407715, "learning_rate": 1.993858921161826e-05, "loss": 1.6924, "step": 189 }, { "epoch": 0.15767634854771784, "grad_norm": 10.093159675598145, "learning_rate": 1.993825726141079e-05, "loss": 1.9246, "step": 190 }, { "epoch": 0.15850622406639003, 
"grad_norm": 12.390091896057129, "learning_rate": 1.993792531120332e-05, "loss": 1.757, "step": 191 }, { "epoch": 0.15933609958506223, "grad_norm": 11.132200241088867, "learning_rate": 1.993759336099585e-05, "loss": 1.6522, "step": 192 }, { "epoch": 0.16016597510373445, "grad_norm": 5.919717788696289, "learning_rate": 1.9937261410788384e-05, "loss": 1.4904, "step": 193 }, { "epoch": 0.16099585062240665, "grad_norm": 7.588345527648926, "learning_rate": 1.9936929460580912e-05, "loss": 1.6933, "step": 194 }, { "epoch": 0.16182572614107885, "grad_norm": 7.904445171356201, "learning_rate": 1.9936597510373444e-05, "loss": 2.2117, "step": 195 }, { "epoch": 0.16265560165975104, "grad_norm": 6.951838970184326, "learning_rate": 1.9936265560165977e-05, "loss": 2.3671, "step": 196 }, { "epoch": 0.16348547717842324, "grad_norm": 7.371549129486084, "learning_rate": 1.993593360995851e-05, "loss": 1.7245, "step": 197 }, { "epoch": 0.16431535269709543, "grad_norm": 7.2242937088012695, "learning_rate": 1.9935601659751037e-05, "loss": 1.9407, "step": 198 }, { "epoch": 0.16514522821576763, "grad_norm": 14.373573303222656, "learning_rate": 1.993526970954357e-05, "loss": 2.784, "step": 199 }, { "epoch": 0.16597510373443983, "grad_norm": 6.820574760437012, "learning_rate": 1.99349377593361e-05, "loss": 2.0402, "step": 200 }, { "epoch": 0.16680497925311202, "grad_norm": 6.174655914306641, "learning_rate": 1.9934605809128634e-05, "loss": 1.4572, "step": 201 }, { "epoch": 0.16763485477178422, "grad_norm": 12.551054000854492, "learning_rate": 1.9934273858921162e-05, "loss": 1.9594, "step": 202 }, { "epoch": 0.16846473029045644, "grad_norm": 9.779921531677246, "learning_rate": 1.9933941908713695e-05, "loss": 1.9275, "step": 203 }, { "epoch": 0.16929460580912864, "grad_norm": 8.187336921691895, "learning_rate": 1.9933609958506227e-05, "loss": 1.4473, "step": 204 }, { "epoch": 0.17012448132780084, "grad_norm": 11.696194648742676, "learning_rate": 1.993327800829876e-05, "loss": 2.2274, "step": 
205 }, { "epoch": 0.17095435684647303, "grad_norm": 6.584875583648682, "learning_rate": 1.9932946058091287e-05, "loss": 1.6289, "step": 206 }, { "epoch": 0.17178423236514523, "grad_norm": 9.729814529418945, "learning_rate": 1.993261410788382e-05, "loss": 2.0591, "step": 207 }, { "epoch": 0.17261410788381742, "grad_norm": 7.94426155090332, "learning_rate": 1.993228215767635e-05, "loss": 2.3223, "step": 208 }, { "epoch": 0.17344398340248962, "grad_norm": 10.794224739074707, "learning_rate": 1.993195020746888e-05, "loss": 1.7669, "step": 209 }, { "epoch": 0.17427385892116182, "grad_norm": 8.278517723083496, "learning_rate": 1.9931618257261413e-05, "loss": 1.9911, "step": 210 }, { "epoch": 0.175103734439834, "grad_norm": 14.654485702514648, "learning_rate": 1.9931286307053945e-05, "loss": 1.9971, "step": 211 }, { "epoch": 0.17593360995850624, "grad_norm": 9.253569602966309, "learning_rate": 1.9930954356846473e-05, "loss": 1.9909, "step": 212 }, { "epoch": 0.17676348547717843, "grad_norm": 4.507252216339111, "learning_rate": 1.9930622406639005e-05, "loss": 1.6876, "step": 213 }, { "epoch": 0.17759336099585063, "grad_norm": 6.573850631713867, "learning_rate": 1.9930290456431534e-05, "loss": 1.8868, "step": 214 }, { "epoch": 0.17842323651452283, "grad_norm": 5.76279354095459, "learning_rate": 1.9929958506224066e-05, "loss": 1.75, "step": 215 }, { "epoch": 0.17925311203319502, "grad_norm": 15.815079689025879, "learning_rate": 1.99296265560166e-05, "loss": 2.5493, "step": 216 }, { "epoch": 0.18008298755186722, "grad_norm": 8.92386245727539, "learning_rate": 1.992929460580913e-05, "loss": 1.505, "step": 217 }, { "epoch": 0.18091286307053941, "grad_norm": 7.897176265716553, "learning_rate": 1.9928962655601663e-05, "loss": 1.9572, "step": 218 }, { "epoch": 0.1817427385892116, "grad_norm": 12.261737823486328, "learning_rate": 1.992863070539419e-05, "loss": 3.105, "step": 219 }, { "epoch": 0.1825726141078838, "grad_norm": 10.295833587646484, "learning_rate": 
1.9928298755186723e-05, "loss": 2.3292, "step": 220 }, { "epoch": 0.183402489626556, "grad_norm": 8.279837608337402, "learning_rate": 1.9927966804979256e-05, "loss": 1.4343, "step": 221 }, { "epoch": 0.18423236514522823, "grad_norm": 8.297914505004883, "learning_rate": 1.9927634854771788e-05, "loss": 2.1836, "step": 222 }, { "epoch": 0.18506224066390042, "grad_norm": 5.658071994781494, "learning_rate": 1.9927302904564316e-05, "loss": 1.4613, "step": 223 }, { "epoch": 0.18589211618257262, "grad_norm": 8.424749374389648, "learning_rate": 1.992697095435685e-05, "loss": 1.9757, "step": 224 }, { "epoch": 0.18672199170124482, "grad_norm": 10.60951042175293, "learning_rate": 1.992663900414938e-05, "loss": 1.953, "step": 225 }, { "epoch": 0.187551867219917, "grad_norm": 6.6981329917907715, "learning_rate": 1.9926307053941913e-05, "loss": 1.694, "step": 226 }, { "epoch": 0.1883817427385892, "grad_norm": 12.06093978881836, "learning_rate": 1.992597510373444e-05, "loss": 2.3285, "step": 227 }, { "epoch": 0.1892116182572614, "grad_norm": 6.941900253295898, "learning_rate": 1.9925643153526974e-05, "loss": 1.6685, "step": 228 }, { "epoch": 0.1900414937759336, "grad_norm": 11.915095329284668, "learning_rate": 1.9925311203319506e-05, "loss": 2.621, "step": 229 }, { "epoch": 0.1908713692946058, "grad_norm": 8.235644340515137, "learning_rate": 1.9924979253112034e-05, "loss": 1.6519, "step": 230 }, { "epoch": 0.19170124481327802, "grad_norm": 6.772810459136963, "learning_rate": 1.9924647302904566e-05, "loss": 1.7088, "step": 231 }, { "epoch": 0.19253112033195022, "grad_norm": 7.013772487640381, "learning_rate": 1.9924315352697095e-05, "loss": 2.1051, "step": 232 }, { "epoch": 0.1933609958506224, "grad_norm": 12.982172012329102, "learning_rate": 1.9923983402489627e-05, "loss": 2.0776, "step": 233 }, { "epoch": 0.1941908713692946, "grad_norm": 14.510982513427734, "learning_rate": 1.992365145228216e-05, "loss": 2.2612, "step": 234 }, { "epoch": 0.1950207468879668, "grad_norm": 
12.777177810668945, "learning_rate": 1.9923319502074688e-05, "loss": 1.8111, "step": 235 }, { "epoch": 0.195850622406639, "grad_norm": 6.739729881286621, "learning_rate": 1.992298755186722e-05, "loss": 1.7153, "step": 236 }, { "epoch": 0.1966804979253112, "grad_norm": 9.410346031188965, "learning_rate": 1.9922655601659752e-05, "loss": 1.9232, "step": 237 }, { "epoch": 0.1975103734439834, "grad_norm": 8.65363883972168, "learning_rate": 1.9922323651452284e-05, "loss": 2.5375, "step": 238 }, { "epoch": 0.1983402489626556, "grad_norm": 9.966316223144531, "learning_rate": 1.9921991701244813e-05, "loss": 1.8283, "step": 239 }, { "epoch": 0.1991701244813278, "grad_norm": 30.863615036010742, "learning_rate": 1.9921659751037345e-05, "loss": 2.9038, "step": 240 }, { "epoch": 0.2, "grad_norm": 12.947586059570312, "learning_rate": 1.9921327800829877e-05, "loss": 2.2333, "step": 241 }, { "epoch": 0.2008298755186722, "grad_norm": 14.083191871643066, "learning_rate": 1.992099585062241e-05, "loss": 2.1849, "step": 242 }, { "epoch": 0.2016597510373444, "grad_norm": 6.202965259552002, "learning_rate": 1.992066390041494e-05, "loss": 1.5185, "step": 243 }, { "epoch": 0.2024896265560166, "grad_norm": 8.26837158203125, "learning_rate": 1.992033195020747e-05, "loss": 1.8744, "step": 244 }, { "epoch": 0.2033195020746888, "grad_norm": 7.758639812469482, "learning_rate": 1.9920000000000002e-05, "loss": 2.539, "step": 245 }, { "epoch": 0.204149377593361, "grad_norm": 11.463561058044434, "learning_rate": 1.9919668049792535e-05, "loss": 2.8718, "step": 246 }, { "epoch": 0.2049792531120332, "grad_norm": 7.234189987182617, "learning_rate": 1.9919336099585063e-05, "loss": 1.5743, "step": 247 }, { "epoch": 0.20580912863070538, "grad_norm": 19.747238159179688, "learning_rate": 1.9919004149377595e-05, "loss": 2.8093, "step": 248 }, { "epoch": 0.20663900414937758, "grad_norm": 6.884157657623291, "learning_rate": 1.9918672199170127e-05, "loss": 1.7725, "step": 249 }, { "epoch": 0.2074688796680498, 
"grad_norm": 15.65538501739502, "learning_rate": 1.9918340248962656e-05, "loss": 1.8563, "step": 250 }, { "epoch": 0.208298755186722, "grad_norm": 8.748774528503418, "learning_rate": 1.9918008298755188e-05, "loss": 2.2553, "step": 251 }, { "epoch": 0.2091286307053942, "grad_norm": 5.773577690124512, "learning_rate": 1.991767634854772e-05, "loss": 1.6218, "step": 252 }, { "epoch": 0.2099585062240664, "grad_norm": 7.637843132019043, "learning_rate": 1.991734439834025e-05, "loss": 1.8267, "step": 253 }, { "epoch": 0.2107883817427386, "grad_norm": 7.328134536743164, "learning_rate": 1.991701244813278e-05, "loss": 1.682, "step": 254 }, { "epoch": 0.21161825726141079, "grad_norm": 12.805720329284668, "learning_rate": 1.9916680497925313e-05, "loss": 2.0521, "step": 255 }, { "epoch": 0.21244813278008298, "grad_norm": 8.870809555053711, "learning_rate": 1.9916348547717842e-05, "loss": 2.0278, "step": 256 }, { "epoch": 0.21327800829875518, "grad_norm": 10.095145225524902, "learning_rate": 1.9916016597510374e-05, "loss": 1.5181, "step": 257 }, { "epoch": 0.21410788381742737, "grad_norm": 8.560073852539062, "learning_rate": 1.9915684647302906e-05, "loss": 1.5796, "step": 258 }, { "epoch": 0.2149377593360996, "grad_norm": 18.932571411132812, "learning_rate": 1.991535269709544e-05, "loss": 2.3943, "step": 259 }, { "epoch": 0.2157676348547718, "grad_norm": 11.39395809173584, "learning_rate": 1.9915020746887967e-05, "loss": 2.9611, "step": 260 }, { "epoch": 0.216597510373444, "grad_norm": 8.256449699401855, "learning_rate": 1.99146887966805e-05, "loss": 2.0118, "step": 261 }, { "epoch": 0.2174273858921162, "grad_norm": 14.281265258789062, "learning_rate": 1.991435684647303e-05, "loss": 2.0005, "step": 262 }, { "epoch": 0.21825726141078838, "grad_norm": 10.79612922668457, "learning_rate": 1.9914024896265563e-05, "loss": 2.2118, "step": 263 }, { "epoch": 0.21908713692946058, "grad_norm": 6.627613067626953, "learning_rate": 1.9913692946058092e-05, "loss": 2.1207, "step": 264 }, { 
"epoch": 0.21991701244813278, "grad_norm": 7.987668037414551, "learning_rate": 1.9913360995850624e-05, "loss": 2.06, "step": 265 }, { "epoch": 0.22074688796680497, "grad_norm": 8.939229965209961, "learning_rate": 1.9913029045643156e-05, "loss": 1.7487, "step": 266 }, { "epoch": 0.22157676348547717, "grad_norm": 9.623392105102539, "learning_rate": 1.991269709543569e-05, "loss": 2.6588, "step": 267 }, { "epoch": 0.22240663900414936, "grad_norm": 7.301198482513428, "learning_rate": 1.9912365145228217e-05, "loss": 2.1543, "step": 268 }, { "epoch": 0.2232365145228216, "grad_norm": 18.999454498291016, "learning_rate": 1.991203319502075e-05, "loss": 2.4698, "step": 269 }, { "epoch": 0.22406639004149378, "grad_norm": 5.381386756896973, "learning_rate": 1.9911701244813278e-05, "loss": 1.5197, "step": 270 }, { "epoch": 0.22489626556016598, "grad_norm": 7.463920593261719, "learning_rate": 1.991136929460581e-05, "loss": 1.6211, "step": 271 }, { "epoch": 0.22572614107883818, "grad_norm": 11.753081321716309, "learning_rate": 1.9911037344398342e-05, "loss": 2.9156, "step": 272 }, { "epoch": 0.22655601659751037, "grad_norm": 7.662219047546387, "learning_rate": 1.991070539419087e-05, "loss": 1.4606, "step": 273 }, { "epoch": 0.22738589211618257, "grad_norm": 7.085025787353516, "learning_rate": 1.9910373443983403e-05, "loss": 1.4456, "step": 274 }, { "epoch": 0.22821576763485477, "grad_norm": 12.476405143737793, "learning_rate": 1.9910041493775935e-05, "loss": 1.7275, "step": 275 }, { "epoch": 0.22904564315352696, "grad_norm": 10.338284492492676, "learning_rate": 1.9909709543568467e-05, "loss": 1.7193, "step": 276 }, { "epoch": 0.22987551867219916, "grad_norm": 9.45667552947998, "learning_rate": 1.9909377593360996e-05, "loss": 1.815, "step": 277 }, { "epoch": 0.23070539419087138, "grad_norm": 6.937129497528076, "learning_rate": 1.9909045643153528e-05, "loss": 1.926, "step": 278 }, { "epoch": 0.23153526970954358, "grad_norm": 9.840572357177734, "learning_rate": 1.990871369294606e-05, 
"loss": 1.9563, "step": 279 }, { "epoch": 0.23236514522821577, "grad_norm": 7.756499767303467, "learning_rate": 1.9908381742738592e-05, "loss": 2.2771, "step": 280 }, { "epoch": 0.23319502074688797, "grad_norm": 15.489341735839844, "learning_rate": 1.990804979253112e-05, "loss": 2.2222, "step": 281 }, { "epoch": 0.23402489626556017, "grad_norm": 11.886398315429688, "learning_rate": 1.9907717842323653e-05, "loss": 1.9077, "step": 282 }, { "epoch": 0.23485477178423236, "grad_norm": 7.524961471557617, "learning_rate": 1.9907385892116185e-05, "loss": 1.7157, "step": 283 }, { "epoch": 0.23568464730290456, "grad_norm": 11.045442581176758, "learning_rate": 1.9907053941908717e-05, "loss": 1.9228, "step": 284 }, { "epoch": 0.23651452282157676, "grad_norm": 13.449480056762695, "learning_rate": 1.9906721991701246e-05, "loss": 2.0483, "step": 285 }, { "epoch": 0.23734439834024895, "grad_norm": 9.582374572753906, "learning_rate": 1.9906390041493778e-05, "loss": 1.8704, "step": 286 }, { "epoch": 0.23817427385892115, "grad_norm": 15.229805946350098, "learning_rate": 1.990605809128631e-05, "loss": 1.9986, "step": 287 }, { "epoch": 0.23900414937759337, "grad_norm": 7.812011241912842, "learning_rate": 1.990572614107884e-05, "loss": 1.527, "step": 288 }, { "epoch": 0.23983402489626557, "grad_norm": 15.362859725952148, "learning_rate": 1.990539419087137e-05, "loss": 1.8905, "step": 289 }, { "epoch": 0.24066390041493776, "grad_norm": 7.929564476013184, "learning_rate": 1.9905062240663903e-05, "loss": 1.8315, "step": 290 }, { "epoch": 0.24149377593360996, "grad_norm": 11.632556915283203, "learning_rate": 1.9904730290456432e-05, "loss": 2.3839, "step": 291 }, { "epoch": 0.24232365145228216, "grad_norm": 12.163155555725098, "learning_rate": 1.9904398340248964e-05, "loss": 2.0244, "step": 292 }, { "epoch": 0.24315352697095435, "grad_norm": 7.031055450439453, "learning_rate": 1.9904066390041493e-05, "loss": 2.1228, "step": 293 }, { "epoch": 0.24398340248962655, "grad_norm": 
8.277750015258789, "learning_rate": 1.9903734439834025e-05, "loss": 2.4679, "step": 294 }, { "epoch": 0.24481327800829875, "grad_norm": 12.513097763061523, "learning_rate": 1.9903402489626557e-05, "loss": 1.8792, "step": 295 }, { "epoch": 0.24564315352697094, "grad_norm": 6.579935073852539, "learning_rate": 1.990307053941909e-05, "loss": 1.6046, "step": 296 }, { "epoch": 0.24647302904564317, "grad_norm": 14.93478012084961, "learning_rate": 1.990273858921162e-05, "loss": 2.4009, "step": 297 }, { "epoch": 0.24730290456431536, "grad_norm": 9.737977981567383, "learning_rate": 1.990240663900415e-05, "loss": 1.7501, "step": 298 }, { "epoch": 0.24813278008298756, "grad_norm": 9.408220291137695, "learning_rate": 1.9902074688796682e-05, "loss": 1.8118, "step": 299 }, { "epoch": 0.24896265560165975, "grad_norm": 9.431487083435059, "learning_rate": 1.9901742738589214e-05, "loss": 1.9074, "step": 300 }, { "epoch": 0.24979253112033195, "grad_norm": 4.304020881652832, "learning_rate": 1.9901410788381746e-05, "loss": 1.1732, "step": 301 }, { "epoch": 0.25062240663900415, "grad_norm": 19.789804458618164, "learning_rate": 1.9901078838174275e-05, "loss": 1.6547, "step": 302 }, { "epoch": 0.25145228215767634, "grad_norm": 13.886343955993652, "learning_rate": 1.9900746887966807e-05, "loss": 1.8054, "step": 303 }, { "epoch": 0.25228215767634854, "grad_norm": 8.941858291625977, "learning_rate": 1.990041493775934e-05, "loss": 1.7543, "step": 304 }, { "epoch": 0.25311203319502074, "grad_norm": 10.007930755615234, "learning_rate": 1.990008298755187e-05, "loss": 2.6187, "step": 305 }, { "epoch": 0.25394190871369293, "grad_norm": 10.720308303833008, "learning_rate": 1.98997510373444e-05, "loss": 1.6178, "step": 306 }, { "epoch": 0.25477178423236513, "grad_norm": 9.800912857055664, "learning_rate": 1.9899419087136932e-05, "loss": 2.1088, "step": 307 }, { "epoch": 0.2556016597510373, "grad_norm": 13.919331550598145, "learning_rate": 1.989908713692946e-05, "loss": 2.5015, "step": 308 }, { 
"epoch": 0.2564315352697095, "grad_norm": 11.061727523803711, "learning_rate": 1.9898755186721993e-05, "loss": 1.5908, "step": 309 }, { "epoch": 0.2572614107883817, "grad_norm": 10.59677791595459, "learning_rate": 1.9898423236514525e-05, "loss": 2.2442, "step": 310 }, { "epoch": 0.25809128630705397, "grad_norm": 12.39517593383789, "learning_rate": 1.9898091286307054e-05, "loss": 1.9247, "step": 311 }, { "epoch": 0.25892116182572616, "grad_norm": 7.9535603523254395, "learning_rate": 1.9897759336099586e-05, "loss": 1.6174, "step": 312 }, { "epoch": 0.25975103734439836, "grad_norm": 11.271891593933105, "learning_rate": 1.9897427385892118e-05, "loss": 2.0319, "step": 313 }, { "epoch": 0.26058091286307056, "grad_norm": 9.67825698852539, "learning_rate": 1.9897095435684647e-05, "loss": 2.0728, "step": 314 }, { "epoch": 0.26141078838174275, "grad_norm": 8.162148475646973, "learning_rate": 1.989676348547718e-05, "loss": 1.9148, "step": 315 }, { "epoch": 0.26224066390041495, "grad_norm": 7.897415637969971, "learning_rate": 1.989643153526971e-05, "loss": 1.7712, "step": 316 }, { "epoch": 0.26307053941908715, "grad_norm": 14.785557746887207, "learning_rate": 1.9896099585062243e-05, "loss": 1.6678, "step": 317 }, { "epoch": 0.26390041493775934, "grad_norm": 10.395452499389648, "learning_rate": 1.9895767634854772e-05, "loss": 1.5046, "step": 318 }, { "epoch": 0.26473029045643154, "grad_norm": 8.668415069580078, "learning_rate": 1.9895435684647304e-05, "loss": 2.3541, "step": 319 }, { "epoch": 0.26556016597510373, "grad_norm": 11.882841110229492, "learning_rate": 1.9895103734439836e-05, "loss": 2.4594, "step": 320 }, { "epoch": 0.26639004149377593, "grad_norm": 6.219594955444336, "learning_rate": 1.9894771784232368e-05, "loss": 1.834, "step": 321 }, { "epoch": 0.2672199170124481, "grad_norm": 19.03469467163086, "learning_rate": 1.98944398340249e-05, "loss": 2.6029, "step": 322 }, { "epoch": 0.2680497925311203, "grad_norm": 6.237669944763184, "learning_rate": 
1.989410788381743e-05, "loss": 1.5352, "step": 323 }, { "epoch": 0.2688796680497925, "grad_norm": 12.55418586730957, "learning_rate": 1.989377593360996e-05, "loss": 1.8524, "step": 324 }, { "epoch": 0.2697095435684647, "grad_norm": 7.847701072692871, "learning_rate": 1.9893443983402493e-05, "loss": 1.7957, "step": 325 }, { "epoch": 0.2705394190871369, "grad_norm": 7.027853012084961, "learning_rate": 1.9893112033195022e-05, "loss": 1.7478, "step": 326 }, { "epoch": 0.2713692946058091, "grad_norm": 6.449759483337402, "learning_rate": 1.9892780082987554e-05, "loss": 1.4932, "step": 327 }, { "epoch": 0.2721991701244813, "grad_norm": 10.768731117248535, "learning_rate": 1.9892448132780086e-05, "loss": 2.082, "step": 328 }, { "epoch": 0.2730290456431535, "grad_norm": 9.304479598999023, "learning_rate": 1.9892116182572615e-05, "loss": 1.6083, "step": 329 }, { "epoch": 0.27385892116182575, "grad_norm": 11.799768447875977, "learning_rate": 1.9891784232365147e-05, "loss": 2.998, "step": 330 }, { "epoch": 0.27468879668049795, "grad_norm": 7.850957870483398, "learning_rate": 1.9891452282157676e-05, "loss": 2.4549, "step": 331 }, { "epoch": 0.27551867219917014, "grad_norm": 5.635564804077148, "learning_rate": 1.9891120331950208e-05, "loss": 1.5166, "step": 332 }, { "epoch": 0.27634854771784234, "grad_norm": 13.148083686828613, "learning_rate": 1.989078838174274e-05, "loss": 1.9662, "step": 333 }, { "epoch": 0.27717842323651454, "grad_norm": 7.737997055053711, "learning_rate": 1.9890456431535272e-05, "loss": 1.8137, "step": 334 }, { "epoch": 0.27800829875518673, "grad_norm": 10.302597045898438, "learning_rate": 1.98901244813278e-05, "loss": 2.2275, "step": 335 }, { "epoch": 0.27883817427385893, "grad_norm": 8.77177619934082, "learning_rate": 1.9889792531120333e-05, "loss": 2.1849, "step": 336 }, { "epoch": 0.2796680497925311, "grad_norm": 9.664620399475098, "learning_rate": 1.9889460580912865e-05, "loss": 1.797, "step": 337 }, { "epoch": 0.2804979253112033, "grad_norm": 
8.532379150390625, "learning_rate": 1.9889128630705397e-05, "loss": 1.3641, "step": 338 }, { "epoch": 0.2813278008298755, "grad_norm": 9.868210792541504, "learning_rate": 1.9888796680497926e-05, "loss": 2.289, "step": 339 }, { "epoch": 0.2821576763485477, "grad_norm": 7.570600509643555, "learning_rate": 1.9888464730290458e-05, "loss": 2.1966, "step": 340 }, { "epoch": 0.2829875518672199, "grad_norm": 9.624650955200195, "learning_rate": 1.988813278008299e-05, "loss": 2.2315, "step": 341 }, { "epoch": 0.2838174273858921, "grad_norm": 9.14719009399414, "learning_rate": 1.9887800829875522e-05, "loss": 2.6975, "step": 342 }, { "epoch": 0.2846473029045643, "grad_norm": 9.10033893585205, "learning_rate": 1.988746887966805e-05, "loss": 1.5517, "step": 343 }, { "epoch": 0.2854771784232365, "grad_norm": 9.065482139587402, "learning_rate": 1.9887136929460583e-05, "loss": 1.9026, "step": 344 }, { "epoch": 0.2863070539419087, "grad_norm": 8.26892375946045, "learning_rate": 1.9886804979253115e-05, "loss": 1.4956, "step": 345 }, { "epoch": 0.2871369294605809, "grad_norm": 8.85065746307373, "learning_rate": 1.9886473029045647e-05, "loss": 1.3969, "step": 346 }, { "epoch": 0.2879668049792531, "grad_norm": 11.672158241271973, "learning_rate": 1.9886141078838176e-05, "loss": 1.4355, "step": 347 }, { "epoch": 0.2887966804979253, "grad_norm": 8.785264015197754, "learning_rate": 1.9885809128630708e-05, "loss": 1.5609, "step": 348 }, { "epoch": 0.28962655601659754, "grad_norm": 9.668018341064453, "learning_rate": 1.9885477178423237e-05, "loss": 1.5094, "step": 349 }, { "epoch": 0.29045643153526973, "grad_norm": 8.325971603393555, "learning_rate": 1.988514522821577e-05, "loss": 1.6859, "step": 350 }, { "epoch": 0.29128630705394193, "grad_norm": 8.858888626098633, "learning_rate": 1.98848132780083e-05, "loss": 2.2606, "step": 351 }, { "epoch": 0.2921161825726141, "grad_norm": 9.793957710266113, "learning_rate": 1.988448132780083e-05, "loss": 1.3348, "step": 352 }, { "epoch": 
0.2929460580912863, "grad_norm": 11.848450660705566, "learning_rate": 1.988414937759336e-05, "loss": 1.643, "step": 353 }, { "epoch": 0.2937759336099585, "grad_norm": 11.273825645446777, "learning_rate": 1.9883817427385894e-05, "loss": 1.5234, "step": 354 }, { "epoch": 0.2946058091286307, "grad_norm": 13.246862411499023, "learning_rate": 1.9883485477178426e-05, "loss": 2.7838, "step": 355 }, { "epoch": 0.2954356846473029, "grad_norm": 8.90209674835205, "learning_rate": 1.9883153526970955e-05, "loss": 1.4532, "step": 356 }, { "epoch": 0.2962655601659751, "grad_norm": 12.951064109802246, "learning_rate": 1.9882821576763487e-05, "loss": 2.0032, "step": 357 }, { "epoch": 0.2970954356846473, "grad_norm": 6.655320167541504, "learning_rate": 1.988248962655602e-05, "loss": 1.4062, "step": 358 }, { "epoch": 0.2979253112033195, "grad_norm": 6.934351921081543, "learning_rate": 1.988215767634855e-05, "loss": 1.5582, "step": 359 }, { "epoch": 0.2987551867219917, "grad_norm": 8.301117897033691, "learning_rate": 1.988182572614108e-05, "loss": 2.0881, "step": 360 }, { "epoch": 0.2995850622406639, "grad_norm": 6.222822666168213, "learning_rate": 1.988149377593361e-05, "loss": 1.8923, "step": 361 }, { "epoch": 0.3004149377593361, "grad_norm": 4.976552963256836, "learning_rate": 1.9881161825726144e-05, "loss": 1.118, "step": 362 }, { "epoch": 0.3012448132780083, "grad_norm": 23.62723731994629, "learning_rate": 1.9880829875518676e-05, "loss": 1.9861, "step": 363 }, { "epoch": 0.3020746887966805, "grad_norm": 10.285877227783203, "learning_rate": 1.9880497925311205e-05, "loss": 2.3175, "step": 364 }, { "epoch": 0.3029045643153527, "grad_norm": 10.19700813293457, "learning_rate": 1.9880165975103737e-05, "loss": 1.7621, "step": 365 }, { "epoch": 0.30373443983402487, "grad_norm": 14.880671501159668, "learning_rate": 1.987983402489627e-05, "loss": 3.4321, "step": 366 }, { "epoch": 0.30456431535269707, "grad_norm": 13.508030891418457, "learning_rate": 1.9879502074688798e-05, "loss": 1.8024, 
"step": 367 }, { "epoch": 0.3053941908713693, "grad_norm": 11.81153678894043, "learning_rate": 1.987917012448133e-05, "loss": 2.2324, "step": 368 }, { "epoch": 0.3062240663900415, "grad_norm": 9.104647636413574, "learning_rate": 1.9878838174273862e-05, "loss": 1.1552, "step": 369 }, { "epoch": 0.3070539419087137, "grad_norm": 9.31092357635498, "learning_rate": 1.987850622406639e-05, "loss": 1.5797, "step": 370 }, { "epoch": 0.3078838174273859, "grad_norm": 11.721932411193848, "learning_rate": 1.9878174273858923e-05, "loss": 2.3224, "step": 371 }, { "epoch": 0.3087136929460581, "grad_norm": 10.981051445007324, "learning_rate": 1.987784232365145e-05, "loss": 1.4681, "step": 372 }, { "epoch": 0.3095435684647303, "grad_norm": 10.20358943939209, "learning_rate": 1.9877510373443983e-05, "loss": 1.6949, "step": 373 }, { "epoch": 0.3103734439834025, "grad_norm": 11.699739456176758, "learning_rate": 1.9877178423236516e-05, "loss": 1.8191, "step": 374 }, { "epoch": 0.3112033195020747, "grad_norm": 13.850192070007324, "learning_rate": 1.9876846473029048e-05, "loss": 1.5977, "step": 375 }, { "epoch": 0.3120331950207469, "grad_norm": 9.4487886428833, "learning_rate": 1.987651452282158e-05, "loss": 1.6011, "step": 376 }, { "epoch": 0.3128630705394191, "grad_norm": 9.263087272644043, "learning_rate": 1.987618257261411e-05, "loss": 1.9744, "step": 377 }, { "epoch": 0.3136929460580913, "grad_norm": 12.952744483947754, "learning_rate": 1.987585062240664e-05, "loss": 2.0091, "step": 378 }, { "epoch": 0.3145228215767635, "grad_norm": 7.532689094543457, "learning_rate": 1.9875518672199173e-05, "loss": 1.292, "step": 379 }, { "epoch": 0.3153526970954357, "grad_norm": 9.215330123901367, "learning_rate": 1.9875186721991705e-05, "loss": 1.3843, "step": 380 }, { "epoch": 0.31618257261410787, "grad_norm": 8.78801155090332, "learning_rate": 1.9874854771784234e-05, "loss": 2.4164, "step": 381 }, { "epoch": 0.31701244813278007, "grad_norm": 5.946457862854004, "learning_rate": 
1.9874522821576766e-05, "loss": 1.9496, "step": 382 }, { "epoch": 0.31784232365145226, "grad_norm": 5.752866744995117, "learning_rate": 1.9874190871369298e-05, "loss": 1.5854, "step": 383 }, { "epoch": 0.31867219917012446, "grad_norm": 7.314510822296143, "learning_rate": 1.987385892116183e-05, "loss": 1.6094, "step": 384 }, { "epoch": 0.31950207468879666, "grad_norm": 11.701753616333008, "learning_rate": 1.987352697095436e-05, "loss": 1.7005, "step": 385 }, { "epoch": 0.3203319502074689, "grad_norm": 6.985563278198242, "learning_rate": 1.987319502074689e-05, "loss": 1.2801, "step": 386 }, { "epoch": 0.3211618257261411, "grad_norm": 10.41495132446289, "learning_rate": 1.987286307053942e-05, "loss": 1.5859, "step": 387 }, { "epoch": 0.3219917012448133, "grad_norm": 9.037649154663086, "learning_rate": 1.987253112033195e-05, "loss": 1.4375, "step": 388 }, { "epoch": 0.3228215767634855, "grad_norm": 9.659147262573242, "learning_rate": 1.9872199170124484e-05, "loss": 1.1119, "step": 389 }, { "epoch": 0.3236514522821577, "grad_norm": 8.83980941772461, "learning_rate": 1.9871867219917012e-05, "loss": 1.2831, "step": 390 }, { "epoch": 0.3244813278008299, "grad_norm": 10.130660057067871, "learning_rate": 1.9871535269709544e-05, "loss": 1.4915, "step": 391 }, { "epoch": 0.3253112033195021, "grad_norm": 12.017533302307129, "learning_rate": 1.9871203319502077e-05, "loss": 2.0838, "step": 392 }, { "epoch": 0.3261410788381743, "grad_norm": 9.829458236694336, "learning_rate": 1.9870871369294605e-05, "loss": 1.8872, "step": 393 }, { "epoch": 0.3269709543568465, "grad_norm": 8.105406761169434, "learning_rate": 1.9870539419087137e-05, "loss": 1.5974, "step": 394 }, { "epoch": 0.3278008298755187, "grad_norm": 11.911584854125977, "learning_rate": 1.987020746887967e-05, "loss": 2.3399, "step": 395 }, { "epoch": 0.32863070539419087, "grad_norm": 7.976802825927734, "learning_rate": 1.98698755186722e-05, "loss": 1.5084, "step": 396 }, { "epoch": 0.32946058091286307, "grad_norm": 
10.139780044555664, "learning_rate": 1.986954356846473e-05, "loss": 2.369, "step": 397 }, { "epoch": 0.33029045643153526, "grad_norm": 8.757875442504883, "learning_rate": 1.9869211618257262e-05, "loss": 1.7336, "step": 398 }, { "epoch": 0.33112033195020746, "grad_norm": 14.091323852539062, "learning_rate": 1.9868879668049795e-05, "loss": 1.7271, "step": 399 }, { "epoch": 0.33195020746887965, "grad_norm": 9.322914123535156, "learning_rate": 1.9868547717842327e-05, "loss": 2.0774, "step": 400 }, { "epoch": 0.33278008298755185, "grad_norm": 14.308728218078613, "learning_rate": 1.986821576763486e-05, "loss": 1.8423, "step": 401 }, { "epoch": 0.33360995850622405, "grad_norm": 8.773816108703613, "learning_rate": 1.9867883817427387e-05, "loss": 1.6326, "step": 402 }, { "epoch": 0.33443983402489624, "grad_norm": 13.497881889343262, "learning_rate": 1.986755186721992e-05, "loss": 1.9582, "step": 403 }, { "epoch": 0.33526970954356844, "grad_norm": 7.429559230804443, "learning_rate": 1.986721991701245e-05, "loss": 1.1151, "step": 404 }, { "epoch": 0.3360995850622407, "grad_norm": 10.602397918701172, "learning_rate": 1.986688796680498e-05, "loss": 1.5191, "step": 405 }, { "epoch": 0.3369294605809129, "grad_norm": 8.518929481506348, "learning_rate": 1.9866556016597512e-05, "loss": 1.5199, "step": 406 }, { "epoch": 0.3377593360995851, "grad_norm": 7.496772289276123, "learning_rate": 1.9866224066390045e-05, "loss": 1.7036, "step": 407 }, { "epoch": 0.3385892116182573, "grad_norm": 15.788126945495605, "learning_rate": 1.9865892116182573e-05, "loss": 2.1724, "step": 408 }, { "epoch": 0.3394190871369295, "grad_norm": 9.288516998291016, "learning_rate": 1.9865560165975105e-05, "loss": 1.9728, "step": 409 }, { "epoch": 0.34024896265560167, "grad_norm": 10.466230392456055, "learning_rate": 1.9865228215767634e-05, "loss": 1.3204, "step": 410 }, { "epoch": 0.34107883817427387, "grad_norm": 11.187043190002441, "learning_rate": 1.9864896265560166e-05, "loss": 1.8819, "step": 411 }, { 
"epoch": 0.34190871369294606, "grad_norm": 10.48108959197998, "learning_rate": 1.98645643153527e-05, "loss": 1.747, "step": 412 }, { "epoch": 0.34273858921161826, "grad_norm": 15.635665893554688, "learning_rate": 1.986423236514523e-05, "loss": 1.9063, "step": 413 }, { "epoch": 0.34356846473029046, "grad_norm": 14.35388469696045, "learning_rate": 1.986390041493776e-05, "loss": 2.7768, "step": 414 }, { "epoch": 0.34439834024896265, "grad_norm": 9.362570762634277, "learning_rate": 1.986356846473029e-05, "loss": 1.7583, "step": 415 }, { "epoch": 0.34522821576763485, "grad_norm": 6.6007184982299805, "learning_rate": 1.9863236514522823e-05, "loss": 1.898, "step": 416 }, { "epoch": 0.34605809128630705, "grad_norm": 11.47203540802002, "learning_rate": 1.9862904564315356e-05, "loss": 1.0925, "step": 417 }, { "epoch": 0.34688796680497924, "grad_norm": 7.127942085266113, "learning_rate": 1.9862572614107884e-05, "loss": 1.0937, "step": 418 }, { "epoch": 0.34771784232365144, "grad_norm": 13.636455535888672, "learning_rate": 1.9862240663900416e-05, "loss": 1.4263, "step": 419 }, { "epoch": 0.34854771784232363, "grad_norm": 5.823472499847412, "learning_rate": 1.986190871369295e-05, "loss": 1.3873, "step": 420 }, { "epoch": 0.34937759336099583, "grad_norm": 13.85187816619873, "learning_rate": 1.986157676348548e-05, "loss": 3.126, "step": 421 }, { "epoch": 0.350207468879668, "grad_norm": 8.519275665283203, "learning_rate": 1.986124481327801e-05, "loss": 1.832, "step": 422 }, { "epoch": 0.3510373443983402, "grad_norm": 7.010831356048584, "learning_rate": 1.986091286307054e-05, "loss": 2.3446, "step": 423 }, { "epoch": 0.3518672199170125, "grad_norm": 10.78986930847168, "learning_rate": 1.9860580912863073e-05, "loss": 1.3716, "step": 424 }, { "epoch": 0.35269709543568467, "grad_norm": 10.416297912597656, "learning_rate": 1.9860248962655602e-05, "loss": 1.5195, "step": 425 }, { "epoch": 0.35352697095435687, "grad_norm": 8.662460327148438, "learning_rate": 1.9859917012448134e-05, 
"loss": 1.3587, "step": 426 }, { "epoch": 0.35435684647302906, "grad_norm": 5.57528829574585, "learning_rate": 1.9859585062240666e-05, "loss": 1.5715, "step": 427 }, { "epoch": 0.35518672199170126, "grad_norm": 6.51400089263916, "learning_rate": 1.9859253112033195e-05, "loss": 1.2057, "step": 428 }, { "epoch": 0.35601659751037346, "grad_norm": 6.829798698425293, "learning_rate": 1.9858921161825727e-05, "loss": 1.2347, "step": 429 }, { "epoch": 0.35684647302904565, "grad_norm": 9.718297958374023, "learning_rate": 1.985858921161826e-05, "loss": 2.0125, "step": 430 }, { "epoch": 0.35767634854771785, "grad_norm": 8.271976470947266, "learning_rate": 1.9858257261410788e-05, "loss": 1.2399, "step": 431 }, { "epoch": 0.35850622406639004, "grad_norm": 7.087287425994873, "learning_rate": 1.985792531120332e-05, "loss": 1.449, "step": 432 }, { "epoch": 0.35933609958506224, "grad_norm": 7.888195514678955, "learning_rate": 1.9857593360995852e-05, "loss": 1.4116, "step": 433 }, { "epoch": 0.36016597510373444, "grad_norm": 7.869657039642334, "learning_rate": 1.9857261410788384e-05, "loss": 1.5559, "step": 434 }, { "epoch": 0.36099585062240663, "grad_norm": 11.626910209655762, "learning_rate": 1.9856929460580913e-05, "loss": 1.2107, "step": 435 }, { "epoch": 0.36182572614107883, "grad_norm": 14.000014305114746, "learning_rate": 1.9856597510373445e-05, "loss": 1.8515, "step": 436 }, { "epoch": 0.362655601659751, "grad_norm": 8.694389343261719, "learning_rate": 1.9856265560165977e-05, "loss": 1.5605, "step": 437 }, { "epoch": 0.3634854771784232, "grad_norm": 8.997848510742188, "learning_rate": 1.985593360995851e-05, "loss": 1.5626, "step": 438 }, { "epoch": 0.3643153526970954, "grad_norm": 12.579106330871582, "learning_rate": 1.9855601659751038e-05, "loss": 1.6712, "step": 439 }, { "epoch": 0.3651452282157676, "grad_norm": 8.657224655151367, "learning_rate": 1.985526970954357e-05, "loss": 1.864, "step": 440 }, { "epoch": 0.3659751037344398, "grad_norm": 8.69199275970459, 
"learning_rate": 1.9854937759336102e-05, "loss": 2.1729, "step": 441 }, { "epoch": 0.366804979253112, "grad_norm": 11.726838111877441, "learning_rate": 1.9854605809128634e-05, "loss": 1.4736, "step": 442 }, { "epoch": 0.36763485477178426, "grad_norm": 9.194008827209473, "learning_rate": 1.9854273858921163e-05, "loss": 1.5415, "step": 443 }, { "epoch": 0.36846473029045645, "grad_norm": 15.414788246154785, "learning_rate": 1.9853941908713695e-05, "loss": 2.23, "step": 444 }, { "epoch": 0.36929460580912865, "grad_norm": 12.181160926818848, "learning_rate": 1.9853609958506227e-05, "loss": 2.5438, "step": 445 }, { "epoch": 0.37012448132780085, "grad_norm": 9.222726821899414, "learning_rate": 1.9853278008298756e-05, "loss": 1.7819, "step": 446 }, { "epoch": 0.37095435684647304, "grad_norm": 8.515706062316895, "learning_rate": 1.9852946058091288e-05, "loss": 1.5994, "step": 447 }, { "epoch": 0.37178423236514524, "grad_norm": 8.711225509643555, "learning_rate": 1.9852614107883817e-05, "loss": 1.4037, "step": 448 }, { "epoch": 0.37261410788381744, "grad_norm": 7.424642562866211, "learning_rate": 1.985228215767635e-05, "loss": 1.7359, "step": 449 }, { "epoch": 0.37344398340248963, "grad_norm": 9.517801284790039, "learning_rate": 1.985195020746888e-05, "loss": 0.9666, "step": 450 }, { "epoch": 0.37427385892116183, "grad_norm": 7.619672775268555, "learning_rate": 1.985161825726141e-05, "loss": 1.5141, "step": 451 }, { "epoch": 0.375103734439834, "grad_norm": 10.298349380493164, "learning_rate": 1.9851286307053942e-05, "loss": 1.8717, "step": 452 }, { "epoch": 0.3759336099585062, "grad_norm": 7.518166542053223, "learning_rate": 1.9850954356846474e-05, "loss": 1.3744, "step": 453 }, { "epoch": 0.3767634854771784, "grad_norm": 7.124811172485352, "learning_rate": 1.9850622406639006e-05, "loss": 1.6306, "step": 454 }, { "epoch": 0.3775933609958506, "grad_norm": 18.42142677307129, "learning_rate": 1.985029045643154e-05, "loss": 2.3333, "step": 455 }, { "epoch": 0.3784232365145228, 
"grad_norm": 15.974663734436035, "learning_rate": 1.9849958506224067e-05, "loss": 1.4223, "step": 456 }, { "epoch": 0.379253112033195, "grad_norm": 8.829963684082031, "learning_rate": 1.98496265560166e-05, "loss": 1.3882, "step": 457 }, { "epoch": 0.3800829875518672, "grad_norm": 7.651063919067383, "learning_rate": 1.984929460580913e-05, "loss": 1.2142, "step": 458 }, { "epoch": 0.3809128630705394, "grad_norm": 7.4223761558532715, "learning_rate": 1.9848962655601663e-05, "loss": 1.3838, "step": 459 }, { "epoch": 0.3817427385892116, "grad_norm": 11.761604309082031, "learning_rate": 1.9848630705394192e-05, "loss": 1.8351, "step": 460 }, { "epoch": 0.3825726141078838, "grad_norm": 11.444391250610352, "learning_rate": 1.9848298755186724e-05, "loss": 1.6216, "step": 461 }, { "epoch": 0.38340248962655604, "grad_norm": 12.002763748168945, "learning_rate": 1.9847966804979256e-05, "loss": 1.8111, "step": 462 }, { "epoch": 0.38423236514522824, "grad_norm": 9.347603797912598, "learning_rate": 1.984763485477179e-05, "loss": 1.6554, "step": 463 }, { "epoch": 0.38506224066390043, "grad_norm": 7.481888771057129, "learning_rate": 1.9847302904564317e-05, "loss": 1.388, "step": 464 }, { "epoch": 0.38589211618257263, "grad_norm": 5.7775797843933105, "learning_rate": 1.984697095435685e-05, "loss": 1.7325, "step": 465 }, { "epoch": 0.3867219917012448, "grad_norm": 6.031735897064209, "learning_rate": 1.9846639004149378e-05, "loss": 1.647, "step": 466 }, { "epoch": 0.387551867219917, "grad_norm": 18.01087188720703, "learning_rate": 1.984630705394191e-05, "loss": 2.4181, "step": 467 }, { "epoch": 0.3883817427385892, "grad_norm": 11.654090881347656, "learning_rate": 1.9845975103734442e-05, "loss": 1.121, "step": 468 }, { "epoch": 0.3892116182572614, "grad_norm": 12.326499938964844, "learning_rate": 1.984564315352697e-05, "loss": 1.1398, "step": 469 }, { "epoch": 0.3900414937759336, "grad_norm": 10.442092895507812, "learning_rate": 1.9845311203319503e-05, "loss": 1.5398, "step": 470 }, { 
"epoch": 0.3908713692946058, "grad_norm": 14.667844772338867, "learning_rate": 1.9844979253112035e-05, "loss": 1.8843, "step": 471 }, { "epoch": 0.391701244813278, "grad_norm": 12.97749137878418, "learning_rate": 1.9844647302904564e-05, "loss": 1.6156, "step": 472 }, { "epoch": 0.3925311203319502, "grad_norm": 12.189905166625977, "learning_rate": 1.9844315352697096e-05, "loss": 1.2705, "step": 473 }, { "epoch": 0.3933609958506224, "grad_norm": 8.335585594177246, "learning_rate": 1.9843983402489628e-05, "loss": 1.46, "step": 474 }, { "epoch": 0.3941908713692946, "grad_norm": 6.984908103942871, "learning_rate": 1.984365145228216e-05, "loss": 1.1473, "step": 475 }, { "epoch": 0.3950207468879668, "grad_norm": 6.805650234222412, "learning_rate": 1.984331950207469e-05, "loss": 1.6271, "step": 476 }, { "epoch": 0.395850622406639, "grad_norm": 7.31339168548584, "learning_rate": 1.984298755186722e-05, "loss": 1.1627, "step": 477 }, { "epoch": 0.3966804979253112, "grad_norm": 8.82826042175293, "learning_rate": 1.9842655601659753e-05, "loss": 1.5635, "step": 478 }, { "epoch": 0.3975103734439834, "grad_norm": 10.888141632080078, "learning_rate": 1.9842323651452285e-05, "loss": 1.6194, "step": 479 }, { "epoch": 0.3983402489626556, "grad_norm": 8.298318862915039, "learning_rate": 1.9841991701244817e-05, "loss": 1.9764, "step": 480 }, { "epoch": 0.3991701244813278, "grad_norm": 8.283834457397461, "learning_rate": 1.9841659751037346e-05, "loss": 0.9373, "step": 481 }, { "epoch": 0.4, "grad_norm": 10.027059555053711, "learning_rate": 1.9841327800829878e-05, "loss": 1.6717, "step": 482 }, { "epoch": 0.4008298755186722, "grad_norm": 16.446043014526367, "learning_rate": 1.984099585062241e-05, "loss": 1.8901, "step": 483 }, { "epoch": 0.4016597510373444, "grad_norm": 8.61890983581543, "learning_rate": 1.984066390041494e-05, "loss": 1.2704, "step": 484 }, { "epoch": 0.4024896265560166, "grad_norm": 9.592090606689453, "learning_rate": 1.984033195020747e-05, "loss": 0.9469, "step": 485 }, 
{ "epoch": 0.4033195020746888, "grad_norm": 11.512595176696777, "learning_rate": 1.9840000000000003e-05, "loss": 1.7556, "step": 486 }, { "epoch": 0.404149377593361, "grad_norm": 9.236711502075195, "learning_rate": 1.9839668049792532e-05, "loss": 1.0933, "step": 487 }, { "epoch": 0.4049792531120332, "grad_norm": 11.157052040100098, "learning_rate": 1.9839336099585064e-05, "loss": 1.4786, "step": 488 }, { "epoch": 0.4058091286307054, "grad_norm": 11.272665023803711, "learning_rate": 1.9839004149377593e-05, "loss": 2.0213, "step": 489 }, { "epoch": 0.4066390041493776, "grad_norm": 8.155815124511719, "learning_rate": 1.9838672199170125e-05, "loss": 1.6609, "step": 490 }, { "epoch": 0.4074688796680498, "grad_norm": 10.842107772827148, "learning_rate": 1.9838340248962657e-05, "loss": 1.9158, "step": 491 }, { "epoch": 0.408298755186722, "grad_norm": 5.637999534606934, "learning_rate": 1.983800829875519e-05, "loss": 1.6013, "step": 492 }, { "epoch": 0.4091286307053942, "grad_norm": 9.51002025604248, "learning_rate": 1.9837676348547718e-05, "loss": 1.1956, "step": 493 }, { "epoch": 0.4099585062240664, "grad_norm": 5.451273441314697, "learning_rate": 1.983734439834025e-05, "loss": 1.3843, "step": 494 }, { "epoch": 0.4107883817427386, "grad_norm": 15.484855651855469, "learning_rate": 1.9837012448132782e-05, "loss": 1.3481, "step": 495 }, { "epoch": 0.41161825726141077, "grad_norm": 8.532337188720703, "learning_rate": 1.9836680497925314e-05, "loss": 1.6409, "step": 496 }, { "epoch": 0.41244813278008297, "grad_norm": 9.93855094909668, "learning_rate": 1.9836348547717843e-05, "loss": 1.3733, "step": 497 }, { "epoch": 0.41327800829875516, "grad_norm": 10.40090274810791, "learning_rate": 1.9836016597510375e-05, "loss": 1.9395, "step": 498 }, { "epoch": 0.4141078838174274, "grad_norm": 17.130619049072266, "learning_rate": 1.9835684647302907e-05, "loss": 2.5648, "step": 499 }, { "epoch": 0.4149377593360996, "grad_norm": 13.049044609069824, "learning_rate": 1.983535269709544e-05, 
"loss": 1.6021, "step": 500 }, { "epoch": 0.4157676348547718, "grad_norm": 6.714792728424072, "learning_rate": 1.9835020746887968e-05, "loss": 1.3244, "step": 501 }, { "epoch": 0.416597510373444, "grad_norm": 11.318907737731934, "learning_rate": 1.98346887966805e-05, "loss": 1.3849, "step": 502 }, { "epoch": 0.4174273858921162, "grad_norm": 8.358821868896484, "learning_rate": 1.9834356846473032e-05, "loss": 1.9837, "step": 503 }, { "epoch": 0.4182572614107884, "grad_norm": 6.660184860229492, "learning_rate": 1.983402489626556e-05, "loss": 1.5324, "step": 504 }, { "epoch": 0.4190871369294606, "grad_norm": 14.783061027526855, "learning_rate": 1.9833692946058093e-05, "loss": 1.9316, "step": 505 }, { "epoch": 0.4199170124481328, "grad_norm": 8.343722343444824, "learning_rate": 1.9833360995850625e-05, "loss": 1.7081, "step": 506 }, { "epoch": 0.420746887966805, "grad_norm": 9.337301254272461, "learning_rate": 1.9833029045643154e-05, "loss": 1.2461, "step": 507 }, { "epoch": 0.4215767634854772, "grad_norm": 6.987564563751221, "learning_rate": 1.9832697095435686e-05, "loss": 1.6244, "step": 508 }, { "epoch": 0.4224066390041494, "grad_norm": 10.634900093078613, "learning_rate": 1.9832365145228218e-05, "loss": 1.2862, "step": 509 }, { "epoch": 0.42323651452282157, "grad_norm": 15.436484336853027, "learning_rate": 1.9832033195020747e-05, "loss": 2.1505, "step": 510 }, { "epoch": 0.42406639004149377, "grad_norm": 9.877289772033691, "learning_rate": 1.983170124481328e-05, "loss": 2.4, "step": 511 }, { "epoch": 0.42489626556016596, "grad_norm": 27.067655563354492, "learning_rate": 1.983136929460581e-05, "loss": 1.5892, "step": 512 }, { "epoch": 0.42572614107883816, "grad_norm": 15.959968566894531, "learning_rate": 1.9831037344398343e-05, "loss": 1.1536, "step": 513 }, { "epoch": 0.42655601659751036, "grad_norm": 13.886764526367188, "learning_rate": 1.983070539419087e-05, "loss": 1.2825, "step": 514 }, { "epoch": 0.42738589211618255, "grad_norm": 7.436283111572266, 
"learning_rate": 1.9830373443983404e-05, "loss": 1.3762, "step": 515 }, { "epoch": 0.42821576763485475, "grad_norm": 15.930097579956055, "learning_rate": 1.9830041493775936e-05, "loss": 1.2718, "step": 516 }, { "epoch": 0.42904564315352695, "grad_norm": 11.211267471313477, "learning_rate": 1.9829709543568468e-05, "loss": 1.04, "step": 517 }, { "epoch": 0.4298755186721992, "grad_norm": 9.242352485656738, "learning_rate": 1.9829377593360997e-05, "loss": 1.7659, "step": 518 }, { "epoch": 0.4307053941908714, "grad_norm": 7.535037994384766, "learning_rate": 1.982904564315353e-05, "loss": 1.8147, "step": 519 }, { "epoch": 0.4315352697095436, "grad_norm": 7.217496871948242, "learning_rate": 1.982871369294606e-05, "loss": 1.1135, "step": 520 }, { "epoch": 0.4323651452282158, "grad_norm": 10.320106506347656, "learning_rate": 1.9828381742738593e-05, "loss": 2.2624, "step": 521 }, { "epoch": 0.433195020746888, "grad_norm": 12.82776927947998, "learning_rate": 1.9828049792531122e-05, "loss": 2.2067, "step": 522 }, { "epoch": 0.4340248962655602, "grad_norm": 8.319528579711914, "learning_rate": 1.9827717842323654e-05, "loss": 1.8336, "step": 523 }, { "epoch": 0.4348547717842324, "grad_norm": 10.033407211303711, "learning_rate": 1.9827385892116186e-05, "loss": 1.621, "step": 524 }, { "epoch": 0.43568464730290457, "grad_norm": 11.663265228271484, "learning_rate": 1.9827053941908715e-05, "loss": 1.7611, "step": 525 }, { "epoch": 0.43651452282157677, "grad_norm": 9.773290634155273, "learning_rate": 1.9826721991701247e-05, "loss": 1.3264, "step": 526 }, { "epoch": 0.43734439834024896, "grad_norm": 10.150465965270996, "learning_rate": 1.9826390041493776e-05, "loss": 2.0697, "step": 527 }, { "epoch": 0.43817427385892116, "grad_norm": 8.194113731384277, "learning_rate": 1.9826058091286308e-05, "loss": 2.4929, "step": 528 }, { "epoch": 0.43900414937759336, "grad_norm": 15.360188484191895, "learning_rate": 1.982572614107884e-05, "loss": 3.092, "step": 529 }, { "epoch": 0.43983402489626555, 
"grad_norm": 12.139814376831055, "learning_rate": 1.982539419087137e-05, "loss": 2.278, "step": 530 }, { "epoch": 0.44066390041493775, "grad_norm": 11.115652084350586, "learning_rate": 1.98250622406639e-05, "loss": 1.6199, "step": 531 }, { "epoch": 0.44149377593360994, "grad_norm": 14.266292572021484, "learning_rate": 1.9824730290456433e-05, "loss": 2.3861, "step": 532 }, { "epoch": 0.44232365145228214, "grad_norm": 14.102721214294434, "learning_rate": 1.9824398340248965e-05, "loss": 1.4821, "step": 533 }, { "epoch": 0.44315352697095434, "grad_norm": 6.742665767669678, "learning_rate": 1.9824066390041497e-05, "loss": 1.9068, "step": 534 }, { "epoch": 0.44398340248962653, "grad_norm": 6.743689060211182, "learning_rate": 1.9823734439834026e-05, "loss": 1.3432, "step": 535 }, { "epoch": 0.44481327800829873, "grad_norm": 13.790847778320312, "learning_rate": 1.9823402489626558e-05, "loss": 1.4972, "step": 536 }, { "epoch": 0.445643153526971, "grad_norm": 5.9009318351745605, "learning_rate": 1.982307053941909e-05, "loss": 1.8701, "step": 537 }, { "epoch": 0.4464730290456432, "grad_norm": 8.911346435546875, "learning_rate": 1.9822738589211622e-05, "loss": 1.8821, "step": 538 }, { "epoch": 0.4473029045643154, "grad_norm": 7.462369441986084, "learning_rate": 1.982240663900415e-05, "loss": 1.2825, "step": 539 }, { "epoch": 0.44813278008298757, "grad_norm": 6.892119884490967, "learning_rate": 1.9822074688796683e-05, "loss": 1.4002, "step": 540 }, { "epoch": 0.44896265560165977, "grad_norm": 7.8822808265686035, "learning_rate": 1.9821742738589215e-05, "loss": 1.4451, "step": 541 }, { "epoch": 0.44979253112033196, "grad_norm": 9.182154655456543, "learning_rate": 1.9821410788381744e-05, "loss": 0.9527, "step": 542 }, { "epoch": 0.45062240663900416, "grad_norm": 11.479106903076172, "learning_rate": 1.9821078838174276e-05, "loss": 1.4288, "step": 543 }, { "epoch": 0.45145228215767635, "grad_norm": 9.261968612670898, "learning_rate": 1.9820746887966808e-05, "loss": 1.0874, "step": 
544 }, { "epoch": 0.45228215767634855, "grad_norm": 12.43693733215332, "learning_rate": 1.9820414937759337e-05, "loss": 1.7662, "step": 545 }, { "epoch": 0.45311203319502075, "grad_norm": 10.45494270324707, "learning_rate": 1.982008298755187e-05, "loss": 1.5135, "step": 546 }, { "epoch": 0.45394190871369294, "grad_norm": 13.939451217651367, "learning_rate": 1.98197510373444e-05, "loss": 2.4318, "step": 547 }, { "epoch": 0.45477178423236514, "grad_norm": 7.804406642913818, "learning_rate": 1.981941908713693e-05, "loss": 1.2125, "step": 548 }, { "epoch": 0.45560165975103734, "grad_norm": 6.310918807983398, "learning_rate": 1.981908713692946e-05, "loss": 1.7098, "step": 549 }, { "epoch": 0.45643153526970953, "grad_norm": 11.533365249633789, "learning_rate": 1.9818755186721994e-05, "loss": 1.4911, "step": 550 }, { "epoch": 0.45726141078838173, "grad_norm": 9.136207580566406, "learning_rate": 1.9818423236514522e-05, "loss": 1.1627, "step": 551 }, { "epoch": 0.4580912863070539, "grad_norm": 12.000271797180176, "learning_rate": 1.9818091286307055e-05, "loss": 2.3218, "step": 552 }, { "epoch": 0.4589211618257261, "grad_norm": 7.532035827636719, "learning_rate": 1.9817759336099587e-05, "loss": 1.5098, "step": 553 }, { "epoch": 0.4597510373443983, "grad_norm": 6.193575382232666, "learning_rate": 1.981742738589212e-05, "loss": 1.2722, "step": 554 }, { "epoch": 0.4605809128630705, "grad_norm": 9.713109970092773, "learning_rate": 1.9817095435684647e-05, "loss": 2.2317, "step": 555 }, { "epoch": 0.46141078838174276, "grad_norm": 9.847427368164062, "learning_rate": 1.981676348547718e-05, "loss": 1.16, "step": 556 }, { "epoch": 0.46224066390041496, "grad_norm": 9.35519790649414, "learning_rate": 1.981643153526971e-05, "loss": 1.8479, "step": 557 }, { "epoch": 0.46307053941908716, "grad_norm": 10.971839904785156, "learning_rate": 1.9816099585062244e-05, "loss": 1.3879, "step": 558 }, { "epoch": 0.46390041493775935, "grad_norm": 7.915721416473389, "learning_rate": 
1.9815767634854776e-05, "loss": 1.6293, "step": 559 }, { "epoch": 0.46473029045643155, "grad_norm": 6.637838840484619, "learning_rate": 1.9815435684647305e-05, "loss": 1.5209, "step": 560 }, { "epoch": 0.46556016597510375, "grad_norm": 9.599556922912598, "learning_rate": 1.9815103734439837e-05, "loss": 2.2372, "step": 561 }, { "epoch": 0.46639004149377594, "grad_norm": 5.626766204833984, "learning_rate": 1.981477178423237e-05, "loss": 1.3554, "step": 562 }, { "epoch": 0.46721991701244814, "grad_norm": 10.247428894042969, "learning_rate": 1.9814439834024898e-05, "loss": 1.8024, "step": 563 }, { "epoch": 0.46804979253112033, "grad_norm": 9.597233772277832, "learning_rate": 1.981410788381743e-05, "loss": 1.6339, "step": 564 }, { "epoch": 0.46887966804979253, "grad_norm": 5.829065322875977, "learning_rate": 1.981377593360996e-05, "loss": 1.3126, "step": 565 }, { "epoch": 0.4697095435684647, "grad_norm": 7.966991424560547, "learning_rate": 1.981344398340249e-05, "loss": 1.8366, "step": 566 }, { "epoch": 0.4705394190871369, "grad_norm": 5.516387462615967, "learning_rate": 1.9813112033195023e-05, "loss": 1.4647, "step": 567 }, { "epoch": 0.4713692946058091, "grad_norm": 7.562460899353027, "learning_rate": 1.981278008298755e-05, "loss": 1.8819, "step": 568 }, { "epoch": 0.4721991701244813, "grad_norm": 12.320283889770508, "learning_rate": 1.9812448132780083e-05, "loss": 1.8054, "step": 569 }, { "epoch": 0.4730290456431535, "grad_norm": 7.216981410980225, "learning_rate": 1.9812116182572616e-05, "loss": 1.2267, "step": 570 }, { "epoch": 0.4738589211618257, "grad_norm": 10.108872413635254, "learning_rate": 1.9811784232365148e-05, "loss": 1.7495, "step": 571 }, { "epoch": 0.4746887966804979, "grad_norm": 8.729304313659668, "learning_rate": 1.9811452282157676e-05, "loss": 1.592, "step": 572 }, { "epoch": 0.4755186721991701, "grad_norm": 7.772199630737305, "learning_rate": 1.981112033195021e-05, "loss": 1.7167, "step": 573 }, { "epoch": 0.4763485477178423, "grad_norm": 
13.381706237792969, "learning_rate": 1.981078838174274e-05, "loss": 1.4398, "step": 574 }, { "epoch": 0.47717842323651455, "grad_norm": 6.8157477378845215, "learning_rate": 1.9810456431535273e-05, "loss": 1.4784, "step": 575 }, { "epoch": 0.47800829875518674, "grad_norm": 9.59642219543457, "learning_rate": 1.98101244813278e-05, "loss": 2.752, "step": 576 }, { "epoch": 0.47883817427385894, "grad_norm": 6.245513916015625, "learning_rate": 1.9809792531120333e-05, "loss": 0.8729, "step": 577 }, { "epoch": 0.47966804979253114, "grad_norm": 11.22604751586914, "learning_rate": 1.9809460580912866e-05, "loss": 2.2247, "step": 578 }, { "epoch": 0.48049792531120333, "grad_norm": 6.590996265411377, "learning_rate": 1.9809128630705398e-05, "loss": 1.1516, "step": 579 }, { "epoch": 0.48132780082987553, "grad_norm": 7.145528316497803, "learning_rate": 1.9808796680497926e-05, "loss": 1.3432, "step": 580 }, { "epoch": 0.4821576763485477, "grad_norm": 7.657177448272705, "learning_rate": 1.980846473029046e-05, "loss": 1.1789, "step": 581 }, { "epoch": 0.4829875518672199, "grad_norm": 6.350827217102051, "learning_rate": 1.980813278008299e-05, "loss": 1.3375, "step": 582 }, { "epoch": 0.4838174273858921, "grad_norm": 15.868746757507324, "learning_rate": 1.980780082987552e-05, "loss": 1.5094, "step": 583 }, { "epoch": 0.4846473029045643, "grad_norm": 8.272220611572266, "learning_rate": 1.980746887966805e-05, "loss": 2.0577, "step": 584 }, { "epoch": 0.4854771784232365, "grad_norm": 8.6841459274292, "learning_rate": 1.9807136929460584e-05, "loss": 0.9897, "step": 585 }, { "epoch": 0.4863070539419087, "grad_norm": 5.820038318634033, "learning_rate": 1.9806804979253112e-05, "loss": 1.5019, "step": 586 }, { "epoch": 0.4871369294605809, "grad_norm": 10.807679176330566, "learning_rate": 1.9806473029045644e-05, "loss": 1.527, "step": 587 }, { "epoch": 0.4879668049792531, "grad_norm": 4.609764575958252, "learning_rate": 1.9806141078838177e-05, "loss": 1.0532, "step": 588 }, { "epoch": 
0.4887966804979253, "grad_norm": 5.6909565925598145, "learning_rate": 1.9805809128630705e-05, "loss": 1.2515, "step": 589 }, { "epoch": 0.4896265560165975, "grad_norm": 8.31584358215332, "learning_rate": 1.9805477178423237e-05, "loss": 1.6803, "step": 590 }, { "epoch": 0.4904564315352697, "grad_norm": 7.411323547363281, "learning_rate": 1.980514522821577e-05, "loss": 1.3238, "step": 591 }, { "epoch": 0.4912863070539419, "grad_norm": 15.076225280761719, "learning_rate": 1.98048132780083e-05, "loss": 2.6518, "step": 592 }, { "epoch": 0.4921161825726141, "grad_norm": 7.221822261810303, "learning_rate": 1.980448132780083e-05, "loss": 1.3367, "step": 593 }, { "epoch": 0.49294605809128633, "grad_norm": 7.717432975769043, "learning_rate": 1.9804149377593362e-05, "loss": 1.4139, "step": 594 }, { "epoch": 0.49377593360995853, "grad_norm": 11.30334186553955, "learning_rate": 1.9803817427385894e-05, "loss": 0.8873, "step": 595 }, { "epoch": 0.4946058091286307, "grad_norm": 7.644035339355469, "learning_rate": 1.9803485477178427e-05, "loss": 1.6086, "step": 596 }, { "epoch": 0.4954356846473029, "grad_norm": 8.140392303466797, "learning_rate": 1.9803153526970955e-05, "loss": 1.7205, "step": 597 }, { "epoch": 0.4962655601659751, "grad_norm": 7.953341007232666, "learning_rate": 1.9802821576763487e-05, "loss": 1.3428, "step": 598 }, { "epoch": 0.4970954356846473, "grad_norm": 7.670416355133057, "learning_rate": 1.980248962655602e-05, "loss": 1.2115, "step": 599 }, { "epoch": 0.4979253112033195, "grad_norm": 7.5955281257629395, "learning_rate": 1.980215767634855e-05, "loss": 0.8842, "step": 600 }, { "epoch": 0.4987551867219917, "grad_norm": 12.399198532104492, "learning_rate": 1.980182572614108e-05, "loss": 2.8031, "step": 601 }, { "epoch": 0.4995850622406639, "grad_norm": 11.712977409362793, "learning_rate": 1.9801493775933612e-05, "loss": 1.9894, "step": 602 }, { "epoch": 0.5004149377593361, "grad_norm": 9.172057151794434, "learning_rate": 1.9801161825726145e-05, "loss": 1.4946, 
"step": 603 }, { "epoch": 0.5012448132780083, "grad_norm": 5.582091331481934, "learning_rate": 1.9800829875518673e-05, "loss": 1.4167, "step": 604 }, { "epoch": 0.5020746887966805, "grad_norm": 8.31021499633789, "learning_rate": 1.9800497925311205e-05, "loss": 1.1963, "step": 605 }, { "epoch": 0.5029045643153527, "grad_norm": 7.310237884521484, "learning_rate": 1.9800165975103734e-05, "loss": 1.6271, "step": 606 }, { "epoch": 0.5037344398340249, "grad_norm": 6.463324546813965, "learning_rate": 1.9799834024896266e-05, "loss": 1.2437, "step": 607 }, { "epoch": 0.5045643153526971, "grad_norm": 9.826141357421875, "learning_rate": 1.97995020746888e-05, "loss": 1.653, "step": 608 }, { "epoch": 0.5053941908713693, "grad_norm": 8.224804878234863, "learning_rate": 1.9799170124481327e-05, "loss": 2.0502, "step": 609 }, { "epoch": 0.5062240663900415, "grad_norm": 9.904128074645996, "learning_rate": 1.979883817427386e-05, "loss": 2.1603, "step": 610 }, { "epoch": 0.5070539419087137, "grad_norm": 8.134193420410156, "learning_rate": 1.979850622406639e-05, "loss": 1.6157, "step": 611 }, { "epoch": 0.5078838174273859, "grad_norm": 10.464661598205566, "learning_rate": 1.9798174273858923e-05, "loss": 2.0031, "step": 612 }, { "epoch": 0.5087136929460581, "grad_norm": 11.536565780639648, "learning_rate": 1.9797842323651455e-05, "loss": 1.619, "step": 613 }, { "epoch": 0.5095435684647303, "grad_norm": 4.869104385375977, "learning_rate": 1.9797510373443984e-05, "loss": 1.3958, "step": 614 }, { "epoch": 0.5103734439834025, "grad_norm": 11.26481819152832, "learning_rate": 1.9797178423236516e-05, "loss": 2.198, "step": 615 }, { "epoch": 0.5112033195020746, "grad_norm": 7.988707542419434, "learning_rate": 1.979684647302905e-05, "loss": 1.1204, "step": 616 }, { "epoch": 0.5120331950207468, "grad_norm": 14.218883514404297, "learning_rate": 1.979651452282158e-05, "loss": 2.9953, "step": 617 }, { "epoch": 0.512863070539419, "grad_norm": 8.647932052612305, "learning_rate": 1.979618257261411e-05, 
"loss": 1.5382, "step": 618 }, { "epoch": 0.5136929460580912, "grad_norm": 10.152853012084961, "learning_rate": 1.979585062240664e-05, "loss": 1.2932, "step": 619 }, { "epoch": 0.5145228215767634, "grad_norm": 7.523678302764893, "learning_rate": 1.9795518672199173e-05, "loss": 1.2607, "step": 620 }, { "epoch": 0.5153526970954356, "grad_norm": 10.791775703430176, "learning_rate": 1.9795186721991702e-05, "loss": 1.224, "step": 621 }, { "epoch": 0.5161825726141079, "grad_norm": 7.395244121551514, "learning_rate": 1.9794854771784234e-05, "loss": 1.7285, "step": 622 }, { "epoch": 0.5170124481327801, "grad_norm": 7.226534843444824, "learning_rate": 1.9794522821576766e-05, "loss": 1.6532, "step": 623 }, { "epoch": 0.5178423236514523, "grad_norm": 8.731203079223633, "learning_rate": 1.9794190871369295e-05, "loss": 1.692, "step": 624 }, { "epoch": 0.5186721991701245, "grad_norm": 10.136211395263672, "learning_rate": 1.9793858921161827e-05, "loss": 1.1511, "step": 625 }, { "epoch": 0.5195020746887967, "grad_norm": 8.170292854309082, "learning_rate": 1.9793526970954356e-05, "loss": 1.5222, "step": 626 }, { "epoch": 0.5203319502074689, "grad_norm": 9.250843048095703, "learning_rate": 1.9793195020746888e-05, "loss": 2.0432, "step": 627 }, { "epoch": 0.5211618257261411, "grad_norm": 17.282520294189453, "learning_rate": 1.979286307053942e-05, "loss": 2.2762, "step": 628 }, { "epoch": 0.5219917012448133, "grad_norm": 8.5121431350708, "learning_rate": 1.9792531120331952e-05, "loss": 1.2765, "step": 629 }, { "epoch": 0.5228215767634855, "grad_norm": 7.84888219833374, "learning_rate": 1.979219917012448e-05, "loss": 1.6439, "step": 630 }, { "epoch": 0.5236514522821577, "grad_norm": 5.65797233581543, "learning_rate": 1.9791867219917013e-05, "loss": 1.2277, "step": 631 }, { "epoch": 0.5244813278008299, "grad_norm": 9.176640510559082, "learning_rate": 1.9791535269709545e-05, "loss": 1.895, "step": 632 }, { "epoch": 0.5253112033195021, "grad_norm": 8.384190559387207, "learning_rate": 
1.9791203319502077e-05, "loss": 1.3972, "step": 633 }, { "epoch": 0.5261410788381743, "grad_norm": 5.923446178436279, "learning_rate": 1.9790871369294606e-05, "loss": 1.1367, "step": 634 }, { "epoch": 0.5269709543568465, "grad_norm": 4.668921947479248, "learning_rate": 1.9790539419087138e-05, "loss": 1.1052, "step": 635 }, { "epoch": 0.5278008298755187, "grad_norm": 10.75835132598877, "learning_rate": 1.979020746887967e-05, "loss": 1.838, "step": 636 }, { "epoch": 0.5286307053941909, "grad_norm": 6.2999982833862305, "learning_rate": 1.9789875518672202e-05, "loss": 1.4536, "step": 637 }, { "epoch": 0.5294605809128631, "grad_norm": 12.873556137084961, "learning_rate": 1.9789543568464734e-05, "loss": 2.4146, "step": 638 }, { "epoch": 0.5302904564315353, "grad_norm": 6.915092468261719, "learning_rate": 1.9789211618257263e-05, "loss": 1.6121, "step": 639 }, { "epoch": 0.5311203319502075, "grad_norm": 16.096851348876953, "learning_rate": 1.9788879668049795e-05, "loss": 2.2482, "step": 640 }, { "epoch": 0.5319502074688797, "grad_norm": 13.273808479309082, "learning_rate": 1.9788547717842327e-05, "loss": 2.212, "step": 641 }, { "epoch": 0.5327800829875519, "grad_norm": 11.95026969909668, "learning_rate": 1.9788215767634856e-05, "loss": 1.8151, "step": 642 }, { "epoch": 0.5336099585062241, "grad_norm": 11.391117095947266, "learning_rate": 1.9787883817427388e-05, "loss": 1.7185, "step": 643 }, { "epoch": 0.5344398340248963, "grad_norm": 9.096097946166992, "learning_rate": 1.9787551867219917e-05, "loss": 1.8694, "step": 644 }, { "epoch": 0.5352697095435685, "grad_norm": 30.01875114440918, "learning_rate": 1.978721991701245e-05, "loss": 1.4723, "step": 645 }, { "epoch": 0.5360995850622406, "grad_norm": 13.166624069213867, "learning_rate": 1.978688796680498e-05, "loss": 1.8882, "step": 646 }, { "epoch": 0.5369294605809128, "grad_norm": 8.874157905578613, "learning_rate": 1.978655601659751e-05, "loss": 1.0965, "step": 647 }, { "epoch": 0.537759336099585, "grad_norm": 
5.912436008453369, "learning_rate": 1.9786224066390042e-05, "loss": 1.3933, "step": 648 }, { "epoch": 0.5385892116182572, "grad_norm": 7.729119300842285, "learning_rate": 1.9785892116182574e-05, "loss": 1.954, "step": 649 }, { "epoch": 0.5394190871369294, "grad_norm": 10.648614883422852, "learning_rate": 1.9785560165975106e-05, "loss": 1.0942, "step": 650 }, { "epoch": 0.5402489626556016, "grad_norm": 8.761894226074219, "learning_rate": 1.9785228215767635e-05, "loss": 1.6174, "step": 651 }, { "epoch": 0.5410788381742738, "grad_norm": 7.414854526519775, "learning_rate": 1.9784896265560167e-05, "loss": 1.2752, "step": 652 }, { "epoch": 0.541908713692946, "grad_norm": 8.263986587524414, "learning_rate": 1.97845643153527e-05, "loss": 1.6829, "step": 653 }, { "epoch": 0.5427385892116182, "grad_norm": 8.466611862182617, "learning_rate": 1.978423236514523e-05, "loss": 1.8801, "step": 654 }, { "epoch": 0.5435684647302904, "grad_norm": 6.418078899383545, "learning_rate": 1.978390041493776e-05, "loss": 0.868, "step": 655 }, { "epoch": 0.5443983402489626, "grad_norm": 15.36385440826416, "learning_rate": 1.9783568464730292e-05, "loss": 1.9468, "step": 656 }, { "epoch": 0.5452282157676348, "grad_norm": 8.366915702819824, "learning_rate": 1.9783236514522824e-05, "loss": 0.9136, "step": 657 }, { "epoch": 0.546058091286307, "grad_norm": 11.184041976928711, "learning_rate": 1.9782904564315356e-05, "loss": 2.0567, "step": 658 }, { "epoch": 0.5468879668049793, "grad_norm": 8.681366920471191, "learning_rate": 1.9782572614107885e-05, "loss": 1.1615, "step": 659 }, { "epoch": 0.5477178423236515, "grad_norm": 11.926992416381836, "learning_rate": 1.9782240663900417e-05, "loss": 2.4058, "step": 660 }, { "epoch": 0.5485477178423237, "grad_norm": 15.221156120300293, "learning_rate": 1.978190871369295e-05, "loss": 2.0681, "step": 661 }, { "epoch": 0.5493775933609959, "grad_norm": 7.921992778778076, "learning_rate": 1.9781576763485478e-05, "loss": 1.665, "step": 662 }, { "epoch": 
0.5502074688796681, "grad_norm": 10.545568466186523, "learning_rate": 1.978124481327801e-05, "loss": 1.5994, "step": 663 }, { "epoch": 0.5510373443983403, "grad_norm": 8.078706741333008, "learning_rate": 1.9780912863070542e-05, "loss": 1.496, "step": 664 }, { "epoch": 0.5518672199170125, "grad_norm": 13.505032539367676, "learning_rate": 1.978058091286307e-05, "loss": 2.1216, "step": 665 }, { "epoch": 0.5526970954356847, "grad_norm": 11.040108680725098, "learning_rate": 1.9780248962655603e-05, "loss": 2.0932, "step": 666 }, { "epoch": 0.5535269709543569, "grad_norm": 13.413844108581543, "learning_rate": 1.9779917012448135e-05, "loss": 1.2936, "step": 667 }, { "epoch": 0.5543568464730291, "grad_norm": 8.612730026245117, "learning_rate": 1.9779585062240664e-05, "loss": 1.2773, "step": 668 }, { "epoch": 0.5551867219917013, "grad_norm": 8.716302871704102, "learning_rate": 1.9779253112033196e-05, "loss": 1.3367, "step": 669 }, { "epoch": 0.5560165975103735, "grad_norm": 8.150843620300293, "learning_rate": 1.9778921161825728e-05, "loss": 1.0204, "step": 670 }, { "epoch": 0.5568464730290457, "grad_norm": 12.888699531555176, "learning_rate": 1.977858921161826e-05, "loss": 2.0488, "step": 671 }, { "epoch": 0.5576763485477179, "grad_norm": 9.963764190673828, "learning_rate": 1.977825726141079e-05, "loss": 1.2267, "step": 672 }, { "epoch": 0.5585062240663901, "grad_norm": 11.074448585510254, "learning_rate": 1.977792531120332e-05, "loss": 1.7493, "step": 673 }, { "epoch": 0.5593360995850623, "grad_norm": 8.585468292236328, "learning_rate": 1.9777593360995853e-05, "loss": 1.575, "step": 674 }, { "epoch": 0.5601659751037344, "grad_norm": 11.077042579650879, "learning_rate": 1.9777261410788385e-05, "loss": 2.0512, "step": 675 }, { "epoch": 0.5609958506224066, "grad_norm": 9.678529739379883, "learning_rate": 1.9776929460580914e-05, "loss": 1.0406, "step": 676 }, { "epoch": 0.5618257261410788, "grad_norm": 12.53193187713623, "learning_rate": 1.9776597510373446e-05, "loss": 1.7023, 
"step": 677 }, { "epoch": 0.562655601659751, "grad_norm": 13.62054443359375, "learning_rate": 1.9776265560165978e-05, "loss": 2.0756, "step": 678 }, { "epoch": 0.5634854771784232, "grad_norm": 13.577149391174316, "learning_rate": 1.977593360995851e-05, "loss": 1.5871, "step": 679 }, { "epoch": 0.5643153526970954, "grad_norm": 9.034966468811035, "learning_rate": 1.977560165975104e-05, "loss": 1.5816, "step": 680 }, { "epoch": 0.5651452282157676, "grad_norm": 7.533294200897217, "learning_rate": 1.977526970954357e-05, "loss": 1.4724, "step": 681 }, { "epoch": 0.5659751037344398, "grad_norm": 10.94795036315918, "learning_rate": 1.97749377593361e-05, "loss": 1.2689, "step": 682 }, { "epoch": 0.566804979253112, "grad_norm": 10.689513206481934, "learning_rate": 1.9774605809128632e-05, "loss": 1.8322, "step": 683 }, { "epoch": 0.5676348547717842, "grad_norm": 12.682149887084961, "learning_rate": 1.9774273858921164e-05, "loss": 2.5135, "step": 684 }, { "epoch": 0.5684647302904564, "grad_norm": 8.54594612121582, "learning_rate": 1.9773941908713693e-05, "loss": 1.6224, "step": 685 }, { "epoch": 0.5692946058091286, "grad_norm": 10.73337173461914, "learning_rate": 1.9773609958506225e-05, "loss": 1.5169, "step": 686 }, { "epoch": 0.5701244813278008, "grad_norm": 10.415870666503906, "learning_rate": 1.9773278008298757e-05, "loss": 1.9321, "step": 687 }, { "epoch": 0.570954356846473, "grad_norm": 9.337287902832031, "learning_rate": 1.9772946058091286e-05, "loss": 1.4509, "step": 688 }, { "epoch": 0.5717842323651452, "grad_norm": 6.574709415435791, "learning_rate": 1.9772614107883818e-05, "loss": 0.8005, "step": 689 }, { "epoch": 0.5726141078838174, "grad_norm": 8.107803344726562, "learning_rate": 1.977228215767635e-05, "loss": 1.2528, "step": 690 }, { "epoch": 0.5734439834024896, "grad_norm": 10.66901683807373, "learning_rate": 1.9771950207468882e-05, "loss": 1.3474, "step": 691 }, { "epoch": 0.5742738589211618, "grad_norm": 10.224052429199219, "learning_rate": 
1.9771618257261414e-05, "loss": 1.9654, "step": 692 }, { "epoch": 0.575103734439834, "grad_norm": 7.848286151885986, "learning_rate": 1.9771286307053943e-05, "loss": 1.4646, "step": 693 }, { "epoch": 0.5759336099585062, "grad_norm": 9.191915512084961, "learning_rate": 1.9770954356846475e-05, "loss": 1.6047, "step": 694 }, { "epoch": 0.5767634854771784, "grad_norm": 9.359247207641602, "learning_rate": 1.9770622406639007e-05, "loss": 1.0537, "step": 695 }, { "epoch": 0.5775933609958506, "grad_norm": 14.742951393127441, "learning_rate": 1.977029045643154e-05, "loss": 2.2356, "step": 696 }, { "epoch": 0.5784232365145229, "grad_norm": 7.093631267547607, "learning_rate": 1.9769958506224068e-05, "loss": 0.9209, "step": 697 }, { "epoch": 0.5792531120331951, "grad_norm": 12.236814498901367, "learning_rate": 1.97696265560166e-05, "loss": 2.1056, "step": 698 }, { "epoch": 0.5800829875518673, "grad_norm": 7.313275337219238, "learning_rate": 1.9769294605809132e-05, "loss": 1.5495, "step": 699 }, { "epoch": 0.5809128630705395, "grad_norm": 11.183744430541992, "learning_rate": 1.976896265560166e-05, "loss": 2.3929, "step": 700 }, { "epoch": 0.5817427385892117, "grad_norm": 10.440017700195312, "learning_rate": 1.9768630705394193e-05, "loss": 1.3819, "step": 701 }, { "epoch": 0.5825726141078839, "grad_norm": 13.094622611999512, "learning_rate": 1.9768298755186725e-05, "loss": 2.1213, "step": 702 }, { "epoch": 0.583402489626556, "grad_norm": 8.756860733032227, "learning_rate": 1.9767966804979254e-05, "loss": 2.5395, "step": 703 }, { "epoch": 0.5842323651452282, "grad_norm": 11.573752403259277, "learning_rate": 1.9767634854771786e-05, "loss": 1.8652, "step": 704 }, { "epoch": 0.5850622406639004, "grad_norm": 8.790217399597168, "learning_rate": 1.9767302904564315e-05, "loss": 1.5457, "step": 705 }, { "epoch": 0.5858921161825726, "grad_norm": 11.009099960327148, "learning_rate": 1.9766970954356847e-05, "loss": 2.0724, "step": 706 }, { "epoch": 0.5867219917012448, "grad_norm": 
16.338903427124023, "learning_rate": 1.976663900414938e-05, "loss": 1.6365, "step": 707 }, { "epoch": 0.587551867219917, "grad_norm": 8.269020080566406, "learning_rate": 1.976630705394191e-05, "loss": 1.1684, "step": 708 }, { "epoch": 0.5883817427385892, "grad_norm": 10.088255882263184, "learning_rate": 1.976597510373444e-05, "loss": 1.8064, "step": 709 }, { "epoch": 0.5892116182572614, "grad_norm": 11.757061958312988, "learning_rate": 1.976564315352697e-05, "loss": 2.14, "step": 710 }, { "epoch": 0.5900414937759336, "grad_norm": 7.7814249992370605, "learning_rate": 1.9765311203319504e-05, "loss": 1.4225, "step": 711 }, { "epoch": 0.5908713692946058, "grad_norm": 7.738471984863281, "learning_rate": 1.9764979253112036e-05, "loss": 2.3642, "step": 712 }, { "epoch": 0.591701244813278, "grad_norm": 5.904284477233887, "learning_rate": 1.9764647302904565e-05, "loss": 1.3455, "step": 713 }, { "epoch": 0.5925311203319502, "grad_norm": 10.143125534057617, "learning_rate": 1.9764315352697097e-05, "loss": 0.9673, "step": 714 }, { "epoch": 0.5933609958506224, "grad_norm": 7.445408344268799, "learning_rate": 1.976398340248963e-05, "loss": 1.8593, "step": 715 }, { "epoch": 0.5941908713692946, "grad_norm": 11.045828819274902, "learning_rate": 1.976365145228216e-05, "loss": 2.2966, "step": 716 }, { "epoch": 0.5950207468879668, "grad_norm": 9.600874900817871, "learning_rate": 1.9763319502074693e-05, "loss": 1.6832, "step": 717 }, { "epoch": 0.595850622406639, "grad_norm": 13.292166709899902, "learning_rate": 1.9762987551867222e-05, "loss": 2.1886, "step": 718 }, { "epoch": 0.5966804979253112, "grad_norm": 9.911042213439941, "learning_rate": 1.9762655601659754e-05, "loss": 1.446, "step": 719 }, { "epoch": 0.5975103734439834, "grad_norm": 8.747103691101074, "learning_rate": 1.9762323651452286e-05, "loss": 1.4078, "step": 720 }, { "epoch": 0.5983402489626556, "grad_norm": 14.633676528930664, "learning_rate": 1.9761991701244815e-05, "loss": 1.2771, "step": 721 }, { "epoch": 
0.5991701244813278, "grad_norm": 6.393072128295898, "learning_rate": 1.9761659751037347e-05, "loss": 1.4169, "step": 722 }, { "epoch": 0.6, "grad_norm": 9.802299499511719, "learning_rate": 1.9761327800829876e-05, "loss": 1.6175, "step": 723 }, { "epoch": 0.6008298755186722, "grad_norm": 13.831559181213379, "learning_rate": 1.9760995850622408e-05, "loss": 1.3907, "step": 724 }, { "epoch": 0.6016597510373444, "grad_norm": 12.137506484985352, "learning_rate": 1.976066390041494e-05, "loss": 1.1344, "step": 725 }, { "epoch": 0.6024896265560166, "grad_norm": 8.58517837524414, "learning_rate": 1.976033195020747e-05, "loss": 0.6904, "step": 726 }, { "epoch": 0.6033195020746888, "grad_norm": 6.833664894104004, "learning_rate": 1.976e-05, "loss": 0.8722, "step": 727 }, { "epoch": 0.604149377593361, "grad_norm": 9.584247589111328, "learning_rate": 1.9759668049792533e-05, "loss": 1.9674, "step": 728 }, { "epoch": 0.6049792531120332, "grad_norm": 6.578160285949707, "learning_rate": 1.9759336099585065e-05, "loss": 1.4092, "step": 729 }, { "epoch": 0.6058091286307054, "grad_norm": 12.280091285705566, "learning_rate": 1.9759004149377593e-05, "loss": 1.2164, "step": 730 }, { "epoch": 0.6066390041493775, "grad_norm": 6.001800537109375, "learning_rate": 1.9758672199170126e-05, "loss": 1.6386, "step": 731 }, { "epoch": 0.6074688796680497, "grad_norm": 16.502803802490234, "learning_rate": 1.9758340248962658e-05, "loss": 2.4885, "step": 732 }, { "epoch": 0.6082987551867219, "grad_norm": 6.5544939041137695, "learning_rate": 1.975800829875519e-05, "loss": 1.537, "step": 733 }, { "epoch": 0.6091286307053941, "grad_norm": 7.828271389007568, "learning_rate": 1.975767634854772e-05, "loss": 1.4403, "step": 734 }, { "epoch": 0.6099585062240664, "grad_norm": 7.6147074699401855, "learning_rate": 1.975734439834025e-05, "loss": 2.0335, "step": 735 }, { "epoch": 0.6107883817427386, "grad_norm": 7.613832950592041, "learning_rate": 1.9757012448132783e-05, "loss": 1.623, "step": 736 }, { "epoch": 
0.6116182572614108, "grad_norm": 13.787713050842285, "learning_rate": 1.9756680497925315e-05, "loss": 1.9449, "step": 737 }, { "epoch": 0.612448132780083, "grad_norm": 11.189668655395508, "learning_rate": 1.9756348547717844e-05, "loss": 1.7438, "step": 738 }, { "epoch": 0.6132780082987552, "grad_norm": 10.112369537353516, "learning_rate": 1.9756016597510376e-05, "loss": 2.0596, "step": 739 }, { "epoch": 0.6141078838174274, "grad_norm": 9.063736915588379, "learning_rate": 1.9755684647302908e-05, "loss": 1.1049, "step": 740 }, { "epoch": 0.6149377593360996, "grad_norm": 7.2860493659973145, "learning_rate": 1.9755352697095436e-05, "loss": 1.7377, "step": 741 }, { "epoch": 0.6157676348547718, "grad_norm": 9.075769424438477, "learning_rate": 1.975502074688797e-05, "loss": 1.3223, "step": 742 }, { "epoch": 0.616597510373444, "grad_norm": 11.337075233459473, "learning_rate": 1.9754688796680497e-05, "loss": 1.5889, "step": 743 }, { "epoch": 0.6174273858921162, "grad_norm": 11.580168724060059, "learning_rate": 1.975435684647303e-05, "loss": 1.3686, "step": 744 }, { "epoch": 0.6182572614107884, "grad_norm": 9.199110984802246, "learning_rate": 1.975402489626556e-05, "loss": 1.3118, "step": 745 }, { "epoch": 0.6190871369294606, "grad_norm": 11.595465660095215, "learning_rate": 1.9753692946058094e-05, "loss": 0.9407, "step": 746 }, { "epoch": 0.6199170124481328, "grad_norm": 11.991275787353516, "learning_rate": 1.9753360995850622e-05, "loss": 1.7152, "step": 747 }, { "epoch": 0.620746887966805, "grad_norm": 8.372994422912598, "learning_rate": 1.9753029045643154e-05, "loss": 1.4826, "step": 748 }, { "epoch": 0.6215767634854772, "grad_norm": 10.05420970916748, "learning_rate": 1.9752697095435687e-05, "loss": 0.9941, "step": 749 }, { "epoch": 0.6224066390041494, "grad_norm": 8.930930137634277, "learning_rate": 1.975236514522822e-05, "loss": 1.4397, "step": 750 }, { "epoch": 0.6232365145228216, "grad_norm": 12.85309886932373, "learning_rate": 1.9752033195020747e-05, "loss": 2.1171, 
"step": 751 }, { "epoch": 0.6240663900414938, "grad_norm": 7.537475109100342, "learning_rate": 1.975170124481328e-05, "loss": 1.7403, "step": 752 }, { "epoch": 0.624896265560166, "grad_norm": 5.249268054962158, "learning_rate": 1.975136929460581e-05, "loss": 1.4997, "step": 753 }, { "epoch": 0.6257261410788382, "grad_norm": 7.7571587562561035, "learning_rate": 1.9751037344398344e-05, "loss": 1.7462, "step": 754 }, { "epoch": 0.6265560165975104, "grad_norm": 10.867100715637207, "learning_rate": 1.9750705394190872e-05, "loss": 2.1749, "step": 755 }, { "epoch": 0.6273858921161826, "grad_norm": 12.078096389770508, "learning_rate": 1.9750373443983405e-05, "loss": 1.915, "step": 756 }, { "epoch": 0.6282157676348548, "grad_norm": 8.686990737915039, "learning_rate": 1.9750041493775937e-05, "loss": 1.4722, "step": 757 }, { "epoch": 0.629045643153527, "grad_norm": 8.576576232910156, "learning_rate": 1.974970954356847e-05, "loss": 2.1231, "step": 758 }, { "epoch": 0.6298755186721992, "grad_norm": 8.872736930847168, "learning_rate": 1.9749377593360997e-05, "loss": 1.0889, "step": 759 }, { "epoch": 0.6307053941908713, "grad_norm": 16.927043914794922, "learning_rate": 1.974904564315353e-05, "loss": 1.2562, "step": 760 }, { "epoch": 0.6315352697095435, "grad_norm": 6.313920021057129, "learning_rate": 1.974871369294606e-05, "loss": 1.2695, "step": 761 }, { "epoch": 0.6323651452282157, "grad_norm": 6.409618377685547, "learning_rate": 1.974838174273859e-05, "loss": 1.4002, "step": 762 }, { "epoch": 0.6331950207468879, "grad_norm": 10.722704887390137, "learning_rate": 1.9748049792531123e-05, "loss": 1.7879, "step": 763 }, { "epoch": 0.6340248962655601, "grad_norm": 7.753451824188232, "learning_rate": 1.974771784232365e-05, "loss": 2.1283, "step": 764 }, { "epoch": 0.6348547717842323, "grad_norm": 10.715447425842285, "learning_rate": 1.9747385892116183e-05, "loss": 2.093, "step": 765 }, { "epoch": 0.6356846473029045, "grad_norm": 10.578195571899414, "learning_rate": 
1.9747053941908715e-05, "loss": 2.1623, "step": 766 }, { "epoch": 0.6365145228215767, "grad_norm": 6.314997673034668, "learning_rate": 1.9746721991701244e-05, "loss": 1.3395, "step": 767 }, { "epoch": 0.6373443983402489, "grad_norm": 6.739042282104492, "learning_rate": 1.9746390041493776e-05, "loss": 1.4348, "step": 768 }, { "epoch": 0.6381742738589211, "grad_norm": 7.333440780639648, "learning_rate": 1.974605809128631e-05, "loss": 1.4534, "step": 769 }, { "epoch": 0.6390041493775933, "grad_norm": 8.203914642333984, "learning_rate": 1.974572614107884e-05, "loss": 1.1822, "step": 770 }, { "epoch": 0.6398340248962655, "grad_norm": 10.746581077575684, "learning_rate": 1.9745394190871373e-05, "loss": 0.9045, "step": 771 }, { "epoch": 0.6406639004149378, "grad_norm": 8.722668647766113, "learning_rate": 1.97450622406639e-05, "loss": 1.6077, "step": 772 }, { "epoch": 0.64149377593361, "grad_norm": 10.541608810424805, "learning_rate": 1.9744730290456433e-05, "loss": 2.1432, "step": 773 }, { "epoch": 0.6423236514522822, "grad_norm": 9.159321784973145, "learning_rate": 1.9744398340248966e-05, "loss": 1.9592, "step": 774 }, { "epoch": 0.6431535269709544, "grad_norm": 10.044557571411133, "learning_rate": 1.9744066390041498e-05, "loss": 1.8348, "step": 775 }, { "epoch": 0.6439834024896266, "grad_norm": 8.359901428222656, "learning_rate": 1.9743734439834026e-05, "loss": 1.2136, "step": 776 }, { "epoch": 0.6448132780082988, "grad_norm": 14.731941223144531, "learning_rate": 1.974340248962656e-05, "loss": 2.4061, "step": 777 }, { "epoch": 0.645643153526971, "grad_norm": 9.754042625427246, "learning_rate": 1.974307053941909e-05, "loss": 1.3719, "step": 778 }, { "epoch": 0.6464730290456432, "grad_norm": 12.280746459960938, "learning_rate": 1.974273858921162e-05, "loss": 0.8904, "step": 779 }, { "epoch": 0.6473029045643154, "grad_norm": 8.001644134521484, "learning_rate": 1.974240663900415e-05, "loss": 1.298, "step": 780 }, { "epoch": 0.6481327800829876, "grad_norm": 
8.180662155151367, "learning_rate": 1.9742074688796684e-05, "loss": 1.4396, "step": 781 }, { "epoch": 0.6489626556016598, "grad_norm": 8.732044219970703, "learning_rate": 1.9741742738589212e-05, "loss": 1.1532, "step": 782 }, { "epoch": 0.649792531120332, "grad_norm": 14.06383991241455, "learning_rate": 1.9741410788381744e-05, "loss": 1.2168, "step": 783 }, { "epoch": 0.6506224066390042, "grad_norm": 8.976767539978027, "learning_rate": 1.9741078838174273e-05, "loss": 1.7781, "step": 784 }, { "epoch": 0.6514522821576764, "grad_norm": 10.17072868347168, "learning_rate": 1.9740746887966805e-05, "loss": 1.5846, "step": 785 }, { "epoch": 0.6522821576763486, "grad_norm": 8.318115234375, "learning_rate": 1.9740414937759337e-05, "loss": 1.1029, "step": 786 }, { "epoch": 0.6531120331950208, "grad_norm": 14.979177474975586, "learning_rate": 1.974008298755187e-05, "loss": 1.2154, "step": 787 }, { "epoch": 0.653941908713693, "grad_norm": 15.549798965454102, "learning_rate": 1.9739751037344398e-05, "loss": 1.7735, "step": 788 }, { "epoch": 0.6547717842323652, "grad_norm": 14.332062721252441, "learning_rate": 1.973941908713693e-05, "loss": 1.415, "step": 789 }, { "epoch": 0.6556016597510373, "grad_norm": 8.440523147583008, "learning_rate": 1.9739087136929462e-05, "loss": 2.199, "step": 790 }, { "epoch": 0.6564315352697095, "grad_norm": 11.047229766845703, "learning_rate": 1.9738755186721994e-05, "loss": 1.2234, "step": 791 }, { "epoch": 0.6572614107883817, "grad_norm": 9.492706298828125, "learning_rate": 1.9738423236514523e-05, "loss": 0.727, "step": 792 }, { "epoch": 0.6580912863070539, "grad_norm": 11.41375732421875, "learning_rate": 1.9738091286307055e-05, "loss": 2.489, "step": 793 }, { "epoch": 0.6589211618257261, "grad_norm": 10.563092231750488, "learning_rate": 1.9737759336099587e-05, "loss": 2.1849, "step": 794 }, { "epoch": 0.6597510373443983, "grad_norm": 13.83169174194336, "learning_rate": 1.973742738589212e-05, "loss": 1.4115, "step": 795 }, { "epoch": 
0.6605809128630705, "grad_norm": 8.701924324035645, "learning_rate": 1.9737095435684648e-05, "loss": 1.9359, "step": 796 }, { "epoch": 0.6614107883817427, "grad_norm": 7.3775315284729, "learning_rate": 1.973676348547718e-05, "loss": 1.1722, "step": 797 }, { "epoch": 0.6622406639004149, "grad_norm": 7.216670513153076, "learning_rate": 1.9736431535269712e-05, "loss": 1.5493, "step": 798 }, { "epoch": 0.6630705394190871, "grad_norm": 10.264052391052246, "learning_rate": 1.973609958506224e-05, "loss": 2.2102, "step": 799 }, { "epoch": 0.6639004149377593, "grad_norm": 10.368277549743652, "learning_rate": 1.9735767634854773e-05, "loss": 1.5989, "step": 800 }, { "epoch": 0.6647302904564315, "grad_norm": 8.545835494995117, "learning_rate": 1.9735435684647305e-05, "loss": 1.4001, "step": 801 }, { "epoch": 0.6655601659751037, "grad_norm": 6.146645545959473, "learning_rate": 1.9735103734439834e-05, "loss": 1.1646, "step": 802 }, { "epoch": 0.6663900414937759, "grad_norm": 9.472197532653809, "learning_rate": 1.9734771784232366e-05, "loss": 1.1113, "step": 803 }, { "epoch": 0.6672199170124481, "grad_norm": 12.905648231506348, "learning_rate": 1.9734439834024898e-05, "loss": 2.1529, "step": 804 }, { "epoch": 0.6680497925311203, "grad_norm": 7.464044094085693, "learning_rate": 1.9734107883817427e-05, "loss": 0.9371, "step": 805 }, { "epoch": 0.6688796680497925, "grad_norm": 7.3920135498046875, "learning_rate": 1.973377593360996e-05, "loss": 1.5428, "step": 806 }, { "epoch": 0.6697095435684647, "grad_norm": 8.556690216064453, "learning_rate": 1.973344398340249e-05, "loss": 1.8888, "step": 807 }, { "epoch": 0.6705394190871369, "grad_norm": 10.260435104370117, "learning_rate": 1.9733112033195023e-05, "loss": 2.3388, "step": 808 }, { "epoch": 0.6713692946058091, "grad_norm": 9.792708396911621, "learning_rate": 1.9732780082987552e-05, "loss": 1.2008, "step": 809 }, { "epoch": 0.6721991701244814, "grad_norm": 8.983231544494629, "learning_rate": 1.9732448132780084e-05, "loss": 1.5587, 
"step": 810 }, { "epoch": 0.6730290456431536, "grad_norm": 12.472162246704102, "learning_rate": 1.9732116182572616e-05, "loss": 1.3056, "step": 811 }, { "epoch": 0.6738589211618258, "grad_norm": 13.35712718963623, "learning_rate": 1.973178423236515e-05, "loss": 2.7942, "step": 812 }, { "epoch": 0.674688796680498, "grad_norm": 11.83455753326416, "learning_rate": 1.9731452282157677e-05, "loss": 1.9778, "step": 813 }, { "epoch": 0.6755186721991702, "grad_norm": 7.5075249671936035, "learning_rate": 1.973112033195021e-05, "loss": 1.4825, "step": 814 }, { "epoch": 0.6763485477178424, "grad_norm": 7.411449432373047, "learning_rate": 1.973078838174274e-05, "loss": 1.0477, "step": 815 }, { "epoch": 0.6771784232365146, "grad_norm": 8.382607460021973, "learning_rate": 1.9730456431535273e-05, "loss": 1.663, "step": 816 }, { "epoch": 0.6780082987551868, "grad_norm": 10.166770935058594, "learning_rate": 1.9730124481327802e-05, "loss": 1.4579, "step": 817 }, { "epoch": 0.678838174273859, "grad_norm": 7.214890956878662, "learning_rate": 1.9729792531120334e-05, "loss": 1.7276, "step": 818 }, { "epoch": 0.6796680497925311, "grad_norm": 7.785235404968262, "learning_rate": 1.9729460580912866e-05, "loss": 1.5525, "step": 819 }, { "epoch": 0.6804979253112033, "grad_norm": 6.455045700073242, "learning_rate": 1.9729128630705395e-05, "loss": 1.5079, "step": 820 }, { "epoch": 0.6813278008298755, "grad_norm": 6.133499622344971, "learning_rate": 1.9728796680497927e-05, "loss": 1.4535, "step": 821 }, { "epoch": 0.6821576763485477, "grad_norm": 10.2451171875, "learning_rate": 1.9728464730290456e-05, "loss": 0.9592, "step": 822 }, { "epoch": 0.6829875518672199, "grad_norm": 7.533570766448975, "learning_rate": 1.9728132780082988e-05, "loss": 1.9152, "step": 823 }, { "epoch": 0.6838174273858921, "grad_norm": 11.145341873168945, "learning_rate": 1.972780082987552e-05, "loss": 1.9694, "step": 824 }, { "epoch": 0.6846473029045643, "grad_norm": 7.599055290222168, "learning_rate": 
1.9727468879668052e-05, "loss": 1.6797, "step": 825 }, { "epoch": 0.6854771784232365, "grad_norm": 7.103338718414307, "learning_rate": 1.972713692946058e-05, "loss": 1.1758, "step": 826 }, { "epoch": 0.6863070539419087, "grad_norm": 13.526860237121582, "learning_rate": 1.9726804979253113e-05, "loss": 2.6481, "step": 827 }, { "epoch": 0.6871369294605809, "grad_norm": 17.797550201416016, "learning_rate": 1.9726473029045645e-05, "loss": 2.5532, "step": 828 }, { "epoch": 0.6879668049792531, "grad_norm": 12.104155540466309, "learning_rate": 1.9726141078838177e-05, "loss": 2.2935, "step": 829 }, { "epoch": 0.6887966804979253, "grad_norm": 10.271782875061035, "learning_rate": 1.9725809128630706e-05, "loss": 1.8163, "step": 830 }, { "epoch": 0.6896265560165975, "grad_norm": 7.33585786819458, "learning_rate": 1.9725477178423238e-05, "loss": 1.2349, "step": 831 }, { "epoch": 0.6904564315352697, "grad_norm": 8.205761909484863, "learning_rate": 1.972514522821577e-05, "loss": 1.0548, "step": 832 }, { "epoch": 0.6912863070539419, "grad_norm": 11.850053787231445, "learning_rate": 1.9724813278008302e-05, "loss": 1.9109, "step": 833 }, { "epoch": 0.6921161825726141, "grad_norm": 12.743362426757812, "learning_rate": 1.972448132780083e-05, "loss": 1.5495, "step": 834 }, { "epoch": 0.6929460580912863, "grad_norm": 5.612105846405029, "learning_rate": 1.9724149377593363e-05, "loss": 0.9216, "step": 835 }, { "epoch": 0.6937759336099585, "grad_norm": 14.846869468688965, "learning_rate": 1.9723817427385895e-05, "loss": 2.0012, "step": 836 }, { "epoch": 0.6946058091286307, "grad_norm": 7.577256202697754, "learning_rate": 1.9723485477178424e-05, "loss": 1.5972, "step": 837 }, { "epoch": 0.6954356846473029, "grad_norm": 10.01706314086914, "learning_rate": 1.9723153526970956e-05, "loss": 2.1875, "step": 838 }, { "epoch": 0.6962655601659751, "grad_norm": 15.011361122131348, "learning_rate": 1.9722821576763488e-05, "loss": 2.3971, "step": 839 }, { "epoch": 0.6970954356846473, "grad_norm": 
10.761693000793457, "learning_rate": 1.9722489626556017e-05, "loss": 2.3111, "step": 840 }, { "epoch": 0.6979253112033195, "grad_norm": 7.052284240722656, "learning_rate": 1.972215767634855e-05, "loss": 1.3448, "step": 841 }, { "epoch": 0.6987551867219917, "grad_norm": 8.3283052444458, "learning_rate": 1.972182572614108e-05, "loss": 1.4859, "step": 842 }, { "epoch": 0.6995850622406639, "grad_norm": 9.446699142456055, "learning_rate": 1.972149377593361e-05, "loss": 0.853, "step": 843 }, { "epoch": 0.700414937759336, "grad_norm": 6.608066558837891, "learning_rate": 1.9721161825726142e-05, "loss": 1.4092, "step": 844 }, { "epoch": 0.7012448132780082, "grad_norm": 13.260890007019043, "learning_rate": 1.9720829875518674e-05, "loss": 1.2872, "step": 845 }, { "epoch": 0.7020746887966804, "grad_norm": 5.204719543457031, "learning_rate": 1.9720497925311203e-05, "loss": 0.8434, "step": 846 }, { "epoch": 0.7029045643153526, "grad_norm": 11.822699546813965, "learning_rate": 1.9720165975103735e-05, "loss": 1.3929, "step": 847 }, { "epoch": 0.703734439834025, "grad_norm": 6.670868396759033, "learning_rate": 1.9719834024896267e-05, "loss": 1.9495, "step": 848 }, { "epoch": 0.7045643153526971, "grad_norm": 9.776046752929688, "learning_rate": 1.97195020746888e-05, "loss": 1.3567, "step": 849 }, { "epoch": 0.7053941908713693, "grad_norm": 5.669515609741211, "learning_rate": 1.971917012448133e-05, "loss": 1.4155, "step": 850 }, { "epoch": 0.7062240663900415, "grad_norm": 7.804290771484375, "learning_rate": 1.971883817427386e-05, "loss": 1.716, "step": 851 }, { "epoch": 0.7070539419087137, "grad_norm": 12.972235679626465, "learning_rate": 1.9718506224066392e-05, "loss": 1.4463, "step": 852 }, { "epoch": 0.7078838174273859, "grad_norm": 10.901017189025879, "learning_rate": 1.9718174273858924e-05, "loss": 1.5951, "step": 853 }, { "epoch": 0.7087136929460581, "grad_norm": 9.106499671936035, "learning_rate": 1.9717842323651456e-05, "loss": 1.4899, "step": 854 }, { "epoch": 
0.7095435684647303, "grad_norm": 9.095657348632812, "learning_rate": 1.9717510373443985e-05, "loss": 1.4448, "step": 855 }, { "epoch": 0.7103734439834025, "grad_norm": 8.77692985534668, "learning_rate": 1.9717178423236517e-05, "loss": 1.1464, "step": 856 }, { "epoch": 0.7112033195020747, "grad_norm": 7.756300449371338, "learning_rate": 1.971684647302905e-05, "loss": 1.5459, "step": 857 }, { "epoch": 0.7120331950207469, "grad_norm": 16.953567504882812, "learning_rate": 1.9716514522821578e-05, "loss": 2.4925, "step": 858 }, { "epoch": 0.7128630705394191, "grad_norm": 10.29880428314209, "learning_rate": 1.971618257261411e-05, "loss": 1.4051, "step": 859 }, { "epoch": 0.7136929460580913, "grad_norm": 13.173881530761719, "learning_rate": 1.971585062240664e-05, "loss": 1.9117, "step": 860 }, { "epoch": 0.7145228215767635, "grad_norm": 9.953489303588867, "learning_rate": 1.971551867219917e-05, "loss": 1.6549, "step": 861 }, { "epoch": 0.7153526970954357, "grad_norm": 6.605320930480957, "learning_rate": 1.9715186721991703e-05, "loss": 1.1327, "step": 862 }, { "epoch": 0.7161825726141079, "grad_norm": 9.14734935760498, "learning_rate": 1.971485477178423e-05, "loss": 1.5671, "step": 863 }, { "epoch": 0.7170124481327801, "grad_norm": 15.559391021728516, "learning_rate": 1.9714522821576764e-05, "loss": 1.8793, "step": 864 }, { "epoch": 0.7178423236514523, "grad_norm": 10.377245903015137, "learning_rate": 1.9714190871369296e-05, "loss": 1.6307, "step": 865 }, { "epoch": 0.7186721991701245, "grad_norm": 6.882962226867676, "learning_rate": 1.9713858921161828e-05, "loss": 1.3019, "step": 866 }, { "epoch": 0.7195020746887967, "grad_norm": 8.892613410949707, "learning_rate": 1.9713526970954357e-05, "loss": 1.7757, "step": 867 }, { "epoch": 0.7203319502074689, "grad_norm": 6.765893459320068, "learning_rate": 1.971319502074689e-05, "loss": 1.7198, "step": 868 }, { "epoch": 0.7211618257261411, "grad_norm": 8.451148986816406, "learning_rate": 1.971286307053942e-05, "loss": 1.6824, 
"step": 869 }, { "epoch": 0.7219917012448133, "grad_norm": 12.163532257080078, "learning_rate": 1.9712531120331953e-05, "loss": 1.7357, "step": 870 }, { "epoch": 0.7228215767634855, "grad_norm": 10.470823287963867, "learning_rate": 1.9712199170124482e-05, "loss": 2.1098, "step": 871 }, { "epoch": 0.7236514522821577, "grad_norm": 6.519016265869141, "learning_rate": 1.9711867219917014e-05, "loss": 1.7163, "step": 872 }, { "epoch": 0.7244813278008299, "grad_norm": 8.468648910522461, "learning_rate": 1.9711535269709546e-05, "loss": 0.9853, "step": 873 }, { "epoch": 0.725311203319502, "grad_norm": 6.727292537689209, "learning_rate": 1.9711203319502078e-05, "loss": 1.4333, "step": 874 }, { "epoch": 0.7261410788381742, "grad_norm": 7.704986095428467, "learning_rate": 1.9710871369294607e-05, "loss": 1.4856, "step": 875 }, { "epoch": 0.7269709543568464, "grad_norm": 10.851725578308105, "learning_rate": 1.971053941908714e-05, "loss": 1.7058, "step": 876 }, { "epoch": 0.7278008298755186, "grad_norm": 7.691779613494873, "learning_rate": 1.971020746887967e-05, "loss": 0.909, "step": 877 }, { "epoch": 0.7286307053941908, "grad_norm": 7.897294521331787, "learning_rate": 1.97098755186722e-05, "loss": 1.8235, "step": 878 }, { "epoch": 0.729460580912863, "grad_norm": 6.848681926727295, "learning_rate": 1.9709543568464732e-05, "loss": 1.3717, "step": 879 }, { "epoch": 0.7302904564315352, "grad_norm": 8.404923439025879, "learning_rate": 1.9709211618257264e-05, "loss": 1.5888, "step": 880 }, { "epoch": 0.7311203319502074, "grad_norm": 11.388883590698242, "learning_rate": 1.9708879668049793e-05, "loss": 2.1761, "step": 881 }, { "epoch": 0.7319502074688796, "grad_norm": 6.789611339569092, "learning_rate": 1.9708547717842325e-05, "loss": 1.7311, "step": 882 }, { "epoch": 0.7327800829875518, "grad_norm": 4.9357147216796875, "learning_rate": 1.9708215767634857e-05, "loss": 1.1783, "step": 883 }, { "epoch": 0.733609958506224, "grad_norm": 11.178107261657715, "learning_rate": 
1.9707883817427386e-05, "loss": 1.6487, "step": 884 }, { "epoch": 0.7344398340248963, "grad_norm": 11.29975700378418, "learning_rate": 1.9707551867219918e-05, "loss": 1.1278, "step": 885 }, { "epoch": 0.7352697095435685, "grad_norm": 7.2978692054748535, "learning_rate": 1.970721991701245e-05, "loss": 1.7646, "step": 886 }, { "epoch": 0.7360995850622407, "grad_norm": 8.76308822631836, "learning_rate": 1.9706887966804982e-05, "loss": 1.0577, "step": 887 }, { "epoch": 0.7369294605809129, "grad_norm": 7.330498218536377, "learning_rate": 1.970655601659751e-05, "loss": 1.4935, "step": 888 }, { "epoch": 0.7377593360995851, "grad_norm": 7.788291931152344, "learning_rate": 1.9706224066390043e-05, "loss": 1.2877, "step": 889 }, { "epoch": 0.7385892116182573, "grad_norm": 9.444518089294434, "learning_rate": 1.9705892116182575e-05, "loss": 1.6161, "step": 890 }, { "epoch": 0.7394190871369295, "grad_norm": 6.870673656463623, "learning_rate": 1.9705560165975107e-05, "loss": 1.6162, "step": 891 }, { "epoch": 0.7402489626556017, "grad_norm": 12.847532272338867, "learning_rate": 1.9705228215767636e-05, "loss": 1.6892, "step": 892 }, { "epoch": 0.7410788381742739, "grad_norm": 9.822442054748535, "learning_rate": 1.9704896265560168e-05, "loss": 0.7905, "step": 893 }, { "epoch": 0.7419087136929461, "grad_norm": 9.752788543701172, "learning_rate": 1.97045643153527e-05, "loss": 2.6233, "step": 894 }, { "epoch": 0.7427385892116183, "grad_norm": 7.514604091644287, "learning_rate": 1.9704232365145232e-05, "loss": 1.2533, "step": 895 }, { "epoch": 0.7435684647302905, "grad_norm": 8.74625015258789, "learning_rate": 1.970390041493776e-05, "loss": 0.9207, "step": 896 }, { "epoch": 0.7443983402489627, "grad_norm": 7.280252456665039, "learning_rate": 1.9703568464730293e-05, "loss": 1.7723, "step": 897 }, { "epoch": 0.7452282157676349, "grad_norm": 8.236467361450195, "learning_rate": 1.9703236514522825e-05, "loss": 2.3046, "step": 898 }, { "epoch": 0.7460580912863071, "grad_norm": 
7.727840900421143, "learning_rate": 1.9702904564315354e-05, "loss": 1.3194, "step": 899 }, { "epoch": 0.7468879668049793, "grad_norm": 9.204072952270508, "learning_rate": 1.9702572614107886e-05, "loss": 2.0006, "step": 900 }, { "epoch": 0.7477178423236515, "grad_norm": 6.788082599639893, "learning_rate": 1.9702240663900414e-05, "loss": 1.6211, "step": 901 }, { "epoch": 0.7485477178423237, "grad_norm": 10.130561828613281, "learning_rate": 1.9701908713692947e-05, "loss": 1.9462, "step": 902 }, { "epoch": 0.7493775933609959, "grad_norm": 10.819340705871582, "learning_rate": 1.970157676348548e-05, "loss": 2.0819, "step": 903 }, { "epoch": 0.750207468879668, "grad_norm": 9.382555961608887, "learning_rate": 1.970124481327801e-05, "loss": 2.3008, "step": 904 }, { "epoch": 0.7510373443983402, "grad_norm": 9.819191932678223, "learning_rate": 1.970091286307054e-05, "loss": 1.497, "step": 905 }, { "epoch": 0.7518672199170124, "grad_norm": 6.313736438751221, "learning_rate": 1.970058091286307e-05, "loss": 0.9966, "step": 906 }, { "epoch": 0.7526970954356846, "grad_norm": 13.355637550354004, "learning_rate": 1.9700248962655604e-05, "loss": 1.3573, "step": 907 }, { "epoch": 0.7535269709543568, "grad_norm": 11.965509414672852, "learning_rate": 1.9699917012448136e-05, "loss": 1.2366, "step": 908 }, { "epoch": 0.754356846473029, "grad_norm": 6.219414710998535, "learning_rate": 1.9699585062240665e-05, "loss": 2.0283, "step": 909 }, { "epoch": 0.7551867219917012, "grad_norm": 13.732353210449219, "learning_rate": 1.9699253112033197e-05, "loss": 1.972, "step": 910 }, { "epoch": 0.7560165975103734, "grad_norm": 7.089382648468018, "learning_rate": 1.969892116182573e-05, "loss": 1.4138, "step": 911 }, { "epoch": 0.7568464730290456, "grad_norm": 10.770194053649902, "learning_rate": 1.969858921161826e-05, "loss": 1.8654, "step": 912 }, { "epoch": 0.7576763485477178, "grad_norm": 6.418844699859619, "learning_rate": 1.969825726141079e-05, "loss": 1.4201, "step": 913 }, { "epoch": 
0.75850622406639, "grad_norm": 7.00799036026001, "learning_rate": 1.9697925311203322e-05, "loss": 1.4294, "step": 914 }, { "epoch": 0.7593360995850622, "grad_norm": 14.663089752197266, "learning_rate": 1.9697593360995854e-05, "loss": 1.268, "step": 915 }, { "epoch": 0.7601659751037344, "grad_norm": 9.343082427978516, "learning_rate": 1.9697261410788383e-05, "loss": 2.0042, "step": 916 }, { "epoch": 0.7609958506224066, "grad_norm": 11.322927474975586, "learning_rate": 1.9696929460580915e-05, "loss": 1.3418, "step": 917 }, { "epoch": 0.7618257261410788, "grad_norm": 7.351823806762695, "learning_rate": 1.9696597510373447e-05, "loss": 1.4921, "step": 918 }, { "epoch": 0.762655601659751, "grad_norm": 10.492436408996582, "learning_rate": 1.9696265560165975e-05, "loss": 0.9525, "step": 919 }, { "epoch": 0.7634854771784232, "grad_norm": 6.410496234893799, "learning_rate": 1.9695933609958508e-05, "loss": 1.6335, "step": 920 }, { "epoch": 0.7643153526970954, "grad_norm": 15.592869758605957, "learning_rate": 1.969560165975104e-05, "loss": 2.013, "step": 921 }, { "epoch": 0.7651452282157676, "grad_norm": 8.419303894042969, "learning_rate": 1.969526970954357e-05, "loss": 0.9311, "step": 922 }, { "epoch": 0.7659751037344399, "grad_norm": 8.413793563842773, "learning_rate": 1.96949377593361e-05, "loss": 1.3601, "step": 923 }, { "epoch": 0.7668049792531121, "grad_norm": 8.315201759338379, "learning_rate": 1.9694605809128633e-05, "loss": 1.1585, "step": 924 }, { "epoch": 0.7676348547717843, "grad_norm": 13.638212203979492, "learning_rate": 1.969427385892116e-05, "loss": 2.2302, "step": 925 }, { "epoch": 0.7684647302904565, "grad_norm": 6.532405853271484, "learning_rate": 1.9693941908713693e-05, "loss": 1.472, "step": 926 }, { "epoch": 0.7692946058091287, "grad_norm": 13.646692276000977, "learning_rate": 1.9693609958506226e-05, "loss": 1.5292, "step": 927 }, { "epoch": 0.7701244813278009, "grad_norm": 12.231216430664062, "learning_rate": 1.9693278008298758e-05, "loss": 2.3402, 
"step": 928 }, { "epoch": 0.7709543568464731, "grad_norm": 8.361285209655762, "learning_rate": 1.969294605809129e-05, "loss": 0.9799, "step": 929 }, { "epoch": 0.7717842323651453, "grad_norm": 7.308678150177002, "learning_rate": 1.969261410788382e-05, "loss": 1.2543, "step": 930 }, { "epoch": 0.7726141078838175, "grad_norm": 18.021982192993164, "learning_rate": 1.969228215767635e-05, "loss": 2.2498, "step": 931 }, { "epoch": 0.7734439834024897, "grad_norm": 10.647754669189453, "learning_rate": 1.9691950207468883e-05, "loss": 2.0019, "step": 932 }, { "epoch": 0.7742738589211619, "grad_norm": 15.274364471435547, "learning_rate": 1.9691618257261415e-05, "loss": 1.7736, "step": 933 }, { "epoch": 0.775103734439834, "grad_norm": 6.879903793334961, "learning_rate": 1.9691286307053944e-05, "loss": 1.4161, "step": 934 }, { "epoch": 0.7759336099585062, "grad_norm": 9.342314720153809, "learning_rate": 1.9690954356846476e-05, "loss": 1.6783, "step": 935 }, { "epoch": 0.7767634854771784, "grad_norm": 8.718962669372559, "learning_rate": 1.9690622406639008e-05, "loss": 1.4341, "step": 936 }, { "epoch": 0.7775933609958506, "grad_norm": 9.557229995727539, "learning_rate": 1.9690290456431536e-05, "loss": 1.1803, "step": 937 }, { "epoch": 0.7784232365145228, "grad_norm": 7.859264850616455, "learning_rate": 1.968995850622407e-05, "loss": 1.8727, "step": 938 }, { "epoch": 0.779253112033195, "grad_norm": 7.473854064941406, "learning_rate": 1.9689626556016597e-05, "loss": 1.545, "step": 939 }, { "epoch": 0.7800829875518672, "grad_norm": 9.916956901550293, "learning_rate": 1.968929460580913e-05, "loss": 1.7452, "step": 940 }, { "epoch": 0.7809128630705394, "grad_norm": 8.823984146118164, "learning_rate": 1.968896265560166e-05, "loss": 2.5897, "step": 941 }, { "epoch": 0.7817427385892116, "grad_norm": 12.045278549194336, "learning_rate": 1.968863070539419e-05, "loss": 2.0159, "step": 942 }, { "epoch": 0.7825726141078838, "grad_norm": 7.770388126373291, "learning_rate": 
1.9688298755186722e-05, "loss": 1.5823, "step": 943 }, { "epoch": 0.783402489626556, "grad_norm": 8.49427604675293, "learning_rate": 1.9687966804979254e-05, "loss": 1.9762, "step": 944 }, { "epoch": 0.7842323651452282, "grad_norm": 13.524663925170898, "learning_rate": 1.9687634854771787e-05, "loss": 2.8349, "step": 945 }, { "epoch": 0.7850622406639004, "grad_norm": 9.418774604797363, "learning_rate": 1.9687302904564315e-05, "loss": 1.7676, "step": 946 }, { "epoch": 0.7858921161825726, "grad_norm": 9.206923484802246, "learning_rate": 1.9686970954356847e-05, "loss": 1.3076, "step": 947 }, { "epoch": 0.7867219917012448, "grad_norm": 13.416679382324219, "learning_rate": 1.968663900414938e-05, "loss": 1.2119, "step": 948 }, { "epoch": 0.787551867219917, "grad_norm": 11.847709655761719, "learning_rate": 1.968630705394191e-05, "loss": 1.2537, "step": 949 }, { "epoch": 0.7883817427385892, "grad_norm": 9.787245750427246, "learning_rate": 1.968597510373444e-05, "loss": 1.4671, "step": 950 }, { "epoch": 0.7892116182572614, "grad_norm": 7.3747076988220215, "learning_rate": 1.9685643153526972e-05, "loss": 1.558, "step": 951 }, { "epoch": 0.7900414937759336, "grad_norm": 9.097302436828613, "learning_rate": 1.9685311203319505e-05, "loss": 1.8082, "step": 952 }, { "epoch": 0.7908713692946058, "grad_norm": 11.470207214355469, "learning_rate": 1.9684979253112037e-05, "loss": 1.2244, "step": 953 }, { "epoch": 0.791701244813278, "grad_norm": 7.885365962982178, "learning_rate": 1.9684647302904565e-05, "loss": 1.5382, "step": 954 }, { "epoch": 0.7925311203319502, "grad_norm": 8.373284339904785, "learning_rate": 1.9684315352697097e-05, "loss": 1.4915, "step": 955 }, { "epoch": 0.7933609958506224, "grad_norm": 7.036355972290039, "learning_rate": 1.968398340248963e-05, "loss": 1.2764, "step": 956 }, { "epoch": 0.7941908713692946, "grad_norm": 8.315632820129395, "learning_rate": 1.9683651452282158e-05, "loss": 2.0935, "step": 957 }, { "epoch": 0.7950207468879668, "grad_norm": 
11.527595520019531, "learning_rate": 1.968331950207469e-05, "loss": 1.687, "step": 958 }, { "epoch": 0.795850622406639, "grad_norm": 8.437081336975098, "learning_rate": 1.9682987551867223e-05, "loss": 1.2634, "step": 959 }, { "epoch": 0.7966804979253111, "grad_norm": 7.808870792388916, "learning_rate": 1.968265560165975e-05, "loss": 1.485, "step": 960 }, { "epoch": 0.7975103734439835, "grad_norm": 5.336411476135254, "learning_rate": 1.9682323651452283e-05, "loss": 1.242, "step": 961 }, { "epoch": 0.7983402489626557, "grad_norm": 15.170831680297852, "learning_rate": 1.9681991701244815e-05, "loss": 1.8214, "step": 962 }, { "epoch": 0.7991701244813278, "grad_norm": 10.07691764831543, "learning_rate": 1.9681659751037344e-05, "loss": 1.648, "step": 963 }, { "epoch": 0.8, "grad_norm": 6.997379302978516, "learning_rate": 1.9681327800829876e-05, "loss": 1.0243, "step": 964 }, { "epoch": 0.8008298755186722, "grad_norm": 15.939200401306152, "learning_rate": 1.968099585062241e-05, "loss": 1.5784, "step": 965 }, { "epoch": 0.8016597510373444, "grad_norm": 9.812588691711426, "learning_rate": 1.968066390041494e-05, "loss": 1.4343, "step": 966 }, { "epoch": 0.8024896265560166, "grad_norm": 15.12867546081543, "learning_rate": 1.968033195020747e-05, "loss": 2.0867, "step": 967 }, { "epoch": 0.8033195020746888, "grad_norm": 8.215837478637695, "learning_rate": 1.968e-05, "loss": 1.4944, "step": 968 }, { "epoch": 0.804149377593361, "grad_norm": 15.169416427612305, "learning_rate": 1.9679668049792533e-05, "loss": 1.3614, "step": 969 }, { "epoch": 0.8049792531120332, "grad_norm": 6.239467620849609, "learning_rate": 1.9679336099585066e-05, "loss": 1.0133, "step": 970 }, { "epoch": 0.8058091286307054, "grad_norm": 15.446439743041992, "learning_rate": 1.9679004149377594e-05, "loss": 1.8776, "step": 971 }, { "epoch": 0.8066390041493776, "grad_norm": 10.137720108032227, "learning_rate": 1.9678672199170126e-05, "loss": 1.0835, "step": 972 }, { "epoch": 0.8074688796680498, "grad_norm": 
12.084761619567871, "learning_rate": 1.967834024896266e-05, "loss": 1.3282, "step": 973 }, { "epoch": 0.808298755186722, "grad_norm": 11.411214828491211, "learning_rate": 1.967800829875519e-05, "loss": 1.7917, "step": 974 }, { "epoch": 0.8091286307053942, "grad_norm": 6.625853538513184, "learning_rate": 1.967767634854772e-05, "loss": 1.6394, "step": 975 }, { "epoch": 0.8099585062240664, "grad_norm": 7.777348041534424, "learning_rate": 1.967734439834025e-05, "loss": 1.7629, "step": 976 }, { "epoch": 0.8107883817427386, "grad_norm": 9.663055419921875, "learning_rate": 1.967701244813278e-05, "loss": 1.715, "step": 977 }, { "epoch": 0.8116182572614108, "grad_norm": 7.094366550445557, "learning_rate": 1.9676680497925312e-05, "loss": 1.792, "step": 978 }, { "epoch": 0.812448132780083, "grad_norm": 12.4456205368042, "learning_rate": 1.9676348547717844e-05, "loss": 1.755, "step": 979 }, { "epoch": 0.8132780082987552, "grad_norm": 11.456305503845215, "learning_rate": 1.9676016597510373e-05, "loss": 1.6894, "step": 980 }, { "epoch": 0.8141078838174274, "grad_norm": 8.9269380569458, "learning_rate": 1.9675684647302905e-05, "loss": 1.6747, "step": 981 }, { "epoch": 0.8149377593360996, "grad_norm": 15.046621322631836, "learning_rate": 1.9675352697095437e-05, "loss": 2.4619, "step": 982 }, { "epoch": 0.8157676348547718, "grad_norm": 7.26068639755249, "learning_rate": 1.967502074688797e-05, "loss": 1.1563, "step": 983 }, { "epoch": 0.816597510373444, "grad_norm": 9.267903327941895, "learning_rate": 1.9674688796680498e-05, "loss": 1.4251, "step": 984 }, { "epoch": 0.8174273858921162, "grad_norm": 12.032682418823242, "learning_rate": 1.967435684647303e-05, "loss": 1.0147, "step": 985 }, { "epoch": 0.8182572614107884, "grad_norm": 8.241469383239746, "learning_rate": 1.9674024896265562e-05, "loss": 1.3935, "step": 986 }, { "epoch": 0.8190871369294606, "grad_norm": 11.916176795959473, "learning_rate": 1.9673692946058094e-05, "loss": 1.865, "step": 987 }, { "epoch": 0.8199170124481328, 
"grad_norm": 7.564818859100342, "learning_rate": 1.9673360995850623e-05, "loss": 1.2389, "step": 988 }, { "epoch": 0.820746887966805, "grad_norm": 8.493477821350098, "learning_rate": 1.9673029045643155e-05, "loss": 1.4244, "step": 989 }, { "epoch": 0.8215767634854771, "grad_norm": 12.8887300491333, "learning_rate": 1.9672697095435687e-05, "loss": 1.5347, "step": 990 }, { "epoch": 0.8224066390041493, "grad_norm": 11.733437538146973, "learning_rate": 1.967236514522822e-05, "loss": 1.5341, "step": 991 }, { "epoch": 0.8232365145228215, "grad_norm": 6.051559925079346, "learning_rate": 1.9672033195020748e-05, "loss": 1.7379, "step": 992 }, { "epoch": 0.8240663900414937, "grad_norm": 13.509745597839355, "learning_rate": 1.967170124481328e-05, "loss": 1.93, "step": 993 }, { "epoch": 0.8248962655601659, "grad_norm": 8.659034729003906, "learning_rate": 1.9671369294605812e-05, "loss": 1.3133, "step": 994 }, { "epoch": 0.8257261410788381, "grad_norm": 13.187024116516113, "learning_rate": 1.967103734439834e-05, "loss": 2.3914, "step": 995 }, { "epoch": 0.8265560165975103, "grad_norm": 10.235223770141602, "learning_rate": 1.9670705394190873e-05, "loss": 1.1518, "step": 996 }, { "epoch": 0.8273858921161825, "grad_norm": 7.345759868621826, "learning_rate": 1.9670373443983405e-05, "loss": 1.2079, "step": 997 }, { "epoch": 0.8282157676348548, "grad_norm": 7.494898319244385, "learning_rate": 1.9670041493775934e-05, "loss": 1.3654, "step": 998 }, { "epoch": 0.829045643153527, "grad_norm": 20.565608978271484, "learning_rate": 1.9669709543568466e-05, "loss": 2.3989, "step": 999 }, { "epoch": 0.8298755186721992, "grad_norm": 13.420109748840332, "learning_rate": 1.9669377593360995e-05, "loss": 1.9799, "step": 1000 }, { "epoch": 0.8307053941908714, "grad_norm": 9.606064796447754, "learning_rate": 1.9669045643153527e-05, "loss": 1.4727, "step": 1001 }, { "epoch": 0.8315352697095436, "grad_norm": 9.857084274291992, "learning_rate": 1.966871369294606e-05, "loss": 1.5107, "step": 1002 }, { 
"epoch": 0.8323651452282158, "grad_norm": 6.88421630859375, "learning_rate": 1.966838174273859e-05, "loss": 1.1236, "step": 1003 }, { "epoch": 0.833195020746888, "grad_norm": 13.06930923461914, "learning_rate": 1.966804979253112e-05, "loss": 1.4518, "step": 1004 }, { "epoch": 0.8340248962655602, "grad_norm": 6.136581897735596, "learning_rate": 1.9667717842323652e-05, "loss": 1.1021, "step": 1005 }, { "epoch": 0.8348547717842324, "grad_norm": 11.666712760925293, "learning_rate": 1.9667385892116184e-05, "loss": 1.6488, "step": 1006 }, { "epoch": 0.8356846473029046, "grad_norm": 9.010550498962402, "learning_rate": 1.9667053941908716e-05, "loss": 1.3047, "step": 1007 }, { "epoch": 0.8365145228215768, "grad_norm": 7.512453079223633, "learning_rate": 1.966672199170125e-05, "loss": 1.2726, "step": 1008 }, { "epoch": 0.837344398340249, "grad_norm": 13.104828834533691, "learning_rate": 1.9666390041493777e-05, "loss": 2.098, "step": 1009 }, { "epoch": 0.8381742738589212, "grad_norm": 12.534167289733887, "learning_rate": 1.966605809128631e-05, "loss": 1.5226, "step": 1010 }, { "epoch": 0.8390041493775934, "grad_norm": 8.550573348999023, "learning_rate": 1.966572614107884e-05, "loss": 1.7593, "step": 1011 }, { "epoch": 0.8398340248962656, "grad_norm": 16.806367874145508, "learning_rate": 1.9665394190871373e-05, "loss": 2.8666, "step": 1012 }, { "epoch": 0.8406639004149378, "grad_norm": 6.3786540031433105, "learning_rate": 1.9665062240663902e-05, "loss": 1.3696, "step": 1013 }, { "epoch": 0.84149377593361, "grad_norm": 8.544774055480957, "learning_rate": 1.9664730290456434e-05, "loss": 1.5706, "step": 1014 }, { "epoch": 0.8423236514522822, "grad_norm": 7.669879913330078, "learning_rate": 1.9664398340248966e-05, "loss": 1.4174, "step": 1015 }, { "epoch": 0.8431535269709544, "grad_norm": 12.738797187805176, "learning_rate": 1.9664066390041495e-05, "loss": 1.5025, "step": 1016 }, { "epoch": 0.8439834024896266, "grad_norm": 10.935934066772461, "learning_rate": 
1.9663734439834027e-05, "loss": 1.2086, "step": 1017 }, { "epoch": 0.8448132780082988, "grad_norm": 10.80820369720459, "learning_rate": 1.9663402489626556e-05, "loss": 2.1271, "step": 1018 }, { "epoch": 0.845643153526971, "grad_norm": 11.655653953552246, "learning_rate": 1.9663070539419088e-05, "loss": 1.755, "step": 1019 }, { "epoch": 0.8464730290456431, "grad_norm": 13.466104507446289, "learning_rate": 1.966273858921162e-05, "loss": 2.3968, "step": 1020 }, { "epoch": 0.8473029045643153, "grad_norm": 19.458518981933594, "learning_rate": 1.966240663900415e-05, "loss": 2.7242, "step": 1021 }, { "epoch": 0.8481327800829875, "grad_norm": 9.436688423156738, "learning_rate": 1.966207468879668e-05, "loss": 1.9204, "step": 1022 }, { "epoch": 0.8489626556016597, "grad_norm": 13.547919273376465, "learning_rate": 1.9661742738589213e-05, "loss": 1.8768, "step": 1023 }, { "epoch": 0.8497925311203319, "grad_norm": 9.066967010498047, "learning_rate": 1.9661410788381745e-05, "loss": 1.4841, "step": 1024 }, { "epoch": 0.8506224066390041, "grad_norm": 8.317646980285645, "learning_rate": 1.9661078838174274e-05, "loss": 1.1132, "step": 1025 }, { "epoch": 0.8514522821576763, "grad_norm": 6.278991222381592, "learning_rate": 1.9660746887966806e-05, "loss": 1.4325, "step": 1026 }, { "epoch": 0.8522821576763485, "grad_norm": 11.320104598999023, "learning_rate": 1.9660414937759338e-05, "loss": 1.5552, "step": 1027 }, { "epoch": 0.8531120331950207, "grad_norm": 8.768880844116211, "learning_rate": 1.966008298755187e-05, "loss": 1.3691, "step": 1028 }, { "epoch": 0.8539419087136929, "grad_norm": 7.3119964599609375, "learning_rate": 1.96597510373444e-05, "loss": 0.9, "step": 1029 }, { "epoch": 0.8547717842323651, "grad_norm": 4.879432201385498, "learning_rate": 1.965941908713693e-05, "loss": 1.1424, "step": 1030 }, { "epoch": 0.8556016597510373, "grad_norm": 5.870429992675781, "learning_rate": 1.9659087136929463e-05, "loss": 1.3067, "step": 1031 }, { "epoch": 0.8564315352697095, "grad_norm": 
7.399411201477051, "learning_rate": 1.9658755186721995e-05, "loss": 1.4241, "step": 1032 }, { "epoch": 0.8572614107883817, "grad_norm": 5.226459503173828, "learning_rate": 1.9658423236514524e-05, "loss": 1.4072, "step": 1033 }, { "epoch": 0.8580912863070539, "grad_norm": 7.430332660675049, "learning_rate": 1.9658091286307056e-05, "loss": 1.191, "step": 1034 }, { "epoch": 0.8589211618257261, "grad_norm": 6.439052581787109, "learning_rate": 1.9657759336099588e-05, "loss": 1.1566, "step": 1035 }, { "epoch": 0.8597510373443984, "grad_norm": 7.312918663024902, "learning_rate": 1.9657427385892117e-05, "loss": 1.9308, "step": 1036 }, { "epoch": 0.8605809128630706, "grad_norm": 10.908380508422852, "learning_rate": 1.965709543568465e-05, "loss": 1.7436, "step": 1037 }, { "epoch": 0.8614107883817428, "grad_norm": 8.831663131713867, "learning_rate": 1.9656763485477178e-05, "loss": 1.9511, "step": 1038 }, { "epoch": 0.862240663900415, "grad_norm": 10.01542854309082, "learning_rate": 1.965643153526971e-05, "loss": 1.5681, "step": 1039 }, { "epoch": 0.8630705394190872, "grad_norm": 9.65222454071045, "learning_rate": 1.9656099585062242e-05, "loss": 1.2126, "step": 1040 }, { "epoch": 0.8639004149377594, "grad_norm": 11.097689628601074, "learning_rate": 1.9655767634854774e-05, "loss": 2.1252, "step": 1041 }, { "epoch": 0.8647302904564316, "grad_norm": 8.643355369567871, "learning_rate": 1.9655435684647303e-05, "loss": 1.5944, "step": 1042 }, { "epoch": 0.8655601659751038, "grad_norm": 11.84477710723877, "learning_rate": 1.9655103734439835e-05, "loss": 1.7435, "step": 1043 }, { "epoch": 0.866390041493776, "grad_norm": 11.224655151367188, "learning_rate": 1.9654771784232367e-05, "loss": 1.3211, "step": 1044 }, { "epoch": 0.8672199170124482, "grad_norm": 8.349313735961914, "learning_rate": 1.96544398340249e-05, "loss": 1.4563, "step": 1045 }, { "epoch": 0.8680497925311204, "grad_norm": 9.56961441040039, "learning_rate": 1.9654107883817428e-05, "loss": 1.2579, "step": 1046 }, { 
"epoch": 0.8688796680497926, "grad_norm": 9.453864097595215, "learning_rate": 1.965377593360996e-05, "loss": 1.9285, "step": 1047 }, { "epoch": 0.8697095435684647, "grad_norm": 12.237977981567383, "learning_rate": 1.9653443983402492e-05, "loss": 1.9079, "step": 1048 }, { "epoch": 0.870539419087137, "grad_norm": 9.775456428527832, "learning_rate": 1.9653112033195024e-05, "loss": 1.3014, "step": 1049 }, { "epoch": 0.8713692946058091, "grad_norm": 11.715251922607422, "learning_rate": 1.9652780082987553e-05, "loss": 1.4514, "step": 1050 }, { "epoch": 0.8721991701244813, "grad_norm": 12.325919151306152, "learning_rate": 1.9652448132780085e-05, "loss": 1.7345, "step": 1051 }, { "epoch": 0.8730290456431535, "grad_norm": 7.349343299865723, "learning_rate": 1.9652116182572617e-05, "loss": 1.808, "step": 1052 }, { "epoch": 0.8738589211618257, "grad_norm": 9.346109390258789, "learning_rate": 1.965178423236515e-05, "loss": 2.1499, "step": 1053 }, { "epoch": 0.8746887966804979, "grad_norm": 11.922453880310059, "learning_rate": 1.9651452282157678e-05, "loss": 2.2316, "step": 1054 }, { "epoch": 0.8755186721991701, "grad_norm": 10.335662841796875, "learning_rate": 1.965112033195021e-05, "loss": 2.1105, "step": 1055 }, { "epoch": 0.8763485477178423, "grad_norm": 9.07435131072998, "learning_rate": 1.965078838174274e-05, "loss": 1.137, "step": 1056 }, { "epoch": 0.8771784232365145, "grad_norm": 9.33111572265625, "learning_rate": 1.965045643153527e-05, "loss": 1.5888, "step": 1057 }, { "epoch": 0.8780082987551867, "grad_norm": 5.676502227783203, "learning_rate": 1.9650124481327803e-05, "loss": 1.2563, "step": 1058 }, { "epoch": 0.8788381742738589, "grad_norm": 5.44855260848999, "learning_rate": 1.964979253112033e-05, "loss": 1.6331, "step": 1059 }, { "epoch": 0.8796680497925311, "grad_norm": 9.275320053100586, "learning_rate": 1.9649460580912864e-05, "loss": 0.8066, "step": 1060 }, { "epoch": 0.8804979253112033, "grad_norm": 8.472344398498535, "learning_rate": 1.9649128630705396e-05, 
"loss": 1.257, "step": 1061 }, { "epoch": 0.8813278008298755, "grad_norm": 11.252317428588867, "learning_rate": 1.9648796680497928e-05, "loss": 1.5945, "step": 1062 }, { "epoch": 0.8821576763485477, "grad_norm": 12.751405715942383, "learning_rate": 1.9648464730290457e-05, "loss": 1.5689, "step": 1063 }, { "epoch": 0.8829875518672199, "grad_norm": 9.615579605102539, "learning_rate": 1.964813278008299e-05, "loss": 1.3637, "step": 1064 }, { "epoch": 0.8838174273858921, "grad_norm": 13.07732105255127, "learning_rate": 1.964780082987552e-05, "loss": 1.9585, "step": 1065 }, { "epoch": 0.8846473029045643, "grad_norm": 6.308200359344482, "learning_rate": 1.9647468879668053e-05, "loss": 1.0783, "step": 1066 }, { "epoch": 0.8854771784232365, "grad_norm": 12.633715629577637, "learning_rate": 1.9647136929460582e-05, "loss": 2.227, "step": 1067 }, { "epoch": 0.8863070539419087, "grad_norm": 6.260104179382324, "learning_rate": 1.9646804979253114e-05, "loss": 1.0809, "step": 1068 }, { "epoch": 0.8871369294605809, "grad_norm": 7.620476245880127, "learning_rate": 1.9646473029045646e-05, "loss": 1.7141, "step": 1069 }, { "epoch": 0.8879668049792531, "grad_norm": 5.977156162261963, "learning_rate": 1.9646141078838178e-05, "loss": 0.9577, "step": 1070 }, { "epoch": 0.8887966804979253, "grad_norm": 8.926619529724121, "learning_rate": 1.9645809128630707e-05, "loss": 1.9496, "step": 1071 }, { "epoch": 0.8896265560165975, "grad_norm": 6.9818315505981445, "learning_rate": 1.964547717842324e-05, "loss": 1.3928, "step": 1072 }, { "epoch": 0.8904564315352697, "grad_norm": 7.453327655792236, "learning_rate": 1.964514522821577e-05, "loss": 1.8306, "step": 1073 }, { "epoch": 0.891286307053942, "grad_norm": 6.459455490112305, "learning_rate": 1.96448132780083e-05, "loss": 1.8452, "step": 1074 }, { "epoch": 0.8921161825726142, "grad_norm": 8.85497760772705, "learning_rate": 1.9644481327800832e-05, "loss": 1.4466, "step": 1075 }, { "epoch": 0.8929460580912864, "grad_norm": 7.858205795288086, 
"learning_rate": 1.9644149377593364e-05, "loss": 1.719, "step": 1076 }, { "epoch": 0.8937759336099586, "grad_norm": 6.800254821777344, "learning_rate": 1.9643817427385893e-05, "loss": 1.2508, "step": 1077 }, { "epoch": 0.8946058091286307, "grad_norm": 12.278005599975586, "learning_rate": 1.9643485477178425e-05, "loss": 2.9581, "step": 1078 }, { "epoch": 0.8954356846473029, "grad_norm": 7.072972297668457, "learning_rate": 1.9643153526970953e-05, "loss": 1.1437, "step": 1079 }, { "epoch": 0.8962655601659751, "grad_norm": 14.452621459960938, "learning_rate": 1.9642821576763486e-05, "loss": 2.0826, "step": 1080 }, { "epoch": 0.8970954356846473, "grad_norm": 11.068501472473145, "learning_rate": 1.9642489626556018e-05, "loss": 2.0001, "step": 1081 }, { "epoch": 0.8979253112033195, "grad_norm": 8.703234672546387, "learning_rate": 1.964215767634855e-05, "loss": 1.4437, "step": 1082 }, { "epoch": 0.8987551867219917, "grad_norm": 7.677877426147461, "learning_rate": 1.964182572614108e-05, "loss": 0.91, "step": 1083 }, { "epoch": 0.8995850622406639, "grad_norm": 7.341625690460205, "learning_rate": 1.964149377593361e-05, "loss": 0.8562, "step": 1084 }, { "epoch": 0.9004149377593361, "grad_norm": 8.102919578552246, "learning_rate": 1.9641161825726143e-05, "loss": 1.6433, "step": 1085 }, { "epoch": 0.9012448132780083, "grad_norm": 12.120625495910645, "learning_rate": 1.9640829875518675e-05, "loss": 1.7007, "step": 1086 }, { "epoch": 0.9020746887966805, "grad_norm": 8.988472938537598, "learning_rate": 1.9640497925311207e-05, "loss": 1.5577, "step": 1087 }, { "epoch": 0.9029045643153527, "grad_norm": 12.991697311401367, "learning_rate": 1.9640165975103736e-05, "loss": 2.2318, "step": 1088 }, { "epoch": 0.9037344398340249, "grad_norm": 12.665489196777344, "learning_rate": 1.9639834024896268e-05, "loss": 1.7622, "step": 1089 }, { "epoch": 0.9045643153526971, "grad_norm": 8.638046264648438, "learning_rate": 1.96395020746888e-05, "loss": 1.6725, "step": 1090 }, { "epoch": 
0.9053941908713693, "grad_norm": 11.038932800292969, "learning_rate": 1.9639170124481332e-05, "loss": 1.5495, "step": 1091 }, { "epoch": 0.9062240663900415, "grad_norm": 8.468798637390137, "learning_rate": 1.963883817427386e-05, "loss": 1.3112, "step": 1092 }, { "epoch": 0.9070539419087137, "grad_norm": 8.319005012512207, "learning_rate": 1.9638506224066393e-05, "loss": 1.3783, "step": 1093 }, { "epoch": 0.9078838174273859, "grad_norm": 8.475297927856445, "learning_rate": 1.963817427385892e-05, "loss": 1.4451, "step": 1094 }, { "epoch": 0.9087136929460581, "grad_norm": 8.049338340759277, "learning_rate": 1.9637842323651454e-05, "loss": 1.8044, "step": 1095 }, { "epoch": 0.9095435684647303, "grad_norm": 12.467497825622559, "learning_rate": 1.9637510373443986e-05, "loss": 2.0921, "step": 1096 }, { "epoch": 0.9103734439834025, "grad_norm": 10.269949913024902, "learning_rate": 1.9637178423236514e-05, "loss": 1.344, "step": 1097 }, { "epoch": 0.9112033195020747, "grad_norm": 11.483344078063965, "learning_rate": 1.9636846473029047e-05, "loss": 0.9318, "step": 1098 }, { "epoch": 0.9120331950207469, "grad_norm": 14.027472496032715, "learning_rate": 1.963651452282158e-05, "loss": 1.5546, "step": 1099 }, { "epoch": 0.9128630705394191, "grad_norm": 7.289459705352783, "learning_rate": 1.9636182572614107e-05, "loss": 1.5897, "step": 1100 }, { "epoch": 0.9136929460580913, "grad_norm": 11.149405479431152, "learning_rate": 1.963585062240664e-05, "loss": 1.423, "step": 1101 }, { "epoch": 0.9145228215767635, "grad_norm": 8.928139686584473, "learning_rate": 1.963551867219917e-05, "loss": 1.5195, "step": 1102 }, { "epoch": 0.9153526970954357, "grad_norm": 8.811877250671387, "learning_rate": 1.9635186721991704e-05, "loss": 1.1676, "step": 1103 }, { "epoch": 0.9161825726141078, "grad_norm": 9.088351249694824, "learning_rate": 1.9634854771784232e-05, "loss": 1.8757, "step": 1104 }, { "epoch": 0.91701244813278, "grad_norm": 14.94248104095459, "learning_rate": 1.9634522821576765e-05, 
"loss": 1.6892, "step": 1105 }, { "epoch": 0.9178423236514522, "grad_norm": 8.41159725189209, "learning_rate": 1.9634190871369297e-05, "loss": 2.1096, "step": 1106 }, { "epoch": 0.9186721991701244, "grad_norm": 6.850897789001465, "learning_rate": 1.963385892116183e-05, "loss": 1.8368, "step": 1107 }, { "epoch": 0.9195020746887966, "grad_norm": 11.140777587890625, "learning_rate": 1.9633526970954357e-05, "loss": 1.8981, "step": 1108 }, { "epoch": 0.9203319502074688, "grad_norm": 7.905435085296631, "learning_rate": 1.963319502074689e-05, "loss": 1.2971, "step": 1109 }, { "epoch": 0.921161825726141, "grad_norm": 9.92632007598877, "learning_rate": 1.963286307053942e-05, "loss": 1.6403, "step": 1110 }, { "epoch": 0.9219917012448133, "grad_norm": 12.060151100158691, "learning_rate": 1.9632531120331954e-05, "loss": 2.0028, "step": 1111 }, { "epoch": 0.9228215767634855, "grad_norm": 13.670514106750488, "learning_rate": 1.9632199170124483e-05, "loss": 1.4117, "step": 1112 }, { "epoch": 0.9236514522821577, "grad_norm": 13.203557968139648, "learning_rate": 1.9631867219917015e-05, "loss": 2.3244, "step": 1113 }, { "epoch": 0.9244813278008299, "grad_norm": 7.941713333129883, "learning_rate": 1.9631535269709547e-05, "loss": 1.0045, "step": 1114 }, { "epoch": 0.9253112033195021, "grad_norm": 9.260502815246582, "learning_rate": 1.9631203319502075e-05, "loss": 1.6537, "step": 1115 }, { "epoch": 0.9261410788381743, "grad_norm": 5.598431587219238, "learning_rate": 1.9630871369294608e-05, "loss": 1.1329, "step": 1116 }, { "epoch": 0.9269709543568465, "grad_norm": 7.84628963470459, "learning_rate": 1.9630539419087136e-05, "loss": 1.2571, "step": 1117 }, { "epoch": 0.9278008298755187, "grad_norm": 11.151373863220215, "learning_rate": 1.963020746887967e-05, "loss": 1.6188, "step": 1118 }, { "epoch": 0.9286307053941909, "grad_norm": 10.507115364074707, "learning_rate": 1.96298755186722e-05, "loss": 1.1385, "step": 1119 }, { "epoch": 0.9294605809128631, "grad_norm": 7.356566905975342, 
"learning_rate": 1.9629543568464733e-05, "loss": 1.4925, "step": 1120 }, { "epoch": 0.9302904564315353, "grad_norm": 7.131621837615967, "learning_rate": 1.962921161825726e-05, "loss": 1.0718, "step": 1121 }, { "epoch": 0.9311203319502075, "grad_norm": 5.317333221435547, "learning_rate": 1.9628879668049793e-05, "loss": 1.453, "step": 1122 }, { "epoch": 0.9319502074688797, "grad_norm": 11.037420272827148, "learning_rate": 1.9628547717842326e-05, "loss": 1.3411, "step": 1123 }, { "epoch": 0.9327800829875519, "grad_norm": 7.246433734893799, "learning_rate": 1.9628215767634858e-05, "loss": 1.5457, "step": 1124 }, { "epoch": 0.9336099585062241, "grad_norm": 9.744181632995605, "learning_rate": 1.9627883817427386e-05, "loss": 0.9045, "step": 1125 }, { "epoch": 0.9344398340248963, "grad_norm": 8.039041519165039, "learning_rate": 1.962755186721992e-05, "loss": 0.9266, "step": 1126 }, { "epoch": 0.9352697095435685, "grad_norm": 13.927977561950684, "learning_rate": 1.962721991701245e-05, "loss": 2.1639, "step": 1127 }, { "epoch": 0.9360995850622407, "grad_norm": 13.629364967346191, "learning_rate": 1.9626887966804983e-05, "loss": 1.7407, "step": 1128 }, { "epoch": 0.9369294605809129, "grad_norm": 11.53200912475586, "learning_rate": 1.962655601659751e-05, "loss": 1.065, "step": 1129 }, { "epoch": 0.9377593360995851, "grad_norm": 7.056593418121338, "learning_rate": 1.9626224066390044e-05, "loss": 0.9255, "step": 1130 }, { "epoch": 0.9385892116182573, "grad_norm": 12.026193618774414, "learning_rate": 1.9625892116182576e-05, "loss": 1.7606, "step": 1131 }, { "epoch": 0.9394190871369295, "grad_norm": 12.923417091369629, "learning_rate": 1.9625560165975108e-05, "loss": 1.8457, "step": 1132 }, { "epoch": 0.9402489626556016, "grad_norm": 6.492031574249268, "learning_rate": 1.9625228215767636e-05, "loss": 1.0813, "step": 1133 }, { "epoch": 0.9410788381742738, "grad_norm": 10.134482383728027, "learning_rate": 1.962489626556017e-05, "loss": 1.4814, "step": 1134 }, { "epoch": 
0.941908713692946, "grad_norm": 6.962988376617432, "learning_rate": 1.9624564315352697e-05, "loss": 1.6219, "step": 1135 }, { "epoch": 0.9427385892116182, "grad_norm": 12.181124687194824, "learning_rate": 1.962423236514523e-05, "loss": 2.0113, "step": 1136 }, { "epoch": 0.9435684647302904, "grad_norm": 7.402917385101318, "learning_rate": 1.962390041493776e-05, "loss": 1.892, "step": 1137 }, { "epoch": 0.9443983402489626, "grad_norm": 9.652159690856934, "learning_rate": 1.962356846473029e-05, "loss": 1.8854, "step": 1138 }, { "epoch": 0.9452282157676348, "grad_norm": 9.993203163146973, "learning_rate": 1.9623236514522822e-05, "loss": 1.2979, "step": 1139 }, { "epoch": 0.946058091286307, "grad_norm": 9.869012832641602, "learning_rate": 1.9622904564315354e-05, "loss": 1.8606, "step": 1140 }, { "epoch": 0.9468879668049792, "grad_norm": 7.5565290451049805, "learning_rate": 1.9622572614107887e-05, "loss": 1.8507, "step": 1141 }, { "epoch": 0.9477178423236514, "grad_norm": 7.970448970794678, "learning_rate": 1.9622240663900415e-05, "loss": 0.712, "step": 1142 }, { "epoch": 0.9485477178423236, "grad_norm": 9.247259140014648, "learning_rate": 1.9621908713692947e-05, "loss": 1.3743, "step": 1143 }, { "epoch": 0.9493775933609958, "grad_norm": 10.870914459228516, "learning_rate": 1.962157676348548e-05, "loss": 1.725, "step": 1144 }, { "epoch": 0.950207468879668, "grad_norm": 7.657266139984131, "learning_rate": 1.962124481327801e-05, "loss": 1.3564, "step": 1145 }, { "epoch": 0.9510373443983402, "grad_norm": 12.44525146484375, "learning_rate": 1.962091286307054e-05, "loss": 1.5985, "step": 1146 }, { "epoch": 0.9518672199170124, "grad_norm": 14.240571975708008, "learning_rate": 1.9620580912863072e-05, "loss": 2.1158, "step": 1147 }, { "epoch": 0.9526970954356846, "grad_norm": 13.605881690979004, "learning_rate": 1.9620248962655604e-05, "loss": 1.976, "step": 1148 }, { "epoch": 0.9535269709543569, "grad_norm": 12.36135482788086, "learning_rate": 1.9619917012448137e-05, "loss": 
1.7927, "step": 1149 }, { "epoch": 0.9543568464730291, "grad_norm": 7.712305545806885, "learning_rate": 1.9619585062240665e-05, "loss": 1.7334, "step": 1150 }, { "epoch": 0.9551867219917013, "grad_norm": 8.571516990661621, "learning_rate": 1.9619253112033197e-05, "loss": 1.1563, "step": 1151 }, { "epoch": 0.9560165975103735, "grad_norm": 11.323554039001465, "learning_rate": 1.961892116182573e-05, "loss": 1.8578, "step": 1152 }, { "epoch": 0.9568464730290457, "grad_norm": 11.22138500213623, "learning_rate": 1.9618589211618258e-05, "loss": 1.6092, "step": 1153 }, { "epoch": 0.9576763485477179, "grad_norm": 12.102768898010254, "learning_rate": 1.961825726141079e-05, "loss": 2.1958, "step": 1154 }, { "epoch": 0.9585062240663901, "grad_norm": 12.474087715148926, "learning_rate": 1.961792531120332e-05, "loss": 1.3974, "step": 1155 }, { "epoch": 0.9593360995850623, "grad_norm": 9.393556594848633, "learning_rate": 1.961759336099585e-05, "loss": 2.3058, "step": 1156 }, { "epoch": 0.9601659751037345, "grad_norm": 12.507339477539062, "learning_rate": 1.9617261410788383e-05, "loss": 2.185, "step": 1157 }, { "epoch": 0.9609958506224067, "grad_norm": 6.652568340301514, "learning_rate": 1.9616929460580912e-05, "loss": 1.4906, "step": 1158 }, { "epoch": 0.9618257261410789, "grad_norm": 9.148261070251465, "learning_rate": 1.9616597510373444e-05, "loss": 1.8632, "step": 1159 }, { "epoch": 0.9626556016597511, "grad_norm": 8.941611289978027, "learning_rate": 1.9616265560165976e-05, "loss": 1.4933, "step": 1160 }, { "epoch": 0.9634854771784233, "grad_norm": 5.323235988616943, "learning_rate": 1.961593360995851e-05, "loss": 1.0475, "step": 1161 }, { "epoch": 0.9643153526970955, "grad_norm": 11.610217094421387, "learning_rate": 1.9615601659751037e-05, "loss": 1.8635, "step": 1162 }, { "epoch": 0.9651452282157676, "grad_norm": 7.463394641876221, "learning_rate": 1.961526970954357e-05, "loss": 1.1777, "step": 1163 }, { "epoch": 0.9659751037344398, "grad_norm": 5.993826866149902, 
"learning_rate": 1.96149377593361e-05, "loss": 1.3624, "step": 1164 }, { "epoch": 0.966804979253112, "grad_norm": 12.622208595275879, "learning_rate": 1.9614605809128633e-05, "loss": 1.9223, "step": 1165 }, { "epoch": 0.9676348547717842, "grad_norm": 10.41330623626709, "learning_rate": 1.9614273858921165e-05, "loss": 0.9344, "step": 1166 }, { "epoch": 0.9684647302904564, "grad_norm": 6.4254679679870605, "learning_rate": 1.9613941908713694e-05, "loss": 1.6221, "step": 1167 }, { "epoch": 0.9692946058091286, "grad_norm": 8.31645679473877, "learning_rate": 1.9613609958506226e-05, "loss": 2.6583, "step": 1168 }, { "epoch": 0.9701244813278008, "grad_norm": 9.011703491210938, "learning_rate": 1.961327800829876e-05, "loss": 1.1664, "step": 1169 }, { "epoch": 0.970954356846473, "grad_norm": 8.8826265335083, "learning_rate": 1.961294605809129e-05, "loss": 1.8831, "step": 1170 }, { "epoch": 0.9717842323651452, "grad_norm": 12.772198677062988, "learning_rate": 1.961261410788382e-05, "loss": 1.956, "step": 1171 }, { "epoch": 0.9726141078838174, "grad_norm": 7.337917804718018, "learning_rate": 1.961228215767635e-05, "loss": 1.0126, "step": 1172 }, { "epoch": 0.9734439834024896, "grad_norm": 12.464527130126953, "learning_rate": 1.961195020746888e-05, "loss": 2.4376, "step": 1173 }, { "epoch": 0.9742738589211618, "grad_norm": 7.8203864097595215, "learning_rate": 1.9611618257261412e-05, "loss": 1.4724, "step": 1174 }, { "epoch": 0.975103734439834, "grad_norm": 16.980052947998047, "learning_rate": 1.9611286307053944e-05, "loss": 2.5642, "step": 1175 }, { "epoch": 0.9759336099585062, "grad_norm": 7.089928150177002, "learning_rate": 1.9610954356846473e-05, "loss": 1.4258, "step": 1176 }, { "epoch": 0.9767634854771784, "grad_norm": 10.712523460388184, "learning_rate": 1.9610622406639005e-05, "loss": 2.1308, "step": 1177 }, { "epoch": 0.9775933609958506, "grad_norm": 12.64175796508789, "learning_rate": 1.9610290456431537e-05, "loss": 1.7469, "step": 1178 }, { "epoch": 
0.9784232365145228, "grad_norm": 10.267084121704102, "learning_rate": 1.9609958506224066e-05, "loss": 3.0885, "step": 1179 }, { "epoch": 0.979253112033195, "grad_norm": 10.980965614318848, "learning_rate": 1.9609626556016598e-05, "loss": 1.1867, "step": 1180 }, { "epoch": 0.9800829875518672, "grad_norm": 8.941593170166016, "learning_rate": 1.960929460580913e-05, "loss": 2.0885, "step": 1181 }, { "epoch": 0.9809128630705394, "grad_norm": 6.80097770690918, "learning_rate": 1.9608962655601662e-05, "loss": 1.6937, "step": 1182 }, { "epoch": 0.9817427385892116, "grad_norm": 8.885619163513184, "learning_rate": 1.960863070539419e-05, "loss": 1.7681, "step": 1183 }, { "epoch": 0.9825726141078838, "grad_norm": 10.494258880615234, "learning_rate": 1.9608298755186723e-05, "loss": 1.2304, "step": 1184 }, { "epoch": 0.983402489626556, "grad_norm": 8.968890190124512, "learning_rate": 1.9607966804979255e-05, "loss": 1.4823, "step": 1185 }, { "epoch": 0.9842323651452282, "grad_norm": 7.222068786621094, "learning_rate": 1.9607634854771787e-05, "loss": 2.0161, "step": 1186 }, { "epoch": 0.9850622406639005, "grad_norm": 9.491312980651855, "learning_rate": 1.9607302904564316e-05, "loss": 2.362, "step": 1187 }, { "epoch": 0.9858921161825727, "grad_norm": 9.274510383605957, "learning_rate": 1.9606970954356848e-05, "loss": 2.4733, "step": 1188 }, { "epoch": 0.9867219917012449, "grad_norm": 11.142280578613281, "learning_rate": 1.960663900414938e-05, "loss": 1.4259, "step": 1189 }, { "epoch": 0.9875518672199171, "grad_norm": 8.210805892944336, "learning_rate": 1.9606307053941912e-05, "loss": 1.6663, "step": 1190 }, { "epoch": 0.9883817427385893, "grad_norm": 8.041902542114258, "learning_rate": 1.960597510373444e-05, "loss": 1.509, "step": 1191 }, { "epoch": 0.9892116182572614, "grad_norm": 5.91854190826416, "learning_rate": 1.9605643153526973e-05, "loss": 1.1494, "step": 1192 }, { "epoch": 0.9900414937759336, "grad_norm": 7.414859294891357, "learning_rate": 1.9605311203319505e-05, "loss": 
1.9772, "step": 1193 }, { "epoch": 0.9908713692946058, "grad_norm": 8.630481719970703, "learning_rate": 1.9604979253112034e-05, "loss": 1.2604, "step": 1194 }, { "epoch": 0.991701244813278, "grad_norm": 5.5753397941589355, "learning_rate": 1.9604647302904566e-05, "loss": 0.9531, "step": 1195 }, { "epoch": 0.9925311203319502, "grad_norm": 15.51531982421875, "learning_rate": 1.9604315352697095e-05, "loss": 2.3928, "step": 1196 }, { "epoch": 0.9933609958506224, "grad_norm": 8.179166793823242, "learning_rate": 1.9603983402489627e-05, "loss": 1.9686, "step": 1197 }, { "epoch": 0.9941908713692946, "grad_norm": 12.76013469696045, "learning_rate": 1.960365145228216e-05, "loss": 1.8254, "step": 1198 }, { "epoch": 0.9950207468879668, "grad_norm": 9.132487297058105, "learning_rate": 1.960331950207469e-05, "loss": 2.1314, "step": 1199 }, { "epoch": 0.995850622406639, "grad_norm": 5.881124496459961, "learning_rate": 1.960298755186722e-05, "loss": 1.1243, "step": 1200 }, { "epoch": 0.9966804979253112, "grad_norm": 9.89426326751709, "learning_rate": 1.9602655601659752e-05, "loss": 2.2028, "step": 1201 }, { "epoch": 0.9975103734439834, "grad_norm": 11.195515632629395, "learning_rate": 1.9602323651452284e-05, "loss": 2.0128, "step": 1202 }, { "epoch": 0.9983402489626556, "grad_norm": 7.577419281005859, "learning_rate": 1.9601991701244816e-05, "loss": 1.2759, "step": 1203 }, { "epoch": 0.9991701244813278, "grad_norm": 8.465282440185547, "learning_rate": 1.9601659751037345e-05, "loss": 2.7101, "step": 1204 }, { "epoch": 1.0, "grad_norm": 6.406623840332031, "learning_rate": 1.9601327800829877e-05, "loss": 1.1225, "step": 1205 }, { "epoch": 1.0008298755186722, "grad_norm": 7.9376444816589355, "learning_rate": 1.960099585062241e-05, "loss": 1.3204, "step": 1206 }, { "epoch": 1.0016597510373444, "grad_norm": 8.21362590789795, "learning_rate": 1.960066390041494e-05, "loss": 1.1699, "step": 1207 }, { "epoch": 1.0024896265560166, "grad_norm": 11.835687637329102, "learning_rate": 
1.960033195020747e-05, "loss": 1.833, "step": 1208 }, { "epoch": 1.0033195020746888, "grad_norm": 9.337054252624512, "learning_rate": 1.9600000000000002e-05, "loss": 1.845, "step": 1209 }, { "epoch": 1.004149377593361, "grad_norm": 10.81097412109375, "learning_rate": 1.9599668049792534e-05, "loss": 1.1784, "step": 1210 }, { "epoch": 1.0049792531120332, "grad_norm": 9.772863388061523, "learning_rate": 1.9599336099585063e-05, "loss": 1.4553, "step": 1211 }, { "epoch": 1.0058091286307054, "grad_norm": 12.056110382080078, "learning_rate": 1.9599004149377595e-05, "loss": 2.9113, "step": 1212 }, { "epoch": 1.0066390041493776, "grad_norm": 14.09991455078125, "learning_rate": 1.9598672199170127e-05, "loss": 1.9362, "step": 1213 }, { "epoch": 1.0074688796680498, "grad_norm": 8.049407958984375, "learning_rate": 1.9598340248962656e-05, "loss": 1.5703, "step": 1214 }, { "epoch": 1.008298755186722, "grad_norm": 6.409671783447266, "learning_rate": 1.9598008298755188e-05, "loss": 1.1386, "step": 1215 }, { "epoch": 1.0091286307053942, "grad_norm": 7.7022786140441895, "learning_rate": 1.959767634854772e-05, "loss": 1.2478, "step": 1216 }, { "epoch": 1.0099585062240664, "grad_norm": 10.611076354980469, "learning_rate": 1.959734439834025e-05, "loss": 1.829, "step": 1217 }, { "epoch": 1.0107883817427386, "grad_norm": 7.726940155029297, "learning_rate": 1.959701244813278e-05, "loss": 1.6712, "step": 1218 }, { "epoch": 1.0116182572614107, "grad_norm": 8.456817626953125, "learning_rate": 1.9596680497925313e-05, "loss": 1.7441, "step": 1219 }, { "epoch": 1.012448132780083, "grad_norm": 6.938722610473633, "learning_rate": 1.9596348547717845e-05, "loss": 1.3418, "step": 1220 }, { "epoch": 1.0132780082987551, "grad_norm": 6.953182220458984, "learning_rate": 1.9596016597510374e-05, "loss": 1.3717, "step": 1221 }, { "epoch": 1.0141078838174273, "grad_norm": 9.242083549499512, "learning_rate": 1.9595684647302906e-05, "loss": 1.1688, "step": 1222 }, { "epoch": 1.0149377593360995, "grad_norm": 
6.174468994140625, "learning_rate": 1.9595352697095438e-05, "loss": 1.4704, "step": 1223 }, { "epoch": 1.0157676348547717, "grad_norm": 8.876529693603516, "learning_rate": 1.959502074688797e-05, "loss": 1.4757, "step": 1224 }, { "epoch": 1.016597510373444, "grad_norm": 9.987481117248535, "learning_rate": 1.95946887966805e-05, "loss": 1.8249, "step": 1225 }, { "epoch": 1.0174273858921161, "grad_norm": 8.320985794067383, "learning_rate": 1.959435684647303e-05, "loss": 1.616, "step": 1226 }, { "epoch": 1.0182572614107883, "grad_norm": 11.016260147094727, "learning_rate": 1.9594024896265563e-05, "loss": 1.2133, "step": 1227 }, { "epoch": 1.0190871369294605, "grad_norm": 7.246163845062256, "learning_rate": 1.9593692946058095e-05, "loss": 1.5985, "step": 1228 }, { "epoch": 1.0199170124481327, "grad_norm": 7.9562273025512695, "learning_rate": 1.9593360995850624e-05, "loss": 1.3805, "step": 1229 }, { "epoch": 1.020746887966805, "grad_norm": 9.441693305969238, "learning_rate": 1.9593029045643156e-05, "loss": 1.3252, "step": 1230 }, { "epoch": 1.021576763485477, "grad_norm": 10.749380111694336, "learning_rate": 1.9592697095435688e-05, "loss": 1.337, "step": 1231 }, { "epoch": 1.0224066390041493, "grad_norm": 8.544478416442871, "learning_rate": 1.9592365145228217e-05, "loss": 1.4451, "step": 1232 }, { "epoch": 1.0232365145228215, "grad_norm": 8.33585262298584, "learning_rate": 1.959203319502075e-05, "loss": 2.5483, "step": 1233 }, { "epoch": 1.0240663900414937, "grad_norm": 7.414074897766113, "learning_rate": 1.9591701244813278e-05, "loss": 1.7194, "step": 1234 }, { "epoch": 1.0248962655601659, "grad_norm": 10.076128959655762, "learning_rate": 1.959136929460581e-05, "loss": 1.9209, "step": 1235 }, { "epoch": 1.025726141078838, "grad_norm": 9.02608871459961, "learning_rate": 1.9591037344398342e-05, "loss": 1.7502, "step": 1236 }, { "epoch": 1.0265560165975103, "grad_norm": 13.699504852294922, "learning_rate": 1.959070539419087e-05, "loss": 1.9281, "step": 1237 }, { "epoch": 
1.0273858921161825, "grad_norm": 7.9698872566223145, "learning_rate": 1.9590373443983403e-05, "loss": 1.7754, "step": 1238 }, { "epoch": 1.0282157676348547, "grad_norm": 5.164582252502441, "learning_rate": 1.9590041493775935e-05, "loss": 1.0067, "step": 1239 }, { "epoch": 1.0290456431535269, "grad_norm": 12.720795631408691, "learning_rate": 1.9589709543568467e-05, "loss": 2.051, "step": 1240 }, { "epoch": 1.029875518672199, "grad_norm": 7.432298183441162, "learning_rate": 1.9589377593360996e-05, "loss": 1.8456, "step": 1241 }, { "epoch": 1.0307053941908713, "grad_norm": 7.848179340362549, "learning_rate": 1.9589045643153528e-05, "loss": 1.7794, "step": 1242 }, { "epoch": 1.0315352697095435, "grad_norm": 10.56924057006836, "learning_rate": 1.958871369294606e-05, "loss": 1.3449, "step": 1243 }, { "epoch": 1.0323651452282159, "grad_norm": 6.904541969299316, "learning_rate": 1.9588381742738592e-05, "loss": 1.095, "step": 1244 }, { "epoch": 1.033195020746888, "grad_norm": 13.38341236114502, "learning_rate": 1.9588049792531124e-05, "loss": 2.4407, "step": 1245 }, { "epoch": 1.0340248962655603, "grad_norm": 8.907658576965332, "learning_rate": 1.9587717842323653e-05, "loss": 1.209, "step": 1246 }, { "epoch": 1.0348547717842325, "grad_norm": 7.367257595062256, "learning_rate": 1.9587385892116185e-05, "loss": 1.3861, "step": 1247 }, { "epoch": 1.0356846473029047, "grad_norm": 7.578843116760254, "learning_rate": 1.9587053941908717e-05, "loss": 1.4708, "step": 1248 }, { "epoch": 1.0365145228215769, "grad_norm": 11.078348159790039, "learning_rate": 1.958672199170125e-05, "loss": 1.9145, "step": 1249 }, { "epoch": 1.037344398340249, "grad_norm": 6.2126054763793945, "learning_rate": 1.9586390041493778e-05, "loss": 1.2811, "step": 1250 }, { "epoch": 1.0381742738589212, "grad_norm": 7.224891185760498, "learning_rate": 1.958605809128631e-05, "loss": 1.165, "step": 1251 }, { "epoch": 1.0390041493775934, "grad_norm": 12.385437965393066, "learning_rate": 1.958572614107884e-05, "loss": 
1.3389, "step": 1252 }, { "epoch": 1.0398340248962656, "grad_norm": 8.676042556762695, "learning_rate": 1.958539419087137e-05, "loss": 1.565, "step": 1253 }, { "epoch": 1.0406639004149378, "grad_norm": 12.983094215393066, "learning_rate": 1.9585062240663903e-05, "loss": 1.3004, "step": 1254 }, { "epoch": 1.04149377593361, "grad_norm": 9.143336296081543, "learning_rate": 1.958473029045643e-05, "loss": 1.5574, "step": 1255 }, { "epoch": 1.0423236514522822, "grad_norm": 14.589420318603516, "learning_rate": 1.9584398340248964e-05, "loss": 1.0291, "step": 1256 }, { "epoch": 1.0431535269709544, "grad_norm": 9.352991104125977, "learning_rate": 1.9584066390041496e-05, "loss": 1.1041, "step": 1257 }, { "epoch": 1.0439834024896266, "grad_norm": 7.2385149002075195, "learning_rate": 1.9583734439834025e-05, "loss": 1.4319, "step": 1258 }, { "epoch": 1.0448132780082988, "grad_norm": 11.156270027160645, "learning_rate": 1.9583402489626557e-05, "loss": 1.6634, "step": 1259 }, { "epoch": 1.045643153526971, "grad_norm": 7.589219093322754, "learning_rate": 1.958307053941909e-05, "loss": 2.0033, "step": 1260 }, { "epoch": 1.0464730290456432, "grad_norm": 10.900761604309082, "learning_rate": 1.958273858921162e-05, "loss": 2.006, "step": 1261 }, { "epoch": 1.0473029045643154, "grad_norm": 5.803115367889404, "learning_rate": 1.958240663900415e-05, "loss": 1.5284, "step": 1262 }, { "epoch": 1.0481327800829876, "grad_norm": 12.925824165344238, "learning_rate": 1.958207468879668e-05, "loss": 1.3888, "step": 1263 }, { "epoch": 1.0489626556016598, "grad_norm": 10.819426536560059, "learning_rate": 1.9581742738589214e-05, "loss": 1.7187, "step": 1264 }, { "epoch": 1.049792531120332, "grad_norm": 7.5981903076171875, "learning_rate": 1.9581410788381746e-05, "loss": 1.4827, "step": 1265 }, { "epoch": 1.0506224066390042, "grad_norm": 6.20528507232666, "learning_rate": 1.9581078838174275e-05, "loss": 1.5871, "step": 1266 }, { "epoch": 1.0514522821576764, "grad_norm": 7.585771560668945, 
"learning_rate": 1.9580746887966807e-05, "loss": 1.4313, "step": 1267 }, { "epoch": 1.0522821576763486, "grad_norm": 11.178248405456543, "learning_rate": 1.958041493775934e-05, "loss": 2.2121, "step": 1268 }, { "epoch": 1.0531120331950208, "grad_norm": 8.7740478515625, "learning_rate": 1.958008298755187e-05, "loss": 1.9523, "step": 1269 }, { "epoch": 1.053941908713693, "grad_norm": 8.614056587219238, "learning_rate": 1.95797510373444e-05, "loss": 1.2271, "step": 1270 }, { "epoch": 1.0547717842323652, "grad_norm": 7.115377902984619, "learning_rate": 1.9579419087136932e-05, "loss": 1.3689, "step": 1271 }, { "epoch": 1.0556016597510374, "grad_norm": 10.324527740478516, "learning_rate": 1.957908713692946e-05, "loss": 1.9551, "step": 1272 }, { "epoch": 1.0564315352697096, "grad_norm": 6.720996856689453, "learning_rate": 1.9578755186721993e-05, "loss": 1.3484, "step": 1273 }, { "epoch": 1.0572614107883818, "grad_norm": 12.611400604248047, "learning_rate": 1.9578423236514525e-05, "loss": 2.2011, "step": 1274 }, { "epoch": 1.058091286307054, "grad_norm": 5.820699691772461, "learning_rate": 1.9578091286307053e-05, "loss": 1.3286, "step": 1275 }, { "epoch": 1.0589211618257262, "grad_norm": 7.956890106201172, "learning_rate": 1.9577759336099586e-05, "loss": 1.7138, "step": 1276 }, { "epoch": 1.0597510373443984, "grad_norm": 9.752882957458496, "learning_rate": 1.9577427385892118e-05, "loss": 1.9651, "step": 1277 }, { "epoch": 1.0605809128630705, "grad_norm": 13.564322471618652, "learning_rate": 1.957709543568465e-05, "loss": 2.091, "step": 1278 }, { "epoch": 1.0614107883817427, "grad_norm": 12.97313404083252, "learning_rate": 1.957676348547718e-05, "loss": 2.3777, "step": 1279 }, { "epoch": 1.062240663900415, "grad_norm": 14.361461639404297, "learning_rate": 1.957643153526971e-05, "loss": 1.3459, "step": 1280 }, { "epoch": 1.0630705394190871, "grad_norm": 7.849279403686523, "learning_rate": 1.9576099585062243e-05, "loss": 1.3319, "step": 1281 }, { "epoch": 1.0639004149377593, 
"grad_norm": 9.407218933105469, "learning_rate": 1.9575767634854775e-05, "loss": 1.8352, "step": 1282 }, { "epoch": 1.0647302904564315, "grad_norm": 11.959476470947266, "learning_rate": 1.9575435684647304e-05, "loss": 1.6947, "step": 1283 }, { "epoch": 1.0655601659751037, "grad_norm": 9.934351921081543, "learning_rate": 1.9575103734439836e-05, "loss": 1.9226, "step": 1284 }, { "epoch": 1.066390041493776, "grad_norm": 7.0815958976745605, "learning_rate": 1.9574771784232368e-05, "loss": 1.318, "step": 1285 }, { "epoch": 1.0672199170124481, "grad_norm": 10.776185035705566, "learning_rate": 1.95744398340249e-05, "loss": 0.9034, "step": 1286 }, { "epoch": 1.0680497925311203, "grad_norm": 9.610231399536133, "learning_rate": 1.957410788381743e-05, "loss": 1.6373, "step": 1287 }, { "epoch": 1.0688796680497925, "grad_norm": 11.736823081970215, "learning_rate": 1.957377593360996e-05, "loss": 1.8709, "step": 1288 }, { "epoch": 1.0697095435684647, "grad_norm": 9.322908401489258, "learning_rate": 1.9573443983402493e-05, "loss": 1.6758, "step": 1289 }, { "epoch": 1.070539419087137, "grad_norm": 9.718615531921387, "learning_rate": 1.957311203319502e-05, "loss": 2.438, "step": 1290 }, { "epoch": 1.071369294605809, "grad_norm": 6.499076843261719, "learning_rate": 1.9572780082987554e-05, "loss": 1.5806, "step": 1291 }, { "epoch": 1.0721991701244813, "grad_norm": 8.816822052001953, "learning_rate": 1.9572448132780086e-05, "loss": 1.1169, "step": 1292 }, { "epoch": 1.0730290456431535, "grad_norm": 7.268451690673828, "learning_rate": 1.9572116182572614e-05, "loss": 1.4695, "step": 1293 }, { "epoch": 1.0738589211618257, "grad_norm": 8.94394302368164, "learning_rate": 1.9571784232365147e-05, "loss": 0.6577, "step": 1294 }, { "epoch": 1.0746887966804979, "grad_norm": 12.73840045928955, "learning_rate": 1.9571452282157675e-05, "loss": 2.1353, "step": 1295 }, { "epoch": 1.07551867219917, "grad_norm": 8.579385757446289, "learning_rate": 1.9571120331950207e-05, "loss": 2.199, "step": 1296 }, 
{ "epoch": 1.0763485477178423, "grad_norm": 6.506949424743652, "learning_rate": 1.957078838174274e-05, "loss": 1.437, "step": 1297 }, { "epoch": 1.0771784232365145, "grad_norm": 7.910426139831543, "learning_rate": 1.957045643153527e-05, "loss": 1.8945, "step": 1298 }, { "epoch": 1.0780082987551867, "grad_norm": 12.989954948425293, "learning_rate": 1.9570124481327804e-05, "loss": 2.4679, "step": 1299 }, { "epoch": 1.0788381742738589, "grad_norm": 8.580968856811523, "learning_rate": 1.9569792531120332e-05, "loss": 1.68, "step": 1300 }, { "epoch": 1.079668049792531, "grad_norm": 6.862180233001709, "learning_rate": 1.9569460580912864e-05, "loss": 1.7058, "step": 1301 }, { "epoch": 1.0804979253112033, "grad_norm": 7.230332851409912, "learning_rate": 1.9569128630705397e-05, "loss": 1.3051, "step": 1302 }, { "epoch": 1.0813278008298755, "grad_norm": 9.673539161682129, "learning_rate": 1.956879668049793e-05, "loss": 1.4665, "step": 1303 }, { "epoch": 1.0821576763485476, "grad_norm": 5.522988796234131, "learning_rate": 1.9568464730290457e-05, "loss": 1.5681, "step": 1304 }, { "epoch": 1.0829875518672198, "grad_norm": 8.69428825378418, "learning_rate": 1.956813278008299e-05, "loss": 1.421, "step": 1305 }, { "epoch": 1.083817427385892, "grad_norm": 11.053277015686035, "learning_rate": 1.956780082987552e-05, "loss": 1.4933, "step": 1306 }, { "epoch": 1.0846473029045642, "grad_norm": 11.911993026733398, "learning_rate": 1.9567468879668054e-05, "loss": 1.3909, "step": 1307 }, { "epoch": 1.0854771784232364, "grad_norm": 12.326620101928711, "learning_rate": 1.9567136929460582e-05, "loss": 1.7983, "step": 1308 }, { "epoch": 1.0863070539419086, "grad_norm": 7.082305908203125, "learning_rate": 1.9566804979253115e-05, "loss": 1.3768, "step": 1309 }, { "epoch": 1.0871369294605808, "grad_norm": 14.851119995117188, "learning_rate": 1.9566473029045647e-05, "loss": 2.1864, "step": 1310 }, { "epoch": 1.087966804979253, "grad_norm": 6.452696800231934, "learning_rate": 1.9566141078838175e-05, 
"loss": 1.3859, "step": 1311 }, { "epoch": 1.0887966804979252, "grad_norm": 8.092888832092285, "learning_rate": 1.9565809128630708e-05, "loss": 0.9091, "step": 1312 }, { "epoch": 1.0896265560165974, "grad_norm": 8.010175704956055, "learning_rate": 1.9565477178423236e-05, "loss": 1.5669, "step": 1313 }, { "epoch": 1.0904564315352696, "grad_norm": 12.003629684448242, "learning_rate": 1.956514522821577e-05, "loss": 1.289, "step": 1314 }, { "epoch": 1.0912863070539418, "grad_norm": 11.766084671020508, "learning_rate": 1.95648132780083e-05, "loss": 1.7064, "step": 1315 }, { "epoch": 1.0921161825726142, "grad_norm": 8.901801109313965, "learning_rate": 1.956448132780083e-05, "loss": 1.352, "step": 1316 }, { "epoch": 1.0929460580912864, "grad_norm": 7.009518623352051, "learning_rate": 1.956414937759336e-05, "loss": 1.2516, "step": 1317 }, { "epoch": 1.0937759336099586, "grad_norm": 7.746376037597656, "learning_rate": 1.9563817427385893e-05, "loss": 1.7884, "step": 1318 }, { "epoch": 1.0946058091286308, "grad_norm": 10.098143577575684, "learning_rate": 1.9563485477178425e-05, "loss": 1.7199, "step": 1319 }, { "epoch": 1.095435684647303, "grad_norm": 7.68332576751709, "learning_rate": 1.9563153526970954e-05, "loss": 1.5011, "step": 1320 }, { "epoch": 1.0962655601659752, "grad_norm": 8.775968551635742, "learning_rate": 1.9562821576763486e-05, "loss": 1.2923, "step": 1321 }, { "epoch": 1.0970954356846474, "grad_norm": 11.886751174926758, "learning_rate": 1.956248962655602e-05, "loss": 2.2631, "step": 1322 }, { "epoch": 1.0979253112033196, "grad_norm": 6.469749450683594, "learning_rate": 1.956215767634855e-05, "loss": 0.8932, "step": 1323 }, { "epoch": 1.0987551867219918, "grad_norm": 7.604263782501221, "learning_rate": 1.956182572614108e-05, "loss": 1.3093, "step": 1324 }, { "epoch": 1.099585062240664, "grad_norm": 8.84247875213623, "learning_rate": 1.956149377593361e-05, "loss": 1.3286, "step": 1325 }, { "epoch": 1.1004149377593362, "grad_norm": 15.669500350952148, 
"learning_rate": 1.9561161825726143e-05, "loss": 2.103, "step": 1326 }, { "epoch": 1.1012448132780084, "grad_norm": 6.317906856536865, "learning_rate": 1.9560829875518676e-05, "loss": 1.2269, "step": 1327 }, { "epoch": 1.1020746887966806, "grad_norm": 6.511379241943359, "learning_rate": 1.9560497925311204e-05, "loss": 1.6914, "step": 1328 }, { "epoch": 1.1029045643153528, "grad_norm": 6.4059224128723145, "learning_rate": 1.9560165975103736e-05, "loss": 1.0067, "step": 1329 }, { "epoch": 1.103734439834025, "grad_norm": 8.472036361694336, "learning_rate": 1.955983402489627e-05, "loss": 1.884, "step": 1330 }, { "epoch": 1.1045643153526972, "grad_norm": 8.3264799118042, "learning_rate": 1.9559502074688797e-05, "loss": 0.8869, "step": 1331 }, { "epoch": 1.1053941908713694, "grad_norm": 12.522126197814941, "learning_rate": 1.955917012448133e-05, "loss": 1.6328, "step": 1332 }, { "epoch": 1.1062240663900416, "grad_norm": 7.156670093536377, "learning_rate": 1.955883817427386e-05, "loss": 1.3413, "step": 1333 }, { "epoch": 1.1070539419087138, "grad_norm": 11.897173881530762, "learning_rate": 1.955850622406639e-05, "loss": 2.046, "step": 1334 }, { "epoch": 1.107883817427386, "grad_norm": 7.502878189086914, "learning_rate": 1.9558174273858922e-05, "loss": 1.0058, "step": 1335 }, { "epoch": 1.1087136929460581, "grad_norm": 9.02784538269043, "learning_rate": 1.9557842323651454e-05, "loss": 1.8981, "step": 1336 }, { "epoch": 1.1095435684647303, "grad_norm": 8.672867774963379, "learning_rate": 1.9557510373443983e-05, "loss": 1.7468, "step": 1337 }, { "epoch": 1.1103734439834025, "grad_norm": 8.40977668762207, "learning_rate": 1.9557178423236515e-05, "loss": 1.9107, "step": 1338 }, { "epoch": 1.1112033195020747, "grad_norm": 9.11327075958252, "learning_rate": 1.9556846473029047e-05, "loss": 1.4493, "step": 1339 }, { "epoch": 1.112033195020747, "grad_norm": 11.82172966003418, "learning_rate": 1.955651452282158e-05, "loss": 1.3871, "step": 1340 }, { "epoch": 1.1128630705394191, 
"grad_norm": 9.709136009216309, "learning_rate": 1.9556182572614108e-05, "loss": 2.2042, "step": 1341 }, { "epoch": 1.1136929460580913, "grad_norm": 8.658158302307129, "learning_rate": 1.955585062240664e-05, "loss": 1.658, "step": 1342 }, { "epoch": 1.1145228215767635, "grad_norm": 6.613819122314453, "learning_rate": 1.9555518672199172e-05, "loss": 0.8968, "step": 1343 }, { "epoch": 1.1153526970954357, "grad_norm": 13.895752906799316, "learning_rate": 1.9555186721991704e-05, "loss": 1.3798, "step": 1344 }, { "epoch": 1.116182572614108, "grad_norm": 12.559549331665039, "learning_rate": 1.9554854771784233e-05, "loss": 2.226, "step": 1345 }, { "epoch": 1.1170124481327801, "grad_norm": 7.307908058166504, "learning_rate": 1.9554522821576765e-05, "loss": 1.1225, "step": 1346 }, { "epoch": 1.1178423236514523, "grad_norm": 5.2264556884765625, "learning_rate": 1.9554190871369297e-05, "loss": 1.2422, "step": 1347 }, { "epoch": 1.1186721991701245, "grad_norm": 13.708698272705078, "learning_rate": 1.955385892116183e-05, "loss": 1.3401, "step": 1348 }, { "epoch": 1.1195020746887967, "grad_norm": 5.221086502075195, "learning_rate": 1.9553526970954358e-05, "loss": 1.5394, "step": 1349 }, { "epoch": 1.120331950207469, "grad_norm": 6.663780689239502, "learning_rate": 1.955319502074689e-05, "loss": 1.0153, "step": 1350 }, { "epoch": 1.121161825726141, "grad_norm": 7.2884840965271, "learning_rate": 1.955286307053942e-05, "loss": 1.0611, "step": 1351 }, { "epoch": 1.1219917012448133, "grad_norm": 13.449247360229492, "learning_rate": 1.955253112033195e-05, "loss": 2.345, "step": 1352 }, { "epoch": 1.1228215767634855, "grad_norm": 15.307674407958984, "learning_rate": 1.9552199170124483e-05, "loss": 1.1984, "step": 1353 }, { "epoch": 1.1236514522821577, "grad_norm": 7.215226173400879, "learning_rate": 1.9551867219917012e-05, "loss": 1.3288, "step": 1354 }, { "epoch": 1.1244813278008299, "grad_norm": 6.741346836090088, "learning_rate": 1.9551535269709544e-05, "loss": 1.3764, "step": 1355 
}, { "epoch": 1.125311203319502, "grad_norm": 14.286454200744629, "learning_rate": 1.9551203319502076e-05, "loss": 1.151, "step": 1356 }, { "epoch": 1.1261410788381743, "grad_norm": 11.940996170043945, "learning_rate": 1.955087136929461e-05, "loss": 1.6465, "step": 1357 }, { "epoch": 1.1269709543568465, "grad_norm": 10.835439682006836, "learning_rate": 1.9550539419087137e-05, "loss": 1.0964, "step": 1358 }, { "epoch": 1.1278008298755187, "grad_norm": 6.806938648223877, "learning_rate": 1.955020746887967e-05, "loss": 1.3385, "step": 1359 }, { "epoch": 1.1286307053941909, "grad_norm": 10.947266578674316, "learning_rate": 1.95498755186722e-05, "loss": 0.8669, "step": 1360 }, { "epoch": 1.129460580912863, "grad_norm": 7.669120788574219, "learning_rate": 1.9549543568464733e-05, "loss": 1.4374, "step": 1361 }, { "epoch": 1.1302904564315353, "grad_norm": 10.281548500061035, "learning_rate": 1.9549211618257262e-05, "loss": 0.9263, "step": 1362 }, { "epoch": 1.1311203319502074, "grad_norm": 8.798393249511719, "learning_rate": 1.9548879668049794e-05, "loss": 1.3118, "step": 1363 }, { "epoch": 1.1319502074688796, "grad_norm": 9.350452423095703, "learning_rate": 1.9548547717842326e-05, "loss": 1.2851, "step": 1364 }, { "epoch": 1.1327800829875518, "grad_norm": 14.48063850402832, "learning_rate": 1.954821576763486e-05, "loss": 2.2502, "step": 1365 }, { "epoch": 1.133609958506224, "grad_norm": 16.25042724609375, "learning_rate": 1.9547883817427387e-05, "loss": 1.8693, "step": 1366 }, { "epoch": 1.1344398340248962, "grad_norm": 10.667486190795898, "learning_rate": 1.954755186721992e-05, "loss": 1.6733, "step": 1367 }, { "epoch": 1.1352697095435684, "grad_norm": 10.074225425720215, "learning_rate": 1.954721991701245e-05, "loss": 1.8073, "step": 1368 }, { "epoch": 1.1360995850622406, "grad_norm": 11.323653221130371, "learning_rate": 1.954688796680498e-05, "loss": 2.2911, "step": 1369 }, { "epoch": 1.1369294605809128, "grad_norm": 8.908066749572754, "learning_rate": 
1.9546556016597512e-05, "loss": 1.8062, "step": 1370 }, { "epoch": 1.137759336099585, "grad_norm": 11.386402130126953, "learning_rate": 1.9546224066390044e-05, "loss": 2.0625, "step": 1371 }, { "epoch": 1.1385892116182572, "grad_norm": 5.37874698638916, "learning_rate": 1.9545892116182573e-05, "loss": 0.984, "step": 1372 }, { "epoch": 1.1394190871369294, "grad_norm": 10.123196601867676, "learning_rate": 1.9545560165975105e-05, "loss": 1.7713, "step": 1373 }, { "epoch": 1.1402489626556016, "grad_norm": 9.140893936157227, "learning_rate": 1.9545228215767634e-05, "loss": 1.7327, "step": 1374 }, { "epoch": 1.1410788381742738, "grad_norm": 10.179215431213379, "learning_rate": 1.9544896265560166e-05, "loss": 1.5935, "step": 1375 }, { "epoch": 1.141908713692946, "grad_norm": 6.30558967590332, "learning_rate": 1.9544564315352698e-05, "loss": 0.7595, "step": 1376 }, { "epoch": 1.1427385892116182, "grad_norm": 10.19792652130127, "learning_rate": 1.954423236514523e-05, "loss": 1.325, "step": 1377 }, { "epoch": 1.1435684647302904, "grad_norm": 11.06634521484375, "learning_rate": 1.9543900414937762e-05, "loss": 1.5083, "step": 1378 }, { "epoch": 1.1443983402489626, "grad_norm": 11.033997535705566, "learning_rate": 1.954356846473029e-05, "loss": 1.7097, "step": 1379 }, { "epoch": 1.1452282157676348, "grad_norm": 12.299509048461914, "learning_rate": 1.9543236514522823e-05, "loss": 1.4374, "step": 1380 }, { "epoch": 1.146058091286307, "grad_norm": 8.816729545593262, "learning_rate": 1.9542904564315355e-05, "loss": 1.4576, "step": 1381 }, { "epoch": 1.1468879668049792, "grad_norm": 9.398429870605469, "learning_rate": 1.9542572614107887e-05, "loss": 1.6557, "step": 1382 }, { "epoch": 1.1477178423236514, "grad_norm": 7.857003211975098, "learning_rate": 1.9542240663900416e-05, "loss": 1.1918, "step": 1383 }, { "epoch": 1.1485477178423236, "grad_norm": 13.41519546508789, "learning_rate": 1.9541908713692948e-05, "loss": 0.9188, "step": 1384 }, { "epoch": 1.1493775933609958, "grad_norm": 
6.703701019287109, "learning_rate": 1.954157676348548e-05, "loss": 1.3527, "step": 1385 }, { "epoch": 1.150207468879668, "grad_norm": 10.432761192321777, "learning_rate": 1.9541244813278012e-05, "loss": 2.2871, "step": 1386 }, { "epoch": 1.1510373443983402, "grad_norm": 8.617255210876465, "learning_rate": 1.954091286307054e-05, "loss": 2.0553, "step": 1387 }, { "epoch": 1.1518672199170124, "grad_norm": 10.678892135620117, "learning_rate": 1.9540580912863073e-05, "loss": 1.9709, "step": 1388 }, { "epoch": 1.1526970954356845, "grad_norm": 8.311568260192871, "learning_rate": 1.9540248962655602e-05, "loss": 1.5991, "step": 1389 }, { "epoch": 1.1535269709543567, "grad_norm": 7.983694553375244, "learning_rate": 1.9539917012448134e-05, "loss": 0.6249, "step": 1390 }, { "epoch": 1.154356846473029, "grad_norm": 12.245828628540039, "learning_rate": 1.9539585062240666e-05, "loss": 1.1873, "step": 1391 }, { "epoch": 1.1551867219917011, "grad_norm": 6.067671298980713, "learning_rate": 1.9539253112033195e-05, "loss": 1.2443, "step": 1392 }, { "epoch": 1.1560165975103733, "grad_norm": 9.398874282836914, "learning_rate": 1.9538921161825727e-05, "loss": 1.3528, "step": 1393 }, { "epoch": 1.1568464730290455, "grad_norm": 14.049287796020508, "learning_rate": 1.953858921161826e-05, "loss": 2.7288, "step": 1394 }, { "epoch": 1.1576763485477177, "grad_norm": 8.128776550292969, "learning_rate": 1.9538257261410788e-05, "loss": 1.3571, "step": 1395 }, { "epoch": 1.15850622406639, "grad_norm": 7.9690070152282715, "learning_rate": 1.953792531120332e-05, "loss": 0.9289, "step": 1396 }, { "epoch": 1.1593360995850621, "grad_norm": 5.803670406341553, "learning_rate": 1.9537593360995852e-05, "loss": 1.5815, "step": 1397 }, { "epoch": 1.1601659751037345, "grad_norm": 6.900575160980225, "learning_rate": 1.9537261410788384e-05, "loss": 1.1098, "step": 1398 }, { "epoch": 1.1609958506224067, "grad_norm": 11.414563179016113, "learning_rate": 1.9536929460580913e-05, "loss": 1.2532, "step": 1399 }, { 
"epoch": 1.161825726141079, "grad_norm": 11.338727951049805, "learning_rate": 1.9536597510373445e-05, "loss": 2.1399, "step": 1400 }, { "epoch": 1.1626556016597511, "grad_norm": 9.426340103149414, "learning_rate": 1.9536265560165977e-05, "loss": 1.6297, "step": 1401 }, { "epoch": 1.1634854771784233, "grad_norm": 6.278823375701904, "learning_rate": 1.953593360995851e-05, "loss": 1.0674, "step": 1402 }, { "epoch": 1.1643153526970955, "grad_norm": 12.232488632202148, "learning_rate": 1.9535601659751038e-05, "loss": 1.2025, "step": 1403 }, { "epoch": 1.1651452282157677, "grad_norm": 13.585097312927246, "learning_rate": 1.953526970954357e-05, "loss": 2.1143, "step": 1404 }, { "epoch": 1.16597510373444, "grad_norm": 8.05331039428711, "learning_rate": 1.9534937759336102e-05, "loss": 1.4824, "step": 1405 }, { "epoch": 1.166804979253112, "grad_norm": 12.309944152832031, "learning_rate": 1.9534605809128634e-05, "loss": 1.745, "step": 1406 }, { "epoch": 1.1676348547717843, "grad_norm": 13.298381805419922, "learning_rate": 1.9534273858921163e-05, "loss": 1.8391, "step": 1407 }, { "epoch": 1.1684647302904565, "grad_norm": 13.132065773010254, "learning_rate": 1.9533941908713695e-05, "loss": 1.9761, "step": 1408 }, { "epoch": 1.1692946058091287, "grad_norm": 7.795182704925537, "learning_rate": 1.9533609958506227e-05, "loss": 1.2051, "step": 1409 }, { "epoch": 1.170124481327801, "grad_norm": 8.186982154846191, "learning_rate": 1.9533278008298756e-05, "loss": 1.0336, "step": 1410 }, { "epoch": 1.170954356846473, "grad_norm": 10.58688735961914, "learning_rate": 1.9532946058091288e-05, "loss": 2.0746, "step": 1411 }, { "epoch": 1.1717842323651453, "grad_norm": 8.646858215332031, "learning_rate": 1.9532614107883817e-05, "loss": 0.8881, "step": 1412 }, { "epoch": 1.1726141078838175, "grad_norm": 7.923764705657959, "learning_rate": 1.953228215767635e-05, "loss": 1.6543, "step": 1413 }, { "epoch": 1.1734439834024897, "grad_norm": 11.030084609985352, "learning_rate": 
1.953195020746888e-05, "loss": 1.0486, "step": 1414 }, { "epoch": 1.1742738589211619, "grad_norm": 8.922853469848633, "learning_rate": 1.9531618257261413e-05, "loss": 1.2688, "step": 1415 }, { "epoch": 1.175103734439834, "grad_norm": 8.775773048400879, "learning_rate": 1.953128630705394e-05, "loss": 1.6668, "step": 1416 }, { "epoch": 1.1759336099585063, "grad_norm": 9.656767845153809, "learning_rate": 1.9530954356846474e-05, "loss": 1.2065, "step": 1417 }, { "epoch": 1.1767634854771785, "grad_norm": 14.272638320922852, "learning_rate": 1.9530622406639006e-05, "loss": 2.3734, "step": 1418 }, { "epoch": 1.1775933609958507, "grad_norm": 7.4946417808532715, "learning_rate": 1.9530290456431538e-05, "loss": 0.9921, "step": 1419 }, { "epoch": 1.1784232365145229, "grad_norm": 11.029170989990234, "learning_rate": 1.9529958506224067e-05, "loss": 1.487, "step": 1420 }, { "epoch": 1.179253112033195, "grad_norm": 12.757689476013184, "learning_rate": 1.95296265560166e-05, "loss": 2.7793, "step": 1421 }, { "epoch": 1.1800829875518672, "grad_norm": 14.730562210083008, "learning_rate": 1.952929460580913e-05, "loss": 1.8835, "step": 1422 }, { "epoch": 1.1809128630705394, "grad_norm": 10.296401977539062, "learning_rate": 1.9528962655601663e-05, "loss": 1.0523, "step": 1423 }, { "epoch": 1.1817427385892116, "grad_norm": 12.560590744018555, "learning_rate": 1.9528630705394192e-05, "loss": 2.2925, "step": 1424 }, { "epoch": 1.1825726141078838, "grad_norm": 5.043403625488281, "learning_rate": 1.9528298755186724e-05, "loss": 1.1588, "step": 1425 }, { "epoch": 1.183402489626556, "grad_norm": 6.62839937210083, "learning_rate": 1.9527966804979256e-05, "loss": 1.5732, "step": 1426 }, { "epoch": 1.1842323651452282, "grad_norm": 16.726165771484375, "learning_rate": 1.9527634854771788e-05, "loss": 1.635, "step": 1427 }, { "epoch": 1.1850622406639004, "grad_norm": 7.374053001403809, "learning_rate": 1.9527302904564317e-05, "loss": 1.1683, "step": 1428 }, { "epoch": 1.1858921161825726, 
"grad_norm": 12.293844223022461, "learning_rate": 1.952697095435685e-05, "loss": 1.4384, "step": 1429 }, { "epoch": 1.1867219917012448, "grad_norm": 8.10482406616211, "learning_rate": 1.9526639004149378e-05, "loss": 1.2501, "step": 1430 }, { "epoch": 1.187551867219917, "grad_norm": 14.853662490844727, "learning_rate": 1.952630705394191e-05, "loss": 3.1407, "step": 1431 }, { "epoch": 1.1883817427385892, "grad_norm": 7.575500965118408, "learning_rate": 1.9525975103734442e-05, "loss": 1.3209, "step": 1432 }, { "epoch": 1.1892116182572614, "grad_norm": 15.161070823669434, "learning_rate": 1.952564315352697e-05, "loss": 1.7054, "step": 1433 }, { "epoch": 1.1900414937759336, "grad_norm": 6.202317714691162, "learning_rate": 1.9525311203319503e-05, "loss": 1.0859, "step": 1434 }, { "epoch": 1.1908713692946058, "grad_norm": 7.594390869140625, "learning_rate": 1.9524979253112035e-05, "loss": 1.3271, "step": 1435 }, { "epoch": 1.191701244813278, "grad_norm": 9.848727226257324, "learning_rate": 1.9524647302904567e-05, "loss": 1.1077, "step": 1436 }, { "epoch": 1.1925311203319502, "grad_norm": 11.037665367126465, "learning_rate": 1.9524315352697096e-05, "loss": 1.9782, "step": 1437 }, { "epoch": 1.1933609958506224, "grad_norm": 6.64110803604126, "learning_rate": 1.9523983402489628e-05, "loss": 1.2506, "step": 1438 }, { "epoch": 1.1941908713692946, "grad_norm": 14.005292892456055, "learning_rate": 1.952365145228216e-05, "loss": 1.9377, "step": 1439 }, { "epoch": 1.1950207468879668, "grad_norm": 5.822318077087402, "learning_rate": 1.9523319502074692e-05, "loss": 1.3931, "step": 1440 }, { "epoch": 1.195850622406639, "grad_norm": 13.535847663879395, "learning_rate": 1.952298755186722e-05, "loss": 1.1856, "step": 1441 }, { "epoch": 1.1966804979253112, "grad_norm": 13.421255111694336, "learning_rate": 1.9522655601659753e-05, "loss": 0.95, "step": 1442 }, { "epoch": 1.1975103734439834, "grad_norm": 9.771718978881836, "learning_rate": 1.9522323651452285e-05, "loss": 1.8171, "step": 
1443 }, { "epoch": 1.1983402489626556, "grad_norm": 8.482172966003418, "learning_rate": 1.9521991701244817e-05, "loss": 1.6367, "step": 1444 }, { "epoch": 1.1991701244813278, "grad_norm": 7.370646953582764, "learning_rate": 1.9521659751037346e-05, "loss": 1.3728, "step": 1445 }, { "epoch": 1.2, "grad_norm": 5.991360664367676, "learning_rate": 1.9521327800829878e-05, "loss": 1.2847, "step": 1446 }, { "epoch": 1.2008298755186722, "grad_norm": 8.605517387390137, "learning_rate": 1.952099585062241e-05, "loss": 1.4678, "step": 1447 }, { "epoch": 1.2016597510373443, "grad_norm": 11.212103843688965, "learning_rate": 1.952066390041494e-05, "loss": 1.23, "step": 1448 }, { "epoch": 1.2024896265560165, "grad_norm": 11.346314430236816, "learning_rate": 1.952033195020747e-05, "loss": 2.0386, "step": 1449 }, { "epoch": 1.2033195020746887, "grad_norm": 9.00829029083252, "learning_rate": 1.9520000000000003e-05, "loss": 2.2713, "step": 1450 }, { "epoch": 1.204149377593361, "grad_norm": 12.769853591918945, "learning_rate": 1.951966804979253e-05, "loss": 1.5284, "step": 1451 }, { "epoch": 1.2049792531120331, "grad_norm": 8.782848358154297, "learning_rate": 1.9519336099585064e-05, "loss": 1.7847, "step": 1452 }, { "epoch": 1.2058091286307053, "grad_norm": 10.5407133102417, "learning_rate": 1.9519004149377592e-05, "loss": 2.0421, "step": 1453 }, { "epoch": 1.2066390041493775, "grad_norm": 10.123241424560547, "learning_rate": 1.9518672199170124e-05, "loss": 1.6744, "step": 1454 }, { "epoch": 1.2074688796680497, "grad_norm": 10.25167179107666, "learning_rate": 1.9518340248962657e-05, "loss": 1.0892, "step": 1455 }, { "epoch": 1.208298755186722, "grad_norm": 10.509990692138672, "learning_rate": 1.951800829875519e-05, "loss": 1.4159, "step": 1456 }, { "epoch": 1.2091286307053941, "grad_norm": 11.474100112915039, "learning_rate": 1.951767634854772e-05, "loss": 1.6197, "step": 1457 }, { "epoch": 1.2099585062240663, "grad_norm": 9.373014450073242, "learning_rate": 1.951734439834025e-05, 
"loss": 1.4541, "step": 1458 }, { "epoch": 1.2107883817427385, "grad_norm": 11.685567855834961, "learning_rate": 1.951701244813278e-05, "loss": 1.2859, "step": 1459 }, { "epoch": 1.2116182572614107, "grad_norm": 7.345618724822998, "learning_rate": 1.9516680497925314e-05, "loss": 1.3367, "step": 1460 }, { "epoch": 1.212448132780083, "grad_norm": 8.459707260131836, "learning_rate": 1.9516348547717846e-05, "loss": 1.9171, "step": 1461 }, { "epoch": 1.213278008298755, "grad_norm": 11.134950637817383, "learning_rate": 1.9516016597510375e-05, "loss": 1.7684, "step": 1462 }, { "epoch": 1.2141078838174273, "grad_norm": 6.915536403656006, "learning_rate": 1.9515684647302907e-05, "loss": 1.1834, "step": 1463 }, { "epoch": 1.2149377593360997, "grad_norm": 8.069690704345703, "learning_rate": 1.951535269709544e-05, "loss": 1.6456, "step": 1464 }, { "epoch": 1.215767634854772, "grad_norm": 8.485416412353516, "learning_rate": 1.951502074688797e-05, "loss": 1.3686, "step": 1465 }, { "epoch": 1.216597510373444, "grad_norm": 10.030961036682129, "learning_rate": 1.95146887966805e-05, "loss": 2.1609, "step": 1466 }, { "epoch": 1.2174273858921163, "grad_norm": 10.975284576416016, "learning_rate": 1.9514356846473032e-05, "loss": 1.33, "step": 1467 }, { "epoch": 1.2182572614107885, "grad_norm": 13.347543716430664, "learning_rate": 1.951402489626556e-05, "loss": 1.1945, "step": 1468 }, { "epoch": 1.2190871369294607, "grad_norm": 12.214614868164062, "learning_rate": 1.9513692946058093e-05, "loss": 1.484, "step": 1469 }, { "epoch": 1.2199170124481329, "grad_norm": 10.14606761932373, "learning_rate": 1.9513360995850625e-05, "loss": 1.9451, "step": 1470 }, { "epoch": 1.220746887966805, "grad_norm": 12.780320167541504, "learning_rate": 1.9513029045643153e-05, "loss": 1.9594, "step": 1471 }, { "epoch": 1.2215767634854773, "grad_norm": 12.804708480834961, "learning_rate": 1.9512697095435685e-05, "loss": 1.0045, "step": 1472 }, { "epoch": 1.2224066390041495, "grad_norm": 11.110213279724121, 
"learning_rate": 1.9512365145228218e-05, "loss": 1.4816, "step": 1473 }, { "epoch": 1.2232365145228217, "grad_norm": 7.216926574707031, "learning_rate": 1.9512033195020746e-05, "loss": 1.6175, "step": 1474 }, { "epoch": 1.2240663900414939, "grad_norm": 11.234636306762695, "learning_rate": 1.951170124481328e-05, "loss": 1.4034, "step": 1475 }, { "epoch": 1.224896265560166, "grad_norm": 11.389232635498047, "learning_rate": 1.951136929460581e-05, "loss": 1.2813, "step": 1476 }, { "epoch": 1.2257261410788383, "grad_norm": 9.317341804504395, "learning_rate": 1.9511037344398343e-05, "loss": 1.9218, "step": 1477 }, { "epoch": 1.2265560165975105, "grad_norm": 10.275772094726562, "learning_rate": 1.951070539419087e-05, "loss": 1.5195, "step": 1478 }, { "epoch": 1.2273858921161827, "grad_norm": 12.122617721557617, "learning_rate": 1.9510373443983403e-05, "loss": 1.8415, "step": 1479 }, { "epoch": 1.2282157676348548, "grad_norm": 15.382635116577148, "learning_rate": 1.9510041493775936e-05, "loss": 1.8524, "step": 1480 }, { "epoch": 1.229045643153527, "grad_norm": 7.7138285636901855, "learning_rate": 1.9509709543568468e-05, "loss": 1.1816, "step": 1481 }, { "epoch": 1.2298755186721992, "grad_norm": 11.990979194641113, "learning_rate": 1.9509377593360996e-05, "loss": 2.0832, "step": 1482 }, { "epoch": 1.2307053941908714, "grad_norm": 9.440702438354492, "learning_rate": 1.950904564315353e-05, "loss": 1.6045, "step": 1483 }, { "epoch": 1.2315352697095436, "grad_norm": 9.289656639099121, "learning_rate": 1.950871369294606e-05, "loss": 1.415, "step": 1484 }, { "epoch": 1.2323651452282158, "grad_norm": 9.108003616333008, "learning_rate": 1.9508381742738593e-05, "loss": 1.5007, "step": 1485 }, { "epoch": 1.233195020746888, "grad_norm": 14.952779769897461, "learning_rate": 1.950804979253112e-05, "loss": 2.897, "step": 1486 }, { "epoch": 1.2340248962655602, "grad_norm": 10.266624450683594, "learning_rate": 1.9507717842323654e-05, "loss": 1.5673, "step": 1487 }, { "epoch": 
1.2348547717842324, "grad_norm": 12.024714469909668, "learning_rate": 1.9507385892116186e-05, "loss": 2.0141, "step": 1488 }, { "epoch": 1.2356846473029046, "grad_norm": 7.9349045753479, "learning_rate": 1.9507053941908714e-05, "loss": 1.0052, "step": 1489 }, { "epoch": 1.2365145228215768, "grad_norm": 8.129036903381348, "learning_rate": 1.9506721991701246e-05, "loss": 1.1115, "step": 1490 }, { "epoch": 1.237344398340249, "grad_norm": 11.098745346069336, "learning_rate": 1.9506390041493775e-05, "loss": 1.1979, "step": 1491 }, { "epoch": 1.2381742738589212, "grad_norm": 7.494071006774902, "learning_rate": 1.9506058091286307e-05, "loss": 1.5926, "step": 1492 }, { "epoch": 1.2390041493775934, "grad_norm": 15.633931159973145, "learning_rate": 1.950572614107884e-05, "loss": 1.3215, "step": 1493 }, { "epoch": 1.2398340248962656, "grad_norm": 7.311223983764648, "learning_rate": 1.950539419087137e-05, "loss": 1.3581, "step": 1494 }, { "epoch": 1.2406639004149378, "grad_norm": 13.561467170715332, "learning_rate": 1.95050622406639e-05, "loss": 1.6164, "step": 1495 }, { "epoch": 1.24149377593361, "grad_norm": 7.52053689956665, "learning_rate": 1.9504730290456432e-05, "loss": 1.1437, "step": 1496 }, { "epoch": 1.2423236514522822, "grad_norm": 9.16651439666748, "learning_rate": 1.9504398340248964e-05, "loss": 1.2497, "step": 1497 }, { "epoch": 1.2431535269709544, "grad_norm": 13.686694145202637, "learning_rate": 1.9504066390041497e-05, "loss": 1.8669, "step": 1498 }, { "epoch": 1.2439834024896266, "grad_norm": 6.591301441192627, "learning_rate": 1.9503734439834025e-05, "loss": 1.3257, "step": 1499 }, { "epoch": 1.2448132780082988, "grad_norm": 6.223268508911133, "learning_rate": 1.9503402489626557e-05, "loss": 0.9378, "step": 1500 }, { "epoch": 1.245643153526971, "grad_norm": 7.387204647064209, "learning_rate": 1.950307053941909e-05, "loss": 1.0554, "step": 1501 }, { "epoch": 1.2464730290456432, "grad_norm": 10.053179740905762, "learning_rate": 1.950273858921162e-05, "loss": 
1.1643, "step": 1502 }, { "epoch": 1.2473029045643154, "grad_norm": 8.099894523620605, "learning_rate": 1.950240663900415e-05, "loss": 1.7343, "step": 1503 }, { "epoch": 1.2481327800829876, "grad_norm": 9.499972343444824, "learning_rate": 1.9502074688796682e-05, "loss": 1.0079, "step": 1504 }, { "epoch": 1.2489626556016598, "grad_norm": 8.704476356506348, "learning_rate": 1.9501742738589215e-05, "loss": 1.3461, "step": 1505 }, { "epoch": 1.249792531120332, "grad_norm": 8.369412422180176, "learning_rate": 1.9501410788381743e-05, "loss": 1.2105, "step": 1506 }, { "epoch": 1.2506224066390041, "grad_norm": 13.62077808380127, "learning_rate": 1.9501078838174275e-05, "loss": 1.7707, "step": 1507 }, { "epoch": 1.2514522821576763, "grad_norm": 14.265397071838379, "learning_rate": 1.9500746887966807e-05, "loss": 1.6811, "step": 1508 }, { "epoch": 1.2522821576763485, "grad_norm": 8.035451889038086, "learning_rate": 1.9500414937759336e-05, "loss": 0.6404, "step": 1509 }, { "epoch": 1.2531120331950207, "grad_norm": 7.606302261352539, "learning_rate": 1.950008298755187e-05, "loss": 1.0574, "step": 1510 }, { "epoch": 1.253941908713693, "grad_norm": 9.147083282470703, "learning_rate": 1.94997510373444e-05, "loss": 1.1994, "step": 1511 }, { "epoch": 1.2547717842323651, "grad_norm": 15.432548522949219, "learning_rate": 1.949941908713693e-05, "loss": 2.1078, "step": 1512 }, { "epoch": 1.2556016597510373, "grad_norm": 10.105053901672363, "learning_rate": 1.949908713692946e-05, "loss": 1.552, "step": 1513 }, { "epoch": 1.2564315352697095, "grad_norm": 9.800429344177246, "learning_rate": 1.9498755186721993e-05, "loss": 1.0674, "step": 1514 }, { "epoch": 1.2572614107883817, "grad_norm": 14.344225883483887, "learning_rate": 1.9498423236514525e-05, "loss": 2.5474, "step": 1515 }, { "epoch": 1.258091286307054, "grad_norm": 8.363912582397461, "learning_rate": 1.9498091286307054e-05, "loss": 1.0133, "step": 1516 }, { "epoch": 1.258921161825726, "grad_norm": 11.58470630645752, 
"learning_rate": 1.9497759336099586e-05, "loss": 1.4076, "step": 1517 }, { "epoch": 1.2597510373443983, "grad_norm": 5.82852029800415, "learning_rate": 1.949742738589212e-05, "loss": 1.3508, "step": 1518 }, { "epoch": 1.2605809128630705, "grad_norm": 10.06196117401123, "learning_rate": 1.949709543568465e-05, "loss": 1.3177, "step": 1519 }, { "epoch": 1.2614107883817427, "grad_norm": 6.817223072052002, "learning_rate": 1.949676348547718e-05, "loss": 1.4613, "step": 1520 }, { "epoch": 1.262240663900415, "grad_norm": 6.821423053741455, "learning_rate": 1.949643153526971e-05, "loss": 1.352, "step": 1521 }, { "epoch": 1.263070539419087, "grad_norm": 11.791996002197266, "learning_rate": 1.9496099585062243e-05, "loss": 1.4449, "step": 1522 }, { "epoch": 1.2639004149377593, "grad_norm": 8.419824600219727, "learning_rate": 1.9495767634854776e-05, "loss": 0.7122, "step": 1523 }, { "epoch": 1.2647302904564315, "grad_norm": 11.439078330993652, "learning_rate": 1.9495435684647304e-05, "loss": 1.0424, "step": 1524 }, { "epoch": 1.2655601659751037, "grad_norm": 13.486656188964844, "learning_rate": 1.9495103734439836e-05, "loss": 2.5821, "step": 1525 }, { "epoch": 1.2663900414937759, "grad_norm": 9.408564567565918, "learning_rate": 1.949477178423237e-05, "loss": 1.6112, "step": 1526 }, { "epoch": 1.267219917012448, "grad_norm": 7.906382083892822, "learning_rate": 1.9494439834024897e-05, "loss": 1.6418, "step": 1527 }, { "epoch": 1.2680497925311203, "grad_norm": 8.255053520202637, "learning_rate": 1.949410788381743e-05, "loss": 1.6785, "step": 1528 }, { "epoch": 1.2688796680497925, "grad_norm": 9.537044525146484, "learning_rate": 1.9493775933609958e-05, "loss": 1.1738, "step": 1529 }, { "epoch": 1.2697095435684647, "grad_norm": 8.16978645324707, "learning_rate": 1.949344398340249e-05, "loss": 0.9143, "step": 1530 }, { "epoch": 1.2705394190871369, "grad_norm": 9.242683410644531, "learning_rate": 1.9493112033195022e-05, "loss": 1.1189, "step": 1531 }, { "epoch": 1.271369294605809, 
"grad_norm": 5.629144191741943, "learning_rate": 1.949278008298755e-05, "loss": 0.9364, "step": 1532 }, { "epoch": 1.2721991701244812, "grad_norm": 15.612404823303223, "learning_rate": 1.9492448132780083e-05, "loss": 2.3431, "step": 1533 }, { "epoch": 1.2730290456431534, "grad_norm": 7.629666328430176, "learning_rate": 1.9492116182572615e-05, "loss": 1.0855, "step": 1534 }, { "epoch": 1.2738589211618256, "grad_norm": 6.50730562210083, "learning_rate": 1.9491784232365147e-05, "loss": 1.0677, "step": 1535 }, { "epoch": 1.2746887966804978, "grad_norm": 6.8207526206970215, "learning_rate": 1.949145228215768e-05, "loss": 1.2931, "step": 1536 }, { "epoch": 1.27551867219917, "grad_norm": 10.3192720413208, "learning_rate": 1.9491120331950208e-05, "loss": 1.3506, "step": 1537 }, { "epoch": 1.2763485477178422, "grad_norm": 13.612898826599121, "learning_rate": 1.949078838174274e-05, "loss": 1.4338, "step": 1538 }, { "epoch": 1.2771784232365144, "grad_norm": 9.768418312072754, "learning_rate": 1.9490456431535272e-05, "loss": 1.2786, "step": 1539 }, { "epoch": 1.2780082987551866, "grad_norm": 12.457313537597656, "learning_rate": 1.9490124481327804e-05, "loss": 1.8662, "step": 1540 }, { "epoch": 1.2788381742738588, "grad_norm": 10.078165054321289, "learning_rate": 1.9489792531120333e-05, "loss": 1.7391, "step": 1541 }, { "epoch": 1.279668049792531, "grad_norm": 9.125035285949707, "learning_rate": 1.9489460580912865e-05, "loss": 1.7541, "step": 1542 }, { "epoch": 1.2804979253112032, "grad_norm": 12.970154762268066, "learning_rate": 1.9489128630705397e-05, "loss": 1.9517, "step": 1543 }, { "epoch": 1.2813278008298754, "grad_norm": 6.430961608886719, "learning_rate": 1.948879668049793e-05, "loss": 1.2031, "step": 1544 }, { "epoch": 1.2821576763485476, "grad_norm": 6.170076847076416, "learning_rate": 1.9488464730290458e-05, "loss": 0.9941, "step": 1545 }, { "epoch": 1.2829875518672198, "grad_norm": 20.963470458984375, "learning_rate": 1.948813278008299e-05, "loss": 2.3224, "step": 
1546 }, { "epoch": 1.283817427385892, "grad_norm": 8.834589004516602, "learning_rate": 1.948780082987552e-05, "loss": 1.5395, "step": 1547 }, { "epoch": 1.2846473029045642, "grad_norm": 12.440603256225586, "learning_rate": 1.948746887966805e-05, "loss": 2.3328, "step": 1548 }, { "epoch": 1.2854771784232364, "grad_norm": 15.440217018127441, "learning_rate": 1.9487136929460583e-05, "loss": 1.9742, "step": 1549 }, { "epoch": 1.2863070539419086, "grad_norm": 15.3441162109375, "learning_rate": 1.9486804979253112e-05, "loss": 1.6858, "step": 1550 }, { "epoch": 1.2871369294605808, "grad_norm": 9.414998054504395, "learning_rate": 1.9486473029045644e-05, "loss": 1.7919, "step": 1551 }, { "epoch": 1.287966804979253, "grad_norm": 12.325206756591797, "learning_rate": 1.9486141078838176e-05, "loss": 1.7485, "step": 1552 }, { "epoch": 1.2887966804979252, "grad_norm": 9.210448265075684, "learning_rate": 1.9485809128630705e-05, "loss": 1.1165, "step": 1553 }, { "epoch": 1.2896265560165976, "grad_norm": 7.73524284362793, "learning_rate": 1.9485477178423237e-05, "loss": 1.6331, "step": 1554 }, { "epoch": 1.2904564315352698, "grad_norm": 10.341755867004395, "learning_rate": 1.948514522821577e-05, "loss": 1.4892, "step": 1555 }, { "epoch": 1.291286307053942, "grad_norm": 7.627194404602051, "learning_rate": 1.94848132780083e-05, "loss": 0.8961, "step": 1556 }, { "epoch": 1.2921161825726142, "grad_norm": 10.489337921142578, "learning_rate": 1.948448132780083e-05, "loss": 1.1632, "step": 1557 }, { "epoch": 1.2929460580912864, "grad_norm": 7.602052211761475, "learning_rate": 1.9484149377593362e-05, "loss": 1.0147, "step": 1558 }, { "epoch": 1.2937759336099586, "grad_norm": 11.499402046203613, "learning_rate": 1.9483817427385894e-05, "loss": 1.7175, "step": 1559 }, { "epoch": 1.2946058091286308, "grad_norm": 9.768735885620117, "learning_rate": 1.9483485477178426e-05, "loss": 1.628, "step": 1560 }, { "epoch": 1.295435684647303, "grad_norm": 12.245067596435547, "learning_rate": 
1.9483153526970955e-05, "loss": 1.9387, "step": 1561 }, { "epoch": 1.2962655601659752, "grad_norm": 7.433518409729004, "learning_rate": 1.9482821576763487e-05, "loss": 1.4616, "step": 1562 }, { "epoch": 1.2970954356846474, "grad_norm": 11.0540771484375, "learning_rate": 1.948248962655602e-05, "loss": 1.5124, "step": 1563 }, { "epoch": 1.2979253112033196, "grad_norm": 10.116069793701172, "learning_rate": 1.948215767634855e-05, "loss": 1.9581, "step": 1564 }, { "epoch": 1.2987551867219918, "grad_norm": 12.828059196472168, "learning_rate": 1.948182572614108e-05, "loss": 1.6532, "step": 1565 }, { "epoch": 1.299585062240664, "grad_norm": 10.569860458374023, "learning_rate": 1.9481493775933612e-05, "loss": 1.0751, "step": 1566 }, { "epoch": 1.3004149377593361, "grad_norm": 8.303494453430176, "learning_rate": 1.9481161825726144e-05, "loss": 1.6387, "step": 1567 }, { "epoch": 1.3012448132780083, "grad_norm": 8.382455825805664, "learning_rate": 1.9480829875518673e-05, "loss": 1.6528, "step": 1568 }, { "epoch": 1.3020746887966805, "grad_norm": 9.75537109375, "learning_rate": 1.9480497925311205e-05, "loss": 1.7167, "step": 1569 }, { "epoch": 1.3029045643153527, "grad_norm": 12.551398277282715, "learning_rate": 1.9480165975103734e-05, "loss": 1.7897, "step": 1570 }, { "epoch": 1.303734439834025, "grad_norm": 14.330484390258789, "learning_rate": 1.9479834024896266e-05, "loss": 2.4979, "step": 1571 }, { "epoch": 1.3045643153526971, "grad_norm": 10.632245063781738, "learning_rate": 1.9479502074688798e-05, "loss": 1.8537, "step": 1572 }, { "epoch": 1.3053941908713693, "grad_norm": 12.99238109588623, "learning_rate": 1.947917012448133e-05, "loss": 2.1787, "step": 1573 }, { "epoch": 1.3062240663900415, "grad_norm": 8.460664749145508, "learning_rate": 1.947883817427386e-05, "loss": 1.1643, "step": 1574 }, { "epoch": 1.3070539419087137, "grad_norm": 7.250710964202881, "learning_rate": 1.947850622406639e-05, "loss": 1.2942, "step": 1575 }, { "epoch": 1.307883817427386, "grad_norm": 
6.919223785400391, "learning_rate": 1.9478174273858923e-05, "loss": 1.5411, "step": 1576 }, { "epoch": 1.308713692946058, "grad_norm": 11.267199516296387, "learning_rate": 1.9477842323651455e-05, "loss": 2.0255, "step": 1577 }, { "epoch": 1.3095435684647303, "grad_norm": 7.833077907562256, "learning_rate": 1.9477510373443984e-05, "loss": 1.5576, "step": 1578 }, { "epoch": 1.3103734439834025, "grad_norm": 10.784452438354492, "learning_rate": 1.9477178423236516e-05, "loss": 2.0924, "step": 1579 }, { "epoch": 1.3112033195020747, "grad_norm": 13.133220672607422, "learning_rate": 1.9476846473029048e-05, "loss": 2.3076, "step": 1580 }, { "epoch": 1.312033195020747, "grad_norm": 6.166381359100342, "learning_rate": 1.947651452282158e-05, "loss": 0.8969, "step": 1581 }, { "epoch": 1.312863070539419, "grad_norm": 15.235029220581055, "learning_rate": 1.947618257261411e-05, "loss": 2.2171, "step": 1582 }, { "epoch": 1.3136929460580913, "grad_norm": 14.040870666503906, "learning_rate": 1.947585062240664e-05, "loss": 1.0608, "step": 1583 }, { "epoch": 1.3145228215767635, "grad_norm": 7.523036956787109, "learning_rate": 1.9475518672199173e-05, "loss": 1.1363, "step": 1584 }, { "epoch": 1.3153526970954357, "grad_norm": 7.99989652633667, "learning_rate": 1.9475186721991702e-05, "loss": 1.3908, "step": 1585 }, { "epoch": 1.3161825726141079, "grad_norm": 9.935023307800293, "learning_rate": 1.9474854771784234e-05, "loss": 1.5975, "step": 1586 }, { "epoch": 1.31701244813278, "grad_norm": 6.838608741760254, "learning_rate": 1.9474522821576766e-05, "loss": 1.1479, "step": 1587 }, { "epoch": 1.3178423236514523, "grad_norm": 5.949337482452393, "learning_rate": 1.9474190871369295e-05, "loss": 1.3378, "step": 1588 }, { "epoch": 1.3186721991701245, "grad_norm": 11.299942970275879, "learning_rate": 1.9473858921161827e-05, "loss": 1.4403, "step": 1589 }, { "epoch": 1.3195020746887967, "grad_norm": 7.731199741363525, "learning_rate": 1.947352697095436e-05, "loss": 1.3065, "step": 1590 }, { 
"epoch": 1.3203319502074689, "grad_norm": 8.897139549255371, "learning_rate": 1.9473195020746888e-05, "loss": 1.496, "step": 1591 }, { "epoch": 1.321161825726141, "grad_norm": 6.580718040466309, "learning_rate": 1.947286307053942e-05, "loss": 1.0945, "step": 1592 }, { "epoch": 1.3219917012448132, "grad_norm": 13.745743751525879, "learning_rate": 1.9472531120331952e-05, "loss": 1.1853, "step": 1593 }, { "epoch": 1.3228215767634854, "grad_norm": 14.59749698638916, "learning_rate": 1.9472199170124484e-05, "loss": 2.0539, "step": 1594 }, { "epoch": 1.3236514522821576, "grad_norm": 6.880407333374023, "learning_rate": 1.9471867219917013e-05, "loss": 0.9785, "step": 1595 }, { "epoch": 1.3244813278008298, "grad_norm": 7.083506107330322, "learning_rate": 1.9471535269709545e-05, "loss": 1.3996, "step": 1596 }, { "epoch": 1.325311203319502, "grad_norm": 6.669032573699951, "learning_rate": 1.9471203319502077e-05, "loss": 1.6718, "step": 1597 }, { "epoch": 1.3261410788381742, "grad_norm": 8.371360778808594, "learning_rate": 1.947087136929461e-05, "loss": 1.3584, "step": 1598 }, { "epoch": 1.3269709543568464, "grad_norm": 10.796257019042969, "learning_rate": 1.9470539419087138e-05, "loss": 1.495, "step": 1599 }, { "epoch": 1.3278008298755186, "grad_norm": 7.607381343841553, "learning_rate": 1.947020746887967e-05, "loss": 0.958, "step": 1600 }, { "epoch": 1.3286307053941908, "grad_norm": 14.346025466918945, "learning_rate": 1.9469875518672202e-05, "loss": 1.3494, "step": 1601 }, { "epoch": 1.329460580912863, "grad_norm": 5.530843257904053, "learning_rate": 1.9469543568464734e-05, "loss": 1.4592, "step": 1602 }, { "epoch": 1.3302904564315352, "grad_norm": 9.040916442871094, "learning_rate": 1.9469211618257263e-05, "loss": 1.4024, "step": 1603 }, { "epoch": 1.3311203319502074, "grad_norm": 8.623650550842285, "learning_rate": 1.9468879668049795e-05, "loss": 1.1131, "step": 1604 }, { "epoch": 1.3319502074688796, "grad_norm": 11.860259056091309, "learning_rate": 
1.9468547717842327e-05, "loss": 1.7148, "step": 1605 }, { "epoch": 1.3327800829875518, "grad_norm": 11.04283618927002, "learning_rate": 1.9468215767634856e-05, "loss": 1.7119, "step": 1606 }, { "epoch": 1.333609958506224, "grad_norm": 8.157196998596191, "learning_rate": 1.9467883817427388e-05, "loss": 1.2194, "step": 1607 }, { "epoch": 1.3344398340248962, "grad_norm": 7.334689617156982, "learning_rate": 1.9467551867219917e-05, "loss": 1.3987, "step": 1608 }, { "epoch": 1.3352697095435684, "grad_norm": 7.458621978759766, "learning_rate": 1.946721991701245e-05, "loss": 1.7319, "step": 1609 }, { "epoch": 1.3360995850622408, "grad_norm": 12.48288345336914, "learning_rate": 1.946688796680498e-05, "loss": 1.9907, "step": 1610 }, { "epoch": 1.336929460580913, "grad_norm": 11.463747024536133, "learning_rate": 1.946655601659751e-05, "loss": 1.9942, "step": 1611 }, { "epoch": 1.3377593360995852, "grad_norm": 10.837531089782715, "learning_rate": 1.946622406639004e-05, "loss": 2.3857, "step": 1612 }, { "epoch": 1.3385892116182574, "grad_norm": 15.06580638885498, "learning_rate": 1.9465892116182574e-05, "loss": 1.4931, "step": 1613 }, { "epoch": 1.3394190871369296, "grad_norm": 8.062992095947266, "learning_rate": 1.9465560165975106e-05, "loss": 2.0291, "step": 1614 }, { "epoch": 1.3402489626556018, "grad_norm": 6.8864006996154785, "learning_rate": 1.9465228215767638e-05, "loss": 1.1927, "step": 1615 }, { "epoch": 1.341078838174274, "grad_norm": 7.0707173347473145, "learning_rate": 1.9464896265560167e-05, "loss": 1.5409, "step": 1616 }, { "epoch": 1.3419087136929462, "grad_norm": 8.361580848693848, "learning_rate": 1.94645643153527e-05, "loss": 1.6411, "step": 1617 }, { "epoch": 1.3427385892116184, "grad_norm": 11.549544334411621, "learning_rate": 1.946423236514523e-05, "loss": 1.6407, "step": 1618 }, { "epoch": 1.3435684647302906, "grad_norm": 11.27184009552002, "learning_rate": 1.9463900414937763e-05, "loss": 1.6766, "step": 1619 }, { "epoch": 1.3443983402489628, "grad_norm": 
8.172802925109863, "learning_rate": 1.9463568464730292e-05, "loss": 1.8286, "step": 1620 }, { "epoch": 1.345228215767635, "grad_norm": 8.873550415039062, "learning_rate": 1.9463236514522824e-05, "loss": 1.5609, "step": 1621 }, { "epoch": 1.3460580912863072, "grad_norm": 6.667811870574951, "learning_rate": 1.9462904564315356e-05, "loss": 1.2226, "step": 1622 }, { "epoch": 1.3468879668049794, "grad_norm": 7.426571846008301, "learning_rate": 1.9462572614107885e-05, "loss": 1.2753, "step": 1623 }, { "epoch": 1.3477178423236515, "grad_norm": 8.555197715759277, "learning_rate": 1.9462240663900417e-05, "loss": 1.325, "step": 1624 }, { "epoch": 1.3485477178423237, "grad_norm": 7.980350971221924, "learning_rate": 1.946190871369295e-05, "loss": 1.2089, "step": 1625 }, { "epoch": 1.349377593360996, "grad_norm": 11.289377212524414, "learning_rate": 1.9461576763485478e-05, "loss": 1.5408, "step": 1626 }, { "epoch": 1.3502074688796681, "grad_norm": 7.2698235511779785, "learning_rate": 1.946124481327801e-05, "loss": 1.2466, "step": 1627 }, { "epoch": 1.3510373443983403, "grad_norm": 12.293818473815918, "learning_rate": 1.9460912863070542e-05, "loss": 2.3165, "step": 1628 }, { "epoch": 1.3518672199170125, "grad_norm": 14.478318214416504, "learning_rate": 1.946058091286307e-05, "loss": 1.6112, "step": 1629 }, { "epoch": 1.3526970954356847, "grad_norm": 11.512182235717773, "learning_rate": 1.9460248962655603e-05, "loss": 2.2395, "step": 1630 }, { "epoch": 1.353526970954357, "grad_norm": 13.560525894165039, "learning_rate": 1.9459917012448135e-05, "loss": 1.2831, "step": 1631 }, { "epoch": 1.3543568464730291, "grad_norm": 8.811263084411621, "learning_rate": 1.9459585062240663e-05, "loss": 1.3933, "step": 1632 }, { "epoch": 1.3551867219917013, "grad_norm": 13.511309623718262, "learning_rate": 1.9459253112033196e-05, "loss": 1.7515, "step": 1633 }, { "epoch": 1.3560165975103735, "grad_norm": 8.996207237243652, "learning_rate": 1.9458921161825728e-05, "loss": 1.6765, "step": 1634 }, { 
"epoch": 1.3568464730290457, "grad_norm": 11.334844589233398, "learning_rate": 1.945858921161826e-05, "loss": 1.1328, "step": 1635 }, { "epoch": 1.357676348547718, "grad_norm": 16.81660270690918, "learning_rate": 1.945825726141079e-05, "loss": 2.2252, "step": 1636 }, { "epoch": 1.35850622406639, "grad_norm": 11.725566864013672, "learning_rate": 1.945792531120332e-05, "loss": 2.1274, "step": 1637 }, { "epoch": 1.3593360995850623, "grad_norm": 13.674995422363281, "learning_rate": 1.9457593360995853e-05, "loss": 1.3638, "step": 1638 }, { "epoch": 1.3601659751037345, "grad_norm": 13.268964767456055, "learning_rate": 1.9457261410788385e-05, "loss": 1.8657, "step": 1639 }, { "epoch": 1.3609958506224067, "grad_norm": 11.89432430267334, "learning_rate": 1.9456929460580914e-05, "loss": 1.1377, "step": 1640 }, { "epoch": 1.3618257261410789, "grad_norm": 8.677180290222168, "learning_rate": 1.9456597510373446e-05, "loss": 1.9314, "step": 1641 }, { "epoch": 1.362655601659751, "grad_norm": 10.291467666625977, "learning_rate": 1.9456265560165978e-05, "loss": 1.8665, "step": 1642 }, { "epoch": 1.3634854771784233, "grad_norm": 8.58264446258545, "learning_rate": 1.945593360995851e-05, "loss": 1.5106, "step": 1643 }, { "epoch": 1.3643153526970955, "grad_norm": 7.943110942840576, "learning_rate": 1.945560165975104e-05, "loss": 1.5817, "step": 1644 }, { "epoch": 1.3651452282157677, "grad_norm": 8.36326789855957, "learning_rate": 1.945526970954357e-05, "loss": 1.6104, "step": 1645 }, { "epoch": 1.3659751037344399, "grad_norm": 12.379961013793945, "learning_rate": 1.94549377593361e-05, "loss": 2.2803, "step": 1646 }, { "epoch": 1.366804979253112, "grad_norm": 7.559453964233398, "learning_rate": 1.945460580912863e-05, "loss": 1.8185, "step": 1647 }, { "epoch": 1.3676348547717843, "grad_norm": 10.43746280670166, "learning_rate": 1.9454273858921164e-05, "loss": 1.484, "step": 1648 }, { "epoch": 1.3684647302904565, "grad_norm": 6.592848777770996, "learning_rate": 1.9453941908713692e-05, 
"loss": 1.1007, "step": 1649 }, { "epoch": 1.3692946058091287, "grad_norm": 10.18802547454834, "learning_rate": 1.9453609958506224e-05, "loss": 1.4071, "step": 1650 }, { "epoch": 1.3701244813278008, "grad_norm": 8.798125267028809, "learning_rate": 1.9453278008298757e-05, "loss": 0.9266, "step": 1651 }, { "epoch": 1.370954356846473, "grad_norm": 9.63762092590332, "learning_rate": 1.945294605809129e-05, "loss": 1.2118, "step": 1652 }, { "epoch": 1.3717842323651452, "grad_norm": 6.188743591308594, "learning_rate": 1.9452614107883817e-05, "loss": 1.5806, "step": 1653 }, { "epoch": 1.3726141078838174, "grad_norm": 7.970561981201172, "learning_rate": 1.945228215767635e-05, "loss": 1.1782, "step": 1654 }, { "epoch": 1.3734439834024896, "grad_norm": 9.99677848815918, "learning_rate": 1.945195020746888e-05, "loss": 1.8005, "step": 1655 }, { "epoch": 1.3742738589211618, "grad_norm": 13.500931739807129, "learning_rate": 1.9451618257261414e-05, "loss": 1.6584, "step": 1656 }, { "epoch": 1.375103734439834, "grad_norm": 10.570168495178223, "learning_rate": 1.9451286307053942e-05, "loss": 1.2947, "step": 1657 }, { "epoch": 1.3759336099585062, "grad_norm": 7.594712734222412, "learning_rate": 1.9450954356846475e-05, "loss": 2.035, "step": 1658 }, { "epoch": 1.3767634854771784, "grad_norm": 6.9683918952941895, "learning_rate": 1.9450622406639007e-05, "loss": 1.4686, "step": 1659 }, { "epoch": 1.3775933609958506, "grad_norm": 12.211488723754883, "learning_rate": 1.945029045643154e-05, "loss": 1.2653, "step": 1660 }, { "epoch": 1.3784232365145228, "grad_norm": 14.722977638244629, "learning_rate": 1.9449958506224067e-05, "loss": 1.8994, "step": 1661 }, { "epoch": 1.379253112033195, "grad_norm": 8.73166561126709, "learning_rate": 1.94496265560166e-05, "loss": 1.1441, "step": 1662 }, { "epoch": 1.3800829875518672, "grad_norm": 8.867891311645508, "learning_rate": 1.9449294605809132e-05, "loss": 1.6534, "step": 1663 }, { "epoch": 1.3809128630705394, "grad_norm": 10.128045082092285, 
"learning_rate": 1.944896265560166e-05, "loss": 1.6895, "step": 1664 }, { "epoch": 1.3817427385892116, "grad_norm": 11.264575004577637, "learning_rate": 1.9448630705394193e-05, "loss": 1.3288, "step": 1665 }, { "epoch": 1.3825726141078838, "grad_norm": 10.899102210998535, "learning_rate": 1.9448298755186725e-05, "loss": 1.6571, "step": 1666 }, { "epoch": 1.383402489626556, "grad_norm": 7.016176223754883, "learning_rate": 1.9447966804979253e-05, "loss": 1.218, "step": 1667 }, { "epoch": 1.3842323651452282, "grad_norm": 11.400229454040527, "learning_rate": 1.9447634854771785e-05, "loss": 2.3788, "step": 1668 }, { "epoch": 1.3850622406639004, "grad_norm": 10.793584823608398, "learning_rate": 1.9447302904564318e-05, "loss": 1.029, "step": 1669 }, { "epoch": 1.3858921161825726, "grad_norm": 8.55276107788086, "learning_rate": 1.9446970954356846e-05, "loss": 2.2114, "step": 1670 }, { "epoch": 1.3867219917012448, "grad_norm": 8.316422462463379, "learning_rate": 1.944663900414938e-05, "loss": 1.7624, "step": 1671 }, { "epoch": 1.387551867219917, "grad_norm": 7.619316101074219, "learning_rate": 1.944630705394191e-05, "loss": 1.6468, "step": 1672 }, { "epoch": 1.3883817427385892, "grad_norm": 7.917924404144287, "learning_rate": 1.9445975103734443e-05, "loss": 1.1817, "step": 1673 }, { "epoch": 1.3892116182572614, "grad_norm": 9.05376148223877, "learning_rate": 1.944564315352697e-05, "loss": 1.9141, "step": 1674 }, { "epoch": 1.3900414937759336, "grad_norm": 7.900075912475586, "learning_rate": 1.9445311203319503e-05, "loss": 1.5461, "step": 1675 }, { "epoch": 1.3908713692946058, "grad_norm": 11.176753044128418, "learning_rate": 1.9444979253112036e-05, "loss": 1.8076, "step": 1676 }, { "epoch": 1.391701244813278, "grad_norm": 13.892728805541992, "learning_rate": 1.9444647302904568e-05, "loss": 1.6653, "step": 1677 }, { "epoch": 1.3925311203319501, "grad_norm": 9.741902351379395, "learning_rate": 1.9444315352697096e-05, "loss": 2.1068, "step": 1678 }, { "epoch": 
1.3933609958506223, "grad_norm": 8.065905570983887, "learning_rate": 1.944398340248963e-05, "loss": 1.1247, "step": 1679 }, { "epoch": 1.3941908713692945, "grad_norm": 7.715620517730713, "learning_rate": 1.944365145228216e-05, "loss": 1.2691, "step": 1680 }, { "epoch": 1.3950207468879667, "grad_norm": 10.709822654724121, "learning_rate": 1.9443319502074693e-05, "loss": 1.6035, "step": 1681 }, { "epoch": 1.395850622406639, "grad_norm": 8.566603660583496, "learning_rate": 1.944298755186722e-05, "loss": 1.1494, "step": 1682 }, { "epoch": 1.3966804979253111, "grad_norm": 8.579460144042969, "learning_rate": 1.9442655601659754e-05, "loss": 1.7502, "step": 1683 }, { "epoch": 1.3975103734439833, "grad_norm": 9.91588020324707, "learning_rate": 1.9442323651452286e-05, "loss": 1.0748, "step": 1684 }, { "epoch": 1.3983402489626555, "grad_norm": 9.687861442565918, "learning_rate": 1.9441991701244814e-05, "loss": 1.0133, "step": 1685 }, { "epoch": 1.3991701244813277, "grad_norm": 6.716582298278809, "learning_rate": 1.9441659751037346e-05, "loss": 1.5739, "step": 1686 }, { "epoch": 1.4, "grad_norm": 14.490427017211914, "learning_rate": 1.9441327800829875e-05, "loss": 1.7521, "step": 1687 }, { "epoch": 1.400829875518672, "grad_norm": 10.890149116516113, "learning_rate": 1.9440995850622407e-05, "loss": 1.3233, "step": 1688 }, { "epoch": 1.4016597510373443, "grad_norm": 12.943136215209961, "learning_rate": 1.944066390041494e-05, "loss": 1.7951, "step": 1689 }, { "epoch": 1.4024896265560165, "grad_norm": 10.15245532989502, "learning_rate": 1.9440331950207468e-05, "loss": 1.1583, "step": 1690 }, { "epoch": 1.4033195020746887, "grad_norm": 10.418087005615234, "learning_rate": 1.944e-05, "loss": 1.2382, "step": 1691 }, { "epoch": 1.404149377593361, "grad_norm": 9.71353816986084, "learning_rate": 1.9439668049792532e-05, "loss": 0.9259, "step": 1692 }, { "epoch": 1.404979253112033, "grad_norm": 10.069025039672852, "learning_rate": 1.9439336099585064e-05, "loss": 2.1252, "step": 1693 }, { 
"epoch": 1.4058091286307053, "grad_norm": 10.563461303710938, "learning_rate": 1.9439004149377597e-05, "loss": 1.5417, "step": 1694 }, { "epoch": 1.4066390041493775, "grad_norm": 13.041437149047852, "learning_rate": 1.9438672199170125e-05, "loss": 1.8848, "step": 1695 }, { "epoch": 1.4074688796680497, "grad_norm": 8.119351387023926, "learning_rate": 1.9438340248962657e-05, "loss": 1.3258, "step": 1696 }, { "epoch": 1.4082987551867219, "grad_norm": 7.8063859939575195, "learning_rate": 1.943800829875519e-05, "loss": 1.1134, "step": 1697 }, { "epoch": 1.409128630705394, "grad_norm": 8.970438957214355, "learning_rate": 1.943767634854772e-05, "loss": 1.6084, "step": 1698 }, { "epoch": 1.4099585062240663, "grad_norm": 7.734731197357178, "learning_rate": 1.943734439834025e-05, "loss": 1.0457, "step": 1699 }, { "epoch": 1.4107883817427385, "grad_norm": 12.84327507019043, "learning_rate": 1.9437012448132782e-05, "loss": 1.2437, "step": 1700 }, { "epoch": 1.4116182572614107, "grad_norm": 6.936054229736328, "learning_rate": 1.9436680497925315e-05, "loss": 1.2673, "step": 1701 }, { "epoch": 1.4124481327800829, "grad_norm": 10.55057144165039, "learning_rate": 1.9436348547717843e-05, "loss": 1.6647, "step": 1702 }, { "epoch": 1.413278008298755, "grad_norm": 6.6439409255981445, "learning_rate": 1.9436016597510375e-05, "loss": 1.746, "step": 1703 }, { "epoch": 1.4141078838174275, "grad_norm": 4.761666774749756, "learning_rate": 1.9435684647302907e-05, "loss": 1.0208, "step": 1704 }, { "epoch": 1.4149377593360997, "grad_norm": 10.830184936523438, "learning_rate": 1.9435352697095436e-05, "loss": 2.3378, "step": 1705 }, { "epoch": 1.4157676348547719, "grad_norm": 9.45682144165039, "learning_rate": 1.9435020746887968e-05, "loss": 1.0146, "step": 1706 }, { "epoch": 1.416597510373444, "grad_norm": 13.57961654663086, "learning_rate": 1.9434688796680497e-05, "loss": 2.1466, "step": 1707 }, { "epoch": 1.4174273858921163, "grad_norm": 10.271368026733398, "learning_rate": 
1.943435684647303e-05, "loss": 2.0796, "step": 1708 }, { "epoch": 1.4182572614107885, "grad_norm": 9.665106773376465, "learning_rate": 1.943402489626556e-05, "loss": 1.5208, "step": 1709 }, { "epoch": 1.4190871369294606, "grad_norm": 10.220808982849121, "learning_rate": 1.9433692946058093e-05, "loss": 1.3028, "step": 1710 }, { "epoch": 1.4199170124481328, "grad_norm": 9.787593841552734, "learning_rate": 1.9433360995850622e-05, "loss": 1.6665, "step": 1711 }, { "epoch": 1.420746887966805, "grad_norm": 6.977484703063965, "learning_rate": 1.9433029045643154e-05, "loss": 1.591, "step": 1712 }, { "epoch": 1.4215767634854772, "grad_norm": 9.336465835571289, "learning_rate": 1.9432697095435686e-05, "loss": 2.3857, "step": 1713 }, { "epoch": 1.4224066390041494, "grad_norm": 9.083735466003418, "learning_rate": 1.943236514522822e-05, "loss": 1.4929, "step": 1714 }, { "epoch": 1.4232365145228216, "grad_norm": 6.483444690704346, "learning_rate": 1.9432033195020747e-05, "loss": 1.3756, "step": 1715 }, { "epoch": 1.4240663900414938, "grad_norm": 6.3072943687438965, "learning_rate": 1.943170124481328e-05, "loss": 1.2584, "step": 1716 }, { "epoch": 1.424896265560166, "grad_norm": 10.56040096282959, "learning_rate": 1.943136929460581e-05, "loss": 1.1409, "step": 1717 }, { "epoch": 1.4257261410788382, "grad_norm": 6.244845867156982, "learning_rate": 1.9431037344398343e-05, "loss": 1.0642, "step": 1718 }, { "epoch": 1.4265560165975104, "grad_norm": 8.397760391235352, "learning_rate": 1.9430705394190872e-05, "loss": 1.3915, "step": 1719 }, { "epoch": 1.4273858921161826, "grad_norm": 8.961357116699219, "learning_rate": 1.9430373443983404e-05, "loss": 0.928, "step": 1720 }, { "epoch": 1.4282157676348548, "grad_norm": 8.479146003723145, "learning_rate": 1.9430041493775936e-05, "loss": 1.06, "step": 1721 }, { "epoch": 1.429045643153527, "grad_norm": 9.659703254699707, "learning_rate": 1.942970954356847e-05, "loss": 1.4325, "step": 1722 }, { "epoch": 1.4298755186721992, "grad_norm": 
9.694884300231934, "learning_rate": 1.9429377593360997e-05, "loss": 1.5909, "step": 1723 }, { "epoch": 1.4307053941908714, "grad_norm": 5.939307689666748, "learning_rate": 1.942904564315353e-05, "loss": 1.1656, "step": 1724 }, { "epoch": 1.4315352697095436, "grad_norm": 7.534804821014404, "learning_rate": 1.9428713692946058e-05, "loss": 0.8379, "step": 1725 }, { "epoch": 1.4323651452282158, "grad_norm": 9.517645835876465, "learning_rate": 1.942838174273859e-05, "loss": 2.0302, "step": 1726 }, { "epoch": 1.433195020746888, "grad_norm": 5.931868076324463, "learning_rate": 1.9428049792531122e-05, "loss": 1.3426, "step": 1727 }, { "epoch": 1.4340248962655602, "grad_norm": 9.239653587341309, "learning_rate": 1.942771784232365e-05, "loss": 1.7056, "step": 1728 }, { "epoch": 1.4348547717842324, "grad_norm": 16.121156692504883, "learning_rate": 1.9427385892116183e-05, "loss": 2.7434, "step": 1729 }, { "epoch": 1.4356846473029046, "grad_norm": 7.672459125518799, "learning_rate": 1.9427053941908715e-05, "loss": 1.1254, "step": 1730 }, { "epoch": 1.4365145228215768, "grad_norm": 6.97655725479126, "learning_rate": 1.9426721991701247e-05, "loss": 1.1403, "step": 1731 }, { "epoch": 1.437344398340249, "grad_norm": 6.91421365737915, "learning_rate": 1.9426390041493776e-05, "loss": 1.1115, "step": 1732 }, { "epoch": 1.4381742738589212, "grad_norm": 11.347044944763184, "learning_rate": 1.9426058091286308e-05, "loss": 2.0662, "step": 1733 }, { "epoch": 1.4390041493775934, "grad_norm": 13.977072715759277, "learning_rate": 1.942572614107884e-05, "loss": 1.4022, "step": 1734 }, { "epoch": 1.4398340248962656, "grad_norm": 7.106309413909912, "learning_rate": 1.9425394190871372e-05, "loss": 1.1077, "step": 1735 }, { "epoch": 1.4406639004149377, "grad_norm": 10.116974830627441, "learning_rate": 1.94250622406639e-05, "loss": 2.1354, "step": 1736 }, { "epoch": 1.44149377593361, "grad_norm": 8.42430305480957, "learning_rate": 1.9424730290456433e-05, "loss": 0.9521, "step": 1737 }, { "epoch": 
1.4423236514522821, "grad_norm": 14.817705154418945, "learning_rate": 1.9424398340248965e-05, "loss": 2.8606, "step": 1738 }, { "epoch": 1.4431535269709543, "grad_norm": 10.177366256713867, "learning_rate": 1.9424066390041497e-05, "loss": 1.5314, "step": 1739 }, { "epoch": 1.4439834024896265, "grad_norm": 6.047399044036865, "learning_rate": 1.9423734439834026e-05, "loss": 1.0232, "step": 1740 }, { "epoch": 1.4448132780082987, "grad_norm": 8.316845893859863, "learning_rate": 1.9423402489626558e-05, "loss": 1.1685, "step": 1741 }, { "epoch": 1.445643153526971, "grad_norm": 7.708665370941162, "learning_rate": 1.942307053941909e-05, "loss": 1.4588, "step": 1742 }, { "epoch": 1.4464730290456431, "grad_norm": 7.07352352142334, "learning_rate": 1.942273858921162e-05, "loss": 2.0328, "step": 1743 }, { "epoch": 1.4473029045643153, "grad_norm": 8.617966651916504, "learning_rate": 1.942240663900415e-05, "loss": 1.6173, "step": 1744 }, { "epoch": 1.4481327800829875, "grad_norm": 8.657365798950195, "learning_rate": 1.9422074688796683e-05, "loss": 1.5701, "step": 1745 }, { "epoch": 1.4489626556016597, "grad_norm": 11.81044864654541, "learning_rate": 1.9421742738589212e-05, "loss": 2.0124, "step": 1746 }, { "epoch": 1.449792531120332, "grad_norm": 15.004030227661133, "learning_rate": 1.9421410788381744e-05, "loss": 1.5107, "step": 1747 }, { "epoch": 1.450622406639004, "grad_norm": 8.1617431640625, "learning_rate": 1.9421078838174276e-05, "loss": 1.0918, "step": 1748 }, { "epoch": 1.4514522821576763, "grad_norm": 11.835411071777344, "learning_rate": 1.9420746887966805e-05, "loss": 1.4515, "step": 1749 }, { "epoch": 1.4522821576763485, "grad_norm": 6.1957197189331055, "learning_rate": 1.9420414937759337e-05, "loss": 1.0088, "step": 1750 }, { "epoch": 1.4531120331950207, "grad_norm": 6.2429585456848145, "learning_rate": 1.942008298755187e-05, "loss": 0.6851, "step": 1751 }, { "epoch": 1.4539419087136929, "grad_norm": 14.771920204162598, "learning_rate": 1.94197510373444e-05, "loss": 
2.1065, "step": 1752 }, { "epoch": 1.454771784232365, "grad_norm": 11.3765230178833, "learning_rate": 1.941941908713693e-05, "loss": 1.5597, "step": 1753 }, { "epoch": 1.4556016597510373, "grad_norm": 13.335838317871094, "learning_rate": 1.9419087136929462e-05, "loss": 1.2467, "step": 1754 }, { "epoch": 1.4564315352697095, "grad_norm": 13.754793167114258, "learning_rate": 1.9418755186721994e-05, "loss": 1.6875, "step": 1755 }, { "epoch": 1.4572614107883817, "grad_norm": 10.893234252929688, "learning_rate": 1.9418423236514526e-05, "loss": 1.589, "step": 1756 }, { "epoch": 1.4580912863070539, "grad_norm": 9.872191429138184, "learning_rate": 1.9418091286307055e-05, "loss": 1.5537, "step": 1757 }, { "epoch": 1.458921161825726, "grad_norm": 7.704354763031006, "learning_rate": 1.9417759336099587e-05, "loss": 1.2676, "step": 1758 }, { "epoch": 1.4597510373443983, "grad_norm": 10.783002853393555, "learning_rate": 1.941742738589212e-05, "loss": 1.4169, "step": 1759 }, { "epoch": 1.4605809128630705, "grad_norm": 19.930225372314453, "learning_rate": 1.941709543568465e-05, "loss": 2.7182, "step": 1760 }, { "epoch": 1.4614107883817429, "grad_norm": 8.512571334838867, "learning_rate": 1.941676348547718e-05, "loss": 1.7405, "step": 1761 }, { "epoch": 1.462240663900415, "grad_norm": 13.492795944213867, "learning_rate": 1.9416431535269712e-05, "loss": 1.86, "step": 1762 }, { "epoch": 1.4630705394190873, "grad_norm": 7.24754524230957, "learning_rate": 1.941609958506224e-05, "loss": 1.8895, "step": 1763 }, { "epoch": 1.4639004149377595, "grad_norm": 8.904690742492676, "learning_rate": 1.9415767634854773e-05, "loss": 0.8775, "step": 1764 }, { "epoch": 1.4647302904564317, "grad_norm": 14.459775924682617, "learning_rate": 1.9415435684647305e-05, "loss": 1.5768, "step": 1765 }, { "epoch": 1.4655601659751039, "grad_norm": 7.297588348388672, "learning_rate": 1.9415103734439834e-05, "loss": 0.8234, "step": 1766 }, { "epoch": 1.466390041493776, "grad_norm": 6.452759265899658, 
"learning_rate": 1.9414771784232366e-05, "loss": 1.72, "step": 1767 }, { "epoch": 1.4672199170124482, "grad_norm": 9.694622993469238, "learning_rate": 1.9414439834024898e-05, "loss": 0.7992, "step": 1768 }, { "epoch": 1.4680497925311204, "grad_norm": 7.888101577758789, "learning_rate": 1.9414107883817427e-05, "loss": 1.5939, "step": 1769 }, { "epoch": 1.4688796680497926, "grad_norm": 11.531874656677246, "learning_rate": 1.941377593360996e-05, "loss": 2.1605, "step": 1770 }, { "epoch": 1.4697095435684648, "grad_norm": 8.965580940246582, "learning_rate": 1.941344398340249e-05, "loss": 1.2585, "step": 1771 }, { "epoch": 1.470539419087137, "grad_norm": 7.166990756988525, "learning_rate": 1.9413112033195023e-05, "loss": 1.0688, "step": 1772 }, { "epoch": 1.4713692946058092, "grad_norm": 8.4426851272583, "learning_rate": 1.9412780082987555e-05, "loss": 1.4013, "step": 1773 }, { "epoch": 1.4721991701244814, "grad_norm": 8.997841835021973, "learning_rate": 1.9412448132780084e-05, "loss": 1.3695, "step": 1774 }, { "epoch": 1.4730290456431536, "grad_norm": 9.768280029296875, "learning_rate": 1.9412116182572616e-05, "loss": 1.1599, "step": 1775 }, { "epoch": 1.4738589211618258, "grad_norm": 14.60306453704834, "learning_rate": 1.9411784232365148e-05, "loss": 2.2514, "step": 1776 }, { "epoch": 1.474688796680498, "grad_norm": 8.897573471069336, "learning_rate": 1.941145228215768e-05, "loss": 1.285, "step": 1777 }, { "epoch": 1.4755186721991702, "grad_norm": 13.028273582458496, "learning_rate": 1.941112033195021e-05, "loss": 1.6528, "step": 1778 }, { "epoch": 1.4763485477178424, "grad_norm": 8.560065269470215, "learning_rate": 1.941078838174274e-05, "loss": 1.4872, "step": 1779 }, { "epoch": 1.4771784232365146, "grad_norm": 6.920947074890137, "learning_rate": 1.9410456431535273e-05, "loss": 1.4612, "step": 1780 }, { "epoch": 1.4780082987551868, "grad_norm": 14.141142845153809, "learning_rate": 1.9410124481327802e-05, "loss": 2.5502, "step": 1781 }, { "epoch": 1.478838174273859, 
"grad_norm": 12.197755813598633, "learning_rate": 1.9409792531120334e-05, "loss": 1.6557, "step": 1782 }, { "epoch": 1.4796680497925312, "grad_norm": 15.183043479919434, "learning_rate": 1.9409460580912866e-05, "loss": 1.9342, "step": 1783 }, { "epoch": 1.4804979253112034, "grad_norm": 21.87809944152832, "learning_rate": 1.9409128630705395e-05, "loss": 2.2808, "step": 1784 }, { "epoch": 1.4813278008298756, "grad_norm": 11.939187049865723, "learning_rate": 1.9408796680497927e-05, "loss": 2.0297, "step": 1785 }, { "epoch": 1.4821576763485478, "grad_norm": 8.437117576599121, "learning_rate": 1.9408464730290456e-05, "loss": 1.2022, "step": 1786 }, { "epoch": 1.48298755186722, "grad_norm": 6.924440383911133, "learning_rate": 1.9408132780082988e-05, "loss": 1.3337, "step": 1787 }, { "epoch": 1.4838174273858922, "grad_norm": 9.046149253845215, "learning_rate": 1.940780082987552e-05, "loss": 1.5496, "step": 1788 }, { "epoch": 1.4846473029045644, "grad_norm": 7.4626970291137695, "learning_rate": 1.9407468879668052e-05, "loss": 1.452, "step": 1789 }, { "epoch": 1.4854771784232366, "grad_norm": 8.123830795288086, "learning_rate": 1.940713692946058e-05, "loss": 1.5059, "step": 1790 }, { "epoch": 1.4863070539419088, "grad_norm": 8.270654678344727, "learning_rate": 1.9406804979253113e-05, "loss": 1.3121, "step": 1791 }, { "epoch": 1.487136929460581, "grad_norm": 7.675711631774902, "learning_rate": 1.9406473029045645e-05, "loss": 1.392, "step": 1792 }, { "epoch": 1.4879668049792532, "grad_norm": 10.798935890197754, "learning_rate": 1.9406141078838177e-05, "loss": 1.5144, "step": 1793 }, { "epoch": 1.4887966804979254, "grad_norm": 12.712041854858398, "learning_rate": 1.9405809128630706e-05, "loss": 2.4017, "step": 1794 }, { "epoch": 1.4896265560165975, "grad_norm": 7.633434772491455, "learning_rate": 1.9405477178423238e-05, "loss": 1.2797, "step": 1795 }, { "epoch": 1.4904564315352697, "grad_norm": 9.310720443725586, "learning_rate": 1.940514522821577e-05, "loss": 1.7971, "step": 
1796 }, { "epoch": 1.491286307053942, "grad_norm": 14.185187339782715, "learning_rate": 1.9404813278008302e-05, "loss": 1.6271, "step": 1797 }, { "epoch": 1.4921161825726141, "grad_norm": 8.889180183410645, "learning_rate": 1.940448132780083e-05, "loss": 1.0385, "step": 1798 }, { "epoch": 1.4929460580912863, "grad_norm": 9.753216743469238, "learning_rate": 1.9404149377593363e-05, "loss": 2.149, "step": 1799 }, { "epoch": 1.4937759336099585, "grad_norm": 11.88789176940918, "learning_rate": 1.9403817427385895e-05, "loss": 1.5237, "step": 1800 }, { "epoch": 1.4946058091286307, "grad_norm": 8.422148704528809, "learning_rate": 1.9403485477178427e-05, "loss": 1.542, "step": 1801 }, { "epoch": 1.495435684647303, "grad_norm": 15.313962936401367, "learning_rate": 1.9403153526970956e-05, "loss": 2.4271, "step": 1802 }, { "epoch": 1.4962655601659751, "grad_norm": 11.417238235473633, "learning_rate": 1.9402821576763488e-05, "loss": 1.6466, "step": 1803 }, { "epoch": 1.4970954356846473, "grad_norm": 12.914595603942871, "learning_rate": 1.9402489626556017e-05, "loss": 1.9171, "step": 1804 }, { "epoch": 1.4979253112033195, "grad_norm": 8.8079195022583, "learning_rate": 1.940215767634855e-05, "loss": 1.7706, "step": 1805 }, { "epoch": 1.4987551867219917, "grad_norm": 10.622793197631836, "learning_rate": 1.940182572614108e-05, "loss": 1.868, "step": 1806 }, { "epoch": 1.499585062240664, "grad_norm": 6.076992034912109, "learning_rate": 1.940149377593361e-05, "loss": 1.3145, "step": 1807 }, { "epoch": 1.500414937759336, "grad_norm": 8.712096214294434, "learning_rate": 1.940116182572614e-05, "loss": 1.2878, "step": 1808 }, { "epoch": 1.5012448132780083, "grad_norm": 6.0929741859436035, "learning_rate": 1.9400829875518674e-05, "loss": 0.9236, "step": 1809 }, { "epoch": 1.5020746887966805, "grad_norm": 10.289093971252441, "learning_rate": 1.9400497925311206e-05, "loss": 1.772, "step": 1810 }, { "epoch": 1.5029045643153527, "grad_norm": 8.495865821838379, "learning_rate": 
1.9400165975103735e-05, "loss": 1.3811, "step": 1811 }, { "epoch": 1.5037344398340249, "grad_norm": 13.732516288757324, "learning_rate": 1.9399834024896267e-05, "loss": 1.7854, "step": 1812 }, { "epoch": 1.504564315352697, "grad_norm": 9.69160270690918, "learning_rate": 1.93995020746888e-05, "loss": 1.3941, "step": 1813 }, { "epoch": 1.5053941908713693, "grad_norm": 17.76471710205078, "learning_rate": 1.939917012448133e-05, "loss": 1.3277, "step": 1814 }, { "epoch": 1.5062240663900415, "grad_norm": 5.774110794067383, "learning_rate": 1.939883817427386e-05, "loss": 1.4351, "step": 1815 }, { "epoch": 1.5070539419087137, "grad_norm": 7.5423479080200195, "learning_rate": 1.9398506224066392e-05, "loss": 1.3342, "step": 1816 }, { "epoch": 1.5078838174273859, "grad_norm": 8.4131498336792, "learning_rate": 1.9398174273858924e-05, "loss": 1.1185, "step": 1817 }, { "epoch": 1.508713692946058, "grad_norm": 12.029433250427246, "learning_rate": 1.9397842323651456e-05, "loss": 1.4964, "step": 1818 }, { "epoch": 1.5095435684647303, "grad_norm": 13.13941764831543, "learning_rate": 1.9397510373443985e-05, "loss": 2.6674, "step": 1819 }, { "epoch": 1.5103734439834025, "grad_norm": 10.489325523376465, "learning_rate": 1.9397178423236517e-05, "loss": 2.2313, "step": 1820 }, { "epoch": 1.5112033195020746, "grad_norm": 9.331995964050293, "learning_rate": 1.939684647302905e-05, "loss": 1.3746, "step": 1821 }, { "epoch": 1.5120331950207468, "grad_norm": 8.975489616394043, "learning_rate": 1.9396514522821578e-05, "loss": 0.8193, "step": 1822 }, { "epoch": 1.512863070539419, "grad_norm": 7.850194454193115, "learning_rate": 1.939618257261411e-05, "loss": 1.4865, "step": 1823 }, { "epoch": 1.5136929460580912, "grad_norm": 7.5312628746032715, "learning_rate": 1.939585062240664e-05, "loss": 1.2643, "step": 1824 }, { "epoch": 1.5145228215767634, "grad_norm": 9.377593994140625, "learning_rate": 1.939551867219917e-05, "loss": 1.5075, "step": 1825 }, { "epoch": 1.5153526970954356, "grad_norm": 
6.453577041625977, "learning_rate": 1.9395186721991703e-05, "loss": 1.2205, "step": 1826 }, { "epoch": 1.5161825726141078, "grad_norm": 10.575399398803711, "learning_rate": 1.9394854771784235e-05, "loss": 2.0534, "step": 1827 }, { "epoch": 1.51701244813278, "grad_norm": 9.57988452911377, "learning_rate": 1.9394522821576763e-05, "loss": 1.528, "step": 1828 }, { "epoch": 1.5178423236514522, "grad_norm": 8.128032684326172, "learning_rate": 1.9394190871369296e-05, "loss": 0.7047, "step": 1829 }, { "epoch": 1.5186721991701244, "grad_norm": 8.307210922241211, "learning_rate": 1.9393858921161828e-05, "loss": 1.3075, "step": 1830 }, { "epoch": 1.5195020746887966, "grad_norm": 7.358850002288818, "learning_rate": 1.939352697095436e-05, "loss": 1.8407, "step": 1831 }, { "epoch": 1.5203319502074688, "grad_norm": 7.815321445465088, "learning_rate": 1.939319502074689e-05, "loss": 1.5665, "step": 1832 }, { "epoch": 1.521161825726141, "grad_norm": 12.834221839904785, "learning_rate": 1.939286307053942e-05, "loss": 1.9915, "step": 1833 }, { "epoch": 1.5219917012448132, "grad_norm": 8.734257698059082, "learning_rate": 1.9392531120331953e-05, "loss": 0.8101, "step": 1834 }, { "epoch": 1.5228215767634854, "grad_norm": 8.213400840759277, "learning_rate": 1.9392199170124485e-05, "loss": 1.3352, "step": 1835 }, { "epoch": 1.5236514522821576, "grad_norm": 17.66513442993164, "learning_rate": 1.9391867219917014e-05, "loss": 1.9925, "step": 1836 }, { "epoch": 1.5244813278008298, "grad_norm": 7.8755412101745605, "learning_rate": 1.9391535269709546e-05, "loss": 1.6938, "step": 1837 }, { "epoch": 1.525311203319502, "grad_norm": 9.200056076049805, "learning_rate": 1.9391203319502078e-05, "loss": 1.5301, "step": 1838 }, { "epoch": 1.5261410788381742, "grad_norm": 10.143491744995117, "learning_rate": 1.939087136929461e-05, "loss": 1.5173, "step": 1839 }, { "epoch": 1.5269709543568464, "grad_norm": 10.905449867248535, "learning_rate": 1.939053941908714e-05, "loss": 1.5428, "step": 1840 }, { 
"epoch": 1.5278008298755186, "grad_norm": 9.252960205078125, "learning_rate": 1.939020746887967e-05, "loss": 0.7403, "step": 1841 }, { "epoch": 1.5286307053941908, "grad_norm": 9.669342994689941, "learning_rate": 1.93898755186722e-05, "loss": 1.7441, "step": 1842 }, { "epoch": 1.529460580912863, "grad_norm": 8.167795181274414, "learning_rate": 1.938954356846473e-05, "loss": 1.2893, "step": 1843 }, { "epoch": 1.5302904564315352, "grad_norm": 9.40062141418457, "learning_rate": 1.9389211618257264e-05, "loss": 1.3659, "step": 1844 }, { "epoch": 1.5311203319502074, "grad_norm": 12.62509822845459, "learning_rate": 1.9388879668049792e-05, "loss": 1.4938, "step": 1845 }, { "epoch": 1.5319502074688796, "grad_norm": 7.733036041259766, "learning_rate": 1.9388547717842324e-05, "loss": 1.2925, "step": 1846 }, { "epoch": 1.5327800829875518, "grad_norm": 8.066865921020508, "learning_rate": 1.9388215767634857e-05, "loss": 1.6908, "step": 1847 }, { "epoch": 1.533609958506224, "grad_norm": 9.31932544708252, "learning_rate": 1.9387883817427385e-05, "loss": 1.2249, "step": 1848 }, { "epoch": 1.5344398340248961, "grad_norm": 8.84864616394043, "learning_rate": 1.9387551867219917e-05, "loss": 1.7096, "step": 1849 }, { "epoch": 1.5352697095435683, "grad_norm": 11.597811698913574, "learning_rate": 1.938721991701245e-05, "loss": 2.0072, "step": 1850 }, { "epoch": 1.5360995850622405, "grad_norm": 11.974201202392578, "learning_rate": 1.938688796680498e-05, "loss": 1.7488, "step": 1851 }, { "epoch": 1.5369294605809127, "grad_norm": 9.935356140136719, "learning_rate": 1.938655601659751e-05, "loss": 1.7051, "step": 1852 }, { "epoch": 1.537759336099585, "grad_norm": 5.819921016693115, "learning_rate": 1.9386224066390042e-05, "loss": 1.2095, "step": 1853 }, { "epoch": 1.5385892116182571, "grad_norm": 10.715987205505371, "learning_rate": 1.9385892116182575e-05, "loss": 0.9787, "step": 1854 }, { "epoch": 1.5394190871369293, "grad_norm": 11.710895538330078, "learning_rate": 1.9385560165975107e-05, 
"loss": 1.4576, "step": 1855 }, { "epoch": 1.5402489626556015, "grad_norm": 7.641659736633301, "learning_rate": 1.938522821576764e-05, "loss": 1.5219, "step": 1856 }, { "epoch": 1.5410788381742737, "grad_norm": 11.605780601501465, "learning_rate": 1.9384896265560167e-05, "loss": 2.0791, "step": 1857 }, { "epoch": 1.541908713692946, "grad_norm": 13.073232650756836, "learning_rate": 1.93845643153527e-05, "loss": 1.4914, "step": 1858 }, { "epoch": 1.542738589211618, "grad_norm": 11.894709587097168, "learning_rate": 1.938423236514523e-05, "loss": 1.6076, "step": 1859 }, { "epoch": 1.5435684647302903, "grad_norm": 7.798985004425049, "learning_rate": 1.938390041493776e-05, "loss": 1.0533, "step": 1860 }, { "epoch": 1.5443983402489625, "grad_norm": 7.831367015838623, "learning_rate": 1.9383568464730292e-05, "loss": 1.7695, "step": 1861 }, { "epoch": 1.5452282157676347, "grad_norm": 13.18012523651123, "learning_rate": 1.9383236514522825e-05, "loss": 1.8156, "step": 1862 }, { "epoch": 1.546058091286307, "grad_norm": 11.383728981018066, "learning_rate": 1.9382904564315353e-05, "loss": 1.6564, "step": 1863 }, { "epoch": 1.5468879668049793, "grad_norm": 16.59432029724121, "learning_rate": 1.9382572614107885e-05, "loss": 2.3812, "step": 1864 }, { "epoch": 1.5477178423236515, "grad_norm": 7.83573579788208, "learning_rate": 1.9382240663900414e-05, "loss": 1.6419, "step": 1865 }, { "epoch": 1.5485477178423237, "grad_norm": 12.0936279296875, "learning_rate": 1.9381908713692946e-05, "loss": 1.5666, "step": 1866 }, { "epoch": 1.549377593360996, "grad_norm": 11.610274314880371, "learning_rate": 1.938157676348548e-05, "loss": 2.0044, "step": 1867 }, { "epoch": 1.550207468879668, "grad_norm": 10.279458999633789, "learning_rate": 1.938124481327801e-05, "loss": 1.6999, "step": 1868 }, { "epoch": 1.5510373443983403, "grad_norm": 15.950599670410156, "learning_rate": 1.938091286307054e-05, "loss": 2.3829, "step": 1869 }, { "epoch": 1.5518672199170125, "grad_norm": 8.196824073791504, 
"learning_rate": 1.938058091286307e-05, "loss": 1.4262, "step": 1870 }, { "epoch": 1.5526970954356847, "grad_norm": 6.884124755859375, "learning_rate": 1.9380248962655603e-05, "loss": 1.6079, "step": 1871 }, { "epoch": 1.5535269709543569, "grad_norm": 7.944014072418213, "learning_rate": 1.9379917012448136e-05, "loss": 1.497, "step": 1872 }, { "epoch": 1.554356846473029, "grad_norm": 8.71760082244873, "learning_rate": 1.9379585062240664e-05, "loss": 1.2054, "step": 1873 }, { "epoch": 1.5551867219917013, "grad_norm": 8.071576118469238, "learning_rate": 1.9379253112033196e-05, "loss": 1.0858, "step": 1874 }, { "epoch": 1.5560165975103735, "grad_norm": 11.039804458618164, "learning_rate": 1.937892116182573e-05, "loss": 1.7864, "step": 1875 }, { "epoch": 1.5568464730290457, "grad_norm": 13.53388500213623, "learning_rate": 1.937858921161826e-05, "loss": 1.5819, "step": 1876 }, { "epoch": 1.5576763485477179, "grad_norm": 17.912900924682617, "learning_rate": 1.937825726141079e-05, "loss": 1.721, "step": 1877 }, { "epoch": 1.55850622406639, "grad_norm": 6.739965915679932, "learning_rate": 1.937792531120332e-05, "loss": 1.3305, "step": 1878 }, { "epoch": 1.5593360995850623, "grad_norm": 7.80411958694458, "learning_rate": 1.9377593360995853e-05, "loss": 1.2247, "step": 1879 }, { "epoch": 1.5601659751037344, "grad_norm": 9.685667991638184, "learning_rate": 1.9377261410788382e-05, "loss": 1.3401, "step": 1880 }, { "epoch": 1.5609958506224066, "grad_norm": 8.874372482299805, "learning_rate": 1.9376929460580914e-05, "loss": 1.7218, "step": 1881 }, { "epoch": 1.5618257261410788, "grad_norm": 6.8849101066589355, "learning_rate": 1.9376597510373446e-05, "loss": 1.1373, "step": 1882 }, { "epoch": 1.562655601659751, "grad_norm": 6.6615729331970215, "learning_rate": 1.9376265560165975e-05, "loss": 1.0282, "step": 1883 }, { "epoch": 1.5634854771784232, "grad_norm": 8.29474925994873, "learning_rate": 1.9375933609958507e-05, "loss": 1.9888, "step": 1884 }, { "epoch": 1.5643153526970954, 
"grad_norm": 7.147170543670654, "learning_rate": 1.937560165975104e-05, "loss": 1.1337, "step": 1885 }, { "epoch": 1.5651452282157676, "grad_norm": 6.540191650390625, "learning_rate": 1.9375269709543568e-05, "loss": 0.9008, "step": 1886 }, { "epoch": 1.5659751037344398, "grad_norm": 7.457280158996582, "learning_rate": 1.93749377593361e-05, "loss": 1.6231, "step": 1887 }, { "epoch": 1.566804979253112, "grad_norm": 8.383551597595215, "learning_rate": 1.9374605809128632e-05, "loss": 1.525, "step": 1888 }, { "epoch": 1.5676348547717842, "grad_norm": 7.832956314086914, "learning_rate": 1.9374273858921164e-05, "loss": 1.434, "step": 1889 }, { "epoch": 1.5684647302904564, "grad_norm": 6.788453578948975, "learning_rate": 1.9373941908713693e-05, "loss": 1.019, "step": 1890 }, { "epoch": 1.5692946058091286, "grad_norm": 6.967404365539551, "learning_rate": 1.9373609958506225e-05, "loss": 1.4167, "step": 1891 }, { "epoch": 1.5701244813278008, "grad_norm": 12.188233375549316, "learning_rate": 1.9373278008298757e-05, "loss": 1.5325, "step": 1892 }, { "epoch": 1.570954356846473, "grad_norm": 7.960035800933838, "learning_rate": 1.937294605809129e-05, "loss": 1.7668, "step": 1893 }, { "epoch": 1.5717842323651452, "grad_norm": 10.847424507141113, "learning_rate": 1.9372614107883818e-05, "loss": 1.6526, "step": 1894 }, { "epoch": 1.5726141078838174, "grad_norm": 12.793315887451172, "learning_rate": 1.937228215767635e-05, "loss": 1.9551, "step": 1895 }, { "epoch": 1.5734439834024896, "grad_norm": 12.200318336486816, "learning_rate": 1.9371950207468882e-05, "loss": 3.1019, "step": 1896 }, { "epoch": 1.5742738589211618, "grad_norm": 11.513026237487793, "learning_rate": 1.9371618257261414e-05, "loss": 1.6068, "step": 1897 }, { "epoch": 1.575103734439834, "grad_norm": 13.191624641418457, "learning_rate": 1.9371286307053943e-05, "loss": 1.8432, "step": 1898 }, { "epoch": 1.5759336099585062, "grad_norm": 9.152524948120117, "learning_rate": 1.9370954356846475e-05, "loss": 1.5332, "step": 
1899 }, { "epoch": 1.5767634854771784, "grad_norm": 10.20380973815918, "learning_rate": 1.9370622406639007e-05, "loss": 1.9576, "step": 1900 }, { "epoch": 1.5775933609958506, "grad_norm": 15.958489418029785, "learning_rate": 1.9370290456431536e-05, "loss": 2.0072, "step": 1901 }, { "epoch": 1.578423236514523, "grad_norm": 12.593667030334473, "learning_rate": 1.9369958506224068e-05, "loss": 1.6952, "step": 1902 }, { "epoch": 1.5792531120331952, "grad_norm": 10.974665641784668, "learning_rate": 1.9369626556016597e-05, "loss": 1.3033, "step": 1903 }, { "epoch": 1.5800829875518674, "grad_norm": 10.91817569732666, "learning_rate": 1.936929460580913e-05, "loss": 1.3277, "step": 1904 }, { "epoch": 1.5809128630705396, "grad_norm": 15.354419708251953, "learning_rate": 1.936896265560166e-05, "loss": 1.8838, "step": 1905 }, { "epoch": 1.5817427385892118, "grad_norm": 9.577791213989258, "learning_rate": 1.9368630705394193e-05, "loss": 1.4681, "step": 1906 }, { "epoch": 1.582572614107884, "grad_norm": 7.708042144775391, "learning_rate": 1.9368298755186722e-05, "loss": 1.4807, "step": 1907 }, { "epoch": 1.5834024896265562, "grad_norm": 10.550963401794434, "learning_rate": 1.9367966804979254e-05, "loss": 2.2229, "step": 1908 }, { "epoch": 1.5842323651452284, "grad_norm": 13.29854679107666, "learning_rate": 1.9367634854771786e-05, "loss": 2.0197, "step": 1909 }, { "epoch": 1.5850622406639006, "grad_norm": 11.053549766540527, "learning_rate": 1.936730290456432e-05, "loss": 1.6223, "step": 1910 }, { "epoch": 1.5858921161825728, "grad_norm": 8.994999885559082, "learning_rate": 1.9366970954356847e-05, "loss": 1.1675, "step": 1911 }, { "epoch": 1.586721991701245, "grad_norm": 10.373934745788574, "learning_rate": 1.936663900414938e-05, "loss": 2.0725, "step": 1912 }, { "epoch": 1.5875518672199171, "grad_norm": 8.626922607421875, "learning_rate": 1.936630705394191e-05, "loss": 1.3358, "step": 1913 }, { "epoch": 1.5883817427385893, "grad_norm": 7.266122817993164, "learning_rate": 
1.9365975103734443e-05, "loss": 1.2154, "step": 1914 }, { "epoch": 1.5892116182572615, "grad_norm": 7.373734474182129, "learning_rate": 1.9365643153526972e-05, "loss": 1.0646, "step": 1915 }, { "epoch": 1.5900414937759337, "grad_norm": 9.811017036437988, "learning_rate": 1.9365311203319504e-05, "loss": 1.308, "step": 1916 }, { "epoch": 1.590871369294606, "grad_norm": 7.757078170776367, "learning_rate": 1.9364979253112036e-05, "loss": 1.3996, "step": 1917 }, { "epoch": 1.5917012448132781, "grad_norm": 6.246563911437988, "learning_rate": 1.936464730290457e-05, "loss": 1.3761, "step": 1918 }, { "epoch": 1.5925311203319503, "grad_norm": 9.673032760620117, "learning_rate": 1.9364315352697097e-05, "loss": 1.8469, "step": 1919 }, { "epoch": 1.5933609958506225, "grad_norm": 8.453177452087402, "learning_rate": 1.936398340248963e-05, "loss": 1.5457, "step": 1920 }, { "epoch": 1.5941908713692947, "grad_norm": 13.081642150878906, "learning_rate": 1.9363651452282158e-05, "loss": 2.1608, "step": 1921 }, { "epoch": 1.595020746887967, "grad_norm": 8.16489028930664, "learning_rate": 1.936331950207469e-05, "loss": 1.4319, "step": 1922 }, { "epoch": 1.595850622406639, "grad_norm": 11.328606605529785, "learning_rate": 1.9362987551867222e-05, "loss": 1.4874, "step": 1923 }, { "epoch": 1.5966804979253113, "grad_norm": 4.44419527053833, "learning_rate": 1.936265560165975e-05, "loss": 0.7751, "step": 1924 }, { "epoch": 1.5975103734439835, "grad_norm": 7.699121952056885, "learning_rate": 1.9362323651452283e-05, "loss": 2.3286, "step": 1925 }, { "epoch": 1.5983402489626557, "grad_norm": 5.572314739227295, "learning_rate": 1.9361991701244815e-05, "loss": 1.3815, "step": 1926 }, { "epoch": 1.599170124481328, "grad_norm": 9.711087226867676, "learning_rate": 1.9361659751037344e-05, "loss": 1.615, "step": 1927 }, { "epoch": 1.6, "grad_norm": 7.135917663574219, "learning_rate": 1.9361327800829876e-05, "loss": 0.8842, "step": 1928 }, { "epoch": 1.6008298755186723, "grad_norm": 9.415799140930176, 
"learning_rate": 1.9360995850622408e-05, "loss": 1.5708, "step": 1929 }, { "epoch": 1.6016597510373445, "grad_norm": 12.761911392211914, "learning_rate": 1.936066390041494e-05, "loss": 2.1142, "step": 1930 }, { "epoch": 1.6024896265560167, "grad_norm": 12.859713554382324, "learning_rate": 1.936033195020747e-05, "loss": 1.4546, "step": 1931 }, { "epoch": 1.6033195020746889, "grad_norm": 11.17138671875, "learning_rate": 1.936e-05, "loss": 1.5944, "step": 1932 }, { "epoch": 1.604149377593361, "grad_norm": 11.695622444152832, "learning_rate": 1.9359668049792533e-05, "loss": 1.7252, "step": 1933 }, { "epoch": 1.6049792531120333, "grad_norm": 7.012678146362305, "learning_rate": 1.9359336099585065e-05, "loss": 1.2443, "step": 1934 }, { "epoch": 1.6058091286307055, "grad_norm": 8.033668518066406, "learning_rate": 1.9359004149377597e-05, "loss": 1.2589, "step": 1935 }, { "epoch": 1.6066390041493777, "grad_norm": 7.47511100769043, "learning_rate": 1.9358672199170126e-05, "loss": 1.1611, "step": 1936 }, { "epoch": 1.6074688796680499, "grad_norm": 11.530107498168945, "learning_rate": 1.9358340248962658e-05, "loss": 1.4711, "step": 1937 }, { "epoch": 1.608298755186722, "grad_norm": 17.01097297668457, "learning_rate": 1.935800829875519e-05, "loss": 1.6193, "step": 1938 }, { "epoch": 1.6091286307053942, "grad_norm": 11.075984001159668, "learning_rate": 1.935767634854772e-05, "loss": 1.3992, "step": 1939 }, { "epoch": 1.6099585062240664, "grad_norm": 7.956113338470459, "learning_rate": 1.935734439834025e-05, "loss": 1.6177, "step": 1940 }, { "epoch": 1.6107883817427386, "grad_norm": 11.8423433303833, "learning_rate": 1.935701244813278e-05, "loss": 1.8551, "step": 1941 }, { "epoch": 1.6116182572614108, "grad_norm": 8.937410354614258, "learning_rate": 1.9356680497925312e-05, "loss": 1.2058, "step": 1942 }, { "epoch": 1.612448132780083, "grad_norm": 7.9990057945251465, "learning_rate": 1.9356348547717844e-05, "loss": 1.3859, "step": 1943 }, { "epoch": 1.6132780082987552, "grad_norm": 
12.32593822479248, "learning_rate": 1.9356016597510373e-05, "loss": 1.7743, "step": 1944 }, { "epoch": 1.6141078838174274, "grad_norm": 6.846778392791748, "learning_rate": 1.9355684647302905e-05, "loss": 1.8165, "step": 1945 }, { "epoch": 1.6149377593360996, "grad_norm": 11.169933319091797, "learning_rate": 1.9355352697095437e-05, "loss": 1.8701, "step": 1946 }, { "epoch": 1.6157676348547718, "grad_norm": 19.328855514526367, "learning_rate": 1.935502074688797e-05, "loss": 2.2319, "step": 1947 }, { "epoch": 1.616597510373444, "grad_norm": 11.898629188537598, "learning_rate": 1.9354688796680498e-05, "loss": 2.106, "step": 1948 }, { "epoch": 1.6174273858921162, "grad_norm": 6.599542140960693, "learning_rate": 1.935435684647303e-05, "loss": 1.5266, "step": 1949 }, { "epoch": 1.6182572614107884, "grad_norm": 12.277094841003418, "learning_rate": 1.9354024896265562e-05, "loss": 1.3732, "step": 1950 }, { "epoch": 1.6190871369294606, "grad_norm": 12.178019523620605, "learning_rate": 1.9353692946058094e-05, "loss": 2.3813, "step": 1951 }, { "epoch": 1.6199170124481328, "grad_norm": 12.562318801879883, "learning_rate": 1.9353360995850623e-05, "loss": 2.7453, "step": 1952 }, { "epoch": 1.620746887966805, "grad_norm": 9.581323623657227, "learning_rate": 1.9353029045643155e-05, "loss": 1.5194, "step": 1953 }, { "epoch": 1.6215767634854772, "grad_norm": 8.559000968933105, "learning_rate": 1.9352697095435687e-05, "loss": 1.3703, "step": 1954 }, { "epoch": 1.6224066390041494, "grad_norm": 6.704750061035156, "learning_rate": 1.935236514522822e-05, "loss": 1.1455, "step": 1955 }, { "epoch": 1.6232365145228216, "grad_norm": 11.154669761657715, "learning_rate": 1.9352033195020748e-05, "loss": 1.8263, "step": 1956 }, { "epoch": 1.6240663900414938, "grad_norm": 8.406031608581543, "learning_rate": 1.935170124481328e-05, "loss": 1.0867, "step": 1957 }, { "epoch": 1.624896265560166, "grad_norm": 12.059597969055176, "learning_rate": 1.9351369294605812e-05, "loss": 2.0403, "step": 1958 }, { 
"epoch": 1.6257261410788382, "grad_norm": 10.25149154663086, "learning_rate": 1.935103734439834e-05, "loss": 1.5927, "step": 1959 }, { "epoch": 1.6265560165975104, "grad_norm": 6.79824161529541, "learning_rate": 1.9350705394190873e-05, "loss": 1.6735, "step": 1960 }, { "epoch": 1.6273858921161826, "grad_norm": 4.536545753479004, "learning_rate": 1.9350373443983405e-05, "loss": 1.5362, "step": 1961 }, { "epoch": 1.6282157676348548, "grad_norm": 8.38997745513916, "learning_rate": 1.9350041493775934e-05, "loss": 1.8931, "step": 1962 }, { "epoch": 1.629045643153527, "grad_norm": 8.26342487335205, "learning_rate": 1.9349709543568466e-05, "loss": 1.2737, "step": 1963 }, { "epoch": 1.6298755186721992, "grad_norm": 13.070613861083984, "learning_rate": 1.9349377593360998e-05, "loss": 1.7659, "step": 1964 }, { "epoch": 1.6307053941908713, "grad_norm": 11.483370780944824, "learning_rate": 1.9349045643153527e-05, "loss": 1.2759, "step": 1965 }, { "epoch": 1.6315352697095435, "grad_norm": 6.452202796936035, "learning_rate": 1.934871369294606e-05, "loss": 1.3042, "step": 1966 }, { "epoch": 1.6323651452282157, "grad_norm": 9.520054817199707, "learning_rate": 1.934838174273859e-05, "loss": 2.0056, "step": 1967 }, { "epoch": 1.633195020746888, "grad_norm": 10.742744445800781, "learning_rate": 1.9348049792531123e-05, "loss": 1.783, "step": 1968 }, { "epoch": 1.6340248962655601, "grad_norm": 7.417154312133789, "learning_rate": 1.9347717842323652e-05, "loss": 1.1071, "step": 1969 }, { "epoch": 1.6348547717842323, "grad_norm": 8.24433708190918, "learning_rate": 1.9347385892116184e-05, "loss": 1.5191, "step": 1970 }, { "epoch": 1.6356846473029045, "grad_norm": 12.018776893615723, "learning_rate": 1.9347053941908716e-05, "loss": 2.1783, "step": 1971 }, { "epoch": 1.6365145228215767, "grad_norm": 9.821699142456055, "learning_rate": 1.9346721991701248e-05, "loss": 1.4937, "step": 1972 }, { "epoch": 1.637344398340249, "grad_norm": 10.90096378326416, "learning_rate": 1.9346390041493777e-05, 
"loss": 1.574, "step": 1973 }, { "epoch": 1.6381742738589211, "grad_norm": 9.725264549255371, "learning_rate": 1.934605809128631e-05, "loss": 1.6246, "step": 1974 }, { "epoch": 1.6390041493775933, "grad_norm": 8.063091278076172, "learning_rate": 1.934572614107884e-05, "loss": 1.708, "step": 1975 }, { "epoch": 1.6398340248962655, "grad_norm": 6.824950695037842, "learning_rate": 1.9345394190871373e-05, "loss": 1.1444, "step": 1976 }, { "epoch": 1.6406639004149377, "grad_norm": 8.557698249816895, "learning_rate": 1.9345062240663902e-05, "loss": 1.6289, "step": 1977 }, { "epoch": 1.64149377593361, "grad_norm": 13.439423561096191, "learning_rate": 1.9344730290456434e-05, "loss": 1.8875, "step": 1978 }, { "epoch": 1.642323651452282, "grad_norm": 7.028676986694336, "learning_rate": 1.9344398340248966e-05, "loss": 0.8248, "step": 1979 }, { "epoch": 1.6431535269709543, "grad_norm": 10.507439613342285, "learning_rate": 1.9344066390041495e-05, "loss": 1.758, "step": 1980 }, { "epoch": 1.6439834024896265, "grad_norm": 7.257308483123779, "learning_rate": 1.9343734439834027e-05, "loss": 1.402, "step": 1981 }, { "epoch": 1.6448132780082987, "grad_norm": 8.823452949523926, "learning_rate": 1.9343402489626556e-05, "loss": 0.9115, "step": 1982 }, { "epoch": 1.6456431535269709, "grad_norm": 7.290677547454834, "learning_rate": 1.9343070539419088e-05, "loss": 1.1583, "step": 1983 }, { "epoch": 1.646473029045643, "grad_norm": 13.14465618133545, "learning_rate": 1.934273858921162e-05, "loss": 2.2575, "step": 1984 }, { "epoch": 1.6473029045643153, "grad_norm": 8.554330825805664, "learning_rate": 1.9342406639004152e-05, "loss": 1.1715, "step": 1985 }, { "epoch": 1.6481327800829875, "grad_norm": 8.199057579040527, "learning_rate": 1.934207468879668e-05, "loss": 1.412, "step": 1986 }, { "epoch": 1.6489626556016597, "grad_norm": 9.568710327148438, "learning_rate": 1.9341742738589213e-05, "loss": 1.27, "step": 1987 }, { "epoch": 1.6497925311203319, "grad_norm": 7.7163777351379395, 
"learning_rate": 1.9341410788381745e-05, "loss": 1.1304, "step": 1988 }, { "epoch": 1.650622406639004, "grad_norm": 6.177610874176025, "learning_rate": 1.9341078838174277e-05, "loss": 1.5958, "step": 1989 }, { "epoch": 1.6514522821576763, "grad_norm": 11.958740234375, "learning_rate": 1.9340746887966806e-05, "loss": 1.8237, "step": 1990 }, { "epoch": 1.6522821576763485, "grad_norm": 8.632132530212402, "learning_rate": 1.9340414937759338e-05, "loss": 1.4138, "step": 1991 }, { "epoch": 1.6531120331950206, "grad_norm": 10.174393653869629, "learning_rate": 1.934008298755187e-05, "loss": 1.351, "step": 1992 }, { "epoch": 1.6539419087136928, "grad_norm": 7.025203227996826, "learning_rate": 1.9339751037344402e-05, "loss": 1.4505, "step": 1993 }, { "epoch": 1.654771784232365, "grad_norm": 11.708313941955566, "learning_rate": 1.933941908713693e-05, "loss": 2.0867, "step": 1994 }, { "epoch": 1.6556016597510372, "grad_norm": 10.420455932617188, "learning_rate": 1.9339087136929463e-05, "loss": 1.7891, "step": 1995 }, { "epoch": 1.6564315352697094, "grad_norm": 13.46472454071045, "learning_rate": 1.9338755186721995e-05, "loss": 1.87, "step": 1996 }, { "epoch": 1.6572614107883816, "grad_norm": 14.052865028381348, "learning_rate": 1.9338423236514524e-05, "loss": 2.5404, "step": 1997 }, { "epoch": 1.6580912863070538, "grad_norm": 9.707562446594238, "learning_rate": 1.9338091286307056e-05, "loss": 1.4356, "step": 1998 }, { "epoch": 1.658921161825726, "grad_norm": 11.769050598144531, "learning_rate": 1.9337759336099588e-05, "loss": 1.4869, "step": 1999 }, { "epoch": 1.6597510373443982, "grad_norm": 10.479175567626953, "learning_rate": 1.9337427385892117e-05, "loss": 1.5569, "step": 2000 }, { "epoch": 1.6605809128630704, "grad_norm": 10.506841659545898, "learning_rate": 1.933709543568465e-05, "loss": 1.9433, "step": 2001 }, { "epoch": 1.6614107883817426, "grad_norm": 7.845117092132568, "learning_rate": 1.933676348547718e-05, "loss": 1.9338, "step": 2002 }, { "epoch": 
1.6622406639004148, "grad_norm": 9.206958770751953, "learning_rate": 1.933643153526971e-05, "loss": 1.9058, "step": 2003 }, { "epoch": 1.663070539419087, "grad_norm": 9.74509048461914, "learning_rate": 1.933609958506224e-05, "loss": 1.2397, "step": 2004 }, { "epoch": 1.6639004149377592, "grad_norm": 9.954602241516113, "learning_rate": 1.9335767634854774e-05, "loss": 1.248, "step": 2005 }, { "epoch": 1.6647302904564314, "grad_norm": 7.565554141998291, "learning_rate": 1.9335435684647302e-05, "loss": 1.5049, "step": 2006 }, { "epoch": 1.6655601659751036, "grad_norm": 5.964968681335449, "learning_rate": 1.9335103734439835e-05, "loss": 1.665, "step": 2007 }, { "epoch": 1.6663900414937758, "grad_norm": 10.4768705368042, "learning_rate": 1.9334771784232367e-05, "loss": 1.5476, "step": 2008 }, { "epoch": 1.667219917012448, "grad_norm": 8.903473854064941, "learning_rate": 1.93344398340249e-05, "loss": 1.001, "step": 2009 }, { "epoch": 1.6680497925311202, "grad_norm": 6.650935173034668, "learning_rate": 1.9334107883817427e-05, "loss": 1.7559, "step": 2010 }, { "epoch": 1.6688796680497924, "grad_norm": 11.772870063781738, "learning_rate": 1.933377593360996e-05, "loss": 2.1216, "step": 2011 }, { "epoch": 1.6697095435684646, "grad_norm": 10.366384506225586, "learning_rate": 1.933344398340249e-05, "loss": 1.6338, "step": 2012 }, { "epoch": 1.6705394190871368, "grad_norm": 11.715922355651855, "learning_rate": 1.9333112033195024e-05, "loss": 1.2025, "step": 2013 }, { "epoch": 1.671369294605809, "grad_norm": 8.869370460510254, "learning_rate": 1.9332780082987556e-05, "loss": 0.8383, "step": 2014 }, { "epoch": 1.6721991701244814, "grad_norm": 8.169434547424316, "learning_rate": 1.9332448132780085e-05, "loss": 1.2909, "step": 2015 }, { "epoch": 1.6730290456431536, "grad_norm": 8.44085693359375, "learning_rate": 1.9332116182572617e-05, "loss": 1.6102, "step": 2016 }, { "epoch": 1.6738589211618258, "grad_norm": 9.376835823059082, "learning_rate": 1.933178423236515e-05, "loss": 2.0948, 
"step": 2017 }, { "epoch": 1.674688796680498, "grad_norm": 11.370725631713867, "learning_rate": 1.9331452282157678e-05, "loss": 1.948, "step": 2018 }, { "epoch": 1.6755186721991702, "grad_norm": 9.061299324035645, "learning_rate": 1.933112033195021e-05, "loss": 1.3307, "step": 2019 }, { "epoch": 1.6763485477178424, "grad_norm": 11.061910629272461, "learning_rate": 1.933078838174274e-05, "loss": 1.6156, "step": 2020 }, { "epoch": 1.6771784232365146, "grad_norm": 7.564661026000977, "learning_rate": 1.933045643153527e-05, "loss": 1.624, "step": 2021 }, { "epoch": 1.6780082987551868, "grad_norm": 14.919791221618652, "learning_rate": 1.9330124481327803e-05, "loss": 2.0639, "step": 2022 }, { "epoch": 1.678838174273859, "grad_norm": 6.6976776123046875, "learning_rate": 1.932979253112033e-05, "loss": 1.3554, "step": 2023 }, { "epoch": 1.6796680497925311, "grad_norm": 9.200414657592773, "learning_rate": 1.9329460580912863e-05, "loss": 1.3943, "step": 2024 }, { "epoch": 1.6804979253112033, "grad_norm": 10.987116813659668, "learning_rate": 1.9329128630705396e-05, "loss": 1.4646, "step": 2025 }, { "epoch": 1.6813278008298755, "grad_norm": 10.647331237792969, "learning_rate": 1.9328796680497928e-05, "loss": 1.5755, "step": 2026 }, { "epoch": 1.6821576763485477, "grad_norm": 7.192817211151123, "learning_rate": 1.9328464730290456e-05, "loss": 1.2571, "step": 2027 }, { "epoch": 1.68298755186722, "grad_norm": 14.820597648620605, "learning_rate": 1.932813278008299e-05, "loss": 1.5891, "step": 2028 }, { "epoch": 1.6838174273858921, "grad_norm": 8.305025100708008, "learning_rate": 1.932780082987552e-05, "loss": 1.2731, "step": 2029 }, { "epoch": 1.6846473029045643, "grad_norm": 8.75129222869873, "learning_rate": 1.9327468879668053e-05, "loss": 2.4534, "step": 2030 }, { "epoch": 1.6854771784232365, "grad_norm": 10.764566421508789, "learning_rate": 1.932713692946058e-05, "loss": 2.6982, "step": 2031 }, { "epoch": 1.6863070539419087, "grad_norm": 8.528451919555664, "learning_rate": 
1.9326804979253113e-05, "loss": 1.5724, "step": 2032 }, { "epoch": 1.687136929460581, "grad_norm": 12.625093460083008, "learning_rate": 1.9326473029045646e-05, "loss": 1.6949, "step": 2033 }, { "epoch": 1.687966804979253, "grad_norm": 13.911087036132812, "learning_rate": 1.9326141078838178e-05, "loss": 1.9618, "step": 2034 }, { "epoch": 1.6887966804979253, "grad_norm": 8.149459838867188, "learning_rate": 1.9325809128630706e-05, "loss": 1.3765, "step": 2035 }, { "epoch": 1.6896265560165975, "grad_norm": 7.353023529052734, "learning_rate": 1.932547717842324e-05, "loss": 1.0878, "step": 2036 }, { "epoch": 1.6904564315352697, "grad_norm": 9.106232643127441, "learning_rate": 1.932514522821577e-05, "loss": 1.8786, "step": 2037 }, { "epoch": 1.691286307053942, "grad_norm": 8.337231636047363, "learning_rate": 1.93248132780083e-05, "loss": 1.6511, "step": 2038 }, { "epoch": 1.692116182572614, "grad_norm": 8.878396034240723, "learning_rate": 1.932448132780083e-05, "loss": 1.5387, "step": 2039 }, { "epoch": 1.6929460580912863, "grad_norm": 11.581090927124023, "learning_rate": 1.9324149377593364e-05, "loss": 2.218, "step": 2040 }, { "epoch": 1.6937759336099585, "grad_norm": 8.966328620910645, "learning_rate": 1.9323817427385892e-05, "loss": 1.4249, "step": 2041 }, { "epoch": 1.6946058091286307, "grad_norm": 8.34956169128418, "learning_rate": 1.9323485477178424e-05, "loss": 1.198, "step": 2042 }, { "epoch": 1.6954356846473029, "grad_norm": 11.627195358276367, "learning_rate": 1.9323153526970957e-05, "loss": 1.8906, "step": 2043 }, { "epoch": 1.696265560165975, "grad_norm": 9.532196044921875, "learning_rate": 1.9322821576763485e-05, "loss": 1.6163, "step": 2044 }, { "epoch": 1.6970954356846473, "grad_norm": 13.279045104980469, "learning_rate": 1.9322489626556017e-05, "loss": 1.9572, "step": 2045 }, { "epoch": 1.6979253112033195, "grad_norm": 16.72869300842285, "learning_rate": 1.932215767634855e-05, "loss": 2.6677, "step": 2046 }, { "epoch": 1.6987551867219917, "grad_norm": 
7.00724983215332, "learning_rate": 1.932182572614108e-05, "loss": 1.7003, "step": 2047 }, { "epoch": 1.6995850622406639, "grad_norm": 12.62999153137207, "learning_rate": 1.932149377593361e-05, "loss": 1.4061, "step": 2048 }, { "epoch": 1.700414937759336, "grad_norm": 7.231219291687012, "learning_rate": 1.9321161825726142e-05, "loss": 1.4838, "step": 2049 }, { "epoch": 1.7012448132780082, "grad_norm": 10.233558654785156, "learning_rate": 1.9320829875518674e-05, "loss": 2.2217, "step": 2050 }, { "epoch": 1.7020746887966804, "grad_norm": 10.175472259521484, "learning_rate": 1.9320497925311207e-05, "loss": 1.7338, "step": 2051 }, { "epoch": 1.7029045643153526, "grad_norm": 11.400713920593262, "learning_rate": 1.9320165975103735e-05, "loss": 2.4557, "step": 2052 }, { "epoch": 1.703734439834025, "grad_norm": 14.399995803833008, "learning_rate": 1.9319834024896267e-05, "loss": 1.4394, "step": 2053 }, { "epoch": 1.7045643153526973, "grad_norm": 9.315067291259766, "learning_rate": 1.93195020746888e-05, "loss": 1.5652, "step": 2054 }, { "epoch": 1.7053941908713695, "grad_norm": 14.176732063293457, "learning_rate": 1.931917012448133e-05, "loss": 1.4727, "step": 2055 }, { "epoch": 1.7062240663900416, "grad_norm": 10.24250602722168, "learning_rate": 1.931883817427386e-05, "loss": 1.2484, "step": 2056 }, { "epoch": 1.7070539419087138, "grad_norm": 8.336776733398438, "learning_rate": 1.9318506224066392e-05, "loss": 1.865, "step": 2057 }, { "epoch": 1.707883817427386, "grad_norm": 10.94306468963623, "learning_rate": 1.931817427385892e-05, "loss": 1.1788, "step": 2058 }, { "epoch": 1.7087136929460582, "grad_norm": 10.29683780670166, "learning_rate": 1.9317842323651453e-05, "loss": 1.9471, "step": 2059 }, { "epoch": 1.7095435684647304, "grad_norm": 11.303812980651855, "learning_rate": 1.9317510373443985e-05, "loss": 1.6846, "step": 2060 }, { "epoch": 1.7103734439834026, "grad_norm": 10.321022987365723, "learning_rate": 1.9317178423236514e-05, "loss": 1.8886, "step": 2061 }, { 
"epoch": 1.7112033195020748, "grad_norm": 8.579574584960938, "learning_rate": 1.9316846473029046e-05, "loss": 1.6806, "step": 2062 }, { "epoch": 1.712033195020747, "grad_norm": 14.556415557861328, "learning_rate": 1.931651452282158e-05, "loss": 1.4412, "step": 2063 }, { "epoch": 1.7128630705394192, "grad_norm": 8.918317794799805, "learning_rate": 1.931618257261411e-05, "loss": 1.4134, "step": 2064 }, { "epoch": 1.7136929460580914, "grad_norm": 8.974635124206543, "learning_rate": 1.931585062240664e-05, "loss": 1.22, "step": 2065 }, { "epoch": 1.7145228215767636, "grad_norm": 5.481673240661621, "learning_rate": 1.931551867219917e-05, "loss": 1.3008, "step": 2066 }, { "epoch": 1.7153526970954358, "grad_norm": 7.9867472648620605, "learning_rate": 1.9315186721991703e-05, "loss": 1.3853, "step": 2067 }, { "epoch": 1.716182572614108, "grad_norm": 6.328382968902588, "learning_rate": 1.9314854771784235e-05, "loss": 1.7536, "step": 2068 }, { "epoch": 1.7170124481327802, "grad_norm": 10.753763198852539, "learning_rate": 1.9314522821576764e-05, "loss": 1.981, "step": 2069 }, { "epoch": 1.7178423236514524, "grad_norm": 9.318131446838379, "learning_rate": 1.9314190871369296e-05, "loss": 1.8396, "step": 2070 }, { "epoch": 1.7186721991701246, "grad_norm": 14.660786628723145, "learning_rate": 1.931385892116183e-05, "loss": 1.7678, "step": 2071 }, { "epoch": 1.7195020746887968, "grad_norm": 8.769526481628418, "learning_rate": 1.931352697095436e-05, "loss": 1.0047, "step": 2072 }, { "epoch": 1.720331950207469, "grad_norm": 9.051626205444336, "learning_rate": 1.931319502074689e-05, "loss": 1.6014, "step": 2073 }, { "epoch": 1.7211618257261412, "grad_norm": 9.615530014038086, "learning_rate": 1.931286307053942e-05, "loss": 1.3076, "step": 2074 }, { "epoch": 1.7219917012448134, "grad_norm": 10.390085220336914, "learning_rate": 1.9312531120331953e-05, "loss": 1.4468, "step": 2075 }, { "epoch": 1.7228215767634856, "grad_norm": 8.861926078796387, "learning_rate": 1.9312199170124482e-05, 
"loss": 1.6047, "step": 2076 }, { "epoch": 1.7236514522821578, "grad_norm": 8.368080139160156, "learning_rate": 1.9311867219917014e-05, "loss": 1.8209, "step": 2077 }, { "epoch": 1.72448132780083, "grad_norm": 8.51073169708252, "learning_rate": 1.9311535269709546e-05, "loss": 1.404, "step": 2078 }, { "epoch": 1.7253112033195022, "grad_norm": 8.483341217041016, "learning_rate": 1.9311203319502075e-05, "loss": 1.4707, "step": 2079 }, { "epoch": 1.7261410788381744, "grad_norm": 9.660340309143066, "learning_rate": 1.9310871369294607e-05, "loss": 1.2768, "step": 2080 }, { "epoch": 1.7269709543568466, "grad_norm": 11.41272258758545, "learning_rate": 1.9310539419087136e-05, "loss": 1.57, "step": 2081 }, { "epoch": 1.7278008298755188, "grad_norm": 11.147222518920898, "learning_rate": 1.9310207468879668e-05, "loss": 1.1693, "step": 2082 }, { "epoch": 1.728630705394191, "grad_norm": 12.118568420410156, "learning_rate": 1.93098755186722e-05, "loss": 1.2602, "step": 2083 }, { "epoch": 1.7294605809128631, "grad_norm": 8.742721557617188, "learning_rate": 1.9309543568464732e-05, "loss": 1.1974, "step": 2084 }, { "epoch": 1.7302904564315353, "grad_norm": 7.282506465911865, "learning_rate": 1.930921161825726e-05, "loss": 0.8336, "step": 2085 }, { "epoch": 1.7311203319502075, "grad_norm": 12.70893669128418, "learning_rate": 1.9308879668049793e-05, "loss": 1.9755, "step": 2086 }, { "epoch": 1.7319502074688797, "grad_norm": 11.09173583984375, "learning_rate": 1.9308547717842325e-05, "loss": 1.6822, "step": 2087 }, { "epoch": 1.732780082987552, "grad_norm": 8.763065338134766, "learning_rate": 1.9308215767634857e-05, "loss": 1.1394, "step": 2088 }, { "epoch": 1.7336099585062241, "grad_norm": 7.901734828948975, "learning_rate": 1.9307883817427386e-05, "loss": 0.7784, "step": 2089 }, { "epoch": 1.7344398340248963, "grad_norm": 9.459686279296875, "learning_rate": 1.9307551867219918e-05, "loss": 1.4903, "step": 2090 }, { "epoch": 1.7352697095435685, "grad_norm": 9.869329452514648, 
"learning_rate": 1.930721991701245e-05, "loss": 2.4045, "step": 2091 }, { "epoch": 1.7360995850622407, "grad_norm": 10.727017402648926, "learning_rate": 1.9306887966804982e-05, "loss": 1.9385, "step": 2092 }, { "epoch": 1.736929460580913, "grad_norm": 6.519128799438477, "learning_rate": 1.9306556016597514e-05, "loss": 1.0283, "step": 2093 }, { "epoch": 1.737759336099585, "grad_norm": 10.385659217834473, "learning_rate": 1.9306224066390043e-05, "loss": 1.1252, "step": 2094 }, { "epoch": 1.7385892116182573, "grad_norm": 5.967244625091553, "learning_rate": 1.9305892116182575e-05, "loss": 1.2834, "step": 2095 }, { "epoch": 1.7394190871369295, "grad_norm": 8.356549263000488, "learning_rate": 1.9305560165975107e-05, "loss": 1.7375, "step": 2096 }, { "epoch": 1.7402489626556017, "grad_norm": 9.785962104797363, "learning_rate": 1.9305228215767636e-05, "loss": 1.7032, "step": 2097 }, { "epoch": 1.741078838174274, "grad_norm": 8.771713256835938, "learning_rate": 1.9304896265560168e-05, "loss": 1.259, "step": 2098 }, { "epoch": 1.741908713692946, "grad_norm": 9.122869491577148, "learning_rate": 1.9304564315352697e-05, "loss": 1.4438, "step": 2099 }, { "epoch": 1.7427385892116183, "grad_norm": 12.009428977966309, "learning_rate": 1.930423236514523e-05, "loss": 1.6691, "step": 2100 }, { "epoch": 1.7435684647302905, "grad_norm": 10.286236763000488, "learning_rate": 1.930390041493776e-05, "loss": 2.3869, "step": 2101 }, { "epoch": 1.7443983402489627, "grad_norm": 14.147066116333008, "learning_rate": 1.930356846473029e-05, "loss": 1.158, "step": 2102 }, { "epoch": 1.7452282157676349, "grad_norm": 10.09622859954834, "learning_rate": 1.9303236514522822e-05, "loss": 2.0363, "step": 2103 }, { "epoch": 1.746058091286307, "grad_norm": 10.496744155883789, "learning_rate": 1.9302904564315354e-05, "loss": 1.9517, "step": 2104 }, { "epoch": 1.7468879668049793, "grad_norm": 7.9813032150268555, "learning_rate": 1.9302572614107886e-05, "loss": 1.8876, "step": 2105 }, { "epoch": 
1.7477178423236515, "grad_norm": 9.822171211242676, "learning_rate": 1.9302240663900415e-05, "loss": 2.5616, "step": 2106 }, { "epoch": 1.7485477178423237, "grad_norm": 9.599821090698242, "learning_rate": 1.9301908713692947e-05, "loss": 1.3474, "step": 2107 }, { "epoch": 1.7493775933609959, "grad_norm": 8.163154602050781, "learning_rate": 1.930157676348548e-05, "loss": 1.0776, "step": 2108 }, { "epoch": 1.750207468879668, "grad_norm": 9.4437894821167, "learning_rate": 1.930124481327801e-05, "loss": 1.838, "step": 2109 }, { "epoch": 1.7510373443983402, "grad_norm": 11.557877540588379, "learning_rate": 1.930091286307054e-05, "loss": 1.4867, "step": 2110 }, { "epoch": 1.7518672199170124, "grad_norm": 11.02773380279541, "learning_rate": 1.9300580912863072e-05, "loss": 1.4938, "step": 2111 }, { "epoch": 1.7526970954356846, "grad_norm": 7.236415386199951, "learning_rate": 1.9300248962655604e-05, "loss": 1.4917, "step": 2112 }, { "epoch": 1.7535269709543568, "grad_norm": 8.367701530456543, "learning_rate": 1.9299917012448136e-05, "loss": 0.8918, "step": 2113 }, { "epoch": 1.754356846473029, "grad_norm": 7.248497009277344, "learning_rate": 1.9299585062240665e-05, "loss": 1.346, "step": 2114 }, { "epoch": 1.7551867219917012, "grad_norm": 7.85228967666626, "learning_rate": 1.9299253112033197e-05, "loss": 1.3335, "step": 2115 }, { "epoch": 1.7560165975103734, "grad_norm": 10.099919319152832, "learning_rate": 1.929892116182573e-05, "loss": 1.1457, "step": 2116 }, { "epoch": 1.7568464730290456, "grad_norm": 7.462228298187256, "learning_rate": 1.9298589211618258e-05, "loss": 1.3156, "step": 2117 }, { "epoch": 1.7576763485477178, "grad_norm": 7.05635929107666, "learning_rate": 1.929825726141079e-05, "loss": 1.0396, "step": 2118 }, { "epoch": 1.75850622406639, "grad_norm": 9.378667831420898, "learning_rate": 1.9297925311203322e-05, "loss": 0.7795, "step": 2119 }, { "epoch": 1.7593360995850622, "grad_norm": 10.13689136505127, "learning_rate": 1.929759336099585e-05, "loss": 1.0378, 
"step": 2120 }, { "epoch": 1.7601659751037344, "grad_norm": 7.704498291015625, "learning_rate": 1.9297261410788383e-05, "loss": 1.1031, "step": 2121 }, { "epoch": 1.7609958506224066, "grad_norm": 10.970793724060059, "learning_rate": 1.9296929460580915e-05, "loss": 1.7701, "step": 2122 }, { "epoch": 1.7618257261410788, "grad_norm": 11.227266311645508, "learning_rate": 1.9296597510373444e-05, "loss": 1.174, "step": 2123 }, { "epoch": 1.762655601659751, "grad_norm": 8.570856094360352, "learning_rate": 1.9296265560165976e-05, "loss": 1.2374, "step": 2124 }, { "epoch": 1.7634854771784232, "grad_norm": 9.691099166870117, "learning_rate": 1.9295933609958508e-05, "loss": 1.5277, "step": 2125 }, { "epoch": 1.7643153526970954, "grad_norm": 8.926804542541504, "learning_rate": 1.929560165975104e-05, "loss": 1.1718, "step": 2126 }, { "epoch": 1.7651452282157676, "grad_norm": 11.298043251037598, "learning_rate": 1.929526970954357e-05, "loss": 2.2427, "step": 2127 }, { "epoch": 1.7659751037344398, "grad_norm": 7.744802951812744, "learning_rate": 1.92949377593361e-05, "loss": 1.1782, "step": 2128 }, { "epoch": 1.766804979253112, "grad_norm": 9.524020195007324, "learning_rate": 1.9294605809128633e-05, "loss": 2.0979, "step": 2129 }, { "epoch": 1.7676348547717842, "grad_norm": 8.992040634155273, "learning_rate": 1.9294273858921165e-05, "loss": 1.9791, "step": 2130 }, { "epoch": 1.7684647302904564, "grad_norm": 12.233969688415527, "learning_rate": 1.9293941908713694e-05, "loss": 1.9001, "step": 2131 }, { "epoch": 1.7692946058091286, "grad_norm": 11.165228843688965, "learning_rate": 1.9293609958506226e-05, "loss": 1.266, "step": 2132 }, { "epoch": 1.7701244813278008, "grad_norm": 9.32113265991211, "learning_rate": 1.9293278008298758e-05, "loss": 0.862, "step": 2133 }, { "epoch": 1.770954356846473, "grad_norm": 9.762495994567871, "learning_rate": 1.929294605809129e-05, "loss": 2.0899, "step": 2134 }, { "epoch": 1.7717842323651452, "grad_norm": 7.282179355621338, "learning_rate": 
1.929261410788382e-05, "loss": 1.651, "step": 2135 }, { "epoch": 1.7726141078838173, "grad_norm": 10.548526763916016, "learning_rate": 1.929228215767635e-05, "loss": 1.3122, "step": 2136 }, { "epoch": 1.7734439834024895, "grad_norm": 8.919600486755371, "learning_rate": 1.929195020746888e-05, "loss": 1.8575, "step": 2137 }, { "epoch": 1.7742738589211617, "grad_norm": 9.551130294799805, "learning_rate": 1.9291618257261412e-05, "loss": 1.5919, "step": 2138 }, { "epoch": 1.775103734439834, "grad_norm": 8.767273902893066, "learning_rate": 1.9291286307053944e-05, "loss": 1.4719, "step": 2139 }, { "epoch": 1.7759336099585061, "grad_norm": 11.34361457824707, "learning_rate": 1.9290954356846473e-05, "loss": 1.4933, "step": 2140 }, { "epoch": 1.7767634854771783, "grad_norm": 9.553133964538574, "learning_rate": 1.9290622406639005e-05, "loss": 1.376, "step": 2141 }, { "epoch": 1.7775933609958505, "grad_norm": 14.422611236572266, "learning_rate": 1.9290290456431537e-05, "loss": 1.5553, "step": 2142 }, { "epoch": 1.7784232365145227, "grad_norm": 9.957442283630371, "learning_rate": 1.928995850622407e-05, "loss": 1.5787, "step": 2143 }, { "epoch": 1.779253112033195, "grad_norm": 10.227676391601562, "learning_rate": 1.9289626556016598e-05, "loss": 1.9787, "step": 2144 }, { "epoch": 1.7800829875518671, "grad_norm": 7.3123321533203125, "learning_rate": 1.928929460580913e-05, "loss": 0.7373, "step": 2145 }, { "epoch": 1.7809128630705393, "grad_norm": 12.067768096923828, "learning_rate": 1.9288962655601662e-05, "loss": 1.0342, "step": 2146 }, { "epoch": 1.7817427385892115, "grad_norm": 11.213293075561523, "learning_rate": 1.9288630705394194e-05, "loss": 1.4322, "step": 2147 }, { "epoch": 1.7825726141078837, "grad_norm": 10.249102592468262, "learning_rate": 1.9288298755186723e-05, "loss": 1.2263, "step": 2148 }, { "epoch": 1.783402489626556, "grad_norm": 13.349039077758789, "learning_rate": 1.9287966804979255e-05, "loss": 2.1468, "step": 2149 }, { "epoch": 1.784232365145228, 
"grad_norm": 8.267746925354004, "learning_rate": 1.9287634854771787e-05, "loss": 2.1351, "step": 2150 }, { "epoch": 1.7850622406639003, "grad_norm": 11.316544532775879, "learning_rate": 1.928730290456432e-05, "loss": 1.6091, "step": 2151 }, { "epoch": 1.7858921161825725, "grad_norm": 8.49838924407959, "learning_rate": 1.9286970954356848e-05, "loss": 1.1356, "step": 2152 }, { "epoch": 1.7867219917012447, "grad_norm": 7.746708869934082, "learning_rate": 1.928663900414938e-05, "loss": 1.6905, "step": 2153 }, { "epoch": 1.7875518672199169, "grad_norm": 9.092689514160156, "learning_rate": 1.9286307053941912e-05, "loss": 1.1867, "step": 2154 }, { "epoch": 1.788381742738589, "grad_norm": 8.432912826538086, "learning_rate": 1.928597510373444e-05, "loss": 1.5841, "step": 2155 }, { "epoch": 1.7892116182572613, "grad_norm": 7.540434837341309, "learning_rate": 1.9285643153526973e-05, "loss": 1.2479, "step": 2156 }, { "epoch": 1.7900414937759335, "grad_norm": 6.548765659332275, "learning_rate": 1.9285311203319505e-05, "loss": 0.8841, "step": 2157 }, { "epoch": 1.7908713692946057, "grad_norm": 6.19071626663208, "learning_rate": 1.9284979253112034e-05, "loss": 0.8888, "step": 2158 }, { "epoch": 1.7917012448132779, "grad_norm": 11.7321138381958, "learning_rate": 1.9284647302904566e-05, "loss": 1.4688, "step": 2159 }, { "epoch": 1.79253112033195, "grad_norm": 9.409035682678223, "learning_rate": 1.9284315352697095e-05, "loss": 0.9735, "step": 2160 }, { "epoch": 1.7933609958506223, "grad_norm": 7.621585845947266, "learning_rate": 1.9283983402489627e-05, "loss": 1.2526, "step": 2161 }, { "epoch": 1.7941908713692944, "grad_norm": 6.920598030090332, "learning_rate": 1.928365145228216e-05, "loss": 1.5663, "step": 2162 }, { "epoch": 1.7950207468879666, "grad_norm": 8.012588500976562, "learning_rate": 1.928331950207469e-05, "loss": 1.29, "step": 2163 }, { "epoch": 1.7958506224066388, "grad_norm": 7.878469944000244, "learning_rate": 1.928298755186722e-05, "loss": 1.6398, "step": 2164 }, { 
"epoch": 1.796680497925311, "grad_norm": 8.105186462402344, "learning_rate": 1.928265560165975e-05, "loss": 1.7281, "step": 2165 }, { "epoch": 1.7975103734439835, "grad_norm": 13.06432819366455, "learning_rate": 1.9282323651452284e-05, "loss": 1.4966, "step": 2166 }, { "epoch": 1.7983402489626557, "grad_norm": 8.314264297485352, "learning_rate": 1.9281991701244816e-05, "loss": 1.4235, "step": 2167 }, { "epoch": 1.7991701244813278, "grad_norm": 8.537186622619629, "learning_rate": 1.9281659751037345e-05, "loss": 1.0795, "step": 2168 }, { "epoch": 1.8, "grad_norm": 18.544973373413086, "learning_rate": 1.9281327800829877e-05, "loss": 2.5859, "step": 2169 }, { "epoch": 1.8008298755186722, "grad_norm": 7.787558555603027, "learning_rate": 1.928099585062241e-05, "loss": 1.5835, "step": 2170 }, { "epoch": 1.8016597510373444, "grad_norm": 7.184789657592773, "learning_rate": 1.928066390041494e-05, "loss": 1.7016, "step": 2171 }, { "epoch": 1.8024896265560166, "grad_norm": 9.430644035339355, "learning_rate": 1.9280331950207473e-05, "loss": 1.2649, "step": 2172 }, { "epoch": 1.8033195020746888, "grad_norm": 10.365245819091797, "learning_rate": 1.9280000000000002e-05, "loss": 1.7213, "step": 2173 }, { "epoch": 1.804149377593361, "grad_norm": 8.624671936035156, "learning_rate": 1.9279668049792534e-05, "loss": 1.9463, "step": 2174 }, { "epoch": 1.8049792531120332, "grad_norm": 7.8775858879089355, "learning_rate": 1.9279336099585063e-05, "loss": 1.9007, "step": 2175 }, { "epoch": 1.8058091286307054, "grad_norm": 8.234537124633789, "learning_rate": 1.9279004149377595e-05, "loss": 1.3017, "step": 2176 }, { "epoch": 1.8066390041493776, "grad_norm": 12.914467811584473, "learning_rate": 1.9278672199170127e-05, "loss": 2.1861, "step": 2177 }, { "epoch": 1.8074688796680498, "grad_norm": 11.663803100585938, "learning_rate": 1.9278340248962656e-05, "loss": 2.007, "step": 2178 }, { "epoch": 1.808298755186722, "grad_norm": 10.955609321594238, "learning_rate": 1.9278008298755188e-05, "loss": 
1.5653, "step": 2179 }, { "epoch": 1.8091286307053942, "grad_norm": 6.659911632537842, "learning_rate": 1.927767634854772e-05, "loss": 1.1794, "step": 2180 }, { "epoch": 1.8099585062240664, "grad_norm": 8.41240119934082, "learning_rate": 1.927734439834025e-05, "loss": 1.2423, "step": 2181 }, { "epoch": 1.8107883817427386, "grad_norm": 8.295307159423828, "learning_rate": 1.927701244813278e-05, "loss": 1.1898, "step": 2182 }, { "epoch": 1.8116182572614108, "grad_norm": 8.267765998840332, "learning_rate": 1.9276680497925313e-05, "loss": 0.8202, "step": 2183 }, { "epoch": 1.812448132780083, "grad_norm": 9.372530937194824, "learning_rate": 1.9276348547717845e-05, "loss": 1.2239, "step": 2184 }, { "epoch": 1.8132780082987552, "grad_norm": 7.203227519989014, "learning_rate": 1.9276016597510373e-05, "loss": 1.0474, "step": 2185 }, { "epoch": 1.8141078838174274, "grad_norm": 12.838705062866211, "learning_rate": 1.9275684647302906e-05, "loss": 1.6305, "step": 2186 }, { "epoch": 1.8149377593360996, "grad_norm": 10.85132122039795, "learning_rate": 1.9275352697095438e-05, "loss": 1.5687, "step": 2187 }, { "epoch": 1.8157676348547718, "grad_norm": 9.057552337646484, "learning_rate": 1.927502074688797e-05, "loss": 1.218, "step": 2188 }, { "epoch": 1.816597510373444, "grad_norm": 8.299690246582031, "learning_rate": 1.92746887966805e-05, "loss": 1.1969, "step": 2189 }, { "epoch": 1.8174273858921162, "grad_norm": 8.256765365600586, "learning_rate": 1.927435684647303e-05, "loss": 1.3761, "step": 2190 }, { "epoch": 1.8182572614107884, "grad_norm": 14.869793891906738, "learning_rate": 1.9274024896265563e-05, "loss": 1.6435, "step": 2191 }, { "epoch": 1.8190871369294606, "grad_norm": 10.511138916015625, "learning_rate": 1.9273692946058095e-05, "loss": 1.5049, "step": 2192 }, { "epoch": 1.8199170124481328, "grad_norm": 13.941792488098145, "learning_rate": 1.9273360995850624e-05, "loss": 0.9623, "step": 2193 }, { "epoch": 1.820746887966805, "grad_norm": 9.33469009399414, "learning_rate": 
1.9273029045643156e-05, "loss": 1.7238, "step": 2194 }, { "epoch": 1.8215767634854771, "grad_norm": 9.592551231384277, "learning_rate": 1.9272697095435688e-05, "loss": 1.8717, "step": 2195 }, { "epoch": 1.8224066390041493, "grad_norm": 13.92823600769043, "learning_rate": 1.9272365145228217e-05, "loss": 1.241, "step": 2196 }, { "epoch": 1.8232365145228215, "grad_norm": 8.72945499420166, "learning_rate": 1.927203319502075e-05, "loss": 1.2738, "step": 2197 }, { "epoch": 1.8240663900414937, "grad_norm": 10.134559631347656, "learning_rate": 1.9271701244813277e-05, "loss": 1.2227, "step": 2198 }, { "epoch": 1.824896265560166, "grad_norm": 10.694923400878906, "learning_rate": 1.927136929460581e-05, "loss": 1.9162, "step": 2199 }, { "epoch": 1.8257261410788381, "grad_norm": 9.409418106079102, "learning_rate": 1.927103734439834e-05, "loss": 2.1395, "step": 2200 }, { "epoch": 1.8265560165975103, "grad_norm": 5.694802284240723, "learning_rate": 1.9270705394190874e-05, "loss": 1.0867, "step": 2201 }, { "epoch": 1.8273858921161825, "grad_norm": 7.0895256996154785, "learning_rate": 1.9270373443983402e-05, "loss": 1.2225, "step": 2202 }, { "epoch": 1.828215767634855, "grad_norm": 7.898608207702637, "learning_rate": 1.9270041493775934e-05, "loss": 1.424, "step": 2203 }, { "epoch": 1.8290456431535271, "grad_norm": 13.49325942993164, "learning_rate": 1.9269709543568467e-05, "loss": 2.098, "step": 2204 }, { "epoch": 1.8298755186721993, "grad_norm": 15.317045211791992, "learning_rate": 1.9269377593361e-05, "loss": 1.5649, "step": 2205 }, { "epoch": 1.8307053941908715, "grad_norm": 8.108298301696777, "learning_rate": 1.9269045643153527e-05, "loss": 1.6145, "step": 2206 }, { "epoch": 1.8315352697095437, "grad_norm": 7.8493123054504395, "learning_rate": 1.926871369294606e-05, "loss": 1.2988, "step": 2207 }, { "epoch": 1.832365145228216, "grad_norm": 16.971529006958008, "learning_rate": 1.926838174273859e-05, "loss": 1.8805, "step": 2208 }, { "epoch": 1.8331950207468881, "grad_norm": 
7.856917381286621, "learning_rate": 1.9268049792531124e-05, "loss": 1.7747, "step": 2209 }, { "epoch": 1.8340248962655603, "grad_norm": 9.513200759887695, "learning_rate": 1.9267717842323652e-05, "loss": 1.8996, "step": 2210 }, { "epoch": 1.8348547717842325, "grad_norm": 7.430887699127197, "learning_rate": 1.9267385892116185e-05, "loss": 1.3898, "step": 2211 }, { "epoch": 1.8356846473029047, "grad_norm": 13.916132926940918, "learning_rate": 1.9267053941908717e-05, "loss": 1.9482, "step": 2212 }, { "epoch": 1.836514522821577, "grad_norm": 10.29243278503418, "learning_rate": 1.926672199170125e-05, "loss": 1.9598, "step": 2213 }, { "epoch": 1.837344398340249, "grad_norm": 12.055341720581055, "learning_rate": 1.9266390041493778e-05, "loss": 1.6093, "step": 2214 }, { "epoch": 1.8381742738589213, "grad_norm": 12.209753036499023, "learning_rate": 1.926605809128631e-05, "loss": 1.4246, "step": 2215 }, { "epoch": 1.8390041493775935, "grad_norm": 6.500247478485107, "learning_rate": 1.926572614107884e-05, "loss": 1.4973, "step": 2216 }, { "epoch": 1.8398340248962657, "grad_norm": 6.517870903015137, "learning_rate": 1.926539419087137e-05, "loss": 0.9348, "step": 2217 }, { "epoch": 1.8406639004149379, "grad_norm": 11.031837463378906, "learning_rate": 1.9265062240663903e-05, "loss": 1.1927, "step": 2218 }, { "epoch": 1.84149377593361, "grad_norm": 9.521615982055664, "learning_rate": 1.926473029045643e-05, "loss": 1.1135, "step": 2219 }, { "epoch": 1.8423236514522823, "grad_norm": 6.737220764160156, "learning_rate": 1.9264398340248963e-05, "loss": 1.0494, "step": 2220 }, { "epoch": 1.8431535269709545, "grad_norm": 12.233875274658203, "learning_rate": 1.9264066390041495e-05, "loss": 1.62, "step": 2221 }, { "epoch": 1.8439834024896267, "grad_norm": 14.895808219909668, "learning_rate": 1.9263734439834028e-05, "loss": 1.612, "step": 2222 }, { "epoch": 1.8448132780082989, "grad_norm": 7.557332992553711, "learning_rate": 1.9263402489626556e-05, "loss": 1.4051, "step": 2223 }, { 
"epoch": 1.845643153526971, "grad_norm": 10.522257804870605, "learning_rate": 1.926307053941909e-05, "loss": 1.585, "step": 2224 }, { "epoch": 1.8464730290456433, "grad_norm": 13.376199722290039, "learning_rate": 1.926273858921162e-05, "loss": 1.6103, "step": 2225 }, { "epoch": 1.8473029045643155, "grad_norm": 9.338712692260742, "learning_rate": 1.9262406639004153e-05, "loss": 1.1227, "step": 2226 }, { "epoch": 1.8481327800829876, "grad_norm": 9.2010498046875, "learning_rate": 1.926207468879668e-05, "loss": 1.0906, "step": 2227 }, { "epoch": 1.8489626556016598, "grad_norm": 9.129615783691406, "learning_rate": 1.9261742738589213e-05, "loss": 1.3987, "step": 2228 }, { "epoch": 1.849792531120332, "grad_norm": 11.310952186584473, "learning_rate": 1.9261410788381746e-05, "loss": 1.7376, "step": 2229 }, { "epoch": 1.8506224066390042, "grad_norm": 11.040858268737793, "learning_rate": 1.9261078838174278e-05, "loss": 1.0938, "step": 2230 }, { "epoch": 1.8514522821576764, "grad_norm": 7.92012882232666, "learning_rate": 1.9260746887966806e-05, "loss": 1.4712, "step": 2231 }, { "epoch": 1.8522821576763486, "grad_norm": 6.296175479888916, "learning_rate": 1.926041493775934e-05, "loss": 1.0531, "step": 2232 }, { "epoch": 1.8531120331950208, "grad_norm": 9.037479400634766, "learning_rate": 1.926008298755187e-05, "loss": 1.3266, "step": 2233 }, { "epoch": 1.853941908713693, "grad_norm": 6.9732208251953125, "learning_rate": 1.92597510373444e-05, "loss": 0.797, "step": 2234 }, { "epoch": 1.8547717842323652, "grad_norm": 9.27695083618164, "learning_rate": 1.925941908713693e-05, "loss": 1.3087, "step": 2235 }, { "epoch": 1.8556016597510374, "grad_norm": 14.641132354736328, "learning_rate": 1.9259087136929464e-05, "loss": 2.8319, "step": 2236 }, { "epoch": 1.8564315352697096, "grad_norm": 9.006206512451172, "learning_rate": 1.9258755186721992e-05, "loss": 1.8349, "step": 2237 }, { "epoch": 1.8572614107883818, "grad_norm": 14.053860664367676, "learning_rate": 1.9258423236514524e-05, 
"loss": 1.6672, "step": 2238 }, { "epoch": 1.858091286307054, "grad_norm": 7.722374439239502, "learning_rate": 1.9258091286307053e-05, "loss": 1.3384, "step": 2239 }, { "epoch": 1.8589211618257262, "grad_norm": 11.690129280090332, "learning_rate": 1.9257759336099585e-05, "loss": 1.6574, "step": 2240 }, { "epoch": 1.8597510373443984, "grad_norm": 8.391144752502441, "learning_rate": 1.9257427385892117e-05, "loss": 1.8114, "step": 2241 }, { "epoch": 1.8605809128630706, "grad_norm": 14.190875053405762, "learning_rate": 1.925709543568465e-05, "loss": 1.1298, "step": 2242 }, { "epoch": 1.8614107883817428, "grad_norm": 9.867246627807617, "learning_rate": 1.9256763485477178e-05, "loss": 0.8749, "step": 2243 }, { "epoch": 1.862240663900415, "grad_norm": 7.01045036315918, "learning_rate": 1.925643153526971e-05, "loss": 1.5424, "step": 2244 }, { "epoch": 1.8630705394190872, "grad_norm": 12.546241760253906, "learning_rate": 1.9256099585062242e-05, "loss": 1.9549, "step": 2245 }, { "epoch": 1.8639004149377594, "grad_norm": 10.65329360961914, "learning_rate": 1.9255767634854774e-05, "loss": 2.0743, "step": 2246 }, { "epoch": 1.8647302904564316, "grad_norm": 10.80991268157959, "learning_rate": 1.9255435684647303e-05, "loss": 1.9216, "step": 2247 }, { "epoch": 1.8655601659751038, "grad_norm": 9.170760154724121, "learning_rate": 1.9255103734439835e-05, "loss": 1.708, "step": 2248 }, { "epoch": 1.866390041493776, "grad_norm": 11.686246871948242, "learning_rate": 1.9254771784232367e-05, "loss": 1.741, "step": 2249 }, { "epoch": 1.8672199170124482, "grad_norm": 9.451083183288574, "learning_rate": 1.92544398340249e-05, "loss": 1.3951, "step": 2250 }, { "epoch": 1.8680497925311204, "grad_norm": 10.633955955505371, "learning_rate": 1.925410788381743e-05, "loss": 1.4505, "step": 2251 }, { "epoch": 1.8688796680497926, "grad_norm": 7.912226676940918, "learning_rate": 1.925377593360996e-05, "loss": 1.2232, "step": 2252 }, { "epoch": 1.8697095435684647, "grad_norm": 10.269554138183594, 
"learning_rate": 1.9253443983402492e-05, "loss": 1.6966, "step": 2253 }, { "epoch": 1.870539419087137, "grad_norm": 17.31600570678711, "learning_rate": 1.925311203319502e-05, "loss": 1.3412, "step": 2254 }, { "epoch": 1.8713692946058091, "grad_norm": 16.91676902770996, "learning_rate": 1.9252780082987553e-05, "loss": 1.1935, "step": 2255 }, { "epoch": 1.8721991701244813, "grad_norm": 13.992193222045898, "learning_rate": 1.9252448132780085e-05, "loss": 1.751, "step": 2256 }, { "epoch": 1.8730290456431535, "grad_norm": 6.980785846710205, "learning_rate": 1.9252116182572614e-05, "loss": 1.8007, "step": 2257 }, { "epoch": 1.8738589211618257, "grad_norm": 13.333746910095215, "learning_rate": 1.9251784232365146e-05, "loss": 2.1346, "step": 2258 }, { "epoch": 1.874688796680498, "grad_norm": 9.709553718566895, "learning_rate": 1.9251452282157678e-05, "loss": 1.1639, "step": 2259 }, { "epoch": 1.8755186721991701, "grad_norm": 7.800947666168213, "learning_rate": 1.9251120331950207e-05, "loss": 1.1934, "step": 2260 }, { "epoch": 1.8763485477178423, "grad_norm": 9.975723266601562, "learning_rate": 1.925078838174274e-05, "loss": 1.5556, "step": 2261 }, { "epoch": 1.8771784232365145, "grad_norm": 7.33115291595459, "learning_rate": 1.925045643153527e-05, "loss": 1.7927, "step": 2262 }, { "epoch": 1.8780082987551867, "grad_norm": 9.53356647491455, "learning_rate": 1.9250124481327803e-05, "loss": 1.216, "step": 2263 }, { "epoch": 1.878838174273859, "grad_norm": 10.478517532348633, "learning_rate": 1.9249792531120332e-05, "loss": 1.9417, "step": 2264 }, { "epoch": 1.879668049792531, "grad_norm": 7.298680305480957, "learning_rate": 1.9249460580912864e-05, "loss": 1.4152, "step": 2265 }, { "epoch": 1.8804979253112033, "grad_norm": 8.461395263671875, "learning_rate": 1.9249128630705396e-05, "loss": 1.1654, "step": 2266 }, { "epoch": 1.8813278008298755, "grad_norm": 9.24711799621582, "learning_rate": 1.924879668049793e-05, "loss": 2.0546, "step": 2267 }, { "epoch": 1.8821576763485477, 
"grad_norm": 7.404140472412109, "learning_rate": 1.9248464730290457e-05, "loss": 1.3816, "step": 2268 }, { "epoch": 1.8829875518672199, "grad_norm": 7.987394332885742, "learning_rate": 1.924813278008299e-05, "loss": 1.2172, "step": 2269 }, { "epoch": 1.883817427385892, "grad_norm": 12.325611114501953, "learning_rate": 1.924780082987552e-05, "loss": 1.3114, "step": 2270 }, { "epoch": 1.8846473029045643, "grad_norm": 6.763222694396973, "learning_rate": 1.9247468879668053e-05, "loss": 1.2578, "step": 2271 }, { "epoch": 1.8854771784232365, "grad_norm": 9.103304862976074, "learning_rate": 1.9247136929460582e-05, "loss": 1.5882, "step": 2272 }, { "epoch": 1.8863070539419087, "grad_norm": 12.36307430267334, "learning_rate": 1.9246804979253114e-05, "loss": 1.353, "step": 2273 }, { "epoch": 1.8871369294605809, "grad_norm": 7.07887601852417, "learning_rate": 1.9246473029045646e-05, "loss": 1.5102, "step": 2274 }, { "epoch": 1.887966804979253, "grad_norm": 10.084634780883789, "learning_rate": 1.9246141078838175e-05, "loss": 1.3361, "step": 2275 }, { "epoch": 1.8887966804979253, "grad_norm": 13.804167747497559, "learning_rate": 1.9245809128630707e-05, "loss": 2.0866, "step": 2276 }, { "epoch": 1.8896265560165975, "grad_norm": 7.274987697601318, "learning_rate": 1.9245477178423236e-05, "loss": 0.7418, "step": 2277 }, { "epoch": 1.8904564315352697, "grad_norm": 13.919780731201172, "learning_rate": 1.9245145228215768e-05, "loss": 1.876, "step": 2278 }, { "epoch": 1.8912863070539419, "grad_norm": 12.396402359008789, "learning_rate": 1.92448132780083e-05, "loss": 2.3571, "step": 2279 }, { "epoch": 1.892116182572614, "grad_norm": 6.6701765060424805, "learning_rate": 1.9244481327800832e-05, "loss": 1.2796, "step": 2280 }, { "epoch": 1.8929460580912862, "grad_norm": 10.230559349060059, "learning_rate": 1.924414937759336e-05, "loss": 1.576, "step": 2281 }, { "epoch": 1.8937759336099584, "grad_norm": 15.330816268920898, "learning_rate": 1.9243817427385893e-05, "loss": 2.3848, "step": 
2282 }, { "epoch": 1.8946058091286306, "grad_norm": 10.03857707977295, "learning_rate": 1.9243485477178425e-05, "loss": 2.0699, "step": 2283 }, { "epoch": 1.8954356846473028, "grad_norm": 8.207927703857422, "learning_rate": 1.9243153526970957e-05, "loss": 1.4352, "step": 2284 }, { "epoch": 1.896265560165975, "grad_norm": 8.035945892333984, "learning_rate": 1.9242821576763486e-05, "loss": 1.4996, "step": 2285 }, { "epoch": 1.8970954356846472, "grad_norm": 5.958741664886475, "learning_rate": 1.9242489626556018e-05, "loss": 1.1337, "step": 2286 }, { "epoch": 1.8979253112033194, "grad_norm": 9.391554832458496, "learning_rate": 1.924215767634855e-05, "loss": 1.611, "step": 2287 }, { "epoch": 1.8987551867219916, "grad_norm": 10.86502456665039, "learning_rate": 1.9241825726141082e-05, "loss": 1.3766, "step": 2288 }, { "epoch": 1.8995850622406638, "grad_norm": 18.307016372680664, "learning_rate": 1.924149377593361e-05, "loss": 1.9953, "step": 2289 }, { "epoch": 1.900414937759336, "grad_norm": 9.37975025177002, "learning_rate": 1.9241161825726143e-05, "loss": 1.745, "step": 2290 }, { "epoch": 1.9012448132780082, "grad_norm": 10.642926216125488, "learning_rate": 1.9240829875518675e-05, "loss": 1.5417, "step": 2291 }, { "epoch": 1.9020746887966804, "grad_norm": 7.648264408111572, "learning_rate": 1.9240497925311204e-05, "loss": 0.9617, "step": 2292 }, { "epoch": 1.9029045643153526, "grad_norm": 8.095795631408691, "learning_rate": 1.9240165975103736e-05, "loss": 1.5, "step": 2293 }, { "epoch": 1.9037344398340248, "grad_norm": 9.668135643005371, "learning_rate": 1.9239834024896268e-05, "loss": 0.8648, "step": 2294 }, { "epoch": 1.904564315352697, "grad_norm": 10.628385543823242, "learning_rate": 1.9239502074688797e-05, "loss": 1.361, "step": 2295 }, { "epoch": 1.9053941908713692, "grad_norm": 13.112011909484863, "learning_rate": 1.923917012448133e-05, "loss": 2.4558, "step": 2296 }, { "epoch": 1.9062240663900414, "grad_norm": 8.839573860168457, "learning_rate": 
1.923883817427386e-05, "loss": 0.8294, "step": 2297 }, { "epoch": 1.9070539419087136, "grad_norm": 7.602405071258545, "learning_rate": 1.923850622406639e-05, "loss": 1.4632, "step": 2298 }, { "epoch": 1.9078838174273858, "grad_norm": 15.840035438537598, "learning_rate": 1.9238174273858922e-05, "loss": 1.9685, "step": 2299 }, { "epoch": 1.908713692946058, "grad_norm": 10.269492149353027, "learning_rate": 1.9237842323651454e-05, "loss": 1.7075, "step": 2300 }, { "epoch": 1.9095435684647302, "grad_norm": 7.206499099731445, "learning_rate": 1.9237510373443986e-05, "loss": 1.2486, "step": 2301 }, { "epoch": 1.9103734439834024, "grad_norm": 9.208133697509766, "learning_rate": 1.9237178423236515e-05, "loss": 1.889, "step": 2302 }, { "epoch": 1.9112033195020746, "grad_norm": 12.383056640625, "learning_rate": 1.9236846473029047e-05, "loss": 0.8701, "step": 2303 }, { "epoch": 1.9120331950207468, "grad_norm": 8.305276870727539, "learning_rate": 1.923651452282158e-05, "loss": 0.9711, "step": 2304 }, { "epoch": 1.912863070539419, "grad_norm": 14.526252746582031, "learning_rate": 1.923618257261411e-05, "loss": 2.3631, "step": 2305 }, { "epoch": 1.9136929460580911, "grad_norm": 14.089731216430664, "learning_rate": 1.923585062240664e-05, "loss": 1.1692, "step": 2306 }, { "epoch": 1.9145228215767633, "grad_norm": 5.871735572814941, "learning_rate": 1.9235518672199172e-05, "loss": 1.1297, "step": 2307 }, { "epoch": 1.9153526970954355, "grad_norm": 9.810627937316895, "learning_rate": 1.9235186721991704e-05, "loss": 1.355, "step": 2308 }, { "epoch": 1.9161825726141077, "grad_norm": 12.230180740356445, "learning_rate": 1.9234854771784236e-05, "loss": 2.3151, "step": 2309 }, { "epoch": 1.91701244813278, "grad_norm": 14.5304536819458, "learning_rate": 1.9234522821576765e-05, "loss": 1.6545, "step": 2310 }, { "epoch": 1.9178423236514521, "grad_norm": 10.298810958862305, "learning_rate": 1.9234190871369297e-05, "loss": 1.6537, "step": 2311 }, { "epoch": 1.9186721991701243, "grad_norm": 
7.481245040893555, "learning_rate": 1.923385892116183e-05, "loss": 1.5228, "step": 2312 }, { "epoch": 1.9195020746887965, "grad_norm": 11.70166301727295, "learning_rate": 1.9233526970954358e-05, "loss": 1.8214, "step": 2313 }, { "epoch": 1.9203319502074687, "grad_norm": 22.586332321166992, "learning_rate": 1.923319502074689e-05, "loss": 2.2095, "step": 2314 }, { "epoch": 1.921161825726141, "grad_norm": 11.395065307617188, "learning_rate": 1.923286307053942e-05, "loss": 2.2111, "step": 2315 }, { "epoch": 1.9219917012448133, "grad_norm": 5.798124313354492, "learning_rate": 1.923253112033195e-05, "loss": 0.999, "step": 2316 }, { "epoch": 1.9228215767634855, "grad_norm": 9.029067993164062, "learning_rate": 1.9232199170124483e-05, "loss": 0.8962, "step": 2317 }, { "epoch": 1.9236514522821577, "grad_norm": 12.466934204101562, "learning_rate": 1.923186721991701e-05, "loss": 2.503, "step": 2318 }, { "epoch": 1.92448132780083, "grad_norm": 9.480273246765137, "learning_rate": 1.9231535269709544e-05, "loss": 1.4075, "step": 2319 }, { "epoch": 1.9253112033195021, "grad_norm": 7.786736011505127, "learning_rate": 1.9231203319502076e-05, "loss": 1.4682, "step": 2320 }, { "epoch": 1.9261410788381743, "grad_norm": 5.162440776824951, "learning_rate": 1.9230871369294608e-05, "loss": 1.4073, "step": 2321 }, { "epoch": 1.9269709543568465, "grad_norm": 6.697940349578857, "learning_rate": 1.9230539419087137e-05, "loss": 1.3881, "step": 2322 }, { "epoch": 1.9278008298755187, "grad_norm": 14.355868339538574, "learning_rate": 1.923020746887967e-05, "loss": 1.7317, "step": 2323 }, { "epoch": 1.928630705394191, "grad_norm": 8.294445991516113, "learning_rate": 1.92298755186722e-05, "loss": 1.6279, "step": 2324 }, { "epoch": 1.929460580912863, "grad_norm": 9.757670402526855, "learning_rate": 1.9229543568464733e-05, "loss": 1.5904, "step": 2325 }, { "epoch": 1.9302904564315353, "grad_norm": 10.089956283569336, "learning_rate": 1.9229211618257262e-05, "loss": 1.2326, "step": 2326 }, { "epoch": 
1.9311203319502075, "grad_norm": 7.999348163604736, "learning_rate": 1.9228879668049794e-05, "loss": 2.2276, "step": 2327 }, { "epoch": 1.9319502074688797, "grad_norm": 10.661383628845215, "learning_rate": 1.9228547717842326e-05, "loss": 1.1029, "step": 2328 }, { "epoch": 1.9327800829875519, "grad_norm": 11.851531982421875, "learning_rate": 1.9228215767634858e-05, "loss": 1.8571, "step": 2329 }, { "epoch": 1.933609958506224, "grad_norm": 9.163769721984863, "learning_rate": 1.922788381742739e-05, "loss": 0.9656, "step": 2330 }, { "epoch": 1.9344398340248963, "grad_norm": 9.18065071105957, "learning_rate": 1.922755186721992e-05, "loss": 1.9122, "step": 2331 }, { "epoch": 1.9352697095435685, "grad_norm": 12.31534481048584, "learning_rate": 1.922721991701245e-05, "loss": 2.1034, "step": 2332 }, { "epoch": 1.9360995850622407, "grad_norm": 9.632641792297363, "learning_rate": 1.922688796680498e-05, "loss": 1.45, "step": 2333 }, { "epoch": 1.9369294605809129, "grad_norm": 6.549619674682617, "learning_rate": 1.9226556016597512e-05, "loss": 1.5653, "step": 2334 }, { "epoch": 1.937759336099585, "grad_norm": 10.691838264465332, "learning_rate": 1.9226224066390044e-05, "loss": 1.207, "step": 2335 }, { "epoch": 1.9385892116182573, "grad_norm": 12.162968635559082, "learning_rate": 1.9225892116182573e-05, "loss": 2.1902, "step": 2336 }, { "epoch": 1.9394190871369295, "grad_norm": 8.271276473999023, "learning_rate": 1.9225560165975105e-05, "loss": 1.0253, "step": 2337 }, { "epoch": 1.9402489626556016, "grad_norm": 9.06953239440918, "learning_rate": 1.9225228215767637e-05, "loss": 2.0628, "step": 2338 }, { "epoch": 1.9410788381742738, "grad_norm": 11.2086820602417, "learning_rate": 1.9224896265560166e-05, "loss": 1.186, "step": 2339 }, { "epoch": 1.941908713692946, "grad_norm": 12.051876068115234, "learning_rate": 1.9224564315352698e-05, "loss": 2.0441, "step": 2340 }, { "epoch": 1.9427385892116182, "grad_norm": 8.23487663269043, "learning_rate": 1.922423236514523e-05, "loss": 
1.1503, "step": 2341 }, { "epoch": 1.9435684647302904, "grad_norm": 12.44682502746582, "learning_rate": 1.9223900414937762e-05, "loss": 2.3708, "step": 2342 }, { "epoch": 1.9443983402489626, "grad_norm": 7.946966648101807, "learning_rate": 1.922356846473029e-05, "loss": 1.1915, "step": 2343 }, { "epoch": 1.9452282157676348, "grad_norm": 6.000150680541992, "learning_rate": 1.9223236514522823e-05, "loss": 1.2133, "step": 2344 }, { "epoch": 1.946058091286307, "grad_norm": 12.243410110473633, "learning_rate": 1.9222904564315355e-05, "loss": 1.356, "step": 2345 }, { "epoch": 1.9468879668049792, "grad_norm": 15.757204055786133, "learning_rate": 1.9222572614107887e-05, "loss": 1.7504, "step": 2346 }, { "epoch": 1.9477178423236514, "grad_norm": 11.032793998718262, "learning_rate": 1.9222240663900416e-05, "loss": 1.2574, "step": 2347 }, { "epoch": 1.9485477178423236, "grad_norm": 7.431301116943359, "learning_rate": 1.9221908713692948e-05, "loss": 1.5487, "step": 2348 }, { "epoch": 1.9493775933609958, "grad_norm": 8.27353286743164, "learning_rate": 1.922157676348548e-05, "loss": 1.4345, "step": 2349 }, { "epoch": 1.950207468879668, "grad_norm": 9.710593223571777, "learning_rate": 1.9221244813278012e-05, "loss": 1.2274, "step": 2350 }, { "epoch": 1.9510373443983402, "grad_norm": 13.743937492370605, "learning_rate": 1.922091286307054e-05, "loss": 1.4916, "step": 2351 }, { "epoch": 1.9518672199170124, "grad_norm": 10.740615844726562, "learning_rate": 1.9220580912863073e-05, "loss": 1.9341, "step": 2352 }, { "epoch": 1.9526970954356846, "grad_norm": 10.917229652404785, "learning_rate": 1.9220248962655605e-05, "loss": 1.3653, "step": 2353 }, { "epoch": 1.953526970954357, "grad_norm": 9.310868263244629, "learning_rate": 1.9219917012448134e-05, "loss": 2.5947, "step": 2354 }, { "epoch": 1.9543568464730292, "grad_norm": 7.930066108703613, "learning_rate": 1.9219585062240666e-05, "loss": 1.7776, "step": 2355 }, { "epoch": 1.9551867219917014, "grad_norm": 10.571444511413574, 
"learning_rate": 1.9219253112033194e-05, "loss": 2.0451, "step": 2356 }, { "epoch": 1.9560165975103736, "grad_norm": 10.800997734069824, "learning_rate": 1.9218921161825727e-05, "loss": 1.6312, "step": 2357 }, { "epoch": 1.9568464730290458, "grad_norm": 9.716198921203613, "learning_rate": 1.921858921161826e-05, "loss": 1.0793, "step": 2358 }, { "epoch": 1.957676348547718, "grad_norm": 8.150080680847168, "learning_rate": 1.921825726141079e-05, "loss": 1.8736, "step": 2359 }, { "epoch": 1.9585062240663902, "grad_norm": 12.956464767456055, "learning_rate": 1.921792531120332e-05, "loss": 1.4461, "step": 2360 }, { "epoch": 1.9593360995850624, "grad_norm": 10.936516761779785, "learning_rate": 1.921759336099585e-05, "loss": 1.8936, "step": 2361 }, { "epoch": 1.9601659751037346, "grad_norm": 11.869302749633789, "learning_rate": 1.9217261410788384e-05, "loss": 1.8331, "step": 2362 }, { "epoch": 1.9609958506224068, "grad_norm": 9.028914451599121, "learning_rate": 1.9216929460580916e-05, "loss": 0.8476, "step": 2363 }, { "epoch": 1.961825726141079, "grad_norm": 10.35650634765625, "learning_rate": 1.9216597510373445e-05, "loss": 1.5198, "step": 2364 }, { "epoch": 1.9626556016597512, "grad_norm": 10.798906326293945, "learning_rate": 1.9216265560165977e-05, "loss": 1.817, "step": 2365 }, { "epoch": 1.9634854771784234, "grad_norm": 7.224380970001221, "learning_rate": 1.921593360995851e-05, "loss": 1.247, "step": 2366 }, { "epoch": 1.9643153526970956, "grad_norm": 9.539834976196289, "learning_rate": 1.921560165975104e-05, "loss": 2.4659, "step": 2367 }, { "epoch": 1.9651452282157678, "grad_norm": 7.421000003814697, "learning_rate": 1.921526970954357e-05, "loss": 1.4054, "step": 2368 }, { "epoch": 1.96597510373444, "grad_norm": 12.993587493896484, "learning_rate": 1.9214937759336102e-05, "loss": 0.7642, "step": 2369 }, { "epoch": 1.9668049792531122, "grad_norm": 13.3458833694458, "learning_rate": 1.9214605809128634e-05, "loss": 1.6576, "step": 2370 }, { "epoch": 1.9676348547717843, 
"grad_norm": 9.560420989990234, "learning_rate": 1.9214273858921163e-05, "loss": 1.4884, "step": 2371 }, { "epoch": 1.9684647302904565, "grad_norm": 8.708368301391602, "learning_rate": 1.9213941908713695e-05, "loss": 1.5587, "step": 2372 }, { "epoch": 1.9692946058091287, "grad_norm": 9.06648063659668, "learning_rate": 1.9213609958506227e-05, "loss": 1.9728, "step": 2373 }, { "epoch": 1.970124481327801, "grad_norm": 9.664203643798828, "learning_rate": 1.9213278008298755e-05, "loss": 1.7427, "step": 2374 }, { "epoch": 1.9709543568464731, "grad_norm": 8.37360954284668, "learning_rate": 1.9212946058091288e-05, "loss": 1.2412, "step": 2375 }, { "epoch": 1.9717842323651453, "grad_norm": 13.42367935180664, "learning_rate": 1.9212614107883816e-05, "loss": 1.4081, "step": 2376 }, { "epoch": 1.9726141078838175, "grad_norm": 6.036684513092041, "learning_rate": 1.921228215767635e-05, "loss": 1.085, "step": 2377 }, { "epoch": 1.9734439834024897, "grad_norm": 13.371716499328613, "learning_rate": 1.921195020746888e-05, "loss": 2.0267, "step": 2378 }, { "epoch": 1.974273858921162, "grad_norm": 9.559000968933105, "learning_rate": 1.9211618257261413e-05, "loss": 1.4806, "step": 2379 }, { "epoch": 1.9751037344398341, "grad_norm": 9.765844345092773, "learning_rate": 1.921128630705394e-05, "loss": 1.1833, "step": 2380 }, { "epoch": 1.9759336099585063, "grad_norm": 8.740890502929688, "learning_rate": 1.9210954356846473e-05, "loss": 1.4539, "step": 2381 }, { "epoch": 1.9767634854771785, "grad_norm": 8.664724349975586, "learning_rate": 1.9210622406639006e-05, "loss": 1.9315, "step": 2382 }, { "epoch": 1.9775933609958507, "grad_norm": 10.048588752746582, "learning_rate": 1.9210290456431538e-05, "loss": 1.1609, "step": 2383 }, { "epoch": 1.978423236514523, "grad_norm": 7.098512649536133, "learning_rate": 1.920995850622407e-05, "loss": 1.6802, "step": 2384 }, { "epoch": 1.979253112033195, "grad_norm": 9.615174293518066, "learning_rate": 1.92096265560166e-05, "loss": 0.9816, "step": 2385 }, { 
"epoch": 1.9800829875518673, "grad_norm": 13.173839569091797, "learning_rate": 1.920929460580913e-05, "loss": 1.3263, "step": 2386 }, { "epoch": 1.9809128630705395, "grad_norm": 7.953535079956055, "learning_rate": 1.9208962655601663e-05, "loss": 1.9396, "step": 2387 }, { "epoch": 1.9817427385892117, "grad_norm": 7.646029949188232, "learning_rate": 1.9208630705394195e-05, "loss": 1.2682, "step": 2388 }, { "epoch": 1.9825726141078839, "grad_norm": 9.0812406539917, "learning_rate": 1.9208298755186724e-05, "loss": 2.0292, "step": 2389 }, { "epoch": 1.983402489626556, "grad_norm": 10.475313186645508, "learning_rate": 1.9207966804979256e-05, "loss": 1.311, "step": 2390 }, { "epoch": 1.9842323651452283, "grad_norm": 8.172788619995117, "learning_rate": 1.9207634854771788e-05, "loss": 1.5358, "step": 2391 }, { "epoch": 1.9850622406639005, "grad_norm": 7.183462619781494, "learning_rate": 1.9207302904564316e-05, "loss": 1.6872, "step": 2392 }, { "epoch": 1.9858921161825727, "grad_norm": 10.345059394836426, "learning_rate": 1.920697095435685e-05, "loss": 1.8564, "step": 2393 }, { "epoch": 1.9867219917012449, "grad_norm": 6.780726432800293, "learning_rate": 1.9206639004149377e-05, "loss": 1.4342, "step": 2394 }, { "epoch": 1.987551867219917, "grad_norm": 9.457111358642578, "learning_rate": 1.920630705394191e-05, "loss": 0.9713, "step": 2395 }, { "epoch": 1.9883817427385893, "grad_norm": 8.10920524597168, "learning_rate": 1.920597510373444e-05, "loss": 1.0632, "step": 2396 }, { "epoch": 1.9892116182572614, "grad_norm": 9.039057731628418, "learning_rate": 1.920564315352697e-05, "loss": 1.5961, "step": 2397 }, { "epoch": 1.9900414937759336, "grad_norm": 7.677680492401123, "learning_rate": 1.9205311203319502e-05, "loss": 2.2644, "step": 2398 }, { "epoch": 1.9908713692946058, "grad_norm": 7.144964218139648, "learning_rate": 1.9204979253112034e-05, "loss": 0.9318, "step": 2399 }, { "epoch": 1.991701244813278, "grad_norm": 11.850919723510742, "learning_rate": 1.9204647302904567e-05, 
"loss": 1.662, "step": 2400 }, { "epoch": 1.9925311203319502, "grad_norm": 9.820211410522461, "learning_rate": 1.9204315352697095e-05, "loss": 1.3041, "step": 2401 }, { "epoch": 1.9933609958506224, "grad_norm": 10.661338806152344, "learning_rate": 1.9203983402489627e-05, "loss": 1.6385, "step": 2402 }, { "epoch": 1.9941908713692946, "grad_norm": 10.356027603149414, "learning_rate": 1.920365145228216e-05, "loss": 1.8294, "step": 2403 }, { "epoch": 1.9950207468879668, "grad_norm": 8.761855125427246, "learning_rate": 1.920331950207469e-05, "loss": 1.4365, "step": 2404 }, { "epoch": 1.995850622406639, "grad_norm": 7.379500865936279, "learning_rate": 1.920298755186722e-05, "loss": 0.9305, "step": 2405 }, { "epoch": 1.9966804979253112, "grad_norm": 9.460919380187988, "learning_rate": 1.9202655601659752e-05, "loss": 1.397, "step": 2406 }, { "epoch": 1.9975103734439834, "grad_norm": 10.643996238708496, "learning_rate": 1.9202323651452285e-05, "loss": 2.1274, "step": 2407 }, { "epoch": 1.9983402489626556, "grad_norm": 9.487954139709473, "learning_rate": 1.9201991701244817e-05, "loss": 1.7536, "step": 2408 }, { "epoch": 1.9991701244813278, "grad_norm": 13.650579452514648, "learning_rate": 1.9201659751037345e-05, "loss": 2.1822, "step": 2409 }, { "epoch": 2.0, "grad_norm": 8.249176979064941, "learning_rate": 1.9201327800829877e-05, "loss": 1.2316, "step": 2410 }, { "epoch": 2.000829875518672, "grad_norm": 9.127294540405273, "learning_rate": 1.920099585062241e-05, "loss": 0.7493, "step": 2411 }, { "epoch": 2.0016597510373444, "grad_norm": 5.399831771850586, "learning_rate": 1.9200663900414938e-05, "loss": 1.4884, "step": 2412 }, { "epoch": 2.0024896265560166, "grad_norm": 7.88582181930542, "learning_rate": 1.920033195020747e-05, "loss": 1.4158, "step": 2413 }, { "epoch": 2.003319502074689, "grad_norm": 9.116361618041992, "learning_rate": 1.9200000000000003e-05, "loss": 1.8006, "step": 2414 }, { "epoch": 2.004149377593361, "grad_norm": 11.266243934631348, "learning_rate": 
1.919966804979253e-05, "loss": 1.1988, "step": 2415 }, { "epoch": 2.004979253112033, "grad_norm": 10.872687339782715, "learning_rate": 1.9199336099585063e-05, "loss": 2.0375, "step": 2416 }, { "epoch": 2.0058091286307054, "grad_norm": 11.538824081420898, "learning_rate": 1.9199004149377595e-05, "loss": 1.8234, "step": 2417 }, { "epoch": 2.0066390041493776, "grad_norm": 11.356353759765625, "learning_rate": 1.9198672199170124e-05, "loss": 1.6732, "step": 2418 }, { "epoch": 2.0074688796680498, "grad_norm": 11.267871856689453, "learning_rate": 1.9198340248962656e-05, "loss": 1.2775, "step": 2419 }, { "epoch": 2.008298755186722, "grad_norm": 8.107894897460938, "learning_rate": 1.919800829875519e-05, "loss": 1.4994, "step": 2420 }, { "epoch": 2.009128630705394, "grad_norm": 8.419795989990234, "learning_rate": 1.919767634854772e-05, "loss": 1.2141, "step": 2421 }, { "epoch": 2.0099585062240664, "grad_norm": 9.42773151397705, "learning_rate": 1.919734439834025e-05, "loss": 1.2655, "step": 2422 }, { "epoch": 2.0107883817427386, "grad_norm": 9.87039852142334, "learning_rate": 1.919701244813278e-05, "loss": 1.1958, "step": 2423 }, { "epoch": 2.0116182572614107, "grad_norm": 9.412879943847656, "learning_rate": 1.9196680497925313e-05, "loss": 2.0165, "step": 2424 }, { "epoch": 2.012448132780083, "grad_norm": 12.1580171585083, "learning_rate": 1.9196348547717846e-05, "loss": 1.6532, "step": 2425 }, { "epoch": 2.013278008298755, "grad_norm": 13.159649848937988, "learning_rate": 1.9196016597510374e-05, "loss": 1.4881, "step": 2426 }, { "epoch": 2.0141078838174273, "grad_norm": 13.699675559997559, "learning_rate": 1.9195684647302906e-05, "loss": 2.5598, "step": 2427 }, { "epoch": 2.0149377593360995, "grad_norm": 8.668912887573242, "learning_rate": 1.919535269709544e-05, "loss": 1.0373, "step": 2428 }, { "epoch": 2.0157676348547717, "grad_norm": 11.304940223693848, "learning_rate": 1.919502074688797e-05, "loss": 1.8534, "step": 2429 }, { "epoch": 2.016597510373444, "grad_norm": 
8.32729721069336, "learning_rate": 1.91946887966805e-05, "loss": 1.528, "step": 2430 }, { "epoch": 2.017427385892116, "grad_norm": 8.966348648071289, "learning_rate": 1.919435684647303e-05, "loss": 0.8354, "step": 2431 }, { "epoch": 2.0182572614107883, "grad_norm": 10.311884880065918, "learning_rate": 1.919402489626556e-05, "loss": 1.398, "step": 2432 }, { "epoch": 2.0190871369294605, "grad_norm": 9.81961441040039, "learning_rate": 1.9193692946058092e-05, "loss": 1.4126, "step": 2433 }, { "epoch": 2.0199170124481327, "grad_norm": 9.076661109924316, "learning_rate": 1.9193360995850624e-05, "loss": 1.8479, "step": 2434 }, { "epoch": 2.020746887966805, "grad_norm": 9.66213607788086, "learning_rate": 1.9193029045643153e-05, "loss": 1.4901, "step": 2435 }, { "epoch": 2.021576763485477, "grad_norm": 10.557022094726562, "learning_rate": 1.9192697095435685e-05, "loss": 1.4322, "step": 2436 }, { "epoch": 2.0224066390041493, "grad_norm": 8.439718246459961, "learning_rate": 1.9192365145228217e-05, "loss": 1.493, "step": 2437 }, { "epoch": 2.0232365145228215, "grad_norm": 8.959397315979004, "learning_rate": 1.919203319502075e-05, "loss": 0.83, "step": 2438 }, { "epoch": 2.0240663900414937, "grad_norm": 11.377038955688477, "learning_rate": 1.9191701244813278e-05, "loss": 1.4153, "step": 2439 }, { "epoch": 2.024896265560166, "grad_norm": 16.014694213867188, "learning_rate": 1.919136929460581e-05, "loss": 2.2826, "step": 2440 }, { "epoch": 2.025726141078838, "grad_norm": 9.765339851379395, "learning_rate": 1.9191037344398342e-05, "loss": 1.5904, "step": 2441 }, { "epoch": 2.0265560165975103, "grad_norm": 8.380574226379395, "learning_rate": 1.9190705394190874e-05, "loss": 0.8803, "step": 2442 }, { "epoch": 2.0273858921161825, "grad_norm": 9.200161933898926, "learning_rate": 1.9190373443983403e-05, "loss": 0.866, "step": 2443 }, { "epoch": 2.0282157676348547, "grad_norm": 9.140693664550781, "learning_rate": 1.9190041493775935e-05, "loss": 1.5389, "step": 2444 }, { "epoch": 
2.029045643153527, "grad_norm": 8.571486473083496, "learning_rate": 1.9189709543568467e-05, "loss": 0.9712, "step": 2445 }, { "epoch": 2.029875518672199, "grad_norm": 6.421820163726807, "learning_rate": 1.9189377593361e-05, "loss": 0.9697, "step": 2446 }, { "epoch": 2.0307053941908713, "grad_norm": 11.985848426818848, "learning_rate": 1.9189045643153528e-05, "loss": 1.27, "step": 2447 }, { "epoch": 2.0315352697095435, "grad_norm": 10.6143798828125, "learning_rate": 1.918871369294606e-05, "loss": 1.8701, "step": 2448 }, { "epoch": 2.0323651452282157, "grad_norm": 20.086599349975586, "learning_rate": 1.9188381742738592e-05, "loss": 2.5383, "step": 2449 }, { "epoch": 2.033195020746888, "grad_norm": 13.445959091186523, "learning_rate": 1.918804979253112e-05, "loss": 1.6456, "step": 2450 }, { "epoch": 2.03402489626556, "grad_norm": 6.355902194976807, "learning_rate": 1.9187717842323653e-05, "loss": 0.8582, "step": 2451 }, { "epoch": 2.0348547717842322, "grad_norm": 7.755450248718262, "learning_rate": 1.9187385892116185e-05, "loss": 1.3186, "step": 2452 }, { "epoch": 2.0356846473029044, "grad_norm": 9.077384948730469, "learning_rate": 1.9187053941908714e-05, "loss": 1.468, "step": 2453 }, { "epoch": 2.0365145228215766, "grad_norm": 6.4339680671691895, "learning_rate": 1.9186721991701246e-05, "loss": 0.9047, "step": 2454 }, { "epoch": 2.037344398340249, "grad_norm": 12.059142112731934, "learning_rate": 1.9186390041493775e-05, "loss": 1.9972, "step": 2455 }, { "epoch": 2.038174273858921, "grad_norm": 7.968225479125977, "learning_rate": 1.9186058091286307e-05, "loss": 0.9477, "step": 2456 }, { "epoch": 2.0390041493775932, "grad_norm": 6.913792610168457, "learning_rate": 1.918572614107884e-05, "loss": 1.4216, "step": 2457 }, { "epoch": 2.0398340248962654, "grad_norm": 13.953023910522461, "learning_rate": 1.918539419087137e-05, "loss": 2.0302, "step": 2458 }, { "epoch": 2.0406639004149376, "grad_norm": 14.093500137329102, "learning_rate": 1.91850622406639e-05, "loss": 1.2126, 
"step": 2459 }, { "epoch": 2.04149377593361, "grad_norm": 8.707942008972168, "learning_rate": 1.9184730290456432e-05, "loss": 1.1552, "step": 2460 }, { "epoch": 2.042323651452282, "grad_norm": 7.237843036651611, "learning_rate": 1.9184398340248964e-05, "loss": 1.3516, "step": 2461 }, { "epoch": 2.043153526970954, "grad_norm": 8.149576187133789, "learning_rate": 1.9184066390041496e-05, "loss": 1.2858, "step": 2462 }, { "epoch": 2.0439834024896264, "grad_norm": 7.6167073249816895, "learning_rate": 1.918373443983403e-05, "loss": 1.6103, "step": 2463 }, { "epoch": 2.0448132780082986, "grad_norm": 7.131430625915527, "learning_rate": 1.9183402489626557e-05, "loss": 1.221, "step": 2464 }, { "epoch": 2.045643153526971, "grad_norm": 10.25903034210205, "learning_rate": 1.918307053941909e-05, "loss": 2.0068, "step": 2465 }, { "epoch": 2.046473029045643, "grad_norm": 8.41498851776123, "learning_rate": 1.918273858921162e-05, "loss": 1.1566, "step": 2466 }, { "epoch": 2.047302904564315, "grad_norm": 9.803988456726074, "learning_rate": 1.9182406639004153e-05, "loss": 1.2773, "step": 2467 }, { "epoch": 2.0481327800829874, "grad_norm": 10.695011138916016, "learning_rate": 1.9182074688796682e-05, "loss": 2.0951, "step": 2468 }, { "epoch": 2.0489626556016596, "grad_norm": 9.293597221374512, "learning_rate": 1.9181742738589214e-05, "loss": 1.3575, "step": 2469 }, { "epoch": 2.0497925311203318, "grad_norm": 9.76063346862793, "learning_rate": 1.9181410788381743e-05, "loss": 1.1092, "step": 2470 }, { "epoch": 2.050622406639004, "grad_norm": 7.1609649658203125, "learning_rate": 1.9181078838174275e-05, "loss": 1.0577, "step": 2471 }, { "epoch": 2.051452282157676, "grad_norm": 13.371454238891602, "learning_rate": 1.9180746887966807e-05, "loss": 1.0278, "step": 2472 }, { "epoch": 2.0522821576763484, "grad_norm": 13.598397254943848, "learning_rate": 1.9180414937759336e-05, "loss": 1.7754, "step": 2473 }, { "epoch": 2.0531120331950206, "grad_norm": 9.461254119873047, "learning_rate": 
1.9180082987551868e-05, "loss": 0.9873, "step": 2474 }, { "epoch": 2.0539419087136928, "grad_norm": 9.000308990478516, "learning_rate": 1.91797510373444e-05, "loss": 1.4972, "step": 2475 }, { "epoch": 2.054771784232365, "grad_norm": 5.565464019775391, "learning_rate": 1.917941908713693e-05, "loss": 0.902, "step": 2476 }, { "epoch": 2.055601659751037, "grad_norm": 6.408386707305908, "learning_rate": 1.917908713692946e-05, "loss": 1.27, "step": 2477 }, { "epoch": 2.0564315352697093, "grad_norm": 7.539608001708984, "learning_rate": 1.9178755186721993e-05, "loss": 1.5337, "step": 2478 }, { "epoch": 2.0572614107883815, "grad_norm": 8.729866981506348, "learning_rate": 1.9178423236514525e-05, "loss": 1.5039, "step": 2479 }, { "epoch": 2.0580912863070537, "grad_norm": 6.557750225067139, "learning_rate": 1.9178091286307054e-05, "loss": 1.4007, "step": 2480 }, { "epoch": 2.058921161825726, "grad_norm": 8.272934913635254, "learning_rate": 1.9177759336099586e-05, "loss": 1.2384, "step": 2481 }, { "epoch": 2.059751037344398, "grad_norm": 13.448517799377441, "learning_rate": 1.9177427385892118e-05, "loss": 1.3875, "step": 2482 }, { "epoch": 2.0605809128630703, "grad_norm": 6.26338005065918, "learning_rate": 1.917709543568465e-05, "loss": 1.2948, "step": 2483 }, { "epoch": 2.0614107883817425, "grad_norm": 13.882221221923828, "learning_rate": 1.917676348547718e-05, "loss": 2.7883, "step": 2484 }, { "epoch": 2.0622406639004147, "grad_norm": 7.664864540100098, "learning_rate": 1.917643153526971e-05, "loss": 1.3255, "step": 2485 }, { "epoch": 2.063070539419087, "grad_norm": 6.854956150054932, "learning_rate": 1.9176099585062243e-05, "loss": 1.4891, "step": 2486 }, { "epoch": 2.063900414937759, "grad_norm": 13.871954917907715, "learning_rate": 1.9175767634854775e-05, "loss": 1.7454, "step": 2487 }, { "epoch": 2.0647302904564317, "grad_norm": 13.860342979431152, "learning_rate": 1.9175435684647304e-05, "loss": 2.2772, "step": 2488 }, { "epoch": 2.065560165975104, "grad_norm": 
7.530017852783203, "learning_rate": 1.9175103734439836e-05, "loss": 0.7232, "step": 2489 }, { "epoch": 2.066390041493776, "grad_norm": 6.994811058044434, "learning_rate": 1.9174771784232368e-05, "loss": 1.431, "step": 2490 }, { "epoch": 2.0672199170124483, "grad_norm": 14.33594036102295, "learning_rate": 1.9174439834024897e-05, "loss": 1.5379, "step": 2491 }, { "epoch": 2.0680497925311205, "grad_norm": 9.473711967468262, "learning_rate": 1.917410788381743e-05, "loss": 1.2691, "step": 2492 }, { "epoch": 2.0688796680497927, "grad_norm": 7.157381057739258, "learning_rate": 1.9173775933609958e-05, "loss": 1.5771, "step": 2493 }, { "epoch": 2.069709543568465, "grad_norm": 7.717891216278076, "learning_rate": 1.917344398340249e-05, "loss": 0.7211, "step": 2494 }, { "epoch": 2.070539419087137, "grad_norm": 8.370161056518555, "learning_rate": 1.9173112033195022e-05, "loss": 1.3311, "step": 2495 }, { "epoch": 2.0713692946058093, "grad_norm": 7.689192771911621, "learning_rate": 1.9172780082987554e-05, "loss": 0.8917, "step": 2496 }, { "epoch": 2.0721991701244815, "grad_norm": 8.700095176696777, "learning_rate": 1.9172448132780083e-05, "loss": 0.8329, "step": 2497 }, { "epoch": 2.0730290456431537, "grad_norm": 6.711269378662109, "learning_rate": 1.9172116182572615e-05, "loss": 1.5112, "step": 2498 }, { "epoch": 2.073858921161826, "grad_norm": 8.784908294677734, "learning_rate": 1.9171784232365147e-05, "loss": 1.8605, "step": 2499 }, { "epoch": 2.074688796680498, "grad_norm": 12.595096588134766, "learning_rate": 1.917145228215768e-05, "loss": 3.0155, "step": 2500 }, { "epoch": 2.0755186721991703, "grad_norm": 9.644672393798828, "learning_rate": 1.9171120331950208e-05, "loss": 1.1621, "step": 2501 }, { "epoch": 2.0763485477178425, "grad_norm": 5.967259407043457, "learning_rate": 1.917078838174274e-05, "loss": 1.1466, "step": 2502 }, { "epoch": 2.0771784232365147, "grad_norm": 8.137417793273926, "learning_rate": 1.9170456431535272e-05, "loss": 1.3034, "step": 2503 }, { "epoch": 
2.078008298755187, "grad_norm": 13.770953178405762, "learning_rate": 1.9170124481327804e-05, "loss": 1.4929, "step": 2504 }, { "epoch": 2.078838174273859, "grad_norm": 9.885885238647461, "learning_rate": 1.9169792531120333e-05, "loss": 1.3046, "step": 2505 }, { "epoch": 2.0796680497925313, "grad_norm": 13.628999710083008, "learning_rate": 1.9169460580912865e-05, "loss": 1.4125, "step": 2506 }, { "epoch": 2.0804979253112035, "grad_norm": 9.030044555664062, "learning_rate": 1.9169128630705397e-05, "loss": 2.182, "step": 2507 }, { "epoch": 2.0813278008298757, "grad_norm": 11.653294563293457, "learning_rate": 1.916879668049793e-05, "loss": 1.4407, "step": 2508 }, { "epoch": 2.082157676348548, "grad_norm": 19.529457092285156, "learning_rate": 1.9168464730290458e-05, "loss": 1.3271, "step": 2509 }, { "epoch": 2.08298755186722, "grad_norm": 7.635500431060791, "learning_rate": 1.916813278008299e-05, "loss": 1.0482, "step": 2510 }, { "epoch": 2.0838174273858923, "grad_norm": 12.214343070983887, "learning_rate": 1.916780082987552e-05, "loss": 1.5863, "step": 2511 }, { "epoch": 2.0846473029045645, "grad_norm": 10.060574531555176, "learning_rate": 1.916746887966805e-05, "loss": 1.4895, "step": 2512 }, { "epoch": 2.0854771784232367, "grad_norm": 12.596840858459473, "learning_rate": 1.9167136929460583e-05, "loss": 1.8234, "step": 2513 }, { "epoch": 2.086307053941909, "grad_norm": 11.243595123291016, "learning_rate": 1.916680497925311e-05, "loss": 1.1844, "step": 2514 }, { "epoch": 2.087136929460581, "grad_norm": 16.02602195739746, "learning_rate": 1.9166473029045644e-05, "loss": 1.0048, "step": 2515 }, { "epoch": 2.0879668049792532, "grad_norm": 15.696050643920898, "learning_rate": 1.9166141078838176e-05, "loss": 1.6332, "step": 2516 }, { "epoch": 2.0887966804979254, "grad_norm": 10.504521369934082, "learning_rate": 1.9165809128630708e-05, "loss": 1.5032, "step": 2517 }, { "epoch": 2.0896265560165976, "grad_norm": 7.874056816101074, "learning_rate": 1.9165477178423237e-05, 
"loss": 1.5002, "step": 2518 }, { "epoch": 2.09045643153527, "grad_norm": 8.62625503540039, "learning_rate": 1.916514522821577e-05, "loss": 1.2771, "step": 2519 }, { "epoch": 2.091286307053942, "grad_norm": 10.320693969726562, "learning_rate": 1.91648132780083e-05, "loss": 1.7151, "step": 2520 }, { "epoch": 2.0921161825726142, "grad_norm": 15.860832214355469, "learning_rate": 1.9164481327800833e-05, "loss": 1.1702, "step": 2521 }, { "epoch": 2.0929460580912864, "grad_norm": 11.296422958374023, "learning_rate": 1.9164149377593362e-05, "loss": 1.3465, "step": 2522 }, { "epoch": 2.0937759336099586, "grad_norm": 10.834589004516602, "learning_rate": 1.9163817427385894e-05, "loss": 1.7513, "step": 2523 }, { "epoch": 2.094605809128631, "grad_norm": 10.192965507507324, "learning_rate": 1.9163485477178426e-05, "loss": 1.8854, "step": 2524 }, { "epoch": 2.095435684647303, "grad_norm": 9.714402198791504, "learning_rate": 1.9163153526970958e-05, "loss": 1.6139, "step": 2525 }, { "epoch": 2.096265560165975, "grad_norm": 10.275357246398926, "learning_rate": 1.9162821576763487e-05, "loss": 1.8725, "step": 2526 }, { "epoch": 2.0970954356846474, "grad_norm": 8.208827018737793, "learning_rate": 1.916248962655602e-05, "loss": 1.4678, "step": 2527 }, { "epoch": 2.0979253112033196, "grad_norm": 7.612676620483398, "learning_rate": 1.916215767634855e-05, "loss": 1.1903, "step": 2528 }, { "epoch": 2.098755186721992, "grad_norm": 15.837113380432129, "learning_rate": 1.916182572614108e-05, "loss": 1.6406, "step": 2529 }, { "epoch": 2.099585062240664, "grad_norm": 12.090262413024902, "learning_rate": 1.9161493775933612e-05, "loss": 1.155, "step": 2530 }, { "epoch": 2.100414937759336, "grad_norm": 6.589348793029785, "learning_rate": 1.9161161825726144e-05, "loss": 1.2349, "step": 2531 }, { "epoch": 2.1012448132780084, "grad_norm": 20.562599182128906, "learning_rate": 1.9160829875518673e-05, "loss": 1.9687, "step": 2532 }, { "epoch": 2.1020746887966806, "grad_norm": 10.191686630249023, 
"learning_rate": 1.9160497925311205e-05, "loss": 1.9647, "step": 2533 }, { "epoch": 2.1029045643153528, "grad_norm": 7.139536380767822, "learning_rate": 1.9160165975103733e-05, "loss": 1.0575, "step": 2534 }, { "epoch": 2.103734439834025, "grad_norm": 13.017189025878906, "learning_rate": 1.9159834024896266e-05, "loss": 1.6034, "step": 2535 }, { "epoch": 2.104564315352697, "grad_norm": 10.893631935119629, "learning_rate": 1.9159502074688798e-05, "loss": 1.7254, "step": 2536 }, { "epoch": 2.1053941908713694, "grad_norm": 14.115684509277344, "learning_rate": 1.915917012448133e-05, "loss": 1.7531, "step": 2537 }, { "epoch": 2.1062240663900416, "grad_norm": 8.4971342086792, "learning_rate": 1.915883817427386e-05, "loss": 1.3888, "step": 2538 }, { "epoch": 2.1070539419087138, "grad_norm": 9.45059871673584, "learning_rate": 1.915850622406639e-05, "loss": 1.5601, "step": 2539 }, { "epoch": 2.107883817427386, "grad_norm": 8.315967559814453, "learning_rate": 1.9158174273858923e-05, "loss": 1.8319, "step": 2540 }, { "epoch": 2.108713692946058, "grad_norm": 10.099102973937988, "learning_rate": 1.9157842323651455e-05, "loss": 1.2323, "step": 2541 }, { "epoch": 2.1095435684647303, "grad_norm": 6.7495903968811035, "learning_rate": 1.9157510373443987e-05, "loss": 1.7643, "step": 2542 }, { "epoch": 2.1103734439834025, "grad_norm": 7.12886905670166, "learning_rate": 1.9157178423236516e-05, "loss": 1.5523, "step": 2543 }, { "epoch": 2.1112033195020747, "grad_norm": 8.643832206726074, "learning_rate": 1.9156846473029048e-05, "loss": 1.1635, "step": 2544 }, { "epoch": 2.112033195020747, "grad_norm": 7.4825053215026855, "learning_rate": 1.915651452282158e-05, "loss": 1.3871, "step": 2545 }, { "epoch": 2.112863070539419, "grad_norm": 7.551272869110107, "learning_rate": 1.9156182572614112e-05, "loss": 1.57, "step": 2546 }, { "epoch": 2.1136929460580913, "grad_norm": 10.566024780273438, "learning_rate": 1.915585062240664e-05, "loss": 1.1644, "step": 2547 }, { "epoch": 2.1145228215767635, 
"grad_norm": 9.858790397644043, "learning_rate": 1.9155518672199173e-05, "loss": 1.2557, "step": 2548 }, { "epoch": 2.1153526970954357, "grad_norm": 8.280162811279297, "learning_rate": 1.91551867219917e-05, "loss": 1.1152, "step": 2549 }, { "epoch": 2.116182572614108, "grad_norm": 5.775630474090576, "learning_rate": 1.9154854771784234e-05, "loss": 1.3929, "step": 2550 }, { "epoch": 2.11701244813278, "grad_norm": 8.951160430908203, "learning_rate": 1.9154522821576766e-05, "loss": 1.8441, "step": 2551 }, { "epoch": 2.1178423236514523, "grad_norm": 12.702296257019043, "learning_rate": 1.9154190871369294e-05, "loss": 1.1195, "step": 2552 }, { "epoch": 2.1186721991701245, "grad_norm": 12.471384048461914, "learning_rate": 1.9153858921161827e-05, "loss": 0.9986, "step": 2553 }, { "epoch": 2.1195020746887967, "grad_norm": 5.214249134063721, "learning_rate": 1.915352697095436e-05, "loss": 1.0219, "step": 2554 }, { "epoch": 2.120331950207469, "grad_norm": 6.384991645812988, "learning_rate": 1.9153195020746887e-05, "loss": 1.2903, "step": 2555 }, { "epoch": 2.121161825726141, "grad_norm": 11.1992769241333, "learning_rate": 1.915286307053942e-05, "loss": 2.1322, "step": 2556 }, { "epoch": 2.1219917012448133, "grad_norm": 9.090169906616211, "learning_rate": 1.915253112033195e-05, "loss": 1.1769, "step": 2557 }, { "epoch": 2.1228215767634855, "grad_norm": 12.739233016967773, "learning_rate": 1.9152199170124484e-05, "loss": 1.9742, "step": 2558 }, { "epoch": 2.1236514522821577, "grad_norm": 7.8150529861450195, "learning_rate": 1.9151867219917012e-05, "loss": 1.4288, "step": 2559 }, { "epoch": 2.12448132780083, "grad_norm": 7.72048807144165, "learning_rate": 1.9151535269709545e-05, "loss": 1.7398, "step": 2560 }, { "epoch": 2.125311203319502, "grad_norm": 5.701071262359619, "learning_rate": 1.9151203319502077e-05, "loss": 1.1478, "step": 2561 }, { "epoch": 2.1261410788381743, "grad_norm": 14.802825927734375, "learning_rate": 1.915087136929461e-05, "loss": 1.2833, "step": 2562 }, { 
"epoch": 2.1269709543568465, "grad_norm": 10.549960136413574, "learning_rate": 1.9150539419087137e-05, "loss": 2.5356, "step": 2563 }, { "epoch": 2.1278008298755187, "grad_norm": 16.493797302246094, "learning_rate": 1.915020746887967e-05, "loss": 2.0177, "step": 2564 }, { "epoch": 2.128630705394191, "grad_norm": 4.937650680541992, "learning_rate": 1.91498755186722e-05, "loss": 0.7953, "step": 2565 }, { "epoch": 2.129460580912863, "grad_norm": 10.470561981201172, "learning_rate": 1.9149543568464734e-05, "loss": 1.4541, "step": 2566 }, { "epoch": 2.1302904564315353, "grad_norm": 10.110647201538086, "learning_rate": 1.9149211618257263e-05, "loss": 1.8478, "step": 2567 }, { "epoch": 2.1311203319502074, "grad_norm": 9.7265625, "learning_rate": 1.9148879668049795e-05, "loss": 1.1732, "step": 2568 }, { "epoch": 2.1319502074688796, "grad_norm": 17.93308448791504, "learning_rate": 1.9148547717842327e-05, "loss": 2.0939, "step": 2569 }, { "epoch": 2.132780082987552, "grad_norm": 7.805483341217041, "learning_rate": 1.9148215767634855e-05, "loss": 0.5577, "step": 2570 }, { "epoch": 2.133609958506224, "grad_norm": 14.600140571594238, "learning_rate": 1.9147883817427388e-05, "loss": 1.3316, "step": 2571 }, { "epoch": 2.1344398340248962, "grad_norm": 14.119353294372559, "learning_rate": 1.9147551867219916e-05, "loss": 1.8449, "step": 2572 }, { "epoch": 2.1352697095435684, "grad_norm": 8.701376914978027, "learning_rate": 1.914721991701245e-05, "loss": 1.1396, "step": 2573 }, { "epoch": 2.1360995850622406, "grad_norm": 12.277219772338867, "learning_rate": 1.914688796680498e-05, "loss": 2.2889, "step": 2574 }, { "epoch": 2.136929460580913, "grad_norm": 9.856963157653809, "learning_rate": 1.9146556016597513e-05, "loss": 2.4132, "step": 2575 }, { "epoch": 2.137759336099585, "grad_norm": 14.379226684570312, "learning_rate": 1.914622406639004e-05, "loss": 1.4079, "step": 2576 }, { "epoch": 2.138589211618257, "grad_norm": 13.274550437927246, "learning_rate": 1.9145892116182573e-05, 
"loss": 1.9266, "step": 2577 }, { "epoch": 2.1394190871369294, "grad_norm": 8.078908920288086, "learning_rate": 1.9145560165975106e-05, "loss": 0.8909, "step": 2578 }, { "epoch": 2.1402489626556016, "grad_norm": 9.274185180664062, "learning_rate": 1.9145228215767638e-05, "loss": 1.602, "step": 2579 }, { "epoch": 2.141078838174274, "grad_norm": 12.277952194213867, "learning_rate": 1.9144896265560166e-05, "loss": 1.5535, "step": 2580 }, { "epoch": 2.141908713692946, "grad_norm": 8.597379684448242, "learning_rate": 1.91445643153527e-05, "loss": 1.8303, "step": 2581 }, { "epoch": 2.142738589211618, "grad_norm": 7.026464462280273, "learning_rate": 1.914423236514523e-05, "loss": 1.1882, "step": 2582 }, { "epoch": 2.1435684647302904, "grad_norm": 9.794578552246094, "learning_rate": 1.9143900414937763e-05, "loss": 1.4987, "step": 2583 }, { "epoch": 2.1443983402489626, "grad_norm": 7.344831466674805, "learning_rate": 1.914356846473029e-05, "loss": 1.963, "step": 2584 }, { "epoch": 2.145228215767635, "grad_norm": 6.207168102264404, "learning_rate": 1.9143236514522824e-05, "loss": 1.3328, "step": 2585 }, { "epoch": 2.146058091286307, "grad_norm": 13.932330131530762, "learning_rate": 1.9142904564315356e-05, "loss": 1.9121, "step": 2586 }, { "epoch": 2.146887966804979, "grad_norm": 12.970211029052734, "learning_rate": 1.9142572614107884e-05, "loss": 1.5476, "step": 2587 }, { "epoch": 2.1477178423236514, "grad_norm": 6.654008865356445, "learning_rate": 1.9142240663900416e-05, "loss": 0.6324, "step": 2588 }, { "epoch": 2.1485477178423236, "grad_norm": 10.32128620147705, "learning_rate": 1.914190871369295e-05, "loss": 1.1589, "step": 2589 }, { "epoch": 2.1493775933609958, "grad_norm": 8.878461837768555, "learning_rate": 1.9141576763485477e-05, "loss": 0.7005, "step": 2590 }, { "epoch": 2.150207468879668, "grad_norm": 9.103872299194336, "learning_rate": 1.914124481327801e-05, "loss": 1.6531, "step": 2591 }, { "epoch": 2.15103734439834, "grad_norm": 7.473913192749023, 
"learning_rate": 1.914091286307054e-05, "loss": 1.2012, "step": 2592 }, { "epoch": 2.1518672199170124, "grad_norm": 9.401164054870605, "learning_rate": 1.914058091286307e-05, "loss": 1.5255, "step": 2593 }, { "epoch": 2.1526970954356845, "grad_norm": 12.362866401672363, "learning_rate": 1.9140248962655602e-05, "loss": 1.4709, "step": 2594 }, { "epoch": 2.1535269709543567, "grad_norm": 10.939626693725586, "learning_rate": 1.9139917012448134e-05, "loss": 1.1733, "step": 2595 }, { "epoch": 2.154356846473029, "grad_norm": 10.314467430114746, "learning_rate": 1.9139585062240667e-05, "loss": 1.5025, "step": 2596 }, { "epoch": 2.155186721991701, "grad_norm": 9.079737663269043, "learning_rate": 1.9139253112033195e-05, "loss": 1.6175, "step": 2597 }, { "epoch": 2.1560165975103733, "grad_norm": 9.20674991607666, "learning_rate": 1.9138921161825727e-05, "loss": 0.7826, "step": 2598 }, { "epoch": 2.1568464730290455, "grad_norm": 13.586068153381348, "learning_rate": 1.913858921161826e-05, "loss": 0.9504, "step": 2599 }, { "epoch": 2.1576763485477177, "grad_norm": 5.751948833465576, "learning_rate": 1.913825726141079e-05, "loss": 1.3658, "step": 2600 }, { "epoch": 2.15850622406639, "grad_norm": 10.569400787353516, "learning_rate": 1.913792531120332e-05, "loss": 1.4865, "step": 2601 }, { "epoch": 2.159336099585062, "grad_norm": 13.0072660446167, "learning_rate": 1.9137593360995852e-05, "loss": 2.0614, "step": 2602 }, { "epoch": 2.1601659751037343, "grad_norm": 12.060885429382324, "learning_rate": 1.9137261410788385e-05, "loss": 1.2283, "step": 2603 }, { "epoch": 2.1609958506224065, "grad_norm": 10.690999031066895, "learning_rate": 1.9136929460580917e-05, "loss": 1.0014, "step": 2604 }, { "epoch": 2.1618257261410787, "grad_norm": 7.918331146240234, "learning_rate": 1.9136597510373445e-05, "loss": 1.2046, "step": 2605 }, { "epoch": 2.162655601659751, "grad_norm": 9.04177188873291, "learning_rate": 1.9136265560165977e-05, "loss": 1.0608, "step": 2606 }, { "epoch": 2.163485477178423, 
"grad_norm": 15.359429359436035, "learning_rate": 1.913593360995851e-05, "loss": 1.9158, "step": 2607 }, { "epoch": 2.1643153526970953, "grad_norm": 8.168084144592285, "learning_rate": 1.9135601659751038e-05, "loss": 1.1491, "step": 2608 }, { "epoch": 2.1651452282157675, "grad_norm": 10.446446418762207, "learning_rate": 1.913526970954357e-05, "loss": 1.2502, "step": 2609 }, { "epoch": 2.1659751037344397, "grad_norm": 14.505271911621094, "learning_rate": 1.91349377593361e-05, "loss": 2.1436, "step": 2610 }, { "epoch": 2.166804979253112, "grad_norm": 14.405625343322754, "learning_rate": 1.913460580912863e-05, "loss": 1.0543, "step": 2611 }, { "epoch": 2.167634854771784, "grad_norm": 10.888189315795898, "learning_rate": 1.9134273858921163e-05, "loss": 0.8094, "step": 2612 }, { "epoch": 2.1684647302904563, "grad_norm": 13.381094932556152, "learning_rate": 1.9133941908713692e-05, "loss": 1.8267, "step": 2613 }, { "epoch": 2.1692946058091285, "grad_norm": 11.115133285522461, "learning_rate": 1.9133609958506224e-05, "loss": 1.7016, "step": 2614 }, { "epoch": 2.1701244813278007, "grad_norm": 12.69227123260498, "learning_rate": 1.9133278008298756e-05, "loss": 1.2674, "step": 2615 }, { "epoch": 2.170954356846473, "grad_norm": 8.693068504333496, "learning_rate": 1.913294605809129e-05, "loss": 1.0204, "step": 2616 }, { "epoch": 2.171784232365145, "grad_norm": 11.643972396850586, "learning_rate": 1.9132614107883817e-05, "loss": 1.1718, "step": 2617 }, { "epoch": 2.1726141078838173, "grad_norm": 8.894976615905762, "learning_rate": 1.913228215767635e-05, "loss": 1.3355, "step": 2618 }, { "epoch": 2.1734439834024895, "grad_norm": 9.723855972290039, "learning_rate": 1.913195020746888e-05, "loss": 1.8522, "step": 2619 }, { "epoch": 2.1742738589211617, "grad_norm": 10.273652076721191, "learning_rate": 1.9131618257261413e-05, "loss": 1.455, "step": 2620 }, { "epoch": 2.175103734439834, "grad_norm": 9.985360145568848, "learning_rate": 1.9131286307053946e-05, "loss": 1.2143, "step": 
2621 }, { "epoch": 2.175933609958506, "grad_norm": 10.546401023864746, "learning_rate": 1.9130954356846474e-05, "loss": 1.2035, "step": 2622 }, { "epoch": 2.1767634854771782, "grad_norm": 11.549467086791992, "learning_rate": 1.9130622406639006e-05, "loss": 1.5209, "step": 2623 }, { "epoch": 2.1775933609958504, "grad_norm": 11.960349082946777, "learning_rate": 1.913029045643154e-05, "loss": 1.4786, "step": 2624 }, { "epoch": 2.1784232365145226, "grad_norm": 10.658820152282715, "learning_rate": 1.912995850622407e-05, "loss": 1.7169, "step": 2625 }, { "epoch": 2.179253112033195, "grad_norm": 13.403305053710938, "learning_rate": 1.91296265560166e-05, "loss": 1.5101, "step": 2626 }, { "epoch": 2.180082987551867, "grad_norm": 10.272491455078125, "learning_rate": 1.912929460580913e-05, "loss": 1.4686, "step": 2627 }, { "epoch": 2.180912863070539, "grad_norm": 12.991399765014648, "learning_rate": 1.912896265560166e-05, "loss": 2.0775, "step": 2628 }, { "epoch": 2.1817427385892114, "grad_norm": 18.764602661132812, "learning_rate": 1.9128630705394192e-05, "loss": 2.3307, "step": 2629 }, { "epoch": 2.1825726141078836, "grad_norm": 8.71583080291748, "learning_rate": 1.9128298755186724e-05, "loss": 1.4961, "step": 2630 }, { "epoch": 2.183402489626556, "grad_norm": 12.995841026306152, "learning_rate": 1.9127966804979253e-05, "loss": 1.6186, "step": 2631 }, { "epoch": 2.1842323651452284, "grad_norm": 7.034140586853027, "learning_rate": 1.9127634854771785e-05, "loss": 1.5574, "step": 2632 }, { "epoch": 2.1850622406639006, "grad_norm": 8.89040756225586, "learning_rate": 1.9127302904564317e-05, "loss": 1.2779, "step": 2633 }, { "epoch": 2.185892116182573, "grad_norm": 10.296299934387207, "learning_rate": 1.9126970954356846e-05, "loss": 1.8753, "step": 2634 }, { "epoch": 2.186721991701245, "grad_norm": 10.188587188720703, "learning_rate": 1.9126639004149378e-05, "loss": 1.4006, "step": 2635 }, { "epoch": 2.1875518672199172, "grad_norm": 7.567741870880127, "learning_rate": 
1.912630705394191e-05, "loss": 1.6523, "step": 2636 }, { "epoch": 2.1883817427385894, "grad_norm": 6.68474817276001, "learning_rate": 1.9125975103734442e-05, "loss": 1.2365, "step": 2637 }, { "epoch": 2.1892116182572616, "grad_norm": 7.559723377227783, "learning_rate": 1.912564315352697e-05, "loss": 1.254, "step": 2638 }, { "epoch": 2.190041493775934, "grad_norm": 7.411098003387451, "learning_rate": 1.9125311203319503e-05, "loss": 1.084, "step": 2639 }, { "epoch": 2.190871369294606, "grad_norm": 7.172590732574463, "learning_rate": 1.9124979253112035e-05, "loss": 1.158, "step": 2640 }, { "epoch": 2.191701244813278, "grad_norm": 15.02193546295166, "learning_rate": 1.9124647302904567e-05, "loss": 2.2428, "step": 2641 }, { "epoch": 2.1925311203319504, "grad_norm": 14.194352149963379, "learning_rate": 1.9124315352697096e-05, "loss": 1.5093, "step": 2642 }, { "epoch": 2.1933609958506226, "grad_norm": 8.003094673156738, "learning_rate": 1.9123983402489628e-05, "loss": 1.5423, "step": 2643 }, { "epoch": 2.194190871369295, "grad_norm": 7.990683078765869, "learning_rate": 1.912365145228216e-05, "loss": 1.3332, "step": 2644 }, { "epoch": 2.195020746887967, "grad_norm": 11.71065616607666, "learning_rate": 1.9123319502074692e-05, "loss": 1.1004, "step": 2645 }, { "epoch": 2.195850622406639, "grad_norm": 9.717833518981934, "learning_rate": 1.912298755186722e-05, "loss": 1.9252, "step": 2646 }, { "epoch": 2.1966804979253114, "grad_norm": 12.998700141906738, "learning_rate": 1.9122655601659753e-05, "loss": 2.3541, "step": 2647 }, { "epoch": 2.1975103734439836, "grad_norm": 6.154157638549805, "learning_rate": 1.9122323651452285e-05, "loss": 1.3287, "step": 2648 }, { "epoch": 2.198340248962656, "grad_norm": 12.98293685913086, "learning_rate": 1.9121991701244814e-05, "loss": 1.5364, "step": 2649 }, { "epoch": 2.199170124481328, "grad_norm": 13.440439224243164, "learning_rate": 1.9121659751037346e-05, "loss": 0.9844, "step": 2650 }, { "epoch": 2.2, "grad_norm": 12.1198148727417, 
"learning_rate": 1.9121327800829875e-05, "loss": 1.5245, "step": 2651 }, { "epoch": 2.2008298755186724, "grad_norm": 8.836471557617188, "learning_rate": 1.9120995850622407e-05, "loss": 1.8873, "step": 2652 }, { "epoch": 2.2016597510373446, "grad_norm": 9.00040054321289, "learning_rate": 1.912066390041494e-05, "loss": 0.969, "step": 2653 }, { "epoch": 2.2024896265560168, "grad_norm": 7.6998162269592285, "learning_rate": 1.912033195020747e-05, "loss": 1.3721, "step": 2654 }, { "epoch": 2.203319502074689, "grad_norm": 11.834884643554688, "learning_rate": 1.912e-05, "loss": 2.1341, "step": 2655 }, { "epoch": 2.204149377593361, "grad_norm": 22.202669143676758, "learning_rate": 1.9119668049792532e-05, "loss": 2.1927, "step": 2656 }, { "epoch": 2.2049792531120334, "grad_norm": 10.357146263122559, "learning_rate": 1.9119336099585064e-05, "loss": 1.4526, "step": 2657 }, { "epoch": 2.2058091286307056, "grad_norm": 10.976402282714844, "learning_rate": 1.9119004149377596e-05, "loss": 1.1902, "step": 2658 }, { "epoch": 2.2066390041493777, "grad_norm": 7.388336181640625, "learning_rate": 1.9118672199170125e-05, "loss": 1.3426, "step": 2659 }, { "epoch": 2.20746887966805, "grad_norm": 8.3743257522583, "learning_rate": 1.9118340248962657e-05, "loss": 1.2853, "step": 2660 }, { "epoch": 2.208298755186722, "grad_norm": 10.91913890838623, "learning_rate": 1.911800829875519e-05, "loss": 1.3683, "step": 2661 }, { "epoch": 2.2091286307053943, "grad_norm": 9.07222843170166, "learning_rate": 1.911767634854772e-05, "loss": 1.404, "step": 2662 }, { "epoch": 2.2099585062240665, "grad_norm": 11.157424926757812, "learning_rate": 1.911734439834025e-05, "loss": 1.318, "step": 2663 }, { "epoch": 2.2107883817427387, "grad_norm": 7.6363444328308105, "learning_rate": 1.9117012448132782e-05, "loss": 1.4104, "step": 2664 }, { "epoch": 2.211618257261411, "grad_norm": 7.833524703979492, "learning_rate": 1.9116680497925314e-05, "loss": 1.2928, "step": 2665 }, { "epoch": 2.212448132780083, "grad_norm": 
7.639269828796387, "learning_rate": 1.9116348547717843e-05, "loss": 1.1132, "step": 2666 }, { "epoch": 2.2132780082987553, "grad_norm": 16.22468376159668, "learning_rate": 1.9116016597510375e-05, "loss": 1.795, "step": 2667 }, { "epoch": 2.2141078838174275, "grad_norm": 8.512378692626953, "learning_rate": 1.9115684647302907e-05, "loss": 1.8709, "step": 2668 }, { "epoch": 2.2149377593360997, "grad_norm": 10.526844024658203, "learning_rate": 1.9115352697095436e-05, "loss": 1.7422, "step": 2669 }, { "epoch": 2.215767634854772, "grad_norm": 12.08505916595459, "learning_rate": 1.9115020746887968e-05, "loss": 1.7835, "step": 2670 }, { "epoch": 2.216597510373444, "grad_norm": 8.657723426818848, "learning_rate": 1.91146887966805e-05, "loss": 1.3294, "step": 2671 }, { "epoch": 2.2174273858921163, "grad_norm": 11.35291862487793, "learning_rate": 1.911435684647303e-05, "loss": 1.9972, "step": 2672 }, { "epoch": 2.2182572614107885, "grad_norm": 9.484082221984863, "learning_rate": 1.911402489626556e-05, "loss": 1.4437, "step": 2673 }, { "epoch": 2.2190871369294607, "grad_norm": 7.086977005004883, "learning_rate": 1.9113692946058093e-05, "loss": 1.6465, "step": 2674 }, { "epoch": 2.219917012448133, "grad_norm": 19.870878219604492, "learning_rate": 1.9113360995850625e-05, "loss": 2.1595, "step": 2675 }, { "epoch": 2.220746887966805, "grad_norm": 8.72417163848877, "learning_rate": 1.9113029045643154e-05, "loss": 1.1615, "step": 2676 }, { "epoch": 2.2215767634854773, "grad_norm": 6.334792137145996, "learning_rate": 1.9112697095435686e-05, "loss": 0.93, "step": 2677 }, { "epoch": 2.2224066390041495, "grad_norm": 10.254833221435547, "learning_rate": 1.9112365145228218e-05, "loss": 1.9704, "step": 2678 }, { "epoch": 2.2232365145228217, "grad_norm": 9.38754653930664, "learning_rate": 1.911203319502075e-05, "loss": 1.2753, "step": 2679 }, { "epoch": 2.224066390041494, "grad_norm": 8.675891876220703, "learning_rate": 1.911170124481328e-05, "loss": 1.5703, "step": 2680 }, { "epoch": 
2.224896265560166, "grad_norm": 9.475407600402832, "learning_rate": 1.911136929460581e-05, "loss": 1.6842, "step": 2681 }, { "epoch": 2.2257261410788383, "grad_norm": 10.319535255432129, "learning_rate": 1.9111037344398343e-05, "loss": 2.0373, "step": 2682 }, { "epoch": 2.2265560165975105, "grad_norm": 11.647933006286621, "learning_rate": 1.9110705394190875e-05, "loss": 1.3081, "step": 2683 }, { "epoch": 2.2273858921161827, "grad_norm": 10.397621154785156, "learning_rate": 1.9110373443983404e-05, "loss": 1.377, "step": 2684 }, { "epoch": 2.228215767634855, "grad_norm": 7.043399810791016, "learning_rate": 1.9110041493775936e-05, "loss": 1.452, "step": 2685 }, { "epoch": 2.229045643153527, "grad_norm": 6.962493896484375, "learning_rate": 1.9109709543568468e-05, "loss": 1.1575, "step": 2686 }, { "epoch": 2.2298755186721992, "grad_norm": 10.792947769165039, "learning_rate": 1.9109377593360997e-05, "loss": 1.8874, "step": 2687 }, { "epoch": 2.2307053941908714, "grad_norm": 9.769786834716797, "learning_rate": 1.910904564315353e-05, "loss": 1.1267, "step": 2688 }, { "epoch": 2.2315352697095436, "grad_norm": 8.253867149353027, "learning_rate": 1.9108713692946058e-05, "loss": 1.1135, "step": 2689 }, { "epoch": 2.232365145228216, "grad_norm": 11.473889350891113, "learning_rate": 1.910838174273859e-05, "loss": 1.9532, "step": 2690 }, { "epoch": 2.233195020746888, "grad_norm": 9.184492111206055, "learning_rate": 1.9108049792531122e-05, "loss": 1.2013, "step": 2691 }, { "epoch": 2.2340248962655602, "grad_norm": 7.345178127288818, "learning_rate": 1.910771784232365e-05, "loss": 1.3553, "step": 2692 }, { "epoch": 2.2348547717842324, "grad_norm": 9.100554466247559, "learning_rate": 1.9107385892116183e-05, "loss": 1.5342, "step": 2693 }, { "epoch": 2.2356846473029046, "grad_norm": 6.979428768157959, "learning_rate": 1.9107053941908715e-05, "loss": 1.0389, "step": 2694 }, { "epoch": 2.236514522821577, "grad_norm": 12.139086723327637, "learning_rate": 1.9106721991701247e-05, "loss": 
1.1018, "step": 2695 }, { "epoch": 2.237344398340249, "grad_norm": 8.916826248168945, "learning_rate": 1.9106390041493776e-05, "loss": 1.4085, "step": 2696 }, { "epoch": 2.238174273858921, "grad_norm": 11.347062110900879, "learning_rate": 1.9106058091286308e-05, "loss": 2.3286, "step": 2697 }, { "epoch": 2.2390041493775934, "grad_norm": 9.308196067810059, "learning_rate": 1.910572614107884e-05, "loss": 1.3673, "step": 2698 }, { "epoch": 2.2398340248962656, "grad_norm": 7.896581172943115, "learning_rate": 1.9105394190871372e-05, "loss": 1.1647, "step": 2699 }, { "epoch": 2.240663900414938, "grad_norm": 7.131147384643555, "learning_rate": 1.9105062240663904e-05, "loss": 1.4772, "step": 2700 }, { "epoch": 2.24149377593361, "grad_norm": 8.326560974121094, "learning_rate": 1.9104730290456433e-05, "loss": 1.1337, "step": 2701 }, { "epoch": 2.242323651452282, "grad_norm": 15.803574562072754, "learning_rate": 1.9104398340248965e-05, "loss": 1.2954, "step": 2702 }, { "epoch": 2.2431535269709544, "grad_norm": 10.989089012145996, "learning_rate": 1.9104066390041497e-05, "loss": 2.334, "step": 2703 }, { "epoch": 2.2439834024896266, "grad_norm": 15.656307220458984, "learning_rate": 1.9103734439834026e-05, "loss": 1.22, "step": 2704 }, { "epoch": 2.2448132780082988, "grad_norm": 12.596370697021484, "learning_rate": 1.9103402489626558e-05, "loss": 1.5619, "step": 2705 }, { "epoch": 2.245643153526971, "grad_norm": 6.634912967681885, "learning_rate": 1.910307053941909e-05, "loss": 1.4494, "step": 2706 }, { "epoch": 2.246473029045643, "grad_norm": 15.913532257080078, "learning_rate": 1.910273858921162e-05, "loss": 1.1518, "step": 2707 }, { "epoch": 2.2473029045643154, "grad_norm": 7.433257102966309, "learning_rate": 1.910240663900415e-05, "loss": 1.7339, "step": 2708 }, { "epoch": 2.2481327800829876, "grad_norm": 15.324607849121094, "learning_rate": 1.9102074688796683e-05, "loss": 0.9256, "step": 2709 }, { "epoch": 2.2489626556016598, "grad_norm": 9.95607852935791, "learning_rate": 
1.910174273858921e-05, "loss": 1.3144, "step": 2710 }, { "epoch": 2.249792531120332, "grad_norm": 9.991671562194824, "learning_rate": 1.9101410788381744e-05, "loss": 2.0957, "step": 2711 }, { "epoch": 2.250622406639004, "grad_norm": 8.019237518310547, "learning_rate": 1.9101078838174276e-05, "loss": 1.4867, "step": 2712 }, { "epoch": 2.2514522821576763, "grad_norm": 11.438071250915527, "learning_rate": 1.9100746887966805e-05, "loss": 1.6322, "step": 2713 }, { "epoch": 2.2522821576763485, "grad_norm": 7.735003471374512, "learning_rate": 1.9100414937759337e-05, "loss": 0.9906, "step": 2714 }, { "epoch": 2.2531120331950207, "grad_norm": 8.410391807556152, "learning_rate": 1.910008298755187e-05, "loss": 1.5829, "step": 2715 }, { "epoch": 2.253941908713693, "grad_norm": 9.154509544372559, "learning_rate": 1.90997510373444e-05, "loss": 0.8037, "step": 2716 }, { "epoch": 2.254771784232365, "grad_norm": 13.861351013183594, "learning_rate": 1.909941908713693e-05, "loss": 2.1607, "step": 2717 }, { "epoch": 2.2556016597510373, "grad_norm": 10.329876899719238, "learning_rate": 1.909908713692946e-05, "loss": 0.8895, "step": 2718 }, { "epoch": 2.2564315352697095, "grad_norm": 11.522165298461914, "learning_rate": 1.9098755186721994e-05, "loss": 1.3845, "step": 2719 }, { "epoch": 2.2572614107883817, "grad_norm": 7.512920379638672, "learning_rate": 1.9098423236514526e-05, "loss": 1.3012, "step": 2720 }, { "epoch": 2.258091286307054, "grad_norm": 15.00888442993164, "learning_rate": 1.9098091286307055e-05, "loss": 1.6898, "step": 2721 }, { "epoch": 2.258921161825726, "grad_norm": 14.806727409362793, "learning_rate": 1.9097759336099587e-05, "loss": 1.8869, "step": 2722 }, { "epoch": 2.2597510373443983, "grad_norm": 8.912371635437012, "learning_rate": 1.909742738589212e-05, "loss": 1.7507, "step": 2723 }, { "epoch": 2.2605809128630705, "grad_norm": 14.144149780273438, "learning_rate": 1.909709543568465e-05, "loss": 2.0715, "step": 2724 }, { "epoch": 2.2614107883817427, "grad_norm": 
10.26998233795166, "learning_rate": 1.909676348547718e-05, "loss": 1.4226, "step": 2725 }, { "epoch": 2.262240663900415, "grad_norm": 8.492868423461914, "learning_rate": 1.9096431535269712e-05, "loss": 1.8236, "step": 2726 }, { "epoch": 2.263070539419087, "grad_norm": 8.400762557983398, "learning_rate": 1.909609958506224e-05, "loss": 0.9026, "step": 2727 }, { "epoch": 2.2639004149377593, "grad_norm": 16.356037139892578, "learning_rate": 1.9095767634854773e-05, "loss": 1.3996, "step": 2728 }, { "epoch": 2.2647302904564315, "grad_norm": 11.800326347351074, "learning_rate": 1.9095435684647305e-05, "loss": 1.0874, "step": 2729 }, { "epoch": 2.2655601659751037, "grad_norm": 8.1221284866333, "learning_rate": 1.9095103734439833e-05, "loss": 1.3322, "step": 2730 }, { "epoch": 2.266390041493776, "grad_norm": 7.892780780792236, "learning_rate": 1.9094771784232366e-05, "loss": 1.803, "step": 2731 }, { "epoch": 2.267219917012448, "grad_norm": 8.178391456604004, "learning_rate": 1.9094439834024898e-05, "loss": 1.2411, "step": 2732 }, { "epoch": 2.2680497925311203, "grad_norm": 10.785005569458008, "learning_rate": 1.909410788381743e-05, "loss": 1.664, "step": 2733 }, { "epoch": 2.2688796680497925, "grad_norm": 9.66584300994873, "learning_rate": 1.909377593360996e-05, "loss": 1.5881, "step": 2734 }, { "epoch": 2.2697095435684647, "grad_norm": 8.366338729858398, "learning_rate": 1.909344398340249e-05, "loss": 1.7013, "step": 2735 }, { "epoch": 2.270539419087137, "grad_norm": 13.908855438232422, "learning_rate": 1.9093112033195023e-05, "loss": 1.7045, "step": 2736 }, { "epoch": 2.271369294605809, "grad_norm": 9.614745140075684, "learning_rate": 1.9092780082987555e-05, "loss": 1.4459, "step": 2737 }, { "epoch": 2.2721991701244812, "grad_norm": 5.697186470031738, "learning_rate": 1.9092448132780084e-05, "loss": 1.1577, "step": 2738 }, { "epoch": 2.2730290456431534, "grad_norm": 13.849976539611816, "learning_rate": 1.9092116182572616e-05, "loss": 1.685, "step": 2739 }, { "epoch": 
2.2738589211618256, "grad_norm": 6.076547622680664, "learning_rate": 1.9091784232365148e-05, "loss": 0.5638, "step": 2740 }, { "epoch": 2.274688796680498, "grad_norm": 7.790408611297607, "learning_rate": 1.909145228215768e-05, "loss": 0.8159, "step": 2741 }, { "epoch": 2.27551867219917, "grad_norm": 9.004902839660645, "learning_rate": 1.909112033195021e-05, "loss": 2.1404, "step": 2742 }, { "epoch": 2.2763485477178422, "grad_norm": 11.850214958190918, "learning_rate": 1.909078838174274e-05, "loss": 1.7555, "step": 2743 }, { "epoch": 2.2771784232365144, "grad_norm": 8.527634620666504, "learning_rate": 1.9090456431535273e-05, "loss": 1.2553, "step": 2744 }, { "epoch": 2.2780082987551866, "grad_norm": 8.941749572753906, "learning_rate": 1.90901244813278e-05, "loss": 1.7394, "step": 2745 }, { "epoch": 2.278838174273859, "grad_norm": 12.485697746276855, "learning_rate": 1.9089792531120334e-05, "loss": 2.2152, "step": 2746 }, { "epoch": 2.279668049792531, "grad_norm": 8.451172828674316, "learning_rate": 1.9089460580912866e-05, "loss": 1.3009, "step": 2747 }, { "epoch": 2.280497925311203, "grad_norm": 9.08105754852295, "learning_rate": 1.9089128630705394e-05, "loss": 1.1601, "step": 2748 }, { "epoch": 2.2813278008298754, "grad_norm": 11.169414520263672, "learning_rate": 1.9088796680497927e-05, "loss": 2.6856, "step": 2749 }, { "epoch": 2.2821576763485476, "grad_norm": 14.97182559967041, "learning_rate": 1.908846473029046e-05, "loss": 1.6163, "step": 2750 }, { "epoch": 2.28298755186722, "grad_norm": 12.782700538635254, "learning_rate": 1.9088132780082987e-05, "loss": 1.0881, "step": 2751 }, { "epoch": 2.283817427385892, "grad_norm": 9.241196632385254, "learning_rate": 1.908780082987552e-05, "loss": 1.3564, "step": 2752 }, { "epoch": 2.284647302904564, "grad_norm": 9.292325019836426, "learning_rate": 1.908746887966805e-05, "loss": 1.7398, "step": 2753 }, { "epoch": 2.2854771784232364, "grad_norm": 7.050085067749023, "learning_rate": 1.9087136929460584e-05, "loss": 1.1127, 
"step": 2754 }, { "epoch": 2.2863070539419086, "grad_norm": 10.317290306091309, "learning_rate": 1.9086804979253112e-05, "loss": 1.3201, "step": 2755 }, { "epoch": 2.287136929460581, "grad_norm": 11.39129638671875, "learning_rate": 1.9086473029045645e-05, "loss": 1.6778, "step": 2756 }, { "epoch": 2.287966804979253, "grad_norm": 12.853645324707031, "learning_rate": 1.9086141078838177e-05, "loss": 2.4578, "step": 2757 }, { "epoch": 2.288796680497925, "grad_norm": 6.7998456954956055, "learning_rate": 1.908580912863071e-05, "loss": 1.0623, "step": 2758 }, { "epoch": 2.2896265560165974, "grad_norm": 6.789866924285889, "learning_rate": 1.9085477178423237e-05, "loss": 1.2193, "step": 2759 }, { "epoch": 2.2904564315352696, "grad_norm": 10.508180618286133, "learning_rate": 1.908514522821577e-05, "loss": 2.0027, "step": 2760 }, { "epoch": 2.2912863070539418, "grad_norm": 8.612090110778809, "learning_rate": 1.90848132780083e-05, "loss": 1.139, "step": 2761 }, { "epoch": 2.292116182572614, "grad_norm": 16.84589958190918, "learning_rate": 1.9084481327800834e-05, "loss": 1.3714, "step": 2762 }, { "epoch": 2.292946058091286, "grad_norm": 11.768668174743652, "learning_rate": 1.9084149377593362e-05, "loss": 1.9129, "step": 2763 }, { "epoch": 2.2937759336099584, "grad_norm": 9.699544906616211, "learning_rate": 1.9083817427385895e-05, "loss": 1.026, "step": 2764 }, { "epoch": 2.2946058091286305, "grad_norm": 9.745293617248535, "learning_rate": 1.9083485477178427e-05, "loss": 1.5705, "step": 2765 }, { "epoch": 2.2954356846473027, "grad_norm": 10.219700813293457, "learning_rate": 1.9083153526970955e-05, "loss": 1.7589, "step": 2766 }, { "epoch": 2.296265560165975, "grad_norm": 9.316211700439453, "learning_rate": 1.9082821576763488e-05, "loss": 1.2486, "step": 2767 }, { "epoch": 2.297095435684647, "grad_norm": 7.44034481048584, "learning_rate": 1.9082489626556016e-05, "loss": 1.2853, "step": 2768 }, { "epoch": 2.2979253112033193, "grad_norm": 11.022424697875977, "learning_rate": 
1.908215767634855e-05, "loss": 1.0535, "step": 2769 }, { "epoch": 2.2987551867219915, "grad_norm": 7.1119704246521, "learning_rate": 1.908182572614108e-05, "loss": 1.0455, "step": 2770 }, { "epoch": 2.2995850622406637, "grad_norm": 6.50109338760376, "learning_rate": 1.908149377593361e-05, "loss": 1.0174, "step": 2771 }, { "epoch": 2.300414937759336, "grad_norm": 10.041461944580078, "learning_rate": 1.908116182572614e-05, "loss": 1.5801, "step": 2772 }, { "epoch": 2.301244813278008, "grad_norm": 10.464702606201172, "learning_rate": 1.9080829875518673e-05, "loss": 1.5161, "step": 2773 }, { "epoch": 2.3020746887966803, "grad_norm": 8.017416000366211, "learning_rate": 1.9080497925311206e-05, "loss": 1.3966, "step": 2774 }, { "epoch": 2.3029045643153525, "grad_norm": 14.879337310791016, "learning_rate": 1.9080165975103734e-05, "loss": 2.384, "step": 2775 }, { "epoch": 2.3037344398340247, "grad_norm": 10.622455596923828, "learning_rate": 1.9079834024896266e-05, "loss": 1.5078, "step": 2776 }, { "epoch": 2.304564315352697, "grad_norm": 12.222049713134766, "learning_rate": 1.90795020746888e-05, "loss": 1.9987, "step": 2777 }, { "epoch": 2.305394190871369, "grad_norm": 7.4754438400268555, "learning_rate": 1.907917012448133e-05, "loss": 0.921, "step": 2778 }, { "epoch": 2.3062240663900413, "grad_norm": 10.39736557006836, "learning_rate": 1.9078838174273863e-05, "loss": 1.1076, "step": 2779 }, { "epoch": 2.3070539419087135, "grad_norm": 12.09475326538086, "learning_rate": 1.907850622406639e-05, "loss": 1.6995, "step": 2780 }, { "epoch": 2.3078838174273857, "grad_norm": 6.580734729766846, "learning_rate": 1.9078174273858923e-05, "loss": 1.0576, "step": 2781 }, { "epoch": 2.308713692946058, "grad_norm": 10.404513359069824, "learning_rate": 1.9077842323651456e-05, "loss": 1.5729, "step": 2782 }, { "epoch": 2.30954356846473, "grad_norm": 8.871068000793457, "learning_rate": 1.9077510373443984e-05, "loss": 1.5928, "step": 2783 }, { "epoch": 2.3103734439834023, "grad_norm": 
26.653148651123047, "learning_rate": 1.9077178423236516e-05, "loss": 2.2055, "step": 2784 }, { "epoch": 2.3112033195020745, "grad_norm": 8.71789264678955, "learning_rate": 1.907684647302905e-05, "loss": 1.3947, "step": 2785 }, { "epoch": 2.3120331950207467, "grad_norm": 7.536588668823242, "learning_rate": 1.9076514522821577e-05, "loss": 1.1524, "step": 2786 }, { "epoch": 2.312863070539419, "grad_norm": 8.773945808410645, "learning_rate": 1.907618257261411e-05, "loss": 0.9594, "step": 2787 }, { "epoch": 2.313692946058091, "grad_norm": 10.178915977478027, "learning_rate": 1.9075850622406638e-05, "loss": 1.4218, "step": 2788 }, { "epoch": 2.3145228215767633, "grad_norm": 11.187898635864258, "learning_rate": 1.907551867219917e-05, "loss": 1.5219, "step": 2789 }, { "epoch": 2.3153526970954355, "grad_norm": 8.364126205444336, "learning_rate": 1.9075186721991702e-05, "loss": 1.256, "step": 2790 }, { "epoch": 2.3161825726141076, "grad_norm": 11.639732360839844, "learning_rate": 1.9074854771784234e-05, "loss": 1.5347, "step": 2791 }, { "epoch": 2.31701244813278, "grad_norm": 11.120314598083496, "learning_rate": 1.9074522821576763e-05, "loss": 1.8667, "step": 2792 }, { "epoch": 2.317842323651452, "grad_norm": 8.670122146606445, "learning_rate": 1.9074190871369295e-05, "loss": 1.4045, "step": 2793 }, { "epoch": 2.3186721991701242, "grad_norm": 16.47856330871582, "learning_rate": 1.9073858921161827e-05, "loss": 2.2811, "step": 2794 }, { "epoch": 2.3195020746887964, "grad_norm": 9.695953369140625, "learning_rate": 1.907352697095436e-05, "loss": 1.1453, "step": 2795 }, { "epoch": 2.320331950207469, "grad_norm": 10.549628257751465, "learning_rate": 1.9073195020746888e-05, "loss": 1.5963, "step": 2796 }, { "epoch": 2.3211618257261413, "grad_norm": 8.264155387878418, "learning_rate": 1.907286307053942e-05, "loss": 1.5245, "step": 2797 }, { "epoch": 2.3219917012448135, "grad_norm": 12.970861434936523, "learning_rate": 1.9072531120331952e-05, "loss": 1.7659, "step": 2798 }, { 
"epoch": 2.3228215767634857, "grad_norm": 11.4599609375, "learning_rate": 1.9072199170124484e-05, "loss": 1.8446, "step": 2799 }, { "epoch": 2.323651452282158, "grad_norm": 9.033869743347168, "learning_rate": 1.9071867219917013e-05, "loss": 1.8572, "step": 2800 }, { "epoch": 2.32448132780083, "grad_norm": 7.648157119750977, "learning_rate": 1.9071535269709545e-05, "loss": 0.7887, "step": 2801 }, { "epoch": 2.3253112033195023, "grad_norm": 7.758192539215088, "learning_rate": 1.9071203319502077e-05, "loss": 1.6317, "step": 2802 }, { "epoch": 2.3261410788381744, "grad_norm": 12.079388618469238, "learning_rate": 1.907087136929461e-05, "loss": 1.2878, "step": 2803 }, { "epoch": 2.3269709543568466, "grad_norm": 8.79883098602295, "learning_rate": 1.9070539419087138e-05, "loss": 1.7706, "step": 2804 }, { "epoch": 2.327800829875519, "grad_norm": 10.530709266662598, "learning_rate": 1.907020746887967e-05, "loss": 1.2416, "step": 2805 }, { "epoch": 2.328630705394191, "grad_norm": 9.6174955368042, "learning_rate": 1.90698755186722e-05, "loss": 0.8777, "step": 2806 }, { "epoch": 2.3294605809128632, "grad_norm": 16.057628631591797, "learning_rate": 1.906954356846473e-05, "loss": 1.6559, "step": 2807 }, { "epoch": 2.3302904564315354, "grad_norm": 8.092164993286133, "learning_rate": 1.9069211618257263e-05, "loss": 1.3684, "step": 2808 }, { "epoch": 2.3311203319502076, "grad_norm": 13.530648231506348, "learning_rate": 1.9068879668049792e-05, "loss": 1.972, "step": 2809 }, { "epoch": 2.33195020746888, "grad_norm": 8.584879875183105, "learning_rate": 1.9068547717842324e-05, "loss": 1.3836, "step": 2810 }, { "epoch": 2.332780082987552, "grad_norm": 10.880836486816406, "learning_rate": 1.9068215767634856e-05, "loss": 1.7841, "step": 2811 }, { "epoch": 2.333609958506224, "grad_norm": 14.95755672454834, "learning_rate": 1.906788381742739e-05, "loss": 2.6918, "step": 2812 }, { "epoch": 2.3344398340248964, "grad_norm": 10.69562816619873, "learning_rate": 1.9067551867219917e-05, "loss": 
1.7442, "step": 2813 }, { "epoch": 2.3352697095435686, "grad_norm": 11.25577449798584, "learning_rate": 1.906721991701245e-05, "loss": 1.5557, "step": 2814 }, { "epoch": 2.336099585062241, "grad_norm": 10.46995735168457, "learning_rate": 1.906688796680498e-05, "loss": 1.6084, "step": 2815 }, { "epoch": 2.336929460580913, "grad_norm": 16.511394500732422, "learning_rate": 1.9066556016597513e-05, "loss": 2.9103, "step": 2816 }, { "epoch": 2.337759336099585, "grad_norm": 11.384230613708496, "learning_rate": 1.9066224066390042e-05, "loss": 2.0667, "step": 2817 }, { "epoch": 2.3385892116182574, "grad_norm": 8.013731002807617, "learning_rate": 1.9065892116182574e-05, "loss": 1.2586, "step": 2818 }, { "epoch": 2.3394190871369296, "grad_norm": 8.585970878601074, "learning_rate": 1.9065560165975106e-05, "loss": 2.0626, "step": 2819 }, { "epoch": 2.340248962655602, "grad_norm": 14.292641639709473, "learning_rate": 1.906522821576764e-05, "loss": 1.3547, "step": 2820 }, { "epoch": 2.341078838174274, "grad_norm": 8.722957611083984, "learning_rate": 1.9064896265560167e-05, "loss": 0.9783, "step": 2821 }, { "epoch": 2.341908713692946, "grad_norm": 8.633204460144043, "learning_rate": 1.90645643153527e-05, "loss": 0.9856, "step": 2822 }, { "epoch": 2.3427385892116184, "grad_norm": 7.689266204833984, "learning_rate": 1.906423236514523e-05, "loss": 1.3561, "step": 2823 }, { "epoch": 2.3435684647302906, "grad_norm": 12.0396089553833, "learning_rate": 1.906390041493776e-05, "loss": 1.2896, "step": 2824 }, { "epoch": 2.3443983402489628, "grad_norm": 13.226630210876465, "learning_rate": 1.9063568464730292e-05, "loss": 1.4422, "step": 2825 }, { "epoch": 2.345228215767635, "grad_norm": 10.216367721557617, "learning_rate": 1.9063236514522824e-05, "loss": 2.4546, "step": 2826 }, { "epoch": 2.346058091286307, "grad_norm": 6.152241230010986, "learning_rate": 1.9062904564315353e-05, "loss": 1.0609, "step": 2827 }, { "epoch": 2.3468879668049794, "grad_norm": 9.495250701904297, "learning_rate": 
1.9062572614107885e-05, "loss": 1.2329, "step": 2828 }, { "epoch": 2.3477178423236515, "grad_norm": 12.387079238891602, "learning_rate": 1.9062240663900414e-05, "loss": 1.3451, "step": 2829 }, { "epoch": 2.3485477178423237, "grad_norm": 12.189041137695312, "learning_rate": 1.9061908713692946e-05, "loss": 1.8825, "step": 2830 }, { "epoch": 2.349377593360996, "grad_norm": 11.75938892364502, "learning_rate": 1.9061576763485478e-05, "loss": 1.8647, "step": 2831 }, { "epoch": 2.350207468879668, "grad_norm": 13.362648010253906, "learning_rate": 1.906124481327801e-05, "loss": 1.4314, "step": 2832 }, { "epoch": 2.3510373443983403, "grad_norm": 9.592211723327637, "learning_rate": 1.9060912863070542e-05, "loss": 1.0864, "step": 2833 }, { "epoch": 2.3518672199170125, "grad_norm": 6.118255615234375, "learning_rate": 1.906058091286307e-05, "loss": 0.9592, "step": 2834 }, { "epoch": 2.3526970954356847, "grad_norm": 7.83433198928833, "learning_rate": 1.9060248962655603e-05, "loss": 1.7039, "step": 2835 }, { "epoch": 2.353526970954357, "grad_norm": 8.428567886352539, "learning_rate": 1.9059917012448135e-05, "loss": 1.2261, "step": 2836 }, { "epoch": 2.354356846473029, "grad_norm": 8.03513240814209, "learning_rate": 1.9059585062240667e-05, "loss": 0.991, "step": 2837 }, { "epoch": 2.3551867219917013, "grad_norm": 14.382529258728027, "learning_rate": 1.9059253112033196e-05, "loss": 1.4634, "step": 2838 }, { "epoch": 2.3560165975103735, "grad_norm": 6.972408294677734, "learning_rate": 1.9058921161825728e-05, "loss": 0.687, "step": 2839 }, { "epoch": 2.3568464730290457, "grad_norm": 10.40318489074707, "learning_rate": 1.905858921161826e-05, "loss": 1.4513, "step": 2840 }, { "epoch": 2.357676348547718, "grad_norm": 11.126317977905273, "learning_rate": 1.9058257261410792e-05, "loss": 1.5537, "step": 2841 }, { "epoch": 2.35850622406639, "grad_norm": 14.213129997253418, "learning_rate": 1.905792531120332e-05, "loss": 1.727, "step": 2842 }, { "epoch": 2.3593360995850623, "grad_norm": 
13.754611015319824, "learning_rate": 1.9057593360995853e-05, "loss": 1.5974, "step": 2843 }, { "epoch": 2.3601659751037345, "grad_norm": 9.253334045410156, "learning_rate": 1.9057261410788382e-05, "loss": 1.0971, "step": 2844 }, { "epoch": 2.3609958506224067, "grad_norm": 10.636733055114746, "learning_rate": 1.9056929460580914e-05, "loss": 1.1426, "step": 2845 }, { "epoch": 2.361825726141079, "grad_norm": 9.54047679901123, "learning_rate": 1.9056597510373446e-05, "loss": 1.2359, "step": 2846 }, { "epoch": 2.362655601659751, "grad_norm": 14.764167785644531, "learning_rate": 1.9056265560165975e-05, "loss": 1.7339, "step": 2847 }, { "epoch": 2.3634854771784233, "grad_norm": 13.897377967834473, "learning_rate": 1.9055933609958507e-05, "loss": 1.1764, "step": 2848 }, { "epoch": 2.3643153526970955, "grad_norm": 8.559100151062012, "learning_rate": 1.905560165975104e-05, "loss": 0.9331, "step": 2849 }, { "epoch": 2.3651452282157677, "grad_norm": 15.481189727783203, "learning_rate": 1.9055269709543568e-05, "loss": 2.3016, "step": 2850 }, { "epoch": 2.36597510373444, "grad_norm": 8.820018768310547, "learning_rate": 1.90549377593361e-05, "loss": 1.3543, "step": 2851 }, { "epoch": 2.366804979253112, "grad_norm": 10.377511024475098, "learning_rate": 1.9054605809128632e-05, "loss": 1.5798, "step": 2852 }, { "epoch": 2.3676348547717843, "grad_norm": 7.566277027130127, "learning_rate": 1.9054273858921164e-05, "loss": 1.225, "step": 2853 }, { "epoch": 2.3684647302904565, "grad_norm": 10.682597160339355, "learning_rate": 1.9053941908713693e-05, "loss": 1.5835, "step": 2854 }, { "epoch": 2.3692946058091287, "grad_norm": 12.653092384338379, "learning_rate": 1.9053609958506225e-05, "loss": 1.4251, "step": 2855 }, { "epoch": 2.370124481327801, "grad_norm": 7.809486389160156, "learning_rate": 1.9053278008298757e-05, "loss": 1.2062, "step": 2856 }, { "epoch": 2.370954356846473, "grad_norm": 10.534130096435547, "learning_rate": 1.905294605809129e-05, "loss": 1.2018, "step": 2857 }, { 
"epoch": 2.3717842323651452, "grad_norm": 8.030129432678223, "learning_rate": 1.905261410788382e-05, "loss": 1.2315, "step": 2858 }, { "epoch": 2.3726141078838174, "grad_norm": 8.640446662902832, "learning_rate": 1.905228215767635e-05, "loss": 0.9001, "step": 2859 }, { "epoch": 2.3734439834024896, "grad_norm": 11.414006233215332, "learning_rate": 1.9051950207468882e-05, "loss": 1.0946, "step": 2860 }, { "epoch": 2.374273858921162, "grad_norm": 10.43490982055664, "learning_rate": 1.9051618257261414e-05, "loss": 1.6759, "step": 2861 }, { "epoch": 2.375103734439834, "grad_norm": 11.017292022705078, "learning_rate": 1.9051286307053943e-05, "loss": 1.9355, "step": 2862 }, { "epoch": 2.375933609958506, "grad_norm": 10.010336875915527, "learning_rate": 1.9050954356846475e-05, "loss": 1.6917, "step": 2863 }, { "epoch": 2.3767634854771784, "grad_norm": 15.976592063903809, "learning_rate": 1.9050622406639007e-05, "loss": 1.5876, "step": 2864 }, { "epoch": 2.3775933609958506, "grad_norm": 12.930251121520996, "learning_rate": 1.9050290456431536e-05, "loss": 1.8739, "step": 2865 }, { "epoch": 2.378423236514523, "grad_norm": 9.549222946166992, "learning_rate": 1.9049958506224068e-05, "loss": 2.1311, "step": 2866 }, { "epoch": 2.379253112033195, "grad_norm": 9.616665840148926, "learning_rate": 1.9049626556016597e-05, "loss": 1.3257, "step": 2867 }, { "epoch": 2.380082987551867, "grad_norm": 9.9885892868042, "learning_rate": 1.904929460580913e-05, "loss": 1.4045, "step": 2868 }, { "epoch": 2.3809128630705394, "grad_norm": 12.957947731018066, "learning_rate": 1.904896265560166e-05, "loss": 1.8442, "step": 2869 }, { "epoch": 2.3817427385892116, "grad_norm": 20.61417007446289, "learning_rate": 1.9048630705394193e-05, "loss": 1.3342, "step": 2870 }, { "epoch": 2.382572614107884, "grad_norm": 13.789813041687012, "learning_rate": 1.904829875518672e-05, "loss": 2.4454, "step": 2871 }, { "epoch": 2.383402489626556, "grad_norm": 9.945839881896973, "learning_rate": 1.9047966804979254e-05, 
"loss": 1.5208, "step": 2872 }, { "epoch": 2.384232365145228, "grad_norm": 11.720196723937988, "learning_rate": 1.9047634854771786e-05, "loss": 1.3529, "step": 2873 }, { "epoch": 2.3850622406639004, "grad_norm": 16.17715835571289, "learning_rate": 1.9047302904564318e-05, "loss": 1.845, "step": 2874 }, { "epoch": 2.3858921161825726, "grad_norm": 10.016830444335938, "learning_rate": 1.9046970954356847e-05, "loss": 1.6402, "step": 2875 }, { "epoch": 2.3867219917012448, "grad_norm": 13.12220287322998, "learning_rate": 1.904663900414938e-05, "loss": 2.1068, "step": 2876 }, { "epoch": 2.387551867219917, "grad_norm": 10.220163345336914, "learning_rate": 1.904630705394191e-05, "loss": 1.8135, "step": 2877 }, { "epoch": 2.388381742738589, "grad_norm": 12.314238548278809, "learning_rate": 1.9045975103734443e-05, "loss": 1.8035, "step": 2878 }, { "epoch": 2.3892116182572614, "grad_norm": 11.391175270080566, "learning_rate": 1.9045643153526972e-05, "loss": 1.6327, "step": 2879 }, { "epoch": 2.3900414937759336, "grad_norm": 12.677956581115723, "learning_rate": 1.9045311203319504e-05, "loss": 1.6176, "step": 2880 }, { "epoch": 2.3908713692946058, "grad_norm": 10.366390228271484, "learning_rate": 1.9044979253112036e-05, "loss": 1.6964, "step": 2881 }, { "epoch": 2.391701244813278, "grad_norm": 8.531329154968262, "learning_rate": 1.9044647302904568e-05, "loss": 1.4281, "step": 2882 }, { "epoch": 2.39253112033195, "grad_norm": 10.552675247192383, "learning_rate": 1.9044315352697097e-05, "loss": 1.5906, "step": 2883 }, { "epoch": 2.3933609958506223, "grad_norm": 7.625146389007568, "learning_rate": 1.904398340248963e-05, "loss": 1.1297, "step": 2884 }, { "epoch": 2.3941908713692945, "grad_norm": 10.197431564331055, "learning_rate": 1.9043651452282158e-05, "loss": 1.367, "step": 2885 }, { "epoch": 2.3950207468879667, "grad_norm": 12.549077987670898, "learning_rate": 1.904331950207469e-05, "loss": 2.0834, "step": 2886 }, { "epoch": 2.395850622406639, "grad_norm": 10.473624229431152, 
"learning_rate": 1.9042987551867222e-05, "loss": 2.01, "step": 2887 }, { "epoch": 2.396680497925311, "grad_norm": 10.332006454467773, "learning_rate": 1.904265560165975e-05, "loss": 1.8203, "step": 2888 }, { "epoch": 2.3975103734439833, "grad_norm": 6.971932411193848, "learning_rate": 1.9042323651452283e-05, "loss": 1.1765, "step": 2889 }, { "epoch": 2.3983402489626555, "grad_norm": 11.472186088562012, "learning_rate": 1.9041991701244815e-05, "loss": 1.9175, "step": 2890 }, { "epoch": 2.3991701244813277, "grad_norm": 14.440092086791992, "learning_rate": 1.9041659751037347e-05, "loss": 2.0272, "step": 2891 }, { "epoch": 2.4, "grad_norm": 9.39772891998291, "learning_rate": 1.9041327800829876e-05, "loss": 1.5477, "step": 2892 }, { "epoch": 2.400829875518672, "grad_norm": 12.417226791381836, "learning_rate": 1.9040995850622408e-05, "loss": 1.4263, "step": 2893 }, { "epoch": 2.4016597510373443, "grad_norm": 7.6314263343811035, "learning_rate": 1.904066390041494e-05, "loss": 0.9453, "step": 2894 }, { "epoch": 2.4024896265560165, "grad_norm": 13.20450210571289, "learning_rate": 1.9040331950207472e-05, "loss": 1.7976, "step": 2895 }, { "epoch": 2.4033195020746887, "grad_norm": 12.390836715698242, "learning_rate": 1.904e-05, "loss": 2.7186, "step": 2896 }, { "epoch": 2.404149377593361, "grad_norm": 10.723623275756836, "learning_rate": 1.9039668049792533e-05, "loss": 1.9458, "step": 2897 }, { "epoch": 2.404979253112033, "grad_norm": 10.030876159667969, "learning_rate": 1.9039336099585065e-05, "loss": 1.9069, "step": 2898 }, { "epoch": 2.4058091286307053, "grad_norm": 8.893101692199707, "learning_rate": 1.9039004149377597e-05, "loss": 0.8545, "step": 2899 }, { "epoch": 2.4066390041493775, "grad_norm": 14.967886924743652, "learning_rate": 1.9038672199170126e-05, "loss": 1.8882, "step": 2900 }, { "epoch": 2.4074688796680497, "grad_norm": 8.939778327941895, "learning_rate": 1.9038340248962658e-05, "loss": 1.33, "step": 2901 }, { "epoch": 2.408298755186722, "grad_norm": 
12.260611534118652, "learning_rate": 1.903800829875519e-05, "loss": 1.1176, "step": 2902 }, { "epoch": 2.409128630705394, "grad_norm": 11.762990951538086, "learning_rate": 1.903767634854772e-05, "loss": 1.9007, "step": 2903 }, { "epoch": 2.4099585062240663, "grad_norm": 10.835368156433105, "learning_rate": 1.903734439834025e-05, "loss": 1.2253, "step": 2904 }, { "epoch": 2.4107883817427385, "grad_norm": 8.803507804870605, "learning_rate": 1.903701244813278e-05, "loss": 1.2814, "step": 2905 }, { "epoch": 2.4116182572614107, "grad_norm": 12.526862144470215, "learning_rate": 1.903668049792531e-05, "loss": 1.6553, "step": 2906 }, { "epoch": 2.412448132780083, "grad_norm": 9.696232795715332, "learning_rate": 1.9036348547717844e-05, "loss": 1.2146, "step": 2907 }, { "epoch": 2.413278008298755, "grad_norm": 14.269275665283203, "learning_rate": 1.9036016597510372e-05, "loss": 2.3985, "step": 2908 }, { "epoch": 2.4141078838174272, "grad_norm": 9.894453048706055, "learning_rate": 1.9035684647302905e-05, "loss": 1.4718, "step": 2909 }, { "epoch": 2.4149377593360994, "grad_norm": 7.264761924743652, "learning_rate": 1.9035352697095437e-05, "loss": 0.8441, "step": 2910 }, { "epoch": 2.4157676348547716, "grad_norm": 9.383100509643555, "learning_rate": 1.903502074688797e-05, "loss": 1.4372, "step": 2911 }, { "epoch": 2.416597510373444, "grad_norm": 11.234149932861328, "learning_rate": 1.90346887966805e-05, "loss": 1.5967, "step": 2912 }, { "epoch": 2.417427385892116, "grad_norm": 8.087450981140137, "learning_rate": 1.903435684647303e-05, "loss": 1.3402, "step": 2913 }, { "epoch": 2.4182572614107882, "grad_norm": 10.88637924194336, "learning_rate": 1.903402489626556e-05, "loss": 1.3719, "step": 2914 }, { "epoch": 2.4190871369294604, "grad_norm": 10.645732879638672, "learning_rate": 1.9033692946058094e-05, "loss": 1.3828, "step": 2915 }, { "epoch": 2.4199170124481326, "grad_norm": 8.932549476623535, "learning_rate": 1.9033360995850626e-05, "loss": 1.8939, "step": 2916 }, { "epoch": 
2.420746887966805, "grad_norm": 12.358599662780762, "learning_rate": 1.9033029045643155e-05, "loss": 0.8985, "step": 2917 }, { "epoch": 2.421576763485477, "grad_norm": 13.293651580810547, "learning_rate": 1.9032697095435687e-05, "loss": 1.6662, "step": 2918 }, { "epoch": 2.422406639004149, "grad_norm": 8.543951988220215, "learning_rate": 1.903236514522822e-05, "loss": 1.2968, "step": 2919 }, { "epoch": 2.4232365145228214, "grad_norm": 7.43954610824585, "learning_rate": 1.903203319502075e-05, "loss": 1.513, "step": 2920 }, { "epoch": 2.4240663900414936, "grad_norm": 13.631239891052246, "learning_rate": 1.903170124481328e-05, "loss": 1.9533, "step": 2921 }, { "epoch": 2.424896265560166, "grad_norm": 9.578145980834961, "learning_rate": 1.9031369294605812e-05, "loss": 1.2081, "step": 2922 }, { "epoch": 2.425726141078838, "grad_norm": 9.426339149475098, "learning_rate": 1.903103734439834e-05, "loss": 1.116, "step": 2923 }, { "epoch": 2.42655601659751, "grad_norm": 13.713653564453125, "learning_rate": 1.9030705394190873e-05, "loss": 2.0413, "step": 2924 }, { "epoch": 2.4273858921161824, "grad_norm": 14.103453636169434, "learning_rate": 1.9030373443983405e-05, "loss": 1.6553, "step": 2925 }, { "epoch": 2.4282157676348546, "grad_norm": 10.030986785888672, "learning_rate": 1.9030041493775933e-05, "loss": 1.3829, "step": 2926 }, { "epoch": 2.429045643153527, "grad_norm": 12.948698043823242, "learning_rate": 1.9029709543568466e-05, "loss": 0.7191, "step": 2927 }, { "epoch": 2.4298755186721994, "grad_norm": 11.342391014099121, "learning_rate": 1.9029377593360998e-05, "loss": 0.9132, "step": 2928 }, { "epoch": 2.4307053941908716, "grad_norm": 6.974279880523682, "learning_rate": 1.9029045643153526e-05, "loss": 1.6379, "step": 2929 }, { "epoch": 2.431535269709544, "grad_norm": 14.159711837768555, "learning_rate": 1.902871369294606e-05, "loss": 1.8542, "step": 2930 }, { "epoch": 2.432365145228216, "grad_norm": 10.931143760681152, "learning_rate": 1.902838174273859e-05, "loss": 
1.7084, "step": 2931 }, { "epoch": 2.433195020746888, "grad_norm": 17.96043586730957, "learning_rate": 1.9028049792531123e-05, "loss": 1.7458, "step": 2932 }, { "epoch": 2.4340248962655604, "grad_norm": 8.741089820861816, "learning_rate": 1.902771784232365e-05, "loss": 1.3419, "step": 2933 }, { "epoch": 2.4348547717842326, "grad_norm": 9.37428092956543, "learning_rate": 1.9027385892116183e-05, "loss": 1.3787, "step": 2934 }, { "epoch": 2.435684647302905, "grad_norm": 12.244746208190918, "learning_rate": 1.9027053941908716e-05, "loss": 1.032, "step": 2935 }, { "epoch": 2.436514522821577, "grad_norm": 13.955615043640137, "learning_rate": 1.9026721991701248e-05, "loss": 1.4203, "step": 2936 }, { "epoch": 2.437344398340249, "grad_norm": 9.121560096740723, "learning_rate": 1.902639004149378e-05, "loss": 1.7129, "step": 2937 }, { "epoch": 2.4381742738589214, "grad_norm": 9.715144157409668, "learning_rate": 1.902605809128631e-05, "loss": 1.528, "step": 2938 }, { "epoch": 2.4390041493775936, "grad_norm": 8.811474800109863, "learning_rate": 1.902572614107884e-05, "loss": 1.0179, "step": 2939 }, { "epoch": 2.4398340248962658, "grad_norm": 11.311925888061523, "learning_rate": 1.9025394190871373e-05, "loss": 1.6552, "step": 2940 }, { "epoch": 2.440663900414938, "grad_norm": 13.428905487060547, "learning_rate": 1.90250622406639e-05, "loss": 1.4556, "step": 2941 }, { "epoch": 2.44149377593361, "grad_norm": 8.898534774780273, "learning_rate": 1.9024730290456434e-05, "loss": 1.0626, "step": 2942 }, { "epoch": 2.4423236514522824, "grad_norm": 13.371454238891602, "learning_rate": 1.9024398340248966e-05, "loss": 1.0391, "step": 2943 }, { "epoch": 2.4431535269709546, "grad_norm": 9.047843933105469, "learning_rate": 1.9024066390041494e-05, "loss": 1.469, "step": 2944 }, { "epoch": 2.4439834024896268, "grad_norm": 9.10043716430664, "learning_rate": 1.9023734439834027e-05, "loss": 1.6665, "step": 2945 }, { "epoch": 2.444813278008299, "grad_norm": 8.485358238220215, "learning_rate": 
1.9023402489626555e-05, "loss": 1.3099, "step": 2946 }, { "epoch": 2.445643153526971, "grad_norm": 16.330842971801758, "learning_rate": 1.9023070539419087e-05, "loss": 1.3335, "step": 2947 }, { "epoch": 2.4464730290456433, "grad_norm": 12.601024627685547, "learning_rate": 1.902273858921162e-05, "loss": 2.208, "step": 2948 }, { "epoch": 2.4473029045643155, "grad_norm": 12.351107597351074, "learning_rate": 1.902240663900415e-05, "loss": 1.5289, "step": 2949 }, { "epoch": 2.4481327800829877, "grad_norm": 7.565761089324951, "learning_rate": 1.902207468879668e-05, "loss": 1.0202, "step": 2950 }, { "epoch": 2.44896265560166, "grad_norm": 8.516521453857422, "learning_rate": 1.9021742738589212e-05, "loss": 1.5795, "step": 2951 }, { "epoch": 2.449792531120332, "grad_norm": 11.585369110107422, "learning_rate": 1.9021410788381744e-05, "loss": 1.5011, "step": 2952 }, { "epoch": 2.4506224066390043, "grad_norm": 12.112651824951172, "learning_rate": 1.9021078838174277e-05, "loss": 1.5535, "step": 2953 }, { "epoch": 2.4514522821576765, "grad_norm": 13.447582244873047, "learning_rate": 1.9020746887966805e-05, "loss": 1.562, "step": 2954 }, { "epoch": 2.4522821576763487, "grad_norm": 10.246111869812012, "learning_rate": 1.9020414937759337e-05, "loss": 1.5827, "step": 2955 }, { "epoch": 2.453112033195021, "grad_norm": 12.930103302001953, "learning_rate": 1.902008298755187e-05, "loss": 1.7999, "step": 2956 }, { "epoch": 2.453941908713693, "grad_norm": 15.0877103805542, "learning_rate": 1.90197510373444e-05, "loss": 2.3666, "step": 2957 }, { "epoch": 2.4547717842323653, "grad_norm": 11.583578109741211, "learning_rate": 1.901941908713693e-05, "loss": 1.8881, "step": 2958 }, { "epoch": 2.4556016597510375, "grad_norm": 6.448083400726318, "learning_rate": 1.9019087136929462e-05, "loss": 1.3995, "step": 2959 }, { "epoch": 2.4564315352697097, "grad_norm": 11.464866638183594, "learning_rate": 1.9018755186721995e-05, "loss": 1.3391, "step": 2960 }, { "epoch": 2.457261410788382, "grad_norm": 
10.643453598022461, "learning_rate": 1.9018423236514523e-05, "loss": 1.5957, "step": 2961 }, { "epoch": 2.458091286307054, "grad_norm": 11.312220573425293, "learning_rate": 1.9018091286307055e-05, "loss": 1.8925, "step": 2962 }, { "epoch": 2.4589211618257263, "grad_norm": 14.352627754211426, "learning_rate": 1.9017759336099587e-05, "loss": 1.1053, "step": 2963 }, { "epoch": 2.4597510373443985, "grad_norm": 7.7463059425354, "learning_rate": 1.9017427385892116e-05, "loss": 1.1023, "step": 2964 }, { "epoch": 2.4605809128630707, "grad_norm": 11.132250785827637, "learning_rate": 1.901709543568465e-05, "loss": 1.4388, "step": 2965 }, { "epoch": 2.461410788381743, "grad_norm": 13.106171607971191, "learning_rate": 1.901676348547718e-05, "loss": 1.7036, "step": 2966 }, { "epoch": 2.462240663900415, "grad_norm": 7.6287617683410645, "learning_rate": 1.901643153526971e-05, "loss": 1.0625, "step": 2967 }, { "epoch": 2.4630705394190873, "grad_norm": 7.589328289031982, "learning_rate": 1.901609958506224e-05, "loss": 1.1419, "step": 2968 }, { "epoch": 2.4639004149377595, "grad_norm": 8.240933418273926, "learning_rate": 1.9015767634854773e-05, "loss": 0.8665, "step": 2969 }, { "epoch": 2.4647302904564317, "grad_norm": 13.827260971069336, "learning_rate": 1.9015435684647305e-05, "loss": 2.1103, "step": 2970 }, { "epoch": 2.465560165975104, "grad_norm": 11.87549114227295, "learning_rate": 1.9015103734439834e-05, "loss": 1.552, "step": 2971 }, { "epoch": 2.466390041493776, "grad_norm": 10.770988464355469, "learning_rate": 1.9014771784232366e-05, "loss": 1.8742, "step": 2972 }, { "epoch": 2.4672199170124482, "grad_norm": 14.919153213500977, "learning_rate": 1.90144398340249e-05, "loss": 1.7201, "step": 2973 }, { "epoch": 2.4680497925311204, "grad_norm": 7.536365032196045, "learning_rate": 1.901410788381743e-05, "loss": 1.6459, "step": 2974 }, { "epoch": 2.4688796680497926, "grad_norm": 9.098098754882812, "learning_rate": 1.901377593360996e-05, "loss": 1.4656, "step": 2975 }, { "epoch": 
2.469709543568465, "grad_norm": 7.289506912231445, "learning_rate": 1.901344398340249e-05, "loss": 0.8141, "step": 2976 }, { "epoch": 2.470539419087137, "grad_norm": 10.90192985534668, "learning_rate": 1.9013112033195023e-05, "loss": 1.445, "step": 2977 }, { "epoch": 2.4713692946058092, "grad_norm": 9.264500617980957, "learning_rate": 1.9012780082987556e-05, "loss": 1.7235, "step": 2978 }, { "epoch": 2.4721991701244814, "grad_norm": 8.965867042541504, "learning_rate": 1.9012448132780084e-05, "loss": 1.1247, "step": 2979 }, { "epoch": 2.4730290456431536, "grad_norm": 10.216469764709473, "learning_rate": 1.9012116182572616e-05, "loss": 1.2096, "step": 2980 }, { "epoch": 2.473858921161826, "grad_norm": 6.600274562835693, "learning_rate": 1.901178423236515e-05, "loss": 0.6878, "step": 2981 }, { "epoch": 2.474688796680498, "grad_norm": 9.58837890625, "learning_rate": 1.9011452282157677e-05, "loss": 1.3403, "step": 2982 }, { "epoch": 2.47551867219917, "grad_norm": 10.800660133361816, "learning_rate": 1.901112033195021e-05, "loss": 1.7763, "step": 2983 }, { "epoch": 2.4763485477178424, "grad_norm": 9.341007232666016, "learning_rate": 1.9010788381742738e-05, "loss": 1.8277, "step": 2984 }, { "epoch": 2.4771784232365146, "grad_norm": 15.515198707580566, "learning_rate": 1.901045643153527e-05, "loss": 1.2923, "step": 2985 }, { "epoch": 2.478008298755187, "grad_norm": 11.819595336914062, "learning_rate": 1.9010124481327802e-05, "loss": 1.1876, "step": 2986 }, { "epoch": 2.478838174273859, "grad_norm": 8.121495246887207, "learning_rate": 1.900979253112033e-05, "loss": 1.5273, "step": 2987 }, { "epoch": 2.479668049792531, "grad_norm": 8.543267250061035, "learning_rate": 1.9009460580912863e-05, "loss": 1.266, "step": 2988 }, { "epoch": 2.4804979253112034, "grad_norm": 9.54163646697998, "learning_rate": 1.9009128630705395e-05, "loss": 1.3585, "step": 2989 }, { "epoch": 2.4813278008298756, "grad_norm": 12.654959678649902, "learning_rate": 1.9008796680497927e-05, "loss": 1.828, 
"step": 2990 }, { "epoch": 2.482157676348548, "grad_norm": 11.177042961120605, "learning_rate": 1.900846473029046e-05, "loss": 1.9332, "step": 2991 }, { "epoch": 2.48298755186722, "grad_norm": 8.016578674316406, "learning_rate": 1.9008132780082988e-05, "loss": 1.4764, "step": 2992 }, { "epoch": 2.483817427385892, "grad_norm": 14.178135871887207, "learning_rate": 1.900780082987552e-05, "loss": 1.7566, "step": 2993 }, { "epoch": 2.4846473029045644, "grad_norm": 8.734691619873047, "learning_rate": 1.9007468879668052e-05, "loss": 1.4563, "step": 2994 }, { "epoch": 2.4854771784232366, "grad_norm": 8.772781372070312, "learning_rate": 1.9007136929460584e-05, "loss": 1.1819, "step": 2995 }, { "epoch": 2.4863070539419088, "grad_norm": 7.899593353271484, "learning_rate": 1.9006804979253113e-05, "loss": 1.696, "step": 2996 }, { "epoch": 2.487136929460581, "grad_norm": 7.613328456878662, "learning_rate": 1.9006473029045645e-05, "loss": 1.4398, "step": 2997 }, { "epoch": 2.487966804979253, "grad_norm": 9.171751976013184, "learning_rate": 1.9006141078838177e-05, "loss": 1.198, "step": 2998 }, { "epoch": 2.4887966804979254, "grad_norm": 9.467680931091309, "learning_rate": 1.900580912863071e-05, "loss": 1.2793, "step": 2999 }, { "epoch": 2.4896265560165975, "grad_norm": 8.200111389160156, "learning_rate": 1.9005477178423238e-05, "loss": 1.2249, "step": 3000 }, { "epoch": 2.4904564315352697, "grad_norm": 10.713764190673828, "learning_rate": 1.900514522821577e-05, "loss": 1.4557, "step": 3001 }, { "epoch": 2.491286307053942, "grad_norm": 19.840801239013672, "learning_rate": 1.90048132780083e-05, "loss": 2.0107, "step": 3002 }, { "epoch": 2.492116182572614, "grad_norm": 9.422197341918945, "learning_rate": 1.900448132780083e-05, "loss": 1.2115, "step": 3003 }, { "epoch": 2.4929460580912863, "grad_norm": 14.409473419189453, "learning_rate": 1.9004149377593363e-05, "loss": 2.1463, "step": 3004 }, { "epoch": 2.4937759336099585, "grad_norm": 9.272351264953613, "learning_rate": 
1.9003817427385892e-05, "loss": 1.3586, "step": 3005 }, { "epoch": 2.4946058091286307, "grad_norm": 12.078909873962402, "learning_rate": 1.9003485477178424e-05, "loss": 1.3783, "step": 3006 }, { "epoch": 2.495435684647303, "grad_norm": 8.68553352355957, "learning_rate": 1.9003153526970956e-05, "loss": 1.1374, "step": 3007 }, { "epoch": 2.496265560165975, "grad_norm": 13.528084754943848, "learning_rate": 1.9002821576763485e-05, "loss": 2.2237, "step": 3008 }, { "epoch": 2.4970954356846473, "grad_norm": 8.558391571044922, "learning_rate": 1.9002489626556017e-05, "loss": 1.191, "step": 3009 }, { "epoch": 2.4979253112033195, "grad_norm": 9.679841041564941, "learning_rate": 1.900215767634855e-05, "loss": 0.9324, "step": 3010 }, { "epoch": 2.4987551867219917, "grad_norm": 6.7132792472839355, "learning_rate": 1.900182572614108e-05, "loss": 1.0737, "step": 3011 }, { "epoch": 2.499585062240664, "grad_norm": 6.04586124420166, "learning_rate": 1.900149377593361e-05, "loss": 1.2317, "step": 3012 }, { "epoch": 2.500414937759336, "grad_norm": 7.651583671569824, "learning_rate": 1.9001161825726142e-05, "loss": 1.2811, "step": 3013 }, { "epoch": 2.5012448132780083, "grad_norm": 10.578011512756348, "learning_rate": 1.9000829875518674e-05, "loss": 1.695, "step": 3014 }, { "epoch": 2.5020746887966805, "grad_norm": 15.206655502319336, "learning_rate": 1.9000497925311206e-05, "loss": 1.8815, "step": 3015 }, { "epoch": 2.5029045643153527, "grad_norm": 7.451765060424805, "learning_rate": 1.900016597510374e-05, "loss": 1.2558, "step": 3016 }, { "epoch": 2.503734439834025, "grad_norm": 9.501880645751953, "learning_rate": 1.8999834024896267e-05, "loss": 1.3321, "step": 3017 }, { "epoch": 2.504564315352697, "grad_norm": 7.265587329864502, "learning_rate": 1.89995020746888e-05, "loss": 1.012, "step": 3018 }, { "epoch": 2.5053941908713693, "grad_norm": 11.611119270324707, "learning_rate": 1.899917012448133e-05, "loss": 1.2372, "step": 3019 }, { "epoch": 2.5062240663900415, "grad_norm": 
14.95294189453125, "learning_rate": 1.899883817427386e-05, "loss": 1.2878, "step": 3020 }, { "epoch": 2.5070539419087137, "grad_norm": 17.125307083129883, "learning_rate": 1.8998506224066392e-05, "loss": 1.8855, "step": 3021 }, { "epoch": 2.507883817427386, "grad_norm": 9.74034309387207, "learning_rate": 1.899817427385892e-05, "loss": 1.8798, "step": 3022 }, { "epoch": 2.508713692946058, "grad_norm": 13.024829864501953, "learning_rate": 1.8997842323651453e-05, "loss": 1.4226, "step": 3023 }, { "epoch": 2.5095435684647303, "grad_norm": 9.756355285644531, "learning_rate": 1.8997510373443985e-05, "loss": 1.5891, "step": 3024 }, { "epoch": 2.5103734439834025, "grad_norm": 6.8641791343688965, "learning_rate": 1.8997178423236514e-05, "loss": 0.8386, "step": 3025 }, { "epoch": 2.5112033195020746, "grad_norm": 15.32538890838623, "learning_rate": 1.8996846473029046e-05, "loss": 1.7947, "step": 3026 }, { "epoch": 2.512033195020747, "grad_norm": 12.067728042602539, "learning_rate": 1.8996514522821578e-05, "loss": 1.6868, "step": 3027 }, { "epoch": 2.512863070539419, "grad_norm": 7.98341178894043, "learning_rate": 1.899618257261411e-05, "loss": 1.0968, "step": 3028 }, { "epoch": 2.5136929460580912, "grad_norm": 10.644120216369629, "learning_rate": 1.899585062240664e-05, "loss": 2.0068, "step": 3029 }, { "epoch": 2.5145228215767634, "grad_norm": 15.404167175292969, "learning_rate": 1.899551867219917e-05, "loss": 1.9654, "step": 3030 }, { "epoch": 2.5153526970954356, "grad_norm": 16.18561553955078, "learning_rate": 1.8995186721991703e-05, "loss": 1.4662, "step": 3031 }, { "epoch": 2.516182572614108, "grad_norm": 8.63159465789795, "learning_rate": 1.8994854771784235e-05, "loss": 0.9957, "step": 3032 }, { "epoch": 2.51701244813278, "grad_norm": 12.714666366577148, "learning_rate": 1.8994522821576764e-05, "loss": 2.7072, "step": 3033 }, { "epoch": 2.517842323651452, "grad_norm": 9.308873176574707, "learning_rate": 1.8994190871369296e-05, "loss": 1.3904, "step": 3034 }, { "epoch": 
2.5186721991701244, "grad_norm": 13.240626335144043, "learning_rate": 1.8993858921161828e-05, "loss": 1.5891, "step": 3035 }, { "epoch": 2.5195020746887966, "grad_norm": 5.572559833526611, "learning_rate": 1.899352697095436e-05, "loss": 0.9762, "step": 3036 }, { "epoch": 2.520331950207469, "grad_norm": 11.161065101623535, "learning_rate": 1.899319502074689e-05, "loss": 1.8964, "step": 3037 }, { "epoch": 2.521161825726141, "grad_norm": 12.723210334777832, "learning_rate": 1.899286307053942e-05, "loss": 2.4171, "step": 3038 }, { "epoch": 2.521991701244813, "grad_norm": 15.987079620361328, "learning_rate": 1.8992531120331953e-05, "loss": 1.7094, "step": 3039 }, { "epoch": 2.5228215767634854, "grad_norm": 7.731632709503174, "learning_rate": 1.8992199170124482e-05, "loss": 1.6111, "step": 3040 }, { "epoch": 2.5236514522821576, "grad_norm": 11.682188034057617, "learning_rate": 1.8991867219917014e-05, "loss": 1.5376, "step": 3041 }, { "epoch": 2.52448132780083, "grad_norm": 14.356841087341309, "learning_rate": 1.8991535269709546e-05, "loss": 0.8444, "step": 3042 }, { "epoch": 2.525311203319502, "grad_norm": 13.078615188598633, "learning_rate": 1.8991203319502075e-05, "loss": 1.724, "step": 3043 }, { "epoch": 2.526141078838174, "grad_norm": 9.882340431213379, "learning_rate": 1.8990871369294607e-05, "loss": 1.274, "step": 3044 }, { "epoch": 2.5269709543568464, "grad_norm": 15.710305213928223, "learning_rate": 1.899053941908714e-05, "loss": 2.2742, "step": 3045 }, { "epoch": 2.5278008298755186, "grad_norm": 9.628416061401367, "learning_rate": 1.8990207468879668e-05, "loss": 1.587, "step": 3046 }, { "epoch": 2.5286307053941908, "grad_norm": 7.939834117889404, "learning_rate": 1.89898755186722e-05, "loss": 0.7074, "step": 3047 }, { "epoch": 2.529460580912863, "grad_norm": 11.213594436645508, "learning_rate": 1.8989543568464732e-05, "loss": 2.0734, "step": 3048 }, { "epoch": 2.530290456431535, "grad_norm": 12.481683731079102, "learning_rate": 1.8989211618257264e-05, "loss": 
1.7707, "step": 3049 }, { "epoch": 2.5311203319502074, "grad_norm": 12.58144760131836, "learning_rate": 1.8988879668049793e-05, "loss": 1.467, "step": 3050 }, { "epoch": 2.5319502074688796, "grad_norm": 11.50440788269043, "learning_rate": 1.8988547717842325e-05, "loss": 2.1288, "step": 3051 }, { "epoch": 2.5327800829875518, "grad_norm": 10.59146499633789, "learning_rate": 1.8988215767634857e-05, "loss": 1.1265, "step": 3052 }, { "epoch": 2.533609958506224, "grad_norm": 11.120722770690918, "learning_rate": 1.898788381742739e-05, "loss": 1.3625, "step": 3053 }, { "epoch": 2.534439834024896, "grad_norm": 11.983293533325195, "learning_rate": 1.8987551867219918e-05, "loss": 1.4647, "step": 3054 }, { "epoch": 2.5352697095435683, "grad_norm": 12.45897102355957, "learning_rate": 1.898721991701245e-05, "loss": 1.3954, "step": 3055 }, { "epoch": 2.5360995850622405, "grad_norm": 9.63779354095459, "learning_rate": 1.8986887966804982e-05, "loss": 1.8147, "step": 3056 }, { "epoch": 2.5369294605809127, "grad_norm": 12.63122844696045, "learning_rate": 1.8986556016597514e-05, "loss": 1.8474, "step": 3057 }, { "epoch": 2.537759336099585, "grad_norm": 12.184530258178711, "learning_rate": 1.8986224066390043e-05, "loss": 0.941, "step": 3058 }, { "epoch": 2.538589211618257, "grad_norm": 12.202775001525879, "learning_rate": 1.8985892116182575e-05, "loss": 2.3587, "step": 3059 }, { "epoch": 2.5394190871369293, "grad_norm": 13.2190523147583, "learning_rate": 1.8985560165975107e-05, "loss": 2.1234, "step": 3060 }, { "epoch": 2.5402489626556015, "grad_norm": 8.270452499389648, "learning_rate": 1.8985228215767636e-05, "loss": 1.3823, "step": 3061 }, { "epoch": 2.5410788381742737, "grad_norm": 13.309720039367676, "learning_rate": 1.8984896265560168e-05, "loss": 1.7217, "step": 3062 }, { "epoch": 2.541908713692946, "grad_norm": 12.726096153259277, "learning_rate": 1.8984564315352697e-05, "loss": 2.2929, "step": 3063 }, { "epoch": 2.542738589211618, "grad_norm": 10.900895118713379, 
"learning_rate": 1.898423236514523e-05, "loss": 1.6009, "step": 3064 }, { "epoch": 2.5435684647302903, "grad_norm": 8.295674324035645, "learning_rate": 1.898390041493776e-05, "loss": 1.5261, "step": 3065 }, { "epoch": 2.5443983402489625, "grad_norm": 7.476327419281006, "learning_rate": 1.898356846473029e-05, "loss": 1.1784, "step": 3066 }, { "epoch": 2.5452282157676347, "grad_norm": 13.517438888549805, "learning_rate": 1.898323651452282e-05, "loss": 1.7575, "step": 3067 }, { "epoch": 2.546058091286307, "grad_norm": 12.256380081176758, "learning_rate": 1.8982904564315354e-05, "loss": 1.8134, "step": 3068 }, { "epoch": 2.546887966804979, "grad_norm": 6.949573516845703, "learning_rate": 1.8982572614107886e-05, "loss": 1.06, "step": 3069 }, { "epoch": 2.5477178423236513, "grad_norm": 6.460617542266846, "learning_rate": 1.8982240663900418e-05, "loss": 1.4287, "step": 3070 }, { "epoch": 2.5485477178423235, "grad_norm": 9.032816886901855, "learning_rate": 1.8981908713692947e-05, "loss": 1.648, "step": 3071 }, { "epoch": 2.5493775933609957, "grad_norm": 7.593075752258301, "learning_rate": 1.898157676348548e-05, "loss": 1.0558, "step": 3072 }, { "epoch": 2.550207468879668, "grad_norm": 10.434066772460938, "learning_rate": 1.898124481327801e-05, "loss": 1.4293, "step": 3073 }, { "epoch": 2.55103734439834, "grad_norm": 11.849448204040527, "learning_rate": 1.8980912863070543e-05, "loss": 2.1774, "step": 3074 }, { "epoch": 2.5518672199170123, "grad_norm": 14.484130859375, "learning_rate": 1.8980580912863072e-05, "loss": 1.5832, "step": 3075 }, { "epoch": 2.5526970954356845, "grad_norm": 9.538695335388184, "learning_rate": 1.8980248962655604e-05, "loss": 2.016, "step": 3076 }, { "epoch": 2.5535269709543567, "grad_norm": 10.64164924621582, "learning_rate": 1.8979917012448136e-05, "loss": 1.8134, "step": 3077 }, { "epoch": 2.554356846473029, "grad_norm": 9.99460506439209, "learning_rate": 1.8979585062240665e-05, "loss": 0.7887, "step": 3078 }, { "epoch": 2.555186721991701, 
"grad_norm": 7.680563449859619, "learning_rate": 1.8979253112033197e-05, "loss": 1.4787, "step": 3079 }, { "epoch": 2.5560165975103732, "grad_norm": 9.476099967956543, "learning_rate": 1.897892116182573e-05, "loss": 1.6909, "step": 3080 }, { "epoch": 2.5568464730290454, "grad_norm": 12.252365112304688, "learning_rate": 1.8978589211618258e-05, "loss": 1.8679, "step": 3081 }, { "epoch": 2.5576763485477176, "grad_norm": 10.298565864562988, "learning_rate": 1.897825726141079e-05, "loss": 1.7172, "step": 3082 }, { "epoch": 2.55850622406639, "grad_norm": 12.500755310058594, "learning_rate": 1.8977925311203322e-05, "loss": 1.6075, "step": 3083 }, { "epoch": 2.559336099585062, "grad_norm": 11.857595443725586, "learning_rate": 1.897759336099585e-05, "loss": 1.4165, "step": 3084 }, { "epoch": 2.5601659751037342, "grad_norm": 12.781428337097168, "learning_rate": 1.8977261410788383e-05, "loss": 1.3222, "step": 3085 }, { "epoch": 2.5609958506224064, "grad_norm": 12.833918571472168, "learning_rate": 1.8976929460580915e-05, "loss": 2.0748, "step": 3086 }, { "epoch": 2.5618257261410786, "grad_norm": 7.796336650848389, "learning_rate": 1.8976597510373443e-05, "loss": 1.3414, "step": 3087 }, { "epoch": 2.562655601659751, "grad_norm": 8.138579368591309, "learning_rate": 1.8976265560165976e-05, "loss": 1.5799, "step": 3088 }, { "epoch": 2.563485477178423, "grad_norm": 8.399800300598145, "learning_rate": 1.8975933609958508e-05, "loss": 2.2163, "step": 3089 }, { "epoch": 2.564315352697095, "grad_norm": 8.95430850982666, "learning_rate": 1.897560165975104e-05, "loss": 1.7939, "step": 3090 }, { "epoch": 2.5651452282157674, "grad_norm": 8.034798622131348, "learning_rate": 1.897526970954357e-05, "loss": 1.1691, "step": 3091 }, { "epoch": 2.5659751037344396, "grad_norm": 13.324599266052246, "learning_rate": 1.89749377593361e-05, "loss": 1.9306, "step": 3092 }, { "epoch": 2.566804979253112, "grad_norm": 12.29759693145752, "learning_rate": 1.8974605809128633e-05, "loss": 2.2669, "step": 3093 
}, { "epoch": 2.567634854771784, "grad_norm": 8.969149589538574, "learning_rate": 1.8974273858921165e-05, "loss": 1.309, "step": 3094 }, { "epoch": 2.568464730290456, "grad_norm": 13.00916576385498, "learning_rate": 1.8973941908713697e-05, "loss": 2.1287, "step": 3095 }, { "epoch": 2.5692946058091284, "grad_norm": 13.952262878417969, "learning_rate": 1.8973609958506226e-05, "loss": 1.5589, "step": 3096 }, { "epoch": 2.5701244813278006, "grad_norm": 7.826522350311279, "learning_rate": 1.8973278008298758e-05, "loss": 1.2688, "step": 3097 }, { "epoch": 2.5709543568464728, "grad_norm": 10.372822761535645, "learning_rate": 1.897294605809129e-05, "loss": 1.1421, "step": 3098 }, { "epoch": 2.571784232365145, "grad_norm": 9.61740779876709, "learning_rate": 1.897261410788382e-05, "loss": 1.0607, "step": 3099 }, { "epoch": 2.572614107883817, "grad_norm": 7.472452640533447, "learning_rate": 1.897228215767635e-05, "loss": 1.408, "step": 3100 }, { "epoch": 2.5734439834024894, "grad_norm": 18.479957580566406, "learning_rate": 1.897195020746888e-05, "loss": 2.1526, "step": 3101 }, { "epoch": 2.5742738589211616, "grad_norm": 16.511945724487305, "learning_rate": 1.897161825726141e-05, "loss": 1.0745, "step": 3102 }, { "epoch": 2.5751037344398338, "grad_norm": 16.006366729736328, "learning_rate": 1.8971286307053944e-05, "loss": 2.9745, "step": 3103 }, { "epoch": 2.575933609958506, "grad_norm": 11.9442138671875, "learning_rate": 1.8970954356846472e-05, "loss": 1.7878, "step": 3104 }, { "epoch": 2.576763485477178, "grad_norm": 7.413383960723877, "learning_rate": 1.8970622406639004e-05, "loss": 1.5915, "step": 3105 }, { "epoch": 2.5775933609958503, "grad_norm": 12.53866958618164, "learning_rate": 1.8970290456431537e-05, "loss": 2.4091, "step": 3106 }, { "epoch": 2.578423236514523, "grad_norm": 10.776885032653809, "learning_rate": 1.896995850622407e-05, "loss": 1.8778, "step": 3107 }, { "epoch": 2.579253112033195, "grad_norm": 12.859111785888672, "learning_rate": 1.8969626556016597e-05, 
"loss": 1.6106, "step": 3108 }, { "epoch": 2.5800829875518674, "grad_norm": 20.287336349487305, "learning_rate": 1.896929460580913e-05, "loss": 1.6261, "step": 3109 }, { "epoch": 2.5809128630705396, "grad_norm": 10.503483772277832, "learning_rate": 1.896896265560166e-05, "loss": 1.1478, "step": 3110 }, { "epoch": 2.5817427385892118, "grad_norm": 10.526529312133789, "learning_rate": 1.8968630705394194e-05, "loss": 1.8394, "step": 3111 }, { "epoch": 2.582572614107884, "grad_norm": 8.267707824707031, "learning_rate": 1.8968298755186722e-05, "loss": 1.74, "step": 3112 }, { "epoch": 2.583402489626556, "grad_norm": 8.837667465209961, "learning_rate": 1.8967966804979255e-05, "loss": 1.2862, "step": 3113 }, { "epoch": 2.5842323651452284, "grad_norm": 14.535545349121094, "learning_rate": 1.8967634854771787e-05, "loss": 2.0089, "step": 3114 }, { "epoch": 2.5850622406639006, "grad_norm": 10.417916297912598, "learning_rate": 1.896730290456432e-05, "loss": 1.1702, "step": 3115 }, { "epoch": 2.5858921161825728, "grad_norm": 6.981987953186035, "learning_rate": 1.8966970954356847e-05, "loss": 1.521, "step": 3116 }, { "epoch": 2.586721991701245, "grad_norm": 8.440995216369629, "learning_rate": 1.896663900414938e-05, "loss": 1.1985, "step": 3117 }, { "epoch": 2.587551867219917, "grad_norm": 7.296407222747803, "learning_rate": 1.8966307053941912e-05, "loss": 1.2529, "step": 3118 }, { "epoch": 2.5883817427385893, "grad_norm": 10.464786529541016, "learning_rate": 1.896597510373444e-05, "loss": 1.7685, "step": 3119 }, { "epoch": 2.5892116182572615, "grad_norm": 10.11099624633789, "learning_rate": 1.8965643153526973e-05, "loss": 1.1656, "step": 3120 }, { "epoch": 2.5900414937759337, "grad_norm": 8.374485969543457, "learning_rate": 1.8965311203319505e-05, "loss": 1.1147, "step": 3121 }, { "epoch": 2.590871369294606, "grad_norm": 10.030954360961914, "learning_rate": 1.8964979253112033e-05, "loss": 1.975, "step": 3122 }, { "epoch": 2.591701244813278, "grad_norm": 7.127978801727295, 
"learning_rate": 1.8964647302904565e-05, "loss": 1.016, "step": 3123 }, { "epoch": 2.5925311203319503, "grad_norm": 8.725591659545898, "learning_rate": 1.8964315352697098e-05, "loss": 1.2658, "step": 3124 }, { "epoch": 2.5933609958506225, "grad_norm": 7.0484619140625, "learning_rate": 1.8963983402489626e-05, "loss": 1.0468, "step": 3125 }, { "epoch": 2.5941908713692947, "grad_norm": 7.555415153503418, "learning_rate": 1.896365145228216e-05, "loss": 1.3576, "step": 3126 }, { "epoch": 2.595020746887967, "grad_norm": 14.136002540588379, "learning_rate": 1.896331950207469e-05, "loss": 1.1765, "step": 3127 }, { "epoch": 2.595850622406639, "grad_norm": 9.641349792480469, "learning_rate": 1.8962987551867223e-05, "loss": 1.4107, "step": 3128 }, { "epoch": 2.5966804979253113, "grad_norm": 6.545807361602783, "learning_rate": 1.896265560165975e-05, "loss": 1.0657, "step": 3129 }, { "epoch": 2.5975103734439835, "grad_norm": 11.736283302307129, "learning_rate": 1.8962323651452283e-05, "loss": 1.302, "step": 3130 }, { "epoch": 2.5983402489626557, "grad_norm": 7.986082553863525, "learning_rate": 1.8961991701244816e-05, "loss": 0.9052, "step": 3131 }, { "epoch": 2.599170124481328, "grad_norm": 8.913910865783691, "learning_rate": 1.8961659751037348e-05, "loss": 1.3947, "step": 3132 }, { "epoch": 2.6, "grad_norm": 13.474832534790039, "learning_rate": 1.8961327800829876e-05, "loss": 1.4269, "step": 3133 }, { "epoch": 2.6008298755186723, "grad_norm": 11.775554656982422, "learning_rate": 1.896099585062241e-05, "loss": 1.1493, "step": 3134 }, { "epoch": 2.6016597510373445, "grad_norm": 21.197940826416016, "learning_rate": 1.896066390041494e-05, "loss": 2.1268, "step": 3135 }, { "epoch": 2.6024896265560167, "grad_norm": 13.873042106628418, "learning_rate": 1.8960331950207473e-05, "loss": 1.304, "step": 3136 }, { "epoch": 2.603319502074689, "grad_norm": 13.861886978149414, "learning_rate": 1.896e-05, "loss": 1.4857, "step": 3137 }, { "epoch": 2.604149377593361, "grad_norm": 
7.87708044052124, "learning_rate": 1.8959668049792534e-05, "loss": 1.0361, "step": 3138 }, { "epoch": 2.6049792531120333, "grad_norm": 8.486552238464355, "learning_rate": 1.8959336099585062e-05, "loss": 1.3817, "step": 3139 }, { "epoch": 2.6058091286307055, "grad_norm": 7.11970329284668, "learning_rate": 1.8959004149377594e-05, "loss": 1.099, "step": 3140 }, { "epoch": 2.6066390041493777, "grad_norm": 11.73997974395752, "learning_rate": 1.8958672199170126e-05, "loss": 1.3095, "step": 3141 }, { "epoch": 2.60746887966805, "grad_norm": 17.00907325744629, "learning_rate": 1.8958340248962655e-05, "loss": 2.0457, "step": 3142 }, { "epoch": 2.608298755186722, "grad_norm": 8.407144546508789, "learning_rate": 1.8958008298755187e-05, "loss": 0.9715, "step": 3143 }, { "epoch": 2.6091286307053942, "grad_norm": 8.739181518554688, "learning_rate": 1.895767634854772e-05, "loss": 1.3869, "step": 3144 }, { "epoch": 2.6099585062240664, "grad_norm": 9.675012588500977, "learning_rate": 1.8957344398340248e-05, "loss": 1.6314, "step": 3145 }, { "epoch": 2.6107883817427386, "grad_norm": 17.132673263549805, "learning_rate": 1.895701244813278e-05, "loss": 1.7778, "step": 3146 }, { "epoch": 2.611618257261411, "grad_norm": 12.004385948181152, "learning_rate": 1.8956680497925312e-05, "loss": 2.2622, "step": 3147 }, { "epoch": 2.612448132780083, "grad_norm": 7.153652667999268, "learning_rate": 1.8956348547717844e-05, "loss": 1.1904, "step": 3148 }, { "epoch": 2.6132780082987552, "grad_norm": 9.461274147033691, "learning_rate": 1.8956016597510377e-05, "loss": 1.3308, "step": 3149 }, { "epoch": 2.6141078838174274, "grad_norm": 14.992892265319824, "learning_rate": 1.8955684647302905e-05, "loss": 1.4908, "step": 3150 }, { "epoch": 2.6149377593360996, "grad_norm": 12.4358549118042, "learning_rate": 1.8955352697095437e-05, "loss": 2.1243, "step": 3151 }, { "epoch": 2.615767634854772, "grad_norm": 13.710226058959961, "learning_rate": 1.895502074688797e-05, "loss": 1.4657, "step": 3152 }, { "epoch": 
2.616597510373444, "grad_norm": 10.181720733642578, "learning_rate": 1.89546887966805e-05, "loss": 1.6085, "step": 3153 }, { "epoch": 2.617427385892116, "grad_norm": 7.503370761871338, "learning_rate": 1.895435684647303e-05, "loss": 1.2499, "step": 3154 }, { "epoch": 2.6182572614107884, "grad_norm": 9.844366073608398, "learning_rate": 1.8954024896265562e-05, "loss": 1.5272, "step": 3155 }, { "epoch": 2.6190871369294606, "grad_norm": 14.577975273132324, "learning_rate": 1.8953692946058095e-05, "loss": 1.9804, "step": 3156 }, { "epoch": 2.619917012448133, "grad_norm": 9.503067970275879, "learning_rate": 1.8953360995850623e-05, "loss": 1.6003, "step": 3157 }, { "epoch": 2.620746887966805, "grad_norm": 7.297095775604248, "learning_rate": 1.8953029045643155e-05, "loss": 1.3593, "step": 3158 }, { "epoch": 2.621576763485477, "grad_norm": 15.837091445922852, "learning_rate": 1.8952697095435687e-05, "loss": 1.6535, "step": 3159 }, { "epoch": 2.6224066390041494, "grad_norm": 7.281178951263428, "learning_rate": 1.8952365145228216e-05, "loss": 1.3392, "step": 3160 }, { "epoch": 2.6232365145228216, "grad_norm": 10.735540390014648, "learning_rate": 1.8952033195020748e-05, "loss": 0.8405, "step": 3161 }, { "epoch": 2.624066390041494, "grad_norm": 10.008926391601562, "learning_rate": 1.8951701244813277e-05, "loss": 1.1386, "step": 3162 }, { "epoch": 2.624896265560166, "grad_norm": 14.024138450622559, "learning_rate": 1.895136929460581e-05, "loss": 1.5525, "step": 3163 }, { "epoch": 2.625726141078838, "grad_norm": 9.422201156616211, "learning_rate": 1.895103734439834e-05, "loss": 1.4734, "step": 3164 }, { "epoch": 2.6265560165975104, "grad_norm": 11.584612846374512, "learning_rate": 1.8950705394190873e-05, "loss": 1.8058, "step": 3165 }, { "epoch": 2.6273858921161826, "grad_norm": 10.045364379882812, "learning_rate": 1.8950373443983402e-05, "loss": 2.153, "step": 3166 }, { "epoch": 2.6282157676348548, "grad_norm": 9.929515838623047, "learning_rate": 1.8950041493775934e-05, "loss": 
1.2065, "step": 3167 }, { "epoch": 2.629045643153527, "grad_norm": 13.736995697021484, "learning_rate": 1.8949709543568466e-05, "loss": 1.4439, "step": 3168 }, { "epoch": 2.629875518672199, "grad_norm": 16.1452579498291, "learning_rate": 1.8949377593361e-05, "loss": 1.4879, "step": 3169 }, { "epoch": 2.6307053941908713, "grad_norm": 8.985688209533691, "learning_rate": 1.8949045643153527e-05, "loss": 1.2006, "step": 3170 }, { "epoch": 2.6315352697095435, "grad_norm": 10.810930252075195, "learning_rate": 1.894871369294606e-05, "loss": 1.2597, "step": 3171 }, { "epoch": 2.6323651452282157, "grad_norm": 11.980714797973633, "learning_rate": 1.894838174273859e-05, "loss": 1.8611, "step": 3172 }, { "epoch": 2.633195020746888, "grad_norm": 8.326506614685059, "learning_rate": 1.8948049792531123e-05, "loss": 1.1763, "step": 3173 }, { "epoch": 2.63402489626556, "grad_norm": 8.18309497833252, "learning_rate": 1.8947717842323656e-05, "loss": 1.6482, "step": 3174 }, { "epoch": 2.6348547717842323, "grad_norm": 11.809752464294434, "learning_rate": 1.8947385892116184e-05, "loss": 1.5355, "step": 3175 }, { "epoch": 2.6356846473029045, "grad_norm": 8.943493843078613, "learning_rate": 1.8947053941908716e-05, "loss": 1.2778, "step": 3176 }, { "epoch": 2.6365145228215767, "grad_norm": 9.510695457458496, "learning_rate": 1.894672199170125e-05, "loss": 1.4304, "step": 3177 }, { "epoch": 2.637344398340249, "grad_norm": 9.848443984985352, "learning_rate": 1.8946390041493777e-05, "loss": 1.6202, "step": 3178 }, { "epoch": 2.638174273858921, "grad_norm": 10.325112342834473, "learning_rate": 1.894605809128631e-05, "loss": 2.0319, "step": 3179 }, { "epoch": 2.6390041493775933, "grad_norm": 12.71788501739502, "learning_rate": 1.8945726141078838e-05, "loss": 1.7585, "step": 3180 }, { "epoch": 2.6398340248962655, "grad_norm": 12.323354721069336, "learning_rate": 1.894539419087137e-05, "loss": 1.8294, "step": 3181 }, { "epoch": 2.6406639004149377, "grad_norm": 12.989850997924805, "learning_rate": 
1.8945062240663902e-05, "loss": 1.0768, "step": 3182 }, { "epoch": 2.64149377593361, "grad_norm": 9.40091323852539, "learning_rate": 1.894473029045643e-05, "loss": 1.276, "step": 3183 }, { "epoch": 2.642323651452282, "grad_norm": 11.145212173461914, "learning_rate": 1.8944398340248963e-05, "loss": 1.5946, "step": 3184 }, { "epoch": 2.6431535269709543, "grad_norm": 11.261791229248047, "learning_rate": 1.8944066390041495e-05, "loss": 2.0695, "step": 3185 }, { "epoch": 2.6439834024896265, "grad_norm": 6.508551120758057, "learning_rate": 1.8943734439834027e-05, "loss": 1.2635, "step": 3186 }, { "epoch": 2.6448132780082987, "grad_norm": 11.629057884216309, "learning_rate": 1.8943402489626556e-05, "loss": 1.8986, "step": 3187 }, { "epoch": 2.645643153526971, "grad_norm": 13.00645637512207, "learning_rate": 1.8943070539419088e-05, "loss": 1.8962, "step": 3188 }, { "epoch": 2.646473029045643, "grad_norm": 12.220613479614258, "learning_rate": 1.894273858921162e-05, "loss": 2.112, "step": 3189 }, { "epoch": 2.6473029045643153, "grad_norm": 8.959778785705566, "learning_rate": 1.8942406639004152e-05, "loss": 1.255, "step": 3190 }, { "epoch": 2.6481327800829875, "grad_norm": 8.639277458190918, "learning_rate": 1.894207468879668e-05, "loss": 1.2594, "step": 3191 }, { "epoch": 2.6489626556016597, "grad_norm": 9.81689739227295, "learning_rate": 1.8941742738589213e-05, "loss": 0.9589, "step": 3192 }, { "epoch": 2.649792531120332, "grad_norm": 14.816205024719238, "learning_rate": 1.8941410788381745e-05, "loss": 1.2914, "step": 3193 }, { "epoch": 2.650622406639004, "grad_norm": 12.200414657592773, "learning_rate": 1.8941078838174277e-05, "loss": 2.1176, "step": 3194 }, { "epoch": 2.6514522821576763, "grad_norm": 11.958333015441895, "learning_rate": 1.8940746887966806e-05, "loss": 1.6773, "step": 3195 }, { "epoch": 2.6522821576763485, "grad_norm": 9.137033462524414, "learning_rate": 1.8940414937759338e-05, "loss": 1.3179, "step": 3196 }, { "epoch": 2.6531120331950206, "grad_norm": 
8.349519729614258, "learning_rate": 1.894008298755187e-05, "loss": 1.4702, "step": 3197 }, { "epoch": 2.653941908713693, "grad_norm": 12.118748664855957, "learning_rate": 1.89397510373444e-05, "loss": 2.171, "step": 3198 }, { "epoch": 2.654771784232365, "grad_norm": 10.39599323272705, "learning_rate": 1.893941908713693e-05, "loss": 1.2843, "step": 3199 }, { "epoch": 2.6556016597510372, "grad_norm": 7.67322301864624, "learning_rate": 1.8939087136929463e-05, "loss": 1.2116, "step": 3200 }, { "epoch": 2.6564315352697094, "grad_norm": 7.665401935577393, "learning_rate": 1.8938755186721992e-05, "loss": 1.0695, "step": 3201 }, { "epoch": 2.6572614107883816, "grad_norm": 9.008350372314453, "learning_rate": 1.8938423236514524e-05, "loss": 1.5564, "step": 3202 }, { "epoch": 2.658091286307054, "grad_norm": 15.102622985839844, "learning_rate": 1.8938091286307056e-05, "loss": 1.461, "step": 3203 }, { "epoch": 2.658921161825726, "grad_norm": 12.299023628234863, "learning_rate": 1.8937759336099585e-05, "loss": 1.9092, "step": 3204 }, { "epoch": 2.659751037344398, "grad_norm": 7.758678913116455, "learning_rate": 1.8937427385892117e-05, "loss": 0.8371, "step": 3205 }, { "epoch": 2.6605809128630704, "grad_norm": 15.79900074005127, "learning_rate": 1.893709543568465e-05, "loss": 1.4119, "step": 3206 }, { "epoch": 2.6614107883817426, "grad_norm": 9.48027229309082, "learning_rate": 1.893676348547718e-05, "loss": 1.6525, "step": 3207 }, { "epoch": 2.662240663900415, "grad_norm": 10.71438217163086, "learning_rate": 1.893643153526971e-05, "loss": 1.1067, "step": 3208 }, { "epoch": 2.663070539419087, "grad_norm": 11.843385696411133, "learning_rate": 1.8936099585062242e-05, "loss": 1.7298, "step": 3209 }, { "epoch": 2.663900414937759, "grad_norm": 10.536054611206055, "learning_rate": 1.8935767634854774e-05, "loss": 1.2033, "step": 3210 }, { "epoch": 2.6647302904564314, "grad_norm": 13.395133972167969, "learning_rate": 1.8935435684647306e-05, "loss": 1.1016, "step": 3211 }, { "epoch": 
2.6655601659751036, "grad_norm": 14.204322814941406, "learning_rate": 1.8935103734439835e-05, "loss": 1.162, "step": 3212 }, { "epoch": 2.666390041493776, "grad_norm": 11.97094440460205, "learning_rate": 1.8934771784232367e-05, "loss": 1.4445, "step": 3213 }, { "epoch": 2.667219917012448, "grad_norm": 9.730684280395508, "learning_rate": 1.89344398340249e-05, "loss": 1.3674, "step": 3214 }, { "epoch": 2.66804979253112, "grad_norm": 12.434308052062988, "learning_rate": 1.893410788381743e-05, "loss": 1.6918, "step": 3215 }, { "epoch": 2.6688796680497924, "grad_norm": 8.315287590026855, "learning_rate": 1.893377593360996e-05, "loss": 1.5966, "step": 3216 }, { "epoch": 2.6697095435684646, "grad_norm": 12.02142333984375, "learning_rate": 1.8933443983402492e-05, "loss": 1.3465, "step": 3217 }, { "epoch": 2.6705394190871368, "grad_norm": 9.149182319641113, "learning_rate": 1.893311203319502e-05, "loss": 1.6077, "step": 3218 }, { "epoch": 2.671369294605809, "grad_norm": 10.343049049377441, "learning_rate": 1.8932780082987553e-05, "loss": 1.8894, "step": 3219 }, { "epoch": 2.6721991701244816, "grad_norm": 7.299708843231201, "learning_rate": 1.8932448132780085e-05, "loss": 0.9511, "step": 3220 }, { "epoch": 2.673029045643154, "grad_norm": 10.554289817810059, "learning_rate": 1.8932116182572614e-05, "loss": 0.9442, "step": 3221 }, { "epoch": 2.673858921161826, "grad_norm": 11.712886810302734, "learning_rate": 1.8931784232365146e-05, "loss": 1.1679, "step": 3222 }, { "epoch": 2.674688796680498, "grad_norm": 15.183955192565918, "learning_rate": 1.8931452282157678e-05, "loss": 2.3672, "step": 3223 }, { "epoch": 2.6755186721991704, "grad_norm": 8.609086036682129, "learning_rate": 1.8931120331950207e-05, "loss": 1.4105, "step": 3224 }, { "epoch": 2.6763485477178426, "grad_norm": 6.857759952545166, "learning_rate": 1.893078838174274e-05, "loss": 1.112, "step": 3225 }, { "epoch": 2.677178423236515, "grad_norm": 12.104621887207031, "learning_rate": 1.893045643153527e-05, "loss": 
1.2041, "step": 3226 }, { "epoch": 2.678008298755187, "grad_norm": 13.370457649230957, "learning_rate": 1.8930124481327803e-05, "loss": 2.208, "step": 3227 }, { "epoch": 2.678838174273859, "grad_norm": 11.244921684265137, "learning_rate": 1.8929792531120335e-05, "loss": 1.5398, "step": 3228 }, { "epoch": 2.6796680497925314, "grad_norm": 11.806807518005371, "learning_rate": 1.8929460580912864e-05, "loss": 1.5672, "step": 3229 }, { "epoch": 2.6804979253112036, "grad_norm": 6.743889331817627, "learning_rate": 1.8929128630705396e-05, "loss": 0.8664, "step": 3230 }, { "epoch": 2.6813278008298758, "grad_norm": 13.9816312789917, "learning_rate": 1.8928796680497928e-05, "loss": 1.7475, "step": 3231 }, { "epoch": 2.682157676348548, "grad_norm": 11.35490608215332, "learning_rate": 1.892846473029046e-05, "loss": 1.2911, "step": 3232 }, { "epoch": 2.68298755186722, "grad_norm": 13.716229438781738, "learning_rate": 1.892813278008299e-05, "loss": 1.5235, "step": 3233 }, { "epoch": 2.6838174273858924, "grad_norm": 13.659769058227539, "learning_rate": 1.892780082987552e-05, "loss": 1.7849, "step": 3234 }, { "epoch": 2.6846473029045645, "grad_norm": 11.828393936157227, "learning_rate": 1.8927468879668053e-05, "loss": 1.6043, "step": 3235 }, { "epoch": 2.6854771784232367, "grad_norm": 14.416590690612793, "learning_rate": 1.8927136929460582e-05, "loss": 2.2484, "step": 3236 }, { "epoch": 2.686307053941909, "grad_norm": 8.491799354553223, "learning_rate": 1.8926804979253114e-05, "loss": 1.5353, "step": 3237 }, { "epoch": 2.687136929460581, "grad_norm": 9.95811653137207, "learning_rate": 1.8926473029045646e-05, "loss": 1.0735, "step": 3238 }, { "epoch": 2.6879668049792533, "grad_norm": 11.509576797485352, "learning_rate": 1.8926141078838175e-05, "loss": 2.2483, "step": 3239 }, { "epoch": 2.6887966804979255, "grad_norm": 10.121554374694824, "learning_rate": 1.8925809128630707e-05, "loss": 1.36, "step": 3240 }, { "epoch": 2.6896265560165977, "grad_norm": 11.813666343688965, 
"learning_rate": 1.8925477178423236e-05, "loss": 1.4305, "step": 3241 }, { "epoch": 2.69045643153527, "grad_norm": 7.336197376251221, "learning_rate": 1.8925145228215768e-05, "loss": 1.2637, "step": 3242 }, { "epoch": 2.691286307053942, "grad_norm": 6.939370155334473, "learning_rate": 1.89248132780083e-05, "loss": 1.2622, "step": 3243 }, { "epoch": 2.6921161825726143, "grad_norm": 12.979338645935059, "learning_rate": 1.8924481327800832e-05, "loss": 1.0179, "step": 3244 }, { "epoch": 2.6929460580912865, "grad_norm": 9.203307151794434, "learning_rate": 1.892414937759336e-05, "loss": 1.4519, "step": 3245 }, { "epoch": 2.6937759336099587, "grad_norm": 9.157299995422363, "learning_rate": 1.8923817427385893e-05, "loss": 1.3792, "step": 3246 }, { "epoch": 2.694605809128631, "grad_norm": 10.879405975341797, "learning_rate": 1.8923485477178425e-05, "loss": 1.1041, "step": 3247 }, { "epoch": 2.695435684647303, "grad_norm": 11.330772399902344, "learning_rate": 1.8923153526970957e-05, "loss": 1.4352, "step": 3248 }, { "epoch": 2.6962655601659753, "grad_norm": 11.370257377624512, "learning_rate": 1.8922821576763486e-05, "loss": 1.8663, "step": 3249 }, { "epoch": 2.6970954356846475, "grad_norm": 8.073138236999512, "learning_rate": 1.8922489626556018e-05, "loss": 1.4018, "step": 3250 }, { "epoch": 2.6979253112033197, "grad_norm": 7.491978168487549, "learning_rate": 1.892215767634855e-05, "loss": 1.0056, "step": 3251 }, { "epoch": 2.698755186721992, "grad_norm": 12.10370922088623, "learning_rate": 1.8921825726141082e-05, "loss": 1.1073, "step": 3252 }, { "epoch": 2.699585062240664, "grad_norm": 14.696846961975098, "learning_rate": 1.8921493775933614e-05, "loss": 1.1391, "step": 3253 }, { "epoch": 2.7004149377593363, "grad_norm": 10.135567665100098, "learning_rate": 1.8921161825726143e-05, "loss": 1.2082, "step": 3254 }, { "epoch": 2.7012448132780085, "grad_norm": 10.874871253967285, "learning_rate": 1.8920829875518675e-05, "loss": 1.2309, "step": 3255 }, { "epoch": 
2.7020746887966807, "grad_norm": 10.759659767150879, "learning_rate": 1.8920497925311204e-05, "loss": 1.757, "step": 3256 }, { "epoch": 2.702904564315353, "grad_norm": 15.89378547668457, "learning_rate": 1.8920165975103736e-05, "loss": 1.0404, "step": 3257 }, { "epoch": 2.703734439834025, "grad_norm": 7.0499091148376465, "learning_rate": 1.8919834024896268e-05, "loss": 0.9674, "step": 3258 }, { "epoch": 2.7045643153526973, "grad_norm": 8.000340461730957, "learning_rate": 1.8919502074688797e-05, "loss": 1.0515, "step": 3259 }, { "epoch": 2.7053941908713695, "grad_norm": 10.1553955078125, "learning_rate": 1.891917012448133e-05, "loss": 1.7359, "step": 3260 }, { "epoch": 2.7062240663900416, "grad_norm": 9.236349105834961, "learning_rate": 1.891883817427386e-05, "loss": 1.8707, "step": 3261 }, { "epoch": 2.707053941908714, "grad_norm": 9.33111572265625, "learning_rate": 1.891850622406639e-05, "loss": 1.6884, "step": 3262 }, { "epoch": 2.707883817427386, "grad_norm": 10.618459701538086, "learning_rate": 1.891817427385892e-05, "loss": 1.0582, "step": 3263 }, { "epoch": 2.7087136929460582, "grad_norm": 11.818191528320312, "learning_rate": 1.8917842323651454e-05, "loss": 1.6912, "step": 3264 }, { "epoch": 2.7095435684647304, "grad_norm": 18.299930572509766, "learning_rate": 1.8917510373443986e-05, "loss": 1.6359, "step": 3265 }, { "epoch": 2.7103734439834026, "grad_norm": 13.538220405578613, "learning_rate": 1.8917178423236515e-05, "loss": 1.5959, "step": 3266 }, { "epoch": 2.711203319502075, "grad_norm": 7.32205057144165, "learning_rate": 1.8916846473029047e-05, "loss": 1.4427, "step": 3267 }, { "epoch": 2.712033195020747, "grad_norm": 11.913426399230957, "learning_rate": 1.891651452282158e-05, "loss": 1.8083, "step": 3268 }, { "epoch": 2.712863070539419, "grad_norm": 9.074190139770508, "learning_rate": 1.891618257261411e-05, "loss": 1.5237, "step": 3269 }, { "epoch": 2.7136929460580914, "grad_norm": 12.176370620727539, "learning_rate": 1.891585062240664e-05, "loss": 
1.1123, "step": 3270 }, { "epoch": 2.7145228215767636, "grad_norm": 14.445473670959473, "learning_rate": 1.8915518672199172e-05, "loss": 1.5398, "step": 3271 }, { "epoch": 2.715352697095436, "grad_norm": 7.58969259262085, "learning_rate": 1.8915186721991704e-05, "loss": 0.8485, "step": 3272 }, { "epoch": 2.716182572614108, "grad_norm": 9.482129096984863, "learning_rate": 1.8914854771784236e-05, "loss": 1.139, "step": 3273 }, { "epoch": 2.71701244813278, "grad_norm": 16.935134887695312, "learning_rate": 1.8914522821576765e-05, "loss": 1.7357, "step": 3274 }, { "epoch": 2.7178423236514524, "grad_norm": 11.24087142944336, "learning_rate": 1.8914190871369297e-05, "loss": 0.8027, "step": 3275 }, { "epoch": 2.7186721991701246, "grad_norm": 10.832423210144043, "learning_rate": 1.891385892116183e-05, "loss": 1.3856, "step": 3276 }, { "epoch": 2.719502074688797, "grad_norm": 11.335968971252441, "learning_rate": 1.8913526970954358e-05, "loss": 1.6604, "step": 3277 }, { "epoch": 2.720331950207469, "grad_norm": 13.907978057861328, "learning_rate": 1.891319502074689e-05, "loss": 2.3546, "step": 3278 }, { "epoch": 2.721161825726141, "grad_norm": 14.207383155822754, "learning_rate": 1.891286307053942e-05, "loss": 0.9206, "step": 3279 }, { "epoch": 2.7219917012448134, "grad_norm": 7.657797336578369, "learning_rate": 1.891253112033195e-05, "loss": 1.3132, "step": 3280 }, { "epoch": 2.7228215767634856, "grad_norm": 11.829877853393555, "learning_rate": 1.8912199170124483e-05, "loss": 2.0775, "step": 3281 }, { "epoch": 2.7236514522821578, "grad_norm": 14.816702842712402, "learning_rate": 1.8911867219917015e-05, "loss": 2.3291, "step": 3282 }, { "epoch": 2.72448132780083, "grad_norm": 12.505298614501953, "learning_rate": 1.8911535269709543e-05, "loss": 1.9857, "step": 3283 }, { "epoch": 2.725311203319502, "grad_norm": 9.497720718383789, "learning_rate": 1.8911203319502076e-05, "loss": 1.0414, "step": 3284 }, { "epoch": 2.7261410788381744, "grad_norm": 10.679957389831543, 
"learning_rate": 1.8910871369294608e-05, "loss": 1.6729, "step": 3285 }, { "epoch": 2.7269709543568466, "grad_norm": 13.75272274017334, "learning_rate": 1.891053941908714e-05, "loss": 1.342, "step": 3286 }, { "epoch": 2.7278008298755188, "grad_norm": 10.80843734741211, "learning_rate": 1.891020746887967e-05, "loss": 1.6188, "step": 3287 }, { "epoch": 2.728630705394191, "grad_norm": 12.067217826843262, "learning_rate": 1.89098755186722e-05, "loss": 1.3817, "step": 3288 }, { "epoch": 2.729460580912863, "grad_norm": 10.176386833190918, "learning_rate": 1.8909543568464733e-05, "loss": 0.8782, "step": 3289 }, { "epoch": 2.7302904564315353, "grad_norm": 8.110641479492188, "learning_rate": 1.8909211618257265e-05, "loss": 1.143, "step": 3290 }, { "epoch": 2.7311203319502075, "grad_norm": 12.768743515014648, "learning_rate": 1.8908879668049794e-05, "loss": 2.3541, "step": 3291 }, { "epoch": 2.7319502074688797, "grad_norm": 10.872047424316406, "learning_rate": 1.8908547717842326e-05, "loss": 1.7503, "step": 3292 }, { "epoch": 2.732780082987552, "grad_norm": 7.891881942749023, "learning_rate": 1.8908215767634858e-05, "loss": 1.07, "step": 3293 }, { "epoch": 2.733609958506224, "grad_norm": 11.430798530578613, "learning_rate": 1.890788381742739e-05, "loss": 1.754, "step": 3294 }, { "epoch": 2.7344398340248963, "grad_norm": 9.984461784362793, "learning_rate": 1.890755186721992e-05, "loss": 1.2459, "step": 3295 }, { "epoch": 2.7352697095435685, "grad_norm": 10.12006664276123, "learning_rate": 1.890721991701245e-05, "loss": 1.2224, "step": 3296 }, { "epoch": 2.7360995850622407, "grad_norm": 9.84395694732666, "learning_rate": 1.890688796680498e-05, "loss": 1.2972, "step": 3297 }, { "epoch": 2.736929460580913, "grad_norm": 10.59936809539795, "learning_rate": 1.890655601659751e-05, "loss": 1.11, "step": 3298 }, { "epoch": 2.737759336099585, "grad_norm": 9.82438850402832, "learning_rate": 1.8906224066390044e-05, "loss": 1.1519, "step": 3299 }, { "epoch": 2.7385892116182573, 
"grad_norm": 7.051093578338623, "learning_rate": 1.8905892116182572e-05, "loss": 1.3089, "step": 3300 }, { "epoch": 2.7394190871369295, "grad_norm": 7.991415500640869, "learning_rate": 1.8905560165975104e-05, "loss": 1.3234, "step": 3301 }, { "epoch": 2.7402489626556017, "grad_norm": 12.99364185333252, "learning_rate": 1.8905228215767637e-05, "loss": 1.2143, "step": 3302 }, { "epoch": 2.741078838174274, "grad_norm": 14.674527168273926, "learning_rate": 1.8904896265560165e-05, "loss": 1.1497, "step": 3303 }, { "epoch": 2.741908713692946, "grad_norm": 13.534507751464844, "learning_rate": 1.8904564315352697e-05, "loss": 1.4471, "step": 3304 }, { "epoch": 2.7427385892116183, "grad_norm": 12.609314918518066, "learning_rate": 1.890423236514523e-05, "loss": 1.314, "step": 3305 }, { "epoch": 2.7435684647302905, "grad_norm": 12.224895477294922, "learning_rate": 1.890390041493776e-05, "loss": 1.5658, "step": 3306 }, { "epoch": 2.7443983402489627, "grad_norm": 12.246676445007324, "learning_rate": 1.8903568464730294e-05, "loss": 1.6137, "step": 3307 }, { "epoch": 2.745228215767635, "grad_norm": 8.70690631866455, "learning_rate": 1.8903236514522822e-05, "loss": 1.4867, "step": 3308 }, { "epoch": 2.746058091286307, "grad_norm": 11.05584716796875, "learning_rate": 1.8902904564315355e-05, "loss": 1.476, "step": 3309 }, { "epoch": 2.7468879668049793, "grad_norm": 14.524630546569824, "learning_rate": 1.8902572614107887e-05, "loss": 1.1625, "step": 3310 }, { "epoch": 2.7477178423236515, "grad_norm": 9.011341094970703, "learning_rate": 1.890224066390042e-05, "loss": 1.6162, "step": 3311 }, { "epoch": 2.7485477178423237, "grad_norm": 13.154902458190918, "learning_rate": 1.8901908713692947e-05, "loss": 1.9244, "step": 3312 }, { "epoch": 2.749377593360996, "grad_norm": 11.108386993408203, "learning_rate": 1.890157676348548e-05, "loss": 1.7674, "step": 3313 }, { "epoch": 2.750207468879668, "grad_norm": 10.524935722351074, "learning_rate": 1.890124481327801e-05, "loss": 1.5953, "step": 
3314 }, { "epoch": 2.7510373443983402, "grad_norm": 13.147171020507812, "learning_rate": 1.890091286307054e-05, "loss": 1.9773, "step": 3315 }, { "epoch": 2.7518672199170124, "grad_norm": 10.146673202514648, "learning_rate": 1.8900580912863073e-05, "loss": 1.3014, "step": 3316 }, { "epoch": 2.7526970954356846, "grad_norm": 14.275408744812012, "learning_rate": 1.8900248962655605e-05, "loss": 0.9313, "step": 3317 }, { "epoch": 2.753526970954357, "grad_norm": 13.673773765563965, "learning_rate": 1.8899917012448133e-05, "loss": 1.7035, "step": 3318 }, { "epoch": 2.754356846473029, "grad_norm": 12.389337539672852, "learning_rate": 1.8899585062240665e-05, "loss": 1.3713, "step": 3319 }, { "epoch": 2.7551867219917012, "grad_norm": 14.251633644104004, "learning_rate": 1.8899253112033194e-05, "loss": 2.0207, "step": 3320 }, { "epoch": 2.7560165975103734, "grad_norm": 11.277101516723633, "learning_rate": 1.8898921161825726e-05, "loss": 1.3729, "step": 3321 }, { "epoch": 2.7568464730290456, "grad_norm": 9.325691223144531, "learning_rate": 1.889858921161826e-05, "loss": 1.0013, "step": 3322 }, { "epoch": 2.757676348547718, "grad_norm": 9.424595832824707, "learning_rate": 1.889825726141079e-05, "loss": 1.8392, "step": 3323 }, { "epoch": 2.75850622406639, "grad_norm": 13.176839828491211, "learning_rate": 1.889792531120332e-05, "loss": 1.5448, "step": 3324 }, { "epoch": 2.759336099585062, "grad_norm": 9.492487907409668, "learning_rate": 1.889759336099585e-05, "loss": 1.4738, "step": 3325 }, { "epoch": 2.7601659751037344, "grad_norm": 17.68553924560547, "learning_rate": 1.8897261410788383e-05, "loss": 1.5348, "step": 3326 }, { "epoch": 2.7609958506224066, "grad_norm": 9.077948570251465, "learning_rate": 1.8896929460580916e-05, "loss": 1.6012, "step": 3327 }, { "epoch": 2.761825726141079, "grad_norm": 15.339975357055664, "learning_rate": 1.8896597510373444e-05, "loss": 2.1609, "step": 3328 }, { "epoch": 2.762655601659751, "grad_norm": 14.008545875549316, "learning_rate": 
1.8896265560165976e-05, "loss": 1.2671, "step": 3329 }, { "epoch": 2.763485477178423, "grad_norm": 13.566668510437012, "learning_rate": 1.889593360995851e-05, "loss": 1.5711, "step": 3330 }, { "epoch": 2.7643153526970954, "grad_norm": 11.53331470489502, "learning_rate": 1.889560165975104e-05, "loss": 1.5993, "step": 3331 }, { "epoch": 2.7651452282157676, "grad_norm": 11.178157806396484, "learning_rate": 1.8895269709543573e-05, "loss": 1.4584, "step": 3332 }, { "epoch": 2.7659751037344398, "grad_norm": 13.423360824584961, "learning_rate": 1.88949377593361e-05, "loss": 1.7475, "step": 3333 }, { "epoch": 2.766804979253112, "grad_norm": 10.375596046447754, "learning_rate": 1.8894605809128634e-05, "loss": 2.4211, "step": 3334 }, { "epoch": 2.767634854771784, "grad_norm": 15.709497451782227, "learning_rate": 1.8894273858921162e-05, "loss": 1.6767, "step": 3335 }, { "epoch": 2.7684647302904564, "grad_norm": 10.186994552612305, "learning_rate": 1.8893941908713694e-05, "loss": 1.3854, "step": 3336 }, { "epoch": 2.7692946058091286, "grad_norm": 16.727863311767578, "learning_rate": 1.8893609958506226e-05, "loss": 1.6905, "step": 3337 }, { "epoch": 2.7701244813278008, "grad_norm": 9.170150756835938, "learning_rate": 1.8893278008298755e-05, "loss": 1.582, "step": 3338 }, { "epoch": 2.770954356846473, "grad_norm": 23.185575485229492, "learning_rate": 1.8892946058091287e-05, "loss": 2.7278, "step": 3339 }, { "epoch": 2.771784232365145, "grad_norm": 12.491739273071289, "learning_rate": 1.889261410788382e-05, "loss": 1.1588, "step": 3340 }, { "epoch": 2.7726141078838173, "grad_norm": 9.803428649902344, "learning_rate": 1.8892282157676348e-05, "loss": 1.2439, "step": 3341 }, { "epoch": 2.7734439834024895, "grad_norm": 9.468338966369629, "learning_rate": 1.889195020746888e-05, "loss": 1.1313, "step": 3342 }, { "epoch": 2.7742738589211617, "grad_norm": 11.767534255981445, "learning_rate": 1.8891618257261412e-05, "loss": 1.5306, "step": 3343 }, { "epoch": 2.775103734439834, 
"grad_norm": 8.73188591003418, "learning_rate": 1.8891286307053944e-05, "loss": 0.8097, "step": 3344 }, { "epoch": 2.775933609958506, "grad_norm": 8.279026985168457, "learning_rate": 1.8890954356846473e-05, "loss": 1.0658, "step": 3345 }, { "epoch": 2.7767634854771783, "grad_norm": 12.403583526611328, "learning_rate": 1.8890622406639005e-05, "loss": 1.6664, "step": 3346 }, { "epoch": 2.7775933609958505, "grad_norm": 13.891053199768066, "learning_rate": 1.8890290456431537e-05, "loss": 2.1041, "step": 3347 }, { "epoch": 2.7784232365145227, "grad_norm": 10.170638084411621, "learning_rate": 1.888995850622407e-05, "loss": 1.2035, "step": 3348 }, { "epoch": 2.779253112033195, "grad_norm": 9.961816787719727, "learning_rate": 1.8889626556016598e-05, "loss": 2.0107, "step": 3349 }, { "epoch": 2.780082987551867, "grad_norm": 10.095903396606445, "learning_rate": 1.888929460580913e-05, "loss": 1.0188, "step": 3350 }, { "epoch": 2.7809128630705393, "grad_norm": 9.479558944702148, "learning_rate": 1.8888962655601662e-05, "loss": 1.4997, "step": 3351 }, { "epoch": 2.7817427385892115, "grad_norm": 8.127591133117676, "learning_rate": 1.8888630705394195e-05, "loss": 1.137, "step": 3352 }, { "epoch": 2.7825726141078837, "grad_norm": 16.714948654174805, "learning_rate": 1.8888298755186723e-05, "loss": 2.6637, "step": 3353 }, { "epoch": 2.783402489626556, "grad_norm": 11.393444061279297, "learning_rate": 1.8887966804979255e-05, "loss": 1.6941, "step": 3354 }, { "epoch": 2.784232365145228, "grad_norm": 8.918164253234863, "learning_rate": 1.8887634854771787e-05, "loss": 1.6546, "step": 3355 }, { "epoch": 2.7850622406639003, "grad_norm": 12.798556327819824, "learning_rate": 1.8887302904564316e-05, "loss": 2.3141, "step": 3356 }, { "epoch": 2.7858921161825725, "grad_norm": 13.885255813598633, "learning_rate": 1.8886970954356848e-05, "loss": 2.0791, "step": 3357 }, { "epoch": 2.7867219917012447, "grad_norm": 8.834342956542969, "learning_rate": 1.8886639004149377e-05, "loss": 1.4996, "step": 
3358 }, { "epoch": 2.787551867219917, "grad_norm": 9.872679710388184, "learning_rate": 1.888630705394191e-05, "loss": 1.5408, "step": 3359 }, { "epoch": 2.788381742738589, "grad_norm": 7.818081378936768, "learning_rate": 1.888597510373444e-05, "loss": 1.1919, "step": 3360 }, { "epoch": 2.7892116182572613, "grad_norm": 13.207056045532227, "learning_rate": 1.8885643153526973e-05, "loss": 1.5295, "step": 3361 }, { "epoch": 2.7900414937759335, "grad_norm": 10.861288070678711, "learning_rate": 1.8885311203319502e-05, "loss": 1.7538, "step": 3362 }, { "epoch": 2.7908713692946057, "grad_norm": 9.435961723327637, "learning_rate": 1.8884979253112034e-05, "loss": 1.5936, "step": 3363 }, { "epoch": 2.791701244813278, "grad_norm": 12.497191429138184, "learning_rate": 1.8884647302904566e-05, "loss": 1.3552, "step": 3364 }, { "epoch": 2.79253112033195, "grad_norm": 9.336036682128906, "learning_rate": 1.88843153526971e-05, "loss": 1.4877, "step": 3365 }, { "epoch": 2.7933609958506223, "grad_norm": 8.710036277770996, "learning_rate": 1.8883983402489627e-05, "loss": 1.4224, "step": 3366 }, { "epoch": 2.7941908713692944, "grad_norm": 6.727874279022217, "learning_rate": 1.888365145228216e-05, "loss": 0.7663, "step": 3367 }, { "epoch": 2.7950207468879666, "grad_norm": 7.139999866485596, "learning_rate": 1.888331950207469e-05, "loss": 0.8759, "step": 3368 }, { "epoch": 2.795850622406639, "grad_norm": 9.672231674194336, "learning_rate": 1.8882987551867223e-05, "loss": 1.807, "step": 3369 }, { "epoch": 2.796680497925311, "grad_norm": 14.75564956665039, "learning_rate": 1.8882655601659752e-05, "loss": 0.9257, "step": 3370 }, { "epoch": 2.7975103734439832, "grad_norm": 8.877702713012695, "learning_rate": 1.8882323651452284e-05, "loss": 0.7666, "step": 3371 }, { "epoch": 2.7983402489626554, "grad_norm": 10.668499946594238, "learning_rate": 1.8881991701244816e-05, "loss": 1.1525, "step": 3372 }, { "epoch": 2.7991701244813276, "grad_norm": 17.240022659301758, "learning_rate": 
1.8881659751037345e-05, "loss": 1.3185, "step": 3373 }, { "epoch": 2.8, "grad_norm": 11.857933044433594, "learning_rate": 1.8881327800829877e-05, "loss": 1.692, "step": 3374 }, { "epoch": 2.800829875518672, "grad_norm": 13.956525802612305, "learning_rate": 1.888099585062241e-05, "loss": 2.1558, "step": 3375 }, { "epoch": 2.801659751037344, "grad_norm": 7.815750598907471, "learning_rate": 1.8880663900414938e-05, "loss": 1.4536, "step": 3376 }, { "epoch": 2.8024896265560164, "grad_norm": 14.626765251159668, "learning_rate": 1.888033195020747e-05, "loss": 1.1088, "step": 3377 }, { "epoch": 2.8033195020746886, "grad_norm": 8.136638641357422, "learning_rate": 1.8880000000000002e-05, "loss": 1.4253, "step": 3378 }, { "epoch": 2.804149377593361, "grad_norm": 8.724748611450195, "learning_rate": 1.887966804979253e-05, "loss": 1.6431, "step": 3379 }, { "epoch": 2.804979253112033, "grad_norm": 8.707719802856445, "learning_rate": 1.8879336099585063e-05, "loss": 1.4553, "step": 3380 }, { "epoch": 2.805809128630705, "grad_norm": 7.6868791580200195, "learning_rate": 1.8879004149377595e-05, "loss": 0.9425, "step": 3381 }, { "epoch": 2.8066390041493774, "grad_norm": 8.708954811096191, "learning_rate": 1.8878672199170124e-05, "loss": 1.6239, "step": 3382 }, { "epoch": 2.8074688796680496, "grad_norm": 13.545257568359375, "learning_rate": 1.8878340248962656e-05, "loss": 1.7856, "step": 3383 }, { "epoch": 2.808298755186722, "grad_norm": 6.8256754875183105, "learning_rate": 1.8878008298755188e-05, "loss": 1.1765, "step": 3384 }, { "epoch": 2.809128630705394, "grad_norm": 11.6149263381958, "learning_rate": 1.887767634854772e-05, "loss": 0.8723, "step": 3385 }, { "epoch": 2.809958506224066, "grad_norm": 11.381980895996094, "learning_rate": 1.8877344398340252e-05, "loss": 1.2801, "step": 3386 }, { "epoch": 2.8107883817427384, "grad_norm": 8.460088729858398, "learning_rate": 1.887701244813278e-05, "loss": 1.0546, "step": 3387 }, { "epoch": 2.8116182572614106, "grad_norm": 13.93718433380127, 
"learning_rate": 1.8876680497925313e-05, "loss": 0.9718, "step": 3388 }, { "epoch": 2.8124481327800828, "grad_norm": 8.117897987365723, "learning_rate": 1.8876348547717845e-05, "loss": 0.5719, "step": 3389 }, { "epoch": 2.813278008298755, "grad_norm": 12.903424263000488, "learning_rate": 1.8876016597510377e-05, "loss": 1.4107, "step": 3390 }, { "epoch": 2.814107883817427, "grad_norm": 8.640923500061035, "learning_rate": 1.8875684647302906e-05, "loss": 1.5811, "step": 3391 }, { "epoch": 2.8149377593360994, "grad_norm": 11.802755355834961, "learning_rate": 1.8875352697095438e-05, "loss": 1.6585, "step": 3392 }, { "epoch": 2.8157676348547716, "grad_norm": 10.963879585266113, "learning_rate": 1.887502074688797e-05, "loss": 1.0893, "step": 3393 }, { "epoch": 2.8165975103734437, "grad_norm": 14.153505325317383, "learning_rate": 1.88746887966805e-05, "loss": 2.3211, "step": 3394 }, { "epoch": 2.817427385892116, "grad_norm": 11.356252670288086, "learning_rate": 1.887435684647303e-05, "loss": 1.0365, "step": 3395 }, { "epoch": 2.818257261410788, "grad_norm": 9.476226806640625, "learning_rate": 1.887402489626556e-05, "loss": 0.9712, "step": 3396 }, { "epoch": 2.8190871369294603, "grad_norm": 15.137112617492676, "learning_rate": 1.8873692946058092e-05, "loss": 1.8165, "step": 3397 }, { "epoch": 2.8199170124481325, "grad_norm": 7.282263278961182, "learning_rate": 1.8873360995850624e-05, "loss": 0.9481, "step": 3398 }, { "epoch": 2.8207468879668047, "grad_norm": 9.44405746459961, "learning_rate": 1.8873029045643153e-05, "loss": 1.4794, "step": 3399 }, { "epoch": 2.821576763485477, "grad_norm": 11.386075019836426, "learning_rate": 1.8872697095435685e-05, "loss": 1.3585, "step": 3400 }, { "epoch": 2.822406639004149, "grad_norm": 9.267541885375977, "learning_rate": 1.8872365145228217e-05, "loss": 1.6598, "step": 3401 }, { "epoch": 2.8232365145228213, "grad_norm": 11.936152458190918, "learning_rate": 1.887203319502075e-05, "loss": 1.7238, "step": 3402 }, { "epoch": 
2.8240663900414935, "grad_norm": 20.216861724853516, "learning_rate": 1.8871701244813278e-05, "loss": 1.8377, "step": 3403 }, { "epoch": 2.8248962655601657, "grad_norm": 16.156288146972656, "learning_rate": 1.887136929460581e-05, "loss": 1.5931, "step": 3404 }, { "epoch": 2.825726141078838, "grad_norm": 8.703010559082031, "learning_rate": 1.8871037344398342e-05, "loss": 1.0272, "step": 3405 }, { "epoch": 2.82655601659751, "grad_norm": 12.100080490112305, "learning_rate": 1.8870705394190874e-05, "loss": 2.1158, "step": 3406 }, { "epoch": 2.8273858921161823, "grad_norm": 8.640381813049316, "learning_rate": 1.8870373443983403e-05, "loss": 1.4469, "step": 3407 }, { "epoch": 2.828215767634855, "grad_norm": 11.283651351928711, "learning_rate": 1.8870041493775935e-05, "loss": 1.6901, "step": 3408 }, { "epoch": 2.829045643153527, "grad_norm": 8.472804069519043, "learning_rate": 1.8869709543568467e-05, "loss": 1.2413, "step": 3409 }, { "epoch": 2.8298755186721993, "grad_norm": 5.694946765899658, "learning_rate": 1.8869377593361e-05, "loss": 1.4792, "step": 3410 }, { "epoch": 2.8307053941908715, "grad_norm": 10.891706466674805, "learning_rate": 1.886904564315353e-05, "loss": 1.6904, "step": 3411 }, { "epoch": 2.8315352697095437, "grad_norm": 14.004064559936523, "learning_rate": 1.886871369294606e-05, "loss": 1.559, "step": 3412 }, { "epoch": 2.832365145228216, "grad_norm": 6.676290988922119, "learning_rate": 1.8868381742738592e-05, "loss": 0.8769, "step": 3413 }, { "epoch": 2.833195020746888, "grad_norm": 11.010711669921875, "learning_rate": 1.886804979253112e-05, "loss": 1.2197, "step": 3414 }, { "epoch": 2.8340248962655603, "grad_norm": 9.73654842376709, "learning_rate": 1.8867717842323653e-05, "loss": 0.9943, "step": 3415 }, { "epoch": 2.8348547717842325, "grad_norm": 14.030654907226562, "learning_rate": 1.8867385892116185e-05, "loss": 1.7089, "step": 3416 }, { "epoch": 2.8356846473029047, "grad_norm": 15.400557518005371, "learning_rate": 1.8867053941908714e-05, "loss": 
1.3995, "step": 3417 }, { "epoch": 2.836514522821577, "grad_norm": 14.924637794494629, "learning_rate": 1.8866721991701246e-05, "loss": 2.73, "step": 3418 }, { "epoch": 2.837344398340249, "grad_norm": 9.575301170349121, "learning_rate": 1.8866390041493778e-05, "loss": 1.2602, "step": 3419 }, { "epoch": 2.8381742738589213, "grad_norm": 11.05324649810791, "learning_rate": 1.8866058091286307e-05, "loss": 1.3155, "step": 3420 }, { "epoch": 2.8390041493775935, "grad_norm": 12.50300121307373, "learning_rate": 1.886572614107884e-05, "loss": 1.4467, "step": 3421 }, { "epoch": 2.8398340248962657, "grad_norm": 7.129781723022461, "learning_rate": 1.886539419087137e-05, "loss": 0.9567, "step": 3422 }, { "epoch": 2.840663900414938, "grad_norm": 12.042284965515137, "learning_rate": 1.8865062240663903e-05, "loss": 1.5393, "step": 3423 }, { "epoch": 2.84149377593361, "grad_norm": 13.13973331451416, "learning_rate": 1.8864730290456432e-05, "loss": 1.5716, "step": 3424 }, { "epoch": 2.8423236514522823, "grad_norm": 10.93436336517334, "learning_rate": 1.8864398340248964e-05, "loss": 1.1491, "step": 3425 }, { "epoch": 2.8431535269709545, "grad_norm": 15.751879692077637, "learning_rate": 1.8864066390041496e-05, "loss": 1.1167, "step": 3426 }, { "epoch": 2.8439834024896267, "grad_norm": 10.118321418762207, "learning_rate": 1.8863734439834028e-05, "loss": 1.7933, "step": 3427 }, { "epoch": 2.844813278008299, "grad_norm": 8.417364120483398, "learning_rate": 1.8863402489626557e-05, "loss": 1.2331, "step": 3428 }, { "epoch": 2.845643153526971, "grad_norm": 11.51558780670166, "learning_rate": 1.886307053941909e-05, "loss": 2.3416, "step": 3429 }, { "epoch": 2.8464730290456433, "grad_norm": 10.558334350585938, "learning_rate": 1.886273858921162e-05, "loss": 1.3106, "step": 3430 }, { "epoch": 2.8473029045643155, "grad_norm": 9.092787742614746, "learning_rate": 1.8862406639004153e-05, "loss": 1.2064, "step": 3431 }, { "epoch": 2.8481327800829876, "grad_norm": 8.806182861328125, "learning_rate": 
1.8862074688796682e-05, "loss": 1.0721, "step": 3432 }, { "epoch": 2.84896265560166, "grad_norm": 14.578390121459961, "learning_rate": 1.8861742738589214e-05, "loss": 1.4634, "step": 3433 }, { "epoch": 2.849792531120332, "grad_norm": 9.749099731445312, "learning_rate": 1.8861410788381746e-05, "loss": 1.2985, "step": 3434 }, { "epoch": 2.8506224066390042, "grad_norm": 10.25998592376709, "learning_rate": 1.8861078838174275e-05, "loss": 1.6492, "step": 3435 }, { "epoch": 2.8514522821576764, "grad_norm": 9.757079124450684, "learning_rate": 1.8860746887966807e-05, "loss": 1.512, "step": 3436 }, { "epoch": 2.8522821576763486, "grad_norm": 9.156516075134277, "learning_rate": 1.8860414937759336e-05, "loss": 1.2411, "step": 3437 }, { "epoch": 2.853112033195021, "grad_norm": 14.143542289733887, "learning_rate": 1.8860082987551868e-05, "loss": 1.4167, "step": 3438 }, { "epoch": 2.853941908713693, "grad_norm": 13.14087200164795, "learning_rate": 1.88597510373444e-05, "loss": 2.1913, "step": 3439 }, { "epoch": 2.854771784232365, "grad_norm": 11.657431602478027, "learning_rate": 1.8859419087136932e-05, "loss": 1.3296, "step": 3440 }, { "epoch": 2.8556016597510374, "grad_norm": 10.277931213378906, "learning_rate": 1.885908713692946e-05, "loss": 1.3581, "step": 3441 }, { "epoch": 2.8564315352697096, "grad_norm": 10.632019996643066, "learning_rate": 1.8858755186721993e-05, "loss": 1.1299, "step": 3442 }, { "epoch": 2.857261410788382, "grad_norm": 10.492674827575684, "learning_rate": 1.8858423236514525e-05, "loss": 1.5247, "step": 3443 }, { "epoch": 2.858091286307054, "grad_norm": 16.270381927490234, "learning_rate": 1.8858091286307057e-05, "loss": 2.146, "step": 3444 }, { "epoch": 2.858921161825726, "grad_norm": 11.082291603088379, "learning_rate": 1.8857759336099586e-05, "loss": 1.9089, "step": 3445 }, { "epoch": 2.8597510373443984, "grad_norm": 10.616772651672363, "learning_rate": 1.8857427385892118e-05, "loss": 0.711, "step": 3446 }, { "epoch": 2.8605809128630706, "grad_norm": 
10.731535911560059, "learning_rate": 1.885709543568465e-05, "loss": 1.3081, "step": 3447 }, { "epoch": 2.861410788381743, "grad_norm": 11.36392593383789, "learning_rate": 1.8856763485477182e-05, "loss": 1.6873, "step": 3448 }, { "epoch": 2.862240663900415, "grad_norm": 10.460933685302734, "learning_rate": 1.885643153526971e-05, "loss": 1.0221, "step": 3449 }, { "epoch": 2.863070539419087, "grad_norm": 8.615594863891602, "learning_rate": 1.8856099585062243e-05, "loss": 1.3589, "step": 3450 }, { "epoch": 2.8639004149377594, "grad_norm": 14.06229305267334, "learning_rate": 1.8855767634854775e-05, "loss": 1.7154, "step": 3451 }, { "epoch": 2.8647302904564316, "grad_norm": 11.697526931762695, "learning_rate": 1.8855435684647304e-05, "loss": 1.5923, "step": 3452 }, { "epoch": 2.8655601659751038, "grad_norm": 6.9123029708862305, "learning_rate": 1.8855103734439836e-05, "loss": 1.0778, "step": 3453 }, { "epoch": 2.866390041493776, "grad_norm": 8.614581108093262, "learning_rate": 1.8854771784232368e-05, "loss": 1.0018, "step": 3454 }, { "epoch": 2.867219917012448, "grad_norm": 7.947262287139893, "learning_rate": 1.8854439834024897e-05, "loss": 1.009, "step": 3455 }, { "epoch": 2.8680497925311204, "grad_norm": 8.997335433959961, "learning_rate": 1.885410788381743e-05, "loss": 1.5454, "step": 3456 }, { "epoch": 2.8688796680497926, "grad_norm": 10.29330062866211, "learning_rate": 1.8853775933609957e-05, "loss": 1.6997, "step": 3457 }, { "epoch": 2.8697095435684647, "grad_norm": 9.169981956481934, "learning_rate": 1.885344398340249e-05, "loss": 1.4044, "step": 3458 }, { "epoch": 2.870539419087137, "grad_norm": 24.551862716674805, "learning_rate": 1.885311203319502e-05, "loss": 2.0004, "step": 3459 }, { "epoch": 2.871369294605809, "grad_norm": 11.373209953308105, "learning_rate": 1.8852780082987554e-05, "loss": 1.6176, "step": 3460 }, { "epoch": 2.8721991701244813, "grad_norm": 14.58926010131836, "learning_rate": 1.8852448132780082e-05, "loss": 1.7705, "step": 3461 }, { "epoch": 
2.8730290456431535, "grad_norm": 11.97011947631836, "learning_rate": 1.8852116182572615e-05, "loss": 1.6259, "step": 3462 }, { "epoch": 2.8738589211618257, "grad_norm": 11.026578903198242, "learning_rate": 1.8851784232365147e-05, "loss": 1.2966, "step": 3463 }, { "epoch": 2.874688796680498, "grad_norm": 10.308553695678711, "learning_rate": 1.885145228215768e-05, "loss": 1.3306, "step": 3464 }, { "epoch": 2.87551867219917, "grad_norm": 18.087495803833008, "learning_rate": 1.885112033195021e-05, "loss": 1.5029, "step": 3465 }, { "epoch": 2.8763485477178423, "grad_norm": 12.309741020202637, "learning_rate": 1.885078838174274e-05, "loss": 2.229, "step": 3466 }, { "epoch": 2.8771784232365145, "grad_norm": 15.014227867126465, "learning_rate": 1.885045643153527e-05, "loss": 1.178, "step": 3467 }, { "epoch": 2.8780082987551867, "grad_norm": 11.003460884094238, "learning_rate": 1.8850124481327804e-05, "loss": 1.2511, "step": 3468 }, { "epoch": 2.878838174273859, "grad_norm": 12.382216453552246, "learning_rate": 1.8849792531120336e-05, "loss": 1.0802, "step": 3469 }, { "epoch": 2.879668049792531, "grad_norm": 8.538657188415527, "learning_rate": 1.8849460580912865e-05, "loss": 1.6359, "step": 3470 }, { "epoch": 2.8804979253112033, "grad_norm": 9.586384773254395, "learning_rate": 1.8849128630705397e-05, "loss": 1.5669, "step": 3471 }, { "epoch": 2.8813278008298755, "grad_norm": 11.222602844238281, "learning_rate": 1.884879668049793e-05, "loss": 0.6296, "step": 3472 }, { "epoch": 2.8821576763485477, "grad_norm": 13.09217643737793, "learning_rate": 1.8848464730290458e-05, "loss": 1.1889, "step": 3473 }, { "epoch": 2.88298755186722, "grad_norm": 12.864498138427734, "learning_rate": 1.884813278008299e-05, "loss": 1.3687, "step": 3474 }, { "epoch": 2.883817427385892, "grad_norm": 10.450336456298828, "learning_rate": 1.884780082987552e-05, "loss": 1.2684, "step": 3475 }, { "epoch": 2.8846473029045643, "grad_norm": 14.899264335632324, "learning_rate": 1.884746887966805e-05, "loss": 
1.9593, "step": 3476 }, { "epoch": 2.8854771784232365, "grad_norm": 6.984968185424805, "learning_rate": 1.8847136929460583e-05, "loss": 0.9818, "step": 3477 }, { "epoch": 2.8863070539419087, "grad_norm": 11.205042839050293, "learning_rate": 1.884680497925311e-05, "loss": 1.8589, "step": 3478 }, { "epoch": 2.887136929460581, "grad_norm": 21.40639305114746, "learning_rate": 1.8846473029045643e-05, "loss": 1.8529, "step": 3479 }, { "epoch": 2.887966804979253, "grad_norm": 24.786712646484375, "learning_rate": 1.8846141078838176e-05, "loss": 2.6362, "step": 3480 }, { "epoch": 2.8887966804979253, "grad_norm": 8.647282600402832, "learning_rate": 1.8845809128630708e-05, "loss": 1.7476, "step": 3481 }, { "epoch": 2.8896265560165975, "grad_norm": 16.768577575683594, "learning_rate": 1.8845477178423236e-05, "loss": 1.3072, "step": 3482 }, { "epoch": 2.8904564315352697, "grad_norm": 13.302267074584961, "learning_rate": 1.884514522821577e-05, "loss": 1.4927, "step": 3483 }, { "epoch": 2.891286307053942, "grad_norm": 9.479769706726074, "learning_rate": 1.88448132780083e-05, "loss": 1.2962, "step": 3484 }, { "epoch": 2.892116182572614, "grad_norm": 10.647314071655273, "learning_rate": 1.8844481327800833e-05, "loss": 1.4358, "step": 3485 }, { "epoch": 2.8929460580912862, "grad_norm": 7.549808025360107, "learning_rate": 1.884414937759336e-05, "loss": 1.0486, "step": 3486 }, { "epoch": 2.8937759336099584, "grad_norm": 15.892627716064453, "learning_rate": 1.8843817427385894e-05, "loss": 1.9078, "step": 3487 }, { "epoch": 2.8946058091286306, "grad_norm": 11.25290584564209, "learning_rate": 1.8843485477178426e-05, "loss": 1.3053, "step": 3488 }, { "epoch": 2.895435684647303, "grad_norm": 10.044973373413086, "learning_rate": 1.8843153526970958e-05, "loss": 1.6326, "step": 3489 }, { "epoch": 2.896265560165975, "grad_norm": 11.756795883178711, "learning_rate": 1.8842821576763486e-05, "loss": 1.0765, "step": 3490 }, { "epoch": 2.8970954356846472, "grad_norm": 16.266727447509766, 
"learning_rate": 1.884248962655602e-05, "loss": 1.7161, "step": 3491 }, { "epoch": 2.8979253112033194, "grad_norm": 11.396690368652344, "learning_rate": 1.884215767634855e-05, "loss": 1.725, "step": 3492 }, { "epoch": 2.8987551867219916, "grad_norm": 12.701398849487305, "learning_rate": 1.884182572614108e-05, "loss": 1.6878, "step": 3493 }, { "epoch": 2.899585062240664, "grad_norm": 17.061676025390625, "learning_rate": 1.884149377593361e-05, "loss": 1.5576, "step": 3494 }, { "epoch": 2.900414937759336, "grad_norm": 11.388296127319336, "learning_rate": 1.8841161825726144e-05, "loss": 1.1871, "step": 3495 }, { "epoch": 2.901244813278008, "grad_norm": 8.911275863647461, "learning_rate": 1.8840829875518672e-05, "loss": 1.3672, "step": 3496 }, { "epoch": 2.9020746887966804, "grad_norm": 9.873563766479492, "learning_rate": 1.8840497925311204e-05, "loss": 1.2944, "step": 3497 }, { "epoch": 2.9029045643153526, "grad_norm": 16.14385414123535, "learning_rate": 1.8840165975103737e-05, "loss": 1.6255, "step": 3498 }, { "epoch": 2.903734439834025, "grad_norm": 8.45927619934082, "learning_rate": 1.8839834024896265e-05, "loss": 1.7233, "step": 3499 }, { "epoch": 2.904564315352697, "grad_norm": 9.344693183898926, "learning_rate": 1.8839502074688797e-05, "loss": 0.993, "step": 3500 }, { "epoch": 2.905394190871369, "grad_norm": 12.153889656066895, "learning_rate": 1.883917012448133e-05, "loss": 1.3297, "step": 3501 }, { "epoch": 2.9062240663900414, "grad_norm": 14.201428413391113, "learning_rate": 1.883883817427386e-05, "loss": 1.5747, "step": 3502 }, { "epoch": 2.9070539419087136, "grad_norm": 12.73836612701416, "learning_rate": 1.883850622406639e-05, "loss": 0.9624, "step": 3503 }, { "epoch": 2.9078838174273858, "grad_norm": 12.732584953308105, "learning_rate": 1.8838174273858922e-05, "loss": 2.1092, "step": 3504 }, { "epoch": 2.908713692946058, "grad_norm": 11.283859252929688, "learning_rate": 1.8837842323651455e-05, "loss": 1.9081, "step": 3505 }, { "epoch": 2.90954356846473, 
"grad_norm": 7.920281410217285, "learning_rate": 1.8837510373443987e-05, "loss": 1.0447, "step": 3506 }, { "epoch": 2.9103734439834024, "grad_norm": 10.109195709228516, "learning_rate": 1.8837178423236515e-05, "loss": 1.8671, "step": 3507 }, { "epoch": 2.9112033195020746, "grad_norm": 10.335853576660156, "learning_rate": 1.8836846473029047e-05, "loss": 0.8378, "step": 3508 }, { "epoch": 2.9120331950207468, "grad_norm": 10.532302856445312, "learning_rate": 1.883651452282158e-05, "loss": 2.5799, "step": 3509 }, { "epoch": 2.912863070539419, "grad_norm": 13.624271392822266, "learning_rate": 1.883618257261411e-05, "loss": 1.0778, "step": 3510 }, { "epoch": 2.913692946058091, "grad_norm": 13.921723365783691, "learning_rate": 1.883585062240664e-05, "loss": 1.7103, "step": 3511 }, { "epoch": 2.9145228215767633, "grad_norm": 9.867796897888184, "learning_rate": 1.8835518672199172e-05, "loss": 1.5486, "step": 3512 }, { "epoch": 2.9153526970954355, "grad_norm": 9.125530242919922, "learning_rate": 1.88351867219917e-05, "loss": 1.1079, "step": 3513 }, { "epoch": 2.9161825726141077, "grad_norm": 10.934476852416992, "learning_rate": 1.8834854771784233e-05, "loss": 1.3256, "step": 3514 }, { "epoch": 2.91701244813278, "grad_norm": 13.049747467041016, "learning_rate": 1.8834522821576765e-05, "loss": 1.8871, "step": 3515 }, { "epoch": 2.917842323651452, "grad_norm": 9.198606491088867, "learning_rate": 1.8834190871369294e-05, "loss": 0.6164, "step": 3516 }, { "epoch": 2.9186721991701243, "grad_norm": 9.454556465148926, "learning_rate": 1.8833858921161826e-05, "loss": 1.1104, "step": 3517 }, { "epoch": 2.9195020746887965, "grad_norm": 11.653419494628906, "learning_rate": 1.883352697095436e-05, "loss": 1.1297, "step": 3518 }, { "epoch": 2.9203319502074687, "grad_norm": 10.211153030395508, "learning_rate": 1.883319502074689e-05, "loss": 1.2165, "step": 3519 }, { "epoch": 2.921161825726141, "grad_norm": 11.827204704284668, "learning_rate": 1.883286307053942e-05, "loss": 1.3119, "step": 
3520 }, { "epoch": 2.9219917012448136, "grad_norm": 10.062827110290527, "learning_rate": 1.883253112033195e-05, "loss": 1.5842, "step": 3521 }, { "epoch": 2.9228215767634858, "grad_norm": 14.760677337646484, "learning_rate": 1.8832199170124483e-05, "loss": 1.773, "step": 3522 }, { "epoch": 2.923651452282158, "grad_norm": 12.362505912780762, "learning_rate": 1.8831867219917015e-05, "loss": 1.7581, "step": 3523 }, { "epoch": 2.92448132780083, "grad_norm": 14.641130447387695, "learning_rate": 1.8831535269709544e-05, "loss": 2.1227, "step": 3524 }, { "epoch": 2.9253112033195023, "grad_norm": 12.647582054138184, "learning_rate": 1.8831203319502076e-05, "loss": 1.4211, "step": 3525 }, { "epoch": 2.9261410788381745, "grad_norm": 9.544833183288574, "learning_rate": 1.883087136929461e-05, "loss": 1.2567, "step": 3526 }, { "epoch": 2.9269709543568467, "grad_norm": 14.654105186462402, "learning_rate": 1.883053941908714e-05, "loss": 1.9502, "step": 3527 }, { "epoch": 2.927800829875519, "grad_norm": 10.993865966796875, "learning_rate": 1.883020746887967e-05, "loss": 1.983, "step": 3528 }, { "epoch": 2.928630705394191, "grad_norm": 9.766879081726074, "learning_rate": 1.88298755186722e-05, "loss": 1.4374, "step": 3529 }, { "epoch": 2.9294605809128633, "grad_norm": 15.764823913574219, "learning_rate": 1.8829543568464733e-05, "loss": 1.6566, "step": 3530 }, { "epoch": 2.9302904564315355, "grad_norm": 10.219304084777832, "learning_rate": 1.8829211618257262e-05, "loss": 1.256, "step": 3531 }, { "epoch": 2.9311203319502077, "grad_norm": 11.781291007995605, "learning_rate": 1.8828879668049794e-05, "loss": 1.9962, "step": 3532 }, { "epoch": 2.93195020746888, "grad_norm": 8.337133407592773, "learning_rate": 1.8828547717842326e-05, "loss": 1.6042, "step": 3533 }, { "epoch": 2.932780082987552, "grad_norm": 16.449602127075195, "learning_rate": 1.8828215767634855e-05, "loss": 1.8193, "step": 3534 }, { "epoch": 2.9336099585062243, "grad_norm": 8.96648120880127, "learning_rate": 
1.8827883817427387e-05, "loss": 1.023, "step": 3535 }, { "epoch": 2.9344398340248965, "grad_norm": 7.023741245269775, "learning_rate": 1.8827551867219916e-05, "loss": 1.2877, "step": 3536 }, { "epoch": 2.9352697095435687, "grad_norm": 11.161797523498535, "learning_rate": 1.8827219917012448e-05, "loss": 1.7866, "step": 3537 }, { "epoch": 2.936099585062241, "grad_norm": 10.143223762512207, "learning_rate": 1.882688796680498e-05, "loss": 1.7144, "step": 3538 }, { "epoch": 2.936929460580913, "grad_norm": 15.519615173339844, "learning_rate": 1.8826556016597512e-05, "loss": 1.2214, "step": 3539 }, { "epoch": 2.9377593360995853, "grad_norm": 13.851301193237305, "learning_rate": 1.882622406639004e-05, "loss": 1.9322, "step": 3540 }, { "epoch": 2.9385892116182575, "grad_norm": 10.220735549926758, "learning_rate": 1.8825892116182573e-05, "loss": 1.4723, "step": 3541 }, { "epoch": 2.9394190871369297, "grad_norm": 19.46562385559082, "learning_rate": 1.8825560165975105e-05, "loss": 1.3337, "step": 3542 }, { "epoch": 2.940248962655602, "grad_norm": 10.249600410461426, "learning_rate": 1.8825228215767637e-05, "loss": 1.0664, "step": 3543 }, { "epoch": 2.941078838174274, "grad_norm": 9.477153778076172, "learning_rate": 1.882489626556017e-05, "loss": 1.1087, "step": 3544 }, { "epoch": 2.9419087136929463, "grad_norm": 7.807400703430176, "learning_rate": 1.8824564315352698e-05, "loss": 1.6635, "step": 3545 }, { "epoch": 2.9427385892116185, "grad_norm": 12.369733810424805, "learning_rate": 1.882423236514523e-05, "loss": 1.9239, "step": 3546 }, { "epoch": 2.9435684647302907, "grad_norm": 19.99860382080078, "learning_rate": 1.8823900414937762e-05, "loss": 1.9462, "step": 3547 }, { "epoch": 2.944398340248963, "grad_norm": 8.986186027526855, "learning_rate": 1.8823568464730294e-05, "loss": 1.2213, "step": 3548 }, { "epoch": 2.945228215767635, "grad_norm": 18.453285217285156, "learning_rate": 1.8823236514522823e-05, "loss": 1.2168, "step": 3549 }, { "epoch": 2.9460580912863072, 
"grad_norm": 11.880725860595703, "learning_rate": 1.8822904564315355e-05, "loss": 1.0711, "step": 3550 }, { "epoch": 2.9468879668049794, "grad_norm": 14.921900749206543, "learning_rate": 1.8822572614107887e-05, "loss": 2.1975, "step": 3551 }, { "epoch": 2.9477178423236516, "grad_norm": 15.539724349975586, "learning_rate": 1.8822240663900416e-05, "loss": 1.7542, "step": 3552 }, { "epoch": 2.948547717842324, "grad_norm": 8.201582908630371, "learning_rate": 1.8821908713692948e-05, "loss": 1.1905, "step": 3553 }, { "epoch": 2.949377593360996, "grad_norm": 20.104393005371094, "learning_rate": 1.8821576763485477e-05, "loss": 1.7382, "step": 3554 }, { "epoch": 2.9502074688796682, "grad_norm": 9.571094512939453, "learning_rate": 1.882124481327801e-05, "loss": 1.515, "step": 3555 }, { "epoch": 2.9510373443983404, "grad_norm": 11.395162582397461, "learning_rate": 1.882091286307054e-05, "loss": 1.8458, "step": 3556 }, { "epoch": 2.9518672199170126, "grad_norm": 9.340180397033691, "learning_rate": 1.882058091286307e-05, "loss": 1.7877, "step": 3557 }, { "epoch": 2.952697095435685, "grad_norm": 8.673478126525879, "learning_rate": 1.8820248962655602e-05, "loss": 1.2868, "step": 3558 }, { "epoch": 2.953526970954357, "grad_norm": 10.081418991088867, "learning_rate": 1.8819917012448134e-05, "loss": 1.275, "step": 3559 }, { "epoch": 2.954356846473029, "grad_norm": 10.124805450439453, "learning_rate": 1.8819585062240666e-05, "loss": 1.7875, "step": 3560 }, { "epoch": 2.9551867219917014, "grad_norm": 13.153204917907715, "learning_rate": 1.8819253112033195e-05, "loss": 1.4266, "step": 3561 }, { "epoch": 2.9560165975103736, "grad_norm": 15.326417922973633, "learning_rate": 1.8818921161825727e-05, "loss": 1.7194, "step": 3562 }, { "epoch": 2.956846473029046, "grad_norm": 8.683735847473145, "learning_rate": 1.881858921161826e-05, "loss": 1.3856, "step": 3563 }, { "epoch": 2.957676348547718, "grad_norm": 8.44781494140625, "learning_rate": 1.881825726141079e-05, "loss": 0.9802, "step": 3564 
}, { "epoch": 2.95850622406639, "grad_norm": 13.26966381072998, "learning_rate": 1.881792531120332e-05, "loss": 1.355, "step": 3565 }, { "epoch": 2.9593360995850624, "grad_norm": 10.011978149414062, "learning_rate": 1.8817593360995852e-05, "loss": 1.718, "step": 3566 }, { "epoch": 2.9601659751037346, "grad_norm": 10.937348365783691, "learning_rate": 1.8817261410788384e-05, "loss": 1.0562, "step": 3567 }, { "epoch": 2.9609958506224068, "grad_norm": 9.458999633789062, "learning_rate": 1.8816929460580916e-05, "loss": 1.4231, "step": 3568 }, { "epoch": 2.961825726141079, "grad_norm": 19.126008987426758, "learning_rate": 1.8816597510373445e-05, "loss": 1.5597, "step": 3569 }, { "epoch": 2.962655601659751, "grad_norm": 10.469141006469727, "learning_rate": 1.8816265560165977e-05, "loss": 1.6515, "step": 3570 }, { "epoch": 2.9634854771784234, "grad_norm": 12.112736701965332, "learning_rate": 1.881593360995851e-05, "loss": 1.9188, "step": 3571 }, { "epoch": 2.9643153526970956, "grad_norm": 13.361482620239258, "learning_rate": 1.8815601659751038e-05, "loss": 1.2457, "step": 3572 }, { "epoch": 2.9651452282157678, "grad_norm": 13.718754768371582, "learning_rate": 1.881526970954357e-05, "loss": 2.6905, "step": 3573 }, { "epoch": 2.96597510373444, "grad_norm": 11.907383918762207, "learning_rate": 1.88149377593361e-05, "loss": 1.6461, "step": 3574 }, { "epoch": 2.966804979253112, "grad_norm": 11.673566818237305, "learning_rate": 1.881460580912863e-05, "loss": 1.5502, "step": 3575 }, { "epoch": 2.9676348547717843, "grad_norm": 8.434698104858398, "learning_rate": 1.8814273858921163e-05, "loss": 0.7837, "step": 3576 }, { "epoch": 2.9684647302904565, "grad_norm": 9.058120727539062, "learning_rate": 1.8813941908713695e-05, "loss": 1.1568, "step": 3577 }, { "epoch": 2.9692946058091287, "grad_norm": 8.8428316116333, "learning_rate": 1.8813609958506224e-05, "loss": 2.2116, "step": 3578 }, { "epoch": 2.970124481327801, "grad_norm": 11.801445007324219, "learning_rate": 
1.8813278008298756e-05, "loss": 1.5927, "step": 3579 }, { "epoch": 2.970954356846473, "grad_norm": 10.462145805358887, "learning_rate": 1.8812946058091288e-05, "loss": 1.5609, "step": 3580 }, { "epoch": 2.9717842323651453, "grad_norm": 11.50774097442627, "learning_rate": 1.881261410788382e-05, "loss": 1.189, "step": 3581 }, { "epoch": 2.9726141078838175, "grad_norm": 10.692903518676758, "learning_rate": 1.881228215767635e-05, "loss": 1.6711, "step": 3582 }, { "epoch": 2.9734439834024897, "grad_norm": 11.33624267578125, "learning_rate": 1.881195020746888e-05, "loss": 0.8912, "step": 3583 }, { "epoch": 2.974273858921162, "grad_norm": 13.1943998336792, "learning_rate": 1.8811618257261413e-05, "loss": 1.1887, "step": 3584 }, { "epoch": 2.975103734439834, "grad_norm": 7.732624530792236, "learning_rate": 1.8811286307053945e-05, "loss": 1.188, "step": 3585 }, { "epoch": 2.9759336099585063, "grad_norm": 14.802912712097168, "learning_rate": 1.8810954356846474e-05, "loss": 1.9104, "step": 3586 }, { "epoch": 2.9767634854771785, "grad_norm": 10.228134155273438, "learning_rate": 1.8810622406639006e-05, "loss": 1.525, "step": 3587 }, { "epoch": 2.9775933609958507, "grad_norm": 14.49252986907959, "learning_rate": 1.8810290456431538e-05, "loss": 2.0681, "step": 3588 }, { "epoch": 2.978423236514523, "grad_norm": 7.921606540679932, "learning_rate": 1.880995850622407e-05, "loss": 1.2436, "step": 3589 }, { "epoch": 2.979253112033195, "grad_norm": 9.084486961364746, "learning_rate": 1.88096265560166e-05, "loss": 1.0923, "step": 3590 }, { "epoch": 2.9800829875518673, "grad_norm": 11.556385040283203, "learning_rate": 1.880929460580913e-05, "loss": 1.9164, "step": 3591 }, { "epoch": 2.9809128630705395, "grad_norm": 11.482934951782227, "learning_rate": 1.880896265560166e-05, "loss": 1.4501, "step": 3592 }, { "epoch": 2.9817427385892117, "grad_norm": 12.42314624786377, "learning_rate": 1.8808630705394192e-05, "loss": 1.5084, "step": 3593 }, { "epoch": 2.982572614107884, "grad_norm": 
12.970316886901855, "learning_rate": 1.8808298755186724e-05, "loss": 1.2064, "step": 3594 }, { "epoch": 2.983402489626556, "grad_norm": 12.84929084777832, "learning_rate": 1.8807966804979253e-05, "loss": 1.1852, "step": 3595 }, { "epoch": 2.9842323651452283, "grad_norm": 7.649717330932617, "learning_rate": 1.8807634854771785e-05, "loss": 1.249, "step": 3596 }, { "epoch": 2.9850622406639005, "grad_norm": 12.003314971923828, "learning_rate": 1.8807302904564317e-05, "loss": 1.7314, "step": 3597 }, { "epoch": 2.9858921161825727, "grad_norm": 17.193193435668945, "learning_rate": 1.880697095435685e-05, "loss": 2.1041, "step": 3598 }, { "epoch": 2.986721991701245, "grad_norm": 11.827178955078125, "learning_rate": 1.8806639004149378e-05, "loss": 1.2671, "step": 3599 }, { "epoch": 2.987551867219917, "grad_norm": 9.217832565307617, "learning_rate": 1.880630705394191e-05, "loss": 1.1546, "step": 3600 }, { "epoch": 2.9883817427385893, "grad_norm": 10.525640487670898, "learning_rate": 1.8805975103734442e-05, "loss": 1.1277, "step": 3601 }, { "epoch": 2.9892116182572614, "grad_norm": 16.05880355834961, "learning_rate": 1.8805643153526974e-05, "loss": 2.109, "step": 3602 }, { "epoch": 2.9900414937759336, "grad_norm": 7.666934490203857, "learning_rate": 1.8805311203319503e-05, "loss": 1.6089, "step": 3603 }, { "epoch": 2.990871369294606, "grad_norm": 9.560527801513672, "learning_rate": 1.8804979253112035e-05, "loss": 1.2469, "step": 3604 }, { "epoch": 2.991701244813278, "grad_norm": 8.650616645812988, "learning_rate": 1.8804647302904567e-05, "loss": 1.3233, "step": 3605 }, { "epoch": 2.9925311203319502, "grad_norm": 13.836917877197266, "learning_rate": 1.88043153526971e-05, "loss": 2.1592, "step": 3606 }, { "epoch": 2.9933609958506224, "grad_norm": 8.144417762756348, "learning_rate": 1.8803983402489628e-05, "loss": 1.1381, "step": 3607 }, { "epoch": 2.9941908713692946, "grad_norm": 8.687387466430664, "learning_rate": 1.880365145228216e-05, "loss": 0.8899, "step": 3608 }, { 
"epoch": 2.995020746887967, "grad_norm": 7.382131099700928, "learning_rate": 1.8803319502074692e-05, "loss": 1.2487, "step": 3609 }, { "epoch": 2.995850622406639, "grad_norm": 10.087827682495117, "learning_rate": 1.880298755186722e-05, "loss": 1.29, "step": 3610 }, { "epoch": 2.996680497925311, "grad_norm": 7.278181552886963, "learning_rate": 1.8802655601659753e-05, "loss": 0.8879, "step": 3611 }, { "epoch": 2.9975103734439834, "grad_norm": 7.9760236740112305, "learning_rate": 1.8802323651452285e-05, "loss": 1.2596, "step": 3612 }, { "epoch": 2.9983402489626556, "grad_norm": 9.329651832580566, "learning_rate": 1.8801991701244814e-05, "loss": 1.1473, "step": 3613 }, { "epoch": 2.999170124481328, "grad_norm": 10.684151649475098, "learning_rate": 1.8801659751037346e-05, "loss": 1.6337, "step": 3614 }, { "epoch": 3.0, "grad_norm": 10.025201797485352, "learning_rate": 1.8801327800829875e-05, "loss": 1.529, "step": 3615 }, { "epoch": 3.000829875518672, "grad_norm": 6.739085674285889, "learning_rate": 1.8800995850622407e-05, "loss": 1.3509, "step": 3616 }, { "epoch": 3.0016597510373444, "grad_norm": 14.309226989746094, "learning_rate": 1.880066390041494e-05, "loss": 1.1189, "step": 3617 }, { "epoch": 3.0024896265560166, "grad_norm": 12.508896827697754, "learning_rate": 1.880033195020747e-05, "loss": 1.2093, "step": 3618 }, { "epoch": 3.003319502074689, "grad_norm": 8.715012550354004, "learning_rate": 1.88e-05, "loss": 1.5383, "step": 3619 }, { "epoch": 3.004149377593361, "grad_norm": 15.380736351013184, "learning_rate": 1.879966804979253e-05, "loss": 1.3608, "step": 3620 }, { "epoch": 3.004979253112033, "grad_norm": 11.57726001739502, "learning_rate": 1.8799336099585064e-05, "loss": 1.0399, "step": 3621 }, { "epoch": 3.0058091286307054, "grad_norm": 12.556771278381348, "learning_rate": 1.8799004149377596e-05, "loss": 1.9887, "step": 3622 }, { "epoch": 3.0066390041493776, "grad_norm": 10.397862434387207, "learning_rate": 1.8798672199170128e-05, "loss": 1.2296, "step": 3623 
}, { "epoch": 3.0074688796680498, "grad_norm": 8.33933162689209, "learning_rate": 1.8798340248962657e-05, "loss": 1.5699, "step": 3624 }, { "epoch": 3.008298755186722, "grad_norm": 9.15941047668457, "learning_rate": 1.879800829875519e-05, "loss": 0.7953, "step": 3625 }, { "epoch": 3.009128630705394, "grad_norm": 8.78773307800293, "learning_rate": 1.879767634854772e-05, "loss": 1.3285, "step": 3626 }, { "epoch": 3.0099585062240664, "grad_norm": 9.578631401062012, "learning_rate": 1.8797344398340253e-05, "loss": 1.1149, "step": 3627 }, { "epoch": 3.0107883817427386, "grad_norm": 8.745670318603516, "learning_rate": 1.8797012448132782e-05, "loss": 1.1711, "step": 3628 }, { "epoch": 3.0116182572614107, "grad_norm": 18.866168975830078, "learning_rate": 1.8796680497925314e-05, "loss": 1.1276, "step": 3629 }, { "epoch": 3.012448132780083, "grad_norm": 14.400278091430664, "learning_rate": 1.8796348547717843e-05, "loss": 1.3585, "step": 3630 }, { "epoch": 3.013278008298755, "grad_norm": 10.189868927001953, "learning_rate": 1.8796016597510375e-05, "loss": 1.0781, "step": 3631 }, { "epoch": 3.0141078838174273, "grad_norm": 9.54878044128418, "learning_rate": 1.8795684647302907e-05, "loss": 1.3039, "step": 3632 }, { "epoch": 3.0149377593360995, "grad_norm": 8.447976112365723, "learning_rate": 1.8795352697095436e-05, "loss": 0.9192, "step": 3633 }, { "epoch": 3.0157676348547717, "grad_norm": 16.90699005126953, "learning_rate": 1.8795020746887968e-05, "loss": 2.373, "step": 3634 }, { "epoch": 3.016597510373444, "grad_norm": 14.154850959777832, "learning_rate": 1.87946887966805e-05, "loss": 1.6557, "step": 3635 }, { "epoch": 3.017427385892116, "grad_norm": 14.645227432250977, "learning_rate": 1.879435684647303e-05, "loss": 1.8059, "step": 3636 }, { "epoch": 3.0182572614107883, "grad_norm": 13.055290222167969, "learning_rate": 1.879402489626556e-05, "loss": 1.7982, "step": 3637 }, { "epoch": 3.0190871369294605, "grad_norm": 17.435428619384766, "learning_rate": 
1.8793692946058093e-05, "loss": 1.8461, "step": 3638 }, { "epoch": 3.0199170124481327, "grad_norm": 14.869207382202148, "learning_rate": 1.8793360995850625e-05, "loss": 1.7948, "step": 3639 }, { "epoch": 3.020746887966805, "grad_norm": 11.466483116149902, "learning_rate": 1.8793029045643154e-05, "loss": 1.1671, "step": 3640 }, { "epoch": 3.021576763485477, "grad_norm": 11.896764755249023, "learning_rate": 1.8792697095435686e-05, "loss": 2.0353, "step": 3641 }, { "epoch": 3.0224066390041493, "grad_norm": 10.225407600402832, "learning_rate": 1.8792365145228218e-05, "loss": 1.2881, "step": 3642 }, { "epoch": 3.0232365145228215, "grad_norm": 9.90793514251709, "learning_rate": 1.879203319502075e-05, "loss": 1.1812, "step": 3643 }, { "epoch": 3.0240663900414937, "grad_norm": 11.644999504089355, "learning_rate": 1.879170124481328e-05, "loss": 0.9389, "step": 3644 }, { "epoch": 3.024896265560166, "grad_norm": 12.108419418334961, "learning_rate": 1.879136929460581e-05, "loss": 1.7812, "step": 3645 }, { "epoch": 3.025726141078838, "grad_norm": 11.668343544006348, "learning_rate": 1.8791037344398343e-05, "loss": 1.9872, "step": 3646 }, { "epoch": 3.0265560165975103, "grad_norm": 9.65005874633789, "learning_rate": 1.8790705394190875e-05, "loss": 1.1672, "step": 3647 }, { "epoch": 3.0273858921161825, "grad_norm": 10.688027381896973, "learning_rate": 1.8790373443983404e-05, "loss": 1.506, "step": 3648 }, { "epoch": 3.0282157676348547, "grad_norm": 15.925739288330078, "learning_rate": 1.8790041493775936e-05, "loss": 1.6168, "step": 3649 }, { "epoch": 3.029045643153527, "grad_norm": 10.922504425048828, "learning_rate": 1.8789709543568468e-05, "loss": 1.019, "step": 3650 }, { "epoch": 3.029875518672199, "grad_norm": 10.336153984069824, "learning_rate": 1.8789377593360997e-05, "loss": 1.3659, "step": 3651 }, { "epoch": 3.0307053941908713, "grad_norm": 9.883700370788574, "learning_rate": 1.878904564315353e-05, "loss": 0.7015, "step": 3652 }, { "epoch": 3.0315352697095435, 
"grad_norm": 10.65406322479248, "learning_rate": 1.8788713692946057e-05, "loss": 1.1123, "step": 3653 }, { "epoch": 3.0323651452282157, "grad_norm": 8.955337524414062, "learning_rate": 1.878838174273859e-05, "loss": 1.7611, "step": 3654 }, { "epoch": 3.033195020746888, "grad_norm": 12.333045959472656, "learning_rate": 1.878804979253112e-05, "loss": 1.2331, "step": 3655 }, { "epoch": 3.03402489626556, "grad_norm": 7.139054775238037, "learning_rate": 1.8787717842323654e-05, "loss": 0.7068, "step": 3656 }, { "epoch": 3.0348547717842322, "grad_norm": 13.52485466003418, "learning_rate": 1.8787385892116182e-05, "loss": 1.6671, "step": 3657 }, { "epoch": 3.0356846473029044, "grad_norm": 12.687222480773926, "learning_rate": 1.8787053941908715e-05, "loss": 1.8175, "step": 3658 }, { "epoch": 3.0365145228215766, "grad_norm": 8.063126564025879, "learning_rate": 1.8786721991701247e-05, "loss": 0.5746, "step": 3659 }, { "epoch": 3.037344398340249, "grad_norm": 10.32188892364502, "learning_rate": 1.878639004149378e-05, "loss": 1.0675, "step": 3660 }, { "epoch": 3.038174273858921, "grad_norm": 16.4278621673584, "learning_rate": 1.8786058091286307e-05, "loss": 1.5936, "step": 3661 }, { "epoch": 3.0390041493775932, "grad_norm": 14.237157821655273, "learning_rate": 1.878572614107884e-05, "loss": 1.5275, "step": 3662 }, { "epoch": 3.0398340248962654, "grad_norm": 12.138679504394531, "learning_rate": 1.878539419087137e-05, "loss": 1.4139, "step": 3663 }, { "epoch": 3.0406639004149376, "grad_norm": 9.331094741821289, "learning_rate": 1.8785062240663904e-05, "loss": 1.3707, "step": 3664 }, { "epoch": 3.04149377593361, "grad_norm": 7.836324691772461, "learning_rate": 1.8784730290456432e-05, "loss": 1.3443, "step": 3665 }, { "epoch": 3.042323651452282, "grad_norm": 10.486859321594238, "learning_rate": 1.8784398340248965e-05, "loss": 1.1035, "step": 3666 }, { "epoch": 3.043153526970954, "grad_norm": 9.12595272064209, "learning_rate": 1.8784066390041497e-05, "loss": 1.3961, "step": 3667 }, { 
"epoch": 3.0439834024896264, "grad_norm": 10.57666015625, "learning_rate": 1.878373443983403e-05, "loss": 1.6157, "step": 3668 }, { "epoch": 3.0448132780082986, "grad_norm": 13.403475761413574, "learning_rate": 1.8783402489626558e-05, "loss": 1.8483, "step": 3669 }, { "epoch": 3.045643153526971, "grad_norm": 10.150472640991211, "learning_rate": 1.878307053941909e-05, "loss": 1.0687, "step": 3670 }, { "epoch": 3.046473029045643, "grad_norm": 14.05070972442627, "learning_rate": 1.878273858921162e-05, "loss": 1.8432, "step": 3671 }, { "epoch": 3.047302904564315, "grad_norm": 12.019089698791504, "learning_rate": 1.878240663900415e-05, "loss": 1.392, "step": 3672 }, { "epoch": 3.0481327800829874, "grad_norm": 8.845876693725586, "learning_rate": 1.8782074688796683e-05, "loss": 0.982, "step": 3673 }, { "epoch": 3.0489626556016596, "grad_norm": 8.579429626464844, "learning_rate": 1.878174273858921e-05, "loss": 1.4891, "step": 3674 }, { "epoch": 3.0497925311203318, "grad_norm": 8.063657760620117, "learning_rate": 1.8781410788381743e-05, "loss": 1.6687, "step": 3675 }, { "epoch": 3.050622406639004, "grad_norm": 8.996057510375977, "learning_rate": 1.8781078838174275e-05, "loss": 0.6815, "step": 3676 }, { "epoch": 3.051452282157676, "grad_norm": 8.25039005279541, "learning_rate": 1.8780746887966808e-05, "loss": 0.8867, "step": 3677 }, { "epoch": 3.0522821576763484, "grad_norm": 11.666977882385254, "learning_rate": 1.8780414937759336e-05, "loss": 1.0938, "step": 3678 }, { "epoch": 3.0531120331950206, "grad_norm": 12.122383117675781, "learning_rate": 1.878008298755187e-05, "loss": 1.3603, "step": 3679 }, { "epoch": 3.0539419087136928, "grad_norm": 10.816991806030273, "learning_rate": 1.87797510373444e-05, "loss": 1.1277, "step": 3680 }, { "epoch": 3.054771784232365, "grad_norm": 14.05333423614502, "learning_rate": 1.8779419087136933e-05, "loss": 0.795, "step": 3681 }, { "epoch": 3.055601659751037, "grad_norm": 8.9217529296875, "learning_rate": 1.877908713692946e-05, "loss": 
1.0057, "step": 3682 }, { "epoch": 3.0564315352697093, "grad_norm": 12.922019958496094, "learning_rate": 1.8778755186721993e-05, "loss": 1.9535, "step": 3683 }, { "epoch": 3.0572614107883815, "grad_norm": 13.346872329711914, "learning_rate": 1.8778423236514526e-05, "loss": 1.5741, "step": 3684 }, { "epoch": 3.0580912863070537, "grad_norm": 10.421700477600098, "learning_rate": 1.8778091286307058e-05, "loss": 1.2122, "step": 3685 }, { "epoch": 3.058921161825726, "grad_norm": 9.491891860961914, "learning_rate": 1.8777759336099586e-05, "loss": 1.4016, "step": 3686 }, { "epoch": 3.059751037344398, "grad_norm": 14.158402442932129, "learning_rate": 1.877742738589212e-05, "loss": 1.7874, "step": 3687 }, { "epoch": 3.0605809128630703, "grad_norm": 12.442717552185059, "learning_rate": 1.877709543568465e-05, "loss": 1.8332, "step": 3688 }, { "epoch": 3.0614107883817425, "grad_norm": 10.337498664855957, "learning_rate": 1.877676348547718e-05, "loss": 1.3884, "step": 3689 }, { "epoch": 3.0622406639004147, "grad_norm": 9.009239196777344, "learning_rate": 1.877643153526971e-05, "loss": 1.1537, "step": 3690 }, { "epoch": 3.063070539419087, "grad_norm": 12.704934120178223, "learning_rate": 1.877609958506224e-05, "loss": 1.2374, "step": 3691 }, { "epoch": 3.063900414937759, "grad_norm": 11.97091293334961, "learning_rate": 1.8775767634854772e-05, "loss": 1.1806, "step": 3692 }, { "epoch": 3.0647302904564317, "grad_norm": 10.127089500427246, "learning_rate": 1.8775435684647304e-05, "loss": 1.0937, "step": 3693 }, { "epoch": 3.065560165975104, "grad_norm": 10.061217308044434, "learning_rate": 1.8775103734439833e-05, "loss": 1.2297, "step": 3694 }, { "epoch": 3.066390041493776, "grad_norm": 6.102879047393799, "learning_rate": 1.8774771784232365e-05, "loss": 0.8913, "step": 3695 }, { "epoch": 3.0672199170124483, "grad_norm": 8.612335205078125, "learning_rate": 1.8774439834024897e-05, "loss": 1.142, "step": 3696 }, { "epoch": 3.0680497925311205, "grad_norm": 14.737208366394043, 
"learning_rate": 1.877410788381743e-05, "loss": 1.7995, "step": 3697 }, { "epoch": 3.0688796680497927, "grad_norm": 11.198050498962402, "learning_rate": 1.8773775933609958e-05, "loss": 1.1682, "step": 3698 }, { "epoch": 3.069709543568465, "grad_norm": 18.50822639465332, "learning_rate": 1.877344398340249e-05, "loss": 1.2507, "step": 3699 }, { "epoch": 3.070539419087137, "grad_norm": 8.27393913269043, "learning_rate": 1.8773112033195022e-05, "loss": 0.7927, "step": 3700 }, { "epoch": 3.0713692946058093, "grad_norm": 11.25320816040039, "learning_rate": 1.8772780082987554e-05, "loss": 1.546, "step": 3701 }, { "epoch": 3.0721991701244815, "grad_norm": 10.638550758361816, "learning_rate": 1.8772448132780087e-05, "loss": 1.3712, "step": 3702 }, { "epoch": 3.0730290456431537, "grad_norm": 7.978694915771484, "learning_rate": 1.8772116182572615e-05, "loss": 1.0824, "step": 3703 }, { "epoch": 3.073858921161826, "grad_norm": 10.774871826171875, "learning_rate": 1.8771784232365147e-05, "loss": 1.8029, "step": 3704 }, { "epoch": 3.074688796680498, "grad_norm": 13.127466201782227, "learning_rate": 1.877145228215768e-05, "loss": 1.8993, "step": 3705 }, { "epoch": 3.0755186721991703, "grad_norm": 10.305338859558105, "learning_rate": 1.877112033195021e-05, "loss": 1.4427, "step": 3706 }, { "epoch": 3.0763485477178425, "grad_norm": 14.645195007324219, "learning_rate": 1.877078838174274e-05, "loss": 1.8387, "step": 3707 }, { "epoch": 3.0771784232365147, "grad_norm": 7.966423511505127, "learning_rate": 1.8770456431535272e-05, "loss": 1.0453, "step": 3708 }, { "epoch": 3.078008298755187, "grad_norm": 8.432308197021484, "learning_rate": 1.87701244813278e-05, "loss": 1.3457, "step": 3709 }, { "epoch": 3.078838174273859, "grad_norm": 7.3420329093933105, "learning_rate": 1.8769792531120333e-05, "loss": 1.2377, "step": 3710 }, { "epoch": 3.0796680497925313, "grad_norm": 7.651875972747803, "learning_rate": 1.8769460580912865e-05, "loss": 0.9087, "step": 3711 }, { "epoch": 3.0804979253112035, 
"grad_norm": 7.193748474121094, "learning_rate": 1.8769128630705394e-05, "loss": 0.63, "step": 3712 }, { "epoch": 3.0813278008298757, "grad_norm": 15.297120094299316, "learning_rate": 1.8768796680497926e-05, "loss": 2.4086, "step": 3713 }, { "epoch": 3.082157676348548, "grad_norm": 15.479887962341309, "learning_rate": 1.876846473029046e-05, "loss": 1.6353, "step": 3714 }, { "epoch": 3.08298755186722, "grad_norm": 7.543334007263184, "learning_rate": 1.8768132780082987e-05, "loss": 0.776, "step": 3715 }, { "epoch": 3.0838174273858923, "grad_norm": 13.815061569213867, "learning_rate": 1.876780082987552e-05, "loss": 1.8138, "step": 3716 }, { "epoch": 3.0846473029045645, "grad_norm": 10.030582427978516, "learning_rate": 1.876746887966805e-05, "loss": 1.6946, "step": 3717 }, { "epoch": 3.0854771784232367, "grad_norm": 16.33852195739746, "learning_rate": 1.8767136929460583e-05, "loss": 1.3664, "step": 3718 }, { "epoch": 3.086307053941909, "grad_norm": 11.967428207397461, "learning_rate": 1.8766804979253112e-05, "loss": 1.591, "step": 3719 }, { "epoch": 3.087136929460581, "grad_norm": 17.910255432128906, "learning_rate": 1.8766473029045644e-05, "loss": 1.1664, "step": 3720 }, { "epoch": 3.0879668049792532, "grad_norm": 11.757038116455078, "learning_rate": 1.8766141078838176e-05, "loss": 1.1647, "step": 3721 }, { "epoch": 3.0887966804979254, "grad_norm": 13.376699447631836, "learning_rate": 1.876580912863071e-05, "loss": 1.8506, "step": 3722 }, { "epoch": 3.0896265560165976, "grad_norm": 13.934391975402832, "learning_rate": 1.8765477178423237e-05, "loss": 2.3492, "step": 3723 }, { "epoch": 3.09045643153527, "grad_norm": 10.407032012939453, "learning_rate": 1.876514522821577e-05, "loss": 1.1124, "step": 3724 }, { "epoch": 3.091286307053942, "grad_norm": 10.040982246398926, "learning_rate": 1.87648132780083e-05, "loss": 1.295, "step": 3725 }, { "epoch": 3.0921161825726142, "grad_norm": 17.128347396850586, "learning_rate": 1.8764481327800833e-05, "loss": 1.7507, "step": 3726 
}, { "epoch": 3.0929460580912864, "grad_norm": 11.440707206726074, "learning_rate": 1.8764149377593362e-05, "loss": 1.3328, "step": 3727 }, { "epoch": 3.0937759336099586, "grad_norm": 17.963510513305664, "learning_rate": 1.8763817427385894e-05, "loss": 1.0208, "step": 3728 }, { "epoch": 3.094605809128631, "grad_norm": 15.620414733886719, "learning_rate": 1.8763485477178426e-05, "loss": 1.9543, "step": 3729 }, { "epoch": 3.095435684647303, "grad_norm": 12.820590019226074, "learning_rate": 1.8763153526970955e-05, "loss": 1.806, "step": 3730 }, { "epoch": 3.096265560165975, "grad_norm": 17.078710556030273, "learning_rate": 1.8762821576763487e-05, "loss": 1.8893, "step": 3731 }, { "epoch": 3.0970954356846474, "grad_norm": 7.564926624298096, "learning_rate": 1.8762489626556016e-05, "loss": 1.2994, "step": 3732 }, { "epoch": 3.0979253112033196, "grad_norm": 11.889098167419434, "learning_rate": 1.8762157676348548e-05, "loss": 2.434, "step": 3733 }, { "epoch": 3.098755186721992, "grad_norm": 9.018062591552734, "learning_rate": 1.876182572614108e-05, "loss": 1.9267, "step": 3734 }, { "epoch": 3.099585062240664, "grad_norm": 11.141496658325195, "learning_rate": 1.8761493775933612e-05, "loss": 1.5155, "step": 3735 }, { "epoch": 3.100414937759336, "grad_norm": 11.035276412963867, "learning_rate": 1.876116182572614e-05, "loss": 1.1285, "step": 3736 }, { "epoch": 3.1012448132780084, "grad_norm": 11.771339416503906, "learning_rate": 1.8760829875518673e-05, "loss": 1.5259, "step": 3737 }, { "epoch": 3.1020746887966806, "grad_norm": 7.762494087219238, "learning_rate": 1.8760497925311205e-05, "loss": 0.987, "step": 3738 }, { "epoch": 3.1029045643153528, "grad_norm": 12.95701789855957, "learning_rate": 1.8760165975103737e-05, "loss": 1.7146, "step": 3739 }, { "epoch": 3.103734439834025, "grad_norm": 15.36007308959961, "learning_rate": 1.8759834024896266e-05, "loss": 1.6982, "step": 3740 }, { "epoch": 3.104564315352697, "grad_norm": 12.212925910949707, "learning_rate": 
1.8759502074688798e-05, "loss": 1.0397, "step": 3741 }, { "epoch": 3.1053941908713694, "grad_norm": 8.944146156311035, "learning_rate": 1.875917012448133e-05, "loss": 1.2559, "step": 3742 }, { "epoch": 3.1062240663900416, "grad_norm": 9.931336402893066, "learning_rate": 1.8758838174273862e-05, "loss": 1.4481, "step": 3743 }, { "epoch": 3.1070539419087138, "grad_norm": 11.08430004119873, "learning_rate": 1.875850622406639e-05, "loss": 1.1638, "step": 3744 }, { "epoch": 3.107883817427386, "grad_norm": 14.654452323913574, "learning_rate": 1.8758174273858923e-05, "loss": 2.2474, "step": 3745 }, { "epoch": 3.108713692946058, "grad_norm": 14.897881507873535, "learning_rate": 1.8757842323651455e-05, "loss": 1.2058, "step": 3746 }, { "epoch": 3.1095435684647303, "grad_norm": 17.05011558532715, "learning_rate": 1.8757510373443984e-05, "loss": 1.0966, "step": 3747 }, { "epoch": 3.1103734439834025, "grad_norm": 11.89504337310791, "learning_rate": 1.8757178423236516e-05, "loss": 0.7376, "step": 3748 }, { "epoch": 3.1112033195020747, "grad_norm": 9.193351745605469, "learning_rate": 1.8756846473029048e-05, "loss": 1.0352, "step": 3749 }, { "epoch": 3.112033195020747, "grad_norm": 17.65694808959961, "learning_rate": 1.8756514522821577e-05, "loss": 2.4042, "step": 3750 }, { "epoch": 3.112863070539419, "grad_norm": 11.997686386108398, "learning_rate": 1.875618257261411e-05, "loss": 2.1806, "step": 3751 }, { "epoch": 3.1136929460580913, "grad_norm": 18.43730354309082, "learning_rate": 1.875585062240664e-05, "loss": 1.5149, "step": 3752 }, { "epoch": 3.1145228215767635, "grad_norm": 12.321210861206055, "learning_rate": 1.875551867219917e-05, "loss": 1.8197, "step": 3753 }, { "epoch": 3.1153526970954357, "grad_norm": 12.60944652557373, "learning_rate": 1.8755186721991702e-05, "loss": 1.853, "step": 3754 }, { "epoch": 3.116182572614108, "grad_norm": 14.568658828735352, "learning_rate": 1.8754854771784234e-05, "loss": 2.053, "step": 3755 }, { "epoch": 3.11701244813278, "grad_norm": 
8.8859281539917, "learning_rate": 1.8754522821576766e-05, "loss": 1.0143, "step": 3756 }, { "epoch": 3.1178423236514523, "grad_norm": 17.526052474975586, "learning_rate": 1.8754190871369295e-05, "loss": 2.7665, "step": 3757 }, { "epoch": 3.1186721991701245, "grad_norm": 9.052931785583496, "learning_rate": 1.8753858921161827e-05, "loss": 0.697, "step": 3758 }, { "epoch": 3.1195020746887967, "grad_norm": 13.572149276733398, "learning_rate": 1.875352697095436e-05, "loss": 0.9972, "step": 3759 }, { "epoch": 3.120331950207469, "grad_norm": 12.06502628326416, "learning_rate": 1.875319502074689e-05, "loss": 1.7778, "step": 3760 }, { "epoch": 3.121161825726141, "grad_norm": 11.696333885192871, "learning_rate": 1.875286307053942e-05, "loss": 1.7191, "step": 3761 }, { "epoch": 3.1219917012448133, "grad_norm": 11.015828132629395, "learning_rate": 1.8752531120331952e-05, "loss": 1.7779, "step": 3762 }, { "epoch": 3.1228215767634855, "grad_norm": 15.536070823669434, "learning_rate": 1.8752199170124484e-05, "loss": 1.864, "step": 3763 }, { "epoch": 3.1236514522821577, "grad_norm": 8.6630277633667, "learning_rate": 1.8751867219917016e-05, "loss": 1.0569, "step": 3764 }, { "epoch": 3.12448132780083, "grad_norm": 16.38041877746582, "learning_rate": 1.8751535269709545e-05, "loss": 1.6178, "step": 3765 }, { "epoch": 3.125311203319502, "grad_norm": 14.300190925598145, "learning_rate": 1.8751203319502077e-05, "loss": 1.1288, "step": 3766 }, { "epoch": 3.1261410788381743, "grad_norm": 10.942256927490234, "learning_rate": 1.875087136929461e-05, "loss": 1.0569, "step": 3767 }, { "epoch": 3.1269709543568465, "grad_norm": 10.200688362121582, "learning_rate": 1.8750539419087138e-05, "loss": 1.2208, "step": 3768 }, { "epoch": 3.1278008298755187, "grad_norm": 14.697931289672852, "learning_rate": 1.875020746887967e-05, "loss": 1.5139, "step": 3769 }, { "epoch": 3.128630705394191, "grad_norm": 10.437667846679688, "learning_rate": 1.87498755186722e-05, "loss": 1.4058, "step": 3770 }, { "epoch": 
3.129460580912863, "grad_norm": 8.347858428955078, "learning_rate": 1.874954356846473e-05, "loss": 1.3778, "step": 3771 }, { "epoch": 3.1302904564315353, "grad_norm": 17.664098739624023, "learning_rate": 1.8749211618257263e-05, "loss": 1.4896, "step": 3772 }, { "epoch": 3.1311203319502074, "grad_norm": 19.60611915588379, "learning_rate": 1.874887966804979e-05, "loss": 2.1988, "step": 3773 }, { "epoch": 3.1319502074688796, "grad_norm": 10.896742820739746, "learning_rate": 1.8748547717842324e-05, "loss": 1.2414, "step": 3774 }, { "epoch": 3.132780082987552, "grad_norm": 8.597979545593262, "learning_rate": 1.8748215767634856e-05, "loss": 1.4897, "step": 3775 }, { "epoch": 3.133609958506224, "grad_norm": 9.805597305297852, "learning_rate": 1.8747883817427388e-05, "loss": 1.0698, "step": 3776 }, { "epoch": 3.1344398340248962, "grad_norm": 9.469306945800781, "learning_rate": 1.8747551867219917e-05, "loss": 1.3947, "step": 3777 }, { "epoch": 3.1352697095435684, "grad_norm": 11.699784278869629, "learning_rate": 1.874721991701245e-05, "loss": 1.1908, "step": 3778 }, { "epoch": 3.1360995850622406, "grad_norm": 9.616311073303223, "learning_rate": 1.874688796680498e-05, "loss": 1.5382, "step": 3779 }, { "epoch": 3.136929460580913, "grad_norm": 13.975098609924316, "learning_rate": 1.8746556016597513e-05, "loss": 1.3346, "step": 3780 }, { "epoch": 3.137759336099585, "grad_norm": 11.568653106689453, "learning_rate": 1.8746224066390045e-05, "loss": 1.8833, "step": 3781 }, { "epoch": 3.138589211618257, "grad_norm": 9.567313194274902, "learning_rate": 1.8745892116182574e-05, "loss": 1.3425, "step": 3782 }, { "epoch": 3.1394190871369294, "grad_norm": 9.019244194030762, "learning_rate": 1.8745560165975106e-05, "loss": 1.6769, "step": 3783 }, { "epoch": 3.1402489626556016, "grad_norm": 12.34217643737793, "learning_rate": 1.8745228215767638e-05, "loss": 1.7431, "step": 3784 }, { "epoch": 3.141078838174274, "grad_norm": 13.916444778442383, "learning_rate": 1.874489626556017e-05, "loss": 
1.1183, "step": 3785 }, { "epoch": 3.141908713692946, "grad_norm": 21.928245544433594, "learning_rate": 1.87445643153527e-05, "loss": 1.089, "step": 3786 }, { "epoch": 3.142738589211618, "grad_norm": 16.734180450439453, "learning_rate": 1.874423236514523e-05, "loss": 1.393, "step": 3787 }, { "epoch": 3.1435684647302904, "grad_norm": 13.83670425415039, "learning_rate": 1.874390041493776e-05, "loss": 2.0851, "step": 3788 }, { "epoch": 3.1443983402489626, "grad_norm": 12.899624824523926, "learning_rate": 1.8743568464730292e-05, "loss": 2.1521, "step": 3789 }, { "epoch": 3.145228215767635, "grad_norm": 16.820816040039062, "learning_rate": 1.8743236514522824e-05, "loss": 1.6158, "step": 3790 }, { "epoch": 3.146058091286307, "grad_norm": 11.426924705505371, "learning_rate": 1.8742904564315353e-05, "loss": 1.639, "step": 3791 }, { "epoch": 3.146887966804979, "grad_norm": 12.834939956665039, "learning_rate": 1.8742572614107885e-05, "loss": 1.8572, "step": 3792 }, { "epoch": 3.1477178423236514, "grad_norm": 11.456585884094238, "learning_rate": 1.8742240663900417e-05, "loss": 0.9777, "step": 3793 }, { "epoch": 3.1485477178423236, "grad_norm": 26.18918800354004, "learning_rate": 1.8741908713692946e-05, "loss": 2.2538, "step": 3794 }, { "epoch": 3.1493775933609958, "grad_norm": 11.493468284606934, "learning_rate": 1.8741576763485478e-05, "loss": 1.8681, "step": 3795 }, { "epoch": 3.150207468879668, "grad_norm": 8.313706398010254, "learning_rate": 1.874124481327801e-05, "loss": 1.7265, "step": 3796 }, { "epoch": 3.15103734439834, "grad_norm": 12.519815444946289, "learning_rate": 1.8740912863070542e-05, "loss": 0.9874, "step": 3797 }, { "epoch": 3.1518672199170124, "grad_norm": 9.342902183532715, "learning_rate": 1.874058091286307e-05, "loss": 1.2364, "step": 3798 }, { "epoch": 3.1526970954356845, "grad_norm": 10.700967788696289, "learning_rate": 1.8740248962655603e-05, "loss": 1.8674, "step": 3799 }, { "epoch": 3.1535269709543567, "grad_norm": 11.123417854309082, 
"learning_rate": 1.8739917012448135e-05, "loss": 1.4676, "step": 3800 }, { "epoch": 3.154356846473029, "grad_norm": 12.40934944152832, "learning_rate": 1.8739585062240667e-05, "loss": 1.6523, "step": 3801 }, { "epoch": 3.155186721991701, "grad_norm": 11.078166007995605, "learning_rate": 1.8739253112033196e-05, "loss": 0.7694, "step": 3802 }, { "epoch": 3.1560165975103733, "grad_norm": 11.713862419128418, "learning_rate": 1.8738921161825728e-05, "loss": 1.852, "step": 3803 }, { "epoch": 3.1568464730290455, "grad_norm": 11.172148704528809, "learning_rate": 1.873858921161826e-05, "loss": 1.1555, "step": 3804 }, { "epoch": 3.1576763485477177, "grad_norm": 8.317693710327148, "learning_rate": 1.8738257261410792e-05, "loss": 0.9772, "step": 3805 }, { "epoch": 3.15850622406639, "grad_norm": 18.136608123779297, "learning_rate": 1.873792531120332e-05, "loss": 2.2966, "step": 3806 }, { "epoch": 3.159336099585062, "grad_norm": 14.771613121032715, "learning_rate": 1.8737593360995853e-05, "loss": 1.4041, "step": 3807 }, { "epoch": 3.1601659751037343, "grad_norm": 14.403578758239746, "learning_rate": 1.873726141078838e-05, "loss": 2.0599, "step": 3808 }, { "epoch": 3.1609958506224065, "grad_norm": 8.183732986450195, "learning_rate": 1.8736929460580914e-05, "loss": 0.7977, "step": 3809 }, { "epoch": 3.1618257261410787, "grad_norm": 9.575260162353516, "learning_rate": 1.8736597510373446e-05, "loss": 1.3119, "step": 3810 }, { "epoch": 3.162655601659751, "grad_norm": 9.520088195800781, "learning_rate": 1.8736265560165974e-05, "loss": 1.174, "step": 3811 }, { "epoch": 3.163485477178423, "grad_norm": 12.145699501037598, "learning_rate": 1.8735933609958507e-05, "loss": 1.6974, "step": 3812 }, { "epoch": 3.1643153526970953, "grad_norm": 8.27530574798584, "learning_rate": 1.873560165975104e-05, "loss": 1.6367, "step": 3813 }, { "epoch": 3.1651452282157675, "grad_norm": 20.297548294067383, "learning_rate": 1.873526970954357e-05, "loss": 1.4516, "step": 3814 }, { "epoch": 
3.1659751037344397, "grad_norm": 10.703194618225098, "learning_rate": 1.87349377593361e-05, "loss": 1.0888, "step": 3815 }, { "epoch": 3.166804979253112, "grad_norm": 10.301309585571289, "learning_rate": 1.873460580912863e-05, "loss": 0.9556, "step": 3816 }, { "epoch": 3.167634854771784, "grad_norm": 11.410462379455566, "learning_rate": 1.8734273858921164e-05, "loss": 1.5173, "step": 3817 }, { "epoch": 3.1684647302904563, "grad_norm": 9.804913520812988, "learning_rate": 1.8733941908713696e-05, "loss": 0.836, "step": 3818 }, { "epoch": 3.1692946058091285, "grad_norm": 7.087132930755615, "learning_rate": 1.8733609958506225e-05, "loss": 1.4655, "step": 3819 }, { "epoch": 3.1701244813278007, "grad_norm": 12.917189598083496, "learning_rate": 1.8733278008298757e-05, "loss": 1.9496, "step": 3820 }, { "epoch": 3.170954356846473, "grad_norm": 12.380096435546875, "learning_rate": 1.873294605809129e-05, "loss": 1.4364, "step": 3821 }, { "epoch": 3.171784232365145, "grad_norm": 10.728672981262207, "learning_rate": 1.873261410788382e-05, "loss": 1.7463, "step": 3822 }, { "epoch": 3.1726141078838173, "grad_norm": 12.165213584899902, "learning_rate": 1.873228215767635e-05, "loss": 1.612, "step": 3823 }, { "epoch": 3.1734439834024895, "grad_norm": 8.648187637329102, "learning_rate": 1.8731950207468882e-05, "loss": 1.7275, "step": 3824 }, { "epoch": 3.1742738589211617, "grad_norm": 11.916733741760254, "learning_rate": 1.8731618257261414e-05, "loss": 1.4698, "step": 3825 }, { "epoch": 3.175103734439834, "grad_norm": 8.889618873596191, "learning_rate": 1.8731286307053943e-05, "loss": 1.5979, "step": 3826 }, { "epoch": 3.175933609958506, "grad_norm": 17.11205291748047, "learning_rate": 1.8730954356846475e-05, "loss": 1.533, "step": 3827 }, { "epoch": 3.1767634854771782, "grad_norm": 10.84894847869873, "learning_rate": 1.8730622406639007e-05, "loss": 1.2868, "step": 3828 }, { "epoch": 3.1775933609958504, "grad_norm": 10.165132522583008, "learning_rate": 1.8730290456431535e-05, "loss": 
1.2455, "step": 3829 }, { "epoch": 3.1784232365145226, "grad_norm": 12.677614212036133, "learning_rate": 1.8729958506224068e-05, "loss": 1.4751, "step": 3830 }, { "epoch": 3.179253112033195, "grad_norm": 9.000186920166016, "learning_rate": 1.8729626556016596e-05, "loss": 1.5772, "step": 3831 }, { "epoch": 3.180082987551867, "grad_norm": 10.603879928588867, "learning_rate": 1.872929460580913e-05, "loss": 1.4855, "step": 3832 }, { "epoch": 3.180912863070539, "grad_norm": 10.427000999450684, "learning_rate": 1.872896265560166e-05, "loss": 0.9187, "step": 3833 }, { "epoch": 3.1817427385892114, "grad_norm": 15.055007934570312, "learning_rate": 1.8728630705394193e-05, "loss": 1.7757, "step": 3834 }, { "epoch": 3.1825726141078836, "grad_norm": 14.807282447814941, "learning_rate": 1.8728298755186725e-05, "loss": 1.6127, "step": 3835 }, { "epoch": 3.183402489626556, "grad_norm": 8.792576789855957, "learning_rate": 1.8727966804979253e-05, "loss": 1.2999, "step": 3836 }, { "epoch": 3.1842323651452284, "grad_norm": 9.867677688598633, "learning_rate": 1.8727634854771786e-05, "loss": 1.2634, "step": 3837 }, { "epoch": 3.1850622406639006, "grad_norm": 11.110596656799316, "learning_rate": 1.8727302904564318e-05, "loss": 1.5103, "step": 3838 }, { "epoch": 3.185892116182573, "grad_norm": 7.060393333435059, "learning_rate": 1.872697095435685e-05, "loss": 1.2051, "step": 3839 }, { "epoch": 3.186721991701245, "grad_norm": 12.152443885803223, "learning_rate": 1.872663900414938e-05, "loss": 1.5997, "step": 3840 }, { "epoch": 3.1875518672199172, "grad_norm": 12.444893836975098, "learning_rate": 1.872630705394191e-05, "loss": 1.05, "step": 3841 }, { "epoch": 3.1883817427385894, "grad_norm": 13.115272521972656, "learning_rate": 1.8725975103734443e-05, "loss": 1.5872, "step": 3842 }, { "epoch": 3.1892116182572616, "grad_norm": 7.982016086578369, "learning_rate": 1.8725643153526975e-05, "loss": 1.5486, "step": 3843 }, { "epoch": 3.190041493775934, "grad_norm": 18.530282974243164, 
"learning_rate": 1.8725311203319504e-05, "loss": 1.1933, "step": 3844 }, { "epoch": 3.190871369294606, "grad_norm": 11.847617149353027, "learning_rate": 1.8724979253112036e-05, "loss": 1.5735, "step": 3845 }, { "epoch": 3.191701244813278, "grad_norm": 10.650832176208496, "learning_rate": 1.8724647302904568e-05, "loss": 1.5462, "step": 3846 }, { "epoch": 3.1925311203319504, "grad_norm": 12.378369331359863, "learning_rate": 1.8724315352697096e-05, "loss": 0.8631, "step": 3847 }, { "epoch": 3.1933609958506226, "grad_norm": 13.687235832214355, "learning_rate": 1.872398340248963e-05, "loss": 1.7254, "step": 3848 }, { "epoch": 3.194190871369295, "grad_norm": 13.003584861755371, "learning_rate": 1.8723651452282157e-05, "loss": 1.141, "step": 3849 }, { "epoch": 3.195020746887967, "grad_norm": 12.293378829956055, "learning_rate": 1.872331950207469e-05, "loss": 1.7272, "step": 3850 }, { "epoch": 3.195850622406639, "grad_norm": 8.368897438049316, "learning_rate": 1.872298755186722e-05, "loss": 0.8696, "step": 3851 }, { "epoch": 3.1966804979253114, "grad_norm": 16.54350471496582, "learning_rate": 1.872265560165975e-05, "loss": 1.6197, "step": 3852 }, { "epoch": 3.1975103734439836, "grad_norm": 8.281582832336426, "learning_rate": 1.8722323651452282e-05, "loss": 1.5769, "step": 3853 }, { "epoch": 3.198340248962656, "grad_norm": 9.4375638961792, "learning_rate": 1.8721991701244814e-05, "loss": 1.0546, "step": 3854 }, { "epoch": 3.199170124481328, "grad_norm": 9.451720237731934, "learning_rate": 1.8721659751037347e-05, "loss": 1.0049, "step": 3855 }, { "epoch": 3.2, "grad_norm": 12.114397048950195, "learning_rate": 1.8721327800829875e-05, "loss": 1.9367, "step": 3856 }, { "epoch": 3.2008298755186724, "grad_norm": 8.246334075927734, "learning_rate": 1.8720995850622407e-05, "loss": 1.4219, "step": 3857 }, { "epoch": 3.2016597510373446, "grad_norm": 16.67559242248535, "learning_rate": 1.872066390041494e-05, "loss": 1.3254, "step": 3858 }, { "epoch": 3.2024896265560168, "grad_norm": 
21.133344650268555, "learning_rate": 1.872033195020747e-05, "loss": 1.4193, "step": 3859 }, { "epoch": 3.203319502074689, "grad_norm": 24.351877212524414, "learning_rate": 1.8720000000000004e-05, "loss": 1.7241, "step": 3860 }, { "epoch": 3.204149377593361, "grad_norm": 14.394047737121582, "learning_rate": 1.8719668049792532e-05, "loss": 1.484, "step": 3861 }, { "epoch": 3.2049792531120334, "grad_norm": 15.422383308410645, "learning_rate": 1.8719336099585065e-05, "loss": 1.1471, "step": 3862 }, { "epoch": 3.2058091286307056, "grad_norm": 8.964966773986816, "learning_rate": 1.8719004149377597e-05, "loss": 1.2923, "step": 3863 }, { "epoch": 3.2066390041493777, "grad_norm": 14.161527633666992, "learning_rate": 1.8718672199170125e-05, "loss": 1.4312, "step": 3864 }, { "epoch": 3.20746887966805, "grad_norm": 8.260527610778809, "learning_rate": 1.8718340248962657e-05, "loss": 0.8836, "step": 3865 }, { "epoch": 3.208298755186722, "grad_norm": 18.897632598876953, "learning_rate": 1.871800829875519e-05, "loss": 1.1602, "step": 3866 }, { "epoch": 3.2091286307053943, "grad_norm": 15.093317985534668, "learning_rate": 1.871767634854772e-05, "loss": 0.8943, "step": 3867 }, { "epoch": 3.2099585062240665, "grad_norm": 15.75745677947998, "learning_rate": 1.871734439834025e-05, "loss": 2.0926, "step": 3868 }, { "epoch": 3.2107883817427387, "grad_norm": 13.452893257141113, "learning_rate": 1.8717012448132783e-05, "loss": 1.6596, "step": 3869 }, { "epoch": 3.211618257261411, "grad_norm": 8.558396339416504, "learning_rate": 1.871668049792531e-05, "loss": 1.1507, "step": 3870 }, { "epoch": 3.212448132780083, "grad_norm": 13.779743194580078, "learning_rate": 1.8716348547717843e-05, "loss": 1.6253, "step": 3871 }, { "epoch": 3.2132780082987553, "grad_norm": 9.110764503479004, "learning_rate": 1.8716016597510375e-05, "loss": 1.3432, "step": 3872 }, { "epoch": 3.2141078838174275, "grad_norm": 14.72974967956543, "learning_rate": 1.8715684647302904e-05, "loss": 1.338, "step": 3873 }, { 
"epoch": 3.2149377593360997, "grad_norm": 10.79191780090332, "learning_rate": 1.8715352697095436e-05, "loss": 1.1191, "step": 3874 }, { "epoch": 3.215767634854772, "grad_norm": 9.849821090698242, "learning_rate": 1.871502074688797e-05, "loss": 1.5395, "step": 3875 }, { "epoch": 3.216597510373444, "grad_norm": 11.84482479095459, "learning_rate": 1.87146887966805e-05, "loss": 1.2284, "step": 3876 }, { "epoch": 3.2174273858921163, "grad_norm": 8.15947151184082, "learning_rate": 1.871435684647303e-05, "loss": 0.9706, "step": 3877 }, { "epoch": 3.2182572614107885, "grad_norm": 15.999320983886719, "learning_rate": 1.871402489626556e-05, "loss": 1.803, "step": 3878 }, { "epoch": 3.2190871369294607, "grad_norm": 10.860907554626465, "learning_rate": 1.8713692946058093e-05, "loss": 1.2517, "step": 3879 }, { "epoch": 3.219917012448133, "grad_norm": 14.037487030029297, "learning_rate": 1.8713360995850626e-05, "loss": 1.1926, "step": 3880 }, { "epoch": 3.220746887966805, "grad_norm": 8.432286262512207, "learning_rate": 1.8713029045643154e-05, "loss": 1.1558, "step": 3881 }, { "epoch": 3.2215767634854773, "grad_norm": 7.41440486907959, "learning_rate": 1.8712697095435686e-05, "loss": 0.5408, "step": 3882 }, { "epoch": 3.2224066390041495, "grad_norm": 7.183982849121094, "learning_rate": 1.871236514522822e-05, "loss": 1.1582, "step": 3883 }, { "epoch": 3.2232365145228217, "grad_norm": 8.5653657913208, "learning_rate": 1.871203319502075e-05, "loss": 1.2289, "step": 3884 }, { "epoch": 3.224066390041494, "grad_norm": 15.180088996887207, "learning_rate": 1.871170124481328e-05, "loss": 1.2837, "step": 3885 }, { "epoch": 3.224896265560166, "grad_norm": 13.898665428161621, "learning_rate": 1.871136929460581e-05, "loss": 1.5076, "step": 3886 }, { "epoch": 3.2257261410788383, "grad_norm": 6.832977771759033, "learning_rate": 1.871103734439834e-05, "loss": 1.1128, "step": 3887 }, { "epoch": 3.2265560165975105, "grad_norm": 14.388788223266602, "learning_rate": 1.8710705394190872e-05, "loss": 
1.4458, "step": 3888 }, { "epoch": 3.2273858921161827, "grad_norm": 10.386435508728027, "learning_rate": 1.8710373443983404e-05, "loss": 1.4255, "step": 3889 }, { "epoch": 3.228215767634855, "grad_norm": 17.293027877807617, "learning_rate": 1.8710041493775933e-05, "loss": 1.6557, "step": 3890 }, { "epoch": 3.229045643153527, "grad_norm": 8.588201522827148, "learning_rate": 1.8709709543568465e-05, "loss": 1.2991, "step": 3891 }, { "epoch": 3.2298755186721992, "grad_norm": 19.659360885620117, "learning_rate": 1.8709377593360997e-05, "loss": 1.8894, "step": 3892 }, { "epoch": 3.2307053941908714, "grad_norm": 18.062225341796875, "learning_rate": 1.870904564315353e-05, "loss": 1.8315, "step": 3893 }, { "epoch": 3.2315352697095436, "grad_norm": 13.20347785949707, "learning_rate": 1.8708713692946058e-05, "loss": 1.442, "step": 3894 }, { "epoch": 3.232365145228216, "grad_norm": 9.087116241455078, "learning_rate": 1.870838174273859e-05, "loss": 1.0681, "step": 3895 }, { "epoch": 3.233195020746888, "grad_norm": 9.356470108032227, "learning_rate": 1.8708049792531122e-05, "loss": 1.0688, "step": 3896 }, { "epoch": 3.2340248962655602, "grad_norm": 9.750651359558105, "learning_rate": 1.8707717842323654e-05, "loss": 1.3177, "step": 3897 }, { "epoch": 3.2348547717842324, "grad_norm": 16.989482879638672, "learning_rate": 1.8707385892116183e-05, "loss": 1.1768, "step": 3898 }, { "epoch": 3.2356846473029046, "grad_norm": 13.026405334472656, "learning_rate": 1.8707053941908715e-05, "loss": 2.0157, "step": 3899 }, { "epoch": 3.236514522821577, "grad_norm": 10.453156471252441, "learning_rate": 1.8706721991701247e-05, "loss": 1.2457, "step": 3900 }, { "epoch": 3.237344398340249, "grad_norm": 9.679973602294922, "learning_rate": 1.870639004149378e-05, "loss": 1.5687, "step": 3901 }, { "epoch": 3.238174273858921, "grad_norm": 9.72243595123291, "learning_rate": 1.8706058091286308e-05, "loss": 0.9819, "step": 3902 }, { "epoch": 3.2390041493775934, "grad_norm": 12.167519569396973, 
"learning_rate": 1.870572614107884e-05, "loss": 1.1271, "step": 3903 }, { "epoch": 3.2398340248962656, "grad_norm": 12.22063159942627, "learning_rate": 1.8705394190871372e-05, "loss": 1.0316, "step": 3904 }, { "epoch": 3.240663900414938, "grad_norm": 14.855719566345215, "learning_rate": 1.87050622406639e-05, "loss": 1.3391, "step": 3905 }, { "epoch": 3.24149377593361, "grad_norm": 14.81612491607666, "learning_rate": 1.8704730290456433e-05, "loss": 1.1434, "step": 3906 }, { "epoch": 3.242323651452282, "grad_norm": 17.240175247192383, "learning_rate": 1.8704398340248965e-05, "loss": 1.2377, "step": 3907 }, { "epoch": 3.2431535269709544, "grad_norm": 14.987098693847656, "learning_rate": 1.8704066390041494e-05, "loss": 1.532, "step": 3908 }, { "epoch": 3.2439834024896266, "grad_norm": 10.478139877319336, "learning_rate": 1.8703734439834026e-05, "loss": 1.6237, "step": 3909 }, { "epoch": 3.2448132780082988, "grad_norm": 17.23118782043457, "learning_rate": 1.8703402489626555e-05, "loss": 1.6439, "step": 3910 }, { "epoch": 3.245643153526971, "grad_norm": 10.680072784423828, "learning_rate": 1.8703070539419087e-05, "loss": 1.2563, "step": 3911 }, { "epoch": 3.246473029045643, "grad_norm": 9.615934371948242, "learning_rate": 1.870273858921162e-05, "loss": 1.1211, "step": 3912 }, { "epoch": 3.2473029045643154, "grad_norm": 8.689411163330078, "learning_rate": 1.870240663900415e-05, "loss": 1.0949, "step": 3913 }, { "epoch": 3.2481327800829876, "grad_norm": 9.381586074829102, "learning_rate": 1.8702074688796683e-05, "loss": 1.5475, "step": 3914 }, { "epoch": 3.2489626556016598, "grad_norm": 10.622690200805664, "learning_rate": 1.8701742738589212e-05, "loss": 1.9773, "step": 3915 }, { "epoch": 3.249792531120332, "grad_norm": 9.792773246765137, "learning_rate": 1.8701410788381744e-05, "loss": 1.6073, "step": 3916 }, { "epoch": 3.250622406639004, "grad_norm": 13.823862075805664, "learning_rate": 1.8701078838174276e-05, "loss": 1.6668, "step": 3917 }, { "epoch": 
3.2514522821576763, "grad_norm": 16.342985153198242, "learning_rate": 1.870074688796681e-05, "loss": 1.6792, "step": 3918 }, { "epoch": 3.2522821576763485, "grad_norm": 13.241079330444336, "learning_rate": 1.8700414937759337e-05, "loss": 2.1361, "step": 3919 }, { "epoch": 3.2531120331950207, "grad_norm": 8.116661071777344, "learning_rate": 1.870008298755187e-05, "loss": 1.016, "step": 3920 }, { "epoch": 3.253941908713693, "grad_norm": 13.346014976501465, "learning_rate": 1.86997510373444e-05, "loss": 1.4272, "step": 3921 }, { "epoch": 3.254771784232365, "grad_norm": 12.3650541305542, "learning_rate": 1.8699419087136933e-05, "loss": 1.0221, "step": 3922 }, { "epoch": 3.2556016597510373, "grad_norm": 13.872796058654785, "learning_rate": 1.8699087136929462e-05, "loss": 1.297, "step": 3923 }, { "epoch": 3.2564315352697095, "grad_norm": 8.361647605895996, "learning_rate": 1.8698755186721994e-05, "loss": 1.2596, "step": 3924 }, { "epoch": 3.2572614107883817, "grad_norm": 10.386218070983887, "learning_rate": 1.8698423236514523e-05, "loss": 1.4699, "step": 3925 }, { "epoch": 3.258091286307054, "grad_norm": 14.395611763000488, "learning_rate": 1.8698091286307055e-05, "loss": 2.0731, "step": 3926 }, { "epoch": 3.258921161825726, "grad_norm": 11.789512634277344, "learning_rate": 1.8697759336099587e-05, "loss": 1.8599, "step": 3927 }, { "epoch": 3.2597510373443983, "grad_norm": 10.868023872375488, "learning_rate": 1.8697427385892116e-05, "loss": 1.1322, "step": 3928 }, { "epoch": 3.2605809128630705, "grad_norm": 13.636320114135742, "learning_rate": 1.8697095435684648e-05, "loss": 1.5359, "step": 3929 }, { "epoch": 3.2614107883817427, "grad_norm": 8.483131408691406, "learning_rate": 1.869676348547718e-05, "loss": 1.667, "step": 3930 }, { "epoch": 3.262240663900415, "grad_norm": 13.364006042480469, "learning_rate": 1.869643153526971e-05, "loss": 1.7845, "step": 3931 }, { "epoch": 3.263070539419087, "grad_norm": 11.553585052490234, "learning_rate": 1.869609958506224e-05, "loss": 
1.7636, "step": 3932 }, { "epoch": 3.2639004149377593, "grad_norm": 8.443575859069824, "learning_rate": 1.8695767634854773e-05, "loss": 1.1347, "step": 3933 }, { "epoch": 3.2647302904564315, "grad_norm": 11.486900329589844, "learning_rate": 1.8695435684647305e-05, "loss": 1.3012, "step": 3934 }, { "epoch": 3.2655601659751037, "grad_norm": 10.103445053100586, "learning_rate": 1.8695103734439834e-05, "loss": 1.1902, "step": 3935 }, { "epoch": 3.266390041493776, "grad_norm": 12.202069282531738, "learning_rate": 1.8694771784232366e-05, "loss": 1.3563, "step": 3936 }, { "epoch": 3.267219917012448, "grad_norm": 10.461334228515625, "learning_rate": 1.8694439834024898e-05, "loss": 1.6618, "step": 3937 }, { "epoch": 3.2680497925311203, "grad_norm": 11.23230266571045, "learning_rate": 1.869410788381743e-05, "loss": 1.5123, "step": 3938 }, { "epoch": 3.2688796680497925, "grad_norm": 8.953450202941895, "learning_rate": 1.8693775933609962e-05, "loss": 1.0805, "step": 3939 }, { "epoch": 3.2697095435684647, "grad_norm": 8.795641899108887, "learning_rate": 1.869344398340249e-05, "loss": 1.1588, "step": 3940 }, { "epoch": 3.270539419087137, "grad_norm": 12.391861915588379, "learning_rate": 1.8693112033195023e-05, "loss": 1.0882, "step": 3941 }, { "epoch": 3.271369294605809, "grad_norm": 14.717439651489258, "learning_rate": 1.8692780082987555e-05, "loss": 1.2272, "step": 3942 }, { "epoch": 3.2721991701244812, "grad_norm": 14.206424713134766, "learning_rate": 1.8692448132780084e-05, "loss": 1.7749, "step": 3943 }, { "epoch": 3.2730290456431534, "grad_norm": 10.089107513427734, "learning_rate": 1.8692116182572616e-05, "loss": 1.8196, "step": 3944 }, { "epoch": 3.2738589211618256, "grad_norm": 11.05600357055664, "learning_rate": 1.8691784232365148e-05, "loss": 1.6854, "step": 3945 }, { "epoch": 3.274688796680498, "grad_norm": 13.462288856506348, "learning_rate": 1.8691452282157677e-05, "loss": 1.4993, "step": 3946 }, { "epoch": 3.27551867219917, "grad_norm": 12.603726387023926, 
"learning_rate": 1.869112033195021e-05, "loss": 0.9963, "step": 3947 }, { "epoch": 3.2763485477178422, "grad_norm": 10.406051635742188, "learning_rate": 1.8690788381742738e-05, "loss": 1.5579, "step": 3948 }, { "epoch": 3.2771784232365144, "grad_norm": 8.171342849731445, "learning_rate": 1.869045643153527e-05, "loss": 1.2664, "step": 3949 }, { "epoch": 3.2780082987551866, "grad_norm": 11.552242279052734, "learning_rate": 1.8690124481327802e-05, "loss": 1.6256, "step": 3950 }, { "epoch": 3.278838174273859, "grad_norm": 6.521785259246826, "learning_rate": 1.8689792531120334e-05, "loss": 1.107, "step": 3951 }, { "epoch": 3.279668049792531, "grad_norm": 11.364466667175293, "learning_rate": 1.8689460580912863e-05, "loss": 2.144, "step": 3952 }, { "epoch": 3.280497925311203, "grad_norm": 10.378761291503906, "learning_rate": 1.8689128630705395e-05, "loss": 1.2009, "step": 3953 }, { "epoch": 3.2813278008298754, "grad_norm": 11.367013931274414, "learning_rate": 1.8688796680497927e-05, "loss": 1.2144, "step": 3954 }, { "epoch": 3.2821576763485476, "grad_norm": 10.438924789428711, "learning_rate": 1.868846473029046e-05, "loss": 1.6866, "step": 3955 }, { "epoch": 3.28298755186722, "grad_norm": 9.125081062316895, "learning_rate": 1.8688132780082988e-05, "loss": 1.4972, "step": 3956 }, { "epoch": 3.283817427385892, "grad_norm": 14.231449127197266, "learning_rate": 1.868780082987552e-05, "loss": 1.275, "step": 3957 }, { "epoch": 3.284647302904564, "grad_norm": 12.417224884033203, "learning_rate": 1.8687468879668052e-05, "loss": 1.6652, "step": 3958 }, { "epoch": 3.2854771784232364, "grad_norm": 14.766655921936035, "learning_rate": 1.8687136929460584e-05, "loss": 1.2714, "step": 3959 }, { "epoch": 3.2863070539419086, "grad_norm": 14.985539436340332, "learning_rate": 1.8686804979253113e-05, "loss": 2.0476, "step": 3960 }, { "epoch": 3.287136929460581, "grad_norm": 12.035087585449219, "learning_rate": 1.8686473029045645e-05, "loss": 1.9392, "step": 3961 }, { "epoch": 
3.287966804979253, "grad_norm": 12.548690795898438, "learning_rate": 1.8686141078838177e-05, "loss": 1.3107, "step": 3962 }, { "epoch": 3.288796680497925, "grad_norm": 15.727429389953613, "learning_rate": 1.868580912863071e-05, "loss": 1.9933, "step": 3963 }, { "epoch": 3.2896265560165974, "grad_norm": 12.103107452392578, "learning_rate": 1.8685477178423238e-05, "loss": 1.5443, "step": 3964 }, { "epoch": 3.2904564315352696, "grad_norm": 12.795783042907715, "learning_rate": 1.868514522821577e-05, "loss": 1.5429, "step": 3965 }, { "epoch": 3.2912863070539418, "grad_norm": 10.938614845275879, "learning_rate": 1.86848132780083e-05, "loss": 1.5868, "step": 3966 }, { "epoch": 3.292116182572614, "grad_norm": 8.78968620300293, "learning_rate": 1.868448132780083e-05, "loss": 1.0846, "step": 3967 }, { "epoch": 3.292946058091286, "grad_norm": 12.877483367919922, "learning_rate": 1.8684149377593363e-05, "loss": 1.0112, "step": 3968 }, { "epoch": 3.2937759336099584, "grad_norm": 9.26638412475586, "learning_rate": 1.868381742738589e-05, "loss": 1.4472, "step": 3969 }, { "epoch": 3.2946058091286305, "grad_norm": 11.185019493103027, "learning_rate": 1.8683485477178424e-05, "loss": 1.5905, "step": 3970 }, { "epoch": 3.2954356846473027, "grad_norm": 12.838313102722168, "learning_rate": 1.8683153526970956e-05, "loss": 1.484, "step": 3971 }, { "epoch": 3.296265560165975, "grad_norm": 11.181231498718262, "learning_rate": 1.8682821576763488e-05, "loss": 1.6642, "step": 3972 }, { "epoch": 3.297095435684647, "grad_norm": 15.669758796691895, "learning_rate": 1.8682489626556017e-05, "loss": 1.2692, "step": 3973 }, { "epoch": 3.2979253112033193, "grad_norm": 10.94751262664795, "learning_rate": 1.868215767634855e-05, "loss": 1.4458, "step": 3974 }, { "epoch": 3.2987551867219915, "grad_norm": 18.403709411621094, "learning_rate": 1.868182572614108e-05, "loss": 1.9684, "step": 3975 }, { "epoch": 3.2995850622406637, "grad_norm": 10.934036254882812, "learning_rate": 1.8681493775933613e-05, "loss": 
0.9646, "step": 3976 }, { "epoch": 3.300414937759336, "grad_norm": 6.905263900756836, "learning_rate": 1.8681161825726142e-05, "loss": 1.2285, "step": 3977 }, { "epoch": 3.301244813278008, "grad_norm": 12.408951759338379, "learning_rate": 1.8680829875518674e-05, "loss": 1.8249, "step": 3978 }, { "epoch": 3.3020746887966803, "grad_norm": 14.830741882324219, "learning_rate": 1.8680497925311206e-05, "loss": 1.3153, "step": 3979 }, { "epoch": 3.3029045643153525, "grad_norm": 14.10616683959961, "learning_rate": 1.8680165975103738e-05, "loss": 1.0993, "step": 3980 }, { "epoch": 3.3037344398340247, "grad_norm": 12.484282493591309, "learning_rate": 1.8679834024896267e-05, "loss": 1.6962, "step": 3981 }, { "epoch": 3.304564315352697, "grad_norm": 11.676020622253418, "learning_rate": 1.86795020746888e-05, "loss": 1.425, "step": 3982 }, { "epoch": 3.305394190871369, "grad_norm": 8.79667854309082, "learning_rate": 1.867917012448133e-05, "loss": 1.1319, "step": 3983 }, { "epoch": 3.3062240663900413, "grad_norm": 10.929120063781738, "learning_rate": 1.867883817427386e-05, "loss": 0.8129, "step": 3984 }, { "epoch": 3.3070539419087135, "grad_norm": 12.181342124938965, "learning_rate": 1.8678506224066392e-05, "loss": 1.751, "step": 3985 }, { "epoch": 3.3078838174273857, "grad_norm": 10.059220314025879, "learning_rate": 1.8678174273858924e-05, "loss": 1.1996, "step": 3986 }, { "epoch": 3.308713692946058, "grad_norm": 11.433343887329102, "learning_rate": 1.8677842323651453e-05, "loss": 1.2979, "step": 3987 }, { "epoch": 3.30954356846473, "grad_norm": 15.895087242126465, "learning_rate": 1.8677510373443985e-05, "loss": 2.3636, "step": 3988 }, { "epoch": 3.3103734439834023, "grad_norm": 13.344529151916504, "learning_rate": 1.8677178423236513e-05, "loss": 1.893, "step": 3989 }, { "epoch": 3.3112033195020745, "grad_norm": 9.520231246948242, "learning_rate": 1.8676846473029046e-05, "loss": 1.1828, "step": 3990 }, { "epoch": 3.3120331950207467, "grad_norm": 10.66016960144043, 
"learning_rate": 1.8676514522821578e-05, "loss": 0.756, "step": 3991 }, { "epoch": 3.312863070539419, "grad_norm": 13.426664352416992, "learning_rate": 1.867618257261411e-05, "loss": 1.0914, "step": 3992 }, { "epoch": 3.313692946058091, "grad_norm": 13.570053100585938, "learning_rate": 1.8675850622406642e-05, "loss": 1.4427, "step": 3993 }, { "epoch": 3.3145228215767633, "grad_norm": 19.506576538085938, "learning_rate": 1.867551867219917e-05, "loss": 2.1513, "step": 3994 }, { "epoch": 3.3153526970954355, "grad_norm": 11.897796630859375, "learning_rate": 1.8675186721991703e-05, "loss": 1.778, "step": 3995 }, { "epoch": 3.3161825726141076, "grad_norm": 9.59802532196045, "learning_rate": 1.8674854771784235e-05, "loss": 1.6921, "step": 3996 }, { "epoch": 3.31701244813278, "grad_norm": 9.83110237121582, "learning_rate": 1.8674522821576767e-05, "loss": 1.3242, "step": 3997 }, { "epoch": 3.317842323651452, "grad_norm": 11.736534118652344, "learning_rate": 1.8674190871369296e-05, "loss": 1.5477, "step": 3998 }, { "epoch": 3.3186721991701242, "grad_norm": 12.145337104797363, "learning_rate": 1.8673858921161828e-05, "loss": 1.1888, "step": 3999 }, { "epoch": 3.3195020746887964, "grad_norm": 8.994376182556152, "learning_rate": 1.867352697095436e-05, "loss": 0.9667, "step": 4000 }, { "epoch": 3.320331950207469, "grad_norm": 14.810153007507324, "learning_rate": 1.8673195020746892e-05, "loss": 2.2836, "step": 4001 }, { "epoch": 3.3211618257261413, "grad_norm": 10.01926040649414, "learning_rate": 1.867286307053942e-05, "loss": 1.5881, "step": 4002 }, { "epoch": 3.3219917012448135, "grad_norm": 10.702919006347656, "learning_rate": 1.8672531120331953e-05, "loss": 1.1057, "step": 4003 }, { "epoch": 3.3228215767634857, "grad_norm": 10.117610931396484, "learning_rate": 1.867219917012448e-05, "loss": 1.0184, "step": 4004 }, { "epoch": 3.323651452282158, "grad_norm": 11.402552604675293, "learning_rate": 1.8671867219917014e-05, "loss": 0.8361, "step": 4005 }, { "epoch": 3.32448132780083, 
"grad_norm": 11.6742525100708, "learning_rate": 1.8671535269709546e-05, "loss": 1.4305, "step": 4006 }, { "epoch": 3.3253112033195023, "grad_norm": 8.219328880310059, "learning_rate": 1.8671203319502074e-05, "loss": 1.1857, "step": 4007 }, { "epoch": 3.3261410788381744, "grad_norm": 13.623161315917969, "learning_rate": 1.8670871369294607e-05, "loss": 1.5241, "step": 4008 }, { "epoch": 3.3269709543568466, "grad_norm": 9.776506423950195, "learning_rate": 1.867053941908714e-05, "loss": 1.6202, "step": 4009 }, { "epoch": 3.327800829875519, "grad_norm": 18.511945724487305, "learning_rate": 1.8670207468879667e-05, "loss": 1.2923, "step": 4010 }, { "epoch": 3.328630705394191, "grad_norm": 12.197259902954102, "learning_rate": 1.86698755186722e-05, "loss": 1.6832, "step": 4011 }, { "epoch": 3.3294605809128632, "grad_norm": 11.233154296875, "learning_rate": 1.866954356846473e-05, "loss": 1.1748, "step": 4012 }, { "epoch": 3.3302904564315354, "grad_norm": 9.930086135864258, "learning_rate": 1.8669211618257264e-05, "loss": 1.2362, "step": 4013 }, { "epoch": 3.3311203319502076, "grad_norm": 11.763325691223145, "learning_rate": 1.8668879668049792e-05, "loss": 1.4562, "step": 4014 }, { "epoch": 3.33195020746888, "grad_norm": 16.177976608276367, "learning_rate": 1.8668547717842325e-05, "loss": 1.2746, "step": 4015 }, { "epoch": 3.332780082987552, "grad_norm": 14.136348724365234, "learning_rate": 1.8668215767634857e-05, "loss": 1.1677, "step": 4016 }, { "epoch": 3.333609958506224, "grad_norm": 14.456006050109863, "learning_rate": 1.866788381742739e-05, "loss": 1.4386, "step": 4017 }, { "epoch": 3.3344398340248964, "grad_norm": 13.991820335388184, "learning_rate": 1.866755186721992e-05, "loss": 1.357, "step": 4018 }, { "epoch": 3.3352697095435686, "grad_norm": 8.694933891296387, "learning_rate": 1.866721991701245e-05, "loss": 0.9386, "step": 4019 }, { "epoch": 3.336099585062241, "grad_norm": 12.662911415100098, "learning_rate": 1.8666887966804982e-05, "loss": 1.2139, "step": 4020 }, 
{ "epoch": 3.336929460580913, "grad_norm": 17.635066986083984, "learning_rate": 1.8666556016597514e-05, "loss": 1.2104, "step": 4021 }, { "epoch": 3.337759336099585, "grad_norm": 12.070723533630371, "learning_rate": 1.8666224066390043e-05, "loss": 1.2431, "step": 4022 }, { "epoch": 3.3385892116182574, "grad_norm": 12.314764022827148, "learning_rate": 1.8665892116182575e-05, "loss": 1.4981, "step": 4023 }, { "epoch": 3.3394190871369296, "grad_norm": 12.453583717346191, "learning_rate": 1.8665560165975107e-05, "loss": 1.098, "step": 4024 }, { "epoch": 3.340248962655602, "grad_norm": 8.646550178527832, "learning_rate": 1.8665228215767635e-05, "loss": 0.923, "step": 4025 }, { "epoch": 3.341078838174274, "grad_norm": 14.916499137878418, "learning_rate": 1.8664896265560168e-05, "loss": 1.1561, "step": 4026 }, { "epoch": 3.341908713692946, "grad_norm": 13.542882919311523, "learning_rate": 1.8664564315352696e-05, "loss": 1.991, "step": 4027 }, { "epoch": 3.3427385892116184, "grad_norm": 10.654802322387695, "learning_rate": 1.866423236514523e-05, "loss": 1.6475, "step": 4028 }, { "epoch": 3.3435684647302906, "grad_norm": 14.584741592407227, "learning_rate": 1.866390041493776e-05, "loss": 1.4754, "step": 4029 }, { "epoch": 3.3443983402489628, "grad_norm": 13.881878852844238, "learning_rate": 1.8663568464730293e-05, "loss": 1.592, "step": 4030 }, { "epoch": 3.345228215767635, "grad_norm": 11.829785346984863, "learning_rate": 1.866323651452282e-05, "loss": 1.2885, "step": 4031 }, { "epoch": 3.346058091286307, "grad_norm": 11.123085021972656, "learning_rate": 1.8662904564315353e-05, "loss": 1.2231, "step": 4032 }, { "epoch": 3.3468879668049794, "grad_norm": 12.362848281860352, "learning_rate": 1.8662572614107886e-05, "loss": 1.1732, "step": 4033 }, { "epoch": 3.3477178423236515, "grad_norm": 9.931136131286621, "learning_rate": 1.8662240663900418e-05, "loss": 1.1051, "step": 4034 }, { "epoch": 3.3485477178423237, "grad_norm": 8.20693588256836, "learning_rate": 
1.8661908713692946e-05, "loss": 1.2895, "step": 4035 }, { "epoch": 3.349377593360996, "grad_norm": 11.973498344421387, "learning_rate": 1.866157676348548e-05, "loss": 1.5763, "step": 4036 }, { "epoch": 3.350207468879668, "grad_norm": 16.617549896240234, "learning_rate": 1.866124481327801e-05, "loss": 1.6231, "step": 4037 }, { "epoch": 3.3510373443983403, "grad_norm": 17.587688446044922, "learning_rate": 1.8660912863070543e-05, "loss": 1.889, "step": 4038 }, { "epoch": 3.3518672199170125, "grad_norm": 16.147218704223633, "learning_rate": 1.866058091286307e-05, "loss": 1.3042, "step": 4039 }, { "epoch": 3.3526970954356847, "grad_norm": 10.58631420135498, "learning_rate": 1.8660248962655604e-05, "loss": 1.1077, "step": 4040 }, { "epoch": 3.353526970954357, "grad_norm": 12.228683471679688, "learning_rate": 1.8659917012448136e-05, "loss": 1.5095, "step": 4041 }, { "epoch": 3.354356846473029, "grad_norm": 17.027578353881836, "learning_rate": 1.8659585062240664e-05, "loss": 1.8516, "step": 4042 }, { "epoch": 3.3551867219917013, "grad_norm": 14.754700660705566, "learning_rate": 1.8659253112033196e-05, "loss": 1.6734, "step": 4043 }, { "epoch": 3.3560165975103735, "grad_norm": 15.8501558303833, "learning_rate": 1.865892116182573e-05, "loss": 0.9507, "step": 4044 }, { "epoch": 3.3568464730290457, "grad_norm": 10.734025001525879, "learning_rate": 1.8658589211618257e-05, "loss": 1.4282, "step": 4045 }, { "epoch": 3.357676348547718, "grad_norm": 16.2228946685791, "learning_rate": 1.865825726141079e-05, "loss": 1.2635, "step": 4046 }, { "epoch": 3.35850622406639, "grad_norm": 15.111655235290527, "learning_rate": 1.865792531120332e-05, "loss": 1.7686, "step": 4047 }, { "epoch": 3.3593360995850623, "grad_norm": 10.29537296295166, "learning_rate": 1.865759336099585e-05, "loss": 1.2263, "step": 4048 }, { "epoch": 3.3601659751037345, "grad_norm": 9.200590133666992, "learning_rate": 1.8657261410788382e-05, "loss": 1.3318, "step": 4049 }, { "epoch": 3.3609958506224067, "grad_norm": 
17.713146209716797, "learning_rate": 1.8656929460580914e-05, "loss": 1.4075, "step": 4050 }, { "epoch": 3.361825726141079, "grad_norm": 13.422170639038086, "learning_rate": 1.8656597510373447e-05, "loss": 1.2469, "step": 4051 }, { "epoch": 3.362655601659751, "grad_norm": 11.765008926391602, "learning_rate": 1.8656265560165975e-05, "loss": 1.4792, "step": 4052 }, { "epoch": 3.3634854771784233, "grad_norm": 14.354348182678223, "learning_rate": 1.8655933609958507e-05, "loss": 1.5468, "step": 4053 }, { "epoch": 3.3643153526970955, "grad_norm": 7.5888824462890625, "learning_rate": 1.865560165975104e-05, "loss": 1.3855, "step": 4054 }, { "epoch": 3.3651452282157677, "grad_norm": 10.247099876403809, "learning_rate": 1.865526970954357e-05, "loss": 1.1433, "step": 4055 }, { "epoch": 3.36597510373444, "grad_norm": 9.285201072692871, "learning_rate": 1.86549377593361e-05, "loss": 1.4198, "step": 4056 }, { "epoch": 3.366804979253112, "grad_norm": 14.699231147766113, "learning_rate": 1.8654605809128632e-05, "loss": 1.6845, "step": 4057 }, { "epoch": 3.3676348547717843, "grad_norm": 13.65155029296875, "learning_rate": 1.8654273858921165e-05, "loss": 1.1928, "step": 4058 }, { "epoch": 3.3684647302904565, "grad_norm": 11.780854225158691, "learning_rate": 1.8653941908713697e-05, "loss": 1.1876, "step": 4059 }, { "epoch": 3.3692946058091287, "grad_norm": 9.52560806274414, "learning_rate": 1.8653609958506225e-05, "loss": 1.2228, "step": 4060 }, { "epoch": 3.370124481327801, "grad_norm": 12.150862693786621, "learning_rate": 1.8653278008298757e-05, "loss": 1.7846, "step": 4061 }, { "epoch": 3.370954356846473, "grad_norm": 15.191167831420898, "learning_rate": 1.865294605809129e-05, "loss": 1.1603, "step": 4062 }, { "epoch": 3.3717842323651452, "grad_norm": 12.055506706237793, "learning_rate": 1.8652614107883818e-05, "loss": 1.7697, "step": 4063 }, { "epoch": 3.3726141078838174, "grad_norm": 8.853070259094238, "learning_rate": 1.865228215767635e-05, "loss": 1.4561, "step": 4064 }, { 
"epoch": 3.3734439834024896, "grad_norm": 11.120927810668945, "learning_rate": 1.865195020746888e-05, "loss": 1.7443, "step": 4065 }, { "epoch": 3.374273858921162, "grad_norm": 17.860986709594727, "learning_rate": 1.865161825726141e-05, "loss": 1.7173, "step": 4066 }, { "epoch": 3.375103734439834, "grad_norm": 9.929632186889648, "learning_rate": 1.8651286307053943e-05, "loss": 1.647, "step": 4067 }, { "epoch": 3.375933609958506, "grad_norm": 11.829453468322754, "learning_rate": 1.8650954356846472e-05, "loss": 1.3723, "step": 4068 }, { "epoch": 3.3767634854771784, "grad_norm": 15.81595516204834, "learning_rate": 1.8650622406639004e-05, "loss": 2.0285, "step": 4069 }, { "epoch": 3.3775933609958506, "grad_norm": 18.06222915649414, "learning_rate": 1.8650290456431536e-05, "loss": 2.1583, "step": 4070 }, { "epoch": 3.378423236514523, "grad_norm": 20.48339080810547, "learning_rate": 1.864995850622407e-05, "loss": 2.0318, "step": 4071 }, { "epoch": 3.379253112033195, "grad_norm": 11.798038482666016, "learning_rate": 1.86496265560166e-05, "loss": 1.3877, "step": 4072 }, { "epoch": 3.380082987551867, "grad_norm": 19.514522552490234, "learning_rate": 1.864929460580913e-05, "loss": 1.6158, "step": 4073 }, { "epoch": 3.3809128630705394, "grad_norm": 10.437124252319336, "learning_rate": 1.864896265560166e-05, "loss": 1.8285, "step": 4074 }, { "epoch": 3.3817427385892116, "grad_norm": 12.593973159790039, "learning_rate": 1.8648630705394193e-05, "loss": 1.8317, "step": 4075 }, { "epoch": 3.382572614107884, "grad_norm": 14.037131309509277, "learning_rate": 1.8648298755186726e-05, "loss": 2.1149, "step": 4076 }, { "epoch": 3.383402489626556, "grad_norm": 13.712749481201172, "learning_rate": 1.8647966804979254e-05, "loss": 1.4581, "step": 4077 }, { "epoch": 3.384232365145228, "grad_norm": 8.679749488830566, "learning_rate": 1.8647634854771786e-05, "loss": 0.8858, "step": 4078 }, { "epoch": 3.3850622406639004, "grad_norm": 8.781000137329102, "learning_rate": 1.864730290456432e-05, 
"loss": 1.0103, "step": 4079 }, { "epoch": 3.3858921161825726, "grad_norm": 8.21993637084961, "learning_rate": 1.864697095435685e-05, "loss": 1.0412, "step": 4080 }, { "epoch": 3.3867219917012448, "grad_norm": 9.703584671020508, "learning_rate": 1.864663900414938e-05, "loss": 1.548, "step": 4081 }, { "epoch": 3.387551867219917, "grad_norm": 12.570663452148438, "learning_rate": 1.864630705394191e-05, "loss": 1.9948, "step": 4082 }, { "epoch": 3.388381742738589, "grad_norm": 7.623225688934326, "learning_rate": 1.864597510373444e-05, "loss": 1.1989, "step": 4083 }, { "epoch": 3.3892116182572614, "grad_norm": 8.8183012008667, "learning_rate": 1.8645643153526972e-05, "loss": 1.2666, "step": 4084 }, { "epoch": 3.3900414937759336, "grad_norm": 12.076203346252441, "learning_rate": 1.8645311203319504e-05, "loss": 1.8196, "step": 4085 }, { "epoch": 3.3908713692946058, "grad_norm": 12.458266258239746, "learning_rate": 1.8644979253112033e-05, "loss": 2.1569, "step": 4086 }, { "epoch": 3.391701244813278, "grad_norm": 11.951115608215332, "learning_rate": 1.8644647302904565e-05, "loss": 1.8171, "step": 4087 }, { "epoch": 3.39253112033195, "grad_norm": 10.43995189666748, "learning_rate": 1.8644315352697097e-05, "loss": 1.3267, "step": 4088 }, { "epoch": 3.3933609958506223, "grad_norm": 14.599634170532227, "learning_rate": 1.8643983402489626e-05, "loss": 2.0464, "step": 4089 }, { "epoch": 3.3941908713692945, "grad_norm": 12.557828903198242, "learning_rate": 1.8643651452282158e-05, "loss": 1.1412, "step": 4090 }, { "epoch": 3.3950207468879667, "grad_norm": 13.917923927307129, "learning_rate": 1.864331950207469e-05, "loss": 2.3012, "step": 4091 }, { "epoch": 3.395850622406639, "grad_norm": 12.288287162780762, "learning_rate": 1.8642987551867222e-05, "loss": 1.6875, "step": 4092 }, { "epoch": 3.396680497925311, "grad_norm": 9.810935020446777, "learning_rate": 1.864265560165975e-05, "loss": 1.1999, "step": 4093 }, { "epoch": 3.3975103734439833, "grad_norm": 15.423853874206543, 
"learning_rate": 1.8642323651452283e-05, "loss": 1.3567, "step": 4094 }, { "epoch": 3.3983402489626555, "grad_norm": 13.142157554626465, "learning_rate": 1.8641991701244815e-05, "loss": 1.5848, "step": 4095 }, { "epoch": 3.3991701244813277, "grad_norm": 16.48990821838379, "learning_rate": 1.8641659751037347e-05, "loss": 2.3146, "step": 4096 }, { "epoch": 3.4, "grad_norm": 27.015073776245117, "learning_rate": 1.864132780082988e-05, "loss": 2.1412, "step": 4097 }, { "epoch": 3.400829875518672, "grad_norm": 11.419520378112793, "learning_rate": 1.8640995850622408e-05, "loss": 0.885, "step": 4098 }, { "epoch": 3.4016597510373443, "grad_norm": 8.912713050842285, "learning_rate": 1.864066390041494e-05, "loss": 1.2417, "step": 4099 }, { "epoch": 3.4024896265560165, "grad_norm": 16.22442626953125, "learning_rate": 1.8640331950207472e-05, "loss": 1.4638, "step": 4100 }, { "epoch": 3.4033195020746887, "grad_norm": 6.976986408233643, "learning_rate": 1.864e-05, "loss": 1.1014, "step": 4101 }, { "epoch": 3.404149377593361, "grad_norm": 17.482513427734375, "learning_rate": 1.8639668049792533e-05, "loss": 1.556, "step": 4102 }, { "epoch": 3.404979253112033, "grad_norm": 7.512611389160156, "learning_rate": 1.8639336099585062e-05, "loss": 1.0468, "step": 4103 }, { "epoch": 3.4058091286307053, "grad_norm": 9.121601104736328, "learning_rate": 1.8639004149377594e-05, "loss": 1.0487, "step": 4104 }, { "epoch": 3.4066390041493775, "grad_norm": 14.5038423538208, "learning_rate": 1.8638672199170126e-05, "loss": 2.0779, "step": 4105 }, { "epoch": 3.4074688796680497, "grad_norm": 10.86966609954834, "learning_rate": 1.8638340248962655e-05, "loss": 1.5936, "step": 4106 }, { "epoch": 3.408298755186722, "grad_norm": 10.7008056640625, "learning_rate": 1.8638008298755187e-05, "loss": 1.4243, "step": 4107 }, { "epoch": 3.409128630705394, "grad_norm": 16.189268112182617, "learning_rate": 1.863767634854772e-05, "loss": 2.1728, "step": 4108 }, { "epoch": 3.4099585062240663, "grad_norm": 
9.557831764221191, "learning_rate": 1.863734439834025e-05, "loss": 0.7674, "step": 4109 }, { "epoch": 3.4107883817427385, "grad_norm": 9.406899452209473, "learning_rate": 1.863701244813278e-05, "loss": 1.1251, "step": 4110 }, { "epoch": 3.4116182572614107, "grad_norm": 21.986248016357422, "learning_rate": 1.8636680497925312e-05, "loss": 1.7143, "step": 4111 }, { "epoch": 3.412448132780083, "grad_norm": 15.749984741210938, "learning_rate": 1.8636348547717844e-05, "loss": 1.197, "step": 4112 }, { "epoch": 3.413278008298755, "grad_norm": 10.691671371459961, "learning_rate": 1.8636016597510376e-05, "loss": 1.3775, "step": 4113 }, { "epoch": 3.4141078838174272, "grad_norm": 9.782275199890137, "learning_rate": 1.8635684647302905e-05, "loss": 1.3769, "step": 4114 }, { "epoch": 3.4149377593360994, "grad_norm": 14.391514778137207, "learning_rate": 1.8635352697095437e-05, "loss": 2.0998, "step": 4115 }, { "epoch": 3.4157676348547716, "grad_norm": 12.320795059204102, "learning_rate": 1.863502074688797e-05, "loss": 0.9757, "step": 4116 }, { "epoch": 3.416597510373444, "grad_norm": 11.311616897583008, "learning_rate": 1.86346887966805e-05, "loss": 1.7313, "step": 4117 }, { "epoch": 3.417427385892116, "grad_norm": 11.604686737060547, "learning_rate": 1.863435684647303e-05, "loss": 1.4358, "step": 4118 }, { "epoch": 3.4182572614107882, "grad_norm": 17.880525588989258, "learning_rate": 1.8634024896265562e-05, "loss": 1.7359, "step": 4119 }, { "epoch": 3.4190871369294604, "grad_norm": 12.261286735534668, "learning_rate": 1.8633692946058094e-05, "loss": 2.0855, "step": 4120 }, { "epoch": 3.4199170124481326, "grad_norm": 10.865216255187988, "learning_rate": 1.8633360995850623e-05, "loss": 0.8847, "step": 4121 }, { "epoch": 3.420746887966805, "grad_norm": 10.937309265136719, "learning_rate": 1.8633029045643155e-05, "loss": 1.5337, "step": 4122 }, { "epoch": 3.421576763485477, "grad_norm": 11.799428939819336, "learning_rate": 1.8632697095435687e-05, "loss": 1.6957, "step": 4123 }, { 
"epoch": 3.422406639004149, "grad_norm": 13.23399829864502, "learning_rate": 1.8632365145228216e-05, "loss": 1.6563, "step": 4124 }, { "epoch": 3.4232365145228214, "grad_norm": 12.857365608215332, "learning_rate": 1.8632033195020748e-05, "loss": 1.0856, "step": 4125 }, { "epoch": 3.4240663900414936, "grad_norm": 15.55430793762207, "learning_rate": 1.863170124481328e-05, "loss": 2.2572, "step": 4126 }, { "epoch": 3.424896265560166, "grad_norm": 10.076264381408691, "learning_rate": 1.863136929460581e-05, "loss": 1.5468, "step": 4127 }, { "epoch": 3.425726141078838, "grad_norm": 12.729403495788574, "learning_rate": 1.863103734439834e-05, "loss": 1.391, "step": 4128 }, { "epoch": 3.42655601659751, "grad_norm": 9.588005065917969, "learning_rate": 1.8630705394190873e-05, "loss": 1.4981, "step": 4129 }, { "epoch": 3.4273858921161824, "grad_norm": 13.679415702819824, "learning_rate": 1.8630373443983405e-05, "loss": 0.8493, "step": 4130 }, { "epoch": 3.4282157676348546, "grad_norm": 18.567096710205078, "learning_rate": 1.8630041493775934e-05, "loss": 1.9726, "step": 4131 }, { "epoch": 3.429045643153527, "grad_norm": 12.490508079528809, "learning_rate": 1.8629709543568466e-05, "loss": 1.2212, "step": 4132 }, { "epoch": 3.4298755186721994, "grad_norm": 10.531415939331055, "learning_rate": 1.8629377593360998e-05, "loss": 0.9882, "step": 4133 }, { "epoch": 3.4307053941908716, "grad_norm": 8.73449420928955, "learning_rate": 1.862904564315353e-05, "loss": 1.0593, "step": 4134 }, { "epoch": 3.431535269709544, "grad_norm": 8.738554954528809, "learning_rate": 1.862871369294606e-05, "loss": 1.1493, "step": 4135 }, { "epoch": 3.432365145228216, "grad_norm": 10.901997566223145, "learning_rate": 1.862838174273859e-05, "loss": 1.3813, "step": 4136 }, { "epoch": 3.433195020746888, "grad_norm": 13.512003898620605, "learning_rate": 1.8628049792531123e-05, "loss": 1.5912, "step": 4137 }, { "epoch": 3.4340248962655604, "grad_norm": 10.989665031433105, "learning_rate": 1.8627717842323655e-05, 
"loss": 1.5611, "step": 4138 }, { "epoch": 3.4348547717842326, "grad_norm": 15.917702674865723, "learning_rate": 1.8627385892116184e-05, "loss": 2.5863, "step": 4139 }, { "epoch": 3.435684647302905, "grad_norm": 10.767499923706055, "learning_rate": 1.8627053941908716e-05, "loss": 1.386, "step": 4140 }, { "epoch": 3.436514522821577, "grad_norm": 11.153655052185059, "learning_rate": 1.8626721991701248e-05, "loss": 1.5771, "step": 4141 }, { "epoch": 3.437344398340249, "grad_norm": 16.536428451538086, "learning_rate": 1.8626390041493777e-05, "loss": 1.3649, "step": 4142 }, { "epoch": 3.4381742738589214, "grad_norm": 12.519694328308105, "learning_rate": 1.862605809128631e-05, "loss": 1.2698, "step": 4143 }, { "epoch": 3.4390041493775936, "grad_norm": 15.268561363220215, "learning_rate": 1.8625726141078838e-05, "loss": 1.7723, "step": 4144 }, { "epoch": 3.4398340248962658, "grad_norm": 14.000271797180176, "learning_rate": 1.862539419087137e-05, "loss": 1.4639, "step": 4145 }, { "epoch": 3.440663900414938, "grad_norm": 10.392973899841309, "learning_rate": 1.8625062240663902e-05, "loss": 1.3622, "step": 4146 }, { "epoch": 3.44149377593361, "grad_norm": 9.631819725036621, "learning_rate": 1.862473029045643e-05, "loss": 0.7115, "step": 4147 }, { "epoch": 3.4423236514522824, "grad_norm": 11.595695495605469, "learning_rate": 1.8624398340248963e-05, "loss": 1.3632, "step": 4148 }, { "epoch": 3.4431535269709546, "grad_norm": 13.381875038146973, "learning_rate": 1.8624066390041495e-05, "loss": 1.5712, "step": 4149 }, { "epoch": 3.4439834024896268, "grad_norm": 10.2434663772583, "learning_rate": 1.8623734439834027e-05, "loss": 1.161, "step": 4150 }, { "epoch": 3.444813278008299, "grad_norm": 15.186600685119629, "learning_rate": 1.862340248962656e-05, "loss": 2.2619, "step": 4151 }, { "epoch": 3.445643153526971, "grad_norm": 10.010846138000488, "learning_rate": 1.8623070539419088e-05, "loss": 1.4526, "step": 4152 }, { "epoch": 3.4464730290456433, "grad_norm": 14.24757194519043, 
"learning_rate": 1.862273858921162e-05, "loss": 1.1739, "step": 4153 }, { "epoch": 3.4473029045643155, "grad_norm": 24.049715042114258, "learning_rate": 1.8622406639004152e-05, "loss": 1.7009, "step": 4154 }, { "epoch": 3.4481327800829877, "grad_norm": 9.606839179992676, "learning_rate": 1.8622074688796684e-05, "loss": 0.9016, "step": 4155 }, { "epoch": 3.44896265560166, "grad_norm": 20.564916610717773, "learning_rate": 1.8621742738589213e-05, "loss": 2.554, "step": 4156 }, { "epoch": 3.449792531120332, "grad_norm": 9.546016693115234, "learning_rate": 1.8621410788381745e-05, "loss": 1.4534, "step": 4157 }, { "epoch": 3.4506224066390043, "grad_norm": 12.283117294311523, "learning_rate": 1.8621078838174277e-05, "loss": 1.4126, "step": 4158 }, { "epoch": 3.4514522821576765, "grad_norm": 9.707690238952637, "learning_rate": 1.8620746887966806e-05, "loss": 1.0651, "step": 4159 }, { "epoch": 3.4522821576763487, "grad_norm": 25.47548484802246, "learning_rate": 1.8620414937759338e-05, "loss": 3.0134, "step": 4160 }, { "epoch": 3.453112033195021, "grad_norm": 18.03512191772461, "learning_rate": 1.862008298755187e-05, "loss": 2.1267, "step": 4161 }, { "epoch": 3.453941908713693, "grad_norm": 17.10879898071289, "learning_rate": 1.86197510373444e-05, "loss": 1.5726, "step": 4162 }, { "epoch": 3.4547717842323653, "grad_norm": 10.851736068725586, "learning_rate": 1.861941908713693e-05, "loss": 1.9682, "step": 4163 }, { "epoch": 3.4556016597510375, "grad_norm": 16.338478088378906, "learning_rate": 1.8619087136929463e-05, "loss": 1.5007, "step": 4164 }, { "epoch": 3.4564315352697097, "grad_norm": 16.018810272216797, "learning_rate": 1.861875518672199e-05, "loss": 2.0422, "step": 4165 }, { "epoch": 3.457261410788382, "grad_norm": 10.07244873046875, "learning_rate": 1.8618423236514524e-05, "loss": 1.1767, "step": 4166 }, { "epoch": 3.458091286307054, "grad_norm": 9.140935897827148, "learning_rate": 1.8618091286307056e-05, "loss": 1.0591, "step": 4167 }, { "epoch": 3.4589211618257263, 
"grad_norm": 16.61534309387207, "learning_rate": 1.8617759336099585e-05, "loss": 1.6327, "step": 4168 }, { "epoch": 3.4597510373443985, "grad_norm": 11.328654289245605, "learning_rate": 1.8617427385892117e-05, "loss": 1.3121, "step": 4169 }, { "epoch": 3.4605809128630707, "grad_norm": 8.03176212310791, "learning_rate": 1.861709543568465e-05, "loss": 1.2106, "step": 4170 }, { "epoch": 3.461410788381743, "grad_norm": 10.703593254089355, "learning_rate": 1.861676348547718e-05, "loss": 1.1775, "step": 4171 }, { "epoch": 3.462240663900415, "grad_norm": 13.896729469299316, "learning_rate": 1.861643153526971e-05, "loss": 1.7856, "step": 4172 }, { "epoch": 3.4630705394190873, "grad_norm": 11.046793937683105, "learning_rate": 1.8616099585062242e-05, "loss": 1.268, "step": 4173 }, { "epoch": 3.4639004149377595, "grad_norm": 14.77756118774414, "learning_rate": 1.8615767634854774e-05, "loss": 1.8036, "step": 4174 }, { "epoch": 3.4647302904564317, "grad_norm": 6.915499687194824, "learning_rate": 1.8615435684647306e-05, "loss": 0.8998, "step": 4175 }, { "epoch": 3.465560165975104, "grad_norm": 8.499076843261719, "learning_rate": 1.8615103734439838e-05, "loss": 1.1379, "step": 4176 }, { "epoch": 3.466390041493776, "grad_norm": 13.20510482788086, "learning_rate": 1.8614771784232367e-05, "loss": 1.1218, "step": 4177 }, { "epoch": 3.4672199170124482, "grad_norm": 15.698990821838379, "learning_rate": 1.86144398340249e-05, "loss": 1.5525, "step": 4178 }, { "epoch": 3.4680497925311204, "grad_norm": 13.503251075744629, "learning_rate": 1.861410788381743e-05, "loss": 1.4788, "step": 4179 }, { "epoch": 3.4688796680497926, "grad_norm": 12.051443099975586, "learning_rate": 1.861377593360996e-05, "loss": 1.4489, "step": 4180 }, { "epoch": 3.469709543568465, "grad_norm": 13.301304817199707, "learning_rate": 1.8613443983402492e-05, "loss": 1.6506, "step": 4181 }, { "epoch": 3.470539419087137, "grad_norm": 9.228447914123535, "learning_rate": 1.861311203319502e-05, "loss": 1.0593, "step": 4182 
}, { "epoch": 3.4713692946058092, "grad_norm": 9.475942611694336, "learning_rate": 1.8612780082987553e-05, "loss": 0.9812, "step": 4183 }, { "epoch": 3.4721991701244814, "grad_norm": 7.586282730102539, "learning_rate": 1.8612448132780085e-05, "loss": 1.1279, "step": 4184 }, { "epoch": 3.4730290456431536, "grad_norm": 7.376452922821045, "learning_rate": 1.8612116182572613e-05, "loss": 0.8275, "step": 4185 }, { "epoch": 3.473858921161826, "grad_norm": 13.364314079284668, "learning_rate": 1.8611784232365146e-05, "loss": 2.0308, "step": 4186 }, { "epoch": 3.474688796680498, "grad_norm": 14.642224311828613, "learning_rate": 1.8611452282157678e-05, "loss": 1.5385, "step": 4187 }, { "epoch": 3.47551867219917, "grad_norm": 15.390666007995605, "learning_rate": 1.861112033195021e-05, "loss": 1.841, "step": 4188 }, { "epoch": 3.4763485477178424, "grad_norm": 16.64185905456543, "learning_rate": 1.861078838174274e-05, "loss": 1.8253, "step": 4189 }, { "epoch": 3.4771784232365146, "grad_norm": 8.729402542114258, "learning_rate": 1.861045643153527e-05, "loss": 1.5678, "step": 4190 }, { "epoch": 3.478008298755187, "grad_norm": 10.376811981201172, "learning_rate": 1.8610124481327803e-05, "loss": 1.3816, "step": 4191 }, { "epoch": 3.478838174273859, "grad_norm": 12.325413703918457, "learning_rate": 1.8609792531120335e-05, "loss": 1.1584, "step": 4192 }, { "epoch": 3.479668049792531, "grad_norm": 10.32645034790039, "learning_rate": 1.8609460580912864e-05, "loss": 1.3209, "step": 4193 }, { "epoch": 3.4804979253112034, "grad_norm": 13.286934852600098, "learning_rate": 1.8609128630705396e-05, "loss": 1.3702, "step": 4194 }, { "epoch": 3.4813278008298756, "grad_norm": 7.899598121643066, "learning_rate": 1.8608796680497928e-05, "loss": 0.7614, "step": 4195 }, { "epoch": 3.482157676348548, "grad_norm": 9.348714828491211, "learning_rate": 1.860846473029046e-05, "loss": 1.1445, "step": 4196 }, { "epoch": 3.48298755186722, "grad_norm": 12.814255714416504, "learning_rate": 
1.860813278008299e-05, "loss": 1.7315, "step": 4197 }, { "epoch": 3.483817427385892, "grad_norm": 11.949858665466309, "learning_rate": 1.860780082987552e-05, "loss": 1.1463, "step": 4198 }, { "epoch": 3.4846473029045644, "grad_norm": 13.728779792785645, "learning_rate": 1.8607468879668053e-05, "loss": 1.486, "step": 4199 }, { "epoch": 3.4854771784232366, "grad_norm": 10.988884925842285, "learning_rate": 1.860713692946058e-05, "loss": 1.26, "step": 4200 }, { "epoch": 3.4863070539419088, "grad_norm": 12.28232192993164, "learning_rate": 1.8606804979253114e-05, "loss": 1.6591, "step": 4201 }, { "epoch": 3.487136929460581, "grad_norm": 11.273881912231445, "learning_rate": 1.8606473029045646e-05, "loss": 1.1729, "step": 4202 }, { "epoch": 3.487966804979253, "grad_norm": 9.488723754882812, "learning_rate": 1.8606141078838174e-05, "loss": 1.4952, "step": 4203 }, { "epoch": 3.4887966804979254, "grad_norm": 10.563736915588379, "learning_rate": 1.8605809128630707e-05, "loss": 1.17, "step": 4204 }, { "epoch": 3.4896265560165975, "grad_norm": 13.30685806274414, "learning_rate": 1.860547717842324e-05, "loss": 1.895, "step": 4205 }, { "epoch": 3.4904564315352697, "grad_norm": 15.139542579650879, "learning_rate": 1.8605145228215767e-05, "loss": 1.6449, "step": 4206 }, { "epoch": 3.491286307053942, "grad_norm": 13.578407287597656, "learning_rate": 1.86048132780083e-05, "loss": 1.332, "step": 4207 }, { "epoch": 3.492116182572614, "grad_norm": NaN, "learning_rate": 1.86048132780083e-05, "loss": 2.5135, "step": 4208 }, { "epoch": 3.4929460580912863, "grad_norm": 11.872662544250488, "learning_rate": 1.860448132780083e-05, "loss": 2.4482, "step": 4209 }, { "epoch": 3.4937759336099585, "grad_norm": 12.1799955368042, "learning_rate": 1.8604149377593364e-05, "loss": 1.6502, "step": 4210 }, { "epoch": 3.4946058091286307, "grad_norm": 13.426412582397461, "learning_rate": 1.8603817427385892e-05, "loss": 1.602, "step": 4211 }, { "epoch": 3.495435684647303, "grad_norm": 18.799367904663086, 
"learning_rate": 1.8603485477178425e-05, "loss": 1.9873, "step": 4212 }, { "epoch": 3.496265560165975, "grad_norm": 18.210372924804688, "learning_rate": 1.8603153526970957e-05, "loss": 1.1148, "step": 4213 }, { "epoch": 3.4970954356846473, "grad_norm": 13.955924034118652, "learning_rate": 1.860282157676349e-05, "loss": 1.4338, "step": 4214 }, { "epoch": 3.4979253112033195, "grad_norm": 13.17807388305664, "learning_rate": 1.8602489626556017e-05, "loss": 1.318, "step": 4215 }, { "epoch": 3.4987551867219917, "grad_norm": 10.948043823242188, "learning_rate": 1.860215767634855e-05, "loss": 1.8543, "step": 4216 }, { "epoch": 3.499585062240664, "grad_norm": 7.102659702301025, "learning_rate": 1.860182572614108e-05, "loss": 0.9924, "step": 4217 }, { "epoch": 3.500414937759336, "grad_norm": 10.26503849029541, "learning_rate": 1.8601493775933614e-05, "loss": 1.4314, "step": 4218 }, { "epoch": 3.5012448132780083, "grad_norm": 10.454504013061523, "learning_rate": 1.8601161825726142e-05, "loss": 1.0242, "step": 4219 }, { "epoch": 3.5020746887966805, "grad_norm": 6.570529937744141, "learning_rate": 1.8600829875518675e-05, "loss": 0.9256, "step": 4220 }, { "epoch": 3.5029045643153527, "grad_norm": 8.658845901489258, "learning_rate": 1.8600497925311203e-05, "loss": 1.2376, "step": 4221 }, { "epoch": 3.503734439834025, "grad_norm": 9.820682525634766, "learning_rate": 1.8600165975103735e-05, "loss": 1.1755, "step": 4222 }, { "epoch": 3.504564315352697, "grad_norm": 10.160717010498047, "learning_rate": 1.8599834024896268e-05, "loss": 1.0179, "step": 4223 }, { "epoch": 3.5053941908713693, "grad_norm": 17.232175827026367, "learning_rate": 1.8599502074688796e-05, "loss": 1.6816, "step": 4224 }, { "epoch": 3.5062240663900415, "grad_norm": 8.364269256591797, "learning_rate": 1.859917012448133e-05, "loss": 0.8796, "step": 4225 }, { "epoch": 3.5070539419087137, "grad_norm": 8.164363861083984, "learning_rate": 1.859883817427386e-05, "loss": 0.7589, "step": 4226 }, { "epoch": 
3.507883817427386, "grad_norm": 17.292041778564453, "learning_rate": 1.859850622406639e-05, "loss": 1.7748, "step": 4227 }, { "epoch": 3.508713692946058, "grad_norm": 9.190414428710938, "learning_rate": 1.859817427385892e-05, "loss": 1.4027, "step": 4228 }, { "epoch": 3.5095435684647303, "grad_norm": 13.614595413208008, "learning_rate": 1.8597842323651453e-05, "loss": 1.9201, "step": 4229 }, { "epoch": 3.5103734439834025, "grad_norm": 8.833016395568848, "learning_rate": 1.8597510373443986e-05, "loss": 1.3157, "step": 4230 }, { "epoch": 3.5112033195020746, "grad_norm": 10.632390975952148, "learning_rate": 1.8597178423236518e-05, "loss": 1.1875, "step": 4231 }, { "epoch": 3.512033195020747, "grad_norm": 9.529816627502441, "learning_rate": 1.8596846473029046e-05, "loss": 1.5688, "step": 4232 }, { "epoch": 3.512863070539419, "grad_norm": 9.00306510925293, "learning_rate": 1.859651452282158e-05, "loss": 1.4146, "step": 4233 }, { "epoch": 3.5136929460580912, "grad_norm": 12.742318153381348, "learning_rate": 1.859618257261411e-05, "loss": 1.4282, "step": 4234 }, { "epoch": 3.5145228215767634, "grad_norm": 17.088258743286133, "learning_rate": 1.8595850622406643e-05, "loss": 2.6832, "step": 4235 }, { "epoch": 3.5153526970954356, "grad_norm": 11.372958183288574, "learning_rate": 1.859551867219917e-05, "loss": 1.958, "step": 4236 }, { "epoch": 3.516182572614108, "grad_norm": 16.12078094482422, "learning_rate": 1.8595186721991703e-05, "loss": 1.1548, "step": 4237 }, { "epoch": 3.51701244813278, "grad_norm": 10.599590301513672, "learning_rate": 1.8594854771784236e-05, "loss": 0.9986, "step": 4238 }, { "epoch": 3.517842323651452, "grad_norm": 15.658926963806152, "learning_rate": 1.8594522821576764e-05, "loss": 1.1521, "step": 4239 }, { "epoch": 3.5186721991701244, "grad_norm": 20.11154556274414, "learning_rate": 1.8594190871369296e-05, "loss": 2.0982, "step": 4240 }, { "epoch": 3.5195020746887966, "grad_norm": 22.059127807617188, "learning_rate": 1.859385892116183e-05, "loss": 
1.1743, "step": 4241 }, { "epoch": 3.520331950207469, "grad_norm": 15.927122116088867, "learning_rate": 1.8593526970954357e-05, "loss": 2.0029, "step": 4242 }, { "epoch": 3.521161825726141, "grad_norm": 11.01850414276123, "learning_rate": 1.859319502074689e-05, "loss": 2.096, "step": 4243 }, { "epoch": 3.521991701244813, "grad_norm": 11.696022033691406, "learning_rate": 1.8592863070539418e-05, "loss": 1.1093, "step": 4244 }, { "epoch": 3.5228215767634854, "grad_norm": 15.176166534423828, "learning_rate": 1.859253112033195e-05, "loss": 1.4071, "step": 4245 }, { "epoch": 3.5236514522821576, "grad_norm": 10.464396476745605, "learning_rate": 1.8592199170124482e-05, "loss": 1.538, "step": 4246 }, { "epoch": 3.52448132780083, "grad_norm": 10.499456405639648, "learning_rate": 1.8591867219917014e-05, "loss": 1.2891, "step": 4247 }, { "epoch": 3.525311203319502, "grad_norm": 13.370776176452637, "learning_rate": 1.8591535269709543e-05, "loss": 1.4835, "step": 4248 }, { "epoch": 3.526141078838174, "grad_norm": 18.42913818359375, "learning_rate": 1.8591203319502075e-05, "loss": 2.0608, "step": 4249 }, { "epoch": 3.5269709543568464, "grad_norm": 12.411416053771973, "learning_rate": 1.8590871369294607e-05, "loss": 1.7816, "step": 4250 }, { "epoch": 3.5278008298755186, "grad_norm": 10.51384162902832, "learning_rate": 1.859053941908714e-05, "loss": 1.5386, "step": 4251 }, { "epoch": 3.5286307053941908, "grad_norm": 12.605629920959473, "learning_rate": 1.8590207468879668e-05, "loss": 1.3959, "step": 4252 }, { "epoch": 3.529460580912863, "grad_norm": 13.090730667114258, "learning_rate": 1.85898755186722e-05, "loss": 1.9863, "step": 4253 }, { "epoch": 3.530290456431535, "grad_norm": 19.094667434692383, "learning_rate": 1.8589543568464732e-05, "loss": 2.2157, "step": 4254 }, { "epoch": 3.5311203319502074, "grad_norm": 14.902704238891602, "learning_rate": 1.8589211618257264e-05, "loss": 1.8702, "step": 4255 }, { "epoch": 3.5319502074688796, "grad_norm": 14.624022483825684, 
"learning_rate": 1.8588879668049797e-05, "loss": 1.4682, "step": 4256 }, { "epoch": 3.5327800829875518, "grad_norm": 9.692436218261719, "learning_rate": 1.8588547717842325e-05, "loss": 1.1402, "step": 4257 }, { "epoch": 3.533609958506224, "grad_norm": 7.6851806640625, "learning_rate": 1.8588215767634857e-05, "loss": 0.6228, "step": 4258 }, { "epoch": 3.534439834024896, "grad_norm": 11.4299955368042, "learning_rate": 1.858788381742739e-05, "loss": 1.2784, "step": 4259 }, { "epoch": 3.5352697095435683, "grad_norm": 12.972491264343262, "learning_rate": 1.8587551867219918e-05, "loss": 1.785, "step": 4260 }, { "epoch": 3.5360995850622405, "grad_norm": 18.590293884277344, "learning_rate": 1.858721991701245e-05, "loss": 1.3359, "step": 4261 }, { "epoch": 3.5369294605809127, "grad_norm": 15.721590042114258, "learning_rate": 1.858688796680498e-05, "loss": 1.6243, "step": 4262 }, { "epoch": 3.537759336099585, "grad_norm": 14.761180877685547, "learning_rate": 1.858655601659751e-05, "loss": 1.8958, "step": 4263 }, { "epoch": 3.538589211618257, "grad_norm": 13.892040252685547, "learning_rate": 1.8586224066390043e-05, "loss": 1.0872, "step": 4264 }, { "epoch": 3.5394190871369293, "grad_norm": 18.43928337097168, "learning_rate": 1.8585892116182572e-05, "loss": 1.0172, "step": 4265 }, { "epoch": 3.5402489626556015, "grad_norm": 17.860795974731445, "learning_rate": 1.8585560165975104e-05, "loss": 1.8324, "step": 4266 }, { "epoch": 3.5410788381742737, "grad_norm": 11.055435180664062, "learning_rate": 1.8585228215767636e-05, "loss": 1.155, "step": 4267 }, { "epoch": 3.541908713692946, "grad_norm": 15.261962890625, "learning_rate": 1.858489626556017e-05, "loss": 2.1656, "step": 4268 }, { "epoch": 3.542738589211618, "grad_norm": 8.290606498718262, "learning_rate": 1.8584564315352697e-05, "loss": 0.8743, "step": 4269 }, { "epoch": 3.5435684647302903, "grad_norm": 15.812814712524414, "learning_rate": 1.858423236514523e-05, "loss": 1.9543, "step": 4270 }, { "epoch": 3.5443983402489625, 
"grad_norm": 9.30041790008545, "learning_rate": 1.858390041493776e-05, "loss": 1.1533, "step": 4271 }, { "epoch": 3.5452282157676347, "grad_norm": 12.928849220275879, "learning_rate": 1.8583568464730293e-05, "loss": 1.4484, "step": 4272 }, { "epoch": 3.546058091286307, "grad_norm": 11.582785606384277, "learning_rate": 1.8583236514522822e-05, "loss": 1.4755, "step": 4273 }, { "epoch": 3.546887966804979, "grad_norm": 13.343505859375, "learning_rate": 1.8582904564315354e-05, "loss": 1.3497, "step": 4274 }, { "epoch": 3.5477178423236513, "grad_norm": 11.527206420898438, "learning_rate": 1.8582572614107886e-05, "loss": 1.3344, "step": 4275 }, { "epoch": 3.5485477178423235, "grad_norm": 11.885031700134277, "learning_rate": 1.858224066390042e-05, "loss": 1.3689, "step": 4276 }, { "epoch": 3.5493775933609957, "grad_norm": 11.294028282165527, "learning_rate": 1.8581908713692947e-05, "loss": 1.0256, "step": 4277 }, { "epoch": 3.550207468879668, "grad_norm": 11.421378135681152, "learning_rate": 1.858157676348548e-05, "loss": 1.2166, "step": 4278 }, { "epoch": 3.55103734439834, "grad_norm": 14.938289642333984, "learning_rate": 1.858124481327801e-05, "loss": 1.1574, "step": 4279 }, { "epoch": 3.5518672199170123, "grad_norm": 15.598954200744629, "learning_rate": 1.858091286307054e-05, "loss": 1.148, "step": 4280 }, { "epoch": 3.5526970954356845, "grad_norm": 8.545459747314453, "learning_rate": 1.8580580912863072e-05, "loss": 1.2095, "step": 4281 }, { "epoch": 3.5535269709543567, "grad_norm": 13.522475242614746, "learning_rate": 1.8580248962655604e-05, "loss": 1.9261, "step": 4282 }, { "epoch": 3.554356846473029, "grad_norm": 20.437820434570312, "learning_rate": 1.8579917012448133e-05, "loss": 1.7437, "step": 4283 }, { "epoch": 3.555186721991701, "grad_norm": 14.214183807373047, "learning_rate": 1.8579585062240665e-05, "loss": 1.0981, "step": 4284 }, { "epoch": 3.5560165975103732, "grad_norm": 17.546884536743164, "learning_rate": 1.8579253112033197e-05, "loss": 1.3663, "step": 
4285 }, { "epoch": 3.5568464730290454, "grad_norm": 9.938496589660645, "learning_rate": 1.8578921161825726e-05, "loss": 1.4304, "step": 4286 }, { "epoch": 3.5576763485477176, "grad_norm": 14.005789756774902, "learning_rate": 1.8578589211618258e-05, "loss": 2.29, "step": 4287 }, { "epoch": 3.55850622406639, "grad_norm": 11.010510444641113, "learning_rate": 1.857825726141079e-05, "loss": 1.0857, "step": 4288 }, { "epoch": 3.559336099585062, "grad_norm": 11.2562255859375, "learning_rate": 1.8577925311203322e-05, "loss": 1.4341, "step": 4289 }, { "epoch": 3.5601659751037342, "grad_norm": 10.4430513381958, "learning_rate": 1.857759336099585e-05, "loss": 0.7047, "step": 4290 }, { "epoch": 3.5609958506224064, "grad_norm": 10.206623077392578, "learning_rate": 1.8577261410788383e-05, "loss": 1.5242, "step": 4291 }, { "epoch": 3.5618257261410786, "grad_norm": 13.172380447387695, "learning_rate": 1.8576929460580915e-05, "loss": 1.7186, "step": 4292 }, { "epoch": 3.562655601659751, "grad_norm": 9.549870491027832, "learning_rate": 1.8576597510373447e-05, "loss": 1.4409, "step": 4293 }, { "epoch": 3.563485477178423, "grad_norm": 12.302627563476562, "learning_rate": 1.8576265560165976e-05, "loss": 1.0017, "step": 4294 }, { "epoch": 3.564315352697095, "grad_norm": 13.788215637207031, "learning_rate": 1.8575933609958508e-05, "loss": 1.3953, "step": 4295 }, { "epoch": 3.5651452282157674, "grad_norm": 8.782567024230957, "learning_rate": 1.857560165975104e-05, "loss": 1.0911, "step": 4296 }, { "epoch": 3.5659751037344396, "grad_norm": 18.156707763671875, "learning_rate": 1.8575269709543572e-05, "loss": 2.2051, "step": 4297 }, { "epoch": 3.566804979253112, "grad_norm": 9.302591323852539, "learning_rate": 1.85749377593361e-05, "loss": 1.3821, "step": 4298 }, { "epoch": 3.567634854771784, "grad_norm": 7.923496723175049, "learning_rate": 1.8574605809128633e-05, "loss": 0.8571, "step": 4299 }, { "epoch": 3.568464730290456, "grad_norm": 19.947420120239258, "learning_rate": 
1.8574273858921162e-05, "loss": 1.8528, "step": 4300 }, { "epoch": 3.5692946058091284, "grad_norm": 10.727086067199707, "learning_rate": 1.8573941908713694e-05, "loss": 0.9657, "step": 4301 }, { "epoch": 3.5701244813278006, "grad_norm": 17.653409957885742, "learning_rate": 1.8573609958506226e-05, "loss": 1.3423, "step": 4302 }, { "epoch": 3.5709543568464728, "grad_norm": 6.171414375305176, "learning_rate": 1.8573278008298755e-05, "loss": 1.0293, "step": 4303 }, { "epoch": 3.571784232365145, "grad_norm": 13.455318450927734, "learning_rate": 1.8572946058091287e-05, "loss": 1.046, "step": 4304 }, { "epoch": 3.572614107883817, "grad_norm": 15.468533515930176, "learning_rate": 1.857261410788382e-05, "loss": 1.9405, "step": 4305 }, { "epoch": 3.5734439834024894, "grad_norm": 20.600078582763672, "learning_rate": 1.8572282157676348e-05, "loss": 1.1449, "step": 4306 }, { "epoch": 3.5742738589211616, "grad_norm": 14.497272491455078, "learning_rate": 1.857195020746888e-05, "loss": 1.4137, "step": 4307 }, { "epoch": 3.5751037344398338, "grad_norm": 11.497292518615723, "learning_rate": 1.8571618257261412e-05, "loss": 1.179, "step": 4308 }, { "epoch": 3.575933609958506, "grad_norm": 14.690534591674805, "learning_rate": 1.8571286307053944e-05, "loss": 1.628, "step": 4309 }, { "epoch": 3.576763485477178, "grad_norm": 12.592418670654297, "learning_rate": 1.8570954356846476e-05, "loss": 1.0485, "step": 4310 }, { "epoch": 3.5775933609958503, "grad_norm": 9.664722442626953, "learning_rate": 1.8570622406639005e-05, "loss": 1.1669, "step": 4311 }, { "epoch": 3.578423236514523, "grad_norm": 12.376097679138184, "learning_rate": 1.8570290456431537e-05, "loss": 0.9988, "step": 4312 }, { "epoch": 3.579253112033195, "grad_norm": 17.41110610961914, "learning_rate": 1.856995850622407e-05, "loss": 2.1364, "step": 4313 }, { "epoch": 3.5800829875518674, "grad_norm": 15.581522941589355, "learning_rate": 1.85696265560166e-05, "loss": 1.6614, "step": 4314 }, { "epoch": 3.5809128630705396, 
"grad_norm": 9.622179985046387, "learning_rate": 1.856929460580913e-05, "loss": 1.1492, "step": 4315 }, { "epoch": 3.5817427385892118, "grad_norm": 10.089607238769531, "learning_rate": 1.8568962655601662e-05, "loss": 1.3724, "step": 4316 }, { "epoch": 3.582572614107884, "grad_norm": 14.753847122192383, "learning_rate": 1.8568630705394194e-05, "loss": 1.8462, "step": 4317 }, { "epoch": 3.583402489626556, "grad_norm": 14.11154842376709, "learning_rate": 1.8568298755186723e-05, "loss": 1.6135, "step": 4318 }, { "epoch": 3.5842323651452284, "grad_norm": 11.180510520935059, "learning_rate": 1.8567966804979255e-05, "loss": 1.4404, "step": 4319 }, { "epoch": 3.5850622406639006, "grad_norm": 9.117507934570312, "learning_rate": 1.8567634854771787e-05, "loss": 1.2114, "step": 4320 }, { "epoch": 3.5858921161825728, "grad_norm": 14.048809051513672, "learning_rate": 1.8567302904564316e-05, "loss": 1.7947, "step": 4321 }, { "epoch": 3.586721991701245, "grad_norm": 17.386674880981445, "learning_rate": 1.8566970954356848e-05, "loss": 1.6004, "step": 4322 }, { "epoch": 3.587551867219917, "grad_norm": 10.691329002380371, "learning_rate": 1.8566639004149377e-05, "loss": 1.1007, "step": 4323 }, { "epoch": 3.5883817427385893, "grad_norm": 12.518233299255371, "learning_rate": 1.856630705394191e-05, "loss": 1.4698, "step": 4324 }, { "epoch": 3.5892116182572615, "grad_norm": 9.042892456054688, "learning_rate": 1.856597510373444e-05, "loss": 1.1475, "step": 4325 }, { "epoch": 3.5900414937759337, "grad_norm": 12.647279739379883, "learning_rate": 1.8565643153526973e-05, "loss": 1.2452, "step": 4326 }, { "epoch": 3.590871369294606, "grad_norm": 15.163999557495117, "learning_rate": 1.8565311203319502e-05, "loss": 1.6632, "step": 4327 }, { "epoch": 3.591701244813278, "grad_norm": 11.877470016479492, "learning_rate": 1.8564979253112034e-05, "loss": 1.5292, "step": 4328 }, { "epoch": 3.5925311203319503, "grad_norm": 19.90605354309082, "learning_rate": 1.8564647302904566e-05, "loss": 2.6478, 
"step": 4329 }, { "epoch": 3.5933609958506225, "grad_norm": 7.647215366363525, "learning_rate": 1.8564315352697098e-05, "loss": 1.0534, "step": 4330 }, { "epoch": 3.5941908713692947, "grad_norm": 14.628666877746582, "learning_rate": 1.8563983402489627e-05, "loss": 1.0273, "step": 4331 }, { "epoch": 3.595020746887967, "grad_norm": 14.04655933380127, "learning_rate": 1.856365145228216e-05, "loss": 1.4747, "step": 4332 }, { "epoch": 3.595850622406639, "grad_norm": 10.006529808044434, "learning_rate": 1.856331950207469e-05, "loss": 1.1643, "step": 4333 }, { "epoch": 3.5966804979253113, "grad_norm": 13.458473205566406, "learning_rate": 1.8562987551867223e-05, "loss": 1.4314, "step": 4334 }, { "epoch": 3.5975103734439835, "grad_norm": 15.877001762390137, "learning_rate": 1.8562655601659755e-05, "loss": 1.1457, "step": 4335 }, { "epoch": 3.5983402489626557, "grad_norm": 10.546379089355469, "learning_rate": 1.8562323651452284e-05, "loss": 0.9534, "step": 4336 }, { "epoch": 3.599170124481328, "grad_norm": 13.889629364013672, "learning_rate": 1.8561991701244816e-05, "loss": 1.7196, "step": 4337 }, { "epoch": 3.6, "grad_norm": 12.389409065246582, "learning_rate": 1.8561659751037345e-05, "loss": 1.7868, "step": 4338 }, { "epoch": 3.6008298755186723, "grad_norm": 12.686994552612305, "learning_rate": 1.8561327800829877e-05, "loss": 1.1481, "step": 4339 }, { "epoch": 3.6016597510373445, "grad_norm": 9.757670402526855, "learning_rate": 1.856099585062241e-05, "loss": 1.5326, "step": 4340 }, { "epoch": 3.6024896265560167, "grad_norm": 12.104743003845215, "learning_rate": 1.8560663900414938e-05, "loss": 1.7534, "step": 4341 }, { "epoch": 3.603319502074689, "grad_norm": 14.114852905273438, "learning_rate": 1.856033195020747e-05, "loss": 1.8213, "step": 4342 }, { "epoch": 3.604149377593361, "grad_norm": 11.376083374023438, "learning_rate": 1.8560000000000002e-05, "loss": 1.4118, "step": 4343 }, { "epoch": 3.6049792531120333, "grad_norm": 12.995166778564453, "learning_rate": 
1.855966804979253e-05, "loss": 2.0063, "step": 4344 }, { "epoch": 3.6058091286307055, "grad_norm": 13.403077125549316, "learning_rate": 1.8559336099585063e-05, "loss": 1.8236, "step": 4345 }, { "epoch": 3.6066390041493777, "grad_norm": 17.066543579101562, "learning_rate": 1.8559004149377595e-05, "loss": 1.0556, "step": 4346 }, { "epoch": 3.60746887966805, "grad_norm": 11.830862998962402, "learning_rate": 1.8558672199170127e-05, "loss": 1.6751, "step": 4347 }, { "epoch": 3.608298755186722, "grad_norm": 14.170730590820312, "learning_rate": 1.8558340248962656e-05, "loss": 1.35, "step": 4348 }, { "epoch": 3.6091286307053942, "grad_norm": 22.74485969543457, "learning_rate": 1.8558008298755188e-05, "loss": 1.9689, "step": 4349 }, { "epoch": 3.6099585062240664, "grad_norm": 10.559514045715332, "learning_rate": 1.855767634854772e-05, "loss": 1.5157, "step": 4350 }, { "epoch": 3.6107883817427386, "grad_norm": 10.285812377929688, "learning_rate": 1.8557344398340252e-05, "loss": 1.3706, "step": 4351 }, { "epoch": 3.611618257261411, "grad_norm": 11.591938972473145, "learning_rate": 1.855701244813278e-05, "loss": 1.2291, "step": 4352 }, { "epoch": 3.612448132780083, "grad_norm": 12.256515502929688, "learning_rate": 1.8556680497925313e-05, "loss": 1.0646, "step": 4353 }, { "epoch": 3.6132780082987552, "grad_norm": 9.436701774597168, "learning_rate": 1.8556348547717845e-05, "loss": 0.7379, "step": 4354 }, { "epoch": 3.6141078838174274, "grad_norm": 12.6382474899292, "learning_rate": 1.8556016597510377e-05, "loss": 1.5365, "step": 4355 }, { "epoch": 3.6149377593360996, "grad_norm": 16.291034698486328, "learning_rate": 1.8555684647302906e-05, "loss": 1.2076, "step": 4356 }, { "epoch": 3.615767634854772, "grad_norm": 21.551006317138672, "learning_rate": 1.8555352697095438e-05, "loss": 1.5564, "step": 4357 }, { "epoch": 3.616597510373444, "grad_norm": 6.522058963775635, "learning_rate": 1.855502074688797e-05, "loss": 1.465, "step": 4358 }, { "epoch": 3.617427385892116, "grad_norm": 
7.829730987548828, "learning_rate": 1.85546887966805e-05, "loss": 0.9819, "step": 4359 }, { "epoch": 3.6182572614107884, "grad_norm": 15.717830657958984, "learning_rate": 1.855435684647303e-05, "loss": 0.962, "step": 4360 }, { "epoch": 3.6190871369294606, "grad_norm": 13.507185935974121, "learning_rate": 1.855402489626556e-05, "loss": 1.0725, "step": 4361 }, { "epoch": 3.619917012448133, "grad_norm": 8.536640167236328, "learning_rate": 1.855369294605809e-05, "loss": 1.3972, "step": 4362 }, { "epoch": 3.620746887966805, "grad_norm": 21.06667137145996, "learning_rate": 1.8553360995850624e-05, "loss": 1.1577, "step": 4363 }, { "epoch": 3.621576763485477, "grad_norm": 12.311704635620117, "learning_rate": 1.8553029045643156e-05, "loss": 0.8163, "step": 4364 }, { "epoch": 3.6224066390041494, "grad_norm": 9.078479766845703, "learning_rate": 1.8552697095435685e-05, "loss": 0.9701, "step": 4365 }, { "epoch": 3.6232365145228216, "grad_norm": 11.68118953704834, "learning_rate": 1.8552365145228217e-05, "loss": 1.8434, "step": 4366 }, { "epoch": 3.624066390041494, "grad_norm": 10.780330657958984, "learning_rate": 1.855203319502075e-05, "loss": 1.0531, "step": 4367 }, { "epoch": 3.624896265560166, "grad_norm": 10.705464363098145, "learning_rate": 1.855170124481328e-05, "loss": 1.339, "step": 4368 }, { "epoch": 3.625726141078838, "grad_norm": 9.524133682250977, "learning_rate": 1.855136929460581e-05, "loss": 1.2349, "step": 4369 }, { "epoch": 3.6265560165975104, "grad_norm": 8.07824993133545, "learning_rate": 1.855103734439834e-05, "loss": 1.0241, "step": 4370 }, { "epoch": 3.6273858921161826, "grad_norm": 18.013275146484375, "learning_rate": 1.8550705394190874e-05, "loss": 1.4204, "step": 4371 }, { "epoch": 3.6282157676348548, "grad_norm": 13.173284530639648, "learning_rate": 1.8550373443983406e-05, "loss": 1.5, "step": 4372 }, { "epoch": 3.629045643153527, "grad_norm": 14.75029468536377, "learning_rate": 1.8550041493775935e-05, "loss": 1.0254, "step": 4373 }, { "epoch": 
3.629875518672199, "grad_norm": 15.45464038848877, "learning_rate": 1.8549709543568467e-05, "loss": 0.9122, "step": 4374 }, { "epoch": 3.6307053941908713, "grad_norm": 17.552093505859375, "learning_rate": 1.8549377593361e-05, "loss": 1.6823, "step": 4375 }, { "epoch": 3.6315352697095435, "grad_norm": 21.297863006591797, "learning_rate": 1.854904564315353e-05, "loss": 1.3914, "step": 4376 }, { "epoch": 3.6323651452282157, "grad_norm": 17.728120803833008, "learning_rate": 1.854871369294606e-05, "loss": 3.0679, "step": 4377 }, { "epoch": 3.633195020746888, "grad_norm": 16.854612350463867, "learning_rate": 1.8548381742738592e-05, "loss": 1.5803, "step": 4378 }, { "epoch": 3.63402489626556, "grad_norm": 10.510882377624512, "learning_rate": 1.854804979253112e-05, "loss": 1.1844, "step": 4379 }, { "epoch": 3.6348547717842323, "grad_norm": 15.54935359954834, "learning_rate": 1.8547717842323653e-05, "loss": 1.3959, "step": 4380 }, { "epoch": 3.6356846473029045, "grad_norm": 13.265677452087402, "learning_rate": 1.8547385892116185e-05, "loss": 1.5569, "step": 4381 }, { "epoch": 3.6365145228215767, "grad_norm": 8.910394668579102, "learning_rate": 1.8547053941908713e-05, "loss": 0.8655, "step": 4382 }, { "epoch": 3.637344398340249, "grad_norm": 16.382131576538086, "learning_rate": 1.8546721991701246e-05, "loss": 1.6283, "step": 4383 }, { "epoch": 3.638174273858921, "grad_norm": 15.945353507995605, "learning_rate": 1.8546390041493778e-05, "loss": 2.1891, "step": 4384 }, { "epoch": 3.6390041493775933, "grad_norm": 10.654199600219727, "learning_rate": 1.8546058091286306e-05, "loss": 1.7972, "step": 4385 }, { "epoch": 3.6398340248962655, "grad_norm": 14.887775421142578, "learning_rate": 1.854572614107884e-05, "loss": 1.7437, "step": 4386 }, { "epoch": 3.6406639004149377, "grad_norm": 14.499342918395996, "learning_rate": 1.854539419087137e-05, "loss": 1.2287, "step": 4387 }, { "epoch": 3.64149377593361, "grad_norm": 18.075605392456055, "learning_rate": 1.8545062240663903e-05, 
"loss": 1.2697, "step": 4388 }, { "epoch": 3.642323651452282, "grad_norm": 8.54690170288086, "learning_rate": 1.8544730290456435e-05, "loss": 1.2818, "step": 4389 }, { "epoch": 3.6431535269709543, "grad_norm": 12.821741104125977, "learning_rate": 1.8544398340248963e-05, "loss": 1.5881, "step": 4390 }, { "epoch": 3.6439834024896265, "grad_norm": 15.499619483947754, "learning_rate": 1.8544066390041496e-05, "loss": 1.8679, "step": 4391 }, { "epoch": 3.6448132780082987, "grad_norm": 14.224452018737793, "learning_rate": 1.8543734439834028e-05, "loss": 1.7008, "step": 4392 }, { "epoch": 3.645643153526971, "grad_norm": 15.628201484680176, "learning_rate": 1.854340248962656e-05, "loss": 1.9044, "step": 4393 }, { "epoch": 3.646473029045643, "grad_norm": 13.07601547241211, "learning_rate": 1.854307053941909e-05, "loss": 1.3298, "step": 4394 }, { "epoch": 3.6473029045643153, "grad_norm": 11.890751838684082, "learning_rate": 1.854273858921162e-05, "loss": 1.3634, "step": 4395 }, { "epoch": 3.6481327800829875, "grad_norm": 14.490467071533203, "learning_rate": 1.8542406639004153e-05, "loss": 1.5058, "step": 4396 }, { "epoch": 3.6489626556016597, "grad_norm": 10.0647611618042, "learning_rate": 1.854207468879668e-05, "loss": 1.1977, "step": 4397 }, { "epoch": 3.649792531120332, "grad_norm": 15.753451347351074, "learning_rate": 1.8541742738589214e-05, "loss": 1.7553, "step": 4398 }, { "epoch": 3.650622406639004, "grad_norm": 10.87661075592041, "learning_rate": 1.8541410788381746e-05, "loss": 1.3345, "step": 4399 }, { "epoch": 3.6514522821576763, "grad_norm": 13.670919418334961, "learning_rate": 1.8541078838174274e-05, "loss": 1.4063, "step": 4400 }, { "epoch": 3.6522821576763485, "grad_norm": 13.333632469177246, "learning_rate": 1.8540746887966807e-05, "loss": 1.7613, "step": 4401 }, { "epoch": 3.6531120331950206, "grad_norm": 8.21965503692627, "learning_rate": 1.8540414937759335e-05, "loss": 1.0226, "step": 4402 }, { "epoch": 3.653941908713693, "grad_norm": 10.128933906555176, 
"learning_rate": 1.8540082987551867e-05, "loss": 1.6344, "step": 4403 }, { "epoch": 3.654771784232365, "grad_norm": 13.330695152282715, "learning_rate": 1.85397510373444e-05, "loss": 0.7042, "step": 4404 }, { "epoch": 3.6556016597510372, "grad_norm": 13.853755950927734, "learning_rate": 1.853941908713693e-05, "loss": 1.4026, "step": 4405 }, { "epoch": 3.6564315352697094, "grad_norm": 11.045310020446777, "learning_rate": 1.853908713692946e-05, "loss": 1.5298, "step": 4406 }, { "epoch": 3.6572614107883816, "grad_norm": 21.87653160095215, "learning_rate": 1.8538755186721992e-05, "loss": 1.5378, "step": 4407 }, { "epoch": 3.658091286307054, "grad_norm": 8.862853050231934, "learning_rate": 1.8538423236514524e-05, "loss": 0.7534, "step": 4408 }, { "epoch": 3.658921161825726, "grad_norm": 14.001031875610352, "learning_rate": 1.8538091286307057e-05, "loss": 1.6835, "step": 4409 }, { "epoch": 3.659751037344398, "grad_norm": 20.398649215698242, "learning_rate": 1.8537759336099585e-05, "loss": 0.9738, "step": 4410 }, { "epoch": 3.6605809128630704, "grad_norm": 10.671929359436035, "learning_rate": 1.8537427385892117e-05, "loss": 1.4256, "step": 4411 }, { "epoch": 3.6614107883817426, "grad_norm": 14.303461074829102, "learning_rate": 1.853709543568465e-05, "loss": 1.7185, "step": 4412 }, { "epoch": 3.662240663900415, "grad_norm": 8.1558256149292, "learning_rate": 1.853676348547718e-05, "loss": 0.9198, "step": 4413 }, { "epoch": 3.663070539419087, "grad_norm": 14.596439361572266, "learning_rate": 1.853643153526971e-05, "loss": 1.3421, "step": 4414 }, { "epoch": 3.663900414937759, "grad_norm": 11.856522560119629, "learning_rate": 1.8536099585062242e-05, "loss": 1.2761, "step": 4415 }, { "epoch": 3.6647302904564314, "grad_norm": 12.419913291931152, "learning_rate": 1.8535767634854775e-05, "loss": 1.8734, "step": 4416 }, { "epoch": 3.6655601659751036, "grad_norm": 9.50389575958252, "learning_rate": 1.8535435684647303e-05, "loss": 1.5173, "step": 4417 }, { "epoch": 3.666390041493776, 
"grad_norm": 20.31846809387207, "learning_rate": 1.8535103734439835e-05, "loss": 1.6484, "step": 4418 }, { "epoch": 3.667219917012448, "grad_norm": 15.16329288482666, "learning_rate": 1.8534771784232368e-05, "loss": 1.4491, "step": 4419 }, { "epoch": 3.66804979253112, "grad_norm": 9.704947471618652, "learning_rate": 1.8534439834024896e-05, "loss": 0.7841, "step": 4420 }, { "epoch": 3.6688796680497924, "grad_norm": 15.34317684173584, "learning_rate": 1.853410788381743e-05, "loss": 2.17, "step": 4421 }, { "epoch": 3.6697095435684646, "grad_norm": 14.130288124084473, "learning_rate": 1.853377593360996e-05, "loss": 1.5459, "step": 4422 }, { "epoch": 3.6705394190871368, "grad_norm": 10.038933753967285, "learning_rate": 1.853344398340249e-05, "loss": 1.2214, "step": 4423 }, { "epoch": 3.671369294605809, "grad_norm": 12.551173210144043, "learning_rate": 1.853311203319502e-05, "loss": 1.3968, "step": 4424 }, { "epoch": 3.6721991701244816, "grad_norm": 11.588277816772461, "learning_rate": 1.8532780082987553e-05, "loss": 1.5737, "step": 4425 }, { "epoch": 3.673029045643154, "grad_norm": 11.779592514038086, "learning_rate": 1.8532448132780085e-05, "loss": 1.2923, "step": 4426 }, { "epoch": 3.673858921161826, "grad_norm": 8.827106475830078, "learning_rate": 1.8532116182572614e-05, "loss": 1.0954, "step": 4427 }, { "epoch": 3.674688796680498, "grad_norm": 11.301580429077148, "learning_rate": 1.8531784232365146e-05, "loss": 1.1939, "step": 4428 }, { "epoch": 3.6755186721991704, "grad_norm": 15.622467994689941, "learning_rate": 1.853145228215768e-05, "loss": 1.3785, "step": 4429 }, { "epoch": 3.6763485477178426, "grad_norm": 10.087066650390625, "learning_rate": 1.853112033195021e-05, "loss": 0.9109, "step": 4430 }, { "epoch": 3.677178423236515, "grad_norm": 8.388214111328125, "learning_rate": 1.853078838174274e-05, "loss": 1.2633, "step": 4431 }, { "epoch": 3.678008298755187, "grad_norm": 11.506667137145996, "learning_rate": 1.853045643153527e-05, "loss": 1.409, "step": 4432 }, { 
"epoch": 3.678838174273859, "grad_norm": 12.697265625, "learning_rate": 1.8530124481327803e-05, "loss": 2.0594, "step": 4433 }, { "epoch": 3.6796680497925314, "grad_norm": 7.113288402557373, "learning_rate": 1.8529792531120336e-05, "loss": 0.5729, "step": 4434 }, { "epoch": 3.6804979253112036, "grad_norm": 8.901537895202637, "learning_rate": 1.8529460580912864e-05, "loss": 1.0397, "step": 4435 }, { "epoch": 3.6813278008298758, "grad_norm": 8.650733947753906, "learning_rate": 1.8529128630705396e-05, "loss": 1.055, "step": 4436 }, { "epoch": 3.682157676348548, "grad_norm": 9.852311134338379, "learning_rate": 1.852879668049793e-05, "loss": 1.1168, "step": 4437 }, { "epoch": 3.68298755186722, "grad_norm": 12.58156967163086, "learning_rate": 1.8528464730290457e-05, "loss": 1.4568, "step": 4438 }, { "epoch": 3.6838174273858924, "grad_norm": 11.331259727478027, "learning_rate": 1.852813278008299e-05, "loss": 1.6332, "step": 4439 }, { "epoch": 3.6846473029045645, "grad_norm": 18.88066864013672, "learning_rate": 1.8527800829875518e-05, "loss": 1.6261, "step": 4440 }, { "epoch": 3.6854771784232367, "grad_norm": 12.617685317993164, "learning_rate": 1.852746887966805e-05, "loss": 1.5709, "step": 4441 }, { "epoch": 3.686307053941909, "grad_norm": 14.13012981414795, "learning_rate": 1.8527136929460582e-05, "loss": 2.0881, "step": 4442 }, { "epoch": 3.687136929460581, "grad_norm": 11.500184059143066, "learning_rate": 1.8526804979253114e-05, "loss": 1.0168, "step": 4443 }, { "epoch": 3.6879668049792533, "grad_norm": 12.557517051696777, "learning_rate": 1.8526473029045643e-05, "loss": 1.5773, "step": 4444 }, { "epoch": 3.6887966804979255, "grad_norm": 9.901421546936035, "learning_rate": 1.8526141078838175e-05, "loss": 1.0971, "step": 4445 }, { "epoch": 3.6896265560165977, "grad_norm": 10.726953506469727, "learning_rate": 1.8525809128630707e-05, "loss": 1.2589, "step": 4446 }, { "epoch": 3.69045643153527, "grad_norm": 13.245406150817871, "learning_rate": 1.852547717842324e-05, 
"loss": 1.3362, "step": 4447 }, { "epoch": 3.691286307053942, "grad_norm": 11.077710151672363, "learning_rate": 1.8525145228215768e-05, "loss": 1.0307, "step": 4448 }, { "epoch": 3.6921161825726143, "grad_norm": 13.311516761779785, "learning_rate": 1.85248132780083e-05, "loss": 1.6084, "step": 4449 }, { "epoch": 3.6929460580912865, "grad_norm": 11.570483207702637, "learning_rate": 1.8524481327800832e-05, "loss": 1.3159, "step": 4450 }, { "epoch": 3.6937759336099587, "grad_norm": 14.881027221679688, "learning_rate": 1.8524149377593364e-05, "loss": 1.446, "step": 4451 }, { "epoch": 3.694605809128631, "grad_norm": 10.964341163635254, "learning_rate": 1.8523817427385893e-05, "loss": 1.4283, "step": 4452 }, { "epoch": 3.695435684647303, "grad_norm": 16.302387237548828, "learning_rate": 1.8523485477178425e-05, "loss": 1.592, "step": 4453 }, { "epoch": 3.6962655601659753, "grad_norm": 9.259871482849121, "learning_rate": 1.8523153526970957e-05, "loss": 1.1948, "step": 4454 }, { "epoch": 3.6970954356846475, "grad_norm": 10.40372371673584, "learning_rate": 1.8522821576763486e-05, "loss": 1.0291, "step": 4455 }, { "epoch": 3.6979253112033197, "grad_norm": 11.150699615478516, "learning_rate": 1.8522489626556018e-05, "loss": 1.5915, "step": 4456 }, { "epoch": 3.698755186721992, "grad_norm": 14.304373741149902, "learning_rate": 1.852215767634855e-05, "loss": 1.6055, "step": 4457 }, { "epoch": 3.699585062240664, "grad_norm": 9.479884147644043, "learning_rate": 1.852182572614108e-05, "loss": 1.1171, "step": 4458 }, { "epoch": 3.7004149377593363, "grad_norm": 8.24842643737793, "learning_rate": 1.852149377593361e-05, "loss": 1.0955, "step": 4459 }, { "epoch": 3.7012448132780085, "grad_norm": 20.3818416595459, "learning_rate": 1.8521161825726143e-05, "loss": 1.6592, "step": 4460 }, { "epoch": 3.7020746887966807, "grad_norm": 11.312056541442871, "learning_rate": 1.8520829875518672e-05, "loss": 1.0445, "step": 4461 }, { "epoch": 3.702904564315353, "grad_norm": 17.500194549560547, 
"learning_rate": 1.8520497925311204e-05, "loss": 1.7883, "step": 4462 }, { "epoch": 3.703734439834025, "grad_norm": 8.035144805908203, "learning_rate": 1.8520165975103736e-05, "loss": 0.8131, "step": 4463 }, { "epoch": 3.7045643153526973, "grad_norm": 10.619231224060059, "learning_rate": 1.8519834024896265e-05, "loss": 1.1053, "step": 4464 }, { "epoch": 3.7053941908713695, "grad_norm": 8.966118812561035, "learning_rate": 1.8519502074688797e-05, "loss": 1.484, "step": 4465 }, { "epoch": 3.7062240663900416, "grad_norm": 10.008089065551758, "learning_rate": 1.851917012448133e-05, "loss": 0.9158, "step": 4466 }, { "epoch": 3.707053941908714, "grad_norm": 19.288599014282227, "learning_rate": 1.851883817427386e-05, "loss": 1.5364, "step": 4467 }, { "epoch": 3.707883817427386, "grad_norm": 12.933401107788086, "learning_rate": 1.8518506224066393e-05, "loss": 1.5495, "step": 4468 }, { "epoch": 3.7087136929460582, "grad_norm": 9.74271297454834, "learning_rate": 1.8518174273858922e-05, "loss": 1.2736, "step": 4469 }, { "epoch": 3.7095435684647304, "grad_norm": 18.679441452026367, "learning_rate": 1.8517842323651454e-05, "loss": 2.0902, "step": 4470 }, { "epoch": 3.7103734439834026, "grad_norm": 9.071356773376465, "learning_rate": 1.8517510373443986e-05, "loss": 0.9693, "step": 4471 }, { "epoch": 3.711203319502075, "grad_norm": 13.427332878112793, "learning_rate": 1.851717842323652e-05, "loss": 0.7968, "step": 4472 }, { "epoch": 3.712033195020747, "grad_norm": 13.740172386169434, "learning_rate": 1.8516846473029047e-05, "loss": 1.8678, "step": 4473 }, { "epoch": 3.712863070539419, "grad_norm": 17.13425064086914, "learning_rate": 1.851651452282158e-05, "loss": 1.4938, "step": 4474 }, { "epoch": 3.7136929460580914, "grad_norm": 16.748533248901367, "learning_rate": 1.851618257261411e-05, "loss": 1.7017, "step": 4475 }, { "epoch": 3.7145228215767636, "grad_norm": 15.176800727844238, "learning_rate": 1.851585062240664e-05, "loss": 1.0084, "step": 4476 }, { "epoch": 
3.715352697095436, "grad_norm": 16.971187591552734, "learning_rate": 1.8515518672199172e-05, "loss": 1.2585, "step": 4477 }, { "epoch": 3.716182572614108, "grad_norm": 8.319766998291016, "learning_rate": 1.85151867219917e-05, "loss": 0.9402, "step": 4478 }, { "epoch": 3.71701244813278, "grad_norm": 8.047627449035645, "learning_rate": 1.8514854771784233e-05, "loss": 0.7872, "step": 4479 }, { "epoch": 3.7178423236514524, "grad_norm": 11.878668785095215, "learning_rate": 1.8514522821576765e-05, "loss": 1.6442, "step": 4480 }, { "epoch": 3.7186721991701246, "grad_norm": 10.308414459228516, "learning_rate": 1.8514190871369294e-05, "loss": 1.314, "step": 4481 }, { "epoch": 3.719502074688797, "grad_norm": 13.237092971801758, "learning_rate": 1.8513858921161826e-05, "loss": 1.7687, "step": 4482 }, { "epoch": 3.720331950207469, "grad_norm": 10.245854377746582, "learning_rate": 1.8513526970954358e-05, "loss": 1.5548, "step": 4483 }, { "epoch": 3.721161825726141, "grad_norm": 12.712915420532227, "learning_rate": 1.851319502074689e-05, "loss": 1.2127, "step": 4484 }, { "epoch": 3.7219917012448134, "grad_norm": 11.599702835083008, "learning_rate": 1.851286307053942e-05, "loss": 2.2363, "step": 4485 }, { "epoch": 3.7228215767634856, "grad_norm": 18.4963436126709, "learning_rate": 1.851253112033195e-05, "loss": 0.8006, "step": 4486 }, { "epoch": 3.7236514522821578, "grad_norm": 8.646843910217285, "learning_rate": 1.8512199170124483e-05, "loss": 1.1672, "step": 4487 }, { "epoch": 3.72448132780083, "grad_norm": 9.795914649963379, "learning_rate": 1.8511867219917015e-05, "loss": 1.1894, "step": 4488 }, { "epoch": 3.725311203319502, "grad_norm": 13.562689781188965, "learning_rate": 1.8511535269709544e-05, "loss": 2.3446, "step": 4489 }, { "epoch": 3.7261410788381744, "grad_norm": 9.478860855102539, "learning_rate": 1.8511203319502076e-05, "loss": 1.2488, "step": 4490 }, { "epoch": 3.7269709543568466, "grad_norm": 10.122382164001465, "learning_rate": 1.8510871369294608e-05, "loss": 
1.5822, "step": 4491 }, { "epoch": 3.7278008298755188, "grad_norm": 10.524572372436523, "learning_rate": 1.851053941908714e-05, "loss": 1.3621, "step": 4492 }, { "epoch": 3.728630705394191, "grad_norm": 11.010307312011719, "learning_rate": 1.851020746887967e-05, "loss": 1.3998, "step": 4493 }, { "epoch": 3.729460580912863, "grad_norm": 9.840502738952637, "learning_rate": 1.85098755186722e-05, "loss": 1.243, "step": 4494 }, { "epoch": 3.7302904564315353, "grad_norm": 14.2648344039917, "learning_rate": 1.8509543568464733e-05, "loss": 1.1029, "step": 4495 }, { "epoch": 3.7311203319502075, "grad_norm": 9.804741859436035, "learning_rate": 1.8509211618257262e-05, "loss": 1.2951, "step": 4496 }, { "epoch": 3.7319502074688797, "grad_norm": 10.822497367858887, "learning_rate": 1.8508879668049794e-05, "loss": 1.2893, "step": 4497 }, { "epoch": 3.732780082987552, "grad_norm": 20.106609344482422, "learning_rate": 1.8508547717842326e-05, "loss": 1.3451, "step": 4498 }, { "epoch": 3.733609958506224, "grad_norm": 12.264996528625488, "learning_rate": 1.8508215767634855e-05, "loss": 2.1382, "step": 4499 }, { "epoch": 3.7344398340248963, "grad_norm": 11.529064178466797, "learning_rate": 1.8507883817427387e-05, "loss": 2.3433, "step": 4500 }, { "epoch": 3.7352697095435685, "grad_norm": 9.939523696899414, "learning_rate": 1.850755186721992e-05, "loss": 0.974, "step": 4501 }, { "epoch": 3.7360995850622407, "grad_norm": 9.850220680236816, "learning_rate": 1.8507219917012448e-05, "loss": 1.0819, "step": 4502 }, { "epoch": 3.736929460580913, "grad_norm": 18.595813751220703, "learning_rate": 1.850688796680498e-05, "loss": 2.1436, "step": 4503 }, { "epoch": 3.737759336099585, "grad_norm": 14.586170196533203, "learning_rate": 1.8506556016597512e-05, "loss": 1.2466, "step": 4504 }, { "epoch": 3.7385892116182573, "grad_norm": 13.34765338897705, "learning_rate": 1.8506224066390044e-05, "loss": 1.5583, "step": 4505 }, { "epoch": 3.7394190871369295, "grad_norm": 10.71086597442627, 
"learning_rate": 1.8505892116182573e-05, "loss": 1.2829, "step": 4506 }, { "epoch": 3.7402489626556017, "grad_norm": 10.63636302947998, "learning_rate": 1.8505560165975105e-05, "loss": 1.4964, "step": 4507 }, { "epoch": 3.741078838174274, "grad_norm": 11.61983585357666, "learning_rate": 1.8505228215767637e-05, "loss": 1.0308, "step": 4508 }, { "epoch": 3.741908713692946, "grad_norm": 10.044177055358887, "learning_rate": 1.850489626556017e-05, "loss": 1.489, "step": 4509 }, { "epoch": 3.7427385892116183, "grad_norm": 16.77060317993164, "learning_rate": 1.8504564315352698e-05, "loss": 1.6123, "step": 4510 }, { "epoch": 3.7435684647302905, "grad_norm": 13.717040061950684, "learning_rate": 1.850423236514523e-05, "loss": 1.7911, "step": 4511 }, { "epoch": 3.7443983402489627, "grad_norm": 11.60048770904541, "learning_rate": 1.8503900414937762e-05, "loss": 0.818, "step": 4512 }, { "epoch": 3.745228215767635, "grad_norm": 15.391756057739258, "learning_rate": 1.8503568464730294e-05, "loss": 1.0477, "step": 4513 }, { "epoch": 3.746058091286307, "grad_norm": 11.868863105773926, "learning_rate": 1.8503236514522823e-05, "loss": 1.8405, "step": 4514 }, { "epoch": 3.7468879668049793, "grad_norm": 12.307621002197266, "learning_rate": 1.8502904564315355e-05, "loss": 0.9475, "step": 4515 }, { "epoch": 3.7477178423236515, "grad_norm": 10.712437629699707, "learning_rate": 1.8502572614107887e-05, "loss": 1.5052, "step": 4516 }, { "epoch": 3.7485477178423237, "grad_norm": 9.50899887084961, "learning_rate": 1.8502240663900416e-05, "loss": 1.2956, "step": 4517 }, { "epoch": 3.749377593360996, "grad_norm": 13.772932052612305, "learning_rate": 1.8501908713692948e-05, "loss": 1.6208, "step": 4518 }, { "epoch": 3.750207468879668, "grad_norm": 13.918939590454102, "learning_rate": 1.8501576763485477e-05, "loss": 1.5265, "step": 4519 }, { "epoch": 3.7510373443983402, "grad_norm": 12.267228126525879, "learning_rate": 1.850124481327801e-05, "loss": 1.5604, "step": 4520 }, { "epoch": 
3.7518672199170124, "grad_norm": 11.987642288208008, "learning_rate": 1.850091286307054e-05, "loss": 1.336, "step": 4521 }, { "epoch": 3.7526970954356846, "grad_norm": 11.453329086303711, "learning_rate": 1.8500580912863073e-05, "loss": 1.2875, "step": 4522 }, { "epoch": 3.753526970954357, "grad_norm": 10.654605865478516, "learning_rate": 1.85002489626556e-05, "loss": 0.7791, "step": 4523 }, { "epoch": 3.754356846473029, "grad_norm": 12.79528522491455, "learning_rate": 1.8499917012448134e-05, "loss": 1.6297, "step": 4524 }, { "epoch": 3.7551867219917012, "grad_norm": 10.212074279785156, "learning_rate": 1.8499585062240666e-05, "loss": 1.3487, "step": 4525 }, { "epoch": 3.7560165975103734, "grad_norm": 7.865725517272949, "learning_rate": 1.8499253112033198e-05, "loss": 0.928, "step": 4526 }, { "epoch": 3.7568464730290456, "grad_norm": 12.446734428405762, "learning_rate": 1.8498921161825727e-05, "loss": 0.827, "step": 4527 }, { "epoch": 3.757676348547718, "grad_norm": 21.518634796142578, "learning_rate": 1.849858921161826e-05, "loss": 1.4234, "step": 4528 }, { "epoch": 3.75850622406639, "grad_norm": 8.63377857208252, "learning_rate": 1.849825726141079e-05, "loss": 0.5488, "step": 4529 }, { "epoch": 3.759336099585062, "grad_norm": 14.402122497558594, "learning_rate": 1.8497925311203323e-05, "loss": 1.4398, "step": 4530 }, { "epoch": 3.7601659751037344, "grad_norm": 9.55277156829834, "learning_rate": 1.8497593360995852e-05, "loss": 0.8847, "step": 4531 }, { "epoch": 3.7609958506224066, "grad_norm": 8.802431106567383, "learning_rate": 1.8497261410788384e-05, "loss": 0.8759, "step": 4532 }, { "epoch": 3.761825726141079, "grad_norm": 12.970032691955566, "learning_rate": 1.8496929460580916e-05, "loss": 2.058, "step": 4533 }, { "epoch": 3.762655601659751, "grad_norm": 17.290531158447266, "learning_rate": 1.8496597510373445e-05, "loss": 1.3761, "step": 4534 }, { "epoch": 3.763485477178423, "grad_norm": 18.416770935058594, "learning_rate": 1.8496265560165977e-05, "loss": 
2.3001, "step": 4535 }, { "epoch": 3.7643153526970954, "grad_norm": 10.732998847961426, "learning_rate": 1.849593360995851e-05, "loss": 0.9328, "step": 4536 }, { "epoch": 3.7651452282157676, "grad_norm": 9.719735145568848, "learning_rate": 1.8495601659751038e-05, "loss": 1.4322, "step": 4537 }, { "epoch": 3.7659751037344398, "grad_norm": 10.319262504577637, "learning_rate": 1.849526970954357e-05, "loss": 0.9134, "step": 4538 }, { "epoch": 3.766804979253112, "grad_norm": 10.233504295349121, "learning_rate": 1.84949377593361e-05, "loss": 1.2911, "step": 4539 }, { "epoch": 3.767634854771784, "grad_norm": 15.099631309509277, "learning_rate": 1.849460580912863e-05, "loss": 1.2451, "step": 4540 }, { "epoch": 3.7684647302904564, "grad_norm": 9.391501426696777, "learning_rate": 1.8494273858921163e-05, "loss": 1.182, "step": 4541 }, { "epoch": 3.7692946058091286, "grad_norm": 12.376446723937988, "learning_rate": 1.8493941908713695e-05, "loss": 1.4934, "step": 4542 }, { "epoch": 3.7701244813278008, "grad_norm": 9.587580680847168, "learning_rate": 1.8493609958506223e-05, "loss": 1.043, "step": 4543 }, { "epoch": 3.770954356846473, "grad_norm": 13.464548110961914, "learning_rate": 1.8493278008298756e-05, "loss": 1.2635, "step": 4544 }, { "epoch": 3.771784232365145, "grad_norm": 17.155115127563477, "learning_rate": 1.8492946058091288e-05, "loss": 2.6184, "step": 4545 }, { "epoch": 3.7726141078838173, "grad_norm": 16.664470672607422, "learning_rate": 1.849261410788382e-05, "loss": 0.8768, "step": 4546 }, { "epoch": 3.7734439834024895, "grad_norm": 15.056281089782715, "learning_rate": 1.8492282157676352e-05, "loss": 1.2744, "step": 4547 }, { "epoch": 3.7742738589211617, "grad_norm": 15.407609939575195, "learning_rate": 1.849195020746888e-05, "loss": 1.5059, "step": 4548 }, { "epoch": 3.775103734439834, "grad_norm": 15.303094863891602, "learning_rate": 1.8491618257261413e-05, "loss": 1.2233, "step": 4549 }, { "epoch": 3.775933609958506, "grad_norm": 11.470788955688477, 
"learning_rate": 1.8491286307053945e-05, "loss": 1.3638, "step": 4550 }, { "epoch": 3.7767634854771783, "grad_norm": 6.626414775848389, "learning_rate": 1.8490954356846477e-05, "loss": 0.6243, "step": 4551 }, { "epoch": 3.7775933609958505, "grad_norm": 11.523884773254395, "learning_rate": 1.8490622406639006e-05, "loss": 1.4866, "step": 4552 }, { "epoch": 3.7784232365145227, "grad_norm": 11.189117431640625, "learning_rate": 1.8490290456431538e-05, "loss": 1.7725, "step": 4553 }, { "epoch": 3.779253112033195, "grad_norm": 14.381937980651855, "learning_rate": 1.848995850622407e-05, "loss": 1.8953, "step": 4554 }, { "epoch": 3.780082987551867, "grad_norm": 13.483034133911133, "learning_rate": 1.84896265560166e-05, "loss": 1.3051, "step": 4555 }, { "epoch": 3.7809128630705393, "grad_norm": 10.49387264251709, "learning_rate": 1.848929460580913e-05, "loss": 1.0737, "step": 4556 }, { "epoch": 3.7817427385892115, "grad_norm": 11.341567039489746, "learning_rate": 1.848896265560166e-05, "loss": 0.8536, "step": 4557 }, { "epoch": 3.7825726141078837, "grad_norm": 14.692270278930664, "learning_rate": 1.848863070539419e-05, "loss": 1.5885, "step": 4558 }, { "epoch": 3.783402489626556, "grad_norm": 15.113868713378906, "learning_rate": 1.8488298755186724e-05, "loss": 1.2475, "step": 4559 }, { "epoch": 3.784232365145228, "grad_norm": 12.171663284301758, "learning_rate": 1.8487966804979252e-05, "loss": 1.4652, "step": 4560 }, { "epoch": 3.7850622406639003, "grad_norm": 10.462695121765137, "learning_rate": 1.8487634854771784e-05, "loss": 1.2744, "step": 4561 }, { "epoch": 3.7858921161825725, "grad_norm": 9.877893447875977, "learning_rate": 1.8487302904564317e-05, "loss": 1.2697, "step": 4562 }, { "epoch": 3.7867219917012447, "grad_norm": 14.401061058044434, "learning_rate": 1.848697095435685e-05, "loss": 1.4095, "step": 4563 }, { "epoch": 3.787551867219917, "grad_norm": 15.369714736938477, "learning_rate": 1.8486639004149377e-05, "loss": 1.1767, "step": 4564 }, { "epoch": 
3.788381742738589, "grad_norm": 9.706716537475586, "learning_rate": 1.848630705394191e-05, "loss": 1.4266, "step": 4565 }, { "epoch": 3.7892116182572613, "grad_norm": 9.715535163879395, "learning_rate": 1.848597510373444e-05, "loss": 0.5299, "step": 4566 }, { "epoch": 3.7900414937759335, "grad_norm": 10.653464317321777, "learning_rate": 1.8485643153526974e-05, "loss": 1.4737, "step": 4567 }, { "epoch": 3.7908713692946057, "grad_norm": 13.248740196228027, "learning_rate": 1.8485311203319502e-05, "loss": 1.1768, "step": 4568 }, { "epoch": 3.791701244813278, "grad_norm": 14.364236831665039, "learning_rate": 1.8484979253112035e-05, "loss": 1.1536, "step": 4569 }, { "epoch": 3.79253112033195, "grad_norm": 13.117932319641113, "learning_rate": 1.8484647302904567e-05, "loss": 1.2434, "step": 4570 }, { "epoch": 3.7933609958506223, "grad_norm": 12.133090019226074, "learning_rate": 1.84843153526971e-05, "loss": 1.0025, "step": 4571 }, { "epoch": 3.7941908713692944, "grad_norm": 26.728605270385742, "learning_rate": 1.8483983402489628e-05, "loss": 1.4639, "step": 4572 }, { "epoch": 3.7950207468879666, "grad_norm": 9.518874168395996, "learning_rate": 1.848365145228216e-05, "loss": 0.8867, "step": 4573 }, { "epoch": 3.795850622406639, "grad_norm": 13.771527290344238, "learning_rate": 1.8483319502074692e-05, "loss": 2.054, "step": 4574 }, { "epoch": 3.796680497925311, "grad_norm": 14.049699783325195, "learning_rate": 1.848298755186722e-05, "loss": 1.0432, "step": 4575 }, { "epoch": 3.7975103734439832, "grad_norm": 11.686946868896484, "learning_rate": 1.8482655601659753e-05, "loss": 0.9731, "step": 4576 }, { "epoch": 3.7983402489626554, "grad_norm": 15.87531566619873, "learning_rate": 1.8482323651452285e-05, "loss": 1.5901, "step": 4577 }, { "epoch": 3.7991701244813276, "grad_norm": 17.49444580078125, "learning_rate": 1.8481991701244813e-05, "loss": 1.6867, "step": 4578 }, { "epoch": 3.8, "grad_norm": 8.240997314453125, "learning_rate": 1.8481659751037345e-05, "loss": 1.3723, 
"step": 4579 }, { "epoch": 3.800829875518672, "grad_norm": 16.675159454345703, "learning_rate": 1.8481327800829878e-05, "loss": 1.2882, "step": 4580 }, { "epoch": 3.801659751037344, "grad_norm": 11.1671724319458, "learning_rate": 1.8480995850622406e-05, "loss": 1.4384, "step": 4581 }, { "epoch": 3.8024896265560164, "grad_norm": 20.967748641967773, "learning_rate": 1.848066390041494e-05, "loss": 0.961, "step": 4582 }, { "epoch": 3.8033195020746886, "grad_norm": 13.068291664123535, "learning_rate": 1.848033195020747e-05, "loss": 1.4778, "step": 4583 }, { "epoch": 3.804149377593361, "grad_norm": 18.619033813476562, "learning_rate": 1.8480000000000003e-05, "loss": 1.8228, "step": 4584 }, { "epoch": 3.804979253112033, "grad_norm": 10.594451904296875, "learning_rate": 1.847966804979253e-05, "loss": 1.0124, "step": 4585 }, { "epoch": 3.805809128630705, "grad_norm": 21.57969856262207, "learning_rate": 1.8479336099585063e-05, "loss": 1.7034, "step": 4586 }, { "epoch": 3.8066390041493774, "grad_norm": 11.409276008605957, "learning_rate": 1.8479004149377596e-05, "loss": 0.9054, "step": 4587 }, { "epoch": 3.8074688796680496, "grad_norm": 10.457351684570312, "learning_rate": 1.8478672199170128e-05, "loss": 1.5256, "step": 4588 }, { "epoch": 3.808298755186722, "grad_norm": 14.645707130432129, "learning_rate": 1.8478340248962656e-05, "loss": 1.7755, "step": 4589 }, { "epoch": 3.809128630705394, "grad_norm": 13.494565963745117, "learning_rate": 1.847800829875519e-05, "loss": 1.4107, "step": 4590 }, { "epoch": 3.809958506224066, "grad_norm": 15.374553680419922, "learning_rate": 1.847767634854772e-05, "loss": 1.4374, "step": 4591 }, { "epoch": 3.8107883817427384, "grad_norm": 12.442339897155762, "learning_rate": 1.8477344398340253e-05, "loss": 0.8553, "step": 4592 }, { "epoch": 3.8116182572614106, "grad_norm": 14.899370193481445, "learning_rate": 1.847701244813278e-05, "loss": 1.4726, "step": 4593 }, { "epoch": 3.8124481327800828, "grad_norm": 12.378907203674316, "learning_rate": 
1.8476680497925314e-05, "loss": 1.8227, "step": 4594 }, { "epoch": 3.813278008298755, "grad_norm": 15.096233367919922, "learning_rate": 1.8476348547717842e-05, "loss": 1.9169, "step": 4595 }, { "epoch": 3.814107883817427, "grad_norm": 12.139265060424805, "learning_rate": 1.8476016597510374e-05, "loss": 1.0782, "step": 4596 }, { "epoch": 3.8149377593360994, "grad_norm": 14.31010913848877, "learning_rate": 1.8475684647302906e-05, "loss": 2.1193, "step": 4597 }, { "epoch": 3.8157676348547716, "grad_norm": 17.97463035583496, "learning_rate": 1.8475352697095435e-05, "loss": 2.4944, "step": 4598 }, { "epoch": 3.8165975103734437, "grad_norm": 11.965766906738281, "learning_rate": 1.8475020746887967e-05, "loss": 1.1929, "step": 4599 }, { "epoch": 3.817427385892116, "grad_norm": 10.85203742980957, "learning_rate": 1.84746887966805e-05, "loss": 2.1509, "step": 4600 }, { "epoch": 3.818257261410788, "grad_norm": 11.132781028747559, "learning_rate": 1.847435684647303e-05, "loss": 1.324, "step": 4601 }, { "epoch": 3.8190871369294603, "grad_norm": 11.789621353149414, "learning_rate": 1.847402489626556e-05, "loss": 1.6007, "step": 4602 }, { "epoch": 3.8199170124481325, "grad_norm": 11.798501014709473, "learning_rate": 1.8473692946058092e-05, "loss": 1.668, "step": 4603 }, { "epoch": 3.8207468879668047, "grad_norm": 17.37257957458496, "learning_rate": 1.8473360995850624e-05, "loss": 1.9797, "step": 4604 }, { "epoch": 3.821576763485477, "grad_norm": 10.752548217773438, "learning_rate": 1.8473029045643157e-05, "loss": 1.1787, "step": 4605 }, { "epoch": 3.822406639004149, "grad_norm": 10.870410919189453, "learning_rate": 1.8472697095435685e-05, "loss": 1.2229, "step": 4606 }, { "epoch": 3.8232365145228213, "grad_norm": 14.897695541381836, "learning_rate": 1.8472365145228217e-05, "loss": 2.6348, "step": 4607 }, { "epoch": 3.8240663900414935, "grad_norm": 15.764653205871582, "learning_rate": 1.847203319502075e-05, "loss": 2.172, "step": 4608 }, { "epoch": 3.8248962655601657, "grad_norm": 
13.062101364135742, "learning_rate": 1.847170124481328e-05, "loss": 0.8245, "step": 4609 }, { "epoch": 3.825726141078838, "grad_norm": 10.174038887023926, "learning_rate": 1.847136929460581e-05, "loss": 1.4005, "step": 4610 }, { "epoch": 3.82655601659751, "grad_norm": 13.540996551513672, "learning_rate": 1.8471037344398342e-05, "loss": 0.9862, "step": 4611 }, { "epoch": 3.8273858921161823, "grad_norm": 18.14056968688965, "learning_rate": 1.8470705394190875e-05, "loss": 2.6954, "step": 4612 }, { "epoch": 3.828215767634855, "grad_norm": 15.329588890075684, "learning_rate": 1.8470373443983403e-05, "loss": 1.6571, "step": 4613 }, { "epoch": 3.829045643153527, "grad_norm": 9.536712646484375, "learning_rate": 1.8470041493775935e-05, "loss": 1.0138, "step": 4614 }, { "epoch": 3.8298755186721993, "grad_norm": 12.652536392211914, "learning_rate": 1.8469709543568467e-05, "loss": 1.3396, "step": 4615 }, { "epoch": 3.8307053941908715, "grad_norm": 17.019811630249023, "learning_rate": 1.8469377593360996e-05, "loss": 1.688, "step": 4616 }, { "epoch": 3.8315352697095437, "grad_norm": 20.156272888183594, "learning_rate": 1.846904564315353e-05, "loss": 2.109, "step": 4617 }, { "epoch": 3.832365145228216, "grad_norm": 11.242842674255371, "learning_rate": 1.8468713692946057e-05, "loss": 1.5392, "step": 4618 }, { "epoch": 3.833195020746888, "grad_norm": 16.53919219970703, "learning_rate": 1.846838174273859e-05, "loss": 1.3369, "step": 4619 }, { "epoch": 3.8340248962655603, "grad_norm": 10.228275299072266, "learning_rate": 1.846804979253112e-05, "loss": 1.4712, "step": 4620 }, { "epoch": 3.8348547717842325, "grad_norm": 11.631420135498047, "learning_rate": 1.8467717842323653e-05, "loss": 1.1575, "step": 4621 }, { "epoch": 3.8356846473029047, "grad_norm": 8.531525611877441, "learning_rate": 1.8467385892116182e-05, "loss": 1.2224, "step": 4622 }, { "epoch": 3.836514522821577, "grad_norm": 10.07715892791748, "learning_rate": 1.8467053941908714e-05, "loss": 0.7942, "step": 4623 }, { 
"epoch": 3.837344398340249, "grad_norm": 10.434632301330566, "learning_rate": 1.8466721991701246e-05, "loss": 1.4008, "step": 4624 }, { "epoch": 3.8381742738589213, "grad_norm": 13.804797172546387, "learning_rate": 1.846639004149378e-05, "loss": 1.546, "step": 4625 }, { "epoch": 3.8390041493775935, "grad_norm": 14.926173210144043, "learning_rate": 1.846605809128631e-05, "loss": 1.894, "step": 4626 }, { "epoch": 3.8398340248962657, "grad_norm": 13.850680351257324, "learning_rate": 1.846572614107884e-05, "loss": 1.511, "step": 4627 }, { "epoch": 3.840663900414938, "grad_norm": 10.18859577178955, "learning_rate": 1.846539419087137e-05, "loss": 0.9218, "step": 4628 }, { "epoch": 3.84149377593361, "grad_norm": 8.476318359375, "learning_rate": 1.8465062240663903e-05, "loss": 1.0442, "step": 4629 }, { "epoch": 3.8423236514522823, "grad_norm": 21.956438064575195, "learning_rate": 1.8464730290456436e-05, "loss": 2.1871, "step": 4630 }, { "epoch": 3.8431535269709545, "grad_norm": 14.410369873046875, "learning_rate": 1.8464398340248964e-05, "loss": 1.4087, "step": 4631 }, { "epoch": 3.8439834024896267, "grad_norm": 8.116064071655273, "learning_rate": 1.8464066390041496e-05, "loss": 0.9084, "step": 4632 }, { "epoch": 3.844813278008299, "grad_norm": 14.335823059082031, "learning_rate": 1.846373443983403e-05, "loss": 1.2695, "step": 4633 }, { "epoch": 3.845643153526971, "grad_norm": 13.586532592773438, "learning_rate": 1.8463402489626557e-05, "loss": 1.8237, "step": 4634 }, { "epoch": 3.8464730290456433, "grad_norm": 17.069307327270508, "learning_rate": 1.846307053941909e-05, "loss": 1.6878, "step": 4635 }, { "epoch": 3.8473029045643155, "grad_norm": 13.96126937866211, "learning_rate": 1.8462738589211618e-05, "loss": 1.5253, "step": 4636 }, { "epoch": 3.8481327800829876, "grad_norm": 15.602198600769043, "learning_rate": 1.846240663900415e-05, "loss": 1.7323, "step": 4637 }, { "epoch": 3.84896265560166, "grad_norm": 11.106369018554688, "learning_rate": 1.8462074688796682e-05, 
"loss": 1.4, "step": 4638 }, { "epoch": 3.849792531120332, "grad_norm": 24.046167373657227, "learning_rate": 1.846174273858921e-05, "loss": 2.7061, "step": 4639 }, { "epoch": 3.8506224066390042, "grad_norm": 10.775321960449219, "learning_rate": 1.8461410788381743e-05, "loss": 1.487, "step": 4640 }, { "epoch": 3.8514522821576764, "grad_norm": 14.769693374633789, "learning_rate": 1.8461078838174275e-05, "loss": 1.5027, "step": 4641 }, { "epoch": 3.8522821576763486, "grad_norm": 15.011316299438477, "learning_rate": 1.8460746887966807e-05, "loss": 1.758, "step": 4642 }, { "epoch": 3.853112033195021, "grad_norm": 17.689380645751953, "learning_rate": 1.8460414937759336e-05, "loss": 1.2686, "step": 4643 }, { "epoch": 3.853941908713693, "grad_norm": 14.611698150634766, "learning_rate": 1.8460082987551868e-05, "loss": 2.2957, "step": 4644 }, { "epoch": 3.854771784232365, "grad_norm": 15.429278373718262, "learning_rate": 1.84597510373444e-05, "loss": 1.6836, "step": 4645 }, { "epoch": 3.8556016597510374, "grad_norm": 16.21767807006836, "learning_rate": 1.8459419087136932e-05, "loss": 1.0291, "step": 4646 }, { "epoch": 3.8564315352697096, "grad_norm": 12.708388328552246, "learning_rate": 1.845908713692946e-05, "loss": 1.3537, "step": 4647 }, { "epoch": 3.857261410788382, "grad_norm": 13.9622802734375, "learning_rate": 1.8458755186721993e-05, "loss": 1.3267, "step": 4648 }, { "epoch": 3.858091286307054, "grad_norm": 14.95307731628418, "learning_rate": 1.8458423236514525e-05, "loss": 1.7805, "step": 4649 }, { "epoch": 3.858921161825726, "grad_norm": 9.427729606628418, "learning_rate": 1.8458091286307057e-05, "loss": 1.0773, "step": 4650 }, { "epoch": 3.8597510373443984, "grad_norm": 19.679506301879883, "learning_rate": 1.8457759336099586e-05, "loss": 1.7768, "step": 4651 }, { "epoch": 3.8605809128630706, "grad_norm": 8.7242431640625, "learning_rate": 1.8457427385892118e-05, "loss": 0.9654, "step": 4652 }, { "epoch": 3.861410788381743, "grad_norm": 8.52262020111084, 
"learning_rate": 1.845709543568465e-05, "loss": 1.0357, "step": 4653 }, { "epoch": 3.862240663900415, "grad_norm": 10.775751113891602, "learning_rate": 1.845676348547718e-05, "loss": 1.518, "step": 4654 }, { "epoch": 3.863070539419087, "grad_norm": 9.034241676330566, "learning_rate": 1.845643153526971e-05, "loss": 1.2873, "step": 4655 }, { "epoch": 3.8639004149377594, "grad_norm": 20.83137321472168, "learning_rate": 1.845609958506224e-05, "loss": 0.8235, "step": 4656 }, { "epoch": 3.8647302904564316, "grad_norm": 13.947542190551758, "learning_rate": 1.8455767634854772e-05, "loss": 1.7832, "step": 4657 }, { "epoch": 3.8655601659751038, "grad_norm": 13.293231010437012, "learning_rate": 1.8455435684647304e-05, "loss": 1.1165, "step": 4658 }, { "epoch": 3.866390041493776, "grad_norm": 12.022812843322754, "learning_rate": 1.8455103734439836e-05, "loss": 1.3025, "step": 4659 }, { "epoch": 3.867219917012448, "grad_norm": 13.863626480102539, "learning_rate": 1.8454771784232365e-05, "loss": 1.6912, "step": 4660 }, { "epoch": 3.8680497925311204, "grad_norm": 15.194438934326172, "learning_rate": 1.8454439834024897e-05, "loss": 1.7505, "step": 4661 }, { "epoch": 3.8688796680497926, "grad_norm": 17.197643280029297, "learning_rate": 1.845410788381743e-05, "loss": 2.1635, "step": 4662 }, { "epoch": 3.8697095435684647, "grad_norm": 13.192564964294434, "learning_rate": 1.845377593360996e-05, "loss": 1.1707, "step": 4663 }, { "epoch": 3.870539419087137, "grad_norm": 16.429697036743164, "learning_rate": 1.845344398340249e-05, "loss": 1.3467, "step": 4664 }, { "epoch": 3.871369294605809, "grad_norm": 14.30775260925293, "learning_rate": 1.8453112033195022e-05, "loss": 1.028, "step": 4665 }, { "epoch": 3.8721991701244813, "grad_norm": 10.491448402404785, "learning_rate": 1.8452780082987554e-05, "loss": 1.1971, "step": 4666 }, { "epoch": 3.8730290456431535, "grad_norm": 14.115928649902344, "learning_rate": 1.8452448132780086e-05, "loss": 1.0007, "step": 4667 }, { "epoch": 
3.8738589211618257, "grad_norm": 13.051931381225586, "learning_rate": 1.8452116182572615e-05, "loss": 1.2897, "step": 4668 }, { "epoch": 3.874688796680498, "grad_norm": 20.452072143554688, "learning_rate": 1.8451784232365147e-05, "loss": 1.4516, "step": 4669 }, { "epoch": 3.87551867219917, "grad_norm": 17.760744094848633, "learning_rate": 1.845145228215768e-05, "loss": 1.2062, "step": 4670 }, { "epoch": 3.8763485477178423, "grad_norm": 11.178945541381836, "learning_rate": 1.845112033195021e-05, "loss": 0.6589, "step": 4671 }, { "epoch": 3.8771784232365145, "grad_norm": 16.545774459838867, "learning_rate": 1.845078838174274e-05, "loss": 1.3764, "step": 4672 }, { "epoch": 3.8780082987551867, "grad_norm": 11.358962059020996, "learning_rate": 1.8450456431535272e-05, "loss": 1.1184, "step": 4673 }, { "epoch": 3.878838174273859, "grad_norm": 18.231361389160156, "learning_rate": 1.84501244813278e-05, "loss": 1.0249, "step": 4674 }, { "epoch": 3.879668049792531, "grad_norm": 14.862493515014648, "learning_rate": 1.8449792531120333e-05, "loss": 2.0314, "step": 4675 }, { "epoch": 3.8804979253112033, "grad_norm": 11.01107120513916, "learning_rate": 1.8449460580912865e-05, "loss": 1.0367, "step": 4676 }, { "epoch": 3.8813278008298755, "grad_norm": 12.323145866394043, "learning_rate": 1.8449128630705394e-05, "loss": 1.3014, "step": 4677 }, { "epoch": 3.8821576763485477, "grad_norm": 15.015504837036133, "learning_rate": 1.8448796680497926e-05, "loss": 2.412, "step": 4678 }, { "epoch": 3.88298755186722, "grad_norm": 10.640324592590332, "learning_rate": 1.8448464730290458e-05, "loss": 1.1255, "step": 4679 }, { "epoch": 3.883817427385892, "grad_norm": 13.938735961914062, "learning_rate": 1.844813278008299e-05, "loss": 1.388, "step": 4680 }, { "epoch": 3.8846473029045643, "grad_norm": 11.144166946411133, "learning_rate": 1.844780082987552e-05, "loss": 1.4768, "step": 4681 }, { "epoch": 3.8854771784232365, "grad_norm": 10.114496231079102, "learning_rate": 1.844746887966805e-05, 
"loss": 0.895, "step": 4682 }, { "epoch": 3.8863070539419087, "grad_norm": 10.379409790039062, "learning_rate": 1.8447136929460583e-05, "loss": 1.2123, "step": 4683 }, { "epoch": 3.887136929460581, "grad_norm": 19.09441566467285, "learning_rate": 1.8446804979253115e-05, "loss": 1.7371, "step": 4684 }, { "epoch": 3.887966804979253, "grad_norm": 12.990735054016113, "learning_rate": 1.8446473029045644e-05, "loss": 1.8506, "step": 4685 }, { "epoch": 3.8887966804979253, "grad_norm": 10.248579025268555, "learning_rate": 1.8446141078838176e-05, "loss": 1.1204, "step": 4686 }, { "epoch": 3.8896265560165975, "grad_norm": 11.178241729736328, "learning_rate": 1.8445809128630708e-05, "loss": 1.8427, "step": 4687 }, { "epoch": 3.8904564315352697, "grad_norm": 14.489066123962402, "learning_rate": 1.844547717842324e-05, "loss": 1.9635, "step": 4688 }, { "epoch": 3.891286307053942, "grad_norm": 11.83570384979248, "learning_rate": 1.844514522821577e-05, "loss": 1.5555, "step": 4689 }, { "epoch": 3.892116182572614, "grad_norm": 11.384876251220703, "learning_rate": 1.84448132780083e-05, "loss": 1.3774, "step": 4690 }, { "epoch": 3.8929460580912862, "grad_norm": 15.118989944458008, "learning_rate": 1.8444481327800833e-05, "loss": 1.4118, "step": 4691 }, { "epoch": 3.8937759336099584, "grad_norm": 9.629194259643555, "learning_rate": 1.8444149377593362e-05, "loss": 1.0321, "step": 4692 }, { "epoch": 3.8946058091286306, "grad_norm": 12.296831130981445, "learning_rate": 1.8443817427385894e-05, "loss": 1.7929, "step": 4693 }, { "epoch": 3.895435684647303, "grad_norm": 10.519309043884277, "learning_rate": 1.8443485477178426e-05, "loss": 1.16, "step": 4694 }, { "epoch": 3.896265560165975, "grad_norm": 13.30518627166748, "learning_rate": 1.8443153526970955e-05, "loss": 1.7029, "step": 4695 }, { "epoch": 3.8970954356846472, "grad_norm": 8.970481872558594, "learning_rate": 1.8442821576763487e-05, "loss": 1.3235, "step": 4696 }, { "epoch": 3.8979253112033194, "grad_norm": 8.785886764526367, 
"learning_rate": 1.8442489626556016e-05, "loss": 1.5165, "step": 4697 }, { "epoch": 3.8987551867219916, "grad_norm": 14.159449577331543, "learning_rate": 1.8442157676348548e-05, "loss": 0.991, "step": 4698 }, { "epoch": 3.899585062240664, "grad_norm": 12.17695140838623, "learning_rate": 1.844182572614108e-05, "loss": 1.5725, "step": 4699 }, { "epoch": 3.900414937759336, "grad_norm": 13.370075225830078, "learning_rate": 1.8441493775933612e-05, "loss": 1.4343, "step": 4700 }, { "epoch": 3.901244813278008, "grad_norm": 11.715414047241211, "learning_rate": 1.844116182572614e-05, "loss": 1.513, "step": 4701 }, { "epoch": 3.9020746887966804, "grad_norm": 11.552024841308594, "learning_rate": 1.8440829875518673e-05, "loss": 1.4027, "step": 4702 }, { "epoch": 3.9029045643153526, "grad_norm": 14.505635261535645, "learning_rate": 1.8440497925311205e-05, "loss": 1.0403, "step": 4703 }, { "epoch": 3.903734439834025, "grad_norm": 10.47092342376709, "learning_rate": 1.8440165975103737e-05, "loss": 1.3018, "step": 4704 }, { "epoch": 3.904564315352697, "grad_norm": 23.5716495513916, "learning_rate": 1.843983402489627e-05, "loss": 2.1351, "step": 4705 }, { "epoch": 3.905394190871369, "grad_norm": 11.038814544677734, "learning_rate": 1.8439502074688798e-05, "loss": 1.393, "step": 4706 }, { "epoch": 3.9062240663900414, "grad_norm": 14.098103523254395, "learning_rate": 1.843917012448133e-05, "loss": 1.2361, "step": 4707 }, { "epoch": 3.9070539419087136, "grad_norm": 11.020967483520508, "learning_rate": 1.8438838174273862e-05, "loss": 0.9277, "step": 4708 }, { "epoch": 3.9078838174273858, "grad_norm": 9.855338096618652, "learning_rate": 1.8438506224066394e-05, "loss": 1.1547, "step": 4709 }, { "epoch": 3.908713692946058, "grad_norm": 8.05812931060791, "learning_rate": 1.8438174273858923e-05, "loss": 1.5639, "step": 4710 }, { "epoch": 3.90954356846473, "grad_norm": 12.519821166992188, "learning_rate": 1.8437842323651455e-05, "loss": 1.0424, "step": 4711 }, { "epoch": 3.9103734439834024, 
"grad_norm": 13.193244934082031, "learning_rate": 1.8437510373443984e-05, "loss": 1.6839, "step": 4712 }, { "epoch": 3.9112033195020746, "grad_norm": 10.413725852966309, "learning_rate": 1.8437178423236516e-05, "loss": 0.9735, "step": 4713 }, { "epoch": 3.9120331950207468, "grad_norm": 13.775492668151855, "learning_rate": 1.8436846473029048e-05, "loss": 1.8996, "step": 4714 }, { "epoch": 3.912863070539419, "grad_norm": 11.169647216796875, "learning_rate": 1.8436514522821577e-05, "loss": 1.2278, "step": 4715 }, { "epoch": 3.913692946058091, "grad_norm": 8.590880393981934, "learning_rate": 1.843618257261411e-05, "loss": 1.1667, "step": 4716 }, { "epoch": 3.9145228215767633, "grad_norm": 13.160411834716797, "learning_rate": 1.843585062240664e-05, "loss": 0.9605, "step": 4717 }, { "epoch": 3.9153526970954355, "grad_norm": 14.070521354675293, "learning_rate": 1.843551867219917e-05, "loss": 1.7765, "step": 4718 }, { "epoch": 3.9161825726141077, "grad_norm": 14.10072135925293, "learning_rate": 1.84351867219917e-05, "loss": 1.527, "step": 4719 }, { "epoch": 3.91701244813278, "grad_norm": 11.163506507873535, "learning_rate": 1.8434854771784234e-05, "loss": 1.7674, "step": 4720 }, { "epoch": 3.917842323651452, "grad_norm": 11.289031982421875, "learning_rate": 1.8434522821576766e-05, "loss": 1.3211, "step": 4721 }, { "epoch": 3.9186721991701243, "grad_norm": 12.676259994506836, "learning_rate": 1.8434190871369295e-05, "loss": 1.5022, "step": 4722 }, { "epoch": 3.9195020746887965, "grad_norm": 11.417187690734863, "learning_rate": 1.8433858921161827e-05, "loss": 1.7826, "step": 4723 }, { "epoch": 3.9203319502074687, "grad_norm": 13.398945808410645, "learning_rate": 1.843352697095436e-05, "loss": 1.4465, "step": 4724 }, { "epoch": 3.921161825726141, "grad_norm": 9.142646789550781, "learning_rate": 1.843319502074689e-05, "loss": 1.4653, "step": 4725 }, { "epoch": 3.9219917012448136, "grad_norm": 11.090513229370117, "learning_rate": 1.843286307053942e-05, "loss": 1.1958, "step": 
4726 }, { "epoch": 3.9228215767634858, "grad_norm": 10.500250816345215, "learning_rate": 1.8432531120331952e-05, "loss": 1.0529, "step": 4727 }, { "epoch": 3.923651452282158, "grad_norm": 15.475504875183105, "learning_rate": 1.8432199170124484e-05, "loss": 2.0704, "step": 4728 }, { "epoch": 3.92448132780083, "grad_norm": 8.40884017944336, "learning_rate": 1.8431867219917016e-05, "loss": 0.9549, "step": 4729 }, { "epoch": 3.9253112033195023, "grad_norm": 10.123230934143066, "learning_rate": 1.8431535269709545e-05, "loss": 1.0849, "step": 4730 }, { "epoch": 3.9261410788381745, "grad_norm": 10.670896530151367, "learning_rate": 1.8431203319502077e-05, "loss": 1.4639, "step": 4731 }, { "epoch": 3.9269709543568467, "grad_norm": 12.379463195800781, "learning_rate": 1.843087136929461e-05, "loss": 1.6961, "step": 4732 }, { "epoch": 3.927800829875519, "grad_norm": 8.138073921203613, "learning_rate": 1.8430539419087138e-05, "loss": 0.9421, "step": 4733 }, { "epoch": 3.928630705394191, "grad_norm": 9.916502952575684, "learning_rate": 1.843020746887967e-05, "loss": 1.8123, "step": 4734 }, { "epoch": 3.9294605809128633, "grad_norm": 8.247180938720703, "learning_rate": 1.84298755186722e-05, "loss": 1.3084, "step": 4735 }, { "epoch": 3.9302904564315355, "grad_norm": 15.672019004821777, "learning_rate": 1.842954356846473e-05, "loss": 2.0832, "step": 4736 }, { "epoch": 3.9311203319502077, "grad_norm": 12.660255432128906, "learning_rate": 1.8429211618257263e-05, "loss": 1.5396, "step": 4737 }, { "epoch": 3.93195020746888, "grad_norm": 18.707073211669922, "learning_rate": 1.8428879668049795e-05, "loss": 1.734, "step": 4738 }, { "epoch": 3.932780082987552, "grad_norm": 16.586801528930664, "learning_rate": 1.8428547717842323e-05, "loss": 1.6301, "step": 4739 }, { "epoch": 3.9336099585062243, "grad_norm": 10.171385765075684, "learning_rate": 1.8428215767634856e-05, "loss": 1.1865, "step": 4740 }, { "epoch": 3.9344398340248965, "grad_norm": 14.160385131835938, "learning_rate": 
1.8427883817427388e-05, "loss": 1.4798, "step": 4741 }, { "epoch": 3.9352697095435687, "grad_norm": 16.110721588134766, "learning_rate": 1.842755186721992e-05, "loss": 2.0916, "step": 4742 }, { "epoch": 3.936099585062241, "grad_norm": 22.353857040405273, "learning_rate": 1.842721991701245e-05, "loss": 1.9446, "step": 4743 }, { "epoch": 3.936929460580913, "grad_norm": 14.25089168548584, "learning_rate": 1.842688796680498e-05, "loss": 1.3767, "step": 4744 }, { "epoch": 3.9377593360995853, "grad_norm": 13.694479942321777, "learning_rate": 1.8426556016597513e-05, "loss": 1.2352, "step": 4745 }, { "epoch": 3.9385892116182575, "grad_norm": 10.93478012084961, "learning_rate": 1.8426224066390045e-05, "loss": 1.5586, "step": 4746 }, { "epoch": 3.9394190871369297, "grad_norm": 13.959232330322266, "learning_rate": 1.8425892116182574e-05, "loss": 1.1233, "step": 4747 }, { "epoch": 3.940248962655602, "grad_norm": 10.377793312072754, "learning_rate": 1.8425560165975106e-05, "loss": 1.2184, "step": 4748 }, { "epoch": 3.941078838174274, "grad_norm": 10.973336219787598, "learning_rate": 1.8425228215767638e-05, "loss": 1.731, "step": 4749 }, { "epoch": 3.9419087136929463, "grad_norm": 19.994897842407227, "learning_rate": 1.842489626556017e-05, "loss": 2.5792, "step": 4750 }, { "epoch": 3.9427385892116185, "grad_norm": 9.962438583374023, "learning_rate": 1.84245643153527e-05, "loss": 1.0004, "step": 4751 }, { "epoch": 3.9435684647302907, "grad_norm": 20.422292709350586, "learning_rate": 1.842423236514523e-05, "loss": 1.5153, "step": 4752 }, { "epoch": 3.944398340248963, "grad_norm": 16.203750610351562, "learning_rate": 1.842390041493776e-05, "loss": 1.5392, "step": 4753 }, { "epoch": 3.945228215767635, "grad_norm": 10.18405532836914, "learning_rate": 1.842356846473029e-05, "loss": 0.991, "step": 4754 }, { "epoch": 3.9460580912863072, "grad_norm": 11.270330429077148, "learning_rate": 1.8423236514522824e-05, "loss": 1.32, "step": 4755 }, { "epoch": 3.9468879668049794, "grad_norm": 
14.97566032409668, "learning_rate": 1.8422904564315352e-05, "loss": 1.5467, "step": 4756 }, { "epoch": 3.9477178423236516, "grad_norm": 9.076957702636719, "learning_rate": 1.8422572614107884e-05, "loss": 1.4984, "step": 4757 }, { "epoch": 3.948547717842324, "grad_norm": 12.487448692321777, "learning_rate": 1.8422240663900417e-05, "loss": 1.633, "step": 4758 }, { "epoch": 3.949377593360996, "grad_norm": 13.48928165435791, "learning_rate": 1.842190871369295e-05, "loss": 1.5396, "step": 4759 }, { "epoch": 3.9502074688796682, "grad_norm": 14.284558296203613, "learning_rate": 1.8421576763485477e-05, "loss": 1.9911, "step": 4760 }, { "epoch": 3.9510373443983404, "grad_norm": 15.99367904663086, "learning_rate": 1.842124481327801e-05, "loss": 1.1906, "step": 4761 }, { "epoch": 3.9518672199170126, "grad_norm": 11.570676803588867, "learning_rate": 1.842091286307054e-05, "loss": 1.1061, "step": 4762 }, { "epoch": 3.952697095435685, "grad_norm": 21.51803207397461, "learning_rate": 1.8420580912863074e-05, "loss": 1.3916, "step": 4763 }, { "epoch": 3.953526970954357, "grad_norm": 14.931929588317871, "learning_rate": 1.8420248962655602e-05, "loss": 1.9368, "step": 4764 }, { "epoch": 3.954356846473029, "grad_norm": 10.537493705749512, "learning_rate": 1.8419917012448135e-05, "loss": 1.4563, "step": 4765 }, { "epoch": 3.9551867219917014, "grad_norm": 8.569028854370117, "learning_rate": 1.8419585062240667e-05, "loss": 0.8579, "step": 4766 }, { "epoch": 3.9560165975103736, "grad_norm": 21.72835922241211, "learning_rate": 1.84192531120332e-05, "loss": 1.9333, "step": 4767 }, { "epoch": 3.956846473029046, "grad_norm": 23.34404754638672, "learning_rate": 1.8418921161825727e-05, "loss": 1.2964, "step": 4768 }, { "epoch": 3.957676348547718, "grad_norm": 21.38665008544922, "learning_rate": 1.841858921161826e-05, "loss": 1.1308, "step": 4769 }, { "epoch": 3.95850622406639, "grad_norm": 16.442869186401367, "learning_rate": 1.841825726141079e-05, "loss": 2.555, "step": 4770 }, { "epoch": 
3.9593360995850624, "grad_norm": 13.00115966796875, "learning_rate": 1.841792531120332e-05, "loss": 1.2966, "step": 4771 }, { "epoch": 3.9601659751037346, "grad_norm": 23.273889541625977, "learning_rate": 1.8417593360995853e-05, "loss": 2.232, "step": 4772 }, { "epoch": 3.9609958506224068, "grad_norm": 11.692388534545898, "learning_rate": 1.841726141078838e-05, "loss": 1.2204, "step": 4773 }, { "epoch": 3.961825726141079, "grad_norm": 19.59782600402832, "learning_rate": 1.8416929460580913e-05, "loss": 1.7894, "step": 4774 }, { "epoch": 3.962655601659751, "grad_norm": 14.472220420837402, "learning_rate": 1.8416597510373445e-05, "loss": 1.6289, "step": 4775 }, { "epoch": 3.9634854771784234, "grad_norm": 12.817424774169922, "learning_rate": 1.8416265560165974e-05, "loss": 0.9801, "step": 4776 }, { "epoch": 3.9643153526970956, "grad_norm": 25.88071632385254, "learning_rate": 1.8415933609958506e-05, "loss": 1.8585, "step": 4777 }, { "epoch": 3.9651452282157678, "grad_norm": 14.029111862182617, "learning_rate": 1.841560165975104e-05, "loss": 0.7648, "step": 4778 }, { "epoch": 3.96597510373444, "grad_norm": 18.58186912536621, "learning_rate": 1.841526970954357e-05, "loss": 2.0712, "step": 4779 }, { "epoch": 3.966804979253112, "grad_norm": 11.396181106567383, "learning_rate": 1.84149377593361e-05, "loss": 0.9894, "step": 4780 }, { "epoch": 3.9676348547717843, "grad_norm": 8.2029447555542, "learning_rate": 1.841460580912863e-05, "loss": 1.013, "step": 4781 }, { "epoch": 3.9684647302904565, "grad_norm": 19.722335815429688, "learning_rate": 1.8414273858921163e-05, "loss": 0.8944, "step": 4782 }, { "epoch": 3.9692946058091287, "grad_norm": 15.023296356201172, "learning_rate": 1.8413941908713696e-05, "loss": 1.3947, "step": 4783 }, { "epoch": 3.970124481327801, "grad_norm": 13.247252464294434, "learning_rate": 1.8413609958506228e-05, "loss": 1.6742, "step": 4784 }, { "epoch": 3.970954356846473, "grad_norm": 10.403339385986328, "learning_rate": 1.8413278008298756e-05, "loss": 
1.2601, "step": 4785 }, { "epoch": 3.9717842323651453, "grad_norm": 12.254374504089355, "learning_rate": 1.841294605809129e-05, "loss": 1.1968, "step": 4786 }, { "epoch": 3.9726141078838175, "grad_norm": 17.26634979248047, "learning_rate": 1.841261410788382e-05, "loss": 1.7657, "step": 4787 }, { "epoch": 3.9734439834024897, "grad_norm": 14.378070831298828, "learning_rate": 1.8412282157676353e-05, "loss": 1.2919, "step": 4788 }, { "epoch": 3.974273858921162, "grad_norm": 13.340736389160156, "learning_rate": 1.841195020746888e-05, "loss": 2.0541, "step": 4789 }, { "epoch": 3.975103734439834, "grad_norm": 15.275132179260254, "learning_rate": 1.8411618257261414e-05, "loss": 1.4514, "step": 4790 }, { "epoch": 3.9759336099585063, "grad_norm": 13.867713928222656, "learning_rate": 1.8411286307053942e-05, "loss": 2.0079, "step": 4791 }, { "epoch": 3.9767634854771785, "grad_norm": 11.37132740020752, "learning_rate": 1.8410954356846474e-05, "loss": 1.5242, "step": 4792 }, { "epoch": 3.9775933609958507, "grad_norm": 6.026978969573975, "learning_rate": 1.8410622406639006e-05, "loss": 0.7308, "step": 4793 }, { "epoch": 3.978423236514523, "grad_norm": 13.496237754821777, "learning_rate": 1.8410290456431535e-05, "loss": 1.6224, "step": 4794 }, { "epoch": 3.979253112033195, "grad_norm": 9.386000633239746, "learning_rate": 1.8409958506224067e-05, "loss": 1.6129, "step": 4795 }, { "epoch": 3.9800829875518673, "grad_norm": 15.344732284545898, "learning_rate": 1.84096265560166e-05, "loss": 1.3713, "step": 4796 }, { "epoch": 3.9809128630705395, "grad_norm": 9.901336669921875, "learning_rate": 1.8409294605809128e-05, "loss": 1.204, "step": 4797 }, { "epoch": 3.9817427385892117, "grad_norm": 9.51058292388916, "learning_rate": 1.840896265560166e-05, "loss": 1.0958, "step": 4798 }, { "epoch": 3.982572614107884, "grad_norm": 11.184589385986328, "learning_rate": 1.8408630705394192e-05, "loss": 1.4806, "step": 4799 }, { "epoch": 3.983402489626556, "grad_norm": 8.525703430175781, 
"learning_rate": 1.8408298755186724e-05, "loss": 0.7305, "step": 4800 }, { "epoch": 3.9842323651452283, "grad_norm": 11.709962844848633, "learning_rate": 1.8407966804979253e-05, "loss": 1.2614, "step": 4801 }, { "epoch": 3.9850622406639005, "grad_norm": 16.65069580078125, "learning_rate": 1.8407634854771785e-05, "loss": 1.3141, "step": 4802 }, { "epoch": 3.9858921161825727, "grad_norm": 12.286311149597168, "learning_rate": 1.8407302904564317e-05, "loss": 1.4162, "step": 4803 }, { "epoch": 3.986721991701245, "grad_norm": 12.364822387695312, "learning_rate": 1.840697095435685e-05, "loss": 1.3386, "step": 4804 }, { "epoch": 3.987551867219917, "grad_norm": 11.970717430114746, "learning_rate": 1.8406639004149378e-05, "loss": 1.6562, "step": 4805 }, { "epoch": 3.9883817427385893, "grad_norm": 12.147722244262695, "learning_rate": 1.840630705394191e-05, "loss": 1.397, "step": 4806 }, { "epoch": 3.9892116182572614, "grad_norm": 16.995059967041016, "learning_rate": 1.8405975103734442e-05, "loss": 1.3224, "step": 4807 }, { "epoch": 3.9900414937759336, "grad_norm": 17.8424129486084, "learning_rate": 1.8405643153526975e-05, "loss": 1.8004, "step": 4808 }, { "epoch": 3.990871369294606, "grad_norm": 14.385451316833496, "learning_rate": 1.8405311203319503e-05, "loss": 1.349, "step": 4809 }, { "epoch": 3.991701244813278, "grad_norm": 16.092391967773438, "learning_rate": 1.8404979253112035e-05, "loss": 1.5627, "step": 4810 }, { "epoch": 3.9925311203319502, "grad_norm": 11.275274276733398, "learning_rate": 1.8404647302904567e-05, "loss": 1.4928, "step": 4811 }, { "epoch": 3.9933609958506224, "grad_norm": 8.703329086303711, "learning_rate": 1.8404315352697096e-05, "loss": 0.9868, "step": 4812 }, { "epoch": 3.9941908713692946, "grad_norm": 13.396111488342285, "learning_rate": 1.8403983402489628e-05, "loss": 1.6156, "step": 4813 }, { "epoch": 3.995020746887967, "grad_norm": 17.114273071289062, "learning_rate": 1.8403651452282157e-05, "loss": 1.6989, "step": 4814 }, { "epoch": 
3.995850622406639, "grad_norm": 14.408864974975586, "learning_rate": 1.840331950207469e-05, "loss": 1.4222, "step": 4815 }, { "epoch": 3.996680497925311, "grad_norm": 8.555814743041992, "learning_rate": 1.840298755186722e-05, "loss": 0.9317, "step": 4816 }, { "epoch": 3.9975103734439834, "grad_norm": 13.220963478088379, "learning_rate": 1.8402655601659753e-05, "loss": 0.9866, "step": 4817 }, { "epoch": 3.9983402489626556, "grad_norm": 17.100276947021484, "learning_rate": 1.8402323651452282e-05, "loss": 1.1189, "step": 4818 }, { "epoch": 3.999170124481328, "grad_norm": 10.966687202453613, "learning_rate": 1.8401991701244814e-05, "loss": 1.7561, "step": 4819 }, { "epoch": 4.0, "grad_norm": 14.227295875549316, "learning_rate": 1.8401659751037346e-05, "loss": 1.7123, "step": 4820 }, { "epoch": 4.000829875518672, "grad_norm": 12.983024597167969, "learning_rate": 1.840132780082988e-05, "loss": 1.6203, "step": 4821 }, { "epoch": 4.001659751037344, "grad_norm": 8.782751083374023, "learning_rate": 1.8400995850622407e-05, "loss": 0.9966, "step": 4822 }, { "epoch": 4.002489626556017, "grad_norm": 10.475540161132812, "learning_rate": 1.840066390041494e-05, "loss": 1.0493, "step": 4823 }, { "epoch": 4.003319502074689, "grad_norm": 10.72029972076416, "learning_rate": 1.840033195020747e-05, "loss": 1.2117, "step": 4824 }, { "epoch": 4.004149377593361, "grad_norm": 12.69178295135498, "learning_rate": 1.8400000000000003e-05, "loss": 1.7862, "step": 4825 }, { "epoch": 4.004979253112033, "grad_norm": 13.536027908325195, "learning_rate": 1.8399668049792532e-05, "loss": 1.1336, "step": 4826 }, { "epoch": 4.005809128630705, "grad_norm": 12.841397285461426, "learning_rate": 1.8399336099585064e-05, "loss": 0.7519, "step": 4827 }, { "epoch": 4.006639004149378, "grad_norm": 13.026637077331543, "learning_rate": 1.8399004149377596e-05, "loss": 1.1213, "step": 4828 }, { "epoch": 4.00746887966805, "grad_norm": 10.534645080566406, "learning_rate": 1.8398672199170125e-05, "loss": 1.3375, "step": 
4829 }, { "epoch": 4.008298755186722, "grad_norm": 13.177151679992676, "learning_rate": 1.8398340248962657e-05, "loss": 1.4532, "step": 4830 }, { "epoch": 4.009128630705394, "grad_norm": 10.316591262817383, "learning_rate": 1.839800829875519e-05, "loss": 1.336, "step": 4831 }, { "epoch": 4.009958506224066, "grad_norm": 15.937275886535645, "learning_rate": 1.8397676348547718e-05, "loss": 0.9938, "step": 4832 }, { "epoch": 4.0107883817427386, "grad_norm": 11.210840225219727, "learning_rate": 1.839734439834025e-05, "loss": 1.1687, "step": 4833 }, { "epoch": 4.011618257261411, "grad_norm": 15.270161628723145, "learning_rate": 1.8397012448132782e-05, "loss": 1.2274, "step": 4834 }, { "epoch": 4.012448132780083, "grad_norm": 10.687252044677734, "learning_rate": 1.839668049792531e-05, "loss": 1.1658, "step": 4835 }, { "epoch": 4.013278008298755, "grad_norm": 16.006704330444336, "learning_rate": 1.8396348547717843e-05, "loss": 1.2309, "step": 4836 }, { "epoch": 4.014107883817427, "grad_norm": 10.244546890258789, "learning_rate": 1.8396016597510375e-05, "loss": 1.0289, "step": 4837 }, { "epoch": 4.0149377593360995, "grad_norm": 13.828387260437012, "learning_rate": 1.8395684647302907e-05, "loss": 1.3613, "step": 4838 }, { "epoch": 4.015767634854772, "grad_norm": 11.761421203613281, "learning_rate": 1.8395352697095436e-05, "loss": 1.0264, "step": 4839 }, { "epoch": 4.016597510373444, "grad_norm": 31.6379451751709, "learning_rate": 1.8395020746887968e-05, "loss": 1.354, "step": 4840 }, { "epoch": 4.017427385892116, "grad_norm": 15.64604663848877, "learning_rate": 1.83946887966805e-05, "loss": 1.5393, "step": 4841 }, { "epoch": 4.018257261410788, "grad_norm": 14.58024787902832, "learning_rate": 1.8394356846473032e-05, "loss": 1.0189, "step": 4842 }, { "epoch": 4.0190871369294605, "grad_norm": 10.018889427185059, "learning_rate": 1.839402489626556e-05, "loss": 1.1628, "step": 4843 }, { "epoch": 4.019917012448133, "grad_norm": 19.14592933654785, "learning_rate": 
1.8393692946058093e-05, "loss": 1.324, "step": 4844 }, { "epoch": 4.020746887966805, "grad_norm": 8.109804153442383, "learning_rate": 1.8393360995850625e-05, "loss": 0.938, "step": 4845 }, { "epoch": 4.021576763485477, "grad_norm": 11.726329803466797, "learning_rate": 1.8393029045643157e-05, "loss": 1.4319, "step": 4846 }, { "epoch": 4.022406639004149, "grad_norm": 15.50085163116455, "learning_rate": 1.8392697095435686e-05, "loss": 2.1464, "step": 4847 }, { "epoch": 4.0232365145228215, "grad_norm": 16.052005767822266, "learning_rate": 1.8392365145228218e-05, "loss": 1.2881, "step": 4848 }, { "epoch": 4.024066390041494, "grad_norm": 10.762858390808105, "learning_rate": 1.839203319502075e-05, "loss": 1.1072, "step": 4849 }, { "epoch": 4.024896265560166, "grad_norm": 14.151637077331543, "learning_rate": 1.839170124481328e-05, "loss": 1.3418, "step": 4850 }, { "epoch": 4.025726141078838, "grad_norm": 13.699653625488281, "learning_rate": 1.839136929460581e-05, "loss": 1.4799, "step": 4851 }, { "epoch": 4.02655601659751, "grad_norm": 10.27891731262207, "learning_rate": 1.839103734439834e-05, "loss": 1.026, "step": 4852 }, { "epoch": 4.0273858921161825, "grad_norm": 14.170805931091309, "learning_rate": 1.8390705394190872e-05, "loss": 1.6101, "step": 4853 }, { "epoch": 4.028215767634855, "grad_norm": 15.536898612976074, "learning_rate": 1.8390373443983404e-05, "loss": 1.289, "step": 4854 }, { "epoch": 4.029045643153527, "grad_norm": 13.592977523803711, "learning_rate": 1.8390041493775933e-05, "loss": 1.7959, "step": 4855 }, { "epoch": 4.029875518672199, "grad_norm": 14.815842628479004, "learning_rate": 1.8389709543568465e-05, "loss": 0.8859, "step": 4856 }, { "epoch": 4.030705394190871, "grad_norm": 12.383862495422363, "learning_rate": 1.8389377593360997e-05, "loss": 1.5274, "step": 4857 }, { "epoch": 4.0315352697095435, "grad_norm": 14.595527648925781, "learning_rate": 1.838904564315353e-05, "loss": 1.0546, "step": 4858 }, { "epoch": 4.032365145228216, "grad_norm": 
22.751602172851562, "learning_rate": 1.8388713692946058e-05, "loss": 1.6938, "step": 4859 }, { "epoch": 4.033195020746888, "grad_norm": 15.306804656982422, "learning_rate": 1.838838174273859e-05, "loss": 1.3333, "step": 4860 }, { "epoch": 4.03402489626556, "grad_norm": 9.897120475769043, "learning_rate": 1.8388049792531122e-05, "loss": 1.2923, "step": 4861 }, { "epoch": 4.034854771784232, "grad_norm": 7.910459518432617, "learning_rate": 1.8387717842323654e-05, "loss": 0.7294, "step": 4862 }, { "epoch": 4.035684647302904, "grad_norm": 11.617247581481934, "learning_rate": 1.8387385892116183e-05, "loss": 1.3678, "step": 4863 }, { "epoch": 4.036514522821577, "grad_norm": 11.877718925476074, "learning_rate": 1.8387053941908715e-05, "loss": 1.143, "step": 4864 }, { "epoch": 4.037344398340249, "grad_norm": 14.408759117126465, "learning_rate": 1.8386721991701247e-05, "loss": 1.606, "step": 4865 }, { "epoch": 4.038174273858921, "grad_norm": 9.828580856323242, "learning_rate": 1.838639004149378e-05, "loss": 0.624, "step": 4866 }, { "epoch": 4.039004149377593, "grad_norm": 11.65112018585205, "learning_rate": 1.838605809128631e-05, "loss": 1.5656, "step": 4867 }, { "epoch": 4.039834024896265, "grad_norm": 13.020928382873535, "learning_rate": 1.838572614107884e-05, "loss": 1.3361, "step": 4868 }, { "epoch": 4.040663900414938, "grad_norm": 13.384485244750977, "learning_rate": 1.8385394190871372e-05, "loss": 1.6135, "step": 4869 }, { "epoch": 4.04149377593361, "grad_norm": 10.27296257019043, "learning_rate": 1.83850622406639e-05, "loss": 1.0254, "step": 4870 }, { "epoch": 4.042323651452282, "grad_norm": 11.534634590148926, "learning_rate": 1.8384730290456433e-05, "loss": 1.1506, "step": 4871 }, { "epoch": 4.043153526970954, "grad_norm": 9.680898666381836, "learning_rate": 1.8384398340248965e-05, "loss": 0.9769, "step": 4872 }, { "epoch": 4.043983402489626, "grad_norm": 13.5412015914917, "learning_rate": 1.8384066390041494e-05, "loss": 1.9397, "step": 4873 }, { "epoch": 
4.044813278008299, "grad_norm": 9.57105827331543, "learning_rate": 1.8383734439834026e-05, "loss": 0.9543, "step": 4874 }, { "epoch": 4.045643153526971, "grad_norm": 15.917527198791504, "learning_rate": 1.8383402489626558e-05, "loss": 1.11, "step": 4875 }, { "epoch": 4.046473029045643, "grad_norm": 11.351470947265625, "learning_rate": 1.8383070539419087e-05, "loss": 1.0212, "step": 4876 }, { "epoch": 4.047302904564315, "grad_norm": 12.221122741699219, "learning_rate": 1.838273858921162e-05, "loss": 1.8133, "step": 4877 }, { "epoch": 4.048132780082987, "grad_norm": 9.789039611816406, "learning_rate": 1.838240663900415e-05, "loss": 1.4854, "step": 4878 }, { "epoch": 4.04896265560166, "grad_norm": 13.015995025634766, "learning_rate": 1.8382074688796683e-05, "loss": 1.54, "step": 4879 }, { "epoch": 4.049792531120332, "grad_norm": 18.406269073486328, "learning_rate": 1.8381742738589212e-05, "loss": 2.1419, "step": 4880 }, { "epoch": 4.050622406639004, "grad_norm": 14.896153450012207, "learning_rate": 1.8381410788381744e-05, "loss": 1.1959, "step": 4881 }, { "epoch": 4.051452282157676, "grad_norm": 12.805060386657715, "learning_rate": 1.8381078838174276e-05, "loss": 1.3091, "step": 4882 }, { "epoch": 4.052282157676348, "grad_norm": 14.954431533813477, "learning_rate": 1.8380746887966808e-05, "loss": 1.5139, "step": 4883 }, { "epoch": 4.053112033195021, "grad_norm": 11.85341739654541, "learning_rate": 1.8380414937759337e-05, "loss": 1.4039, "step": 4884 }, { "epoch": 4.053941908713693, "grad_norm": 9.860529899597168, "learning_rate": 1.838008298755187e-05, "loss": 1.0207, "step": 4885 }, { "epoch": 4.054771784232365, "grad_norm": 13.914158821105957, "learning_rate": 1.83797510373444e-05, "loss": 1.2575, "step": 4886 }, { "epoch": 4.055601659751037, "grad_norm": 9.456094741821289, "learning_rate": 1.8379419087136933e-05, "loss": 1.1158, "step": 4887 }, { "epoch": 4.056431535269709, "grad_norm": 16.861160278320312, "learning_rate": 1.8379087136929462e-05, "loss": 0.9557, 
"step": 4888 }, { "epoch": 4.0572614107883815, "grad_norm": 13.69189739227295, "learning_rate": 1.8378755186721994e-05, "loss": 2.0699, "step": 4889 }, { "epoch": 4.058091286307054, "grad_norm": 8.86284351348877, "learning_rate": 1.8378423236514523e-05, "loss": 1.3378, "step": 4890 }, { "epoch": 4.058921161825726, "grad_norm": 12.908047676086426, "learning_rate": 1.8378091286307055e-05, "loss": 1.2014, "step": 4891 }, { "epoch": 4.059751037344398, "grad_norm": 12.540960311889648, "learning_rate": 1.8377759336099587e-05, "loss": 1.7803, "step": 4892 }, { "epoch": 4.06058091286307, "grad_norm": 14.328608512878418, "learning_rate": 1.8377427385892116e-05, "loss": 1.1782, "step": 4893 }, { "epoch": 4.0614107883817425, "grad_norm": 13.21591854095459, "learning_rate": 1.8377095435684648e-05, "loss": 1.0279, "step": 4894 }, { "epoch": 4.062240663900415, "grad_norm": 11.005860328674316, "learning_rate": 1.837676348547718e-05, "loss": 1.1973, "step": 4895 }, { "epoch": 4.063070539419087, "grad_norm": 9.682038307189941, "learning_rate": 1.8376431535269712e-05, "loss": 1.3567, "step": 4896 }, { "epoch": 4.063900414937759, "grad_norm": 10.405766487121582, "learning_rate": 1.837609958506224e-05, "loss": 1.3939, "step": 4897 }, { "epoch": 4.064730290456431, "grad_norm": 15.177417755126953, "learning_rate": 1.8375767634854773e-05, "loss": 1.7637, "step": 4898 }, { "epoch": 4.0655601659751035, "grad_norm": 17.671119689941406, "learning_rate": 1.8375435684647305e-05, "loss": 1.6088, "step": 4899 }, { "epoch": 4.066390041493776, "grad_norm": 13.451322555541992, "learning_rate": 1.8375103734439837e-05, "loss": 1.0006, "step": 4900 }, { "epoch": 4.067219917012448, "grad_norm": 18.73296356201172, "learning_rate": 1.8374771784232366e-05, "loss": 2.0058, "step": 4901 }, { "epoch": 4.06804979253112, "grad_norm": 9.383795738220215, "learning_rate": 1.8374439834024898e-05, "loss": 0.9371, "step": 4902 }, { "epoch": 4.068879668049792, "grad_norm": 15.50122356414795, "learning_rate": 
1.837410788381743e-05, "loss": 1.6325, "step": 4903 }, { "epoch": 4.0697095435684645, "grad_norm": 16.54717445373535, "learning_rate": 1.8373775933609962e-05, "loss": 0.8383, "step": 4904 }, { "epoch": 4.070539419087137, "grad_norm": 7.092892169952393, "learning_rate": 1.837344398340249e-05, "loss": 1.0615, "step": 4905 }, { "epoch": 4.071369294605809, "grad_norm": 9.067404747009277, "learning_rate": 1.8373112033195023e-05, "loss": 1.3388, "step": 4906 }, { "epoch": 4.072199170124481, "grad_norm": 10.55510139465332, "learning_rate": 1.8372780082987555e-05, "loss": 1.1271, "step": 4907 }, { "epoch": 4.073029045643153, "grad_norm": 16.34210205078125, "learning_rate": 1.8372448132780084e-05, "loss": 1.9585, "step": 4908 }, { "epoch": 4.0738589211618255, "grad_norm": 15.699533462524414, "learning_rate": 1.8372116182572616e-05, "loss": 2.2336, "step": 4909 }, { "epoch": 4.074688796680498, "grad_norm": 13.354754447937012, "learning_rate": 1.8371784232365148e-05, "loss": 1.5614, "step": 4910 }, { "epoch": 4.07551867219917, "grad_norm": 9.27721118927002, "learning_rate": 1.8371452282157677e-05, "loss": 0.5288, "step": 4911 }, { "epoch": 4.076348547717842, "grad_norm": 13.36586856842041, "learning_rate": 1.837112033195021e-05, "loss": 1.5671, "step": 4912 }, { "epoch": 4.077178423236514, "grad_norm": 11.146671295166016, "learning_rate": 1.8370788381742737e-05, "loss": 1.1764, "step": 4913 }, { "epoch": 4.0780082987551864, "grad_norm": 11.177574157714844, "learning_rate": 1.837045643153527e-05, "loss": 1.671, "step": 4914 }, { "epoch": 4.078838174273859, "grad_norm": 10.557908058166504, "learning_rate": 1.83701244813278e-05, "loss": 1.3191, "step": 4915 }, { "epoch": 4.079668049792531, "grad_norm": 18.26020050048828, "learning_rate": 1.8369792531120334e-05, "loss": 1.6101, "step": 4916 }, { "epoch": 4.080497925311203, "grad_norm": 17.315088272094727, "learning_rate": 1.8369460580912866e-05, "loss": 1.4731, "step": 4917 }, { "epoch": 4.081327800829875, "grad_norm": 
9.659344673156738, "learning_rate": 1.8369128630705395e-05, "loss": 1.1472, "step": 4918 }, { "epoch": 4.082157676348547, "grad_norm": 9.415677070617676, "learning_rate": 1.8368796680497927e-05, "loss": 1.1242, "step": 4919 }, { "epoch": 4.08298755186722, "grad_norm": 11.497020721435547, "learning_rate": 1.836846473029046e-05, "loss": 1.7084, "step": 4920 }, { "epoch": 4.083817427385892, "grad_norm": 12.386226654052734, "learning_rate": 1.836813278008299e-05, "loss": 1.6002, "step": 4921 }, { "epoch": 4.084647302904564, "grad_norm": 19.887001037597656, "learning_rate": 1.836780082987552e-05, "loss": 1.9832, "step": 4922 }, { "epoch": 4.085477178423236, "grad_norm": 13.458765029907227, "learning_rate": 1.836746887966805e-05, "loss": 1.8513, "step": 4923 }, { "epoch": 4.086307053941908, "grad_norm": 12.979419708251953, "learning_rate": 1.8367136929460584e-05, "loss": 1.0625, "step": 4924 }, { "epoch": 4.087136929460581, "grad_norm": 12.30837631225586, "learning_rate": 1.8366804979253116e-05, "loss": 0.952, "step": 4925 }, { "epoch": 4.087966804979253, "grad_norm": 10.441951751708984, "learning_rate": 1.8366473029045645e-05, "loss": 0.8925, "step": 4926 }, { "epoch": 4.088796680497925, "grad_norm": 8.477840423583984, "learning_rate": 1.8366141078838177e-05, "loss": 1.4379, "step": 4927 }, { "epoch": 4.089626556016597, "grad_norm": 9.099698066711426, "learning_rate": 1.836580912863071e-05, "loss": 1.3568, "step": 4928 }, { "epoch": 4.090456431535269, "grad_norm": 9.049991607666016, "learning_rate": 1.8365477178423238e-05, "loss": 1.0011, "step": 4929 }, { "epoch": 4.091286307053942, "grad_norm": 10.591012954711914, "learning_rate": 1.836514522821577e-05, "loss": 1.1743, "step": 4930 }, { "epoch": 4.092116182572614, "grad_norm": 11.74271011352539, "learning_rate": 1.83648132780083e-05, "loss": 1.3316, "step": 4931 }, { "epoch": 4.092946058091286, "grad_norm": 19.183513641357422, "learning_rate": 1.836448132780083e-05, "loss": 2.1258, "step": 4932 }, { "epoch": 
4.093775933609958, "grad_norm": 13.207598686218262, "learning_rate": 1.8364149377593363e-05, "loss": 1.4352, "step": 4933 }, { "epoch": 4.09460580912863, "grad_norm": 10.893078804016113, "learning_rate": 1.836381742738589e-05, "loss": 1.7436, "step": 4934 }, { "epoch": 4.095435684647303, "grad_norm": 12.17190170288086, "learning_rate": 1.8363485477178423e-05, "loss": 1.4362, "step": 4935 }, { "epoch": 4.096265560165975, "grad_norm": 14.158878326416016, "learning_rate": 1.8363153526970956e-05, "loss": 1.548, "step": 4936 }, { "epoch": 4.097095435684647, "grad_norm": 15.6825590133667, "learning_rate": 1.8362821576763488e-05, "loss": 2.1001, "step": 4937 }, { "epoch": 4.097925311203319, "grad_norm": 11.047364234924316, "learning_rate": 1.8362489626556016e-05, "loss": 0.7422, "step": 4938 }, { "epoch": 4.098755186721991, "grad_norm": 9.690177917480469, "learning_rate": 1.836215767634855e-05, "loss": 1.2474, "step": 4939 }, { "epoch": 4.0995850622406635, "grad_norm": 19.32131004333496, "learning_rate": 1.836182572614108e-05, "loss": 1.8211, "step": 4940 }, { "epoch": 4.100414937759336, "grad_norm": 10.873930931091309, "learning_rate": 1.8361493775933613e-05, "loss": 0.7421, "step": 4941 }, { "epoch": 4.101244813278008, "grad_norm": 13.097393035888672, "learning_rate": 1.836116182572614e-05, "loss": 1.6103, "step": 4942 }, { "epoch": 4.10207468879668, "grad_norm": 12.563607215881348, "learning_rate": 1.8360829875518674e-05, "loss": 1.6756, "step": 4943 }, { "epoch": 4.102904564315352, "grad_norm": 12.256402015686035, "learning_rate": 1.8360497925311206e-05, "loss": 1.0374, "step": 4944 }, { "epoch": 4.1037344398340245, "grad_norm": 12.840080261230469, "learning_rate": 1.8360165975103738e-05, "loss": 1.581, "step": 4945 }, { "epoch": 4.104564315352697, "grad_norm": 22.18114471435547, "learning_rate": 1.8359834024896266e-05, "loss": 1.0288, "step": 4946 }, { "epoch": 4.105394190871369, "grad_norm": 12.211222648620605, "learning_rate": 1.83595020746888e-05, "loss": 0.9964, 
"step": 4947 }, { "epoch": 4.106224066390041, "grad_norm": 12.660571098327637, "learning_rate": 1.835917012448133e-05, "loss": 1.8528, "step": 4948 }, { "epoch": 4.107053941908713, "grad_norm": 13.666690826416016, "learning_rate": 1.835883817427386e-05, "loss": 1.5605, "step": 4949 }, { "epoch": 4.1078838174273855, "grad_norm": 11.608672142028809, "learning_rate": 1.835850622406639e-05, "loss": 1.7192, "step": 4950 }, { "epoch": 4.108713692946058, "grad_norm": 12.873458862304688, "learning_rate": 1.8358174273858924e-05, "loss": 1.0865, "step": 4951 }, { "epoch": 4.10954356846473, "grad_norm": 10.42817497253418, "learning_rate": 1.8357842323651452e-05, "loss": 1.1283, "step": 4952 }, { "epoch": 4.110373443983402, "grad_norm": 13.536055564880371, "learning_rate": 1.8357510373443984e-05, "loss": 0.9058, "step": 4953 }, { "epoch": 4.111203319502074, "grad_norm": 13.097993850708008, "learning_rate": 1.8357178423236517e-05, "loss": 1.4058, "step": 4954 }, { "epoch": 4.1120331950207465, "grad_norm": 12.497723579406738, "learning_rate": 1.8356846473029045e-05, "loss": 1.2461, "step": 4955 }, { "epoch": 4.112863070539419, "grad_norm": 13.871450424194336, "learning_rate": 1.8356514522821577e-05, "loss": 1.1198, "step": 4956 }, { "epoch": 4.113692946058091, "grad_norm": 15.124959945678711, "learning_rate": 1.835618257261411e-05, "loss": 1.4424, "step": 4957 }, { "epoch": 4.114522821576763, "grad_norm": 9.509029388427734, "learning_rate": 1.835585062240664e-05, "loss": 1.1601, "step": 4958 }, { "epoch": 4.115352697095435, "grad_norm": 15.638204574584961, "learning_rate": 1.835551867219917e-05, "loss": 1.6385, "step": 4959 }, { "epoch": 4.1161825726141075, "grad_norm": 13.4893217086792, "learning_rate": 1.8355186721991702e-05, "loss": 1.3753, "step": 4960 }, { "epoch": 4.11701244813278, "grad_norm": 18.459362030029297, "learning_rate": 1.8354854771784235e-05, "loss": 2.2286, "step": 4961 }, { "epoch": 4.117842323651452, "grad_norm": 10.067466735839844, "learning_rate": 
1.8354522821576767e-05, "loss": 1.1614, "step": 4962 }, { "epoch": 4.118672199170124, "grad_norm": 9.605265617370605, "learning_rate": 1.8354190871369295e-05, "loss": 1.4241, "step": 4963 }, { "epoch": 4.119502074688796, "grad_norm": 10.289458274841309, "learning_rate": 1.8353858921161827e-05, "loss": 0.8047, "step": 4964 }, { "epoch": 4.1203319502074685, "grad_norm": 14.967679977416992, "learning_rate": 1.835352697095436e-05, "loss": 1.0306, "step": 4965 }, { "epoch": 4.121161825726141, "grad_norm": 13.363367080688477, "learning_rate": 1.835319502074689e-05, "loss": 1.2582, "step": 4966 }, { "epoch": 4.121991701244813, "grad_norm": 15.022610664367676, "learning_rate": 1.835286307053942e-05, "loss": 0.6819, "step": 4967 }, { "epoch": 4.122821576763485, "grad_norm": 20.789142608642578, "learning_rate": 1.8352531120331952e-05, "loss": 1.444, "step": 4968 }, { "epoch": 4.123651452282157, "grad_norm": 16.88640785217285, "learning_rate": 1.835219917012448e-05, "loss": 1.8901, "step": 4969 }, { "epoch": 4.124481327800829, "grad_norm": 14.970269203186035, "learning_rate": 1.8351867219917013e-05, "loss": 1.2138, "step": 4970 }, { "epoch": 4.125311203319502, "grad_norm": 15.409701347351074, "learning_rate": 1.8351535269709545e-05, "loss": 1.0365, "step": 4971 }, { "epoch": 4.126141078838174, "grad_norm": 9.028555870056152, "learning_rate": 1.8351203319502074e-05, "loss": 1.0563, "step": 4972 }, { "epoch": 4.126970954356846, "grad_norm": 17.519182205200195, "learning_rate": 1.8350871369294606e-05, "loss": 1.0028, "step": 4973 }, { "epoch": 4.127800829875518, "grad_norm": 10.19582462310791, "learning_rate": 1.835053941908714e-05, "loss": 1.1314, "step": 4974 }, { "epoch": 4.12863070539419, "grad_norm": 12.405933380126953, "learning_rate": 1.835020746887967e-05, "loss": 1.5029, "step": 4975 }, { "epoch": 4.1294605809128635, "grad_norm": 13.832188606262207, "learning_rate": 1.83498755186722e-05, "loss": 1.4203, "step": 4976 }, { "epoch": 4.130290456431536, "grad_norm": 
10.365145683288574, "learning_rate": 1.834954356846473e-05, "loss": 0.9539, "step": 4977 }, { "epoch": 4.131120331950208, "grad_norm": 19.261581420898438, "learning_rate": 1.8349211618257263e-05, "loss": 1.2322, "step": 4978 }, { "epoch": 4.13195020746888, "grad_norm": 12.0945405960083, "learning_rate": 1.8348879668049796e-05, "loss": 1.1993, "step": 4979 }, { "epoch": 4.132780082987552, "grad_norm": 14.705653190612793, "learning_rate": 1.8348547717842324e-05, "loss": 1.0411, "step": 4980 }, { "epoch": 4.1336099585062245, "grad_norm": 8.55501651763916, "learning_rate": 1.8348215767634856e-05, "loss": 0.9749, "step": 4981 }, { "epoch": 4.134439834024897, "grad_norm": 16.255605697631836, "learning_rate": 1.834788381742739e-05, "loss": 0.7348, "step": 4982 }, { "epoch": 4.135269709543569, "grad_norm": 14.188125610351562, "learning_rate": 1.834755186721992e-05, "loss": 1.4864, "step": 4983 }, { "epoch": 4.136099585062241, "grad_norm": 15.489665031433105, "learning_rate": 1.834721991701245e-05, "loss": 1.2485, "step": 4984 }, { "epoch": 4.136929460580913, "grad_norm": 15.017971992492676, "learning_rate": 1.834688796680498e-05, "loss": 1.2601, "step": 4985 }, { "epoch": 4.1377593360995855, "grad_norm": 22.722476959228516, "learning_rate": 1.8346556016597513e-05, "loss": 2.292, "step": 4986 }, { "epoch": 4.138589211618258, "grad_norm": 15.782623291015625, "learning_rate": 1.8346224066390042e-05, "loss": 1.3811, "step": 4987 }, { "epoch": 4.13941908713693, "grad_norm": 7.266226768493652, "learning_rate": 1.8345892116182574e-05, "loss": 0.6235, "step": 4988 }, { "epoch": 4.140248962655602, "grad_norm": 14.3782958984375, "learning_rate": 1.8345560165975106e-05, "loss": 1.3851, "step": 4989 }, { "epoch": 4.141078838174274, "grad_norm": 8.660089492797852, "learning_rate": 1.8345228215767635e-05, "loss": 0.6199, "step": 4990 }, { "epoch": 4.141908713692946, "grad_norm": 13.408466339111328, "learning_rate": 1.8344896265560167e-05, "loss": 1.0946, "step": 4991 }, { "epoch": 
4.142738589211619, "grad_norm": 14.383794784545898, "learning_rate": 1.8344564315352696e-05, "loss": 1.3151, "step": 4992 }, { "epoch": 4.143568464730291, "grad_norm": 13.426956176757812, "learning_rate": 1.8344232365145228e-05, "loss": 1.6587, "step": 4993 }, { "epoch": 4.144398340248963, "grad_norm": 15.213772773742676, "learning_rate": 1.834390041493776e-05, "loss": 1.5774, "step": 4994 }, { "epoch": 4.145228215767635, "grad_norm": 14.373580932617188, "learning_rate": 1.8343568464730292e-05, "loss": 1.7969, "step": 4995 }, { "epoch": 4.146058091286307, "grad_norm": 8.420004844665527, "learning_rate": 1.8343236514522824e-05, "loss": 1.0624, "step": 4996 }, { "epoch": 4.14688796680498, "grad_norm": 15.263846397399902, "learning_rate": 1.8342904564315353e-05, "loss": 1.7712, "step": 4997 }, { "epoch": 4.147717842323652, "grad_norm": 12.045543670654297, "learning_rate": 1.8342572614107885e-05, "loss": 1.5181, "step": 4998 }, { "epoch": 4.148547717842324, "grad_norm": 14.638398170471191, "learning_rate": 1.8342240663900417e-05, "loss": 1.3792, "step": 4999 }, { "epoch": 4.149377593360996, "grad_norm": 16.602354049682617, "learning_rate": 1.834190871369295e-05, "loss": 1.2727, "step": 5000 }, { "epoch": 4.150207468879668, "grad_norm": 16.505821228027344, "learning_rate": 1.8341576763485478e-05, "loss": 1.253, "step": 5001 }, { "epoch": 4.151037344398341, "grad_norm": 15.924038887023926, "learning_rate": 1.834124481327801e-05, "loss": 1.1205, "step": 5002 }, { "epoch": 4.151867219917013, "grad_norm": 9.99044418334961, "learning_rate": 1.8340912863070542e-05, "loss": 0.8332, "step": 5003 }, { "epoch": 4.152697095435685, "grad_norm": 24.040761947631836, "learning_rate": 1.8340580912863074e-05, "loss": 2.8007, "step": 5004 }, { "epoch": 4.153526970954357, "grad_norm": 16.358657836914062, "learning_rate": 1.8340248962655603e-05, "loss": 2.0564, "step": 5005 }, { "epoch": 4.154356846473029, "grad_norm": 18.654376983642578, "learning_rate": 1.8339917012448135e-05, "loss": 
1.5292, "step": 5006 }, { "epoch": 4.155186721991702, "grad_norm": 10.321094512939453, "learning_rate": 1.8339585062240664e-05, "loss": 1.36, "step": 5007 }, { "epoch": 4.156016597510374, "grad_norm": 12.381718635559082, "learning_rate": 1.8339253112033196e-05, "loss": 1.3312, "step": 5008 }, { "epoch": 4.156846473029046, "grad_norm": 15.013113975524902, "learning_rate": 1.8338921161825728e-05, "loss": 1.4509, "step": 5009 }, { "epoch": 4.157676348547718, "grad_norm": 20.871776580810547, "learning_rate": 1.8338589211618257e-05, "loss": 2.2912, "step": 5010 }, { "epoch": 4.15850622406639, "grad_norm": 8.688532829284668, "learning_rate": 1.833825726141079e-05, "loss": 0.9233, "step": 5011 }, { "epoch": 4.159336099585063, "grad_norm": 18.170209884643555, "learning_rate": 1.833792531120332e-05, "loss": 1.728, "step": 5012 }, { "epoch": 4.160165975103735, "grad_norm": 9.104578971862793, "learning_rate": 1.833759336099585e-05, "loss": 0.8109, "step": 5013 }, { "epoch": 4.160995850622407, "grad_norm": 18.47806167602539, "learning_rate": 1.8337261410788382e-05, "loss": 1.7129, "step": 5014 }, { "epoch": 4.161825726141079, "grad_norm": 12.992082595825195, "learning_rate": 1.8336929460580914e-05, "loss": 1.6756, "step": 5015 }, { "epoch": 4.162655601659751, "grad_norm": 14.241910934448242, "learning_rate": 1.8336597510373446e-05, "loss": 1.8184, "step": 5016 }, { "epoch": 4.1634854771784235, "grad_norm": 9.991686820983887, "learning_rate": 1.8336265560165975e-05, "loss": 1.0414, "step": 5017 }, { "epoch": 4.164315352697096, "grad_norm": 9.013056755065918, "learning_rate": 1.8335933609958507e-05, "loss": 1.4805, "step": 5018 }, { "epoch": 4.165145228215768, "grad_norm": 11.359001159667969, "learning_rate": 1.833560165975104e-05, "loss": 1.2418, "step": 5019 }, { "epoch": 4.16597510373444, "grad_norm": 15.42612361907959, "learning_rate": 1.833526970954357e-05, "loss": 1.8497, "step": 5020 }, { "epoch": 4.166804979253112, "grad_norm": 10.219301223754883, "learning_rate": 
1.83349377593361e-05, "loss": 0.6391, "step": 5021 }, { "epoch": 4.1676348547717845, "grad_norm": 20.154438018798828, "learning_rate": 1.8334605809128632e-05, "loss": 1.1728, "step": 5022 }, { "epoch": 4.168464730290457, "grad_norm": 18.237346649169922, "learning_rate": 1.8334273858921164e-05, "loss": 1.9057, "step": 5023 }, { "epoch": 4.169294605809129, "grad_norm": 9.150467872619629, "learning_rate": 1.8333941908713696e-05, "loss": 0.9839, "step": 5024 }, { "epoch": 4.170124481327801, "grad_norm": 20.57369041442871, "learning_rate": 1.8333609958506225e-05, "loss": 1.8077, "step": 5025 }, { "epoch": 4.170954356846473, "grad_norm": 13.078910827636719, "learning_rate": 1.8333278008298757e-05, "loss": 1.6286, "step": 5026 }, { "epoch": 4.1717842323651455, "grad_norm": 20.73501968383789, "learning_rate": 1.833294605809129e-05, "loss": 2.1794, "step": 5027 }, { "epoch": 4.172614107883818, "grad_norm": 16.196399688720703, "learning_rate": 1.8332614107883818e-05, "loss": 1.4456, "step": 5028 }, { "epoch": 4.17344398340249, "grad_norm": 14.938542366027832, "learning_rate": 1.833228215767635e-05, "loss": 1.6582, "step": 5029 }, { "epoch": 4.174273858921162, "grad_norm": 11.1660795211792, "learning_rate": 1.833195020746888e-05, "loss": 1.0801, "step": 5030 }, { "epoch": 4.175103734439834, "grad_norm": 10.89555549621582, "learning_rate": 1.833161825726141e-05, "loss": 1.0397, "step": 5031 }, { "epoch": 4.1759336099585065, "grad_norm": 13.408668518066406, "learning_rate": 1.8331286307053943e-05, "loss": 1.3737, "step": 5032 }, { "epoch": 4.176763485477179, "grad_norm": 16.35780143737793, "learning_rate": 1.8330954356846475e-05, "loss": 1.1012, "step": 5033 }, { "epoch": 4.177593360995851, "grad_norm": 15.434636116027832, "learning_rate": 1.8330622406639004e-05, "loss": 1.5769, "step": 5034 }, { "epoch": 4.178423236514523, "grad_norm": 17.523622512817383, "learning_rate": 1.8330290456431536e-05, "loss": 0.8528, "step": 5035 }, { "epoch": 4.179253112033195, "grad_norm": 
11.969276428222656, "learning_rate": 1.8329958506224068e-05, "loss": 1.282, "step": 5036 }, { "epoch": 4.1800829875518675, "grad_norm": 15.313117980957031, "learning_rate": 1.83296265560166e-05, "loss": 1.7076, "step": 5037 }, { "epoch": 4.18091286307054, "grad_norm": 19.87818717956543, "learning_rate": 1.832929460580913e-05, "loss": 1.0897, "step": 5038 }, { "epoch": 4.181742738589212, "grad_norm": 13.607630729675293, "learning_rate": 1.832896265560166e-05, "loss": 1.4774, "step": 5039 }, { "epoch": 4.182572614107884, "grad_norm": 12.634137153625488, "learning_rate": 1.8328630705394193e-05, "loss": 1.4955, "step": 5040 }, { "epoch": 4.183402489626556, "grad_norm": 17.610437393188477, "learning_rate": 1.8328298755186725e-05, "loss": 1.9593, "step": 5041 }, { "epoch": 4.1842323651452284, "grad_norm": 19.21596336364746, "learning_rate": 1.8327966804979254e-05, "loss": 1.0576, "step": 5042 }, { "epoch": 4.185062240663901, "grad_norm": 10.865203857421875, "learning_rate": 1.8327634854771786e-05, "loss": 1.4748, "step": 5043 }, { "epoch": 4.185892116182573, "grad_norm": 17.084692001342773, "learning_rate": 1.8327302904564318e-05, "loss": 1.2552, "step": 5044 }, { "epoch": 4.186721991701245, "grad_norm": 14.293729782104492, "learning_rate": 1.832697095435685e-05, "loss": 1.5732, "step": 5045 }, { "epoch": 4.187551867219917, "grad_norm": 10.501733779907227, "learning_rate": 1.832663900414938e-05, "loss": 1.6871, "step": 5046 }, { "epoch": 4.188381742738589, "grad_norm": 13.693690299987793, "learning_rate": 1.832630705394191e-05, "loss": 1.0617, "step": 5047 }, { "epoch": 4.189211618257262, "grad_norm": 10.007823944091797, "learning_rate": 1.832597510373444e-05, "loss": 1.1246, "step": 5048 }, { "epoch": 4.190041493775934, "grad_norm": 10.233973503112793, "learning_rate": 1.8325643153526972e-05, "loss": 0.7665, "step": 5049 }, { "epoch": 4.190871369294606, "grad_norm": 22.59496307373047, "learning_rate": 1.8325311203319504e-05, "loss": 1.7654, "step": 5050 }, { "epoch": 
4.191701244813278, "grad_norm": 11.804381370544434, "learning_rate": 1.8324979253112033e-05, "loss": 1.356, "step": 5051 }, { "epoch": 4.19253112033195, "grad_norm": 18.569931030273438, "learning_rate": 1.8324647302904565e-05, "loss": 0.6103, "step": 5052 }, { "epoch": 4.193360995850623, "grad_norm": 18.337186813354492, "learning_rate": 1.8324315352697097e-05, "loss": 1.109, "step": 5053 }, { "epoch": 4.194190871369295, "grad_norm": 9.90226936340332, "learning_rate": 1.832398340248963e-05, "loss": 0.9229, "step": 5054 }, { "epoch": 4.195020746887967, "grad_norm": 15.196566581726074, "learning_rate": 1.8323651452282158e-05, "loss": 1.1756, "step": 5055 }, { "epoch": 4.195850622406639, "grad_norm": 9.776313781738281, "learning_rate": 1.832331950207469e-05, "loss": 1.0237, "step": 5056 }, { "epoch": 4.196680497925311, "grad_norm": 17.78888702392578, "learning_rate": 1.8322987551867222e-05, "loss": 1.956, "step": 5057 }, { "epoch": 4.197510373443984, "grad_norm": 11.638833045959473, "learning_rate": 1.8322655601659754e-05, "loss": 1.3223, "step": 5058 }, { "epoch": 4.198340248962656, "grad_norm": 17.675582885742188, "learning_rate": 1.8322323651452283e-05, "loss": 2.232, "step": 5059 }, { "epoch": 4.199170124481328, "grad_norm": 9.988609313964844, "learning_rate": 1.8321991701244815e-05, "loss": 1.4858, "step": 5060 }, { "epoch": 4.2, "grad_norm": 20.55324935913086, "learning_rate": 1.8321659751037347e-05, "loss": 2.0113, "step": 5061 }, { "epoch": 4.200829875518672, "grad_norm": 11.61731243133545, "learning_rate": 1.832132780082988e-05, "loss": 1.2835, "step": 5062 }, { "epoch": 4.201659751037345, "grad_norm": 32.4781379699707, "learning_rate": 1.8320995850622408e-05, "loss": 2.4651, "step": 5063 }, { "epoch": 4.202489626556017, "grad_norm": 10.415848731994629, "learning_rate": 1.832066390041494e-05, "loss": 1.2863, "step": 5064 }, { "epoch": 4.203319502074689, "grad_norm": 12.210722923278809, "learning_rate": 1.8320331950207472e-05, "loss": 1.4174, "step": 5065 }, { 
"epoch": 4.204149377593361, "grad_norm": 12.971435546875, "learning_rate": 1.832e-05, "loss": 1.3669, "step": 5066 }, { "epoch": 4.204979253112033, "grad_norm": 12.929647445678711, "learning_rate": 1.8319668049792533e-05, "loss": 1.1363, "step": 5067 }, { "epoch": 4.2058091286307056, "grad_norm": 16.459184646606445, "learning_rate": 1.8319336099585065e-05, "loss": 2.2405, "step": 5068 }, { "epoch": 4.206639004149378, "grad_norm": 15.962919235229492, "learning_rate": 1.8319004149377594e-05, "loss": 1.8639, "step": 5069 }, { "epoch": 4.20746887966805, "grad_norm": 13.631397247314453, "learning_rate": 1.8318672199170126e-05, "loss": 1.0149, "step": 5070 }, { "epoch": 4.208298755186722, "grad_norm": 12.632491111755371, "learning_rate": 1.8318340248962655e-05, "loss": 1.7295, "step": 5071 }, { "epoch": 4.209128630705394, "grad_norm": 12.704483985900879, "learning_rate": 1.8318008298755187e-05, "loss": 1.1344, "step": 5072 }, { "epoch": 4.2099585062240665, "grad_norm": 19.707590103149414, "learning_rate": 1.831767634854772e-05, "loss": 2.2873, "step": 5073 }, { "epoch": 4.210788381742739, "grad_norm": 18.723735809326172, "learning_rate": 1.831734439834025e-05, "loss": 1.7364, "step": 5074 }, { "epoch": 4.211618257261411, "grad_norm": 16.315765380859375, "learning_rate": 1.8317012448132783e-05, "loss": 1.2755, "step": 5075 }, { "epoch": 4.212448132780083, "grad_norm": 18.486730575561523, "learning_rate": 1.831668049792531e-05, "loss": 1.3481, "step": 5076 }, { "epoch": 4.213278008298755, "grad_norm": 17.432567596435547, "learning_rate": 1.8316348547717844e-05, "loss": 1.5816, "step": 5077 }, { "epoch": 4.2141078838174275, "grad_norm": 10.953680038452148, "learning_rate": 1.8316016597510376e-05, "loss": 1.321, "step": 5078 }, { "epoch": 4.2149377593361, "grad_norm": 10.570313453674316, "learning_rate": 1.8315684647302908e-05, "loss": 1.3416, "step": 5079 }, { "epoch": 4.215767634854772, "grad_norm": 17.115650177001953, "learning_rate": 1.8315352697095437e-05, "loss": 
2.273, "step": 5080 }, { "epoch": 4.216597510373444, "grad_norm": 9.988577842712402, "learning_rate": 1.831502074688797e-05, "loss": 1.1428, "step": 5081 }, { "epoch": 4.217427385892116, "grad_norm": 19.220531463623047, "learning_rate": 1.83146887966805e-05, "loss": 1.5433, "step": 5082 }, { "epoch": 4.2182572614107885, "grad_norm": 9.534348487854004, "learning_rate": 1.8314356846473033e-05, "loss": 0.7279, "step": 5083 }, { "epoch": 4.219087136929461, "grad_norm": 17.56696319580078, "learning_rate": 1.8314024896265562e-05, "loss": 1.5835, "step": 5084 }, { "epoch": 4.219917012448133, "grad_norm": 13.947196960449219, "learning_rate": 1.8313692946058094e-05, "loss": 0.7691, "step": 5085 }, { "epoch": 4.220746887966805, "grad_norm": 17.450267791748047, "learning_rate": 1.8313360995850623e-05, "loss": 1.4689, "step": 5086 }, { "epoch": 4.221576763485477, "grad_norm": 8.768145561218262, "learning_rate": 1.8313029045643155e-05, "loss": 0.931, "step": 5087 }, { "epoch": 4.2224066390041495, "grad_norm": 7.251339435577393, "learning_rate": 1.8312697095435687e-05, "loss": 0.4473, "step": 5088 }, { "epoch": 4.223236514522822, "grad_norm": 12.196138381958008, "learning_rate": 1.8312365145228216e-05, "loss": 0.9295, "step": 5089 }, { "epoch": 4.224066390041494, "grad_norm": 16.419090270996094, "learning_rate": 1.8312033195020748e-05, "loss": 1.785, "step": 5090 }, { "epoch": 4.224896265560166, "grad_norm": 10.857132911682129, "learning_rate": 1.831170124481328e-05, "loss": 1.4268, "step": 5091 }, { "epoch": 4.225726141078838, "grad_norm": 10.480254173278809, "learning_rate": 1.831136929460581e-05, "loss": 1.1661, "step": 5092 }, { "epoch": 4.2265560165975105, "grad_norm": 16.196664810180664, "learning_rate": 1.831103734439834e-05, "loss": 1.2081, "step": 5093 }, { "epoch": 4.227385892116183, "grad_norm": 17.882001876831055, "learning_rate": 1.8310705394190873e-05, "loss": 1.1094, "step": 5094 }, { "epoch": 4.228215767634855, "grad_norm": 9.627055168151855, "learning_rate": 
1.8310373443983405e-05, "loss": 0.8178, "step": 5095 }, { "epoch": 4.229045643153527, "grad_norm": 10.100174903869629, "learning_rate": 1.8310041493775934e-05, "loss": 0.8158, "step": 5096 }, { "epoch": 4.229875518672199, "grad_norm": 17.25478744506836, "learning_rate": 1.8309709543568466e-05, "loss": 1.9171, "step": 5097 }, { "epoch": 4.230705394190871, "grad_norm": 13.92207145690918, "learning_rate": 1.8309377593360998e-05, "loss": 1.6275, "step": 5098 }, { "epoch": 4.231535269709544, "grad_norm": 12.468681335449219, "learning_rate": 1.830904564315353e-05, "loss": 0.8096, "step": 5099 }, { "epoch": 4.232365145228216, "grad_norm": 16.1026554107666, "learning_rate": 1.830871369294606e-05, "loss": 1.4392, "step": 5100 }, { "epoch": 4.233195020746888, "grad_norm": 13.935612678527832, "learning_rate": 1.830838174273859e-05, "loss": 1.0539, "step": 5101 }, { "epoch": 4.23402489626556, "grad_norm": 17.151660919189453, "learning_rate": 1.8308049792531123e-05, "loss": 1.8494, "step": 5102 }, { "epoch": 4.234854771784232, "grad_norm": 14.694818496704102, "learning_rate": 1.8307717842323655e-05, "loss": 1.4781, "step": 5103 }, { "epoch": 4.235684647302905, "grad_norm": 23.10759925842285, "learning_rate": 1.8307385892116184e-05, "loss": 1.4718, "step": 5104 }, { "epoch": 4.236514522821577, "grad_norm": 8.702200889587402, "learning_rate": 1.8307053941908716e-05, "loss": 1.0812, "step": 5105 }, { "epoch": 4.237344398340249, "grad_norm": 9.674163818359375, "learning_rate": 1.8306721991701248e-05, "loss": 1.0081, "step": 5106 }, { "epoch": 4.238174273858921, "grad_norm": 17.564170837402344, "learning_rate": 1.8306390041493777e-05, "loss": 1.3699, "step": 5107 }, { "epoch": 4.239004149377593, "grad_norm": 13.137238502502441, "learning_rate": 1.830605809128631e-05, "loss": 1.0393, "step": 5108 }, { "epoch": 4.239834024896266, "grad_norm": 19.336015701293945, "learning_rate": 1.8305726141078837e-05, "loss": 1.6114, "step": 5109 }, { "epoch": 4.240663900414938, "grad_norm": 
11.585476875305176, "learning_rate": 1.830539419087137e-05, "loss": 1.47, "step": 5110 }, { "epoch": 4.24149377593361, "grad_norm": 19.04075813293457, "learning_rate": 1.83050622406639e-05, "loss": 1.0703, "step": 5111 }, { "epoch": 4.242323651452282, "grad_norm": 10.137799263000488, "learning_rate": 1.8304730290456434e-05, "loss": 0.6341, "step": 5112 }, { "epoch": 4.243153526970954, "grad_norm": 10.83258056640625, "learning_rate": 1.8304398340248962e-05, "loss": 1.0214, "step": 5113 }, { "epoch": 4.243983402489627, "grad_norm": 12.435966491699219, "learning_rate": 1.8304066390041495e-05, "loss": 1.4023, "step": 5114 }, { "epoch": 4.244813278008299, "grad_norm": 13.15057373046875, "learning_rate": 1.8303734439834027e-05, "loss": 1.5575, "step": 5115 }, { "epoch": 4.245643153526971, "grad_norm": 11.325773239135742, "learning_rate": 1.830340248962656e-05, "loss": 0.8959, "step": 5116 }, { "epoch": 4.246473029045643, "grad_norm": 13.5670166015625, "learning_rate": 1.8303070539419087e-05, "loss": 1.215, "step": 5117 }, { "epoch": 4.247302904564315, "grad_norm": 16.248292922973633, "learning_rate": 1.830273858921162e-05, "loss": 1.5934, "step": 5118 }, { "epoch": 4.248132780082988, "grad_norm": 15.803533554077148, "learning_rate": 1.830240663900415e-05, "loss": 2.2446, "step": 5119 }, { "epoch": 4.24896265560166, "grad_norm": 13.168496131896973, "learning_rate": 1.8302074688796684e-05, "loss": 0.8145, "step": 5120 }, { "epoch": 4.249792531120332, "grad_norm": 17.12794303894043, "learning_rate": 1.8301742738589212e-05, "loss": 1.3437, "step": 5121 }, { "epoch": 4.250622406639004, "grad_norm": 16.709327697753906, "learning_rate": 1.8301410788381745e-05, "loss": 2.0351, "step": 5122 }, { "epoch": 4.251452282157676, "grad_norm": 14.484366416931152, "learning_rate": 1.8301078838174277e-05, "loss": 1.3369, "step": 5123 }, { "epoch": 4.2522821576763485, "grad_norm": 16.99020004272461, "learning_rate": 1.8300746887966805e-05, "loss": 1.3779, "step": 5124 }, { "epoch": 
4.253112033195021, "grad_norm": 11.828866958618164, "learning_rate": 1.8300414937759338e-05, "loss": 1.3284, "step": 5125 }, { "epoch": 4.253941908713693, "grad_norm": 9.120285034179688, "learning_rate": 1.830008298755187e-05, "loss": 0.6642, "step": 5126 }, { "epoch": 4.254771784232365, "grad_norm": 20.07598114013672, "learning_rate": 1.82997510373444e-05, "loss": 2.0956, "step": 5127 }, { "epoch": 4.255601659751037, "grad_norm": 16.5772762298584, "learning_rate": 1.829941908713693e-05, "loss": 1.3545, "step": 5128 }, { "epoch": 4.2564315352697095, "grad_norm": 12.430036544799805, "learning_rate": 1.8299087136929463e-05, "loss": 1.3405, "step": 5129 }, { "epoch": 4.257261410788382, "grad_norm": 20.81964111328125, "learning_rate": 1.829875518672199e-05, "loss": 2.556, "step": 5130 }, { "epoch": 4.258091286307054, "grad_norm": 16.250202178955078, "learning_rate": 1.8298423236514523e-05, "loss": 1.09, "step": 5131 }, { "epoch": 4.258921161825726, "grad_norm": 12.182912826538086, "learning_rate": 1.8298091286307056e-05, "loss": 1.206, "step": 5132 }, { "epoch": 4.259751037344398, "grad_norm": 31.068838119506836, "learning_rate": 1.8297759336099588e-05, "loss": 1.2507, "step": 5133 }, { "epoch": 4.2605809128630705, "grad_norm": 9.801301956176758, "learning_rate": 1.8297427385892116e-05, "loss": 1.2295, "step": 5134 }, { "epoch": 4.261410788381743, "grad_norm": 12.62170124053955, "learning_rate": 1.829709543568465e-05, "loss": 0.8608, "step": 5135 }, { "epoch": 4.262240663900415, "grad_norm": 14.807416915893555, "learning_rate": 1.829676348547718e-05, "loss": 1.1278, "step": 5136 }, { "epoch": 4.263070539419087, "grad_norm": 11.358887672424316, "learning_rate": 1.8296431535269713e-05, "loss": 1.3818, "step": 5137 }, { "epoch": 4.263900414937759, "grad_norm": 11.517962455749512, "learning_rate": 1.829609958506224e-05, "loss": 1.5343, "step": 5138 }, { "epoch": 4.2647302904564315, "grad_norm": 18.220674514770508, "learning_rate": 1.8295767634854773e-05, "loss": 1.0364, 
"step": 5139 }, { "epoch": 4.265560165975104, "grad_norm": 18.168886184692383, "learning_rate": 1.8295435684647306e-05, "loss": 1.2986, "step": 5140 }, { "epoch": 4.266390041493776, "grad_norm": 16.048490524291992, "learning_rate": 1.8295103734439838e-05, "loss": 1.7588, "step": 5141 }, { "epoch": 4.267219917012448, "grad_norm": 10.877685546875, "learning_rate": 1.8294771784232366e-05, "loss": 1.0804, "step": 5142 }, { "epoch": 4.26804979253112, "grad_norm": 15.425410270690918, "learning_rate": 1.82944398340249e-05, "loss": 1.6914, "step": 5143 }, { "epoch": 4.2688796680497925, "grad_norm": 12.262040138244629, "learning_rate": 1.829410788381743e-05, "loss": 1.174, "step": 5144 }, { "epoch": 4.269709543568465, "grad_norm": 13.098991394042969, "learning_rate": 1.829377593360996e-05, "loss": 1.9656, "step": 5145 }, { "epoch": 4.270539419087137, "grad_norm": 11.496848106384277, "learning_rate": 1.829344398340249e-05, "loss": 1.2824, "step": 5146 }, { "epoch": 4.271369294605809, "grad_norm": 12.296554565429688, "learning_rate": 1.829311203319502e-05, "loss": 1.2827, "step": 5147 }, { "epoch": 4.272199170124481, "grad_norm": 15.287501335144043, "learning_rate": 1.8292780082987552e-05, "loss": 1.4142, "step": 5148 }, { "epoch": 4.2730290456431534, "grad_norm": 10.737537384033203, "learning_rate": 1.8292448132780084e-05, "loss": 1.6072, "step": 5149 }, { "epoch": 4.273858921161826, "grad_norm": 18.880285263061523, "learning_rate": 1.8292116182572613e-05, "loss": 1.2683, "step": 5150 }, { "epoch": 4.274688796680498, "grad_norm": 12.65750789642334, "learning_rate": 1.8291784232365145e-05, "loss": 1.132, "step": 5151 }, { "epoch": 4.27551867219917, "grad_norm": 15.259390830993652, "learning_rate": 1.8291452282157677e-05, "loss": 1.3979, "step": 5152 }, { "epoch": 4.276348547717842, "grad_norm": 17.047338485717773, "learning_rate": 1.829112033195021e-05, "loss": 1.1179, "step": 5153 }, { "epoch": 4.277178423236514, "grad_norm": 14.338834762573242, "learning_rate": 
1.829078838174274e-05, "loss": 1.5521, "step": 5154 }, { "epoch": 4.278008298755187, "grad_norm": 14.747817993164062, "learning_rate": 1.829045643153527e-05, "loss": 1.7382, "step": 5155 }, { "epoch": 4.278838174273859, "grad_norm": 11.707688331604004, "learning_rate": 1.8290124481327802e-05, "loss": 1.2521, "step": 5156 }, { "epoch": 4.279668049792531, "grad_norm": 11.823092460632324, "learning_rate": 1.8289792531120334e-05, "loss": 1.4194, "step": 5157 }, { "epoch": 4.280497925311203, "grad_norm": 14.16942310333252, "learning_rate": 1.8289460580912867e-05, "loss": 1.6178, "step": 5158 }, { "epoch": 4.281327800829875, "grad_norm": 13.988332748413086, "learning_rate": 1.8289128630705395e-05, "loss": 1.3023, "step": 5159 }, { "epoch": 4.282157676348548, "grad_norm": 8.323294639587402, "learning_rate": 1.8288796680497927e-05, "loss": 0.9366, "step": 5160 }, { "epoch": 4.28298755186722, "grad_norm": 12.430182456970215, "learning_rate": 1.828846473029046e-05, "loss": 1.2557, "step": 5161 }, { "epoch": 4.283817427385892, "grad_norm": 11.81238079071045, "learning_rate": 1.828813278008299e-05, "loss": 0.9721, "step": 5162 }, { "epoch": 4.284647302904564, "grad_norm": 12.952916145324707, "learning_rate": 1.828780082987552e-05, "loss": 0.9689, "step": 5163 }, { "epoch": 4.285477178423236, "grad_norm": 23.731109619140625, "learning_rate": 1.8287468879668052e-05, "loss": 1.8287, "step": 5164 }, { "epoch": 4.286307053941909, "grad_norm": 18.816078186035156, "learning_rate": 1.828713692946058e-05, "loss": 1.1022, "step": 5165 }, { "epoch": 4.287136929460581, "grad_norm": 27.8134822845459, "learning_rate": 1.8286804979253113e-05, "loss": 1.2017, "step": 5166 }, { "epoch": 4.287966804979253, "grad_norm": 26.01520538330078, "learning_rate": 1.8286473029045645e-05, "loss": 1.4537, "step": 5167 }, { "epoch": 4.288796680497925, "grad_norm": 12.272680282592773, "learning_rate": 1.8286141078838174e-05, "loss": 1.2514, "step": 5168 }, { "epoch": 4.289626556016597, "grad_norm": 
11.512808799743652, "learning_rate": 1.8285809128630706e-05, "loss": 1.189, "step": 5169 }, { "epoch": 4.29045643153527, "grad_norm": 22.217796325683594, "learning_rate": 1.828547717842324e-05, "loss": 1.158, "step": 5170 }, { "epoch": 4.291286307053942, "grad_norm": 13.696503639221191, "learning_rate": 1.8285145228215767e-05, "loss": 1.3088, "step": 5171 }, { "epoch": 4.292116182572614, "grad_norm": 14.961424827575684, "learning_rate": 1.82848132780083e-05, "loss": 1.1485, "step": 5172 }, { "epoch": 4.292946058091286, "grad_norm": 13.965357780456543, "learning_rate": 1.828448132780083e-05, "loss": 1.3281, "step": 5173 }, { "epoch": 4.293775933609958, "grad_norm": 15.713706016540527, "learning_rate": 1.8284149377593363e-05, "loss": 1.2615, "step": 5174 }, { "epoch": 4.2946058091286305, "grad_norm": 22.840972900390625, "learning_rate": 1.8283817427385892e-05, "loss": 1.7827, "step": 5175 }, { "epoch": 4.295435684647303, "grad_norm": 13.678256034851074, "learning_rate": 1.8283485477178424e-05, "loss": 1.3441, "step": 5176 }, { "epoch": 4.296265560165975, "grad_norm": 13.316431045532227, "learning_rate": 1.8283153526970956e-05, "loss": 1.3152, "step": 5177 }, { "epoch": 4.297095435684647, "grad_norm": 16.697904586791992, "learning_rate": 1.828282157676349e-05, "loss": 0.9808, "step": 5178 }, { "epoch": 4.297925311203319, "grad_norm": 11.406844139099121, "learning_rate": 1.8282489626556017e-05, "loss": 0.7483, "step": 5179 }, { "epoch": 4.2987551867219915, "grad_norm": 14.12951946258545, "learning_rate": 1.828215767634855e-05, "loss": 1.6076, "step": 5180 }, { "epoch": 4.299585062240664, "grad_norm": 16.593236923217773, "learning_rate": 1.828182572614108e-05, "loss": 1.3757, "step": 5181 }, { "epoch": 4.300414937759336, "grad_norm": 9.448348045349121, "learning_rate": 1.8281493775933613e-05, "loss": 0.9323, "step": 5182 }, { "epoch": 4.301244813278008, "grad_norm": 16.16413116455078, "learning_rate": 1.8281161825726142e-05, "loss": 1.4421, "step": 5183 }, { "epoch": 
4.30207468879668, "grad_norm": 20.471731185913086, "learning_rate": 1.8280829875518674e-05, "loss": 1.5985, "step": 5184 }, { "epoch": 4.3029045643153525, "grad_norm": 14.172061920166016, "learning_rate": 1.8280497925311206e-05, "loss": 1.0788, "step": 5185 }, { "epoch": 4.303734439834025, "grad_norm": 15.72762680053711, "learning_rate": 1.8280165975103735e-05, "loss": 1.3926, "step": 5186 }, { "epoch": 4.304564315352697, "grad_norm": 16.359704971313477, "learning_rate": 1.8279834024896267e-05, "loss": 1.1176, "step": 5187 }, { "epoch": 4.305394190871369, "grad_norm": 18.6568603515625, "learning_rate": 1.8279502074688796e-05, "loss": 1.5592, "step": 5188 }, { "epoch": 4.306224066390041, "grad_norm": 12.566786766052246, "learning_rate": 1.8279170124481328e-05, "loss": 1.138, "step": 5189 }, { "epoch": 4.3070539419087135, "grad_norm": 15.48395824432373, "learning_rate": 1.827883817427386e-05, "loss": 1.255, "step": 5190 }, { "epoch": 4.307883817427386, "grad_norm": 16.616661071777344, "learning_rate": 1.8278506224066392e-05, "loss": 0.9539, "step": 5191 }, { "epoch": 4.308713692946058, "grad_norm": 16.90814208984375, "learning_rate": 1.827817427385892e-05, "loss": 1.0639, "step": 5192 }, { "epoch": 4.30954356846473, "grad_norm": 21.01140022277832, "learning_rate": 1.8277842323651453e-05, "loss": 0.9688, "step": 5193 }, { "epoch": 4.310373443983402, "grad_norm": 13.574223518371582, "learning_rate": 1.8277510373443985e-05, "loss": 1.6563, "step": 5194 }, { "epoch": 4.3112033195020745, "grad_norm": 20.04719352722168, "learning_rate": 1.8277178423236517e-05, "loss": 0.6381, "step": 5195 }, { "epoch": 4.312033195020747, "grad_norm": 22.691028594970703, "learning_rate": 1.8276846473029046e-05, "loss": 2.0894, "step": 5196 }, { "epoch": 4.312863070539419, "grad_norm": 18.845712661743164, "learning_rate": 1.8276514522821578e-05, "loss": 1.725, "step": 5197 }, { "epoch": 4.313692946058091, "grad_norm": 11.910076141357422, "learning_rate": 1.827618257261411e-05, "loss": 
1.0659, "step": 5198 }, { "epoch": 4.314522821576763, "grad_norm": 11.129748344421387, "learning_rate": 1.8275850622406642e-05, "loss": 0.8916, "step": 5199 }, { "epoch": 4.3153526970954355, "grad_norm": 16.337411880493164, "learning_rate": 1.827551867219917e-05, "loss": 1.5837, "step": 5200 }, { "epoch": 4.316182572614108, "grad_norm": 16.324254989624023, "learning_rate": 1.8275186721991703e-05, "loss": 1.3899, "step": 5201 }, { "epoch": 4.31701244813278, "grad_norm": 20.241636276245117, "learning_rate": 1.8274854771784235e-05, "loss": 1.4147, "step": 5202 }, { "epoch": 4.317842323651452, "grad_norm": 13.024633407592773, "learning_rate": 1.8274522821576764e-05, "loss": 1.5025, "step": 5203 }, { "epoch": 4.318672199170124, "grad_norm": 16.97525405883789, "learning_rate": 1.8274190871369296e-05, "loss": 1.9473, "step": 5204 }, { "epoch": 4.319502074688796, "grad_norm": 20.147520065307617, "learning_rate": 1.8273858921161828e-05, "loss": 2.0906, "step": 5205 }, { "epoch": 4.320331950207469, "grad_norm": 17.622209548950195, "learning_rate": 1.8273526970954357e-05, "loss": 1.519, "step": 5206 }, { "epoch": 4.321161825726141, "grad_norm": 14.797100067138672, "learning_rate": 1.827319502074689e-05, "loss": 1.2313, "step": 5207 }, { "epoch": 4.321991701244813, "grad_norm": 15.13411808013916, "learning_rate": 1.827286307053942e-05, "loss": 1.3559, "step": 5208 }, { "epoch": 4.322821576763485, "grad_norm": 12.841116905212402, "learning_rate": 1.827253112033195e-05, "loss": 1.7545, "step": 5209 }, { "epoch": 4.323651452282157, "grad_norm": 16.389551162719727, "learning_rate": 1.8272199170124482e-05, "loss": 0.9591, "step": 5210 }, { "epoch": 4.32448132780083, "grad_norm": 15.758413314819336, "learning_rate": 1.8271867219917014e-05, "loss": 2.0144, "step": 5211 }, { "epoch": 4.325311203319502, "grad_norm": 9.173931121826172, "learning_rate": 1.8271535269709546e-05, "loss": 0.9309, "step": 5212 }, { "epoch": 4.326141078838174, "grad_norm": 15.591995239257812, "learning_rate": 
1.8271203319502075e-05, "loss": 1.2332, "step": 5213 }, { "epoch": 4.326970954356846, "grad_norm": 19.842193603515625, "learning_rate": 1.8270871369294607e-05, "loss": 1.4193, "step": 5214 }, { "epoch": 4.327800829875518, "grad_norm": 15.759269714355469, "learning_rate": 1.827053941908714e-05, "loss": 0.9513, "step": 5215 }, { "epoch": 4.328630705394191, "grad_norm": 10.74172306060791, "learning_rate": 1.827020746887967e-05, "loss": 1.4283, "step": 5216 }, { "epoch": 4.329460580912863, "grad_norm": 18.984516143798828, "learning_rate": 1.82698755186722e-05, "loss": 1.245, "step": 5217 }, { "epoch": 4.330290456431535, "grad_norm": 10.917500495910645, "learning_rate": 1.8269543568464732e-05, "loss": 1.2719, "step": 5218 }, { "epoch": 4.331120331950207, "grad_norm": 13.848150253295898, "learning_rate": 1.8269211618257264e-05, "loss": 1.5314, "step": 5219 }, { "epoch": 4.331950207468879, "grad_norm": 15.014172554016113, "learning_rate": 1.8268879668049796e-05, "loss": 1.4563, "step": 5220 }, { "epoch": 4.332780082987552, "grad_norm": 16.5062255859375, "learning_rate": 1.8268547717842325e-05, "loss": 1.6337, "step": 5221 }, { "epoch": 4.333609958506224, "grad_norm": 10.044733047485352, "learning_rate": 1.8268215767634857e-05, "loss": 0.9487, "step": 5222 }, { "epoch": 4.334439834024896, "grad_norm": 8.762482643127441, "learning_rate": 1.826788381742739e-05, "loss": 0.843, "step": 5223 }, { "epoch": 4.335269709543568, "grad_norm": 16.91419792175293, "learning_rate": 1.8267551867219918e-05, "loss": 1.7668, "step": 5224 }, { "epoch": 4.33609958506224, "grad_norm": 17.9198055267334, "learning_rate": 1.826721991701245e-05, "loss": 1.4534, "step": 5225 }, { "epoch": 4.3369294605809126, "grad_norm": 15.147297859191895, "learning_rate": 1.826688796680498e-05, "loss": 1.7302, "step": 5226 }, { "epoch": 4.337759336099585, "grad_norm": 18.446413040161133, "learning_rate": 1.826655601659751e-05, "loss": 1.3918, "step": 5227 }, { "epoch": 4.338589211618257, "grad_norm": 
12.698823928833008, "learning_rate": 1.8266224066390043e-05, "loss": 1.4708, "step": 5228 }, { "epoch": 4.339419087136929, "grad_norm": 9.516239166259766, "learning_rate": 1.826589211618257e-05, "loss": 0.9795, "step": 5229 }, { "epoch": 4.340248962655601, "grad_norm": 12.154705047607422, "learning_rate": 1.8265560165975104e-05, "loss": 1.2224, "step": 5230 }, { "epoch": 4.3410788381742735, "grad_norm": 12.51566219329834, "learning_rate": 1.8265228215767636e-05, "loss": 1.6993, "step": 5231 }, { "epoch": 4.341908713692946, "grad_norm": 19.877592086791992, "learning_rate": 1.8264896265560168e-05, "loss": 1.434, "step": 5232 }, { "epoch": 4.342738589211618, "grad_norm": 11.864407539367676, "learning_rate": 1.82645643153527e-05, "loss": 1.4501, "step": 5233 }, { "epoch": 4.34356846473029, "grad_norm": 13.01565170288086, "learning_rate": 1.826423236514523e-05, "loss": 2.1379, "step": 5234 }, { "epoch": 4.344398340248962, "grad_norm": 20.111738204956055, "learning_rate": 1.826390041493776e-05, "loss": 1.5795, "step": 5235 }, { "epoch": 4.3452282157676345, "grad_norm": 21.704160690307617, "learning_rate": 1.8263568464730293e-05, "loss": 2.0936, "step": 5236 }, { "epoch": 4.346058091286307, "grad_norm": 11.87844181060791, "learning_rate": 1.8263236514522825e-05, "loss": 1.5138, "step": 5237 }, { "epoch": 4.346887966804979, "grad_norm": 10.196870803833008, "learning_rate": 1.8262904564315354e-05, "loss": 0.747, "step": 5238 }, { "epoch": 4.347717842323651, "grad_norm": 10.970168113708496, "learning_rate": 1.8262572614107886e-05, "loss": 1.5167, "step": 5239 }, { "epoch": 4.348547717842323, "grad_norm": 10.325639724731445, "learning_rate": 1.8262240663900418e-05, "loss": 1.2936, "step": 5240 }, { "epoch": 4.3493775933609955, "grad_norm": 12.988848686218262, "learning_rate": 1.8261908713692947e-05, "loss": 1.4139, "step": 5241 }, { "epoch": 4.350207468879668, "grad_norm": 14.760241508483887, "learning_rate": 1.826157676348548e-05, "loss": 1.373, "step": 5242 }, { "epoch": 
4.35103734439834, "grad_norm": 11.113277435302734, "learning_rate": 1.826124481327801e-05, "loss": 1.1011, "step": 5243 }, { "epoch": 4.351867219917012, "grad_norm": 12.223127365112305, "learning_rate": 1.826091286307054e-05, "loss": 1.4602, "step": 5244 }, { "epoch": 4.352697095435684, "grad_norm": 13.527121543884277, "learning_rate": 1.8260580912863072e-05, "loss": 1.7154, "step": 5245 }, { "epoch": 4.3535269709543565, "grad_norm": 17.037607192993164, "learning_rate": 1.8260248962655604e-05, "loss": 1.9771, "step": 5246 }, { "epoch": 4.354356846473029, "grad_norm": 18.617752075195312, "learning_rate": 1.8259917012448133e-05, "loss": 1.6753, "step": 5247 }, { "epoch": 4.355186721991701, "grad_norm": 11.152627944946289, "learning_rate": 1.8259585062240665e-05, "loss": 1.2291, "step": 5248 }, { "epoch": 4.356016597510373, "grad_norm": 10.677518844604492, "learning_rate": 1.8259253112033197e-05, "loss": 1.173, "step": 5249 }, { "epoch": 4.356846473029045, "grad_norm": 10.642374992370605, "learning_rate": 1.8258921161825726e-05, "loss": 1.0981, "step": 5250 }, { "epoch": 4.3576763485477175, "grad_norm": 15.452634811401367, "learning_rate": 1.8258589211618258e-05, "loss": 1.1614, "step": 5251 }, { "epoch": 4.35850622406639, "grad_norm": 17.921037673950195, "learning_rate": 1.825825726141079e-05, "loss": 1.0127, "step": 5252 }, { "epoch": 4.359336099585062, "grad_norm": 19.83861541748047, "learning_rate": 1.8257925311203322e-05, "loss": 1.5905, "step": 5253 }, { "epoch": 4.360165975103734, "grad_norm": 10.648608207702637, "learning_rate": 1.825759336099585e-05, "loss": 1.3724, "step": 5254 }, { "epoch": 4.360995850622406, "grad_norm": 11.928756713867188, "learning_rate": 1.8257261410788383e-05, "loss": 1.4149, "step": 5255 }, { "epoch": 4.361825726141078, "grad_norm": 15.613259315490723, "learning_rate": 1.8256929460580915e-05, "loss": 1.1012, "step": 5256 }, { "epoch": 4.362655601659751, "grad_norm": 8.726663589477539, "learning_rate": 1.8256597510373447e-05, "loss": 
1.1396, "step": 5257 }, { "epoch": 4.363485477178423, "grad_norm": 14.059844970703125, "learning_rate": 1.8256265560165976e-05, "loss": 2.1415, "step": 5258 }, { "epoch": 4.364315352697095, "grad_norm": 9.114884376525879, "learning_rate": 1.8255933609958508e-05, "loss": 0.6864, "step": 5259 }, { "epoch": 4.365145228215767, "grad_norm": 10.922210693359375, "learning_rate": 1.825560165975104e-05, "loss": 1.4953, "step": 5260 }, { "epoch": 4.365975103734439, "grad_norm": 11.436659812927246, "learning_rate": 1.8255269709543572e-05, "loss": 0.7805, "step": 5261 }, { "epoch": 4.366804979253112, "grad_norm": 14.434393882751465, "learning_rate": 1.82549377593361e-05, "loss": 0.9957, "step": 5262 }, { "epoch": 4.367634854771785, "grad_norm": 14.60749340057373, "learning_rate": 1.8254605809128633e-05, "loss": 1.5228, "step": 5263 }, { "epoch": 4.368464730290457, "grad_norm": 10.756621360778809, "learning_rate": 1.825427385892116e-05, "loss": 1.1621, "step": 5264 }, { "epoch": 4.369294605809129, "grad_norm": 23.361160278320312, "learning_rate": 1.8253941908713694e-05, "loss": 1.8886, "step": 5265 }, { "epoch": 4.370124481327801, "grad_norm": 11.940540313720703, "learning_rate": 1.8253609958506226e-05, "loss": 0.9061, "step": 5266 }, { "epoch": 4.3709543568464735, "grad_norm": 10.100438117980957, "learning_rate": 1.8253278008298755e-05, "loss": 0.9086, "step": 5267 }, { "epoch": 4.371784232365146, "grad_norm": 13.942888259887695, "learning_rate": 1.8252946058091287e-05, "loss": 2.2558, "step": 5268 }, { "epoch": 4.372614107883818, "grad_norm": 8.90333366394043, "learning_rate": 1.825261410788382e-05, "loss": 0.8649, "step": 5269 }, { "epoch": 4.37344398340249, "grad_norm": 14.918102264404297, "learning_rate": 1.825228215767635e-05, "loss": 1.0196, "step": 5270 }, { "epoch": 4.374273858921162, "grad_norm": 11.959424018859863, "learning_rate": 1.825195020746888e-05, "loss": 0.9508, "step": 5271 }, { "epoch": 4.3751037344398345, "grad_norm": 17.967418670654297, "learning_rate": 
1.825161825726141e-05, "loss": 2.1784, "step": 5272 }, { "epoch": 4.375933609958507, "grad_norm": 16.763683319091797, "learning_rate": 1.8251286307053944e-05, "loss": 1.2063, "step": 5273 }, { "epoch": 4.376763485477179, "grad_norm": 19.86250877380371, "learning_rate": 1.8250954356846476e-05, "loss": 1.3739, "step": 5274 }, { "epoch": 4.377593360995851, "grad_norm": 20.4642276763916, "learning_rate": 1.8250622406639005e-05, "loss": 1.0939, "step": 5275 }, { "epoch": 4.378423236514523, "grad_norm": 18.391254425048828, "learning_rate": 1.8250290456431537e-05, "loss": 1.6714, "step": 5276 }, { "epoch": 4.3792531120331954, "grad_norm": 12.350838661193848, "learning_rate": 1.824995850622407e-05, "loss": 0.9685, "step": 5277 }, { "epoch": 4.380082987551868, "grad_norm": 15.601609230041504, "learning_rate": 1.82496265560166e-05, "loss": 0.7803, "step": 5278 }, { "epoch": 4.38091286307054, "grad_norm": 9.762877464294434, "learning_rate": 1.824929460580913e-05, "loss": 1.205, "step": 5279 }, { "epoch": 4.381742738589212, "grad_norm": 15.940163612365723, "learning_rate": 1.8248962655601662e-05, "loss": 1.6355, "step": 5280 }, { "epoch": 4.382572614107884, "grad_norm": 22.352638244628906, "learning_rate": 1.8248630705394194e-05, "loss": 0.9411, "step": 5281 }, { "epoch": 4.383402489626556, "grad_norm": 13.524152755737305, "learning_rate": 1.8248298755186723e-05, "loss": 1.2422, "step": 5282 }, { "epoch": 4.384232365145229, "grad_norm": 15.026430130004883, "learning_rate": 1.8247966804979255e-05, "loss": 2.4145, "step": 5283 }, { "epoch": 4.385062240663901, "grad_norm": 13.481915473937988, "learning_rate": 1.8247634854771787e-05, "loss": 0.9819, "step": 5284 }, { "epoch": 4.385892116182573, "grad_norm": 22.326370239257812, "learning_rate": 1.8247302904564316e-05, "loss": 1.3909, "step": 5285 }, { "epoch": 4.386721991701245, "grad_norm": 13.298795700073242, "learning_rate": 1.8246970954356848e-05, "loss": 1.3143, "step": 5286 }, { "epoch": 4.387551867219917, "grad_norm": 
11.863256454467773, "learning_rate": 1.824663900414938e-05, "loss": 1.0789, "step": 5287 }, { "epoch": 4.38838174273859, "grad_norm": 20.48744010925293, "learning_rate": 1.824630705394191e-05, "loss": 1.6008, "step": 5288 }, { "epoch": 4.389211618257262, "grad_norm": 11.108988761901855, "learning_rate": 1.824597510373444e-05, "loss": 1.0358, "step": 5289 }, { "epoch": 4.390041493775934, "grad_norm": 13.971006393432617, "learning_rate": 1.8245643153526973e-05, "loss": 1.0991, "step": 5290 }, { "epoch": 4.390871369294606, "grad_norm": 19.37089729309082, "learning_rate": 1.8245311203319505e-05, "loss": 1.0996, "step": 5291 }, { "epoch": 4.391701244813278, "grad_norm": 14.108808517456055, "learning_rate": 1.8244979253112033e-05, "loss": 2.3833, "step": 5292 }, { "epoch": 4.392531120331951, "grad_norm": 16.642606735229492, "learning_rate": 1.8244647302904566e-05, "loss": 1.6285, "step": 5293 }, { "epoch": 4.393360995850623, "grad_norm": 20.953048706054688, "learning_rate": 1.8244315352697098e-05, "loss": 0.9664, "step": 5294 }, { "epoch": 4.394190871369295, "grad_norm": 11.079885482788086, "learning_rate": 1.824398340248963e-05, "loss": 1.0086, "step": 5295 }, { "epoch": 4.395020746887967, "grad_norm": 18.022371292114258, "learning_rate": 1.824365145228216e-05, "loss": 1.4215, "step": 5296 }, { "epoch": 4.395850622406639, "grad_norm": 14.625792503356934, "learning_rate": 1.824331950207469e-05, "loss": 1.6643, "step": 5297 }, { "epoch": 4.396680497925312, "grad_norm": 12.491451263427734, "learning_rate": 1.8242987551867223e-05, "loss": 1.5303, "step": 5298 }, { "epoch": 4.397510373443984, "grad_norm": 14.01663875579834, "learning_rate": 1.8242655601659755e-05, "loss": 1.6898, "step": 5299 }, { "epoch": 4.398340248962656, "grad_norm": 10.136892318725586, "learning_rate": 1.8242323651452284e-05, "loss": 1.2496, "step": 5300 }, { "epoch": 4.399170124481328, "grad_norm": 14.664401054382324, "learning_rate": 1.8241991701244816e-05, "loss": 1.4698, "step": 5301 }, { "epoch": 
4.4, "grad_norm": 12.28082275390625, "learning_rate": 1.8241659751037348e-05, "loss": 0.9847, "step": 5302 }, { "epoch": 4.4008298755186726, "grad_norm": 12.37286376953125, "learning_rate": 1.8241327800829877e-05, "loss": 1.427, "step": 5303 }, { "epoch": 4.401659751037345, "grad_norm": 11.230707168579102, "learning_rate": 1.824099585062241e-05, "loss": 1.3203, "step": 5304 }, { "epoch": 4.402489626556017, "grad_norm": 20.568313598632812, "learning_rate": 1.8240663900414937e-05, "loss": 1.557, "step": 5305 }, { "epoch": 4.403319502074689, "grad_norm": 29.20132064819336, "learning_rate": 1.824033195020747e-05, "loss": 2.0828, "step": 5306 }, { "epoch": 4.404149377593361, "grad_norm": 10.605605125427246, "learning_rate": 1.824e-05, "loss": 1.3573, "step": 5307 }, { "epoch": 4.4049792531120335, "grad_norm": 17.692764282226562, "learning_rate": 1.823966804979253e-05, "loss": 1.6954, "step": 5308 }, { "epoch": 4.405809128630706, "grad_norm": 11.971481323242188, "learning_rate": 1.8239336099585062e-05, "loss": 1.1682, "step": 5309 }, { "epoch": 4.406639004149378, "grad_norm": 13.78427791595459, "learning_rate": 1.8239004149377594e-05, "loss": 1.027, "step": 5310 }, { "epoch": 4.40746887966805, "grad_norm": 16.8776798248291, "learning_rate": 1.8238672199170127e-05, "loss": 1.1862, "step": 5311 }, { "epoch": 4.408298755186722, "grad_norm": 15.050566673278809, "learning_rate": 1.823834024896266e-05, "loss": 1.5249, "step": 5312 }, { "epoch": 4.4091286307053945, "grad_norm": 22.401037216186523, "learning_rate": 1.8238008298755187e-05, "loss": 1.4297, "step": 5313 }, { "epoch": 4.409958506224067, "grad_norm": 22.672927856445312, "learning_rate": 1.823767634854772e-05, "loss": 1.2978, "step": 5314 }, { "epoch": 4.410788381742739, "grad_norm": 11.848212242126465, "learning_rate": 1.823734439834025e-05, "loss": 1.6931, "step": 5315 }, { "epoch": 4.411618257261411, "grad_norm": 16.080583572387695, "learning_rate": 1.8237012448132784e-05, "loss": 1.8517, "step": 5316 }, { "epoch": 
4.412448132780083, "grad_norm": 7.918478965759277, "learning_rate": 1.8236680497925312e-05, "loss": 0.824, "step": 5317 }, { "epoch": 4.4132780082987555, "grad_norm": 17.513486862182617, "learning_rate": 1.8236348547717845e-05, "loss": 1.4494, "step": 5318 }, { "epoch": 4.414107883817428, "grad_norm": 16.856956481933594, "learning_rate": 1.8236016597510377e-05, "loss": 1.9384, "step": 5319 }, { "epoch": 4.4149377593361, "grad_norm": 14.598139762878418, "learning_rate": 1.8235684647302905e-05, "loss": 1.6704, "step": 5320 }, { "epoch": 4.415767634854772, "grad_norm": 13.685858726501465, "learning_rate": 1.8235352697095438e-05, "loss": 1.7242, "step": 5321 }, { "epoch": 4.416597510373444, "grad_norm": 21.28217315673828, "learning_rate": 1.823502074688797e-05, "loss": 0.9355, "step": 5322 }, { "epoch": 4.4174273858921165, "grad_norm": 15.06758975982666, "learning_rate": 1.82346887966805e-05, "loss": 1.7938, "step": 5323 }, { "epoch": 4.418257261410789, "grad_norm": 15.424675941467285, "learning_rate": 1.823435684647303e-05, "loss": 1.8137, "step": 5324 }, { "epoch": 4.419087136929461, "grad_norm": 13.850615501403809, "learning_rate": 1.823402489626556e-05, "loss": 0.965, "step": 5325 }, { "epoch": 4.419917012448133, "grad_norm": 16.15668487548828, "learning_rate": 1.823369294605809e-05, "loss": 1.454, "step": 5326 }, { "epoch": 4.420746887966805, "grad_norm": 12.304710388183594, "learning_rate": 1.8233360995850623e-05, "loss": 1.2583, "step": 5327 }, { "epoch": 4.4215767634854775, "grad_norm": 13.816938400268555, "learning_rate": 1.8233029045643155e-05, "loss": 0.9983, "step": 5328 }, { "epoch": 4.42240663900415, "grad_norm": 19.223480224609375, "learning_rate": 1.8232697095435684e-05, "loss": 1.7744, "step": 5329 }, { "epoch": 4.423236514522822, "grad_norm": 13.316498756408691, "learning_rate": 1.8232365145228216e-05, "loss": 1.0737, "step": 5330 }, { "epoch": 4.424066390041494, "grad_norm": 11.337088584899902, "learning_rate": 1.823203319502075e-05, "loss": 1.0981, 
"step": 5331 }, { "epoch": 4.424896265560166, "grad_norm": 11.239445686340332, "learning_rate": 1.823170124481328e-05, "loss": 1.3308, "step": 5332 }, { "epoch": 4.425726141078838, "grad_norm": 11.536534309387207, "learning_rate": 1.823136929460581e-05, "loss": 1.379, "step": 5333 }, { "epoch": 4.426556016597511, "grad_norm": 12.059017181396484, "learning_rate": 1.823103734439834e-05, "loss": 1.0976, "step": 5334 }, { "epoch": 4.427385892116183, "grad_norm": 11.36961555480957, "learning_rate": 1.8230705394190873e-05, "loss": 0.9032, "step": 5335 }, { "epoch": 4.428215767634855, "grad_norm": 13.477258682250977, "learning_rate": 1.8230373443983406e-05, "loss": 1.9268, "step": 5336 }, { "epoch": 4.429045643153527, "grad_norm": 13.572737693786621, "learning_rate": 1.8230041493775934e-05, "loss": 1.3019, "step": 5337 }, { "epoch": 4.429875518672199, "grad_norm": 15.534132957458496, "learning_rate": 1.8229709543568466e-05, "loss": 0.9842, "step": 5338 }, { "epoch": 4.430705394190872, "grad_norm": 15.311563491821289, "learning_rate": 1.8229377593361e-05, "loss": 1.2822, "step": 5339 }, { "epoch": 4.431535269709544, "grad_norm": 14.100868225097656, "learning_rate": 1.822904564315353e-05, "loss": 1.3375, "step": 5340 }, { "epoch": 4.432365145228216, "grad_norm": 10.98434829711914, "learning_rate": 1.822871369294606e-05, "loss": 1.3987, "step": 5341 }, { "epoch": 4.433195020746888, "grad_norm": 12.296451568603516, "learning_rate": 1.822838174273859e-05, "loss": 0.8883, "step": 5342 }, { "epoch": 4.43402489626556, "grad_norm": 9.955257415771484, "learning_rate": 1.822804979253112e-05, "loss": 1.4349, "step": 5343 }, { "epoch": 4.434854771784233, "grad_norm": 16.37889862060547, "learning_rate": 1.8227717842323652e-05, "loss": 1.7435, "step": 5344 }, { "epoch": 4.435684647302905, "grad_norm": 13.405402183532715, "learning_rate": 1.8227385892116184e-05, "loss": 1.1746, "step": 5345 }, { "epoch": 4.436514522821577, "grad_norm": 10.811164855957031, "learning_rate": 
1.8227053941908713e-05, "loss": 1.1828, "step": 5346 }, { "epoch": 4.437344398340249, "grad_norm": 9.22826862335205, "learning_rate": 1.8226721991701245e-05, "loss": 1.063, "step": 5347 }, { "epoch": 4.438174273858921, "grad_norm": 19.98404884338379, "learning_rate": 1.8226390041493777e-05, "loss": 1.8084, "step": 5348 }, { "epoch": 4.439004149377594, "grad_norm": 25.49090576171875, "learning_rate": 1.822605809128631e-05, "loss": 1.0109, "step": 5349 }, { "epoch": 4.439834024896266, "grad_norm": 15.063783645629883, "learning_rate": 1.8225726141078838e-05, "loss": 0.8598, "step": 5350 }, { "epoch": 4.440663900414938, "grad_norm": 17.63273811340332, "learning_rate": 1.822539419087137e-05, "loss": 1.4015, "step": 5351 }, { "epoch": 4.44149377593361, "grad_norm": 11.174792289733887, "learning_rate": 1.8225062240663902e-05, "loss": 0.9536, "step": 5352 }, { "epoch": 4.442323651452282, "grad_norm": 20.031909942626953, "learning_rate": 1.8224730290456434e-05, "loss": 1.6036, "step": 5353 }, { "epoch": 4.443153526970955, "grad_norm": 21.476268768310547, "learning_rate": 1.8224398340248963e-05, "loss": 1.4001, "step": 5354 }, { "epoch": 4.443983402489627, "grad_norm": 15.580179214477539, "learning_rate": 1.8224066390041495e-05, "loss": 0.9429, "step": 5355 }, { "epoch": 4.444813278008299, "grad_norm": 10.838064193725586, "learning_rate": 1.8223734439834027e-05, "loss": 1.1414, "step": 5356 }, { "epoch": 4.445643153526971, "grad_norm": 22.39131736755371, "learning_rate": 1.822340248962656e-05, "loss": 1.0247, "step": 5357 }, { "epoch": 4.446473029045643, "grad_norm": 17.045578002929688, "learning_rate": 1.8223070539419088e-05, "loss": 1.4036, "step": 5358 }, { "epoch": 4.4473029045643155, "grad_norm": 16.299095153808594, "learning_rate": 1.822273858921162e-05, "loss": 1.8567, "step": 5359 }, { "epoch": 4.448132780082988, "grad_norm": 22.081384658813477, "learning_rate": 1.8222406639004152e-05, "loss": 1.5342, "step": 5360 }, { "epoch": 4.44896265560166, "grad_norm": 
7.889340400695801, "learning_rate": 1.822207468879668e-05, "loss": 0.9056, "step": 5361 }, { "epoch": 4.449792531120332, "grad_norm": 10.769538879394531, "learning_rate": 1.8221742738589213e-05, "loss": 1.1149, "step": 5362 }, { "epoch": 4.450622406639004, "grad_norm": 10.098917961120605, "learning_rate": 1.8221410788381745e-05, "loss": 0.8363, "step": 5363 }, { "epoch": 4.4514522821576765, "grad_norm": 16.027868270874023, "learning_rate": 1.8221078838174274e-05, "loss": 1.6556, "step": 5364 }, { "epoch": 4.452282157676349, "grad_norm": 12.694156646728516, "learning_rate": 1.8220746887966806e-05, "loss": 1.3712, "step": 5365 }, { "epoch": 4.453112033195021, "grad_norm": 14.39469051361084, "learning_rate": 1.8220414937759338e-05, "loss": 0.9606, "step": 5366 }, { "epoch": 4.453941908713693, "grad_norm": 18.21802520751953, "learning_rate": 1.8220082987551867e-05, "loss": 1.1458, "step": 5367 }, { "epoch": 4.454771784232365, "grad_norm": 8.80807876586914, "learning_rate": 1.82197510373444e-05, "loss": 0.8129, "step": 5368 }, { "epoch": 4.4556016597510375, "grad_norm": 16.61296844482422, "learning_rate": 1.821941908713693e-05, "loss": 1.1657, "step": 5369 }, { "epoch": 4.45643153526971, "grad_norm": 17.028362274169922, "learning_rate": 1.8219087136929463e-05, "loss": 1.7072, "step": 5370 }, { "epoch": 4.457261410788382, "grad_norm": 8.014799118041992, "learning_rate": 1.8218755186721992e-05, "loss": 0.771, "step": 5371 }, { "epoch": 4.458091286307054, "grad_norm": 13.635767936706543, "learning_rate": 1.8218423236514524e-05, "loss": 1.6143, "step": 5372 }, { "epoch": 4.458921161825726, "grad_norm": 20.44639015197754, "learning_rate": 1.8218091286307056e-05, "loss": 0.753, "step": 5373 }, { "epoch": 4.4597510373443985, "grad_norm": 14.368900299072266, "learning_rate": 1.821775933609959e-05, "loss": 1.4702, "step": 5374 }, { "epoch": 4.460580912863071, "grad_norm": 15.714359283447266, "learning_rate": 1.8217427385892117e-05, "loss": 1.3693, "step": 5375 }, { "epoch": 
4.461410788381743, "grad_norm": 13.160922050476074, "learning_rate": 1.821709543568465e-05, "loss": 1.149, "step": 5376 }, { "epoch": 4.462240663900415, "grad_norm": 11.631671905517578, "learning_rate": 1.821676348547718e-05, "loss": 1.2649, "step": 5377 }, { "epoch": 4.463070539419087, "grad_norm": 20.780170440673828, "learning_rate": 1.8216431535269713e-05, "loss": 2.2029, "step": 5378 }, { "epoch": 4.4639004149377595, "grad_norm": 13.369840621948242, "learning_rate": 1.8216099585062242e-05, "loss": 1.5613, "step": 5379 }, { "epoch": 4.464730290456432, "grad_norm": 9.711792945861816, "learning_rate": 1.8215767634854774e-05, "loss": 1.2519, "step": 5380 }, { "epoch": 4.465560165975104, "grad_norm": 10.980873107910156, "learning_rate": 1.8215435684647303e-05, "loss": 1.241, "step": 5381 }, { "epoch": 4.466390041493776, "grad_norm": 19.048601150512695, "learning_rate": 1.8215103734439835e-05, "loss": 2.0865, "step": 5382 }, { "epoch": 4.467219917012448, "grad_norm": 17.421728134155273, "learning_rate": 1.8214771784232367e-05, "loss": 1.0326, "step": 5383 }, { "epoch": 4.4680497925311204, "grad_norm": 21.935449600219727, "learning_rate": 1.8214439834024896e-05, "loss": 1.5228, "step": 5384 }, { "epoch": 4.468879668049793, "grad_norm": 12.374005317687988, "learning_rate": 1.8214107883817428e-05, "loss": 1.1798, "step": 5385 }, { "epoch": 4.469709543568465, "grad_norm": 17.562429428100586, "learning_rate": 1.821377593360996e-05, "loss": 0.8543, "step": 5386 }, { "epoch": 4.470539419087137, "grad_norm": 16.611286163330078, "learning_rate": 1.821344398340249e-05, "loss": 0.9584, "step": 5387 }, { "epoch": 4.471369294605809, "grad_norm": 23.253860473632812, "learning_rate": 1.821311203319502e-05, "loss": 2.5724, "step": 5388 }, { "epoch": 4.472199170124481, "grad_norm": 25.861080169677734, "learning_rate": 1.8212780082987553e-05, "loss": 1.0912, "step": 5389 }, { "epoch": 4.473029045643154, "grad_norm": 12.003926277160645, "learning_rate": 1.8212448132780085e-05, "loss": 
1.4086, "step": 5390 }, { "epoch": 4.473858921161826, "grad_norm": 9.697318077087402, "learning_rate": 1.8212116182572614e-05, "loss": 1.4259, "step": 5391 }, { "epoch": 4.474688796680498, "grad_norm": 31.288793563842773, "learning_rate": 1.8211784232365146e-05, "loss": 1.121, "step": 5392 }, { "epoch": 4.47551867219917, "grad_norm": 13.817673683166504, "learning_rate": 1.8211452282157678e-05, "loss": 1.3486, "step": 5393 }, { "epoch": 4.476348547717842, "grad_norm": 10.315773963928223, "learning_rate": 1.821112033195021e-05, "loss": 0.9438, "step": 5394 }, { "epoch": 4.477178423236515, "grad_norm": 10.605923652648926, "learning_rate": 1.8210788381742742e-05, "loss": 0.8933, "step": 5395 }, { "epoch": 4.478008298755187, "grad_norm": 14.377888679504395, "learning_rate": 1.821045643153527e-05, "loss": 1.2922, "step": 5396 }, { "epoch": 4.478838174273859, "grad_norm": 7.649898052215576, "learning_rate": 1.8210124481327803e-05, "loss": 0.9558, "step": 5397 }, { "epoch": 4.479668049792531, "grad_norm": 21.488056182861328, "learning_rate": 1.8209792531120335e-05, "loss": 1.413, "step": 5398 }, { "epoch": 4.480497925311203, "grad_norm": 19.177146911621094, "learning_rate": 1.8209460580912864e-05, "loss": 1.6987, "step": 5399 }, { "epoch": 4.481327800829876, "grad_norm": 17.19017791748047, "learning_rate": 1.8209128630705396e-05, "loss": 1.514, "step": 5400 }, { "epoch": 4.482157676348548, "grad_norm": 18.975027084350586, "learning_rate": 1.8208796680497928e-05, "loss": 1.9452, "step": 5401 }, { "epoch": 4.48298755186722, "grad_norm": 12.55734920501709, "learning_rate": 1.8208464730290457e-05, "loss": 1.0952, "step": 5402 }, { "epoch": 4.483817427385892, "grad_norm": 13.452259063720703, "learning_rate": 1.820813278008299e-05, "loss": 1.1596, "step": 5403 }, { "epoch": 4.484647302904564, "grad_norm": 24.238418579101562, "learning_rate": 1.8207800829875518e-05, "loss": 1.2646, "step": 5404 }, { "epoch": 4.485477178423237, "grad_norm": 12.902413368225098, "learning_rate": 
1.820746887966805e-05, "loss": 1.4088, "step": 5405 }, { "epoch": 4.486307053941909, "grad_norm": 21.23838233947754, "learning_rate": 1.8207136929460582e-05, "loss": 1.9233, "step": 5406 }, { "epoch": 4.487136929460581, "grad_norm": 15.583715438842773, "learning_rate": 1.8206804979253114e-05, "loss": 1.9463, "step": 5407 }, { "epoch": 4.487966804979253, "grad_norm": 13.7454195022583, "learning_rate": 1.8206473029045643e-05, "loss": 1.3831, "step": 5408 }, { "epoch": 4.488796680497925, "grad_norm": 13.991020202636719, "learning_rate": 1.8206141078838175e-05, "loss": 1.0706, "step": 5409 }, { "epoch": 4.4896265560165975, "grad_norm": 18.150331497192383, "learning_rate": 1.8205809128630707e-05, "loss": 1.7432, "step": 5410 }, { "epoch": 4.49045643153527, "grad_norm": 13.826122283935547, "learning_rate": 1.820547717842324e-05, "loss": 1.4166, "step": 5411 }, { "epoch": 4.491286307053942, "grad_norm": 21.00939178466797, "learning_rate": 1.8205145228215768e-05, "loss": 1.408, "step": 5412 }, { "epoch": 4.492116182572614, "grad_norm": 15.49984073638916, "learning_rate": 1.82048132780083e-05, "loss": 1.5916, "step": 5413 }, { "epoch": 4.492946058091286, "grad_norm": 16.122520446777344, "learning_rate": 1.8204481327800832e-05, "loss": 1.2124, "step": 5414 }, { "epoch": 4.4937759336099585, "grad_norm": 11.75365161895752, "learning_rate": 1.8204149377593364e-05, "loss": 1.3887, "step": 5415 }, { "epoch": 4.494605809128631, "grad_norm": 19.349637985229492, "learning_rate": 1.8203817427385893e-05, "loss": 2.0442, "step": 5416 }, { "epoch": 4.495435684647303, "grad_norm": 22.767284393310547, "learning_rate": 1.8203485477178425e-05, "loss": 2.5469, "step": 5417 }, { "epoch": 4.496265560165975, "grad_norm": 13.04238510131836, "learning_rate": 1.8203153526970957e-05, "loss": 1.1407, "step": 5418 }, { "epoch": 4.497095435684647, "grad_norm": 13.189403533935547, "learning_rate": 1.820282157676349e-05, "loss": 2.1674, "step": 5419 }, { "epoch": 4.4979253112033195, "grad_norm": 
14.715107917785645, "learning_rate": 1.8202489626556018e-05, "loss": 1.4905, "step": 5420 }, { "epoch": 4.498755186721992, "grad_norm": 21.701854705810547, "learning_rate": 1.820215767634855e-05, "loss": 1.733, "step": 5421 }, { "epoch": 4.499585062240664, "grad_norm": 25.757036209106445, "learning_rate": 1.820182572614108e-05, "loss": 1.1014, "step": 5422 }, { "epoch": 4.500414937759336, "grad_norm": 17.282459259033203, "learning_rate": 1.820149377593361e-05, "loss": 1.2239, "step": 5423 }, { "epoch": 4.501244813278008, "grad_norm": 16.33254051208496, "learning_rate": 1.8201161825726143e-05, "loss": 2.1768, "step": 5424 }, { "epoch": 4.5020746887966805, "grad_norm": 24.498966217041016, "learning_rate": 1.820082987551867e-05, "loss": 1.4904, "step": 5425 }, { "epoch": 4.502904564315353, "grad_norm": 12.148995399475098, "learning_rate": 1.8200497925311204e-05, "loss": 1.3651, "step": 5426 }, { "epoch": 4.503734439834025, "grad_norm": 10.017206192016602, "learning_rate": 1.8200165975103736e-05, "loss": 1.0384, "step": 5427 }, { "epoch": 4.504564315352697, "grad_norm": 17.382688522338867, "learning_rate": 1.8199834024896268e-05, "loss": 0.9776, "step": 5428 }, { "epoch": 4.505394190871369, "grad_norm": 16.206222534179688, "learning_rate": 1.8199502074688797e-05, "loss": 0.9942, "step": 5429 }, { "epoch": 4.5062240663900415, "grad_norm": 12.141045570373535, "learning_rate": 1.819917012448133e-05, "loss": 0.9803, "step": 5430 }, { "epoch": 4.507053941908714, "grad_norm": 10.567651748657227, "learning_rate": 1.819883817427386e-05, "loss": 1.2437, "step": 5431 }, { "epoch": 4.507883817427386, "grad_norm": 11.772774696350098, "learning_rate": 1.8198506224066393e-05, "loss": 1.0713, "step": 5432 }, { "epoch": 4.508713692946058, "grad_norm": 11.578743934631348, "learning_rate": 1.8198174273858922e-05, "loss": 1.3154, "step": 5433 }, { "epoch": 4.50954356846473, "grad_norm": 15.345852851867676, "learning_rate": 1.8197842323651454e-05, "loss": 1.2298, "step": 5434 }, { 
"epoch": 4.5103734439834025, "grad_norm": 10.70240592956543, "learning_rate": 1.8197510373443986e-05, "loss": 1.2164, "step": 5435 }, { "epoch": 4.511203319502075, "grad_norm": 13.1345796585083, "learning_rate": 1.8197178423236518e-05, "loss": 1.0742, "step": 5436 }, { "epoch": 4.512033195020747, "grad_norm": 15.677188873291016, "learning_rate": 1.8196846473029047e-05, "loss": 2.2247, "step": 5437 }, { "epoch": 4.512863070539419, "grad_norm": 25.686079025268555, "learning_rate": 1.819651452282158e-05, "loss": 1.9072, "step": 5438 }, { "epoch": 4.513692946058091, "grad_norm": 10.287721633911133, "learning_rate": 1.819618257261411e-05, "loss": 1.4077, "step": 5439 }, { "epoch": 4.514522821576763, "grad_norm": 17.67034149169922, "learning_rate": 1.819585062240664e-05, "loss": 1.3804, "step": 5440 }, { "epoch": 4.515352697095436, "grad_norm": 16.004911422729492, "learning_rate": 1.8195518672199172e-05, "loss": 1.4451, "step": 5441 }, { "epoch": 4.516182572614108, "grad_norm": 10.99366569519043, "learning_rate": 1.81951867219917e-05, "loss": 1.1491, "step": 5442 }, { "epoch": 4.51701244813278, "grad_norm": 17.916419982910156, "learning_rate": 1.8194854771784233e-05, "loss": 1.0059, "step": 5443 }, { "epoch": 4.517842323651452, "grad_norm": 18.654367446899414, "learning_rate": 1.8194522821576765e-05, "loss": 1.1511, "step": 5444 }, { "epoch": 4.518672199170124, "grad_norm": 35.17913818359375, "learning_rate": 1.8194190871369297e-05, "loss": 1.6511, "step": 5445 }, { "epoch": 4.519502074688797, "grad_norm": 13.492326736450195, "learning_rate": 1.8193858921161826e-05, "loss": 1.2084, "step": 5446 }, { "epoch": 4.520331950207469, "grad_norm": 15.700743675231934, "learning_rate": 1.8193526970954358e-05, "loss": 1.3432, "step": 5447 }, { "epoch": 4.521161825726141, "grad_norm": 19.556730270385742, "learning_rate": 1.819319502074689e-05, "loss": 2.6843, "step": 5448 }, { "epoch": 4.521991701244813, "grad_norm": 27.108461380004883, "learning_rate": 1.8192863070539422e-05, 
"loss": 2.0399, "step": 5449 }, { "epoch": 4.522821576763485, "grad_norm": 20.877105712890625, "learning_rate": 1.819253112033195e-05, "loss": 1.9732, "step": 5450 }, { "epoch": 4.523651452282158, "grad_norm": 10.619975090026855, "learning_rate": 1.8192199170124483e-05, "loss": 1.1803, "step": 5451 }, { "epoch": 4.52448132780083, "grad_norm": 25.321500778198242, "learning_rate": 1.8191867219917015e-05, "loss": 1.8485, "step": 5452 }, { "epoch": 4.525311203319502, "grad_norm": 12.705185890197754, "learning_rate": 1.8191535269709547e-05, "loss": 1.0725, "step": 5453 }, { "epoch": 4.526141078838174, "grad_norm": 15.68419075012207, "learning_rate": 1.8191203319502076e-05, "loss": 1.4732, "step": 5454 }, { "epoch": 4.526970954356846, "grad_norm": 15.846855163574219, "learning_rate": 1.8190871369294608e-05, "loss": 1.5116, "step": 5455 }, { "epoch": 4.527800829875519, "grad_norm": 15.48581314086914, "learning_rate": 1.819053941908714e-05, "loss": 2.2235, "step": 5456 }, { "epoch": 4.528630705394191, "grad_norm": 8.958888053894043, "learning_rate": 1.8190207468879672e-05, "loss": 1.3525, "step": 5457 }, { "epoch": 4.529460580912863, "grad_norm": 13.096529960632324, "learning_rate": 1.81898755186722e-05, "loss": 0.9172, "step": 5458 }, { "epoch": 4.530290456431535, "grad_norm": 13.99023723602295, "learning_rate": 1.8189543568464733e-05, "loss": 1.0715, "step": 5459 }, { "epoch": 4.531120331950207, "grad_norm": 14.356639862060547, "learning_rate": 1.818921161825726e-05, "loss": 1.4296, "step": 5460 }, { "epoch": 4.5319502074688796, "grad_norm": 12.782538414001465, "learning_rate": 1.8188879668049794e-05, "loss": 1.396, "step": 5461 }, { "epoch": 4.532780082987552, "grad_norm": 15.47133731842041, "learning_rate": 1.8188547717842326e-05, "loss": 1.9131, "step": 5462 }, { "epoch": 4.533609958506224, "grad_norm": 15.763813018798828, "learning_rate": 1.8188215767634854e-05, "loss": 1.5122, "step": 5463 }, { "epoch": 4.534439834024896, "grad_norm": 14.325847625732422, 
"learning_rate": 1.8187883817427387e-05, "loss": 1.324, "step": 5464 }, { "epoch": 4.535269709543568, "grad_norm": 13.801334381103516, "learning_rate": 1.818755186721992e-05, "loss": 1.3296, "step": 5465 }, { "epoch": 4.5360995850622405, "grad_norm": 16.59076499938965, "learning_rate": 1.8187219917012447e-05, "loss": 1.3569, "step": 5466 }, { "epoch": 4.536929460580913, "grad_norm": 17.468116760253906, "learning_rate": 1.818688796680498e-05, "loss": 1.7862, "step": 5467 }, { "epoch": 4.537759336099585, "grad_norm": 15.228153228759766, "learning_rate": 1.818655601659751e-05, "loss": 1.3156, "step": 5468 }, { "epoch": 4.538589211618257, "grad_norm": 16.39984893798828, "learning_rate": 1.8186224066390044e-05, "loss": 1.7204, "step": 5469 }, { "epoch": 4.539419087136929, "grad_norm": 11.740344047546387, "learning_rate": 1.8185892116182572e-05, "loss": 1.4889, "step": 5470 }, { "epoch": 4.5402489626556015, "grad_norm": 11.280110359191895, "learning_rate": 1.8185560165975105e-05, "loss": 1.2147, "step": 5471 }, { "epoch": 4.541078838174274, "grad_norm": 16.058155059814453, "learning_rate": 1.8185228215767637e-05, "loss": 1.0611, "step": 5472 }, { "epoch": 4.541908713692946, "grad_norm": 14.857819557189941, "learning_rate": 1.818489626556017e-05, "loss": 1.1582, "step": 5473 }, { "epoch": 4.542738589211618, "grad_norm": 11.666789054870605, "learning_rate": 1.81845643153527e-05, "loss": 1.6659, "step": 5474 }, { "epoch": 4.54356846473029, "grad_norm": 10.037529945373535, "learning_rate": 1.818423236514523e-05, "loss": 0.9238, "step": 5475 }, { "epoch": 4.5443983402489625, "grad_norm": 27.487245559692383, "learning_rate": 1.8183900414937762e-05, "loss": 1.7973, "step": 5476 }, { "epoch": 4.545228215767635, "grad_norm": 12.542476654052734, "learning_rate": 1.8183568464730294e-05, "loss": 1.491, "step": 5477 }, { "epoch": 4.546058091286307, "grad_norm": 19.240589141845703, "learning_rate": 1.8183236514522823e-05, "loss": 1.7305, "step": 5478 }, { "epoch": 4.546887966804979, 
"grad_norm": 17.233552932739258, "learning_rate": 1.8182904564315355e-05, "loss": 2.0525, "step": 5479 }, { "epoch": 4.547717842323651, "grad_norm": 18.454448699951172, "learning_rate": 1.8182572614107887e-05, "loss": 1.3155, "step": 5480 }, { "epoch": 4.5485477178423235, "grad_norm": 13.670068740844727, "learning_rate": 1.8182240663900415e-05, "loss": 1.5134, "step": 5481 }, { "epoch": 4.549377593360996, "grad_norm": 16.851728439331055, "learning_rate": 1.8181908713692948e-05, "loss": 0.9858, "step": 5482 }, { "epoch": 4.550207468879668, "grad_norm": 20.721223831176758, "learning_rate": 1.8181576763485476e-05, "loss": 1.6159, "step": 5483 }, { "epoch": 4.55103734439834, "grad_norm": 13.39480209350586, "learning_rate": 1.818124481327801e-05, "loss": 1.6037, "step": 5484 }, { "epoch": 4.551867219917012, "grad_norm": 15.367685317993164, "learning_rate": 1.818091286307054e-05, "loss": 1.5351, "step": 5485 }, { "epoch": 4.5526970954356845, "grad_norm": 12.125056266784668, "learning_rate": 1.8180580912863073e-05, "loss": 1.6299, "step": 5486 }, { "epoch": 4.553526970954357, "grad_norm": 18.71515464782715, "learning_rate": 1.81802489626556e-05, "loss": 1.3346, "step": 5487 }, { "epoch": 4.554356846473029, "grad_norm": 13.62588882446289, "learning_rate": 1.8179917012448133e-05, "loss": 0.8788, "step": 5488 }, { "epoch": 4.555186721991701, "grad_norm": 11.828125953674316, "learning_rate": 1.8179585062240666e-05, "loss": 1.1595, "step": 5489 }, { "epoch": 4.556016597510373, "grad_norm": 13.799908638000488, "learning_rate": 1.8179253112033198e-05, "loss": 1.8091, "step": 5490 }, { "epoch": 4.556846473029045, "grad_norm": 11.83006477355957, "learning_rate": 1.8178921161825726e-05, "loss": 1.1255, "step": 5491 }, { "epoch": 4.557676348547718, "grad_norm": 10.702951431274414, "learning_rate": 1.817858921161826e-05, "loss": 1.1876, "step": 5492 }, { "epoch": 4.55850622406639, "grad_norm": 13.032912254333496, "learning_rate": 1.817825726141079e-05, "loss": 1.3391, "step": 5493 }, 
{ "epoch": 4.559336099585062, "grad_norm": 11.455252647399902, "learning_rate": 1.8177925311203323e-05, "loss": 1.4405, "step": 5494 }, { "epoch": 4.560165975103734, "grad_norm": 12.807985305786133, "learning_rate": 1.817759336099585e-05, "loss": 1.1603, "step": 5495 }, { "epoch": 4.560995850622406, "grad_norm": 12.832371711730957, "learning_rate": 1.8177261410788384e-05, "loss": 1.1346, "step": 5496 }, { "epoch": 4.561825726141079, "grad_norm": 11.19836139678955, "learning_rate": 1.8176929460580916e-05, "loss": 1.1429, "step": 5497 }, { "epoch": 4.562655601659751, "grad_norm": 16.37242317199707, "learning_rate": 1.8176597510373444e-05, "loss": 0.9901, "step": 5498 }, { "epoch": 4.563485477178423, "grad_norm": 16.65576934814453, "learning_rate": 1.8176265560165976e-05, "loss": 2.0082, "step": 5499 }, { "epoch": 4.564315352697095, "grad_norm": 32.03096389770508, "learning_rate": 1.817593360995851e-05, "loss": 1.5683, "step": 5500 }, { "epoch": 4.565145228215767, "grad_norm": 13.801016807556152, "learning_rate": 1.8175601659751037e-05, "loss": 1.0718, "step": 5501 }, { "epoch": 4.56597510373444, "grad_norm": 14.216219902038574, "learning_rate": 1.817526970954357e-05, "loss": 1.2999, "step": 5502 }, { "epoch": 4.566804979253112, "grad_norm": 28.121204376220703, "learning_rate": 1.81749377593361e-05, "loss": 1.8169, "step": 5503 }, { "epoch": 4.567634854771784, "grad_norm": 10.900871276855469, "learning_rate": 1.817460580912863e-05, "loss": 1.2786, "step": 5504 }, { "epoch": 4.568464730290456, "grad_norm": 15.06229019165039, "learning_rate": 1.8174273858921162e-05, "loss": 1.6233, "step": 5505 }, { "epoch": 4.569294605809128, "grad_norm": 14.61507797241211, "learning_rate": 1.8173941908713694e-05, "loss": 1.4823, "step": 5506 }, { "epoch": 4.570124481327801, "grad_norm": 12.592364311218262, "learning_rate": 1.8173609958506227e-05, "loss": 1.1873, "step": 5507 }, { "epoch": 4.570954356846473, "grad_norm": 10.672873497009277, "learning_rate": 1.8173278008298755e-05, 
"loss": 1.2844, "step": 5508 }, { "epoch": 4.571784232365145, "grad_norm": 19.91383171081543, "learning_rate": 1.8172946058091287e-05, "loss": 1.8922, "step": 5509 }, { "epoch": 4.572614107883817, "grad_norm": 8.786358833312988, "learning_rate": 1.817261410788382e-05, "loss": 0.8986, "step": 5510 }, { "epoch": 4.573443983402489, "grad_norm": 15.209883689880371, "learning_rate": 1.817228215767635e-05, "loss": 1.5229, "step": 5511 }, { "epoch": 4.574273858921162, "grad_norm": 17.493635177612305, "learning_rate": 1.817195020746888e-05, "loss": 1.4157, "step": 5512 }, { "epoch": 4.575103734439834, "grad_norm": 20.414432525634766, "learning_rate": 1.8171618257261412e-05, "loss": 1.5841, "step": 5513 }, { "epoch": 4.575933609958506, "grad_norm": 8.72944450378418, "learning_rate": 1.8171286307053945e-05, "loss": 0.8737, "step": 5514 }, { "epoch": 4.576763485477178, "grad_norm": 21.49504280090332, "learning_rate": 1.8170954356846477e-05, "loss": 1.8012, "step": 5515 }, { "epoch": 4.57759336099585, "grad_norm": 11.42143440246582, "learning_rate": 1.8170622406639005e-05, "loss": 1.2984, "step": 5516 }, { "epoch": 4.5784232365145225, "grad_norm": 10.226584434509277, "learning_rate": 1.8170290456431537e-05, "loss": 0.9594, "step": 5517 }, { "epoch": 4.579253112033195, "grad_norm": 17.596155166625977, "learning_rate": 1.816995850622407e-05, "loss": 1.4691, "step": 5518 }, { "epoch": 4.580082987551867, "grad_norm": 12.465777397155762, "learning_rate": 1.8169626556016598e-05, "loss": 1.0558, "step": 5519 }, { "epoch": 4.580912863070539, "grad_norm": 10.253978729248047, "learning_rate": 1.816929460580913e-05, "loss": 0.897, "step": 5520 }, { "epoch": 4.581742738589211, "grad_norm": 11.552445411682129, "learning_rate": 1.816896265560166e-05, "loss": 1.175, "step": 5521 }, { "epoch": 4.5825726141078835, "grad_norm": 15.038251876831055, "learning_rate": 1.816863070539419e-05, "loss": 1.0059, "step": 5522 }, { "epoch": 4.583402489626556, "grad_norm": 16.00387954711914, 
"learning_rate": 1.8168298755186723e-05, "loss": 1.7511, "step": 5523 }, { "epoch": 4.584232365145228, "grad_norm": 11.854130744934082, "learning_rate": 1.8167966804979255e-05, "loss": 0.8836, "step": 5524 }, { "epoch": 4.5850622406639, "grad_norm": 16.923660278320312, "learning_rate": 1.8167634854771784e-05, "loss": 1.7805, "step": 5525 }, { "epoch": 4.585892116182572, "grad_norm": 8.383119583129883, "learning_rate": 1.8167302904564316e-05, "loss": 0.5159, "step": 5526 }, { "epoch": 4.5867219917012445, "grad_norm": 15.841998100280762, "learning_rate": 1.816697095435685e-05, "loss": 1.8496, "step": 5527 }, { "epoch": 4.587551867219917, "grad_norm": 23.495302200317383, "learning_rate": 1.816663900414938e-05, "loss": 1.3475, "step": 5528 }, { "epoch": 4.588381742738589, "grad_norm": 16.048053741455078, "learning_rate": 1.816630705394191e-05, "loss": 1.4538, "step": 5529 }, { "epoch": 4.589211618257261, "grad_norm": 13.512393951416016, "learning_rate": 1.816597510373444e-05, "loss": 1.5667, "step": 5530 }, { "epoch": 4.590041493775933, "grad_norm": 9.577834129333496, "learning_rate": 1.8165643153526973e-05, "loss": 1.0365, "step": 5531 }, { "epoch": 4.5908713692946055, "grad_norm": 18.842580795288086, "learning_rate": 1.8165311203319506e-05, "loss": 1.3303, "step": 5532 }, { "epoch": 4.591701244813278, "grad_norm": 15.447693824768066, "learning_rate": 1.8164979253112034e-05, "loss": 1.8296, "step": 5533 }, { "epoch": 4.59253112033195, "grad_norm": 13.957122802734375, "learning_rate": 1.8164647302904566e-05, "loss": 1.9595, "step": 5534 }, { "epoch": 4.593360995850622, "grad_norm": 21.52276039123535, "learning_rate": 1.81643153526971e-05, "loss": 1.8027, "step": 5535 }, { "epoch": 4.594190871369294, "grad_norm": 10.808111190795898, "learning_rate": 1.816398340248963e-05, "loss": 1.0567, "step": 5536 }, { "epoch": 4.5950207468879665, "grad_norm": 16.520845413208008, "learning_rate": 1.816365145228216e-05, "loss": 1.2722, "step": 5537 }, { "epoch": 4.595850622406639, 
"grad_norm": 11.694535255432129, "learning_rate": 1.816331950207469e-05, "loss": 1.2068, "step": 5538 }, { "epoch": 4.596680497925311, "grad_norm": 19.49464225769043, "learning_rate": 1.816298755186722e-05, "loss": 1.592, "step": 5539 }, { "epoch": 4.597510373443983, "grad_norm": 16.41690444946289, "learning_rate": 1.8162655601659752e-05, "loss": 1.2439, "step": 5540 }, { "epoch": 4.598340248962655, "grad_norm": 22.825164794921875, "learning_rate": 1.8162323651452284e-05, "loss": 1.1959, "step": 5541 }, { "epoch": 4.5991701244813274, "grad_norm": 9.800524711608887, "learning_rate": 1.8161991701244813e-05, "loss": 0.7974, "step": 5542 }, { "epoch": 4.6, "grad_norm": 11.67833137512207, "learning_rate": 1.8161659751037345e-05, "loss": 1.1465, "step": 5543 }, { "epoch": 4.600829875518672, "grad_norm": 18.861507415771484, "learning_rate": 1.8161327800829877e-05, "loss": 1.6776, "step": 5544 }, { "epoch": 4.601659751037344, "grad_norm": 10.490435600280762, "learning_rate": 1.8160995850622406e-05, "loss": 1.1075, "step": 5545 }, { "epoch": 4.602489626556016, "grad_norm": 13.310526847839355, "learning_rate": 1.8160663900414938e-05, "loss": 1.3824, "step": 5546 }, { "epoch": 4.603319502074688, "grad_norm": 21.22655487060547, "learning_rate": 1.816033195020747e-05, "loss": 1.9268, "step": 5547 }, { "epoch": 4.604149377593361, "grad_norm": 16.096405029296875, "learning_rate": 1.8160000000000002e-05, "loss": 1.3971, "step": 5548 }, { "epoch": 4.604979253112033, "grad_norm": 22.98384666442871, "learning_rate": 1.815966804979253e-05, "loss": 1.4688, "step": 5549 }, { "epoch": 4.605809128630705, "grad_norm": 20.690519332885742, "learning_rate": 1.8159336099585063e-05, "loss": 1.1885, "step": 5550 }, { "epoch": 4.606639004149377, "grad_norm": 10.107259750366211, "learning_rate": 1.8159004149377595e-05, "loss": 1.1927, "step": 5551 }, { "epoch": 4.607468879668049, "grad_norm": 13.043742179870605, "learning_rate": 1.8158672199170127e-05, "loss": 1.3289, "step": 5552 }, { "epoch": 
4.608298755186722, "grad_norm": 13.53634262084961, "learning_rate": 1.815834024896266e-05, "loss": 2.041, "step": 5553 }, { "epoch": 4.609128630705394, "grad_norm": 15.940784454345703, "learning_rate": 1.8158008298755188e-05, "loss": 1.7515, "step": 5554 }, { "epoch": 4.609958506224066, "grad_norm": 24.873836517333984, "learning_rate": 1.815767634854772e-05, "loss": 1.443, "step": 5555 }, { "epoch": 4.610788381742738, "grad_norm": 14.124162673950195, "learning_rate": 1.8157344398340252e-05, "loss": 1.2565, "step": 5556 }, { "epoch": 4.61161825726141, "grad_norm": 17.808488845825195, "learning_rate": 1.815701244813278e-05, "loss": 1.5131, "step": 5557 }, { "epoch": 4.612448132780083, "grad_norm": 15.849410057067871, "learning_rate": 1.8156680497925313e-05, "loss": 1.2384, "step": 5558 }, { "epoch": 4.613278008298755, "grad_norm": 9.962212562561035, "learning_rate": 1.8156348547717842e-05, "loss": 1.1467, "step": 5559 }, { "epoch": 4.614107883817427, "grad_norm": 15.456257820129395, "learning_rate": 1.8156016597510374e-05, "loss": 1.7942, "step": 5560 }, { "epoch": 4.614937759336099, "grad_norm": 19.289072036743164, "learning_rate": 1.8155684647302906e-05, "loss": 1.3868, "step": 5561 }, { "epoch": 4.615767634854771, "grad_norm": 12.711864471435547, "learning_rate": 1.8155352697095435e-05, "loss": 1.4255, "step": 5562 }, { "epoch": 4.616597510373444, "grad_norm": 23.452659606933594, "learning_rate": 1.8155020746887967e-05, "loss": 2.1725, "step": 5563 }, { "epoch": 4.617427385892116, "grad_norm": 12.642927169799805, "learning_rate": 1.81546887966805e-05, "loss": 1.0948, "step": 5564 }, { "epoch": 4.618257261410788, "grad_norm": 10.607982635498047, "learning_rate": 1.815435684647303e-05, "loss": 1.0689, "step": 5565 }, { "epoch": 4.61908713692946, "grad_norm": 14.728254318237305, "learning_rate": 1.815402489626556e-05, "loss": 1.068, "step": 5566 }, { "epoch": 4.619917012448132, "grad_norm": 11.87625503540039, "learning_rate": 1.8153692946058092e-05, "loss": 1.0409, 
"step": 5567 }, { "epoch": 4.6207468879668046, "grad_norm": 16.785680770874023, "learning_rate": 1.8153360995850624e-05, "loss": 1.3574, "step": 5568 }, { "epoch": 4.621576763485477, "grad_norm": 13.767330169677734, "learning_rate": 1.8153029045643156e-05, "loss": 1.6002, "step": 5569 }, { "epoch": 4.622406639004149, "grad_norm": 15.47026538848877, "learning_rate": 1.8152697095435685e-05, "loss": 1.2163, "step": 5570 }, { "epoch": 4.623236514522821, "grad_norm": 16.759300231933594, "learning_rate": 1.8152365145228217e-05, "loss": 1.965, "step": 5571 }, { "epoch": 4.624066390041493, "grad_norm": 12.560284614562988, "learning_rate": 1.815203319502075e-05, "loss": 1.5897, "step": 5572 }, { "epoch": 4.6248962655601655, "grad_norm": 9.14549446105957, "learning_rate": 1.815170124481328e-05, "loss": 1.1408, "step": 5573 }, { "epoch": 4.625726141078838, "grad_norm": 21.00138282775879, "learning_rate": 1.815136929460581e-05, "loss": 1.5753, "step": 5574 }, { "epoch": 4.62655601659751, "grad_norm": 15.93901538848877, "learning_rate": 1.8151037344398342e-05, "loss": 1.1124, "step": 5575 }, { "epoch": 4.627385892116182, "grad_norm": 15.695380210876465, "learning_rate": 1.8150705394190874e-05, "loss": 1.1794, "step": 5576 }, { "epoch": 4.628215767634854, "grad_norm": 14.894582748413086, "learning_rate": 1.8150373443983403e-05, "loss": 1.7022, "step": 5577 }, { "epoch": 4.6290456431535265, "grad_norm": 15.71333122253418, "learning_rate": 1.8150041493775935e-05, "loss": 1.3409, "step": 5578 }, { "epoch": 4.629875518672199, "grad_norm": 20.464401245117188, "learning_rate": 1.8149709543568467e-05, "loss": 1.6819, "step": 5579 }, { "epoch": 4.630705394190871, "grad_norm": 13.504186630249023, "learning_rate": 1.8149377593360996e-05, "loss": 1.1896, "step": 5580 }, { "epoch": 4.631535269709543, "grad_norm": 17.674522399902344, "learning_rate": 1.8149045643153528e-05, "loss": 0.785, "step": 5581 }, { "epoch": 4.632365145228215, "grad_norm": 15.727545738220215, "learning_rate": 
1.814871369294606e-05, "loss": 1.1326, "step": 5582 }, { "epoch": 4.6331950207468875, "grad_norm": 10.962987899780273, "learning_rate": 1.814838174273859e-05, "loss": 1.5096, "step": 5583 }, { "epoch": 4.63402489626556, "grad_norm": 11.992470741271973, "learning_rate": 1.814804979253112e-05, "loss": 1.5378, "step": 5584 }, { "epoch": 4.634854771784232, "grad_norm": 12.571730613708496, "learning_rate": 1.8147717842323653e-05, "loss": 1.1588, "step": 5585 }, { "epoch": 4.635684647302904, "grad_norm": 16.432342529296875, "learning_rate": 1.8147385892116185e-05, "loss": 1.2238, "step": 5586 }, { "epoch": 4.636514522821576, "grad_norm": 23.93462562561035, "learning_rate": 1.8147053941908714e-05, "loss": 1.2212, "step": 5587 }, { "epoch": 4.6373443983402485, "grad_norm": 15.623159408569336, "learning_rate": 1.8146721991701246e-05, "loss": 1.6336, "step": 5588 }, { "epoch": 4.638174273858921, "grad_norm": 14.102100372314453, "learning_rate": 1.8146390041493778e-05, "loss": 1.2796, "step": 5589 }, { "epoch": 4.639004149377593, "grad_norm": 21.732301712036133, "learning_rate": 1.814605809128631e-05, "loss": 2.3275, "step": 5590 }, { "epoch": 4.639834024896265, "grad_norm": 11.38355541229248, "learning_rate": 1.814572614107884e-05, "loss": 1.4196, "step": 5591 }, { "epoch": 4.640663900414938, "grad_norm": 14.100300788879395, "learning_rate": 1.814539419087137e-05, "loss": 1.291, "step": 5592 }, { "epoch": 4.64149377593361, "grad_norm": 13.878972053527832, "learning_rate": 1.8145062240663903e-05, "loss": 1.822, "step": 5593 }, { "epoch": 4.6423236514522825, "grad_norm": 18.570219039916992, "learning_rate": 1.8144730290456435e-05, "loss": 1.5874, "step": 5594 }, { "epoch": 4.643153526970955, "grad_norm": 22.881071090698242, "learning_rate": 1.8144398340248964e-05, "loss": 1.3583, "step": 5595 }, { "epoch": 4.643983402489627, "grad_norm": 12.152410507202148, "learning_rate": 1.8144066390041496e-05, "loss": 1.3288, "step": 5596 }, { "epoch": 4.644813278008299, "grad_norm": 
22.811569213867188, "learning_rate": 1.8143734439834028e-05, "loss": 1.2416, "step": 5597 }, { "epoch": 4.645643153526971, "grad_norm": 19.333066940307617, "learning_rate": 1.8143402489626557e-05, "loss": 1.8088, "step": 5598 }, { "epoch": 4.6464730290456435, "grad_norm": 18.755416870117188, "learning_rate": 1.814307053941909e-05, "loss": 1.405, "step": 5599 }, { "epoch": 4.647302904564316, "grad_norm": 14.683113098144531, "learning_rate": 1.8142738589211618e-05, "loss": 1.1253, "step": 5600 }, { "epoch": 4.648132780082988, "grad_norm": 14.611047744750977, "learning_rate": 1.814240663900415e-05, "loss": 1.6145, "step": 5601 }, { "epoch": 4.64896265560166, "grad_norm": 14.740541458129883, "learning_rate": 1.8142074688796682e-05, "loss": 1.0876, "step": 5602 }, { "epoch": 4.649792531120332, "grad_norm": 14.424495697021484, "learning_rate": 1.8141742738589214e-05, "loss": 1.6091, "step": 5603 }, { "epoch": 4.6506224066390045, "grad_norm": 10.514883041381836, "learning_rate": 1.8141410788381743e-05, "loss": 1.0939, "step": 5604 }, { "epoch": 4.651452282157677, "grad_norm": 12.918855667114258, "learning_rate": 1.8141078838174275e-05, "loss": 1.7596, "step": 5605 }, { "epoch": 4.652282157676349, "grad_norm": 16.901081085205078, "learning_rate": 1.8140746887966807e-05, "loss": 1.8114, "step": 5606 }, { "epoch": 4.653112033195021, "grad_norm": 10.448381423950195, "learning_rate": 1.814041493775934e-05, "loss": 1.4088, "step": 5607 }, { "epoch": 4.653941908713693, "grad_norm": 13.005698204040527, "learning_rate": 1.8140082987551868e-05, "loss": 1.8734, "step": 5608 }, { "epoch": 4.6547717842323655, "grad_norm": 12.85934066772461, "learning_rate": 1.81397510373444e-05, "loss": 1.6769, "step": 5609 }, { "epoch": 4.655601659751038, "grad_norm": 9.111410140991211, "learning_rate": 1.8139419087136932e-05, "loss": 1.2343, "step": 5610 }, { "epoch": 4.65643153526971, "grad_norm": 18.57863998413086, "learning_rate": 1.8139087136929464e-05, "loss": 2.047, "step": 5611 }, { "epoch": 
4.657261410788382, "grad_norm": 18.913583755493164, "learning_rate": 1.8138755186721993e-05, "loss": 2.2715, "step": 5612 }, { "epoch": 4.658091286307054, "grad_norm": 17.42350959777832, "learning_rate": 1.8138423236514525e-05, "loss": 2.0054, "step": 5613 }, { "epoch": 4.6589211618257265, "grad_norm": 16.20682716369629, "learning_rate": 1.8138091286307057e-05, "loss": 1.2733, "step": 5614 }, { "epoch": 4.659751037344399, "grad_norm": 12.57110595703125, "learning_rate": 1.8137759336099586e-05, "loss": 0.8847, "step": 5615 }, { "epoch": 4.660580912863071, "grad_norm": 12.640823364257812, "learning_rate": 1.8137427385892118e-05, "loss": 1.2559, "step": 5616 }, { "epoch": 4.661410788381743, "grad_norm": 9.224489212036133, "learning_rate": 1.813709543568465e-05, "loss": 1.335, "step": 5617 }, { "epoch": 4.662240663900415, "grad_norm": 11.661203384399414, "learning_rate": 1.813676348547718e-05, "loss": 1.1373, "step": 5618 }, { "epoch": 4.6630705394190874, "grad_norm": 11.499163627624512, "learning_rate": 1.813643153526971e-05, "loss": 1.8012, "step": 5619 }, { "epoch": 4.66390041493776, "grad_norm": 17.425262451171875, "learning_rate": 1.8136099585062243e-05, "loss": 1.3018, "step": 5620 }, { "epoch": 4.664730290456432, "grad_norm": 9.947319030761719, "learning_rate": 1.813576763485477e-05, "loss": 1.3229, "step": 5621 }, { "epoch": 4.665560165975104, "grad_norm": 19.800077438354492, "learning_rate": 1.8135435684647304e-05, "loss": 0.8625, "step": 5622 }, { "epoch": 4.666390041493776, "grad_norm": 13.028889656066895, "learning_rate": 1.8135103734439836e-05, "loss": 1.8947, "step": 5623 }, { "epoch": 4.667219917012448, "grad_norm": 27.909648895263672, "learning_rate": 1.8134771784232365e-05, "loss": 1.7742, "step": 5624 }, { "epoch": 4.668049792531121, "grad_norm": 14.84560489654541, "learning_rate": 1.8134439834024897e-05, "loss": 1.7315, "step": 5625 }, { "epoch": 4.668879668049793, "grad_norm": 13.802136421203613, "learning_rate": 1.813410788381743e-05, "loss": 
1.1644, "step": 5626 }, { "epoch": 4.669709543568465, "grad_norm": 17.79461669921875, "learning_rate": 1.813377593360996e-05, "loss": 1.2719, "step": 5627 }, { "epoch": 4.670539419087137, "grad_norm": 13.375576972961426, "learning_rate": 1.813344398340249e-05, "loss": 1.3035, "step": 5628 }, { "epoch": 4.671369294605809, "grad_norm": 9.796713829040527, "learning_rate": 1.8133112033195022e-05, "loss": 0.8978, "step": 5629 }, { "epoch": 4.672199170124482, "grad_norm": 10.313724517822266, "learning_rate": 1.8132780082987554e-05, "loss": 0.8947, "step": 5630 }, { "epoch": 4.673029045643154, "grad_norm": 23.643144607543945, "learning_rate": 1.8132448132780086e-05, "loss": 1.3231, "step": 5631 }, { "epoch": 4.673858921161826, "grad_norm": 11.335030555725098, "learning_rate": 1.8132116182572618e-05, "loss": 1.2, "step": 5632 }, { "epoch": 4.674688796680498, "grad_norm": 15.752572059631348, "learning_rate": 1.8131784232365147e-05, "loss": 1.1481, "step": 5633 }, { "epoch": 4.67551867219917, "grad_norm": 18.87421989440918, "learning_rate": 1.813145228215768e-05, "loss": 1.1443, "step": 5634 }, { "epoch": 4.676348547717843, "grad_norm": 14.363447189331055, "learning_rate": 1.813112033195021e-05, "loss": 1.3071, "step": 5635 }, { "epoch": 4.677178423236515, "grad_norm": 12.946590423583984, "learning_rate": 1.813078838174274e-05, "loss": 1.2033, "step": 5636 }, { "epoch": 4.678008298755187, "grad_norm": 10.375175476074219, "learning_rate": 1.8130456431535272e-05, "loss": 0.9716, "step": 5637 }, { "epoch": 4.678838174273859, "grad_norm": 19.373334884643555, "learning_rate": 1.81301244813278e-05, "loss": 1.4048, "step": 5638 }, { "epoch": 4.679668049792531, "grad_norm": 19.846294403076172, "learning_rate": 1.8129792531120333e-05, "loss": 1.5216, "step": 5639 }, { "epoch": 4.680497925311204, "grad_norm": 10.598480224609375, "learning_rate": 1.8129460580912865e-05, "loss": 0.7296, "step": 5640 }, { "epoch": 4.681327800829876, "grad_norm": 15.834653854370117, "learning_rate": 
1.8129128630705393e-05, "loss": 1.4966, "step": 5641 }, { "epoch": 4.682157676348548, "grad_norm": 19.318239212036133, "learning_rate": 1.8128796680497926e-05, "loss": 2.4026, "step": 5642 }, { "epoch": 4.68298755186722, "grad_norm": 14.312356948852539, "learning_rate": 1.8128464730290458e-05, "loss": 1.0236, "step": 5643 }, { "epoch": 4.683817427385892, "grad_norm": 10.771161079406738, "learning_rate": 1.812813278008299e-05, "loss": 0.747, "step": 5644 }, { "epoch": 4.6846473029045645, "grad_norm": 20.618942260742188, "learning_rate": 1.812780082987552e-05, "loss": 1.0087, "step": 5645 }, { "epoch": 4.685477178423237, "grad_norm": 11.314779281616211, "learning_rate": 1.812746887966805e-05, "loss": 1.1536, "step": 5646 }, { "epoch": 4.686307053941909, "grad_norm": 16.091609954833984, "learning_rate": 1.8127136929460583e-05, "loss": 1.4376, "step": 5647 }, { "epoch": 4.687136929460581, "grad_norm": 12.801372528076172, "learning_rate": 1.8126804979253115e-05, "loss": 0.8632, "step": 5648 }, { "epoch": 4.687966804979253, "grad_norm": 11.043707847595215, "learning_rate": 1.8126473029045644e-05, "loss": 1.2226, "step": 5649 }, { "epoch": 4.6887966804979255, "grad_norm": 15.578460693359375, "learning_rate": 1.8126141078838176e-05, "loss": 1.0824, "step": 5650 }, { "epoch": 4.689626556016598, "grad_norm": 12.385218620300293, "learning_rate": 1.8125809128630708e-05, "loss": 1.6365, "step": 5651 }, { "epoch": 4.69045643153527, "grad_norm": 9.05652904510498, "learning_rate": 1.812547717842324e-05, "loss": 0.7461, "step": 5652 }, { "epoch": 4.691286307053942, "grad_norm": 16.359722137451172, "learning_rate": 1.812514522821577e-05, "loss": 0.9986, "step": 5653 }, { "epoch": 4.692116182572614, "grad_norm": 17.6013240814209, "learning_rate": 1.81248132780083e-05, "loss": 1.0391, "step": 5654 }, { "epoch": 4.6929460580912865, "grad_norm": 29.374515533447266, "learning_rate": 1.8124481327800833e-05, "loss": 2.7644, "step": 5655 }, { "epoch": 4.693775933609959, "grad_norm": 
19.42072105407715, "learning_rate": 1.812414937759336e-05, "loss": 0.9628, "step": 5656 }, { "epoch": 4.694605809128631, "grad_norm": 10.284246444702148, "learning_rate": 1.8123817427385894e-05, "loss": 1.0961, "step": 5657 }, { "epoch": 4.695435684647303, "grad_norm": 12.637935638427734, "learning_rate": 1.8123485477178426e-05, "loss": 0.8718, "step": 5658 }, { "epoch": 4.696265560165975, "grad_norm": 15.94958782196045, "learning_rate": 1.8123153526970954e-05, "loss": 1.0804, "step": 5659 }, { "epoch": 4.6970954356846475, "grad_norm": 12.644710540771484, "learning_rate": 1.8122821576763487e-05, "loss": 1.2901, "step": 5660 }, { "epoch": 4.69792531120332, "grad_norm": 12.479612350463867, "learning_rate": 1.812248962655602e-05, "loss": 0.8453, "step": 5661 }, { "epoch": 4.698755186721992, "grad_norm": 16.583755493164062, "learning_rate": 1.8122157676348547e-05, "loss": 1.2995, "step": 5662 }, { "epoch": 4.699585062240664, "grad_norm": 23.28697967529297, "learning_rate": 1.812182572614108e-05, "loss": 1.9254, "step": 5663 }, { "epoch": 4.700414937759336, "grad_norm": 26.15114974975586, "learning_rate": 1.812149377593361e-05, "loss": 2.5073, "step": 5664 }, { "epoch": 4.7012448132780085, "grad_norm": 12.74905014038086, "learning_rate": 1.8121161825726144e-05, "loss": 0.9797, "step": 5665 }, { "epoch": 4.702074688796681, "grad_norm": 11.135167121887207, "learning_rate": 1.8120829875518672e-05, "loss": 0.8422, "step": 5666 }, { "epoch": 4.702904564315353, "grad_norm": 31.18474006652832, "learning_rate": 1.8120497925311205e-05, "loss": 1.6379, "step": 5667 }, { "epoch": 4.703734439834025, "grad_norm": 21.851560592651367, "learning_rate": 1.8120165975103737e-05, "loss": 1.297, "step": 5668 }, { "epoch": 4.704564315352697, "grad_norm": 11.220932960510254, "learning_rate": 1.811983402489627e-05, "loss": 0.8961, "step": 5669 }, { "epoch": 4.7053941908713695, "grad_norm": 21.953081130981445, "learning_rate": 1.8119502074688797e-05, "loss": 1.1561, "step": 5670 }, { "epoch": 
4.706224066390042, "grad_norm": 26.663951873779297, "learning_rate": 1.811917012448133e-05, "loss": 1.0377, "step": 5671 }, { "epoch": 4.707053941908714, "grad_norm": 25.659381866455078, "learning_rate": 1.811883817427386e-05, "loss": 1.3418, "step": 5672 }, { "epoch": 4.707883817427386, "grad_norm": 19.50069236755371, "learning_rate": 1.8118506224066394e-05, "loss": 2.0951, "step": 5673 }, { "epoch": 4.708713692946058, "grad_norm": 17.30755615234375, "learning_rate": 1.8118174273858923e-05, "loss": 1.8723, "step": 5674 }, { "epoch": 4.70954356846473, "grad_norm": 23.047616958618164, "learning_rate": 1.8117842323651455e-05, "loss": 2.0932, "step": 5675 }, { "epoch": 4.710373443983403, "grad_norm": 13.457672119140625, "learning_rate": 1.8117510373443983e-05, "loss": 1.0693, "step": 5676 }, { "epoch": 4.711203319502075, "grad_norm": 19.378564834594727, "learning_rate": 1.8117178423236515e-05, "loss": 1.3507, "step": 5677 }, { "epoch": 4.712033195020747, "grad_norm": 14.237495422363281, "learning_rate": 1.8116846473029048e-05, "loss": 1.203, "step": 5678 }, { "epoch": 4.712863070539419, "grad_norm": 19.628259658813477, "learning_rate": 1.8116514522821576e-05, "loss": 2.2164, "step": 5679 }, { "epoch": 4.713692946058091, "grad_norm": 17.790685653686523, "learning_rate": 1.811618257261411e-05, "loss": 2.0405, "step": 5680 }, { "epoch": 4.714522821576764, "grad_norm": 14.078742027282715, "learning_rate": 1.811585062240664e-05, "loss": 1.5028, "step": 5681 }, { "epoch": 4.715352697095436, "grad_norm": 17.49408721923828, "learning_rate": 1.8115518672199173e-05, "loss": 1.6409, "step": 5682 }, { "epoch": 4.716182572614108, "grad_norm": 9.583661079406738, "learning_rate": 1.81151867219917e-05, "loss": 0.7329, "step": 5683 }, { "epoch": 4.71701244813278, "grad_norm": 17.486896514892578, "learning_rate": 1.8114854771784233e-05, "loss": 1.4947, "step": 5684 }, { "epoch": 4.717842323651452, "grad_norm": 16.90993309020996, "learning_rate": 1.8114522821576766e-05, "loss": 0.8256, 
"step": 5685 }, { "epoch": 4.718672199170125, "grad_norm": 13.341667175292969, "learning_rate": 1.8114190871369298e-05, "loss": 1.0424, "step": 5686 }, { "epoch": 4.719502074688797, "grad_norm": 12.180904388427734, "learning_rate": 1.8113858921161826e-05, "loss": 0.7858, "step": 5687 }, { "epoch": 4.720331950207469, "grad_norm": 12.18250560760498, "learning_rate": 1.811352697095436e-05, "loss": 0.8737, "step": 5688 }, { "epoch": 4.721161825726141, "grad_norm": 12.025622367858887, "learning_rate": 1.811319502074689e-05, "loss": 1.2356, "step": 5689 }, { "epoch": 4.721991701244813, "grad_norm": 19.072025299072266, "learning_rate": 1.8112863070539423e-05, "loss": 2.8405, "step": 5690 }, { "epoch": 4.722821576763486, "grad_norm": 16.951587677001953, "learning_rate": 1.811253112033195e-05, "loss": 1.5544, "step": 5691 }, { "epoch": 4.723651452282158, "grad_norm": 15.910937309265137, "learning_rate": 1.8112199170124484e-05, "loss": 1.5949, "step": 5692 }, { "epoch": 4.72448132780083, "grad_norm": 26.563623428344727, "learning_rate": 1.8111867219917016e-05, "loss": 1.7024, "step": 5693 }, { "epoch": 4.725311203319502, "grad_norm": 12.663944244384766, "learning_rate": 1.8111535269709544e-05, "loss": 1.067, "step": 5694 }, { "epoch": 4.726141078838174, "grad_norm": 16.510000228881836, "learning_rate": 1.8111203319502076e-05, "loss": 1.092, "step": 5695 }, { "epoch": 4.7269709543568466, "grad_norm": 10.477309226989746, "learning_rate": 1.811087136929461e-05, "loss": 1.3068, "step": 5696 }, { "epoch": 4.727800829875519, "grad_norm": 12.7138090133667, "learning_rate": 1.8110539419087137e-05, "loss": 0.8935, "step": 5697 }, { "epoch": 4.728630705394191, "grad_norm": 11.808931350708008, "learning_rate": 1.811020746887967e-05, "loss": 0.9866, "step": 5698 }, { "epoch": 4.729460580912863, "grad_norm": 20.36458396911621, "learning_rate": 1.8109875518672198e-05, "loss": 1.6374, "step": 5699 }, { "epoch": 4.730290456431535, "grad_norm": 14.944721221923828, "learning_rate": 
1.810954356846473e-05, "loss": 1.5432, "step": 5700 }, { "epoch": 4.7311203319502075, "grad_norm": 10.753727912902832, "learning_rate": 1.8109211618257262e-05, "loss": 0.9364, "step": 5701 }, { "epoch": 4.73195020746888, "grad_norm": 12.934391975402832, "learning_rate": 1.8108879668049794e-05, "loss": 1.2502, "step": 5702 }, { "epoch": 4.732780082987552, "grad_norm": 24.39999771118164, "learning_rate": 1.8108547717842323e-05, "loss": 1.9792, "step": 5703 }, { "epoch": 4.733609958506224, "grad_norm": 10.928123474121094, "learning_rate": 1.8108215767634855e-05, "loss": 1.2594, "step": 5704 }, { "epoch": 4.734439834024896, "grad_norm": 16.56842613220215, "learning_rate": 1.8107883817427387e-05, "loss": 1.8646, "step": 5705 }, { "epoch": 4.7352697095435685, "grad_norm": 13.763626098632812, "learning_rate": 1.810755186721992e-05, "loss": 0.7596, "step": 5706 }, { "epoch": 4.736099585062241, "grad_norm": 11.926955223083496, "learning_rate": 1.8107219917012448e-05, "loss": 1.6685, "step": 5707 }, { "epoch": 4.736929460580913, "grad_norm": 11.432174682617188, "learning_rate": 1.810688796680498e-05, "loss": 0.8909, "step": 5708 }, { "epoch": 4.737759336099585, "grad_norm": 16.863765716552734, "learning_rate": 1.8106556016597512e-05, "loss": 1.9121, "step": 5709 }, { "epoch": 4.738589211618257, "grad_norm": 16.362218856811523, "learning_rate": 1.8106224066390045e-05, "loss": 1.6552, "step": 5710 }, { "epoch": 4.7394190871369295, "grad_norm": 12.805654525756836, "learning_rate": 1.8105892116182577e-05, "loss": 1.3613, "step": 5711 }, { "epoch": 4.740248962655602, "grad_norm": 12.912162780761719, "learning_rate": 1.8105560165975105e-05, "loss": 1.9005, "step": 5712 }, { "epoch": 4.741078838174274, "grad_norm": 19.230966567993164, "learning_rate": 1.8105228215767637e-05, "loss": 1.458, "step": 5713 }, { "epoch": 4.741908713692946, "grad_norm": 15.129587173461914, "learning_rate": 1.810489626556017e-05, "loss": 1.5589, "step": 5714 }, { "epoch": 4.742738589211618, "grad_norm": 
14.539974212646484, "learning_rate": 1.8104564315352698e-05, "loss": 1.8303, "step": 5715 }, { "epoch": 4.7435684647302905, "grad_norm": 15.29634952545166, "learning_rate": 1.810423236514523e-05, "loss": 1.4085, "step": 5716 }, { "epoch": 4.744398340248963, "grad_norm": 14.2179536819458, "learning_rate": 1.810390041493776e-05, "loss": 1.3762, "step": 5717 }, { "epoch": 4.745228215767635, "grad_norm": 17.35223388671875, "learning_rate": 1.810356846473029e-05, "loss": 1.7408, "step": 5718 }, { "epoch": 4.746058091286307, "grad_norm": 10.886016845703125, "learning_rate": 1.8103236514522823e-05, "loss": 1.1264, "step": 5719 }, { "epoch": 4.746887966804979, "grad_norm": 14.020925521850586, "learning_rate": 1.8102904564315352e-05, "loss": 1.0946, "step": 5720 }, { "epoch": 4.7477178423236515, "grad_norm": 14.282012939453125, "learning_rate": 1.8102572614107884e-05, "loss": 1.8192, "step": 5721 }, { "epoch": 4.748547717842324, "grad_norm": 13.652073860168457, "learning_rate": 1.8102240663900416e-05, "loss": 1.4781, "step": 5722 }, { "epoch": 4.749377593360996, "grad_norm": 21.446596145629883, "learning_rate": 1.810190871369295e-05, "loss": 1.7205, "step": 5723 }, { "epoch": 4.750207468879668, "grad_norm": 11.253631591796875, "learning_rate": 1.8101576763485477e-05, "loss": 1.2838, "step": 5724 }, { "epoch": 4.75103734439834, "grad_norm": 16.768020629882812, "learning_rate": 1.810124481327801e-05, "loss": 1.6593, "step": 5725 }, { "epoch": 4.751867219917012, "grad_norm": 17.00059700012207, "learning_rate": 1.810091286307054e-05, "loss": 1.7134, "step": 5726 }, { "epoch": 4.752697095435685, "grad_norm": 11.787339210510254, "learning_rate": 1.8100580912863073e-05, "loss": 1.4359, "step": 5727 }, { "epoch": 4.753526970954357, "grad_norm": 11.798918724060059, "learning_rate": 1.8100248962655602e-05, "loss": 1.0489, "step": 5728 }, { "epoch": 4.754356846473029, "grad_norm": 11.360837936401367, "learning_rate": 1.8099917012448134e-05, "loss": 1.5932, "step": 5729 }, { "epoch": 
4.755186721991701, "grad_norm": 10.091628074645996, "learning_rate": 1.8099585062240666e-05, "loss": 1.28, "step": 5730 }, { "epoch": 4.756016597510373, "grad_norm": 12.19310188293457, "learning_rate": 1.80992531120332e-05, "loss": 1.0693, "step": 5731 }, { "epoch": 4.756846473029046, "grad_norm": 22.91493034362793, "learning_rate": 1.8098921161825727e-05, "loss": 0.8522, "step": 5732 }, { "epoch": 4.757676348547718, "grad_norm": 16.582782745361328, "learning_rate": 1.809858921161826e-05, "loss": 1.1639, "step": 5733 }, { "epoch": 4.75850622406639, "grad_norm": 12.16837215423584, "learning_rate": 1.809825726141079e-05, "loss": 1.1684, "step": 5734 }, { "epoch": 4.759336099585062, "grad_norm": 11.656092643737793, "learning_rate": 1.809792531120332e-05, "loss": 1.691, "step": 5735 }, { "epoch": 4.760165975103734, "grad_norm": 11.532025337219238, "learning_rate": 1.8097593360995852e-05, "loss": 1.1031, "step": 5736 }, { "epoch": 4.760995850622407, "grad_norm": 12.509071350097656, "learning_rate": 1.8097261410788384e-05, "loss": 0.8142, "step": 5737 }, { "epoch": 4.761825726141079, "grad_norm": 10.04922103881836, "learning_rate": 1.8096929460580913e-05, "loss": 1.415, "step": 5738 }, { "epoch": 4.762655601659751, "grad_norm": 17.413246154785156, "learning_rate": 1.8096597510373445e-05, "loss": 1.7985, "step": 5739 }, { "epoch": 4.763485477178423, "grad_norm": 18.031980514526367, "learning_rate": 1.8096265560165977e-05, "loss": 1.4811, "step": 5740 }, { "epoch": 4.764315352697095, "grad_norm": 18.107540130615234, "learning_rate": 1.8095933609958506e-05, "loss": 0.7921, "step": 5741 }, { "epoch": 4.765145228215768, "grad_norm": 11.87496566772461, "learning_rate": 1.8095601659751038e-05, "loss": 1.0354, "step": 5742 }, { "epoch": 4.76597510373444, "grad_norm": 22.154451370239258, "learning_rate": 1.809526970954357e-05, "loss": 1.3405, "step": 5743 }, { "epoch": 4.766804979253112, "grad_norm": 17.719823837280273, "learning_rate": 1.8094937759336102e-05, "loss": 1.7335, 
"step": 5744 }, { "epoch": 4.767634854771784, "grad_norm": 10.842887878417969, "learning_rate": 1.809460580912863e-05, "loss": 1.1257, "step": 5745 }, { "epoch": 4.768464730290456, "grad_norm": 9.507131576538086, "learning_rate": 1.8094273858921163e-05, "loss": 1.0838, "step": 5746 }, { "epoch": 4.769294605809129, "grad_norm": 16.245555877685547, "learning_rate": 1.8093941908713695e-05, "loss": 1.9618, "step": 5747 }, { "epoch": 4.770124481327801, "grad_norm": 12.674678802490234, "learning_rate": 1.8093609958506227e-05, "loss": 0.8083, "step": 5748 }, { "epoch": 4.770954356846473, "grad_norm": 20.31821060180664, "learning_rate": 1.8093278008298756e-05, "loss": 1.5823, "step": 5749 }, { "epoch": 4.771784232365145, "grad_norm": 9.726889610290527, "learning_rate": 1.8092946058091288e-05, "loss": 1.2887, "step": 5750 }, { "epoch": 4.772614107883817, "grad_norm": 12.232330322265625, "learning_rate": 1.809261410788382e-05, "loss": 1.5549, "step": 5751 }, { "epoch": 4.7734439834024895, "grad_norm": 11.359373092651367, "learning_rate": 1.8092282157676352e-05, "loss": 1.3831, "step": 5752 }, { "epoch": 4.774273858921162, "grad_norm": 18.736600875854492, "learning_rate": 1.809195020746888e-05, "loss": 1.9402, "step": 5753 }, { "epoch": 4.775103734439834, "grad_norm": 18.226707458496094, "learning_rate": 1.8091618257261413e-05, "loss": 1.0309, "step": 5754 }, { "epoch": 4.775933609958506, "grad_norm": 15.524645805358887, "learning_rate": 1.8091286307053942e-05, "loss": 1.3589, "step": 5755 }, { "epoch": 4.776763485477178, "grad_norm": 10.293891906738281, "learning_rate": 1.8090954356846474e-05, "loss": 0.9005, "step": 5756 }, { "epoch": 4.7775933609958505, "grad_norm": 16.728788375854492, "learning_rate": 1.8090622406639006e-05, "loss": 1.6182, "step": 5757 }, { "epoch": 4.778423236514523, "grad_norm": 19.27110481262207, "learning_rate": 1.8090290456431535e-05, "loss": 2.0673, "step": 5758 }, { "epoch": 4.779253112033195, "grad_norm": 13.778679847717285, "learning_rate": 
1.8089958506224067e-05, "loss": 1.3097, "step": 5759 }, { "epoch": 4.780082987551867, "grad_norm": 23.178226470947266, "learning_rate": 1.80896265560166e-05, "loss": 1.8492, "step": 5760 }, { "epoch": 4.780912863070539, "grad_norm": 9.971366882324219, "learning_rate": 1.808929460580913e-05, "loss": 1.1497, "step": 5761 }, { "epoch": 4.7817427385892115, "grad_norm": 14.02800178527832, "learning_rate": 1.808896265560166e-05, "loss": 0.8937, "step": 5762 }, { "epoch": 4.782572614107884, "grad_norm": 14.751845359802246, "learning_rate": 1.8088630705394192e-05, "loss": 1.3699, "step": 5763 }, { "epoch": 4.783402489626556, "grad_norm": 10.488628387451172, "learning_rate": 1.8088298755186724e-05, "loss": 1.084, "step": 5764 }, { "epoch": 4.784232365145228, "grad_norm": 15.923357009887695, "learning_rate": 1.8087966804979256e-05, "loss": 1.1389, "step": 5765 }, { "epoch": 4.7850622406639, "grad_norm": 14.361320495605469, "learning_rate": 1.8087634854771785e-05, "loss": 1.5132, "step": 5766 }, { "epoch": 4.7858921161825725, "grad_norm": 16.581525802612305, "learning_rate": 1.8087302904564317e-05, "loss": 1.2596, "step": 5767 }, { "epoch": 4.786721991701245, "grad_norm": 19.529659271240234, "learning_rate": 1.808697095435685e-05, "loss": 2.0074, "step": 5768 }, { "epoch": 4.787551867219917, "grad_norm": 10.76986312866211, "learning_rate": 1.808663900414938e-05, "loss": 1.5571, "step": 5769 }, { "epoch": 4.788381742738589, "grad_norm": 10.934483528137207, "learning_rate": 1.808630705394191e-05, "loss": 0.9898, "step": 5770 }, { "epoch": 4.789211618257261, "grad_norm": 17.79610824584961, "learning_rate": 1.8085975103734442e-05, "loss": 1.586, "step": 5771 }, { "epoch": 4.7900414937759335, "grad_norm": 12.705738067626953, "learning_rate": 1.8085643153526974e-05, "loss": 1.2333, "step": 5772 }, { "epoch": 4.790871369294606, "grad_norm": 11.936070442199707, "learning_rate": 1.8085311203319503e-05, "loss": 1.0529, "step": 5773 }, { "epoch": 4.791701244813278, "grad_norm": 
10.861488342285156, "learning_rate": 1.8084979253112035e-05, "loss": 1.1064, "step": 5774 }, { "epoch": 4.79253112033195, "grad_norm": 19.25885581970215, "learning_rate": 1.8084647302904567e-05, "loss": 1.5948, "step": 5775 }, { "epoch": 4.793360995850622, "grad_norm": 12.724879264831543, "learning_rate": 1.8084315352697096e-05, "loss": 1.4437, "step": 5776 }, { "epoch": 4.7941908713692944, "grad_norm": 10.436806678771973, "learning_rate": 1.8083983402489628e-05, "loss": 0.9314, "step": 5777 }, { "epoch": 4.795020746887967, "grad_norm": 11.550084114074707, "learning_rate": 1.8083651452282157e-05, "loss": 1.0843, "step": 5778 }, { "epoch": 4.795850622406639, "grad_norm": 11.116769790649414, "learning_rate": 1.808331950207469e-05, "loss": 0.8525, "step": 5779 }, { "epoch": 4.796680497925311, "grad_norm": 17.491283416748047, "learning_rate": 1.808298755186722e-05, "loss": 2.401, "step": 5780 }, { "epoch": 4.797510373443983, "grad_norm": 15.563896179199219, "learning_rate": 1.8082655601659753e-05, "loss": 1.2447, "step": 5781 }, { "epoch": 4.798340248962655, "grad_norm": 17.642620086669922, "learning_rate": 1.8082323651452282e-05, "loss": 1.0125, "step": 5782 }, { "epoch": 4.799170124481328, "grad_norm": 14.210214614868164, "learning_rate": 1.8081991701244814e-05, "loss": 1.4309, "step": 5783 }, { "epoch": 4.8, "grad_norm": 9.872849464416504, "learning_rate": 1.8081659751037346e-05, "loss": 0.9545, "step": 5784 }, { "epoch": 4.800829875518672, "grad_norm": 11.418217658996582, "learning_rate": 1.8081327800829878e-05, "loss": 1.3994, "step": 5785 }, { "epoch": 4.801659751037344, "grad_norm": 16.139379501342773, "learning_rate": 1.8080995850622407e-05, "loss": 1.4451, "step": 5786 }, { "epoch": 4.802489626556016, "grad_norm": 13.767942428588867, "learning_rate": 1.808066390041494e-05, "loss": 1.2597, "step": 5787 }, { "epoch": 4.803319502074689, "grad_norm": 9.335797309875488, "learning_rate": 1.808033195020747e-05, "loss": 1.188, "step": 5788 }, { "epoch": 
4.804149377593361, "grad_norm": 18.262630462646484, "learning_rate": 1.8080000000000003e-05, "loss": 1.4617, "step": 5789 }, { "epoch": 4.804979253112033, "grad_norm": 10.712263107299805, "learning_rate": 1.8079668049792535e-05, "loss": 0.9404, "step": 5790 }, { "epoch": 4.805809128630705, "grad_norm": 20.136798858642578, "learning_rate": 1.8079336099585064e-05, "loss": 2.157, "step": 5791 }, { "epoch": 4.806639004149377, "grad_norm": 16.32054901123047, "learning_rate": 1.8079004149377596e-05, "loss": 2.0574, "step": 5792 }, { "epoch": 4.80746887966805, "grad_norm": 21.98321533203125, "learning_rate": 1.8078672199170125e-05, "loss": 1.9701, "step": 5793 }, { "epoch": 4.808298755186722, "grad_norm": 20.85439109802246, "learning_rate": 1.8078340248962657e-05, "loss": 1.6432, "step": 5794 }, { "epoch": 4.809128630705394, "grad_norm": 11.451794624328613, "learning_rate": 1.807800829875519e-05, "loss": 1.5303, "step": 5795 }, { "epoch": 4.809958506224066, "grad_norm": 14.879719734191895, "learning_rate": 1.8077676348547718e-05, "loss": 1.7886, "step": 5796 }, { "epoch": 4.810788381742738, "grad_norm": 15.608558654785156, "learning_rate": 1.807734439834025e-05, "loss": 1.359, "step": 5797 }, { "epoch": 4.811618257261411, "grad_norm": 9.295662879943848, "learning_rate": 1.8077012448132782e-05, "loss": 0.9011, "step": 5798 }, { "epoch": 4.812448132780083, "grad_norm": 15.383535385131836, "learning_rate": 1.807668049792531e-05, "loss": 1.9253, "step": 5799 }, { "epoch": 4.813278008298755, "grad_norm": 20.702890396118164, "learning_rate": 1.8076348547717843e-05, "loss": 1.8711, "step": 5800 }, { "epoch": 4.814107883817427, "grad_norm": 15.710381507873535, "learning_rate": 1.8076016597510375e-05, "loss": 1.7237, "step": 5801 }, { "epoch": 4.814937759336099, "grad_norm": 13.117230415344238, "learning_rate": 1.8075684647302907e-05, "loss": 1.2277, "step": 5802 }, { "epoch": 4.8157676348547716, "grad_norm": 11.300597190856934, "learning_rate": 1.8075352697095436e-05, "loss": 
0.9822, "step": 5803 }, { "epoch": 4.816597510373444, "grad_norm": 13.277480125427246, "learning_rate": 1.8075020746887968e-05, "loss": 1.1544, "step": 5804 }, { "epoch": 4.817427385892116, "grad_norm": 13.320599555969238, "learning_rate": 1.80746887966805e-05, "loss": 1.5871, "step": 5805 }, { "epoch": 4.818257261410788, "grad_norm": 10.791191101074219, "learning_rate": 1.8074356846473032e-05, "loss": 1.0282, "step": 5806 }, { "epoch": 4.81908713692946, "grad_norm": 11.570298194885254, "learning_rate": 1.807402489626556e-05, "loss": 1.1125, "step": 5807 }, { "epoch": 4.8199170124481325, "grad_norm": 12.105219841003418, "learning_rate": 1.8073692946058093e-05, "loss": 0.9342, "step": 5808 }, { "epoch": 4.820746887966805, "grad_norm": 12.550788879394531, "learning_rate": 1.8073360995850625e-05, "loss": 0.7233, "step": 5809 }, { "epoch": 4.821576763485477, "grad_norm": 13.172408103942871, "learning_rate": 1.8073029045643157e-05, "loss": 1.1979, "step": 5810 }, { "epoch": 4.822406639004149, "grad_norm": 16.702842712402344, "learning_rate": 1.8072697095435686e-05, "loss": 1.8593, "step": 5811 }, { "epoch": 4.823236514522821, "grad_norm": 24.477956771850586, "learning_rate": 1.8072365145228218e-05, "loss": 2.9505, "step": 5812 }, { "epoch": 4.8240663900414935, "grad_norm": 10.742155075073242, "learning_rate": 1.807203319502075e-05, "loss": 0.9489, "step": 5813 }, { "epoch": 4.824896265560166, "grad_norm": 14.653923034667969, "learning_rate": 1.807170124481328e-05, "loss": 1.7164, "step": 5814 }, { "epoch": 4.825726141078838, "grad_norm": 14.191996574401855, "learning_rate": 1.807136929460581e-05, "loss": 1.9524, "step": 5815 }, { "epoch": 4.82655601659751, "grad_norm": 11.141035079956055, "learning_rate": 1.807103734439834e-05, "loss": 0.8708, "step": 5816 }, { "epoch": 4.827385892116182, "grad_norm": 24.2984561920166, "learning_rate": 1.807070539419087e-05, "loss": 1.6472, "step": 5817 }, { "epoch": 4.8282157676348545, "grad_norm": 16.938199996948242, "learning_rate": 
1.8070373443983404e-05, "loss": 1.7298, "step": 5818 }, { "epoch": 4.829045643153527, "grad_norm": 12.493682861328125, "learning_rate": 1.8070041493775936e-05, "loss": 1.2088, "step": 5819 }, { "epoch": 4.829875518672199, "grad_norm": 20.308914184570312, "learning_rate": 1.8069709543568465e-05, "loss": 1.8703, "step": 5820 }, { "epoch": 4.830705394190871, "grad_norm": 15.893142700195312, "learning_rate": 1.8069377593360997e-05, "loss": 2.0157, "step": 5821 }, { "epoch": 4.831535269709543, "grad_norm": 15.04019546508789, "learning_rate": 1.806904564315353e-05, "loss": 1.1908, "step": 5822 }, { "epoch": 4.8323651452282155, "grad_norm": 11.387293815612793, "learning_rate": 1.806871369294606e-05, "loss": 1.3785, "step": 5823 }, { "epoch": 4.833195020746888, "grad_norm": 14.41379165649414, "learning_rate": 1.806838174273859e-05, "loss": 1.5183, "step": 5824 }, { "epoch": 4.83402489626556, "grad_norm": 24.596607208251953, "learning_rate": 1.806804979253112e-05, "loss": 1.1613, "step": 5825 }, { "epoch": 4.834854771784232, "grad_norm": 16.1221866607666, "learning_rate": 1.8067717842323654e-05, "loss": 1.7979, "step": 5826 }, { "epoch": 4.835684647302904, "grad_norm": 15.229272842407227, "learning_rate": 1.8067385892116186e-05, "loss": 1.7983, "step": 5827 }, { "epoch": 4.8365145228215765, "grad_norm": 17.00309181213379, "learning_rate": 1.8067053941908715e-05, "loss": 1.3739, "step": 5828 }, { "epoch": 4.837344398340249, "grad_norm": 14.729134559631348, "learning_rate": 1.8066721991701247e-05, "loss": 0.9169, "step": 5829 }, { "epoch": 4.838174273858921, "grad_norm": 17.651132583618164, "learning_rate": 1.806639004149378e-05, "loss": 1.4586, "step": 5830 }, { "epoch": 4.839004149377593, "grad_norm": 10.09956169128418, "learning_rate": 1.806605809128631e-05, "loss": 1.3164, "step": 5831 }, { "epoch": 4.839834024896265, "grad_norm": 19.653013229370117, "learning_rate": 1.806572614107884e-05, "loss": 2.0388, "step": 5832 }, { "epoch": 4.840663900414937, "grad_norm": 
22.965436935424805, "learning_rate": 1.8065394190871372e-05, "loss": 1.2072, "step": 5833 }, { "epoch": 4.84149377593361, "grad_norm": 16.655473709106445, "learning_rate": 1.80650622406639e-05, "loss": 1.5174, "step": 5834 }, { "epoch": 4.842323651452282, "grad_norm": 8.696548461914062, "learning_rate": 1.8064730290456433e-05, "loss": 0.8123, "step": 5835 }, { "epoch": 4.843153526970954, "grad_norm": 15.279023170471191, "learning_rate": 1.8064398340248965e-05, "loss": 1.8048, "step": 5836 }, { "epoch": 4.843983402489626, "grad_norm": 17.45270538330078, "learning_rate": 1.8064066390041493e-05, "loss": 1.2135, "step": 5837 }, { "epoch": 4.844813278008298, "grad_norm": 13.585926055908203, "learning_rate": 1.8063734439834026e-05, "loss": 0.659, "step": 5838 }, { "epoch": 4.845643153526971, "grad_norm": 12.76276683807373, "learning_rate": 1.8063402489626558e-05, "loss": 1.0921, "step": 5839 }, { "epoch": 4.846473029045643, "grad_norm": 16.50505828857422, "learning_rate": 1.806307053941909e-05, "loss": 1.5296, "step": 5840 }, { "epoch": 4.847302904564315, "grad_norm": 16.204294204711914, "learning_rate": 1.806273858921162e-05, "loss": 1.2174, "step": 5841 }, { "epoch": 4.848132780082987, "grad_norm": 19.131492614746094, "learning_rate": 1.806240663900415e-05, "loss": 1.6371, "step": 5842 }, { "epoch": 4.848962655601659, "grad_norm": 14.014527320861816, "learning_rate": 1.8062074688796683e-05, "loss": 1.4865, "step": 5843 }, { "epoch": 4.849792531120332, "grad_norm": 13.038909912109375, "learning_rate": 1.8061742738589215e-05, "loss": 1.1145, "step": 5844 }, { "epoch": 4.850622406639004, "grad_norm": 13.196813583374023, "learning_rate": 1.8061410788381744e-05, "loss": 1.3984, "step": 5845 }, { "epoch": 4.851452282157676, "grad_norm": 14.136664390563965, "learning_rate": 1.8061078838174276e-05, "loss": 0.8149, "step": 5846 }, { "epoch": 4.852282157676348, "grad_norm": 10.329492568969727, "learning_rate": 1.8060746887966808e-05, "loss": 0.9895, "step": 5847 }, { "epoch": 
4.85311203319502, "grad_norm": 9.389318466186523, "learning_rate": 1.806041493775934e-05, "loss": 0.7992, "step": 5848 }, { "epoch": 4.853941908713693, "grad_norm": 22.802562713623047, "learning_rate": 1.806008298755187e-05, "loss": 1.6288, "step": 5849 }, { "epoch": 4.854771784232365, "grad_norm": 15.102727890014648, "learning_rate": 1.80597510373444e-05, "loss": 1.0232, "step": 5850 }, { "epoch": 4.855601659751037, "grad_norm": 15.795186042785645, "learning_rate": 1.8059419087136933e-05, "loss": 1.2913, "step": 5851 }, { "epoch": 4.856431535269709, "grad_norm": 11.740555763244629, "learning_rate": 1.805908713692946e-05, "loss": 1.0699, "step": 5852 }, { "epoch": 4.857261410788381, "grad_norm": 18.26926040649414, "learning_rate": 1.8058755186721994e-05, "loss": 1.5485, "step": 5853 }, { "epoch": 4.858091286307054, "grad_norm": 21.658313751220703, "learning_rate": 1.8058423236514522e-05, "loss": 1.5977, "step": 5854 }, { "epoch": 4.858921161825726, "grad_norm": 12.98011302947998, "learning_rate": 1.8058091286307054e-05, "loss": 0.5154, "step": 5855 }, { "epoch": 4.859751037344399, "grad_norm": 11.603620529174805, "learning_rate": 1.8057759336099587e-05, "loss": 1.1736, "step": 5856 }, { "epoch": 4.860580912863071, "grad_norm": 12.281085968017578, "learning_rate": 1.8057427385892115e-05, "loss": 1.0455, "step": 5857 }, { "epoch": 4.861410788381743, "grad_norm": 17.123748779296875, "learning_rate": 1.8057095435684647e-05, "loss": 1.0529, "step": 5858 }, { "epoch": 4.862240663900415, "grad_norm": 16.634050369262695, "learning_rate": 1.805676348547718e-05, "loss": 1.386, "step": 5859 }, { "epoch": 4.863070539419088, "grad_norm": 16.01263427734375, "learning_rate": 1.805643153526971e-05, "loss": 1.0884, "step": 5860 }, { "epoch": 4.86390041493776, "grad_norm": 25.679786682128906, "learning_rate": 1.805609958506224e-05, "loss": 1.8821, "step": 5861 }, { "epoch": 4.864730290456432, "grad_norm": 19.23079490661621, "learning_rate": 1.8055767634854772e-05, "loss": 1.5238, 
"step": 5862 }, { "epoch": 4.865560165975104, "grad_norm": 19.534650802612305, "learning_rate": 1.8055435684647305e-05, "loss": 2.5649, "step": 5863 }, { "epoch": 4.866390041493776, "grad_norm": 11.160650253295898, "learning_rate": 1.8055103734439837e-05, "loss": 1.2385, "step": 5864 }, { "epoch": 4.867219917012449, "grad_norm": 17.21571159362793, "learning_rate": 1.8054771784232365e-05, "loss": 1.5556, "step": 5865 }, { "epoch": 4.868049792531121, "grad_norm": 20.496347427368164, "learning_rate": 1.8054439834024897e-05, "loss": 1.5202, "step": 5866 }, { "epoch": 4.868879668049793, "grad_norm": 17.019512176513672, "learning_rate": 1.805410788381743e-05, "loss": 1.0335, "step": 5867 }, { "epoch": 4.869709543568465, "grad_norm": 20.832571029663086, "learning_rate": 1.805377593360996e-05, "loss": 1.6785, "step": 5868 }, { "epoch": 4.870539419087137, "grad_norm": 12.840192794799805, "learning_rate": 1.8053443983402494e-05, "loss": 1.2864, "step": 5869 }, { "epoch": 4.87136929460581, "grad_norm": 11.65784740447998, "learning_rate": 1.8053112033195022e-05, "loss": 1.1031, "step": 5870 }, { "epoch": 4.872199170124482, "grad_norm": 12.467757225036621, "learning_rate": 1.8052780082987555e-05, "loss": 0.9001, "step": 5871 }, { "epoch": 4.873029045643154, "grad_norm": 19.65966033935547, "learning_rate": 1.8052448132780083e-05, "loss": 1.7479, "step": 5872 }, { "epoch": 4.873858921161826, "grad_norm": 19.99740219116211, "learning_rate": 1.8052116182572615e-05, "loss": 1.4798, "step": 5873 }, { "epoch": 4.874688796680498, "grad_norm": 16.972339630126953, "learning_rate": 1.8051784232365148e-05, "loss": 2.0086, "step": 5874 }, { "epoch": 4.875518672199171, "grad_norm": 12.555094718933105, "learning_rate": 1.8051452282157676e-05, "loss": 1.4725, "step": 5875 }, { "epoch": 4.876348547717843, "grad_norm": 12.067596435546875, "learning_rate": 1.805112033195021e-05, "loss": 1.1868, "step": 5876 }, { "epoch": 4.877178423236515, "grad_norm": 11.632757186889648, "learning_rate": 
1.805078838174274e-05, "loss": 1.0967, "step": 5877 }, { "epoch": 4.878008298755187, "grad_norm": 13.483346939086914, "learning_rate": 1.805045643153527e-05, "loss": 2.0776, "step": 5878 }, { "epoch": 4.878838174273859, "grad_norm": 11.12398910522461, "learning_rate": 1.80501244813278e-05, "loss": 0.9078, "step": 5879 }, { "epoch": 4.8796680497925315, "grad_norm": 13.213562965393066, "learning_rate": 1.8049792531120333e-05, "loss": 1.4164, "step": 5880 }, { "epoch": 4.880497925311204, "grad_norm": 15.126742362976074, "learning_rate": 1.8049460580912865e-05, "loss": 1.2001, "step": 5881 }, { "epoch": 4.881327800829876, "grad_norm": 15.899092674255371, "learning_rate": 1.8049128630705394e-05, "loss": 1.6744, "step": 5882 }, { "epoch": 4.882157676348548, "grad_norm": 18.91624641418457, "learning_rate": 1.8048796680497926e-05, "loss": 1.2558, "step": 5883 }, { "epoch": 4.88298755186722, "grad_norm": 11.59080696105957, "learning_rate": 1.804846473029046e-05, "loss": 1.1029, "step": 5884 }, { "epoch": 4.8838174273858925, "grad_norm": 15.703495025634766, "learning_rate": 1.804813278008299e-05, "loss": 1.1896, "step": 5885 }, { "epoch": 4.884647302904565, "grad_norm": 18.678802490234375, "learning_rate": 1.804780082987552e-05, "loss": 1.6332, "step": 5886 }, { "epoch": 4.885477178423237, "grad_norm": 16.525165557861328, "learning_rate": 1.804746887966805e-05, "loss": 1.7926, "step": 5887 }, { "epoch": 4.886307053941909, "grad_norm": 23.106203079223633, "learning_rate": 1.8047136929460583e-05, "loss": 0.977, "step": 5888 }, { "epoch": 4.887136929460581, "grad_norm": 17.89080810546875, "learning_rate": 1.8046804979253116e-05, "loss": 1.8025, "step": 5889 }, { "epoch": 4.8879668049792535, "grad_norm": 12.865561485290527, "learning_rate": 1.8046473029045644e-05, "loss": 1.1154, "step": 5890 }, { "epoch": 4.888796680497926, "grad_norm": 24.16082000732422, "learning_rate": 1.8046141078838176e-05, "loss": 0.8278, "step": 5891 }, { "epoch": 4.889626556016598, "grad_norm": 
13.408769607543945, "learning_rate": 1.804580912863071e-05, "loss": 1.3778, "step": 5892 }, { "epoch": 4.89045643153527, "grad_norm": 15.373610496520996, "learning_rate": 1.8045477178423237e-05, "loss": 1.3288, "step": 5893 }, { "epoch": 4.891286307053942, "grad_norm": 14.506231307983398, "learning_rate": 1.804514522821577e-05, "loss": 1.5212, "step": 5894 }, { "epoch": 4.8921161825726145, "grad_norm": 12.745665550231934, "learning_rate": 1.8044813278008298e-05, "loss": 1.3929, "step": 5895 }, { "epoch": 4.892946058091287, "grad_norm": 18.954605102539062, "learning_rate": 1.804448132780083e-05, "loss": 1.1367, "step": 5896 }, { "epoch": 4.893775933609959, "grad_norm": 21.77134895324707, "learning_rate": 1.8044149377593362e-05, "loss": 1.6182, "step": 5897 }, { "epoch": 4.894605809128631, "grad_norm": 12.850749969482422, "learning_rate": 1.8043817427385894e-05, "loss": 1.476, "step": 5898 }, { "epoch": 4.895435684647303, "grad_norm": 11.759149551391602, "learning_rate": 1.8043485477178423e-05, "loss": 0.6989, "step": 5899 }, { "epoch": 4.8962655601659755, "grad_norm": 46.399356842041016, "learning_rate": 1.8043153526970955e-05, "loss": 0.8399, "step": 5900 }, { "epoch": 4.897095435684648, "grad_norm": 12.332306861877441, "learning_rate": 1.8042821576763487e-05, "loss": 0.7029, "step": 5901 }, { "epoch": 4.89792531120332, "grad_norm": 18.70918083190918, "learning_rate": 1.804248962655602e-05, "loss": 1.7429, "step": 5902 }, { "epoch": 4.898755186721992, "grad_norm": 13.312620162963867, "learning_rate": 1.8042157676348548e-05, "loss": 1.4174, "step": 5903 }, { "epoch": 4.899585062240664, "grad_norm": 13.918655395507812, "learning_rate": 1.804182572614108e-05, "loss": 1.6296, "step": 5904 }, { "epoch": 4.9004149377593365, "grad_norm": 16.4709529876709, "learning_rate": 1.8041493775933612e-05, "loss": 1.6075, "step": 5905 }, { "epoch": 4.901244813278009, "grad_norm": 13.262857437133789, "learning_rate": 1.8041161825726144e-05, "loss": 1.518, "step": 5906 }, { "epoch": 
4.902074688796681, "grad_norm": 14.024945259094238, "learning_rate": 1.8040829875518673e-05, "loss": 1.14, "step": 5907 }, { "epoch": 4.902904564315353, "grad_norm": 14.245965957641602, "learning_rate": 1.8040497925311205e-05, "loss": 1.3569, "step": 5908 }, { "epoch": 4.903734439834025, "grad_norm": 18.84408950805664, "learning_rate": 1.8040165975103737e-05, "loss": 1.8919, "step": 5909 }, { "epoch": 4.904564315352697, "grad_norm": 23.75074577331543, "learning_rate": 1.8039834024896266e-05, "loss": 1.1874, "step": 5910 }, { "epoch": 4.90539419087137, "grad_norm": 15.591900825500488, "learning_rate": 1.8039502074688798e-05, "loss": 1.4397, "step": 5911 }, { "epoch": 4.906224066390042, "grad_norm": 18.900251388549805, "learning_rate": 1.803917012448133e-05, "loss": 0.7654, "step": 5912 }, { "epoch": 4.907053941908714, "grad_norm": 30.09984016418457, "learning_rate": 1.803883817427386e-05, "loss": 1.3429, "step": 5913 }, { "epoch": 4.907883817427386, "grad_norm": 18.15706443786621, "learning_rate": 1.803850622406639e-05, "loss": 1.4253, "step": 5914 }, { "epoch": 4.908713692946058, "grad_norm": 16.171504974365234, "learning_rate": 1.8038174273858923e-05, "loss": 1.5599, "step": 5915 }, { "epoch": 4.909543568464731, "grad_norm": 25.990427017211914, "learning_rate": 1.8037842323651452e-05, "loss": 1.7415, "step": 5916 }, { "epoch": 4.910373443983403, "grad_norm": 17.187204360961914, "learning_rate": 1.8037510373443984e-05, "loss": 1.2805, "step": 5917 }, { "epoch": 4.911203319502075, "grad_norm": 11.317117691040039, "learning_rate": 1.8037178423236516e-05, "loss": 1.5291, "step": 5918 }, { "epoch": 4.912033195020747, "grad_norm": 11.764777183532715, "learning_rate": 1.8036846473029045e-05, "loss": 1.0929, "step": 5919 }, { "epoch": 4.912863070539419, "grad_norm": 16.663305282592773, "learning_rate": 1.8036514522821577e-05, "loss": 1.5035, "step": 5920 }, { "epoch": 4.913692946058092, "grad_norm": 20.21378517150879, "learning_rate": 1.803618257261411e-05, "loss": 1.772, 
"step": 5921 }, { "epoch": 4.914522821576764, "grad_norm": 13.718648910522461, "learning_rate": 1.803585062240664e-05, "loss": 1.4685, "step": 5922 }, { "epoch": 4.915352697095436, "grad_norm": 12.412111282348633, "learning_rate": 1.8035518672199173e-05, "loss": 1.5465, "step": 5923 }, { "epoch": 4.916182572614108, "grad_norm": 15.555574417114258, "learning_rate": 1.8035186721991702e-05, "loss": 1.1551, "step": 5924 }, { "epoch": 4.91701244813278, "grad_norm": 13.554654121398926, "learning_rate": 1.8034854771784234e-05, "loss": 1.1962, "step": 5925 }, { "epoch": 4.917842323651453, "grad_norm": 13.506593704223633, "learning_rate": 1.8034522821576766e-05, "loss": 0.9087, "step": 5926 }, { "epoch": 4.918672199170125, "grad_norm": 18.78866958618164, "learning_rate": 1.80341908713693e-05, "loss": 1.5292, "step": 5927 }, { "epoch": 4.919502074688797, "grad_norm": 12.048810005187988, "learning_rate": 1.8033858921161827e-05, "loss": 1.5538, "step": 5928 }, { "epoch": 4.920331950207469, "grad_norm": 10.996705055236816, "learning_rate": 1.803352697095436e-05, "loss": 1.0104, "step": 5929 }, { "epoch": 4.921161825726141, "grad_norm": 10.509708404541016, "learning_rate": 1.803319502074689e-05, "loss": 0.9846, "step": 5930 }, { "epoch": 4.9219917012448136, "grad_norm": 16.665271759033203, "learning_rate": 1.803286307053942e-05, "loss": 1.5349, "step": 5931 }, { "epoch": 4.922821576763486, "grad_norm": 17.18172836303711, "learning_rate": 1.8032531120331952e-05, "loss": 1.2655, "step": 5932 }, { "epoch": 4.923651452282158, "grad_norm": 15.56079387664795, "learning_rate": 1.803219917012448e-05, "loss": 1.7183, "step": 5933 }, { "epoch": 4.92448132780083, "grad_norm": 10.394149780273438, "learning_rate": 1.8031867219917013e-05, "loss": 0.8383, "step": 5934 }, { "epoch": 4.925311203319502, "grad_norm": 18.165287017822266, "learning_rate": 1.8031535269709545e-05, "loss": 1.3535, "step": 5935 }, { "epoch": 4.9261410788381745, "grad_norm": 15.998475074768066, "learning_rate": 
1.8031203319502074e-05, "loss": 1.915, "step": 5936 }, { "epoch": 4.926970954356847, "grad_norm": 15.432182312011719, "learning_rate": 1.8030871369294606e-05, "loss": 1.0863, "step": 5937 }, { "epoch": 4.927800829875519, "grad_norm": 13.23508071899414, "learning_rate": 1.8030539419087138e-05, "loss": 1.1907, "step": 5938 }, { "epoch": 4.928630705394191, "grad_norm": 12.23386287689209, "learning_rate": 1.803020746887967e-05, "loss": 1.0082, "step": 5939 }, { "epoch": 4.929460580912863, "grad_norm": 13.857342720031738, "learning_rate": 1.80298755186722e-05, "loss": 1.266, "step": 5940 }, { "epoch": 4.9302904564315355, "grad_norm": 23.716445922851562, "learning_rate": 1.802954356846473e-05, "loss": 1.0678, "step": 5941 }, { "epoch": 4.931120331950208, "grad_norm": 10.58016586303711, "learning_rate": 1.8029211618257263e-05, "loss": 1.0549, "step": 5942 }, { "epoch": 4.93195020746888, "grad_norm": 16.543073654174805, "learning_rate": 1.8028879668049795e-05, "loss": 1.7308, "step": 5943 }, { "epoch": 4.932780082987552, "grad_norm": 10.359193801879883, "learning_rate": 1.8028547717842324e-05, "loss": 0.6843, "step": 5944 }, { "epoch": 4.933609958506224, "grad_norm": 18.178979873657227, "learning_rate": 1.8028215767634856e-05, "loss": 1.247, "step": 5945 }, { "epoch": 4.9344398340248965, "grad_norm": 14.364327430725098, "learning_rate": 1.8027883817427388e-05, "loss": 1.22, "step": 5946 }, { "epoch": 4.935269709543569, "grad_norm": 13.999082565307617, "learning_rate": 1.802755186721992e-05, "loss": 1.0012, "step": 5947 }, { "epoch": 4.936099585062241, "grad_norm": 15.580634117126465, "learning_rate": 1.8027219917012452e-05, "loss": 1.3389, "step": 5948 }, { "epoch": 4.936929460580913, "grad_norm": 12.098422050476074, "learning_rate": 1.802688796680498e-05, "loss": 1.2455, "step": 5949 }, { "epoch": 4.937759336099585, "grad_norm": 27.890384674072266, "learning_rate": 1.8026556016597513e-05, "loss": 2.258, "step": 5950 }, { "epoch": 4.9385892116182575, "grad_norm": 
18.142921447753906, "learning_rate": 1.8026224066390042e-05, "loss": 0.7564, "step": 5951 }, { "epoch": 4.93941908713693, "grad_norm": 15.832046508789062, "learning_rate": 1.8025892116182574e-05, "loss": 1.0845, "step": 5952 }, { "epoch": 4.940248962655602, "grad_norm": 14.421897888183594, "learning_rate": 1.8025560165975106e-05, "loss": 1.0485, "step": 5953 }, { "epoch": 4.941078838174274, "grad_norm": 12.64711856842041, "learning_rate": 1.8025228215767635e-05, "loss": 1.5701, "step": 5954 }, { "epoch": 4.941908713692946, "grad_norm": 21.624292373657227, "learning_rate": 1.8024896265560167e-05, "loss": 2.5853, "step": 5955 }, { "epoch": 4.9427385892116185, "grad_norm": 32.79718017578125, "learning_rate": 1.80245643153527e-05, "loss": 1.9384, "step": 5956 }, { "epoch": 4.943568464730291, "grad_norm": 12.487574577331543, "learning_rate": 1.8024232365145228e-05, "loss": 1.3533, "step": 5957 }, { "epoch": 4.944398340248963, "grad_norm": 22.168872833251953, "learning_rate": 1.802390041493776e-05, "loss": 1.6874, "step": 5958 }, { "epoch": 4.945228215767635, "grad_norm": 10.925973892211914, "learning_rate": 1.8023568464730292e-05, "loss": 1.4842, "step": 5959 }, { "epoch": 4.946058091286307, "grad_norm": 19.355562210083008, "learning_rate": 1.8023236514522824e-05, "loss": 1.7372, "step": 5960 }, { "epoch": 4.946887966804979, "grad_norm": 31.640005111694336, "learning_rate": 1.8022904564315353e-05, "loss": 1.1595, "step": 5961 }, { "epoch": 4.947717842323652, "grad_norm": 15.43310260772705, "learning_rate": 1.8022572614107885e-05, "loss": 1.0685, "step": 5962 }, { "epoch": 4.948547717842324, "grad_norm": 19.683990478515625, "learning_rate": 1.8022240663900417e-05, "loss": 1.3756, "step": 5963 }, { "epoch": 4.949377593360996, "grad_norm": 12.557894706726074, "learning_rate": 1.802190871369295e-05, "loss": 1.4916, "step": 5964 }, { "epoch": 4.950207468879668, "grad_norm": 18.975244522094727, "learning_rate": 1.8021576763485478e-05, "loss": 1.3159, "step": 5965 }, { 
"epoch": 4.95103734439834, "grad_norm": 11.02603530883789, "learning_rate": 1.802124481327801e-05, "loss": 1.206, "step": 5966 }, { "epoch": 4.951867219917013, "grad_norm": 13.04893684387207, "learning_rate": 1.8020912863070542e-05, "loss": 1.2581, "step": 5967 }, { "epoch": 4.952697095435685, "grad_norm": 19.157899856567383, "learning_rate": 1.8020580912863074e-05, "loss": 1.7766, "step": 5968 }, { "epoch": 4.953526970954357, "grad_norm": 13.940244674682617, "learning_rate": 1.8020248962655603e-05, "loss": 1.1543, "step": 5969 }, { "epoch": 4.954356846473029, "grad_norm": 23.274341583251953, "learning_rate": 1.8019917012448135e-05, "loss": 1.9988, "step": 5970 }, { "epoch": 4.955186721991701, "grad_norm": 9.868475914001465, "learning_rate": 1.8019585062240664e-05, "loss": 1.3523, "step": 5971 }, { "epoch": 4.956016597510374, "grad_norm": 10.477489471435547, "learning_rate": 1.8019253112033196e-05, "loss": 1.5595, "step": 5972 }, { "epoch": 4.956846473029046, "grad_norm": 13.361788749694824, "learning_rate": 1.8018921161825728e-05, "loss": 1.4313, "step": 5973 }, { "epoch": 4.957676348547718, "grad_norm": 18.21595573425293, "learning_rate": 1.8018589211618257e-05, "loss": 1.3468, "step": 5974 }, { "epoch": 4.95850622406639, "grad_norm": 15.95802116394043, "learning_rate": 1.801825726141079e-05, "loss": 1.1406, "step": 5975 }, { "epoch": 4.959336099585062, "grad_norm": 13.327428817749023, "learning_rate": 1.801792531120332e-05, "loss": 1.3294, "step": 5976 }, { "epoch": 4.960165975103735, "grad_norm": 14.340147972106934, "learning_rate": 1.8017593360995853e-05, "loss": 1.5877, "step": 5977 }, { "epoch": 4.960995850622407, "grad_norm": 20.736562728881836, "learning_rate": 1.801726141078838e-05, "loss": 2.2954, "step": 5978 }, { "epoch": 4.961825726141079, "grad_norm": 15.744644165039062, "learning_rate": 1.8016929460580914e-05, "loss": 1.4965, "step": 5979 }, { "epoch": 4.962655601659751, "grad_norm": 15.750640869140625, "learning_rate": 1.8016597510373446e-05, 
"loss": 1.2324, "step": 5980 }, { "epoch": 4.963485477178423, "grad_norm": 16.559463500976562, "learning_rate": 1.8016265560165978e-05, "loss": 1.1765, "step": 5981 }, { "epoch": 4.964315352697096, "grad_norm": 13.119598388671875, "learning_rate": 1.8015933609958507e-05, "loss": 1.1417, "step": 5982 }, { "epoch": 4.965145228215768, "grad_norm": 21.8712100982666, "learning_rate": 1.801560165975104e-05, "loss": 1.1376, "step": 5983 }, { "epoch": 4.96597510373444, "grad_norm": 8.881305694580078, "learning_rate": 1.801526970954357e-05, "loss": 0.5194, "step": 5984 }, { "epoch": 4.966804979253112, "grad_norm": 9.383394241333008, "learning_rate": 1.8014937759336103e-05, "loss": 0.9326, "step": 5985 }, { "epoch": 4.967634854771784, "grad_norm": 16.385231018066406, "learning_rate": 1.8014605809128632e-05, "loss": 1.4555, "step": 5986 }, { "epoch": 4.9684647302904565, "grad_norm": 17.09567642211914, "learning_rate": 1.8014273858921164e-05, "loss": 1.4104, "step": 5987 }, { "epoch": 4.969294605809129, "grad_norm": 16.93000030517578, "learning_rate": 1.8013941908713696e-05, "loss": 1.6614, "step": 5988 }, { "epoch": 4.970124481327801, "grad_norm": NaN, "learning_rate": 1.8013941908713696e-05, "loss": 1.5555, "step": 5989 }, { "epoch": 4.970954356846473, "grad_norm": 14.085704803466797, "learning_rate": 1.8013609958506225e-05, "loss": 1.2317, "step": 5990 }, { "epoch": 4.971784232365145, "grad_norm": 13.30075740814209, "learning_rate": 1.8013278008298757e-05, "loss": 1.79, "step": 5991 }, { "epoch": 4.9726141078838175, "grad_norm": 19.732221603393555, "learning_rate": 1.801294605809129e-05, "loss": 1.7721, "step": 5992 }, { "epoch": 4.97344398340249, "grad_norm": 12.861230850219727, "learning_rate": 1.8012614107883818e-05, "loss": 1.3922, "step": 5993 }, { "epoch": 4.974273858921162, "grad_norm": 20.35515785217285, "learning_rate": 1.801228215767635e-05, "loss": 1.1509, "step": 5994 }, { "epoch": 4.975103734439834, "grad_norm": 18.170913696289062, "learning_rate": 
1.801195020746888e-05, "loss": 1.9246, "step": 5995 }, { "epoch": 4.975933609958506, "grad_norm": 13.793999671936035, "learning_rate": 1.801161825726141e-05, "loss": 1.2999, "step": 5996 }, { "epoch": 4.9767634854771785, "grad_norm": 8.459214210510254, "learning_rate": 1.8011286307053943e-05, "loss": 0.5845, "step": 5997 }, { "epoch": 4.977593360995851, "grad_norm": 13.681413650512695, "learning_rate": 1.8010954356846475e-05, "loss": 1.2839, "step": 5998 }, { "epoch": 4.978423236514523, "grad_norm": 12.527870178222656, "learning_rate": 1.8010622406639004e-05, "loss": 1.3525, "step": 5999 }, { "epoch": 4.979253112033195, "grad_norm": 19.782413482666016, "learning_rate": 1.8010290456431536e-05, "loss": 1.1324, "step": 6000 }, { "epoch": 4.980082987551867, "grad_norm": 18.05974578857422, "learning_rate": 1.8009958506224068e-05, "loss": 1.9029, "step": 6001 }, { "epoch": 4.9809128630705395, "grad_norm": 13.347085952758789, "learning_rate": 1.80096265560166e-05, "loss": 1.6107, "step": 6002 }, { "epoch": 4.981742738589212, "grad_norm": 6.923688888549805, "learning_rate": 1.8009294605809132e-05, "loss": 0.6891, "step": 6003 }, { "epoch": 4.982572614107884, "grad_norm": 17.381406784057617, "learning_rate": 1.800896265560166e-05, "loss": 1.5491, "step": 6004 }, { "epoch": 4.983402489626556, "grad_norm": 16.912710189819336, "learning_rate": 1.8008630705394193e-05, "loss": 2.159, "step": 6005 }, { "epoch": 4.984232365145228, "grad_norm": 14.7930269241333, "learning_rate": 1.8008298755186725e-05, "loss": 1.3464, "step": 6006 }, { "epoch": 4.9850622406639005, "grad_norm": 13.119186401367188, "learning_rate": 1.8007966804979257e-05, "loss": 1.6982, "step": 6007 }, { "epoch": 4.985892116182573, "grad_norm": 20.669933319091797, "learning_rate": 1.8007634854771786e-05, "loss": 1.7777, "step": 6008 }, { "epoch": 4.986721991701245, "grad_norm": 15.604816436767578, "learning_rate": 1.8007302904564318e-05, "loss": 1.0542, "step": 6009 }, { "epoch": 4.987551867219917, "grad_norm": 
14.890642166137695, "learning_rate": 1.800697095435685e-05, "loss": 1.5545, "step": 6010 }, { "epoch": 4.988381742738589, "grad_norm": 11.28502082824707, "learning_rate": 1.800663900414938e-05, "loss": 0.6014, "step": 6011 }, { "epoch": 4.9892116182572614, "grad_norm": 14.42668628692627, "learning_rate": 1.800630705394191e-05, "loss": 1.3655, "step": 6012 }, { "epoch": 4.990041493775934, "grad_norm": 18.921550750732422, "learning_rate": 1.800597510373444e-05, "loss": 1.7735, "step": 6013 }, { "epoch": 4.990871369294606, "grad_norm": 8.311786651611328, "learning_rate": 1.800564315352697e-05, "loss": 1.4242, "step": 6014 }, { "epoch": 4.991701244813278, "grad_norm": 13.46599006652832, "learning_rate": 1.8005311203319504e-05, "loss": 1.0511, "step": 6015 }, { "epoch": 4.99253112033195, "grad_norm": 21.57683563232422, "learning_rate": 1.8004979253112032e-05, "loss": 2.0764, "step": 6016 }, { "epoch": 4.993360995850622, "grad_norm": 15.524147033691406, "learning_rate": 1.8004647302904565e-05, "loss": 1.7336, "step": 6017 }, { "epoch": 4.994190871369295, "grad_norm": 11.491412162780762, "learning_rate": 1.8004315352697097e-05, "loss": 1.5209, "step": 6018 }, { "epoch": 4.995020746887967, "grad_norm": 14.226524353027344, "learning_rate": 1.800398340248963e-05, "loss": 1.5663, "step": 6019 }, { "epoch": 4.995850622406639, "grad_norm": 13.450624465942383, "learning_rate": 1.8003651452282157e-05, "loss": 1.4133, "step": 6020 }, { "epoch": 4.996680497925311, "grad_norm": 17.622478485107422, "learning_rate": 1.800331950207469e-05, "loss": 1.0103, "step": 6021 }, { "epoch": 4.997510373443983, "grad_norm": 14.297423362731934, "learning_rate": 1.800298755186722e-05, "loss": 1.4531, "step": 6022 }, { "epoch": 4.998340248962656, "grad_norm": 20.1983699798584, "learning_rate": 1.8002655601659754e-05, "loss": 1.4186, "step": 6023 }, { "epoch": 4.999170124481328, "grad_norm": 14.482199668884277, "learning_rate": 1.8002323651452282e-05, "loss": 1.2246, "step": 6024 }, { "epoch": 5.0, 
"grad_norm": 19.116714477539062, "learning_rate": 1.8001991701244815e-05, "loss": 1.2783, "step": 6025 }, { "epoch": 5.000829875518672, "grad_norm": 9.854253768920898, "learning_rate": 1.8001659751037347e-05, "loss": 0.7999, "step": 6026 }, { "epoch": 5.001659751037344, "grad_norm": 11.263599395751953, "learning_rate": 1.800132780082988e-05, "loss": 0.6732, "step": 6027 }, { "epoch": 5.002489626556017, "grad_norm": 12.078829765319824, "learning_rate": 1.8000995850622408e-05, "loss": 1.1508, "step": 6028 }, { "epoch": 5.003319502074689, "grad_norm": 9.081360816955566, "learning_rate": 1.800066390041494e-05, "loss": 0.5902, "step": 6029 }, { "epoch": 5.004149377593361, "grad_norm": 9.135653495788574, "learning_rate": 1.8000331950207472e-05, "loss": 0.5046, "step": 6030 }, { "epoch": 5.004979253112033, "grad_norm": 17.249855041503906, "learning_rate": 1.8e-05, "loss": 1.9514, "step": 6031 }, { "epoch": 5.005809128630705, "grad_norm": 11.279126167297363, "learning_rate": 1.7999668049792533e-05, "loss": 1.1188, "step": 6032 }, { "epoch": 5.006639004149378, "grad_norm": 18.407142639160156, "learning_rate": 1.7999336099585065e-05, "loss": 2.698, "step": 6033 }, { "epoch": 5.00746887966805, "grad_norm": 17.260873794555664, "learning_rate": 1.7999004149377593e-05, "loss": 1.8513, "step": 6034 }, { "epoch": 5.008298755186722, "grad_norm": 13.285941123962402, "learning_rate": 1.7998672199170125e-05, "loss": 1.5226, "step": 6035 }, { "epoch": 5.009128630705394, "grad_norm": 10.693868637084961, "learning_rate": 1.7998340248962658e-05, "loss": 0.6557, "step": 6036 }, { "epoch": 5.009958506224066, "grad_norm": 16.163856506347656, "learning_rate": 1.7998008298755186e-05, "loss": 1.036, "step": 6037 }, { "epoch": 5.0107883817427386, "grad_norm": 14.206583023071289, "learning_rate": 1.799767634854772e-05, "loss": 1.2307, "step": 6038 }, { "epoch": 5.011618257261411, "grad_norm": 15.775874137878418, "learning_rate": 1.799734439834025e-05, "loss": 0.5907, "step": 6039 }, { "epoch": 
5.012448132780083, "grad_norm": 14.308653831481934, "learning_rate": 1.7997012448132783e-05, "loss": 1.9052, "step": 6040 }, { "epoch": 5.013278008298755, "grad_norm": 13.071418762207031, "learning_rate": 1.799668049792531e-05, "loss": 1.4017, "step": 6041 }, { "epoch": 5.014107883817427, "grad_norm": 13.270781517028809, "learning_rate": 1.7996348547717843e-05, "loss": 1.0439, "step": 6042 }, { "epoch": 5.0149377593360995, "grad_norm": 18.701995849609375, "learning_rate": 1.7996016597510376e-05, "loss": 1.6072, "step": 6043 }, { "epoch": 5.015767634854772, "grad_norm": 14.119803428649902, "learning_rate": 1.7995684647302908e-05, "loss": 1.1461, "step": 6044 }, { "epoch": 5.016597510373444, "grad_norm": 23.323841094970703, "learning_rate": 1.7995352697095436e-05, "loss": 1.6447, "step": 6045 }, { "epoch": 5.017427385892116, "grad_norm": 17.09029769897461, "learning_rate": 1.799502074688797e-05, "loss": 1.1322, "step": 6046 }, { "epoch": 5.018257261410788, "grad_norm": 17.24197769165039, "learning_rate": 1.79946887966805e-05, "loss": 1.2285, "step": 6047 }, { "epoch": 5.0190871369294605, "grad_norm": 19.678014755249023, "learning_rate": 1.7994356846473033e-05, "loss": 2.1174, "step": 6048 }, { "epoch": 5.019917012448133, "grad_norm": 15.089794158935547, "learning_rate": 1.799402489626556e-05, "loss": 1.2894, "step": 6049 }, { "epoch": 5.020746887966805, "grad_norm": 20.239727020263672, "learning_rate": 1.7993692946058094e-05, "loss": 1.523, "step": 6050 }, { "epoch": 5.021576763485477, "grad_norm": 9.9819917678833, "learning_rate": 1.7993360995850622e-05, "loss": 0.8348, "step": 6051 }, { "epoch": 5.022406639004149, "grad_norm": 14.6899995803833, "learning_rate": 1.7993029045643154e-05, "loss": 0.9894, "step": 6052 }, { "epoch": 5.0232365145228215, "grad_norm": 11.029600143432617, "learning_rate": 1.7992697095435686e-05, "loss": 1.0049, "step": 6053 }, { "epoch": 5.024066390041494, "grad_norm": 14.40935230255127, "learning_rate": 1.7992365145228215e-05, "loss": 
1.0422, "step": 6054 }, { "epoch": 5.024896265560166, "grad_norm": 11.474814414978027, "learning_rate": 1.7992033195020747e-05, "loss": 0.9433, "step": 6055 }, { "epoch": 5.025726141078838, "grad_norm": 13.861457824707031, "learning_rate": 1.799170124481328e-05, "loss": 0.8677, "step": 6056 }, { "epoch": 5.02655601659751, "grad_norm": 11.754694938659668, "learning_rate": 1.799136929460581e-05, "loss": 1.0335, "step": 6057 }, { "epoch": 5.0273858921161825, "grad_norm": 11.803680419921875, "learning_rate": 1.799103734439834e-05, "loss": 1.0172, "step": 6058 }, { "epoch": 5.028215767634855, "grad_norm": 13.281230926513672, "learning_rate": 1.7990705394190872e-05, "loss": 1.1378, "step": 6059 }, { "epoch": 5.029045643153527, "grad_norm": 10.302681922912598, "learning_rate": 1.7990373443983404e-05, "loss": 1.0426, "step": 6060 }, { "epoch": 5.029875518672199, "grad_norm": 11.803197860717773, "learning_rate": 1.7990041493775937e-05, "loss": 1.5944, "step": 6061 }, { "epoch": 5.030705394190871, "grad_norm": 14.693452835083008, "learning_rate": 1.7989709543568465e-05, "loss": 1.4809, "step": 6062 }, { "epoch": 5.0315352697095435, "grad_norm": 14.437308311462402, "learning_rate": 1.7989377593360997e-05, "loss": 1.5854, "step": 6063 }, { "epoch": 5.032365145228216, "grad_norm": 19.333478927612305, "learning_rate": 1.798904564315353e-05, "loss": 1.102, "step": 6064 }, { "epoch": 5.033195020746888, "grad_norm": 19.940351486206055, "learning_rate": 1.798871369294606e-05, "loss": 0.8509, "step": 6065 }, { "epoch": 5.03402489626556, "grad_norm": 21.127408981323242, "learning_rate": 1.798838174273859e-05, "loss": 1.9696, "step": 6066 }, { "epoch": 5.034854771784232, "grad_norm": 17.410179138183594, "learning_rate": 1.7988049792531122e-05, "loss": 1.9747, "step": 6067 }, { "epoch": 5.035684647302904, "grad_norm": 21.246421813964844, "learning_rate": 1.7987717842323655e-05, "loss": 0.6642, "step": 6068 }, { "epoch": 5.036514522821577, "grad_norm": 20.906509399414062, 
"learning_rate": 1.7987385892116183e-05, "loss": 1.2659, "step": 6069 }, { "epoch": 5.037344398340249, "grad_norm": 19.28052520751953, "learning_rate": 1.7987053941908715e-05, "loss": 1.2603, "step": 6070 }, { "epoch": 5.038174273858921, "grad_norm": 12.229305267333984, "learning_rate": 1.7986721991701247e-05, "loss": 1.546, "step": 6071 }, { "epoch": 5.039004149377593, "grad_norm": 14.69963264465332, "learning_rate": 1.7986390041493776e-05, "loss": 1.4406, "step": 6072 }, { "epoch": 5.039834024896265, "grad_norm": 10.495323181152344, "learning_rate": 1.798605809128631e-05, "loss": 0.4379, "step": 6073 }, { "epoch": 5.040663900414938, "grad_norm": 15.24171257019043, "learning_rate": 1.7985726141078837e-05, "loss": 0.8916, "step": 6074 }, { "epoch": 5.04149377593361, "grad_norm": 19.93663215637207, "learning_rate": 1.798539419087137e-05, "loss": 1.0391, "step": 6075 }, { "epoch": 5.042323651452282, "grad_norm": 10.47989273071289, "learning_rate": 1.79850622406639e-05, "loss": 1.293, "step": 6076 }, { "epoch": 5.043153526970954, "grad_norm": 14.94254207611084, "learning_rate": 1.7984730290456433e-05, "loss": 1.3745, "step": 6077 }, { "epoch": 5.043983402489626, "grad_norm": 18.810302734375, "learning_rate": 1.7984398340248962e-05, "loss": 1.8137, "step": 6078 }, { "epoch": 5.044813278008299, "grad_norm": 13.840970039367676, "learning_rate": 1.7984066390041494e-05, "loss": 1.0668, "step": 6079 }, { "epoch": 5.045643153526971, "grad_norm": 15.199676513671875, "learning_rate": 1.7983734439834026e-05, "loss": 1.0276, "step": 6080 }, { "epoch": 5.046473029045643, "grad_norm": 19.218429565429688, "learning_rate": 1.798340248962656e-05, "loss": 1.3094, "step": 6081 }, { "epoch": 5.047302904564315, "grad_norm": 12.441535949707031, "learning_rate": 1.798307053941909e-05, "loss": 0.9549, "step": 6082 }, { "epoch": 5.048132780082987, "grad_norm": 23.614273071289062, "learning_rate": 1.798273858921162e-05, "loss": 0.7966, "step": 6083 }, { "epoch": 5.04896265560166, "grad_norm": 
12.342865943908691, "learning_rate": 1.798240663900415e-05, "loss": 1.1871, "step": 6084 }, { "epoch": 5.049792531120332, "grad_norm": 18.85127830505371, "learning_rate": 1.7982074688796683e-05, "loss": 1.7524, "step": 6085 }, { "epoch": 5.050622406639004, "grad_norm": 13.12662124633789, "learning_rate": 1.7981742738589216e-05, "loss": 1.4358, "step": 6086 }, { "epoch": 5.051452282157676, "grad_norm": 22.042024612426758, "learning_rate": 1.7981410788381744e-05, "loss": 1.5997, "step": 6087 }, { "epoch": 5.052282157676348, "grad_norm": 13.968345642089844, "learning_rate": 1.7981078838174276e-05, "loss": 1.6175, "step": 6088 }, { "epoch": 5.053112033195021, "grad_norm": 8.26659870147705, "learning_rate": 1.7980746887966805e-05, "loss": 0.5735, "step": 6089 }, { "epoch": 5.053941908713693, "grad_norm": 13.695231437683105, "learning_rate": 1.7980414937759337e-05, "loss": 1.5668, "step": 6090 }, { "epoch": 5.054771784232365, "grad_norm": 20.500062942504883, "learning_rate": 1.798008298755187e-05, "loss": 2.1326, "step": 6091 }, { "epoch": 5.055601659751037, "grad_norm": 18.827316284179688, "learning_rate": 1.7979751037344398e-05, "loss": 1.5874, "step": 6092 }, { "epoch": 5.056431535269709, "grad_norm": 14.146113395690918, "learning_rate": 1.797941908713693e-05, "loss": 1.1256, "step": 6093 }, { "epoch": 5.0572614107883815, "grad_norm": 12.621112823486328, "learning_rate": 1.7979087136929462e-05, "loss": 1.4541, "step": 6094 }, { "epoch": 5.058091286307054, "grad_norm": 19.20118522644043, "learning_rate": 1.797875518672199e-05, "loss": 2.0264, "step": 6095 }, { "epoch": 5.058921161825726, "grad_norm": 12.146151542663574, "learning_rate": 1.7978423236514523e-05, "loss": 1.513, "step": 6096 }, { "epoch": 5.059751037344398, "grad_norm": 20.498138427734375, "learning_rate": 1.7978091286307055e-05, "loss": 1.3839, "step": 6097 }, { "epoch": 5.06058091286307, "grad_norm": 16.696863174438477, "learning_rate": 1.7977759336099587e-05, "loss": 1.6069, "step": 6098 }, { "epoch": 
5.0614107883817425, "grad_norm": 12.053900718688965, "learning_rate": 1.7977427385892116e-05, "loss": 0.8655, "step": 6099 }, { "epoch": 5.062240663900415, "grad_norm": 17.9855899810791, "learning_rate": 1.7977095435684648e-05, "loss": 1.0739, "step": 6100 }, { "epoch": 5.063070539419087, "grad_norm": 23.224519729614258, "learning_rate": 1.797676348547718e-05, "loss": 1.4485, "step": 6101 }, { "epoch": 5.063900414937759, "grad_norm": 21.79634666442871, "learning_rate": 1.7976431535269712e-05, "loss": 1.1008, "step": 6102 }, { "epoch": 5.064730290456431, "grad_norm": 17.54095458984375, "learning_rate": 1.797609958506224e-05, "loss": 1.7187, "step": 6103 }, { "epoch": 5.0655601659751035, "grad_norm": 15.92188835144043, "learning_rate": 1.7975767634854773e-05, "loss": 1.3722, "step": 6104 }, { "epoch": 5.066390041493776, "grad_norm": 15.00484848022461, "learning_rate": 1.7975435684647305e-05, "loss": 1.1675, "step": 6105 }, { "epoch": 5.067219917012448, "grad_norm": 10.71249008178711, "learning_rate": 1.7975103734439837e-05, "loss": 1.1803, "step": 6106 }, { "epoch": 5.06804979253112, "grad_norm": 13.897835731506348, "learning_rate": 1.7974771784232366e-05, "loss": 1.0497, "step": 6107 }, { "epoch": 5.068879668049792, "grad_norm": 16.666664123535156, "learning_rate": 1.7974439834024898e-05, "loss": 1.6102, "step": 6108 }, { "epoch": 5.0697095435684645, "grad_norm": 14.020670890808105, "learning_rate": 1.797410788381743e-05, "loss": 1.6947, "step": 6109 }, { "epoch": 5.070539419087137, "grad_norm": 14.192785263061523, "learning_rate": 1.797377593360996e-05, "loss": 1.1932, "step": 6110 }, { "epoch": 5.071369294605809, "grad_norm": 12.674696922302246, "learning_rate": 1.797344398340249e-05, "loss": 1.2019, "step": 6111 }, { "epoch": 5.072199170124481, "grad_norm": 18.465953826904297, "learning_rate": 1.797311203319502e-05, "loss": 2.4163, "step": 6112 }, { "epoch": 5.073029045643153, "grad_norm": 10.94448471069336, "learning_rate": 1.7972780082987552e-05, "loss": 
1.0761, "step": 6113 }, { "epoch": 5.0738589211618255, "grad_norm": 14.202351570129395, "learning_rate": 1.7972448132780084e-05, "loss": 0.9365, "step": 6114 }, { "epoch": 5.074688796680498, "grad_norm": 18.501585006713867, "learning_rate": 1.7972116182572616e-05, "loss": 1.5538, "step": 6115 }, { "epoch": 5.07551867219917, "grad_norm": 15.860750198364258, "learning_rate": 1.7971784232365145e-05, "loss": 1.3073, "step": 6116 }, { "epoch": 5.076348547717842, "grad_norm": 15.38986587524414, "learning_rate": 1.7971452282157677e-05, "loss": 1.4185, "step": 6117 }, { "epoch": 5.077178423236514, "grad_norm": 18.56162452697754, "learning_rate": 1.797112033195021e-05, "loss": 2.0233, "step": 6118 }, { "epoch": 5.0780082987551864, "grad_norm": 14.440951347351074, "learning_rate": 1.797078838174274e-05, "loss": 1.3695, "step": 6119 }, { "epoch": 5.078838174273859, "grad_norm": 19.91214942932129, "learning_rate": 1.797045643153527e-05, "loss": 1.2819, "step": 6120 }, { "epoch": 5.079668049792531, "grad_norm": 15.95954704284668, "learning_rate": 1.7970124481327802e-05, "loss": 0.8303, "step": 6121 }, { "epoch": 5.080497925311203, "grad_norm": 19.765363693237305, "learning_rate": 1.7969792531120334e-05, "loss": 1.6871, "step": 6122 }, { "epoch": 5.081327800829875, "grad_norm": 12.96363353729248, "learning_rate": 1.7969460580912866e-05, "loss": 1.4947, "step": 6123 }, { "epoch": 5.082157676348547, "grad_norm": 13.909107208251953, "learning_rate": 1.7969128630705395e-05, "loss": 1.3327, "step": 6124 }, { "epoch": 5.08298755186722, "grad_norm": 15.580133438110352, "learning_rate": 1.7968796680497927e-05, "loss": 1.3943, "step": 6125 }, { "epoch": 5.083817427385892, "grad_norm": 17.555252075195312, "learning_rate": 1.796846473029046e-05, "loss": 0.6585, "step": 6126 }, { "epoch": 5.084647302904564, "grad_norm": 13.568745613098145, "learning_rate": 1.796813278008299e-05, "loss": 0.938, "step": 6127 }, { "epoch": 5.085477178423236, "grad_norm": 15.588433265686035, "learning_rate": 
1.796780082987552e-05, "loss": 0.9794, "step": 6128 }, { "epoch": 5.086307053941908, "grad_norm": 12.677743911743164, "learning_rate": 1.7967468879668052e-05, "loss": 1.1099, "step": 6129 }, { "epoch": 5.087136929460581, "grad_norm": 14.49340534210205, "learning_rate": 1.796713692946058e-05, "loss": 1.2745, "step": 6130 }, { "epoch": 5.087966804979253, "grad_norm": 11.674714088439941, "learning_rate": 1.7966804979253113e-05, "loss": 0.8885, "step": 6131 }, { "epoch": 5.088796680497925, "grad_norm": 14.414288520812988, "learning_rate": 1.7966473029045645e-05, "loss": 1.4757, "step": 6132 }, { "epoch": 5.089626556016597, "grad_norm": 24.098901748657227, "learning_rate": 1.7966141078838174e-05, "loss": 1.9957, "step": 6133 }, { "epoch": 5.090456431535269, "grad_norm": 9.955446243286133, "learning_rate": 1.7965809128630706e-05, "loss": 0.8048, "step": 6134 }, { "epoch": 5.091286307053942, "grad_norm": 23.44527816772461, "learning_rate": 1.7965477178423238e-05, "loss": 1.1266, "step": 6135 }, { "epoch": 5.092116182572614, "grad_norm": 17.919361114501953, "learning_rate": 1.796514522821577e-05, "loss": 1.5062, "step": 6136 }, { "epoch": 5.092946058091286, "grad_norm": 13.35029125213623, "learning_rate": 1.79648132780083e-05, "loss": 1.0439, "step": 6137 }, { "epoch": 5.093775933609958, "grad_norm": 11.802058219909668, "learning_rate": 1.796448132780083e-05, "loss": 1.3093, "step": 6138 }, { "epoch": 5.09460580912863, "grad_norm": 24.7296199798584, "learning_rate": 1.7964149377593363e-05, "loss": 1.2478, "step": 6139 }, { "epoch": 5.095435684647303, "grad_norm": 15.442815780639648, "learning_rate": 1.7963817427385895e-05, "loss": 1.175, "step": 6140 }, { "epoch": 5.096265560165975, "grad_norm": 28.996826171875, "learning_rate": 1.7963485477178424e-05, "loss": 1.0836, "step": 6141 }, { "epoch": 5.097095435684647, "grad_norm": 12.507989883422852, "learning_rate": 1.7963153526970956e-05, "loss": 0.9893, "step": 6142 }, { "epoch": 5.097925311203319, "grad_norm": 
22.190889358520508, "learning_rate": 1.7962821576763488e-05, "loss": 0.9902, "step": 6143 }, { "epoch": 5.098755186721991, "grad_norm": 12.62320613861084, "learning_rate": 1.796248962655602e-05, "loss": 1.2338, "step": 6144 }, { "epoch": 5.0995850622406635, "grad_norm": 19.287128448486328, "learning_rate": 1.796215767634855e-05, "loss": 1.1078, "step": 6145 }, { "epoch": 5.100414937759336, "grad_norm": 23.513771057128906, "learning_rate": 1.796182572614108e-05, "loss": 1.3927, "step": 6146 }, { "epoch": 5.101244813278008, "grad_norm": 15.650312423706055, "learning_rate": 1.7961493775933613e-05, "loss": 1.7976, "step": 6147 }, { "epoch": 5.10207468879668, "grad_norm": 15.147748947143555, "learning_rate": 1.7961161825726142e-05, "loss": 1.0151, "step": 6148 }, { "epoch": 5.102904564315352, "grad_norm": 15.367535591125488, "learning_rate": 1.7960829875518674e-05, "loss": 1.5411, "step": 6149 }, { "epoch": 5.1037344398340245, "grad_norm": 15.706616401672363, "learning_rate": 1.7960497925311206e-05, "loss": 1.2315, "step": 6150 }, { "epoch": 5.104564315352697, "grad_norm": 20.41648292541504, "learning_rate": 1.7960165975103735e-05, "loss": 0.935, "step": 6151 }, { "epoch": 5.105394190871369, "grad_norm": 14.659232139587402, "learning_rate": 1.7959834024896267e-05, "loss": 1.6132, "step": 6152 }, { "epoch": 5.106224066390041, "grad_norm": 50.60771942138672, "learning_rate": 1.7959502074688796e-05, "loss": 1.4288, "step": 6153 }, { "epoch": 5.107053941908713, "grad_norm": 17.093780517578125, "learning_rate": 1.7959170124481328e-05, "loss": 1.2551, "step": 6154 }, { "epoch": 5.1078838174273855, "grad_norm": 14.647329330444336, "learning_rate": 1.795883817427386e-05, "loss": 1.4139, "step": 6155 }, { "epoch": 5.108713692946058, "grad_norm": 23.25433921813965, "learning_rate": 1.7958506224066392e-05, "loss": 1.6616, "step": 6156 }, { "epoch": 5.10954356846473, "grad_norm": 10.899819374084473, "learning_rate": 1.795817427385892e-05, "loss": 1.2187, "step": 6157 }, { "epoch": 
5.110373443983402, "grad_norm": 27.349624633789062, "learning_rate": 1.7957842323651453e-05, "loss": 2.13, "step": 6158 }, { "epoch": 5.111203319502074, "grad_norm": 23.32039451599121, "learning_rate": 1.7957510373443985e-05, "loss": 1.3036, "step": 6159 }, { "epoch": 5.1120331950207465, "grad_norm": 13.32985782623291, "learning_rate": 1.7957178423236517e-05, "loss": 0.7849, "step": 6160 }, { "epoch": 5.112863070539419, "grad_norm": 19.148395538330078, "learning_rate": 1.795684647302905e-05, "loss": 1.2909, "step": 6161 }, { "epoch": 5.113692946058091, "grad_norm": 15.925667762756348, "learning_rate": 1.7956514522821578e-05, "loss": 0.9089, "step": 6162 }, { "epoch": 5.114522821576763, "grad_norm": 17.455780029296875, "learning_rate": 1.795618257261411e-05, "loss": 0.8168, "step": 6163 }, { "epoch": 5.115352697095435, "grad_norm": 13.461809158325195, "learning_rate": 1.7955850622406642e-05, "loss": 1.2751, "step": 6164 }, { "epoch": 5.1161825726141075, "grad_norm": 12.597648620605469, "learning_rate": 1.7955518672199174e-05, "loss": 1.1049, "step": 6165 }, { "epoch": 5.11701244813278, "grad_norm": 33.78972244262695, "learning_rate": 1.7955186721991703e-05, "loss": 2.3575, "step": 6166 }, { "epoch": 5.117842323651452, "grad_norm": 11.056524276733398, "learning_rate": 1.7954854771784235e-05, "loss": 0.77, "step": 6167 }, { "epoch": 5.118672199170124, "grad_norm": 20.771196365356445, "learning_rate": 1.7954522821576764e-05, "loss": 1.4606, "step": 6168 }, { "epoch": 5.119502074688796, "grad_norm": 18.153108596801758, "learning_rate": 1.7954190871369296e-05, "loss": 1.1277, "step": 6169 }, { "epoch": 5.1203319502074685, "grad_norm": 12.371888160705566, "learning_rate": 1.7953858921161828e-05, "loss": 1.1048, "step": 6170 }, { "epoch": 5.121161825726141, "grad_norm": 24.91144561767578, "learning_rate": 1.7953526970954357e-05, "loss": 2.5841, "step": 6171 }, { "epoch": 5.121991701244813, "grad_norm": 13.747349739074707, "learning_rate": 1.795319502074689e-05, "loss": 
0.8629, "step": 6172 }, { "epoch": 5.122821576763485, "grad_norm": 11.176813125610352, "learning_rate": 1.795286307053942e-05, "loss": 1.2913, "step": 6173 }, { "epoch": 5.123651452282157, "grad_norm": 16.142536163330078, "learning_rate": 1.795253112033195e-05, "loss": 1.6976, "step": 6174 }, { "epoch": 5.124481327800829, "grad_norm": 13.105274200439453, "learning_rate": 1.795219917012448e-05, "loss": 0.7758, "step": 6175 }, { "epoch": 5.125311203319502, "grad_norm": 16.39385223388672, "learning_rate": 1.7951867219917014e-05, "loss": 1.7257, "step": 6176 }, { "epoch": 5.126141078838174, "grad_norm": 14.403581619262695, "learning_rate": 1.7951535269709546e-05, "loss": 2.072, "step": 6177 }, { "epoch": 5.126970954356846, "grad_norm": 16.555803298950195, "learning_rate": 1.7951203319502075e-05, "loss": 1.7353, "step": 6178 }, { "epoch": 5.127800829875518, "grad_norm": 18.67291831970215, "learning_rate": 1.7950871369294607e-05, "loss": 1.3763, "step": 6179 }, { "epoch": 5.12863070539419, "grad_norm": 17.686491012573242, "learning_rate": 1.795053941908714e-05, "loss": 1.7163, "step": 6180 }, { "epoch": 5.1294605809128635, "grad_norm": 19.812475204467773, "learning_rate": 1.795020746887967e-05, "loss": 1.8429, "step": 6181 }, { "epoch": 5.130290456431536, "grad_norm": 20.598480224609375, "learning_rate": 1.79498755186722e-05, "loss": 0.9374, "step": 6182 }, { "epoch": 5.131120331950208, "grad_norm": 15.101882934570312, "learning_rate": 1.7949543568464732e-05, "loss": 1.3078, "step": 6183 }, { "epoch": 5.13195020746888, "grad_norm": 14.340452194213867, "learning_rate": 1.7949211618257264e-05, "loss": 1.4694, "step": 6184 }, { "epoch": 5.132780082987552, "grad_norm": 10.282286643981934, "learning_rate": 1.7948879668049796e-05, "loss": 0.7295, "step": 6185 }, { "epoch": 5.1336099585062245, "grad_norm": 17.81169891357422, "learning_rate": 1.7948547717842325e-05, "loss": 1.6823, "step": 6186 }, { "epoch": 5.134439834024897, "grad_norm": 15.13589096069336, "learning_rate": 
1.7948215767634857e-05, "loss": 1.4682, "step": 6187 }, { "epoch": 5.135269709543569, "grad_norm": 12.60448169708252, "learning_rate": 1.794788381742739e-05, "loss": 1.3397, "step": 6188 }, { "epoch": 5.136099585062241, "grad_norm": 21.76105499267578, "learning_rate": 1.7947551867219918e-05, "loss": 1.5537, "step": 6189 }, { "epoch": 5.136929460580913, "grad_norm": 16.562259674072266, "learning_rate": 1.794721991701245e-05, "loss": 1.3487, "step": 6190 }, { "epoch": 5.1377593360995855, "grad_norm": 16.837812423706055, "learning_rate": 1.794688796680498e-05, "loss": 1.1779, "step": 6191 }, { "epoch": 5.138589211618258, "grad_norm": 12.435187339782715, "learning_rate": 1.794655601659751e-05, "loss": 1.1199, "step": 6192 }, { "epoch": 5.13941908713693, "grad_norm": 11.416285514831543, "learning_rate": 1.7946224066390043e-05, "loss": 1.3922, "step": 6193 }, { "epoch": 5.140248962655602, "grad_norm": 16.3325138092041, "learning_rate": 1.7945892116182575e-05, "loss": 1.15, "step": 6194 }, { "epoch": 5.141078838174274, "grad_norm": 19.216203689575195, "learning_rate": 1.7945560165975103e-05, "loss": 1.4087, "step": 6195 }, { "epoch": 5.141908713692946, "grad_norm": 11.949708938598633, "learning_rate": 1.7945228215767636e-05, "loss": 1.1141, "step": 6196 }, { "epoch": 5.142738589211619, "grad_norm": 16.274974822998047, "learning_rate": 1.7944896265560168e-05, "loss": 1.2879, "step": 6197 }, { "epoch": 5.143568464730291, "grad_norm": 13.399030685424805, "learning_rate": 1.79445643153527e-05, "loss": 0.9056, "step": 6198 }, { "epoch": 5.144398340248963, "grad_norm": 16.40725326538086, "learning_rate": 1.794423236514523e-05, "loss": 1.349, "step": 6199 }, { "epoch": 5.145228215767635, "grad_norm": 12.248807907104492, "learning_rate": 1.794390041493776e-05, "loss": 1.4063, "step": 6200 }, { "epoch": 5.146058091286307, "grad_norm": 12.045252799987793, "learning_rate": 1.7943568464730293e-05, "loss": 1.2199, "step": 6201 }, { "epoch": 5.14688796680498, "grad_norm": 
14.397645950317383, "learning_rate": 1.7943236514522825e-05, "loss": 1.5472, "step": 6202 }, { "epoch": 5.147717842323652, "grad_norm": 25.121883392333984, "learning_rate": 1.7942904564315354e-05, "loss": 2.2043, "step": 6203 }, { "epoch": 5.148547717842324, "grad_norm": 12.330513954162598, "learning_rate": 1.7942572614107886e-05, "loss": 0.7669, "step": 6204 }, { "epoch": 5.149377593360996, "grad_norm": 12.724810600280762, "learning_rate": 1.7942240663900418e-05, "loss": 1.1192, "step": 6205 }, { "epoch": 5.150207468879668, "grad_norm": 25.122852325439453, "learning_rate": 1.7941908713692946e-05, "loss": 0.9379, "step": 6206 }, { "epoch": 5.151037344398341, "grad_norm": 22.18619728088379, "learning_rate": 1.794157676348548e-05, "loss": 1.2305, "step": 6207 }, { "epoch": 5.151867219917013, "grad_norm": 11.327432632446289, "learning_rate": 1.794124481327801e-05, "loss": 0.8933, "step": 6208 }, { "epoch": 5.152697095435685, "grad_norm": 26.148956298828125, "learning_rate": 1.794091286307054e-05, "loss": 1.3696, "step": 6209 }, { "epoch": 5.153526970954357, "grad_norm": 15.43882942199707, "learning_rate": 1.794058091286307e-05, "loss": 1.0165, "step": 6210 }, { "epoch": 5.154356846473029, "grad_norm": 28.391117095947266, "learning_rate": 1.7940248962655604e-05, "loss": 1.6557, "step": 6211 }, { "epoch": 5.155186721991702, "grad_norm": 11.816515922546387, "learning_rate": 1.7939917012448132e-05, "loss": 1.3199, "step": 6212 }, { "epoch": 5.156016597510374, "grad_norm": 17.5804386138916, "learning_rate": 1.7939585062240664e-05, "loss": 1.1239, "step": 6213 }, { "epoch": 5.156846473029046, "grad_norm": 26.10383415222168, "learning_rate": 1.7939253112033197e-05, "loss": 0.9373, "step": 6214 }, { "epoch": 5.157676348547718, "grad_norm": 11.311984062194824, "learning_rate": 1.793892116182573e-05, "loss": 0.7683, "step": 6215 }, { "epoch": 5.15850622406639, "grad_norm": 10.36693000793457, "learning_rate": 1.7938589211618257e-05, "loss": 0.435, "step": 6216 }, { "epoch": 
5.159336099585063, "grad_norm": 19.479063034057617, "learning_rate": 1.793825726141079e-05, "loss": 1.6162, "step": 6217 }, { "epoch": 5.160165975103735, "grad_norm": 20.59271240234375, "learning_rate": 1.793792531120332e-05, "loss": 1.1365, "step": 6218 }, { "epoch": 5.160995850622407, "grad_norm": 15.32652473449707, "learning_rate": 1.7937593360995854e-05, "loss": 1.2647, "step": 6219 }, { "epoch": 5.161825726141079, "grad_norm": 19.96725082397461, "learning_rate": 1.7937261410788382e-05, "loss": 1.4772, "step": 6220 }, { "epoch": 5.162655601659751, "grad_norm": 13.589740753173828, "learning_rate": 1.7936929460580915e-05, "loss": 1.0874, "step": 6221 }, { "epoch": 5.1634854771784235, "grad_norm": 22.48311424255371, "learning_rate": 1.7936597510373447e-05, "loss": 1.5973, "step": 6222 }, { "epoch": 5.164315352697096, "grad_norm": 11.235198974609375, "learning_rate": 1.793626556016598e-05, "loss": 1.0633, "step": 6223 }, { "epoch": 5.165145228215768, "grad_norm": 17.062923431396484, "learning_rate": 1.7935933609958507e-05, "loss": 1.5557, "step": 6224 }, { "epoch": 5.16597510373444, "grad_norm": 13.362686157226562, "learning_rate": 1.793560165975104e-05, "loss": 0.9607, "step": 6225 }, { "epoch": 5.166804979253112, "grad_norm": 28.486648559570312, "learning_rate": 1.7935269709543572e-05, "loss": 1.832, "step": 6226 }, { "epoch": 5.1676348547717845, "grad_norm": 28.80851936340332, "learning_rate": 1.79349377593361e-05, "loss": 1.2704, "step": 6227 }, { "epoch": 5.168464730290457, "grad_norm": 14.692747116088867, "learning_rate": 1.7934605809128633e-05, "loss": 0.997, "step": 6228 }, { "epoch": 5.169294605809129, "grad_norm": 17.906248092651367, "learning_rate": 1.793427385892116e-05, "loss": 1.4454, "step": 6229 }, { "epoch": 5.170124481327801, "grad_norm": 20.020610809326172, "learning_rate": 1.7933941908713693e-05, "loss": 1.4173, "step": 6230 }, { "epoch": 5.170954356846473, "grad_norm": 25.289588928222656, "learning_rate": 1.7933609958506225e-05, "loss": 1.9137, 
"step": 6231 }, { "epoch": 5.1717842323651455, "grad_norm": 19.171003341674805, "learning_rate": 1.7933278008298754e-05, "loss": 1.0294, "step": 6232 }, { "epoch": 5.172614107883818, "grad_norm": 11.183530807495117, "learning_rate": 1.7932946058091286e-05, "loss": 1.0336, "step": 6233 }, { "epoch": 5.17344398340249, "grad_norm": 20.53968620300293, "learning_rate": 1.793261410788382e-05, "loss": 1.5492, "step": 6234 }, { "epoch": 5.174273858921162, "grad_norm": 14.881692886352539, "learning_rate": 1.793228215767635e-05, "loss": 0.9828, "step": 6235 }, { "epoch": 5.175103734439834, "grad_norm": 22.67431640625, "learning_rate": 1.793195020746888e-05, "loss": 1.0156, "step": 6236 }, { "epoch": 5.1759336099585065, "grad_norm": 11.883986473083496, "learning_rate": 1.793161825726141e-05, "loss": 1.0723, "step": 6237 }, { "epoch": 5.176763485477179, "grad_norm": 18.70692253112793, "learning_rate": 1.7931286307053943e-05, "loss": 1.1548, "step": 6238 }, { "epoch": 5.177593360995851, "grad_norm": 16.0076904296875, "learning_rate": 1.7930954356846476e-05, "loss": 1.017, "step": 6239 }, { "epoch": 5.178423236514523, "grad_norm": 11.995837211608887, "learning_rate": 1.7930622406639008e-05, "loss": 0.8487, "step": 6240 }, { "epoch": 5.179253112033195, "grad_norm": 16.42177391052246, "learning_rate": 1.7930290456431536e-05, "loss": 0.6896, "step": 6241 }, { "epoch": 5.1800829875518675, "grad_norm": 26.28922462463379, "learning_rate": 1.792995850622407e-05, "loss": 1.0332, "step": 6242 }, { "epoch": 5.18091286307054, "grad_norm": 23.619245529174805, "learning_rate": 1.79296265560166e-05, "loss": 1.3782, "step": 6243 }, { "epoch": 5.181742738589212, "grad_norm": 21.44460678100586, "learning_rate": 1.7929294605809133e-05, "loss": 1.8413, "step": 6244 }, { "epoch": 5.182572614107884, "grad_norm": 17.401012420654297, "learning_rate": 1.792896265560166e-05, "loss": 1.3972, "step": 6245 }, { "epoch": 5.183402489626556, "grad_norm": 19.987714767456055, "learning_rate": 
1.7928630705394194e-05, "loss": 1.5535, "step": 6246 }, { "epoch": 5.1842323651452284, "grad_norm": 35.216796875, "learning_rate": 1.7928298755186722e-05, "loss": 1.9564, "step": 6247 }, { "epoch": 5.185062240663901, "grad_norm": 26.726097106933594, "learning_rate": 1.7927966804979254e-05, "loss": 1.2398, "step": 6248 }, { "epoch": 5.185892116182573, "grad_norm": 17.703889846801758, "learning_rate": 1.7927634854771786e-05, "loss": 1.3769, "step": 6249 }, { "epoch": 5.186721991701245, "grad_norm": 17.127906799316406, "learning_rate": 1.7927302904564315e-05, "loss": 0.7246, "step": 6250 }, { "epoch": 5.187551867219917, "grad_norm": 19.78378677368164, "learning_rate": 1.7926970954356847e-05, "loss": 1.2204, "step": 6251 }, { "epoch": 5.188381742738589, "grad_norm": 21.646831512451172, "learning_rate": 1.792663900414938e-05, "loss": 0.936, "step": 6252 }, { "epoch": 5.189211618257262, "grad_norm": 14.410604476928711, "learning_rate": 1.7926307053941908e-05, "loss": 1.1895, "step": 6253 }, { "epoch": 5.190041493775934, "grad_norm": 18.45744514465332, "learning_rate": 1.792597510373444e-05, "loss": 1.7579, "step": 6254 }, { "epoch": 5.190871369294606, "grad_norm": 17.14018440246582, "learning_rate": 1.7925643153526972e-05, "loss": 1.2108, "step": 6255 }, { "epoch": 5.191701244813278, "grad_norm": 20.616445541381836, "learning_rate": 1.7925311203319504e-05, "loss": 1.6626, "step": 6256 }, { "epoch": 5.19253112033195, "grad_norm": 20.817312240600586, "learning_rate": 1.7924979253112033e-05, "loss": 1.109, "step": 6257 }, { "epoch": 5.193360995850623, "grad_norm": 20.796777725219727, "learning_rate": 1.7924647302904565e-05, "loss": 1.6218, "step": 6258 }, { "epoch": 5.194190871369295, "grad_norm": 19.736833572387695, "learning_rate": 1.7924315352697097e-05, "loss": 1.7003, "step": 6259 }, { "epoch": 5.195020746887967, "grad_norm": 16.011619567871094, "learning_rate": 1.792398340248963e-05, "loss": 1.7147, "step": 6260 }, { "epoch": 5.195850622406639, "grad_norm": 
21.188255310058594, "learning_rate": 1.7923651452282158e-05, "loss": 1.2384, "step": 6261 }, { "epoch": 5.196680497925311, "grad_norm": 15.873960494995117, "learning_rate": 1.792331950207469e-05, "loss": 1.2982, "step": 6262 }, { "epoch": 5.197510373443984, "grad_norm": 19.665348052978516, "learning_rate": 1.7922987551867222e-05, "loss": 2.1043, "step": 6263 }, { "epoch": 5.198340248962656, "grad_norm": 14.763419151306152, "learning_rate": 1.7922655601659755e-05, "loss": 1.2492, "step": 6264 }, { "epoch": 5.199170124481328, "grad_norm": 11.405424118041992, "learning_rate": 1.7922323651452283e-05, "loss": 0.5143, "step": 6265 }, { "epoch": 5.2, "grad_norm": 16.176374435424805, "learning_rate": 1.7921991701244815e-05, "loss": 1.2806, "step": 6266 }, { "epoch": 5.200829875518672, "grad_norm": 23.612642288208008, "learning_rate": 1.7921659751037347e-05, "loss": 1.3159, "step": 6267 }, { "epoch": 5.201659751037345, "grad_norm": 16.35237693786621, "learning_rate": 1.7921327800829876e-05, "loss": 1.2663, "step": 6268 }, { "epoch": 5.202489626556017, "grad_norm": 18.879962921142578, "learning_rate": 1.7920995850622408e-05, "loss": 1.7436, "step": 6269 }, { "epoch": 5.203319502074689, "grad_norm": 19.536184310913086, "learning_rate": 1.7920663900414937e-05, "loss": 1.9483, "step": 6270 }, { "epoch": 5.204149377593361, "grad_norm": 15.881807327270508, "learning_rate": 1.792033195020747e-05, "loss": 0.9879, "step": 6271 }, { "epoch": 5.204979253112033, "grad_norm": 20.213947296142578, "learning_rate": 1.792e-05, "loss": 1.446, "step": 6272 }, { "epoch": 5.2058091286307056, "grad_norm": 14.687895774841309, "learning_rate": 1.7919668049792533e-05, "loss": 1.5581, "step": 6273 }, { "epoch": 5.206639004149378, "grad_norm": 20.856801986694336, "learning_rate": 1.7919336099585062e-05, "loss": 1.06, "step": 6274 }, { "epoch": 5.20746887966805, "grad_norm": 14.051430702209473, "learning_rate": 1.7919004149377594e-05, "loss": 1.4183, "step": 6275 }, { "epoch": 5.208298755186722, 
"grad_norm": 16.072208404541016, "learning_rate": 1.7918672199170126e-05, "loss": 1.3178, "step": 6276 }, { "epoch": 5.209128630705394, "grad_norm": 24.6879940032959, "learning_rate": 1.791834024896266e-05, "loss": 2.0913, "step": 6277 }, { "epoch": 5.2099585062240665, "grad_norm": 17.630247116088867, "learning_rate": 1.7918008298755187e-05, "loss": 1.6475, "step": 6278 }, { "epoch": 5.210788381742739, "grad_norm": 14.621565818786621, "learning_rate": 1.791767634854772e-05, "loss": 1.0952, "step": 6279 }, { "epoch": 5.211618257261411, "grad_norm": 16.188230514526367, "learning_rate": 1.791734439834025e-05, "loss": 1.1534, "step": 6280 }, { "epoch": 5.212448132780083, "grad_norm": 13.097055435180664, "learning_rate": 1.7917012448132783e-05, "loss": 1.073, "step": 6281 }, { "epoch": 5.213278008298755, "grad_norm": 14.330608367919922, "learning_rate": 1.7916680497925312e-05, "loss": 1.3786, "step": 6282 }, { "epoch": 5.2141078838174275, "grad_norm": 18.462684631347656, "learning_rate": 1.7916348547717844e-05, "loss": 1.5736, "step": 6283 }, { "epoch": 5.2149377593361, "grad_norm": 17.84316062927246, "learning_rate": 1.7916016597510376e-05, "loss": 1.0239, "step": 6284 }, { "epoch": 5.215767634854772, "grad_norm": 17.217910766601562, "learning_rate": 1.7915684647302905e-05, "loss": 0.9727, "step": 6285 }, { "epoch": 5.216597510373444, "grad_norm": 14.686017990112305, "learning_rate": 1.7915352697095437e-05, "loss": 1.4156, "step": 6286 }, { "epoch": 5.217427385892116, "grad_norm": 11.99582576751709, "learning_rate": 1.791502074688797e-05, "loss": 0.8684, "step": 6287 }, { "epoch": 5.2182572614107885, "grad_norm": 11.417520523071289, "learning_rate": 1.7914688796680498e-05, "loss": 1.3002, "step": 6288 }, { "epoch": 5.219087136929461, "grad_norm": 11.579789161682129, "learning_rate": 1.791435684647303e-05, "loss": 0.9478, "step": 6289 }, { "epoch": 5.219917012448133, "grad_norm": 20.437166213989258, "learning_rate": 1.7914024896265562e-05, "loss": 1.7296, "step": 6290 
}, { "epoch": 5.220746887966805, "grad_norm": 14.31911849975586, "learning_rate": 1.791369294605809e-05, "loss": 1.1623, "step": 6291 }, { "epoch": 5.221576763485477, "grad_norm": 12.910730361938477, "learning_rate": 1.7913360995850623e-05, "loss": 1.1092, "step": 6292 }, { "epoch": 5.2224066390041495, "grad_norm": 20.47974395751953, "learning_rate": 1.7913029045643155e-05, "loss": 1.9929, "step": 6293 }, { "epoch": 5.223236514522822, "grad_norm": 17.27054214477539, "learning_rate": 1.7912697095435687e-05, "loss": 1.3573, "step": 6294 }, { "epoch": 5.224066390041494, "grad_norm": 18.386075973510742, "learning_rate": 1.7912365145228216e-05, "loss": 1.0061, "step": 6295 }, { "epoch": 5.224896265560166, "grad_norm": 14.825286865234375, "learning_rate": 1.7912033195020748e-05, "loss": 0.9019, "step": 6296 }, { "epoch": 5.225726141078838, "grad_norm": 18.926063537597656, "learning_rate": 1.791170124481328e-05, "loss": 1.0109, "step": 6297 }, { "epoch": 5.2265560165975105, "grad_norm": 20.841941833496094, "learning_rate": 1.7911369294605812e-05, "loss": 1.5848, "step": 6298 }, { "epoch": 5.227385892116183, "grad_norm": 15.31876277923584, "learning_rate": 1.791103734439834e-05, "loss": 1.428, "step": 6299 }, { "epoch": 5.228215767634855, "grad_norm": 33.80757522583008, "learning_rate": 1.7910705394190873e-05, "loss": 1.525, "step": 6300 }, { "epoch": 5.229045643153527, "grad_norm": 19.6084041595459, "learning_rate": 1.7910373443983405e-05, "loss": 1.746, "step": 6301 }, { "epoch": 5.229875518672199, "grad_norm": 33.06914138793945, "learning_rate": 1.7910041493775937e-05, "loss": 1.4639, "step": 6302 }, { "epoch": 5.230705394190871, "grad_norm": 17.575916290283203, "learning_rate": 1.7909709543568466e-05, "loss": 0.9841, "step": 6303 }, { "epoch": 5.231535269709544, "grad_norm": 11.402789115905762, "learning_rate": 1.7909377593360998e-05, "loss": 1.1287, "step": 6304 }, { "epoch": 5.232365145228216, "grad_norm": 19.169004440307617, "learning_rate": 1.790904564315353e-05, 
"loss": 1.5916, "step": 6305 }, { "epoch": 5.233195020746888, "grad_norm": 12.205853462219238, "learning_rate": 1.790871369294606e-05, "loss": 1.0983, "step": 6306 }, { "epoch": 5.23402489626556, "grad_norm": 39.95079040527344, "learning_rate": 1.790838174273859e-05, "loss": 1.9338, "step": 6307 }, { "epoch": 5.234854771784232, "grad_norm": 24.779077529907227, "learning_rate": 1.790804979253112e-05, "loss": 1.3414, "step": 6308 }, { "epoch": 5.235684647302905, "grad_norm": 15.02523136138916, "learning_rate": 1.7907717842323652e-05, "loss": 1.4365, "step": 6309 }, { "epoch": 5.236514522821577, "grad_norm": 14.557611465454102, "learning_rate": 1.7907385892116184e-05, "loss": 1.1055, "step": 6310 }, { "epoch": 5.237344398340249, "grad_norm": 16.602373123168945, "learning_rate": 1.7907053941908713e-05, "loss": 1.5401, "step": 6311 }, { "epoch": 5.238174273858921, "grad_norm": 26.58405876159668, "learning_rate": 1.7906721991701245e-05, "loss": 1.0745, "step": 6312 }, { "epoch": 5.239004149377593, "grad_norm": 17.758827209472656, "learning_rate": 1.7906390041493777e-05, "loss": 1.5126, "step": 6313 }, { "epoch": 5.239834024896266, "grad_norm": 15.077035903930664, "learning_rate": 1.790605809128631e-05, "loss": 1.0257, "step": 6314 }, { "epoch": 5.240663900414938, "grad_norm": 10.433574676513672, "learning_rate": 1.7905726141078838e-05, "loss": 1.2352, "step": 6315 }, { "epoch": 5.24149377593361, "grad_norm": 17.345056533813477, "learning_rate": 1.790539419087137e-05, "loss": 1.1027, "step": 6316 }, { "epoch": 5.242323651452282, "grad_norm": 26.17383575439453, "learning_rate": 1.7905062240663902e-05, "loss": 1.123, "step": 6317 }, { "epoch": 5.243153526970954, "grad_norm": 16.47456169128418, "learning_rate": 1.7904730290456434e-05, "loss": 1.1874, "step": 6318 }, { "epoch": 5.243983402489627, "grad_norm": 12.638768196105957, "learning_rate": 1.7904398340248966e-05, "loss": 1.5131, "step": 6319 }, { "epoch": 5.244813278008299, "grad_norm": 17.591341018676758, 
"learning_rate": 1.7904066390041495e-05, "loss": 1.1757, "step": 6320 }, { "epoch": 5.245643153526971, "grad_norm": 10.375631332397461, "learning_rate": 1.7903734439834027e-05, "loss": 1.0941, "step": 6321 }, { "epoch": 5.246473029045643, "grad_norm": 10.909255981445312, "learning_rate": 1.790340248962656e-05, "loss": 0.9455, "step": 6322 }, { "epoch": 5.247302904564315, "grad_norm": 20.21939468383789, "learning_rate": 1.7903070539419088e-05, "loss": 1.8692, "step": 6323 }, { "epoch": 5.248132780082988, "grad_norm": 13.846928596496582, "learning_rate": 1.790273858921162e-05, "loss": 0.916, "step": 6324 }, { "epoch": 5.24896265560166, "grad_norm": 17.900373458862305, "learning_rate": 1.7902406639004152e-05, "loss": 1.5123, "step": 6325 }, { "epoch": 5.249792531120332, "grad_norm": 25.180084228515625, "learning_rate": 1.790207468879668e-05, "loss": 2.0146, "step": 6326 }, { "epoch": 5.250622406639004, "grad_norm": 16.522247314453125, "learning_rate": 1.7901742738589213e-05, "loss": 1.9548, "step": 6327 }, { "epoch": 5.251452282157676, "grad_norm": 13.489309310913086, "learning_rate": 1.7901410788381745e-05, "loss": 1.2044, "step": 6328 }, { "epoch": 5.2522821576763485, "grad_norm": 24.178224563598633, "learning_rate": 1.7901078838174274e-05, "loss": 1.4092, "step": 6329 }, { "epoch": 5.253112033195021, "grad_norm": 15.118644714355469, "learning_rate": 1.7900746887966806e-05, "loss": 1.1945, "step": 6330 }, { "epoch": 5.253941908713693, "grad_norm": 18.258718490600586, "learning_rate": 1.7900414937759338e-05, "loss": 1.5208, "step": 6331 }, { "epoch": 5.254771784232365, "grad_norm": 16.385421752929688, "learning_rate": 1.7900082987551867e-05, "loss": 0.9319, "step": 6332 }, { "epoch": 5.255601659751037, "grad_norm": 17.748226165771484, "learning_rate": 1.78997510373444e-05, "loss": 1.1004, "step": 6333 }, { "epoch": 5.2564315352697095, "grad_norm": 15.599635124206543, "learning_rate": 1.789941908713693e-05, "loss": 0.9729, "step": 6334 }, { "epoch": 5.257261410788382, 
"grad_norm": 17.69371795654297, "learning_rate": 1.7899087136929463e-05, "loss": 1.1845, "step": 6335 }, { "epoch": 5.258091286307054, "grad_norm": 12.674038887023926, "learning_rate": 1.7898755186721992e-05, "loss": 1.3169, "step": 6336 }, { "epoch": 5.258921161825726, "grad_norm": 11.624946594238281, "learning_rate": 1.7898423236514524e-05, "loss": 1.3568, "step": 6337 }, { "epoch": 5.259751037344398, "grad_norm": 23.547704696655273, "learning_rate": 1.7898091286307056e-05, "loss": 0.952, "step": 6338 }, { "epoch": 5.2605809128630705, "grad_norm": 15.797788619995117, "learning_rate": 1.7897759336099588e-05, "loss": 1.1272, "step": 6339 }, { "epoch": 5.261410788381743, "grad_norm": 16.860647201538086, "learning_rate": 1.7897427385892117e-05, "loss": 1.094, "step": 6340 }, { "epoch": 5.262240663900415, "grad_norm": 11.816434860229492, "learning_rate": 1.789709543568465e-05, "loss": 1.2552, "step": 6341 }, { "epoch": 5.263070539419087, "grad_norm": 21.586448669433594, "learning_rate": 1.789676348547718e-05, "loss": 1.5805, "step": 6342 }, { "epoch": 5.263900414937759, "grad_norm": 15.55482006072998, "learning_rate": 1.7896431535269713e-05, "loss": 1.063, "step": 6343 }, { "epoch": 5.2647302904564315, "grad_norm": 21.55295181274414, "learning_rate": 1.7896099585062242e-05, "loss": 0.9771, "step": 6344 }, { "epoch": 5.265560165975104, "grad_norm": 22.855472564697266, "learning_rate": 1.7895767634854774e-05, "loss": 1.1451, "step": 6345 }, { "epoch": 5.266390041493776, "grad_norm": 11.327404022216797, "learning_rate": 1.7895435684647303e-05, "loss": 0.7667, "step": 6346 }, { "epoch": 5.267219917012448, "grad_norm": 20.060029983520508, "learning_rate": 1.7895103734439835e-05, "loss": 1.0941, "step": 6347 }, { "epoch": 5.26804979253112, "grad_norm": 17.622413635253906, "learning_rate": 1.7894771784232367e-05, "loss": 1.0422, "step": 6348 }, { "epoch": 5.2688796680497925, "grad_norm": 20.863689422607422, "learning_rate": 1.7894439834024896e-05, "loss": 1.3918, "step": 
6349 }, { "epoch": 5.269709543568465, "grad_norm": 10.77430534362793, "learning_rate": 1.7894107883817428e-05, "loss": 0.9025, "step": 6350 }, { "epoch": 5.270539419087137, "grad_norm": 18.08458137512207, "learning_rate": 1.789377593360996e-05, "loss": 1.4809, "step": 6351 }, { "epoch": 5.271369294605809, "grad_norm": 19.834936141967773, "learning_rate": 1.7893443983402492e-05, "loss": 1.0088, "step": 6352 }, { "epoch": 5.272199170124481, "grad_norm": 12.30484390258789, "learning_rate": 1.789311203319502e-05, "loss": 0.9537, "step": 6353 }, { "epoch": 5.2730290456431534, "grad_norm": 17.055025100708008, "learning_rate": 1.7892780082987553e-05, "loss": 1.1795, "step": 6354 }, { "epoch": 5.273858921161826, "grad_norm": 20.68287467956543, "learning_rate": 1.7892448132780085e-05, "loss": 1.0353, "step": 6355 }, { "epoch": 5.274688796680498, "grad_norm": 24.4305477142334, "learning_rate": 1.7892116182572617e-05, "loss": 2.0848, "step": 6356 }, { "epoch": 5.27551867219917, "grad_norm": 16.090063095092773, "learning_rate": 1.7891784232365146e-05, "loss": 1.3305, "step": 6357 }, { "epoch": 5.276348547717842, "grad_norm": 16.073867797851562, "learning_rate": 1.7891452282157678e-05, "loss": 1.6087, "step": 6358 }, { "epoch": 5.277178423236514, "grad_norm": 17.12565040588379, "learning_rate": 1.789112033195021e-05, "loss": 0.9336, "step": 6359 }, { "epoch": 5.278008298755187, "grad_norm": 28.446643829345703, "learning_rate": 1.7890788381742742e-05, "loss": 1.6027, "step": 6360 }, { "epoch": 5.278838174273859, "grad_norm": 22.407773971557617, "learning_rate": 1.789045643153527e-05, "loss": 1.172, "step": 6361 }, { "epoch": 5.279668049792531, "grad_norm": 17.541404724121094, "learning_rate": 1.7890124481327803e-05, "loss": 1.9924, "step": 6362 }, { "epoch": 5.280497925311203, "grad_norm": 12.168909072875977, "learning_rate": 1.7889792531120335e-05, "loss": 0.8931, "step": 6363 }, { "epoch": 5.281327800829875, "grad_norm": 17.306285858154297, "learning_rate": 
1.7889460580912864e-05, "loss": 1.5288, "step": 6364 }, { "epoch": 5.282157676348548, "grad_norm": 16.611101150512695, "learning_rate": 1.7889128630705396e-05, "loss": 1.2775, "step": 6365 }, { "epoch": 5.28298755186722, "grad_norm": 28.29440689086914, "learning_rate": 1.7888796680497928e-05, "loss": 1.9993, "step": 6366 }, { "epoch": 5.283817427385892, "grad_norm": 38.363460540771484, "learning_rate": 1.7888464730290457e-05, "loss": 1.783, "step": 6367 }, { "epoch": 5.284647302904564, "grad_norm": 15.621506690979004, "learning_rate": 1.788813278008299e-05, "loss": 1.0562, "step": 6368 }, { "epoch": 5.285477178423236, "grad_norm": 10.225151062011719, "learning_rate": 1.788780082987552e-05, "loss": 0.7846, "step": 6369 }, { "epoch": 5.286307053941909, "grad_norm": 18.099571228027344, "learning_rate": 1.788746887966805e-05, "loss": 1.4643, "step": 6370 }, { "epoch": 5.287136929460581, "grad_norm": 22.035337448120117, "learning_rate": 1.788713692946058e-05, "loss": 1.3787, "step": 6371 }, { "epoch": 5.287966804979253, "grad_norm": 18.263111114501953, "learning_rate": 1.7886804979253114e-05, "loss": 1.4221, "step": 6372 }, { "epoch": 5.288796680497925, "grad_norm": 27.027467727661133, "learning_rate": 1.7886473029045646e-05, "loss": 1.2808, "step": 6373 }, { "epoch": 5.289626556016597, "grad_norm": 24.735288619995117, "learning_rate": 1.7886141078838175e-05, "loss": 0.6075, "step": 6374 }, { "epoch": 5.29045643153527, "grad_norm": 20.501935958862305, "learning_rate": 1.7885809128630707e-05, "loss": 1.7162, "step": 6375 }, { "epoch": 5.291286307053942, "grad_norm": 21.18604278564453, "learning_rate": 1.788547717842324e-05, "loss": 1.3671, "step": 6376 }, { "epoch": 5.292116182572614, "grad_norm": 12.474077224731445, "learning_rate": 1.788514522821577e-05, "loss": 0.9098, "step": 6377 }, { "epoch": 5.292946058091286, "grad_norm": 20.90989112854004, "learning_rate": 1.78848132780083e-05, "loss": 1.4156, "step": 6378 }, { "epoch": 5.293775933609958, "grad_norm": 
18.014175415039062, "learning_rate": 1.7884481327800832e-05, "loss": 1.749, "step": 6379 }, { "epoch": 5.2946058091286305, "grad_norm": 10.353365898132324, "learning_rate": 1.7884149377593364e-05, "loss": 0.794, "step": 6380 }, { "epoch": 5.295435684647303, "grad_norm": 12.926217079162598, "learning_rate": 1.7883817427385896e-05, "loss": 1.1467, "step": 6381 }, { "epoch": 5.296265560165975, "grad_norm": 14.876890182495117, "learning_rate": 1.7883485477178425e-05, "loss": 1.1026, "step": 6382 }, { "epoch": 5.297095435684647, "grad_norm": 15.319450378417969, "learning_rate": 1.7883153526970957e-05, "loss": 1.0771, "step": 6383 }, { "epoch": 5.297925311203319, "grad_norm": 14.00252628326416, "learning_rate": 1.788282157676349e-05, "loss": 0.946, "step": 6384 }, { "epoch": 5.2987551867219915, "grad_norm": 20.04630470275879, "learning_rate": 1.7882489626556018e-05, "loss": 1.1239, "step": 6385 }, { "epoch": 5.299585062240664, "grad_norm": 23.944114685058594, "learning_rate": 1.788215767634855e-05, "loss": 0.9007, "step": 6386 }, { "epoch": 5.300414937759336, "grad_norm": 19.680025100708008, "learning_rate": 1.788182572614108e-05, "loss": 1.7378, "step": 6387 }, { "epoch": 5.301244813278008, "grad_norm": 21.57838249206543, "learning_rate": 1.788149377593361e-05, "loss": 2.1708, "step": 6388 }, { "epoch": 5.30207468879668, "grad_norm": 22.30620574951172, "learning_rate": 1.7881161825726143e-05, "loss": 1.3921, "step": 6389 }, { "epoch": 5.3029045643153525, "grad_norm": 12.874926567077637, "learning_rate": 1.788082987551867e-05, "loss": 1.2463, "step": 6390 }, { "epoch": 5.303734439834025, "grad_norm": 15.744827270507812, "learning_rate": 1.7880497925311203e-05, "loss": 1.1384, "step": 6391 }, { "epoch": 5.304564315352697, "grad_norm": 22.638341903686523, "learning_rate": 1.7880165975103736e-05, "loss": 1.2932, "step": 6392 }, { "epoch": 5.305394190871369, "grad_norm": 16.55810546875, "learning_rate": 1.7879834024896268e-05, "loss": 1.2381, "step": 6393 }, { "epoch": 
5.306224066390041, "grad_norm": 31.990779876708984, "learning_rate": 1.7879502074688796e-05, "loss": 2.6937, "step": 6394 }, { "epoch": 5.3070539419087135, "grad_norm": 13.083547592163086, "learning_rate": 1.787917012448133e-05, "loss": 1.3238, "step": 6395 }, { "epoch": 5.307883817427386, "grad_norm": 14.11887264251709, "learning_rate": 1.787883817427386e-05, "loss": 0.5154, "step": 6396 }, { "epoch": 5.308713692946058, "grad_norm": 19.81928825378418, "learning_rate": 1.7878506224066393e-05, "loss": 1.3677, "step": 6397 }, { "epoch": 5.30954356846473, "grad_norm": 13.297928810119629, "learning_rate": 1.7878174273858925e-05, "loss": 1.1717, "step": 6398 }, { "epoch": 5.310373443983402, "grad_norm": 10.933954238891602, "learning_rate": 1.7877842323651454e-05, "loss": 1.1524, "step": 6399 }, { "epoch": 5.3112033195020745, "grad_norm": 16.6053409576416, "learning_rate": 1.7877510373443986e-05, "loss": 1.2438, "step": 6400 }, { "epoch": 5.312033195020747, "grad_norm": 11.958657264709473, "learning_rate": 1.7877178423236518e-05, "loss": 0.8136, "step": 6401 }, { "epoch": 5.312863070539419, "grad_norm": 43.97780990600586, "learning_rate": 1.7876846473029046e-05, "loss": 1.2598, "step": 6402 }, { "epoch": 5.313692946058091, "grad_norm": 14.275585174560547, "learning_rate": 1.787651452282158e-05, "loss": 1.0972, "step": 6403 }, { "epoch": 5.314522821576763, "grad_norm": 17.384628295898438, "learning_rate": 1.787618257261411e-05, "loss": 1.1191, "step": 6404 }, { "epoch": 5.3153526970954355, "grad_norm": 16.608707427978516, "learning_rate": 1.787585062240664e-05, "loss": 1.0367, "step": 6405 }, { "epoch": 5.316182572614108, "grad_norm": 21.022754669189453, "learning_rate": 1.787551867219917e-05, "loss": 2.3236, "step": 6406 }, { "epoch": 5.31701244813278, "grad_norm": 12.982013702392578, "learning_rate": 1.78751867219917e-05, "loss": 1.2236, "step": 6407 }, { "epoch": 5.317842323651452, "grad_norm": 34.29866027832031, "learning_rate": 1.7874854771784232e-05, "loss": 1.5258, 
"step": 6408 }, { "epoch": 5.318672199170124, "grad_norm": 14.411561965942383, "learning_rate": 1.7874522821576764e-05, "loss": 1.0012, "step": 6409 }, { "epoch": 5.319502074688796, "grad_norm": 13.054287910461426, "learning_rate": 1.7874190871369297e-05, "loss": 0.8404, "step": 6410 }, { "epoch": 5.320331950207469, "grad_norm": 14.253586769104004, "learning_rate": 1.7873858921161825e-05, "loss": 1.2579, "step": 6411 }, { "epoch": 5.321161825726141, "grad_norm": 18.237979888916016, "learning_rate": 1.7873526970954357e-05, "loss": 1.4145, "step": 6412 }, { "epoch": 5.321991701244813, "grad_norm": 15.642704963684082, "learning_rate": 1.787319502074689e-05, "loss": 1.1454, "step": 6413 }, { "epoch": 5.322821576763485, "grad_norm": 13.682307243347168, "learning_rate": 1.787286307053942e-05, "loss": 1.3097, "step": 6414 }, { "epoch": 5.323651452282157, "grad_norm": 14.865862846374512, "learning_rate": 1.787253112033195e-05, "loss": 1.178, "step": 6415 }, { "epoch": 5.32448132780083, "grad_norm": 17.57501792907715, "learning_rate": 1.7872199170124482e-05, "loss": 1.4126, "step": 6416 }, { "epoch": 5.325311203319502, "grad_norm": 14.837647438049316, "learning_rate": 1.7871867219917015e-05, "loss": 1.3698, "step": 6417 }, { "epoch": 5.326141078838174, "grad_norm": 18.533843994140625, "learning_rate": 1.7871535269709547e-05, "loss": 1.7778, "step": 6418 }, { "epoch": 5.326970954356846, "grad_norm": 26.670116424560547, "learning_rate": 1.7871203319502075e-05, "loss": 1.3942, "step": 6419 }, { "epoch": 5.327800829875518, "grad_norm": 16.710927963256836, "learning_rate": 1.7870871369294607e-05, "loss": 0.9836, "step": 6420 }, { "epoch": 5.328630705394191, "grad_norm": 14.339190483093262, "learning_rate": 1.787053941908714e-05, "loss": 1.2787, "step": 6421 }, { "epoch": 5.329460580912863, "grad_norm": 21.37794303894043, "learning_rate": 1.787020746887967e-05, "loss": 1.6954, "step": 6422 }, { "epoch": 5.330290456431535, "grad_norm": 20.919708251953125, "learning_rate": 
1.78698755186722e-05, "loss": 1.3998, "step": 6423 }, { "epoch": 5.331120331950207, "grad_norm": 14.721564292907715, "learning_rate": 1.7869543568464733e-05, "loss": 1.2001, "step": 6424 }, { "epoch": 5.331950207468879, "grad_norm": 13.97684097290039, "learning_rate": 1.786921161825726e-05, "loss": 1.2736, "step": 6425 }, { "epoch": 5.332780082987552, "grad_norm": 18.884002685546875, "learning_rate": 1.7868879668049793e-05, "loss": 1.5985, "step": 6426 }, { "epoch": 5.333609958506224, "grad_norm": 12.201797485351562, "learning_rate": 1.7868547717842325e-05, "loss": 0.6847, "step": 6427 }, { "epoch": 5.334439834024896, "grad_norm": 10.606098175048828, "learning_rate": 1.7868215767634854e-05, "loss": 1.1907, "step": 6428 }, { "epoch": 5.335269709543568, "grad_norm": 21.35187339782715, "learning_rate": 1.7867883817427386e-05, "loss": 1.7058, "step": 6429 }, { "epoch": 5.33609958506224, "grad_norm": 19.547822952270508, "learning_rate": 1.786755186721992e-05, "loss": 1.3318, "step": 6430 }, { "epoch": 5.3369294605809126, "grad_norm": 17.591577529907227, "learning_rate": 1.786721991701245e-05, "loss": 1.0983, "step": 6431 }, { "epoch": 5.337759336099585, "grad_norm": 19.286296844482422, "learning_rate": 1.786688796680498e-05, "loss": 1.8125, "step": 6432 }, { "epoch": 5.338589211618257, "grad_norm": 16.058616638183594, "learning_rate": 1.786655601659751e-05, "loss": 1.4242, "step": 6433 }, { "epoch": 5.339419087136929, "grad_norm": 17.98542022705078, "learning_rate": 1.7866224066390043e-05, "loss": 1.7119, "step": 6434 }, { "epoch": 5.340248962655601, "grad_norm": 19.265844345092773, "learning_rate": 1.7865892116182576e-05, "loss": 1.1825, "step": 6435 }, { "epoch": 5.3410788381742735, "grad_norm": 18.951000213623047, "learning_rate": 1.7865560165975104e-05, "loss": 1.5386, "step": 6436 }, { "epoch": 5.341908713692946, "grad_norm": 15.326430320739746, "learning_rate": 1.7865228215767636e-05, "loss": 1.3772, "step": 6437 }, { "epoch": 5.342738589211618, "grad_norm": 
12.559900283813477, "learning_rate": 1.786489626556017e-05, "loss": 0.8976, "step": 6438 }, { "epoch": 5.34356846473029, "grad_norm": 22.307838439941406, "learning_rate": 1.78645643153527e-05, "loss": 1.5696, "step": 6439 }, { "epoch": 5.344398340248962, "grad_norm": 21.304603576660156, "learning_rate": 1.786423236514523e-05, "loss": 1.665, "step": 6440 }, { "epoch": 5.3452282157676345, "grad_norm": 15.091582298278809, "learning_rate": 1.786390041493776e-05, "loss": 1.1436, "step": 6441 }, { "epoch": 5.346058091286307, "grad_norm": 15.161413192749023, "learning_rate": 1.7863568464730293e-05, "loss": 1.2947, "step": 6442 }, { "epoch": 5.346887966804979, "grad_norm": 13.674189567565918, "learning_rate": 1.7863236514522822e-05, "loss": 1.0823, "step": 6443 }, { "epoch": 5.347717842323651, "grad_norm": 15.809638977050781, "learning_rate": 1.7862904564315354e-05, "loss": 0.7732, "step": 6444 }, { "epoch": 5.348547717842323, "grad_norm": 26.77860450744629, "learning_rate": 1.7862572614107886e-05, "loss": 2.3936, "step": 6445 }, { "epoch": 5.3493775933609955, "grad_norm": 11.269048690795898, "learning_rate": 1.7862240663900415e-05, "loss": 0.9278, "step": 6446 }, { "epoch": 5.350207468879668, "grad_norm": 18.661237716674805, "learning_rate": 1.7861908713692947e-05, "loss": 1.3185, "step": 6447 }, { "epoch": 5.35103734439834, "grad_norm": 19.30773162841797, "learning_rate": 1.7861576763485476e-05, "loss": 2.4182, "step": 6448 }, { "epoch": 5.351867219917012, "grad_norm": 17.39908218383789, "learning_rate": 1.7861244813278008e-05, "loss": 1.0253, "step": 6449 }, { "epoch": 5.352697095435684, "grad_norm": 13.76186466217041, "learning_rate": 1.786091286307054e-05, "loss": 0.9886, "step": 6450 }, { "epoch": 5.3535269709543565, "grad_norm": 13.10201358795166, "learning_rate": 1.7860580912863072e-05, "loss": 1.4243, "step": 6451 }, { "epoch": 5.354356846473029, "grad_norm": 12.466069221496582, "learning_rate": 1.7860248962655604e-05, "loss": 1.2215, "step": 6452 }, { "epoch": 
5.355186721991701, "grad_norm": 12.94978141784668, "learning_rate": 1.7859917012448133e-05, "loss": 0.8241, "step": 6453 }, { "epoch": 5.356016597510373, "grad_norm": 25.271282196044922, "learning_rate": 1.7859585062240665e-05, "loss": 1.2567, "step": 6454 }, { "epoch": 5.356846473029045, "grad_norm": 11.078678131103516, "learning_rate": 1.7859253112033197e-05, "loss": 0.8219, "step": 6455 }, { "epoch": 5.3576763485477175, "grad_norm": 17.950510025024414, "learning_rate": 1.785892116182573e-05, "loss": 1.361, "step": 6456 }, { "epoch": 5.35850622406639, "grad_norm": 29.751699447631836, "learning_rate": 1.7858589211618258e-05, "loss": 1.1588, "step": 6457 }, { "epoch": 5.359336099585062, "grad_norm": 13.895001411437988, "learning_rate": 1.785825726141079e-05, "loss": 1.5787, "step": 6458 }, { "epoch": 5.360165975103734, "grad_norm": 11.885936737060547, "learning_rate": 1.7857925311203322e-05, "loss": 1.2532, "step": 6459 }, { "epoch": 5.360995850622406, "grad_norm": 13.458808898925781, "learning_rate": 1.7857593360995854e-05, "loss": 1.1563, "step": 6460 }, { "epoch": 5.361825726141078, "grad_norm": 28.864816665649414, "learning_rate": 1.7857261410788383e-05, "loss": 0.9836, "step": 6461 }, { "epoch": 5.362655601659751, "grad_norm": 13.514266014099121, "learning_rate": 1.7856929460580915e-05, "loss": 1.476, "step": 6462 }, { "epoch": 5.363485477178423, "grad_norm": 18.17291259765625, "learning_rate": 1.7856597510373444e-05, "loss": 1.9053, "step": 6463 }, { "epoch": 5.364315352697095, "grad_norm": 20.462905883789062, "learning_rate": 1.7856265560165976e-05, "loss": 1.0568, "step": 6464 }, { "epoch": 5.365145228215767, "grad_norm": 12.779914855957031, "learning_rate": 1.7855933609958508e-05, "loss": 1.3893, "step": 6465 }, { "epoch": 5.365975103734439, "grad_norm": 25.212644577026367, "learning_rate": 1.7855601659751037e-05, "loss": 1.3002, "step": 6466 }, { "epoch": 5.366804979253112, "grad_norm": 19.845491409301758, "learning_rate": 1.785526970954357e-05, "loss": 
0.9886, "step": 6467 }, { "epoch": 5.367634854771785, "grad_norm": 21.40374183654785, "learning_rate": 1.78549377593361e-05, "loss": 0.6787, "step": 6468 }, { "epoch": 5.368464730290457, "grad_norm": 26.90437126159668, "learning_rate": 1.785460580912863e-05, "loss": 1.8211, "step": 6469 }, { "epoch": 5.369294605809129, "grad_norm": 20.52937126159668, "learning_rate": 1.7854273858921162e-05, "loss": 1.0289, "step": 6470 }, { "epoch": 5.370124481327801, "grad_norm": 26.17784881591797, "learning_rate": 1.7853941908713694e-05, "loss": 1.835, "step": 6471 }, { "epoch": 5.3709543568464735, "grad_norm": 18.499502182006836, "learning_rate": 1.7853609958506226e-05, "loss": 1.461, "step": 6472 }, { "epoch": 5.371784232365146, "grad_norm": 20.906036376953125, "learning_rate": 1.7853278008298755e-05, "loss": 1.6288, "step": 6473 }, { "epoch": 5.372614107883818, "grad_norm": 12.731832504272461, "learning_rate": 1.7852946058091287e-05, "loss": 0.7964, "step": 6474 }, { "epoch": 5.37344398340249, "grad_norm": 15.774269104003906, "learning_rate": 1.785261410788382e-05, "loss": 1.7164, "step": 6475 }, { "epoch": 5.374273858921162, "grad_norm": 17.13233184814453, "learning_rate": 1.785228215767635e-05, "loss": 1.1798, "step": 6476 }, { "epoch": 5.3751037344398345, "grad_norm": 12.374424934387207, "learning_rate": 1.7851950207468883e-05, "loss": 0.9914, "step": 6477 }, { "epoch": 5.375933609958507, "grad_norm": 19.692424774169922, "learning_rate": 1.7851618257261412e-05, "loss": 1.7026, "step": 6478 }, { "epoch": 5.376763485477179, "grad_norm": 18.252113342285156, "learning_rate": 1.7851286307053944e-05, "loss": 0.6659, "step": 6479 }, { "epoch": 5.377593360995851, "grad_norm": 29.272607803344727, "learning_rate": 1.7850954356846476e-05, "loss": 1.5662, "step": 6480 }, { "epoch": 5.378423236514523, "grad_norm": 21.665969848632812, "learning_rate": 1.7850622406639005e-05, "loss": 1.6534, "step": 6481 }, { "epoch": 5.3792531120331954, "grad_norm": 11.540314674377441, "learning_rate": 
1.7850290456431537e-05, "loss": 1.3339, "step": 6482 }, { "epoch": 5.380082987551868, "grad_norm": 16.82716178894043, "learning_rate": 1.784995850622407e-05, "loss": 1.8886, "step": 6483 }, { "epoch": 5.38091286307054, "grad_norm": 13.104644775390625, "learning_rate": 1.7849626556016598e-05, "loss": 1.0229, "step": 6484 }, { "epoch": 5.381742738589212, "grad_norm": 18.02520751953125, "learning_rate": 1.784929460580913e-05, "loss": 1.5076, "step": 6485 }, { "epoch": 5.382572614107884, "grad_norm": 10.608126640319824, "learning_rate": 1.784896265560166e-05, "loss": 0.9971, "step": 6486 }, { "epoch": 5.383402489626556, "grad_norm": 26.64962387084961, "learning_rate": 1.784863070539419e-05, "loss": 1.1772, "step": 6487 }, { "epoch": 5.384232365145229, "grad_norm": 10.566544532775879, "learning_rate": 1.7848298755186723e-05, "loss": 0.7831, "step": 6488 }, { "epoch": 5.385062240663901, "grad_norm": 14.999970436096191, "learning_rate": 1.7847966804979255e-05, "loss": 1.2644, "step": 6489 }, { "epoch": 5.385892116182573, "grad_norm": 19.424604415893555, "learning_rate": 1.7847634854771784e-05, "loss": 1.9642, "step": 6490 }, { "epoch": 5.386721991701245, "grad_norm": 23.6481876373291, "learning_rate": 1.7847302904564316e-05, "loss": 0.9361, "step": 6491 }, { "epoch": 5.387551867219917, "grad_norm": 17.89512062072754, "learning_rate": 1.7846970954356848e-05, "loss": 1.6389, "step": 6492 }, { "epoch": 5.38838174273859, "grad_norm": 15.66418743133545, "learning_rate": 1.784663900414938e-05, "loss": 1.4522, "step": 6493 }, { "epoch": 5.389211618257262, "grad_norm": 17.930898666381836, "learning_rate": 1.784630705394191e-05, "loss": 1.5507, "step": 6494 }, { "epoch": 5.390041493775934, "grad_norm": 14.653246879577637, "learning_rate": 1.784597510373444e-05, "loss": 1.7242, "step": 6495 }, { "epoch": 5.390871369294606, "grad_norm": 12.064061164855957, "learning_rate": 1.7845643153526973e-05, "loss": 1.1222, "step": 6496 }, { "epoch": 5.391701244813278, "grad_norm": 
18.738805770874023, "learning_rate": 1.7845311203319505e-05, "loss": 1.5225, "step": 6497 }, { "epoch": 5.392531120331951, "grad_norm": 16.400876998901367, "learning_rate": 1.7844979253112034e-05, "loss": 1.6802, "step": 6498 }, { "epoch": 5.393360995850623, "grad_norm": 19.443227767944336, "learning_rate": 1.7844647302904566e-05, "loss": 1.6251, "step": 6499 }, { "epoch": 5.394190871369295, "grad_norm": 18.97258186340332, "learning_rate": 1.7844315352697098e-05, "loss": 1.3086, "step": 6500 }, { "epoch": 5.395020746887967, "grad_norm": 19.773263931274414, "learning_rate": 1.784398340248963e-05, "loss": 1.1191, "step": 6501 }, { "epoch": 5.395850622406639, "grad_norm": 20.569711685180664, "learning_rate": 1.784365145228216e-05, "loss": 1.1415, "step": 6502 }, { "epoch": 5.396680497925312, "grad_norm": 14.790761947631836, "learning_rate": 1.784331950207469e-05, "loss": 1.11, "step": 6503 }, { "epoch": 5.397510373443984, "grad_norm": 16.080421447753906, "learning_rate": 1.784298755186722e-05, "loss": 0.8397, "step": 6504 }, { "epoch": 5.398340248962656, "grad_norm": 11.463035583496094, "learning_rate": 1.7842655601659752e-05, "loss": 1.059, "step": 6505 }, { "epoch": 5.399170124481328, "grad_norm": 16.917438507080078, "learning_rate": 1.7842323651452284e-05, "loss": 1.1194, "step": 6506 }, { "epoch": 5.4, "grad_norm": 17.296905517578125, "learning_rate": 1.7841991701244813e-05, "loss": 1.1632, "step": 6507 }, { "epoch": 5.4008298755186726, "grad_norm": 19.17340660095215, "learning_rate": 1.7841659751037345e-05, "loss": 1.7006, "step": 6508 }, { "epoch": 5.401659751037345, "grad_norm": 13.919876098632812, "learning_rate": 1.7841327800829877e-05, "loss": 1.2741, "step": 6509 }, { "epoch": 5.402489626556017, "grad_norm": 10.861266136169434, "learning_rate": 1.784099585062241e-05, "loss": 1.0255, "step": 6510 }, { "epoch": 5.403319502074689, "grad_norm": 18.504947662353516, "learning_rate": 1.7840663900414938e-05, "loss": 1.7023, "step": 6511 }, { "epoch": 
5.404149377593361, "grad_norm": 13.871827125549316, "learning_rate": 1.784033195020747e-05, "loss": 1.1204, "step": 6512 }, { "epoch": 5.4049792531120335, "grad_norm": 8.816694259643555, "learning_rate": 1.7840000000000002e-05, "loss": 1.3244, "step": 6513 }, { "epoch": 5.405809128630706, "grad_norm": 28.155601501464844, "learning_rate": 1.7839668049792534e-05, "loss": 2.6089, "step": 6514 }, { "epoch": 5.406639004149378, "grad_norm": 20.311704635620117, "learning_rate": 1.7839336099585063e-05, "loss": 1.4296, "step": 6515 }, { "epoch": 5.40746887966805, "grad_norm": 13.739974021911621, "learning_rate": 1.7839004149377595e-05, "loss": 0.9123, "step": 6516 }, { "epoch": 5.408298755186722, "grad_norm": 12.92935848236084, "learning_rate": 1.7838672199170127e-05, "loss": 1.3798, "step": 6517 }, { "epoch": 5.4091286307053945, "grad_norm": 14.470022201538086, "learning_rate": 1.783834024896266e-05, "loss": 1.1234, "step": 6518 }, { "epoch": 5.409958506224067, "grad_norm": 16.504249572753906, "learning_rate": 1.7838008298755188e-05, "loss": 1.5713, "step": 6519 }, { "epoch": 5.410788381742739, "grad_norm": 17.661102294921875, "learning_rate": 1.783767634854772e-05, "loss": 2.0774, "step": 6520 }, { "epoch": 5.411618257261411, "grad_norm": 24.110328674316406, "learning_rate": 1.7837344398340252e-05, "loss": 0.9276, "step": 6521 }, { "epoch": 5.412448132780083, "grad_norm": 18.252079010009766, "learning_rate": 1.783701244813278e-05, "loss": 1.7776, "step": 6522 }, { "epoch": 5.4132780082987555, "grad_norm": 15.182229042053223, "learning_rate": 1.7836680497925313e-05, "loss": 1.4134, "step": 6523 }, { "epoch": 5.414107883817428, "grad_norm": 14.470369338989258, "learning_rate": 1.783634854771784e-05, "loss": 1.261, "step": 6524 }, { "epoch": 5.4149377593361, "grad_norm": 11.011802673339844, "learning_rate": 1.7836016597510374e-05, "loss": 0.8604, "step": 6525 }, { "epoch": 5.415767634854772, "grad_norm": 25.94744110107422, "learning_rate": 1.7835684647302906e-05, "loss": 
1.7696, "step": 6526 }, { "epoch": 5.416597510373444, "grad_norm": 25.458189010620117, "learning_rate": 1.7835352697095435e-05, "loss": 1.5488, "step": 6527 }, { "epoch": 5.4174273858921165, "grad_norm": 15.50605583190918, "learning_rate": 1.7835020746887967e-05, "loss": 1.5912, "step": 6528 }, { "epoch": 5.418257261410789, "grad_norm": 25.637889862060547, "learning_rate": 1.78346887966805e-05, "loss": 1.5772, "step": 6529 }, { "epoch": 5.419087136929461, "grad_norm": 14.829458236694336, "learning_rate": 1.783435684647303e-05, "loss": 0.937, "step": 6530 }, { "epoch": 5.419917012448133, "grad_norm": 29.686370849609375, "learning_rate": 1.7834024896265563e-05, "loss": 1.6019, "step": 6531 }, { "epoch": 5.420746887966805, "grad_norm": 12.80899429321289, "learning_rate": 1.7833692946058092e-05, "loss": 0.6683, "step": 6532 }, { "epoch": 5.4215767634854775, "grad_norm": 19.560274124145508, "learning_rate": 1.7833360995850624e-05, "loss": 1.566, "step": 6533 }, { "epoch": 5.42240663900415, "grad_norm": 11.164704322814941, "learning_rate": 1.7833029045643156e-05, "loss": 0.727, "step": 6534 }, { "epoch": 5.423236514522822, "grad_norm": 13.771370887756348, "learning_rate": 1.7832697095435688e-05, "loss": 1.347, "step": 6535 }, { "epoch": 5.424066390041494, "grad_norm": 21.81031036376953, "learning_rate": 1.7832365145228217e-05, "loss": 2.0716, "step": 6536 }, { "epoch": 5.424896265560166, "grad_norm": 18.549367904663086, "learning_rate": 1.783203319502075e-05, "loss": 1.3704, "step": 6537 }, { "epoch": 5.425726141078838, "grad_norm": 37.602447509765625, "learning_rate": 1.783170124481328e-05, "loss": 1.8346, "step": 6538 }, { "epoch": 5.426556016597511, "grad_norm": 16.219799041748047, "learning_rate": 1.7831369294605813e-05, "loss": 1.5631, "step": 6539 }, { "epoch": 5.427385892116183, "grad_norm": 34.7702522277832, "learning_rate": 1.7831037344398342e-05, "loss": 1.3389, "step": 6540 }, { "epoch": 5.428215767634855, "grad_norm": 21.092533111572266, "learning_rate": 
1.7830705394190874e-05, "loss": 1.8719, "step": 6541 }, { "epoch": 5.429045643153527, "grad_norm": 13.99722671508789, "learning_rate": 1.7830373443983403e-05, "loss": 1.0077, "step": 6542 }, { "epoch": 5.429875518672199, "grad_norm": 14.711653709411621, "learning_rate": 1.7830041493775935e-05, "loss": 1.47, "step": 6543 }, { "epoch": 5.430705394190872, "grad_norm": 21.36400604248047, "learning_rate": 1.7829709543568467e-05, "loss": 1.6927, "step": 6544 }, { "epoch": 5.431535269709544, "grad_norm": 22.38652801513672, "learning_rate": 1.7829377593360996e-05, "loss": 1.5896, "step": 6545 }, { "epoch": 5.432365145228216, "grad_norm": 15.95455551147461, "learning_rate": 1.7829045643153528e-05, "loss": 0.9962, "step": 6546 }, { "epoch": 5.433195020746888, "grad_norm": 16.839683532714844, "learning_rate": 1.782871369294606e-05, "loss": 1.0977, "step": 6547 }, { "epoch": 5.43402489626556, "grad_norm": 19.736175537109375, "learning_rate": 1.782838174273859e-05, "loss": 1.4112, "step": 6548 }, { "epoch": 5.434854771784233, "grad_norm": 17.80730438232422, "learning_rate": 1.782804979253112e-05, "loss": 1.421, "step": 6549 }, { "epoch": 5.435684647302905, "grad_norm": 22.615238189697266, "learning_rate": 1.7827717842323653e-05, "loss": 1.8348, "step": 6550 }, { "epoch": 5.436514522821577, "grad_norm": 12.860834121704102, "learning_rate": 1.7827385892116185e-05, "loss": 1.6133, "step": 6551 }, { "epoch": 5.437344398340249, "grad_norm": 13.404767990112305, "learning_rate": 1.7827053941908714e-05, "loss": 1.1064, "step": 6552 }, { "epoch": 5.438174273858921, "grad_norm": 14.789213180541992, "learning_rate": 1.7826721991701246e-05, "loss": 1.1222, "step": 6553 }, { "epoch": 5.439004149377594, "grad_norm": 15.400655746459961, "learning_rate": 1.7826390041493778e-05, "loss": 1.6874, "step": 6554 }, { "epoch": 5.439834024896266, "grad_norm": 23.924835205078125, "learning_rate": 1.782605809128631e-05, "loss": 1.6566, "step": 6555 }, { "epoch": 5.440663900414938, "grad_norm": 
13.712369918823242, "learning_rate": 1.7825726141078842e-05, "loss": 1.4516, "step": 6556 }, { "epoch": 5.44149377593361, "grad_norm": 12.206315994262695, "learning_rate": 1.782539419087137e-05, "loss": 1.04, "step": 6557 }, { "epoch": 5.442323651452282, "grad_norm": 16.754098892211914, "learning_rate": 1.7825062240663903e-05, "loss": 1.7062, "step": 6558 }, { "epoch": 5.443153526970955, "grad_norm": 12.484487533569336, "learning_rate": 1.7824730290456435e-05, "loss": 0.854, "step": 6559 }, { "epoch": 5.443983402489627, "grad_norm": 14.658995628356934, "learning_rate": 1.7824398340248964e-05, "loss": 0.9647, "step": 6560 }, { "epoch": 5.444813278008299, "grad_norm": 15.751155853271484, "learning_rate": 1.7824066390041496e-05, "loss": 0.8218, "step": 6561 }, { "epoch": 5.445643153526971, "grad_norm": 15.478260040283203, "learning_rate": 1.7823734439834028e-05, "loss": 1.5308, "step": 6562 }, { "epoch": 5.446473029045643, "grad_norm": 18.03141212463379, "learning_rate": 1.7823402489626557e-05, "loss": 1.4035, "step": 6563 }, { "epoch": 5.4473029045643155, "grad_norm": 15.752004623413086, "learning_rate": 1.782307053941909e-05, "loss": 1.0226, "step": 6564 }, { "epoch": 5.448132780082988, "grad_norm": 12.387696266174316, "learning_rate": 1.7822738589211617e-05, "loss": 1.3995, "step": 6565 }, { "epoch": 5.44896265560166, "grad_norm": 14.441903114318848, "learning_rate": 1.782240663900415e-05, "loss": 2.072, "step": 6566 }, { "epoch": 5.449792531120332, "grad_norm": 17.413833618164062, "learning_rate": 1.782207468879668e-05, "loss": 1.3248, "step": 6567 }, { "epoch": 5.450622406639004, "grad_norm": 20.35627555847168, "learning_rate": 1.7821742738589214e-05, "loss": 1.5851, "step": 6568 }, { "epoch": 5.4514522821576765, "grad_norm": 9.855541229248047, "learning_rate": 1.7821410788381742e-05, "loss": 1.4146, "step": 6569 }, { "epoch": 5.452282157676349, "grad_norm": 12.514718055725098, "learning_rate": 1.7821078838174275e-05, "loss": 1.1481, "step": 6570 }, { "epoch": 
5.453112033195021, "grad_norm": 11.087336540222168, "learning_rate": 1.7820746887966807e-05, "loss": 0.9235, "step": 6571 }, { "epoch": 5.453941908713693, "grad_norm": 18.80316925048828, "learning_rate": 1.782041493775934e-05, "loss": 1.5618, "step": 6572 }, { "epoch": 5.454771784232365, "grad_norm": 14.763628005981445, "learning_rate": 1.7820082987551867e-05, "loss": 1.8764, "step": 6573 }, { "epoch": 5.4556016597510375, "grad_norm": 21.91397476196289, "learning_rate": 1.78197510373444e-05, "loss": 1.7708, "step": 6574 }, { "epoch": 5.45643153526971, "grad_norm": 10.784051895141602, "learning_rate": 1.781941908713693e-05, "loss": 0.9026, "step": 6575 }, { "epoch": 5.457261410788382, "grad_norm": 18.847797393798828, "learning_rate": 1.7819087136929464e-05, "loss": 1.1591, "step": 6576 }, { "epoch": 5.458091286307054, "grad_norm": 32.18601989746094, "learning_rate": 1.7818755186721993e-05, "loss": 1.7166, "step": 6577 }, { "epoch": 5.458921161825726, "grad_norm": 10.049494743347168, "learning_rate": 1.7818423236514525e-05, "loss": 0.8844, "step": 6578 }, { "epoch": 5.4597510373443985, "grad_norm": 12.57911205291748, "learning_rate": 1.7818091286307057e-05, "loss": 1.0324, "step": 6579 }, { "epoch": 5.460580912863071, "grad_norm": 14.890560150146484, "learning_rate": 1.7817759336099585e-05, "loss": 1.5492, "step": 6580 }, { "epoch": 5.461410788381743, "grad_norm": 23.891069412231445, "learning_rate": 1.7817427385892118e-05, "loss": 1.4852, "step": 6581 }, { "epoch": 5.462240663900415, "grad_norm": 11.454638481140137, "learning_rate": 1.781709543568465e-05, "loss": 0.8634, "step": 6582 }, { "epoch": 5.463070539419087, "grad_norm": 13.64653491973877, "learning_rate": 1.781676348547718e-05, "loss": 1.0059, "step": 6583 }, { "epoch": 5.4639004149377595, "grad_norm": 12.292519569396973, "learning_rate": 1.781643153526971e-05, "loss": 1.0395, "step": 6584 }, { "epoch": 5.464730290456432, "grad_norm": 15.862549781799316, "learning_rate": 1.7816099585062243e-05, "loss": 
1.0219, "step": 6585 }, { "epoch": 5.465560165975104, "grad_norm": 15.097994804382324, "learning_rate": 1.781576763485477e-05, "loss": 1.1364, "step": 6586 }, { "epoch": 5.466390041493776, "grad_norm": 8.6066255569458, "learning_rate": 1.7815435684647303e-05, "loss": 0.835, "step": 6587 }, { "epoch": 5.467219917012448, "grad_norm": 17.56633758544922, "learning_rate": 1.7815103734439836e-05, "loss": 1.1741, "step": 6588 }, { "epoch": 5.4680497925311204, "grad_norm": 15.109580993652344, "learning_rate": 1.7814771784232368e-05, "loss": 1.7331, "step": 6589 }, { "epoch": 5.468879668049793, "grad_norm": 12.368700981140137, "learning_rate": 1.7814439834024896e-05, "loss": 1.1695, "step": 6590 }, { "epoch": 5.469709543568465, "grad_norm": 15.045357704162598, "learning_rate": 1.781410788381743e-05, "loss": 0.8828, "step": 6591 }, { "epoch": 5.470539419087137, "grad_norm": 16.379947662353516, "learning_rate": 1.781377593360996e-05, "loss": 1.3083, "step": 6592 }, { "epoch": 5.471369294605809, "grad_norm": 17.875341415405273, "learning_rate": 1.7813443983402493e-05, "loss": 1.0602, "step": 6593 }, { "epoch": 5.472199170124481, "grad_norm": 14.501462936401367, "learning_rate": 1.781311203319502e-05, "loss": 1.0564, "step": 6594 }, { "epoch": 5.473029045643154, "grad_norm": 11.042681694030762, "learning_rate": 1.7812780082987553e-05, "loss": 0.7892, "step": 6595 }, { "epoch": 5.473858921161826, "grad_norm": 16.64124870300293, "learning_rate": 1.7812448132780086e-05, "loss": 1.6267, "step": 6596 }, { "epoch": 5.474688796680498, "grad_norm": 15.769278526306152, "learning_rate": 1.7812116182572618e-05, "loss": 1.278, "step": 6597 }, { "epoch": 5.47551867219917, "grad_norm": 15.354843139648438, "learning_rate": 1.7811784232365146e-05, "loss": 1.4683, "step": 6598 }, { "epoch": 5.476348547717842, "grad_norm": 22.255685806274414, "learning_rate": 1.781145228215768e-05, "loss": 1.4243, "step": 6599 }, { "epoch": 5.477178423236515, "grad_norm": 22.594762802124023, "learning_rate": 
1.781112033195021e-05, "loss": 1.3136, "step": 6600 }, { "epoch": 5.478008298755187, "grad_norm": 18.325647354125977, "learning_rate": 1.781078838174274e-05, "loss": 1.1514, "step": 6601 }, { "epoch": 5.478838174273859, "grad_norm": 21.346179962158203, "learning_rate": 1.781045643153527e-05, "loss": 1.8556, "step": 6602 }, { "epoch": 5.479668049792531, "grad_norm": 13.049729347229004, "learning_rate": 1.78101244813278e-05, "loss": 1.3411, "step": 6603 }, { "epoch": 5.480497925311203, "grad_norm": 12.822406768798828, "learning_rate": 1.7809792531120332e-05, "loss": 1.0251, "step": 6604 }, { "epoch": 5.481327800829876, "grad_norm": 19.604623794555664, "learning_rate": 1.7809460580912864e-05, "loss": 1.4398, "step": 6605 }, { "epoch": 5.482157676348548, "grad_norm": 15.690818786621094, "learning_rate": 1.7809128630705393e-05, "loss": 1.4342, "step": 6606 }, { "epoch": 5.48298755186722, "grad_norm": 13.950648307800293, "learning_rate": 1.7808796680497925e-05, "loss": 1.9544, "step": 6607 }, { "epoch": 5.483817427385892, "grad_norm": 16.143022537231445, "learning_rate": 1.7808464730290457e-05, "loss": 1.3622, "step": 6608 }, { "epoch": 5.484647302904564, "grad_norm": 11.123034477233887, "learning_rate": 1.780813278008299e-05, "loss": 1.3057, "step": 6609 }, { "epoch": 5.485477178423237, "grad_norm": 13.999701499938965, "learning_rate": 1.780780082987552e-05, "loss": 1.3378, "step": 6610 }, { "epoch": 5.486307053941909, "grad_norm": 23.0624942779541, "learning_rate": 1.780746887966805e-05, "loss": 1.1325, "step": 6611 }, { "epoch": 5.487136929460581, "grad_norm": 15.394474983215332, "learning_rate": 1.7807136929460582e-05, "loss": 1.4802, "step": 6612 }, { "epoch": 5.487966804979253, "grad_norm": 28.564807891845703, "learning_rate": 1.7806804979253114e-05, "loss": 1.5585, "step": 6613 }, { "epoch": 5.488796680497925, "grad_norm": 17.140567779541016, "learning_rate": 1.7806473029045647e-05, "loss": 1.3883, "step": 6614 }, { "epoch": 5.4896265560165975, "grad_norm": 
16.00959587097168, "learning_rate": 1.7806141078838175e-05, "loss": 1.2734, "step": 6615 }, { "epoch": 5.49045643153527, "grad_norm": 22.06484031677246, "learning_rate": 1.7805809128630707e-05, "loss": 2.2569, "step": 6616 }, { "epoch": 5.491286307053942, "grad_norm": 14.849820137023926, "learning_rate": 1.780547717842324e-05, "loss": 1.2754, "step": 6617 }, { "epoch": 5.492116182572614, "grad_norm": 18.091157913208008, "learning_rate": 1.780514522821577e-05, "loss": 1.2242, "step": 6618 }, { "epoch": 5.492946058091286, "grad_norm": 17.828996658325195, "learning_rate": 1.78048132780083e-05, "loss": 1.0522, "step": 6619 }, { "epoch": 5.4937759336099585, "grad_norm": 14.034436225891113, "learning_rate": 1.7804481327800832e-05, "loss": 1.7476, "step": 6620 }, { "epoch": 5.494605809128631, "grad_norm": 14.864407539367676, "learning_rate": 1.780414937759336e-05, "loss": 0.9982, "step": 6621 }, { "epoch": 5.495435684647303, "grad_norm": 17.798093795776367, "learning_rate": 1.7803817427385893e-05, "loss": 1.3047, "step": 6622 }, { "epoch": 5.496265560165975, "grad_norm": 17.93083953857422, "learning_rate": 1.7803485477178425e-05, "loss": 0.9941, "step": 6623 }, { "epoch": 5.497095435684647, "grad_norm": 20.376144409179688, "learning_rate": 1.7803153526970954e-05, "loss": 1.3371, "step": 6624 }, { "epoch": 5.4979253112033195, "grad_norm": 27.225446701049805, "learning_rate": 1.7802821576763486e-05, "loss": 1.6321, "step": 6625 }, { "epoch": 5.498755186721992, "grad_norm": 12.816911697387695, "learning_rate": 1.780248962655602e-05, "loss": 1.3369, "step": 6626 }, { "epoch": 5.499585062240664, "grad_norm": 16.538766860961914, "learning_rate": 1.7802157676348547e-05, "loss": 1.5831, "step": 6627 }, { "epoch": 5.500414937759336, "grad_norm": 16.38999366760254, "learning_rate": 1.780182572614108e-05, "loss": 1.6322, "step": 6628 }, { "epoch": 5.501244813278008, "grad_norm": 18.35426139831543, "learning_rate": 1.780149377593361e-05, "loss": 1.065, "step": 6629 }, { "epoch": 
5.5020746887966805, "grad_norm": 12.193941116333008, "learning_rate": 1.7801161825726143e-05, "loss": 0.9161, "step": 6630 }, { "epoch": 5.502904564315353, "grad_norm": 20.784626007080078, "learning_rate": 1.7800829875518672e-05, "loss": 2.0109, "step": 6631 }, { "epoch": 5.503734439834025, "grad_norm": 8.388463973999023, "learning_rate": 1.7800497925311204e-05, "loss": 0.5387, "step": 6632 }, { "epoch": 5.504564315352697, "grad_norm": 13.843514442443848, "learning_rate": 1.7800165975103736e-05, "loss": 1.3526, "step": 6633 }, { "epoch": 5.505394190871369, "grad_norm": 16.2950496673584, "learning_rate": 1.779983402489627e-05, "loss": 1.2518, "step": 6634 }, { "epoch": 5.5062240663900415, "grad_norm": 27.148025512695312, "learning_rate": 1.77995020746888e-05, "loss": 2.2913, "step": 6635 }, { "epoch": 5.507053941908714, "grad_norm": 29.90794563293457, "learning_rate": 1.779917012448133e-05, "loss": 1.5567, "step": 6636 }, { "epoch": 5.507883817427386, "grad_norm": 11.515872955322266, "learning_rate": 1.779883817427386e-05, "loss": 0.671, "step": 6637 }, { "epoch": 5.508713692946058, "grad_norm": 24.716594696044922, "learning_rate": 1.7798506224066393e-05, "loss": 2.0548, "step": 6638 }, { "epoch": 5.50954356846473, "grad_norm": 12.494888305664062, "learning_rate": 1.7798174273858922e-05, "loss": 0.9961, "step": 6639 }, { "epoch": 5.5103734439834025, "grad_norm": 12.935893058776855, "learning_rate": 1.7797842323651454e-05, "loss": 0.9795, "step": 6640 }, { "epoch": 5.511203319502075, "grad_norm": 10.119590759277344, "learning_rate": 1.7797510373443983e-05, "loss": 0.6553, "step": 6641 }, { "epoch": 5.512033195020747, "grad_norm": 14.014103889465332, "learning_rate": 1.7797178423236515e-05, "loss": 0.8569, "step": 6642 }, { "epoch": 5.512863070539419, "grad_norm": 10.798837661743164, "learning_rate": 1.7796846473029047e-05, "loss": 0.6623, "step": 6643 }, { "epoch": 5.513692946058091, "grad_norm": 14.308347702026367, "learning_rate": 1.7796514522821576e-05, "loss": 
1.2633, "step": 6644 }, { "epoch": 5.514522821576763, "grad_norm": 12.98475170135498, "learning_rate": 1.7796182572614108e-05, "loss": 1.5634, "step": 6645 }, { "epoch": 5.515352697095436, "grad_norm": 18.116945266723633, "learning_rate": 1.779585062240664e-05, "loss": 1.339, "step": 6646 }, { "epoch": 5.516182572614108, "grad_norm": 12.087174415588379, "learning_rate": 1.7795518672199172e-05, "loss": 1.5594, "step": 6647 }, { "epoch": 5.51701244813278, "grad_norm": 15.887667655944824, "learning_rate": 1.77951867219917e-05, "loss": 1.2058, "step": 6648 }, { "epoch": 5.517842323651452, "grad_norm": 16.753604888916016, "learning_rate": 1.7794854771784233e-05, "loss": 0.8786, "step": 6649 }, { "epoch": 5.518672199170124, "grad_norm": 15.953645706176758, "learning_rate": 1.7794522821576765e-05, "loss": 1.0245, "step": 6650 }, { "epoch": 5.519502074688797, "grad_norm": 20.677759170532227, "learning_rate": 1.7794190871369297e-05, "loss": 1.5318, "step": 6651 }, { "epoch": 5.520331950207469, "grad_norm": 11.040294647216797, "learning_rate": 1.7793858921161826e-05, "loss": 0.9684, "step": 6652 }, { "epoch": 5.521161825726141, "grad_norm": 19.878631591796875, "learning_rate": 1.7793526970954358e-05, "loss": 1.3262, "step": 6653 }, { "epoch": 5.521991701244813, "grad_norm": 12.246199607849121, "learning_rate": 1.779319502074689e-05, "loss": 0.9211, "step": 6654 }, { "epoch": 5.522821576763485, "grad_norm": 25.51820182800293, "learning_rate": 1.7792863070539422e-05, "loss": 1.6942, "step": 6655 }, { "epoch": 5.523651452282158, "grad_norm": 12.954353332519531, "learning_rate": 1.779253112033195e-05, "loss": 0.9907, "step": 6656 }, { "epoch": 5.52448132780083, "grad_norm": 10.456786155700684, "learning_rate": 1.7792199170124483e-05, "loss": 0.829, "step": 6657 }, { "epoch": 5.525311203319502, "grad_norm": 16.131994247436523, "learning_rate": 1.7791867219917015e-05, "loss": 1.2346, "step": 6658 }, { "epoch": 5.526141078838174, "grad_norm": 20.278440475463867, "learning_rate": 
1.7791535269709544e-05, "loss": 1.7752, "step": 6659 }, { "epoch": 5.526970954356846, "grad_norm": 16.00116729736328, "learning_rate": 1.7791203319502076e-05, "loss": 0.7529, "step": 6660 }, { "epoch": 5.527800829875519, "grad_norm": 23.09775733947754, "learning_rate": 1.7790871369294608e-05, "loss": 1.7578, "step": 6661 }, { "epoch": 5.528630705394191, "grad_norm": 25.988170623779297, "learning_rate": 1.7790539419087137e-05, "loss": 1.7317, "step": 6662 }, { "epoch": 5.529460580912863, "grad_norm": 20.1573486328125, "learning_rate": 1.779020746887967e-05, "loss": 1.6886, "step": 6663 }, { "epoch": 5.530290456431535, "grad_norm": 18.007234573364258, "learning_rate": 1.77898755186722e-05, "loss": 1.2619, "step": 6664 }, { "epoch": 5.531120331950207, "grad_norm": 19.226903915405273, "learning_rate": 1.778954356846473e-05, "loss": 1.5361, "step": 6665 }, { "epoch": 5.5319502074688796, "grad_norm": 35.29670715332031, "learning_rate": 1.7789211618257262e-05, "loss": 1.2501, "step": 6666 }, { "epoch": 5.532780082987552, "grad_norm": 21.89405059814453, "learning_rate": 1.7788879668049794e-05, "loss": 2.5321, "step": 6667 }, { "epoch": 5.533609958506224, "grad_norm": 16.702484130859375, "learning_rate": 1.7788547717842326e-05, "loss": 1.0881, "step": 6668 }, { "epoch": 5.534439834024896, "grad_norm": 20.418994903564453, "learning_rate": 1.7788215767634855e-05, "loss": 1.476, "step": 6669 }, { "epoch": 5.535269709543568, "grad_norm": 16.462373733520508, "learning_rate": 1.7787883817427387e-05, "loss": 0.9722, "step": 6670 }, { "epoch": 5.5360995850622405, "grad_norm": 19.648523330688477, "learning_rate": 1.778755186721992e-05, "loss": 1.6731, "step": 6671 }, { "epoch": 5.536929460580913, "grad_norm": 13.50486946105957, "learning_rate": 1.778721991701245e-05, "loss": 1.1158, "step": 6672 }, { "epoch": 5.537759336099585, "grad_norm": 16.452665328979492, "learning_rate": 1.778688796680498e-05, "loss": 1.136, "step": 6673 }, { "epoch": 5.538589211618257, "grad_norm": 
13.918542861938477, "learning_rate": 1.7786556016597512e-05, "loss": 1.113, "step": 6674 }, { "epoch": 5.539419087136929, "grad_norm": 17.347152709960938, "learning_rate": 1.7786224066390044e-05, "loss": 1.1559, "step": 6675 }, { "epoch": 5.5402489626556015, "grad_norm": 25.44851303100586, "learning_rate": 1.7785892116182576e-05, "loss": 1.5705, "step": 6676 }, { "epoch": 5.541078838174274, "grad_norm": 34.200897216796875, "learning_rate": 1.7785560165975105e-05, "loss": 2.078, "step": 6677 }, { "epoch": 5.541908713692946, "grad_norm": 20.38959312438965, "learning_rate": 1.7785228215767637e-05, "loss": 1.6388, "step": 6678 }, { "epoch": 5.542738589211618, "grad_norm": 17.555238723754883, "learning_rate": 1.778489626556017e-05, "loss": 1.3075, "step": 6679 }, { "epoch": 5.54356846473029, "grad_norm": 13.698805809020996, "learning_rate": 1.7784564315352698e-05, "loss": 1.1033, "step": 6680 }, { "epoch": 5.5443983402489625, "grad_norm": 19.38626480102539, "learning_rate": 1.778423236514523e-05, "loss": 0.8928, "step": 6681 }, { "epoch": 5.545228215767635, "grad_norm": 10.508535385131836, "learning_rate": 1.778390041493776e-05, "loss": 0.6986, "step": 6682 }, { "epoch": 5.546058091286307, "grad_norm": 12.756000518798828, "learning_rate": 1.778356846473029e-05, "loss": 0.6373, "step": 6683 }, { "epoch": 5.546887966804979, "grad_norm": 27.844438552856445, "learning_rate": 1.7783236514522823e-05, "loss": 0.8849, "step": 6684 }, { "epoch": 5.547717842323651, "grad_norm": 20.433944702148438, "learning_rate": 1.7782904564315352e-05, "loss": 2.0203, "step": 6685 }, { "epoch": 5.5485477178423235, "grad_norm": 15.890463829040527, "learning_rate": 1.7782572614107884e-05, "loss": 1.4207, "step": 6686 }, { "epoch": 5.549377593360996, "grad_norm": 14.04343032836914, "learning_rate": 1.7782240663900416e-05, "loss": 1.2506, "step": 6687 }, { "epoch": 5.550207468879668, "grad_norm": 21.798437118530273, "learning_rate": 1.7781908713692948e-05, "loss": 1.7408, "step": 6688 }, { "epoch": 
5.55103734439834, "grad_norm": 21.84244728088379, "learning_rate": 1.778157676348548e-05, "loss": 2.161, "step": 6689 }, { "epoch": 5.551867219917012, "grad_norm": 16.7262020111084, "learning_rate": 1.778124481327801e-05, "loss": 0.916, "step": 6690 }, { "epoch": 5.5526970954356845, "grad_norm": 19.133731842041016, "learning_rate": 1.778091286307054e-05, "loss": 1.091, "step": 6691 }, { "epoch": 5.553526970954357, "grad_norm": 19.49164581298828, "learning_rate": 1.7780580912863073e-05, "loss": 1.1849, "step": 6692 }, { "epoch": 5.554356846473029, "grad_norm": 28.47770881652832, "learning_rate": 1.7780248962655605e-05, "loss": 1.4709, "step": 6693 }, { "epoch": 5.555186721991701, "grad_norm": 15.991456985473633, "learning_rate": 1.7779917012448134e-05, "loss": 1.4782, "step": 6694 }, { "epoch": 5.556016597510373, "grad_norm": 16.336931228637695, "learning_rate": 1.7779585062240666e-05, "loss": 0.9259, "step": 6695 }, { "epoch": 5.556846473029045, "grad_norm": 20.83942222595215, "learning_rate": 1.7779253112033198e-05, "loss": 1.2706, "step": 6696 }, { "epoch": 5.557676348547718, "grad_norm": 16.475297927856445, "learning_rate": 1.7778921161825727e-05, "loss": 0.6852, "step": 6697 }, { "epoch": 5.55850622406639, "grad_norm": 12.294764518737793, "learning_rate": 1.777858921161826e-05, "loss": 0.9454, "step": 6698 }, { "epoch": 5.559336099585062, "grad_norm": 11.693588256835938, "learning_rate": 1.777825726141079e-05, "loss": 1.171, "step": 6699 }, { "epoch": 5.560165975103734, "grad_norm": 11.662140846252441, "learning_rate": 1.777792531120332e-05, "loss": 1.1747, "step": 6700 }, { "epoch": 5.560995850622406, "grad_norm": 16.618911743164062, "learning_rate": 1.7777593360995852e-05, "loss": 1.002, "step": 6701 }, { "epoch": 5.561825726141079, "grad_norm": 16.018701553344727, "learning_rate": 1.7777261410788384e-05, "loss": 2.1067, "step": 6702 }, { "epoch": 5.562655601659751, "grad_norm": 12.759684562683105, "learning_rate": 1.7776929460580913e-05, "loss": 1.2465, 
"step": 6703 }, { "epoch": 5.563485477178423, "grad_norm": 18.088172912597656, "learning_rate": 1.7776597510373445e-05, "loss": 1.4421, "step": 6704 }, { "epoch": 5.564315352697095, "grad_norm": 12.464400291442871, "learning_rate": 1.7776265560165977e-05, "loss": 0.8432, "step": 6705 }, { "epoch": 5.565145228215767, "grad_norm": 18.13966178894043, "learning_rate": 1.7775933609958506e-05, "loss": 1.8811, "step": 6706 }, { "epoch": 5.56597510373444, "grad_norm": 12.975003242492676, "learning_rate": 1.7775601659751038e-05, "loss": 0.7738, "step": 6707 }, { "epoch": 5.566804979253112, "grad_norm": 11.73341178894043, "learning_rate": 1.777526970954357e-05, "loss": 1.2863, "step": 6708 }, { "epoch": 5.567634854771784, "grad_norm": 22.4808349609375, "learning_rate": 1.7774937759336102e-05, "loss": 1.8473, "step": 6709 }, { "epoch": 5.568464730290456, "grad_norm": 19.589813232421875, "learning_rate": 1.777460580912863e-05, "loss": 2.0025, "step": 6710 }, { "epoch": 5.569294605809128, "grad_norm": 15.267922401428223, "learning_rate": 1.7774273858921163e-05, "loss": 1.1237, "step": 6711 }, { "epoch": 5.570124481327801, "grad_norm": 12.571864128112793, "learning_rate": 1.7773941908713695e-05, "loss": 0.8483, "step": 6712 }, { "epoch": 5.570954356846473, "grad_norm": 16.980030059814453, "learning_rate": 1.7773609958506227e-05, "loss": 0.6824, "step": 6713 }, { "epoch": 5.571784232365145, "grad_norm": 15.745087623596191, "learning_rate": 1.777327800829876e-05, "loss": 0.9677, "step": 6714 }, { "epoch": 5.572614107883817, "grad_norm": 10.82283878326416, "learning_rate": 1.7772946058091288e-05, "loss": 0.9559, "step": 6715 }, { "epoch": 5.573443983402489, "grad_norm": 18.652854919433594, "learning_rate": 1.777261410788382e-05, "loss": 1.4662, "step": 6716 }, { "epoch": 5.574273858921162, "grad_norm": 16.029394149780273, "learning_rate": 1.7772282157676352e-05, "loss": 1.424, "step": 6717 }, { "epoch": 5.575103734439834, "grad_norm": 14.063404083251953, "learning_rate": 
1.777195020746888e-05, "loss": 1.1205, "step": 6718 }, { "epoch": 5.575933609958506, "grad_norm": 21.655813217163086, "learning_rate": 1.7771618257261413e-05, "loss": 1.3579, "step": 6719 }, { "epoch": 5.576763485477178, "grad_norm": 16.988567352294922, "learning_rate": 1.777128630705394e-05, "loss": 1.3585, "step": 6720 }, { "epoch": 5.57759336099585, "grad_norm": 12.901351928710938, "learning_rate": 1.7770954356846474e-05, "loss": 1.2865, "step": 6721 }, { "epoch": 5.5784232365145225, "grad_norm": 16.630399703979492, "learning_rate": 1.7770622406639006e-05, "loss": 1.2571, "step": 6722 }, { "epoch": 5.579253112033195, "grad_norm": 23.66988754272461, "learning_rate": 1.7770290456431535e-05, "loss": 1.7555, "step": 6723 }, { "epoch": 5.580082987551867, "grad_norm": 15.998133659362793, "learning_rate": 1.7769958506224067e-05, "loss": 1.5982, "step": 6724 }, { "epoch": 5.580912863070539, "grad_norm": 11.629157066345215, "learning_rate": 1.77696265560166e-05, "loss": 0.9745, "step": 6725 }, { "epoch": 5.581742738589211, "grad_norm": 14.723119735717773, "learning_rate": 1.776929460580913e-05, "loss": 1.1561, "step": 6726 }, { "epoch": 5.5825726141078835, "grad_norm": 17.116226196289062, "learning_rate": 1.776896265560166e-05, "loss": 1.4951, "step": 6727 }, { "epoch": 5.583402489626556, "grad_norm": 20.93159294128418, "learning_rate": 1.776863070539419e-05, "loss": 1.881, "step": 6728 }, { "epoch": 5.584232365145228, "grad_norm": 23.960878372192383, "learning_rate": 1.7768298755186724e-05, "loss": 1.6322, "step": 6729 }, { "epoch": 5.5850622406639, "grad_norm": 16.037160873413086, "learning_rate": 1.7767966804979256e-05, "loss": 0.6573, "step": 6730 }, { "epoch": 5.585892116182572, "grad_norm": 30.567970275878906, "learning_rate": 1.7767634854771785e-05, "loss": 1.0807, "step": 6731 }, { "epoch": 5.5867219917012445, "grad_norm": 20.69695281982422, "learning_rate": 1.7767302904564317e-05, "loss": 2.0089, "step": 6732 }, { "epoch": 5.587551867219917, "grad_norm": 
11.114533424377441, "learning_rate": 1.776697095435685e-05, "loss": 0.9485, "step": 6733 }, { "epoch": 5.588381742738589, "grad_norm": 15.321906089782715, "learning_rate": 1.776663900414938e-05, "loss": 1.5339, "step": 6734 }, { "epoch": 5.589211618257261, "grad_norm": 18.25693130493164, "learning_rate": 1.776630705394191e-05, "loss": 1.4014, "step": 6735 }, { "epoch": 5.590041493775933, "grad_norm": 19.1358585357666, "learning_rate": 1.7765975103734442e-05, "loss": 1.8441, "step": 6736 }, { "epoch": 5.5908713692946055, "grad_norm": 14.04421615600586, "learning_rate": 1.7765643153526974e-05, "loss": 1.3636, "step": 6737 }, { "epoch": 5.591701244813278, "grad_norm": 14.764787673950195, "learning_rate": 1.7765311203319503e-05, "loss": 1.2207, "step": 6738 }, { "epoch": 5.59253112033195, "grad_norm": 18.59456443786621, "learning_rate": 1.7764979253112035e-05, "loss": 1.004, "step": 6739 }, { "epoch": 5.593360995850622, "grad_norm": 18.373165130615234, "learning_rate": 1.7764647302904567e-05, "loss": 1.7926, "step": 6740 }, { "epoch": 5.594190871369294, "grad_norm": 16.192285537719727, "learning_rate": 1.7764315352697096e-05, "loss": 1.3931, "step": 6741 }, { "epoch": 5.5950207468879665, "grad_norm": 9.031688690185547, "learning_rate": 1.7763983402489628e-05, "loss": 0.9442, "step": 6742 }, { "epoch": 5.595850622406639, "grad_norm": 16.43593406677246, "learning_rate": 1.776365145228216e-05, "loss": 1.0833, "step": 6743 }, { "epoch": 5.596680497925311, "grad_norm": 12.59272289276123, "learning_rate": 1.776331950207469e-05, "loss": 1.0848, "step": 6744 }, { "epoch": 5.597510373443983, "grad_norm": 19.551855087280273, "learning_rate": 1.776298755186722e-05, "loss": 1.342, "step": 6745 }, { "epoch": 5.598340248962655, "grad_norm": 26.752044677734375, "learning_rate": 1.7762655601659753e-05, "loss": 1.7102, "step": 6746 }, { "epoch": 5.5991701244813274, "grad_norm": 11.37656307220459, "learning_rate": 1.7762323651452285e-05, "loss": 1.0189, "step": 6747 }, { "epoch": 5.6, 
"grad_norm": 13.172090530395508, "learning_rate": 1.7761991701244813e-05, "loss": 1.5054, "step": 6748 }, { "epoch": 5.600829875518672, "grad_norm": 17.555526733398438, "learning_rate": 1.7761659751037346e-05, "loss": 1.165, "step": 6749 }, { "epoch": 5.601659751037344, "grad_norm": 13.147195816040039, "learning_rate": 1.7761327800829878e-05, "loss": 1.2919, "step": 6750 }, { "epoch": 5.602489626556016, "grad_norm": 20.678712844848633, "learning_rate": 1.776099585062241e-05, "loss": 2.1391, "step": 6751 }, { "epoch": 5.603319502074688, "grad_norm": 18.11423683166504, "learning_rate": 1.776066390041494e-05, "loss": 1.3335, "step": 6752 }, { "epoch": 5.604149377593361, "grad_norm": 12.301916122436523, "learning_rate": 1.776033195020747e-05, "loss": 1.0253, "step": 6753 }, { "epoch": 5.604979253112033, "grad_norm": 15.660202026367188, "learning_rate": 1.7760000000000003e-05, "loss": 0.9249, "step": 6754 }, { "epoch": 5.605809128630705, "grad_norm": 17.32033348083496, "learning_rate": 1.7759668049792535e-05, "loss": 1.0658, "step": 6755 }, { "epoch": 5.606639004149377, "grad_norm": 23.408531188964844, "learning_rate": 1.7759336099585064e-05, "loss": 1.8834, "step": 6756 }, { "epoch": 5.607468879668049, "grad_norm": 14.36127758026123, "learning_rate": 1.7759004149377596e-05, "loss": 0.8018, "step": 6757 }, { "epoch": 5.608298755186722, "grad_norm": 15.505126953125, "learning_rate": 1.7758672199170124e-05, "loss": 1.0071, "step": 6758 }, { "epoch": 5.609128630705394, "grad_norm": 22.19279670715332, "learning_rate": 1.7758340248962657e-05, "loss": 1.4103, "step": 6759 }, { "epoch": 5.609958506224066, "grad_norm": 19.954883575439453, "learning_rate": 1.775800829875519e-05, "loss": 1.5954, "step": 6760 }, { "epoch": 5.610788381742738, "grad_norm": 16.949655532836914, "learning_rate": 1.7757676348547717e-05, "loss": 1.4628, "step": 6761 }, { "epoch": 5.61161825726141, "grad_norm": 14.731554985046387, "learning_rate": 1.775734439834025e-05, "loss": 1.4861, "step": 6762 }, { 
"epoch": 5.612448132780083, "grad_norm": 18.142963409423828, "learning_rate": 1.775701244813278e-05, "loss": 1.7319, "step": 6763 }, { "epoch": 5.613278008298755, "grad_norm": 13.665715217590332, "learning_rate": 1.775668049792531e-05, "loss": 1.0688, "step": 6764 }, { "epoch": 5.614107883817427, "grad_norm": 14.764740943908691, "learning_rate": 1.7756348547717842e-05, "loss": 1.0604, "step": 6765 }, { "epoch": 5.614937759336099, "grad_norm": 14.626660346984863, "learning_rate": 1.7756016597510374e-05, "loss": 1.1759, "step": 6766 }, { "epoch": 5.615767634854771, "grad_norm": 15.265421867370605, "learning_rate": 1.7755684647302907e-05, "loss": 1.3628, "step": 6767 }, { "epoch": 5.616597510373444, "grad_norm": 19.052841186523438, "learning_rate": 1.775535269709544e-05, "loss": 1.1926, "step": 6768 }, { "epoch": 5.617427385892116, "grad_norm": 14.376270294189453, "learning_rate": 1.7755020746887967e-05, "loss": 0.8525, "step": 6769 }, { "epoch": 5.618257261410788, "grad_norm": 15.256893157958984, "learning_rate": 1.77546887966805e-05, "loss": 1.5585, "step": 6770 }, { "epoch": 5.61908713692946, "grad_norm": 19.70787239074707, "learning_rate": 1.775435684647303e-05, "loss": 1.5371, "step": 6771 }, { "epoch": 5.619917012448132, "grad_norm": 15.106776237487793, "learning_rate": 1.7754024896265564e-05, "loss": 1.3778, "step": 6772 }, { "epoch": 5.6207468879668046, "grad_norm": 17.218612670898438, "learning_rate": 1.7753692946058092e-05, "loss": 1.7685, "step": 6773 }, { "epoch": 5.621576763485477, "grad_norm": 23.59645652770996, "learning_rate": 1.7753360995850625e-05, "loss": 1.7399, "step": 6774 }, { "epoch": 5.622406639004149, "grad_norm": 19.73748207092285, "learning_rate": 1.7753029045643157e-05, "loss": 1.217, "step": 6775 }, { "epoch": 5.623236514522821, "grad_norm": 20.49545669555664, "learning_rate": 1.7752697095435685e-05, "loss": 1.4989, "step": 6776 }, { "epoch": 5.624066390041493, "grad_norm": 31.228759765625, "learning_rate": 1.7752365145228218e-05, "loss": 
0.8425, "step": 6777 }, { "epoch": 5.6248962655601655, "grad_norm": 13.728912353515625, "learning_rate": 1.775203319502075e-05, "loss": 0.9317, "step": 6778 }, { "epoch": 5.625726141078838, "grad_norm": 28.834583282470703, "learning_rate": 1.775170124481328e-05, "loss": 0.9021, "step": 6779 }, { "epoch": 5.62655601659751, "grad_norm": 20.58734893798828, "learning_rate": 1.775136929460581e-05, "loss": 0.9596, "step": 6780 }, { "epoch": 5.627385892116182, "grad_norm": 13.519981384277344, "learning_rate": 1.775103734439834e-05, "loss": 0.6735, "step": 6781 }, { "epoch": 5.628215767634854, "grad_norm": 19.055248260498047, "learning_rate": 1.775070539419087e-05, "loss": 1.8931, "step": 6782 }, { "epoch": 5.6290456431535265, "grad_norm": 16.105316162109375, "learning_rate": 1.7750373443983403e-05, "loss": 1.3158, "step": 6783 }, { "epoch": 5.629875518672199, "grad_norm": 24.73671531677246, "learning_rate": 1.7750041493775935e-05, "loss": 2.1096, "step": 6784 }, { "epoch": 5.630705394190871, "grad_norm": 10.272314071655273, "learning_rate": 1.7749709543568464e-05, "loss": 0.8503, "step": 6785 }, { "epoch": 5.631535269709543, "grad_norm": 18.940677642822266, "learning_rate": 1.7749377593360996e-05, "loss": 1.1855, "step": 6786 }, { "epoch": 5.632365145228215, "grad_norm": 31.036375045776367, "learning_rate": 1.774904564315353e-05, "loss": 1.3226, "step": 6787 }, { "epoch": 5.6331950207468875, "grad_norm": 32.7739372253418, "learning_rate": 1.774871369294606e-05, "loss": 0.6744, "step": 6788 }, { "epoch": 5.63402489626556, "grad_norm": 20.357486724853516, "learning_rate": 1.774838174273859e-05, "loss": 1.1588, "step": 6789 }, { "epoch": 5.634854771784232, "grad_norm": 13.73305606842041, "learning_rate": 1.774804979253112e-05, "loss": 0.8623, "step": 6790 }, { "epoch": 5.635684647302904, "grad_norm": 17.572311401367188, "learning_rate": 1.7747717842323653e-05, "loss": 1.5593, "step": 6791 }, { "epoch": 5.636514522821576, "grad_norm": 22.38141632080078, "learning_rate": 
1.7747385892116186e-05, "loss": 1.0378, "step": 6792 }, { "epoch": 5.6373443983402485, "grad_norm": 21.452537536621094, "learning_rate": 1.7747053941908718e-05, "loss": 1.0367, "step": 6793 }, { "epoch": 5.638174273858921, "grad_norm": 24.26498794555664, "learning_rate": 1.7746721991701246e-05, "loss": 1.4311, "step": 6794 }, { "epoch": 5.639004149377593, "grad_norm": 15.151784896850586, "learning_rate": 1.774639004149378e-05, "loss": 1.2418, "step": 6795 }, { "epoch": 5.639834024896265, "grad_norm": 18.35952377319336, "learning_rate": 1.774605809128631e-05, "loss": 0.6394, "step": 6796 }, { "epoch": 5.640663900414938, "grad_norm": 20.91531753540039, "learning_rate": 1.774572614107884e-05, "loss": 1.1728, "step": 6797 }, { "epoch": 5.64149377593361, "grad_norm": 25.08728790283203, "learning_rate": 1.774539419087137e-05, "loss": 1.3918, "step": 6798 }, { "epoch": 5.6423236514522825, "grad_norm": 15.600780487060547, "learning_rate": 1.77450622406639e-05, "loss": 1.3093, "step": 6799 }, { "epoch": 5.643153526970955, "grad_norm": 22.473285675048828, "learning_rate": 1.7744730290456432e-05, "loss": 1.1491, "step": 6800 }, { "epoch": 5.643983402489627, "grad_norm": 11.17222785949707, "learning_rate": 1.7744398340248964e-05, "loss": 1.0689, "step": 6801 }, { "epoch": 5.644813278008299, "grad_norm": 13.894633293151855, "learning_rate": 1.7744066390041493e-05, "loss": 0.9038, "step": 6802 }, { "epoch": 5.645643153526971, "grad_norm": 17.45840835571289, "learning_rate": 1.7743734439834025e-05, "loss": 1.223, "step": 6803 }, { "epoch": 5.6464730290456435, "grad_norm": 17.5812931060791, "learning_rate": 1.7743402489626557e-05, "loss": 1.462, "step": 6804 }, { "epoch": 5.647302904564316, "grad_norm": 12.75178337097168, "learning_rate": 1.774307053941909e-05, "loss": 1.4277, "step": 6805 }, { "epoch": 5.648132780082988, "grad_norm": 21.98256492614746, "learning_rate": 1.7742738589211618e-05, "loss": 0.8413, "step": 6806 }, { "epoch": 5.64896265560166, "grad_norm": 
13.451891899108887, "learning_rate": 1.774240663900415e-05, "loss": 0.9706, "step": 6807 }, { "epoch": 5.649792531120332, "grad_norm": 22.97454833984375, "learning_rate": 1.7742074688796682e-05, "loss": 0.9165, "step": 6808 }, { "epoch": 5.6506224066390045, "grad_norm": 19.387142181396484, "learning_rate": 1.7741742738589214e-05, "loss": 1.3029, "step": 6809 }, { "epoch": 5.651452282157677, "grad_norm": 14.955658912658691, "learning_rate": 1.7741410788381743e-05, "loss": 1.266, "step": 6810 }, { "epoch": 5.652282157676349, "grad_norm": 13.431209564208984, "learning_rate": 1.7741078838174275e-05, "loss": 0.9399, "step": 6811 }, { "epoch": 5.653112033195021, "grad_norm": 12.387269973754883, "learning_rate": 1.7740746887966807e-05, "loss": 1.1213, "step": 6812 }, { "epoch": 5.653941908713693, "grad_norm": 13.132906913757324, "learning_rate": 1.774041493775934e-05, "loss": 0.9705, "step": 6813 }, { "epoch": 5.6547717842323655, "grad_norm": 15.05962085723877, "learning_rate": 1.7740082987551868e-05, "loss": 0.713, "step": 6814 }, { "epoch": 5.655601659751038, "grad_norm": 18.20294952392578, "learning_rate": 1.77397510373444e-05, "loss": 1.7493, "step": 6815 }, { "epoch": 5.65643153526971, "grad_norm": 14.058524131774902, "learning_rate": 1.7739419087136932e-05, "loss": 1.3911, "step": 6816 }, { "epoch": 5.657261410788382, "grad_norm": 21.146059036254883, "learning_rate": 1.773908713692946e-05, "loss": 1.0376, "step": 6817 }, { "epoch": 5.658091286307054, "grad_norm": 12.57928466796875, "learning_rate": 1.7738755186721993e-05, "loss": 0.8283, "step": 6818 }, { "epoch": 5.6589211618257265, "grad_norm": 17.00666618347168, "learning_rate": 1.7738423236514525e-05, "loss": 1.558, "step": 6819 }, { "epoch": 5.659751037344399, "grad_norm": 18.6258487701416, "learning_rate": 1.7738091286307054e-05, "loss": 1.346, "step": 6820 }, { "epoch": 5.660580912863071, "grad_norm": 19.707534790039062, "learning_rate": 1.7737759336099586e-05, "loss": 0.8818, "step": 6821 }, { "epoch": 
5.661410788381743, "grad_norm": 20.69053077697754, "learning_rate": 1.773742738589212e-05, "loss": 2.1588, "step": 6822 }, { "epoch": 5.662240663900415, "grad_norm": 14.629182815551758, "learning_rate": 1.7737095435684647e-05, "loss": 0.8448, "step": 6823 }, { "epoch": 5.6630705394190874, "grad_norm": 15.025084495544434, "learning_rate": 1.773676348547718e-05, "loss": 1.0203, "step": 6824 }, { "epoch": 5.66390041493776, "grad_norm": 10.2599458694458, "learning_rate": 1.773643153526971e-05, "loss": 0.6398, "step": 6825 }, { "epoch": 5.664730290456432, "grad_norm": 14.700778007507324, "learning_rate": 1.7736099585062243e-05, "loss": 1.4758, "step": 6826 }, { "epoch": 5.665560165975104, "grad_norm": 18.80548095703125, "learning_rate": 1.7735767634854772e-05, "loss": 1.7144, "step": 6827 }, { "epoch": 5.666390041493776, "grad_norm": 15.876070022583008, "learning_rate": 1.7735435684647304e-05, "loss": 1.2692, "step": 6828 }, { "epoch": 5.667219917012448, "grad_norm": 20.064443588256836, "learning_rate": 1.7735103734439836e-05, "loss": 1.5461, "step": 6829 }, { "epoch": 5.668049792531121, "grad_norm": 21.217063903808594, "learning_rate": 1.773477178423237e-05, "loss": 1.3973, "step": 6830 }, { "epoch": 5.668879668049793, "grad_norm": 13.729336738586426, "learning_rate": 1.7734439834024897e-05, "loss": 1.1859, "step": 6831 }, { "epoch": 5.669709543568465, "grad_norm": 15.457321166992188, "learning_rate": 1.773410788381743e-05, "loss": 1.6281, "step": 6832 }, { "epoch": 5.670539419087137, "grad_norm": 19.37466049194336, "learning_rate": 1.773377593360996e-05, "loss": 1.3129, "step": 6833 }, { "epoch": 5.671369294605809, "grad_norm": 24.110111236572266, "learning_rate": 1.7733443983402493e-05, "loss": 1.5652, "step": 6834 }, { "epoch": 5.672199170124482, "grad_norm": 24.24842643737793, "learning_rate": 1.7733112033195022e-05, "loss": 1.6642, "step": 6835 }, { "epoch": 5.673029045643154, "grad_norm": 14.1952486038208, "learning_rate": 1.7732780082987554e-05, "loss": 0.9461, 
"step": 6836 }, { "epoch": 5.673858921161826, "grad_norm": 15.477716445922852, "learning_rate": 1.7732448132780083e-05, "loss": 0.9325, "step": 6837 }, { "epoch": 5.674688796680498, "grad_norm": 14.947596549987793, "learning_rate": 1.7732116182572615e-05, "loss": 1.1933, "step": 6838 }, { "epoch": 5.67551867219917, "grad_norm": 27.178144454956055, "learning_rate": 1.7731784232365147e-05, "loss": 1.0579, "step": 6839 }, { "epoch": 5.676348547717843, "grad_norm": 14.446019172668457, "learning_rate": 1.7731452282157676e-05, "loss": 1.2129, "step": 6840 }, { "epoch": 5.677178423236515, "grad_norm": 27.037456512451172, "learning_rate": 1.7731120331950208e-05, "loss": 2.0578, "step": 6841 }, { "epoch": 5.678008298755187, "grad_norm": 14.217062950134277, "learning_rate": 1.773078838174274e-05, "loss": 1.2142, "step": 6842 }, { "epoch": 5.678838174273859, "grad_norm": 18.824989318847656, "learning_rate": 1.773045643153527e-05, "loss": 1.4589, "step": 6843 }, { "epoch": 5.679668049792531, "grad_norm": 14.006403923034668, "learning_rate": 1.77301244813278e-05, "loss": 1.2923, "step": 6844 }, { "epoch": 5.680497925311204, "grad_norm": 13.165909767150879, "learning_rate": 1.7729792531120333e-05, "loss": 0.898, "step": 6845 }, { "epoch": 5.681327800829876, "grad_norm": 24.584360122680664, "learning_rate": 1.7729460580912865e-05, "loss": 2.2202, "step": 6846 }, { "epoch": 5.682157676348548, "grad_norm": 20.925857543945312, "learning_rate": 1.7729128630705397e-05, "loss": 1.5411, "step": 6847 }, { "epoch": 5.68298755186722, "grad_norm": 14.978717803955078, "learning_rate": 1.7728796680497926e-05, "loss": 1.3196, "step": 6848 }, { "epoch": 5.683817427385892, "grad_norm": 13.689802169799805, "learning_rate": 1.7728464730290458e-05, "loss": 0.8242, "step": 6849 }, { "epoch": 5.6846473029045645, "grad_norm": 14.525731086730957, "learning_rate": 1.772813278008299e-05, "loss": 1.3924, "step": 6850 }, { "epoch": 5.685477178423237, "grad_norm": 11.315899848937988, "learning_rate": 
1.7727800829875522e-05, "loss": 0.918, "step": 6851 }, { "epoch": 5.686307053941909, "grad_norm": 21.594661712646484, "learning_rate": 1.772746887966805e-05, "loss": 1.4621, "step": 6852 }, { "epoch": 5.687136929460581, "grad_norm": 14.969995498657227, "learning_rate": 1.7727136929460583e-05, "loss": 1.0213, "step": 6853 }, { "epoch": 5.687966804979253, "grad_norm": 9.614224433898926, "learning_rate": 1.7726804979253115e-05, "loss": 0.8046, "step": 6854 }, { "epoch": 5.6887966804979255, "grad_norm": 13.634926795959473, "learning_rate": 1.7726473029045644e-05, "loss": 1.4074, "step": 6855 }, { "epoch": 5.689626556016598, "grad_norm": 20.189674377441406, "learning_rate": 1.7726141078838176e-05, "loss": 1.6718, "step": 6856 }, { "epoch": 5.69045643153527, "grad_norm": 11.53969955444336, "learning_rate": 1.7725809128630708e-05, "loss": 0.8254, "step": 6857 }, { "epoch": 5.691286307053942, "grad_norm": 15.29910659790039, "learning_rate": 1.7725477178423237e-05, "loss": 1.7119, "step": 6858 }, { "epoch": 5.692116182572614, "grad_norm": 19.129812240600586, "learning_rate": 1.772514522821577e-05, "loss": 1.167, "step": 6859 }, { "epoch": 5.6929460580912865, "grad_norm": 17.759174346923828, "learning_rate": 1.7724813278008298e-05, "loss": 1.3246, "step": 6860 }, { "epoch": 5.693775933609959, "grad_norm": 14.811552047729492, "learning_rate": 1.772448132780083e-05, "loss": 1.3336, "step": 6861 }, { "epoch": 5.694605809128631, "grad_norm": 17.150407791137695, "learning_rate": 1.7724149377593362e-05, "loss": 1.1186, "step": 6862 }, { "epoch": 5.695435684647303, "grad_norm": 35.96094512939453, "learning_rate": 1.7723817427385894e-05, "loss": 2.7994, "step": 6863 }, { "epoch": 5.696265560165975, "grad_norm": 12.54182243347168, "learning_rate": 1.7723485477178423e-05, "loss": 1.2929, "step": 6864 }, { "epoch": 5.6970954356846475, "grad_norm": 19.665027618408203, "learning_rate": 1.7723153526970955e-05, "loss": 1.8902, "step": 6865 }, { "epoch": 5.69792531120332, "grad_norm": 
22.824716567993164, "learning_rate": 1.7722821576763487e-05, "loss": 1.164, "step": 6866 }, { "epoch": 5.698755186721992, "grad_norm": 21.835725784301758, "learning_rate": 1.772248962655602e-05, "loss": 1.5068, "step": 6867 }, { "epoch": 5.699585062240664, "grad_norm": 10.661690711975098, "learning_rate": 1.7722157676348548e-05, "loss": 0.8235, "step": 6868 }, { "epoch": 5.700414937759336, "grad_norm": 20.687705993652344, "learning_rate": 1.772182572614108e-05, "loss": 1.7914, "step": 6869 }, { "epoch": 5.7012448132780085, "grad_norm": 14.210023880004883, "learning_rate": 1.7721493775933612e-05, "loss": 1.2917, "step": 6870 }, { "epoch": 5.702074688796681, "grad_norm": 16.406787872314453, "learning_rate": 1.7721161825726144e-05, "loss": 1.5354, "step": 6871 }, { "epoch": 5.702904564315353, "grad_norm": 15.917618751525879, "learning_rate": 1.7720829875518676e-05, "loss": 1.5782, "step": 6872 }, { "epoch": 5.703734439834025, "grad_norm": 17.664369583129883, "learning_rate": 1.7720497925311205e-05, "loss": 1.2421, "step": 6873 }, { "epoch": 5.704564315352697, "grad_norm": 19.052583694458008, "learning_rate": 1.7720165975103737e-05, "loss": 1.0287, "step": 6874 }, { "epoch": 5.7053941908713695, "grad_norm": 17.89025115966797, "learning_rate": 1.7719834024896266e-05, "loss": 1.5094, "step": 6875 }, { "epoch": 5.706224066390042, "grad_norm": 13.668529510498047, "learning_rate": 1.7719502074688798e-05, "loss": 1.2467, "step": 6876 }, { "epoch": 5.707053941908714, "grad_norm": 14.500505447387695, "learning_rate": 1.771917012448133e-05, "loss": 1.1078, "step": 6877 }, { "epoch": 5.707883817427386, "grad_norm": 15.398799896240234, "learning_rate": 1.771883817427386e-05, "loss": 1.2135, "step": 6878 }, { "epoch": 5.708713692946058, "grad_norm": 20.739593505859375, "learning_rate": 1.771850622406639e-05, "loss": 1.356, "step": 6879 }, { "epoch": 5.70954356846473, "grad_norm": 24.243566513061523, "learning_rate": 1.7718174273858923e-05, "loss": 1.2907, "step": 6880 }, { 
"epoch": 5.710373443983403, "grad_norm": 13.453896522521973, "learning_rate": 1.771784232365145e-05, "loss": 1.0267, "step": 6881 }, { "epoch": 5.711203319502075, "grad_norm": 19.406312942504883, "learning_rate": 1.7717510373443984e-05, "loss": 1.8517, "step": 6882 }, { "epoch": 5.712033195020747, "grad_norm": 14.964097023010254, "learning_rate": 1.7717178423236516e-05, "loss": 1.3808, "step": 6883 }, { "epoch": 5.712863070539419, "grad_norm": 12.308141708374023, "learning_rate": 1.7716846473029048e-05, "loss": 1.058, "step": 6884 }, { "epoch": 5.713692946058091, "grad_norm": 23.624719619750977, "learning_rate": 1.7716514522821577e-05, "loss": 1.9808, "step": 6885 }, { "epoch": 5.714522821576764, "grad_norm": 16.789936065673828, "learning_rate": 1.771618257261411e-05, "loss": 1.6796, "step": 6886 }, { "epoch": 5.715352697095436, "grad_norm": 23.055654525756836, "learning_rate": 1.771585062240664e-05, "loss": 1.7757, "step": 6887 }, { "epoch": 5.716182572614108, "grad_norm": 38.45240020751953, "learning_rate": 1.7715518672199173e-05, "loss": 1.234, "step": 6888 }, { "epoch": 5.71701244813278, "grad_norm": 12.246204376220703, "learning_rate": 1.7715186721991702e-05, "loss": 0.7275, "step": 6889 }, { "epoch": 5.717842323651452, "grad_norm": 23.3117733001709, "learning_rate": 1.7714854771784234e-05, "loss": 0.9993, "step": 6890 }, { "epoch": 5.718672199170125, "grad_norm": 32.94845962524414, "learning_rate": 1.7714522821576766e-05, "loss": 1.299, "step": 6891 }, { "epoch": 5.719502074688797, "grad_norm": 18.960935592651367, "learning_rate": 1.7714190871369298e-05, "loss": 0.9777, "step": 6892 }, { "epoch": 5.720331950207469, "grad_norm": 17.651348114013672, "learning_rate": 1.7713858921161827e-05, "loss": 1.8866, "step": 6893 }, { "epoch": 5.721161825726141, "grad_norm": 11.9131498336792, "learning_rate": 1.771352697095436e-05, "loss": 0.8896, "step": 6894 }, { "epoch": 5.721991701244813, "grad_norm": 14.603504180908203, "learning_rate": 1.771319502074689e-05, "loss": 
1.5131, "step": 6895 }, { "epoch": 5.722821576763486, "grad_norm": 11.576617240905762, "learning_rate": 1.771286307053942e-05, "loss": 0.9995, "step": 6896 }, { "epoch": 5.723651452282158, "grad_norm": 17.647436141967773, "learning_rate": 1.7712531120331952e-05, "loss": 1.4574, "step": 6897 }, { "epoch": 5.72448132780083, "grad_norm": 13.283679008483887, "learning_rate": 1.771219917012448e-05, "loss": 1.2115, "step": 6898 }, { "epoch": 5.725311203319502, "grad_norm": 21.06037712097168, "learning_rate": 1.7711867219917013e-05, "loss": 1.9539, "step": 6899 }, { "epoch": 5.726141078838174, "grad_norm": 21.023012161254883, "learning_rate": 1.7711535269709545e-05, "loss": 1.463, "step": 6900 }, { "epoch": 5.7269709543568466, "grad_norm": 19.89061164855957, "learning_rate": 1.7711203319502077e-05, "loss": 1.7824, "step": 6901 }, { "epoch": 5.727800829875519, "grad_norm": 11.840011596679688, "learning_rate": 1.7710871369294606e-05, "loss": 1.0148, "step": 6902 }, { "epoch": 5.728630705394191, "grad_norm": 14.57512378692627, "learning_rate": 1.7710539419087138e-05, "loss": 1.0625, "step": 6903 }, { "epoch": 5.729460580912863, "grad_norm": 16.26210594177246, "learning_rate": 1.771020746887967e-05, "loss": 1.1389, "step": 6904 }, { "epoch": 5.730290456431535, "grad_norm": 28.240081787109375, "learning_rate": 1.7709875518672202e-05, "loss": 1.3967, "step": 6905 }, { "epoch": 5.7311203319502075, "grad_norm": 15.023911476135254, "learning_rate": 1.770954356846473e-05, "loss": 1.3678, "step": 6906 }, { "epoch": 5.73195020746888, "grad_norm": 12.692961692810059, "learning_rate": 1.7709211618257263e-05, "loss": 1.2535, "step": 6907 }, { "epoch": 5.732780082987552, "grad_norm": 16.630842208862305, "learning_rate": 1.7708879668049795e-05, "loss": 1.3861, "step": 6908 }, { "epoch": 5.733609958506224, "grad_norm": 23.052318572998047, "learning_rate": 1.7708547717842327e-05, "loss": 1.3775, "step": 6909 }, { "epoch": 5.734439834024896, "grad_norm": 27.871227264404297, "learning_rate": 
1.7708215767634856e-05, "loss": 1.8892, "step": 6910 }, { "epoch": 5.7352697095435685, "grad_norm": 11.608318328857422, "learning_rate": 1.7707883817427388e-05, "loss": 1.0238, "step": 6911 }, { "epoch": 5.736099585062241, "grad_norm": 12.219297409057617, "learning_rate": 1.770755186721992e-05, "loss": 0.8439, "step": 6912 }, { "epoch": 5.736929460580913, "grad_norm": 11.50914192199707, "learning_rate": 1.7707219917012452e-05, "loss": 0.8991, "step": 6913 }, { "epoch": 5.737759336099585, "grad_norm": 21.67902374267578, "learning_rate": 1.770688796680498e-05, "loss": 1.7194, "step": 6914 }, { "epoch": 5.738589211618257, "grad_norm": 18.168176651000977, "learning_rate": 1.7706556016597513e-05, "loss": 1.4444, "step": 6915 }, { "epoch": 5.7394190871369295, "grad_norm": 18.047456741333008, "learning_rate": 1.770622406639004e-05, "loss": 1.1746, "step": 6916 }, { "epoch": 5.740248962655602, "grad_norm": 10.918142318725586, "learning_rate": 1.7705892116182574e-05, "loss": 0.833, "step": 6917 }, { "epoch": 5.741078838174274, "grad_norm": 18.0733585357666, "learning_rate": 1.7705560165975106e-05, "loss": 1.0655, "step": 6918 }, { "epoch": 5.741908713692946, "grad_norm": 18.477123260498047, "learning_rate": 1.7705228215767634e-05, "loss": 1.0839, "step": 6919 }, { "epoch": 5.742738589211618, "grad_norm": 21.704444885253906, "learning_rate": 1.7704896265560167e-05, "loss": 1.7601, "step": 6920 }, { "epoch": 5.7435684647302905, "grad_norm": 15.04190731048584, "learning_rate": 1.77045643153527e-05, "loss": 0.9869, "step": 6921 }, { "epoch": 5.744398340248963, "grad_norm": 33.93878173828125, "learning_rate": 1.7704232365145227e-05, "loss": 2.2033, "step": 6922 }, { "epoch": 5.745228215767635, "grad_norm": 9.961410522460938, "learning_rate": 1.770390041493776e-05, "loss": 1.0283, "step": 6923 }, { "epoch": 5.746058091286307, "grad_norm": 15.50655746459961, "learning_rate": 1.770356846473029e-05, "loss": 1.0753, "step": 6924 }, { "epoch": 5.746887966804979, "grad_norm": 
25.90603256225586, "learning_rate": 1.7703236514522824e-05, "loss": 1.8607, "step": 6925 }, { "epoch": 5.7477178423236515, "grad_norm": 16.926877975463867, "learning_rate": 1.7702904564315356e-05, "loss": 1.2992, "step": 6926 }, { "epoch": 5.748547717842324, "grad_norm": 24.7392578125, "learning_rate": 1.7702572614107885e-05, "loss": 1.6206, "step": 6927 }, { "epoch": 5.749377593360996, "grad_norm": 11.98061752319336, "learning_rate": 1.7702240663900417e-05, "loss": 0.8306, "step": 6928 }, { "epoch": 5.750207468879668, "grad_norm": 15.37975025177002, "learning_rate": 1.770190871369295e-05, "loss": 1.0716, "step": 6929 }, { "epoch": 5.75103734439834, "grad_norm": 23.845924377441406, "learning_rate": 1.770157676348548e-05, "loss": 1.9247, "step": 6930 }, { "epoch": 5.751867219917012, "grad_norm": 20.037086486816406, "learning_rate": 1.770124481327801e-05, "loss": 1.3195, "step": 6931 }, { "epoch": 5.752697095435685, "grad_norm": 16.63387680053711, "learning_rate": 1.7700912863070542e-05, "loss": 1.3874, "step": 6932 }, { "epoch": 5.753526970954357, "grad_norm": 14.651904106140137, "learning_rate": 1.7700580912863074e-05, "loss": 1.3217, "step": 6933 }, { "epoch": 5.754356846473029, "grad_norm": 15.184408187866211, "learning_rate": 1.7700248962655603e-05, "loss": 1.6385, "step": 6934 }, { "epoch": 5.755186721991701, "grad_norm": 22.013456344604492, "learning_rate": 1.7699917012448135e-05, "loss": 1.3639, "step": 6935 }, { "epoch": 5.756016597510373, "grad_norm": 18.599369049072266, "learning_rate": 1.7699585062240667e-05, "loss": 1.4105, "step": 6936 }, { "epoch": 5.756846473029046, "grad_norm": 18.054725646972656, "learning_rate": 1.7699253112033195e-05, "loss": 1.745, "step": 6937 }, { "epoch": 5.757676348547718, "grad_norm": 21.063709259033203, "learning_rate": 1.7698921161825728e-05, "loss": 1.5403, "step": 6938 }, { "epoch": 5.75850622406639, "grad_norm": 12.146917343139648, "learning_rate": 1.7698589211618256e-05, "loss": 1.0521, "step": 6939 }, { "epoch": 
5.759336099585062, "grad_norm": 15.66569709777832, "learning_rate": 1.769825726141079e-05, "loss": 1.4294, "step": 6940 }, { "epoch": 5.760165975103734, "grad_norm": 25.960582733154297, "learning_rate": 1.769792531120332e-05, "loss": 1.5735, "step": 6941 }, { "epoch": 5.760995850622407, "grad_norm": 11.283007621765137, "learning_rate": 1.7697593360995853e-05, "loss": 1.2887, "step": 6942 }, { "epoch": 5.761825726141079, "grad_norm": 18.59845733642578, "learning_rate": 1.769726141078838e-05, "loss": 1.2094, "step": 6943 }, { "epoch": 5.762655601659751, "grad_norm": 13.194211959838867, "learning_rate": 1.7696929460580913e-05, "loss": 1.1566, "step": 6944 }, { "epoch": 5.763485477178423, "grad_norm": 32.317787170410156, "learning_rate": 1.7696597510373446e-05, "loss": 1.0702, "step": 6945 }, { "epoch": 5.764315352697095, "grad_norm": 14.639854431152344, "learning_rate": 1.7696265560165978e-05, "loss": 1.3562, "step": 6946 }, { "epoch": 5.765145228215768, "grad_norm": 13.92771053314209, "learning_rate": 1.7695933609958506e-05, "loss": 0.9571, "step": 6947 }, { "epoch": 5.76597510373444, "grad_norm": 13.607473373413086, "learning_rate": 1.769560165975104e-05, "loss": 1.4917, "step": 6948 }, { "epoch": 5.766804979253112, "grad_norm": 12.512898445129395, "learning_rate": 1.769526970954357e-05, "loss": 1.1199, "step": 6949 }, { "epoch": 5.767634854771784, "grad_norm": 14.021449089050293, "learning_rate": 1.7694937759336103e-05, "loss": 1.3583, "step": 6950 }, { "epoch": 5.768464730290456, "grad_norm": 15.258354187011719, "learning_rate": 1.7694605809128635e-05, "loss": 1.2943, "step": 6951 }, { "epoch": 5.769294605809129, "grad_norm": 30.04123878479004, "learning_rate": 1.7694273858921164e-05, "loss": 1.2934, "step": 6952 }, { "epoch": 5.770124481327801, "grad_norm": 17.66364860534668, "learning_rate": 1.7693941908713696e-05, "loss": 0.8453, "step": 6953 }, { "epoch": 5.770954356846473, "grad_norm": 18.422122955322266, "learning_rate": 1.7693609958506224e-05, "loss": 
1.4736, "step": 6954 }, { "epoch": 5.771784232365145, "grad_norm": 12.646401405334473, "learning_rate": 1.7693278008298756e-05, "loss": 1.0011, "step": 6955 }, { "epoch": 5.772614107883817, "grad_norm": 15.642467498779297, "learning_rate": 1.769294605809129e-05, "loss": 1.1971, "step": 6956 }, { "epoch": 5.7734439834024895, "grad_norm": 16.504404067993164, "learning_rate": 1.7692614107883817e-05, "loss": 1.3379, "step": 6957 }, { "epoch": 5.774273858921162, "grad_norm": 13.54442310333252, "learning_rate": 1.769228215767635e-05, "loss": 1.4229, "step": 6958 }, { "epoch": 5.775103734439834, "grad_norm": 10.19638729095459, "learning_rate": 1.769195020746888e-05, "loss": 0.91, "step": 6959 }, { "epoch": 5.775933609958506, "grad_norm": 27.819862365722656, "learning_rate": 1.769161825726141e-05, "loss": 2.0285, "step": 6960 }, { "epoch": 5.776763485477178, "grad_norm": 25.590423583984375, "learning_rate": 1.7691286307053942e-05, "loss": 1.0791, "step": 6961 }, { "epoch": 5.7775933609958505, "grad_norm": 18.111059188842773, "learning_rate": 1.7690954356846474e-05, "loss": 1.6213, "step": 6962 }, { "epoch": 5.778423236514523, "grad_norm": 28.10150718688965, "learning_rate": 1.7690622406639007e-05, "loss": 1.6094, "step": 6963 }, { "epoch": 5.779253112033195, "grad_norm": 18.822893142700195, "learning_rate": 1.7690290456431535e-05, "loss": 1.1506, "step": 6964 }, { "epoch": 5.780082987551867, "grad_norm": 16.34459686279297, "learning_rate": 1.7689958506224067e-05, "loss": 1.8974, "step": 6965 }, { "epoch": 5.780912863070539, "grad_norm": 21.76677703857422, "learning_rate": 1.76896265560166e-05, "loss": 1.7493, "step": 6966 }, { "epoch": 5.7817427385892115, "grad_norm": 14.674230575561523, "learning_rate": 1.768929460580913e-05, "loss": 1.0044, "step": 6967 }, { "epoch": 5.782572614107884, "grad_norm": 25.993690490722656, "learning_rate": 1.768896265560166e-05, "loss": 1.4981, "step": 6968 }, { "epoch": 5.783402489626556, "grad_norm": 14.720545768737793, "learning_rate": 
1.7688630705394192e-05, "loss": 1.3265, "step": 6969 }, { "epoch": 5.784232365145228, "grad_norm": 8.017705917358398, "learning_rate": 1.7688298755186725e-05, "loss": 0.6479, "step": 6970 }, { "epoch": 5.7850622406639, "grad_norm": 13.165960311889648, "learning_rate": 1.7687966804979257e-05, "loss": 0.984, "step": 6971 }, { "epoch": 5.7858921161825725, "grad_norm": 21.93701171875, "learning_rate": 1.7687634854771785e-05, "loss": 2.2713, "step": 6972 }, { "epoch": 5.786721991701245, "grad_norm": 14.724339485168457, "learning_rate": 1.7687302904564317e-05, "loss": 1.3399, "step": 6973 }, { "epoch": 5.787551867219917, "grad_norm": 17.272233963012695, "learning_rate": 1.768697095435685e-05, "loss": 1.1881, "step": 6974 }, { "epoch": 5.788381742738589, "grad_norm": 11.653136253356934, "learning_rate": 1.768663900414938e-05, "loss": 0.6076, "step": 6975 }, { "epoch": 5.789211618257261, "grad_norm": 17.459440231323242, "learning_rate": 1.768630705394191e-05, "loss": 1.3729, "step": 6976 }, { "epoch": 5.7900414937759335, "grad_norm": 23.095447540283203, "learning_rate": 1.768597510373444e-05, "loss": 1.2018, "step": 6977 }, { "epoch": 5.790871369294606, "grad_norm": 11.974332809448242, "learning_rate": 1.768564315352697e-05, "loss": 0.8857, "step": 6978 }, { "epoch": 5.791701244813278, "grad_norm": 13.937519073486328, "learning_rate": 1.7685311203319503e-05, "loss": 1.0756, "step": 6979 }, { "epoch": 5.79253112033195, "grad_norm": 31.746055603027344, "learning_rate": 1.7684979253112035e-05, "loss": 1.3694, "step": 6980 }, { "epoch": 5.793360995850622, "grad_norm": 13.329341888427734, "learning_rate": 1.7684647302904564e-05, "loss": 1.0977, "step": 6981 }, { "epoch": 5.7941908713692944, "grad_norm": 16.625492095947266, "learning_rate": 1.7684315352697096e-05, "loss": 1.4121, "step": 6982 }, { "epoch": 5.795020746887967, "grad_norm": 24.399797439575195, "learning_rate": 1.768398340248963e-05, "loss": 1.3307, "step": 6983 }, { "epoch": 5.795850622406639, "grad_norm": 
20.43294334411621, "learning_rate": 1.768365145228216e-05, "loss": 1.2817, "step": 6984 }, { "epoch": 5.796680497925311, "grad_norm": 15.403223991394043, "learning_rate": 1.768331950207469e-05, "loss": 0.5331, "step": 6985 }, { "epoch": 5.797510373443983, "grad_norm": 12.336689949035645, "learning_rate": 1.768298755186722e-05, "loss": 0.7784, "step": 6986 }, { "epoch": 5.798340248962655, "grad_norm": 16.038267135620117, "learning_rate": 1.7682655601659753e-05, "loss": 1.2896, "step": 6987 }, { "epoch": 5.799170124481328, "grad_norm": 15.828507423400879, "learning_rate": 1.7682323651452286e-05, "loss": 1.0261, "step": 6988 }, { "epoch": 5.8, "grad_norm": 14.49508285522461, "learning_rate": 1.7681991701244814e-05, "loss": 0.8471, "step": 6989 }, { "epoch": 5.800829875518672, "grad_norm": 21.717283248901367, "learning_rate": 1.7681659751037346e-05, "loss": 1.0394, "step": 6990 }, { "epoch": 5.801659751037344, "grad_norm": 18.467592239379883, "learning_rate": 1.768132780082988e-05, "loss": 1.1336, "step": 6991 }, { "epoch": 5.802489626556016, "grad_norm": 21.674882888793945, "learning_rate": 1.7680995850622407e-05, "loss": 1.3514, "step": 6992 }, { "epoch": 5.803319502074689, "grad_norm": 17.459266662597656, "learning_rate": 1.768066390041494e-05, "loss": 1.2519, "step": 6993 }, { "epoch": 5.804149377593361, "grad_norm": 25.463172912597656, "learning_rate": 1.768033195020747e-05, "loss": 1.6995, "step": 6994 }, { "epoch": 5.804979253112033, "grad_norm": 15.626138687133789, "learning_rate": 1.768e-05, "loss": 1.052, "step": 6995 }, { "epoch": 5.805809128630705, "grad_norm": 16.999324798583984, "learning_rate": 1.7679668049792532e-05, "loss": 1.4375, "step": 6996 }, { "epoch": 5.806639004149377, "grad_norm": 18.77042579650879, "learning_rate": 1.7679336099585064e-05, "loss": 1.1616, "step": 6997 }, { "epoch": 5.80746887966805, "grad_norm": 18.31747055053711, "learning_rate": 1.7679004149377593e-05, "loss": 1.3627, "step": 6998 }, { "epoch": 5.808298755186722, 
"grad_norm": 15.119807243347168, "learning_rate": 1.7678672199170125e-05, "loss": 1.2499, "step": 6999 }, { "epoch": 5.809128630705394, "grad_norm": 14.668256759643555, "learning_rate": 1.7678340248962657e-05, "loss": 1.1326, "step": 7000 }, { "epoch": 5.809958506224066, "grad_norm": 11.379485130310059, "learning_rate": 1.7678008298755186e-05, "loss": 1.0661, "step": 7001 }, { "epoch": 5.810788381742738, "grad_norm": 16.244081497192383, "learning_rate": 1.7677676348547718e-05, "loss": 1.2903, "step": 7002 }, { "epoch": 5.811618257261411, "grad_norm": 9.842263221740723, "learning_rate": 1.767734439834025e-05, "loss": 0.7903, "step": 7003 }, { "epoch": 5.812448132780083, "grad_norm": 22.44036102294922, "learning_rate": 1.7677012448132782e-05, "loss": 1.3612, "step": 7004 }, { "epoch": 5.813278008298755, "grad_norm": 23.884418487548828, "learning_rate": 1.7676680497925314e-05, "loss": 1.4861, "step": 7005 }, { "epoch": 5.814107883817427, "grad_norm": 18.658998489379883, "learning_rate": 1.7676348547717843e-05, "loss": 1.3015, "step": 7006 }, { "epoch": 5.814937759336099, "grad_norm": 10.56731128692627, "learning_rate": 1.7676016597510375e-05, "loss": 0.9662, "step": 7007 }, { "epoch": 5.8157676348547716, "grad_norm": 11.759878158569336, "learning_rate": 1.7675684647302907e-05, "loss": 1.0283, "step": 7008 }, { "epoch": 5.816597510373444, "grad_norm": 11.584057807922363, "learning_rate": 1.767535269709544e-05, "loss": 0.9551, "step": 7009 }, { "epoch": 5.817427385892116, "grad_norm": 26.233356475830078, "learning_rate": 1.7675020746887968e-05, "loss": 2.3242, "step": 7010 }, { "epoch": 5.818257261410788, "grad_norm": 19.13241958618164, "learning_rate": 1.76746887966805e-05, "loss": 1.8003, "step": 7011 }, { "epoch": 5.81908713692946, "grad_norm": 10.673068046569824, "learning_rate": 1.7674356846473032e-05, "loss": 0.4543, "step": 7012 }, { "epoch": 5.8199170124481325, "grad_norm": 14.756726264953613, "learning_rate": 1.767402489626556e-05, "loss": 1.5416, "step": 7013 
}, { "epoch": 5.820746887966805, "grad_norm": 14.079641342163086, "learning_rate": 1.7673692946058093e-05, "loss": 1.0143, "step": 7014 }, { "epoch": 5.821576763485477, "grad_norm": 12.752894401550293, "learning_rate": 1.7673360995850622e-05, "loss": 1.2049, "step": 7015 }, { "epoch": 5.822406639004149, "grad_norm": 13.56313419342041, "learning_rate": 1.7673029045643154e-05, "loss": 0.8595, "step": 7016 }, { "epoch": 5.823236514522821, "grad_norm": 21.039615631103516, "learning_rate": 1.7672697095435686e-05, "loss": 1.1228, "step": 7017 }, { "epoch": 5.8240663900414935, "grad_norm": 29.137624740600586, "learning_rate": 1.7672365145228215e-05, "loss": 1.0999, "step": 7018 }, { "epoch": 5.824896265560166, "grad_norm": 19.978307723999023, "learning_rate": 1.7672033195020747e-05, "loss": 1.526, "step": 7019 }, { "epoch": 5.825726141078838, "grad_norm": 25.6578369140625, "learning_rate": 1.767170124481328e-05, "loss": 1.0208, "step": 7020 }, { "epoch": 5.82655601659751, "grad_norm": 13.381197929382324, "learning_rate": 1.767136929460581e-05, "loss": 0.8979, "step": 7021 }, { "epoch": 5.827385892116182, "grad_norm": 12.009444236755371, "learning_rate": 1.767103734439834e-05, "loss": 1.0989, "step": 7022 }, { "epoch": 5.8282157676348545, "grad_norm": 24.17964744567871, "learning_rate": 1.7670705394190872e-05, "loss": 1.8871, "step": 7023 }, { "epoch": 5.829045643153527, "grad_norm": 17.39523696899414, "learning_rate": 1.7670373443983404e-05, "loss": 0.8605, "step": 7024 }, { "epoch": 5.829875518672199, "grad_norm": 17.596250534057617, "learning_rate": 1.7670041493775936e-05, "loss": 0.9093, "step": 7025 }, { "epoch": 5.830705394190871, "grad_norm": 15.388278007507324, "learning_rate": 1.7669709543568465e-05, "loss": 1.1471, "step": 7026 }, { "epoch": 5.831535269709543, "grad_norm": 24.71419334411621, "learning_rate": 1.7669377593360997e-05, "loss": 1.6853, "step": 7027 }, { "epoch": 5.8323651452282155, "grad_norm": 34.35791015625, "learning_rate": 1.766904564315353e-05, 
"loss": 2.259, "step": 7028 }, { "epoch": 5.833195020746888, "grad_norm": 12.920080184936523, "learning_rate": 1.766871369294606e-05, "loss": 1.3255, "step": 7029 }, { "epoch": 5.83402489626556, "grad_norm": 15.649923324584961, "learning_rate": 1.7668381742738593e-05, "loss": 1.7221, "step": 7030 }, { "epoch": 5.834854771784232, "grad_norm": 11.275917053222656, "learning_rate": 1.7668049792531122e-05, "loss": 0.816, "step": 7031 }, { "epoch": 5.835684647302904, "grad_norm": 34.5982780456543, "learning_rate": 1.7667717842323654e-05, "loss": 2.1163, "step": 7032 }, { "epoch": 5.8365145228215765, "grad_norm": 11.230049133300781, "learning_rate": 1.7667385892116183e-05, "loss": 1.1254, "step": 7033 }, { "epoch": 5.837344398340249, "grad_norm": 20.072221755981445, "learning_rate": 1.7667053941908715e-05, "loss": 1.558, "step": 7034 }, { "epoch": 5.838174273858921, "grad_norm": 17.18634605407715, "learning_rate": 1.7666721991701247e-05, "loss": 1.5468, "step": 7035 }, { "epoch": 5.839004149377593, "grad_norm": 15.392000198364258, "learning_rate": 1.7666390041493776e-05, "loss": 0.8896, "step": 7036 }, { "epoch": 5.839834024896265, "grad_norm": 19.61406707763672, "learning_rate": 1.7666058091286308e-05, "loss": 1.1571, "step": 7037 }, { "epoch": 5.840663900414937, "grad_norm": 14.502376556396484, "learning_rate": 1.766572614107884e-05, "loss": 1.0823, "step": 7038 }, { "epoch": 5.84149377593361, "grad_norm": 21.179828643798828, "learning_rate": 1.766539419087137e-05, "loss": 1.7938, "step": 7039 }, { "epoch": 5.842323651452282, "grad_norm": 14.421797752380371, "learning_rate": 1.76650622406639e-05, "loss": 1.4655, "step": 7040 }, { "epoch": 5.843153526970954, "grad_norm": 13.79029369354248, "learning_rate": 1.7664730290456433e-05, "loss": 1.6259, "step": 7041 }, { "epoch": 5.843983402489626, "grad_norm": 17.248035430908203, "learning_rate": 1.7664398340248965e-05, "loss": 1.1198, "step": 7042 }, { "epoch": 5.844813278008298, "grad_norm": 28.713212966918945, 
"learning_rate": 1.7664066390041494e-05, "loss": 1.5832, "step": 7043 }, { "epoch": 5.845643153526971, "grad_norm": 20.02271270751953, "learning_rate": 1.7663734439834026e-05, "loss": 0.8864, "step": 7044 }, { "epoch": 5.846473029045643, "grad_norm": 12.143152236938477, "learning_rate": 1.7663402489626558e-05, "loss": 1.1111, "step": 7045 }, { "epoch": 5.847302904564315, "grad_norm": 13.98454475402832, "learning_rate": 1.766307053941909e-05, "loss": 1.3282, "step": 7046 }, { "epoch": 5.848132780082987, "grad_norm": 16.11309242248535, "learning_rate": 1.766273858921162e-05, "loss": 1.386, "step": 7047 }, { "epoch": 5.848962655601659, "grad_norm": 12.336915016174316, "learning_rate": 1.766240663900415e-05, "loss": 1.3622, "step": 7048 }, { "epoch": 5.849792531120332, "grad_norm": 21.646953582763672, "learning_rate": 1.7662074688796683e-05, "loss": 1.7959, "step": 7049 }, { "epoch": 5.850622406639004, "grad_norm": 16.625673294067383, "learning_rate": 1.7661742738589215e-05, "loss": 1.4035, "step": 7050 }, { "epoch": 5.851452282157676, "grad_norm": 24.638519287109375, "learning_rate": 1.7661410788381744e-05, "loss": 2.4926, "step": 7051 }, { "epoch": 5.852282157676348, "grad_norm": 24.22650146484375, "learning_rate": 1.7661078838174276e-05, "loss": 1.9977, "step": 7052 }, { "epoch": 5.85311203319502, "grad_norm": 20.59954833984375, "learning_rate": 1.7660746887966808e-05, "loss": 1.3898, "step": 7053 }, { "epoch": 5.853941908713693, "grad_norm": 13.949178695678711, "learning_rate": 1.7660414937759337e-05, "loss": 1.1329, "step": 7054 }, { "epoch": 5.854771784232365, "grad_norm": 15.554262161254883, "learning_rate": 1.766008298755187e-05, "loss": 1.5687, "step": 7055 }, { "epoch": 5.855601659751037, "grad_norm": 20.48255729675293, "learning_rate": 1.7659751037344398e-05, "loss": 1.1229, "step": 7056 }, { "epoch": 5.856431535269709, "grad_norm": 20.878849029541016, "learning_rate": 1.765941908713693e-05, "loss": 1.1153, "step": 7057 }, { "epoch": 5.857261410788381, 
"grad_norm": 14.647618293762207, "learning_rate": 1.7659087136929462e-05, "loss": 1.076, "step": 7058 }, { "epoch": 5.858091286307054, "grad_norm": 16.1226863861084, "learning_rate": 1.7658755186721994e-05, "loss": 1.1452, "step": 7059 }, { "epoch": 5.858921161825726, "grad_norm": 13.937505722045898, "learning_rate": 1.7658423236514523e-05, "loss": 0.7808, "step": 7060 }, { "epoch": 5.859751037344399, "grad_norm": 10.563716888427734, "learning_rate": 1.7658091286307055e-05, "loss": 1.1676, "step": 7061 }, { "epoch": 5.860580912863071, "grad_norm": 16.875551223754883, "learning_rate": 1.7657759336099587e-05, "loss": 1.6797, "step": 7062 }, { "epoch": 5.861410788381743, "grad_norm": 25.03103256225586, "learning_rate": 1.765742738589212e-05, "loss": 1.0571, "step": 7063 }, { "epoch": 5.862240663900415, "grad_norm": 20.174781799316406, "learning_rate": 1.7657095435684648e-05, "loss": 1.2651, "step": 7064 }, { "epoch": 5.863070539419088, "grad_norm": 14.990395545959473, "learning_rate": 1.765676348547718e-05, "loss": 0.9781, "step": 7065 }, { "epoch": 5.86390041493776, "grad_norm": 15.428019523620605, "learning_rate": 1.7656431535269712e-05, "loss": 1.2256, "step": 7066 }, { "epoch": 5.864730290456432, "grad_norm": 18.931713104248047, "learning_rate": 1.7656099585062244e-05, "loss": 1.1661, "step": 7067 }, { "epoch": 5.865560165975104, "grad_norm": 11.866369247436523, "learning_rate": 1.7655767634854773e-05, "loss": 0.8299, "step": 7068 }, { "epoch": 5.866390041493776, "grad_norm": 14.238797187805176, "learning_rate": 1.7655435684647305e-05, "loss": 0.6959, "step": 7069 }, { "epoch": 5.867219917012449, "grad_norm": 19.444377899169922, "learning_rate": 1.7655103734439837e-05, "loss": 2.1397, "step": 7070 }, { "epoch": 5.868049792531121, "grad_norm": 18.46897315979004, "learning_rate": 1.7654771784232366e-05, "loss": 1.3088, "step": 7071 }, { "epoch": 5.868879668049793, "grad_norm": 15.618279457092285, "learning_rate": 1.7654439834024898e-05, "loss": 1.1028, "step": 7072 
}, { "epoch": 5.869709543568465, "grad_norm": 14.14918327331543, "learning_rate": 1.765410788381743e-05, "loss": 1.5288, "step": 7073 }, { "epoch": 5.870539419087137, "grad_norm": 24.142475128173828, "learning_rate": 1.765377593360996e-05, "loss": 1.6629, "step": 7074 }, { "epoch": 5.87136929460581, "grad_norm": 18.42378044128418, "learning_rate": 1.765344398340249e-05, "loss": 1.2395, "step": 7075 }, { "epoch": 5.872199170124482, "grad_norm": 16.17551612854004, "learning_rate": 1.765311203319502e-05, "loss": 1.3612, "step": 7076 }, { "epoch": 5.873029045643154, "grad_norm": 17.86798667907715, "learning_rate": 1.765278008298755e-05, "loss": 1.4629, "step": 7077 }, { "epoch": 5.873858921161826, "grad_norm": 12.192445755004883, "learning_rate": 1.7652448132780084e-05, "loss": 1.1602, "step": 7078 }, { "epoch": 5.874688796680498, "grad_norm": 20.551645278930664, "learning_rate": 1.7652116182572616e-05, "loss": 1.4538, "step": 7079 }, { "epoch": 5.875518672199171, "grad_norm": 14.012541770935059, "learning_rate": 1.7651784232365145e-05, "loss": 0.7077, "step": 7080 }, { "epoch": 5.876348547717843, "grad_norm": 26.31743621826172, "learning_rate": 1.7651452282157677e-05, "loss": 1.2843, "step": 7081 }, { "epoch": 5.877178423236515, "grad_norm": 10.694734573364258, "learning_rate": 1.765112033195021e-05, "loss": 0.8632, "step": 7082 }, { "epoch": 5.878008298755187, "grad_norm": 19.635435104370117, "learning_rate": 1.765078838174274e-05, "loss": 1.3355, "step": 7083 }, { "epoch": 5.878838174273859, "grad_norm": 26.628223419189453, "learning_rate": 1.7650456431535273e-05, "loss": 1.161, "step": 7084 }, { "epoch": 5.8796680497925315, "grad_norm": 22.05474281311035, "learning_rate": 1.7650124481327802e-05, "loss": 1.4975, "step": 7085 }, { "epoch": 5.880497925311204, "grad_norm": 18.66758155822754, "learning_rate": 1.7649792531120334e-05, "loss": 1.6515, "step": 7086 }, { "epoch": 5.881327800829876, "grad_norm": 15.744255065917969, "learning_rate": 1.7649460580912866e-05, 
"loss": 1.1977, "step": 7087 }, { "epoch": 5.882157676348548, "grad_norm": 12.572093963623047, "learning_rate": 1.7649128630705398e-05, "loss": 1.0563, "step": 7088 }, { "epoch": 5.88298755186722, "grad_norm": 12.844598770141602, "learning_rate": 1.7648796680497927e-05, "loss": 0.8066, "step": 7089 }, { "epoch": 5.8838174273858925, "grad_norm": 19.440540313720703, "learning_rate": 1.764846473029046e-05, "loss": 1.3163, "step": 7090 }, { "epoch": 5.884647302904565, "grad_norm": 19.926551818847656, "learning_rate": 1.764813278008299e-05, "loss": 1.3759, "step": 7091 }, { "epoch": 5.885477178423237, "grad_norm": 14.891606330871582, "learning_rate": 1.764780082987552e-05, "loss": 1.3396, "step": 7092 }, { "epoch": 5.886307053941909, "grad_norm": 11.65367603302002, "learning_rate": 1.7647468879668052e-05, "loss": 0.8519, "step": 7093 }, { "epoch": 5.887136929460581, "grad_norm": 21.121458053588867, "learning_rate": 1.764713692946058e-05, "loss": 1.5971, "step": 7094 }, { "epoch": 5.8879668049792535, "grad_norm": 16.62226104736328, "learning_rate": 1.7646804979253113e-05, "loss": 1.5235, "step": 7095 }, { "epoch": 5.888796680497926, "grad_norm": 20.440738677978516, "learning_rate": 1.7646473029045645e-05, "loss": 1.5468, "step": 7096 }, { "epoch": 5.889626556016598, "grad_norm": 12.67397403717041, "learning_rate": 1.7646141078838173e-05, "loss": 1.3714, "step": 7097 }, { "epoch": 5.89045643153527, "grad_norm": 17.433902740478516, "learning_rate": 1.7645809128630706e-05, "loss": 1.3014, "step": 7098 }, { "epoch": 5.891286307053942, "grad_norm": 14.264928817749023, "learning_rate": 1.7645477178423238e-05, "loss": 1.0162, "step": 7099 }, { "epoch": 5.8921161825726145, "grad_norm": 14.78133487701416, "learning_rate": 1.764514522821577e-05, "loss": 1.2472, "step": 7100 }, { "epoch": 5.892946058091287, "grad_norm": 11.914535522460938, "learning_rate": 1.76448132780083e-05, "loss": 0.8467, "step": 7101 }, { "epoch": 5.893775933609959, "grad_norm": 21.218109130859375, 
"learning_rate": 1.764448132780083e-05, "loss": 1.5946, "step": 7102 }, { "epoch": 5.894605809128631, "grad_norm": 15.963839530944824, "learning_rate": 1.7644149377593363e-05, "loss": 0.8451, "step": 7103 }, { "epoch": 5.895435684647303, "grad_norm": 18.35880470275879, "learning_rate": 1.7643817427385895e-05, "loss": 1.2616, "step": 7104 }, { "epoch": 5.8962655601659755, "grad_norm": 15.375100135803223, "learning_rate": 1.7643485477178424e-05, "loss": 1.2702, "step": 7105 }, { "epoch": 5.897095435684648, "grad_norm": 19.348073959350586, "learning_rate": 1.7643153526970956e-05, "loss": 1.6083, "step": 7106 }, { "epoch": 5.89792531120332, "grad_norm": 15.268356323242188, "learning_rate": 1.7642821576763488e-05, "loss": 1.327, "step": 7107 }, { "epoch": 5.898755186721992, "grad_norm": 20.459375381469727, "learning_rate": 1.764248962655602e-05, "loss": 1.8557, "step": 7108 }, { "epoch": 5.899585062240664, "grad_norm": 11.430899620056152, "learning_rate": 1.764215767634855e-05, "loss": 1.2919, "step": 7109 }, { "epoch": 5.9004149377593365, "grad_norm": 19.804658889770508, "learning_rate": 1.764182572614108e-05, "loss": 1.7562, "step": 7110 }, { "epoch": 5.901244813278009, "grad_norm": 22.348684310913086, "learning_rate": 1.7641493775933613e-05, "loss": 0.7333, "step": 7111 }, { "epoch": 5.902074688796681, "grad_norm": 33.75191879272461, "learning_rate": 1.764116182572614e-05, "loss": 1.7762, "step": 7112 }, { "epoch": 5.902904564315353, "grad_norm": 18.504226684570312, "learning_rate": 1.7640829875518674e-05, "loss": 0.943, "step": 7113 }, { "epoch": 5.903734439834025, "grad_norm": 16.144960403442383, "learning_rate": 1.7640497925311206e-05, "loss": 1.4956, "step": 7114 }, { "epoch": 5.904564315352697, "grad_norm": 21.296817779541016, "learning_rate": 1.7640165975103734e-05, "loss": 1.1544, "step": 7115 }, { "epoch": 5.90539419087137, "grad_norm": 17.348098754882812, "learning_rate": 1.7639834024896267e-05, "loss": 2.2468, "step": 7116 }, { "epoch": 5.906224066390042, 
"grad_norm": 23.70889663696289, "learning_rate": 1.76395020746888e-05, "loss": 1.5953, "step": 7117 }, { "epoch": 5.907053941908714, "grad_norm": 14.721424102783203, "learning_rate": 1.7639170124481327e-05, "loss": 1.4156, "step": 7118 }, { "epoch": 5.907883817427386, "grad_norm": 21.299654006958008, "learning_rate": 1.763883817427386e-05, "loss": 1.5061, "step": 7119 }, { "epoch": 5.908713692946058, "grad_norm": 23.086111068725586, "learning_rate": 1.763850622406639e-05, "loss": 1.9033, "step": 7120 }, { "epoch": 5.909543568464731, "grad_norm": 26.985124588012695, "learning_rate": 1.7638174273858924e-05, "loss": 1.8417, "step": 7121 }, { "epoch": 5.910373443983403, "grad_norm": 19.99149513244629, "learning_rate": 1.7637842323651452e-05, "loss": 1.0354, "step": 7122 }, { "epoch": 5.911203319502075, "grad_norm": 14.943350791931152, "learning_rate": 1.7637510373443985e-05, "loss": 1.3152, "step": 7123 }, { "epoch": 5.912033195020747, "grad_norm": 12.799654006958008, "learning_rate": 1.7637178423236517e-05, "loss": 0.9716, "step": 7124 }, { "epoch": 5.912863070539419, "grad_norm": 22.921842575073242, "learning_rate": 1.763684647302905e-05, "loss": 1.4608, "step": 7125 }, { "epoch": 5.913692946058092, "grad_norm": 15.690655708312988, "learning_rate": 1.7636514522821577e-05, "loss": 1.187, "step": 7126 }, { "epoch": 5.914522821576764, "grad_norm": 15.535597801208496, "learning_rate": 1.763618257261411e-05, "loss": 1.4799, "step": 7127 }, { "epoch": 5.915352697095436, "grad_norm": 14.729286193847656, "learning_rate": 1.7635850622406642e-05, "loss": 1.0275, "step": 7128 }, { "epoch": 5.916182572614108, "grad_norm": 18.876649856567383, "learning_rate": 1.7635518672199174e-05, "loss": 1.3739, "step": 7129 }, { "epoch": 5.91701244813278, "grad_norm": 20.554908752441406, "learning_rate": 1.7635186721991703e-05, "loss": 1.0007, "step": 7130 }, { "epoch": 5.917842323651453, "grad_norm": 17.635385513305664, "learning_rate": 1.7634854771784235e-05, "loss": 1.3258, "step": 7131 }, 
{ "epoch": 5.918672199170125, "grad_norm": 17.88843536376953, "learning_rate": 1.7634522821576763e-05, "loss": 1.3116, "step": 7132 }, { "epoch": 5.919502074688797, "grad_norm": 17.38819122314453, "learning_rate": 1.7634190871369295e-05, "loss": 0.8878, "step": 7133 }, { "epoch": 5.920331950207469, "grad_norm": 21.921340942382812, "learning_rate": 1.7633858921161828e-05, "loss": 1.5989, "step": 7134 }, { "epoch": 5.921161825726141, "grad_norm": 16.441608428955078, "learning_rate": 1.7633526970954356e-05, "loss": 1.8152, "step": 7135 }, { "epoch": 5.9219917012448136, "grad_norm": 14.172052383422852, "learning_rate": 1.763319502074689e-05, "loss": 0.9186, "step": 7136 }, { "epoch": 5.922821576763486, "grad_norm": 20.563064575195312, "learning_rate": 1.763286307053942e-05, "loss": 1.3318, "step": 7137 }, { "epoch": 5.923651452282158, "grad_norm": 16.929845809936523, "learning_rate": 1.7632531120331953e-05, "loss": 0.9128, "step": 7138 }, { "epoch": 5.92448132780083, "grad_norm": 12.396810531616211, "learning_rate": 1.763219917012448e-05, "loss": 1.2723, "step": 7139 }, { "epoch": 5.925311203319502, "grad_norm": 16.71782875061035, "learning_rate": 1.7631867219917013e-05, "loss": 1.0749, "step": 7140 }, { "epoch": 5.9261410788381745, "grad_norm": 16.477575302124023, "learning_rate": 1.7631535269709546e-05, "loss": 1.6607, "step": 7141 }, { "epoch": 5.926970954356847, "grad_norm": 13.52065658569336, "learning_rate": 1.7631203319502078e-05, "loss": 1.307, "step": 7142 }, { "epoch": 5.927800829875519, "grad_norm": 14.171363830566406, "learning_rate": 1.7630871369294606e-05, "loss": 1.2093, "step": 7143 }, { "epoch": 5.928630705394191, "grad_norm": 26.192819595336914, "learning_rate": 1.763053941908714e-05, "loss": 1.4968, "step": 7144 }, { "epoch": 5.929460580912863, "grad_norm": 16.63044548034668, "learning_rate": 1.763020746887967e-05, "loss": 1.2585, "step": 7145 }, { "epoch": 5.9302904564315355, "grad_norm": 9.776204109191895, "learning_rate": 1.7629875518672203e-05, 
"loss": 0.597, "step": 7146 }, { "epoch": 5.931120331950208, "grad_norm": 22.812406539916992, "learning_rate": 1.762954356846473e-05, "loss": 1.2929, "step": 7147 }, { "epoch": 5.93195020746888, "grad_norm": 15.238792419433594, "learning_rate": 1.7629211618257264e-05, "loss": 0.9867, "step": 7148 }, { "epoch": 5.932780082987552, "grad_norm": 31.90537452697754, "learning_rate": 1.7628879668049796e-05, "loss": 1.8207, "step": 7149 }, { "epoch": 5.933609958506224, "grad_norm": 19.777660369873047, "learning_rate": 1.7628547717842324e-05, "loss": 1.022, "step": 7150 }, { "epoch": 5.9344398340248965, "grad_norm": 12.209352493286133, "learning_rate": 1.7628215767634856e-05, "loss": 1.3873, "step": 7151 }, { "epoch": 5.935269709543569, "grad_norm": 16.845794677734375, "learning_rate": 1.762788381742739e-05, "loss": 2.0253, "step": 7152 }, { "epoch": 5.936099585062241, "grad_norm": 17.42530632019043, "learning_rate": 1.7627551867219917e-05, "loss": 1.5052, "step": 7153 }, { "epoch": 5.936929460580913, "grad_norm": 10.202346801757812, "learning_rate": 1.762721991701245e-05, "loss": 0.9248, "step": 7154 }, { "epoch": 5.937759336099585, "grad_norm": 20.462026596069336, "learning_rate": 1.7626887966804978e-05, "loss": 1.9775, "step": 7155 }, { "epoch": 5.9385892116182575, "grad_norm": 13.991439819335938, "learning_rate": 1.762655601659751e-05, "loss": 0.9732, "step": 7156 }, { "epoch": 5.93941908713693, "grad_norm": 15.628639221191406, "learning_rate": 1.7626224066390042e-05, "loss": 1.1926, "step": 7157 }, { "epoch": 5.940248962655602, "grad_norm": 19.94038963317871, "learning_rate": 1.7625892116182574e-05, "loss": 1.4093, "step": 7158 }, { "epoch": 5.941078838174274, "grad_norm": 15.660969734191895, "learning_rate": 1.7625560165975103e-05, "loss": 1.8944, "step": 7159 }, { "epoch": 5.941908713692946, "grad_norm": 18.515901565551758, "learning_rate": 1.7625228215767635e-05, "loss": 1.3635, "step": 7160 }, { "epoch": 5.9427385892116185, "grad_norm": 14.164690971374512, 
"learning_rate": 1.7624896265560167e-05, "loss": 1.3316, "step": 7161 }, { "epoch": 5.943568464730291, "grad_norm": 11.562313079833984, "learning_rate": 1.76245643153527e-05, "loss": 1.0009, "step": 7162 }, { "epoch": 5.944398340248963, "grad_norm": 17.689292907714844, "learning_rate": 1.762423236514523e-05, "loss": 1.776, "step": 7163 }, { "epoch": 5.945228215767635, "grad_norm": 11.346517562866211, "learning_rate": 1.762390041493776e-05, "loss": 1.1938, "step": 7164 }, { "epoch": 5.946058091286307, "grad_norm": 10.667402267456055, "learning_rate": 1.7623568464730292e-05, "loss": 0.5556, "step": 7165 }, { "epoch": 5.946887966804979, "grad_norm": 23.456682205200195, "learning_rate": 1.7623236514522825e-05, "loss": 2.4037, "step": 7166 }, { "epoch": 5.947717842323652, "grad_norm": 18.05027198791504, "learning_rate": 1.7622904564315357e-05, "loss": 1.2841, "step": 7167 }, { "epoch": 5.948547717842324, "grad_norm": 22.88262939453125, "learning_rate": 1.7622572614107885e-05, "loss": 1.4364, "step": 7168 }, { "epoch": 5.949377593360996, "grad_norm": 23.130836486816406, "learning_rate": 1.7622240663900417e-05, "loss": 1.5119, "step": 7169 }, { "epoch": 5.950207468879668, "grad_norm": 16.547977447509766, "learning_rate": 1.762190871369295e-05, "loss": 1.8637, "step": 7170 }, { "epoch": 5.95103734439834, "grad_norm": 20.071496963500977, "learning_rate": 1.7621576763485478e-05, "loss": 1.2072, "step": 7171 }, { "epoch": 5.951867219917013, "grad_norm": 18.31351661682129, "learning_rate": 1.762124481327801e-05, "loss": 1.568, "step": 7172 }, { "epoch": 5.952697095435685, "grad_norm": 19.620019912719727, "learning_rate": 1.762091286307054e-05, "loss": 2.2315, "step": 7173 }, { "epoch": 5.953526970954357, "grad_norm": 16.434783935546875, "learning_rate": 1.762058091286307e-05, "loss": 1.5634, "step": 7174 }, { "epoch": 5.954356846473029, "grad_norm": 15.03618335723877, "learning_rate": 1.7620248962655603e-05, "loss": 1.2531, "step": 7175 }, { "epoch": 5.955186721991701, 
"grad_norm": 26.09556007385254, "learning_rate": 1.7619917012448132e-05, "loss": 0.8704, "step": 7176 }, { "epoch": 5.956016597510374, "grad_norm": 20.810617446899414, "learning_rate": 1.7619585062240664e-05, "loss": 1.512, "step": 7177 }, { "epoch": 5.956846473029046, "grad_norm": 14.170321464538574, "learning_rate": 1.7619253112033196e-05, "loss": 1.0698, "step": 7178 }, { "epoch": 5.957676348547718, "grad_norm": 10.382955551147461, "learning_rate": 1.761892116182573e-05, "loss": 0.707, "step": 7179 }, { "epoch": 5.95850622406639, "grad_norm": 15.0039701461792, "learning_rate": 1.7618589211618257e-05, "loss": 1.2548, "step": 7180 }, { "epoch": 5.959336099585062, "grad_norm": 13.207218170166016, "learning_rate": 1.761825726141079e-05, "loss": 1.8369, "step": 7181 }, { "epoch": 5.960165975103735, "grad_norm": 18.28339958190918, "learning_rate": 1.761792531120332e-05, "loss": 1.5699, "step": 7182 }, { "epoch": 5.960995850622407, "grad_norm": 11.930313110351562, "learning_rate": 1.7617593360995853e-05, "loss": 0.6914, "step": 7183 }, { "epoch": 5.961825726141079, "grad_norm": 14.995063781738281, "learning_rate": 1.7617261410788382e-05, "loss": 1.1757, "step": 7184 }, { "epoch": 5.962655601659751, "grad_norm": 10.49337387084961, "learning_rate": 1.7616929460580914e-05, "loss": 0.7716, "step": 7185 }, { "epoch": 5.963485477178423, "grad_norm": 24.497650146484375, "learning_rate": 1.7616597510373446e-05, "loss": 0.8348, "step": 7186 }, { "epoch": 5.964315352697096, "grad_norm": 19.94114875793457, "learning_rate": 1.761626556016598e-05, "loss": 1.0023, "step": 7187 }, { "epoch": 5.965145228215768, "grad_norm": 20.495542526245117, "learning_rate": 1.7615933609958507e-05, "loss": 1.4172, "step": 7188 }, { "epoch": 5.96597510373444, "grad_norm": 10.905158042907715, "learning_rate": 1.761560165975104e-05, "loss": 1.3464, "step": 7189 }, { "epoch": 5.966804979253112, "grad_norm": 23.981311798095703, "learning_rate": 1.761526970954357e-05, "loss": 1.925, "step": 7190 }, { 
"epoch": 5.967634854771784, "grad_norm": 21.24484634399414, "learning_rate": 1.76149377593361e-05, "loss": 1.6317, "step": 7191 }, { "epoch": 5.9684647302904565, "grad_norm": 17.56093406677246, "learning_rate": 1.7614605809128632e-05, "loss": 2.2026, "step": 7192 }, { "epoch": 5.969294605809129, "grad_norm": 30.515487670898438, "learning_rate": 1.761427385892116e-05, "loss": 1.0903, "step": 7193 }, { "epoch": 5.970124481327801, "grad_norm": 9.370264053344727, "learning_rate": 1.7613941908713693e-05, "loss": 0.6942, "step": 7194 }, { "epoch": 5.970954356846473, "grad_norm": 14.505057334899902, "learning_rate": 1.7613609958506225e-05, "loss": 1.3343, "step": 7195 }, { "epoch": 5.971784232365145, "grad_norm": 30.762670516967773, "learning_rate": 1.7613278008298757e-05, "loss": 1.8162, "step": 7196 }, { "epoch": 5.9726141078838175, "grad_norm": 21.97382164001465, "learning_rate": 1.7612946058091286e-05, "loss": 1.3721, "step": 7197 }, { "epoch": 5.97344398340249, "grad_norm": 18.244888305664062, "learning_rate": 1.7612614107883818e-05, "loss": 1.016, "step": 7198 }, { "epoch": 5.974273858921162, "grad_norm": 12.314209938049316, "learning_rate": 1.761228215767635e-05, "loss": 1.0684, "step": 7199 }, { "epoch": 5.975103734439834, "grad_norm": 12.949970245361328, "learning_rate": 1.7611950207468882e-05, "loss": 0.7589, "step": 7200 }, { "epoch": 5.975933609958506, "grad_norm": 17.138587951660156, "learning_rate": 1.761161825726141e-05, "loss": 1.4824, "step": 7201 }, { "epoch": 5.9767634854771785, "grad_norm": 10.53682804107666, "learning_rate": 1.7611286307053943e-05, "loss": 0.7679, "step": 7202 }, { "epoch": 5.977593360995851, "grad_norm": 21.518564224243164, "learning_rate": 1.7610954356846475e-05, "loss": 0.8322, "step": 7203 }, { "epoch": 5.978423236514523, "grad_norm": 17.605436325073242, "learning_rate": 1.7610622406639007e-05, "loss": 1.0801, "step": 7204 }, { "epoch": 5.979253112033195, "grad_norm": 22.412263870239258, "learning_rate": 1.7610290456431536e-05, 
"loss": 1.48, "step": 7205 }, { "epoch": 5.980082987551867, "grad_norm": 18.723628997802734, "learning_rate": 1.7609958506224068e-05, "loss": 1.3597, "step": 7206 }, { "epoch": 5.9809128630705395, "grad_norm": 23.362293243408203, "learning_rate": 1.76096265560166e-05, "loss": 1.1482, "step": 7207 }, { "epoch": 5.981742738589212, "grad_norm": 17.734342575073242, "learning_rate": 1.7609294605809132e-05, "loss": 1.2217, "step": 7208 }, { "epoch": 5.982572614107884, "grad_norm": 17.54268455505371, "learning_rate": 1.760896265560166e-05, "loss": 0.8932, "step": 7209 }, { "epoch": 5.983402489626556, "grad_norm": 13.377914428710938, "learning_rate": 1.7608630705394193e-05, "loss": 1.1267, "step": 7210 }, { "epoch": 5.984232365145228, "grad_norm": 19.413105010986328, "learning_rate": 1.7608298755186722e-05, "loss": 1.7154, "step": 7211 }, { "epoch": 5.9850622406639005, "grad_norm": 14.5652437210083, "learning_rate": 1.7607966804979254e-05, "loss": 1.0724, "step": 7212 }, { "epoch": 5.985892116182573, "grad_norm": 37.72617721557617, "learning_rate": 1.7607634854771786e-05, "loss": 1.3909, "step": 7213 }, { "epoch": 5.986721991701245, "grad_norm": 16.65610122680664, "learning_rate": 1.7607302904564315e-05, "loss": 1.0, "step": 7214 }, { "epoch": 5.987551867219917, "grad_norm": 23.58333969116211, "learning_rate": 1.7606970954356847e-05, "loss": 2.0336, "step": 7215 }, { "epoch": 5.988381742738589, "grad_norm": 18.178733825683594, "learning_rate": 1.760663900414938e-05, "loss": 0.8938, "step": 7216 }, { "epoch": 5.9892116182572614, "grad_norm": 15.350951194763184, "learning_rate": 1.760630705394191e-05, "loss": 0.7218, "step": 7217 }, { "epoch": 5.990041493775934, "grad_norm": 17.090951919555664, "learning_rate": 1.760597510373444e-05, "loss": 1.0359, "step": 7218 }, { "epoch": 5.990871369294606, "grad_norm": 17.239152908325195, "learning_rate": 1.7605643153526972e-05, "loss": 1.4167, "step": 7219 }, { "epoch": 5.991701244813278, "grad_norm": 14.280414581298828, 
"learning_rate": 1.7605311203319504e-05, "loss": 1.0159, "step": 7220 }, { "epoch": 5.99253112033195, "grad_norm": 18.373104095458984, "learning_rate": 1.7604979253112036e-05, "loss": 1.2161, "step": 7221 }, { "epoch": 5.993360995850622, "grad_norm": 17.15353775024414, "learning_rate": 1.7604647302904565e-05, "loss": 1.7994, "step": 7222 }, { "epoch": 5.994190871369295, "grad_norm": 16.484392166137695, "learning_rate": 1.7604315352697097e-05, "loss": 1.3451, "step": 7223 }, { "epoch": 5.995020746887967, "grad_norm": 15.520444869995117, "learning_rate": 1.760398340248963e-05, "loss": 1.7793, "step": 7224 }, { "epoch": 5.995850622406639, "grad_norm": 12.22846508026123, "learning_rate": 1.760365145228216e-05, "loss": 0.8311, "step": 7225 }, { "epoch": 5.996680497925311, "grad_norm": 23.193063735961914, "learning_rate": 1.760331950207469e-05, "loss": 1.2409, "step": 7226 }, { "epoch": 5.997510373443983, "grad_norm": 16.28404426574707, "learning_rate": 1.7602987551867222e-05, "loss": 1.3222, "step": 7227 }, { "epoch": 5.998340248962656, "grad_norm": 11.691798210144043, "learning_rate": 1.7602655601659754e-05, "loss": 1.2724, "step": 7228 }, { "epoch": 5.999170124481328, "grad_norm": 20.432279586791992, "learning_rate": 1.7602323651452283e-05, "loss": 0.9198, "step": 7229 }, { "epoch": 6.0, "grad_norm": 15.928034782409668, "learning_rate": 1.7601991701244815e-05, "loss": 0.9715, "step": 7230 }, { "epoch": 6.000829875518672, "grad_norm": 10.34964370727539, "learning_rate": 1.7601659751037347e-05, "loss": 0.8848, "step": 7231 }, { "epoch": 6.001659751037344, "grad_norm": 17.193477630615234, "learning_rate": 1.7601327800829876e-05, "loss": 1.2573, "step": 7232 }, { "epoch": 6.002489626556017, "grad_norm": 13.274965286254883, "learning_rate": 1.7600995850622408e-05, "loss": 0.8869, "step": 7233 }, { "epoch": 6.003319502074689, "grad_norm": 15.590863227844238, "learning_rate": 1.7600663900414937e-05, "loss": 2.0251, "step": 7234 }, { "epoch": 6.004149377593361, "grad_norm": 
18.09158706665039, "learning_rate": 1.760033195020747e-05, "loss": 1.8043, "step": 7235 }, { "epoch": 6.004979253112033, "grad_norm": 15.700906753540039, "learning_rate": 1.76e-05, "loss": 0.7799, "step": 7236 }, { "epoch": 6.005809128630705, "grad_norm": 13.34988021850586, "learning_rate": 1.7599668049792533e-05, "loss": 0.5935, "step": 7237 }, { "epoch": 6.006639004149378, "grad_norm": 14.643157958984375, "learning_rate": 1.7599336099585062e-05, "loss": 1.7951, "step": 7238 }, { "epoch": 6.00746887966805, "grad_norm": 17.148408889770508, "learning_rate": 1.7599004149377594e-05, "loss": 1.3484, "step": 7239 }, { "epoch": 6.008298755186722, "grad_norm": 11.4610595703125, "learning_rate": 1.7598672199170126e-05, "loss": 0.9419, "step": 7240 }, { "epoch": 6.009128630705394, "grad_norm": 17.479278564453125, "learning_rate": 1.7598340248962658e-05, "loss": 1.6815, "step": 7241 }, { "epoch": 6.009958506224066, "grad_norm": 17.64519500732422, "learning_rate": 1.759800829875519e-05, "loss": 0.9744, "step": 7242 }, { "epoch": 6.0107883817427386, "grad_norm": 13.568011283874512, "learning_rate": 1.759767634854772e-05, "loss": 0.8151, "step": 7243 }, { "epoch": 6.011618257261411, "grad_norm": 12.34915542602539, "learning_rate": 1.759734439834025e-05, "loss": 1.0958, "step": 7244 }, { "epoch": 6.012448132780083, "grad_norm": 11.348944664001465, "learning_rate": 1.7597012448132783e-05, "loss": 0.8707, "step": 7245 }, { "epoch": 6.013278008298755, "grad_norm": 14.999653816223145, "learning_rate": 1.7596680497925315e-05, "loss": 1.6198, "step": 7246 }, { "epoch": 6.014107883817427, "grad_norm": 19.94301414489746, "learning_rate": 1.7596348547717844e-05, "loss": 1.7895, "step": 7247 }, { "epoch": 6.0149377593360995, "grad_norm": 17.117311477661133, "learning_rate": 1.7596016597510376e-05, "loss": 1.0907, "step": 7248 }, { "epoch": 6.015767634854772, "grad_norm": 15.779550552368164, "learning_rate": 1.7595684647302905e-05, "loss": 1.2901, "step": 7249 }, { "epoch": 
6.016597510373444, "grad_norm": 14.808902740478516, "learning_rate": 1.7595352697095437e-05, "loss": 1.3871, "step": 7250 }, { "epoch": 6.017427385892116, "grad_norm": 25.550312042236328, "learning_rate": 1.759502074688797e-05, "loss": 1.0693, "step": 7251 }, { "epoch": 6.018257261410788, "grad_norm": 19.601905822753906, "learning_rate": 1.7594688796680498e-05, "loss": 1.4659, "step": 7252 }, { "epoch": 6.0190871369294605, "grad_norm": 20.517717361450195, "learning_rate": 1.759435684647303e-05, "loss": 1.7811, "step": 7253 }, { "epoch": 6.019917012448133, "grad_norm": 18.942840576171875, "learning_rate": 1.7594024896265562e-05, "loss": 1.4601, "step": 7254 }, { "epoch": 6.020746887966805, "grad_norm": 18.503145217895508, "learning_rate": 1.759369294605809e-05, "loss": 1.4248, "step": 7255 }, { "epoch": 6.021576763485477, "grad_norm": 17.561521530151367, "learning_rate": 1.7593360995850623e-05, "loss": 1.669, "step": 7256 }, { "epoch": 6.022406639004149, "grad_norm": 17.018850326538086, "learning_rate": 1.7593029045643155e-05, "loss": 0.9345, "step": 7257 }, { "epoch": 6.0232365145228215, "grad_norm": 18.230117797851562, "learning_rate": 1.7592697095435687e-05, "loss": 1.0506, "step": 7258 }, { "epoch": 6.024066390041494, "grad_norm": 15.395512580871582, "learning_rate": 1.7592365145228216e-05, "loss": 0.9741, "step": 7259 }, { "epoch": 6.024896265560166, "grad_norm": 21.753204345703125, "learning_rate": 1.7592033195020748e-05, "loss": 1.7876, "step": 7260 }, { "epoch": 6.025726141078838, "grad_norm": 23.199377059936523, "learning_rate": 1.759170124481328e-05, "loss": 1.5749, "step": 7261 }, { "epoch": 6.02655601659751, "grad_norm": 13.507506370544434, "learning_rate": 1.7591369294605812e-05, "loss": 1.1946, "step": 7262 }, { "epoch": 6.0273858921161825, "grad_norm": 23.72537612915039, "learning_rate": 1.759103734439834e-05, "loss": 1.4868, "step": 7263 }, { "epoch": 6.028215767634855, "grad_norm": 16.987409591674805, "learning_rate": 1.7590705394190873e-05, "loss": 
0.9214, "step": 7264 }, { "epoch": 6.029045643153527, "grad_norm": 13.415179252624512, "learning_rate": 1.7590373443983405e-05, "loss": 0.9755, "step": 7265 }, { "epoch": 6.029875518672199, "grad_norm": 12.562858581542969, "learning_rate": 1.7590041493775937e-05, "loss": 0.8911, "step": 7266 }, { "epoch": 6.030705394190871, "grad_norm": 12.60118293762207, "learning_rate": 1.7589709543568466e-05, "loss": 1.181, "step": 7267 }, { "epoch": 6.0315352697095435, "grad_norm": 22.52974510192871, "learning_rate": 1.7589377593360998e-05, "loss": 1.5275, "step": 7268 }, { "epoch": 6.032365145228216, "grad_norm": 21.267518997192383, "learning_rate": 1.758904564315353e-05, "loss": 1.5305, "step": 7269 }, { "epoch": 6.033195020746888, "grad_norm": 11.874067306518555, "learning_rate": 1.758871369294606e-05, "loss": 0.9878, "step": 7270 }, { "epoch": 6.03402489626556, "grad_norm": 14.237340927124023, "learning_rate": 1.758838174273859e-05, "loss": 0.7405, "step": 7271 }, { "epoch": 6.034854771784232, "grad_norm": 25.46441650390625, "learning_rate": 1.758804979253112e-05, "loss": 1.1589, "step": 7272 }, { "epoch": 6.035684647302904, "grad_norm": 15.860912322998047, "learning_rate": 1.758771784232365e-05, "loss": 1.2113, "step": 7273 }, { "epoch": 6.036514522821577, "grad_norm": 17.747844696044922, "learning_rate": 1.7587385892116184e-05, "loss": 1.4929, "step": 7274 }, { "epoch": 6.037344398340249, "grad_norm": 17.108856201171875, "learning_rate": 1.7587053941908716e-05, "loss": 1.3167, "step": 7275 }, { "epoch": 6.038174273858921, "grad_norm": 15.622318267822266, "learning_rate": 1.7586721991701245e-05, "loss": 1.028, "step": 7276 }, { "epoch": 6.039004149377593, "grad_norm": 26.800424575805664, "learning_rate": 1.7586390041493777e-05, "loss": 1.6185, "step": 7277 }, { "epoch": 6.039834024896265, "grad_norm": 19.224952697753906, "learning_rate": 1.758605809128631e-05, "loss": 1.0015, "step": 7278 }, { "epoch": 6.040663900414938, "grad_norm": 11.838112831115723, "learning_rate": 
1.758572614107884e-05, "loss": 1.1565, "step": 7279 }, { "epoch": 6.04149377593361, "grad_norm": 24.99097442626953, "learning_rate": 1.758539419087137e-05, "loss": 1.6849, "step": 7280 }, { "epoch": 6.042323651452282, "grad_norm": 20.79340362548828, "learning_rate": 1.7585062240663902e-05, "loss": 1.1765, "step": 7281 }, { "epoch": 6.043153526970954, "grad_norm": 18.968692779541016, "learning_rate": 1.7584730290456434e-05, "loss": 0.9375, "step": 7282 }, { "epoch": 6.043983402489626, "grad_norm": 12.537525177001953, "learning_rate": 1.7584398340248966e-05, "loss": 1.2474, "step": 7283 }, { "epoch": 6.044813278008299, "grad_norm": 22.059207916259766, "learning_rate": 1.7584066390041495e-05, "loss": 1.5173, "step": 7284 }, { "epoch": 6.045643153526971, "grad_norm": 24.969074249267578, "learning_rate": 1.7583734439834027e-05, "loss": 1.5642, "step": 7285 }, { "epoch": 6.046473029045643, "grad_norm": 19.640426635742188, "learning_rate": 1.758340248962656e-05, "loss": 1.1247, "step": 7286 }, { "epoch": 6.047302904564315, "grad_norm": 14.118467330932617, "learning_rate": 1.7583070539419088e-05, "loss": 1.361, "step": 7287 }, { "epoch": 6.048132780082987, "grad_norm": 20.16131019592285, "learning_rate": 1.758273858921162e-05, "loss": 1.4278, "step": 7288 }, { "epoch": 6.04896265560166, "grad_norm": 38.20254898071289, "learning_rate": 1.7582406639004152e-05, "loss": 0.916, "step": 7289 }, { "epoch": 6.049792531120332, "grad_norm": 11.561254501342773, "learning_rate": 1.758207468879668e-05, "loss": 0.9629, "step": 7290 }, { "epoch": 6.050622406639004, "grad_norm": 19.45550537109375, "learning_rate": 1.7581742738589213e-05, "loss": 1.1492, "step": 7291 }, { "epoch": 6.051452282157676, "grad_norm": 12.339317321777344, "learning_rate": 1.7581410788381745e-05, "loss": 0.9839, "step": 7292 }, { "epoch": 6.052282157676348, "grad_norm": 17.062833786010742, "learning_rate": 1.7581078838174273e-05, "loss": 1.1835, "step": 7293 }, { "epoch": 6.053112033195021, "grad_norm": 
26.04759979248047, "learning_rate": 1.7580746887966806e-05, "loss": 1.7176, "step": 7294 }, { "epoch": 6.053941908713693, "grad_norm": 22.252443313598633, "learning_rate": 1.7580414937759338e-05, "loss": 2.0704, "step": 7295 }, { "epoch": 6.054771784232365, "grad_norm": 26.499488830566406, "learning_rate": 1.758008298755187e-05, "loss": 1.3821, "step": 7296 }, { "epoch": 6.055601659751037, "grad_norm": 18.066553115844727, "learning_rate": 1.75797510373444e-05, "loss": 0.9632, "step": 7297 }, { "epoch": 6.056431535269709, "grad_norm": 20.345895767211914, "learning_rate": 1.757941908713693e-05, "loss": 1.0718, "step": 7298 }, { "epoch": 6.0572614107883815, "grad_norm": 18.393159866333008, "learning_rate": 1.7579087136929463e-05, "loss": 1.5084, "step": 7299 }, { "epoch": 6.058091286307054, "grad_norm": 15.437821388244629, "learning_rate": 1.7578755186721995e-05, "loss": 1.3519, "step": 7300 }, { "epoch": 6.058921161825726, "grad_norm": 16.225330352783203, "learning_rate": 1.7578423236514524e-05, "loss": 0.7416, "step": 7301 }, { "epoch": 6.059751037344398, "grad_norm": 24.640634536743164, "learning_rate": 1.7578091286307056e-05, "loss": 1.7412, "step": 7302 }, { "epoch": 6.06058091286307, "grad_norm": 32.808868408203125, "learning_rate": 1.7577759336099588e-05, "loss": 1.2819, "step": 7303 }, { "epoch": 6.0614107883817425, "grad_norm": 10.632092475891113, "learning_rate": 1.757742738589212e-05, "loss": 0.8447, "step": 7304 }, { "epoch": 6.062240663900415, "grad_norm": 14.912628173828125, "learning_rate": 1.757709543568465e-05, "loss": 1.3587, "step": 7305 }, { "epoch": 6.063070539419087, "grad_norm": 25.41410255432129, "learning_rate": 1.757676348547718e-05, "loss": 1.3786, "step": 7306 }, { "epoch": 6.063900414937759, "grad_norm": 17.745441436767578, "learning_rate": 1.7576431535269713e-05, "loss": 1.4005, "step": 7307 }, { "epoch": 6.064730290456431, "grad_norm": 21.196012496948242, "learning_rate": 1.757609958506224e-05, "loss": 1.2576, "step": 7308 }, { "epoch": 
6.0655601659751035, "grad_norm": 10.950329780578613, "learning_rate": 1.7575767634854774e-05, "loss": 0.9416, "step": 7309 }, { "epoch": 6.066390041493776, "grad_norm": 11.097970962524414, "learning_rate": 1.7575435684647302e-05, "loss": 0.7332, "step": 7310 }, { "epoch": 6.067219917012448, "grad_norm": 22.952880859375, "learning_rate": 1.7575103734439834e-05, "loss": 1.7604, "step": 7311 }, { "epoch": 6.06804979253112, "grad_norm": 19.218915939331055, "learning_rate": 1.7574771784232367e-05, "loss": 1.4813, "step": 7312 }, { "epoch": 6.068879668049792, "grad_norm": 19.78238296508789, "learning_rate": 1.7574439834024895e-05, "loss": 1.2471, "step": 7313 }, { "epoch": 6.0697095435684645, "grad_norm": 15.35473918914795, "learning_rate": 1.7574107883817427e-05, "loss": 1.1324, "step": 7314 }, { "epoch": 6.070539419087137, "grad_norm": 23.347360610961914, "learning_rate": 1.757377593360996e-05, "loss": 2.0591, "step": 7315 }, { "epoch": 6.071369294605809, "grad_norm": 13.587725639343262, "learning_rate": 1.757344398340249e-05, "loss": 0.7876, "step": 7316 }, { "epoch": 6.072199170124481, "grad_norm": 19.965185165405273, "learning_rate": 1.757311203319502e-05, "loss": 1.6735, "step": 7317 }, { "epoch": 6.073029045643153, "grad_norm": 14.810017585754395, "learning_rate": 1.7572780082987552e-05, "loss": 1.1631, "step": 7318 }, { "epoch": 6.0738589211618255, "grad_norm": 14.488373756408691, "learning_rate": 1.7572448132780085e-05, "loss": 1.2848, "step": 7319 }, { "epoch": 6.074688796680498, "grad_norm": 13.40113639831543, "learning_rate": 1.7572116182572617e-05, "loss": 0.7432, "step": 7320 }, { "epoch": 6.07551867219917, "grad_norm": 14.582118034362793, "learning_rate": 1.757178423236515e-05, "loss": 0.797, "step": 7321 }, { "epoch": 6.076348547717842, "grad_norm": 18.887344360351562, "learning_rate": 1.7571452282157677e-05, "loss": 0.9977, "step": 7322 }, { "epoch": 6.077178423236514, "grad_norm": 12.461292266845703, "learning_rate": 1.757112033195021e-05, "loss": 
1.1222, "step": 7323 }, { "epoch": 6.0780082987551864, "grad_norm": 28.79944610595703, "learning_rate": 1.757078838174274e-05, "loss": 1.5075, "step": 7324 }, { "epoch": 6.078838174273859, "grad_norm": 29.66717529296875, "learning_rate": 1.7570456431535274e-05, "loss": 1.1377, "step": 7325 }, { "epoch": 6.079668049792531, "grad_norm": 17.729835510253906, "learning_rate": 1.7570124481327802e-05, "loss": 0.802, "step": 7326 }, { "epoch": 6.080497925311203, "grad_norm": 16.783761978149414, "learning_rate": 1.7569792531120335e-05, "loss": 0.9834, "step": 7327 }, { "epoch": 6.081327800829875, "grad_norm": 17.050996780395508, "learning_rate": 1.7569460580912863e-05, "loss": 0.8632, "step": 7328 }, { "epoch": 6.082157676348547, "grad_norm": 15.238382339477539, "learning_rate": 1.7569128630705395e-05, "loss": 1.2584, "step": 7329 }, { "epoch": 6.08298755186722, "grad_norm": 16.2335262298584, "learning_rate": 1.7568796680497928e-05, "loss": 1.1945, "step": 7330 }, { "epoch": 6.083817427385892, "grad_norm": 17.961870193481445, "learning_rate": 1.7568464730290456e-05, "loss": 1.3379, "step": 7331 }, { "epoch": 6.084647302904564, "grad_norm": 14.54784870147705, "learning_rate": 1.756813278008299e-05, "loss": 1.1273, "step": 7332 }, { "epoch": 6.085477178423236, "grad_norm": 19.22010040283203, "learning_rate": 1.756780082987552e-05, "loss": 1.5439, "step": 7333 }, { "epoch": 6.086307053941908, "grad_norm": 42.332889556884766, "learning_rate": 1.756746887966805e-05, "loss": 1.2727, "step": 7334 }, { "epoch": 6.087136929460581, "grad_norm": 22.245220184326172, "learning_rate": 1.756713692946058e-05, "loss": 1.1612, "step": 7335 }, { "epoch": 6.087966804979253, "grad_norm": 14.871593475341797, "learning_rate": 1.7566804979253113e-05, "loss": 0.9787, "step": 7336 }, { "epoch": 6.088796680497925, "grad_norm": 19.195598602294922, "learning_rate": 1.7566473029045646e-05, "loss": 0.9308, "step": 7337 }, { "epoch": 6.089626556016597, "grad_norm": 27.6390438079834, "learning_rate": 
1.7566141078838174e-05, "loss": 1.6376, "step": 7338 }, { "epoch": 6.090456431535269, "grad_norm": 48.3239631652832, "learning_rate": 1.7565809128630706e-05, "loss": 1.9312, "step": 7339 }, { "epoch": 6.091286307053942, "grad_norm": 14.034048080444336, "learning_rate": 1.756547717842324e-05, "loss": 1.341, "step": 7340 }, { "epoch": 6.092116182572614, "grad_norm": 21.052196502685547, "learning_rate": 1.756514522821577e-05, "loss": 1.2249, "step": 7341 }, { "epoch": 6.092946058091286, "grad_norm": 15.534478187561035, "learning_rate": 1.75648132780083e-05, "loss": 1.2441, "step": 7342 }, { "epoch": 6.093775933609958, "grad_norm": 16.527509689331055, "learning_rate": 1.756448132780083e-05, "loss": 1.0797, "step": 7343 }, { "epoch": 6.09460580912863, "grad_norm": 15.638726234436035, "learning_rate": 1.7564149377593363e-05, "loss": 1.2555, "step": 7344 }, { "epoch": 6.095435684647303, "grad_norm": 15.847016334533691, "learning_rate": 1.7563817427385896e-05, "loss": 1.0737, "step": 7345 }, { "epoch": 6.096265560165975, "grad_norm": 20.975536346435547, "learning_rate": 1.7563485477178424e-05, "loss": 0.893, "step": 7346 }, { "epoch": 6.097095435684647, "grad_norm": 13.724990844726562, "learning_rate": 1.7563153526970956e-05, "loss": 0.8156, "step": 7347 }, { "epoch": 6.097925311203319, "grad_norm": 14.224637985229492, "learning_rate": 1.756282157676349e-05, "loss": 0.7176, "step": 7348 }, { "epoch": 6.098755186721991, "grad_norm": 24.697885513305664, "learning_rate": 1.7562489626556017e-05, "loss": 1.7371, "step": 7349 }, { "epoch": 6.0995850622406635, "grad_norm": 19.764291763305664, "learning_rate": 1.756215767634855e-05, "loss": 0.9817, "step": 7350 }, { "epoch": 6.100414937759336, "grad_norm": 14.155479431152344, "learning_rate": 1.7561825726141078e-05, "loss": 1.4376, "step": 7351 }, { "epoch": 6.101244813278008, "grad_norm": 16.466373443603516, "learning_rate": 1.756149377593361e-05, "loss": 1.0607, "step": 7352 }, { "epoch": 6.10207468879668, "grad_norm": 
25.742406845092773, "learning_rate": 1.7561161825726142e-05, "loss": 1.5598, "step": 7353 }, { "epoch": 6.102904564315352, "grad_norm": 16.96714210510254, "learning_rate": 1.7560829875518674e-05, "loss": 1.1496, "step": 7354 }, { "epoch": 6.1037344398340245, "grad_norm": 16.3049373626709, "learning_rate": 1.7560497925311203e-05, "loss": 1.1248, "step": 7355 }, { "epoch": 6.104564315352697, "grad_norm": 14.998152732849121, "learning_rate": 1.7560165975103735e-05, "loss": 1.3432, "step": 7356 }, { "epoch": 6.105394190871369, "grad_norm": 15.334338188171387, "learning_rate": 1.7559834024896267e-05, "loss": 1.4792, "step": 7357 }, { "epoch": 6.106224066390041, "grad_norm": 13.851250648498535, "learning_rate": 1.75595020746888e-05, "loss": 0.5896, "step": 7358 }, { "epoch": 6.107053941908713, "grad_norm": 14.680754661560059, "learning_rate": 1.7559170124481328e-05, "loss": 0.651, "step": 7359 }, { "epoch": 6.1078838174273855, "grad_norm": 19.286828994750977, "learning_rate": 1.755883817427386e-05, "loss": 1.1701, "step": 7360 }, { "epoch": 6.108713692946058, "grad_norm": 13.054076194763184, "learning_rate": 1.7558506224066392e-05, "loss": 0.8305, "step": 7361 }, { "epoch": 6.10954356846473, "grad_norm": 13.611552238464355, "learning_rate": 1.7558174273858924e-05, "loss": 1.2064, "step": 7362 }, { "epoch": 6.110373443983402, "grad_norm": 14.579587936401367, "learning_rate": 1.7557842323651453e-05, "loss": 0.9308, "step": 7363 }, { "epoch": 6.111203319502074, "grad_norm": 16.332319259643555, "learning_rate": 1.7557510373443985e-05, "loss": 0.8092, "step": 7364 }, { "epoch": 6.1120331950207465, "grad_norm": 14.036348342895508, "learning_rate": 1.7557178423236517e-05, "loss": 1.2089, "step": 7365 }, { "epoch": 6.112863070539419, "grad_norm": 22.840614318847656, "learning_rate": 1.7556846473029046e-05, "loss": 1.8972, "step": 7366 }, { "epoch": 6.113692946058091, "grad_norm": 27.83953285217285, "learning_rate": 1.7556514522821578e-05, "loss": 1.6066, "step": 7367 }, { 
"epoch": 6.114522821576763, "grad_norm": 17.319368362426758, "learning_rate": 1.755618257261411e-05, "loss": 0.8634, "step": 7368 }, { "epoch": 6.115352697095435, "grad_norm": 18.56357765197754, "learning_rate": 1.755585062240664e-05, "loss": 1.3288, "step": 7369 }, { "epoch": 6.1161825726141075, "grad_norm": 32.52944564819336, "learning_rate": 1.755551867219917e-05, "loss": 1.7568, "step": 7370 }, { "epoch": 6.11701244813278, "grad_norm": 33.24165344238281, "learning_rate": 1.7555186721991703e-05, "loss": 1.7517, "step": 7371 }, { "epoch": 6.117842323651452, "grad_norm": 16.22218894958496, "learning_rate": 1.7554854771784232e-05, "loss": 1.1927, "step": 7372 }, { "epoch": 6.118672199170124, "grad_norm": 21.441560745239258, "learning_rate": 1.7554522821576764e-05, "loss": 1.1932, "step": 7373 }, { "epoch": 6.119502074688796, "grad_norm": 12.654077529907227, "learning_rate": 1.7554190871369296e-05, "loss": 0.5672, "step": 7374 }, { "epoch": 6.1203319502074685, "grad_norm": 21.130298614501953, "learning_rate": 1.755385892116183e-05, "loss": 1.8406, "step": 7375 }, { "epoch": 6.121161825726141, "grad_norm": 31.1213436126709, "learning_rate": 1.7553526970954357e-05, "loss": 1.5018, "step": 7376 }, { "epoch": 6.121991701244813, "grad_norm": 17.74782371520996, "learning_rate": 1.755319502074689e-05, "loss": 0.9495, "step": 7377 }, { "epoch": 6.122821576763485, "grad_norm": 16.57986831665039, "learning_rate": 1.755286307053942e-05, "loss": 1.2593, "step": 7378 }, { "epoch": 6.123651452282157, "grad_norm": 14.40567684173584, "learning_rate": 1.7552531120331953e-05, "loss": 0.9433, "step": 7379 }, { "epoch": 6.124481327800829, "grad_norm": 14.55876636505127, "learning_rate": 1.7552199170124482e-05, "loss": 0.9775, "step": 7380 }, { "epoch": 6.125311203319502, "grad_norm": 18.806800842285156, "learning_rate": 1.7551867219917014e-05, "loss": 1.4907, "step": 7381 }, { "epoch": 6.126141078838174, "grad_norm": 14.996212005615234, "learning_rate": 1.7551535269709546e-05, "loss": 
0.9754, "step": 7382 }, { "epoch": 6.126970954356846, "grad_norm": 11.5498685836792, "learning_rate": 1.755120331950208e-05, "loss": 0.6374, "step": 7383 }, { "epoch": 6.127800829875518, "grad_norm": 14.170948028564453, "learning_rate": 1.7550871369294607e-05, "loss": 0.8681, "step": 7384 }, { "epoch": 6.12863070539419, "grad_norm": 25.603824615478516, "learning_rate": 1.755053941908714e-05, "loss": 1.4486, "step": 7385 }, { "epoch": 6.1294605809128635, "grad_norm": 24.934284210205078, "learning_rate": 1.755020746887967e-05, "loss": 1.68, "step": 7386 }, { "epoch": 6.130290456431536, "grad_norm": 28.362131118774414, "learning_rate": 1.75498755186722e-05, "loss": 0.9484, "step": 7387 }, { "epoch": 6.131120331950208, "grad_norm": 24.431610107421875, "learning_rate": 1.7549543568464732e-05, "loss": 1.8601, "step": 7388 }, { "epoch": 6.13195020746888, "grad_norm": 16.2003173828125, "learning_rate": 1.754921161825726e-05, "loss": 0.8142, "step": 7389 }, { "epoch": 6.132780082987552, "grad_norm": 18.897321701049805, "learning_rate": 1.7548879668049793e-05, "loss": 1.0703, "step": 7390 }, { "epoch": 6.1336099585062245, "grad_norm": 36.0535888671875, "learning_rate": 1.7548547717842325e-05, "loss": 1.8436, "step": 7391 }, { "epoch": 6.134439834024897, "grad_norm": 9.151997566223145, "learning_rate": 1.7548215767634854e-05, "loss": 0.5337, "step": 7392 }, { "epoch": 6.135269709543569, "grad_norm": 18.675535202026367, "learning_rate": 1.7547883817427386e-05, "loss": 0.6441, "step": 7393 }, { "epoch": 6.136099585062241, "grad_norm": 25.745664596557617, "learning_rate": 1.7547551867219918e-05, "loss": 1.3771, "step": 7394 }, { "epoch": 6.136929460580913, "grad_norm": 16.740917205810547, "learning_rate": 1.754721991701245e-05, "loss": 1.0097, "step": 7395 }, { "epoch": 6.1377593360995855, "grad_norm": 18.67136573791504, "learning_rate": 1.754688796680498e-05, "loss": 0.8242, "step": 7396 }, { "epoch": 6.138589211618258, "grad_norm": 17.47848892211914, "learning_rate": 
1.754655601659751e-05, "loss": 1.2709, "step": 7397 }, { "epoch": 6.13941908713693, "grad_norm": 16.30605697631836, "learning_rate": 1.7546224066390043e-05, "loss": 0.758, "step": 7398 }, { "epoch": 6.140248962655602, "grad_norm": 15.988426208496094, "learning_rate": 1.7545892116182575e-05, "loss": 1.2951, "step": 7399 }, { "epoch": 6.141078838174274, "grad_norm": 15.845932960510254, "learning_rate": 1.7545560165975107e-05, "loss": 1.0967, "step": 7400 }, { "epoch": 6.141908713692946, "grad_norm": 12.545208930969238, "learning_rate": 1.7545228215767636e-05, "loss": 0.7951, "step": 7401 }, { "epoch": 6.142738589211619, "grad_norm": 22.46970558166504, "learning_rate": 1.7544896265560168e-05, "loss": 1.5022, "step": 7402 }, { "epoch": 6.143568464730291, "grad_norm": 32.00005340576172, "learning_rate": 1.75445643153527e-05, "loss": 1.6444, "step": 7403 }, { "epoch": 6.144398340248963, "grad_norm": 25.476760864257812, "learning_rate": 1.754423236514523e-05, "loss": 1.1867, "step": 7404 }, { "epoch": 6.145228215767635, "grad_norm": 22.07842445373535, "learning_rate": 1.754390041493776e-05, "loss": 1.5671, "step": 7405 }, { "epoch": 6.146058091286307, "grad_norm": 15.597565650939941, "learning_rate": 1.7543568464730293e-05, "loss": 1.4248, "step": 7406 }, { "epoch": 6.14688796680498, "grad_norm": 23.086959838867188, "learning_rate": 1.7543236514522822e-05, "loss": 1.9102, "step": 7407 }, { "epoch": 6.147717842323652, "grad_norm": 26.044551849365234, "learning_rate": 1.7542904564315354e-05, "loss": 1.4424, "step": 7408 }, { "epoch": 6.148547717842324, "grad_norm": 21.000036239624023, "learning_rate": 1.7542572614107886e-05, "loss": 1.0368, "step": 7409 }, { "epoch": 6.149377593360996, "grad_norm": 18.342378616333008, "learning_rate": 1.7542240663900415e-05, "loss": 1.5576, "step": 7410 }, { "epoch": 6.150207468879668, "grad_norm": 28.854171752929688, "learning_rate": 1.7541908713692947e-05, "loss": 1.4353, "step": 7411 }, { "epoch": 6.151037344398341, "grad_norm": 
25.428241729736328, "learning_rate": 1.754157676348548e-05, "loss": 1.7978, "step": 7412 }, { "epoch": 6.151867219917013, "grad_norm": 25.21224021911621, "learning_rate": 1.7541244813278008e-05, "loss": 1.7215, "step": 7413 }, { "epoch": 6.152697095435685, "grad_norm": 17.38994789123535, "learning_rate": 1.754091286307054e-05, "loss": 1.013, "step": 7414 }, { "epoch": 6.153526970954357, "grad_norm": 12.943525314331055, "learning_rate": 1.7540580912863072e-05, "loss": 0.964, "step": 7415 }, { "epoch": 6.154356846473029, "grad_norm": 29.646923065185547, "learning_rate": 1.7540248962655604e-05, "loss": 1.1006, "step": 7416 }, { "epoch": 6.155186721991702, "grad_norm": 20.24384117126465, "learning_rate": 1.7539917012448133e-05, "loss": 0.8625, "step": 7417 }, { "epoch": 6.156016597510374, "grad_norm": 20.655847549438477, "learning_rate": 1.7539585062240665e-05, "loss": 1.4721, "step": 7418 }, { "epoch": 6.156846473029046, "grad_norm": 25.027158737182617, "learning_rate": 1.7539253112033197e-05, "loss": 0.9186, "step": 7419 }, { "epoch": 6.157676348547718, "grad_norm": 13.93739128112793, "learning_rate": 1.753892116182573e-05, "loss": 1.182, "step": 7420 }, { "epoch": 6.15850622406639, "grad_norm": 26.390308380126953, "learning_rate": 1.7538589211618258e-05, "loss": 2.3456, "step": 7421 }, { "epoch": 6.159336099585063, "grad_norm": 23.318326950073242, "learning_rate": 1.753825726141079e-05, "loss": 1.1795, "step": 7422 }, { "epoch": 6.160165975103735, "grad_norm": 20.66965675354004, "learning_rate": 1.7537925311203322e-05, "loss": 1.5312, "step": 7423 }, { "epoch": 6.160995850622407, "grad_norm": 16.22324562072754, "learning_rate": 1.7537593360995854e-05, "loss": 1.3244, "step": 7424 }, { "epoch": 6.161825726141079, "grad_norm": 18.95966911315918, "learning_rate": 1.7537261410788383e-05, "loss": 1.7506, "step": 7425 }, { "epoch": 6.162655601659751, "grad_norm": 32.888526916503906, "learning_rate": 1.7536929460580915e-05, "loss": 1.8881, "step": 7426 }, { "epoch": 
6.1634854771784235, "grad_norm": 14.666315078735352, "learning_rate": 1.7536597510373444e-05, "loss": 1.0096, "step": 7427 }, { "epoch": 6.164315352697096, "grad_norm": 12.421119689941406, "learning_rate": 1.7536265560165976e-05, "loss": 0.8005, "step": 7428 }, { "epoch": 6.165145228215768, "grad_norm": 17.676050186157227, "learning_rate": 1.7535933609958508e-05, "loss": 1.1238, "step": 7429 }, { "epoch": 6.16597510373444, "grad_norm": 25.608989715576172, "learning_rate": 1.7535601659751037e-05, "loss": 2.0996, "step": 7430 }, { "epoch": 6.166804979253112, "grad_norm": 20.788646697998047, "learning_rate": 1.753526970954357e-05, "loss": 1.1501, "step": 7431 }, { "epoch": 6.1676348547717845, "grad_norm": 16.298187255859375, "learning_rate": 1.75349377593361e-05, "loss": 1.0595, "step": 7432 }, { "epoch": 6.168464730290457, "grad_norm": 20.743690490722656, "learning_rate": 1.7534605809128633e-05, "loss": 1.9157, "step": 7433 }, { "epoch": 6.169294605809129, "grad_norm": 26.457263946533203, "learning_rate": 1.753427385892116e-05, "loss": 1.5474, "step": 7434 }, { "epoch": 6.170124481327801, "grad_norm": 19.63846778869629, "learning_rate": 1.7533941908713694e-05, "loss": 1.0147, "step": 7435 }, { "epoch": 6.170954356846473, "grad_norm": 16.696813583374023, "learning_rate": 1.7533609958506226e-05, "loss": 0.6372, "step": 7436 }, { "epoch": 6.1717842323651455, "grad_norm": 13.289008140563965, "learning_rate": 1.7533278008298758e-05, "loss": 0.9353, "step": 7437 }, { "epoch": 6.172614107883818, "grad_norm": 20.125913619995117, "learning_rate": 1.7532946058091287e-05, "loss": 0.922, "step": 7438 }, { "epoch": 6.17344398340249, "grad_norm": 21.063650131225586, "learning_rate": 1.753261410788382e-05, "loss": 1.4547, "step": 7439 }, { "epoch": 6.174273858921162, "grad_norm": 23.20006561279297, "learning_rate": 1.753228215767635e-05, "loss": 1.3723, "step": 7440 }, { "epoch": 6.175103734439834, "grad_norm": 18.183767318725586, "learning_rate": 1.7531950207468883e-05, "loss": 
1.4549, "step": 7441 }, { "epoch": 6.1759336099585065, "grad_norm": 12.96597957611084, "learning_rate": 1.7531618257261412e-05, "loss": 0.9926, "step": 7442 }, { "epoch": 6.176763485477179, "grad_norm": 20.404029846191406, "learning_rate": 1.7531286307053944e-05, "loss": 1.5534, "step": 7443 }, { "epoch": 6.177593360995851, "grad_norm": 12.232129096984863, "learning_rate": 1.7530954356846476e-05, "loss": 0.7533, "step": 7444 }, { "epoch": 6.178423236514523, "grad_norm": 20.487241744995117, "learning_rate": 1.7530622406639005e-05, "loss": 1.1938, "step": 7445 }, { "epoch": 6.179253112033195, "grad_norm": 13.63498306274414, "learning_rate": 1.7530290456431537e-05, "loss": 0.845, "step": 7446 }, { "epoch": 6.1800829875518675, "grad_norm": 25.0023136138916, "learning_rate": 1.752995850622407e-05, "loss": 1.2827, "step": 7447 }, { "epoch": 6.18091286307054, "grad_norm": 20.632976531982422, "learning_rate": 1.7529626556016598e-05, "loss": 1.4021, "step": 7448 }, { "epoch": 6.181742738589212, "grad_norm": 18.1376953125, "learning_rate": 1.752929460580913e-05, "loss": 1.2685, "step": 7449 }, { "epoch": 6.182572614107884, "grad_norm": 24.68208885192871, "learning_rate": 1.752896265560166e-05, "loss": 1.6001, "step": 7450 }, { "epoch": 6.183402489626556, "grad_norm": 15.358152389526367, "learning_rate": 1.752863070539419e-05, "loss": 1.1158, "step": 7451 }, { "epoch": 6.1842323651452284, "grad_norm": 17.357261657714844, "learning_rate": 1.7528298755186723e-05, "loss": 0.8855, "step": 7452 }, { "epoch": 6.185062240663901, "grad_norm": 28.278030395507812, "learning_rate": 1.7527966804979255e-05, "loss": 1.9231, "step": 7453 }, { "epoch": 6.185892116182573, "grad_norm": 18.57719612121582, "learning_rate": 1.7527634854771787e-05, "loss": 1.179, "step": 7454 }, { "epoch": 6.186721991701245, "grad_norm": 20.467164993286133, "learning_rate": 1.7527302904564316e-05, "loss": 1.268, "step": 7455 }, { "epoch": 6.187551867219917, "grad_norm": 35.90690994262695, "learning_rate": 
1.7526970954356848e-05, "loss": 1.8238, "step": 7456 }, { "epoch": 6.188381742738589, "grad_norm": 25.57001495361328, "learning_rate": 1.752663900414938e-05, "loss": 1.8493, "step": 7457 }, { "epoch": 6.189211618257262, "grad_norm": 19.348207473754883, "learning_rate": 1.7526307053941912e-05, "loss": 1.1138, "step": 7458 }, { "epoch": 6.190041493775934, "grad_norm": 21.394235610961914, "learning_rate": 1.752597510373444e-05, "loss": 1.1234, "step": 7459 }, { "epoch": 6.190871369294606, "grad_norm": 15.75191879272461, "learning_rate": 1.7525643153526973e-05, "loss": 1.3724, "step": 7460 }, { "epoch": 6.191701244813278, "grad_norm": 18.69289779663086, "learning_rate": 1.7525311203319505e-05, "loss": 1.1647, "step": 7461 }, { "epoch": 6.19253112033195, "grad_norm": 18.868986129760742, "learning_rate": 1.7524979253112037e-05, "loss": 0.9963, "step": 7462 }, { "epoch": 6.193360995850623, "grad_norm": 18.44886016845703, "learning_rate": 1.7524647302904566e-05, "loss": 1.1951, "step": 7463 }, { "epoch": 6.194190871369295, "grad_norm": 18.253990173339844, "learning_rate": 1.7524315352697098e-05, "loss": 1.277, "step": 7464 }, { "epoch": 6.195020746887967, "grad_norm": 15.392300605773926, "learning_rate": 1.752398340248963e-05, "loss": 1.3518, "step": 7465 }, { "epoch": 6.195850622406639, "grad_norm": 13.842775344848633, "learning_rate": 1.752365145228216e-05, "loss": 0.8562, "step": 7466 }, { "epoch": 6.196680497925311, "grad_norm": 30.32674217224121, "learning_rate": 1.752331950207469e-05, "loss": 1.2686, "step": 7467 }, { "epoch": 6.197510373443984, "grad_norm": 23.224008560180664, "learning_rate": 1.752298755186722e-05, "loss": 1.3284, "step": 7468 }, { "epoch": 6.198340248962656, "grad_norm": 23.15694808959961, "learning_rate": 1.752265560165975e-05, "loss": 1.5263, "step": 7469 }, { "epoch": 6.199170124481328, "grad_norm": 12.188117027282715, "learning_rate": 1.7522323651452284e-05, "loss": 1.2011, "step": 7470 }, { "epoch": 6.2, "grad_norm": 19.435224533081055, 
"learning_rate": 1.7521991701244812e-05, "loss": 1.2402, "step": 7471 }, { "epoch": 6.200829875518672, "grad_norm": 17.67034149169922, "learning_rate": 1.7521659751037345e-05, "loss": 1.508, "step": 7472 }, { "epoch": 6.201659751037345, "grad_norm": 12.544506072998047, "learning_rate": 1.7521327800829877e-05, "loss": 0.9785, "step": 7473 }, { "epoch": 6.202489626556017, "grad_norm": 14.830421447753906, "learning_rate": 1.752099585062241e-05, "loss": 1.0681, "step": 7474 }, { "epoch": 6.203319502074689, "grad_norm": 44.721435546875, "learning_rate": 1.7520663900414937e-05, "loss": 0.6934, "step": 7475 }, { "epoch": 6.204149377593361, "grad_norm": 15.49703598022461, "learning_rate": 1.752033195020747e-05, "loss": 0.7297, "step": 7476 }, { "epoch": 6.204979253112033, "grad_norm": 12.919940948486328, "learning_rate": 1.752e-05, "loss": 0.7388, "step": 7477 }, { "epoch": 6.2058091286307056, "grad_norm": 13.098401069641113, "learning_rate": 1.7519668049792534e-05, "loss": 0.8932, "step": 7478 }, { "epoch": 6.206639004149378, "grad_norm": 14.892609596252441, "learning_rate": 1.7519336099585066e-05, "loss": 0.7696, "step": 7479 }, { "epoch": 6.20746887966805, "grad_norm": 13.821897506713867, "learning_rate": 1.7519004149377595e-05, "loss": 0.9424, "step": 7480 }, { "epoch": 6.208298755186722, "grad_norm": 16.40886878967285, "learning_rate": 1.7518672199170127e-05, "loss": 0.9185, "step": 7481 }, { "epoch": 6.209128630705394, "grad_norm": 36.026397705078125, "learning_rate": 1.751834024896266e-05, "loss": 1.4033, "step": 7482 }, { "epoch": 6.2099585062240665, "grad_norm": 32.46321487426758, "learning_rate": 1.7518008298755188e-05, "loss": 1.2242, "step": 7483 }, { "epoch": 6.210788381742739, "grad_norm": 20.456762313842773, "learning_rate": 1.751767634854772e-05, "loss": 1.2398, "step": 7484 }, { "epoch": 6.211618257261411, "grad_norm": 20.108535766601562, "learning_rate": 1.7517344398340252e-05, "loss": 1.649, "step": 7485 }, { "epoch": 6.212448132780083, "grad_norm": 
14.232515335083008, "learning_rate": 1.751701244813278e-05, "loss": 0.8, "step": 7486 }, { "epoch": 6.213278008298755, "grad_norm": 29.122053146362305, "learning_rate": 1.7516680497925313e-05, "loss": 1.0854, "step": 7487 }, { "epoch": 6.2141078838174275, "grad_norm": 20.420127868652344, "learning_rate": 1.751634854771784e-05, "loss": 1.1482, "step": 7488 }, { "epoch": 6.2149377593361, "grad_norm": 21.241533279418945, "learning_rate": 1.7516016597510373e-05, "loss": 1.1412, "step": 7489 }, { "epoch": 6.215767634854772, "grad_norm": 26.935989379882812, "learning_rate": 1.7515684647302906e-05, "loss": 1.2445, "step": 7490 }, { "epoch": 6.216597510373444, "grad_norm": 27.96795082092285, "learning_rate": 1.7515352697095438e-05, "loss": 0.8626, "step": 7491 }, { "epoch": 6.217427385892116, "grad_norm": 21.057191848754883, "learning_rate": 1.7515020746887966e-05, "loss": 1.225, "step": 7492 }, { "epoch": 6.2182572614107885, "grad_norm": 29.940526962280273, "learning_rate": 1.75146887966805e-05, "loss": 1.061, "step": 7493 }, { "epoch": 6.219087136929461, "grad_norm": 23.721935272216797, "learning_rate": 1.751435684647303e-05, "loss": 1.1938, "step": 7494 }, { "epoch": 6.219917012448133, "grad_norm": 14.664339065551758, "learning_rate": 1.7514024896265563e-05, "loss": 0.7647, "step": 7495 }, { "epoch": 6.220746887966805, "grad_norm": 19.776611328125, "learning_rate": 1.751369294605809e-05, "loss": 0.7821, "step": 7496 }, { "epoch": 6.221576763485477, "grad_norm": 19.802942276000977, "learning_rate": 1.7513360995850623e-05, "loss": 1.5149, "step": 7497 }, { "epoch": 6.2224066390041495, "grad_norm": 19.455068588256836, "learning_rate": 1.7513029045643156e-05, "loss": 1.1399, "step": 7498 }, { "epoch": 6.223236514522822, "grad_norm": 19.062034606933594, "learning_rate": 1.7512697095435688e-05, "loss": 1.2284, "step": 7499 }, { "epoch": 6.224066390041494, "grad_norm": 32.94855499267578, "learning_rate": 1.7512365145228216e-05, "loss": 0.7504, "step": 7500 }, { "epoch": 
6.224896265560166, "grad_norm": 19.57490348815918, "learning_rate": 1.751203319502075e-05, "loss": 1.0333, "step": 7501 }, { "epoch": 6.225726141078838, "grad_norm": 18.971851348876953, "learning_rate": 1.751170124481328e-05, "loss": 1.1672, "step": 7502 }, { "epoch": 6.2265560165975105, "grad_norm": 33.265560150146484, "learning_rate": 1.7511369294605813e-05, "loss": 2.3424, "step": 7503 }, { "epoch": 6.227385892116183, "grad_norm": 35.39262008666992, "learning_rate": 1.751103734439834e-05, "loss": 1.6305, "step": 7504 }, { "epoch": 6.228215767634855, "grad_norm": 14.463533401489258, "learning_rate": 1.7510705394190874e-05, "loss": 0.8536, "step": 7505 }, { "epoch": 6.229045643153527, "grad_norm": 17.63410758972168, "learning_rate": 1.7510373443983402e-05, "loss": 1.0382, "step": 7506 }, { "epoch": 6.229875518672199, "grad_norm": 48.705589294433594, "learning_rate": 1.7510041493775934e-05, "loss": 1.5115, "step": 7507 }, { "epoch": 6.230705394190871, "grad_norm": 23.875167846679688, "learning_rate": 1.7509709543568467e-05, "loss": 1.931, "step": 7508 }, { "epoch": 6.231535269709544, "grad_norm": 22.068164825439453, "learning_rate": 1.7509377593360995e-05, "loss": 1.8036, "step": 7509 }, { "epoch": 6.232365145228216, "grad_norm": 16.818317413330078, "learning_rate": 1.7509045643153527e-05, "loss": 0.9351, "step": 7510 }, { "epoch": 6.233195020746888, "grad_norm": 15.516011238098145, "learning_rate": 1.750871369294606e-05, "loss": 1.1234, "step": 7511 }, { "epoch": 6.23402489626556, "grad_norm": 17.66236114501953, "learning_rate": 1.750838174273859e-05, "loss": 1.4882, "step": 7512 }, { "epoch": 6.234854771784232, "grad_norm": 16.403017044067383, "learning_rate": 1.750804979253112e-05, "loss": 0.9626, "step": 7513 }, { "epoch": 6.235684647302905, "grad_norm": 11.776891708374023, "learning_rate": 1.7507717842323652e-05, "loss": 0.7731, "step": 7514 }, { "epoch": 6.236514522821577, "grad_norm": 24.92168617248535, "learning_rate": 1.7507385892116184e-05, "loss": 
2.1918, "step": 7515 }, { "epoch": 6.237344398340249, "grad_norm": 15.448765754699707, "learning_rate": 1.7507053941908717e-05, "loss": 1.3419, "step": 7516 }, { "epoch": 6.238174273858921, "grad_norm": 18.6439208984375, "learning_rate": 1.7506721991701245e-05, "loss": 2.0567, "step": 7517 }, { "epoch": 6.239004149377593, "grad_norm": 12.503771781921387, "learning_rate": 1.7506390041493777e-05, "loss": 0.6895, "step": 7518 }, { "epoch": 6.239834024896266, "grad_norm": 19.650915145874023, "learning_rate": 1.750605809128631e-05, "loss": 1.2553, "step": 7519 }, { "epoch": 6.240663900414938, "grad_norm": 22.65050506591797, "learning_rate": 1.750572614107884e-05, "loss": 1.3776, "step": 7520 }, { "epoch": 6.24149377593361, "grad_norm": 22.691612243652344, "learning_rate": 1.750539419087137e-05, "loss": 1.1924, "step": 7521 }, { "epoch": 6.242323651452282, "grad_norm": 25.194149017333984, "learning_rate": 1.7505062240663902e-05, "loss": 1.6113, "step": 7522 }, { "epoch": 6.243153526970954, "grad_norm": 21.442142486572266, "learning_rate": 1.7504730290456435e-05, "loss": 0.5679, "step": 7523 }, { "epoch": 6.243983402489627, "grad_norm": 15.629301071166992, "learning_rate": 1.7504398340248963e-05, "loss": 0.9758, "step": 7524 }, { "epoch": 6.244813278008299, "grad_norm": 12.687764167785645, "learning_rate": 1.7504066390041495e-05, "loss": 0.8216, "step": 7525 }, { "epoch": 6.245643153526971, "grad_norm": 12.022436141967773, "learning_rate": 1.7503734439834028e-05, "loss": 1.27, "step": 7526 }, { "epoch": 6.246473029045643, "grad_norm": 15.427248001098633, "learning_rate": 1.7503402489626556e-05, "loss": 1.2083, "step": 7527 }, { "epoch": 6.247302904564315, "grad_norm": 21.96967315673828, "learning_rate": 1.750307053941909e-05, "loss": 1.3997, "step": 7528 }, { "epoch": 6.248132780082988, "grad_norm": 22.710330963134766, "learning_rate": 1.7502738589211617e-05, "loss": 2.3778, "step": 7529 }, { "epoch": 6.24896265560166, "grad_norm": 23.593875885009766, "learning_rate": 
1.750240663900415e-05, "loss": 1.0036, "step": 7530 }, { "epoch": 6.249792531120332, "grad_norm": 14.43371295928955, "learning_rate": 1.750207468879668e-05, "loss": 0.8404, "step": 7531 }, { "epoch": 6.250622406639004, "grad_norm": 18.109317779541016, "learning_rate": 1.7501742738589213e-05, "loss": 0.8955, "step": 7532 }, { "epoch": 6.251452282157676, "grad_norm": 20.76642417907715, "learning_rate": 1.7501410788381745e-05, "loss": 1.6031, "step": 7533 }, { "epoch": 6.2522821576763485, "grad_norm": 17.218507766723633, "learning_rate": 1.7501078838174274e-05, "loss": 0.7871, "step": 7534 }, { "epoch": 6.253112033195021, "grad_norm": 15.40943717956543, "learning_rate": 1.7500746887966806e-05, "loss": 1.3742, "step": 7535 }, { "epoch": 6.253941908713693, "grad_norm": 18.227882385253906, "learning_rate": 1.750041493775934e-05, "loss": 0.9615, "step": 7536 }, { "epoch": 6.254771784232365, "grad_norm": 20.070463180541992, "learning_rate": 1.750008298755187e-05, "loss": 1.3808, "step": 7537 }, { "epoch": 6.255601659751037, "grad_norm": 14.73840045928955, "learning_rate": 1.74997510373444e-05, "loss": 1.0927, "step": 7538 }, { "epoch": 6.2564315352697095, "grad_norm": 22.67729949951172, "learning_rate": 1.749941908713693e-05, "loss": 1.7802, "step": 7539 }, { "epoch": 6.257261410788382, "grad_norm": 36.14034652709961, "learning_rate": 1.7499087136929463e-05, "loss": 1.7732, "step": 7540 }, { "epoch": 6.258091286307054, "grad_norm": 14.58385181427002, "learning_rate": 1.7498755186721996e-05, "loss": 1.0622, "step": 7541 }, { "epoch": 6.258921161825726, "grad_norm": 15.370387077331543, "learning_rate": 1.7498423236514524e-05, "loss": 0.9066, "step": 7542 }, { "epoch": 6.259751037344398, "grad_norm": 12.383275985717773, "learning_rate": 1.7498091286307056e-05, "loss": 0.8604, "step": 7543 }, { "epoch": 6.2605809128630705, "grad_norm": 16.116779327392578, "learning_rate": 1.7497759336099585e-05, "loss": 1.2747, "step": 7544 }, { "epoch": 6.261410788381743, "grad_norm": 
19.739072799682617, "learning_rate": 1.7497427385892117e-05, "loss": 0.9549, "step": 7545 }, { "epoch": 6.262240663900415, "grad_norm": 25.52800178527832, "learning_rate": 1.749709543568465e-05, "loss": 1.0544, "step": 7546 }, { "epoch": 6.263070539419087, "grad_norm": 16.40937614440918, "learning_rate": 1.7496763485477178e-05, "loss": 1.5863, "step": 7547 }, { "epoch": 6.263900414937759, "grad_norm": 18.78826904296875, "learning_rate": 1.749643153526971e-05, "loss": 1.515, "step": 7548 }, { "epoch": 6.2647302904564315, "grad_norm": 26.256999969482422, "learning_rate": 1.7496099585062242e-05, "loss": 1.9038, "step": 7549 }, { "epoch": 6.265560165975104, "grad_norm": 21.43804359436035, "learning_rate": 1.749576763485477e-05, "loss": 2.053, "step": 7550 }, { "epoch": 6.266390041493776, "grad_norm": 18.217538833618164, "learning_rate": 1.7495435684647303e-05, "loss": 1.3107, "step": 7551 }, { "epoch": 6.267219917012448, "grad_norm": 27.392230987548828, "learning_rate": 1.7495103734439835e-05, "loss": 1.1566, "step": 7552 }, { "epoch": 6.26804979253112, "grad_norm": 18.227323532104492, "learning_rate": 1.7494771784232367e-05, "loss": 1.0186, "step": 7553 }, { "epoch": 6.2688796680497925, "grad_norm": 24.07082748413086, "learning_rate": 1.7494439834024896e-05, "loss": 1.0479, "step": 7554 }, { "epoch": 6.269709543568465, "grad_norm": 14.694344520568848, "learning_rate": 1.7494107883817428e-05, "loss": 1.2488, "step": 7555 }, { "epoch": 6.270539419087137, "grad_norm": 11.307662010192871, "learning_rate": 1.749377593360996e-05, "loss": 0.7308, "step": 7556 }, { "epoch": 6.271369294605809, "grad_norm": 18.16455078125, "learning_rate": 1.7493443983402492e-05, "loss": 0.9635, "step": 7557 }, { "epoch": 6.272199170124481, "grad_norm": 33.152870178222656, "learning_rate": 1.7493112033195024e-05, "loss": 1.717, "step": 7558 }, { "epoch": 6.2730290456431534, "grad_norm": 23.789897918701172, "learning_rate": 1.7492780082987553e-05, "loss": 1.8955, "step": 7559 }, { "epoch": 
6.273858921161826, "grad_norm": 12.968720436096191, "learning_rate": 1.7492448132780085e-05, "loss": 0.7758, "step": 7560 }, { "epoch": 6.274688796680498, "grad_norm": 17.86550521850586, "learning_rate": 1.7492116182572617e-05, "loss": 1.0951, "step": 7561 }, { "epoch": 6.27551867219917, "grad_norm": 15.156339645385742, "learning_rate": 1.7491784232365146e-05, "loss": 0.7635, "step": 7562 }, { "epoch": 6.276348547717842, "grad_norm": 31.312273025512695, "learning_rate": 1.7491452282157678e-05, "loss": 1.2168, "step": 7563 }, { "epoch": 6.277178423236514, "grad_norm": 29.6898250579834, "learning_rate": 1.749112033195021e-05, "loss": 1.4759, "step": 7564 }, { "epoch": 6.278008298755187, "grad_norm": 20.64043617248535, "learning_rate": 1.749078838174274e-05, "loss": 1.2337, "step": 7565 }, { "epoch": 6.278838174273859, "grad_norm": 16.610994338989258, "learning_rate": 1.749045643153527e-05, "loss": 1.219, "step": 7566 }, { "epoch": 6.279668049792531, "grad_norm": 17.21218490600586, "learning_rate": 1.74901244813278e-05, "loss": 1.4406, "step": 7567 }, { "epoch": 6.280497925311203, "grad_norm": 37.92619705200195, "learning_rate": 1.7489792531120332e-05, "loss": 1.468, "step": 7568 }, { "epoch": 6.281327800829875, "grad_norm": 15.687772750854492, "learning_rate": 1.7489460580912864e-05, "loss": 1.099, "step": 7569 }, { "epoch": 6.282157676348548, "grad_norm": 16.166013717651367, "learning_rate": 1.7489128630705396e-05, "loss": 0.8212, "step": 7570 }, { "epoch": 6.28298755186722, "grad_norm": 10.569631576538086, "learning_rate": 1.7488796680497925e-05, "loss": 0.8043, "step": 7571 }, { "epoch": 6.283817427385892, "grad_norm": 16.828365325927734, "learning_rate": 1.7488464730290457e-05, "loss": 1.439, "step": 7572 }, { "epoch": 6.284647302904564, "grad_norm": 12.509129524230957, "learning_rate": 1.748813278008299e-05, "loss": 0.9545, "step": 7573 }, { "epoch": 6.285477178423236, "grad_norm": 14.390755653381348, "learning_rate": 1.748780082987552e-05, "loss": 1.1995, 
"step": 7574 }, { "epoch": 6.286307053941909, "grad_norm": 20.77954864501953, "learning_rate": 1.748746887966805e-05, "loss": 1.2209, "step": 7575 }, { "epoch": 6.287136929460581, "grad_norm": 13.249897956848145, "learning_rate": 1.7487136929460582e-05, "loss": 1.1309, "step": 7576 }, { "epoch": 6.287966804979253, "grad_norm": 13.722868919372559, "learning_rate": 1.7486804979253114e-05, "loss": 1.0351, "step": 7577 }, { "epoch": 6.288796680497925, "grad_norm": 27.658985137939453, "learning_rate": 1.7486473029045646e-05, "loss": 1.1087, "step": 7578 }, { "epoch": 6.289626556016597, "grad_norm": 17.395933151245117, "learning_rate": 1.7486141078838175e-05, "loss": 1.2452, "step": 7579 }, { "epoch": 6.29045643153527, "grad_norm": 16.622941970825195, "learning_rate": 1.7485809128630707e-05, "loss": 1.5425, "step": 7580 }, { "epoch": 6.291286307053942, "grad_norm": 15.722101211547852, "learning_rate": 1.748547717842324e-05, "loss": 1.0963, "step": 7581 }, { "epoch": 6.292116182572614, "grad_norm": 29.36393165588379, "learning_rate": 1.748514522821577e-05, "loss": 1.4192, "step": 7582 }, { "epoch": 6.292946058091286, "grad_norm": 27.80118179321289, "learning_rate": 1.74848132780083e-05, "loss": 1.6545, "step": 7583 }, { "epoch": 6.293775933609958, "grad_norm": 12.879022598266602, "learning_rate": 1.7484481327800832e-05, "loss": 0.6489, "step": 7584 }, { "epoch": 6.2946058091286305, "grad_norm": 14.45394229888916, "learning_rate": 1.748414937759336e-05, "loss": 0.9284, "step": 7585 }, { "epoch": 6.295435684647303, "grad_norm": 15.79113483428955, "learning_rate": 1.7483817427385893e-05, "loss": 1.4707, "step": 7586 }, { "epoch": 6.296265560165975, "grad_norm": 18.373008728027344, "learning_rate": 1.7483485477178425e-05, "loss": 1.4076, "step": 7587 }, { "epoch": 6.297095435684647, "grad_norm": 41.5284538269043, "learning_rate": 1.7483153526970954e-05, "loss": 1.7772, "step": 7588 }, { "epoch": 6.297925311203319, "grad_norm": 20.535120010375977, "learning_rate": 
1.7482821576763486e-05, "loss": 0.9692, "step": 7589 }, { "epoch": 6.2987551867219915, "grad_norm": 21.651750564575195, "learning_rate": 1.7482489626556018e-05, "loss": 1.2411, "step": 7590 }, { "epoch": 6.299585062240664, "grad_norm": 23.19308853149414, "learning_rate": 1.748215767634855e-05, "loss": 2.0755, "step": 7591 }, { "epoch": 6.300414937759336, "grad_norm": 16.683746337890625, "learning_rate": 1.748182572614108e-05, "loss": 1.3868, "step": 7592 }, { "epoch": 6.301244813278008, "grad_norm": 18.345355987548828, "learning_rate": 1.748149377593361e-05, "loss": 1.038, "step": 7593 }, { "epoch": 6.30207468879668, "grad_norm": 28.742145538330078, "learning_rate": 1.7481161825726143e-05, "loss": 0.6768, "step": 7594 }, { "epoch": 6.3029045643153525, "grad_norm": 20.180091857910156, "learning_rate": 1.7480829875518675e-05, "loss": 1.0933, "step": 7595 }, { "epoch": 6.303734439834025, "grad_norm": 44.98830795288086, "learning_rate": 1.7480497925311204e-05, "loss": 1.1437, "step": 7596 }, { "epoch": 6.304564315352697, "grad_norm": 11.497392654418945, "learning_rate": 1.7480165975103736e-05, "loss": 0.8234, "step": 7597 }, { "epoch": 6.305394190871369, "grad_norm": 25.70444107055664, "learning_rate": 1.7479834024896268e-05, "loss": 1.1636, "step": 7598 }, { "epoch": 6.306224066390041, "grad_norm": 22.714004516601562, "learning_rate": 1.74795020746888e-05, "loss": 1.1978, "step": 7599 }, { "epoch": 6.3070539419087135, "grad_norm": 34.38609313964844, "learning_rate": 1.747917012448133e-05, "loss": 1.0137, "step": 7600 }, { "epoch": 6.307883817427386, "grad_norm": 18.139291763305664, "learning_rate": 1.747883817427386e-05, "loss": 1.1419, "step": 7601 }, { "epoch": 6.308713692946058, "grad_norm": 13.921120643615723, "learning_rate": 1.7478506224066393e-05, "loss": 1.323, "step": 7602 }, { "epoch": 6.30954356846473, "grad_norm": 13.23963737487793, "learning_rate": 1.7478174273858922e-05, "loss": 1.0768, "step": 7603 }, { "epoch": 6.310373443983402, "grad_norm": 
23.133195877075195, "learning_rate": 1.7477842323651454e-05, "loss": 0.8504, "step": 7604 }, { "epoch": 6.3112033195020745, "grad_norm": 21.775848388671875, "learning_rate": 1.7477510373443983e-05, "loss": 1.3238, "step": 7605 }, { "epoch": 6.312033195020747, "grad_norm": 13.051237106323242, "learning_rate": 1.7477178423236515e-05, "loss": 0.6786, "step": 7606 }, { "epoch": 6.312863070539419, "grad_norm": 13.020621299743652, "learning_rate": 1.7476846473029047e-05, "loss": 1.0133, "step": 7607 }, { "epoch": 6.313692946058091, "grad_norm": 20.075876235961914, "learning_rate": 1.7476514522821576e-05, "loss": 0.6454, "step": 7608 }, { "epoch": 6.314522821576763, "grad_norm": 13.941060066223145, "learning_rate": 1.7476182572614108e-05, "loss": 0.9098, "step": 7609 }, { "epoch": 6.3153526970954355, "grad_norm": 16.162965774536133, "learning_rate": 1.747585062240664e-05, "loss": 0.7431, "step": 7610 }, { "epoch": 6.316182572614108, "grad_norm": 16.66385269165039, "learning_rate": 1.7475518672199172e-05, "loss": 1.2637, "step": 7611 }, { "epoch": 6.31701244813278, "grad_norm": 30.465282440185547, "learning_rate": 1.7475186721991704e-05, "loss": 0.6078, "step": 7612 }, { "epoch": 6.317842323651452, "grad_norm": 23.016592025756836, "learning_rate": 1.7474854771784233e-05, "loss": 0.6575, "step": 7613 }, { "epoch": 6.318672199170124, "grad_norm": 35.47492980957031, "learning_rate": 1.7474522821576765e-05, "loss": 1.0482, "step": 7614 }, { "epoch": 6.319502074688796, "grad_norm": 27.681013107299805, "learning_rate": 1.7474190871369297e-05, "loss": 0.9153, "step": 7615 }, { "epoch": 6.320331950207469, "grad_norm": 20.196495056152344, "learning_rate": 1.747385892116183e-05, "loss": 0.9106, "step": 7616 }, { "epoch": 6.321161825726141, "grad_norm": 15.87647819519043, "learning_rate": 1.7473526970954358e-05, "loss": 0.8982, "step": 7617 }, { "epoch": 6.321991701244813, "grad_norm": 13.661940574645996, "learning_rate": 1.747319502074689e-05, "loss": 0.6337, "step": 7618 }, { 
"epoch": 6.322821576763485, "grad_norm": 15.720508575439453, "learning_rate": 1.7472863070539422e-05, "loss": 1.1315, "step": 7619 }, { "epoch": 6.323651452282157, "grad_norm": 17.286218643188477, "learning_rate": 1.7472531120331954e-05, "loss": 1.2031, "step": 7620 }, { "epoch": 6.32448132780083, "grad_norm": 9.284567832946777, "learning_rate": 1.7472199170124483e-05, "loss": 0.5387, "step": 7621 }, { "epoch": 6.325311203319502, "grad_norm": 30.209548950195312, "learning_rate": 1.7471867219917015e-05, "loss": 1.9365, "step": 7622 }, { "epoch": 6.326141078838174, "grad_norm": 30.947708129882812, "learning_rate": 1.7471535269709544e-05, "loss": 1.4572, "step": 7623 }, { "epoch": 6.326970954356846, "grad_norm": 13.988222122192383, "learning_rate": 1.7471203319502076e-05, "loss": 0.7702, "step": 7624 }, { "epoch": 6.327800829875518, "grad_norm": 17.656982421875, "learning_rate": 1.7470871369294608e-05, "loss": 1.2736, "step": 7625 }, { "epoch": 6.328630705394191, "grad_norm": 16.330764770507812, "learning_rate": 1.7470539419087137e-05, "loss": 1.4764, "step": 7626 }, { "epoch": 6.329460580912863, "grad_norm": 28.379636764526367, "learning_rate": 1.747020746887967e-05, "loss": 1.3101, "step": 7627 }, { "epoch": 6.330290456431535, "grad_norm": 36.485069274902344, "learning_rate": 1.74698755186722e-05, "loss": 1.2643, "step": 7628 }, { "epoch": 6.331120331950207, "grad_norm": 25.748315811157227, "learning_rate": 1.746954356846473e-05, "loss": 2.3949, "step": 7629 }, { "epoch": 6.331950207468879, "grad_norm": 31.881187438964844, "learning_rate": 1.746921161825726e-05, "loss": 0.8521, "step": 7630 }, { "epoch": 6.332780082987552, "grad_norm": 18.405349731445312, "learning_rate": 1.7468879668049794e-05, "loss": 1.3332, "step": 7631 }, { "epoch": 6.333609958506224, "grad_norm": 26.242359161376953, "learning_rate": 1.7468547717842326e-05, "loss": 1.8526, "step": 7632 }, { "epoch": 6.334439834024896, "grad_norm": 10.66959285736084, "learning_rate": 1.7468215767634855e-05, 
"loss": 1.2655, "step": 7633 }, { "epoch": 6.335269709543568, "grad_norm": 12.952720642089844, "learning_rate": 1.7467883817427387e-05, "loss": 0.9957, "step": 7634 }, { "epoch": 6.33609958506224, "grad_norm": 20.235729217529297, "learning_rate": 1.746755186721992e-05, "loss": 0.9213, "step": 7635 }, { "epoch": 6.3369294605809126, "grad_norm": 17.396665573120117, "learning_rate": 1.746721991701245e-05, "loss": 1.3379, "step": 7636 }, { "epoch": 6.337759336099585, "grad_norm": 20.293615341186523, "learning_rate": 1.7466887966804983e-05, "loss": 1.0178, "step": 7637 }, { "epoch": 6.338589211618257, "grad_norm": 16.04298973083496, "learning_rate": 1.7466556016597512e-05, "loss": 0.9107, "step": 7638 }, { "epoch": 6.339419087136929, "grad_norm": 11.575018882751465, "learning_rate": 1.7466224066390044e-05, "loss": 1.0758, "step": 7639 }, { "epoch": 6.340248962655601, "grad_norm": 17.859569549560547, "learning_rate": 1.7465892116182576e-05, "loss": 0.9598, "step": 7640 }, { "epoch": 6.3410788381742735, "grad_norm": 21.394573211669922, "learning_rate": 1.7465560165975105e-05, "loss": 1.7405, "step": 7641 }, { "epoch": 6.341908713692946, "grad_norm": 10.000768661499023, "learning_rate": 1.7465228215767637e-05, "loss": 0.6591, "step": 7642 }, { "epoch": 6.342738589211618, "grad_norm": 10.726304054260254, "learning_rate": 1.746489626556017e-05, "loss": 0.9012, "step": 7643 }, { "epoch": 6.34356846473029, "grad_norm": 22.791322708129883, "learning_rate": 1.7464564315352698e-05, "loss": 1.039, "step": 7644 }, { "epoch": 6.344398340248962, "grad_norm": 19.55134391784668, "learning_rate": 1.746423236514523e-05, "loss": 0.9919, "step": 7645 }, { "epoch": 6.3452282157676345, "grad_norm": 26.668964385986328, "learning_rate": 1.746390041493776e-05, "loss": 0.7184, "step": 7646 }, { "epoch": 6.346058091286307, "grad_norm": 18.99489974975586, "learning_rate": 1.746356846473029e-05, "loss": 1.1016, "step": 7647 }, { "epoch": 6.346887966804979, "grad_norm": 15.084918022155762, 
"learning_rate": 1.7463236514522823e-05, "loss": 1.554, "step": 7648 }, { "epoch": 6.347717842323651, "grad_norm": 20.980440139770508, "learning_rate": 1.7462904564315355e-05, "loss": 1.1394, "step": 7649 }, { "epoch": 6.348547717842323, "grad_norm": 32.86796951293945, "learning_rate": 1.7462572614107883e-05, "loss": 1.2166, "step": 7650 }, { "epoch": 6.3493775933609955, "grad_norm": 16.927839279174805, "learning_rate": 1.7462240663900416e-05, "loss": 1.3877, "step": 7651 }, { "epoch": 6.350207468879668, "grad_norm": 13.862161636352539, "learning_rate": 1.7461908713692948e-05, "loss": 1.4332, "step": 7652 }, { "epoch": 6.35103734439834, "grad_norm": 43.57988739013672, "learning_rate": 1.746157676348548e-05, "loss": 1.3186, "step": 7653 }, { "epoch": 6.351867219917012, "grad_norm": 18.201250076293945, "learning_rate": 1.746124481327801e-05, "loss": 1.1904, "step": 7654 }, { "epoch": 6.352697095435684, "grad_norm": 13.177973747253418, "learning_rate": 1.746091286307054e-05, "loss": 0.7142, "step": 7655 }, { "epoch": 6.3535269709543565, "grad_norm": 17.03507423400879, "learning_rate": 1.7460580912863073e-05, "loss": 0.8474, "step": 7656 }, { "epoch": 6.354356846473029, "grad_norm": 28.715682983398438, "learning_rate": 1.7460248962655605e-05, "loss": 1.6049, "step": 7657 }, { "epoch": 6.355186721991701, "grad_norm": 15.36640453338623, "learning_rate": 1.7459917012448134e-05, "loss": 0.5981, "step": 7658 }, { "epoch": 6.356016597510373, "grad_norm": 27.240222930908203, "learning_rate": 1.7459585062240666e-05, "loss": 1.3106, "step": 7659 }, { "epoch": 6.356846473029045, "grad_norm": 24.824106216430664, "learning_rate": 1.7459253112033198e-05, "loss": 1.9348, "step": 7660 }, { "epoch": 6.3576763485477175, "grad_norm": 19.066442489624023, "learning_rate": 1.7458921161825727e-05, "loss": 1.1191, "step": 7661 }, { "epoch": 6.35850622406639, "grad_norm": 25.82100486755371, "learning_rate": 1.745858921161826e-05, "loss": 1.3861, "step": 7662 }, { "epoch": 6.359336099585062, 
"grad_norm": 12.435738563537598, "learning_rate": 1.745825726141079e-05, "loss": 1.0304, "step": 7663 }, { "epoch": 6.360165975103734, "grad_norm": 27.30099868774414, "learning_rate": 1.745792531120332e-05, "loss": 1.9318, "step": 7664 }, { "epoch": 6.360995850622406, "grad_norm": 24.924015045166016, "learning_rate": 1.745759336099585e-05, "loss": 1.2428, "step": 7665 }, { "epoch": 6.361825726141078, "grad_norm": 28.592519760131836, "learning_rate": 1.7457261410788384e-05, "loss": 1.0803, "step": 7666 }, { "epoch": 6.362655601659751, "grad_norm": 13.16184139251709, "learning_rate": 1.7456929460580912e-05, "loss": 1.3008, "step": 7667 }, { "epoch": 6.363485477178423, "grad_norm": 20.824888229370117, "learning_rate": 1.7456597510373444e-05, "loss": 0.9328, "step": 7668 }, { "epoch": 6.364315352697095, "grad_norm": 21.49787712097168, "learning_rate": 1.7456265560165977e-05, "loss": 1.145, "step": 7669 }, { "epoch": 6.365145228215767, "grad_norm": 23.018857955932617, "learning_rate": 1.745593360995851e-05, "loss": 1.2463, "step": 7670 }, { "epoch": 6.365975103734439, "grad_norm": 11.107039451599121, "learning_rate": 1.7455601659751037e-05, "loss": 0.6021, "step": 7671 }, { "epoch": 6.366804979253112, "grad_norm": 39.719478607177734, "learning_rate": 1.745526970954357e-05, "loss": 1.6645, "step": 7672 }, { "epoch": 6.367634854771785, "grad_norm": 14.342501640319824, "learning_rate": 1.74549377593361e-05, "loss": 0.9544, "step": 7673 }, { "epoch": 6.368464730290457, "grad_norm": 15.366959571838379, "learning_rate": 1.7454605809128634e-05, "loss": 1.0561, "step": 7674 }, { "epoch": 6.369294605809129, "grad_norm": 22.827478408813477, "learning_rate": 1.7454273858921162e-05, "loss": 1.2342, "step": 7675 }, { "epoch": 6.370124481327801, "grad_norm": 24.009183883666992, "learning_rate": 1.7453941908713695e-05, "loss": 1.213, "step": 7676 }, { "epoch": 6.3709543568464735, "grad_norm": 35.85163116455078, "learning_rate": 1.7453609958506227e-05, "loss": 1.7377, "step": 7677 }, { 
"epoch": 6.371784232365146, "grad_norm": 20.13219451904297, "learning_rate": 1.745327800829876e-05, "loss": 1.4178, "step": 7678 }, { "epoch": 6.372614107883818, "grad_norm": 18.0572509765625, "learning_rate": 1.7452946058091288e-05, "loss": 1.2158, "step": 7679 }, { "epoch": 6.37344398340249, "grad_norm": 14.348331451416016, "learning_rate": 1.745261410788382e-05, "loss": 0.9986, "step": 7680 }, { "epoch": 6.374273858921162, "grad_norm": 14.79898738861084, "learning_rate": 1.7452282157676352e-05, "loss": 1.1524, "step": 7681 }, { "epoch": 6.3751037344398345, "grad_norm": 19.8277587890625, "learning_rate": 1.745195020746888e-05, "loss": 1.2443, "step": 7682 }, { "epoch": 6.375933609958507, "grad_norm": 20.010541915893555, "learning_rate": 1.7451618257261413e-05, "loss": 1.4484, "step": 7683 }, { "epoch": 6.376763485477179, "grad_norm": 21.329328536987305, "learning_rate": 1.745128630705394e-05, "loss": 1.2439, "step": 7684 }, { "epoch": 6.377593360995851, "grad_norm": 12.607309341430664, "learning_rate": 1.7450954356846473e-05, "loss": 0.9661, "step": 7685 }, { "epoch": 6.378423236514523, "grad_norm": 32.03825378417969, "learning_rate": 1.7450622406639005e-05, "loss": 1.9672, "step": 7686 }, { "epoch": 6.3792531120331954, "grad_norm": 18.446020126342773, "learning_rate": 1.7450290456431534e-05, "loss": 1.0259, "step": 7687 }, { "epoch": 6.380082987551868, "grad_norm": 54.93814468383789, "learning_rate": 1.7449958506224066e-05, "loss": 1.3067, "step": 7688 }, { "epoch": 6.38091286307054, "grad_norm": 13.323673248291016, "learning_rate": 1.74496265560166e-05, "loss": 1.2836, "step": 7689 }, { "epoch": 6.381742738589212, "grad_norm": 18.583932876586914, "learning_rate": 1.744929460580913e-05, "loss": 1.7648, "step": 7690 }, { "epoch": 6.382572614107884, "grad_norm": 16.736719131469727, "learning_rate": 1.7448962655601663e-05, "loss": 1.3901, "step": 7691 }, { "epoch": 6.383402489626556, "grad_norm": 18.705066680908203, "learning_rate": 1.744863070539419e-05, "loss": 
1.3115, "step": 7692 }, { "epoch": 6.384232365145229, "grad_norm": 24.783058166503906, "learning_rate": 1.7448298755186723e-05, "loss": 1.809, "step": 7693 }, { "epoch": 6.385062240663901, "grad_norm": 13.044930458068848, "learning_rate": 1.7447966804979256e-05, "loss": 0.8274, "step": 7694 }, { "epoch": 6.385892116182573, "grad_norm": 21.85962677001953, "learning_rate": 1.7447634854771788e-05, "loss": 1.5987, "step": 7695 }, { "epoch": 6.386721991701245, "grad_norm": 23.63111686706543, "learning_rate": 1.7447302904564316e-05, "loss": 1.1616, "step": 7696 }, { "epoch": 6.387551867219917, "grad_norm": 36.297569274902344, "learning_rate": 1.744697095435685e-05, "loss": 1.2778, "step": 7697 }, { "epoch": 6.38838174273859, "grad_norm": 14.176471710205078, "learning_rate": 1.744663900414938e-05, "loss": 0.9618, "step": 7698 }, { "epoch": 6.389211618257262, "grad_norm": 16.1259765625, "learning_rate": 1.7446307053941913e-05, "loss": 1.3292, "step": 7699 }, { "epoch": 6.390041493775934, "grad_norm": 21.0507869720459, "learning_rate": 1.744597510373444e-05, "loss": 1.3974, "step": 7700 }, { "epoch": 6.390871369294606, "grad_norm": 22.7436466217041, "learning_rate": 1.7445643153526974e-05, "loss": 1.0111, "step": 7701 }, { "epoch": 6.391701244813278, "grad_norm": 24.472362518310547, "learning_rate": 1.7445311203319502e-05, "loss": 1.6315, "step": 7702 }, { "epoch": 6.392531120331951, "grad_norm": 15.973488807678223, "learning_rate": 1.7444979253112034e-05, "loss": 1.446, "step": 7703 }, { "epoch": 6.393360995850623, "grad_norm": 22.636680603027344, "learning_rate": 1.7444647302904566e-05, "loss": 1.6725, "step": 7704 }, { "epoch": 6.394190871369295, "grad_norm": 16.952747344970703, "learning_rate": 1.7444315352697095e-05, "loss": 1.1565, "step": 7705 }, { "epoch": 6.395020746887967, "grad_norm": 13.701515197753906, "learning_rate": 1.7443983402489627e-05, "loss": 0.8463, "step": 7706 }, { "epoch": 6.395850622406639, "grad_norm": 9.205513000488281, "learning_rate": 
1.744365145228216e-05, "loss": 0.498, "step": 7707 }, { "epoch": 6.396680497925312, "grad_norm": 20.32866096496582, "learning_rate": 1.7443319502074688e-05, "loss": 1.4251, "step": 7708 }, { "epoch": 6.397510373443984, "grad_norm": 14.093331336975098, "learning_rate": 1.744298755186722e-05, "loss": 0.881, "step": 7709 }, { "epoch": 6.398340248962656, "grad_norm": 15.958992958068848, "learning_rate": 1.7442655601659752e-05, "loss": 1.6985, "step": 7710 }, { "epoch": 6.399170124481328, "grad_norm": 18.597370147705078, "learning_rate": 1.7442323651452284e-05, "loss": 1.5279, "step": 7711 }, { "epoch": 6.4, "grad_norm": 11.939080238342285, "learning_rate": 1.7441991701244813e-05, "loss": 1.2049, "step": 7712 }, { "epoch": 6.4008298755186726, "grad_norm": 19.064945220947266, "learning_rate": 1.7441659751037345e-05, "loss": 1.4626, "step": 7713 }, { "epoch": 6.401659751037345, "grad_norm": 11.699700355529785, "learning_rate": 1.7441327800829877e-05, "loss": 0.8892, "step": 7714 }, { "epoch": 6.402489626556017, "grad_norm": 14.232871055603027, "learning_rate": 1.744099585062241e-05, "loss": 1.1183, "step": 7715 }, { "epoch": 6.403319502074689, "grad_norm": 24.863069534301758, "learning_rate": 1.744066390041494e-05, "loss": 1.5697, "step": 7716 }, { "epoch": 6.404149377593361, "grad_norm": 23.61874771118164, "learning_rate": 1.744033195020747e-05, "loss": 1.9406, "step": 7717 }, { "epoch": 6.4049792531120335, "grad_norm": 23.607751846313477, "learning_rate": 1.7440000000000002e-05, "loss": 2.1023, "step": 7718 }, { "epoch": 6.405809128630706, "grad_norm": 26.786746978759766, "learning_rate": 1.7439668049792535e-05, "loss": 0.911, "step": 7719 }, { "epoch": 6.406639004149378, "grad_norm": 23.36604118347168, "learning_rate": 1.7439336099585063e-05, "loss": 1.4892, "step": 7720 }, { "epoch": 6.40746887966805, "grad_norm": 16.813478469848633, "learning_rate": 1.7439004149377595e-05, "loss": 1.082, "step": 7721 }, { "epoch": 6.408298755186722, "grad_norm": 19.268997192382812, 
"learning_rate": 1.7438672199170124e-05, "loss": 0.9145, "step": 7722 }, { "epoch": 6.4091286307053945, "grad_norm": 24.77747344970703, "learning_rate": 1.7438340248962656e-05, "loss": 1.1932, "step": 7723 }, { "epoch": 6.409958506224067, "grad_norm": 22.30681610107422, "learning_rate": 1.7438008298755188e-05, "loss": 1.5153, "step": 7724 }, { "epoch": 6.410788381742739, "grad_norm": 17.155668258666992, "learning_rate": 1.7437676348547717e-05, "loss": 1.4534, "step": 7725 }, { "epoch": 6.411618257261411, "grad_norm": 22.031742095947266, "learning_rate": 1.743734439834025e-05, "loss": 1.3862, "step": 7726 }, { "epoch": 6.412448132780083, "grad_norm": 15.097495079040527, "learning_rate": 1.743701244813278e-05, "loss": 1.4132, "step": 7727 }, { "epoch": 6.4132780082987555, "grad_norm": 15.269611358642578, "learning_rate": 1.7436680497925313e-05, "loss": 0.7865, "step": 7728 }, { "epoch": 6.414107883817428, "grad_norm": 27.392850875854492, "learning_rate": 1.7436348547717842e-05, "loss": 1.2644, "step": 7729 }, { "epoch": 6.4149377593361, "grad_norm": 27.483478546142578, "learning_rate": 1.7436016597510374e-05, "loss": 1.0362, "step": 7730 }, { "epoch": 6.415767634854772, "grad_norm": 15.164435386657715, "learning_rate": 1.7435684647302906e-05, "loss": 1.2183, "step": 7731 }, { "epoch": 6.416597510373444, "grad_norm": 21.302518844604492, "learning_rate": 1.743535269709544e-05, "loss": 1.3519, "step": 7732 }, { "epoch": 6.4174273858921165, "grad_norm": 19.681171417236328, "learning_rate": 1.7435020746887967e-05, "loss": 1.3693, "step": 7733 }, { "epoch": 6.418257261410789, "grad_norm": 17.10175895690918, "learning_rate": 1.74346887966805e-05, "loss": 1.5306, "step": 7734 }, { "epoch": 6.419087136929461, "grad_norm": 14.053706169128418, "learning_rate": 1.743435684647303e-05, "loss": 0.8176, "step": 7735 }, { "epoch": 6.419917012448133, "grad_norm": 12.841574668884277, "learning_rate": 1.7434024896265563e-05, "loss": 1.0662, "step": 7736 }, { "epoch": 6.420746887966805, 
"grad_norm": 17.23958396911621, "learning_rate": 1.7433692946058092e-05, "loss": 1.4477, "step": 7737 }, { "epoch": 6.4215767634854775, "grad_norm": 12.340383529663086, "learning_rate": 1.7433360995850624e-05, "loss": 0.5405, "step": 7738 }, { "epoch": 6.42240663900415, "grad_norm": 17.655176162719727, "learning_rate": 1.7433029045643156e-05, "loss": 1.9774, "step": 7739 }, { "epoch": 6.423236514522822, "grad_norm": 20.016279220581055, "learning_rate": 1.7432697095435685e-05, "loss": 1.2836, "step": 7740 }, { "epoch": 6.424066390041494, "grad_norm": 24.7131290435791, "learning_rate": 1.7432365145228217e-05, "loss": 0.9167, "step": 7741 }, { "epoch": 6.424896265560166, "grad_norm": 12.360593795776367, "learning_rate": 1.743203319502075e-05, "loss": 0.5033, "step": 7742 }, { "epoch": 6.425726141078838, "grad_norm": 20.016870498657227, "learning_rate": 1.7431701244813278e-05, "loss": 1.9119, "step": 7743 }, { "epoch": 6.426556016597511, "grad_norm": 13.71534538269043, "learning_rate": 1.743136929460581e-05, "loss": 1.0399, "step": 7744 }, { "epoch": 6.427385892116183, "grad_norm": 26.249216079711914, "learning_rate": 1.7431037344398342e-05, "loss": 1.8125, "step": 7745 }, { "epoch": 6.428215767634855, "grad_norm": 27.381023406982422, "learning_rate": 1.743070539419087e-05, "loss": 1.8674, "step": 7746 }, { "epoch": 6.429045643153527, "grad_norm": 12.883930206298828, "learning_rate": 1.7430373443983403e-05, "loss": 1.176, "step": 7747 }, { "epoch": 6.429875518672199, "grad_norm": 11.362692832946777, "learning_rate": 1.7430041493775935e-05, "loss": 0.865, "step": 7748 }, { "epoch": 6.430705394190872, "grad_norm": 24.592208862304688, "learning_rate": 1.7429709543568467e-05, "loss": 1.6642, "step": 7749 }, { "epoch": 6.431535269709544, "grad_norm": 15.671830177307129, "learning_rate": 1.7429377593360996e-05, "loss": 1.2155, "step": 7750 }, { "epoch": 6.432365145228216, "grad_norm": 30.36592674255371, "learning_rate": 1.7429045643153528e-05, "loss": 1.0206, "step": 7751 }, 
{ "epoch": 6.433195020746888, "grad_norm": 18.131803512573242, "learning_rate": 1.742871369294606e-05, "loss": 1.462, "step": 7752 }, { "epoch": 6.43402489626556, "grad_norm": 20.49419403076172, "learning_rate": 1.7428381742738592e-05, "loss": 1.6127, "step": 7753 }, { "epoch": 6.434854771784233, "grad_norm": 16.994596481323242, "learning_rate": 1.742804979253112e-05, "loss": 1.2183, "step": 7754 }, { "epoch": 6.435684647302905, "grad_norm": 24.76921272277832, "learning_rate": 1.7427717842323653e-05, "loss": 0.7047, "step": 7755 }, { "epoch": 6.436514522821577, "grad_norm": 21.407001495361328, "learning_rate": 1.7427385892116185e-05, "loss": 1.1981, "step": 7756 }, { "epoch": 6.437344398340249, "grad_norm": 10.75798511505127, "learning_rate": 1.7427053941908717e-05, "loss": 0.4297, "step": 7757 }, { "epoch": 6.438174273858921, "grad_norm": 15.231673240661621, "learning_rate": 1.7426721991701246e-05, "loss": 1.5431, "step": 7758 }, { "epoch": 6.439004149377594, "grad_norm": 23.2497615814209, "learning_rate": 1.7426390041493778e-05, "loss": 1.1725, "step": 7759 }, { "epoch": 6.439834024896266, "grad_norm": 11.391206741333008, "learning_rate": 1.742605809128631e-05, "loss": 1.1051, "step": 7760 }, { "epoch": 6.440663900414938, "grad_norm": 19.605224609375, "learning_rate": 1.742572614107884e-05, "loss": 0.9544, "step": 7761 }, { "epoch": 6.44149377593361, "grad_norm": 27.296659469604492, "learning_rate": 1.742539419087137e-05, "loss": 1.5383, "step": 7762 }, { "epoch": 6.442323651452282, "grad_norm": 20.578712463378906, "learning_rate": 1.74250622406639e-05, "loss": 1.7326, "step": 7763 }, { "epoch": 6.443153526970955, "grad_norm": 20.91423225402832, "learning_rate": 1.7424730290456432e-05, "loss": 1.0808, "step": 7764 }, { "epoch": 6.443983402489627, "grad_norm": 16.972349166870117, "learning_rate": 1.7424398340248964e-05, "loss": 1.1973, "step": 7765 }, { "epoch": 6.444813278008299, "grad_norm": 12.943840980529785, "learning_rate": 1.7424066390041493e-05, "loss": 
0.537, "step": 7766 }, { "epoch": 6.445643153526971, "grad_norm": 30.00069808959961, "learning_rate": 1.7423734439834025e-05, "loss": 1.2052, "step": 7767 }, { "epoch": 6.446473029045643, "grad_norm": 17.533761978149414, "learning_rate": 1.7423402489626557e-05, "loss": 1.0638, "step": 7768 }, { "epoch": 6.4473029045643155, "grad_norm": 20.841796875, "learning_rate": 1.742307053941909e-05, "loss": 1.4985, "step": 7769 }, { "epoch": 6.448132780082988, "grad_norm": 13.818970680236816, "learning_rate": 1.742273858921162e-05, "loss": 0.8647, "step": 7770 }, { "epoch": 6.44896265560166, "grad_norm": 12.370536804199219, "learning_rate": 1.742240663900415e-05, "loss": 0.9639, "step": 7771 }, { "epoch": 6.449792531120332, "grad_norm": 19.108434677124023, "learning_rate": 1.7422074688796682e-05, "loss": 1.3831, "step": 7772 }, { "epoch": 6.450622406639004, "grad_norm": 17.354969024658203, "learning_rate": 1.7421742738589214e-05, "loss": 1.3246, "step": 7773 }, { "epoch": 6.4514522821576765, "grad_norm": 20.911405563354492, "learning_rate": 1.7421410788381746e-05, "loss": 1.1876, "step": 7774 }, { "epoch": 6.452282157676349, "grad_norm": 26.133956909179688, "learning_rate": 1.7421078838174275e-05, "loss": 1.8176, "step": 7775 }, { "epoch": 6.453112033195021, "grad_norm": 29.467370986938477, "learning_rate": 1.7420746887966807e-05, "loss": 1.5603, "step": 7776 }, { "epoch": 6.453941908713693, "grad_norm": 43.30150604248047, "learning_rate": 1.742041493775934e-05, "loss": 0.6147, "step": 7777 }, { "epoch": 6.454771784232365, "grad_norm": 20.681852340698242, "learning_rate": 1.7420082987551868e-05, "loss": 1.3123, "step": 7778 }, { "epoch": 6.4556016597510375, "grad_norm": 21.489099502563477, "learning_rate": 1.74197510373444e-05, "loss": 1.4557, "step": 7779 }, { "epoch": 6.45643153526971, "grad_norm": 23.980073928833008, "learning_rate": 1.7419419087136932e-05, "loss": 1.8381, "step": 7780 }, { "epoch": 6.457261410788382, "grad_norm": 23.5744571685791, "learning_rate": 
1.741908713692946e-05, "loss": 1.089, "step": 7781 }, { "epoch": 6.458091286307054, "grad_norm": 27.557235717773438, "learning_rate": 1.7418755186721993e-05, "loss": 1.4129, "step": 7782 }, { "epoch": 6.458921161825726, "grad_norm": 14.85185718536377, "learning_rate": 1.7418423236514525e-05, "loss": 1.1714, "step": 7783 }, { "epoch": 6.4597510373443985, "grad_norm": 50.23826217651367, "learning_rate": 1.7418091286307054e-05, "loss": 1.7363, "step": 7784 }, { "epoch": 6.460580912863071, "grad_norm": 14.930389404296875, "learning_rate": 1.7417759336099586e-05, "loss": 1.3034, "step": 7785 }, { "epoch": 6.461410788381743, "grad_norm": 18.293415069580078, "learning_rate": 1.7417427385892118e-05, "loss": 0.947, "step": 7786 }, { "epoch": 6.462240663900415, "grad_norm": 18.55876350402832, "learning_rate": 1.7417095435684647e-05, "loss": 1.4787, "step": 7787 }, { "epoch": 6.463070539419087, "grad_norm": 23.136045455932617, "learning_rate": 1.741676348547718e-05, "loss": 0.7533, "step": 7788 }, { "epoch": 6.4639004149377595, "grad_norm": 15.338878631591797, "learning_rate": 1.741643153526971e-05, "loss": 1.1302, "step": 7789 }, { "epoch": 6.464730290456432, "grad_norm": 21.918106079101562, "learning_rate": 1.7416099585062243e-05, "loss": 1.3846, "step": 7790 }, { "epoch": 6.465560165975104, "grad_norm": 15.528098106384277, "learning_rate": 1.7415767634854772e-05, "loss": 1.0553, "step": 7791 }, { "epoch": 6.466390041493776, "grad_norm": 11.411590576171875, "learning_rate": 1.7415435684647304e-05, "loss": 0.8777, "step": 7792 }, { "epoch": 6.467219917012448, "grad_norm": 13.435050964355469, "learning_rate": 1.7415103734439836e-05, "loss": 1.2905, "step": 7793 }, { "epoch": 6.4680497925311204, "grad_norm": 21.235532760620117, "learning_rate": 1.7414771784232368e-05, "loss": 1.5472, "step": 7794 }, { "epoch": 6.468879668049793, "grad_norm": 15.134590148925781, "learning_rate": 1.74144398340249e-05, "loss": 1.3442, "step": 7795 }, { "epoch": 6.469709543568465, "grad_norm": 
23.31841278076172, "learning_rate": 1.741410788381743e-05, "loss": 0.8173, "step": 7796 }, { "epoch": 6.470539419087137, "grad_norm": 27.012292861938477, "learning_rate": 1.741377593360996e-05, "loss": 0.9887, "step": 7797 }, { "epoch": 6.471369294605809, "grad_norm": 20.58364486694336, "learning_rate": 1.7413443983402493e-05, "loss": 2.0454, "step": 7798 }, { "epoch": 6.472199170124481, "grad_norm": 15.413619995117188, "learning_rate": 1.7413112033195022e-05, "loss": 0.8063, "step": 7799 }, { "epoch": 6.473029045643154, "grad_norm": 20.085617065429688, "learning_rate": 1.7412780082987554e-05, "loss": 1.4155, "step": 7800 }, { "epoch": 6.473858921161826, "grad_norm": 18.432415008544922, "learning_rate": 1.7412448132780083e-05, "loss": 1.0988, "step": 7801 }, { "epoch": 6.474688796680498, "grad_norm": 26.670026779174805, "learning_rate": 1.7412116182572615e-05, "loss": 1.722, "step": 7802 }, { "epoch": 6.47551867219917, "grad_norm": 21.000890731811523, "learning_rate": 1.7411784232365147e-05, "loss": 1.6444, "step": 7803 }, { "epoch": 6.476348547717842, "grad_norm": 27.05122184753418, "learning_rate": 1.7411452282157676e-05, "loss": 1.5238, "step": 7804 }, { "epoch": 6.477178423236515, "grad_norm": 20.64327621459961, "learning_rate": 1.7411120331950208e-05, "loss": 1.8344, "step": 7805 }, { "epoch": 6.478008298755187, "grad_norm": 26.984006881713867, "learning_rate": 1.741078838174274e-05, "loss": 1.2157, "step": 7806 }, { "epoch": 6.478838174273859, "grad_norm": 19.164152145385742, "learning_rate": 1.7410456431535272e-05, "loss": 1.1822, "step": 7807 }, { "epoch": 6.479668049792531, "grad_norm": 14.372171401977539, "learning_rate": 1.74101244813278e-05, "loss": 1.1959, "step": 7808 }, { "epoch": 6.480497925311203, "grad_norm": 30.51333236694336, "learning_rate": 1.7409792531120333e-05, "loss": 2.0567, "step": 7809 }, { "epoch": 6.481327800829876, "grad_norm": 13.986751556396484, "learning_rate": 1.7409460580912865e-05, "loss": 0.6271, "step": 7810 }, { "epoch": 
6.482157676348548, "grad_norm": 17.80008316040039, "learning_rate": 1.7409128630705397e-05, "loss": 1.0631, "step": 7811 }, { "epoch": 6.48298755186722, "grad_norm": 24.096647262573242, "learning_rate": 1.7408796680497926e-05, "loss": 0.9484, "step": 7812 }, { "epoch": 6.483817427385892, "grad_norm": 23.60335922241211, "learning_rate": 1.7408464730290458e-05, "loss": 1.0554, "step": 7813 }, { "epoch": 6.484647302904564, "grad_norm": 15.222533226013184, "learning_rate": 1.740813278008299e-05, "loss": 0.8763, "step": 7814 }, { "epoch": 6.485477178423237, "grad_norm": 19.48080062866211, "learning_rate": 1.7407800829875522e-05, "loss": 1.1615, "step": 7815 }, { "epoch": 6.486307053941909, "grad_norm": 10.15544605255127, "learning_rate": 1.740746887966805e-05, "loss": 0.9449, "step": 7816 }, { "epoch": 6.487136929460581, "grad_norm": 18.40871238708496, "learning_rate": 1.7407136929460583e-05, "loss": 1.6716, "step": 7817 }, { "epoch": 6.487966804979253, "grad_norm": 32.236087799072266, "learning_rate": 1.7406804979253115e-05, "loss": 2.5176, "step": 7818 }, { "epoch": 6.488796680497925, "grad_norm": 29.36351203918457, "learning_rate": 1.7406473029045644e-05, "loss": 1.6258, "step": 7819 }, { "epoch": 6.4896265560165975, "grad_norm": 15.364218711853027, "learning_rate": 1.7406141078838176e-05, "loss": 1.2268, "step": 7820 }, { "epoch": 6.49045643153527, "grad_norm": 19.09854507446289, "learning_rate": 1.7405809128630708e-05, "loss": 1.3779, "step": 7821 }, { "epoch": 6.491286307053942, "grad_norm": 16.07976531982422, "learning_rate": 1.7405477178423237e-05, "loss": 1.554, "step": 7822 }, { "epoch": 6.492116182572614, "grad_norm": 15.762815475463867, "learning_rate": 1.740514522821577e-05, "loss": 1.2449, "step": 7823 }, { "epoch": 6.492946058091286, "grad_norm": 18.27437400817871, "learning_rate": 1.74048132780083e-05, "loss": 1.3102, "step": 7824 }, { "epoch": 6.4937759336099585, "grad_norm": 17.027729034423828, "learning_rate": 1.740448132780083e-05, "loss": 0.7077, 
"step": 7825 }, { "epoch": 6.494605809128631, "grad_norm": 15.809619903564453, "learning_rate": 1.740414937759336e-05, "loss": 1.5463, "step": 7826 }, { "epoch": 6.495435684647303, "grad_norm": 34.757049560546875, "learning_rate": 1.7403817427385894e-05, "loss": 1.0222, "step": 7827 }, { "epoch": 6.496265560165975, "grad_norm": 19.58380699157715, "learning_rate": 1.7403485477178426e-05, "loss": 1.6998, "step": 7828 }, { "epoch": 6.497095435684647, "grad_norm": 22.66246223449707, "learning_rate": 1.7403153526970955e-05, "loss": 1.265, "step": 7829 }, { "epoch": 6.4979253112033195, "grad_norm": 26.299659729003906, "learning_rate": 1.7402821576763487e-05, "loss": 1.2221, "step": 7830 }, { "epoch": 6.498755186721992, "grad_norm": 39.461334228515625, "learning_rate": 1.740248962655602e-05, "loss": 1.1367, "step": 7831 }, { "epoch": 6.499585062240664, "grad_norm": 13.784341812133789, "learning_rate": 1.740215767634855e-05, "loss": 0.7923, "step": 7832 }, { "epoch": 6.500414937759336, "grad_norm": 19.029659271240234, "learning_rate": 1.740182572614108e-05, "loss": 1.1296, "step": 7833 }, { "epoch": 6.501244813278008, "grad_norm": 19.20414161682129, "learning_rate": 1.7401493775933612e-05, "loss": 1.1164, "step": 7834 }, { "epoch": 6.5020746887966805, "grad_norm": 11.993654251098633, "learning_rate": 1.7401161825726144e-05, "loss": 0.7257, "step": 7835 }, { "epoch": 6.502904564315353, "grad_norm": 12.943391799926758, "learning_rate": 1.7400829875518676e-05, "loss": 0.9206, "step": 7836 }, { "epoch": 6.503734439834025, "grad_norm": 19.26972770690918, "learning_rate": 1.7400497925311205e-05, "loss": 1.1837, "step": 7837 }, { "epoch": 6.504564315352697, "grad_norm": 28.422698974609375, "learning_rate": 1.7400165975103737e-05, "loss": 1.789, "step": 7838 }, { "epoch": 6.505394190871369, "grad_norm": 23.936433792114258, "learning_rate": 1.7399834024896265e-05, "loss": 1.1057, "step": 7839 }, { "epoch": 6.5062240663900415, "grad_norm": 28.511404037475586, "learning_rate": 
1.7399502074688798e-05, "loss": 2.1059, "step": 7840 }, { "epoch": 6.507053941908714, "grad_norm": 15.924304962158203, "learning_rate": 1.739917012448133e-05, "loss": 1.0778, "step": 7841 }, { "epoch": 6.507883817427386, "grad_norm": 16.70020294189453, "learning_rate": 1.739883817427386e-05, "loss": 1.1507, "step": 7842 }, { "epoch": 6.508713692946058, "grad_norm": 33.47677993774414, "learning_rate": 1.739850622406639e-05, "loss": 0.9906, "step": 7843 }, { "epoch": 6.50954356846473, "grad_norm": 20.999380111694336, "learning_rate": 1.7398174273858923e-05, "loss": 1.5243, "step": 7844 }, { "epoch": 6.5103734439834025, "grad_norm": 26.11970329284668, "learning_rate": 1.739784232365145e-05, "loss": 1.0236, "step": 7845 }, { "epoch": 6.511203319502075, "grad_norm": 21.09021759033203, "learning_rate": 1.7397510373443983e-05, "loss": 1.4388, "step": 7846 }, { "epoch": 6.512033195020747, "grad_norm": 18.920888900756836, "learning_rate": 1.7397178423236516e-05, "loss": 1.4372, "step": 7847 }, { "epoch": 6.512863070539419, "grad_norm": 15.149566650390625, "learning_rate": 1.7396846473029048e-05, "loss": 1.025, "step": 7848 }, { "epoch": 6.513692946058091, "grad_norm": 16.20283317565918, "learning_rate": 1.739651452282158e-05, "loss": 1.11, "step": 7849 }, { "epoch": 6.514522821576763, "grad_norm": 24.856008529663086, "learning_rate": 1.739618257261411e-05, "loss": 1.5858, "step": 7850 }, { "epoch": 6.515352697095436, "grad_norm": 16.877099990844727, "learning_rate": 1.739585062240664e-05, "loss": 1.2181, "step": 7851 }, { "epoch": 6.516182572614108, "grad_norm": 15.666220664978027, "learning_rate": 1.7395518672199173e-05, "loss": 1.2413, "step": 7852 }, { "epoch": 6.51701244813278, "grad_norm": 16.86266326904297, "learning_rate": 1.7395186721991705e-05, "loss": 1.5621, "step": 7853 }, { "epoch": 6.517842323651452, "grad_norm": 14.596012115478516, "learning_rate": 1.7394854771784234e-05, "loss": 1.2906, "step": 7854 }, { "epoch": 6.518672199170124, "grad_norm": 
35.34034729003906, "learning_rate": 1.7394522821576766e-05, "loss": 1.0184, "step": 7855 }, { "epoch": 6.519502074688797, "grad_norm": 21.94422721862793, "learning_rate": 1.7394190871369298e-05, "loss": 0.8799, "step": 7856 }, { "epoch": 6.520331950207469, "grad_norm": 14.237483024597168, "learning_rate": 1.7393858921161826e-05, "loss": 0.8479, "step": 7857 }, { "epoch": 6.521161825726141, "grad_norm": 14.995660781860352, "learning_rate": 1.739352697095436e-05, "loss": 1.5372, "step": 7858 }, { "epoch": 6.521991701244813, "grad_norm": 15.749707221984863, "learning_rate": 1.739319502074689e-05, "loss": 1.6508, "step": 7859 }, { "epoch": 6.522821576763485, "grad_norm": 25.571460723876953, "learning_rate": 1.739286307053942e-05, "loss": 1.7922, "step": 7860 }, { "epoch": 6.523651452282158, "grad_norm": 18.404939651489258, "learning_rate": 1.739253112033195e-05, "loss": 1.2047, "step": 7861 }, { "epoch": 6.52448132780083, "grad_norm": 21.16774559020996, "learning_rate": 1.739219917012448e-05, "loss": 1.1105, "step": 7862 }, { "epoch": 6.525311203319502, "grad_norm": 11.731226921081543, "learning_rate": 1.7391867219917012e-05, "loss": 1.0931, "step": 7863 }, { "epoch": 6.526141078838174, "grad_norm": 20.67555809020996, "learning_rate": 1.7391535269709544e-05, "loss": 1.0013, "step": 7864 }, { "epoch": 6.526970954356846, "grad_norm": 18.362754821777344, "learning_rate": 1.7391203319502077e-05, "loss": 0.9436, "step": 7865 }, { "epoch": 6.527800829875519, "grad_norm": 23.593732833862305, "learning_rate": 1.7390871369294605e-05, "loss": 1.6349, "step": 7866 }, { "epoch": 6.528630705394191, "grad_norm": 12.64046859741211, "learning_rate": 1.7390539419087137e-05, "loss": 1.1109, "step": 7867 }, { "epoch": 6.529460580912863, "grad_norm": 17.540130615234375, "learning_rate": 1.739020746887967e-05, "loss": 1.3763, "step": 7868 }, { "epoch": 6.530290456431535, "grad_norm": 22.2153263092041, "learning_rate": 1.73898755186722e-05, "loss": 1.118, "step": 7869 }, { "epoch": 
6.531120331950207, "grad_norm": 21.721925735473633, "learning_rate": 1.738954356846473e-05, "loss": 1.0565, "step": 7870 }, { "epoch": 6.5319502074688796, "grad_norm": 23.75509262084961, "learning_rate": 1.7389211618257262e-05, "loss": 1.3462, "step": 7871 }, { "epoch": 6.532780082987552, "grad_norm": 20.421701431274414, "learning_rate": 1.7388879668049795e-05, "loss": 1.3441, "step": 7872 }, { "epoch": 6.533609958506224, "grad_norm": 19.206159591674805, "learning_rate": 1.7388547717842327e-05, "loss": 1.1292, "step": 7873 }, { "epoch": 6.534439834024896, "grad_norm": 16.84747314453125, "learning_rate": 1.738821576763486e-05, "loss": 1.3063, "step": 7874 }, { "epoch": 6.535269709543568, "grad_norm": 22.414613723754883, "learning_rate": 1.7387883817427387e-05, "loss": 1.477, "step": 7875 }, { "epoch": 6.5360995850622405, "grad_norm": 15.68396282196045, "learning_rate": 1.738755186721992e-05, "loss": 1.2563, "step": 7876 }, { "epoch": 6.536929460580913, "grad_norm": 16.40544891357422, "learning_rate": 1.738721991701245e-05, "loss": 1.2449, "step": 7877 }, { "epoch": 6.537759336099585, "grad_norm": 20.626436233520508, "learning_rate": 1.738688796680498e-05, "loss": 0.9846, "step": 7878 }, { "epoch": 6.538589211618257, "grad_norm": 20.18192481994629, "learning_rate": 1.7386556016597513e-05, "loss": 1.5967, "step": 7879 }, { "epoch": 6.539419087136929, "grad_norm": 29.083881378173828, "learning_rate": 1.738622406639004e-05, "loss": 1.5652, "step": 7880 }, { "epoch": 6.5402489626556015, "grad_norm": 24.198562622070312, "learning_rate": 1.7385892116182573e-05, "loss": 2.1439, "step": 7881 }, { "epoch": 6.541078838174274, "grad_norm": 29.161808013916016, "learning_rate": 1.7385560165975105e-05, "loss": 1.2679, "step": 7882 }, { "epoch": 6.541908713692946, "grad_norm": 17.747543334960938, "learning_rate": 1.7385228215767634e-05, "loss": 1.6221, "step": 7883 }, { "epoch": 6.542738589211618, "grad_norm": 32.10588455200195, "learning_rate": 1.7384896265560166e-05, "loss": 
1.1749, "step": 7884 }, { "epoch": 6.54356846473029, "grad_norm": 22.94249725341797, "learning_rate": 1.73845643153527e-05, "loss": 1.9464, "step": 7885 }, { "epoch": 6.5443983402489625, "grad_norm": 14.556571960449219, "learning_rate": 1.738423236514523e-05, "loss": 1.0961, "step": 7886 }, { "epoch": 6.545228215767635, "grad_norm": 13.221028327941895, "learning_rate": 1.738390041493776e-05, "loss": 0.8996, "step": 7887 }, { "epoch": 6.546058091286307, "grad_norm": 28.7424373626709, "learning_rate": 1.738356846473029e-05, "loss": 2.6782, "step": 7888 }, { "epoch": 6.546887966804979, "grad_norm": 22.78950309753418, "learning_rate": 1.7383236514522823e-05, "loss": 1.3012, "step": 7889 }, { "epoch": 6.547717842323651, "grad_norm": 27.613801956176758, "learning_rate": 1.7382904564315356e-05, "loss": 1.0615, "step": 7890 }, { "epoch": 6.5485477178423235, "grad_norm": 11.636600494384766, "learning_rate": 1.7382572614107884e-05, "loss": 0.8963, "step": 7891 }, { "epoch": 6.549377593360996, "grad_norm": 19.562379837036133, "learning_rate": 1.7382240663900416e-05, "loss": 1.1646, "step": 7892 }, { "epoch": 6.550207468879668, "grad_norm": 11.906347274780273, "learning_rate": 1.738190871369295e-05, "loss": 0.8584, "step": 7893 }, { "epoch": 6.55103734439834, "grad_norm": 16.95161247253418, "learning_rate": 1.738157676348548e-05, "loss": 1.3111, "step": 7894 }, { "epoch": 6.551867219917012, "grad_norm": 26.973278045654297, "learning_rate": 1.738124481327801e-05, "loss": 1.8964, "step": 7895 }, { "epoch": 6.5526970954356845, "grad_norm": 23.16038703918457, "learning_rate": 1.738091286307054e-05, "loss": 1.4983, "step": 7896 }, { "epoch": 6.553526970954357, "grad_norm": 19.471561431884766, "learning_rate": 1.7380580912863074e-05, "loss": 1.3682, "step": 7897 }, { "epoch": 6.554356846473029, "grad_norm": 27.78094482421875, "learning_rate": 1.7380248962655602e-05, "loss": 1.5397, "step": 7898 }, { "epoch": 6.555186721991701, "grad_norm": 21.586767196655273, "learning_rate": 
1.7379917012448134e-05, "loss": 1.0587, "step": 7899 }, { "epoch": 6.556016597510373, "grad_norm": 12.082122802734375, "learning_rate": 1.7379585062240666e-05, "loss": 0.4398, "step": 7900 }, { "epoch": 6.556846473029045, "grad_norm": 14.763150215148926, "learning_rate": 1.7379253112033195e-05, "loss": 1.5525, "step": 7901 }, { "epoch": 6.557676348547718, "grad_norm": 20.556499481201172, "learning_rate": 1.7378921161825727e-05, "loss": 1.1103, "step": 7902 }, { "epoch": 6.55850622406639, "grad_norm": 27.15522575378418, "learning_rate": 1.737858921161826e-05, "loss": 1.3979, "step": 7903 }, { "epoch": 6.559336099585062, "grad_norm": 18.78414535522461, "learning_rate": 1.7378257261410788e-05, "loss": 0.9505, "step": 7904 }, { "epoch": 6.560165975103734, "grad_norm": 13.787810325622559, "learning_rate": 1.737792531120332e-05, "loss": 1.1803, "step": 7905 }, { "epoch": 6.560995850622406, "grad_norm": 13.42776107788086, "learning_rate": 1.7377593360995852e-05, "loss": 1.0018, "step": 7906 }, { "epoch": 6.561825726141079, "grad_norm": 14.098533630371094, "learning_rate": 1.7377261410788384e-05, "loss": 1.0763, "step": 7907 }, { "epoch": 6.562655601659751, "grad_norm": 19.293277740478516, "learning_rate": 1.7376929460580913e-05, "loss": 1.3711, "step": 7908 }, { "epoch": 6.563485477178423, "grad_norm": 19.73581886291504, "learning_rate": 1.7376597510373445e-05, "loss": 1.1743, "step": 7909 }, { "epoch": 6.564315352697095, "grad_norm": 13.62847900390625, "learning_rate": 1.7376265560165977e-05, "loss": 1.3574, "step": 7910 }, { "epoch": 6.565145228215767, "grad_norm": 27.14822006225586, "learning_rate": 1.737593360995851e-05, "loss": 1.1476, "step": 7911 }, { "epoch": 6.56597510373444, "grad_norm": 16.090499877929688, "learning_rate": 1.7375601659751038e-05, "loss": 1.146, "step": 7912 }, { "epoch": 6.566804979253112, "grad_norm": 16.158781051635742, "learning_rate": 1.737526970954357e-05, "loss": 1.4342, "step": 7913 }, { "epoch": 6.567634854771784, "grad_norm": 
15.557672500610352, "learning_rate": 1.7374937759336102e-05, "loss": 1.8272, "step": 7914 }, { "epoch": 6.568464730290456, "grad_norm": 31.055971145629883, "learning_rate": 1.7374605809128635e-05, "loss": 1.9931, "step": 7915 }, { "epoch": 6.569294605809128, "grad_norm": 21.032527923583984, "learning_rate": 1.7374273858921163e-05, "loss": 1.7917, "step": 7916 }, { "epoch": 6.570124481327801, "grad_norm": 20.07542610168457, "learning_rate": 1.7373941908713695e-05, "loss": 1.5938, "step": 7917 }, { "epoch": 6.570954356846473, "grad_norm": 18.203094482421875, "learning_rate": 1.7373609958506224e-05, "loss": 0.8651, "step": 7918 }, { "epoch": 6.571784232365145, "grad_norm": 14.589595794677734, "learning_rate": 1.7373278008298756e-05, "loss": 1.5022, "step": 7919 }, { "epoch": 6.572614107883817, "grad_norm": 15.793533325195312, "learning_rate": 1.7372946058091288e-05, "loss": 0.9621, "step": 7920 }, { "epoch": 6.573443983402489, "grad_norm": 20.731050491333008, "learning_rate": 1.7372614107883817e-05, "loss": 1.3864, "step": 7921 }, { "epoch": 6.574273858921162, "grad_norm": 17.593067169189453, "learning_rate": 1.737228215767635e-05, "loss": 1.0911, "step": 7922 }, { "epoch": 6.575103734439834, "grad_norm": 20.346155166625977, "learning_rate": 1.737195020746888e-05, "loss": 1.216, "step": 7923 }, { "epoch": 6.575933609958506, "grad_norm": 20.01076889038086, "learning_rate": 1.737161825726141e-05, "loss": 1.5089, "step": 7924 }, { "epoch": 6.576763485477178, "grad_norm": 15.037456512451172, "learning_rate": 1.7371286307053942e-05, "loss": 0.5873, "step": 7925 }, { "epoch": 6.57759336099585, "grad_norm": 32.95010757446289, "learning_rate": 1.7370954356846474e-05, "loss": 0.9248, "step": 7926 }, { "epoch": 6.5784232365145225, "grad_norm": 25.09032440185547, "learning_rate": 1.7370622406639006e-05, "loss": 1.0976, "step": 7927 }, { "epoch": 6.579253112033195, "grad_norm": 18.561838150024414, "learning_rate": 1.737029045643154e-05, "loss": 0.9612, "step": 7928 }, { "epoch": 
6.580082987551867, "grad_norm": 21.34018898010254, "learning_rate": 1.7369958506224067e-05, "loss": 1.2635, "step": 7929 }, { "epoch": 6.580912863070539, "grad_norm": 16.203371047973633, "learning_rate": 1.73696265560166e-05, "loss": 1.6261, "step": 7930 }, { "epoch": 6.581742738589211, "grad_norm": 21.893264770507812, "learning_rate": 1.736929460580913e-05, "loss": 0.9374, "step": 7931 }, { "epoch": 6.5825726141078835, "grad_norm": 21.328699111938477, "learning_rate": 1.7368962655601663e-05, "loss": 1.571, "step": 7932 }, { "epoch": 6.583402489626556, "grad_norm": 16.4656925201416, "learning_rate": 1.7368630705394192e-05, "loss": 0.7243, "step": 7933 }, { "epoch": 6.584232365145228, "grad_norm": 24.626174926757812, "learning_rate": 1.7368298755186724e-05, "loss": 1.3995, "step": 7934 }, { "epoch": 6.5850622406639, "grad_norm": 16.865009307861328, "learning_rate": 1.7367966804979256e-05, "loss": 1.1869, "step": 7935 }, { "epoch": 6.585892116182572, "grad_norm": 13.222450256347656, "learning_rate": 1.7367634854771785e-05, "loss": 0.769, "step": 7936 }, { "epoch": 6.5867219917012445, "grad_norm": 17.880508422851562, "learning_rate": 1.7367302904564317e-05, "loss": 1.6099, "step": 7937 }, { "epoch": 6.587551867219917, "grad_norm": 12.406417846679688, "learning_rate": 1.736697095435685e-05, "loss": 1.2991, "step": 7938 }, { "epoch": 6.588381742738589, "grad_norm": 32.69926834106445, "learning_rate": 1.7366639004149378e-05, "loss": 0.8827, "step": 7939 }, { "epoch": 6.589211618257261, "grad_norm": 20.301563262939453, "learning_rate": 1.736630705394191e-05, "loss": 1.391, "step": 7940 }, { "epoch": 6.590041493775933, "grad_norm": 13.077038764953613, "learning_rate": 1.736597510373444e-05, "loss": 0.8689, "step": 7941 }, { "epoch": 6.5908713692946055, "grad_norm": 14.908224105834961, "learning_rate": 1.736564315352697e-05, "loss": 1.0506, "step": 7942 }, { "epoch": 6.591701244813278, "grad_norm": 15.863921165466309, "learning_rate": 1.7365311203319503e-05, "loss": 1.3693, 
"step": 7943 }, { "epoch": 6.59253112033195, "grad_norm": 13.562248229980469, "learning_rate": 1.7364979253112035e-05, "loss": 0.8453, "step": 7944 }, { "epoch": 6.593360995850622, "grad_norm": 19.44388198852539, "learning_rate": 1.7364647302904564e-05, "loss": 1.1507, "step": 7945 }, { "epoch": 6.594190871369294, "grad_norm": 14.936483383178711, "learning_rate": 1.7364315352697096e-05, "loss": 0.9488, "step": 7946 }, { "epoch": 6.5950207468879665, "grad_norm": 15.491985321044922, "learning_rate": 1.7363983402489628e-05, "loss": 1.1322, "step": 7947 }, { "epoch": 6.595850622406639, "grad_norm": 17.368009567260742, "learning_rate": 1.736365145228216e-05, "loss": 1.0618, "step": 7948 }, { "epoch": 6.596680497925311, "grad_norm": 26.365840911865234, "learning_rate": 1.736331950207469e-05, "loss": 0.936, "step": 7949 }, { "epoch": 6.597510373443983, "grad_norm": 30.593244552612305, "learning_rate": 1.736298755186722e-05, "loss": 1.3094, "step": 7950 }, { "epoch": 6.598340248962655, "grad_norm": 16.063831329345703, "learning_rate": 1.7362655601659753e-05, "loss": 1.1199, "step": 7951 }, { "epoch": 6.5991701244813274, "grad_norm": 19.333005905151367, "learning_rate": 1.7362323651452285e-05, "loss": 1.3942, "step": 7952 }, { "epoch": 6.6, "grad_norm": 21.38053321838379, "learning_rate": 1.7361991701244814e-05, "loss": 2.2921, "step": 7953 }, { "epoch": 6.600829875518672, "grad_norm": 30.983501434326172, "learning_rate": 1.7361659751037346e-05, "loss": 1.4377, "step": 7954 }, { "epoch": 6.601659751037344, "grad_norm": 18.305524826049805, "learning_rate": 1.7361327800829878e-05, "loss": 1.391, "step": 7955 }, { "epoch": 6.602489626556016, "grad_norm": 19.30066680908203, "learning_rate": 1.7360995850622407e-05, "loss": 1.1782, "step": 7956 }, { "epoch": 6.603319502074688, "grad_norm": 15.722156524658203, "learning_rate": 1.736066390041494e-05, "loss": 1.4645, "step": 7957 }, { "epoch": 6.604149377593361, "grad_norm": 25.131610870361328, "learning_rate": 
1.736033195020747e-05, "loss": 1.6047, "step": 7958 }, { "epoch": 6.604979253112033, "grad_norm": 19.520200729370117, "learning_rate": 1.736e-05, "loss": 1.2552, "step": 7959 }, { "epoch": 6.605809128630705, "grad_norm": 38.100948333740234, "learning_rate": 1.7359668049792532e-05, "loss": 1.4187, "step": 7960 }, { "epoch": 6.606639004149377, "grad_norm": 20.081499099731445, "learning_rate": 1.7359336099585064e-05, "loss": 1.1717, "step": 7961 }, { "epoch": 6.607468879668049, "grad_norm": 19.28070640563965, "learning_rate": 1.7359004149377593e-05, "loss": 1.2666, "step": 7962 }, { "epoch": 6.608298755186722, "grad_norm": 13.387832641601562, "learning_rate": 1.7358672199170125e-05, "loss": 0.8864, "step": 7963 }, { "epoch": 6.609128630705394, "grad_norm": 33.4726676940918, "learning_rate": 1.7358340248962657e-05, "loss": 1.7657, "step": 7964 }, { "epoch": 6.609958506224066, "grad_norm": 19.04335594177246, "learning_rate": 1.735800829875519e-05, "loss": 1.0, "step": 7965 }, { "epoch": 6.610788381742738, "grad_norm": 17.871654510498047, "learning_rate": 1.7357676348547718e-05, "loss": 1.331, "step": 7966 }, { "epoch": 6.61161825726141, "grad_norm": 17.246444702148438, "learning_rate": 1.735734439834025e-05, "loss": 1.1584, "step": 7967 }, { "epoch": 6.612448132780083, "grad_norm": 14.599173545837402, "learning_rate": 1.7357012448132782e-05, "loss": 0.7371, "step": 7968 }, { "epoch": 6.613278008298755, "grad_norm": 16.773408889770508, "learning_rate": 1.7356680497925314e-05, "loss": 1.8977, "step": 7969 }, { "epoch": 6.614107883817427, "grad_norm": 12.974517822265625, "learning_rate": 1.7356348547717843e-05, "loss": 0.4708, "step": 7970 }, { "epoch": 6.614937759336099, "grad_norm": 29.545982360839844, "learning_rate": 1.7356016597510375e-05, "loss": 1.5795, "step": 7971 }, { "epoch": 6.615767634854771, "grad_norm": 14.216233253479004, "learning_rate": 1.7355684647302907e-05, "loss": 0.7287, "step": 7972 }, { "epoch": 6.616597510373444, "grad_norm": 16.5625057220459, 
"learning_rate": 1.735535269709544e-05, "loss": 1.0115, "step": 7973 }, { "epoch": 6.617427385892116, "grad_norm": 16.260133743286133, "learning_rate": 1.7355020746887968e-05, "loss": 0.643, "step": 7974 }, { "epoch": 6.618257261410788, "grad_norm": 16.929058074951172, "learning_rate": 1.73546887966805e-05, "loss": 1.1176, "step": 7975 }, { "epoch": 6.61908713692946, "grad_norm": 29.133392333984375, "learning_rate": 1.7354356846473032e-05, "loss": 1.105, "step": 7976 }, { "epoch": 6.619917012448132, "grad_norm": 17.8804931640625, "learning_rate": 1.735402489626556e-05, "loss": 0.922, "step": 7977 }, { "epoch": 6.6207468879668046, "grad_norm": 11.309954643249512, "learning_rate": 1.7353692946058093e-05, "loss": 0.5705, "step": 7978 }, { "epoch": 6.621576763485477, "grad_norm": 19.112520217895508, "learning_rate": 1.735336099585062e-05, "loss": 0.9114, "step": 7979 }, { "epoch": 6.622406639004149, "grad_norm": 19.127065658569336, "learning_rate": 1.7353029045643154e-05, "loss": 1.6469, "step": 7980 }, { "epoch": 6.623236514522821, "grad_norm": 14.866788864135742, "learning_rate": 1.7352697095435686e-05, "loss": 1.0433, "step": 7981 }, { "epoch": 6.624066390041493, "grad_norm": 17.301605224609375, "learning_rate": 1.7352365145228218e-05, "loss": 1.1884, "step": 7982 }, { "epoch": 6.6248962655601655, "grad_norm": 18.107240676879883, "learning_rate": 1.7352033195020747e-05, "loss": 1.3484, "step": 7983 }, { "epoch": 6.625726141078838, "grad_norm": 24.47007179260254, "learning_rate": 1.735170124481328e-05, "loss": 1.9105, "step": 7984 }, { "epoch": 6.62655601659751, "grad_norm": 29.291135787963867, "learning_rate": 1.735136929460581e-05, "loss": 1.8839, "step": 7985 }, { "epoch": 6.627385892116182, "grad_norm": 16.673437118530273, "learning_rate": 1.7351037344398343e-05, "loss": 1.3533, "step": 7986 }, { "epoch": 6.628215767634854, "grad_norm": 21.799938201904297, "learning_rate": 1.7350705394190872e-05, "loss": 0.8874, "step": 7987 }, { "epoch": 6.6290456431535265, 
"grad_norm": 19.523303985595703, "learning_rate": 1.7350373443983404e-05, "loss": 1.0146, "step": 7988 }, { "epoch": 6.629875518672199, "grad_norm": 24.510866165161133, "learning_rate": 1.7350041493775936e-05, "loss": 2.0179, "step": 7989 }, { "epoch": 6.630705394190871, "grad_norm": 26.189327239990234, "learning_rate": 1.7349709543568468e-05, "loss": 1.4761, "step": 7990 }, { "epoch": 6.631535269709543, "grad_norm": 16.902324676513672, "learning_rate": 1.7349377593360997e-05, "loss": 1.2651, "step": 7991 }, { "epoch": 6.632365145228215, "grad_norm": 26.36150550842285, "learning_rate": 1.734904564315353e-05, "loss": 1.7632, "step": 7992 }, { "epoch": 6.6331950207468875, "grad_norm": 17.040386199951172, "learning_rate": 1.734871369294606e-05, "loss": 1.5381, "step": 7993 }, { "epoch": 6.63402489626556, "grad_norm": 20.894269943237305, "learning_rate": 1.7348381742738593e-05, "loss": 1.353, "step": 7994 }, { "epoch": 6.634854771784232, "grad_norm": 25.662029266357422, "learning_rate": 1.7348049792531122e-05, "loss": 1.769, "step": 7995 }, { "epoch": 6.635684647302904, "grad_norm": 13.696526527404785, "learning_rate": 1.7347717842323654e-05, "loss": 1.0518, "step": 7996 }, { "epoch": 6.636514522821576, "grad_norm": 16.728477478027344, "learning_rate": 1.7347385892116183e-05, "loss": 1.2685, "step": 7997 }, { "epoch": 6.6373443983402485, "grad_norm": 26.743520736694336, "learning_rate": 1.7347053941908715e-05, "loss": 1.4868, "step": 7998 }, { "epoch": 6.638174273858921, "grad_norm": 20.816844940185547, "learning_rate": 1.7346721991701247e-05, "loss": 1.2226, "step": 7999 }, { "epoch": 6.639004149377593, "grad_norm": 18.130643844604492, "learning_rate": 1.7346390041493776e-05, "loss": 1.0727, "step": 8000 }, { "epoch": 6.639834024896265, "grad_norm": 21.81082534790039, "learning_rate": 1.7346058091286308e-05, "loss": 0.9204, "step": 8001 }, { "epoch": 6.640663900414938, "grad_norm": 30.15707015991211, "learning_rate": 1.734572614107884e-05, "loss": 1.8346, "step": 8002 
}, { "epoch": 6.64149377593361, "grad_norm": 14.243931770324707, "learning_rate": 1.734539419087137e-05, "loss": 0.9364, "step": 8003 }, { "epoch": 6.6423236514522825, "grad_norm": 13.178717613220215, "learning_rate": 1.73450622406639e-05, "loss": 1.0533, "step": 8004 }, { "epoch": 6.643153526970955, "grad_norm": 14.851512908935547, "learning_rate": 1.7344730290456433e-05, "loss": 1.0162, "step": 8005 }, { "epoch": 6.643983402489627, "grad_norm": 13.215270042419434, "learning_rate": 1.7344398340248965e-05, "loss": 1.2479, "step": 8006 }, { "epoch": 6.644813278008299, "grad_norm": 21.87887191772461, "learning_rate": 1.7344066390041497e-05, "loss": 1.368, "step": 8007 }, { "epoch": 6.645643153526971, "grad_norm": 17.90068817138672, "learning_rate": 1.7343734439834026e-05, "loss": 1.096, "step": 8008 }, { "epoch": 6.6464730290456435, "grad_norm": 14.501585960388184, "learning_rate": 1.7343402489626558e-05, "loss": 1.1205, "step": 8009 }, { "epoch": 6.647302904564316, "grad_norm": 20.229585647583008, "learning_rate": 1.734307053941909e-05, "loss": 1.5603, "step": 8010 }, { "epoch": 6.648132780082988, "grad_norm": 20.289445877075195, "learning_rate": 1.7342738589211622e-05, "loss": 0.9604, "step": 8011 }, { "epoch": 6.64896265560166, "grad_norm": 26.441686630249023, "learning_rate": 1.734240663900415e-05, "loss": 1.5282, "step": 8012 }, { "epoch": 6.649792531120332, "grad_norm": 24.384456634521484, "learning_rate": 1.7342074688796683e-05, "loss": 1.2707, "step": 8013 }, { "epoch": 6.6506224066390045, "grad_norm": 13.511503219604492, "learning_rate": 1.7341742738589215e-05, "loss": 0.6322, "step": 8014 }, { "epoch": 6.651452282157677, "grad_norm": 26.560546875, "learning_rate": 1.7341410788381744e-05, "loss": 1.6226, "step": 8015 }, { "epoch": 6.652282157676349, "grad_norm": 11.914207458496094, "learning_rate": 1.7341078838174276e-05, "loss": 0.5745, "step": 8016 }, { "epoch": 6.653112033195021, "grad_norm": 16.08330726623535, "learning_rate": 1.7340746887966808e-05, 
"loss": 1.2586, "step": 8017 }, { "epoch": 6.653941908713693, "grad_norm": 26.843568801879883, "learning_rate": 1.7340414937759337e-05, "loss": 1.414, "step": 8018 }, { "epoch": 6.6547717842323655, "grad_norm": 22.133243560791016, "learning_rate": 1.734008298755187e-05, "loss": 1.3356, "step": 8019 }, { "epoch": 6.655601659751038, "grad_norm": 29.713869094848633, "learning_rate": 1.7339751037344397e-05, "loss": 1.2786, "step": 8020 }, { "epoch": 6.65643153526971, "grad_norm": 18.83808135986328, "learning_rate": 1.733941908713693e-05, "loss": 1.4884, "step": 8021 }, { "epoch": 6.657261410788382, "grad_norm": 13.540387153625488, "learning_rate": 1.733908713692946e-05, "loss": 1.0459, "step": 8022 }, { "epoch": 6.658091286307054, "grad_norm": 23.28159523010254, "learning_rate": 1.7338755186721994e-05, "loss": 1.0282, "step": 8023 }, { "epoch": 6.6589211618257265, "grad_norm": 14.857200622558594, "learning_rate": 1.7338423236514522e-05, "loss": 1.0651, "step": 8024 }, { "epoch": 6.659751037344399, "grad_norm": 19.28218650817871, "learning_rate": 1.7338091286307055e-05, "loss": 0.9766, "step": 8025 }, { "epoch": 6.660580912863071, "grad_norm": 18.331195831298828, "learning_rate": 1.7337759336099587e-05, "loss": 1.1879, "step": 8026 }, { "epoch": 6.661410788381743, "grad_norm": 13.005749702453613, "learning_rate": 1.733742738589212e-05, "loss": 1.3584, "step": 8027 }, { "epoch": 6.662240663900415, "grad_norm": 17.93126678466797, "learning_rate": 1.7337095435684647e-05, "loss": 1.0041, "step": 8028 }, { "epoch": 6.6630705394190874, "grad_norm": 21.574478149414062, "learning_rate": 1.733676348547718e-05, "loss": 1.3373, "step": 8029 }, { "epoch": 6.66390041493776, "grad_norm": 17.081708908081055, "learning_rate": 1.733643153526971e-05, "loss": 1.265, "step": 8030 }, { "epoch": 6.664730290456432, "grad_norm": 17.44403648376465, "learning_rate": 1.7336099585062244e-05, "loss": 1.4726, "step": 8031 }, { "epoch": 6.665560165975104, "grad_norm": 12.677777290344238, 
"learning_rate": 1.7335767634854773e-05, "loss": 1.3939, "step": 8032 }, { "epoch": 6.666390041493776, "grad_norm": 21.114402770996094, "learning_rate": 1.7335435684647305e-05, "loss": 1.5504, "step": 8033 }, { "epoch": 6.667219917012448, "grad_norm": 25.14763069152832, "learning_rate": 1.7335103734439837e-05, "loss": 0.8501, "step": 8034 }, { "epoch": 6.668049792531121, "grad_norm": 13.906087875366211, "learning_rate": 1.7334771784232365e-05, "loss": 1.0565, "step": 8035 }, { "epoch": 6.668879668049793, "grad_norm": 31.59501075744629, "learning_rate": 1.7334439834024898e-05, "loss": 1.9983, "step": 8036 }, { "epoch": 6.669709543568465, "grad_norm": 15.777353286743164, "learning_rate": 1.733410788381743e-05, "loss": 1.2022, "step": 8037 }, { "epoch": 6.670539419087137, "grad_norm": 19.456117630004883, "learning_rate": 1.733377593360996e-05, "loss": 0.8876, "step": 8038 }, { "epoch": 6.671369294605809, "grad_norm": 18.901426315307617, "learning_rate": 1.733344398340249e-05, "loss": 1.7181, "step": 8039 }, { "epoch": 6.672199170124482, "grad_norm": 24.601770401000977, "learning_rate": 1.7333112033195023e-05, "loss": 1.4301, "step": 8040 }, { "epoch": 6.673029045643154, "grad_norm": 20.00929069519043, "learning_rate": 1.733278008298755e-05, "loss": 1.0884, "step": 8041 }, { "epoch": 6.673858921161826, "grad_norm": 16.25446319580078, "learning_rate": 1.7332448132780083e-05, "loss": 0.9157, "step": 8042 }, { "epoch": 6.674688796680498, "grad_norm": 14.490255355834961, "learning_rate": 1.7332116182572616e-05, "loss": 1.0171, "step": 8043 }, { "epoch": 6.67551867219917, "grad_norm": 10.992144584655762, "learning_rate": 1.7331784232365148e-05, "loss": 0.6457, "step": 8044 }, { "epoch": 6.676348547717843, "grad_norm": 19.022048950195312, "learning_rate": 1.7331452282157676e-05, "loss": 1.2, "step": 8045 }, { "epoch": 6.677178423236515, "grad_norm": 15.365756034851074, "learning_rate": 1.733112033195021e-05, "loss": 1.0977, "step": 8046 }, { "epoch": 6.678008298755187, 
"grad_norm": 15.445202827453613, "learning_rate": 1.733078838174274e-05, "loss": 0.7156, "step": 8047 }, { "epoch": 6.678838174273859, "grad_norm": 32.144981384277344, "learning_rate": 1.7330456431535273e-05, "loss": 2.1977, "step": 8048 }, { "epoch": 6.679668049792531, "grad_norm": 29.64291763305664, "learning_rate": 1.73301244813278e-05, "loss": 2.1179, "step": 8049 }, { "epoch": 6.680497925311204, "grad_norm": 21.974214553833008, "learning_rate": 1.7329792531120334e-05, "loss": 1.5092, "step": 8050 }, { "epoch": 6.681327800829876, "grad_norm": 19.514347076416016, "learning_rate": 1.7329460580912866e-05, "loss": 1.1723, "step": 8051 }, { "epoch": 6.682157676348548, "grad_norm": 21.822620391845703, "learning_rate": 1.7329128630705398e-05, "loss": 0.8651, "step": 8052 }, { "epoch": 6.68298755186722, "grad_norm": 17.571575164794922, "learning_rate": 1.7328796680497926e-05, "loss": 1.199, "step": 8053 }, { "epoch": 6.683817427385892, "grad_norm": 26.831493377685547, "learning_rate": 1.732846473029046e-05, "loss": 1.0725, "step": 8054 }, { "epoch": 6.6846473029045645, "grad_norm": 27.56987190246582, "learning_rate": 1.732813278008299e-05, "loss": 1.0911, "step": 8055 }, { "epoch": 6.685477178423237, "grad_norm": 15.559597969055176, "learning_rate": 1.732780082987552e-05, "loss": 1.1805, "step": 8056 }, { "epoch": 6.686307053941909, "grad_norm": 12.46431827545166, "learning_rate": 1.732746887966805e-05, "loss": 0.9692, "step": 8057 }, { "epoch": 6.687136929460581, "grad_norm": 24.431976318359375, "learning_rate": 1.732713692946058e-05, "loss": 1.0098, "step": 8058 }, { "epoch": 6.687966804979253, "grad_norm": 20.16048812866211, "learning_rate": 1.7326804979253112e-05, "loss": 1.1467, "step": 8059 }, { "epoch": 6.6887966804979255, "grad_norm": 15.157846450805664, "learning_rate": 1.7326473029045644e-05, "loss": 0.5557, "step": 8060 }, { "epoch": 6.689626556016598, "grad_norm": 17.64056968688965, "learning_rate": 1.7326141078838177e-05, "loss": 1.3045, "step": 8061 }, { 
"epoch": 6.69045643153527, "grad_norm": 15.987342834472656, "learning_rate": 1.7325809128630705e-05, "loss": 1.0612, "step": 8062 }, { "epoch": 6.691286307053942, "grad_norm": 19.954206466674805, "learning_rate": 1.7325477178423237e-05, "loss": 1.0934, "step": 8063 }, { "epoch": 6.692116182572614, "grad_norm": 32.9012565612793, "learning_rate": 1.732514522821577e-05, "loss": 0.6907, "step": 8064 }, { "epoch": 6.6929460580912865, "grad_norm": 16.792129516601562, "learning_rate": 1.73248132780083e-05, "loss": 0.9613, "step": 8065 }, { "epoch": 6.693775933609959, "grad_norm": 13.276141166687012, "learning_rate": 1.732448132780083e-05, "loss": 0.8715, "step": 8066 }, { "epoch": 6.694605809128631, "grad_norm": 26.231897354125977, "learning_rate": 1.7324149377593362e-05, "loss": 1.7173, "step": 8067 }, { "epoch": 6.695435684647303, "grad_norm": 19.727237701416016, "learning_rate": 1.7323817427385895e-05, "loss": 1.1826, "step": 8068 }, { "epoch": 6.696265560165975, "grad_norm": 18.927080154418945, "learning_rate": 1.7323485477178427e-05, "loss": 1.5793, "step": 8069 }, { "epoch": 6.6970954356846475, "grad_norm": 12.960256576538086, "learning_rate": 1.7323153526970955e-05, "loss": 0.9398, "step": 8070 }, { "epoch": 6.69792531120332, "grad_norm": 23.304550170898438, "learning_rate": 1.7322821576763487e-05, "loss": 1.3364, "step": 8071 }, { "epoch": 6.698755186721992, "grad_norm": 23.682825088500977, "learning_rate": 1.732248962655602e-05, "loss": 2.0675, "step": 8072 }, { "epoch": 6.699585062240664, "grad_norm": 17.76519203186035, "learning_rate": 1.7322157676348548e-05, "loss": 1.3133, "step": 8073 }, { "epoch": 6.700414937759336, "grad_norm": 24.448978424072266, "learning_rate": 1.732182572614108e-05, "loss": 1.2619, "step": 8074 }, { "epoch": 6.7012448132780085, "grad_norm": 18.898921966552734, "learning_rate": 1.7321493775933612e-05, "loss": 0.9533, "step": 8075 }, { "epoch": 6.702074688796681, "grad_norm": 35.52007293701172, "learning_rate": 1.732116182572614e-05, 
"loss": 1.8595, "step": 8076 }, { "epoch": 6.702904564315353, "grad_norm": 31.763378143310547, "learning_rate": 1.7320829875518673e-05, "loss": 2.1616, "step": 8077 }, { "epoch": 6.703734439834025, "grad_norm": 25.456008911132812, "learning_rate": 1.7320497925311205e-05, "loss": 1.6307, "step": 8078 }, { "epoch": 6.704564315352697, "grad_norm": 17.967897415161133, "learning_rate": 1.7320165975103734e-05, "loss": 0.708, "step": 8079 }, { "epoch": 6.7053941908713695, "grad_norm": 18.807575225830078, "learning_rate": 1.7319834024896266e-05, "loss": 1.5155, "step": 8080 }, { "epoch": 6.706224066390042, "grad_norm": 18.24726104736328, "learning_rate": 1.73195020746888e-05, "loss": 1.0761, "step": 8081 }, { "epoch": 6.707053941908714, "grad_norm": 22.286705017089844, "learning_rate": 1.7319170124481327e-05, "loss": 0.8625, "step": 8082 }, { "epoch": 6.707883817427386, "grad_norm": 16.952878952026367, "learning_rate": 1.731883817427386e-05, "loss": 1.181, "step": 8083 }, { "epoch": 6.708713692946058, "grad_norm": 16.766462326049805, "learning_rate": 1.731850622406639e-05, "loss": 0.8037, "step": 8084 }, { "epoch": 6.70954356846473, "grad_norm": 15.54552936553955, "learning_rate": 1.7318174273858923e-05, "loss": 0.63, "step": 8085 }, { "epoch": 6.710373443983403, "grad_norm": 22.259302139282227, "learning_rate": 1.7317842323651456e-05, "loss": 1.9841, "step": 8086 }, { "epoch": 6.711203319502075, "grad_norm": 28.11739158630371, "learning_rate": 1.7317510373443984e-05, "loss": 1.4208, "step": 8087 }, { "epoch": 6.712033195020747, "grad_norm": 23.440319061279297, "learning_rate": 1.7317178423236516e-05, "loss": 1.5534, "step": 8088 }, { "epoch": 6.712863070539419, "grad_norm": 28.062301635742188, "learning_rate": 1.731684647302905e-05, "loss": 1.7073, "step": 8089 }, { "epoch": 6.713692946058091, "grad_norm": 22.492116928100586, "learning_rate": 1.731651452282158e-05, "loss": 0.8202, "step": 8090 }, { "epoch": 6.714522821576764, "grad_norm": 20.375164031982422, 
"learning_rate": 1.731618257261411e-05, "loss": 0.9968, "step": 8091 }, { "epoch": 6.715352697095436, "grad_norm": 29.704328536987305, "learning_rate": 1.731585062240664e-05, "loss": 0.9659, "step": 8092 }, { "epoch": 6.716182572614108, "grad_norm": 15.832376480102539, "learning_rate": 1.7315518672199173e-05, "loss": 1.3678, "step": 8093 }, { "epoch": 6.71701244813278, "grad_norm": 32.56948471069336, "learning_rate": 1.7315186721991702e-05, "loss": 1.6066, "step": 8094 }, { "epoch": 6.717842323651452, "grad_norm": 20.89621353149414, "learning_rate": 1.7314854771784234e-05, "loss": 1.7874, "step": 8095 }, { "epoch": 6.718672199170125, "grad_norm": 17.91637420654297, "learning_rate": 1.7314522821576763e-05, "loss": 1.7409, "step": 8096 }, { "epoch": 6.719502074688797, "grad_norm": 28.5238037109375, "learning_rate": 1.7314190871369295e-05, "loss": 1.5521, "step": 8097 }, { "epoch": 6.720331950207469, "grad_norm": 12.525930404663086, "learning_rate": 1.7313858921161827e-05, "loss": 1.2068, "step": 8098 }, { "epoch": 6.721161825726141, "grad_norm": 15.02763557434082, "learning_rate": 1.7313526970954356e-05, "loss": 1.2816, "step": 8099 }, { "epoch": 6.721991701244813, "grad_norm": 14.148111343383789, "learning_rate": 1.7313195020746888e-05, "loss": 0.9122, "step": 8100 }, { "epoch": 6.722821576763486, "grad_norm": 28.129405975341797, "learning_rate": 1.731286307053942e-05, "loss": 1.5948, "step": 8101 }, { "epoch": 6.723651452282158, "grad_norm": 16.659940719604492, "learning_rate": 1.7312531120331952e-05, "loss": 1.2388, "step": 8102 }, { "epoch": 6.72448132780083, "grad_norm": 16.527477264404297, "learning_rate": 1.731219917012448e-05, "loss": 0.9855, "step": 8103 }, { "epoch": 6.725311203319502, "grad_norm": 20.779296875, "learning_rate": 1.7311867219917013e-05, "loss": 1.8284, "step": 8104 }, { "epoch": 6.726141078838174, "grad_norm": 16.831409454345703, "learning_rate": 1.7311535269709545e-05, "loss": 1.4095, "step": 8105 }, { "epoch": 6.7269709543568466, 
"grad_norm": 17.39590072631836, "learning_rate": 1.7311203319502077e-05, "loss": 1.7763, "step": 8106 }, { "epoch": 6.727800829875519, "grad_norm": 12.727730751037598, "learning_rate": 1.7310871369294606e-05, "loss": 0.6199, "step": 8107 }, { "epoch": 6.728630705394191, "grad_norm": 24.36198616027832, "learning_rate": 1.7310539419087138e-05, "loss": 2.6622, "step": 8108 }, { "epoch": 6.729460580912863, "grad_norm": 15.010420799255371, "learning_rate": 1.731020746887967e-05, "loss": 1.346, "step": 8109 }, { "epoch": 6.730290456431535, "grad_norm": 20.271448135375977, "learning_rate": 1.7309875518672202e-05, "loss": 1.1824, "step": 8110 }, { "epoch": 6.7311203319502075, "grad_norm": 13.507591247558594, "learning_rate": 1.730954356846473e-05, "loss": 0.9461, "step": 8111 }, { "epoch": 6.73195020746888, "grad_norm": 12.229098320007324, "learning_rate": 1.7309211618257263e-05, "loss": 1.2365, "step": 8112 }, { "epoch": 6.732780082987552, "grad_norm": 12.638235092163086, "learning_rate": 1.7308879668049795e-05, "loss": 0.763, "step": 8113 }, { "epoch": 6.733609958506224, "grad_norm": 23.7956485748291, "learning_rate": 1.7308547717842324e-05, "loss": 1.6229, "step": 8114 }, { "epoch": 6.734439834024896, "grad_norm": 10.830265998840332, "learning_rate": 1.7308215767634856e-05, "loss": 1.2287, "step": 8115 }, { "epoch": 6.7352697095435685, "grad_norm": 16.480579376220703, "learning_rate": 1.7307883817427388e-05, "loss": 1.271, "step": 8116 }, { "epoch": 6.736099585062241, "grad_norm": 17.819543838500977, "learning_rate": 1.7307551867219917e-05, "loss": 1.1331, "step": 8117 }, { "epoch": 6.736929460580913, "grad_norm": 18.482908248901367, "learning_rate": 1.730721991701245e-05, "loss": 0.7966, "step": 8118 }, { "epoch": 6.737759336099585, "grad_norm": 11.369229316711426, "learning_rate": 1.730688796680498e-05, "loss": 0.7394, "step": 8119 }, { "epoch": 6.738589211618257, "grad_norm": 15.368234634399414, "learning_rate": 1.730655601659751e-05, "loss": 0.9832, "step": 8120 }, 
{ "epoch": 6.7394190871369295, "grad_norm": 17.522968292236328, "learning_rate": 1.7306224066390042e-05, "loss": 1.5306, "step": 8121 }, { "epoch": 6.740248962655602, "grad_norm": 18.42627716064453, "learning_rate": 1.7305892116182574e-05, "loss": 1.7352, "step": 8122 }, { "epoch": 6.741078838174274, "grad_norm": 21.09075927734375, "learning_rate": 1.7305560165975106e-05, "loss": 1.0056, "step": 8123 }, { "epoch": 6.741908713692946, "grad_norm": 13.495163917541504, "learning_rate": 1.7305228215767635e-05, "loss": 1.1085, "step": 8124 }, { "epoch": 6.742738589211618, "grad_norm": 20.710983276367188, "learning_rate": 1.7304896265560167e-05, "loss": 1.379, "step": 8125 }, { "epoch": 6.7435684647302905, "grad_norm": 21.223119735717773, "learning_rate": 1.73045643153527e-05, "loss": 1.3002, "step": 8126 }, { "epoch": 6.744398340248963, "grad_norm": 20.200529098510742, "learning_rate": 1.730423236514523e-05, "loss": 1.3957, "step": 8127 }, { "epoch": 6.745228215767635, "grad_norm": 17.918888092041016, "learning_rate": 1.730390041493776e-05, "loss": 0.8403, "step": 8128 }, { "epoch": 6.746058091286307, "grad_norm": 12.976486206054688, "learning_rate": 1.7303568464730292e-05, "loss": 0.7867, "step": 8129 }, { "epoch": 6.746887966804979, "grad_norm": 28.79758644104004, "learning_rate": 1.7303236514522824e-05, "loss": 1.0034, "step": 8130 }, { "epoch": 6.7477178423236515, "grad_norm": 20.994131088256836, "learning_rate": 1.7302904564315356e-05, "loss": 1.4416, "step": 8131 }, { "epoch": 6.748547717842324, "grad_norm": 16.17976188659668, "learning_rate": 1.7302572614107885e-05, "loss": 1.0909, "step": 8132 }, { "epoch": 6.749377593360996, "grad_norm": 15.001608848571777, "learning_rate": 1.7302240663900417e-05, "loss": 1.0185, "step": 8133 }, { "epoch": 6.750207468879668, "grad_norm": 12.545531272888184, "learning_rate": 1.730190871369295e-05, "loss": 1.0878, "step": 8134 }, { "epoch": 6.75103734439834, "grad_norm": 18.587522506713867, "learning_rate": 1.7301576763485478e-05, 
"loss": 1.0415, "step": 8135 }, { "epoch": 6.751867219917012, "grad_norm": 15.311257362365723, "learning_rate": 1.730124481327801e-05, "loss": 1.1943, "step": 8136 }, { "epoch": 6.752697095435685, "grad_norm": 19.730792999267578, "learning_rate": 1.730091286307054e-05, "loss": 1.2716, "step": 8137 }, { "epoch": 6.753526970954357, "grad_norm": 11.095152854919434, "learning_rate": 1.730058091286307e-05, "loss": 0.5929, "step": 8138 }, { "epoch": 6.754356846473029, "grad_norm": 17.725934982299805, "learning_rate": 1.7300248962655603e-05, "loss": 0.8922, "step": 8139 }, { "epoch": 6.755186721991701, "grad_norm": 15.263193130493164, "learning_rate": 1.7299917012448135e-05, "loss": 0.7686, "step": 8140 }, { "epoch": 6.756016597510373, "grad_norm": 14.866007804870605, "learning_rate": 1.7299585062240664e-05, "loss": 0.8022, "step": 8141 }, { "epoch": 6.756846473029046, "grad_norm": 29.130889892578125, "learning_rate": 1.7299253112033196e-05, "loss": 1.1422, "step": 8142 }, { "epoch": 6.757676348547718, "grad_norm": 21.353717803955078, "learning_rate": 1.7298921161825728e-05, "loss": 1.8993, "step": 8143 }, { "epoch": 6.75850622406639, "grad_norm": 17.428205490112305, "learning_rate": 1.729858921161826e-05, "loss": 1.2434, "step": 8144 }, { "epoch": 6.759336099585062, "grad_norm": 14.660382270812988, "learning_rate": 1.729825726141079e-05, "loss": 1.2132, "step": 8145 }, { "epoch": 6.760165975103734, "grad_norm": 17.52030372619629, "learning_rate": 1.729792531120332e-05, "loss": 0.9025, "step": 8146 }, { "epoch": 6.760995850622407, "grad_norm": 23.649446487426758, "learning_rate": 1.7297593360995853e-05, "loss": 1.7249, "step": 8147 }, { "epoch": 6.761825726141079, "grad_norm": 22.14398765563965, "learning_rate": 1.7297261410788385e-05, "loss": 1.5974, "step": 8148 }, { "epoch": 6.762655601659751, "grad_norm": 28.574304580688477, "learning_rate": 1.7296929460580914e-05, "loss": 1.4637, "step": 8149 }, { "epoch": 6.763485477178423, "grad_norm": 18.88970375061035, 
"learning_rate": 1.7296597510373446e-05, "loss": 1.1414, "step": 8150 }, { "epoch": 6.764315352697095, "grad_norm": 46.93373489379883, "learning_rate": 1.7296265560165978e-05, "loss": 1.7254, "step": 8151 }, { "epoch": 6.765145228215768, "grad_norm": 14.478079795837402, "learning_rate": 1.7295933609958507e-05, "loss": 1.2892, "step": 8152 }, { "epoch": 6.76597510373444, "grad_norm": 41.09243392944336, "learning_rate": 1.729560165975104e-05, "loss": 1.3955, "step": 8153 }, { "epoch": 6.766804979253112, "grad_norm": 13.63061809539795, "learning_rate": 1.729526970954357e-05, "loss": 0.8018, "step": 8154 }, { "epoch": 6.767634854771784, "grad_norm": 23.980730056762695, "learning_rate": 1.72949377593361e-05, "loss": 1.6835, "step": 8155 }, { "epoch": 6.768464730290456, "grad_norm": 20.38770866394043, "learning_rate": 1.7294605809128632e-05, "loss": 1.606, "step": 8156 }, { "epoch": 6.769294605809129, "grad_norm": 18.018522262573242, "learning_rate": 1.729427385892116e-05, "loss": 1.4339, "step": 8157 }, { "epoch": 6.770124481327801, "grad_norm": 16.17709732055664, "learning_rate": 1.7293941908713693e-05, "loss": 0.9406, "step": 8158 }, { "epoch": 6.770954356846473, "grad_norm": 18.831274032592773, "learning_rate": 1.7293609958506225e-05, "loss": 1.3156, "step": 8159 }, { "epoch": 6.771784232365145, "grad_norm": 21.79178237915039, "learning_rate": 1.7293278008298757e-05, "loss": 1.3191, "step": 8160 }, { "epoch": 6.772614107883817, "grad_norm": 12.778273582458496, "learning_rate": 1.7292946058091286e-05, "loss": 0.9082, "step": 8161 }, { "epoch": 6.7734439834024895, "grad_norm": 19.171451568603516, "learning_rate": 1.7292614107883818e-05, "loss": 1.3101, "step": 8162 }, { "epoch": 6.774273858921162, "grad_norm": 13.743814468383789, "learning_rate": 1.729228215767635e-05, "loss": 1.2931, "step": 8163 }, { "epoch": 6.775103734439834, "grad_norm": 24.70556640625, "learning_rate": 1.7291950207468882e-05, "loss": 1.1466, "step": 8164 }, { "epoch": 6.775933609958506, 
"grad_norm": 17.64003562927246, "learning_rate": 1.7291618257261414e-05, "loss": 1.8642, "step": 8165 }, { "epoch": 6.776763485477178, "grad_norm": 21.286428451538086, "learning_rate": 1.7291286307053943e-05, "loss": 0.8853, "step": 8166 }, { "epoch": 6.7775933609958505, "grad_norm": 21.762128829956055, "learning_rate": 1.7290954356846475e-05, "loss": 0.979, "step": 8167 }, { "epoch": 6.778423236514523, "grad_norm": 35.85139465332031, "learning_rate": 1.7290622406639007e-05, "loss": 1.3617, "step": 8168 }, { "epoch": 6.779253112033195, "grad_norm": 24.159637451171875, "learning_rate": 1.729029045643154e-05, "loss": 1.1887, "step": 8169 }, { "epoch": 6.780082987551867, "grad_norm": 20.914777755737305, "learning_rate": 1.7289958506224068e-05, "loss": 0.9117, "step": 8170 }, { "epoch": 6.780912863070539, "grad_norm": 14.383687973022461, "learning_rate": 1.72896265560166e-05, "loss": 0.7961, "step": 8171 }, { "epoch": 6.7817427385892115, "grad_norm": 14.399641036987305, "learning_rate": 1.7289294605809132e-05, "loss": 1.4836, "step": 8172 }, { "epoch": 6.782572614107884, "grad_norm": 18.844728469848633, "learning_rate": 1.728896265560166e-05, "loss": 1.0849, "step": 8173 }, { "epoch": 6.783402489626556, "grad_norm": 16.530067443847656, "learning_rate": 1.7288630705394193e-05, "loss": 1.255, "step": 8174 }, { "epoch": 6.784232365145228, "grad_norm": 16.612632751464844, "learning_rate": 1.728829875518672e-05, "loss": 0.9663, "step": 8175 }, { "epoch": 6.7850622406639, "grad_norm": 23.729158401489258, "learning_rate": 1.7287966804979254e-05, "loss": 2.0092, "step": 8176 }, { "epoch": 6.7858921161825725, "grad_norm": 12.419305801391602, "learning_rate": 1.7287634854771786e-05, "loss": 0.9239, "step": 8177 }, { "epoch": 6.786721991701245, "grad_norm": 24.17599105834961, "learning_rate": 1.7287302904564315e-05, "loss": 1.6305, "step": 8178 }, { "epoch": 6.787551867219917, "grad_norm": 16.62470245361328, "learning_rate": 1.7286970954356847e-05, "loss": 1.098, "step": 8179 }, 
{ "epoch": 6.788381742738589, "grad_norm": 19.795820236206055, "learning_rate": 1.728663900414938e-05, "loss": 1.1092, "step": 8180 }, { "epoch": 6.789211618257261, "grad_norm": 16.107484817504883, "learning_rate": 1.728630705394191e-05, "loss": 0.9989, "step": 8181 }, { "epoch": 6.7900414937759335, "grad_norm": 12.057184219360352, "learning_rate": 1.728597510373444e-05, "loss": 0.8196, "step": 8182 }, { "epoch": 6.790871369294606, "grad_norm": 17.605554580688477, "learning_rate": 1.728564315352697e-05, "loss": 1.2213, "step": 8183 }, { "epoch": 6.791701244813278, "grad_norm": 15.53144645690918, "learning_rate": 1.7285311203319504e-05, "loss": 1.0481, "step": 8184 }, { "epoch": 6.79253112033195, "grad_norm": 29.234033584594727, "learning_rate": 1.7284979253112036e-05, "loss": 1.2244, "step": 8185 }, { "epoch": 6.793360995850622, "grad_norm": 17.207345962524414, "learning_rate": 1.7284647302904565e-05, "loss": 1.2299, "step": 8186 }, { "epoch": 6.7941908713692944, "grad_norm": 37.23347091674805, "learning_rate": 1.7284315352697097e-05, "loss": 1.5456, "step": 8187 }, { "epoch": 6.795020746887967, "grad_norm": 18.287139892578125, "learning_rate": 1.728398340248963e-05, "loss": 1.7161, "step": 8188 }, { "epoch": 6.795850622406639, "grad_norm": 22.477025985717773, "learning_rate": 1.728365145228216e-05, "loss": 1.2763, "step": 8189 }, { "epoch": 6.796680497925311, "grad_norm": 30.75411605834961, "learning_rate": 1.728331950207469e-05, "loss": 1.5606, "step": 8190 }, { "epoch": 6.797510373443983, "grad_norm": 14.89600658416748, "learning_rate": 1.7282987551867222e-05, "loss": 1.1577, "step": 8191 }, { "epoch": 6.798340248962655, "grad_norm": 33.193321228027344, "learning_rate": 1.7282655601659754e-05, "loss": 2.5086, "step": 8192 }, { "epoch": 6.799170124481328, "grad_norm": 19.81831932067871, "learning_rate": 1.7282323651452283e-05, "loss": 1.2714, "step": 8193 }, { "epoch": 6.8, "grad_norm": 38.94649887084961, "learning_rate": 1.7281991701244815e-05, "loss": 1.8257, 
"step": 8194 }, { "epoch": 6.800829875518672, "grad_norm": 29.97372055053711, "learning_rate": 1.7281659751037347e-05, "loss": 0.9371, "step": 8195 }, { "epoch": 6.801659751037344, "grad_norm": 16.43467903137207, "learning_rate": 1.7281327800829876e-05, "loss": 0.9324, "step": 8196 }, { "epoch": 6.802489626556016, "grad_norm": 22.87192153930664, "learning_rate": 1.7280995850622408e-05, "loss": 1.4413, "step": 8197 }, { "epoch": 6.803319502074689, "grad_norm": 18.97901153564453, "learning_rate": 1.728066390041494e-05, "loss": 1.6095, "step": 8198 }, { "epoch": 6.804149377593361, "grad_norm": 22.9075927734375, "learning_rate": 1.728033195020747e-05, "loss": 1.2019, "step": 8199 }, { "epoch": 6.804979253112033, "grad_norm": 23.943004608154297, "learning_rate": 1.728e-05, "loss": 1.4638, "step": 8200 }, { "epoch": 6.805809128630705, "grad_norm": 16.070512771606445, "learning_rate": 1.7279668049792533e-05, "loss": 0.9467, "step": 8201 }, { "epoch": 6.806639004149377, "grad_norm": 47.228023529052734, "learning_rate": 1.7279336099585065e-05, "loss": 1.5912, "step": 8202 }, { "epoch": 6.80746887966805, "grad_norm": 15.069543838500977, "learning_rate": 1.7279004149377594e-05, "loss": 0.9998, "step": 8203 }, { "epoch": 6.808298755186722, "grad_norm": 26.566883087158203, "learning_rate": 1.7278672199170126e-05, "loss": 1.5915, "step": 8204 }, { "epoch": 6.809128630705394, "grad_norm": 25.326448440551758, "learning_rate": 1.7278340248962658e-05, "loss": 0.8898, "step": 8205 }, { "epoch": 6.809958506224066, "grad_norm": 34.19203567504883, "learning_rate": 1.727800829875519e-05, "loss": 1.9487, "step": 8206 }, { "epoch": 6.810788381742738, "grad_norm": 30.462369918823242, "learning_rate": 1.727767634854772e-05, "loss": 2.3075, "step": 8207 }, { "epoch": 6.811618257261411, "grad_norm": 24.43897247314453, "learning_rate": 1.727734439834025e-05, "loss": 1.3682, "step": 8208 }, { "epoch": 6.812448132780083, "grad_norm": 25.30711555480957, "learning_rate": 1.7277012448132783e-05, 
"loss": 1.1519, "step": 8209 }, { "epoch": 6.813278008298755, "grad_norm": 17.92266082763672, "learning_rate": 1.7276680497925315e-05, "loss": 0.8298, "step": 8210 }, { "epoch": 6.814107883817427, "grad_norm": 18.92755126953125, "learning_rate": 1.7276348547717844e-05, "loss": 1.6983, "step": 8211 }, { "epoch": 6.814937759336099, "grad_norm": 39.29194641113281, "learning_rate": 1.7276016597510376e-05, "loss": 0.8967, "step": 8212 }, { "epoch": 6.8157676348547716, "grad_norm": 12.80569076538086, "learning_rate": 1.7275684647302904e-05, "loss": 0.925, "step": 8213 }, { "epoch": 6.816597510373444, "grad_norm": 17.868629455566406, "learning_rate": 1.7275352697095437e-05, "loss": 1.1817, "step": 8214 }, { "epoch": 6.817427385892116, "grad_norm": 19.910165786743164, "learning_rate": 1.727502074688797e-05, "loss": 1.4342, "step": 8215 }, { "epoch": 6.818257261410788, "grad_norm": 22.359201431274414, "learning_rate": 1.7274688796680497e-05, "loss": 1.6991, "step": 8216 }, { "epoch": 6.81908713692946, "grad_norm": 36.94413375854492, "learning_rate": 1.727435684647303e-05, "loss": 1.3048, "step": 8217 }, { "epoch": 6.8199170124481325, "grad_norm": 13.79996395111084, "learning_rate": 1.727402489626556e-05, "loss": 1.2078, "step": 8218 }, { "epoch": 6.820746887966805, "grad_norm": 19.54472541809082, "learning_rate": 1.7273692946058094e-05, "loss": 1.4601, "step": 8219 }, { "epoch": 6.821576763485477, "grad_norm": 14.664898872375488, "learning_rate": 1.7273360995850622e-05, "loss": 0.8244, "step": 8220 }, { "epoch": 6.822406639004149, "grad_norm": 15.70913314819336, "learning_rate": 1.7273029045643155e-05, "loss": 1.2728, "step": 8221 }, { "epoch": 6.823236514522821, "grad_norm": 21.58011817932129, "learning_rate": 1.7272697095435687e-05, "loss": 1.5544, "step": 8222 }, { "epoch": 6.8240663900414935, "grad_norm": 13.517739295959473, "learning_rate": 1.727236514522822e-05, "loss": 1.0048, "step": 8223 }, { "epoch": 6.824896265560166, "grad_norm": 16.512937545776367, 
"learning_rate": 1.7272033195020747e-05, "loss": 0.9527, "step": 8224 }, { "epoch": 6.825726141078838, "grad_norm": 18.61038589477539, "learning_rate": 1.727170124481328e-05, "loss": 1.4729, "step": 8225 }, { "epoch": 6.82655601659751, "grad_norm": 25.59625816345215, "learning_rate": 1.727136929460581e-05, "loss": 1.5346, "step": 8226 }, { "epoch": 6.827385892116182, "grad_norm": 11.689444541931152, "learning_rate": 1.7271037344398344e-05, "loss": 1.0021, "step": 8227 }, { "epoch": 6.8282157676348545, "grad_norm": 16.272319793701172, "learning_rate": 1.7270705394190872e-05, "loss": 0.9472, "step": 8228 }, { "epoch": 6.829045643153527, "grad_norm": 17.170604705810547, "learning_rate": 1.7270373443983405e-05, "loss": 1.0547, "step": 8229 }, { "epoch": 6.829875518672199, "grad_norm": 27.977479934692383, "learning_rate": 1.7270041493775937e-05, "loss": 2.0289, "step": 8230 }, { "epoch": 6.830705394190871, "grad_norm": 25.757848739624023, "learning_rate": 1.7269709543568465e-05, "loss": 0.9973, "step": 8231 }, { "epoch": 6.831535269709543, "grad_norm": 13.947436332702637, "learning_rate": 1.7269377593360998e-05, "loss": 1.1393, "step": 8232 }, { "epoch": 6.8323651452282155, "grad_norm": 21.173601150512695, "learning_rate": 1.726904564315353e-05, "loss": 1.6821, "step": 8233 }, { "epoch": 6.833195020746888, "grad_norm": 16.01763916015625, "learning_rate": 1.726871369294606e-05, "loss": 1.1993, "step": 8234 }, { "epoch": 6.83402489626556, "grad_norm": 21.461402893066406, "learning_rate": 1.726838174273859e-05, "loss": 1.2026, "step": 8235 }, { "epoch": 6.834854771784232, "grad_norm": 21.753664016723633, "learning_rate": 1.726804979253112e-05, "loss": 1.7021, "step": 8236 }, { "epoch": 6.835684647302904, "grad_norm": 11.030082702636719, "learning_rate": 1.726771784232365e-05, "loss": 0.4785, "step": 8237 }, { "epoch": 6.8365145228215765, "grad_norm": 17.110380172729492, "learning_rate": 1.7267385892116183e-05, "loss": 0.8816, "step": 8238 }, { "epoch": 6.837344398340249, 
"grad_norm": 18.68815803527832, "learning_rate": 1.7267053941908716e-05, "loss": 1.4987, "step": 8239 }, { "epoch": 6.838174273858921, "grad_norm": 20.168006896972656, "learning_rate": 1.7266721991701244e-05, "loss": 1.019, "step": 8240 }, { "epoch": 6.839004149377593, "grad_norm": 23.92285919189453, "learning_rate": 1.7266390041493776e-05, "loss": 1.0536, "step": 8241 }, { "epoch": 6.839834024896265, "grad_norm": 21.18442153930664, "learning_rate": 1.726605809128631e-05, "loss": 1.3861, "step": 8242 }, { "epoch": 6.840663900414937, "grad_norm": 19.0244140625, "learning_rate": 1.726572614107884e-05, "loss": 1.1307, "step": 8243 }, { "epoch": 6.84149377593361, "grad_norm": 16.197280883789062, "learning_rate": 1.7265394190871373e-05, "loss": 0.9607, "step": 8244 }, { "epoch": 6.842323651452282, "grad_norm": 15.381355285644531, "learning_rate": 1.72650622406639e-05, "loss": 1.2166, "step": 8245 }, { "epoch": 6.843153526970954, "grad_norm": 18.22138023376465, "learning_rate": 1.7264730290456433e-05, "loss": 1.0202, "step": 8246 }, { "epoch": 6.843983402489626, "grad_norm": 28.9202880859375, "learning_rate": 1.7264398340248966e-05, "loss": 1.3284, "step": 8247 }, { "epoch": 6.844813278008298, "grad_norm": 26.924551010131836, "learning_rate": 1.7264066390041498e-05, "loss": 2.297, "step": 8248 }, { "epoch": 6.845643153526971, "grad_norm": 19.363643646240234, "learning_rate": 1.7263734439834026e-05, "loss": 1.489, "step": 8249 }, { "epoch": 6.846473029045643, "grad_norm": 23.739055633544922, "learning_rate": 1.726340248962656e-05, "loss": 1.2936, "step": 8250 }, { "epoch": 6.847302904564315, "grad_norm": 16.571035385131836, "learning_rate": 1.726307053941909e-05, "loss": 0.7074, "step": 8251 }, { "epoch": 6.848132780082987, "grad_norm": 45.37409591674805, "learning_rate": 1.726273858921162e-05, "loss": 2.3601, "step": 8252 }, { "epoch": 6.848962655601659, "grad_norm": 20.4896297454834, "learning_rate": 1.726240663900415e-05, "loss": 0.9932, "step": 8253 }, { "epoch": 
6.849792531120332, "grad_norm": 21.94618797302246, "learning_rate": 1.726207468879668e-05, "loss": 1.0145, "step": 8254 }, { "epoch": 6.850622406639004, "grad_norm": 18.233491897583008, "learning_rate": 1.7261742738589212e-05, "loss": 1.2711, "step": 8255 }, { "epoch": 6.851452282157676, "grad_norm": 13.524221420288086, "learning_rate": 1.7261410788381744e-05, "loss": 1.0107, "step": 8256 }, { "epoch": 6.852282157676348, "grad_norm": 13.858247756958008, "learning_rate": 1.7261078838174273e-05, "loss": 1.2486, "step": 8257 }, { "epoch": 6.85311203319502, "grad_norm": 22.150514602661133, "learning_rate": 1.7260746887966805e-05, "loss": 0.8963, "step": 8258 }, { "epoch": 6.853941908713693, "grad_norm": 13.49402141571045, "learning_rate": 1.7260414937759337e-05, "loss": 0.9325, "step": 8259 }, { "epoch": 6.854771784232365, "grad_norm": 18.126245498657227, "learning_rate": 1.726008298755187e-05, "loss": 1.3717, "step": 8260 }, { "epoch": 6.855601659751037, "grad_norm": 27.77437400817871, "learning_rate": 1.7259751037344398e-05, "loss": 1.3638, "step": 8261 }, { "epoch": 6.856431535269709, "grad_norm": 19.314144134521484, "learning_rate": 1.725941908713693e-05, "loss": 1.043, "step": 8262 }, { "epoch": 6.857261410788381, "grad_norm": 13.431089401245117, "learning_rate": 1.7259087136929462e-05, "loss": 1.0978, "step": 8263 }, { "epoch": 6.858091286307054, "grad_norm": 14.596447944641113, "learning_rate": 1.7258755186721994e-05, "loss": 1.1048, "step": 8264 }, { "epoch": 6.858921161825726, "grad_norm": 12.627789497375488, "learning_rate": 1.7258423236514523e-05, "loss": 1.0316, "step": 8265 }, { "epoch": 6.859751037344399, "grad_norm": 25.292823791503906, "learning_rate": 1.7258091286307055e-05, "loss": 1.3096, "step": 8266 }, { "epoch": 6.860580912863071, "grad_norm": 20.390338897705078, "learning_rate": 1.7257759336099587e-05, "loss": 1.3332, "step": 8267 }, { "epoch": 6.861410788381743, "grad_norm": 21.883268356323242, "learning_rate": 1.725742738589212e-05, "loss": 
1.6952, "step": 8268 }, { "epoch": 6.862240663900415, "grad_norm": 18.43722915649414, "learning_rate": 1.7257095435684648e-05, "loss": 1.2586, "step": 8269 }, { "epoch": 6.863070539419088, "grad_norm": 11.074379920959473, "learning_rate": 1.725676348547718e-05, "loss": 0.8884, "step": 8270 }, { "epoch": 6.86390041493776, "grad_norm": 27.218950271606445, "learning_rate": 1.7256431535269712e-05, "loss": 1.1557, "step": 8271 }, { "epoch": 6.864730290456432, "grad_norm": 16.069398880004883, "learning_rate": 1.725609958506224e-05, "loss": 1.2611, "step": 8272 }, { "epoch": 6.865560165975104, "grad_norm": 36.44678497314453, "learning_rate": 1.7255767634854773e-05, "loss": 1.3063, "step": 8273 }, { "epoch": 6.866390041493776, "grad_norm": 17.926618576049805, "learning_rate": 1.7255435684647302e-05, "loss": 1.1315, "step": 8274 }, { "epoch": 6.867219917012449, "grad_norm": 23.04139518737793, "learning_rate": 1.7255103734439834e-05, "loss": 1.7644, "step": 8275 }, { "epoch": 6.868049792531121, "grad_norm": 19.948368072509766, "learning_rate": 1.7254771784232366e-05, "loss": 1.3484, "step": 8276 }, { "epoch": 6.868879668049793, "grad_norm": 16.74683952331543, "learning_rate": 1.72544398340249e-05, "loss": 1.2959, "step": 8277 }, { "epoch": 6.869709543568465, "grad_norm": 19.71881675720215, "learning_rate": 1.7254107883817427e-05, "loss": 0.8141, "step": 8278 }, { "epoch": 6.870539419087137, "grad_norm": 34.02971649169922, "learning_rate": 1.725377593360996e-05, "loss": 0.6951, "step": 8279 }, { "epoch": 6.87136929460581, "grad_norm": 13.404871940612793, "learning_rate": 1.725344398340249e-05, "loss": 0.7732, "step": 8280 }, { "epoch": 6.872199170124482, "grad_norm": 12.816085815429688, "learning_rate": 1.7253112033195023e-05, "loss": 0.7583, "step": 8281 }, { "epoch": 6.873029045643154, "grad_norm": 16.35128402709961, "learning_rate": 1.7252780082987552e-05, "loss": 1.3589, "step": 8282 }, { "epoch": 6.873858921161826, "grad_norm": 25.231609344482422, "learning_rate": 
1.7252448132780084e-05, "loss": 1.1405, "step": 8283 }, { "epoch": 6.874688796680498, "grad_norm": 24.507976531982422, "learning_rate": 1.7252116182572616e-05, "loss": 1.4278, "step": 8284 }, { "epoch": 6.875518672199171, "grad_norm": 46.424713134765625, "learning_rate": 1.725178423236515e-05, "loss": 1.7921, "step": 8285 }, { "epoch": 6.876348547717843, "grad_norm": 28.044816970825195, "learning_rate": 1.7251452282157677e-05, "loss": 1.916, "step": 8286 }, { "epoch": 6.877178423236515, "grad_norm": 29.03544044494629, "learning_rate": 1.725112033195021e-05, "loss": 1.5034, "step": 8287 }, { "epoch": 6.878008298755187, "grad_norm": 22.974313735961914, "learning_rate": 1.725078838174274e-05, "loss": 1.6898, "step": 8288 }, { "epoch": 6.878838174273859, "grad_norm": 15.014326095581055, "learning_rate": 1.7250456431535273e-05, "loss": 1.3156, "step": 8289 }, { "epoch": 6.8796680497925315, "grad_norm": 26.017440795898438, "learning_rate": 1.7250124481327802e-05, "loss": 1.167, "step": 8290 }, { "epoch": 6.880497925311204, "grad_norm": 20.460105895996094, "learning_rate": 1.7249792531120334e-05, "loss": 1.0464, "step": 8291 }, { "epoch": 6.881327800829876, "grad_norm": 13.176946640014648, "learning_rate": 1.7249460580912863e-05, "loss": 0.7357, "step": 8292 }, { "epoch": 6.882157676348548, "grad_norm": 21.600187301635742, "learning_rate": 1.7249128630705395e-05, "loss": 2.3641, "step": 8293 }, { "epoch": 6.88298755186722, "grad_norm": 25.365102767944336, "learning_rate": 1.7248796680497927e-05, "loss": 1.2986, "step": 8294 }, { "epoch": 6.8838174273858925, "grad_norm": 17.689504623413086, "learning_rate": 1.7248464730290456e-05, "loss": 1.2144, "step": 8295 }, { "epoch": 6.884647302904565, "grad_norm": 21.681150436401367, "learning_rate": 1.7248132780082988e-05, "loss": 1.4174, "step": 8296 }, { "epoch": 6.885477178423237, "grad_norm": 18.51243782043457, "learning_rate": 1.724780082987552e-05, "loss": 1.3448, "step": 8297 }, { "epoch": 6.886307053941909, "grad_norm": 
13.802666664123535, "learning_rate": 1.7247468879668052e-05, "loss": 1.3456, "step": 8298 }, { "epoch": 6.887136929460581, "grad_norm": 29.998661041259766, "learning_rate": 1.724713692946058e-05, "loss": 1.3881, "step": 8299 }, { "epoch": 6.8879668049792535, "grad_norm": 13.649880409240723, "learning_rate": 1.7246804979253113e-05, "loss": 0.9337, "step": 8300 }, { "epoch": 6.888796680497926, "grad_norm": 22.580318450927734, "learning_rate": 1.7246473029045645e-05, "loss": 1.3386, "step": 8301 }, { "epoch": 6.889626556016598, "grad_norm": 25.719032287597656, "learning_rate": 1.7246141078838177e-05, "loss": 1.2725, "step": 8302 }, { "epoch": 6.89045643153527, "grad_norm": 17.754497528076172, "learning_rate": 1.7245809128630706e-05, "loss": 0.4902, "step": 8303 }, { "epoch": 6.891286307053942, "grad_norm": 29.156484603881836, "learning_rate": 1.7245477178423238e-05, "loss": 1.6875, "step": 8304 }, { "epoch": 6.8921161825726145, "grad_norm": 18.356496810913086, "learning_rate": 1.724514522821577e-05, "loss": 1.1041, "step": 8305 }, { "epoch": 6.892946058091287, "grad_norm": 15.505378723144531, "learning_rate": 1.7244813278008302e-05, "loss": 1.3746, "step": 8306 }, { "epoch": 6.893775933609959, "grad_norm": 15.037284851074219, "learning_rate": 1.724448132780083e-05, "loss": 1.0381, "step": 8307 }, { "epoch": 6.894605809128631, "grad_norm": 16.705087661743164, "learning_rate": 1.7244149377593363e-05, "loss": 0.9688, "step": 8308 }, { "epoch": 6.895435684647303, "grad_norm": 28.640222549438477, "learning_rate": 1.7243817427385895e-05, "loss": 1.4444, "step": 8309 }, { "epoch": 6.8962655601659755, "grad_norm": 28.760066986083984, "learning_rate": 1.7243485477178424e-05, "loss": 1.3563, "step": 8310 }, { "epoch": 6.897095435684648, "grad_norm": 17.300649642944336, "learning_rate": 1.7243153526970956e-05, "loss": 0.8666, "step": 8311 }, { "epoch": 6.89792531120332, "grad_norm": 23.516359329223633, "learning_rate": 1.7242821576763488e-05, "loss": 1.1962, "step": 8312 }, { 
"epoch": 6.898755186721992, "grad_norm": 18.634057998657227, "learning_rate": 1.7242489626556017e-05, "loss": 0.7874, "step": 8313 }, { "epoch": 6.899585062240664, "grad_norm": 20.26936912536621, "learning_rate": 1.724215767634855e-05, "loss": 0.765, "step": 8314 }, { "epoch": 6.9004149377593365, "grad_norm": 31.92725372314453, "learning_rate": 1.7241825726141078e-05, "loss": 1.3692, "step": 8315 }, { "epoch": 6.901244813278009, "grad_norm": 34.44479751586914, "learning_rate": 1.724149377593361e-05, "loss": 0.9479, "step": 8316 }, { "epoch": 6.902074688796681, "grad_norm": 41.58340072631836, "learning_rate": 1.7241161825726142e-05, "loss": 1.6519, "step": 8317 }, { "epoch": 6.902904564315353, "grad_norm": 12.329768180847168, "learning_rate": 1.7240829875518674e-05, "loss": 0.9458, "step": 8318 }, { "epoch": 6.903734439834025, "grad_norm": 20.75236701965332, "learning_rate": 1.7240497925311203e-05, "loss": 1.2147, "step": 8319 }, { "epoch": 6.904564315352697, "grad_norm": 16.2858943939209, "learning_rate": 1.7240165975103735e-05, "loss": 1.2517, "step": 8320 }, { "epoch": 6.90539419087137, "grad_norm": 17.288848876953125, "learning_rate": 1.7239834024896267e-05, "loss": 1.1234, "step": 8321 }, { "epoch": 6.906224066390042, "grad_norm": 36.9293212890625, "learning_rate": 1.72395020746888e-05, "loss": 1.7657, "step": 8322 }, { "epoch": 6.907053941908714, "grad_norm": 36.2154655456543, "learning_rate": 1.723917012448133e-05, "loss": 0.909, "step": 8323 }, { "epoch": 6.907883817427386, "grad_norm": 17.79671287536621, "learning_rate": 1.723883817427386e-05, "loss": 1.4819, "step": 8324 }, { "epoch": 6.908713692946058, "grad_norm": 12.763627052307129, "learning_rate": 1.7238506224066392e-05, "loss": 0.8576, "step": 8325 }, { "epoch": 6.909543568464731, "grad_norm": 17.125844955444336, "learning_rate": 1.7238174273858924e-05, "loss": 0.9766, "step": 8326 }, { "epoch": 6.910373443983403, "grad_norm": 21.568035125732422, "learning_rate": 1.7237842323651456e-05, "loss": 
1.2815, "step": 8327 }, { "epoch": 6.911203319502075, "grad_norm": 18.422271728515625, "learning_rate": 1.7237510373443985e-05, "loss": 1.3606, "step": 8328 }, { "epoch": 6.912033195020747, "grad_norm": 33.7861442565918, "learning_rate": 1.7237178423236517e-05, "loss": 2.1156, "step": 8329 }, { "epoch": 6.912863070539419, "grad_norm": 15.829882621765137, "learning_rate": 1.7236846473029046e-05, "loss": 0.9455, "step": 8330 }, { "epoch": 6.913692946058092, "grad_norm": 42.38286209106445, "learning_rate": 1.7236514522821578e-05, "loss": 1.5976, "step": 8331 }, { "epoch": 6.914522821576764, "grad_norm": 21.709896087646484, "learning_rate": 1.723618257261411e-05, "loss": 1.3777, "step": 8332 }, { "epoch": 6.915352697095436, "grad_norm": 18.092519760131836, "learning_rate": 1.723585062240664e-05, "loss": 0.8493, "step": 8333 }, { "epoch": 6.916182572614108, "grad_norm": 22.15969467163086, "learning_rate": 1.723551867219917e-05, "loss": 1.6819, "step": 8334 }, { "epoch": 6.91701244813278, "grad_norm": 28.55291175842285, "learning_rate": 1.7235186721991703e-05, "loss": 1.8145, "step": 8335 }, { "epoch": 6.917842323651453, "grad_norm": 21.848796844482422, "learning_rate": 1.723485477178423e-05, "loss": 0.8061, "step": 8336 }, { "epoch": 6.918672199170125, "grad_norm": 17.12115478515625, "learning_rate": 1.7234522821576764e-05, "loss": 2.0325, "step": 8337 }, { "epoch": 6.919502074688797, "grad_norm": 20.905338287353516, "learning_rate": 1.7234190871369296e-05, "loss": 1.3066, "step": 8338 }, { "epoch": 6.920331950207469, "grad_norm": 19.21323013305664, "learning_rate": 1.7233858921161828e-05, "loss": 1.5265, "step": 8339 }, { "epoch": 6.921161825726141, "grad_norm": 17.881559371948242, "learning_rate": 1.7233526970954357e-05, "loss": 0.9253, "step": 8340 }, { "epoch": 6.9219917012448136, "grad_norm": 19.694820404052734, "learning_rate": 1.723319502074689e-05, "loss": 1.7924, "step": 8341 }, { "epoch": 6.922821576763486, "grad_norm": 44.39557647705078, "learning_rate": 
1.723286307053942e-05, "loss": 1.3456, "step": 8342 }, { "epoch": 6.923651452282158, "grad_norm": 23.993267059326172, "learning_rate": 1.7232531120331953e-05, "loss": 1.084, "step": 8343 }, { "epoch": 6.92448132780083, "grad_norm": 16.815488815307617, "learning_rate": 1.7232199170124482e-05, "loss": 1.1865, "step": 8344 }, { "epoch": 6.925311203319502, "grad_norm": 28.055635452270508, "learning_rate": 1.7231867219917014e-05, "loss": 1.4465, "step": 8345 }, { "epoch": 6.9261410788381745, "grad_norm": 17.414016723632812, "learning_rate": 1.7231535269709546e-05, "loss": 1.3556, "step": 8346 }, { "epoch": 6.926970954356847, "grad_norm": 19.423471450805664, "learning_rate": 1.7231203319502078e-05, "loss": 1.3953, "step": 8347 }, { "epoch": 6.927800829875519, "grad_norm": 16.257301330566406, "learning_rate": 1.7230871369294607e-05, "loss": 1.6211, "step": 8348 }, { "epoch": 6.928630705394191, "grad_norm": 12.24657154083252, "learning_rate": 1.723053941908714e-05, "loss": 0.7322, "step": 8349 }, { "epoch": 6.929460580912863, "grad_norm": 25.842395782470703, "learning_rate": 1.723020746887967e-05, "loss": 0.854, "step": 8350 }, { "epoch": 6.9302904564315355, "grad_norm": 20.31739044189453, "learning_rate": 1.72298755186722e-05, "loss": 0.9252, "step": 8351 }, { "epoch": 6.931120331950208, "grad_norm": 16.791833877563477, "learning_rate": 1.7229543568464732e-05, "loss": 1.3373, "step": 8352 }, { "epoch": 6.93195020746888, "grad_norm": 19.786128997802734, "learning_rate": 1.722921161825726e-05, "loss": 1.4724, "step": 8353 }, { "epoch": 6.932780082987552, "grad_norm": 12.035268783569336, "learning_rate": 1.7228879668049793e-05, "loss": 0.9001, "step": 8354 }, { "epoch": 6.933609958506224, "grad_norm": 20.55903434753418, "learning_rate": 1.7228547717842325e-05, "loss": 0.5587, "step": 8355 }, { "epoch": 6.9344398340248965, "grad_norm": 28.98594856262207, "learning_rate": 1.7228215767634857e-05, "loss": 1.1464, "step": 8356 }, { "epoch": 6.935269709543569, "grad_norm": 
19.219524383544922, "learning_rate": 1.7227883817427386e-05, "loss": 1.1911, "step": 8357 }, { "epoch": 6.936099585062241, "grad_norm": 18.828399658203125, "learning_rate": 1.7227551867219918e-05, "loss": 0.8824, "step": 8358 }, { "epoch": 6.936929460580913, "grad_norm": 15.77815055847168, "learning_rate": 1.722721991701245e-05, "loss": 0.8097, "step": 8359 }, { "epoch": 6.937759336099585, "grad_norm": 22.7460994720459, "learning_rate": 1.7226887966804982e-05, "loss": 1.2243, "step": 8360 }, { "epoch": 6.9385892116182575, "grad_norm": 13.117274284362793, "learning_rate": 1.722655601659751e-05, "loss": 1.062, "step": 8361 }, { "epoch": 6.93941908713693, "grad_norm": 8.219873428344727, "learning_rate": 1.7226224066390043e-05, "loss": 0.3855, "step": 8362 }, { "epoch": 6.940248962655602, "grad_norm": 19.120508193969727, "learning_rate": 1.7225892116182575e-05, "loss": 1.3033, "step": 8363 }, { "epoch": 6.941078838174274, "grad_norm": 13.754087448120117, "learning_rate": 1.7225560165975107e-05, "loss": 1.1508, "step": 8364 }, { "epoch": 6.941908713692946, "grad_norm": 14.5195951461792, "learning_rate": 1.7225228215767636e-05, "loss": 1.0062, "step": 8365 }, { "epoch": 6.9427385892116185, "grad_norm": 13.690793991088867, "learning_rate": 1.7224896265560168e-05, "loss": 0.9742, "step": 8366 }, { "epoch": 6.943568464730291, "grad_norm": 23.113187789916992, "learning_rate": 1.72245643153527e-05, "loss": 1.4167, "step": 8367 }, { "epoch": 6.944398340248963, "grad_norm": 14.404932022094727, "learning_rate": 1.7224232365145232e-05, "loss": 1.0779, "step": 8368 }, { "epoch": 6.945228215767635, "grad_norm": 26.156352996826172, "learning_rate": 1.722390041493776e-05, "loss": 1.9572, "step": 8369 }, { "epoch": 6.946058091286307, "grad_norm": 12.942728042602539, "learning_rate": 1.7223568464730293e-05, "loss": 0.6954, "step": 8370 }, { "epoch": 6.946887966804979, "grad_norm": 21.895132064819336, "learning_rate": 1.722323651452282e-05, "loss": 0.9774, "step": 8371 }, { "epoch": 
6.947717842323652, "grad_norm": 15.716571807861328, "learning_rate": 1.7222904564315354e-05, "loss": 1.2832, "step": 8372 }, { "epoch": 6.948547717842324, "grad_norm": 13.302372932434082, "learning_rate": 1.7222572614107886e-05, "loss": 1.0959, "step": 8373 }, { "epoch": 6.949377593360996, "grad_norm": 17.033864974975586, "learning_rate": 1.7222240663900415e-05, "loss": 0.8761, "step": 8374 }, { "epoch": 6.950207468879668, "grad_norm": 26.48128890991211, "learning_rate": 1.7221908713692947e-05, "loss": 1.4645, "step": 8375 }, { "epoch": 6.95103734439834, "grad_norm": 23.903406143188477, "learning_rate": 1.722157676348548e-05, "loss": 1.5023, "step": 8376 }, { "epoch": 6.951867219917013, "grad_norm": 23.420133590698242, "learning_rate": 1.722124481327801e-05, "loss": 1.5056, "step": 8377 }, { "epoch": 6.952697095435685, "grad_norm": 14.558300018310547, "learning_rate": 1.722091286307054e-05, "loss": 0.9667, "step": 8378 }, { "epoch": 6.953526970954357, "grad_norm": 17.3398380279541, "learning_rate": 1.722058091286307e-05, "loss": 1.1894, "step": 8379 }, { "epoch": 6.954356846473029, "grad_norm": 21.686464309692383, "learning_rate": 1.7220248962655604e-05, "loss": 1.1827, "step": 8380 }, { "epoch": 6.955186721991701, "grad_norm": 19.755096435546875, "learning_rate": 1.7219917012448136e-05, "loss": 1.5693, "step": 8381 }, { "epoch": 6.956016597510374, "grad_norm": 23.225635528564453, "learning_rate": 1.7219585062240665e-05, "loss": 1.4275, "step": 8382 }, { "epoch": 6.956846473029046, "grad_norm": 19.197040557861328, "learning_rate": 1.7219253112033197e-05, "loss": 1.288, "step": 8383 }, { "epoch": 6.957676348547718, "grad_norm": 23.86927604675293, "learning_rate": 1.721892116182573e-05, "loss": 1.9365, "step": 8384 }, { "epoch": 6.95850622406639, "grad_norm": 24.358699798583984, "learning_rate": 1.721858921161826e-05, "loss": 1.8384, "step": 8385 }, { "epoch": 6.959336099585062, "grad_norm": 29.179712295532227, "learning_rate": 1.721825726141079e-05, "loss": 1.3745, 
"step": 8386 }, { "epoch": 6.960165975103735, "grad_norm": 13.456122398376465, "learning_rate": 1.7217925311203322e-05, "loss": 0.7442, "step": 8387 }, { "epoch": 6.960995850622407, "grad_norm": 15.50629997253418, "learning_rate": 1.7217593360995854e-05, "loss": 1.4524, "step": 8388 }, { "epoch": 6.961825726141079, "grad_norm": 18.799800872802734, "learning_rate": 1.7217261410788383e-05, "loss": 1.8696, "step": 8389 }, { "epoch": 6.962655601659751, "grad_norm": 29.178590774536133, "learning_rate": 1.7216929460580915e-05, "loss": 2.5356, "step": 8390 }, { "epoch": 6.963485477178423, "grad_norm": 21.55124855041504, "learning_rate": 1.7216597510373443e-05, "loss": 0.9981, "step": 8391 }, { "epoch": 6.964315352697096, "grad_norm": 14.83302116394043, "learning_rate": 1.7216265560165975e-05, "loss": 0.6911, "step": 8392 }, { "epoch": 6.965145228215768, "grad_norm": 23.6237735748291, "learning_rate": 1.7215933609958508e-05, "loss": 1.4826, "step": 8393 }, { "epoch": 6.96597510373444, "grad_norm": 13.903488159179688, "learning_rate": 1.7215601659751036e-05, "loss": 1.0189, "step": 8394 }, { "epoch": 6.966804979253112, "grad_norm": 15.824478149414062, "learning_rate": 1.721526970954357e-05, "loss": 1.2994, "step": 8395 }, { "epoch": 6.967634854771784, "grad_norm": 25.68168830871582, "learning_rate": 1.72149377593361e-05, "loss": 1.1612, "step": 8396 }, { "epoch": 6.9684647302904565, "grad_norm": 17.318876266479492, "learning_rate": 1.7214605809128633e-05, "loss": 1.2326, "step": 8397 }, { "epoch": 6.969294605809129, "grad_norm": 15.161534309387207, "learning_rate": 1.721427385892116e-05, "loss": 1.1391, "step": 8398 }, { "epoch": 6.970124481327801, "grad_norm": 18.844261169433594, "learning_rate": 1.7213941908713693e-05, "loss": 0.8489, "step": 8399 }, { "epoch": 6.970954356846473, "grad_norm": 16.131975173950195, "learning_rate": 1.7213609958506226e-05, "loss": 1.0948, "step": 8400 }, { "epoch": 6.971784232365145, "grad_norm": 22.407649993896484, "learning_rate": 
1.7213278008298758e-05, "loss": 1.2355, "step": 8401 }, { "epoch": 6.9726141078838175, "grad_norm": 16.08414077758789, "learning_rate": 1.721294605809129e-05, "loss": 0.6364, "step": 8402 }, { "epoch": 6.97344398340249, "grad_norm": 16.925153732299805, "learning_rate": 1.721261410788382e-05, "loss": 0.9208, "step": 8403 }, { "epoch": 6.974273858921162, "grad_norm": 24.851730346679688, "learning_rate": 1.721228215767635e-05, "loss": 1.2285, "step": 8404 }, { "epoch": 6.975103734439834, "grad_norm": 23.58835220336914, "learning_rate": 1.7211950207468883e-05, "loss": 1.2986, "step": 8405 }, { "epoch": 6.975933609958506, "grad_norm": 19.469430923461914, "learning_rate": 1.7211618257261415e-05, "loss": 1.965, "step": 8406 }, { "epoch": 6.9767634854771785, "grad_norm": 18.99833869934082, "learning_rate": 1.7211286307053944e-05, "loss": 0.9673, "step": 8407 }, { "epoch": 6.977593360995851, "grad_norm": 16.338550567626953, "learning_rate": 1.7210954356846476e-05, "loss": 0.9798, "step": 8408 }, { "epoch": 6.978423236514523, "grad_norm": 20.246408462524414, "learning_rate": 1.7210622406639004e-05, "loss": 1.4716, "step": 8409 }, { "epoch": 6.979253112033195, "grad_norm": 24.531686782836914, "learning_rate": 1.7210290456431536e-05, "loss": 1.0859, "step": 8410 }, { "epoch": 6.980082987551867, "grad_norm": 25.767141342163086, "learning_rate": 1.720995850622407e-05, "loss": 1.3166, "step": 8411 }, { "epoch": 6.9809128630705395, "grad_norm": 20.91652488708496, "learning_rate": 1.7209626556016597e-05, "loss": 1.8376, "step": 8412 }, { "epoch": 6.981742738589212, "grad_norm": 33.29607391357422, "learning_rate": 1.720929460580913e-05, "loss": 2.3177, "step": 8413 }, { "epoch": 6.982572614107884, "grad_norm": 27.71199607849121, "learning_rate": 1.720896265560166e-05, "loss": 1.5206, "step": 8414 }, { "epoch": 6.983402489626556, "grad_norm": 18.520898818969727, "learning_rate": 1.720863070539419e-05, "loss": 1.6753, "step": 8415 }, { "epoch": 6.984232365145228, "grad_norm": 
16.36750602722168, "learning_rate": 1.7208298755186722e-05, "loss": 1.3981, "step": 8416 }, { "epoch": 6.9850622406639005, "grad_norm": 24.124874114990234, "learning_rate": 1.7207966804979254e-05, "loss": 1.3251, "step": 8417 }, { "epoch": 6.985892116182573, "grad_norm": 17.95433235168457, "learning_rate": 1.7207634854771787e-05, "loss": 0.86, "step": 8418 }, { "epoch": 6.986721991701245, "grad_norm": 32.02473831176758, "learning_rate": 1.7207302904564315e-05, "loss": 1.4806, "step": 8419 }, { "epoch": 6.987551867219917, "grad_norm": 13.800752639770508, "learning_rate": 1.7206970954356847e-05, "loss": 0.8643, "step": 8420 }, { "epoch": 6.988381742738589, "grad_norm": 15.090991973876953, "learning_rate": 1.720663900414938e-05, "loss": 0.843, "step": 8421 }, { "epoch": 6.9892116182572614, "grad_norm": 12.954408645629883, "learning_rate": 1.720630705394191e-05, "loss": 0.7058, "step": 8422 }, { "epoch": 6.990041493775934, "grad_norm": 13.243185043334961, "learning_rate": 1.720597510373444e-05, "loss": 0.9255, "step": 8423 }, { "epoch": 6.990871369294606, "grad_norm": 21.562023162841797, "learning_rate": 1.7205643153526972e-05, "loss": 1.4922, "step": 8424 }, { "epoch": 6.991701244813278, "grad_norm": 15.745372772216797, "learning_rate": 1.7205311203319505e-05, "loss": 0.7869, "step": 8425 }, { "epoch": 6.99253112033195, "grad_norm": 12.358588218688965, "learning_rate": 1.7204979253112037e-05, "loss": 1.3198, "step": 8426 }, { "epoch": 6.993360995850622, "grad_norm": 18.58763885498047, "learning_rate": 1.7204647302904565e-05, "loss": 1.1678, "step": 8427 }, { "epoch": 6.994190871369295, "grad_norm": 21.03714942932129, "learning_rate": 1.7204315352697097e-05, "loss": 1.471, "step": 8428 }, { "epoch": 6.995020746887967, "grad_norm": 17.75506019592285, "learning_rate": 1.720398340248963e-05, "loss": 1.3069, "step": 8429 }, { "epoch": 6.995850622406639, "grad_norm": 15.317530632019043, "learning_rate": 1.720365145228216e-05, "loss": 1.2012, "step": 8430 }, { "epoch": 
6.996680497925311, "grad_norm": 19.445255279541016, "learning_rate": 1.720331950207469e-05, "loss": 1.1259, "step": 8431 }, { "epoch": 6.997510373443983, "grad_norm": 22.80757713317871, "learning_rate": 1.720298755186722e-05, "loss": 1.6115, "step": 8432 }, { "epoch": 6.998340248962656, "grad_norm": 22.163063049316406, "learning_rate": 1.720265560165975e-05, "loss": 1.5128, "step": 8433 }, { "epoch": 6.999170124481328, "grad_norm": 14.21684741973877, "learning_rate": 1.7202323651452283e-05, "loss": 0.8219, "step": 8434 }, { "epoch": 7.0, "grad_norm": 26.169527053833008, "learning_rate": 1.7201991701244815e-05, "loss": 1.2424, "step": 8435 }, { "epoch": 7.000829875518672, "grad_norm": 25.69438934326172, "learning_rate": 1.7201659751037344e-05, "loss": 1.2869, "step": 8436 }, { "epoch": 7.001659751037344, "grad_norm": 15.790616989135742, "learning_rate": 1.7201327800829876e-05, "loss": 1.2368, "step": 8437 }, { "epoch": 7.002489626556017, "grad_norm": 20.257112503051758, "learning_rate": 1.720099585062241e-05, "loss": 1.0958, "step": 8438 }, { "epoch": 7.003319502074689, "grad_norm": 10.997032165527344, "learning_rate": 1.720066390041494e-05, "loss": 0.5738, "step": 8439 }, { "epoch": 7.004149377593361, "grad_norm": 30.191335678100586, "learning_rate": 1.720033195020747e-05, "loss": 1.0555, "step": 8440 }, { "epoch": 7.004979253112033, "grad_norm": 17.55173683166504, "learning_rate": 1.72e-05, "loss": 1.133, "step": 8441 }, { "epoch": 7.005809128630705, "grad_norm": 16.901599884033203, "learning_rate": 1.7199668049792533e-05, "loss": 1.1446, "step": 8442 }, { "epoch": 7.006639004149378, "grad_norm": 36.42707061767578, "learning_rate": 1.7199336099585066e-05, "loss": 2.0128, "step": 8443 }, { "epoch": 7.00746887966805, "grad_norm": 18.55646514892578, "learning_rate": 1.7199004149377594e-05, "loss": 1.1437, "step": 8444 }, { "epoch": 7.008298755186722, "grad_norm": 13.93527603149414, "learning_rate": 1.7198672199170126e-05, "loss": 0.8278, "step": 8445 }, { "epoch": 
7.009128630705394, "grad_norm": 40.762611389160156, "learning_rate": 1.719834024896266e-05, "loss": 0.965, "step": 8446 }, { "epoch": 7.009958506224066, "grad_norm": 21.058948516845703, "learning_rate": 1.7198008298755187e-05, "loss": 1.5597, "step": 8447 }, { "epoch": 7.0107883817427386, "grad_norm": 34.58358383178711, "learning_rate": 1.719767634854772e-05, "loss": 1.771, "step": 8448 }, { "epoch": 7.011618257261411, "grad_norm": 14.3640775680542, "learning_rate": 1.719734439834025e-05, "loss": 0.7357, "step": 8449 }, { "epoch": 7.012448132780083, "grad_norm": 27.628623962402344, "learning_rate": 1.719701244813278e-05, "loss": 1.1181, "step": 8450 }, { "epoch": 7.013278008298755, "grad_norm": 56.92154312133789, "learning_rate": 1.7196680497925312e-05, "loss": 1.1072, "step": 8451 }, { "epoch": 7.014107883817427, "grad_norm": 18.944849014282227, "learning_rate": 1.7196348547717844e-05, "loss": 1.7329, "step": 8452 }, { "epoch": 7.0149377593360995, "grad_norm": 18.484943389892578, "learning_rate": 1.7196016597510373e-05, "loss": 1.0802, "step": 8453 }, { "epoch": 7.015767634854772, "grad_norm": 23.852294921875, "learning_rate": 1.7195684647302905e-05, "loss": 1.7457, "step": 8454 }, { "epoch": 7.016597510373444, "grad_norm": 15.229759216308594, "learning_rate": 1.7195352697095437e-05, "loss": 0.4927, "step": 8455 }, { "epoch": 7.017427385892116, "grad_norm": 16.920425415039062, "learning_rate": 1.719502074688797e-05, "loss": 1.2085, "step": 8456 }, { "epoch": 7.018257261410788, "grad_norm": 19.213714599609375, "learning_rate": 1.7194688796680498e-05, "loss": 1.1637, "step": 8457 }, { "epoch": 7.0190871369294605, "grad_norm": 16.023014068603516, "learning_rate": 1.719435684647303e-05, "loss": 0.5639, "step": 8458 }, { "epoch": 7.019917012448133, "grad_norm": 19.668161392211914, "learning_rate": 1.7194024896265562e-05, "loss": 1.1257, "step": 8459 }, { "epoch": 7.020746887966805, "grad_norm": 23.372943878173828, "learning_rate": 1.7193692946058094e-05, "loss": 
1.5109, "step": 8460 }, { "epoch": 7.021576763485477, "grad_norm": 25.924386978149414, "learning_rate": 1.7193360995850623e-05, "loss": 1.1498, "step": 8461 }, { "epoch": 7.022406639004149, "grad_norm": 23.00674057006836, "learning_rate": 1.7193029045643155e-05, "loss": 1.6523, "step": 8462 }, { "epoch": 7.0232365145228215, "grad_norm": 16.098169326782227, "learning_rate": 1.7192697095435687e-05, "loss": 1.1368, "step": 8463 }, { "epoch": 7.024066390041494, "grad_norm": 26.042394638061523, "learning_rate": 1.719236514522822e-05, "loss": 1.2047, "step": 8464 }, { "epoch": 7.024896265560166, "grad_norm": 18.69598960876465, "learning_rate": 1.7192033195020748e-05, "loss": 0.9618, "step": 8465 }, { "epoch": 7.025726141078838, "grad_norm": 27.74224853515625, "learning_rate": 1.719170124481328e-05, "loss": 1.6607, "step": 8466 }, { "epoch": 7.02655601659751, "grad_norm": 15.62268352508545, "learning_rate": 1.7191369294605812e-05, "loss": 0.9921, "step": 8467 }, { "epoch": 7.0273858921161825, "grad_norm": 26.500442504882812, "learning_rate": 1.719103734439834e-05, "loss": 1.2975, "step": 8468 }, { "epoch": 7.028215767634855, "grad_norm": 30.443025588989258, "learning_rate": 1.7190705394190873e-05, "loss": 1.1522, "step": 8469 }, { "epoch": 7.029045643153527, "grad_norm": 15.316141128540039, "learning_rate": 1.7190373443983402e-05, "loss": 1.0425, "step": 8470 }, { "epoch": 7.029875518672199, "grad_norm": 19.178932189941406, "learning_rate": 1.7190041493775934e-05, "loss": 1.1078, "step": 8471 }, { "epoch": 7.030705394190871, "grad_norm": 19.395549774169922, "learning_rate": 1.7189709543568466e-05, "loss": 0.5746, "step": 8472 }, { "epoch": 7.0315352697095435, "grad_norm": 13.76216983795166, "learning_rate": 1.7189377593360995e-05, "loss": 0.8668, "step": 8473 }, { "epoch": 7.032365145228216, "grad_norm": 24.179994583129883, "learning_rate": 1.7189045643153527e-05, "loss": 1.1972, "step": 8474 }, { "epoch": 7.033195020746888, "grad_norm": 15.204923629760742, 
"learning_rate": 1.718871369294606e-05, "loss": 1.0427, "step": 8475 }, { "epoch": 7.03402489626556, "grad_norm": 23.453638076782227, "learning_rate": 1.718838174273859e-05, "loss": 0.7395, "step": 8476 }, { "epoch": 7.034854771784232, "grad_norm": 31.335128784179688, "learning_rate": 1.718804979253112e-05, "loss": 1.1575, "step": 8477 }, { "epoch": 7.035684647302904, "grad_norm": 24.956905364990234, "learning_rate": 1.7187717842323652e-05, "loss": 1.1042, "step": 8478 }, { "epoch": 7.036514522821577, "grad_norm": 23.917383193969727, "learning_rate": 1.7187385892116184e-05, "loss": 1.8095, "step": 8479 }, { "epoch": 7.037344398340249, "grad_norm": 22.28701400756836, "learning_rate": 1.7187053941908716e-05, "loss": 1.0193, "step": 8480 }, { "epoch": 7.038174273858921, "grad_norm": 16.149890899658203, "learning_rate": 1.7186721991701245e-05, "loss": 1.1583, "step": 8481 }, { "epoch": 7.039004149377593, "grad_norm": 24.142051696777344, "learning_rate": 1.7186390041493777e-05, "loss": 0.9544, "step": 8482 }, { "epoch": 7.039834024896265, "grad_norm": 15.19555950164795, "learning_rate": 1.718605809128631e-05, "loss": 1.1525, "step": 8483 }, { "epoch": 7.040663900414938, "grad_norm": 33.04243469238281, "learning_rate": 1.718572614107884e-05, "loss": 1.2726, "step": 8484 }, { "epoch": 7.04149377593361, "grad_norm": 22.252042770385742, "learning_rate": 1.7185394190871373e-05, "loss": 1.2383, "step": 8485 }, { "epoch": 7.042323651452282, "grad_norm": 23.50086212158203, "learning_rate": 1.7185062240663902e-05, "loss": 0.9836, "step": 8486 }, { "epoch": 7.043153526970954, "grad_norm": 21.80872917175293, "learning_rate": 1.7184730290456434e-05, "loss": 0.667, "step": 8487 }, { "epoch": 7.043983402489626, "grad_norm": 11.82590103149414, "learning_rate": 1.7184398340248963e-05, "loss": 0.6441, "step": 8488 }, { "epoch": 7.044813278008299, "grad_norm": 22.331151962280273, "learning_rate": 1.7184066390041495e-05, "loss": 1.1018, "step": 8489 }, { "epoch": 7.045643153526971, 
"grad_norm": 22.16447639465332, "learning_rate": 1.7183734439834027e-05, "loss": 1.1855, "step": 8490 }, { "epoch": 7.046473029045643, "grad_norm": 15.59242057800293, "learning_rate": 1.7183402489626556e-05, "loss": 0.8642, "step": 8491 }, { "epoch": 7.047302904564315, "grad_norm": 13.648763656616211, "learning_rate": 1.7183070539419088e-05, "loss": 0.6326, "step": 8492 }, { "epoch": 7.048132780082987, "grad_norm": 15.63321304321289, "learning_rate": 1.718273858921162e-05, "loss": 0.9956, "step": 8493 }, { "epoch": 7.04896265560166, "grad_norm": 15.39668083190918, "learning_rate": 1.718240663900415e-05, "loss": 0.5387, "step": 8494 }, { "epoch": 7.049792531120332, "grad_norm": 18.380630493164062, "learning_rate": 1.718207468879668e-05, "loss": 1.1717, "step": 8495 }, { "epoch": 7.050622406639004, "grad_norm": 16.903860092163086, "learning_rate": 1.7181742738589213e-05, "loss": 1.2239, "step": 8496 }, { "epoch": 7.051452282157676, "grad_norm": 22.459211349487305, "learning_rate": 1.7181410788381745e-05, "loss": 1.392, "step": 8497 }, { "epoch": 7.052282157676348, "grad_norm": 17.55362319946289, "learning_rate": 1.7181078838174274e-05, "loss": 0.712, "step": 8498 }, { "epoch": 7.053112033195021, "grad_norm": 21.347959518432617, "learning_rate": 1.7180746887966806e-05, "loss": 1.6801, "step": 8499 }, { "epoch": 7.053941908713693, "grad_norm": 11.39612102508545, "learning_rate": 1.7180414937759338e-05, "loss": 0.3702, "step": 8500 }, { "epoch": 7.054771784232365, "grad_norm": 15.15172290802002, "learning_rate": 1.718008298755187e-05, "loss": 1.4603, "step": 8501 }, { "epoch": 7.055601659751037, "grad_norm": 17.491491317749023, "learning_rate": 1.71797510373444e-05, "loss": 1.1608, "step": 8502 }, { "epoch": 7.056431535269709, "grad_norm": 28.542695999145508, "learning_rate": 1.717941908713693e-05, "loss": 1.7035, "step": 8503 }, { "epoch": 7.0572614107883815, "grad_norm": 27.68416404724121, "learning_rate": 1.7179087136929463e-05, "loss": 2.0261, "step": 8504 }, { 
"epoch": 7.058091286307054, "grad_norm": 21.245304107666016, "learning_rate": 1.7178755186721995e-05, "loss": 0.8844, "step": 8505 }, { "epoch": 7.058921161825726, "grad_norm": 22.7674560546875, "learning_rate": 1.7178423236514524e-05, "loss": 1.8811, "step": 8506 }, { "epoch": 7.059751037344398, "grad_norm": 15.473313331604004, "learning_rate": 1.7178091286307056e-05, "loss": 0.7559, "step": 8507 }, { "epoch": 7.06058091286307, "grad_norm": 21.44000244140625, "learning_rate": 1.7177759336099585e-05, "loss": 0.9871, "step": 8508 }, { "epoch": 7.0614107883817425, "grad_norm": 27.371065139770508, "learning_rate": 1.7177427385892117e-05, "loss": 0.9955, "step": 8509 }, { "epoch": 7.062240663900415, "grad_norm": 20.480985641479492, "learning_rate": 1.717709543568465e-05, "loss": 1.3601, "step": 8510 }, { "epoch": 7.063070539419087, "grad_norm": 21.2209529876709, "learning_rate": 1.7176763485477178e-05, "loss": 0.9144, "step": 8511 }, { "epoch": 7.063900414937759, "grad_norm": 20.822378158569336, "learning_rate": 1.717643153526971e-05, "loss": 0.8848, "step": 8512 }, { "epoch": 7.064730290456431, "grad_norm": 29.54651641845703, "learning_rate": 1.7176099585062242e-05, "loss": 1.7296, "step": 8513 }, { "epoch": 7.0655601659751035, "grad_norm": 16.076284408569336, "learning_rate": 1.7175767634854774e-05, "loss": 0.4348, "step": 8514 }, { "epoch": 7.066390041493776, "grad_norm": 20.685699462890625, "learning_rate": 1.7175435684647303e-05, "loss": 0.8217, "step": 8515 }, { "epoch": 7.067219917012448, "grad_norm": 17.703664779663086, "learning_rate": 1.7175103734439835e-05, "loss": 0.8923, "step": 8516 }, { "epoch": 7.06804979253112, "grad_norm": 21.917163848876953, "learning_rate": 1.7174771784232367e-05, "loss": 0.864, "step": 8517 }, { "epoch": 7.068879668049792, "grad_norm": 19.73590660095215, "learning_rate": 1.71744398340249e-05, "loss": 1.6436, "step": 8518 }, { "epoch": 7.0697095435684645, "grad_norm": 25.55629539489746, "learning_rate": 1.7174107883817428e-05, 
"loss": 1.3918, "step": 8519 }, { "epoch": 7.070539419087137, "grad_norm": 25.597148895263672, "learning_rate": 1.717377593360996e-05, "loss": 0.9144, "step": 8520 }, { "epoch": 7.071369294605809, "grad_norm": 20.51215171813965, "learning_rate": 1.7173443983402492e-05, "loss": 0.9067, "step": 8521 }, { "epoch": 7.072199170124481, "grad_norm": 15.757401466369629, "learning_rate": 1.7173112033195024e-05, "loss": 0.9177, "step": 8522 }, { "epoch": 7.073029045643153, "grad_norm": 24.46297836303711, "learning_rate": 1.7172780082987553e-05, "loss": 1.2559, "step": 8523 }, { "epoch": 7.0738589211618255, "grad_norm": 29.56853675842285, "learning_rate": 1.7172448132780085e-05, "loss": 1.7745, "step": 8524 }, { "epoch": 7.074688796680498, "grad_norm": 20.883983612060547, "learning_rate": 1.7172116182572617e-05, "loss": 1.166, "step": 8525 }, { "epoch": 7.07551867219917, "grad_norm": 31.874887466430664, "learning_rate": 1.7171784232365146e-05, "loss": 1.2265, "step": 8526 }, { "epoch": 7.076348547717842, "grad_norm": 23.715917587280273, "learning_rate": 1.7171452282157678e-05, "loss": 1.6297, "step": 8527 }, { "epoch": 7.077178423236514, "grad_norm": 19.50196647644043, "learning_rate": 1.717112033195021e-05, "loss": 1.3051, "step": 8528 }, { "epoch": 7.0780082987551864, "grad_norm": 20.33822250366211, "learning_rate": 1.717078838174274e-05, "loss": 1.3159, "step": 8529 }, { "epoch": 7.078838174273859, "grad_norm": 27.95072364807129, "learning_rate": 1.717045643153527e-05, "loss": 2.059, "step": 8530 }, { "epoch": 7.079668049792531, "grad_norm": 15.078619003295898, "learning_rate": 1.71701244813278e-05, "loss": 0.7631, "step": 8531 }, { "epoch": 7.080497925311203, "grad_norm": 15.884410858154297, "learning_rate": 1.716979253112033e-05, "loss": 1.3219, "step": 8532 }, { "epoch": 7.081327800829875, "grad_norm": 15.306049346923828, "learning_rate": 1.7169460580912864e-05, "loss": 0.5982, "step": 8533 }, { "epoch": 7.082157676348547, "grad_norm": 17.712310791015625, 
"learning_rate": 1.7169128630705396e-05, "loss": 0.8583, "step": 8534 }, { "epoch": 7.08298755186722, "grad_norm": 19.324827194213867, "learning_rate": 1.7168796680497928e-05, "loss": 1.4994, "step": 8535 }, { "epoch": 7.083817427385892, "grad_norm": 17.788488388061523, "learning_rate": 1.7168464730290457e-05, "loss": 1.0394, "step": 8536 }, { "epoch": 7.084647302904564, "grad_norm": 23.849409103393555, "learning_rate": 1.716813278008299e-05, "loss": 2.0866, "step": 8537 }, { "epoch": 7.085477178423236, "grad_norm": 11.889535903930664, "learning_rate": 1.716780082987552e-05, "loss": 0.6289, "step": 8538 }, { "epoch": 7.086307053941908, "grad_norm": 13.872451782226562, "learning_rate": 1.7167468879668053e-05, "loss": 0.7008, "step": 8539 }, { "epoch": 7.087136929460581, "grad_norm": 22.051856994628906, "learning_rate": 1.7167136929460582e-05, "loss": 0.8522, "step": 8540 }, { "epoch": 7.087966804979253, "grad_norm": 18.34075355529785, "learning_rate": 1.7166804979253114e-05, "loss": 0.6136, "step": 8541 }, { "epoch": 7.088796680497925, "grad_norm": 16.668567657470703, "learning_rate": 1.7166473029045646e-05, "loss": 1.393, "step": 8542 }, { "epoch": 7.089626556016597, "grad_norm": 13.595656394958496, "learning_rate": 1.7166141078838178e-05, "loss": 0.7116, "step": 8543 }, { "epoch": 7.090456431535269, "grad_norm": 14.924257278442383, "learning_rate": 1.7165809128630707e-05, "loss": 0.9871, "step": 8544 }, { "epoch": 7.091286307053942, "grad_norm": 27.6531925201416, "learning_rate": 1.716547717842324e-05, "loss": 1.2705, "step": 8545 }, { "epoch": 7.092116182572614, "grad_norm": 21.250892639160156, "learning_rate": 1.716514522821577e-05, "loss": 0.9937, "step": 8546 }, { "epoch": 7.092946058091286, "grad_norm": 16.433364868164062, "learning_rate": 1.71648132780083e-05, "loss": 1.2106, "step": 8547 }, { "epoch": 7.093775933609958, "grad_norm": 16.883024215698242, "learning_rate": 1.7164481327800832e-05, "loss": 0.5875, "step": 8548 }, { "epoch": 7.09460580912863, 
"grad_norm": 15.800712585449219, "learning_rate": 1.716414937759336e-05, "loss": 0.779, "step": 8549 }, { "epoch": 7.095435684647303, "grad_norm": 14.300899505615234, "learning_rate": 1.7163817427385893e-05, "loss": 1.4455, "step": 8550 }, { "epoch": 7.096265560165975, "grad_norm": 17.248659133911133, "learning_rate": 1.7163485477178425e-05, "loss": 1.053, "step": 8551 }, { "epoch": 7.097095435684647, "grad_norm": 22.89449119567871, "learning_rate": 1.7163153526970953e-05, "loss": 1.0798, "step": 8552 }, { "epoch": 7.097925311203319, "grad_norm": 20.486061096191406, "learning_rate": 1.7162821576763486e-05, "loss": 1.2735, "step": 8553 }, { "epoch": 7.098755186721991, "grad_norm": 21.81975746154785, "learning_rate": 1.7162489626556018e-05, "loss": 0.9408, "step": 8554 }, { "epoch": 7.0995850622406635, "grad_norm": 25.344226837158203, "learning_rate": 1.716215767634855e-05, "loss": 1.3643, "step": 8555 }, { "epoch": 7.100414937759336, "grad_norm": 19.455400466918945, "learning_rate": 1.716182572614108e-05, "loss": 1.0804, "step": 8556 }, { "epoch": 7.101244813278008, "grad_norm": 21.92438507080078, "learning_rate": 1.716149377593361e-05, "loss": 1.8953, "step": 8557 }, { "epoch": 7.10207468879668, "grad_norm": 27.56505584716797, "learning_rate": 1.7161161825726143e-05, "loss": 1.1177, "step": 8558 }, { "epoch": 7.102904564315352, "grad_norm": 21.077472686767578, "learning_rate": 1.7160829875518675e-05, "loss": 1.3103, "step": 8559 }, { "epoch": 7.1037344398340245, "grad_norm": 18.170894622802734, "learning_rate": 1.7160497925311204e-05, "loss": 1.448, "step": 8560 }, { "epoch": 7.104564315352697, "grad_norm": 13.251932144165039, "learning_rate": 1.7160165975103736e-05, "loss": 0.5284, "step": 8561 }, { "epoch": 7.105394190871369, "grad_norm": 15.659805297851562, "learning_rate": 1.7159834024896268e-05, "loss": 1.0441, "step": 8562 }, { "epoch": 7.106224066390041, "grad_norm": 18.878646850585938, "learning_rate": 1.71595020746888e-05, "loss": 0.6166, "step": 8563 }, { 
"epoch": 7.107053941908713, "grad_norm": 24.575977325439453, "learning_rate": 1.715917012448133e-05, "loss": 1.348, "step": 8564 }, { "epoch": 7.1078838174273855, "grad_norm": 21.41334342956543, "learning_rate": 1.715883817427386e-05, "loss": 1.0573, "step": 8565 }, { "epoch": 7.108713692946058, "grad_norm": 20.055221557617188, "learning_rate": 1.7158506224066393e-05, "loss": 1.3337, "step": 8566 }, { "epoch": 7.10954356846473, "grad_norm": 25.230422973632812, "learning_rate": 1.715817427385892e-05, "loss": 1.0786, "step": 8567 }, { "epoch": 7.110373443983402, "grad_norm": 24.30141830444336, "learning_rate": 1.7157842323651454e-05, "loss": 1.3711, "step": 8568 }, { "epoch": 7.111203319502074, "grad_norm": 14.960609436035156, "learning_rate": 1.7157510373443986e-05, "loss": 0.9219, "step": 8569 }, { "epoch": 7.1120331950207465, "grad_norm": 18.438016891479492, "learning_rate": 1.7157178423236514e-05, "loss": 1.2439, "step": 8570 }, { "epoch": 7.112863070539419, "grad_norm": 15.672109603881836, "learning_rate": 1.7156846473029047e-05, "loss": 0.8564, "step": 8571 }, { "epoch": 7.113692946058091, "grad_norm": 20.61782455444336, "learning_rate": 1.715651452282158e-05, "loss": 1.4841, "step": 8572 }, { "epoch": 7.114522821576763, "grad_norm": 24.10856819152832, "learning_rate": 1.7156182572614107e-05, "loss": 1.3689, "step": 8573 }, { "epoch": 7.115352697095435, "grad_norm": 21.911224365234375, "learning_rate": 1.715585062240664e-05, "loss": 1.332, "step": 8574 }, { "epoch": 7.1161825726141075, "grad_norm": 35.148681640625, "learning_rate": 1.715551867219917e-05, "loss": 1.3392, "step": 8575 }, { "epoch": 7.11701244813278, "grad_norm": 19.721023559570312, "learning_rate": 1.7155186721991704e-05, "loss": 1.4321, "step": 8576 }, { "epoch": 7.117842323651452, "grad_norm": 33.33458709716797, "learning_rate": 1.7154854771784232e-05, "loss": 0.9128, "step": 8577 }, { "epoch": 7.118672199170124, "grad_norm": 11.497068405151367, "learning_rate": 1.7154522821576765e-05, "loss": 
0.648, "step": 8578 }, { "epoch": 7.119502074688796, "grad_norm": 27.227632522583008, "learning_rate": 1.7154190871369297e-05, "loss": 1.0035, "step": 8579 }, { "epoch": 7.1203319502074685, "grad_norm": 15.377707481384277, "learning_rate": 1.715385892116183e-05, "loss": 0.8809, "step": 8580 }, { "epoch": 7.121161825726141, "grad_norm": 28.467361450195312, "learning_rate": 1.7153526970954357e-05, "loss": 1.2539, "step": 8581 }, { "epoch": 7.121991701244813, "grad_norm": 20.34481430053711, "learning_rate": 1.715319502074689e-05, "loss": 1.6609, "step": 8582 }, { "epoch": 7.122821576763485, "grad_norm": 15.94701862335205, "learning_rate": 1.7152863070539422e-05, "loss": 0.7009, "step": 8583 }, { "epoch": 7.123651452282157, "grad_norm": 34.577842712402344, "learning_rate": 1.7152531120331954e-05, "loss": 1.4992, "step": 8584 }, { "epoch": 7.124481327800829, "grad_norm": 15.887591361999512, "learning_rate": 1.7152199170124483e-05, "loss": 1.2392, "step": 8585 }, { "epoch": 7.125311203319502, "grad_norm": 14.11723518371582, "learning_rate": 1.7151867219917015e-05, "loss": 0.7291, "step": 8586 }, { "epoch": 7.126141078838174, "grad_norm": 29.351842880249023, "learning_rate": 1.7151535269709543e-05, "loss": 1.3026, "step": 8587 }, { "epoch": 7.126970954356846, "grad_norm": 20.585491180419922, "learning_rate": 1.7151203319502075e-05, "loss": 0.9847, "step": 8588 }, { "epoch": 7.127800829875518, "grad_norm": 23.449434280395508, "learning_rate": 1.7150871369294608e-05, "loss": 0.9433, "step": 8589 }, { "epoch": 7.12863070539419, "grad_norm": 26.839902877807617, "learning_rate": 1.7150539419087136e-05, "loss": 1.1372, "step": 8590 }, { "epoch": 7.1294605809128635, "grad_norm": 29.00288200378418, "learning_rate": 1.715020746887967e-05, "loss": 1.7488, "step": 8591 }, { "epoch": 7.130290456431536, "grad_norm": 19.441425323486328, "learning_rate": 1.71498755186722e-05, "loss": 0.9394, "step": 8592 }, { "epoch": 7.131120331950208, "grad_norm": 19.956592559814453, "learning_rate": 
1.7149543568464733e-05, "loss": 1.9195, "step": 8593 }, { "epoch": 7.13195020746888, "grad_norm": 18.44685935974121, "learning_rate": 1.714921161825726e-05, "loss": 1.7544, "step": 8594 }, { "epoch": 7.132780082987552, "grad_norm": 20.121883392333984, "learning_rate": 1.7148879668049793e-05, "loss": 0.6504, "step": 8595 }, { "epoch": 7.1336099585062245, "grad_norm": 14.897809028625488, "learning_rate": 1.7148547717842326e-05, "loss": 1.1043, "step": 8596 }, { "epoch": 7.134439834024897, "grad_norm": 23.344045639038086, "learning_rate": 1.7148215767634858e-05, "loss": 1.0194, "step": 8597 }, { "epoch": 7.135269709543569, "grad_norm": 15.848783493041992, "learning_rate": 1.7147883817427386e-05, "loss": 0.7447, "step": 8598 }, { "epoch": 7.136099585062241, "grad_norm": 36.10184860229492, "learning_rate": 1.714755186721992e-05, "loss": 1.484, "step": 8599 }, { "epoch": 7.136929460580913, "grad_norm": 15.706628799438477, "learning_rate": 1.714721991701245e-05, "loss": 0.8395, "step": 8600 }, { "epoch": 7.1377593360995855, "grad_norm": 36.157997131347656, "learning_rate": 1.7146887966804983e-05, "loss": 0.9705, "step": 8601 }, { "epoch": 7.138589211618258, "grad_norm": 42.78337478637695, "learning_rate": 1.714655601659751e-05, "loss": 1.3341, "step": 8602 }, { "epoch": 7.13941908713693, "grad_norm": 24.742807388305664, "learning_rate": 1.7146224066390044e-05, "loss": 1.4166, "step": 8603 }, { "epoch": 7.140248962655602, "grad_norm": 29.53207015991211, "learning_rate": 1.7145892116182576e-05, "loss": 1.7944, "step": 8604 }, { "epoch": 7.141078838174274, "grad_norm": 35.57652282714844, "learning_rate": 1.7145560165975104e-05, "loss": 1.4275, "step": 8605 }, { "epoch": 7.141908713692946, "grad_norm": 18.409151077270508, "learning_rate": 1.7145228215767636e-05, "loss": 0.9748, "step": 8606 }, { "epoch": 7.142738589211619, "grad_norm": 21.888124465942383, "learning_rate": 1.714489626556017e-05, "loss": 0.7127, "step": 8607 }, { "epoch": 7.143568464730291, "grad_norm": 
16.026268005371094, "learning_rate": 1.7144564315352697e-05, "loss": 0.8501, "step": 8608 }, { "epoch": 7.144398340248963, "grad_norm": 24.48497772216797, "learning_rate": 1.714423236514523e-05, "loss": 1.3705, "step": 8609 }, { "epoch": 7.145228215767635, "grad_norm": 40.01828384399414, "learning_rate": 1.7143900414937758e-05, "loss": 1.0587, "step": 8610 }, { "epoch": 7.146058091286307, "grad_norm": 28.041366577148438, "learning_rate": 1.714356846473029e-05, "loss": 1.5549, "step": 8611 }, { "epoch": 7.14688796680498, "grad_norm": 34.808040618896484, "learning_rate": 1.7143236514522822e-05, "loss": 1.0362, "step": 8612 }, { "epoch": 7.147717842323652, "grad_norm": 17.295888900756836, "learning_rate": 1.7142904564315354e-05, "loss": 1.1134, "step": 8613 }, { "epoch": 7.148547717842324, "grad_norm": 24.204872131347656, "learning_rate": 1.7142572614107887e-05, "loss": 1.2048, "step": 8614 }, { "epoch": 7.149377593360996, "grad_norm": 36.91103744506836, "learning_rate": 1.7142240663900415e-05, "loss": 1.4462, "step": 8615 }, { "epoch": 7.150207468879668, "grad_norm": 21.61081886291504, "learning_rate": 1.7141908713692947e-05, "loss": 1.5169, "step": 8616 }, { "epoch": 7.151037344398341, "grad_norm": 24.414749145507812, "learning_rate": 1.714157676348548e-05, "loss": 1.1702, "step": 8617 }, { "epoch": 7.151867219917013, "grad_norm": 14.860944747924805, "learning_rate": 1.714124481327801e-05, "loss": 0.9693, "step": 8618 }, { "epoch": 7.152697095435685, "grad_norm": 38.17900848388672, "learning_rate": 1.714091286307054e-05, "loss": 1.1941, "step": 8619 }, { "epoch": 7.153526970954357, "grad_norm": 19.73838996887207, "learning_rate": 1.7140580912863072e-05, "loss": 1.1639, "step": 8620 }, { "epoch": 7.154356846473029, "grad_norm": 14.632635116577148, "learning_rate": 1.7140248962655605e-05, "loss": 0.9869, "step": 8621 }, { "epoch": 7.155186721991702, "grad_norm": 12.764305114746094, "learning_rate": 1.7139917012448137e-05, "loss": 1.1636, "step": 8622 }, { "epoch": 
7.156016597510374, "grad_norm": 24.630216598510742, "learning_rate": 1.7139585062240665e-05, "loss": 1.5656, "step": 8623 }, { "epoch": 7.156846473029046, "grad_norm": 18.136211395263672, "learning_rate": 1.7139253112033197e-05, "loss": 1.2538, "step": 8624 }, { "epoch": 7.157676348547718, "grad_norm": 15.703389167785645, "learning_rate": 1.7138921161825726e-05, "loss": 0.7204, "step": 8625 }, { "epoch": 7.15850622406639, "grad_norm": 17.14686393737793, "learning_rate": 1.7138589211618258e-05, "loss": 1.4144, "step": 8626 }, { "epoch": 7.159336099585063, "grad_norm": 30.35832405090332, "learning_rate": 1.713825726141079e-05, "loss": 0.9448, "step": 8627 }, { "epoch": 7.160165975103735, "grad_norm": 15.980886459350586, "learning_rate": 1.713792531120332e-05, "loss": 0.7782, "step": 8628 }, { "epoch": 7.160995850622407, "grad_norm": 19.628347396850586, "learning_rate": 1.713759336099585e-05, "loss": 1.0615, "step": 8629 }, { "epoch": 7.161825726141079, "grad_norm": 18.347118377685547, "learning_rate": 1.7137261410788383e-05, "loss": 1.1983, "step": 8630 }, { "epoch": 7.162655601659751, "grad_norm": 15.40544319152832, "learning_rate": 1.7136929460580912e-05, "loss": 1.3058, "step": 8631 }, { "epoch": 7.1634854771784235, "grad_norm": 14.642464637756348, "learning_rate": 1.7136597510373444e-05, "loss": 0.5415, "step": 8632 }, { "epoch": 7.164315352697096, "grad_norm": 15.811664581298828, "learning_rate": 1.7136265560165976e-05, "loss": 1.2527, "step": 8633 }, { "epoch": 7.165145228215768, "grad_norm": 39.120628356933594, "learning_rate": 1.713593360995851e-05, "loss": 1.5624, "step": 8634 }, { "epoch": 7.16597510373444, "grad_norm": 16.42428207397461, "learning_rate": 1.7135601659751037e-05, "loss": 0.8037, "step": 8635 }, { "epoch": 7.166804979253112, "grad_norm": 13.415698051452637, "learning_rate": 1.713526970954357e-05, "loss": 0.8421, "step": 8636 }, { "epoch": 7.1676348547717845, "grad_norm": 43.344810485839844, "learning_rate": 1.71349377593361e-05, "loss": 
0.9191, "step": 8637 }, { "epoch": 7.168464730290457, "grad_norm": 13.91257381439209, "learning_rate": 1.7134605809128633e-05, "loss": 0.6911, "step": 8638 }, { "epoch": 7.169294605809129, "grad_norm": 17.72274398803711, "learning_rate": 1.7134273858921162e-05, "loss": 0.9874, "step": 8639 }, { "epoch": 7.170124481327801, "grad_norm": 14.829520225524902, "learning_rate": 1.7133941908713694e-05, "loss": 1.204, "step": 8640 }, { "epoch": 7.170954356846473, "grad_norm": 14.345429420471191, "learning_rate": 1.7133609958506226e-05, "loss": 0.87, "step": 8641 }, { "epoch": 7.1717842323651455, "grad_norm": 25.222740173339844, "learning_rate": 1.713327800829876e-05, "loss": 0.6626, "step": 8642 }, { "epoch": 7.172614107883818, "grad_norm": 18.25530433654785, "learning_rate": 1.7132946058091287e-05, "loss": 1.6119, "step": 8643 }, { "epoch": 7.17344398340249, "grad_norm": 17.430347442626953, "learning_rate": 1.713261410788382e-05, "loss": 0.7781, "step": 8644 }, { "epoch": 7.174273858921162, "grad_norm": 21.820606231689453, "learning_rate": 1.713228215767635e-05, "loss": 0.9038, "step": 8645 }, { "epoch": 7.175103734439834, "grad_norm": 24.179115295410156, "learning_rate": 1.713195020746888e-05, "loss": 1.1691, "step": 8646 }, { "epoch": 7.1759336099585065, "grad_norm": 44.85533142089844, "learning_rate": 1.7131618257261412e-05, "loss": 1.6739, "step": 8647 }, { "epoch": 7.176763485477179, "grad_norm": 26.1885929107666, "learning_rate": 1.713128630705394e-05, "loss": 1.1804, "step": 8648 }, { "epoch": 7.177593360995851, "grad_norm": 46.45350646972656, "learning_rate": 1.7130954356846473e-05, "loss": 1.4382, "step": 8649 }, { "epoch": 7.178423236514523, "grad_norm": 23.196638107299805, "learning_rate": 1.7130622406639005e-05, "loss": 1.6492, "step": 8650 }, { "epoch": 7.179253112033195, "grad_norm": 32.32682800292969, "learning_rate": 1.7130290456431537e-05, "loss": 1.1767, "step": 8651 }, { "epoch": 7.1800829875518675, "grad_norm": 28.020410537719727, "learning_rate": 
1.7129958506224066e-05, "loss": 1.5903, "step": 8652 }, { "epoch": 7.18091286307054, "grad_norm": 19.546649932861328, "learning_rate": 1.7129626556016598e-05, "loss": 1.3709, "step": 8653 }, { "epoch": 7.181742738589212, "grad_norm": 23.08062744140625, "learning_rate": 1.712929460580913e-05, "loss": 1.7602, "step": 8654 }, { "epoch": 7.182572614107884, "grad_norm": 45.622039794921875, "learning_rate": 1.7128962655601662e-05, "loss": 1.2607, "step": 8655 }, { "epoch": 7.183402489626556, "grad_norm": 24.77423858642578, "learning_rate": 1.712863070539419e-05, "loss": 1.7612, "step": 8656 }, { "epoch": 7.1842323651452284, "grad_norm": 30.23611831665039, "learning_rate": 1.7128298755186723e-05, "loss": 1.0329, "step": 8657 }, { "epoch": 7.185062240663901, "grad_norm": 37.246360778808594, "learning_rate": 1.7127966804979255e-05, "loss": 1.2175, "step": 8658 }, { "epoch": 7.185892116182573, "grad_norm": 19.569738388061523, "learning_rate": 1.7127634854771787e-05, "loss": 1.2359, "step": 8659 }, { "epoch": 7.186721991701245, "grad_norm": 13.87121868133545, "learning_rate": 1.7127302904564316e-05, "loss": 1.1464, "step": 8660 }, { "epoch": 7.187551867219917, "grad_norm": 15.211047172546387, "learning_rate": 1.7126970954356848e-05, "loss": 0.9103, "step": 8661 }, { "epoch": 7.188381742738589, "grad_norm": 14.518513679504395, "learning_rate": 1.712663900414938e-05, "loss": 0.9812, "step": 8662 }, { "epoch": 7.189211618257262, "grad_norm": 30.11065673828125, "learning_rate": 1.7126307053941912e-05, "loss": 1.223, "step": 8663 }, { "epoch": 7.190041493775934, "grad_norm": 17.640987396240234, "learning_rate": 1.712597510373444e-05, "loss": 1.0165, "step": 8664 }, { "epoch": 7.190871369294606, "grad_norm": 44.243858337402344, "learning_rate": 1.7125643153526973e-05, "loss": 0.8135, "step": 8665 }, { "epoch": 7.191701244813278, "grad_norm": 32.279571533203125, "learning_rate": 1.7125311203319502e-05, "loss": 1.4235, "step": 8666 }, { "epoch": 7.19253112033195, "grad_norm": 
15.573716163635254, "learning_rate": 1.7124979253112034e-05, "loss": 0.9902, "step": 8667 }, { "epoch": 7.193360995850623, "grad_norm": 26.994159698486328, "learning_rate": 1.7124647302904566e-05, "loss": 1.024, "step": 8668 }, { "epoch": 7.194190871369295, "grad_norm": 15.821189880371094, "learning_rate": 1.7124315352697095e-05, "loss": 1.6184, "step": 8669 }, { "epoch": 7.195020746887967, "grad_norm": 11.36607837677002, "learning_rate": 1.7123983402489627e-05, "loss": 0.7782, "step": 8670 }, { "epoch": 7.195850622406639, "grad_norm": 22.25038719177246, "learning_rate": 1.712365145228216e-05, "loss": 0.924, "step": 8671 }, { "epoch": 7.196680497925311, "grad_norm": 25.9702205657959, "learning_rate": 1.712331950207469e-05, "loss": 1.0993, "step": 8672 }, { "epoch": 7.197510373443984, "grad_norm": 30.169044494628906, "learning_rate": 1.712298755186722e-05, "loss": 2.1307, "step": 8673 }, { "epoch": 7.198340248962656, "grad_norm": 29.895519256591797, "learning_rate": 1.7122655601659752e-05, "loss": 2.216, "step": 8674 }, { "epoch": 7.199170124481328, "grad_norm": 14.470712661743164, "learning_rate": 1.7122323651452284e-05, "loss": 0.9334, "step": 8675 }, { "epoch": 7.2, "grad_norm": 18.31903076171875, "learning_rate": 1.7121991701244816e-05, "loss": 1.2202, "step": 8676 }, { "epoch": 7.200829875518672, "grad_norm": 18.265769958496094, "learning_rate": 1.7121659751037345e-05, "loss": 0.9135, "step": 8677 }, { "epoch": 7.201659751037345, "grad_norm": 17.491790771484375, "learning_rate": 1.7121327800829877e-05, "loss": 0.736, "step": 8678 }, { "epoch": 7.202489626556017, "grad_norm": 49.23735809326172, "learning_rate": 1.712099585062241e-05, "loss": 1.0866, "step": 8679 }, { "epoch": 7.203319502074689, "grad_norm": 27.31113052368164, "learning_rate": 1.712066390041494e-05, "loss": 1.1362, "step": 8680 }, { "epoch": 7.204149377593361, "grad_norm": 24.87774085998535, "learning_rate": 1.712033195020747e-05, "loss": 1.0392, "step": 8681 }, { "epoch": 7.204979253112033, 
"grad_norm": 32.847862243652344, "learning_rate": 1.7120000000000002e-05, "loss": 1.3203, "step": 8682 }, { "epoch": 7.2058091286307056, "grad_norm": 24.187267303466797, "learning_rate": 1.7119668049792534e-05, "loss": 1.4726, "step": 8683 }, { "epoch": 7.206639004149378, "grad_norm": 20.113309860229492, "learning_rate": 1.7119336099585063e-05, "loss": 1.1453, "step": 8684 }, { "epoch": 7.20746887966805, "grad_norm": 26.361431121826172, "learning_rate": 1.7119004149377595e-05, "loss": 1.22, "step": 8685 }, { "epoch": 7.208298755186722, "grad_norm": 24.820878982543945, "learning_rate": 1.7118672199170127e-05, "loss": 1.2372, "step": 8686 }, { "epoch": 7.209128630705394, "grad_norm": 16.047809600830078, "learning_rate": 1.7118340248962656e-05, "loss": 0.8673, "step": 8687 }, { "epoch": 7.2099585062240665, "grad_norm": 30.62336540222168, "learning_rate": 1.7118008298755188e-05, "loss": 1.3032, "step": 8688 }, { "epoch": 7.210788381742739, "grad_norm": 19.38446807861328, "learning_rate": 1.7117676348547717e-05, "loss": 0.8616, "step": 8689 }, { "epoch": 7.211618257261411, "grad_norm": 14.448186874389648, "learning_rate": 1.711734439834025e-05, "loss": 1.2368, "step": 8690 }, { "epoch": 7.212448132780083, "grad_norm": 21.77968978881836, "learning_rate": 1.711701244813278e-05, "loss": 1.0158, "step": 8691 }, { "epoch": 7.213278008298755, "grad_norm": 19.30718994140625, "learning_rate": 1.7116680497925313e-05, "loss": 0.6571, "step": 8692 }, { "epoch": 7.2141078838174275, "grad_norm": 27.237442016601562, "learning_rate": 1.7116348547717845e-05, "loss": 0.9855, "step": 8693 }, { "epoch": 7.2149377593361, "grad_norm": 26.376728057861328, "learning_rate": 1.7116016597510374e-05, "loss": 1.7929, "step": 8694 }, { "epoch": 7.215767634854772, "grad_norm": 16.387786865234375, "learning_rate": 1.7115684647302906e-05, "loss": 1.0043, "step": 8695 }, { "epoch": 7.216597510373444, "grad_norm": 44.8593635559082, "learning_rate": 1.7115352697095438e-05, "loss": 1.2136, "step": 8696 }, 
{ "epoch": 7.217427385892116, "grad_norm": 27.698745727539062, "learning_rate": 1.711502074688797e-05, "loss": 2.1625, "step": 8697 }, { "epoch": 7.2182572614107885, "grad_norm": 24.39171600341797, "learning_rate": 1.71146887966805e-05, "loss": 1.0029, "step": 8698 }, { "epoch": 7.219087136929461, "grad_norm": 18.871164321899414, "learning_rate": 1.711435684647303e-05, "loss": 0.9929, "step": 8699 }, { "epoch": 7.219917012448133, "grad_norm": 10.483588218688965, "learning_rate": 1.7114024896265563e-05, "loss": 0.6868, "step": 8700 }, { "epoch": 7.220746887966805, "grad_norm": 17.23688507080078, "learning_rate": 1.7113692946058095e-05, "loss": 0.959, "step": 8701 }, { "epoch": 7.221576763485477, "grad_norm": 19.914228439331055, "learning_rate": 1.7113360995850624e-05, "loss": 1.0808, "step": 8702 }, { "epoch": 7.2224066390041495, "grad_norm": 17.416383743286133, "learning_rate": 1.7113029045643156e-05, "loss": 1.1001, "step": 8703 }, { "epoch": 7.223236514522822, "grad_norm": 25.159921646118164, "learning_rate": 1.7112697095435685e-05, "loss": 1.603, "step": 8704 }, { "epoch": 7.224066390041494, "grad_norm": 18.61259651184082, "learning_rate": 1.7112365145228217e-05, "loss": 0.809, "step": 8705 }, { "epoch": 7.224896265560166, "grad_norm": 25.951852798461914, "learning_rate": 1.711203319502075e-05, "loss": 0.9329, "step": 8706 }, { "epoch": 7.225726141078838, "grad_norm": 24.314170837402344, "learning_rate": 1.7111701244813278e-05, "loss": 1.4435, "step": 8707 }, { "epoch": 7.2265560165975105, "grad_norm": 17.683727264404297, "learning_rate": 1.711136929460581e-05, "loss": 0.6232, "step": 8708 }, { "epoch": 7.227385892116183, "grad_norm": 23.28853988647461, "learning_rate": 1.7111037344398342e-05, "loss": 1.1969, "step": 8709 }, { "epoch": 7.228215767634855, "grad_norm": 19.874563217163086, "learning_rate": 1.711070539419087e-05, "loss": 1.0797, "step": 8710 }, { "epoch": 7.229045643153527, "grad_norm": 14.760436058044434, "learning_rate": 1.7110373443983403e-05, 
"loss": 0.5904, "step": 8711 }, { "epoch": 7.229875518672199, "grad_norm": 25.406322479248047, "learning_rate": 1.7110041493775935e-05, "loss": 1.103, "step": 8712 }, { "epoch": 7.230705394190871, "grad_norm": 16.301658630371094, "learning_rate": 1.7109709543568467e-05, "loss": 0.9845, "step": 8713 }, { "epoch": 7.231535269709544, "grad_norm": 18.595672607421875, "learning_rate": 1.7109377593360996e-05, "loss": 0.6576, "step": 8714 }, { "epoch": 7.232365145228216, "grad_norm": 18.961130142211914, "learning_rate": 1.7109045643153528e-05, "loss": 0.7287, "step": 8715 }, { "epoch": 7.233195020746888, "grad_norm": 34.960723876953125, "learning_rate": 1.710871369294606e-05, "loss": 1.723, "step": 8716 }, { "epoch": 7.23402489626556, "grad_norm": 18.019784927368164, "learning_rate": 1.7108381742738592e-05, "loss": 1.0116, "step": 8717 }, { "epoch": 7.234854771784232, "grad_norm": 19.1197452545166, "learning_rate": 1.710804979253112e-05, "loss": 0.8506, "step": 8718 }, { "epoch": 7.235684647302905, "grad_norm": 33.78351974487305, "learning_rate": 1.7107717842323653e-05, "loss": 1.5269, "step": 8719 }, { "epoch": 7.236514522821577, "grad_norm": 15.940787315368652, "learning_rate": 1.7107385892116185e-05, "loss": 0.6801, "step": 8720 }, { "epoch": 7.237344398340249, "grad_norm": 30.592884063720703, "learning_rate": 1.7107053941908717e-05, "loss": 1.6084, "step": 8721 }, { "epoch": 7.238174273858921, "grad_norm": 30.024261474609375, "learning_rate": 1.7106721991701246e-05, "loss": 1.031, "step": 8722 }, { "epoch": 7.239004149377593, "grad_norm": 15.692007064819336, "learning_rate": 1.7106390041493778e-05, "loss": 1.1146, "step": 8723 }, { "epoch": 7.239834024896266, "grad_norm": 24.86977767944336, "learning_rate": 1.710605809128631e-05, "loss": 1.1756, "step": 8724 }, { "epoch": 7.240663900414938, "grad_norm": 34.543418884277344, "learning_rate": 1.710572614107884e-05, "loss": 1.4051, "step": 8725 }, { "epoch": 7.24149377593361, "grad_norm": 15.157062530517578, 
"learning_rate": 1.710539419087137e-05, "loss": 0.816, "step": 8726 }, { "epoch": 7.242323651452282, "grad_norm": 13.915562629699707, "learning_rate": 1.71050622406639e-05, "loss": 1.0301, "step": 8727 }, { "epoch": 7.243153526970954, "grad_norm": 18.649253845214844, "learning_rate": 1.710473029045643e-05, "loss": 0.9422, "step": 8728 }, { "epoch": 7.243983402489627, "grad_norm": 20.69742774963379, "learning_rate": 1.7104398340248964e-05, "loss": 1.2107, "step": 8729 }, { "epoch": 7.244813278008299, "grad_norm": 19.581390380859375, "learning_rate": 1.7104066390041496e-05, "loss": 1.0556, "step": 8730 }, { "epoch": 7.245643153526971, "grad_norm": 15.377705574035645, "learning_rate": 1.7103734439834025e-05, "loss": 0.5334, "step": 8731 }, { "epoch": 7.246473029045643, "grad_norm": 12.946175575256348, "learning_rate": 1.7103402489626557e-05, "loss": 0.7449, "step": 8732 }, { "epoch": 7.247302904564315, "grad_norm": 24.133329391479492, "learning_rate": 1.710307053941909e-05, "loss": 1.0285, "step": 8733 }, { "epoch": 7.248132780082988, "grad_norm": 21.082048416137695, "learning_rate": 1.710273858921162e-05, "loss": 1.4765, "step": 8734 }, { "epoch": 7.24896265560166, "grad_norm": 20.555564880371094, "learning_rate": 1.710240663900415e-05, "loss": 0.7336, "step": 8735 }, { "epoch": 7.249792531120332, "grad_norm": 26.288558959960938, "learning_rate": 1.7102074688796682e-05, "loss": 1.5981, "step": 8736 }, { "epoch": 7.250622406639004, "grad_norm": 16.791004180908203, "learning_rate": 1.7101742738589214e-05, "loss": 0.9171, "step": 8737 }, { "epoch": 7.251452282157676, "grad_norm": 26.03157615661621, "learning_rate": 1.7101410788381746e-05, "loss": 1.0724, "step": 8738 }, { "epoch": 7.2522821576763485, "grad_norm": 24.50535774230957, "learning_rate": 1.7101078838174275e-05, "loss": 1.5194, "step": 8739 }, { "epoch": 7.253112033195021, "grad_norm": 22.63925552368164, "learning_rate": 1.7100746887966807e-05, "loss": 1.0601, "step": 8740 }, { "epoch": 7.253941908713693, 
"grad_norm": 23.168540954589844, "learning_rate": 1.710041493775934e-05, "loss": 0.952, "step": 8741 }, { "epoch": 7.254771784232365, "grad_norm": 19.360197067260742, "learning_rate": 1.7100082987551868e-05, "loss": 1.4471, "step": 8742 }, { "epoch": 7.255601659751037, "grad_norm": NaN, "learning_rate": 1.7100082987551868e-05, "loss": 1.8253, "step": 8743 }, { "epoch": 7.2564315352697095, "grad_norm": 49.34672164916992, "learning_rate": 1.70997510373444e-05, "loss": 1.5624, "step": 8744 }, { "epoch": 7.257261410788382, "grad_norm": 17.103363037109375, "learning_rate": 1.7099419087136932e-05, "loss": 1.0891, "step": 8745 }, { "epoch": 7.258091286307054, "grad_norm": 35.104984283447266, "learning_rate": 1.709908713692946e-05, "loss": 1.6804, "step": 8746 }, { "epoch": 7.258921161825726, "grad_norm": 16.431194305419922, "learning_rate": 1.7098755186721993e-05, "loss": 1.2891, "step": 8747 }, { "epoch": 7.259751037344398, "grad_norm": 16.11979866027832, "learning_rate": 1.7098423236514525e-05, "loss": 1.0216, "step": 8748 }, { "epoch": 7.2605809128630705, "grad_norm": 15.981657981872559, "learning_rate": 1.7098091286307053e-05, "loss": 0.8683, "step": 8749 }, { "epoch": 7.261410788381743, "grad_norm": 30.129241943359375, "learning_rate": 1.7097759336099586e-05, "loss": 1.0574, "step": 8750 }, { "epoch": 7.262240663900415, "grad_norm": 26.634464263916016, "learning_rate": 1.7097427385892118e-05, "loss": 1.4235, "step": 8751 }, { "epoch": 7.263070539419087, "grad_norm": 18.246070861816406, "learning_rate": 1.709709543568465e-05, "loss": 0.8635, "step": 8752 }, { "epoch": 7.263900414937759, "grad_norm": 25.15169906616211, "learning_rate": 1.709676348547718e-05, "loss": 1.4693, "step": 8753 }, { "epoch": 7.2647302904564315, "grad_norm": 18.353322982788086, "learning_rate": 1.709643153526971e-05, "loss": 1.2816, "step": 8754 }, { "epoch": 7.265560165975104, "grad_norm": 21.99224281311035, "learning_rate": 1.7096099585062243e-05, "loss": 1.1425, "step": 8755 }, { "epoch": 
7.266390041493776, "grad_norm": 14.680546760559082, "learning_rate": 1.7095767634854775e-05, "loss": 0.9794, "step": 8756 }, { "epoch": 7.267219917012448, "grad_norm": 21.601734161376953, "learning_rate": 1.7095435684647304e-05, "loss": 1.0962, "step": 8757 }, { "epoch": 7.26804979253112, "grad_norm": 23.21156120300293, "learning_rate": 1.7095103734439836e-05, "loss": 1.1048, "step": 8758 }, { "epoch": 7.2688796680497925, "grad_norm": 19.7839412689209, "learning_rate": 1.7094771784232368e-05, "loss": 1.094, "step": 8759 }, { "epoch": 7.269709543568465, "grad_norm": 12.41362476348877, "learning_rate": 1.70944398340249e-05, "loss": 0.3546, "step": 8760 }, { "epoch": 7.270539419087137, "grad_norm": 12.874836921691895, "learning_rate": 1.709410788381743e-05, "loss": 0.615, "step": 8761 }, { "epoch": 7.271369294605809, "grad_norm": 31.90723419189453, "learning_rate": 1.709377593360996e-05, "loss": 1.311, "step": 8762 }, { "epoch": 7.272199170124481, "grad_norm": 29.080101013183594, "learning_rate": 1.7093443983402493e-05, "loss": 1.5638, "step": 8763 }, { "epoch": 7.2730290456431534, "grad_norm": 22.77300262451172, "learning_rate": 1.709311203319502e-05, "loss": 1.2395, "step": 8764 }, { "epoch": 7.273858921161826, "grad_norm": 16.915496826171875, "learning_rate": 1.7092780082987554e-05, "loss": 0.8199, "step": 8765 }, { "epoch": 7.274688796680498, "grad_norm": 25.481796264648438, "learning_rate": 1.7092448132780082e-05, "loss": 0.9321, "step": 8766 }, { "epoch": 7.27551867219917, "grad_norm": 24.877044677734375, "learning_rate": 1.7092116182572614e-05, "loss": 1.4338, "step": 8767 }, { "epoch": 7.276348547717842, "grad_norm": 16.14531898498535, "learning_rate": 1.7091784232365147e-05, "loss": 0.3624, "step": 8768 }, { "epoch": 7.277178423236514, "grad_norm": 19.578577041625977, "learning_rate": 1.7091452282157675e-05, "loss": 1.2255, "step": 8769 }, { "epoch": 7.278008298755187, "grad_norm": 20.711101531982422, "learning_rate": 1.7091120331950207e-05, "loss": 0.9253, 
"step": 8770 }, { "epoch": 7.278838174273859, "grad_norm": 13.386648178100586, "learning_rate": 1.709078838174274e-05, "loss": 0.8529, "step": 8771 }, { "epoch": 7.279668049792531, "grad_norm": 29.37633514404297, "learning_rate": 1.709045643153527e-05, "loss": 0.9739, "step": 8772 }, { "epoch": 7.280497925311203, "grad_norm": 12.511332511901855, "learning_rate": 1.7090124481327804e-05, "loss": 0.8046, "step": 8773 }, { "epoch": 7.281327800829875, "grad_norm": 26.978639602661133, "learning_rate": 1.7089792531120332e-05, "loss": 1.9135, "step": 8774 }, { "epoch": 7.282157676348548, "grad_norm": 33.41547393798828, "learning_rate": 1.7089460580912865e-05, "loss": 1.5314, "step": 8775 }, { "epoch": 7.28298755186722, "grad_norm": 19.33700180053711, "learning_rate": 1.7089128630705397e-05, "loss": 0.9268, "step": 8776 }, { "epoch": 7.283817427385892, "grad_norm": 19.33254623413086, "learning_rate": 1.708879668049793e-05, "loss": 1.2043, "step": 8777 }, { "epoch": 7.284647302904564, "grad_norm": 18.38562774658203, "learning_rate": 1.7088464730290457e-05, "loss": 1.2911, "step": 8778 }, { "epoch": 7.285477178423236, "grad_norm": 24.04576301574707, "learning_rate": 1.708813278008299e-05, "loss": 1.4916, "step": 8779 }, { "epoch": 7.286307053941909, "grad_norm": 19.635578155517578, "learning_rate": 1.708780082987552e-05, "loss": 1.4302, "step": 8780 }, { "epoch": 7.287136929460581, "grad_norm": 18.669126510620117, "learning_rate": 1.7087468879668054e-05, "loss": 1.1909, "step": 8781 }, { "epoch": 7.287966804979253, "grad_norm": 21.966970443725586, "learning_rate": 1.7087136929460583e-05, "loss": 1.4573, "step": 8782 }, { "epoch": 7.288796680497925, "grad_norm": 17.41087532043457, "learning_rate": 1.7086804979253115e-05, "loss": 0.754, "step": 8783 }, { "epoch": 7.289626556016597, "grad_norm": 33.11079406738281, "learning_rate": 1.7086473029045643e-05, "loss": 1.2674, "step": 8784 }, { "epoch": 7.29045643153527, "grad_norm": 17.17698097229004, "learning_rate": 
1.7086141078838175e-05, "loss": 1.0218, "step": 8785 }, { "epoch": 7.291286307053942, "grad_norm": 20.349214553833008, "learning_rate": 1.7085809128630708e-05, "loss": 1.6364, "step": 8786 }, { "epoch": 7.292116182572614, "grad_norm": 22.182283401489258, "learning_rate": 1.7085477178423236e-05, "loss": 1.4863, "step": 8787 }, { "epoch": 7.292946058091286, "grad_norm": 18.301679611206055, "learning_rate": 1.708514522821577e-05, "loss": 1.3266, "step": 8788 }, { "epoch": 7.293775933609958, "grad_norm": 15.851244926452637, "learning_rate": 1.70848132780083e-05, "loss": 0.8161, "step": 8789 }, { "epoch": 7.2946058091286305, "grad_norm": 18.605396270751953, "learning_rate": 1.708448132780083e-05, "loss": 0.9936, "step": 8790 }, { "epoch": 7.295435684647303, "grad_norm": 20.019189834594727, "learning_rate": 1.708414937759336e-05, "loss": 1.1311, "step": 8791 }, { "epoch": 7.296265560165975, "grad_norm": 14.000717163085938, "learning_rate": 1.7083817427385893e-05, "loss": 1.224, "step": 8792 }, { "epoch": 7.297095435684647, "grad_norm": 46.2686882019043, "learning_rate": 1.7083485477178426e-05, "loss": 1.1025, "step": 8793 }, { "epoch": 7.297925311203319, "grad_norm": 23.83112335205078, "learning_rate": 1.7083153526970954e-05, "loss": 1.5832, "step": 8794 }, { "epoch": 7.2987551867219915, "grad_norm": 16.055435180664062, "learning_rate": 1.7082821576763486e-05, "loss": 0.9952, "step": 8795 }, { "epoch": 7.299585062240664, "grad_norm": 14.554351806640625, "learning_rate": 1.708248962655602e-05, "loss": 0.9158, "step": 8796 }, { "epoch": 7.300414937759336, "grad_norm": 46.94302749633789, "learning_rate": 1.708215767634855e-05, "loss": 1.7024, "step": 8797 }, { "epoch": 7.301244813278008, "grad_norm": 21.339157104492188, "learning_rate": 1.708182572614108e-05, "loss": 1.18, "step": 8798 }, { "epoch": 7.30207468879668, "grad_norm": 33.9492073059082, "learning_rate": 1.708149377593361e-05, "loss": 1.4324, "step": 8799 }, { "epoch": 7.3029045643153525, "grad_norm": 
28.3996639251709, "learning_rate": 1.7081161825726143e-05, "loss": 1.3588, "step": 8800 }, { "epoch": 7.303734439834025, "grad_norm": 27.484636306762695, "learning_rate": 1.7080829875518676e-05, "loss": 1.5613, "step": 8801 }, { "epoch": 7.304564315352697, "grad_norm": 37.69239044189453, "learning_rate": 1.7080497925311204e-05, "loss": 1.1138, "step": 8802 }, { "epoch": 7.305394190871369, "grad_norm": 22.48170280456543, "learning_rate": 1.7080165975103736e-05, "loss": 1.0426, "step": 8803 }, { "epoch": 7.306224066390041, "grad_norm": 30.69989013671875, "learning_rate": 1.707983402489627e-05, "loss": 1.3862, "step": 8804 }, { "epoch": 7.3070539419087135, "grad_norm": 16.729801177978516, "learning_rate": 1.7079502074688797e-05, "loss": 1.2733, "step": 8805 }, { "epoch": 7.307883817427386, "grad_norm": 15.146526336669922, "learning_rate": 1.707917012448133e-05, "loss": 0.8964, "step": 8806 }, { "epoch": 7.308713692946058, "grad_norm": 19.884525299072266, "learning_rate": 1.7078838174273858e-05, "loss": 1.0465, "step": 8807 }, { "epoch": 7.30954356846473, "grad_norm": 23.590303421020508, "learning_rate": 1.707850622406639e-05, "loss": 0.7863, "step": 8808 }, { "epoch": 7.310373443983402, "grad_norm": 18.07733726501465, "learning_rate": 1.7078174273858922e-05, "loss": 1.1478, "step": 8809 }, { "epoch": 7.3112033195020745, "grad_norm": 20.888338088989258, "learning_rate": 1.7077842323651454e-05, "loss": 1.0341, "step": 8810 }, { "epoch": 7.312033195020747, "grad_norm": 20.226736068725586, "learning_rate": 1.7077510373443983e-05, "loss": 1.2494, "step": 8811 }, { "epoch": 7.312863070539419, "grad_norm": 16.978931427001953, "learning_rate": 1.7077178423236515e-05, "loss": 0.8679, "step": 8812 }, { "epoch": 7.313692946058091, "grad_norm": 11.379396438598633, "learning_rate": 1.7076846473029047e-05, "loss": 0.584, "step": 8813 }, { "epoch": 7.314522821576763, "grad_norm": 12.791850090026855, "learning_rate": 1.707651452282158e-05, "loss": 1.1489, "step": 8814 }, { "epoch": 
7.3153526970954355, "grad_norm": 23.228076934814453, "learning_rate": 1.7076182572614108e-05, "loss": 2.2891, "step": 8815 }, { "epoch": 7.316182572614108, "grad_norm": 16.97379493713379, "learning_rate": 1.707585062240664e-05, "loss": 1.0813, "step": 8816 }, { "epoch": 7.31701244813278, "grad_norm": 28.795291900634766, "learning_rate": 1.7075518672199172e-05, "loss": 1.333, "step": 8817 }, { "epoch": 7.317842323651452, "grad_norm": 28.545513153076172, "learning_rate": 1.7075186721991704e-05, "loss": 1.0271, "step": 8818 }, { "epoch": 7.318672199170124, "grad_norm": 39.90550994873047, "learning_rate": 1.7074854771784233e-05, "loss": 1.8241, "step": 8819 }, { "epoch": 7.319502074688796, "grad_norm": 16.236295700073242, "learning_rate": 1.7074522821576765e-05, "loss": 0.7909, "step": 8820 }, { "epoch": 7.320331950207469, "grad_norm": 17.983551025390625, "learning_rate": 1.7074190871369297e-05, "loss": 1.1671, "step": 8821 }, { "epoch": 7.321161825726141, "grad_norm": 25.254919052124023, "learning_rate": 1.7073858921161826e-05, "loss": 1.5498, "step": 8822 }, { "epoch": 7.321991701244813, "grad_norm": 43.330467224121094, "learning_rate": 1.7073526970954358e-05, "loss": 1.8563, "step": 8823 }, { "epoch": 7.322821576763485, "grad_norm": 21.44152069091797, "learning_rate": 1.707319502074689e-05, "loss": 1.4693, "step": 8824 }, { "epoch": 7.323651452282157, "grad_norm": 16.007875442504883, "learning_rate": 1.707286307053942e-05, "loss": 0.8293, "step": 8825 }, { "epoch": 7.32448132780083, "grad_norm": 21.388517379760742, "learning_rate": 1.707253112033195e-05, "loss": 0.8967, "step": 8826 }, { "epoch": 7.325311203319502, "grad_norm": 13.263696670532227, "learning_rate": 1.7072199170124483e-05, "loss": 1.1171, "step": 8827 }, { "epoch": 7.326141078838174, "grad_norm": 16.149593353271484, "learning_rate": 1.7071867219917012e-05, "loss": 1.6759, "step": 8828 }, { "epoch": 7.326970954356846, "grad_norm": 24.15620231628418, "learning_rate": 1.7071535269709544e-05, "loss": 
1.3678, "step": 8829 }, { "epoch": 7.327800829875518, "grad_norm": 34.837867736816406, "learning_rate": 1.7071203319502076e-05, "loss": 0.846, "step": 8830 }, { "epoch": 7.328630705394191, "grad_norm": 20.700462341308594, "learning_rate": 1.707087136929461e-05, "loss": 1.1628, "step": 8831 }, { "epoch": 7.329460580912863, "grad_norm": 15.56086540222168, "learning_rate": 1.7070539419087137e-05, "loss": 1.1269, "step": 8832 }, { "epoch": 7.330290456431535, "grad_norm": 15.165245056152344, "learning_rate": 1.707020746887967e-05, "loss": 0.8264, "step": 8833 }, { "epoch": 7.331120331950207, "grad_norm": 24.621519088745117, "learning_rate": 1.70698755186722e-05, "loss": 1.5554, "step": 8834 }, { "epoch": 7.331950207468879, "grad_norm": 57.18910598754883, "learning_rate": 1.7069543568464733e-05, "loss": 1.581, "step": 8835 }, { "epoch": 7.332780082987552, "grad_norm": 25.312543869018555, "learning_rate": 1.7069211618257262e-05, "loss": 1.1318, "step": 8836 }, { "epoch": 7.333609958506224, "grad_norm": 26.5992488861084, "learning_rate": 1.7068879668049794e-05, "loss": 1.4963, "step": 8837 }, { "epoch": 7.334439834024896, "grad_norm": 23.33147621154785, "learning_rate": 1.7068547717842326e-05, "loss": 1.3058, "step": 8838 }, { "epoch": 7.335269709543568, "grad_norm": 35.8476676940918, "learning_rate": 1.706821576763486e-05, "loss": 1.3962, "step": 8839 }, { "epoch": 7.33609958506224, "grad_norm": 34.676177978515625, "learning_rate": 1.7067883817427387e-05, "loss": 0.88, "step": 8840 }, { "epoch": 7.3369294605809126, "grad_norm": 16.961252212524414, "learning_rate": 1.706755186721992e-05, "loss": 0.5609, "step": 8841 }, { "epoch": 7.337759336099585, "grad_norm": 15.821563720703125, "learning_rate": 1.706721991701245e-05, "loss": 1.0449, "step": 8842 }, { "epoch": 7.338589211618257, "grad_norm": 29.101160049438477, "learning_rate": 1.706688796680498e-05, "loss": 0.8067, "step": 8843 }, { "epoch": 7.339419087136929, "grad_norm": 33.55781936645508, "learning_rate": 
1.7066556016597512e-05, "loss": 2.4427, "step": 8844 }, { "epoch": 7.340248962655601, "grad_norm": 23.84137535095215, "learning_rate": 1.706622406639004e-05, "loss": 1.1366, "step": 8845 }, { "epoch": 7.3410788381742735, "grad_norm": 18.19670867919922, "learning_rate": 1.7065892116182573e-05, "loss": 1.3439, "step": 8846 }, { "epoch": 7.341908713692946, "grad_norm": 26.40576171875, "learning_rate": 1.7065560165975105e-05, "loss": 1.3027, "step": 8847 }, { "epoch": 7.342738589211618, "grad_norm": 38.79165267944336, "learning_rate": 1.7065228215767634e-05, "loss": 1.4218, "step": 8848 }, { "epoch": 7.34356846473029, "grad_norm": 18.935945510864258, "learning_rate": 1.7064896265560166e-05, "loss": 0.9461, "step": 8849 }, { "epoch": 7.344398340248962, "grad_norm": 41.345176696777344, "learning_rate": 1.7064564315352698e-05, "loss": 0.8482, "step": 8850 }, { "epoch": 7.3452282157676345, "grad_norm": 35.56558609008789, "learning_rate": 1.706423236514523e-05, "loss": 2.0613, "step": 8851 }, { "epoch": 7.346058091286307, "grad_norm": 14.003390312194824, "learning_rate": 1.7063900414937762e-05, "loss": 1.2085, "step": 8852 }, { "epoch": 7.346887966804979, "grad_norm": 15.158965110778809, "learning_rate": 1.706356846473029e-05, "loss": 1.7285, "step": 8853 }, { "epoch": 7.347717842323651, "grad_norm": 37.82936477661133, "learning_rate": 1.7063236514522823e-05, "loss": 0.7422, "step": 8854 }, { "epoch": 7.348547717842323, "grad_norm": 23.54196548461914, "learning_rate": 1.7062904564315355e-05, "loss": 1.6245, "step": 8855 }, { "epoch": 7.3493775933609955, "grad_norm": 22.60004425048828, "learning_rate": 1.7062572614107887e-05, "loss": 1.0689, "step": 8856 }, { "epoch": 7.350207468879668, "grad_norm": 15.307825088500977, "learning_rate": 1.7062240663900416e-05, "loss": 0.8134, "step": 8857 }, { "epoch": 7.35103734439834, "grad_norm": 25.41084098815918, "learning_rate": 1.7061908713692948e-05, "loss": 0.8933, "step": 8858 }, { "epoch": 7.351867219917012, "grad_norm": 
25.127382278442383, "learning_rate": 1.706157676348548e-05, "loss": 1.8453, "step": 8859 }, { "epoch": 7.352697095435684, "grad_norm": 28.53845977783203, "learning_rate": 1.706124481327801e-05, "loss": 1.5083, "step": 8860 }, { "epoch": 7.3535269709543565, "grad_norm": 32.84507751464844, "learning_rate": 1.706091286307054e-05, "loss": 1.8952, "step": 8861 }, { "epoch": 7.354356846473029, "grad_norm": 34.606544494628906, "learning_rate": 1.7060580912863073e-05, "loss": 1.3392, "step": 8862 }, { "epoch": 7.355186721991701, "grad_norm": 14.519230842590332, "learning_rate": 1.7060248962655602e-05, "loss": 0.8396, "step": 8863 }, { "epoch": 7.356016597510373, "grad_norm": 21.93714714050293, "learning_rate": 1.7059917012448134e-05, "loss": 0.8736, "step": 8864 }, { "epoch": 7.356846473029045, "grad_norm": 13.542776107788086, "learning_rate": 1.7059585062240666e-05, "loss": 1.0508, "step": 8865 }, { "epoch": 7.3576763485477175, "grad_norm": 21.703964233398438, "learning_rate": 1.7059253112033195e-05, "loss": 1.0073, "step": 8866 }, { "epoch": 7.35850622406639, "grad_norm": 21.6248722076416, "learning_rate": 1.7058921161825727e-05, "loss": 1.5901, "step": 8867 }, { "epoch": 7.359336099585062, "grad_norm": 17.73834991455078, "learning_rate": 1.705858921161826e-05, "loss": 1.1364, "step": 8868 }, { "epoch": 7.360165975103734, "grad_norm": 16.97783088684082, "learning_rate": 1.7058257261410788e-05, "loss": 1.1384, "step": 8869 }, { "epoch": 7.360995850622406, "grad_norm": 19.93321418762207, "learning_rate": 1.705792531120332e-05, "loss": 1.1744, "step": 8870 }, { "epoch": 7.361825726141078, "grad_norm": 13.144916534423828, "learning_rate": 1.7057593360995852e-05, "loss": 0.8545, "step": 8871 }, { "epoch": 7.362655601659751, "grad_norm": 24.44997215270996, "learning_rate": 1.7057261410788384e-05, "loss": 1.2561, "step": 8872 }, { "epoch": 7.363485477178423, "grad_norm": 14.144471168518066, "learning_rate": 1.7056929460580913e-05, "loss": 0.9424, "step": 8873 }, { "epoch": 
7.364315352697095, "grad_norm": 31.31165885925293, "learning_rate": 1.7056597510373445e-05, "loss": 1.955, "step": 8874 }, { "epoch": 7.365145228215767, "grad_norm": 30.794986724853516, "learning_rate": 1.7056265560165977e-05, "loss": 1.5317, "step": 8875 }, { "epoch": 7.365975103734439, "grad_norm": 31.150279998779297, "learning_rate": 1.705593360995851e-05, "loss": 1.3553, "step": 8876 }, { "epoch": 7.366804979253112, "grad_norm": 14.570249557495117, "learning_rate": 1.7055601659751038e-05, "loss": 0.815, "step": 8877 }, { "epoch": 7.367634854771785, "grad_norm": 13.940974235534668, "learning_rate": 1.705526970954357e-05, "loss": 0.7258, "step": 8878 }, { "epoch": 7.368464730290457, "grad_norm": 21.26625633239746, "learning_rate": 1.7054937759336102e-05, "loss": 1.1927, "step": 8879 }, { "epoch": 7.369294605809129, "grad_norm": 16.947704315185547, "learning_rate": 1.7054605809128634e-05, "loss": 1.1826, "step": 8880 }, { "epoch": 7.370124481327801, "grad_norm": 31.533855438232422, "learning_rate": 1.7054273858921163e-05, "loss": 0.8098, "step": 8881 }, { "epoch": 7.3709543568464735, "grad_norm": 40.24657440185547, "learning_rate": 1.7053941908713695e-05, "loss": 1.5079, "step": 8882 }, { "epoch": 7.371784232365146, "grad_norm": 17.234039306640625, "learning_rate": 1.7053609958506224e-05, "loss": 0.9576, "step": 8883 }, { "epoch": 7.372614107883818, "grad_norm": 18.146814346313477, "learning_rate": 1.7053278008298756e-05, "loss": 1.2453, "step": 8884 }, { "epoch": 7.37344398340249, "grad_norm": 22.249958038330078, "learning_rate": 1.7052946058091288e-05, "loss": 1.039, "step": 8885 }, { "epoch": 7.374273858921162, "grad_norm": 19.39145278930664, "learning_rate": 1.7052614107883817e-05, "loss": 1.2375, "step": 8886 }, { "epoch": 7.3751037344398345, "grad_norm": 16.480424880981445, "learning_rate": 1.705228215767635e-05, "loss": 1.0459, "step": 8887 }, { "epoch": 7.375933609958507, "grad_norm": 35.522037506103516, "learning_rate": 1.705195020746888e-05, "loss": 
1.2546, "step": 8888 }, { "epoch": 7.376763485477179, "grad_norm": 36.490882873535156, "learning_rate": 1.7051618257261413e-05, "loss": 0.9022, "step": 8889 }, { "epoch": 7.377593360995851, "grad_norm": 25.143098831176758, "learning_rate": 1.7051286307053942e-05, "loss": 1.1209, "step": 8890 }, { "epoch": 7.378423236514523, "grad_norm": 21.381546020507812, "learning_rate": 1.7050954356846474e-05, "loss": 0.9482, "step": 8891 }, { "epoch": 7.3792531120331954, "grad_norm": 24.319726943969727, "learning_rate": 1.7050622406639006e-05, "loss": 1.1655, "step": 8892 }, { "epoch": 7.380082987551868, "grad_norm": 17.58997344970703, "learning_rate": 1.7050290456431538e-05, "loss": 0.9991, "step": 8893 }, { "epoch": 7.38091286307054, "grad_norm": 37.436279296875, "learning_rate": 1.7049958506224067e-05, "loss": 1.1067, "step": 8894 }, { "epoch": 7.381742738589212, "grad_norm": 22.456584930419922, "learning_rate": 1.70496265560166e-05, "loss": 1.316, "step": 8895 }, { "epoch": 7.382572614107884, "grad_norm": 21.748828887939453, "learning_rate": 1.704929460580913e-05, "loss": 0.799, "step": 8896 }, { "epoch": 7.383402489626556, "grad_norm": 33.34821701049805, "learning_rate": 1.7048962655601663e-05, "loss": 1.7157, "step": 8897 }, { "epoch": 7.384232365145229, "grad_norm": 16.8335018157959, "learning_rate": 1.7048630705394192e-05, "loss": 1.0322, "step": 8898 }, { "epoch": 7.385062240663901, "grad_norm": 19.119577407836914, "learning_rate": 1.7048298755186724e-05, "loss": 1.1802, "step": 8899 }, { "epoch": 7.385892116182573, "grad_norm": 59.47130584716797, "learning_rate": 1.7047966804979256e-05, "loss": 0.9564, "step": 8900 }, { "epoch": 7.386721991701245, "grad_norm": 33.03281784057617, "learning_rate": 1.7047634854771785e-05, "loss": 1.3847, "step": 8901 }, { "epoch": 7.387551867219917, "grad_norm": 30.021963119506836, "learning_rate": 1.7047302904564317e-05, "loss": 1.4937, "step": 8902 }, { "epoch": 7.38838174273859, "grad_norm": 16.309898376464844, "learning_rate": 
1.704697095435685e-05, "loss": 0.9019, "step": 8903 }, { "epoch": 7.389211618257262, "grad_norm": 21.951557159423828, "learning_rate": 1.7046639004149378e-05, "loss": 1.266, "step": 8904 }, { "epoch": 7.390041493775934, "grad_norm": 17.704368591308594, "learning_rate": 1.704630705394191e-05, "loss": 0.7234, "step": 8905 }, { "epoch": 7.390871369294606, "grad_norm": 37.38725662231445, "learning_rate": 1.7045975103734442e-05, "loss": 1.1628, "step": 8906 }, { "epoch": 7.391701244813278, "grad_norm": 21.33056640625, "learning_rate": 1.704564315352697e-05, "loss": 1.1657, "step": 8907 }, { "epoch": 7.392531120331951, "grad_norm": 46.72028350830078, "learning_rate": 1.7045311203319503e-05, "loss": 0.7718, "step": 8908 }, { "epoch": 7.393360995850623, "grad_norm": 38.50086212158203, "learning_rate": 1.7044979253112035e-05, "loss": 0.9344, "step": 8909 }, { "epoch": 7.394190871369295, "grad_norm": 15.172079086303711, "learning_rate": 1.7044647302904567e-05, "loss": 0.8868, "step": 8910 }, { "epoch": 7.395020746887967, "grad_norm": 26.40378761291504, "learning_rate": 1.7044315352697096e-05, "loss": 1.3985, "step": 8911 }, { "epoch": 7.395850622406639, "grad_norm": 19.643503189086914, "learning_rate": 1.7043983402489628e-05, "loss": 1.2076, "step": 8912 }, { "epoch": 7.396680497925312, "grad_norm": 35.622344970703125, "learning_rate": 1.704365145228216e-05, "loss": 1.8077, "step": 8913 }, { "epoch": 7.397510373443984, "grad_norm": 26.40156364440918, "learning_rate": 1.7043319502074692e-05, "loss": 1.1326, "step": 8914 }, { "epoch": 7.398340248962656, "grad_norm": 17.30621910095215, "learning_rate": 1.704298755186722e-05, "loss": 0.8642, "step": 8915 }, { "epoch": 7.399170124481328, "grad_norm": 15.221858024597168, "learning_rate": 1.7042655601659753e-05, "loss": 0.8608, "step": 8916 }, { "epoch": 7.4, "grad_norm": 34.88045120239258, "learning_rate": 1.7042323651452285e-05, "loss": 1.5035, "step": 8917 }, { "epoch": 7.4008298755186726, "grad_norm": 19.10575294494629, 
"learning_rate": 1.7041991701244817e-05, "loss": 1.4262, "step": 8918 }, { "epoch": 7.401659751037345, "grad_norm": 22.301538467407227, "learning_rate": 1.7041659751037346e-05, "loss": 1.0558, "step": 8919 }, { "epoch": 7.402489626556017, "grad_norm": 19.81587791442871, "learning_rate": 1.7041327800829878e-05, "loss": 1.267, "step": 8920 }, { "epoch": 7.403319502074689, "grad_norm": 15.454008102416992, "learning_rate": 1.704099585062241e-05, "loss": 1.2685, "step": 8921 }, { "epoch": 7.404149377593361, "grad_norm": 15.075798988342285, "learning_rate": 1.704066390041494e-05, "loss": 0.8466, "step": 8922 }, { "epoch": 7.4049792531120335, "grad_norm": 23.899988174438477, "learning_rate": 1.704033195020747e-05, "loss": 1.3844, "step": 8923 }, { "epoch": 7.405809128630706, "grad_norm": 22.61607551574707, "learning_rate": 1.704e-05, "loss": 1.2686, "step": 8924 }, { "epoch": 7.406639004149378, "grad_norm": 22.252513885498047, "learning_rate": 1.703966804979253e-05, "loss": 0.7619, "step": 8925 }, { "epoch": 7.40746887966805, "grad_norm": 25.64833641052246, "learning_rate": 1.7039336099585064e-05, "loss": 0.9446, "step": 8926 }, { "epoch": 7.408298755186722, "grad_norm": 15.073040008544922, "learning_rate": 1.7039004149377592e-05, "loss": 0.7414, "step": 8927 }, { "epoch": 7.4091286307053945, "grad_norm": 25.583322525024414, "learning_rate": 1.7038672199170125e-05, "loss": 1.5272, "step": 8928 }, { "epoch": 7.409958506224067, "grad_norm": 10.982553482055664, "learning_rate": 1.7038340248962657e-05, "loss": 0.4686, "step": 8929 }, { "epoch": 7.410788381742739, "grad_norm": 17.04227066040039, "learning_rate": 1.703800829875519e-05, "loss": 0.5682, "step": 8930 }, { "epoch": 7.411618257261411, "grad_norm": 15.748912811279297, "learning_rate": 1.703767634854772e-05, "loss": 1.0432, "step": 8931 }, { "epoch": 7.412448132780083, "grad_norm": 24.23664665222168, "learning_rate": 1.703734439834025e-05, "loss": 1.489, "step": 8932 }, { "epoch": 7.4132780082987555, "grad_norm": 
11.519492149353027, "learning_rate": 1.703701244813278e-05, "loss": 0.7859, "step": 8933 }, { "epoch": 7.414107883817428, "grad_norm": 19.481416702270508, "learning_rate": 1.7036680497925314e-05, "loss": 1.1398, "step": 8934 }, { "epoch": 7.4149377593361, "grad_norm": 34.45111846923828, "learning_rate": 1.7036348547717846e-05, "loss": 1.9583, "step": 8935 }, { "epoch": 7.415767634854772, "grad_norm": 20.279579162597656, "learning_rate": 1.7036016597510375e-05, "loss": 1.6431, "step": 8936 }, { "epoch": 7.416597510373444, "grad_norm": 35.304447174072266, "learning_rate": 1.7035684647302907e-05, "loss": 1.1558, "step": 8937 }, { "epoch": 7.4174273858921165, "grad_norm": 23.129758834838867, "learning_rate": 1.703535269709544e-05, "loss": 0.9199, "step": 8938 }, { "epoch": 7.418257261410789, "grad_norm": 16.232332229614258, "learning_rate": 1.7035020746887968e-05, "loss": 1.0136, "step": 8939 }, { "epoch": 7.419087136929461, "grad_norm": 28.939207077026367, "learning_rate": 1.70346887966805e-05, "loss": 1.0228, "step": 8940 }, { "epoch": 7.419917012448133, "grad_norm": 18.879934310913086, "learning_rate": 1.7034356846473032e-05, "loss": 1.2555, "step": 8941 }, { "epoch": 7.420746887966805, "grad_norm": 19.0170841217041, "learning_rate": 1.703402489626556e-05, "loss": 1.1963, "step": 8942 }, { "epoch": 7.4215767634854775, "grad_norm": 18.24869155883789, "learning_rate": 1.7033692946058093e-05, "loss": 1.2497, "step": 8943 }, { "epoch": 7.42240663900415, "grad_norm": 34.55162811279297, "learning_rate": 1.703336099585062e-05, "loss": 2.3071, "step": 8944 }, { "epoch": 7.423236514522822, "grad_norm": 22.388395309448242, "learning_rate": 1.7033029045643153e-05, "loss": 0.9814, "step": 8945 }, { "epoch": 7.424066390041494, "grad_norm": 27.01546859741211, "learning_rate": 1.7032697095435686e-05, "loss": 1.2413, "step": 8946 }, { "epoch": 7.424896265560166, "grad_norm": 36.6204833984375, "learning_rate": 1.7032365145228218e-05, "loss": 1.3063, "step": 8947 }, { "epoch": 
7.425726141078838, "grad_norm": 17.440292358398438, "learning_rate": 1.7032033195020746e-05, "loss": 0.8364, "step": 8948 }, { "epoch": 7.426556016597511, "grad_norm": 24.01718521118164, "learning_rate": 1.703170124481328e-05, "loss": 1.3023, "step": 8949 }, { "epoch": 7.427385892116183, "grad_norm": 38.272090911865234, "learning_rate": 1.703136929460581e-05, "loss": 1.1922, "step": 8950 }, { "epoch": 7.428215767634855, "grad_norm": 13.198507308959961, "learning_rate": 1.7031037344398343e-05, "loss": 0.8743, "step": 8951 }, { "epoch": 7.429045643153527, "grad_norm": 21.52128791809082, "learning_rate": 1.703070539419087e-05, "loss": 1.4468, "step": 8952 }, { "epoch": 7.429875518672199, "grad_norm": 37.272830963134766, "learning_rate": 1.7030373443983403e-05, "loss": 1.9882, "step": 8953 }, { "epoch": 7.430705394190872, "grad_norm": 34.94969177246094, "learning_rate": 1.7030041493775936e-05, "loss": 1.2018, "step": 8954 }, { "epoch": 7.431535269709544, "grad_norm": 23.7241153717041, "learning_rate": 1.7029709543568468e-05, "loss": 1.3257, "step": 8955 }, { "epoch": 7.432365145228216, "grad_norm": 31.41844367980957, "learning_rate": 1.7029377593360996e-05, "loss": 0.7979, "step": 8956 }, { "epoch": 7.433195020746888, "grad_norm": 15.342289924621582, "learning_rate": 1.702904564315353e-05, "loss": 0.9392, "step": 8957 }, { "epoch": 7.43402489626556, "grad_norm": 16.029827117919922, "learning_rate": 1.702871369294606e-05, "loss": 1.043, "step": 8958 }, { "epoch": 7.434854771784233, "grad_norm": 21.161697387695312, "learning_rate": 1.7028381742738593e-05, "loss": 1.3197, "step": 8959 }, { "epoch": 7.435684647302905, "grad_norm": 32.40435791015625, "learning_rate": 1.702804979253112e-05, "loss": 1.8449, "step": 8960 }, { "epoch": 7.436514522821577, "grad_norm": 25.394208908081055, "learning_rate": 1.7027717842323654e-05, "loss": 1.4107, "step": 8961 }, { "epoch": 7.437344398340249, "grad_norm": 22.583267211914062, "learning_rate": 1.7027385892116182e-05, "loss": 1.3206, 
"step": 8962 }, { "epoch": 7.438174273858921, "grad_norm": 32.58237075805664, "learning_rate": 1.7027053941908714e-05, "loss": 0.9082, "step": 8963 }, { "epoch": 7.439004149377594, "grad_norm": 14.999957084655762, "learning_rate": 1.7026721991701247e-05, "loss": 0.8356, "step": 8964 }, { "epoch": 7.439834024896266, "grad_norm": 15.56227970123291, "learning_rate": 1.7026390041493775e-05, "loss": 0.6714, "step": 8965 }, { "epoch": 7.440663900414938, "grad_norm": 24.22975730895996, "learning_rate": 1.7026058091286307e-05, "loss": 1.0117, "step": 8966 }, { "epoch": 7.44149377593361, "grad_norm": 25.576683044433594, "learning_rate": 1.702572614107884e-05, "loss": 1.7039, "step": 8967 }, { "epoch": 7.442323651452282, "grad_norm": 21.322925567626953, "learning_rate": 1.702539419087137e-05, "loss": 1.487, "step": 8968 }, { "epoch": 7.443153526970955, "grad_norm": 13.22872543334961, "learning_rate": 1.70250622406639e-05, "loss": 0.4355, "step": 8969 }, { "epoch": 7.443983402489627, "grad_norm": 38.5567741394043, "learning_rate": 1.7024730290456432e-05, "loss": 0.9184, "step": 8970 }, { "epoch": 7.444813278008299, "grad_norm": 18.0341854095459, "learning_rate": 1.7024398340248964e-05, "loss": 1.0177, "step": 8971 }, { "epoch": 7.445643153526971, "grad_norm": 29.89453125, "learning_rate": 1.7024066390041497e-05, "loss": 1.4199, "step": 8972 }, { "epoch": 7.446473029045643, "grad_norm": 26.108112335205078, "learning_rate": 1.7023734439834025e-05, "loss": 0.7381, "step": 8973 }, { "epoch": 7.4473029045643155, "grad_norm": 19.3007869720459, "learning_rate": 1.7023402489626557e-05, "loss": 1.3235, "step": 8974 }, { "epoch": 7.448132780082988, "grad_norm": 39.93326950073242, "learning_rate": 1.702307053941909e-05, "loss": 1.3837, "step": 8975 }, { "epoch": 7.44896265560166, "grad_norm": 38.160282135009766, "learning_rate": 1.702273858921162e-05, "loss": 1.2432, "step": 8976 }, { "epoch": 7.449792531120332, "grad_norm": 14.659204483032227, "learning_rate": 1.702240663900415e-05, 
"loss": 1.2775, "step": 8977 }, { "epoch": 7.450622406639004, "grad_norm": 16.880096435546875, "learning_rate": 1.7022074688796682e-05, "loss": 0.822, "step": 8978 }, { "epoch": 7.4514522821576765, "grad_norm": 24.604747772216797, "learning_rate": 1.7021742738589215e-05, "loss": 1.2928, "step": 8979 }, { "epoch": 7.452282157676349, "grad_norm": 19.391347885131836, "learning_rate": 1.7021410788381743e-05, "loss": 1.2372, "step": 8980 }, { "epoch": 7.453112033195021, "grad_norm": 27.241666793823242, "learning_rate": 1.7021078838174275e-05, "loss": 0.9017, "step": 8981 }, { "epoch": 7.453941908713693, "grad_norm": 18.0445556640625, "learning_rate": 1.7020746887966808e-05, "loss": 1.094, "step": 8982 }, { "epoch": 7.454771784232365, "grad_norm": 35.81523895263672, "learning_rate": 1.7020414937759336e-05, "loss": 1.5756, "step": 8983 }, { "epoch": 7.4556016597510375, "grad_norm": 24.2291316986084, "learning_rate": 1.702008298755187e-05, "loss": 1.1635, "step": 8984 }, { "epoch": 7.45643153526971, "grad_norm": 22.505760192871094, "learning_rate": 1.70197510373444e-05, "loss": 0.8993, "step": 8985 }, { "epoch": 7.457261410788382, "grad_norm": 37.54668426513672, "learning_rate": 1.701941908713693e-05, "loss": 1.4553, "step": 8986 }, { "epoch": 7.458091286307054, "grad_norm": 26.409717559814453, "learning_rate": 1.701908713692946e-05, "loss": 1.693, "step": 8987 }, { "epoch": 7.458921161825726, "grad_norm": 13.409378051757812, "learning_rate": 1.7018755186721993e-05, "loss": 0.809, "step": 8988 }, { "epoch": 7.4597510373443985, "grad_norm": 18.319028854370117, "learning_rate": 1.7018423236514525e-05, "loss": 0.8963, "step": 8989 }, { "epoch": 7.460580912863071, "grad_norm": 20.977914810180664, "learning_rate": 1.7018091286307054e-05, "loss": 0.9972, "step": 8990 }, { "epoch": 7.461410788381743, "grad_norm": 23.24739646911621, "learning_rate": 1.7017759336099586e-05, "loss": 1.0458, "step": 8991 }, { "epoch": 7.462240663900415, "grad_norm": 17.201309204101562, 
"learning_rate": 1.701742738589212e-05, "loss": 1.2796, "step": 8992 }, { "epoch": 7.463070539419087, "grad_norm": 17.58072280883789, "learning_rate": 1.701709543568465e-05, "loss": 1.1846, "step": 8993 }, { "epoch": 7.4639004149377595, "grad_norm": 24.219179153442383, "learning_rate": 1.701676348547718e-05, "loss": 1.4026, "step": 8994 }, { "epoch": 7.464730290456432, "grad_norm": 33.280189514160156, "learning_rate": 1.701643153526971e-05, "loss": 1.2116, "step": 8995 }, { "epoch": 7.465560165975104, "grad_norm": 15.817856788635254, "learning_rate": 1.7016099585062243e-05, "loss": 0.859, "step": 8996 }, { "epoch": 7.466390041493776, "grad_norm": 16.71197509765625, "learning_rate": 1.7015767634854776e-05, "loss": 1.0945, "step": 8997 }, { "epoch": 7.467219917012448, "grad_norm": 12.698993682861328, "learning_rate": 1.7015435684647304e-05, "loss": 1.0114, "step": 8998 }, { "epoch": 7.4680497925311204, "grad_norm": 20.37611198425293, "learning_rate": 1.7015103734439836e-05, "loss": 1.3161, "step": 8999 }, { "epoch": 7.468879668049793, "grad_norm": 18.212175369262695, "learning_rate": 1.7014771784232365e-05, "loss": 1.1937, "step": 9000 }, { "epoch": 7.469709543568465, "grad_norm": 18.405288696289062, "learning_rate": 1.7014439834024897e-05, "loss": 1.1016, "step": 9001 }, { "epoch": 7.470539419087137, "grad_norm": 23.79177474975586, "learning_rate": 1.701410788381743e-05, "loss": 1.7007, "step": 9002 }, { "epoch": 7.471369294605809, "grad_norm": 23.547786712646484, "learning_rate": 1.7013775933609958e-05, "loss": 1.4292, "step": 9003 }, { "epoch": 7.472199170124481, "grad_norm": 16.768461227416992, "learning_rate": 1.701344398340249e-05, "loss": 0.6927, "step": 9004 }, { "epoch": 7.473029045643154, "grad_norm": 21.4794921875, "learning_rate": 1.7013112033195022e-05, "loss": 1.4618, "step": 9005 }, { "epoch": 7.473858921161826, "grad_norm": 19.363283157348633, "learning_rate": 1.701278008298755e-05, "loss": 0.8296, "step": 9006 }, { "epoch": 7.474688796680498, 
"grad_norm": 29.995805740356445, "learning_rate": 1.7012448132780083e-05, "loss": 1.6424, "step": 9007 }, { "epoch": 7.47551867219917, "grad_norm": 20.729114532470703, "learning_rate": 1.7012116182572615e-05, "loss": 1.6206, "step": 9008 }, { "epoch": 7.476348547717842, "grad_norm": 21.545848846435547, "learning_rate": 1.7011784232365147e-05, "loss": 1.3338, "step": 9009 }, { "epoch": 7.477178423236515, "grad_norm": 13.519347190856934, "learning_rate": 1.7011452282157676e-05, "loss": 0.8849, "step": 9010 }, { "epoch": 7.478008298755187, "grad_norm": 29.04572105407715, "learning_rate": 1.7011120331950208e-05, "loss": 1.4251, "step": 9011 }, { "epoch": 7.478838174273859, "grad_norm": 34.01475143432617, "learning_rate": 1.701078838174274e-05, "loss": 2.2394, "step": 9012 }, { "epoch": 7.479668049792531, "grad_norm": 16.31956672668457, "learning_rate": 1.7010456431535272e-05, "loss": 0.7994, "step": 9013 }, { "epoch": 7.480497925311203, "grad_norm": 34.98126983642578, "learning_rate": 1.7010124481327804e-05, "loss": 1.5987, "step": 9014 }, { "epoch": 7.481327800829876, "grad_norm": 28.802894592285156, "learning_rate": 1.7009792531120333e-05, "loss": 1.5892, "step": 9015 }, { "epoch": 7.482157676348548, "grad_norm": 15.65732192993164, "learning_rate": 1.7009460580912865e-05, "loss": 0.6708, "step": 9016 }, { "epoch": 7.48298755186722, "grad_norm": 18.80138397216797, "learning_rate": 1.7009128630705397e-05, "loss": 0.8444, "step": 9017 }, { "epoch": 7.483817427385892, "grad_norm": 26.76081085205078, "learning_rate": 1.7008796680497926e-05, "loss": 1.1406, "step": 9018 }, { "epoch": 7.484647302904564, "grad_norm": 16.14484405517578, "learning_rate": 1.7008464730290458e-05, "loss": 0.5229, "step": 9019 }, { "epoch": 7.485477178423237, "grad_norm": 21.686845779418945, "learning_rate": 1.700813278008299e-05, "loss": 1.0647, "step": 9020 }, { "epoch": 7.486307053941909, "grad_norm": 30.580530166625977, "learning_rate": 1.700780082987552e-05, "loss": 1.5866, "step": 9021 }, { 
"epoch": 7.487136929460581, "grad_norm": 48.31371307373047, "learning_rate": 1.700746887966805e-05, "loss": 0.9908, "step": 9022 }, { "epoch": 7.487966804979253, "grad_norm": 20.670597076416016, "learning_rate": 1.700713692946058e-05, "loss": 0.8738, "step": 9023 }, { "epoch": 7.488796680497925, "grad_norm": 14.355406761169434, "learning_rate": 1.7006804979253112e-05, "loss": 0.4289, "step": 9024 }, { "epoch": 7.4896265560165975, "grad_norm": 18.036928176879883, "learning_rate": 1.7006473029045644e-05, "loss": 1.1229, "step": 9025 }, { "epoch": 7.49045643153527, "grad_norm": 25.908710479736328, "learning_rate": 1.7006141078838176e-05, "loss": 0.8023, "step": 9026 }, { "epoch": 7.491286307053942, "grad_norm": 16.351472854614258, "learning_rate": 1.7005809128630705e-05, "loss": 0.7052, "step": 9027 }, { "epoch": 7.492116182572614, "grad_norm": 17.85040283203125, "learning_rate": 1.7005477178423237e-05, "loss": 1.1162, "step": 9028 }, { "epoch": 7.492946058091286, "grad_norm": 17.88043975830078, "learning_rate": 1.700514522821577e-05, "loss": 0.8837, "step": 9029 }, { "epoch": 7.4937759336099585, "grad_norm": 38.60813903808594, "learning_rate": 1.70048132780083e-05, "loss": 1.8647, "step": 9030 }, { "epoch": 7.494605809128631, "grad_norm": 30.85234832763672, "learning_rate": 1.700448132780083e-05, "loss": 0.8952, "step": 9031 }, { "epoch": 7.495435684647303, "grad_norm": 23.487943649291992, "learning_rate": 1.7004149377593362e-05, "loss": 1.5937, "step": 9032 }, { "epoch": 7.496265560165975, "grad_norm": 15.682843208312988, "learning_rate": 1.7003817427385894e-05, "loss": 0.8296, "step": 9033 }, { "epoch": 7.497095435684647, "grad_norm": 16.124605178833008, "learning_rate": 1.7003485477178426e-05, "loss": 0.91, "step": 9034 }, { "epoch": 7.4979253112033195, "grad_norm": 27.366622924804688, "learning_rate": 1.7003153526970955e-05, "loss": 1.4362, "step": 9035 }, { "epoch": 7.498755186721992, "grad_norm": 18.430559158325195, "learning_rate": 1.7002821576763487e-05, 
"loss": 1.4204, "step": 9036 }, { "epoch": 7.499585062240664, "grad_norm": 24.05529022216797, "learning_rate": 1.700248962655602e-05, "loss": 0.9931, "step": 9037 }, { "epoch": 7.500414937759336, "grad_norm": 30.192712783813477, "learning_rate": 1.7002157676348548e-05, "loss": 1.267, "step": 9038 }, { "epoch": 7.501244813278008, "grad_norm": 18.484561920166016, "learning_rate": 1.700182572614108e-05, "loss": 1.2746, "step": 9039 }, { "epoch": 7.5020746887966805, "grad_norm": 19.601898193359375, "learning_rate": 1.7001493775933612e-05, "loss": 1.0016, "step": 9040 }, { "epoch": 7.502904564315353, "grad_norm": 22.045072555541992, "learning_rate": 1.700116182572614e-05, "loss": 1.6512, "step": 9041 }, { "epoch": 7.503734439834025, "grad_norm": 17.357694625854492, "learning_rate": 1.7000829875518673e-05, "loss": 1.2494, "step": 9042 }, { "epoch": 7.504564315352697, "grad_norm": 24.738910675048828, "learning_rate": 1.7000497925311205e-05, "loss": 1.3742, "step": 9043 }, { "epoch": 7.505394190871369, "grad_norm": 18.573022842407227, "learning_rate": 1.7000165975103734e-05, "loss": 1.4075, "step": 9044 }, { "epoch": 7.5062240663900415, "grad_norm": 41.68953323364258, "learning_rate": 1.6999834024896266e-05, "loss": 1.5529, "step": 9045 }, { "epoch": 7.507053941908714, "grad_norm": 18.634132385253906, "learning_rate": 1.6999502074688798e-05, "loss": 0.9105, "step": 9046 }, { "epoch": 7.507883817427386, "grad_norm": 23.764482498168945, "learning_rate": 1.699917012448133e-05, "loss": 0.9161, "step": 9047 }, { "epoch": 7.508713692946058, "grad_norm": 24.46875762939453, "learning_rate": 1.699883817427386e-05, "loss": 1.9745, "step": 9048 }, { "epoch": 7.50954356846473, "grad_norm": 19.38358497619629, "learning_rate": 1.699850622406639e-05, "loss": 0.7331, "step": 9049 }, { "epoch": 7.5103734439834025, "grad_norm": 15.571805953979492, "learning_rate": 1.6998174273858923e-05, "loss": 0.6472, "step": 9050 }, { "epoch": 7.511203319502075, "grad_norm": 23.350236892700195, 
"learning_rate": 1.6997842323651455e-05, "loss": 1.129, "step": 9051 }, { "epoch": 7.512033195020747, "grad_norm": 24.099943161010742, "learning_rate": 1.6997510373443984e-05, "loss": 0.6268, "step": 9052 }, { "epoch": 7.512863070539419, "grad_norm": 17.32065200805664, "learning_rate": 1.6997178423236516e-05, "loss": 1.5021, "step": 9053 }, { "epoch": 7.513692946058091, "grad_norm": 32.8400993347168, "learning_rate": 1.6996846473029048e-05, "loss": 1.2549, "step": 9054 }, { "epoch": 7.514522821576763, "grad_norm": 15.831988334655762, "learning_rate": 1.699651452282158e-05, "loss": 0.9395, "step": 9055 }, { "epoch": 7.515352697095436, "grad_norm": 19.121301651000977, "learning_rate": 1.699618257261411e-05, "loss": 1.4571, "step": 9056 }, { "epoch": 7.516182572614108, "grad_norm": 18.846378326416016, "learning_rate": 1.699585062240664e-05, "loss": 1.1957, "step": 9057 }, { "epoch": 7.51701244813278, "grad_norm": 25.415403366088867, "learning_rate": 1.6995518672199173e-05, "loss": 1.1113, "step": 9058 }, { "epoch": 7.517842323651452, "grad_norm": 17.76137351989746, "learning_rate": 1.6995186721991702e-05, "loss": 0.6338, "step": 9059 }, { "epoch": 7.518672199170124, "grad_norm": 18.272449493408203, "learning_rate": 1.6994854771784234e-05, "loss": 0.9972, "step": 9060 }, { "epoch": 7.519502074688797, "grad_norm": 17.864761352539062, "learning_rate": 1.6994522821576763e-05, "loss": 1.5651, "step": 9061 }, { "epoch": 7.520331950207469, "grad_norm": 27.58633041381836, "learning_rate": 1.6994190871369295e-05, "loss": 1.4342, "step": 9062 }, { "epoch": 7.521161825726141, "grad_norm": 23.448450088500977, "learning_rate": 1.6993858921161827e-05, "loss": 1.2457, "step": 9063 }, { "epoch": 7.521991701244813, "grad_norm": 19.467077255249023, "learning_rate": 1.699352697095436e-05, "loss": 1.7603, "step": 9064 }, { "epoch": 7.522821576763485, "grad_norm": 14.91435432434082, "learning_rate": 1.6993195020746888e-05, "loss": 0.8814, "step": 9065 }, { "epoch": 7.523651452282158, 
"grad_norm": 29.374675750732422, "learning_rate": 1.699286307053942e-05, "loss": 1.328, "step": 9066 }, { "epoch": 7.52448132780083, "grad_norm": 18.858013153076172, "learning_rate": 1.6992531120331952e-05, "loss": 0.9795, "step": 9067 }, { "epoch": 7.525311203319502, "grad_norm": 14.919286727905273, "learning_rate": 1.6992199170124484e-05, "loss": 0.967, "step": 9068 }, { "epoch": 7.526141078838174, "grad_norm": 20.991416931152344, "learning_rate": 1.6991867219917013e-05, "loss": 1.2932, "step": 9069 }, { "epoch": 7.526970954356846, "grad_norm": 29.249399185180664, "learning_rate": 1.6991535269709545e-05, "loss": 1.6262, "step": 9070 }, { "epoch": 7.527800829875519, "grad_norm": 26.339502334594727, "learning_rate": 1.6991203319502077e-05, "loss": 0.9044, "step": 9071 }, { "epoch": 7.528630705394191, "grad_norm": 20.338294982910156, "learning_rate": 1.699087136929461e-05, "loss": 1.3465, "step": 9072 }, { "epoch": 7.529460580912863, "grad_norm": 53.69157409667969, "learning_rate": 1.6990539419087138e-05, "loss": 1.2797, "step": 9073 }, { "epoch": 7.530290456431535, "grad_norm": 21.96824073791504, "learning_rate": 1.699020746887967e-05, "loss": 1.1315, "step": 9074 }, { "epoch": 7.531120331950207, "grad_norm": 26.837366104125977, "learning_rate": 1.6989875518672202e-05, "loss": 1.0626, "step": 9075 }, { "epoch": 7.5319502074688796, "grad_norm": 49.701744079589844, "learning_rate": 1.6989543568464734e-05, "loss": 0.7962, "step": 9076 }, { "epoch": 7.532780082987552, "grad_norm": 29.449499130249023, "learning_rate": 1.6989211618257263e-05, "loss": 0.9624, "step": 9077 }, { "epoch": 7.533609958506224, "grad_norm": 24.430702209472656, "learning_rate": 1.6988879668049795e-05, "loss": 1.276, "step": 9078 }, { "epoch": 7.534439834024896, "grad_norm": 22.660606384277344, "learning_rate": 1.6988547717842324e-05, "loss": 1.2508, "step": 9079 }, { "epoch": 7.535269709543568, "grad_norm": 20.71818733215332, "learning_rate": 1.6988215767634856e-05, "loss": 0.8681, "step": 9080 
}, { "epoch": 7.5360995850622405, "grad_norm": 39.271636962890625, "learning_rate": 1.6987883817427388e-05, "loss": 0.9243, "step": 9081 }, { "epoch": 7.536929460580913, "grad_norm": 38.9797477722168, "learning_rate": 1.6987551867219917e-05, "loss": 1.9822, "step": 9082 }, { "epoch": 7.537759336099585, "grad_norm": 36.907649993896484, "learning_rate": 1.698721991701245e-05, "loss": 1.2667, "step": 9083 }, { "epoch": 7.538589211618257, "grad_norm": 21.577348709106445, "learning_rate": 1.698688796680498e-05, "loss": 0.5063, "step": 9084 }, { "epoch": 7.539419087136929, "grad_norm": 23.942901611328125, "learning_rate": 1.698655601659751e-05, "loss": 1.0169, "step": 9085 }, { "epoch": 7.5402489626556015, "grad_norm": 41.51680374145508, "learning_rate": 1.698622406639004e-05, "loss": 2.3218, "step": 9086 }, { "epoch": 7.541078838174274, "grad_norm": 32.52909851074219, "learning_rate": 1.6985892116182574e-05, "loss": 2.0401, "step": 9087 }, { "epoch": 7.541908713692946, "grad_norm": 20.503173828125, "learning_rate": 1.6985560165975106e-05, "loss": 0.6118, "step": 9088 }, { "epoch": 7.542738589211618, "grad_norm": 17.019794464111328, "learning_rate": 1.6985228215767635e-05, "loss": 0.7714, "step": 9089 }, { "epoch": 7.54356846473029, "grad_norm": 16.211854934692383, "learning_rate": 1.6984896265560167e-05, "loss": 0.7826, "step": 9090 }, { "epoch": 7.5443983402489625, "grad_norm": 14.644721031188965, "learning_rate": 1.69845643153527e-05, "loss": 0.9713, "step": 9091 }, { "epoch": 7.545228215767635, "grad_norm": 35.639625549316406, "learning_rate": 1.698423236514523e-05, "loss": 1.6121, "step": 9092 }, { "epoch": 7.546058091286307, "grad_norm": 30.6533260345459, "learning_rate": 1.6983900414937763e-05, "loss": 1.7089, "step": 9093 }, { "epoch": 7.546887966804979, "grad_norm": 46.92535400390625, "learning_rate": 1.6983568464730292e-05, "loss": 2.6069, "step": 9094 }, { "epoch": 7.547717842323651, "grad_norm": 17.860383987426758, "learning_rate": 1.6983236514522824e-05, 
"loss": 1.3442, "step": 9095 }, { "epoch": 7.5485477178423235, "grad_norm": 25.687484741210938, "learning_rate": 1.6982904564315356e-05, "loss": 1.6552, "step": 9096 }, { "epoch": 7.549377593360996, "grad_norm": 27.05716323852539, "learning_rate": 1.6982572614107885e-05, "loss": 1.0838, "step": 9097 }, { "epoch": 7.550207468879668, "grad_norm": 28.934545516967773, "learning_rate": 1.6982240663900417e-05, "loss": 1.7613, "step": 9098 }, { "epoch": 7.55103734439834, "grad_norm": 30.469661712646484, "learning_rate": 1.698190871369295e-05, "loss": 0.9019, "step": 9099 }, { "epoch": 7.551867219917012, "grad_norm": 16.787979125976562, "learning_rate": 1.6981576763485478e-05, "loss": 1.1949, "step": 9100 }, { "epoch": 7.5526970954356845, "grad_norm": 16.576152801513672, "learning_rate": 1.698124481327801e-05, "loss": 1.1361, "step": 9101 }, { "epoch": 7.553526970954357, "grad_norm": 13.99895191192627, "learning_rate": 1.698091286307054e-05, "loss": 1.1309, "step": 9102 }, { "epoch": 7.554356846473029, "grad_norm": 20.32878875732422, "learning_rate": 1.698058091286307e-05, "loss": 0.7615, "step": 9103 }, { "epoch": 7.555186721991701, "grad_norm": 21.266496658325195, "learning_rate": 1.6980248962655603e-05, "loss": 1.0047, "step": 9104 }, { "epoch": 7.556016597510373, "grad_norm": 30.357040405273438, "learning_rate": 1.6979917012448135e-05, "loss": 1.8497, "step": 9105 }, { "epoch": 7.556846473029045, "grad_norm": 17.98895835876465, "learning_rate": 1.6979585062240663e-05, "loss": 1.317, "step": 9106 }, { "epoch": 7.557676348547718, "grad_norm": 27.87186050415039, "learning_rate": 1.6979253112033196e-05, "loss": 1.3967, "step": 9107 }, { "epoch": 7.55850622406639, "grad_norm": 21.518112182617188, "learning_rate": 1.6978921161825728e-05, "loss": 1.2859, "step": 9108 }, { "epoch": 7.559336099585062, "grad_norm": 17.682838439941406, "learning_rate": 1.697858921161826e-05, "loss": 1.0112, "step": 9109 }, { "epoch": 7.560165975103734, "grad_norm": 15.257080078125, 
"learning_rate": 1.697825726141079e-05, "loss": 1.2009, "step": 9110 }, { "epoch": 7.560995850622406, "grad_norm": 36.10189437866211, "learning_rate": 1.697792531120332e-05, "loss": 1.1592, "step": 9111 }, { "epoch": 7.561825726141079, "grad_norm": 23.67207145690918, "learning_rate": 1.6977593360995853e-05, "loss": 1.4778, "step": 9112 }, { "epoch": 7.562655601659751, "grad_norm": 15.152478218078613, "learning_rate": 1.6977261410788385e-05, "loss": 1.3306, "step": 9113 }, { "epoch": 7.563485477178423, "grad_norm": 29.587886810302734, "learning_rate": 1.6976929460580914e-05, "loss": 1.5075, "step": 9114 }, { "epoch": 7.564315352697095, "grad_norm": 20.00560760498047, "learning_rate": 1.6976597510373446e-05, "loss": 1.2691, "step": 9115 }, { "epoch": 7.565145228215767, "grad_norm": 23.00704574584961, "learning_rate": 1.6976265560165978e-05, "loss": 1.2604, "step": 9116 }, { "epoch": 7.56597510373444, "grad_norm": 18.77680206298828, "learning_rate": 1.6975933609958507e-05, "loss": 1.0813, "step": 9117 }, { "epoch": 7.566804979253112, "grad_norm": 16.745134353637695, "learning_rate": 1.697560165975104e-05, "loss": 0.7743, "step": 9118 }, { "epoch": 7.567634854771784, "grad_norm": 21.18244171142578, "learning_rate": 1.697526970954357e-05, "loss": 0.9623, "step": 9119 }, { "epoch": 7.568464730290456, "grad_norm": 23.231609344482422, "learning_rate": 1.69749377593361e-05, "loss": 0.9589, "step": 9120 }, { "epoch": 7.569294605809128, "grad_norm": 15.850347518920898, "learning_rate": 1.697460580912863e-05, "loss": 0.8225, "step": 9121 }, { "epoch": 7.570124481327801, "grad_norm": 75.6743392944336, "learning_rate": 1.6974273858921164e-05, "loss": 1.5123, "step": 9122 }, { "epoch": 7.570954356846473, "grad_norm": 22.609071731567383, "learning_rate": 1.6973941908713692e-05, "loss": 1.3055, "step": 9123 }, { "epoch": 7.571784232365145, "grad_norm": 40.522491455078125, "learning_rate": 1.6973609958506224e-05, "loss": 0.764, "step": 9124 }, { "epoch": 7.572614107883817, 
"grad_norm": 19.271181106567383, "learning_rate": 1.6973278008298757e-05, "loss": 0.7447, "step": 9125 }, { "epoch": 7.573443983402489, "grad_norm": 15.504175186157227, "learning_rate": 1.697294605809129e-05, "loss": 1.0954, "step": 9126 }, { "epoch": 7.574273858921162, "grad_norm": 25.180007934570312, "learning_rate": 1.6972614107883817e-05, "loss": 1.0391, "step": 9127 }, { "epoch": 7.575103734439834, "grad_norm": 23.98483657836914, "learning_rate": 1.697228215767635e-05, "loss": 1.379, "step": 9128 }, { "epoch": 7.575933609958506, "grad_norm": 29.008743286132812, "learning_rate": 1.697195020746888e-05, "loss": 1.7459, "step": 9129 }, { "epoch": 7.576763485477178, "grad_norm": 22.525419235229492, "learning_rate": 1.6971618257261414e-05, "loss": 1.997, "step": 9130 }, { "epoch": 7.57759336099585, "grad_norm": 23.164274215698242, "learning_rate": 1.6971286307053942e-05, "loss": 1.4643, "step": 9131 }, { "epoch": 7.5784232365145225, "grad_norm": 15.21870231628418, "learning_rate": 1.6970954356846475e-05, "loss": 0.8183, "step": 9132 }, { "epoch": 7.579253112033195, "grad_norm": 27.864137649536133, "learning_rate": 1.6970622406639007e-05, "loss": 0.9664, "step": 9133 }, { "epoch": 7.580082987551867, "grad_norm": 17.126338958740234, "learning_rate": 1.697029045643154e-05, "loss": 0.8052, "step": 9134 }, { "epoch": 7.580912863070539, "grad_norm": 17.132768630981445, "learning_rate": 1.6969958506224068e-05, "loss": 0.4302, "step": 9135 }, { "epoch": 7.581742738589211, "grad_norm": 18.0953311920166, "learning_rate": 1.69696265560166e-05, "loss": 1.3045, "step": 9136 }, { "epoch": 7.5825726141078835, "grad_norm": 28.191476821899414, "learning_rate": 1.6969294605809132e-05, "loss": 0.943, "step": 9137 }, { "epoch": 7.583402489626556, "grad_norm": 15.050994873046875, "learning_rate": 1.696896265560166e-05, "loss": 0.7493, "step": 9138 }, { "epoch": 7.584232365145228, "grad_norm": 15.072936058044434, "learning_rate": 1.6968630705394193e-05, "loss": 1.1477, "step": 9139 }, { 
"epoch": 7.5850622406639, "grad_norm": 32.64728927612305, "learning_rate": 1.696829875518672e-05, "loss": 1.6233, "step": 9140 }, { "epoch": 7.585892116182572, "grad_norm": 28.5942325592041, "learning_rate": 1.6967966804979253e-05, "loss": 1.0055, "step": 9141 }, { "epoch": 7.5867219917012445, "grad_norm": 23.119243621826172, "learning_rate": 1.6967634854771785e-05, "loss": 0.7682, "step": 9142 }, { "epoch": 7.587551867219917, "grad_norm": 32.53740310668945, "learning_rate": 1.6967302904564318e-05, "loss": 1.7221, "step": 9143 }, { "epoch": 7.588381742738589, "grad_norm": 23.126033782958984, "learning_rate": 1.6966970954356846e-05, "loss": 1.1446, "step": 9144 }, { "epoch": 7.589211618257261, "grad_norm": 21.315250396728516, "learning_rate": 1.696663900414938e-05, "loss": 1.5743, "step": 9145 }, { "epoch": 7.590041493775933, "grad_norm": 48.76718521118164, "learning_rate": 1.696630705394191e-05, "loss": 1.4446, "step": 9146 }, { "epoch": 7.5908713692946055, "grad_norm": 15.961954116821289, "learning_rate": 1.6965975103734443e-05, "loss": 1.038, "step": 9147 }, { "epoch": 7.591701244813278, "grad_norm": 31.965578079223633, "learning_rate": 1.696564315352697e-05, "loss": 1.2456, "step": 9148 }, { "epoch": 7.59253112033195, "grad_norm": 19.418914794921875, "learning_rate": 1.6965311203319503e-05, "loss": 0.8262, "step": 9149 }, { "epoch": 7.593360995850622, "grad_norm": 23.034204483032227, "learning_rate": 1.6964979253112036e-05, "loss": 1.5399, "step": 9150 }, { "epoch": 7.594190871369294, "grad_norm": 34.321781158447266, "learning_rate": 1.6964647302904568e-05, "loss": 1.5041, "step": 9151 }, { "epoch": 7.5950207468879665, "grad_norm": 22.29210090637207, "learning_rate": 1.6964315352697096e-05, "loss": 1.4166, "step": 9152 }, { "epoch": 7.595850622406639, "grad_norm": 22.735322952270508, "learning_rate": 1.696398340248963e-05, "loss": 1.0962, "step": 9153 }, { "epoch": 7.596680497925311, "grad_norm": 27.85148048400879, "learning_rate": 1.696365145228216e-05, "loss": 
1.3668, "step": 9154 }, { "epoch": 7.597510373443983, "grad_norm": 27.124645233154297, "learning_rate": 1.696331950207469e-05, "loss": 1.1831, "step": 9155 }, { "epoch": 7.598340248962655, "grad_norm": 16.876380920410156, "learning_rate": 1.696298755186722e-05, "loss": 0.8086, "step": 9156 }, { "epoch": 7.5991701244813274, "grad_norm": 17.1538143157959, "learning_rate": 1.6962655601659754e-05, "loss": 1.1991, "step": 9157 }, { "epoch": 7.6, "grad_norm": 27.68843650817871, "learning_rate": 1.6962323651452282e-05, "loss": 1.6237, "step": 9158 }, { "epoch": 7.600829875518672, "grad_norm": 17.13207244873047, "learning_rate": 1.6961991701244814e-05, "loss": 1.3389, "step": 9159 }, { "epoch": 7.601659751037344, "grad_norm": 21.67805290222168, "learning_rate": 1.6961659751037346e-05, "loss": 0.8058, "step": 9160 }, { "epoch": 7.602489626556016, "grad_norm": 12.923341751098633, "learning_rate": 1.6961327800829875e-05, "loss": 1.0607, "step": 9161 }, { "epoch": 7.603319502074688, "grad_norm": 33.229331970214844, "learning_rate": 1.6960995850622407e-05, "loss": 1.053, "step": 9162 }, { "epoch": 7.604149377593361, "grad_norm": 17.766878128051758, "learning_rate": 1.696066390041494e-05, "loss": 1.2337, "step": 9163 }, { "epoch": 7.604979253112033, "grad_norm": 13.049836158752441, "learning_rate": 1.6960331950207468e-05, "loss": 0.9517, "step": 9164 }, { "epoch": 7.605809128630705, "grad_norm": 14.632448196411133, "learning_rate": 1.696e-05, "loss": 1.182, "step": 9165 }, { "epoch": 7.606639004149377, "grad_norm": 13.773676872253418, "learning_rate": 1.6959668049792532e-05, "loss": 0.708, "step": 9166 }, { "epoch": 7.607468879668049, "grad_norm": 32.232765197753906, "learning_rate": 1.6959336099585064e-05, "loss": 1.4802, "step": 9167 }, { "epoch": 7.608298755186722, "grad_norm": 24.232135772705078, "learning_rate": 1.6959004149377593e-05, "loss": 0.6034, "step": 9168 }, { "epoch": 7.609128630705394, "grad_norm": 12.815929412841797, "learning_rate": 1.6958672199170125e-05, 
"loss": 0.6457, "step": 9169 }, { "epoch": 7.609958506224066, "grad_norm": 9.500903129577637, "learning_rate": 1.6958340248962657e-05, "loss": 0.3312, "step": 9170 }, { "epoch": 7.610788381742738, "grad_norm": 17.45414924621582, "learning_rate": 1.695800829875519e-05, "loss": 1.3936, "step": 9171 }, { "epoch": 7.61161825726141, "grad_norm": 15.843563079833984, "learning_rate": 1.695767634854772e-05, "loss": 0.7942, "step": 9172 }, { "epoch": 7.612448132780083, "grad_norm": 15.679814338684082, "learning_rate": 1.695734439834025e-05, "loss": 1.1053, "step": 9173 }, { "epoch": 7.613278008298755, "grad_norm": 24.63580894470215, "learning_rate": 1.6957012448132782e-05, "loss": 0.9351, "step": 9174 }, { "epoch": 7.614107883817427, "grad_norm": 12.191984176635742, "learning_rate": 1.6956680497925315e-05, "loss": 0.4907, "step": 9175 }, { "epoch": 7.614937759336099, "grad_norm": 18.265396118164062, "learning_rate": 1.6956348547717843e-05, "loss": 1.4791, "step": 9176 }, { "epoch": 7.615767634854771, "grad_norm": 27.07974624633789, "learning_rate": 1.6956016597510375e-05, "loss": 1.0158, "step": 9177 }, { "epoch": 7.616597510373444, "grad_norm": 19.19553565979004, "learning_rate": 1.6955684647302904e-05, "loss": 1.2624, "step": 9178 }, { "epoch": 7.617427385892116, "grad_norm": 40.797908782958984, "learning_rate": 1.6955352697095436e-05, "loss": 0.897, "step": 9179 }, { "epoch": 7.618257261410788, "grad_norm": 19.890850067138672, "learning_rate": 1.695502074688797e-05, "loss": 1.4553, "step": 9180 }, { "epoch": 7.61908713692946, "grad_norm": 26.46341323852539, "learning_rate": 1.6954688796680497e-05, "loss": 0.8842, "step": 9181 }, { "epoch": 7.619917012448132, "grad_norm": 17.116918563842773, "learning_rate": 1.695435684647303e-05, "loss": 0.6671, "step": 9182 }, { "epoch": 7.6207468879668046, "grad_norm": 20.899715423583984, "learning_rate": 1.695402489626556e-05, "loss": 0.6312, "step": 9183 }, { "epoch": 7.621576763485477, "grad_norm": 42.98289108276367, 
"learning_rate": 1.6953692946058093e-05, "loss": 1.3935, "step": 9184 }, { "epoch": 7.622406639004149, "grad_norm": 23.32511329650879, "learning_rate": 1.6953360995850622e-05, "loss": 0.8443, "step": 9185 }, { "epoch": 7.623236514522821, "grad_norm": 29.419198989868164, "learning_rate": 1.6953029045643154e-05, "loss": 1.2779, "step": 9186 }, { "epoch": 7.624066390041493, "grad_norm": 19.837970733642578, "learning_rate": 1.6952697095435686e-05, "loss": 1.0827, "step": 9187 }, { "epoch": 7.6248962655601655, "grad_norm": 19.119487762451172, "learning_rate": 1.695236514522822e-05, "loss": 0.9082, "step": 9188 }, { "epoch": 7.625726141078838, "grad_norm": 15.445719718933105, "learning_rate": 1.6952033195020747e-05, "loss": 1.2394, "step": 9189 }, { "epoch": 7.62655601659751, "grad_norm": 33.5005989074707, "learning_rate": 1.695170124481328e-05, "loss": 1.1767, "step": 9190 }, { "epoch": 7.627385892116182, "grad_norm": 18.710613250732422, "learning_rate": 1.695136929460581e-05, "loss": 1.0796, "step": 9191 }, { "epoch": 7.628215767634854, "grad_norm": 35.8424186706543, "learning_rate": 1.6951037344398343e-05, "loss": 1.1146, "step": 9192 }, { "epoch": 7.6290456431535265, "grad_norm": 30.60305404663086, "learning_rate": 1.6950705394190872e-05, "loss": 1.3727, "step": 9193 }, { "epoch": 7.629875518672199, "grad_norm": 18.15964126586914, "learning_rate": 1.6950373443983404e-05, "loss": 0.9392, "step": 9194 }, { "epoch": 7.630705394190871, "grad_norm": 20.776996612548828, "learning_rate": 1.6950041493775936e-05, "loss": 1.1883, "step": 9195 }, { "epoch": 7.631535269709543, "grad_norm": 27.226886749267578, "learning_rate": 1.6949709543568465e-05, "loss": 1.3913, "step": 9196 }, { "epoch": 7.632365145228215, "grad_norm": 19.319490432739258, "learning_rate": 1.6949377593360997e-05, "loss": 1.1354, "step": 9197 }, { "epoch": 7.6331950207468875, "grad_norm": 41.77113723754883, "learning_rate": 1.694904564315353e-05, "loss": 1.6687, "step": 9198 }, { "epoch": 7.63402489626556, 
"grad_norm": 22.91107940673828, "learning_rate": 1.6948713692946058e-05, "loss": 1.2541, "step": 9199 }, { "epoch": 7.634854771784232, "grad_norm": 20.126089096069336, "learning_rate": 1.694838174273859e-05, "loss": 1.089, "step": 9200 }, { "epoch": 7.635684647302904, "grad_norm": 13.528326988220215, "learning_rate": 1.6948049792531122e-05, "loss": 0.8404, "step": 9201 }, { "epoch": 7.636514522821576, "grad_norm": 56.49421691894531, "learning_rate": 1.694771784232365e-05, "loss": 1.6073, "step": 9202 }, { "epoch": 7.6373443983402485, "grad_norm": 14.97666072845459, "learning_rate": 1.6947385892116183e-05, "loss": 0.9832, "step": 9203 }, { "epoch": 7.638174273858921, "grad_norm": 16.067691802978516, "learning_rate": 1.6947053941908715e-05, "loss": 0.5707, "step": 9204 }, { "epoch": 7.639004149377593, "grad_norm": 20.32194709777832, "learning_rate": 1.6946721991701247e-05, "loss": 1.1977, "step": 9205 }, { "epoch": 7.639834024896265, "grad_norm": 30.629117965698242, "learning_rate": 1.6946390041493776e-05, "loss": 1.3182, "step": 9206 }, { "epoch": 7.640663900414938, "grad_norm": 22.19706916809082, "learning_rate": 1.6946058091286308e-05, "loss": 1.3742, "step": 9207 }, { "epoch": 7.64149377593361, "grad_norm": 15.069137573242188, "learning_rate": 1.694572614107884e-05, "loss": 0.8405, "step": 9208 }, { "epoch": 7.6423236514522825, "grad_norm": 35.693328857421875, "learning_rate": 1.6945394190871372e-05, "loss": 1.0964, "step": 9209 }, { "epoch": 7.643153526970955, "grad_norm": 13.375024795532227, "learning_rate": 1.69450622406639e-05, "loss": 0.7526, "step": 9210 }, { "epoch": 7.643983402489627, "grad_norm": 19.887744903564453, "learning_rate": 1.6944730290456433e-05, "loss": 0.8381, "step": 9211 }, { "epoch": 7.644813278008299, "grad_norm": 26.37421989440918, "learning_rate": 1.6944398340248965e-05, "loss": 1.8482, "step": 9212 }, { "epoch": 7.645643153526971, "grad_norm": 28.745283126831055, "learning_rate": 1.6944066390041497e-05, "loss": 0.8451, "step": 9213 }, 
{ "epoch": 7.6464730290456435, "grad_norm": 17.156967163085938, "learning_rate": 1.6943734439834026e-05, "loss": 0.9099, "step": 9214 }, { "epoch": 7.647302904564316, "grad_norm": 18.750934600830078, "learning_rate": 1.6943402489626558e-05, "loss": 1.5079, "step": 9215 }, { "epoch": 7.648132780082988, "grad_norm": 24.69254493713379, "learning_rate": 1.694307053941909e-05, "loss": 1.3851, "step": 9216 }, { "epoch": 7.64896265560166, "grad_norm": 16.082387924194336, "learning_rate": 1.694273858921162e-05, "loss": 0.6825, "step": 9217 }, { "epoch": 7.649792531120332, "grad_norm": 17.12508773803711, "learning_rate": 1.694240663900415e-05, "loss": 0.9813, "step": 9218 }, { "epoch": 7.6506224066390045, "grad_norm": 29.093364715576172, "learning_rate": 1.694207468879668e-05, "loss": 0.7044, "step": 9219 }, { "epoch": 7.651452282157677, "grad_norm": 23.080453872680664, "learning_rate": 1.6941742738589212e-05, "loss": 1.8835, "step": 9220 }, { "epoch": 7.652282157676349, "grad_norm": 31.407188415527344, "learning_rate": 1.6941410788381744e-05, "loss": 2.3211, "step": 9221 }, { "epoch": 7.653112033195021, "grad_norm": 22.54814910888672, "learning_rate": 1.6941078838174276e-05, "loss": 1.1659, "step": 9222 }, { "epoch": 7.653941908713693, "grad_norm": 21.211196899414062, "learning_rate": 1.6940746887966805e-05, "loss": 1.1568, "step": 9223 }, { "epoch": 7.6547717842323655, "grad_norm": 20.803203582763672, "learning_rate": 1.6940414937759337e-05, "loss": 1.1798, "step": 9224 }, { "epoch": 7.655601659751038, "grad_norm": 21.221256256103516, "learning_rate": 1.694008298755187e-05, "loss": 0.6646, "step": 9225 }, { "epoch": 7.65643153526971, "grad_norm": 13.803991317749023, "learning_rate": 1.69397510373444e-05, "loss": 0.4782, "step": 9226 }, { "epoch": 7.657261410788382, "grad_norm": 35.84105682373047, "learning_rate": 1.693941908713693e-05, "loss": 1.1487, "step": 9227 }, { "epoch": 7.658091286307054, "grad_norm": 20.75411033630371, "learning_rate": 1.6939087136929462e-05, 
"loss": 0.7861, "step": 9228 }, { "epoch": 7.6589211618257265, "grad_norm": 15.893176078796387, "learning_rate": 1.6938755186721994e-05, "loss": 0.9157, "step": 9229 }, { "epoch": 7.659751037344399, "grad_norm": 34.091522216796875, "learning_rate": 1.6938423236514526e-05, "loss": 0.9333, "step": 9230 }, { "epoch": 7.660580912863071, "grad_norm": 20.605018615722656, "learning_rate": 1.6938091286307055e-05, "loss": 0.9366, "step": 9231 }, { "epoch": 7.661410788381743, "grad_norm": 27.920452117919922, "learning_rate": 1.6937759336099587e-05, "loss": 1.219, "step": 9232 }, { "epoch": 7.662240663900415, "grad_norm": 30.45167350769043, "learning_rate": 1.693742738589212e-05, "loss": 1.6005, "step": 9233 }, { "epoch": 7.6630705394190874, "grad_norm": 16.884374618530273, "learning_rate": 1.6937095435684648e-05, "loss": 1.2625, "step": 9234 }, { "epoch": 7.66390041493776, "grad_norm": 22.912338256835938, "learning_rate": 1.693676348547718e-05, "loss": 1.6635, "step": 9235 }, { "epoch": 7.664730290456432, "grad_norm": 19.14704132080078, "learning_rate": 1.6936431535269712e-05, "loss": 1.2078, "step": 9236 }, { "epoch": 7.665560165975104, "grad_norm": 14.474370002746582, "learning_rate": 1.693609958506224e-05, "loss": 0.6756, "step": 9237 }, { "epoch": 7.666390041493776, "grad_norm": 30.282445907592773, "learning_rate": 1.6935767634854773e-05, "loss": 2.0823, "step": 9238 }, { "epoch": 7.667219917012448, "grad_norm": 26.722076416015625, "learning_rate": 1.69354356846473e-05, "loss": 1.3179, "step": 9239 }, { "epoch": 7.668049792531121, "grad_norm": 19.059961318969727, "learning_rate": 1.6935103734439834e-05, "loss": 0.7909, "step": 9240 }, { "epoch": 7.668879668049793, "grad_norm": 23.115198135375977, "learning_rate": 1.6934771784232366e-05, "loss": 1.2488, "step": 9241 }, { "epoch": 7.669709543568465, "grad_norm": 26.61650276184082, "learning_rate": 1.6934439834024898e-05, "loss": 1.2654, "step": 9242 }, { "epoch": 7.670539419087137, "grad_norm": 35.786041259765625, 
"learning_rate": 1.6934107883817427e-05, "loss": 1.3582, "step": 9243 }, { "epoch": 7.671369294605809, "grad_norm": 15.277268409729004, "learning_rate": 1.693377593360996e-05, "loss": 1.0306, "step": 9244 }, { "epoch": 7.672199170124482, "grad_norm": 30.702919006347656, "learning_rate": 1.693344398340249e-05, "loss": 1.5012, "step": 9245 }, { "epoch": 7.673029045643154, "grad_norm": 27.470272064208984, "learning_rate": 1.6933112033195023e-05, "loss": 0.8173, "step": 9246 }, { "epoch": 7.673858921161826, "grad_norm": 14.2124605178833, "learning_rate": 1.6932780082987552e-05, "loss": 1.1389, "step": 9247 }, { "epoch": 7.674688796680498, "grad_norm": 25.572568893432617, "learning_rate": 1.6932448132780084e-05, "loss": 1.4522, "step": 9248 }, { "epoch": 7.67551867219917, "grad_norm": 24.00957489013672, "learning_rate": 1.6932116182572616e-05, "loss": 0.8622, "step": 9249 }, { "epoch": 7.676348547717843, "grad_norm": 22.52567481994629, "learning_rate": 1.6931784232365148e-05, "loss": 1.2675, "step": 9250 }, { "epoch": 7.677178423236515, "grad_norm": 35.57365417480469, "learning_rate": 1.693145228215768e-05, "loss": 2.1324, "step": 9251 }, { "epoch": 7.678008298755187, "grad_norm": 35.532440185546875, "learning_rate": 1.693112033195021e-05, "loss": 0.8594, "step": 9252 }, { "epoch": 7.678838174273859, "grad_norm": 36.120269775390625, "learning_rate": 1.693078838174274e-05, "loss": 1.5809, "step": 9253 }, { "epoch": 7.679668049792531, "grad_norm": 31.00340461730957, "learning_rate": 1.6930456431535273e-05, "loss": 1.4894, "step": 9254 }, { "epoch": 7.680497925311204, "grad_norm": 14.19156551361084, "learning_rate": 1.6930124481327802e-05, "loss": 0.8992, "step": 9255 }, { "epoch": 7.681327800829876, "grad_norm": 24.566320419311523, "learning_rate": 1.6929792531120334e-05, "loss": 1.6933, "step": 9256 }, { "epoch": 7.682157676348548, "grad_norm": 24.425140380859375, "learning_rate": 1.6929460580912863e-05, "loss": 1.0534, "step": 9257 }, { "epoch": 7.68298755186722, 
"grad_norm": 28.36821746826172, "learning_rate": 1.6929128630705395e-05, "loss": 1.2188, "step": 9258 }, { "epoch": 7.683817427385892, "grad_norm": 26.669843673706055, "learning_rate": 1.6928796680497927e-05, "loss": 0.769, "step": 9259 }, { "epoch": 7.6846473029045645, "grad_norm": 25.361282348632812, "learning_rate": 1.6928464730290456e-05, "loss": 1.0145, "step": 9260 }, { "epoch": 7.685477178423237, "grad_norm": 25.27377700805664, "learning_rate": 1.6928132780082988e-05, "loss": 1.0243, "step": 9261 }, { "epoch": 7.686307053941909, "grad_norm": 38.24203872680664, "learning_rate": 1.692780082987552e-05, "loss": 0.7715, "step": 9262 }, { "epoch": 7.687136929460581, "grad_norm": 26.265954971313477, "learning_rate": 1.6927468879668052e-05, "loss": 1.8465, "step": 9263 }, { "epoch": 7.687966804979253, "grad_norm": 45.56053161621094, "learning_rate": 1.692713692946058e-05, "loss": 1.9035, "step": 9264 }, { "epoch": 7.6887966804979255, "grad_norm": 20.420841217041016, "learning_rate": 1.6926804979253113e-05, "loss": 1.1644, "step": 9265 }, { "epoch": 7.689626556016598, "grad_norm": 17.65645980834961, "learning_rate": 1.6926473029045645e-05, "loss": 0.9398, "step": 9266 }, { "epoch": 7.69045643153527, "grad_norm": 21.564319610595703, "learning_rate": 1.6926141078838177e-05, "loss": 1.3255, "step": 9267 }, { "epoch": 7.691286307053942, "grad_norm": 33.86155319213867, "learning_rate": 1.6925809128630706e-05, "loss": 2.4761, "step": 9268 }, { "epoch": 7.692116182572614, "grad_norm": 15.822919845581055, "learning_rate": 1.6925477178423238e-05, "loss": 1.1885, "step": 9269 }, { "epoch": 7.6929460580912865, "grad_norm": 29.369752883911133, "learning_rate": 1.692514522821577e-05, "loss": 1.23, "step": 9270 }, { "epoch": 7.693775933609959, "grad_norm": 14.916722297668457, "learning_rate": 1.6924813278008302e-05, "loss": 0.9145, "step": 9271 }, { "epoch": 7.694605809128631, "grad_norm": 35.23291778564453, "learning_rate": 1.692448132780083e-05, "loss": 1.3548, "step": 9272 }, { 
"epoch": 7.695435684647303, "grad_norm": 13.360051155090332, "learning_rate": 1.6924149377593363e-05, "loss": 0.944, "step": 9273 }, { "epoch": 7.696265560165975, "grad_norm": 27.765472412109375, "learning_rate": 1.6923817427385895e-05, "loss": 1.4765, "step": 9274 }, { "epoch": 7.6970954356846475, "grad_norm": 29.7554874420166, "learning_rate": 1.6923485477178424e-05, "loss": 1.643, "step": 9275 }, { "epoch": 7.69792531120332, "grad_norm": 20.622121810913086, "learning_rate": 1.6923153526970956e-05, "loss": 1.3599, "step": 9276 }, { "epoch": 7.698755186721992, "grad_norm": 17.51382827758789, "learning_rate": 1.6922821576763488e-05, "loss": 0.98, "step": 9277 }, { "epoch": 7.699585062240664, "grad_norm": 26.94982147216797, "learning_rate": 1.6922489626556017e-05, "loss": 1.8554, "step": 9278 }, { "epoch": 7.700414937759336, "grad_norm": 20.943241119384766, "learning_rate": 1.692215767634855e-05, "loss": 1.6065, "step": 9279 }, { "epoch": 7.7012448132780085, "grad_norm": 22.203826904296875, "learning_rate": 1.692182572614108e-05, "loss": 1.1524, "step": 9280 }, { "epoch": 7.702074688796681, "grad_norm": 23.46571922302246, "learning_rate": 1.692149377593361e-05, "loss": 1.7075, "step": 9281 }, { "epoch": 7.702904564315353, "grad_norm": 42.651065826416016, "learning_rate": 1.692116182572614e-05, "loss": 1.7782, "step": 9282 }, { "epoch": 7.703734439834025, "grad_norm": 29.06137466430664, "learning_rate": 1.6920829875518674e-05, "loss": 0.9255, "step": 9283 }, { "epoch": 7.704564315352697, "grad_norm": 22.52846908569336, "learning_rate": 1.6920497925311206e-05, "loss": 1.7224, "step": 9284 }, { "epoch": 7.7053941908713695, "grad_norm": 18.421463012695312, "learning_rate": 1.6920165975103735e-05, "loss": 1.2112, "step": 9285 }, { "epoch": 7.706224066390042, "grad_norm": 23.021909713745117, "learning_rate": 1.6919834024896267e-05, "loss": 1.7465, "step": 9286 }, { "epoch": 7.707053941908714, "grad_norm": 16.7547607421875, "learning_rate": 1.69195020746888e-05, "loss": 
1.4961, "step": 9287 }, { "epoch": 7.707883817427386, "grad_norm": 19.743221282958984, "learning_rate": 1.691917012448133e-05, "loss": 0.9343, "step": 9288 }, { "epoch": 7.708713692946058, "grad_norm": 17.794431686401367, "learning_rate": 1.691883817427386e-05, "loss": 1.4801, "step": 9289 }, { "epoch": 7.70954356846473, "grad_norm": 16.600727081298828, "learning_rate": 1.6918506224066392e-05, "loss": 0.8815, "step": 9290 }, { "epoch": 7.710373443983403, "grad_norm": 21.046480178833008, "learning_rate": 1.6918174273858924e-05, "loss": 1.8433, "step": 9291 }, { "epoch": 7.711203319502075, "grad_norm": 22.84100914001465, "learning_rate": 1.6917842323651456e-05, "loss": 1.4609, "step": 9292 }, { "epoch": 7.712033195020747, "grad_norm": 18.083110809326172, "learning_rate": 1.6917510373443985e-05, "loss": 1.1916, "step": 9293 }, { "epoch": 7.712863070539419, "grad_norm": 32.18430709838867, "learning_rate": 1.6917178423236517e-05, "loss": 1.4357, "step": 9294 }, { "epoch": 7.713692946058091, "grad_norm": 21.06980323791504, "learning_rate": 1.6916846473029045e-05, "loss": 1.242, "step": 9295 }, { "epoch": 7.714522821576764, "grad_norm": 13.058646202087402, "learning_rate": 1.6916514522821578e-05, "loss": 0.82, "step": 9296 }, { "epoch": 7.715352697095436, "grad_norm": 27.79767608642578, "learning_rate": 1.691618257261411e-05, "loss": 1.7847, "step": 9297 }, { "epoch": 7.716182572614108, "grad_norm": 23.467615127563477, "learning_rate": 1.691585062240664e-05, "loss": 1.5382, "step": 9298 }, { "epoch": 7.71701244813278, "grad_norm": 33.84358596801758, "learning_rate": 1.691551867219917e-05, "loss": 1.5383, "step": 9299 }, { "epoch": 7.717842323651452, "grad_norm": 25.052879333496094, "learning_rate": 1.6915186721991703e-05, "loss": 1.0363, "step": 9300 }, { "epoch": 7.718672199170125, "grad_norm": 23.944934844970703, "learning_rate": 1.6914854771784235e-05, "loss": 1.2116, "step": 9301 }, { "epoch": 7.719502074688797, "grad_norm": 14.539194107055664, "learning_rate": 
1.6914522821576763e-05, "loss": 0.729, "step": 9302 }, { "epoch": 7.720331950207469, "grad_norm": 22.245742797851562, "learning_rate": 1.6914190871369296e-05, "loss": 0.8139, "step": 9303 }, { "epoch": 7.721161825726141, "grad_norm": 15.69459056854248, "learning_rate": 1.6913858921161828e-05, "loss": 0.9865, "step": 9304 }, { "epoch": 7.721991701244813, "grad_norm": 29.370454788208008, "learning_rate": 1.691352697095436e-05, "loss": 0.8511, "step": 9305 }, { "epoch": 7.722821576763486, "grad_norm": 27.322978973388672, "learning_rate": 1.691319502074689e-05, "loss": 1.0108, "step": 9306 }, { "epoch": 7.723651452282158, "grad_norm": 15.272554397583008, "learning_rate": 1.691286307053942e-05, "loss": 0.6745, "step": 9307 }, { "epoch": 7.72448132780083, "grad_norm": 26.898601531982422, "learning_rate": 1.6912531120331953e-05, "loss": 1.9998, "step": 9308 }, { "epoch": 7.725311203319502, "grad_norm": 16.963491439819336, "learning_rate": 1.6912199170124485e-05, "loss": 1.0031, "step": 9309 }, { "epoch": 7.726141078838174, "grad_norm": 15.37571907043457, "learning_rate": 1.6911867219917014e-05, "loss": 0.8593, "step": 9310 }, { "epoch": 7.7269709543568466, "grad_norm": 23.53526496887207, "learning_rate": 1.6911535269709546e-05, "loss": 1.1036, "step": 9311 }, { "epoch": 7.727800829875519, "grad_norm": 28.98028564453125, "learning_rate": 1.6911203319502078e-05, "loss": 1.4666, "step": 9312 }, { "epoch": 7.728630705394191, "grad_norm": 20.661653518676758, "learning_rate": 1.6910871369294606e-05, "loss": 0.7078, "step": 9313 }, { "epoch": 7.729460580912863, "grad_norm": 16.1444034576416, "learning_rate": 1.691053941908714e-05, "loss": 0.8885, "step": 9314 }, { "epoch": 7.730290456431535, "grad_norm": 18.955354690551758, "learning_rate": 1.691020746887967e-05, "loss": 1.3935, "step": 9315 }, { "epoch": 7.7311203319502075, "grad_norm": 15.843827247619629, "learning_rate": 1.69098755186722e-05, "loss": 1.4146, "step": 9316 }, { "epoch": 7.73195020746888, "grad_norm": 
16.571718215942383, "learning_rate": 1.690954356846473e-05, "loss": 0.7849, "step": 9317 }, { "epoch": 7.732780082987552, "grad_norm": 25.772109985351562, "learning_rate": 1.690921161825726e-05, "loss": 1.4676, "step": 9318 }, { "epoch": 7.733609958506224, "grad_norm": 27.88530921936035, "learning_rate": 1.6908879668049792e-05, "loss": 1.3884, "step": 9319 }, { "epoch": 7.734439834024896, "grad_norm": 24.165369033813477, "learning_rate": 1.6908547717842324e-05, "loss": 1.6349, "step": 9320 }, { "epoch": 7.7352697095435685, "grad_norm": 24.081092834472656, "learning_rate": 1.6908215767634857e-05, "loss": 0.9673, "step": 9321 }, { "epoch": 7.736099585062241, "grad_norm": 21.821313858032227, "learning_rate": 1.6907883817427385e-05, "loss": 1.1732, "step": 9322 }, { "epoch": 7.736929460580913, "grad_norm": 37.890708923339844, "learning_rate": 1.6907551867219917e-05, "loss": 2.6716, "step": 9323 }, { "epoch": 7.737759336099585, "grad_norm": 23.115610122680664, "learning_rate": 1.690721991701245e-05, "loss": 1.1709, "step": 9324 }, { "epoch": 7.738589211618257, "grad_norm": 17.899057388305664, "learning_rate": 1.690688796680498e-05, "loss": 1.3464, "step": 9325 }, { "epoch": 7.7394190871369295, "grad_norm": 30.63326644897461, "learning_rate": 1.690655601659751e-05, "loss": 1.6623, "step": 9326 }, { "epoch": 7.740248962655602, "grad_norm": 12.765715599060059, "learning_rate": 1.6906224066390042e-05, "loss": 0.9624, "step": 9327 }, { "epoch": 7.741078838174274, "grad_norm": 16.666419982910156, "learning_rate": 1.6905892116182575e-05, "loss": 0.9406, "step": 9328 }, { "epoch": 7.741908713692946, "grad_norm": 16.86795425415039, "learning_rate": 1.6905560165975107e-05, "loss": 1.0318, "step": 9329 }, { "epoch": 7.742738589211618, "grad_norm": 20.677778244018555, "learning_rate": 1.690522821576764e-05, "loss": 0.8417, "step": 9330 }, { "epoch": 7.7435684647302905, "grad_norm": 19.420989990234375, "learning_rate": 1.6904896265560167e-05, "loss": 1.1837, "step": 9331 }, { 
"epoch": 7.744398340248963, "grad_norm": 18.76712989807129, "learning_rate": 1.69045643153527e-05, "loss": 0.6741, "step": 9332 }, { "epoch": 7.745228215767635, "grad_norm": 17.612560272216797, "learning_rate": 1.6904232365145232e-05, "loss": 1.4741, "step": 9333 }, { "epoch": 7.746058091286307, "grad_norm": 37.70838165283203, "learning_rate": 1.690390041493776e-05, "loss": 2.0177, "step": 9334 }, { "epoch": 7.746887966804979, "grad_norm": 17.457910537719727, "learning_rate": 1.6903568464730293e-05, "loss": 1.514, "step": 9335 }, { "epoch": 7.7477178423236515, "grad_norm": 19.388591766357422, "learning_rate": 1.690323651452282e-05, "loss": 0.8462, "step": 9336 }, { "epoch": 7.748547717842324, "grad_norm": 16.016382217407227, "learning_rate": 1.6902904564315353e-05, "loss": 0.8283, "step": 9337 }, { "epoch": 7.749377593360996, "grad_norm": 43.226131439208984, "learning_rate": 1.6902572614107885e-05, "loss": 1.6917, "step": 9338 }, { "epoch": 7.750207468879668, "grad_norm": 23.058807373046875, "learning_rate": 1.6902240663900414e-05, "loss": 0.9378, "step": 9339 }, { "epoch": 7.75103734439834, "grad_norm": 20.916255950927734, "learning_rate": 1.6901908713692946e-05, "loss": 0.9016, "step": 9340 }, { "epoch": 7.751867219917012, "grad_norm": 21.059223175048828, "learning_rate": 1.690157676348548e-05, "loss": 1.3761, "step": 9341 }, { "epoch": 7.752697095435685, "grad_norm": 16.386375427246094, "learning_rate": 1.690124481327801e-05, "loss": 0.9871, "step": 9342 }, { "epoch": 7.753526970954357, "grad_norm": 18.35808753967285, "learning_rate": 1.690091286307054e-05, "loss": 1.3039, "step": 9343 }, { "epoch": 7.754356846473029, "grad_norm": 23.620134353637695, "learning_rate": 1.690058091286307e-05, "loss": 1.4091, "step": 9344 }, { "epoch": 7.755186721991701, "grad_norm": 18.54897689819336, "learning_rate": 1.6900248962655603e-05, "loss": 0.8536, "step": 9345 }, { "epoch": 7.756016597510373, "grad_norm": 48.08616256713867, "learning_rate": 1.6899917012448136e-05, "loss": 
1.182, "step": 9346 }, { "epoch": 7.756846473029046, "grad_norm": 41.83755874633789, "learning_rate": 1.6899585062240664e-05, "loss": 1.1693, "step": 9347 }, { "epoch": 7.757676348547718, "grad_norm": 16.78544807434082, "learning_rate": 1.6899253112033196e-05, "loss": 0.7659, "step": 9348 }, { "epoch": 7.75850622406639, "grad_norm": 24.959157943725586, "learning_rate": 1.689892116182573e-05, "loss": 1.5894, "step": 9349 }, { "epoch": 7.759336099585062, "grad_norm": 19.391807556152344, "learning_rate": 1.689858921161826e-05, "loss": 0.9346, "step": 9350 }, { "epoch": 7.760165975103734, "grad_norm": 20.97850227355957, "learning_rate": 1.689825726141079e-05, "loss": 1.5531, "step": 9351 }, { "epoch": 7.760995850622407, "grad_norm": 13.616421699523926, "learning_rate": 1.689792531120332e-05, "loss": 0.6014, "step": 9352 }, { "epoch": 7.761825726141079, "grad_norm": 34.6104621887207, "learning_rate": 1.6897593360995854e-05, "loss": 0.8817, "step": 9353 }, { "epoch": 7.762655601659751, "grad_norm": 34.24296188354492, "learning_rate": 1.6897261410788382e-05, "loss": 0.9156, "step": 9354 }, { "epoch": 7.763485477178423, "grad_norm": 19.67713165283203, "learning_rate": 1.6896929460580914e-05, "loss": 1.1544, "step": 9355 }, { "epoch": 7.764315352697095, "grad_norm": 21.885984420776367, "learning_rate": 1.6896597510373443e-05, "loss": 1.5058, "step": 9356 }, { "epoch": 7.765145228215768, "grad_norm": 25.92817497253418, "learning_rate": 1.6896265560165975e-05, "loss": 1.4393, "step": 9357 }, { "epoch": 7.76597510373444, "grad_norm": 15.588160514831543, "learning_rate": 1.6895933609958507e-05, "loss": 0.6549, "step": 9358 }, { "epoch": 7.766804979253112, "grad_norm": 27.42253875732422, "learning_rate": 1.689560165975104e-05, "loss": 1.2905, "step": 9359 }, { "epoch": 7.767634854771784, "grad_norm": 29.05866241455078, "learning_rate": 1.6895269709543568e-05, "loss": 2.0729, "step": 9360 }, { "epoch": 7.768464730290456, "grad_norm": 15.36645793914795, "learning_rate": 
1.68949377593361e-05, "loss": 1.0129, "step": 9361 }, { "epoch": 7.769294605809129, "grad_norm": 16.36781883239746, "learning_rate": 1.6894605809128632e-05, "loss": 1.0418, "step": 9362 }, { "epoch": 7.770124481327801, "grad_norm": 21.740835189819336, "learning_rate": 1.6894273858921164e-05, "loss": 1.2252, "step": 9363 }, { "epoch": 7.770954356846473, "grad_norm": 26.232101440429688, "learning_rate": 1.6893941908713693e-05, "loss": 1.1333, "step": 9364 }, { "epoch": 7.771784232365145, "grad_norm": 19.332048416137695, "learning_rate": 1.6893609958506225e-05, "loss": 1.4309, "step": 9365 }, { "epoch": 7.772614107883817, "grad_norm": 24.476160049438477, "learning_rate": 1.6893278008298757e-05, "loss": 1.3476, "step": 9366 }, { "epoch": 7.7734439834024895, "grad_norm": 24.08742332458496, "learning_rate": 1.689294605809129e-05, "loss": 2.5156, "step": 9367 }, { "epoch": 7.774273858921162, "grad_norm": 17.57389259338379, "learning_rate": 1.6892614107883818e-05, "loss": 0.9285, "step": 9368 }, { "epoch": 7.775103734439834, "grad_norm": 21.290149688720703, "learning_rate": 1.689228215767635e-05, "loss": 1.0736, "step": 9369 }, { "epoch": 7.775933609958506, "grad_norm": 22.880512237548828, "learning_rate": 1.6891950207468882e-05, "loss": 1.816, "step": 9370 }, { "epoch": 7.776763485477178, "grad_norm": 14.26343822479248, "learning_rate": 1.6891618257261415e-05, "loss": 1.3079, "step": 9371 }, { "epoch": 7.7775933609958505, "grad_norm": 41.59673309326172, "learning_rate": 1.6891286307053943e-05, "loss": 1.1965, "step": 9372 }, { "epoch": 7.778423236514523, "grad_norm": 36.151268005371094, "learning_rate": 1.6890954356846475e-05, "loss": 1.29, "step": 9373 }, { "epoch": 7.779253112033195, "grad_norm": 19.512104034423828, "learning_rate": 1.6890622406639004e-05, "loss": 1.1902, "step": 9374 }, { "epoch": 7.780082987551867, "grad_norm": 30.529369354248047, "learning_rate": 1.6890290456431536e-05, "loss": 1.8248, "step": 9375 }, { "epoch": 7.780912863070539, "grad_norm": 
20.90324592590332, "learning_rate": 1.6889958506224068e-05, "loss": 0.8844, "step": 9376 }, { "epoch": 7.7817427385892115, "grad_norm": 17.200878143310547, "learning_rate": 1.6889626556016597e-05, "loss": 0.9103, "step": 9377 }, { "epoch": 7.782572614107884, "grad_norm": 53.287540435791016, "learning_rate": 1.688929460580913e-05, "loss": 1.2863, "step": 9378 }, { "epoch": 7.783402489626556, "grad_norm": 23.425670623779297, "learning_rate": 1.688896265560166e-05, "loss": 1.9722, "step": 9379 }, { "epoch": 7.784232365145228, "grad_norm": 19.102296829223633, "learning_rate": 1.6888630705394193e-05, "loss": 0.7425, "step": 9380 }, { "epoch": 7.7850622406639, "grad_norm": 26.375463485717773, "learning_rate": 1.6888298755186722e-05, "loss": 1.5008, "step": 9381 }, { "epoch": 7.7858921161825725, "grad_norm": 32.4501838684082, "learning_rate": 1.6887966804979254e-05, "loss": 1.4051, "step": 9382 }, { "epoch": 7.786721991701245, "grad_norm": 23.088802337646484, "learning_rate": 1.6887634854771786e-05, "loss": 1.0624, "step": 9383 }, { "epoch": 7.787551867219917, "grad_norm": 15.026220321655273, "learning_rate": 1.688730290456432e-05, "loss": 1.0426, "step": 9384 }, { "epoch": 7.788381742738589, "grad_norm": 15.10352611541748, "learning_rate": 1.6886970954356847e-05, "loss": 0.8714, "step": 9385 }, { "epoch": 7.789211618257261, "grad_norm": 24.863040924072266, "learning_rate": 1.688663900414938e-05, "loss": 1.5907, "step": 9386 }, { "epoch": 7.7900414937759335, "grad_norm": 31.473054885864258, "learning_rate": 1.688630705394191e-05, "loss": 0.9022, "step": 9387 }, { "epoch": 7.790871369294606, "grad_norm": 19.30825424194336, "learning_rate": 1.6885975103734443e-05, "loss": 1.4063, "step": 9388 }, { "epoch": 7.791701244813278, "grad_norm": 12.217620849609375, "learning_rate": 1.6885643153526972e-05, "loss": 0.7508, "step": 9389 }, { "epoch": 7.79253112033195, "grad_norm": 16.878265380859375, "learning_rate": 1.6885311203319504e-05, "loss": 1.13, "step": 9390 }, { "epoch": 
7.793360995850622, "grad_norm": 12.985947608947754, "learning_rate": 1.6884979253112036e-05, "loss": 0.957, "step": 9391 }, { "epoch": 7.7941908713692944, "grad_norm": 20.399282455444336, "learning_rate": 1.6884647302904565e-05, "loss": 1.0099, "step": 9392 }, { "epoch": 7.795020746887967, "grad_norm": 22.434412002563477, "learning_rate": 1.6884315352697097e-05, "loss": 1.3963, "step": 9393 }, { "epoch": 7.795850622406639, "grad_norm": 28.452856063842773, "learning_rate": 1.688398340248963e-05, "loss": 1.166, "step": 9394 }, { "epoch": 7.796680497925311, "grad_norm": 26.661596298217773, "learning_rate": 1.6883651452282158e-05, "loss": 1.1084, "step": 9395 }, { "epoch": 7.797510373443983, "grad_norm": 20.578203201293945, "learning_rate": 1.688331950207469e-05, "loss": 1.3005, "step": 9396 }, { "epoch": 7.798340248962655, "grad_norm": 23.95435905456543, "learning_rate": 1.688298755186722e-05, "loss": 1.6682, "step": 9397 }, { "epoch": 7.799170124481328, "grad_norm": 23.79679298400879, "learning_rate": 1.688265560165975e-05, "loss": 1.7379, "step": 9398 }, { "epoch": 7.8, "grad_norm": 26.467853546142578, "learning_rate": 1.6882323651452283e-05, "loss": 1.377, "step": 9399 }, { "epoch": 7.800829875518672, "grad_norm": 15.840933799743652, "learning_rate": 1.6881991701244815e-05, "loss": 0.9055, "step": 9400 }, { "epoch": 7.801659751037344, "grad_norm": 16.019397735595703, "learning_rate": 1.6881659751037344e-05, "loss": 0.812, "step": 9401 }, { "epoch": 7.802489626556016, "grad_norm": 28.14915657043457, "learning_rate": 1.6881327800829876e-05, "loss": 1.0736, "step": 9402 }, { "epoch": 7.803319502074689, "grad_norm": 16.762414932250977, "learning_rate": 1.6880995850622408e-05, "loss": 1.1228, "step": 9403 }, { "epoch": 7.804149377593361, "grad_norm": 24.01934814453125, "learning_rate": 1.688066390041494e-05, "loss": 1.212, "step": 9404 }, { "epoch": 7.804979253112033, "grad_norm": 15.543124198913574, "learning_rate": 1.688033195020747e-05, "loss": 0.8929, "step": 9405 
}, { "epoch": 7.805809128630705, "grad_norm": 25.459949493408203, "learning_rate": 1.688e-05, "loss": 1.2685, "step": 9406 }, { "epoch": 7.806639004149377, "grad_norm": 21.282936096191406, "learning_rate": 1.6879668049792533e-05, "loss": 1.1952, "step": 9407 }, { "epoch": 7.80746887966805, "grad_norm": 15.271258354187012, "learning_rate": 1.6879336099585065e-05, "loss": 0.8528, "step": 9408 }, { "epoch": 7.808298755186722, "grad_norm": 12.806851387023926, "learning_rate": 1.6879004149377597e-05, "loss": 1.2174, "step": 9409 }, { "epoch": 7.809128630705394, "grad_norm": 17.718542098999023, "learning_rate": 1.6878672199170126e-05, "loss": 1.0035, "step": 9410 }, { "epoch": 7.809958506224066, "grad_norm": 15.246119499206543, "learning_rate": 1.6878340248962658e-05, "loss": 0.9114, "step": 9411 }, { "epoch": 7.810788381742738, "grad_norm": 29.530969619750977, "learning_rate": 1.6878008298755187e-05, "loss": 1.4171, "step": 9412 }, { "epoch": 7.811618257261411, "grad_norm": 20.121522903442383, "learning_rate": 1.687767634854772e-05, "loss": 1.608, "step": 9413 }, { "epoch": 7.812448132780083, "grad_norm": 17.387086868286133, "learning_rate": 1.687734439834025e-05, "loss": 1.2001, "step": 9414 }, { "epoch": 7.813278008298755, "grad_norm": 22.579580307006836, "learning_rate": 1.687701244813278e-05, "loss": 1.1208, "step": 9415 }, { "epoch": 7.814107883817427, "grad_norm": 16.723594665527344, "learning_rate": 1.6876680497925312e-05, "loss": 1.2502, "step": 9416 }, { "epoch": 7.814937759336099, "grad_norm": 18.110631942749023, "learning_rate": 1.6876348547717844e-05, "loss": 1.1718, "step": 9417 }, { "epoch": 7.8157676348547716, "grad_norm": 20.709922790527344, "learning_rate": 1.6876016597510373e-05, "loss": 0.8105, "step": 9418 }, { "epoch": 7.816597510373444, "grad_norm": 27.663822174072266, "learning_rate": 1.6875684647302905e-05, "loss": 1.2455, "step": 9419 }, { "epoch": 7.817427385892116, "grad_norm": 14.309788703918457, "learning_rate": 1.6875352697095437e-05, 
"loss": 0.7007, "step": 9420 }, { "epoch": 7.818257261410788, "grad_norm": 31.00474739074707, "learning_rate": 1.687502074688797e-05, "loss": 1.114, "step": 9421 }, { "epoch": 7.81908713692946, "grad_norm": 16.57674217224121, "learning_rate": 1.6874688796680498e-05, "loss": 1.2992, "step": 9422 }, { "epoch": 7.8199170124481325, "grad_norm": 24.05746841430664, "learning_rate": 1.687435684647303e-05, "loss": 0.9663, "step": 9423 }, { "epoch": 7.820746887966805, "grad_norm": 21.4904727935791, "learning_rate": 1.6874024896265562e-05, "loss": 1.5006, "step": 9424 }, { "epoch": 7.821576763485477, "grad_norm": 14.641488075256348, "learning_rate": 1.6873692946058094e-05, "loss": 0.8662, "step": 9425 }, { "epoch": 7.822406639004149, "grad_norm": 34.0075798034668, "learning_rate": 1.6873360995850623e-05, "loss": 1.5717, "step": 9426 }, { "epoch": 7.823236514522821, "grad_norm": 13.208097457885742, "learning_rate": 1.6873029045643155e-05, "loss": 0.744, "step": 9427 }, { "epoch": 7.8240663900414935, "grad_norm": 26.379833221435547, "learning_rate": 1.6872697095435687e-05, "loss": 0.9687, "step": 9428 }, { "epoch": 7.824896265560166, "grad_norm": 20.803693771362305, "learning_rate": 1.687236514522822e-05, "loss": 1.1022, "step": 9429 }, { "epoch": 7.825726141078838, "grad_norm": 24.889549255371094, "learning_rate": 1.6872033195020748e-05, "loss": 0.8336, "step": 9430 }, { "epoch": 7.82655601659751, "grad_norm": 31.813779830932617, "learning_rate": 1.687170124481328e-05, "loss": 0.896, "step": 9431 }, { "epoch": 7.827385892116182, "grad_norm": 15.145109176635742, "learning_rate": 1.6871369294605812e-05, "loss": 0.6655, "step": 9432 }, { "epoch": 7.8282157676348545, "grad_norm": 32.43229675292969, "learning_rate": 1.687103734439834e-05, "loss": 2.0274, "step": 9433 }, { "epoch": 7.829045643153527, "grad_norm": 19.256093978881836, "learning_rate": 1.6870705394190873e-05, "loss": 1.1113, "step": 9434 }, { "epoch": 7.829875518672199, "grad_norm": 27.471588134765625, 
"learning_rate": 1.68703734439834e-05, "loss": 1.0461, "step": 9435 }, { "epoch": 7.830705394190871, "grad_norm": 29.747753143310547, "learning_rate": 1.6870041493775934e-05, "loss": 1.1078, "step": 9436 }, { "epoch": 7.831535269709543, "grad_norm": 16.36306381225586, "learning_rate": 1.6869709543568466e-05, "loss": 1.2514, "step": 9437 }, { "epoch": 7.8323651452282155, "grad_norm": 17.47032928466797, "learning_rate": 1.6869377593360998e-05, "loss": 1.0592, "step": 9438 }, { "epoch": 7.833195020746888, "grad_norm": 20.588130950927734, "learning_rate": 1.6869045643153527e-05, "loss": 0.8072, "step": 9439 }, { "epoch": 7.83402489626556, "grad_norm": 26.23670768737793, "learning_rate": 1.686871369294606e-05, "loss": 1.4062, "step": 9440 }, { "epoch": 7.834854771784232, "grad_norm": 16.12300682067871, "learning_rate": 1.686838174273859e-05, "loss": 0.7408, "step": 9441 }, { "epoch": 7.835684647302904, "grad_norm": 16.748056411743164, "learning_rate": 1.6868049792531123e-05, "loss": 1.324, "step": 9442 }, { "epoch": 7.8365145228215765, "grad_norm": 27.427196502685547, "learning_rate": 1.6867717842323652e-05, "loss": 1.0492, "step": 9443 }, { "epoch": 7.837344398340249, "grad_norm": 29.89963722229004, "learning_rate": 1.6867385892116184e-05, "loss": 1.5872, "step": 9444 }, { "epoch": 7.838174273858921, "grad_norm": 29.063390731811523, "learning_rate": 1.6867053941908716e-05, "loss": 1.4387, "step": 9445 }, { "epoch": 7.839004149377593, "grad_norm": 22.850473403930664, "learning_rate": 1.6866721991701248e-05, "loss": 1.4033, "step": 9446 }, { "epoch": 7.839834024896265, "grad_norm": 18.514474868774414, "learning_rate": 1.6866390041493777e-05, "loss": 1.0419, "step": 9447 }, { "epoch": 7.840663900414937, "grad_norm": 20.961551666259766, "learning_rate": 1.686605809128631e-05, "loss": 1.1951, "step": 9448 }, { "epoch": 7.84149377593361, "grad_norm": 28.93195915222168, "learning_rate": 1.686572614107884e-05, "loss": 1.2475, "step": 9449 }, { "epoch": 7.842323651452282, 
"grad_norm": 28.017559051513672, "learning_rate": 1.6865394190871373e-05, "loss": 1.1424, "step": 9450 }, { "epoch": 7.843153526970954, "grad_norm": 21.43514060974121, "learning_rate": 1.6865062240663902e-05, "loss": 1.0819, "step": 9451 }, { "epoch": 7.843983402489626, "grad_norm": 20.148357391357422, "learning_rate": 1.6864730290456434e-05, "loss": 1.4064, "step": 9452 }, { "epoch": 7.844813278008298, "grad_norm": 19.014049530029297, "learning_rate": 1.6864398340248963e-05, "loss": 0.8651, "step": 9453 }, { "epoch": 7.845643153526971, "grad_norm": 15.235774040222168, "learning_rate": 1.6864066390041495e-05, "loss": 1.3377, "step": 9454 }, { "epoch": 7.846473029045643, "grad_norm": 20.814374923706055, "learning_rate": 1.6863734439834027e-05, "loss": 1.363, "step": 9455 }, { "epoch": 7.847302904564315, "grad_norm": 29.43956756591797, "learning_rate": 1.6863402489626556e-05, "loss": 0.9636, "step": 9456 }, { "epoch": 7.848132780082987, "grad_norm": 30.18208885192871, "learning_rate": 1.6863070539419088e-05, "loss": 1.6666, "step": 9457 }, { "epoch": 7.848962655601659, "grad_norm": 37.91326141357422, "learning_rate": 1.686273858921162e-05, "loss": 1.8518, "step": 9458 }, { "epoch": 7.849792531120332, "grad_norm": 29.71998405456543, "learning_rate": 1.686240663900415e-05, "loss": 1.7214, "step": 9459 }, { "epoch": 7.850622406639004, "grad_norm": 27.73822784423828, "learning_rate": 1.686207468879668e-05, "loss": 1.9846, "step": 9460 }, { "epoch": 7.851452282157676, "grad_norm": 20.07150650024414, "learning_rate": 1.6861742738589213e-05, "loss": 1.3139, "step": 9461 }, { "epoch": 7.852282157676348, "grad_norm": 33.275856018066406, "learning_rate": 1.6861410788381745e-05, "loss": 1.7356, "step": 9462 }, { "epoch": 7.85311203319502, "grad_norm": 18.054738998413086, "learning_rate": 1.6861078838174277e-05, "loss": 1.2277, "step": 9463 }, { "epoch": 7.853941908713693, "grad_norm": 13.434743881225586, "learning_rate": 1.6860746887966806e-05, "loss": 1.1523, "step": 9464 }, { 
"epoch": 7.854771784232365, "grad_norm": 20.18182373046875, "learning_rate": 1.6860414937759338e-05, "loss": 1.3922, "step": 9465 }, { "epoch": 7.855601659751037, "grad_norm": 26.27667236328125, "learning_rate": 1.686008298755187e-05, "loss": 1.3294, "step": 9466 }, { "epoch": 7.856431535269709, "grad_norm": 14.743266105651855, "learning_rate": 1.6859751037344402e-05, "loss": 0.6537, "step": 9467 }, { "epoch": 7.857261410788381, "grad_norm": 11.713196754455566, "learning_rate": 1.685941908713693e-05, "loss": 0.8435, "step": 9468 }, { "epoch": 7.858091286307054, "grad_norm": 28.35867691040039, "learning_rate": 1.6859087136929463e-05, "loss": 1.5866, "step": 9469 }, { "epoch": 7.858921161825726, "grad_norm": 18.34406280517578, "learning_rate": 1.6858755186721995e-05, "loss": 0.8703, "step": 9470 }, { "epoch": 7.859751037344399, "grad_norm": 29.74981689453125, "learning_rate": 1.6858423236514524e-05, "loss": 1.5285, "step": 9471 }, { "epoch": 7.860580912863071, "grad_norm": 17.73680305480957, "learning_rate": 1.6858091286307056e-05, "loss": 0.5523, "step": 9472 }, { "epoch": 7.861410788381743, "grad_norm": 34.6728515625, "learning_rate": 1.6857759336099584e-05, "loss": 1.4722, "step": 9473 }, { "epoch": 7.862240663900415, "grad_norm": 23.090776443481445, "learning_rate": 1.6857427385892117e-05, "loss": 1.4154, "step": 9474 }, { "epoch": 7.863070539419088, "grad_norm": 31.975509643554688, "learning_rate": 1.685709543568465e-05, "loss": 1.0824, "step": 9475 }, { "epoch": 7.86390041493776, "grad_norm": 31.89617156982422, "learning_rate": 1.6856763485477177e-05, "loss": 1.1813, "step": 9476 }, { "epoch": 7.864730290456432, "grad_norm": 14.487751960754395, "learning_rate": 1.685643153526971e-05, "loss": 0.8594, "step": 9477 }, { "epoch": 7.865560165975104, "grad_norm": 22.777238845825195, "learning_rate": 1.685609958506224e-05, "loss": 0.8249, "step": 9478 }, { "epoch": 7.866390041493776, "grad_norm": 32.368736267089844, "learning_rate": 1.6855767634854774e-05, "loss": 
0.9082, "step": 9479 }, { "epoch": 7.867219917012449, "grad_norm": 25.13247299194336, "learning_rate": 1.6855435684647302e-05, "loss": 1.4209, "step": 9480 }, { "epoch": 7.868049792531121, "grad_norm": 31.6503849029541, "learning_rate": 1.6855103734439835e-05, "loss": 2.4301, "step": 9481 }, { "epoch": 7.868879668049793, "grad_norm": 10.932174682617188, "learning_rate": 1.6854771784232367e-05, "loss": 0.5456, "step": 9482 }, { "epoch": 7.869709543568465, "grad_norm": 23.28485870361328, "learning_rate": 1.68544398340249e-05, "loss": 1.0579, "step": 9483 }, { "epoch": 7.870539419087137, "grad_norm": 113.18050384521484, "learning_rate": 1.6854107883817427e-05, "loss": 1.3037, "step": 9484 }, { "epoch": 7.87136929460581, "grad_norm": 17.54254913330078, "learning_rate": 1.685377593360996e-05, "loss": 0.6817, "step": 9485 }, { "epoch": 7.872199170124482, "grad_norm": 12.960819244384766, "learning_rate": 1.6853443983402492e-05, "loss": 0.4748, "step": 9486 }, { "epoch": 7.873029045643154, "grad_norm": 15.235109329223633, "learning_rate": 1.6853112033195024e-05, "loss": 1.0819, "step": 9487 }, { "epoch": 7.873858921161826, "grad_norm": 28.41048240661621, "learning_rate": 1.6852780082987556e-05, "loss": 0.7171, "step": 9488 }, { "epoch": 7.874688796680498, "grad_norm": 24.743032455444336, "learning_rate": 1.6852448132780085e-05, "loss": 1.3427, "step": 9489 }, { "epoch": 7.875518672199171, "grad_norm": 23.920259475708008, "learning_rate": 1.6852116182572617e-05, "loss": 1.6539, "step": 9490 }, { "epoch": 7.876348547717843, "grad_norm": 28.36545181274414, "learning_rate": 1.6851784232365145e-05, "loss": 0.8789, "step": 9491 }, { "epoch": 7.877178423236515, "grad_norm": 23.393327713012695, "learning_rate": 1.6851452282157678e-05, "loss": 1.6072, "step": 9492 }, { "epoch": 7.878008298755187, "grad_norm": 20.478830337524414, "learning_rate": 1.685112033195021e-05, "loss": 1.092, "step": 9493 }, { "epoch": 7.878838174273859, "grad_norm": 30.66851806640625, "learning_rate": 
1.685078838174274e-05, "loss": 1.4135, "step": 9494 }, { "epoch": 7.8796680497925315, "grad_norm": 21.789018630981445, "learning_rate": 1.685045643153527e-05, "loss": 0.9013, "step": 9495 }, { "epoch": 7.880497925311204, "grad_norm": 22.224416732788086, "learning_rate": 1.6850124481327803e-05, "loss": 1.2889, "step": 9496 }, { "epoch": 7.881327800829876, "grad_norm": 17.030155181884766, "learning_rate": 1.684979253112033e-05, "loss": 0.9817, "step": 9497 }, { "epoch": 7.882157676348548, "grad_norm": 25.6243839263916, "learning_rate": 1.6849460580912863e-05, "loss": 0.7825, "step": 9498 }, { "epoch": 7.88298755186722, "grad_norm": 13.416114807128906, "learning_rate": 1.6849128630705396e-05, "loss": 0.7357, "step": 9499 }, { "epoch": 7.8838174273858925, "grad_norm": 14.74267292022705, "learning_rate": 1.6848796680497928e-05, "loss": 1.1038, "step": 9500 }, { "epoch": 7.884647302904565, "grad_norm": 14.888846397399902, "learning_rate": 1.6848464730290456e-05, "loss": 0.8045, "step": 9501 }, { "epoch": 7.885477178423237, "grad_norm": 22.869117736816406, "learning_rate": 1.684813278008299e-05, "loss": 1.6755, "step": 9502 }, { "epoch": 7.886307053941909, "grad_norm": 28.623485565185547, "learning_rate": 1.684780082987552e-05, "loss": 1.9145, "step": 9503 }, { "epoch": 7.887136929460581, "grad_norm": 22.271106719970703, "learning_rate": 1.6847468879668053e-05, "loss": 1.2335, "step": 9504 }, { "epoch": 7.8879668049792535, "grad_norm": 16.714311599731445, "learning_rate": 1.684713692946058e-05, "loss": 1.4342, "step": 9505 }, { "epoch": 7.888796680497926, "grad_norm": 19.94677734375, "learning_rate": 1.6846804979253114e-05, "loss": 1.237, "step": 9506 }, { "epoch": 7.889626556016598, "grad_norm": 19.3969783782959, "learning_rate": 1.6846473029045646e-05, "loss": 1.2167, "step": 9507 }, { "epoch": 7.89045643153527, "grad_norm": 24.928415298461914, "learning_rate": 1.6846141078838178e-05, "loss": 1.3238, "step": 9508 }, { "epoch": 7.891286307053942, "grad_norm": 
19.59479331970215, "learning_rate": 1.6845809128630706e-05, "loss": 0.8396, "step": 9509 }, { "epoch": 7.8921161825726145, "grad_norm": 21.18939971923828, "learning_rate": 1.684547717842324e-05, "loss": 1.333, "step": 9510 }, { "epoch": 7.892946058091287, "grad_norm": 28.065189361572266, "learning_rate": 1.684514522821577e-05, "loss": 1.344, "step": 9511 }, { "epoch": 7.893775933609959, "grad_norm": 16.820533752441406, "learning_rate": 1.68448132780083e-05, "loss": 1.1967, "step": 9512 }, { "epoch": 7.894605809128631, "grad_norm": 16.835044860839844, "learning_rate": 1.684448132780083e-05, "loss": 0.9799, "step": 9513 }, { "epoch": 7.895435684647303, "grad_norm": 20.360435485839844, "learning_rate": 1.684414937759336e-05, "loss": 1.2571, "step": 9514 }, { "epoch": 7.8962655601659755, "grad_norm": 18.584949493408203, "learning_rate": 1.6843817427385892e-05, "loss": 1.4584, "step": 9515 }, { "epoch": 7.897095435684648, "grad_norm": 21.067569732666016, "learning_rate": 1.6843485477178424e-05, "loss": 1.5688, "step": 9516 }, { "epoch": 7.89792531120332, "grad_norm": 19.931995391845703, "learning_rate": 1.6843153526970957e-05, "loss": 1.0831, "step": 9517 }, { "epoch": 7.898755186721992, "grad_norm": 28.682239532470703, "learning_rate": 1.6842821576763485e-05, "loss": 1.8442, "step": 9518 }, { "epoch": 7.899585062240664, "grad_norm": 17.35279083251953, "learning_rate": 1.6842489626556017e-05, "loss": 1.0468, "step": 9519 }, { "epoch": 7.9004149377593365, "grad_norm": 28.972322463989258, "learning_rate": 1.684215767634855e-05, "loss": 1.0754, "step": 9520 }, { "epoch": 7.901244813278009, "grad_norm": 28.507036209106445, "learning_rate": 1.684182572614108e-05, "loss": 1.6712, "step": 9521 }, { "epoch": 7.902074688796681, "grad_norm": 24.384017944335938, "learning_rate": 1.684149377593361e-05, "loss": 1.2142, "step": 9522 }, { "epoch": 7.902904564315353, "grad_norm": 31.109783172607422, "learning_rate": 1.6841161825726142e-05, "loss": 1.4358, "step": 9523 }, { "epoch": 
7.903734439834025, "grad_norm": 22.578920364379883, "learning_rate": 1.6840829875518675e-05, "loss": 1.2953, "step": 9524 }, { "epoch": 7.904564315352697, "grad_norm": 19.752483367919922, "learning_rate": 1.6840497925311207e-05, "loss": 1.2643, "step": 9525 }, { "epoch": 7.90539419087137, "grad_norm": 26.602947235107422, "learning_rate": 1.6840165975103735e-05, "loss": 1.2508, "step": 9526 }, { "epoch": 7.906224066390042, "grad_norm": 15.234162330627441, "learning_rate": 1.6839834024896267e-05, "loss": 1.0466, "step": 9527 }, { "epoch": 7.907053941908714, "grad_norm": 12.157269477844238, "learning_rate": 1.68395020746888e-05, "loss": 0.6561, "step": 9528 }, { "epoch": 7.907883817427386, "grad_norm": 17.68716812133789, "learning_rate": 1.6839170124481328e-05, "loss": 0.7407, "step": 9529 }, { "epoch": 7.908713692946058, "grad_norm": 23.37602996826172, "learning_rate": 1.683883817427386e-05, "loss": 1.0976, "step": 9530 }, { "epoch": 7.909543568464731, "grad_norm": 16.890445709228516, "learning_rate": 1.6838506224066392e-05, "loss": 0.926, "step": 9531 }, { "epoch": 7.910373443983403, "grad_norm": 18.002796173095703, "learning_rate": 1.683817427385892e-05, "loss": 1.1705, "step": 9532 }, { "epoch": 7.911203319502075, "grad_norm": 14.866105079650879, "learning_rate": 1.6837842323651453e-05, "loss": 0.7018, "step": 9533 }, { "epoch": 7.912033195020747, "grad_norm": 13.089648246765137, "learning_rate": 1.6837510373443985e-05, "loss": 0.7732, "step": 9534 }, { "epoch": 7.912863070539419, "grad_norm": 32.03831100463867, "learning_rate": 1.6837178423236514e-05, "loss": 1.523, "step": 9535 }, { "epoch": 7.913692946058092, "grad_norm": 19.348249435424805, "learning_rate": 1.6836846473029046e-05, "loss": 0.947, "step": 9536 }, { "epoch": 7.914522821576764, "grad_norm": 19.150976181030273, "learning_rate": 1.683651452282158e-05, "loss": 1.1117, "step": 9537 }, { "epoch": 7.915352697095436, "grad_norm": 15.236806869506836, "learning_rate": 1.6836182572614107e-05, "loss": 
0.9364, "step": 9538 }, { "epoch": 7.916182572614108, "grad_norm": 23.422565460205078, "learning_rate": 1.683585062240664e-05, "loss": 0.8988, "step": 9539 }, { "epoch": 7.91701244813278, "grad_norm": 27.519325256347656, "learning_rate": 1.683551867219917e-05, "loss": 1.6842, "step": 9540 }, { "epoch": 7.917842323651453, "grad_norm": 24.68597412109375, "learning_rate": 1.6835186721991703e-05, "loss": 2.1169, "step": 9541 }, { "epoch": 7.918672199170125, "grad_norm": 35.10398483276367, "learning_rate": 1.6834854771784236e-05, "loss": 1.2533, "step": 9542 }, { "epoch": 7.919502074688797, "grad_norm": 19.587100982666016, "learning_rate": 1.6834522821576764e-05, "loss": 0.9573, "step": 9543 }, { "epoch": 7.920331950207469, "grad_norm": 41.2662239074707, "learning_rate": 1.6834190871369296e-05, "loss": 0.961, "step": 9544 }, { "epoch": 7.921161825726141, "grad_norm": 26.30818748474121, "learning_rate": 1.683385892116183e-05, "loss": 1.06, "step": 9545 }, { "epoch": 7.9219917012448136, "grad_norm": 20.54292106628418, "learning_rate": 1.683352697095436e-05, "loss": 0.8006, "step": 9546 }, { "epoch": 7.922821576763486, "grad_norm": 22.38356590270996, "learning_rate": 1.683319502074689e-05, "loss": 1.6494, "step": 9547 }, { "epoch": 7.923651452282158, "grad_norm": 40.69320297241211, "learning_rate": 1.683286307053942e-05, "loss": 1.7147, "step": 9548 }, { "epoch": 7.92448132780083, "grad_norm": 17.441864013671875, "learning_rate": 1.6832531120331953e-05, "loss": 1.1448, "step": 9549 }, { "epoch": 7.925311203319502, "grad_norm": 45.52158737182617, "learning_rate": 1.6832199170124482e-05, "loss": 1.5373, "step": 9550 }, { "epoch": 7.9261410788381745, "grad_norm": 21.672704696655273, "learning_rate": 1.6831867219917014e-05, "loss": 1.4475, "step": 9551 }, { "epoch": 7.926970954356847, "grad_norm": 43.11576461791992, "learning_rate": 1.6831535269709543e-05, "loss": 1.6544, "step": 9552 }, { "epoch": 7.927800829875519, "grad_norm": 25.68964385986328, "learning_rate": 
1.6831203319502075e-05, "loss": 1.1727, "step": 9553 }, { "epoch": 7.928630705394191, "grad_norm": 48.04490661621094, "learning_rate": 1.6830871369294607e-05, "loss": 1.2713, "step": 9554 }, { "epoch": 7.929460580912863, "grad_norm": 29.332679748535156, "learning_rate": 1.6830539419087136e-05, "loss": 0.8451, "step": 9555 }, { "epoch": 7.9302904564315355, "grad_norm": 23.446765899658203, "learning_rate": 1.6830207468879668e-05, "loss": 1.8329, "step": 9556 }, { "epoch": 7.931120331950208, "grad_norm": 23.159011840820312, "learning_rate": 1.68298755186722e-05, "loss": 1.3009, "step": 9557 }, { "epoch": 7.93195020746888, "grad_norm": 16.771257400512695, "learning_rate": 1.6829543568464732e-05, "loss": 0.6383, "step": 9558 }, { "epoch": 7.932780082987552, "grad_norm": 25.599212646484375, "learning_rate": 1.682921161825726e-05, "loss": 1.3759, "step": 9559 }, { "epoch": 7.933609958506224, "grad_norm": 29.12187957763672, "learning_rate": 1.6828879668049793e-05, "loss": 2.0904, "step": 9560 }, { "epoch": 7.9344398340248965, "grad_norm": 19.79889488220215, "learning_rate": 1.6828547717842325e-05, "loss": 0.8794, "step": 9561 }, { "epoch": 7.935269709543569, "grad_norm": 18.652027130126953, "learning_rate": 1.6828215767634857e-05, "loss": 1.0401, "step": 9562 }, { "epoch": 7.936099585062241, "grad_norm": 32.404964447021484, "learning_rate": 1.6827883817427386e-05, "loss": 1.2244, "step": 9563 }, { "epoch": 7.936929460580913, "grad_norm": 18.042051315307617, "learning_rate": 1.6827551867219918e-05, "loss": 0.7755, "step": 9564 }, { "epoch": 7.937759336099585, "grad_norm": 28.56678581237793, "learning_rate": 1.682721991701245e-05, "loss": 0.9264, "step": 9565 }, { "epoch": 7.9385892116182575, "grad_norm": 17.57224464416504, "learning_rate": 1.6826887966804982e-05, "loss": 0.7612, "step": 9566 }, { "epoch": 7.93941908713693, "grad_norm": 19.062623977661133, "learning_rate": 1.6826556016597514e-05, "loss": 0.8858, "step": 9567 }, { "epoch": 7.940248962655602, "grad_norm": 
26.161664962768555, "learning_rate": 1.6826224066390043e-05, "loss": 1.4233, "step": 9568 }, { "epoch": 7.941078838174274, "grad_norm": 11.948722839355469, "learning_rate": 1.6825892116182575e-05, "loss": 0.7478, "step": 9569 }, { "epoch": 7.941908713692946, "grad_norm": 24.195436477661133, "learning_rate": 1.6825560165975104e-05, "loss": 0.7802, "step": 9570 }, { "epoch": 7.9427385892116185, "grad_norm": 27.40378189086914, "learning_rate": 1.6825228215767636e-05, "loss": 1.7926, "step": 9571 }, { "epoch": 7.943568464730291, "grad_norm": 17.51169204711914, "learning_rate": 1.6824896265560168e-05, "loss": 1.0334, "step": 9572 }, { "epoch": 7.944398340248963, "grad_norm": 24.58717918395996, "learning_rate": 1.6824564315352697e-05, "loss": 1.3733, "step": 9573 }, { "epoch": 7.945228215767635, "grad_norm": 34.34811782836914, "learning_rate": 1.682423236514523e-05, "loss": 0.877, "step": 9574 }, { "epoch": 7.946058091286307, "grad_norm": 36.956424713134766, "learning_rate": 1.682390041493776e-05, "loss": 0.8138, "step": 9575 }, { "epoch": 7.946887966804979, "grad_norm": 17.50193214416504, "learning_rate": 1.682356846473029e-05, "loss": 1.2398, "step": 9576 }, { "epoch": 7.947717842323652, "grad_norm": 30.468303680419922, "learning_rate": 1.6823236514522822e-05, "loss": 1.7912, "step": 9577 }, { "epoch": 7.948547717842324, "grad_norm": 19.250112533569336, "learning_rate": 1.6822904564315354e-05, "loss": 1.0845, "step": 9578 }, { "epoch": 7.949377593360996, "grad_norm": 19.189254760742188, "learning_rate": 1.6822572614107886e-05, "loss": 1.1816, "step": 9579 }, { "epoch": 7.950207468879668, "grad_norm": 26.185104370117188, "learning_rate": 1.6822240663900415e-05, "loss": 1.5834, "step": 9580 }, { "epoch": 7.95103734439834, "grad_norm": 22.82033348083496, "learning_rate": 1.6821908713692947e-05, "loss": 0.9959, "step": 9581 }, { "epoch": 7.951867219917013, "grad_norm": 25.008955001831055, "learning_rate": 1.682157676348548e-05, "loss": 1.5518, "step": 9582 }, { "epoch": 
7.952697095435685, "grad_norm": 29.202392578125, "learning_rate": 1.682124481327801e-05, "loss": 1.9363, "step": 9583 }, { "epoch": 7.953526970954357, "grad_norm": 18.179119110107422, "learning_rate": 1.682091286307054e-05, "loss": 0.8933, "step": 9584 }, { "epoch": 7.954356846473029, "grad_norm": 47.629180908203125, "learning_rate": 1.6820580912863072e-05, "loss": 1.7321, "step": 9585 }, { "epoch": 7.955186721991701, "grad_norm": 17.49527359008789, "learning_rate": 1.6820248962655604e-05, "loss": 0.9398, "step": 9586 }, { "epoch": 7.956016597510374, "grad_norm": 33.506778717041016, "learning_rate": 1.6819917012448136e-05, "loss": 1.2152, "step": 9587 }, { "epoch": 7.956846473029046, "grad_norm": 25.516643524169922, "learning_rate": 1.6819585062240665e-05, "loss": 1.1441, "step": 9588 }, { "epoch": 7.957676348547718, "grad_norm": 19.947031021118164, "learning_rate": 1.6819253112033197e-05, "loss": 2.0238, "step": 9589 }, { "epoch": 7.95850622406639, "grad_norm": 17.518516540527344, "learning_rate": 1.6818921161825726e-05, "loss": 1.1734, "step": 9590 }, { "epoch": 7.959336099585062, "grad_norm": 20.112722396850586, "learning_rate": 1.6818589211618258e-05, "loss": 1.1052, "step": 9591 }, { "epoch": 7.960165975103735, "grad_norm": 15.751300811767578, "learning_rate": 1.681825726141079e-05, "loss": 1.3056, "step": 9592 }, { "epoch": 7.960995850622407, "grad_norm": 14.532295227050781, "learning_rate": 1.681792531120332e-05, "loss": 0.6842, "step": 9593 }, { "epoch": 7.961825726141079, "grad_norm": 16.11003303527832, "learning_rate": 1.681759336099585e-05, "loss": 0.6717, "step": 9594 }, { "epoch": 7.962655601659751, "grad_norm": 16.52042579650879, "learning_rate": 1.6817261410788383e-05, "loss": 1.1638, "step": 9595 }, { "epoch": 7.963485477178423, "grad_norm": 18.552085876464844, "learning_rate": 1.6816929460580915e-05, "loss": 1.2031, "step": 9596 }, { "epoch": 7.964315352697096, "grad_norm": 24.163957595825195, "learning_rate": 1.6816597510373444e-05, "loss": 
1.5294, "step": 9597 }, { "epoch": 7.965145228215768, "grad_norm": 22.678192138671875, "learning_rate": 1.6816265560165976e-05, "loss": 1.3275, "step": 9598 }, { "epoch": 7.96597510373444, "grad_norm": 35.3399658203125, "learning_rate": 1.6815933609958508e-05, "loss": 1.9423, "step": 9599 }, { "epoch": 7.966804979253112, "grad_norm": 21.43244743347168, "learning_rate": 1.681560165975104e-05, "loss": 1.2895, "step": 9600 }, { "epoch": 7.967634854771784, "grad_norm": 21.516639709472656, "learning_rate": 1.681526970954357e-05, "loss": 0.8804, "step": 9601 }, { "epoch": 7.9684647302904565, "grad_norm": 17.059843063354492, "learning_rate": 1.68149377593361e-05, "loss": 0.5855, "step": 9602 }, { "epoch": 7.969294605809129, "grad_norm": 22.355100631713867, "learning_rate": 1.6814605809128633e-05, "loss": 0.8512, "step": 9603 }, { "epoch": 7.970124481327801, "grad_norm": 20.56038475036621, "learning_rate": 1.6814273858921165e-05, "loss": 1.6935, "step": 9604 }, { "epoch": 7.970954356846473, "grad_norm": 19.896543502807617, "learning_rate": 1.6813941908713694e-05, "loss": 0.7369, "step": 9605 }, { "epoch": 7.971784232365145, "grad_norm": 22.691015243530273, "learning_rate": 1.6813609958506226e-05, "loss": 1.3663, "step": 9606 }, { "epoch": 7.9726141078838175, "grad_norm": 13.8524169921875, "learning_rate": 1.6813278008298758e-05, "loss": 0.9139, "step": 9607 }, { "epoch": 7.97344398340249, "grad_norm": 27.70212745666504, "learning_rate": 1.6812946058091287e-05, "loss": 1.1572, "step": 9608 }, { "epoch": 7.974273858921162, "grad_norm": 34.41645812988281, "learning_rate": 1.681261410788382e-05, "loss": 1.5089, "step": 9609 }, { "epoch": 7.975103734439834, "grad_norm": 16.337507247924805, "learning_rate": 1.681228215767635e-05, "loss": 1.2443, "step": 9610 }, { "epoch": 7.975933609958506, "grad_norm": 22.20520782470703, "learning_rate": 1.681195020746888e-05, "loss": 1.4189, "step": 9611 }, { "epoch": 7.9767634854771785, "grad_norm": 25.14599609375, "learning_rate": 
1.6811618257261412e-05, "loss": 1.1164, "step": 9612 }, { "epoch": 7.977593360995851, "grad_norm": 17.077674865722656, "learning_rate": 1.681128630705394e-05, "loss": 1.048, "step": 9613 }, { "epoch": 7.978423236514523, "grad_norm": 21.226865768432617, "learning_rate": 1.6810954356846473e-05, "loss": 1.1216, "step": 9614 }, { "epoch": 7.979253112033195, "grad_norm": 21.859355926513672, "learning_rate": 1.6810622406639005e-05, "loss": 1.3965, "step": 9615 }, { "epoch": 7.980082987551867, "grad_norm": 49.13561248779297, "learning_rate": 1.6810290456431537e-05, "loss": 1.0828, "step": 9616 }, { "epoch": 7.9809128630705395, "grad_norm": 18.556324005126953, "learning_rate": 1.6809958506224066e-05, "loss": 0.7955, "step": 9617 }, { "epoch": 7.981742738589212, "grad_norm": 19.586896896362305, "learning_rate": 1.6809626556016598e-05, "loss": 1.1339, "step": 9618 }, { "epoch": 7.982572614107884, "grad_norm": 44.736019134521484, "learning_rate": 1.680929460580913e-05, "loss": 1.8735, "step": 9619 }, { "epoch": 7.983402489626556, "grad_norm": 31.242969512939453, "learning_rate": 1.6808962655601662e-05, "loss": 1.5695, "step": 9620 }, { "epoch": 7.984232365145228, "grad_norm": 20.718656539916992, "learning_rate": 1.6808630705394194e-05, "loss": 0.9879, "step": 9621 }, { "epoch": 7.9850622406639005, "grad_norm": 19.26250648498535, "learning_rate": 1.6808298755186723e-05, "loss": 1.2468, "step": 9622 }, { "epoch": 7.985892116182573, "grad_norm": 19.100120544433594, "learning_rate": 1.6807966804979255e-05, "loss": 1.1597, "step": 9623 }, { "epoch": 7.986721991701245, "grad_norm": 11.892715454101562, "learning_rate": 1.6807634854771787e-05, "loss": 0.5115, "step": 9624 }, { "epoch": 7.987551867219917, "grad_norm": 16.66145133972168, "learning_rate": 1.680730290456432e-05, "loss": 0.9145, "step": 9625 }, { "epoch": 7.988381742738589, "grad_norm": 24.26997184753418, "learning_rate": 1.6806970954356848e-05, "loss": 1.4721, "step": 9626 }, { "epoch": 7.9892116182572614, "grad_norm": 
25.687143325805664, "learning_rate": 1.680663900414938e-05, "loss": 0.9688, "step": 9627 }, { "epoch": 7.990041493775934, "grad_norm": 20.231401443481445, "learning_rate": 1.6806307053941912e-05, "loss": 1.6117, "step": 9628 }, { "epoch": 7.990871369294606, "grad_norm": 12.95556926727295, "learning_rate": 1.680597510373444e-05, "loss": 0.812, "step": 9629 }, { "epoch": 7.991701244813278, "grad_norm": 14.780879974365234, "learning_rate": 1.6805643153526973e-05, "loss": 1.0612, "step": 9630 }, { "epoch": 7.99253112033195, "grad_norm": 36.10615921020508, "learning_rate": 1.68053112033195e-05, "loss": 1.9261, "step": 9631 }, { "epoch": 7.993360995850622, "grad_norm": 25.03622817993164, "learning_rate": 1.6804979253112034e-05, "loss": 1.1303, "step": 9632 }, { "epoch": 7.994190871369295, "grad_norm": 18.289142608642578, "learning_rate": 1.6804647302904566e-05, "loss": 0.7068, "step": 9633 }, { "epoch": 7.995020746887967, "grad_norm": 20.336463928222656, "learning_rate": 1.6804315352697095e-05, "loss": 1.7755, "step": 9634 }, { "epoch": 7.995850622406639, "grad_norm": 36.66522979736328, "learning_rate": 1.6803983402489627e-05, "loss": 1.5839, "step": 9635 }, { "epoch": 7.996680497925311, "grad_norm": 24.27831268310547, "learning_rate": 1.680365145228216e-05, "loss": 1.5219, "step": 9636 }, { "epoch": 7.997510373443983, "grad_norm": 16.643850326538086, "learning_rate": 1.680331950207469e-05, "loss": 0.9684, "step": 9637 }, { "epoch": 7.998340248962656, "grad_norm": 14.022441864013672, "learning_rate": 1.680298755186722e-05, "loss": 0.5679, "step": 9638 }, { "epoch": 7.999170124481328, "grad_norm": 14.560790061950684, "learning_rate": 1.6802655601659752e-05, "loss": 1.2396, "step": 9639 }, { "epoch": 8.0, "grad_norm": 15.457647323608398, "learning_rate": 1.6802323651452284e-05, "loss": 0.7032, "step": 9640 }, { "epoch": 8.000829875518672, "grad_norm": 22.490367889404297, "learning_rate": 1.6801991701244816e-05, "loss": 0.989, "step": 9641 }, { "epoch": 8.001659751037344, 
"grad_norm": 18.128299713134766, "learning_rate": 1.6801659751037345e-05, "loss": 1.3985, "step": 9642 }, { "epoch": 8.002489626556017, "grad_norm": 13.029786109924316, "learning_rate": 1.6801327800829877e-05, "loss": 0.806, "step": 9643 }, { "epoch": 8.003319502074689, "grad_norm": 27.304706573486328, "learning_rate": 1.680099585062241e-05, "loss": 1.017, "step": 9644 }, { "epoch": 8.004149377593361, "grad_norm": 14.221088409423828, "learning_rate": 1.680066390041494e-05, "loss": 0.9306, "step": 9645 }, { "epoch": 8.004979253112033, "grad_norm": 15.198046684265137, "learning_rate": 1.680033195020747e-05, "loss": 0.7666, "step": 9646 }, { "epoch": 8.005809128630705, "grad_norm": 17.541959762573242, "learning_rate": 1.6800000000000002e-05, "loss": 1.3579, "step": 9647 }, { "epoch": 8.006639004149378, "grad_norm": 21.80292320251465, "learning_rate": 1.6799668049792534e-05, "loss": 0.8438, "step": 9648 }, { "epoch": 8.00746887966805, "grad_norm": 17.47593116760254, "learning_rate": 1.6799336099585063e-05, "loss": 1.099, "step": 9649 }, { "epoch": 8.008298755186722, "grad_norm": 13.293254852294922, "learning_rate": 1.6799004149377595e-05, "loss": 0.8947, "step": 9650 }, { "epoch": 8.009128630705394, "grad_norm": 14.907208442687988, "learning_rate": 1.6798672199170127e-05, "loss": 1.0258, "step": 9651 }, { "epoch": 8.009958506224066, "grad_norm": 16.561445236206055, "learning_rate": 1.6798340248962656e-05, "loss": 0.9202, "step": 9652 }, { "epoch": 8.010788381742739, "grad_norm": 18.793554306030273, "learning_rate": 1.6798008298755188e-05, "loss": 0.5749, "step": 9653 }, { "epoch": 8.01161825726141, "grad_norm": 50.38898849487305, "learning_rate": 1.679767634854772e-05, "loss": 1.0267, "step": 9654 }, { "epoch": 8.012448132780083, "grad_norm": 15.94698715209961, "learning_rate": 1.679734439834025e-05, "loss": 1.1511, "step": 9655 }, { "epoch": 8.013278008298755, "grad_norm": 19.82813262939453, "learning_rate": 1.679701244813278e-05, "loss": 0.6113, "step": 9656 }, { 
"epoch": 8.014107883817427, "grad_norm": 22.206754684448242, "learning_rate": 1.6796680497925313e-05, "loss": 1.0289, "step": 9657 }, { "epoch": 8.0149377593361, "grad_norm": 17.181148529052734, "learning_rate": 1.6796348547717845e-05, "loss": 0.878, "step": 9658 }, { "epoch": 8.015767634854772, "grad_norm": 17.78929328918457, "learning_rate": 1.6796016597510374e-05, "loss": 1.0202, "step": 9659 }, { "epoch": 8.016597510373444, "grad_norm": 24.49411964416504, "learning_rate": 1.6795684647302906e-05, "loss": 1.1677, "step": 9660 }, { "epoch": 8.017427385892116, "grad_norm": 27.731239318847656, "learning_rate": 1.6795352697095438e-05, "loss": 1.2719, "step": 9661 }, { "epoch": 8.018257261410788, "grad_norm": 48.657386779785156, "learning_rate": 1.679502074688797e-05, "loss": 1.0369, "step": 9662 }, { "epoch": 8.01908713692946, "grad_norm": 19.164710998535156, "learning_rate": 1.67946887966805e-05, "loss": 0.6229, "step": 9663 }, { "epoch": 8.019917012448133, "grad_norm": 21.33111000061035, "learning_rate": 1.679435684647303e-05, "loss": 1.1895, "step": 9664 }, { "epoch": 8.020746887966805, "grad_norm": 15.561609268188477, "learning_rate": 1.6794024896265563e-05, "loss": 0.7254, "step": 9665 }, { "epoch": 8.021576763485477, "grad_norm": 27.439123153686523, "learning_rate": 1.6793692946058095e-05, "loss": 0.8225, "step": 9666 }, { "epoch": 8.02240663900415, "grad_norm": 17.435949325561523, "learning_rate": 1.6793360995850624e-05, "loss": 0.7098, "step": 9667 }, { "epoch": 8.023236514522821, "grad_norm": 22.733570098876953, "learning_rate": 1.6793029045643156e-05, "loss": 1.1069, "step": 9668 }, { "epoch": 8.024066390041494, "grad_norm": 30.454681396484375, "learning_rate": 1.6792697095435684e-05, "loss": 1.5799, "step": 9669 }, { "epoch": 8.024896265560166, "grad_norm": 18.339338302612305, "learning_rate": 1.6792365145228217e-05, "loss": 0.9227, "step": 9670 }, { "epoch": 8.025726141078838, "grad_norm": 20.613346099853516, "learning_rate": 1.679203319502075e-05, 
"loss": 0.9275, "step": 9671 }, { "epoch": 8.02655601659751, "grad_norm": 20.6619873046875, "learning_rate": 1.6791701244813277e-05, "loss": 1.1611, "step": 9672 }, { "epoch": 8.027385892116182, "grad_norm": 25.14268684387207, "learning_rate": 1.679136929460581e-05, "loss": 1.5933, "step": 9673 }, { "epoch": 8.028215767634855, "grad_norm": 23.491121292114258, "learning_rate": 1.679103734439834e-05, "loss": 1.3645, "step": 9674 }, { "epoch": 8.029045643153527, "grad_norm": 12.659332275390625, "learning_rate": 1.6790705394190874e-05, "loss": 0.5653, "step": 9675 }, { "epoch": 8.029875518672199, "grad_norm": 14.541296005249023, "learning_rate": 1.6790373443983402e-05, "loss": 1.3094, "step": 9676 }, { "epoch": 8.030705394190871, "grad_norm": 19.56731414794922, "learning_rate": 1.6790041493775935e-05, "loss": 1.16, "step": 9677 }, { "epoch": 8.031535269709543, "grad_norm": 27.14425277709961, "learning_rate": 1.6789709543568467e-05, "loss": 1.1665, "step": 9678 }, { "epoch": 8.032365145228216, "grad_norm": 17.76999282836914, "learning_rate": 1.6789377593361e-05, "loss": 0.906, "step": 9679 }, { "epoch": 8.033195020746888, "grad_norm": 18.898422241210938, "learning_rate": 1.6789045643153527e-05, "loss": 0.8306, "step": 9680 }, { "epoch": 8.03402489626556, "grad_norm": 22.677875518798828, "learning_rate": 1.678871369294606e-05, "loss": 0.7351, "step": 9681 }, { "epoch": 8.034854771784232, "grad_norm": 17.81842803955078, "learning_rate": 1.678838174273859e-05, "loss": 0.9976, "step": 9682 }, { "epoch": 8.035684647302904, "grad_norm": 26.580717086791992, "learning_rate": 1.6788049792531124e-05, "loss": 1.071, "step": 9683 }, { "epoch": 8.036514522821577, "grad_norm": 26.151742935180664, "learning_rate": 1.6787717842323652e-05, "loss": 1.3021, "step": 9684 }, { "epoch": 8.037344398340249, "grad_norm": 17.615123748779297, "learning_rate": 1.6787385892116185e-05, "loss": 0.8359, "step": 9685 }, { "epoch": 8.038174273858921, "grad_norm": 25.939420700073242, "learning_rate": 
1.6787053941908717e-05, "loss": 1.4159, "step": 9686 }, { "epoch": 8.039004149377593, "grad_norm": 17.130083084106445, "learning_rate": 1.6786721991701245e-05, "loss": 0.7576, "step": 9687 }, { "epoch": 8.039834024896265, "grad_norm": 26.99539566040039, "learning_rate": 1.6786390041493778e-05, "loss": 1.5654, "step": 9688 }, { "epoch": 8.040663900414938, "grad_norm": 34.48086929321289, "learning_rate": 1.678605809128631e-05, "loss": 1.6322, "step": 9689 }, { "epoch": 8.04149377593361, "grad_norm": 14.608858108520508, "learning_rate": 1.678572614107884e-05, "loss": 0.5802, "step": 9690 }, { "epoch": 8.042323651452282, "grad_norm": 27.838781356811523, "learning_rate": 1.678539419087137e-05, "loss": 0.7747, "step": 9691 }, { "epoch": 8.043153526970954, "grad_norm": 20.3165340423584, "learning_rate": 1.67850622406639e-05, "loss": 1.4715, "step": 9692 }, { "epoch": 8.043983402489626, "grad_norm": 22.200544357299805, "learning_rate": 1.678473029045643e-05, "loss": 1.4258, "step": 9693 }, { "epoch": 8.044813278008299, "grad_norm": 22.096038818359375, "learning_rate": 1.6784398340248963e-05, "loss": 0.7023, "step": 9694 }, { "epoch": 8.04564315352697, "grad_norm": 19.853303909301758, "learning_rate": 1.6784066390041496e-05, "loss": 0.6812, "step": 9695 }, { "epoch": 8.046473029045643, "grad_norm": 18.983610153198242, "learning_rate": 1.6783734439834024e-05, "loss": 1.0644, "step": 9696 }, { "epoch": 8.047302904564315, "grad_norm": 24.028812408447266, "learning_rate": 1.6783402489626556e-05, "loss": 1.3244, "step": 9697 }, { "epoch": 8.048132780082987, "grad_norm": 33.512062072753906, "learning_rate": 1.678307053941909e-05, "loss": 1.1214, "step": 9698 }, { "epoch": 8.04896265560166, "grad_norm": 21.431215286254883, "learning_rate": 1.678273858921162e-05, "loss": 0.9375, "step": 9699 }, { "epoch": 8.049792531120332, "grad_norm": 18.043603897094727, "learning_rate": 1.6782406639004153e-05, "loss": 1.0118, "step": 9700 }, { "epoch": 8.050622406639004, "grad_norm": 
24.373559951782227, "learning_rate": 1.678207468879668e-05, "loss": 0.7031, "step": 9701 }, { "epoch": 8.051452282157676, "grad_norm": 25.99396514892578, "learning_rate": 1.6781742738589213e-05, "loss": 1.317, "step": 9702 }, { "epoch": 8.052282157676348, "grad_norm": 25.47635841369629, "learning_rate": 1.6781410788381746e-05, "loss": 0.847, "step": 9703 }, { "epoch": 8.05311203319502, "grad_norm": 32.51475524902344, "learning_rate": 1.6781078838174278e-05, "loss": 0.9483, "step": 9704 }, { "epoch": 8.053941908713693, "grad_norm": 17.49312400817871, "learning_rate": 1.6780746887966806e-05, "loss": 0.7488, "step": 9705 }, { "epoch": 8.054771784232365, "grad_norm": 22.164226531982422, "learning_rate": 1.678041493775934e-05, "loss": 1.4731, "step": 9706 }, { "epoch": 8.055601659751037, "grad_norm": 21.927005767822266, "learning_rate": 1.6780082987551867e-05, "loss": 1.1948, "step": 9707 }, { "epoch": 8.05643153526971, "grad_norm": 24.803442001342773, "learning_rate": 1.67797510373444e-05, "loss": 0.8816, "step": 9708 }, { "epoch": 8.057261410788382, "grad_norm": 26.188472747802734, "learning_rate": 1.677941908713693e-05, "loss": 1.1228, "step": 9709 }, { "epoch": 8.058091286307054, "grad_norm": 18.96687126159668, "learning_rate": 1.677908713692946e-05, "loss": 0.9912, "step": 9710 }, { "epoch": 8.058921161825726, "grad_norm": 20.4188232421875, "learning_rate": 1.6778755186721992e-05, "loss": 1.4008, "step": 9711 }, { "epoch": 8.059751037344398, "grad_norm": 29.2220401763916, "learning_rate": 1.6778423236514524e-05, "loss": 1.3553, "step": 9712 }, { "epoch": 8.06058091286307, "grad_norm": 35.28011703491211, "learning_rate": 1.6778091286307053e-05, "loss": 1.2757, "step": 9713 }, { "epoch": 8.061410788381743, "grad_norm": 18.030845642089844, "learning_rate": 1.6777759336099585e-05, "loss": 0.9467, "step": 9714 }, { "epoch": 8.062240663900415, "grad_norm": 15.92806625366211, "learning_rate": 1.6777427385892117e-05, "loss": 0.648, "step": 9715 }, { "epoch": 
8.063070539419087, "grad_norm": 14.299060821533203, "learning_rate": 1.677709543568465e-05, "loss": 0.9317, "step": 9716 }, { "epoch": 8.063900414937759, "grad_norm": 21.748523712158203, "learning_rate": 1.6776763485477178e-05, "loss": 0.8214, "step": 9717 }, { "epoch": 8.064730290456431, "grad_norm": 18.748428344726562, "learning_rate": 1.677643153526971e-05, "loss": 1.1664, "step": 9718 }, { "epoch": 8.065560165975104, "grad_norm": 20.260814666748047, "learning_rate": 1.6776099585062242e-05, "loss": 0.805, "step": 9719 }, { "epoch": 8.066390041493776, "grad_norm": 18.543994903564453, "learning_rate": 1.6775767634854774e-05, "loss": 1.2821, "step": 9720 }, { "epoch": 8.067219917012448, "grad_norm": 12.568991661071777, "learning_rate": 1.6775435684647303e-05, "loss": 0.6686, "step": 9721 }, { "epoch": 8.06804979253112, "grad_norm": 15.678849220275879, "learning_rate": 1.6775103734439835e-05, "loss": 1.0917, "step": 9722 }, { "epoch": 8.068879668049792, "grad_norm": 23.39310646057129, "learning_rate": 1.6774771784232367e-05, "loss": 0.8851, "step": 9723 }, { "epoch": 8.069709543568464, "grad_norm": 22.32007598876953, "learning_rate": 1.67744398340249e-05, "loss": 0.8766, "step": 9724 }, { "epoch": 8.070539419087137, "grad_norm": 25.676454544067383, "learning_rate": 1.6774107883817428e-05, "loss": 1.4858, "step": 9725 }, { "epoch": 8.071369294605809, "grad_norm": 29.29620361328125, "learning_rate": 1.677377593360996e-05, "loss": 0.9808, "step": 9726 }, { "epoch": 8.072199170124481, "grad_norm": 16.156055450439453, "learning_rate": 1.6773443983402492e-05, "loss": 0.9312, "step": 9727 }, { "epoch": 8.073029045643153, "grad_norm": 21.21198272705078, "learning_rate": 1.677311203319502e-05, "loss": 1.0888, "step": 9728 }, { "epoch": 8.073858921161825, "grad_norm": 19.82461166381836, "learning_rate": 1.6772780082987553e-05, "loss": 1.258, "step": 9729 }, { "epoch": 8.074688796680498, "grad_norm": 15.815459251403809, "learning_rate": 1.6772448132780082e-05, "loss": 0.8858, 
"step": 9730 }, { "epoch": 8.07551867219917, "grad_norm": 31.397409439086914, "learning_rate": 1.6772116182572614e-05, "loss": 1.3296, "step": 9731 }, { "epoch": 8.076348547717842, "grad_norm": 20.11757469177246, "learning_rate": 1.6771784232365146e-05, "loss": 1.2404, "step": 9732 }, { "epoch": 8.077178423236514, "grad_norm": 54.87519836425781, "learning_rate": 1.677145228215768e-05, "loss": 1.038, "step": 9733 }, { "epoch": 8.078008298755186, "grad_norm": 25.834562301635742, "learning_rate": 1.6771120331950207e-05, "loss": 1.3132, "step": 9734 }, { "epoch": 8.078838174273859, "grad_norm": 25.455732345581055, "learning_rate": 1.677078838174274e-05, "loss": 0.9643, "step": 9735 }, { "epoch": 8.07966804979253, "grad_norm": 20.774290084838867, "learning_rate": 1.677045643153527e-05, "loss": 1.0744, "step": 9736 }, { "epoch": 8.080497925311203, "grad_norm": 24.8490047454834, "learning_rate": 1.6770124481327803e-05, "loss": 1.3389, "step": 9737 }, { "epoch": 8.081327800829875, "grad_norm": 34.41414260864258, "learning_rate": 1.6769792531120332e-05, "loss": 1.6358, "step": 9738 }, { "epoch": 8.082157676348547, "grad_norm": 43.67936706542969, "learning_rate": 1.6769460580912864e-05, "loss": 1.1181, "step": 9739 }, { "epoch": 8.08298755186722, "grad_norm": 28.906707763671875, "learning_rate": 1.6769128630705396e-05, "loss": 1.4971, "step": 9740 }, { "epoch": 8.083817427385892, "grad_norm": 39.2369270324707, "learning_rate": 1.676879668049793e-05, "loss": 1.279, "step": 9741 }, { "epoch": 8.084647302904564, "grad_norm": 16.100976943969727, "learning_rate": 1.6768464730290457e-05, "loss": 0.9386, "step": 9742 }, { "epoch": 8.085477178423236, "grad_norm": 22.341331481933594, "learning_rate": 1.676813278008299e-05, "loss": 1.2919, "step": 9743 }, { "epoch": 8.086307053941908, "grad_norm": 22.750343322753906, "learning_rate": 1.676780082987552e-05, "loss": 0.6086, "step": 9744 }, { "epoch": 8.08713692946058, "grad_norm": 16.597795486450195, "learning_rate": 
1.6767468879668053e-05, "loss": 0.7602, "step": 9745 }, { "epoch": 8.087966804979253, "grad_norm": 25.209077835083008, "learning_rate": 1.6767136929460582e-05, "loss": 1.3953, "step": 9746 }, { "epoch": 8.088796680497925, "grad_norm": 18.752099990844727, "learning_rate": 1.6766804979253114e-05, "loss": 1.0484, "step": 9747 }, { "epoch": 8.089626556016597, "grad_norm": 24.797189712524414, "learning_rate": 1.6766473029045643e-05, "loss": 0.7051, "step": 9748 }, { "epoch": 8.09045643153527, "grad_norm": 15.065347671508789, "learning_rate": 1.6766141078838175e-05, "loss": 1.1067, "step": 9749 }, { "epoch": 8.091286307053942, "grad_norm": 29.92279624938965, "learning_rate": 1.6765809128630707e-05, "loss": 1.216, "step": 9750 }, { "epoch": 8.092116182572614, "grad_norm": 21.080852508544922, "learning_rate": 1.6765477178423236e-05, "loss": 0.7723, "step": 9751 }, { "epoch": 8.092946058091286, "grad_norm": 18.384199142456055, "learning_rate": 1.6765145228215768e-05, "loss": 0.6265, "step": 9752 }, { "epoch": 8.093775933609958, "grad_norm": 24.014251708984375, "learning_rate": 1.67648132780083e-05, "loss": 1.1325, "step": 9753 }, { "epoch": 8.09460580912863, "grad_norm": 19.372303009033203, "learning_rate": 1.6764481327800832e-05, "loss": 0.587, "step": 9754 }, { "epoch": 8.095435684647303, "grad_norm": 28.712553024291992, "learning_rate": 1.676414937759336e-05, "loss": 0.7343, "step": 9755 }, { "epoch": 8.096265560165975, "grad_norm": 31.40721321105957, "learning_rate": 1.6763817427385893e-05, "loss": 0.9651, "step": 9756 }, { "epoch": 8.097095435684647, "grad_norm": 18.336257934570312, "learning_rate": 1.6763485477178425e-05, "loss": 0.7334, "step": 9757 }, { "epoch": 8.09792531120332, "grad_norm": 19.299823760986328, "learning_rate": 1.6763153526970957e-05, "loss": 1.4589, "step": 9758 }, { "epoch": 8.098755186721991, "grad_norm": 24.305740356445312, "learning_rate": 1.6762821576763486e-05, "loss": 0.8094, "step": 9759 }, { "epoch": 8.099585062240664, "grad_norm": 
46.52638244628906, "learning_rate": 1.6762489626556018e-05, "loss": 1.1369, "step": 9760 }, { "epoch": 8.100414937759336, "grad_norm": 23.203617095947266, "learning_rate": 1.676215767634855e-05, "loss": 0.8104, "step": 9761 }, { "epoch": 8.101244813278008, "grad_norm": 22.611671447753906, "learning_rate": 1.6761825726141082e-05, "loss": 0.7187, "step": 9762 }, { "epoch": 8.10207468879668, "grad_norm": 15.851513862609863, "learning_rate": 1.676149377593361e-05, "loss": 0.7221, "step": 9763 }, { "epoch": 8.102904564315352, "grad_norm": 27.8574161529541, "learning_rate": 1.6761161825726143e-05, "loss": 1.4994, "step": 9764 }, { "epoch": 8.103734439834025, "grad_norm": 19.850318908691406, "learning_rate": 1.6760829875518675e-05, "loss": 0.6121, "step": 9765 }, { "epoch": 8.104564315352697, "grad_norm": 43.24320983886719, "learning_rate": 1.6760497925311204e-05, "loss": 1.1982, "step": 9766 }, { "epoch": 8.105394190871369, "grad_norm": 17.910051345825195, "learning_rate": 1.6760165975103736e-05, "loss": 0.6325, "step": 9767 }, { "epoch": 8.106224066390041, "grad_norm": 12.917659759521484, "learning_rate": 1.6759834024896268e-05, "loss": 0.6081, "step": 9768 }, { "epoch": 8.107053941908713, "grad_norm": 23.168701171875, "learning_rate": 1.6759502074688797e-05, "loss": 1.5043, "step": 9769 }, { "epoch": 8.107883817427386, "grad_norm": 26.433813095092773, "learning_rate": 1.675917012448133e-05, "loss": 1.8469, "step": 9770 }, { "epoch": 8.108713692946058, "grad_norm": 18.008710861206055, "learning_rate": 1.6758838174273858e-05, "loss": 0.7226, "step": 9771 }, { "epoch": 8.10954356846473, "grad_norm": 19.660324096679688, "learning_rate": 1.675850622406639e-05, "loss": 1.3769, "step": 9772 }, { "epoch": 8.110373443983402, "grad_norm": 51.3643913269043, "learning_rate": 1.6758174273858922e-05, "loss": 1.0033, "step": 9773 }, { "epoch": 8.111203319502074, "grad_norm": 21.85709571838379, "learning_rate": 1.6757842323651454e-05, "loss": 0.9878, "step": 9774 }, { "epoch": 
8.112033195020746, "grad_norm": 19.275390625, "learning_rate": 1.6757510373443983e-05, "loss": 1.0262, "step": 9775 }, { "epoch": 8.112863070539419, "grad_norm": 17.658706665039062, "learning_rate": 1.6757178423236515e-05, "loss": 0.8946, "step": 9776 }, { "epoch": 8.11369294605809, "grad_norm": 26.43161964416504, "learning_rate": 1.6756846473029047e-05, "loss": 0.9649, "step": 9777 }, { "epoch": 8.114522821576763, "grad_norm": 25.656797409057617, "learning_rate": 1.675651452282158e-05, "loss": 1.3344, "step": 9778 }, { "epoch": 8.115352697095435, "grad_norm": 18.26152801513672, "learning_rate": 1.675618257261411e-05, "loss": 0.8376, "step": 9779 }, { "epoch": 8.116182572614107, "grad_norm": 36.1759147644043, "learning_rate": 1.675585062240664e-05, "loss": 1.7533, "step": 9780 }, { "epoch": 8.11701244813278, "grad_norm": 35.94108963012695, "learning_rate": 1.6755518672199172e-05, "loss": 1.4048, "step": 9781 }, { "epoch": 8.117842323651452, "grad_norm": 28.384580612182617, "learning_rate": 1.6755186721991704e-05, "loss": 1.1118, "step": 9782 }, { "epoch": 8.118672199170124, "grad_norm": 25.550121307373047, "learning_rate": 1.6754854771784236e-05, "loss": 1.0003, "step": 9783 }, { "epoch": 8.119502074688796, "grad_norm": 22.34051513671875, "learning_rate": 1.6754522821576765e-05, "loss": 0.5695, "step": 9784 }, { "epoch": 8.120331950207468, "grad_norm": 19.99076271057129, "learning_rate": 1.6754190871369297e-05, "loss": 0.7523, "step": 9785 }, { "epoch": 8.12116182572614, "grad_norm": 20.642805099487305, "learning_rate": 1.6753858921161826e-05, "loss": 0.9397, "step": 9786 }, { "epoch": 8.121991701244813, "grad_norm": 41.12547302246094, "learning_rate": 1.6753526970954358e-05, "loss": 1.4714, "step": 9787 }, { "epoch": 8.122821576763485, "grad_norm": 14.341169357299805, "learning_rate": 1.675319502074689e-05, "loss": 0.3557, "step": 9788 }, { "epoch": 8.123651452282157, "grad_norm": 19.027423858642578, "learning_rate": 1.675286307053942e-05, "loss": 1.203, "step": 
9789 }, { "epoch": 8.12448132780083, "grad_norm": 14.632040023803711, "learning_rate": 1.675253112033195e-05, "loss": 0.5576, "step": 9790 }, { "epoch": 8.125311203319502, "grad_norm": 23.715560913085938, "learning_rate": 1.6752199170124483e-05, "loss": 0.742, "step": 9791 }, { "epoch": 8.126141078838174, "grad_norm": 16.824737548828125, "learning_rate": 1.6751867219917012e-05, "loss": 1.1862, "step": 9792 }, { "epoch": 8.126970954356846, "grad_norm": 28.307767868041992, "learning_rate": 1.6751535269709544e-05, "loss": 1.5877, "step": 9793 }, { "epoch": 8.127800829875518, "grad_norm": 29.37318229675293, "learning_rate": 1.6751203319502076e-05, "loss": 0.7476, "step": 9794 }, { "epoch": 8.12863070539419, "grad_norm": 23.646568298339844, "learning_rate": 1.6750871369294608e-05, "loss": 0.9705, "step": 9795 }, { "epoch": 8.129460580912863, "grad_norm": 15.23442554473877, "learning_rate": 1.6750539419087137e-05, "loss": 0.5955, "step": 9796 }, { "epoch": 8.130290456431535, "grad_norm": 22.288984298706055, "learning_rate": 1.675020746887967e-05, "loss": 0.9762, "step": 9797 }, { "epoch": 8.131120331950207, "grad_norm": 30.37569808959961, "learning_rate": 1.67498755186722e-05, "loss": 1.3594, "step": 9798 }, { "epoch": 8.13195020746888, "grad_norm": 55.61834716796875, "learning_rate": 1.6749543568464733e-05, "loss": 1.8819, "step": 9799 }, { "epoch": 8.132780082987551, "grad_norm": 21.83563995361328, "learning_rate": 1.6749211618257262e-05, "loss": 0.9605, "step": 9800 }, { "epoch": 8.133609958506224, "grad_norm": 33.353267669677734, "learning_rate": 1.6748879668049794e-05, "loss": 0.8903, "step": 9801 }, { "epoch": 8.134439834024896, "grad_norm": 24.604782104492188, "learning_rate": 1.6748547717842326e-05, "loss": 0.8066, "step": 9802 }, { "epoch": 8.135269709543568, "grad_norm": 26.274763107299805, "learning_rate": 1.6748215767634858e-05, "loss": 1.648, "step": 9803 }, { "epoch": 8.13609958506224, "grad_norm": 32.0426025390625, "learning_rate": 1.6747883817427387e-05, 
"loss": 1.593, "step": 9804 }, { "epoch": 8.136929460580912, "grad_norm": 29.137332916259766, "learning_rate": 1.674755186721992e-05, "loss": 1.3441, "step": 9805 }, { "epoch": 8.137759336099585, "grad_norm": 20.532989501953125, "learning_rate": 1.674721991701245e-05, "loss": 1.0174, "step": 9806 }, { "epoch": 8.138589211618257, "grad_norm": 29.1634521484375, "learning_rate": 1.674688796680498e-05, "loss": 0.989, "step": 9807 }, { "epoch": 8.139419087136929, "grad_norm": 18.873849868774414, "learning_rate": 1.6746556016597512e-05, "loss": 0.7233, "step": 9808 }, { "epoch": 8.140248962655601, "grad_norm": 36.35085678100586, "learning_rate": 1.674622406639004e-05, "loss": 0.8725, "step": 9809 }, { "epoch": 8.141078838174273, "grad_norm": 26.329418182373047, "learning_rate": 1.6745892116182573e-05, "loss": 1.4201, "step": 9810 }, { "epoch": 8.141908713692946, "grad_norm": 18.697383880615234, "learning_rate": 1.6745560165975105e-05, "loss": 0.7768, "step": 9811 }, { "epoch": 8.142738589211618, "grad_norm": 30.346708297729492, "learning_rate": 1.6745228215767637e-05, "loss": 1.2813, "step": 9812 }, { "epoch": 8.14356846473029, "grad_norm": 14.788519859313965, "learning_rate": 1.6744896265560166e-05, "loss": 0.8609, "step": 9813 }, { "epoch": 8.144398340248962, "grad_norm": 23.875102996826172, "learning_rate": 1.6744564315352698e-05, "loss": 1.0451, "step": 9814 }, { "epoch": 8.145228215767634, "grad_norm": 19.057355880737305, "learning_rate": 1.674423236514523e-05, "loss": 0.6441, "step": 9815 }, { "epoch": 8.146058091286307, "grad_norm": 21.64516830444336, "learning_rate": 1.6743900414937762e-05, "loss": 1.2571, "step": 9816 }, { "epoch": 8.146887966804979, "grad_norm": 25.65896224975586, "learning_rate": 1.674356846473029e-05, "loss": 1.0087, "step": 9817 }, { "epoch": 8.147717842323651, "grad_norm": 24.85023307800293, "learning_rate": 1.6743236514522823e-05, "loss": 0.8469, "step": 9818 }, { "epoch": 8.148547717842323, "grad_norm": 29.87720489501953, "learning_rate": 
1.6742904564315355e-05, "loss": 0.9984, "step": 9819 }, { "epoch": 8.149377593360995, "grad_norm": 36.96342849731445, "learning_rate": 1.6742572614107887e-05, "loss": 1.2641, "step": 9820 }, { "epoch": 8.150207468879668, "grad_norm": 29.022722244262695, "learning_rate": 1.6742240663900416e-05, "loss": 1.6187, "step": 9821 }, { "epoch": 8.15103734439834, "grad_norm": 53.46784210205078, "learning_rate": 1.6741908713692948e-05, "loss": 1.4392, "step": 9822 }, { "epoch": 8.151867219917012, "grad_norm": 44.40809631347656, "learning_rate": 1.674157676348548e-05, "loss": 1.4182, "step": 9823 }, { "epoch": 8.152697095435684, "grad_norm": 118.01042938232422, "learning_rate": 1.674124481327801e-05, "loss": 1.1719, "step": 9824 }, { "epoch": 8.153526970954356, "grad_norm": 56.172481536865234, "learning_rate": 1.674091286307054e-05, "loss": 2.1864, "step": 9825 }, { "epoch": 8.154356846473028, "grad_norm": 15.72287368774414, "learning_rate": 1.6740580912863073e-05, "loss": 1.13, "step": 9826 }, { "epoch": 8.1551867219917, "grad_norm": 26.63388442993164, "learning_rate": 1.67402489626556e-05, "loss": 1.4247, "step": 9827 }, { "epoch": 8.156016597510373, "grad_norm": 30.34623146057129, "learning_rate": 1.6739917012448134e-05, "loss": 1.4687, "step": 9828 }, { "epoch": 8.156846473029045, "grad_norm": 22.41777992248535, "learning_rate": 1.6739585062240666e-05, "loss": 0.9741, "step": 9829 }, { "epoch": 8.157676348547717, "grad_norm": 18.944795608520508, "learning_rate": 1.6739253112033195e-05, "loss": 0.8709, "step": 9830 }, { "epoch": 8.15850622406639, "grad_norm": 19.622642517089844, "learning_rate": 1.6738921161825727e-05, "loss": 0.7681, "step": 9831 }, { "epoch": 8.159336099585062, "grad_norm": 27.31252098083496, "learning_rate": 1.673858921161826e-05, "loss": 1.4694, "step": 9832 }, { "epoch": 8.160165975103734, "grad_norm": 32.074737548828125, "learning_rate": 1.673825726141079e-05, "loss": 0.8867, "step": 9833 }, { "epoch": 8.160995850622406, "grad_norm": 
19.112590789794922, "learning_rate": 1.673792531120332e-05, "loss": 0.8234, "step": 9834 }, { "epoch": 8.161825726141078, "grad_norm": 31.703847885131836, "learning_rate": 1.673759336099585e-05, "loss": 1.4538, "step": 9835 }, { "epoch": 8.16265560165975, "grad_norm": 26.724334716796875, "learning_rate": 1.6737261410788384e-05, "loss": 1.6848, "step": 9836 }, { "epoch": 8.163485477178423, "grad_norm": 24.561161041259766, "learning_rate": 1.6736929460580916e-05, "loss": 1.5597, "step": 9837 }, { "epoch": 8.164315352697095, "grad_norm": 35.29240798950195, "learning_rate": 1.6736597510373445e-05, "loss": 1.3505, "step": 9838 }, { "epoch": 8.165145228215767, "grad_norm": 45.32014083862305, "learning_rate": 1.6736265560165977e-05, "loss": 1.5815, "step": 9839 }, { "epoch": 8.16597510373444, "grad_norm": 24.075782775878906, "learning_rate": 1.673593360995851e-05, "loss": 1.5287, "step": 9840 }, { "epoch": 8.166804979253111, "grad_norm": 33.57357406616211, "learning_rate": 1.673560165975104e-05, "loss": 1.536, "step": 9841 }, { "epoch": 8.167634854771784, "grad_norm": 19.638206481933594, "learning_rate": 1.673526970954357e-05, "loss": 1.2315, "step": 9842 }, { "epoch": 8.168464730290456, "grad_norm": 49.159664154052734, "learning_rate": 1.6734937759336102e-05, "loss": 1.1117, "step": 9843 }, { "epoch": 8.169294605809128, "grad_norm": 19.393890380859375, "learning_rate": 1.6734605809128634e-05, "loss": 1.3899, "step": 9844 }, { "epoch": 8.1701244813278, "grad_norm": 20.68104362487793, "learning_rate": 1.6734273858921163e-05, "loss": 1.2254, "step": 9845 }, { "epoch": 8.170954356846472, "grad_norm": 23.632883071899414, "learning_rate": 1.6733941908713695e-05, "loss": 0.9488, "step": 9846 }, { "epoch": 8.171784232365145, "grad_norm": 26.495328903198242, "learning_rate": 1.6733609958506223e-05, "loss": 1.2211, "step": 9847 }, { "epoch": 8.172614107883817, "grad_norm": 28.657241821289062, "learning_rate": 1.6733278008298756e-05, "loss": 1.061, "step": 9848 }, { "epoch": 
8.173443983402489, "grad_norm": 16.202571868896484, "learning_rate": 1.6732946058091288e-05, "loss": 1.1202, "step": 9849 }, { "epoch": 8.174273858921161, "grad_norm": 13.95235538482666, "learning_rate": 1.6732614107883816e-05, "loss": 0.7233, "step": 9850 }, { "epoch": 8.175103734439833, "grad_norm": 43.004005432128906, "learning_rate": 1.673228215767635e-05, "loss": 1.5478, "step": 9851 }, { "epoch": 8.175933609958506, "grad_norm": 21.890504837036133, "learning_rate": 1.673195020746888e-05, "loss": 1.0486, "step": 9852 }, { "epoch": 8.176763485477178, "grad_norm": 18.356760025024414, "learning_rate": 1.6731618257261413e-05, "loss": 0.5653, "step": 9853 }, { "epoch": 8.17759336099585, "grad_norm": 17.452178955078125, "learning_rate": 1.673128630705394e-05, "loss": 0.7977, "step": 9854 }, { "epoch": 8.178423236514522, "grad_norm": 19.24608612060547, "learning_rate": 1.6730954356846473e-05, "loss": 1.224, "step": 9855 }, { "epoch": 8.179253112033194, "grad_norm": 13.383967399597168, "learning_rate": 1.6730622406639006e-05, "loss": 0.618, "step": 9856 }, { "epoch": 8.180082987551867, "grad_norm": 21.67059326171875, "learning_rate": 1.6730290456431538e-05, "loss": 1.1727, "step": 9857 }, { "epoch": 8.180912863070539, "grad_norm": 18.7393798828125, "learning_rate": 1.672995850622407e-05, "loss": 1.3755, "step": 9858 }, { "epoch": 8.181742738589211, "grad_norm": 25.739938735961914, "learning_rate": 1.67296265560166e-05, "loss": 1.5498, "step": 9859 }, { "epoch": 8.182572614107883, "grad_norm": 30.225854873657227, "learning_rate": 1.672929460580913e-05, "loss": 0.9744, "step": 9860 }, { "epoch": 8.183402489626555, "grad_norm": 20.949234008789062, "learning_rate": 1.6728962655601663e-05, "loss": 1.3506, "step": 9861 }, { "epoch": 8.184232365145228, "grad_norm": 18.68138885498047, "learning_rate": 1.6728630705394195e-05, "loss": 1.3819, "step": 9862 }, { "epoch": 8.1850622406639, "grad_norm": 34.762699127197266, "learning_rate": 1.6728298755186724e-05, "loss": 1.2148, 
"step": 9863 }, { "epoch": 8.185892116182572, "grad_norm": 19.20785140991211, "learning_rate": 1.6727966804979256e-05, "loss": 0.6819, "step": 9864 }, { "epoch": 8.186721991701244, "grad_norm": 37.6182975769043, "learning_rate": 1.6727634854771784e-05, "loss": 0.7657, "step": 9865 }, { "epoch": 8.187551867219916, "grad_norm": 16.07218360900879, "learning_rate": 1.6727302904564317e-05, "loss": 1.141, "step": 9866 }, { "epoch": 8.188381742738589, "grad_norm": 20.59942626953125, "learning_rate": 1.672697095435685e-05, "loss": 1.1029, "step": 9867 }, { "epoch": 8.18921161825726, "grad_norm": 30.210805892944336, "learning_rate": 1.6726639004149377e-05, "loss": 1.3213, "step": 9868 }, { "epoch": 8.190041493775933, "grad_norm": 25.91155242919922, "learning_rate": 1.672630705394191e-05, "loss": 1.0636, "step": 9869 }, { "epoch": 8.190871369294605, "grad_norm": 36.934078216552734, "learning_rate": 1.672597510373444e-05, "loss": 1.3184, "step": 9870 }, { "epoch": 8.191701244813277, "grad_norm": 25.176177978515625, "learning_rate": 1.672564315352697e-05, "loss": 1.3503, "step": 9871 }, { "epoch": 8.19253112033195, "grad_norm": 20.360090255737305, "learning_rate": 1.6725311203319502e-05, "loss": 1.1837, "step": 9872 }, { "epoch": 8.193360995850622, "grad_norm": 22.498655319213867, "learning_rate": 1.6724979253112034e-05, "loss": 1.159, "step": 9873 }, { "epoch": 8.194190871369294, "grad_norm": 15.922652244567871, "learning_rate": 1.6724647302904567e-05, "loss": 0.7601, "step": 9874 }, { "epoch": 8.195020746887966, "grad_norm": 20.496057510375977, "learning_rate": 1.6724315352697095e-05, "loss": 1.0431, "step": 9875 }, { "epoch": 8.195850622406638, "grad_norm": 70.65092468261719, "learning_rate": 1.6723983402489627e-05, "loss": 0.932, "step": 9876 }, { "epoch": 8.19668049792531, "grad_norm": 40.71002197265625, "learning_rate": 1.672365145228216e-05, "loss": 1.4897, "step": 9877 }, { "epoch": 8.197510373443983, "grad_norm": 18.75072479248047, "learning_rate": 
1.672331950207469e-05, "loss": 0.6152, "step": 9878 }, { "epoch": 8.198340248962655, "grad_norm": 21.779685974121094, "learning_rate": 1.672298755186722e-05, "loss": 0.6331, "step": 9879 }, { "epoch": 8.199170124481327, "grad_norm": 36.68938064575195, "learning_rate": 1.6722655601659752e-05, "loss": 1.1486, "step": 9880 }, { "epoch": 8.2, "grad_norm": 15.81293773651123, "learning_rate": 1.6722323651452285e-05, "loss": 1.0488, "step": 9881 }, { "epoch": 8.200829875518671, "grad_norm": 29.956558227539062, "learning_rate": 1.6721991701244817e-05, "loss": 0.3972, "step": 9882 }, { "epoch": 8.201659751037344, "grad_norm": 14.293149948120117, "learning_rate": 1.6721659751037345e-05, "loss": 1.131, "step": 9883 }, { "epoch": 8.202489626556016, "grad_norm": 18.82676124572754, "learning_rate": 1.6721327800829878e-05, "loss": 1.1773, "step": 9884 }, { "epoch": 8.203319502074688, "grad_norm": 24.68656349182129, "learning_rate": 1.672099585062241e-05, "loss": 1.1004, "step": 9885 }, { "epoch": 8.20414937759336, "grad_norm": 34.04242706298828, "learning_rate": 1.672066390041494e-05, "loss": 0.6957, "step": 9886 }, { "epoch": 8.204979253112032, "grad_norm": 25.566356658935547, "learning_rate": 1.672033195020747e-05, "loss": 1.4111, "step": 9887 }, { "epoch": 8.205809128630705, "grad_norm": 48.00962829589844, "learning_rate": 1.672e-05, "loss": 1.4482, "step": 9888 }, { "epoch": 8.206639004149377, "grad_norm": 24.07982063293457, "learning_rate": 1.671966804979253e-05, "loss": 1.0994, "step": 9889 }, { "epoch": 8.207468879668049, "grad_norm": 30.587026596069336, "learning_rate": 1.6719336099585063e-05, "loss": 1.1574, "step": 9890 }, { "epoch": 8.208298755186721, "grad_norm": 18.970373153686523, "learning_rate": 1.6719004149377595e-05, "loss": 0.8007, "step": 9891 }, { "epoch": 8.209128630705393, "grad_norm": 19.509300231933594, "learning_rate": 1.6718672199170124e-05, "loss": 1.5432, "step": 9892 }, { "epoch": 8.209958506224066, "grad_norm": 19.537260055541992, "learning_rate": 
1.6718340248962656e-05, "loss": 1.0565, "step": 9893 }, { "epoch": 8.210788381742738, "grad_norm": 20.07831573486328, "learning_rate": 1.671800829875519e-05, "loss": 0.8293, "step": 9894 }, { "epoch": 8.21161825726141, "grad_norm": 18.170225143432617, "learning_rate": 1.671767634854772e-05, "loss": 0.8539, "step": 9895 }, { "epoch": 8.212448132780082, "grad_norm": 31.19930076599121, "learning_rate": 1.671734439834025e-05, "loss": 0.98, "step": 9896 }, { "epoch": 8.213278008298754, "grad_norm": 62.418094635009766, "learning_rate": 1.671701244813278e-05, "loss": 0.8134, "step": 9897 }, { "epoch": 8.214107883817427, "grad_norm": 18.592063903808594, "learning_rate": 1.6716680497925313e-05, "loss": 1.1359, "step": 9898 }, { "epoch": 8.214937759336099, "grad_norm": 25.51177406311035, "learning_rate": 1.6716348547717846e-05, "loss": 0.5397, "step": 9899 }, { "epoch": 8.215767634854771, "grad_norm": 19.141977310180664, "learning_rate": 1.6716016597510374e-05, "loss": 0.8592, "step": 9900 }, { "epoch": 8.216597510373443, "grad_norm": 17.118484497070312, "learning_rate": 1.6715684647302906e-05, "loss": 0.761, "step": 9901 }, { "epoch": 8.217427385892115, "grad_norm": 28.33919334411621, "learning_rate": 1.671535269709544e-05, "loss": 1.4221, "step": 9902 }, { "epoch": 8.218257261410788, "grad_norm": 25.00804328918457, "learning_rate": 1.6715020746887967e-05, "loss": 1.5409, "step": 9903 }, { "epoch": 8.21908713692946, "grad_norm": 31.400890350341797, "learning_rate": 1.67146887966805e-05, "loss": 1.5962, "step": 9904 }, { "epoch": 8.219917012448132, "grad_norm": 25.296096801757812, "learning_rate": 1.671435684647303e-05, "loss": 1.2404, "step": 9905 }, { "epoch": 8.220746887966804, "grad_norm": 22.962604522705078, "learning_rate": 1.671402489626556e-05, "loss": 0.9345, "step": 9906 }, { "epoch": 8.221576763485476, "grad_norm": 26.09844207763672, "learning_rate": 1.6713692946058092e-05, "loss": 1.2851, "step": 9907 }, { "epoch": 8.222406639004149, "grad_norm": 
25.078590393066406, "learning_rate": 1.6713360995850624e-05, "loss": 0.7672, "step": 9908 }, { "epoch": 8.22323651452282, "grad_norm": 22.397615432739258, "learning_rate": 1.6713029045643153e-05, "loss": 1.0084, "step": 9909 }, { "epoch": 8.224066390041493, "grad_norm": 21.843429565429688, "learning_rate": 1.6712697095435685e-05, "loss": 1.1106, "step": 9910 }, { "epoch": 8.224896265560165, "grad_norm": 29.153074264526367, "learning_rate": 1.6712365145228217e-05, "loss": 0.8453, "step": 9911 }, { "epoch": 8.225726141078837, "grad_norm": 34.26409149169922, "learning_rate": 1.671203319502075e-05, "loss": 0.9234, "step": 9912 }, { "epoch": 8.22655601659751, "grad_norm": 30.034011840820312, "learning_rate": 1.6711701244813278e-05, "loss": 1.2474, "step": 9913 }, { "epoch": 8.227385892116182, "grad_norm": 29.90315055847168, "learning_rate": 1.671136929460581e-05, "loss": 1.1319, "step": 9914 }, { "epoch": 8.228215767634854, "grad_norm": 26.469894409179688, "learning_rate": 1.6711037344398342e-05, "loss": 1.2734, "step": 9915 }, { "epoch": 8.229045643153526, "grad_norm": 35.99635696411133, "learning_rate": 1.6710705394190874e-05, "loss": 1.3142, "step": 9916 }, { "epoch": 8.229875518672198, "grad_norm": 29.710756301879883, "learning_rate": 1.6710373443983403e-05, "loss": 1.1826, "step": 9917 }, { "epoch": 8.23070539419087, "grad_norm": 27.61435890197754, "learning_rate": 1.6710041493775935e-05, "loss": 1.0087, "step": 9918 }, { "epoch": 8.231535269709543, "grad_norm": 43.036128997802734, "learning_rate": 1.6709709543568467e-05, "loss": 1.539, "step": 9919 }, { "epoch": 8.232365145228215, "grad_norm": 54.15658187866211, "learning_rate": 1.6709377593361e-05, "loss": 0.5041, "step": 9920 }, { "epoch": 8.233195020746887, "grad_norm": 16.706567764282227, "learning_rate": 1.6709045643153528e-05, "loss": 1.0318, "step": 9921 }, { "epoch": 8.23402489626556, "grad_norm": 23.41126823425293, "learning_rate": 1.670871369294606e-05, "loss": 1.1082, "step": 9922 }, { "epoch": 
8.234854771784232, "grad_norm": 19.17750358581543, "learning_rate": 1.6708381742738592e-05, "loss": 0.7511, "step": 9923 }, { "epoch": 8.235684647302904, "grad_norm": 49.964107513427734, "learning_rate": 1.670804979253112e-05, "loss": 1.8498, "step": 9924 }, { "epoch": 8.236514522821576, "grad_norm": 22.997201919555664, "learning_rate": 1.6707717842323653e-05, "loss": 0.9678, "step": 9925 }, { "epoch": 8.237344398340248, "grad_norm": 53.206825256347656, "learning_rate": 1.6707385892116182e-05, "loss": 1.2838, "step": 9926 }, { "epoch": 8.23817427385892, "grad_norm": 22.86639404296875, "learning_rate": 1.6707053941908714e-05, "loss": 1.1349, "step": 9927 }, { "epoch": 8.239004149377593, "grad_norm": 19.8159122467041, "learning_rate": 1.6706721991701246e-05, "loss": 0.9095, "step": 9928 }, { "epoch": 8.239834024896265, "grad_norm": 31.579530715942383, "learning_rate": 1.6706390041493775e-05, "loss": 1.4173, "step": 9929 }, { "epoch": 8.240663900414937, "grad_norm": 26.115188598632812, "learning_rate": 1.6706058091286307e-05, "loss": 1.2174, "step": 9930 }, { "epoch": 8.241493775933609, "grad_norm": 26.860239028930664, "learning_rate": 1.670572614107884e-05, "loss": 1.0937, "step": 9931 }, { "epoch": 8.242323651452281, "grad_norm": 38.12687301635742, "learning_rate": 1.670539419087137e-05, "loss": 1.6204, "step": 9932 }, { "epoch": 8.243153526970953, "grad_norm": 20.067014694213867, "learning_rate": 1.67050622406639e-05, "loss": 1.4904, "step": 9933 }, { "epoch": 8.243983402489626, "grad_norm": 50.786415100097656, "learning_rate": 1.6704730290456432e-05, "loss": 1.1391, "step": 9934 }, { "epoch": 8.244813278008298, "grad_norm": 26.77237319946289, "learning_rate": 1.6704398340248964e-05, "loss": 0.9021, "step": 9935 }, { "epoch": 8.24564315352697, "grad_norm": 16.41427993774414, "learning_rate": 1.6704066390041496e-05, "loss": 0.9709, "step": 9936 }, { "epoch": 8.246473029045642, "grad_norm": 14.299859046936035, "learning_rate": 1.670373443983403e-05, "loss": 0.7079, 
"step": 9937 }, { "epoch": 8.247302904564314, "grad_norm": 16.346284866333008, "learning_rate": 1.6703402489626557e-05, "loss": 0.848, "step": 9938 }, { "epoch": 8.248132780082987, "grad_norm": 36.09280014038086, "learning_rate": 1.670307053941909e-05, "loss": 1.5909, "step": 9939 }, { "epoch": 8.248962655601659, "grad_norm": 13.758082389831543, "learning_rate": 1.670273858921162e-05, "loss": 0.6047, "step": 9940 }, { "epoch": 8.249792531120331, "grad_norm": 28.443994522094727, "learning_rate": 1.670240663900415e-05, "loss": 1.1187, "step": 9941 }, { "epoch": 8.250622406639003, "grad_norm": 15.856849670410156, "learning_rate": 1.6702074688796682e-05, "loss": 0.7181, "step": 9942 }, { "epoch": 8.251452282157675, "grad_norm": 17.95823097229004, "learning_rate": 1.6701742738589214e-05, "loss": 0.7839, "step": 9943 }, { "epoch": 8.252282157676348, "grad_norm": 17.411718368530273, "learning_rate": 1.6701410788381743e-05, "loss": 0.9749, "step": 9944 }, { "epoch": 8.25311203319502, "grad_norm": 33.74513626098633, "learning_rate": 1.6701078838174275e-05, "loss": 1.4637, "step": 9945 }, { "epoch": 8.253941908713692, "grad_norm": 99.09603118896484, "learning_rate": 1.6700746887966807e-05, "loss": 1.3497, "step": 9946 }, { "epoch": 8.254771784232364, "grad_norm": 27.018888473510742, "learning_rate": 1.6700414937759336e-05, "loss": 1.4524, "step": 9947 }, { "epoch": 8.255601659751036, "grad_norm": 23.288089752197266, "learning_rate": 1.6700082987551868e-05, "loss": 0.842, "step": 9948 }, { "epoch": 8.256431535269709, "grad_norm": 24.44281768798828, "learning_rate": 1.66997510373444e-05, "loss": 1.4122, "step": 9949 }, { "epoch": 8.25726141078838, "grad_norm": 28.881397247314453, "learning_rate": 1.669941908713693e-05, "loss": 0.6661, "step": 9950 }, { "epoch": 8.258091286307055, "grad_norm": 28.235675811767578, "learning_rate": 1.669908713692946e-05, "loss": 1.5703, "step": 9951 }, { "epoch": 8.258921161825727, "grad_norm": 23.146718978881836, "learning_rate": 
1.6698755186721993e-05, "loss": 1.3857, "step": 9952 }, { "epoch": 8.2597510373444, "grad_norm": 22.579383850097656, "learning_rate": 1.6698423236514525e-05, "loss": 1.0688, "step": 9953 }, { "epoch": 8.260580912863071, "grad_norm": 32.255218505859375, "learning_rate": 1.6698091286307054e-05, "loss": 1.8245, "step": 9954 }, { "epoch": 8.261410788381744, "grad_norm": 36.74323654174805, "learning_rate": 1.6697759336099586e-05, "loss": 1.054, "step": 9955 }, { "epoch": 8.262240663900416, "grad_norm": 24.00006675720215, "learning_rate": 1.6697427385892118e-05, "loss": 1.5049, "step": 9956 }, { "epoch": 8.263070539419088, "grad_norm": 31.855649948120117, "learning_rate": 1.669709543568465e-05, "loss": 1.3803, "step": 9957 }, { "epoch": 8.26390041493776, "grad_norm": 23.743301391601562, "learning_rate": 1.669676348547718e-05, "loss": 1.402, "step": 9958 }, { "epoch": 8.264730290456432, "grad_norm": 16.625577926635742, "learning_rate": 1.669643153526971e-05, "loss": 0.8212, "step": 9959 }, { "epoch": 8.265560165975105, "grad_norm": 23.35894203186035, "learning_rate": 1.6696099585062243e-05, "loss": 0.9784, "step": 9960 }, { "epoch": 8.266390041493777, "grad_norm": 59.073822021484375, "learning_rate": 1.6695767634854775e-05, "loss": 1.3936, "step": 9961 }, { "epoch": 8.267219917012449, "grad_norm": 14.640052795410156, "learning_rate": 1.6695435684647304e-05, "loss": 1.1326, "step": 9962 }, { "epoch": 8.268049792531121, "grad_norm": 53.93831253051758, "learning_rate": 1.6695103734439836e-05, "loss": 1.1895, "step": 9963 }, { "epoch": 8.268879668049793, "grad_norm": 26.907142639160156, "learning_rate": 1.6694771784232365e-05, "loss": 0.8965, "step": 9964 }, { "epoch": 8.269709543568466, "grad_norm": 43.09001922607422, "learning_rate": 1.6694439834024897e-05, "loss": 1.3737, "step": 9965 }, { "epoch": 8.270539419087138, "grad_norm": 14.06717300415039, "learning_rate": 1.669410788381743e-05, "loss": 0.8911, "step": 9966 }, { "epoch": 8.27136929460581, "grad_norm": 
27.984819412231445, "learning_rate": 1.6693775933609958e-05, "loss": 1.8997, "step": 9967 }, { "epoch": 8.272199170124482, "grad_norm": 20.043170928955078, "learning_rate": 1.669344398340249e-05, "loss": 1.3488, "step": 9968 }, { "epoch": 8.273029045643154, "grad_norm": 30.726428985595703, "learning_rate": 1.6693112033195022e-05, "loss": 1.1031, "step": 9969 }, { "epoch": 8.273858921161827, "grad_norm": 19.926164627075195, "learning_rate": 1.6692780082987554e-05, "loss": 0.8393, "step": 9970 }, { "epoch": 8.274688796680499, "grad_norm": 18.532499313354492, "learning_rate": 1.6692448132780083e-05, "loss": 1.0807, "step": 9971 }, { "epoch": 8.275518672199171, "grad_norm": 25.206005096435547, "learning_rate": 1.6692116182572615e-05, "loss": 0.8668, "step": 9972 }, { "epoch": 8.276348547717843, "grad_norm": 60.40919494628906, "learning_rate": 1.6691784232365147e-05, "loss": 0.8478, "step": 9973 }, { "epoch": 8.277178423236515, "grad_norm": 31.764299392700195, "learning_rate": 1.669145228215768e-05, "loss": 1.798, "step": 9974 }, { "epoch": 8.278008298755188, "grad_norm": 24.48207664489746, "learning_rate": 1.6691120331950208e-05, "loss": 1.3244, "step": 9975 }, { "epoch": 8.27883817427386, "grad_norm": 18.3962345123291, "learning_rate": 1.669078838174274e-05, "loss": 0.8753, "step": 9976 }, { "epoch": 8.279668049792532, "grad_norm": 17.06682777404785, "learning_rate": 1.6690456431535272e-05, "loss": 0.7746, "step": 9977 }, { "epoch": 8.280497925311204, "grad_norm": 26.547773361206055, "learning_rate": 1.6690124481327804e-05, "loss": 0.7883, "step": 9978 }, { "epoch": 8.281327800829876, "grad_norm": 15.036699295043945, "learning_rate": 1.6689792531120333e-05, "loss": 0.8792, "step": 9979 }, { "epoch": 8.282157676348548, "grad_norm": 18.89925765991211, "learning_rate": 1.6689460580912865e-05, "loss": 1.0619, "step": 9980 }, { "epoch": 8.28298755186722, "grad_norm": 19.792591094970703, "learning_rate": 1.6689128630705397e-05, "loss": 1.0467, "step": 9981 }, { "epoch": 
8.283817427385893, "grad_norm": 24.708084106445312, "learning_rate": 1.6688796680497926e-05, "loss": 0.7991, "step": 9982 }, { "epoch": 8.284647302904565, "grad_norm": 60.232337951660156, "learning_rate": 1.6688464730290458e-05, "loss": 0.6292, "step": 9983 }, { "epoch": 8.285477178423237, "grad_norm": 39.73465347290039, "learning_rate": 1.668813278008299e-05, "loss": 0.6547, "step": 9984 }, { "epoch": 8.28630705394191, "grad_norm": 30.329030990600586, "learning_rate": 1.668780082987552e-05, "loss": 1.4135, "step": 9985 }, { "epoch": 8.287136929460582, "grad_norm": 18.35716438293457, "learning_rate": 1.668746887966805e-05, "loss": 0.8311, "step": 9986 }, { "epoch": 8.287966804979254, "grad_norm": 20.419212341308594, "learning_rate": 1.668713692946058e-05, "loss": 1.108, "step": 9987 }, { "epoch": 8.288796680497926, "grad_norm": 23.784286499023438, "learning_rate": 1.668680497925311e-05, "loss": 1.1151, "step": 9988 }, { "epoch": 8.289626556016598, "grad_norm": 29.365175247192383, "learning_rate": 1.6686473029045644e-05, "loss": 0.6745, "step": 9989 }, { "epoch": 8.29045643153527, "grad_norm": 17.763906478881836, "learning_rate": 1.6686141078838176e-05, "loss": 0.8572, "step": 9990 }, { "epoch": 8.291286307053943, "grad_norm": 32.49473190307617, "learning_rate": 1.6685809128630708e-05, "loss": 1.8918, "step": 9991 }, { "epoch": 8.292116182572615, "grad_norm": 18.990488052368164, "learning_rate": 1.6685477178423237e-05, "loss": 0.5217, "step": 9992 }, { "epoch": 8.292946058091287, "grad_norm": 17.046262741088867, "learning_rate": 1.668514522821577e-05, "loss": 0.6914, "step": 9993 }, { "epoch": 8.29377593360996, "grad_norm": 19.72406578063965, "learning_rate": 1.66848132780083e-05, "loss": 0.9828, "step": 9994 }, { "epoch": 8.294605809128631, "grad_norm": 30.459789276123047, "learning_rate": 1.6684481327800833e-05, "loss": 1.1276, "step": 9995 }, { "epoch": 8.295435684647304, "grad_norm": 26.989049911499023, "learning_rate": 1.6684149377593362e-05, "loss": 1.5023, 
"step": 9996 }, { "epoch": 8.296265560165976, "grad_norm": 18.4072265625, "learning_rate": 1.6683817427385894e-05, "loss": 1.4447, "step": 9997 }, { "epoch": 8.297095435684648, "grad_norm": 11.856877326965332, "learning_rate": 1.6683485477178426e-05, "loss": 0.6526, "step": 9998 }, { "epoch": 8.29792531120332, "grad_norm": 22.22556495666504, "learning_rate": 1.6683153526970958e-05, "loss": 0.9387, "step": 9999 }, { "epoch": 8.298755186721992, "grad_norm": 39.05159378051758, "learning_rate": 1.6682821576763487e-05, "loss": 1.3645, "step": 10000 }, { "epoch": 8.299585062240665, "grad_norm": 16.212657928466797, "learning_rate": 1.668248962655602e-05, "loss": 1.2698, "step": 10001 }, { "epoch": 8.300414937759337, "grad_norm": 20.326793670654297, "learning_rate": 1.668215767634855e-05, "loss": 1.3375, "step": 10002 }, { "epoch": 8.301244813278009, "grad_norm": 20.77025604248047, "learning_rate": 1.668182572614108e-05, "loss": 1.5575, "step": 10003 }, { "epoch": 8.302074688796681, "grad_norm": 32.292781829833984, "learning_rate": 1.6681493775933612e-05, "loss": 1.3556, "step": 10004 }, { "epoch": 8.302904564315353, "grad_norm": 17.33324432373047, "learning_rate": 1.668116182572614e-05, "loss": 1.2103, "step": 10005 }, { "epoch": 8.303734439834026, "grad_norm": 26.674962997436523, "learning_rate": 1.6680829875518673e-05, "loss": 1.1982, "step": 10006 }, { "epoch": 8.304564315352698, "grad_norm": 24.272804260253906, "learning_rate": 1.6680497925311205e-05, "loss": 0.7575, "step": 10007 }, { "epoch": 8.30539419087137, "grad_norm": 17.55303192138672, "learning_rate": 1.6680165975103733e-05, "loss": 0.7806, "step": 10008 }, { "epoch": 8.306224066390042, "grad_norm": 25.32062530517578, "learning_rate": 1.6679834024896266e-05, "loss": 1.4434, "step": 10009 }, { "epoch": 8.307053941908714, "grad_norm": 17.269269943237305, "learning_rate": 1.6679502074688798e-05, "loss": 0.9892, "step": 10010 }, { "epoch": 8.307883817427387, "grad_norm": 15.708632469177246, "learning_rate": 
1.667917012448133e-05, "loss": 1.3793, "step": 10011 }, { "epoch": 8.308713692946059, "grad_norm": 19.6601505279541, "learning_rate": 1.667883817427386e-05, "loss": 0.8035, "step": 10012 }, { "epoch": 8.309543568464731, "grad_norm": 31.845468521118164, "learning_rate": 1.667850622406639e-05, "loss": 1.6617, "step": 10013 }, { "epoch": 8.310373443983403, "grad_norm": 21.560274124145508, "learning_rate": 1.6678174273858923e-05, "loss": 1.0829, "step": 10014 }, { "epoch": 8.311203319502075, "grad_norm": 14.147241592407227, "learning_rate": 1.6677842323651455e-05, "loss": 0.5241, "step": 10015 }, { "epoch": 8.312033195020748, "grad_norm": 25.246232986450195, "learning_rate": 1.6677510373443987e-05, "loss": 1.1119, "step": 10016 }, { "epoch": 8.31286307053942, "grad_norm": 20.038349151611328, "learning_rate": 1.6677178423236516e-05, "loss": 0.9238, "step": 10017 }, { "epoch": 8.313692946058092, "grad_norm": 22.203195571899414, "learning_rate": 1.6676846473029048e-05, "loss": 1.1102, "step": 10018 }, { "epoch": 8.314522821576764, "grad_norm": 18.61140251159668, "learning_rate": 1.667651452282158e-05, "loss": 0.7108, "step": 10019 }, { "epoch": 8.315352697095436, "grad_norm": 29.286468505859375, "learning_rate": 1.667618257261411e-05, "loss": 1.4247, "step": 10020 }, { "epoch": 8.316182572614109, "grad_norm": 21.63912582397461, "learning_rate": 1.667585062240664e-05, "loss": 1.2214, "step": 10021 }, { "epoch": 8.31701244813278, "grad_norm": 21.888002395629883, "learning_rate": 1.6675518672199173e-05, "loss": 1.0645, "step": 10022 }, { "epoch": 8.317842323651453, "grad_norm": 23.51021957397461, "learning_rate": 1.66751867219917e-05, "loss": 0.7597, "step": 10023 }, { "epoch": 8.318672199170125, "grad_norm": 27.998523712158203, "learning_rate": 1.6674854771784234e-05, "loss": 1.7685, "step": 10024 }, { "epoch": 8.319502074688797, "grad_norm": 20.711503982543945, "learning_rate": 1.6674522821576762e-05, "loss": 1.6853, "step": 10025 }, { "epoch": 8.32033195020747, 
"grad_norm": 16.51190185546875, "learning_rate": 1.6674190871369294e-05, "loss": 1.0494, "step": 10026 }, { "epoch": 8.321161825726142, "grad_norm": 30.264341354370117, "learning_rate": 1.6673858921161827e-05, "loss": 1.541, "step": 10027 }, { "epoch": 8.321991701244814, "grad_norm": 28.956756591796875, "learning_rate": 1.667352697095436e-05, "loss": 2.2244, "step": 10028 }, { "epoch": 8.322821576763486, "grad_norm": 19.52418327331543, "learning_rate": 1.6673195020746887e-05, "loss": 1.1716, "step": 10029 }, { "epoch": 8.323651452282158, "grad_norm": 24.567909240722656, "learning_rate": 1.667286307053942e-05, "loss": 1.547, "step": 10030 }, { "epoch": 8.32448132780083, "grad_norm": 20.04682731628418, "learning_rate": 1.667253112033195e-05, "loss": 0.9883, "step": 10031 }, { "epoch": 8.325311203319503, "grad_norm": 22.27651023864746, "learning_rate": 1.6672199170124484e-05, "loss": 1.0961, "step": 10032 }, { "epoch": 8.326141078838175, "grad_norm": 21.25644874572754, "learning_rate": 1.6671867219917012e-05, "loss": 1.0234, "step": 10033 }, { "epoch": 8.326970954356847, "grad_norm": 24.26577377319336, "learning_rate": 1.6671535269709545e-05, "loss": 1.7479, "step": 10034 }, { "epoch": 8.32780082987552, "grad_norm": 34.27668762207031, "learning_rate": 1.6671203319502077e-05, "loss": 0.9854, "step": 10035 }, { "epoch": 8.328630705394191, "grad_norm": 28.836002349853516, "learning_rate": 1.667087136929461e-05, "loss": 0.967, "step": 10036 }, { "epoch": 8.329460580912864, "grad_norm": 17.927385330200195, "learning_rate": 1.6670539419087138e-05, "loss": 0.8648, "step": 10037 }, { "epoch": 8.330290456431536, "grad_norm": 17.484981536865234, "learning_rate": 1.667020746887967e-05, "loss": 0.8301, "step": 10038 }, { "epoch": 8.331120331950208, "grad_norm": 19.425796508789062, "learning_rate": 1.6669875518672202e-05, "loss": 1.0494, "step": 10039 }, { "epoch": 8.33195020746888, "grad_norm": 26.91592788696289, "learning_rate": 1.6669543568464734e-05, "loss": 1.2398, "step": 
10040 }, { "epoch": 8.332780082987552, "grad_norm": 19.918899536132812, "learning_rate": 1.6669211618257263e-05, "loss": 1.0475, "step": 10041 }, { "epoch": 8.333609958506225, "grad_norm": 22.1141357421875, "learning_rate": 1.6668879668049795e-05, "loss": 0.8041, "step": 10042 }, { "epoch": 8.334439834024897, "grad_norm": 38.558170318603516, "learning_rate": 1.6668547717842323e-05, "loss": 1.5377, "step": 10043 }, { "epoch": 8.335269709543569, "grad_norm": 20.54024887084961, "learning_rate": 1.6668215767634855e-05, "loss": 0.912, "step": 10044 }, { "epoch": 8.336099585062241, "grad_norm": 20.34938621520996, "learning_rate": 1.6667883817427388e-05, "loss": 1.3425, "step": 10045 }, { "epoch": 8.336929460580913, "grad_norm": 14.496896743774414, "learning_rate": 1.6667551867219916e-05, "loss": 0.8523, "step": 10046 }, { "epoch": 8.337759336099586, "grad_norm": 25.614757537841797, "learning_rate": 1.666721991701245e-05, "loss": 0.7548, "step": 10047 }, { "epoch": 8.338589211618258, "grad_norm": 19.430662155151367, "learning_rate": 1.666688796680498e-05, "loss": 1.0688, "step": 10048 }, { "epoch": 8.33941908713693, "grad_norm": 28.875511169433594, "learning_rate": 1.6666556016597513e-05, "loss": 1.0266, "step": 10049 }, { "epoch": 8.340248962655602, "grad_norm": 32.4400634765625, "learning_rate": 1.666622406639004e-05, "loss": 1.4685, "step": 10050 }, { "epoch": 8.341078838174274, "grad_norm": 23.319578170776367, "learning_rate": 1.6665892116182573e-05, "loss": 1.167, "step": 10051 }, { "epoch": 8.341908713692947, "grad_norm": 23.262245178222656, "learning_rate": 1.6665560165975106e-05, "loss": 1.2417, "step": 10052 }, { "epoch": 8.342738589211619, "grad_norm": 21.71863555908203, "learning_rate": 1.6665228215767638e-05, "loss": 1.1708, "step": 10053 }, { "epoch": 8.343568464730291, "grad_norm": 27.979366302490234, "learning_rate": 1.6664896265560166e-05, "loss": 1.0112, "step": 10054 }, { "epoch": 8.344398340248963, "grad_norm": 16.338823318481445, "learning_rate": 
1.66645643153527e-05, "loss": 0.8036, "step": 10055 }, { "epoch": 8.345228215767635, "grad_norm": 16.290681838989258, "learning_rate": 1.666423236514523e-05, "loss": 0.9309, "step": 10056 }, { "epoch": 8.346058091286308, "grad_norm": 25.07330322265625, "learning_rate": 1.6663900414937763e-05, "loss": 0.8698, "step": 10057 }, { "epoch": 8.34688796680498, "grad_norm": 45.58539581298828, "learning_rate": 1.666356846473029e-05, "loss": 1.6478, "step": 10058 }, { "epoch": 8.347717842323652, "grad_norm": 18.68831443786621, "learning_rate": 1.6663236514522824e-05, "loss": 1.3354, "step": 10059 }, { "epoch": 8.348547717842324, "grad_norm": 18.54182243347168, "learning_rate": 1.6662904564315356e-05, "loss": 1.141, "step": 10060 }, { "epoch": 8.349377593360996, "grad_norm": 19.456144332885742, "learning_rate": 1.6662572614107884e-05, "loss": 1.1794, "step": 10061 }, { "epoch": 8.350207468879669, "grad_norm": 30.112016677856445, "learning_rate": 1.6662240663900416e-05, "loss": 1.0487, "step": 10062 }, { "epoch": 8.35103734439834, "grad_norm": 21.494266510009766, "learning_rate": 1.666190871369295e-05, "loss": 1.0162, "step": 10063 }, { "epoch": 8.351867219917013, "grad_norm": 23.213558197021484, "learning_rate": 1.6661576763485477e-05, "loss": 1.0084, "step": 10064 }, { "epoch": 8.352697095435685, "grad_norm": 50.267459869384766, "learning_rate": 1.666124481327801e-05, "loss": 1.9788, "step": 10065 }, { "epoch": 8.353526970954357, "grad_norm": 32.001522064208984, "learning_rate": 1.6660912863070538e-05, "loss": 0.8506, "step": 10066 }, { "epoch": 8.35435684647303, "grad_norm": 16.138938903808594, "learning_rate": 1.666058091286307e-05, "loss": 0.4608, "step": 10067 }, { "epoch": 8.355186721991702, "grad_norm": 23.47779655456543, "learning_rate": 1.6660248962655602e-05, "loss": 1.5264, "step": 10068 }, { "epoch": 8.356016597510374, "grad_norm": 21.39482307434082, "learning_rate": 1.6659917012448134e-05, "loss": 1.0572, "step": 10069 }, { "epoch": 8.356846473029046, 
"grad_norm": 45.61530685424805, "learning_rate": 1.6659585062240667e-05, "loss": 0.9126, "step": 10070 }, { "epoch": 8.357676348547718, "grad_norm": 22.061588287353516, "learning_rate": 1.6659253112033195e-05, "loss": 0.909, "step": 10071 }, { "epoch": 8.35850622406639, "grad_norm": 26.923839569091797, "learning_rate": 1.6658921161825727e-05, "loss": 1.8095, "step": 10072 }, { "epoch": 8.359336099585063, "grad_norm": 17.552202224731445, "learning_rate": 1.665858921161826e-05, "loss": 0.7204, "step": 10073 }, { "epoch": 8.360165975103735, "grad_norm": 22.99510955810547, "learning_rate": 1.665825726141079e-05, "loss": 0.998, "step": 10074 }, { "epoch": 8.360995850622407, "grad_norm": 19.480823516845703, "learning_rate": 1.665792531120332e-05, "loss": 0.5119, "step": 10075 }, { "epoch": 8.36182572614108, "grad_norm": 23.893856048583984, "learning_rate": 1.6657593360995852e-05, "loss": 0.8889, "step": 10076 }, { "epoch": 8.362655601659752, "grad_norm": 23.362363815307617, "learning_rate": 1.6657261410788385e-05, "loss": 1.0633, "step": 10077 }, { "epoch": 8.363485477178424, "grad_norm": 20.406158447265625, "learning_rate": 1.6656929460580917e-05, "loss": 0.8802, "step": 10078 }, { "epoch": 8.364315352697096, "grad_norm": 20.614721298217773, "learning_rate": 1.6656597510373445e-05, "loss": 1.5006, "step": 10079 }, { "epoch": 8.365145228215768, "grad_norm": 17.761058807373047, "learning_rate": 1.6656265560165977e-05, "loss": 0.9297, "step": 10080 }, { "epoch": 8.36597510373444, "grad_norm": 24.776294708251953, "learning_rate": 1.6655933609958506e-05, "loss": 1.2808, "step": 10081 }, { "epoch": 8.366804979253113, "grad_norm": 31.40306854248047, "learning_rate": 1.6655601659751038e-05, "loss": 1.3109, "step": 10082 }, { "epoch": 8.367634854771785, "grad_norm": 49.69070816040039, "learning_rate": 1.665526970954357e-05, "loss": 1.0565, "step": 10083 }, { "epoch": 8.368464730290457, "grad_norm": 51.65298080444336, "learning_rate": 1.66549377593361e-05, "loss": 0.6906, "step": 
10084 }, { "epoch": 8.369294605809129, "grad_norm": 25.229360580444336, "learning_rate": 1.665460580912863e-05, "loss": 1.1412, "step": 10085 }, { "epoch": 8.370124481327801, "grad_norm": 18.739816665649414, "learning_rate": 1.6654273858921163e-05, "loss": 0.961, "step": 10086 }, { "epoch": 8.370954356846473, "grad_norm": 19.13003921508789, "learning_rate": 1.6653941908713692e-05, "loss": 1.0294, "step": 10087 }, { "epoch": 8.371784232365146, "grad_norm": 24.2076358795166, "learning_rate": 1.6653609958506224e-05, "loss": 0.9401, "step": 10088 }, { "epoch": 8.372614107883818, "grad_norm": 34.110530853271484, "learning_rate": 1.6653278008298756e-05, "loss": 1.1598, "step": 10089 }, { "epoch": 8.37344398340249, "grad_norm": 21.583003997802734, "learning_rate": 1.665294605809129e-05, "loss": 0.8129, "step": 10090 }, { "epoch": 8.374273858921162, "grad_norm": 13.828065872192383, "learning_rate": 1.6652614107883817e-05, "loss": 0.6584, "step": 10091 }, { "epoch": 8.375103734439834, "grad_norm": 28.3615779876709, "learning_rate": 1.665228215767635e-05, "loss": 1.1084, "step": 10092 }, { "epoch": 8.375933609958507, "grad_norm": 18.84865951538086, "learning_rate": 1.665195020746888e-05, "loss": 1.4144, "step": 10093 }, { "epoch": 8.376763485477179, "grad_norm": 22.75324821472168, "learning_rate": 1.6651618257261413e-05, "loss": 1.2871, "step": 10094 }, { "epoch": 8.377593360995851, "grad_norm": 21.26099967956543, "learning_rate": 1.6651286307053946e-05, "loss": 1.1238, "step": 10095 }, { "epoch": 8.378423236514523, "grad_norm": 48.95374298095703, "learning_rate": 1.6650954356846474e-05, "loss": 1.0481, "step": 10096 }, { "epoch": 8.379253112033195, "grad_norm": 26.57112693786621, "learning_rate": 1.6650622406639006e-05, "loss": 1.1812, "step": 10097 }, { "epoch": 8.380082987551868, "grad_norm": 17.339086532592773, "learning_rate": 1.665029045643154e-05, "loss": 0.6528, "step": 10098 }, { "epoch": 8.38091286307054, "grad_norm": 16.469633102416992, "learning_rate": 
1.6649958506224067e-05, "loss": 0.7775, "step": 10099 }, { "epoch": 8.381742738589212, "grad_norm": 39.51255798339844, "learning_rate": 1.66496265560166e-05, "loss": 1.6765, "step": 10100 }, { "epoch": 8.382572614107884, "grad_norm": 28.321699142456055, "learning_rate": 1.664929460580913e-05, "loss": 1.4968, "step": 10101 }, { "epoch": 8.383402489626556, "grad_norm": 13.916446685791016, "learning_rate": 1.664896265560166e-05, "loss": 0.4149, "step": 10102 }, { "epoch": 8.384232365145229, "grad_norm": 28.875442504882812, "learning_rate": 1.6648630705394192e-05, "loss": 1.3964, "step": 10103 }, { "epoch": 8.3850622406639, "grad_norm": 24.81781005859375, "learning_rate": 1.664829875518672e-05, "loss": 1.6992, "step": 10104 }, { "epoch": 8.385892116182573, "grad_norm": 24.217559814453125, "learning_rate": 1.6647966804979253e-05, "loss": 1.0606, "step": 10105 }, { "epoch": 8.386721991701245, "grad_norm": 20.514047622680664, "learning_rate": 1.6647634854771785e-05, "loss": 1.1812, "step": 10106 }, { "epoch": 8.387551867219917, "grad_norm": 20.127859115600586, "learning_rate": 1.6647302904564317e-05, "loss": 1.0661, "step": 10107 }, { "epoch": 8.38838174273859, "grad_norm": 33.51366424560547, "learning_rate": 1.6646970954356846e-05, "loss": 1.2865, "step": 10108 }, { "epoch": 8.389211618257262, "grad_norm": 16.20232582092285, "learning_rate": 1.6646639004149378e-05, "loss": 0.9215, "step": 10109 }, { "epoch": 8.390041493775934, "grad_norm": 24.22989845275879, "learning_rate": 1.664630705394191e-05, "loss": 1.1463, "step": 10110 }, { "epoch": 8.390871369294606, "grad_norm": 45.5305061340332, "learning_rate": 1.6645975103734442e-05, "loss": 1.1606, "step": 10111 }, { "epoch": 8.391701244813278, "grad_norm": 25.159652709960938, "learning_rate": 1.664564315352697e-05, "loss": 1.132, "step": 10112 }, { "epoch": 8.39253112033195, "grad_norm": 40.448368072509766, "learning_rate": 1.6645311203319503e-05, "loss": 0.9599, "step": 10113 }, { "epoch": 8.393360995850623, "grad_norm": 
24.595245361328125, "learning_rate": 1.6644979253112035e-05, "loss": 0.874, "step": 10114 }, { "epoch": 8.394190871369295, "grad_norm": 43.961673736572266, "learning_rate": 1.6644647302904567e-05, "loss": 1.5084, "step": 10115 }, { "epoch": 8.395020746887967, "grad_norm": 27.892210006713867, "learning_rate": 1.6644315352697096e-05, "loss": 1.3781, "step": 10116 }, { "epoch": 8.39585062240664, "grad_norm": 26.51332664489746, "learning_rate": 1.6643983402489628e-05, "loss": 1.2472, "step": 10117 }, { "epoch": 8.396680497925312, "grad_norm": 43.66794967651367, "learning_rate": 1.664365145228216e-05, "loss": 1.284, "step": 10118 }, { "epoch": 8.397510373443984, "grad_norm": 24.59164047241211, "learning_rate": 1.6643319502074692e-05, "loss": 1.1759, "step": 10119 }, { "epoch": 8.398340248962656, "grad_norm": 25.0767822265625, "learning_rate": 1.664298755186722e-05, "loss": 1.3216, "step": 10120 }, { "epoch": 8.399170124481328, "grad_norm": 19.88278579711914, "learning_rate": 1.6642655601659753e-05, "loss": 1.2915, "step": 10121 }, { "epoch": 8.4, "grad_norm": 16.617300033569336, "learning_rate": 1.6642323651452282e-05, "loss": 1.3126, "step": 10122 }, { "epoch": 8.400829875518673, "grad_norm": 18.861770629882812, "learning_rate": 1.6641991701244814e-05, "loss": 1.0867, "step": 10123 }, { "epoch": 8.401659751037345, "grad_norm": 16.12782096862793, "learning_rate": 1.6641659751037346e-05, "loss": 1.0928, "step": 10124 }, { "epoch": 8.402489626556017, "grad_norm": 19.870779037475586, "learning_rate": 1.6641327800829875e-05, "loss": 0.9428, "step": 10125 }, { "epoch": 8.40331950207469, "grad_norm": 23.4696102142334, "learning_rate": 1.6640995850622407e-05, "loss": 1.6204, "step": 10126 }, { "epoch": 8.404149377593361, "grad_norm": 29.758562088012695, "learning_rate": 1.664066390041494e-05, "loss": 1.5508, "step": 10127 }, { "epoch": 8.404979253112034, "grad_norm": 26.388010025024414, "learning_rate": 1.664033195020747e-05, "loss": 1.3914, "step": 10128 }, { "epoch": 
8.405809128630706, "grad_norm": 27.09659767150879, "learning_rate": 1.664e-05, "loss": 1.4507, "step": 10129 }, { "epoch": 8.406639004149378, "grad_norm": 22.398902893066406, "learning_rate": 1.6639668049792532e-05, "loss": 0.9311, "step": 10130 }, { "epoch": 8.40746887966805, "grad_norm": 12.599860191345215, "learning_rate": 1.6639336099585064e-05, "loss": 0.9484, "step": 10131 }, { "epoch": 8.408298755186722, "grad_norm": 20.650619506835938, "learning_rate": 1.6639004149377596e-05, "loss": 1.3155, "step": 10132 }, { "epoch": 8.409128630705395, "grad_norm": 17.704545974731445, "learning_rate": 1.6638672199170125e-05, "loss": 1.0256, "step": 10133 }, { "epoch": 8.409958506224067, "grad_norm": 23.76549530029297, "learning_rate": 1.6638340248962657e-05, "loss": 0.993, "step": 10134 }, { "epoch": 8.410788381742739, "grad_norm": 42.23660659790039, "learning_rate": 1.663800829875519e-05, "loss": 1.7862, "step": 10135 }, { "epoch": 8.411618257261411, "grad_norm": 18.405561447143555, "learning_rate": 1.663767634854772e-05, "loss": 1.102, "step": 10136 }, { "epoch": 8.412448132780083, "grad_norm": 20.863388061523438, "learning_rate": 1.663734439834025e-05, "loss": 0.9245, "step": 10137 }, { "epoch": 8.413278008298755, "grad_norm": 18.437257766723633, "learning_rate": 1.6637012448132782e-05, "loss": 0.8166, "step": 10138 }, { "epoch": 8.414107883817428, "grad_norm": 23.46745491027832, "learning_rate": 1.6636680497925314e-05, "loss": 1.3055, "step": 10139 }, { "epoch": 8.4149377593361, "grad_norm": 15.628838539123535, "learning_rate": 1.6636348547717843e-05, "loss": 1.0404, "step": 10140 }, { "epoch": 8.415767634854772, "grad_norm": 25.314334869384766, "learning_rate": 1.6636016597510375e-05, "loss": 1.6528, "step": 10141 }, { "epoch": 8.416597510373444, "grad_norm": 19.417463302612305, "learning_rate": 1.6635684647302904e-05, "loss": 1.2077, "step": 10142 }, { "epoch": 8.417427385892116, "grad_norm": 25.959627151489258, "learning_rate": 1.6635352697095436e-05, "loss": 
0.7851, "step": 10143 }, { "epoch": 8.418257261410789, "grad_norm": 50.30210494995117, "learning_rate": 1.6635020746887968e-05, "loss": 1.057, "step": 10144 }, { "epoch": 8.41908713692946, "grad_norm": 19.880502700805664, "learning_rate": 1.6634688796680497e-05, "loss": 0.939, "step": 10145 }, { "epoch": 8.419917012448133, "grad_norm": 25.40594482421875, "learning_rate": 1.663435684647303e-05, "loss": 1.2655, "step": 10146 }, { "epoch": 8.420746887966805, "grad_norm": 31.762868881225586, "learning_rate": 1.663402489626556e-05, "loss": 0.8819, "step": 10147 }, { "epoch": 8.421576763485477, "grad_norm": 28.765830993652344, "learning_rate": 1.6633692946058093e-05, "loss": 0.6586, "step": 10148 }, { "epoch": 8.42240663900415, "grad_norm": 29.818954467773438, "learning_rate": 1.6633360995850625e-05, "loss": 1.2178, "step": 10149 }, { "epoch": 8.423236514522822, "grad_norm": 31.160781860351562, "learning_rate": 1.6633029045643154e-05, "loss": 1.416, "step": 10150 }, { "epoch": 8.424066390041494, "grad_norm": 37.90926742553711, "learning_rate": 1.6632697095435686e-05, "loss": 0.7421, "step": 10151 }, { "epoch": 8.424896265560166, "grad_norm": 30.26778793334961, "learning_rate": 1.6632365145228218e-05, "loss": 1.2904, "step": 10152 }, { "epoch": 8.425726141078838, "grad_norm": 20.19036102294922, "learning_rate": 1.663203319502075e-05, "loss": 1.2197, "step": 10153 }, { "epoch": 8.42655601659751, "grad_norm": 39.9294319152832, "learning_rate": 1.663170124481328e-05, "loss": 0.8787, "step": 10154 }, { "epoch": 8.427385892116183, "grad_norm": 13.669942855834961, "learning_rate": 1.663136929460581e-05, "loss": 0.4675, "step": 10155 }, { "epoch": 8.428215767634855, "grad_norm": 37.55342102050781, "learning_rate": 1.6631037344398343e-05, "loss": 2.1095, "step": 10156 }, { "epoch": 8.429045643153527, "grad_norm": 19.058361053466797, "learning_rate": 1.6630705394190875e-05, "loss": 0.9591, "step": 10157 }, { "epoch": 8.4298755186722, "grad_norm": 33.78434753417969, 
"learning_rate": 1.6630373443983404e-05, "loss": 1.1877, "step": 10158 }, { "epoch": 8.430705394190872, "grad_norm": 41.53250503540039, "learning_rate": 1.6630041493775936e-05, "loss": 1.8764, "step": 10159 }, { "epoch": 8.431535269709544, "grad_norm": 18.994901657104492, "learning_rate": 1.6629709543568465e-05, "loss": 0.9025, "step": 10160 }, { "epoch": 8.432365145228216, "grad_norm": 26.834308624267578, "learning_rate": 1.6629377593360997e-05, "loss": 1.5548, "step": 10161 }, { "epoch": 8.433195020746888, "grad_norm": 19.632598876953125, "learning_rate": 1.662904564315353e-05, "loss": 0.7901, "step": 10162 }, { "epoch": 8.43402489626556, "grad_norm": 27.723127365112305, "learning_rate": 1.6628713692946058e-05, "loss": 1.0364, "step": 10163 }, { "epoch": 8.434854771784233, "grad_norm": 15.98456859588623, "learning_rate": 1.662838174273859e-05, "loss": 0.5959, "step": 10164 }, { "epoch": 8.435684647302905, "grad_norm": 37.65420913696289, "learning_rate": 1.6628049792531122e-05, "loss": 0.9198, "step": 10165 }, { "epoch": 8.436514522821577, "grad_norm": 22.128145217895508, "learning_rate": 1.662771784232365e-05, "loss": 1.0882, "step": 10166 }, { "epoch": 8.43734439834025, "grad_norm": 16.230104446411133, "learning_rate": 1.6627385892116183e-05, "loss": 0.7127, "step": 10167 }, { "epoch": 8.438174273858921, "grad_norm": 19.636165618896484, "learning_rate": 1.6627053941908715e-05, "loss": 0.9471, "step": 10168 }, { "epoch": 8.439004149377594, "grad_norm": 20.3486385345459, "learning_rate": 1.6626721991701247e-05, "loss": 1.6095, "step": 10169 }, { "epoch": 8.439834024896266, "grad_norm": 30.14825439453125, "learning_rate": 1.6626390041493776e-05, "loss": 1.4982, "step": 10170 }, { "epoch": 8.440663900414938, "grad_norm": 41.818843841552734, "learning_rate": 1.6626058091286308e-05, "loss": 1.2182, "step": 10171 }, { "epoch": 8.44149377593361, "grad_norm": 27.825305938720703, "learning_rate": 1.662572614107884e-05, "loss": 1.5597, "step": 10172 }, { "epoch": 
8.442323651452282, "grad_norm": 31.65500259399414, "learning_rate": 1.6625394190871372e-05, "loss": 1.74, "step": 10173 }, { "epoch": 8.443153526970955, "grad_norm": 18.70927619934082, "learning_rate": 1.6625062240663904e-05, "loss": 0.9498, "step": 10174 }, { "epoch": 8.443983402489627, "grad_norm": 14.098934173583984, "learning_rate": 1.6624730290456433e-05, "loss": 0.7126, "step": 10175 }, { "epoch": 8.444813278008299, "grad_norm": 18.708959579467773, "learning_rate": 1.6624398340248965e-05, "loss": 0.8453, "step": 10176 }, { "epoch": 8.445643153526971, "grad_norm": 18.53278923034668, "learning_rate": 1.6624066390041497e-05, "loss": 1.1328, "step": 10177 }, { "epoch": 8.446473029045643, "grad_norm": 29.383316040039062, "learning_rate": 1.6623734439834026e-05, "loss": 1.4133, "step": 10178 }, { "epoch": 8.447302904564316, "grad_norm": 17.244524002075195, "learning_rate": 1.6623402489626558e-05, "loss": 1.1973, "step": 10179 }, { "epoch": 8.448132780082988, "grad_norm": 16.386816024780273, "learning_rate": 1.662307053941909e-05, "loss": 0.9363, "step": 10180 }, { "epoch": 8.44896265560166, "grad_norm": 20.927165985107422, "learning_rate": 1.662273858921162e-05, "loss": 0.9656, "step": 10181 }, { "epoch": 8.449792531120332, "grad_norm": 20.48624038696289, "learning_rate": 1.662240663900415e-05, "loss": 1.7102, "step": 10182 }, { "epoch": 8.450622406639004, "grad_norm": 16.46099853515625, "learning_rate": 1.662207468879668e-05, "loss": 1.2325, "step": 10183 }, { "epoch": 8.451452282157677, "grad_norm": 21.600584030151367, "learning_rate": 1.662174273858921e-05, "loss": 0.9389, "step": 10184 }, { "epoch": 8.452282157676349, "grad_norm": 27.505218505859375, "learning_rate": 1.6621410788381744e-05, "loss": 0.7365, "step": 10185 }, { "epoch": 8.453112033195021, "grad_norm": 17.133813858032227, "learning_rate": 1.6621078838174276e-05, "loss": 1.0812, "step": 10186 }, { "epoch": 8.453941908713693, "grad_norm": 31.337722778320312, "learning_rate": 1.6620746887966805e-05, 
"loss": 1.5119, "step": 10187 }, { "epoch": 8.454771784232365, "grad_norm": 15.669285774230957, "learning_rate": 1.6620414937759337e-05, "loss": 0.8924, "step": 10188 }, { "epoch": 8.455601659751038, "grad_norm": 14.364842414855957, "learning_rate": 1.662008298755187e-05, "loss": 0.757, "step": 10189 }, { "epoch": 8.45643153526971, "grad_norm": 17.525436401367188, "learning_rate": 1.66197510373444e-05, "loss": 0.9024, "step": 10190 }, { "epoch": 8.457261410788382, "grad_norm": 18.091495513916016, "learning_rate": 1.661941908713693e-05, "loss": 1.027, "step": 10191 }, { "epoch": 8.458091286307054, "grad_norm": 10.890978813171387, "learning_rate": 1.6619087136929462e-05, "loss": 0.4283, "step": 10192 }, { "epoch": 8.458921161825726, "grad_norm": 20.41109275817871, "learning_rate": 1.6618755186721994e-05, "loss": 1.0143, "step": 10193 }, { "epoch": 8.459751037344398, "grad_norm": 40.438018798828125, "learning_rate": 1.6618423236514526e-05, "loss": 1.2756, "step": 10194 }, { "epoch": 8.46058091286307, "grad_norm": 29.000350952148438, "learning_rate": 1.6618091286307055e-05, "loss": 1.0038, "step": 10195 }, { "epoch": 8.461410788381743, "grad_norm": 14.506939888000488, "learning_rate": 1.6617759336099587e-05, "loss": 0.7416, "step": 10196 }, { "epoch": 8.462240663900415, "grad_norm": 24.89533805847168, "learning_rate": 1.661742738589212e-05, "loss": 1.1725, "step": 10197 }, { "epoch": 8.463070539419087, "grad_norm": 76.70423889160156, "learning_rate": 1.6617095435684648e-05, "loss": 1.015, "step": 10198 }, { "epoch": 8.46390041493776, "grad_norm": 25.702804565429688, "learning_rate": 1.661676348547718e-05, "loss": 1.0029, "step": 10199 }, { "epoch": 8.464730290456432, "grad_norm": 17.70720672607422, "learning_rate": 1.6616431535269712e-05, "loss": 0.6662, "step": 10200 }, { "epoch": 8.465560165975104, "grad_norm": 16.303939819335938, "learning_rate": 1.661609958506224e-05, "loss": 0.5462, "step": 10201 }, { "epoch": 8.466390041493776, "grad_norm": 31.932476043701172, 
"learning_rate": 1.6615767634854773e-05, "loss": 1.8828, "step": 10202 }, { "epoch": 8.467219917012448, "grad_norm": 25.04811668395996, "learning_rate": 1.6615435684647305e-05, "loss": 0.9077, "step": 10203 }, { "epoch": 8.46804979253112, "grad_norm": 26.893470764160156, "learning_rate": 1.6615103734439833e-05, "loss": 0.9557, "step": 10204 }, { "epoch": 8.468879668049793, "grad_norm": 22.232559204101562, "learning_rate": 1.6614771784232366e-05, "loss": 1.0346, "step": 10205 }, { "epoch": 8.469709543568465, "grad_norm": 26.84662437438965, "learning_rate": 1.6614439834024898e-05, "loss": 1.2997, "step": 10206 }, { "epoch": 8.470539419087137, "grad_norm": 25.301267623901367, "learning_rate": 1.661410788381743e-05, "loss": 1.363, "step": 10207 }, { "epoch": 8.47136929460581, "grad_norm": 37.30564880371094, "learning_rate": 1.661377593360996e-05, "loss": 0.8871, "step": 10208 }, { "epoch": 8.472199170124481, "grad_norm": 38.174224853515625, "learning_rate": 1.661344398340249e-05, "loss": 1.7143, "step": 10209 }, { "epoch": 8.473029045643154, "grad_norm": 27.00360679626465, "learning_rate": 1.6613112033195023e-05, "loss": 1.4744, "step": 10210 }, { "epoch": 8.473858921161826, "grad_norm": 21.457141876220703, "learning_rate": 1.6612780082987555e-05, "loss": 0.81, "step": 10211 }, { "epoch": 8.474688796680498, "grad_norm": 22.029293060302734, "learning_rate": 1.6612448132780084e-05, "loss": 1.0945, "step": 10212 }, { "epoch": 8.47551867219917, "grad_norm": 36.703224182128906, "learning_rate": 1.6612116182572616e-05, "loss": 1.2425, "step": 10213 }, { "epoch": 8.476348547717842, "grad_norm": 26.812856674194336, "learning_rate": 1.6611784232365148e-05, "loss": 1.5254, "step": 10214 }, { "epoch": 8.477178423236515, "grad_norm": 34.08085632324219, "learning_rate": 1.661145228215768e-05, "loss": 1.4667, "step": 10215 }, { "epoch": 8.478008298755187, "grad_norm": 41.3416862487793, "learning_rate": 1.661112033195021e-05, "loss": 2.4599, "step": 10216 }, { "epoch": 
8.478838174273859, "grad_norm": 18.501630783081055, "learning_rate": 1.661078838174274e-05, "loss": 0.8629, "step": 10217 }, { "epoch": 8.479668049792531, "grad_norm": 32.85191345214844, "learning_rate": 1.6610456431535273e-05, "loss": 0.7844, "step": 10218 }, { "epoch": 8.480497925311203, "grad_norm": 19.93635368347168, "learning_rate": 1.66101244813278e-05, "loss": 0.8869, "step": 10219 }, { "epoch": 8.481327800829876, "grad_norm": 21.291032791137695, "learning_rate": 1.6609792531120334e-05, "loss": 0.7478, "step": 10220 }, { "epoch": 8.482157676348548, "grad_norm": 43.522377014160156, "learning_rate": 1.6609460580912862e-05, "loss": 1.8351, "step": 10221 }, { "epoch": 8.48298755186722, "grad_norm": 24.83841323852539, "learning_rate": 1.6609128630705394e-05, "loss": 0.9758, "step": 10222 }, { "epoch": 8.483817427385892, "grad_norm": 38.77281188964844, "learning_rate": 1.6608796680497927e-05, "loss": 1.1802, "step": 10223 }, { "epoch": 8.484647302904564, "grad_norm": 41.499176025390625, "learning_rate": 1.6608464730290455e-05, "loss": 1.3985, "step": 10224 }, { "epoch": 8.485477178423237, "grad_norm": 23.87859344482422, "learning_rate": 1.6608132780082987e-05, "loss": 0.6846, "step": 10225 }, { "epoch": 8.486307053941909, "grad_norm": 22.735111236572266, "learning_rate": 1.660780082987552e-05, "loss": 0.7377, "step": 10226 }, { "epoch": 8.487136929460581, "grad_norm": 24.94085121154785, "learning_rate": 1.660746887966805e-05, "loss": 1.5679, "step": 10227 }, { "epoch": 8.487966804979253, "grad_norm": 30.226238250732422, "learning_rate": 1.6607136929460584e-05, "loss": 1.3415, "step": 10228 }, { "epoch": 8.488796680497925, "grad_norm": 22.39824676513672, "learning_rate": 1.6606804979253112e-05, "loss": 0.8021, "step": 10229 }, { "epoch": 8.489626556016598, "grad_norm": 29.515159606933594, "learning_rate": 1.6606473029045645e-05, "loss": 1.4182, "step": 10230 }, { "epoch": 8.49045643153527, "grad_norm": 13.47427749633789, "learning_rate": 1.6606141078838177e-05, 
"loss": 0.6457, "step": 10231 }, { "epoch": 8.491286307053942, "grad_norm": 18.531505584716797, "learning_rate": 1.660580912863071e-05, "loss": 0.6225, "step": 10232 }, { "epoch": 8.492116182572614, "grad_norm": 22.096105575561523, "learning_rate": 1.6605477178423237e-05, "loss": 1.2375, "step": 10233 }, { "epoch": 8.492946058091286, "grad_norm": 30.123432159423828, "learning_rate": 1.660514522821577e-05, "loss": 1.3259, "step": 10234 }, { "epoch": 8.493775933609959, "grad_norm": 38.29960632324219, "learning_rate": 1.66048132780083e-05, "loss": 0.8638, "step": 10235 }, { "epoch": 8.49460580912863, "grad_norm": 29.343809127807617, "learning_rate": 1.6604481327800834e-05, "loss": 1.621, "step": 10236 }, { "epoch": 8.495435684647303, "grad_norm": 21.53001594543457, "learning_rate": 1.6604149377593363e-05, "loss": 0.8245, "step": 10237 }, { "epoch": 8.496265560165975, "grad_norm": 27.100988388061523, "learning_rate": 1.6603817427385895e-05, "loss": 0.8185, "step": 10238 }, { "epoch": 8.497095435684647, "grad_norm": 29.599855422973633, "learning_rate": 1.6603485477178423e-05, "loss": 1.4108, "step": 10239 }, { "epoch": 8.49792531120332, "grad_norm": 21.153600692749023, "learning_rate": 1.6603153526970955e-05, "loss": 1.3934, "step": 10240 }, { "epoch": 8.498755186721992, "grad_norm": 26.699356079101562, "learning_rate": 1.6602821576763488e-05, "loss": 0.8937, "step": 10241 }, { "epoch": 8.499585062240664, "grad_norm": 21.841093063354492, "learning_rate": 1.6602489626556016e-05, "loss": 1.2619, "step": 10242 }, { "epoch": 8.500414937759336, "grad_norm": 29.615694046020508, "learning_rate": 1.660215767634855e-05, "loss": 1.9593, "step": 10243 }, { "epoch": 8.501244813278008, "grad_norm": 19.155616760253906, "learning_rate": 1.660182572614108e-05, "loss": 0.8501, "step": 10244 }, { "epoch": 8.50207468879668, "grad_norm": 22.319543838500977, "learning_rate": 1.660149377593361e-05, "loss": 0.9404, "step": 10245 }, { "epoch": 8.502904564315353, "grad_norm": 22.67804527282715, 
"learning_rate": 1.660116182572614e-05, "loss": 1.3422, "step": 10246 }, { "epoch": 8.503734439834025, "grad_norm": 30.051170349121094, "learning_rate": 1.6600829875518673e-05, "loss": 2.1621, "step": 10247 }, { "epoch": 8.504564315352697, "grad_norm": 19.108478546142578, "learning_rate": 1.6600497925311206e-05, "loss": 1.1224, "step": 10248 }, { "epoch": 8.50539419087137, "grad_norm": 25.520614624023438, "learning_rate": 1.6600165975103734e-05, "loss": 0.8678, "step": 10249 }, { "epoch": 8.506224066390041, "grad_norm": 25.003995895385742, "learning_rate": 1.6599834024896266e-05, "loss": 1.4445, "step": 10250 }, { "epoch": 8.507053941908714, "grad_norm": 17.212282180786133, "learning_rate": 1.65995020746888e-05, "loss": 0.8108, "step": 10251 }, { "epoch": 8.507883817427386, "grad_norm": 24.20093536376953, "learning_rate": 1.659917012448133e-05, "loss": 1.2998, "step": 10252 }, { "epoch": 8.508713692946058, "grad_norm": 19.324018478393555, "learning_rate": 1.6598838174273863e-05, "loss": 0.9773, "step": 10253 }, { "epoch": 8.50954356846473, "grad_norm": 23.907018661499023, "learning_rate": 1.659850622406639e-05, "loss": 1.14, "step": 10254 }, { "epoch": 8.510373443983402, "grad_norm": 23.07064437866211, "learning_rate": 1.6598174273858924e-05, "loss": 1.2016, "step": 10255 }, { "epoch": 8.511203319502075, "grad_norm": 69.61380767822266, "learning_rate": 1.6597842323651456e-05, "loss": 1.3434, "step": 10256 }, { "epoch": 8.512033195020747, "grad_norm": 20.8829288482666, "learning_rate": 1.6597510373443984e-05, "loss": 0.9751, "step": 10257 }, { "epoch": 8.512863070539419, "grad_norm": 19.159385681152344, "learning_rate": 1.6597178423236516e-05, "loss": 1.0078, "step": 10258 }, { "epoch": 8.513692946058091, "grad_norm": 17.600374221801758, "learning_rate": 1.6596846473029045e-05, "loss": 0.6792, "step": 10259 }, { "epoch": 8.514522821576763, "grad_norm": 19.26729965209961, "learning_rate": 1.6596514522821577e-05, "loss": 1.1097, "step": 10260 }, { "epoch": 
8.515352697095436, "grad_norm": 25.6475887298584, "learning_rate": 1.659618257261411e-05, "loss": 1.0807, "step": 10261 }, { "epoch": 8.516182572614108, "grad_norm": 17.29747772216797, "learning_rate": 1.6595850622406638e-05, "loss": 0.8204, "step": 10262 }, { "epoch": 8.51701244813278, "grad_norm": 18.020917892456055, "learning_rate": 1.659551867219917e-05, "loss": 0.6631, "step": 10263 }, { "epoch": 8.517842323651452, "grad_norm": 35.23978042602539, "learning_rate": 1.6595186721991702e-05, "loss": 1.1119, "step": 10264 }, { "epoch": 8.518672199170124, "grad_norm": 18.317934036254883, "learning_rate": 1.6594854771784234e-05, "loss": 0.8875, "step": 10265 }, { "epoch": 8.519502074688797, "grad_norm": 18.931486129760742, "learning_rate": 1.6594522821576763e-05, "loss": 1.0497, "step": 10266 }, { "epoch": 8.520331950207469, "grad_norm": 25.923830032348633, "learning_rate": 1.6594190871369295e-05, "loss": 1.1713, "step": 10267 }, { "epoch": 8.521161825726141, "grad_norm": 13.503453254699707, "learning_rate": 1.6593858921161827e-05, "loss": 0.4879, "step": 10268 }, { "epoch": 8.521991701244813, "grad_norm": 23.65148162841797, "learning_rate": 1.659352697095436e-05, "loss": 1.0715, "step": 10269 }, { "epoch": 8.522821576763485, "grad_norm": 20.471750259399414, "learning_rate": 1.6593195020746888e-05, "loss": 0.854, "step": 10270 }, { "epoch": 8.523651452282158, "grad_norm": 36.639366149902344, "learning_rate": 1.659286307053942e-05, "loss": 0.8658, "step": 10271 }, { "epoch": 8.52448132780083, "grad_norm": 31.704364776611328, "learning_rate": 1.6592531120331952e-05, "loss": 0.767, "step": 10272 }, { "epoch": 8.525311203319502, "grad_norm": 39.30229568481445, "learning_rate": 1.6592199170124485e-05, "loss": 1.4385, "step": 10273 }, { "epoch": 8.526141078838174, "grad_norm": 15.332484245300293, "learning_rate": 1.6591867219917013e-05, "loss": 0.7661, "step": 10274 }, { "epoch": 8.526970954356846, "grad_norm": 20.357839584350586, "learning_rate": 1.6591535269709545e-05, 
"loss": 0.8462, "step": 10275 }, { "epoch": 8.527800829875519, "grad_norm": 32.692649841308594, "learning_rate": 1.6591203319502077e-05, "loss": 2.0707, "step": 10276 }, { "epoch": 8.52863070539419, "grad_norm": 21.288888931274414, "learning_rate": 1.6590871369294606e-05, "loss": 0.761, "step": 10277 }, { "epoch": 8.529460580912863, "grad_norm": 31.470294952392578, "learning_rate": 1.6590539419087138e-05, "loss": 1.8935, "step": 10278 }, { "epoch": 8.530290456431535, "grad_norm": 17.44300651550293, "learning_rate": 1.659020746887967e-05, "loss": 1.1596, "step": 10279 }, { "epoch": 8.531120331950207, "grad_norm": 32.99628829956055, "learning_rate": 1.65898755186722e-05, "loss": 1.9671, "step": 10280 }, { "epoch": 8.53195020746888, "grad_norm": 16.95395851135254, "learning_rate": 1.658954356846473e-05, "loss": 0.949, "step": 10281 }, { "epoch": 8.532780082987552, "grad_norm": 24.374521255493164, "learning_rate": 1.6589211618257263e-05, "loss": 0.8587, "step": 10282 }, { "epoch": 8.533609958506224, "grad_norm": 14.539377212524414, "learning_rate": 1.6588879668049792e-05, "loss": 0.6946, "step": 10283 }, { "epoch": 8.534439834024896, "grad_norm": 28.0972843170166, "learning_rate": 1.6588547717842324e-05, "loss": 1.3339, "step": 10284 }, { "epoch": 8.535269709543568, "grad_norm": 36.56776809692383, "learning_rate": 1.6588215767634856e-05, "loss": 0.8588, "step": 10285 }, { "epoch": 8.53609958506224, "grad_norm": 24.037342071533203, "learning_rate": 1.658788381742739e-05, "loss": 0.8177, "step": 10286 }, { "epoch": 8.536929460580913, "grad_norm": 18.914464950561523, "learning_rate": 1.6587551867219917e-05, "loss": 1.2686, "step": 10287 }, { "epoch": 8.537759336099585, "grad_norm": 25.938217163085938, "learning_rate": 1.658721991701245e-05, "loss": 1.7779, "step": 10288 }, { "epoch": 8.538589211618257, "grad_norm": 21.34477424621582, "learning_rate": 1.658688796680498e-05, "loss": 1.0215, "step": 10289 }, { "epoch": 8.53941908713693, "grad_norm": 30.685375213623047, 
"learning_rate": 1.6586556016597513e-05, "loss": 1.2528, "step": 10290 }, { "epoch": 8.540248962655602, "grad_norm": 20.251487731933594, "learning_rate": 1.6586224066390042e-05, "loss": 1.184, "step": 10291 }, { "epoch": 8.541078838174274, "grad_norm": 15.979819297790527, "learning_rate": 1.6585892116182574e-05, "loss": 0.8128, "step": 10292 }, { "epoch": 8.541908713692946, "grad_norm": 78.95197296142578, "learning_rate": 1.6585560165975106e-05, "loss": 1.0423, "step": 10293 }, { "epoch": 8.542738589211618, "grad_norm": 20.767881393432617, "learning_rate": 1.658522821576764e-05, "loss": 1.288, "step": 10294 }, { "epoch": 8.54356846473029, "grad_norm": 19.437177658081055, "learning_rate": 1.6584896265560167e-05, "loss": 1.0878, "step": 10295 }, { "epoch": 8.544398340248962, "grad_norm": 16.305587768554688, "learning_rate": 1.65845643153527e-05, "loss": 0.7673, "step": 10296 }, { "epoch": 8.545228215767635, "grad_norm": 14.664990425109863, "learning_rate": 1.658423236514523e-05, "loss": 1.188, "step": 10297 }, { "epoch": 8.546058091286307, "grad_norm": 27.395668029785156, "learning_rate": 1.658390041493776e-05, "loss": 1.6926, "step": 10298 }, { "epoch": 8.546887966804979, "grad_norm": 30.533912658691406, "learning_rate": 1.6583568464730292e-05, "loss": 1.6932, "step": 10299 }, { "epoch": 8.547717842323651, "grad_norm": 23.760122299194336, "learning_rate": 1.658323651452282e-05, "loss": 1.5427, "step": 10300 }, { "epoch": 8.548547717842323, "grad_norm": 24.937318801879883, "learning_rate": 1.6582904564315353e-05, "loss": 1.0401, "step": 10301 }, { "epoch": 8.549377593360996, "grad_norm": 23.115100860595703, "learning_rate": 1.6582572614107885e-05, "loss": 1.0269, "step": 10302 }, { "epoch": 8.550207468879668, "grad_norm": 20.34756088256836, "learning_rate": 1.6582240663900414e-05, "loss": 1.0339, "step": 10303 }, { "epoch": 8.55103734439834, "grad_norm": 29.170347213745117, "learning_rate": 1.6581908713692946e-05, "loss": 1.9459, "step": 10304 }, { "epoch": 
8.551867219917012, "grad_norm": 15.41974925994873, "learning_rate": 1.6581576763485478e-05, "loss": 0.8482, "step": 10305 }, { "epoch": 8.552697095435684, "grad_norm": 24.579912185668945, "learning_rate": 1.658124481327801e-05, "loss": 1.5928, "step": 10306 }, { "epoch": 8.553526970954357, "grad_norm": 20.0638484954834, "learning_rate": 1.6580912863070542e-05, "loss": 1.2548, "step": 10307 }, { "epoch": 8.554356846473029, "grad_norm": 29.975996017456055, "learning_rate": 1.658058091286307e-05, "loss": 1.202, "step": 10308 }, { "epoch": 8.555186721991701, "grad_norm": 16.828258514404297, "learning_rate": 1.6580248962655603e-05, "loss": 0.6568, "step": 10309 }, { "epoch": 8.556016597510373, "grad_norm": 16.93231201171875, "learning_rate": 1.6579917012448135e-05, "loss": 1.0896, "step": 10310 }, { "epoch": 8.556846473029045, "grad_norm": 36.08889389038086, "learning_rate": 1.6579585062240667e-05, "loss": 1.1572, "step": 10311 }, { "epoch": 8.557676348547718, "grad_norm": 16.53757095336914, "learning_rate": 1.6579253112033196e-05, "loss": 1.0654, "step": 10312 }, { "epoch": 8.55850622406639, "grad_norm": 16.073612213134766, "learning_rate": 1.6578921161825728e-05, "loss": 0.7617, "step": 10313 }, { "epoch": 8.559336099585062, "grad_norm": 30.5513916015625, "learning_rate": 1.657858921161826e-05, "loss": 2.2385, "step": 10314 }, { "epoch": 8.560165975103734, "grad_norm": 15.976744651794434, "learning_rate": 1.657825726141079e-05, "loss": 0.6465, "step": 10315 }, { "epoch": 8.560995850622406, "grad_norm": 18.847166061401367, "learning_rate": 1.657792531120332e-05, "loss": 1.1483, "step": 10316 }, { "epoch": 8.561825726141079, "grad_norm": 23.22795295715332, "learning_rate": 1.6577593360995853e-05, "loss": 0.8309, "step": 10317 }, { "epoch": 8.56265560165975, "grad_norm": 19.603641510009766, "learning_rate": 1.6577261410788382e-05, "loss": 0.5219, "step": 10318 }, { "epoch": 8.563485477178423, "grad_norm": 27.617523193359375, "learning_rate": 1.6576929460580914e-05, 
"loss": 1.5376, "step": 10319 }, { "epoch": 8.564315352697095, "grad_norm": 24.39755630493164, "learning_rate": 1.6576597510373446e-05, "loss": 1.198, "step": 10320 }, { "epoch": 8.565145228215767, "grad_norm": 42.88117980957031, "learning_rate": 1.6576265560165975e-05, "loss": 1.4817, "step": 10321 }, { "epoch": 8.56597510373444, "grad_norm": 21.191362380981445, "learning_rate": 1.6575933609958507e-05, "loss": 1.1728, "step": 10322 }, { "epoch": 8.566804979253112, "grad_norm": 19.001920700073242, "learning_rate": 1.657560165975104e-05, "loss": 0.9436, "step": 10323 }, { "epoch": 8.567634854771784, "grad_norm": 34.187679290771484, "learning_rate": 1.6575269709543568e-05, "loss": 0.8608, "step": 10324 }, { "epoch": 8.568464730290456, "grad_norm": 17.86722755432129, "learning_rate": 1.65749377593361e-05, "loss": 1.2569, "step": 10325 }, { "epoch": 8.569294605809128, "grad_norm": 35.15620422363281, "learning_rate": 1.6574605809128632e-05, "loss": 1.6516, "step": 10326 }, { "epoch": 8.5701244813278, "grad_norm": 19.727624893188477, "learning_rate": 1.6574273858921164e-05, "loss": 0.9392, "step": 10327 }, { "epoch": 8.570954356846473, "grad_norm": 24.9578914642334, "learning_rate": 1.6573941908713693e-05, "loss": 1.1803, "step": 10328 }, { "epoch": 8.571784232365145, "grad_norm": 51.289310455322266, "learning_rate": 1.6573609958506225e-05, "loss": 0.9133, "step": 10329 }, { "epoch": 8.572614107883817, "grad_norm": 20.37783432006836, "learning_rate": 1.6573278008298757e-05, "loss": 1.1184, "step": 10330 }, { "epoch": 8.57344398340249, "grad_norm": 21.504220962524414, "learning_rate": 1.657294605809129e-05, "loss": 0.8076, "step": 10331 }, { "epoch": 8.574273858921162, "grad_norm": 141.51417541503906, "learning_rate": 1.657261410788382e-05, "loss": 0.7866, "step": 10332 }, { "epoch": 8.575103734439834, "grad_norm": 16.470836639404297, "learning_rate": 1.657228215767635e-05, "loss": 0.7812, "step": 10333 }, { "epoch": 8.575933609958506, "grad_norm": 22.576929092407227, 
"learning_rate": 1.6571950207468882e-05, "loss": 1.167, "step": 10334 }, { "epoch": 8.576763485477178, "grad_norm": 41.36112594604492, "learning_rate": 1.6571618257261414e-05, "loss": 1.3635, "step": 10335 }, { "epoch": 8.57759336099585, "grad_norm": 32.642906188964844, "learning_rate": 1.6571286307053943e-05, "loss": 1.0365, "step": 10336 }, { "epoch": 8.578423236514523, "grad_norm": 21.46156883239746, "learning_rate": 1.6570954356846475e-05, "loss": 1.0429, "step": 10337 }, { "epoch": 8.579253112033195, "grad_norm": 29.222118377685547, "learning_rate": 1.6570622406639004e-05, "loss": 0.946, "step": 10338 }, { "epoch": 8.580082987551867, "grad_norm": 27.80172348022461, "learning_rate": 1.6570290456431536e-05, "loss": 0.7166, "step": 10339 }, { "epoch": 8.58091286307054, "grad_norm": 47.05173873901367, "learning_rate": 1.6569958506224068e-05, "loss": 1.4962, "step": 10340 }, { "epoch": 8.581742738589211, "grad_norm": 41.8704833984375, "learning_rate": 1.6569626556016597e-05, "loss": 0.7419, "step": 10341 }, { "epoch": 8.582572614107884, "grad_norm": 32.809288024902344, "learning_rate": 1.656929460580913e-05, "loss": 0.978, "step": 10342 }, { "epoch": 8.583402489626556, "grad_norm": 21.344858169555664, "learning_rate": 1.656896265560166e-05, "loss": 1.1864, "step": 10343 }, { "epoch": 8.584232365145228, "grad_norm": 15.924166679382324, "learning_rate": 1.6568630705394193e-05, "loss": 0.6463, "step": 10344 }, { "epoch": 8.5850622406639, "grad_norm": 19.747133255004883, "learning_rate": 1.6568298755186722e-05, "loss": 0.8887, "step": 10345 }, { "epoch": 8.585892116182572, "grad_norm": 20.893978118896484, "learning_rate": 1.6567966804979254e-05, "loss": 1.1611, "step": 10346 }, { "epoch": 8.586721991701245, "grad_norm": 24.137176513671875, "learning_rate": 1.6567634854771786e-05, "loss": 0.7565, "step": 10347 }, { "epoch": 8.587551867219917, "grad_norm": 20.10605812072754, "learning_rate": 1.6567302904564318e-05, "loss": 0.9812, "step": 10348 }, { "epoch": 
8.588381742738589, "grad_norm": 17.959823608398438, "learning_rate": 1.6566970954356847e-05, "loss": 1.1365, "step": 10349 }, { "epoch": 8.589211618257261, "grad_norm": 27.89592742919922, "learning_rate": 1.656663900414938e-05, "loss": 0.6535, "step": 10350 }, { "epoch": 8.590041493775933, "grad_norm": 19.62351417541504, "learning_rate": 1.656630705394191e-05, "loss": 0.8996, "step": 10351 }, { "epoch": 8.590871369294605, "grad_norm": 22.822561264038086, "learning_rate": 1.6565975103734443e-05, "loss": 0.7377, "step": 10352 }, { "epoch": 8.591701244813278, "grad_norm": 27.64276695251465, "learning_rate": 1.6565643153526972e-05, "loss": 1.3095, "step": 10353 }, { "epoch": 8.59253112033195, "grad_norm": 15.89505386352539, "learning_rate": 1.6565311203319504e-05, "loss": 1.0103, "step": 10354 }, { "epoch": 8.593360995850622, "grad_norm": 19.098037719726562, "learning_rate": 1.6564979253112036e-05, "loss": 0.7112, "step": 10355 }, { "epoch": 8.594190871369294, "grad_norm": 18.8348445892334, "learning_rate": 1.6564647302904565e-05, "loss": 1.6032, "step": 10356 }, { "epoch": 8.595020746887966, "grad_norm": 29.38570785522461, "learning_rate": 1.6564315352697097e-05, "loss": 0.7044, "step": 10357 }, { "epoch": 8.595850622406639, "grad_norm": 18.61722183227539, "learning_rate": 1.656398340248963e-05, "loss": 0.9816, "step": 10358 }, { "epoch": 8.59668049792531, "grad_norm": 32.16459274291992, "learning_rate": 1.6563651452282158e-05, "loss": 1.2058, "step": 10359 }, { "epoch": 8.597510373443983, "grad_norm": 24.643802642822266, "learning_rate": 1.656331950207469e-05, "loss": 0.8268, "step": 10360 }, { "epoch": 8.598340248962655, "grad_norm": 27.798818588256836, "learning_rate": 1.6562987551867222e-05, "loss": 1.7554, "step": 10361 }, { "epoch": 8.599170124481327, "grad_norm": 18.109663009643555, "learning_rate": 1.656265560165975e-05, "loss": 0.7093, "step": 10362 }, { "epoch": 8.6, "grad_norm": 24.292617797851562, "learning_rate": 1.6562323651452283e-05, "loss": 1.1145, 
"step": 10363 }, { "epoch": 8.600829875518672, "grad_norm": 42.6205940246582, "learning_rate": 1.6561991701244815e-05, "loss": 1.1538, "step": 10364 }, { "epoch": 8.601659751037344, "grad_norm": 30.183420181274414, "learning_rate": 1.6561659751037347e-05, "loss": 2.0509, "step": 10365 }, { "epoch": 8.602489626556016, "grad_norm": 24.677867889404297, "learning_rate": 1.6561327800829876e-05, "loss": 1.6163, "step": 10366 }, { "epoch": 8.603319502074688, "grad_norm": 26.538393020629883, "learning_rate": 1.6560995850622408e-05, "loss": 0.8315, "step": 10367 }, { "epoch": 8.60414937759336, "grad_norm": 28.264366149902344, "learning_rate": 1.656066390041494e-05, "loss": 1.0339, "step": 10368 }, { "epoch": 8.604979253112033, "grad_norm": 18.20150375366211, "learning_rate": 1.6560331950207472e-05, "loss": 1.2059, "step": 10369 }, { "epoch": 8.605809128630705, "grad_norm": 28.93831443786621, "learning_rate": 1.656e-05, "loss": 1.8458, "step": 10370 }, { "epoch": 8.606639004149377, "grad_norm": 31.93326759338379, "learning_rate": 1.6559668049792533e-05, "loss": 1.7803, "step": 10371 }, { "epoch": 8.60746887966805, "grad_norm": 24.045339584350586, "learning_rate": 1.6559336099585065e-05, "loss": 1.324, "step": 10372 }, { "epoch": 8.608298755186722, "grad_norm": 19.65566635131836, "learning_rate": 1.6559004149377597e-05, "loss": 0.6406, "step": 10373 }, { "epoch": 8.609128630705394, "grad_norm": 26.874141693115234, "learning_rate": 1.6558672199170126e-05, "loss": 0.5295, "step": 10374 }, { "epoch": 8.609958506224066, "grad_norm": 24.230546951293945, "learning_rate": 1.6558340248962658e-05, "loss": 1.2868, "step": 10375 }, { "epoch": 8.610788381742738, "grad_norm": 12.172276496887207, "learning_rate": 1.6558008298755187e-05, "loss": 0.3784, "step": 10376 }, { "epoch": 8.61161825726141, "grad_norm": 24.468719482421875, "learning_rate": 1.655767634854772e-05, "loss": 1.0546, "step": 10377 }, { "epoch": 8.612448132780083, "grad_norm": 17.73157501220703, "learning_rate": 
1.655734439834025e-05, "loss": 0.7712, "step": 10378 }, { "epoch": 8.613278008298755, "grad_norm": 18.35234832763672, "learning_rate": 1.655701244813278e-05, "loss": 0.6047, "step": 10379 }, { "epoch": 8.614107883817427, "grad_norm": 43.78858947753906, "learning_rate": 1.655668049792531e-05, "loss": 2.4687, "step": 10380 }, { "epoch": 8.6149377593361, "grad_norm": 21.57187271118164, "learning_rate": 1.6556348547717844e-05, "loss": 1.4072, "step": 10381 }, { "epoch": 8.615767634854771, "grad_norm": 38.071922302246094, "learning_rate": 1.6556016597510372e-05, "loss": 1.3032, "step": 10382 }, { "epoch": 8.616597510373444, "grad_norm": 34.55800247192383, "learning_rate": 1.6555684647302905e-05, "loss": 1.4348, "step": 10383 }, { "epoch": 8.617427385892116, "grad_norm": 19.050809860229492, "learning_rate": 1.6555352697095437e-05, "loss": 1.1747, "step": 10384 }, { "epoch": 8.618257261410788, "grad_norm": 23.235536575317383, "learning_rate": 1.655502074688797e-05, "loss": 1.2549, "step": 10385 }, { "epoch": 8.61908713692946, "grad_norm": 24.586854934692383, "learning_rate": 1.65546887966805e-05, "loss": 0.9234, "step": 10386 }, { "epoch": 8.619917012448132, "grad_norm": 18.731191635131836, "learning_rate": 1.655435684647303e-05, "loss": 1.2503, "step": 10387 }, { "epoch": 8.620746887966805, "grad_norm": 26.004045486450195, "learning_rate": 1.655402489626556e-05, "loss": 0.9293, "step": 10388 }, { "epoch": 8.621576763485477, "grad_norm": 32.74159240722656, "learning_rate": 1.6553692946058094e-05, "loss": 1.1869, "step": 10389 }, { "epoch": 8.622406639004149, "grad_norm": 20.568262100219727, "learning_rate": 1.6553360995850626e-05, "loss": 1.4486, "step": 10390 }, { "epoch": 8.623236514522821, "grad_norm": 23.197582244873047, "learning_rate": 1.6553029045643155e-05, "loss": 1.1924, "step": 10391 }, { "epoch": 8.624066390041493, "grad_norm": 15.380741119384766, "learning_rate": 1.6552697095435687e-05, "loss": 0.866, "step": 10392 }, { "epoch": 8.624896265560166, 
"grad_norm": 28.03728485107422, "learning_rate": 1.655236514522822e-05, "loss": 1.2985, "step": 10393 }, { "epoch": 8.625726141078838, "grad_norm": 21.829607009887695, "learning_rate": 1.6552033195020748e-05, "loss": 0.7215, "step": 10394 }, { "epoch": 8.62655601659751, "grad_norm": 19.977848052978516, "learning_rate": 1.655170124481328e-05, "loss": 1.27, "step": 10395 }, { "epoch": 8.627385892116182, "grad_norm": 32.4883918762207, "learning_rate": 1.6551369294605812e-05, "loss": 1.6264, "step": 10396 }, { "epoch": 8.628215767634854, "grad_norm": 34.83769226074219, "learning_rate": 1.655103734439834e-05, "loss": 1.2402, "step": 10397 }, { "epoch": 8.629045643153527, "grad_norm": 20.5194149017334, "learning_rate": 1.6550705394190873e-05, "loss": 1.1667, "step": 10398 }, { "epoch": 8.629875518672199, "grad_norm": 31.115785598754883, "learning_rate": 1.65503734439834e-05, "loss": 0.8587, "step": 10399 }, { "epoch": 8.630705394190871, "grad_norm": 31.075584411621094, "learning_rate": 1.6550041493775933e-05, "loss": 2.0281, "step": 10400 }, { "epoch": 8.631535269709543, "grad_norm": 26.744342803955078, "learning_rate": 1.6549709543568466e-05, "loss": 0.7516, "step": 10401 }, { "epoch": 8.632365145228215, "grad_norm": 24.79950714111328, "learning_rate": 1.6549377593360998e-05, "loss": 1.0976, "step": 10402 }, { "epoch": 8.633195020746887, "grad_norm": 20.5065860748291, "learning_rate": 1.6549045643153526e-05, "loss": 0.8555, "step": 10403 }, { "epoch": 8.63402489626556, "grad_norm": 23.98908805847168, "learning_rate": 1.654871369294606e-05, "loss": 0.9881, "step": 10404 }, { "epoch": 8.634854771784232, "grad_norm": 19.900192260742188, "learning_rate": 1.654838174273859e-05, "loss": 1.3263, "step": 10405 }, { "epoch": 8.635684647302904, "grad_norm": 22.30304527282715, "learning_rate": 1.6548049792531123e-05, "loss": 0.792, "step": 10406 }, { "epoch": 8.636514522821576, "grad_norm": 20.241111755371094, "learning_rate": 1.654771784232365e-05, "loss": 0.5369, "step": 10407 
}, { "epoch": 8.637344398340248, "grad_norm": 23.775638580322266, "learning_rate": 1.6547385892116184e-05, "loss": 1.7945, "step": 10408 }, { "epoch": 8.63817427385892, "grad_norm": 21.454303741455078, "learning_rate": 1.6547053941908716e-05, "loss": 1.4586, "step": 10409 }, { "epoch": 8.639004149377593, "grad_norm": 33.8049201965332, "learning_rate": 1.6546721991701248e-05, "loss": 1.2195, "step": 10410 }, { "epoch": 8.639834024896265, "grad_norm": 29.77959632873535, "learning_rate": 1.654639004149378e-05, "loss": 1.6734, "step": 10411 }, { "epoch": 8.640663900414937, "grad_norm": 25.42279052734375, "learning_rate": 1.654605809128631e-05, "loss": 0.7871, "step": 10412 }, { "epoch": 8.64149377593361, "grad_norm": 14.57961654663086, "learning_rate": 1.654572614107884e-05, "loss": 1.1119, "step": 10413 }, { "epoch": 8.642323651452282, "grad_norm": 19.769166946411133, "learning_rate": 1.6545394190871373e-05, "loss": 1.1735, "step": 10414 }, { "epoch": 8.643153526970954, "grad_norm": 18.80046844482422, "learning_rate": 1.65450622406639e-05, "loss": 0.551, "step": 10415 }, { "epoch": 8.643983402489626, "grad_norm": 20.40193748474121, "learning_rate": 1.6544730290456434e-05, "loss": 1.2073, "step": 10416 }, { "epoch": 8.644813278008298, "grad_norm": 21.585346221923828, "learning_rate": 1.6544398340248962e-05, "loss": 1.2352, "step": 10417 }, { "epoch": 8.64564315352697, "grad_norm": 25.092254638671875, "learning_rate": 1.6544066390041494e-05, "loss": 0.8582, "step": 10418 }, { "epoch": 8.646473029045643, "grad_norm": 26.48040199279785, "learning_rate": 1.6543734439834027e-05, "loss": 0.9311, "step": 10419 }, { "epoch": 8.647302904564315, "grad_norm": 19.990646362304688, "learning_rate": 1.6543402489626555e-05, "loss": 1.2672, "step": 10420 }, { "epoch": 8.648132780082987, "grad_norm": 16.5595703125, "learning_rate": 1.6543070539419087e-05, "loss": 0.8675, "step": 10421 }, { "epoch": 8.64896265560166, "grad_norm": 20.215974807739258, "learning_rate": 
1.654273858921162e-05, "loss": 1.1954, "step": 10422 }, { "epoch": 8.649792531120331, "grad_norm": 13.647250175476074, "learning_rate": 1.654240663900415e-05, "loss": 0.7624, "step": 10423 }, { "epoch": 8.650622406639004, "grad_norm": 17.99891471862793, "learning_rate": 1.654207468879668e-05, "loss": 1.0364, "step": 10424 }, { "epoch": 8.651452282157676, "grad_norm": 44.81045913696289, "learning_rate": 1.6541742738589212e-05, "loss": 1.5655, "step": 10425 }, { "epoch": 8.652282157676348, "grad_norm": 24.97833824157715, "learning_rate": 1.6541410788381745e-05, "loss": 1.0474, "step": 10426 }, { "epoch": 8.65311203319502, "grad_norm": 23.673946380615234, "learning_rate": 1.6541078838174277e-05, "loss": 1.4649, "step": 10427 }, { "epoch": 8.653941908713692, "grad_norm": 29.51765251159668, "learning_rate": 1.6540746887966805e-05, "loss": 1.302, "step": 10428 }, { "epoch": 8.654771784232365, "grad_norm": 47.014862060546875, "learning_rate": 1.6540414937759337e-05, "loss": 1.9494, "step": 10429 }, { "epoch": 8.655601659751037, "grad_norm": 31.88035011291504, "learning_rate": 1.654008298755187e-05, "loss": 1.2047, "step": 10430 }, { "epoch": 8.656431535269709, "grad_norm": 68.30313873291016, "learning_rate": 1.65397510373444e-05, "loss": 0.9131, "step": 10431 }, { "epoch": 8.657261410788381, "grad_norm": 50.12113952636719, "learning_rate": 1.653941908713693e-05, "loss": 1.0521, "step": 10432 }, { "epoch": 8.658091286307053, "grad_norm": 21.23947525024414, "learning_rate": 1.6539087136929462e-05, "loss": 0.7066, "step": 10433 }, { "epoch": 8.658921161825726, "grad_norm": 17.285188674926758, "learning_rate": 1.6538755186721995e-05, "loss": 0.9198, "step": 10434 }, { "epoch": 8.659751037344398, "grad_norm": 14.435378074645996, "learning_rate": 1.6538423236514523e-05, "loss": 0.5561, "step": 10435 }, { "epoch": 8.66058091286307, "grad_norm": 27.84634780883789, "learning_rate": 1.6538091286307055e-05, "loss": 1.3795, "step": 10436 }, { "epoch": 8.661410788381742, "grad_norm": 
30.953460693359375, "learning_rate": 1.6537759336099588e-05, "loss": 0.9468, "step": 10437 }, { "epoch": 8.662240663900414, "grad_norm": 32.23552322387695, "learning_rate": 1.6537427385892116e-05, "loss": 1.0231, "step": 10438 }, { "epoch": 8.663070539419087, "grad_norm": 32.8519172668457, "learning_rate": 1.653709543568465e-05, "loss": 0.6755, "step": 10439 }, { "epoch": 8.663900414937759, "grad_norm": 18.189565658569336, "learning_rate": 1.653676348547718e-05, "loss": 1.1855, "step": 10440 }, { "epoch": 8.664730290456431, "grad_norm": 38.17177963256836, "learning_rate": 1.653643153526971e-05, "loss": 1.3457, "step": 10441 }, { "epoch": 8.665560165975103, "grad_norm": 23.71241569519043, "learning_rate": 1.653609958506224e-05, "loss": 1.4699, "step": 10442 }, { "epoch": 8.666390041493775, "grad_norm": 42.44270324707031, "learning_rate": 1.6535767634854773e-05, "loss": 1.3935, "step": 10443 }, { "epoch": 8.667219917012448, "grad_norm": 33.88222885131836, "learning_rate": 1.6535435684647306e-05, "loss": 1.6917, "step": 10444 }, { "epoch": 8.66804979253112, "grad_norm": 37.712791442871094, "learning_rate": 1.6535103734439834e-05, "loss": 1.5696, "step": 10445 }, { "epoch": 8.668879668049792, "grad_norm": 21.512014389038086, "learning_rate": 1.6534771784232366e-05, "loss": 0.7194, "step": 10446 }, { "epoch": 8.669709543568464, "grad_norm": 21.67876434326172, "learning_rate": 1.65344398340249e-05, "loss": 0.6541, "step": 10447 }, { "epoch": 8.670539419087136, "grad_norm": 24.527673721313477, "learning_rate": 1.653410788381743e-05, "loss": 1.2397, "step": 10448 }, { "epoch": 8.671369294605809, "grad_norm": 18.53772735595703, "learning_rate": 1.653377593360996e-05, "loss": 0.8384, "step": 10449 }, { "epoch": 8.67219917012448, "grad_norm": 20.360916137695312, "learning_rate": 1.653344398340249e-05, "loss": 0.897, "step": 10450 }, { "epoch": 8.673029045643153, "grad_norm": 52.75148391723633, "learning_rate": 1.6533112033195023e-05, "loss": 1.3924, "step": 10451 }, { 
"epoch": 8.673858921161825, "grad_norm": 25.06912612915039, "learning_rate": 1.6532780082987556e-05, "loss": 1.2027, "step": 10452 }, { "epoch": 8.674688796680497, "grad_norm": 22.973224639892578, "learning_rate": 1.6532448132780084e-05, "loss": 1.5717, "step": 10453 }, { "epoch": 8.67551867219917, "grad_norm": 24.4836368560791, "learning_rate": 1.6532116182572616e-05, "loss": 1.0211, "step": 10454 }, { "epoch": 8.676348547717842, "grad_norm": 38.435665130615234, "learning_rate": 1.6531784232365145e-05, "loss": 1.4733, "step": 10455 }, { "epoch": 8.677178423236514, "grad_norm": 40.163997650146484, "learning_rate": 1.6531452282157677e-05, "loss": 2.1247, "step": 10456 }, { "epoch": 8.678008298755186, "grad_norm": 17.20093536376953, "learning_rate": 1.653112033195021e-05, "loss": 0.8759, "step": 10457 }, { "epoch": 8.678838174273858, "grad_norm": 24.568466186523438, "learning_rate": 1.6530788381742738e-05, "loss": 1.0787, "step": 10458 }, { "epoch": 8.67966804979253, "grad_norm": 19.888511657714844, "learning_rate": 1.653045643153527e-05, "loss": 1.193, "step": 10459 }, { "epoch": 8.680497925311203, "grad_norm": 37.376747131347656, "learning_rate": 1.6530124481327802e-05, "loss": 1.1439, "step": 10460 }, { "epoch": 8.681327800829875, "grad_norm": 23.678234100341797, "learning_rate": 1.652979253112033e-05, "loss": 1.2683, "step": 10461 }, { "epoch": 8.682157676348547, "grad_norm": 30.167219161987305, "learning_rate": 1.6529460580912863e-05, "loss": 0.9899, "step": 10462 }, { "epoch": 8.68298755186722, "grad_norm": 28.189775466918945, "learning_rate": 1.6529128630705395e-05, "loss": 1.2992, "step": 10463 }, { "epoch": 8.683817427385891, "grad_norm": 32.46502685546875, "learning_rate": 1.6528796680497927e-05, "loss": 1.1633, "step": 10464 }, { "epoch": 8.684647302904564, "grad_norm": 21.628198623657227, "learning_rate": 1.652846473029046e-05, "loss": 1.1584, "step": 10465 }, { "epoch": 8.685477178423236, "grad_norm": 18.55147933959961, "learning_rate": 
1.6528132780082988e-05, "loss": 0.8539, "step": 10466 }, { "epoch": 8.686307053941908, "grad_norm": 24.965167999267578, "learning_rate": 1.652780082987552e-05, "loss": 1.2562, "step": 10467 }, { "epoch": 8.68713692946058, "grad_norm": 35.46393585205078, "learning_rate": 1.6527468879668052e-05, "loss": 1.7759, "step": 10468 }, { "epoch": 8.687966804979252, "grad_norm": 25.25603675842285, "learning_rate": 1.6527136929460584e-05, "loss": 1.3944, "step": 10469 }, { "epoch": 8.688796680497925, "grad_norm": 17.810150146484375, "learning_rate": 1.6526804979253113e-05, "loss": 1.2705, "step": 10470 }, { "epoch": 8.689626556016597, "grad_norm": 43.73033142089844, "learning_rate": 1.6526473029045645e-05, "loss": 1.0136, "step": 10471 }, { "epoch": 8.690456431535269, "grad_norm": 61.823524475097656, "learning_rate": 1.6526141078838177e-05, "loss": 1.09, "step": 10472 }, { "epoch": 8.691286307053941, "grad_norm": 16.215057373046875, "learning_rate": 1.6525809128630706e-05, "loss": 0.6471, "step": 10473 }, { "epoch": 8.692116182572613, "grad_norm": 18.15584945678711, "learning_rate": 1.6525477178423238e-05, "loss": 0.9175, "step": 10474 }, { "epoch": 8.692946058091286, "grad_norm": 23.330656051635742, "learning_rate": 1.652514522821577e-05, "loss": 1.141, "step": 10475 }, { "epoch": 8.693775933609958, "grad_norm": 17.54972267150879, "learning_rate": 1.65248132780083e-05, "loss": 0.753, "step": 10476 }, { "epoch": 8.69460580912863, "grad_norm": 19.043001174926758, "learning_rate": 1.652448132780083e-05, "loss": 0.6062, "step": 10477 }, { "epoch": 8.695435684647302, "grad_norm": 29.79399299621582, "learning_rate": 1.652414937759336e-05, "loss": 1.645, "step": 10478 }, { "epoch": 8.696265560165974, "grad_norm": 19.94722557067871, "learning_rate": 1.6523817427385892e-05, "loss": 0.7628, "step": 10479 }, { "epoch": 8.697095435684647, "grad_norm": 20.59109115600586, "learning_rate": 1.6523485477178424e-05, "loss": 0.7845, "step": 10480 }, { "epoch": 8.697925311203319, "grad_norm": 
16.92605972290039, "learning_rate": 1.6523153526970956e-05, "loss": 0.9127, "step": 10481 }, { "epoch": 8.698755186721991, "grad_norm": 39.08035659790039, "learning_rate": 1.6522821576763485e-05, "loss": 0.5962, "step": 10482 }, { "epoch": 8.699585062240663, "grad_norm": 35.43196487426758, "learning_rate": 1.6522489626556017e-05, "loss": 1.0749, "step": 10483 }, { "epoch": 8.700414937759335, "grad_norm": 58.936954498291016, "learning_rate": 1.652215767634855e-05, "loss": 0.8563, "step": 10484 }, { "epoch": 8.701244813278008, "grad_norm": 25.6299991607666, "learning_rate": 1.652182572614108e-05, "loss": 0.7818, "step": 10485 }, { "epoch": 8.70207468879668, "grad_norm": 26.13521957397461, "learning_rate": 1.652149377593361e-05, "loss": 2.3805, "step": 10486 }, { "epoch": 8.702904564315352, "grad_norm": 33.04170227050781, "learning_rate": 1.6521161825726142e-05, "loss": 1.2455, "step": 10487 }, { "epoch": 8.703734439834024, "grad_norm": 22.614517211914062, "learning_rate": 1.6520829875518674e-05, "loss": 0.8804, "step": 10488 }, { "epoch": 8.704564315352696, "grad_norm": 23.310199737548828, "learning_rate": 1.6520497925311206e-05, "loss": 1.1338, "step": 10489 }, { "epoch": 8.705394190871369, "grad_norm": 17.548946380615234, "learning_rate": 1.652016597510374e-05, "loss": 0.7581, "step": 10490 }, { "epoch": 8.70622406639004, "grad_norm": 14.682740211486816, "learning_rate": 1.6519834024896267e-05, "loss": 0.6872, "step": 10491 }, { "epoch": 8.707053941908713, "grad_norm": 35.788204193115234, "learning_rate": 1.65195020746888e-05, "loss": 2.0758, "step": 10492 }, { "epoch": 8.707883817427385, "grad_norm": 15.975248336791992, "learning_rate": 1.6519170124481328e-05, "loss": 0.6944, "step": 10493 }, { "epoch": 8.708713692946057, "grad_norm": 18.62581443786621, "learning_rate": 1.651883817427386e-05, "loss": 0.6648, "step": 10494 }, { "epoch": 8.70954356846473, "grad_norm": 21.428937911987305, "learning_rate": 1.6518506224066392e-05, "loss": 0.7621, "step": 10495 }, { 
"epoch": 8.710373443983402, "grad_norm": 24.250394821166992, "learning_rate": 1.651817427385892e-05, "loss": 1.2479, "step": 10496 }, { "epoch": 8.711203319502074, "grad_norm": 48.204715728759766, "learning_rate": 1.6517842323651453e-05, "loss": 1.2829, "step": 10497 }, { "epoch": 8.712033195020746, "grad_norm": 22.650394439697266, "learning_rate": 1.6517510373443985e-05, "loss": 1.3175, "step": 10498 }, { "epoch": 8.712863070539418, "grad_norm": 35.27265167236328, "learning_rate": 1.6517178423236514e-05, "loss": 0.765, "step": 10499 }, { "epoch": 8.71369294605809, "grad_norm": 13.741789817810059, "learning_rate": 1.6516846473029046e-05, "loss": 0.5642, "step": 10500 }, { "epoch": 8.714522821576763, "grad_norm": 31.463563919067383, "learning_rate": 1.6516514522821578e-05, "loss": 0.4091, "step": 10501 }, { "epoch": 8.715352697095435, "grad_norm": 34.99537658691406, "learning_rate": 1.651618257261411e-05, "loss": 1.3751, "step": 10502 }, { "epoch": 8.716182572614107, "grad_norm": 34.92210006713867, "learning_rate": 1.651585062240664e-05, "loss": 0.4596, "step": 10503 }, { "epoch": 8.71701244813278, "grad_norm": 18.409753799438477, "learning_rate": 1.651551867219917e-05, "loss": 0.8647, "step": 10504 }, { "epoch": 8.717842323651452, "grad_norm": 28.0291748046875, "learning_rate": 1.6515186721991703e-05, "loss": 2.2238, "step": 10505 }, { "epoch": 8.718672199170124, "grad_norm": 21.659496307373047, "learning_rate": 1.6514854771784235e-05, "loss": 0.8752, "step": 10506 }, { "epoch": 8.719502074688796, "grad_norm": 19.807323455810547, "learning_rate": 1.6514522821576764e-05, "loss": 1.1411, "step": 10507 }, { "epoch": 8.720331950207468, "grad_norm": 19.427562713623047, "learning_rate": 1.6514190871369296e-05, "loss": 1.1806, "step": 10508 }, { "epoch": 8.72116182572614, "grad_norm": 31.532981872558594, "learning_rate": 1.6513858921161828e-05, "loss": 1.9028, "step": 10509 }, { "epoch": 8.721991701244812, "grad_norm": 13.346514701843262, "learning_rate": 
1.651352697095436e-05, "loss": 0.6618, "step": 10510 }, { "epoch": 8.722821576763485, "grad_norm": 25.987823486328125, "learning_rate": 1.651319502074689e-05, "loss": 0.8385, "step": 10511 }, { "epoch": 8.723651452282157, "grad_norm": 28.227283477783203, "learning_rate": 1.651286307053942e-05, "loss": 1.5354, "step": 10512 }, { "epoch": 8.724481327800829, "grad_norm": 27.469436645507812, "learning_rate": 1.6512531120331953e-05, "loss": 1.6353, "step": 10513 }, { "epoch": 8.725311203319501, "grad_norm": 16.21998405456543, "learning_rate": 1.6512199170124482e-05, "loss": 1.073, "step": 10514 }, { "epoch": 8.726141078838173, "grad_norm": 26.20847511291504, "learning_rate": 1.6511867219917014e-05, "loss": 1.488, "step": 10515 }, { "epoch": 8.726970954356846, "grad_norm": 40.256370544433594, "learning_rate": 1.6511535269709543e-05, "loss": 1.8682, "step": 10516 }, { "epoch": 8.727800829875518, "grad_norm": 29.81745719909668, "learning_rate": 1.6511203319502075e-05, "loss": 1.6049, "step": 10517 }, { "epoch": 8.72863070539419, "grad_norm": 31.368343353271484, "learning_rate": 1.6510871369294607e-05, "loss": 1.467, "step": 10518 }, { "epoch": 8.729460580912862, "grad_norm": 26.6798152923584, "learning_rate": 1.651053941908714e-05, "loss": 1.1037, "step": 10519 }, { "epoch": 8.730290456431534, "grad_norm": 26.295820236206055, "learning_rate": 1.6510207468879668e-05, "loss": 1.1818, "step": 10520 }, { "epoch": 8.731120331950207, "grad_norm": 34.042701721191406, "learning_rate": 1.65098755186722e-05, "loss": 1.1336, "step": 10521 }, { "epoch": 8.731950207468879, "grad_norm": 18.64392852783203, "learning_rate": 1.6509543568464732e-05, "loss": 0.9, "step": 10522 }, { "epoch": 8.732780082987551, "grad_norm": 15.675250053405762, "learning_rate": 1.6509211618257264e-05, "loss": 0.7624, "step": 10523 }, { "epoch": 8.733609958506223, "grad_norm": 22.935070037841797, "learning_rate": 1.6508879668049793e-05, "loss": 0.8276, "step": 10524 }, { "epoch": 8.734439834024897, "grad_norm": 
23.228404998779297, "learning_rate": 1.6508547717842325e-05, "loss": 1.3168, "step": 10525 }, { "epoch": 8.73526970954357, "grad_norm": 24.31768035888672, "learning_rate": 1.6508215767634857e-05, "loss": 1.1063, "step": 10526 }, { "epoch": 8.736099585062242, "grad_norm": 21.361331939697266, "learning_rate": 1.650788381742739e-05, "loss": 1.3236, "step": 10527 }, { "epoch": 8.736929460580914, "grad_norm": 24.271902084350586, "learning_rate": 1.6507551867219918e-05, "loss": 1.0511, "step": 10528 }, { "epoch": 8.737759336099586, "grad_norm": 24.72510528564453, "learning_rate": 1.650721991701245e-05, "loss": 1.3008, "step": 10529 }, { "epoch": 8.738589211618258, "grad_norm": 23.018657684326172, "learning_rate": 1.6506887966804982e-05, "loss": 1.0793, "step": 10530 }, { "epoch": 8.73941908713693, "grad_norm": 15.163212776184082, "learning_rate": 1.6506556016597514e-05, "loss": 0.3972, "step": 10531 }, { "epoch": 8.740248962655603, "grad_norm": 23.26821517944336, "learning_rate": 1.6506224066390043e-05, "loss": 0.5888, "step": 10532 }, { "epoch": 8.741078838174275, "grad_norm": 16.76658058166504, "learning_rate": 1.6505892116182575e-05, "loss": 0.9451, "step": 10533 }, { "epoch": 8.741908713692947, "grad_norm": 13.173227310180664, "learning_rate": 1.6505560165975104e-05, "loss": 0.8137, "step": 10534 }, { "epoch": 8.74273858921162, "grad_norm": 35.83478546142578, "learning_rate": 1.6505228215767636e-05, "loss": 1.1887, "step": 10535 }, { "epoch": 8.743568464730291, "grad_norm": 15.060298919677734, "learning_rate": 1.6504896265560168e-05, "loss": 0.9833, "step": 10536 }, { "epoch": 8.744398340248964, "grad_norm": 29.58378028869629, "learning_rate": 1.6504564315352697e-05, "loss": 1.6482, "step": 10537 }, { "epoch": 8.745228215767636, "grad_norm": 31.89873695373535, "learning_rate": 1.650423236514523e-05, "loss": 1.4667, "step": 10538 }, { "epoch": 8.746058091286308, "grad_norm": 31.514963150024414, "learning_rate": 1.650390041493776e-05, "loss": 1.1569, "step": 10539 }, { 
"epoch": 8.74688796680498, "grad_norm": 35.35123062133789, "learning_rate": 1.650356846473029e-05, "loss": 1.8077, "step": 10540 }, { "epoch": 8.747717842323652, "grad_norm": 35.55080032348633, "learning_rate": 1.650323651452282e-05, "loss": 0.894, "step": 10541 }, { "epoch": 8.748547717842325, "grad_norm": 27.33176040649414, "learning_rate": 1.6502904564315354e-05, "loss": 1.3675, "step": 10542 }, { "epoch": 8.749377593360997, "grad_norm": 15.861701965332031, "learning_rate": 1.6502572614107886e-05, "loss": 0.6715, "step": 10543 }, { "epoch": 8.750207468879669, "grad_norm": 23.943988800048828, "learning_rate": 1.6502240663900418e-05, "loss": 1.8743, "step": 10544 }, { "epoch": 8.751037344398341, "grad_norm": 25.903806686401367, "learning_rate": 1.6501908713692947e-05, "loss": 1.2433, "step": 10545 }, { "epoch": 8.751867219917013, "grad_norm": 15.659444808959961, "learning_rate": 1.650157676348548e-05, "loss": 0.7929, "step": 10546 }, { "epoch": 8.752697095435686, "grad_norm": 16.66707420349121, "learning_rate": 1.650124481327801e-05, "loss": 0.8672, "step": 10547 }, { "epoch": 8.753526970954358, "grad_norm": 18.79715347290039, "learning_rate": 1.6500912863070543e-05, "loss": 0.5912, "step": 10548 }, { "epoch": 8.75435684647303, "grad_norm": 24.222999572753906, "learning_rate": 1.6500580912863072e-05, "loss": 1.5005, "step": 10549 }, { "epoch": 8.755186721991702, "grad_norm": 21.956279754638672, "learning_rate": 1.6500248962655604e-05, "loss": 0.6656, "step": 10550 }, { "epoch": 8.756016597510374, "grad_norm": 25.017868041992188, "learning_rate": 1.6499917012448136e-05, "loss": 0.8231, "step": 10551 }, { "epoch": 8.756846473029047, "grad_norm": 22.949024200439453, "learning_rate": 1.6499585062240665e-05, "loss": 1.2574, "step": 10552 }, { "epoch": 8.757676348547719, "grad_norm": 40.44547653198242, "learning_rate": 1.6499253112033197e-05, "loss": 0.7686, "step": 10553 }, { "epoch": 8.758506224066391, "grad_norm": 16.29043960571289, "learning_rate": 
1.649892116182573e-05, "loss": 1.2398, "step": 10554 }, { "epoch": 8.759336099585063, "grad_norm": 30.751066207885742, "learning_rate": 1.6498589211618258e-05, "loss": 1.2732, "step": 10555 }, { "epoch": 8.760165975103735, "grad_norm": 25.574953079223633, "learning_rate": 1.649825726141079e-05, "loss": 1.1975, "step": 10556 }, { "epoch": 8.760995850622407, "grad_norm": 38.71738815307617, "learning_rate": 1.649792531120332e-05, "loss": 2.3825, "step": 10557 }, { "epoch": 8.76182572614108, "grad_norm": 19.38192367553711, "learning_rate": 1.649759336099585e-05, "loss": 1.2461, "step": 10558 }, { "epoch": 8.762655601659752, "grad_norm": 26.096410751342773, "learning_rate": 1.6497261410788383e-05, "loss": 1.125, "step": 10559 }, { "epoch": 8.763485477178424, "grad_norm": 34.427452087402344, "learning_rate": 1.6496929460580915e-05, "loss": 0.8655, "step": 10560 }, { "epoch": 8.764315352697096, "grad_norm": 19.204301834106445, "learning_rate": 1.6496597510373444e-05, "loss": 1.0646, "step": 10561 }, { "epoch": 8.765145228215768, "grad_norm": 28.094009399414062, "learning_rate": 1.6496265560165976e-05, "loss": 0.8923, "step": 10562 }, { "epoch": 8.76597510373444, "grad_norm": 19.095630645751953, "learning_rate": 1.6495933609958508e-05, "loss": 1.2464, "step": 10563 }, { "epoch": 8.766804979253113, "grad_norm": 18.8339900970459, "learning_rate": 1.649560165975104e-05, "loss": 0.5465, "step": 10564 }, { "epoch": 8.767634854771785, "grad_norm": 31.91010093688965, "learning_rate": 1.649526970954357e-05, "loss": 1.3336, "step": 10565 }, { "epoch": 8.768464730290457, "grad_norm": 21.148334503173828, "learning_rate": 1.64949377593361e-05, "loss": 1.3014, "step": 10566 }, { "epoch": 8.76929460580913, "grad_norm": 39.218841552734375, "learning_rate": 1.6494605809128633e-05, "loss": 1.2407, "step": 10567 }, { "epoch": 8.770124481327802, "grad_norm": 32.965782165527344, "learning_rate": 1.6494273858921165e-05, "loss": 1.6675, "step": 10568 }, { "epoch": 8.770954356846474, 
"grad_norm": 27.253835678100586, "learning_rate": 1.6493941908713697e-05, "loss": 0.8019, "step": 10569 }, { "epoch": 8.771784232365146, "grad_norm": 31.184326171875, "learning_rate": 1.6493609958506226e-05, "loss": 1.6085, "step": 10570 }, { "epoch": 8.772614107883818, "grad_norm": 20.790430068969727, "learning_rate": 1.6493278008298758e-05, "loss": 1.0057, "step": 10571 }, { "epoch": 8.77344398340249, "grad_norm": 36.61862564086914, "learning_rate": 1.6492946058091287e-05, "loss": 1.3067, "step": 10572 }, { "epoch": 8.774273858921163, "grad_norm": 17.110551834106445, "learning_rate": 1.649261410788382e-05, "loss": 0.6328, "step": 10573 }, { "epoch": 8.775103734439835, "grad_norm": 32.742218017578125, "learning_rate": 1.649228215767635e-05, "loss": 1.5784, "step": 10574 }, { "epoch": 8.775933609958507, "grad_norm": 20.324602127075195, "learning_rate": 1.649195020746888e-05, "loss": 1.0808, "step": 10575 }, { "epoch": 8.77676348547718, "grad_norm": 23.180076599121094, "learning_rate": 1.649161825726141e-05, "loss": 1.0271, "step": 10576 }, { "epoch": 8.777593360995851, "grad_norm": 21.678632736206055, "learning_rate": 1.6491286307053944e-05, "loss": 0.7323, "step": 10577 }, { "epoch": 8.778423236514524, "grad_norm": 25.300952911376953, "learning_rate": 1.6490954356846472e-05, "loss": 1.2343, "step": 10578 }, { "epoch": 8.779253112033196, "grad_norm": 28.40607452392578, "learning_rate": 1.6490622406639005e-05, "loss": 0.7629, "step": 10579 }, { "epoch": 8.780082987551868, "grad_norm": 15.924952507019043, "learning_rate": 1.6490290456431537e-05, "loss": 0.6851, "step": 10580 }, { "epoch": 8.78091286307054, "grad_norm": 22.599258422851562, "learning_rate": 1.648995850622407e-05, "loss": 1.1187, "step": 10581 }, { "epoch": 8.781742738589212, "grad_norm": 25.08907699584961, "learning_rate": 1.6489626556016597e-05, "loss": 1.0994, "step": 10582 }, { "epoch": 8.782572614107885, "grad_norm": 26.58446502685547, "learning_rate": 1.648929460580913e-05, "loss": 1.3079, "step": 
10583 }, { "epoch": 8.783402489626557, "grad_norm": 34.93019104003906, "learning_rate": 1.648896265560166e-05, "loss": 1.3039, "step": 10584 }, { "epoch": 8.784232365145229, "grad_norm": 18.275300979614258, "learning_rate": 1.6488630705394194e-05, "loss": 1.101, "step": 10585 }, { "epoch": 8.785062240663901, "grad_norm": 35.198612213134766, "learning_rate": 1.6488298755186722e-05, "loss": 1.1702, "step": 10586 }, { "epoch": 8.785892116182573, "grad_norm": 12.491680145263672, "learning_rate": 1.6487966804979255e-05, "loss": 0.6379, "step": 10587 }, { "epoch": 8.786721991701246, "grad_norm": 14.100452423095703, "learning_rate": 1.6487634854771787e-05, "loss": 0.5848, "step": 10588 }, { "epoch": 8.787551867219918, "grad_norm": 33.47761535644531, "learning_rate": 1.648730290456432e-05, "loss": 1.7624, "step": 10589 }, { "epoch": 8.78838174273859, "grad_norm": 28.266225814819336, "learning_rate": 1.6486970954356848e-05, "loss": 1.4386, "step": 10590 }, { "epoch": 8.789211618257262, "grad_norm": 23.91045379638672, "learning_rate": 1.648663900414938e-05, "loss": 1.4003, "step": 10591 }, { "epoch": 8.790041493775934, "grad_norm": 23.95718765258789, "learning_rate": 1.6486307053941912e-05, "loss": 1.2532, "step": 10592 }, { "epoch": 8.790871369294607, "grad_norm": 19.32851791381836, "learning_rate": 1.648597510373444e-05, "loss": 1.1551, "step": 10593 }, { "epoch": 8.791701244813279, "grad_norm": 43.850399017333984, "learning_rate": 1.6485643153526973e-05, "loss": 1.8147, "step": 10594 }, { "epoch": 8.792531120331951, "grad_norm": 30.79975700378418, "learning_rate": 1.64853112033195e-05, "loss": 1.0678, "step": 10595 }, { "epoch": 8.793360995850623, "grad_norm": 21.1837215423584, "learning_rate": 1.6484979253112033e-05, "loss": 0.9393, "step": 10596 }, { "epoch": 8.794190871369295, "grad_norm": 12.926735877990723, "learning_rate": 1.6484647302904566e-05, "loss": 0.6011, "step": 10597 }, { "epoch": 8.795020746887968, "grad_norm": 15.946846008300781, "learning_rate": 
1.6484315352697098e-05, "loss": 0.6573, "step": 10598 }, { "epoch": 8.79585062240664, "grad_norm": 22.568370819091797, "learning_rate": 1.6483983402489626e-05, "loss": 1.0121, "step": 10599 }, { "epoch": 8.796680497925312, "grad_norm": 21.49394989013672, "learning_rate": 1.648365145228216e-05, "loss": 1.357, "step": 10600 }, { "epoch": 8.797510373443984, "grad_norm": 19.229286193847656, "learning_rate": 1.648331950207469e-05, "loss": 0.7998, "step": 10601 }, { "epoch": 8.798340248962656, "grad_norm": 26.78708839416504, "learning_rate": 1.6482987551867223e-05, "loss": 1.6287, "step": 10602 }, { "epoch": 8.799170124481329, "grad_norm": 23.43000602722168, "learning_rate": 1.648265560165975e-05, "loss": 0.8011, "step": 10603 }, { "epoch": 8.8, "grad_norm": 26.180034637451172, "learning_rate": 1.6482323651452283e-05, "loss": 1.0049, "step": 10604 }, { "epoch": 8.800829875518673, "grad_norm": 20.50205421447754, "learning_rate": 1.6481991701244816e-05, "loss": 1.1635, "step": 10605 }, { "epoch": 8.801659751037345, "grad_norm": 20.102149963378906, "learning_rate": 1.6481659751037348e-05, "loss": 0.8916, "step": 10606 }, { "epoch": 8.802489626556017, "grad_norm": 16.6002140045166, "learning_rate": 1.6481327800829876e-05, "loss": 1.1843, "step": 10607 }, { "epoch": 8.80331950207469, "grad_norm": 39.01348876953125, "learning_rate": 1.648099585062241e-05, "loss": 1.06, "step": 10608 }, { "epoch": 8.804149377593362, "grad_norm": 24.971094131469727, "learning_rate": 1.648066390041494e-05, "loss": 1.5267, "step": 10609 }, { "epoch": 8.804979253112034, "grad_norm": 30.00849151611328, "learning_rate": 1.648033195020747e-05, "loss": 1.0213, "step": 10610 }, { "epoch": 8.805809128630706, "grad_norm": 21.231863021850586, "learning_rate": 1.648e-05, "loss": 0.9019, "step": 10611 }, { "epoch": 8.806639004149378, "grad_norm": 33.991905212402344, "learning_rate": 1.6479668049792534e-05, "loss": 1.6355, "step": 10612 }, { "epoch": 8.80746887966805, "grad_norm": 25.787668228149414, 
"learning_rate": 1.6479336099585062e-05, "loss": 1.1555, "step": 10613 }, { "epoch": 8.808298755186723, "grad_norm": 31.8513240814209, "learning_rate": 1.6479004149377594e-05, "loss": 1.5916, "step": 10614 }, { "epoch": 8.809128630705395, "grad_norm": 37.86273193359375, "learning_rate": 1.6478672199170126e-05, "loss": 1.4642, "step": 10615 }, { "epoch": 8.809958506224067, "grad_norm": 26.533384323120117, "learning_rate": 1.6478340248962655e-05, "loss": 1.5583, "step": 10616 }, { "epoch": 8.81078838174274, "grad_norm": 19.283246994018555, "learning_rate": 1.6478008298755187e-05, "loss": 1.4498, "step": 10617 }, { "epoch": 8.811618257261411, "grad_norm": 17.915786743164062, "learning_rate": 1.647767634854772e-05, "loss": 0.9032, "step": 10618 }, { "epoch": 8.812448132780084, "grad_norm": 22.589189529418945, "learning_rate": 1.6477344398340248e-05, "loss": 0.7526, "step": 10619 }, { "epoch": 8.813278008298756, "grad_norm": 23.64438819885254, "learning_rate": 1.647701244813278e-05, "loss": 1.0868, "step": 10620 }, { "epoch": 8.814107883817428, "grad_norm": 25.69879150390625, "learning_rate": 1.6476680497925312e-05, "loss": 1.6454, "step": 10621 }, { "epoch": 8.8149377593361, "grad_norm": 24.55695152282715, "learning_rate": 1.6476348547717844e-05, "loss": 1.0653, "step": 10622 }, { "epoch": 8.815767634854772, "grad_norm": 29.052764892578125, "learning_rate": 1.6476016597510377e-05, "loss": 1.2778, "step": 10623 }, { "epoch": 8.816597510373445, "grad_norm": 18.02433204650879, "learning_rate": 1.6475684647302905e-05, "loss": 1.0216, "step": 10624 }, { "epoch": 8.817427385892117, "grad_norm": 24.613618850708008, "learning_rate": 1.6475352697095437e-05, "loss": 1.5508, "step": 10625 }, { "epoch": 8.818257261410789, "grad_norm": 70.41494750976562, "learning_rate": 1.647502074688797e-05, "loss": 0.8628, "step": 10626 }, { "epoch": 8.819087136929461, "grad_norm": 14.968902587890625, "learning_rate": 1.64746887966805e-05, "loss": 0.8672, "step": 10627 }, { "epoch": 
8.819917012448133, "grad_norm": 36.715248107910156, "learning_rate": 1.647435684647303e-05, "loss": 0.8976, "step": 10628 }, { "epoch": 8.820746887966806, "grad_norm": 29.921249389648438, "learning_rate": 1.6474024896265562e-05, "loss": 1.3019, "step": 10629 }, { "epoch": 8.821576763485478, "grad_norm": 26.66806411743164, "learning_rate": 1.6473692946058095e-05, "loss": 1.2711, "step": 10630 }, { "epoch": 8.82240663900415, "grad_norm": 17.469877243041992, "learning_rate": 1.6473360995850623e-05, "loss": 0.4436, "step": 10631 }, { "epoch": 8.823236514522822, "grad_norm": 15.025175094604492, "learning_rate": 1.6473029045643155e-05, "loss": 0.5742, "step": 10632 }, { "epoch": 8.824066390041494, "grad_norm": 23.958415985107422, "learning_rate": 1.6472697095435684e-05, "loss": 1.0819, "step": 10633 }, { "epoch": 8.824896265560167, "grad_norm": 19.320302963256836, "learning_rate": 1.6472365145228216e-05, "loss": 1.3683, "step": 10634 }, { "epoch": 8.825726141078839, "grad_norm": 24.714155197143555, "learning_rate": 1.647203319502075e-05, "loss": 1.2702, "step": 10635 }, { "epoch": 8.826556016597511, "grad_norm": 57.37648010253906, "learning_rate": 1.6471701244813277e-05, "loss": 1.5996, "step": 10636 }, { "epoch": 8.827385892116183, "grad_norm": 22.298290252685547, "learning_rate": 1.647136929460581e-05, "loss": 1.2188, "step": 10637 }, { "epoch": 8.828215767634855, "grad_norm": 19.804519653320312, "learning_rate": 1.647103734439834e-05, "loss": 1.0866, "step": 10638 }, { "epoch": 8.829045643153528, "grad_norm": 37.97105026245117, "learning_rate": 1.6470705394190873e-05, "loss": 1.4229, "step": 10639 }, { "epoch": 8.8298755186722, "grad_norm": 19.444005966186523, "learning_rate": 1.6470373443983402e-05, "loss": 1.0131, "step": 10640 }, { "epoch": 8.830705394190872, "grad_norm": 16.563919067382812, "learning_rate": 1.6470041493775934e-05, "loss": 0.75, "step": 10641 }, { "epoch": 8.831535269709544, "grad_norm": 44.03380584716797, "learning_rate": 1.6469709543568466e-05, 
"loss": 0.9638, "step": 10642 }, { "epoch": 8.832365145228216, "grad_norm": 18.407001495361328, "learning_rate": 1.6469377593361e-05, "loss": 0.8594, "step": 10643 }, { "epoch": 8.833195020746889, "grad_norm": 14.065415382385254, "learning_rate": 1.6469045643153527e-05, "loss": 0.8821, "step": 10644 }, { "epoch": 8.83402489626556, "grad_norm": 39.763641357421875, "learning_rate": 1.646871369294606e-05, "loss": 1.3918, "step": 10645 }, { "epoch": 8.834854771784233, "grad_norm": NaN, "learning_rate": 1.646871369294606e-05, "loss": 1.8717, "step": 10646 }, { "epoch": 8.835684647302905, "grad_norm": 20.548860549926758, "learning_rate": 1.646838174273859e-05, "loss": 1.0245, "step": 10647 }, { "epoch": 8.836514522821577, "grad_norm": 52.10501480102539, "learning_rate": 1.6468049792531123e-05, "loss": 1.2197, "step": 10648 }, { "epoch": 8.83734439834025, "grad_norm": 42.41292953491211, "learning_rate": 1.6467717842323656e-05, "loss": 1.9003, "step": 10649 }, { "epoch": 8.838174273858922, "grad_norm": 35.18064498901367, "learning_rate": 1.6467385892116184e-05, "loss": 2.3234, "step": 10650 }, { "epoch": 8.839004149377594, "grad_norm": 20.161590576171875, "learning_rate": 1.6467053941908716e-05, "loss": 0.8858, "step": 10651 }, { "epoch": 8.839834024896266, "grad_norm": 29.93235206604004, "learning_rate": 1.6466721991701245e-05, "loss": 1.2079, "step": 10652 }, { "epoch": 8.840663900414938, "grad_norm": 13.860466957092285, "learning_rate": 1.6466390041493777e-05, "loss": 1.0968, "step": 10653 }, { "epoch": 8.84149377593361, "grad_norm": 33.019283294677734, "learning_rate": 1.646605809128631e-05, "loss": 1.993, "step": 10654 }, { "epoch": 8.842323651452283, "grad_norm": 12.9821138381958, "learning_rate": 1.6465726141078838e-05, "loss": 0.7662, "step": 10655 }, { "epoch": 8.843153526970955, "grad_norm": 47.04085922241211, "learning_rate": 1.646539419087137e-05, "loss": 1.4732, "step": 10656 }, { "epoch": 8.843983402489627, "grad_norm": 17.111934661865234, "learning_rate": 
1.6465062240663902e-05, "loss": 0.8745, "step": 10657 }, { "epoch": 8.8448132780083, "grad_norm": 26.729310989379883, "learning_rate": 1.646473029045643e-05, "loss": 1.5141, "step": 10658 }, { "epoch": 8.845643153526972, "grad_norm": 20.17823600769043, "learning_rate": 1.6464398340248963e-05, "loss": 0.9525, "step": 10659 }, { "epoch": 8.846473029045644, "grad_norm": 27.09564208984375, "learning_rate": 1.6464066390041495e-05, "loss": 1.9382, "step": 10660 }, { "epoch": 8.847302904564316, "grad_norm": 23.63550567626953, "learning_rate": 1.6463734439834027e-05, "loss": 0.7802, "step": 10661 }, { "epoch": 8.848132780082988, "grad_norm": 24.837800979614258, "learning_rate": 1.6463402489626556e-05, "loss": 1.1038, "step": 10662 }, { "epoch": 8.84896265560166, "grad_norm": 13.146651268005371, "learning_rate": 1.6463070539419088e-05, "loss": 0.5014, "step": 10663 }, { "epoch": 8.849792531120332, "grad_norm": 21.57480812072754, "learning_rate": 1.646273858921162e-05, "loss": 0.8479, "step": 10664 }, { "epoch": 8.850622406639005, "grad_norm": 34.76225662231445, "learning_rate": 1.6462406639004152e-05, "loss": 0.8301, "step": 10665 }, { "epoch": 8.851452282157677, "grad_norm": 36.18625259399414, "learning_rate": 1.646207468879668e-05, "loss": 2.0335, "step": 10666 }, { "epoch": 8.852282157676349, "grad_norm": 20.53046989440918, "learning_rate": 1.6461742738589213e-05, "loss": 1.528, "step": 10667 }, { "epoch": 8.853112033195021, "grad_norm": 24.863704681396484, "learning_rate": 1.6461410788381745e-05, "loss": 1.2993, "step": 10668 }, { "epoch": 8.853941908713693, "grad_norm": 33.97296142578125, "learning_rate": 1.6461078838174277e-05, "loss": 1.6627, "step": 10669 }, { "epoch": 8.854771784232366, "grad_norm": 24.6668643951416, "learning_rate": 1.6460746887966806e-05, "loss": 1.2394, "step": 10670 }, { "epoch": 8.855601659751038, "grad_norm": 23.816516876220703, "learning_rate": 1.6460414937759338e-05, "loss": 1.3714, "step": 10671 }, { "epoch": 8.85643153526971, "grad_norm": 
23.008718490600586, "learning_rate": 1.6460082987551867e-05, "loss": 0.7279, "step": 10672 }, { "epoch": 8.857261410788382, "grad_norm": 30.815723419189453, "learning_rate": 1.64597510373444e-05, "loss": 1.0545, "step": 10673 }, { "epoch": 8.858091286307054, "grad_norm": 27.099828720092773, "learning_rate": 1.645941908713693e-05, "loss": 0.9449, "step": 10674 }, { "epoch": 8.858921161825727, "grad_norm": 32.68437194824219, "learning_rate": 1.645908713692946e-05, "loss": 1.2765, "step": 10675 }, { "epoch": 8.859751037344399, "grad_norm": 20.194515228271484, "learning_rate": 1.6458755186721992e-05, "loss": 1.3767, "step": 10676 }, { "epoch": 8.860580912863071, "grad_norm": 30.39210319519043, "learning_rate": 1.6458423236514524e-05, "loss": 1.1907, "step": 10677 }, { "epoch": 8.861410788381743, "grad_norm": 34.815834045410156, "learning_rate": 1.6458091286307056e-05, "loss": 1.5392, "step": 10678 }, { "epoch": 8.862240663900415, "grad_norm": 18.283084869384766, "learning_rate": 1.6457759336099585e-05, "loss": 1.0203, "step": 10679 }, { "epoch": 8.863070539419088, "grad_norm": 26.524574279785156, "learning_rate": 1.6457427385892117e-05, "loss": 1.0209, "step": 10680 }, { "epoch": 8.86390041493776, "grad_norm": 24.819730758666992, "learning_rate": 1.645709543568465e-05, "loss": 1.4382, "step": 10681 }, { "epoch": 8.864730290456432, "grad_norm": 22.5758113861084, "learning_rate": 1.645676348547718e-05, "loss": 1.655, "step": 10682 }, { "epoch": 8.865560165975104, "grad_norm": 11.893982887268066, "learning_rate": 1.645643153526971e-05, "loss": 0.7997, "step": 10683 }, { "epoch": 8.866390041493776, "grad_norm": 19.759235382080078, "learning_rate": 1.6456099585062242e-05, "loss": 1.1725, "step": 10684 }, { "epoch": 8.867219917012449, "grad_norm": 33.949607849121094, "learning_rate": 1.6455767634854774e-05, "loss": 1.1407, "step": 10685 }, { "epoch": 8.86804979253112, "grad_norm": 23.484556198120117, "learning_rate": 1.6455435684647306e-05, "loss": 1.3649, "step": 10686 }, { 
"epoch": 8.868879668049793, "grad_norm": 19.999378204345703, "learning_rate": 1.6455103734439835e-05, "loss": 0.6166, "step": 10687 }, { "epoch": 8.869709543568465, "grad_norm": 18.99892234802246, "learning_rate": 1.6454771784232367e-05, "loss": 0.8491, "step": 10688 }, { "epoch": 8.870539419087137, "grad_norm": 14.29520320892334, "learning_rate": 1.64544398340249e-05, "loss": 0.6301, "step": 10689 }, { "epoch": 8.87136929460581, "grad_norm": 21.336570739746094, "learning_rate": 1.6454107883817428e-05, "loss": 1.1299, "step": 10690 }, { "epoch": 8.872199170124482, "grad_norm": 28.257793426513672, "learning_rate": 1.645377593360996e-05, "loss": 1.0298, "step": 10691 }, { "epoch": 8.873029045643154, "grad_norm": 17.808034896850586, "learning_rate": 1.6453443983402492e-05, "loss": 0.8962, "step": 10692 }, { "epoch": 8.873858921161826, "grad_norm": 31.742332458496094, "learning_rate": 1.645311203319502e-05, "loss": 1.1938, "step": 10693 }, { "epoch": 8.874688796680498, "grad_norm": 28.119226455688477, "learning_rate": 1.6452780082987553e-05, "loss": 1.0389, "step": 10694 }, { "epoch": 8.87551867219917, "grad_norm": 24.939498901367188, "learning_rate": 1.645244813278008e-05, "loss": 1.2776, "step": 10695 }, { "epoch": 8.876348547717843, "grad_norm": 32.366451263427734, "learning_rate": 1.6452116182572614e-05, "loss": 1.6, "step": 10696 }, { "epoch": 8.877178423236515, "grad_norm": 23.822668075561523, "learning_rate": 1.6451784232365146e-05, "loss": 0.9628, "step": 10697 }, { "epoch": 8.878008298755187, "grad_norm": 22.910961151123047, "learning_rate": 1.6451452282157678e-05, "loss": 1.2206, "step": 10698 }, { "epoch": 8.87883817427386, "grad_norm": 24.594480514526367, "learning_rate": 1.6451120331950207e-05, "loss": 1.0886, "step": 10699 }, { "epoch": 8.879668049792532, "grad_norm": 25.569955825805664, "learning_rate": 1.645078838174274e-05, "loss": 1.937, "step": 10700 }, { "epoch": 8.880497925311204, "grad_norm": 37.248992919921875, "learning_rate": 
1.645045643153527e-05, "loss": 1.267, "step": 10701 }, { "epoch": 8.881327800829876, "grad_norm": 21.948230743408203, "learning_rate": 1.6450124481327803e-05, "loss": 1.2628, "step": 10702 }, { "epoch": 8.882157676348548, "grad_norm": 14.677346229553223, "learning_rate": 1.6449792531120335e-05, "loss": 0.5189, "step": 10703 }, { "epoch": 8.88298755186722, "grad_norm": 37.402313232421875, "learning_rate": 1.6449460580912864e-05, "loss": 1.1438, "step": 10704 }, { "epoch": 8.883817427385893, "grad_norm": 14.824464797973633, "learning_rate": 1.6449128630705396e-05, "loss": 0.4446, "step": 10705 }, { "epoch": 8.884647302904565, "grad_norm": 25.15397071838379, "learning_rate": 1.6448796680497928e-05, "loss": 1.1847, "step": 10706 }, { "epoch": 8.885477178423237, "grad_norm": 18.060670852661133, "learning_rate": 1.644846473029046e-05, "loss": 0.5872, "step": 10707 }, { "epoch": 8.88630705394191, "grad_norm": 33.5959358215332, "learning_rate": 1.644813278008299e-05, "loss": 1.1094, "step": 10708 }, { "epoch": 8.887136929460581, "grad_norm": 34.78197479248047, "learning_rate": 1.644780082987552e-05, "loss": 1.5862, "step": 10709 }, { "epoch": 8.887966804979254, "grad_norm": 22.399803161621094, "learning_rate": 1.6447468879668053e-05, "loss": 1.5901, "step": 10710 }, { "epoch": 8.888796680497926, "grad_norm": 22.648746490478516, "learning_rate": 1.6447136929460582e-05, "loss": 1.4085, "step": 10711 }, { "epoch": 8.889626556016598, "grad_norm": 21.714597702026367, "learning_rate": 1.6446804979253114e-05, "loss": 1.4507, "step": 10712 }, { "epoch": 8.89045643153527, "grad_norm": 35.06501007080078, "learning_rate": 1.6446473029045643e-05, "loss": 1.6596, "step": 10713 }, { "epoch": 8.891286307053942, "grad_norm": 19.380630493164062, "learning_rate": 1.6446141078838175e-05, "loss": 0.7406, "step": 10714 }, { "epoch": 8.892116182572614, "grad_norm": 29.43877601623535, "learning_rate": 1.6445809128630707e-05, "loss": 0.9408, "step": 10715 }, { "epoch": 8.892946058091287, 
"grad_norm": 27.830469131469727, "learning_rate": 1.6445477178423236e-05, "loss": 0.962, "step": 10716 }, { "epoch": 8.893775933609959, "grad_norm": 26.300800323486328, "learning_rate": 1.6445145228215768e-05, "loss": 1.4023, "step": 10717 }, { "epoch": 8.894605809128631, "grad_norm": 16.31535530090332, "learning_rate": 1.64448132780083e-05, "loss": 0.7559, "step": 10718 }, { "epoch": 8.895435684647303, "grad_norm": 21.866451263427734, "learning_rate": 1.6444481327800832e-05, "loss": 0.9347, "step": 10719 }, { "epoch": 8.896265560165975, "grad_norm": 20.540353775024414, "learning_rate": 1.644414937759336e-05, "loss": 0.9528, "step": 10720 }, { "epoch": 8.897095435684648, "grad_norm": 48.74040222167969, "learning_rate": 1.6443817427385893e-05, "loss": 1.68, "step": 10721 }, { "epoch": 8.89792531120332, "grad_norm": 23.08791732788086, "learning_rate": 1.6443485477178425e-05, "loss": 1.1889, "step": 10722 }, { "epoch": 8.898755186721992, "grad_norm": 26.025875091552734, "learning_rate": 1.6443153526970957e-05, "loss": 1.3006, "step": 10723 }, { "epoch": 8.899585062240664, "grad_norm": 16.188947677612305, "learning_rate": 1.6442821576763486e-05, "loss": 0.4987, "step": 10724 }, { "epoch": 8.900414937759336, "grad_norm": 20.43985939025879, "learning_rate": 1.6442489626556018e-05, "loss": 0.9481, "step": 10725 }, { "epoch": 8.901244813278009, "grad_norm": 19.345943450927734, "learning_rate": 1.644215767634855e-05, "loss": 1.6826, "step": 10726 }, { "epoch": 8.90207468879668, "grad_norm": 19.18091583251953, "learning_rate": 1.6441825726141082e-05, "loss": 0.5916, "step": 10727 }, { "epoch": 8.902904564315353, "grad_norm": 15.09300708770752, "learning_rate": 1.644149377593361e-05, "loss": 0.4569, "step": 10728 }, { "epoch": 8.903734439834025, "grad_norm": 41.67416763305664, "learning_rate": 1.6441161825726143e-05, "loss": 1.2014, "step": 10729 }, { "epoch": 8.904564315352697, "grad_norm": 20.28356170654297, "learning_rate": 1.6440829875518675e-05, "loss": 0.8713, "step": 
10730 }, { "epoch": 8.90539419087137, "grad_norm": 17.394121170043945, "learning_rate": 1.6440497925311204e-05, "loss": 1.0053, "step": 10731 }, { "epoch": 8.906224066390042, "grad_norm": 27.1885929107666, "learning_rate": 1.6440165975103736e-05, "loss": 1.1074, "step": 10732 }, { "epoch": 8.907053941908714, "grad_norm": 36.947330474853516, "learning_rate": 1.6439834024896268e-05, "loss": 1.3154, "step": 10733 }, { "epoch": 8.907883817427386, "grad_norm": 35.82273483276367, "learning_rate": 1.6439502074688797e-05, "loss": 1.9824, "step": 10734 }, { "epoch": 8.908713692946058, "grad_norm": 38.104209899902344, "learning_rate": 1.643917012448133e-05, "loss": 1.56, "step": 10735 }, { "epoch": 8.90954356846473, "grad_norm": 21.109113693237305, "learning_rate": 1.643883817427386e-05, "loss": 0.6518, "step": 10736 }, { "epoch": 8.910373443983403, "grad_norm": 29.6101016998291, "learning_rate": 1.643850622406639e-05, "loss": 1.237, "step": 10737 }, { "epoch": 8.911203319502075, "grad_norm": 44.685543060302734, "learning_rate": 1.643817427385892e-05, "loss": 1.7576, "step": 10738 }, { "epoch": 8.912033195020747, "grad_norm": 34.502323150634766, "learning_rate": 1.6437842323651454e-05, "loss": 1.0206, "step": 10739 }, { "epoch": 8.91286307053942, "grad_norm": 80.85807800292969, "learning_rate": 1.6437510373443986e-05, "loss": 1.0525, "step": 10740 }, { "epoch": 8.913692946058092, "grad_norm": 24.15941047668457, "learning_rate": 1.6437178423236515e-05, "loss": 1.2352, "step": 10741 }, { "epoch": 8.914522821576764, "grad_norm": 66.01758575439453, "learning_rate": 1.6436846473029047e-05, "loss": 1.0124, "step": 10742 }, { "epoch": 8.915352697095436, "grad_norm": 20.54751205444336, "learning_rate": 1.643651452282158e-05, "loss": 0.841, "step": 10743 }, { "epoch": 8.916182572614108, "grad_norm": 37.58769226074219, "learning_rate": 1.643618257261411e-05, "loss": 1.2891, "step": 10744 }, { "epoch": 8.91701244813278, "grad_norm": 35.708106994628906, "learning_rate": 
1.643585062240664e-05, "loss": 1.3335, "step": 10745 }, { "epoch": 8.917842323651453, "grad_norm": 36.31140899658203, "learning_rate": 1.6435518672199172e-05, "loss": 1.5882, "step": 10746 }, { "epoch": 8.918672199170125, "grad_norm": 18.363876342773438, "learning_rate": 1.6435186721991704e-05, "loss": 0.6819, "step": 10747 }, { "epoch": 8.919502074688797, "grad_norm": 26.71202850341797, "learning_rate": 1.6434854771784236e-05, "loss": 0.9648, "step": 10748 }, { "epoch": 8.92033195020747, "grad_norm": 22.30022621154785, "learning_rate": 1.6434522821576765e-05, "loss": 1.4333, "step": 10749 }, { "epoch": 8.921161825726141, "grad_norm": 27.013830184936523, "learning_rate": 1.6434190871369297e-05, "loss": 1.2729, "step": 10750 }, { "epoch": 8.921991701244814, "grad_norm": 37.4941520690918, "learning_rate": 1.6433858921161826e-05, "loss": 1.9403, "step": 10751 }, { "epoch": 8.922821576763486, "grad_norm": 15.555222511291504, "learning_rate": 1.6433526970954358e-05, "loss": 0.7547, "step": 10752 }, { "epoch": 8.923651452282158, "grad_norm": 25.962356567382812, "learning_rate": 1.643319502074689e-05, "loss": 1.5204, "step": 10753 }, { "epoch": 8.92448132780083, "grad_norm": 40.34109878540039, "learning_rate": 1.643286307053942e-05, "loss": 1.2948, "step": 10754 }, { "epoch": 8.925311203319502, "grad_norm": 66.3209228515625, "learning_rate": 1.643253112033195e-05, "loss": 0.7317, "step": 10755 }, { "epoch": 8.926141078838175, "grad_norm": 29.60338592529297, "learning_rate": 1.6432199170124483e-05, "loss": 1.2048, "step": 10756 }, { "epoch": 8.926970954356847, "grad_norm": 38.04512023925781, "learning_rate": 1.6431867219917015e-05, "loss": 1.5748, "step": 10757 }, { "epoch": 8.927800829875519, "grad_norm": 17.38968849182129, "learning_rate": 1.6431535269709543e-05, "loss": 0.987, "step": 10758 }, { "epoch": 8.928630705394191, "grad_norm": 23.38762092590332, "learning_rate": 1.6431203319502076e-05, "loss": 1.8172, "step": 10759 }, { "epoch": 8.929460580912863, "grad_norm": 
27.772977828979492, "learning_rate": 1.6430871369294608e-05, "loss": 1.1102, "step": 10760 }, { "epoch": 8.930290456431536, "grad_norm": 43.39682388305664, "learning_rate": 1.643053941908714e-05, "loss": 0.6216, "step": 10761 }, { "epoch": 8.931120331950208, "grad_norm": 14.925520896911621, "learning_rate": 1.643020746887967e-05, "loss": 0.5641, "step": 10762 }, { "epoch": 8.93195020746888, "grad_norm": 25.46441650390625, "learning_rate": 1.64298755186722e-05, "loss": 1.162, "step": 10763 }, { "epoch": 8.932780082987552, "grad_norm": 21.01459312438965, "learning_rate": 1.6429543568464733e-05, "loss": 0.8468, "step": 10764 }, { "epoch": 8.933609958506224, "grad_norm": 20.803266525268555, "learning_rate": 1.6429211618257265e-05, "loss": 0.8814, "step": 10765 }, { "epoch": 8.934439834024896, "grad_norm": 19.114198684692383, "learning_rate": 1.6428879668049794e-05, "loss": 0.7827, "step": 10766 }, { "epoch": 8.935269709543569, "grad_norm": 24.421024322509766, "learning_rate": 1.6428547717842326e-05, "loss": 1.0373, "step": 10767 }, { "epoch": 8.936099585062241, "grad_norm": 15.856908798217773, "learning_rate": 1.6428215767634858e-05, "loss": 0.7757, "step": 10768 }, { "epoch": 8.936929460580913, "grad_norm": 46.299766540527344, "learning_rate": 1.6427883817427386e-05, "loss": 0.948, "step": 10769 }, { "epoch": 8.937759336099585, "grad_norm": 23.053024291992188, "learning_rate": 1.642755186721992e-05, "loss": 1.0076, "step": 10770 }, { "epoch": 8.938589211618257, "grad_norm": 37.78419494628906, "learning_rate": 1.642721991701245e-05, "loss": 1.2417, "step": 10771 }, { "epoch": 8.93941908713693, "grad_norm": 28.28538703918457, "learning_rate": 1.642688796680498e-05, "loss": 1.8722, "step": 10772 }, { "epoch": 8.940248962655602, "grad_norm": 18.53399085998535, "learning_rate": 1.642655601659751e-05, "loss": 0.7619, "step": 10773 }, { "epoch": 8.941078838174274, "grad_norm": 25.22745132446289, "learning_rate": 1.642622406639004e-05, "loss": 0.726, "step": 10774 }, { 
"epoch": 8.941908713692946, "grad_norm": 19.43368911743164, "learning_rate": 1.6425892116182572e-05, "loss": 1.1574, "step": 10775 }, { "epoch": 8.942738589211618, "grad_norm": 22.059017181396484, "learning_rate": 1.6425560165975104e-05, "loss": 1.1629, "step": 10776 }, { "epoch": 8.94356846473029, "grad_norm": 29.393550872802734, "learning_rate": 1.6425228215767637e-05, "loss": 1.2989, "step": 10777 }, { "epoch": 8.944398340248963, "grad_norm": 26.30206871032715, "learning_rate": 1.6424896265560165e-05, "loss": 1.465, "step": 10778 }, { "epoch": 8.945228215767635, "grad_norm": 19.30988311767578, "learning_rate": 1.6424564315352697e-05, "loss": 1.0509, "step": 10779 }, { "epoch": 8.946058091286307, "grad_norm": 38.28258514404297, "learning_rate": 1.642423236514523e-05, "loss": 1.155, "step": 10780 }, { "epoch": 8.94688796680498, "grad_norm": 18.851064682006836, "learning_rate": 1.642390041493776e-05, "loss": 0.9595, "step": 10781 }, { "epoch": 8.947717842323652, "grad_norm": 27.37561798095703, "learning_rate": 1.6423568464730294e-05, "loss": 1.3926, "step": 10782 }, { "epoch": 8.948547717842324, "grad_norm": 26.90392303466797, "learning_rate": 1.6423236514522822e-05, "loss": 1.3813, "step": 10783 }, { "epoch": 8.949377593360996, "grad_norm": 16.41842269897461, "learning_rate": 1.6422904564315355e-05, "loss": 0.8138, "step": 10784 }, { "epoch": 8.950207468879668, "grad_norm": 17.605436325073242, "learning_rate": 1.6422572614107887e-05, "loss": 0.632, "step": 10785 }, { "epoch": 8.95103734439834, "grad_norm": 17.688800811767578, "learning_rate": 1.642224066390042e-05, "loss": 0.8273, "step": 10786 }, { "epoch": 8.951867219917013, "grad_norm": 25.17272186279297, "learning_rate": 1.6421908713692947e-05, "loss": 1.2388, "step": 10787 }, { "epoch": 8.952697095435685, "grad_norm": 15.8273286819458, "learning_rate": 1.642157676348548e-05, "loss": 0.5993, "step": 10788 }, { "epoch": 8.953526970954357, "grad_norm": 27.04376220703125, "learning_rate": 1.642124481327801e-05, 
"loss": 1.454, "step": 10789 }, { "epoch": 8.95435684647303, "grad_norm": 22.816022872924805, "learning_rate": 1.642091286307054e-05, "loss": 1.2265, "step": 10790 }, { "epoch": 8.955186721991701, "grad_norm": 18.758867263793945, "learning_rate": 1.6420580912863073e-05, "loss": 0.7771, "step": 10791 }, { "epoch": 8.956016597510374, "grad_norm": 26.107559204101562, "learning_rate": 1.64202489626556e-05, "loss": 0.9389, "step": 10792 }, { "epoch": 8.956846473029046, "grad_norm": 19.183258056640625, "learning_rate": 1.6419917012448133e-05, "loss": 1.2755, "step": 10793 }, { "epoch": 8.957676348547718, "grad_norm": 31.37224578857422, "learning_rate": 1.6419585062240665e-05, "loss": 1.1164, "step": 10794 }, { "epoch": 8.95850622406639, "grad_norm": 38.34894561767578, "learning_rate": 1.6419253112033194e-05, "loss": 0.939, "step": 10795 }, { "epoch": 8.959336099585062, "grad_norm": 17.2012996673584, "learning_rate": 1.6418921161825726e-05, "loss": 0.8089, "step": 10796 }, { "epoch": 8.960165975103735, "grad_norm": 31.282236099243164, "learning_rate": 1.641858921161826e-05, "loss": 1.8351, "step": 10797 }, { "epoch": 8.960995850622407, "grad_norm": 19.72068977355957, "learning_rate": 1.641825726141079e-05, "loss": 0.7555, "step": 10798 }, { "epoch": 8.961825726141079, "grad_norm": 24.0867977142334, "learning_rate": 1.641792531120332e-05, "loss": 1.1498, "step": 10799 }, { "epoch": 8.962655601659751, "grad_norm": 22.29632568359375, "learning_rate": 1.641759336099585e-05, "loss": 0.8126, "step": 10800 }, { "epoch": 8.963485477178423, "grad_norm": 27.07219696044922, "learning_rate": 1.6417261410788383e-05, "loss": 1.3652, "step": 10801 }, { "epoch": 8.964315352697096, "grad_norm": 21.245281219482422, "learning_rate": 1.6416929460580916e-05, "loss": 0.9284, "step": 10802 }, { "epoch": 8.965145228215768, "grad_norm": 22.883028030395508, "learning_rate": 1.6416597510373444e-05, "loss": 0.9684, "step": 10803 }, { "epoch": 8.96597510373444, "grad_norm": 33.27899169921875, 
"learning_rate": 1.6416265560165976e-05, "loss": 1.2543, "step": 10804 }, { "epoch": 8.966804979253112, "grad_norm": 48.51655197143555, "learning_rate": 1.641593360995851e-05, "loss": 1.6339, "step": 10805 }, { "epoch": 8.967634854771784, "grad_norm": 13.059520721435547, "learning_rate": 1.641560165975104e-05, "loss": 0.7911, "step": 10806 }, { "epoch": 8.968464730290457, "grad_norm": 23.490800857543945, "learning_rate": 1.641526970954357e-05, "loss": 1.8391, "step": 10807 }, { "epoch": 8.969294605809129, "grad_norm": 24.52874183654785, "learning_rate": 1.64149377593361e-05, "loss": 1.1697, "step": 10808 }, { "epoch": 8.970124481327801, "grad_norm": 22.394811630249023, "learning_rate": 1.6414605809128634e-05, "loss": 1.9656, "step": 10809 }, { "epoch": 8.970954356846473, "grad_norm": 17.211585998535156, "learning_rate": 1.6414273858921162e-05, "loss": 1.3864, "step": 10810 }, { "epoch": 8.971784232365145, "grad_norm": 27.200197219848633, "learning_rate": 1.6413941908713694e-05, "loss": 1.1018, "step": 10811 }, { "epoch": 8.972614107883818, "grad_norm": 15.934225082397461, "learning_rate": 1.6413609958506223e-05, "loss": 0.918, "step": 10812 }, { "epoch": 8.97344398340249, "grad_norm": 18.135847091674805, "learning_rate": 1.6413278008298755e-05, "loss": 1.224, "step": 10813 }, { "epoch": 8.974273858921162, "grad_norm": 19.372005462646484, "learning_rate": 1.6412946058091287e-05, "loss": 0.5674, "step": 10814 }, { "epoch": 8.975103734439834, "grad_norm": 22.204463958740234, "learning_rate": 1.641261410788382e-05, "loss": 1.1174, "step": 10815 }, { "epoch": 8.975933609958506, "grad_norm": 23.75932502746582, "learning_rate": 1.6412282157676348e-05, "loss": 1.3064, "step": 10816 }, { "epoch": 8.976763485477179, "grad_norm": 25.727760314941406, "learning_rate": 1.641195020746888e-05, "loss": 1.0253, "step": 10817 }, { "epoch": 8.97759336099585, "grad_norm": 50.58433151245117, "learning_rate": 1.6411618257261412e-05, "loss": 1.2933, "step": 10818 }, { "epoch": 
8.978423236514523, "grad_norm": 20.576231002807617, "learning_rate": 1.6411286307053944e-05, "loss": 0.8495, "step": 10819 }, { "epoch": 8.979253112033195, "grad_norm": 23.32179832458496, "learning_rate": 1.6410954356846473e-05, "loss": 1.1756, "step": 10820 }, { "epoch": 8.980082987551867, "grad_norm": 31.702428817749023, "learning_rate": 1.6410622406639005e-05, "loss": 0.5237, "step": 10821 }, { "epoch": 8.98091286307054, "grad_norm": 24.840024948120117, "learning_rate": 1.6410290456431537e-05, "loss": 0.9372, "step": 10822 }, { "epoch": 8.981742738589212, "grad_norm": 18.39802360534668, "learning_rate": 1.640995850622407e-05, "loss": 1.0005, "step": 10823 }, { "epoch": 8.982572614107884, "grad_norm": 41.92641830444336, "learning_rate": 1.6409626556016598e-05, "loss": 0.7928, "step": 10824 }, { "epoch": 8.983402489626556, "grad_norm": 24.561372756958008, "learning_rate": 1.640929460580913e-05, "loss": 0.8838, "step": 10825 }, { "epoch": 8.984232365145228, "grad_norm": 21.445077896118164, "learning_rate": 1.6408962655601662e-05, "loss": 0.8326, "step": 10826 }, { "epoch": 8.9850622406639, "grad_norm": 27.700969696044922, "learning_rate": 1.6408630705394195e-05, "loss": 0.7501, "step": 10827 }, { "epoch": 8.985892116182573, "grad_norm": 9.40744400024414, "learning_rate": 1.6408298755186723e-05, "loss": 0.3079, "step": 10828 }, { "epoch": 8.986721991701245, "grad_norm": 17.81695556640625, "learning_rate": 1.6407966804979255e-05, "loss": 1.0092, "step": 10829 }, { "epoch": 8.987551867219917, "grad_norm": 18.564905166625977, "learning_rate": 1.6407634854771784e-05, "loss": 1.0, "step": 10830 }, { "epoch": 8.98838174273859, "grad_norm": 27.41105079650879, "learning_rate": 1.6407302904564316e-05, "loss": 0.9703, "step": 10831 }, { "epoch": 8.989211618257261, "grad_norm": 21.2534122467041, "learning_rate": 1.6406970954356848e-05, "loss": 0.9577, "step": 10832 }, { "epoch": 8.990041493775934, "grad_norm": 26.40074348449707, "learning_rate": 1.6406639004149377e-05, "loss": 
1.4042, "step": 10833 }, { "epoch": 8.990871369294606, "grad_norm": 23.239171981811523, "learning_rate": 1.640630705394191e-05, "loss": 1.752, "step": 10834 }, { "epoch": 8.991701244813278, "grad_norm": 22.701860427856445, "learning_rate": 1.640597510373444e-05, "loss": 0.9352, "step": 10835 }, { "epoch": 8.99253112033195, "grad_norm": 16.663225173950195, "learning_rate": 1.6405643153526973e-05, "loss": 0.8834, "step": 10836 }, { "epoch": 8.993360995850622, "grad_norm": 29.299768447875977, "learning_rate": 1.6405311203319502e-05, "loss": 1.2608, "step": 10837 }, { "epoch": 8.994190871369295, "grad_norm": 24.991806030273438, "learning_rate": 1.6404979253112034e-05, "loss": 0.9454, "step": 10838 }, { "epoch": 8.995020746887967, "grad_norm": 31.234237670898438, "learning_rate": 1.6404647302904566e-05, "loss": 0.9803, "step": 10839 }, { "epoch": 8.995850622406639, "grad_norm": 23.066320419311523, "learning_rate": 1.64043153526971e-05, "loss": 1.2748, "step": 10840 }, { "epoch": 8.996680497925311, "grad_norm": 19.647249221801758, "learning_rate": 1.6403983402489627e-05, "loss": 1.1549, "step": 10841 }, { "epoch": 8.997510373443983, "grad_norm": 28.601381301879883, "learning_rate": 1.640365145228216e-05, "loss": 1.1993, "step": 10842 }, { "epoch": 8.998340248962656, "grad_norm": 48.76767349243164, "learning_rate": 1.640331950207469e-05, "loss": 1.5548, "step": 10843 }, { "epoch": 8.999170124481328, "grad_norm": 31.634029388427734, "learning_rate": 1.6402987551867223e-05, "loss": 1.2306, "step": 10844 }, { "epoch": 9.0, "grad_norm": 24.15964126586914, "learning_rate": 1.6402655601659752e-05, "loss": 1.205, "step": 10845 }, { "epoch": 9.000829875518672, "grad_norm": 25.870155334472656, "learning_rate": 1.6402323651452284e-05, "loss": 0.6845, "step": 10846 }, { "epoch": 9.001659751037344, "grad_norm": 22.6363468170166, "learning_rate": 1.6401991701244816e-05, "loss": 0.5967, "step": 10847 }, { "epoch": 9.002489626556017, "grad_norm": 21.14777183532715, "learning_rate": 
1.6401659751037345e-05, "loss": 1.2473, "step": 10848 }, { "epoch": 9.003319502074689, "grad_norm": 20.500017166137695, "learning_rate": 1.6401327800829877e-05, "loss": 1.1217, "step": 10849 }, { "epoch": 9.004149377593361, "grad_norm": 50.11204147338867, "learning_rate": 1.640099585062241e-05, "loss": 0.5952, "step": 10850 }, { "epoch": 9.004979253112033, "grad_norm": 41.143680572509766, "learning_rate": 1.6400663900414938e-05, "loss": 0.8127, "step": 10851 }, { "epoch": 9.005809128630705, "grad_norm": 35.30193328857422, "learning_rate": 1.640033195020747e-05, "loss": 1.6945, "step": 10852 }, { "epoch": 9.006639004149378, "grad_norm": 44.3823356628418, "learning_rate": 1.64e-05, "loss": 1.4495, "step": 10853 }, { "epoch": 9.00746887966805, "grad_norm": 28.58368682861328, "learning_rate": 1.639966804979253e-05, "loss": 0.9422, "step": 10854 }, { "epoch": 9.008298755186722, "grad_norm": 21.162954330444336, "learning_rate": 1.6399336099585063e-05, "loss": 1.2725, "step": 10855 }, { "epoch": 9.009128630705394, "grad_norm": 44.15203094482422, "learning_rate": 1.6399004149377595e-05, "loss": 1.6514, "step": 10856 }, { "epoch": 9.009958506224066, "grad_norm": 22.694751739501953, "learning_rate": 1.6398672199170124e-05, "loss": 1.3892, "step": 10857 }, { "epoch": 9.010788381742739, "grad_norm": 15.449904441833496, "learning_rate": 1.6398340248962656e-05, "loss": 0.7813, "step": 10858 }, { "epoch": 9.01161825726141, "grad_norm": 16.782672882080078, "learning_rate": 1.6398008298755188e-05, "loss": 0.7415, "step": 10859 }, { "epoch": 9.012448132780083, "grad_norm": 19.94230842590332, "learning_rate": 1.639767634854772e-05, "loss": 0.9404, "step": 10860 }, { "epoch": 9.013278008298755, "grad_norm": 27.72795867919922, "learning_rate": 1.6397344398340252e-05, "loss": 0.7323, "step": 10861 }, { "epoch": 9.014107883817427, "grad_norm": 21.44916343688965, "learning_rate": 1.639701244813278e-05, "loss": 0.9249, "step": 10862 }, { "epoch": 9.0149377593361, "grad_norm": 
15.731588363647461, "learning_rate": 1.6396680497925313e-05, "loss": 0.6157, "step": 10863 }, { "epoch": 9.015767634854772, "grad_norm": 70.32870483398438, "learning_rate": 1.6396348547717845e-05, "loss": 1.826, "step": 10864 }, { "epoch": 9.016597510373444, "grad_norm": 36.823760986328125, "learning_rate": 1.6396016597510377e-05, "loss": 1.3598, "step": 10865 }, { "epoch": 9.017427385892116, "grad_norm": 12.25142765045166, "learning_rate": 1.6395684647302906e-05, "loss": 0.4982, "step": 10866 }, { "epoch": 9.018257261410788, "grad_norm": 32.02821350097656, "learning_rate": 1.6395352697095438e-05, "loss": 1.6501, "step": 10867 }, { "epoch": 9.01908713692946, "grad_norm": 21.98207664489746, "learning_rate": 1.6395020746887967e-05, "loss": 1.1054, "step": 10868 }, { "epoch": 9.019917012448133, "grad_norm": 21.225210189819336, "learning_rate": 1.63946887966805e-05, "loss": 0.7495, "step": 10869 }, { "epoch": 9.020746887966805, "grad_norm": 36.36310958862305, "learning_rate": 1.639435684647303e-05, "loss": 1.3803, "step": 10870 }, { "epoch": 9.021576763485477, "grad_norm": 18.375593185424805, "learning_rate": 1.639402489626556e-05, "loss": 0.5785, "step": 10871 }, { "epoch": 9.02240663900415, "grad_norm": 26.28346824645996, "learning_rate": 1.6393692946058092e-05, "loss": 0.6206, "step": 10872 }, { "epoch": 9.023236514522821, "grad_norm": 35.83609390258789, "learning_rate": 1.6393360995850624e-05, "loss": 1.349, "step": 10873 }, { "epoch": 9.024066390041494, "grad_norm": 24.255538940429688, "learning_rate": 1.6393029045643153e-05, "loss": 0.933, "step": 10874 }, { "epoch": 9.024896265560166, "grad_norm": 76.81092834472656, "learning_rate": 1.6392697095435685e-05, "loss": 0.4924, "step": 10875 }, { "epoch": 9.025726141078838, "grad_norm": 28.939218521118164, "learning_rate": 1.6392365145228217e-05, "loss": 1.0976, "step": 10876 }, { "epoch": 9.02655601659751, "grad_norm": 19.650232315063477, "learning_rate": 1.639203319502075e-05, "loss": 1.4971, "step": 10877 }, { 
"epoch": 9.027385892116182, "grad_norm": 45.811153411865234, "learning_rate": 1.6391701244813278e-05, "loss": 0.9108, "step": 10878 }, { "epoch": 9.028215767634855, "grad_norm": 29.853696823120117, "learning_rate": 1.639136929460581e-05, "loss": 0.8003, "step": 10879 }, { "epoch": 9.029045643153527, "grad_norm": 29.788118362426758, "learning_rate": 1.6391037344398342e-05, "loss": 0.9898, "step": 10880 }, { "epoch": 9.029875518672199, "grad_norm": 32.72226333618164, "learning_rate": 1.6390705394190874e-05, "loss": 1.3259, "step": 10881 }, { "epoch": 9.030705394190871, "grad_norm": 40.611785888671875, "learning_rate": 1.6390373443983403e-05, "loss": 0.7601, "step": 10882 }, { "epoch": 9.031535269709543, "grad_norm": 30.74359703063965, "learning_rate": 1.6390041493775935e-05, "loss": 1.0781, "step": 10883 }, { "epoch": 9.032365145228216, "grad_norm": 27.982044219970703, "learning_rate": 1.6389709543568467e-05, "loss": 0.9282, "step": 10884 }, { "epoch": 9.033195020746888, "grad_norm": 33.1436653137207, "learning_rate": 1.6389377593361e-05, "loss": 1.2673, "step": 10885 }, { "epoch": 9.03402489626556, "grad_norm": 21.65404510498047, "learning_rate": 1.6389045643153528e-05, "loss": 1.0761, "step": 10886 }, { "epoch": 9.034854771784232, "grad_norm": 25.281497955322266, "learning_rate": 1.638871369294606e-05, "loss": 1.0551, "step": 10887 }, { "epoch": 9.035684647302904, "grad_norm": 19.657167434692383, "learning_rate": 1.6388381742738592e-05, "loss": 1.0772, "step": 10888 }, { "epoch": 9.036514522821577, "grad_norm": 27.8791561126709, "learning_rate": 1.638804979253112e-05, "loss": 0.7434, "step": 10889 }, { "epoch": 9.037344398340249, "grad_norm": 24.51932144165039, "learning_rate": 1.6387717842323653e-05, "loss": 1.0345, "step": 10890 }, { "epoch": 9.038174273858921, "grad_norm": 31.09380340576172, "learning_rate": 1.638738589211618e-05, "loss": 1.1654, "step": 10891 }, { "epoch": 9.039004149377593, "grad_norm": 18.26285171508789, "learning_rate": 
1.6387053941908714e-05, "loss": 0.9136, "step": 10892 }, { "epoch": 9.039834024896265, "grad_norm": 30.670385360717773, "learning_rate": 1.6386721991701246e-05, "loss": 1.2709, "step": 10893 }, { "epoch": 9.040663900414938, "grad_norm": 20.777864456176758, "learning_rate": 1.6386390041493778e-05, "loss": 0.8279, "step": 10894 }, { "epoch": 9.04149377593361, "grad_norm": 33.36091995239258, "learning_rate": 1.6386058091286307e-05, "loss": 1.0204, "step": 10895 }, { "epoch": 9.042323651452282, "grad_norm": 23.090356826782227, "learning_rate": 1.638572614107884e-05, "loss": 0.6218, "step": 10896 }, { "epoch": 9.043153526970954, "grad_norm": 23.72138214111328, "learning_rate": 1.638539419087137e-05, "loss": 1.4178, "step": 10897 }, { "epoch": 9.043983402489626, "grad_norm": 24.45353889465332, "learning_rate": 1.6385062240663903e-05, "loss": 1.1965, "step": 10898 }, { "epoch": 9.044813278008299, "grad_norm": 21.823381423950195, "learning_rate": 1.6384730290456432e-05, "loss": 0.7706, "step": 10899 }, { "epoch": 9.04564315352697, "grad_norm": 32.518856048583984, "learning_rate": 1.6384398340248964e-05, "loss": 0.8489, "step": 10900 }, { "epoch": 9.046473029045643, "grad_norm": 29.268020629882812, "learning_rate": 1.6384066390041496e-05, "loss": 1.2441, "step": 10901 }, { "epoch": 9.047302904564315, "grad_norm": 29.73811149597168, "learning_rate": 1.6383734439834028e-05, "loss": 0.9451, "step": 10902 }, { "epoch": 9.048132780082987, "grad_norm": 19.13396644592285, "learning_rate": 1.6383402489626557e-05, "loss": 0.8071, "step": 10903 }, { "epoch": 9.04896265560166, "grad_norm": 16.774892807006836, "learning_rate": 1.638307053941909e-05, "loss": 0.9763, "step": 10904 }, { "epoch": 9.049792531120332, "grad_norm": 17.456567764282227, "learning_rate": 1.638273858921162e-05, "loss": 0.8245, "step": 10905 }, { "epoch": 9.050622406639004, "grad_norm": 20.111215591430664, "learning_rate": 1.638240663900415e-05, "loss": 0.4582, "step": 10906 }, { "epoch": 9.051452282157676, 
"grad_norm": 21.419349670410156, "learning_rate": 1.6382074688796682e-05, "loss": 0.8313, "step": 10907 }, { "epoch": 9.052282157676348, "grad_norm": 41.56724166870117, "learning_rate": 1.6381742738589214e-05, "loss": 1.6067, "step": 10908 }, { "epoch": 9.05311203319502, "grad_norm": 13.449090957641602, "learning_rate": 1.6381410788381743e-05, "loss": 0.5721, "step": 10909 }, { "epoch": 9.053941908713693, "grad_norm": 23.332651138305664, "learning_rate": 1.6381078838174275e-05, "loss": 1.4359, "step": 10910 }, { "epoch": 9.054771784232365, "grad_norm": 12.010392189025879, "learning_rate": 1.6380746887966807e-05, "loss": 0.3216, "step": 10911 }, { "epoch": 9.055601659751037, "grad_norm": 27.780658721923828, "learning_rate": 1.6380414937759336e-05, "loss": 0.4234, "step": 10912 }, { "epoch": 9.05643153526971, "grad_norm": 20.58645248413086, "learning_rate": 1.6380082987551868e-05, "loss": 0.7464, "step": 10913 }, { "epoch": 9.057261410788382, "grad_norm": 15.907388687133789, "learning_rate": 1.63797510373444e-05, "loss": 0.5675, "step": 10914 }, { "epoch": 9.058091286307054, "grad_norm": 21.153587341308594, "learning_rate": 1.6379419087136932e-05, "loss": 1.2427, "step": 10915 }, { "epoch": 9.058921161825726, "grad_norm": 28.72186279296875, "learning_rate": 1.637908713692946e-05, "loss": 0.9101, "step": 10916 }, { "epoch": 9.059751037344398, "grad_norm": 23.378835678100586, "learning_rate": 1.6378755186721993e-05, "loss": 1.1688, "step": 10917 }, { "epoch": 9.06058091286307, "grad_norm": 15.404033660888672, "learning_rate": 1.6378423236514525e-05, "loss": 0.7539, "step": 10918 }, { "epoch": 9.061410788381743, "grad_norm": 39.76719284057617, "learning_rate": 1.6378091286307057e-05, "loss": 1.3401, "step": 10919 }, { "epoch": 9.062240663900415, "grad_norm": 25.98837661743164, "learning_rate": 1.6377759336099586e-05, "loss": 0.9202, "step": 10920 }, { "epoch": 9.063070539419087, "grad_norm": 36.21820068359375, "learning_rate": 1.6377427385892118e-05, "loss": 0.8199, 
"step": 10921 }, { "epoch": 9.063900414937759, "grad_norm": 21.950159072875977, "learning_rate": 1.637709543568465e-05, "loss": 0.9801, "step": 10922 }, { "epoch": 9.064730290456431, "grad_norm": 39.66502380371094, "learning_rate": 1.6376763485477182e-05, "loss": 1.5806, "step": 10923 }, { "epoch": 9.065560165975104, "grad_norm": 31.264362335205078, "learning_rate": 1.637643153526971e-05, "loss": 0.8565, "step": 10924 }, { "epoch": 9.066390041493776, "grad_norm": 47.213436126708984, "learning_rate": 1.6376099585062243e-05, "loss": 1.3796, "step": 10925 }, { "epoch": 9.067219917012448, "grad_norm": 16.982540130615234, "learning_rate": 1.6375767634854775e-05, "loss": 0.533, "step": 10926 }, { "epoch": 9.06804979253112, "grad_norm": 31.76425552368164, "learning_rate": 1.6375435684647304e-05, "loss": 1.584, "step": 10927 }, { "epoch": 9.068879668049792, "grad_norm": 30.682710647583008, "learning_rate": 1.6375103734439836e-05, "loss": 0.6683, "step": 10928 }, { "epoch": 9.069709543568464, "grad_norm": 35.94076919555664, "learning_rate": 1.6374771784232364e-05, "loss": 0.7246, "step": 10929 }, { "epoch": 9.070539419087137, "grad_norm": 14.639731407165527, "learning_rate": 1.6374439834024897e-05, "loss": 0.4762, "step": 10930 }, { "epoch": 9.071369294605809, "grad_norm": 15.149922370910645, "learning_rate": 1.637410788381743e-05, "loss": 0.4423, "step": 10931 }, { "epoch": 9.072199170124481, "grad_norm": 18.35268211364746, "learning_rate": 1.6373775933609957e-05, "loss": 0.8455, "step": 10932 }, { "epoch": 9.073029045643153, "grad_norm": 44.47372055053711, "learning_rate": 1.637344398340249e-05, "loss": 0.7339, "step": 10933 }, { "epoch": 9.073858921161825, "grad_norm": 93.45533752441406, "learning_rate": 1.637311203319502e-05, "loss": 1.5442, "step": 10934 }, { "epoch": 9.074688796680498, "grad_norm": 34.31437301635742, "learning_rate": 1.6372780082987554e-05, "loss": 0.7652, "step": 10935 }, { "epoch": 9.07551867219917, "grad_norm": 26.880027770996094, "learning_rate": 
1.6372448132780082e-05, "loss": 1.1566, "step": 10936 }, { "epoch": 9.076348547717842, "grad_norm": 27.23579978942871, "learning_rate": 1.6372116182572615e-05, "loss": 1.3391, "step": 10937 }, { "epoch": 9.077178423236514, "grad_norm": 25.62867546081543, "learning_rate": 1.6371784232365147e-05, "loss": 1.3664, "step": 10938 }, { "epoch": 9.078008298755186, "grad_norm": 29.423484802246094, "learning_rate": 1.637145228215768e-05, "loss": 1.0428, "step": 10939 }, { "epoch": 9.078838174273859, "grad_norm": 42.56993865966797, "learning_rate": 1.637112033195021e-05, "loss": 1.7768, "step": 10940 }, { "epoch": 9.07966804979253, "grad_norm": 23.46617317199707, "learning_rate": 1.637078838174274e-05, "loss": 1.0736, "step": 10941 }, { "epoch": 9.080497925311203, "grad_norm": 25.19859504699707, "learning_rate": 1.6370456431535272e-05, "loss": 0.6733, "step": 10942 }, { "epoch": 9.081327800829875, "grad_norm": 18.91291618347168, "learning_rate": 1.6370124481327804e-05, "loss": 0.7327, "step": 10943 }, { "epoch": 9.082157676348547, "grad_norm": 76.16639709472656, "learning_rate": 1.6369792531120336e-05, "loss": 1.2857, "step": 10944 }, { "epoch": 9.08298755186722, "grad_norm": 38.192073822021484, "learning_rate": 1.6369460580912865e-05, "loss": 1.3541, "step": 10945 }, { "epoch": 9.083817427385892, "grad_norm": 35.57149887084961, "learning_rate": 1.6369128630705397e-05, "loss": 1.1473, "step": 10946 }, { "epoch": 9.084647302904564, "grad_norm": 20.303621292114258, "learning_rate": 1.6368796680497925e-05, "loss": 1.0438, "step": 10947 }, { "epoch": 9.085477178423236, "grad_norm": 21.736896514892578, "learning_rate": 1.6368464730290458e-05, "loss": 0.859, "step": 10948 }, { "epoch": 9.086307053941908, "grad_norm": 33.43784713745117, "learning_rate": 1.636813278008299e-05, "loss": 1.3224, "step": 10949 }, { "epoch": 9.08713692946058, "grad_norm": 27.237625122070312, "learning_rate": 1.636780082987552e-05, "loss": 0.8836, "step": 10950 }, { "epoch": 9.087966804979253, "grad_norm": 
29.906116485595703, "learning_rate": 1.636746887966805e-05, "loss": 1.2103, "step": 10951 }, { "epoch": 9.088796680497925, "grad_norm": 71.90943908691406, "learning_rate": 1.6367136929460583e-05, "loss": 1.0507, "step": 10952 }, { "epoch": 9.089626556016597, "grad_norm": 26.79404640197754, "learning_rate": 1.636680497925311e-05, "loss": 0.8309, "step": 10953 }, { "epoch": 9.09045643153527, "grad_norm": 22.887134552001953, "learning_rate": 1.6366473029045643e-05, "loss": 1.3789, "step": 10954 }, { "epoch": 9.091286307053942, "grad_norm": 32.42538070678711, "learning_rate": 1.6366141078838176e-05, "loss": 0.9332, "step": 10955 }, { "epoch": 9.092116182572614, "grad_norm": 29.686382293701172, "learning_rate": 1.6365809128630708e-05, "loss": 0.9383, "step": 10956 }, { "epoch": 9.092946058091286, "grad_norm": 39.508731842041016, "learning_rate": 1.6365477178423236e-05, "loss": 1.1192, "step": 10957 }, { "epoch": 9.093775933609958, "grad_norm": 19.333425521850586, "learning_rate": 1.636514522821577e-05, "loss": 1.1852, "step": 10958 }, { "epoch": 9.09460580912863, "grad_norm": 27.489110946655273, "learning_rate": 1.63648132780083e-05, "loss": 1.3856, "step": 10959 }, { "epoch": 9.095435684647303, "grad_norm": 18.394071578979492, "learning_rate": 1.6364481327800833e-05, "loss": 0.8806, "step": 10960 }, { "epoch": 9.096265560165975, "grad_norm": 22.506990432739258, "learning_rate": 1.636414937759336e-05, "loss": 0.7818, "step": 10961 }, { "epoch": 9.097095435684647, "grad_norm": 23.88337516784668, "learning_rate": 1.6363817427385894e-05, "loss": 0.764, "step": 10962 }, { "epoch": 9.09792531120332, "grad_norm": 27.96341896057129, "learning_rate": 1.6363485477178426e-05, "loss": 1.3406, "step": 10963 }, { "epoch": 9.098755186721991, "grad_norm": 29.568769454956055, "learning_rate": 1.6363153526970958e-05, "loss": 1.4269, "step": 10964 }, { "epoch": 9.099585062240664, "grad_norm": 37.380096435546875, "learning_rate": 1.6362821576763486e-05, "loss": 1.6248, "step": 10965 }, { 
"epoch": 9.100414937759336, "grad_norm": 39.92742919921875, "learning_rate": 1.636248962655602e-05, "loss": 1.3501, "step": 10966 }, { "epoch": 9.101244813278008, "grad_norm": 20.881145477294922, "learning_rate": 1.636215767634855e-05, "loss": 0.8631, "step": 10967 }, { "epoch": 9.10207468879668, "grad_norm": 19.42879867553711, "learning_rate": 1.636182572614108e-05, "loss": 0.5903, "step": 10968 }, { "epoch": 9.102904564315352, "grad_norm": 16.581642150878906, "learning_rate": 1.636149377593361e-05, "loss": 0.3547, "step": 10969 }, { "epoch": 9.103734439834025, "grad_norm": 18.90239906311035, "learning_rate": 1.636116182572614e-05, "loss": 0.6708, "step": 10970 }, { "epoch": 9.104564315352697, "grad_norm": 36.636375427246094, "learning_rate": 1.6360829875518672e-05, "loss": 0.8856, "step": 10971 }, { "epoch": 9.105394190871369, "grad_norm": 16.202804565429688, "learning_rate": 1.6360497925311204e-05, "loss": 0.3951, "step": 10972 }, { "epoch": 9.106224066390041, "grad_norm": 17.176191329956055, "learning_rate": 1.6360165975103737e-05, "loss": 1.205, "step": 10973 }, { "epoch": 9.107053941908713, "grad_norm": 28.008853912353516, "learning_rate": 1.6359834024896265e-05, "loss": 1.6788, "step": 10974 }, { "epoch": 9.107883817427386, "grad_norm": 41.15956115722656, "learning_rate": 1.6359502074688797e-05, "loss": 1.2359, "step": 10975 }, { "epoch": 9.108713692946058, "grad_norm": 16.19866180419922, "learning_rate": 1.635917012448133e-05, "loss": 0.6105, "step": 10976 }, { "epoch": 9.10954356846473, "grad_norm": 20.92970085144043, "learning_rate": 1.635883817427386e-05, "loss": 1.1421, "step": 10977 }, { "epoch": 9.110373443983402, "grad_norm": 16.545490264892578, "learning_rate": 1.635850622406639e-05, "loss": 0.8463, "step": 10978 }, { "epoch": 9.111203319502074, "grad_norm": 19.589675903320312, "learning_rate": 1.6358174273858922e-05, "loss": 0.6174, "step": 10979 }, { "epoch": 9.112033195020746, "grad_norm": 13.820355415344238, "learning_rate": 
1.6357842323651455e-05, "loss": 0.7908, "step": 10980 }, { "epoch": 9.112863070539419, "grad_norm": 18.812755584716797, "learning_rate": 1.6357510373443987e-05, "loss": 0.6407, "step": 10981 }, { "epoch": 9.11369294605809, "grad_norm": 18.311946868896484, "learning_rate": 1.6357178423236515e-05, "loss": 0.8895, "step": 10982 }, { "epoch": 9.114522821576763, "grad_norm": 23.490976333618164, "learning_rate": 1.6356846473029047e-05, "loss": 0.6447, "step": 10983 }, { "epoch": 9.115352697095435, "grad_norm": 44.38840103149414, "learning_rate": 1.635651452282158e-05, "loss": 1.0828, "step": 10984 }, { "epoch": 9.116182572614107, "grad_norm": 24.18939971923828, "learning_rate": 1.6356182572614108e-05, "loss": 1.3184, "step": 10985 }, { "epoch": 9.11701244813278, "grad_norm": 51.60591506958008, "learning_rate": 1.635585062240664e-05, "loss": 0.9964, "step": 10986 }, { "epoch": 9.117842323651452, "grad_norm": 29.2447452545166, "learning_rate": 1.6355518672199173e-05, "loss": 1.024, "step": 10987 }, { "epoch": 9.118672199170124, "grad_norm": 27.787527084350586, "learning_rate": 1.63551867219917e-05, "loss": 1.3696, "step": 10988 }, { "epoch": 9.119502074688796, "grad_norm": 24.99616241455078, "learning_rate": 1.6354854771784233e-05, "loss": 1.0232, "step": 10989 }, { "epoch": 9.120331950207468, "grad_norm": 20.92179298400879, "learning_rate": 1.6354522821576762e-05, "loss": 0.5267, "step": 10990 }, { "epoch": 9.12116182572614, "grad_norm": 14.63048267364502, "learning_rate": 1.6354190871369294e-05, "loss": 0.5672, "step": 10991 }, { "epoch": 9.121991701244813, "grad_norm": 25.551734924316406, "learning_rate": 1.6353858921161826e-05, "loss": 1.4256, "step": 10992 }, { "epoch": 9.122821576763485, "grad_norm": 24.70057487487793, "learning_rate": 1.635352697095436e-05, "loss": 0.7847, "step": 10993 }, { "epoch": 9.123651452282157, "grad_norm": 46.18421936035156, "learning_rate": 1.635319502074689e-05, "loss": 0.9942, "step": 10994 }, { "epoch": 9.12448132780083, "grad_norm": 
24.908769607543945, "learning_rate": 1.635286307053942e-05, "loss": 1.5183, "step": 10995 }, { "epoch": 9.125311203319502, "grad_norm": 29.1546573638916, "learning_rate": 1.635253112033195e-05, "loss": 0.6953, "step": 10996 }, { "epoch": 9.126141078838174, "grad_norm": 30.269798278808594, "learning_rate": 1.6352199170124483e-05, "loss": 0.6989, "step": 10997 }, { "epoch": 9.126970954356846, "grad_norm": 32.381431579589844, "learning_rate": 1.6351867219917016e-05, "loss": 1.1354, "step": 10998 }, { "epoch": 9.127800829875518, "grad_norm": 38.539894104003906, "learning_rate": 1.6351535269709544e-05, "loss": 1.1706, "step": 10999 }, { "epoch": 9.12863070539419, "grad_norm": 32.048583984375, "learning_rate": 1.6351203319502076e-05, "loss": 1.3572, "step": 11000 }, { "epoch": 9.129460580912863, "grad_norm": 39.91157150268555, "learning_rate": 1.635087136929461e-05, "loss": 1.2683, "step": 11001 }, { "epoch": 9.130290456431535, "grad_norm": 16.0595760345459, "learning_rate": 1.635053941908714e-05, "loss": 0.5649, "step": 11002 }, { "epoch": 9.131120331950207, "grad_norm": 28.886871337890625, "learning_rate": 1.635020746887967e-05, "loss": 1.0579, "step": 11003 }, { "epoch": 9.13195020746888, "grad_norm": 28.08995246887207, "learning_rate": 1.63498755186722e-05, "loss": 0.9586, "step": 11004 }, { "epoch": 9.132780082987551, "grad_norm": 50.03520202636719, "learning_rate": 1.6349543568464734e-05, "loss": 1.3533, "step": 11005 }, { "epoch": 9.133609958506224, "grad_norm": 29.430953979492188, "learning_rate": 1.6349211618257262e-05, "loss": 1.354, "step": 11006 }, { "epoch": 9.134439834024896, "grad_norm": 29.352005004882812, "learning_rate": 1.6348879668049794e-05, "loss": 1.0339, "step": 11007 }, { "epoch": 9.135269709543568, "grad_norm": 74.83843231201172, "learning_rate": 1.6348547717842323e-05, "loss": 1.592, "step": 11008 }, { "epoch": 9.13609958506224, "grad_norm": 28.96174430847168, "learning_rate": 1.6348215767634855e-05, "loss": 0.708, "step": 11009 }, { "epoch": 
9.136929460580912, "grad_norm": 16.52823829650879, "learning_rate": 1.6347883817427387e-05, "loss": 0.7663, "step": 11010 }, { "epoch": 9.137759336099585, "grad_norm": 24.387489318847656, "learning_rate": 1.6347551867219916e-05, "loss": 1.1699, "step": 11011 }, { "epoch": 9.138589211618257, "grad_norm": 28.306819915771484, "learning_rate": 1.6347219917012448e-05, "loss": 0.607, "step": 11012 }, { "epoch": 9.139419087136929, "grad_norm": 21.725749969482422, "learning_rate": 1.634688796680498e-05, "loss": 0.8833, "step": 11013 }, { "epoch": 9.140248962655601, "grad_norm": 31.08379364013672, "learning_rate": 1.6346556016597512e-05, "loss": 1.834, "step": 11014 }, { "epoch": 9.141078838174273, "grad_norm": 24.210010528564453, "learning_rate": 1.634622406639004e-05, "loss": 1.1575, "step": 11015 }, { "epoch": 9.141908713692946, "grad_norm": 22.434776306152344, "learning_rate": 1.6345892116182573e-05, "loss": 0.7662, "step": 11016 }, { "epoch": 9.142738589211618, "grad_norm": 26.172407150268555, "learning_rate": 1.6345560165975105e-05, "loss": 1.3251, "step": 11017 }, { "epoch": 9.14356846473029, "grad_norm": 22.384368896484375, "learning_rate": 1.6345228215767637e-05, "loss": 0.9724, "step": 11018 }, { "epoch": 9.144398340248962, "grad_norm": 29.863000869750977, "learning_rate": 1.634489626556017e-05, "loss": 1.0273, "step": 11019 }, { "epoch": 9.145228215767634, "grad_norm": 31.0248966217041, "learning_rate": 1.6344564315352698e-05, "loss": 1.1255, "step": 11020 }, { "epoch": 9.146058091286307, "grad_norm": 28.687236785888672, "learning_rate": 1.634423236514523e-05, "loss": 1.2227, "step": 11021 }, { "epoch": 9.146887966804979, "grad_norm": 16.15324592590332, "learning_rate": 1.6343900414937762e-05, "loss": 0.6735, "step": 11022 }, { "epoch": 9.147717842323651, "grad_norm": 23.02199935913086, "learning_rate": 1.634356846473029e-05, "loss": 0.6338, "step": 11023 }, { "epoch": 9.148547717842323, "grad_norm": 22.92447280883789, "learning_rate": 1.6343236514522823e-05, 
"loss": 1.0575, "step": 11024 }, { "epoch": 9.149377593360995, "grad_norm": 34.071006774902344, "learning_rate": 1.6342904564315355e-05, "loss": 1.3522, "step": 11025 }, { "epoch": 9.150207468879668, "grad_norm": 21.78937339782715, "learning_rate": 1.6342572614107884e-05, "loss": 1.117, "step": 11026 }, { "epoch": 9.15103734439834, "grad_norm": 29.722930908203125, "learning_rate": 1.6342240663900416e-05, "loss": 1.0881, "step": 11027 }, { "epoch": 9.151867219917012, "grad_norm": 103.29349517822266, "learning_rate": 1.6341908713692948e-05, "loss": 1.0817, "step": 11028 }, { "epoch": 9.152697095435684, "grad_norm": 27.71363067626953, "learning_rate": 1.6341576763485477e-05, "loss": 1.1213, "step": 11029 }, { "epoch": 9.153526970954356, "grad_norm": 27.142784118652344, "learning_rate": 1.634124481327801e-05, "loss": 1.7509, "step": 11030 }, { "epoch": 9.154356846473028, "grad_norm": 16.11151123046875, "learning_rate": 1.634091286307054e-05, "loss": 0.8939, "step": 11031 }, { "epoch": 9.1551867219917, "grad_norm": 31.453296661376953, "learning_rate": 1.634058091286307e-05, "loss": 1.6781, "step": 11032 }, { "epoch": 9.156016597510373, "grad_norm": 17.32252311706543, "learning_rate": 1.6340248962655602e-05, "loss": 1.1322, "step": 11033 }, { "epoch": 9.156846473029045, "grad_norm": 22.802812576293945, "learning_rate": 1.6339917012448134e-05, "loss": 1.1655, "step": 11034 }, { "epoch": 9.157676348547717, "grad_norm": 22.333377838134766, "learning_rate": 1.6339585062240666e-05, "loss": 0.741, "step": 11035 }, { "epoch": 9.15850622406639, "grad_norm": 28.880842208862305, "learning_rate": 1.6339253112033195e-05, "loss": 1.2586, "step": 11036 }, { "epoch": 9.159336099585062, "grad_norm": 20.46555519104004, "learning_rate": 1.6338921161825727e-05, "loss": 1.1522, "step": 11037 }, { "epoch": 9.160165975103734, "grad_norm": 29.862070083618164, "learning_rate": 1.633858921161826e-05, "loss": 0.713, "step": 11038 }, { "epoch": 9.160995850622406, "grad_norm": 31.128849029541016, 
"learning_rate": 1.633825726141079e-05, "loss": 0.776, "step": 11039 }, { "epoch": 9.161825726141078, "grad_norm": 14.999696731567383, "learning_rate": 1.633792531120332e-05, "loss": 1.2171, "step": 11040 }, { "epoch": 9.16265560165975, "grad_norm": 19.0526123046875, "learning_rate": 1.6337593360995852e-05, "loss": 0.5628, "step": 11041 }, { "epoch": 9.163485477178423, "grad_norm": 25.51460075378418, "learning_rate": 1.6337261410788384e-05, "loss": 1.4585, "step": 11042 }, { "epoch": 9.164315352697095, "grad_norm": 27.75394630432129, "learning_rate": 1.6336929460580916e-05, "loss": 1.712, "step": 11043 }, { "epoch": 9.165145228215767, "grad_norm": 47.52810287475586, "learning_rate": 1.6336597510373445e-05, "loss": 0.6464, "step": 11044 }, { "epoch": 9.16597510373444, "grad_norm": 19.04924201965332, "learning_rate": 1.6336265560165977e-05, "loss": 1.1131, "step": 11045 }, { "epoch": 9.166804979253111, "grad_norm": 23.441749572753906, "learning_rate": 1.6335933609958506e-05, "loss": 0.9918, "step": 11046 }, { "epoch": 9.167634854771784, "grad_norm": 15.681282997131348, "learning_rate": 1.6335601659751038e-05, "loss": 0.9292, "step": 11047 }, { "epoch": 9.168464730290456, "grad_norm": 32.13788604736328, "learning_rate": 1.633526970954357e-05, "loss": 1.2759, "step": 11048 }, { "epoch": 9.169294605809128, "grad_norm": 41.52974319458008, "learning_rate": 1.63349377593361e-05, "loss": 1.5677, "step": 11049 }, { "epoch": 9.1701244813278, "grad_norm": 26.731035232543945, "learning_rate": 1.633460580912863e-05, "loss": 1.2336, "step": 11050 }, { "epoch": 9.170954356846472, "grad_norm": 22.057098388671875, "learning_rate": 1.6334273858921163e-05, "loss": 0.6797, "step": 11051 }, { "epoch": 9.171784232365145, "grad_norm": 30.969594955444336, "learning_rate": 1.6333941908713695e-05, "loss": 1.1044, "step": 11052 }, { "epoch": 9.172614107883817, "grad_norm": 16.520116806030273, "learning_rate": 1.6333609958506224e-05, "loss": 0.647, "step": 11053 }, { "epoch": 
9.173443983402489, "grad_norm": 24.41060447692871, "learning_rate": 1.6333278008298756e-05, "loss": 0.8761, "step": 11054 }, { "epoch": 9.174273858921161, "grad_norm": 16.23439598083496, "learning_rate": 1.6332946058091288e-05, "loss": 1.0211, "step": 11055 }, { "epoch": 9.175103734439833, "grad_norm": 29.600317001342773, "learning_rate": 1.633261410788382e-05, "loss": 1.4386, "step": 11056 }, { "epoch": 9.175933609958506, "grad_norm": 21.450029373168945, "learning_rate": 1.633228215767635e-05, "loss": 1.0045, "step": 11057 }, { "epoch": 9.176763485477178, "grad_norm": 24.768531799316406, "learning_rate": 1.633195020746888e-05, "loss": 0.9417, "step": 11058 }, { "epoch": 9.17759336099585, "grad_norm": 27.8055419921875, "learning_rate": 1.6331618257261413e-05, "loss": 0.9247, "step": 11059 }, { "epoch": 9.178423236514522, "grad_norm": 38.74369812011719, "learning_rate": 1.6331286307053945e-05, "loss": 0.93, "step": 11060 }, { "epoch": 9.179253112033194, "grad_norm": 32.74187469482422, "learning_rate": 1.6330954356846474e-05, "loss": 0.9912, "step": 11061 }, { "epoch": 9.180082987551867, "grad_norm": 18.858295440673828, "learning_rate": 1.6330622406639006e-05, "loss": 0.8991, "step": 11062 }, { "epoch": 9.180912863070539, "grad_norm": 37.409671783447266, "learning_rate": 1.6330290456431538e-05, "loss": 0.8137, "step": 11063 }, { "epoch": 9.181742738589211, "grad_norm": 32.76924514770508, "learning_rate": 1.6329958506224067e-05, "loss": 0.8041, "step": 11064 }, { "epoch": 9.182572614107883, "grad_norm": 30.475414276123047, "learning_rate": 1.63296265560166e-05, "loss": 1.0577, "step": 11065 }, { "epoch": 9.183402489626555, "grad_norm": 24.96804428100586, "learning_rate": 1.632929460580913e-05, "loss": 1.3129, "step": 11066 }, { "epoch": 9.184232365145228, "grad_norm": 49.94240188598633, "learning_rate": 1.632896265560166e-05, "loss": 1.0833, "step": 11067 }, { "epoch": 9.1850622406639, "grad_norm": 29.499801635742188, "learning_rate": 1.6328630705394192e-05, "loss": 
1.3905, "step": 11068 }, { "epoch": 9.185892116182572, "grad_norm": 61.42134475708008, "learning_rate": 1.632829875518672e-05, "loss": 0.9448, "step": 11069 }, { "epoch": 9.186721991701244, "grad_norm": 25.97158432006836, "learning_rate": 1.6327966804979253e-05, "loss": 0.9909, "step": 11070 }, { "epoch": 9.187551867219916, "grad_norm": 25.317276000976562, "learning_rate": 1.6327634854771785e-05, "loss": 0.9131, "step": 11071 }, { "epoch": 9.188381742738589, "grad_norm": 24.19032096862793, "learning_rate": 1.6327302904564317e-05, "loss": 0.9824, "step": 11072 }, { "epoch": 9.18921161825726, "grad_norm": 28.834228515625, "learning_rate": 1.632697095435685e-05, "loss": 0.8263, "step": 11073 }, { "epoch": 9.190041493775933, "grad_norm": 34.2288703918457, "learning_rate": 1.6326639004149378e-05, "loss": 0.9566, "step": 11074 }, { "epoch": 9.190871369294605, "grad_norm": 27.707529067993164, "learning_rate": 1.632630705394191e-05, "loss": 0.8568, "step": 11075 }, { "epoch": 9.191701244813277, "grad_norm": 23.770463943481445, "learning_rate": 1.6325975103734442e-05, "loss": 1.5174, "step": 11076 }, { "epoch": 9.19253112033195, "grad_norm": 19.322710037231445, "learning_rate": 1.6325643153526974e-05, "loss": 1.1296, "step": 11077 }, { "epoch": 9.193360995850622, "grad_norm": 28.20062255859375, "learning_rate": 1.6325311203319503e-05, "loss": 1.6544, "step": 11078 }, { "epoch": 9.194190871369294, "grad_norm": 28.66086769104004, "learning_rate": 1.6324979253112035e-05, "loss": 1.3295, "step": 11079 }, { "epoch": 9.195020746887966, "grad_norm": 18.830345153808594, "learning_rate": 1.6324647302904567e-05, "loss": 0.684, "step": 11080 }, { "epoch": 9.195850622406638, "grad_norm": 27.97766876220703, "learning_rate": 1.63243153526971e-05, "loss": 1.2991, "step": 11081 }, { "epoch": 9.19668049792531, "grad_norm": 18.37621307373047, "learning_rate": 1.6323983402489628e-05, "loss": 1.4307, "step": 11082 }, { "epoch": 9.197510373443983, "grad_norm": 25.804819107055664, 
"learning_rate": 1.632365145228216e-05, "loss": 0.8218, "step": 11083 }, { "epoch": 9.198340248962655, "grad_norm": 17.125852584838867, "learning_rate": 1.6323319502074692e-05, "loss": 0.5812, "step": 11084 }, { "epoch": 9.199170124481327, "grad_norm": 44.73506164550781, "learning_rate": 1.632298755186722e-05, "loss": 1.1625, "step": 11085 }, { "epoch": 9.2, "grad_norm": 17.258331298828125, "learning_rate": 1.6322655601659753e-05, "loss": 0.6531, "step": 11086 }, { "epoch": 9.200829875518671, "grad_norm": 27.140592575073242, "learning_rate": 1.632232365145228e-05, "loss": 0.8672, "step": 11087 }, { "epoch": 9.201659751037344, "grad_norm": 40.37653732299805, "learning_rate": 1.6321991701244814e-05, "loss": 1.1981, "step": 11088 }, { "epoch": 9.202489626556016, "grad_norm": 23.634302139282227, "learning_rate": 1.6321659751037346e-05, "loss": 0.8235, "step": 11089 }, { "epoch": 9.203319502074688, "grad_norm": 23.18385887145996, "learning_rate": 1.6321327800829875e-05, "loss": 0.7614, "step": 11090 }, { "epoch": 9.20414937759336, "grad_norm": 21.9251651763916, "learning_rate": 1.6320995850622407e-05, "loss": 0.8209, "step": 11091 }, { "epoch": 9.204979253112032, "grad_norm": 30.04041862487793, "learning_rate": 1.632066390041494e-05, "loss": 0.8944, "step": 11092 }, { "epoch": 9.205809128630705, "grad_norm": 14.804765701293945, "learning_rate": 1.632033195020747e-05, "loss": 0.7825, "step": 11093 }, { "epoch": 9.206639004149377, "grad_norm": 49.62969970703125, "learning_rate": 1.632e-05, "loss": 1.4414, "step": 11094 }, { "epoch": 9.207468879668049, "grad_norm": 22.42845916748047, "learning_rate": 1.6319668049792532e-05, "loss": 0.8228, "step": 11095 }, { "epoch": 9.208298755186721, "grad_norm": 20.21521759033203, "learning_rate": 1.6319336099585064e-05, "loss": 1.2351, "step": 11096 }, { "epoch": 9.209128630705393, "grad_norm": 32.59096145629883, "learning_rate": 1.6319004149377596e-05, "loss": 1.6616, "step": 11097 }, { "epoch": 9.209958506224066, "grad_norm": 
33.67155838012695, "learning_rate": 1.6318672199170128e-05, "loss": 1.3725, "step": 11098 }, { "epoch": 9.210788381742738, "grad_norm": 24.779170989990234, "learning_rate": 1.6318340248962657e-05, "loss": 1.472, "step": 11099 }, { "epoch": 9.21161825726141, "grad_norm": 20.80277442932129, "learning_rate": 1.631800829875519e-05, "loss": 0.8616, "step": 11100 }, { "epoch": 9.212448132780082, "grad_norm": 16.990633010864258, "learning_rate": 1.631767634854772e-05, "loss": 0.7537, "step": 11101 }, { "epoch": 9.213278008298754, "grad_norm": 23.035053253173828, "learning_rate": 1.631734439834025e-05, "loss": 1.0527, "step": 11102 }, { "epoch": 9.214107883817427, "grad_norm": 19.273042678833008, "learning_rate": 1.6317012448132782e-05, "loss": 1.355, "step": 11103 }, { "epoch": 9.214937759336099, "grad_norm": 32.74387741088867, "learning_rate": 1.6316680497925314e-05, "loss": 1.5493, "step": 11104 }, { "epoch": 9.215767634854771, "grad_norm": 20.408525466918945, "learning_rate": 1.6316348547717843e-05, "loss": 0.989, "step": 11105 }, { "epoch": 9.216597510373443, "grad_norm": 28.90531349182129, "learning_rate": 1.6316016597510375e-05, "loss": 1.2507, "step": 11106 }, { "epoch": 9.217427385892115, "grad_norm": 31.889402389526367, "learning_rate": 1.6315684647302903e-05, "loss": 0.9342, "step": 11107 }, { "epoch": 9.218257261410788, "grad_norm": 36.74374771118164, "learning_rate": 1.6315352697095436e-05, "loss": 1.0938, "step": 11108 }, { "epoch": 9.21908713692946, "grad_norm": 33.52077102661133, "learning_rate": 1.6315020746887968e-05, "loss": 1.5475, "step": 11109 }, { "epoch": 9.219917012448132, "grad_norm": 15.917607307434082, "learning_rate": 1.63146887966805e-05, "loss": 0.5927, "step": 11110 }, { "epoch": 9.220746887966804, "grad_norm": 20.866037368774414, "learning_rate": 1.631435684647303e-05, "loss": 1.6578, "step": 11111 }, { "epoch": 9.221576763485476, "grad_norm": 24.0421199798584, "learning_rate": 1.631402489626556e-05, "loss": 1.3875, "step": 11112 }, { 
"epoch": 9.222406639004149, "grad_norm": 28.422218322753906, "learning_rate": 1.6313692946058093e-05, "loss": 0.9974, "step": 11113 }, { "epoch": 9.22323651452282, "grad_norm": 20.21782112121582, "learning_rate": 1.6313360995850625e-05, "loss": 1.0269, "step": 11114 }, { "epoch": 9.224066390041493, "grad_norm": 19.512962341308594, "learning_rate": 1.6313029045643154e-05, "loss": 0.7636, "step": 11115 }, { "epoch": 9.224896265560165, "grad_norm": 30.35669708251953, "learning_rate": 1.6312697095435686e-05, "loss": 1.5226, "step": 11116 }, { "epoch": 9.225726141078837, "grad_norm": 29.711509704589844, "learning_rate": 1.6312365145228218e-05, "loss": 1.4374, "step": 11117 }, { "epoch": 9.22655601659751, "grad_norm": 19.638439178466797, "learning_rate": 1.631203319502075e-05, "loss": 0.7732, "step": 11118 }, { "epoch": 9.227385892116182, "grad_norm": 22.682918548583984, "learning_rate": 1.631170124481328e-05, "loss": 1.1244, "step": 11119 }, { "epoch": 9.228215767634854, "grad_norm": 16.941320419311523, "learning_rate": 1.631136929460581e-05, "loss": 1.0495, "step": 11120 }, { "epoch": 9.229045643153526, "grad_norm": 17.268352508544922, "learning_rate": 1.6311037344398343e-05, "loss": 0.8136, "step": 11121 }, { "epoch": 9.229875518672198, "grad_norm": 22.9189453125, "learning_rate": 1.6310705394190875e-05, "loss": 0.6355, "step": 11122 }, { "epoch": 9.23070539419087, "grad_norm": 20.440549850463867, "learning_rate": 1.6310373443983404e-05, "loss": 1.1027, "step": 11123 }, { "epoch": 9.231535269709543, "grad_norm": 30.420122146606445, "learning_rate": 1.6310041493775936e-05, "loss": 1.6357, "step": 11124 }, { "epoch": 9.232365145228215, "grad_norm": 15.931254386901855, "learning_rate": 1.6309709543568464e-05, "loss": 0.7246, "step": 11125 }, { "epoch": 9.233195020746887, "grad_norm": 46.35553741455078, "learning_rate": 1.6309377593360997e-05, "loss": 1.0664, "step": 11126 }, { "epoch": 9.23402489626556, "grad_norm": 38.581207275390625, "learning_rate": 
1.630904564315353e-05, "loss": 1.0393, "step": 11127 }, { "epoch": 9.234854771784232, "grad_norm": 29.949119567871094, "learning_rate": 1.6308713692946057e-05, "loss": 1.7765, "step": 11128 }, { "epoch": 9.235684647302904, "grad_norm": 24.20206642150879, "learning_rate": 1.630838174273859e-05, "loss": 0.6698, "step": 11129 }, { "epoch": 9.236514522821576, "grad_norm": 21.931184768676758, "learning_rate": 1.630804979253112e-05, "loss": 1.082, "step": 11130 }, { "epoch": 9.237344398340248, "grad_norm": 18.989652633666992, "learning_rate": 1.6307717842323654e-05, "loss": 0.73, "step": 11131 }, { "epoch": 9.23817427385892, "grad_norm": 23.082212448120117, "learning_rate": 1.6307385892116182e-05, "loss": 0.8119, "step": 11132 }, { "epoch": 9.239004149377593, "grad_norm": 30.446531295776367, "learning_rate": 1.6307053941908715e-05, "loss": 1.3943, "step": 11133 }, { "epoch": 9.239834024896265, "grad_norm": 23.724273681640625, "learning_rate": 1.6306721991701247e-05, "loss": 1.2255, "step": 11134 }, { "epoch": 9.240663900414937, "grad_norm": 24.71237564086914, "learning_rate": 1.630639004149378e-05, "loss": 0.8396, "step": 11135 }, { "epoch": 9.241493775933609, "grad_norm": 24.6689510345459, "learning_rate": 1.6306058091286307e-05, "loss": 0.4345, "step": 11136 }, { "epoch": 9.242323651452281, "grad_norm": 42.287689208984375, "learning_rate": 1.630572614107884e-05, "loss": 1.2562, "step": 11137 }, { "epoch": 9.243153526970953, "grad_norm": 39.077857971191406, "learning_rate": 1.630539419087137e-05, "loss": 1.5347, "step": 11138 }, { "epoch": 9.243983402489626, "grad_norm": 19.578716278076172, "learning_rate": 1.6305062240663904e-05, "loss": 0.7865, "step": 11139 }, { "epoch": 9.244813278008298, "grad_norm": 22.964683532714844, "learning_rate": 1.6304730290456433e-05, "loss": 1.0966, "step": 11140 }, { "epoch": 9.24564315352697, "grad_norm": 24.740440368652344, "learning_rate": 1.6304398340248965e-05, "loss": 0.7375, "step": 11141 }, { "epoch": 9.246473029045642, 
"grad_norm": 25.03665542602539, "learning_rate": 1.6304066390041497e-05, "loss": 0.765, "step": 11142 }, { "epoch": 9.247302904564314, "grad_norm": 35.41627883911133, "learning_rate": 1.6303734439834025e-05, "loss": 0.9928, "step": 11143 }, { "epoch": 9.248132780082987, "grad_norm": 34.719886779785156, "learning_rate": 1.6303402489626558e-05, "loss": 1.2335, "step": 11144 }, { "epoch": 9.248962655601659, "grad_norm": 41.870452880859375, "learning_rate": 1.630307053941909e-05, "loss": 1.9256, "step": 11145 }, { "epoch": 9.249792531120331, "grad_norm": 22.734600067138672, "learning_rate": 1.630273858921162e-05, "loss": 1.0911, "step": 11146 }, { "epoch": 9.250622406639003, "grad_norm": 29.733882904052734, "learning_rate": 1.630240663900415e-05, "loss": 1.0481, "step": 11147 }, { "epoch": 9.251452282157675, "grad_norm": 25.548940658569336, "learning_rate": 1.630207468879668e-05, "loss": 0.8013, "step": 11148 }, { "epoch": 9.252282157676348, "grad_norm": 24.31016731262207, "learning_rate": 1.630174273858921e-05, "loss": 0.954, "step": 11149 }, { "epoch": 9.25311203319502, "grad_norm": 38.152061462402344, "learning_rate": 1.6301410788381743e-05, "loss": 1.5712, "step": 11150 }, { "epoch": 9.253941908713692, "grad_norm": 23.13477325439453, "learning_rate": 1.6301078838174276e-05, "loss": 0.8928, "step": 11151 }, { "epoch": 9.254771784232364, "grad_norm": 25.98270034790039, "learning_rate": 1.6300746887966808e-05, "loss": 1.3063, "step": 11152 }, { "epoch": 9.255601659751036, "grad_norm": 23.42844009399414, "learning_rate": 1.6300414937759336e-05, "loss": 0.9309, "step": 11153 }, { "epoch": 9.256431535269709, "grad_norm": 23.99871826171875, "learning_rate": 1.630008298755187e-05, "loss": 1.0321, "step": 11154 }, { "epoch": 9.25726141078838, "grad_norm": 21.23006820678711, "learning_rate": 1.62997510373444e-05, "loss": 1.1654, "step": 11155 }, { "epoch": 9.258091286307055, "grad_norm": 26.768665313720703, "learning_rate": 1.6299419087136933e-05, "loss": 1.4085, "step": 
11156 }, { "epoch": 9.258921161825727, "grad_norm": 22.818584442138672, "learning_rate": 1.629908713692946e-05, "loss": 0.7068, "step": 11157 }, { "epoch": 9.2597510373444, "grad_norm": 18.321725845336914, "learning_rate": 1.6298755186721994e-05, "loss": 0.7239, "step": 11158 }, { "epoch": 9.260580912863071, "grad_norm": 25.672475814819336, "learning_rate": 1.6298423236514526e-05, "loss": 1.4391, "step": 11159 }, { "epoch": 9.261410788381744, "grad_norm": 24.28582763671875, "learning_rate": 1.6298091286307058e-05, "loss": 1.4476, "step": 11160 }, { "epoch": 9.262240663900416, "grad_norm": 23.484777450561523, "learning_rate": 1.6297759336099586e-05, "loss": 0.849, "step": 11161 }, { "epoch": 9.263070539419088, "grad_norm": 27.66670799255371, "learning_rate": 1.629742738589212e-05, "loss": 1.1239, "step": 11162 }, { "epoch": 9.26390041493776, "grad_norm": 19.333040237426758, "learning_rate": 1.6297095435684647e-05, "loss": 0.8656, "step": 11163 }, { "epoch": 9.264730290456432, "grad_norm": 20.577192306518555, "learning_rate": 1.629676348547718e-05, "loss": 1.1184, "step": 11164 }, { "epoch": 9.265560165975105, "grad_norm": 34.90964126586914, "learning_rate": 1.629643153526971e-05, "loss": 1.8598, "step": 11165 }, { "epoch": 9.266390041493777, "grad_norm": 31.124332427978516, "learning_rate": 1.629609958506224e-05, "loss": 1.12, "step": 11166 }, { "epoch": 9.267219917012449, "grad_norm": 18.388301849365234, "learning_rate": 1.6295767634854772e-05, "loss": 1.2486, "step": 11167 }, { "epoch": 9.268049792531121, "grad_norm": 23.723594665527344, "learning_rate": 1.6295435684647304e-05, "loss": 1.1727, "step": 11168 }, { "epoch": 9.268879668049793, "grad_norm": 24.54149627685547, "learning_rate": 1.6295103734439833e-05, "loss": 1.182, "step": 11169 }, { "epoch": 9.269709543568466, "grad_norm": 26.715360641479492, "learning_rate": 1.6294771784232365e-05, "loss": 1.2514, "step": 11170 }, { "epoch": 9.270539419087138, "grad_norm": 17.538740158081055, "learning_rate": 
1.6294439834024897e-05, "loss": 0.6102, "step": 11171 }, { "epoch": 9.27136929460581, "grad_norm": 30.816709518432617, "learning_rate": 1.629410788381743e-05, "loss": 0.8444, "step": 11172 }, { "epoch": 9.272199170124482, "grad_norm": 24.782331466674805, "learning_rate": 1.6293775933609958e-05, "loss": 1.2932, "step": 11173 }, { "epoch": 9.273029045643154, "grad_norm": 12.109030723571777, "learning_rate": 1.629344398340249e-05, "loss": 0.5196, "step": 11174 }, { "epoch": 9.273858921161827, "grad_norm": 38.236507415771484, "learning_rate": 1.6293112033195022e-05, "loss": 1.0586, "step": 11175 }, { "epoch": 9.274688796680499, "grad_norm": 13.38786506652832, "learning_rate": 1.6292780082987554e-05, "loss": 0.6634, "step": 11176 }, { "epoch": 9.275518672199171, "grad_norm": 19.84398078918457, "learning_rate": 1.6292448132780087e-05, "loss": 0.7635, "step": 11177 }, { "epoch": 9.276348547717843, "grad_norm": 24.28194236755371, "learning_rate": 1.6292116182572615e-05, "loss": 1.4647, "step": 11178 }, { "epoch": 9.277178423236515, "grad_norm": 30.428974151611328, "learning_rate": 1.6291784232365147e-05, "loss": 1.0278, "step": 11179 }, { "epoch": 9.278008298755188, "grad_norm": 27.778532028198242, "learning_rate": 1.629145228215768e-05, "loss": 1.0107, "step": 11180 }, { "epoch": 9.27883817427386, "grad_norm": 38.94574737548828, "learning_rate": 1.6291120331950208e-05, "loss": 1.1313, "step": 11181 }, { "epoch": 9.279668049792532, "grad_norm": 47.239986419677734, "learning_rate": 1.629078838174274e-05, "loss": 1.288, "step": 11182 }, { "epoch": 9.280497925311204, "grad_norm": 27.34079360961914, "learning_rate": 1.6290456431535272e-05, "loss": 1.1781, "step": 11183 }, { "epoch": 9.281327800829876, "grad_norm": 29.36865234375, "learning_rate": 1.62901244813278e-05, "loss": 1.3603, "step": 11184 }, { "epoch": 9.282157676348548, "grad_norm": 26.700109481811523, "learning_rate": 1.6289792531120333e-05, "loss": 0.9487, "step": 11185 }, { "epoch": 9.28298755186722, "grad_norm": 
29.40863037109375, "learning_rate": 1.6289460580912862e-05, "loss": 1.0249, "step": 11186 }, { "epoch": 9.283817427385893, "grad_norm": 24.38871192932129, "learning_rate": 1.6289128630705394e-05, "loss": 0.8404, "step": 11187 }, { "epoch": 9.284647302904565, "grad_norm": 30.118589401245117, "learning_rate": 1.6288796680497926e-05, "loss": 1.1596, "step": 11188 }, { "epoch": 9.285477178423237, "grad_norm": 21.710371017456055, "learning_rate": 1.628846473029046e-05, "loss": 0.715, "step": 11189 }, { "epoch": 9.28630705394191, "grad_norm": 24.634166717529297, "learning_rate": 1.6288132780082987e-05, "loss": 0.6609, "step": 11190 }, { "epoch": 9.287136929460582, "grad_norm": 16.215099334716797, "learning_rate": 1.628780082987552e-05, "loss": 0.6382, "step": 11191 }, { "epoch": 9.287966804979254, "grad_norm": 25.184412002563477, "learning_rate": 1.628746887966805e-05, "loss": 0.8255, "step": 11192 }, { "epoch": 9.288796680497926, "grad_norm": 21.49614715576172, "learning_rate": 1.6287136929460583e-05, "loss": 0.8614, "step": 11193 }, { "epoch": 9.289626556016598, "grad_norm": 29.408613204956055, "learning_rate": 1.6286804979253112e-05, "loss": 1.6496, "step": 11194 }, { "epoch": 9.29045643153527, "grad_norm": 29.313739776611328, "learning_rate": 1.6286473029045644e-05, "loss": 1.0205, "step": 11195 }, { "epoch": 9.291286307053943, "grad_norm": 30.123252868652344, "learning_rate": 1.6286141078838176e-05, "loss": 1.3078, "step": 11196 }, { "epoch": 9.292116182572615, "grad_norm": 25.417253494262695, "learning_rate": 1.628580912863071e-05, "loss": 0.6947, "step": 11197 }, { "epoch": 9.292946058091287, "grad_norm": 19.391611099243164, "learning_rate": 1.6285477178423237e-05, "loss": 0.531, "step": 11198 }, { "epoch": 9.29377593360996, "grad_norm": 22.67169761657715, "learning_rate": 1.628514522821577e-05, "loss": 1.3339, "step": 11199 }, { "epoch": 9.294605809128631, "grad_norm": 21.497787475585938, "learning_rate": 1.62848132780083e-05, "loss": 1.0033, "step": 11200 }, { 
"epoch": 9.295435684647304, "grad_norm": 37.93194580078125, "learning_rate": 1.6284481327800833e-05, "loss": 1.0584, "step": 11201 }, { "epoch": 9.296265560165976, "grad_norm": 23.479127883911133, "learning_rate": 1.6284149377593362e-05, "loss": 0.5267, "step": 11202 }, { "epoch": 9.297095435684648, "grad_norm": 34.86320495605469, "learning_rate": 1.6283817427385894e-05, "loss": 1.2227, "step": 11203 }, { "epoch": 9.29792531120332, "grad_norm": 25.590831756591797, "learning_rate": 1.6283485477178423e-05, "loss": 0.8827, "step": 11204 }, { "epoch": 9.298755186721992, "grad_norm": 42.1112174987793, "learning_rate": 1.6283153526970955e-05, "loss": 1.1134, "step": 11205 }, { "epoch": 9.299585062240665, "grad_norm": 25.207704544067383, "learning_rate": 1.6282821576763487e-05, "loss": 0.9593, "step": 11206 }, { "epoch": 9.300414937759337, "grad_norm": 19.366405487060547, "learning_rate": 1.6282489626556016e-05, "loss": 0.7309, "step": 11207 }, { "epoch": 9.301244813278009, "grad_norm": 24.52188491821289, "learning_rate": 1.6282157676348548e-05, "loss": 1.2268, "step": 11208 }, { "epoch": 9.302074688796681, "grad_norm": 53.23998260498047, "learning_rate": 1.628182572614108e-05, "loss": 1.5543, "step": 11209 }, { "epoch": 9.302904564315353, "grad_norm": 31.266077041625977, "learning_rate": 1.6281493775933612e-05, "loss": 1.0568, "step": 11210 }, { "epoch": 9.303734439834026, "grad_norm": 25.345746994018555, "learning_rate": 1.628116182572614e-05, "loss": 1.1251, "step": 11211 }, { "epoch": 9.304564315352698, "grad_norm": 58.71098327636719, "learning_rate": 1.6280829875518673e-05, "loss": 0.7379, "step": 11212 }, { "epoch": 9.30539419087137, "grad_norm": 31.70122718811035, "learning_rate": 1.6280497925311205e-05, "loss": 1.4356, "step": 11213 }, { "epoch": 9.306224066390042, "grad_norm": 22.070878982543945, "learning_rate": 1.6280165975103737e-05, "loss": 0.9868, "step": 11214 }, { "epoch": 9.307053941908714, "grad_norm": 33.098819732666016, "learning_rate": 
1.6279834024896266e-05, "loss": 1.3421, "step": 11215 }, { "epoch": 9.307883817427387, "grad_norm": 54.92607879638672, "learning_rate": 1.6279502074688798e-05, "loss": 1.4219, "step": 11216 }, { "epoch": 9.308713692946059, "grad_norm": 38.86264419555664, "learning_rate": 1.627917012448133e-05, "loss": 1.1224, "step": 11217 }, { "epoch": 9.309543568464731, "grad_norm": 21.075918197631836, "learning_rate": 1.6278838174273862e-05, "loss": 0.6484, "step": 11218 }, { "epoch": 9.310373443983403, "grad_norm": 21.424348831176758, "learning_rate": 1.627850622406639e-05, "loss": 0.8728, "step": 11219 }, { "epoch": 9.311203319502075, "grad_norm": 19.654342651367188, "learning_rate": 1.6278174273858923e-05, "loss": 1.0245, "step": 11220 }, { "epoch": 9.312033195020748, "grad_norm": 21.920743942260742, "learning_rate": 1.6277842323651455e-05, "loss": 0.5841, "step": 11221 }, { "epoch": 9.31286307053942, "grad_norm": 18.499292373657227, "learning_rate": 1.6277510373443984e-05, "loss": 0.565, "step": 11222 }, { "epoch": 9.313692946058092, "grad_norm": 27.58374786376953, "learning_rate": 1.6277178423236516e-05, "loss": 0.8736, "step": 11223 }, { "epoch": 9.314522821576764, "grad_norm": 14.891357421875, "learning_rate": 1.6276846473029045e-05, "loss": 0.9719, "step": 11224 }, { "epoch": 9.315352697095436, "grad_norm": 20.887149810791016, "learning_rate": 1.6276514522821577e-05, "loss": 1.1205, "step": 11225 }, { "epoch": 9.316182572614109, "grad_norm": 63.22389221191406, "learning_rate": 1.627618257261411e-05, "loss": 1.5768, "step": 11226 }, { "epoch": 9.31701244813278, "grad_norm": 63.527896881103516, "learning_rate": 1.6275850622406638e-05, "loss": 2.2761, "step": 11227 }, { "epoch": 9.317842323651453, "grad_norm": 19.2081241607666, "learning_rate": 1.627551867219917e-05, "loss": 0.7095, "step": 11228 }, { "epoch": 9.318672199170125, "grad_norm": 24.639171600341797, "learning_rate": 1.6275186721991702e-05, "loss": 0.9625, "step": 11229 }, { "epoch": 9.319502074688797, 
"grad_norm": 37.651790618896484, "learning_rate": 1.6274854771784234e-05, "loss": 1.4156, "step": 11230 }, { "epoch": 9.32033195020747, "grad_norm": 23.4586181640625, "learning_rate": 1.6274522821576766e-05, "loss": 1.4674, "step": 11231 }, { "epoch": 9.321161825726142, "grad_norm": 31.82343101501465, "learning_rate": 1.6274190871369295e-05, "loss": 1.2225, "step": 11232 }, { "epoch": 9.321991701244814, "grad_norm": 14.556546211242676, "learning_rate": 1.6273858921161827e-05, "loss": 0.4476, "step": 11233 }, { "epoch": 9.322821576763486, "grad_norm": 58.7256965637207, "learning_rate": 1.627352697095436e-05, "loss": 1.1654, "step": 11234 }, { "epoch": 9.323651452282158, "grad_norm": 18.845396041870117, "learning_rate": 1.627319502074689e-05, "loss": 0.7633, "step": 11235 }, { "epoch": 9.32448132780083, "grad_norm": 36.88822937011719, "learning_rate": 1.627286307053942e-05, "loss": 1.5803, "step": 11236 }, { "epoch": 9.325311203319503, "grad_norm": 28.65692138671875, "learning_rate": 1.6272531120331952e-05, "loss": 0.6148, "step": 11237 }, { "epoch": 9.326141078838175, "grad_norm": 33.217308044433594, "learning_rate": 1.6272199170124484e-05, "loss": 1.3731, "step": 11238 }, { "epoch": 9.326970954356847, "grad_norm": 24.974403381347656, "learning_rate": 1.6271867219917016e-05, "loss": 0.6304, "step": 11239 }, { "epoch": 9.32780082987552, "grad_norm": 16.92180633544922, "learning_rate": 1.6271535269709545e-05, "loss": 1.1872, "step": 11240 }, { "epoch": 9.328630705394191, "grad_norm": 25.149673461914062, "learning_rate": 1.6271203319502077e-05, "loss": 1.005, "step": 11241 }, { "epoch": 9.329460580912864, "grad_norm": 26.542720794677734, "learning_rate": 1.6270871369294606e-05, "loss": 0.8255, "step": 11242 }, { "epoch": 9.330290456431536, "grad_norm": 25.094127655029297, "learning_rate": 1.6270539419087138e-05, "loss": 1.0599, "step": 11243 }, { "epoch": 9.331120331950208, "grad_norm": 41.12450408935547, "learning_rate": 1.627020746887967e-05, "loss": 1.3102, "step": 
11244 }, { "epoch": 9.33195020746888, "grad_norm": 26.358863830566406, "learning_rate": 1.62698755186722e-05, "loss": 0.6365, "step": 11245 }, { "epoch": 9.332780082987552, "grad_norm": 17.26900863647461, "learning_rate": 1.626954356846473e-05, "loss": 0.8617, "step": 11246 }, { "epoch": 9.333609958506225, "grad_norm": 48.12744140625, "learning_rate": 1.6269211618257263e-05, "loss": 2.0997, "step": 11247 }, { "epoch": 9.334439834024897, "grad_norm": 47.210330963134766, "learning_rate": 1.6268879668049792e-05, "loss": 1.1338, "step": 11248 }, { "epoch": 9.335269709543569, "grad_norm": 21.712627410888672, "learning_rate": 1.6268547717842324e-05, "loss": 0.954, "step": 11249 }, { "epoch": 9.336099585062241, "grad_norm": 31.249818801879883, "learning_rate": 1.6268215767634856e-05, "loss": 1.1854, "step": 11250 }, { "epoch": 9.336929460580913, "grad_norm": 23.795772552490234, "learning_rate": 1.6267883817427388e-05, "loss": 0.9617, "step": 11251 }, { "epoch": 9.337759336099586, "grad_norm": 39.792667388916016, "learning_rate": 1.6267551867219917e-05, "loss": 2.0324, "step": 11252 }, { "epoch": 9.338589211618258, "grad_norm": 30.616546630859375, "learning_rate": 1.626721991701245e-05, "loss": 0.8902, "step": 11253 }, { "epoch": 9.33941908713693, "grad_norm": 39.52893829345703, "learning_rate": 1.626688796680498e-05, "loss": 1.1361, "step": 11254 }, { "epoch": 9.340248962655602, "grad_norm": 22.501222610473633, "learning_rate": 1.6266556016597513e-05, "loss": 0.923, "step": 11255 }, { "epoch": 9.341078838174274, "grad_norm": 17.252925872802734, "learning_rate": 1.6266224066390045e-05, "loss": 0.7254, "step": 11256 }, { "epoch": 9.341908713692947, "grad_norm": 29.57539939880371, "learning_rate": 1.6265892116182574e-05, "loss": 1.0601, "step": 11257 }, { "epoch": 9.342738589211619, "grad_norm": 32.68086624145508, "learning_rate": 1.6265560165975106e-05, "loss": 1.3116, "step": 11258 }, { "epoch": 9.343568464730291, "grad_norm": 17.759986877441406, "learning_rate": 
1.6265228215767638e-05, "loss": 0.6928, "step": 11259 }, { "epoch": 9.344398340248963, "grad_norm": 56.27077865600586, "learning_rate": 1.6264896265560167e-05, "loss": 1.8067, "step": 11260 }, { "epoch": 9.345228215767635, "grad_norm": 18.253097534179688, "learning_rate": 1.62645643153527e-05, "loss": 0.8449, "step": 11261 }, { "epoch": 9.346058091286308, "grad_norm": 17.879770278930664, "learning_rate": 1.626423236514523e-05, "loss": 0.8259, "step": 11262 }, { "epoch": 9.34688796680498, "grad_norm": 24.352638244628906, "learning_rate": 1.626390041493776e-05, "loss": 1.1706, "step": 11263 }, { "epoch": 9.347717842323652, "grad_norm": 22.576778411865234, "learning_rate": 1.6263568464730292e-05, "loss": 1.269, "step": 11264 }, { "epoch": 9.348547717842324, "grad_norm": 15.82395076751709, "learning_rate": 1.626323651452282e-05, "loss": 0.6107, "step": 11265 }, { "epoch": 9.349377593360996, "grad_norm": 20.173927307128906, "learning_rate": 1.6262904564315353e-05, "loss": 1.0162, "step": 11266 }, { "epoch": 9.350207468879669, "grad_norm": 17.859210968017578, "learning_rate": 1.6262572614107885e-05, "loss": 1.0173, "step": 11267 }, { "epoch": 9.35103734439834, "grad_norm": 20.18362045288086, "learning_rate": 1.6262240663900417e-05, "loss": 1.0613, "step": 11268 }, { "epoch": 9.351867219917013, "grad_norm": 22.19801902770996, "learning_rate": 1.6261908713692946e-05, "loss": 1.2188, "step": 11269 }, { "epoch": 9.352697095435685, "grad_norm": 38.69721603393555, "learning_rate": 1.6261576763485478e-05, "loss": 1.2691, "step": 11270 }, { "epoch": 9.353526970954357, "grad_norm": 19.016725540161133, "learning_rate": 1.626124481327801e-05, "loss": 1.1998, "step": 11271 }, { "epoch": 9.35435684647303, "grad_norm": 26.232023239135742, "learning_rate": 1.6260912863070542e-05, "loss": 1.1897, "step": 11272 }, { "epoch": 9.355186721991702, "grad_norm": 18.283275604248047, "learning_rate": 1.626058091286307e-05, "loss": 0.4987, "step": 11273 }, { "epoch": 9.356016597510374, 
"grad_norm": 21.548320770263672, "learning_rate": 1.6260248962655603e-05, "loss": 0.9844, "step": 11274 }, { "epoch": 9.356846473029046, "grad_norm": 21.56867790222168, "learning_rate": 1.6259917012448135e-05, "loss": 0.8244, "step": 11275 }, { "epoch": 9.357676348547718, "grad_norm": 22.404800415039062, "learning_rate": 1.6259585062240667e-05, "loss": 0.9765, "step": 11276 }, { "epoch": 9.35850622406639, "grad_norm": 21.683582305908203, "learning_rate": 1.6259253112033196e-05, "loss": 0.6751, "step": 11277 }, { "epoch": 9.359336099585063, "grad_norm": 20.104598999023438, "learning_rate": 1.6258921161825728e-05, "loss": 1.1065, "step": 11278 }, { "epoch": 9.360165975103735, "grad_norm": 19.384876251220703, "learning_rate": 1.625858921161826e-05, "loss": 1.188, "step": 11279 }, { "epoch": 9.360995850622407, "grad_norm": 19.296506881713867, "learning_rate": 1.625825726141079e-05, "loss": 0.805, "step": 11280 }, { "epoch": 9.36182572614108, "grad_norm": 12.504915237426758, "learning_rate": 1.625792531120332e-05, "loss": 0.455, "step": 11281 }, { "epoch": 9.362655601659752, "grad_norm": 46.089935302734375, "learning_rate": 1.6257593360995853e-05, "loss": 1.2428, "step": 11282 }, { "epoch": 9.363485477178424, "grad_norm": 26.188451766967773, "learning_rate": 1.625726141078838e-05, "loss": 1.1645, "step": 11283 }, { "epoch": 9.364315352697096, "grad_norm": 27.96018409729004, "learning_rate": 1.6256929460580914e-05, "loss": 1.0534, "step": 11284 }, { "epoch": 9.365145228215768, "grad_norm": 27.59801483154297, "learning_rate": 1.6256597510373446e-05, "loss": 0.7681, "step": 11285 }, { "epoch": 9.36597510373444, "grad_norm": 25.205854415893555, "learning_rate": 1.6256265560165975e-05, "loss": 1.4046, "step": 11286 }, { "epoch": 9.366804979253113, "grad_norm": 25.3017578125, "learning_rate": 1.6255933609958507e-05, "loss": 0.9968, "step": 11287 }, { "epoch": 9.367634854771785, "grad_norm": 51.22673416137695, "learning_rate": 1.625560165975104e-05, "loss": 1.038, "step": 
11288 }, { "epoch": 9.368464730290457, "grad_norm": 39.492279052734375, "learning_rate": 1.625526970954357e-05, "loss": 0.924, "step": 11289 }, { "epoch": 9.369294605809129, "grad_norm": 29.211017608642578, "learning_rate": 1.62549377593361e-05, "loss": 1.4353, "step": 11290 }, { "epoch": 9.370124481327801, "grad_norm": 38.29137420654297, "learning_rate": 1.625460580912863e-05, "loss": 0.9909, "step": 11291 }, { "epoch": 9.370954356846473, "grad_norm": 37.36344909667969, "learning_rate": 1.6254273858921164e-05, "loss": 0.6952, "step": 11292 }, { "epoch": 9.371784232365146, "grad_norm": 15.230473518371582, "learning_rate": 1.6253941908713696e-05, "loss": 0.738, "step": 11293 }, { "epoch": 9.372614107883818, "grad_norm": 32.43437194824219, "learning_rate": 1.6253609958506225e-05, "loss": 1.1517, "step": 11294 }, { "epoch": 9.37344398340249, "grad_norm": 45.17716979980469, "learning_rate": 1.6253278008298757e-05, "loss": 1.5766, "step": 11295 }, { "epoch": 9.374273858921162, "grad_norm": 18.554079055786133, "learning_rate": 1.625294605809129e-05, "loss": 0.7352, "step": 11296 }, { "epoch": 9.375103734439834, "grad_norm": 51.62055587768555, "learning_rate": 1.625261410788382e-05, "loss": 1.1127, "step": 11297 }, { "epoch": 9.375933609958507, "grad_norm": 29.776443481445312, "learning_rate": 1.625228215767635e-05, "loss": 0.9103, "step": 11298 }, { "epoch": 9.376763485477179, "grad_norm": 26.03537368774414, "learning_rate": 1.6251950207468882e-05, "loss": 1.2218, "step": 11299 }, { "epoch": 9.377593360995851, "grad_norm": 27.3989200592041, "learning_rate": 1.6251618257261414e-05, "loss": 1.1046, "step": 11300 }, { "epoch": 9.378423236514523, "grad_norm": 37.108394622802734, "learning_rate": 1.6251286307053943e-05, "loss": 0.9491, "step": 11301 }, { "epoch": 9.379253112033195, "grad_norm": 38.60244369506836, "learning_rate": 1.6250954356846475e-05, "loss": 1.2832, "step": 11302 }, { "epoch": 9.380082987551868, "grad_norm": 21.082347869873047, "learning_rate": 
1.6250622406639003e-05, "loss": 0.752, "step": 11303 }, { "epoch": 9.38091286307054, "grad_norm": 25.913585662841797, "learning_rate": 1.6250290456431536e-05, "loss": 1.3379, "step": 11304 }, { "epoch": 9.381742738589212, "grad_norm": 28.99268341064453, "learning_rate": 1.6249958506224068e-05, "loss": 1.115, "step": 11305 }, { "epoch": 9.382572614107884, "grad_norm": 21.820009231567383, "learning_rate": 1.6249626556016596e-05, "loss": 0.7992, "step": 11306 }, { "epoch": 9.383402489626556, "grad_norm": 14.340699195861816, "learning_rate": 1.624929460580913e-05, "loss": 0.717, "step": 11307 }, { "epoch": 9.384232365145229, "grad_norm": 20.977365493774414, "learning_rate": 1.624896265560166e-05, "loss": 1.0475, "step": 11308 }, { "epoch": 9.3850622406639, "grad_norm": 31.671730041503906, "learning_rate": 1.6248630705394193e-05, "loss": 0.7591, "step": 11309 }, { "epoch": 9.385892116182573, "grad_norm": 22.305830001831055, "learning_rate": 1.6248298755186725e-05, "loss": 0.755, "step": 11310 }, { "epoch": 9.386721991701245, "grad_norm": 29.481176376342773, "learning_rate": 1.6247966804979254e-05, "loss": 1.3203, "step": 11311 }, { "epoch": 9.387551867219917, "grad_norm": 24.74065589904785, "learning_rate": 1.6247634854771786e-05, "loss": 1.0079, "step": 11312 }, { "epoch": 9.38838174273859, "grad_norm": 20.77675437927246, "learning_rate": 1.6247302904564318e-05, "loss": 1.2909, "step": 11313 }, { "epoch": 9.389211618257262, "grad_norm": 16.059234619140625, "learning_rate": 1.624697095435685e-05, "loss": 0.7807, "step": 11314 }, { "epoch": 9.390041493775934, "grad_norm": 19.431211471557617, "learning_rate": 1.624663900414938e-05, "loss": 0.8598, "step": 11315 }, { "epoch": 9.390871369294606, "grad_norm": 27.738279342651367, "learning_rate": 1.624630705394191e-05, "loss": 0.9971, "step": 11316 }, { "epoch": 9.391701244813278, "grad_norm": 15.90449333190918, "learning_rate": 1.6245975103734443e-05, "loss": 0.5165, "step": 11317 }, { "epoch": 9.39253112033195, "grad_norm": 
46.44729232788086, "learning_rate": 1.6245643153526975e-05, "loss": 0.7065, "step": 11318 }, { "epoch": 9.393360995850623, "grad_norm": 34.89187240600586, "learning_rate": 1.6245311203319504e-05, "loss": 0.9715, "step": 11319 }, { "epoch": 9.394190871369295, "grad_norm": 27.634700775146484, "learning_rate": 1.6244979253112036e-05, "loss": 1.0949, "step": 11320 }, { "epoch": 9.395020746887967, "grad_norm": 19.730396270751953, "learning_rate": 1.6244647302904564e-05, "loss": 0.6875, "step": 11321 }, { "epoch": 9.39585062240664, "grad_norm": 32.39017868041992, "learning_rate": 1.6244315352697097e-05, "loss": 1.1112, "step": 11322 }, { "epoch": 9.396680497925312, "grad_norm": 20.684663772583008, "learning_rate": 1.624398340248963e-05, "loss": 1.1121, "step": 11323 }, { "epoch": 9.397510373443984, "grad_norm": 50.731021881103516, "learning_rate": 1.6243651452282157e-05, "loss": 1.8209, "step": 11324 }, { "epoch": 9.398340248962656, "grad_norm": 26.383502960205078, "learning_rate": 1.624331950207469e-05, "loss": 0.5083, "step": 11325 }, { "epoch": 9.399170124481328, "grad_norm": 34.985015869140625, "learning_rate": 1.624298755186722e-05, "loss": 0.7981, "step": 11326 }, { "epoch": 9.4, "grad_norm": 22.274133682250977, "learning_rate": 1.624265560165975e-05, "loss": 1.1471, "step": 11327 }, { "epoch": 9.400829875518673, "grad_norm": 32.847652435302734, "learning_rate": 1.6242323651452282e-05, "loss": 0.8881, "step": 11328 }, { "epoch": 9.401659751037345, "grad_norm": 28.776447296142578, "learning_rate": 1.6241991701244814e-05, "loss": 1.5464, "step": 11329 }, { "epoch": 9.402489626556017, "grad_norm": 28.455554962158203, "learning_rate": 1.6241659751037347e-05, "loss": 1.228, "step": 11330 }, { "epoch": 9.40331950207469, "grad_norm": 23.1414737701416, "learning_rate": 1.6241327800829875e-05, "loss": 0.8481, "step": 11331 }, { "epoch": 9.404149377593361, "grad_norm": 36.140785217285156, "learning_rate": 1.6240995850622407e-05, "loss": 2.2355, "step": 11332 }, { "epoch": 
9.404979253112034, "grad_norm": 17.856861114501953, "learning_rate": 1.624066390041494e-05, "loss": 0.5325, "step": 11333 }, { "epoch": 9.405809128630706, "grad_norm": 23.783401489257812, "learning_rate": 1.624033195020747e-05, "loss": 1.1333, "step": 11334 }, { "epoch": 9.406639004149378, "grad_norm": 29.381357192993164, "learning_rate": 1.6240000000000004e-05, "loss": 1.1223, "step": 11335 }, { "epoch": 9.40746887966805, "grad_norm": 38.02240753173828, "learning_rate": 1.6239668049792532e-05, "loss": 1.7193, "step": 11336 }, { "epoch": 9.408298755186722, "grad_norm": 29.828201293945312, "learning_rate": 1.6239336099585065e-05, "loss": 1.2902, "step": 11337 }, { "epoch": 9.409128630705395, "grad_norm": 38.48324966430664, "learning_rate": 1.6239004149377597e-05, "loss": 1.6554, "step": 11338 }, { "epoch": 9.409958506224067, "grad_norm": 13.228676795959473, "learning_rate": 1.6238672199170125e-05, "loss": 0.6223, "step": 11339 }, { "epoch": 9.410788381742739, "grad_norm": 19.467296600341797, "learning_rate": 1.6238340248962658e-05, "loss": 1.0099, "step": 11340 }, { "epoch": 9.411618257261411, "grad_norm": 39.23074722290039, "learning_rate": 1.6238008298755186e-05, "loss": 1.6465, "step": 11341 }, { "epoch": 9.412448132780083, "grad_norm": 23.65431785583496, "learning_rate": 1.623767634854772e-05, "loss": 1.0192, "step": 11342 }, { "epoch": 9.413278008298755, "grad_norm": 28.88099479675293, "learning_rate": 1.623734439834025e-05, "loss": 1.0278, "step": 11343 }, { "epoch": 9.414107883817428, "grad_norm": 19.786134719848633, "learning_rate": 1.623701244813278e-05, "loss": 0.7238, "step": 11344 }, { "epoch": 9.4149377593361, "grad_norm": 30.300596237182617, "learning_rate": 1.623668049792531e-05, "loss": 1.7085, "step": 11345 }, { "epoch": 9.415767634854772, "grad_norm": 20.40165901184082, "learning_rate": 1.6236348547717843e-05, "loss": 1.5267, "step": 11346 }, { "epoch": 9.416597510373444, "grad_norm": 19.82659912109375, "learning_rate": 1.6236016597510375e-05, 
"loss": 1.2726, "step": 11347 }, { "epoch": 9.417427385892116, "grad_norm": 39.1318359375, "learning_rate": 1.6235684647302904e-05, "loss": 1.4666, "step": 11348 }, { "epoch": 9.418257261410789, "grad_norm": 30.45718765258789, "learning_rate": 1.6235352697095436e-05, "loss": 1.6278, "step": 11349 }, { "epoch": 9.41908713692946, "grad_norm": 14.460484504699707, "learning_rate": 1.623502074688797e-05, "loss": 0.4688, "step": 11350 }, { "epoch": 9.419917012448133, "grad_norm": 18.38396453857422, "learning_rate": 1.62346887966805e-05, "loss": 0.6635, "step": 11351 }, { "epoch": 9.420746887966805, "grad_norm": 23.089879989624023, "learning_rate": 1.623435684647303e-05, "loss": 1.3825, "step": 11352 }, { "epoch": 9.421576763485477, "grad_norm": 17.96640968322754, "learning_rate": 1.623402489626556e-05, "loss": 0.4357, "step": 11353 }, { "epoch": 9.42240663900415, "grad_norm": 24.512226104736328, "learning_rate": 1.6233692946058093e-05, "loss": 1.0588, "step": 11354 }, { "epoch": 9.423236514522822, "grad_norm": 21.61933708190918, "learning_rate": 1.6233360995850626e-05, "loss": 0.805, "step": 11355 }, { "epoch": 9.424066390041494, "grad_norm": 23.792320251464844, "learning_rate": 1.6233029045643154e-05, "loss": 1.1317, "step": 11356 }, { "epoch": 9.424896265560166, "grad_norm": 26.329526901245117, "learning_rate": 1.6232697095435686e-05, "loss": 0.6378, "step": 11357 }, { "epoch": 9.425726141078838, "grad_norm": 21.6877384185791, "learning_rate": 1.623236514522822e-05, "loss": 1.0471, "step": 11358 }, { "epoch": 9.42655601659751, "grad_norm": 63.27497863769531, "learning_rate": 1.6232033195020747e-05, "loss": 1.137, "step": 11359 }, { "epoch": 9.427385892116183, "grad_norm": 31.33542251586914, "learning_rate": 1.623170124481328e-05, "loss": 0.9006, "step": 11360 }, { "epoch": 9.428215767634855, "grad_norm": 22.824193954467773, "learning_rate": 1.623136929460581e-05, "loss": 0.9957, "step": 11361 }, { "epoch": 9.429045643153527, "grad_norm": 29.49622344970703, 
"learning_rate": 1.623103734439834e-05, "loss": 1.3824, "step": 11362 }, { "epoch": 9.4298755186722, "grad_norm": 33.130760192871094, "learning_rate": 1.6230705394190872e-05, "loss": 1.2267, "step": 11363 }, { "epoch": 9.430705394190872, "grad_norm": 68.64053344726562, "learning_rate": 1.6230373443983404e-05, "loss": 0.7949, "step": 11364 }, { "epoch": 9.431535269709544, "grad_norm": 25.916332244873047, "learning_rate": 1.6230041493775933e-05, "loss": 0.8416, "step": 11365 }, { "epoch": 9.432365145228216, "grad_norm": 29.877124786376953, "learning_rate": 1.6229709543568465e-05, "loss": 1.0785, "step": 11366 }, { "epoch": 9.433195020746888, "grad_norm": 17.185413360595703, "learning_rate": 1.6229377593360997e-05, "loss": 0.9033, "step": 11367 }, { "epoch": 9.43402489626556, "grad_norm": 54.827571868896484, "learning_rate": 1.622904564315353e-05, "loss": 2.396, "step": 11368 }, { "epoch": 9.434854771784233, "grad_norm": 28.732309341430664, "learning_rate": 1.6228713692946058e-05, "loss": 0.7373, "step": 11369 }, { "epoch": 9.435684647302905, "grad_norm": 28.923479080200195, "learning_rate": 1.622838174273859e-05, "loss": 0.9048, "step": 11370 }, { "epoch": 9.436514522821577, "grad_norm": 33.467647552490234, "learning_rate": 1.6228049792531122e-05, "loss": 1.9194, "step": 11371 }, { "epoch": 9.43734439834025, "grad_norm": 32.278324127197266, "learning_rate": 1.6227717842323654e-05, "loss": 1.7831, "step": 11372 }, { "epoch": 9.438174273858921, "grad_norm": 23.75346565246582, "learning_rate": 1.6227385892116183e-05, "loss": 0.7206, "step": 11373 }, { "epoch": 9.439004149377594, "grad_norm": 26.972938537597656, "learning_rate": 1.6227053941908715e-05, "loss": 0.8433, "step": 11374 }, { "epoch": 9.439834024896266, "grad_norm": 29.269914627075195, "learning_rate": 1.6226721991701247e-05, "loss": 0.8084, "step": 11375 }, { "epoch": 9.440663900414938, "grad_norm": 25.162002563476562, "learning_rate": 1.622639004149378e-05, "loss": 0.9742, "step": 11376 }, { "epoch": 
9.44149377593361, "grad_norm": 21.164997100830078, "learning_rate": 1.6226058091286308e-05, "loss": 1.3114, "step": 11377 }, { "epoch": 9.442323651452282, "grad_norm": 27.286300659179688, "learning_rate": 1.622572614107884e-05, "loss": 1.7515, "step": 11378 }, { "epoch": 9.443153526970955, "grad_norm": 23.92548179626465, "learning_rate": 1.6225394190871372e-05, "loss": 0.7969, "step": 11379 }, { "epoch": 9.443983402489627, "grad_norm": 26.443359375, "learning_rate": 1.62250622406639e-05, "loss": 1.411, "step": 11380 }, { "epoch": 9.444813278008299, "grad_norm": 28.30155372619629, "learning_rate": 1.6224730290456433e-05, "loss": 0.9511, "step": 11381 }, { "epoch": 9.445643153526971, "grad_norm": 13.728617668151855, "learning_rate": 1.6224398340248962e-05, "loss": 0.5627, "step": 11382 }, { "epoch": 9.446473029045643, "grad_norm": 28.927520751953125, "learning_rate": 1.6224066390041494e-05, "loss": 1.2512, "step": 11383 }, { "epoch": 9.447302904564316, "grad_norm": 27.137086868286133, "learning_rate": 1.6223734439834026e-05, "loss": 0.773, "step": 11384 }, { "epoch": 9.448132780082988, "grad_norm": 18.32537078857422, "learning_rate": 1.6223402489626555e-05, "loss": 0.8672, "step": 11385 }, { "epoch": 9.44896265560166, "grad_norm": 19.69890785217285, "learning_rate": 1.6223070539419087e-05, "loss": 1.1954, "step": 11386 }, { "epoch": 9.449792531120332, "grad_norm": 25.738462448120117, "learning_rate": 1.622273858921162e-05, "loss": 0.4903, "step": 11387 }, { "epoch": 9.450622406639004, "grad_norm": 26.123817443847656, "learning_rate": 1.622240663900415e-05, "loss": 1.2424, "step": 11388 }, { "epoch": 9.451452282157677, "grad_norm": 25.660188674926758, "learning_rate": 1.6222074688796683e-05, "loss": 1.0656, "step": 11389 }, { "epoch": 9.452282157676349, "grad_norm": 21.505109786987305, "learning_rate": 1.6221742738589212e-05, "loss": 0.5764, "step": 11390 }, { "epoch": 9.453112033195021, "grad_norm": 21.296037673950195, "learning_rate": 1.6221410788381744e-05, "loss": 
0.715, "step": 11391 }, { "epoch": 9.453941908713693, "grad_norm": 21.038799285888672, "learning_rate": 1.6221078838174276e-05, "loss": 0.3394, "step": 11392 }, { "epoch": 9.454771784232365, "grad_norm": 52.743709564208984, "learning_rate": 1.622074688796681e-05, "loss": 1.5509, "step": 11393 }, { "epoch": 9.455601659751038, "grad_norm": 16.794641494750977, "learning_rate": 1.6220414937759337e-05, "loss": 0.7534, "step": 11394 }, { "epoch": 9.45643153526971, "grad_norm": 27.7438907623291, "learning_rate": 1.622008298755187e-05, "loss": 1.4162, "step": 11395 }, { "epoch": 9.457261410788382, "grad_norm": 28.694067001342773, "learning_rate": 1.62197510373444e-05, "loss": 0.795, "step": 11396 }, { "epoch": 9.458091286307054, "grad_norm": 35.03999328613281, "learning_rate": 1.621941908713693e-05, "loss": 0.6635, "step": 11397 }, { "epoch": 9.458921161825726, "grad_norm": 18.274105072021484, "learning_rate": 1.6219087136929462e-05, "loss": 0.8687, "step": 11398 }, { "epoch": 9.459751037344398, "grad_norm": 30.469520568847656, "learning_rate": 1.6218755186721994e-05, "loss": 1.1282, "step": 11399 }, { "epoch": 9.46058091286307, "grad_norm": 33.88047790527344, "learning_rate": 1.6218423236514523e-05, "loss": 0.9879, "step": 11400 }, { "epoch": 9.461410788381743, "grad_norm": 17.513086318969727, "learning_rate": 1.6218091286307055e-05, "loss": 0.9035, "step": 11401 }, { "epoch": 9.462240663900415, "grad_norm": 34.98102569580078, "learning_rate": 1.6217759336099587e-05, "loss": 1.4222, "step": 11402 }, { "epoch": 9.463070539419087, "grad_norm": 36.03255081176758, "learning_rate": 1.6217427385892116e-05, "loss": 1.032, "step": 11403 }, { "epoch": 9.46390041493776, "grad_norm": 15.05613899230957, "learning_rate": 1.6217095435684648e-05, "loss": 0.5533, "step": 11404 }, { "epoch": 9.464730290456432, "grad_norm": 33.287696838378906, "learning_rate": 1.621676348547718e-05, "loss": 1.5066, "step": 11405 }, { "epoch": 9.465560165975104, "grad_norm": 33.32450866699219, 
"learning_rate": 1.621643153526971e-05, "loss": 1.0139, "step": 11406 }, { "epoch": 9.466390041493776, "grad_norm": 25.3881778717041, "learning_rate": 1.621609958506224e-05, "loss": 0.6241, "step": 11407 }, { "epoch": 9.467219917012448, "grad_norm": 34.51172637939453, "learning_rate": 1.6215767634854773e-05, "loss": 1.2961, "step": 11408 }, { "epoch": 9.46804979253112, "grad_norm": 22.416229248046875, "learning_rate": 1.6215435684647305e-05, "loss": 1.2293, "step": 11409 }, { "epoch": 9.468879668049793, "grad_norm": 28.427087783813477, "learning_rate": 1.6215103734439834e-05, "loss": 1.1996, "step": 11410 }, { "epoch": 9.469709543568465, "grad_norm": 20.624698638916016, "learning_rate": 1.6214771784232366e-05, "loss": 0.8238, "step": 11411 }, { "epoch": 9.470539419087137, "grad_norm": 20.012619018554688, "learning_rate": 1.6214439834024898e-05, "loss": 0.528, "step": 11412 }, { "epoch": 9.47136929460581, "grad_norm": 26.106826782226562, "learning_rate": 1.621410788381743e-05, "loss": 1.3937, "step": 11413 }, { "epoch": 9.472199170124481, "grad_norm": 40.67009353637695, "learning_rate": 1.6213775933609962e-05, "loss": 1.5811, "step": 11414 }, { "epoch": 9.473029045643154, "grad_norm": 39.904048919677734, "learning_rate": 1.621344398340249e-05, "loss": 0.7583, "step": 11415 }, { "epoch": 9.473858921161826, "grad_norm": 33.7216682434082, "learning_rate": 1.6213112033195023e-05, "loss": 0.9241, "step": 11416 }, { "epoch": 9.474688796680498, "grad_norm": 43.48334884643555, "learning_rate": 1.6212780082987555e-05, "loss": 2.1102, "step": 11417 }, { "epoch": 9.47551867219917, "grad_norm": 18.818525314331055, "learning_rate": 1.6212448132780084e-05, "loss": 0.9609, "step": 11418 }, { "epoch": 9.476348547717842, "grad_norm": 29.810274124145508, "learning_rate": 1.6212116182572616e-05, "loss": 0.8977, "step": 11419 }, { "epoch": 9.477178423236515, "grad_norm": 25.384185791015625, "learning_rate": 1.6211784232365145e-05, "loss": 0.8604, "step": 11420 }, { "epoch": 
9.478008298755187, "grad_norm": 20.591819763183594, "learning_rate": 1.6211452282157677e-05, "loss": 1.1111, "step": 11421 }, { "epoch": 9.478838174273859, "grad_norm": 25.795656204223633, "learning_rate": 1.621112033195021e-05, "loss": 1.2273, "step": 11422 }, { "epoch": 9.479668049792531, "grad_norm": 13.331767082214355, "learning_rate": 1.6210788381742738e-05, "loss": 0.3753, "step": 11423 }, { "epoch": 9.480497925311203, "grad_norm": 35.30902862548828, "learning_rate": 1.621045643153527e-05, "loss": 1.439, "step": 11424 }, { "epoch": 9.481327800829876, "grad_norm": 35.224857330322266, "learning_rate": 1.6210124481327802e-05, "loss": 1.2672, "step": 11425 }, { "epoch": 9.482157676348548, "grad_norm": 40.953948974609375, "learning_rate": 1.6209792531120334e-05, "loss": 1.471, "step": 11426 }, { "epoch": 9.48298755186722, "grad_norm": 26.178327560424805, "learning_rate": 1.6209460580912863e-05, "loss": 1.4751, "step": 11427 }, { "epoch": 9.483817427385892, "grad_norm": 35.9061279296875, "learning_rate": 1.6209128630705395e-05, "loss": 1.5113, "step": 11428 }, { "epoch": 9.484647302904564, "grad_norm": 28.34746551513672, "learning_rate": 1.6208796680497927e-05, "loss": 0.6672, "step": 11429 }, { "epoch": 9.485477178423237, "grad_norm": 31.036035537719727, "learning_rate": 1.620846473029046e-05, "loss": 0.7917, "step": 11430 }, { "epoch": 9.486307053941909, "grad_norm": 30.623544692993164, "learning_rate": 1.6208132780082988e-05, "loss": 1.131, "step": 11431 }, { "epoch": 9.487136929460581, "grad_norm": 20.886499404907227, "learning_rate": 1.620780082987552e-05, "loss": 0.8109, "step": 11432 }, { "epoch": 9.487966804979253, "grad_norm": 29.033327102661133, "learning_rate": 1.6207468879668052e-05, "loss": 1.0223, "step": 11433 }, { "epoch": 9.488796680497925, "grad_norm": 29.286544799804688, "learning_rate": 1.6207136929460584e-05, "loss": 1.1814, "step": 11434 }, { "epoch": 9.489626556016598, "grad_norm": 24.176088333129883, "learning_rate": 1.6206804979253113e-05, 
"loss": 1.0651, "step": 11435 }, { "epoch": 9.49045643153527, "grad_norm": 24.050952911376953, "learning_rate": 1.6206473029045645e-05, "loss": 0.9312, "step": 11436 }, { "epoch": 9.491286307053942, "grad_norm": 24.26302719116211, "learning_rate": 1.6206141078838177e-05, "loss": 0.8448, "step": 11437 }, { "epoch": 9.492116182572614, "grad_norm": 20.736509323120117, "learning_rate": 1.6205809128630706e-05, "loss": 0.8054, "step": 11438 }, { "epoch": 9.492946058091286, "grad_norm": 20.501195907592773, "learning_rate": 1.6205477178423238e-05, "loss": 1.0108, "step": 11439 }, { "epoch": 9.493775933609959, "grad_norm": 25.03363037109375, "learning_rate": 1.620514522821577e-05, "loss": 0.8259, "step": 11440 }, { "epoch": 9.49460580912863, "grad_norm": 49.116600036621094, "learning_rate": 1.62048132780083e-05, "loss": 1.3887, "step": 11441 }, { "epoch": 9.495435684647303, "grad_norm": 22.172618865966797, "learning_rate": 1.620448132780083e-05, "loss": 1.0249, "step": 11442 }, { "epoch": 9.496265560165975, "grad_norm": 57.653560638427734, "learning_rate": 1.6204149377593363e-05, "loss": 1.9173, "step": 11443 }, { "epoch": 9.497095435684647, "grad_norm": 15.691849708557129, "learning_rate": 1.620381742738589e-05, "loss": 0.6829, "step": 11444 }, { "epoch": 9.49792531120332, "grad_norm": 35.18213653564453, "learning_rate": 1.6203485477178424e-05, "loss": 1.3775, "step": 11445 }, { "epoch": 9.498755186721992, "grad_norm": 28.52251434326172, "learning_rate": 1.6203153526970956e-05, "loss": 1.0936, "step": 11446 }, { "epoch": 9.499585062240664, "grad_norm": 21.325843811035156, "learning_rate": 1.6202821576763488e-05, "loss": 1.4155, "step": 11447 }, { "epoch": 9.500414937759336, "grad_norm": 24.54349136352539, "learning_rate": 1.6202489626556017e-05, "loss": 1.0682, "step": 11448 }, { "epoch": 9.501244813278008, "grad_norm": 17.15530776977539, "learning_rate": 1.620215767634855e-05, "loss": 0.8333, "step": 11449 }, { "epoch": 9.50207468879668, "grad_norm": 28.765869140625, 
"learning_rate": 1.620182572614108e-05, "loss": 1.5367, "step": 11450 }, { "epoch": 9.502904564315353, "grad_norm": 17.979408264160156, "learning_rate": 1.6201493775933613e-05, "loss": 0.5804, "step": 11451 }, { "epoch": 9.503734439834025, "grad_norm": 16.95417594909668, "learning_rate": 1.6201161825726142e-05, "loss": 0.4679, "step": 11452 }, { "epoch": 9.504564315352697, "grad_norm": 27.920358657836914, "learning_rate": 1.6200829875518674e-05, "loss": 0.8827, "step": 11453 }, { "epoch": 9.50539419087137, "grad_norm": 22.36033821105957, "learning_rate": 1.6200497925311206e-05, "loss": 1.0377, "step": 11454 }, { "epoch": 9.506224066390041, "grad_norm": 21.883960723876953, "learning_rate": 1.6200165975103738e-05, "loss": 0.5362, "step": 11455 }, { "epoch": 9.507053941908714, "grad_norm": 31.007022857666016, "learning_rate": 1.6199834024896267e-05, "loss": 1.3761, "step": 11456 }, { "epoch": 9.507883817427386, "grad_norm": 23.133193969726562, "learning_rate": 1.61995020746888e-05, "loss": 1.2762, "step": 11457 }, { "epoch": 9.508713692946058, "grad_norm": 24.186105728149414, "learning_rate": 1.6199170124481328e-05, "loss": 0.639, "step": 11458 }, { "epoch": 9.50954356846473, "grad_norm": 27.667417526245117, "learning_rate": 1.619883817427386e-05, "loss": 0.9794, "step": 11459 }, { "epoch": 9.510373443983402, "grad_norm": 34.01515579223633, "learning_rate": 1.6198506224066392e-05, "loss": 1.6135, "step": 11460 }, { "epoch": 9.511203319502075, "grad_norm": 31.129487991333008, "learning_rate": 1.619817427385892e-05, "loss": 0.878, "step": 11461 }, { "epoch": 9.512033195020747, "grad_norm": 16.803857803344727, "learning_rate": 1.6197842323651453e-05, "loss": 0.5186, "step": 11462 }, { "epoch": 9.512863070539419, "grad_norm": 34.22011947631836, "learning_rate": 1.6197510373443985e-05, "loss": 1.1621, "step": 11463 }, { "epoch": 9.513692946058091, "grad_norm": 21.63970375061035, "learning_rate": 1.6197178423236513e-05, "loss": 0.7223, "step": 11464 }, { "epoch": 
9.514522821576763, "grad_norm": 37.129581451416016, "learning_rate": 1.6196846473029046e-05, "loss": 1.1114, "step": 11465 }, { "epoch": 9.515352697095436, "grad_norm": 21.891334533691406, "learning_rate": 1.6196514522821578e-05, "loss": 0.675, "step": 11466 }, { "epoch": 9.516182572614108, "grad_norm": 30.96750831604004, "learning_rate": 1.619618257261411e-05, "loss": 1.0854, "step": 11467 }, { "epoch": 9.51701244813278, "grad_norm": 16.129789352416992, "learning_rate": 1.6195850622406642e-05, "loss": 0.6692, "step": 11468 }, { "epoch": 9.517842323651452, "grad_norm": 29.877225875854492, "learning_rate": 1.619551867219917e-05, "loss": 0.8351, "step": 11469 }, { "epoch": 9.518672199170124, "grad_norm": 44.292842864990234, "learning_rate": 1.6195186721991703e-05, "loss": 1.0396, "step": 11470 }, { "epoch": 9.519502074688797, "grad_norm": 24.41382598876953, "learning_rate": 1.6194854771784235e-05, "loss": 0.8908, "step": 11471 }, { "epoch": 9.520331950207469, "grad_norm": 33.42855453491211, "learning_rate": 1.6194522821576767e-05, "loss": 0.9252, "step": 11472 }, { "epoch": 9.521161825726141, "grad_norm": 28.801925659179688, "learning_rate": 1.6194190871369296e-05, "loss": 2.2375, "step": 11473 }, { "epoch": 9.521991701244813, "grad_norm": 22.502107620239258, "learning_rate": 1.6193858921161828e-05, "loss": 0.7781, "step": 11474 }, { "epoch": 9.522821576763485, "grad_norm": 37.16362380981445, "learning_rate": 1.619352697095436e-05, "loss": 1.2868, "step": 11475 }, { "epoch": 9.523651452282158, "grad_norm": 43.43405532836914, "learning_rate": 1.619319502074689e-05, "loss": 1.3737, "step": 11476 }, { "epoch": 9.52448132780083, "grad_norm": 25.32970428466797, "learning_rate": 1.619286307053942e-05, "loss": 0.483, "step": 11477 }, { "epoch": 9.525311203319502, "grad_norm": 34.7651481628418, "learning_rate": 1.6192531120331953e-05, "loss": 1.5675, "step": 11478 }, { "epoch": 9.526141078838174, "grad_norm": 46.773109436035156, "learning_rate": 1.619219917012448e-05, 
"loss": 1.1714, "step": 11479 }, { "epoch": 9.526970954356846, "grad_norm": 38.51686477661133, "learning_rate": 1.6191867219917014e-05, "loss": 1.4773, "step": 11480 }, { "epoch": 9.527800829875519, "grad_norm": 42.72273635864258, "learning_rate": 1.6191535269709542e-05, "loss": 1.2898, "step": 11481 }, { "epoch": 9.52863070539419, "grad_norm": 31.66227912902832, "learning_rate": 1.6191203319502074e-05, "loss": 0.8806, "step": 11482 }, { "epoch": 9.529460580912863, "grad_norm": 41.17682647705078, "learning_rate": 1.6190871369294607e-05, "loss": 1.3043, "step": 11483 }, { "epoch": 9.530290456431535, "grad_norm": 28.489763259887695, "learning_rate": 1.619053941908714e-05, "loss": 0.9302, "step": 11484 }, { "epoch": 9.531120331950207, "grad_norm": 15.645964622497559, "learning_rate": 1.6190207468879667e-05, "loss": 1.0796, "step": 11485 }, { "epoch": 9.53195020746888, "grad_norm": 16.637615203857422, "learning_rate": 1.61898755186722e-05, "loss": 0.7967, "step": 11486 }, { "epoch": 9.532780082987552, "grad_norm": 40.54002380371094, "learning_rate": 1.618954356846473e-05, "loss": 1.4181, "step": 11487 }, { "epoch": 9.533609958506224, "grad_norm": 28.591243743896484, "learning_rate": 1.6189211618257264e-05, "loss": 1.132, "step": 11488 }, { "epoch": 9.534439834024896, "grad_norm": 20.87606430053711, "learning_rate": 1.6188879668049792e-05, "loss": 0.5781, "step": 11489 }, { "epoch": 9.535269709543568, "grad_norm": 31.89345932006836, "learning_rate": 1.6188547717842325e-05, "loss": 1.2129, "step": 11490 }, { "epoch": 9.53609958506224, "grad_norm": 25.360204696655273, "learning_rate": 1.6188215767634857e-05, "loss": 0.7438, "step": 11491 }, { "epoch": 9.536929460580913, "grad_norm": 29.36037826538086, "learning_rate": 1.618788381742739e-05, "loss": 1.2727, "step": 11492 }, { "epoch": 9.537759336099585, "grad_norm": 25.472976684570312, "learning_rate": 1.618755186721992e-05, "loss": 0.6106, "step": 11493 }, { "epoch": 9.538589211618257, "grad_norm": 35.881649017333984, 
"learning_rate": 1.618721991701245e-05, "loss": 1.1914, "step": 11494 }, { "epoch": 9.53941908713693, "grad_norm": 22.493793487548828, "learning_rate": 1.6186887966804982e-05, "loss": 1.5913, "step": 11495 }, { "epoch": 9.540248962655602, "grad_norm": 19.523426055908203, "learning_rate": 1.6186556016597514e-05, "loss": 1.6496, "step": 11496 }, { "epoch": 9.541078838174274, "grad_norm": 23.414024353027344, "learning_rate": 1.6186224066390043e-05, "loss": 1.0951, "step": 11497 }, { "epoch": 9.541908713692946, "grad_norm": 25.911598205566406, "learning_rate": 1.6185892116182575e-05, "loss": 1.0044, "step": 11498 }, { "epoch": 9.542738589211618, "grad_norm": 22.996700286865234, "learning_rate": 1.6185560165975103e-05, "loss": 0.7008, "step": 11499 }, { "epoch": 9.54356846473029, "grad_norm": 17.827932357788086, "learning_rate": 1.6185228215767635e-05, "loss": 0.886, "step": 11500 }, { "epoch": 9.544398340248962, "grad_norm": 22.954723358154297, "learning_rate": 1.6184896265560168e-05, "loss": 0.6781, "step": 11501 }, { "epoch": 9.545228215767635, "grad_norm": 28.28852081298828, "learning_rate": 1.6184564315352696e-05, "loss": 1.5176, "step": 11502 }, { "epoch": 9.546058091286307, "grad_norm": 36.92082214355469, "learning_rate": 1.618423236514523e-05, "loss": 1.3071, "step": 11503 }, { "epoch": 9.546887966804979, "grad_norm": 54.18442153930664, "learning_rate": 1.618390041493776e-05, "loss": 1.4404, "step": 11504 }, { "epoch": 9.547717842323651, "grad_norm": 34.36436080932617, "learning_rate": 1.6183568464730293e-05, "loss": 1.6125, "step": 11505 }, { "epoch": 9.548547717842323, "grad_norm": 40.21954345703125, "learning_rate": 1.618323651452282e-05, "loss": 1.0627, "step": 11506 }, { "epoch": 9.549377593360996, "grad_norm": 19.187084197998047, "learning_rate": 1.6182904564315353e-05, "loss": 0.6037, "step": 11507 }, { "epoch": 9.550207468879668, "grad_norm": 25.485984802246094, "learning_rate": 1.6182572614107886e-05, "loss": 1.2539, "step": 11508 }, { "epoch": 
9.55103734439834, "grad_norm": 41.59831237792969, "learning_rate": 1.6182240663900418e-05, "loss": 0.892, "step": 11509 }, { "epoch": 9.551867219917012, "grad_norm": 38.261043548583984, "learning_rate": 1.6181908713692946e-05, "loss": 0.8753, "step": 11510 }, { "epoch": 9.552697095435684, "grad_norm": 18.412168502807617, "learning_rate": 1.618157676348548e-05, "loss": 0.8116, "step": 11511 }, { "epoch": 9.553526970954357, "grad_norm": 33.8952522277832, "learning_rate": 1.618124481327801e-05, "loss": 0.776, "step": 11512 }, { "epoch": 9.554356846473029, "grad_norm": 30.114667892456055, "learning_rate": 1.6180912863070543e-05, "loss": 1.6274, "step": 11513 }, { "epoch": 9.555186721991701, "grad_norm": 17.057735443115234, "learning_rate": 1.618058091286307e-05, "loss": 0.7681, "step": 11514 }, { "epoch": 9.556016597510373, "grad_norm": 37.194061279296875, "learning_rate": 1.6180248962655604e-05, "loss": 1.9318, "step": 11515 }, { "epoch": 9.556846473029045, "grad_norm": 19.991979598999023, "learning_rate": 1.6179917012448136e-05, "loss": 1.1819, "step": 11516 }, { "epoch": 9.557676348547718, "grad_norm": 19.69610595703125, "learning_rate": 1.6179585062240664e-05, "loss": 1.0769, "step": 11517 }, { "epoch": 9.55850622406639, "grad_norm": 25.70623016357422, "learning_rate": 1.6179253112033196e-05, "loss": 1.0459, "step": 11518 }, { "epoch": 9.559336099585062, "grad_norm": 30.95256996154785, "learning_rate": 1.617892116182573e-05, "loss": 0.6076, "step": 11519 }, { "epoch": 9.560165975103734, "grad_norm": 30.779916763305664, "learning_rate": 1.6178589211618257e-05, "loss": 1.3325, "step": 11520 }, { "epoch": 9.560995850622406, "grad_norm": 22.231630325317383, "learning_rate": 1.617825726141079e-05, "loss": 0.9957, "step": 11521 }, { "epoch": 9.561825726141079, "grad_norm": 27.170461654663086, "learning_rate": 1.617792531120332e-05, "loss": 1.0397, "step": 11522 }, { "epoch": 9.56265560165975, "grad_norm": 29.78693389892578, "learning_rate": 1.617759336099585e-05, "loss": 
0.8775, "step": 11523 }, { "epoch": 9.563485477178423, "grad_norm": 37.01374053955078, "learning_rate": 1.6177261410788382e-05, "loss": 1.6683, "step": 11524 }, { "epoch": 9.564315352697095, "grad_norm": 21.76835060119629, "learning_rate": 1.6176929460580914e-05, "loss": 0.7631, "step": 11525 }, { "epoch": 9.565145228215767, "grad_norm": 19.617591857910156, "learning_rate": 1.6176597510373447e-05, "loss": 1.4641, "step": 11526 }, { "epoch": 9.56597510373444, "grad_norm": 77.57504272460938, "learning_rate": 1.6176265560165975e-05, "loss": 1.1111, "step": 11527 }, { "epoch": 9.566804979253112, "grad_norm": 18.89202308654785, "learning_rate": 1.6175933609958507e-05, "loss": 0.5781, "step": 11528 }, { "epoch": 9.567634854771784, "grad_norm": 36.056549072265625, "learning_rate": 1.617560165975104e-05, "loss": 0.9821, "step": 11529 }, { "epoch": 9.568464730290456, "grad_norm": 20.09526252746582, "learning_rate": 1.617526970954357e-05, "loss": 0.8665, "step": 11530 }, { "epoch": 9.569294605809128, "grad_norm": 52.726890563964844, "learning_rate": 1.61749377593361e-05, "loss": 1.6908, "step": 11531 }, { "epoch": 9.5701244813278, "grad_norm": 51.465538024902344, "learning_rate": 1.6174605809128632e-05, "loss": 0.6033, "step": 11532 }, { "epoch": 9.570954356846473, "grad_norm": 28.016185760498047, "learning_rate": 1.6174273858921165e-05, "loss": 0.9718, "step": 11533 }, { "epoch": 9.571784232365145, "grad_norm": 30.81264877319336, "learning_rate": 1.6173941908713697e-05, "loss": 0.9134, "step": 11534 }, { "epoch": 9.572614107883817, "grad_norm": 20.841026306152344, "learning_rate": 1.6173609958506225e-05, "loss": 0.3503, "step": 11535 }, { "epoch": 9.57344398340249, "grad_norm": 22.852371215820312, "learning_rate": 1.6173278008298757e-05, "loss": 1.0626, "step": 11536 }, { "epoch": 9.574273858921162, "grad_norm": 28.262008666992188, "learning_rate": 1.6172946058091286e-05, "loss": 0.6975, "step": 11537 }, { "epoch": 9.575103734439834, "grad_norm": 47.54283142089844, 
"learning_rate": 1.617261410788382e-05, "loss": 0.8379, "step": 11538 }, { "epoch": 9.575933609958506, "grad_norm": 29.454843521118164, "learning_rate": 1.617228215767635e-05, "loss": 1.1601, "step": 11539 }, { "epoch": 9.576763485477178, "grad_norm": 32.04850769042969, "learning_rate": 1.617195020746888e-05, "loss": 1.1702, "step": 11540 }, { "epoch": 9.57759336099585, "grad_norm": 21.532699584960938, "learning_rate": 1.617161825726141e-05, "loss": 0.831, "step": 11541 }, { "epoch": 9.578423236514523, "grad_norm": 20.233060836791992, "learning_rate": 1.6171286307053943e-05, "loss": 1.2623, "step": 11542 }, { "epoch": 9.579253112033195, "grad_norm": 17.856704711914062, "learning_rate": 1.6170954356846472e-05, "loss": 0.5856, "step": 11543 }, { "epoch": 9.580082987551867, "grad_norm": 18.483976364135742, "learning_rate": 1.6170622406639004e-05, "loss": 0.9004, "step": 11544 }, { "epoch": 9.58091286307054, "grad_norm": 29.128618240356445, "learning_rate": 1.6170290456431536e-05, "loss": 1.1435, "step": 11545 }, { "epoch": 9.581742738589211, "grad_norm": 29.098711013793945, "learning_rate": 1.616995850622407e-05, "loss": 0.8331, "step": 11546 }, { "epoch": 9.582572614107884, "grad_norm": 30.81684684753418, "learning_rate": 1.61696265560166e-05, "loss": 1.4328, "step": 11547 }, { "epoch": 9.583402489626556, "grad_norm": 14.330070495605469, "learning_rate": 1.616929460580913e-05, "loss": 1.0258, "step": 11548 }, { "epoch": 9.584232365145228, "grad_norm": 20.884521484375, "learning_rate": 1.616896265560166e-05, "loss": 0.683, "step": 11549 }, { "epoch": 9.5850622406639, "grad_norm": 15.376072883605957, "learning_rate": 1.6168630705394193e-05, "loss": 0.6927, "step": 11550 }, { "epoch": 9.585892116182572, "grad_norm": 25.8813533782959, "learning_rate": 1.6168298755186726e-05, "loss": 0.8389, "step": 11551 }, { "epoch": 9.586721991701245, "grad_norm": 31.36067008972168, "learning_rate": 1.6167966804979254e-05, "loss": 1.3344, "step": 11552 }, { "epoch": 9.587551867219917, 
"grad_norm": 24.644922256469727, "learning_rate": 1.6167634854771786e-05, "loss": 0.7926, "step": 11553 }, { "epoch": 9.588381742738589, "grad_norm": 35.55747604370117, "learning_rate": 1.616730290456432e-05, "loss": 1.9527, "step": 11554 }, { "epoch": 9.589211618257261, "grad_norm": 22.93075180053711, "learning_rate": 1.6166970954356847e-05, "loss": 1.0652, "step": 11555 }, { "epoch": 9.590041493775933, "grad_norm": 45.78078079223633, "learning_rate": 1.616663900414938e-05, "loss": 1.1806, "step": 11556 }, { "epoch": 9.590871369294605, "grad_norm": 18.913183212280273, "learning_rate": 1.616630705394191e-05, "loss": 0.9069, "step": 11557 }, { "epoch": 9.591701244813278, "grad_norm": 49.2260856628418, "learning_rate": 1.616597510373444e-05, "loss": 1.3663, "step": 11558 }, { "epoch": 9.59253112033195, "grad_norm": 16.407888412475586, "learning_rate": 1.6165643153526972e-05, "loss": 1.1057, "step": 11559 }, { "epoch": 9.593360995850622, "grad_norm": 23.01656723022461, "learning_rate": 1.61653112033195e-05, "loss": 0.5539, "step": 11560 }, { "epoch": 9.594190871369294, "grad_norm": 32.30105972290039, "learning_rate": 1.6164979253112033e-05, "loss": 1.121, "step": 11561 }, { "epoch": 9.595020746887966, "grad_norm": 69.11267852783203, "learning_rate": 1.6164647302904565e-05, "loss": 1.4183, "step": 11562 }, { "epoch": 9.595850622406639, "grad_norm": 37.733367919921875, "learning_rate": 1.6164315352697097e-05, "loss": 1.703, "step": 11563 }, { "epoch": 9.59668049792531, "grad_norm": 22.815547943115234, "learning_rate": 1.6163983402489626e-05, "loss": 1.0094, "step": 11564 }, { "epoch": 9.597510373443983, "grad_norm": 18.0013370513916, "learning_rate": 1.6163651452282158e-05, "loss": 0.6259, "step": 11565 }, { "epoch": 9.598340248962655, "grad_norm": 45.29072952270508, "learning_rate": 1.616331950207469e-05, "loss": 2.0479, "step": 11566 }, { "epoch": 9.599170124481327, "grad_norm": 39.01199722290039, "learning_rate": 1.6162987551867222e-05, "loss": 1.5306, "step": 11567 
}, { "epoch": 9.6, "grad_norm": 35.22612762451172, "learning_rate": 1.616265560165975e-05, "loss": 1.0149, "step": 11568 }, { "epoch": 9.600829875518672, "grad_norm": 25.215543746948242, "learning_rate": 1.6162323651452283e-05, "loss": 0.4963, "step": 11569 }, { "epoch": 9.601659751037344, "grad_norm": 31.176612854003906, "learning_rate": 1.6161991701244815e-05, "loss": 1.0709, "step": 11570 }, { "epoch": 9.602489626556016, "grad_norm": 27.148942947387695, "learning_rate": 1.6161659751037347e-05, "loss": 1.6374, "step": 11571 }, { "epoch": 9.603319502074688, "grad_norm": 20.125959396362305, "learning_rate": 1.6161327800829876e-05, "loss": 1.1455, "step": 11572 }, { "epoch": 9.60414937759336, "grad_norm": 16.119096755981445, "learning_rate": 1.6160995850622408e-05, "loss": 0.8216, "step": 11573 }, { "epoch": 9.604979253112033, "grad_norm": 19.191965103149414, "learning_rate": 1.616066390041494e-05, "loss": 0.5215, "step": 11574 }, { "epoch": 9.605809128630705, "grad_norm": 16.9846134185791, "learning_rate": 1.616033195020747e-05, "loss": 0.7252, "step": 11575 }, { "epoch": 9.606639004149377, "grad_norm": 29.732988357543945, "learning_rate": 1.616e-05, "loss": 1.866, "step": 11576 }, { "epoch": 9.60746887966805, "grad_norm": 21.41229248046875, "learning_rate": 1.6159668049792533e-05, "loss": 0.8598, "step": 11577 }, { "epoch": 9.608298755186722, "grad_norm": 36.62873077392578, "learning_rate": 1.6159336099585062e-05, "loss": 1.3807, "step": 11578 }, { "epoch": 9.609128630705394, "grad_norm": 22.200788497924805, "learning_rate": 1.6159004149377594e-05, "loss": 0.6471, "step": 11579 }, { "epoch": 9.609958506224066, "grad_norm": 35.252525329589844, "learning_rate": 1.6158672199170126e-05, "loss": 1.5209, "step": 11580 }, { "epoch": 9.610788381742738, "grad_norm": 22.700572967529297, "learning_rate": 1.6158340248962655e-05, "loss": 0.8597, "step": 11581 }, { "epoch": 9.61161825726141, "grad_norm": 22.64229965209961, "learning_rate": 1.6158008298755187e-05, "loss": 
0.8267, "step": 11582 }, { "epoch": 9.612448132780083, "grad_norm": 34.104522705078125, "learning_rate": 1.615767634854772e-05, "loss": 0.7292, "step": 11583 }, { "epoch": 9.613278008298755, "grad_norm": 27.1683349609375, "learning_rate": 1.615734439834025e-05, "loss": 1.0155, "step": 11584 }, { "epoch": 9.614107883817427, "grad_norm": 47.332027435302734, "learning_rate": 1.615701244813278e-05, "loss": 1.5881, "step": 11585 }, { "epoch": 9.6149377593361, "grad_norm": 60.18696975708008, "learning_rate": 1.6156680497925312e-05, "loss": 1.2241, "step": 11586 }, { "epoch": 9.615767634854771, "grad_norm": 15.541206359863281, "learning_rate": 1.6156348547717844e-05, "loss": 0.8882, "step": 11587 }, { "epoch": 9.616597510373444, "grad_norm": 24.893070220947266, "learning_rate": 1.6156016597510376e-05, "loss": 1.111, "step": 11588 }, { "epoch": 9.617427385892116, "grad_norm": 27.569955825805664, "learning_rate": 1.6155684647302905e-05, "loss": 0.7411, "step": 11589 }, { "epoch": 9.618257261410788, "grad_norm": 27.227067947387695, "learning_rate": 1.6155352697095437e-05, "loss": 0.9557, "step": 11590 }, { "epoch": 9.61908713692946, "grad_norm": 35.442256927490234, "learning_rate": 1.615502074688797e-05, "loss": 1.6925, "step": 11591 }, { "epoch": 9.619917012448132, "grad_norm": 23.594940185546875, "learning_rate": 1.61546887966805e-05, "loss": 0.9737, "step": 11592 }, { "epoch": 9.620746887966805, "grad_norm": 20.157075881958008, "learning_rate": 1.615435684647303e-05, "loss": 0.7367, "step": 11593 }, { "epoch": 9.621576763485477, "grad_norm": 40.8910026550293, "learning_rate": 1.6154024896265562e-05, "loss": 1.2688, "step": 11594 }, { "epoch": 9.622406639004149, "grad_norm": 26.621944427490234, "learning_rate": 1.6153692946058094e-05, "loss": 0.9068, "step": 11595 }, { "epoch": 9.623236514522821, "grad_norm": 20.760774612426758, "learning_rate": 1.6153360995850623e-05, "loss": 0.3548, "step": 11596 }, { "epoch": 9.624066390041493, "grad_norm": 19.628931045532227, 
"learning_rate": 1.6153029045643155e-05, "loss": 0.9832, "step": 11597 }, { "epoch": 9.624896265560166, "grad_norm": 17.765514373779297, "learning_rate": 1.6152697095435684e-05, "loss": 0.8922, "step": 11598 }, { "epoch": 9.625726141078838, "grad_norm": 25.2209529876709, "learning_rate": 1.6152365145228216e-05, "loss": 0.8936, "step": 11599 }, { "epoch": 9.62655601659751, "grad_norm": 23.00138282775879, "learning_rate": 1.6152033195020748e-05, "loss": 1.0888, "step": 11600 }, { "epoch": 9.627385892116182, "grad_norm": 40.565677642822266, "learning_rate": 1.615170124481328e-05, "loss": 1.2998, "step": 11601 }, { "epoch": 9.628215767634854, "grad_norm": 27.59396743774414, "learning_rate": 1.615136929460581e-05, "loss": 1.2341, "step": 11602 }, { "epoch": 9.629045643153527, "grad_norm": 23.59332275390625, "learning_rate": 1.615103734439834e-05, "loss": 1.0951, "step": 11603 }, { "epoch": 9.629875518672199, "grad_norm": 28.05743980407715, "learning_rate": 1.6150705394190873e-05, "loss": 1.1366, "step": 11604 }, { "epoch": 9.630705394190871, "grad_norm": 30.35628318786621, "learning_rate": 1.6150373443983405e-05, "loss": 1.4128, "step": 11605 }, { "epoch": 9.631535269709543, "grad_norm": 21.541784286499023, "learning_rate": 1.6150041493775934e-05, "loss": 0.9388, "step": 11606 }, { "epoch": 9.632365145228215, "grad_norm": 57.09957504272461, "learning_rate": 1.6149709543568466e-05, "loss": 1.1381, "step": 11607 }, { "epoch": 9.633195020746887, "grad_norm": 19.032230377197266, "learning_rate": 1.6149377593360998e-05, "loss": 0.6325, "step": 11608 }, { "epoch": 9.63402489626556, "grad_norm": 17.469806671142578, "learning_rate": 1.614904564315353e-05, "loss": 0.709, "step": 11609 }, { "epoch": 9.634854771784232, "grad_norm": 26.25456428527832, "learning_rate": 1.614871369294606e-05, "loss": 1.4214, "step": 11610 }, { "epoch": 9.635684647302904, "grad_norm": 29.424245834350586, "learning_rate": 1.614838174273859e-05, "loss": 0.8418, "step": 11611 }, { "epoch": 
9.636514522821576, "grad_norm": 25.623943328857422, "learning_rate": 1.6148049792531123e-05, "loss": 1.0008, "step": 11612 }, { "epoch": 9.637344398340248, "grad_norm": 23.214500427246094, "learning_rate": 1.6147717842323655e-05, "loss": 0.9035, "step": 11613 }, { "epoch": 9.63817427385892, "grad_norm": 22.250030517578125, "learning_rate": 1.6147385892116184e-05, "loss": 1.0894, "step": 11614 }, { "epoch": 9.639004149377593, "grad_norm": 29.772666931152344, "learning_rate": 1.6147053941908716e-05, "loss": 1.0858, "step": 11615 }, { "epoch": 9.639834024896265, "grad_norm": 22.390239715576172, "learning_rate": 1.6146721991701245e-05, "loss": 0.7526, "step": 11616 }, { "epoch": 9.640663900414937, "grad_norm": 24.274232864379883, "learning_rate": 1.6146390041493777e-05, "loss": 0.9502, "step": 11617 }, { "epoch": 9.64149377593361, "grad_norm": 34.30453872680664, "learning_rate": 1.614605809128631e-05, "loss": 2.1676, "step": 11618 }, { "epoch": 9.642323651452282, "grad_norm": 24.490407943725586, "learning_rate": 1.6145726141078838e-05, "loss": 0.9131, "step": 11619 }, { "epoch": 9.643153526970954, "grad_norm": 21.5035400390625, "learning_rate": 1.614539419087137e-05, "loss": 0.9795, "step": 11620 }, { "epoch": 9.643983402489626, "grad_norm": 25.98247718811035, "learning_rate": 1.6145062240663902e-05, "loss": 0.9226, "step": 11621 }, { "epoch": 9.644813278008298, "grad_norm": 15.703975677490234, "learning_rate": 1.614473029045643e-05, "loss": 0.9106, "step": 11622 }, { "epoch": 9.64564315352697, "grad_norm": 29.744951248168945, "learning_rate": 1.6144398340248963e-05, "loss": 0.962, "step": 11623 }, { "epoch": 9.646473029045643, "grad_norm": 18.685516357421875, "learning_rate": 1.6144066390041495e-05, "loss": 1.1956, "step": 11624 }, { "epoch": 9.647302904564315, "grad_norm": 47.78738021850586, "learning_rate": 1.6143734439834027e-05, "loss": 1.2297, "step": 11625 }, { "epoch": 9.648132780082987, "grad_norm": 38.89927291870117, "learning_rate": 1.614340248962656e-05, 
"loss": 2.1983, "step": 11626 }, { "epoch": 9.64896265560166, "grad_norm": 26.22709846496582, "learning_rate": 1.6143070539419088e-05, "loss": 2.1457, "step": 11627 }, { "epoch": 9.649792531120331, "grad_norm": 26.68062400817871, "learning_rate": 1.614273858921162e-05, "loss": 1.2323, "step": 11628 }, { "epoch": 9.650622406639004, "grad_norm": 34.4199333190918, "learning_rate": 1.6142406639004152e-05, "loss": 1.4022, "step": 11629 }, { "epoch": 9.651452282157676, "grad_norm": 22.916913986206055, "learning_rate": 1.6142074688796684e-05, "loss": 0.6952, "step": 11630 }, { "epoch": 9.652282157676348, "grad_norm": 27.73164176940918, "learning_rate": 1.6141742738589213e-05, "loss": 1.8689, "step": 11631 }, { "epoch": 9.65311203319502, "grad_norm": 25.141340255737305, "learning_rate": 1.6141410788381745e-05, "loss": 1.1301, "step": 11632 }, { "epoch": 9.653941908713692, "grad_norm": 41.31753921508789, "learning_rate": 1.6141078838174277e-05, "loss": 1.6033, "step": 11633 }, { "epoch": 9.654771784232365, "grad_norm": 21.671775817871094, "learning_rate": 1.6140746887966806e-05, "loss": 0.7726, "step": 11634 }, { "epoch": 9.655601659751037, "grad_norm": 30.069862365722656, "learning_rate": 1.6140414937759338e-05, "loss": 1.4687, "step": 11635 }, { "epoch": 9.656431535269709, "grad_norm": 40.784061431884766, "learning_rate": 1.614008298755187e-05, "loss": 1.541, "step": 11636 }, { "epoch": 9.657261410788381, "grad_norm": 16.226139068603516, "learning_rate": 1.61397510373444e-05, "loss": 0.6643, "step": 11637 }, { "epoch": 9.658091286307053, "grad_norm": 18.92342185974121, "learning_rate": 1.613941908713693e-05, "loss": 0.8167, "step": 11638 }, { "epoch": 9.658921161825726, "grad_norm": 27.3907413482666, "learning_rate": 1.613908713692946e-05, "loss": 0.7489, "step": 11639 }, { "epoch": 9.659751037344398, "grad_norm": 16.60074234008789, "learning_rate": 1.613875518672199e-05, "loss": 0.9565, "step": 11640 }, { "epoch": 9.66058091286307, "grad_norm": 22.847240447998047, 
"learning_rate": 1.6138423236514524e-05, "loss": 0.982, "step": 11641 }, { "epoch": 9.661410788381742, "grad_norm": 25.656478881835938, "learning_rate": 1.6138091286307056e-05, "loss": 0.9143, "step": 11642 }, { "epoch": 9.662240663900414, "grad_norm": 51.604217529296875, "learning_rate": 1.6137759336099585e-05, "loss": 1.4169, "step": 11643 }, { "epoch": 9.663070539419087, "grad_norm": 20.498661041259766, "learning_rate": 1.6137427385892117e-05, "loss": 1.0127, "step": 11644 }, { "epoch": 9.663900414937759, "grad_norm": 48.31595230102539, "learning_rate": 1.613709543568465e-05, "loss": 0.5908, "step": 11645 }, { "epoch": 9.664730290456431, "grad_norm": 39.36445999145508, "learning_rate": 1.613676348547718e-05, "loss": 1.5878, "step": 11646 }, { "epoch": 9.665560165975103, "grad_norm": 40.31716537475586, "learning_rate": 1.613643153526971e-05, "loss": 1.3868, "step": 11647 }, { "epoch": 9.666390041493775, "grad_norm": 52.47268295288086, "learning_rate": 1.6136099585062242e-05, "loss": 0.8377, "step": 11648 }, { "epoch": 9.667219917012448, "grad_norm": 34.8050537109375, "learning_rate": 1.6135767634854774e-05, "loss": 1.41, "step": 11649 }, { "epoch": 9.66804979253112, "grad_norm": 19.107128143310547, "learning_rate": 1.6135435684647306e-05, "loss": 0.9235, "step": 11650 }, { "epoch": 9.668879668049792, "grad_norm": 28.244226455688477, "learning_rate": 1.6135103734439835e-05, "loss": 0.8463, "step": 11651 }, { "epoch": 9.669709543568464, "grad_norm": 22.45610809326172, "learning_rate": 1.6134771784232367e-05, "loss": 1.3546, "step": 11652 }, { "epoch": 9.670539419087136, "grad_norm": 22.014293670654297, "learning_rate": 1.61344398340249e-05, "loss": 0.8668, "step": 11653 }, { "epoch": 9.671369294605809, "grad_norm": 34.264278411865234, "learning_rate": 1.6134107883817428e-05, "loss": 0.7034, "step": 11654 }, { "epoch": 9.67219917012448, "grad_norm": 35.39018249511719, "learning_rate": 1.613377593360996e-05, "loss": 1.6831, "step": 11655 }, { "epoch": 
9.673029045643153, "grad_norm": 31.20560073852539, "learning_rate": 1.6133443983402492e-05, "loss": 0.773, "step": 11656 }, { "epoch": 9.673858921161825, "grad_norm": 19.44842529296875, "learning_rate": 1.613311203319502e-05, "loss": 0.6973, "step": 11657 }, { "epoch": 9.674688796680497, "grad_norm": 24.525840759277344, "learning_rate": 1.6132780082987553e-05, "loss": 1.2533, "step": 11658 }, { "epoch": 9.67551867219917, "grad_norm": 33.188865661621094, "learning_rate": 1.6132448132780085e-05, "loss": 1.1545, "step": 11659 }, { "epoch": 9.676348547717842, "grad_norm": 35.28559494018555, "learning_rate": 1.6132116182572613e-05, "loss": 1.2608, "step": 11660 }, { "epoch": 9.677178423236514, "grad_norm": 35.21061706542969, "learning_rate": 1.6131784232365146e-05, "loss": 1.0663, "step": 11661 }, { "epoch": 9.678008298755186, "grad_norm": 18.403350830078125, "learning_rate": 1.6131452282157678e-05, "loss": 1.1935, "step": 11662 }, { "epoch": 9.678838174273858, "grad_norm": 18.071056365966797, "learning_rate": 1.613112033195021e-05, "loss": 1.3222, "step": 11663 }, { "epoch": 9.67966804979253, "grad_norm": 14.835592269897461, "learning_rate": 1.613078838174274e-05, "loss": 0.8162, "step": 11664 }, { "epoch": 9.680497925311203, "grad_norm": 63.15774154663086, "learning_rate": 1.613045643153527e-05, "loss": 1.242, "step": 11665 }, { "epoch": 9.681327800829875, "grad_norm": 46.16560745239258, "learning_rate": 1.6130124481327803e-05, "loss": 0.9081, "step": 11666 }, { "epoch": 9.682157676348547, "grad_norm": 35.677921295166016, "learning_rate": 1.6129792531120335e-05, "loss": 1.0821, "step": 11667 }, { "epoch": 9.68298755186722, "grad_norm": 12.343634605407715, "learning_rate": 1.6129460580912864e-05, "loss": 0.6281, "step": 11668 }, { "epoch": 9.683817427385891, "grad_norm": 28.411779403686523, "learning_rate": 1.6129128630705396e-05, "loss": 0.9373, "step": 11669 }, { "epoch": 9.684647302904564, "grad_norm": 31.570606231689453, "learning_rate": 1.6128796680497928e-05, 
"loss": 1.6959, "step": 11670 }, { "epoch": 9.685477178423236, "grad_norm": 24.077835083007812, "learning_rate": 1.612846473029046e-05, "loss": 0.8935, "step": 11671 }, { "epoch": 9.686307053941908, "grad_norm": 45.774295806884766, "learning_rate": 1.612813278008299e-05, "loss": 1.2144, "step": 11672 }, { "epoch": 9.68713692946058, "grad_norm": 26.1201171875, "learning_rate": 1.612780082987552e-05, "loss": 1.1719, "step": 11673 }, { "epoch": 9.687966804979252, "grad_norm": 25.855731964111328, "learning_rate": 1.6127468879668053e-05, "loss": 1.3583, "step": 11674 }, { "epoch": 9.688796680497925, "grad_norm": 31.052453994750977, "learning_rate": 1.612713692946058e-05, "loss": 0.8534, "step": 11675 }, { "epoch": 9.689626556016597, "grad_norm": 118.69734191894531, "learning_rate": 1.6126804979253114e-05, "loss": 1.0267, "step": 11676 }, { "epoch": 9.690456431535269, "grad_norm": 31.450162887573242, "learning_rate": 1.6126473029045642e-05, "loss": 0.8723, "step": 11677 }, { "epoch": 9.691286307053941, "grad_norm": 23.963403701782227, "learning_rate": 1.6126141078838174e-05, "loss": 0.7171, "step": 11678 }, { "epoch": 9.692116182572613, "grad_norm": 38.09184265136719, "learning_rate": 1.6125809128630707e-05, "loss": 1.2336, "step": 11679 }, { "epoch": 9.692946058091286, "grad_norm": 50.17860794067383, "learning_rate": 1.612547717842324e-05, "loss": 1.7763, "step": 11680 }, { "epoch": 9.693775933609958, "grad_norm": 27.542959213256836, "learning_rate": 1.6125145228215767e-05, "loss": 1.2476, "step": 11681 }, { "epoch": 9.69460580912863, "grad_norm": 23.856565475463867, "learning_rate": 1.61248132780083e-05, "loss": 1.108, "step": 11682 }, { "epoch": 9.695435684647302, "grad_norm": 19.861480712890625, "learning_rate": 1.612448132780083e-05, "loss": 0.6475, "step": 11683 }, { "epoch": 9.696265560165974, "grad_norm": 25.191505432128906, "learning_rate": 1.6124149377593364e-05, "loss": 0.9533, "step": 11684 }, { "epoch": 9.697095435684647, "grad_norm": 48.2908935546875, 
"learning_rate": 1.6123817427385892e-05, "loss": 1.3878, "step": 11685 }, { "epoch": 9.697925311203319, "grad_norm": 21.602449417114258, "learning_rate": 1.6123485477178425e-05, "loss": 1.0544, "step": 11686 }, { "epoch": 9.698755186721991, "grad_norm": 31.049280166625977, "learning_rate": 1.6123153526970957e-05, "loss": 1.3697, "step": 11687 }, { "epoch": 9.699585062240663, "grad_norm": 22.008377075195312, "learning_rate": 1.612282157676349e-05, "loss": 0.8976, "step": 11688 }, { "epoch": 9.700414937759335, "grad_norm": 15.951498031616211, "learning_rate": 1.6122489626556017e-05, "loss": 0.7297, "step": 11689 }, { "epoch": 9.701244813278008, "grad_norm": 23.638273239135742, "learning_rate": 1.612215767634855e-05, "loss": 1.1042, "step": 11690 }, { "epoch": 9.70207468879668, "grad_norm": 37.3316764831543, "learning_rate": 1.6121825726141082e-05, "loss": 0.8275, "step": 11691 }, { "epoch": 9.702904564315352, "grad_norm": 40.13359069824219, "learning_rate": 1.612149377593361e-05, "loss": 1.3352, "step": 11692 }, { "epoch": 9.703734439834024, "grad_norm": 30.062728881835938, "learning_rate": 1.6121161825726143e-05, "loss": 0.4374, "step": 11693 }, { "epoch": 9.704564315352696, "grad_norm": 36.73507308959961, "learning_rate": 1.6120829875518675e-05, "loss": 1.0986, "step": 11694 }, { "epoch": 9.705394190871369, "grad_norm": 62.507808685302734, "learning_rate": 1.6120497925311203e-05, "loss": 0.9807, "step": 11695 }, { "epoch": 9.70622406639004, "grad_norm": 30.455041885375977, "learning_rate": 1.6120165975103735e-05, "loss": 1.5043, "step": 11696 }, { "epoch": 9.707053941908713, "grad_norm": 22.889528274536133, "learning_rate": 1.6119834024896268e-05, "loss": 0.8243, "step": 11697 }, { "epoch": 9.707883817427385, "grad_norm": 20.83912467956543, "learning_rate": 1.6119502074688796e-05, "loss": 0.9621, "step": 11698 }, { "epoch": 9.708713692946057, "grad_norm": 31.349231719970703, "learning_rate": 1.611917012448133e-05, "loss": 1.2147, "step": 11699 }, { "epoch": 
9.70954356846473, "grad_norm": 31.99721336364746, "learning_rate": 1.611883817427386e-05, "loss": 1.0446, "step": 11700 }, { "epoch": 9.710373443983402, "grad_norm": 12.274801254272461, "learning_rate": 1.611850622406639e-05, "loss": 0.3517, "step": 11701 }, { "epoch": 9.711203319502074, "grad_norm": 26.002063751220703, "learning_rate": 1.611817427385892e-05, "loss": 1.7987, "step": 11702 }, { "epoch": 9.712033195020746, "grad_norm": 16.828428268432617, "learning_rate": 1.6117842323651453e-05, "loss": 0.5323, "step": 11703 }, { "epoch": 9.712863070539418, "grad_norm": 45.6501579284668, "learning_rate": 1.6117510373443986e-05, "loss": 0.7701, "step": 11704 }, { "epoch": 9.71369294605809, "grad_norm": 28.32248878479004, "learning_rate": 1.6117178423236518e-05, "loss": 0.6268, "step": 11705 }, { "epoch": 9.714522821576763, "grad_norm": 20.020416259765625, "learning_rate": 1.6116846473029046e-05, "loss": 1.3924, "step": 11706 }, { "epoch": 9.715352697095435, "grad_norm": 16.47763442993164, "learning_rate": 1.611651452282158e-05, "loss": 0.7883, "step": 11707 }, { "epoch": 9.716182572614107, "grad_norm": 30.984338760375977, "learning_rate": 1.611618257261411e-05, "loss": 1.014, "step": 11708 }, { "epoch": 9.71701244813278, "grad_norm": 17.668441772460938, "learning_rate": 1.6115850622406643e-05, "loss": 0.8389, "step": 11709 }, { "epoch": 9.717842323651452, "grad_norm": 20.8929443359375, "learning_rate": 1.611551867219917e-05, "loss": 0.6537, "step": 11710 }, { "epoch": 9.718672199170124, "grad_norm": 32.09953689575195, "learning_rate": 1.6115186721991704e-05, "loss": 0.6812, "step": 11711 }, { "epoch": 9.719502074688796, "grad_norm": 30.321273803710938, "learning_rate": 1.6114854771784236e-05, "loss": 1.2139, "step": 11712 }, { "epoch": 9.720331950207468, "grad_norm": 36.375205993652344, "learning_rate": 1.6114522821576764e-05, "loss": 1.2333, "step": 11713 }, { "epoch": 9.72116182572614, "grad_norm": 30.59935760498047, "learning_rate": 1.6114190871369296e-05, "loss": 
1.1324, "step": 11714 }, { "epoch": 9.721991701244812, "grad_norm": 31.700580596923828, "learning_rate": 1.6113858921161825e-05, "loss": 1.3183, "step": 11715 }, { "epoch": 9.722821576763485, "grad_norm": 29.989303588867188, "learning_rate": 1.6113526970954357e-05, "loss": 1.5736, "step": 11716 }, { "epoch": 9.723651452282157, "grad_norm": 80.50871276855469, "learning_rate": 1.611319502074689e-05, "loss": 1.1554, "step": 11717 }, { "epoch": 9.724481327800829, "grad_norm": 31.687253952026367, "learning_rate": 1.6112863070539418e-05, "loss": 0.9063, "step": 11718 }, { "epoch": 9.725311203319501, "grad_norm": 32.452552795410156, "learning_rate": 1.611253112033195e-05, "loss": 1.0686, "step": 11719 }, { "epoch": 9.726141078838173, "grad_norm": 22.87450408935547, "learning_rate": 1.6112199170124482e-05, "loss": 1.0544, "step": 11720 }, { "epoch": 9.726970954356846, "grad_norm": 31.89764404296875, "learning_rate": 1.6111867219917014e-05, "loss": 0.8874, "step": 11721 }, { "epoch": 9.727800829875518, "grad_norm": 29.351640701293945, "learning_rate": 1.6111535269709543e-05, "loss": 1.7003, "step": 11722 }, { "epoch": 9.72863070539419, "grad_norm": 25.800949096679688, "learning_rate": 1.6111203319502075e-05, "loss": 1.1362, "step": 11723 }, { "epoch": 9.729460580912862, "grad_norm": 22.369935989379883, "learning_rate": 1.6110871369294607e-05, "loss": 0.9596, "step": 11724 }, { "epoch": 9.730290456431534, "grad_norm": 26.668428421020508, "learning_rate": 1.611053941908714e-05, "loss": 1.0322, "step": 11725 }, { "epoch": 9.731120331950207, "grad_norm": 42.0049934387207, "learning_rate": 1.6110207468879668e-05, "loss": 1.2444, "step": 11726 }, { "epoch": 9.731950207468879, "grad_norm": 24.539344787597656, "learning_rate": 1.61098755186722e-05, "loss": 1.2969, "step": 11727 }, { "epoch": 9.732780082987551, "grad_norm": 22.87459945678711, "learning_rate": 1.6109543568464732e-05, "loss": 0.6746, "step": 11728 }, { "epoch": 9.733609958506223, "grad_norm": 31.515714645385742, 
"learning_rate": 1.6109211618257265e-05, "loss": 1.3606, "step": 11729 }, { "epoch": 9.734439834024897, "grad_norm": 32.57532501220703, "learning_rate": 1.6108879668049793e-05, "loss": 1.0059, "step": 11730 }, { "epoch": 9.73526970954357, "grad_norm": 24.458126068115234, "learning_rate": 1.6108547717842325e-05, "loss": 0.9458, "step": 11731 }, { "epoch": 9.736099585062242, "grad_norm": 49.732147216796875, "learning_rate": 1.6108215767634857e-05, "loss": 1.4197, "step": 11732 }, { "epoch": 9.736929460580914, "grad_norm": 27.540557861328125, "learning_rate": 1.6107883817427386e-05, "loss": 0.5984, "step": 11733 }, { "epoch": 9.737759336099586, "grad_norm": 24.45018768310547, "learning_rate": 1.6107551867219918e-05, "loss": 0.6028, "step": 11734 }, { "epoch": 9.738589211618258, "grad_norm": 16.46938705444336, "learning_rate": 1.610721991701245e-05, "loss": 0.3583, "step": 11735 }, { "epoch": 9.73941908713693, "grad_norm": 27.86134910583496, "learning_rate": 1.610688796680498e-05, "loss": 0.8115, "step": 11736 }, { "epoch": 9.740248962655603, "grad_norm": 20.81971549987793, "learning_rate": 1.610655601659751e-05, "loss": 0.7186, "step": 11737 }, { "epoch": 9.741078838174275, "grad_norm": 41.754913330078125, "learning_rate": 1.6106224066390043e-05, "loss": 0.989, "step": 11738 }, { "epoch": 9.741908713692947, "grad_norm": 41.34501647949219, "learning_rate": 1.6105892116182572e-05, "loss": 1.3337, "step": 11739 }, { "epoch": 9.74273858921162, "grad_norm": 35.71712875366211, "learning_rate": 1.6105560165975104e-05, "loss": 1.3014, "step": 11740 }, { "epoch": 9.743568464730291, "grad_norm": 18.354379653930664, "learning_rate": 1.6105228215767636e-05, "loss": 0.9676, "step": 11741 }, { "epoch": 9.744398340248964, "grad_norm": 37.12610626220703, "learning_rate": 1.610489626556017e-05, "loss": 1.9201, "step": 11742 }, { "epoch": 9.745228215767636, "grad_norm": 47.272438049316406, "learning_rate": 1.6104564315352697e-05, "loss": 1.0193, "step": 11743 }, { "epoch": 
9.746058091286308, "grad_norm": 24.28762435913086, "learning_rate": 1.610423236514523e-05, "loss": 0.6875, "step": 11744 }, { "epoch": 9.74688796680498, "grad_norm": 25.424427032470703, "learning_rate": 1.610390041493776e-05, "loss": 0.6763, "step": 11745 }, { "epoch": 9.747717842323652, "grad_norm": 48.725868225097656, "learning_rate": 1.6103568464730293e-05, "loss": 0.6319, "step": 11746 }, { "epoch": 9.748547717842325, "grad_norm": 15.869977951049805, "learning_rate": 1.6103236514522822e-05, "loss": 0.7932, "step": 11747 }, { "epoch": 9.749377593360997, "grad_norm": 35.37836837768555, "learning_rate": 1.6102904564315354e-05, "loss": 0.9673, "step": 11748 }, { "epoch": 9.750207468879669, "grad_norm": 33.74980163574219, "learning_rate": 1.6102572614107886e-05, "loss": 0.897, "step": 11749 }, { "epoch": 9.751037344398341, "grad_norm": 29.470144271850586, "learning_rate": 1.610224066390042e-05, "loss": 0.8562, "step": 11750 }, { "epoch": 9.751867219917013, "grad_norm": 34.2720832824707, "learning_rate": 1.6101908713692947e-05, "loss": 1.4581, "step": 11751 }, { "epoch": 9.752697095435686, "grad_norm": 16.94945526123047, "learning_rate": 1.610157676348548e-05, "loss": 0.4789, "step": 11752 }, { "epoch": 9.753526970954358, "grad_norm": 28.7180118560791, "learning_rate": 1.610124481327801e-05, "loss": 1.1101, "step": 11753 }, { "epoch": 9.75435684647303, "grad_norm": 31.185121536254883, "learning_rate": 1.610091286307054e-05, "loss": 1.2861, "step": 11754 }, { "epoch": 9.755186721991702, "grad_norm": 33.631404876708984, "learning_rate": 1.6100580912863072e-05, "loss": 1.0285, "step": 11755 }, { "epoch": 9.756016597510374, "grad_norm": 19.771465301513672, "learning_rate": 1.61002489626556e-05, "loss": 0.8061, "step": 11756 }, { "epoch": 9.756846473029047, "grad_norm": 35.273860931396484, "learning_rate": 1.6099917012448133e-05, "loss": 1.5185, "step": 11757 }, { "epoch": 9.757676348547719, "grad_norm": 26.03792953491211, "learning_rate": 1.6099585062240665e-05, "loss": 
0.6506, "step": 11758 }, { "epoch": 9.758506224066391, "grad_norm": 29.689184188842773, "learning_rate": 1.6099253112033197e-05, "loss": 1.596, "step": 11759 }, { "epoch": 9.759336099585063, "grad_norm": 39.41535568237305, "learning_rate": 1.6098921161825726e-05, "loss": 0.7784, "step": 11760 }, { "epoch": 9.760165975103735, "grad_norm": 25.545347213745117, "learning_rate": 1.6098589211618258e-05, "loss": 1.2028, "step": 11761 }, { "epoch": 9.760995850622407, "grad_norm": 30.15655517578125, "learning_rate": 1.609825726141079e-05, "loss": 0.9487, "step": 11762 }, { "epoch": 9.76182572614108, "grad_norm": 35.04312515258789, "learning_rate": 1.6097925311203322e-05, "loss": 1.1277, "step": 11763 }, { "epoch": 9.762655601659752, "grad_norm": 20.07370948791504, "learning_rate": 1.609759336099585e-05, "loss": 1.1572, "step": 11764 }, { "epoch": 9.763485477178424, "grad_norm": 28.5456485748291, "learning_rate": 1.6097261410788383e-05, "loss": 0.9423, "step": 11765 }, { "epoch": 9.764315352697096, "grad_norm": 19.244985580444336, "learning_rate": 1.6096929460580915e-05, "loss": 0.8181, "step": 11766 }, { "epoch": 9.765145228215768, "grad_norm": 30.788089752197266, "learning_rate": 1.6096597510373447e-05, "loss": 1.2666, "step": 11767 }, { "epoch": 9.76597510373444, "grad_norm": 15.875161170959473, "learning_rate": 1.6096265560165976e-05, "loss": 0.6554, "step": 11768 }, { "epoch": 9.766804979253113, "grad_norm": 25.585155487060547, "learning_rate": 1.6095933609958508e-05, "loss": 0.7964, "step": 11769 }, { "epoch": 9.767634854771785, "grad_norm": 17.655094146728516, "learning_rate": 1.609560165975104e-05, "loss": 0.9795, "step": 11770 }, { "epoch": 9.768464730290457, "grad_norm": 30.797502517700195, "learning_rate": 1.609526970954357e-05, "loss": 1.2576, "step": 11771 }, { "epoch": 9.76929460580913, "grad_norm": 15.154184341430664, "learning_rate": 1.60949377593361e-05, "loss": 0.8136, "step": 11772 }, { "epoch": 9.770124481327802, "grad_norm": 23.831331253051758, 
"learning_rate": 1.6094605809128633e-05, "loss": 1.1569, "step": 11773 }, { "epoch": 9.770954356846474, "grad_norm": 15.337654113769531, "learning_rate": 1.6094273858921162e-05, "loss": 0.797, "step": 11774 }, { "epoch": 9.771784232365146, "grad_norm": 43.62678909301758, "learning_rate": 1.6093941908713694e-05, "loss": 1.5479, "step": 11775 }, { "epoch": 9.772614107883818, "grad_norm": 21.27137565612793, "learning_rate": 1.6093609958506223e-05, "loss": 1.2681, "step": 11776 }, { "epoch": 9.77344398340249, "grad_norm": 20.339824676513672, "learning_rate": 1.6093278008298755e-05, "loss": 0.6651, "step": 11777 }, { "epoch": 9.774273858921163, "grad_norm": 23.213727951049805, "learning_rate": 1.6092946058091287e-05, "loss": 1.0862, "step": 11778 }, { "epoch": 9.775103734439835, "grad_norm": 26.41187858581543, "learning_rate": 1.609261410788382e-05, "loss": 0.8721, "step": 11779 }, { "epoch": 9.775933609958507, "grad_norm": 24.537992477416992, "learning_rate": 1.6092282157676348e-05, "loss": 1.1133, "step": 11780 }, { "epoch": 9.77676348547718, "grad_norm": 19.17957305908203, "learning_rate": 1.609195020746888e-05, "loss": 0.9087, "step": 11781 }, { "epoch": 9.777593360995851, "grad_norm": 24.793941497802734, "learning_rate": 1.6091618257261412e-05, "loss": 1.3416, "step": 11782 }, { "epoch": 9.778423236514524, "grad_norm": 25.65264892578125, "learning_rate": 1.6091286307053944e-05, "loss": 1.2616, "step": 11783 }, { "epoch": 9.779253112033196, "grad_norm": 40.94475555419922, "learning_rate": 1.6090954356846476e-05, "loss": 1.584, "step": 11784 }, { "epoch": 9.780082987551868, "grad_norm": 27.2381591796875, "learning_rate": 1.6090622406639005e-05, "loss": 0.9775, "step": 11785 }, { "epoch": 9.78091286307054, "grad_norm": 19.69880485534668, "learning_rate": 1.6090290456431537e-05, "loss": 0.7806, "step": 11786 }, { "epoch": 9.781742738589212, "grad_norm": 26.681997299194336, "learning_rate": 1.608995850622407e-05, "loss": 1.1013, "step": 11787 }, { "epoch": 
9.782572614107885, "grad_norm": 24.99700927734375, "learning_rate": 1.60896265560166e-05, "loss": 1.1576, "step": 11788 }, { "epoch": 9.783402489626557, "grad_norm": 20.674524307250977, "learning_rate": 1.608929460580913e-05, "loss": 1.0634, "step": 11789 }, { "epoch": 9.784232365145229, "grad_norm": 21.330917358398438, "learning_rate": 1.6088962655601662e-05, "loss": 0.5508, "step": 11790 }, { "epoch": 9.785062240663901, "grad_norm": 18.497411727905273, "learning_rate": 1.6088630705394194e-05, "loss": 1.0798, "step": 11791 }, { "epoch": 9.785892116182573, "grad_norm": 22.950483322143555, "learning_rate": 1.6088298755186723e-05, "loss": 0.8075, "step": 11792 }, { "epoch": 9.786721991701246, "grad_norm": 40.08317947387695, "learning_rate": 1.6087966804979255e-05, "loss": 1.2589, "step": 11793 }, { "epoch": 9.787551867219918, "grad_norm": 39.06972885131836, "learning_rate": 1.6087634854771784e-05, "loss": 1.1873, "step": 11794 }, { "epoch": 9.78838174273859, "grad_norm": 23.336149215698242, "learning_rate": 1.6087302904564316e-05, "loss": 1.0296, "step": 11795 }, { "epoch": 9.789211618257262, "grad_norm": 23.452787399291992, "learning_rate": 1.6086970954356848e-05, "loss": 1.0086, "step": 11796 }, { "epoch": 9.790041493775934, "grad_norm": 25.30480194091797, "learning_rate": 1.6086639004149377e-05, "loss": 0.7717, "step": 11797 }, { "epoch": 9.790871369294607, "grad_norm": 29.212509155273438, "learning_rate": 1.608630705394191e-05, "loss": 1.3485, "step": 11798 }, { "epoch": 9.791701244813279, "grad_norm": 24.468982696533203, "learning_rate": 1.608597510373444e-05, "loss": 1.3823, "step": 11799 }, { "epoch": 9.792531120331951, "grad_norm": 25.838998794555664, "learning_rate": 1.6085643153526973e-05, "loss": 0.8894, "step": 11800 }, { "epoch": 9.793360995850623, "grad_norm": 47.32037353515625, "learning_rate": 1.6085311203319502e-05, "loss": 0.9622, "step": 11801 }, { "epoch": 9.794190871369295, "grad_norm": 45.2977294921875, "learning_rate": 1.6084979253112034e-05, 
"loss": 1.2826, "step": 11802 }, { "epoch": 9.795020746887968, "grad_norm": 32.26799011230469, "learning_rate": 1.6084647302904566e-05, "loss": 1.3187, "step": 11803 }, { "epoch": 9.79585062240664, "grad_norm": 34.83725357055664, "learning_rate": 1.6084315352697098e-05, "loss": 1.295, "step": 11804 }, { "epoch": 9.796680497925312, "grad_norm": 19.43305015563965, "learning_rate": 1.6083983402489627e-05, "loss": 0.9783, "step": 11805 }, { "epoch": 9.797510373443984, "grad_norm": 82.56720733642578, "learning_rate": 1.608365145228216e-05, "loss": 1.375, "step": 11806 }, { "epoch": 9.798340248962656, "grad_norm": 19.596778869628906, "learning_rate": 1.608331950207469e-05, "loss": 0.9688, "step": 11807 }, { "epoch": 9.799170124481329, "grad_norm": 24.201372146606445, "learning_rate": 1.6082987551867223e-05, "loss": 1.1464, "step": 11808 }, { "epoch": 9.8, "grad_norm": 20.891992568969727, "learning_rate": 1.6082655601659752e-05, "loss": 1.1452, "step": 11809 }, { "epoch": 9.800829875518673, "grad_norm": 26.623929977416992, "learning_rate": 1.6082323651452284e-05, "loss": 1.154, "step": 11810 }, { "epoch": 9.801659751037345, "grad_norm": 23.81696891784668, "learning_rate": 1.6081991701244816e-05, "loss": 0.9026, "step": 11811 }, { "epoch": 9.802489626556017, "grad_norm": 34.331180572509766, "learning_rate": 1.6081659751037345e-05, "loss": 0.9897, "step": 11812 }, { "epoch": 9.80331950207469, "grad_norm": 22.65129852294922, "learning_rate": 1.6081327800829877e-05, "loss": 1.8877, "step": 11813 }, { "epoch": 9.804149377593362, "grad_norm": 21.075159072875977, "learning_rate": 1.608099585062241e-05, "loss": 0.6727, "step": 11814 }, { "epoch": 9.804979253112034, "grad_norm": 42.846885681152344, "learning_rate": 1.6080663900414938e-05, "loss": 0.8531, "step": 11815 }, { "epoch": 9.805809128630706, "grad_norm": 29.410911560058594, "learning_rate": 1.608033195020747e-05, "loss": 1.3389, "step": 11816 }, { "epoch": 9.806639004149378, "grad_norm": 23.583646774291992, 
"learning_rate": 1.6080000000000002e-05, "loss": 1.0277, "step": 11817 }, { "epoch": 9.80746887966805, "grad_norm": 44.729888916015625, "learning_rate": 1.607966804979253e-05, "loss": 1.1464, "step": 11818 }, { "epoch": 9.808298755186723, "grad_norm": 28.679628372192383, "learning_rate": 1.6079336099585063e-05, "loss": 1.5068, "step": 11819 }, { "epoch": 9.809128630705395, "grad_norm": 35.1036491394043, "learning_rate": 1.6079004149377595e-05, "loss": 1.311, "step": 11820 }, { "epoch": 9.809958506224067, "grad_norm": 38.64852523803711, "learning_rate": 1.6078672199170127e-05, "loss": 1.6236, "step": 11821 }, { "epoch": 9.81078838174274, "grad_norm": 13.640294075012207, "learning_rate": 1.6078340248962656e-05, "loss": 0.5417, "step": 11822 }, { "epoch": 9.811618257261411, "grad_norm": 38.52822494506836, "learning_rate": 1.6078008298755188e-05, "loss": 1.5032, "step": 11823 }, { "epoch": 9.812448132780084, "grad_norm": 18.29010009765625, "learning_rate": 1.607767634854772e-05, "loss": 0.7766, "step": 11824 }, { "epoch": 9.813278008298756, "grad_norm": 27.581298828125, "learning_rate": 1.6077344398340252e-05, "loss": 0.7183, "step": 11825 }, { "epoch": 9.814107883817428, "grad_norm": 18.359710693359375, "learning_rate": 1.607701244813278e-05, "loss": 0.8237, "step": 11826 }, { "epoch": 9.8149377593361, "grad_norm": 16.73381233215332, "learning_rate": 1.6076680497925313e-05, "loss": 0.7529, "step": 11827 }, { "epoch": 9.815767634854772, "grad_norm": 24.694425582885742, "learning_rate": 1.6076348547717845e-05, "loss": 1.2464, "step": 11828 }, { "epoch": 9.816597510373445, "grad_norm": 25.14619255065918, "learning_rate": 1.6076016597510377e-05, "loss": 1.4429, "step": 11829 }, { "epoch": 9.817427385892117, "grad_norm": 23.98735809326172, "learning_rate": 1.6075684647302906e-05, "loss": 1.0625, "step": 11830 }, { "epoch": 9.818257261410789, "grad_norm": 28.467239379882812, "learning_rate": 1.6075352697095438e-05, "loss": 1.2602, "step": 11831 }, { "epoch": 
9.819087136929461, "grad_norm": 99.83595275878906, "learning_rate": 1.6075020746887967e-05, "loss": 1.2696, "step": 11832 }, { "epoch": 9.819917012448133, "grad_norm": 34.02756118774414, "learning_rate": 1.60746887966805e-05, "loss": 0.5148, "step": 11833 }, { "epoch": 9.820746887966806, "grad_norm": 31.19382667541504, "learning_rate": 1.607435684647303e-05, "loss": 1.8605, "step": 11834 }, { "epoch": 9.821576763485478, "grad_norm": 43.48203659057617, "learning_rate": 1.607402489626556e-05, "loss": 1.5058, "step": 11835 }, { "epoch": 9.82240663900415, "grad_norm": 17.880786895751953, "learning_rate": 1.607369294605809e-05, "loss": 0.81, "step": 11836 }, { "epoch": 9.823236514522822, "grad_norm": 29.8580265045166, "learning_rate": 1.6073360995850624e-05, "loss": 1.1288, "step": 11837 }, { "epoch": 9.824066390041494, "grad_norm": 60.89053726196289, "learning_rate": 1.6073029045643156e-05, "loss": 1.0976, "step": 11838 }, { "epoch": 9.824896265560167, "grad_norm": 24.55507469177246, "learning_rate": 1.6072697095435685e-05, "loss": 0.8194, "step": 11839 }, { "epoch": 9.825726141078839, "grad_norm": 27.61335563659668, "learning_rate": 1.6072365145228217e-05, "loss": 1.3439, "step": 11840 }, { "epoch": 9.826556016597511, "grad_norm": 20.370710372924805, "learning_rate": 1.607203319502075e-05, "loss": 0.8155, "step": 11841 }, { "epoch": 9.827385892116183, "grad_norm": 17.435375213623047, "learning_rate": 1.607170124481328e-05, "loss": 0.8574, "step": 11842 }, { "epoch": 9.828215767634855, "grad_norm": 15.466710090637207, "learning_rate": 1.607136929460581e-05, "loss": 0.9978, "step": 11843 }, { "epoch": 9.829045643153528, "grad_norm": 21.486108779907227, "learning_rate": 1.6071037344398342e-05, "loss": 0.7418, "step": 11844 }, { "epoch": 9.8298755186722, "grad_norm": 16.559524536132812, "learning_rate": 1.6070705394190874e-05, "loss": 0.9532, "step": 11845 }, { "epoch": 9.830705394190872, "grad_norm": 35.38923645019531, "learning_rate": 1.6070373443983406e-05, "loss": 
1.1177, "step": 11846 }, { "epoch": 9.831535269709544, "grad_norm": 40.33888626098633, "learning_rate": 1.6070041493775935e-05, "loss": 1.7294, "step": 11847 }, { "epoch": 9.832365145228216, "grad_norm": 39.12934875488281, "learning_rate": 1.6069709543568467e-05, "loss": 1.3125, "step": 11848 }, { "epoch": 9.833195020746889, "grad_norm": 15.940217971801758, "learning_rate": 1.6069377593361e-05, "loss": 0.5471, "step": 11849 }, { "epoch": 9.83402489626556, "grad_norm": 28.595874786376953, "learning_rate": 1.6069045643153528e-05, "loss": 0.8603, "step": 11850 }, { "epoch": 9.834854771784233, "grad_norm": 30.263282775878906, "learning_rate": 1.606871369294606e-05, "loss": 1.0971, "step": 11851 }, { "epoch": 9.835684647302905, "grad_norm": 18.42275619506836, "learning_rate": 1.6068381742738592e-05, "loss": 0.6055, "step": 11852 }, { "epoch": 9.836514522821577, "grad_norm": 17.784378051757812, "learning_rate": 1.606804979253112e-05, "loss": 0.6281, "step": 11853 }, { "epoch": 9.83734439834025, "grad_norm": 33.57111358642578, "learning_rate": 1.6067717842323653e-05, "loss": 0.9401, "step": 11854 }, { "epoch": 9.838174273858922, "grad_norm": 23.748430252075195, "learning_rate": 1.606738589211618e-05, "loss": 0.8126, "step": 11855 }, { "epoch": 9.839004149377594, "grad_norm": 35.09144592285156, "learning_rate": 1.6067053941908713e-05, "loss": 1.2475, "step": 11856 }, { "epoch": 9.839834024896266, "grad_norm": 18.86341667175293, "learning_rate": 1.6066721991701246e-05, "loss": 0.8759, "step": 11857 }, { "epoch": 9.840663900414938, "grad_norm": 41.49941635131836, "learning_rate": 1.6066390041493778e-05, "loss": 1.3055, "step": 11858 }, { "epoch": 9.84149377593361, "grad_norm": 34.549644470214844, "learning_rate": 1.6066058091286306e-05, "loss": 0.8999, "step": 11859 }, { "epoch": 9.842323651452283, "grad_norm": 18.08779525756836, "learning_rate": 1.606572614107884e-05, "loss": 0.7406, "step": 11860 }, { "epoch": 9.843153526970955, "grad_norm": 21.801408767700195, 
"learning_rate": 1.606539419087137e-05, "loss": 0.912, "step": 11861 }, { "epoch": 9.843983402489627, "grad_norm": 30.16217613220215, "learning_rate": 1.6065062240663903e-05, "loss": 1.317, "step": 11862 }, { "epoch": 9.8448132780083, "grad_norm": 17.95440673828125, "learning_rate": 1.6064730290456435e-05, "loss": 0.5346, "step": 11863 }, { "epoch": 9.845643153526972, "grad_norm": 25.62726402282715, "learning_rate": 1.6064398340248964e-05, "loss": 1.0501, "step": 11864 }, { "epoch": 9.846473029045644, "grad_norm": 23.757579803466797, "learning_rate": 1.6064066390041496e-05, "loss": 0.7816, "step": 11865 }, { "epoch": 9.847302904564316, "grad_norm": 97.15174865722656, "learning_rate": 1.6063734439834028e-05, "loss": 1.4091, "step": 11866 }, { "epoch": 9.848132780082988, "grad_norm": 47.606964111328125, "learning_rate": 1.606340248962656e-05, "loss": 1.5054, "step": 11867 }, { "epoch": 9.84896265560166, "grad_norm": 30.687761306762695, "learning_rate": 1.606307053941909e-05, "loss": 0.9566, "step": 11868 }, { "epoch": 9.849792531120332, "grad_norm": 33.19036865234375, "learning_rate": 1.606273858921162e-05, "loss": 0.8991, "step": 11869 }, { "epoch": 9.850622406639005, "grad_norm": 24.083871841430664, "learning_rate": 1.6062406639004153e-05, "loss": 0.678, "step": 11870 }, { "epoch": 9.851452282157677, "grad_norm": 51.18464279174805, "learning_rate": 1.606207468879668e-05, "loss": 1.0877, "step": 11871 }, { "epoch": 9.852282157676349, "grad_norm": 30.98955535888672, "learning_rate": 1.6061742738589214e-05, "loss": 1.0122, "step": 11872 }, { "epoch": 9.853112033195021, "grad_norm": 24.860450744628906, "learning_rate": 1.6061410788381742e-05, "loss": 1.1531, "step": 11873 }, { "epoch": 9.853941908713693, "grad_norm": 45.22724533081055, "learning_rate": 1.6061078838174274e-05, "loss": 1.585, "step": 11874 }, { "epoch": 9.854771784232366, "grad_norm": 28.299135208129883, "learning_rate": 1.6060746887966807e-05, "loss": 1.306, "step": 11875 }, { "epoch": 
9.855601659751038, "grad_norm": 16.793230056762695, "learning_rate": 1.6060414937759335e-05, "loss": 0.4952, "step": 11876 }, { "epoch": 9.85643153526971, "grad_norm": 22.708728790283203, "learning_rate": 1.6060082987551867e-05, "loss": 0.8454, "step": 11877 }, { "epoch": 9.857261410788382, "grad_norm": 15.229405403137207, "learning_rate": 1.60597510373444e-05, "loss": 0.4757, "step": 11878 }, { "epoch": 9.858091286307054, "grad_norm": 30.09872817993164, "learning_rate": 1.605941908713693e-05, "loss": 1.2166, "step": 11879 }, { "epoch": 9.858921161825727, "grad_norm": 26.74100112915039, "learning_rate": 1.605908713692946e-05, "loss": 1.1563, "step": 11880 }, { "epoch": 9.859751037344399, "grad_norm": 40.63578414916992, "learning_rate": 1.6058755186721992e-05, "loss": 1.3968, "step": 11881 }, { "epoch": 9.860580912863071, "grad_norm": 19.714357376098633, "learning_rate": 1.6058423236514525e-05, "loss": 0.8281, "step": 11882 }, { "epoch": 9.861410788381743, "grad_norm": 21.90019989013672, "learning_rate": 1.6058091286307057e-05, "loss": 1.4817, "step": 11883 }, { "epoch": 9.862240663900415, "grad_norm": 40.66925048828125, "learning_rate": 1.6057759336099585e-05, "loss": 0.8739, "step": 11884 }, { "epoch": 9.863070539419088, "grad_norm": 34.548004150390625, "learning_rate": 1.6057427385892117e-05, "loss": 0.7815, "step": 11885 }, { "epoch": 9.86390041493776, "grad_norm": 27.454648971557617, "learning_rate": 1.605709543568465e-05, "loss": 0.8298, "step": 11886 }, { "epoch": 9.864730290456432, "grad_norm": 23.91107749938965, "learning_rate": 1.605676348547718e-05, "loss": 1.1235, "step": 11887 }, { "epoch": 9.865560165975104, "grad_norm": 20.25925636291504, "learning_rate": 1.605643153526971e-05, "loss": 0.9153, "step": 11888 }, { "epoch": 9.866390041493776, "grad_norm": 26.652713775634766, "learning_rate": 1.6056099585062242e-05, "loss": 1.221, "step": 11889 }, { "epoch": 9.867219917012449, "grad_norm": 16.34737777709961, "learning_rate": 1.6055767634854775e-05, 
"loss": 0.518, "step": 11890 }, { "epoch": 9.86804979253112, "grad_norm": 15.349574089050293, "learning_rate": 1.6055435684647303e-05, "loss": 0.5431, "step": 11891 }, { "epoch": 9.868879668049793, "grad_norm": 26.713699340820312, "learning_rate": 1.6055103734439835e-05, "loss": 0.8873, "step": 11892 }, { "epoch": 9.869709543568465, "grad_norm": 17.92741584777832, "learning_rate": 1.6054771784232364e-05, "loss": 0.8026, "step": 11893 }, { "epoch": 9.870539419087137, "grad_norm": 28.067962646484375, "learning_rate": 1.6054439834024896e-05, "loss": 0.5514, "step": 11894 }, { "epoch": 9.87136929460581, "grad_norm": 26.10515785217285, "learning_rate": 1.605410788381743e-05, "loss": 1.1907, "step": 11895 }, { "epoch": 9.872199170124482, "grad_norm": 27.842376708984375, "learning_rate": 1.605377593360996e-05, "loss": 1.4686, "step": 11896 }, { "epoch": 9.873029045643154, "grad_norm": 19.603670120239258, "learning_rate": 1.605344398340249e-05, "loss": 0.7424, "step": 11897 }, { "epoch": 9.873858921161826, "grad_norm": 21.473379135131836, "learning_rate": 1.605311203319502e-05, "loss": 1.1545, "step": 11898 }, { "epoch": 9.874688796680498, "grad_norm": 22.714263916015625, "learning_rate": 1.6052780082987553e-05, "loss": 1.2564, "step": 11899 }, { "epoch": 9.87551867219917, "grad_norm": 26.952295303344727, "learning_rate": 1.6052448132780086e-05, "loss": 1.156, "step": 11900 }, { "epoch": 9.876348547717843, "grad_norm": 28.665321350097656, "learning_rate": 1.6052116182572614e-05, "loss": 0.684, "step": 11901 }, { "epoch": 9.877178423236515, "grad_norm": 26.106657028198242, "learning_rate": 1.6051784232365146e-05, "loss": 0.987, "step": 11902 }, { "epoch": 9.878008298755187, "grad_norm": 32.83277130126953, "learning_rate": 1.605145228215768e-05, "loss": 1.7493, "step": 11903 }, { "epoch": 9.87883817427386, "grad_norm": 21.444974899291992, "learning_rate": 1.605112033195021e-05, "loss": 0.6938, "step": 11904 }, { "epoch": 9.879668049792532, "grad_norm": 29.32794761657715, 
"learning_rate": 1.605078838174274e-05, "loss": 1.2723, "step": 11905 }, { "epoch": 9.880497925311204, "grad_norm": 30.27927589416504, "learning_rate": 1.605045643153527e-05, "loss": 1.9218, "step": 11906 }, { "epoch": 9.881327800829876, "grad_norm": 31.482574462890625, "learning_rate": 1.6050124481327803e-05, "loss": 0.8421, "step": 11907 }, { "epoch": 9.882157676348548, "grad_norm": 24.89551544189453, "learning_rate": 1.6049792531120336e-05, "loss": 1.033, "step": 11908 }, { "epoch": 9.88298755186722, "grad_norm": 18.008352279663086, "learning_rate": 1.6049460580912864e-05, "loss": 0.7089, "step": 11909 }, { "epoch": 9.883817427385893, "grad_norm": 28.345691680908203, "learning_rate": 1.6049128630705396e-05, "loss": 1.0163, "step": 11910 }, { "epoch": 9.884647302904565, "grad_norm": 27.88383674621582, "learning_rate": 1.6048796680497925e-05, "loss": 0.9491, "step": 11911 }, { "epoch": 9.885477178423237, "grad_norm": 16.78040885925293, "learning_rate": 1.6048464730290457e-05, "loss": 0.6178, "step": 11912 }, { "epoch": 9.88630705394191, "grad_norm": 36.445587158203125, "learning_rate": 1.604813278008299e-05, "loss": 1.4001, "step": 11913 }, { "epoch": 9.887136929460581, "grad_norm": 22.891206741333008, "learning_rate": 1.6047800829875518e-05, "loss": 1.1139, "step": 11914 }, { "epoch": 9.887966804979254, "grad_norm": 35.992794036865234, "learning_rate": 1.604746887966805e-05, "loss": 1.277, "step": 11915 }, { "epoch": 9.888796680497926, "grad_norm": 33.019168853759766, "learning_rate": 1.6047136929460582e-05, "loss": 1.5152, "step": 11916 }, { "epoch": 9.889626556016598, "grad_norm": 32.704002380371094, "learning_rate": 1.6046804979253114e-05, "loss": 0.6809, "step": 11917 }, { "epoch": 9.89045643153527, "grad_norm": 32.63139343261719, "learning_rate": 1.6046473029045643e-05, "loss": 1.2079, "step": 11918 }, { "epoch": 9.891286307053942, "grad_norm": 20.33224105834961, "learning_rate": 1.6046141078838175e-05, "loss": 0.6195, "step": 11919 }, { "epoch": 
9.892116182572614, "grad_norm": 41.15065383911133, "learning_rate": 1.6045809128630707e-05, "loss": 1.1933, "step": 11920 }, { "epoch": 9.892946058091287, "grad_norm": 26.4097900390625, "learning_rate": 1.604547717842324e-05, "loss": 1.4414, "step": 11921 }, { "epoch": 9.893775933609959, "grad_norm": 26.15088653564453, "learning_rate": 1.6045145228215768e-05, "loss": 1.4468, "step": 11922 }, { "epoch": 9.894605809128631, "grad_norm": 33.078060150146484, "learning_rate": 1.60448132780083e-05, "loss": 0.6806, "step": 11923 }, { "epoch": 9.895435684647303, "grad_norm": 46.41095733642578, "learning_rate": 1.6044481327800832e-05, "loss": 1.2711, "step": 11924 }, { "epoch": 9.896265560165975, "grad_norm": 42.855491638183594, "learning_rate": 1.6044149377593364e-05, "loss": 1.1128, "step": 11925 }, { "epoch": 9.897095435684648, "grad_norm": 20.575258255004883, "learning_rate": 1.6043817427385893e-05, "loss": 0.3025, "step": 11926 }, { "epoch": 9.89792531120332, "grad_norm": 28.986265182495117, "learning_rate": 1.6043485477178425e-05, "loss": 1.1265, "step": 11927 }, { "epoch": 9.898755186721992, "grad_norm": 34.60319137573242, "learning_rate": 1.6043153526970957e-05, "loss": 1.6098, "step": 11928 }, { "epoch": 9.899585062240664, "grad_norm": 27.08403205871582, "learning_rate": 1.6042821576763486e-05, "loss": 0.8782, "step": 11929 }, { "epoch": 9.900414937759336, "grad_norm": 20.92365074157715, "learning_rate": 1.6042489626556018e-05, "loss": 0.8328, "step": 11930 }, { "epoch": 9.901244813278009, "grad_norm": 106.16189575195312, "learning_rate": 1.604215767634855e-05, "loss": 1.0176, "step": 11931 }, { "epoch": 9.90207468879668, "grad_norm": 20.506820678710938, "learning_rate": 1.604182572614108e-05, "loss": 0.9444, "step": 11932 }, { "epoch": 9.902904564315353, "grad_norm": 21.223482131958008, "learning_rate": 1.604149377593361e-05, "loss": 0.8411, "step": 11933 }, { "epoch": 9.903734439834025, "grad_norm": 30.043010711669922, "learning_rate": 1.604116182572614e-05, 
"loss": 1.3477, "step": 11934 }, { "epoch": 9.904564315352697, "grad_norm": 36.319671630859375, "learning_rate": 1.6040829875518672e-05, "loss": 1.5566, "step": 11935 }, { "epoch": 9.90539419087137, "grad_norm": 36.39801788330078, "learning_rate": 1.6040497925311204e-05, "loss": 0.7853, "step": 11936 }, { "epoch": 9.906224066390042, "grad_norm": 24.44254493713379, "learning_rate": 1.6040165975103736e-05, "loss": 1.0841, "step": 11937 }, { "epoch": 9.907053941908714, "grad_norm": 32.99134063720703, "learning_rate": 1.6039834024896265e-05, "loss": 1.3889, "step": 11938 }, { "epoch": 9.907883817427386, "grad_norm": 50.003597259521484, "learning_rate": 1.6039502074688797e-05, "loss": 1.0045, "step": 11939 }, { "epoch": 9.908713692946058, "grad_norm": 26.82940673828125, "learning_rate": 1.603917012448133e-05, "loss": 1.3015, "step": 11940 }, { "epoch": 9.90954356846473, "grad_norm": 32.514808654785156, "learning_rate": 1.603883817427386e-05, "loss": 0.9477, "step": 11941 }, { "epoch": 9.910373443983403, "grad_norm": 27.21761703491211, "learning_rate": 1.6038506224066393e-05, "loss": 1.4161, "step": 11942 }, { "epoch": 9.911203319502075, "grad_norm": 17.59282112121582, "learning_rate": 1.6038174273858922e-05, "loss": 1.0799, "step": 11943 }, { "epoch": 9.912033195020747, "grad_norm": 36.08387756347656, "learning_rate": 1.6037842323651454e-05, "loss": 1.1651, "step": 11944 }, { "epoch": 9.91286307053942, "grad_norm": 35.1153564453125, "learning_rate": 1.6037510373443986e-05, "loss": 0.5691, "step": 11945 }, { "epoch": 9.913692946058092, "grad_norm": 25.11214828491211, "learning_rate": 1.603717842323652e-05, "loss": 0.6256, "step": 11946 }, { "epoch": 9.914522821576764, "grad_norm": 21.409717559814453, "learning_rate": 1.6036846473029047e-05, "loss": 0.9351, "step": 11947 }, { "epoch": 9.915352697095436, "grad_norm": 25.442838668823242, "learning_rate": 1.603651452282158e-05, "loss": 1.2372, "step": 11948 }, { "epoch": 9.916182572614108, "grad_norm": 26.614410400390625, 
"learning_rate": 1.6036182572614108e-05, "loss": 0.6446, "step": 11949 }, { "epoch": 9.91701244813278, "grad_norm": 46.584808349609375, "learning_rate": 1.603585062240664e-05, "loss": 1.0122, "step": 11950 }, { "epoch": 9.917842323651453, "grad_norm": 18.941909790039062, "learning_rate": 1.6035518672199172e-05, "loss": 0.6958, "step": 11951 }, { "epoch": 9.918672199170125, "grad_norm": 21.962692260742188, "learning_rate": 1.60351867219917e-05, "loss": 0.6463, "step": 11952 }, { "epoch": 9.919502074688797, "grad_norm": 23.952980041503906, "learning_rate": 1.6034854771784233e-05, "loss": 1.0281, "step": 11953 }, { "epoch": 9.92033195020747, "grad_norm": 26.662614822387695, "learning_rate": 1.6034522821576765e-05, "loss": 0.8316, "step": 11954 }, { "epoch": 9.921161825726141, "grad_norm": 40.26768493652344, "learning_rate": 1.6034190871369294e-05, "loss": 0.8869, "step": 11955 }, { "epoch": 9.921991701244814, "grad_norm": 32.059810638427734, "learning_rate": 1.6033858921161826e-05, "loss": 1.13, "step": 11956 }, { "epoch": 9.922821576763486, "grad_norm": 26.736278533935547, "learning_rate": 1.6033526970954358e-05, "loss": 0.9505, "step": 11957 }, { "epoch": 9.923651452282158, "grad_norm": 45.71220397949219, "learning_rate": 1.603319502074689e-05, "loss": 1.6825, "step": 11958 }, { "epoch": 9.92448132780083, "grad_norm": 26.751245498657227, "learning_rate": 1.603286307053942e-05, "loss": 1.1854, "step": 11959 }, { "epoch": 9.925311203319502, "grad_norm": 25.3754940032959, "learning_rate": 1.603253112033195e-05, "loss": 0.9835, "step": 11960 }, { "epoch": 9.926141078838175, "grad_norm": 21.883323669433594, "learning_rate": 1.6032199170124483e-05, "loss": 0.9154, "step": 11961 }, { "epoch": 9.926970954356847, "grad_norm": 27.765438079833984, "learning_rate": 1.6031867219917015e-05, "loss": 1.5351, "step": 11962 }, { "epoch": 9.927800829875519, "grad_norm": 28.659914016723633, "learning_rate": 1.6031535269709544e-05, "loss": 0.9053, "step": 11963 }, { "epoch": 
9.928630705394191, "grad_norm": 24.055503845214844, "learning_rate": 1.6031203319502076e-05, "loss": 1.1148, "step": 11964 }, { "epoch": 9.929460580912863, "grad_norm": 28.17723846435547, "learning_rate": 1.6030871369294608e-05, "loss": 0.9631, "step": 11965 }, { "epoch": 9.930290456431536, "grad_norm": 30.097946166992188, "learning_rate": 1.603053941908714e-05, "loss": 1.471, "step": 11966 }, { "epoch": 9.931120331950208, "grad_norm": 28.11549949645996, "learning_rate": 1.603020746887967e-05, "loss": 1.2622, "step": 11967 }, { "epoch": 9.93195020746888, "grad_norm": 17.2347412109375, "learning_rate": 1.60298755186722e-05, "loss": 0.9801, "step": 11968 }, { "epoch": 9.932780082987552, "grad_norm": 20.33702850341797, "learning_rate": 1.6029543568464733e-05, "loss": 0.7225, "step": 11969 }, { "epoch": 9.933609958506224, "grad_norm": 21.60774040222168, "learning_rate": 1.6029211618257262e-05, "loss": 0.7626, "step": 11970 }, { "epoch": 9.934439834024896, "grad_norm": 19.98735237121582, "learning_rate": 1.6028879668049794e-05, "loss": 0.5953, "step": 11971 }, { "epoch": 9.935269709543569, "grad_norm": 27.153982162475586, "learning_rate": 1.6028547717842323e-05, "loss": 1.4587, "step": 11972 }, { "epoch": 9.936099585062241, "grad_norm": 19.532594680786133, "learning_rate": 1.6028215767634855e-05, "loss": 0.6811, "step": 11973 }, { "epoch": 9.936929460580913, "grad_norm": 20.546175003051758, "learning_rate": 1.6027883817427387e-05, "loss": 0.9387, "step": 11974 }, { "epoch": 9.937759336099585, "grad_norm": 23.61428451538086, "learning_rate": 1.602755186721992e-05, "loss": 0.8253, "step": 11975 }, { "epoch": 9.938589211618257, "grad_norm": 20.693836212158203, "learning_rate": 1.6027219917012448e-05, "loss": 0.7399, "step": 11976 }, { "epoch": 9.93941908713693, "grad_norm": 20.181283950805664, "learning_rate": 1.602688796680498e-05, "loss": 0.8733, "step": 11977 }, { "epoch": 9.940248962655602, "grad_norm": 21.500635147094727, "learning_rate": 1.6026556016597512e-05, 
"loss": 1.0016, "step": 11978 }, { "epoch": 9.941078838174274, "grad_norm": 31.469219207763672, "learning_rate": 1.6026224066390044e-05, "loss": 0.9104, "step": 11979 }, { "epoch": 9.941908713692946, "grad_norm": 33.76664352416992, "learning_rate": 1.6025892116182573e-05, "loss": 1.1329, "step": 11980 }, { "epoch": 9.942738589211618, "grad_norm": 64.67681884765625, "learning_rate": 1.6025560165975105e-05, "loss": 1.2789, "step": 11981 }, { "epoch": 9.94356846473029, "grad_norm": 83.74832153320312, "learning_rate": 1.6025228215767637e-05, "loss": 1.5867, "step": 11982 }, { "epoch": 9.944398340248963, "grad_norm": 19.545316696166992, "learning_rate": 1.602489626556017e-05, "loss": 0.7295, "step": 11983 }, { "epoch": 9.945228215767635, "grad_norm": 22.45035743713379, "learning_rate": 1.6024564315352698e-05, "loss": 1.3067, "step": 11984 }, { "epoch": 9.946058091286307, "grad_norm": 35.871063232421875, "learning_rate": 1.602423236514523e-05, "loss": 1.1155, "step": 11985 }, { "epoch": 9.94688796680498, "grad_norm": 53.7530517578125, "learning_rate": 1.6023900414937762e-05, "loss": 1.6721, "step": 11986 }, { "epoch": 9.947717842323652, "grad_norm": 35.169044494628906, "learning_rate": 1.6023568464730294e-05, "loss": 0.981, "step": 11987 }, { "epoch": 9.948547717842324, "grad_norm": 33.19559860229492, "learning_rate": 1.6023236514522823e-05, "loss": 1.6802, "step": 11988 }, { "epoch": 9.949377593360996, "grad_norm": 35.382503509521484, "learning_rate": 1.6022904564315355e-05, "loss": 0.8521, "step": 11989 }, { "epoch": 9.950207468879668, "grad_norm": 24.765905380249023, "learning_rate": 1.6022572614107884e-05, "loss": 1.0171, "step": 11990 }, { "epoch": 9.95103734439834, "grad_norm": 27.04469871520996, "learning_rate": 1.6022240663900416e-05, "loss": 1.308, "step": 11991 }, { "epoch": 9.951867219917013, "grad_norm": 27.8568058013916, "learning_rate": 1.6021908713692948e-05, "loss": 1.0864, "step": 11992 }, { "epoch": 9.952697095435685, "grad_norm": 40.9911994934082, 
"learning_rate": 1.6021576763485477e-05, "loss": 1.4899, "step": 11993 }, { "epoch": 9.953526970954357, "grad_norm": 31.495132446289062, "learning_rate": 1.602124481327801e-05, "loss": 1.2226, "step": 11994 }, { "epoch": 9.95435684647303, "grad_norm": 33.777950286865234, "learning_rate": 1.602091286307054e-05, "loss": 0.8999, "step": 11995 }, { "epoch": 9.955186721991701, "grad_norm": 49.60525894165039, "learning_rate": 1.6020580912863073e-05, "loss": 1.3951, "step": 11996 }, { "epoch": 9.956016597510374, "grad_norm": 16.766063690185547, "learning_rate": 1.6020248962655602e-05, "loss": 0.7783, "step": 11997 }, { "epoch": 9.956846473029046, "grad_norm": 24.52841567993164, "learning_rate": 1.6019917012448134e-05, "loss": 0.7411, "step": 11998 }, { "epoch": 9.957676348547718, "grad_norm": 20.595081329345703, "learning_rate": 1.6019585062240666e-05, "loss": 0.7734, "step": 11999 }, { "epoch": 9.95850622406639, "grad_norm": 17.112579345703125, "learning_rate": 1.6019253112033198e-05, "loss": 0.9709, "step": 12000 }, { "epoch": 9.959336099585062, "grad_norm": 27.64229965209961, "learning_rate": 1.6018921161825727e-05, "loss": 1.068, "step": 12001 }, { "epoch": 9.960165975103735, "grad_norm": 36.296905517578125, "learning_rate": 1.601858921161826e-05, "loss": 1.6812, "step": 12002 }, { "epoch": 9.960995850622407, "grad_norm": 17.553129196166992, "learning_rate": 1.601825726141079e-05, "loss": 1.0103, "step": 12003 }, { "epoch": 9.961825726141079, "grad_norm": 19.483007431030273, "learning_rate": 1.6017925311203323e-05, "loss": 0.897, "step": 12004 }, { "epoch": 9.962655601659751, "grad_norm": 26.66695213317871, "learning_rate": 1.6017593360995852e-05, "loss": 1.1768, "step": 12005 }, { "epoch": 9.963485477178423, "grad_norm": 19.48567008972168, "learning_rate": 1.6017261410788384e-05, "loss": 0.6176, "step": 12006 }, { "epoch": 9.964315352697096, "grad_norm": 23.801958084106445, "learning_rate": 1.6016929460580916e-05, "loss": 1.0172, "step": 12007 }, { "epoch": 
9.965145228215768, "grad_norm": 22.684585571289062, "learning_rate": 1.6016597510373445e-05, "loss": 1.1645, "step": 12008 }, { "epoch": 9.96597510373444, "grad_norm": 25.391233444213867, "learning_rate": 1.6016265560165977e-05, "loss": 1.8672, "step": 12009 }, { "epoch": 9.966804979253112, "grad_norm": 31.447614669799805, "learning_rate": 1.6015933609958506e-05, "loss": 1.0297, "step": 12010 }, { "epoch": 9.967634854771784, "grad_norm": 28.114418029785156, "learning_rate": 1.6015601659751038e-05, "loss": 0.7516, "step": 12011 }, { "epoch": 9.968464730290457, "grad_norm": 37.26567077636719, "learning_rate": 1.601526970954357e-05, "loss": 1.2447, "step": 12012 }, { "epoch": 9.969294605809129, "grad_norm": 35.98868179321289, "learning_rate": 1.60149377593361e-05, "loss": 1.1409, "step": 12013 }, { "epoch": 9.970124481327801, "grad_norm": 21.995044708251953, "learning_rate": 1.601460580912863e-05, "loss": 1.2312, "step": 12014 }, { "epoch": 9.970954356846473, "grad_norm": 22.445955276489258, "learning_rate": 1.6014273858921163e-05, "loss": 0.753, "step": 12015 }, { "epoch": 9.971784232365145, "grad_norm": 22.539682388305664, "learning_rate": 1.6013941908713695e-05, "loss": 0.8482, "step": 12016 }, { "epoch": 9.972614107883818, "grad_norm": 59.662200927734375, "learning_rate": 1.6013609958506224e-05, "loss": 0.7231, "step": 12017 }, { "epoch": 9.97344398340249, "grad_norm": 36.8957633972168, "learning_rate": 1.6013278008298756e-05, "loss": 1.7267, "step": 12018 }, { "epoch": 9.974273858921162, "grad_norm": 19.30508041381836, "learning_rate": 1.6012946058091288e-05, "loss": 0.8869, "step": 12019 }, { "epoch": 9.975103734439834, "grad_norm": 20.5666561126709, "learning_rate": 1.601261410788382e-05, "loss": 0.5982, "step": 12020 }, { "epoch": 9.975933609958506, "grad_norm": 47.25662612915039, "learning_rate": 1.601228215767635e-05, "loss": 1.5497, "step": 12021 }, { "epoch": 9.976763485477179, "grad_norm": 27.39366912841797, "learning_rate": 1.601195020746888e-05, "loss": 
1.0664, "step": 12022 }, { "epoch": 9.97759336099585, "grad_norm": 40.074588775634766, "learning_rate": 1.6011618257261413e-05, "loss": 2.2387, "step": 12023 }, { "epoch": 9.978423236514523, "grad_norm": 26.27177619934082, "learning_rate": 1.6011286307053945e-05, "loss": 1.0907, "step": 12024 }, { "epoch": 9.979253112033195, "grad_norm": 37.578067779541016, "learning_rate": 1.6010954356846477e-05, "loss": 1.0683, "step": 12025 }, { "epoch": 9.980082987551867, "grad_norm": 35.73238754272461, "learning_rate": 1.6010622406639006e-05, "loss": 1.6521, "step": 12026 }, { "epoch": 9.98091286307054, "grad_norm": 31.421218872070312, "learning_rate": 1.6010290456431538e-05, "loss": 1.2069, "step": 12027 }, { "epoch": 9.981742738589212, "grad_norm": 28.051908493041992, "learning_rate": 1.6009958506224067e-05, "loss": 1.0429, "step": 12028 }, { "epoch": 9.982572614107884, "grad_norm": 101.3148193359375, "learning_rate": 1.60096265560166e-05, "loss": 2.2383, "step": 12029 }, { "epoch": 9.983402489626556, "grad_norm": 20.93601417541504, "learning_rate": 1.600929460580913e-05, "loss": 0.8861, "step": 12030 }, { "epoch": 9.984232365145228, "grad_norm": 25.43880271911621, "learning_rate": 1.600896265560166e-05, "loss": 1.743, "step": 12031 }, { "epoch": 9.9850622406639, "grad_norm": 33.492950439453125, "learning_rate": 1.600863070539419e-05, "loss": 0.8182, "step": 12032 }, { "epoch": 9.985892116182573, "grad_norm": 19.352937698364258, "learning_rate": 1.6008298755186724e-05, "loss": 0.8149, "step": 12033 }, { "epoch": 9.986721991701245, "grad_norm": 18.392732620239258, "learning_rate": 1.6007966804979252e-05, "loss": 0.7503, "step": 12034 }, { "epoch": 9.987551867219917, "grad_norm": 32.049251556396484, "learning_rate": 1.6007634854771785e-05, "loss": 0.785, "step": 12035 }, { "epoch": 9.98838174273859, "grad_norm": 19.921768188476562, "learning_rate": 1.6007302904564317e-05, "loss": 1.0652, "step": 12036 }, { "epoch": 9.989211618257261, "grad_norm": 27.652517318725586, 
"learning_rate": 1.600697095435685e-05, "loss": 0.9101, "step": 12037 }, { "epoch": 9.990041493775934, "grad_norm": 16.709041595458984, "learning_rate": 1.6006639004149377e-05, "loss": 0.7162, "step": 12038 }, { "epoch": 9.990871369294606, "grad_norm": 28.309814453125, "learning_rate": 1.600630705394191e-05, "loss": 1.0617, "step": 12039 }, { "epoch": 9.991701244813278, "grad_norm": 34.74721908569336, "learning_rate": 1.600597510373444e-05, "loss": 1.3273, "step": 12040 }, { "epoch": 9.99253112033195, "grad_norm": 24.861879348754883, "learning_rate": 1.6005643153526974e-05, "loss": 0.7467, "step": 12041 }, { "epoch": 9.993360995850622, "grad_norm": 35.078399658203125, "learning_rate": 1.6005311203319502e-05, "loss": 1.4712, "step": 12042 }, { "epoch": 9.994190871369295, "grad_norm": 21.556310653686523, "learning_rate": 1.6004979253112035e-05, "loss": 1.1843, "step": 12043 }, { "epoch": 9.995020746887967, "grad_norm": 37.70853042602539, "learning_rate": 1.6004647302904567e-05, "loss": 1.0365, "step": 12044 }, { "epoch": 9.995850622406639, "grad_norm": 18.414499282836914, "learning_rate": 1.60043153526971e-05, "loss": 0.6333, "step": 12045 }, { "epoch": 9.996680497925311, "grad_norm": 30.020353317260742, "learning_rate": 1.6003983402489628e-05, "loss": 0.808, "step": 12046 }, { "epoch": 9.997510373443983, "grad_norm": 16.833707809448242, "learning_rate": 1.600365145228216e-05, "loss": 0.4463, "step": 12047 }, { "epoch": 9.998340248962656, "grad_norm": 19.953264236450195, "learning_rate": 1.6003319502074692e-05, "loss": 0.5765, "step": 12048 }, { "epoch": 9.999170124481328, "grad_norm": 34.36959457397461, "learning_rate": 1.600298755186722e-05, "loss": 2.2927, "step": 12049 }, { "epoch": 10.0, "grad_norm": 17.05138397216797, "learning_rate": 1.6002655601659753e-05, "loss": 0.7609, "step": 12050 }, { "epoch": 10.000829875518672, "grad_norm": 13.91667366027832, "learning_rate": 1.600232365145228e-05, "loss": 0.8216, "step": 12051 }, { "epoch": 10.001659751037344, 
"grad_norm": 20.473928451538086, "learning_rate": 1.6001991701244813e-05, "loss": 1.4498, "step": 12052 }, { "epoch": 10.002489626556017, "grad_norm": 33.83143997192383, "learning_rate": 1.6001659751037346e-05, "loss": 0.8022, "step": 12053 }, { "epoch": 10.003319502074689, "grad_norm": 23.68581199645996, "learning_rate": 1.6001327800829878e-05, "loss": 0.745, "step": 12054 }, { "epoch": 10.004149377593361, "grad_norm": 25.733190536499023, "learning_rate": 1.6000995850622406e-05, "loss": 1.6424, "step": 12055 }, { "epoch": 10.004979253112033, "grad_norm": 23.721057891845703, "learning_rate": 1.600066390041494e-05, "loss": 1.4024, "step": 12056 }, { "epoch": 10.005809128630705, "grad_norm": 32.0172119140625, "learning_rate": 1.600033195020747e-05, "loss": 1.164, "step": 12057 }, { "epoch": 10.006639004149378, "grad_norm": 16.46984100341797, "learning_rate": 1.6000000000000003e-05, "loss": 0.6453, "step": 12058 }, { "epoch": 10.00746887966805, "grad_norm": 19.551712036132812, "learning_rate": 1.599966804979253e-05, "loss": 0.6587, "step": 12059 }, { "epoch": 10.008298755186722, "grad_norm": 42.38349914550781, "learning_rate": 1.5999336099585063e-05, "loss": 1.6375, "step": 12060 }, { "epoch": 10.009128630705394, "grad_norm": 15.94865608215332, "learning_rate": 1.5999004149377596e-05, "loss": 0.6334, "step": 12061 }, { "epoch": 10.009958506224066, "grad_norm": 20.496362686157227, "learning_rate": 1.5998672199170128e-05, "loss": 0.6504, "step": 12062 }, { "epoch": 10.010788381742739, "grad_norm": 24.839326858520508, "learning_rate": 1.5998340248962656e-05, "loss": 0.7057, "step": 12063 }, { "epoch": 10.01161825726141, "grad_norm": 14.210500717163086, "learning_rate": 1.599800829875519e-05, "loss": 0.4818, "step": 12064 }, { "epoch": 10.012448132780083, "grad_norm": 24.726926803588867, "learning_rate": 1.599767634854772e-05, "loss": 1.1883, "step": 12065 }, { "epoch": 10.013278008298755, "grad_norm": 18.57189178466797, "learning_rate": 1.599734439834025e-05, "loss": 
0.8509, "step": 12066 }, { "epoch": 10.014107883817427, "grad_norm": 31.87952423095703, "learning_rate": 1.599701244813278e-05, "loss": 0.7599, "step": 12067 }, { "epoch": 10.0149377593361, "grad_norm": 23.684463500976562, "learning_rate": 1.5996680497925314e-05, "loss": 0.5142, "step": 12068 }, { "epoch": 10.015767634854772, "grad_norm": 18.230676651000977, "learning_rate": 1.5996348547717842e-05, "loss": 0.4737, "step": 12069 }, { "epoch": 10.016597510373444, "grad_norm": 16.666091918945312, "learning_rate": 1.5996016597510374e-05, "loss": 0.45, "step": 12070 }, { "epoch": 10.017427385892116, "grad_norm": 40.04541778564453, "learning_rate": 1.5995684647302907e-05, "loss": 0.8802, "step": 12071 }, { "epoch": 10.018257261410788, "grad_norm": 36.91504669189453, "learning_rate": 1.5995352697095435e-05, "loss": 1.0392, "step": 12072 }, { "epoch": 10.01908713692946, "grad_norm": 34.74126052856445, "learning_rate": 1.5995020746887967e-05, "loss": 0.9484, "step": 12073 }, { "epoch": 10.019917012448133, "grad_norm": 34.20868682861328, "learning_rate": 1.59946887966805e-05, "loss": 1.1391, "step": 12074 }, { "epoch": 10.020746887966805, "grad_norm": 36.0039176940918, "learning_rate": 1.599435684647303e-05, "loss": 1.8019, "step": 12075 }, { "epoch": 10.021576763485477, "grad_norm": 26.721385955810547, "learning_rate": 1.599402489626556e-05, "loss": 1.0026, "step": 12076 }, { "epoch": 10.02240663900415, "grad_norm": 23.664505004882812, "learning_rate": 1.5993692946058092e-05, "loss": 0.8374, "step": 12077 }, { "epoch": 10.023236514522821, "grad_norm": 31.695812225341797, "learning_rate": 1.5993360995850624e-05, "loss": 0.5637, "step": 12078 }, { "epoch": 10.024066390041494, "grad_norm": 33.40986633300781, "learning_rate": 1.5993029045643157e-05, "loss": 1.1754, "step": 12079 }, { "epoch": 10.024896265560166, "grad_norm": 29.97395896911621, "learning_rate": 1.5992697095435685e-05, "loss": 1.3888, "step": 12080 }, { "epoch": 10.025726141078838, "grad_norm": 44.78424072265625, 
"learning_rate": 1.5992365145228217e-05, "loss": 1.288, "step": 12081 }, { "epoch": 10.02655601659751, "grad_norm": 52.28025817871094, "learning_rate": 1.599203319502075e-05, "loss": 1.1089, "step": 12082 }, { "epoch": 10.027385892116182, "grad_norm": 24.928468704223633, "learning_rate": 1.599170124481328e-05, "loss": 1.3163, "step": 12083 }, { "epoch": 10.028215767634855, "grad_norm": 30.75466537475586, "learning_rate": 1.599136929460581e-05, "loss": 0.6958, "step": 12084 }, { "epoch": 10.029045643153527, "grad_norm": 26.714080810546875, "learning_rate": 1.5991037344398342e-05, "loss": 1.228, "step": 12085 }, { "epoch": 10.029875518672199, "grad_norm": 27.626211166381836, "learning_rate": 1.5990705394190875e-05, "loss": 0.9407, "step": 12086 }, { "epoch": 10.030705394190871, "grad_norm": 21.552772521972656, "learning_rate": 1.5990373443983403e-05, "loss": 0.6204, "step": 12087 }, { "epoch": 10.031535269709543, "grad_norm": 29.736209869384766, "learning_rate": 1.5990041493775935e-05, "loss": 1.4072, "step": 12088 }, { "epoch": 10.032365145228216, "grad_norm": 21.83477210998535, "learning_rate": 1.5989709543568464e-05, "loss": 0.4913, "step": 12089 }, { "epoch": 10.033195020746888, "grad_norm": 30.538604736328125, "learning_rate": 1.5989377593360996e-05, "loss": 1.0972, "step": 12090 }, { "epoch": 10.03402489626556, "grad_norm": 34.09442138671875, "learning_rate": 1.598904564315353e-05, "loss": 1.3182, "step": 12091 }, { "epoch": 10.034854771784232, "grad_norm": 16.703359603881836, "learning_rate": 1.5988713692946057e-05, "loss": 0.7317, "step": 12092 }, { "epoch": 10.035684647302904, "grad_norm": 13.031485557556152, "learning_rate": 1.598838174273859e-05, "loss": 0.4715, "step": 12093 }, { "epoch": 10.036514522821577, "grad_norm": 18.46248435974121, "learning_rate": 1.598804979253112e-05, "loss": 0.5666, "step": 12094 }, { "epoch": 10.037344398340249, "grad_norm": 22.630149841308594, "learning_rate": 1.5987717842323653e-05, "loss": 0.8243, "step": 12095 }, { 
"epoch": 10.038174273858921, "grad_norm": 38.261817932128906, "learning_rate": 1.5987385892116182e-05, "loss": 1.2962, "step": 12096 }, { "epoch": 10.039004149377593, "grad_norm": 20.641347885131836, "learning_rate": 1.5987053941908714e-05, "loss": 0.9267, "step": 12097 }, { "epoch": 10.039834024896265, "grad_norm": 28.321237564086914, "learning_rate": 1.5986721991701246e-05, "loss": 1.2664, "step": 12098 }, { "epoch": 10.040663900414938, "grad_norm": 21.765380859375, "learning_rate": 1.598639004149378e-05, "loss": 0.6361, "step": 12099 }, { "epoch": 10.04149377593361, "grad_norm": 26.00392723083496, "learning_rate": 1.5986058091286307e-05, "loss": 0.8245, "step": 12100 }, { "epoch": 10.042323651452282, "grad_norm": 41.96660232543945, "learning_rate": 1.598572614107884e-05, "loss": 2.0395, "step": 12101 }, { "epoch": 10.043153526970954, "grad_norm": 36.80487060546875, "learning_rate": 1.598539419087137e-05, "loss": 0.9857, "step": 12102 }, { "epoch": 10.043983402489626, "grad_norm": 36.895233154296875, "learning_rate": 1.5985062240663903e-05, "loss": 0.8897, "step": 12103 }, { "epoch": 10.044813278008299, "grad_norm": 25.331523895263672, "learning_rate": 1.5984730290456432e-05, "loss": 1.0462, "step": 12104 }, { "epoch": 10.04564315352697, "grad_norm": 20.785139083862305, "learning_rate": 1.5984398340248964e-05, "loss": 0.9118, "step": 12105 }, { "epoch": 10.046473029045643, "grad_norm": 43.90494155883789, "learning_rate": 1.5984066390041496e-05, "loss": 1.2556, "step": 12106 }, { "epoch": 10.047302904564315, "grad_norm": 34.50067901611328, "learning_rate": 1.5983734439834025e-05, "loss": 1.2454, "step": 12107 }, { "epoch": 10.048132780082987, "grad_norm": 27.60527229309082, "learning_rate": 1.5983402489626557e-05, "loss": 0.7689, "step": 12108 }, { "epoch": 10.04896265560166, "grad_norm": 45.19003677368164, "learning_rate": 1.598307053941909e-05, "loss": 1.3714, "step": 12109 }, { "epoch": 10.049792531120332, "grad_norm": 62.22026824951172, "learning_rate": 
1.5982738589211618e-05, "loss": 0.9459, "step": 12110 }, { "epoch": 10.050622406639004, "grad_norm": 27.207231521606445, "learning_rate": 1.598240663900415e-05, "loss": 0.8345, "step": 12111 }, { "epoch": 10.051452282157676, "grad_norm": 34.728389739990234, "learning_rate": 1.5982074688796682e-05, "loss": 0.718, "step": 12112 }, { "epoch": 10.052282157676348, "grad_norm": 29.44085121154785, "learning_rate": 1.598174273858921e-05, "loss": 0.8525, "step": 12113 }, { "epoch": 10.05311203319502, "grad_norm": 50.042484283447266, "learning_rate": 1.5981410788381743e-05, "loss": 1.7962, "step": 12114 }, { "epoch": 10.053941908713693, "grad_norm": 50.06394958496094, "learning_rate": 1.5981078838174275e-05, "loss": 1.2636, "step": 12115 }, { "epoch": 10.054771784232365, "grad_norm": 56.89571762084961, "learning_rate": 1.5980746887966807e-05, "loss": 1.0043, "step": 12116 }, { "epoch": 10.055601659751037, "grad_norm": 35.173805236816406, "learning_rate": 1.5980414937759336e-05, "loss": 0.934, "step": 12117 }, { "epoch": 10.05643153526971, "grad_norm": 38.288700103759766, "learning_rate": 1.5980082987551868e-05, "loss": 1.4126, "step": 12118 }, { "epoch": 10.057261410788382, "grad_norm": 28.912643432617188, "learning_rate": 1.59797510373444e-05, "loss": 1.1996, "step": 12119 }, { "epoch": 10.058091286307054, "grad_norm": 20.415433883666992, "learning_rate": 1.5979419087136932e-05, "loss": 0.8364, "step": 12120 }, { "epoch": 10.058921161825726, "grad_norm": 18.133020401000977, "learning_rate": 1.597908713692946e-05, "loss": 0.6089, "step": 12121 }, { "epoch": 10.059751037344398, "grad_norm": 45.19356155395508, "learning_rate": 1.5978755186721993e-05, "loss": 1.1143, "step": 12122 }, { "epoch": 10.06058091286307, "grad_norm": 38.33869171142578, "learning_rate": 1.5978423236514525e-05, "loss": 1.3938, "step": 12123 }, { "epoch": 10.061410788381743, "grad_norm": 27.06639862060547, "learning_rate": 1.5978091286307057e-05, "loss": 1.1421, "step": 12124 }, { "epoch": 
10.062240663900415, "grad_norm": 23.092805862426758, "learning_rate": 1.5977759336099586e-05, "loss": 0.6235, "step": 12125 }, { "epoch": 10.063070539419087, "grad_norm": 23.35403060913086, "learning_rate": 1.5977427385892118e-05, "loss": 0.4152, "step": 12126 }, { "epoch": 10.063900414937759, "grad_norm": 15.52885627746582, "learning_rate": 1.5977095435684647e-05, "loss": 0.3033, "step": 12127 }, { "epoch": 10.064730290456431, "grad_norm": 25.016908645629883, "learning_rate": 1.597676348547718e-05, "loss": 1.0878, "step": 12128 }, { "epoch": 10.065560165975104, "grad_norm": 32.97016906738281, "learning_rate": 1.597643153526971e-05, "loss": 0.8166, "step": 12129 }, { "epoch": 10.066390041493776, "grad_norm": 29.1180477142334, "learning_rate": 1.597609958506224e-05, "loss": 1.2442, "step": 12130 }, { "epoch": 10.067219917012448, "grad_norm": 65.81594848632812, "learning_rate": 1.5975767634854772e-05, "loss": 0.7591, "step": 12131 }, { "epoch": 10.06804979253112, "grad_norm": 44.0255012512207, "learning_rate": 1.5975435684647304e-05, "loss": 1.3351, "step": 12132 }, { "epoch": 10.068879668049792, "grad_norm": 30.014808654785156, "learning_rate": 1.5975103734439836e-05, "loss": 0.5902, "step": 12133 }, { "epoch": 10.069709543568464, "grad_norm": 67.40177917480469, "learning_rate": 1.5974771784232365e-05, "loss": 0.9336, "step": 12134 }, { "epoch": 10.070539419087137, "grad_norm": 26.56819725036621, "learning_rate": 1.5974439834024897e-05, "loss": 0.8934, "step": 12135 }, { "epoch": 10.071369294605809, "grad_norm": 30.321805953979492, "learning_rate": 1.597410788381743e-05, "loss": 1.4463, "step": 12136 }, { "epoch": 10.072199170124481, "grad_norm": 32.974891662597656, "learning_rate": 1.597377593360996e-05, "loss": 1.118, "step": 12137 }, { "epoch": 10.073029045643153, "grad_norm": 16.528907775878906, "learning_rate": 1.597344398340249e-05, "loss": 0.7303, "step": 12138 }, { "epoch": 10.073858921161825, "grad_norm": 56.172332763671875, "learning_rate": 
1.5973112033195022e-05, "loss": 1.4436, "step": 12139 }, { "epoch": 10.074688796680498, "grad_norm": 25.844032287597656, "learning_rate": 1.5972780082987554e-05, "loss": 1.0348, "step": 12140 }, { "epoch": 10.07551867219917, "grad_norm": 33.57912063598633, "learning_rate": 1.5972448132780086e-05, "loss": 0.7064, "step": 12141 }, { "epoch": 10.076348547717842, "grad_norm": 23.52741050720215, "learning_rate": 1.5972116182572615e-05, "loss": 0.6822, "step": 12142 }, { "epoch": 10.077178423236514, "grad_norm": 27.274633407592773, "learning_rate": 1.5971784232365147e-05, "loss": 0.7556, "step": 12143 }, { "epoch": 10.078008298755186, "grad_norm": 28.83844757080078, "learning_rate": 1.597145228215768e-05, "loss": 0.8558, "step": 12144 }, { "epoch": 10.078838174273859, "grad_norm": 33.80609893798828, "learning_rate": 1.5971120331950208e-05, "loss": 0.8061, "step": 12145 }, { "epoch": 10.07966804979253, "grad_norm": 27.28614044189453, "learning_rate": 1.597078838174274e-05, "loss": 0.8072, "step": 12146 }, { "epoch": 10.080497925311203, "grad_norm": 22.95891571044922, "learning_rate": 1.5970456431535272e-05, "loss": 0.9444, "step": 12147 }, { "epoch": 10.081327800829875, "grad_norm": 16.271329879760742, "learning_rate": 1.59701244813278e-05, "loss": 0.5253, "step": 12148 }, { "epoch": 10.082157676348547, "grad_norm": 29.72582244873047, "learning_rate": 1.5969792531120333e-05, "loss": 0.8576, "step": 12149 }, { "epoch": 10.08298755186722, "grad_norm": 59.134159088134766, "learning_rate": 1.5969460580912862e-05, "loss": 0.8641, "step": 12150 }, { "epoch": 10.083817427385892, "grad_norm": 48.793418884277344, "learning_rate": 1.5969128630705394e-05, "loss": 1.163, "step": 12151 }, { "epoch": 10.084647302904564, "grad_norm": 24.35348129272461, "learning_rate": 1.5968796680497926e-05, "loss": 1.2344, "step": 12152 }, { "epoch": 10.085477178423236, "grad_norm": 25.708080291748047, "learning_rate": 1.5968464730290458e-05, "loss": 1.5107, "step": 12153 }, { "epoch": 
10.086307053941908, "grad_norm": 34.05297088623047, "learning_rate": 1.596813278008299e-05, "loss": 1.0931, "step": 12154 }, { "epoch": 10.08713692946058, "grad_norm": 27.60140609741211, "learning_rate": 1.596780082987552e-05, "loss": 0.7069, "step": 12155 }, { "epoch": 10.087966804979253, "grad_norm": 28.064208984375, "learning_rate": 1.596746887966805e-05, "loss": 0.3887, "step": 12156 }, { "epoch": 10.088796680497925, "grad_norm": 30.305192947387695, "learning_rate": 1.5967136929460583e-05, "loss": 0.9372, "step": 12157 }, { "epoch": 10.089626556016597, "grad_norm": 19.53498077392578, "learning_rate": 1.5966804979253115e-05, "loss": 0.5575, "step": 12158 }, { "epoch": 10.09045643153527, "grad_norm": 95.44275665283203, "learning_rate": 1.5966473029045644e-05, "loss": 2.367, "step": 12159 }, { "epoch": 10.091286307053942, "grad_norm": 36.76862335205078, "learning_rate": 1.5966141078838176e-05, "loss": 1.3905, "step": 12160 }, { "epoch": 10.092116182572614, "grad_norm": 24.538198471069336, "learning_rate": 1.5965809128630708e-05, "loss": 0.7364, "step": 12161 }, { "epoch": 10.092946058091286, "grad_norm": 15.352672576904297, "learning_rate": 1.596547717842324e-05, "loss": 0.4303, "step": 12162 }, { "epoch": 10.093775933609958, "grad_norm": 50.53080749511719, "learning_rate": 1.596514522821577e-05, "loss": 0.5371, "step": 12163 }, { "epoch": 10.09460580912863, "grad_norm": 15.37524700164795, "learning_rate": 1.59648132780083e-05, "loss": 0.3962, "step": 12164 }, { "epoch": 10.095435684647303, "grad_norm": 16.94193458557129, "learning_rate": 1.5964481327800833e-05, "loss": 0.9859, "step": 12165 }, { "epoch": 10.096265560165975, "grad_norm": 142.84693908691406, "learning_rate": 1.5964149377593362e-05, "loss": 0.5318, "step": 12166 }, { "epoch": 10.097095435684647, "grad_norm": 37.60575485229492, "learning_rate": 1.5963817427385894e-05, "loss": 0.9914, "step": 12167 }, { "epoch": 10.09792531120332, "grad_norm": 39.5369987487793, "learning_rate": 1.5963485477178423e-05, 
"loss": 1.2713, "step": 12168 }, { "epoch": 10.098755186721991, "grad_norm": 19.181875228881836, "learning_rate": 1.5963153526970955e-05, "loss": 0.7444, "step": 12169 }, { "epoch": 10.099585062240664, "grad_norm": 21.249759674072266, "learning_rate": 1.5962821576763487e-05, "loss": 0.3833, "step": 12170 }, { "epoch": 10.100414937759336, "grad_norm": 52.957332611083984, "learning_rate": 1.5962489626556016e-05, "loss": 0.8216, "step": 12171 }, { "epoch": 10.101244813278008, "grad_norm": 22.79182243347168, "learning_rate": 1.5962157676348548e-05, "loss": 1.0001, "step": 12172 }, { "epoch": 10.10207468879668, "grad_norm": 16.417362213134766, "learning_rate": 1.596182572614108e-05, "loss": 0.7948, "step": 12173 }, { "epoch": 10.102904564315352, "grad_norm": 21.751079559326172, "learning_rate": 1.5961493775933612e-05, "loss": 0.9154, "step": 12174 }, { "epoch": 10.103734439834025, "grad_norm": 24.302217483520508, "learning_rate": 1.596116182572614e-05, "loss": 1.0256, "step": 12175 }, { "epoch": 10.104564315352697, "grad_norm": 26.127666473388672, "learning_rate": 1.5960829875518673e-05, "loss": 1.0508, "step": 12176 }, { "epoch": 10.105394190871369, "grad_norm": 41.248939514160156, "learning_rate": 1.5960497925311205e-05, "loss": 1.1887, "step": 12177 }, { "epoch": 10.106224066390041, "grad_norm": 49.01673889160156, "learning_rate": 1.5960165975103737e-05, "loss": 0.5902, "step": 12178 }, { "epoch": 10.107053941908713, "grad_norm": 23.401782989501953, "learning_rate": 1.5959834024896266e-05, "loss": 1.4705, "step": 12179 }, { "epoch": 10.107883817427386, "grad_norm": 25.80424690246582, "learning_rate": 1.5959502074688798e-05, "loss": 0.8416, "step": 12180 }, { "epoch": 10.108713692946058, "grad_norm": 29.610618591308594, "learning_rate": 1.595917012448133e-05, "loss": 1.1351, "step": 12181 }, { "epoch": 10.10954356846473, "grad_norm": 31.29954719543457, "learning_rate": 1.5958838174273862e-05, "loss": 1.2284, "step": 12182 }, { "epoch": 10.110373443983402, "grad_norm": 
22.15643882751465, "learning_rate": 1.595850622406639e-05, "loss": 0.6865, "step": 12183 }, { "epoch": 10.111203319502074, "grad_norm": 23.28998374938965, "learning_rate": 1.5958174273858923e-05, "loss": 0.7446, "step": 12184 }, { "epoch": 10.112033195020746, "grad_norm": 34.56953811645508, "learning_rate": 1.5957842323651455e-05, "loss": 0.8255, "step": 12185 }, { "epoch": 10.112863070539419, "grad_norm": 26.765583038330078, "learning_rate": 1.5957510373443984e-05, "loss": 0.9664, "step": 12186 }, { "epoch": 10.11369294605809, "grad_norm": 24.60869026184082, "learning_rate": 1.5957178423236516e-05, "loss": 1.0625, "step": 12187 }, { "epoch": 10.114522821576763, "grad_norm": 29.245243072509766, "learning_rate": 1.5956846473029048e-05, "loss": 0.9575, "step": 12188 }, { "epoch": 10.115352697095435, "grad_norm": 23.4339599609375, "learning_rate": 1.5956514522821577e-05, "loss": 0.7733, "step": 12189 }, { "epoch": 10.116182572614107, "grad_norm": 29.548641204833984, "learning_rate": 1.595618257261411e-05, "loss": 1.4828, "step": 12190 }, { "epoch": 10.11701244813278, "grad_norm": 29.354732513427734, "learning_rate": 1.595585062240664e-05, "loss": 0.8303, "step": 12191 }, { "epoch": 10.117842323651452, "grad_norm": 39.69679641723633, "learning_rate": 1.595551867219917e-05, "loss": 0.7784, "step": 12192 }, { "epoch": 10.118672199170124, "grad_norm": 27.690397262573242, "learning_rate": 1.59551867219917e-05, "loss": 0.8088, "step": 12193 }, { "epoch": 10.119502074688796, "grad_norm": 32.26605987548828, "learning_rate": 1.5954854771784234e-05, "loss": 1.0155, "step": 12194 }, { "epoch": 10.120331950207468, "grad_norm": 95.6202392578125, "learning_rate": 1.5954522821576766e-05, "loss": 1.1594, "step": 12195 }, { "epoch": 10.12116182572614, "grad_norm": 32.49566650390625, "learning_rate": 1.5954190871369295e-05, "loss": 1.0767, "step": 12196 }, { "epoch": 10.121991701244813, "grad_norm": 47.82263946533203, "learning_rate": 1.5953858921161827e-05, "loss": 1.1287, "step": 
12197 }, { "epoch": 10.122821576763485, "grad_norm": 20.25108528137207, "learning_rate": 1.595352697095436e-05, "loss": 0.7744, "step": 12198 }, { "epoch": 10.123651452282157, "grad_norm": 25.188753128051758, "learning_rate": 1.595319502074689e-05, "loss": 1.1172, "step": 12199 }, { "epoch": 10.12448132780083, "grad_norm": 20.854225158691406, "learning_rate": 1.595286307053942e-05, "loss": 0.9125, "step": 12200 }, { "epoch": 10.125311203319502, "grad_norm": 19.90213966369629, "learning_rate": 1.5952531120331952e-05, "loss": 0.4586, "step": 12201 }, { "epoch": 10.126141078838174, "grad_norm": 36.016395568847656, "learning_rate": 1.5952199170124484e-05, "loss": 1.5171, "step": 12202 }, { "epoch": 10.126970954356846, "grad_norm": 20.565174102783203, "learning_rate": 1.5951867219917016e-05, "loss": 1.0117, "step": 12203 }, { "epoch": 10.127800829875518, "grad_norm": 29.914098739624023, "learning_rate": 1.5951535269709545e-05, "loss": 0.6205, "step": 12204 }, { "epoch": 10.12863070539419, "grad_norm": 34.91154098510742, "learning_rate": 1.5951203319502077e-05, "loss": 1.6127, "step": 12205 }, { "epoch": 10.129460580912863, "grad_norm": 34.796356201171875, "learning_rate": 1.5950871369294606e-05, "loss": 1.3183, "step": 12206 }, { "epoch": 10.130290456431535, "grad_norm": 40.43929672241211, "learning_rate": 1.5950539419087138e-05, "loss": 1.1139, "step": 12207 }, { "epoch": 10.131120331950207, "grad_norm": 25.493391036987305, "learning_rate": 1.595020746887967e-05, "loss": 0.8774, "step": 12208 }, { "epoch": 10.13195020746888, "grad_norm": 21.398401260375977, "learning_rate": 1.59498755186722e-05, "loss": 0.8587, "step": 12209 }, { "epoch": 10.132780082987551, "grad_norm": 25.884477615356445, "learning_rate": 1.594954356846473e-05, "loss": 0.7151, "step": 12210 }, { "epoch": 10.133609958506224, "grad_norm": 22.866012573242188, "learning_rate": 1.5949211618257263e-05, "loss": 0.7199, "step": 12211 }, { "epoch": 10.134439834024896, "grad_norm": 24.633378982543945, 
"learning_rate": 1.5948879668049795e-05, "loss": 0.9262, "step": 12212 }, { "epoch": 10.135269709543568, "grad_norm": 21.109914779663086, "learning_rate": 1.5948547717842323e-05, "loss": 0.9048, "step": 12213 }, { "epoch": 10.13609958506224, "grad_norm": 35.28529357910156, "learning_rate": 1.5948215767634856e-05, "loss": 0.8141, "step": 12214 }, { "epoch": 10.136929460580912, "grad_norm": 28.488758087158203, "learning_rate": 1.5947883817427388e-05, "loss": 1.0621, "step": 12215 }, { "epoch": 10.137759336099585, "grad_norm": 18.859182357788086, "learning_rate": 1.594755186721992e-05, "loss": 0.7972, "step": 12216 }, { "epoch": 10.138589211618257, "grad_norm": 28.882925033569336, "learning_rate": 1.594721991701245e-05, "loss": 1.0683, "step": 12217 }, { "epoch": 10.139419087136929, "grad_norm": 47.17870330810547, "learning_rate": 1.594688796680498e-05, "loss": 0.9941, "step": 12218 }, { "epoch": 10.140248962655601, "grad_norm": 23.227567672729492, "learning_rate": 1.5946556016597513e-05, "loss": 0.5377, "step": 12219 }, { "epoch": 10.141078838174273, "grad_norm": 33.75358581542969, "learning_rate": 1.5946224066390045e-05, "loss": 0.9313, "step": 12220 }, { "epoch": 10.141908713692946, "grad_norm": 23.116519927978516, "learning_rate": 1.5945892116182574e-05, "loss": 1.2556, "step": 12221 }, { "epoch": 10.142738589211618, "grad_norm": 67.47093963623047, "learning_rate": 1.5945560165975106e-05, "loss": 1.6581, "step": 12222 }, { "epoch": 10.14356846473029, "grad_norm": 46.0379753112793, "learning_rate": 1.5945228215767638e-05, "loss": 1.5472, "step": 12223 }, { "epoch": 10.144398340248962, "grad_norm": 37.01351547241211, "learning_rate": 1.5944896265560167e-05, "loss": 0.5348, "step": 12224 }, { "epoch": 10.145228215767634, "grad_norm": 26.160348892211914, "learning_rate": 1.59445643153527e-05, "loss": 0.9304, "step": 12225 }, { "epoch": 10.146058091286307, "grad_norm": 21.849456787109375, "learning_rate": 1.594423236514523e-05, "loss": 0.972, "step": 12226 }, { 
"epoch": 10.146887966804979, "grad_norm": 44.5825080871582, "learning_rate": 1.594390041493776e-05, "loss": 0.8208, "step": 12227 }, { "epoch": 10.147717842323651, "grad_norm": 21.834300994873047, "learning_rate": 1.594356846473029e-05, "loss": 0.5768, "step": 12228 }, { "epoch": 10.148547717842323, "grad_norm": 41.23646545410156, "learning_rate": 1.594323651452282e-05, "loss": 0.8622, "step": 12229 }, { "epoch": 10.149377593360995, "grad_norm": 38.58501434326172, "learning_rate": 1.5942904564315352e-05, "loss": 1.617, "step": 12230 }, { "epoch": 10.150207468879668, "grad_norm": 81.91522216796875, "learning_rate": 1.5942572614107884e-05, "loss": 1.6307, "step": 12231 }, { "epoch": 10.15103734439834, "grad_norm": 14.240419387817383, "learning_rate": 1.5942240663900417e-05, "loss": 0.6807, "step": 12232 }, { "epoch": 10.151867219917012, "grad_norm": 22.79872703552246, "learning_rate": 1.594190871369295e-05, "loss": 1.0928, "step": 12233 }, { "epoch": 10.152697095435684, "grad_norm": 33.13569259643555, "learning_rate": 1.5941576763485477e-05, "loss": 1.6755, "step": 12234 }, { "epoch": 10.153526970954356, "grad_norm": 30.130300521850586, "learning_rate": 1.594124481327801e-05, "loss": 1.2626, "step": 12235 }, { "epoch": 10.154356846473028, "grad_norm": 24.00217056274414, "learning_rate": 1.594091286307054e-05, "loss": 0.8891, "step": 12236 }, { "epoch": 10.1551867219917, "grad_norm": 13.427141189575195, "learning_rate": 1.5940580912863074e-05, "loss": 0.5771, "step": 12237 }, { "epoch": 10.156016597510373, "grad_norm": 22.71233558654785, "learning_rate": 1.5940248962655602e-05, "loss": 0.739, "step": 12238 }, { "epoch": 10.156846473029045, "grad_norm": 33.51302719116211, "learning_rate": 1.5939917012448135e-05, "loss": 0.8881, "step": 12239 }, { "epoch": 10.157676348547717, "grad_norm": 23.778316497802734, "learning_rate": 1.5939585062240667e-05, "loss": 1.399, "step": 12240 }, { "epoch": 10.15850622406639, "grad_norm": 35.346378326416016, "learning_rate": 
1.59392531120332e-05, "loss": 1.9901, "step": 12241 }, { "epoch": 10.159336099585062, "grad_norm": 29.94526481628418, "learning_rate": 1.5938921161825728e-05, "loss": 0.9626, "step": 12242 }, { "epoch": 10.160165975103734, "grad_norm": 31.275537490844727, "learning_rate": 1.593858921161826e-05, "loss": 0.9406, "step": 12243 }, { "epoch": 10.160995850622406, "grad_norm": 19.9207820892334, "learning_rate": 1.593825726141079e-05, "loss": 0.9268, "step": 12244 }, { "epoch": 10.161825726141078, "grad_norm": 41.29775619506836, "learning_rate": 1.593792531120332e-05, "loss": 1.1897, "step": 12245 }, { "epoch": 10.16265560165975, "grad_norm": 26.243995666503906, "learning_rate": 1.5937593360995853e-05, "loss": 1.3087, "step": 12246 }, { "epoch": 10.163485477178423, "grad_norm": 27.558429718017578, "learning_rate": 1.593726141078838e-05, "loss": 1.02, "step": 12247 }, { "epoch": 10.164315352697095, "grad_norm": 25.535797119140625, "learning_rate": 1.5936929460580913e-05, "loss": 1.247, "step": 12248 }, { "epoch": 10.165145228215767, "grad_norm": 36.57539367675781, "learning_rate": 1.5936597510373445e-05, "loss": 1.2344, "step": 12249 }, { "epoch": 10.16597510373444, "grad_norm": 38.95043182373047, "learning_rate": 1.5936265560165974e-05, "loss": 1.029, "step": 12250 }, { "epoch": 10.166804979253111, "grad_norm": 27.438106536865234, "learning_rate": 1.5935933609958506e-05, "loss": 0.7665, "step": 12251 }, { "epoch": 10.167634854771784, "grad_norm": 32.975250244140625, "learning_rate": 1.593560165975104e-05, "loss": 1.2906, "step": 12252 }, { "epoch": 10.168464730290456, "grad_norm": 83.28553009033203, "learning_rate": 1.593526970954357e-05, "loss": 0.9663, "step": 12253 }, { "epoch": 10.169294605809128, "grad_norm": 17.251535415649414, "learning_rate": 1.59349377593361e-05, "loss": 0.627, "step": 12254 }, { "epoch": 10.1701244813278, "grad_norm": 43.354583740234375, "learning_rate": 1.593460580912863e-05, "loss": 0.5707, "step": 12255 }, { "epoch": 10.170954356846472, 
"grad_norm": 32.61793518066406, "learning_rate": 1.5934273858921163e-05, "loss": 0.9929, "step": 12256 }, { "epoch": 10.171784232365145, "grad_norm": 17.37375259399414, "learning_rate": 1.5933941908713696e-05, "loss": 0.5647, "step": 12257 }, { "epoch": 10.172614107883817, "grad_norm": 30.859535217285156, "learning_rate": 1.5933609958506224e-05, "loss": 1.077, "step": 12258 }, { "epoch": 10.173443983402489, "grad_norm": 35.33827590942383, "learning_rate": 1.5933278008298756e-05, "loss": 0.709, "step": 12259 }, { "epoch": 10.174273858921161, "grad_norm": 26.53341293334961, "learning_rate": 1.593294605809129e-05, "loss": 1.0425, "step": 12260 }, { "epoch": 10.175103734439833, "grad_norm": 26.55712890625, "learning_rate": 1.593261410788382e-05, "loss": 0.7494, "step": 12261 }, { "epoch": 10.175933609958506, "grad_norm": 17.077861785888672, "learning_rate": 1.593228215767635e-05, "loss": 0.6976, "step": 12262 }, { "epoch": 10.176763485477178, "grad_norm": 37.80149459838867, "learning_rate": 1.593195020746888e-05, "loss": 0.9969, "step": 12263 }, { "epoch": 10.17759336099585, "grad_norm": 29.36860466003418, "learning_rate": 1.5931618257261414e-05, "loss": 0.9275, "step": 12264 }, { "epoch": 10.178423236514522, "grad_norm": 40.049835205078125, "learning_rate": 1.5931286307053942e-05, "loss": 0.6862, "step": 12265 }, { "epoch": 10.179253112033194, "grad_norm": 35.17726516723633, "learning_rate": 1.5930954356846474e-05, "loss": 1.1151, "step": 12266 }, { "epoch": 10.180082987551867, "grad_norm": 15.113360404968262, "learning_rate": 1.5930622406639003e-05, "loss": 0.4927, "step": 12267 }, { "epoch": 10.180912863070539, "grad_norm": 24.971996307373047, "learning_rate": 1.5930290456431535e-05, "loss": 0.8546, "step": 12268 }, { "epoch": 10.181742738589211, "grad_norm": 44.7606201171875, "learning_rate": 1.5929958506224067e-05, "loss": 1.2839, "step": 12269 }, { "epoch": 10.182572614107883, "grad_norm": 39.70235824584961, "learning_rate": 1.59296265560166e-05, "loss": 0.8335, 
"step": 12270 }, { "epoch": 10.183402489626555, "grad_norm": 81.90718841552734, "learning_rate": 1.5929294605809128e-05, "loss": 1.2327, "step": 12271 }, { "epoch": 10.184232365145228, "grad_norm": 33.100067138671875, "learning_rate": 1.592896265560166e-05, "loss": 1.5058, "step": 12272 }, { "epoch": 10.1850622406639, "grad_norm": 13.907510757446289, "learning_rate": 1.5928630705394192e-05, "loss": 0.5044, "step": 12273 }, { "epoch": 10.185892116182572, "grad_norm": 28.290027618408203, "learning_rate": 1.5928298755186724e-05, "loss": 0.9896, "step": 12274 }, { "epoch": 10.186721991701244, "grad_norm": 29.51206398010254, "learning_rate": 1.5927966804979253e-05, "loss": 1.0843, "step": 12275 }, { "epoch": 10.187551867219916, "grad_norm": 36.91592788696289, "learning_rate": 1.5927634854771785e-05, "loss": 1.4177, "step": 12276 }, { "epoch": 10.188381742738589, "grad_norm": 42.86763381958008, "learning_rate": 1.5927302904564317e-05, "loss": 0.7543, "step": 12277 }, { "epoch": 10.18921161825726, "grad_norm": 20.875009536743164, "learning_rate": 1.592697095435685e-05, "loss": 1.2564, "step": 12278 }, { "epoch": 10.190041493775933, "grad_norm": 22.327661514282227, "learning_rate": 1.5926639004149378e-05, "loss": 0.4953, "step": 12279 }, { "epoch": 10.190871369294605, "grad_norm": 26.243459701538086, "learning_rate": 1.592630705394191e-05, "loss": 0.9248, "step": 12280 }, { "epoch": 10.191701244813277, "grad_norm": 32.48436737060547, "learning_rate": 1.5925975103734442e-05, "loss": 1.0642, "step": 12281 }, { "epoch": 10.19253112033195, "grad_norm": 29.784042358398438, "learning_rate": 1.5925643153526975e-05, "loss": 1.2134, "step": 12282 }, { "epoch": 10.193360995850622, "grad_norm": 22.30481719970703, "learning_rate": 1.5925311203319503e-05, "loss": 0.9501, "step": 12283 }, { "epoch": 10.194190871369294, "grad_norm": 18.108736038208008, "learning_rate": 1.5924979253112035e-05, "loss": 0.7334, "step": 12284 }, { "epoch": 10.195020746887966, "grad_norm": 14.006446838378906, 
"learning_rate": 1.5924647302904564e-05, "loss": 0.3972, "step": 12285 }, { "epoch": 10.195850622406638, "grad_norm": 41.14051818847656, "learning_rate": 1.5924315352697096e-05, "loss": 1.075, "step": 12286 }, { "epoch": 10.19668049792531, "grad_norm": 27.537376403808594, "learning_rate": 1.5923983402489628e-05, "loss": 1.382, "step": 12287 }, { "epoch": 10.197510373443983, "grad_norm": 40.01142120361328, "learning_rate": 1.5923651452282157e-05, "loss": 0.8492, "step": 12288 }, { "epoch": 10.198340248962655, "grad_norm": 17.271085739135742, "learning_rate": 1.592331950207469e-05, "loss": 0.6011, "step": 12289 }, { "epoch": 10.199170124481327, "grad_norm": 28.22098159790039, "learning_rate": 1.592298755186722e-05, "loss": 1.6914, "step": 12290 }, { "epoch": 10.2, "grad_norm": 20.57200813293457, "learning_rate": 1.5922655601659753e-05, "loss": 0.8655, "step": 12291 }, { "epoch": 10.200829875518671, "grad_norm": 15.854609489440918, "learning_rate": 1.5922323651452282e-05, "loss": 0.5748, "step": 12292 }, { "epoch": 10.201659751037344, "grad_norm": 19.3148250579834, "learning_rate": 1.5921991701244814e-05, "loss": 0.6226, "step": 12293 }, { "epoch": 10.202489626556016, "grad_norm": 25.543249130249023, "learning_rate": 1.5921659751037346e-05, "loss": 1.384, "step": 12294 }, { "epoch": 10.203319502074688, "grad_norm": 19.862171173095703, "learning_rate": 1.592132780082988e-05, "loss": 0.7991, "step": 12295 }, { "epoch": 10.20414937759336, "grad_norm": 34.825592041015625, "learning_rate": 1.5920995850622407e-05, "loss": 1.2847, "step": 12296 }, { "epoch": 10.204979253112032, "grad_norm": 20.51717185974121, "learning_rate": 1.592066390041494e-05, "loss": 0.6753, "step": 12297 }, { "epoch": 10.205809128630705, "grad_norm": 43.591331481933594, "learning_rate": 1.592033195020747e-05, "loss": 1.3053, "step": 12298 }, { "epoch": 10.206639004149377, "grad_norm": 47.71166229248047, "learning_rate": 1.5920000000000003e-05, "loss": 0.9948, "step": 12299 }, { "epoch": 
10.207468879668049, "grad_norm": 28.722293853759766, "learning_rate": 1.5919668049792532e-05, "loss": 1.0839, "step": 12300 }, { "epoch": 10.208298755186721, "grad_norm": 20.81661605834961, "learning_rate": 1.5919336099585064e-05, "loss": 0.7519, "step": 12301 }, { "epoch": 10.209128630705393, "grad_norm": 26.683504104614258, "learning_rate": 1.5919004149377596e-05, "loss": 1.1496, "step": 12302 }, { "epoch": 10.209958506224066, "grad_norm": 27.719593048095703, "learning_rate": 1.5918672199170125e-05, "loss": 1.6922, "step": 12303 }, { "epoch": 10.210788381742738, "grad_norm": 24.72738265991211, "learning_rate": 1.5918340248962657e-05, "loss": 0.8509, "step": 12304 }, { "epoch": 10.21161825726141, "grad_norm": 21.50743865966797, "learning_rate": 1.5918008298755186e-05, "loss": 0.7875, "step": 12305 }, { "epoch": 10.212448132780082, "grad_norm": 39.86024856567383, "learning_rate": 1.5917676348547718e-05, "loss": 1.5562, "step": 12306 }, { "epoch": 10.213278008298754, "grad_norm": 43.30699920654297, "learning_rate": 1.591734439834025e-05, "loss": 1.1877, "step": 12307 }, { "epoch": 10.214107883817427, "grad_norm": 20.779502868652344, "learning_rate": 1.591701244813278e-05, "loss": 1.1556, "step": 12308 }, { "epoch": 10.214937759336099, "grad_norm": 20.267831802368164, "learning_rate": 1.591668049792531e-05, "loss": 1.1098, "step": 12309 }, { "epoch": 10.215767634854771, "grad_norm": 25.370906829833984, "learning_rate": 1.5916348547717843e-05, "loss": 0.9649, "step": 12310 }, { "epoch": 10.216597510373443, "grad_norm": 26.383869171142578, "learning_rate": 1.5916016597510375e-05, "loss": 1.479, "step": 12311 }, { "epoch": 10.217427385892115, "grad_norm": 32.93770980834961, "learning_rate": 1.5915684647302907e-05, "loss": 1.4639, "step": 12312 }, { "epoch": 10.218257261410788, "grad_norm": 22.914134979248047, "learning_rate": 1.5915352697095436e-05, "loss": 0.7418, "step": 12313 }, { "epoch": 10.21908713692946, "grad_norm": 17.14981460571289, "learning_rate": 
1.5915020746887968e-05, "loss": 0.6783, "step": 12314 }, { "epoch": 10.219917012448132, "grad_norm": 30.689781188964844, "learning_rate": 1.59146887966805e-05, "loss": 1.2172, "step": 12315 }, { "epoch": 10.220746887966804, "grad_norm": 29.702987670898438, "learning_rate": 1.5914356846473032e-05, "loss": 0.8281, "step": 12316 }, { "epoch": 10.221576763485476, "grad_norm": 28.17049217224121, "learning_rate": 1.591402489626556e-05, "loss": 1.1639, "step": 12317 }, { "epoch": 10.222406639004149, "grad_norm": 25.67308235168457, "learning_rate": 1.5913692946058093e-05, "loss": 1.1617, "step": 12318 }, { "epoch": 10.22323651452282, "grad_norm": 39.50951385498047, "learning_rate": 1.5913360995850625e-05, "loss": 0.7507, "step": 12319 }, { "epoch": 10.224066390041493, "grad_norm": 18.661069869995117, "learning_rate": 1.5913029045643157e-05, "loss": 0.7361, "step": 12320 }, { "epoch": 10.224896265560165, "grad_norm": 17.702791213989258, "learning_rate": 1.5912697095435686e-05, "loss": 0.7245, "step": 12321 }, { "epoch": 10.225726141078837, "grad_norm": 18.210254669189453, "learning_rate": 1.5912365145228218e-05, "loss": 0.9651, "step": 12322 }, { "epoch": 10.22655601659751, "grad_norm": 28.424898147583008, "learning_rate": 1.5912033195020747e-05, "loss": 0.5687, "step": 12323 }, { "epoch": 10.227385892116182, "grad_norm": 44.07460403442383, "learning_rate": 1.591170124481328e-05, "loss": 0.768, "step": 12324 }, { "epoch": 10.228215767634854, "grad_norm": 25.690519332885742, "learning_rate": 1.591136929460581e-05, "loss": 0.7717, "step": 12325 }, { "epoch": 10.229045643153526, "grad_norm": 49.93040084838867, "learning_rate": 1.591103734439834e-05, "loss": 0.9767, "step": 12326 }, { "epoch": 10.229875518672198, "grad_norm": 24.150022506713867, "learning_rate": 1.5910705394190872e-05, "loss": 1.0003, "step": 12327 }, { "epoch": 10.23070539419087, "grad_norm": 29.420543670654297, "learning_rate": 1.5910373443983404e-05, "loss": 0.8052, "step": 12328 }, { "epoch": 
10.231535269709543, "grad_norm": 43.38895797729492, "learning_rate": 1.5910041493775933e-05, "loss": 1.0011, "step": 12329 }, { "epoch": 10.232365145228215, "grad_norm": 26.1684513092041, "learning_rate": 1.5909709543568465e-05, "loss": 1.2099, "step": 12330 }, { "epoch": 10.233195020746887, "grad_norm": 23.321077346801758, "learning_rate": 1.5909377593360997e-05, "loss": 0.7785, "step": 12331 }, { "epoch": 10.23402489626556, "grad_norm": 33.381439208984375, "learning_rate": 1.590904564315353e-05, "loss": 0.6328, "step": 12332 }, { "epoch": 10.234854771784232, "grad_norm": 39.53702926635742, "learning_rate": 1.5908713692946058e-05, "loss": 1.0664, "step": 12333 }, { "epoch": 10.235684647302904, "grad_norm": 28.702465057373047, "learning_rate": 1.590838174273859e-05, "loss": 0.7321, "step": 12334 }, { "epoch": 10.236514522821576, "grad_norm": 21.211498260498047, "learning_rate": 1.5908049792531122e-05, "loss": 0.6892, "step": 12335 }, { "epoch": 10.237344398340248, "grad_norm": 83.1720199584961, "learning_rate": 1.5907717842323654e-05, "loss": 1.2701, "step": 12336 }, { "epoch": 10.23817427385892, "grad_norm": 23.878992080688477, "learning_rate": 1.5907385892116183e-05, "loss": 1.0219, "step": 12337 }, { "epoch": 10.239004149377593, "grad_norm": 25.791650772094727, "learning_rate": 1.5907053941908715e-05, "loss": 0.7611, "step": 12338 }, { "epoch": 10.239834024896265, "grad_norm": 28.2889461517334, "learning_rate": 1.5906721991701247e-05, "loss": 0.9945, "step": 12339 }, { "epoch": 10.240663900414937, "grad_norm": 46.98233413696289, "learning_rate": 1.590639004149378e-05, "loss": 1.208, "step": 12340 }, { "epoch": 10.241493775933609, "grad_norm": 48.692081451416016, "learning_rate": 1.5906058091286308e-05, "loss": 1.1686, "step": 12341 }, { "epoch": 10.242323651452281, "grad_norm": 44.3680534362793, "learning_rate": 1.590572614107884e-05, "loss": 1.4526, "step": 12342 }, { "epoch": 10.243153526970953, "grad_norm": 79.474365234375, "learning_rate": 
1.5905394190871372e-05, "loss": 0.8183, "step": 12343 }, { "epoch": 10.243983402489626, "grad_norm": 14.396717071533203, "learning_rate": 1.59050622406639e-05, "loss": 0.4381, "step": 12344 }, { "epoch": 10.244813278008298, "grad_norm": 28.302671432495117, "learning_rate": 1.5904730290456433e-05, "loss": 0.8828, "step": 12345 }, { "epoch": 10.24564315352697, "grad_norm": 29.379108428955078, "learning_rate": 1.590439834024896e-05, "loss": 1.1326, "step": 12346 }, { "epoch": 10.246473029045642, "grad_norm": 29.99441146850586, "learning_rate": 1.5904066390041494e-05, "loss": 0.4658, "step": 12347 }, { "epoch": 10.247302904564314, "grad_norm": 22.619091033935547, "learning_rate": 1.5903734439834026e-05, "loss": 0.8623, "step": 12348 }, { "epoch": 10.248132780082987, "grad_norm": 60.59454345703125, "learning_rate": 1.5903402489626558e-05, "loss": 1.0849, "step": 12349 }, { "epoch": 10.248962655601659, "grad_norm": 22.47368812561035, "learning_rate": 1.5903070539419087e-05, "loss": 0.6274, "step": 12350 }, { "epoch": 10.249792531120331, "grad_norm": 31.16140365600586, "learning_rate": 1.590273858921162e-05, "loss": 1.1824, "step": 12351 }, { "epoch": 10.250622406639003, "grad_norm": 32.04460525512695, "learning_rate": 1.590240663900415e-05, "loss": 0.6968, "step": 12352 }, { "epoch": 10.251452282157675, "grad_norm": 31.805757522583008, "learning_rate": 1.5902074688796683e-05, "loss": 1.5172, "step": 12353 }, { "epoch": 10.252282157676348, "grad_norm": 24.807859420776367, "learning_rate": 1.5901742738589212e-05, "loss": 1.142, "step": 12354 }, { "epoch": 10.25311203319502, "grad_norm": 21.308122634887695, "learning_rate": 1.5901410788381744e-05, "loss": 0.8412, "step": 12355 }, { "epoch": 10.253941908713692, "grad_norm": 29.25956153869629, "learning_rate": 1.5901078838174276e-05, "loss": 0.9175, "step": 12356 }, { "epoch": 10.254771784232364, "grad_norm": 48.291343688964844, "learning_rate": 1.5900746887966808e-05, "loss": 1.5839, "step": 12357 }, { "epoch": 
10.255601659751036, "grad_norm": 36.06330871582031, "learning_rate": 1.5900414937759337e-05, "loss": 1.5735, "step": 12358 }, { "epoch": 10.256431535269709, "grad_norm": 34.311588287353516, "learning_rate": 1.590008298755187e-05, "loss": 1.0502, "step": 12359 }, { "epoch": 10.25726141078838, "grad_norm": 18.478240966796875, "learning_rate": 1.58997510373444e-05, "loss": 0.5823, "step": 12360 }, { "epoch": 10.258091286307055, "grad_norm": 81.35645294189453, "learning_rate": 1.589941908713693e-05, "loss": 0.9909, "step": 12361 }, { "epoch": 10.258921161825727, "grad_norm": 43.14909744262695, "learning_rate": 1.5899087136929462e-05, "loss": 0.9159, "step": 12362 }, { "epoch": 10.2597510373444, "grad_norm": 19.984663009643555, "learning_rate": 1.5898755186721994e-05, "loss": 1.1326, "step": 12363 }, { "epoch": 10.260580912863071, "grad_norm": 24.177444458007812, "learning_rate": 1.5898423236514523e-05, "loss": 0.6575, "step": 12364 }, { "epoch": 10.261410788381744, "grad_norm": 29.93803596496582, "learning_rate": 1.5898091286307055e-05, "loss": 0.791, "step": 12365 }, { "epoch": 10.262240663900416, "grad_norm": 28.42815399169922, "learning_rate": 1.5897759336099587e-05, "loss": 0.6889, "step": 12366 }, { "epoch": 10.263070539419088, "grad_norm": 18.51521110534668, "learning_rate": 1.5897427385892116e-05, "loss": 0.402, "step": 12367 }, { "epoch": 10.26390041493776, "grad_norm": 43.5114631652832, "learning_rate": 1.5897095435684648e-05, "loss": 2.1032, "step": 12368 }, { "epoch": 10.264730290456432, "grad_norm": 32.228023529052734, "learning_rate": 1.589676348547718e-05, "loss": 0.9276, "step": 12369 }, { "epoch": 10.265560165975105, "grad_norm": 42.18113708496094, "learning_rate": 1.5896431535269712e-05, "loss": 1.0489, "step": 12370 }, { "epoch": 10.266390041493777, "grad_norm": 58.44841003417969, "learning_rate": 1.589609958506224e-05, "loss": 1.2479, "step": 12371 }, { "epoch": 10.267219917012449, "grad_norm": 29.128475189208984, "learning_rate": 
1.5895767634854773e-05, "loss": 1.1578, "step": 12372 }, { "epoch": 10.268049792531121, "grad_norm": 23.506196975708008, "learning_rate": 1.5895435684647305e-05, "loss": 1.5955, "step": 12373 }, { "epoch": 10.268879668049793, "grad_norm": 30.409040451049805, "learning_rate": 1.5895103734439837e-05, "loss": 1.4361, "step": 12374 }, { "epoch": 10.269709543568466, "grad_norm": 30.331281661987305, "learning_rate": 1.5894771784232366e-05, "loss": 1.2799, "step": 12375 }, { "epoch": 10.270539419087138, "grad_norm": 29.452054977416992, "learning_rate": 1.5894439834024898e-05, "loss": 0.9612, "step": 12376 }, { "epoch": 10.27136929460581, "grad_norm": 25.45162582397461, "learning_rate": 1.589410788381743e-05, "loss": 0.9458, "step": 12377 }, { "epoch": 10.272199170124482, "grad_norm": 28.44475555419922, "learning_rate": 1.5893775933609962e-05, "loss": 0.7601, "step": 12378 }, { "epoch": 10.273029045643154, "grad_norm": 19.568622589111328, "learning_rate": 1.589344398340249e-05, "loss": 0.7772, "step": 12379 }, { "epoch": 10.273858921161827, "grad_norm": 32.442806243896484, "learning_rate": 1.5893112033195023e-05, "loss": 1.1418, "step": 12380 }, { "epoch": 10.274688796680499, "grad_norm": 22.255130767822266, "learning_rate": 1.5892780082987555e-05, "loss": 0.9297, "step": 12381 }, { "epoch": 10.275518672199171, "grad_norm": 52.62108612060547, "learning_rate": 1.5892448132780084e-05, "loss": 1.1805, "step": 12382 }, { "epoch": 10.276348547717843, "grad_norm": 17.19550132751465, "learning_rate": 1.5892116182572616e-05, "loss": 0.9947, "step": 12383 }, { "epoch": 10.277178423236515, "grad_norm": 39.31540298461914, "learning_rate": 1.5891784232365144e-05, "loss": 0.6804, "step": 12384 }, { "epoch": 10.278008298755188, "grad_norm": 24.456951141357422, "learning_rate": 1.5891452282157677e-05, "loss": 1.1347, "step": 12385 }, { "epoch": 10.27883817427386, "grad_norm": 20.251632690429688, "learning_rate": 1.589112033195021e-05, "loss": 1.066, "step": 12386 }, { "epoch": 
10.279668049792532, "grad_norm": 22.71440315246582, "learning_rate": 1.5890788381742737e-05, "loss": 0.9476, "step": 12387 }, { "epoch": 10.280497925311204, "grad_norm": 26.3824462890625, "learning_rate": 1.589045643153527e-05, "loss": 0.9257, "step": 12388 }, { "epoch": 10.281327800829876, "grad_norm": 59.58626174926758, "learning_rate": 1.58901244813278e-05, "loss": 1.3336, "step": 12389 }, { "epoch": 10.282157676348548, "grad_norm": 23.52527618408203, "learning_rate": 1.5889792531120334e-05, "loss": 0.764, "step": 12390 }, { "epoch": 10.28298755186722, "grad_norm": 19.93800163269043, "learning_rate": 1.5889460580912866e-05, "loss": 1.1626, "step": 12391 }, { "epoch": 10.283817427385893, "grad_norm": 17.685632705688477, "learning_rate": 1.5889128630705395e-05, "loss": 0.8977, "step": 12392 }, { "epoch": 10.284647302904565, "grad_norm": 14.060598373413086, "learning_rate": 1.5888796680497927e-05, "loss": 0.4667, "step": 12393 }, { "epoch": 10.285477178423237, "grad_norm": 25.87797737121582, "learning_rate": 1.588846473029046e-05, "loss": 0.9849, "step": 12394 }, { "epoch": 10.28630705394191, "grad_norm": 18.94121742248535, "learning_rate": 1.588813278008299e-05, "loss": 0.7741, "step": 12395 }, { "epoch": 10.287136929460582, "grad_norm": 58.1832275390625, "learning_rate": 1.588780082987552e-05, "loss": 1.3551, "step": 12396 }, { "epoch": 10.287966804979254, "grad_norm": 16.175600051879883, "learning_rate": 1.5887468879668052e-05, "loss": 0.693, "step": 12397 }, { "epoch": 10.288796680497926, "grad_norm": 19.25424575805664, "learning_rate": 1.5887136929460584e-05, "loss": 0.8981, "step": 12398 }, { "epoch": 10.289626556016598, "grad_norm": 28.474931716918945, "learning_rate": 1.5886804979253116e-05, "loss": 1.1359, "step": 12399 }, { "epoch": 10.29045643153527, "grad_norm": 24.671924591064453, "learning_rate": 1.5886473029045645e-05, "loss": 1.0949, "step": 12400 }, { "epoch": 10.291286307053943, "grad_norm": 23.24901008605957, "learning_rate": 
1.5886141078838177e-05, "loss": 1.0811, "step": 12401 }, { "epoch": 10.292116182572615, "grad_norm": 27.603546142578125, "learning_rate": 1.5885809128630705e-05, "loss": 0.9158, "step": 12402 }, { "epoch": 10.292946058091287, "grad_norm": 37.942359924316406, "learning_rate": 1.5885477178423238e-05, "loss": 0.6317, "step": 12403 }, { "epoch": 10.29377593360996, "grad_norm": 37.02399826049805, "learning_rate": 1.588514522821577e-05, "loss": 1.3956, "step": 12404 }, { "epoch": 10.294605809128631, "grad_norm": 17.41533851623535, "learning_rate": 1.58848132780083e-05, "loss": 0.4678, "step": 12405 }, { "epoch": 10.295435684647304, "grad_norm": 34.39312744140625, "learning_rate": 1.588448132780083e-05, "loss": 1.5905, "step": 12406 }, { "epoch": 10.296265560165976, "grad_norm": 21.985654830932617, "learning_rate": 1.5884149377593363e-05, "loss": 0.6525, "step": 12407 }, { "epoch": 10.297095435684648, "grad_norm": 21.165302276611328, "learning_rate": 1.588381742738589e-05, "loss": 0.6234, "step": 12408 }, { "epoch": 10.29792531120332, "grad_norm": 27.58403968811035, "learning_rate": 1.5883485477178423e-05, "loss": 1.6196, "step": 12409 }, { "epoch": 10.298755186721992, "grad_norm": 14.299050331115723, "learning_rate": 1.5883153526970956e-05, "loss": 0.4231, "step": 12410 }, { "epoch": 10.299585062240665, "grad_norm": 25.78923225402832, "learning_rate": 1.5882821576763488e-05, "loss": 0.9575, "step": 12411 }, { "epoch": 10.300414937759337, "grad_norm": 27.078474044799805, "learning_rate": 1.5882489626556016e-05, "loss": 0.6262, "step": 12412 }, { "epoch": 10.301244813278009, "grad_norm": 22.781009674072266, "learning_rate": 1.588215767634855e-05, "loss": 0.7422, "step": 12413 }, { "epoch": 10.302074688796681, "grad_norm": 38.79790115356445, "learning_rate": 1.588182572614108e-05, "loss": 0.8749, "step": 12414 }, { "epoch": 10.302904564315353, "grad_norm": 21.25007438659668, "learning_rate": 1.5881493775933613e-05, "loss": 0.4485, "step": 12415 }, { "epoch": 
10.303734439834026, "grad_norm": 29.90151023864746, "learning_rate": 1.588116182572614e-05, "loss": 1.0333, "step": 12416 }, { "epoch": 10.304564315352698, "grad_norm": 26.582704544067383, "learning_rate": 1.5880829875518674e-05, "loss": 1.0759, "step": 12417 }, { "epoch": 10.30539419087137, "grad_norm": 21.07566261291504, "learning_rate": 1.5880497925311206e-05, "loss": 0.6208, "step": 12418 }, { "epoch": 10.306224066390042, "grad_norm": 27.152185440063477, "learning_rate": 1.5880165975103738e-05, "loss": 0.7165, "step": 12419 }, { "epoch": 10.307053941908714, "grad_norm": 34.82927703857422, "learning_rate": 1.5879834024896266e-05, "loss": 1.5257, "step": 12420 }, { "epoch": 10.307883817427387, "grad_norm": 28.517892837524414, "learning_rate": 1.58795020746888e-05, "loss": 0.881, "step": 12421 }, { "epoch": 10.308713692946059, "grad_norm": 31.616558074951172, "learning_rate": 1.5879170124481327e-05, "loss": 2.0498, "step": 12422 }, { "epoch": 10.309543568464731, "grad_norm": 26.69139289855957, "learning_rate": 1.587883817427386e-05, "loss": 1.0711, "step": 12423 }, { "epoch": 10.310373443983403, "grad_norm": 31.765151977539062, "learning_rate": 1.587850622406639e-05, "loss": 1.3232, "step": 12424 }, { "epoch": 10.311203319502075, "grad_norm": 22.360225677490234, "learning_rate": 1.587817427385892e-05, "loss": 1.1375, "step": 12425 }, { "epoch": 10.312033195020748, "grad_norm": 36.33777618408203, "learning_rate": 1.5877842323651452e-05, "loss": 0.934, "step": 12426 }, { "epoch": 10.31286307053942, "grad_norm": 24.99587631225586, "learning_rate": 1.5877510373443984e-05, "loss": 0.6773, "step": 12427 }, { "epoch": 10.313692946058092, "grad_norm": 28.99636459350586, "learning_rate": 1.5877178423236517e-05, "loss": 0.6868, "step": 12428 }, { "epoch": 10.314522821576764, "grad_norm": 19.748252868652344, "learning_rate": 1.5876846473029045e-05, "loss": 0.7149, "step": 12429 }, { "epoch": 10.315352697095436, "grad_norm": 52.391143798828125, "learning_rate": 
1.5876514522821577e-05, "loss": 1.6542, "step": 12430 }, { "epoch": 10.316182572614109, "grad_norm": 40.55815124511719, "learning_rate": 1.587618257261411e-05, "loss": 0.9224, "step": 12431 }, { "epoch": 10.31701244813278, "grad_norm": 39.51520919799805, "learning_rate": 1.587585062240664e-05, "loss": 0.7971, "step": 12432 }, { "epoch": 10.317842323651453, "grad_norm": 21.89140510559082, "learning_rate": 1.587551867219917e-05, "loss": 0.4567, "step": 12433 }, { "epoch": 10.318672199170125, "grad_norm": 19.471437454223633, "learning_rate": 1.5875186721991702e-05, "loss": 0.729, "step": 12434 }, { "epoch": 10.319502074688797, "grad_norm": 33.57347869873047, "learning_rate": 1.5874854771784235e-05, "loss": 0.8406, "step": 12435 }, { "epoch": 10.32033195020747, "grad_norm": 87.76049041748047, "learning_rate": 1.5874522821576767e-05, "loss": 1.062, "step": 12436 }, { "epoch": 10.321161825726142, "grad_norm": 51.28569030761719, "learning_rate": 1.5874190871369295e-05, "loss": 1.8244, "step": 12437 }, { "epoch": 10.321991701244814, "grad_norm": 41.615108489990234, "learning_rate": 1.5873858921161827e-05, "loss": 1.3514, "step": 12438 }, { "epoch": 10.322821576763486, "grad_norm": 57.94111251831055, "learning_rate": 1.587352697095436e-05, "loss": 0.6732, "step": 12439 }, { "epoch": 10.323651452282158, "grad_norm": 34.95764923095703, "learning_rate": 1.5873195020746888e-05, "loss": 0.7947, "step": 12440 }, { "epoch": 10.32448132780083, "grad_norm": 30.200088500976562, "learning_rate": 1.587286307053942e-05, "loss": 0.8154, "step": 12441 }, { "epoch": 10.325311203319503, "grad_norm": 57.085628509521484, "learning_rate": 1.5872531120331953e-05, "loss": 0.9573, "step": 12442 }, { "epoch": 10.326141078838175, "grad_norm": 36.769065856933594, "learning_rate": 1.587219917012448e-05, "loss": 0.9478, "step": 12443 }, { "epoch": 10.326970954356847, "grad_norm": 42.51632308959961, "learning_rate": 1.5871867219917013e-05, "loss": 1.4145, "step": 12444 }, { "epoch": 10.32780082987552, 
"grad_norm": 44.58073806762695, "learning_rate": 1.5871535269709545e-05, "loss": 0.7092, "step": 12445 }, { "epoch": 10.328630705394191, "grad_norm": 36.272674560546875, "learning_rate": 1.5871203319502074e-05, "loss": 1.1369, "step": 12446 }, { "epoch": 10.329460580912864, "grad_norm": 32.536766052246094, "learning_rate": 1.5870871369294606e-05, "loss": 0.5807, "step": 12447 }, { "epoch": 10.330290456431536, "grad_norm": 23.829694747924805, "learning_rate": 1.587053941908714e-05, "loss": 1.0348, "step": 12448 }, { "epoch": 10.331120331950208, "grad_norm": 26.55997657775879, "learning_rate": 1.587020746887967e-05, "loss": 0.5758, "step": 12449 }, { "epoch": 10.33195020746888, "grad_norm": 41.086387634277344, "learning_rate": 1.58698755186722e-05, "loss": 1.3384, "step": 12450 }, { "epoch": 10.332780082987552, "grad_norm": 21.868627548217773, "learning_rate": 1.586954356846473e-05, "loss": 0.7085, "step": 12451 }, { "epoch": 10.333609958506225, "grad_norm": 19.116573333740234, "learning_rate": 1.5869211618257263e-05, "loss": 1.083, "step": 12452 }, { "epoch": 10.334439834024897, "grad_norm": 68.54716491699219, "learning_rate": 1.5868879668049796e-05, "loss": 1.4476, "step": 12453 }, { "epoch": 10.335269709543569, "grad_norm": 27.399261474609375, "learning_rate": 1.5868547717842324e-05, "loss": 0.9708, "step": 12454 }, { "epoch": 10.336099585062241, "grad_norm": 30.866132736206055, "learning_rate": 1.5868215767634856e-05, "loss": 0.7964, "step": 12455 }, { "epoch": 10.336929460580913, "grad_norm": 34.228111267089844, "learning_rate": 1.586788381742739e-05, "loss": 1.0119, "step": 12456 }, { "epoch": 10.337759336099586, "grad_norm": 27.645517349243164, "learning_rate": 1.586755186721992e-05, "loss": 0.6604, "step": 12457 }, { "epoch": 10.338589211618258, "grad_norm": 17.639652252197266, "learning_rate": 1.586721991701245e-05, "loss": 0.4422, "step": 12458 }, { "epoch": 10.33941908713693, "grad_norm": 46.71657943725586, "learning_rate": 1.586688796680498e-05, "loss": 
1.4291, "step": 12459 }, { "epoch": 10.340248962655602, "grad_norm": 35.01985168457031, "learning_rate": 1.5866556016597514e-05, "loss": 1.3512, "step": 12460 }, { "epoch": 10.341078838174274, "grad_norm": 33.05735778808594, "learning_rate": 1.5866224066390042e-05, "loss": 0.6866, "step": 12461 }, { "epoch": 10.341908713692947, "grad_norm": 28.804048538208008, "learning_rate": 1.5865892116182574e-05, "loss": 1.3269, "step": 12462 }, { "epoch": 10.342738589211619, "grad_norm": 8.854634284973145, "learning_rate": 1.5865560165975103e-05, "loss": 0.2633, "step": 12463 }, { "epoch": 10.343568464730291, "grad_norm": 15.378521919250488, "learning_rate": 1.5865228215767635e-05, "loss": 0.6331, "step": 12464 }, { "epoch": 10.344398340248963, "grad_norm": 17.682363510131836, "learning_rate": 1.5864896265560167e-05, "loss": 0.7164, "step": 12465 }, { "epoch": 10.345228215767635, "grad_norm": 38.733577728271484, "learning_rate": 1.5864564315352696e-05, "loss": 1.2747, "step": 12466 }, { "epoch": 10.346058091286308, "grad_norm": 32.55359649658203, "learning_rate": 1.5864232365145228e-05, "loss": 1.2826, "step": 12467 }, { "epoch": 10.34688796680498, "grad_norm": 25.35719871520996, "learning_rate": 1.586390041493776e-05, "loss": 1.1419, "step": 12468 }, { "epoch": 10.347717842323652, "grad_norm": 22.678693771362305, "learning_rate": 1.5863568464730292e-05, "loss": 0.7792, "step": 12469 }, { "epoch": 10.348547717842324, "grad_norm": 32.39374542236328, "learning_rate": 1.5863236514522824e-05, "loss": 1.0851, "step": 12470 }, { "epoch": 10.349377593360996, "grad_norm": 37.68131637573242, "learning_rate": 1.5862904564315353e-05, "loss": 1.2425, "step": 12471 }, { "epoch": 10.350207468879669, "grad_norm": 25.638004302978516, "learning_rate": 1.5862572614107885e-05, "loss": 0.965, "step": 12472 }, { "epoch": 10.35103734439834, "grad_norm": 35.997711181640625, "learning_rate": 1.5862240663900417e-05, "loss": 0.9508, "step": 12473 }, { "epoch": 10.351867219917013, "grad_norm": 
42.984893798828125, "learning_rate": 1.586190871369295e-05, "loss": 1.4699, "step": 12474 }, { "epoch": 10.352697095435685, "grad_norm": 40.40092468261719, "learning_rate": 1.5861576763485478e-05, "loss": 1.3364, "step": 12475 }, { "epoch": 10.353526970954357, "grad_norm": 21.268787384033203, "learning_rate": 1.586124481327801e-05, "loss": 1.3677, "step": 12476 }, { "epoch": 10.35435684647303, "grad_norm": 27.63455581665039, "learning_rate": 1.5860912863070542e-05, "loss": 0.9493, "step": 12477 }, { "epoch": 10.355186721991702, "grad_norm": 23.74992561340332, "learning_rate": 1.586058091286307e-05, "loss": 1.1019, "step": 12478 }, { "epoch": 10.356016597510374, "grad_norm": 26.032114028930664, "learning_rate": 1.5860248962655603e-05, "loss": 1.2087, "step": 12479 }, { "epoch": 10.356846473029046, "grad_norm": 18.737529754638672, "learning_rate": 1.5859917012448135e-05, "loss": 1.1537, "step": 12480 }, { "epoch": 10.357676348547718, "grad_norm": 26.526283264160156, "learning_rate": 1.5859585062240664e-05, "loss": 0.7482, "step": 12481 }, { "epoch": 10.35850622406639, "grad_norm": 29.153717041015625, "learning_rate": 1.5859253112033196e-05, "loss": 1.1483, "step": 12482 }, { "epoch": 10.359336099585063, "grad_norm": 31.069732666015625, "learning_rate": 1.5858921161825728e-05, "loss": 1.6648, "step": 12483 }, { "epoch": 10.360165975103735, "grad_norm": 46.151729583740234, "learning_rate": 1.5858589211618257e-05, "loss": 0.8408, "step": 12484 }, { "epoch": 10.360995850622407, "grad_norm": 24.507282257080078, "learning_rate": 1.585825726141079e-05, "loss": 0.6017, "step": 12485 }, { "epoch": 10.36182572614108, "grad_norm": 42.34922409057617, "learning_rate": 1.585792531120332e-05, "loss": 1.5585, "step": 12486 }, { "epoch": 10.362655601659752, "grad_norm": 41.43198776245117, "learning_rate": 1.585759336099585e-05, "loss": 1.0941, "step": 12487 }, { "epoch": 10.363485477178424, "grad_norm": 19.72385597229004, "learning_rate": 1.5857261410788382e-05, "loss": 0.8128, 
"step": 12488 }, { "epoch": 10.364315352697096, "grad_norm": 26.417369842529297, "learning_rate": 1.5856929460580914e-05, "loss": 0.7183, "step": 12489 }, { "epoch": 10.365145228215768, "grad_norm": 55.94279861450195, "learning_rate": 1.5856597510373446e-05, "loss": 1.0054, "step": 12490 }, { "epoch": 10.36597510373444, "grad_norm": 27.146331787109375, "learning_rate": 1.5856265560165975e-05, "loss": 0.8819, "step": 12491 }, { "epoch": 10.366804979253113, "grad_norm": 25.466535568237305, "learning_rate": 1.5855933609958507e-05, "loss": 1.0361, "step": 12492 }, { "epoch": 10.367634854771785, "grad_norm": 13.696935653686523, "learning_rate": 1.585560165975104e-05, "loss": 0.552, "step": 12493 }, { "epoch": 10.368464730290457, "grad_norm": 56.481929779052734, "learning_rate": 1.585526970954357e-05, "loss": 1.2324, "step": 12494 }, { "epoch": 10.369294605809129, "grad_norm": 24.406272888183594, "learning_rate": 1.58549377593361e-05, "loss": 0.5338, "step": 12495 }, { "epoch": 10.370124481327801, "grad_norm": 41.06743621826172, "learning_rate": 1.5854605809128632e-05, "loss": 1.0577, "step": 12496 }, { "epoch": 10.370954356846473, "grad_norm": 27.468894958496094, "learning_rate": 1.5854273858921164e-05, "loss": 0.8135, "step": 12497 }, { "epoch": 10.371784232365146, "grad_norm": 19.559547424316406, "learning_rate": 1.5853941908713696e-05, "loss": 1.1197, "step": 12498 }, { "epoch": 10.372614107883818, "grad_norm": 24.409652709960938, "learning_rate": 1.5853609958506225e-05, "loss": 1.0254, "step": 12499 }, { "epoch": 10.37344398340249, "grad_norm": 34.31458282470703, "learning_rate": 1.5853278008298757e-05, "loss": 1.4218, "step": 12500 }, { "epoch": 10.374273858921162, "grad_norm": 12.526611328125, "learning_rate": 1.5852946058091286e-05, "loss": 0.3978, "step": 12501 }, { "epoch": 10.375103734439834, "grad_norm": 27.411195755004883, "learning_rate": 1.5852614107883818e-05, "loss": 1.2464, "step": 12502 }, { "epoch": 10.375933609958507, "grad_norm": 41.17075729370117, 
"learning_rate": 1.585228215767635e-05, "loss": 1.7859, "step": 12503 }, { "epoch": 10.376763485477179, "grad_norm": 20.58063507080078, "learning_rate": 1.585195020746888e-05, "loss": 1.1763, "step": 12504 }, { "epoch": 10.377593360995851, "grad_norm": 20.31747817993164, "learning_rate": 1.585161825726141e-05, "loss": 0.8192, "step": 12505 }, { "epoch": 10.378423236514523, "grad_norm": 16.791738510131836, "learning_rate": 1.5851286307053943e-05, "loss": 0.6731, "step": 12506 }, { "epoch": 10.379253112033195, "grad_norm": 33.431373596191406, "learning_rate": 1.5850954356846475e-05, "loss": 1.1548, "step": 12507 }, { "epoch": 10.380082987551868, "grad_norm": 21.584115982055664, "learning_rate": 1.5850622406639004e-05, "loss": 0.9095, "step": 12508 }, { "epoch": 10.38091286307054, "grad_norm": 55.3244514465332, "learning_rate": 1.5850290456431536e-05, "loss": 1.4595, "step": 12509 }, { "epoch": 10.381742738589212, "grad_norm": 26.725584030151367, "learning_rate": 1.5849958506224068e-05, "loss": 1.1539, "step": 12510 }, { "epoch": 10.382572614107884, "grad_norm": 27.750959396362305, "learning_rate": 1.58496265560166e-05, "loss": 0.9533, "step": 12511 }, { "epoch": 10.383402489626556, "grad_norm": 28.207096099853516, "learning_rate": 1.584929460580913e-05, "loss": 1.0902, "step": 12512 }, { "epoch": 10.384232365145229, "grad_norm": 55.939910888671875, "learning_rate": 1.584896265560166e-05, "loss": 0.6492, "step": 12513 }, { "epoch": 10.3850622406639, "grad_norm": 24.312747955322266, "learning_rate": 1.5848630705394193e-05, "loss": 1.3869, "step": 12514 }, { "epoch": 10.385892116182573, "grad_norm": 33.863258361816406, "learning_rate": 1.5848298755186725e-05, "loss": 1.2623, "step": 12515 }, { "epoch": 10.386721991701245, "grad_norm": 26.34783935546875, "learning_rate": 1.5847966804979254e-05, "loss": 0.706, "step": 12516 }, { "epoch": 10.387551867219917, "grad_norm": 25.206867218017578, "learning_rate": 1.5847634854771786e-05, "loss": 1.0393, "step": 12517 }, { 
"epoch": 10.38838174273859, "grad_norm": 28.916664123535156, "learning_rate": 1.5847302904564318e-05, "loss": 0.3312, "step": 12518 }, { "epoch": 10.389211618257262, "grad_norm": 25.752975463867188, "learning_rate": 1.5846970954356847e-05, "loss": 0.5247, "step": 12519 }, { "epoch": 10.390041493775934, "grad_norm": 21.62795066833496, "learning_rate": 1.584663900414938e-05, "loss": 0.9002, "step": 12520 }, { "epoch": 10.390871369294606, "grad_norm": 22.292837142944336, "learning_rate": 1.584630705394191e-05, "loss": 0.4795, "step": 12521 }, { "epoch": 10.391701244813278, "grad_norm": 58.877681732177734, "learning_rate": 1.584597510373444e-05, "loss": 1.0572, "step": 12522 }, { "epoch": 10.39253112033195, "grad_norm": 35.48392105102539, "learning_rate": 1.5845643153526972e-05, "loss": 0.7793, "step": 12523 }, { "epoch": 10.393360995850623, "grad_norm": 54.49559783935547, "learning_rate": 1.5845311203319504e-05, "loss": 1.3922, "step": 12524 }, { "epoch": 10.394190871369295, "grad_norm": 60.51029586791992, "learning_rate": 1.5844979253112033e-05, "loss": 1.2954, "step": 12525 }, { "epoch": 10.395020746887967, "grad_norm": 21.976327896118164, "learning_rate": 1.5844647302904565e-05, "loss": 0.7149, "step": 12526 }, { "epoch": 10.39585062240664, "grad_norm": 21.5556640625, "learning_rate": 1.5844315352697097e-05, "loss": 0.4868, "step": 12527 }, { "epoch": 10.396680497925312, "grad_norm": 26.553783416748047, "learning_rate": 1.584398340248963e-05, "loss": 0.9852, "step": 12528 }, { "epoch": 10.397510373443984, "grad_norm": 30.260299682617188, "learning_rate": 1.5843651452282158e-05, "loss": 0.4843, "step": 12529 }, { "epoch": 10.398340248962656, "grad_norm": 23.849143981933594, "learning_rate": 1.584331950207469e-05, "loss": 0.6241, "step": 12530 }, { "epoch": 10.399170124481328, "grad_norm": 38.093833923339844, "learning_rate": 1.5842987551867222e-05, "loss": 1.4919, "step": 12531 }, { "epoch": 10.4, "grad_norm": 64.68514251708984, "learning_rate": 
1.5842655601659754e-05, "loss": 2.1206, "step": 12532 }, { "epoch": 10.400829875518673, "grad_norm": 23.715608596801758, "learning_rate": 1.5842323651452283e-05, "loss": 0.7242, "step": 12533 }, { "epoch": 10.401659751037345, "grad_norm": 30.48096466064453, "learning_rate": 1.5841991701244815e-05, "loss": 1.368, "step": 12534 }, { "epoch": 10.402489626556017, "grad_norm": 70.64112091064453, "learning_rate": 1.5841659751037347e-05, "loss": 1.9947, "step": 12535 }, { "epoch": 10.40331950207469, "grad_norm": 25.34534454345703, "learning_rate": 1.584132780082988e-05, "loss": 0.9323, "step": 12536 }, { "epoch": 10.404149377593361, "grad_norm": 37.7054443359375, "learning_rate": 1.5840995850622408e-05, "loss": 1.305, "step": 12537 }, { "epoch": 10.404979253112034, "grad_norm": 22.887956619262695, "learning_rate": 1.584066390041494e-05, "loss": 0.712, "step": 12538 }, { "epoch": 10.405809128630706, "grad_norm": 28.527027130126953, "learning_rate": 1.584033195020747e-05, "loss": 1.1221, "step": 12539 }, { "epoch": 10.406639004149378, "grad_norm": 26.328330993652344, "learning_rate": 1.584e-05, "loss": 0.922, "step": 12540 }, { "epoch": 10.40746887966805, "grad_norm": 15.622586250305176, "learning_rate": 1.5839668049792533e-05, "loss": 0.3099, "step": 12541 }, { "epoch": 10.408298755186722, "grad_norm": 18.954364776611328, "learning_rate": 1.583933609958506e-05, "loss": 0.7911, "step": 12542 }, { "epoch": 10.409128630705395, "grad_norm": 28.451507568359375, "learning_rate": 1.5839004149377594e-05, "loss": 0.8861, "step": 12543 }, { "epoch": 10.409958506224067, "grad_norm": 29.645200729370117, "learning_rate": 1.5838672199170126e-05, "loss": 0.6654, "step": 12544 }, { "epoch": 10.410788381742739, "grad_norm": 20.8387393951416, "learning_rate": 1.5838340248962655e-05, "loss": 0.8518, "step": 12545 }, { "epoch": 10.411618257261411, "grad_norm": 20.63350486755371, "learning_rate": 1.5838008298755187e-05, "loss": 0.6144, "step": 12546 }, { "epoch": 10.412448132780083, 
"grad_norm": 32.24546432495117, "learning_rate": 1.583767634854772e-05, "loss": 1.196, "step": 12547 }, { "epoch": 10.413278008298755, "grad_norm": 36.88913345336914, "learning_rate": 1.583734439834025e-05, "loss": 1.2159, "step": 12548 }, { "epoch": 10.414107883817428, "grad_norm": 52.85338592529297, "learning_rate": 1.583701244813278e-05, "loss": 1.1368, "step": 12549 }, { "epoch": 10.4149377593361, "grad_norm": 27.309192657470703, "learning_rate": 1.5836680497925312e-05, "loss": 1.0617, "step": 12550 }, { "epoch": 10.415767634854772, "grad_norm": 34.87625503540039, "learning_rate": 1.5836348547717844e-05, "loss": 1.0885, "step": 12551 }, { "epoch": 10.416597510373444, "grad_norm": 17.401941299438477, "learning_rate": 1.5836016597510376e-05, "loss": 0.6434, "step": 12552 }, { "epoch": 10.417427385892116, "grad_norm": 31.355974197387695, "learning_rate": 1.5835684647302908e-05, "loss": 1.4468, "step": 12553 }, { "epoch": 10.418257261410789, "grad_norm": 44.3802490234375, "learning_rate": 1.5835352697095437e-05, "loss": 1.9408, "step": 12554 }, { "epoch": 10.41908713692946, "grad_norm": 38.6297607421875, "learning_rate": 1.583502074688797e-05, "loss": 0.6533, "step": 12555 }, { "epoch": 10.419917012448133, "grad_norm": 28.231218338012695, "learning_rate": 1.58346887966805e-05, "loss": 0.7742, "step": 12556 }, { "epoch": 10.420746887966805, "grad_norm": 23.494592666625977, "learning_rate": 1.583435684647303e-05, "loss": 0.7986, "step": 12557 }, { "epoch": 10.421576763485477, "grad_norm": 13.398402214050293, "learning_rate": 1.5834024896265562e-05, "loss": 0.3011, "step": 12558 }, { "epoch": 10.42240663900415, "grad_norm": 82.36404418945312, "learning_rate": 1.5833692946058094e-05, "loss": 1.0641, "step": 12559 }, { "epoch": 10.423236514522822, "grad_norm": 23.018901824951172, "learning_rate": 1.5833360995850623e-05, "loss": 0.779, "step": 12560 }, { "epoch": 10.424066390041494, "grad_norm": 26.880922317504883, "learning_rate": 1.5833029045643155e-05, "loss": 1.0129, 
"step": 12561 }, { "epoch": 10.424896265560166, "grad_norm": 38.46097183227539, "learning_rate": 1.5832697095435683e-05, "loss": 0.9871, "step": 12562 }, { "epoch": 10.425726141078838, "grad_norm": 27.869461059570312, "learning_rate": 1.5832365145228216e-05, "loss": 0.7717, "step": 12563 }, { "epoch": 10.42655601659751, "grad_norm": 85.91267395019531, "learning_rate": 1.5832033195020748e-05, "loss": 1.0691, "step": 12564 }, { "epoch": 10.427385892116183, "grad_norm": 24.926284790039062, "learning_rate": 1.583170124481328e-05, "loss": 1.7355, "step": 12565 }, { "epoch": 10.428215767634855, "grad_norm": 49.51350402832031, "learning_rate": 1.583136929460581e-05, "loss": 1.1631, "step": 12566 }, { "epoch": 10.429045643153527, "grad_norm": 37.074642181396484, "learning_rate": 1.583103734439834e-05, "loss": 1.3076, "step": 12567 }, { "epoch": 10.4298755186722, "grad_norm": 23.56760025024414, "learning_rate": 1.5830705394190873e-05, "loss": 0.6339, "step": 12568 }, { "epoch": 10.430705394190872, "grad_norm": 30.740774154663086, "learning_rate": 1.5830373443983405e-05, "loss": 0.6971, "step": 12569 }, { "epoch": 10.431535269709544, "grad_norm": 30.152433395385742, "learning_rate": 1.5830041493775934e-05, "loss": 0.8255, "step": 12570 }, { "epoch": 10.432365145228216, "grad_norm": 27.288480758666992, "learning_rate": 1.5829709543568466e-05, "loss": 1.1883, "step": 12571 }, { "epoch": 10.433195020746888, "grad_norm": 17.235410690307617, "learning_rate": 1.5829377593360998e-05, "loss": 0.8955, "step": 12572 }, { "epoch": 10.43402489626556, "grad_norm": 38.237300872802734, "learning_rate": 1.582904564315353e-05, "loss": 0.7349, "step": 12573 }, { "epoch": 10.434854771784233, "grad_norm": 21.319652557373047, "learning_rate": 1.582871369294606e-05, "loss": 0.6591, "step": 12574 }, { "epoch": 10.435684647302905, "grad_norm": 36.06829833984375, "learning_rate": 1.582838174273859e-05, "loss": 1.7702, "step": 12575 }, { "epoch": 10.436514522821577, "grad_norm": 31.672191619873047, 
"learning_rate": 1.5828049792531123e-05, "loss": 1.0171, "step": 12576 }, { "epoch": 10.43734439834025, "grad_norm": 30.728168487548828, "learning_rate": 1.5827717842323655e-05, "loss": 1.1354, "step": 12577 }, { "epoch": 10.438174273858921, "grad_norm": 69.28252410888672, "learning_rate": 1.5827385892116184e-05, "loss": 1.4093, "step": 12578 }, { "epoch": 10.439004149377594, "grad_norm": 30.42104148864746, "learning_rate": 1.5827053941908716e-05, "loss": 1.0705, "step": 12579 }, { "epoch": 10.439834024896266, "grad_norm": 20.194416046142578, "learning_rate": 1.5826721991701244e-05, "loss": 0.7101, "step": 12580 }, { "epoch": 10.440663900414938, "grad_norm": 20.733863830566406, "learning_rate": 1.5826390041493777e-05, "loss": 1.0773, "step": 12581 }, { "epoch": 10.44149377593361, "grad_norm": 44.53879928588867, "learning_rate": 1.582605809128631e-05, "loss": 1.1202, "step": 12582 }, { "epoch": 10.442323651452282, "grad_norm": 20.442106246948242, "learning_rate": 1.5825726141078837e-05, "loss": 0.8226, "step": 12583 }, { "epoch": 10.443153526970955, "grad_norm": 38.42241287231445, "learning_rate": 1.582539419087137e-05, "loss": 1.0598, "step": 12584 }, { "epoch": 10.443983402489627, "grad_norm": 26.795413970947266, "learning_rate": 1.58250622406639e-05, "loss": 1.2396, "step": 12585 }, { "epoch": 10.444813278008299, "grad_norm": 27.0576229095459, "learning_rate": 1.5824730290456434e-05, "loss": 0.7176, "step": 12586 }, { "epoch": 10.445643153526971, "grad_norm": 35.86697769165039, "learning_rate": 1.5824398340248962e-05, "loss": 1.8961, "step": 12587 }, { "epoch": 10.446473029045643, "grad_norm": 19.108383178710938, "learning_rate": 1.5824066390041495e-05, "loss": 0.6039, "step": 12588 }, { "epoch": 10.447302904564316, "grad_norm": 39.81734848022461, "learning_rate": 1.5823734439834027e-05, "loss": 1.2291, "step": 12589 }, { "epoch": 10.448132780082988, "grad_norm": 23.78711700439453, "learning_rate": 1.582340248962656e-05, "loss": 0.8569, "step": 12590 }, { 
"epoch": 10.44896265560166, "grad_norm": 35.53934860229492, "learning_rate": 1.5823070539419087e-05, "loss": 1.4221, "step": 12591 }, { "epoch": 10.449792531120332, "grad_norm": 24.83329200744629, "learning_rate": 1.582273858921162e-05, "loss": 0.9649, "step": 12592 }, { "epoch": 10.450622406639004, "grad_norm": 53.65639877319336, "learning_rate": 1.582240663900415e-05, "loss": 0.794, "step": 12593 }, { "epoch": 10.451452282157677, "grad_norm": 16.968074798583984, "learning_rate": 1.5822074688796684e-05, "loss": 0.7031, "step": 12594 }, { "epoch": 10.452282157676349, "grad_norm": 18.01002311706543, "learning_rate": 1.5821742738589213e-05, "loss": 0.8108, "step": 12595 }, { "epoch": 10.453112033195021, "grad_norm": 26.921661376953125, "learning_rate": 1.5821410788381745e-05, "loss": 0.5052, "step": 12596 }, { "epoch": 10.453941908713693, "grad_norm": 24.982704162597656, "learning_rate": 1.5821078838174277e-05, "loss": 0.6647, "step": 12597 }, { "epoch": 10.454771784232365, "grad_norm": 18.75313377380371, "learning_rate": 1.5820746887966805e-05, "loss": 0.5948, "step": 12598 }, { "epoch": 10.455601659751038, "grad_norm": 66.80998229980469, "learning_rate": 1.5820414937759338e-05, "loss": 1.126, "step": 12599 }, { "epoch": 10.45643153526971, "grad_norm": 25.096616744995117, "learning_rate": 1.582008298755187e-05, "loss": 0.9772, "step": 12600 }, { "epoch": 10.457261410788382, "grad_norm": 31.07512664794922, "learning_rate": 1.58197510373444e-05, "loss": 1.4178, "step": 12601 }, { "epoch": 10.458091286307054, "grad_norm": 40.69554138183594, "learning_rate": 1.581941908713693e-05, "loss": 1.5787, "step": 12602 }, { "epoch": 10.458921161825726, "grad_norm": 26.632568359375, "learning_rate": 1.5819087136929463e-05, "loss": 0.956, "step": 12603 }, { "epoch": 10.459751037344398, "grad_norm": 25.06363868713379, "learning_rate": 1.581875518672199e-05, "loss": 0.8831, "step": 12604 }, { "epoch": 10.46058091286307, "grad_norm": 24.236207962036133, "learning_rate": 
1.5818423236514523e-05, "loss": 1.2754, "step": 12605 }, { "epoch": 10.461410788381743, "grad_norm": 22.6333065032959, "learning_rate": 1.5818091286307056e-05, "loss": 0.7774, "step": 12606 }, { "epoch": 10.462240663900415, "grad_norm": 49.136173248291016, "learning_rate": 1.5817759336099588e-05, "loss": 0.954, "step": 12607 }, { "epoch": 10.463070539419087, "grad_norm": 33.52372360229492, "learning_rate": 1.5817427385892116e-05, "loss": 0.8817, "step": 12608 }, { "epoch": 10.46390041493776, "grad_norm": 25.730714797973633, "learning_rate": 1.581709543568465e-05, "loss": 0.6859, "step": 12609 }, { "epoch": 10.464730290456432, "grad_norm": 28.91240692138672, "learning_rate": 1.581676348547718e-05, "loss": 1.6536, "step": 12610 }, { "epoch": 10.465560165975104, "grad_norm": 22.594228744506836, "learning_rate": 1.5816431535269713e-05, "loss": 0.4924, "step": 12611 }, { "epoch": 10.466390041493776, "grad_norm": 30.30599021911621, "learning_rate": 1.581609958506224e-05, "loss": 0.7234, "step": 12612 }, { "epoch": 10.467219917012448, "grad_norm": 24.44576072692871, "learning_rate": 1.5815767634854774e-05, "loss": 0.6829, "step": 12613 }, { "epoch": 10.46804979253112, "grad_norm": 40.17700958251953, "learning_rate": 1.5815435684647306e-05, "loss": 1.1989, "step": 12614 }, { "epoch": 10.468879668049793, "grad_norm": 29.49817657470703, "learning_rate": 1.5815103734439838e-05, "loss": 0.8658, "step": 12615 }, { "epoch": 10.469709543568465, "grad_norm": 61.402793884277344, "learning_rate": 1.5814771784232366e-05, "loss": 0.9583, "step": 12616 }, { "epoch": 10.470539419087137, "grad_norm": 18.702701568603516, "learning_rate": 1.58144398340249e-05, "loss": 0.7581, "step": 12617 }, { "epoch": 10.47136929460581, "grad_norm": 35.13493347167969, "learning_rate": 1.5814107883817427e-05, "loss": 1.0578, "step": 12618 }, { "epoch": 10.472199170124481, "grad_norm": 43.88648223876953, "learning_rate": 1.581377593360996e-05, "loss": 1.2418, "step": 12619 }, { "epoch": 10.473029045643154, 
"grad_norm": 18.7697696685791, "learning_rate": 1.581344398340249e-05, "loss": 0.6862, "step": 12620 }, { "epoch": 10.473858921161826, "grad_norm": 57.16655731201172, "learning_rate": 1.581311203319502e-05, "loss": 0.9785, "step": 12621 }, { "epoch": 10.474688796680498, "grad_norm": 29.840307235717773, "learning_rate": 1.5812780082987552e-05, "loss": 0.925, "step": 12622 }, { "epoch": 10.47551867219917, "grad_norm": 36.32516098022461, "learning_rate": 1.5812448132780084e-05, "loss": 0.9712, "step": 12623 }, { "epoch": 10.476348547717842, "grad_norm": 34.99387741088867, "learning_rate": 1.5812116182572613e-05, "loss": 1.1011, "step": 12624 }, { "epoch": 10.477178423236515, "grad_norm": 44.572837829589844, "learning_rate": 1.5811784232365145e-05, "loss": 1.3927, "step": 12625 }, { "epoch": 10.478008298755187, "grad_norm": 36.7244758605957, "learning_rate": 1.5811452282157677e-05, "loss": 0.859, "step": 12626 }, { "epoch": 10.478838174273859, "grad_norm": 71.28211975097656, "learning_rate": 1.581112033195021e-05, "loss": 1.1377, "step": 12627 }, { "epoch": 10.479668049792531, "grad_norm": 22.231361389160156, "learning_rate": 1.5810788381742738e-05, "loss": 0.9145, "step": 12628 }, { "epoch": 10.480497925311203, "grad_norm": 21.854564666748047, "learning_rate": 1.581045643153527e-05, "loss": 0.7976, "step": 12629 }, { "epoch": 10.481327800829876, "grad_norm": 28.93659782409668, "learning_rate": 1.5810124481327802e-05, "loss": 1.3113, "step": 12630 }, { "epoch": 10.482157676348548, "grad_norm": 38.35513687133789, "learning_rate": 1.5809792531120335e-05, "loss": 1.4064, "step": 12631 }, { "epoch": 10.48298755186722, "grad_norm": 26.157339096069336, "learning_rate": 1.5809460580912867e-05, "loss": 0.7838, "step": 12632 }, { "epoch": 10.483817427385892, "grad_norm": 19.022117614746094, "learning_rate": 1.5809128630705395e-05, "loss": 0.691, "step": 12633 }, { "epoch": 10.484647302904564, "grad_norm": 24.02214813232422, "learning_rate": 1.5808796680497927e-05, "loss": 
0.9484, "step": 12634 }, { "epoch": 10.485477178423237, "grad_norm": 34.8223991394043, "learning_rate": 1.580846473029046e-05, "loss": 0.926, "step": 12635 }, { "epoch": 10.486307053941909, "grad_norm": 29.99270248413086, "learning_rate": 1.5808132780082988e-05, "loss": 1.4207, "step": 12636 }, { "epoch": 10.487136929460581, "grad_norm": 26.3729190826416, "learning_rate": 1.580780082987552e-05, "loss": 0.7279, "step": 12637 }, { "epoch": 10.487966804979253, "grad_norm": 18.380294799804688, "learning_rate": 1.5807468879668052e-05, "loss": 0.8479, "step": 12638 }, { "epoch": 10.488796680497925, "grad_norm": 25.002592086791992, "learning_rate": 1.580713692946058e-05, "loss": 1.0169, "step": 12639 }, { "epoch": 10.489626556016598, "grad_norm": 28.515859603881836, "learning_rate": 1.5806804979253113e-05, "loss": 0.411, "step": 12640 }, { "epoch": 10.49045643153527, "grad_norm": 21.295907974243164, "learning_rate": 1.5806473029045642e-05, "loss": 0.7831, "step": 12641 }, { "epoch": 10.491286307053942, "grad_norm": 21.477832794189453, "learning_rate": 1.5806141078838174e-05, "loss": 1.2711, "step": 12642 }, { "epoch": 10.492116182572614, "grad_norm": 22.227890014648438, "learning_rate": 1.5805809128630706e-05, "loss": 0.8225, "step": 12643 }, { "epoch": 10.492946058091286, "grad_norm": 141.83602905273438, "learning_rate": 1.580547717842324e-05, "loss": 1.9616, "step": 12644 }, { "epoch": 10.493775933609959, "grad_norm": 27.429370880126953, "learning_rate": 1.5805145228215767e-05, "loss": 0.9122, "step": 12645 }, { "epoch": 10.49460580912863, "grad_norm": 24.68763542175293, "learning_rate": 1.58048132780083e-05, "loss": 0.4796, "step": 12646 }, { "epoch": 10.495435684647303, "grad_norm": 41.91549301147461, "learning_rate": 1.580448132780083e-05, "loss": 1.145, "step": 12647 }, { "epoch": 10.496265560165975, "grad_norm": 21.80715560913086, "learning_rate": 1.5804149377593363e-05, "loss": 0.5873, "step": 12648 }, { "epoch": 10.497095435684647, "grad_norm": 24.44403839111328, 
"learning_rate": 1.5803817427385892e-05, "loss": 0.5429, "step": 12649 }, { "epoch": 10.49792531120332, "grad_norm": 37.87229537963867, "learning_rate": 1.5803485477178424e-05, "loss": 1.379, "step": 12650 }, { "epoch": 10.498755186721992, "grad_norm": 34.758243560791016, "learning_rate": 1.5803153526970956e-05, "loss": 1.5765, "step": 12651 }, { "epoch": 10.499585062240664, "grad_norm": 36.28766632080078, "learning_rate": 1.580282157676349e-05, "loss": 0.9066, "step": 12652 }, { "epoch": 10.500414937759336, "grad_norm": 30.522811889648438, "learning_rate": 1.5802489626556017e-05, "loss": 1.0406, "step": 12653 }, { "epoch": 10.501244813278008, "grad_norm": 31.874021530151367, "learning_rate": 1.580215767634855e-05, "loss": 0.5735, "step": 12654 }, { "epoch": 10.50207468879668, "grad_norm": 24.975067138671875, "learning_rate": 1.580182572614108e-05, "loss": 1.0939, "step": 12655 }, { "epoch": 10.502904564315353, "grad_norm": 27.988372802734375, "learning_rate": 1.580149377593361e-05, "loss": 0.8554, "step": 12656 }, { "epoch": 10.503734439834025, "grad_norm": 103.95647430419922, "learning_rate": 1.5801161825726142e-05, "loss": 1.0536, "step": 12657 }, { "epoch": 10.504564315352697, "grad_norm": 27.971294403076172, "learning_rate": 1.5800829875518674e-05, "loss": 1.4484, "step": 12658 }, { "epoch": 10.50539419087137, "grad_norm": 17.1766414642334, "learning_rate": 1.5800497925311203e-05, "loss": 0.6105, "step": 12659 }, { "epoch": 10.506224066390041, "grad_norm": 18.68918800354004, "learning_rate": 1.5800165975103735e-05, "loss": 0.8348, "step": 12660 }, { "epoch": 10.507053941908714, "grad_norm": 34.51792907714844, "learning_rate": 1.5799834024896267e-05, "loss": 0.8625, "step": 12661 }, { "epoch": 10.507883817427386, "grad_norm": 36.92464828491211, "learning_rate": 1.5799502074688796e-05, "loss": 1.0537, "step": 12662 }, { "epoch": 10.508713692946058, "grad_norm": 31.41092872619629, "learning_rate": 1.5799170124481328e-05, "loss": 1.0644, "step": 12663 }, { 
"epoch": 10.50954356846473, "grad_norm": 27.062362670898438, "learning_rate": 1.579883817427386e-05, "loss": 0.9366, "step": 12664 }, { "epoch": 10.510373443983402, "grad_norm": 29.327075958251953, "learning_rate": 1.5798506224066392e-05, "loss": 0.8514, "step": 12665 }, { "epoch": 10.511203319502075, "grad_norm": 23.45693016052246, "learning_rate": 1.579817427385892e-05, "loss": 0.9032, "step": 12666 }, { "epoch": 10.512033195020747, "grad_norm": 71.38844299316406, "learning_rate": 1.5797842323651453e-05, "loss": 1.2467, "step": 12667 }, { "epoch": 10.512863070539419, "grad_norm": 21.79010009765625, "learning_rate": 1.5797510373443985e-05, "loss": 0.5624, "step": 12668 }, { "epoch": 10.513692946058091, "grad_norm": 42.92616271972656, "learning_rate": 1.5797178423236517e-05, "loss": 1.4193, "step": 12669 }, { "epoch": 10.514522821576763, "grad_norm": 31.098464965820312, "learning_rate": 1.5796846473029046e-05, "loss": 1.1703, "step": 12670 }, { "epoch": 10.515352697095436, "grad_norm": 35.80774688720703, "learning_rate": 1.5796514522821578e-05, "loss": 1.0694, "step": 12671 }, { "epoch": 10.516182572614108, "grad_norm": 27.586515426635742, "learning_rate": 1.579618257261411e-05, "loss": 1.3556, "step": 12672 }, { "epoch": 10.51701244813278, "grad_norm": 69.58785247802734, "learning_rate": 1.5795850622406642e-05, "loss": 1.4673, "step": 12673 }, { "epoch": 10.517842323651452, "grad_norm": 25.743314743041992, "learning_rate": 1.579551867219917e-05, "loss": 0.6714, "step": 12674 }, { "epoch": 10.518672199170124, "grad_norm": 33.741004943847656, "learning_rate": 1.5795186721991703e-05, "loss": 0.8562, "step": 12675 }, { "epoch": 10.519502074688797, "grad_norm": 16.779403686523438, "learning_rate": 1.5794854771784235e-05, "loss": 0.6012, "step": 12676 }, { "epoch": 10.520331950207469, "grad_norm": 25.92643928527832, "learning_rate": 1.5794522821576764e-05, "loss": 0.814, "step": 12677 }, { "epoch": 10.521161825726141, "grad_norm": 46.491329193115234, "learning_rate": 
1.5794190871369296e-05, "loss": 1.4478, "step": 12678 }, { "epoch": 10.521991701244813, "grad_norm": 39.56655502319336, "learning_rate": 1.5793858921161825e-05, "loss": 0.9403, "step": 12679 }, { "epoch": 10.522821576763485, "grad_norm": 27.121410369873047, "learning_rate": 1.5793526970954357e-05, "loss": 0.8489, "step": 12680 }, { "epoch": 10.523651452282158, "grad_norm": 28.353164672851562, "learning_rate": 1.579319502074689e-05, "loss": 0.4911, "step": 12681 }, { "epoch": 10.52448132780083, "grad_norm": 21.959095001220703, "learning_rate": 1.579286307053942e-05, "loss": 1.1346, "step": 12682 }, { "epoch": 10.525311203319502, "grad_norm": 26.55206871032715, "learning_rate": 1.579253112033195e-05, "loss": 0.9135, "step": 12683 }, { "epoch": 10.526141078838174, "grad_norm": 27.075153350830078, "learning_rate": 1.5792199170124482e-05, "loss": 1.0118, "step": 12684 }, { "epoch": 10.526970954356846, "grad_norm": 26.683395385742188, "learning_rate": 1.5791867219917014e-05, "loss": 0.5138, "step": 12685 }, { "epoch": 10.527800829875519, "grad_norm": 39.25912857055664, "learning_rate": 1.5791535269709546e-05, "loss": 1.3622, "step": 12686 }, { "epoch": 10.52863070539419, "grad_norm": 29.79800033569336, "learning_rate": 1.5791203319502075e-05, "loss": 0.7385, "step": 12687 }, { "epoch": 10.529460580912863, "grad_norm": 35.52409362792969, "learning_rate": 1.5790871369294607e-05, "loss": 0.8273, "step": 12688 }, { "epoch": 10.530290456431535, "grad_norm": 30.008516311645508, "learning_rate": 1.579053941908714e-05, "loss": 1.1689, "step": 12689 }, { "epoch": 10.531120331950207, "grad_norm": 30.760662078857422, "learning_rate": 1.579020746887967e-05, "loss": 0.9486, "step": 12690 }, { "epoch": 10.53195020746888, "grad_norm": 45.53132247924805, "learning_rate": 1.57898755186722e-05, "loss": 1.0871, "step": 12691 }, { "epoch": 10.532780082987552, "grad_norm": 25.79899024963379, "learning_rate": 1.5789543568464732e-05, "loss": 1.101, "step": 12692 }, { "epoch": 
10.533609958506224, "grad_norm": 36.57255554199219, "learning_rate": 1.5789211618257264e-05, "loss": 0.8968, "step": 12693 }, { "epoch": 10.534439834024896, "grad_norm": 36.92770004272461, "learning_rate": 1.5788879668049796e-05, "loss": 1.0274, "step": 12694 }, { "epoch": 10.535269709543568, "grad_norm": 34.63094711303711, "learning_rate": 1.5788547717842325e-05, "loss": 2.1227, "step": 12695 }, { "epoch": 10.53609958506224, "grad_norm": 26.193256378173828, "learning_rate": 1.5788215767634857e-05, "loss": 0.8033, "step": 12696 }, { "epoch": 10.536929460580913, "grad_norm": 47.31886672973633, "learning_rate": 1.5787883817427386e-05, "loss": 1.1581, "step": 12697 }, { "epoch": 10.537759336099585, "grad_norm": 33.39823532104492, "learning_rate": 1.5787551867219918e-05, "loss": 0.8837, "step": 12698 }, { "epoch": 10.538589211618257, "grad_norm": 40.77879333496094, "learning_rate": 1.578721991701245e-05, "loss": 1.1737, "step": 12699 }, { "epoch": 10.53941908713693, "grad_norm": 32.2287483215332, "learning_rate": 1.578688796680498e-05, "loss": 1.3806, "step": 12700 }, { "epoch": 10.540248962655602, "grad_norm": 34.47874450683594, "learning_rate": 1.578655601659751e-05, "loss": 0.6883, "step": 12701 }, { "epoch": 10.541078838174274, "grad_norm": 24.040660858154297, "learning_rate": 1.5786224066390043e-05, "loss": 1.2429, "step": 12702 }, { "epoch": 10.541908713692946, "grad_norm": 15.5853271484375, "learning_rate": 1.5785892116182572e-05, "loss": 0.6323, "step": 12703 }, { "epoch": 10.542738589211618, "grad_norm": 14.828570365905762, "learning_rate": 1.5785560165975104e-05, "loss": 0.3472, "step": 12704 }, { "epoch": 10.54356846473029, "grad_norm": 23.392093658447266, "learning_rate": 1.5785228215767636e-05, "loss": 1.2158, "step": 12705 }, { "epoch": 10.544398340248962, "grad_norm": 22.499101638793945, "learning_rate": 1.5784896265560168e-05, "loss": 1.02, "step": 12706 }, { "epoch": 10.545228215767635, "grad_norm": 24.045286178588867, "learning_rate": 
1.5784564315352697e-05, "loss": 1.1979, "step": 12707 }, { "epoch": 10.546058091286307, "grad_norm": 26.834665298461914, "learning_rate": 1.578423236514523e-05, "loss": 1.13, "step": 12708 }, { "epoch": 10.546887966804979, "grad_norm": 19.708139419555664, "learning_rate": 1.578390041493776e-05, "loss": 0.6052, "step": 12709 }, { "epoch": 10.547717842323651, "grad_norm": 38.337223052978516, "learning_rate": 1.5783568464730293e-05, "loss": 1.1145, "step": 12710 }, { "epoch": 10.548547717842323, "grad_norm": 15.980960845947266, "learning_rate": 1.5783236514522825e-05, "loss": 0.3668, "step": 12711 }, { "epoch": 10.549377593360996, "grad_norm": 27.030773162841797, "learning_rate": 1.5782904564315354e-05, "loss": 0.9545, "step": 12712 }, { "epoch": 10.550207468879668, "grad_norm": 46.645381927490234, "learning_rate": 1.5782572614107886e-05, "loss": 0.7961, "step": 12713 }, { "epoch": 10.55103734439834, "grad_norm": 82.91357421875, "learning_rate": 1.5782240663900418e-05, "loss": 1.4476, "step": 12714 }, { "epoch": 10.551867219917012, "grad_norm": 42.98943328857422, "learning_rate": 1.5781908713692947e-05, "loss": 1.3223, "step": 12715 }, { "epoch": 10.552697095435684, "grad_norm": 21.13046646118164, "learning_rate": 1.578157676348548e-05, "loss": 0.8468, "step": 12716 }, { "epoch": 10.553526970954357, "grad_norm": 32.25068283081055, "learning_rate": 1.578124481327801e-05, "loss": 1.1936, "step": 12717 }, { "epoch": 10.554356846473029, "grad_norm": 35.383949279785156, "learning_rate": 1.578091286307054e-05, "loss": 1.5839, "step": 12718 }, { "epoch": 10.555186721991701, "grad_norm": 28.627239227294922, "learning_rate": 1.5780580912863072e-05, "loss": 1.0194, "step": 12719 }, { "epoch": 10.556016597510373, "grad_norm": 45.71725082397461, "learning_rate": 1.57802489626556e-05, "loss": 1.1483, "step": 12720 }, { "epoch": 10.556846473029045, "grad_norm": 29.395843505859375, "learning_rate": 1.5779917012448133e-05, "loss": 0.7809, "step": 12721 }, { "epoch": 
10.557676348547718, "grad_norm": 22.03908348083496, "learning_rate": 1.5779585062240665e-05, "loss": 1.0659, "step": 12722 }, { "epoch": 10.55850622406639, "grad_norm": 93.95532989501953, "learning_rate": 1.5779253112033197e-05, "loss": 1.1607, "step": 12723 }, { "epoch": 10.559336099585062, "grad_norm": 20.712862014770508, "learning_rate": 1.5778921161825726e-05, "loss": 0.9043, "step": 12724 }, { "epoch": 10.560165975103734, "grad_norm": 38.36386489868164, "learning_rate": 1.5778589211618258e-05, "loss": 0.7579, "step": 12725 }, { "epoch": 10.560995850622406, "grad_norm": 33.325592041015625, "learning_rate": 1.577825726141079e-05, "loss": 1.4002, "step": 12726 }, { "epoch": 10.561825726141079, "grad_norm": 29.771133422851562, "learning_rate": 1.5777925311203322e-05, "loss": 1.1267, "step": 12727 }, { "epoch": 10.56265560165975, "grad_norm": 33.59830093383789, "learning_rate": 1.577759336099585e-05, "loss": 0.9513, "step": 12728 }, { "epoch": 10.563485477178423, "grad_norm": 49.765018463134766, "learning_rate": 1.5777261410788383e-05, "loss": 0.8973, "step": 12729 }, { "epoch": 10.564315352697095, "grad_norm": 29.47197914123535, "learning_rate": 1.5776929460580915e-05, "loss": 0.6853, "step": 12730 }, { "epoch": 10.565145228215767, "grad_norm": 15.908502578735352, "learning_rate": 1.5776597510373447e-05, "loss": 0.6085, "step": 12731 }, { "epoch": 10.56597510373444, "grad_norm": 14.020574569702148, "learning_rate": 1.5776265560165976e-05, "loss": 0.2883, "step": 12732 }, { "epoch": 10.566804979253112, "grad_norm": 22.233488082885742, "learning_rate": 1.5775933609958508e-05, "loss": 0.6554, "step": 12733 }, { "epoch": 10.567634854771784, "grad_norm": 28.676589965820312, "learning_rate": 1.577560165975104e-05, "loss": 1.0844, "step": 12734 }, { "epoch": 10.568464730290456, "grad_norm": 43.83271026611328, "learning_rate": 1.577526970954357e-05, "loss": 1.5271, "step": 12735 }, { "epoch": 10.569294605809128, "grad_norm": 26.1290283203125, "learning_rate": 
1.57749377593361e-05, "loss": 0.9752, "step": 12736 }, { "epoch": 10.5701244813278, "grad_norm": 42.72997283935547, "learning_rate": 1.5774605809128633e-05, "loss": 0.8823, "step": 12737 }, { "epoch": 10.570954356846473, "grad_norm": 35.86225509643555, "learning_rate": 1.577427385892116e-05, "loss": 1.1142, "step": 12738 }, { "epoch": 10.571784232365145, "grad_norm": 31.191869735717773, "learning_rate": 1.5773941908713694e-05, "loss": 1.1022, "step": 12739 }, { "epoch": 10.572614107883817, "grad_norm": 31.07482147216797, "learning_rate": 1.5773609958506226e-05, "loss": 0.8942, "step": 12740 }, { "epoch": 10.57344398340249, "grad_norm": 72.56246948242188, "learning_rate": 1.5773278008298755e-05, "loss": 1.1992, "step": 12741 }, { "epoch": 10.574273858921162, "grad_norm": 45.152862548828125, "learning_rate": 1.5772946058091287e-05, "loss": 1.2452, "step": 12742 }, { "epoch": 10.575103734439834, "grad_norm": 30.796859741210938, "learning_rate": 1.577261410788382e-05, "loss": 0.9911, "step": 12743 }, { "epoch": 10.575933609958506, "grad_norm": 22.991241455078125, "learning_rate": 1.577228215767635e-05, "loss": 1.0394, "step": 12744 }, { "epoch": 10.576763485477178, "grad_norm": 27.506126403808594, "learning_rate": 1.577195020746888e-05, "loss": 1.2877, "step": 12745 }, { "epoch": 10.57759336099585, "grad_norm": 23.665006637573242, "learning_rate": 1.577161825726141e-05, "loss": 1.8523, "step": 12746 }, { "epoch": 10.578423236514523, "grad_norm": 23.610692977905273, "learning_rate": 1.5771286307053944e-05, "loss": 0.6553, "step": 12747 }, { "epoch": 10.579253112033195, "grad_norm": 32.16549301147461, "learning_rate": 1.5770954356846476e-05, "loss": 1.0111, "step": 12748 }, { "epoch": 10.580082987551867, "grad_norm": 51.471099853515625, "learning_rate": 1.5770622406639005e-05, "loss": 1.1838, "step": 12749 }, { "epoch": 10.58091286307054, "grad_norm": 36.78926086425781, "learning_rate": 1.5770290456431537e-05, "loss": 0.8077, "step": 12750 }, { "epoch": 
10.581742738589211, "grad_norm": 21.68535804748535, "learning_rate": 1.576995850622407e-05, "loss": 0.5145, "step": 12751 }, { "epoch": 10.582572614107884, "grad_norm": 21.77201271057129, "learning_rate": 1.57696265560166e-05, "loss": 0.7792, "step": 12752 }, { "epoch": 10.583402489626556, "grad_norm": 22.16028594970703, "learning_rate": 1.576929460580913e-05, "loss": 0.6832, "step": 12753 }, { "epoch": 10.584232365145228, "grad_norm": 25.083093643188477, "learning_rate": 1.5768962655601662e-05, "loss": 1.3146, "step": 12754 }, { "epoch": 10.5850622406639, "grad_norm": 25.813791275024414, "learning_rate": 1.5768630705394194e-05, "loss": 1.1084, "step": 12755 }, { "epoch": 10.585892116182572, "grad_norm": 35.12803649902344, "learning_rate": 1.5768298755186723e-05, "loss": 1.268, "step": 12756 }, { "epoch": 10.586721991701245, "grad_norm": 35.81027603149414, "learning_rate": 1.5767966804979255e-05, "loss": 0.7767, "step": 12757 }, { "epoch": 10.587551867219917, "grad_norm": 15.072115898132324, "learning_rate": 1.5767634854771783e-05, "loss": 0.4079, "step": 12758 }, { "epoch": 10.588381742738589, "grad_norm": 51.9240837097168, "learning_rate": 1.5767302904564316e-05, "loss": 0.6658, "step": 12759 }, { "epoch": 10.589211618257261, "grad_norm": 33.76634216308594, "learning_rate": 1.5766970954356848e-05, "loss": 0.8329, "step": 12760 }, { "epoch": 10.590041493775933, "grad_norm": 24.790414810180664, "learning_rate": 1.576663900414938e-05, "loss": 1.2942, "step": 12761 }, { "epoch": 10.590871369294605, "grad_norm": 14.213682174682617, "learning_rate": 1.576630705394191e-05, "loss": 0.4544, "step": 12762 }, { "epoch": 10.591701244813278, "grad_norm": 29.990543365478516, "learning_rate": 1.576597510373444e-05, "loss": 1.3321, "step": 12763 }, { "epoch": 10.59253112033195, "grad_norm": 28.875469207763672, "learning_rate": 1.5765643153526973e-05, "loss": 0.5334, "step": 12764 }, { "epoch": 10.593360995850622, "grad_norm": 37.30323791503906, "learning_rate": 
1.5765311203319505e-05, "loss": 1.6965, "step": 12765 }, { "epoch": 10.594190871369294, "grad_norm": 43.38182067871094, "learning_rate": 1.5764979253112034e-05, "loss": 1.1419, "step": 12766 }, { "epoch": 10.595020746887966, "grad_norm": 37.04690170288086, "learning_rate": 1.5764647302904566e-05, "loss": 0.9444, "step": 12767 }, { "epoch": 10.595850622406639, "grad_norm": 25.060178756713867, "learning_rate": 1.5764315352697098e-05, "loss": 1.3902, "step": 12768 }, { "epoch": 10.59668049792531, "grad_norm": 36.50053024291992, "learning_rate": 1.576398340248963e-05, "loss": 1.2116, "step": 12769 }, { "epoch": 10.597510373443983, "grad_norm": 21.146163940429688, "learning_rate": 1.576365145228216e-05, "loss": 0.6053, "step": 12770 }, { "epoch": 10.598340248962655, "grad_norm": 32.33743667602539, "learning_rate": 1.576331950207469e-05, "loss": 1.0438, "step": 12771 }, { "epoch": 10.599170124481327, "grad_norm": 27.98202133178711, "learning_rate": 1.5762987551867223e-05, "loss": 1.5393, "step": 12772 }, { "epoch": 10.6, "grad_norm": 30.955270767211914, "learning_rate": 1.576265560165975e-05, "loss": 1.1351, "step": 12773 }, { "epoch": 10.600829875518672, "grad_norm": 21.73537254333496, "learning_rate": 1.5762323651452284e-05, "loss": 0.7618, "step": 12774 }, { "epoch": 10.601659751037344, "grad_norm": 21.27459144592285, "learning_rate": 1.5761991701244816e-05, "loss": 0.7688, "step": 12775 }, { "epoch": 10.602489626556016, "grad_norm": 15.120573997497559, "learning_rate": 1.5761659751037344e-05, "loss": 0.4356, "step": 12776 }, { "epoch": 10.603319502074688, "grad_norm": 39.7107048034668, "learning_rate": 1.5761327800829877e-05, "loss": 1.4095, "step": 12777 }, { "epoch": 10.60414937759336, "grad_norm": 31.267866134643555, "learning_rate": 1.576099585062241e-05, "loss": 1.3907, "step": 12778 }, { "epoch": 10.604979253112033, "grad_norm": 26.3422794342041, "learning_rate": 1.5760663900414937e-05, "loss": 0.8005, "step": 12779 }, { "epoch": 10.605809128630705, 
"grad_norm": 45.48666000366211, "learning_rate": 1.576033195020747e-05, "loss": 1.9634, "step": 12780 }, { "epoch": 10.606639004149377, "grad_norm": 23.036853790283203, "learning_rate": 1.576e-05, "loss": 1.0218, "step": 12781 }, { "epoch": 10.60746887966805, "grad_norm": 31.781692504882812, "learning_rate": 1.575966804979253e-05, "loss": 1.2387, "step": 12782 }, { "epoch": 10.608298755186722, "grad_norm": 26.391311645507812, "learning_rate": 1.5759336099585062e-05, "loss": 1.3822, "step": 12783 }, { "epoch": 10.609128630705394, "grad_norm": 20.678203582763672, "learning_rate": 1.5759004149377595e-05, "loss": 0.7644, "step": 12784 }, { "epoch": 10.609958506224066, "grad_norm": 35.20811080932617, "learning_rate": 1.5758672199170127e-05, "loss": 1.7839, "step": 12785 }, { "epoch": 10.610788381742738, "grad_norm": 18.184951782226562, "learning_rate": 1.5758340248962655e-05, "loss": 1.0851, "step": 12786 }, { "epoch": 10.61161825726141, "grad_norm": 27.126976013183594, "learning_rate": 1.5758008298755187e-05, "loss": 0.7472, "step": 12787 }, { "epoch": 10.612448132780083, "grad_norm": 32.47286605834961, "learning_rate": 1.575767634854772e-05, "loss": 0.4765, "step": 12788 }, { "epoch": 10.613278008298755, "grad_norm": 13.906286239624023, "learning_rate": 1.575734439834025e-05, "loss": 0.452, "step": 12789 }, { "epoch": 10.614107883817427, "grad_norm": 19.221574783325195, "learning_rate": 1.5757012448132784e-05, "loss": 0.5801, "step": 12790 }, { "epoch": 10.6149377593361, "grad_norm": 49.021156311035156, "learning_rate": 1.5756680497925312e-05, "loss": 1.9223, "step": 12791 }, { "epoch": 10.615767634854771, "grad_norm": 57.04249954223633, "learning_rate": 1.5756348547717845e-05, "loss": 1.2985, "step": 12792 }, { "epoch": 10.616597510373444, "grad_norm": 23.744752883911133, "learning_rate": 1.5756016597510377e-05, "loss": 1.0383, "step": 12793 }, { "epoch": 10.617427385892116, "grad_norm": 21.744712829589844, "learning_rate": 1.5755684647302905e-05, "loss": 0.4571, 
"step": 12794 }, { "epoch": 10.618257261410788, "grad_norm": 38.39677429199219, "learning_rate": 1.5755352697095438e-05, "loss": 0.9799, "step": 12795 }, { "epoch": 10.61908713692946, "grad_norm": 45.05598449707031, "learning_rate": 1.5755020746887966e-05, "loss": 0.9802, "step": 12796 }, { "epoch": 10.619917012448132, "grad_norm": 43.98451232910156, "learning_rate": 1.57546887966805e-05, "loss": 0.609, "step": 12797 }, { "epoch": 10.620746887966805, "grad_norm": 26.732046127319336, "learning_rate": 1.575435684647303e-05, "loss": 0.9276, "step": 12798 }, { "epoch": 10.621576763485477, "grad_norm": 26.849010467529297, "learning_rate": 1.575402489626556e-05, "loss": 1.4479, "step": 12799 }, { "epoch": 10.622406639004149, "grad_norm": 24.434738159179688, "learning_rate": 1.575369294605809e-05, "loss": 0.6671, "step": 12800 }, { "epoch": 10.623236514522821, "grad_norm": 25.525434494018555, "learning_rate": 1.5753360995850623e-05, "loss": 0.9886, "step": 12801 }, { "epoch": 10.624066390041493, "grad_norm": 28.420124053955078, "learning_rate": 1.5753029045643156e-05, "loss": 1.0335, "step": 12802 }, { "epoch": 10.624896265560166, "grad_norm": 23.54205322265625, "learning_rate": 1.5752697095435684e-05, "loss": 0.6022, "step": 12803 }, { "epoch": 10.625726141078838, "grad_norm": 32.96625900268555, "learning_rate": 1.5752365145228216e-05, "loss": 1.2478, "step": 12804 }, { "epoch": 10.62655601659751, "grad_norm": 26.576946258544922, "learning_rate": 1.575203319502075e-05, "loss": 1.0612, "step": 12805 }, { "epoch": 10.627385892116182, "grad_norm": 35.437889099121094, "learning_rate": 1.575170124481328e-05, "loss": 0.943, "step": 12806 }, { "epoch": 10.628215767634854, "grad_norm": 60.540802001953125, "learning_rate": 1.575136929460581e-05, "loss": 1.3563, "step": 12807 }, { "epoch": 10.629045643153527, "grad_norm": 24.816160202026367, "learning_rate": 1.575103734439834e-05, "loss": 0.8878, "step": 12808 }, { "epoch": 10.629875518672199, "grad_norm": 35.05427551269531, 
"learning_rate": 1.5750705394190873e-05, "loss": 0.7005, "step": 12809 }, { "epoch": 10.630705394190871, "grad_norm": 33.93339538574219, "learning_rate": 1.5750373443983406e-05, "loss": 1.2156, "step": 12810 }, { "epoch": 10.631535269709543, "grad_norm": 59.78627395629883, "learning_rate": 1.5750041493775934e-05, "loss": 1.3619, "step": 12811 }, { "epoch": 10.632365145228215, "grad_norm": 24.278587341308594, "learning_rate": 1.5749709543568466e-05, "loss": 1.093, "step": 12812 }, { "epoch": 10.633195020746887, "grad_norm": 15.412342071533203, "learning_rate": 1.5749377593361e-05, "loss": 0.7545, "step": 12813 }, { "epoch": 10.63402489626556, "grad_norm": 26.30113410949707, "learning_rate": 1.5749045643153527e-05, "loss": 1.0841, "step": 12814 }, { "epoch": 10.634854771784232, "grad_norm": 27.325162887573242, "learning_rate": 1.574871369294606e-05, "loss": 0.8097, "step": 12815 }, { "epoch": 10.635684647302904, "grad_norm": 43.67332077026367, "learning_rate": 1.574838174273859e-05, "loss": 1.3732, "step": 12816 }, { "epoch": 10.636514522821576, "grad_norm": 31.17249298095703, "learning_rate": 1.574804979253112e-05, "loss": 1.0984, "step": 12817 }, { "epoch": 10.637344398340248, "grad_norm": 19.65708351135254, "learning_rate": 1.5747717842323652e-05, "loss": 0.8567, "step": 12818 }, { "epoch": 10.63817427385892, "grad_norm": 28.52100372314453, "learning_rate": 1.5747385892116184e-05, "loss": 1.065, "step": 12819 }, { "epoch": 10.639004149377593, "grad_norm": 32.48527526855469, "learning_rate": 1.5747053941908713e-05, "loss": 0.9812, "step": 12820 }, { "epoch": 10.639834024896265, "grad_norm": 31.477252960205078, "learning_rate": 1.5746721991701245e-05, "loss": 1.2502, "step": 12821 }, { "epoch": 10.640663900414937, "grad_norm": 27.321788787841797, "learning_rate": 1.5746390041493777e-05, "loss": 0.7948, "step": 12822 }, { "epoch": 10.64149377593361, "grad_norm": 28.570528030395508, "learning_rate": 1.574605809128631e-05, "loss": 1.2038, "step": 12823 }, { "epoch": 
10.642323651452282, "grad_norm": 15.488235473632812, "learning_rate": 1.5745726141078838e-05, "loss": 0.5903, "step": 12824 }, { "epoch": 10.643153526970954, "grad_norm": 27.397537231445312, "learning_rate": 1.574539419087137e-05, "loss": 1.7361, "step": 12825 }, { "epoch": 10.643983402489626, "grad_norm": 23.557815551757812, "learning_rate": 1.5745062240663902e-05, "loss": 1.1026, "step": 12826 }, { "epoch": 10.644813278008298, "grad_norm": 39.37639236450195, "learning_rate": 1.5744730290456434e-05, "loss": 1.0757, "step": 12827 }, { "epoch": 10.64564315352697, "grad_norm": 83.44943237304688, "learning_rate": 1.5744398340248963e-05, "loss": 1.0097, "step": 12828 }, { "epoch": 10.646473029045643, "grad_norm": 48.12215805053711, "learning_rate": 1.5744066390041495e-05, "loss": 0.897, "step": 12829 }, { "epoch": 10.647302904564315, "grad_norm": 35.715843200683594, "learning_rate": 1.5743734439834027e-05, "loss": 1.1356, "step": 12830 }, { "epoch": 10.648132780082987, "grad_norm": 34.73796844482422, "learning_rate": 1.574340248962656e-05, "loss": 0.5917, "step": 12831 }, { "epoch": 10.64896265560166, "grad_norm": 36.5994758605957, "learning_rate": 1.5743070539419088e-05, "loss": 1.5142, "step": 12832 }, { "epoch": 10.649792531120331, "grad_norm": 14.509200096130371, "learning_rate": 1.574273858921162e-05, "loss": 0.5176, "step": 12833 }, { "epoch": 10.650622406639004, "grad_norm": 31.708789825439453, "learning_rate": 1.5742406639004152e-05, "loss": 0.9147, "step": 12834 }, { "epoch": 10.651452282157676, "grad_norm": 54.07421875, "learning_rate": 1.574207468879668e-05, "loss": 1.1148, "step": 12835 }, { "epoch": 10.652282157676348, "grad_norm": 17.600379943847656, "learning_rate": 1.5741742738589213e-05, "loss": 1.089, "step": 12836 }, { "epoch": 10.65311203319502, "grad_norm": 43.009971618652344, "learning_rate": 1.5741410788381742e-05, "loss": 0.7337, "step": 12837 }, { "epoch": 10.653941908713692, "grad_norm": 18.022342681884766, "learning_rate": 
1.5741078838174274e-05, "loss": 0.3886, "step": 12838 }, { "epoch": 10.654771784232365, "grad_norm": 23.351909637451172, "learning_rate": 1.5740746887966806e-05, "loss": 1.0126, "step": 12839 }, { "epoch": 10.655601659751037, "grad_norm": 23.324657440185547, "learning_rate": 1.574041493775934e-05, "loss": 0.6035, "step": 12840 }, { "epoch": 10.656431535269709, "grad_norm": 27.866701126098633, "learning_rate": 1.5740082987551867e-05, "loss": 1.1973, "step": 12841 }, { "epoch": 10.657261410788381, "grad_norm": 23.45029067993164, "learning_rate": 1.57397510373444e-05, "loss": 1.4273, "step": 12842 }, { "epoch": 10.658091286307053, "grad_norm": 49.804176330566406, "learning_rate": 1.573941908713693e-05, "loss": 1.4484, "step": 12843 }, { "epoch": 10.658921161825726, "grad_norm": 31.064849853515625, "learning_rate": 1.5739087136929463e-05, "loss": 0.9693, "step": 12844 }, { "epoch": 10.659751037344398, "grad_norm": 62.038265228271484, "learning_rate": 1.5738755186721992e-05, "loss": 0.868, "step": 12845 }, { "epoch": 10.66058091286307, "grad_norm": 28.29058837890625, "learning_rate": 1.5738423236514524e-05, "loss": 0.8505, "step": 12846 }, { "epoch": 10.661410788381742, "grad_norm": 30.65601348876953, "learning_rate": 1.5738091286307056e-05, "loss": 1.3541, "step": 12847 }, { "epoch": 10.662240663900414, "grad_norm": 18.883481979370117, "learning_rate": 1.573775933609959e-05, "loss": 0.8682, "step": 12848 }, { "epoch": 10.663070539419087, "grad_norm": 20.352195739746094, "learning_rate": 1.5737427385892117e-05, "loss": 0.9503, "step": 12849 }, { "epoch": 10.663900414937759, "grad_norm": 35.858306884765625, "learning_rate": 1.573709543568465e-05, "loss": 0.977, "step": 12850 }, { "epoch": 10.664730290456431, "grad_norm": 22.305217742919922, "learning_rate": 1.573676348547718e-05, "loss": 0.5428, "step": 12851 }, { "epoch": 10.665560165975103, "grad_norm": 31.757312774658203, "learning_rate": 1.573643153526971e-05, "loss": 0.9927, "step": 12852 }, { "epoch": 
10.666390041493775, "grad_norm": 41.43294143676758, "learning_rate": 1.5736099585062242e-05, "loss": 1.0329, "step": 12853 }, { "epoch": 10.667219917012448, "grad_norm": 21.495006561279297, "learning_rate": 1.5735767634854774e-05, "loss": 1.041, "step": 12854 }, { "epoch": 10.66804979253112, "grad_norm": 28.871795654296875, "learning_rate": 1.5735435684647303e-05, "loss": 0.6834, "step": 12855 }, { "epoch": 10.668879668049792, "grad_norm": 36.25926208496094, "learning_rate": 1.5735103734439835e-05, "loss": 1.4456, "step": 12856 }, { "epoch": 10.669709543568464, "grad_norm": 25.377641677856445, "learning_rate": 1.5734771784232364e-05, "loss": 0.7811, "step": 12857 }, { "epoch": 10.670539419087136, "grad_norm": 22.72406768798828, "learning_rate": 1.5734439834024896e-05, "loss": 1.219, "step": 12858 }, { "epoch": 10.671369294605809, "grad_norm": 30.732942581176758, "learning_rate": 1.5734107883817428e-05, "loss": 0.5564, "step": 12859 }, { "epoch": 10.67219917012448, "grad_norm": 24.022663116455078, "learning_rate": 1.573377593360996e-05, "loss": 1.0341, "step": 12860 }, { "epoch": 10.673029045643153, "grad_norm": 31.27135467529297, "learning_rate": 1.573344398340249e-05, "loss": 1.0966, "step": 12861 }, { "epoch": 10.673858921161825, "grad_norm": 38.22126388549805, "learning_rate": 1.573311203319502e-05, "loss": 1.5232, "step": 12862 }, { "epoch": 10.674688796680497, "grad_norm": 33.85181427001953, "learning_rate": 1.5732780082987553e-05, "loss": 1.366, "step": 12863 }, { "epoch": 10.67551867219917, "grad_norm": 30.18642234802246, "learning_rate": 1.5732448132780085e-05, "loss": 1.5546, "step": 12864 }, { "epoch": 10.676348547717842, "grad_norm": 22.182830810546875, "learning_rate": 1.5732116182572614e-05, "loss": 0.6285, "step": 12865 }, { "epoch": 10.677178423236514, "grad_norm": 27.265182495117188, "learning_rate": 1.5731784232365146e-05, "loss": 1.234, "step": 12866 }, { "epoch": 10.678008298755186, "grad_norm": 24.632598876953125, "learning_rate": 
1.5731452282157678e-05, "loss": 0.8707, "step": 12867 }, { "epoch": 10.678838174273858, "grad_norm": 47.36589431762695, "learning_rate": 1.573112033195021e-05, "loss": 1.1168, "step": 12868 }, { "epoch": 10.67966804979253, "grad_norm": 31.560352325439453, "learning_rate": 1.5730788381742742e-05, "loss": 1.5198, "step": 12869 }, { "epoch": 10.680497925311203, "grad_norm": 13.5322265625, "learning_rate": 1.573045643153527e-05, "loss": 0.5181, "step": 12870 }, { "epoch": 10.681327800829875, "grad_norm": 20.29183006286621, "learning_rate": 1.5730124481327803e-05, "loss": 0.7928, "step": 12871 }, { "epoch": 10.682157676348547, "grad_norm": 50.50200653076172, "learning_rate": 1.5729792531120335e-05, "loss": 1.0855, "step": 12872 }, { "epoch": 10.68298755186722, "grad_norm": 23.144489288330078, "learning_rate": 1.5729460580912864e-05, "loss": 0.5642, "step": 12873 }, { "epoch": 10.683817427385891, "grad_norm": 31.661176681518555, "learning_rate": 1.5729128630705396e-05, "loss": 0.7081, "step": 12874 }, { "epoch": 10.684647302904564, "grad_norm": 29.878459930419922, "learning_rate": 1.5728796680497925e-05, "loss": 0.8403, "step": 12875 }, { "epoch": 10.685477178423236, "grad_norm": 18.067073822021484, "learning_rate": 1.5728464730290457e-05, "loss": 0.6501, "step": 12876 }, { "epoch": 10.686307053941908, "grad_norm": 28.830339431762695, "learning_rate": 1.572813278008299e-05, "loss": 1.7378, "step": 12877 }, { "epoch": 10.68713692946058, "grad_norm": 121.08198547363281, "learning_rate": 1.5727800829875518e-05, "loss": 1.2811, "step": 12878 }, { "epoch": 10.687966804979252, "grad_norm": 26.547767639160156, "learning_rate": 1.572746887966805e-05, "loss": 1.0731, "step": 12879 }, { "epoch": 10.688796680497925, "grad_norm": 54.61368942260742, "learning_rate": 1.5727136929460582e-05, "loss": 0.9983, "step": 12880 }, { "epoch": 10.689626556016597, "grad_norm": 35.31550216674805, "learning_rate": 1.5726804979253114e-05, "loss": 1.1339, "step": 12881 }, { "epoch": 
10.690456431535269, "grad_norm": 47.54045486450195, "learning_rate": 1.5726473029045643e-05, "loss": 0.7019, "step": 12882 }, { "epoch": 10.691286307053941, "grad_norm": 40.48414993286133, "learning_rate": 1.5726141078838175e-05, "loss": 1.5161, "step": 12883 }, { "epoch": 10.692116182572613, "grad_norm": 25.75868034362793, "learning_rate": 1.5725809128630707e-05, "loss": 0.981, "step": 12884 }, { "epoch": 10.692946058091286, "grad_norm": 51.31242370605469, "learning_rate": 1.572547717842324e-05, "loss": 1.7337, "step": 12885 }, { "epoch": 10.693775933609958, "grad_norm": 35.75935363769531, "learning_rate": 1.5725145228215768e-05, "loss": 0.8758, "step": 12886 }, { "epoch": 10.69460580912863, "grad_norm": 31.111024856567383, "learning_rate": 1.57248132780083e-05, "loss": 1.6336, "step": 12887 }, { "epoch": 10.695435684647302, "grad_norm": 37.674652099609375, "learning_rate": 1.5724481327800832e-05, "loss": 0.9238, "step": 12888 }, { "epoch": 10.696265560165974, "grad_norm": 29.632591247558594, "learning_rate": 1.5724149377593364e-05, "loss": 1.0111, "step": 12889 }, { "epoch": 10.697095435684647, "grad_norm": 66.26597595214844, "learning_rate": 1.5723817427385893e-05, "loss": 0.6386, "step": 12890 }, { "epoch": 10.697925311203319, "grad_norm": 51.2489128112793, "learning_rate": 1.5723485477178425e-05, "loss": 0.6247, "step": 12891 }, { "epoch": 10.698755186721991, "grad_norm": 21.673145294189453, "learning_rate": 1.5723153526970957e-05, "loss": 0.9864, "step": 12892 }, { "epoch": 10.699585062240663, "grad_norm": 22.57362174987793, "learning_rate": 1.5722821576763486e-05, "loss": 1.0943, "step": 12893 }, { "epoch": 10.700414937759335, "grad_norm": 37.334083557128906, "learning_rate": 1.5722489626556018e-05, "loss": 1.5359, "step": 12894 }, { "epoch": 10.701244813278008, "grad_norm": 58.73371124267578, "learning_rate": 1.572215767634855e-05, "loss": 0.7513, "step": 12895 }, { "epoch": 10.70207468879668, "grad_norm": 29.28864097595215, "learning_rate": 
1.572182572614108e-05, "loss": 1.1839, "step": 12896 }, { "epoch": 10.702904564315352, "grad_norm": 21.835773468017578, "learning_rate": 1.572149377593361e-05, "loss": 0.6629, "step": 12897 }, { "epoch": 10.703734439834024, "grad_norm": 34.73855972290039, "learning_rate": 1.5721161825726143e-05, "loss": 1.0726, "step": 12898 }, { "epoch": 10.704564315352696, "grad_norm": 20.71234703063965, "learning_rate": 1.572082987551867e-05, "loss": 0.7186, "step": 12899 }, { "epoch": 10.705394190871369, "grad_norm": 48.74393844604492, "learning_rate": 1.5720497925311204e-05, "loss": 1.6802, "step": 12900 }, { "epoch": 10.70622406639004, "grad_norm": 36.329261779785156, "learning_rate": 1.5720165975103736e-05, "loss": 0.9948, "step": 12901 }, { "epoch": 10.707053941908713, "grad_norm": 29.82072639465332, "learning_rate": 1.5719834024896268e-05, "loss": 1.0076, "step": 12902 }, { "epoch": 10.707883817427385, "grad_norm": 25.782073974609375, "learning_rate": 1.5719502074688797e-05, "loss": 1.4295, "step": 12903 }, { "epoch": 10.708713692946057, "grad_norm": 22.610694885253906, "learning_rate": 1.571917012448133e-05, "loss": 0.7453, "step": 12904 }, { "epoch": 10.70954356846473, "grad_norm": 22.373615264892578, "learning_rate": 1.571883817427386e-05, "loss": 0.7586, "step": 12905 }, { "epoch": 10.710373443983402, "grad_norm": 38.449588775634766, "learning_rate": 1.5718506224066393e-05, "loss": 1.3451, "step": 12906 }, { "epoch": 10.711203319502074, "grad_norm": 27.349111557006836, "learning_rate": 1.5718174273858922e-05, "loss": 0.9296, "step": 12907 }, { "epoch": 10.712033195020746, "grad_norm": 19.417160034179688, "learning_rate": 1.5717842323651454e-05, "loss": 0.6303, "step": 12908 }, { "epoch": 10.712863070539418, "grad_norm": 37.61640167236328, "learning_rate": 1.5717510373443986e-05, "loss": 1.1159, "step": 12909 }, { "epoch": 10.71369294605809, "grad_norm": 34.41372299194336, "learning_rate": 1.5717178423236518e-05, "loss": 1.33, "step": 12910 }, { "epoch": 
10.714522821576763, "grad_norm": 18.500715255737305, "learning_rate": 1.5716846473029047e-05, "loss": 0.9613, "step": 12911 }, { "epoch": 10.715352697095435, "grad_norm": 32.889949798583984, "learning_rate": 1.571651452282158e-05, "loss": 1.2678, "step": 12912 }, { "epoch": 10.716182572614107, "grad_norm": 24.57909393310547, "learning_rate": 1.5716182572614108e-05, "loss": 1.0936, "step": 12913 }, { "epoch": 10.71701244813278, "grad_norm": 24.783679962158203, "learning_rate": 1.571585062240664e-05, "loss": 0.9115, "step": 12914 }, { "epoch": 10.717842323651452, "grad_norm": 28.11805534362793, "learning_rate": 1.5715518672199172e-05, "loss": 1.0332, "step": 12915 }, { "epoch": 10.718672199170124, "grad_norm": 20.826173782348633, "learning_rate": 1.57151867219917e-05, "loss": 0.8999, "step": 12916 }, { "epoch": 10.719502074688796, "grad_norm": 24.004676818847656, "learning_rate": 1.5714854771784233e-05, "loss": 0.6561, "step": 12917 }, { "epoch": 10.720331950207468, "grad_norm": 33.697269439697266, "learning_rate": 1.5714522821576765e-05, "loss": 0.4754, "step": 12918 }, { "epoch": 10.72116182572614, "grad_norm": 28.372804641723633, "learning_rate": 1.5714190871369297e-05, "loss": 1.2538, "step": 12919 }, { "epoch": 10.721991701244812, "grad_norm": 50.272377014160156, "learning_rate": 1.5713858921161826e-05, "loss": 1.1056, "step": 12920 }, { "epoch": 10.722821576763485, "grad_norm": 51.756961822509766, "learning_rate": 1.5713526970954358e-05, "loss": 1.2729, "step": 12921 }, { "epoch": 10.723651452282157, "grad_norm": 38.71610641479492, "learning_rate": 1.571319502074689e-05, "loss": 0.9005, "step": 12922 }, { "epoch": 10.724481327800829, "grad_norm": 33.01743698120117, "learning_rate": 1.5712863070539422e-05, "loss": 1.6084, "step": 12923 }, { "epoch": 10.725311203319501, "grad_norm": 37.03828048706055, "learning_rate": 1.571253112033195e-05, "loss": 1.1971, "step": 12924 }, { "epoch": 10.726141078838173, "grad_norm": 21.785585403442383, "learning_rate": 
1.5712199170124483e-05, "loss": 1.2905, "step": 12925 }, { "epoch": 10.726970954356846, "grad_norm": 30.265422821044922, "learning_rate": 1.5711867219917015e-05, "loss": 1.1772, "step": 12926 }, { "epoch": 10.727800829875518, "grad_norm": 19.36847496032715, "learning_rate": 1.5711535269709547e-05, "loss": 0.8162, "step": 12927 }, { "epoch": 10.72863070539419, "grad_norm": 23.243181228637695, "learning_rate": 1.5711203319502076e-05, "loss": 1.111, "step": 12928 }, { "epoch": 10.729460580912862, "grad_norm": 36.34516525268555, "learning_rate": 1.5710871369294608e-05, "loss": 1.0631, "step": 12929 }, { "epoch": 10.730290456431534, "grad_norm": 36.43130874633789, "learning_rate": 1.571053941908714e-05, "loss": 1.3541, "step": 12930 }, { "epoch": 10.731120331950207, "grad_norm": 22.788986206054688, "learning_rate": 1.571020746887967e-05, "loss": 1.2345, "step": 12931 }, { "epoch": 10.731950207468879, "grad_norm": 27.794260025024414, "learning_rate": 1.57098755186722e-05, "loss": 0.5602, "step": 12932 }, { "epoch": 10.732780082987551, "grad_norm": 26.056133270263672, "learning_rate": 1.5709543568464733e-05, "loss": 1.1811, "step": 12933 }, { "epoch": 10.733609958506223, "grad_norm": 19.847267150878906, "learning_rate": 1.570921161825726e-05, "loss": 0.6659, "step": 12934 }, { "epoch": 10.734439834024897, "grad_norm": 35.67465591430664, "learning_rate": 1.5708879668049794e-05, "loss": 1.3055, "step": 12935 }, { "epoch": 10.73526970954357, "grad_norm": 31.26683807373047, "learning_rate": 1.5708547717842322e-05, "loss": 1.2167, "step": 12936 }, { "epoch": 10.736099585062242, "grad_norm": 21.3089599609375, "learning_rate": 1.5708215767634855e-05, "loss": 0.6865, "step": 12937 }, { "epoch": 10.736929460580914, "grad_norm": 22.075244903564453, "learning_rate": 1.5707883817427387e-05, "loss": 0.6537, "step": 12938 }, { "epoch": 10.737759336099586, "grad_norm": 24.653242111206055, "learning_rate": 1.570755186721992e-05, "loss": 0.8854, "step": 12939 }, { "epoch": 
10.738589211618258, "grad_norm": 23.532054901123047, "learning_rate": 1.5707219917012447e-05, "loss": 1.2515, "step": 12940 }, { "epoch": 10.73941908713693, "grad_norm": 41.81641387939453, "learning_rate": 1.570688796680498e-05, "loss": 0.7349, "step": 12941 }, { "epoch": 10.740248962655603, "grad_norm": 21.889328002929688, "learning_rate": 1.570655601659751e-05, "loss": 0.6617, "step": 12942 }, { "epoch": 10.741078838174275, "grad_norm": 26.284481048583984, "learning_rate": 1.5706224066390044e-05, "loss": 0.8049, "step": 12943 }, { "epoch": 10.741908713692947, "grad_norm": 42.41669464111328, "learning_rate": 1.5705892116182572e-05, "loss": 1.1887, "step": 12944 }, { "epoch": 10.74273858921162, "grad_norm": 39.912349700927734, "learning_rate": 1.5705560165975105e-05, "loss": 1.224, "step": 12945 }, { "epoch": 10.743568464730291, "grad_norm": 24.625490188598633, "learning_rate": 1.5705228215767637e-05, "loss": 0.9636, "step": 12946 }, { "epoch": 10.744398340248964, "grad_norm": 30.88641929626465, "learning_rate": 1.570489626556017e-05, "loss": 0.677, "step": 12947 }, { "epoch": 10.745228215767636, "grad_norm": 39.924041748046875, "learning_rate": 1.57045643153527e-05, "loss": 1.8236, "step": 12948 }, { "epoch": 10.746058091286308, "grad_norm": 30.886629104614258, "learning_rate": 1.570423236514523e-05, "loss": 1.0697, "step": 12949 }, { "epoch": 10.74688796680498, "grad_norm": 35.62948989868164, "learning_rate": 1.5703900414937762e-05, "loss": 1.0463, "step": 12950 }, { "epoch": 10.747717842323652, "grad_norm": 34.205936431884766, "learning_rate": 1.5703568464730294e-05, "loss": 1.7766, "step": 12951 }, { "epoch": 10.748547717842325, "grad_norm": 56.982513427734375, "learning_rate": 1.5703236514522823e-05, "loss": 1.2046, "step": 12952 }, { "epoch": 10.749377593360997, "grad_norm": 25.39275360107422, "learning_rate": 1.5702904564315355e-05, "loss": 1.1883, "step": 12953 }, { "epoch": 10.750207468879669, "grad_norm": 47.91823959350586, "learning_rate": 
1.5702572614107883e-05, "loss": 1.0037, "step": 12954 }, { "epoch": 10.751037344398341, "grad_norm": 22.148752212524414, "learning_rate": 1.5702240663900416e-05, "loss": 0.6372, "step": 12955 }, { "epoch": 10.751867219917013, "grad_norm": 37.05037307739258, "learning_rate": 1.5701908713692948e-05, "loss": 2.0563, "step": 12956 }, { "epoch": 10.752697095435686, "grad_norm": 29.866748809814453, "learning_rate": 1.5701576763485476e-05, "loss": 0.9674, "step": 12957 }, { "epoch": 10.753526970954358, "grad_norm": 21.96530532836914, "learning_rate": 1.570124481327801e-05, "loss": 0.6539, "step": 12958 }, { "epoch": 10.75435684647303, "grad_norm": 21.96457290649414, "learning_rate": 1.570091286307054e-05, "loss": 0.7435, "step": 12959 }, { "epoch": 10.755186721991702, "grad_norm": 19.862340927124023, "learning_rate": 1.5700580912863073e-05, "loss": 0.4932, "step": 12960 }, { "epoch": 10.756016597510374, "grad_norm": 25.509883880615234, "learning_rate": 1.57002489626556e-05, "loss": 1.045, "step": 12961 }, { "epoch": 10.756846473029047, "grad_norm": 28.792341232299805, "learning_rate": 1.5699917012448133e-05, "loss": 1.0011, "step": 12962 }, { "epoch": 10.757676348547719, "grad_norm": 21.459470748901367, "learning_rate": 1.5699585062240666e-05, "loss": 0.68, "step": 12963 }, { "epoch": 10.758506224066391, "grad_norm": 47.639732360839844, "learning_rate": 1.5699253112033198e-05, "loss": 1.3354, "step": 12964 }, { "epoch": 10.759336099585063, "grad_norm": 21.491779327392578, "learning_rate": 1.5698921161825726e-05, "loss": 0.9864, "step": 12965 }, { "epoch": 10.760165975103735, "grad_norm": 19.06121253967285, "learning_rate": 1.569858921161826e-05, "loss": 0.5637, "step": 12966 }, { "epoch": 10.760995850622407, "grad_norm": 31.956754684448242, "learning_rate": 1.569825726141079e-05, "loss": 1.2886, "step": 12967 }, { "epoch": 10.76182572614108, "grad_norm": 16.072290420532227, "learning_rate": 1.5697925311203323e-05, "loss": 0.7671, "step": 12968 }, { "epoch": 
10.762655601659752, "grad_norm": 27.48858070373535, "learning_rate": 1.569759336099585e-05, "loss": 0.9636, "step": 12969 }, { "epoch": 10.763485477178424, "grad_norm": 26.514633178710938, "learning_rate": 1.5697261410788384e-05, "loss": 0.9993, "step": 12970 }, { "epoch": 10.764315352697096, "grad_norm": 64.24851989746094, "learning_rate": 1.5696929460580916e-05, "loss": 0.6887, "step": 12971 }, { "epoch": 10.765145228215768, "grad_norm": 25.942520141601562, "learning_rate": 1.5696597510373444e-05, "loss": 1.0587, "step": 12972 }, { "epoch": 10.76597510373444, "grad_norm": 20.175336837768555, "learning_rate": 1.5696265560165977e-05, "loss": 0.7338, "step": 12973 }, { "epoch": 10.766804979253113, "grad_norm": 33.5572624206543, "learning_rate": 1.5695933609958505e-05, "loss": 0.8632, "step": 12974 }, { "epoch": 10.767634854771785, "grad_norm": 32.12742614746094, "learning_rate": 1.5695601659751037e-05, "loss": 1.4927, "step": 12975 }, { "epoch": 10.768464730290457, "grad_norm": 29.904508590698242, "learning_rate": 1.569526970954357e-05, "loss": 1.1336, "step": 12976 }, { "epoch": 10.76929460580913, "grad_norm": 24.670581817626953, "learning_rate": 1.56949377593361e-05, "loss": 1.2232, "step": 12977 }, { "epoch": 10.770124481327802, "grad_norm": 25.034547805786133, "learning_rate": 1.569460580912863e-05, "loss": 0.7258, "step": 12978 }, { "epoch": 10.770954356846474, "grad_norm": 32.34086990356445, "learning_rate": 1.5694273858921162e-05, "loss": 1.2476, "step": 12979 }, { "epoch": 10.771784232365146, "grad_norm": 35.322593688964844, "learning_rate": 1.5693941908713694e-05, "loss": 1.2297, "step": 12980 }, { "epoch": 10.772614107883818, "grad_norm": 46.0033073425293, "learning_rate": 1.5693609958506227e-05, "loss": 0.636, "step": 12981 }, { "epoch": 10.77344398340249, "grad_norm": 23.544889450073242, "learning_rate": 1.5693278008298755e-05, "loss": 0.891, "step": 12982 }, { "epoch": 10.774273858921163, "grad_norm": 22.74131202697754, "learning_rate": 
1.5692946058091287e-05, "loss": 0.7813, "step": 12983 }, { "epoch": 10.775103734439835, "grad_norm": 21.378311157226562, "learning_rate": 1.569261410788382e-05, "loss": 0.7612, "step": 12984 }, { "epoch": 10.775933609958507, "grad_norm": 30.14836311340332, "learning_rate": 1.569228215767635e-05, "loss": 0.7981, "step": 12985 }, { "epoch": 10.77676348547718, "grad_norm": 48.26728820800781, "learning_rate": 1.569195020746888e-05, "loss": 1.5219, "step": 12986 }, { "epoch": 10.777593360995851, "grad_norm": 24.20222282409668, "learning_rate": 1.5691618257261412e-05, "loss": 0.7768, "step": 12987 }, { "epoch": 10.778423236514524, "grad_norm": 17.907812118530273, "learning_rate": 1.5691286307053945e-05, "loss": 0.7164, "step": 12988 }, { "epoch": 10.779253112033196, "grad_norm": 41.342288970947266, "learning_rate": 1.5690954356846477e-05, "loss": 1.2502, "step": 12989 }, { "epoch": 10.780082987551868, "grad_norm": 27.813209533691406, "learning_rate": 1.5690622406639005e-05, "loss": 0.6617, "step": 12990 }, { "epoch": 10.78091286307054, "grad_norm": 25.46149444580078, "learning_rate": 1.5690290456431537e-05, "loss": 1.1057, "step": 12991 }, { "epoch": 10.781742738589212, "grad_norm": 46.66425323486328, "learning_rate": 1.5689958506224066e-05, "loss": 1.3906, "step": 12992 }, { "epoch": 10.782572614107885, "grad_norm": 46.23161697387695, "learning_rate": 1.56896265560166e-05, "loss": 1.7222, "step": 12993 }, { "epoch": 10.783402489626557, "grad_norm": 60.73182678222656, "learning_rate": 1.568929460580913e-05, "loss": 0.6623, "step": 12994 }, { "epoch": 10.784232365145229, "grad_norm": 33.49846267700195, "learning_rate": 1.568896265560166e-05, "loss": 0.8721, "step": 12995 }, { "epoch": 10.785062240663901, "grad_norm": 31.44457244873047, "learning_rate": 1.568863070539419e-05, "loss": 0.7507, "step": 12996 }, { "epoch": 10.785892116182573, "grad_norm": 21.379018783569336, "learning_rate": 1.5688298755186723e-05, "loss": 0.797, "step": 12997 }, { "epoch": 10.786721991701246, 
"grad_norm": 115.8975601196289, "learning_rate": 1.5687966804979255e-05, "loss": 0.8778, "step": 12998 }, { "epoch": 10.787551867219918, "grad_norm": 22.60971450805664, "learning_rate": 1.5687634854771784e-05, "loss": 1.1499, "step": 12999 }, { "epoch": 10.78838174273859, "grad_norm": 19.038183212280273, "learning_rate": 1.5687302904564316e-05, "loss": 1.0269, "step": 13000 }, { "epoch": 10.789211618257262, "grad_norm": 53.81783676147461, "learning_rate": 1.568697095435685e-05, "loss": 1.2094, "step": 13001 }, { "epoch": 10.790041493775934, "grad_norm": 22.959880828857422, "learning_rate": 1.568663900414938e-05, "loss": 0.8231, "step": 13002 }, { "epoch": 10.790871369294607, "grad_norm": 44.79844284057617, "learning_rate": 1.568630705394191e-05, "loss": 1.3537, "step": 13003 }, { "epoch": 10.791701244813279, "grad_norm": 44.567779541015625, "learning_rate": 1.568597510373444e-05, "loss": 1.4723, "step": 13004 }, { "epoch": 10.792531120331951, "grad_norm": 29.890514373779297, "learning_rate": 1.5685643153526973e-05, "loss": 0.9373, "step": 13005 }, { "epoch": 10.793360995850623, "grad_norm": 27.184179306030273, "learning_rate": 1.5685311203319506e-05, "loss": 1.05, "step": 13006 }, { "epoch": 10.794190871369295, "grad_norm": 34.19107437133789, "learning_rate": 1.5684979253112034e-05, "loss": 1.2832, "step": 13007 }, { "epoch": 10.795020746887968, "grad_norm": 21.559675216674805, "learning_rate": 1.5684647302904566e-05, "loss": 0.6894, "step": 13008 }, { "epoch": 10.79585062240664, "grad_norm": 27.341339111328125, "learning_rate": 1.56843153526971e-05, "loss": 0.9406, "step": 13009 }, { "epoch": 10.796680497925312, "grad_norm": 33.39335250854492, "learning_rate": 1.5683983402489627e-05, "loss": 1.2239, "step": 13010 }, { "epoch": 10.797510373443984, "grad_norm": 32.33154296875, "learning_rate": 1.568365145228216e-05, "loss": 0.889, "step": 13011 }, { "epoch": 10.798340248962656, "grad_norm": 24.132946014404297, "learning_rate": 1.568331950207469e-05, "loss": 0.8841, 
"step": 13012 }, { "epoch": 10.799170124481329, "grad_norm": 24.634384155273438, "learning_rate": 1.568298755186722e-05, "loss": 0.6454, "step": 13013 }, { "epoch": 10.8, "grad_norm": 22.527450561523438, "learning_rate": 1.5682655601659752e-05, "loss": 0.6378, "step": 13014 }, { "epoch": 10.800829875518673, "grad_norm": 28.44883155822754, "learning_rate": 1.568232365145228e-05, "loss": 1.1704, "step": 13015 }, { "epoch": 10.801659751037345, "grad_norm": 21.548917770385742, "learning_rate": 1.5681991701244813e-05, "loss": 0.4943, "step": 13016 }, { "epoch": 10.802489626556017, "grad_norm": 33.642696380615234, "learning_rate": 1.5681659751037345e-05, "loss": 0.5554, "step": 13017 }, { "epoch": 10.80331950207469, "grad_norm": 26.168140411376953, "learning_rate": 1.5681327800829877e-05, "loss": 0.8168, "step": 13018 }, { "epoch": 10.804149377593362, "grad_norm": 33.40690231323242, "learning_rate": 1.5680995850622406e-05, "loss": 0.8781, "step": 13019 }, { "epoch": 10.804979253112034, "grad_norm": 65.94953918457031, "learning_rate": 1.5680663900414938e-05, "loss": 1.171, "step": 13020 }, { "epoch": 10.805809128630706, "grad_norm": 27.43096351623535, "learning_rate": 1.568033195020747e-05, "loss": 0.4682, "step": 13021 }, { "epoch": 10.806639004149378, "grad_norm": 31.043800354003906, "learning_rate": 1.5680000000000002e-05, "loss": 0.8972, "step": 13022 }, { "epoch": 10.80746887966805, "grad_norm": 37.025115966796875, "learning_rate": 1.567966804979253e-05, "loss": 0.6107, "step": 13023 }, { "epoch": 10.808298755186723, "grad_norm": 31.30979347229004, "learning_rate": 1.5679336099585063e-05, "loss": 0.8006, "step": 13024 }, { "epoch": 10.809128630705395, "grad_norm": 33.95930480957031, "learning_rate": 1.5679004149377595e-05, "loss": 1.4682, "step": 13025 }, { "epoch": 10.809958506224067, "grad_norm": 46.10725021362305, "learning_rate": 1.5678672199170127e-05, "loss": 1.6662, "step": 13026 }, { "epoch": 10.81078838174274, "grad_norm": 34.136287689208984, 
"learning_rate": 1.567834024896266e-05, "loss": 1.3823, "step": 13027 }, { "epoch": 10.811618257261411, "grad_norm": 16.471952438354492, "learning_rate": 1.5678008298755188e-05, "loss": 1.1889, "step": 13028 }, { "epoch": 10.812448132780084, "grad_norm": 23.736766815185547, "learning_rate": 1.567767634854772e-05, "loss": 0.653, "step": 13029 }, { "epoch": 10.813278008298756, "grad_norm": 26.58735466003418, "learning_rate": 1.567734439834025e-05, "loss": 0.5462, "step": 13030 }, { "epoch": 10.814107883817428, "grad_norm": 35.162940979003906, "learning_rate": 1.567701244813278e-05, "loss": 0.7653, "step": 13031 }, { "epoch": 10.8149377593361, "grad_norm": 33.49311447143555, "learning_rate": 1.5676680497925313e-05, "loss": 1.3036, "step": 13032 }, { "epoch": 10.815767634854772, "grad_norm": 16.97074317932129, "learning_rate": 1.5676348547717842e-05, "loss": 0.8416, "step": 13033 }, { "epoch": 10.816597510373445, "grad_norm": 43.226524353027344, "learning_rate": 1.5676016597510374e-05, "loss": 1.315, "step": 13034 }, { "epoch": 10.817427385892117, "grad_norm": 40.26278305053711, "learning_rate": 1.5675684647302906e-05, "loss": 1.5431, "step": 13035 }, { "epoch": 10.818257261410789, "grad_norm": 28.16303825378418, "learning_rate": 1.5675352697095435e-05, "loss": 1.0194, "step": 13036 }, { "epoch": 10.819087136929461, "grad_norm": 31.02773094177246, "learning_rate": 1.5675020746887967e-05, "loss": 1.2525, "step": 13037 }, { "epoch": 10.819917012448133, "grad_norm": 25.177322387695312, "learning_rate": 1.56746887966805e-05, "loss": 1.0212, "step": 13038 }, { "epoch": 10.820746887966806, "grad_norm": 52.84494400024414, "learning_rate": 1.567435684647303e-05, "loss": 0.7724, "step": 13039 }, { "epoch": 10.821576763485478, "grad_norm": 26.54474639892578, "learning_rate": 1.567402489626556e-05, "loss": 0.8988, "step": 13040 }, { "epoch": 10.82240663900415, "grad_norm": 25.332843780517578, "learning_rate": 1.5673692946058092e-05, "loss": 1.0978, "step": 13041 }, { "epoch": 
10.823236514522822, "grad_norm": 37.49795913696289, "learning_rate": 1.5673360995850624e-05, "loss": 0.8291, "step": 13042 }, { "epoch": 10.824066390041494, "grad_norm": 13.677711486816406, "learning_rate": 1.5673029045643156e-05, "loss": 0.4363, "step": 13043 }, { "epoch": 10.824896265560167, "grad_norm": 38.485740661621094, "learning_rate": 1.5672697095435685e-05, "loss": 1.6069, "step": 13044 }, { "epoch": 10.825726141078839, "grad_norm": 25.177431106567383, "learning_rate": 1.5672365145228217e-05, "loss": 0.718, "step": 13045 }, { "epoch": 10.826556016597511, "grad_norm": 68.63544464111328, "learning_rate": 1.567203319502075e-05, "loss": 1.3011, "step": 13046 }, { "epoch": 10.827385892116183, "grad_norm": 25.20284080505371, "learning_rate": 1.567170124481328e-05, "loss": 0.9999, "step": 13047 }, { "epoch": 10.828215767634855, "grad_norm": 24.58271598815918, "learning_rate": 1.567136929460581e-05, "loss": 0.9818, "step": 13048 }, { "epoch": 10.829045643153528, "grad_norm": 28.192991256713867, "learning_rate": 1.5671037344398342e-05, "loss": 1.0875, "step": 13049 }, { "epoch": 10.8298755186722, "grad_norm": 42.585086822509766, "learning_rate": 1.5670705394190874e-05, "loss": 0.4093, "step": 13050 }, { "epoch": 10.830705394190872, "grad_norm": 21.534948348999023, "learning_rate": 1.5670373443983403e-05, "loss": 0.9724, "step": 13051 }, { "epoch": 10.831535269709544, "grad_norm": 20.72092628479004, "learning_rate": 1.5670041493775935e-05, "loss": 1.155, "step": 13052 }, { "epoch": 10.832365145228216, "grad_norm": 26.64068031311035, "learning_rate": 1.5669709543568464e-05, "loss": 0.9791, "step": 13053 }, { "epoch": 10.833195020746889, "grad_norm": 25.67548942565918, "learning_rate": 1.5669377593360996e-05, "loss": 0.5542, "step": 13054 }, { "epoch": 10.83402489626556, "grad_norm": 30.422883987426758, "learning_rate": 1.5669045643153528e-05, "loss": 0.6876, "step": 13055 }, { "epoch": 10.834854771784233, "grad_norm": 33.10129928588867, "learning_rate": 
1.566871369294606e-05, "loss": 0.9733, "step": 13056 }, { "epoch": 10.835684647302905, "grad_norm": 18.445903778076172, "learning_rate": 1.566838174273859e-05, "loss": 0.7799, "step": 13057 }, { "epoch": 10.836514522821577, "grad_norm": 32.63671875, "learning_rate": 1.566804979253112e-05, "loss": 1.2587, "step": 13058 }, { "epoch": 10.83734439834025, "grad_norm": 21.01382827758789, "learning_rate": 1.5667717842323653e-05, "loss": 0.8263, "step": 13059 }, { "epoch": 10.838174273858922, "grad_norm": 17.98056411743164, "learning_rate": 1.5667385892116185e-05, "loss": 0.8312, "step": 13060 }, { "epoch": 10.839004149377594, "grad_norm": 38.94369888305664, "learning_rate": 1.5667053941908714e-05, "loss": 1.3009, "step": 13061 }, { "epoch": 10.839834024896266, "grad_norm": 30.353660583496094, "learning_rate": 1.5666721991701246e-05, "loss": 1.2755, "step": 13062 }, { "epoch": 10.840663900414938, "grad_norm": 31.484590530395508, "learning_rate": 1.5666390041493778e-05, "loss": 1.3253, "step": 13063 }, { "epoch": 10.84149377593361, "grad_norm": 22.213571548461914, "learning_rate": 1.566605809128631e-05, "loss": 0.6797, "step": 13064 }, { "epoch": 10.842323651452283, "grad_norm": 36.259056091308594, "learning_rate": 1.566572614107884e-05, "loss": 1.5738, "step": 13065 }, { "epoch": 10.843153526970955, "grad_norm": 29.474271774291992, "learning_rate": 1.566539419087137e-05, "loss": 0.6322, "step": 13066 }, { "epoch": 10.843983402489627, "grad_norm": 26.745901107788086, "learning_rate": 1.5665062240663903e-05, "loss": 0.8169, "step": 13067 }, { "epoch": 10.8448132780083, "grad_norm": 17.595443725585938, "learning_rate": 1.5664730290456435e-05, "loss": 0.7559, "step": 13068 }, { "epoch": 10.845643153526972, "grad_norm": NaN, "learning_rate": 1.5664730290456435e-05, "loss": 1.6168, "step": 13069 }, { "epoch": 10.846473029045644, "grad_norm": 27.441539764404297, "learning_rate": 1.5664398340248964e-05, "loss": 1.0835, "step": 13070 }, { "epoch": 10.847302904564316, "grad_norm": 
21.737638473510742, "learning_rate": 1.5664066390041496e-05, "loss": 0.8864, "step": 13071 }, { "epoch": 10.848132780082988, "grad_norm": 25.274688720703125, "learning_rate": 1.5663734439834025e-05, "loss": 0.9413, "step": 13072 }, { "epoch": 10.84896265560166, "grad_norm": 17.3131160736084, "learning_rate": 1.5663402489626557e-05, "loss": 0.7655, "step": 13073 }, { "epoch": 10.849792531120332, "grad_norm": 50.30132293701172, "learning_rate": 1.566307053941909e-05, "loss": 1.9433, "step": 13074 }, { "epoch": 10.850622406639005, "grad_norm": 19.71530532836914, "learning_rate": 1.5662738589211618e-05, "loss": 1.1012, "step": 13075 }, { "epoch": 10.851452282157677, "grad_norm": 33.94110870361328, "learning_rate": 1.566240663900415e-05, "loss": 0.663, "step": 13076 }, { "epoch": 10.852282157676349, "grad_norm": 20.663366317749023, "learning_rate": 1.5662074688796682e-05, "loss": 0.8129, "step": 13077 }, { "epoch": 10.853112033195021, "grad_norm": 47.031803131103516, "learning_rate": 1.566174273858921e-05, "loss": 1.4784, "step": 13078 }, { "epoch": 10.853941908713693, "grad_norm": 25.774627685546875, "learning_rate": 1.5661410788381743e-05, "loss": 0.7865, "step": 13079 }, { "epoch": 10.854771784232366, "grad_norm": 29.145540237426758, "learning_rate": 1.5661078838174275e-05, "loss": 1.0707, "step": 13080 }, { "epoch": 10.855601659751038, "grad_norm": 35.89156723022461, "learning_rate": 1.5660746887966807e-05, "loss": 0.81, "step": 13081 }, { "epoch": 10.85643153526971, "grad_norm": 132.89146423339844, "learning_rate": 1.566041493775934e-05, "loss": 0.7005, "step": 13082 }, { "epoch": 10.857261410788382, "grad_norm": 27.05105209350586, "learning_rate": 1.5660082987551868e-05, "loss": 0.9241, "step": 13083 }, { "epoch": 10.858091286307054, "grad_norm": 83.85198211669922, "learning_rate": 1.56597510373444e-05, "loss": 1.6883, "step": 13084 }, { "epoch": 10.858921161825727, "grad_norm": 77.6290512084961, "learning_rate": 1.5659419087136932e-05, "loss": 1.2056, "step": 
13085 }, { "epoch": 10.859751037344399, "grad_norm": 42.510501861572266, "learning_rate": 1.5659087136929464e-05, "loss": 0.9573, "step": 13086 }, { "epoch": 10.860580912863071, "grad_norm": 26.141752243041992, "learning_rate": 1.5658755186721993e-05, "loss": 0.8509, "step": 13087 }, { "epoch": 10.861410788381743, "grad_norm": 43.655181884765625, "learning_rate": 1.5658423236514525e-05, "loss": 1.7242, "step": 13088 }, { "epoch": 10.862240663900415, "grad_norm": 28.718643188476562, "learning_rate": 1.5658091286307057e-05, "loss": 1.0983, "step": 13089 }, { "epoch": 10.863070539419088, "grad_norm": 45.733829498291016, "learning_rate": 1.5657759336099586e-05, "loss": 1.6611, "step": 13090 }, { "epoch": 10.86390041493776, "grad_norm": 22.444358825683594, "learning_rate": 1.5657427385892118e-05, "loss": 0.7926, "step": 13091 }, { "epoch": 10.864730290456432, "grad_norm": 24.97829246520996, "learning_rate": 1.5657095435684647e-05, "loss": 1.2085, "step": 13092 }, { "epoch": 10.865560165975104, "grad_norm": 33.12797927856445, "learning_rate": 1.565676348547718e-05, "loss": 0.5645, "step": 13093 }, { "epoch": 10.866390041493776, "grad_norm": 25.703712463378906, "learning_rate": 1.565643153526971e-05, "loss": 0.8046, "step": 13094 }, { "epoch": 10.867219917012449, "grad_norm": 39.892818450927734, "learning_rate": 1.565609958506224e-05, "loss": 1.563, "step": 13095 }, { "epoch": 10.86804979253112, "grad_norm": 36.78218460083008, "learning_rate": 1.565576763485477e-05, "loss": 1.4886, "step": 13096 }, { "epoch": 10.868879668049793, "grad_norm": 29.364944458007812, "learning_rate": 1.5655435684647304e-05, "loss": 0.8517, "step": 13097 }, { "epoch": 10.869709543568465, "grad_norm": 27.39267349243164, "learning_rate": 1.5655103734439836e-05, "loss": 0.9637, "step": 13098 }, { "epoch": 10.870539419087137, "grad_norm": 52.016231536865234, "learning_rate": 1.5654771784232365e-05, "loss": 0.6568, "step": 13099 }, { "epoch": 10.87136929460581, "grad_norm": 31.394214630126953, 
"learning_rate": 1.5654439834024897e-05, "loss": 0.7322, "step": 13100 }, { "epoch": 10.872199170124482, "grad_norm": 23.41533088684082, "learning_rate": 1.565410788381743e-05, "loss": 0.5452, "step": 13101 }, { "epoch": 10.873029045643154, "grad_norm": 34.586788177490234, "learning_rate": 1.565377593360996e-05, "loss": 1.2006, "step": 13102 }, { "epoch": 10.873858921161826, "grad_norm": 40.885772705078125, "learning_rate": 1.565344398340249e-05, "loss": 1.4724, "step": 13103 }, { "epoch": 10.874688796680498, "grad_norm": 55.60905075073242, "learning_rate": 1.5653112033195022e-05, "loss": 1.0257, "step": 13104 }, { "epoch": 10.87551867219917, "grad_norm": 25.342222213745117, "learning_rate": 1.5652780082987554e-05, "loss": 0.8765, "step": 13105 }, { "epoch": 10.876348547717843, "grad_norm": 25.58391571044922, "learning_rate": 1.5652448132780086e-05, "loss": 0.902, "step": 13106 }, { "epoch": 10.877178423236515, "grad_norm": 52.04306411743164, "learning_rate": 1.5652116182572618e-05, "loss": 1.1885, "step": 13107 }, { "epoch": 10.878008298755187, "grad_norm": 29.69977378845215, "learning_rate": 1.5651784232365147e-05, "loss": 0.7428, "step": 13108 }, { "epoch": 10.87883817427386, "grad_norm": 19.3131103515625, "learning_rate": 1.565145228215768e-05, "loss": 0.5067, "step": 13109 }, { "epoch": 10.879668049792532, "grad_norm": 28.735897064208984, "learning_rate": 1.5651120331950208e-05, "loss": 1.5331, "step": 13110 }, { "epoch": 10.880497925311204, "grad_norm": 26.010820388793945, "learning_rate": 1.565078838174274e-05, "loss": 0.8525, "step": 13111 }, { "epoch": 10.881327800829876, "grad_norm": 29.13235855102539, "learning_rate": 1.5650456431535272e-05, "loss": 0.654, "step": 13112 }, { "epoch": 10.882157676348548, "grad_norm": 72.06893920898438, "learning_rate": 1.56501244813278e-05, "loss": 1.2876, "step": 13113 }, { "epoch": 10.88298755186722, "grad_norm": 40.783897399902344, "learning_rate": 1.5649792531120333e-05, "loss": 0.8191, "step": 13114 }, { "epoch": 
10.883817427385893, "grad_norm": 17.753572463989258, "learning_rate": 1.5649460580912865e-05, "loss": 0.6016, "step": 13115 }, { "epoch": 10.884647302904565, "grad_norm": 25.277742385864258, "learning_rate": 1.5649128630705393e-05, "loss": 0.8956, "step": 13116 }, { "epoch": 10.885477178423237, "grad_norm": 21.150440216064453, "learning_rate": 1.5648796680497926e-05, "loss": 0.5879, "step": 13117 }, { "epoch": 10.88630705394191, "grad_norm": 54.551513671875, "learning_rate": 1.5648464730290458e-05, "loss": 0.7582, "step": 13118 }, { "epoch": 10.887136929460581, "grad_norm": 19.99100112915039, "learning_rate": 1.564813278008299e-05, "loss": 0.6273, "step": 13119 }, { "epoch": 10.887966804979254, "grad_norm": 25.169828414916992, "learning_rate": 1.564780082987552e-05, "loss": 1.0182, "step": 13120 }, { "epoch": 10.888796680497926, "grad_norm": 44.3314323425293, "learning_rate": 1.564746887966805e-05, "loss": 0.8619, "step": 13121 }, { "epoch": 10.889626556016598, "grad_norm": 52.315067291259766, "learning_rate": 1.5647136929460583e-05, "loss": 1.8504, "step": 13122 }, { "epoch": 10.89045643153527, "grad_norm": 19.024179458618164, "learning_rate": 1.5646804979253115e-05, "loss": 1.1736, "step": 13123 }, { "epoch": 10.891286307053942, "grad_norm": 24.704803466796875, "learning_rate": 1.5646473029045644e-05, "loss": 0.9968, "step": 13124 }, { "epoch": 10.892116182572614, "grad_norm": 26.766950607299805, "learning_rate": 1.5646141078838176e-05, "loss": 1.2427, "step": 13125 }, { "epoch": 10.892946058091287, "grad_norm": 33.27659606933594, "learning_rate": 1.5645809128630708e-05, "loss": 0.902, "step": 13126 }, { "epoch": 10.893775933609959, "grad_norm": 23.848651885986328, "learning_rate": 1.564547717842324e-05, "loss": 0.6829, "step": 13127 }, { "epoch": 10.894605809128631, "grad_norm": 29.74351692199707, "learning_rate": 1.564514522821577e-05, "loss": 0.903, "step": 13128 }, { "epoch": 10.895435684647303, "grad_norm": 26.823862075805664, "learning_rate": 
1.56448132780083e-05, "loss": 1.1436, "step": 13129 }, { "epoch": 10.896265560165975, "grad_norm": 29.062788009643555, "learning_rate": 1.5644481327800833e-05, "loss": 1.4552, "step": 13130 }, { "epoch": 10.897095435684648, "grad_norm": 23.51688003540039, "learning_rate": 1.564414937759336e-05, "loss": 0.7773, "step": 13131 }, { "epoch": 10.89792531120332, "grad_norm": 25.61040496826172, "learning_rate": 1.5643817427385894e-05, "loss": 0.6945, "step": 13132 }, { "epoch": 10.898755186721992, "grad_norm": 21.897886276245117, "learning_rate": 1.5643485477178422e-05, "loss": 0.7739, "step": 13133 }, { "epoch": 10.899585062240664, "grad_norm": 28.930265426635742, "learning_rate": 1.5643153526970954e-05, "loss": 0.8568, "step": 13134 }, { "epoch": 10.900414937759336, "grad_norm": 26.956865310668945, "learning_rate": 1.5642821576763487e-05, "loss": 0.9442, "step": 13135 }, { "epoch": 10.901244813278009, "grad_norm": 22.24260902404785, "learning_rate": 1.564248962655602e-05, "loss": 0.4195, "step": 13136 }, { "epoch": 10.90207468879668, "grad_norm": 88.13616180419922, "learning_rate": 1.5642157676348547e-05, "loss": 0.7006, "step": 13137 }, { "epoch": 10.902904564315353, "grad_norm": 26.087017059326172, "learning_rate": 1.564182572614108e-05, "loss": 0.867, "step": 13138 }, { "epoch": 10.903734439834025, "grad_norm": 24.270732879638672, "learning_rate": 1.564149377593361e-05, "loss": 1.0638, "step": 13139 }, { "epoch": 10.904564315352697, "grad_norm": 39.961524963378906, "learning_rate": 1.5641161825726144e-05, "loss": 1.235, "step": 13140 }, { "epoch": 10.90539419087137, "grad_norm": 34.8082160949707, "learning_rate": 1.5640829875518672e-05, "loss": 1.1986, "step": 13141 }, { "epoch": 10.906224066390042, "grad_norm": 24.464153289794922, "learning_rate": 1.5640497925311205e-05, "loss": 0.9676, "step": 13142 }, { "epoch": 10.907053941908714, "grad_norm": 24.234172821044922, "learning_rate": 1.5640165975103737e-05, "loss": 1.1993, "step": 13143 }, { "epoch": 
10.907883817427386, "grad_norm": 36.347408294677734, "learning_rate": 1.563983402489627e-05, "loss": 1.5446, "step": 13144 }, { "epoch": 10.908713692946058, "grad_norm": 30.685775756835938, "learning_rate": 1.5639502074688797e-05, "loss": 1.4082, "step": 13145 }, { "epoch": 10.90954356846473, "grad_norm": 14.486976623535156, "learning_rate": 1.563917012448133e-05, "loss": 0.4817, "step": 13146 }, { "epoch": 10.910373443983403, "grad_norm": 22.361724853515625, "learning_rate": 1.5638838174273862e-05, "loss": 0.6045, "step": 13147 }, { "epoch": 10.911203319502075, "grad_norm": 23.297571182250977, "learning_rate": 1.563850622406639e-05, "loss": 1.0791, "step": 13148 }, { "epoch": 10.912033195020747, "grad_norm": 18.403223037719727, "learning_rate": 1.5638174273858923e-05, "loss": 0.3316, "step": 13149 }, { "epoch": 10.91286307053942, "grad_norm": 24.788814544677734, "learning_rate": 1.5637842323651455e-05, "loss": 1.3531, "step": 13150 }, { "epoch": 10.913692946058092, "grad_norm": 84.10678100585938, "learning_rate": 1.5637510373443983e-05, "loss": 0.8862, "step": 13151 }, { "epoch": 10.914522821576764, "grad_norm": 15.508625030517578, "learning_rate": 1.5637178423236515e-05, "loss": 0.9039, "step": 13152 }, { "epoch": 10.915352697095436, "grad_norm": 33.21190643310547, "learning_rate": 1.5636846473029048e-05, "loss": 0.8195, "step": 13153 }, { "epoch": 10.916182572614108, "grad_norm": 24.9794979095459, "learning_rate": 1.5636514522821576e-05, "loss": 0.6665, "step": 13154 }, { "epoch": 10.91701244813278, "grad_norm": 32.059730529785156, "learning_rate": 1.563618257261411e-05, "loss": 0.8009, "step": 13155 }, { "epoch": 10.917842323651453, "grad_norm": 30.28691864013672, "learning_rate": 1.563585062240664e-05, "loss": 1.1286, "step": 13156 }, { "epoch": 10.918672199170125, "grad_norm": 32.56866455078125, "learning_rate": 1.563551867219917e-05, "loss": 1.816, "step": 13157 }, { "epoch": 10.919502074688797, "grad_norm": 21.04045295715332, "learning_rate": 
1.56351867219917e-05, "loss": 0.8124, "step": 13158 }, { "epoch": 10.92033195020747, "grad_norm": 26.1612491607666, "learning_rate": 1.5634854771784233e-05, "loss": 0.5914, "step": 13159 }, { "epoch": 10.921161825726141, "grad_norm": 29.149045944213867, "learning_rate": 1.5634522821576766e-05, "loss": 0.6937, "step": 13160 }, { "epoch": 10.921991701244814, "grad_norm": 36.19776153564453, "learning_rate": 1.5634190871369298e-05, "loss": 1.421, "step": 13161 }, { "epoch": 10.922821576763486, "grad_norm": 53.054080963134766, "learning_rate": 1.5633858921161826e-05, "loss": 1.1393, "step": 13162 }, { "epoch": 10.923651452282158, "grad_norm": 16.146814346313477, "learning_rate": 1.563352697095436e-05, "loss": 0.5709, "step": 13163 }, { "epoch": 10.92448132780083, "grad_norm": 44.34049606323242, "learning_rate": 1.563319502074689e-05, "loss": 0.7802, "step": 13164 }, { "epoch": 10.925311203319502, "grad_norm": 33.86005401611328, "learning_rate": 1.5632863070539423e-05, "loss": 1.0298, "step": 13165 }, { "epoch": 10.926141078838175, "grad_norm": 19.69382667541504, "learning_rate": 1.563253112033195e-05, "loss": 0.7889, "step": 13166 }, { "epoch": 10.926970954356847, "grad_norm": 29.484384536743164, "learning_rate": 1.5632199170124484e-05, "loss": 0.7526, "step": 13167 }, { "epoch": 10.927800829875519, "grad_norm": 20.521926879882812, "learning_rate": 1.5631867219917016e-05, "loss": 0.6314, "step": 13168 }, { "epoch": 10.928630705394191, "grad_norm": 24.257057189941406, "learning_rate": 1.5631535269709544e-05, "loss": 0.801, "step": 13169 }, { "epoch": 10.929460580912863, "grad_norm": 31.374765396118164, "learning_rate": 1.5631203319502076e-05, "loss": 0.6032, "step": 13170 }, { "epoch": 10.930290456431536, "grad_norm": 19.351091384887695, "learning_rate": 1.5630871369294605e-05, "loss": 0.9346, "step": 13171 }, { "epoch": 10.931120331950208, "grad_norm": 30.289199829101562, "learning_rate": 1.5630539419087137e-05, "loss": 0.8109, "step": 13172 }, { "epoch": 
10.93195020746888, "grad_norm": 65.3702163696289, "learning_rate": 1.563020746887967e-05, "loss": 0.7754, "step": 13173 }, { "epoch": 10.932780082987552, "grad_norm": 27.419221878051758, "learning_rate": 1.5629875518672198e-05, "loss": 1.4188, "step": 13174 }, { "epoch": 10.933609958506224, "grad_norm": 18.287492752075195, "learning_rate": 1.562954356846473e-05, "loss": 0.6656, "step": 13175 }, { "epoch": 10.934439834024896, "grad_norm": 57.11397933959961, "learning_rate": 1.5629211618257262e-05, "loss": 0.7286, "step": 13176 }, { "epoch": 10.935269709543569, "grad_norm": 25.86751937866211, "learning_rate": 1.5628879668049794e-05, "loss": 1.1761, "step": 13177 }, { "epoch": 10.936099585062241, "grad_norm": 25.68511962890625, "learning_rate": 1.5628547717842323e-05, "loss": 1.1875, "step": 13178 }, { "epoch": 10.936929460580913, "grad_norm": 24.818912506103516, "learning_rate": 1.5628215767634855e-05, "loss": 0.5937, "step": 13179 }, { "epoch": 10.937759336099585, "grad_norm": 40.54454040527344, "learning_rate": 1.5627883817427387e-05, "loss": 1.1709, "step": 13180 }, { "epoch": 10.938589211618257, "grad_norm": 25.107547760009766, "learning_rate": 1.562755186721992e-05, "loss": 0.9313, "step": 13181 }, { "epoch": 10.93941908713693, "grad_norm": 28.524084091186523, "learning_rate": 1.5627219917012448e-05, "loss": 0.9721, "step": 13182 }, { "epoch": 10.940248962655602, "grad_norm": 39.91887664794922, "learning_rate": 1.562688796680498e-05, "loss": 1.2807, "step": 13183 }, { "epoch": 10.941078838174274, "grad_norm": 45.57924270629883, "learning_rate": 1.5626556016597512e-05, "loss": 0.9406, "step": 13184 }, { "epoch": 10.941908713692946, "grad_norm": 35.882911682128906, "learning_rate": 1.5626224066390045e-05, "loss": 0.943, "step": 13185 }, { "epoch": 10.942738589211618, "grad_norm": 27.028356552124023, "learning_rate": 1.5625892116182577e-05, "loss": 0.9497, "step": 13186 }, { "epoch": 10.94356846473029, "grad_norm": 34.26891326904297, "learning_rate": 
1.5625560165975105e-05, "loss": 1.0493, "step": 13187 }, { "epoch": 10.944398340248963, "grad_norm": 31.711902618408203, "learning_rate": 1.5625228215767637e-05, "loss": 1.0531, "step": 13188 }, { "epoch": 10.945228215767635, "grad_norm": 32.26546859741211, "learning_rate": 1.5624896265560166e-05, "loss": 0.8572, "step": 13189 }, { "epoch": 10.946058091286307, "grad_norm": 23.129898071289062, "learning_rate": 1.5624564315352698e-05, "loss": 1.1578, "step": 13190 }, { "epoch": 10.94688796680498, "grad_norm": 32.57297897338867, "learning_rate": 1.562423236514523e-05, "loss": 1.3326, "step": 13191 }, { "epoch": 10.947717842323652, "grad_norm": 26.688148498535156, "learning_rate": 1.562390041493776e-05, "loss": 0.8114, "step": 13192 }, { "epoch": 10.948547717842324, "grad_norm": 21.925556182861328, "learning_rate": 1.562356846473029e-05, "loss": 1.0723, "step": 13193 }, { "epoch": 10.949377593360996, "grad_norm": 30.513330459594727, "learning_rate": 1.5623236514522823e-05, "loss": 0.9081, "step": 13194 }, { "epoch": 10.950207468879668, "grad_norm": 25.84771728515625, "learning_rate": 1.5622904564315352e-05, "loss": 0.8845, "step": 13195 }, { "epoch": 10.95103734439834, "grad_norm": 14.82620906829834, "learning_rate": 1.5622572614107884e-05, "loss": 0.9673, "step": 13196 }, { "epoch": 10.951867219917013, "grad_norm": 15.52076244354248, "learning_rate": 1.5622240663900416e-05, "loss": 0.5604, "step": 13197 }, { "epoch": 10.952697095435685, "grad_norm": 21.347084045410156, "learning_rate": 1.562190871369295e-05, "loss": 1.0607, "step": 13198 }, { "epoch": 10.953526970954357, "grad_norm": 41.35366439819336, "learning_rate": 1.5621576763485477e-05, "loss": 0.9125, "step": 13199 }, { "epoch": 10.95435684647303, "grad_norm": 27.69632911682129, "learning_rate": 1.562124481327801e-05, "loss": 0.6308, "step": 13200 }, { "epoch": 10.955186721991701, "grad_norm": 27.863815307617188, "learning_rate": 1.562091286307054e-05, "loss": 0.9562, "step": 13201 }, { "epoch": 
10.956016597510374, "grad_norm": 19.864822387695312, "learning_rate": 1.5620580912863073e-05, "loss": 1.3927, "step": 13202 }, { "epoch": 10.956846473029046, "grad_norm": 22.32975959777832, "learning_rate": 1.5620248962655602e-05, "loss": 0.7855, "step": 13203 }, { "epoch": 10.957676348547718, "grad_norm": 26.544343948364258, "learning_rate": 1.5619917012448134e-05, "loss": 0.7718, "step": 13204 }, { "epoch": 10.95850622406639, "grad_norm": 29.309669494628906, "learning_rate": 1.5619585062240666e-05, "loss": 1.2628, "step": 13205 }, { "epoch": 10.959336099585062, "grad_norm": 37.84300994873047, "learning_rate": 1.56192531120332e-05, "loss": 0.966, "step": 13206 }, { "epoch": 10.960165975103735, "grad_norm": 26.655351638793945, "learning_rate": 1.5618921161825727e-05, "loss": 0.5937, "step": 13207 }, { "epoch": 10.960995850622407, "grad_norm": 17.02336883544922, "learning_rate": 1.561858921161826e-05, "loss": 0.5718, "step": 13208 }, { "epoch": 10.961825726141079, "grad_norm": 20.873510360717773, "learning_rate": 1.5618257261410788e-05, "loss": 1.0106, "step": 13209 }, { "epoch": 10.962655601659751, "grad_norm": 18.44037437438965, "learning_rate": 1.561792531120332e-05, "loss": 0.719, "step": 13210 }, { "epoch": 10.963485477178423, "grad_norm": 41.070682525634766, "learning_rate": 1.5617593360995852e-05, "loss": 0.911, "step": 13211 }, { "epoch": 10.964315352697096, "grad_norm": 20.63454246520996, "learning_rate": 1.561726141078838e-05, "loss": 0.8388, "step": 13212 }, { "epoch": 10.965145228215768, "grad_norm": 36.10408401489258, "learning_rate": 1.5616929460580913e-05, "loss": 1.4591, "step": 13213 }, { "epoch": 10.96597510373444, "grad_norm": 24.774642944335938, "learning_rate": 1.5616597510373445e-05, "loss": 0.7928, "step": 13214 }, { "epoch": 10.966804979253112, "grad_norm": 35.403900146484375, "learning_rate": 1.5616265560165977e-05, "loss": 0.8935, "step": 13215 }, { "epoch": 10.967634854771784, "grad_norm": 23.911169052124023, "learning_rate": 
1.5615933609958506e-05, "loss": 0.4482, "step": 13216 }, { "epoch": 10.968464730290457, "grad_norm": 51.04118728637695, "learning_rate": 1.5615601659751038e-05, "loss": 0.802, "step": 13217 }, { "epoch": 10.969294605809129, "grad_norm": 32.33427047729492, "learning_rate": 1.561526970954357e-05, "loss": 1.272, "step": 13218 }, { "epoch": 10.970124481327801, "grad_norm": 53.96444320678711, "learning_rate": 1.5614937759336102e-05, "loss": 0.8452, "step": 13219 }, { "epoch": 10.970954356846473, "grad_norm": 33.757686614990234, "learning_rate": 1.561460580912863e-05, "loss": 0.9439, "step": 13220 }, { "epoch": 10.971784232365145, "grad_norm": 36.36822509765625, "learning_rate": 1.5614273858921163e-05, "loss": 1.1221, "step": 13221 }, { "epoch": 10.972614107883818, "grad_norm": 41.93727111816406, "learning_rate": 1.5613941908713695e-05, "loss": 1.3096, "step": 13222 }, { "epoch": 10.97344398340249, "grad_norm": 50.52135467529297, "learning_rate": 1.5613609958506227e-05, "loss": 2.2249, "step": 13223 }, { "epoch": 10.974273858921162, "grad_norm": 39.40471649169922, "learning_rate": 1.5613278008298756e-05, "loss": 1.755, "step": 13224 }, { "epoch": 10.975103734439834, "grad_norm": 62.53281021118164, "learning_rate": 1.5612946058091288e-05, "loss": 1.3753, "step": 13225 }, { "epoch": 10.975933609958506, "grad_norm": 74.74532318115234, "learning_rate": 1.561261410788382e-05, "loss": 2.1603, "step": 13226 }, { "epoch": 10.976763485477179, "grad_norm": 23.915637969970703, "learning_rate": 1.561228215767635e-05, "loss": 0.685, "step": 13227 }, { "epoch": 10.97759336099585, "grad_norm": 38.822750091552734, "learning_rate": 1.561195020746888e-05, "loss": 1.2548, "step": 13228 }, { "epoch": 10.978423236514523, "grad_norm": 51.738372802734375, "learning_rate": 1.5611618257261413e-05, "loss": 0.9117, "step": 13229 }, { "epoch": 10.979253112033195, "grad_norm": 58.72908401489258, "learning_rate": 1.5611286307053942e-05, "loss": 1.2091, "step": 13230 }, { "epoch": 10.980082987551867, 
"grad_norm": 30.5390567779541, "learning_rate": 1.5610954356846474e-05, "loss": 0.7607, "step": 13231 }, { "epoch": 10.98091286307054, "grad_norm": 19.428424835205078, "learning_rate": 1.5610622406639003e-05, "loss": 1.3489, "step": 13232 }, { "epoch": 10.981742738589212, "grad_norm": 31.786048889160156, "learning_rate": 1.5610290456431535e-05, "loss": 0.5468, "step": 13233 }, { "epoch": 10.982572614107884, "grad_norm": 37.791969299316406, "learning_rate": 1.5609958506224067e-05, "loss": 1.4888, "step": 13234 }, { "epoch": 10.983402489626556, "grad_norm": 24.28880500793457, "learning_rate": 1.56096265560166e-05, "loss": 0.6682, "step": 13235 }, { "epoch": 10.984232365145228, "grad_norm": 20.71735191345215, "learning_rate": 1.5609294605809128e-05, "loss": 0.6772, "step": 13236 }, { "epoch": 10.9850622406639, "grad_norm": 33.862274169921875, "learning_rate": 1.560896265560166e-05, "loss": 1.1594, "step": 13237 }, { "epoch": 10.985892116182573, "grad_norm": 45.667423248291016, "learning_rate": 1.5608630705394192e-05, "loss": 1.2209, "step": 13238 }, { "epoch": 10.986721991701245, "grad_norm": 29.18592643737793, "learning_rate": 1.5608298755186724e-05, "loss": 0.9165, "step": 13239 }, { "epoch": 10.987551867219917, "grad_norm": 33.94458770751953, "learning_rate": 1.5607966804979256e-05, "loss": 1.5437, "step": 13240 }, { "epoch": 10.98838174273859, "grad_norm": 31.035194396972656, "learning_rate": 1.5607634854771785e-05, "loss": 1.3258, "step": 13241 }, { "epoch": 10.989211618257261, "grad_norm": 30.791440963745117, "learning_rate": 1.5607302904564317e-05, "loss": 1.2418, "step": 13242 }, { "epoch": 10.990041493775934, "grad_norm": 31.480661392211914, "learning_rate": 1.560697095435685e-05, "loss": 1.1779, "step": 13243 }, { "epoch": 10.990871369294606, "grad_norm": 33.74220657348633, "learning_rate": 1.560663900414938e-05, "loss": 0.9225, "step": 13244 }, { "epoch": 10.991701244813278, "grad_norm": 28.06056022644043, "learning_rate": 1.560630705394191e-05, "loss": 
0.8731, "step": 13245 }, { "epoch": 10.99253112033195, "grad_norm": 41.256771087646484, "learning_rate": 1.5605975103734442e-05, "loss": 1.4076, "step": 13246 }, { "epoch": 10.993360995850622, "grad_norm": 23.68463897705078, "learning_rate": 1.5605643153526974e-05, "loss": 0.8683, "step": 13247 }, { "epoch": 10.994190871369295, "grad_norm": 24.7220458984375, "learning_rate": 1.5605311203319503e-05, "loss": 1.0434, "step": 13248 }, { "epoch": 10.995020746887967, "grad_norm": 13.724303245544434, "learning_rate": 1.5604979253112035e-05, "loss": 0.5203, "step": 13249 }, { "epoch": 10.995850622406639, "grad_norm": 24.375591278076172, "learning_rate": 1.5604647302904564e-05, "loss": 0.7126, "step": 13250 }, { "epoch": 10.996680497925311, "grad_norm": 25.45621109008789, "learning_rate": 1.5604315352697096e-05, "loss": 1.2226, "step": 13251 }, { "epoch": 10.997510373443983, "grad_norm": 23.14603614807129, "learning_rate": 1.5603983402489628e-05, "loss": 0.607, "step": 13252 }, { "epoch": 10.998340248962656, "grad_norm": 22.83147430419922, "learning_rate": 1.5603651452282157e-05, "loss": 0.6375, "step": 13253 }, { "epoch": 10.999170124481328, "grad_norm": 64.57720947265625, "learning_rate": 1.560331950207469e-05, "loss": 0.9327, "step": 13254 }, { "epoch": 11.0, "grad_norm": 43.13486862182617, "learning_rate": 1.560298755186722e-05, "loss": 0.7623, "step": 13255 }, { "epoch": 11.000829875518672, "grad_norm": 31.726484298706055, "learning_rate": 1.5602655601659753e-05, "loss": 1.0053, "step": 13256 }, { "epoch": 11.001659751037344, "grad_norm": 27.315942764282227, "learning_rate": 1.5602323651452282e-05, "loss": 1.0465, "step": 13257 }, { "epoch": 11.002489626556017, "grad_norm": 34.453739166259766, "learning_rate": 1.5601991701244814e-05, "loss": 1.5644, "step": 13258 }, { "epoch": 11.003319502074689, "grad_norm": 24.68008041381836, "learning_rate": 1.5601659751037346e-05, "loss": 0.5933, "step": 13259 }, { "epoch": 11.004149377593361, "grad_norm": 38.11684799194336, 
"learning_rate": 1.5601327800829878e-05, "loss": 1.0167, "step": 13260 }, { "epoch": 11.004979253112033, "grad_norm": 29.238069534301758, "learning_rate": 1.5600995850622407e-05, "loss": 0.8172, "step": 13261 }, { "epoch": 11.005809128630705, "grad_norm": 42.9595947265625, "learning_rate": 1.560066390041494e-05, "loss": 0.9839, "step": 13262 }, { "epoch": 11.006639004149378, "grad_norm": 16.40656089782715, "learning_rate": 1.560033195020747e-05, "loss": 0.5195, "step": 13263 }, { "epoch": 11.00746887966805, "grad_norm": 31.46295928955078, "learning_rate": 1.5600000000000003e-05, "loss": 1.0952, "step": 13264 }, { "epoch": 11.008298755186722, "grad_norm": 17.71945571899414, "learning_rate": 1.5599668049792532e-05, "loss": 1.0664, "step": 13265 }, { "epoch": 11.009128630705394, "grad_norm": 100.85710144042969, "learning_rate": 1.5599336099585064e-05, "loss": 0.7677, "step": 13266 }, { "epoch": 11.009958506224066, "grad_norm": 48.81715393066406, "learning_rate": 1.5599004149377596e-05, "loss": 1.4846, "step": 13267 }, { "epoch": 11.010788381742739, "grad_norm": 23.649505615234375, "learning_rate": 1.5598672199170125e-05, "loss": 0.6628, "step": 13268 }, { "epoch": 11.01161825726141, "grad_norm": 22.60551643371582, "learning_rate": 1.5598340248962657e-05, "loss": 0.655, "step": 13269 }, { "epoch": 11.012448132780083, "grad_norm": 39.02106475830078, "learning_rate": 1.559800829875519e-05, "loss": 0.7131, "step": 13270 }, { "epoch": 11.013278008298755, "grad_norm": 33.198516845703125, "learning_rate": 1.5597676348547718e-05, "loss": 1.0475, "step": 13271 }, { "epoch": 11.014107883817427, "grad_norm": 42.984405517578125, "learning_rate": 1.559734439834025e-05, "loss": 1.4095, "step": 13272 }, { "epoch": 11.0149377593361, "grad_norm": 24.329984664916992, "learning_rate": 1.5597012448132782e-05, "loss": 0.9235, "step": 13273 }, { "epoch": 11.015767634854772, "grad_norm": 25.915130615234375, "learning_rate": 1.559668049792531e-05, "loss": 0.7346, "step": 13274 }, { "epoch": 
11.016597510373444, "grad_norm": 25.684934616088867, "learning_rate": 1.5596348547717843e-05, "loss": 0.902, "step": 13275 }, { "epoch": 11.017427385892116, "grad_norm": 25.62208366394043, "learning_rate": 1.5596016597510375e-05, "loss": 0.8867, "step": 13276 }, { "epoch": 11.018257261410788, "grad_norm": 32.848472595214844, "learning_rate": 1.5595684647302907e-05, "loss": 0.8369, "step": 13277 }, { "epoch": 11.01908713692946, "grad_norm": 67.68759155273438, "learning_rate": 1.5595352697095436e-05, "loss": 0.8653, "step": 13278 }, { "epoch": 11.019917012448133, "grad_norm": 36.3262939453125, "learning_rate": 1.5595020746887968e-05, "loss": 1.1503, "step": 13279 }, { "epoch": 11.020746887966805, "grad_norm": 30.492834091186523, "learning_rate": 1.55946887966805e-05, "loss": 0.8707, "step": 13280 }, { "epoch": 11.021576763485477, "grad_norm": 22.148210525512695, "learning_rate": 1.5594356846473032e-05, "loss": 0.7664, "step": 13281 }, { "epoch": 11.02240663900415, "grad_norm": 42.99190139770508, "learning_rate": 1.559402489626556e-05, "loss": 1.035, "step": 13282 }, { "epoch": 11.023236514522821, "grad_norm": 42.23788070678711, "learning_rate": 1.5593692946058093e-05, "loss": 0.8455, "step": 13283 }, { "epoch": 11.024066390041494, "grad_norm": 47.19395065307617, "learning_rate": 1.5593360995850625e-05, "loss": 0.9726, "step": 13284 }, { "epoch": 11.024896265560166, "grad_norm": 25.561962127685547, "learning_rate": 1.5593029045643157e-05, "loss": 1.0936, "step": 13285 }, { "epoch": 11.025726141078838, "grad_norm": 31.10030746459961, "learning_rate": 1.5592697095435686e-05, "loss": 0.5116, "step": 13286 }, { "epoch": 11.02655601659751, "grad_norm": 22.055482864379883, "learning_rate": 1.5592365145228218e-05, "loss": 0.6832, "step": 13287 }, { "epoch": 11.027385892116182, "grad_norm": 24.961042404174805, "learning_rate": 1.5592033195020747e-05, "loss": 0.4845, "step": 13288 }, { "epoch": 11.028215767634855, "grad_norm": 30.59581756591797, "learning_rate": 
1.559170124481328e-05, "loss": 0.8021, "step": 13289 }, { "epoch": 11.029045643153527, "grad_norm": 25.893476486206055, "learning_rate": 1.559136929460581e-05, "loss": 0.7563, "step": 13290 }, { "epoch": 11.029875518672199, "grad_norm": 34.39362335205078, "learning_rate": 1.559103734439834e-05, "loss": 1.4628, "step": 13291 }, { "epoch": 11.030705394190871, "grad_norm": 46.86252975463867, "learning_rate": 1.559070539419087e-05, "loss": 0.7861, "step": 13292 }, { "epoch": 11.031535269709543, "grad_norm": 38.854339599609375, "learning_rate": 1.5590373443983404e-05, "loss": 1.2467, "step": 13293 }, { "epoch": 11.032365145228216, "grad_norm": 47.14409255981445, "learning_rate": 1.5590041493775936e-05, "loss": 1.7504, "step": 13294 }, { "epoch": 11.033195020746888, "grad_norm": 117.32127380371094, "learning_rate": 1.5589709543568465e-05, "loss": 0.7665, "step": 13295 }, { "epoch": 11.03402489626556, "grad_norm": 39.097530364990234, "learning_rate": 1.5589377593360997e-05, "loss": 0.6421, "step": 13296 }, { "epoch": 11.034854771784232, "grad_norm": 20.953107833862305, "learning_rate": 1.558904564315353e-05, "loss": 0.5771, "step": 13297 }, { "epoch": 11.035684647302904, "grad_norm": 21.775564193725586, "learning_rate": 1.558871369294606e-05, "loss": 0.8799, "step": 13298 }, { "epoch": 11.036514522821577, "grad_norm": 52.57421112060547, "learning_rate": 1.558838174273859e-05, "loss": 0.9152, "step": 13299 }, { "epoch": 11.037344398340249, "grad_norm": 21.738101959228516, "learning_rate": 1.5588049792531122e-05, "loss": 0.739, "step": 13300 }, { "epoch": 11.038174273858921, "grad_norm": 40.162818908691406, "learning_rate": 1.5587717842323654e-05, "loss": 1.2277, "step": 13301 }, { "epoch": 11.039004149377593, "grad_norm": 15.251832962036133, "learning_rate": 1.5587385892116186e-05, "loss": 0.3502, "step": 13302 }, { "epoch": 11.039834024896265, "grad_norm": 20.786239624023438, "learning_rate": 1.5587053941908715e-05, "loss": 0.9097, "step": 13303 }, { "epoch": 
11.040663900414938, "grad_norm": 24.517276763916016, "learning_rate": 1.5586721991701247e-05, "loss": 1.2902, "step": 13304 }, { "epoch": 11.04149377593361, "grad_norm": 43.535499572753906, "learning_rate": 1.558639004149378e-05, "loss": 1.0968, "step": 13305 }, { "epoch": 11.042323651452282, "grad_norm": 71.13919830322266, "learning_rate": 1.5586058091286308e-05, "loss": 0.8671, "step": 13306 }, { "epoch": 11.043153526970954, "grad_norm": 54.00944137573242, "learning_rate": 1.558572614107884e-05, "loss": 1.4556, "step": 13307 }, { "epoch": 11.043983402489626, "grad_norm": 27.58109474182129, "learning_rate": 1.5585394190871372e-05, "loss": 0.7196, "step": 13308 }, { "epoch": 11.044813278008299, "grad_norm": 24.497230529785156, "learning_rate": 1.55850622406639e-05, "loss": 1.0314, "step": 13309 }, { "epoch": 11.04564315352697, "grad_norm": 24.24781036376953, "learning_rate": 1.5584730290456433e-05, "loss": 0.7492, "step": 13310 }, { "epoch": 11.046473029045643, "grad_norm": 40.75026321411133, "learning_rate": 1.558439834024896e-05, "loss": 1.3146, "step": 13311 }, { "epoch": 11.047302904564315, "grad_norm": 24.776512145996094, "learning_rate": 1.5584066390041493e-05, "loss": 0.8758, "step": 13312 }, { "epoch": 11.048132780082987, "grad_norm": 19.657697677612305, "learning_rate": 1.5583734439834026e-05, "loss": 0.9448, "step": 13313 }, { "epoch": 11.04896265560166, "grad_norm": 16.735557556152344, "learning_rate": 1.5583402489626558e-05, "loss": 0.9555, "step": 13314 }, { "epoch": 11.049792531120332, "grad_norm": 33.75859069824219, "learning_rate": 1.5583070539419086e-05, "loss": 1.2001, "step": 13315 }, { "epoch": 11.050622406639004, "grad_norm": 34.76654052734375, "learning_rate": 1.558273858921162e-05, "loss": 2.0659, "step": 13316 }, { "epoch": 11.051452282157676, "grad_norm": 27.551509857177734, "learning_rate": 1.558240663900415e-05, "loss": 1.1476, "step": 13317 }, { "epoch": 11.052282157676348, "grad_norm": 76.47699737548828, "learning_rate": 
1.5582074688796683e-05, "loss": 1.5935, "step": 13318 }, { "epoch": 11.05311203319502, "grad_norm": 36.56039810180664, "learning_rate": 1.5581742738589215e-05, "loss": 1.1742, "step": 13319 }, { "epoch": 11.053941908713693, "grad_norm": 31.95877456665039, "learning_rate": 1.5581410788381744e-05, "loss": 1.6208, "step": 13320 }, { "epoch": 11.054771784232365, "grad_norm": 20.900062561035156, "learning_rate": 1.5581078838174276e-05, "loss": 0.6782, "step": 13321 }, { "epoch": 11.055601659751037, "grad_norm": 49.11664581298828, "learning_rate": 1.5580746887966808e-05, "loss": 0.8628, "step": 13322 }, { "epoch": 11.05643153526971, "grad_norm": 22.64731216430664, "learning_rate": 1.558041493775934e-05, "loss": 0.9802, "step": 13323 }, { "epoch": 11.057261410788382, "grad_norm": 42.598594665527344, "learning_rate": 1.558008298755187e-05, "loss": 1.3589, "step": 13324 }, { "epoch": 11.058091286307054, "grad_norm": 44.82014846801758, "learning_rate": 1.55797510373444e-05, "loss": 0.6934, "step": 13325 }, { "epoch": 11.058921161825726, "grad_norm": 15.52579116821289, "learning_rate": 1.557941908713693e-05, "loss": 0.4435, "step": 13326 }, { "epoch": 11.059751037344398, "grad_norm": 91.1451416015625, "learning_rate": 1.557908713692946e-05, "loss": 1.1555, "step": 13327 }, { "epoch": 11.06058091286307, "grad_norm": 34.06922912597656, "learning_rate": 1.5578755186721994e-05, "loss": 1.3355, "step": 13328 }, { "epoch": 11.061410788381743, "grad_norm": 22.603515625, "learning_rate": 1.5578423236514522e-05, "loss": 0.6748, "step": 13329 }, { "epoch": 11.062240663900415, "grad_norm": 17.88463020324707, "learning_rate": 1.5578091286307054e-05, "loss": 0.4612, "step": 13330 }, { "epoch": 11.063070539419087, "grad_norm": 69.02342987060547, "learning_rate": 1.5577759336099587e-05, "loss": 0.5793, "step": 13331 }, { "epoch": 11.063900414937759, "grad_norm": 44.407875061035156, "learning_rate": 1.5577427385892115e-05, "loss": 0.7253, "step": 13332 }, { "epoch": 11.064730290456431, 
"grad_norm": 22.676389694213867, "learning_rate": 1.5577095435684647e-05, "loss": 1.202, "step": 13333 }, { "epoch": 11.065560165975104, "grad_norm": 54.731353759765625, "learning_rate": 1.557676348547718e-05, "loss": 0.621, "step": 13334 }, { "epoch": 11.066390041493776, "grad_norm": 38.313297271728516, "learning_rate": 1.557643153526971e-05, "loss": 1.0111, "step": 13335 }, { "epoch": 11.067219917012448, "grad_norm": 51.2476806640625, "learning_rate": 1.557609958506224e-05, "loss": 0.9078, "step": 13336 }, { "epoch": 11.06804979253112, "grad_norm": 26.341510772705078, "learning_rate": 1.5575767634854772e-05, "loss": 0.5916, "step": 13337 }, { "epoch": 11.068879668049792, "grad_norm": 49.08055114746094, "learning_rate": 1.5575435684647305e-05, "loss": 1.0665, "step": 13338 }, { "epoch": 11.069709543568464, "grad_norm": 36.87318420410156, "learning_rate": 1.5575103734439837e-05, "loss": 1.3083, "step": 13339 }, { "epoch": 11.070539419087137, "grad_norm": 32.858699798583984, "learning_rate": 1.5574771784232365e-05, "loss": 0.8186, "step": 13340 }, { "epoch": 11.071369294605809, "grad_norm": 32.159095764160156, "learning_rate": 1.5574439834024897e-05, "loss": 0.8628, "step": 13341 }, { "epoch": 11.072199170124481, "grad_norm": 21.537185668945312, "learning_rate": 1.557410788381743e-05, "loss": 0.5693, "step": 13342 }, { "epoch": 11.073029045643153, "grad_norm": 45.76437759399414, "learning_rate": 1.557377593360996e-05, "loss": 1.2132, "step": 13343 }, { "epoch": 11.073858921161825, "grad_norm": 17.822385787963867, "learning_rate": 1.557344398340249e-05, "loss": 0.6084, "step": 13344 }, { "epoch": 11.074688796680498, "grad_norm": 51.65360641479492, "learning_rate": 1.5573112033195023e-05, "loss": 1.0786, "step": 13345 }, { "epoch": 11.07551867219917, "grad_norm": 12.617968559265137, "learning_rate": 1.5572780082987555e-05, "loss": 0.3494, "step": 13346 }, { "epoch": 11.076348547717842, "grad_norm": 31.925743103027344, "learning_rate": 1.5572448132780083e-05, "loss": 
1.0293, "step": 13347 }, { "epoch": 11.077178423236514, "grad_norm": 36.380794525146484, "learning_rate": 1.5572116182572615e-05, "loss": 1.0754, "step": 13348 }, { "epoch": 11.078008298755186, "grad_norm": 39.96987533569336, "learning_rate": 1.5571784232365144e-05, "loss": 0.9451, "step": 13349 }, { "epoch": 11.078838174273859, "grad_norm": 36.03715515136719, "learning_rate": 1.5571452282157676e-05, "loss": 0.8918, "step": 13350 }, { "epoch": 11.07966804979253, "grad_norm": 21.092660903930664, "learning_rate": 1.557112033195021e-05, "loss": 1.0023, "step": 13351 }, { "epoch": 11.080497925311203, "grad_norm": 18.495237350463867, "learning_rate": 1.557078838174274e-05, "loss": 0.8537, "step": 13352 }, { "epoch": 11.081327800829875, "grad_norm": 49.05571365356445, "learning_rate": 1.557045643153527e-05, "loss": 0.5588, "step": 13353 }, { "epoch": 11.082157676348547, "grad_norm": 38.21946334838867, "learning_rate": 1.55701244813278e-05, "loss": 1.1337, "step": 13354 }, { "epoch": 11.08298755186722, "grad_norm": 30.80261993408203, "learning_rate": 1.5569792531120333e-05, "loss": 1.3012, "step": 13355 }, { "epoch": 11.083817427385892, "grad_norm": 21.01976203918457, "learning_rate": 1.5569460580912866e-05, "loss": 0.4965, "step": 13356 }, { "epoch": 11.084647302904564, "grad_norm": 56.580997467041016, "learning_rate": 1.5569128630705394e-05, "loss": 1.0363, "step": 13357 }, { "epoch": 11.085477178423236, "grad_norm": 25.809600830078125, "learning_rate": 1.5568796680497926e-05, "loss": 0.4015, "step": 13358 }, { "epoch": 11.086307053941908, "grad_norm": 20.32781410217285, "learning_rate": 1.556846473029046e-05, "loss": 0.6655, "step": 13359 }, { "epoch": 11.08713692946058, "grad_norm": 32.006229400634766, "learning_rate": 1.556813278008299e-05, "loss": 0.4449, "step": 13360 }, { "epoch": 11.087966804979253, "grad_norm": 31.02689552307129, "learning_rate": 1.556780082987552e-05, "loss": 1.1979, "step": 13361 }, { "epoch": 11.088796680497925, "grad_norm": 
30.201414108276367, "learning_rate": 1.556746887966805e-05, "loss": 0.75, "step": 13362 }, { "epoch": 11.089626556016597, "grad_norm": 48.74565124511719, "learning_rate": 1.5567136929460584e-05, "loss": 1.2275, "step": 13363 }, { "epoch": 11.09045643153527, "grad_norm": 25.303850173950195, "learning_rate": 1.5566804979253116e-05, "loss": 0.7584, "step": 13364 }, { "epoch": 11.091286307053942, "grad_norm": 21.24517059326172, "learning_rate": 1.5566473029045644e-05, "loss": 0.4971, "step": 13365 }, { "epoch": 11.092116182572614, "grad_norm": 24.580078125, "learning_rate": 1.5566141078838176e-05, "loss": 0.9641, "step": 13366 }, { "epoch": 11.092946058091286, "grad_norm": 30.90903663635254, "learning_rate": 1.5565809128630705e-05, "loss": 0.6404, "step": 13367 }, { "epoch": 11.093775933609958, "grad_norm": 19.84929656982422, "learning_rate": 1.5565477178423237e-05, "loss": 0.5948, "step": 13368 }, { "epoch": 11.09460580912863, "grad_norm": 49.338768005371094, "learning_rate": 1.556514522821577e-05, "loss": 0.785, "step": 13369 }, { "epoch": 11.095435684647303, "grad_norm": 15.120865821838379, "learning_rate": 1.5564813278008298e-05, "loss": 0.4739, "step": 13370 }, { "epoch": 11.096265560165975, "grad_norm": 29.505674362182617, "learning_rate": 1.556448132780083e-05, "loss": 0.9773, "step": 13371 }, { "epoch": 11.097095435684647, "grad_norm": 33.824100494384766, "learning_rate": 1.5564149377593362e-05, "loss": 1.0006, "step": 13372 }, { "epoch": 11.09792531120332, "grad_norm": 26.140377044677734, "learning_rate": 1.5563817427385894e-05, "loss": 0.8518, "step": 13373 }, { "epoch": 11.098755186721991, "grad_norm": 46.22686767578125, "learning_rate": 1.5563485477178423e-05, "loss": 0.9982, "step": 13374 }, { "epoch": 11.099585062240664, "grad_norm": 47.74344253540039, "learning_rate": 1.5563153526970955e-05, "loss": 0.6384, "step": 13375 }, { "epoch": 11.100414937759336, "grad_norm": 25.28278160095215, "learning_rate": 1.5562821576763487e-05, "loss": 0.9989, "step": 
13376 }, { "epoch": 11.101244813278008, "grad_norm": 48.75901794433594, "learning_rate": 1.556248962655602e-05, "loss": 1.1567, "step": 13377 }, { "epoch": 11.10207468879668, "grad_norm": 35.190853118896484, "learning_rate": 1.5562157676348548e-05, "loss": 0.5581, "step": 13378 }, { "epoch": 11.102904564315352, "grad_norm": 51.280555725097656, "learning_rate": 1.556182572614108e-05, "loss": 0.6486, "step": 13379 }, { "epoch": 11.103734439834025, "grad_norm": 46.75189971923828, "learning_rate": 1.5561493775933612e-05, "loss": 1.133, "step": 13380 }, { "epoch": 11.104564315352697, "grad_norm": 15.208054542541504, "learning_rate": 1.5561161825726145e-05, "loss": 0.3508, "step": 13381 }, { "epoch": 11.105394190871369, "grad_norm": 15.436466217041016, "learning_rate": 1.5560829875518673e-05, "loss": 0.6408, "step": 13382 }, { "epoch": 11.106224066390041, "grad_norm": 21.411745071411133, "learning_rate": 1.5560497925311205e-05, "loss": 0.7767, "step": 13383 }, { "epoch": 11.107053941908713, "grad_norm": 64.44049072265625, "learning_rate": 1.5560165975103737e-05, "loss": 1.3482, "step": 13384 }, { "epoch": 11.107883817427386, "grad_norm": 66.11968231201172, "learning_rate": 1.5559834024896266e-05, "loss": 1.2542, "step": 13385 }, { "epoch": 11.108713692946058, "grad_norm": 34.62207794189453, "learning_rate": 1.5559502074688798e-05, "loss": 0.7331, "step": 13386 }, { "epoch": 11.10954356846473, "grad_norm": 94.20352935791016, "learning_rate": 1.555917012448133e-05, "loss": 1.2465, "step": 13387 }, { "epoch": 11.110373443983402, "grad_norm": 23.849632263183594, "learning_rate": 1.555883817427386e-05, "loss": 0.7158, "step": 13388 }, { "epoch": 11.111203319502074, "grad_norm": 28.869611740112305, "learning_rate": 1.555850622406639e-05, "loss": 1.1526, "step": 13389 }, { "epoch": 11.112033195020746, "grad_norm": 40.09805679321289, "learning_rate": 1.555817427385892e-05, "loss": 0.6006, "step": 13390 }, { "epoch": 11.112863070539419, "grad_norm": 24.258298873901367, 
"learning_rate": 1.5557842323651452e-05, "loss": 0.7199, "step": 13391 }, { "epoch": 11.11369294605809, "grad_norm": 36.649566650390625, "learning_rate": 1.5557510373443984e-05, "loss": 1.3386, "step": 13392 }, { "epoch": 11.114522821576763, "grad_norm": 23.521099090576172, "learning_rate": 1.5557178423236516e-05, "loss": 0.7412, "step": 13393 }, { "epoch": 11.115352697095435, "grad_norm": 29.587892532348633, "learning_rate": 1.5556846473029045e-05, "loss": 1.2667, "step": 13394 }, { "epoch": 11.116182572614107, "grad_norm": 37.15192794799805, "learning_rate": 1.5556514522821577e-05, "loss": 1.139, "step": 13395 }, { "epoch": 11.11701244813278, "grad_norm": 21.39067840576172, "learning_rate": 1.555618257261411e-05, "loss": 1.0036, "step": 13396 }, { "epoch": 11.117842323651452, "grad_norm": 24.52741241455078, "learning_rate": 1.555585062240664e-05, "loss": 0.5526, "step": 13397 }, { "epoch": 11.118672199170124, "grad_norm": 36.93035888671875, "learning_rate": 1.5555518672199173e-05, "loss": 0.7161, "step": 13398 }, { "epoch": 11.119502074688796, "grad_norm": 23.669015884399414, "learning_rate": 1.5555186721991702e-05, "loss": 1.0235, "step": 13399 }, { "epoch": 11.120331950207468, "grad_norm": 18.972761154174805, "learning_rate": 1.5554854771784234e-05, "loss": 0.9923, "step": 13400 }, { "epoch": 11.12116182572614, "grad_norm": 34.33028030395508, "learning_rate": 1.5554522821576766e-05, "loss": 1.6516, "step": 13401 }, { "epoch": 11.121991701244813, "grad_norm": 28.5897159576416, "learning_rate": 1.55541908713693e-05, "loss": 0.4168, "step": 13402 }, { "epoch": 11.122821576763485, "grad_norm": 29.21533203125, "learning_rate": 1.5553858921161827e-05, "loss": 0.9607, "step": 13403 }, { "epoch": 11.123651452282157, "grad_norm": 16.231853485107422, "learning_rate": 1.555352697095436e-05, "loss": 0.4648, "step": 13404 }, { "epoch": 11.12448132780083, "grad_norm": 34.68293380737305, "learning_rate": 1.5553195020746888e-05, "loss": 1.1895, "step": 13405 }, { "epoch": 
11.125311203319502, "grad_norm": 43.19119644165039, "learning_rate": 1.555286307053942e-05, "loss": 0.7375, "step": 13406 }, { "epoch": 11.126141078838174, "grad_norm": 33.929500579833984, "learning_rate": 1.5552531120331952e-05, "loss": 0.9441, "step": 13407 }, { "epoch": 11.126970954356846, "grad_norm": 15.19284439086914, "learning_rate": 1.555219917012448e-05, "loss": 0.3734, "step": 13408 }, { "epoch": 11.127800829875518, "grad_norm": 37.86792755126953, "learning_rate": 1.5551867219917013e-05, "loss": 1.4865, "step": 13409 }, { "epoch": 11.12863070539419, "grad_norm": 49.997867584228516, "learning_rate": 1.5551535269709545e-05, "loss": 1.3133, "step": 13410 }, { "epoch": 11.129460580912863, "grad_norm": 39.643062591552734, "learning_rate": 1.5551203319502074e-05, "loss": 1.1992, "step": 13411 }, { "epoch": 11.130290456431535, "grad_norm": 13.239843368530273, "learning_rate": 1.5550871369294606e-05, "loss": 0.3228, "step": 13412 }, { "epoch": 11.131120331950207, "grad_norm": 27.38642692565918, "learning_rate": 1.5550539419087138e-05, "loss": 0.6166, "step": 13413 }, { "epoch": 11.13195020746888, "grad_norm": 34.740745544433594, "learning_rate": 1.555020746887967e-05, "loss": 0.8599, "step": 13414 }, { "epoch": 11.132780082987551, "grad_norm": 33.278690338134766, "learning_rate": 1.55498755186722e-05, "loss": 1.1929, "step": 13415 }, { "epoch": 11.133609958506224, "grad_norm": 67.06544494628906, "learning_rate": 1.554954356846473e-05, "loss": 1.0094, "step": 13416 }, { "epoch": 11.134439834024896, "grad_norm": 36.36762237548828, "learning_rate": 1.5549211618257263e-05, "loss": 1.3456, "step": 13417 }, { "epoch": 11.135269709543568, "grad_norm": 29.481285095214844, "learning_rate": 1.5548879668049795e-05, "loss": 1.1855, "step": 13418 }, { "epoch": 11.13609958506224, "grad_norm": 35.1609001159668, "learning_rate": 1.5548547717842324e-05, "loss": 0.698, "step": 13419 }, { "epoch": 11.136929460580912, "grad_norm": 97.19469451904297, "learning_rate": 
1.5548215767634856e-05, "loss": 0.9818, "step": 13420 }, { "epoch": 11.137759336099585, "grad_norm": 19.639223098754883, "learning_rate": 1.5547883817427388e-05, "loss": 0.7357, "step": 13421 }, { "epoch": 11.138589211618257, "grad_norm": 22.40607261657715, "learning_rate": 1.554755186721992e-05, "loss": 0.8087, "step": 13422 }, { "epoch": 11.139419087136929, "grad_norm": 28.086931228637695, "learning_rate": 1.554721991701245e-05, "loss": 1.1275, "step": 13423 }, { "epoch": 11.140248962655601, "grad_norm": 22.011463165283203, "learning_rate": 1.554688796680498e-05, "loss": 0.3864, "step": 13424 }, { "epoch": 11.141078838174273, "grad_norm": 18.479963302612305, "learning_rate": 1.5546556016597513e-05, "loss": 0.2237, "step": 13425 }, { "epoch": 11.141908713692946, "grad_norm": 30.757421493530273, "learning_rate": 1.5546224066390042e-05, "loss": 0.5671, "step": 13426 }, { "epoch": 11.142738589211618, "grad_norm": 25.505685806274414, "learning_rate": 1.5545892116182574e-05, "loss": 0.4174, "step": 13427 }, { "epoch": 11.14356846473029, "grad_norm": 25.183361053466797, "learning_rate": 1.5545560165975103e-05, "loss": 0.6888, "step": 13428 }, { "epoch": 11.144398340248962, "grad_norm": 52.11037826538086, "learning_rate": 1.5545228215767635e-05, "loss": 1.3796, "step": 13429 }, { "epoch": 11.145228215767634, "grad_norm": 25.478755950927734, "learning_rate": 1.5544896265560167e-05, "loss": 0.802, "step": 13430 }, { "epoch": 11.146058091286307, "grad_norm": 38.68452835083008, "learning_rate": 1.55445643153527e-05, "loss": 0.8912, "step": 13431 }, { "epoch": 11.146887966804979, "grad_norm": 34.454586029052734, "learning_rate": 1.5544232365145228e-05, "loss": 1.3243, "step": 13432 }, { "epoch": 11.147717842323651, "grad_norm": 21.721385955810547, "learning_rate": 1.554390041493776e-05, "loss": 0.7726, "step": 13433 }, { "epoch": 11.148547717842323, "grad_norm": 46.923912048339844, "learning_rate": 1.5543568464730292e-05, "loss": 0.6508, "step": 13434 }, { "epoch": 
11.149377593360995, "grad_norm": 28.652677536010742, "learning_rate": 1.5543236514522824e-05, "loss": 0.7793, "step": 13435 }, { "epoch": 11.150207468879668, "grad_norm": 30.75908660888672, "learning_rate": 1.5542904564315353e-05, "loss": 0.6583, "step": 13436 }, { "epoch": 11.15103734439834, "grad_norm": 16.099163055419922, "learning_rate": 1.5542572614107885e-05, "loss": 0.3165, "step": 13437 }, { "epoch": 11.151867219917012, "grad_norm": 19.462739944458008, "learning_rate": 1.5542240663900417e-05, "loss": 0.5589, "step": 13438 }, { "epoch": 11.152697095435684, "grad_norm": 148.05796813964844, "learning_rate": 1.554190871369295e-05, "loss": 1.7132, "step": 13439 }, { "epoch": 11.153526970954356, "grad_norm": 36.50559997558594, "learning_rate": 1.5541576763485478e-05, "loss": 1.0139, "step": 13440 }, { "epoch": 11.154356846473028, "grad_norm": 44.28426742553711, "learning_rate": 1.554124481327801e-05, "loss": 1.6318, "step": 13441 }, { "epoch": 11.1551867219917, "grad_norm": 24.16826820373535, "learning_rate": 1.5540912863070542e-05, "loss": 0.7131, "step": 13442 }, { "epoch": 11.156016597510373, "grad_norm": 33.621620178222656, "learning_rate": 1.554058091286307e-05, "loss": 0.7665, "step": 13443 }, { "epoch": 11.156846473029045, "grad_norm": 28.497968673706055, "learning_rate": 1.5540248962655603e-05, "loss": 0.9773, "step": 13444 }, { "epoch": 11.157676348547717, "grad_norm": 27.666784286499023, "learning_rate": 1.5539917012448135e-05, "loss": 0.7735, "step": 13445 }, { "epoch": 11.15850622406639, "grad_norm": 47.58610534667969, "learning_rate": 1.5539585062240664e-05, "loss": 0.7672, "step": 13446 }, { "epoch": 11.159336099585062, "grad_norm": 36.5857048034668, "learning_rate": 1.5539253112033196e-05, "loss": 0.9771, "step": 13447 }, { "epoch": 11.160165975103734, "grad_norm": 25.01738739013672, "learning_rate": 1.5538921161825728e-05, "loss": 0.933, "step": 13448 }, { "epoch": 11.160995850622406, "grad_norm": 23.422714233398438, "learning_rate": 
1.5538589211618257e-05, "loss": 0.4525, "step": 13449 }, { "epoch": 11.161825726141078, "grad_norm": 25.92742919921875, "learning_rate": 1.553825726141079e-05, "loss": 0.4309, "step": 13450 }, { "epoch": 11.16265560165975, "grad_norm": 37.19786071777344, "learning_rate": 1.553792531120332e-05, "loss": 1.1393, "step": 13451 }, { "epoch": 11.163485477178423, "grad_norm": 24.881750106811523, "learning_rate": 1.5537593360995853e-05, "loss": 1.0836, "step": 13452 }, { "epoch": 11.164315352697095, "grad_norm": 18.266338348388672, "learning_rate": 1.5537261410788382e-05, "loss": 0.2879, "step": 13453 }, { "epoch": 11.165145228215767, "grad_norm": 32.622398376464844, "learning_rate": 1.5536929460580914e-05, "loss": 0.9163, "step": 13454 }, { "epoch": 11.16597510373444, "grad_norm": 63.76389694213867, "learning_rate": 1.5536597510373446e-05, "loss": 1.4842, "step": 13455 }, { "epoch": 11.166804979253111, "grad_norm": 24.018680572509766, "learning_rate": 1.5536265560165978e-05, "loss": 0.5799, "step": 13456 }, { "epoch": 11.167634854771784, "grad_norm": 20.828067779541016, "learning_rate": 1.5535933609958507e-05, "loss": 0.9866, "step": 13457 }, { "epoch": 11.168464730290456, "grad_norm": 26.809581756591797, "learning_rate": 1.553560165975104e-05, "loss": 0.6874, "step": 13458 }, { "epoch": 11.169294605809128, "grad_norm": 31.667757034301758, "learning_rate": 1.553526970954357e-05, "loss": 0.936, "step": 13459 }, { "epoch": 11.1701244813278, "grad_norm": 51.18049240112305, "learning_rate": 1.5534937759336103e-05, "loss": 1.0921, "step": 13460 }, { "epoch": 11.170954356846472, "grad_norm": 27.2960147857666, "learning_rate": 1.5534605809128632e-05, "loss": 0.8318, "step": 13461 }, { "epoch": 11.171784232365145, "grad_norm": 27.03180503845215, "learning_rate": 1.5534273858921164e-05, "loss": 0.9836, "step": 13462 }, { "epoch": 11.172614107883817, "grad_norm": 25.116458892822266, "learning_rate": 1.5533941908713696e-05, "loss": 0.6363, "step": 13463 }, { "epoch": 
11.173443983402489, "grad_norm": 29.578977584838867, "learning_rate": 1.5533609958506225e-05, "loss": 0.7059, "step": 13464 }, { "epoch": 11.174273858921161, "grad_norm": 27.728843688964844, "learning_rate": 1.5533278008298757e-05, "loss": 0.4886, "step": 13465 }, { "epoch": 11.175103734439833, "grad_norm": 32.34976577758789, "learning_rate": 1.5532946058091286e-05, "loss": 1.125, "step": 13466 }, { "epoch": 11.175933609958506, "grad_norm": 20.509906768798828, "learning_rate": 1.5532614107883818e-05, "loss": 0.6553, "step": 13467 }, { "epoch": 11.176763485477178, "grad_norm": 27.796913146972656, "learning_rate": 1.553228215767635e-05, "loss": 0.9755, "step": 13468 }, { "epoch": 11.17759336099585, "grad_norm": 36.514244079589844, "learning_rate": 1.553195020746888e-05, "loss": 1.0886, "step": 13469 }, { "epoch": 11.178423236514522, "grad_norm": 18.421428680419922, "learning_rate": 1.553161825726141e-05, "loss": 0.4824, "step": 13470 }, { "epoch": 11.179253112033194, "grad_norm": 21.83297348022461, "learning_rate": 1.5531286307053943e-05, "loss": 0.7025, "step": 13471 }, { "epoch": 11.180082987551867, "grad_norm": 30.576257705688477, "learning_rate": 1.5530954356846475e-05, "loss": 0.8338, "step": 13472 }, { "epoch": 11.180912863070539, "grad_norm": 47.89128112792969, "learning_rate": 1.5530622406639004e-05, "loss": 1.9137, "step": 13473 }, { "epoch": 11.181742738589211, "grad_norm": 20.85703468322754, "learning_rate": 1.5530290456431536e-05, "loss": 0.7206, "step": 13474 }, { "epoch": 11.182572614107883, "grad_norm": 29.267770767211914, "learning_rate": 1.5529958506224068e-05, "loss": 1.4113, "step": 13475 }, { "epoch": 11.183402489626555, "grad_norm": 60.057186126708984, "learning_rate": 1.55296265560166e-05, "loss": 0.6519, "step": 13476 }, { "epoch": 11.184232365145228, "grad_norm": 40.29188537597656, "learning_rate": 1.5529294605809132e-05, "loss": 0.9651, "step": 13477 }, { "epoch": 11.1850622406639, "grad_norm": 55.04912567138672, "learning_rate": 
1.552896265560166e-05, "loss": 1.3035, "step": 13478 }, { "epoch": 11.185892116182572, "grad_norm": 44.871395111083984, "learning_rate": 1.5528630705394193e-05, "loss": 1.074, "step": 13479 }, { "epoch": 11.186721991701244, "grad_norm": 25.61417579650879, "learning_rate": 1.5528298755186725e-05, "loss": 0.6887, "step": 13480 }, { "epoch": 11.187551867219916, "grad_norm": 29.165307998657227, "learning_rate": 1.5527966804979257e-05, "loss": 0.9002, "step": 13481 }, { "epoch": 11.188381742738589, "grad_norm": 27.45404624938965, "learning_rate": 1.5527634854771786e-05, "loss": 1.5171, "step": 13482 }, { "epoch": 11.18921161825726, "grad_norm": 37.65836715698242, "learning_rate": 1.5527302904564318e-05, "loss": 0.7131, "step": 13483 }, { "epoch": 11.190041493775933, "grad_norm": 28.564973831176758, "learning_rate": 1.5526970954356847e-05, "loss": 0.9775, "step": 13484 }, { "epoch": 11.190871369294605, "grad_norm": 30.29228401184082, "learning_rate": 1.552663900414938e-05, "loss": 1.3632, "step": 13485 }, { "epoch": 11.191701244813277, "grad_norm": 19.255043029785156, "learning_rate": 1.552630705394191e-05, "loss": 0.6146, "step": 13486 }, { "epoch": 11.19253112033195, "grad_norm": 18.73171043395996, "learning_rate": 1.552597510373444e-05, "loss": 0.5861, "step": 13487 }, { "epoch": 11.193360995850622, "grad_norm": 111.28555297851562, "learning_rate": 1.552564315352697e-05, "loss": 1.162, "step": 13488 }, { "epoch": 11.194190871369294, "grad_norm": 23.421157836914062, "learning_rate": 1.5525311203319504e-05, "loss": 0.6235, "step": 13489 }, { "epoch": 11.195020746887966, "grad_norm": 31.253480911254883, "learning_rate": 1.5524979253112032e-05, "loss": 0.7666, "step": 13490 }, { "epoch": 11.195850622406638, "grad_norm": 31.70294952392578, "learning_rate": 1.5524647302904565e-05, "loss": 0.9416, "step": 13491 }, { "epoch": 11.19668049792531, "grad_norm": 24.958131790161133, "learning_rate": 1.5524315352697097e-05, "loss": 1.0902, "step": 13492 }, { "epoch": 
11.197510373443983, "grad_norm": 37.053932189941406, "learning_rate": 1.552398340248963e-05, "loss": 1.1105, "step": 13493 }, { "epoch": 11.198340248962655, "grad_norm": 27.120512008666992, "learning_rate": 1.5523651452282157e-05, "loss": 1.1497, "step": 13494 }, { "epoch": 11.199170124481327, "grad_norm": 34.401893615722656, "learning_rate": 1.552331950207469e-05, "loss": 0.8828, "step": 13495 }, { "epoch": 11.2, "grad_norm": 30.572925567626953, "learning_rate": 1.552298755186722e-05, "loss": 1.0587, "step": 13496 }, { "epoch": 11.200829875518671, "grad_norm": 19.302356719970703, "learning_rate": 1.5522655601659754e-05, "loss": 0.2869, "step": 13497 }, { "epoch": 11.201659751037344, "grad_norm": 18.602073669433594, "learning_rate": 1.5522323651452283e-05, "loss": 0.9052, "step": 13498 }, { "epoch": 11.202489626556016, "grad_norm": 13.16601848602295, "learning_rate": 1.5521991701244815e-05, "loss": 0.3668, "step": 13499 }, { "epoch": 11.203319502074688, "grad_norm": 52.52915954589844, "learning_rate": 1.5521659751037347e-05, "loss": 1.3909, "step": 13500 }, { "epoch": 11.20414937759336, "grad_norm": 24.366703033447266, "learning_rate": 1.552132780082988e-05, "loss": 1.1469, "step": 13501 }, { "epoch": 11.204979253112032, "grad_norm": 32.4938850402832, "learning_rate": 1.5520995850622408e-05, "loss": 0.9199, "step": 13502 }, { "epoch": 11.205809128630705, "grad_norm": 26.731523513793945, "learning_rate": 1.552066390041494e-05, "loss": 1.4073, "step": 13503 }, { "epoch": 11.206639004149377, "grad_norm": 37.02077102661133, "learning_rate": 1.5520331950207472e-05, "loss": 1.2936, "step": 13504 }, { "epoch": 11.207468879668049, "grad_norm": 38.7563591003418, "learning_rate": 1.552e-05, "loss": 0.9443, "step": 13505 }, { "epoch": 11.208298755186721, "grad_norm": 36.08129119873047, "learning_rate": 1.5519668049792533e-05, "loss": 1.2779, "step": 13506 }, { "epoch": 11.209128630705393, "grad_norm": 27.23787498474121, "learning_rate": 1.551933609958506e-05, "loss": 0.7392, 
"step": 13507 }, { "epoch": 11.209958506224066, "grad_norm": 20.202064514160156, "learning_rate": 1.5519004149377593e-05, "loss": 0.6546, "step": 13508 }, { "epoch": 11.210788381742738, "grad_norm": 35.474884033203125, "learning_rate": 1.5518672199170126e-05, "loss": 0.9961, "step": 13509 }, { "epoch": 11.21161825726141, "grad_norm": 27.0629940032959, "learning_rate": 1.5518340248962658e-05, "loss": 0.8395, "step": 13510 }, { "epoch": 11.212448132780082, "grad_norm": 22.24888038635254, "learning_rate": 1.5518008298755186e-05, "loss": 1.3192, "step": 13511 }, { "epoch": 11.213278008298754, "grad_norm": 69.07994079589844, "learning_rate": 1.551767634854772e-05, "loss": 0.9128, "step": 13512 }, { "epoch": 11.214107883817427, "grad_norm": 30.411518096923828, "learning_rate": 1.551734439834025e-05, "loss": 0.862, "step": 13513 }, { "epoch": 11.214937759336099, "grad_norm": 39.025634765625, "learning_rate": 1.5517012448132783e-05, "loss": 1.1692, "step": 13514 }, { "epoch": 11.215767634854771, "grad_norm": 36.46155548095703, "learning_rate": 1.551668049792531e-05, "loss": 1.733, "step": 13515 }, { "epoch": 11.216597510373443, "grad_norm": 29.847379684448242, "learning_rate": 1.5516348547717844e-05, "loss": 1.368, "step": 13516 }, { "epoch": 11.217427385892115, "grad_norm": 20.660606384277344, "learning_rate": 1.5516016597510376e-05, "loss": 0.5135, "step": 13517 }, { "epoch": 11.218257261410788, "grad_norm": 34.6572380065918, "learning_rate": 1.5515684647302908e-05, "loss": 0.7066, "step": 13518 }, { "epoch": 11.21908713692946, "grad_norm": 31.168508529663086, "learning_rate": 1.5515352697095436e-05, "loss": 0.8922, "step": 13519 }, { "epoch": 11.219917012448132, "grad_norm": 35.639774322509766, "learning_rate": 1.551502074688797e-05, "loss": 1.2311, "step": 13520 }, { "epoch": 11.220746887966804, "grad_norm": 32.143062591552734, "learning_rate": 1.55146887966805e-05, "loss": 1.0766, "step": 13521 }, { "epoch": 11.221576763485476, "grad_norm": 28.768199920654297, 
"learning_rate": 1.551435684647303e-05, "loss": 0.8521, "step": 13522 }, { "epoch": 11.222406639004149, "grad_norm": 24.132402420043945, "learning_rate": 1.551402489626556e-05, "loss": 1.0098, "step": 13523 }, { "epoch": 11.22323651452282, "grad_norm": 30.924528121948242, "learning_rate": 1.5513692946058094e-05, "loss": 1.2058, "step": 13524 }, { "epoch": 11.224066390041493, "grad_norm": 18.577260971069336, "learning_rate": 1.5513360995850622e-05, "loss": 0.5016, "step": 13525 }, { "epoch": 11.224896265560165, "grad_norm": 79.31043243408203, "learning_rate": 1.5513029045643154e-05, "loss": 1.1314, "step": 13526 }, { "epoch": 11.225726141078837, "grad_norm": 25.132078170776367, "learning_rate": 1.5512697095435687e-05, "loss": 0.5567, "step": 13527 }, { "epoch": 11.22655601659751, "grad_norm": 47.152427673339844, "learning_rate": 1.5512365145228215e-05, "loss": 1.3936, "step": 13528 }, { "epoch": 11.227385892116182, "grad_norm": 20.136817932128906, "learning_rate": 1.5512033195020747e-05, "loss": 0.4701, "step": 13529 }, { "epoch": 11.228215767634854, "grad_norm": 51.61183547973633, "learning_rate": 1.551170124481328e-05, "loss": 1.8496, "step": 13530 }, { "epoch": 11.229045643153526, "grad_norm": 46.885772705078125, "learning_rate": 1.551136929460581e-05, "loss": 1.033, "step": 13531 }, { "epoch": 11.229875518672198, "grad_norm": 30.883045196533203, "learning_rate": 1.551103734439834e-05, "loss": 0.8397, "step": 13532 }, { "epoch": 11.23070539419087, "grad_norm": 42.580421447753906, "learning_rate": 1.5510705394190872e-05, "loss": 1.3855, "step": 13533 }, { "epoch": 11.231535269709543, "grad_norm": 29.366289138793945, "learning_rate": 1.5510373443983404e-05, "loss": 1.0045, "step": 13534 }, { "epoch": 11.232365145228215, "grad_norm": 26.52224349975586, "learning_rate": 1.5510041493775937e-05, "loss": 0.73, "step": 13535 }, { "epoch": 11.233195020746887, "grad_norm": 60.10206604003906, "learning_rate": 1.5509709543568465e-05, "loss": 1.5928, "step": 13536 }, { 
"epoch": 11.23402489626556, "grad_norm": 35.492130279541016, "learning_rate": 1.5509377593360997e-05, "loss": 0.8114, "step": 13537 }, { "epoch": 11.234854771784232, "grad_norm": 16.856769561767578, "learning_rate": 1.550904564315353e-05, "loss": 0.6027, "step": 13538 }, { "epoch": 11.235684647302904, "grad_norm": 24.339252471923828, "learning_rate": 1.550871369294606e-05, "loss": 0.8835, "step": 13539 }, { "epoch": 11.236514522821576, "grad_norm": 21.738807678222656, "learning_rate": 1.550838174273859e-05, "loss": 0.5551, "step": 13540 }, { "epoch": 11.237344398340248, "grad_norm": 41.90132522583008, "learning_rate": 1.5508049792531122e-05, "loss": 0.9104, "step": 13541 }, { "epoch": 11.23817427385892, "grad_norm": 19.87047004699707, "learning_rate": 1.5507717842323655e-05, "loss": 0.5877, "step": 13542 }, { "epoch": 11.239004149377593, "grad_norm": 19.962024688720703, "learning_rate": 1.5507385892116183e-05, "loss": 0.62, "step": 13543 }, { "epoch": 11.239834024896265, "grad_norm": 32.961158752441406, "learning_rate": 1.5507053941908715e-05, "loss": 1.3726, "step": 13544 }, { "epoch": 11.240663900414937, "grad_norm": 32.006866455078125, "learning_rate": 1.5506721991701244e-05, "loss": 1.1605, "step": 13545 }, { "epoch": 11.241493775933609, "grad_norm": 84.94430541992188, "learning_rate": 1.5506390041493776e-05, "loss": 1.3086, "step": 13546 }, { "epoch": 11.242323651452281, "grad_norm": 29.364770889282227, "learning_rate": 1.550605809128631e-05, "loss": 0.8383, "step": 13547 }, { "epoch": 11.243153526970953, "grad_norm": 25.151290893554688, "learning_rate": 1.5505726141078837e-05, "loss": 0.5599, "step": 13548 }, { "epoch": 11.243983402489626, "grad_norm": 20.910968780517578, "learning_rate": 1.550539419087137e-05, "loss": 0.7402, "step": 13549 }, { "epoch": 11.244813278008298, "grad_norm": 34.673583984375, "learning_rate": 1.55050622406639e-05, "loss": 1.4613, "step": 13550 }, { "epoch": 11.24564315352697, "grad_norm": 16.79854965209961, "learning_rate": 
1.5504730290456433e-05, "loss": 0.4933, "step": 13551 }, { "epoch": 11.246473029045642, "grad_norm": 41.76502990722656, "learning_rate": 1.5504398340248962e-05, "loss": 1.0745, "step": 13552 }, { "epoch": 11.247302904564314, "grad_norm": 27.352115631103516, "learning_rate": 1.5504066390041494e-05, "loss": 1.0333, "step": 13553 }, { "epoch": 11.248132780082987, "grad_norm": 24.936199188232422, "learning_rate": 1.5503734439834026e-05, "loss": 0.675, "step": 13554 }, { "epoch": 11.248962655601659, "grad_norm": 50.110198974609375, "learning_rate": 1.550340248962656e-05, "loss": 1.0362, "step": 13555 }, { "epoch": 11.249792531120331, "grad_norm": 34.33130645751953, "learning_rate": 1.550307053941909e-05, "loss": 0.4885, "step": 13556 }, { "epoch": 11.250622406639003, "grad_norm": 36.70589828491211, "learning_rate": 1.550273858921162e-05, "loss": 1.3229, "step": 13557 }, { "epoch": 11.251452282157675, "grad_norm": 32.28785705566406, "learning_rate": 1.550240663900415e-05, "loss": 0.7911, "step": 13558 }, { "epoch": 11.252282157676348, "grad_norm": 32.43205642700195, "learning_rate": 1.5502074688796683e-05, "loss": 0.5843, "step": 13559 }, { "epoch": 11.25311203319502, "grad_norm": 24.17051887512207, "learning_rate": 1.5501742738589212e-05, "loss": 0.713, "step": 13560 }, { "epoch": 11.253941908713692, "grad_norm": 39.54808807373047, "learning_rate": 1.5501410788381744e-05, "loss": 0.8206, "step": 13561 }, { "epoch": 11.254771784232364, "grad_norm": 21.9354305267334, "learning_rate": 1.5501078838174276e-05, "loss": 0.6164, "step": 13562 }, { "epoch": 11.255601659751036, "grad_norm": 23.549747467041016, "learning_rate": 1.5500746887966805e-05, "loss": 0.6556, "step": 13563 }, { "epoch": 11.256431535269709, "grad_norm": 22.445701599121094, "learning_rate": 1.5500414937759337e-05, "loss": 0.5009, "step": 13564 }, { "epoch": 11.25726141078838, "grad_norm": 29.394445419311523, "learning_rate": 1.550008298755187e-05, "loss": 0.5484, "step": 13565 }, { "epoch": 
11.258091286307055, "grad_norm": 28.262821197509766, "learning_rate": 1.5499751037344398e-05, "loss": 1.0832, "step": 13566 }, { "epoch": 11.258921161825727, "grad_norm": 22.092559814453125, "learning_rate": 1.549941908713693e-05, "loss": 1.0722, "step": 13567 }, { "epoch": 11.2597510373444, "grad_norm": 27.409164428710938, "learning_rate": 1.5499087136929462e-05, "loss": 0.8584, "step": 13568 }, { "epoch": 11.260580912863071, "grad_norm": 38.23835754394531, "learning_rate": 1.549875518672199e-05, "loss": 1.0096, "step": 13569 }, { "epoch": 11.261410788381744, "grad_norm": 23.061908721923828, "learning_rate": 1.5498423236514523e-05, "loss": 1.0654, "step": 13570 }, { "epoch": 11.262240663900416, "grad_norm": 32.87773132324219, "learning_rate": 1.5498091286307055e-05, "loss": 0.931, "step": 13571 }, { "epoch": 11.263070539419088, "grad_norm": 23.158767700195312, "learning_rate": 1.5497759336099587e-05, "loss": 0.536, "step": 13572 }, { "epoch": 11.26390041493776, "grad_norm": 28.350984573364258, "learning_rate": 1.5497427385892116e-05, "loss": 0.7769, "step": 13573 }, { "epoch": 11.264730290456432, "grad_norm": 35.546119689941406, "learning_rate": 1.5497095435684648e-05, "loss": 1.065, "step": 13574 }, { "epoch": 11.265560165975105, "grad_norm": 30.678136825561523, "learning_rate": 1.549676348547718e-05, "loss": 0.5453, "step": 13575 }, { "epoch": 11.266390041493777, "grad_norm": 50.27688217163086, "learning_rate": 1.5496431535269712e-05, "loss": 0.8112, "step": 13576 }, { "epoch": 11.267219917012449, "grad_norm": 35.23858642578125, "learning_rate": 1.549609958506224e-05, "loss": 0.8416, "step": 13577 }, { "epoch": 11.268049792531121, "grad_norm": 27.506393432617188, "learning_rate": 1.5495767634854773e-05, "loss": 0.8116, "step": 13578 }, { "epoch": 11.268879668049793, "grad_norm": 65.50526428222656, "learning_rate": 1.5495435684647305e-05, "loss": 1.7412, "step": 13579 }, { "epoch": 11.269709543568466, "grad_norm": 19.396495819091797, "learning_rate": 
1.5495103734439837e-05, "loss": 0.5277, "step": 13580 }, { "epoch": 11.270539419087138, "grad_norm": 36.188785552978516, "learning_rate": 1.5494771784232366e-05, "loss": 0.8022, "step": 13581 }, { "epoch": 11.27136929460581, "grad_norm": 31.16643714904785, "learning_rate": 1.5494439834024898e-05, "loss": 0.7406, "step": 13582 }, { "epoch": 11.272199170124482, "grad_norm": 23.522253036499023, "learning_rate": 1.5494107883817427e-05, "loss": 0.7824, "step": 13583 }, { "epoch": 11.273029045643154, "grad_norm": 21.69644546508789, "learning_rate": 1.549377593360996e-05, "loss": 0.579, "step": 13584 }, { "epoch": 11.273858921161827, "grad_norm": 19.867464065551758, "learning_rate": 1.549344398340249e-05, "loss": 0.7179, "step": 13585 }, { "epoch": 11.274688796680499, "grad_norm": 33.636722564697266, "learning_rate": 1.549311203319502e-05, "loss": 0.7401, "step": 13586 }, { "epoch": 11.275518672199171, "grad_norm": 57.29033279418945, "learning_rate": 1.5492780082987552e-05, "loss": 1.5663, "step": 13587 }, { "epoch": 11.276348547717843, "grad_norm": 40.968360900878906, "learning_rate": 1.5492448132780084e-05, "loss": 0.9432, "step": 13588 }, { "epoch": 11.277178423236515, "grad_norm": 38.47065353393555, "learning_rate": 1.5492116182572616e-05, "loss": 0.9796, "step": 13589 }, { "epoch": 11.278008298755188, "grad_norm": 30.500513076782227, "learning_rate": 1.5491784232365145e-05, "loss": 0.7098, "step": 13590 }, { "epoch": 11.27883817427386, "grad_norm": 41.55025863647461, "learning_rate": 1.5491452282157677e-05, "loss": 0.8998, "step": 13591 }, { "epoch": 11.279668049792532, "grad_norm": 44.95449447631836, "learning_rate": 1.549112033195021e-05, "loss": 1.3156, "step": 13592 }, { "epoch": 11.280497925311204, "grad_norm": 36.639305114746094, "learning_rate": 1.549078838174274e-05, "loss": 1.3106, "step": 13593 }, { "epoch": 11.281327800829876, "grad_norm": 49.9825553894043, "learning_rate": 1.549045643153527e-05, "loss": 0.8312, "step": 13594 }, { "epoch": 
11.282157676348548, "grad_norm": 34.28514099121094, "learning_rate": 1.5490124481327802e-05, "loss": 1.0494, "step": 13595 }, { "epoch": 11.28298755186722, "grad_norm": 34.54429626464844, "learning_rate": 1.5489792531120334e-05, "loss": 1.2585, "step": 13596 }, { "epoch": 11.283817427385893, "grad_norm": 26.847476959228516, "learning_rate": 1.5489460580912866e-05, "loss": 0.6251, "step": 13597 }, { "epoch": 11.284647302904565, "grad_norm": 26.14539337158203, "learning_rate": 1.5489128630705395e-05, "loss": 0.8492, "step": 13598 }, { "epoch": 11.285477178423237, "grad_norm": 55.06449890136719, "learning_rate": 1.5488796680497927e-05, "loss": 0.9323, "step": 13599 }, { "epoch": 11.28630705394191, "grad_norm": 59.473976135253906, "learning_rate": 1.548846473029046e-05, "loss": 1.29, "step": 13600 }, { "epoch": 11.287136929460582, "grad_norm": 23.14409828186035, "learning_rate": 1.5488132780082988e-05, "loss": 0.6637, "step": 13601 }, { "epoch": 11.287966804979254, "grad_norm": 27.626693725585938, "learning_rate": 1.548780082987552e-05, "loss": 1.0564, "step": 13602 }, { "epoch": 11.288796680497926, "grad_norm": 50.44826889038086, "learning_rate": 1.5487468879668052e-05, "loss": 0.8494, "step": 13603 }, { "epoch": 11.289626556016598, "grad_norm": 28.57015037536621, "learning_rate": 1.548713692946058e-05, "loss": 0.9626, "step": 13604 }, { "epoch": 11.29045643153527, "grad_norm": 55.1053466796875, "learning_rate": 1.5486804979253113e-05, "loss": 1.2025, "step": 13605 }, { "epoch": 11.291286307053943, "grad_norm": 42.52720260620117, "learning_rate": 1.5486473029045642e-05, "loss": 1.1822, "step": 13606 }, { "epoch": 11.292116182572615, "grad_norm": 19.889572143554688, "learning_rate": 1.5486141078838174e-05, "loss": 0.4277, "step": 13607 }, { "epoch": 11.292946058091287, "grad_norm": 19.495113372802734, "learning_rate": 1.5485809128630706e-05, "loss": 0.5718, "step": 13608 }, { "epoch": 11.29377593360996, "grad_norm": 26.950876235961914, "learning_rate": 
1.5485477178423238e-05, "loss": 0.9648, "step": 13609 }, { "epoch": 11.294605809128631, "grad_norm": 37.84157180786133, "learning_rate": 1.548514522821577e-05, "loss": 1.0443, "step": 13610 }, { "epoch": 11.295435684647304, "grad_norm": 24.106794357299805, "learning_rate": 1.54848132780083e-05, "loss": 0.6578, "step": 13611 }, { "epoch": 11.296265560165976, "grad_norm": 24.975507736206055, "learning_rate": 1.548448132780083e-05, "loss": 0.7918, "step": 13612 }, { "epoch": 11.297095435684648, "grad_norm": 37.42241668701172, "learning_rate": 1.5484149377593363e-05, "loss": 1.3023, "step": 13613 }, { "epoch": 11.29792531120332, "grad_norm": 35.40402603149414, "learning_rate": 1.5483817427385895e-05, "loss": 1.2843, "step": 13614 }, { "epoch": 11.298755186721992, "grad_norm": 38.07725524902344, "learning_rate": 1.5483485477178424e-05, "loss": 1.1279, "step": 13615 }, { "epoch": 11.299585062240665, "grad_norm": 46.231258392333984, "learning_rate": 1.5483153526970956e-05, "loss": 0.7767, "step": 13616 }, { "epoch": 11.300414937759337, "grad_norm": 26.247753143310547, "learning_rate": 1.5482821576763488e-05, "loss": 0.7471, "step": 13617 }, { "epoch": 11.301244813278009, "grad_norm": 45.9449348449707, "learning_rate": 1.548248962655602e-05, "loss": 0.9722, "step": 13618 }, { "epoch": 11.302074688796681, "grad_norm": 32.86830139160156, "learning_rate": 1.548215767634855e-05, "loss": 0.7047, "step": 13619 }, { "epoch": 11.302904564315353, "grad_norm": 59.05746078491211, "learning_rate": 1.548182572614108e-05, "loss": 0.8442, "step": 13620 }, { "epoch": 11.303734439834026, "grad_norm": 33.45981216430664, "learning_rate": 1.5481493775933613e-05, "loss": 0.7512, "step": 13621 }, { "epoch": 11.304564315352698, "grad_norm": 35.2508659362793, "learning_rate": 1.5481161825726142e-05, "loss": 1.1385, "step": 13622 }, { "epoch": 11.30539419087137, "grad_norm": 46.38847351074219, "learning_rate": 1.5480829875518674e-05, "loss": 1.166, "step": 13623 }, { "epoch": 11.306224066390042, 
"grad_norm": 32.54387664794922, "learning_rate": 1.5480497925311203e-05, "loss": 1.1705, "step": 13624 }, { "epoch": 11.307053941908714, "grad_norm": 24.897891998291016, "learning_rate": 1.5480165975103735e-05, "loss": 0.735, "step": 13625 }, { "epoch": 11.307883817427387, "grad_norm": 58.81105422973633, "learning_rate": 1.5479834024896267e-05, "loss": 1.2208, "step": 13626 }, { "epoch": 11.308713692946059, "grad_norm": 28.82060432434082, "learning_rate": 1.5479502074688796e-05, "loss": 0.8089, "step": 13627 }, { "epoch": 11.309543568464731, "grad_norm": 45.69380569458008, "learning_rate": 1.5479170124481328e-05, "loss": 1.4049, "step": 13628 }, { "epoch": 11.310373443983403, "grad_norm": 45.515350341796875, "learning_rate": 1.547883817427386e-05, "loss": 0.986, "step": 13629 }, { "epoch": 11.311203319502075, "grad_norm": 46.550865173339844, "learning_rate": 1.5478506224066392e-05, "loss": 0.6839, "step": 13630 }, { "epoch": 11.312033195020748, "grad_norm": 25.786434173583984, "learning_rate": 1.547817427385892e-05, "loss": 0.9162, "step": 13631 }, { "epoch": 11.31286307053942, "grad_norm": 21.698219299316406, "learning_rate": 1.5477842323651453e-05, "loss": 0.4147, "step": 13632 }, { "epoch": 11.313692946058092, "grad_norm": 54.466339111328125, "learning_rate": 1.5477510373443985e-05, "loss": 2.0456, "step": 13633 }, { "epoch": 11.314522821576764, "grad_norm": 58.39303207397461, "learning_rate": 1.5477178423236517e-05, "loss": 0.9813, "step": 13634 }, { "epoch": 11.315352697095436, "grad_norm": 22.650299072265625, "learning_rate": 1.547684647302905e-05, "loss": 1.0849, "step": 13635 }, { "epoch": 11.316182572614109, "grad_norm": 49.37322998046875, "learning_rate": 1.5476514522821578e-05, "loss": 1.1377, "step": 13636 }, { "epoch": 11.31701244813278, "grad_norm": 29.99172019958496, "learning_rate": 1.547618257261411e-05, "loss": 0.9583, "step": 13637 }, { "epoch": 11.317842323651453, "grad_norm": 25.35952377319336, "learning_rate": 1.5475850622406642e-05, "loss": 
1.1534, "step": 13638 }, { "epoch": 11.318672199170125, "grad_norm": 40.463661193847656, "learning_rate": 1.547551867219917e-05, "loss": 0.7161, "step": 13639 }, { "epoch": 11.319502074688797, "grad_norm": 21.893077850341797, "learning_rate": 1.5475186721991703e-05, "loss": 1.3441, "step": 13640 }, { "epoch": 11.32033195020747, "grad_norm": 52.30380630493164, "learning_rate": 1.5474854771784235e-05, "loss": 1.8878, "step": 13641 }, { "epoch": 11.321161825726142, "grad_norm": 33.07826614379883, "learning_rate": 1.5474522821576764e-05, "loss": 0.7912, "step": 13642 }, { "epoch": 11.321991701244814, "grad_norm": 33.62781524658203, "learning_rate": 1.5474190871369296e-05, "loss": 0.9715, "step": 13643 }, { "epoch": 11.322821576763486, "grad_norm": 29.76824951171875, "learning_rate": 1.5473858921161825e-05, "loss": 1.1698, "step": 13644 }, { "epoch": 11.323651452282158, "grad_norm": 52.92623519897461, "learning_rate": 1.5473526970954357e-05, "loss": 0.7572, "step": 13645 }, { "epoch": 11.32448132780083, "grad_norm": 26.271512985229492, "learning_rate": 1.547319502074689e-05, "loss": 0.7898, "step": 13646 }, { "epoch": 11.325311203319503, "grad_norm": 36.30817794799805, "learning_rate": 1.547286307053942e-05, "loss": 0.7331, "step": 13647 }, { "epoch": 11.326141078838175, "grad_norm": 27.498384475708008, "learning_rate": 1.547253112033195e-05, "loss": 0.7294, "step": 13648 }, { "epoch": 11.326970954356847, "grad_norm": 30.79348373413086, "learning_rate": 1.547219917012448e-05, "loss": 0.6237, "step": 13649 }, { "epoch": 11.32780082987552, "grad_norm": 33.006309509277344, "learning_rate": 1.5471867219917014e-05, "loss": 1.0519, "step": 13650 }, { "epoch": 11.328630705394191, "grad_norm": 24.84119987487793, "learning_rate": 1.5471535269709546e-05, "loss": 1.062, "step": 13651 }, { "epoch": 11.329460580912864, "grad_norm": 19.8292293548584, "learning_rate": 1.5471203319502075e-05, "loss": 0.5492, "step": 13652 }, { "epoch": 11.330290456431536, "grad_norm": 
28.64804458618164, "learning_rate": 1.5470871369294607e-05, "loss": 0.8849, "step": 13653 }, { "epoch": 11.331120331950208, "grad_norm": 39.19512176513672, "learning_rate": 1.547053941908714e-05, "loss": 0.9312, "step": 13654 }, { "epoch": 11.33195020746888, "grad_norm": 29.622501373291016, "learning_rate": 1.547020746887967e-05, "loss": 1.3977, "step": 13655 }, { "epoch": 11.332780082987552, "grad_norm": 28.97798728942871, "learning_rate": 1.54698755186722e-05, "loss": 1.0457, "step": 13656 }, { "epoch": 11.333609958506225, "grad_norm": 31.16741943359375, "learning_rate": 1.5469543568464732e-05, "loss": 0.6363, "step": 13657 }, { "epoch": 11.334439834024897, "grad_norm": 19.62911605834961, "learning_rate": 1.5469211618257264e-05, "loss": 0.2762, "step": 13658 }, { "epoch": 11.335269709543569, "grad_norm": 29.196420669555664, "learning_rate": 1.5468879668049796e-05, "loss": 1.0042, "step": 13659 }, { "epoch": 11.336099585062241, "grad_norm": 61.91983413696289, "learning_rate": 1.5468547717842325e-05, "loss": 1.2676, "step": 13660 }, { "epoch": 11.336929460580913, "grad_norm": 29.708816528320312, "learning_rate": 1.5468215767634857e-05, "loss": 1.1516, "step": 13661 }, { "epoch": 11.337759336099586, "grad_norm": 32.63835525512695, "learning_rate": 1.5467883817427386e-05, "loss": 1.3145, "step": 13662 }, { "epoch": 11.338589211618258, "grad_norm": 30.220867156982422, "learning_rate": 1.5467551867219918e-05, "loss": 0.8664, "step": 13663 }, { "epoch": 11.33941908713693, "grad_norm": 40.62736892700195, "learning_rate": 1.546721991701245e-05, "loss": 0.9966, "step": 13664 }, { "epoch": 11.340248962655602, "grad_norm": 52.69572830200195, "learning_rate": 1.546688796680498e-05, "loss": 1.1139, "step": 13665 }, { "epoch": 11.341078838174274, "grad_norm": 20.079235076904297, "learning_rate": 1.546655601659751e-05, "loss": 0.4743, "step": 13666 }, { "epoch": 11.341908713692947, "grad_norm": 25.798898696899414, "learning_rate": 1.5466224066390043e-05, "loss": 0.741, "step": 
13667 }, { "epoch": 11.342738589211619, "grad_norm": 32.102142333984375, "learning_rate": 1.5465892116182575e-05, "loss": 1.2432, "step": 13668 }, { "epoch": 11.343568464730291, "grad_norm": 52.77617645263672, "learning_rate": 1.5465560165975104e-05, "loss": 1.3699, "step": 13669 }, { "epoch": 11.344398340248963, "grad_norm": 35.57265090942383, "learning_rate": 1.5465228215767636e-05, "loss": 0.8914, "step": 13670 }, { "epoch": 11.345228215767635, "grad_norm": 23.773317337036133, "learning_rate": 1.5464896265560168e-05, "loss": 1.0935, "step": 13671 }, { "epoch": 11.346058091286308, "grad_norm": 28.25395393371582, "learning_rate": 1.54645643153527e-05, "loss": 0.4162, "step": 13672 }, { "epoch": 11.34688796680498, "grad_norm": 26.747379302978516, "learning_rate": 1.546423236514523e-05, "loss": 0.7749, "step": 13673 }, { "epoch": 11.347717842323652, "grad_norm": 23.42445945739746, "learning_rate": 1.546390041493776e-05, "loss": 0.801, "step": 13674 }, { "epoch": 11.348547717842324, "grad_norm": 26.032697677612305, "learning_rate": 1.5463568464730293e-05, "loss": 1.2259, "step": 13675 }, { "epoch": 11.349377593360996, "grad_norm": 27.366300582885742, "learning_rate": 1.5463236514522825e-05, "loss": 0.4588, "step": 13676 }, { "epoch": 11.350207468879669, "grad_norm": 41.70001220703125, "learning_rate": 1.5462904564315354e-05, "loss": 1.0871, "step": 13677 }, { "epoch": 11.35103734439834, "grad_norm": 37.72441482543945, "learning_rate": 1.5462572614107886e-05, "loss": 0.4542, "step": 13678 }, { "epoch": 11.351867219917013, "grad_norm": 52.5670051574707, "learning_rate": 1.5462240663900418e-05, "loss": 1.2687, "step": 13679 }, { "epoch": 11.352697095435685, "grad_norm": 43.12263870239258, "learning_rate": 1.5461908713692947e-05, "loss": 1.5751, "step": 13680 }, { "epoch": 11.353526970954357, "grad_norm": 25.863746643066406, "learning_rate": 1.546157676348548e-05, "loss": 1.3489, "step": 13681 }, { "epoch": 11.35435684647303, "grad_norm": 35.54370880126953, 
"learning_rate": 1.546124481327801e-05, "loss": 1.254, "step": 13682 }, { "epoch": 11.355186721991702, "grad_norm": 21.334508895874023, "learning_rate": 1.546091286307054e-05, "loss": 1.0339, "step": 13683 }, { "epoch": 11.356016597510374, "grad_norm": 31.345069885253906, "learning_rate": 1.546058091286307e-05, "loss": 0.9833, "step": 13684 }, { "epoch": 11.356846473029046, "grad_norm": 29.06247901916504, "learning_rate": 1.54602489626556e-05, "loss": 0.5918, "step": 13685 }, { "epoch": 11.357676348547718, "grad_norm": 31.053991317749023, "learning_rate": 1.5459917012448132e-05, "loss": 0.8582, "step": 13686 }, { "epoch": 11.35850622406639, "grad_norm": 32.7894172668457, "learning_rate": 1.5459585062240664e-05, "loss": 0.7977, "step": 13687 }, { "epoch": 11.359336099585063, "grad_norm": 23.6597843170166, "learning_rate": 1.5459253112033197e-05, "loss": 0.6413, "step": 13688 }, { "epoch": 11.360165975103735, "grad_norm": 12.111703872680664, "learning_rate": 1.545892116182573e-05, "loss": 0.4717, "step": 13689 }, { "epoch": 11.360995850622407, "grad_norm": 26.269289016723633, "learning_rate": 1.5458589211618257e-05, "loss": 1.0873, "step": 13690 }, { "epoch": 11.36182572614108, "grad_norm": 26.443645477294922, "learning_rate": 1.545825726141079e-05, "loss": 0.4665, "step": 13691 }, { "epoch": 11.362655601659752, "grad_norm": 25.172115325927734, "learning_rate": 1.545792531120332e-05, "loss": 1.0119, "step": 13692 }, { "epoch": 11.363485477178424, "grad_norm": 21.98232650756836, "learning_rate": 1.5457593360995854e-05, "loss": 0.7702, "step": 13693 }, { "epoch": 11.364315352697096, "grad_norm": 27.264604568481445, "learning_rate": 1.5457261410788382e-05, "loss": 0.6195, "step": 13694 }, { "epoch": 11.365145228215768, "grad_norm": 32.79033279418945, "learning_rate": 1.5456929460580915e-05, "loss": 0.9809, "step": 13695 }, { "epoch": 11.36597510373444, "grad_norm": 27.53361701965332, "learning_rate": 1.5456597510373447e-05, "loss": 0.7451, "step": 13696 }, { "epoch": 
11.366804979253113, "grad_norm": 40.483089447021484, "learning_rate": 1.545626556016598e-05, "loss": 1.7747, "step": 13697 }, { "epoch": 11.367634854771785, "grad_norm": 37.06024169921875, "learning_rate": 1.5455933609958508e-05, "loss": 0.4906, "step": 13698 }, { "epoch": 11.368464730290457, "grad_norm": 18.72709083557129, "learning_rate": 1.545560165975104e-05, "loss": 0.548, "step": 13699 }, { "epoch": 11.369294605809129, "grad_norm": 26.06487464904785, "learning_rate": 1.545526970954357e-05, "loss": 0.6434, "step": 13700 }, { "epoch": 11.370124481327801, "grad_norm": 28.969371795654297, "learning_rate": 1.54549377593361e-05, "loss": 0.5328, "step": 13701 }, { "epoch": 11.370954356846473, "grad_norm": 46.88198471069336, "learning_rate": 1.5454605809128633e-05, "loss": 1.0636, "step": 13702 }, { "epoch": 11.371784232365146, "grad_norm": 34.90916442871094, "learning_rate": 1.545427385892116e-05, "loss": 0.7449, "step": 13703 }, { "epoch": 11.372614107883818, "grad_norm": 33.861209869384766, "learning_rate": 1.5453941908713693e-05, "loss": 1.2189, "step": 13704 }, { "epoch": 11.37344398340249, "grad_norm": 31.03929901123047, "learning_rate": 1.5453609958506225e-05, "loss": 1.2997, "step": 13705 }, { "epoch": 11.374273858921162, "grad_norm": 21.653308868408203, "learning_rate": 1.5453278008298754e-05, "loss": 0.7281, "step": 13706 }, { "epoch": 11.375103734439834, "grad_norm": 55.36621856689453, "learning_rate": 1.5452946058091286e-05, "loss": 1.3765, "step": 13707 }, { "epoch": 11.375933609958507, "grad_norm": 41.36511993408203, "learning_rate": 1.545261410788382e-05, "loss": 0.678, "step": 13708 }, { "epoch": 11.376763485477179, "grad_norm": 38.761138916015625, "learning_rate": 1.545228215767635e-05, "loss": 1.3851, "step": 13709 }, { "epoch": 11.377593360995851, "grad_norm": 20.99536895751953, "learning_rate": 1.545195020746888e-05, "loss": 0.7255, "step": 13710 }, { "epoch": 11.378423236514523, "grad_norm": 18.670957565307617, "learning_rate": 
1.545161825726141e-05, "loss": 0.5548, "step": 13711 }, { "epoch": 11.379253112033195, "grad_norm": 31.75310707092285, "learning_rate": 1.5451286307053943e-05, "loss": 0.9756, "step": 13712 }, { "epoch": 11.380082987551868, "grad_norm": 57.11746597290039, "learning_rate": 1.5450954356846476e-05, "loss": 0.5548, "step": 13713 }, { "epoch": 11.38091286307054, "grad_norm": 20.357460021972656, "learning_rate": 1.5450622406639008e-05, "loss": 0.6885, "step": 13714 }, { "epoch": 11.381742738589212, "grad_norm": 36.89778137207031, "learning_rate": 1.5450290456431536e-05, "loss": 1.0441, "step": 13715 }, { "epoch": 11.382572614107884, "grad_norm": 43.370609283447266, "learning_rate": 1.544995850622407e-05, "loss": 1.9606, "step": 13716 }, { "epoch": 11.383402489626556, "grad_norm": 25.554269790649414, "learning_rate": 1.54496265560166e-05, "loss": 0.9547, "step": 13717 }, { "epoch": 11.384232365145229, "grad_norm": 25.090816497802734, "learning_rate": 1.544929460580913e-05, "loss": 1.0885, "step": 13718 }, { "epoch": 11.3850622406639, "grad_norm": 48.6373291015625, "learning_rate": 1.544896265560166e-05, "loss": 0.8986, "step": 13719 }, { "epoch": 11.385892116182573, "grad_norm": 39.6986083984375, "learning_rate": 1.5448630705394194e-05, "loss": 0.9153, "step": 13720 }, { "epoch": 11.386721991701245, "grad_norm": 25.5577392578125, "learning_rate": 1.5448298755186722e-05, "loss": 0.6055, "step": 13721 }, { "epoch": 11.387551867219917, "grad_norm": 30.367847442626953, "learning_rate": 1.5447966804979254e-05, "loss": 0.9719, "step": 13722 }, { "epoch": 11.38838174273859, "grad_norm": 45.918270111083984, "learning_rate": 1.5447634854771783e-05, "loss": 2.1222, "step": 13723 }, { "epoch": 11.389211618257262, "grad_norm": 32.365108489990234, "learning_rate": 1.5447302904564315e-05, "loss": 1.2085, "step": 13724 }, { "epoch": 11.390041493775934, "grad_norm": 38.97664260864258, "learning_rate": 1.5446970954356847e-05, "loss": 0.9537, "step": 13725 }, { "epoch": 11.390871369294606, 
"grad_norm": 26.718868255615234, "learning_rate": 1.544663900414938e-05, "loss": 0.6678, "step": 13726 }, { "epoch": 11.391701244813278, "grad_norm": 37.31155776977539, "learning_rate": 1.5446307053941908e-05, "loss": 0.9096, "step": 13727 }, { "epoch": 11.39253112033195, "grad_norm": 58.268619537353516, "learning_rate": 1.544597510373444e-05, "loss": 1.014, "step": 13728 }, { "epoch": 11.393360995850623, "grad_norm": 27.011775970458984, "learning_rate": 1.5445643153526972e-05, "loss": 1.4991, "step": 13729 }, { "epoch": 11.394190871369295, "grad_norm": 25.224754333496094, "learning_rate": 1.5445311203319504e-05, "loss": 1.0382, "step": 13730 }, { "epoch": 11.395020746887967, "grad_norm": 33.7278938293457, "learning_rate": 1.5444979253112033e-05, "loss": 0.5983, "step": 13731 }, { "epoch": 11.39585062240664, "grad_norm": 26.873733520507812, "learning_rate": 1.5444647302904565e-05, "loss": 0.5529, "step": 13732 }, { "epoch": 11.396680497925312, "grad_norm": 28.6890869140625, "learning_rate": 1.5444315352697097e-05, "loss": 0.6102, "step": 13733 }, { "epoch": 11.397510373443984, "grad_norm": 28.960201263427734, "learning_rate": 1.544398340248963e-05, "loss": 0.5579, "step": 13734 }, { "epoch": 11.398340248962656, "grad_norm": 41.04633712768555, "learning_rate": 1.5443651452282158e-05, "loss": 1.4345, "step": 13735 }, { "epoch": 11.399170124481328, "grad_norm": 42.864105224609375, "learning_rate": 1.544331950207469e-05, "loss": 1.1922, "step": 13736 }, { "epoch": 11.4, "grad_norm": 33.86054611206055, "learning_rate": 1.5442987551867222e-05, "loss": 1.5297, "step": 13737 }, { "epoch": 11.400829875518673, "grad_norm": 59.1304817199707, "learning_rate": 1.5442655601659755e-05, "loss": 0.6175, "step": 13738 }, { "epoch": 11.401659751037345, "grad_norm": 31.934202194213867, "learning_rate": 1.5442323651452283e-05, "loss": 0.9928, "step": 13739 }, { "epoch": 11.402489626556017, "grad_norm": 68.78337097167969, "learning_rate": 1.5441991701244815e-05, "loss": 0.9598, "step": 
13740 }, { "epoch": 11.40331950207469, "grad_norm": 29.761667251586914, "learning_rate": 1.5441659751037344e-05, "loss": 0.8804, "step": 13741 }, { "epoch": 11.404149377593361, "grad_norm": 26.476734161376953, "learning_rate": 1.5441327800829876e-05, "loss": 0.6735, "step": 13742 }, { "epoch": 11.404979253112034, "grad_norm": 32.135101318359375, "learning_rate": 1.544099585062241e-05, "loss": 0.7543, "step": 13743 }, { "epoch": 11.405809128630706, "grad_norm": 23.577625274658203, "learning_rate": 1.5440663900414937e-05, "loss": 0.8376, "step": 13744 }, { "epoch": 11.406639004149378, "grad_norm": 22.919536590576172, "learning_rate": 1.544033195020747e-05, "loss": 1.0388, "step": 13745 }, { "epoch": 11.40746887966805, "grad_norm": 39.94142532348633, "learning_rate": 1.544e-05, "loss": 0.9367, "step": 13746 }, { "epoch": 11.408298755186722, "grad_norm": 31.71872901916504, "learning_rate": 1.5439668049792533e-05, "loss": 0.6823, "step": 13747 }, { "epoch": 11.409128630705395, "grad_norm": 63.54988098144531, "learning_rate": 1.5439336099585062e-05, "loss": 0.6764, "step": 13748 }, { "epoch": 11.409958506224067, "grad_norm": 17.191272735595703, "learning_rate": 1.5439004149377594e-05, "loss": 0.5758, "step": 13749 }, { "epoch": 11.410788381742739, "grad_norm": 19.632448196411133, "learning_rate": 1.5438672199170126e-05, "loss": 0.5303, "step": 13750 }, { "epoch": 11.411618257261411, "grad_norm": 28.56118392944336, "learning_rate": 1.543834024896266e-05, "loss": 1.0022, "step": 13751 }, { "epoch": 11.412448132780083, "grad_norm": 35.76969528198242, "learning_rate": 1.5438008298755187e-05, "loss": 1.2346, "step": 13752 }, { "epoch": 11.413278008298755, "grad_norm": 21.676166534423828, "learning_rate": 1.543767634854772e-05, "loss": 0.6306, "step": 13753 }, { "epoch": 11.414107883817428, "grad_norm": 65.16837310791016, "learning_rate": 1.543734439834025e-05, "loss": 2.3289, "step": 13754 }, { "epoch": 11.4149377593361, "grad_norm": 32.51539993286133, "learning_rate": 
1.5437012448132783e-05, "loss": 0.9421, "step": 13755 }, { "epoch": 11.415767634854772, "grad_norm": 30.322294235229492, "learning_rate": 1.5436680497925312e-05, "loss": 0.5899, "step": 13756 }, { "epoch": 11.416597510373444, "grad_norm": 50.73400115966797, "learning_rate": 1.5436348547717844e-05, "loss": 0.6032, "step": 13757 }, { "epoch": 11.417427385892116, "grad_norm": 35.28937530517578, "learning_rate": 1.5436016597510376e-05, "loss": 1.086, "step": 13758 }, { "epoch": 11.418257261410789, "grad_norm": 21.924976348876953, "learning_rate": 1.5435684647302905e-05, "loss": 0.7213, "step": 13759 }, { "epoch": 11.41908713692946, "grad_norm": 13.913114547729492, "learning_rate": 1.5435352697095437e-05, "loss": 0.41, "step": 13760 }, { "epoch": 11.419917012448133, "grad_norm": 17.15191650390625, "learning_rate": 1.5435020746887966e-05, "loss": 0.4494, "step": 13761 }, { "epoch": 11.420746887966805, "grad_norm": 26.07701873779297, "learning_rate": 1.5434688796680498e-05, "loss": 0.7176, "step": 13762 }, { "epoch": 11.421576763485477, "grad_norm": 17.26490592956543, "learning_rate": 1.543435684647303e-05, "loss": 0.6902, "step": 13763 }, { "epoch": 11.42240663900415, "grad_norm": 60.4210090637207, "learning_rate": 1.543402489626556e-05, "loss": 0.831, "step": 13764 }, { "epoch": 11.423236514522822, "grad_norm": 18.87513542175293, "learning_rate": 1.543369294605809e-05, "loss": 0.6409, "step": 13765 }, { "epoch": 11.424066390041494, "grad_norm": 35.52062225341797, "learning_rate": 1.5433360995850623e-05, "loss": 0.7662, "step": 13766 }, { "epoch": 11.424896265560166, "grad_norm": 25.663179397583008, "learning_rate": 1.5433029045643155e-05, "loss": 0.4997, "step": 13767 }, { "epoch": 11.425726141078838, "grad_norm": 23.501235961914062, "learning_rate": 1.5432697095435687e-05, "loss": 1.0436, "step": 13768 }, { "epoch": 11.42655601659751, "grad_norm": 29.174150466918945, "learning_rate": 1.5432365145228216e-05, "loss": 0.4973, "step": 13769 }, { "epoch": 
11.427385892116183, "grad_norm": 35.95869445800781, "learning_rate": 1.5432033195020748e-05, "loss": 0.5661, "step": 13770 }, { "epoch": 11.428215767634855, "grad_norm": 38.581382751464844, "learning_rate": 1.543170124481328e-05, "loss": 1.3495, "step": 13771 }, { "epoch": 11.429045643153527, "grad_norm": 25.443981170654297, "learning_rate": 1.5431369294605812e-05, "loss": 0.6676, "step": 13772 }, { "epoch": 11.4298755186722, "grad_norm": 71.15544891357422, "learning_rate": 1.543103734439834e-05, "loss": 1.0825, "step": 13773 }, { "epoch": 11.430705394190872, "grad_norm": 29.297300338745117, "learning_rate": 1.5430705394190873e-05, "loss": 0.4944, "step": 13774 }, { "epoch": 11.431535269709544, "grad_norm": 61.4461669921875, "learning_rate": 1.5430373443983405e-05, "loss": 1.3284, "step": 13775 }, { "epoch": 11.432365145228216, "grad_norm": 41.50499725341797, "learning_rate": 1.5430041493775937e-05, "loss": 0.9317, "step": 13776 }, { "epoch": 11.433195020746888, "grad_norm": 26.832927703857422, "learning_rate": 1.5429709543568466e-05, "loss": 1.0208, "step": 13777 }, { "epoch": 11.43402489626556, "grad_norm": 51.774017333984375, "learning_rate": 1.5429377593360998e-05, "loss": 1.0136, "step": 13778 }, { "epoch": 11.434854771784233, "grad_norm": 35.079017639160156, "learning_rate": 1.5429045643153527e-05, "loss": 1.1863, "step": 13779 }, { "epoch": 11.435684647302905, "grad_norm": 82.24711608886719, "learning_rate": 1.542871369294606e-05, "loss": 1.0312, "step": 13780 }, { "epoch": 11.436514522821577, "grad_norm": 25.7565975189209, "learning_rate": 1.542838174273859e-05, "loss": 0.5523, "step": 13781 }, { "epoch": 11.43734439834025, "grad_norm": 24.9628963470459, "learning_rate": 1.542804979253112e-05, "loss": 0.8736, "step": 13782 }, { "epoch": 11.438174273858921, "grad_norm": 36.91185760498047, "learning_rate": 1.5427717842323652e-05, "loss": 0.7667, "step": 13783 }, { "epoch": 11.439004149377594, "grad_norm": 30.67849349975586, "learning_rate": 
1.5427385892116184e-05, "loss": 0.8779, "step": 13784 }, { "epoch": 11.439834024896266, "grad_norm": 100.8187484741211, "learning_rate": 1.5427053941908713e-05, "loss": 0.816, "step": 13785 }, { "epoch": 11.440663900414938, "grad_norm": 40.626686096191406, "learning_rate": 1.5426721991701245e-05, "loss": 1.1191, "step": 13786 }, { "epoch": 11.44149377593361, "grad_norm": 15.808125495910645, "learning_rate": 1.5426390041493777e-05, "loss": 0.5345, "step": 13787 }, { "epoch": 11.442323651452282, "grad_norm": 21.43292999267578, "learning_rate": 1.542605809128631e-05, "loss": 0.6542, "step": 13788 }, { "epoch": 11.443153526970955, "grad_norm": 33.17180252075195, "learning_rate": 1.5425726141078838e-05, "loss": 1.043, "step": 13789 }, { "epoch": 11.443983402489627, "grad_norm": 36.7093620300293, "learning_rate": 1.542539419087137e-05, "loss": 1.5374, "step": 13790 }, { "epoch": 11.444813278008299, "grad_norm": 24.397113800048828, "learning_rate": 1.5425062240663902e-05, "loss": 0.4151, "step": 13791 }, { "epoch": 11.445643153526971, "grad_norm": 50.63193130493164, "learning_rate": 1.5424730290456434e-05, "loss": 1.4267, "step": 13792 }, { "epoch": 11.446473029045643, "grad_norm": 42.550392150878906, "learning_rate": 1.5424398340248966e-05, "loss": 0.9038, "step": 13793 }, { "epoch": 11.447302904564316, "grad_norm": 27.978452682495117, "learning_rate": 1.5424066390041495e-05, "loss": 1.0, "step": 13794 }, { "epoch": 11.448132780082988, "grad_norm": 23.933061599731445, "learning_rate": 1.5423734439834027e-05, "loss": 0.7159, "step": 13795 }, { "epoch": 11.44896265560166, "grad_norm": 48.0728874206543, "learning_rate": 1.542340248962656e-05, "loss": 0.9451, "step": 13796 }, { "epoch": 11.449792531120332, "grad_norm": 28.431753158569336, "learning_rate": 1.5423070539419088e-05, "loss": 1.0865, "step": 13797 }, { "epoch": 11.450622406639004, "grad_norm": 23.09954833984375, "learning_rate": 1.542273858921162e-05, "loss": 1.0912, "step": 13798 }, { "epoch": 11.451452282157677, 
"grad_norm": 22.043277740478516, "learning_rate": 1.5422406639004152e-05, "loss": 0.9529, "step": 13799 }, { "epoch": 11.452282157676349, "grad_norm": 55.081111907958984, "learning_rate": 1.542207468879668e-05, "loss": 1.1469, "step": 13800 }, { "epoch": 11.453112033195021, "grad_norm": 35.780677795410156, "learning_rate": 1.5421742738589213e-05, "loss": 1.1788, "step": 13801 }, { "epoch": 11.453941908713693, "grad_norm": 39.86151123046875, "learning_rate": 1.542141078838174e-05, "loss": 1.1428, "step": 13802 }, { "epoch": 11.454771784232365, "grad_norm": 21.33844757080078, "learning_rate": 1.5421078838174274e-05, "loss": 0.5288, "step": 13803 }, { "epoch": 11.455601659751038, "grad_norm": 39.388328552246094, "learning_rate": 1.5420746887966806e-05, "loss": 1.1612, "step": 13804 }, { "epoch": 11.45643153526971, "grad_norm": 16.05953025817871, "learning_rate": 1.5420414937759338e-05, "loss": 0.3875, "step": 13805 }, { "epoch": 11.457261410788382, "grad_norm": 28.194875717163086, "learning_rate": 1.5420082987551867e-05, "loss": 0.7933, "step": 13806 }, { "epoch": 11.458091286307054, "grad_norm": 34.62693405151367, "learning_rate": 1.54197510373444e-05, "loss": 0.8004, "step": 13807 }, { "epoch": 11.458921161825726, "grad_norm": 29.54817771911621, "learning_rate": 1.541941908713693e-05, "loss": 0.8739, "step": 13808 }, { "epoch": 11.459751037344398, "grad_norm": 20.69984245300293, "learning_rate": 1.5419087136929463e-05, "loss": 0.5342, "step": 13809 }, { "epoch": 11.46058091286307, "grad_norm": 31.492799758911133, "learning_rate": 1.5418755186721992e-05, "loss": 1.0724, "step": 13810 }, { "epoch": 11.461410788381743, "grad_norm": 18.130842208862305, "learning_rate": 1.5418423236514524e-05, "loss": 0.8136, "step": 13811 }, { "epoch": 11.462240663900415, "grad_norm": 30.366018295288086, "learning_rate": 1.5418091286307056e-05, "loss": 1.6523, "step": 13812 }, { "epoch": 11.463070539419087, "grad_norm": 25.54343032836914, "learning_rate": 1.5417759336099588e-05, "loss": 
0.4239, "step": 13813 }, { "epoch": 11.46390041493776, "grad_norm": 68.45242309570312, "learning_rate": 1.5417427385892117e-05, "loss": 1.2833, "step": 13814 }, { "epoch": 11.464730290456432, "grad_norm": 19.808483123779297, "learning_rate": 1.541709543568465e-05, "loss": 0.9676, "step": 13815 }, { "epoch": 11.465560165975104, "grad_norm": 45.80937194824219, "learning_rate": 1.541676348547718e-05, "loss": 1.3982, "step": 13816 }, { "epoch": 11.466390041493776, "grad_norm": 18.221189498901367, "learning_rate": 1.541643153526971e-05, "loss": 0.6466, "step": 13817 }, { "epoch": 11.467219917012448, "grad_norm": 17.2198486328125, "learning_rate": 1.5416099585062242e-05, "loss": 0.5592, "step": 13818 }, { "epoch": 11.46804979253112, "grad_norm": 40.349220275878906, "learning_rate": 1.5415767634854774e-05, "loss": 1.0919, "step": 13819 }, { "epoch": 11.468879668049793, "grad_norm": 26.521638870239258, "learning_rate": 1.5415435684647303e-05, "loss": 0.8344, "step": 13820 }, { "epoch": 11.469709543568465, "grad_norm": 43.72655487060547, "learning_rate": 1.5415103734439835e-05, "loss": 1.0817, "step": 13821 }, { "epoch": 11.470539419087137, "grad_norm": 34.33098220825195, "learning_rate": 1.5414771784232367e-05, "loss": 1.0225, "step": 13822 }, { "epoch": 11.47136929460581, "grad_norm": 36.68328094482422, "learning_rate": 1.5414439834024896e-05, "loss": 1.057, "step": 13823 }, { "epoch": 11.472199170124481, "grad_norm": 35.949501037597656, "learning_rate": 1.5414107883817428e-05, "loss": 1.3872, "step": 13824 }, { "epoch": 11.473029045643154, "grad_norm": 35.9775505065918, "learning_rate": 1.541377593360996e-05, "loss": 0.8866, "step": 13825 }, { "epoch": 11.473858921161826, "grad_norm": 21.66157341003418, "learning_rate": 1.5413443983402492e-05, "loss": 0.5665, "step": 13826 }, { "epoch": 11.474688796680498, "grad_norm": 23.004623413085938, "learning_rate": 1.541311203319502e-05, "loss": 0.5976, "step": 13827 }, { "epoch": 11.47551867219917, "grad_norm": 
53.684898376464844, "learning_rate": 1.5412780082987553e-05, "loss": 1.4205, "step": 13828 }, { "epoch": 11.476348547717842, "grad_norm": 33.85408401489258, "learning_rate": 1.5412448132780085e-05, "loss": 0.8633, "step": 13829 }, { "epoch": 11.477178423236515, "grad_norm": 32.0283317565918, "learning_rate": 1.5412116182572617e-05, "loss": 0.5664, "step": 13830 }, { "epoch": 11.478008298755187, "grad_norm": 20.521100997924805, "learning_rate": 1.5411784232365146e-05, "loss": 0.894, "step": 13831 }, { "epoch": 11.478838174273859, "grad_norm": 35.62565612792969, "learning_rate": 1.5411452282157678e-05, "loss": 1.4734, "step": 13832 }, { "epoch": 11.479668049792531, "grad_norm": 32.12928771972656, "learning_rate": 1.541112033195021e-05, "loss": 0.9681, "step": 13833 }, { "epoch": 11.480497925311203, "grad_norm": 29.70967674255371, "learning_rate": 1.5410788381742742e-05, "loss": 1.127, "step": 13834 }, { "epoch": 11.481327800829876, "grad_norm": 37.758113861083984, "learning_rate": 1.541045643153527e-05, "loss": 1.5625, "step": 13835 }, { "epoch": 11.482157676348548, "grad_norm": 30.89388084411621, "learning_rate": 1.5410124481327803e-05, "loss": 0.5291, "step": 13836 }, { "epoch": 11.48298755186722, "grad_norm": 17.038707733154297, "learning_rate": 1.5409792531120335e-05, "loss": 0.5699, "step": 13837 }, { "epoch": 11.483817427385892, "grad_norm": 41.41407775878906, "learning_rate": 1.5409460580912864e-05, "loss": 0.7793, "step": 13838 }, { "epoch": 11.484647302904564, "grad_norm": 72.14559936523438, "learning_rate": 1.5409128630705396e-05, "loss": 0.8408, "step": 13839 }, { "epoch": 11.485477178423237, "grad_norm": 23.900617599487305, "learning_rate": 1.5408796680497924e-05, "loss": 0.6836, "step": 13840 }, { "epoch": 11.486307053941909, "grad_norm": 61.63872146606445, "learning_rate": 1.5408464730290457e-05, "loss": 0.8576, "step": 13841 }, { "epoch": 11.487136929460581, "grad_norm": 57.01315689086914, "learning_rate": 1.540813278008299e-05, "loss": 0.9994, "step": 
13842 }, { "epoch": 11.487966804979253, "grad_norm": 31.53871726989746, "learning_rate": 1.5407800829875517e-05, "loss": 0.8366, "step": 13843 }, { "epoch": 11.488796680497925, "grad_norm": 52.053245544433594, "learning_rate": 1.540746887966805e-05, "loss": 0.7468, "step": 13844 }, { "epoch": 11.489626556016598, "grad_norm": 29.723651885986328, "learning_rate": 1.540713692946058e-05, "loss": 1.1033, "step": 13845 }, { "epoch": 11.49045643153527, "grad_norm": 47.59241485595703, "learning_rate": 1.5406804979253114e-05, "loss": 0.9345, "step": 13846 }, { "epoch": 11.491286307053942, "grad_norm": 25.611448287963867, "learning_rate": 1.5406473029045646e-05, "loss": 0.9416, "step": 13847 }, { "epoch": 11.492116182572614, "grad_norm": 22.45682144165039, "learning_rate": 1.5406141078838175e-05, "loss": 0.6689, "step": 13848 }, { "epoch": 11.492946058091286, "grad_norm": 23.386314392089844, "learning_rate": 1.5405809128630707e-05, "loss": 1.2028, "step": 13849 }, { "epoch": 11.493775933609959, "grad_norm": 34.42802810668945, "learning_rate": 1.540547717842324e-05, "loss": 0.9333, "step": 13850 }, { "epoch": 11.49460580912863, "grad_norm": 28.800512313842773, "learning_rate": 1.540514522821577e-05, "loss": 0.9959, "step": 13851 }, { "epoch": 11.495435684647303, "grad_norm": 34.12215805053711, "learning_rate": 1.54048132780083e-05, "loss": 0.6993, "step": 13852 }, { "epoch": 11.496265560165975, "grad_norm": 26.028573989868164, "learning_rate": 1.5404481327800832e-05, "loss": 1.0037, "step": 13853 }, { "epoch": 11.497095435684647, "grad_norm": 34.60651779174805, "learning_rate": 1.5404149377593364e-05, "loss": 1.1925, "step": 13854 }, { "epoch": 11.49792531120332, "grad_norm": 37.5411376953125, "learning_rate": 1.5403817427385893e-05, "loss": 0.6595, "step": 13855 }, { "epoch": 11.498755186721992, "grad_norm": 15.945497512817383, "learning_rate": 1.5403485477178425e-05, "loss": 0.5904, "step": 13856 }, { "epoch": 11.499585062240664, "grad_norm": 24.934900283813477, 
"learning_rate": 1.5403153526970957e-05, "loss": 0.7577, "step": 13857 }, { "epoch": 11.500414937759336, "grad_norm": 25.499269485473633, "learning_rate": 1.5402821576763485e-05, "loss": 0.8834, "step": 13858 }, { "epoch": 11.501244813278008, "grad_norm": 38.20539093017578, "learning_rate": 1.5402489626556018e-05, "loss": 0.9226, "step": 13859 }, { "epoch": 11.50207468879668, "grad_norm": 21.944652557373047, "learning_rate": 1.540215767634855e-05, "loss": 0.7884, "step": 13860 }, { "epoch": 11.502904564315353, "grad_norm": 46.5319709777832, "learning_rate": 1.540182572614108e-05, "loss": 1.2605, "step": 13861 }, { "epoch": 11.503734439834025, "grad_norm": 29.303571701049805, "learning_rate": 1.540149377593361e-05, "loss": 0.9747, "step": 13862 }, { "epoch": 11.504564315352697, "grad_norm": 28.80795669555664, "learning_rate": 1.5401161825726143e-05, "loss": 1.5976, "step": 13863 }, { "epoch": 11.50539419087137, "grad_norm": 25.670156478881836, "learning_rate": 1.540082987551867e-05, "loss": 1.2764, "step": 13864 }, { "epoch": 11.506224066390041, "grad_norm": 35.30474853515625, "learning_rate": 1.5400497925311203e-05, "loss": 1.553, "step": 13865 }, { "epoch": 11.507053941908714, "grad_norm": 24.845138549804688, "learning_rate": 1.5400165975103736e-05, "loss": 0.6717, "step": 13866 }, { "epoch": 11.507883817427386, "grad_norm": 38.08015823364258, "learning_rate": 1.5399834024896268e-05, "loss": 1.3806, "step": 13867 }, { "epoch": 11.508713692946058, "grad_norm": 29.02741050720215, "learning_rate": 1.5399502074688796e-05, "loss": 1.2103, "step": 13868 }, { "epoch": 11.50954356846473, "grad_norm": 50.84145736694336, "learning_rate": 1.539917012448133e-05, "loss": 0.9417, "step": 13869 }, { "epoch": 11.510373443983402, "grad_norm": 40.54026794433594, "learning_rate": 1.539883817427386e-05, "loss": 0.8144, "step": 13870 }, { "epoch": 11.511203319502075, "grad_norm": 32.89345169067383, "learning_rate": 1.5398506224066393e-05, "loss": 0.7822, "step": 13871 }, { "epoch": 
11.512033195020747, "grad_norm": 22.914461135864258, "learning_rate": 1.5398174273858925e-05, "loss": 0.7619, "step": 13872 }, { "epoch": 11.512863070539419, "grad_norm": 22.227474212646484, "learning_rate": 1.5397842323651454e-05, "loss": 0.3342, "step": 13873 }, { "epoch": 11.513692946058091, "grad_norm": 18.136137008666992, "learning_rate": 1.5397510373443986e-05, "loss": 1.0178, "step": 13874 }, { "epoch": 11.514522821576763, "grad_norm": 35.6909294128418, "learning_rate": 1.5397178423236518e-05, "loss": 0.8365, "step": 13875 }, { "epoch": 11.515352697095436, "grad_norm": 43.69633865356445, "learning_rate": 1.5396846473029046e-05, "loss": 0.9236, "step": 13876 }, { "epoch": 11.516182572614108, "grad_norm": 39.42997741699219, "learning_rate": 1.539651452282158e-05, "loss": 0.8164, "step": 13877 }, { "epoch": 11.51701244813278, "grad_norm": 23.57265853881836, "learning_rate": 1.5396182572614107e-05, "loss": 0.8584, "step": 13878 }, { "epoch": 11.517842323651452, "grad_norm": 26.18815040588379, "learning_rate": 1.539585062240664e-05, "loss": 0.707, "step": 13879 }, { "epoch": 11.518672199170124, "grad_norm": 28.542144775390625, "learning_rate": 1.539551867219917e-05, "loss": 0.5222, "step": 13880 }, { "epoch": 11.519502074688797, "grad_norm": 69.82530212402344, "learning_rate": 1.53951867219917e-05, "loss": 0.8717, "step": 13881 }, { "epoch": 11.520331950207469, "grad_norm": 20.60202980041504, "learning_rate": 1.5394854771784232e-05, "loss": 0.7501, "step": 13882 }, { "epoch": 11.521161825726141, "grad_norm": 36.55028533935547, "learning_rate": 1.5394522821576764e-05, "loss": 0.8289, "step": 13883 }, { "epoch": 11.521991701244813, "grad_norm": 23.739398956298828, "learning_rate": 1.5394190871369297e-05, "loss": 0.5892, "step": 13884 }, { "epoch": 11.522821576763485, "grad_norm": 24.98933219909668, "learning_rate": 1.5393858921161825e-05, "loss": 0.7252, "step": 13885 }, { "epoch": 11.523651452282158, "grad_norm": 38.597679138183594, "learning_rate": 
1.5393526970954357e-05, "loss": 0.7167, "step": 13886 }, { "epoch": 11.52448132780083, "grad_norm": 30.68303680419922, "learning_rate": 1.539319502074689e-05, "loss": 0.5549, "step": 13887 }, { "epoch": 11.525311203319502, "grad_norm": 19.842561721801758, "learning_rate": 1.539286307053942e-05, "loss": 0.5949, "step": 13888 }, { "epoch": 11.526141078838174, "grad_norm": 57.123714447021484, "learning_rate": 1.539253112033195e-05, "loss": 0.7527, "step": 13889 }, { "epoch": 11.526970954356846, "grad_norm": 46.167327880859375, "learning_rate": 1.5392199170124482e-05, "loss": 0.3864, "step": 13890 }, { "epoch": 11.527800829875519, "grad_norm": 41.77433776855469, "learning_rate": 1.5391867219917015e-05, "loss": 1.2785, "step": 13891 }, { "epoch": 11.52863070539419, "grad_norm": 16.760787963867188, "learning_rate": 1.5391535269709547e-05, "loss": 0.4284, "step": 13892 }, { "epoch": 11.529460580912863, "grad_norm": 31.963960647583008, "learning_rate": 1.5391203319502075e-05, "loss": 0.9754, "step": 13893 }, { "epoch": 11.530290456431535, "grad_norm": 38.868560791015625, "learning_rate": 1.5390871369294607e-05, "loss": 0.6431, "step": 13894 }, { "epoch": 11.531120331950207, "grad_norm": 37.008079528808594, "learning_rate": 1.539053941908714e-05, "loss": 0.8393, "step": 13895 }, { "epoch": 11.53195020746888, "grad_norm": 25.181184768676758, "learning_rate": 1.539020746887967e-05, "loss": 1.0021, "step": 13896 }, { "epoch": 11.532780082987552, "grad_norm": 41.76821517944336, "learning_rate": 1.53898755186722e-05, "loss": 1.012, "step": 13897 }, { "epoch": 11.533609958506224, "grad_norm": 16.289278030395508, "learning_rate": 1.5389543568464733e-05, "loss": 0.5021, "step": 13898 }, { "epoch": 11.534439834024896, "grad_norm": 43.31513595581055, "learning_rate": 1.538921161825726e-05, "loss": 0.5693, "step": 13899 }, { "epoch": 11.535269709543568, "grad_norm": 47.176204681396484, "learning_rate": 1.5388879668049793e-05, "loss": 1.1708, "step": 13900 }, { "epoch": 
11.53609958506224, "grad_norm": 15.738344192504883, "learning_rate": 1.5388547717842325e-05, "loss": 0.6419, "step": 13901 }, { "epoch": 11.536929460580913, "grad_norm": 42.531951904296875, "learning_rate": 1.5388215767634854e-05, "loss": 0.7817, "step": 13902 }, { "epoch": 11.537759336099585, "grad_norm": 53.30753707885742, "learning_rate": 1.5387883817427386e-05, "loss": 1.3583, "step": 13903 }, { "epoch": 11.538589211618257, "grad_norm": 69.88249206542969, "learning_rate": 1.538755186721992e-05, "loss": 1.3043, "step": 13904 }, { "epoch": 11.53941908713693, "grad_norm": 43.35449981689453, "learning_rate": 1.538721991701245e-05, "loss": 1.7195, "step": 13905 }, { "epoch": 11.540248962655602, "grad_norm": 24.35219955444336, "learning_rate": 1.538688796680498e-05, "loss": 0.9321, "step": 13906 }, { "epoch": 11.541078838174274, "grad_norm": 43.12789535522461, "learning_rate": 1.538655601659751e-05, "loss": 1.3284, "step": 13907 }, { "epoch": 11.541908713692946, "grad_norm": 21.444833755493164, "learning_rate": 1.5386224066390043e-05, "loss": 0.8064, "step": 13908 }, { "epoch": 11.542738589211618, "grad_norm": 44.67421340942383, "learning_rate": 1.5385892116182576e-05, "loss": 1.2725, "step": 13909 }, { "epoch": 11.54356846473029, "grad_norm": 24.297128677368164, "learning_rate": 1.5385560165975104e-05, "loss": 0.5331, "step": 13910 }, { "epoch": 11.544398340248962, "grad_norm": 38.68476867675781, "learning_rate": 1.5385228215767636e-05, "loss": 1.3692, "step": 13911 }, { "epoch": 11.545228215767635, "grad_norm": 24.210241317749023, "learning_rate": 1.538489626556017e-05, "loss": 1.2139, "step": 13912 }, { "epoch": 11.546058091286307, "grad_norm": 43.62128448486328, "learning_rate": 1.53845643153527e-05, "loss": 0.8583, "step": 13913 }, { "epoch": 11.546887966804979, "grad_norm": 27.800046920776367, "learning_rate": 1.538423236514523e-05, "loss": 0.881, "step": 13914 }, { "epoch": 11.547717842323651, "grad_norm": 28.65275001525879, "learning_rate": 
1.538390041493776e-05, "loss": 0.742, "step": 13915 }, { "epoch": 11.548547717842323, "grad_norm": 31.777612686157227, "learning_rate": 1.5383568464730294e-05, "loss": 1.9895, "step": 13916 }, { "epoch": 11.549377593360996, "grad_norm": 34.441497802734375, "learning_rate": 1.5383236514522822e-05, "loss": 1.2649, "step": 13917 }, { "epoch": 11.550207468879668, "grad_norm": 18.969772338867188, "learning_rate": 1.5382904564315354e-05, "loss": 0.4388, "step": 13918 }, { "epoch": 11.55103734439834, "grad_norm": 59.763946533203125, "learning_rate": 1.5382572614107883e-05, "loss": 1.0721, "step": 13919 }, { "epoch": 11.551867219917012, "grad_norm": 23.499719619750977, "learning_rate": 1.5382240663900415e-05, "loss": 0.5956, "step": 13920 }, { "epoch": 11.552697095435684, "grad_norm": 47.852352142333984, "learning_rate": 1.5381908713692947e-05, "loss": 1.2473, "step": 13921 }, { "epoch": 11.553526970954357, "grad_norm": 29.976030349731445, "learning_rate": 1.5381576763485476e-05, "loss": 0.6677, "step": 13922 }, { "epoch": 11.554356846473029, "grad_norm": 22.57073402404785, "learning_rate": 1.5381244813278008e-05, "loss": 0.3831, "step": 13923 }, { "epoch": 11.555186721991701, "grad_norm": 39.362548828125, "learning_rate": 1.538091286307054e-05, "loss": 1.0115, "step": 13924 }, { "epoch": 11.556016597510373, "grad_norm": 37.150970458984375, "learning_rate": 1.5380580912863072e-05, "loss": 1.2047, "step": 13925 }, { "epoch": 11.556846473029045, "grad_norm": 37.49553298950195, "learning_rate": 1.5380248962655604e-05, "loss": 0.9121, "step": 13926 }, { "epoch": 11.557676348547718, "grad_norm": 16.845991134643555, "learning_rate": 1.5379917012448133e-05, "loss": 0.4786, "step": 13927 }, { "epoch": 11.55850622406639, "grad_norm": 20.151519775390625, "learning_rate": 1.5379585062240665e-05, "loss": 0.7403, "step": 13928 }, { "epoch": 11.559336099585062, "grad_norm": 37.967498779296875, "learning_rate": 1.5379253112033197e-05, "loss": 0.8126, "step": 13929 }, { "epoch": 
11.560165975103734, "grad_norm": 25.225507736206055, "learning_rate": 1.537892116182573e-05, "loss": 0.7549, "step": 13930 }, { "epoch": 11.560995850622406, "grad_norm": 25.427614212036133, "learning_rate": 1.5378589211618258e-05, "loss": 1.0821, "step": 13931 }, { "epoch": 11.561825726141079, "grad_norm": 28.346633911132812, "learning_rate": 1.537825726141079e-05, "loss": 0.8612, "step": 13932 }, { "epoch": 11.56265560165975, "grad_norm": 18.55755615234375, "learning_rate": 1.5377925311203322e-05, "loss": 0.6282, "step": 13933 }, { "epoch": 11.563485477178423, "grad_norm": 21.73421287536621, "learning_rate": 1.537759336099585e-05, "loss": 0.6223, "step": 13934 }, { "epoch": 11.564315352697095, "grad_norm": 33.86594009399414, "learning_rate": 1.5377261410788383e-05, "loss": 1.1094, "step": 13935 }, { "epoch": 11.565145228215767, "grad_norm": 69.23396301269531, "learning_rate": 1.5376929460580915e-05, "loss": 0.661, "step": 13936 }, { "epoch": 11.56597510373444, "grad_norm": 26.97052574157715, "learning_rate": 1.5376597510373444e-05, "loss": 1.2679, "step": 13937 }, { "epoch": 11.566804979253112, "grad_norm": 20.80919075012207, "learning_rate": 1.5376265560165976e-05, "loss": 0.335, "step": 13938 }, { "epoch": 11.567634854771784, "grad_norm": 30.81864356994629, "learning_rate": 1.5375933609958508e-05, "loss": 1.3917, "step": 13939 }, { "epoch": 11.568464730290456, "grad_norm": 23.328214645385742, "learning_rate": 1.5375601659751037e-05, "loss": 1.0786, "step": 13940 }, { "epoch": 11.569294605809128, "grad_norm": 49.54061508178711, "learning_rate": 1.537526970954357e-05, "loss": 0.6461, "step": 13941 }, { "epoch": 11.5701244813278, "grad_norm": 66.5604248046875, "learning_rate": 1.53749377593361e-05, "loss": 1.1847, "step": 13942 }, { "epoch": 11.570954356846473, "grad_norm": 40.99094009399414, "learning_rate": 1.537460580912863e-05, "loss": 1.2935, "step": 13943 }, { "epoch": 11.571784232365145, "grad_norm": 34.999229431152344, "learning_rate": 
1.5374273858921162e-05, "loss": 0.7585, "step": 13944 }, { "epoch": 11.572614107883817, "grad_norm": 60.81425857543945, "learning_rate": 1.5373941908713694e-05, "loss": 0.9948, "step": 13945 }, { "epoch": 11.57344398340249, "grad_norm": 34.44478988647461, "learning_rate": 1.5373609958506226e-05, "loss": 1.1779, "step": 13946 }, { "epoch": 11.574273858921162, "grad_norm": 21.378738403320312, "learning_rate": 1.5373278008298755e-05, "loss": 0.4917, "step": 13947 }, { "epoch": 11.575103734439834, "grad_norm": 41.14138412475586, "learning_rate": 1.5372946058091287e-05, "loss": 1.1806, "step": 13948 }, { "epoch": 11.575933609958506, "grad_norm": 35.11640930175781, "learning_rate": 1.537261410788382e-05, "loss": 0.6689, "step": 13949 }, { "epoch": 11.576763485477178, "grad_norm": 20.124919891357422, "learning_rate": 1.537228215767635e-05, "loss": 0.5232, "step": 13950 }, { "epoch": 11.57759336099585, "grad_norm": 75.1960678100586, "learning_rate": 1.5371950207468883e-05, "loss": 0.7578, "step": 13951 }, { "epoch": 11.578423236514523, "grad_norm": 28.458093643188477, "learning_rate": 1.5371618257261412e-05, "loss": 0.8506, "step": 13952 }, { "epoch": 11.579253112033195, "grad_norm": 23.545255661010742, "learning_rate": 1.5371286307053944e-05, "loss": 0.898, "step": 13953 }, { "epoch": 11.580082987551867, "grad_norm": 37.19095230102539, "learning_rate": 1.5370954356846476e-05, "loss": 1.0178, "step": 13954 }, { "epoch": 11.58091286307054, "grad_norm": 58.474693298339844, "learning_rate": 1.5370622406639005e-05, "loss": 1.1042, "step": 13955 }, { "epoch": 11.581742738589211, "grad_norm": 27.634370803833008, "learning_rate": 1.5370290456431537e-05, "loss": 0.6515, "step": 13956 }, { "epoch": 11.582572614107884, "grad_norm": 28.044225692749023, "learning_rate": 1.5369958506224066e-05, "loss": 0.9062, "step": 13957 }, { "epoch": 11.583402489626556, "grad_norm": 26.3231201171875, "learning_rate": 1.5369626556016598e-05, "loss": 0.7395, "step": 13958 }, { "epoch": 
11.584232365145228, "grad_norm": 33.01863098144531, "learning_rate": 1.536929460580913e-05, "loss": 0.5828, "step": 13959 }, { "epoch": 11.5850622406639, "grad_norm": 51.972965240478516, "learning_rate": 1.536896265560166e-05, "loss": 0.8787, "step": 13960 }, { "epoch": 11.585892116182572, "grad_norm": 39.45372009277344, "learning_rate": 1.536863070539419e-05, "loss": 0.8422, "step": 13961 }, { "epoch": 11.586721991701245, "grad_norm": 69.28501892089844, "learning_rate": 1.5368298755186723e-05, "loss": 1.2465, "step": 13962 }, { "epoch": 11.587551867219917, "grad_norm": 19.94000816345215, "learning_rate": 1.5367966804979255e-05, "loss": 0.7844, "step": 13963 }, { "epoch": 11.588381742738589, "grad_norm": 54.028907775878906, "learning_rate": 1.5367634854771784e-05, "loss": 1.396, "step": 13964 }, { "epoch": 11.589211618257261, "grad_norm": 40.83028030395508, "learning_rate": 1.5367302904564316e-05, "loss": 0.8963, "step": 13965 }, { "epoch": 11.590041493775933, "grad_norm": 19.87403678894043, "learning_rate": 1.5366970954356848e-05, "loss": 0.4053, "step": 13966 }, { "epoch": 11.590871369294605, "grad_norm": 35.55244827270508, "learning_rate": 1.536663900414938e-05, "loss": 1.0091, "step": 13967 }, { "epoch": 11.591701244813278, "grad_norm": 36.544105529785156, "learning_rate": 1.536630705394191e-05, "loss": 0.6473, "step": 13968 }, { "epoch": 11.59253112033195, "grad_norm": 48.23023223876953, "learning_rate": 1.536597510373444e-05, "loss": 1.3243, "step": 13969 }, { "epoch": 11.593360995850622, "grad_norm": 27.02042579650879, "learning_rate": 1.5365643153526973e-05, "loss": 0.9707, "step": 13970 }, { "epoch": 11.594190871369294, "grad_norm": 64.46978759765625, "learning_rate": 1.5365311203319505e-05, "loss": 1.7822, "step": 13971 }, { "epoch": 11.595020746887966, "grad_norm": 41.3785285949707, "learning_rate": 1.5364979253112034e-05, "loss": 0.6795, "step": 13972 }, { "epoch": 11.595850622406639, "grad_norm": 47.45145797729492, "learning_rate": 
1.5364647302904566e-05, "loss": 0.7963, "step": 13973 }, { "epoch": 11.59668049792531, "grad_norm": 25.90673828125, "learning_rate": 1.5364315352697098e-05, "loss": 0.8683, "step": 13974 }, { "epoch": 11.597510373443983, "grad_norm": 31.92437171936035, "learning_rate": 1.5363983402489627e-05, "loss": 0.7104, "step": 13975 }, { "epoch": 11.598340248962655, "grad_norm": 29.083908081054688, "learning_rate": 1.536365145228216e-05, "loss": 0.6717, "step": 13976 }, { "epoch": 11.599170124481327, "grad_norm": 23.28618812561035, "learning_rate": 1.536331950207469e-05, "loss": 0.9178, "step": 13977 }, { "epoch": 11.6, "grad_norm": 30.942319869995117, "learning_rate": 1.536298755186722e-05, "loss": 0.9002, "step": 13978 }, { "epoch": 11.600829875518672, "grad_norm": 19.168216705322266, "learning_rate": 1.5362655601659752e-05, "loss": 0.5224, "step": 13979 }, { "epoch": 11.601659751037344, "grad_norm": 30.94623565673828, "learning_rate": 1.5362323651452284e-05, "loss": 1.5746, "step": 13980 }, { "epoch": 11.602489626556016, "grad_norm": 23.19076919555664, "learning_rate": 1.5361991701244813e-05, "loss": 0.6547, "step": 13981 }, { "epoch": 11.603319502074688, "grad_norm": 48.610618591308594, "learning_rate": 1.5361659751037345e-05, "loss": 0.6152, "step": 13982 }, { "epoch": 11.60414937759336, "grad_norm": 16.87337875366211, "learning_rate": 1.5361327800829877e-05, "loss": 0.5585, "step": 13983 }, { "epoch": 11.604979253112033, "grad_norm": 36.12323760986328, "learning_rate": 1.536099585062241e-05, "loss": 1.6614, "step": 13984 }, { "epoch": 11.605809128630705, "grad_norm": 27.473039627075195, "learning_rate": 1.5360663900414938e-05, "loss": 0.7321, "step": 13985 }, { "epoch": 11.606639004149377, "grad_norm": 28.44176483154297, "learning_rate": 1.536033195020747e-05, "loss": 1.3483, "step": 13986 }, { "epoch": 11.60746887966805, "grad_norm": 30.315181732177734, "learning_rate": 1.5360000000000002e-05, "loss": 0.7814, "step": 13987 }, { "epoch": 11.608298755186722, "grad_norm": 
20.0170841217041, "learning_rate": 1.5359668049792534e-05, "loss": 0.4636, "step": 13988 }, { "epoch": 11.609128630705394, "grad_norm": 23.490253448486328, "learning_rate": 1.5359336099585063e-05, "loss": 0.7125, "step": 13989 }, { "epoch": 11.609958506224066, "grad_norm": 32.902950286865234, "learning_rate": 1.5359004149377595e-05, "loss": 1.0102, "step": 13990 }, { "epoch": 11.610788381742738, "grad_norm": 42.7095832824707, "learning_rate": 1.5358672199170127e-05, "loss": 0.5954, "step": 13991 }, { "epoch": 11.61161825726141, "grad_norm": 67.20862579345703, "learning_rate": 1.535834024896266e-05, "loss": 1.6734, "step": 13992 }, { "epoch": 11.612448132780083, "grad_norm": 22.173730850219727, "learning_rate": 1.5358008298755188e-05, "loss": 0.448, "step": 13993 }, { "epoch": 11.613278008298755, "grad_norm": 38.982730865478516, "learning_rate": 1.535767634854772e-05, "loss": 1.4356, "step": 13994 }, { "epoch": 11.614107883817427, "grad_norm": 22.44761848449707, "learning_rate": 1.535734439834025e-05, "loss": 1.4104, "step": 13995 }, { "epoch": 11.6149377593361, "grad_norm": 30.684823989868164, "learning_rate": 1.535701244813278e-05, "loss": 1.1664, "step": 13996 }, { "epoch": 11.615767634854771, "grad_norm": 53.26997375488281, "learning_rate": 1.5356680497925313e-05, "loss": 0.8125, "step": 13997 }, { "epoch": 11.616597510373444, "grad_norm": 32.21089172363281, "learning_rate": 1.535634854771784e-05, "loss": 0.7092, "step": 13998 }, { "epoch": 11.617427385892116, "grad_norm": 43.75377655029297, "learning_rate": 1.5356016597510374e-05, "loss": 1.0459, "step": 13999 }, { "epoch": 11.618257261410788, "grad_norm": 34.83644485473633, "learning_rate": 1.5355684647302906e-05, "loss": 1.1274, "step": 14000 }, { "epoch": 11.61908713692946, "grad_norm": 56.96488952636719, "learning_rate": 1.5355352697095435e-05, "loss": 0.7673, "step": 14001 }, { "epoch": 11.619917012448132, "grad_norm": 24.744728088378906, "learning_rate": 1.5355020746887967e-05, "loss": 0.6593, "step": 
14002 }, { "epoch": 11.620746887966805, "grad_norm": 24.425373077392578, "learning_rate": 1.53546887966805e-05, "loss": 0.8937, "step": 14003 }, { "epoch": 11.621576763485477, "grad_norm": 21.29231071472168, "learning_rate": 1.535435684647303e-05, "loss": 0.7516, "step": 14004 }, { "epoch": 11.622406639004149, "grad_norm": 21.27227783203125, "learning_rate": 1.5354024896265563e-05, "loss": 0.8107, "step": 14005 }, { "epoch": 11.623236514522821, "grad_norm": 40.58613586425781, "learning_rate": 1.5353692946058092e-05, "loss": 1.4005, "step": 14006 }, { "epoch": 11.624066390041493, "grad_norm": 30.34941864013672, "learning_rate": 1.5353360995850624e-05, "loss": 0.8466, "step": 14007 }, { "epoch": 11.624896265560166, "grad_norm": 29.625051498413086, "learning_rate": 1.5353029045643156e-05, "loss": 1.0116, "step": 14008 }, { "epoch": 11.625726141078838, "grad_norm": 19.01729393005371, "learning_rate": 1.5352697095435688e-05, "loss": 0.8149, "step": 14009 }, { "epoch": 11.62655601659751, "grad_norm": 20.815767288208008, "learning_rate": 1.5352365145228217e-05, "loss": 0.7139, "step": 14010 }, { "epoch": 11.627385892116182, "grad_norm": 24.79673194885254, "learning_rate": 1.535203319502075e-05, "loss": 0.5126, "step": 14011 }, { "epoch": 11.628215767634854, "grad_norm": 28.35413360595703, "learning_rate": 1.535170124481328e-05, "loss": 0.5828, "step": 14012 }, { "epoch": 11.629045643153527, "grad_norm": 24.859638214111328, "learning_rate": 1.535136929460581e-05, "loss": 0.6067, "step": 14013 }, { "epoch": 11.629875518672199, "grad_norm": 35.01056671142578, "learning_rate": 1.5351037344398342e-05, "loss": 1.179, "step": 14014 }, { "epoch": 11.630705394190871, "grad_norm": 33.26251983642578, "learning_rate": 1.5350705394190874e-05, "loss": 1.0837, "step": 14015 }, { "epoch": 11.631535269709543, "grad_norm": 41.430503845214844, "learning_rate": 1.5350373443983403e-05, "loss": 1.2977, "step": 14016 }, { "epoch": 11.632365145228215, "grad_norm": 34.65494155883789, 
"learning_rate": 1.5350041493775935e-05, "loss": 1.1607, "step": 14017 }, { "epoch": 11.633195020746887, "grad_norm": 26.093618392944336, "learning_rate": 1.5349709543568463e-05, "loss": 0.9266, "step": 14018 }, { "epoch": 11.63402489626556, "grad_norm": 33.39816665649414, "learning_rate": 1.5349377593360996e-05, "loss": 0.9566, "step": 14019 }, { "epoch": 11.634854771784232, "grad_norm": 27.0670223236084, "learning_rate": 1.5349045643153528e-05, "loss": 0.6891, "step": 14020 }, { "epoch": 11.635684647302904, "grad_norm": 22.266145706176758, "learning_rate": 1.534871369294606e-05, "loss": 1.0676, "step": 14021 }, { "epoch": 11.636514522821576, "grad_norm": 32.41864776611328, "learning_rate": 1.534838174273859e-05, "loss": 1.179, "step": 14022 }, { "epoch": 11.637344398340248, "grad_norm": 55.61316680908203, "learning_rate": 1.534804979253112e-05, "loss": 1.3879, "step": 14023 }, { "epoch": 11.63817427385892, "grad_norm": 49.06218338012695, "learning_rate": 1.5347717842323653e-05, "loss": 0.9025, "step": 14024 }, { "epoch": 11.639004149377593, "grad_norm": 29.563737869262695, "learning_rate": 1.5347385892116185e-05, "loss": 0.8179, "step": 14025 }, { "epoch": 11.639834024896265, "grad_norm": 38.861419677734375, "learning_rate": 1.5347053941908714e-05, "loss": 1.9666, "step": 14026 }, { "epoch": 11.640663900414937, "grad_norm": 28.604225158691406, "learning_rate": 1.5346721991701246e-05, "loss": 1.115, "step": 14027 }, { "epoch": 11.64149377593361, "grad_norm": 22.89633560180664, "learning_rate": 1.5346390041493778e-05, "loss": 1.1217, "step": 14028 }, { "epoch": 11.642323651452282, "grad_norm": 34.89695739746094, "learning_rate": 1.534605809128631e-05, "loss": 0.8663, "step": 14029 }, { "epoch": 11.643153526970954, "grad_norm": 53.307979583740234, "learning_rate": 1.5345726141078842e-05, "loss": 1.6761, "step": 14030 }, { "epoch": 11.643983402489626, "grad_norm": 21.904964447021484, "learning_rate": 1.534539419087137e-05, "loss": 0.8452, "step": 14031 }, { "epoch": 
11.644813278008298, "grad_norm": 30.62139129638672, "learning_rate": 1.5345062240663903e-05, "loss": 1.1282, "step": 14032 }, { "epoch": 11.64564315352697, "grad_norm": 31.320892333984375, "learning_rate": 1.5344730290456435e-05, "loss": 0.8185, "step": 14033 }, { "epoch": 11.646473029045643, "grad_norm": 19.285158157348633, "learning_rate": 1.5344398340248964e-05, "loss": 0.5889, "step": 14034 }, { "epoch": 11.647302904564315, "grad_norm": 83.08737182617188, "learning_rate": 1.5344066390041496e-05, "loss": 0.7643, "step": 14035 }, { "epoch": 11.648132780082987, "grad_norm": 51.98304748535156, "learning_rate": 1.5343734439834024e-05, "loss": 1.0188, "step": 14036 }, { "epoch": 11.64896265560166, "grad_norm": 38.72239685058594, "learning_rate": 1.5343402489626557e-05, "loss": 1.3397, "step": 14037 }, { "epoch": 11.649792531120331, "grad_norm": 35.82166290283203, "learning_rate": 1.534307053941909e-05, "loss": 1.6841, "step": 14038 }, { "epoch": 11.650622406639004, "grad_norm": 47.40489196777344, "learning_rate": 1.5342738589211617e-05, "loss": 1.2973, "step": 14039 }, { "epoch": 11.651452282157676, "grad_norm": 31.609874725341797, "learning_rate": 1.534240663900415e-05, "loss": 1.3528, "step": 14040 }, { "epoch": 11.652282157676348, "grad_norm": 49.55680847167969, "learning_rate": 1.534207468879668e-05, "loss": 1.4212, "step": 14041 }, { "epoch": 11.65311203319502, "grad_norm": 30.023277282714844, "learning_rate": 1.5341742738589214e-05, "loss": 1.6274, "step": 14042 }, { "epoch": 11.653941908713692, "grad_norm": 26.125028610229492, "learning_rate": 1.5341410788381742e-05, "loss": 1.265, "step": 14043 }, { "epoch": 11.654771784232365, "grad_norm": 27.97618865966797, "learning_rate": 1.5341078838174275e-05, "loss": 1.5303, "step": 14044 }, { "epoch": 11.655601659751037, "grad_norm": 34.862998962402344, "learning_rate": 1.5340746887966807e-05, "loss": 1.1523, "step": 14045 }, { "epoch": 11.656431535269709, "grad_norm": 40.20466232299805, "learning_rate": 
1.534041493775934e-05, "loss": 1.4238, "step": 14046 }, { "epoch": 11.657261410788381, "grad_norm": 37.04498291015625, "learning_rate": 1.5340082987551867e-05, "loss": 1.246, "step": 14047 }, { "epoch": 11.658091286307053, "grad_norm": 22.243732452392578, "learning_rate": 1.53397510373444e-05, "loss": 0.9905, "step": 14048 }, { "epoch": 11.658921161825726, "grad_norm": 24.445058822631836, "learning_rate": 1.5339419087136932e-05, "loss": 0.7267, "step": 14049 }, { "epoch": 11.659751037344398, "grad_norm": 22.677221298217773, "learning_rate": 1.5339087136929464e-05, "loss": 0.6699, "step": 14050 }, { "epoch": 11.66058091286307, "grad_norm": 40.094200134277344, "learning_rate": 1.5338755186721993e-05, "loss": 1.2298, "step": 14051 }, { "epoch": 11.661410788381742, "grad_norm": 47.24623489379883, "learning_rate": 1.5338423236514525e-05, "loss": 0.9006, "step": 14052 }, { "epoch": 11.662240663900414, "grad_norm": 29.63519859313965, "learning_rate": 1.5338091286307057e-05, "loss": 1.2798, "step": 14053 }, { "epoch": 11.663070539419087, "grad_norm": 29.567583084106445, "learning_rate": 1.5337759336099585e-05, "loss": 0.834, "step": 14054 }, { "epoch": 11.663900414937759, "grad_norm": 24.330350875854492, "learning_rate": 1.5337427385892118e-05, "loss": 1.08, "step": 14055 }, { "epoch": 11.664730290456431, "grad_norm": 31.955657958984375, "learning_rate": 1.5337095435684646e-05, "loss": 1.2123, "step": 14056 }, { "epoch": 11.665560165975103, "grad_norm": 20.577289581298828, "learning_rate": 1.533676348547718e-05, "loss": 1.0239, "step": 14057 }, { "epoch": 11.666390041493775, "grad_norm": 18.79166603088379, "learning_rate": 1.533643153526971e-05, "loss": 0.7407, "step": 14058 }, { "epoch": 11.667219917012448, "grad_norm": 22.40352439880371, "learning_rate": 1.5336099585062243e-05, "loss": 0.8061, "step": 14059 }, { "epoch": 11.66804979253112, "grad_norm": 38.493526458740234, "learning_rate": 1.533576763485477e-05, "loss": 1.2798, "step": 14060 }, { "epoch": 
11.668879668049792, "grad_norm": 32.77124786376953, "learning_rate": 1.5335435684647303e-05, "loss": 0.8469, "step": 14061 }, { "epoch": 11.669709543568464, "grad_norm": 28.938119888305664, "learning_rate": 1.5335103734439836e-05, "loss": 0.9721, "step": 14062 }, { "epoch": 11.670539419087136, "grad_norm": 63.93435287475586, "learning_rate": 1.5334771784232368e-05, "loss": 1.0161, "step": 14063 }, { "epoch": 11.671369294605809, "grad_norm": 34.4343376159668, "learning_rate": 1.5334439834024896e-05, "loss": 1.2128, "step": 14064 }, { "epoch": 11.67219917012448, "grad_norm": 36.403743743896484, "learning_rate": 1.533410788381743e-05, "loss": 1.1215, "step": 14065 }, { "epoch": 11.673029045643153, "grad_norm": 43.32292556762695, "learning_rate": 1.533377593360996e-05, "loss": 1.4745, "step": 14066 }, { "epoch": 11.673858921161825, "grad_norm": 30.610437393188477, "learning_rate": 1.5333443983402493e-05, "loss": 0.959, "step": 14067 }, { "epoch": 11.674688796680497, "grad_norm": 20.537370681762695, "learning_rate": 1.533311203319502e-05, "loss": 0.8195, "step": 14068 }, { "epoch": 11.67551867219917, "grad_norm": 22.94315528869629, "learning_rate": 1.5332780082987554e-05, "loss": 0.7628, "step": 14069 }, { "epoch": 11.676348547717842, "grad_norm": 21.695470809936523, "learning_rate": 1.5332448132780086e-05, "loss": 0.6628, "step": 14070 }, { "epoch": 11.677178423236514, "grad_norm": 25.562335968017578, "learning_rate": 1.5332116182572618e-05, "loss": 0.7989, "step": 14071 }, { "epoch": 11.678008298755186, "grad_norm": 26.363611221313477, "learning_rate": 1.5331784232365146e-05, "loss": 0.3326, "step": 14072 }, { "epoch": 11.678838174273858, "grad_norm": 21.826778411865234, "learning_rate": 1.533145228215768e-05, "loss": 1.2662, "step": 14073 }, { "epoch": 11.67966804979253, "grad_norm": 27.075937271118164, "learning_rate": 1.5331120331950207e-05, "loss": 1.1276, "step": 14074 }, { "epoch": 11.680497925311203, "grad_norm": 31.95628547668457, "learning_rate": 
1.533078838174274e-05, "loss": 0.9013, "step": 14075 }, { "epoch": 11.681327800829875, "grad_norm": 32.10505676269531, "learning_rate": 1.533045643153527e-05, "loss": 1.1796, "step": 14076 }, { "epoch": 11.682157676348547, "grad_norm": 34.256858825683594, "learning_rate": 1.53301244813278e-05, "loss": 0.7461, "step": 14077 }, { "epoch": 11.68298755186722, "grad_norm": 64.71910095214844, "learning_rate": 1.5329792531120332e-05, "loss": 1.0568, "step": 14078 }, { "epoch": 11.683817427385891, "grad_norm": 24.204357147216797, "learning_rate": 1.5329460580912864e-05, "loss": 0.6139, "step": 14079 }, { "epoch": 11.684647302904564, "grad_norm": 59.97072219848633, "learning_rate": 1.5329128630705393e-05, "loss": 1.2252, "step": 14080 }, { "epoch": 11.685477178423236, "grad_norm": 40.807186126708984, "learning_rate": 1.5328796680497925e-05, "loss": 1.3134, "step": 14081 }, { "epoch": 11.686307053941908, "grad_norm": 33.14185333251953, "learning_rate": 1.5328464730290457e-05, "loss": 1.198, "step": 14082 }, { "epoch": 11.68713692946058, "grad_norm": 44.486366271972656, "learning_rate": 1.532813278008299e-05, "loss": 1.1234, "step": 14083 }, { "epoch": 11.687966804979252, "grad_norm": 28.932497024536133, "learning_rate": 1.532780082987552e-05, "loss": 1.0152, "step": 14084 }, { "epoch": 11.688796680497925, "grad_norm": 27.880109786987305, "learning_rate": 1.532746887966805e-05, "loss": 0.7129, "step": 14085 }, { "epoch": 11.689626556016597, "grad_norm": 51.386966705322266, "learning_rate": 1.5327136929460582e-05, "loss": 1.3426, "step": 14086 }, { "epoch": 11.690456431535269, "grad_norm": 33.49739074707031, "learning_rate": 1.5326804979253115e-05, "loss": 1.1374, "step": 14087 }, { "epoch": 11.691286307053941, "grad_norm": 32.600460052490234, "learning_rate": 1.5326473029045647e-05, "loss": 0.7855, "step": 14088 }, { "epoch": 11.692116182572613, "grad_norm": 37.02742385864258, "learning_rate": 1.5326141078838175e-05, "loss": 1.935, "step": 14089 }, { "epoch": 
11.692946058091286, "grad_norm": 17.14187240600586, "learning_rate": 1.5325809128630707e-05, "loss": 0.4515, "step": 14090 }, { "epoch": 11.693775933609958, "grad_norm": 36.53705596923828, "learning_rate": 1.532547717842324e-05, "loss": 0.959, "step": 14091 }, { "epoch": 11.69460580912863, "grad_norm": 26.214075088500977, "learning_rate": 1.5325145228215768e-05, "loss": 1.0629, "step": 14092 }, { "epoch": 11.695435684647302, "grad_norm": 57.45131301879883, "learning_rate": 1.53248132780083e-05, "loss": 0.9749, "step": 14093 }, { "epoch": 11.696265560165974, "grad_norm": 22.053293228149414, "learning_rate": 1.5324481327800832e-05, "loss": 0.9528, "step": 14094 }, { "epoch": 11.697095435684647, "grad_norm": 28.722442626953125, "learning_rate": 1.532414937759336e-05, "loss": 1.3207, "step": 14095 }, { "epoch": 11.697925311203319, "grad_norm": 55.6269416809082, "learning_rate": 1.5323817427385893e-05, "loss": 0.9191, "step": 14096 }, { "epoch": 11.698755186721991, "grad_norm": 47.7078857421875, "learning_rate": 1.5323485477178422e-05, "loss": 1.6413, "step": 14097 }, { "epoch": 11.699585062240663, "grad_norm": 32.22056198120117, "learning_rate": 1.5323153526970954e-05, "loss": 0.6665, "step": 14098 }, { "epoch": 11.700414937759335, "grad_norm": 20.884685516357422, "learning_rate": 1.5322821576763486e-05, "loss": 0.5388, "step": 14099 }, { "epoch": 11.701244813278008, "grad_norm": 54.3507080078125, "learning_rate": 1.532248962655602e-05, "loss": 0.9336, "step": 14100 }, { "epoch": 11.70207468879668, "grad_norm": 23.569387435913086, "learning_rate": 1.5322157676348547e-05, "loss": 0.7648, "step": 14101 }, { "epoch": 11.702904564315352, "grad_norm": 31.750947952270508, "learning_rate": 1.532182572614108e-05, "loss": 1.1487, "step": 14102 }, { "epoch": 11.703734439834024, "grad_norm": 13.708573341369629, "learning_rate": 1.532149377593361e-05, "loss": 0.3486, "step": 14103 }, { "epoch": 11.704564315352696, "grad_norm": 24.186405181884766, "learning_rate": 
1.5321161825726143e-05, "loss": 1.2969, "step": 14104 }, { "epoch": 11.705394190871369, "grad_norm": 38.62946701049805, "learning_rate": 1.5320829875518672e-05, "loss": 0.5204, "step": 14105 }, { "epoch": 11.70622406639004, "grad_norm": 22.601537704467773, "learning_rate": 1.5320497925311204e-05, "loss": 0.654, "step": 14106 }, { "epoch": 11.707053941908713, "grad_norm": 34.928558349609375, "learning_rate": 1.5320165975103736e-05, "loss": 0.9658, "step": 14107 }, { "epoch": 11.707883817427385, "grad_norm": 37.6673698425293, "learning_rate": 1.531983402489627e-05, "loss": 0.5958, "step": 14108 }, { "epoch": 11.708713692946057, "grad_norm": 38.06999588012695, "learning_rate": 1.53195020746888e-05, "loss": 1.2233, "step": 14109 }, { "epoch": 11.70954356846473, "grad_norm": 28.28514862060547, "learning_rate": 1.531917012448133e-05, "loss": 0.7085, "step": 14110 }, { "epoch": 11.710373443983402, "grad_norm": 22.109861373901367, "learning_rate": 1.531883817427386e-05, "loss": 0.7465, "step": 14111 }, { "epoch": 11.711203319502074, "grad_norm": 28.22170639038086, "learning_rate": 1.531850622406639e-05, "loss": 0.5007, "step": 14112 }, { "epoch": 11.712033195020746, "grad_norm": 78.7039794921875, "learning_rate": 1.5318174273858922e-05, "loss": 0.9016, "step": 14113 }, { "epoch": 11.712863070539418, "grad_norm": 28.20404624938965, "learning_rate": 1.5317842323651454e-05, "loss": 0.472, "step": 14114 }, { "epoch": 11.71369294605809, "grad_norm": 25.571426391601562, "learning_rate": 1.5317510373443983e-05, "loss": 0.707, "step": 14115 }, { "epoch": 11.714522821576763, "grad_norm": 45.65071105957031, "learning_rate": 1.5317178423236515e-05, "loss": 0.8213, "step": 14116 }, { "epoch": 11.715352697095435, "grad_norm": 53.5412483215332, "learning_rate": 1.5316846473029047e-05, "loss": 1.1411, "step": 14117 }, { "epoch": 11.716182572614107, "grad_norm": 37.76567840576172, "learning_rate": 1.5316514522821576e-05, "loss": 1.2838, "step": 14118 }, { "epoch": 11.71701244813278, 
"grad_norm": 27.239748001098633, "learning_rate": 1.5316182572614108e-05, "loss": 0.9871, "step": 14119 }, { "epoch": 11.717842323651452, "grad_norm": 26.630727767944336, "learning_rate": 1.531585062240664e-05, "loss": 0.7351, "step": 14120 }, { "epoch": 11.718672199170124, "grad_norm": 47.73747253417969, "learning_rate": 1.5315518672199172e-05, "loss": 1.8306, "step": 14121 }, { "epoch": 11.719502074688796, "grad_norm": 43.21876525878906, "learning_rate": 1.53151867219917e-05, "loss": 0.7747, "step": 14122 }, { "epoch": 11.720331950207468, "grad_norm": 118.87591552734375, "learning_rate": 1.5314854771784233e-05, "loss": 1.1111, "step": 14123 }, { "epoch": 11.72116182572614, "grad_norm": 30.28447914123535, "learning_rate": 1.5314522821576765e-05, "loss": 1.0596, "step": 14124 }, { "epoch": 11.721991701244812, "grad_norm": 35.89801025390625, "learning_rate": 1.5314190871369297e-05, "loss": 1.1095, "step": 14125 }, { "epoch": 11.722821576763485, "grad_norm": 32.958343505859375, "learning_rate": 1.5313858921161826e-05, "loss": 1.167, "step": 14126 }, { "epoch": 11.723651452282157, "grad_norm": 64.40898895263672, "learning_rate": 1.5313526970954358e-05, "loss": 1.4641, "step": 14127 }, { "epoch": 11.724481327800829, "grad_norm": 33.80759048461914, "learning_rate": 1.531319502074689e-05, "loss": 1.18, "step": 14128 }, { "epoch": 11.725311203319501, "grad_norm": 19.853967666625977, "learning_rate": 1.5312863070539422e-05, "loss": 0.6173, "step": 14129 }, { "epoch": 11.726141078838173, "grad_norm": 31.95928382873535, "learning_rate": 1.531253112033195e-05, "loss": 1.5027, "step": 14130 }, { "epoch": 11.726970954356846, "grad_norm": 21.172279357910156, "learning_rate": 1.5312199170124483e-05, "loss": 0.806, "step": 14131 }, { "epoch": 11.727800829875518, "grad_norm": 51.17243957519531, "learning_rate": 1.5311867219917015e-05, "loss": 0.8074, "step": 14132 }, { "epoch": 11.72863070539419, "grad_norm": 25.13410186767578, "learning_rate": 1.5311535269709544e-05, "loss": 
0.7847, "step": 14133 }, { "epoch": 11.729460580912862, "grad_norm": 30.88905143737793, "learning_rate": 1.5311203319502076e-05, "loss": 0.8259, "step": 14134 }, { "epoch": 11.730290456431534, "grad_norm": 28.35874366760254, "learning_rate": 1.5310871369294605e-05, "loss": 1.2778, "step": 14135 }, { "epoch": 11.731120331950207, "grad_norm": 25.95331573486328, "learning_rate": 1.5310539419087137e-05, "loss": 0.7074, "step": 14136 }, { "epoch": 11.731950207468879, "grad_norm": 29.95103645324707, "learning_rate": 1.531020746887967e-05, "loss": 1.4119, "step": 14137 }, { "epoch": 11.732780082987551, "grad_norm": 25.702430725097656, "learning_rate": 1.53098755186722e-05, "loss": 0.9964, "step": 14138 }, { "epoch": 11.733609958506223, "grad_norm": 25.046855926513672, "learning_rate": 1.530954356846473e-05, "loss": 0.8832, "step": 14139 }, { "epoch": 11.734439834024897, "grad_norm": 43.031944274902344, "learning_rate": 1.5309211618257262e-05, "loss": 0.9024, "step": 14140 }, { "epoch": 11.73526970954357, "grad_norm": 37.05046081542969, "learning_rate": 1.5308879668049794e-05, "loss": 0.9268, "step": 14141 }, { "epoch": 11.736099585062242, "grad_norm": 20.04612922668457, "learning_rate": 1.5308547717842326e-05, "loss": 0.7077, "step": 14142 }, { "epoch": 11.736929460580914, "grad_norm": 26.670366287231445, "learning_rate": 1.5308215767634855e-05, "loss": 0.5043, "step": 14143 }, { "epoch": 11.737759336099586, "grad_norm": 32.31817626953125, "learning_rate": 1.5307883817427387e-05, "loss": 1.0567, "step": 14144 }, { "epoch": 11.738589211618258, "grad_norm": 76.12614440917969, "learning_rate": 1.530755186721992e-05, "loss": 1.021, "step": 14145 }, { "epoch": 11.73941908713693, "grad_norm": 68.35882568359375, "learning_rate": 1.530721991701245e-05, "loss": 1.1798, "step": 14146 }, { "epoch": 11.740248962655603, "grad_norm": 19.950828552246094, "learning_rate": 1.530688796680498e-05, "loss": 0.5648, "step": 14147 }, { "epoch": 11.741078838174275, "grad_norm": 
28.42745590209961, "learning_rate": 1.5306556016597512e-05, "loss": 0.9308, "step": 14148 }, { "epoch": 11.741908713692947, "grad_norm": 22.74279022216797, "learning_rate": 1.5306224066390044e-05, "loss": 0.7994, "step": 14149 }, { "epoch": 11.74273858921162, "grad_norm": 77.79706573486328, "learning_rate": 1.5305892116182576e-05, "loss": 0.8429, "step": 14150 }, { "epoch": 11.743568464730291, "grad_norm": 37.07796859741211, "learning_rate": 1.5305560165975105e-05, "loss": 1.3984, "step": 14151 }, { "epoch": 11.744398340248964, "grad_norm": 39.19758987426758, "learning_rate": 1.5305228215767637e-05, "loss": 1.3019, "step": 14152 }, { "epoch": 11.745228215767636, "grad_norm": 24.851428985595703, "learning_rate": 1.5304896265560166e-05, "loss": 1.1979, "step": 14153 }, { "epoch": 11.746058091286308, "grad_norm": 18.438676834106445, "learning_rate": 1.5304564315352698e-05, "loss": 0.7949, "step": 14154 }, { "epoch": 11.74688796680498, "grad_norm": 22.16476821899414, "learning_rate": 1.530423236514523e-05, "loss": 0.6643, "step": 14155 }, { "epoch": 11.747717842323652, "grad_norm": 24.867874145507812, "learning_rate": 1.530390041493776e-05, "loss": 1.0542, "step": 14156 }, { "epoch": 11.748547717842325, "grad_norm": 33.1027946472168, "learning_rate": 1.530356846473029e-05, "loss": 0.9004, "step": 14157 }, { "epoch": 11.749377593360997, "grad_norm": 25.873926162719727, "learning_rate": 1.5303236514522823e-05, "loss": 0.7328, "step": 14158 }, { "epoch": 11.750207468879669, "grad_norm": 55.520076751708984, "learning_rate": 1.5302904564315352e-05, "loss": 0.6547, "step": 14159 }, { "epoch": 11.751037344398341, "grad_norm": 41.42988586425781, "learning_rate": 1.5302572614107884e-05, "loss": 0.9123, "step": 14160 }, { "epoch": 11.751867219917013, "grad_norm": 62.03681945800781, "learning_rate": 1.5302240663900416e-05, "loss": 1.0566, "step": 14161 }, { "epoch": 11.752697095435686, "grad_norm": 24.325031280517578, "learning_rate": 1.5301908713692948e-05, "loss": 0.9194, 
"step": 14162 }, { "epoch": 11.753526970954358, "grad_norm": 31.625272750854492, "learning_rate": 1.530157676348548e-05, "loss": 1.1628, "step": 14163 }, { "epoch": 11.75435684647303, "grad_norm": 50.59919357299805, "learning_rate": 1.530124481327801e-05, "loss": 0.9928, "step": 14164 }, { "epoch": 11.755186721991702, "grad_norm": 41.828453063964844, "learning_rate": 1.530091286307054e-05, "loss": 1.2759, "step": 14165 }, { "epoch": 11.756016597510374, "grad_norm": 35.38457107543945, "learning_rate": 1.5300580912863073e-05, "loss": 1.1676, "step": 14166 }, { "epoch": 11.756846473029047, "grad_norm": 28.41856575012207, "learning_rate": 1.5300248962655605e-05, "loss": 0.8445, "step": 14167 }, { "epoch": 11.757676348547719, "grad_norm": 26.503944396972656, "learning_rate": 1.5299917012448134e-05, "loss": 0.5945, "step": 14168 }, { "epoch": 11.758506224066391, "grad_norm": 19.03133773803711, "learning_rate": 1.5299585062240666e-05, "loss": 0.4575, "step": 14169 }, { "epoch": 11.759336099585063, "grad_norm": 83.73745727539062, "learning_rate": 1.5299253112033198e-05, "loss": 2.339, "step": 14170 }, { "epoch": 11.760165975103735, "grad_norm": 25.888513565063477, "learning_rate": 1.5298921161825727e-05, "loss": 0.9305, "step": 14171 }, { "epoch": 11.760995850622407, "grad_norm": 18.937658309936523, "learning_rate": 1.529858921161826e-05, "loss": 0.7185, "step": 14172 }, { "epoch": 11.76182572614108, "grad_norm": 27.00698471069336, "learning_rate": 1.5298257261410788e-05, "loss": 1.075, "step": 14173 }, { "epoch": 11.762655601659752, "grad_norm": 39.75080871582031, "learning_rate": 1.529792531120332e-05, "loss": 1.2807, "step": 14174 }, { "epoch": 11.763485477178424, "grad_norm": 22.458820343017578, "learning_rate": 1.5297593360995852e-05, "loss": 0.5255, "step": 14175 }, { "epoch": 11.764315352697096, "grad_norm": 29.541683197021484, "learning_rate": 1.529726141078838e-05, "loss": 0.8709, "step": 14176 }, { "epoch": 11.765145228215768, "grad_norm": 21.22007179260254, 
"learning_rate": 1.5296929460580913e-05, "loss": 0.5584, "step": 14177 }, { "epoch": 11.76597510373444, "grad_norm": 65.94412231445312, "learning_rate": 1.5296597510373445e-05, "loss": 1.3033, "step": 14178 }, { "epoch": 11.766804979253113, "grad_norm": 25.92841911315918, "learning_rate": 1.5296265560165977e-05, "loss": 0.6666, "step": 14179 }, { "epoch": 11.767634854771785, "grad_norm": 22.120227813720703, "learning_rate": 1.5295933609958506e-05, "loss": 0.9168, "step": 14180 }, { "epoch": 11.768464730290457, "grad_norm": 20.565107345581055, "learning_rate": 1.5295601659751038e-05, "loss": 0.7273, "step": 14181 }, { "epoch": 11.76929460580913, "grad_norm": 27.85518455505371, "learning_rate": 1.529526970954357e-05, "loss": 0.7055, "step": 14182 }, { "epoch": 11.770124481327802, "grad_norm": 20.626338958740234, "learning_rate": 1.5294937759336102e-05, "loss": 0.5762, "step": 14183 }, { "epoch": 11.770954356846474, "grad_norm": 40.38020324707031, "learning_rate": 1.529460580912863e-05, "loss": 1.6449, "step": 14184 }, { "epoch": 11.771784232365146, "grad_norm": 22.863550186157227, "learning_rate": 1.5294273858921163e-05, "loss": 0.6494, "step": 14185 }, { "epoch": 11.772614107883818, "grad_norm": 21.315885543823242, "learning_rate": 1.5293941908713695e-05, "loss": 1.0795, "step": 14186 }, { "epoch": 11.77344398340249, "grad_norm": 61.98628234863281, "learning_rate": 1.5293609958506227e-05, "loss": 1.5998, "step": 14187 }, { "epoch": 11.774273858921163, "grad_norm": 41.23320770263672, "learning_rate": 1.529327800829876e-05, "loss": 1.2801, "step": 14188 }, { "epoch": 11.775103734439835, "grad_norm": 55.17265701293945, "learning_rate": 1.5292946058091288e-05, "loss": 1.1639, "step": 14189 }, { "epoch": 11.775933609958507, "grad_norm": 37.75702667236328, "learning_rate": 1.529261410788382e-05, "loss": 0.8436, "step": 14190 }, { "epoch": 11.77676348547718, "grad_norm": 38.86235427856445, "learning_rate": 1.529228215767635e-05, "loss": 0.8198, "step": 14191 }, { "epoch": 
11.777593360995851, "grad_norm": 44.21221160888672, "learning_rate": 1.529195020746888e-05, "loss": 1.1987, "step": 14192 }, { "epoch": 11.778423236514524, "grad_norm": 22.6207332611084, "learning_rate": 1.5291618257261413e-05, "loss": 0.5383, "step": 14193 }, { "epoch": 11.779253112033196, "grad_norm": 33.24131774902344, "learning_rate": 1.529128630705394e-05, "loss": 1.294, "step": 14194 }, { "epoch": 11.780082987551868, "grad_norm": 24.950307846069336, "learning_rate": 1.5290954356846474e-05, "loss": 0.7716, "step": 14195 }, { "epoch": 11.78091286307054, "grad_norm": 33.9839973449707, "learning_rate": 1.5290622406639006e-05, "loss": 1.1204, "step": 14196 }, { "epoch": 11.781742738589212, "grad_norm": 36.76120376586914, "learning_rate": 1.5290290456431535e-05, "loss": 0.4995, "step": 14197 }, { "epoch": 11.782572614107885, "grad_norm": 22.903573989868164, "learning_rate": 1.5289958506224067e-05, "loss": 0.666, "step": 14198 }, { "epoch": 11.783402489626557, "grad_norm": 101.4477310180664, "learning_rate": 1.52896265560166e-05, "loss": 1.7404, "step": 14199 }, { "epoch": 11.784232365145229, "grad_norm": 21.846284866333008, "learning_rate": 1.528929460580913e-05, "loss": 1.285, "step": 14200 }, { "epoch": 11.785062240663901, "grad_norm": 64.82817840576172, "learning_rate": 1.528896265560166e-05, "loss": 0.5767, "step": 14201 }, { "epoch": 11.785892116182573, "grad_norm": 30.60039520263672, "learning_rate": 1.5288630705394192e-05, "loss": 0.7264, "step": 14202 }, { "epoch": 11.786721991701246, "grad_norm": 18.08597183227539, "learning_rate": 1.5288298755186724e-05, "loss": 0.7567, "step": 14203 }, { "epoch": 11.787551867219918, "grad_norm": 34.59355545043945, "learning_rate": 1.5287966804979256e-05, "loss": 0.6762, "step": 14204 }, { "epoch": 11.78838174273859, "grad_norm": 38.67965316772461, "learning_rate": 1.5287634854771785e-05, "loss": 1.1008, "step": 14205 }, { "epoch": 11.789211618257262, "grad_norm": 41.69404983520508, "learning_rate": 
1.5287302904564317e-05, "loss": 0.5894, "step": 14206 }, { "epoch": 11.790041493775934, "grad_norm": 29.376827239990234, "learning_rate": 1.528697095435685e-05, "loss": 1.078, "step": 14207 }, { "epoch": 11.790871369294607, "grad_norm": 37.04117202758789, "learning_rate": 1.528663900414938e-05, "loss": 0.7558, "step": 14208 }, { "epoch": 11.791701244813279, "grad_norm": 19.53508186340332, "learning_rate": 1.528630705394191e-05, "loss": 0.7858, "step": 14209 }, { "epoch": 11.792531120331951, "grad_norm": 30.566743850708008, "learning_rate": 1.5285975103734442e-05, "loss": 1.0922, "step": 14210 }, { "epoch": 11.793360995850623, "grad_norm": 13.231078147888184, "learning_rate": 1.5285643153526974e-05, "loss": 0.4883, "step": 14211 }, { "epoch": 11.794190871369295, "grad_norm": 52.94475555419922, "learning_rate": 1.5285311203319503e-05, "loss": 0.7697, "step": 14212 }, { "epoch": 11.795020746887968, "grad_norm": 16.95598793029785, "learning_rate": 1.5284979253112035e-05, "loss": 0.6919, "step": 14213 }, { "epoch": 11.79585062240664, "grad_norm": 31.947111129760742, "learning_rate": 1.5284647302904563e-05, "loss": 1.071, "step": 14214 }, { "epoch": 11.796680497925312, "grad_norm": 54.71186065673828, "learning_rate": 1.5284315352697096e-05, "loss": 0.9107, "step": 14215 }, { "epoch": 11.797510373443984, "grad_norm": 32.07297134399414, "learning_rate": 1.5283983402489628e-05, "loss": 0.7371, "step": 14216 }, { "epoch": 11.798340248962656, "grad_norm": 33.97866439819336, "learning_rate": 1.528365145228216e-05, "loss": 0.8781, "step": 14217 }, { "epoch": 11.799170124481329, "grad_norm": 31.462818145751953, "learning_rate": 1.528331950207469e-05, "loss": 0.7724, "step": 14218 }, { "epoch": 11.8, "grad_norm": 34.711219787597656, "learning_rate": 1.528298755186722e-05, "loss": 0.7591, "step": 14219 }, { "epoch": 11.800829875518673, "grad_norm": 31.308237075805664, "learning_rate": 1.5282655601659753e-05, "loss": 0.9648, "step": 14220 }, { "epoch": 11.801659751037345, 
"grad_norm": 30.277002334594727, "learning_rate": 1.5282323651452285e-05, "loss": 0.6158, "step": 14221 }, { "epoch": 11.802489626556017, "grad_norm": 23.277490615844727, "learning_rate": 1.5281991701244814e-05, "loss": 1.8033, "step": 14222 }, { "epoch": 11.80331950207469, "grad_norm": 25.050548553466797, "learning_rate": 1.5281659751037346e-05, "loss": 0.7779, "step": 14223 }, { "epoch": 11.804149377593362, "grad_norm": 23.5787353515625, "learning_rate": 1.5281327800829878e-05, "loss": 0.9798, "step": 14224 }, { "epoch": 11.804979253112034, "grad_norm": 43.07222366333008, "learning_rate": 1.528099585062241e-05, "loss": 1.3502, "step": 14225 }, { "epoch": 11.805809128630706, "grad_norm": 33.03315353393555, "learning_rate": 1.528066390041494e-05, "loss": 0.7183, "step": 14226 }, { "epoch": 11.806639004149378, "grad_norm": 20.083267211914062, "learning_rate": 1.528033195020747e-05, "loss": 0.7742, "step": 14227 }, { "epoch": 11.80746887966805, "grad_norm": 13.38442611694336, "learning_rate": 1.5280000000000003e-05, "loss": 0.3187, "step": 14228 }, { "epoch": 11.808298755186723, "grad_norm": 21.67539405822754, "learning_rate": 1.527966804979253e-05, "loss": 0.4755, "step": 14229 }, { "epoch": 11.809128630705395, "grad_norm": 28.330671310424805, "learning_rate": 1.5279336099585064e-05, "loss": 0.7019, "step": 14230 }, { "epoch": 11.809958506224067, "grad_norm": 27.482080459594727, "learning_rate": 1.5279004149377596e-05, "loss": 1.0998, "step": 14231 }, { "epoch": 11.81078838174274, "grad_norm": 32.66862106323242, "learning_rate": 1.5278672199170124e-05, "loss": 1.104, "step": 14232 }, { "epoch": 11.811618257261411, "grad_norm": 40.43312072753906, "learning_rate": 1.5278340248962657e-05, "loss": 0.9044, "step": 14233 }, { "epoch": 11.812448132780084, "grad_norm": 31.600988388061523, "learning_rate": 1.527800829875519e-05, "loss": 1.3416, "step": 14234 }, { "epoch": 11.813278008298756, "grad_norm": 63.81083297729492, "learning_rate": 1.5277676348547717e-05, "loss": 
0.631, "step": 14235 }, { "epoch": 11.814107883817428, "grad_norm": 21.089460372924805, "learning_rate": 1.527734439834025e-05, "loss": 0.4717, "step": 14236 }, { "epoch": 11.8149377593361, "grad_norm": 23.45633316040039, "learning_rate": 1.527701244813278e-05, "loss": 1.0182, "step": 14237 }, { "epoch": 11.815767634854772, "grad_norm": 49.003746032714844, "learning_rate": 1.527668049792531e-05, "loss": 1.8185, "step": 14238 }, { "epoch": 11.816597510373445, "grad_norm": 34.84872817993164, "learning_rate": 1.5276348547717842e-05, "loss": 0.8221, "step": 14239 }, { "epoch": 11.817427385892117, "grad_norm": 54.344356536865234, "learning_rate": 1.5276016597510375e-05, "loss": 1.6151, "step": 14240 }, { "epoch": 11.818257261410789, "grad_norm": 21.501588821411133, "learning_rate": 1.5275684647302907e-05, "loss": 0.7632, "step": 14241 }, { "epoch": 11.819087136929461, "grad_norm": 25.444597244262695, "learning_rate": 1.527535269709544e-05, "loss": 0.7034, "step": 14242 }, { "epoch": 11.819917012448133, "grad_norm": 25.092885971069336, "learning_rate": 1.5275020746887967e-05, "loss": 0.959, "step": 14243 }, { "epoch": 11.820746887966806, "grad_norm": 61.39686584472656, "learning_rate": 1.52746887966805e-05, "loss": 1.3194, "step": 14244 }, { "epoch": 11.821576763485478, "grad_norm": 26.656578063964844, "learning_rate": 1.527435684647303e-05, "loss": 0.6933, "step": 14245 }, { "epoch": 11.82240663900415, "grad_norm": 25.193567276000977, "learning_rate": 1.5274024896265564e-05, "loss": 0.8968, "step": 14246 }, { "epoch": 11.823236514522822, "grad_norm": 41.6221923828125, "learning_rate": 1.5273692946058092e-05, "loss": 0.9232, "step": 14247 }, { "epoch": 11.824066390041494, "grad_norm": 24.654565811157227, "learning_rate": 1.5273360995850625e-05, "loss": 0.5306, "step": 14248 }, { "epoch": 11.824896265560167, "grad_norm": 28.456151962280273, "learning_rate": 1.5273029045643157e-05, "loss": 1.3379, "step": 14249 }, { "epoch": 11.825726141078839, "grad_norm": 
36.202598571777344, "learning_rate": 1.5272697095435685e-05, "loss": 0.6428, "step": 14250 }, { "epoch": 11.826556016597511, "grad_norm": 20.19853973388672, "learning_rate": 1.5272365145228218e-05, "loss": 0.5259, "step": 14251 }, { "epoch": 11.827385892116183, "grad_norm": 34.31724548339844, "learning_rate": 1.5272033195020746e-05, "loss": 0.9702, "step": 14252 }, { "epoch": 11.828215767634855, "grad_norm": 36.20841598510742, "learning_rate": 1.527170124481328e-05, "loss": 1.0247, "step": 14253 }, { "epoch": 11.829045643153528, "grad_norm": 30.37775993347168, "learning_rate": 1.527136929460581e-05, "loss": 1.0031, "step": 14254 }, { "epoch": 11.8298755186722, "grad_norm": 20.0894718170166, "learning_rate": 1.527103734439834e-05, "loss": 0.4992, "step": 14255 }, { "epoch": 11.830705394190872, "grad_norm": 30.948144912719727, "learning_rate": 1.527070539419087e-05, "loss": 1.1094, "step": 14256 }, { "epoch": 11.831535269709544, "grad_norm": 41.52383041381836, "learning_rate": 1.5270373443983403e-05, "loss": 1.0696, "step": 14257 }, { "epoch": 11.832365145228216, "grad_norm": 35.383827209472656, "learning_rate": 1.5270041493775936e-05, "loss": 0.8711, "step": 14258 }, { "epoch": 11.833195020746889, "grad_norm": 41.20761489868164, "learning_rate": 1.5269709543568464e-05, "loss": 0.9034, "step": 14259 }, { "epoch": 11.83402489626556, "grad_norm": 49.44551086425781, "learning_rate": 1.5269377593360996e-05, "loss": 1.3233, "step": 14260 }, { "epoch": 11.834854771784233, "grad_norm": 26.374019622802734, "learning_rate": 1.526904564315353e-05, "loss": 0.7007, "step": 14261 }, { "epoch": 11.835684647302905, "grad_norm": 43.73075485229492, "learning_rate": 1.526871369294606e-05, "loss": 1.1733, "step": 14262 }, { "epoch": 11.836514522821577, "grad_norm": 48.522430419921875, "learning_rate": 1.526838174273859e-05, "loss": 1.5233, "step": 14263 }, { "epoch": 11.83734439834025, "grad_norm": 37.7183952331543, "learning_rate": 1.526804979253112e-05, "loss": 0.3275, "step": 14264 
}, { "epoch": 11.838174273858922, "grad_norm": 39.75921630859375, "learning_rate": 1.5267717842323653e-05, "loss": 0.7782, "step": 14265 }, { "epoch": 11.839004149377594, "grad_norm": 35.16101837158203, "learning_rate": 1.5267385892116186e-05, "loss": 0.6046, "step": 14266 }, { "epoch": 11.839834024896266, "grad_norm": 27.908723831176758, "learning_rate": 1.5267053941908718e-05, "loss": 0.6917, "step": 14267 }, { "epoch": 11.840663900414938, "grad_norm": 51.674896240234375, "learning_rate": 1.5266721991701246e-05, "loss": 0.755, "step": 14268 }, { "epoch": 11.84149377593361, "grad_norm": 43.82762908935547, "learning_rate": 1.526639004149378e-05, "loss": 1.4262, "step": 14269 }, { "epoch": 11.842323651452283, "grad_norm": 36.085731506347656, "learning_rate": 1.5266058091286307e-05, "loss": 1.5391, "step": 14270 }, { "epoch": 11.843153526970955, "grad_norm": 24.0769100189209, "learning_rate": 1.526572614107884e-05, "loss": 0.7801, "step": 14271 }, { "epoch": 11.843983402489627, "grad_norm": 57.86623764038086, "learning_rate": 1.526539419087137e-05, "loss": 1.0272, "step": 14272 }, { "epoch": 11.8448132780083, "grad_norm": 36.0461540222168, "learning_rate": 1.52650622406639e-05, "loss": 1.2641, "step": 14273 }, { "epoch": 11.845643153526972, "grad_norm": 26.483247756958008, "learning_rate": 1.5264730290456432e-05, "loss": 0.8144, "step": 14274 }, { "epoch": 11.846473029045644, "grad_norm": 28.008853912353516, "learning_rate": 1.5264398340248964e-05, "loss": 1.1996, "step": 14275 }, { "epoch": 11.847302904564316, "grad_norm": 23.99821662902832, "learning_rate": 1.5264066390041493e-05, "loss": 0.5508, "step": 14276 }, { "epoch": 11.848132780082988, "grad_norm": 21.390823364257812, "learning_rate": 1.5263734439834025e-05, "loss": 0.8098, "step": 14277 }, { "epoch": 11.84896265560166, "grad_norm": 51.954830169677734, "learning_rate": 1.5263402489626557e-05, "loss": 0.6173, "step": 14278 }, { "epoch": 11.849792531120332, "grad_norm": 23.48539924621582, "learning_rate": 
1.526307053941909e-05, "loss": 0.7928, "step": 14279 }, { "epoch": 11.850622406639005, "grad_norm": 30.05337905883789, "learning_rate": 1.5262738589211618e-05, "loss": 1.1136, "step": 14280 }, { "epoch": 11.851452282157677, "grad_norm": 63.756996154785156, "learning_rate": 1.526240663900415e-05, "loss": 0.8809, "step": 14281 }, { "epoch": 11.852282157676349, "grad_norm": 40.482582092285156, "learning_rate": 1.5262074688796682e-05, "loss": 1.1706, "step": 14282 }, { "epoch": 11.853112033195021, "grad_norm": 24.051862716674805, "learning_rate": 1.5261742738589214e-05, "loss": 0.4438, "step": 14283 }, { "epoch": 11.853941908713693, "grad_norm": 38.028564453125, "learning_rate": 1.5261410788381743e-05, "loss": 0.5985, "step": 14284 }, { "epoch": 11.854771784232366, "grad_norm": 41.494747161865234, "learning_rate": 1.5261078838174275e-05, "loss": 0.9074, "step": 14285 }, { "epoch": 11.855601659751038, "grad_norm": 49.63980484008789, "learning_rate": 1.5260746887966807e-05, "loss": 1.0991, "step": 14286 }, { "epoch": 11.85643153526971, "grad_norm": 16.316936492919922, "learning_rate": 1.526041493775934e-05, "loss": 0.4858, "step": 14287 }, { "epoch": 11.857261410788382, "grad_norm": 25.61111068725586, "learning_rate": 1.5260082987551868e-05, "loss": 0.6788, "step": 14288 }, { "epoch": 11.858091286307054, "grad_norm": 27.48394775390625, "learning_rate": 1.52597510373444e-05, "loss": 0.7287, "step": 14289 }, { "epoch": 11.858921161825727, "grad_norm": 32.31770324707031, "learning_rate": 1.525941908713693e-05, "loss": 1.2595, "step": 14290 }, { "epoch": 11.859751037344399, "grad_norm": 16.774982452392578, "learning_rate": 1.525908713692946e-05, "loss": 0.5741, "step": 14291 }, { "epoch": 11.860580912863071, "grad_norm": 29.032142639160156, "learning_rate": 1.5258755186721992e-05, "loss": 0.6992, "step": 14292 }, { "epoch": 11.861410788381743, "grad_norm": 42.27257537841797, "learning_rate": 1.5258423236514524e-05, "loss": 1.2631, "step": 14293 }, { "epoch": 
11.862240663900415, "grad_norm": 25.968822479248047, "learning_rate": 1.5258091286307056e-05, "loss": 0.9689, "step": 14294 }, { "epoch": 11.863070539419088, "grad_norm": 29.087112426757812, "learning_rate": 1.5257759336099586e-05, "loss": 0.7289, "step": 14295 }, { "epoch": 11.86390041493776, "grad_norm": 32.651554107666016, "learning_rate": 1.5257427385892118e-05, "loss": 1.3476, "step": 14296 }, { "epoch": 11.864730290456432, "grad_norm": 49.44853210449219, "learning_rate": 1.5257095435684649e-05, "loss": 0.7354, "step": 14297 }, { "epoch": 11.865560165975104, "grad_norm": 30.04679298400879, "learning_rate": 1.525676348547718e-05, "loss": 0.9136, "step": 14298 }, { "epoch": 11.866390041493776, "grad_norm": 39.470890045166016, "learning_rate": 1.5256431535269711e-05, "loss": 1.3593, "step": 14299 }, { "epoch": 11.867219917012449, "grad_norm": 28.21726417541504, "learning_rate": 1.5256099585062243e-05, "loss": 0.5714, "step": 14300 }, { "epoch": 11.86804979253112, "grad_norm": 23.651323318481445, "learning_rate": 1.5255767634854772e-05, "loss": 0.5242, "step": 14301 }, { "epoch": 11.868879668049793, "grad_norm": 50.03401565551758, "learning_rate": 1.5255435684647304e-05, "loss": 1.3209, "step": 14302 }, { "epoch": 11.869709543568465, "grad_norm": 25.812150955200195, "learning_rate": 1.5255103734439836e-05, "loss": 0.9388, "step": 14303 }, { "epoch": 11.870539419087137, "grad_norm": 39.73281478881836, "learning_rate": 1.5254771784232367e-05, "loss": 0.9228, "step": 14304 }, { "epoch": 11.87136929460581, "grad_norm": 26.893030166625977, "learning_rate": 1.5254439834024897e-05, "loss": 0.606, "step": 14305 }, { "epoch": 11.872199170124482, "grad_norm": 30.111398696899414, "learning_rate": 1.5254107883817428e-05, "loss": 1.3422, "step": 14306 }, { "epoch": 11.873029045643154, "grad_norm": 48.40488052368164, "learning_rate": 1.525377593360996e-05, "loss": 0.957, "step": 14307 }, { "epoch": 11.873858921161826, "grad_norm": 25.307336807250977, "learning_rate": 
1.5253443983402492e-05, "loss": 0.7952, "step": 14308 }, { "epoch": 11.874688796680498, "grad_norm": 41.671775817871094, "learning_rate": 1.525311203319502e-05, "loss": 1.6255, "step": 14309 }, { "epoch": 11.87551867219917, "grad_norm": 35.334346771240234, "learning_rate": 1.5252780082987553e-05, "loss": 1.3632, "step": 14310 }, { "epoch": 11.876348547717843, "grad_norm": 51.743404388427734, "learning_rate": 1.5252448132780085e-05, "loss": 1.2393, "step": 14311 }, { "epoch": 11.877178423236515, "grad_norm": 29.785568237304688, "learning_rate": 1.5252116182572617e-05, "loss": 0.828, "step": 14312 }, { "epoch": 11.878008298755187, "grad_norm": 31.89262580871582, "learning_rate": 1.5251784232365146e-05, "loss": 1.0271, "step": 14313 }, { "epoch": 11.87883817427386, "grad_norm": 38.88106918334961, "learning_rate": 1.5251452282157678e-05, "loss": 1.4292, "step": 14314 }, { "epoch": 11.879668049792532, "grad_norm": 34.694580078125, "learning_rate": 1.5251120331950208e-05, "loss": 0.971, "step": 14315 }, { "epoch": 11.880497925311204, "grad_norm": 43.87274169921875, "learning_rate": 1.525078838174274e-05, "loss": 1.531, "step": 14316 }, { "epoch": 11.881327800829876, "grad_norm": 16.927490234375, "learning_rate": 1.525045643153527e-05, "loss": 0.6109, "step": 14317 }, { "epoch": 11.882157676348548, "grad_norm": 42.108333587646484, "learning_rate": 1.5250124481327801e-05, "loss": 0.8246, "step": 14318 }, { "epoch": 11.88298755186722, "grad_norm": 29.987220764160156, "learning_rate": 1.5249792531120333e-05, "loss": 1.0991, "step": 14319 }, { "epoch": 11.883817427385893, "grad_norm": 18.155214309692383, "learning_rate": 1.5249460580912865e-05, "loss": 0.7765, "step": 14320 }, { "epoch": 11.884647302904565, "grad_norm": 30.314836502075195, "learning_rate": 1.5249128630705397e-05, "loss": 0.9843, "step": 14321 }, { "epoch": 11.885477178423237, "grad_norm": 49.59234619140625, "learning_rate": 1.5248796680497926e-05, "loss": 1.1118, "step": 14322 }, { "epoch": 11.88630705394191, 
"grad_norm": 28.366657257080078, "learning_rate": 1.5248464730290458e-05, "loss": 1.0757, "step": 14323 }, { "epoch": 11.887136929460581, "grad_norm": 23.769838333129883, "learning_rate": 1.5248132780082989e-05, "loss": 1.1833, "step": 14324 }, { "epoch": 11.887966804979254, "grad_norm": 20.748003005981445, "learning_rate": 1.524780082987552e-05, "loss": 0.6837, "step": 14325 }, { "epoch": 11.888796680497926, "grad_norm": 23.230087280273438, "learning_rate": 1.5247468879668051e-05, "loss": 0.8289, "step": 14326 }, { "epoch": 11.889626556016598, "grad_norm": 24.569049835205078, "learning_rate": 1.5247136929460581e-05, "loss": 0.6778, "step": 14327 }, { "epoch": 11.89045643153527, "grad_norm": 17.37928581237793, "learning_rate": 1.5246804979253114e-05, "loss": 0.5569, "step": 14328 }, { "epoch": 11.891286307053942, "grad_norm": 25.743335723876953, "learning_rate": 1.5246473029045646e-05, "loss": 0.9398, "step": 14329 }, { "epoch": 11.892116182572614, "grad_norm": 70.98980712890625, "learning_rate": 1.5246141078838174e-05, "loss": 0.9767, "step": 14330 }, { "epoch": 11.892946058091287, "grad_norm": 34.77755355834961, "learning_rate": 1.5245809128630707e-05, "loss": 0.839, "step": 14331 }, { "epoch": 11.893775933609959, "grad_norm": 23.414142608642578, "learning_rate": 1.5245477178423239e-05, "loss": 0.5166, "step": 14332 }, { "epoch": 11.894605809128631, "grad_norm": 45.85662078857422, "learning_rate": 1.5245145228215769e-05, "loss": 1.0861, "step": 14333 }, { "epoch": 11.895435684647303, "grad_norm": 32.11171340942383, "learning_rate": 1.52448132780083e-05, "loss": 0.5638, "step": 14334 }, { "epoch": 11.896265560165975, "grad_norm": 18.738529205322266, "learning_rate": 1.5244481327800832e-05, "loss": 0.5201, "step": 14335 }, { "epoch": 11.897095435684648, "grad_norm": 42.088436126708984, "learning_rate": 1.5244149377593362e-05, "loss": 1.1262, "step": 14336 }, { "epoch": 11.89792531120332, "grad_norm": 27.0211238861084, "learning_rate": 1.5243817427385894e-05, 
"loss": 0.9296, "step": 14337 }, { "epoch": 11.898755186721992, "grad_norm": 25.04914093017578, "learning_rate": 1.5243485477178423e-05, "loss": 0.8261, "step": 14338 }, { "epoch": 11.899585062240664, "grad_norm": 30.800912857055664, "learning_rate": 1.5243153526970955e-05, "loss": 0.968, "step": 14339 }, { "epoch": 11.900414937759336, "grad_norm": 31.981149673461914, "learning_rate": 1.5242821576763487e-05, "loss": 1.0525, "step": 14340 }, { "epoch": 11.901244813278009, "grad_norm": 40.951385498046875, "learning_rate": 1.5242489626556019e-05, "loss": 1.0576, "step": 14341 }, { "epoch": 11.90207468879668, "grad_norm": 60.905662536621094, "learning_rate": 1.5242157676348548e-05, "loss": 0.7174, "step": 14342 }, { "epoch": 11.902904564315353, "grad_norm": 39.19316101074219, "learning_rate": 1.524182572614108e-05, "loss": 1.6608, "step": 14343 }, { "epoch": 11.903734439834025, "grad_norm": 42.067893981933594, "learning_rate": 1.5241493775933612e-05, "loss": 1.0111, "step": 14344 }, { "epoch": 11.904564315352697, "grad_norm": 26.8343563079834, "learning_rate": 1.5241161825726142e-05, "loss": 0.7601, "step": 14345 }, { "epoch": 11.90539419087137, "grad_norm": 24.196168899536133, "learning_rate": 1.5240829875518675e-05, "loss": 0.8537, "step": 14346 }, { "epoch": 11.906224066390042, "grad_norm": 59.60919952392578, "learning_rate": 1.5240497925311203e-05, "loss": 0.9979, "step": 14347 }, { "epoch": 11.907053941908714, "grad_norm": 26.836807250976562, "learning_rate": 1.5240165975103735e-05, "loss": 0.5518, "step": 14348 }, { "epoch": 11.907883817427386, "grad_norm": 54.929935455322266, "learning_rate": 1.5239834024896268e-05, "loss": 0.8291, "step": 14349 }, { "epoch": 11.908713692946058, "grad_norm": 23.915672302246094, "learning_rate": 1.52395020746888e-05, "loss": 0.813, "step": 14350 }, { "epoch": 11.90954356846473, "grad_norm": 29.03556251525879, "learning_rate": 1.5239170124481328e-05, "loss": 0.8813, "step": 14351 }, { "epoch": 11.910373443983403, "grad_norm": 
42.97587203979492, "learning_rate": 1.523883817427386e-05, "loss": 1.1867, "step": 14352 }, { "epoch": 11.911203319502075, "grad_norm": 20.458362579345703, "learning_rate": 1.5238506224066391e-05, "loss": 0.6992, "step": 14353 }, { "epoch": 11.912033195020747, "grad_norm": 28.538192749023438, "learning_rate": 1.5238174273858923e-05, "loss": 1.07, "step": 14354 }, { "epoch": 11.91286307053942, "grad_norm": 33.26719284057617, "learning_rate": 1.5237842323651453e-05, "loss": 0.8777, "step": 14355 }, { "epoch": 11.913692946058092, "grad_norm": 61.052040100097656, "learning_rate": 1.5237510373443984e-05, "loss": 0.7239, "step": 14356 }, { "epoch": 11.914522821576764, "grad_norm": 37.9050178527832, "learning_rate": 1.5237178423236516e-05, "loss": 0.8085, "step": 14357 }, { "epoch": 11.915352697095436, "grad_norm": 30.061613082885742, "learning_rate": 1.5236846473029048e-05, "loss": 0.758, "step": 14358 }, { "epoch": 11.916182572614108, "grad_norm": 64.94734954833984, "learning_rate": 1.5236514522821577e-05, "loss": 1.5266, "step": 14359 }, { "epoch": 11.91701244813278, "grad_norm": 33.93263244628906, "learning_rate": 1.5236182572614109e-05, "loss": 0.6761, "step": 14360 }, { "epoch": 11.917842323651453, "grad_norm": 16.202634811401367, "learning_rate": 1.5235850622406641e-05, "loss": 0.4222, "step": 14361 }, { "epoch": 11.918672199170125, "grad_norm": 15.306351661682129, "learning_rate": 1.5235518672199171e-05, "loss": 0.464, "step": 14362 }, { "epoch": 11.919502074688797, "grad_norm": 20.46955680847168, "learning_rate": 1.5235186721991702e-05, "loss": 0.8574, "step": 14363 }, { "epoch": 11.92033195020747, "grad_norm": 34.041290283203125, "learning_rate": 1.5234854771784234e-05, "loss": 1.1669, "step": 14364 }, { "epoch": 11.921161825726141, "grad_norm": 20.912525177001953, "learning_rate": 1.5234522821576764e-05, "loss": 0.7409, "step": 14365 }, { "epoch": 11.921991701244814, "grad_norm": 20.52345085144043, "learning_rate": 1.5234190871369296e-05, "loss": 1.0477, 
"step": 14366 }, { "epoch": 11.922821576763486, "grad_norm": 51.490299224853516, "learning_rate": 1.5233858921161825e-05, "loss": 1.8712, "step": 14367 }, { "epoch": 11.923651452282158, "grad_norm": 18.161197662353516, "learning_rate": 1.5233526970954357e-05, "loss": 0.7101, "step": 14368 }, { "epoch": 11.92448132780083, "grad_norm": 23.212106704711914, "learning_rate": 1.523319502074689e-05, "loss": 0.7899, "step": 14369 }, { "epoch": 11.925311203319502, "grad_norm": 43.79177474975586, "learning_rate": 1.5232863070539421e-05, "loss": 1.2312, "step": 14370 }, { "epoch": 11.926141078838175, "grad_norm": 52.606388092041016, "learning_rate": 1.523253112033195e-05, "loss": 1.6002, "step": 14371 }, { "epoch": 11.926970954356847, "grad_norm": 30.1981143951416, "learning_rate": 1.5232199170124482e-05, "loss": 1.1974, "step": 14372 }, { "epoch": 11.927800829875519, "grad_norm": 17.70712661743164, "learning_rate": 1.5231867219917014e-05, "loss": 1.0575, "step": 14373 }, { "epoch": 11.928630705394191, "grad_norm": 25.956022262573242, "learning_rate": 1.5231535269709545e-05, "loss": 0.9813, "step": 14374 }, { "epoch": 11.929460580912863, "grad_norm": 20.1336727142334, "learning_rate": 1.5231203319502077e-05, "loss": 0.5274, "step": 14375 }, { "epoch": 11.930290456431536, "grad_norm": 51.30802917480469, "learning_rate": 1.5230871369294606e-05, "loss": 0.7353, "step": 14376 }, { "epoch": 11.931120331950208, "grad_norm": 60.72193145751953, "learning_rate": 1.5230539419087138e-05, "loss": 1.4064, "step": 14377 }, { "epoch": 11.93195020746888, "grad_norm": 20.464143753051758, "learning_rate": 1.523020746887967e-05, "loss": 0.8344, "step": 14378 }, { "epoch": 11.932780082987552, "grad_norm": 35.750213623046875, "learning_rate": 1.5229875518672202e-05, "loss": 1.1625, "step": 14379 }, { "epoch": 11.933609958506224, "grad_norm": 44.144195556640625, "learning_rate": 1.522954356846473e-05, "loss": 1.6084, "step": 14380 }, { "epoch": 11.934439834024896, "grad_norm": 26.47698402404785, 
"learning_rate": 1.5229211618257263e-05, "loss": 1.3966, "step": 14381 }, { "epoch": 11.935269709543569, "grad_norm": 41.54173278808594, "learning_rate": 1.5228879668049795e-05, "loss": 1.09, "step": 14382 }, { "epoch": 11.936099585062241, "grad_norm": 33.98320770263672, "learning_rate": 1.5228547717842325e-05, "loss": 0.6654, "step": 14383 }, { "epoch": 11.936929460580913, "grad_norm": 39.085323333740234, "learning_rate": 1.5228215767634856e-05, "loss": 0.8683, "step": 14384 }, { "epoch": 11.937759336099585, "grad_norm": 31.04749870300293, "learning_rate": 1.5227883817427386e-05, "loss": 0.8379, "step": 14385 }, { "epoch": 11.938589211618257, "grad_norm": 27.795148849487305, "learning_rate": 1.5227551867219918e-05, "loss": 0.6833, "step": 14386 }, { "epoch": 11.93941908713693, "grad_norm": 28.34585189819336, "learning_rate": 1.522721991701245e-05, "loss": 0.7774, "step": 14387 }, { "epoch": 11.940248962655602, "grad_norm": 23.354679107666016, "learning_rate": 1.5226887966804979e-05, "loss": 0.7013, "step": 14388 }, { "epoch": 11.941078838174274, "grad_norm": 19.77083969116211, "learning_rate": 1.5226556016597511e-05, "loss": 0.7922, "step": 14389 }, { "epoch": 11.941908713692946, "grad_norm": 29.165523529052734, "learning_rate": 1.5226224066390043e-05, "loss": 0.588, "step": 14390 }, { "epoch": 11.942738589211618, "grad_norm": 46.14681625366211, "learning_rate": 1.5225892116182575e-05, "loss": 1.4996, "step": 14391 }, { "epoch": 11.94356846473029, "grad_norm": 31.963348388671875, "learning_rate": 1.5225560165975104e-05, "loss": 0.9768, "step": 14392 }, { "epoch": 11.944398340248963, "grad_norm": 21.642131805419922, "learning_rate": 1.5225228215767636e-05, "loss": 0.5625, "step": 14393 }, { "epoch": 11.945228215767635, "grad_norm": 33.41478729248047, "learning_rate": 1.5224896265560167e-05, "loss": 1.5025, "step": 14394 }, { "epoch": 11.946058091286307, "grad_norm": 29.842498779296875, "learning_rate": 1.5224564315352699e-05, "loss": 0.9606, "step": 14395 }, { 
"epoch": 11.94688796680498, "grad_norm": 28.58875846862793, "learning_rate": 1.5224232365145229e-05, "loss": 0.8021, "step": 14396 }, { "epoch": 11.947717842323652, "grad_norm": 22.095352172851562, "learning_rate": 1.522390041493776e-05, "loss": 1.0657, "step": 14397 }, { "epoch": 11.948547717842324, "grad_norm": 52.93907165527344, "learning_rate": 1.5223568464730292e-05, "loss": 1.774, "step": 14398 }, { "epoch": 11.949377593360996, "grad_norm": 20.39304542541504, "learning_rate": 1.5223236514522824e-05, "loss": 0.9457, "step": 14399 }, { "epoch": 11.950207468879668, "grad_norm": 20.452545166015625, "learning_rate": 1.5222904564315354e-05, "loss": 0.5334, "step": 14400 }, { "epoch": 11.95103734439834, "grad_norm": 66.4724349975586, "learning_rate": 1.5222572614107885e-05, "loss": 0.8652, "step": 14401 }, { "epoch": 11.951867219917013, "grad_norm": 27.52678871154785, "learning_rate": 1.5222240663900417e-05, "loss": 0.8585, "step": 14402 }, { "epoch": 11.952697095435685, "grad_norm": 21.048473358154297, "learning_rate": 1.5221908713692947e-05, "loss": 0.8054, "step": 14403 }, { "epoch": 11.953526970954357, "grad_norm": 27.003801345825195, "learning_rate": 1.522157676348548e-05, "loss": 1.0047, "step": 14404 }, { "epoch": 11.95435684647303, "grad_norm": 30.183605194091797, "learning_rate": 1.522124481327801e-05, "loss": 0.9222, "step": 14405 }, { "epoch": 11.955186721991701, "grad_norm": 27.595224380493164, "learning_rate": 1.522091286307054e-05, "loss": 0.9187, "step": 14406 }, { "epoch": 11.956016597510374, "grad_norm": 51.78797912597656, "learning_rate": 1.5220580912863072e-05, "loss": 0.6715, "step": 14407 }, { "epoch": 11.956846473029046, "grad_norm": 29.744487762451172, "learning_rate": 1.5220248962655604e-05, "loss": 1.3678, "step": 14408 }, { "epoch": 11.957676348547718, "grad_norm": 33.20219039916992, "learning_rate": 1.5219917012448133e-05, "loss": 0.5704, "step": 14409 }, { "epoch": 11.95850622406639, "grad_norm": 97.8574447631836, "learning_rate": 
1.5219585062240665e-05, "loss": 0.9189, "step": 14410 }, { "epoch": 11.959336099585062, "grad_norm": 73.27848052978516, "learning_rate": 1.5219253112033197e-05, "loss": 0.6927, "step": 14411 }, { "epoch": 11.960165975103735, "grad_norm": 23.193395614624023, "learning_rate": 1.5218921161825728e-05, "loss": 1.1368, "step": 14412 }, { "epoch": 11.960995850622407, "grad_norm": 32.379215240478516, "learning_rate": 1.5218589211618258e-05, "loss": 1.426, "step": 14413 }, { "epoch": 11.961825726141079, "grad_norm": 28.8500919342041, "learning_rate": 1.5218257261410788e-05, "loss": 1.7479, "step": 14414 }, { "epoch": 11.962655601659751, "grad_norm": 37.12906265258789, "learning_rate": 1.521792531120332e-05, "loss": 0.8613, "step": 14415 }, { "epoch": 11.963485477178423, "grad_norm": 27.45022201538086, "learning_rate": 1.5217593360995853e-05, "loss": 1.0165, "step": 14416 }, { "epoch": 11.964315352697096, "grad_norm": 27.836122512817383, "learning_rate": 1.5217261410788381e-05, "loss": 1.1558, "step": 14417 }, { "epoch": 11.965145228215768, "grad_norm": 26.669069290161133, "learning_rate": 1.5216929460580913e-05, "loss": 0.9329, "step": 14418 }, { "epoch": 11.96597510373444, "grad_norm": 17.534717559814453, "learning_rate": 1.5216597510373446e-05, "loss": 0.6512, "step": 14419 }, { "epoch": 11.966804979253112, "grad_norm": 135.5391845703125, "learning_rate": 1.5216265560165978e-05, "loss": 0.6123, "step": 14420 }, { "epoch": 11.967634854771784, "grad_norm": 32.735191345214844, "learning_rate": 1.5215933609958506e-05, "loss": 0.9264, "step": 14421 }, { "epoch": 11.968464730290457, "grad_norm": 36.206974029541016, "learning_rate": 1.5215601659751039e-05, "loss": 1.31, "step": 14422 }, { "epoch": 11.969294605809129, "grad_norm": 37.77040481567383, "learning_rate": 1.5215269709543569e-05, "loss": 0.9922, "step": 14423 }, { "epoch": 11.970124481327801, "grad_norm": 9.461751937866211, "learning_rate": 1.5214937759336101e-05, "loss": 0.2585, "step": 14424 }, { "epoch": 
11.970954356846473, "grad_norm": 31.414960861206055, "learning_rate": 1.5214605809128633e-05, "loss": 0.9841, "step": 14425 }, { "epoch": 11.971784232365145, "grad_norm": 28.467607498168945, "learning_rate": 1.5214273858921162e-05, "loss": 0.9461, "step": 14426 }, { "epoch": 11.972614107883818, "grad_norm": 34.505104064941406, "learning_rate": 1.5213941908713694e-05, "loss": 1.2169, "step": 14427 }, { "epoch": 11.97344398340249, "grad_norm": 34.904563903808594, "learning_rate": 1.5213609958506226e-05, "loss": 0.7137, "step": 14428 }, { "epoch": 11.974273858921162, "grad_norm": 39.5076904296875, "learning_rate": 1.5213278008298758e-05, "loss": 0.8854, "step": 14429 }, { "epoch": 11.975103734439834, "grad_norm": 34.31503677368164, "learning_rate": 1.5212946058091287e-05, "loss": 1.4872, "step": 14430 }, { "epoch": 11.975933609958506, "grad_norm": 44.93986511230469, "learning_rate": 1.5212614107883819e-05, "loss": 1.6806, "step": 14431 }, { "epoch": 11.976763485477179, "grad_norm": 27.529708862304688, "learning_rate": 1.521228215767635e-05, "loss": 1.1012, "step": 14432 }, { "epoch": 11.97759336099585, "grad_norm": 36.86647033691406, "learning_rate": 1.5211950207468882e-05, "loss": 1.2596, "step": 14433 }, { "epoch": 11.978423236514523, "grad_norm": 32.437469482421875, "learning_rate": 1.5211618257261412e-05, "loss": 0.969, "step": 14434 }, { "epoch": 11.979253112033195, "grad_norm": 23.116487503051758, "learning_rate": 1.5211286307053942e-05, "loss": 0.6126, "step": 14435 }, { "epoch": 11.980082987551867, "grad_norm": 25.592138290405273, "learning_rate": 1.5210954356846474e-05, "loss": 1.1354, "step": 14436 }, { "epoch": 11.98091286307054, "grad_norm": 46.63780975341797, "learning_rate": 1.5210622406639007e-05, "loss": 1.04, "step": 14437 }, { "epoch": 11.981742738589212, "grad_norm": 36.52238082885742, "learning_rate": 1.5210290456431535e-05, "loss": 1.7416, "step": 14438 }, { "epoch": 11.982572614107884, "grad_norm": 57.086395263671875, "learning_rate": 
1.5209958506224067e-05, "loss": 1.0001, "step": 14439 }, { "epoch": 11.983402489626556, "grad_norm": 49.95214080810547, "learning_rate": 1.52096265560166e-05, "loss": 1.2761, "step": 14440 }, { "epoch": 11.984232365145228, "grad_norm": 64.39794158935547, "learning_rate": 1.520929460580913e-05, "loss": 1.9074, "step": 14441 }, { "epoch": 11.9850622406639, "grad_norm": 44.1442756652832, "learning_rate": 1.520896265560166e-05, "loss": 1.5679, "step": 14442 }, { "epoch": 11.985892116182573, "grad_norm": 40.165531158447266, "learning_rate": 1.5208630705394192e-05, "loss": 1.1497, "step": 14443 }, { "epoch": 11.986721991701245, "grad_norm": 27.619022369384766, "learning_rate": 1.5208298755186723e-05, "loss": 0.5973, "step": 14444 }, { "epoch": 11.987551867219917, "grad_norm": 46.80043029785156, "learning_rate": 1.5207966804979255e-05, "loss": 0.7382, "step": 14445 }, { "epoch": 11.98838174273859, "grad_norm": 36.26533508300781, "learning_rate": 1.5207634854771784e-05, "loss": 1.5124, "step": 14446 }, { "epoch": 11.989211618257261, "grad_norm": 22.652421951293945, "learning_rate": 1.5207302904564316e-05, "loss": 0.9474, "step": 14447 }, { "epoch": 11.990041493775934, "grad_norm": 16.02031898498535, "learning_rate": 1.5206970954356848e-05, "loss": 0.9283, "step": 14448 }, { "epoch": 11.990871369294606, "grad_norm": 17.787073135375977, "learning_rate": 1.520663900414938e-05, "loss": 0.5207, "step": 14449 }, { "epoch": 11.991701244813278, "grad_norm": 50.66590118408203, "learning_rate": 1.5206307053941909e-05, "loss": 1.3407, "step": 14450 }, { "epoch": 11.99253112033195, "grad_norm": 17.479915618896484, "learning_rate": 1.5205975103734441e-05, "loss": 0.5376, "step": 14451 }, { "epoch": 11.993360995850622, "grad_norm": 25.968109130859375, "learning_rate": 1.5205643153526973e-05, "loss": 0.5323, "step": 14452 }, { "epoch": 11.994190871369295, "grad_norm": 39.41802978515625, "learning_rate": 1.5205311203319503e-05, "loss": 0.7822, "step": 14453 }, { "epoch": 
11.995020746887967, "grad_norm": 45.55524444580078, "learning_rate": 1.5204979253112035e-05, "loss": 1.2676, "step": 14454 }, { "epoch": 11.995850622406639, "grad_norm": 28.952760696411133, "learning_rate": 1.5204647302904564e-05, "loss": 1.7392, "step": 14455 }, { "epoch": 11.996680497925311, "grad_norm": 40.81464767456055, "learning_rate": 1.5204315352697096e-05, "loss": 0.8322, "step": 14456 }, { "epoch": 11.997510373443983, "grad_norm": 24.95952796936035, "learning_rate": 1.5203983402489628e-05, "loss": 1.3006, "step": 14457 }, { "epoch": 11.998340248962656, "grad_norm": 36.99165725708008, "learning_rate": 1.520365145228216e-05, "loss": 1.0551, "step": 14458 }, { "epoch": 11.999170124481328, "grad_norm": 27.468292236328125, "learning_rate": 1.520331950207469e-05, "loss": 1.2457, "step": 14459 }, { "epoch": 12.0, "grad_norm": 21.77971839904785, "learning_rate": 1.5202987551867221e-05, "loss": 1.5409, "step": 14460 }, { "epoch": 12.000829875518672, "grad_norm": 17.933176040649414, "learning_rate": 1.5202655601659753e-05, "loss": 0.5018, "step": 14461 }, { "epoch": 12.001659751037344, "grad_norm": 20.19286346435547, "learning_rate": 1.5202323651452284e-05, "loss": 0.6359, "step": 14462 }, { "epoch": 12.002489626556017, "grad_norm": 15.35857105255127, "learning_rate": 1.5201991701244814e-05, "loss": 0.4148, "step": 14463 }, { "epoch": 12.003319502074689, "grad_norm": 23.0023250579834, "learning_rate": 1.5201659751037345e-05, "loss": 1.1904, "step": 14464 }, { "epoch": 12.004149377593361, "grad_norm": 25.31648063659668, "learning_rate": 1.5201327800829877e-05, "loss": 0.9357, "step": 14465 }, { "epoch": 12.004979253112033, "grad_norm": 29.847488403320312, "learning_rate": 1.5200995850622409e-05, "loss": 1.2917, "step": 14466 }, { "epoch": 12.005809128630705, "grad_norm": 21.125886917114258, "learning_rate": 1.5200663900414938e-05, "loss": 0.6467, "step": 14467 }, { "epoch": 12.006639004149378, "grad_norm": 28.455703735351562, "learning_rate": 1.520033195020747e-05, 
"loss": 1.1474, "step": 14468 }, { "epoch": 12.00746887966805, "grad_norm": 22.14291763305664, "learning_rate": 1.5200000000000002e-05, "loss": 0.9662, "step": 14469 }, { "epoch": 12.008298755186722, "grad_norm": 31.28782844543457, "learning_rate": 1.5199668049792532e-05, "loss": 0.9658, "step": 14470 }, { "epoch": 12.009128630705394, "grad_norm": 17.93338394165039, "learning_rate": 1.5199336099585063e-05, "loss": 0.7547, "step": 14471 }, { "epoch": 12.009958506224066, "grad_norm": 22.99774169921875, "learning_rate": 1.5199004149377595e-05, "loss": 0.9657, "step": 14472 }, { "epoch": 12.010788381742739, "grad_norm": 40.42129898071289, "learning_rate": 1.5198672199170125e-05, "loss": 1.6935, "step": 14473 }, { "epoch": 12.01161825726141, "grad_norm": 37.01362991333008, "learning_rate": 1.5198340248962657e-05, "loss": 0.8803, "step": 14474 }, { "epoch": 12.012448132780083, "grad_norm": 33.549625396728516, "learning_rate": 1.5198008298755188e-05, "loss": 0.8654, "step": 14475 }, { "epoch": 12.013278008298755, "grad_norm": 49.41689682006836, "learning_rate": 1.5197676348547718e-05, "loss": 0.5041, "step": 14476 }, { "epoch": 12.014107883817427, "grad_norm": 32.412193298339844, "learning_rate": 1.519734439834025e-05, "loss": 0.6929, "step": 14477 }, { "epoch": 12.0149377593361, "grad_norm": 20.356412887573242, "learning_rate": 1.5197012448132782e-05, "loss": 0.6337, "step": 14478 }, { "epoch": 12.015767634854772, "grad_norm": 26.17376136779785, "learning_rate": 1.5196680497925313e-05, "loss": 0.8504, "step": 14479 }, { "epoch": 12.016597510373444, "grad_norm": 26.87525749206543, "learning_rate": 1.5196348547717843e-05, "loss": 0.8045, "step": 14480 }, { "epoch": 12.017427385892116, "grad_norm": 25.2560977935791, "learning_rate": 1.5196016597510375e-05, "loss": 0.4367, "step": 14481 }, { "epoch": 12.018257261410788, "grad_norm": 58.87041473388672, "learning_rate": 1.5195684647302906e-05, "loss": 0.7402, "step": 14482 }, { "epoch": 12.01908713692946, "grad_norm": 
22.009506225585938, "learning_rate": 1.5195352697095438e-05, "loss": 0.6275, "step": 14483 }, { "epoch": 12.019917012448133, "grad_norm": 49.44062423706055, "learning_rate": 1.5195020746887967e-05, "loss": 0.5571, "step": 14484 }, { "epoch": 12.020746887966805, "grad_norm": 21.342641830444336, "learning_rate": 1.5194688796680499e-05, "loss": 0.5674, "step": 14485 }, { "epoch": 12.021576763485477, "grad_norm": 20.58146095275879, "learning_rate": 1.519435684647303e-05, "loss": 1.0023, "step": 14486 }, { "epoch": 12.02240663900415, "grad_norm": 27.111923217773438, "learning_rate": 1.5194024896265563e-05, "loss": 0.9817, "step": 14487 }, { "epoch": 12.023236514522821, "grad_norm": 45.97806930541992, "learning_rate": 1.5193692946058092e-05, "loss": 0.9174, "step": 14488 }, { "epoch": 12.024066390041494, "grad_norm": 28.687326431274414, "learning_rate": 1.5193360995850624e-05, "loss": 0.9825, "step": 14489 }, { "epoch": 12.024896265560166, "grad_norm": 31.080368041992188, "learning_rate": 1.5193029045643156e-05, "loss": 0.7596, "step": 14490 }, { "epoch": 12.025726141078838, "grad_norm": 20.676795959472656, "learning_rate": 1.5192697095435686e-05, "loss": 0.5952, "step": 14491 }, { "epoch": 12.02655601659751, "grad_norm": 40.128013610839844, "learning_rate": 1.5192365145228217e-05, "loss": 0.5757, "step": 14492 }, { "epoch": 12.027385892116182, "grad_norm": 19.97398567199707, "learning_rate": 1.5192033195020747e-05, "loss": 0.5171, "step": 14493 }, { "epoch": 12.028215767634855, "grad_norm": 31.738508224487305, "learning_rate": 1.5191701244813279e-05, "loss": 0.9884, "step": 14494 }, { "epoch": 12.029045643153527, "grad_norm": 41.0209846496582, "learning_rate": 1.5191369294605811e-05, "loss": 1.2047, "step": 14495 }, { "epoch": 12.029875518672199, "grad_norm": 9.880369186401367, "learning_rate": 1.519103734439834e-05, "loss": 0.2542, "step": 14496 }, { "epoch": 12.030705394190871, "grad_norm": 27.315351486206055, "learning_rate": 1.5190705394190872e-05, "loss": 0.8596, 
"step": 14497 }, { "epoch": 12.031535269709543, "grad_norm": 37.94832229614258, "learning_rate": 1.5190373443983404e-05, "loss": 1.04, "step": 14498 }, { "epoch": 12.032365145228216, "grad_norm": 27.235164642333984, "learning_rate": 1.5190041493775936e-05, "loss": 0.819, "step": 14499 }, { "epoch": 12.033195020746888, "grad_norm": 26.488218307495117, "learning_rate": 1.5189709543568465e-05, "loss": 0.9222, "step": 14500 }, { "epoch": 12.03402489626556, "grad_norm": 38.643470764160156, "learning_rate": 1.5189377593360997e-05, "loss": 1.3558, "step": 14501 }, { "epoch": 12.034854771784232, "grad_norm": 25.346065521240234, "learning_rate": 1.5189045643153528e-05, "loss": 0.8893, "step": 14502 }, { "epoch": 12.035684647302904, "grad_norm": 21.352197647094727, "learning_rate": 1.518871369294606e-05, "loss": 0.6296, "step": 14503 }, { "epoch": 12.036514522821577, "grad_norm": 31.54058837890625, "learning_rate": 1.5188381742738592e-05, "loss": 1.08, "step": 14504 }, { "epoch": 12.037344398340249, "grad_norm": 62.430572509765625, "learning_rate": 1.518804979253112e-05, "loss": 0.6969, "step": 14505 }, { "epoch": 12.038174273858921, "grad_norm": 42.73781204223633, "learning_rate": 1.5187717842323653e-05, "loss": 0.6897, "step": 14506 }, { "epoch": 12.039004149377593, "grad_norm": 23.739686965942383, "learning_rate": 1.5187385892116185e-05, "loss": 0.7701, "step": 14507 }, { "epoch": 12.039834024896265, "grad_norm": 26.06785774230957, "learning_rate": 1.5187053941908717e-05, "loss": 1.238, "step": 14508 }, { "epoch": 12.040663900414938, "grad_norm": 25.550302505493164, "learning_rate": 1.5186721991701246e-05, "loss": 0.4382, "step": 14509 }, { "epoch": 12.04149377593361, "grad_norm": 25.47640037536621, "learning_rate": 1.5186390041493778e-05, "loss": 1.1136, "step": 14510 }, { "epoch": 12.042323651452282, "grad_norm": 22.466577529907227, "learning_rate": 1.5186058091286308e-05, "loss": 0.6278, "step": 14511 }, { "epoch": 12.043153526970954, "grad_norm": 49.40440368652344, 
"learning_rate": 1.518572614107884e-05, "loss": 0.6586, "step": 14512 }, { "epoch": 12.043983402489626, "grad_norm": 37.13346481323242, "learning_rate": 1.518539419087137e-05, "loss": 1.2033, "step": 14513 }, { "epoch": 12.044813278008299, "grad_norm": 39.80511474609375, "learning_rate": 1.5185062240663901e-05, "loss": 1.0102, "step": 14514 }, { "epoch": 12.04564315352697, "grad_norm": 53.85580825805664, "learning_rate": 1.5184730290456433e-05, "loss": 0.585, "step": 14515 }, { "epoch": 12.046473029045643, "grad_norm": 41.490291595458984, "learning_rate": 1.5184398340248965e-05, "loss": 0.7855, "step": 14516 }, { "epoch": 12.047302904564315, "grad_norm": 27.30461883544922, "learning_rate": 1.5184066390041494e-05, "loss": 0.6755, "step": 14517 }, { "epoch": 12.048132780082987, "grad_norm": 27.330894470214844, "learning_rate": 1.5183734439834026e-05, "loss": 0.665, "step": 14518 }, { "epoch": 12.04896265560166, "grad_norm": 59.81740951538086, "learning_rate": 1.5183402489626558e-05, "loss": 1.8165, "step": 14519 }, { "epoch": 12.049792531120332, "grad_norm": 32.60542297363281, "learning_rate": 1.5183070539419089e-05, "loss": 0.9512, "step": 14520 }, { "epoch": 12.050622406639004, "grad_norm": 56.8011589050293, "learning_rate": 1.5182738589211619e-05, "loss": 1.2864, "step": 14521 }, { "epoch": 12.051452282157676, "grad_norm": 15.593744277954102, "learning_rate": 1.5182406639004151e-05, "loss": 0.2829, "step": 14522 }, { "epoch": 12.052282157676348, "grad_norm": 35.68610763549805, "learning_rate": 1.5182074688796681e-05, "loss": 0.7373, "step": 14523 }, { "epoch": 12.05311203319502, "grad_norm": 61.4327392578125, "learning_rate": 1.5181742738589214e-05, "loss": 1.377, "step": 14524 }, { "epoch": 12.053941908713693, "grad_norm": 32.85201644897461, "learning_rate": 1.5181410788381742e-05, "loss": 0.9569, "step": 14525 }, { "epoch": 12.054771784232365, "grad_norm": 17.347883224487305, "learning_rate": 1.5181078838174274e-05, "loss": 0.4363, "step": 14526 }, { "epoch": 
12.055601659751037, "grad_norm": 17.402990341186523, "learning_rate": 1.5180746887966806e-05, "loss": 0.4267, "step": 14527 }, { "epoch": 12.05643153526971, "grad_norm": 70.90033721923828, "learning_rate": 1.5180414937759339e-05, "loss": 1.2892, "step": 14528 }, { "epoch": 12.057261410788382, "grad_norm": 20.33357810974121, "learning_rate": 1.5180082987551867e-05, "loss": 0.4539, "step": 14529 }, { "epoch": 12.058091286307054, "grad_norm": 31.64490509033203, "learning_rate": 1.51797510373444e-05, "loss": 1.026, "step": 14530 }, { "epoch": 12.058921161825726, "grad_norm": 41.307247161865234, "learning_rate": 1.517941908713693e-05, "loss": 1.0345, "step": 14531 }, { "epoch": 12.059751037344398, "grad_norm": 25.520740509033203, "learning_rate": 1.5179087136929462e-05, "loss": 0.868, "step": 14532 }, { "epoch": 12.06058091286307, "grad_norm": 21.40387535095215, "learning_rate": 1.5178755186721994e-05, "loss": 0.8117, "step": 14533 }, { "epoch": 12.061410788381743, "grad_norm": 34.58030700683594, "learning_rate": 1.5178423236514523e-05, "loss": 0.6375, "step": 14534 }, { "epoch": 12.062240663900415, "grad_norm": 29.58501625061035, "learning_rate": 1.5178091286307055e-05, "loss": 0.6927, "step": 14535 }, { "epoch": 12.063070539419087, "grad_norm": 42.1685676574707, "learning_rate": 1.5177759336099587e-05, "loss": 0.7653, "step": 14536 }, { "epoch": 12.063900414937759, "grad_norm": 28.943498611450195, "learning_rate": 1.5177427385892119e-05, "loss": 0.6206, "step": 14537 }, { "epoch": 12.064730290456431, "grad_norm": 21.684789657592773, "learning_rate": 1.5177095435684648e-05, "loss": 0.6182, "step": 14538 }, { "epoch": 12.065560165975104, "grad_norm": 64.5214614868164, "learning_rate": 1.517676348547718e-05, "loss": 1.8984, "step": 14539 }, { "epoch": 12.066390041493776, "grad_norm": 77.17687225341797, "learning_rate": 1.517643153526971e-05, "loss": 0.7792, "step": 14540 }, { "epoch": 12.067219917012448, "grad_norm": 27.005531311035156, "learning_rate": 
1.5176099585062242e-05, "loss": 0.8394, "step": 14541 }, { "epoch": 12.06804979253112, "grad_norm": 33.92257308959961, "learning_rate": 1.5175767634854773e-05, "loss": 0.6658, "step": 14542 }, { "epoch": 12.068879668049792, "grad_norm": 43.897281646728516, "learning_rate": 1.5175435684647303e-05, "loss": 0.7567, "step": 14543 }, { "epoch": 12.069709543568464, "grad_norm": 33.40607452392578, "learning_rate": 1.5175103734439835e-05, "loss": 0.908, "step": 14544 }, { "epoch": 12.070539419087137, "grad_norm": 18.43226432800293, "learning_rate": 1.5174771784232367e-05, "loss": 0.2906, "step": 14545 }, { "epoch": 12.071369294605809, "grad_norm": 30.54247283935547, "learning_rate": 1.5174439834024896e-05, "loss": 0.5593, "step": 14546 }, { "epoch": 12.072199170124481, "grad_norm": 27.18871307373047, "learning_rate": 1.5174107883817428e-05, "loss": 0.7467, "step": 14547 }, { "epoch": 12.073029045643153, "grad_norm": 33.55829620361328, "learning_rate": 1.517377593360996e-05, "loss": 0.8143, "step": 14548 }, { "epoch": 12.073858921161825, "grad_norm": 35.026424407958984, "learning_rate": 1.517344398340249e-05, "loss": 0.6882, "step": 14549 }, { "epoch": 12.074688796680498, "grad_norm": 32.7476692199707, "learning_rate": 1.5173112033195021e-05, "loss": 0.6804, "step": 14550 }, { "epoch": 12.07551867219917, "grad_norm": 29.364463806152344, "learning_rate": 1.5172780082987553e-05, "loss": 0.728, "step": 14551 }, { "epoch": 12.076348547717842, "grad_norm": 68.34780883789062, "learning_rate": 1.5172448132780084e-05, "loss": 1.6592, "step": 14552 }, { "epoch": 12.077178423236514, "grad_norm": 40.069400787353516, "learning_rate": 1.5172116182572616e-05, "loss": 0.6775, "step": 14553 }, { "epoch": 12.078008298755186, "grad_norm": 38.83357238769531, "learning_rate": 1.5171784232365145e-05, "loss": 0.591, "step": 14554 }, { "epoch": 12.078838174273859, "grad_norm": 28.59935188293457, "learning_rate": 1.5171452282157677e-05, "loss": 0.6976, "step": 14555 }, { "epoch": 
12.07966804979253, "grad_norm": 43.28636169433594, "learning_rate": 1.5171120331950209e-05, "loss": 0.8841, "step": 14556 }, { "epoch": 12.080497925311203, "grad_norm": 126.35541534423828, "learning_rate": 1.5170788381742741e-05, "loss": 1.18, "step": 14557 }, { "epoch": 12.081327800829875, "grad_norm": 29.610187530517578, "learning_rate": 1.5170456431535271e-05, "loss": 1.332, "step": 14558 }, { "epoch": 12.082157676348547, "grad_norm": 30.3592529296875, "learning_rate": 1.5170124481327802e-05, "loss": 1.0018, "step": 14559 }, { "epoch": 12.08298755186722, "grad_norm": 29.860809326171875, "learning_rate": 1.5169792531120334e-05, "loss": 0.9148, "step": 14560 }, { "epoch": 12.083817427385892, "grad_norm": 37.61275863647461, "learning_rate": 1.5169460580912864e-05, "loss": 0.9201, "step": 14561 }, { "epoch": 12.084647302904564, "grad_norm": 51.01354217529297, "learning_rate": 1.5169128630705396e-05, "loss": 0.9278, "step": 14562 }, { "epoch": 12.085477178423236, "grad_norm": 39.538970947265625, "learning_rate": 1.5168796680497925e-05, "loss": 0.8796, "step": 14563 }, { "epoch": 12.086307053941908, "grad_norm": 41.930423736572266, "learning_rate": 1.5168464730290457e-05, "loss": 1.1069, "step": 14564 }, { "epoch": 12.08713692946058, "grad_norm": 25.397865295410156, "learning_rate": 1.516813278008299e-05, "loss": 0.6782, "step": 14565 }, { "epoch": 12.087966804979253, "grad_norm": 24.749603271484375, "learning_rate": 1.5167800829875521e-05, "loss": 0.7398, "step": 14566 }, { "epoch": 12.088796680497925, "grad_norm": 19.61667251586914, "learning_rate": 1.516746887966805e-05, "loss": 0.6283, "step": 14567 }, { "epoch": 12.089626556016597, "grad_norm": 27.50466537475586, "learning_rate": 1.5167136929460582e-05, "loss": 1.1523, "step": 14568 }, { "epoch": 12.09045643153527, "grad_norm": 24.81903076171875, "learning_rate": 1.5166804979253114e-05, "loss": 0.6069, "step": 14569 }, { "epoch": 12.091286307053942, "grad_norm": 30.488792419433594, "learning_rate": 
1.5166473029045645e-05, "loss": 1.209, "step": 14570 }, { "epoch": 12.092116182572614, "grad_norm": 36.461368560791016, "learning_rate": 1.5166141078838175e-05, "loss": 1.1935, "step": 14571 }, { "epoch": 12.092946058091286, "grad_norm": 23.802413940429688, "learning_rate": 1.5165809128630706e-05, "loss": 0.6802, "step": 14572 }, { "epoch": 12.093775933609958, "grad_norm": 24.843196868896484, "learning_rate": 1.5165477178423238e-05, "loss": 0.6262, "step": 14573 }, { "epoch": 12.09460580912863, "grad_norm": 41.27593994140625, "learning_rate": 1.516514522821577e-05, "loss": 1.4227, "step": 14574 }, { "epoch": 12.095435684647303, "grad_norm": 18.50208854675293, "learning_rate": 1.5164813278008299e-05, "loss": 0.4102, "step": 14575 }, { "epoch": 12.096265560165975, "grad_norm": 51.30118179321289, "learning_rate": 1.516448132780083e-05, "loss": 0.9936, "step": 14576 }, { "epoch": 12.097095435684647, "grad_norm": 41.07271194458008, "learning_rate": 1.5164149377593363e-05, "loss": 1.5398, "step": 14577 }, { "epoch": 12.09792531120332, "grad_norm": 41.13874053955078, "learning_rate": 1.5163817427385895e-05, "loss": 0.5989, "step": 14578 }, { "epoch": 12.098755186721991, "grad_norm": 10.569453239440918, "learning_rate": 1.5163485477178424e-05, "loss": 0.2292, "step": 14579 }, { "epoch": 12.099585062240664, "grad_norm": 29.432767868041992, "learning_rate": 1.5163153526970956e-05, "loss": 0.8703, "step": 14580 }, { "epoch": 12.100414937759336, "grad_norm": 49.47548294067383, "learning_rate": 1.5162821576763486e-05, "loss": 0.7365, "step": 14581 }, { "epoch": 12.101244813278008, "grad_norm": 14.7730073928833, "learning_rate": 1.5162489626556018e-05, "loss": 0.4037, "step": 14582 }, { "epoch": 12.10207468879668, "grad_norm": 41.02260208129883, "learning_rate": 1.5162157676348549e-05, "loss": 1.1005, "step": 14583 }, { "epoch": 12.102904564315352, "grad_norm": 27.393802642822266, "learning_rate": 1.5161825726141079e-05, "loss": 0.4707, "step": 14584 }, { "epoch": 
12.103734439834025, "grad_norm": 12.87627124786377, "learning_rate": 1.5161493775933611e-05, "loss": 0.3119, "step": 14585 }, { "epoch": 12.104564315352697, "grad_norm": 48.81438064575195, "learning_rate": 1.5161161825726143e-05, "loss": 1.1797, "step": 14586 }, { "epoch": 12.105394190871369, "grad_norm": 53.22785568237305, "learning_rate": 1.5160829875518674e-05, "loss": 1.0888, "step": 14587 }, { "epoch": 12.106224066390041, "grad_norm": 36.17035675048828, "learning_rate": 1.5160497925311204e-05, "loss": 1.0524, "step": 14588 }, { "epoch": 12.107053941908713, "grad_norm": 28.728883743286133, "learning_rate": 1.5160165975103736e-05, "loss": 1.0091, "step": 14589 }, { "epoch": 12.107883817427386, "grad_norm": 41.53778839111328, "learning_rate": 1.5159834024896267e-05, "loss": 0.7928, "step": 14590 }, { "epoch": 12.108713692946058, "grad_norm": 32.99716567993164, "learning_rate": 1.5159502074688799e-05, "loss": 0.7108, "step": 14591 }, { "epoch": 12.10954356846473, "grad_norm": 21.356849670410156, "learning_rate": 1.5159170124481329e-05, "loss": 0.5536, "step": 14592 }, { "epoch": 12.110373443983402, "grad_norm": 55.03199005126953, "learning_rate": 1.515883817427386e-05, "loss": 0.7366, "step": 14593 }, { "epoch": 12.111203319502074, "grad_norm": 31.73851203918457, "learning_rate": 1.5158506224066392e-05, "loss": 0.8026, "step": 14594 }, { "epoch": 12.112033195020746, "grad_norm": 19.070266723632812, "learning_rate": 1.5158174273858924e-05, "loss": 0.4019, "step": 14595 }, { "epoch": 12.112863070539419, "grad_norm": 21.302541732788086, "learning_rate": 1.5157842323651452e-05, "loss": 0.4459, "step": 14596 }, { "epoch": 12.11369294605809, "grad_norm": 47.956504821777344, "learning_rate": 1.5157510373443985e-05, "loss": 1.0219, "step": 14597 }, { "epoch": 12.114522821576763, "grad_norm": 37.56644058227539, "learning_rate": 1.5157178423236517e-05, "loss": 1.1338, "step": 14598 }, { "epoch": 12.115352697095435, "grad_norm": 37.70013427734375, "learning_rate": 
1.5156846473029047e-05, "loss": 0.7665, "step": 14599 }, { "epoch": 12.116182572614107, "grad_norm": 21.55307960510254, "learning_rate": 1.5156514522821578e-05, "loss": 0.8018, "step": 14600 }, { "epoch": 12.11701244813278, "grad_norm": 67.55640411376953, "learning_rate": 1.5156182572614108e-05, "loss": 1.614, "step": 14601 }, { "epoch": 12.117842323651452, "grad_norm": 43.621131896972656, "learning_rate": 1.515585062240664e-05, "loss": 1.0474, "step": 14602 }, { "epoch": 12.118672199170124, "grad_norm": 47.86866760253906, "learning_rate": 1.5155518672199172e-05, "loss": 0.9742, "step": 14603 }, { "epoch": 12.119502074688796, "grad_norm": 33.799556732177734, "learning_rate": 1.5155186721991701e-05, "loss": 0.6481, "step": 14604 }, { "epoch": 12.120331950207468, "grad_norm": 19.68724250793457, "learning_rate": 1.5154854771784233e-05, "loss": 0.3963, "step": 14605 }, { "epoch": 12.12116182572614, "grad_norm": 44.69550323486328, "learning_rate": 1.5154522821576765e-05, "loss": 0.8732, "step": 14606 }, { "epoch": 12.121991701244813, "grad_norm": 39.86317825317383, "learning_rate": 1.5154190871369297e-05, "loss": 1.3543, "step": 14607 }, { "epoch": 12.122821576763485, "grad_norm": 100.07255554199219, "learning_rate": 1.5153858921161826e-05, "loss": 0.8487, "step": 14608 }, { "epoch": 12.123651452282157, "grad_norm": 38.955692291259766, "learning_rate": 1.5153526970954358e-05, "loss": 1.2368, "step": 14609 }, { "epoch": 12.12448132780083, "grad_norm": 32.35308074951172, "learning_rate": 1.5153195020746888e-05, "loss": 0.714, "step": 14610 }, { "epoch": 12.125311203319502, "grad_norm": 49.001041412353516, "learning_rate": 1.515286307053942e-05, "loss": 0.5388, "step": 14611 }, { "epoch": 12.126141078838174, "grad_norm": 99.59669494628906, "learning_rate": 1.5152531120331953e-05, "loss": 0.6463, "step": 14612 }, { "epoch": 12.126970954356846, "grad_norm": 33.61554718017578, "learning_rate": 1.5152199170124481e-05, "loss": 0.7107, "step": 14613 }, { "epoch": 
12.127800829875518, "grad_norm": 30.38461685180664, "learning_rate": 1.5151867219917013e-05, "loss": 0.7025, "step": 14614 }, { "epoch": 12.12863070539419, "grad_norm": 31.0985050201416, "learning_rate": 1.5151535269709546e-05, "loss": 0.5395, "step": 14615 }, { "epoch": 12.129460580912863, "grad_norm": 18.323272705078125, "learning_rate": 1.5151203319502078e-05, "loss": 0.3608, "step": 14616 }, { "epoch": 12.130290456431535, "grad_norm": 43.18254089355469, "learning_rate": 1.5150871369294606e-05, "loss": 1.2511, "step": 14617 }, { "epoch": 12.131120331950207, "grad_norm": 17.076528549194336, "learning_rate": 1.5150539419087139e-05, "loss": 0.4777, "step": 14618 }, { "epoch": 12.13195020746888, "grad_norm": 13.972143173217773, "learning_rate": 1.5150207468879669e-05, "loss": 0.2888, "step": 14619 }, { "epoch": 12.132780082987551, "grad_norm": 28.00502586364746, "learning_rate": 1.5149875518672201e-05, "loss": 0.6075, "step": 14620 }, { "epoch": 12.133609958506224, "grad_norm": 35.03505325317383, "learning_rate": 1.5149543568464731e-05, "loss": 0.7052, "step": 14621 }, { "epoch": 12.134439834024896, "grad_norm": 40.21438980102539, "learning_rate": 1.5149211618257262e-05, "loss": 0.9474, "step": 14622 }, { "epoch": 12.135269709543568, "grad_norm": 35.675254821777344, "learning_rate": 1.5148879668049794e-05, "loss": 0.8709, "step": 14623 }, { "epoch": 12.13609958506224, "grad_norm": 68.7044906616211, "learning_rate": 1.5148547717842326e-05, "loss": 1.0929, "step": 14624 }, { "epoch": 12.136929460580912, "grad_norm": 25.666545867919922, "learning_rate": 1.5148215767634855e-05, "loss": 0.77, "step": 14625 }, { "epoch": 12.137759336099585, "grad_norm": 57.717044830322266, "learning_rate": 1.5147883817427387e-05, "loss": 1.3824, "step": 14626 }, { "epoch": 12.138589211618257, "grad_norm": 30.05895233154297, "learning_rate": 1.5147551867219919e-05, "loss": 0.4134, "step": 14627 }, { "epoch": 12.139419087136929, "grad_norm": 47.1544189453125, "learning_rate": 
1.514721991701245e-05, "loss": 0.8459, "step": 14628 }, { "epoch": 12.140248962655601, "grad_norm": 73.67203521728516, "learning_rate": 1.514688796680498e-05, "loss": 0.9875, "step": 14629 }, { "epoch": 12.141078838174273, "grad_norm": 37.03329849243164, "learning_rate": 1.5146556016597512e-05, "loss": 1.4502, "step": 14630 }, { "epoch": 12.141908713692946, "grad_norm": 29.50599479675293, "learning_rate": 1.5146224066390042e-05, "loss": 0.9695, "step": 14631 }, { "epoch": 12.142738589211618, "grad_norm": 31.934926986694336, "learning_rate": 1.5145892116182574e-05, "loss": 0.9844, "step": 14632 }, { "epoch": 12.14356846473029, "grad_norm": 28.557802200317383, "learning_rate": 1.5145560165975103e-05, "loss": 0.6521, "step": 14633 }, { "epoch": 12.144398340248962, "grad_norm": 81.23416900634766, "learning_rate": 1.5145228215767635e-05, "loss": 1.2753, "step": 14634 }, { "epoch": 12.145228215767634, "grad_norm": 79.882568359375, "learning_rate": 1.5144896265560167e-05, "loss": 0.9729, "step": 14635 }, { "epoch": 12.146058091286307, "grad_norm": 26.669954299926758, "learning_rate": 1.51445643153527e-05, "loss": 1.2769, "step": 14636 }, { "epoch": 12.146887966804979, "grad_norm": 61.344017028808594, "learning_rate": 1.514423236514523e-05, "loss": 1.2208, "step": 14637 }, { "epoch": 12.147717842323651, "grad_norm": 29.16172218322754, "learning_rate": 1.514390041493776e-05, "loss": 0.8799, "step": 14638 }, { "epoch": 12.148547717842323, "grad_norm": 26.45843505859375, "learning_rate": 1.5143568464730292e-05, "loss": 0.6889, "step": 14639 }, { "epoch": 12.149377593360995, "grad_norm": 62.04697799682617, "learning_rate": 1.5143236514522823e-05, "loss": 0.7495, "step": 14640 }, { "epoch": 12.150207468879668, "grad_norm": 36.58930587768555, "learning_rate": 1.5142904564315355e-05, "loss": 0.8741, "step": 14641 }, { "epoch": 12.15103734439834, "grad_norm": 53.46331024169922, "learning_rate": 1.5142572614107884e-05, "loss": 1.1119, "step": 14642 }, { "epoch": 12.151867219917012, 
"grad_norm": 66.3092041015625, "learning_rate": 1.5142240663900416e-05, "loss": 0.7632, "step": 14643 }, { "epoch": 12.152697095435684, "grad_norm": 47.83609390258789, "learning_rate": 1.5141908713692948e-05, "loss": 1.2286, "step": 14644 }, { "epoch": 12.153526970954356, "grad_norm": 131.15919494628906, "learning_rate": 1.514157676348548e-05, "loss": 0.9351, "step": 14645 }, { "epoch": 12.154356846473028, "grad_norm": 27.367563247680664, "learning_rate": 1.5141244813278009e-05, "loss": 0.6166, "step": 14646 }, { "epoch": 12.1551867219917, "grad_norm": 17.557445526123047, "learning_rate": 1.514091286307054e-05, "loss": 0.469, "step": 14647 }, { "epoch": 12.156016597510373, "grad_norm": 28.944265365600586, "learning_rate": 1.5140580912863071e-05, "loss": 0.8431, "step": 14648 }, { "epoch": 12.156846473029045, "grad_norm": 43.090328216552734, "learning_rate": 1.5140248962655603e-05, "loss": 1.3753, "step": 14649 }, { "epoch": 12.157676348547717, "grad_norm": 42.59023666381836, "learning_rate": 1.5139917012448134e-05, "loss": 1.1494, "step": 14650 }, { "epoch": 12.15850622406639, "grad_norm": 79.6661148071289, "learning_rate": 1.5139585062240664e-05, "loss": 1.316, "step": 14651 }, { "epoch": 12.159336099585062, "grad_norm": 27.006250381469727, "learning_rate": 1.5139253112033196e-05, "loss": 0.7824, "step": 14652 }, { "epoch": 12.160165975103734, "grad_norm": 22.310997009277344, "learning_rate": 1.5138921161825728e-05, "loss": 0.5854, "step": 14653 }, { "epoch": 12.160995850622406, "grad_norm": 34.02522659301758, "learning_rate": 1.5138589211618257e-05, "loss": 0.688, "step": 14654 }, { "epoch": 12.161825726141078, "grad_norm": 19.546560287475586, "learning_rate": 1.513825726141079e-05, "loss": 0.7454, "step": 14655 }, { "epoch": 12.16265560165975, "grad_norm": 38.055747985839844, "learning_rate": 1.5137925311203321e-05, "loss": 1.0254, "step": 14656 }, { "epoch": 12.163485477178423, "grad_norm": 26.634944915771484, "learning_rate": 1.5137593360995852e-05, "loss": 
0.7067, "step": 14657 }, { "epoch": 12.164315352697095, "grad_norm": 75.4610824584961, "learning_rate": 1.5137261410788382e-05, "loss": 0.8969, "step": 14658 }, { "epoch": 12.165145228215767, "grad_norm": 20.642772674560547, "learning_rate": 1.5136929460580914e-05, "loss": 0.6825, "step": 14659 }, { "epoch": 12.16597510373444, "grad_norm": 40.9449462890625, "learning_rate": 1.5136597510373445e-05, "loss": 0.8458, "step": 14660 }, { "epoch": 12.166804979253111, "grad_norm": 26.968019485473633, "learning_rate": 1.5136265560165977e-05, "loss": 0.9299, "step": 14661 }, { "epoch": 12.167634854771784, "grad_norm": 22.644329071044922, "learning_rate": 1.5135933609958507e-05, "loss": 0.9889, "step": 14662 }, { "epoch": 12.168464730290456, "grad_norm": 35.71836853027344, "learning_rate": 1.5135601659751038e-05, "loss": 0.8095, "step": 14663 }, { "epoch": 12.169294605809128, "grad_norm": 42.37697982788086, "learning_rate": 1.513526970954357e-05, "loss": 1.633, "step": 14664 }, { "epoch": 12.1701244813278, "grad_norm": 41.766448974609375, "learning_rate": 1.5134937759336102e-05, "loss": 1.0807, "step": 14665 }, { "epoch": 12.170954356846472, "grad_norm": 24.48636817932129, "learning_rate": 1.5134605809128632e-05, "loss": 0.6809, "step": 14666 }, { "epoch": 12.171784232365145, "grad_norm": 48.6440315246582, "learning_rate": 1.5134273858921163e-05, "loss": 1.1612, "step": 14667 }, { "epoch": 12.172614107883817, "grad_norm": 18.42131805419922, "learning_rate": 1.5133941908713695e-05, "loss": 0.626, "step": 14668 }, { "epoch": 12.173443983402489, "grad_norm": 40.836795806884766, "learning_rate": 1.5133609958506225e-05, "loss": 0.8803, "step": 14669 }, { "epoch": 12.174273858921161, "grad_norm": 68.59368896484375, "learning_rate": 1.5133278008298757e-05, "loss": 0.5, "step": 14670 }, { "epoch": 12.175103734439833, "grad_norm": 46.689918518066406, "learning_rate": 1.5132946058091286e-05, "loss": 0.9267, "step": 14671 }, { "epoch": 12.175933609958506, "grad_norm": 
21.122940063476562, "learning_rate": 1.5132614107883818e-05, "loss": 0.6131, "step": 14672 }, { "epoch": 12.176763485477178, "grad_norm": 29.9328556060791, "learning_rate": 1.513228215767635e-05, "loss": 0.8462, "step": 14673 }, { "epoch": 12.17759336099585, "grad_norm": 11.956513404846191, "learning_rate": 1.5131950207468882e-05, "loss": 0.3759, "step": 14674 }, { "epoch": 12.178423236514522, "grad_norm": 125.93617248535156, "learning_rate": 1.5131618257261411e-05, "loss": 1.0253, "step": 14675 }, { "epoch": 12.179253112033194, "grad_norm": 34.19145202636719, "learning_rate": 1.5131286307053943e-05, "loss": 0.9016, "step": 14676 }, { "epoch": 12.180082987551867, "grad_norm": 52.887081146240234, "learning_rate": 1.5130954356846475e-05, "loss": 1.4116, "step": 14677 }, { "epoch": 12.180912863070539, "grad_norm": 39.54621887207031, "learning_rate": 1.5130622406639006e-05, "loss": 0.6345, "step": 14678 }, { "epoch": 12.181742738589211, "grad_norm": 65.10039520263672, "learning_rate": 1.5130290456431536e-05, "loss": 1.4442, "step": 14679 }, { "epoch": 12.182572614107883, "grad_norm": 39.706329345703125, "learning_rate": 1.5129958506224066e-05, "loss": 0.596, "step": 14680 }, { "epoch": 12.183402489626555, "grad_norm": 28.453670501708984, "learning_rate": 1.5129626556016599e-05, "loss": 0.5855, "step": 14681 }, { "epoch": 12.184232365145228, "grad_norm": 32.78007507324219, "learning_rate": 1.512929460580913e-05, "loss": 1.5004, "step": 14682 }, { "epoch": 12.1850622406639, "grad_norm": 33.05668258666992, "learning_rate": 1.512896265560166e-05, "loss": 0.7581, "step": 14683 }, { "epoch": 12.185892116182572, "grad_norm": 97.07363891601562, "learning_rate": 1.5128630705394192e-05, "loss": 0.6603, "step": 14684 }, { "epoch": 12.186721991701244, "grad_norm": 21.012611389160156, "learning_rate": 1.5128298755186724e-05, "loss": 0.3931, "step": 14685 }, { "epoch": 12.187551867219916, "grad_norm": 33.313499450683594, "learning_rate": 1.5127966804979256e-05, "loss": 0.6809, 
"step": 14686 }, { "epoch": 12.188381742738589, "grad_norm": 26.25397300720215, "learning_rate": 1.5127634854771784e-05, "loss": 0.7659, "step": 14687 }, { "epoch": 12.18921161825726, "grad_norm": 23.714635848999023, "learning_rate": 1.5127302904564317e-05, "loss": 0.7718, "step": 14688 }, { "epoch": 12.190041493775933, "grad_norm": 23.20806121826172, "learning_rate": 1.5126970954356847e-05, "loss": 0.7053, "step": 14689 }, { "epoch": 12.190871369294605, "grad_norm": 21.67548942565918, "learning_rate": 1.5126639004149379e-05, "loss": 0.5259, "step": 14690 }, { "epoch": 12.191701244813277, "grad_norm": 29.806894302368164, "learning_rate": 1.5126307053941911e-05, "loss": 0.7688, "step": 14691 }, { "epoch": 12.19253112033195, "grad_norm": 38.56364059448242, "learning_rate": 1.512597510373444e-05, "loss": 1.0301, "step": 14692 }, { "epoch": 12.193360995850622, "grad_norm": 28.673620223999023, "learning_rate": 1.5125643153526972e-05, "loss": 0.8638, "step": 14693 }, { "epoch": 12.194190871369294, "grad_norm": 40.895118713378906, "learning_rate": 1.5125311203319504e-05, "loss": 0.641, "step": 14694 }, { "epoch": 12.195020746887966, "grad_norm": 49.02021026611328, "learning_rate": 1.5124979253112036e-05, "loss": 0.8138, "step": 14695 }, { "epoch": 12.195850622406638, "grad_norm": 44.47480392456055, "learning_rate": 1.5124647302904565e-05, "loss": 1.0627, "step": 14696 }, { "epoch": 12.19668049792531, "grad_norm": 38.98480224609375, "learning_rate": 1.5124315352697097e-05, "loss": 0.5455, "step": 14697 }, { "epoch": 12.197510373443983, "grad_norm": 39.49174118041992, "learning_rate": 1.5123983402489627e-05, "loss": 1.2702, "step": 14698 }, { "epoch": 12.198340248962655, "grad_norm": 26.66924285888672, "learning_rate": 1.512365145228216e-05, "loss": 0.566, "step": 14699 }, { "epoch": 12.199170124481327, "grad_norm": 64.19090270996094, "learning_rate": 1.512331950207469e-05, "loss": 1.179, "step": 14700 }, { "epoch": 12.2, "grad_norm": 24.353227615356445, "learning_rate": 
1.512298755186722e-05, "loss": 0.4766, "step": 14701 }, { "epoch": 12.200829875518671, "grad_norm": 30.709026336669922, "learning_rate": 1.5122655601659753e-05, "loss": 0.5154, "step": 14702 }, { "epoch": 12.201659751037344, "grad_norm": 51.44205093383789, "learning_rate": 1.5122323651452285e-05, "loss": 1.3526, "step": 14703 }, { "epoch": 12.202489626556016, "grad_norm": 28.37705421447754, "learning_rate": 1.5121991701244813e-05, "loss": 0.614, "step": 14704 }, { "epoch": 12.203319502074688, "grad_norm": 32.14195251464844, "learning_rate": 1.5121659751037345e-05, "loss": 1.5492, "step": 14705 }, { "epoch": 12.20414937759336, "grad_norm": 43.5649528503418, "learning_rate": 1.5121327800829878e-05, "loss": 0.982, "step": 14706 }, { "epoch": 12.204979253112032, "grad_norm": 26.544658660888672, "learning_rate": 1.5120995850622408e-05, "loss": 0.6351, "step": 14707 }, { "epoch": 12.205809128630705, "grad_norm": 24.00773048400879, "learning_rate": 1.5120663900414938e-05, "loss": 0.6089, "step": 14708 }, { "epoch": 12.206639004149377, "grad_norm": 23.854734420776367, "learning_rate": 1.512033195020747e-05, "loss": 0.4419, "step": 14709 }, { "epoch": 12.207468879668049, "grad_norm": 33.02674102783203, "learning_rate": 1.5120000000000001e-05, "loss": 0.8351, "step": 14710 }, { "epoch": 12.208298755186721, "grad_norm": 42.564491271972656, "learning_rate": 1.5119668049792533e-05, "loss": 1.1548, "step": 14711 }, { "epoch": 12.209128630705393, "grad_norm": 54.6437873840332, "learning_rate": 1.5119336099585062e-05, "loss": 1.5751, "step": 14712 }, { "epoch": 12.209958506224066, "grad_norm": 41.24478530883789, "learning_rate": 1.5119004149377594e-05, "loss": 0.9145, "step": 14713 }, { "epoch": 12.210788381742738, "grad_norm": 27.758380889892578, "learning_rate": 1.5118672199170126e-05, "loss": 0.9042, "step": 14714 }, { "epoch": 12.21161825726141, "grad_norm": 21.472604751586914, "learning_rate": 1.5118340248962658e-05, "loss": 0.3739, "step": 14715 }, { "epoch": 
12.212448132780082, "grad_norm": 25.17925262451172, "learning_rate": 1.5118008298755188e-05, "loss": 0.8696, "step": 14716 }, { "epoch": 12.213278008298754, "grad_norm": 18.859848022460938, "learning_rate": 1.5117676348547719e-05, "loss": 0.7134, "step": 14717 }, { "epoch": 12.214107883817427, "grad_norm": 22.97142219543457, "learning_rate": 1.511734439834025e-05, "loss": 0.473, "step": 14718 }, { "epoch": 12.214937759336099, "grad_norm": 29.319303512573242, "learning_rate": 1.5117012448132781e-05, "loss": 1.197, "step": 14719 }, { "epoch": 12.215767634854771, "grad_norm": 22.488040924072266, "learning_rate": 1.5116680497925314e-05, "loss": 0.64, "step": 14720 }, { "epoch": 12.216597510373443, "grad_norm": 32.78485870361328, "learning_rate": 1.5116348547717842e-05, "loss": 0.9112, "step": 14721 }, { "epoch": 12.217427385892115, "grad_norm": 34.37078094482422, "learning_rate": 1.5116016597510374e-05, "loss": 0.8105, "step": 14722 }, { "epoch": 12.218257261410788, "grad_norm": 31.4255428314209, "learning_rate": 1.5115684647302906e-05, "loss": 0.5692, "step": 14723 }, { "epoch": 12.21908713692946, "grad_norm": 32.20527267456055, "learning_rate": 1.5115352697095439e-05, "loss": 0.7422, "step": 14724 }, { "epoch": 12.219917012448132, "grad_norm": 50.16234588623047, "learning_rate": 1.5115020746887967e-05, "loss": 0.8765, "step": 14725 }, { "epoch": 12.220746887966804, "grad_norm": 56.15762710571289, "learning_rate": 1.51146887966805e-05, "loss": 1.1247, "step": 14726 }, { "epoch": 12.221576763485476, "grad_norm": 31.69577407836914, "learning_rate": 1.511435684647303e-05, "loss": 0.551, "step": 14727 }, { "epoch": 12.222406639004149, "grad_norm": 22.397502899169922, "learning_rate": 1.5114024896265562e-05, "loss": 0.6933, "step": 14728 }, { "epoch": 12.22323651452282, "grad_norm": 25.374364852905273, "learning_rate": 1.5113692946058092e-05, "loss": 0.6434, "step": 14729 }, { "epoch": 12.224066390041493, "grad_norm": 33.67367172241211, "learning_rate": 
1.5113360995850623e-05, "loss": 0.9174, "step": 14730 }, { "epoch": 12.224896265560165, "grad_norm": 49.527286529541016, "learning_rate": 1.5113029045643155e-05, "loss": 1.2592, "step": 14731 }, { "epoch": 12.225726141078837, "grad_norm": 33.835845947265625, "learning_rate": 1.5112697095435687e-05, "loss": 0.7096, "step": 14732 }, { "epoch": 12.22655601659751, "grad_norm": 11.667774200439453, "learning_rate": 1.5112365145228216e-05, "loss": 0.3482, "step": 14733 }, { "epoch": 12.227385892116182, "grad_norm": 56.82062530517578, "learning_rate": 1.5112033195020748e-05, "loss": 1.1447, "step": 14734 }, { "epoch": 12.228215767634854, "grad_norm": 41.02507019042969, "learning_rate": 1.511170124481328e-05, "loss": 0.9696, "step": 14735 }, { "epoch": 12.229045643153526, "grad_norm": 24.326093673706055, "learning_rate": 1.511136929460581e-05, "loss": 0.6306, "step": 14736 }, { "epoch": 12.229875518672198, "grad_norm": 19.917091369628906, "learning_rate": 1.511103734439834e-05, "loss": 0.7022, "step": 14737 }, { "epoch": 12.23070539419087, "grad_norm": 49.14579772949219, "learning_rate": 1.5110705394190873e-05, "loss": 1.5254, "step": 14738 }, { "epoch": 12.231535269709543, "grad_norm": 33.37917709350586, "learning_rate": 1.5110373443983403e-05, "loss": 1.056, "step": 14739 }, { "epoch": 12.232365145228215, "grad_norm": 44.51853942871094, "learning_rate": 1.5110041493775935e-05, "loss": 1.193, "step": 14740 }, { "epoch": 12.233195020746887, "grad_norm": 24.87104034423828, "learning_rate": 1.5109709543568464e-05, "loss": 0.8798, "step": 14741 }, { "epoch": 12.23402489626556, "grad_norm": 18.396137237548828, "learning_rate": 1.5109377593360996e-05, "loss": 0.3713, "step": 14742 }, { "epoch": 12.234854771784232, "grad_norm": 23.292003631591797, "learning_rate": 1.5109045643153528e-05, "loss": 0.5036, "step": 14743 }, { "epoch": 12.235684647302904, "grad_norm": 24.25527572631836, "learning_rate": 1.510871369294606e-05, "loss": 0.5517, "step": 14744 }, { "epoch": 
12.236514522821576, "grad_norm": 34.54572296142578, "learning_rate": 1.510838174273859e-05, "loss": 1.5838, "step": 14745 }, { "epoch": 12.237344398340248, "grad_norm": 40.22478485107422, "learning_rate": 1.5108049792531121e-05, "loss": 0.816, "step": 14746 }, { "epoch": 12.23817427385892, "grad_norm": 33.18376541137695, "learning_rate": 1.5107717842323653e-05, "loss": 0.9514, "step": 14747 }, { "epoch": 12.239004149377593, "grad_norm": 34.18948745727539, "learning_rate": 1.5107385892116184e-05, "loss": 1.3152, "step": 14748 }, { "epoch": 12.239834024896265, "grad_norm": 26.43978500366211, "learning_rate": 1.5107053941908716e-05, "loss": 0.675, "step": 14749 }, { "epoch": 12.240663900414937, "grad_norm": 20.24427604675293, "learning_rate": 1.5106721991701245e-05, "loss": 0.8431, "step": 14750 }, { "epoch": 12.241493775933609, "grad_norm": 20.3148250579834, "learning_rate": 1.5106390041493777e-05, "loss": 0.4435, "step": 14751 }, { "epoch": 12.242323651452281, "grad_norm": 29.626550674438477, "learning_rate": 1.5106058091286309e-05, "loss": 0.7933, "step": 14752 }, { "epoch": 12.243153526970953, "grad_norm": 98.0853500366211, "learning_rate": 1.5105726141078841e-05, "loss": 1.1279, "step": 14753 }, { "epoch": 12.243983402489626, "grad_norm": 26.740381240844727, "learning_rate": 1.510539419087137e-05, "loss": 0.5078, "step": 14754 }, { "epoch": 12.244813278008298, "grad_norm": 25.28329849243164, "learning_rate": 1.5105062240663902e-05, "loss": 0.9848, "step": 14755 }, { "epoch": 12.24564315352697, "grad_norm": 27.963464736938477, "learning_rate": 1.5104730290456434e-05, "loss": 0.7775, "step": 14756 }, { "epoch": 12.246473029045642, "grad_norm": 48.78346633911133, "learning_rate": 1.5104398340248964e-05, "loss": 0.8381, "step": 14757 }, { "epoch": 12.247302904564314, "grad_norm": 25.465103149414062, "learning_rate": 1.5104066390041495e-05, "loss": 0.863, "step": 14758 }, { "epoch": 12.248132780082987, "grad_norm": 29.1914119720459, "learning_rate": 
1.5103734439834025e-05, "loss": 1.0449, "step": 14759 }, { "epoch": 12.248962655601659, "grad_norm": 32.6239013671875, "learning_rate": 1.5103402489626557e-05, "loss": 0.8642, "step": 14760 }, { "epoch": 12.249792531120331, "grad_norm": 24.537267684936523, "learning_rate": 1.510307053941909e-05, "loss": 1.0084, "step": 14761 }, { "epoch": 12.250622406639003, "grad_norm": 31.055194854736328, "learning_rate": 1.5102738589211618e-05, "loss": 0.587, "step": 14762 }, { "epoch": 12.251452282157675, "grad_norm": 36.23523712158203, "learning_rate": 1.510240663900415e-05, "loss": 1.2561, "step": 14763 }, { "epoch": 12.252282157676348, "grad_norm": 21.314350128173828, "learning_rate": 1.5102074688796682e-05, "loss": 0.6176, "step": 14764 }, { "epoch": 12.25311203319502, "grad_norm": 31.9601993560791, "learning_rate": 1.5101742738589213e-05, "loss": 0.9432, "step": 14765 }, { "epoch": 12.253941908713692, "grad_norm": 50.49190139770508, "learning_rate": 1.5101410788381743e-05, "loss": 0.7904, "step": 14766 }, { "epoch": 12.254771784232364, "grad_norm": 22.206430435180664, "learning_rate": 1.5101078838174275e-05, "loss": 0.9946, "step": 14767 }, { "epoch": 12.255601659751036, "grad_norm": 37.07511520385742, "learning_rate": 1.5100746887966806e-05, "loss": 0.7615, "step": 14768 }, { "epoch": 12.256431535269709, "grad_norm": 48.35355758666992, "learning_rate": 1.5100414937759338e-05, "loss": 1.0437, "step": 14769 }, { "epoch": 12.25726141078838, "grad_norm": 30.88665008544922, "learning_rate": 1.510008298755187e-05, "loss": 0.8462, "step": 14770 }, { "epoch": 12.258091286307055, "grad_norm": 32.399078369140625, "learning_rate": 1.5099751037344399e-05, "loss": 0.9667, "step": 14771 }, { "epoch": 12.258921161825727, "grad_norm": 40.22319793701172, "learning_rate": 1.509941908713693e-05, "loss": 0.8287, "step": 14772 }, { "epoch": 12.2597510373444, "grad_norm": 22.811840057373047, "learning_rate": 1.5099087136929463e-05, "loss": 0.6595, "step": 14773 }, { "epoch": 
12.260580912863071, "grad_norm": 19.314760208129883, "learning_rate": 1.5098755186721993e-05, "loss": 1.2807, "step": 14774 }, { "epoch": 12.261410788381744, "grad_norm": 23.973730087280273, "learning_rate": 1.5098423236514524e-05, "loss": 0.7345, "step": 14775 }, { "epoch": 12.262240663900416, "grad_norm": 30.000686645507812, "learning_rate": 1.5098091286307056e-05, "loss": 1.1729, "step": 14776 }, { "epoch": 12.263070539419088, "grad_norm": 47.969825744628906, "learning_rate": 1.5097759336099586e-05, "loss": 0.7104, "step": 14777 }, { "epoch": 12.26390041493776, "grad_norm": 16.409008026123047, "learning_rate": 1.5097427385892118e-05, "loss": 0.5953, "step": 14778 }, { "epoch": 12.264730290456432, "grad_norm": 50.908687591552734, "learning_rate": 1.5097095435684649e-05, "loss": 1.4756, "step": 14779 }, { "epoch": 12.265560165975105, "grad_norm": 68.53561401367188, "learning_rate": 1.5096763485477179e-05, "loss": 1.0772, "step": 14780 }, { "epoch": 12.266390041493777, "grad_norm": 25.186920166015625, "learning_rate": 1.5096431535269711e-05, "loss": 0.8132, "step": 14781 }, { "epoch": 12.267219917012449, "grad_norm": 37.931095123291016, "learning_rate": 1.5096099585062243e-05, "loss": 1.0935, "step": 14782 }, { "epoch": 12.268049792531121, "grad_norm": 41.81313705444336, "learning_rate": 1.5095767634854772e-05, "loss": 0.7135, "step": 14783 }, { "epoch": 12.268879668049793, "grad_norm": 23.075538635253906, "learning_rate": 1.5095435684647304e-05, "loss": 0.542, "step": 14784 }, { "epoch": 12.269709543568466, "grad_norm": 99.95785522460938, "learning_rate": 1.5095103734439836e-05, "loss": 1.0513, "step": 14785 }, { "epoch": 12.270539419087138, "grad_norm": 25.654441833496094, "learning_rate": 1.5094771784232367e-05, "loss": 0.8905, "step": 14786 }, { "epoch": 12.27136929460581, "grad_norm": 29.522403717041016, "learning_rate": 1.5094439834024897e-05, "loss": 0.5657, "step": 14787 }, { "epoch": 12.272199170124482, "grad_norm": 29.97787857055664, "learning_rate": 
1.5094107883817427e-05, "loss": 0.8891, "step": 14788 }, { "epoch": 12.273029045643154, "grad_norm": 46.864437103271484, "learning_rate": 1.509377593360996e-05, "loss": 0.8695, "step": 14789 }, { "epoch": 12.273858921161827, "grad_norm": 33.83390426635742, "learning_rate": 1.5093443983402492e-05, "loss": 0.9694, "step": 14790 }, { "epoch": 12.274688796680499, "grad_norm": 23.496875762939453, "learning_rate": 1.509311203319502e-05, "loss": 0.3849, "step": 14791 }, { "epoch": 12.275518672199171, "grad_norm": 27.26556968688965, "learning_rate": 1.5092780082987552e-05, "loss": 0.7087, "step": 14792 }, { "epoch": 12.276348547717843, "grad_norm": 30.266414642333984, "learning_rate": 1.5092448132780085e-05, "loss": 0.6363, "step": 14793 }, { "epoch": 12.277178423236515, "grad_norm": 42.09921646118164, "learning_rate": 1.5092116182572617e-05, "loss": 1.06, "step": 14794 }, { "epoch": 12.278008298755188, "grad_norm": 88.3946304321289, "learning_rate": 1.5091784232365147e-05, "loss": 1.2616, "step": 14795 }, { "epoch": 12.27883817427386, "grad_norm": 40.69990539550781, "learning_rate": 1.5091452282157677e-05, "loss": 1.1363, "step": 14796 }, { "epoch": 12.279668049792532, "grad_norm": 43.59919357299805, "learning_rate": 1.5091120331950208e-05, "loss": 1.0556, "step": 14797 }, { "epoch": 12.280497925311204, "grad_norm": 25.298234939575195, "learning_rate": 1.509078838174274e-05, "loss": 0.7878, "step": 14798 }, { "epoch": 12.281327800829876, "grad_norm": 37.64469528198242, "learning_rate": 1.5090456431535272e-05, "loss": 0.8475, "step": 14799 }, { "epoch": 12.282157676348548, "grad_norm": 25.0045166015625, "learning_rate": 1.50901244813278e-05, "loss": 0.975, "step": 14800 }, { "epoch": 12.28298755186722, "grad_norm": 44.21019744873047, "learning_rate": 1.5089792531120333e-05, "loss": 0.8364, "step": 14801 }, { "epoch": 12.283817427385893, "grad_norm": 52.793968200683594, "learning_rate": 1.5089460580912865e-05, "loss": 1.1667, "step": 14802 }, { "epoch": 12.284647302904565, 
"grad_norm": 23.720237731933594, "learning_rate": 1.5089128630705397e-05, "loss": 0.6748, "step": 14803 }, { "epoch": 12.285477178423237, "grad_norm": 29.448923110961914, "learning_rate": 1.5088796680497926e-05, "loss": 0.6402, "step": 14804 }, { "epoch": 12.28630705394191, "grad_norm": 30.844257354736328, "learning_rate": 1.5088464730290458e-05, "loss": 0.6022, "step": 14805 }, { "epoch": 12.287136929460582, "grad_norm": 23.098190307617188, "learning_rate": 1.5088132780082988e-05, "loss": 0.9349, "step": 14806 }, { "epoch": 12.287966804979254, "grad_norm": 25.120656967163086, "learning_rate": 1.508780082987552e-05, "loss": 0.7233, "step": 14807 }, { "epoch": 12.288796680497926, "grad_norm": 28.812082290649414, "learning_rate": 1.5087468879668051e-05, "loss": 0.5796, "step": 14808 }, { "epoch": 12.289626556016598, "grad_norm": 26.262903213500977, "learning_rate": 1.5087136929460581e-05, "loss": 0.8673, "step": 14809 }, { "epoch": 12.29045643153527, "grad_norm": 19.41391372680664, "learning_rate": 1.5086804979253113e-05, "loss": 0.5692, "step": 14810 }, { "epoch": 12.291286307053943, "grad_norm": 21.493303298950195, "learning_rate": 1.5086473029045646e-05, "loss": 0.7831, "step": 14811 }, { "epoch": 12.292116182572615, "grad_norm": 31.16189956665039, "learning_rate": 1.5086141078838174e-05, "loss": 1.1442, "step": 14812 }, { "epoch": 12.292946058091287, "grad_norm": 30.00539779663086, "learning_rate": 1.5085809128630706e-05, "loss": 1.0082, "step": 14813 }, { "epoch": 12.29377593360996, "grad_norm": 16.267866134643555, "learning_rate": 1.5085477178423238e-05, "loss": 0.3175, "step": 14814 }, { "epoch": 12.294605809128631, "grad_norm": 39.94706344604492, "learning_rate": 1.5085145228215769e-05, "loss": 0.7597, "step": 14815 }, { "epoch": 12.295435684647304, "grad_norm": 27.67393684387207, "learning_rate": 1.50848132780083e-05, "loss": 0.6844, "step": 14816 }, { "epoch": 12.296265560165976, "grad_norm": 21.033321380615234, "learning_rate": 1.5084481327800831e-05, 
"loss": 0.5274, "step": 14817 }, { "epoch": 12.297095435684648, "grad_norm": 19.340187072753906, "learning_rate": 1.5084149377593362e-05, "loss": 0.5104, "step": 14818 }, { "epoch": 12.29792531120332, "grad_norm": 26.487672805786133, "learning_rate": 1.5083817427385894e-05, "loss": 1.1169, "step": 14819 }, { "epoch": 12.298755186721992, "grad_norm": 26.293310165405273, "learning_rate": 1.5083485477178423e-05, "loss": 0.4814, "step": 14820 }, { "epoch": 12.299585062240665, "grad_norm": 50.45589065551758, "learning_rate": 1.5083153526970955e-05, "loss": 1.9665, "step": 14821 }, { "epoch": 12.300414937759337, "grad_norm": 46.13751983642578, "learning_rate": 1.5082821576763487e-05, "loss": 0.6646, "step": 14822 }, { "epoch": 12.301244813278009, "grad_norm": 69.42716217041016, "learning_rate": 1.5082489626556019e-05, "loss": 1.4621, "step": 14823 }, { "epoch": 12.302074688796681, "grad_norm": 75.7235107421875, "learning_rate": 1.508215767634855e-05, "loss": 0.7267, "step": 14824 }, { "epoch": 12.302904564315353, "grad_norm": 39.57694625854492, "learning_rate": 1.508182572614108e-05, "loss": 1.3246, "step": 14825 }, { "epoch": 12.303734439834026, "grad_norm": 44.207801818847656, "learning_rate": 1.5081493775933612e-05, "loss": 0.8644, "step": 14826 }, { "epoch": 12.304564315352698, "grad_norm": 49.39995574951172, "learning_rate": 1.5081161825726142e-05, "loss": 1.7018, "step": 14827 }, { "epoch": 12.30539419087137, "grad_norm": 30.27351188659668, "learning_rate": 1.5080829875518674e-05, "loss": 1.1741, "step": 14828 }, { "epoch": 12.306224066390042, "grad_norm": 26.891326904296875, "learning_rate": 1.5080497925311203e-05, "loss": 0.6013, "step": 14829 }, { "epoch": 12.307053941908714, "grad_norm": 21.194250106811523, "learning_rate": 1.5080165975103735e-05, "loss": 0.6891, "step": 14830 }, { "epoch": 12.307883817427387, "grad_norm": 30.00379753112793, "learning_rate": 1.5079834024896267e-05, "loss": 0.7674, "step": 14831 }, { "epoch": 12.308713692946059, "grad_norm": 
19.065467834472656, "learning_rate": 1.50795020746888e-05, "loss": 0.5099, "step": 14832 }, { "epoch": 12.309543568464731, "grad_norm": 27.14452362060547, "learning_rate": 1.5079170124481328e-05, "loss": 0.4002, "step": 14833 }, { "epoch": 12.310373443983403, "grad_norm": 46.48944091796875, "learning_rate": 1.507883817427386e-05, "loss": 0.9722, "step": 14834 }, { "epoch": 12.311203319502075, "grad_norm": 42.48095703125, "learning_rate": 1.507850622406639e-05, "loss": 0.7268, "step": 14835 }, { "epoch": 12.312033195020748, "grad_norm": 20.621416091918945, "learning_rate": 1.5078174273858923e-05, "loss": 0.4365, "step": 14836 }, { "epoch": 12.31286307053942, "grad_norm": 38.90390396118164, "learning_rate": 1.5077842323651453e-05, "loss": 0.4437, "step": 14837 }, { "epoch": 12.313692946058092, "grad_norm": 32.2768440246582, "learning_rate": 1.5077510373443984e-05, "loss": 0.7922, "step": 14838 }, { "epoch": 12.314522821576764, "grad_norm": 44.504600524902344, "learning_rate": 1.5077178423236516e-05, "loss": 1.0335, "step": 14839 }, { "epoch": 12.315352697095436, "grad_norm": 37.300472259521484, "learning_rate": 1.5076846473029048e-05, "loss": 1.3063, "step": 14840 }, { "epoch": 12.316182572614109, "grad_norm": 38.2790641784668, "learning_rate": 1.5076514522821577e-05, "loss": 0.7842, "step": 14841 }, { "epoch": 12.31701244813278, "grad_norm": 37.198692321777344, "learning_rate": 1.5076182572614109e-05, "loss": 0.8717, "step": 14842 }, { "epoch": 12.317842323651453, "grad_norm": 22.5350341796875, "learning_rate": 1.507585062240664e-05, "loss": 0.66, "step": 14843 }, { "epoch": 12.318672199170125, "grad_norm": 47.484291076660156, "learning_rate": 1.5075518672199171e-05, "loss": 1.2152, "step": 14844 }, { "epoch": 12.319502074688797, "grad_norm": 30.73380470275879, "learning_rate": 1.5075186721991702e-05, "loss": 0.2867, "step": 14845 }, { "epoch": 12.32033195020747, "grad_norm": 33.105621337890625, "learning_rate": 1.5074854771784234e-05, "loss": 1.3255, "step": 14846 
}, { "epoch": 12.321161825726142, "grad_norm": 28.943342208862305, "learning_rate": 1.5074522821576764e-05, "loss": 0.439, "step": 14847 }, { "epoch": 12.321991701244814, "grad_norm": 24.680931091308594, "learning_rate": 1.5074190871369296e-05, "loss": 0.984, "step": 14848 }, { "epoch": 12.322821576763486, "grad_norm": 46.20735549926758, "learning_rate": 1.5073858921161828e-05, "loss": 0.6473, "step": 14849 }, { "epoch": 12.323651452282158, "grad_norm": 17.635501861572266, "learning_rate": 1.5073526970954357e-05, "loss": 0.6474, "step": 14850 }, { "epoch": 12.32448132780083, "grad_norm": 22.99529457092285, "learning_rate": 1.507319502074689e-05, "loss": 0.9117, "step": 14851 }, { "epoch": 12.325311203319503, "grad_norm": 38.870094299316406, "learning_rate": 1.5072863070539421e-05, "loss": 1.1286, "step": 14852 }, { "epoch": 12.326141078838175, "grad_norm": 57.43406677246094, "learning_rate": 1.5072531120331952e-05, "loss": 0.7151, "step": 14853 }, { "epoch": 12.326970954356847, "grad_norm": 38.060997009277344, "learning_rate": 1.5072199170124482e-05, "loss": 0.9853, "step": 14854 }, { "epoch": 12.32780082987552, "grad_norm": 39.2154426574707, "learning_rate": 1.5071867219917014e-05, "loss": 0.7271, "step": 14855 }, { "epoch": 12.328630705394191, "grad_norm": 37.01459884643555, "learning_rate": 1.5071535269709545e-05, "loss": 0.9559, "step": 14856 }, { "epoch": 12.329460580912864, "grad_norm": 35.29411697387695, "learning_rate": 1.5071203319502077e-05, "loss": 0.6765, "step": 14857 }, { "epoch": 12.330290456431536, "grad_norm": 45.92013168334961, "learning_rate": 1.5070871369294605e-05, "loss": 0.9158, "step": 14858 }, { "epoch": 12.331120331950208, "grad_norm": 91.53597259521484, "learning_rate": 1.5070539419087138e-05, "loss": 1.0258, "step": 14859 }, { "epoch": 12.33195020746888, "grad_norm": 32.891483306884766, "learning_rate": 1.507020746887967e-05, "loss": 1.1352, "step": 14860 }, { "epoch": 12.332780082987552, "grad_norm": 29.62147331237793, "learning_rate": 
1.5069875518672202e-05, "loss": 0.8382, "step": 14861 }, { "epoch": 12.333609958506225, "grad_norm": 18.16274642944336, "learning_rate": 1.506954356846473e-05, "loss": 0.3957, "step": 14862 }, { "epoch": 12.334439834024897, "grad_norm": 26.522212982177734, "learning_rate": 1.5069211618257263e-05, "loss": 0.4399, "step": 14863 }, { "epoch": 12.335269709543569, "grad_norm": 37.613189697265625, "learning_rate": 1.5068879668049795e-05, "loss": 0.7745, "step": 14864 }, { "epoch": 12.336099585062241, "grad_norm": 36.54509353637695, "learning_rate": 1.5068547717842325e-05, "loss": 0.6344, "step": 14865 }, { "epoch": 12.336929460580913, "grad_norm": 27.042909622192383, "learning_rate": 1.5068215767634856e-05, "loss": 0.7131, "step": 14866 }, { "epoch": 12.337759336099586, "grad_norm": 19.95502471923828, "learning_rate": 1.5067883817427386e-05, "loss": 0.7747, "step": 14867 }, { "epoch": 12.338589211618258, "grad_norm": 62.61585998535156, "learning_rate": 1.5067551867219918e-05, "loss": 0.7629, "step": 14868 }, { "epoch": 12.33941908713693, "grad_norm": 21.7186279296875, "learning_rate": 1.506721991701245e-05, "loss": 0.5638, "step": 14869 }, { "epoch": 12.340248962655602, "grad_norm": 43.67771530151367, "learning_rate": 1.5066887966804979e-05, "loss": 0.8174, "step": 14870 }, { "epoch": 12.341078838174274, "grad_norm": 44.863765716552734, "learning_rate": 1.5066556016597511e-05, "loss": 0.603, "step": 14871 }, { "epoch": 12.341908713692947, "grad_norm": 23.75444793701172, "learning_rate": 1.5066224066390043e-05, "loss": 0.4963, "step": 14872 }, { "epoch": 12.342738589211619, "grad_norm": 25.951744079589844, "learning_rate": 1.5065892116182575e-05, "loss": 0.6393, "step": 14873 }, { "epoch": 12.343568464730291, "grad_norm": 25.51119613647461, "learning_rate": 1.5065560165975106e-05, "loss": 0.4213, "step": 14874 }, { "epoch": 12.344398340248963, "grad_norm": 25.80154800415039, "learning_rate": 1.5065228215767636e-05, "loss": 1.1447, "step": 14875 }, { "epoch": 
12.345228215767635, "grad_norm": 41.91518783569336, "learning_rate": 1.5064896265560166e-05, "loss": 0.7596, "step": 14876 }, { "epoch": 12.346058091286308, "grad_norm": 81.75056457519531, "learning_rate": 1.5064564315352699e-05, "loss": 0.7266, "step": 14877 }, { "epoch": 12.34688796680498, "grad_norm": 106.17596435546875, "learning_rate": 1.506423236514523e-05, "loss": 0.6913, "step": 14878 }, { "epoch": 12.347717842323652, "grad_norm": 40.32538604736328, "learning_rate": 1.506390041493776e-05, "loss": 0.5541, "step": 14879 }, { "epoch": 12.348547717842324, "grad_norm": 70.3188247680664, "learning_rate": 1.5063568464730292e-05, "loss": 0.649, "step": 14880 }, { "epoch": 12.349377593360996, "grad_norm": 36.678409576416016, "learning_rate": 1.5063236514522824e-05, "loss": 1.1307, "step": 14881 }, { "epoch": 12.350207468879669, "grad_norm": 36.91232681274414, "learning_rate": 1.5062904564315354e-05, "loss": 0.7405, "step": 14882 }, { "epoch": 12.35103734439834, "grad_norm": 14.178662300109863, "learning_rate": 1.5062572614107884e-05, "loss": 0.422, "step": 14883 }, { "epoch": 12.351867219917013, "grad_norm": 42.548301696777344, "learning_rate": 1.5062240663900417e-05, "loss": 1.3559, "step": 14884 }, { "epoch": 12.352697095435685, "grad_norm": 83.29303741455078, "learning_rate": 1.5061908713692947e-05, "loss": 0.7623, "step": 14885 }, { "epoch": 12.353526970954357, "grad_norm": 50.2427978515625, "learning_rate": 1.5061576763485479e-05, "loss": 0.8877, "step": 14886 }, { "epoch": 12.35435684647303, "grad_norm": 51.18794250488281, "learning_rate": 1.506124481327801e-05, "loss": 0.8522, "step": 14887 }, { "epoch": 12.355186721991702, "grad_norm": 39.595298767089844, "learning_rate": 1.506091286307054e-05, "loss": 0.9663, "step": 14888 }, { "epoch": 12.356016597510374, "grad_norm": 85.19058227539062, "learning_rate": 1.5060580912863072e-05, "loss": 1.4561, "step": 14889 }, { "epoch": 12.356846473029046, "grad_norm": 66.15580749511719, "learning_rate": 
1.5060248962655604e-05, "loss": 1.2348, "step": 14890 }, { "epoch": 12.357676348547718, "grad_norm": 57.58449935913086, "learning_rate": 1.5059917012448133e-05, "loss": 1.3659, "step": 14891 }, { "epoch": 12.35850622406639, "grad_norm": 40.18770217895508, "learning_rate": 1.5059585062240665e-05, "loss": 2.016, "step": 14892 }, { "epoch": 12.359336099585063, "grad_norm": 47.105838775634766, "learning_rate": 1.5059253112033197e-05, "loss": 1.6745, "step": 14893 }, { "epoch": 12.360165975103735, "grad_norm": 32.025184631347656, "learning_rate": 1.5058921161825727e-05, "loss": 0.7864, "step": 14894 }, { "epoch": 12.360995850622407, "grad_norm": 26.26203155517578, "learning_rate": 1.5058589211618258e-05, "loss": 0.9358, "step": 14895 }, { "epoch": 12.36182572614108, "grad_norm": 30.379974365234375, "learning_rate": 1.505825726141079e-05, "loss": 0.5239, "step": 14896 }, { "epoch": 12.362655601659752, "grad_norm": 41.0369873046875, "learning_rate": 1.505792531120332e-05, "loss": 1.1666, "step": 14897 }, { "epoch": 12.363485477178424, "grad_norm": 42.42410659790039, "learning_rate": 1.5057593360995853e-05, "loss": 1.4408, "step": 14898 }, { "epoch": 12.364315352697096, "grad_norm": 30.587329864501953, "learning_rate": 1.5057261410788381e-05, "loss": 0.6827, "step": 14899 }, { "epoch": 12.365145228215768, "grad_norm": 21.220928192138672, "learning_rate": 1.5056929460580913e-05, "loss": 0.632, "step": 14900 }, { "epoch": 12.36597510373444, "grad_norm": 30.366535186767578, "learning_rate": 1.5056597510373445e-05, "loss": 0.6262, "step": 14901 }, { "epoch": 12.366804979253113, "grad_norm": 19.85978889465332, "learning_rate": 1.5056265560165978e-05, "loss": 0.9331, "step": 14902 }, { "epoch": 12.367634854771785, "grad_norm": 31.826892852783203, "learning_rate": 1.5055933609958508e-05, "loss": 0.8529, "step": 14903 }, { "epoch": 12.368464730290457, "grad_norm": 27.03479766845703, "learning_rate": 1.5055601659751038e-05, "loss": 0.5668, "step": 14904 }, { "epoch": 
12.369294605809129, "grad_norm": 27.213594436645508, "learning_rate": 1.5055269709543569e-05, "loss": 0.9623, "step": 14905 }, { "epoch": 12.370124481327801, "grad_norm": 29.940258026123047, "learning_rate": 1.5054937759336101e-05, "loss": 0.9581, "step": 14906 }, { "epoch": 12.370954356846473, "grad_norm": 29.181602478027344, "learning_rate": 1.5054605809128633e-05, "loss": 0.4403, "step": 14907 }, { "epoch": 12.371784232365146, "grad_norm": 28.308923721313477, "learning_rate": 1.5054273858921162e-05, "loss": 1.0942, "step": 14908 }, { "epoch": 12.372614107883818, "grad_norm": 31.361745834350586, "learning_rate": 1.5053941908713694e-05, "loss": 0.8653, "step": 14909 }, { "epoch": 12.37344398340249, "grad_norm": 34.702667236328125, "learning_rate": 1.5053609958506226e-05, "loss": 0.6586, "step": 14910 }, { "epoch": 12.374273858921162, "grad_norm": 20.90018081665039, "learning_rate": 1.5053278008298758e-05, "loss": 0.6526, "step": 14911 }, { "epoch": 12.375103734439834, "grad_norm": 23.022048950195312, "learning_rate": 1.5052946058091287e-05, "loss": 0.581, "step": 14912 }, { "epoch": 12.375933609958507, "grad_norm": 25.704465866088867, "learning_rate": 1.5052614107883819e-05, "loss": 0.749, "step": 14913 }, { "epoch": 12.376763485477179, "grad_norm": 33.00296401977539, "learning_rate": 1.505228215767635e-05, "loss": 0.6895, "step": 14914 }, { "epoch": 12.377593360995851, "grad_norm": 18.297954559326172, "learning_rate": 1.5051950207468881e-05, "loss": 0.4602, "step": 14915 }, { "epoch": 12.378423236514523, "grad_norm": 32.77772521972656, "learning_rate": 1.5051618257261412e-05, "loss": 0.6041, "step": 14916 }, { "epoch": 12.379253112033195, "grad_norm": 21.491044998168945, "learning_rate": 1.5051286307053942e-05, "loss": 0.5831, "step": 14917 }, { "epoch": 12.380082987551868, "grad_norm": 81.43693542480469, "learning_rate": 1.5050954356846474e-05, "loss": 0.7714, "step": 14918 }, { "epoch": 12.38091286307054, "grad_norm": 37.14741897583008, "learning_rate": 
1.5050622406639006e-05, "loss": 1.1978, "step": 14919 }, { "epoch": 12.381742738589212, "grad_norm": 40.682010650634766, "learning_rate": 1.5050290456431535e-05, "loss": 0.8381, "step": 14920 }, { "epoch": 12.382572614107884, "grad_norm": 30.55554962158203, "learning_rate": 1.5049958506224067e-05, "loss": 0.8848, "step": 14921 }, { "epoch": 12.383402489626556, "grad_norm": 24.99700355529785, "learning_rate": 1.50496265560166e-05, "loss": 1.1849, "step": 14922 }, { "epoch": 12.384232365145229, "grad_norm": 34.03689193725586, "learning_rate": 1.504929460580913e-05, "loss": 0.8363, "step": 14923 }, { "epoch": 12.3850622406639, "grad_norm": 41.436336517333984, "learning_rate": 1.504896265560166e-05, "loss": 1.0876, "step": 14924 }, { "epoch": 12.385892116182573, "grad_norm": 52.84587860107422, "learning_rate": 1.5048630705394192e-05, "loss": 0.5181, "step": 14925 }, { "epoch": 12.386721991701245, "grad_norm": 71.34101867675781, "learning_rate": 1.5048298755186723e-05, "loss": 1.5072, "step": 14926 }, { "epoch": 12.387551867219917, "grad_norm": 30.692304611206055, "learning_rate": 1.5047966804979255e-05, "loss": 1.2049, "step": 14927 }, { "epoch": 12.38838174273859, "grad_norm": 37.7820930480957, "learning_rate": 1.5047634854771787e-05, "loss": 0.7869, "step": 14928 }, { "epoch": 12.389211618257262, "grad_norm": 17.964271545410156, "learning_rate": 1.5047302904564316e-05, "loss": 0.8586, "step": 14929 }, { "epoch": 12.390041493775934, "grad_norm": 29.173891067504883, "learning_rate": 1.5046970954356848e-05, "loss": 0.6342, "step": 14930 }, { "epoch": 12.390871369294606, "grad_norm": 30.761394500732422, "learning_rate": 1.504663900414938e-05, "loss": 0.958, "step": 14931 }, { "epoch": 12.391701244813278, "grad_norm": 43.79690933227539, "learning_rate": 1.504630705394191e-05, "loss": 1.236, "step": 14932 }, { "epoch": 12.39253112033195, "grad_norm": 44.635047912597656, "learning_rate": 1.504597510373444e-05, "loss": 1.6788, "step": 14933 }, { "epoch": 12.393360995850623, 
"grad_norm": 22.914390563964844, "learning_rate": 1.5045643153526973e-05, "loss": 0.913, "step": 14934 }, { "epoch": 12.394190871369295, "grad_norm": 34.56623458862305, "learning_rate": 1.5045311203319503e-05, "loss": 1.5044, "step": 14935 }, { "epoch": 12.395020746887967, "grad_norm": 36.65424346923828, "learning_rate": 1.5044979253112035e-05, "loss": 0.7002, "step": 14936 }, { "epoch": 12.39585062240664, "grad_norm": 31.679967880249023, "learning_rate": 1.5044647302904564e-05, "loss": 0.9875, "step": 14937 }, { "epoch": 12.396680497925312, "grad_norm": 39.297969818115234, "learning_rate": 1.5044315352697096e-05, "loss": 1.0329, "step": 14938 }, { "epoch": 12.397510373443984, "grad_norm": 22.488346099853516, "learning_rate": 1.5043983402489628e-05, "loss": 0.5804, "step": 14939 }, { "epoch": 12.398340248962656, "grad_norm": 29.192481994628906, "learning_rate": 1.504365145228216e-05, "loss": 0.4239, "step": 14940 }, { "epoch": 12.399170124481328, "grad_norm": 38.848838806152344, "learning_rate": 1.5043319502074689e-05, "loss": 0.8396, "step": 14941 }, { "epoch": 12.4, "grad_norm": 48.90373229980469, "learning_rate": 1.5042987551867221e-05, "loss": 1.3221, "step": 14942 }, { "epoch": 12.400829875518673, "grad_norm": 31.2346248626709, "learning_rate": 1.5042655601659753e-05, "loss": 0.7112, "step": 14943 }, { "epoch": 12.401659751037345, "grad_norm": 30.87505531311035, "learning_rate": 1.5042323651452284e-05, "loss": 1.0252, "step": 14944 }, { "epoch": 12.402489626556017, "grad_norm": 36.60501480102539, "learning_rate": 1.5041991701244814e-05, "loss": 0.9236, "step": 14945 }, { "epoch": 12.40331950207469, "grad_norm": 34.88236618041992, "learning_rate": 1.5041659751037345e-05, "loss": 0.7531, "step": 14946 }, { "epoch": 12.404149377593361, "grad_norm": 44.30925369262695, "learning_rate": 1.5041327800829877e-05, "loss": 1.0463, "step": 14947 }, { "epoch": 12.404979253112034, "grad_norm": 27.521711349487305, "learning_rate": 1.5040995850622409e-05, "loss": 0.8633, 
"step": 14948 }, { "epoch": 12.405809128630706, "grad_norm": 21.078454971313477, "learning_rate": 1.5040663900414937e-05, "loss": 0.3326, "step": 14949 }, { "epoch": 12.406639004149378, "grad_norm": 33.35177993774414, "learning_rate": 1.504033195020747e-05, "loss": 0.9962, "step": 14950 }, { "epoch": 12.40746887966805, "grad_norm": 18.024906158447266, "learning_rate": 1.5040000000000002e-05, "loss": 0.5572, "step": 14951 }, { "epoch": 12.408298755186722, "grad_norm": 40.079071044921875, "learning_rate": 1.5039668049792532e-05, "loss": 1.255, "step": 14952 }, { "epoch": 12.409128630705395, "grad_norm": 34.58698654174805, "learning_rate": 1.5039336099585064e-05, "loss": 1.1738, "step": 14953 }, { "epoch": 12.409958506224067, "grad_norm": 38.43534469604492, "learning_rate": 1.5039004149377595e-05, "loss": 1.1871, "step": 14954 }, { "epoch": 12.410788381742739, "grad_norm": 43.97665786743164, "learning_rate": 1.5038672199170125e-05, "loss": 0.772, "step": 14955 }, { "epoch": 12.411618257261411, "grad_norm": 38.851566314697266, "learning_rate": 1.5038340248962657e-05, "loss": 1.1856, "step": 14956 }, { "epoch": 12.412448132780083, "grad_norm": 24.03473472595215, "learning_rate": 1.503800829875519e-05, "loss": 0.5951, "step": 14957 }, { "epoch": 12.413278008298755, "grad_norm": 28.38623046875, "learning_rate": 1.5037676348547718e-05, "loss": 0.8016, "step": 14958 }, { "epoch": 12.414107883817428, "grad_norm": 22.353803634643555, "learning_rate": 1.503734439834025e-05, "loss": 0.7891, "step": 14959 }, { "epoch": 12.4149377593361, "grad_norm": 35.50852584838867, "learning_rate": 1.5037012448132782e-05, "loss": 0.9713, "step": 14960 }, { "epoch": 12.415767634854772, "grad_norm": 33.671722412109375, "learning_rate": 1.5036680497925313e-05, "loss": 0.6982, "step": 14961 }, { "epoch": 12.416597510373444, "grad_norm": 25.055755615234375, "learning_rate": 1.5036348547717843e-05, "loss": 0.7199, "step": 14962 }, { "epoch": 12.417427385892116, "grad_norm": 25.157773971557617, 
"learning_rate": 1.5036016597510375e-05, "loss": 0.6563, "step": 14963 }, { "epoch": 12.418257261410789, "grad_norm": 11.593255996704102, "learning_rate": 1.5035684647302906e-05, "loss": 0.3586, "step": 14964 }, { "epoch": 12.41908713692946, "grad_norm": 36.52093505859375, "learning_rate": 1.5035352697095438e-05, "loss": 0.725, "step": 14965 }, { "epoch": 12.419917012448133, "grad_norm": 27.64684295654297, "learning_rate": 1.5035020746887966e-05, "loss": 0.9786, "step": 14966 }, { "epoch": 12.420746887966805, "grad_norm": 59.67488098144531, "learning_rate": 1.5034688796680498e-05, "loss": 0.4021, "step": 14967 }, { "epoch": 12.421576763485477, "grad_norm": 19.290010452270508, "learning_rate": 1.503435684647303e-05, "loss": 0.7333, "step": 14968 }, { "epoch": 12.42240663900415, "grad_norm": 20.29589080810547, "learning_rate": 1.5034024896265563e-05, "loss": 0.7816, "step": 14969 }, { "epoch": 12.423236514522822, "grad_norm": 70.4708023071289, "learning_rate": 1.5033692946058091e-05, "loss": 0.5996, "step": 14970 }, { "epoch": 12.424066390041494, "grad_norm": 13.724599838256836, "learning_rate": 1.5033360995850624e-05, "loss": 0.4319, "step": 14971 }, { "epoch": 12.424896265560166, "grad_norm": 37.966365814208984, "learning_rate": 1.5033029045643156e-05, "loss": 1.1818, "step": 14972 }, { "epoch": 12.425726141078838, "grad_norm": 16.995946884155273, "learning_rate": 1.5032697095435686e-05, "loss": 0.4303, "step": 14973 }, { "epoch": 12.42655601659751, "grad_norm": 82.34785461425781, "learning_rate": 1.5032365145228216e-05, "loss": 1.2278, "step": 14974 }, { "epoch": 12.427385892116183, "grad_norm": 33.380802154541016, "learning_rate": 1.5032033195020747e-05, "loss": 0.9506, "step": 14975 }, { "epoch": 12.428215767634855, "grad_norm": 93.9922103881836, "learning_rate": 1.5031701244813279e-05, "loss": 1.3041, "step": 14976 }, { "epoch": 12.429045643153527, "grad_norm": 40.46199035644531, "learning_rate": 1.5031369294605811e-05, "loss": 1.7988, "step": 14977 }, { 
"epoch": 12.4298755186722, "grad_norm": 70.64376068115234, "learning_rate": 1.503103734439834e-05, "loss": 1.039, "step": 14978 }, { "epoch": 12.430705394190872, "grad_norm": 13.985042572021484, "learning_rate": 1.5030705394190872e-05, "loss": 0.3173, "step": 14979 }, { "epoch": 12.431535269709544, "grad_norm": 31.2120304107666, "learning_rate": 1.5030373443983404e-05, "loss": 0.5428, "step": 14980 }, { "epoch": 12.432365145228216, "grad_norm": 36.37394332885742, "learning_rate": 1.5030041493775936e-05, "loss": 1.0425, "step": 14981 }, { "epoch": 12.433195020746888, "grad_norm": 32.012535095214844, "learning_rate": 1.5029709543568467e-05, "loss": 0.827, "step": 14982 }, { "epoch": 12.43402489626556, "grad_norm": 32.07660675048828, "learning_rate": 1.5029377593360997e-05, "loss": 0.644, "step": 14983 }, { "epoch": 12.434854771784233, "grad_norm": 42.62921142578125, "learning_rate": 1.5029045643153527e-05, "loss": 1.0484, "step": 14984 }, { "epoch": 12.435684647302905, "grad_norm": 48.69640350341797, "learning_rate": 1.502871369294606e-05, "loss": 1.174, "step": 14985 }, { "epoch": 12.436514522821577, "grad_norm": 28.39075469970703, "learning_rate": 1.5028381742738592e-05, "loss": 1.0196, "step": 14986 }, { "epoch": 12.43734439834025, "grad_norm": 30.233400344848633, "learning_rate": 1.502804979253112e-05, "loss": 0.5996, "step": 14987 }, { "epoch": 12.438174273858921, "grad_norm": 47.48905944824219, "learning_rate": 1.5027717842323652e-05, "loss": 1.0555, "step": 14988 }, { "epoch": 12.439004149377594, "grad_norm": 53.1746826171875, "learning_rate": 1.5027385892116185e-05, "loss": 0.7947, "step": 14989 }, { "epoch": 12.439834024896266, "grad_norm": 61.43684768676758, "learning_rate": 1.5027053941908717e-05, "loss": 1.3282, "step": 14990 }, { "epoch": 12.440663900414938, "grad_norm": 36.4638557434082, "learning_rate": 1.5026721991701245e-05, "loss": 0.8445, "step": 14991 }, { "epoch": 12.44149377593361, "grad_norm": 14.213826179504395, "learning_rate": 
1.5026390041493777e-05, "loss": 0.4829, "step": 14992 }, { "epoch": 12.442323651452282, "grad_norm": 22.80320930480957, "learning_rate": 1.5026058091286308e-05, "loss": 0.551, "step": 14993 }, { "epoch": 12.443153526970955, "grad_norm": 41.32740020751953, "learning_rate": 1.502572614107884e-05, "loss": 0.7601, "step": 14994 }, { "epoch": 12.443983402489627, "grad_norm": 63.67103576660156, "learning_rate": 1.502539419087137e-05, "loss": 1.8025, "step": 14995 }, { "epoch": 12.444813278008299, "grad_norm": 32.32594680786133, "learning_rate": 1.50250622406639e-05, "loss": 1.3262, "step": 14996 }, { "epoch": 12.445643153526971, "grad_norm": 19.784318923950195, "learning_rate": 1.5024730290456433e-05, "loss": 0.4646, "step": 14997 }, { "epoch": 12.446473029045643, "grad_norm": 53.954158782958984, "learning_rate": 1.5024398340248965e-05, "loss": 1.0518, "step": 14998 }, { "epoch": 12.447302904564316, "grad_norm": 40.08091735839844, "learning_rate": 1.5024066390041494e-05, "loss": 1.1728, "step": 14999 }, { "epoch": 12.448132780082988, "grad_norm": 23.102264404296875, "learning_rate": 1.5023734439834026e-05, "loss": 0.5321, "step": 15000 }, { "epoch": 12.44896265560166, "grad_norm": 50.45634460449219, "learning_rate": 1.5023402489626558e-05, "loss": 1.055, "step": 15001 }, { "epoch": 12.449792531120332, "grad_norm": 25.48217010498047, "learning_rate": 1.5023070539419088e-05, "loss": 1.0096, "step": 15002 }, { "epoch": 12.450622406639004, "grad_norm": 58.90632629394531, "learning_rate": 1.5022738589211619e-05, "loss": 1.1915, "step": 15003 }, { "epoch": 12.451452282157677, "grad_norm": 45.61941146850586, "learning_rate": 1.5022406639004151e-05, "loss": 1.0753, "step": 15004 }, { "epoch": 12.452282157676349, "grad_norm": 39.08237838745117, "learning_rate": 1.5022074688796681e-05, "loss": 0.6864, "step": 15005 }, { "epoch": 12.453112033195021, "grad_norm": 47.64824295043945, "learning_rate": 1.5021742738589213e-05, "loss": 1.3697, "step": 15006 }, { "epoch": 
12.453941908713693, "grad_norm": 31.065441131591797, "learning_rate": 1.5021410788381746e-05, "loss": 0.8614, "step": 15007 }, { "epoch": 12.454771784232365, "grad_norm": 98.28157043457031, "learning_rate": 1.5021078838174274e-05, "loss": 1.4386, "step": 15008 }, { "epoch": 12.455601659751038, "grad_norm": 25.13918113708496, "learning_rate": 1.5020746887966806e-05, "loss": 0.6315, "step": 15009 }, { "epoch": 12.45643153526971, "grad_norm": 20.22789764404297, "learning_rate": 1.5020414937759338e-05, "loss": 0.6276, "step": 15010 }, { "epoch": 12.457261410788382, "grad_norm": 20.47821807861328, "learning_rate": 1.5020082987551869e-05, "loss": 0.3976, "step": 15011 }, { "epoch": 12.458091286307054, "grad_norm": 32.509376525878906, "learning_rate": 1.50197510373444e-05, "loss": 1.2861, "step": 15012 }, { "epoch": 12.458921161825726, "grad_norm": 25.55685806274414, "learning_rate": 1.5019419087136931e-05, "loss": 0.7719, "step": 15013 }, { "epoch": 12.459751037344398, "grad_norm": 29.935894012451172, "learning_rate": 1.5019087136929462e-05, "loss": 0.6833, "step": 15014 }, { "epoch": 12.46058091286307, "grad_norm": 43.48841857910156, "learning_rate": 1.5018755186721994e-05, "loss": 0.6876, "step": 15015 }, { "epoch": 12.461410788381743, "grad_norm": 20.058704376220703, "learning_rate": 1.5018423236514523e-05, "loss": 0.4949, "step": 15016 }, { "epoch": 12.462240663900415, "grad_norm": 22.115339279174805, "learning_rate": 1.5018091286307055e-05, "loss": 0.6675, "step": 15017 }, { "epoch": 12.463070539419087, "grad_norm": 24.426103591918945, "learning_rate": 1.5017759336099587e-05, "loss": 0.9846, "step": 15018 }, { "epoch": 12.46390041493776, "grad_norm": 33.549163818359375, "learning_rate": 1.5017427385892119e-05, "loss": 1.2626, "step": 15019 }, { "epoch": 12.464730290456432, "grad_norm": 52.42515563964844, "learning_rate": 1.5017095435684648e-05, "loss": 0.7223, "step": 15020 }, { "epoch": 12.465560165975104, "grad_norm": 44.97998046875, "learning_rate": 
1.501676348547718e-05, "loss": 0.8845, "step": 15021 }, { "epoch": 12.466390041493776, "grad_norm": 37.39994430541992, "learning_rate": 1.501643153526971e-05, "loss": 1.022, "step": 15022 }, { "epoch": 12.467219917012448, "grad_norm": 46.955711364746094, "learning_rate": 1.5016099585062242e-05, "loss": 1.0941, "step": 15023 }, { "epoch": 12.46804979253112, "grad_norm": 51.25733947753906, "learning_rate": 1.5015767634854773e-05, "loss": 1.1777, "step": 15024 }, { "epoch": 12.468879668049793, "grad_norm": 48.866214752197266, "learning_rate": 1.5015435684647303e-05, "loss": 1.2126, "step": 15025 }, { "epoch": 12.469709543568465, "grad_norm": 30.994407653808594, "learning_rate": 1.5015103734439835e-05, "loss": 0.8639, "step": 15026 }, { "epoch": 12.470539419087137, "grad_norm": 32.583003997802734, "learning_rate": 1.5014771784232367e-05, "loss": 1.1214, "step": 15027 }, { "epoch": 12.47136929460581, "grad_norm": 50.836669921875, "learning_rate": 1.5014439834024896e-05, "loss": 0.777, "step": 15028 }, { "epoch": 12.472199170124481, "grad_norm": 30.974300384521484, "learning_rate": 1.5014107883817428e-05, "loss": 1.3983, "step": 15029 }, { "epoch": 12.473029045643154, "grad_norm": 29.735050201416016, "learning_rate": 1.501377593360996e-05, "loss": 0.7032, "step": 15030 }, { "epoch": 12.473858921161826, "grad_norm": 99.384033203125, "learning_rate": 1.501344398340249e-05, "loss": 1.3214, "step": 15031 }, { "epoch": 12.474688796680498, "grad_norm": 49.03498077392578, "learning_rate": 1.5013112033195023e-05, "loss": 1.0236, "step": 15032 }, { "epoch": 12.47551867219917, "grad_norm": 81.94532012939453, "learning_rate": 1.5012780082987553e-05, "loss": 0.9967, "step": 15033 }, { "epoch": 12.476348547717842, "grad_norm": 27.743013381958008, "learning_rate": 1.5012448132780084e-05, "loss": 0.67, "step": 15034 }, { "epoch": 12.477178423236515, "grad_norm": 15.764322280883789, "learning_rate": 1.5012116182572616e-05, "loss": 0.3832, "step": 15035 }, { "epoch": 12.478008298755187, 
"grad_norm": 40.58840560913086, "learning_rate": 1.5011784232365148e-05, "loss": 0.6791, "step": 15036 }, { "epoch": 12.478838174273859, "grad_norm": 43.38751220703125, "learning_rate": 1.5011452282157677e-05, "loss": 0.3735, "step": 15037 }, { "epoch": 12.479668049792531, "grad_norm": 41.82848358154297, "learning_rate": 1.5011120331950209e-05, "loss": 0.9921, "step": 15038 }, { "epoch": 12.480497925311203, "grad_norm": 29.29086685180664, "learning_rate": 1.501078838174274e-05, "loss": 0.6773, "step": 15039 }, { "epoch": 12.481327800829876, "grad_norm": 18.95371437072754, "learning_rate": 1.5010456431535271e-05, "loss": 0.7455, "step": 15040 }, { "epoch": 12.482157676348548, "grad_norm": 28.72467613220215, "learning_rate": 1.5010124481327802e-05, "loss": 1.3159, "step": 15041 }, { "epoch": 12.48298755186722, "grad_norm": 24.868080139160156, "learning_rate": 1.5009792531120334e-05, "loss": 0.9673, "step": 15042 }, { "epoch": 12.483817427385892, "grad_norm": 18.3408260345459, "learning_rate": 1.5009460580912864e-05, "loss": 0.8636, "step": 15043 }, { "epoch": 12.484647302904564, "grad_norm": 20.110754013061523, "learning_rate": 1.5009128630705396e-05, "loss": 0.8254, "step": 15044 }, { "epoch": 12.485477178423237, "grad_norm": 33.12883758544922, "learning_rate": 1.5008796680497925e-05, "loss": 0.4656, "step": 15045 }, { "epoch": 12.486307053941909, "grad_norm": 36.045833587646484, "learning_rate": 1.5008464730290457e-05, "loss": 0.7069, "step": 15046 }, { "epoch": 12.487136929460581, "grad_norm": 29.066749572753906, "learning_rate": 1.500813278008299e-05, "loss": 0.9954, "step": 15047 }, { "epoch": 12.487966804979253, "grad_norm": 16.87571144104004, "learning_rate": 1.5007800829875521e-05, "loss": 0.5785, "step": 15048 }, { "epoch": 12.488796680497925, "grad_norm": 27.23621368408203, "learning_rate": 1.500746887966805e-05, "loss": 0.7368, "step": 15049 }, { "epoch": 12.489626556016598, "grad_norm": 32.304161071777344, "learning_rate": 1.5007136929460582e-05, "loss": 
1.5926, "step": 15050 }, { "epoch": 12.49045643153527, "grad_norm": 25.787723541259766, "learning_rate": 1.5006804979253114e-05, "loss": 0.5623, "step": 15051 }, { "epoch": 12.491286307053942, "grad_norm": 23.528100967407227, "learning_rate": 1.5006473029045645e-05, "loss": 0.5891, "step": 15052 }, { "epoch": 12.492116182572614, "grad_norm": 18.332796096801758, "learning_rate": 1.5006141078838175e-05, "loss": 0.6043, "step": 15053 }, { "epoch": 12.492946058091286, "grad_norm": 29.658117294311523, "learning_rate": 1.5005809128630705e-05, "loss": 0.8869, "step": 15054 }, { "epoch": 12.493775933609959, "grad_norm": 53.50551986694336, "learning_rate": 1.5005477178423238e-05, "loss": 1.2382, "step": 15055 }, { "epoch": 12.49460580912863, "grad_norm": 25.50760841369629, "learning_rate": 1.500514522821577e-05, "loss": 0.6788, "step": 15056 }, { "epoch": 12.495435684647303, "grad_norm": 34.55586624145508, "learning_rate": 1.5004813278008298e-05, "loss": 0.8191, "step": 15057 }, { "epoch": 12.496265560165975, "grad_norm": 32.98295974731445, "learning_rate": 1.500448132780083e-05, "loss": 0.6315, "step": 15058 }, { "epoch": 12.497095435684647, "grad_norm": 47.04200744628906, "learning_rate": 1.5004149377593363e-05, "loss": 0.7921, "step": 15059 }, { "epoch": 12.49792531120332, "grad_norm": 31.704055786132812, "learning_rate": 1.5003817427385895e-05, "loss": 0.8444, "step": 15060 }, { "epoch": 12.498755186721992, "grad_norm": 122.4950180053711, "learning_rate": 1.5003485477178425e-05, "loss": 2.3202, "step": 15061 }, { "epoch": 12.499585062240664, "grad_norm": 32.33879470825195, "learning_rate": 1.5003153526970956e-05, "loss": 0.5016, "step": 15062 }, { "epoch": 12.500414937759336, "grad_norm": 68.12596130371094, "learning_rate": 1.5002821576763486e-05, "loss": 1.2762, "step": 15063 }, { "epoch": 12.501244813278008, "grad_norm": 50.944114685058594, "learning_rate": 1.5002489626556018e-05, "loss": 1.6314, "step": 15064 }, { "epoch": 12.50207468879668, "grad_norm": 
32.2343864440918, "learning_rate": 1.500215767634855e-05, "loss": 1.104, "step": 15065 }, { "epoch": 12.502904564315353, "grad_norm": 20.305593490600586, "learning_rate": 1.5001825726141079e-05, "loss": 0.984, "step": 15066 }, { "epoch": 12.503734439834025, "grad_norm": 53.444339752197266, "learning_rate": 1.5001493775933611e-05, "loss": 1.2986, "step": 15067 }, { "epoch": 12.504564315352697, "grad_norm": 50.78523635864258, "learning_rate": 1.5001161825726143e-05, "loss": 1.6614, "step": 15068 }, { "epoch": 12.50539419087137, "grad_norm": 39.28565979003906, "learning_rate": 1.5000829875518673e-05, "loss": 0.5959, "step": 15069 }, { "epoch": 12.506224066390041, "grad_norm": 22.727766036987305, "learning_rate": 1.5000497925311204e-05, "loss": 1.012, "step": 15070 }, { "epoch": 12.507053941908714, "grad_norm": 25.303674697875977, "learning_rate": 1.5000165975103736e-05, "loss": 0.5731, "step": 15071 }, { "epoch": 12.507883817427386, "grad_norm": 26.212417602539062, "learning_rate": 1.4999834024896266e-05, "loss": 0.6108, "step": 15072 }, { "epoch": 12.508713692946058, "grad_norm": 29.75885772705078, "learning_rate": 1.4999502074688799e-05, "loss": 1.0341, "step": 15073 }, { "epoch": 12.50954356846473, "grad_norm": 21.6247501373291, "learning_rate": 1.4999170124481329e-05, "loss": 0.6858, "step": 15074 }, { "epoch": 12.510373443983402, "grad_norm": 70.15201568603516, "learning_rate": 1.499883817427386e-05, "loss": 1.4698, "step": 15075 }, { "epoch": 12.511203319502075, "grad_norm": 85.36952209472656, "learning_rate": 1.4998506224066391e-05, "loss": 0.9342, "step": 15076 }, { "epoch": 12.512033195020747, "grad_norm": 50.336978912353516, "learning_rate": 1.4998174273858924e-05, "loss": 1.4792, "step": 15077 }, { "epoch": 12.512863070539419, "grad_norm": 24.234628677368164, "learning_rate": 1.4997842323651452e-05, "loss": 0.5066, "step": 15078 }, { "epoch": 12.513692946058091, "grad_norm": 89.103515625, "learning_rate": 1.4997510373443984e-05, "loss": 1.0125, "step": 
15079 }, { "epoch": 12.514522821576763, "grad_norm": 33.39036560058594, "learning_rate": 1.4997178423236517e-05, "loss": 0.8183, "step": 15080 }, { "epoch": 12.515352697095436, "grad_norm": 22.466209411621094, "learning_rate": 1.4996846473029047e-05, "loss": 0.476, "step": 15081 }, { "epoch": 12.516182572614108, "grad_norm": 29.855995178222656, "learning_rate": 1.4996514522821577e-05, "loss": 1.1227, "step": 15082 }, { "epoch": 12.51701244813278, "grad_norm": 24.347484588623047, "learning_rate": 1.4996182572614108e-05, "loss": 0.4207, "step": 15083 }, { "epoch": 12.517842323651452, "grad_norm": 26.632246017456055, "learning_rate": 1.499585062240664e-05, "loss": 0.4205, "step": 15084 }, { "epoch": 12.518672199170124, "grad_norm": 23.067188262939453, "learning_rate": 1.4995518672199172e-05, "loss": 0.4146, "step": 15085 }, { "epoch": 12.519502074688797, "grad_norm": 29.72584342956543, "learning_rate": 1.4995186721991704e-05, "loss": 0.7088, "step": 15086 }, { "epoch": 12.520331950207469, "grad_norm": 29.307727813720703, "learning_rate": 1.4994854771784233e-05, "loss": 1.4651, "step": 15087 }, { "epoch": 12.521161825726141, "grad_norm": 39.3339729309082, "learning_rate": 1.4994522821576765e-05, "loss": 1.3665, "step": 15088 }, { "epoch": 12.521991701244813, "grad_norm": 28.902904510498047, "learning_rate": 1.4994190871369297e-05, "loss": 0.8093, "step": 15089 }, { "epoch": 12.522821576763485, "grad_norm": 26.208545684814453, "learning_rate": 1.4993858921161827e-05, "loss": 0.7098, "step": 15090 }, { "epoch": 12.523651452282158, "grad_norm": 17.446823120117188, "learning_rate": 1.4993526970954358e-05, "loss": 0.8433, "step": 15091 }, { "epoch": 12.52448132780083, "grad_norm": 36.53337097167969, "learning_rate": 1.4993195020746888e-05, "loss": 0.8732, "step": 15092 }, { "epoch": 12.525311203319502, "grad_norm": 29.94363021850586, "learning_rate": 1.499286307053942e-05, "loss": 0.9345, "step": 15093 }, { "epoch": 12.526141078838174, "grad_norm": 57.62419509887695, 
"learning_rate": 1.4992531120331952e-05, "loss": 0.9756, "step": 15094 }, { "epoch": 12.526970954356846, "grad_norm": 31.056188583374023, "learning_rate": 1.4992199170124481e-05, "loss": 0.7628, "step": 15095 }, { "epoch": 12.527800829875519, "grad_norm": 23.09514808654785, "learning_rate": 1.4991867219917013e-05, "loss": 0.7177, "step": 15096 }, { "epoch": 12.52863070539419, "grad_norm": 25.974346160888672, "learning_rate": 1.4991535269709545e-05, "loss": 0.6518, "step": 15097 }, { "epoch": 12.529460580912863, "grad_norm": 17.650371551513672, "learning_rate": 1.4991203319502078e-05, "loss": 0.4675, "step": 15098 }, { "epoch": 12.530290456431535, "grad_norm": 28.23750877380371, "learning_rate": 1.4990871369294606e-05, "loss": 0.6063, "step": 15099 }, { "epoch": 12.531120331950207, "grad_norm": 29.09221649169922, "learning_rate": 1.4990539419087138e-05, "loss": 0.3891, "step": 15100 }, { "epoch": 12.53195020746888, "grad_norm": 27.971986770629883, "learning_rate": 1.4990207468879669e-05, "loss": 1.0092, "step": 15101 }, { "epoch": 12.532780082987552, "grad_norm": 46.2693977355957, "learning_rate": 1.4989875518672201e-05, "loss": 1.0734, "step": 15102 }, { "epoch": 12.533609958506224, "grad_norm": 75.39654541015625, "learning_rate": 1.4989543568464731e-05, "loss": 0.6824, "step": 15103 }, { "epoch": 12.534439834024896, "grad_norm": 70.89694213867188, "learning_rate": 1.4989211618257262e-05, "loss": 0.5112, "step": 15104 }, { "epoch": 12.535269709543568, "grad_norm": 46.638893127441406, "learning_rate": 1.4988879668049794e-05, "loss": 0.7554, "step": 15105 }, { "epoch": 12.53609958506224, "grad_norm": 49.871944427490234, "learning_rate": 1.4988547717842326e-05, "loss": 0.7585, "step": 15106 }, { "epoch": 12.536929460580913, "grad_norm": 23.334545135498047, "learning_rate": 1.4988215767634855e-05, "loss": 0.3952, "step": 15107 }, { "epoch": 12.537759336099585, "grad_norm": 52.97928237915039, "learning_rate": 1.4987883817427387e-05, "loss": 0.9836, "step": 15108 }, { 
"epoch": 12.538589211618257, "grad_norm": 19.409122467041016, "learning_rate": 1.4987551867219919e-05, "loss": 0.4543, "step": 15109 }, { "epoch": 12.53941908713693, "grad_norm": 48.994483947753906, "learning_rate": 1.498721991701245e-05, "loss": 0.8684, "step": 15110 }, { "epoch": 12.540248962655602, "grad_norm": 30.72734832763672, "learning_rate": 1.498688796680498e-05, "loss": 0.5721, "step": 15111 }, { "epoch": 12.541078838174274, "grad_norm": 47.06922912597656, "learning_rate": 1.4986556016597512e-05, "loss": 0.5596, "step": 15112 }, { "epoch": 12.541908713692946, "grad_norm": 43.87285232543945, "learning_rate": 1.4986224066390042e-05, "loss": 1.332, "step": 15113 }, { "epoch": 12.542738589211618, "grad_norm": 72.73308563232422, "learning_rate": 1.4985892116182574e-05, "loss": 0.9167, "step": 15114 }, { "epoch": 12.54356846473029, "grad_norm": 27.09540367126465, "learning_rate": 1.4985560165975106e-05, "loss": 0.5737, "step": 15115 }, { "epoch": 12.544398340248962, "grad_norm": 46.6554069519043, "learning_rate": 1.4985228215767635e-05, "loss": 1.6591, "step": 15116 }, { "epoch": 12.545228215767635, "grad_norm": 25.782459259033203, "learning_rate": 1.4984896265560167e-05, "loss": 0.6241, "step": 15117 }, { "epoch": 12.546058091286307, "grad_norm": 27.832704544067383, "learning_rate": 1.49845643153527e-05, "loss": 0.7222, "step": 15118 }, { "epoch": 12.546887966804979, "grad_norm": 44.86245346069336, "learning_rate": 1.498423236514523e-05, "loss": 0.7356, "step": 15119 }, { "epoch": 12.547717842323651, "grad_norm": 26.845277786254883, "learning_rate": 1.498390041493776e-05, "loss": 0.5958, "step": 15120 }, { "epoch": 12.548547717842323, "grad_norm": 20.148277282714844, "learning_rate": 1.4983568464730292e-05, "loss": 0.5292, "step": 15121 }, { "epoch": 12.549377593360996, "grad_norm": 38.5678596496582, "learning_rate": 1.4983236514522823e-05, "loss": 0.8869, "step": 15122 }, { "epoch": 12.550207468879668, "grad_norm": 27.97007942199707, "learning_rate": 
1.4982904564315355e-05, "loss": 0.7863, "step": 15123 }, { "epoch": 12.55103734439834, "grad_norm": 42.12407684326172, "learning_rate": 1.4982572614107884e-05, "loss": 1.3183, "step": 15124 }, { "epoch": 12.551867219917012, "grad_norm": 44.90385818481445, "learning_rate": 1.4982240663900416e-05, "loss": 1.1344, "step": 15125 }, { "epoch": 12.552697095435684, "grad_norm": 20.9716854095459, "learning_rate": 1.4981908713692948e-05, "loss": 0.662, "step": 15126 }, { "epoch": 12.553526970954357, "grad_norm": 76.68226623535156, "learning_rate": 1.498157676348548e-05, "loss": 1.5507, "step": 15127 }, { "epoch": 12.554356846473029, "grad_norm": 35.360191345214844, "learning_rate": 1.4981244813278009e-05, "loss": 0.9457, "step": 15128 }, { "epoch": 12.555186721991701, "grad_norm": 24.987722396850586, "learning_rate": 1.498091286307054e-05, "loss": 0.7641, "step": 15129 }, { "epoch": 12.556016597510373, "grad_norm": 35.15848922729492, "learning_rate": 1.4980580912863071e-05, "loss": 0.76, "step": 15130 }, { "epoch": 12.556846473029045, "grad_norm": 26.34409523010254, "learning_rate": 1.4980248962655603e-05, "loss": 0.9253, "step": 15131 }, { "epoch": 12.557676348547718, "grad_norm": 29.469594955444336, "learning_rate": 1.4979917012448134e-05, "loss": 0.8651, "step": 15132 }, { "epoch": 12.55850622406639, "grad_norm": 24.44146156311035, "learning_rate": 1.4979585062240664e-05, "loss": 0.5035, "step": 15133 }, { "epoch": 12.559336099585062, "grad_norm": 36.76739501953125, "learning_rate": 1.4979253112033196e-05, "loss": 1.0582, "step": 15134 }, { "epoch": 12.560165975103734, "grad_norm": 29.65229034423828, "learning_rate": 1.4978921161825728e-05, "loss": 0.7979, "step": 15135 }, { "epoch": 12.560995850622406, "grad_norm": 49.19693374633789, "learning_rate": 1.4978589211618257e-05, "loss": 1.4124, "step": 15136 }, { "epoch": 12.561825726141079, "grad_norm": 34.78461456298828, "learning_rate": 1.4978257261410789e-05, "loss": 0.9207, "step": 15137 }, { "epoch": 12.56265560165975, 
"grad_norm": 71.17463684082031, "learning_rate": 1.4977925311203321e-05, "loss": 0.5363, "step": 15138 }, { "epoch": 12.563485477178423, "grad_norm": 31.446956634521484, "learning_rate": 1.4977593360995852e-05, "loss": 1.039, "step": 15139 }, { "epoch": 12.564315352697095, "grad_norm": 33.95186996459961, "learning_rate": 1.4977261410788384e-05, "loss": 1.2279, "step": 15140 }, { "epoch": 12.565145228215767, "grad_norm": 28.993879318237305, "learning_rate": 1.4976929460580914e-05, "loss": 1.0496, "step": 15141 }, { "epoch": 12.56597510373444, "grad_norm": 32.7948112487793, "learning_rate": 1.4976597510373445e-05, "loss": 0.9532, "step": 15142 }, { "epoch": 12.566804979253112, "grad_norm": 19.737607955932617, "learning_rate": 1.4976265560165977e-05, "loss": 0.448, "step": 15143 }, { "epoch": 12.567634854771784, "grad_norm": 38.58378219604492, "learning_rate": 1.4975933609958509e-05, "loss": 0.8456, "step": 15144 }, { "epoch": 12.568464730290456, "grad_norm": 24.48952293395996, "learning_rate": 1.4975601659751037e-05, "loss": 0.833, "step": 15145 }, { "epoch": 12.569294605809128, "grad_norm": 32.10940170288086, "learning_rate": 1.497526970954357e-05, "loss": 0.7216, "step": 15146 }, { "epoch": 12.5701244813278, "grad_norm": 33.37357711791992, "learning_rate": 1.4974937759336102e-05, "loss": 1.876, "step": 15147 }, { "epoch": 12.570954356846473, "grad_norm": 31.146974563598633, "learning_rate": 1.4974605809128632e-05, "loss": 0.492, "step": 15148 }, { "epoch": 12.571784232365145, "grad_norm": 36.682167053222656, "learning_rate": 1.4974273858921162e-05, "loss": 0.9007, "step": 15149 }, { "epoch": 12.572614107883817, "grad_norm": 30.906715393066406, "learning_rate": 1.4973941908713695e-05, "loss": 0.9953, "step": 15150 }, { "epoch": 12.57344398340249, "grad_norm": 33.950889587402344, "learning_rate": 1.4973609958506225e-05, "loss": 0.863, "step": 15151 }, { "epoch": 12.574273858921162, "grad_norm": 31.113248825073242, "learning_rate": 1.4973278008298757e-05, "loss": 
0.4022, "step": 15152 }, { "epoch": 12.575103734439834, "grad_norm": 22.387176513671875, "learning_rate": 1.4972946058091286e-05, "loss": 0.8211, "step": 15153 }, { "epoch": 12.575933609958506, "grad_norm": 25.6173095703125, "learning_rate": 1.4972614107883818e-05, "loss": 0.5994, "step": 15154 }, { "epoch": 12.576763485477178, "grad_norm": 41.351016998291016, "learning_rate": 1.497228215767635e-05, "loss": 1.211, "step": 15155 }, { "epoch": 12.57759336099585, "grad_norm": 38.621089935302734, "learning_rate": 1.4971950207468882e-05, "loss": 0.7956, "step": 15156 }, { "epoch": 12.578423236514523, "grad_norm": 31.689245223999023, "learning_rate": 1.4971618257261411e-05, "loss": 0.781, "step": 15157 }, { "epoch": 12.579253112033195, "grad_norm": 26.484830856323242, "learning_rate": 1.4971286307053943e-05, "loss": 0.7694, "step": 15158 }, { "epoch": 12.580082987551867, "grad_norm": 24.090076446533203, "learning_rate": 1.4970954356846475e-05, "loss": 0.3627, "step": 15159 }, { "epoch": 12.58091286307054, "grad_norm": 22.80998420715332, "learning_rate": 1.4970622406639006e-05, "loss": 0.637, "step": 15160 }, { "epoch": 12.581742738589211, "grad_norm": 28.53865623474121, "learning_rate": 1.4970290456431536e-05, "loss": 0.806, "step": 15161 }, { "epoch": 12.582572614107884, "grad_norm": 57.7449951171875, "learning_rate": 1.4969958506224066e-05, "loss": 1.3099, "step": 15162 }, { "epoch": 12.583402489626556, "grad_norm": 48.18309020996094, "learning_rate": 1.4969626556016598e-05, "loss": 1.0541, "step": 15163 }, { "epoch": 12.584232365145228, "grad_norm": 38.24694061279297, "learning_rate": 1.496929460580913e-05, "loss": 1.2872, "step": 15164 }, { "epoch": 12.5850622406639, "grad_norm": 36.927921295166016, "learning_rate": 1.4968962655601663e-05, "loss": 1.1554, "step": 15165 }, { "epoch": 12.585892116182572, "grad_norm": 20.648513793945312, "learning_rate": 1.4968630705394191e-05, "loss": 0.3919, "step": 15166 }, { "epoch": 12.586721991701245, "grad_norm": 
27.60149574279785, "learning_rate": 1.4968298755186723e-05, "loss": 0.8009, "step": 15167 }, { "epoch": 12.587551867219917, "grad_norm": 41.43494415283203, "learning_rate": 1.4967966804979256e-05, "loss": 1.3454, "step": 15168 }, { "epoch": 12.588381742738589, "grad_norm": 23.46846580505371, "learning_rate": 1.4967634854771786e-05, "loss": 0.5338, "step": 15169 }, { "epoch": 12.589211618257261, "grad_norm": 24.892120361328125, "learning_rate": 1.4967302904564316e-05, "loss": 1.2022, "step": 15170 }, { "epoch": 12.590041493775933, "grad_norm": 25.459867477416992, "learning_rate": 1.4966970954356847e-05, "loss": 1.1984, "step": 15171 }, { "epoch": 12.590871369294605, "grad_norm": 11.215469360351562, "learning_rate": 1.4966639004149379e-05, "loss": 0.2942, "step": 15172 }, { "epoch": 12.591701244813278, "grad_norm": 32.819793701171875, "learning_rate": 1.4966307053941911e-05, "loss": 0.893, "step": 15173 }, { "epoch": 12.59253112033195, "grad_norm": 21.946168899536133, "learning_rate": 1.496597510373444e-05, "loss": 0.5533, "step": 15174 }, { "epoch": 12.593360995850622, "grad_norm": 29.746305465698242, "learning_rate": 1.4965643153526972e-05, "loss": 0.6755, "step": 15175 }, { "epoch": 12.594190871369294, "grad_norm": 21.433923721313477, "learning_rate": 1.4965311203319504e-05, "loss": 0.6752, "step": 15176 }, { "epoch": 12.595020746887966, "grad_norm": 29.10305404663086, "learning_rate": 1.4964979253112036e-05, "loss": 0.723, "step": 15177 }, { "epoch": 12.595850622406639, "grad_norm": 32.947998046875, "learning_rate": 1.4964647302904565e-05, "loss": 1.5213, "step": 15178 }, { "epoch": 12.59668049792531, "grad_norm": 31.55196762084961, "learning_rate": 1.4964315352697097e-05, "loss": 0.5673, "step": 15179 }, { "epoch": 12.597510373443983, "grad_norm": 38.15187072753906, "learning_rate": 1.4963983402489627e-05, "loss": 1.1996, "step": 15180 }, { "epoch": 12.598340248962655, "grad_norm": 76.32069396972656, "learning_rate": 1.496365145228216e-05, "loss": 0.8894, 
"step": 15181 }, { "epoch": 12.599170124481327, "grad_norm": 37.68809509277344, "learning_rate": 1.496331950207469e-05, "loss": 1.2818, "step": 15182 }, { "epoch": 12.6, "grad_norm": 59.28669357299805, "learning_rate": 1.496298755186722e-05, "loss": 1.4446, "step": 15183 }, { "epoch": 12.600829875518672, "grad_norm": 62.08974075317383, "learning_rate": 1.4962655601659752e-05, "loss": 1.0699, "step": 15184 }, { "epoch": 12.601659751037344, "grad_norm": 39.710262298583984, "learning_rate": 1.4962323651452284e-05, "loss": 0.9582, "step": 15185 }, { "epoch": 12.602489626556016, "grad_norm": 26.12967300415039, "learning_rate": 1.4961991701244813e-05, "loss": 0.5953, "step": 15186 }, { "epoch": 12.603319502074688, "grad_norm": 53.70193099975586, "learning_rate": 1.4961659751037345e-05, "loss": 1.0405, "step": 15187 }, { "epoch": 12.60414937759336, "grad_norm": 44.009761810302734, "learning_rate": 1.4961327800829877e-05, "loss": 1.8888, "step": 15188 }, { "epoch": 12.604979253112033, "grad_norm": 34.96536636352539, "learning_rate": 1.4960995850622408e-05, "loss": 0.8154, "step": 15189 }, { "epoch": 12.605809128630705, "grad_norm": 17.27834129333496, "learning_rate": 1.4960663900414938e-05, "loss": 0.3616, "step": 15190 }, { "epoch": 12.606639004149377, "grad_norm": 28.79216957092285, "learning_rate": 1.496033195020747e-05, "loss": 0.8168, "step": 15191 }, { "epoch": 12.60746887966805, "grad_norm": 43.80278396606445, "learning_rate": 1.496e-05, "loss": 1.0432, "step": 15192 }, { "epoch": 12.608298755186722, "grad_norm": 25.087369918823242, "learning_rate": 1.4959668049792533e-05, "loss": 1.0336, "step": 15193 }, { "epoch": 12.609128630705394, "grad_norm": 28.174556732177734, "learning_rate": 1.4959336099585065e-05, "loss": 0.5573, "step": 15194 }, { "epoch": 12.609958506224066, "grad_norm": 21.811458587646484, "learning_rate": 1.4959004149377594e-05, "loss": 0.9959, "step": 15195 }, { "epoch": 12.610788381742738, "grad_norm": 41.90830993652344, "learning_rate": 
1.4958672199170126e-05, "loss": 0.5916, "step": 15196 }, { "epoch": 12.61161825726141, "grad_norm": 49.02859878540039, "learning_rate": 1.4958340248962658e-05, "loss": 1.4921, "step": 15197 }, { "epoch": 12.612448132780083, "grad_norm": 34.38338851928711, "learning_rate": 1.4958008298755188e-05, "loss": 0.7283, "step": 15198 }, { "epoch": 12.613278008298755, "grad_norm": 64.36913299560547, "learning_rate": 1.4957676348547719e-05, "loss": 1.5197, "step": 15199 }, { "epoch": 12.614107883817427, "grad_norm": 28.140792846679688, "learning_rate": 1.495734439834025e-05, "loss": 1.3138, "step": 15200 }, { "epoch": 12.6149377593361, "grad_norm": 29.369314193725586, "learning_rate": 1.4957012448132781e-05, "loss": 1.0881, "step": 15201 }, { "epoch": 12.615767634854771, "grad_norm": 30.642635345458984, "learning_rate": 1.4956680497925313e-05, "loss": 0.9709, "step": 15202 }, { "epoch": 12.616597510373444, "grad_norm": 28.202878952026367, "learning_rate": 1.4956348547717842e-05, "loss": 1.1999, "step": 15203 }, { "epoch": 12.617427385892116, "grad_norm": 39.76163864135742, "learning_rate": 1.4956016597510374e-05, "loss": 1.0512, "step": 15204 }, { "epoch": 12.618257261410788, "grad_norm": 19.004419326782227, "learning_rate": 1.4955684647302906e-05, "loss": 0.5988, "step": 15205 }, { "epoch": 12.61908713692946, "grad_norm": 49.465423583984375, "learning_rate": 1.4955352697095438e-05, "loss": 1.1355, "step": 15206 }, { "epoch": 12.619917012448132, "grad_norm": 62.02299880981445, "learning_rate": 1.4955020746887967e-05, "loss": 0.5368, "step": 15207 }, { "epoch": 12.620746887966805, "grad_norm": 23.91803741455078, "learning_rate": 1.49546887966805e-05, "loss": 0.6975, "step": 15208 }, { "epoch": 12.621576763485477, "grad_norm": 43.26057815551758, "learning_rate": 1.495435684647303e-05, "loss": 1.291, "step": 15209 }, { "epoch": 12.622406639004149, "grad_norm": 26.491971969604492, "learning_rate": 1.4954024896265562e-05, "loss": 0.522, "step": 15210 }, { "epoch": 
12.623236514522821, "grad_norm": 25.90308380126953, "learning_rate": 1.4953692946058092e-05, "loss": 1.0723, "step": 15211 }, { "epoch": 12.624066390041493, "grad_norm": 31.287771224975586, "learning_rate": 1.4953360995850623e-05, "loss": 1.2944, "step": 15212 }, { "epoch": 12.624896265560166, "grad_norm": 22.24471092224121, "learning_rate": 1.4953029045643155e-05, "loss": 0.6827, "step": 15213 }, { "epoch": 12.625726141078838, "grad_norm": 28.36367416381836, "learning_rate": 1.4952697095435687e-05, "loss": 0.7497, "step": 15214 }, { "epoch": 12.62655601659751, "grad_norm": 35.13325119018555, "learning_rate": 1.4952365145228216e-05, "loss": 0.9376, "step": 15215 }, { "epoch": 12.627385892116182, "grad_norm": 30.125608444213867, "learning_rate": 1.4952033195020748e-05, "loss": 0.9345, "step": 15216 }, { "epoch": 12.628215767634854, "grad_norm": 22.636709213256836, "learning_rate": 1.495170124481328e-05, "loss": 0.7262, "step": 15217 }, { "epoch": 12.629045643153527, "grad_norm": 22.887310028076172, "learning_rate": 1.495136929460581e-05, "loss": 0.5672, "step": 15218 }, { "epoch": 12.629875518672199, "grad_norm": 27.076416015625, "learning_rate": 1.4951037344398342e-05, "loss": 1.1417, "step": 15219 }, { "epoch": 12.630705394190871, "grad_norm": 19.88845443725586, "learning_rate": 1.4950705394190873e-05, "loss": 0.5001, "step": 15220 }, { "epoch": 12.631535269709543, "grad_norm": 16.983993530273438, "learning_rate": 1.4950373443983403e-05, "loss": 0.4453, "step": 15221 }, { "epoch": 12.632365145228215, "grad_norm": 28.301483154296875, "learning_rate": 1.4950041493775935e-05, "loss": 0.616, "step": 15222 }, { "epoch": 12.633195020746887, "grad_norm": 27.422222137451172, "learning_rate": 1.4949709543568467e-05, "loss": 0.865, "step": 15223 }, { "epoch": 12.63402489626556, "grad_norm": 23.652528762817383, "learning_rate": 1.4949377593360996e-05, "loss": 0.7285, "step": 15224 }, { "epoch": 12.634854771784232, "grad_norm": 31.89266014099121, "learning_rate": 
1.4949045643153528e-05, "loss": 0.885, "step": 15225 }, { "epoch": 12.635684647302904, "grad_norm": 43.865997314453125, "learning_rate": 1.494871369294606e-05, "loss": 0.649, "step": 15226 }, { "epoch": 12.636514522821576, "grad_norm": 43.88642883300781, "learning_rate": 1.494838174273859e-05, "loss": 0.7442, "step": 15227 }, { "epoch": 12.637344398340248, "grad_norm": 58.582366943359375, "learning_rate": 1.4948049792531121e-05, "loss": 1.0165, "step": 15228 }, { "epoch": 12.63817427385892, "grad_norm": 24.19590950012207, "learning_rate": 1.4947717842323653e-05, "loss": 0.7111, "step": 15229 }, { "epoch": 12.639004149377593, "grad_norm": 44.48744583129883, "learning_rate": 1.4947385892116184e-05, "loss": 1.2937, "step": 15230 }, { "epoch": 12.639834024896265, "grad_norm": 14.630227088928223, "learning_rate": 1.4947053941908716e-05, "loss": 0.334, "step": 15231 }, { "epoch": 12.640663900414937, "grad_norm": 28.597414016723633, "learning_rate": 1.4946721991701244e-05, "loss": 0.7379, "step": 15232 }, { "epoch": 12.64149377593361, "grad_norm": 50.791351318359375, "learning_rate": 1.4946390041493777e-05, "loss": 1.0253, "step": 15233 }, { "epoch": 12.642323651452282, "grad_norm": 60.6644401550293, "learning_rate": 1.4946058091286309e-05, "loss": 0.9693, "step": 15234 }, { "epoch": 12.643153526970954, "grad_norm": 23.49799156188965, "learning_rate": 1.494572614107884e-05, "loss": 1.1847, "step": 15235 }, { "epoch": 12.643983402489626, "grad_norm": 63.38035202026367, "learning_rate": 1.494539419087137e-05, "loss": 0.5481, "step": 15236 }, { "epoch": 12.644813278008298, "grad_norm": 47.691410064697266, "learning_rate": 1.4945062240663902e-05, "loss": 1.4125, "step": 15237 }, { "epoch": 12.64564315352697, "grad_norm": 36.81526184082031, "learning_rate": 1.4944730290456434e-05, "loss": 0.8342, "step": 15238 }, { "epoch": 12.646473029045643, "grad_norm": 84.0074462890625, "learning_rate": 1.4944398340248964e-05, "loss": 1.3887, "step": 15239 }, { "epoch": 12.647302904564315, 
"grad_norm": 35.3454475402832, "learning_rate": 1.4944066390041494e-05, "loss": 0.5558, "step": 15240 }, { "epoch": 12.648132780082987, "grad_norm": 31.489595413208008, "learning_rate": 1.4943734439834025e-05, "loss": 0.4229, "step": 15241 }, { "epoch": 12.64896265560166, "grad_norm": 41.54793930053711, "learning_rate": 1.4943402489626557e-05, "loss": 0.9855, "step": 15242 }, { "epoch": 12.649792531120331, "grad_norm": 51.83306884765625, "learning_rate": 1.4943070539419089e-05, "loss": 0.8168, "step": 15243 }, { "epoch": 12.650622406639004, "grad_norm": 28.018695831298828, "learning_rate": 1.4942738589211621e-05, "loss": 0.7236, "step": 15244 }, { "epoch": 12.651452282157676, "grad_norm": 22.90679931640625, "learning_rate": 1.494240663900415e-05, "loss": 0.6653, "step": 15245 }, { "epoch": 12.652282157676348, "grad_norm": 26.35418128967285, "learning_rate": 1.4942074688796682e-05, "loss": 0.4065, "step": 15246 }, { "epoch": 12.65311203319502, "grad_norm": 27.541595458984375, "learning_rate": 1.4941742738589212e-05, "loss": 1.0552, "step": 15247 }, { "epoch": 12.653941908713692, "grad_norm": 41.28142166137695, "learning_rate": 1.4941410788381745e-05, "loss": 0.5753, "step": 15248 }, { "epoch": 12.654771784232365, "grad_norm": 33.7426643371582, "learning_rate": 1.4941078838174275e-05, "loss": 0.9839, "step": 15249 }, { "epoch": 12.655601659751037, "grad_norm": 45.56111145019531, "learning_rate": 1.4940746887966805e-05, "loss": 1.4866, "step": 15250 }, { "epoch": 12.656431535269709, "grad_norm": 33.76953887939453, "learning_rate": 1.4940414937759338e-05, "loss": 0.9766, "step": 15251 }, { "epoch": 12.657261410788381, "grad_norm": 50.76598358154297, "learning_rate": 1.494008298755187e-05, "loss": 2.14, "step": 15252 }, { "epoch": 12.658091286307053, "grad_norm": 33.406620025634766, "learning_rate": 1.4939751037344398e-05, "loss": 1.168, "step": 15253 }, { "epoch": 12.658921161825726, "grad_norm": 35.05739974975586, "learning_rate": 1.493941908713693e-05, "loss": 
1.0318, "step": 15254 }, { "epoch": 12.659751037344398, "grad_norm": 52.96551513671875, "learning_rate": 1.4939087136929463e-05, "loss": 1.4114, "step": 15255 }, { "epoch": 12.66058091286307, "grad_norm": 41.5356559753418, "learning_rate": 1.4938755186721993e-05, "loss": 0.5934, "step": 15256 }, { "epoch": 12.661410788381742, "grad_norm": 32.43939971923828, "learning_rate": 1.4938423236514523e-05, "loss": 1.0981, "step": 15257 }, { "epoch": 12.662240663900414, "grad_norm": 27.277841567993164, "learning_rate": 1.4938091286307055e-05, "loss": 0.7504, "step": 15258 }, { "epoch": 12.663070539419087, "grad_norm": 16.823450088500977, "learning_rate": 1.4937759336099586e-05, "loss": 0.5762, "step": 15259 }, { "epoch": 12.663900414937759, "grad_norm": 22.62451934814453, "learning_rate": 1.4937427385892118e-05, "loss": 1.5948, "step": 15260 }, { "epoch": 12.664730290456431, "grad_norm": 28.798856735229492, "learning_rate": 1.4937095435684648e-05, "loss": 0.3779, "step": 15261 }, { "epoch": 12.665560165975103, "grad_norm": 38.661224365234375, "learning_rate": 1.4936763485477179e-05, "loss": 1.3119, "step": 15262 }, { "epoch": 12.666390041493775, "grad_norm": 44.85926055908203, "learning_rate": 1.4936431535269711e-05, "loss": 1.123, "step": 15263 }, { "epoch": 12.667219917012448, "grad_norm": 39.07621765136719, "learning_rate": 1.4936099585062243e-05, "loss": 1.3956, "step": 15264 }, { "epoch": 12.66804979253112, "grad_norm": 23.38675308227539, "learning_rate": 1.4935767634854772e-05, "loss": 0.6955, "step": 15265 }, { "epoch": 12.668879668049792, "grad_norm": 23.93486785888672, "learning_rate": 1.4935435684647304e-05, "loss": 0.641, "step": 15266 }, { "epoch": 12.669709543568464, "grad_norm": 26.41585350036621, "learning_rate": 1.4935103734439836e-05, "loss": 0.5266, "step": 15267 }, { "epoch": 12.670539419087136, "grad_norm": 32.240623474121094, "learning_rate": 1.4934771784232366e-05, "loss": 0.5473, "step": 15268 }, { "epoch": 12.671369294605809, "grad_norm": 
32.05142593383789, "learning_rate": 1.4934439834024897e-05, "loss": 0.7307, "step": 15269 }, { "epoch": 12.67219917012448, "grad_norm": 52.67828369140625, "learning_rate": 1.4934107883817427e-05, "loss": 0.6774, "step": 15270 }, { "epoch": 12.673029045643153, "grad_norm": 42.6026725769043, "learning_rate": 1.493377593360996e-05, "loss": 0.9135, "step": 15271 }, { "epoch": 12.673858921161825, "grad_norm": 45.53245544433594, "learning_rate": 1.4933443983402491e-05, "loss": 1.2212, "step": 15272 }, { "epoch": 12.674688796680497, "grad_norm": 18.955175399780273, "learning_rate": 1.4933112033195024e-05, "loss": 0.5932, "step": 15273 }, { "epoch": 12.67551867219917, "grad_norm": 29.740989685058594, "learning_rate": 1.4932780082987552e-05, "loss": 0.8428, "step": 15274 }, { "epoch": 12.676348547717842, "grad_norm": 44.35927200317383, "learning_rate": 1.4932448132780084e-05, "loss": 0.751, "step": 15275 }, { "epoch": 12.677178423236514, "grad_norm": 34.13978958129883, "learning_rate": 1.4932116182572616e-05, "loss": 0.9535, "step": 15276 }, { "epoch": 12.678008298755186, "grad_norm": 32.80262756347656, "learning_rate": 1.4931784232365147e-05, "loss": 1.3206, "step": 15277 }, { "epoch": 12.678838174273858, "grad_norm": 45.53830337524414, "learning_rate": 1.4931452282157677e-05, "loss": 0.7846, "step": 15278 }, { "epoch": 12.67966804979253, "grad_norm": 29.98546028137207, "learning_rate": 1.4931120331950208e-05, "loss": 1.5456, "step": 15279 }, { "epoch": 12.680497925311203, "grad_norm": 30.926244735717773, "learning_rate": 1.493078838174274e-05, "loss": 0.8708, "step": 15280 }, { "epoch": 12.681327800829875, "grad_norm": 31.284399032592773, "learning_rate": 1.4930456431535272e-05, "loss": 1.5865, "step": 15281 }, { "epoch": 12.682157676348547, "grad_norm": 24.845115661621094, "learning_rate": 1.49301244813278e-05, "loss": 0.5803, "step": 15282 }, { "epoch": 12.68298755186722, "grad_norm": 30.918521881103516, "learning_rate": 1.4929792531120333e-05, "loss": 0.8451, "step": 
15283 }, { "epoch": 12.683817427385891, "grad_norm": 27.150609970092773, "learning_rate": 1.4929460580912865e-05, "loss": 0.5129, "step": 15284 }, { "epoch": 12.684647302904564, "grad_norm": 43.706050872802734, "learning_rate": 1.4929128630705397e-05, "loss": 0.7417, "step": 15285 }, { "epoch": 12.685477178423236, "grad_norm": 56.02668380737305, "learning_rate": 1.4928796680497926e-05, "loss": 1.8122, "step": 15286 }, { "epoch": 12.686307053941908, "grad_norm": 23.31096076965332, "learning_rate": 1.4928464730290458e-05, "loss": 0.9342, "step": 15287 }, { "epoch": 12.68713692946058, "grad_norm": 23.542869567871094, "learning_rate": 1.4928132780082988e-05, "loss": 0.6778, "step": 15288 }, { "epoch": 12.687966804979252, "grad_norm": 20.24341583251953, "learning_rate": 1.492780082987552e-05, "loss": 0.4043, "step": 15289 }, { "epoch": 12.688796680497925, "grad_norm": 44.43815994262695, "learning_rate": 1.492746887966805e-05, "loss": 1.1597, "step": 15290 }, { "epoch": 12.689626556016597, "grad_norm": 63.96629333496094, "learning_rate": 1.4927136929460581e-05, "loss": 0.935, "step": 15291 }, { "epoch": 12.690456431535269, "grad_norm": 33.936805725097656, "learning_rate": 1.4926804979253113e-05, "loss": 1.0614, "step": 15292 }, { "epoch": 12.691286307053941, "grad_norm": 30.70985984802246, "learning_rate": 1.4926473029045645e-05, "loss": 0.6463, "step": 15293 }, { "epoch": 12.692116182572613, "grad_norm": 60.03437423706055, "learning_rate": 1.4926141078838174e-05, "loss": 0.9531, "step": 15294 }, { "epoch": 12.692946058091286, "grad_norm": 46.825679779052734, "learning_rate": 1.4925809128630706e-05, "loss": 1.2687, "step": 15295 }, { "epoch": 12.693775933609958, "grad_norm": 37.4345703125, "learning_rate": 1.4925477178423238e-05, "loss": 1.2235, "step": 15296 }, { "epoch": 12.69460580912863, "grad_norm": 36.06510543823242, "learning_rate": 1.4925145228215769e-05, "loss": 0.862, "step": 15297 }, { "epoch": 12.695435684647302, "grad_norm": 37.251625061035156, 
"learning_rate": 1.49248132780083e-05, "loss": 0.9786, "step": 15298 }, { "epoch": 12.696265560165974, "grad_norm": 29.040376663208008, "learning_rate": 1.4924481327800831e-05, "loss": 0.8295, "step": 15299 }, { "epoch": 12.697095435684647, "grad_norm": 29.17964744567871, "learning_rate": 1.4924149377593362e-05, "loss": 0.5399, "step": 15300 }, { "epoch": 12.697925311203319, "grad_norm": 46.64225387573242, "learning_rate": 1.4923817427385894e-05, "loss": 1.8832, "step": 15301 }, { "epoch": 12.698755186721991, "grad_norm": 31.82694435119629, "learning_rate": 1.4923485477178426e-05, "loss": 1.0958, "step": 15302 }, { "epoch": 12.699585062240663, "grad_norm": 34.986751556396484, "learning_rate": 1.4923153526970955e-05, "loss": 0.6357, "step": 15303 }, { "epoch": 12.700414937759335, "grad_norm": 21.495054244995117, "learning_rate": 1.4922821576763487e-05, "loss": 0.5509, "step": 15304 }, { "epoch": 12.701244813278008, "grad_norm": 31.990671157836914, "learning_rate": 1.4922489626556019e-05, "loss": 1.2899, "step": 15305 }, { "epoch": 12.70207468879668, "grad_norm": 31.179319381713867, "learning_rate": 1.492215767634855e-05, "loss": 0.7027, "step": 15306 }, { "epoch": 12.702904564315352, "grad_norm": 51.93061447143555, "learning_rate": 1.492182572614108e-05, "loss": 1.7355, "step": 15307 }, { "epoch": 12.703734439834024, "grad_norm": 26.00179672241211, "learning_rate": 1.4921493775933612e-05, "loss": 0.8982, "step": 15308 }, { "epoch": 12.704564315352696, "grad_norm": 27.452106475830078, "learning_rate": 1.4921161825726142e-05, "loss": 0.723, "step": 15309 }, { "epoch": 12.705394190871369, "grad_norm": 35.16644287109375, "learning_rate": 1.4920829875518674e-05, "loss": 0.8515, "step": 15310 }, { "epoch": 12.70622406639004, "grad_norm": 19.957664489746094, "learning_rate": 1.4920497925311203e-05, "loss": 0.371, "step": 15311 }, { "epoch": 12.707053941908713, "grad_norm": 42.2035026550293, "learning_rate": 1.4920165975103735e-05, "loss": 1.0006, "step": 15312 }, { 
"epoch": 12.707883817427385, "grad_norm": 95.5361328125, "learning_rate": 1.4919834024896267e-05, "loss": 1.082, "step": 15313 }, { "epoch": 12.708713692946057, "grad_norm": 44.90713119506836, "learning_rate": 1.49195020746888e-05, "loss": 0.7709, "step": 15314 }, { "epoch": 12.70954356846473, "grad_norm": 22.301538467407227, "learning_rate": 1.4919170124481328e-05, "loss": 0.6179, "step": 15315 }, { "epoch": 12.710373443983402, "grad_norm": 23.1517276763916, "learning_rate": 1.491883817427386e-05, "loss": 0.8603, "step": 15316 }, { "epoch": 12.711203319502074, "grad_norm": 71.4620132446289, "learning_rate": 1.491850622406639e-05, "loss": 1.0354, "step": 15317 }, { "epoch": 12.712033195020746, "grad_norm": 23.878164291381836, "learning_rate": 1.4918174273858923e-05, "loss": 0.9012, "step": 15318 }, { "epoch": 12.712863070539418, "grad_norm": 29.848052978515625, "learning_rate": 1.4917842323651453e-05, "loss": 0.8912, "step": 15319 }, { "epoch": 12.71369294605809, "grad_norm": 34.56328201293945, "learning_rate": 1.4917510373443983e-05, "loss": 0.9785, "step": 15320 }, { "epoch": 12.714522821576763, "grad_norm": 39.90723419189453, "learning_rate": 1.4917178423236516e-05, "loss": 1.1501, "step": 15321 }, { "epoch": 12.715352697095435, "grad_norm": 34.13270568847656, "learning_rate": 1.4916846473029048e-05, "loss": 0.9774, "step": 15322 }, { "epoch": 12.716182572614107, "grad_norm": 37.069210052490234, "learning_rate": 1.491651452282158e-05, "loss": 0.7021, "step": 15323 }, { "epoch": 12.71701244813278, "grad_norm": 109.73320007324219, "learning_rate": 1.4916182572614109e-05, "loss": 0.6664, "step": 15324 }, { "epoch": 12.717842323651452, "grad_norm": 25.029142379760742, "learning_rate": 1.491585062240664e-05, "loss": 0.4557, "step": 15325 }, { "epoch": 12.718672199170124, "grad_norm": 32.07505798339844, "learning_rate": 1.4915518672199171e-05, "loss": 1.1064, "step": 15326 }, { "epoch": 12.719502074688796, "grad_norm": 41.24100875854492, "learning_rate": 
1.4915186721991703e-05, "loss": 1.2037, "step": 15327 }, { "epoch": 12.720331950207468, "grad_norm": 23.121212005615234, "learning_rate": 1.4914854771784234e-05, "loss": 0.5266, "step": 15328 }, { "epoch": 12.72116182572614, "grad_norm": 62.98186492919922, "learning_rate": 1.4914522821576764e-05, "loss": 1.837, "step": 15329 }, { "epoch": 12.721991701244812, "grad_norm": 22.696157455444336, "learning_rate": 1.4914190871369296e-05, "loss": 1.0017, "step": 15330 }, { "epoch": 12.722821576763485, "grad_norm": 25.911542892456055, "learning_rate": 1.4913858921161828e-05, "loss": 0.6646, "step": 15331 }, { "epoch": 12.723651452282157, "grad_norm": 37.4542121887207, "learning_rate": 1.4913526970954357e-05, "loss": 0.973, "step": 15332 }, { "epoch": 12.724481327800829, "grad_norm": 29.205652236938477, "learning_rate": 1.4913195020746889e-05, "loss": 1.0103, "step": 15333 }, { "epoch": 12.725311203319501, "grad_norm": 49.15050506591797, "learning_rate": 1.4912863070539421e-05, "loss": 1.5002, "step": 15334 }, { "epoch": 12.726141078838173, "grad_norm": 41.87044906616211, "learning_rate": 1.4912531120331952e-05, "loss": 1.1261, "step": 15335 }, { "epoch": 12.726970954356846, "grad_norm": 38.681217193603516, "learning_rate": 1.4912199170124482e-05, "loss": 1.0027, "step": 15336 }, { "epoch": 12.727800829875518, "grad_norm": 21.232376098632812, "learning_rate": 1.4911867219917014e-05, "loss": 0.5449, "step": 15337 }, { "epoch": 12.72863070539419, "grad_norm": 23.68186378479004, "learning_rate": 1.4911535269709544e-05, "loss": 0.4521, "step": 15338 }, { "epoch": 12.729460580912862, "grad_norm": 70.46726989746094, "learning_rate": 1.4911203319502077e-05, "loss": 0.5659, "step": 15339 }, { "epoch": 12.730290456431534, "grad_norm": 34.032249450683594, "learning_rate": 1.4910871369294605e-05, "loss": 1.075, "step": 15340 }, { "epoch": 12.731120331950207, "grad_norm": 34.10642623901367, "learning_rate": 1.4910539419087137e-05, "loss": 0.5875, "step": 15341 }, { "epoch": 
12.731950207468879, "grad_norm": 32.04414749145508, "learning_rate": 1.491020746887967e-05, "loss": 1.1622, "step": 15342 }, { "epoch": 12.732780082987551, "grad_norm": 53.85970687866211, "learning_rate": 1.4909875518672202e-05, "loss": 1.2529, "step": 15343 }, { "epoch": 12.733609958506223, "grad_norm": 41.64252853393555, "learning_rate": 1.490954356846473e-05, "loss": 1.041, "step": 15344 }, { "epoch": 12.734439834024897, "grad_norm": 34.48173522949219, "learning_rate": 1.4909211618257262e-05, "loss": 1.0085, "step": 15345 }, { "epoch": 12.73526970954357, "grad_norm": 54.40801239013672, "learning_rate": 1.4908879668049795e-05, "loss": 1.2621, "step": 15346 }, { "epoch": 12.736099585062242, "grad_norm": 45.72206115722656, "learning_rate": 1.4908547717842325e-05, "loss": 1.0614, "step": 15347 }, { "epoch": 12.736929460580914, "grad_norm": 32.77067184448242, "learning_rate": 1.4908215767634855e-05, "loss": 1.1331, "step": 15348 }, { "epoch": 12.737759336099586, "grad_norm": 47.91493225097656, "learning_rate": 1.4907883817427386e-05, "loss": 1.3012, "step": 15349 }, { "epoch": 12.738589211618258, "grad_norm": 39.7322883605957, "learning_rate": 1.4907551867219918e-05, "loss": 0.8989, "step": 15350 }, { "epoch": 12.73941908713693, "grad_norm": 35.37910842895508, "learning_rate": 1.490721991701245e-05, "loss": 1.0492, "step": 15351 }, { "epoch": 12.740248962655603, "grad_norm": 24.1204833984375, "learning_rate": 1.4906887966804982e-05, "loss": 0.4982, "step": 15352 }, { "epoch": 12.741078838174275, "grad_norm": 26.868532180786133, "learning_rate": 1.490655601659751e-05, "loss": 1.244, "step": 15353 }, { "epoch": 12.741908713692947, "grad_norm": 31.76620864868164, "learning_rate": 1.4906224066390043e-05, "loss": 0.8021, "step": 15354 }, { "epoch": 12.74273858921162, "grad_norm": 22.645681381225586, "learning_rate": 1.4905892116182575e-05, "loss": 0.6558, "step": 15355 }, { "epoch": 12.743568464730291, "grad_norm": 30.550086975097656, "learning_rate": 
1.4905560165975105e-05, "loss": 1.0553, "step": 15356 }, { "epoch": 12.744398340248964, "grad_norm": 26.523033142089844, "learning_rate": 1.4905228215767636e-05, "loss": 1.0019, "step": 15357 }, { "epoch": 12.745228215767636, "grad_norm": 44.018775939941406, "learning_rate": 1.4904896265560166e-05, "loss": 0.7385, "step": 15358 }, { "epoch": 12.746058091286308, "grad_norm": 28.15435218811035, "learning_rate": 1.4904564315352698e-05, "loss": 0.7463, "step": 15359 }, { "epoch": 12.74688796680498, "grad_norm": 21.91577911376953, "learning_rate": 1.490423236514523e-05, "loss": 1.0565, "step": 15360 }, { "epoch": 12.747717842323652, "grad_norm": 53.205169677734375, "learning_rate": 1.490390041493776e-05, "loss": 1.0416, "step": 15361 }, { "epoch": 12.748547717842325, "grad_norm": 24.987014770507812, "learning_rate": 1.4903568464730291e-05, "loss": 0.7054, "step": 15362 }, { "epoch": 12.749377593360997, "grad_norm": 21.0860652923584, "learning_rate": 1.4903236514522823e-05, "loss": 0.6781, "step": 15363 }, { "epoch": 12.750207468879669, "grad_norm": 41.02686309814453, "learning_rate": 1.4902904564315354e-05, "loss": 0.8098, "step": 15364 }, { "epoch": 12.751037344398341, "grad_norm": 25.105104446411133, "learning_rate": 1.4902572614107884e-05, "loss": 1.0527, "step": 15365 }, { "epoch": 12.751867219917013, "grad_norm": 29.125877380371094, "learning_rate": 1.4902240663900416e-05, "loss": 0.957, "step": 15366 }, { "epoch": 12.752697095435686, "grad_norm": 16.11611557006836, "learning_rate": 1.4901908713692947e-05, "loss": 0.419, "step": 15367 }, { "epoch": 12.753526970954358, "grad_norm": 22.294431686401367, "learning_rate": 1.4901576763485479e-05, "loss": 0.481, "step": 15368 }, { "epoch": 12.75435684647303, "grad_norm": 38.77490997314453, "learning_rate": 1.490124481327801e-05, "loss": 1.1272, "step": 15369 }, { "epoch": 12.755186721991702, "grad_norm": 44.20836639404297, "learning_rate": 1.490091286307054e-05, "loss": 0.7136, "step": 15370 }, { "epoch": 
12.756016597510374, "grad_norm": 42.42164611816406, "learning_rate": 1.4900580912863072e-05, "loss": 1.0992, "step": 15371 }, { "epoch": 12.756846473029047, "grad_norm": 50.8177375793457, "learning_rate": 1.4900248962655604e-05, "loss": 0.7398, "step": 15372 }, { "epoch": 12.757676348547719, "grad_norm": 54.0120849609375, "learning_rate": 1.4899917012448133e-05, "loss": 0.9373, "step": 15373 }, { "epoch": 12.758506224066391, "grad_norm": 61.6114387512207, "learning_rate": 1.4899585062240665e-05, "loss": 0.7095, "step": 15374 }, { "epoch": 12.759336099585063, "grad_norm": 35.578514099121094, "learning_rate": 1.4899253112033197e-05, "loss": 1.2425, "step": 15375 }, { "epoch": 12.760165975103735, "grad_norm": 28.934206008911133, "learning_rate": 1.4898921161825727e-05, "loss": 0.6136, "step": 15376 }, { "epoch": 12.760995850622407, "grad_norm": 27.697124481201172, "learning_rate": 1.489858921161826e-05, "loss": 0.7052, "step": 15377 }, { "epoch": 12.76182572614108, "grad_norm": 28.52094268798828, "learning_rate": 1.489825726141079e-05, "loss": 1.0877, "step": 15378 }, { "epoch": 12.762655601659752, "grad_norm": 14.29814624786377, "learning_rate": 1.489792531120332e-05, "loss": 0.6197, "step": 15379 }, { "epoch": 12.763485477178424, "grad_norm": 34.584041595458984, "learning_rate": 1.4897593360995852e-05, "loss": 1.2627, "step": 15380 }, { "epoch": 12.764315352697096, "grad_norm": 25.271400451660156, "learning_rate": 1.4897261410788384e-05, "loss": 0.6791, "step": 15381 }, { "epoch": 12.765145228215768, "grad_norm": 25.28502655029297, "learning_rate": 1.4896929460580913e-05, "loss": 0.4973, "step": 15382 }, { "epoch": 12.76597510373444, "grad_norm": 31.23248291015625, "learning_rate": 1.4896597510373445e-05, "loss": 1.2685, "step": 15383 }, { "epoch": 12.766804979253113, "grad_norm": 65.4446792602539, "learning_rate": 1.4896265560165977e-05, "loss": 1.1391, "step": 15384 }, { "epoch": 12.767634854771785, "grad_norm": 62.75609588623047, "learning_rate": 
1.4895933609958508e-05, "loss": 1.3675, "step": 15385 }, { "epoch": 12.768464730290457, "grad_norm": 18.092430114746094, "learning_rate": 1.4895601659751038e-05, "loss": 0.3942, "step": 15386 }, { "epoch": 12.76929460580913, "grad_norm": 46.90262222290039, "learning_rate": 1.4895269709543569e-05, "loss": 0.5734, "step": 15387 }, { "epoch": 12.770124481327802, "grad_norm": 55.38947677612305, "learning_rate": 1.48949377593361e-05, "loss": 1.1915, "step": 15388 }, { "epoch": 12.770954356846474, "grad_norm": 48.98143005371094, "learning_rate": 1.4894605809128633e-05, "loss": 0.815, "step": 15389 }, { "epoch": 12.771784232365146, "grad_norm": 47.3117790222168, "learning_rate": 1.4894273858921162e-05, "loss": 1.2834, "step": 15390 }, { "epoch": 12.772614107883818, "grad_norm": 51.496795654296875, "learning_rate": 1.4893941908713694e-05, "loss": 0.9707, "step": 15391 }, { "epoch": 12.77344398340249, "grad_norm": 38.41409683227539, "learning_rate": 1.4893609958506226e-05, "loss": 0.5077, "step": 15392 }, { "epoch": 12.774273858921163, "grad_norm": 82.60981750488281, "learning_rate": 1.4893278008298758e-05, "loss": 0.8256, "step": 15393 }, { "epoch": 12.775103734439835, "grad_norm": 33.71500778198242, "learning_rate": 1.4892946058091287e-05, "loss": 1.1572, "step": 15394 }, { "epoch": 12.775933609958507, "grad_norm": 31.98659896850586, "learning_rate": 1.4892614107883819e-05, "loss": 0.5248, "step": 15395 }, { "epoch": 12.77676348547718, "grad_norm": 79.34061431884766, "learning_rate": 1.4892282157676349e-05, "loss": 0.9727, "step": 15396 }, { "epoch": 12.777593360995851, "grad_norm": 32.93106460571289, "learning_rate": 1.4891950207468881e-05, "loss": 1.0583, "step": 15397 }, { "epoch": 12.778423236514524, "grad_norm": 22.991249084472656, "learning_rate": 1.4891618257261412e-05, "loss": 0.562, "step": 15398 }, { "epoch": 12.779253112033196, "grad_norm": 49.32659912109375, "learning_rate": 1.4891286307053942e-05, "loss": 0.7223, "step": 15399 }, { "epoch": 
12.780082987551868, "grad_norm": 26.715444564819336, "learning_rate": 1.4890954356846474e-05, "loss": 0.7132, "step": 15400 }, { "epoch": 12.78091286307054, "grad_norm": 20.281877517700195, "learning_rate": 1.4890622406639006e-05, "loss": 0.4537, "step": 15401 }, { "epoch": 12.781742738589212, "grad_norm": 49.707115173339844, "learning_rate": 1.4890290456431538e-05, "loss": 0.6947, "step": 15402 }, { "epoch": 12.782572614107885, "grad_norm": 41.786033630371094, "learning_rate": 1.4889958506224067e-05, "loss": 0.6804, "step": 15403 }, { "epoch": 12.783402489626557, "grad_norm": 22.090839385986328, "learning_rate": 1.48896265560166e-05, "loss": 0.5878, "step": 15404 }, { "epoch": 12.784232365145229, "grad_norm": 32.42010498046875, "learning_rate": 1.488929460580913e-05, "loss": 0.9028, "step": 15405 }, { "epoch": 12.785062240663901, "grad_norm": 43.59713363647461, "learning_rate": 1.4888962655601662e-05, "loss": 0.7037, "step": 15406 }, { "epoch": 12.785892116182573, "grad_norm": 27.106334686279297, "learning_rate": 1.4888630705394192e-05, "loss": 0.3234, "step": 15407 }, { "epoch": 12.786721991701246, "grad_norm": 37.96315383911133, "learning_rate": 1.4888298755186723e-05, "loss": 0.4364, "step": 15408 }, { "epoch": 12.787551867219918, "grad_norm": 49.95423126220703, "learning_rate": 1.4887966804979255e-05, "loss": 1.6811, "step": 15409 }, { "epoch": 12.78838174273859, "grad_norm": 26.401514053344727, "learning_rate": 1.4887634854771787e-05, "loss": 0.8215, "step": 15410 }, { "epoch": 12.789211618257262, "grad_norm": 63.60087585449219, "learning_rate": 1.4887302904564315e-05, "loss": 1.1902, "step": 15411 }, { "epoch": 12.790041493775934, "grad_norm": 34.29484939575195, "learning_rate": 1.4886970954356848e-05, "loss": 0.6581, "step": 15412 }, { "epoch": 12.790871369294607, "grad_norm": 25.982656478881836, "learning_rate": 1.488663900414938e-05, "loss": 0.6439, "step": 15413 }, { "epoch": 12.791701244813279, "grad_norm": 29.153532028198242, "learning_rate": 
1.488630705394191e-05, "loss": 1.3375, "step": 15414 }, { "epoch": 12.792531120331951, "grad_norm": 34.97621536254883, "learning_rate": 1.488597510373444e-05, "loss": 0.8667, "step": 15415 }, { "epoch": 12.793360995850623, "grad_norm": 28.728233337402344, "learning_rate": 1.4885643153526973e-05, "loss": 0.8034, "step": 15416 }, { "epoch": 12.794190871369295, "grad_norm": 43.21095657348633, "learning_rate": 1.4885311203319503e-05, "loss": 0.5871, "step": 15417 }, { "epoch": 12.795020746887968, "grad_norm": 55.46143341064453, "learning_rate": 1.4884979253112035e-05, "loss": 1.2312, "step": 15418 }, { "epoch": 12.79585062240664, "grad_norm": 26.777057647705078, "learning_rate": 1.4884647302904564e-05, "loss": 1.0182, "step": 15419 }, { "epoch": 12.796680497925312, "grad_norm": 21.602052688598633, "learning_rate": 1.4884315352697096e-05, "loss": 0.4573, "step": 15420 }, { "epoch": 12.797510373443984, "grad_norm": 118.5970687866211, "learning_rate": 1.4883983402489628e-05, "loss": 1.0711, "step": 15421 }, { "epoch": 12.798340248962656, "grad_norm": 30.0068302154541, "learning_rate": 1.488365145228216e-05, "loss": 0.7821, "step": 15422 }, { "epoch": 12.799170124481329, "grad_norm": 56.430519104003906, "learning_rate": 1.4883319502074689e-05, "loss": 0.7074, "step": 15423 }, { "epoch": 12.8, "grad_norm": 35.837486267089844, "learning_rate": 1.4882987551867221e-05, "loss": 1.1469, "step": 15424 }, { "epoch": 12.800829875518673, "grad_norm": 49.5520133972168, "learning_rate": 1.4882655601659753e-05, "loss": 0.8014, "step": 15425 }, { "epoch": 12.801659751037345, "grad_norm": 24.578323364257812, "learning_rate": 1.4882323651452284e-05, "loss": 0.3724, "step": 15426 }, { "epoch": 12.802489626556017, "grad_norm": 28.590057373046875, "learning_rate": 1.4881991701244814e-05, "loss": 0.8129, "step": 15427 }, { "epoch": 12.80331950207469, "grad_norm": 51.02524185180664, "learning_rate": 1.4881659751037344e-05, "loss": 1.0162, "step": 15428 }, { "epoch": 12.804149377593362, 
"grad_norm": 38.8724365234375, "learning_rate": 1.4881327800829876e-05, "loss": 1.1776, "step": 15429 }, { "epoch": 12.804979253112034, "grad_norm": 25.70915412902832, "learning_rate": 1.4880995850622409e-05, "loss": 0.8363, "step": 15430 }, { "epoch": 12.805809128630706, "grad_norm": 49.7345085144043, "learning_rate": 1.488066390041494e-05, "loss": 1.5127, "step": 15431 }, { "epoch": 12.806639004149378, "grad_norm": 16.2484188079834, "learning_rate": 1.488033195020747e-05, "loss": 0.4604, "step": 15432 }, { "epoch": 12.80746887966805, "grad_norm": 52.12473678588867, "learning_rate": 1.4880000000000002e-05, "loss": 0.8601, "step": 15433 }, { "epoch": 12.808298755186723, "grad_norm": 25.995243072509766, "learning_rate": 1.4879668049792532e-05, "loss": 1.3216, "step": 15434 }, { "epoch": 12.809128630705395, "grad_norm": 24.742963790893555, "learning_rate": 1.4879336099585064e-05, "loss": 0.9216, "step": 15435 }, { "epoch": 12.809958506224067, "grad_norm": 27.614276885986328, "learning_rate": 1.4879004149377594e-05, "loss": 0.7443, "step": 15436 }, { "epoch": 12.81078838174274, "grad_norm": 38.82601547241211, "learning_rate": 1.4878672199170125e-05, "loss": 1.0907, "step": 15437 }, { "epoch": 12.811618257261411, "grad_norm": 23.084251403808594, "learning_rate": 1.4878340248962657e-05, "loss": 0.4386, "step": 15438 }, { "epoch": 12.812448132780084, "grad_norm": 63.71030807495117, "learning_rate": 1.4878008298755189e-05, "loss": 1.641, "step": 15439 }, { "epoch": 12.813278008298756, "grad_norm": 40.040260314941406, "learning_rate": 1.4877676348547718e-05, "loss": 1.5496, "step": 15440 }, { "epoch": 12.814107883817428, "grad_norm": 45.915775299072266, "learning_rate": 1.487734439834025e-05, "loss": 1.1052, "step": 15441 }, { "epoch": 12.8149377593361, "grad_norm": 21.85254669189453, "learning_rate": 1.4877012448132782e-05, "loss": 0.7135, "step": 15442 }, { "epoch": 12.815767634854772, "grad_norm": 34.93099594116211, "learning_rate": 1.4876680497925312e-05, "loss": 
0.571, "step": 15443 }, { "epoch": 12.816597510373445, "grad_norm": 27.325544357299805, "learning_rate": 1.4876348547717843e-05, "loss": 0.754, "step": 15444 }, { "epoch": 12.817427385892117, "grad_norm": 18.613027572631836, "learning_rate": 1.4876016597510375e-05, "loss": 0.5604, "step": 15445 }, { "epoch": 12.818257261410789, "grad_norm": 61.6496696472168, "learning_rate": 1.4875684647302905e-05, "loss": 1.7336, "step": 15446 }, { "epoch": 12.819087136929461, "grad_norm": 24.50905990600586, "learning_rate": 1.4875352697095437e-05, "loss": 0.6567, "step": 15447 }, { "epoch": 12.819917012448133, "grad_norm": 27.837888717651367, "learning_rate": 1.4875020746887966e-05, "loss": 1.0155, "step": 15448 }, { "epoch": 12.820746887966806, "grad_norm": 30.57039451599121, "learning_rate": 1.4874688796680498e-05, "loss": 0.5821, "step": 15449 }, { "epoch": 12.821576763485478, "grad_norm": 26.427597045898438, "learning_rate": 1.487435684647303e-05, "loss": 1.0962, "step": 15450 }, { "epoch": 12.82240663900415, "grad_norm": 24.03767967224121, "learning_rate": 1.4874024896265563e-05, "loss": 0.7608, "step": 15451 }, { "epoch": 12.823236514522822, "grad_norm": 23.454431533813477, "learning_rate": 1.4873692946058091e-05, "loss": 1.0176, "step": 15452 }, { "epoch": 12.824066390041494, "grad_norm": 55.500526428222656, "learning_rate": 1.4873360995850623e-05, "loss": 1.1132, "step": 15453 }, { "epoch": 12.824896265560167, "grad_norm": 16.81621551513672, "learning_rate": 1.4873029045643155e-05, "loss": 0.6105, "step": 15454 }, { "epoch": 12.825726141078839, "grad_norm": 38.18855285644531, "learning_rate": 1.4872697095435686e-05, "loss": 1.385, "step": 15455 }, { "epoch": 12.826556016597511, "grad_norm": 29.966007232666016, "learning_rate": 1.4872365145228218e-05, "loss": 0.7508, "step": 15456 }, { "epoch": 12.827385892116183, "grad_norm": 19.848194122314453, "learning_rate": 1.4872033195020747e-05, "loss": 0.9729, "step": 15457 }, { "epoch": 12.828215767634855, "grad_norm": 
26.523639678955078, "learning_rate": 1.4871701244813279e-05, "loss": 0.7405, "step": 15458 }, { "epoch": 12.829045643153528, "grad_norm": 24.22442626953125, "learning_rate": 1.4871369294605811e-05, "loss": 0.8625, "step": 15459 }, { "epoch": 12.8298755186722, "grad_norm": 25.968891143798828, "learning_rate": 1.4871037344398343e-05, "loss": 0.7543, "step": 15460 }, { "epoch": 12.830705394190872, "grad_norm": 45.41655731201172, "learning_rate": 1.4870705394190872e-05, "loss": 0.9073, "step": 15461 }, { "epoch": 12.831535269709544, "grad_norm": 29.599855422973633, "learning_rate": 1.4870373443983404e-05, "loss": 0.7717, "step": 15462 }, { "epoch": 12.832365145228216, "grad_norm": 20.255720138549805, "learning_rate": 1.4870041493775936e-05, "loss": 1.0416, "step": 15463 }, { "epoch": 12.833195020746889, "grad_norm": 40.835182189941406, "learning_rate": 1.4869709543568466e-05, "loss": 1.1915, "step": 15464 }, { "epoch": 12.83402489626556, "grad_norm": 19.832714080810547, "learning_rate": 1.4869377593360997e-05, "loss": 0.7373, "step": 15465 }, { "epoch": 12.834854771784233, "grad_norm": 84.82331085205078, "learning_rate": 1.4869045643153527e-05, "loss": 1.8938, "step": 15466 }, { "epoch": 12.835684647302905, "grad_norm": 46.90367126464844, "learning_rate": 1.486871369294606e-05, "loss": 0.9646, "step": 15467 }, { "epoch": 12.836514522821577, "grad_norm": 37.25657272338867, "learning_rate": 1.4868381742738591e-05, "loss": 0.9871, "step": 15468 }, { "epoch": 12.83734439834025, "grad_norm": 29.210512161254883, "learning_rate": 1.486804979253112e-05, "loss": 0.723, "step": 15469 }, { "epoch": 12.838174273858922, "grad_norm": 36.08564758300781, "learning_rate": 1.4867717842323652e-05, "loss": 0.8957, "step": 15470 }, { "epoch": 12.839004149377594, "grad_norm": 94.04277038574219, "learning_rate": 1.4867385892116184e-05, "loss": 1.1701, "step": 15471 }, { "epoch": 12.839834024896266, "grad_norm": 43.843936920166016, "learning_rate": 1.4867053941908716e-05, "loss": 0.8454, 
"step": 15472 }, { "epoch": 12.840663900414938, "grad_norm": 124.80096435546875, "learning_rate": 1.4866721991701245e-05, "loss": 1.3933, "step": 15473 }, { "epoch": 12.84149377593361, "grad_norm": 27.3668212890625, "learning_rate": 1.4866390041493777e-05, "loss": 1.4558, "step": 15474 }, { "epoch": 12.842323651452283, "grad_norm": 29.902612686157227, "learning_rate": 1.4866058091286308e-05, "loss": 0.5513, "step": 15475 }, { "epoch": 12.843153526970955, "grad_norm": 20.179624557495117, "learning_rate": 1.486572614107884e-05, "loss": 0.8495, "step": 15476 }, { "epoch": 12.843983402489627, "grad_norm": 31.606733322143555, "learning_rate": 1.486539419087137e-05, "loss": 0.5969, "step": 15477 }, { "epoch": 12.8448132780083, "grad_norm": 45.611549377441406, "learning_rate": 1.48650622406639e-05, "loss": 1.5023, "step": 15478 }, { "epoch": 12.845643153526972, "grad_norm": 35.79363250732422, "learning_rate": 1.4864730290456433e-05, "loss": 1.3252, "step": 15479 }, { "epoch": 12.846473029045644, "grad_norm": 24.865009307861328, "learning_rate": 1.4864398340248965e-05, "loss": 0.6576, "step": 15480 }, { "epoch": 12.847302904564316, "grad_norm": 32.11238479614258, "learning_rate": 1.4864066390041495e-05, "loss": 0.6948, "step": 15481 }, { "epoch": 12.848132780082988, "grad_norm": 44.64011764526367, "learning_rate": 1.4863734439834026e-05, "loss": 0.6507, "step": 15482 }, { "epoch": 12.84896265560166, "grad_norm": 63.721405029296875, "learning_rate": 1.4863402489626558e-05, "loss": 0.7836, "step": 15483 }, { "epoch": 12.849792531120332, "grad_norm": 71.98312377929688, "learning_rate": 1.4863070539419088e-05, "loss": 1.0956, "step": 15484 }, { "epoch": 12.850622406639005, "grad_norm": 57.661537170410156, "learning_rate": 1.486273858921162e-05, "loss": 0.9012, "step": 15485 }, { "epoch": 12.851452282157677, "grad_norm": 36.102806091308594, "learning_rate": 1.486240663900415e-05, "loss": 0.8345, "step": 15486 }, { "epoch": 12.852282157676349, "grad_norm": 41.868492126464844, 
"learning_rate": 1.4862074688796681e-05, "loss": 0.9457, "step": 15487 }, { "epoch": 12.853112033195021, "grad_norm": 19.13906478881836, "learning_rate": 1.4861742738589213e-05, "loss": 0.5181, "step": 15488 }, { "epoch": 12.853941908713693, "grad_norm": 32.5592041015625, "learning_rate": 1.4861410788381745e-05, "loss": 0.9501, "step": 15489 }, { "epoch": 12.854771784232366, "grad_norm": 31.398103713989258, "learning_rate": 1.4861078838174274e-05, "loss": 0.8664, "step": 15490 }, { "epoch": 12.855601659751038, "grad_norm": 14.809691429138184, "learning_rate": 1.4860746887966806e-05, "loss": 0.4022, "step": 15491 }, { "epoch": 12.85643153526971, "grad_norm": 65.28579711914062, "learning_rate": 1.4860414937759338e-05, "loss": 1.1833, "step": 15492 }, { "epoch": 12.857261410788382, "grad_norm": 27.936939239501953, "learning_rate": 1.4860082987551869e-05, "loss": 0.9918, "step": 15493 }, { "epoch": 12.858091286307054, "grad_norm": 37.17484664916992, "learning_rate": 1.4859751037344399e-05, "loss": 1.3609, "step": 15494 }, { "epoch": 12.858921161825727, "grad_norm": 26.216218948364258, "learning_rate": 1.4859419087136931e-05, "loss": 0.4675, "step": 15495 }, { "epoch": 12.859751037344399, "grad_norm": 21.781858444213867, "learning_rate": 1.4859087136929462e-05, "loss": 0.4153, "step": 15496 }, { "epoch": 12.860580912863071, "grad_norm": 23.874937057495117, "learning_rate": 1.4858755186721994e-05, "loss": 0.7875, "step": 15497 }, { "epoch": 12.861410788381743, "grad_norm": 42.452247619628906, "learning_rate": 1.4858423236514522e-05, "loss": 0.856, "step": 15498 }, { "epoch": 12.862240663900415, "grad_norm": 48.08595275878906, "learning_rate": 1.4858091286307055e-05, "loss": 1.2688, "step": 15499 }, { "epoch": 12.863070539419088, "grad_norm": 30.648906707763672, "learning_rate": 1.4857759336099587e-05, "loss": 0.8612, "step": 15500 }, { "epoch": 12.86390041493776, "grad_norm": 51.93117904663086, "learning_rate": 1.4857427385892119e-05, "loss": 0.5601, "step": 15501 }, { 
"epoch": 12.864730290456432, "grad_norm": 13.996971130371094, "learning_rate": 1.4857095435684647e-05, "loss": 0.4408, "step": 15502 }, { "epoch": 12.865560165975104, "grad_norm": 34.05885696411133, "learning_rate": 1.485676348547718e-05, "loss": 1.3327, "step": 15503 }, { "epoch": 12.866390041493776, "grad_norm": 43.30253982543945, "learning_rate": 1.485643153526971e-05, "loss": 1.1073, "step": 15504 }, { "epoch": 12.867219917012449, "grad_norm": 18.580202102661133, "learning_rate": 1.4856099585062242e-05, "loss": 0.5301, "step": 15505 }, { "epoch": 12.86804979253112, "grad_norm": 21.507247924804688, "learning_rate": 1.4855767634854773e-05, "loss": 0.7072, "step": 15506 }, { "epoch": 12.868879668049793, "grad_norm": 47.07130432128906, "learning_rate": 1.4855435684647303e-05, "loss": 1.7623, "step": 15507 }, { "epoch": 12.869709543568465, "grad_norm": 33.833194732666016, "learning_rate": 1.4855103734439835e-05, "loss": 0.8456, "step": 15508 }, { "epoch": 12.870539419087137, "grad_norm": 32.792091369628906, "learning_rate": 1.4854771784232367e-05, "loss": 0.7788, "step": 15509 }, { "epoch": 12.87136929460581, "grad_norm": 29.324705123901367, "learning_rate": 1.48544398340249e-05, "loss": 0.6069, "step": 15510 }, { "epoch": 12.872199170124482, "grad_norm": 48.980037689208984, "learning_rate": 1.4854107883817428e-05, "loss": 1.0822, "step": 15511 }, { "epoch": 12.873029045643154, "grad_norm": 55.74541091918945, "learning_rate": 1.485377593360996e-05, "loss": 1.4756, "step": 15512 }, { "epoch": 12.873858921161826, "grad_norm": 42.616600036621094, "learning_rate": 1.485344398340249e-05, "loss": 0.4751, "step": 15513 }, { "epoch": 12.874688796680498, "grad_norm": 33.98905944824219, "learning_rate": 1.4853112033195023e-05, "loss": 0.6125, "step": 15514 }, { "epoch": 12.87551867219917, "grad_norm": 48.76573944091797, "learning_rate": 1.4852780082987553e-05, "loss": 0.8826, "step": 15515 }, { "epoch": 12.876348547717843, "grad_norm": 66.65974426269531, "learning_rate": 
1.4852448132780083e-05, "loss": 1.4926, "step": 15516 }, { "epoch": 12.877178423236515, "grad_norm": 28.364118576049805, "learning_rate": 1.4852116182572616e-05, "loss": 0.8629, "step": 15517 }, { "epoch": 12.878008298755187, "grad_norm": 37.37667465209961, "learning_rate": 1.4851784232365148e-05, "loss": 0.7282, "step": 15518 }, { "epoch": 12.87883817427386, "grad_norm": 44.71793746948242, "learning_rate": 1.4851452282157676e-05, "loss": 1.2763, "step": 15519 }, { "epoch": 12.879668049792532, "grad_norm": 51.49951934814453, "learning_rate": 1.4851120331950208e-05, "loss": 1.1597, "step": 15520 }, { "epoch": 12.880497925311204, "grad_norm": 22.39841079711914, "learning_rate": 1.485078838174274e-05, "loss": 0.3851, "step": 15521 }, { "epoch": 12.881327800829876, "grad_norm": 35.98171615600586, "learning_rate": 1.4850456431535271e-05, "loss": 1.1426, "step": 15522 }, { "epoch": 12.882157676348548, "grad_norm": 27.181406021118164, "learning_rate": 1.4850124481327801e-05, "loss": 1.1022, "step": 15523 }, { "epoch": 12.88298755186722, "grad_norm": 39.64450454711914, "learning_rate": 1.4849792531120334e-05, "loss": 1.2031, "step": 15524 }, { "epoch": 12.883817427385893, "grad_norm": 39.348594665527344, "learning_rate": 1.4849460580912864e-05, "loss": 0.8598, "step": 15525 }, { "epoch": 12.884647302904565, "grad_norm": 27.39580726623535, "learning_rate": 1.4849128630705396e-05, "loss": 0.7815, "step": 15526 }, { "epoch": 12.885477178423237, "grad_norm": 21.040433883666992, "learning_rate": 1.4848796680497925e-05, "loss": 0.6672, "step": 15527 }, { "epoch": 12.88630705394191, "grad_norm": 15.332378387451172, "learning_rate": 1.4848464730290457e-05, "loss": 0.6399, "step": 15528 }, { "epoch": 12.887136929460581, "grad_norm": 35.685672760009766, "learning_rate": 1.4848132780082989e-05, "loss": 0.6482, "step": 15529 }, { "epoch": 12.887966804979254, "grad_norm": 37.8885612487793, "learning_rate": 1.4847800829875521e-05, "loss": 0.7656, "step": 15530 }, { "epoch": 
12.888796680497926, "grad_norm": 18.26302719116211, "learning_rate": 1.484746887966805e-05, "loss": 0.8403, "step": 15531 }, { "epoch": 12.889626556016598, "grad_norm": 31.233503341674805, "learning_rate": 1.4847136929460582e-05, "loss": 0.7505, "step": 15532 }, { "epoch": 12.89045643153527, "grad_norm": 24.043684005737305, "learning_rate": 1.4846804979253114e-05, "loss": 0.7219, "step": 15533 }, { "epoch": 12.891286307053942, "grad_norm": 20.02383041381836, "learning_rate": 1.4846473029045644e-05, "loss": 0.7122, "step": 15534 }, { "epoch": 12.892116182572614, "grad_norm": 27.01434326171875, "learning_rate": 1.4846141078838177e-05, "loss": 1.1206, "step": 15535 }, { "epoch": 12.892946058091287, "grad_norm": 51.01577377319336, "learning_rate": 1.4845809128630705e-05, "loss": 1.4298, "step": 15536 }, { "epoch": 12.893775933609959, "grad_norm": 20.428768157958984, "learning_rate": 1.4845477178423237e-05, "loss": 0.7168, "step": 15537 }, { "epoch": 12.894605809128631, "grad_norm": 25.779396057128906, "learning_rate": 1.484514522821577e-05, "loss": 0.7661, "step": 15538 }, { "epoch": 12.895435684647303, "grad_norm": 44.262901306152344, "learning_rate": 1.4844813278008302e-05, "loss": 0.8826, "step": 15539 }, { "epoch": 12.896265560165975, "grad_norm": 41.44842529296875, "learning_rate": 1.484448132780083e-05, "loss": 1.3751, "step": 15540 }, { "epoch": 12.897095435684648, "grad_norm": 31.93351936340332, "learning_rate": 1.4844149377593362e-05, "loss": 0.8478, "step": 15541 }, { "epoch": 12.89792531120332, "grad_norm": 38.284934997558594, "learning_rate": 1.4843817427385895e-05, "loss": 1.4971, "step": 15542 }, { "epoch": 12.898755186721992, "grad_norm": 27.8789119720459, "learning_rate": 1.4843485477178425e-05, "loss": 0.4705, "step": 15543 }, { "epoch": 12.899585062240664, "grad_norm": 12.93568229675293, "learning_rate": 1.4843153526970955e-05, "loss": 0.3549, "step": 15544 }, { "epoch": 12.900414937759336, "grad_norm": 42.393714904785156, "learning_rate": 
1.4842821576763486e-05, "loss": 1.2394, "step": 15545 }, { "epoch": 12.901244813278009, "grad_norm": 21.330467224121094, "learning_rate": 1.4842489626556018e-05, "loss": 0.5242, "step": 15546 }, { "epoch": 12.90207468879668, "grad_norm": 20.35427474975586, "learning_rate": 1.484215767634855e-05, "loss": 1.0608, "step": 15547 }, { "epoch": 12.902904564315353, "grad_norm": 35.05899429321289, "learning_rate": 1.4841825726141079e-05, "loss": 0.8481, "step": 15548 }, { "epoch": 12.903734439834025, "grad_norm": 25.877155303955078, "learning_rate": 1.484149377593361e-05, "loss": 0.968, "step": 15549 }, { "epoch": 12.904564315352697, "grad_norm": 29.39904022216797, "learning_rate": 1.4841161825726143e-05, "loss": 1.2846, "step": 15550 }, { "epoch": 12.90539419087137, "grad_norm": 42.584869384765625, "learning_rate": 1.4840829875518673e-05, "loss": 1.2851, "step": 15551 }, { "epoch": 12.906224066390042, "grad_norm": 35.49138259887695, "learning_rate": 1.4840497925311204e-05, "loss": 1.1369, "step": 15552 }, { "epoch": 12.907053941908714, "grad_norm": 96.30809020996094, "learning_rate": 1.4840165975103736e-05, "loss": 1.4003, "step": 15553 }, { "epoch": 12.907883817427386, "grad_norm": 34.11397171020508, "learning_rate": 1.4839834024896266e-05, "loss": 1.4697, "step": 15554 }, { "epoch": 12.908713692946058, "grad_norm": 30.4688720703125, "learning_rate": 1.4839502074688798e-05, "loss": 0.4524, "step": 15555 }, { "epoch": 12.90954356846473, "grad_norm": 38.2381477355957, "learning_rate": 1.4839170124481329e-05, "loss": 0.6323, "step": 15556 }, { "epoch": 12.910373443983403, "grad_norm": 33.77097702026367, "learning_rate": 1.483883817427386e-05, "loss": 1.2222, "step": 15557 }, { "epoch": 12.911203319502075, "grad_norm": 29.350568771362305, "learning_rate": 1.4838506224066391e-05, "loss": 0.5959, "step": 15558 }, { "epoch": 12.912033195020747, "grad_norm": 21.025131225585938, "learning_rate": 1.4838174273858923e-05, "loss": 0.5459, "step": 15559 }, { "epoch": 
12.91286307053942, "grad_norm": 25.723329544067383, "learning_rate": 1.4837842323651454e-05, "loss": 0.5374, "step": 15560 }, { "epoch": 12.913692946058092, "grad_norm": 27.6890811920166, "learning_rate": 1.4837510373443984e-05, "loss": 0.9259, "step": 15561 }, { "epoch": 12.914522821576764, "grad_norm": 39.74543762207031, "learning_rate": 1.4837178423236516e-05, "loss": 1.2256, "step": 15562 }, { "epoch": 12.915352697095436, "grad_norm": 26.10910415649414, "learning_rate": 1.4836846473029047e-05, "loss": 1.0018, "step": 15563 }, { "epoch": 12.916182572614108, "grad_norm": 34.75135040283203, "learning_rate": 1.4836514522821579e-05, "loss": 1.2058, "step": 15564 }, { "epoch": 12.91701244813278, "grad_norm": 36.733177185058594, "learning_rate": 1.4836182572614108e-05, "loss": 0.9905, "step": 15565 }, { "epoch": 12.917842323651453, "grad_norm": 115.03254699707031, "learning_rate": 1.483585062240664e-05, "loss": 1.2436, "step": 15566 }, { "epoch": 12.918672199170125, "grad_norm": 29.34842300415039, "learning_rate": 1.4835518672199172e-05, "loss": 0.7007, "step": 15567 }, { "epoch": 12.919502074688797, "grad_norm": 35.59575271606445, "learning_rate": 1.4835186721991704e-05, "loss": 0.7221, "step": 15568 }, { "epoch": 12.92033195020747, "grad_norm": 64.69251251220703, "learning_rate": 1.4834854771784233e-05, "loss": 1.171, "step": 15569 }, { "epoch": 12.921161825726141, "grad_norm": 29.432662963867188, "learning_rate": 1.4834522821576765e-05, "loss": 0.6912, "step": 15570 }, { "epoch": 12.921991701244814, "grad_norm": 45.093204498291016, "learning_rate": 1.4834190871369297e-05, "loss": 0.6929, "step": 15571 }, { "epoch": 12.922821576763486, "grad_norm": 34.397483825683594, "learning_rate": 1.4833858921161827e-05, "loss": 1.0136, "step": 15572 }, { "epoch": 12.923651452282158, "grad_norm": 46.21805191040039, "learning_rate": 1.4833526970954358e-05, "loss": 0.8932, "step": 15573 }, { "epoch": 12.92448132780083, "grad_norm": 79.54784393310547, "learning_rate": 
1.4833195020746888e-05, "loss": 1.0611, "step": 15574 }, { "epoch": 12.925311203319502, "grad_norm": 35.82966613769531, "learning_rate": 1.483286307053942e-05, "loss": 1.6774, "step": 15575 }, { "epoch": 12.926141078838175, "grad_norm": 30.89079475402832, "learning_rate": 1.4832531120331952e-05, "loss": 0.7964, "step": 15576 }, { "epoch": 12.926970954356847, "grad_norm": 34.82743453979492, "learning_rate": 1.4832199170124481e-05, "loss": 0.7783, "step": 15577 }, { "epoch": 12.927800829875519, "grad_norm": 28.88064193725586, "learning_rate": 1.4831867219917013e-05, "loss": 0.6979, "step": 15578 }, { "epoch": 12.928630705394191, "grad_norm": 21.338088989257812, "learning_rate": 1.4831535269709545e-05, "loss": 0.5622, "step": 15579 }, { "epoch": 12.929460580912863, "grad_norm": 28.041109085083008, "learning_rate": 1.4831203319502077e-05, "loss": 0.6754, "step": 15580 }, { "epoch": 12.930290456431536, "grad_norm": 20.479393005371094, "learning_rate": 1.4830871369294606e-05, "loss": 0.2974, "step": 15581 }, { "epoch": 12.931120331950208, "grad_norm": 46.66649627685547, "learning_rate": 1.4830539419087138e-05, "loss": 1.037, "step": 15582 }, { "epoch": 12.93195020746888, "grad_norm": 43.158321380615234, "learning_rate": 1.4830207468879669e-05, "loss": 0.8988, "step": 15583 }, { "epoch": 12.932780082987552, "grad_norm": 34.0733642578125, "learning_rate": 1.48298755186722e-05, "loss": 1.0642, "step": 15584 }, { "epoch": 12.933609958506224, "grad_norm": 55.55599594116211, "learning_rate": 1.4829543568464731e-05, "loss": 1.1954, "step": 15585 }, { "epoch": 12.934439834024896, "grad_norm": 34.93118667602539, "learning_rate": 1.4829211618257262e-05, "loss": 1.0094, "step": 15586 }, { "epoch": 12.935269709543569, "grad_norm": 51.24524688720703, "learning_rate": 1.4828879668049794e-05, "loss": 1.0469, "step": 15587 }, { "epoch": 12.936099585062241, "grad_norm": 32.142513275146484, "learning_rate": 1.4828547717842326e-05, "loss": 1.3173, "step": 15588 }, { "epoch": 
12.936929460580913, "grad_norm": 36.3237419128418, "learning_rate": 1.4828215767634858e-05, "loss": 1.3324, "step": 15589 }, { "epoch": 12.937759336099585, "grad_norm": 32.26552963256836, "learning_rate": 1.4827883817427387e-05, "loss": 0.6046, "step": 15590 }, { "epoch": 12.938589211618257, "grad_norm": 29.593372344970703, "learning_rate": 1.4827551867219919e-05, "loss": 0.8816, "step": 15591 }, { "epoch": 12.93941908713693, "grad_norm": 35.98046875, "learning_rate": 1.4827219917012449e-05, "loss": 1.1366, "step": 15592 }, { "epoch": 12.940248962655602, "grad_norm": 59.65305709838867, "learning_rate": 1.4826887966804981e-05, "loss": 0.7386, "step": 15593 }, { "epoch": 12.941078838174274, "grad_norm": 10.80039119720459, "learning_rate": 1.4826556016597512e-05, "loss": 0.2644, "step": 15594 }, { "epoch": 12.941908713692946, "grad_norm": 41.47377395629883, "learning_rate": 1.4826224066390042e-05, "loss": 0.6438, "step": 15595 }, { "epoch": 12.942738589211618, "grad_norm": 63.2038688659668, "learning_rate": 1.4825892116182574e-05, "loss": 0.7263, "step": 15596 }, { "epoch": 12.94356846473029, "grad_norm": 86.54603576660156, "learning_rate": 1.4825560165975106e-05, "loss": 1.0592, "step": 15597 }, { "epoch": 12.944398340248963, "grad_norm": 20.153675079345703, "learning_rate": 1.4825228215767635e-05, "loss": 0.5996, "step": 15598 }, { "epoch": 12.945228215767635, "grad_norm": 28.483016967773438, "learning_rate": 1.4824896265560167e-05, "loss": 1.0557, "step": 15599 }, { "epoch": 12.946058091286307, "grad_norm": 26.609973907470703, "learning_rate": 1.48245643153527e-05, "loss": 0.8298, "step": 15600 }, { "epoch": 12.94688796680498, "grad_norm": 48.94330596923828, "learning_rate": 1.482423236514523e-05, "loss": 1.5849, "step": 15601 }, { "epoch": 12.947717842323652, "grad_norm": 24.069236755371094, "learning_rate": 1.482390041493776e-05, "loss": 0.8511, "step": 15602 }, { "epoch": 12.948547717842324, "grad_norm": 33.86003112792969, "learning_rate": 
1.4823568464730292e-05, "loss": 0.6886, "step": 15603 }, { "epoch": 12.949377593360996, "grad_norm": 24.9017391204834, "learning_rate": 1.4823236514522823e-05, "loss": 1.7301, "step": 15604 }, { "epoch": 12.950207468879668, "grad_norm": 37.467777252197266, "learning_rate": 1.4822904564315355e-05, "loss": 0.6363, "step": 15605 }, { "epoch": 12.95103734439834, "grad_norm": 20.29438018798828, "learning_rate": 1.4822572614107883e-05, "loss": 0.5379, "step": 15606 }, { "epoch": 12.951867219917013, "grad_norm": 34.97762680053711, "learning_rate": 1.4822240663900415e-05, "loss": 0.8566, "step": 15607 }, { "epoch": 12.952697095435685, "grad_norm": 24.692880630493164, "learning_rate": 1.4821908713692948e-05, "loss": 0.5251, "step": 15608 }, { "epoch": 12.953526970954357, "grad_norm": 82.35528564453125, "learning_rate": 1.482157676348548e-05, "loss": 1.1581, "step": 15609 }, { "epoch": 12.95435684647303, "grad_norm": 36.288291931152344, "learning_rate": 1.4821244813278008e-05, "loss": 1.2594, "step": 15610 }, { "epoch": 12.955186721991701, "grad_norm": 43.74072265625, "learning_rate": 1.482091286307054e-05, "loss": 1.1783, "step": 15611 }, { "epoch": 12.956016597510374, "grad_norm": 38.37656021118164, "learning_rate": 1.4820580912863073e-05, "loss": 0.5444, "step": 15612 }, { "epoch": 12.956846473029046, "grad_norm": 26.680959701538086, "learning_rate": 1.4820248962655603e-05, "loss": 0.5369, "step": 15613 }, { "epoch": 12.957676348547718, "grad_norm": 49.27885055541992, "learning_rate": 1.4819917012448135e-05, "loss": 0.8666, "step": 15614 }, { "epoch": 12.95850622406639, "grad_norm": 41.515663146972656, "learning_rate": 1.4819585062240664e-05, "loss": 0.6648, "step": 15615 }, { "epoch": 12.959336099585062, "grad_norm": 29.178695678710938, "learning_rate": 1.4819253112033196e-05, "loss": 0.7949, "step": 15616 }, { "epoch": 12.960165975103735, "grad_norm": 22.979772567749023, "learning_rate": 1.4818921161825728e-05, "loss": 0.8169, "step": 15617 }, { "epoch": 
12.960995850622407, "grad_norm": 50.39662170410156, "learning_rate": 1.481858921161826e-05, "loss": 1.1795, "step": 15618 }, { "epoch": 12.961825726141079, "grad_norm": 32.404319763183594, "learning_rate": 1.4818257261410789e-05, "loss": 1.1514, "step": 15619 }, { "epoch": 12.962655601659751, "grad_norm": 18.24943733215332, "learning_rate": 1.4817925311203321e-05, "loss": 0.6062, "step": 15620 }, { "epoch": 12.963485477178423, "grad_norm": 61.020751953125, "learning_rate": 1.4817593360995851e-05, "loss": 1.6237, "step": 15621 }, { "epoch": 12.964315352697096, "grad_norm": 26.921010971069336, "learning_rate": 1.4817261410788384e-05, "loss": 0.5745, "step": 15622 }, { "epoch": 12.965145228215768, "grad_norm": 43.82315444946289, "learning_rate": 1.4816929460580914e-05, "loss": 1.1835, "step": 15623 }, { "epoch": 12.96597510373444, "grad_norm": 94.10450744628906, "learning_rate": 1.4816597510373444e-05, "loss": 0.5579, "step": 15624 }, { "epoch": 12.966804979253112, "grad_norm": 37.23371124267578, "learning_rate": 1.4816265560165976e-05, "loss": 0.6315, "step": 15625 }, { "epoch": 12.967634854771784, "grad_norm": 82.18697357177734, "learning_rate": 1.4815933609958509e-05, "loss": 1.2923, "step": 15626 }, { "epoch": 12.968464730290457, "grad_norm": 49.04158020019531, "learning_rate": 1.4815601659751037e-05, "loss": 0.7032, "step": 15627 }, { "epoch": 12.969294605809129, "grad_norm": 31.970258712768555, "learning_rate": 1.481526970954357e-05, "loss": 0.5988, "step": 15628 }, { "epoch": 12.970124481327801, "grad_norm": 66.22111511230469, "learning_rate": 1.4814937759336101e-05, "loss": 1.7675, "step": 15629 }, { "epoch": 12.970954356846473, "grad_norm": 28.086759567260742, "learning_rate": 1.4814605809128632e-05, "loss": 0.5342, "step": 15630 }, { "epoch": 12.971784232365145, "grad_norm": 85.50386047363281, "learning_rate": 1.4814273858921162e-05, "loss": 1.1883, "step": 15631 }, { "epoch": 12.972614107883818, "grad_norm": 21.075061798095703, "learning_rate": 
1.4813941908713694e-05, "loss": 0.8217, "step": 15632 }, { "epoch": 12.97344398340249, "grad_norm": 23.571500778198242, "learning_rate": 1.4813609958506225e-05, "loss": 0.4847, "step": 15633 }, { "epoch": 12.974273858921162, "grad_norm": 30.569564819335938, "learning_rate": 1.4813278008298757e-05, "loss": 0.6678, "step": 15634 }, { "epoch": 12.975103734439834, "grad_norm": 26.747159957885742, "learning_rate": 1.4812946058091286e-05, "loss": 0.6245, "step": 15635 }, { "epoch": 12.975933609958506, "grad_norm": 26.116073608398438, "learning_rate": 1.4812614107883818e-05, "loss": 1.2492, "step": 15636 }, { "epoch": 12.976763485477179, "grad_norm": 38.9295654296875, "learning_rate": 1.481228215767635e-05, "loss": 1.0017, "step": 15637 }, { "epoch": 12.97759336099585, "grad_norm": 41.684471130371094, "learning_rate": 1.4811950207468882e-05, "loss": 0.8225, "step": 15638 }, { "epoch": 12.978423236514523, "grad_norm": 35.29377365112305, "learning_rate": 1.481161825726141e-05, "loss": 0.6784, "step": 15639 }, { "epoch": 12.979253112033195, "grad_norm": 16.95745849609375, "learning_rate": 1.4811286307053943e-05, "loss": 0.4705, "step": 15640 }, { "epoch": 12.980082987551867, "grad_norm": 28.00006675720215, "learning_rate": 1.4810954356846475e-05, "loss": 0.797, "step": 15641 }, { "epoch": 12.98091286307054, "grad_norm": 49.77497863769531, "learning_rate": 1.4810622406639005e-05, "loss": 1.2396, "step": 15642 }, { "epoch": 12.981742738589212, "grad_norm": 57.06021499633789, "learning_rate": 1.4810290456431537e-05, "loss": 0.8822, "step": 15643 }, { "epoch": 12.982572614107884, "grad_norm": 34.294227600097656, "learning_rate": 1.4809958506224066e-05, "loss": 0.7303, "step": 15644 }, { "epoch": 12.983402489626556, "grad_norm": 37.859615325927734, "learning_rate": 1.4809626556016598e-05, "loss": 1.2379, "step": 15645 }, { "epoch": 12.984232365145228, "grad_norm": 32.428768157958984, "learning_rate": 1.480929460580913e-05, "loss": 1.1181, "step": 15646 }, { "epoch": 
12.9850622406639, "grad_norm": 22.587326049804688, "learning_rate": 1.4808962655601662e-05, "loss": 0.973, "step": 15647 }, { "epoch": 12.985892116182573, "grad_norm": 45.41221618652344, "learning_rate": 1.4808630705394191e-05, "loss": 1.2397, "step": 15648 }, { "epoch": 12.986721991701245, "grad_norm": 30.03298568725586, "learning_rate": 1.4808298755186723e-05, "loss": 0.7032, "step": 15649 }, { "epoch": 12.987551867219917, "grad_norm": 33.36663055419922, "learning_rate": 1.4807966804979255e-05, "loss": 0.4987, "step": 15650 }, { "epoch": 12.98838174273859, "grad_norm": 28.35347557067871, "learning_rate": 1.4807634854771786e-05, "loss": 0.7273, "step": 15651 }, { "epoch": 12.989211618257261, "grad_norm": 29.010234832763672, "learning_rate": 1.4807302904564316e-05, "loss": 0.8706, "step": 15652 }, { "epoch": 12.990041493775934, "grad_norm": 57.82573318481445, "learning_rate": 1.4806970954356847e-05, "loss": 0.8086, "step": 15653 }, { "epoch": 12.990871369294606, "grad_norm": 40.650840759277344, "learning_rate": 1.4806639004149379e-05, "loss": 0.7694, "step": 15654 }, { "epoch": 12.991701244813278, "grad_norm": 28.391109466552734, "learning_rate": 1.4806307053941911e-05, "loss": 1.5767, "step": 15655 }, { "epoch": 12.99253112033195, "grad_norm": 37.48820114135742, "learning_rate": 1.480597510373444e-05, "loss": 0.8678, "step": 15656 }, { "epoch": 12.993360995850622, "grad_norm": 25.72486686706543, "learning_rate": 1.4805643153526972e-05, "loss": 1.0046, "step": 15657 }, { "epoch": 12.994190871369295, "grad_norm": 25.088314056396484, "learning_rate": 1.4805311203319504e-05, "loss": 0.8627, "step": 15658 }, { "epoch": 12.995020746887967, "grad_norm": 13.683789253234863, "learning_rate": 1.4804979253112036e-05, "loss": 0.4416, "step": 15659 }, { "epoch": 12.995850622406639, "grad_norm": 40.738670349121094, "learning_rate": 1.4804647302904565e-05, "loss": 1.0491, "step": 15660 }, { "epoch": 12.996680497925311, "grad_norm": 30.064260482788086, "learning_rate": 
1.4804315352697097e-05, "loss": 0.9619, "step": 15661 }, { "epoch": 12.997510373443983, "grad_norm": 74.38868713378906, "learning_rate": 1.4803983402489627e-05, "loss": 1.0041, "step": 15662 }, { "epoch": 12.998340248962656, "grad_norm": 46.341583251953125, "learning_rate": 1.480365145228216e-05, "loss": 1.4742, "step": 15663 }, { "epoch": 12.999170124481328, "grad_norm": 42.43754577636719, "learning_rate": 1.480331950207469e-05, "loss": 1.0957, "step": 15664 }, { "epoch": 13.0, "grad_norm": 63.028316497802734, "learning_rate": 1.480298755186722e-05, "loss": 1.1246, "step": 15665 }, { "epoch": 13.000829875518672, "grad_norm": 28.15406036376953, "learning_rate": 1.4802655601659752e-05, "loss": 1.012, "step": 15666 }, { "epoch": 13.001659751037344, "grad_norm": 18.403968811035156, "learning_rate": 1.4802323651452284e-05, "loss": 0.5781, "step": 15667 }, { "epoch": 13.002489626556017, "grad_norm": 18.68001937866211, "learning_rate": 1.4801991701244815e-05, "loss": 0.6805, "step": 15668 }, { "epoch": 13.003319502074689, "grad_norm": 31.176740646362305, "learning_rate": 1.4801659751037345e-05, "loss": 1.3888, "step": 15669 }, { "epoch": 13.004149377593361, "grad_norm": 41.26369094848633, "learning_rate": 1.4801327800829877e-05, "loss": 1.3821, "step": 15670 }, { "epoch": 13.004979253112033, "grad_norm": 18.13051986694336, "learning_rate": 1.4800995850622408e-05, "loss": 0.4755, "step": 15671 }, { "epoch": 13.005809128630705, "grad_norm": 21.997636795043945, "learning_rate": 1.480066390041494e-05, "loss": 0.8167, "step": 15672 }, { "epoch": 13.006639004149378, "grad_norm": 40.8958740234375, "learning_rate": 1.480033195020747e-05, "loss": 1.2781, "step": 15673 }, { "epoch": 13.00746887966805, "grad_norm": 24.067140579223633, "learning_rate": 1.48e-05, "loss": 0.9009, "step": 15674 }, { "epoch": 13.008298755186722, "grad_norm": 19.91901969909668, "learning_rate": 1.4799668049792533e-05, "loss": 0.7382, "step": 15675 }, { "epoch": 13.009128630705394, "grad_norm": 
24.78540802001953, "learning_rate": 1.4799336099585065e-05, "loss": 0.6914, "step": 15676 }, { "epoch": 13.009958506224066, "grad_norm": 22.615184783935547, "learning_rate": 1.4799004149377594e-05, "loss": 0.6818, "step": 15677 }, { "epoch": 13.010788381742739, "grad_norm": 28.342716217041016, "learning_rate": 1.4798672199170126e-05, "loss": 0.6187, "step": 15678 }, { "epoch": 13.01161825726141, "grad_norm": 33.74626922607422, "learning_rate": 1.4798340248962658e-05, "loss": 1.1234, "step": 15679 }, { "epoch": 13.012448132780083, "grad_norm": 32.723052978515625, "learning_rate": 1.4798008298755188e-05, "loss": 0.431, "step": 15680 }, { "epoch": 13.013278008298755, "grad_norm": 38.51655960083008, "learning_rate": 1.4797676348547719e-05, "loss": 0.875, "step": 15681 }, { "epoch": 13.014107883817427, "grad_norm": 50.11960983276367, "learning_rate": 1.4797344398340249e-05, "loss": 0.7479, "step": 15682 }, { "epoch": 13.0149377593361, "grad_norm": 31.767004013061523, "learning_rate": 1.4797012448132781e-05, "loss": 0.6437, "step": 15683 }, { "epoch": 13.015767634854772, "grad_norm": 46.27381896972656, "learning_rate": 1.4796680497925313e-05, "loss": 0.6352, "step": 15684 }, { "epoch": 13.016597510373444, "grad_norm": 59.37400436401367, "learning_rate": 1.4796348547717842e-05, "loss": 1.044, "step": 15685 }, { "epoch": 13.017427385892116, "grad_norm": 55.56200408935547, "learning_rate": 1.4796016597510374e-05, "loss": 0.9042, "step": 15686 }, { "epoch": 13.018257261410788, "grad_norm": 37.02638244628906, "learning_rate": 1.4795684647302906e-05, "loss": 0.5902, "step": 15687 }, { "epoch": 13.01908713692946, "grad_norm": 51.254268646240234, "learning_rate": 1.4795352697095438e-05, "loss": 1.2831, "step": 15688 }, { "epoch": 13.019917012448133, "grad_norm": 46.40297317504883, "learning_rate": 1.4795020746887967e-05, "loss": 1.0393, "step": 15689 }, { "epoch": 13.020746887966805, "grad_norm": 40.85630416870117, "learning_rate": 1.4794688796680499e-05, "loss": 1.0693, "step": 
15690 }, { "epoch": 13.021576763485477, "grad_norm": 53.5020866394043, "learning_rate": 1.479435684647303e-05, "loss": 1.0038, "step": 15691 }, { "epoch": 13.02240663900415, "grad_norm": 41.42206573486328, "learning_rate": 1.4794024896265562e-05, "loss": 0.8852, "step": 15692 }, { "epoch": 13.023236514522821, "grad_norm": 79.37980651855469, "learning_rate": 1.4793692946058094e-05, "loss": 0.7359, "step": 15693 }, { "epoch": 13.024066390041494, "grad_norm": 34.01654815673828, "learning_rate": 1.4793360995850622e-05, "loss": 0.6182, "step": 15694 }, { "epoch": 13.024896265560166, "grad_norm": 90.21375274658203, "learning_rate": 1.4793029045643155e-05, "loss": 0.7817, "step": 15695 }, { "epoch": 13.025726141078838, "grad_norm": 38.673851013183594, "learning_rate": 1.4792697095435687e-05, "loss": 0.6346, "step": 15696 }, { "epoch": 13.02655601659751, "grad_norm": 35.100215911865234, "learning_rate": 1.4792365145228219e-05, "loss": 0.7275, "step": 15697 }, { "epoch": 13.027385892116182, "grad_norm": 25.034597396850586, "learning_rate": 1.4792033195020747e-05, "loss": 0.6558, "step": 15698 }, { "epoch": 13.028215767634855, "grad_norm": 59.663177490234375, "learning_rate": 1.479170124481328e-05, "loss": 0.4304, "step": 15699 }, { "epoch": 13.029045643153527, "grad_norm": 35.81401062011719, "learning_rate": 1.479136929460581e-05, "loss": 1.3018, "step": 15700 }, { "epoch": 13.029875518672199, "grad_norm": 48.59892272949219, "learning_rate": 1.4791037344398342e-05, "loss": 0.8023, "step": 15701 }, { "epoch": 13.030705394190871, "grad_norm": 32.605472564697266, "learning_rate": 1.4790705394190873e-05, "loss": 0.7585, "step": 15702 }, { "epoch": 13.031535269709543, "grad_norm": 39.41792297363281, "learning_rate": 1.4790373443983403e-05, "loss": 0.5632, "step": 15703 }, { "epoch": 13.032365145228216, "grad_norm": 16.68419647216797, "learning_rate": 1.4790041493775935e-05, "loss": 0.3603, "step": 15704 }, { "epoch": 13.033195020746888, "grad_norm": 34.89815902709961, 
"learning_rate": 1.4789709543568467e-05, "loss": 1.4823, "step": 15705 }, { "epoch": 13.03402489626556, "grad_norm": 37.991214752197266, "learning_rate": 1.4789377593360996e-05, "loss": 0.7469, "step": 15706 }, { "epoch": 13.034854771784232, "grad_norm": 26.322010040283203, "learning_rate": 1.4789045643153528e-05, "loss": 0.8324, "step": 15707 }, { "epoch": 13.035684647302904, "grad_norm": 40.80404281616211, "learning_rate": 1.478871369294606e-05, "loss": 0.7151, "step": 15708 }, { "epoch": 13.036514522821577, "grad_norm": 25.544105529785156, "learning_rate": 1.478838174273859e-05, "loss": 0.4267, "step": 15709 }, { "epoch": 13.037344398340249, "grad_norm": 17.197160720825195, "learning_rate": 1.4788049792531121e-05, "loss": 0.4615, "step": 15710 }, { "epoch": 13.038174273858921, "grad_norm": 30.737545013427734, "learning_rate": 1.4787717842323653e-05, "loss": 0.9187, "step": 15711 }, { "epoch": 13.039004149377593, "grad_norm": 18.36267852783203, "learning_rate": 1.4787385892116183e-05, "loss": 0.5356, "step": 15712 }, { "epoch": 13.039834024896265, "grad_norm": 44.12843322753906, "learning_rate": 1.4787053941908716e-05, "loss": 0.998, "step": 15713 }, { "epoch": 13.040663900414938, "grad_norm": 42.10063552856445, "learning_rate": 1.4786721991701244e-05, "loss": 0.5043, "step": 15714 }, { "epoch": 13.04149377593361, "grad_norm": 50.42413330078125, "learning_rate": 1.4786390041493776e-05, "loss": 0.7536, "step": 15715 }, { "epoch": 13.042323651452282, "grad_norm": 25.37396240234375, "learning_rate": 1.4786058091286308e-05, "loss": 0.4284, "step": 15716 }, { "epoch": 13.043153526970954, "grad_norm": 34.19621276855469, "learning_rate": 1.478572614107884e-05, "loss": 0.7364, "step": 15717 }, { "epoch": 13.043983402489626, "grad_norm": 18.20524787902832, "learning_rate": 1.478539419087137e-05, "loss": 0.4621, "step": 15718 }, { "epoch": 13.044813278008299, "grad_norm": 31.08255386352539, "learning_rate": 1.4785062240663901e-05, "loss": 0.354, "step": 15719 }, { "epoch": 
13.04564315352697, "grad_norm": 33.06745910644531, "learning_rate": 1.4784730290456434e-05, "loss": 0.6659, "step": 15720 }, { "epoch": 13.046473029045643, "grad_norm": 24.408771514892578, "learning_rate": 1.4784398340248964e-05, "loss": 1.0639, "step": 15721 }, { "epoch": 13.047302904564315, "grad_norm": 47.7978630065918, "learning_rate": 1.4784066390041496e-05, "loss": 0.7397, "step": 15722 }, { "epoch": 13.048132780082987, "grad_norm": 15.979460716247559, "learning_rate": 1.4783734439834025e-05, "loss": 0.3738, "step": 15723 }, { "epoch": 13.04896265560166, "grad_norm": 37.18720626831055, "learning_rate": 1.4783402489626557e-05, "loss": 0.8459, "step": 15724 }, { "epoch": 13.049792531120332, "grad_norm": 38.06345748901367, "learning_rate": 1.4783070539419089e-05, "loss": 0.4601, "step": 15725 }, { "epoch": 13.050622406639004, "grad_norm": 41.48759841918945, "learning_rate": 1.4782738589211621e-05, "loss": 0.9963, "step": 15726 }, { "epoch": 13.051452282157676, "grad_norm": 51.19313430786133, "learning_rate": 1.478240663900415e-05, "loss": 1.1407, "step": 15727 }, { "epoch": 13.052282157676348, "grad_norm": 24.66977882385254, "learning_rate": 1.4782074688796682e-05, "loss": 0.3948, "step": 15728 }, { "epoch": 13.05311203319502, "grad_norm": 27.667734146118164, "learning_rate": 1.4781742738589214e-05, "loss": 0.3833, "step": 15729 }, { "epoch": 13.053941908713693, "grad_norm": 44.287506103515625, "learning_rate": 1.4781410788381744e-05, "loss": 0.9391, "step": 15730 }, { "epoch": 13.054771784232365, "grad_norm": NaN, "learning_rate": 1.4781410788381744e-05, "loss": 1.0164, "step": 15731 }, { "epoch": 13.055601659751037, "grad_norm": 30.562559127807617, "learning_rate": 1.4781078838174275e-05, "loss": 0.9034, "step": 15732 }, { "epoch": 13.05643153526971, "grad_norm": 47.99113845825195, "learning_rate": 1.4780746887966805e-05, "loss": 0.7362, "step": 15733 }, { "epoch": 13.057261410788382, "grad_norm": 17.906139373779297, "learning_rate": 1.4780414937759337e-05, 
"loss": 0.4225, "step": 15734 }, { "epoch": 13.058091286307054, "grad_norm": 30.17359733581543, "learning_rate": 1.478008298755187e-05, "loss": 0.5612, "step": 15735 }, { "epoch": 13.058921161825726, "grad_norm": 35.59728240966797, "learning_rate": 1.4779751037344398e-05, "loss": 0.4331, "step": 15736 }, { "epoch": 13.059751037344398, "grad_norm": 49.98762893676758, "learning_rate": 1.477941908713693e-05, "loss": 1.026, "step": 15737 }, { "epoch": 13.06058091286307, "grad_norm": 43.19065475463867, "learning_rate": 1.4779087136929462e-05, "loss": 1.0196, "step": 15738 }, { "epoch": 13.061410788381743, "grad_norm": 54.35730743408203, "learning_rate": 1.4778755186721993e-05, "loss": 0.918, "step": 15739 }, { "epoch": 13.062240663900415, "grad_norm": 21.696334838867188, "learning_rate": 1.4778423236514523e-05, "loss": 0.6708, "step": 15740 }, { "epoch": 13.063070539419087, "grad_norm": 48.6459846496582, "learning_rate": 1.4778091286307055e-05, "loss": 1.0514, "step": 15741 }, { "epoch": 13.063900414937759, "grad_norm": 85.52975463867188, "learning_rate": 1.4777759336099586e-05, "loss": 0.355, "step": 15742 }, { "epoch": 13.064730290456431, "grad_norm": 66.65824127197266, "learning_rate": 1.4777427385892118e-05, "loss": 1.3431, "step": 15743 }, { "epoch": 13.065560165975104, "grad_norm": 28.154476165771484, "learning_rate": 1.4777095435684648e-05, "loss": 0.4448, "step": 15744 }, { "epoch": 13.066390041493776, "grad_norm": 47.16875076293945, "learning_rate": 1.4776763485477179e-05, "loss": 0.601, "step": 15745 }, { "epoch": 13.067219917012448, "grad_norm": 39.43735885620117, "learning_rate": 1.477643153526971e-05, "loss": 0.9355, "step": 15746 }, { "epoch": 13.06804979253112, "grad_norm": 24.294830322265625, "learning_rate": 1.4776099585062243e-05, "loss": 0.4972, "step": 15747 }, { "epoch": 13.068879668049792, "grad_norm": 44.46695327758789, "learning_rate": 1.4775767634854773e-05, "loss": 0.9115, "step": 15748 }, { "epoch": 13.069709543568464, "grad_norm": 
38.93302917480469, "learning_rate": 1.4775435684647304e-05, "loss": 0.717, "step": 15749 }, { "epoch": 13.070539419087137, "grad_norm": 38.90850067138672, "learning_rate": 1.4775103734439836e-05, "loss": 0.4315, "step": 15750 }, { "epoch": 13.071369294605809, "grad_norm": 33.485443115234375, "learning_rate": 1.4774771784232366e-05, "loss": 1.1328, "step": 15751 }, { "epoch": 13.072199170124481, "grad_norm": 30.478425979614258, "learning_rate": 1.4774439834024898e-05, "loss": 0.6071, "step": 15752 }, { "epoch": 13.073029045643153, "grad_norm": 26.622556686401367, "learning_rate": 1.4774107883817427e-05, "loss": 0.6907, "step": 15753 }, { "epoch": 13.073858921161825, "grad_norm": 39.1812744140625, "learning_rate": 1.477377593360996e-05, "loss": 1.0857, "step": 15754 }, { "epoch": 13.074688796680498, "grad_norm": 42.69581604003906, "learning_rate": 1.4773443983402491e-05, "loss": 0.726, "step": 15755 }, { "epoch": 13.07551867219917, "grad_norm": 72.00402069091797, "learning_rate": 1.4773112033195023e-05, "loss": 0.9228, "step": 15756 }, { "epoch": 13.076348547717842, "grad_norm": 33.86761474609375, "learning_rate": 1.4772780082987552e-05, "loss": 1.1059, "step": 15757 }, { "epoch": 13.077178423236514, "grad_norm": 25.290437698364258, "learning_rate": 1.4772448132780084e-05, "loss": 0.481, "step": 15758 }, { "epoch": 13.078008298755186, "grad_norm": 35.285396575927734, "learning_rate": 1.4772116182572616e-05, "loss": 0.3882, "step": 15759 }, { "epoch": 13.078838174273859, "grad_norm": 33.053382873535156, "learning_rate": 1.4771784232365147e-05, "loss": 0.3372, "step": 15760 }, { "epoch": 13.07966804979253, "grad_norm": 30.46614646911621, "learning_rate": 1.4771452282157677e-05, "loss": 0.4103, "step": 15761 }, { "epoch": 13.080497925311203, "grad_norm": 32.03567123413086, "learning_rate": 1.4771120331950208e-05, "loss": 0.8964, "step": 15762 }, { "epoch": 13.081327800829875, "grad_norm": 62.010005950927734, "learning_rate": 1.477078838174274e-05, "loss": 0.5759, 
"step": 15763 }, { "epoch": 13.082157676348547, "grad_norm": 27.58878517150879, "learning_rate": 1.4770456431535272e-05, "loss": 0.9922, "step": 15764 }, { "epoch": 13.08298755186722, "grad_norm": 29.29714584350586, "learning_rate": 1.47701244813278e-05, "loss": 1.2046, "step": 15765 }, { "epoch": 13.083817427385892, "grad_norm": 27.258615493774414, "learning_rate": 1.4769792531120333e-05, "loss": 0.8385, "step": 15766 }, { "epoch": 13.084647302904564, "grad_norm": 123.04074096679688, "learning_rate": 1.4769460580912865e-05, "loss": 1.2011, "step": 15767 }, { "epoch": 13.085477178423236, "grad_norm": 54.03703689575195, "learning_rate": 1.4769128630705397e-05, "loss": 0.9467, "step": 15768 }, { "epoch": 13.086307053941908, "grad_norm": 22.231321334838867, "learning_rate": 1.4768796680497926e-05, "loss": 0.9034, "step": 15769 }, { "epoch": 13.08713692946058, "grad_norm": 20.178836822509766, "learning_rate": 1.4768464730290458e-05, "loss": 0.7225, "step": 15770 }, { "epoch": 13.087966804979253, "grad_norm": 39.81005859375, "learning_rate": 1.4768132780082988e-05, "loss": 1.1265, "step": 15771 }, { "epoch": 13.088796680497925, "grad_norm": 34.399070739746094, "learning_rate": 1.476780082987552e-05, "loss": 1.3579, "step": 15772 }, { "epoch": 13.089626556016597, "grad_norm": 100.49070739746094, "learning_rate": 1.4767468879668052e-05, "loss": 0.7806, "step": 15773 }, { "epoch": 13.09045643153527, "grad_norm": 21.130258560180664, "learning_rate": 1.4767136929460581e-05, "loss": 0.563, "step": 15774 }, { "epoch": 13.091286307053942, "grad_norm": 26.249372482299805, "learning_rate": 1.4766804979253113e-05, "loss": 0.4543, "step": 15775 }, { "epoch": 13.092116182572614, "grad_norm": 24.58478355407715, "learning_rate": 1.4766473029045645e-05, "loss": 0.446, "step": 15776 }, { "epoch": 13.092946058091286, "grad_norm": 22.56879997253418, "learning_rate": 1.4766141078838177e-05, "loss": 0.5614, "step": 15777 }, { "epoch": 13.093775933609958, "grad_norm": 41.512393951416016, 
"learning_rate": 1.4765809128630706e-05, "loss": 0.8777, "step": 15778 }, { "epoch": 13.09460580912863, "grad_norm": 15.678603172302246, "learning_rate": 1.4765477178423238e-05, "loss": 0.4181, "step": 15779 }, { "epoch": 13.095435684647303, "grad_norm": 25.290233612060547, "learning_rate": 1.4765145228215769e-05, "loss": 0.5006, "step": 15780 }, { "epoch": 13.096265560165975, "grad_norm": 31.903608322143555, "learning_rate": 1.47648132780083e-05, "loss": 0.4972, "step": 15781 }, { "epoch": 13.097095435684647, "grad_norm": 32.64115524291992, "learning_rate": 1.4764481327800831e-05, "loss": 0.9335, "step": 15782 }, { "epoch": 13.09792531120332, "grad_norm": 49.111183166503906, "learning_rate": 1.4764149377593361e-05, "loss": 0.6735, "step": 15783 }, { "epoch": 13.098755186721991, "grad_norm": 30.991775512695312, "learning_rate": 1.4763817427385894e-05, "loss": 1.1532, "step": 15784 }, { "epoch": 13.099585062240664, "grad_norm": 42.1986083984375, "learning_rate": 1.4763485477178426e-05, "loss": 0.6436, "step": 15785 }, { "epoch": 13.100414937759336, "grad_norm": 40.513675689697266, "learning_rate": 1.4763153526970954e-05, "loss": 0.8161, "step": 15786 }, { "epoch": 13.101244813278008, "grad_norm": 33.968685150146484, "learning_rate": 1.4762821576763487e-05, "loss": 0.719, "step": 15787 }, { "epoch": 13.10207468879668, "grad_norm": 22.548507690429688, "learning_rate": 1.4762489626556019e-05, "loss": 0.3942, "step": 15788 }, { "epoch": 13.102904564315352, "grad_norm": 55.10652160644531, "learning_rate": 1.4762157676348549e-05, "loss": 0.8922, "step": 15789 }, { "epoch": 13.103734439834025, "grad_norm": 30.819421768188477, "learning_rate": 1.476182572614108e-05, "loss": 0.7805, "step": 15790 }, { "epoch": 13.104564315352697, "grad_norm": 39.51045608520508, "learning_rate": 1.4761493775933612e-05, "loss": 0.7251, "step": 15791 }, { "epoch": 13.105394190871369, "grad_norm": 46.5970458984375, "learning_rate": 1.4761161825726142e-05, "loss": 0.4391, "step": 15792 }, { 
"epoch": 13.106224066390041, "grad_norm": 46.92253494262695, "learning_rate": 1.4760829875518674e-05, "loss": 1.1477, "step": 15793 }, { "epoch": 13.107053941908713, "grad_norm": 28.85897445678711, "learning_rate": 1.4760497925311203e-05, "loss": 1.2454, "step": 15794 }, { "epoch": 13.107883817427386, "grad_norm": 45.71010208129883, "learning_rate": 1.4760165975103735e-05, "loss": 1.2042, "step": 15795 }, { "epoch": 13.108713692946058, "grad_norm": 39.58175277709961, "learning_rate": 1.4759834024896267e-05, "loss": 0.5847, "step": 15796 }, { "epoch": 13.10954356846473, "grad_norm": 64.03437805175781, "learning_rate": 1.4759502074688799e-05, "loss": 1.434, "step": 15797 }, { "epoch": 13.110373443983402, "grad_norm": 47.79529571533203, "learning_rate": 1.4759170124481328e-05, "loss": 0.4439, "step": 15798 }, { "epoch": 13.111203319502074, "grad_norm": 28.300081253051758, "learning_rate": 1.475883817427386e-05, "loss": 0.3077, "step": 15799 }, { "epoch": 13.112033195020746, "grad_norm": 27.950605392456055, "learning_rate": 1.475850622406639e-05, "loss": 0.6102, "step": 15800 }, { "epoch": 13.112863070539419, "grad_norm": 30.245281219482422, "learning_rate": 1.4758174273858922e-05, "loss": 0.9343, "step": 15801 }, { "epoch": 13.11369294605809, "grad_norm": 40.14509963989258, "learning_rate": 1.4757842323651455e-05, "loss": 1.2927, "step": 15802 }, { "epoch": 13.114522821576763, "grad_norm": 39.83197784423828, "learning_rate": 1.4757510373443983e-05, "loss": 1.2105, "step": 15803 }, { "epoch": 13.115352697095435, "grad_norm": 47.6693000793457, "learning_rate": 1.4757178423236515e-05, "loss": 0.6782, "step": 15804 }, { "epoch": 13.116182572614107, "grad_norm": 36.82758331298828, "learning_rate": 1.4756846473029048e-05, "loss": 0.8539, "step": 15805 }, { "epoch": 13.11701244813278, "grad_norm": 35.76002883911133, "learning_rate": 1.475651452282158e-05, "loss": 0.8568, "step": 15806 }, { "epoch": 13.117842323651452, "grad_norm": 31.765228271484375, "learning_rate": 
1.4756182572614108e-05, "loss": 0.9774, "step": 15807 }, { "epoch": 13.118672199170124, "grad_norm": 13.707487106323242, "learning_rate": 1.475585062240664e-05, "loss": 0.3575, "step": 15808 }, { "epoch": 13.119502074688796, "grad_norm": 14.244229316711426, "learning_rate": 1.4755518672199171e-05, "loss": 0.3728, "step": 15809 }, { "epoch": 13.120331950207468, "grad_norm": 38.08344268798828, "learning_rate": 1.4755186721991703e-05, "loss": 0.8598, "step": 15810 }, { "epoch": 13.12116182572614, "grad_norm": 36.56351089477539, "learning_rate": 1.4754854771784233e-05, "loss": 0.6544, "step": 15811 }, { "epoch": 13.121991701244813, "grad_norm": 10.703325271606445, "learning_rate": 1.4754522821576764e-05, "loss": 0.3135, "step": 15812 }, { "epoch": 13.122821576763485, "grad_norm": 35.35214614868164, "learning_rate": 1.4754190871369296e-05, "loss": 0.9573, "step": 15813 }, { "epoch": 13.123651452282157, "grad_norm": 34.65727233886719, "learning_rate": 1.4753858921161828e-05, "loss": 1.0418, "step": 15814 }, { "epoch": 13.12448132780083, "grad_norm": 23.660554885864258, "learning_rate": 1.4753526970954357e-05, "loss": 0.7585, "step": 15815 }, { "epoch": 13.125311203319502, "grad_norm": 20.32256507873535, "learning_rate": 1.4753195020746889e-05, "loss": 0.5871, "step": 15816 }, { "epoch": 13.126141078838174, "grad_norm": 24.84573745727539, "learning_rate": 1.4752863070539421e-05, "loss": 0.5993, "step": 15817 }, { "epoch": 13.126970954356846, "grad_norm": 26.051406860351562, "learning_rate": 1.4752531120331951e-05, "loss": 0.6019, "step": 15818 }, { "epoch": 13.127800829875518, "grad_norm": 28.199445724487305, "learning_rate": 1.4752199170124482e-05, "loss": 0.5327, "step": 15819 }, { "epoch": 13.12863070539419, "grad_norm": 35.0614013671875, "learning_rate": 1.4751867219917014e-05, "loss": 0.995, "step": 15820 }, { "epoch": 13.129460580912863, "grad_norm": 58.77534866333008, "learning_rate": 1.4751535269709544e-05, "loss": 0.8916, "step": 15821 }, { "epoch": 
13.130290456431535, "grad_norm": 47.416404724121094, "learning_rate": 1.4751203319502076e-05, "loss": 0.6849, "step": 15822 }, { "epoch": 13.131120331950207, "grad_norm": 28.356603622436523, "learning_rate": 1.4750871369294605e-05, "loss": 1.0601, "step": 15823 }, { "epoch": 13.13195020746888, "grad_norm": 25.17653465270996, "learning_rate": 1.4750539419087137e-05, "loss": 0.8557, "step": 15824 }, { "epoch": 13.132780082987551, "grad_norm": 76.5343246459961, "learning_rate": 1.475020746887967e-05, "loss": 0.788, "step": 15825 }, { "epoch": 13.133609958506224, "grad_norm": 39.198890686035156, "learning_rate": 1.4749875518672201e-05, "loss": 0.754, "step": 15826 }, { "epoch": 13.134439834024896, "grad_norm": 22.997129440307617, "learning_rate": 1.4749543568464732e-05, "loss": 0.6905, "step": 15827 }, { "epoch": 13.135269709543568, "grad_norm": 62.62251663208008, "learning_rate": 1.4749211618257262e-05, "loss": 1.0916, "step": 15828 }, { "epoch": 13.13609958506224, "grad_norm": 27.70880889892578, "learning_rate": 1.4748879668049794e-05, "loss": 0.8985, "step": 15829 }, { "epoch": 13.136929460580912, "grad_norm": 37.07672882080078, "learning_rate": 1.4748547717842325e-05, "loss": 0.6766, "step": 15830 }, { "epoch": 13.137759336099585, "grad_norm": 31.31519889831543, "learning_rate": 1.4748215767634857e-05, "loss": 0.5277, "step": 15831 }, { "epoch": 13.138589211618257, "grad_norm": 26.541988372802734, "learning_rate": 1.4747883817427386e-05, "loss": 1.4002, "step": 15832 }, { "epoch": 13.139419087136929, "grad_norm": 75.77335357666016, "learning_rate": 1.4747551867219918e-05, "loss": 0.5091, "step": 15833 }, { "epoch": 13.140248962655601, "grad_norm": 24.110137939453125, "learning_rate": 1.474721991701245e-05, "loss": 0.7705, "step": 15834 }, { "epoch": 13.141078838174273, "grad_norm": 65.13681030273438, "learning_rate": 1.4746887966804982e-05, "loss": 1.1108, "step": 15835 }, { "epoch": 13.141908713692946, "grad_norm": 52.33769226074219, "learning_rate": 
1.474655601659751e-05, "loss": 0.7739, "step": 15836 }, { "epoch": 13.142738589211618, "grad_norm": 34.8755989074707, "learning_rate": 1.4746224066390043e-05, "loss": 1.7537, "step": 15837 }, { "epoch": 13.14356846473029, "grad_norm": 15.310151100158691, "learning_rate": 1.4745892116182575e-05, "loss": 0.3605, "step": 15838 }, { "epoch": 13.144398340248962, "grad_norm": 44.34347915649414, "learning_rate": 1.4745560165975105e-05, "loss": 0.7866, "step": 15839 }, { "epoch": 13.145228215767634, "grad_norm": 15.136945724487305, "learning_rate": 1.4745228215767636e-05, "loss": 0.2899, "step": 15840 }, { "epoch": 13.146058091286307, "grad_norm": 36.114601135253906, "learning_rate": 1.4744896265560166e-05, "loss": 0.9839, "step": 15841 }, { "epoch": 13.146887966804979, "grad_norm": 61.90733337402344, "learning_rate": 1.4744564315352698e-05, "loss": 0.5389, "step": 15842 }, { "epoch": 13.147717842323651, "grad_norm": 31.75872802734375, "learning_rate": 1.474423236514523e-05, "loss": 0.8779, "step": 15843 }, { "epoch": 13.148547717842323, "grad_norm": 156.10916137695312, "learning_rate": 1.4743900414937759e-05, "loss": 0.8828, "step": 15844 }, { "epoch": 13.149377593360995, "grad_norm": 46.18838882446289, "learning_rate": 1.4743568464730291e-05, "loss": 0.658, "step": 15845 }, { "epoch": 13.150207468879668, "grad_norm": 58.69926452636719, "learning_rate": 1.4743236514522823e-05, "loss": 1.1598, "step": 15846 }, { "epoch": 13.15103734439834, "grad_norm": 65.79117584228516, "learning_rate": 1.4742904564315354e-05, "loss": 1.6575, "step": 15847 }, { "epoch": 13.151867219917012, "grad_norm": 36.64295196533203, "learning_rate": 1.4742572614107884e-05, "loss": 0.6807, "step": 15848 }, { "epoch": 13.152697095435684, "grad_norm": 20.439010620117188, "learning_rate": 1.4742240663900416e-05, "loss": 0.5084, "step": 15849 }, { "epoch": 13.153526970954356, "grad_norm": 21.12236976623535, "learning_rate": 1.4741908713692947e-05, "loss": 0.6904, "step": 15850 }, { "epoch": 
13.154356846473028, "grad_norm": 20.183807373046875, "learning_rate": 1.4741576763485479e-05, "loss": 0.5195, "step": 15851 }, { "epoch": 13.1551867219917, "grad_norm": 50.408634185791016, "learning_rate": 1.4741244813278011e-05, "loss": 1.0108, "step": 15852 }, { "epoch": 13.156016597510373, "grad_norm": 27.021827697753906, "learning_rate": 1.474091286307054e-05, "loss": 0.7271, "step": 15853 }, { "epoch": 13.156846473029045, "grad_norm": 32.66682815551758, "learning_rate": 1.4740580912863072e-05, "loss": 1.1579, "step": 15854 }, { "epoch": 13.157676348547717, "grad_norm": 29.595544815063477, "learning_rate": 1.4740248962655604e-05, "loss": 0.3698, "step": 15855 }, { "epoch": 13.15850622406639, "grad_norm": 26.28352165222168, "learning_rate": 1.4739917012448134e-05, "loss": 0.9693, "step": 15856 }, { "epoch": 13.159336099585062, "grad_norm": 30.811725616455078, "learning_rate": 1.4739585062240665e-05, "loss": 1.0048, "step": 15857 }, { "epoch": 13.160165975103734, "grad_norm": 24.562196731567383, "learning_rate": 1.4739253112033197e-05, "loss": 0.7029, "step": 15858 }, { "epoch": 13.160995850622406, "grad_norm": 44.41636276245117, "learning_rate": 1.4738921161825727e-05, "loss": 1.5521, "step": 15859 }, { "epoch": 13.161825726141078, "grad_norm": 36.87843704223633, "learning_rate": 1.473858921161826e-05, "loss": 1.1912, "step": 15860 }, { "epoch": 13.16265560165975, "grad_norm": 20.21044921875, "learning_rate": 1.473825726141079e-05, "loss": 0.6043, "step": 15861 }, { "epoch": 13.163485477178423, "grad_norm": 36.97043228149414, "learning_rate": 1.473792531120332e-05, "loss": 0.5105, "step": 15862 }, { "epoch": 13.164315352697095, "grad_norm": 52.878822326660156, "learning_rate": 1.4737593360995852e-05, "loss": 1.9522, "step": 15863 }, { "epoch": 13.165145228215767, "grad_norm": 31.95339012145996, "learning_rate": 1.4737261410788384e-05, "loss": 1.2774, "step": 15864 }, { "epoch": 13.16597510373444, "grad_norm": 43.19318771362305, "learning_rate": 
1.4736929460580913e-05, "loss": 0.7913, "step": 15865 }, { "epoch": 13.166804979253111, "grad_norm": 89.65840148925781, "learning_rate": 1.4736597510373445e-05, "loss": 1.2024, "step": 15866 }, { "epoch": 13.167634854771784, "grad_norm": 22.619792938232422, "learning_rate": 1.4736265560165977e-05, "loss": 0.5199, "step": 15867 }, { "epoch": 13.168464730290456, "grad_norm": 39.15717315673828, "learning_rate": 1.4735933609958508e-05, "loss": 1.0494, "step": 15868 }, { "epoch": 13.169294605809128, "grad_norm": 38.254295349121094, "learning_rate": 1.4735601659751038e-05, "loss": 0.6522, "step": 15869 }, { "epoch": 13.1701244813278, "grad_norm": 47.058589935302734, "learning_rate": 1.4735269709543568e-05, "loss": 0.9219, "step": 15870 }, { "epoch": 13.170954356846472, "grad_norm": 56.82769012451172, "learning_rate": 1.47349377593361e-05, "loss": 0.6629, "step": 15871 }, { "epoch": 13.171784232365145, "grad_norm": 19.6241455078125, "learning_rate": 1.4734605809128633e-05, "loss": 0.5939, "step": 15872 }, { "epoch": 13.172614107883817, "grad_norm": 10.784390449523926, "learning_rate": 1.4734273858921161e-05, "loss": 0.3407, "step": 15873 }, { "epoch": 13.173443983402489, "grad_norm": 28.236984252929688, "learning_rate": 1.4733941908713694e-05, "loss": 0.663, "step": 15874 }, { "epoch": 13.174273858921161, "grad_norm": 34.58835220336914, "learning_rate": 1.4733609958506226e-05, "loss": 0.8705, "step": 15875 }, { "epoch": 13.175103734439833, "grad_norm": 66.16084289550781, "learning_rate": 1.4733278008298758e-05, "loss": 0.7992, "step": 15876 }, { "epoch": 13.175933609958506, "grad_norm": 30.074052810668945, "learning_rate": 1.4732946058091286e-05, "loss": 0.7928, "step": 15877 }, { "epoch": 13.176763485477178, "grad_norm": 29.75493049621582, "learning_rate": 1.4732614107883819e-05, "loss": 0.6145, "step": 15878 }, { "epoch": 13.17759336099585, "grad_norm": 21.081066131591797, "learning_rate": 1.4732282157676349e-05, "loss": 0.3222, "step": 15879 }, { "epoch": 
13.178423236514522, "grad_norm": 37.07485580444336, "learning_rate": 1.4731950207468881e-05, "loss": 0.6877, "step": 15880 }, { "epoch": 13.179253112033194, "grad_norm": 35.317054748535156, "learning_rate": 1.4731618257261413e-05, "loss": 0.5843, "step": 15881 }, { "epoch": 13.180082987551867, "grad_norm": 18.286951065063477, "learning_rate": 1.4731286307053942e-05, "loss": 0.5171, "step": 15882 }, { "epoch": 13.180912863070539, "grad_norm": 37.015533447265625, "learning_rate": 1.4730954356846474e-05, "loss": 0.8618, "step": 15883 }, { "epoch": 13.181742738589211, "grad_norm": 45.56247329711914, "learning_rate": 1.4730622406639006e-05, "loss": 0.7902, "step": 15884 }, { "epoch": 13.182572614107883, "grad_norm": 45.13071060180664, "learning_rate": 1.4730290456431538e-05, "loss": 1.5404, "step": 15885 }, { "epoch": 13.183402489626555, "grad_norm": 66.53580474853516, "learning_rate": 1.4729958506224067e-05, "loss": 0.938, "step": 15886 }, { "epoch": 13.184232365145228, "grad_norm": 121.65361785888672, "learning_rate": 1.4729626556016599e-05, "loss": 0.7827, "step": 15887 }, { "epoch": 13.1850622406639, "grad_norm": 51.20486831665039, "learning_rate": 1.472929460580913e-05, "loss": 0.4806, "step": 15888 }, { "epoch": 13.185892116182572, "grad_norm": 93.76437377929688, "learning_rate": 1.4728962655601662e-05, "loss": 1.2102, "step": 15889 }, { "epoch": 13.186721991701244, "grad_norm": 35.29441833496094, "learning_rate": 1.4728630705394192e-05, "loss": 0.6764, "step": 15890 }, { "epoch": 13.187551867219916, "grad_norm": 44.262393951416016, "learning_rate": 1.4728298755186722e-05, "loss": 0.9314, "step": 15891 }, { "epoch": 13.188381742738589, "grad_norm": 84.54241943359375, "learning_rate": 1.4727966804979255e-05, "loss": 1.2118, "step": 15892 }, { "epoch": 13.18921161825726, "grad_norm": 25.899412155151367, "learning_rate": 1.4727634854771787e-05, "loss": 0.5853, "step": 15893 }, { "epoch": 13.190041493775933, "grad_norm": 31.648717880249023, "learning_rate": 
1.4727302904564315e-05, "loss": 0.8274, "step": 15894 }, { "epoch": 13.190871369294605, "grad_norm": 24.244733810424805, "learning_rate": 1.4726970954356847e-05, "loss": 0.9009, "step": 15895 }, { "epoch": 13.191701244813277, "grad_norm": 41.137733459472656, "learning_rate": 1.472663900414938e-05, "loss": 0.7784, "step": 15896 }, { "epoch": 13.19253112033195, "grad_norm": 43.34742736816406, "learning_rate": 1.472630705394191e-05, "loss": 1.1044, "step": 15897 }, { "epoch": 13.193360995850622, "grad_norm": 52.27677536010742, "learning_rate": 1.472597510373444e-05, "loss": 0.8306, "step": 15898 }, { "epoch": 13.194190871369294, "grad_norm": 34.947444915771484, "learning_rate": 1.4725643153526972e-05, "loss": 0.6408, "step": 15899 }, { "epoch": 13.195020746887966, "grad_norm": 52.265995025634766, "learning_rate": 1.4725311203319503e-05, "loss": 1.0853, "step": 15900 }, { "epoch": 13.195850622406638, "grad_norm": 24.239477157592773, "learning_rate": 1.4724979253112035e-05, "loss": 1.0072, "step": 15901 }, { "epoch": 13.19668049792531, "grad_norm": 16.17587661743164, "learning_rate": 1.4724647302904564e-05, "loss": 0.4095, "step": 15902 }, { "epoch": 13.197510373443983, "grad_norm": 35.49964141845703, "learning_rate": 1.4724315352697096e-05, "loss": 1.4241, "step": 15903 }, { "epoch": 13.198340248962655, "grad_norm": 52.07166290283203, "learning_rate": 1.4723983402489628e-05, "loss": 1.4245, "step": 15904 }, { "epoch": 13.199170124481327, "grad_norm": 39.545654296875, "learning_rate": 1.472365145228216e-05, "loss": 1.0888, "step": 15905 }, { "epoch": 13.2, "grad_norm": 59.77772903442383, "learning_rate": 1.472331950207469e-05, "loss": 1.0154, "step": 15906 }, { "epoch": 13.200829875518671, "grad_norm": 23.803863525390625, "learning_rate": 1.4722987551867221e-05, "loss": 0.6614, "step": 15907 }, { "epoch": 13.201659751037344, "grad_norm": 27.920442581176758, "learning_rate": 1.4722655601659753e-05, "loss": 0.6641, "step": 15908 }, { "epoch": 13.202489626556016, 
"grad_norm": 37.86351013183594, "learning_rate": 1.4722323651452283e-05, "loss": 1.1209, "step": 15909 }, { "epoch": 13.203319502074688, "grad_norm": 22.56744956970215, "learning_rate": 1.4721991701244815e-05, "loss": 0.6921, "step": 15910 }, { "epoch": 13.20414937759336, "grad_norm": 67.94129180908203, "learning_rate": 1.4721659751037344e-05, "loss": 0.7358, "step": 15911 }, { "epoch": 13.204979253112032, "grad_norm": 34.52901077270508, "learning_rate": 1.4721327800829876e-05, "loss": 0.8139, "step": 15912 }, { "epoch": 13.205809128630705, "grad_norm": 40.59910583496094, "learning_rate": 1.4720995850622408e-05, "loss": 0.5822, "step": 15913 }, { "epoch": 13.206639004149377, "grad_norm": 26.177501678466797, "learning_rate": 1.472066390041494e-05, "loss": 0.6965, "step": 15914 }, { "epoch": 13.207468879668049, "grad_norm": 26.84950828552246, "learning_rate": 1.472033195020747e-05, "loss": 0.8515, "step": 15915 }, { "epoch": 13.208298755186721, "grad_norm": 24.426362991333008, "learning_rate": 1.4720000000000001e-05, "loss": 0.9143, "step": 15916 }, { "epoch": 13.209128630705393, "grad_norm": 37.353858947753906, "learning_rate": 1.4719668049792532e-05, "loss": 0.5632, "step": 15917 }, { "epoch": 13.209958506224066, "grad_norm": 60.65129470825195, "learning_rate": 1.4719336099585064e-05, "loss": 1.3068, "step": 15918 }, { "epoch": 13.210788381742738, "grad_norm": 22.19947624206543, "learning_rate": 1.4719004149377594e-05, "loss": 0.6895, "step": 15919 }, { "epoch": 13.21161825726141, "grad_norm": 84.98445129394531, "learning_rate": 1.4718672199170125e-05, "loss": 1.2491, "step": 15920 }, { "epoch": 13.212448132780082, "grad_norm": 47.93247985839844, "learning_rate": 1.4718340248962657e-05, "loss": 0.9235, "step": 15921 }, { "epoch": 13.213278008298754, "grad_norm": 16.516868591308594, "learning_rate": 1.4718008298755189e-05, "loss": 0.5666, "step": 15922 }, { "epoch": 13.214107883817427, "grad_norm": 26.818073272705078, "learning_rate": 1.4717676348547718e-05, "loss": 
0.4637, "step": 15923 }, { "epoch": 13.214937759336099, "grad_norm": 23.650211334228516, "learning_rate": 1.471734439834025e-05, "loss": 0.4606, "step": 15924 }, { "epoch": 13.215767634854771, "grad_norm": 21.13850212097168, "learning_rate": 1.4717012448132782e-05, "loss": 0.5032, "step": 15925 }, { "epoch": 13.216597510373443, "grad_norm": 59.15947723388672, "learning_rate": 1.4716680497925312e-05, "loss": 0.7707, "step": 15926 }, { "epoch": 13.217427385892115, "grad_norm": 42.4507942199707, "learning_rate": 1.4716348547717843e-05, "loss": 0.9387, "step": 15927 }, { "epoch": 13.218257261410788, "grad_norm": 69.38037109375, "learning_rate": 1.4716016597510375e-05, "loss": 0.4609, "step": 15928 }, { "epoch": 13.21908713692946, "grad_norm": 28.350435256958008, "learning_rate": 1.4715684647302905e-05, "loss": 0.9977, "step": 15929 }, { "epoch": 13.219917012448132, "grad_norm": 35.675235748291016, "learning_rate": 1.4715352697095437e-05, "loss": 1.1412, "step": 15930 }, { "epoch": 13.220746887966804, "grad_norm": 40.04389572143555, "learning_rate": 1.471502074688797e-05, "loss": 1.057, "step": 15931 }, { "epoch": 13.221576763485476, "grad_norm": 244.07139587402344, "learning_rate": 1.4714688796680498e-05, "loss": 1.7622, "step": 15932 }, { "epoch": 13.222406639004149, "grad_norm": 33.0927848815918, "learning_rate": 1.471435684647303e-05, "loss": 0.4354, "step": 15933 }, { "epoch": 13.22323651452282, "grad_norm": 42.221439361572266, "learning_rate": 1.4714024896265562e-05, "loss": 0.9112, "step": 15934 }, { "epoch": 13.224066390041493, "grad_norm": 22.28762435913086, "learning_rate": 1.4713692946058093e-05, "loss": 0.5629, "step": 15935 }, { "epoch": 13.224896265560165, "grad_norm": 21.22564697265625, "learning_rate": 1.4713360995850623e-05, "loss": 0.3959, "step": 15936 }, { "epoch": 13.225726141078837, "grad_norm": 60.410457611083984, "learning_rate": 1.4713029045643155e-05, "loss": 0.78, "step": 15937 }, { "epoch": 13.22655601659751, "grad_norm": 28.456134796142578, 
"learning_rate": 1.4712697095435686e-05, "loss": 0.5879, "step": 15938 }, { "epoch": 13.227385892116182, "grad_norm": 20.359493255615234, "learning_rate": 1.4712365145228218e-05, "loss": 0.5578, "step": 15939 }, { "epoch": 13.228215767634854, "grad_norm": 28.204055786132812, "learning_rate": 1.4712033195020747e-05, "loss": 1.2008, "step": 15940 }, { "epoch": 13.229045643153526, "grad_norm": 43.710899353027344, "learning_rate": 1.4711701244813279e-05, "loss": 0.6623, "step": 15941 }, { "epoch": 13.229875518672198, "grad_norm": 33.5881233215332, "learning_rate": 1.471136929460581e-05, "loss": 0.9486, "step": 15942 }, { "epoch": 13.23070539419087, "grad_norm": 25.073863983154297, "learning_rate": 1.4711037344398343e-05, "loss": 0.5299, "step": 15943 }, { "epoch": 13.231535269709543, "grad_norm": 31.10942840576172, "learning_rate": 1.4710705394190872e-05, "loss": 0.8529, "step": 15944 }, { "epoch": 13.232365145228215, "grad_norm": 68.25021362304688, "learning_rate": 1.4710373443983404e-05, "loss": 1.1125, "step": 15945 }, { "epoch": 13.233195020746887, "grad_norm": 50.68031692504883, "learning_rate": 1.4710041493775936e-05, "loss": 0.8918, "step": 15946 }, { "epoch": 13.23402489626556, "grad_norm": 52.0284538269043, "learning_rate": 1.4709709543568466e-05, "loss": 1.2589, "step": 15947 }, { "epoch": 13.234854771784232, "grad_norm": 27.916589736938477, "learning_rate": 1.4709377593360997e-05, "loss": 0.3702, "step": 15948 }, { "epoch": 13.235684647302904, "grad_norm": 33.92209243774414, "learning_rate": 1.4709045643153527e-05, "loss": 0.9451, "step": 15949 }, { "epoch": 13.236514522821576, "grad_norm": 39.49519348144531, "learning_rate": 1.4708713692946059e-05, "loss": 0.6101, "step": 15950 }, { "epoch": 13.237344398340248, "grad_norm": 27.6351375579834, "learning_rate": 1.4708381742738591e-05, "loss": 0.6832, "step": 15951 }, { "epoch": 13.23817427385892, "grad_norm": 41.249237060546875, "learning_rate": 1.470804979253112e-05, "loss": 0.7573, "step": 15952 }, { 
"epoch": 13.239004149377593, "grad_norm": 56.94164276123047, "learning_rate": 1.4707717842323652e-05, "loss": 1.2226, "step": 15953 }, { "epoch": 13.239834024896265, "grad_norm": 22.49837303161621, "learning_rate": 1.4707385892116184e-05, "loss": 0.4177, "step": 15954 }, { "epoch": 13.240663900414937, "grad_norm": 36.0432243347168, "learning_rate": 1.4707053941908716e-05, "loss": 0.6709, "step": 15955 }, { "epoch": 13.241493775933609, "grad_norm": 96.94156646728516, "learning_rate": 1.4706721991701245e-05, "loss": 0.7473, "step": 15956 }, { "epoch": 13.242323651452281, "grad_norm": 16.044940948486328, "learning_rate": 1.4706390041493777e-05, "loss": 0.3385, "step": 15957 }, { "epoch": 13.243153526970953, "grad_norm": 21.739595413208008, "learning_rate": 1.4706058091286308e-05, "loss": 0.7861, "step": 15958 }, { "epoch": 13.243983402489626, "grad_norm": 23.961071014404297, "learning_rate": 1.470572614107884e-05, "loss": 0.4924, "step": 15959 }, { "epoch": 13.244813278008298, "grad_norm": 37.50337600708008, "learning_rate": 1.4705394190871372e-05, "loss": 0.5215, "step": 15960 }, { "epoch": 13.24564315352697, "grad_norm": 42.124237060546875, "learning_rate": 1.47050622406639e-05, "loss": 1.2342, "step": 15961 }, { "epoch": 13.246473029045642, "grad_norm": 28.399402618408203, "learning_rate": 1.4704730290456433e-05, "loss": 0.8481, "step": 15962 }, { "epoch": 13.247302904564314, "grad_norm": 32.969871520996094, "learning_rate": 1.4704398340248965e-05, "loss": 0.6151, "step": 15963 }, { "epoch": 13.248132780082987, "grad_norm": 25.576852798461914, "learning_rate": 1.4704066390041495e-05, "loss": 0.5126, "step": 15964 }, { "epoch": 13.248962655601659, "grad_norm": 54.6905517578125, "learning_rate": 1.4703734439834026e-05, "loss": 0.58, "step": 15965 }, { "epoch": 13.249792531120331, "grad_norm": 20.825775146484375, "learning_rate": 1.4703402489626558e-05, "loss": 0.6153, "step": 15966 }, { "epoch": 13.250622406639003, "grad_norm": 42.36216735839844, "learning_rate": 
1.4703070539419088e-05, "loss": 1.0982, "step": 15967 }, { "epoch": 13.251452282157675, "grad_norm": 28.67407989501953, "learning_rate": 1.470273858921162e-05, "loss": 0.937, "step": 15968 }, { "epoch": 13.252282157676348, "grad_norm": 30.24030876159668, "learning_rate": 1.470240663900415e-05, "loss": 0.7641, "step": 15969 }, { "epoch": 13.25311203319502, "grad_norm": 32.5504035949707, "learning_rate": 1.4702074688796681e-05, "loss": 0.9709, "step": 15970 }, { "epoch": 13.253941908713692, "grad_norm": 27.67074966430664, "learning_rate": 1.4701742738589213e-05, "loss": 0.5033, "step": 15971 }, { "epoch": 13.254771784232364, "grad_norm": 96.50711059570312, "learning_rate": 1.4701410788381745e-05, "loss": 0.7996, "step": 15972 }, { "epoch": 13.255601659751036, "grad_norm": 23.50665283203125, "learning_rate": 1.4701078838174274e-05, "loss": 0.6196, "step": 15973 }, { "epoch": 13.256431535269709, "grad_norm": 19.893985748291016, "learning_rate": 1.4700746887966806e-05, "loss": 0.553, "step": 15974 }, { "epoch": 13.25726141078838, "grad_norm": 46.72661209106445, "learning_rate": 1.4700414937759338e-05, "loss": 0.593, "step": 15975 }, { "epoch": 13.258091286307055, "grad_norm": 50.707393646240234, "learning_rate": 1.4700082987551869e-05, "loss": 1.0197, "step": 15976 }, { "epoch": 13.258921161825727, "grad_norm": 35.36137390136719, "learning_rate": 1.4699751037344399e-05, "loss": 0.7391, "step": 15977 }, { "epoch": 13.2597510373444, "grad_norm": 33.0281982421875, "learning_rate": 1.4699419087136931e-05, "loss": 0.9917, "step": 15978 }, { "epoch": 13.260580912863071, "grad_norm": 30.727245330810547, "learning_rate": 1.4699087136929461e-05, "loss": 0.78, "step": 15979 }, { "epoch": 13.261410788381744, "grad_norm": 44.1932373046875, "learning_rate": 1.4698755186721994e-05, "loss": 0.6618, "step": 15980 }, { "epoch": 13.262240663900416, "grad_norm": 42.628448486328125, "learning_rate": 1.4698423236514522e-05, "loss": 0.4979, "step": 15981 }, { "epoch": 13.263070539419088, 
"grad_norm": 36.902278900146484, "learning_rate": 1.4698091286307054e-05, "loss": 0.6201, "step": 15982 }, { "epoch": 13.26390041493776, "grad_norm": 22.164512634277344, "learning_rate": 1.4697759336099587e-05, "loss": 0.6487, "step": 15983 }, { "epoch": 13.264730290456432, "grad_norm": 20.345561981201172, "learning_rate": 1.4697427385892119e-05, "loss": 0.5345, "step": 15984 }, { "epoch": 13.265560165975105, "grad_norm": 43.644229888916016, "learning_rate": 1.4697095435684649e-05, "loss": 0.7787, "step": 15985 }, { "epoch": 13.266390041493777, "grad_norm": 23.87523078918457, "learning_rate": 1.469676348547718e-05, "loss": 0.9335, "step": 15986 }, { "epoch": 13.267219917012449, "grad_norm": 24.220226287841797, "learning_rate": 1.469643153526971e-05, "loss": 0.6917, "step": 15987 }, { "epoch": 13.268049792531121, "grad_norm": 23.63812828063965, "learning_rate": 1.4696099585062242e-05, "loss": 0.5474, "step": 15988 }, { "epoch": 13.268879668049793, "grad_norm": 17.68450355529785, "learning_rate": 1.4695767634854774e-05, "loss": 0.407, "step": 15989 }, { "epoch": 13.269709543568466, "grad_norm": 26.36467170715332, "learning_rate": 1.4695435684647303e-05, "loss": 0.6141, "step": 15990 }, { "epoch": 13.270539419087138, "grad_norm": 42.23691940307617, "learning_rate": 1.4695103734439835e-05, "loss": 0.6445, "step": 15991 }, { "epoch": 13.27136929460581, "grad_norm": 30.462696075439453, "learning_rate": 1.4694771784232367e-05, "loss": 0.5662, "step": 15992 }, { "epoch": 13.272199170124482, "grad_norm": 62.190311431884766, "learning_rate": 1.4694439834024899e-05, "loss": 0.6845, "step": 15993 }, { "epoch": 13.273029045643154, "grad_norm": 56.14601135253906, "learning_rate": 1.4694107883817428e-05, "loss": 0.6968, "step": 15994 }, { "epoch": 13.273858921161827, "grad_norm": 36.21408462524414, "learning_rate": 1.469377593360996e-05, "loss": 0.8206, "step": 15995 }, { "epoch": 13.274688796680499, "grad_norm": 15.809011459350586, "learning_rate": 1.469344398340249e-05, "loss": 
0.3543, "step": 15996 }, { "epoch": 13.275518672199171, "grad_norm": 70.57843017578125, "learning_rate": 1.4693112033195022e-05, "loss": 0.5199, "step": 15997 }, { "epoch": 13.276348547717843, "grad_norm": 36.07936096191406, "learning_rate": 1.4692780082987553e-05, "loss": 0.7022, "step": 15998 }, { "epoch": 13.277178423236515, "grad_norm": 45.43635177612305, "learning_rate": 1.4692448132780083e-05, "loss": 1.8626, "step": 15999 }, { "epoch": 13.278008298755188, "grad_norm": 21.92746925354004, "learning_rate": 1.4692116182572615e-05, "loss": 0.4214, "step": 16000 }, { "epoch": 13.27883817427386, "grad_norm": 49.24614715576172, "learning_rate": 1.4691784232365148e-05, "loss": 0.6512, "step": 16001 }, { "epoch": 13.279668049792532, "grad_norm": 55.05119323730469, "learning_rate": 1.4691452282157676e-05, "loss": 0.5185, "step": 16002 }, { "epoch": 13.280497925311204, "grad_norm": 57.18662643432617, "learning_rate": 1.4691120331950208e-05, "loss": 1.1773, "step": 16003 }, { "epoch": 13.281327800829876, "grad_norm": 41.69756317138672, "learning_rate": 1.469078838174274e-05, "loss": 0.839, "step": 16004 }, { "epoch": 13.282157676348548, "grad_norm": 62.373680114746094, "learning_rate": 1.4690456431535271e-05, "loss": 0.7341, "step": 16005 }, { "epoch": 13.28298755186722, "grad_norm": 32.95460510253906, "learning_rate": 1.4690124481327801e-05, "loss": 0.6039, "step": 16006 }, { "epoch": 13.283817427385893, "grad_norm": 39.138671875, "learning_rate": 1.4689792531120333e-05, "loss": 0.7998, "step": 16007 }, { "epoch": 13.284647302904565, "grad_norm": 34.924381256103516, "learning_rate": 1.4689460580912864e-05, "loss": 1.523, "step": 16008 }, { "epoch": 13.285477178423237, "grad_norm": 47.9974250793457, "learning_rate": 1.4689128630705396e-05, "loss": 0.7605, "step": 16009 }, { "epoch": 13.28630705394191, "grad_norm": 61.13759994506836, "learning_rate": 1.4688796680497928e-05, "loss": 1.0962, "step": 16010 }, { "epoch": 13.287136929460582, "grad_norm": 32.40362548828125, 
"learning_rate": 1.4688464730290457e-05, "loss": 1.1772, "step": 16011 }, { "epoch": 13.287966804979254, "grad_norm": 74.33531951904297, "learning_rate": 1.4688132780082989e-05, "loss": 0.5263, "step": 16012 }, { "epoch": 13.288796680497926, "grad_norm": 51.45745849609375, "learning_rate": 1.4687800829875521e-05, "loss": 0.6315, "step": 16013 }, { "epoch": 13.289626556016598, "grad_norm": 55.23823547363281, "learning_rate": 1.4687468879668051e-05, "loss": 0.7756, "step": 16014 }, { "epoch": 13.29045643153527, "grad_norm": 25.687280654907227, "learning_rate": 1.4687136929460582e-05, "loss": 0.6928, "step": 16015 }, { "epoch": 13.291286307053943, "grad_norm": 93.07949829101562, "learning_rate": 1.4686804979253114e-05, "loss": 1.195, "step": 16016 }, { "epoch": 13.292116182572615, "grad_norm": 48.20707321166992, "learning_rate": 1.4686473029045644e-05, "loss": 0.4379, "step": 16017 }, { "epoch": 13.292946058091287, "grad_norm": 41.1224479675293, "learning_rate": 1.4686141078838176e-05, "loss": 0.9923, "step": 16018 }, { "epoch": 13.29377593360996, "grad_norm": 42.175357818603516, "learning_rate": 1.4685809128630705e-05, "loss": 1.2369, "step": 16019 }, { "epoch": 13.294605809128631, "grad_norm": 38.901092529296875, "learning_rate": 1.4685477178423237e-05, "loss": 0.9384, "step": 16020 }, { "epoch": 13.295435684647304, "grad_norm": 35.94820022583008, "learning_rate": 1.468514522821577e-05, "loss": 0.3655, "step": 16021 }, { "epoch": 13.296265560165976, "grad_norm": 42.44773864746094, "learning_rate": 1.4684813278008301e-05, "loss": 0.9184, "step": 16022 }, { "epoch": 13.297095435684648, "grad_norm": 28.30377197265625, "learning_rate": 1.468448132780083e-05, "loss": 0.6388, "step": 16023 }, { "epoch": 13.29792531120332, "grad_norm": 22.206905364990234, "learning_rate": 1.4684149377593362e-05, "loss": 0.4512, "step": 16024 }, { "epoch": 13.298755186721992, "grad_norm": 30.871074676513672, "learning_rate": 1.4683817427385894e-05, "loss": 0.9278, "step": 16025 }, { 
"epoch": 13.299585062240665, "grad_norm": 45.682289123535156, "learning_rate": 1.4683485477178425e-05, "loss": 0.943, "step": 16026 }, { "epoch": 13.300414937759337, "grad_norm": 81.46929168701172, "learning_rate": 1.4683153526970955e-05, "loss": 1.4629, "step": 16027 }, { "epoch": 13.301244813278009, "grad_norm": 39.06892776489258, "learning_rate": 1.4682821576763486e-05, "loss": 0.9552, "step": 16028 }, { "epoch": 13.302074688796681, "grad_norm": 34.38498306274414, "learning_rate": 1.4682489626556018e-05, "loss": 0.9827, "step": 16029 }, { "epoch": 13.302904564315353, "grad_norm": 23.635881423950195, "learning_rate": 1.468215767634855e-05, "loss": 0.3803, "step": 16030 }, { "epoch": 13.303734439834026, "grad_norm": 43.99521255493164, "learning_rate": 1.4681825726141079e-05, "loss": 1.0565, "step": 16031 }, { "epoch": 13.304564315352698, "grad_norm": 49.08487319946289, "learning_rate": 1.468149377593361e-05, "loss": 0.6057, "step": 16032 }, { "epoch": 13.30539419087137, "grad_norm": 40.561153411865234, "learning_rate": 1.4681161825726143e-05, "loss": 1.4648, "step": 16033 }, { "epoch": 13.306224066390042, "grad_norm": 28.468875885009766, "learning_rate": 1.4680829875518673e-05, "loss": 0.6111, "step": 16034 }, { "epoch": 13.307053941908714, "grad_norm": 30.01173210144043, "learning_rate": 1.4680497925311204e-05, "loss": 0.7335, "step": 16035 }, { "epoch": 13.307883817427387, "grad_norm": 18.516584396362305, "learning_rate": 1.4680165975103736e-05, "loss": 0.4826, "step": 16036 }, { "epoch": 13.308713692946059, "grad_norm": 51.478599548339844, "learning_rate": 1.4679834024896266e-05, "loss": 1.3192, "step": 16037 }, { "epoch": 13.309543568464731, "grad_norm": 46.00634002685547, "learning_rate": 1.4679502074688798e-05, "loss": 0.4758, "step": 16038 }, { "epoch": 13.310373443983403, "grad_norm": 33.62826919555664, "learning_rate": 1.467917012448133e-05, "loss": 1.5723, "step": 16039 }, { "epoch": 13.311203319502075, "grad_norm": 23.682558059692383, "learning_rate": 
1.4678838174273859e-05, "loss": 0.5044, "step": 16040 }, { "epoch": 13.312033195020748, "grad_norm": 21.80272102355957, "learning_rate": 1.4678506224066391e-05, "loss": 0.9116, "step": 16041 }, { "epoch": 13.31286307053942, "grad_norm": 48.15559768676758, "learning_rate": 1.4678174273858923e-05, "loss": 0.5525, "step": 16042 }, { "epoch": 13.313692946058092, "grad_norm": 24.696308135986328, "learning_rate": 1.4677842323651454e-05, "loss": 0.6171, "step": 16043 }, { "epoch": 13.314522821576764, "grad_norm": 42.57676696777344, "learning_rate": 1.4677510373443984e-05, "loss": 0.552, "step": 16044 }, { "epoch": 13.315352697095436, "grad_norm": 35.366355895996094, "learning_rate": 1.4677178423236516e-05, "loss": 0.8322, "step": 16045 }, { "epoch": 13.316182572614109, "grad_norm": 66.17202758789062, "learning_rate": 1.4676846473029047e-05, "loss": 0.7927, "step": 16046 }, { "epoch": 13.31701244813278, "grad_norm": 34.9229621887207, "learning_rate": 1.4676514522821579e-05, "loss": 0.9274, "step": 16047 }, { "epoch": 13.317842323651453, "grad_norm": 34.02840042114258, "learning_rate": 1.4676182572614107e-05, "loss": 0.7717, "step": 16048 }, { "epoch": 13.318672199170125, "grad_norm": 83.557373046875, "learning_rate": 1.467585062240664e-05, "loss": 1.3825, "step": 16049 }, { "epoch": 13.319502074688797, "grad_norm": 30.830793380737305, "learning_rate": 1.4675518672199172e-05, "loss": 0.4923, "step": 16050 }, { "epoch": 13.32033195020747, "grad_norm": 64.32363891601562, "learning_rate": 1.4675186721991704e-05, "loss": 2.0516, "step": 16051 }, { "epoch": 13.321161825726142, "grad_norm": 39.676082611083984, "learning_rate": 1.4674854771784232e-05, "loss": 0.7254, "step": 16052 }, { "epoch": 13.321991701244814, "grad_norm": 38.6737174987793, "learning_rate": 1.4674522821576765e-05, "loss": 0.9649, "step": 16053 }, { "epoch": 13.322821576763486, "grad_norm": 50.98023223876953, "learning_rate": 1.4674190871369297e-05, "loss": 1.8306, "step": 16054 }, { "epoch": 
13.323651452282158, "grad_norm": 26.816679000854492, "learning_rate": 1.4673858921161827e-05, "loss": 0.6722, "step": 16055 }, { "epoch": 13.32448132780083, "grad_norm": 36.375492095947266, "learning_rate": 1.4673526970954358e-05, "loss": 0.7403, "step": 16056 }, { "epoch": 13.325311203319503, "grad_norm": 37.808353424072266, "learning_rate": 1.4673195020746888e-05, "loss": 0.9527, "step": 16057 }, { "epoch": 13.326141078838175, "grad_norm": 44.6379508972168, "learning_rate": 1.467286307053942e-05, "loss": 0.9683, "step": 16058 }, { "epoch": 13.326970954356847, "grad_norm": 32.925445556640625, "learning_rate": 1.4672531120331952e-05, "loss": 0.9198, "step": 16059 }, { "epoch": 13.32780082987552, "grad_norm": 22.81494903564453, "learning_rate": 1.4672199170124481e-05, "loss": 0.6996, "step": 16060 }, { "epoch": 13.328630705394191, "grad_norm": 33.78524398803711, "learning_rate": 1.4671867219917013e-05, "loss": 0.5514, "step": 16061 }, { "epoch": 13.329460580912864, "grad_norm": 20.62493133544922, "learning_rate": 1.4671535269709545e-05, "loss": 0.334, "step": 16062 }, { "epoch": 13.330290456431536, "grad_norm": 31.43947982788086, "learning_rate": 1.4671203319502077e-05, "loss": 0.9497, "step": 16063 }, { "epoch": 13.331120331950208, "grad_norm": 32.908504486083984, "learning_rate": 1.4670871369294608e-05, "loss": 0.7712, "step": 16064 }, { "epoch": 13.33195020746888, "grad_norm": 32.629722595214844, "learning_rate": 1.4670539419087138e-05, "loss": 1.1401, "step": 16065 }, { "epoch": 13.332780082987552, "grad_norm": 24.13759422302246, "learning_rate": 1.4670207468879668e-05, "loss": 0.4789, "step": 16066 }, { "epoch": 13.333609958506225, "grad_norm": 41.24845886230469, "learning_rate": 1.46698755186722e-05, "loss": 0.8241, "step": 16067 }, { "epoch": 13.334439834024897, "grad_norm": 85.73126983642578, "learning_rate": 1.4669543568464733e-05, "loss": 1.1204, "step": 16068 }, { "epoch": 13.335269709543569, "grad_norm": 58.1562385559082, "learning_rate": 
1.4669211618257261e-05, "loss": 0.6105, "step": 16069 }, { "epoch": 13.336099585062241, "grad_norm": 41.545814514160156, "learning_rate": 1.4668879668049793e-05, "loss": 1.441, "step": 16070 }, { "epoch": 13.336929460580913, "grad_norm": 23.43390655517578, "learning_rate": 1.4668547717842326e-05, "loss": 0.7004, "step": 16071 }, { "epoch": 13.337759336099586, "grad_norm": 20.20660972595215, "learning_rate": 1.4668215767634858e-05, "loss": 0.3823, "step": 16072 }, { "epoch": 13.338589211618258, "grad_norm": 22.739234924316406, "learning_rate": 1.4667883817427386e-05, "loss": 0.25, "step": 16073 }, { "epoch": 13.33941908713693, "grad_norm": 38.762062072753906, "learning_rate": 1.4667551867219919e-05, "loss": 0.9576, "step": 16074 }, { "epoch": 13.340248962655602, "grad_norm": 37.97826385498047, "learning_rate": 1.4667219917012449e-05, "loss": 0.8182, "step": 16075 }, { "epoch": 13.341078838174274, "grad_norm": 26.457733154296875, "learning_rate": 1.4666887966804981e-05, "loss": 0.497, "step": 16076 }, { "epoch": 13.341908713692947, "grad_norm": 48.89453887939453, "learning_rate": 1.4666556016597511e-05, "loss": 0.768, "step": 16077 }, { "epoch": 13.342738589211619, "grad_norm": 40.65740203857422, "learning_rate": 1.4666224066390042e-05, "loss": 0.7374, "step": 16078 }, { "epoch": 13.343568464730291, "grad_norm": 43.309349060058594, "learning_rate": 1.4665892116182574e-05, "loss": 1.0853, "step": 16079 }, { "epoch": 13.344398340248963, "grad_norm": 10.870119094848633, "learning_rate": 1.4665560165975106e-05, "loss": 0.2092, "step": 16080 }, { "epoch": 13.345228215767635, "grad_norm": 46.14705276489258, "learning_rate": 1.4665228215767635e-05, "loss": 1.0863, "step": 16081 }, { "epoch": 13.346058091286308, "grad_norm": 45.13956069946289, "learning_rate": 1.4664896265560167e-05, "loss": 1.1594, "step": 16082 }, { "epoch": 13.34688796680498, "grad_norm": 13.163622856140137, "learning_rate": 1.4664564315352699e-05, "loss": 0.3359, "step": 16083 }, { "epoch": 
13.347717842323652, "grad_norm": 71.78093719482422, "learning_rate": 1.466423236514523e-05, "loss": 0.9508, "step": 16084 }, { "epoch": 13.348547717842324, "grad_norm": 57.70832443237305, "learning_rate": 1.466390041493776e-05, "loss": 0.7189, "step": 16085 }, { "epoch": 13.349377593360996, "grad_norm": 18.584943771362305, "learning_rate": 1.4663568464730292e-05, "loss": 0.5582, "step": 16086 }, { "epoch": 13.350207468879669, "grad_norm": 22.192272186279297, "learning_rate": 1.4663236514522822e-05, "loss": 0.357, "step": 16087 }, { "epoch": 13.35103734439834, "grad_norm": 36.45952606201172, "learning_rate": 1.4662904564315354e-05, "loss": 1.1984, "step": 16088 }, { "epoch": 13.351867219917013, "grad_norm": 26.97850227355957, "learning_rate": 1.4662572614107887e-05, "loss": 0.7379, "step": 16089 }, { "epoch": 13.352697095435685, "grad_norm": 30.65138816833496, "learning_rate": 1.4662240663900415e-05, "loss": 0.6365, "step": 16090 }, { "epoch": 13.353526970954357, "grad_norm": 40.27388000488281, "learning_rate": 1.4661908713692947e-05, "loss": 1.0005, "step": 16091 }, { "epoch": 13.35435684647303, "grad_norm": 28.367298126220703, "learning_rate": 1.466157676348548e-05, "loss": 0.897, "step": 16092 }, { "epoch": 13.355186721991702, "grad_norm": 30.354780197143555, "learning_rate": 1.466124481327801e-05, "loss": 0.6089, "step": 16093 }, { "epoch": 13.356016597510374, "grad_norm": 21.14191436767578, "learning_rate": 1.466091286307054e-05, "loss": 0.5652, "step": 16094 }, { "epoch": 13.356846473029046, "grad_norm": 33.888519287109375, "learning_rate": 1.4660580912863072e-05, "loss": 0.6468, "step": 16095 }, { "epoch": 13.357676348547718, "grad_norm": 30.750032424926758, "learning_rate": 1.4660248962655603e-05, "loss": 0.6605, "step": 16096 }, { "epoch": 13.35850622406639, "grad_norm": 29.575111389160156, "learning_rate": 1.4659917012448135e-05, "loss": 0.7474, "step": 16097 }, { "epoch": 13.359336099585063, "grad_norm": 29.894886016845703, "learning_rate": 
1.4659585062240664e-05, "loss": 1.4166, "step": 16098 }, { "epoch": 13.360165975103735, "grad_norm": 45.440208435058594, "learning_rate": 1.4659253112033196e-05, "loss": 0.8459, "step": 16099 }, { "epoch": 13.360995850622407, "grad_norm": 61.020870208740234, "learning_rate": 1.4658921161825728e-05, "loss": 1.1953, "step": 16100 }, { "epoch": 13.36182572614108, "grad_norm": 51.828582763671875, "learning_rate": 1.465858921161826e-05, "loss": 0.9206, "step": 16101 }, { "epoch": 13.362655601659752, "grad_norm": 30.529865264892578, "learning_rate": 1.4658257261410789e-05, "loss": 0.488, "step": 16102 }, { "epoch": 13.363485477178424, "grad_norm": 38.26826477050781, "learning_rate": 1.465792531120332e-05, "loss": 1.1974, "step": 16103 }, { "epoch": 13.364315352697096, "grad_norm": 32.66770935058594, "learning_rate": 1.4657593360995851e-05, "loss": 0.7109, "step": 16104 }, { "epoch": 13.365145228215768, "grad_norm": 122.94258117675781, "learning_rate": 1.4657261410788383e-05, "loss": 1.093, "step": 16105 }, { "epoch": 13.36597510373444, "grad_norm": 70.43266296386719, "learning_rate": 1.4656929460580914e-05, "loss": 0.8087, "step": 16106 }, { "epoch": 13.366804979253113, "grad_norm": 46.01008224487305, "learning_rate": 1.4656597510373444e-05, "loss": 1.1324, "step": 16107 }, { "epoch": 13.367634854771785, "grad_norm": 40.65729904174805, "learning_rate": 1.4656265560165976e-05, "loss": 0.8002, "step": 16108 }, { "epoch": 13.368464730290457, "grad_norm": 35.94105529785156, "learning_rate": 1.4655933609958508e-05, "loss": 0.9185, "step": 16109 }, { "epoch": 13.369294605809129, "grad_norm": 96.05714416503906, "learning_rate": 1.4655601659751037e-05, "loss": 0.9688, "step": 16110 }, { "epoch": 13.370124481327801, "grad_norm": 51.0817985534668, "learning_rate": 1.465526970954357e-05, "loss": 1.2256, "step": 16111 }, { "epoch": 13.370954356846473, "grad_norm": 25.87387466430664, "learning_rate": 1.4654937759336101e-05, "loss": 0.4795, "step": 16112 }, { "epoch": 
13.371784232365146, "grad_norm": 17.20317268371582, "learning_rate": 1.4654605809128632e-05, "loss": 0.4073, "step": 16113 }, { "epoch": 13.372614107883818, "grad_norm": 55.324649810791016, "learning_rate": 1.4654273858921162e-05, "loss": 1.0675, "step": 16114 }, { "epoch": 13.37344398340249, "grad_norm": 18.67184066772461, "learning_rate": 1.4653941908713694e-05, "loss": 0.4932, "step": 16115 }, { "epoch": 13.374273858921162, "grad_norm": 28.756710052490234, "learning_rate": 1.4653609958506225e-05, "loss": 0.9684, "step": 16116 }, { "epoch": 13.375103734439834, "grad_norm": 11.166210174560547, "learning_rate": 1.4653278008298757e-05, "loss": 0.2634, "step": 16117 }, { "epoch": 13.375933609958507, "grad_norm": 41.7188835144043, "learning_rate": 1.4652946058091289e-05, "loss": 0.8629, "step": 16118 }, { "epoch": 13.376763485477179, "grad_norm": 33.8421516418457, "learning_rate": 1.4652614107883818e-05, "loss": 0.7382, "step": 16119 }, { "epoch": 13.377593360995851, "grad_norm": 35.08332443237305, "learning_rate": 1.465228215767635e-05, "loss": 0.6594, "step": 16120 }, { "epoch": 13.378423236514523, "grad_norm": 43.356754302978516, "learning_rate": 1.4651950207468882e-05, "loss": 1.051, "step": 16121 }, { "epoch": 13.379253112033195, "grad_norm": 25.81720542907715, "learning_rate": 1.4651618257261412e-05, "loss": 0.291, "step": 16122 }, { "epoch": 13.380082987551868, "grad_norm": 47.28814697265625, "learning_rate": 1.4651286307053943e-05, "loss": 0.5838, "step": 16123 }, { "epoch": 13.38091286307054, "grad_norm": 37.736854553222656, "learning_rate": 1.4650954356846475e-05, "loss": 0.7093, "step": 16124 }, { "epoch": 13.381742738589212, "grad_norm": 43.205379486083984, "learning_rate": 1.4650622406639005e-05, "loss": 1.054, "step": 16125 }, { "epoch": 13.382572614107884, "grad_norm": 45.8195915222168, "learning_rate": 1.4650290456431537e-05, "loss": 0.8698, "step": 16126 }, { "epoch": 13.383402489626556, "grad_norm": 53.20627975463867, "learning_rate": 
1.4649958506224066e-05, "loss": 1.0497, "step": 16127 }, { "epoch": 13.384232365145229, "grad_norm": 19.610816955566406, "learning_rate": 1.4649626556016598e-05, "loss": 0.5158, "step": 16128 }, { "epoch": 13.3850622406639, "grad_norm": 61.105648040771484, "learning_rate": 1.464929460580913e-05, "loss": 1.333, "step": 16129 }, { "epoch": 13.385892116182573, "grad_norm": 19.276344299316406, "learning_rate": 1.4648962655601662e-05, "loss": 0.4229, "step": 16130 }, { "epoch": 13.386721991701245, "grad_norm": 33.91252517700195, "learning_rate": 1.4648630705394191e-05, "loss": 0.4965, "step": 16131 }, { "epoch": 13.387551867219917, "grad_norm": 33.902679443359375, "learning_rate": 1.4648298755186723e-05, "loss": 0.9623, "step": 16132 }, { "epoch": 13.38838174273859, "grad_norm": 44.0704231262207, "learning_rate": 1.4647966804979255e-05, "loss": 1.2077, "step": 16133 }, { "epoch": 13.389211618257262, "grad_norm": 42.119380950927734, "learning_rate": 1.4647634854771786e-05, "loss": 0.6562, "step": 16134 }, { "epoch": 13.390041493775934, "grad_norm": 43.72426986694336, "learning_rate": 1.4647302904564316e-05, "loss": 1.0673, "step": 16135 }, { "epoch": 13.390871369294606, "grad_norm": 29.805191040039062, "learning_rate": 1.4646970954356847e-05, "loss": 0.6307, "step": 16136 }, { "epoch": 13.391701244813278, "grad_norm": NaN, "learning_rate": 1.4646970954356847e-05, "loss": 1.9476, "step": 16137 }, { "epoch": 13.39253112033195, "grad_norm": 26.25113296508789, "learning_rate": 1.4646639004149379e-05, "loss": 0.6698, "step": 16138 }, { "epoch": 13.393360995850623, "grad_norm": 53.88145446777344, "learning_rate": 1.464630705394191e-05, "loss": 0.7185, "step": 16139 }, { "epoch": 13.394190871369295, "grad_norm": 66.45101165771484, "learning_rate": 1.464597510373444e-05, "loss": 0.9429, "step": 16140 }, { "epoch": 13.395020746887967, "grad_norm": 45.19211959838867, "learning_rate": 1.4645643153526972e-05, "loss": 1.1589, "step": 16141 }, { "epoch": 13.39585062240664, 
"grad_norm": 25.823455810546875, "learning_rate": 1.4645311203319504e-05, "loss": 0.6877, "step": 16142 }, { "epoch": 13.396680497925312, "grad_norm": 33.39036178588867, "learning_rate": 1.4644979253112036e-05, "loss": 0.9061, "step": 16143 }, { "epoch": 13.397510373443984, "grad_norm": 23.141380310058594, "learning_rate": 1.4644647302904566e-05, "loss": 0.4863, "step": 16144 }, { "epoch": 13.398340248962656, "grad_norm": 23.666658401489258, "learning_rate": 1.4644315352697097e-05, "loss": 0.5204, "step": 16145 }, { "epoch": 13.399170124481328, "grad_norm": 50.406822204589844, "learning_rate": 1.4643983402489627e-05, "loss": 0.541, "step": 16146 }, { "epoch": 13.4, "grad_norm": 34.29875946044922, "learning_rate": 1.4643651452282159e-05, "loss": 0.496, "step": 16147 }, { "epoch": 13.400829875518673, "grad_norm": 29.57008934020996, "learning_rate": 1.4643319502074691e-05, "loss": 0.7211, "step": 16148 }, { "epoch": 13.401659751037345, "grad_norm": 46.3770866394043, "learning_rate": 1.464298755186722e-05, "loss": 1.7321, "step": 16149 }, { "epoch": 13.402489626556017, "grad_norm": 27.215669631958008, "learning_rate": 1.4642655601659752e-05, "loss": 0.8095, "step": 16150 }, { "epoch": 13.40331950207469, "grad_norm": 27.367820739746094, "learning_rate": 1.4642323651452284e-05, "loss": 0.7943, "step": 16151 }, { "epoch": 13.404149377593361, "grad_norm": 54.04507064819336, "learning_rate": 1.4641991701244815e-05, "loss": 1.0955, "step": 16152 }, { "epoch": 13.404979253112034, "grad_norm": 39.53891372680664, "learning_rate": 1.4641659751037345e-05, "loss": 0.8033, "step": 16153 }, { "epoch": 13.405809128630706, "grad_norm": 36.33482360839844, "learning_rate": 1.4641327800829877e-05, "loss": 0.8619, "step": 16154 }, { "epoch": 13.406639004149378, "grad_norm": 35.5899543762207, "learning_rate": 1.4640995850622408e-05, "loss": 1.2211, "step": 16155 }, { "epoch": 13.40746887966805, "grad_norm": 50.94382858276367, "learning_rate": 1.464066390041494e-05, "loss": 0.6934, "step": 
16156 }, { "epoch": 13.408298755186722, "grad_norm": 48.95648956298828, "learning_rate": 1.464033195020747e-05, "loss": 0.6971, "step": 16157 }, { "epoch": 13.409128630705395, "grad_norm": 42.669593811035156, "learning_rate": 1.464e-05, "loss": 1.0266, "step": 16158 }, { "epoch": 13.409958506224067, "grad_norm": 71.82768249511719, "learning_rate": 1.4639668049792533e-05, "loss": 1.2341, "step": 16159 }, { "epoch": 13.410788381742739, "grad_norm": 30.50078582763672, "learning_rate": 1.4639336099585065e-05, "loss": 0.9976, "step": 16160 }, { "epoch": 13.411618257261411, "grad_norm": 39.18241882324219, "learning_rate": 1.4639004149377593e-05, "loss": 0.9366, "step": 16161 }, { "epoch": 13.412448132780083, "grad_norm": 39.9369010925293, "learning_rate": 1.4638672199170125e-05, "loss": 0.7765, "step": 16162 }, { "epoch": 13.413278008298755, "grad_norm": 52.926841735839844, "learning_rate": 1.4638340248962658e-05, "loss": 1.7773, "step": 16163 }, { "epoch": 13.414107883817428, "grad_norm": 49.60702133178711, "learning_rate": 1.4638008298755188e-05, "loss": 0.7609, "step": 16164 }, { "epoch": 13.4149377593361, "grad_norm": 36.519107818603516, "learning_rate": 1.4637676348547718e-05, "loss": 0.6843, "step": 16165 }, { "epoch": 13.415767634854772, "grad_norm": 31.850337982177734, "learning_rate": 1.4637344398340249e-05, "loss": 0.7162, "step": 16166 }, { "epoch": 13.416597510373444, "grad_norm": 28.280752182006836, "learning_rate": 1.4637012448132781e-05, "loss": 0.7265, "step": 16167 }, { "epoch": 13.417427385892116, "grad_norm": 39.22420883178711, "learning_rate": 1.4636680497925313e-05, "loss": 0.6311, "step": 16168 }, { "epoch": 13.418257261410789, "grad_norm": 51.63954162597656, "learning_rate": 1.4636348547717842e-05, "loss": 0.7894, "step": 16169 }, { "epoch": 13.41908713692946, "grad_norm": 31.040546417236328, "learning_rate": 1.4636016597510374e-05, "loss": 1.2067, "step": 16170 }, { "epoch": 13.419917012448133, "grad_norm": 36.64881134033203, "learning_rate": 
1.4635684647302906e-05, "loss": 1.1789, "step": 16171 }, { "epoch": 13.420746887966805, "grad_norm": 49.84702682495117, "learning_rate": 1.4635352697095438e-05, "loss": 0.8904, "step": 16172 }, { "epoch": 13.421576763485477, "grad_norm": 28.5026798248291, "learning_rate": 1.4635020746887969e-05, "loss": 0.7671, "step": 16173 }, { "epoch": 13.42240663900415, "grad_norm": 31.099523544311523, "learning_rate": 1.4634688796680499e-05, "loss": 0.5605, "step": 16174 }, { "epoch": 13.423236514522822, "grad_norm": 42.99766540527344, "learning_rate": 1.463435684647303e-05, "loss": 0.9126, "step": 16175 }, { "epoch": 13.424066390041494, "grad_norm": 114.36314392089844, "learning_rate": 1.4634024896265561e-05, "loss": 1.1041, "step": 16176 }, { "epoch": 13.424896265560166, "grad_norm": 31.289058685302734, "learning_rate": 1.4633692946058094e-05, "loss": 0.6981, "step": 16177 }, { "epoch": 13.425726141078838, "grad_norm": 47.51389694213867, "learning_rate": 1.4633360995850622e-05, "loss": 0.8568, "step": 16178 }, { "epoch": 13.42655601659751, "grad_norm": 73.36801147460938, "learning_rate": 1.4633029045643154e-05, "loss": 0.7707, "step": 16179 }, { "epoch": 13.427385892116183, "grad_norm": 28.58342933654785, "learning_rate": 1.4632697095435686e-05, "loss": 0.5517, "step": 16180 }, { "epoch": 13.428215767634855, "grad_norm": 29.078857421875, "learning_rate": 1.4632365145228219e-05, "loss": 0.9954, "step": 16181 }, { "epoch": 13.429045643153527, "grad_norm": 40.2997932434082, "learning_rate": 1.4632033195020747e-05, "loss": 0.9339, "step": 16182 }, { "epoch": 13.4298755186722, "grad_norm": 47.566471099853516, "learning_rate": 1.463170124481328e-05, "loss": 0.7752, "step": 16183 }, { "epoch": 13.430705394190872, "grad_norm": 40.52621078491211, "learning_rate": 1.463136929460581e-05, "loss": 0.862, "step": 16184 }, { "epoch": 13.431535269709544, "grad_norm": 18.890260696411133, "learning_rate": 1.4631037344398342e-05, "loss": 0.4294, "step": 16185 }, { "epoch": 13.432365145228216, 
"grad_norm": 40.13406753540039, "learning_rate": 1.4630705394190872e-05, "loss": 0.6878, "step": 16186 }, { "epoch": 13.433195020746888, "grad_norm": 36.71409606933594, "learning_rate": 1.4630373443983403e-05, "loss": 1.0565, "step": 16187 }, { "epoch": 13.43402489626556, "grad_norm": 27.192405700683594, "learning_rate": 1.4630041493775935e-05, "loss": 0.8262, "step": 16188 }, { "epoch": 13.434854771784233, "grad_norm": 42.80336380004883, "learning_rate": 1.4629709543568467e-05, "loss": 1.6037, "step": 16189 }, { "epoch": 13.435684647302905, "grad_norm": 23.11791229248047, "learning_rate": 1.4629377593360996e-05, "loss": 0.5348, "step": 16190 }, { "epoch": 13.436514522821577, "grad_norm": 64.98897552490234, "learning_rate": 1.4629045643153528e-05, "loss": 1.2048, "step": 16191 }, { "epoch": 13.43734439834025, "grad_norm": 27.52871322631836, "learning_rate": 1.462871369294606e-05, "loss": 0.6894, "step": 16192 }, { "epoch": 13.438174273858921, "grad_norm": 28.882450103759766, "learning_rate": 1.462838174273859e-05, "loss": 0.4859, "step": 16193 }, { "epoch": 13.439004149377594, "grad_norm": 28.946992874145508, "learning_rate": 1.462804979253112e-05, "loss": 0.6113, "step": 16194 }, { "epoch": 13.439834024896266, "grad_norm": 53.25514221191406, "learning_rate": 1.4627717842323653e-05, "loss": 0.6075, "step": 16195 }, { "epoch": 13.440663900414938, "grad_norm": 43.316444396972656, "learning_rate": 1.4627385892116183e-05, "loss": 0.9642, "step": 16196 }, { "epoch": 13.44149377593361, "grad_norm": 28.93720054626465, "learning_rate": 1.4627053941908715e-05, "loss": 0.6175, "step": 16197 }, { "epoch": 13.442323651452282, "grad_norm": 53.918331146240234, "learning_rate": 1.4626721991701247e-05, "loss": 1.22, "step": 16198 }, { "epoch": 13.443153526970955, "grad_norm": 29.922260284423828, "learning_rate": 1.4626390041493776e-05, "loss": 0.6757, "step": 16199 }, { "epoch": 13.443983402489627, "grad_norm": 30.734569549560547, "learning_rate": 1.4626058091286308e-05, "loss": 
0.8081, "step": 16200 }, { "epoch": 13.444813278008299, "grad_norm": 27.44904327392578, "learning_rate": 1.462572614107884e-05, "loss": 0.5228, "step": 16201 }, { "epoch": 13.445643153526971, "grad_norm": 25.266477584838867, "learning_rate": 1.462539419087137e-05, "loss": 0.6315, "step": 16202 }, { "epoch": 13.446473029045643, "grad_norm": 14.745438575744629, "learning_rate": 1.4625062240663901e-05, "loss": 0.4826, "step": 16203 }, { "epoch": 13.447302904564316, "grad_norm": 15.036062240600586, "learning_rate": 1.4624730290456433e-05, "loss": 0.3511, "step": 16204 }, { "epoch": 13.448132780082988, "grad_norm": 35.154197692871094, "learning_rate": 1.4624398340248964e-05, "loss": 0.5913, "step": 16205 }, { "epoch": 13.44896265560166, "grad_norm": 21.675451278686523, "learning_rate": 1.4624066390041496e-05, "loss": 0.6456, "step": 16206 }, { "epoch": 13.449792531120332, "grad_norm": 56.83579635620117, "learning_rate": 1.4623734439834025e-05, "loss": 1.0504, "step": 16207 }, { "epoch": 13.450622406639004, "grad_norm": 43.43748474121094, "learning_rate": 1.4623402489626557e-05, "loss": 0.7808, "step": 16208 }, { "epoch": 13.451452282157677, "grad_norm": 64.78966522216797, "learning_rate": 1.4623070539419089e-05, "loss": 0.8723, "step": 16209 }, { "epoch": 13.452282157676349, "grad_norm": 52.11946105957031, "learning_rate": 1.4622738589211621e-05, "loss": 0.8933, "step": 16210 }, { "epoch": 13.453112033195021, "grad_norm": 22.00904655456543, "learning_rate": 1.462240663900415e-05, "loss": 0.4488, "step": 16211 }, { "epoch": 13.453941908713693, "grad_norm": 44.010276794433594, "learning_rate": 1.4622074688796682e-05, "loss": 0.9281, "step": 16212 }, { "epoch": 13.454771784232365, "grad_norm": 46.6307373046875, "learning_rate": 1.4621742738589214e-05, "loss": 0.8517, "step": 16213 }, { "epoch": 13.455601659751038, "grad_norm": 25.572647094726562, "learning_rate": 1.4621410788381744e-05, "loss": 0.6515, "step": 16214 }, { "epoch": 13.45643153526971, "grad_norm": 
35.67730712890625, "learning_rate": 1.4621078838174275e-05, "loss": 0.5978, "step": 16215 }, { "epoch": 13.457261410788382, "grad_norm": 25.65198516845703, "learning_rate": 1.4620746887966805e-05, "loss": 0.7978, "step": 16216 }, { "epoch": 13.458091286307054, "grad_norm": 20.7385311126709, "learning_rate": 1.4620414937759337e-05, "loss": 0.4481, "step": 16217 }, { "epoch": 13.458921161825726, "grad_norm": 49.54014205932617, "learning_rate": 1.462008298755187e-05, "loss": 1.465, "step": 16218 }, { "epoch": 13.459751037344398, "grad_norm": 42.40576171875, "learning_rate": 1.4619751037344398e-05, "loss": 0.9611, "step": 16219 }, { "epoch": 13.46058091286307, "grad_norm": 36.65413284301758, "learning_rate": 1.461941908713693e-05, "loss": 1.1136, "step": 16220 }, { "epoch": 13.461410788381743, "grad_norm": 73.58585357666016, "learning_rate": 1.4619087136929462e-05, "loss": 1.3685, "step": 16221 }, { "epoch": 13.462240663900415, "grad_norm": 44.4516716003418, "learning_rate": 1.4618755186721993e-05, "loss": 0.9543, "step": 16222 }, { "epoch": 13.463070539419087, "grad_norm": 51.5377197265625, "learning_rate": 1.4618423236514525e-05, "loss": 0.8049, "step": 16223 }, { "epoch": 13.46390041493776, "grad_norm": 36.563209533691406, "learning_rate": 1.4618091286307055e-05, "loss": 1.2418, "step": 16224 }, { "epoch": 13.464730290456432, "grad_norm": 24.580615997314453, "learning_rate": 1.4617759336099586e-05, "loss": 0.6088, "step": 16225 }, { "epoch": 13.465560165975104, "grad_norm": 26.405263900756836, "learning_rate": 1.4617427385892118e-05, "loss": 0.6413, "step": 16226 }, { "epoch": 13.466390041493776, "grad_norm": 26.86263084411621, "learning_rate": 1.461709543568465e-05, "loss": 0.9165, "step": 16227 }, { "epoch": 13.467219917012448, "grad_norm": 33.203617095947266, "learning_rate": 1.4616763485477179e-05, "loss": 0.8489, "step": 16228 }, { "epoch": 13.46804979253112, "grad_norm": 32.85379409790039, "learning_rate": 1.461643153526971e-05, "loss": 0.9252, "step": 16229 
}, { "epoch": 13.468879668049793, "grad_norm": 38.36007308959961, "learning_rate": 1.4616099585062243e-05, "loss": 1.0344, "step": 16230 }, { "epoch": 13.469709543568465, "grad_norm": 17.86061668395996, "learning_rate": 1.4615767634854773e-05, "loss": 0.593, "step": 16231 }, { "epoch": 13.470539419087137, "grad_norm": 38.41400909423828, "learning_rate": 1.4615435684647304e-05, "loss": 0.6337, "step": 16232 }, { "epoch": 13.47136929460581, "grad_norm": 31.97283363342285, "learning_rate": 1.4615103734439836e-05, "loss": 0.5384, "step": 16233 }, { "epoch": 13.472199170124481, "grad_norm": 40.8370475769043, "learning_rate": 1.4614771784232366e-05, "loss": 1.4912, "step": 16234 }, { "epoch": 13.473029045643154, "grad_norm": 68.96844482421875, "learning_rate": 1.4614439834024898e-05, "loss": 1.978, "step": 16235 }, { "epoch": 13.473858921161826, "grad_norm": 57.400306701660156, "learning_rate": 1.4614107883817427e-05, "loss": 1.1177, "step": 16236 }, { "epoch": 13.474688796680498, "grad_norm": 54.067527770996094, "learning_rate": 1.4613775933609959e-05, "loss": 1.0644, "step": 16237 }, { "epoch": 13.47551867219917, "grad_norm": 36.38629913330078, "learning_rate": 1.4613443983402491e-05, "loss": 1.3103, "step": 16238 }, { "epoch": 13.476348547717842, "grad_norm": 55.12594223022461, "learning_rate": 1.4613112033195023e-05, "loss": 0.735, "step": 16239 }, { "epoch": 13.477178423236515, "grad_norm": 26.990854263305664, "learning_rate": 1.4612780082987552e-05, "loss": 0.6296, "step": 16240 }, { "epoch": 13.478008298755187, "grad_norm": 74.15862274169922, "learning_rate": 1.4612448132780084e-05, "loss": 0.6388, "step": 16241 }, { "epoch": 13.478838174273859, "grad_norm": 17.47833824157715, "learning_rate": 1.4612116182572616e-05, "loss": 0.8358, "step": 16242 }, { "epoch": 13.479668049792531, "grad_norm": 61.407920837402344, "learning_rate": 1.4611784232365147e-05, "loss": 0.8927, "step": 16243 }, { "epoch": 13.480497925311203, "grad_norm": 19.912384033203125, "learning_rate": 
1.4611452282157677e-05, "loss": 0.7314, "step": 16244 }, { "epoch": 13.481327800829876, "grad_norm": 32.529991149902344, "learning_rate": 1.4611120331950207e-05, "loss": 1.4506, "step": 16245 }, { "epoch": 13.482157676348548, "grad_norm": 98.23319244384766, "learning_rate": 1.461078838174274e-05, "loss": 0.7168, "step": 16246 }, { "epoch": 13.48298755186722, "grad_norm": 18.374927520751953, "learning_rate": 1.4610456431535272e-05, "loss": 0.4652, "step": 16247 }, { "epoch": 13.483817427385892, "grad_norm": 23.243432998657227, "learning_rate": 1.46101244813278e-05, "loss": 1.2143, "step": 16248 }, { "epoch": 13.484647302904564, "grad_norm": 25.230762481689453, "learning_rate": 1.4609792531120332e-05, "loss": 0.8629, "step": 16249 }, { "epoch": 13.485477178423237, "grad_norm": 54.43413543701172, "learning_rate": 1.4609460580912865e-05, "loss": 1.5859, "step": 16250 }, { "epoch": 13.486307053941909, "grad_norm": 35.278846740722656, "learning_rate": 1.4609128630705397e-05, "loss": 1.2412, "step": 16251 }, { "epoch": 13.487136929460581, "grad_norm": 19.272851943969727, "learning_rate": 1.4608796680497927e-05, "loss": 0.5387, "step": 16252 }, { "epoch": 13.487966804979253, "grad_norm": 69.84567260742188, "learning_rate": 1.4608464730290457e-05, "loss": 0.897, "step": 16253 }, { "epoch": 13.488796680497925, "grad_norm": 71.10151672363281, "learning_rate": 1.4608132780082988e-05, "loss": 0.9269, "step": 16254 }, { "epoch": 13.489626556016598, "grad_norm": 49.98297882080078, "learning_rate": 1.460780082987552e-05, "loss": 1.1205, "step": 16255 }, { "epoch": 13.49045643153527, "grad_norm": 37.24268341064453, "learning_rate": 1.4607468879668052e-05, "loss": 0.7567, "step": 16256 }, { "epoch": 13.491286307053942, "grad_norm": 36.18445587158203, "learning_rate": 1.460713692946058e-05, "loss": 0.7176, "step": 16257 }, { "epoch": 13.492116182572614, "grad_norm": 28.36318016052246, "learning_rate": 1.4606804979253113e-05, "loss": 1.0077, "step": 16258 }, { "epoch": 
13.492946058091286, "grad_norm": 35.34244918823242, "learning_rate": 1.4606473029045645e-05, "loss": 1.1979, "step": 16259 }, { "epoch": 13.493775933609959, "grad_norm": 137.6935577392578, "learning_rate": 1.4606141078838177e-05, "loss": 0.9677, "step": 16260 }, { "epoch": 13.49460580912863, "grad_norm": 38.495933532714844, "learning_rate": 1.4605809128630706e-05, "loss": 1.3049, "step": 16261 }, { "epoch": 13.495435684647303, "grad_norm": 31.119524002075195, "learning_rate": 1.4605477178423238e-05, "loss": 0.5112, "step": 16262 }, { "epoch": 13.496265560165975, "grad_norm": 30.20896339416504, "learning_rate": 1.4605145228215768e-05, "loss": 0.5984, "step": 16263 }, { "epoch": 13.497095435684647, "grad_norm": 38.76263427734375, "learning_rate": 1.46048132780083e-05, "loss": 0.7884, "step": 16264 }, { "epoch": 13.49792531120332, "grad_norm": 67.72544860839844, "learning_rate": 1.4604481327800831e-05, "loss": 0.9587, "step": 16265 }, { "epoch": 13.498755186721992, "grad_norm": 15.511548042297363, "learning_rate": 1.4604149377593361e-05, "loss": 0.477, "step": 16266 }, { "epoch": 13.499585062240664, "grad_norm": 19.64495849609375, "learning_rate": 1.4603817427385893e-05, "loss": 0.8514, "step": 16267 }, { "epoch": 13.500414937759336, "grad_norm": 26.76097869873047, "learning_rate": 1.4603485477178426e-05, "loss": 1.1646, "step": 16268 }, { "epoch": 13.501244813278008, "grad_norm": 31.16179847717285, "learning_rate": 1.4603153526970954e-05, "loss": 0.5859, "step": 16269 }, { "epoch": 13.50207468879668, "grad_norm": 23.276994705200195, "learning_rate": 1.4602821576763486e-05, "loss": 0.5934, "step": 16270 }, { "epoch": 13.502904564315353, "grad_norm": 24.5467472076416, "learning_rate": 1.4602489626556018e-05, "loss": 0.6985, "step": 16271 }, { "epoch": 13.503734439834025, "grad_norm": 38.70527267456055, "learning_rate": 1.4602157676348549e-05, "loss": 0.6373, "step": 16272 }, { "epoch": 13.504564315352697, "grad_norm": 70.1861343383789, "learning_rate": 
1.460182572614108e-05, "loss": 1.8046, "step": 16273 }, { "epoch": 13.50539419087137, "grad_norm": 37.600914001464844, "learning_rate": 1.4601493775933611e-05, "loss": 0.9754, "step": 16274 }, { "epoch": 13.506224066390041, "grad_norm": 44.377227783203125, "learning_rate": 1.4601161825726142e-05, "loss": 1.0578, "step": 16275 }, { "epoch": 13.507053941908714, "grad_norm": 32.117923736572266, "learning_rate": 1.4600829875518674e-05, "loss": 1.016, "step": 16276 }, { "epoch": 13.507883817427386, "grad_norm": 38.042640686035156, "learning_rate": 1.4600497925311206e-05, "loss": 0.653, "step": 16277 }, { "epoch": 13.508713692946058, "grad_norm": 45.515438079833984, "learning_rate": 1.4600165975103735e-05, "loss": 0.9134, "step": 16278 }, { "epoch": 13.50954356846473, "grad_norm": 51.15311813354492, "learning_rate": 1.4599834024896267e-05, "loss": 0.5829, "step": 16279 }, { "epoch": 13.510373443983402, "grad_norm": 32.03905487060547, "learning_rate": 1.4599502074688799e-05, "loss": 0.4029, "step": 16280 }, { "epoch": 13.511203319502075, "grad_norm": 31.158199310302734, "learning_rate": 1.459917012448133e-05, "loss": 0.7216, "step": 16281 }, { "epoch": 13.512033195020747, "grad_norm": 25.884740829467773, "learning_rate": 1.459883817427386e-05, "loss": 0.5213, "step": 16282 }, { "epoch": 13.512863070539419, "grad_norm": 36.69434356689453, "learning_rate": 1.459850622406639e-05, "loss": 0.9917, "step": 16283 }, { "epoch": 13.513692946058091, "grad_norm": 64.87442779541016, "learning_rate": 1.4598174273858922e-05, "loss": 1.1037, "step": 16284 }, { "epoch": 13.514522821576763, "grad_norm": 29.6448974609375, "learning_rate": 1.4597842323651454e-05, "loss": 0.5898, "step": 16285 }, { "epoch": 13.515352697095436, "grad_norm": 23.23463249206543, "learning_rate": 1.4597510373443983e-05, "loss": 0.6853, "step": 16286 }, { "epoch": 13.516182572614108, "grad_norm": 45.64805603027344, "learning_rate": 1.4597178423236515e-05, "loss": 0.7561, "step": 16287 }, { "epoch": 
13.51701244813278, "grad_norm": 45.81834030151367, "learning_rate": 1.4596846473029047e-05, "loss": 0.7357, "step": 16288 }, { "epoch": 13.517842323651452, "grad_norm": 40.218441009521484, "learning_rate": 1.459651452282158e-05, "loss": 0.4773, "step": 16289 }, { "epoch": 13.518672199170124, "grad_norm": 33.480464935302734, "learning_rate": 1.4596182572614108e-05, "loss": 0.4058, "step": 16290 }, { "epoch": 13.519502074688797, "grad_norm": 25.455163955688477, "learning_rate": 1.459585062240664e-05, "loss": 0.6556, "step": 16291 }, { "epoch": 13.520331950207469, "grad_norm": 69.05634307861328, "learning_rate": 1.459551867219917e-05, "loss": 1.0797, "step": 16292 }, { "epoch": 13.521161825726141, "grad_norm": 42.758052825927734, "learning_rate": 1.4595186721991703e-05, "loss": 0.8505, "step": 16293 }, { "epoch": 13.521991701244813, "grad_norm": 28.552600860595703, "learning_rate": 1.4594854771784233e-05, "loss": 1.0652, "step": 16294 }, { "epoch": 13.522821576763485, "grad_norm": 19.054433822631836, "learning_rate": 1.4594522821576764e-05, "loss": 0.3916, "step": 16295 }, { "epoch": 13.523651452282158, "grad_norm": 35.22889709472656, "learning_rate": 1.4594190871369296e-05, "loss": 0.7841, "step": 16296 }, { "epoch": 13.52448132780083, "grad_norm": 23.28464126586914, "learning_rate": 1.4593858921161828e-05, "loss": 0.4718, "step": 16297 }, { "epoch": 13.525311203319502, "grad_norm": 39.72929382324219, "learning_rate": 1.4593526970954357e-05, "loss": 1.5829, "step": 16298 }, { "epoch": 13.526141078838174, "grad_norm": 31.82294464111328, "learning_rate": 1.4593195020746889e-05, "loss": 0.9605, "step": 16299 }, { "epoch": 13.526970954356846, "grad_norm": 15.700234413146973, "learning_rate": 1.459286307053942e-05, "loss": 0.4982, "step": 16300 }, { "epoch": 13.527800829875519, "grad_norm": 25.735639572143555, "learning_rate": 1.4592531120331951e-05, "loss": 0.8211, "step": 16301 }, { "epoch": 13.52863070539419, "grad_norm": 59.762691497802734, "learning_rate": 
1.4592199170124483e-05, "loss": 0.6502, "step": 16302 }, { "epoch": 13.529460580912863, "grad_norm": 21.233766555786133, "learning_rate": 1.4591867219917014e-05, "loss": 0.9351, "step": 16303 }, { "epoch": 13.530290456431535, "grad_norm": 30.95573616027832, "learning_rate": 1.4591535269709544e-05, "loss": 0.4825, "step": 16304 }, { "epoch": 13.531120331950207, "grad_norm": 26.726205825805664, "learning_rate": 1.4591203319502076e-05, "loss": 0.5437, "step": 16305 }, { "epoch": 13.53195020746888, "grad_norm": 30.831417083740234, "learning_rate": 1.4590871369294608e-05, "loss": 0.6595, "step": 16306 }, { "epoch": 13.532780082987552, "grad_norm": 28.264232635498047, "learning_rate": 1.4590539419087137e-05, "loss": 1.032, "step": 16307 }, { "epoch": 13.533609958506224, "grad_norm": 36.888545989990234, "learning_rate": 1.459020746887967e-05, "loss": 1.011, "step": 16308 }, { "epoch": 13.534439834024896, "grad_norm": 9.546163558959961, "learning_rate": 1.4589875518672201e-05, "loss": 0.2654, "step": 16309 }, { "epoch": 13.535269709543568, "grad_norm": 40.15546798706055, "learning_rate": 1.4589543568464732e-05, "loss": 1.2097, "step": 16310 }, { "epoch": 13.53609958506224, "grad_norm": 23.340255737304688, "learning_rate": 1.4589211618257262e-05, "loss": 0.5367, "step": 16311 }, { "epoch": 13.536929460580913, "grad_norm": 43.05437088012695, "learning_rate": 1.4588879668049794e-05, "loss": 0.8585, "step": 16312 }, { "epoch": 13.537759336099585, "grad_norm": 49.94133377075195, "learning_rate": 1.4588547717842325e-05, "loss": 0.9946, "step": 16313 }, { "epoch": 13.538589211618257, "grad_norm": 36.98630142211914, "learning_rate": 1.4588215767634857e-05, "loss": 0.9458, "step": 16314 }, { "epoch": 13.53941908713693, "grad_norm": 83.06476593017578, "learning_rate": 1.4587883817427385e-05, "loss": 0.7548, "step": 16315 }, { "epoch": 13.540248962655602, "grad_norm": 21.369611740112305, "learning_rate": 1.4587551867219918e-05, "loss": 0.4153, "step": 16316 }, { "epoch": 
13.541078838174274, "grad_norm": 28.55898666381836, "learning_rate": 1.458721991701245e-05, "loss": 0.3794, "step": 16317 }, { "epoch": 13.541908713692946, "grad_norm": 48.677391052246094, "learning_rate": 1.4586887966804982e-05, "loss": 1.1453, "step": 16318 }, { "epoch": 13.542738589211618, "grad_norm": 30.579946517944336, "learning_rate": 1.458655601659751e-05, "loss": 0.7784, "step": 16319 }, { "epoch": 13.54356846473029, "grad_norm": 73.66932678222656, "learning_rate": 1.4586224066390043e-05, "loss": 0.8257, "step": 16320 }, { "epoch": 13.544398340248962, "grad_norm": 22.652135848999023, "learning_rate": 1.4585892116182575e-05, "loss": 0.5377, "step": 16321 }, { "epoch": 13.545228215767635, "grad_norm": 61.75945281982422, "learning_rate": 1.4585560165975105e-05, "loss": 1.7368, "step": 16322 }, { "epoch": 13.546058091286307, "grad_norm": 33.28215408325195, "learning_rate": 1.4585228215767636e-05, "loss": 0.8294, "step": 16323 }, { "epoch": 13.546887966804979, "grad_norm": 27.98933982849121, "learning_rate": 1.4584896265560166e-05, "loss": 1.6733, "step": 16324 }, { "epoch": 13.547717842323651, "grad_norm": 43.68907165527344, "learning_rate": 1.4584564315352698e-05, "loss": 0.5892, "step": 16325 }, { "epoch": 13.548547717842323, "grad_norm": 29.3814697265625, "learning_rate": 1.458423236514523e-05, "loss": 0.5543, "step": 16326 }, { "epoch": 13.549377593360996, "grad_norm": 30.691299438476562, "learning_rate": 1.4583900414937759e-05, "loss": 0.7539, "step": 16327 }, { "epoch": 13.550207468879668, "grad_norm": 53.23104476928711, "learning_rate": 1.4583568464730291e-05, "loss": 1.3794, "step": 16328 }, { "epoch": 13.55103734439834, "grad_norm": 79.15048217773438, "learning_rate": 1.4583236514522823e-05, "loss": 1.2806, "step": 16329 }, { "epoch": 13.551867219917012, "grad_norm": 27.295257568359375, "learning_rate": 1.4582904564315355e-05, "loss": 1.0001, "step": 16330 }, { "epoch": 13.552697095435684, "grad_norm": 34.15714645385742, "learning_rate": 
1.4582572614107886e-05, "loss": 1.1058, "step": 16331 }, { "epoch": 13.553526970954357, "grad_norm": 30.255550384521484, "learning_rate": 1.4582240663900416e-05, "loss": 0.49, "step": 16332 }, { "epoch": 13.554356846473029, "grad_norm": 31.006776809692383, "learning_rate": 1.4581908713692946e-05, "loss": 0.6853, "step": 16333 }, { "epoch": 13.555186721991701, "grad_norm": 32.8096809387207, "learning_rate": 1.4581576763485479e-05, "loss": 0.4827, "step": 16334 }, { "epoch": 13.556016597510373, "grad_norm": 28.959619522094727, "learning_rate": 1.458124481327801e-05, "loss": 0.4761, "step": 16335 }, { "epoch": 13.556846473029045, "grad_norm": 26.348377227783203, "learning_rate": 1.458091286307054e-05, "loss": 0.5758, "step": 16336 }, { "epoch": 13.557676348547718, "grad_norm": 25.18998908996582, "learning_rate": 1.4580580912863072e-05, "loss": 0.8551, "step": 16337 }, { "epoch": 13.55850622406639, "grad_norm": 29.805683135986328, "learning_rate": 1.4580248962655604e-05, "loss": 0.9522, "step": 16338 }, { "epoch": 13.559336099585062, "grad_norm": 22.99024772644043, "learning_rate": 1.4579917012448134e-05, "loss": 0.9178, "step": 16339 }, { "epoch": 13.560165975103734, "grad_norm": 46.2636833190918, "learning_rate": 1.4579585062240664e-05, "loss": 1.0634, "step": 16340 }, { "epoch": 13.560995850622406, "grad_norm": 32.08465576171875, "learning_rate": 1.4579253112033197e-05, "loss": 1.2157, "step": 16341 }, { "epoch": 13.561825726141079, "grad_norm": 43.60335922241211, "learning_rate": 1.4578921161825727e-05, "loss": 1.0704, "step": 16342 }, { "epoch": 13.56265560165975, "grad_norm": 42.749839782714844, "learning_rate": 1.4578589211618259e-05, "loss": 0.923, "step": 16343 }, { "epoch": 13.563485477178423, "grad_norm": 22.459789276123047, "learning_rate": 1.457825726141079e-05, "loss": 0.5694, "step": 16344 }, { "epoch": 13.564315352697095, "grad_norm": 41.54988479614258, "learning_rate": 1.457792531120332e-05, "loss": 0.7815, "step": 16345 }, { "epoch": 
13.565145228215767, "grad_norm": 30.730697631835938, "learning_rate": 1.4577593360995852e-05, "loss": 1.0089, "step": 16346 }, { "epoch": 13.56597510373444, "grad_norm": 30.523082733154297, "learning_rate": 1.4577261410788384e-05, "loss": 1.0245, "step": 16347 }, { "epoch": 13.566804979253112, "grad_norm": 41.23383331298828, "learning_rate": 1.4576929460580913e-05, "loss": 0.6612, "step": 16348 }, { "epoch": 13.567634854771784, "grad_norm": 38.91877746582031, "learning_rate": 1.4576597510373445e-05, "loss": 1.066, "step": 16349 }, { "epoch": 13.568464730290456, "grad_norm": 27.729528427124023, "learning_rate": 1.4576265560165977e-05, "loss": 0.7636, "step": 16350 }, { "epoch": 13.569294605809128, "grad_norm": 32.066436767578125, "learning_rate": 1.4575933609958507e-05, "loss": 0.6621, "step": 16351 }, { "epoch": 13.5701244813278, "grad_norm": 34.925941467285156, "learning_rate": 1.4575601659751038e-05, "loss": 0.9165, "step": 16352 }, { "epoch": 13.570954356846473, "grad_norm": 29.37798309326172, "learning_rate": 1.4575269709543568e-05, "loss": 1.1035, "step": 16353 }, { "epoch": 13.571784232365145, "grad_norm": 207.04563903808594, "learning_rate": 1.45749377593361e-05, "loss": 1.1089, "step": 16354 }, { "epoch": 13.572614107883817, "grad_norm": 59.016685485839844, "learning_rate": 1.4574605809128633e-05, "loss": 1.2552, "step": 16355 }, { "epoch": 13.57344398340249, "grad_norm": 27.376951217651367, "learning_rate": 1.4574273858921165e-05, "loss": 0.5534, "step": 16356 }, { "epoch": 13.574273858921162, "grad_norm": 26.729694366455078, "learning_rate": 1.4573941908713693e-05, "loss": 0.6807, "step": 16357 }, { "epoch": 13.575103734439834, "grad_norm": 34.520172119140625, "learning_rate": 1.4573609958506225e-05, "loss": 0.4599, "step": 16358 }, { "epoch": 13.575933609958506, "grad_norm": 42.546382904052734, "learning_rate": 1.4573278008298758e-05, "loss": 1.5569, "step": 16359 }, { "epoch": 13.576763485477178, "grad_norm": 63.02703857421875, "learning_rate": 
1.4572946058091288e-05, "loss": 1.2576, "step": 16360 }, { "epoch": 13.57759336099585, "grad_norm": 73.7861557006836, "learning_rate": 1.4572614107883818e-05, "loss": 0.9704, "step": 16361 }, { "epoch": 13.578423236514523, "grad_norm": 41.8494758605957, "learning_rate": 1.4572282157676349e-05, "loss": 1.2974, "step": 16362 }, { "epoch": 13.579253112033195, "grad_norm": 17.02163314819336, "learning_rate": 1.4571950207468881e-05, "loss": 0.3823, "step": 16363 }, { "epoch": 13.580082987551867, "grad_norm": 22.37427520751953, "learning_rate": 1.4571618257261413e-05, "loss": 0.7262, "step": 16364 }, { "epoch": 13.58091286307054, "grad_norm": 56.68680191040039, "learning_rate": 1.4571286307053942e-05, "loss": 0.7297, "step": 16365 }, { "epoch": 13.581742738589211, "grad_norm": 78.53411865234375, "learning_rate": 1.4570954356846474e-05, "loss": 1.2743, "step": 16366 }, { "epoch": 13.582572614107884, "grad_norm": 31.04229736328125, "learning_rate": 1.4570622406639006e-05, "loss": 0.3242, "step": 16367 }, { "epoch": 13.583402489626556, "grad_norm": 47.171016693115234, "learning_rate": 1.4570290456431538e-05, "loss": 1.2372, "step": 16368 }, { "epoch": 13.584232365145228, "grad_norm": 37.650550842285156, "learning_rate": 1.4569958506224067e-05, "loss": 0.8352, "step": 16369 }, { "epoch": 13.5850622406639, "grad_norm": 29.399120330810547, "learning_rate": 1.4569626556016599e-05, "loss": 1.293, "step": 16370 }, { "epoch": 13.585892116182572, "grad_norm": 44.498775482177734, "learning_rate": 1.456929460580913e-05, "loss": 1.0146, "step": 16371 }, { "epoch": 13.586721991701245, "grad_norm": 47.358768463134766, "learning_rate": 1.4568962655601661e-05, "loss": 1.1269, "step": 16372 }, { "epoch": 13.587551867219917, "grad_norm": 23.106224060058594, "learning_rate": 1.4568630705394192e-05, "loss": 0.6298, "step": 16373 }, { "epoch": 13.588381742738589, "grad_norm": 23.099599838256836, "learning_rate": 1.4568298755186722e-05, "loss": 0.7686, "step": 16374 }, { "epoch": 
13.589211618257261, "grad_norm": 34.152286529541016, "learning_rate": 1.4567966804979254e-05, "loss": 1.1338, "step": 16375 }, { "epoch": 13.590041493775933, "grad_norm": 24.126358032226562, "learning_rate": 1.4567634854771786e-05, "loss": 0.581, "step": 16376 }, { "epoch": 13.590871369294605, "grad_norm": 27.445579528808594, "learning_rate": 1.4567302904564315e-05, "loss": 0.5051, "step": 16377 }, { "epoch": 13.591701244813278, "grad_norm": 35.88288116455078, "learning_rate": 1.4566970954356847e-05, "loss": 1.2715, "step": 16378 }, { "epoch": 13.59253112033195, "grad_norm": 37.91212463378906, "learning_rate": 1.456663900414938e-05, "loss": 0.5545, "step": 16379 }, { "epoch": 13.593360995850622, "grad_norm": 39.118621826171875, "learning_rate": 1.456630705394191e-05, "loss": 0.8082, "step": 16380 }, { "epoch": 13.594190871369294, "grad_norm": 48.79591751098633, "learning_rate": 1.4565975103734442e-05, "loss": 1.0888, "step": 16381 }, { "epoch": 13.595020746887966, "grad_norm": 45.61395263671875, "learning_rate": 1.4565643153526972e-05, "loss": 0.6737, "step": 16382 }, { "epoch": 13.595850622406639, "grad_norm": 23.298362731933594, "learning_rate": 1.4565311203319503e-05, "loss": 0.8269, "step": 16383 }, { "epoch": 13.59668049792531, "grad_norm": 26.462743759155273, "learning_rate": 1.4564979253112035e-05, "loss": 0.8436, "step": 16384 }, { "epoch": 13.597510373443983, "grad_norm": 58.58858871459961, "learning_rate": 1.4564647302904567e-05, "loss": 1.7914, "step": 16385 }, { "epoch": 13.598340248962655, "grad_norm": 115.46757507324219, "learning_rate": 1.4564315352697096e-05, "loss": 0.449, "step": 16386 }, { "epoch": 13.599170124481327, "grad_norm": 56.31806182861328, "learning_rate": 1.4563983402489628e-05, "loss": 1.1217, "step": 16387 }, { "epoch": 13.6, "grad_norm": 82.48248291015625, "learning_rate": 1.456365145228216e-05, "loss": 1.3013, "step": 16388 }, { "epoch": 13.600829875518672, "grad_norm": 39.67814254760742, "learning_rate": 1.456331950207469e-05, 
"loss": 1.1077, "step": 16389 }, { "epoch": 13.601659751037344, "grad_norm": 28.201295852661133, "learning_rate": 1.456298755186722e-05, "loss": 0.6644, "step": 16390 }, { "epoch": 13.602489626556016, "grad_norm": 39.95391845703125, "learning_rate": 1.4562655601659753e-05, "loss": 1.0631, "step": 16391 }, { "epoch": 13.603319502074688, "grad_norm": 55.3460578918457, "learning_rate": 1.4562323651452283e-05, "loss": 0.7379, "step": 16392 }, { "epoch": 13.60414937759336, "grad_norm": 22.461566925048828, "learning_rate": 1.4561991701244815e-05, "loss": 0.8434, "step": 16393 }, { "epoch": 13.604979253112033, "grad_norm": 45.15147399902344, "learning_rate": 1.4561659751037344e-05, "loss": 1.0432, "step": 16394 }, { "epoch": 13.605809128630705, "grad_norm": 35.71351623535156, "learning_rate": 1.4561327800829876e-05, "loss": 0.6729, "step": 16395 }, { "epoch": 13.606639004149377, "grad_norm": 56.66767501831055, "learning_rate": 1.4560995850622408e-05, "loss": 1.3041, "step": 16396 }, { "epoch": 13.60746887966805, "grad_norm": 39.37720489501953, "learning_rate": 1.456066390041494e-05, "loss": 1.2725, "step": 16397 }, { "epoch": 13.608298755186722, "grad_norm": 28.610889434814453, "learning_rate": 1.4560331950207469e-05, "loss": 0.8542, "step": 16398 }, { "epoch": 13.609128630705394, "grad_norm": 30.637683868408203, "learning_rate": 1.4560000000000001e-05, "loss": 0.6332, "step": 16399 }, { "epoch": 13.609958506224066, "grad_norm": 41.971099853515625, "learning_rate": 1.4559668049792532e-05, "loss": 1.0523, "step": 16400 }, { "epoch": 13.610788381742738, "grad_norm": 27.44907569885254, "learning_rate": 1.4559336099585064e-05, "loss": 0.5833, "step": 16401 }, { "epoch": 13.61161825726141, "grad_norm": 32.12452697753906, "learning_rate": 1.4559004149377594e-05, "loss": 0.6926, "step": 16402 }, { "epoch": 13.612448132780083, "grad_norm": 30.4572811126709, "learning_rate": 1.4558672199170125e-05, "loss": 1.1941, "step": 16403 }, { "epoch": 13.613278008298755, "grad_norm": 
53.29884719848633, "learning_rate": 1.4558340248962657e-05, "loss": 1.1579, "step": 16404 }, { "epoch": 13.614107883817427, "grad_norm": 32.84497833251953, "learning_rate": 1.4558008298755189e-05, "loss": 1.0257, "step": 16405 }, { "epoch": 13.6149377593361, "grad_norm": 21.263425827026367, "learning_rate": 1.4557676348547717e-05, "loss": 1.081, "step": 16406 }, { "epoch": 13.615767634854771, "grad_norm": 28.027217864990234, "learning_rate": 1.455734439834025e-05, "loss": 0.742, "step": 16407 }, { "epoch": 13.616597510373444, "grad_norm": 23.43642234802246, "learning_rate": 1.4557012448132782e-05, "loss": 0.7497, "step": 16408 }, { "epoch": 13.617427385892116, "grad_norm": 58.904685974121094, "learning_rate": 1.4556680497925312e-05, "loss": 1.1164, "step": 16409 }, { "epoch": 13.618257261410788, "grad_norm": 27.34797477722168, "learning_rate": 1.4556348547717844e-05, "loss": 0.7305, "step": 16410 }, { "epoch": 13.61908713692946, "grad_norm": 19.8287353515625, "learning_rate": 1.4556016597510375e-05, "loss": 0.9365, "step": 16411 }, { "epoch": 13.619917012448132, "grad_norm": 39.53858184814453, "learning_rate": 1.4555684647302905e-05, "loss": 1.1354, "step": 16412 }, { "epoch": 13.620746887966805, "grad_norm": 48.40629196166992, "learning_rate": 1.4555352697095437e-05, "loss": 1.6544, "step": 16413 }, { "epoch": 13.621576763485477, "grad_norm": 21.578535079956055, "learning_rate": 1.455502074688797e-05, "loss": 0.4609, "step": 16414 }, { "epoch": 13.622406639004149, "grad_norm": 26.420379638671875, "learning_rate": 1.4554688796680498e-05, "loss": 0.4108, "step": 16415 }, { "epoch": 13.623236514522821, "grad_norm": 35.726375579833984, "learning_rate": 1.455435684647303e-05, "loss": 0.858, "step": 16416 }, { "epoch": 13.624066390041493, "grad_norm": 15.474628448486328, "learning_rate": 1.4554024896265562e-05, "loss": 0.582, "step": 16417 }, { "epoch": 13.624896265560166, "grad_norm": 23.016860961914062, "learning_rate": 1.4553692946058093e-05, "loss": 1.1261, "step": 
16418 }, { "epoch": 13.625726141078838, "grad_norm": 29.742923736572266, "learning_rate": 1.4553360995850623e-05, "loss": 0.5862, "step": 16419 }, { "epoch": 13.62655601659751, "grad_norm": 34.58549880981445, "learning_rate": 1.4553029045643155e-05, "loss": 0.9376, "step": 16420 }, { "epoch": 13.627385892116182, "grad_norm": 25.511905670166016, "learning_rate": 1.4552697095435686e-05, "loss": 0.5601, "step": 16421 }, { "epoch": 13.628215767634854, "grad_norm": 57.57016372680664, "learning_rate": 1.4552365145228218e-05, "loss": 1.2276, "step": 16422 }, { "epoch": 13.629045643153527, "grad_norm": 27.866621017456055, "learning_rate": 1.4552033195020746e-05, "loss": 0.4974, "step": 16423 }, { "epoch": 13.629875518672199, "grad_norm": 84.82589721679688, "learning_rate": 1.4551701244813278e-05, "loss": 1.6812, "step": 16424 }, { "epoch": 13.630705394190871, "grad_norm": 40.282737731933594, "learning_rate": 1.455136929460581e-05, "loss": 0.8756, "step": 16425 }, { "epoch": 13.631535269709543, "grad_norm": 28.720951080322266, "learning_rate": 1.4551037344398343e-05, "loss": 0.8336, "step": 16426 }, { "epoch": 13.632365145228215, "grad_norm": 38.754371643066406, "learning_rate": 1.4550705394190871e-05, "loss": 0.4812, "step": 16427 }, { "epoch": 13.633195020746887, "grad_norm": 31.978914260864258, "learning_rate": 1.4550373443983404e-05, "loss": 0.9648, "step": 16428 }, { "epoch": 13.63402489626556, "grad_norm": 49.68903732299805, "learning_rate": 1.4550041493775936e-05, "loss": 1.6886, "step": 16429 }, { "epoch": 13.634854771784232, "grad_norm": 33.78684997558594, "learning_rate": 1.4549709543568466e-05, "loss": 0.7207, "step": 16430 }, { "epoch": 13.635684647302904, "grad_norm": 26.29250717163086, "learning_rate": 1.4549377593360996e-05, "loss": 0.5535, "step": 16431 }, { "epoch": 13.636514522821576, "grad_norm": 23.04964828491211, "learning_rate": 1.4549045643153527e-05, "loss": 0.8564, "step": 16432 }, { "epoch": 13.637344398340248, "grad_norm": 48.45291519165039, 
"learning_rate": 1.4548713692946059e-05, "loss": 0.6428, "step": 16433 }, { "epoch": 13.63817427385892, "grad_norm": 48.29230880737305, "learning_rate": 1.4548381742738591e-05, "loss": 1.015, "step": 16434 }, { "epoch": 13.639004149377593, "grad_norm": 23.351221084594727, "learning_rate": 1.4548049792531123e-05, "loss": 0.6002, "step": 16435 }, { "epoch": 13.639834024896265, "grad_norm": 48.039100646972656, "learning_rate": 1.4547717842323652e-05, "loss": 0.6133, "step": 16436 }, { "epoch": 13.640663900414937, "grad_norm": 51.618408203125, "learning_rate": 1.4547385892116184e-05, "loss": 1.1228, "step": 16437 }, { "epoch": 13.64149377593361, "grad_norm": 16.21868324279785, "learning_rate": 1.4547053941908716e-05, "loss": 0.3838, "step": 16438 }, { "epoch": 13.642323651452282, "grad_norm": 19.65548324584961, "learning_rate": 1.4546721991701247e-05, "loss": 0.4868, "step": 16439 }, { "epoch": 13.643153526970954, "grad_norm": 46.851295471191406, "learning_rate": 1.4546390041493777e-05, "loss": 0.7157, "step": 16440 }, { "epoch": 13.643983402489626, "grad_norm": 23.52826690673828, "learning_rate": 1.4546058091286307e-05, "loss": 0.5378, "step": 16441 }, { "epoch": 13.644813278008298, "grad_norm": 41.54447937011719, "learning_rate": 1.454572614107884e-05, "loss": 1.0437, "step": 16442 }, { "epoch": 13.64564315352697, "grad_norm": 37.87438201904297, "learning_rate": 1.4545394190871372e-05, "loss": 0.4982, "step": 16443 }, { "epoch": 13.646473029045643, "grad_norm": 35.51891326904297, "learning_rate": 1.45450622406639e-05, "loss": 0.7128, "step": 16444 }, { "epoch": 13.647302904564315, "grad_norm": 33.43079376220703, "learning_rate": 1.4544730290456432e-05, "loss": 0.8932, "step": 16445 }, { "epoch": 13.648132780082987, "grad_norm": 85.6081314086914, "learning_rate": 1.4544398340248965e-05, "loss": 1.0953, "step": 16446 }, { "epoch": 13.64896265560166, "grad_norm": 25.644594192504883, "learning_rate": 1.4544066390041497e-05, "loss": 0.8335, "step": 16447 }, { "epoch": 
13.649792531120331, "grad_norm": 46.6155891418457, "learning_rate": 1.4543734439834025e-05, "loss": 1.1047, "step": 16448 }, { "epoch": 13.650622406639004, "grad_norm": 31.485082626342773, "learning_rate": 1.4543402489626557e-05, "loss": 1.4252, "step": 16449 }, { "epoch": 13.651452282157676, "grad_norm": 50.53562545776367, "learning_rate": 1.4543070539419088e-05, "loss": 0.7056, "step": 16450 }, { "epoch": 13.652282157676348, "grad_norm": 33.56012725830078, "learning_rate": 1.454273858921162e-05, "loss": 0.9545, "step": 16451 }, { "epoch": 13.65311203319502, "grad_norm": 22.31904411315918, "learning_rate": 1.454240663900415e-05, "loss": 0.6408, "step": 16452 }, { "epoch": 13.653941908713692, "grad_norm": 34.47079086303711, "learning_rate": 1.454207468879668e-05, "loss": 0.6167, "step": 16453 }, { "epoch": 13.654771784232365, "grad_norm": 29.535049438476562, "learning_rate": 1.4541742738589213e-05, "loss": 0.9352, "step": 16454 }, { "epoch": 13.655601659751037, "grad_norm": 24.036453247070312, "learning_rate": 1.4541410788381745e-05, "loss": 0.5732, "step": 16455 }, { "epoch": 13.656431535269709, "grad_norm": 33.252891540527344, "learning_rate": 1.4541078838174274e-05, "loss": 0.8547, "step": 16456 }, { "epoch": 13.657261410788381, "grad_norm": 32.34508514404297, "learning_rate": 1.4540746887966806e-05, "loss": 1.0216, "step": 16457 }, { "epoch": 13.658091286307053, "grad_norm": 33.790748596191406, "learning_rate": 1.4540414937759338e-05, "loss": 1.4373, "step": 16458 }, { "epoch": 13.658921161825726, "grad_norm": 49.85882568359375, "learning_rate": 1.4540082987551868e-05, "loss": 0.6618, "step": 16459 }, { "epoch": 13.659751037344398, "grad_norm": 24.28862762451172, "learning_rate": 1.45397510373444e-05, "loss": 0.69, "step": 16460 }, { "epoch": 13.66058091286307, "grad_norm": 55.64018249511719, "learning_rate": 1.4539419087136931e-05, "loss": 0.83, "step": 16461 }, { "epoch": 13.661410788381742, "grad_norm": 18.806859970092773, "learning_rate": 
1.4539087136929461e-05, "loss": 1.0056, "step": 16462 }, { "epoch": 13.662240663900414, "grad_norm": 31.14631462097168, "learning_rate": 1.4538755186721993e-05, "loss": 1.2446, "step": 16463 }, { "epoch": 13.663070539419087, "grad_norm": 30.071012496948242, "learning_rate": 1.4538423236514526e-05, "loss": 0.8641, "step": 16464 }, { "epoch": 13.663900414937759, "grad_norm": 22.89892578125, "learning_rate": 1.4538091286307054e-05, "loss": 0.8069, "step": 16465 }, { "epoch": 13.664730290456431, "grad_norm": 34.868438720703125, "learning_rate": 1.4537759336099586e-05, "loss": 0.8312, "step": 16466 }, { "epoch": 13.665560165975103, "grad_norm": 31.766456604003906, "learning_rate": 1.4537427385892118e-05, "loss": 0.7473, "step": 16467 }, { "epoch": 13.666390041493775, "grad_norm": 38.698970794677734, "learning_rate": 1.4537095435684649e-05, "loss": 1.5976, "step": 16468 }, { "epoch": 13.667219917012448, "grad_norm": 28.41403579711914, "learning_rate": 1.453676348547718e-05, "loss": 0.6253, "step": 16469 }, { "epoch": 13.66804979253112, "grad_norm": 51.51092529296875, "learning_rate": 1.453643153526971e-05, "loss": 1.4221, "step": 16470 }, { "epoch": 13.668879668049792, "grad_norm": 49.2899284362793, "learning_rate": 1.4536099585062242e-05, "loss": 1.2965, "step": 16471 }, { "epoch": 13.669709543568464, "grad_norm": 36.36006164550781, "learning_rate": 1.4535767634854774e-05, "loss": 0.4865, "step": 16472 }, { "epoch": 13.670539419087136, "grad_norm": 22.2783203125, "learning_rate": 1.4535435684647303e-05, "loss": 0.685, "step": 16473 }, { "epoch": 13.671369294605809, "grad_norm": 55.16065979003906, "learning_rate": 1.4535103734439835e-05, "loss": 1.0786, "step": 16474 }, { "epoch": 13.67219917012448, "grad_norm": 36.960018157958984, "learning_rate": 1.4534771784232367e-05, "loss": 0.7522, "step": 16475 }, { "epoch": 13.673029045643153, "grad_norm": 23.403423309326172, "learning_rate": 1.4534439834024899e-05, "loss": 0.5827, "step": 16476 }, { "epoch": 13.673858921161825, 
"grad_norm": 21.354894638061523, "learning_rate": 1.4534107883817428e-05, "loss": 0.8231, "step": 16477 }, { "epoch": 13.674688796680497, "grad_norm": 51.27610397338867, "learning_rate": 1.453377593360996e-05, "loss": 0.5611, "step": 16478 }, { "epoch": 13.67551867219917, "grad_norm": 54.01047134399414, "learning_rate": 1.453344398340249e-05, "loss": 1.4845, "step": 16479 }, { "epoch": 13.676348547717842, "grad_norm": 24.99005126953125, "learning_rate": 1.4533112033195022e-05, "loss": 0.7654, "step": 16480 }, { "epoch": 13.677178423236514, "grad_norm": 40.0710334777832, "learning_rate": 1.4532780082987553e-05, "loss": 0.8565, "step": 16481 }, { "epoch": 13.678008298755186, "grad_norm": 47.92324447631836, "learning_rate": 1.4532448132780083e-05, "loss": 0.8964, "step": 16482 }, { "epoch": 13.678838174273858, "grad_norm": 57.55611038208008, "learning_rate": 1.4532116182572615e-05, "loss": 1.2829, "step": 16483 }, { "epoch": 13.67966804979253, "grad_norm": 40.38777160644531, "learning_rate": 1.4531784232365147e-05, "loss": 0.9004, "step": 16484 }, { "epoch": 13.680497925311203, "grad_norm": 16.01397132873535, "learning_rate": 1.4531452282157676e-05, "loss": 0.3472, "step": 16485 }, { "epoch": 13.681327800829875, "grad_norm": 38.921138763427734, "learning_rate": 1.4531120331950208e-05, "loss": 0.8979, "step": 16486 }, { "epoch": 13.682157676348547, "grad_norm": 44.25019454956055, "learning_rate": 1.453078838174274e-05, "loss": 0.7381, "step": 16487 }, { "epoch": 13.68298755186722, "grad_norm": 33.59400177001953, "learning_rate": 1.453045643153527e-05, "loss": 1.1933, "step": 16488 }, { "epoch": 13.683817427385891, "grad_norm": 32.90849685668945, "learning_rate": 1.4530124481327803e-05, "loss": 0.9097, "step": 16489 }, { "epoch": 13.684647302904564, "grad_norm": 99.83934020996094, "learning_rate": 1.4529792531120333e-05, "loss": 0.8914, "step": 16490 }, { "epoch": 13.685477178423236, "grad_norm": 39.593936920166016, "learning_rate": 1.4529460580912864e-05, "loss": 
0.8082, "step": 16491 }, { "epoch": 13.686307053941908, "grad_norm": 39.37751388549805, "learning_rate": 1.4529128630705396e-05, "loss": 1.3083, "step": 16492 }, { "epoch": 13.68713692946058, "grad_norm": 52.2065544128418, "learning_rate": 1.4528796680497928e-05, "loss": 1.4568, "step": 16493 }, { "epoch": 13.687966804979252, "grad_norm": 29.909135818481445, "learning_rate": 1.4528464730290457e-05, "loss": 0.7355, "step": 16494 }, { "epoch": 13.688796680497925, "grad_norm": 39.72385025024414, "learning_rate": 1.4528132780082989e-05, "loss": 1.0067, "step": 16495 }, { "epoch": 13.689626556016597, "grad_norm": 29.006237030029297, "learning_rate": 1.452780082987552e-05, "loss": 1.0623, "step": 16496 }, { "epoch": 13.690456431535269, "grad_norm": 37.536399841308594, "learning_rate": 1.4527468879668051e-05, "loss": 0.7623, "step": 16497 }, { "epoch": 13.691286307053941, "grad_norm": 65.82341766357422, "learning_rate": 1.4527136929460582e-05, "loss": 0.6083, "step": 16498 }, { "epoch": 13.692116182572613, "grad_norm": 49.63440704345703, "learning_rate": 1.4526804979253114e-05, "loss": 1.5254, "step": 16499 }, { "epoch": 13.692946058091286, "grad_norm": 50.491764068603516, "learning_rate": 1.4526473029045644e-05, "loss": 1.572, "step": 16500 }, { "epoch": 13.693775933609958, "grad_norm": 34.9974250793457, "learning_rate": 1.4526141078838176e-05, "loss": 0.9585, "step": 16501 }, { "epoch": 13.69460580912863, "grad_norm": 29.604328155517578, "learning_rate": 1.4525809128630705e-05, "loss": 0.7638, "step": 16502 }, { "epoch": 13.695435684647302, "grad_norm": 37.22750473022461, "learning_rate": 1.4525477178423237e-05, "loss": 0.7578, "step": 16503 }, { "epoch": 13.696265560165974, "grad_norm": 43.027347564697266, "learning_rate": 1.452514522821577e-05, "loss": 1.208, "step": 16504 }, { "epoch": 13.697095435684647, "grad_norm": 36.38091278076172, "learning_rate": 1.4524813278008301e-05, "loss": 1.3957, "step": 16505 }, { "epoch": 13.697925311203319, "grad_norm": 
34.89442825317383, "learning_rate": 1.452448132780083e-05, "loss": 0.8303, "step": 16506 }, { "epoch": 13.698755186721991, "grad_norm": 24.126766204833984, "learning_rate": 1.4524149377593362e-05, "loss": 0.9243, "step": 16507 }, { "epoch": 13.699585062240663, "grad_norm": 23.905729293823242, "learning_rate": 1.4523817427385894e-05, "loss": 0.7798, "step": 16508 }, { "epoch": 13.700414937759335, "grad_norm": 31.397064208984375, "learning_rate": 1.4523485477178425e-05, "loss": 0.4746, "step": 16509 }, { "epoch": 13.701244813278008, "grad_norm": 47.26971435546875, "learning_rate": 1.4523153526970955e-05, "loss": 1.3626, "step": 16510 }, { "epoch": 13.70207468879668, "grad_norm": 37.95038986206055, "learning_rate": 1.4522821576763485e-05, "loss": 0.7022, "step": 16511 }, { "epoch": 13.702904564315352, "grad_norm": 21.033464431762695, "learning_rate": 1.4522489626556018e-05, "loss": 0.9082, "step": 16512 }, { "epoch": 13.703734439834024, "grad_norm": 38.9677619934082, "learning_rate": 1.452215767634855e-05, "loss": 1.14, "step": 16513 }, { "epoch": 13.704564315352696, "grad_norm": 58.34987258911133, "learning_rate": 1.4521825726141082e-05, "loss": 0.7777, "step": 16514 }, { "epoch": 13.705394190871369, "grad_norm": 38.36479187011719, "learning_rate": 1.452149377593361e-05, "loss": 1.3814, "step": 16515 }, { "epoch": 13.70622406639004, "grad_norm": 25.722822189331055, "learning_rate": 1.4521161825726143e-05, "loss": 0.4904, "step": 16516 }, { "epoch": 13.707053941908713, "grad_norm": 52.287601470947266, "learning_rate": 1.4520829875518673e-05, "loss": 1.1645, "step": 16517 }, { "epoch": 13.707883817427385, "grad_norm": 50.415061950683594, "learning_rate": 1.4520497925311205e-05, "loss": 0.5358, "step": 16518 }, { "epoch": 13.708713692946057, "grad_norm": 50.95372009277344, "learning_rate": 1.4520165975103736e-05, "loss": 0.6552, "step": 16519 }, { "epoch": 13.70954356846473, "grad_norm": 21.930734634399414, "learning_rate": 1.4519834024896266e-05, "loss": 0.4652, 
"step": 16520 }, { "epoch": 13.710373443983402, "grad_norm": 43.9061393737793, "learning_rate": 1.4519502074688798e-05, "loss": 1.3878, "step": 16521 }, { "epoch": 13.711203319502074, "grad_norm": 60.546913146972656, "learning_rate": 1.451917012448133e-05, "loss": 0.9762, "step": 16522 }, { "epoch": 13.712033195020746, "grad_norm": 16.769384384155273, "learning_rate": 1.4518838174273859e-05, "loss": 0.6033, "step": 16523 }, { "epoch": 13.712863070539418, "grad_norm": 21.166332244873047, "learning_rate": 1.4518506224066391e-05, "loss": 0.9051, "step": 16524 }, { "epoch": 13.71369294605809, "grad_norm": 39.43355178833008, "learning_rate": 1.4518174273858923e-05, "loss": 0.9654, "step": 16525 }, { "epoch": 13.714522821576763, "grad_norm": 29.400188446044922, "learning_rate": 1.4517842323651454e-05, "loss": 0.8436, "step": 16526 }, { "epoch": 13.715352697095435, "grad_norm": 57.172420501708984, "learning_rate": 1.4517510373443984e-05, "loss": 1.0065, "step": 16527 }, { "epoch": 13.716182572614107, "grad_norm": 46.17461013793945, "learning_rate": 1.4517178423236516e-05, "loss": 0.957, "step": 16528 }, { "epoch": 13.71701244813278, "grad_norm": 34.23505783081055, "learning_rate": 1.4516846473029046e-05, "loss": 0.5865, "step": 16529 }, { "epoch": 13.717842323651452, "grad_norm": 37.501502990722656, "learning_rate": 1.4516514522821579e-05, "loss": 0.7067, "step": 16530 }, { "epoch": 13.718672199170124, "grad_norm": 27.114511489868164, "learning_rate": 1.4516182572614109e-05, "loss": 1.1054, "step": 16531 }, { "epoch": 13.719502074688796, "grad_norm": 24.31606674194336, "learning_rate": 1.451585062240664e-05, "loss": 0.516, "step": 16532 }, { "epoch": 13.720331950207468, "grad_norm": 41.26829147338867, "learning_rate": 1.4515518672199171e-05, "loss": 1.1369, "step": 16533 }, { "epoch": 13.72116182572614, "grad_norm": 46.65999221801758, "learning_rate": 1.4515186721991704e-05, "loss": 0.8993, "step": 16534 }, { "epoch": 13.721991701244812, "grad_norm": 32.613922119140625, 
"learning_rate": 1.4514854771784232e-05, "loss": 0.7541, "step": 16535 }, { "epoch": 13.722821576763485, "grad_norm": 31.165348052978516, "learning_rate": 1.4514522821576764e-05, "loss": 0.8252, "step": 16536 }, { "epoch": 13.723651452282157, "grad_norm": 45.01563262939453, "learning_rate": 1.4514190871369297e-05, "loss": 0.8059, "step": 16537 }, { "epoch": 13.724481327800829, "grad_norm": 28.930011749267578, "learning_rate": 1.4513858921161827e-05, "loss": 0.5697, "step": 16538 }, { "epoch": 13.725311203319501, "grad_norm": 26.913169860839844, "learning_rate": 1.4513526970954359e-05, "loss": 0.6027, "step": 16539 }, { "epoch": 13.726141078838173, "grad_norm": 17.982240676879883, "learning_rate": 1.4513195020746888e-05, "loss": 0.4471, "step": 16540 }, { "epoch": 13.726970954356846, "grad_norm": 24.696653366088867, "learning_rate": 1.451286307053942e-05, "loss": 0.834, "step": 16541 }, { "epoch": 13.727800829875518, "grad_norm": 21.949979782104492, "learning_rate": 1.4512531120331952e-05, "loss": 0.4837, "step": 16542 }, { "epoch": 13.72863070539419, "grad_norm": 26.746337890625, "learning_rate": 1.4512199170124484e-05, "loss": 0.7238, "step": 16543 }, { "epoch": 13.729460580912862, "grad_norm": 29.596160888671875, "learning_rate": 1.4511867219917013e-05, "loss": 0.7866, "step": 16544 }, { "epoch": 13.730290456431534, "grad_norm": 60.52825164794922, "learning_rate": 1.4511535269709545e-05, "loss": 0.8878, "step": 16545 }, { "epoch": 13.731120331950207, "grad_norm": 27.705387115478516, "learning_rate": 1.4511203319502077e-05, "loss": 1.3997, "step": 16546 }, { "epoch": 13.731950207468879, "grad_norm": 88.92086029052734, "learning_rate": 1.4510871369294607e-05, "loss": 1.2737, "step": 16547 }, { "epoch": 13.732780082987551, "grad_norm": 61.7694206237793, "learning_rate": 1.4510539419087138e-05, "loss": 0.891, "step": 16548 }, { "epoch": 13.733609958506223, "grad_norm": 28.40062141418457, "learning_rate": 1.4510207468879668e-05, "loss": 0.6417, "step": 16549 }, { 
"epoch": 13.734439834024897, "grad_norm": 20.60833740234375, "learning_rate": 1.45098755186722e-05, "loss": 0.4081, "step": 16550 }, { "epoch": 13.73526970954357, "grad_norm": 30.759258270263672, "learning_rate": 1.4509543568464732e-05, "loss": 0.5373, "step": 16551 }, { "epoch": 13.736099585062242, "grad_norm": 54.23345947265625, "learning_rate": 1.4509211618257261e-05, "loss": 0.4923, "step": 16552 }, { "epoch": 13.736929460580914, "grad_norm": 49.014652252197266, "learning_rate": 1.4508879668049793e-05, "loss": 0.6454, "step": 16553 }, { "epoch": 13.737759336099586, "grad_norm": 58.73792266845703, "learning_rate": 1.4508547717842325e-05, "loss": 1.4017, "step": 16554 }, { "epoch": 13.738589211618258, "grad_norm": 22.67926597595215, "learning_rate": 1.4508215767634858e-05, "loss": 1.3064, "step": 16555 }, { "epoch": 13.73941908713693, "grad_norm": 50.5142822265625, "learning_rate": 1.4507883817427386e-05, "loss": 0.3963, "step": 16556 }, { "epoch": 13.740248962655603, "grad_norm": 40.99430847167969, "learning_rate": 1.4507551867219918e-05, "loss": 0.4426, "step": 16557 }, { "epoch": 13.741078838174275, "grad_norm": 32.41435623168945, "learning_rate": 1.4507219917012449e-05, "loss": 0.3864, "step": 16558 }, { "epoch": 13.741908713692947, "grad_norm": 26.994529724121094, "learning_rate": 1.4506887966804981e-05, "loss": 0.532, "step": 16559 }, { "epoch": 13.74273858921162, "grad_norm": 50.884010314941406, "learning_rate": 1.4506556016597511e-05, "loss": 0.5774, "step": 16560 }, { "epoch": 13.743568464730291, "grad_norm": 53.99303436279297, "learning_rate": 1.4506224066390042e-05, "loss": 1.3656, "step": 16561 }, { "epoch": 13.744398340248964, "grad_norm": 58.24440383911133, "learning_rate": 1.4505892116182574e-05, "loss": 1.2347, "step": 16562 }, { "epoch": 13.745228215767636, "grad_norm": 28.097938537597656, "learning_rate": 1.4505560165975106e-05, "loss": 0.4577, "step": 16563 }, { "epoch": 13.746058091286308, "grad_norm": 51.755638122558594, "learning_rate": 
1.4505228215767635e-05, "loss": 1.158, "step": 16564 }, { "epoch": 13.74688796680498, "grad_norm": 51.9119873046875, "learning_rate": 1.4504896265560167e-05, "loss": 0.4864, "step": 16565 }, { "epoch": 13.747717842323652, "grad_norm": 26.4263858795166, "learning_rate": 1.4504564315352699e-05, "loss": 0.7027, "step": 16566 }, { "epoch": 13.748547717842325, "grad_norm": 28.4714298248291, "learning_rate": 1.450423236514523e-05, "loss": 0.659, "step": 16567 }, { "epoch": 13.749377593360997, "grad_norm": 21.461280822753906, "learning_rate": 1.4503900414937761e-05, "loss": 0.3749, "step": 16568 }, { "epoch": 13.750207468879669, "grad_norm": 32.471012115478516, "learning_rate": 1.4503568464730292e-05, "loss": 0.7721, "step": 16569 }, { "epoch": 13.751037344398341, "grad_norm": 35.26089859008789, "learning_rate": 1.4503236514522822e-05, "loss": 0.8543, "step": 16570 }, { "epoch": 13.751867219917013, "grad_norm": 42.10589599609375, "learning_rate": 1.4502904564315354e-05, "loss": 1.3854, "step": 16571 }, { "epoch": 13.752697095435686, "grad_norm": 26.938817977905273, "learning_rate": 1.4502572614107886e-05, "loss": 0.637, "step": 16572 }, { "epoch": 13.753526970954358, "grad_norm": 23.691246032714844, "learning_rate": 1.4502240663900415e-05, "loss": 0.9391, "step": 16573 }, { "epoch": 13.75435684647303, "grad_norm": 15.143439292907715, "learning_rate": 1.4501908713692947e-05, "loss": 0.3455, "step": 16574 }, { "epoch": 13.755186721991702, "grad_norm": 59.02429962158203, "learning_rate": 1.450157676348548e-05, "loss": 1.3886, "step": 16575 }, { "epoch": 13.756016597510374, "grad_norm": 20.87101936340332, "learning_rate": 1.450124481327801e-05, "loss": 0.4376, "step": 16576 }, { "epoch": 13.756846473029047, "grad_norm": 40.90685272216797, "learning_rate": 1.450091286307054e-05, "loss": 0.7974, "step": 16577 }, { "epoch": 13.757676348547719, "grad_norm": 30.828834533691406, "learning_rate": 1.4500580912863072e-05, "loss": 1.0882, "step": 16578 }, { "epoch": 13.758506224066391, 
"grad_norm": 102.32353210449219, "learning_rate": 1.4500248962655603e-05, "loss": 1.579, "step": 16579 }, { "epoch": 13.759336099585063, "grad_norm": 24.9006404876709, "learning_rate": 1.4499917012448135e-05, "loss": 0.6462, "step": 16580 }, { "epoch": 13.760165975103735, "grad_norm": 26.07994270324707, "learning_rate": 1.4499585062240664e-05, "loss": 0.4359, "step": 16581 }, { "epoch": 13.760995850622407, "grad_norm": 41.95911407470703, "learning_rate": 1.4499253112033196e-05, "loss": 0.3759, "step": 16582 }, { "epoch": 13.76182572614108, "grad_norm": 29.322559356689453, "learning_rate": 1.4498921161825728e-05, "loss": 1.2373, "step": 16583 }, { "epoch": 13.762655601659752, "grad_norm": 17.522464752197266, "learning_rate": 1.449858921161826e-05, "loss": 0.4478, "step": 16584 }, { "epoch": 13.763485477178424, "grad_norm": 58.76836395263672, "learning_rate": 1.4498257261410789e-05, "loss": 0.744, "step": 16585 }, { "epoch": 13.764315352697096, "grad_norm": 42.55122375488281, "learning_rate": 1.449792531120332e-05, "loss": 0.9359, "step": 16586 }, { "epoch": 13.765145228215768, "grad_norm": 33.03590393066406, "learning_rate": 1.4497593360995851e-05, "loss": 0.7876, "step": 16587 }, { "epoch": 13.76597510373444, "grad_norm": 28.299524307250977, "learning_rate": 1.4497261410788383e-05, "loss": 0.5517, "step": 16588 }, { "epoch": 13.766804979253113, "grad_norm": 31.101696014404297, "learning_rate": 1.4496929460580914e-05, "loss": 0.6775, "step": 16589 }, { "epoch": 13.767634854771785, "grad_norm": 59.21754455566406, "learning_rate": 1.4496597510373444e-05, "loss": 0.9613, "step": 16590 }, { "epoch": 13.768464730290457, "grad_norm": 53.699039459228516, "learning_rate": 1.4496265560165976e-05, "loss": 0.5165, "step": 16591 }, { "epoch": 13.76929460580913, "grad_norm": 50.35204315185547, "learning_rate": 1.4495933609958508e-05, "loss": 1.081, "step": 16592 }, { "epoch": 13.770124481327802, "grad_norm": 66.80307006835938, "learning_rate": 1.449560165975104e-05, "loss": 
0.663, "step": 16593 }, { "epoch": 13.770954356846474, "grad_norm": 36.18699264526367, "learning_rate": 1.4495269709543569e-05, "loss": 0.7523, "step": 16594 }, { "epoch": 13.771784232365146, "grad_norm": 54.48859786987305, "learning_rate": 1.4494937759336101e-05, "loss": 1.0282, "step": 16595 }, { "epoch": 13.772614107883818, "grad_norm": 52.14504623413086, "learning_rate": 1.4494605809128632e-05, "loss": 1.1595, "step": 16596 }, { "epoch": 13.77344398340249, "grad_norm": 32.92266845703125, "learning_rate": 1.4494273858921164e-05, "loss": 0.7707, "step": 16597 }, { "epoch": 13.774273858921163, "grad_norm": 62.19657516479492, "learning_rate": 1.4493941908713694e-05, "loss": 0.9701, "step": 16598 }, { "epoch": 13.775103734439835, "grad_norm": 41.160396575927734, "learning_rate": 1.4493609958506225e-05, "loss": 0.8624, "step": 16599 }, { "epoch": 13.775933609958507, "grad_norm": 34.83687973022461, "learning_rate": 1.4493278008298757e-05, "loss": 1.3658, "step": 16600 }, { "epoch": 13.77676348547718, "grad_norm": 72.2635726928711, "learning_rate": 1.4492946058091289e-05, "loss": 0.6063, "step": 16601 }, { "epoch": 13.777593360995851, "grad_norm": 69.85037994384766, "learning_rate": 1.4492614107883817e-05, "loss": 1.0603, "step": 16602 }, { "epoch": 13.778423236514524, "grad_norm": 79.51607513427734, "learning_rate": 1.449228215767635e-05, "loss": 1.3669, "step": 16603 }, { "epoch": 13.779253112033196, "grad_norm": 53.400428771972656, "learning_rate": 1.4491950207468882e-05, "loss": 0.776, "step": 16604 }, { "epoch": 13.780082987551868, "grad_norm": 40.88530349731445, "learning_rate": 1.4491618257261412e-05, "loss": 0.6106, "step": 16605 }, { "epoch": 13.78091286307054, "grad_norm": 26.768835067749023, "learning_rate": 1.4491286307053942e-05, "loss": 0.4969, "step": 16606 }, { "epoch": 13.781742738589212, "grad_norm": 22.74188804626465, "learning_rate": 1.4490954356846475e-05, "loss": 0.674, "step": 16607 }, { "epoch": 13.782572614107885, "grad_norm": 
26.459903717041016, "learning_rate": 1.4490622406639005e-05, "loss": 1.1662, "step": 16608 }, { "epoch": 13.783402489626557, "grad_norm": 46.433326721191406, "learning_rate": 1.4490290456431537e-05, "loss": 0.7228, "step": 16609 }, { "epoch": 13.784232365145229, "grad_norm": 45.68252944946289, "learning_rate": 1.4489958506224066e-05, "loss": 0.8101, "step": 16610 }, { "epoch": 13.785062240663901, "grad_norm": 34.99354553222656, "learning_rate": 1.4489626556016598e-05, "loss": 0.871, "step": 16611 }, { "epoch": 13.785892116182573, "grad_norm": 24.972349166870117, "learning_rate": 1.448929460580913e-05, "loss": 0.9382, "step": 16612 }, { "epoch": 13.786721991701246, "grad_norm": 33.05036163330078, "learning_rate": 1.4488962655601662e-05, "loss": 0.6081, "step": 16613 }, { "epoch": 13.787551867219918, "grad_norm": 39.172523498535156, "learning_rate": 1.4488630705394191e-05, "loss": 2.0111, "step": 16614 }, { "epoch": 13.78838174273859, "grad_norm": 30.43540382385254, "learning_rate": 1.4488298755186723e-05, "loss": 0.3518, "step": 16615 }, { "epoch": 13.789211618257262, "grad_norm": 21.213960647583008, "learning_rate": 1.4487966804979255e-05, "loss": 0.3059, "step": 16616 }, { "epoch": 13.790041493775934, "grad_norm": 32.939395904541016, "learning_rate": 1.4487634854771786e-05, "loss": 1.6111, "step": 16617 }, { "epoch": 13.790871369294607, "grad_norm": 24.542909622192383, "learning_rate": 1.4487302904564316e-05, "loss": 1.0232, "step": 16618 }, { "epoch": 13.791701244813279, "grad_norm": 21.631772994995117, "learning_rate": 1.4486970954356846e-05, "loss": 0.5223, "step": 16619 }, { "epoch": 13.792531120331951, "grad_norm": 79.86119079589844, "learning_rate": 1.4486639004149378e-05, "loss": 0.9378, "step": 16620 }, { "epoch": 13.793360995850623, "grad_norm": 25.61663055419922, "learning_rate": 1.448630705394191e-05, "loss": 0.7114, "step": 16621 }, { "epoch": 13.794190871369295, "grad_norm": 23.85950469970703, "learning_rate": 1.4485975103734443e-05, "loss": 0.4504, 
"step": 16622 }, { "epoch": 13.795020746887968, "grad_norm": 156.7353973388672, "learning_rate": 1.4485643153526971e-05, "loss": 0.6001, "step": 16623 }, { "epoch": 13.79585062240664, "grad_norm": 27.051877975463867, "learning_rate": 1.4485311203319503e-05, "loss": 0.8, "step": 16624 }, { "epoch": 13.796680497925312, "grad_norm": 38.50188446044922, "learning_rate": 1.4484979253112036e-05, "loss": 0.6781, "step": 16625 }, { "epoch": 13.797510373443984, "grad_norm": 23.903661727905273, "learning_rate": 1.4484647302904566e-05, "loss": 0.8103, "step": 16626 }, { "epoch": 13.798340248962656, "grad_norm": 42.64488983154297, "learning_rate": 1.4484315352697096e-05, "loss": 0.418, "step": 16627 }, { "epoch": 13.799170124481329, "grad_norm": 35.35896682739258, "learning_rate": 1.4483983402489627e-05, "loss": 0.7652, "step": 16628 }, { "epoch": 13.8, "grad_norm": 61.636680603027344, "learning_rate": 1.4483651452282159e-05, "loss": 1.0587, "step": 16629 }, { "epoch": 13.800829875518673, "grad_norm": 32.53717041015625, "learning_rate": 1.4483319502074691e-05, "loss": 0.535, "step": 16630 }, { "epoch": 13.801659751037345, "grad_norm": 36.738372802734375, "learning_rate": 1.448298755186722e-05, "loss": 1.093, "step": 16631 }, { "epoch": 13.802489626556017, "grad_norm": 36.71175765991211, "learning_rate": 1.4482655601659752e-05, "loss": 0.5112, "step": 16632 }, { "epoch": 13.80331950207469, "grad_norm": 57.057212829589844, "learning_rate": 1.4482323651452284e-05, "loss": 0.7621, "step": 16633 }, { "epoch": 13.804149377593362, "grad_norm": 34.26393127441406, "learning_rate": 1.4481991701244814e-05, "loss": 0.375, "step": 16634 }, { "epoch": 13.804979253112034, "grad_norm": 53.79976272583008, "learning_rate": 1.4481659751037345e-05, "loss": 0.869, "step": 16635 }, { "epoch": 13.805809128630706, "grad_norm": 86.66986846923828, "learning_rate": 1.4481327800829877e-05, "loss": 0.9796, "step": 16636 }, { "epoch": 13.806639004149378, "grad_norm": 41.89595031738281, "learning_rate": 
1.4480995850622407e-05, "loss": 0.8302, "step": 16637 }, { "epoch": 13.80746887966805, "grad_norm": 35.15324401855469, "learning_rate": 1.448066390041494e-05, "loss": 1.0029, "step": 16638 }, { "epoch": 13.808298755186723, "grad_norm": 42.08485794067383, "learning_rate": 1.448033195020747e-05, "loss": 1.4868, "step": 16639 }, { "epoch": 13.809128630705395, "grad_norm": 95.30309295654297, "learning_rate": 1.448e-05, "loss": 1.2428, "step": 16640 }, { "epoch": 13.809958506224067, "grad_norm": 37.65353775024414, "learning_rate": 1.4479668049792532e-05, "loss": 1.0127, "step": 16641 }, { "epoch": 13.81078838174274, "grad_norm": 28.454608917236328, "learning_rate": 1.4479336099585064e-05, "loss": 0.6052, "step": 16642 }, { "epoch": 13.811618257261411, "grad_norm": 22.440223693847656, "learning_rate": 1.4479004149377593e-05, "loss": 0.6114, "step": 16643 }, { "epoch": 13.812448132780084, "grad_norm": 36.75968551635742, "learning_rate": 1.4478672199170125e-05, "loss": 0.7198, "step": 16644 }, { "epoch": 13.813278008298756, "grad_norm": 48.77050018310547, "learning_rate": 1.4478340248962657e-05, "loss": 1.3486, "step": 16645 }, { "epoch": 13.814107883817428, "grad_norm": 34.227149963378906, "learning_rate": 1.4478008298755188e-05, "loss": 0.9586, "step": 16646 }, { "epoch": 13.8149377593361, "grad_norm": 21.69150733947754, "learning_rate": 1.447767634854772e-05, "loss": 0.5152, "step": 16647 }, { "epoch": 13.815767634854772, "grad_norm": 21.64289093017578, "learning_rate": 1.447734439834025e-05, "loss": 0.5036, "step": 16648 }, { "epoch": 13.816597510373445, "grad_norm": 30.051321029663086, "learning_rate": 1.447701244813278e-05, "loss": 1.1507, "step": 16649 }, { "epoch": 13.817427385892117, "grad_norm": 32.35662078857422, "learning_rate": 1.4476680497925313e-05, "loss": 1.4838, "step": 16650 }, { "epoch": 13.818257261410789, "grad_norm": 33.75348663330078, "learning_rate": 1.4476348547717845e-05, "loss": 1.0366, "step": 16651 }, { "epoch": 13.819087136929461, 
"grad_norm": 64.14188385009766, "learning_rate": 1.4476016597510374e-05, "loss": 1.0498, "step": 16652 }, { "epoch": 13.819917012448133, "grad_norm": 41.15034103393555, "learning_rate": 1.4475684647302906e-05, "loss": 1.4641, "step": 16653 }, { "epoch": 13.820746887966806, "grad_norm": 33.06264877319336, "learning_rate": 1.4475352697095438e-05, "loss": 0.6407, "step": 16654 }, { "epoch": 13.821576763485478, "grad_norm": 39.51482009887695, "learning_rate": 1.4475020746887968e-05, "loss": 0.7609, "step": 16655 }, { "epoch": 13.82240663900415, "grad_norm": 21.3614501953125, "learning_rate": 1.4474688796680499e-05, "loss": 0.4561, "step": 16656 }, { "epoch": 13.823236514522822, "grad_norm": 54.87507247924805, "learning_rate": 1.447435684647303e-05, "loss": 1.1018, "step": 16657 }, { "epoch": 13.824066390041494, "grad_norm": 21.818408966064453, "learning_rate": 1.4474024896265561e-05, "loss": 0.9381, "step": 16658 }, { "epoch": 13.824896265560167, "grad_norm": 33.38654708862305, "learning_rate": 1.4473692946058093e-05, "loss": 1.3005, "step": 16659 }, { "epoch": 13.825726141078839, "grad_norm": 28.891101837158203, "learning_rate": 1.4473360995850622e-05, "loss": 0.8334, "step": 16660 }, { "epoch": 13.826556016597511, "grad_norm": 25.952741622924805, "learning_rate": 1.4473029045643154e-05, "loss": 0.7925, "step": 16661 }, { "epoch": 13.827385892116183, "grad_norm": 27.22991180419922, "learning_rate": 1.4472697095435686e-05, "loss": 0.2647, "step": 16662 }, { "epoch": 13.828215767634855, "grad_norm": 14.366483688354492, "learning_rate": 1.4472365145228218e-05, "loss": 0.2757, "step": 16663 }, { "epoch": 13.829045643153528, "grad_norm": 29.337064743041992, "learning_rate": 1.4472033195020747e-05, "loss": 0.811, "step": 16664 }, { "epoch": 13.8298755186722, "grad_norm": 18.895017623901367, "learning_rate": 1.447170124481328e-05, "loss": 0.4763, "step": 16665 }, { "epoch": 13.830705394190872, "grad_norm": 60.14879608154297, "learning_rate": 1.447136929460581e-05, "loss": 
0.7974, "step": 16666 }, { "epoch": 13.831535269709544, "grad_norm": 52.76056671142578, "learning_rate": 1.4471037344398342e-05, "loss": 0.8955, "step": 16667 }, { "epoch": 13.832365145228216, "grad_norm": 30.042856216430664, "learning_rate": 1.4470705394190872e-05, "loss": 0.7819, "step": 16668 }, { "epoch": 13.833195020746889, "grad_norm": 45.48800277709961, "learning_rate": 1.4470373443983403e-05, "loss": 0.7581, "step": 16669 }, { "epoch": 13.83402489626556, "grad_norm": 23.870750427246094, "learning_rate": 1.4470041493775935e-05, "loss": 0.5219, "step": 16670 }, { "epoch": 13.834854771784233, "grad_norm": 58.15994644165039, "learning_rate": 1.4469709543568467e-05, "loss": 1.1147, "step": 16671 }, { "epoch": 13.835684647302905, "grad_norm": 28.301197052001953, "learning_rate": 1.4469377593360999e-05, "loss": 0.9571, "step": 16672 }, { "epoch": 13.836514522821577, "grad_norm": 31.59069061279297, "learning_rate": 1.4469045643153528e-05, "loss": 0.6571, "step": 16673 }, { "epoch": 13.83734439834025, "grad_norm": 45.960479736328125, "learning_rate": 1.446871369294606e-05, "loss": 0.5338, "step": 16674 }, { "epoch": 13.838174273858922, "grad_norm": 58.48896026611328, "learning_rate": 1.446838174273859e-05, "loss": 0.7211, "step": 16675 }, { "epoch": 13.839004149377594, "grad_norm": 40.59589767456055, "learning_rate": 1.4468049792531122e-05, "loss": 0.7764, "step": 16676 }, { "epoch": 13.839834024896266, "grad_norm": 39.78786849975586, "learning_rate": 1.4467717842323653e-05, "loss": 0.911, "step": 16677 }, { "epoch": 13.840663900414938, "grad_norm": 40.91973114013672, "learning_rate": 1.4467385892116183e-05, "loss": 1.2066, "step": 16678 }, { "epoch": 13.84149377593361, "grad_norm": 23.9078311920166, "learning_rate": 1.4467053941908715e-05, "loss": 0.3798, "step": 16679 }, { "epoch": 13.842323651452283, "grad_norm": 31.99066162109375, "learning_rate": 1.4466721991701247e-05, "loss": 0.7701, "step": 16680 }, { "epoch": 13.843153526970955, "grad_norm": 
24.615869522094727, "learning_rate": 1.4466390041493776e-05, "loss": 0.7016, "step": 16681 }, { "epoch": 13.843983402489627, "grad_norm": 37.451133728027344, "learning_rate": 1.4466058091286308e-05, "loss": 1.1285, "step": 16682 }, { "epoch": 13.8448132780083, "grad_norm": 20.87245750427246, "learning_rate": 1.446572614107884e-05, "loss": 0.5799, "step": 16683 }, { "epoch": 13.845643153526972, "grad_norm": 31.280460357666016, "learning_rate": 1.446539419087137e-05, "loss": 0.8772, "step": 16684 }, { "epoch": 13.846473029045644, "grad_norm": 47.61805725097656, "learning_rate": 1.4465062240663901e-05, "loss": 0.9377, "step": 16685 }, { "epoch": 13.847302904564316, "grad_norm": 44.48487854003906, "learning_rate": 1.4464730290456433e-05, "loss": 1.1344, "step": 16686 }, { "epoch": 13.848132780082988, "grad_norm": 25.601396560668945, "learning_rate": 1.4464398340248964e-05, "loss": 0.4439, "step": 16687 }, { "epoch": 13.84896265560166, "grad_norm": 19.951854705810547, "learning_rate": 1.4464066390041496e-05, "loss": 0.5037, "step": 16688 }, { "epoch": 13.849792531120332, "grad_norm": 57.85150146484375, "learning_rate": 1.4463734439834024e-05, "loss": 1.1391, "step": 16689 }, { "epoch": 13.850622406639005, "grad_norm": 32.07823944091797, "learning_rate": 1.4463402489626557e-05, "loss": 1.0447, "step": 16690 }, { "epoch": 13.851452282157677, "grad_norm": 35.363990783691406, "learning_rate": 1.4463070539419089e-05, "loss": 0.7696, "step": 16691 }, { "epoch": 13.852282157676349, "grad_norm": 28.331056594848633, "learning_rate": 1.446273858921162e-05, "loss": 0.5451, "step": 16692 }, { "epoch": 13.853112033195021, "grad_norm": 23.417905807495117, "learning_rate": 1.446240663900415e-05, "loss": 0.9252, "step": 16693 }, { "epoch": 13.853941908713693, "grad_norm": 28.161252975463867, "learning_rate": 1.4462074688796682e-05, "loss": 0.8638, "step": 16694 }, { "epoch": 13.854771784232366, "grad_norm": 31.503904342651367, "learning_rate": 1.4461742738589214e-05, "loss": 0.5927, 
"step": 16695 }, { "epoch": 13.855601659751038, "grad_norm": 59.19108581542969, "learning_rate": 1.4461410788381744e-05, "loss": 0.7825, "step": 16696 }, { "epoch": 13.85643153526971, "grad_norm": 15.530597686767578, "learning_rate": 1.4461078838174275e-05, "loss": 0.3499, "step": 16697 }, { "epoch": 13.857261410788382, "grad_norm": 74.93158721923828, "learning_rate": 1.4460746887966805e-05, "loss": 1.1724, "step": 16698 }, { "epoch": 13.858091286307054, "grad_norm": 106.8910140991211, "learning_rate": 1.4460414937759337e-05, "loss": 1.1434, "step": 16699 }, { "epoch": 13.858921161825727, "grad_norm": 38.224700927734375, "learning_rate": 1.4460082987551869e-05, "loss": 0.7864, "step": 16700 }, { "epoch": 13.859751037344399, "grad_norm": 37.810821533203125, "learning_rate": 1.4459751037344401e-05, "loss": 0.7486, "step": 16701 }, { "epoch": 13.860580912863071, "grad_norm": 43.54936599731445, "learning_rate": 1.445941908713693e-05, "loss": 0.6841, "step": 16702 }, { "epoch": 13.861410788381743, "grad_norm": 33.11280059814453, "learning_rate": 1.4459087136929462e-05, "loss": 1.0341, "step": 16703 }, { "epoch": 13.862240663900415, "grad_norm": 26.63153076171875, "learning_rate": 1.4458755186721992e-05, "loss": 0.8485, "step": 16704 }, { "epoch": 13.863070539419088, "grad_norm": 29.600038528442383, "learning_rate": 1.4458423236514525e-05, "loss": 1.0932, "step": 16705 }, { "epoch": 13.86390041493776, "grad_norm": 21.8031063079834, "learning_rate": 1.4458091286307055e-05, "loss": 0.5511, "step": 16706 }, { "epoch": 13.864730290456432, "grad_norm": 18.976490020751953, "learning_rate": 1.4457759336099585e-05, "loss": 0.3548, "step": 16707 }, { "epoch": 13.865560165975104, "grad_norm": 55.98710632324219, "learning_rate": 1.4457427385892118e-05, "loss": 0.4057, "step": 16708 }, { "epoch": 13.866390041493776, "grad_norm": 30.203231811523438, "learning_rate": 1.445709543568465e-05, "loss": 1.1022, "step": 16709 }, { "epoch": 13.867219917012449, "grad_norm": 70.23120880126953, 
"learning_rate": 1.4456763485477178e-05, "loss": 0.7589, "step": 16710 }, { "epoch": 13.86804979253112, "grad_norm": 23.08807945251465, "learning_rate": 1.445643153526971e-05, "loss": 0.5347, "step": 16711 }, { "epoch": 13.868879668049793, "grad_norm": 67.63764953613281, "learning_rate": 1.4456099585062243e-05, "loss": 0.7044, "step": 16712 }, { "epoch": 13.869709543568465, "grad_norm": 33.94748306274414, "learning_rate": 1.4455767634854773e-05, "loss": 0.9825, "step": 16713 }, { "epoch": 13.870539419087137, "grad_norm": 51.0577392578125, "learning_rate": 1.4455435684647303e-05, "loss": 1.5316, "step": 16714 }, { "epoch": 13.87136929460581, "grad_norm": 39.02167892456055, "learning_rate": 1.4455103734439836e-05, "loss": 1.0244, "step": 16715 }, { "epoch": 13.872199170124482, "grad_norm": 38.164581298828125, "learning_rate": 1.4454771784232366e-05, "loss": 0.8362, "step": 16716 }, { "epoch": 13.873029045643154, "grad_norm": 27.75682830810547, "learning_rate": 1.4454439834024898e-05, "loss": 0.5424, "step": 16717 }, { "epoch": 13.873858921161826, "grad_norm": 99.1742172241211, "learning_rate": 1.4454107883817427e-05, "loss": 1.3301, "step": 16718 }, { "epoch": 13.874688796680498, "grad_norm": 19.270980834960938, "learning_rate": 1.4453775933609959e-05, "loss": 0.5734, "step": 16719 }, { "epoch": 13.87551867219917, "grad_norm": 28.94858741760254, "learning_rate": 1.4453443983402491e-05, "loss": 0.9646, "step": 16720 }, { "epoch": 13.876348547717843, "grad_norm": 25.239990234375, "learning_rate": 1.4453112033195023e-05, "loss": 0.6214, "step": 16721 }, { "epoch": 13.877178423236515, "grad_norm": 21.20543098449707, "learning_rate": 1.4452780082987552e-05, "loss": 0.4184, "step": 16722 }, { "epoch": 13.878008298755187, "grad_norm": 29.565670013427734, "learning_rate": 1.4452448132780084e-05, "loss": 1.0945, "step": 16723 }, { "epoch": 13.87883817427386, "grad_norm": 27.53741455078125, "learning_rate": 1.4452116182572616e-05, "loss": 0.9356, "step": 16724 }, { "epoch": 
13.879668049792532, "grad_norm": 29.057209014892578, "learning_rate": 1.4451784232365146e-05, "loss": 0.6367, "step": 16725 }, { "epoch": 13.880497925311204, "grad_norm": 18.863840103149414, "learning_rate": 1.4451452282157679e-05, "loss": 0.4996, "step": 16726 }, { "epoch": 13.881327800829876, "grad_norm": 52.94955825805664, "learning_rate": 1.4451120331950207e-05, "loss": 1.2253, "step": 16727 }, { "epoch": 13.882157676348548, "grad_norm": 34.38994598388672, "learning_rate": 1.445078838174274e-05, "loss": 0.8738, "step": 16728 }, { "epoch": 13.88298755186722, "grad_norm": 22.484455108642578, "learning_rate": 1.4450456431535271e-05, "loss": 0.7215, "step": 16729 }, { "epoch": 13.883817427385893, "grad_norm": 101.20806121826172, "learning_rate": 1.4450124481327804e-05, "loss": 1.0304, "step": 16730 }, { "epoch": 13.884647302904565, "grad_norm": 12.752330780029297, "learning_rate": 1.4449792531120332e-05, "loss": 0.461, "step": 16731 }, { "epoch": 13.885477178423237, "grad_norm": 38.47562026977539, "learning_rate": 1.4449460580912864e-05, "loss": 0.6043, "step": 16732 }, { "epoch": 13.88630705394191, "grad_norm": 20.984825134277344, "learning_rate": 1.4449128630705396e-05, "loss": 0.4245, "step": 16733 }, { "epoch": 13.887136929460581, "grad_norm": 42.879302978515625, "learning_rate": 1.4448796680497927e-05, "loss": 1.0494, "step": 16734 }, { "epoch": 13.887966804979254, "grad_norm": 62.77973556518555, "learning_rate": 1.4448464730290457e-05, "loss": 0.9879, "step": 16735 }, { "epoch": 13.888796680497926, "grad_norm": 65.2929916381836, "learning_rate": 1.4448132780082988e-05, "loss": 0.5374, "step": 16736 }, { "epoch": 13.889626556016598, "grad_norm": 35.29282760620117, "learning_rate": 1.444780082987552e-05, "loss": 0.8027, "step": 16737 }, { "epoch": 13.89045643153527, "grad_norm": 38.478538513183594, "learning_rate": 1.4447468879668052e-05, "loss": 0.8867, "step": 16738 }, { "epoch": 13.891286307053942, "grad_norm": 23.22362518310547, "learning_rate": 
1.444713692946058e-05, "loss": 0.7946, "step": 16739 }, { "epoch": 13.892116182572614, "grad_norm": 75.92618560791016, "learning_rate": 1.4446804979253113e-05, "loss": 1.0993, "step": 16740 }, { "epoch": 13.892946058091287, "grad_norm": 35.05873489379883, "learning_rate": 1.4446473029045645e-05, "loss": 1.1452, "step": 16741 }, { "epoch": 13.893775933609959, "grad_norm": 49.203426361083984, "learning_rate": 1.4446141078838177e-05, "loss": 1.2194, "step": 16742 }, { "epoch": 13.894605809128631, "grad_norm": 61.17250442504883, "learning_rate": 1.4445809128630706e-05, "loss": 1.2809, "step": 16743 }, { "epoch": 13.895435684647303, "grad_norm": 37.82389450073242, "learning_rate": 1.4445477178423238e-05, "loss": 0.9196, "step": 16744 }, { "epoch": 13.896265560165975, "grad_norm": 25.16361427307129, "learning_rate": 1.4445145228215768e-05, "loss": 0.5849, "step": 16745 }, { "epoch": 13.897095435684648, "grad_norm": 20.614452362060547, "learning_rate": 1.44448132780083e-05, "loss": 0.5801, "step": 16746 }, { "epoch": 13.89792531120332, "grad_norm": 36.57675552368164, "learning_rate": 1.444448132780083e-05, "loss": 0.6211, "step": 16747 }, { "epoch": 13.898755186721992, "grad_norm": 37.72770309448242, "learning_rate": 1.4444149377593361e-05, "loss": 0.5498, "step": 16748 }, { "epoch": 13.899585062240664, "grad_norm": 33.080387115478516, "learning_rate": 1.4443817427385893e-05, "loss": 0.9958, "step": 16749 }, { "epoch": 13.900414937759336, "grad_norm": 32.380859375, "learning_rate": 1.4443485477178425e-05, "loss": 0.6067, "step": 16750 }, { "epoch": 13.901244813278009, "grad_norm": 38.37990951538086, "learning_rate": 1.4443153526970956e-05, "loss": 0.5073, "step": 16751 }, { "epoch": 13.90207468879668, "grad_norm": 27.342147827148438, "learning_rate": 1.4442821576763486e-05, "loss": 0.7281, "step": 16752 }, { "epoch": 13.902904564315353, "grad_norm": 57.955963134765625, "learning_rate": 1.4442489626556018e-05, "loss": 1.0517, "step": 16753 }, { "epoch": 13.903734439834025, 
"grad_norm": 22.170007705688477, "learning_rate": 1.4442157676348549e-05, "loss": 0.2995, "step": 16754 }, { "epoch": 13.904564315352697, "grad_norm": 46.533512115478516, "learning_rate": 1.444182572614108e-05, "loss": 1.5924, "step": 16755 }, { "epoch": 13.90539419087137, "grad_norm": 43.72378921508789, "learning_rate": 1.4441493775933611e-05, "loss": 0.8388, "step": 16756 }, { "epoch": 13.906224066390042, "grad_norm": 42.901126861572266, "learning_rate": 1.4441161825726142e-05, "loss": 1.2399, "step": 16757 }, { "epoch": 13.907053941908714, "grad_norm": 21.06532859802246, "learning_rate": 1.4440829875518674e-05, "loss": 0.7081, "step": 16758 }, { "epoch": 13.907883817427386, "grad_norm": 51.87728500366211, "learning_rate": 1.4440497925311206e-05, "loss": 0.9724, "step": 16759 }, { "epoch": 13.908713692946058, "grad_norm": 29.706750869750977, "learning_rate": 1.4440165975103735e-05, "loss": 1.0253, "step": 16760 }, { "epoch": 13.90954356846473, "grad_norm": 24.785423278808594, "learning_rate": 1.4439834024896267e-05, "loss": 0.7662, "step": 16761 }, { "epoch": 13.910373443983403, "grad_norm": 26.874074935913086, "learning_rate": 1.4439502074688799e-05, "loss": 0.9344, "step": 16762 }, { "epoch": 13.911203319502075, "grad_norm": 29.529373168945312, "learning_rate": 1.443917012448133e-05, "loss": 1.0439, "step": 16763 }, { "epoch": 13.912033195020747, "grad_norm": 50.033203125, "learning_rate": 1.443883817427386e-05, "loss": 0.5961, "step": 16764 }, { "epoch": 13.91286307053942, "grad_norm": 30.429222106933594, "learning_rate": 1.443850622406639e-05, "loss": 1.0317, "step": 16765 }, { "epoch": 13.913692946058092, "grad_norm": 48.37630844116211, "learning_rate": 1.4438174273858922e-05, "loss": 1.3697, "step": 16766 }, { "epoch": 13.914522821576764, "grad_norm": 52.59975814819336, "learning_rate": 1.4437842323651454e-05, "loss": 1.5987, "step": 16767 }, { "epoch": 13.915352697095436, "grad_norm": 29.246627807617188, "learning_rate": 1.4437510373443983e-05, "loss": 
0.6401, "step": 16768 }, { "epoch": 13.916182572614108, "grad_norm": 60.74733352661133, "learning_rate": 1.4437178423236515e-05, "loss": 1.0373, "step": 16769 }, { "epoch": 13.91701244813278, "grad_norm": 14.60702896118164, "learning_rate": 1.4436846473029047e-05, "loss": 0.4264, "step": 16770 }, { "epoch": 13.917842323651453, "grad_norm": 50.87611389160156, "learning_rate": 1.443651452282158e-05, "loss": 0.693, "step": 16771 }, { "epoch": 13.918672199170125, "grad_norm": 34.26746368408203, "learning_rate": 1.4436182572614108e-05, "loss": 0.5341, "step": 16772 }, { "epoch": 13.919502074688797, "grad_norm": 20.508026123046875, "learning_rate": 1.443585062240664e-05, "loss": 1.037, "step": 16773 }, { "epoch": 13.92033195020747, "grad_norm": 54.409793853759766, "learning_rate": 1.443551867219917e-05, "loss": 0.6014, "step": 16774 }, { "epoch": 13.921161825726141, "grad_norm": 66.60840606689453, "learning_rate": 1.4435186721991703e-05, "loss": 0.4826, "step": 16775 }, { "epoch": 13.921991701244814, "grad_norm": 24.167526245117188, "learning_rate": 1.4434854771784233e-05, "loss": 0.681, "step": 16776 }, { "epoch": 13.922821576763486, "grad_norm": 20.41640281677246, "learning_rate": 1.4434522821576763e-05, "loss": 0.612, "step": 16777 }, { "epoch": 13.923651452282158, "grad_norm": 41.76162338256836, "learning_rate": 1.4434190871369296e-05, "loss": 0.9365, "step": 16778 }, { "epoch": 13.92448132780083, "grad_norm": 29.850584030151367, "learning_rate": 1.4433858921161828e-05, "loss": 0.7949, "step": 16779 }, { "epoch": 13.925311203319502, "grad_norm": 32.50110626220703, "learning_rate": 1.443352697095436e-05, "loss": 0.831, "step": 16780 }, { "epoch": 13.926141078838175, "grad_norm": 64.3088150024414, "learning_rate": 1.4433195020746889e-05, "loss": 1.1435, "step": 16781 }, { "epoch": 13.926970954356847, "grad_norm": 20.43891716003418, "learning_rate": 1.443286307053942e-05, "loss": 0.6811, "step": 16782 }, { "epoch": 13.927800829875519, "grad_norm": 47.07841110229492, 
"learning_rate": 1.4432531120331951e-05, "loss": 0.7878, "step": 16783 }, { "epoch": 13.928630705394191, "grad_norm": 15.554651260375977, "learning_rate": 1.4432199170124483e-05, "loss": 0.4818, "step": 16784 }, { "epoch": 13.929460580912863, "grad_norm": 21.7088623046875, "learning_rate": 1.4431867219917014e-05, "loss": 0.5093, "step": 16785 }, { "epoch": 13.930290456431536, "grad_norm": 22.6588191986084, "learning_rate": 1.4431535269709544e-05, "loss": 0.3779, "step": 16786 }, { "epoch": 13.931120331950208, "grad_norm": 30.875829696655273, "learning_rate": 1.4431203319502076e-05, "loss": 0.8725, "step": 16787 }, { "epoch": 13.93195020746888, "grad_norm": 36.33367156982422, "learning_rate": 1.4430871369294608e-05, "loss": 0.5455, "step": 16788 }, { "epoch": 13.932780082987552, "grad_norm": 22.768264770507812, "learning_rate": 1.4430539419087137e-05, "loss": 0.3831, "step": 16789 }, { "epoch": 13.933609958506224, "grad_norm": 25.229801177978516, "learning_rate": 1.4430207468879669e-05, "loss": 0.4834, "step": 16790 }, { "epoch": 13.934439834024896, "grad_norm": 96.00982666015625, "learning_rate": 1.4429875518672201e-05, "loss": 1.2074, "step": 16791 }, { "epoch": 13.935269709543569, "grad_norm": 29.10576057434082, "learning_rate": 1.4429543568464732e-05, "loss": 0.5578, "step": 16792 }, { "epoch": 13.936099585062241, "grad_norm": 41.2779541015625, "learning_rate": 1.4429211618257262e-05, "loss": 0.8521, "step": 16793 }, { "epoch": 13.936929460580913, "grad_norm": 35.3319206237793, "learning_rate": 1.4428879668049794e-05, "loss": 0.8324, "step": 16794 }, { "epoch": 13.937759336099585, "grad_norm": 35.83606719970703, "learning_rate": 1.4428547717842324e-05, "loss": 0.607, "step": 16795 }, { "epoch": 13.938589211618257, "grad_norm": 41.34834289550781, "learning_rate": 1.4428215767634857e-05, "loss": 1.2276, "step": 16796 }, { "epoch": 13.93941908713693, "grad_norm": 27.428386688232422, "learning_rate": 1.4427883817427385e-05, "loss": 0.6092, "step": 16797 }, { 
"epoch": 13.940248962655602, "grad_norm": 38.450050354003906, "learning_rate": 1.4427551867219917e-05, "loss": 1.3378, "step": 16798 }, { "epoch": 13.941078838174274, "grad_norm": 41.716243743896484, "learning_rate": 1.442721991701245e-05, "loss": 0.549, "step": 16799 }, { "epoch": 13.941908713692946, "grad_norm": 24.404165267944336, "learning_rate": 1.4426887966804982e-05, "loss": 0.6187, "step": 16800 }, { "epoch": 13.942738589211618, "grad_norm": 29.1680965423584, "learning_rate": 1.442655601659751e-05, "loss": 1.1152, "step": 16801 }, { "epoch": 13.94356846473029, "grad_norm": 32.6217041015625, "learning_rate": 1.4426224066390042e-05, "loss": 0.508, "step": 16802 }, { "epoch": 13.944398340248963, "grad_norm": 41.55080795288086, "learning_rate": 1.4425892116182575e-05, "loss": 0.7372, "step": 16803 }, { "epoch": 13.945228215767635, "grad_norm": 37.83256149291992, "learning_rate": 1.4425560165975105e-05, "loss": 1.0647, "step": 16804 }, { "epoch": 13.946058091286307, "grad_norm": 57.05563735961914, "learning_rate": 1.4425228215767637e-05, "loss": 1.4704, "step": 16805 }, { "epoch": 13.94688796680498, "grad_norm": 29.50660514831543, "learning_rate": 1.4424896265560166e-05, "loss": 0.7922, "step": 16806 }, { "epoch": 13.947717842323652, "grad_norm": 42.41972732543945, "learning_rate": 1.4424564315352698e-05, "loss": 1.0577, "step": 16807 }, { "epoch": 13.948547717842324, "grad_norm": 56.3043212890625, "learning_rate": 1.442423236514523e-05, "loss": 1.5087, "step": 16808 }, { "epoch": 13.949377593360996, "grad_norm": 30.617259979248047, "learning_rate": 1.4423900414937762e-05, "loss": 0.7363, "step": 16809 }, { "epoch": 13.950207468879668, "grad_norm": 44.75285339355469, "learning_rate": 1.4423568464730291e-05, "loss": 0.9743, "step": 16810 }, { "epoch": 13.95103734439834, "grad_norm": 26.83798599243164, "learning_rate": 1.4423236514522823e-05, "loss": 1.1865, "step": 16811 }, { "epoch": 13.951867219917013, "grad_norm": 33.71141052246094, "learning_rate": 
1.4422904564315355e-05, "loss": 0.6982, "step": 16812 }, { "epoch": 13.952697095435685, "grad_norm": 57.321922302246094, "learning_rate": 1.4422572614107885e-05, "loss": 0.4997, "step": 16813 }, { "epoch": 13.953526970954357, "grad_norm": 55.6983757019043, "learning_rate": 1.4422240663900416e-05, "loss": 1.2102, "step": 16814 }, { "epoch": 13.95435684647303, "grad_norm": 39.65797805786133, "learning_rate": 1.4421908713692946e-05, "loss": 0.8007, "step": 16815 }, { "epoch": 13.955186721991701, "grad_norm": 33.92543411254883, "learning_rate": 1.4421576763485478e-05, "loss": 1.063, "step": 16816 }, { "epoch": 13.956016597510374, "grad_norm": 29.188074111938477, "learning_rate": 1.442124481327801e-05, "loss": 0.8358, "step": 16817 }, { "epoch": 13.956846473029046, "grad_norm": 50.6756591796875, "learning_rate": 1.442091286307054e-05, "loss": 1.1477, "step": 16818 }, { "epoch": 13.957676348547718, "grad_norm": 39.93839645385742, "learning_rate": 1.4420580912863071e-05, "loss": 1.0755, "step": 16819 }, { "epoch": 13.95850622406639, "grad_norm": 31.366397857666016, "learning_rate": 1.4420248962655603e-05, "loss": 0.6472, "step": 16820 }, { "epoch": 13.959336099585062, "grad_norm": 35.467529296875, "learning_rate": 1.4419917012448134e-05, "loss": 1.1599, "step": 16821 }, { "epoch": 13.960165975103735, "grad_norm": 36.371192932128906, "learning_rate": 1.4419585062240664e-05, "loss": 0.845, "step": 16822 }, { "epoch": 13.960995850622407, "grad_norm": 46.87541198730469, "learning_rate": 1.4419253112033196e-05, "loss": 0.9494, "step": 16823 }, { "epoch": 13.961825726141079, "grad_norm": 19.05035400390625, "learning_rate": 1.4418921161825727e-05, "loss": 0.4645, "step": 16824 }, { "epoch": 13.962655601659751, "grad_norm": 20.300201416015625, "learning_rate": 1.4418589211618259e-05, "loss": 0.4176, "step": 16825 }, { "epoch": 13.963485477178423, "grad_norm": 33.619380950927734, "learning_rate": 1.441825726141079e-05, "loss": 0.6116, "step": 16826 }, { "epoch": 
13.964315352697096, "grad_norm": 28.229692459106445, "learning_rate": 1.441792531120332e-05, "loss": 0.7674, "step": 16827 }, { "epoch": 13.965145228215768, "grad_norm": 30.427059173583984, "learning_rate": 1.4417593360995852e-05, "loss": 1.1755, "step": 16828 }, { "epoch": 13.96597510373444, "grad_norm": 26.24864387512207, "learning_rate": 1.4417261410788384e-05, "loss": 0.7718, "step": 16829 }, { "epoch": 13.966804979253112, "grad_norm": 24.57832908630371, "learning_rate": 1.4416929460580914e-05, "loss": 0.7123, "step": 16830 }, { "epoch": 13.967634854771784, "grad_norm": 65.47406005859375, "learning_rate": 1.4416597510373445e-05, "loss": 1.2839, "step": 16831 }, { "epoch": 13.968464730290457, "grad_norm": 32.992549896240234, "learning_rate": 1.4416265560165977e-05, "loss": 0.8138, "step": 16832 }, { "epoch": 13.969294605809129, "grad_norm": 29.746768951416016, "learning_rate": 1.4415933609958507e-05, "loss": 1.245, "step": 16833 }, { "epoch": 13.970124481327801, "grad_norm": 20.400115966796875, "learning_rate": 1.441560165975104e-05, "loss": 0.3622, "step": 16834 }, { "epoch": 13.970954356846473, "grad_norm": 48.446746826171875, "learning_rate": 1.4415269709543568e-05, "loss": 0.8781, "step": 16835 }, { "epoch": 13.971784232365145, "grad_norm": 36.83527374267578, "learning_rate": 1.44149377593361e-05, "loss": 0.5646, "step": 16836 }, { "epoch": 13.972614107883818, "grad_norm": 43.45823287963867, "learning_rate": 1.4414605809128632e-05, "loss": 1.1841, "step": 16837 }, { "epoch": 13.97344398340249, "grad_norm": 49.01694107055664, "learning_rate": 1.4414273858921164e-05, "loss": 0.9529, "step": 16838 }, { "epoch": 13.974273858921162, "grad_norm": 45.51223373413086, "learning_rate": 1.4413941908713693e-05, "loss": 1.3599, "step": 16839 }, { "epoch": 13.975103734439834, "grad_norm": 24.184154510498047, "learning_rate": 1.4413609958506225e-05, "loss": 0.5949, "step": 16840 }, { "epoch": 13.975933609958506, "grad_norm": 32.10837173461914, "learning_rate": 
1.4413278008298757e-05, "loss": 0.7061, "step": 16841 }, { "epoch": 13.976763485477179, "grad_norm": 25.778669357299805, "learning_rate": 1.4412946058091288e-05, "loss": 0.6537, "step": 16842 }, { "epoch": 13.97759336099585, "grad_norm": 32.97513961791992, "learning_rate": 1.4412614107883818e-05, "loss": 0.7917, "step": 16843 }, { "epoch": 13.978423236514523, "grad_norm": 56.030174255371094, "learning_rate": 1.4412282157676349e-05, "loss": 0.807, "step": 16844 }, { "epoch": 13.979253112033195, "grad_norm": 27.161293029785156, "learning_rate": 1.441195020746888e-05, "loss": 0.6018, "step": 16845 }, { "epoch": 13.980082987551867, "grad_norm": 40.54700469970703, "learning_rate": 1.4411618257261413e-05, "loss": 0.6371, "step": 16846 }, { "epoch": 13.98091286307054, "grad_norm": 31.59906578063965, "learning_rate": 1.4411286307053942e-05, "loss": 0.9381, "step": 16847 }, { "epoch": 13.981742738589212, "grad_norm": 27.918136596679688, "learning_rate": 1.4410954356846474e-05, "loss": 0.5545, "step": 16848 }, { "epoch": 13.982572614107884, "grad_norm": 28.68439483642578, "learning_rate": 1.4410622406639006e-05, "loss": 0.9043, "step": 16849 }, { "epoch": 13.983402489626556, "grad_norm": 41.676536560058594, "learning_rate": 1.4410290456431538e-05, "loss": 1.525, "step": 16850 }, { "epoch": 13.984232365145228, "grad_norm": 16.94954490661621, "learning_rate": 1.4409958506224067e-05, "loss": 0.4774, "step": 16851 }, { "epoch": 13.9850622406639, "grad_norm": 17.79486846923828, "learning_rate": 1.4409626556016599e-05, "loss": 0.6796, "step": 16852 }, { "epoch": 13.985892116182573, "grad_norm": 70.72628784179688, "learning_rate": 1.4409294605809129e-05, "loss": 0.8394, "step": 16853 }, { "epoch": 13.986721991701245, "grad_norm": 74.60723114013672, "learning_rate": 1.4408962655601661e-05, "loss": 0.6696, "step": 16854 }, { "epoch": 13.987551867219917, "grad_norm": 44.520660400390625, "learning_rate": 1.4408630705394192e-05, "loss": 1.0918, "step": 16855 }, { "epoch": 
13.98838174273859, "grad_norm": 25.103294372558594, "learning_rate": 1.4408298755186722e-05, "loss": 0.5889, "step": 16856 }, { "epoch": 13.989211618257261, "grad_norm": 42.06807327270508, "learning_rate": 1.4407966804979254e-05, "loss": 0.8304, "step": 16857 }, { "epoch": 13.990041493775934, "grad_norm": 38.64164352416992, "learning_rate": 1.4407634854771786e-05, "loss": 1.2603, "step": 16858 }, { "epoch": 13.990871369294606, "grad_norm": 46.6401252746582, "learning_rate": 1.4407302904564318e-05, "loss": 1.2216, "step": 16859 }, { "epoch": 13.991701244813278, "grad_norm": 25.730939865112305, "learning_rate": 1.4406970954356847e-05, "loss": 0.8166, "step": 16860 }, { "epoch": 13.99253112033195, "grad_norm": 23.821041107177734, "learning_rate": 1.440663900414938e-05, "loss": 0.5597, "step": 16861 }, { "epoch": 13.993360995850622, "grad_norm": 25.625022888183594, "learning_rate": 1.440630705394191e-05, "loss": 0.5206, "step": 16862 }, { "epoch": 13.994190871369295, "grad_norm": 24.98287582397461, "learning_rate": 1.4405975103734442e-05, "loss": 0.6482, "step": 16863 }, { "epoch": 13.995020746887967, "grad_norm": 60.48137283325195, "learning_rate": 1.4405643153526972e-05, "loss": 0.7777, "step": 16864 }, { "epoch": 13.995850622406639, "grad_norm": 21.213693618774414, "learning_rate": 1.4405311203319503e-05, "loss": 0.5236, "step": 16865 }, { "epoch": 13.996680497925311, "grad_norm": 51.329063415527344, "learning_rate": 1.4404979253112035e-05, "loss": 0.6561, "step": 16866 }, { "epoch": 13.997510373443983, "grad_norm": 25.336576461791992, "learning_rate": 1.4404647302904567e-05, "loss": 0.8998, "step": 16867 }, { "epoch": 13.998340248962656, "grad_norm": 32.4039306640625, "learning_rate": 1.4404315352697096e-05, "loss": 1.0144, "step": 16868 }, { "epoch": 13.999170124481328, "grad_norm": 50.54963684082031, "learning_rate": 1.4403983402489628e-05, "loss": 1.2206, "step": 16869 }, { "epoch": 14.0, "grad_norm": 35.162349700927734, "learning_rate": 1.440365145228216e-05, 
"loss": 1.0113, "step": 16870 }, { "epoch": 14.000829875518672, "grad_norm": 33.16037368774414, "learning_rate": 1.440331950207469e-05, "loss": 0.5594, "step": 16871 }, { "epoch": 14.001659751037344, "grad_norm": 24.62651252746582, "learning_rate": 1.440298755186722e-05, "loss": 0.6931, "step": 16872 }, { "epoch": 14.002489626556017, "grad_norm": 44.092552185058594, "learning_rate": 1.4402655601659753e-05, "loss": 1.0552, "step": 16873 }, { "epoch": 14.003319502074689, "grad_norm": 31.295988082885742, "learning_rate": 1.4402323651452283e-05, "loss": 0.9081, "step": 16874 }, { "epoch": 14.004149377593361, "grad_norm": 31.378890991210938, "learning_rate": 1.4401991701244815e-05, "loss": 0.9395, "step": 16875 }, { "epoch": 14.004979253112033, "grad_norm": 13.04053020477295, "learning_rate": 1.4401659751037344e-05, "loss": 0.3829, "step": 16876 }, { "epoch": 14.005809128630705, "grad_norm": 30.711212158203125, "learning_rate": 1.4401327800829876e-05, "loss": 1.1986, "step": 16877 }, { "epoch": 14.006639004149378, "grad_norm": 44.313846588134766, "learning_rate": 1.4400995850622408e-05, "loss": 0.978, "step": 16878 }, { "epoch": 14.00746887966805, "grad_norm": 44.293663024902344, "learning_rate": 1.440066390041494e-05, "loss": 1.2485, "step": 16879 }, { "epoch": 14.008298755186722, "grad_norm": 96.24246215820312, "learning_rate": 1.4400331950207469e-05, "loss": 0.5892, "step": 16880 }, { "epoch": 14.009128630705394, "grad_norm": 33.0840950012207, "learning_rate": 1.4400000000000001e-05, "loss": 1.1199, "step": 16881 }, { "epoch": 14.009958506224066, "grad_norm": 23.366744995117188, "learning_rate": 1.4399668049792531e-05, "loss": 0.5411, "step": 16882 }, { "epoch": 14.010788381742739, "grad_norm": 28.48468017578125, "learning_rate": 1.4399336099585064e-05, "loss": 0.6845, "step": 16883 }, { "epoch": 14.01161825726141, "grad_norm": 28.496131896972656, "learning_rate": 1.4399004149377596e-05, "loss": 0.584, "step": 16884 }, { "epoch": 14.012448132780083, "grad_norm": 
33.24047088623047, "learning_rate": 1.4398672199170124e-05, "loss": 0.4469, "step": 16885 }, { "epoch": 14.013278008298755, "grad_norm": 27.68849754333496, "learning_rate": 1.4398340248962656e-05, "loss": 0.7726, "step": 16886 }, { "epoch": 14.014107883817427, "grad_norm": 21.632978439331055, "learning_rate": 1.4398008298755189e-05, "loss": 0.7938, "step": 16887 }, { "epoch": 14.0149377593361, "grad_norm": 34.02706527709961, "learning_rate": 1.439767634854772e-05, "loss": 0.8001, "step": 16888 }, { "epoch": 14.015767634854772, "grad_norm": 29.848726272583008, "learning_rate": 1.439734439834025e-05, "loss": 0.4747, "step": 16889 }, { "epoch": 14.016597510373444, "grad_norm": 37.869754791259766, "learning_rate": 1.4397012448132782e-05, "loss": 0.9298, "step": 16890 }, { "epoch": 14.017427385892116, "grad_norm": 25.054683685302734, "learning_rate": 1.4396680497925312e-05, "loss": 0.5378, "step": 16891 }, { "epoch": 14.018257261410788, "grad_norm": 24.57187843322754, "learning_rate": 1.4396348547717844e-05, "loss": 0.5526, "step": 16892 }, { "epoch": 14.01908713692946, "grad_norm": 30.91054344177246, "learning_rate": 1.4396016597510374e-05, "loss": 0.4746, "step": 16893 }, { "epoch": 14.019917012448133, "grad_norm": 33.85954666137695, "learning_rate": 1.4395684647302905e-05, "loss": 0.6528, "step": 16894 }, { "epoch": 14.020746887966805, "grad_norm": 45.71002197265625, "learning_rate": 1.4395352697095437e-05, "loss": 0.843, "step": 16895 }, { "epoch": 14.021576763485477, "grad_norm": 62.327266693115234, "learning_rate": 1.4395020746887969e-05, "loss": 0.4169, "step": 16896 }, { "epoch": 14.02240663900415, "grad_norm": 19.4583683013916, "learning_rate": 1.4394688796680498e-05, "loss": 0.6074, "step": 16897 }, { "epoch": 14.023236514522821, "grad_norm": 69.34488677978516, "learning_rate": 1.439435684647303e-05, "loss": 0.8524, "step": 16898 }, { "epoch": 14.024066390041494, "grad_norm": 46.950836181640625, "learning_rate": 1.4394024896265562e-05, "loss": 1.194, "step": 
16899 }, { "epoch": 14.024896265560166, "grad_norm": 43.454105377197266, "learning_rate": 1.4393692946058092e-05, "loss": 0.7275, "step": 16900 }, { "epoch": 14.025726141078838, "grad_norm": 29.546630859375, "learning_rate": 1.4393360995850623e-05, "loss": 0.4375, "step": 16901 }, { "epoch": 14.02655601659751, "grad_norm": 53.61839294433594, "learning_rate": 1.4393029045643155e-05, "loss": 0.5597, "step": 16902 }, { "epoch": 14.027385892116182, "grad_norm": 32.86001205444336, "learning_rate": 1.4392697095435685e-05, "loss": 0.9811, "step": 16903 }, { "epoch": 14.028215767634855, "grad_norm": 52.26194763183594, "learning_rate": 1.4392365145228217e-05, "loss": 1.1425, "step": 16904 }, { "epoch": 14.029045643153527, "grad_norm": 81.43724822998047, "learning_rate": 1.4392033195020746e-05, "loss": 0.6276, "step": 16905 }, { "epoch": 14.029875518672199, "grad_norm": 26.469676971435547, "learning_rate": 1.4391701244813278e-05, "loss": 0.6375, "step": 16906 }, { "epoch": 14.030705394190871, "grad_norm": 20.345006942749023, "learning_rate": 1.439136929460581e-05, "loss": 0.4175, "step": 16907 }, { "epoch": 14.031535269709543, "grad_norm": 42.68903732299805, "learning_rate": 1.4391037344398343e-05, "loss": 1.0192, "step": 16908 }, { "epoch": 14.032365145228216, "grad_norm": 48.32445526123047, "learning_rate": 1.4390705394190873e-05, "loss": 0.6453, "step": 16909 }, { "epoch": 14.033195020746888, "grad_norm": 48.93073272705078, "learning_rate": 1.4390373443983403e-05, "loss": 1.1119, "step": 16910 }, { "epoch": 14.03402489626556, "grad_norm": 38.771629333496094, "learning_rate": 1.4390041493775935e-05, "loss": 1.0189, "step": 16911 }, { "epoch": 14.034854771784232, "grad_norm": 47.03920364379883, "learning_rate": 1.4389709543568466e-05, "loss": 0.7508, "step": 16912 }, { "epoch": 14.035684647302904, "grad_norm": 39.51985549926758, "learning_rate": 1.4389377593360998e-05, "loss": 0.7465, "step": 16913 }, { "epoch": 14.036514522821577, "grad_norm": 33.36824417114258, 
"learning_rate": 1.4389045643153527e-05, "loss": 0.7084, "step": 16914 }, { "epoch": 14.037344398340249, "grad_norm": 102.8520736694336, "learning_rate": 1.4388713692946059e-05, "loss": 0.6572, "step": 16915 }, { "epoch": 14.038174273858921, "grad_norm": 20.747802734375, "learning_rate": 1.4388381742738591e-05, "loss": 0.4367, "step": 16916 }, { "epoch": 14.039004149377593, "grad_norm": 42.09752655029297, "learning_rate": 1.4388049792531123e-05, "loss": 1.0494, "step": 16917 }, { "epoch": 14.039834024896265, "grad_norm": 50.53681564331055, "learning_rate": 1.4387717842323652e-05, "loss": 1.4613, "step": 16918 }, { "epoch": 14.040663900414938, "grad_norm": 40.327266693115234, "learning_rate": 1.4387385892116184e-05, "loss": 1.1621, "step": 16919 }, { "epoch": 14.04149377593361, "grad_norm": 14.414925575256348, "learning_rate": 1.4387053941908716e-05, "loss": 0.2957, "step": 16920 }, { "epoch": 14.042323651452282, "grad_norm": 73.83399963378906, "learning_rate": 1.4386721991701246e-05, "loss": 0.9125, "step": 16921 }, { "epoch": 14.043153526970954, "grad_norm": 32.190147399902344, "learning_rate": 1.4386390041493777e-05, "loss": 0.9771, "step": 16922 }, { "epoch": 14.043983402489626, "grad_norm": 32.50409698486328, "learning_rate": 1.4386058091286307e-05, "loss": 0.4613, "step": 16923 }, { "epoch": 14.044813278008299, "grad_norm": 67.9633560180664, "learning_rate": 1.438572614107884e-05, "loss": 0.6632, "step": 16924 }, { "epoch": 14.04564315352697, "grad_norm": 76.83466339111328, "learning_rate": 1.4385394190871371e-05, "loss": 0.9783, "step": 16925 }, { "epoch": 14.046473029045643, "grad_norm": 30.938444137573242, "learning_rate": 1.43850622406639e-05, "loss": 0.8906, "step": 16926 }, { "epoch": 14.047302904564315, "grad_norm": 29.566762924194336, "learning_rate": 1.4384730290456432e-05, "loss": 0.9719, "step": 16927 }, { "epoch": 14.048132780082987, "grad_norm": 66.81951904296875, "learning_rate": 1.4384398340248964e-05, "loss": 1.066, "step": 16928 }, { "epoch": 
14.04896265560166, "grad_norm": 57.10285186767578, "learning_rate": 1.4384066390041496e-05, "loss": 0.7791, "step": 16929 }, { "epoch": 14.049792531120332, "grad_norm": 28.745969772338867, "learning_rate": 1.4383734439834025e-05, "loss": 0.7435, "step": 16930 }, { "epoch": 14.050622406639004, "grad_norm": 30.221454620361328, "learning_rate": 1.4383402489626557e-05, "loss": 0.809, "step": 16931 }, { "epoch": 14.051452282157676, "grad_norm": 48.95492172241211, "learning_rate": 1.4383070539419088e-05, "loss": 0.9198, "step": 16932 }, { "epoch": 14.052282157676348, "grad_norm": 76.36296081542969, "learning_rate": 1.438273858921162e-05, "loss": 0.5015, "step": 16933 }, { "epoch": 14.05311203319502, "grad_norm": 23.239707946777344, "learning_rate": 1.438240663900415e-05, "loss": 0.4184, "step": 16934 }, { "epoch": 14.053941908713693, "grad_norm": 34.32476043701172, "learning_rate": 1.438207468879668e-05, "loss": 1.0405, "step": 16935 }, { "epoch": 14.054771784232365, "grad_norm": 101.73912048339844, "learning_rate": 1.4381742738589213e-05, "loss": 0.713, "step": 16936 }, { "epoch": 14.055601659751037, "grad_norm": 44.362483978271484, "learning_rate": 1.4381410788381745e-05, "loss": 1.2237, "step": 16937 }, { "epoch": 14.05643153526971, "grad_norm": 49.51325988769531, "learning_rate": 1.4381078838174275e-05, "loss": 0.5822, "step": 16938 }, { "epoch": 14.057261410788382, "grad_norm": 18.986103057861328, "learning_rate": 1.4380746887966806e-05, "loss": 0.2853, "step": 16939 }, { "epoch": 14.058091286307054, "grad_norm": 26.323877334594727, "learning_rate": 1.4380414937759338e-05, "loss": 0.5719, "step": 16940 }, { "epoch": 14.058921161825726, "grad_norm": 44.74270248413086, "learning_rate": 1.4380082987551868e-05, "loss": 0.8649, "step": 16941 }, { "epoch": 14.059751037344398, "grad_norm": 81.42604064941406, "learning_rate": 1.43797510373444e-05, "loss": 0.6586, "step": 16942 }, { "epoch": 14.06058091286307, "grad_norm": 31.591272354125977, "learning_rate": 
1.437941908713693e-05, "loss": 0.6669, "step": 16943 }, { "epoch": 14.061410788381743, "grad_norm": 41.53738784790039, "learning_rate": 1.4379087136929461e-05, "loss": 1.1472, "step": 16944 }, { "epoch": 14.062240663900415, "grad_norm": 78.69261169433594, "learning_rate": 1.4378755186721993e-05, "loss": 0.8286, "step": 16945 }, { "epoch": 14.063070539419087, "grad_norm": 28.65282440185547, "learning_rate": 1.4378423236514525e-05, "loss": 0.7584, "step": 16946 }, { "epoch": 14.063900414937759, "grad_norm": 31.425336837768555, "learning_rate": 1.4378091286307054e-05, "loss": 0.9493, "step": 16947 }, { "epoch": 14.064730290456431, "grad_norm": 33.5496940612793, "learning_rate": 1.4377759336099586e-05, "loss": 0.3458, "step": 16948 }, { "epoch": 14.065560165975104, "grad_norm": 25.57681655883789, "learning_rate": 1.4377427385892118e-05, "loss": 0.5267, "step": 16949 }, { "epoch": 14.066390041493776, "grad_norm": 27.768293380737305, "learning_rate": 1.4377095435684649e-05, "loss": 0.8508, "step": 16950 }, { "epoch": 14.067219917012448, "grad_norm": 62.48857879638672, "learning_rate": 1.4376763485477179e-05, "loss": 0.7388, "step": 16951 }, { "epoch": 14.06804979253112, "grad_norm": 38.25503158569336, "learning_rate": 1.437643153526971e-05, "loss": 0.6705, "step": 16952 }, { "epoch": 14.068879668049792, "grad_norm": 58.30080795288086, "learning_rate": 1.4376099585062242e-05, "loss": 0.4606, "step": 16953 }, { "epoch": 14.069709543568464, "grad_norm": 38.70116424560547, "learning_rate": 1.4375767634854774e-05, "loss": 0.5962, "step": 16954 }, { "epoch": 14.070539419087137, "grad_norm": 45.5283317565918, "learning_rate": 1.4375435684647302e-05, "loss": 0.7676, "step": 16955 }, { "epoch": 14.071369294605809, "grad_norm": 14.667092323303223, "learning_rate": 1.4375103734439835e-05, "loss": 0.3549, "step": 16956 }, { "epoch": 14.072199170124481, "grad_norm": 32.89346694946289, "learning_rate": 1.4374771784232367e-05, "loss": 0.5467, "step": 16957 }, { "epoch": 
14.073029045643153, "grad_norm": 20.363447189331055, "learning_rate": 1.4374439834024899e-05, "loss": 0.4443, "step": 16958 }, { "epoch": 14.073858921161825, "grad_norm": 15.022452354431152, "learning_rate": 1.4374107883817428e-05, "loss": 0.3956, "step": 16959 }, { "epoch": 14.074688796680498, "grad_norm": 39.421226501464844, "learning_rate": 1.437377593360996e-05, "loss": 0.7161, "step": 16960 }, { "epoch": 14.07551867219917, "grad_norm": 32.10633087158203, "learning_rate": 1.437344398340249e-05, "loss": 0.7325, "step": 16961 }, { "epoch": 14.076348547717842, "grad_norm": 79.73303985595703, "learning_rate": 1.4373112033195022e-05, "loss": 0.6162, "step": 16962 }, { "epoch": 14.077178423236514, "grad_norm": 35.41780090332031, "learning_rate": 1.4372780082987554e-05, "loss": 0.5709, "step": 16963 }, { "epoch": 14.078008298755186, "grad_norm": 63.69845962524414, "learning_rate": 1.4372448132780083e-05, "loss": 0.8598, "step": 16964 }, { "epoch": 14.078838174273859, "grad_norm": 42.780513763427734, "learning_rate": 1.4372116182572615e-05, "loss": 0.785, "step": 16965 }, { "epoch": 14.07966804979253, "grad_norm": 32.828556060791016, "learning_rate": 1.4371784232365147e-05, "loss": 0.5128, "step": 16966 }, { "epoch": 14.080497925311203, "grad_norm": 71.88509368896484, "learning_rate": 1.437145228215768e-05, "loss": 0.8127, "step": 16967 }, { "epoch": 14.081327800829875, "grad_norm": 28.68441390991211, "learning_rate": 1.4371120331950208e-05, "loss": 0.8771, "step": 16968 }, { "epoch": 14.082157676348547, "grad_norm": 66.02500915527344, "learning_rate": 1.437078838174274e-05, "loss": 0.7145, "step": 16969 }, { "epoch": 14.08298755186722, "grad_norm": 56.829036712646484, "learning_rate": 1.437045643153527e-05, "loss": 0.6179, "step": 16970 }, { "epoch": 14.083817427385892, "grad_norm": 67.2924575805664, "learning_rate": 1.4370124481327803e-05, "loss": 0.963, "step": 16971 }, { "epoch": 14.084647302904564, "grad_norm": 52.292625427246094, "learning_rate": 
1.4369792531120333e-05, "loss": 0.8567, "step": 16972 }, { "epoch": 14.085477178423236, "grad_norm": 34.39386749267578, "learning_rate": 1.4369460580912863e-05, "loss": 0.5783, "step": 16973 }, { "epoch": 14.086307053941908, "grad_norm": 36.48178482055664, "learning_rate": 1.4369128630705396e-05, "loss": 0.5516, "step": 16974 }, { "epoch": 14.08713692946058, "grad_norm": 33.51902389526367, "learning_rate": 1.4368796680497928e-05, "loss": 0.8347, "step": 16975 }, { "epoch": 14.087966804979253, "grad_norm": 30.209463119506836, "learning_rate": 1.4368464730290456e-05, "loss": 0.7199, "step": 16976 }, { "epoch": 14.088796680497925, "grad_norm": 46.87710952758789, "learning_rate": 1.4368132780082989e-05, "loss": 1.1881, "step": 16977 }, { "epoch": 14.089626556016597, "grad_norm": 20.27918815612793, "learning_rate": 1.436780082987552e-05, "loss": 0.3575, "step": 16978 }, { "epoch": 14.09045643153527, "grad_norm": 17.77094268798828, "learning_rate": 1.4367468879668051e-05, "loss": 0.4214, "step": 16979 }, { "epoch": 14.091286307053942, "grad_norm": 42.68156433105469, "learning_rate": 1.4367136929460581e-05, "loss": 1.3435, "step": 16980 }, { "epoch": 14.092116182572614, "grad_norm": 83.87068939208984, "learning_rate": 1.4366804979253114e-05, "loss": 0.7539, "step": 16981 }, { "epoch": 14.092946058091286, "grad_norm": 13.558157920837402, "learning_rate": 1.4366473029045644e-05, "loss": 0.277, "step": 16982 }, { "epoch": 14.093775933609958, "grad_norm": 49.81568908691406, "learning_rate": 1.4366141078838176e-05, "loss": 0.9239, "step": 16983 }, { "epoch": 14.09460580912863, "grad_norm": 32.374053955078125, "learning_rate": 1.4365809128630705e-05, "loss": 0.7547, "step": 16984 }, { "epoch": 14.095435684647303, "grad_norm": 30.627164840698242, "learning_rate": 1.4365477178423237e-05, "loss": 0.9869, "step": 16985 }, { "epoch": 14.096265560165975, "grad_norm": 35.90603256225586, "learning_rate": 1.4365145228215769e-05, "loss": 1.0163, "step": 16986 }, { "epoch": 
14.097095435684647, "grad_norm": 35.959510803222656, "learning_rate": 1.4364813278008301e-05, "loss": 1.0281, "step": 16987 }, { "epoch": 14.09792531120332, "grad_norm": 35.71550369262695, "learning_rate": 1.4364481327800832e-05, "loss": 0.6855, "step": 16988 }, { "epoch": 14.098755186721991, "grad_norm": 20.07259178161621, "learning_rate": 1.4364149377593362e-05, "loss": 0.5044, "step": 16989 }, { "epoch": 14.099585062240664, "grad_norm": 33.4423942565918, "learning_rate": 1.4363817427385894e-05, "loss": 1.0282, "step": 16990 }, { "epoch": 14.100414937759336, "grad_norm": 24.108911514282227, "learning_rate": 1.4363485477178424e-05, "loss": 0.4757, "step": 16991 }, { "epoch": 14.101244813278008, "grad_norm": 39.29739761352539, "learning_rate": 1.4363153526970957e-05, "loss": 0.9572, "step": 16992 }, { "epoch": 14.10207468879668, "grad_norm": 33.555702209472656, "learning_rate": 1.4362821576763485e-05, "loss": 0.6092, "step": 16993 }, { "epoch": 14.102904564315352, "grad_norm": 20.72306251525879, "learning_rate": 1.4362489626556017e-05, "loss": 0.4327, "step": 16994 }, { "epoch": 14.103734439834025, "grad_norm": 31.030454635620117, "learning_rate": 1.436215767634855e-05, "loss": 0.761, "step": 16995 }, { "epoch": 14.104564315352697, "grad_norm": 45.93869400024414, "learning_rate": 1.4361825726141082e-05, "loss": 0.7529, "step": 16996 }, { "epoch": 14.105394190871369, "grad_norm": 43.44696807861328, "learning_rate": 1.436149377593361e-05, "loss": 1.3368, "step": 16997 }, { "epoch": 14.106224066390041, "grad_norm": 58.41657638549805, "learning_rate": 1.4361161825726142e-05, "loss": 0.3383, "step": 16998 }, { "epoch": 14.107053941908713, "grad_norm": 36.334556579589844, "learning_rate": 1.4360829875518673e-05, "loss": 0.652, "step": 16999 }, { "epoch": 14.107883817427386, "grad_norm": 123.99468994140625, "learning_rate": 1.4360497925311205e-05, "loss": 0.7548, "step": 17000 }, { "epoch": 14.108713692946058, "grad_norm": 43.3160514831543, "learning_rate": 
1.4360165975103735e-05, "loss": 0.3577, "step": 17001 }, { "epoch": 14.10954356846473, "grad_norm": 57.64722442626953, "learning_rate": 1.4359834024896266e-05, "loss": 0.6059, "step": 17002 }, { "epoch": 14.110373443983402, "grad_norm": 36.183685302734375, "learning_rate": 1.4359502074688798e-05, "loss": 0.5135, "step": 17003 }, { "epoch": 14.111203319502074, "grad_norm": 51.06283950805664, "learning_rate": 1.435917012448133e-05, "loss": 1.0142, "step": 17004 }, { "epoch": 14.112033195020746, "grad_norm": 70.74353790283203, "learning_rate": 1.4358838174273859e-05, "loss": 0.2749, "step": 17005 }, { "epoch": 14.112863070539419, "grad_norm": 41.33143997192383, "learning_rate": 1.435850622406639e-05, "loss": 0.5257, "step": 17006 }, { "epoch": 14.11369294605809, "grad_norm": 61.636661529541016, "learning_rate": 1.4358174273858923e-05, "loss": 1.2291, "step": 17007 }, { "epoch": 14.114522821576763, "grad_norm": 107.84270477294922, "learning_rate": 1.4357842323651453e-05, "loss": 0.7925, "step": 17008 }, { "epoch": 14.115352697095435, "grad_norm": 20.891948699951172, "learning_rate": 1.4357510373443984e-05, "loss": 0.3588, "step": 17009 }, { "epoch": 14.116182572614107, "grad_norm": 30.821884155273438, "learning_rate": 1.4357178423236516e-05, "loss": 0.3112, "step": 17010 }, { "epoch": 14.11701244813278, "grad_norm": 32.507896423339844, "learning_rate": 1.4356846473029046e-05, "loss": 0.6797, "step": 17011 }, { "epoch": 14.117842323651452, "grad_norm": 19.6857852935791, "learning_rate": 1.4356514522821578e-05, "loss": 0.5173, "step": 17012 }, { "epoch": 14.118672199170124, "grad_norm": 53.203636169433594, "learning_rate": 1.4356182572614109e-05, "loss": 1.1477, "step": 17013 }, { "epoch": 14.119502074688796, "grad_norm": 25.07965660095215, "learning_rate": 1.435585062240664e-05, "loss": 0.5682, "step": 17014 }, { "epoch": 14.120331950207468, "grad_norm": 61.33209991455078, "learning_rate": 1.4355518672199171e-05, "loss": 0.8061, "step": 17015 }, { "epoch": 
14.12116182572614, "grad_norm": 25.980276107788086, "learning_rate": 1.4355186721991703e-05, "loss": 0.6069, "step": 17016 }, { "epoch": 14.121991701244813, "grad_norm": 22.466598510742188, "learning_rate": 1.4354854771784234e-05, "loss": 0.5649, "step": 17017 }, { "epoch": 14.122821576763485, "grad_norm": 26.535322189331055, "learning_rate": 1.4354522821576764e-05, "loss": 0.4319, "step": 17018 }, { "epoch": 14.123651452282157, "grad_norm": 31.98516845703125, "learning_rate": 1.4354190871369296e-05, "loss": 0.9606, "step": 17019 }, { "epoch": 14.12448132780083, "grad_norm": 29.696224212646484, "learning_rate": 1.4353858921161827e-05, "loss": 0.5831, "step": 17020 }, { "epoch": 14.125311203319502, "grad_norm": 14.27514362335205, "learning_rate": 1.4353526970954359e-05, "loss": 0.3538, "step": 17021 }, { "epoch": 14.126141078838174, "grad_norm": 67.95591735839844, "learning_rate": 1.4353195020746888e-05, "loss": 0.4961, "step": 17022 }, { "epoch": 14.126970954356846, "grad_norm": 28.390758514404297, "learning_rate": 1.435286307053942e-05, "loss": 0.4466, "step": 17023 }, { "epoch": 14.127800829875518, "grad_norm": 39.320499420166016, "learning_rate": 1.4352531120331952e-05, "loss": 0.783, "step": 17024 }, { "epoch": 14.12863070539419, "grad_norm": 78.39484405517578, "learning_rate": 1.4352199170124484e-05, "loss": 0.6251, "step": 17025 }, { "epoch": 14.129460580912863, "grad_norm": 36.58730697631836, "learning_rate": 1.4351867219917013e-05, "loss": 0.6719, "step": 17026 }, { "epoch": 14.130290456431535, "grad_norm": 48.6446647644043, "learning_rate": 1.4351535269709545e-05, "loss": 1.0139, "step": 17027 }, { "epoch": 14.131120331950207, "grad_norm": 28.888410568237305, "learning_rate": 1.4351203319502077e-05, "loss": 0.8978, "step": 17028 }, { "epoch": 14.13195020746888, "grad_norm": 25.977746963500977, "learning_rate": 1.4350871369294607e-05, "loss": 0.67, "step": 17029 }, { "epoch": 14.132780082987551, "grad_norm": 27.656871795654297, "learning_rate": 
1.4350539419087138e-05, "loss": 0.479, "step": 17030 }, { "epoch": 14.133609958506224, "grad_norm": 29.1475830078125, "learning_rate": 1.4350207468879668e-05, "loss": 0.7792, "step": 17031 }, { "epoch": 14.134439834024896, "grad_norm": 52.0392951965332, "learning_rate": 1.43498755186722e-05, "loss": 1.0663, "step": 17032 }, { "epoch": 14.135269709543568, "grad_norm": 30.469411849975586, "learning_rate": 1.4349543568464732e-05, "loss": 1.1236, "step": 17033 }, { "epoch": 14.13609958506224, "grad_norm": 28.02646827697754, "learning_rate": 1.4349211618257261e-05, "loss": 0.6945, "step": 17034 }, { "epoch": 14.136929460580912, "grad_norm": 41.42253875732422, "learning_rate": 1.4348879668049793e-05, "loss": 0.6655, "step": 17035 }, { "epoch": 14.137759336099585, "grad_norm": 22.384042739868164, "learning_rate": 1.4348547717842325e-05, "loss": 0.9436, "step": 17036 }, { "epoch": 14.138589211618257, "grad_norm": 38.930633544921875, "learning_rate": 1.4348215767634857e-05, "loss": 0.6048, "step": 17037 }, { "epoch": 14.139419087136929, "grad_norm": 14.907967567443848, "learning_rate": 1.4347883817427386e-05, "loss": 0.3245, "step": 17038 }, { "epoch": 14.140248962655601, "grad_norm": 27.76801300048828, "learning_rate": 1.4347551867219918e-05, "loss": 0.6274, "step": 17039 }, { "epoch": 14.141078838174273, "grad_norm": 36.528106689453125, "learning_rate": 1.4347219917012449e-05, "loss": 1.3826, "step": 17040 }, { "epoch": 14.141908713692946, "grad_norm": 20.917461395263672, "learning_rate": 1.434688796680498e-05, "loss": 0.4785, "step": 17041 }, { "epoch": 14.142738589211618, "grad_norm": 122.30091094970703, "learning_rate": 1.4346556016597513e-05, "loss": 1.0787, "step": 17042 }, { "epoch": 14.14356846473029, "grad_norm": 65.97027587890625, "learning_rate": 1.4346224066390042e-05, "loss": 1.0944, "step": 17043 }, { "epoch": 14.144398340248962, "grad_norm": 155.7740478515625, "learning_rate": 1.4345892116182574e-05, "loss": 0.4128, "step": 17044 }, { "epoch": 
14.145228215767634, "grad_norm": 25.16229820251465, "learning_rate": 1.4345560165975106e-05, "loss": 0.6238, "step": 17045 }, { "epoch": 14.146058091286307, "grad_norm": 46.15010452270508, "learning_rate": 1.4345228215767638e-05, "loss": 0.6591, "step": 17046 }, { "epoch": 14.146887966804979, "grad_norm": 20.81608009338379, "learning_rate": 1.4344896265560167e-05, "loss": 0.2752, "step": 17047 }, { "epoch": 14.147717842323651, "grad_norm": 39.100467681884766, "learning_rate": 1.4344564315352699e-05, "loss": 0.8297, "step": 17048 }, { "epoch": 14.148547717842323, "grad_norm": 31.65302276611328, "learning_rate": 1.4344232365145229e-05, "loss": 0.5197, "step": 17049 }, { "epoch": 14.149377593360995, "grad_norm": 46.4171142578125, "learning_rate": 1.4343900414937761e-05, "loss": 0.8762, "step": 17050 }, { "epoch": 14.150207468879668, "grad_norm": 90.7409896850586, "learning_rate": 1.4343568464730292e-05, "loss": 0.7628, "step": 17051 }, { "epoch": 14.15103734439834, "grad_norm": 18.727022171020508, "learning_rate": 1.4343236514522822e-05, "loss": 0.2968, "step": 17052 }, { "epoch": 14.151867219917012, "grad_norm": 41.927371978759766, "learning_rate": 1.4342904564315354e-05, "loss": 0.8219, "step": 17053 }, { "epoch": 14.152697095435684, "grad_norm": 20.475582122802734, "learning_rate": 1.4342572614107886e-05, "loss": 0.5546, "step": 17054 }, { "epoch": 14.153526970954356, "grad_norm": 29.00655174255371, "learning_rate": 1.4342240663900415e-05, "loss": 0.4318, "step": 17055 }, { "epoch": 14.154356846473028, "grad_norm": 26.437307357788086, "learning_rate": 1.4341908713692947e-05, "loss": 0.584, "step": 17056 }, { "epoch": 14.1551867219917, "grad_norm": 54.452476501464844, "learning_rate": 1.434157676348548e-05, "loss": 1.0232, "step": 17057 }, { "epoch": 14.156016597510373, "grad_norm": 51.25889205932617, "learning_rate": 1.434124481327801e-05, "loss": 1.03, "step": 17058 }, { "epoch": 14.156846473029045, "grad_norm": 19.216487884521484, "learning_rate": 
1.434091286307054e-05, "loss": 0.7041, "step": 17059 }, { "epoch": 14.157676348547717, "grad_norm": 34.823299407958984, "learning_rate": 1.4340580912863072e-05, "loss": 0.6461, "step": 17060 }, { "epoch": 14.15850622406639, "grad_norm": 63.9906120300293, "learning_rate": 1.4340248962655603e-05, "loss": 1.0344, "step": 17061 }, { "epoch": 14.159336099585062, "grad_norm": 32.20780563354492, "learning_rate": 1.4339917012448135e-05, "loss": 0.4639, "step": 17062 }, { "epoch": 14.160165975103734, "grad_norm": 21.708690643310547, "learning_rate": 1.4339585062240663e-05, "loss": 0.4303, "step": 17063 }, { "epoch": 14.160995850622406, "grad_norm": 49.117366790771484, "learning_rate": 1.4339253112033195e-05, "loss": 1.0449, "step": 17064 }, { "epoch": 14.161825726141078, "grad_norm": 32.45524978637695, "learning_rate": 1.4338921161825728e-05, "loss": 0.8652, "step": 17065 }, { "epoch": 14.16265560165975, "grad_norm": 23.357004165649414, "learning_rate": 1.433858921161826e-05, "loss": 0.8852, "step": 17066 }, { "epoch": 14.163485477178423, "grad_norm": 32.92020034790039, "learning_rate": 1.433825726141079e-05, "loss": 0.8832, "step": 17067 }, { "epoch": 14.164315352697095, "grad_norm": 45.94520950317383, "learning_rate": 1.433792531120332e-05, "loss": 1.1984, "step": 17068 }, { "epoch": 14.165145228215767, "grad_norm": 28.467227935791016, "learning_rate": 1.4337593360995851e-05, "loss": 0.6068, "step": 17069 }, { "epoch": 14.16597510373444, "grad_norm": 16.958833694458008, "learning_rate": 1.4337261410788383e-05, "loss": 0.2986, "step": 17070 }, { "epoch": 14.166804979253111, "grad_norm": 68.5755386352539, "learning_rate": 1.4336929460580915e-05, "loss": 1.0918, "step": 17071 }, { "epoch": 14.167634854771784, "grad_norm": 43.90007019042969, "learning_rate": 1.4336597510373444e-05, "loss": 1.1091, "step": 17072 }, { "epoch": 14.168464730290456, "grad_norm": 41.03374481201172, "learning_rate": 1.4336265560165976e-05, "loss": 0.8429, "step": 17073 }, { "epoch": 
14.169294605809128, "grad_norm": 47.20672607421875, "learning_rate": 1.4335933609958508e-05, "loss": 0.5646, "step": 17074 }, { "epoch": 14.1701244813278, "grad_norm": 32.1185417175293, "learning_rate": 1.433560165975104e-05, "loss": 0.5682, "step": 17075 }, { "epoch": 14.170954356846472, "grad_norm": 31.926345825195312, "learning_rate": 1.4335269709543569e-05, "loss": 0.3523, "step": 17076 }, { "epoch": 14.171784232365145, "grad_norm": 24.379549026489258, "learning_rate": 1.4334937759336101e-05, "loss": 0.3606, "step": 17077 }, { "epoch": 14.172614107883817, "grad_norm": 92.67494201660156, "learning_rate": 1.4334605809128631e-05, "loss": 0.6191, "step": 17078 }, { "epoch": 14.173443983402489, "grad_norm": 50.596805572509766, "learning_rate": 1.4334273858921164e-05, "loss": 0.6701, "step": 17079 }, { "epoch": 14.174273858921161, "grad_norm": 21.706052780151367, "learning_rate": 1.4333941908713694e-05, "loss": 1.0959, "step": 17080 }, { "epoch": 14.175103734439833, "grad_norm": 72.87464141845703, "learning_rate": 1.4333609958506224e-05, "loss": 0.621, "step": 17081 }, { "epoch": 14.175933609958506, "grad_norm": 35.834625244140625, "learning_rate": 1.4333278008298756e-05, "loss": 1.0971, "step": 17082 }, { "epoch": 14.176763485477178, "grad_norm": 25.597787857055664, "learning_rate": 1.4332946058091289e-05, "loss": 0.6068, "step": 17083 }, { "epoch": 14.17759336099585, "grad_norm": 35.69569396972656, "learning_rate": 1.4332614107883817e-05, "loss": 0.6558, "step": 17084 }, { "epoch": 14.178423236514522, "grad_norm": 37.9462776184082, "learning_rate": 1.433228215767635e-05, "loss": 0.7682, "step": 17085 }, { "epoch": 14.179253112033194, "grad_norm": 61.119285583496094, "learning_rate": 1.4331950207468882e-05, "loss": 0.4464, "step": 17086 }, { "epoch": 14.180082987551867, "grad_norm": 21.526559829711914, "learning_rate": 1.4331618257261412e-05, "loss": 0.381, "step": 17087 }, { "epoch": 14.180912863070539, "grad_norm": 43.8127555847168, "learning_rate": 
1.4331286307053942e-05, "loss": 0.4984, "step": 17088 }, { "epoch": 14.181742738589211, "grad_norm": 22.381240844726562, "learning_rate": 1.4330954356846474e-05, "loss": 0.4152, "step": 17089 }, { "epoch": 14.182572614107883, "grad_norm": 25.1195125579834, "learning_rate": 1.4330622406639005e-05, "loss": 0.7662, "step": 17090 }, { "epoch": 14.183402489626555, "grad_norm": 75.60279846191406, "learning_rate": 1.4330290456431537e-05, "loss": 0.7379, "step": 17091 }, { "epoch": 14.184232365145228, "grad_norm": 56.35022735595703, "learning_rate": 1.4329958506224066e-05, "loss": 1.1255, "step": 17092 }, { "epoch": 14.1850622406639, "grad_norm": 18.916406631469727, "learning_rate": 1.4329626556016598e-05, "loss": 0.4402, "step": 17093 }, { "epoch": 14.185892116182572, "grad_norm": 75.43289947509766, "learning_rate": 1.432929460580913e-05, "loss": 1.1182, "step": 17094 }, { "epoch": 14.186721991701244, "grad_norm": 12.369115829467773, "learning_rate": 1.4328962655601662e-05, "loss": 0.3349, "step": 17095 }, { "epoch": 14.187551867219916, "grad_norm": 17.59101104736328, "learning_rate": 1.4328630705394192e-05, "loss": 0.6028, "step": 17096 }, { "epoch": 14.188381742738589, "grad_norm": 55.650596618652344, "learning_rate": 1.4328298755186723e-05, "loss": 0.8171, "step": 17097 }, { "epoch": 14.18921161825726, "grad_norm": 26.357593536376953, "learning_rate": 1.4327966804979255e-05, "loss": 0.7631, "step": 17098 }, { "epoch": 14.190041493775933, "grad_norm": 45.46114730834961, "learning_rate": 1.4327634854771785e-05, "loss": 0.72, "step": 17099 }, { "epoch": 14.190871369294605, "grad_norm": 30.733367919921875, "learning_rate": 1.4327302904564317e-05, "loss": 0.5431, "step": 17100 }, { "epoch": 14.191701244813277, "grad_norm": 58.03046417236328, "learning_rate": 1.4326970954356846e-05, "loss": 0.6988, "step": 17101 }, { "epoch": 14.19253112033195, "grad_norm": 15.887880325317383, "learning_rate": 1.4326639004149378e-05, "loss": 0.3207, "step": 17102 }, { "epoch": 
14.193360995850622, "grad_norm": 31.168514251708984, "learning_rate": 1.432630705394191e-05, "loss": 0.8326, "step": 17103 }, { "epoch": 14.194190871369294, "grad_norm": 31.359220504760742, "learning_rate": 1.4325975103734443e-05, "loss": 0.6616, "step": 17104 }, { "epoch": 14.195020746887966, "grad_norm": 49.60015869140625, "learning_rate": 1.4325643153526971e-05, "loss": 0.7997, "step": 17105 }, { "epoch": 14.195850622406638, "grad_norm": 39.418296813964844, "learning_rate": 1.4325311203319503e-05, "loss": 1.4971, "step": 17106 }, { "epoch": 14.19668049792531, "grad_norm": 27.385116577148438, "learning_rate": 1.4324979253112035e-05, "loss": 0.4352, "step": 17107 }, { "epoch": 14.197510373443983, "grad_norm": 54.256568908691406, "learning_rate": 1.4324647302904566e-05, "loss": 1.4896, "step": 17108 }, { "epoch": 14.198340248962655, "grad_norm": 34.24220657348633, "learning_rate": 1.4324315352697096e-05, "loss": 0.8086, "step": 17109 }, { "epoch": 14.199170124481327, "grad_norm": 28.655410766601562, "learning_rate": 1.4323983402489627e-05, "loss": 0.8337, "step": 17110 }, { "epoch": 14.2, "grad_norm": 22.299039840698242, "learning_rate": 1.4323651452282159e-05, "loss": 0.2377, "step": 17111 }, { "epoch": 14.200829875518671, "grad_norm": 47.435672760009766, "learning_rate": 1.4323319502074691e-05, "loss": 0.7154, "step": 17112 }, { "epoch": 14.201659751037344, "grad_norm": 24.89506721496582, "learning_rate": 1.432298755186722e-05, "loss": 0.5272, "step": 17113 }, { "epoch": 14.202489626556016, "grad_norm": 31.33269691467285, "learning_rate": 1.4322655601659752e-05, "loss": 1.1392, "step": 17114 }, { "epoch": 14.203319502074688, "grad_norm": 23.97222900390625, "learning_rate": 1.4322323651452284e-05, "loss": 0.6353, "step": 17115 }, { "epoch": 14.20414937759336, "grad_norm": 23.790193557739258, "learning_rate": 1.4321991701244814e-05, "loss": 0.4307, "step": 17116 }, { "epoch": 14.204979253112032, "grad_norm": 57.87824249267578, "learning_rate": 
1.4321659751037345e-05, "loss": 1.6385, "step": 17117 }, { "epoch": 14.205809128630705, "grad_norm": 58.11931610107422, "learning_rate": 1.4321327800829877e-05, "loss": 1.1535, "step": 17118 }, { "epoch": 14.206639004149377, "grad_norm": 26.176706314086914, "learning_rate": 1.4320995850622407e-05, "loss": 0.3557, "step": 17119 }, { "epoch": 14.207468879668049, "grad_norm": 42.86327362060547, "learning_rate": 1.432066390041494e-05, "loss": 0.9547, "step": 17120 }, { "epoch": 14.208298755186721, "grad_norm": 20.489534378051758, "learning_rate": 1.4320331950207471e-05, "loss": 1.1952, "step": 17121 }, { "epoch": 14.209128630705393, "grad_norm": 31.325519561767578, "learning_rate": 1.432e-05, "loss": 0.9323, "step": 17122 }, { "epoch": 14.209958506224066, "grad_norm": 37.5147705078125, "learning_rate": 1.4319668049792532e-05, "loss": 1.4563, "step": 17123 }, { "epoch": 14.210788381742738, "grad_norm": 53.75690841674805, "learning_rate": 1.4319336099585064e-05, "loss": 1.2307, "step": 17124 }, { "epoch": 14.21161825726141, "grad_norm": 34.74467086791992, "learning_rate": 1.4319004149377595e-05, "loss": 1.0314, "step": 17125 }, { "epoch": 14.212448132780082, "grad_norm": 125.83186340332031, "learning_rate": 1.4318672199170125e-05, "loss": 1.3022, "step": 17126 }, { "epoch": 14.213278008298754, "grad_norm": 32.75931167602539, "learning_rate": 1.4318340248962657e-05, "loss": 0.8864, "step": 17127 }, { "epoch": 14.214107883817427, "grad_norm": 41.215919494628906, "learning_rate": 1.4318008298755188e-05, "loss": 1.0106, "step": 17128 }, { "epoch": 14.214937759336099, "grad_norm": 19.546615600585938, "learning_rate": 1.431767634854772e-05, "loss": 0.4704, "step": 17129 }, { "epoch": 14.215767634854771, "grad_norm": 20.294015884399414, "learning_rate": 1.431734439834025e-05, "loss": 0.3803, "step": 17130 }, { "epoch": 14.216597510373443, "grad_norm": 43.50775909423828, "learning_rate": 1.431701244813278e-05, "loss": 0.7396, "step": 17131 }, { "epoch": 14.217427385892115, 
"grad_norm": 32.52995681762695, "learning_rate": 1.4316680497925313e-05, "loss": 0.8465, "step": 17132 }, { "epoch": 14.218257261410788, "grad_norm": 39.74017333984375, "learning_rate": 1.4316348547717845e-05, "loss": 1.2809, "step": 17133 }, { "epoch": 14.21908713692946, "grad_norm": 47.39471435546875, "learning_rate": 1.4316016597510374e-05, "loss": 1.0008, "step": 17134 }, { "epoch": 14.219917012448132, "grad_norm": 40.059326171875, "learning_rate": 1.4315684647302906e-05, "loss": 0.4538, "step": 17135 }, { "epoch": 14.220746887966804, "grad_norm": 32.64807891845703, "learning_rate": 1.4315352697095438e-05, "loss": 0.5135, "step": 17136 }, { "epoch": 14.221576763485476, "grad_norm": 26.93421745300293, "learning_rate": 1.4315020746887968e-05, "loss": 1.0629, "step": 17137 }, { "epoch": 14.222406639004149, "grad_norm": 85.34721374511719, "learning_rate": 1.4314688796680499e-05, "loss": 0.6244, "step": 17138 }, { "epoch": 14.22323651452282, "grad_norm": 34.90650177001953, "learning_rate": 1.4314356846473029e-05, "loss": 0.5637, "step": 17139 }, { "epoch": 14.224066390041493, "grad_norm": 32.4307861328125, "learning_rate": 1.4314024896265561e-05, "loss": 0.6919, "step": 17140 }, { "epoch": 14.224896265560165, "grad_norm": 45.75321960449219, "learning_rate": 1.4313692946058093e-05, "loss": 1.306, "step": 17141 }, { "epoch": 14.225726141078837, "grad_norm": 19.14703941345215, "learning_rate": 1.4313360995850622e-05, "loss": 0.4328, "step": 17142 }, { "epoch": 14.22655601659751, "grad_norm": 34.845394134521484, "learning_rate": 1.4313029045643154e-05, "loss": 0.575, "step": 17143 }, { "epoch": 14.227385892116182, "grad_norm": 37.696651458740234, "learning_rate": 1.4312697095435686e-05, "loss": 0.4763, "step": 17144 }, { "epoch": 14.228215767634854, "grad_norm": 33.07188415527344, "learning_rate": 1.4312365145228218e-05, "loss": 0.5212, "step": 17145 }, { "epoch": 14.229045643153526, "grad_norm": 43.595428466796875, "learning_rate": 1.4312033195020747e-05, "loss": 
0.767, "step": 17146 }, { "epoch": 14.229875518672198, "grad_norm": 17.73513412475586, "learning_rate": 1.4311701244813279e-05, "loss": 0.5224, "step": 17147 }, { "epoch": 14.23070539419087, "grad_norm": 57.839271545410156, "learning_rate": 1.431136929460581e-05, "loss": 0.5519, "step": 17148 }, { "epoch": 14.231535269709543, "grad_norm": 64.37432861328125, "learning_rate": 1.4311037344398342e-05, "loss": 0.5625, "step": 17149 }, { "epoch": 14.232365145228215, "grad_norm": 37.94575881958008, "learning_rate": 1.4310705394190874e-05, "loss": 1.2379, "step": 17150 }, { "epoch": 14.233195020746887, "grad_norm": 86.56932067871094, "learning_rate": 1.4310373443983402e-05, "loss": 1.9462, "step": 17151 }, { "epoch": 14.23402489626556, "grad_norm": 52.961830139160156, "learning_rate": 1.4310041493775935e-05, "loss": 0.9282, "step": 17152 }, { "epoch": 14.234854771784232, "grad_norm": 78.97406768798828, "learning_rate": 1.4309709543568467e-05, "loss": 0.6872, "step": 17153 }, { "epoch": 14.235684647302904, "grad_norm": 31.456342697143555, "learning_rate": 1.4309377593360999e-05, "loss": 0.7666, "step": 17154 }, { "epoch": 14.236514522821576, "grad_norm": 48.3062629699707, "learning_rate": 1.4309045643153527e-05, "loss": 1.5652, "step": 17155 }, { "epoch": 14.237344398340248, "grad_norm": 26.69272232055664, "learning_rate": 1.430871369294606e-05, "loss": 0.5177, "step": 17156 }, { "epoch": 14.23817427385892, "grad_norm": 45.240394592285156, "learning_rate": 1.430838174273859e-05, "loss": 1.0796, "step": 17157 }, { "epoch": 14.239004149377593, "grad_norm": 27.290971755981445, "learning_rate": 1.4308049792531122e-05, "loss": 0.6782, "step": 17158 }, { "epoch": 14.239834024896265, "grad_norm": 31.759883880615234, "learning_rate": 1.4307717842323653e-05, "loss": 0.6485, "step": 17159 }, { "epoch": 14.240663900414937, "grad_norm": 38.21503448486328, "learning_rate": 1.4307385892116183e-05, "loss": 1.1156, "step": 17160 }, { "epoch": 14.241493775933609, "grad_norm": 
36.415771484375, "learning_rate": 1.4307053941908715e-05, "loss": 0.7516, "step": 17161 }, { "epoch": 14.242323651452281, "grad_norm": NaN, "learning_rate": 1.4307053941908715e-05, "loss": 1.3896, "step": 17162 }, { "epoch": 14.243153526970953, "grad_norm": 30.9377384185791, "learning_rate": 1.4306721991701247e-05, "loss": 0.5709, "step": 17163 }, { "epoch": 14.243983402489626, "grad_norm": 15.350746154785156, "learning_rate": 1.4306390041493776e-05, "loss": 0.3686, "step": 17164 }, { "epoch": 14.244813278008298, "grad_norm": 48.15811538696289, "learning_rate": 1.4306058091286308e-05, "loss": 1.2869, "step": 17165 }, { "epoch": 14.24564315352697, "grad_norm": 42.47882080078125, "learning_rate": 1.430572614107884e-05, "loss": 0.8944, "step": 17166 }, { "epoch": 14.246473029045642, "grad_norm": 16.601520538330078, "learning_rate": 1.430539419087137e-05, "loss": 0.4314, "step": 17167 }, { "epoch": 14.247302904564314, "grad_norm": 33.455169677734375, "learning_rate": 1.4305062240663901e-05, "loss": 0.5615, "step": 17168 }, { "epoch": 14.248132780082987, "grad_norm": 26.550907135009766, "learning_rate": 1.4304730290456433e-05, "loss": 0.5893, "step": 17169 }, { "epoch": 14.248962655601659, "grad_norm": 30.844688415527344, "learning_rate": 1.4304398340248963e-05, "loss": 0.7778, "step": 17170 }, { "epoch": 14.249792531120331, "grad_norm": 27.644580841064453, "learning_rate": 1.4304066390041496e-05, "loss": 0.7376, "step": 17171 }, { "epoch": 14.250622406639003, "grad_norm": 39.41595458984375, "learning_rate": 1.4303734439834024e-05, "loss": 1.188, "step": 17172 }, { "epoch": 14.251452282157675, "grad_norm": 35.044342041015625, "learning_rate": 1.4303402489626556e-05, "loss": 0.9, "step": 17173 }, { "epoch": 14.252282157676348, "grad_norm": 37.63114929199219, "learning_rate": 1.4303070539419088e-05, "loss": 0.8184, "step": 17174 }, { "epoch": 14.25311203319502, "grad_norm": 53.31366729736328, "learning_rate": 1.430273858921162e-05, "loss": 1.1502, "step": 17175 }, { 
"epoch": 14.253941908713692, "grad_norm": 26.89984703063965, "learning_rate": 1.4302406639004151e-05, "loss": 0.4687, "step": 17176 }, { "epoch": 14.254771784232364, "grad_norm": 36.872154235839844, "learning_rate": 1.4302074688796681e-05, "loss": 0.9234, "step": 17177 }, { "epoch": 14.255601659751036, "grad_norm": 33.95425796508789, "learning_rate": 1.4301742738589214e-05, "loss": 1.2032, "step": 17178 }, { "epoch": 14.256431535269709, "grad_norm": 27.672086715698242, "learning_rate": 1.4301410788381744e-05, "loss": 0.3997, "step": 17179 }, { "epoch": 14.25726141078838, "grad_norm": 40.78659439086914, "learning_rate": 1.4301078838174276e-05, "loss": 1.2708, "step": 17180 }, { "epoch": 14.258091286307055, "grad_norm": 37.69070816040039, "learning_rate": 1.4300746887966805e-05, "loss": 0.6865, "step": 17181 }, { "epoch": 14.258921161825727, "grad_norm": 54.87360382080078, "learning_rate": 1.4300414937759337e-05, "loss": 0.8038, "step": 17182 }, { "epoch": 14.2597510373444, "grad_norm": 23.554676055908203, "learning_rate": 1.4300082987551869e-05, "loss": 0.654, "step": 17183 }, { "epoch": 14.260580912863071, "grad_norm": 30.133134841918945, "learning_rate": 1.4299751037344401e-05, "loss": 0.8503, "step": 17184 }, { "epoch": 14.261410788381744, "grad_norm": 23.785337448120117, "learning_rate": 1.429941908713693e-05, "loss": 0.511, "step": 17185 }, { "epoch": 14.262240663900416, "grad_norm": 28.44411849975586, "learning_rate": 1.4299087136929462e-05, "loss": 0.4095, "step": 17186 }, { "epoch": 14.263070539419088, "grad_norm": 53.578914642333984, "learning_rate": 1.4298755186721992e-05, "loss": 0.9283, "step": 17187 }, { "epoch": 14.26390041493776, "grad_norm": 35.409488677978516, "learning_rate": 1.4298423236514524e-05, "loss": 1.0088, "step": 17188 }, { "epoch": 14.264730290456432, "grad_norm": 38.64303207397461, "learning_rate": 1.4298091286307055e-05, "loss": 0.7221, "step": 17189 }, { "epoch": 14.265560165975105, "grad_norm": 82.87120819091797, "learning_rate": 
1.4297759336099585e-05, "loss": 1.1982, "step": 17190 }, { "epoch": 14.266390041493777, "grad_norm": 29.599689483642578, "learning_rate": 1.4297427385892117e-05, "loss": 1.3077, "step": 17191 }, { "epoch": 14.267219917012449, "grad_norm": 30.763134002685547, "learning_rate": 1.429709543568465e-05, "loss": 0.5303, "step": 17192 }, { "epoch": 14.268049792531121, "grad_norm": 49.49937438964844, "learning_rate": 1.4296763485477178e-05, "loss": 1.0636, "step": 17193 }, { "epoch": 14.268879668049793, "grad_norm": 28.576416015625, "learning_rate": 1.429643153526971e-05, "loss": 1.0051, "step": 17194 }, { "epoch": 14.269709543568466, "grad_norm": 32.072418212890625, "learning_rate": 1.4296099585062242e-05, "loss": 0.7406, "step": 17195 }, { "epoch": 14.270539419087138, "grad_norm": 113.52700805664062, "learning_rate": 1.4295767634854773e-05, "loss": 0.5908, "step": 17196 }, { "epoch": 14.27136929460581, "grad_norm": 38.058006286621094, "learning_rate": 1.4295435684647303e-05, "loss": 0.7249, "step": 17197 }, { "epoch": 14.272199170124482, "grad_norm": 37.736228942871094, "learning_rate": 1.4295103734439835e-05, "loss": 0.9053, "step": 17198 }, { "epoch": 14.273029045643154, "grad_norm": 29.47570037841797, "learning_rate": 1.4294771784232366e-05, "loss": 0.6451, "step": 17199 }, { "epoch": 14.273858921161827, "grad_norm": 83.9964599609375, "learning_rate": 1.4294439834024898e-05, "loss": 0.9167, "step": 17200 }, { "epoch": 14.274688796680499, "grad_norm": 40.0993537902832, "learning_rate": 1.429410788381743e-05, "loss": 0.4135, "step": 17201 }, { "epoch": 14.275518672199171, "grad_norm": 37.38430404663086, "learning_rate": 1.4293775933609959e-05, "loss": 0.59, "step": 17202 }, { "epoch": 14.276348547717843, "grad_norm": 53.56827163696289, "learning_rate": 1.429344398340249e-05, "loss": 0.9247, "step": 17203 }, { "epoch": 14.277178423236515, "grad_norm": 25.81781005859375, "learning_rate": 1.4293112033195023e-05, "loss": 0.3898, "step": 17204 }, { "epoch": 
14.278008298755188, "grad_norm": 45.31134796142578, "learning_rate": 1.4292780082987553e-05, "loss": 0.9186, "step": 17205 }, { "epoch": 14.27883817427386, "grad_norm": 31.59949493408203, "learning_rate": 1.4292448132780084e-05, "loss": 0.6684, "step": 17206 }, { "epoch": 14.279668049792532, "grad_norm": 35.931644439697266, "learning_rate": 1.4292116182572616e-05, "loss": 0.6884, "step": 17207 }, { "epoch": 14.280497925311204, "grad_norm": 144.33473205566406, "learning_rate": 1.4291784232365146e-05, "loss": 0.3317, "step": 17208 }, { "epoch": 14.281327800829876, "grad_norm": 30.410533905029297, "learning_rate": 1.4291452282157678e-05, "loss": 1.1546, "step": 17209 }, { "epoch": 14.282157676348548, "grad_norm": 50.62548065185547, "learning_rate": 1.4291120331950207e-05, "loss": 0.9565, "step": 17210 }, { "epoch": 14.28298755186722, "grad_norm": 30.788164138793945, "learning_rate": 1.429078838174274e-05, "loss": 0.3902, "step": 17211 }, { "epoch": 14.283817427385893, "grad_norm": 54.168399810791016, "learning_rate": 1.4290456431535271e-05, "loss": 0.842, "step": 17212 }, { "epoch": 14.284647302904565, "grad_norm": 45.42509078979492, "learning_rate": 1.4290124481327803e-05, "loss": 0.8944, "step": 17213 }, { "epoch": 14.285477178423237, "grad_norm": 46.40196990966797, "learning_rate": 1.4289792531120332e-05, "loss": 0.863, "step": 17214 }, { "epoch": 14.28630705394191, "grad_norm": 46.62279510498047, "learning_rate": 1.4289460580912864e-05, "loss": 0.6635, "step": 17215 }, { "epoch": 14.287136929460582, "grad_norm": 31.336462020874023, "learning_rate": 1.4289128630705396e-05, "loss": 0.6979, "step": 17216 }, { "epoch": 14.287966804979254, "grad_norm": 36.40177536010742, "learning_rate": 1.4288796680497927e-05, "loss": 0.5573, "step": 17217 }, { "epoch": 14.288796680497926, "grad_norm": 22.540416717529297, "learning_rate": 1.4288464730290457e-05, "loss": 0.4059, "step": 17218 }, { "epoch": 14.289626556016598, "grad_norm": 45.26414489746094, "learning_rate": 
1.4288132780082988e-05, "loss": 1.0164, "step": 17219 }, { "epoch": 14.29045643153527, "grad_norm": 25.901702880859375, "learning_rate": 1.428780082987552e-05, "loss": 0.7603, "step": 17220 }, { "epoch": 14.291286307053943, "grad_norm": 28.188072204589844, "learning_rate": 1.4287468879668052e-05, "loss": 0.559, "step": 17221 }, { "epoch": 14.292116182572615, "grad_norm": 26.956960678100586, "learning_rate": 1.428713692946058e-05, "loss": 0.5252, "step": 17222 }, { "epoch": 14.292946058091287, "grad_norm": 23.13015365600586, "learning_rate": 1.4286804979253113e-05, "loss": 0.7787, "step": 17223 }, { "epoch": 14.29377593360996, "grad_norm": 35.10295486450195, "learning_rate": 1.4286473029045645e-05, "loss": 0.5858, "step": 17224 }, { "epoch": 14.294605809128631, "grad_norm": 56.71954345703125, "learning_rate": 1.4286141078838177e-05, "loss": 1.4211, "step": 17225 }, { "epoch": 14.295435684647304, "grad_norm": 81.21491241455078, "learning_rate": 1.4285809128630706e-05, "loss": 0.8788, "step": 17226 }, { "epoch": 14.296265560165976, "grad_norm": 69.46907043457031, "learning_rate": 1.4285477178423238e-05, "loss": 0.4974, "step": 17227 }, { "epoch": 14.297095435684648, "grad_norm": 23.41455078125, "learning_rate": 1.4285145228215768e-05, "loss": 0.8022, "step": 17228 }, { "epoch": 14.29792531120332, "grad_norm": 39.3682861328125, "learning_rate": 1.42848132780083e-05, "loss": 0.4573, "step": 17229 }, { "epoch": 14.298755186721992, "grad_norm": 57.901756286621094, "learning_rate": 1.4284481327800832e-05, "loss": 1.0624, "step": 17230 }, { "epoch": 14.299585062240665, "grad_norm": 28.935049057006836, "learning_rate": 1.4284149377593361e-05, "loss": 0.8713, "step": 17231 }, { "epoch": 14.300414937759337, "grad_norm": 25.969186782836914, "learning_rate": 1.4283817427385893e-05, "loss": 0.5651, "step": 17232 }, { "epoch": 14.301244813278009, "grad_norm": 24.287839889526367, "learning_rate": 1.4283485477178425e-05, "loss": 0.6699, "step": 17233 }, { "epoch": 
14.302074688796681, "grad_norm": 48.99265670776367, "learning_rate": 1.4283153526970956e-05, "loss": 1.0239, "step": 17234 }, { "epoch": 14.302904564315353, "grad_norm": 22.41016960144043, "learning_rate": 1.4282821576763486e-05, "loss": 0.6037, "step": 17235 }, { "epoch": 14.303734439834026, "grad_norm": 36.72230529785156, "learning_rate": 1.4282489626556018e-05, "loss": 1.4283, "step": 17236 }, { "epoch": 14.304564315352698, "grad_norm": 36.770565032958984, "learning_rate": 1.4282157676348549e-05, "loss": 0.8282, "step": 17237 }, { "epoch": 14.30539419087137, "grad_norm": 19.252172470092773, "learning_rate": 1.428182572614108e-05, "loss": 0.5303, "step": 17238 }, { "epoch": 14.306224066390042, "grad_norm": 13.044593811035156, "learning_rate": 1.4281493775933611e-05, "loss": 0.3059, "step": 17239 }, { "epoch": 14.307053941908714, "grad_norm": 31.22275733947754, "learning_rate": 1.4281161825726142e-05, "loss": 0.5863, "step": 17240 }, { "epoch": 14.307883817427387, "grad_norm": 24.129199981689453, "learning_rate": 1.4280829875518674e-05, "loss": 0.5861, "step": 17241 }, { "epoch": 14.308713692946059, "grad_norm": 31.78467559814453, "learning_rate": 1.4280497925311206e-05, "loss": 0.8437, "step": 17242 }, { "epoch": 14.309543568464731, "grad_norm": 65.9566650390625, "learning_rate": 1.4280165975103734e-05, "loss": 0.7672, "step": 17243 }, { "epoch": 14.310373443983403, "grad_norm": 16.280282974243164, "learning_rate": 1.4279834024896267e-05, "loss": 0.3202, "step": 17244 }, { "epoch": 14.311203319502075, "grad_norm": 44.42189407348633, "learning_rate": 1.4279502074688799e-05, "loss": 1.0092, "step": 17245 }, { "epoch": 14.312033195020748, "grad_norm": 91.74565887451172, "learning_rate": 1.4279170124481329e-05, "loss": 0.6404, "step": 17246 }, { "epoch": 14.31286307053942, "grad_norm": 32.89316940307617, "learning_rate": 1.427883817427386e-05, "loss": 0.5283, "step": 17247 }, { "epoch": 14.313692946058092, "grad_norm": 38.55175018310547, "learning_rate": 
1.4278506224066392e-05, "loss": 0.6666, "step": 17248 }, { "epoch": 14.314522821576764, "grad_norm": 63.12691116333008, "learning_rate": 1.4278174273858922e-05, "loss": 1.113, "step": 17249 }, { "epoch": 14.315352697095436, "grad_norm": 13.720381736755371, "learning_rate": 1.4277842323651454e-05, "loss": 0.3403, "step": 17250 }, { "epoch": 14.316182572614109, "grad_norm": 55.78025817871094, "learning_rate": 1.4277510373443983e-05, "loss": 1.4684, "step": 17251 }, { "epoch": 14.31701244813278, "grad_norm": 50.157501220703125, "learning_rate": 1.4277178423236515e-05, "loss": 0.6628, "step": 17252 }, { "epoch": 14.317842323651453, "grad_norm": 20.089685440063477, "learning_rate": 1.4276846473029047e-05, "loss": 0.3537, "step": 17253 }, { "epoch": 14.318672199170125, "grad_norm": 88.10043334960938, "learning_rate": 1.427651452282158e-05, "loss": 1.1619, "step": 17254 }, { "epoch": 14.319502074688797, "grad_norm": 33.93871307373047, "learning_rate": 1.427618257261411e-05, "loss": 0.7163, "step": 17255 }, { "epoch": 14.32033195020747, "grad_norm": 31.057716369628906, "learning_rate": 1.427585062240664e-05, "loss": 0.4984, "step": 17256 }, { "epoch": 14.321161825726142, "grad_norm": 39.434078216552734, "learning_rate": 1.427551867219917e-05, "loss": 0.7738, "step": 17257 }, { "epoch": 14.321991701244814, "grad_norm": 30.000158309936523, "learning_rate": 1.4275186721991703e-05, "loss": 0.7695, "step": 17258 }, { "epoch": 14.322821576763486, "grad_norm": 62.89570999145508, "learning_rate": 1.4274854771784235e-05, "loss": 1.2786, "step": 17259 }, { "epoch": 14.323651452282158, "grad_norm": 25.46807098388672, "learning_rate": 1.4274522821576763e-05, "loss": 0.6386, "step": 17260 }, { "epoch": 14.32448132780083, "grad_norm": 46.52408218383789, "learning_rate": 1.4274190871369295e-05, "loss": 0.5346, "step": 17261 }, { "epoch": 14.325311203319503, "grad_norm": 67.64141082763672, "learning_rate": 1.4273858921161828e-05, "loss": 1.1463, "step": 17262 }, { "epoch": 
14.326141078838175, "grad_norm": 23.018415451049805, "learning_rate": 1.427352697095436e-05, "loss": 0.5036, "step": 17263 }, { "epoch": 14.326970954356847, "grad_norm": 37.20813751220703, "learning_rate": 1.4273195020746888e-05, "loss": 0.4562, "step": 17264 }, { "epoch": 14.32780082987552, "grad_norm": 45.75726318359375, "learning_rate": 1.427286307053942e-05, "loss": 0.6955, "step": 17265 }, { "epoch": 14.328630705394191, "grad_norm": 25.567771911621094, "learning_rate": 1.4272531120331951e-05, "loss": 0.8739, "step": 17266 }, { "epoch": 14.329460580912864, "grad_norm": 38.32134246826172, "learning_rate": 1.4272199170124483e-05, "loss": 0.799, "step": 17267 }, { "epoch": 14.330290456431536, "grad_norm": 24.445505142211914, "learning_rate": 1.4271867219917013e-05, "loss": 0.5232, "step": 17268 }, { "epoch": 14.331120331950208, "grad_norm": 46.606544494628906, "learning_rate": 1.4271535269709544e-05, "loss": 1.0035, "step": 17269 }, { "epoch": 14.33195020746888, "grad_norm": 58.76773452758789, "learning_rate": 1.4271203319502076e-05, "loss": 0.7391, "step": 17270 }, { "epoch": 14.332780082987552, "grad_norm": 58.22164535522461, "learning_rate": 1.4270871369294608e-05, "loss": 0.8996, "step": 17271 }, { "epoch": 14.333609958506225, "grad_norm": 228.45806884765625, "learning_rate": 1.4270539419087137e-05, "loss": 1.3293, "step": 17272 }, { "epoch": 14.334439834024897, "grad_norm": 25.491445541381836, "learning_rate": 1.4270207468879669e-05, "loss": 0.6155, "step": 17273 }, { "epoch": 14.335269709543569, "grad_norm": 41.259185791015625, "learning_rate": 1.4269875518672201e-05, "loss": 0.6724, "step": 17274 }, { "epoch": 14.336099585062241, "grad_norm": 42.2874870300293, "learning_rate": 1.4269543568464731e-05, "loss": 0.4851, "step": 17275 }, { "epoch": 14.336929460580913, "grad_norm": 69.5655288696289, "learning_rate": 1.4269211618257262e-05, "loss": 1.8502, "step": 17276 }, { "epoch": 14.337759336099586, "grad_norm": 36.56281661987305, "learning_rate": 
1.4268879668049794e-05, "loss": 1.1813, "step": 17277 }, { "epoch": 14.338589211618258, "grad_norm": 34.245277404785156, "learning_rate": 1.4268547717842324e-05, "loss": 0.5448, "step": 17278 }, { "epoch": 14.33941908713693, "grad_norm": 52.12690734863281, "learning_rate": 1.4268215767634856e-05, "loss": 1.0314, "step": 17279 }, { "epoch": 14.340248962655602, "grad_norm": 43.21198272705078, "learning_rate": 1.4267883817427389e-05, "loss": 0.9626, "step": 17280 }, { "epoch": 14.341078838174274, "grad_norm": 26.281871795654297, "learning_rate": 1.4267551867219917e-05, "loss": 0.7701, "step": 17281 }, { "epoch": 14.341908713692947, "grad_norm": 22.22873306274414, "learning_rate": 1.426721991701245e-05, "loss": 0.6853, "step": 17282 }, { "epoch": 14.342738589211619, "grad_norm": 59.060184478759766, "learning_rate": 1.4266887966804981e-05, "loss": 1.1791, "step": 17283 }, { "epoch": 14.343568464730291, "grad_norm": 41.848670959472656, "learning_rate": 1.4266556016597512e-05, "loss": 0.9194, "step": 17284 }, { "epoch": 14.344398340248963, "grad_norm": 39.6072883605957, "learning_rate": 1.4266224066390042e-05, "loss": 0.4557, "step": 17285 }, { "epoch": 14.345228215767635, "grad_norm": 28.356136322021484, "learning_rate": 1.4265892116182574e-05, "loss": 0.5464, "step": 17286 }, { "epoch": 14.346058091286308, "grad_norm": 21.265968322753906, "learning_rate": 1.4265560165975105e-05, "loss": 0.5603, "step": 17287 }, { "epoch": 14.34688796680498, "grad_norm": 74.44816589355469, "learning_rate": 1.4265228215767637e-05, "loss": 0.715, "step": 17288 }, { "epoch": 14.347717842323652, "grad_norm": 31.3553466796875, "learning_rate": 1.4264896265560166e-05, "loss": 0.9698, "step": 17289 }, { "epoch": 14.348547717842324, "grad_norm": 105.14598846435547, "learning_rate": 1.4264564315352698e-05, "loss": 0.6379, "step": 17290 }, { "epoch": 14.349377593360996, "grad_norm": 67.80709075927734, "learning_rate": 1.426423236514523e-05, "loss": 0.5688, "step": 17291 }, { "epoch": 
14.350207468879669, "grad_norm": 76.68297576904297, "learning_rate": 1.4263900414937762e-05, "loss": 0.894, "step": 17292 }, { "epoch": 14.35103734439834, "grad_norm": 19.344791412353516, "learning_rate": 1.426356846473029e-05, "loss": 0.4205, "step": 17293 }, { "epoch": 14.351867219917013, "grad_norm": 45.993072509765625, "learning_rate": 1.4263236514522823e-05, "loss": 0.457, "step": 17294 }, { "epoch": 14.352697095435685, "grad_norm": 39.04256057739258, "learning_rate": 1.4262904564315355e-05, "loss": 0.9358, "step": 17295 }, { "epoch": 14.353526970954357, "grad_norm": 28.213476181030273, "learning_rate": 1.4262572614107885e-05, "loss": 1.1976, "step": 17296 }, { "epoch": 14.35435684647303, "grad_norm": 86.7805404663086, "learning_rate": 1.4262240663900416e-05, "loss": 1.1979, "step": 17297 }, { "epoch": 14.355186721991702, "grad_norm": 16.414140701293945, "learning_rate": 1.4261908713692946e-05, "loss": 0.4856, "step": 17298 }, { "epoch": 14.356016597510374, "grad_norm": 32.23603057861328, "learning_rate": 1.4261576763485478e-05, "loss": 0.3158, "step": 17299 }, { "epoch": 14.356846473029046, "grad_norm": 47.18294143676758, "learning_rate": 1.426124481327801e-05, "loss": 1.1632, "step": 17300 }, { "epoch": 14.357676348547718, "grad_norm": 68.21648406982422, "learning_rate": 1.4260912863070539e-05, "loss": 1.2626, "step": 17301 }, { "epoch": 14.35850622406639, "grad_norm": 35.83855056762695, "learning_rate": 1.4260580912863071e-05, "loss": 0.6193, "step": 17302 }, { "epoch": 14.359336099585063, "grad_norm": 33.44934844970703, "learning_rate": 1.4260248962655603e-05, "loss": 0.9092, "step": 17303 }, { "epoch": 14.360165975103735, "grad_norm": 45.21152114868164, "learning_rate": 1.4259917012448134e-05, "loss": 1.6682, "step": 17304 }, { "epoch": 14.360995850622407, "grad_norm": 35.54306411743164, "learning_rate": 1.4259585062240664e-05, "loss": 1.2472, "step": 17305 }, { "epoch": 14.36182572614108, "grad_norm": 31.57291030883789, "learning_rate": 
1.4259253112033196e-05, "loss": 0.876, "step": 17306 }, { "epoch": 14.362655601659752, "grad_norm": 22.046064376831055, "learning_rate": 1.4258921161825727e-05, "loss": 0.5635, "step": 17307 }, { "epoch": 14.363485477178424, "grad_norm": 37.812416076660156, "learning_rate": 1.4258589211618259e-05, "loss": 0.745, "step": 17308 }, { "epoch": 14.364315352697096, "grad_norm": 68.71144104003906, "learning_rate": 1.4258257261410791e-05, "loss": 0.818, "step": 17309 }, { "epoch": 14.365145228215768, "grad_norm": 45.08897399902344, "learning_rate": 1.425792531120332e-05, "loss": 0.9934, "step": 17310 }, { "epoch": 14.36597510373444, "grad_norm": 46.02709197998047, "learning_rate": 1.4257593360995852e-05, "loss": 1.3556, "step": 17311 }, { "epoch": 14.366804979253113, "grad_norm": 29.660579681396484, "learning_rate": 1.4257261410788384e-05, "loss": 0.9796, "step": 17312 }, { "epoch": 14.367634854771785, "grad_norm": 25.788408279418945, "learning_rate": 1.4256929460580914e-05, "loss": 0.6484, "step": 17313 }, { "epoch": 14.368464730290457, "grad_norm": 52.244388580322266, "learning_rate": 1.4256597510373445e-05, "loss": 0.6832, "step": 17314 }, { "epoch": 14.369294605809129, "grad_norm": 99.74312591552734, "learning_rate": 1.4256265560165977e-05, "loss": 0.9124, "step": 17315 }, { "epoch": 14.370124481327801, "grad_norm": 68.13922119140625, "learning_rate": 1.4255933609958507e-05, "loss": 0.9741, "step": 17316 }, { "epoch": 14.370954356846473, "grad_norm": 32.494346618652344, "learning_rate": 1.425560165975104e-05, "loss": 0.6832, "step": 17317 }, { "epoch": 14.371784232365146, "grad_norm": 52.85382843017578, "learning_rate": 1.4255269709543568e-05, "loss": 0.941, "step": 17318 }, { "epoch": 14.372614107883818, "grad_norm": 56.20469665527344, "learning_rate": 1.42549377593361e-05, "loss": 0.9151, "step": 17319 }, { "epoch": 14.37344398340249, "grad_norm": 22.898284912109375, "learning_rate": 1.4254605809128632e-05, "loss": 0.5263, "step": 17320 }, { "epoch": 
14.374273858921162, "grad_norm": 38.76557922363281, "learning_rate": 1.4254273858921164e-05, "loss": 0.7388, "step": 17321 }, { "epoch": 14.375103734439834, "grad_norm": 28.703155517578125, "learning_rate": 1.4253941908713693e-05, "loss": 0.5805, "step": 17322 }, { "epoch": 14.375933609958507, "grad_norm": 42.97801971435547, "learning_rate": 1.4253609958506225e-05, "loss": 0.4869, "step": 17323 }, { "epoch": 14.376763485477179, "grad_norm": 37.506290435791016, "learning_rate": 1.4253278008298757e-05, "loss": 1.2158, "step": 17324 }, { "epoch": 14.377593360995851, "grad_norm": 53.57917022705078, "learning_rate": 1.4252946058091288e-05, "loss": 0.8254, "step": 17325 }, { "epoch": 14.378423236514523, "grad_norm": 20.515867233276367, "learning_rate": 1.4252614107883818e-05, "loss": 0.3725, "step": 17326 }, { "epoch": 14.379253112033195, "grad_norm": 41.644737243652344, "learning_rate": 1.4252282157676348e-05, "loss": 0.9762, "step": 17327 }, { "epoch": 14.380082987551868, "grad_norm": 47.04551315307617, "learning_rate": 1.425195020746888e-05, "loss": 1.8907, "step": 17328 }, { "epoch": 14.38091286307054, "grad_norm": 39.01466369628906, "learning_rate": 1.4251618257261413e-05, "loss": 0.5897, "step": 17329 }, { "epoch": 14.381742738589212, "grad_norm": 29.62656593322754, "learning_rate": 1.4251286307053941e-05, "loss": 0.3854, "step": 17330 }, { "epoch": 14.382572614107884, "grad_norm": 32.87398910522461, "learning_rate": 1.4250954356846474e-05, "loss": 0.4981, "step": 17331 }, { "epoch": 14.383402489626556, "grad_norm": 34.03871154785156, "learning_rate": 1.4250622406639006e-05, "loss": 0.6729, "step": 17332 }, { "epoch": 14.384232365145229, "grad_norm": 57.80973434448242, "learning_rate": 1.4250290456431538e-05, "loss": 1.3032, "step": 17333 }, { "epoch": 14.3850622406639, "grad_norm": 22.99005126953125, "learning_rate": 1.4249958506224068e-05, "loss": 0.4577, "step": 17334 }, { "epoch": 14.385892116182573, "grad_norm": 29.32468032836914, "learning_rate": 
1.4249626556016599e-05, "loss": 0.9651, "step": 17335 }, { "epoch": 14.386721991701245, "grad_norm": 32.18104934692383, "learning_rate": 1.4249294605809129e-05, "loss": 0.5382, "step": 17336 }, { "epoch": 14.387551867219917, "grad_norm": 46.26752853393555, "learning_rate": 1.4248962655601661e-05, "loss": 0.9288, "step": 17337 }, { "epoch": 14.38838174273859, "grad_norm": 33.05839538574219, "learning_rate": 1.4248630705394193e-05, "loss": 0.7026, "step": 17338 }, { "epoch": 14.389211618257262, "grad_norm": 52.345760345458984, "learning_rate": 1.4248298755186722e-05, "loss": 0.5645, "step": 17339 }, { "epoch": 14.390041493775934, "grad_norm": 43.16908264160156, "learning_rate": 1.4247966804979254e-05, "loss": 0.8133, "step": 17340 }, { "epoch": 14.390871369294606, "grad_norm": 36.101810455322266, "learning_rate": 1.4247634854771786e-05, "loss": 0.7922, "step": 17341 }, { "epoch": 14.391701244813278, "grad_norm": 23.415563583374023, "learning_rate": 1.4247302904564318e-05, "loss": 0.8888, "step": 17342 }, { "epoch": 14.39253112033195, "grad_norm": 34.5712776184082, "learning_rate": 1.4246970954356847e-05, "loss": 0.9681, "step": 17343 }, { "epoch": 14.393360995850623, "grad_norm": 47.566551208496094, "learning_rate": 1.4246639004149379e-05, "loss": 0.9797, "step": 17344 }, { "epoch": 14.394190871369295, "grad_norm": 20.072298049926758, "learning_rate": 1.424630705394191e-05, "loss": 0.3954, "step": 17345 }, { "epoch": 14.395020746887967, "grad_norm": 50.211849212646484, "learning_rate": 1.4245975103734442e-05, "loss": 1.0953, "step": 17346 }, { "epoch": 14.39585062240664, "grad_norm": 24.992141723632812, "learning_rate": 1.4245643153526972e-05, "loss": 0.6933, "step": 17347 }, { "epoch": 14.396680497925312, "grad_norm": 39.449363708496094, "learning_rate": 1.4245311203319502e-05, "loss": 0.7069, "step": 17348 }, { "epoch": 14.397510373443984, "grad_norm": 125.11134338378906, "learning_rate": 1.4244979253112035e-05, "loss": 0.9757, "step": 17349 }, { "epoch": 
14.398340248962656, "grad_norm": 58.138641357421875, "learning_rate": 1.4244647302904567e-05, "loss": 0.9829, "step": 17350 }, { "epoch": 14.399170124481328, "grad_norm": 38.25715255737305, "learning_rate": 1.4244315352697095e-05, "loss": 0.5674, "step": 17351 }, { "epoch": 14.4, "grad_norm": 37.867027282714844, "learning_rate": 1.4243983402489627e-05, "loss": 0.737, "step": 17352 }, { "epoch": 14.400829875518673, "grad_norm": 36.6298942565918, "learning_rate": 1.424365145228216e-05, "loss": 0.7997, "step": 17353 }, { "epoch": 14.401659751037345, "grad_norm": 25.211509704589844, "learning_rate": 1.424331950207469e-05, "loss": 0.5409, "step": 17354 }, { "epoch": 14.402489626556017, "grad_norm": 20.556869506835938, "learning_rate": 1.424298755186722e-05, "loss": 0.5015, "step": 17355 }, { "epoch": 14.40331950207469, "grad_norm": 29.686744689941406, "learning_rate": 1.4242655601659752e-05, "loss": 0.5925, "step": 17356 }, { "epoch": 14.404149377593361, "grad_norm": 44.06581115722656, "learning_rate": 1.4242323651452283e-05, "loss": 1.0194, "step": 17357 }, { "epoch": 14.404979253112034, "grad_norm": 26.014694213867188, "learning_rate": 1.4241991701244815e-05, "loss": 0.6743, "step": 17358 }, { "epoch": 14.405809128630706, "grad_norm": 65.41624450683594, "learning_rate": 1.4241659751037347e-05, "loss": 0.654, "step": 17359 }, { "epoch": 14.406639004149378, "grad_norm": 65.01896667480469, "learning_rate": 1.4241327800829876e-05, "loss": 1.1422, "step": 17360 }, { "epoch": 14.40746887966805, "grad_norm": 12.531323432922363, "learning_rate": 1.4240995850622408e-05, "loss": 0.4125, "step": 17361 }, { "epoch": 14.408298755186722, "grad_norm": 37.72733688354492, "learning_rate": 1.424066390041494e-05, "loss": 0.6159, "step": 17362 }, { "epoch": 14.409128630705395, "grad_norm": 37.4242057800293, "learning_rate": 1.424033195020747e-05, "loss": 1.0707, "step": 17363 }, { "epoch": 14.409958506224067, "grad_norm": 61.33451843261719, "learning_rate": 1.4240000000000001e-05, 
"loss": 0.8738, "step": 17364 }, { "epoch": 14.410788381742739, "grad_norm": 88.34649658203125, "learning_rate": 1.4239668049792533e-05, "loss": 1.2453, "step": 17365 }, { "epoch": 14.411618257261411, "grad_norm": 62.26142501831055, "learning_rate": 1.4239336099585063e-05, "loss": 0.7457, "step": 17366 }, { "epoch": 14.412448132780083, "grad_norm": 34.80384063720703, "learning_rate": 1.4239004149377596e-05, "loss": 0.7224, "step": 17367 }, { "epoch": 14.413278008298755, "grad_norm": 115.15481567382812, "learning_rate": 1.4238672199170124e-05, "loss": 1.4119, "step": 17368 }, { "epoch": 14.414107883817428, "grad_norm": 28.6875057220459, "learning_rate": 1.4238340248962656e-05, "loss": 0.6425, "step": 17369 }, { "epoch": 14.4149377593361, "grad_norm": 48.89046096801758, "learning_rate": 1.4238008298755188e-05, "loss": 0.7092, "step": 17370 }, { "epoch": 14.415767634854772, "grad_norm": 53.04434585571289, "learning_rate": 1.423767634854772e-05, "loss": 0.7523, "step": 17371 }, { "epoch": 14.416597510373444, "grad_norm": 37.17930603027344, "learning_rate": 1.423734439834025e-05, "loss": 0.7062, "step": 17372 }, { "epoch": 14.417427385892116, "grad_norm": 33.79483413696289, "learning_rate": 1.4237012448132781e-05, "loss": 0.9412, "step": 17373 }, { "epoch": 14.418257261410789, "grad_norm": 35.11576843261719, "learning_rate": 1.4236680497925312e-05, "loss": 0.5145, "step": 17374 }, { "epoch": 14.41908713692946, "grad_norm": 32.01492691040039, "learning_rate": 1.4236348547717844e-05, "loss": 1.6589, "step": 17375 }, { "epoch": 14.419917012448133, "grad_norm": 29.933847427368164, "learning_rate": 1.4236016597510374e-05, "loss": 0.4669, "step": 17376 }, { "epoch": 14.420746887966805, "grad_norm": 25.702112197875977, "learning_rate": 1.4235684647302905e-05, "loss": 0.5245, "step": 17377 }, { "epoch": 14.421576763485477, "grad_norm": 16.930543899536133, "learning_rate": 1.4235352697095437e-05, "loss": 0.381, "step": 17378 }, { "epoch": 14.42240663900415, "grad_norm": 
30.031949996948242, "learning_rate": 1.4235020746887969e-05, "loss": 0.8466, "step": 17379 }, { "epoch": 14.423236514522822, "grad_norm": 71.98806762695312, "learning_rate": 1.4234688796680498e-05, "loss": 1.079, "step": 17380 }, { "epoch": 14.424066390041494, "grad_norm": 40.35321807861328, "learning_rate": 1.423435684647303e-05, "loss": 0.4975, "step": 17381 }, { "epoch": 14.424896265560166, "grad_norm": 49.586158752441406, "learning_rate": 1.4234024896265562e-05, "loss": 0.9158, "step": 17382 }, { "epoch": 14.425726141078838, "grad_norm": 26.02690315246582, "learning_rate": 1.4233692946058092e-05, "loss": 0.8588, "step": 17383 }, { "epoch": 14.42655601659751, "grad_norm": 55.71603012084961, "learning_rate": 1.4233360995850623e-05, "loss": 0.8565, "step": 17384 }, { "epoch": 14.427385892116183, "grad_norm": 77.9598617553711, "learning_rate": 1.4233029045643155e-05, "loss": 0.618, "step": 17385 }, { "epoch": 14.428215767634855, "grad_norm": 49.59783172607422, "learning_rate": 1.4232697095435685e-05, "loss": 1.0122, "step": 17386 }, { "epoch": 14.429045643153527, "grad_norm": 31.250934600830078, "learning_rate": 1.4232365145228217e-05, "loss": 0.5958, "step": 17387 }, { "epoch": 14.4298755186722, "grad_norm": 20.24726104736328, "learning_rate": 1.423203319502075e-05, "loss": 0.4483, "step": 17388 }, { "epoch": 14.430705394190872, "grad_norm": 14.205921173095703, "learning_rate": 1.4231701244813278e-05, "loss": 0.2886, "step": 17389 }, { "epoch": 14.431535269709544, "grad_norm": 24.002418518066406, "learning_rate": 1.423136929460581e-05, "loss": 0.6675, "step": 17390 }, { "epoch": 14.432365145228216, "grad_norm": 20.12115478515625, "learning_rate": 1.4231037344398342e-05, "loss": 0.45, "step": 17391 }, { "epoch": 14.433195020746888, "grad_norm": 47.68551254272461, "learning_rate": 1.4230705394190873e-05, "loss": 0.7099, "step": 17392 }, { "epoch": 14.43402489626556, "grad_norm": 48.93651580810547, "learning_rate": 1.4230373443983403e-05, "loss": 1.4443, "step": 
17393 }, { "epoch": 14.434854771784233, "grad_norm": 26.001108169555664, "learning_rate": 1.4230041493775935e-05, "loss": 0.5498, "step": 17394 }, { "epoch": 14.435684647302905, "grad_norm": 31.011049270629883, "learning_rate": 1.4229709543568466e-05, "loss": 0.5422, "step": 17395 }, { "epoch": 14.436514522821577, "grad_norm": 38.48245620727539, "learning_rate": 1.4229377593360998e-05, "loss": 0.6784, "step": 17396 }, { "epoch": 14.43734439834025, "grad_norm": 47.47777557373047, "learning_rate": 1.4229045643153527e-05, "loss": 0.6704, "step": 17397 }, { "epoch": 14.438174273858921, "grad_norm": 22.89386749267578, "learning_rate": 1.4228713692946059e-05, "loss": 0.5009, "step": 17398 }, { "epoch": 14.439004149377594, "grad_norm": 22.917736053466797, "learning_rate": 1.422838174273859e-05, "loss": 0.5372, "step": 17399 }, { "epoch": 14.439834024896266, "grad_norm": 24.525184631347656, "learning_rate": 1.4228049792531123e-05, "loss": 0.6162, "step": 17400 }, { "epoch": 14.440663900414938, "grad_norm": 42.596778869628906, "learning_rate": 1.4227717842323652e-05, "loss": 0.523, "step": 17401 }, { "epoch": 14.44149377593361, "grad_norm": 36.41172790527344, "learning_rate": 1.4227385892116184e-05, "loss": 1.0287, "step": 17402 }, { "epoch": 14.442323651452282, "grad_norm": 61.44330978393555, "learning_rate": 1.4227053941908716e-05, "loss": 1.3125, "step": 17403 }, { "epoch": 14.443153526970955, "grad_norm": 38.576725006103516, "learning_rate": 1.4226721991701246e-05, "loss": 0.8177, "step": 17404 }, { "epoch": 14.443983402489627, "grad_norm": 34.42032241821289, "learning_rate": 1.4226390041493777e-05, "loss": 0.5616, "step": 17405 }, { "epoch": 14.444813278008299, "grad_norm": 65.00179290771484, "learning_rate": 1.4226058091286307e-05, "loss": 0.5091, "step": 17406 }, { "epoch": 14.445643153526971, "grad_norm": 22.210901260375977, "learning_rate": 1.422572614107884e-05, "loss": 0.3286, "step": 17407 }, { "epoch": 14.446473029045643, "grad_norm": 25.538257598876953, 
"learning_rate": 1.4225394190871371e-05, "loss": 0.4728, "step": 17408 }, { "epoch": 14.447302904564316, "grad_norm": 29.176010131835938, "learning_rate": 1.42250622406639e-05, "loss": 0.5227, "step": 17409 }, { "epoch": 14.448132780082988, "grad_norm": 71.01884460449219, "learning_rate": 1.4224730290456432e-05, "loss": 0.7612, "step": 17410 }, { "epoch": 14.44896265560166, "grad_norm": 31.864730834960938, "learning_rate": 1.4224398340248964e-05, "loss": 1.3056, "step": 17411 }, { "epoch": 14.449792531120332, "grad_norm": 42.1014404296875, "learning_rate": 1.4224066390041496e-05, "loss": 1.7402, "step": 17412 }, { "epoch": 14.450622406639004, "grad_norm": 39.56366729736328, "learning_rate": 1.4223734439834027e-05, "loss": 1.0546, "step": 17413 }, { "epoch": 14.451452282157677, "grad_norm": 51.09156799316406, "learning_rate": 1.4223402489626557e-05, "loss": 0.6032, "step": 17414 }, { "epoch": 14.452282157676349, "grad_norm": 67.36746978759766, "learning_rate": 1.4223070539419088e-05, "loss": 0.6318, "step": 17415 }, { "epoch": 14.453112033195021, "grad_norm": 22.403223037719727, "learning_rate": 1.422273858921162e-05, "loss": 0.4937, "step": 17416 }, { "epoch": 14.453941908713693, "grad_norm": 17.68507194519043, "learning_rate": 1.4222406639004152e-05, "loss": 0.2928, "step": 17417 }, { "epoch": 14.454771784232365, "grad_norm": 41.01365661621094, "learning_rate": 1.422207468879668e-05, "loss": 0.6216, "step": 17418 }, { "epoch": 14.455601659751038, "grad_norm": 61.79750061035156, "learning_rate": 1.4221742738589213e-05, "loss": 0.5654, "step": 17419 }, { "epoch": 14.45643153526971, "grad_norm": 33.69770812988281, "learning_rate": 1.4221410788381745e-05, "loss": 0.8874, "step": 17420 }, { "epoch": 14.457261410788382, "grad_norm": 21.02650260925293, "learning_rate": 1.4221078838174275e-05, "loss": 0.4256, "step": 17421 }, { "epoch": 14.458091286307054, "grad_norm": 66.31442260742188, "learning_rate": 1.4220746887966806e-05, "loss": 1.2389, "step": 17422 }, { "epoch": 
14.458921161825726, "grad_norm": 62.47199249267578, "learning_rate": 1.4220414937759338e-05, "loss": 0.495, "step": 17423 }, { "epoch": 14.459751037344398, "grad_norm": 47.7312126159668, "learning_rate": 1.4220082987551868e-05, "loss": 0.927, "step": 17424 }, { "epoch": 14.46058091286307, "grad_norm": 21.91088104248047, "learning_rate": 1.42197510373444e-05, "loss": 0.479, "step": 17425 }, { "epoch": 14.461410788381743, "grad_norm": 58.68038558959961, "learning_rate": 1.421941908713693e-05, "loss": 0.8755, "step": 17426 }, { "epoch": 14.462240663900415, "grad_norm": 30.990694046020508, "learning_rate": 1.4219087136929461e-05, "loss": 0.465, "step": 17427 }, { "epoch": 14.463070539419087, "grad_norm": 19.216915130615234, "learning_rate": 1.4218755186721993e-05, "loss": 0.4197, "step": 17428 }, { "epoch": 14.46390041493776, "grad_norm": 23.79374885559082, "learning_rate": 1.4218423236514525e-05, "loss": 0.8506, "step": 17429 }, { "epoch": 14.464730290456432, "grad_norm": 19.716028213500977, "learning_rate": 1.4218091286307054e-05, "loss": 0.3823, "step": 17430 }, { "epoch": 14.465560165975104, "grad_norm": 36.18132400512695, "learning_rate": 1.4217759336099586e-05, "loss": 0.837, "step": 17431 }, { "epoch": 14.466390041493776, "grad_norm": 60.90641403198242, "learning_rate": 1.4217427385892118e-05, "loss": 0.6329, "step": 17432 }, { "epoch": 14.467219917012448, "grad_norm": 37.53594970703125, "learning_rate": 1.4217095435684649e-05, "loss": 1.042, "step": 17433 }, { "epoch": 14.46804979253112, "grad_norm": 25.17070770263672, "learning_rate": 1.4216763485477179e-05, "loss": 0.5617, "step": 17434 }, { "epoch": 14.468879668049793, "grad_norm": 20.218914031982422, "learning_rate": 1.421643153526971e-05, "loss": 0.4753, "step": 17435 }, { "epoch": 14.469709543568465, "grad_norm": 44.71751403808594, "learning_rate": 1.4216099585062241e-05, "loss": 1.163, "step": 17436 }, { "epoch": 14.470539419087137, "grad_norm": 23.78206443786621, "learning_rate": 1.4215767634854774e-05, 
"loss": 0.5523, "step": 17437 }, { "epoch": 14.47136929460581, "grad_norm": 43.05573272705078, "learning_rate": 1.4215435684647306e-05, "loss": 1.4665, "step": 17438 }, { "epoch": 14.472199170124481, "grad_norm": 85.01536560058594, "learning_rate": 1.4215103734439834e-05, "loss": 0.7928, "step": 17439 }, { "epoch": 14.473029045643154, "grad_norm": 62.328887939453125, "learning_rate": 1.4214771784232367e-05, "loss": 0.8031, "step": 17440 }, { "epoch": 14.473858921161826, "grad_norm": 22.807777404785156, "learning_rate": 1.4214439834024899e-05, "loss": 0.5967, "step": 17441 }, { "epoch": 14.474688796680498, "grad_norm": 44.877830505371094, "learning_rate": 1.4214107883817429e-05, "loss": 0.7705, "step": 17442 }, { "epoch": 14.47551867219917, "grad_norm": 55.63860321044922, "learning_rate": 1.421377593360996e-05, "loss": 0.9184, "step": 17443 }, { "epoch": 14.476348547717842, "grad_norm": 71.68611907958984, "learning_rate": 1.421344398340249e-05, "loss": 1.9736, "step": 17444 }, { "epoch": 14.477178423236515, "grad_norm": 48.73188781738281, "learning_rate": 1.4213112033195022e-05, "loss": 0.6763, "step": 17445 }, { "epoch": 14.478008298755187, "grad_norm": 56.32884979248047, "learning_rate": 1.4212780082987554e-05, "loss": 0.647, "step": 17446 }, { "epoch": 14.478838174273859, "grad_norm": 39.58423614501953, "learning_rate": 1.4212448132780083e-05, "loss": 0.7042, "step": 17447 }, { "epoch": 14.479668049792531, "grad_norm": 21.138227462768555, "learning_rate": 1.4212116182572615e-05, "loss": 0.821, "step": 17448 }, { "epoch": 14.480497925311203, "grad_norm": 108.71833801269531, "learning_rate": 1.4211784232365147e-05, "loss": 0.9457, "step": 17449 }, { "epoch": 14.481327800829876, "grad_norm": 25.687376022338867, "learning_rate": 1.4211452282157679e-05, "loss": 0.6127, "step": 17450 }, { "epoch": 14.482157676348548, "grad_norm": 26.49664306640625, "learning_rate": 1.4211120331950208e-05, "loss": 0.4081, "step": 17451 }, { "epoch": 14.48298755186722, "grad_norm": 
33.52963638305664, "learning_rate": 1.421078838174274e-05, "loss": 1.2071, "step": 17452 }, { "epoch": 14.483817427385892, "grad_norm": 65.72109985351562, "learning_rate": 1.421045643153527e-05, "loss": 0.8184, "step": 17453 }, { "epoch": 14.484647302904564, "grad_norm": 55.64577102661133, "learning_rate": 1.4210124481327802e-05, "loss": 0.624, "step": 17454 }, { "epoch": 14.485477178423237, "grad_norm": 35.614444732666016, "learning_rate": 1.4209792531120333e-05, "loss": 0.6237, "step": 17455 }, { "epoch": 14.486307053941909, "grad_norm": 74.46229553222656, "learning_rate": 1.4209460580912863e-05, "loss": 1.4999, "step": 17456 }, { "epoch": 14.487136929460581, "grad_norm": 31.409639358520508, "learning_rate": 1.4209128630705395e-05, "loss": 0.7041, "step": 17457 }, { "epoch": 14.487966804979253, "grad_norm": 14.556291580200195, "learning_rate": 1.4208796680497928e-05, "loss": 0.3272, "step": 17458 }, { "epoch": 14.488796680497925, "grad_norm": 76.72541809082031, "learning_rate": 1.4208464730290456e-05, "loss": 0.7964, "step": 17459 }, { "epoch": 14.489626556016598, "grad_norm": 54.32866668701172, "learning_rate": 1.4208132780082988e-05, "loss": 0.7499, "step": 17460 }, { "epoch": 14.49045643153527, "grad_norm": 43.97822570800781, "learning_rate": 1.420780082987552e-05, "loss": 1.5316, "step": 17461 }, { "epoch": 14.491286307053942, "grad_norm": 113.87774658203125, "learning_rate": 1.4207468879668051e-05, "loss": 1.1903, "step": 17462 }, { "epoch": 14.492116182572614, "grad_norm": 31.827739715576172, "learning_rate": 1.4207136929460581e-05, "loss": 0.5452, "step": 17463 }, { "epoch": 14.492946058091286, "grad_norm": 40.35261917114258, "learning_rate": 1.4206804979253113e-05, "loss": 1.0876, "step": 17464 }, { "epoch": 14.493775933609959, "grad_norm": 52.389461517333984, "learning_rate": 1.4206473029045644e-05, "loss": 1.1761, "step": 17465 }, { "epoch": 14.49460580912863, "grad_norm": 52.9361572265625, "learning_rate": 1.4206141078838176e-05, "loss": 0.8169, 
"step": 17466 }, { "epoch": 14.495435684647303, "grad_norm": 78.34171295166016, "learning_rate": 1.4205809128630708e-05, "loss": 0.804, "step": 17467 }, { "epoch": 14.496265560165975, "grad_norm": 30.689929962158203, "learning_rate": 1.4205477178423237e-05, "loss": 0.3216, "step": 17468 }, { "epoch": 14.497095435684647, "grad_norm": 28.459810256958008, "learning_rate": 1.4205145228215769e-05, "loss": 0.8353, "step": 17469 }, { "epoch": 14.49792531120332, "grad_norm": 29.627798080444336, "learning_rate": 1.4204813278008301e-05, "loss": 0.8999, "step": 17470 }, { "epoch": 14.498755186721992, "grad_norm": 32.58808517456055, "learning_rate": 1.4204481327800831e-05, "loss": 0.4652, "step": 17471 }, { "epoch": 14.499585062240664, "grad_norm": 37.0335807800293, "learning_rate": 1.4204149377593362e-05, "loss": 0.7735, "step": 17472 }, { "epoch": 14.500414937759336, "grad_norm": 22.10443878173828, "learning_rate": 1.4203817427385894e-05, "loss": 0.8478, "step": 17473 }, { "epoch": 14.501244813278008, "grad_norm": 28.68058967590332, "learning_rate": 1.4203485477178424e-05, "loss": 1.1931, "step": 17474 }, { "epoch": 14.50207468879668, "grad_norm": 42.84679412841797, "learning_rate": 1.4203153526970956e-05, "loss": 1.1055, "step": 17475 }, { "epoch": 14.502904564315353, "grad_norm": 43.53732681274414, "learning_rate": 1.4202821576763485e-05, "loss": 1.19, "step": 17476 }, { "epoch": 14.503734439834025, "grad_norm": 163.29676818847656, "learning_rate": 1.4202489626556017e-05, "loss": 0.7879, "step": 17477 }, { "epoch": 14.504564315352697, "grad_norm": 24.65013313293457, "learning_rate": 1.420215767634855e-05, "loss": 0.7305, "step": 17478 }, { "epoch": 14.50539419087137, "grad_norm": 30.57929801940918, "learning_rate": 1.4201825726141081e-05, "loss": 0.9666, "step": 17479 }, { "epoch": 14.506224066390041, "grad_norm": 23.165542602539062, "learning_rate": 1.420149377593361e-05, "loss": 0.4843, "step": 17480 }, { "epoch": 14.507053941908714, "grad_norm": 28.394987106323242, 
"learning_rate": 1.4201161825726142e-05, "loss": 0.4915, "step": 17481 }, { "epoch": 14.507883817427386, "grad_norm": 25.0695743560791, "learning_rate": 1.4200829875518674e-05, "loss": 0.7786, "step": 17482 }, { "epoch": 14.508713692946058, "grad_norm": 39.8633918762207, "learning_rate": 1.4200497925311205e-05, "loss": 1.0219, "step": 17483 }, { "epoch": 14.50954356846473, "grad_norm": 81.80960845947266, "learning_rate": 1.4200165975103735e-05, "loss": 0.6666, "step": 17484 }, { "epoch": 14.510373443983402, "grad_norm": 45.871253967285156, "learning_rate": 1.4199834024896266e-05, "loss": 0.725, "step": 17485 }, { "epoch": 14.511203319502075, "grad_norm": 33.84882736206055, "learning_rate": 1.4199502074688798e-05, "loss": 1.4592, "step": 17486 }, { "epoch": 14.512033195020747, "grad_norm": 25.638025283813477, "learning_rate": 1.419917012448133e-05, "loss": 0.8795, "step": 17487 }, { "epoch": 14.512863070539419, "grad_norm": 107.37214660644531, "learning_rate": 1.4198838174273859e-05, "loss": 1.7092, "step": 17488 }, { "epoch": 14.513692946058091, "grad_norm": 34.92033004760742, "learning_rate": 1.419850622406639e-05, "loss": 0.5803, "step": 17489 }, { "epoch": 14.514522821576763, "grad_norm": 38.00273513793945, "learning_rate": 1.4198174273858923e-05, "loss": 0.548, "step": 17490 }, { "epoch": 14.515352697095436, "grad_norm": 92.26258087158203, "learning_rate": 1.4197842323651453e-05, "loss": 1.3139, "step": 17491 }, { "epoch": 14.516182572614108, "grad_norm": 30.752532958984375, "learning_rate": 1.4197510373443985e-05, "loss": 0.5015, "step": 17492 }, { "epoch": 14.51701244813278, "grad_norm": 43.431217193603516, "learning_rate": 1.4197178423236516e-05, "loss": 1.3672, "step": 17493 }, { "epoch": 14.517842323651452, "grad_norm": 41.66345977783203, "learning_rate": 1.4196846473029046e-05, "loss": 0.5323, "step": 17494 }, { "epoch": 14.518672199170124, "grad_norm": 34.07504653930664, "learning_rate": 1.4196514522821578e-05, "loss": 0.4368, "step": 17495 }, { "epoch": 
14.519502074688797, "grad_norm": 42.709590911865234, "learning_rate": 1.419618257261411e-05, "loss": 0.8232, "step": 17496 }, { "epoch": 14.520331950207469, "grad_norm": 26.16935920715332, "learning_rate": 1.4195850622406639e-05, "loss": 0.5885, "step": 17497 }, { "epoch": 14.521161825726141, "grad_norm": 57.08894729614258, "learning_rate": 1.4195518672199171e-05, "loss": 0.7563, "step": 17498 }, { "epoch": 14.521991701244813, "grad_norm": 27.00014877319336, "learning_rate": 1.4195186721991703e-05, "loss": 0.6984, "step": 17499 }, { "epoch": 14.522821576763485, "grad_norm": 32.30324172973633, "learning_rate": 1.4194854771784234e-05, "loss": 0.8074, "step": 17500 }, { "epoch": 14.523651452282158, "grad_norm": 22.904024124145508, "learning_rate": 1.4194522821576764e-05, "loss": 0.3688, "step": 17501 }, { "epoch": 14.52448132780083, "grad_norm": 42.99258041381836, "learning_rate": 1.4194190871369296e-05, "loss": 0.7308, "step": 17502 }, { "epoch": 14.525311203319502, "grad_norm": 25.632341384887695, "learning_rate": 1.4193858921161827e-05, "loss": 0.8621, "step": 17503 }, { "epoch": 14.526141078838174, "grad_norm": 47.51762390136719, "learning_rate": 1.4193526970954359e-05, "loss": 0.9582, "step": 17504 }, { "epoch": 14.526970954356846, "grad_norm": 44.243186950683594, "learning_rate": 1.4193195020746887e-05, "loss": 1.1422, "step": 17505 }, { "epoch": 14.527800829875519, "grad_norm": 17.06575584411621, "learning_rate": 1.419286307053942e-05, "loss": 0.2619, "step": 17506 }, { "epoch": 14.52863070539419, "grad_norm": 97.5133056640625, "learning_rate": 1.4192531120331952e-05, "loss": 0.6492, "step": 17507 }, { "epoch": 14.529460580912863, "grad_norm": 49.61612319946289, "learning_rate": 1.4192199170124484e-05, "loss": 0.9731, "step": 17508 }, { "epoch": 14.530290456431535, "grad_norm": 33.04309844970703, "learning_rate": 1.4191867219917012e-05, "loss": 0.9552, "step": 17509 }, { "epoch": 14.531120331950207, "grad_norm": 61.08540725708008, "learning_rate": 
1.4191535269709545e-05, "loss": 1.5615, "step": 17510 }, { "epoch": 14.53195020746888, "grad_norm": 57.8933219909668, "learning_rate": 1.4191203319502077e-05, "loss": 0.6458, "step": 17511 }, { "epoch": 14.532780082987552, "grad_norm": 24.16045570373535, "learning_rate": 1.4190871369294607e-05, "loss": 0.4623, "step": 17512 }, { "epoch": 14.533609958506224, "grad_norm": 23.472217559814453, "learning_rate": 1.4190539419087138e-05, "loss": 0.4613, "step": 17513 }, { "epoch": 14.534439834024896, "grad_norm": 43.75916290283203, "learning_rate": 1.4190207468879668e-05, "loss": 0.8916, "step": 17514 }, { "epoch": 14.535269709543568, "grad_norm": 36.09248733520508, "learning_rate": 1.41898755186722e-05, "loss": 1.3565, "step": 17515 }, { "epoch": 14.53609958506224, "grad_norm": 35.3233757019043, "learning_rate": 1.4189543568464732e-05, "loss": 1.4286, "step": 17516 }, { "epoch": 14.536929460580913, "grad_norm": 33.095333099365234, "learning_rate": 1.4189211618257264e-05, "loss": 0.8418, "step": 17517 }, { "epoch": 14.537759336099585, "grad_norm": 53.96403884887695, "learning_rate": 1.4188879668049793e-05, "loss": 1.1946, "step": 17518 }, { "epoch": 14.538589211618257, "grad_norm": 21.928197860717773, "learning_rate": 1.4188547717842325e-05, "loss": 0.5515, "step": 17519 }, { "epoch": 14.53941908713693, "grad_norm": 21.694522857666016, "learning_rate": 1.4188215767634857e-05, "loss": 0.5183, "step": 17520 }, { "epoch": 14.540248962655602, "grad_norm": 47.28261184692383, "learning_rate": 1.4187883817427388e-05, "loss": 0.7195, "step": 17521 }, { "epoch": 14.541078838174274, "grad_norm": 30.890331268310547, "learning_rate": 1.4187551867219918e-05, "loss": 0.7434, "step": 17522 }, { "epoch": 14.541908713692946, "grad_norm": 23.522262573242188, "learning_rate": 1.4187219917012448e-05, "loss": 0.6702, "step": 17523 }, { "epoch": 14.542738589211618, "grad_norm": 22.258129119873047, "learning_rate": 1.418688796680498e-05, "loss": 0.3662, "step": 17524 }, { "epoch": 
14.54356846473029, "grad_norm": 37.52656173706055, "learning_rate": 1.4186556016597513e-05, "loss": 0.7076, "step": 17525 }, { "epoch": 14.544398340248962, "grad_norm": 49.37898254394531, "learning_rate": 1.4186224066390041e-05, "loss": 0.704, "step": 17526 }, { "epoch": 14.545228215767635, "grad_norm": 77.11491394042969, "learning_rate": 1.4185892116182573e-05, "loss": 0.6217, "step": 17527 }, { "epoch": 14.546058091286307, "grad_norm": 39.858482360839844, "learning_rate": 1.4185560165975106e-05, "loss": 1.0323, "step": 17528 }, { "epoch": 14.546887966804979, "grad_norm": 52.617279052734375, "learning_rate": 1.4185228215767638e-05, "loss": 1.0832, "step": 17529 }, { "epoch": 14.547717842323651, "grad_norm": 45.64452362060547, "learning_rate": 1.4184896265560166e-05, "loss": 0.9548, "step": 17530 }, { "epoch": 14.548547717842323, "grad_norm": 82.23384094238281, "learning_rate": 1.4184564315352699e-05, "loss": 0.4949, "step": 17531 }, { "epoch": 14.549377593360996, "grad_norm": 28.44747543334961, "learning_rate": 1.4184232365145229e-05, "loss": 0.4935, "step": 17532 }, { "epoch": 14.550207468879668, "grad_norm": 73.38423919677734, "learning_rate": 1.4183900414937761e-05, "loss": 1.015, "step": 17533 }, { "epoch": 14.55103734439834, "grad_norm": 50.09967041015625, "learning_rate": 1.4183568464730291e-05, "loss": 0.6045, "step": 17534 }, { "epoch": 14.551867219917012, "grad_norm": 25.47850227355957, "learning_rate": 1.4183236514522822e-05, "loss": 0.54, "step": 17535 }, { "epoch": 14.552697095435684, "grad_norm": 55.46153259277344, "learning_rate": 1.4182904564315354e-05, "loss": 1.1183, "step": 17536 }, { "epoch": 14.553526970954357, "grad_norm": 13.92392349243164, "learning_rate": 1.4182572614107886e-05, "loss": 0.3706, "step": 17537 }, { "epoch": 14.554356846473029, "grad_norm": 43.86358642578125, "learning_rate": 1.4182240663900415e-05, "loss": 0.3804, "step": 17538 }, { "epoch": 14.555186721991701, "grad_norm": 56.4968147277832, "learning_rate": 
1.4181908713692947e-05, "loss": 0.8281, "step": 17539 }, { "epoch": 14.556016597510373, "grad_norm": 28.622852325439453, "learning_rate": 1.4181576763485479e-05, "loss": 0.7664, "step": 17540 }, { "epoch": 14.556846473029045, "grad_norm": 23.53169822692871, "learning_rate": 1.418124481327801e-05, "loss": 0.5635, "step": 17541 }, { "epoch": 14.557676348547718, "grad_norm": 69.5911636352539, "learning_rate": 1.418091286307054e-05, "loss": 0.5371, "step": 17542 }, { "epoch": 14.55850622406639, "grad_norm": 38.03599548339844, "learning_rate": 1.4180580912863072e-05, "loss": 0.8985, "step": 17543 }, { "epoch": 14.559336099585062, "grad_norm": 47.32300567626953, "learning_rate": 1.4180248962655602e-05, "loss": 1.0204, "step": 17544 }, { "epoch": 14.560165975103734, "grad_norm": 37.1959342956543, "learning_rate": 1.4179917012448134e-05, "loss": 1.1456, "step": 17545 }, { "epoch": 14.560995850622406, "grad_norm": 53.501678466796875, "learning_rate": 1.4179585062240667e-05, "loss": 0.5839, "step": 17546 }, { "epoch": 14.561825726141079, "grad_norm": 51.21486282348633, "learning_rate": 1.4179253112033195e-05, "loss": 0.5575, "step": 17547 }, { "epoch": 14.56265560165975, "grad_norm": 28.11212921142578, "learning_rate": 1.4178921161825727e-05, "loss": 0.7237, "step": 17548 }, { "epoch": 14.563485477178423, "grad_norm": 50.20412826538086, "learning_rate": 1.417858921161826e-05, "loss": 0.5832, "step": 17549 }, { "epoch": 14.564315352697095, "grad_norm": 78.2357406616211, "learning_rate": 1.417825726141079e-05, "loss": 0.851, "step": 17550 }, { "epoch": 14.565145228215767, "grad_norm": 37.89143753051758, "learning_rate": 1.417792531120332e-05, "loss": 0.4683, "step": 17551 }, { "epoch": 14.56597510373444, "grad_norm": 46.71353530883789, "learning_rate": 1.417759336099585e-05, "loss": 0.8076, "step": 17552 }, { "epoch": 14.566804979253112, "grad_norm": 50.11685562133789, "learning_rate": 1.4177261410788383e-05, "loss": 0.5083, "step": 17553 }, { "epoch": 14.567634854771784, 
"grad_norm": 24.923519134521484, "learning_rate": 1.4176929460580915e-05, "loss": 0.6495, "step": 17554 }, { "epoch": 14.568464730290456, "grad_norm": 69.18856048583984, "learning_rate": 1.4176597510373444e-05, "loss": 1.3551, "step": 17555 }, { "epoch": 14.569294605809128, "grad_norm": 53.93576431274414, "learning_rate": 1.4176265560165976e-05, "loss": 0.6124, "step": 17556 }, { "epoch": 14.5701244813278, "grad_norm": 22.55959129333496, "learning_rate": 1.4175933609958508e-05, "loss": 0.551, "step": 17557 }, { "epoch": 14.570954356846473, "grad_norm": 24.57284164428711, "learning_rate": 1.417560165975104e-05, "loss": 0.5111, "step": 17558 }, { "epoch": 14.571784232365145, "grad_norm": 27.382850646972656, "learning_rate": 1.4175269709543569e-05, "loss": 0.7518, "step": 17559 }, { "epoch": 14.572614107883817, "grad_norm": 35.96708297729492, "learning_rate": 1.4174937759336101e-05, "loss": 0.4662, "step": 17560 }, { "epoch": 14.57344398340249, "grad_norm": 49.90659713745117, "learning_rate": 1.4174605809128631e-05, "loss": 0.9181, "step": 17561 }, { "epoch": 14.574273858921162, "grad_norm": 37.4671745300293, "learning_rate": 1.4174273858921163e-05, "loss": 0.5405, "step": 17562 }, { "epoch": 14.575103734439834, "grad_norm": 39.83641052246094, "learning_rate": 1.4173941908713694e-05, "loss": 0.8262, "step": 17563 }, { "epoch": 14.575933609958506, "grad_norm": 69.04744720458984, "learning_rate": 1.4173609958506224e-05, "loss": 1.2608, "step": 17564 }, { "epoch": 14.576763485477178, "grad_norm": 48.377769470214844, "learning_rate": 1.4173278008298756e-05, "loss": 0.6433, "step": 17565 }, { "epoch": 14.57759336099585, "grad_norm": 49.14692306518555, "learning_rate": 1.4172946058091288e-05, "loss": 0.9365, "step": 17566 }, { "epoch": 14.578423236514523, "grad_norm": 54.611236572265625, "learning_rate": 1.4172614107883817e-05, "loss": 0.6894, "step": 17567 }, { "epoch": 14.579253112033195, "grad_norm": 39.89440155029297, "learning_rate": 1.417228215767635e-05, "loss": 
0.8012, "step": 17568 }, { "epoch": 14.580082987551867, "grad_norm": 57.025936126708984, "learning_rate": 1.4171950207468881e-05, "loss": 0.565, "step": 17569 }, { "epoch": 14.58091286307054, "grad_norm": 30.915197372436523, "learning_rate": 1.4171618257261412e-05, "loss": 0.7411, "step": 17570 }, { "epoch": 14.581742738589211, "grad_norm": 58.30996322631836, "learning_rate": 1.4171286307053944e-05, "loss": 0.6768, "step": 17571 }, { "epoch": 14.582572614107884, "grad_norm": 42.511417388916016, "learning_rate": 1.4170954356846474e-05, "loss": 0.7367, "step": 17572 }, { "epoch": 14.583402489626556, "grad_norm": 41.60010528564453, "learning_rate": 1.4170622406639005e-05, "loss": 0.4454, "step": 17573 }, { "epoch": 14.584232365145228, "grad_norm": 36.83684158325195, "learning_rate": 1.4170290456431537e-05, "loss": 0.9729, "step": 17574 }, { "epoch": 14.5850622406639, "grad_norm": 43.76133346557617, "learning_rate": 1.4169958506224069e-05, "loss": 1.2012, "step": 17575 }, { "epoch": 14.585892116182572, "grad_norm": 32.77153778076172, "learning_rate": 1.4169626556016598e-05, "loss": 1.2696, "step": 17576 }, { "epoch": 14.586721991701245, "grad_norm": 39.018367767333984, "learning_rate": 1.416929460580913e-05, "loss": 0.8944, "step": 17577 }, { "epoch": 14.587551867219917, "grad_norm": 35.55006790161133, "learning_rate": 1.4168962655601662e-05, "loss": 0.5813, "step": 17578 }, { "epoch": 14.588381742738589, "grad_norm": 37.34959030151367, "learning_rate": 1.4168630705394192e-05, "loss": 0.7044, "step": 17579 }, { "epoch": 14.589211618257261, "grad_norm": 32.43830871582031, "learning_rate": 1.4168298755186723e-05, "loss": 0.7461, "step": 17580 }, { "epoch": 14.590041493775933, "grad_norm": 23.549217224121094, "learning_rate": 1.4167966804979255e-05, "loss": 0.4648, "step": 17581 }, { "epoch": 14.590871369294605, "grad_norm": 102.63323211669922, "learning_rate": 1.4167634854771785e-05, "loss": 0.8751, "step": 17582 }, { "epoch": 14.591701244813278, "grad_norm": 
34.726478576660156, "learning_rate": 1.4167302904564317e-05, "loss": 1.3139, "step": 17583 }, { "epoch": 14.59253112033195, "grad_norm": 51.85805130004883, "learning_rate": 1.4166970954356846e-05, "loss": 1.0653, "step": 17584 }, { "epoch": 14.593360995850622, "grad_norm": 17.56317710876465, "learning_rate": 1.4166639004149378e-05, "loss": 0.3959, "step": 17585 }, { "epoch": 14.594190871369294, "grad_norm": 30.41834831237793, "learning_rate": 1.416630705394191e-05, "loss": 0.578, "step": 17586 }, { "epoch": 14.595020746887966, "grad_norm": 33.64430236816406, "learning_rate": 1.4165975103734442e-05, "loss": 0.4462, "step": 17587 }, { "epoch": 14.595850622406639, "grad_norm": 93.51565551757812, "learning_rate": 1.4165643153526971e-05, "loss": 1.0123, "step": 17588 }, { "epoch": 14.59668049792531, "grad_norm": 52.488609313964844, "learning_rate": 1.4165311203319503e-05, "loss": 0.8502, "step": 17589 }, { "epoch": 14.597510373443983, "grad_norm": 33.914344787597656, "learning_rate": 1.4164979253112035e-05, "loss": 0.6279, "step": 17590 }, { "epoch": 14.598340248962655, "grad_norm": 70.77877807617188, "learning_rate": 1.4164647302904566e-05, "loss": 0.9881, "step": 17591 }, { "epoch": 14.599170124481327, "grad_norm": 58.06865692138672, "learning_rate": 1.4164315352697096e-05, "loss": 1.0964, "step": 17592 }, { "epoch": 14.6, "grad_norm": 22.061742782592773, "learning_rate": 1.4163983402489627e-05, "loss": 0.4271, "step": 17593 }, { "epoch": 14.600829875518672, "grad_norm": 29.16151237487793, "learning_rate": 1.4163651452282159e-05, "loss": 0.4927, "step": 17594 }, { "epoch": 14.601659751037344, "grad_norm": 63.98958206176758, "learning_rate": 1.416331950207469e-05, "loss": 1.1252, "step": 17595 }, { "epoch": 14.602489626556016, "grad_norm": 48.9224967956543, "learning_rate": 1.4162987551867223e-05, "loss": 0.8632, "step": 17596 }, { "epoch": 14.603319502074688, "grad_norm": 22.972484588623047, "learning_rate": 1.4162655601659752e-05, "loss": 0.2546, "step": 17597 }, { 
"epoch": 14.60414937759336, "grad_norm": 26.312528610229492, "learning_rate": 1.4162323651452284e-05, "loss": 0.7364, "step": 17598 }, { "epoch": 14.604979253112033, "grad_norm": 66.94125366210938, "learning_rate": 1.4161991701244814e-05, "loss": 0.7672, "step": 17599 }, { "epoch": 14.605809128630705, "grad_norm": 65.32363891601562, "learning_rate": 1.4161659751037346e-05, "loss": 0.6939, "step": 17600 }, { "epoch": 14.606639004149377, "grad_norm": 38.89180374145508, "learning_rate": 1.4161327800829877e-05, "loss": 1.1649, "step": 17601 }, { "epoch": 14.60746887966805, "grad_norm": 93.82952117919922, "learning_rate": 1.4160995850622407e-05, "loss": 1.7216, "step": 17602 }, { "epoch": 14.608298755186722, "grad_norm": 16.604019165039062, "learning_rate": 1.4160663900414939e-05, "loss": 0.5107, "step": 17603 }, { "epoch": 14.609128630705394, "grad_norm": 33.42551040649414, "learning_rate": 1.4160331950207471e-05, "loss": 0.8646, "step": 17604 }, { "epoch": 14.609958506224066, "grad_norm": 48.8902473449707, "learning_rate": 1.416e-05, "loss": 1.1814, "step": 17605 }, { "epoch": 14.610788381742738, "grad_norm": 22.567665100097656, "learning_rate": 1.4159668049792532e-05, "loss": 0.5303, "step": 17606 }, { "epoch": 14.61161825726141, "grad_norm": 50.4097900390625, "learning_rate": 1.4159336099585064e-05, "loss": 0.9448, "step": 17607 }, { "epoch": 14.612448132780083, "grad_norm": 46.599308013916016, "learning_rate": 1.4159004149377595e-05, "loss": 1.7287, "step": 17608 }, { "epoch": 14.613278008298755, "grad_norm": 58.724365234375, "learning_rate": 1.4158672199170125e-05, "loss": 0.6442, "step": 17609 }, { "epoch": 14.614107883817427, "grad_norm": 47.848697662353516, "learning_rate": 1.4158340248962657e-05, "loss": 1.3106, "step": 17610 }, { "epoch": 14.6149377593361, "grad_norm": 90.77247619628906, "learning_rate": 1.4158008298755188e-05, "loss": 0.846, "step": 17611 }, { "epoch": 14.615767634854771, "grad_norm": 42.475040435791016, "learning_rate": 
1.415767634854772e-05, "loss": 1.1617, "step": 17612 }, { "epoch": 14.616597510373444, "grad_norm": 39.09463882446289, "learning_rate": 1.415734439834025e-05, "loss": 1.4102, "step": 17613 }, { "epoch": 14.617427385892116, "grad_norm": 31.625123977661133, "learning_rate": 1.415701244813278e-05, "loss": 0.6267, "step": 17614 }, { "epoch": 14.618257261410788, "grad_norm": 20.304487228393555, "learning_rate": 1.4156680497925313e-05, "loss": 0.3256, "step": 17615 }, { "epoch": 14.61908713692946, "grad_norm": 51.06381607055664, "learning_rate": 1.4156348547717845e-05, "loss": 1.3743, "step": 17616 }, { "epoch": 14.619917012448132, "grad_norm": 57.90556335449219, "learning_rate": 1.4156016597510373e-05, "loss": 0.7901, "step": 17617 }, { "epoch": 14.620746887966805, "grad_norm": 18.388517379760742, "learning_rate": 1.4155684647302905e-05, "loss": 0.3206, "step": 17618 }, { "epoch": 14.621576763485477, "grad_norm": 31.1912784576416, "learning_rate": 1.4155352697095438e-05, "loss": 0.5505, "step": 17619 }, { "epoch": 14.622406639004149, "grad_norm": 16.21700668334961, "learning_rate": 1.4155020746887968e-05, "loss": 0.5048, "step": 17620 }, { "epoch": 14.623236514522821, "grad_norm": 47.78147506713867, "learning_rate": 1.4154688796680498e-05, "loss": 0.8105, "step": 17621 }, { "epoch": 14.624066390041493, "grad_norm": 72.50377655029297, "learning_rate": 1.4154356846473029e-05, "loss": 0.8318, "step": 17622 }, { "epoch": 14.624896265560166, "grad_norm": 27.050167083740234, "learning_rate": 1.4154024896265561e-05, "loss": 0.8046, "step": 17623 }, { "epoch": 14.625726141078838, "grad_norm": 29.388120651245117, "learning_rate": 1.4153692946058093e-05, "loss": 0.8741, "step": 17624 }, { "epoch": 14.62655601659751, "grad_norm": 34.74603271484375, "learning_rate": 1.4153360995850625e-05, "loss": 0.7339, "step": 17625 }, { "epoch": 14.627385892116182, "grad_norm": 67.47248077392578, "learning_rate": 1.4153029045643154e-05, "loss": 0.6849, "step": 17626 }, { "epoch": 
14.628215767634854, "grad_norm": 32.48389434814453, "learning_rate": 1.4152697095435686e-05, "loss": 0.7441, "step": 17627 }, { "epoch": 14.629045643153527, "grad_norm": 26.059654235839844, "learning_rate": 1.4152365145228218e-05, "loss": 0.6888, "step": 17628 }, { "epoch": 14.629875518672199, "grad_norm": 17.888261795043945, "learning_rate": 1.4152033195020749e-05, "loss": 0.4754, "step": 17629 }, { "epoch": 14.630705394190871, "grad_norm": 22.767597198486328, "learning_rate": 1.4151701244813279e-05, "loss": 0.8347, "step": 17630 }, { "epoch": 14.631535269709543, "grad_norm": 43.14255905151367, "learning_rate": 1.415136929460581e-05, "loss": 0.6741, "step": 17631 }, { "epoch": 14.632365145228215, "grad_norm": 96.33139038085938, "learning_rate": 1.4151037344398341e-05, "loss": 0.8217, "step": 17632 }, { "epoch": 14.633195020746887, "grad_norm": 21.95212745666504, "learning_rate": 1.4150705394190874e-05, "loss": 0.4429, "step": 17633 }, { "epoch": 14.63402489626556, "grad_norm": 41.4429817199707, "learning_rate": 1.4150373443983402e-05, "loss": 1.0547, "step": 17634 }, { "epoch": 14.634854771784232, "grad_norm": 33.07980728149414, "learning_rate": 1.4150041493775934e-05, "loss": 0.6629, "step": 17635 }, { "epoch": 14.635684647302904, "grad_norm": 49.5890007019043, "learning_rate": 1.4149709543568466e-05, "loss": 0.6626, "step": 17636 }, { "epoch": 14.636514522821576, "grad_norm": 37.431915283203125, "learning_rate": 1.4149377593360999e-05, "loss": 1.0162, "step": 17637 }, { "epoch": 14.637344398340248, "grad_norm": 35.82333755493164, "learning_rate": 1.4149045643153527e-05, "loss": 1.0036, "step": 17638 }, { "epoch": 14.63817427385892, "grad_norm": 34.803070068359375, "learning_rate": 1.414871369294606e-05, "loss": 0.4049, "step": 17639 }, { "epoch": 14.639004149377593, "grad_norm": 52.51633834838867, "learning_rate": 1.414838174273859e-05, "loss": 0.6266, "step": 17640 }, { "epoch": 14.639834024896265, "grad_norm": 35.04158020019531, "learning_rate": 
1.4148049792531122e-05, "loss": 0.6668, "step": 17641 }, { "epoch": 14.640663900414937, "grad_norm": 24.02688217163086, "learning_rate": 1.4147717842323652e-05, "loss": 0.8944, "step": 17642 }, { "epoch": 14.64149377593361, "grad_norm": 21.18486976623535, "learning_rate": 1.4147385892116183e-05, "loss": 0.5622, "step": 17643 }, { "epoch": 14.642323651452282, "grad_norm": 46.94489669799805, "learning_rate": 1.4147053941908715e-05, "loss": 1.0368, "step": 17644 }, { "epoch": 14.643153526970954, "grad_norm": 35.207557678222656, "learning_rate": 1.4146721991701247e-05, "loss": 0.4287, "step": 17645 }, { "epoch": 14.643983402489626, "grad_norm": 32.55474090576172, "learning_rate": 1.4146390041493776e-05, "loss": 0.7428, "step": 17646 }, { "epoch": 14.644813278008298, "grad_norm": 79.77755737304688, "learning_rate": 1.4146058091286308e-05, "loss": 0.9308, "step": 17647 }, { "epoch": 14.64564315352697, "grad_norm": 21.419309616088867, "learning_rate": 1.414572614107884e-05, "loss": 0.5643, "step": 17648 }, { "epoch": 14.646473029045643, "grad_norm": 29.371843338012695, "learning_rate": 1.414539419087137e-05, "loss": 0.8723, "step": 17649 }, { "epoch": 14.647302904564315, "grad_norm": 33.6983642578125, "learning_rate": 1.4145062240663902e-05, "loss": 0.7124, "step": 17650 }, { "epoch": 14.648132780082987, "grad_norm": 29.839250564575195, "learning_rate": 1.4144730290456433e-05, "loss": 0.7246, "step": 17651 }, { "epoch": 14.64896265560166, "grad_norm": 24.505626678466797, "learning_rate": 1.4144398340248963e-05, "loss": 0.5992, "step": 17652 }, { "epoch": 14.649792531120331, "grad_norm": 54.97067642211914, "learning_rate": 1.4144066390041495e-05, "loss": 0.7509, "step": 17653 }, { "epoch": 14.650622406639004, "grad_norm": 36.19294738769531, "learning_rate": 1.4143734439834027e-05, "loss": 1.1541, "step": 17654 }, { "epoch": 14.651452282157676, "grad_norm": 43.337738037109375, "learning_rate": 1.4143402489626556e-05, "loss": 0.8845, "step": 17655 }, { "epoch": 
14.652282157676348, "grad_norm": 35.23871994018555, "learning_rate": 1.4143070539419088e-05, "loss": 0.4398, "step": 17656 }, { "epoch": 14.65311203319502, "grad_norm": 42.03295135498047, "learning_rate": 1.414273858921162e-05, "loss": 0.9687, "step": 17657 }, { "epoch": 14.653941908713692, "grad_norm": 67.55241394042969, "learning_rate": 1.414240663900415e-05, "loss": 0.6404, "step": 17658 }, { "epoch": 14.654771784232365, "grad_norm": 61.69319152832031, "learning_rate": 1.4142074688796681e-05, "loss": 0.7047, "step": 17659 }, { "epoch": 14.655601659751037, "grad_norm": 32.6443977355957, "learning_rate": 1.4141742738589213e-05, "loss": 0.5049, "step": 17660 }, { "epoch": 14.656431535269709, "grad_norm": 23.302064895629883, "learning_rate": 1.4141410788381744e-05, "loss": 0.6063, "step": 17661 }, { "epoch": 14.657261410788381, "grad_norm": 26.088661193847656, "learning_rate": 1.4141078838174276e-05, "loss": 0.9271, "step": 17662 }, { "epoch": 14.658091286307053, "grad_norm": 34.507938385009766, "learning_rate": 1.4140746887966805e-05, "loss": 0.9898, "step": 17663 }, { "epoch": 14.658921161825726, "grad_norm": 94.0716781616211, "learning_rate": 1.4140414937759337e-05, "loss": 0.9582, "step": 17664 }, { "epoch": 14.659751037344398, "grad_norm": 28.83285903930664, "learning_rate": 1.4140082987551869e-05, "loss": 0.7432, "step": 17665 }, { "epoch": 14.66058091286307, "grad_norm": 40.06218338012695, "learning_rate": 1.4139751037344401e-05, "loss": 0.6427, "step": 17666 }, { "epoch": 14.661410788381742, "grad_norm": 24.05020523071289, "learning_rate": 1.413941908713693e-05, "loss": 0.5495, "step": 17667 }, { "epoch": 14.662240663900414, "grad_norm": 17.319683074951172, "learning_rate": 1.4139087136929462e-05, "loss": 0.5059, "step": 17668 }, { "epoch": 14.663070539419087, "grad_norm": 46.950721740722656, "learning_rate": 1.4138755186721992e-05, "loss": 0.7222, "step": 17669 }, { "epoch": 14.663900414937759, "grad_norm": 89.6106948852539, "learning_rate": 
1.4138423236514524e-05, "loss": 1.5628, "step": 17670 }, { "epoch": 14.664730290456431, "grad_norm": 38.947425842285156, "learning_rate": 1.4138091286307055e-05, "loss": 0.8374, "step": 17671 }, { "epoch": 14.665560165975103, "grad_norm": 52.45377731323242, "learning_rate": 1.4137759336099585e-05, "loss": 0.6289, "step": 17672 }, { "epoch": 14.666390041493775, "grad_norm": 33.05103302001953, "learning_rate": 1.4137427385892117e-05, "loss": 0.6844, "step": 17673 }, { "epoch": 14.667219917012448, "grad_norm": 44.093502044677734, "learning_rate": 1.413709543568465e-05, "loss": 0.7401, "step": 17674 }, { "epoch": 14.66804979253112, "grad_norm": 23.88553810119629, "learning_rate": 1.4136763485477178e-05, "loss": 0.6389, "step": 17675 }, { "epoch": 14.668879668049792, "grad_norm": 117.75006103515625, "learning_rate": 1.413643153526971e-05, "loss": 1.2609, "step": 17676 }, { "epoch": 14.669709543568464, "grad_norm": 49.612892150878906, "learning_rate": 1.4136099585062242e-05, "loss": 0.9688, "step": 17677 }, { "epoch": 14.670539419087136, "grad_norm": 50.83950424194336, "learning_rate": 1.4135767634854773e-05, "loss": 1.3171, "step": 17678 }, { "epoch": 14.671369294605809, "grad_norm": 43.451881408691406, "learning_rate": 1.4135435684647305e-05, "loss": 0.8253, "step": 17679 }, { "epoch": 14.67219917012448, "grad_norm": 35.95194625854492, "learning_rate": 1.4135103734439835e-05, "loss": 0.7922, "step": 17680 }, { "epoch": 14.673029045643153, "grad_norm": 52.07651138305664, "learning_rate": 1.4134771784232366e-05, "loss": 1.0845, "step": 17681 }, { "epoch": 14.673858921161825, "grad_norm": 35.24103927612305, "learning_rate": 1.4134439834024898e-05, "loss": 0.959, "step": 17682 }, { "epoch": 14.674688796680497, "grad_norm": 70.25628662109375, "learning_rate": 1.413410788381743e-05, "loss": 0.7372, "step": 17683 }, { "epoch": 14.67551867219917, "grad_norm": 47.56557846069336, "learning_rate": 1.4133775933609959e-05, "loss": 0.853, "step": 17684 }, { "epoch": 
14.676348547717842, "grad_norm": 12.68368911743164, "learning_rate": 1.413344398340249e-05, "loss": 0.3111, "step": 17685 }, { "epoch": 14.677178423236514, "grad_norm": 13.880681037902832, "learning_rate": 1.4133112033195023e-05, "loss": 0.3665, "step": 17686 }, { "epoch": 14.678008298755186, "grad_norm": 20.87657928466797, "learning_rate": 1.4132780082987553e-05, "loss": 0.5895, "step": 17687 }, { "epoch": 14.678838174273858, "grad_norm": 31.458145141601562, "learning_rate": 1.4132448132780084e-05, "loss": 0.7685, "step": 17688 }, { "epoch": 14.67966804979253, "grad_norm": 43.86437225341797, "learning_rate": 1.4132116182572616e-05, "loss": 1.0347, "step": 17689 }, { "epoch": 14.680497925311203, "grad_norm": 23.39232635498047, "learning_rate": 1.4131784232365146e-05, "loss": 0.6214, "step": 17690 }, { "epoch": 14.681327800829875, "grad_norm": 32.4015007019043, "learning_rate": 1.4131452282157678e-05, "loss": 0.4927, "step": 17691 }, { "epoch": 14.682157676348547, "grad_norm": 54.09208679199219, "learning_rate": 1.4131120331950207e-05, "loss": 0.9053, "step": 17692 }, { "epoch": 14.68298755186722, "grad_norm": 48.29694366455078, "learning_rate": 1.4130788381742739e-05, "loss": 1.0754, "step": 17693 }, { "epoch": 14.683817427385891, "grad_norm": 30.0028018951416, "learning_rate": 1.4130456431535271e-05, "loss": 0.8587, "step": 17694 }, { "epoch": 14.684647302904564, "grad_norm": 54.86110305786133, "learning_rate": 1.4130124481327803e-05, "loss": 0.8089, "step": 17695 }, { "epoch": 14.685477178423236, "grad_norm": 32.7430305480957, "learning_rate": 1.4129792531120332e-05, "loss": 0.3861, "step": 17696 }, { "epoch": 14.686307053941908, "grad_norm": 25.10338020324707, "learning_rate": 1.4129460580912864e-05, "loss": 0.8373, "step": 17697 }, { "epoch": 14.68713692946058, "grad_norm": 23.460668563842773, "learning_rate": 1.4129128630705396e-05, "loss": 0.5873, "step": 17698 }, { "epoch": 14.687966804979252, "grad_norm": 24.897579193115234, "learning_rate": 
1.4128796680497927e-05, "loss": 0.5526, "step": 17699 }, { "epoch": 14.688796680497925, "grad_norm": 46.28727722167969, "learning_rate": 1.4128464730290457e-05, "loss": 1.6371, "step": 17700 }, { "epoch": 14.689626556016597, "grad_norm": 22.96822738647461, "learning_rate": 1.4128132780082987e-05, "loss": 0.4421, "step": 17701 }, { "epoch": 14.690456431535269, "grad_norm": 51.92457580566406, "learning_rate": 1.412780082987552e-05, "loss": 1.2051, "step": 17702 }, { "epoch": 14.691286307053941, "grad_norm": 58.33155822753906, "learning_rate": 1.4127468879668052e-05, "loss": 0.9364, "step": 17703 }, { "epoch": 14.692116182572613, "grad_norm": 34.8642463684082, "learning_rate": 1.4127136929460584e-05, "loss": 0.8702, "step": 17704 }, { "epoch": 14.692946058091286, "grad_norm": 44.13131332397461, "learning_rate": 1.4126804979253112e-05, "loss": 0.5888, "step": 17705 }, { "epoch": 14.693775933609958, "grad_norm": 58.52980422973633, "learning_rate": 1.4126473029045645e-05, "loss": 0.518, "step": 17706 }, { "epoch": 14.69460580912863, "grad_norm": 31.523361206054688, "learning_rate": 1.4126141078838177e-05, "loss": 1.3341, "step": 17707 }, { "epoch": 14.695435684647302, "grad_norm": 33.24656295776367, "learning_rate": 1.4125809128630707e-05, "loss": 0.6676, "step": 17708 }, { "epoch": 14.696265560165974, "grad_norm": 19.94529151916504, "learning_rate": 1.4125477178423238e-05, "loss": 0.396, "step": 17709 }, { "epoch": 14.697095435684647, "grad_norm": 36.87724304199219, "learning_rate": 1.4125145228215768e-05, "loss": 1.0001, "step": 17710 }, { "epoch": 14.697925311203319, "grad_norm": 23.72918701171875, "learning_rate": 1.41248132780083e-05, "loss": 0.9858, "step": 17711 }, { "epoch": 14.698755186721991, "grad_norm": 42.07859420776367, "learning_rate": 1.4124481327800832e-05, "loss": 0.5525, "step": 17712 }, { "epoch": 14.699585062240663, "grad_norm": 20.044601440429688, "learning_rate": 1.4124149377593361e-05, "loss": 0.7107, "step": 17713 }, { "epoch": 
14.700414937759335, "grad_norm": 40.36675262451172, "learning_rate": 1.4123817427385893e-05, "loss": 1.0699, "step": 17714 }, { "epoch": 14.701244813278008, "grad_norm": 47.78955078125, "learning_rate": 1.4123485477178425e-05, "loss": 0.899, "step": 17715 }, { "epoch": 14.70207468879668, "grad_norm": 47.241844177246094, "learning_rate": 1.4123153526970955e-05, "loss": 0.9319, "step": 17716 }, { "epoch": 14.702904564315352, "grad_norm": 51.87575912475586, "learning_rate": 1.4122821576763486e-05, "loss": 0.591, "step": 17717 }, { "epoch": 14.703734439834024, "grad_norm": 45.77127456665039, "learning_rate": 1.4122489626556018e-05, "loss": 0.7949, "step": 17718 }, { "epoch": 14.704564315352696, "grad_norm": 23.69058609008789, "learning_rate": 1.4122157676348548e-05, "loss": 0.4733, "step": 17719 }, { "epoch": 14.705394190871369, "grad_norm": 28.772796630859375, "learning_rate": 1.412182572614108e-05, "loss": 0.788, "step": 17720 }, { "epoch": 14.70622406639004, "grad_norm": 29.19161033630371, "learning_rate": 1.4121493775933611e-05, "loss": 0.9981, "step": 17721 }, { "epoch": 14.707053941908713, "grad_norm": 30.4207820892334, "learning_rate": 1.4121161825726141e-05, "loss": 0.4759, "step": 17722 }, { "epoch": 14.707883817427385, "grad_norm": 45.10277557373047, "learning_rate": 1.4120829875518673e-05, "loss": 1.0529, "step": 17723 }, { "epoch": 14.708713692946057, "grad_norm": 37.82943344116211, "learning_rate": 1.4120497925311206e-05, "loss": 0.7158, "step": 17724 }, { "epoch": 14.70954356846473, "grad_norm": 27.371423721313477, "learning_rate": 1.4120165975103734e-05, "loss": 0.458, "step": 17725 }, { "epoch": 14.710373443983402, "grad_norm": 22.592084884643555, "learning_rate": 1.4119834024896266e-05, "loss": 0.6159, "step": 17726 }, { "epoch": 14.711203319502074, "grad_norm": 29.72715187072754, "learning_rate": 1.4119502074688798e-05, "loss": 0.3798, "step": 17727 }, { "epoch": 14.712033195020746, "grad_norm": 48.423824310302734, "learning_rate": 
1.4119170124481329e-05, "loss": 0.8897, "step": 17728 }, { "epoch": 14.712863070539418, "grad_norm": 19.17466926574707, "learning_rate": 1.4118838174273861e-05, "loss": 0.3992, "step": 17729 }, { "epoch": 14.71369294605809, "grad_norm": 13.565740585327148, "learning_rate": 1.4118506224066391e-05, "loss": 0.4123, "step": 17730 }, { "epoch": 14.714522821576763, "grad_norm": 34.285743713378906, "learning_rate": 1.4118174273858922e-05, "loss": 0.6536, "step": 17731 }, { "epoch": 14.715352697095435, "grad_norm": 36.897621154785156, "learning_rate": 1.4117842323651454e-05, "loss": 1.232, "step": 17732 }, { "epoch": 14.716182572614107, "grad_norm": 44.97300720214844, "learning_rate": 1.4117510373443986e-05, "loss": 0.926, "step": 17733 }, { "epoch": 14.71701244813278, "grad_norm": 30.716245651245117, "learning_rate": 1.4117178423236515e-05, "loss": 0.511, "step": 17734 }, { "epoch": 14.717842323651452, "grad_norm": 55.09062957763672, "learning_rate": 1.4116846473029047e-05, "loss": 0.9646, "step": 17735 }, { "epoch": 14.718672199170124, "grad_norm": 29.922584533691406, "learning_rate": 1.4116514522821579e-05, "loss": 0.6984, "step": 17736 }, { "epoch": 14.719502074688796, "grad_norm": 20.887489318847656, "learning_rate": 1.411618257261411e-05, "loss": 0.4944, "step": 17737 }, { "epoch": 14.720331950207468, "grad_norm": 37.339622497558594, "learning_rate": 1.411585062240664e-05, "loss": 1.2607, "step": 17738 }, { "epoch": 14.72116182572614, "grad_norm": 27.8750057220459, "learning_rate": 1.411551867219917e-05, "loss": 0.8759, "step": 17739 }, { "epoch": 14.721991701244812, "grad_norm": 48.16259002685547, "learning_rate": 1.4115186721991702e-05, "loss": 1.0684, "step": 17740 }, { "epoch": 14.722821576763485, "grad_norm": 49.537742614746094, "learning_rate": 1.4114854771784234e-05, "loss": 0.7328, "step": 17741 }, { "epoch": 14.723651452282157, "grad_norm": 20.60191535949707, "learning_rate": 1.4114522821576763e-05, "loss": 0.3931, "step": 17742 }, { "epoch": 
14.724481327800829, "grad_norm": 57.38188934326172, "learning_rate": 1.4114190871369295e-05, "loss": 0.592, "step": 17743 }, { "epoch": 14.725311203319501, "grad_norm": 47.54384231567383, "learning_rate": 1.4113858921161827e-05, "loss": 0.8686, "step": 17744 }, { "epoch": 14.726141078838173, "grad_norm": 34.03571701049805, "learning_rate": 1.411352697095436e-05, "loss": 0.7934, "step": 17745 }, { "epoch": 14.726970954356846, "grad_norm": 27.792604446411133, "learning_rate": 1.4113195020746888e-05, "loss": 0.5707, "step": 17746 }, { "epoch": 14.727800829875518, "grad_norm": 63.528873443603516, "learning_rate": 1.411286307053942e-05, "loss": 0.9141, "step": 17747 }, { "epoch": 14.72863070539419, "grad_norm": 38.39903259277344, "learning_rate": 1.411253112033195e-05, "loss": 0.9401, "step": 17748 }, { "epoch": 14.729460580912862, "grad_norm": 50.633968353271484, "learning_rate": 1.4112199170124483e-05, "loss": 0.7492, "step": 17749 }, { "epoch": 14.730290456431534, "grad_norm": 42.96613311767578, "learning_rate": 1.4111867219917013e-05, "loss": 1.1118, "step": 17750 }, { "epoch": 14.731120331950207, "grad_norm": 17.522632598876953, "learning_rate": 1.4111535269709544e-05, "loss": 0.3719, "step": 17751 }, { "epoch": 14.731950207468879, "grad_norm": 32.843406677246094, "learning_rate": 1.4111203319502076e-05, "loss": 1.0642, "step": 17752 }, { "epoch": 14.732780082987551, "grad_norm": 38.821205139160156, "learning_rate": 1.4110871369294608e-05, "loss": 0.7077, "step": 17753 }, { "epoch": 14.733609958506223, "grad_norm": 55.78059005737305, "learning_rate": 1.4110539419087137e-05, "loss": 1.2111, "step": 17754 }, { "epoch": 14.734439834024897, "grad_norm": 26.074071884155273, "learning_rate": 1.4110207468879669e-05, "loss": 0.6963, "step": 17755 }, { "epoch": 14.73526970954357, "grad_norm": 28.22536277770996, "learning_rate": 1.41098755186722e-05, "loss": 0.8339, "step": 17756 }, { "epoch": 14.736099585062242, "grad_norm": 27.344526290893555, "learning_rate": 
1.4109543568464731e-05, "loss": 0.3248, "step": 17757 }, { "epoch": 14.736929460580914, "grad_norm": 27.361835479736328, "learning_rate": 1.4109211618257263e-05, "loss": 1.4584, "step": 17758 }, { "epoch": 14.737759336099586, "grad_norm": 51.0424690246582, "learning_rate": 1.4108879668049794e-05, "loss": 1.2575, "step": 17759 }, { "epoch": 14.738589211618258, "grad_norm": 59.826568603515625, "learning_rate": 1.4108547717842324e-05, "loss": 0.7412, "step": 17760 }, { "epoch": 14.73941908713693, "grad_norm": 32.18272399902344, "learning_rate": 1.4108215767634856e-05, "loss": 0.9072, "step": 17761 }, { "epoch": 14.740248962655603, "grad_norm": 72.12503051757812, "learning_rate": 1.4107883817427388e-05, "loss": 0.8491, "step": 17762 }, { "epoch": 14.741078838174275, "grad_norm": 48.56920623779297, "learning_rate": 1.4107551867219917e-05, "loss": 1.1768, "step": 17763 }, { "epoch": 14.741908713692947, "grad_norm": 12.29263687133789, "learning_rate": 1.410721991701245e-05, "loss": 0.3057, "step": 17764 }, { "epoch": 14.74273858921162, "grad_norm": 44.19899368286133, "learning_rate": 1.4106887966804981e-05, "loss": 0.8386, "step": 17765 }, { "epoch": 14.743568464730291, "grad_norm": 37.277099609375, "learning_rate": 1.4106556016597512e-05, "loss": 0.6164, "step": 17766 }, { "epoch": 14.744398340248964, "grad_norm": 21.052642822265625, "learning_rate": 1.4106224066390042e-05, "loss": 0.9447, "step": 17767 }, { "epoch": 14.745228215767636, "grad_norm": 51.748661041259766, "learning_rate": 1.4105892116182574e-05, "loss": 0.6615, "step": 17768 }, { "epoch": 14.746058091286308, "grad_norm": 18.6558895111084, "learning_rate": 1.4105560165975105e-05, "loss": 0.7617, "step": 17769 }, { "epoch": 14.74688796680498, "grad_norm": 25.756866455078125, "learning_rate": 1.4105228215767637e-05, "loss": 0.7565, "step": 17770 }, { "epoch": 14.747717842323652, "grad_norm": 105.84378051757812, "learning_rate": 1.4104896265560165e-05, "loss": 1.5781, "step": 17771 }, { "epoch": 
14.748547717842325, "grad_norm": 41.29300308227539, "learning_rate": 1.4104564315352698e-05, "loss": 0.556, "step": 17772 }, { "epoch": 14.749377593360997, "grad_norm": 37.57259750366211, "learning_rate": 1.410423236514523e-05, "loss": 1.1908, "step": 17773 }, { "epoch": 14.750207468879669, "grad_norm": 23.986841201782227, "learning_rate": 1.4103900414937762e-05, "loss": 0.5761, "step": 17774 }, { "epoch": 14.751037344398341, "grad_norm": 15.545878410339355, "learning_rate": 1.410356846473029e-05, "loss": 0.3158, "step": 17775 }, { "epoch": 14.751867219917013, "grad_norm": 31.915624618530273, "learning_rate": 1.4103236514522823e-05, "loss": 1.0537, "step": 17776 }, { "epoch": 14.752697095435686, "grad_norm": 46.78226089477539, "learning_rate": 1.4102904564315355e-05, "loss": 0.9103, "step": 17777 }, { "epoch": 14.753526970954358, "grad_norm": 31.067642211914062, "learning_rate": 1.4102572614107885e-05, "loss": 1.002, "step": 17778 }, { "epoch": 14.75435684647303, "grad_norm": 37.06666946411133, "learning_rate": 1.4102240663900416e-05, "loss": 1.0388, "step": 17779 }, { "epoch": 14.755186721991702, "grad_norm": 54.236690521240234, "learning_rate": 1.4101908713692946e-05, "loss": 0.9983, "step": 17780 }, { "epoch": 14.756016597510374, "grad_norm": 54.01569366455078, "learning_rate": 1.4101576763485478e-05, "loss": 1.0385, "step": 17781 }, { "epoch": 14.756846473029047, "grad_norm": 26.099102020263672, "learning_rate": 1.410124481327801e-05, "loss": 0.6282, "step": 17782 }, { "epoch": 14.757676348547719, "grad_norm": 17.720781326293945, "learning_rate": 1.4100912863070542e-05, "loss": 0.4663, "step": 17783 }, { "epoch": 14.758506224066391, "grad_norm": 32.069366455078125, "learning_rate": 1.4100580912863071e-05, "loss": 0.6907, "step": 17784 }, { "epoch": 14.759336099585063, "grad_norm": 27.201534271240234, "learning_rate": 1.4100248962655603e-05, "loss": 0.5574, "step": 17785 }, { "epoch": 14.760165975103735, "grad_norm": 31.575178146362305, "learning_rate": 
1.4099917012448134e-05, "loss": 1.0396, "step": 17786 }, { "epoch": 14.760995850622407, "grad_norm": 28.57803726196289, "learning_rate": 1.4099585062240666e-05, "loss": 0.9166, "step": 17787 }, { "epoch": 14.76182572614108, "grad_norm": 38.311580657958984, "learning_rate": 1.4099253112033196e-05, "loss": 1.0941, "step": 17788 }, { "epoch": 14.762655601659752, "grad_norm": 38.06781768798828, "learning_rate": 1.4098921161825726e-05, "loss": 1.499, "step": 17789 }, { "epoch": 14.763485477178424, "grad_norm": 16.78238868713379, "learning_rate": 1.4098589211618259e-05, "loss": 0.4787, "step": 17790 }, { "epoch": 14.764315352697096, "grad_norm": 30.52137565612793, "learning_rate": 1.409825726141079e-05, "loss": 0.5741, "step": 17791 }, { "epoch": 14.765145228215768, "grad_norm": 35.62935256958008, "learning_rate": 1.409792531120332e-05, "loss": 0.7067, "step": 17792 }, { "epoch": 14.76597510373444, "grad_norm": 37.68198013305664, "learning_rate": 1.4097593360995852e-05, "loss": 0.6483, "step": 17793 }, { "epoch": 14.766804979253113, "grad_norm": 49.57453918457031, "learning_rate": 1.4097261410788384e-05, "loss": 1.1946, "step": 17794 }, { "epoch": 14.767634854771785, "grad_norm": 20.16486167907715, "learning_rate": 1.4096929460580914e-05, "loss": 0.3225, "step": 17795 }, { "epoch": 14.768464730290457, "grad_norm": 35.59760665893555, "learning_rate": 1.4096597510373444e-05, "loss": 0.5011, "step": 17796 }, { "epoch": 14.76929460580913, "grad_norm": 37.476078033447266, "learning_rate": 1.4096265560165977e-05, "loss": 0.5584, "step": 17797 }, { "epoch": 14.770124481327802, "grad_norm": 43.16032791137695, "learning_rate": 1.4095933609958507e-05, "loss": 0.849, "step": 17798 }, { "epoch": 14.770954356846474, "grad_norm": 33.94725799560547, "learning_rate": 1.4095601659751039e-05, "loss": 1.0247, "step": 17799 }, { "epoch": 14.771784232365146, "grad_norm": 39.81755447387695, "learning_rate": 1.4095269709543568e-05, "loss": 0.8359, "step": 17800 }, { "epoch": 
14.772614107883818, "grad_norm": 30.3528995513916, "learning_rate": 1.40949377593361e-05, "loss": 1.0393, "step": 17801 }, { "epoch": 14.77344398340249, "grad_norm": 23.508390426635742, "learning_rate": 1.4094605809128632e-05, "loss": 0.3691, "step": 17802 }, { "epoch": 14.774273858921163, "grad_norm": 29.076642990112305, "learning_rate": 1.4094273858921164e-05, "loss": 0.974, "step": 17803 }, { "epoch": 14.775103734439835, "grad_norm": 30.930809020996094, "learning_rate": 1.4093941908713693e-05, "loss": 0.571, "step": 17804 }, { "epoch": 14.775933609958507, "grad_norm": 44.72468566894531, "learning_rate": 1.4093609958506225e-05, "loss": 0.6273, "step": 17805 }, { "epoch": 14.77676348547718, "grad_norm": 40.886634826660156, "learning_rate": 1.4093278008298757e-05, "loss": 0.8459, "step": 17806 }, { "epoch": 14.777593360995851, "grad_norm": 20.531892776489258, "learning_rate": 1.4092946058091287e-05, "loss": 0.6394, "step": 17807 }, { "epoch": 14.778423236514524, "grad_norm": 28.983394622802734, "learning_rate": 1.409261410788382e-05, "loss": 0.8897, "step": 17808 }, { "epoch": 14.779253112033196, "grad_norm": 126.8173599243164, "learning_rate": 1.4092282157676348e-05, "loss": 0.7344, "step": 17809 }, { "epoch": 14.780082987551868, "grad_norm": 20.90137481689453, "learning_rate": 1.409195020746888e-05, "loss": 0.3606, "step": 17810 }, { "epoch": 14.78091286307054, "grad_norm": 33.846717834472656, "learning_rate": 1.4091618257261413e-05, "loss": 0.6503, "step": 17811 }, { "epoch": 14.781742738589212, "grad_norm": 34.36363220214844, "learning_rate": 1.4091286307053945e-05, "loss": 0.5993, "step": 17812 }, { "epoch": 14.782572614107885, "grad_norm": 44.129276275634766, "learning_rate": 1.4090954356846473e-05, "loss": 0.7231, "step": 17813 }, { "epoch": 14.783402489626557, "grad_norm": 37.968589782714844, "learning_rate": 1.4090622406639005e-05, "loss": 0.6515, "step": 17814 }, { "epoch": 14.784232365145229, "grad_norm": 44.791656494140625, "learning_rate": 
1.4090290456431538e-05, "loss": 1.2206, "step": 17815 }, { "epoch": 14.785062240663901, "grad_norm": 35.88974380493164, "learning_rate": 1.4089958506224068e-05, "loss": 0.6068, "step": 17816 }, { "epoch": 14.785892116182573, "grad_norm": 112.52531433105469, "learning_rate": 1.4089626556016598e-05, "loss": 1.3142, "step": 17817 }, { "epoch": 14.786721991701246, "grad_norm": 39.624732971191406, "learning_rate": 1.4089294605809129e-05, "loss": 0.7088, "step": 17818 }, { "epoch": 14.787551867219918, "grad_norm": 29.609973907470703, "learning_rate": 1.4088962655601661e-05, "loss": 0.3719, "step": 17819 }, { "epoch": 14.78838174273859, "grad_norm": 29.07513427734375, "learning_rate": 1.4088630705394193e-05, "loss": 0.6646, "step": 17820 }, { "epoch": 14.789211618257262, "grad_norm": 81.80583953857422, "learning_rate": 1.4088298755186722e-05, "loss": 1.3231, "step": 17821 }, { "epoch": 14.790041493775934, "grad_norm": 89.4677734375, "learning_rate": 1.4087966804979254e-05, "loss": 1.1084, "step": 17822 }, { "epoch": 14.790871369294607, "grad_norm": 22.1209659576416, "learning_rate": 1.4087634854771786e-05, "loss": 0.7512, "step": 17823 }, { "epoch": 14.791701244813279, "grad_norm": 54.536563873291016, "learning_rate": 1.4087302904564318e-05, "loss": 1.5773, "step": 17824 }, { "epoch": 14.792531120331951, "grad_norm": 29.284109115600586, "learning_rate": 1.4086970954356847e-05, "loss": 0.44, "step": 17825 }, { "epoch": 14.793360995850623, "grad_norm": 55.07900619506836, "learning_rate": 1.4086639004149379e-05, "loss": 0.9806, "step": 17826 }, { "epoch": 14.794190871369295, "grad_norm": 26.592226028442383, "learning_rate": 1.408630705394191e-05, "loss": 0.6177, "step": 17827 }, { "epoch": 14.795020746887968, "grad_norm": 125.20074462890625, "learning_rate": 1.4085975103734441e-05, "loss": 1.0664, "step": 17828 }, { "epoch": 14.79585062240664, "grad_norm": 67.52567291259766, "learning_rate": 1.4085643153526972e-05, "loss": 1.0264, "step": 17829 }, { "epoch": 
14.796680497925312, "grad_norm": 33.097633361816406, "learning_rate": 1.4085311203319502e-05, "loss": 0.8548, "step": 17830 }, { "epoch": 14.797510373443984, "grad_norm": 21.921340942382812, "learning_rate": 1.4084979253112034e-05, "loss": 0.4327, "step": 17831 }, { "epoch": 14.798340248962656, "grad_norm": 49.786380767822266, "learning_rate": 1.4084647302904566e-05, "loss": 1.1024, "step": 17832 }, { "epoch": 14.799170124481329, "grad_norm": 39.71504211425781, "learning_rate": 1.4084315352697095e-05, "loss": 0.632, "step": 17833 }, { "epoch": 14.8, "grad_norm": 43.57304763793945, "learning_rate": 1.4083983402489627e-05, "loss": 1.3913, "step": 17834 }, { "epoch": 14.800829875518673, "grad_norm": 65.62545776367188, "learning_rate": 1.408365145228216e-05, "loss": 0.9047, "step": 17835 }, { "epoch": 14.801659751037345, "grad_norm": 26.21154022216797, "learning_rate": 1.408331950207469e-05, "loss": 0.3627, "step": 17836 }, { "epoch": 14.802489626556017, "grad_norm": 34.86542892456055, "learning_rate": 1.4082987551867222e-05, "loss": 0.7406, "step": 17837 }, { "epoch": 14.80331950207469, "grad_norm": 36.49151611328125, "learning_rate": 1.4082655601659752e-05, "loss": 0.7129, "step": 17838 }, { "epoch": 14.804149377593362, "grad_norm": 36.47821807861328, "learning_rate": 1.4082323651452283e-05, "loss": 0.654, "step": 17839 }, { "epoch": 14.804979253112034, "grad_norm": 43.897071838378906, "learning_rate": 1.4081991701244815e-05, "loss": 1.1108, "step": 17840 }, { "epoch": 14.805809128630706, "grad_norm": 50.00870132446289, "learning_rate": 1.4081659751037347e-05, "loss": 1.3178, "step": 17841 }, { "epoch": 14.806639004149378, "grad_norm": 24.851774215698242, "learning_rate": 1.4081327800829876e-05, "loss": 0.8589, "step": 17842 }, { "epoch": 14.80746887966805, "grad_norm": 36.08539581298828, "learning_rate": 1.4080995850622408e-05, "loss": 0.8464, "step": 17843 }, { "epoch": 14.808298755186723, "grad_norm": 48.23470687866211, "learning_rate": 1.408066390041494e-05, 
"loss": 0.8395, "step": 17844 }, { "epoch": 14.809128630705395, "grad_norm": 39.16786193847656, "learning_rate": 1.408033195020747e-05, "loss": 0.6602, "step": 17845 }, { "epoch": 14.809958506224067, "grad_norm": 45.36329650878906, "learning_rate": 1.408e-05, "loss": 0.9637, "step": 17846 }, { "epoch": 14.81078838174274, "grad_norm": 25.17336654663086, "learning_rate": 1.4079668049792533e-05, "loss": 0.3633, "step": 17847 }, { "epoch": 14.811618257261411, "grad_norm": 21.4827938079834, "learning_rate": 1.4079336099585063e-05, "loss": 0.3512, "step": 17848 }, { "epoch": 14.812448132780084, "grad_norm": 34.043487548828125, "learning_rate": 1.4079004149377595e-05, "loss": 0.6719, "step": 17849 }, { "epoch": 14.813278008298756, "grad_norm": 43.556068420410156, "learning_rate": 1.4078672199170124e-05, "loss": 0.57, "step": 17850 }, { "epoch": 14.814107883817428, "grad_norm": 29.901018142700195, "learning_rate": 1.4078340248962656e-05, "loss": 1.1861, "step": 17851 }, { "epoch": 14.8149377593361, "grad_norm": 50.83641052246094, "learning_rate": 1.4078008298755188e-05, "loss": 0.8545, "step": 17852 }, { "epoch": 14.815767634854772, "grad_norm": 83.54767608642578, "learning_rate": 1.407767634854772e-05, "loss": 0.9624, "step": 17853 }, { "epoch": 14.816597510373445, "grad_norm": 56.47981262207031, "learning_rate": 1.4077344398340249e-05, "loss": 1.0274, "step": 17854 }, { "epoch": 14.817427385892117, "grad_norm": 18.89324188232422, "learning_rate": 1.4077012448132781e-05, "loss": 0.5786, "step": 17855 }, { "epoch": 14.818257261410789, "grad_norm": 45.719444274902344, "learning_rate": 1.4076680497925312e-05, "loss": 0.4945, "step": 17856 }, { "epoch": 14.819087136929461, "grad_norm": 20.437870025634766, "learning_rate": 1.4076348547717844e-05, "loss": 0.5022, "step": 17857 }, { "epoch": 14.819917012448133, "grad_norm": 25.525663375854492, "learning_rate": 1.4076016597510374e-05, "loss": 0.7905, "step": 17858 }, { "epoch": 14.820746887966806, "grad_norm": 51.370849609375, 
"learning_rate": 1.4075684647302905e-05, "loss": 1.1139, "step": 17859 }, { "epoch": 14.821576763485478, "grad_norm": 30.596282958984375, "learning_rate": 1.4075352697095437e-05, "loss": 0.9319, "step": 17860 }, { "epoch": 14.82240663900415, "grad_norm": 53.83036804199219, "learning_rate": 1.4075020746887969e-05, "loss": 1.0101, "step": 17861 }, { "epoch": 14.823236514522822, "grad_norm": 21.33890724182129, "learning_rate": 1.4074688796680501e-05, "loss": 0.5, "step": 17862 }, { "epoch": 14.824066390041494, "grad_norm": 16.78356170654297, "learning_rate": 1.407435684647303e-05, "loss": 0.4621, "step": 17863 }, { "epoch": 14.824896265560167, "grad_norm": 63.3901252746582, "learning_rate": 1.4074024896265562e-05, "loss": 1.0449, "step": 17864 }, { "epoch": 14.825726141078839, "grad_norm": 41.60363006591797, "learning_rate": 1.4073692946058092e-05, "loss": 0.8308, "step": 17865 }, { "epoch": 14.826556016597511, "grad_norm": 34.37580108642578, "learning_rate": 1.4073360995850624e-05, "loss": 1.1615, "step": 17866 }, { "epoch": 14.827385892116183, "grad_norm": 21.29909324645996, "learning_rate": 1.4073029045643155e-05, "loss": 0.6764, "step": 17867 }, { "epoch": 14.828215767634855, "grad_norm": 70.62158203125, "learning_rate": 1.4072697095435685e-05, "loss": 0.8376, "step": 17868 }, { "epoch": 14.829045643153528, "grad_norm": 41.653717041015625, "learning_rate": 1.4072365145228217e-05, "loss": 1.1759, "step": 17869 }, { "epoch": 14.8298755186722, "grad_norm": 49.57920455932617, "learning_rate": 1.407203319502075e-05, "loss": 0.7452, "step": 17870 }, { "epoch": 14.830705394190872, "grad_norm": 29.597332000732422, "learning_rate": 1.4071701244813278e-05, "loss": 0.55, "step": 17871 }, { "epoch": 14.831535269709544, "grad_norm": 89.92679595947266, "learning_rate": 1.407136929460581e-05, "loss": 1.0419, "step": 17872 }, { "epoch": 14.832365145228216, "grad_norm": 23.98638916015625, "learning_rate": 1.4071037344398342e-05, "loss": 0.4202, "step": 17873 }, { "epoch": 
14.833195020746889, "grad_norm": 21.752796173095703, "learning_rate": 1.4070705394190873e-05, "loss": 0.4674, "step": 17874 }, { "epoch": 14.83402489626556, "grad_norm": 20.268051147460938, "learning_rate": 1.4070373443983403e-05, "loss": 0.4419, "step": 17875 }, { "epoch": 14.834854771784233, "grad_norm": 32.92224884033203, "learning_rate": 1.4070041493775935e-05, "loss": 0.7486, "step": 17876 }, { "epoch": 14.835684647302905, "grad_norm": 47.49368667602539, "learning_rate": 1.4069709543568466e-05, "loss": 0.5716, "step": 17877 }, { "epoch": 14.836514522821577, "grad_norm": 39.867000579833984, "learning_rate": 1.4069377593360998e-05, "loss": 0.9143, "step": 17878 }, { "epoch": 14.83734439834025, "grad_norm": 74.38481903076172, "learning_rate": 1.4069045643153526e-05, "loss": 1.8107, "step": 17879 }, { "epoch": 14.838174273858922, "grad_norm": 70.92442321777344, "learning_rate": 1.4068713692946058e-05, "loss": 1.0485, "step": 17880 }, { "epoch": 14.839004149377594, "grad_norm": 63.22795486450195, "learning_rate": 1.406838174273859e-05, "loss": 0.6292, "step": 17881 }, { "epoch": 14.839834024896266, "grad_norm": 41.544368743896484, "learning_rate": 1.4068049792531123e-05, "loss": 1.1165, "step": 17882 }, { "epoch": 14.840663900414938, "grad_norm": 27.535987854003906, "learning_rate": 1.4067717842323651e-05, "loss": 0.5008, "step": 17883 }, { "epoch": 14.84149377593361, "grad_norm": 50.08567428588867, "learning_rate": 1.4067385892116184e-05, "loss": 0.5372, "step": 17884 }, { "epoch": 14.842323651452283, "grad_norm": 55.14590072631836, "learning_rate": 1.4067053941908716e-05, "loss": 0.7193, "step": 17885 }, { "epoch": 14.843153526970955, "grad_norm": 78.11067962646484, "learning_rate": 1.4066721991701246e-05, "loss": 1.1409, "step": 17886 }, { "epoch": 14.843983402489627, "grad_norm": 35.26216125488281, "learning_rate": 1.4066390041493778e-05, "loss": 0.4791, "step": 17887 }, { "epoch": 14.8448132780083, "grad_norm": 56.714229583740234, "learning_rate": 
1.4066058091286307e-05, "loss": 1.3996, "step": 17888 }, { "epoch": 14.845643153526972, "grad_norm": 43.600921630859375, "learning_rate": 1.4065726141078839e-05, "loss": 0.5068, "step": 17889 }, { "epoch": 14.846473029045644, "grad_norm": 35.911136627197266, "learning_rate": 1.4065394190871371e-05, "loss": 0.7205, "step": 17890 }, { "epoch": 14.847302904564316, "grad_norm": 33.34485626220703, "learning_rate": 1.4065062240663903e-05, "loss": 0.68, "step": 17891 }, { "epoch": 14.848132780082988, "grad_norm": 33.2568244934082, "learning_rate": 1.4064730290456432e-05, "loss": 1.0488, "step": 17892 }, { "epoch": 14.84896265560166, "grad_norm": 34.148353576660156, "learning_rate": 1.4064398340248964e-05, "loss": 0.7044, "step": 17893 }, { "epoch": 14.849792531120332, "grad_norm": 28.983243942260742, "learning_rate": 1.4064066390041496e-05, "loss": 0.8486, "step": 17894 }, { "epoch": 14.850622406639005, "grad_norm": 16.197620391845703, "learning_rate": 1.4063734439834027e-05, "loss": 0.3852, "step": 17895 }, { "epoch": 14.851452282157677, "grad_norm": 86.99488830566406, "learning_rate": 1.4063402489626557e-05, "loss": 1.2177, "step": 17896 }, { "epoch": 14.852282157676349, "grad_norm": 37.82725524902344, "learning_rate": 1.4063070539419087e-05, "loss": 1.4991, "step": 17897 }, { "epoch": 14.853112033195021, "grad_norm": 47.050743103027344, "learning_rate": 1.406273858921162e-05, "loss": 1.2646, "step": 17898 }, { "epoch": 14.853941908713693, "grad_norm": 21.297273635864258, "learning_rate": 1.4062406639004152e-05, "loss": 0.5075, "step": 17899 }, { "epoch": 14.854771784232366, "grad_norm": 15.650785446166992, "learning_rate": 1.406207468879668e-05, "loss": 0.3404, "step": 17900 }, { "epoch": 14.855601659751038, "grad_norm": 34.9842529296875, "learning_rate": 1.4061742738589212e-05, "loss": 0.5808, "step": 17901 }, { "epoch": 14.85643153526971, "grad_norm": 18.376853942871094, "learning_rate": 1.4061410788381745e-05, "loss": 0.4645, "step": 17902 }, { "epoch": 
14.857261410788382, "grad_norm": 20.122886657714844, "learning_rate": 1.4061078838174275e-05, "loss": 0.3031, "step": 17903 }, { "epoch": 14.858091286307054, "grad_norm": 29.2403507232666, "learning_rate": 1.4060746887966805e-05, "loss": 1.0086, "step": 17904 }, { "epoch": 14.858921161825727, "grad_norm": 26.853837966918945, "learning_rate": 1.4060414937759337e-05, "loss": 0.4704, "step": 17905 }, { "epoch": 14.859751037344399, "grad_norm": 45.57449722290039, "learning_rate": 1.4060082987551868e-05, "loss": 0.974, "step": 17906 }, { "epoch": 14.860580912863071, "grad_norm": 23.525833129882812, "learning_rate": 1.40597510373444e-05, "loss": 0.4339, "step": 17907 }, { "epoch": 14.861410788381743, "grad_norm": 29.133481979370117, "learning_rate": 1.405941908713693e-05, "loss": 0.4035, "step": 17908 }, { "epoch": 14.862240663900415, "grad_norm": 30.21022605895996, "learning_rate": 1.405908713692946e-05, "loss": 0.9321, "step": 17909 }, { "epoch": 14.863070539419088, "grad_norm": 61.972450256347656, "learning_rate": 1.4058755186721993e-05, "loss": 0.7019, "step": 17910 }, { "epoch": 14.86390041493776, "grad_norm": 72.91999816894531, "learning_rate": 1.4058423236514525e-05, "loss": 0.8317, "step": 17911 }, { "epoch": 14.864730290456432, "grad_norm": 36.52111053466797, "learning_rate": 1.4058091286307054e-05, "loss": 0.7521, "step": 17912 }, { "epoch": 14.865560165975104, "grad_norm": 25.40961456298828, "learning_rate": 1.4057759336099586e-05, "loss": 0.5506, "step": 17913 }, { "epoch": 14.866390041493776, "grad_norm": 19.711803436279297, "learning_rate": 1.4057427385892118e-05, "loss": 0.432, "step": 17914 }, { "epoch": 14.867219917012449, "grad_norm": 11.012909889221191, "learning_rate": 1.4057095435684648e-05, "loss": 0.433, "step": 17915 }, { "epoch": 14.86804979253112, "grad_norm": 120.58209228515625, "learning_rate": 1.405676348547718e-05, "loss": 0.9892, "step": 17916 }, { "epoch": 14.868879668049793, "grad_norm": 64.06027221679688, "learning_rate": 
1.405643153526971e-05, "loss": 1.4298, "step": 17917 }, { "epoch": 14.869709543568465, "grad_norm": 34.9401969909668, "learning_rate": 1.4056099585062241e-05, "loss": 0.7291, "step": 17918 }, { "epoch": 14.870539419087137, "grad_norm": 69.22806549072266, "learning_rate": 1.4055767634854773e-05, "loss": 0.9318, "step": 17919 }, { "epoch": 14.87136929460581, "grad_norm": 52.762542724609375, "learning_rate": 1.4055435684647306e-05, "loss": 0.8006, "step": 17920 }, { "epoch": 14.872199170124482, "grad_norm": 55.23587417602539, "learning_rate": 1.4055103734439834e-05, "loss": 1.2396, "step": 17921 }, { "epoch": 14.873029045643154, "grad_norm": 76.5653076171875, "learning_rate": 1.4054771784232366e-05, "loss": 0.9901, "step": 17922 }, { "epoch": 14.873858921161826, "grad_norm": 39.17169952392578, "learning_rate": 1.4054439834024898e-05, "loss": 0.6105, "step": 17923 }, { "epoch": 14.874688796680498, "grad_norm": 62.69223403930664, "learning_rate": 1.4054107883817429e-05, "loss": 0.6684, "step": 17924 }, { "epoch": 14.87551867219917, "grad_norm": 38.907981872558594, "learning_rate": 1.405377593360996e-05, "loss": 0.8537, "step": 17925 }, { "epoch": 14.876348547717843, "grad_norm": 21.938783645629883, "learning_rate": 1.405344398340249e-05, "loss": 0.334, "step": 17926 }, { "epoch": 14.877178423236515, "grad_norm": 53.8626708984375, "learning_rate": 1.4053112033195022e-05, "loss": 0.6223, "step": 17927 }, { "epoch": 14.878008298755187, "grad_norm": 30.97766876220703, "learning_rate": 1.4052780082987554e-05, "loss": 0.6282, "step": 17928 }, { "epoch": 14.87883817427386, "grad_norm": 20.71587371826172, "learning_rate": 1.4052448132780083e-05, "loss": 0.3644, "step": 17929 }, { "epoch": 14.879668049792532, "grad_norm": 40.128623962402344, "learning_rate": 1.4052116182572615e-05, "loss": 1.1176, "step": 17930 }, { "epoch": 14.880497925311204, "grad_norm": 42.202510833740234, "learning_rate": 1.4051784232365147e-05, "loss": 0.6796, "step": 17931 }, { "epoch": 
14.881327800829876, "grad_norm": 63.74666213989258, "learning_rate": 1.4051452282157679e-05, "loss": 0.8175, "step": 17932 }, { "epoch": 14.882157676348548, "grad_norm": 51.691707611083984, "learning_rate": 1.4051120331950208e-05, "loss": 0.8622, "step": 17933 }, { "epoch": 14.88298755186722, "grad_norm": 39.38742446899414, "learning_rate": 1.405078838174274e-05, "loss": 0.6946, "step": 17934 }, { "epoch": 14.883817427385893, "grad_norm": 32.06050491333008, "learning_rate": 1.405045643153527e-05, "loss": 0.6972, "step": 17935 }, { "epoch": 14.884647302904565, "grad_norm": 32.586708068847656, "learning_rate": 1.4050124481327802e-05, "loss": 1.0401, "step": 17936 }, { "epoch": 14.885477178423237, "grad_norm": 31.404022216796875, "learning_rate": 1.4049792531120333e-05, "loss": 1.1228, "step": 17937 }, { "epoch": 14.88630705394191, "grad_norm": 28.71963119506836, "learning_rate": 1.4049460580912863e-05, "loss": 0.4487, "step": 17938 }, { "epoch": 14.887136929460581, "grad_norm": 30.0714111328125, "learning_rate": 1.4049128630705395e-05, "loss": 0.7659, "step": 17939 }, { "epoch": 14.887966804979254, "grad_norm": 42.68463134765625, "learning_rate": 1.4048796680497927e-05, "loss": 1.2177, "step": 17940 }, { "epoch": 14.888796680497926, "grad_norm": 26.324434280395508, "learning_rate": 1.404846473029046e-05, "loss": 0.5043, "step": 17941 }, { "epoch": 14.889626556016598, "grad_norm": 26.299694061279297, "learning_rate": 1.4048132780082988e-05, "loss": 0.6591, "step": 17942 }, { "epoch": 14.89045643153527, "grad_norm": 41.040863037109375, "learning_rate": 1.404780082987552e-05, "loss": 0.6163, "step": 17943 }, { "epoch": 14.891286307053942, "grad_norm": 19.499189376831055, "learning_rate": 1.404746887966805e-05, "loss": 0.3729, "step": 17944 }, { "epoch": 14.892116182572614, "grad_norm": 77.3508071899414, "learning_rate": 1.4047136929460583e-05, "loss": 0.9491, "step": 17945 }, { "epoch": 14.892946058091287, "grad_norm": 37.67876434326172, "learning_rate": 
1.4046804979253113e-05, "loss": 0.859, "step": 17946 }, { "epoch": 14.893775933609959, "grad_norm": 26.6387939453125, "learning_rate": 1.4046473029045644e-05, "loss": 0.7169, "step": 17947 }, { "epoch": 14.894605809128631, "grad_norm": 23.793781280517578, "learning_rate": 1.4046141078838176e-05, "loss": 0.738, "step": 17948 }, { "epoch": 14.895435684647303, "grad_norm": 25.40498924255371, "learning_rate": 1.4045809128630708e-05, "loss": 0.5833, "step": 17949 }, { "epoch": 14.896265560165975, "grad_norm": 49.66643524169922, "learning_rate": 1.4045477178423237e-05, "loss": 0.4258, "step": 17950 }, { "epoch": 14.897095435684648, "grad_norm": 43.162139892578125, "learning_rate": 1.4045145228215769e-05, "loss": 1.109, "step": 17951 }, { "epoch": 14.89792531120332, "grad_norm": 49.99754333496094, "learning_rate": 1.40448132780083e-05, "loss": 0.8808, "step": 17952 }, { "epoch": 14.898755186721992, "grad_norm": 21.553619384765625, "learning_rate": 1.4044481327800831e-05, "loss": 0.4463, "step": 17953 }, { "epoch": 14.899585062240664, "grad_norm": 104.85626983642578, "learning_rate": 1.4044149377593362e-05, "loss": 2.2698, "step": 17954 }, { "epoch": 14.900414937759336, "grad_norm": 67.35718536376953, "learning_rate": 1.4043817427385894e-05, "loss": 0.9929, "step": 17955 }, { "epoch": 14.901244813278009, "grad_norm": 90.83677673339844, "learning_rate": 1.4043485477178424e-05, "loss": 1.6664, "step": 17956 }, { "epoch": 14.90207468879668, "grad_norm": 41.89274215698242, "learning_rate": 1.4043153526970956e-05, "loss": 0.5323, "step": 17957 }, { "epoch": 14.902904564315353, "grad_norm": 34.10661697387695, "learning_rate": 1.4042821576763485e-05, "loss": 1.0147, "step": 17958 }, { "epoch": 14.903734439834025, "grad_norm": 20.531166076660156, "learning_rate": 1.4042489626556017e-05, "loss": 0.354, "step": 17959 }, { "epoch": 14.904564315352697, "grad_norm": 111.77934265136719, "learning_rate": 1.404215767634855e-05, "loss": 0.782, "step": 17960 }, { "epoch": 14.90539419087137, 
"grad_norm": 29.369661331176758, "learning_rate": 1.4041825726141081e-05, "loss": 0.8158, "step": 17961 }, { "epoch": 14.906224066390042, "grad_norm": 37.10415267944336, "learning_rate": 1.404149377593361e-05, "loss": 0.6513, "step": 17962 }, { "epoch": 14.907053941908714, "grad_norm": 24.93302345275879, "learning_rate": 1.4041161825726142e-05, "loss": 0.6868, "step": 17963 }, { "epoch": 14.907883817427386, "grad_norm": 28.161264419555664, "learning_rate": 1.4040829875518674e-05, "loss": 0.7843, "step": 17964 }, { "epoch": 14.908713692946058, "grad_norm": 39.88092041015625, "learning_rate": 1.4040497925311205e-05, "loss": 0.6502, "step": 17965 }, { "epoch": 14.90954356846473, "grad_norm": 27.2445011138916, "learning_rate": 1.4040165975103737e-05, "loss": 0.3521, "step": 17966 }, { "epoch": 14.910373443983403, "grad_norm": 30.026525497436523, "learning_rate": 1.4039834024896265e-05, "loss": 0.949, "step": 17967 }, { "epoch": 14.911203319502075, "grad_norm": 61.813629150390625, "learning_rate": 1.4039502074688798e-05, "loss": 1.176, "step": 17968 }, { "epoch": 14.912033195020747, "grad_norm": 47.64974594116211, "learning_rate": 1.403917012448133e-05, "loss": 1.3003, "step": 17969 }, { "epoch": 14.91286307053942, "grad_norm": 35.60593795776367, "learning_rate": 1.4038838174273862e-05, "loss": 1.1933, "step": 17970 }, { "epoch": 14.913692946058092, "grad_norm": 30.5228328704834, "learning_rate": 1.403850622406639e-05, "loss": 0.4694, "step": 17971 }, { "epoch": 14.914522821576764, "grad_norm": 43.35858154296875, "learning_rate": 1.4038174273858923e-05, "loss": 0.556, "step": 17972 }, { "epoch": 14.915352697095436, "grad_norm": 44.89398193359375, "learning_rate": 1.4037842323651453e-05, "loss": 0.5998, "step": 17973 }, { "epoch": 14.916182572614108, "grad_norm": 44.53179931640625, "learning_rate": 1.4037510373443985e-05, "loss": 0.8633, "step": 17974 }, { "epoch": 14.91701244813278, "grad_norm": 43.45869064331055, "learning_rate": 1.4037178423236516e-05, "loss": 1.1425, 
"step": 17975 }, { "epoch": 14.917842323651453, "grad_norm": 59.879024505615234, "learning_rate": 1.4036846473029046e-05, "loss": 1.0596, "step": 17976 }, { "epoch": 14.918672199170125, "grad_norm": 49.438751220703125, "learning_rate": 1.4036514522821578e-05, "loss": 0.9611, "step": 17977 }, { "epoch": 14.919502074688797, "grad_norm": 40.1866455078125, "learning_rate": 1.403618257261411e-05, "loss": 0.9332, "step": 17978 }, { "epoch": 14.92033195020747, "grad_norm": 23.969480514526367, "learning_rate": 1.4035850622406639e-05, "loss": 0.4716, "step": 17979 }, { "epoch": 14.921161825726141, "grad_norm": 18.715932846069336, "learning_rate": 1.4035518672199171e-05, "loss": 0.5847, "step": 17980 }, { "epoch": 14.921991701244814, "grad_norm": 27.109363555908203, "learning_rate": 1.4035186721991703e-05, "loss": 1.2302, "step": 17981 }, { "epoch": 14.922821576763486, "grad_norm": 35.972633361816406, "learning_rate": 1.4034854771784234e-05, "loss": 0.8423, "step": 17982 }, { "epoch": 14.923651452282158, "grad_norm": 19.229694366455078, "learning_rate": 1.4034522821576764e-05, "loss": 0.4618, "step": 17983 }, { "epoch": 14.92448132780083, "grad_norm": 34.167545318603516, "learning_rate": 1.4034190871369296e-05, "loss": 0.6956, "step": 17984 }, { "epoch": 14.925311203319502, "grad_norm": 31.48992347717285, "learning_rate": 1.4033858921161826e-05, "loss": 0.6003, "step": 17985 }, { "epoch": 14.926141078838175, "grad_norm": 133.5050048828125, "learning_rate": 1.4033526970954359e-05, "loss": 2.007, "step": 17986 }, { "epoch": 14.926970954356847, "grad_norm": 25.677261352539062, "learning_rate": 1.4033195020746887e-05, "loss": 0.7127, "step": 17987 }, { "epoch": 14.927800829875519, "grad_norm": 38.83340072631836, "learning_rate": 1.403286307053942e-05, "loss": 0.7092, "step": 17988 }, { "epoch": 14.928630705394191, "grad_norm": 87.6953125, "learning_rate": 1.4032531120331951e-05, "loss": 0.4457, "step": 17989 }, { "epoch": 14.929460580912863, "grad_norm": 36.648590087890625, 
"learning_rate": 1.4032199170124484e-05, "loss": 0.9015, "step": 17990 }, { "epoch": 14.930290456431536, "grad_norm": 39.831111907958984, "learning_rate": 1.4031867219917012e-05, "loss": 0.9597, "step": 17991 }, { "epoch": 14.931120331950208, "grad_norm": 41.473777770996094, "learning_rate": 1.4031535269709544e-05, "loss": 1.8382, "step": 17992 }, { "epoch": 14.93195020746888, "grad_norm": 36.2167854309082, "learning_rate": 1.4031203319502077e-05, "loss": 0.4425, "step": 17993 }, { "epoch": 14.932780082987552, "grad_norm": 42.72978591918945, "learning_rate": 1.4030871369294607e-05, "loss": 0.9842, "step": 17994 }, { "epoch": 14.933609958506224, "grad_norm": 58.09613037109375, "learning_rate": 1.4030539419087139e-05, "loss": 1.4045, "step": 17995 }, { "epoch": 14.934439834024896, "grad_norm": 40.5374641418457, "learning_rate": 1.4030207468879668e-05, "loss": 0.9284, "step": 17996 }, { "epoch": 14.935269709543569, "grad_norm": 30.789325714111328, "learning_rate": 1.40298755186722e-05, "loss": 1.0707, "step": 17997 }, { "epoch": 14.936099585062241, "grad_norm": 34.08730697631836, "learning_rate": 1.4029543568464732e-05, "loss": 0.9373, "step": 17998 }, { "epoch": 14.936929460580913, "grad_norm": 41.18758010864258, "learning_rate": 1.4029211618257264e-05, "loss": 0.4216, "step": 17999 }, { "epoch": 14.937759336099585, "grad_norm": 63.337554931640625, "learning_rate": 1.4028879668049793e-05, "loss": 0.4631, "step": 18000 }, { "epoch": 14.938589211618257, "grad_norm": 34.92989730834961, "learning_rate": 1.4028547717842325e-05, "loss": 0.9378, "step": 18001 }, { "epoch": 14.93941908713693, "grad_norm": 33.08452224731445, "learning_rate": 1.4028215767634857e-05, "loss": 0.975, "step": 18002 }, { "epoch": 14.940248962655602, "grad_norm": 28.274051666259766, "learning_rate": 1.4027883817427387e-05, "loss": 0.7123, "step": 18003 }, { "epoch": 14.941078838174274, "grad_norm": 37.84892272949219, "learning_rate": 1.4027551867219918e-05, "loss": 1.0307, "step": 18004 }, { 
"epoch": 14.941908713692946, "grad_norm": 33.447994232177734, "learning_rate": 1.4027219917012448e-05, "loss": 1.0039, "step": 18005 }, { "epoch": 14.942738589211618, "grad_norm": 36.020469665527344, "learning_rate": 1.402688796680498e-05, "loss": 0.811, "step": 18006 }, { "epoch": 14.94356846473029, "grad_norm": 48.038841247558594, "learning_rate": 1.4026556016597512e-05, "loss": 0.8289, "step": 18007 }, { "epoch": 14.944398340248963, "grad_norm": 59.450313568115234, "learning_rate": 1.4026224066390041e-05, "loss": 0.9346, "step": 18008 }, { "epoch": 14.945228215767635, "grad_norm": 76.85282897949219, "learning_rate": 1.4025892116182573e-05, "loss": 0.8243, "step": 18009 }, { "epoch": 14.946058091286307, "grad_norm": 28.757123947143555, "learning_rate": 1.4025560165975105e-05, "loss": 0.5572, "step": 18010 }, { "epoch": 14.94688796680498, "grad_norm": 40.229644775390625, "learning_rate": 1.4025228215767638e-05, "loss": 0.5786, "step": 18011 }, { "epoch": 14.947717842323652, "grad_norm": 28.799707412719727, "learning_rate": 1.4024896265560166e-05, "loss": 0.876, "step": 18012 }, { "epoch": 14.948547717842324, "grad_norm": 83.9297866821289, "learning_rate": 1.4024564315352698e-05, "loss": 0.8974, "step": 18013 }, { "epoch": 14.949377593360996, "grad_norm": 54.024940490722656, "learning_rate": 1.4024232365145229e-05, "loss": 0.9225, "step": 18014 }, { "epoch": 14.950207468879668, "grad_norm": 31.3812313079834, "learning_rate": 1.4023900414937761e-05, "loss": 0.611, "step": 18015 }, { "epoch": 14.95103734439834, "grad_norm": 29.258516311645508, "learning_rate": 1.4023568464730291e-05, "loss": 0.8404, "step": 18016 }, { "epoch": 14.951867219917013, "grad_norm": 62.87629699707031, "learning_rate": 1.4023236514522822e-05, "loss": 1.8942, "step": 18017 }, { "epoch": 14.952697095435685, "grad_norm": 36.24202346801758, "learning_rate": 1.4022904564315354e-05, "loss": 0.522, "step": 18018 }, { "epoch": 14.953526970954357, "grad_norm": 37.78412628173828, "learning_rate": 
1.4022572614107886e-05, "loss": 0.864, "step": 18019 }, { "epoch": 14.95435684647303, "grad_norm": 43.68141555786133, "learning_rate": 1.4022240663900416e-05, "loss": 1.565, "step": 18020 }, { "epoch": 14.955186721991701, "grad_norm": 89.08174133300781, "learning_rate": 1.4021908713692947e-05, "loss": 0.9079, "step": 18021 }, { "epoch": 14.956016597510374, "grad_norm": 50.53477096557617, "learning_rate": 1.4021576763485479e-05, "loss": 1.0557, "step": 18022 }, { "epoch": 14.956846473029046, "grad_norm": 40.47731399536133, "learning_rate": 1.402124481327801e-05, "loss": 0.5288, "step": 18023 }, { "epoch": 14.957676348547718, "grad_norm": 46.01179122924805, "learning_rate": 1.4020912863070541e-05, "loss": 0.8987, "step": 18024 }, { "epoch": 14.95850622406639, "grad_norm": 17.250627517700195, "learning_rate": 1.4020580912863072e-05, "loss": 0.2638, "step": 18025 }, { "epoch": 14.959336099585062, "grad_norm": 47.67586135864258, "learning_rate": 1.4020248962655602e-05, "loss": 1.2862, "step": 18026 }, { "epoch": 14.960165975103735, "grad_norm": 32.078983306884766, "learning_rate": 1.4019917012448134e-05, "loss": 0.909, "step": 18027 }, { "epoch": 14.960995850622407, "grad_norm": 40.693607330322266, "learning_rate": 1.4019585062240666e-05, "loss": 1.0442, "step": 18028 }, { "epoch": 14.961825726141079, "grad_norm": 47.2708854675293, "learning_rate": 1.4019253112033195e-05, "loss": 0.8023, "step": 18029 }, { "epoch": 14.962655601659751, "grad_norm": 50.120025634765625, "learning_rate": 1.4018921161825727e-05, "loss": 0.7979, "step": 18030 }, { "epoch": 14.963485477178423, "grad_norm": 21.861108779907227, "learning_rate": 1.401858921161826e-05, "loss": 0.4046, "step": 18031 }, { "epoch": 14.964315352697096, "grad_norm": 47.82630157470703, "learning_rate": 1.401825726141079e-05, "loss": 1.1761, "step": 18032 }, { "epoch": 14.965145228215768, "grad_norm": 40.20353317260742, "learning_rate": 1.401792531120332e-05, "loss": 1.2271, "step": 18033 }, { "epoch": 14.96597510373444, 
"grad_norm": 28.285764694213867, "learning_rate": 1.401759336099585e-05, "loss": 0.5506, "step": 18034 }, { "epoch": 14.966804979253112, "grad_norm": 30.48558235168457, "learning_rate": 1.4017261410788383e-05, "loss": 0.9561, "step": 18035 }, { "epoch": 14.967634854771784, "grad_norm": 51.97221755981445, "learning_rate": 1.4016929460580915e-05, "loss": 0.9958, "step": 18036 }, { "epoch": 14.968464730290457, "grad_norm": 83.84978485107422, "learning_rate": 1.4016597510373444e-05, "loss": 0.8699, "step": 18037 }, { "epoch": 14.969294605809129, "grad_norm": 27.33574867248535, "learning_rate": 1.4016265560165976e-05, "loss": 0.6129, "step": 18038 }, { "epoch": 14.970124481327801, "grad_norm": 31.0252685546875, "learning_rate": 1.4015933609958508e-05, "loss": 0.8431, "step": 18039 }, { "epoch": 14.970954356846473, "grad_norm": 23.118488311767578, "learning_rate": 1.401560165975104e-05, "loss": 0.9381, "step": 18040 }, { "epoch": 14.971784232365145, "grad_norm": 74.30473327636719, "learning_rate": 1.4015269709543569e-05, "loss": 1.1597, "step": 18041 }, { "epoch": 14.972614107883818, "grad_norm": 26.239543914794922, "learning_rate": 1.40149377593361e-05, "loss": 0.8311, "step": 18042 }, { "epoch": 14.97344398340249, "grad_norm": 46.233985900878906, "learning_rate": 1.4014605809128631e-05, "loss": 1.1178, "step": 18043 }, { "epoch": 14.974273858921162, "grad_norm": 46.676429748535156, "learning_rate": 1.4014273858921163e-05, "loss": 0.9711, "step": 18044 }, { "epoch": 14.975103734439834, "grad_norm": 75.42142486572266, "learning_rate": 1.4013941908713695e-05, "loss": 1.1078, "step": 18045 }, { "epoch": 14.975933609958506, "grad_norm": 28.60027503967285, "learning_rate": 1.4013609958506224e-05, "loss": 0.7128, "step": 18046 }, { "epoch": 14.976763485477179, "grad_norm": 36.079620361328125, "learning_rate": 1.4013278008298756e-05, "loss": 0.7092, "step": 18047 }, { "epoch": 14.97759336099585, "grad_norm": 37.66642761230469, "learning_rate": 1.4012946058091288e-05, "loss": 
0.423, "step": 18048 }, { "epoch": 14.978423236514523, "grad_norm": 26.67182159423828, "learning_rate": 1.401261410788382e-05, "loss": 0.8989, "step": 18049 }, { "epoch": 14.979253112033195, "grad_norm": 17.792688369750977, "learning_rate": 1.4012282157676349e-05, "loss": 0.3746, "step": 18050 }, { "epoch": 14.980082987551867, "grad_norm": 23.073331832885742, "learning_rate": 1.4011950207468881e-05, "loss": 0.5614, "step": 18051 }, { "epoch": 14.98091286307054, "grad_norm": 25.94376564025879, "learning_rate": 1.4011618257261412e-05, "loss": 0.4804, "step": 18052 }, { "epoch": 14.981742738589212, "grad_norm": 21.0565128326416, "learning_rate": 1.4011286307053944e-05, "loss": 0.6272, "step": 18053 }, { "epoch": 14.982572614107884, "grad_norm": 40.620418548583984, "learning_rate": 1.4010954356846474e-05, "loss": 0.6398, "step": 18054 }, { "epoch": 14.983402489626556, "grad_norm": 47.613468170166016, "learning_rate": 1.4010622406639005e-05, "loss": 1.2099, "step": 18055 }, { "epoch": 14.984232365145228, "grad_norm": 18.924049377441406, "learning_rate": 1.4010290456431537e-05, "loss": 0.4145, "step": 18056 }, { "epoch": 14.9850622406639, "grad_norm": 64.83737182617188, "learning_rate": 1.4009958506224069e-05, "loss": 0.5585, "step": 18057 }, { "epoch": 14.985892116182573, "grad_norm": 9.846196174621582, "learning_rate": 1.4009626556016597e-05, "loss": 0.3262, "step": 18058 }, { "epoch": 14.986721991701245, "grad_norm": 28.64510154724121, "learning_rate": 1.400929460580913e-05, "loss": 0.7668, "step": 18059 }, { "epoch": 14.987551867219917, "grad_norm": 62.69477081298828, "learning_rate": 1.4008962655601662e-05, "loss": 0.9064, "step": 18060 }, { "epoch": 14.98838174273859, "grad_norm": 32.00288772583008, "learning_rate": 1.4008630705394192e-05, "loss": 0.5409, "step": 18061 }, { "epoch": 14.989211618257261, "grad_norm": 31.820253372192383, "learning_rate": 1.4008298755186723e-05, "loss": 1.2267, "step": 18062 }, { "epoch": 14.990041493775934, "grad_norm": 
35.17877197265625, "learning_rate": 1.4007966804979255e-05, "loss": 1.2253, "step": 18063 }, { "epoch": 14.990871369294606, "grad_norm": 29.185993194580078, "learning_rate": 1.4007634854771785e-05, "loss": 0.8064, "step": 18064 }, { "epoch": 14.991701244813278, "grad_norm": 33.291629791259766, "learning_rate": 1.4007302904564317e-05, "loss": 0.5074, "step": 18065 }, { "epoch": 14.99253112033195, "grad_norm": 19.874065399169922, "learning_rate": 1.4006970954356846e-05, "loss": 0.4377, "step": 18066 }, { "epoch": 14.993360995850622, "grad_norm": 45.56024169921875, "learning_rate": 1.4006639004149378e-05, "loss": 1.1697, "step": 18067 }, { "epoch": 14.994190871369295, "grad_norm": 47.320980072021484, "learning_rate": 1.400630705394191e-05, "loss": 0.6967, "step": 18068 }, { "epoch": 14.995020746887967, "grad_norm": 60.594032287597656, "learning_rate": 1.4005975103734442e-05, "loss": 1.1303, "step": 18069 }, { "epoch": 14.995850622406639, "grad_norm": 53.320255279541016, "learning_rate": 1.4005643153526971e-05, "loss": 0.7356, "step": 18070 }, { "epoch": 14.996680497925311, "grad_norm": 22.780370712280273, "learning_rate": 1.4005311203319503e-05, "loss": 0.4535, "step": 18071 }, { "epoch": 14.997510373443983, "grad_norm": 26.024089813232422, "learning_rate": 1.4004979253112035e-05, "loss": 0.6427, "step": 18072 }, { "epoch": 14.998340248962656, "grad_norm": 28.47209930419922, "learning_rate": 1.4004647302904566e-05, "loss": 0.5504, "step": 18073 }, { "epoch": 14.999170124481328, "grad_norm": 104.408203125, "learning_rate": 1.4004315352697098e-05, "loss": 1.451, "step": 18074 }, { "epoch": 15.0, "grad_norm": 44.4991455078125, "learning_rate": 1.4003983402489626e-05, "loss": 0.5894, "step": 18075 }, { "epoch": 15.000829875518672, "grad_norm": 36.667232513427734, "learning_rate": 1.4003651452282158e-05, "loss": 0.5069, "step": 18076 }, { "epoch": 15.001659751037344, "grad_norm": 17.198387145996094, "learning_rate": 1.400331950207469e-05, "loss": 0.3553, "step": 18077 }, { 
"epoch": 15.002489626556017, "grad_norm": 62.94799041748047, "learning_rate": 1.4002987551867223e-05, "loss": 0.9108, "step": 18078 }, { "epoch": 15.003319502074689, "grad_norm": 27.790491104125977, "learning_rate": 1.4002655601659751e-05, "loss": 0.3816, "step": 18079 }, { "epoch": 15.004149377593361, "grad_norm": 59.936885833740234, "learning_rate": 1.4002323651452284e-05, "loss": 0.8843, "step": 18080 }, { "epoch": 15.004979253112033, "grad_norm": 42.32512664794922, "learning_rate": 1.4001991701244816e-05, "loss": 1.1951, "step": 18081 }, { "epoch": 15.005809128630705, "grad_norm": 33.19117736816406, "learning_rate": 1.4001659751037346e-05, "loss": 0.8756, "step": 18082 }, { "epoch": 15.006639004149378, "grad_norm": 40.761756896972656, "learning_rate": 1.4001327800829876e-05, "loss": 0.6654, "step": 18083 }, { "epoch": 15.00746887966805, "grad_norm": 23.119600296020508, "learning_rate": 1.4000995850622407e-05, "loss": 0.5148, "step": 18084 }, { "epoch": 15.008298755186722, "grad_norm": 23.27796745300293, "learning_rate": 1.4000663900414939e-05, "loss": 1.0783, "step": 18085 }, { "epoch": 15.009128630705394, "grad_norm": 43.30402755737305, "learning_rate": 1.4000331950207471e-05, "loss": 0.7721, "step": 18086 }, { "epoch": 15.009958506224066, "grad_norm": 19.188823699951172, "learning_rate": 1.4e-05, "loss": 0.3284, "step": 18087 }, { "epoch": 15.010788381742739, "grad_norm": 18.8387508392334, "learning_rate": 1.3999668049792532e-05, "loss": 0.3634, "step": 18088 }, { "epoch": 15.01161825726141, "grad_norm": 45.41461944580078, "learning_rate": 1.3999336099585064e-05, "loss": 0.7081, "step": 18089 }, { "epoch": 15.012448132780083, "grad_norm": 91.33330535888672, "learning_rate": 1.3999004149377594e-05, "loss": 1.2735, "step": 18090 }, { "epoch": 15.013278008298755, "grad_norm": 59.90639114379883, "learning_rate": 1.3998672199170125e-05, "loss": 1.2427, "step": 18091 }, { "epoch": 15.014107883817427, "grad_norm": 42.296478271484375, "learning_rate": 
1.3998340248962657e-05, "loss": 0.5808, "step": 18092 }, { "epoch": 15.0149377593361, "grad_norm": 30.81914520263672, "learning_rate": 1.3998008298755187e-05, "loss": 0.5716, "step": 18093 }, { "epoch": 15.015767634854772, "grad_norm": 41.561058044433594, "learning_rate": 1.399767634854772e-05, "loss": 0.4898, "step": 18094 }, { "epoch": 15.016597510373444, "grad_norm": 45.277889251708984, "learning_rate": 1.399734439834025e-05, "loss": 0.649, "step": 18095 }, { "epoch": 15.017427385892116, "grad_norm": 96.33584594726562, "learning_rate": 1.399701244813278e-05, "loss": 0.9862, "step": 18096 }, { "epoch": 15.018257261410788, "grad_norm": 116.29176330566406, "learning_rate": 1.3996680497925312e-05, "loss": 0.6687, "step": 18097 }, { "epoch": 15.01908713692946, "grad_norm": 26.883220672607422, "learning_rate": 1.3996348547717845e-05, "loss": 0.448, "step": 18098 }, { "epoch": 15.019917012448133, "grad_norm": 43.074623107910156, "learning_rate": 1.3996016597510375e-05, "loss": 1.1471, "step": 18099 }, { "epoch": 15.020746887966805, "grad_norm": 52.8401985168457, "learning_rate": 1.3995684647302905e-05, "loss": 0.5233, "step": 18100 }, { "epoch": 15.021576763485477, "grad_norm": 40.657230377197266, "learning_rate": 1.3995352697095437e-05, "loss": 0.6318, "step": 18101 }, { "epoch": 15.02240663900415, "grad_norm": 26.565624237060547, "learning_rate": 1.3995020746887968e-05, "loss": 0.7468, "step": 18102 }, { "epoch": 15.023236514522821, "grad_norm": 19.659557342529297, "learning_rate": 1.39946887966805e-05, "loss": 0.5878, "step": 18103 }, { "epoch": 15.024066390041494, "grad_norm": 57.155242919921875, "learning_rate": 1.3994356846473029e-05, "loss": 0.776, "step": 18104 }, { "epoch": 15.024896265560166, "grad_norm": 39.428707122802734, "learning_rate": 1.399402489626556e-05, "loss": 0.7618, "step": 18105 }, { "epoch": 15.025726141078838, "grad_norm": 26.27628517150879, "learning_rate": 1.3993692946058093e-05, "loss": 0.6039, "step": 18106 }, { "epoch": 
15.02655601659751, "grad_norm": 27.88252830505371, "learning_rate": 1.3993360995850625e-05, "loss": 0.5704, "step": 18107 }, { "epoch": 15.027385892116182, "grad_norm": 86.93607330322266, "learning_rate": 1.3993029045643154e-05, "loss": 0.7424, "step": 18108 }, { "epoch": 15.028215767634855, "grad_norm": 28.745136260986328, "learning_rate": 1.3992697095435686e-05, "loss": 0.7013, "step": 18109 }, { "epoch": 15.029045643153527, "grad_norm": 119.35618591308594, "learning_rate": 1.3992365145228218e-05, "loss": 1.0405, "step": 18110 }, { "epoch": 15.029875518672199, "grad_norm": 38.03929138183594, "learning_rate": 1.3992033195020748e-05, "loss": 0.4251, "step": 18111 }, { "epoch": 15.030705394190871, "grad_norm": 22.59294891357422, "learning_rate": 1.3991701244813279e-05, "loss": 0.4901, "step": 18112 }, { "epoch": 15.031535269709543, "grad_norm": 26.976242065429688, "learning_rate": 1.399136929460581e-05, "loss": 0.5, "step": 18113 }, { "epoch": 15.032365145228216, "grad_norm": 41.10540771484375, "learning_rate": 1.3991037344398341e-05, "loss": 0.5464, "step": 18114 }, { "epoch": 15.033195020746888, "grad_norm": 28.735977172851562, "learning_rate": 1.3990705394190873e-05, "loss": 0.8468, "step": 18115 }, { "epoch": 15.03402489626556, "grad_norm": 30.164108276367188, "learning_rate": 1.3990373443983402e-05, "loss": 0.5841, "step": 18116 }, { "epoch": 15.034854771784232, "grad_norm": 52.94415283203125, "learning_rate": 1.3990041493775934e-05, "loss": 1.0795, "step": 18117 }, { "epoch": 15.035684647302904, "grad_norm": 25.067520141601562, "learning_rate": 1.3989709543568466e-05, "loss": 0.4829, "step": 18118 }, { "epoch": 15.036514522821577, "grad_norm": 46.36637496948242, "learning_rate": 1.3989377593360998e-05, "loss": 0.9952, "step": 18119 }, { "epoch": 15.037344398340249, "grad_norm": 48.56099319458008, "learning_rate": 1.3989045643153527e-05, "loss": 0.8067, "step": 18120 }, { "epoch": 15.038174273858921, "grad_norm": 31.661649703979492, "learning_rate": 
1.398871369294606e-05, "loss": 0.7058, "step": 18121 }, { "epoch": 15.039004149377593, "grad_norm": 53.81690216064453, "learning_rate": 1.398838174273859e-05, "loss": 1.2887, "step": 18122 }, { "epoch": 15.039834024896265, "grad_norm": 57.935420989990234, "learning_rate": 1.3988049792531122e-05, "loss": 0.9041, "step": 18123 }, { "epoch": 15.040663900414938, "grad_norm": 45.09136962890625, "learning_rate": 1.3987717842323654e-05, "loss": 0.5207, "step": 18124 }, { "epoch": 15.04149377593361, "grad_norm": 57.61117935180664, "learning_rate": 1.3987385892116183e-05, "loss": 0.5558, "step": 18125 }, { "epoch": 15.042323651452282, "grad_norm": 42.94247055053711, "learning_rate": 1.3987053941908715e-05, "loss": 1.0294, "step": 18126 }, { "epoch": 15.043153526970954, "grad_norm": 77.92450714111328, "learning_rate": 1.3986721991701247e-05, "loss": 0.9428, "step": 18127 }, { "epoch": 15.043983402489626, "grad_norm": 25.554689407348633, "learning_rate": 1.3986390041493779e-05, "loss": 0.8173, "step": 18128 }, { "epoch": 15.044813278008299, "grad_norm": 38.414791107177734, "learning_rate": 1.3986058091286308e-05, "loss": 0.7569, "step": 18129 }, { "epoch": 15.04564315352697, "grad_norm": 22.731056213378906, "learning_rate": 1.398572614107884e-05, "loss": 0.4259, "step": 18130 }, { "epoch": 15.046473029045643, "grad_norm": 26.19361686706543, "learning_rate": 1.398539419087137e-05, "loss": 0.4745, "step": 18131 }, { "epoch": 15.047302904564315, "grad_norm": 38.91714859008789, "learning_rate": 1.3985062240663902e-05, "loss": 1.1137, "step": 18132 }, { "epoch": 15.048132780082987, "grad_norm": 38.16240310668945, "learning_rate": 1.3984730290456433e-05, "loss": 1.169, "step": 18133 }, { "epoch": 15.04896265560166, "grad_norm": 46.015464782714844, "learning_rate": 1.3984398340248963e-05, "loss": 0.9862, "step": 18134 }, { "epoch": 15.049792531120332, "grad_norm": 90.36517333984375, "learning_rate": 1.3984066390041495e-05, "loss": 1.0287, "step": 18135 }, { "epoch": 
15.050622406639004, "grad_norm": 14.826319694519043, "learning_rate": 1.3983734439834027e-05, "loss": 0.2655, "step": 18136 }, { "epoch": 15.051452282157676, "grad_norm": 43.037452697753906, "learning_rate": 1.3983402489626556e-05, "loss": 0.6706, "step": 18137 }, { "epoch": 15.052282157676348, "grad_norm": 53.457786560058594, "learning_rate": 1.3983070539419088e-05, "loss": 0.649, "step": 18138 }, { "epoch": 15.05311203319502, "grad_norm": 46.80116653442383, "learning_rate": 1.398273858921162e-05, "loss": 0.4509, "step": 18139 }, { "epoch": 15.053941908713693, "grad_norm": 42.846744537353516, "learning_rate": 1.398240663900415e-05, "loss": 0.5742, "step": 18140 }, { "epoch": 15.054771784232365, "grad_norm": 68.56616973876953, "learning_rate": 1.3982074688796681e-05, "loss": 1.1111, "step": 18141 }, { "epoch": 15.055601659751037, "grad_norm": 31.49810791015625, "learning_rate": 1.3981742738589213e-05, "loss": 0.6899, "step": 18142 }, { "epoch": 15.05643153526971, "grad_norm": 21.28243637084961, "learning_rate": 1.3981410788381744e-05, "loss": 0.5224, "step": 18143 }, { "epoch": 15.057261410788382, "grad_norm": 20.020864486694336, "learning_rate": 1.3981078838174276e-05, "loss": 0.4794, "step": 18144 }, { "epoch": 15.058091286307054, "grad_norm": 37.92180252075195, "learning_rate": 1.3980746887966804e-05, "loss": 0.4441, "step": 18145 }, { "epoch": 15.058921161825726, "grad_norm": 41.705387115478516, "learning_rate": 1.3980414937759337e-05, "loss": 0.787, "step": 18146 }, { "epoch": 15.059751037344398, "grad_norm": 26.572166442871094, "learning_rate": 1.3980082987551869e-05, "loss": 0.789, "step": 18147 }, { "epoch": 15.06058091286307, "grad_norm": 38.22999572753906, "learning_rate": 1.39797510373444e-05, "loss": 0.4065, "step": 18148 }, { "epoch": 15.061410788381743, "grad_norm": 59.7874755859375, "learning_rate": 1.397941908713693e-05, "loss": 1.4288, "step": 18149 }, { "epoch": 15.062240663900415, "grad_norm": 43.728858947753906, "learning_rate": 
1.3979087136929462e-05, "loss": 0.5242, "step": 18150 }, { "epoch": 15.063070539419087, "grad_norm": 37.5938606262207, "learning_rate": 1.3978755186721992e-05, "loss": 0.6687, "step": 18151 }, { "epoch": 15.063900414937759, "grad_norm": 25.658946990966797, "learning_rate": 1.3978423236514524e-05, "loss": 0.4754, "step": 18152 }, { "epoch": 15.064730290456431, "grad_norm": 47.24879455566406, "learning_rate": 1.3978091286307056e-05, "loss": 0.6614, "step": 18153 }, { "epoch": 15.065560165975104, "grad_norm": 21.733903884887695, "learning_rate": 1.3977759336099585e-05, "loss": 0.3169, "step": 18154 }, { "epoch": 15.066390041493776, "grad_norm": 69.2774887084961, "learning_rate": 1.3977427385892117e-05, "loss": 0.8761, "step": 18155 }, { "epoch": 15.067219917012448, "grad_norm": 71.21296691894531, "learning_rate": 1.3977095435684649e-05, "loss": 0.5897, "step": 18156 }, { "epoch": 15.06804979253112, "grad_norm": 93.05538177490234, "learning_rate": 1.3976763485477181e-05, "loss": 0.6906, "step": 18157 }, { "epoch": 15.068879668049792, "grad_norm": 45.551273345947266, "learning_rate": 1.397643153526971e-05, "loss": 0.5565, "step": 18158 }, { "epoch": 15.069709543568464, "grad_norm": 28.333189010620117, "learning_rate": 1.3976099585062242e-05, "loss": 1.0163, "step": 18159 }, { "epoch": 15.070539419087137, "grad_norm": 55.84169006347656, "learning_rate": 1.3975767634854772e-05, "loss": 0.8529, "step": 18160 }, { "epoch": 15.071369294605809, "grad_norm": 38.96179962158203, "learning_rate": 1.3975435684647305e-05, "loss": 0.4211, "step": 18161 }, { "epoch": 15.072199170124481, "grad_norm": 38.51055145263672, "learning_rate": 1.3975103734439835e-05, "loss": 1.3558, "step": 18162 }, { "epoch": 15.073029045643153, "grad_norm": 32.31841278076172, "learning_rate": 1.3974771784232365e-05, "loss": 0.5954, "step": 18163 }, { "epoch": 15.073858921161825, "grad_norm": 33.02811813354492, "learning_rate": 1.3974439834024898e-05, "loss": 0.528, "step": 18164 }, { "epoch": 
15.074688796680498, "grad_norm": 20.9721736907959, "learning_rate": 1.397410788381743e-05, "loss": 0.5749, "step": 18165 }, { "epoch": 15.07551867219917, "grad_norm": 55.48347091674805, "learning_rate": 1.3973775933609958e-05, "loss": 1.5672, "step": 18166 }, { "epoch": 15.076348547717842, "grad_norm": 75.9891357421875, "learning_rate": 1.397344398340249e-05, "loss": 0.3093, "step": 18167 }, { "epoch": 15.077178423236514, "grad_norm": 42.47557067871094, "learning_rate": 1.3973112033195023e-05, "loss": 0.8261, "step": 18168 }, { "epoch": 15.078008298755186, "grad_norm": 20.154855728149414, "learning_rate": 1.3972780082987553e-05, "loss": 0.3737, "step": 18169 }, { "epoch": 15.078838174273859, "grad_norm": 19.26059341430664, "learning_rate": 1.3972448132780083e-05, "loss": 0.5948, "step": 18170 }, { "epoch": 15.07966804979253, "grad_norm": 57.10189437866211, "learning_rate": 1.3972116182572616e-05, "loss": 0.9494, "step": 18171 }, { "epoch": 15.080497925311203, "grad_norm": 24.386009216308594, "learning_rate": 1.3971784232365146e-05, "loss": 0.5626, "step": 18172 }, { "epoch": 15.081327800829875, "grad_norm": 24.758987426757812, "learning_rate": 1.3971452282157678e-05, "loss": 0.6524, "step": 18173 }, { "epoch": 15.082157676348547, "grad_norm": 34.97978591918945, "learning_rate": 1.3971120331950207e-05, "loss": 0.5476, "step": 18174 }, { "epoch": 15.08298755186722, "grad_norm": 43.71034622192383, "learning_rate": 1.3970788381742739e-05, "loss": 0.8229, "step": 18175 }, { "epoch": 15.083817427385892, "grad_norm": 22.392475128173828, "learning_rate": 1.3970456431535271e-05, "loss": 0.4218, "step": 18176 }, { "epoch": 15.084647302904564, "grad_norm": 54.16012191772461, "learning_rate": 1.3970124481327803e-05, "loss": 0.5158, "step": 18177 }, { "epoch": 15.085477178423236, "grad_norm": 24.546083450317383, "learning_rate": 1.3969792531120333e-05, "loss": 0.6218, "step": 18178 }, { "epoch": 15.086307053941908, "grad_norm": 36.3441276550293, "learning_rate": 
1.3969460580912864e-05, "loss": 0.689, "step": 18179 }, { "epoch": 15.08713692946058, "grad_norm": 41.528072357177734, "learning_rate": 1.3969128630705396e-05, "loss": 0.628, "step": 18180 }, { "epoch": 15.087966804979253, "grad_norm": 21.07320213317871, "learning_rate": 1.3968796680497926e-05, "loss": 0.3516, "step": 18181 }, { "epoch": 15.088796680497925, "grad_norm": 28.298917770385742, "learning_rate": 1.3968464730290459e-05, "loss": 0.5396, "step": 18182 }, { "epoch": 15.089626556016597, "grad_norm": 53.57394790649414, "learning_rate": 1.3968132780082987e-05, "loss": 0.334, "step": 18183 }, { "epoch": 15.09045643153527, "grad_norm": 21.6387882232666, "learning_rate": 1.396780082987552e-05, "loss": 0.7081, "step": 18184 }, { "epoch": 15.091286307053942, "grad_norm": 25.162261962890625, "learning_rate": 1.3967468879668051e-05, "loss": 0.5369, "step": 18185 }, { "epoch": 15.092116182572614, "grad_norm": 38.537471771240234, "learning_rate": 1.3967136929460584e-05, "loss": 0.8199, "step": 18186 }, { "epoch": 15.092946058091286, "grad_norm": 28.19313621520996, "learning_rate": 1.3966804979253112e-05, "loss": 0.5437, "step": 18187 }, { "epoch": 15.093775933609958, "grad_norm": 36.48519515991211, "learning_rate": 1.3966473029045644e-05, "loss": 0.53, "step": 18188 }, { "epoch": 15.09460580912863, "grad_norm": 70.83200073242188, "learning_rate": 1.3966141078838177e-05, "loss": 1.1358, "step": 18189 }, { "epoch": 15.095435684647303, "grad_norm": 27.10422706604004, "learning_rate": 1.3965809128630707e-05, "loss": 0.3745, "step": 18190 }, { "epoch": 15.096265560165975, "grad_norm": 64.30844116210938, "learning_rate": 1.3965477178423237e-05, "loss": 0.6875, "step": 18191 }, { "epoch": 15.097095435684647, "grad_norm": 75.33487701416016, "learning_rate": 1.3965145228215768e-05, "loss": 0.6274, "step": 18192 }, { "epoch": 15.09792531120332, "grad_norm": 28.219852447509766, "learning_rate": 1.39648132780083e-05, "loss": 0.5141, "step": 18193 }, { "epoch": 15.098755186721991, 
"grad_norm": 28.726232528686523, "learning_rate": 1.3964481327800832e-05, "loss": 0.6279, "step": 18194 }, { "epoch": 15.099585062240664, "grad_norm": 34.16147994995117, "learning_rate": 1.396414937759336e-05, "loss": 0.6852, "step": 18195 }, { "epoch": 15.100414937759336, "grad_norm": 130.29458618164062, "learning_rate": 1.3963817427385893e-05, "loss": 0.8451, "step": 18196 }, { "epoch": 15.101244813278008, "grad_norm": 44.783931732177734, "learning_rate": 1.3963485477178425e-05, "loss": 0.5349, "step": 18197 }, { "epoch": 15.10207468879668, "grad_norm": 45.44843292236328, "learning_rate": 1.3963153526970957e-05, "loss": 0.6628, "step": 18198 }, { "epoch": 15.102904564315352, "grad_norm": 35.22526931762695, "learning_rate": 1.3962821576763486e-05, "loss": 0.7395, "step": 18199 }, { "epoch": 15.103734439834025, "grad_norm": 35.472190856933594, "learning_rate": 1.3962489626556018e-05, "loss": 0.9892, "step": 18200 }, { "epoch": 15.104564315352697, "grad_norm": 68.7078857421875, "learning_rate": 1.3962157676348548e-05, "loss": 0.6086, "step": 18201 }, { "epoch": 15.105394190871369, "grad_norm": 34.254295349121094, "learning_rate": 1.396182572614108e-05, "loss": 0.4101, "step": 18202 }, { "epoch": 15.106224066390041, "grad_norm": 30.991865158081055, "learning_rate": 1.396149377593361e-05, "loss": 0.9142, "step": 18203 }, { "epoch": 15.107053941908713, "grad_norm": 47.847476959228516, "learning_rate": 1.3961161825726141e-05, "loss": 0.5546, "step": 18204 }, { "epoch": 15.107883817427386, "grad_norm": 32.4304084777832, "learning_rate": 1.3960829875518673e-05, "loss": 0.503, "step": 18205 }, { "epoch": 15.108713692946058, "grad_norm": 61.73725509643555, "learning_rate": 1.3960497925311205e-05, "loss": 0.9362, "step": 18206 }, { "epoch": 15.10954356846473, "grad_norm": 29.008298873901367, "learning_rate": 1.3960165975103736e-05, "loss": 0.5709, "step": 18207 }, { "epoch": 15.110373443983402, "grad_norm": 35.497650146484375, "learning_rate": 1.3959834024896266e-05, "loss": 
0.4002, "step": 18208 }, { "epoch": 15.111203319502074, "grad_norm": 29.238235473632812, "learning_rate": 1.3959502074688798e-05, "loss": 0.7018, "step": 18209 }, { "epoch": 15.112033195020746, "grad_norm": 31.023900985717773, "learning_rate": 1.3959170124481329e-05, "loss": 0.8788, "step": 18210 }, { "epoch": 15.112863070539419, "grad_norm": 46.164588928222656, "learning_rate": 1.3958838174273861e-05, "loss": 0.5341, "step": 18211 }, { "epoch": 15.11369294605809, "grad_norm": 33.58842849731445, "learning_rate": 1.3958506224066391e-05, "loss": 0.3002, "step": 18212 }, { "epoch": 15.114522821576763, "grad_norm": 55.855445861816406, "learning_rate": 1.3958174273858922e-05, "loss": 0.6522, "step": 18213 }, { "epoch": 15.115352697095435, "grad_norm": 42.748573303222656, "learning_rate": 1.3957842323651454e-05, "loss": 0.8709, "step": 18214 }, { "epoch": 15.116182572614107, "grad_norm": 58.889892578125, "learning_rate": 1.3957510373443986e-05, "loss": 0.7824, "step": 18215 }, { "epoch": 15.11701244813278, "grad_norm": 54.32147979736328, "learning_rate": 1.3957178423236515e-05, "loss": 0.6885, "step": 18216 }, { "epoch": 15.117842323651452, "grad_norm": 34.52783966064453, "learning_rate": 1.3956846473029047e-05, "loss": 0.62, "step": 18217 }, { "epoch": 15.118672199170124, "grad_norm": 94.75990295410156, "learning_rate": 1.3956514522821579e-05, "loss": 1.1503, "step": 18218 }, { "epoch": 15.119502074688796, "grad_norm": 71.4673843383789, "learning_rate": 1.395618257261411e-05, "loss": 0.5986, "step": 18219 }, { "epoch": 15.120331950207468, "grad_norm": 92.29083251953125, "learning_rate": 1.395585062240664e-05, "loss": 0.7674, "step": 18220 }, { "epoch": 15.12116182572614, "grad_norm": 46.168617248535156, "learning_rate": 1.395551867219917e-05, "loss": 0.6003, "step": 18221 }, { "epoch": 15.121991701244813, "grad_norm": 37.95470428466797, "learning_rate": 1.3955186721991702e-05, "loss": 0.6895, "step": 18222 }, { "epoch": 15.122821576763485, "grad_norm": 
63.12152099609375, "learning_rate": 1.3954854771784234e-05, "loss": 0.8955, "step": 18223 }, { "epoch": 15.123651452282157, "grad_norm": 26.25731086730957, "learning_rate": 1.3954522821576763e-05, "loss": 0.7067, "step": 18224 }, { "epoch": 15.12448132780083, "grad_norm": 32.66158676147461, "learning_rate": 1.3954190871369295e-05, "loss": 0.5542, "step": 18225 }, { "epoch": 15.125311203319502, "grad_norm": 32.91440200805664, "learning_rate": 1.3953858921161827e-05, "loss": 0.8902, "step": 18226 }, { "epoch": 15.126141078838174, "grad_norm": 60.25278091430664, "learning_rate": 1.395352697095436e-05, "loss": 1.0573, "step": 18227 }, { "epoch": 15.126970954356846, "grad_norm": 80.46627807617188, "learning_rate": 1.3953195020746888e-05, "loss": 0.9066, "step": 18228 }, { "epoch": 15.127800829875518, "grad_norm": 44.40149688720703, "learning_rate": 1.395286307053942e-05, "loss": 1.2441, "step": 18229 }, { "epoch": 15.12863070539419, "grad_norm": 35.23640441894531, "learning_rate": 1.395253112033195e-05, "loss": 0.644, "step": 18230 }, { "epoch": 15.129460580912863, "grad_norm": 27.799755096435547, "learning_rate": 1.3952199170124483e-05, "loss": 0.7316, "step": 18231 }, { "epoch": 15.130290456431535, "grad_norm": 36.7829475402832, "learning_rate": 1.3951867219917015e-05, "loss": 0.7507, "step": 18232 }, { "epoch": 15.131120331950207, "grad_norm": 43.601131439208984, "learning_rate": 1.3951535269709544e-05, "loss": 0.6277, "step": 18233 }, { "epoch": 15.13195020746888, "grad_norm": 23.83646583557129, "learning_rate": 1.3951203319502076e-05, "loss": 0.4519, "step": 18234 }, { "epoch": 15.132780082987551, "grad_norm": 64.61066436767578, "learning_rate": 1.3950871369294608e-05, "loss": 0.9605, "step": 18235 }, { "epoch": 15.133609958506224, "grad_norm": 39.6420783996582, "learning_rate": 1.395053941908714e-05, "loss": 0.4775, "step": 18236 }, { "epoch": 15.134439834024896, "grad_norm": 27.395374298095703, "learning_rate": 1.3950207468879669e-05, "loss": 0.6026, "step": 
18237 }, { "epoch": 15.135269709543568, "grad_norm": 40.384185791015625, "learning_rate": 1.39498755186722e-05, "loss": 0.4961, "step": 18238 }, { "epoch": 15.13609958506224, "grad_norm": 25.022197723388672, "learning_rate": 1.3949543568464731e-05, "loss": 0.5724, "step": 18239 }, { "epoch": 15.136929460580912, "grad_norm": 32.7849006652832, "learning_rate": 1.3949211618257263e-05, "loss": 0.6059, "step": 18240 }, { "epoch": 15.137759336099585, "grad_norm": 51.39459991455078, "learning_rate": 1.3948879668049794e-05, "loss": 0.4405, "step": 18241 }, { "epoch": 15.138589211618257, "grad_norm": 21.388334274291992, "learning_rate": 1.3948547717842324e-05, "loss": 0.6976, "step": 18242 }, { "epoch": 15.139419087136929, "grad_norm": 30.554710388183594, "learning_rate": 1.3948215767634856e-05, "loss": 0.7401, "step": 18243 }, { "epoch": 15.140248962655601, "grad_norm": 27.78432273864746, "learning_rate": 1.3947883817427388e-05, "loss": 1.0533, "step": 18244 }, { "epoch": 15.141078838174273, "grad_norm": 28.934682846069336, "learning_rate": 1.3947551867219917e-05, "loss": 0.5415, "step": 18245 }, { "epoch": 15.141908713692946, "grad_norm": 46.39167022705078, "learning_rate": 1.3947219917012449e-05, "loss": 0.8576, "step": 18246 }, { "epoch": 15.142738589211618, "grad_norm": 47.239200592041016, "learning_rate": 1.3946887966804981e-05, "loss": 0.4267, "step": 18247 }, { "epoch": 15.14356846473029, "grad_norm": 19.245651245117188, "learning_rate": 1.3946556016597512e-05, "loss": 0.3378, "step": 18248 }, { "epoch": 15.144398340248962, "grad_norm": 16.517627716064453, "learning_rate": 1.3946224066390042e-05, "loss": 0.3292, "step": 18249 }, { "epoch": 15.145228215767634, "grad_norm": 22.61627197265625, "learning_rate": 1.3945892116182574e-05, "loss": 0.5612, "step": 18250 }, { "epoch": 15.146058091286307, "grad_norm": 22.207120895385742, "learning_rate": 1.3945560165975105e-05, "loss": 0.4771, "step": 18251 }, { "epoch": 15.146887966804979, "grad_norm": 27.67198371887207, 
"learning_rate": 1.3945228215767637e-05, "loss": 0.5232, "step": 18252 }, { "epoch": 15.147717842323651, "grad_norm": 36.08893585205078, "learning_rate": 1.3944896265560165e-05, "loss": 0.8229, "step": 18253 }, { "epoch": 15.148547717842323, "grad_norm": 49.110477447509766, "learning_rate": 1.3944564315352697e-05, "loss": 0.6299, "step": 18254 }, { "epoch": 15.149377593360995, "grad_norm": 17.955312728881836, "learning_rate": 1.394423236514523e-05, "loss": 0.4576, "step": 18255 }, { "epoch": 15.150207468879668, "grad_norm": 25.111202239990234, "learning_rate": 1.3943900414937762e-05, "loss": 0.4839, "step": 18256 }, { "epoch": 15.15103734439834, "grad_norm": 34.81715393066406, "learning_rate": 1.3943568464730292e-05, "loss": 0.5604, "step": 18257 }, { "epoch": 15.151867219917012, "grad_norm": 56.000762939453125, "learning_rate": 1.3943236514522822e-05, "loss": 0.8598, "step": 18258 }, { "epoch": 15.152697095435684, "grad_norm": 21.81765365600586, "learning_rate": 1.3942904564315355e-05, "loss": 0.4804, "step": 18259 }, { "epoch": 15.153526970954356, "grad_norm": 54.752254486083984, "learning_rate": 1.3942572614107885e-05, "loss": 0.4302, "step": 18260 }, { "epoch": 15.154356846473028, "grad_norm": 27.894479751586914, "learning_rate": 1.3942240663900417e-05, "loss": 0.5233, "step": 18261 }, { "epoch": 15.1551867219917, "grad_norm": 41.26435470581055, "learning_rate": 1.3941908713692946e-05, "loss": 0.8042, "step": 18262 }, { "epoch": 15.156016597510373, "grad_norm": 37.7504997253418, "learning_rate": 1.3941576763485478e-05, "loss": 0.5676, "step": 18263 }, { "epoch": 15.156846473029045, "grad_norm": 39.80844497680664, "learning_rate": 1.394124481327801e-05, "loss": 1.0748, "step": 18264 }, { "epoch": 15.157676348547717, "grad_norm": 78.26506805419922, "learning_rate": 1.3940912863070542e-05, "loss": 1.8386, "step": 18265 }, { "epoch": 15.15850622406639, "grad_norm": 46.21063995361328, "learning_rate": 1.3940580912863071e-05, "loss": 0.7901, "step": 18266 }, { 
"epoch": 15.159336099585062, "grad_norm": 24.48743438720703, "learning_rate": 1.3940248962655603e-05, "loss": 0.489, "step": 18267 }, { "epoch": 15.160165975103734, "grad_norm": 16.71525001525879, "learning_rate": 1.3939917012448133e-05, "loss": 0.2527, "step": 18268 }, { "epoch": 15.160995850622406, "grad_norm": 29.02290153503418, "learning_rate": 1.3939585062240665e-05, "loss": 0.927, "step": 18269 }, { "epoch": 15.161825726141078, "grad_norm": 33.45742416381836, "learning_rate": 1.3939253112033196e-05, "loss": 0.7675, "step": 18270 }, { "epoch": 15.16265560165975, "grad_norm": 52.93946075439453, "learning_rate": 1.3938921161825726e-05, "loss": 0.8463, "step": 18271 }, { "epoch": 15.163485477178423, "grad_norm": 25.945178985595703, "learning_rate": 1.3938589211618258e-05, "loss": 1.5149, "step": 18272 }, { "epoch": 15.164315352697095, "grad_norm": 26.301450729370117, "learning_rate": 1.393825726141079e-05, "loss": 0.5405, "step": 18273 }, { "epoch": 15.165145228215767, "grad_norm": 31.671499252319336, "learning_rate": 1.393792531120332e-05, "loss": 0.7153, "step": 18274 }, { "epoch": 15.16597510373444, "grad_norm": 49.818389892578125, "learning_rate": 1.3937593360995851e-05, "loss": 0.8324, "step": 18275 }, { "epoch": 15.166804979253111, "grad_norm": 61.97704315185547, "learning_rate": 1.3937261410788383e-05, "loss": 1.1462, "step": 18276 }, { "epoch": 15.167634854771784, "grad_norm": 56.583740234375, "learning_rate": 1.3936929460580914e-05, "loss": 1.3268, "step": 18277 }, { "epoch": 15.168464730290456, "grad_norm": 28.908184051513672, "learning_rate": 1.3936597510373444e-05, "loss": 0.2544, "step": 18278 }, { "epoch": 15.169294605809128, "grad_norm": 52.538970947265625, "learning_rate": 1.3936265560165976e-05, "loss": 0.8891, "step": 18279 }, { "epoch": 15.1701244813278, "grad_norm": 42.37086868286133, "learning_rate": 1.3935933609958507e-05, "loss": 1.4189, "step": 18280 }, { "epoch": 15.170954356846472, "grad_norm": 26.85041618347168, "learning_rate": 
1.3935601659751039e-05, "loss": 0.3846, "step": 18281 }, { "epoch": 15.171784232365145, "grad_norm": 50.581424713134766, "learning_rate": 1.393526970954357e-05, "loss": 0.9693, "step": 18282 }, { "epoch": 15.172614107883817, "grad_norm": 49.10380554199219, "learning_rate": 1.39349377593361e-05, "loss": 0.9927, "step": 18283 }, { "epoch": 15.173443983402489, "grad_norm": 27.10883903503418, "learning_rate": 1.3934605809128632e-05, "loss": 0.6048, "step": 18284 }, { "epoch": 15.174273858921161, "grad_norm": 12.117507934570312, "learning_rate": 1.3934273858921164e-05, "loss": 0.2803, "step": 18285 }, { "epoch": 15.175103734439833, "grad_norm": 46.784210205078125, "learning_rate": 1.3933941908713694e-05, "loss": 1.1915, "step": 18286 }, { "epoch": 15.175933609958506, "grad_norm": 45.95527267456055, "learning_rate": 1.3933609958506225e-05, "loss": 1.0172, "step": 18287 }, { "epoch": 15.176763485477178, "grad_norm": 43.99650955200195, "learning_rate": 1.3933278008298757e-05, "loss": 1.0768, "step": 18288 }, { "epoch": 15.17759336099585, "grad_norm": 29.20037841796875, "learning_rate": 1.3932946058091287e-05, "loss": 0.6319, "step": 18289 }, { "epoch": 15.178423236514522, "grad_norm": 34.255802154541016, "learning_rate": 1.393261410788382e-05, "loss": 0.6652, "step": 18290 }, { "epoch": 15.179253112033194, "grad_norm": 31.46104621887207, "learning_rate": 1.3932282157676348e-05, "loss": 0.6077, "step": 18291 }, { "epoch": 15.180082987551867, "grad_norm": 22.177581787109375, "learning_rate": 1.393195020746888e-05, "loss": 0.9309, "step": 18292 }, { "epoch": 15.180912863070539, "grad_norm": 42.066280364990234, "learning_rate": 1.3931618257261412e-05, "loss": 0.6748, "step": 18293 }, { "epoch": 15.181742738589211, "grad_norm": 57.1785774230957, "learning_rate": 1.3931286307053944e-05, "loss": 1.0095, "step": 18294 }, { "epoch": 15.182572614107883, "grad_norm": 31.318077087402344, "learning_rate": 1.3930954356846473e-05, "loss": 0.4709, "step": 18295 }, { "epoch": 
15.183402489626555, "grad_norm": 38.06824493408203, "learning_rate": 1.3930622406639005e-05, "loss": 0.792, "step": 18296 }, { "epoch": 15.184232365145228, "grad_norm": 30.10996437072754, "learning_rate": 1.3930290456431537e-05, "loss": 0.6647, "step": 18297 }, { "epoch": 15.1850622406639, "grad_norm": 50.182220458984375, "learning_rate": 1.3929958506224068e-05, "loss": 1.4517, "step": 18298 }, { "epoch": 15.185892116182572, "grad_norm": 26.910144805908203, "learning_rate": 1.3929626556016598e-05, "loss": 0.4733, "step": 18299 }, { "epoch": 15.186721991701244, "grad_norm": 88.46025848388672, "learning_rate": 1.3929294605809129e-05, "loss": 1.1142, "step": 18300 }, { "epoch": 15.187551867219916, "grad_norm": 30.9897403717041, "learning_rate": 1.392896265560166e-05, "loss": 1.2118, "step": 18301 }, { "epoch": 15.188381742738589, "grad_norm": 41.09385681152344, "learning_rate": 1.3928630705394193e-05, "loss": 0.7972, "step": 18302 }, { "epoch": 15.18921161825726, "grad_norm": 82.10098266601562, "learning_rate": 1.3928298755186722e-05, "loss": 0.5102, "step": 18303 }, { "epoch": 15.190041493775933, "grad_norm": 64.2464370727539, "learning_rate": 1.3927966804979254e-05, "loss": 0.7651, "step": 18304 }, { "epoch": 15.190871369294605, "grad_norm": 46.254966735839844, "learning_rate": 1.3927634854771786e-05, "loss": 0.4684, "step": 18305 }, { "epoch": 15.191701244813277, "grad_norm": 33.309303283691406, "learning_rate": 1.3927302904564318e-05, "loss": 0.3974, "step": 18306 }, { "epoch": 15.19253112033195, "grad_norm": 27.551416397094727, "learning_rate": 1.3926970954356847e-05, "loss": 0.4837, "step": 18307 }, { "epoch": 15.193360995850622, "grad_norm": 34.52130889892578, "learning_rate": 1.3926639004149379e-05, "loss": 0.3961, "step": 18308 }, { "epoch": 15.194190871369294, "grad_norm": 48.451541900634766, "learning_rate": 1.3926307053941909e-05, "loss": 0.6525, "step": 18309 }, { "epoch": 15.195020746887966, "grad_norm": 27.07370376586914, "learning_rate": 
1.3925975103734441e-05, "loss": 0.4821, "step": 18310 }, { "epoch": 15.195850622406638, "grad_norm": 42.445926666259766, "learning_rate": 1.3925643153526973e-05, "loss": 0.62, "step": 18311 }, { "epoch": 15.19668049792531, "grad_norm": 41.07157516479492, "learning_rate": 1.3925311203319502e-05, "loss": 1.1689, "step": 18312 }, { "epoch": 15.197510373443983, "grad_norm": 24.903324127197266, "learning_rate": 1.3924979253112034e-05, "loss": 0.4388, "step": 18313 }, { "epoch": 15.198340248962655, "grad_norm": 20.972631454467773, "learning_rate": 1.3924647302904566e-05, "loss": 0.4884, "step": 18314 }, { "epoch": 15.199170124481327, "grad_norm": 27.247421264648438, "learning_rate": 1.3924315352697097e-05, "loss": 0.6861, "step": 18315 }, { "epoch": 15.2, "grad_norm": 36.5233039855957, "learning_rate": 1.3923983402489627e-05, "loss": 0.9009, "step": 18316 }, { "epoch": 15.200829875518671, "grad_norm": 46.95698928833008, "learning_rate": 1.392365145228216e-05, "loss": 0.6538, "step": 18317 }, { "epoch": 15.201659751037344, "grad_norm": 30.14944839477539, "learning_rate": 1.392331950207469e-05, "loss": 1.0208, "step": 18318 }, { "epoch": 15.202489626556016, "grad_norm": 34.393863677978516, "learning_rate": 1.3922987551867222e-05, "loss": 0.413, "step": 18319 }, { "epoch": 15.203319502074688, "grad_norm": 61.860198974609375, "learning_rate": 1.3922655601659752e-05, "loss": 1.1126, "step": 18320 }, { "epoch": 15.20414937759336, "grad_norm": 46.129371643066406, "learning_rate": 1.3922323651452283e-05, "loss": 0.506, "step": 18321 }, { "epoch": 15.204979253112032, "grad_norm": 16.300321578979492, "learning_rate": 1.3921991701244815e-05, "loss": 0.3162, "step": 18322 }, { "epoch": 15.205809128630705, "grad_norm": 74.07626342773438, "learning_rate": 1.3921659751037347e-05, "loss": 1.2027, "step": 18323 }, { "epoch": 15.206639004149377, "grad_norm": 28.334949493408203, "learning_rate": 1.3921327800829876e-05, "loss": 0.7498, "step": 18324 }, { "epoch": 15.207468879668049, 
"grad_norm": 55.79890823364258, "learning_rate": 1.3920995850622408e-05, "loss": 0.6016, "step": 18325 }, { "epoch": 15.208298755186721, "grad_norm": 33.526092529296875, "learning_rate": 1.392066390041494e-05, "loss": 0.5842, "step": 18326 }, { "epoch": 15.209128630705393, "grad_norm": 52.713993072509766, "learning_rate": 1.392033195020747e-05, "loss": 1.0232, "step": 18327 }, { "epoch": 15.209958506224066, "grad_norm": 79.84339141845703, "learning_rate": 1.392e-05, "loss": 1.1404, "step": 18328 }, { "epoch": 15.210788381742738, "grad_norm": 41.62434005737305, "learning_rate": 1.3919668049792533e-05, "loss": 0.6786, "step": 18329 }, { "epoch": 15.21161825726141, "grad_norm": 37.33951187133789, "learning_rate": 1.3919336099585063e-05, "loss": 0.8068, "step": 18330 }, { "epoch": 15.212448132780082, "grad_norm": 28.481889724731445, "learning_rate": 1.3919004149377595e-05, "loss": 0.5732, "step": 18331 }, { "epoch": 15.213278008298754, "grad_norm": 47.20077896118164, "learning_rate": 1.3918672199170124e-05, "loss": 0.5154, "step": 18332 }, { "epoch": 15.214107883817427, "grad_norm": 38.6266975402832, "learning_rate": 1.3918340248962656e-05, "loss": 0.9668, "step": 18333 }, { "epoch": 15.214937759336099, "grad_norm": 37.970340728759766, "learning_rate": 1.3918008298755188e-05, "loss": 0.8847, "step": 18334 }, { "epoch": 15.215767634854771, "grad_norm": 41.77008819580078, "learning_rate": 1.391767634854772e-05, "loss": 1.0392, "step": 18335 }, { "epoch": 15.216597510373443, "grad_norm": 28.40459632873535, "learning_rate": 1.391734439834025e-05, "loss": 0.5512, "step": 18336 }, { "epoch": 15.217427385892115, "grad_norm": 23.183645248413086, "learning_rate": 1.3917012448132781e-05, "loss": 0.3898, "step": 18337 }, { "epoch": 15.218257261410788, "grad_norm": 31.151111602783203, "learning_rate": 1.3916680497925311e-05, "loss": 0.6009, "step": 18338 }, { "epoch": 15.21908713692946, "grad_norm": 27.44729995727539, "learning_rate": 1.3916348547717844e-05, "loss": 0.5151, 
"step": 18339 }, { "epoch": 15.219917012448132, "grad_norm": 19.08247947692871, "learning_rate": 1.3916016597510376e-05, "loss": 0.3787, "step": 18340 }, { "epoch": 15.220746887966804, "grad_norm": 43.897518157958984, "learning_rate": 1.3915684647302904e-05, "loss": 0.6257, "step": 18341 }, { "epoch": 15.221576763485476, "grad_norm": 54.513587951660156, "learning_rate": 1.3915352697095437e-05, "loss": 0.4316, "step": 18342 }, { "epoch": 15.222406639004149, "grad_norm": 20.84127426147461, "learning_rate": 1.3915020746887969e-05, "loss": 0.3506, "step": 18343 }, { "epoch": 15.22323651452282, "grad_norm": 24.183698654174805, "learning_rate": 1.39146887966805e-05, "loss": 0.8073, "step": 18344 }, { "epoch": 15.224066390041493, "grad_norm": 21.58254623413086, "learning_rate": 1.391435684647303e-05, "loss": 0.5429, "step": 18345 }, { "epoch": 15.224896265560165, "grad_norm": 24.764659881591797, "learning_rate": 1.3914024896265562e-05, "loss": 0.2667, "step": 18346 }, { "epoch": 15.225726141078837, "grad_norm": 47.80538558959961, "learning_rate": 1.3913692946058092e-05, "loss": 1.1514, "step": 18347 }, { "epoch": 15.22655601659751, "grad_norm": 27.860910415649414, "learning_rate": 1.3913360995850624e-05, "loss": 0.3618, "step": 18348 }, { "epoch": 15.227385892116182, "grad_norm": 29.222244262695312, "learning_rate": 1.3913029045643154e-05, "loss": 0.8033, "step": 18349 }, { "epoch": 15.228215767634854, "grad_norm": 16.834110260009766, "learning_rate": 1.3912697095435685e-05, "loss": 0.3647, "step": 18350 }, { "epoch": 15.229045643153526, "grad_norm": 23.400604248046875, "learning_rate": 1.3912365145228217e-05, "loss": 0.4208, "step": 18351 }, { "epoch": 15.229875518672198, "grad_norm": 28.40768814086914, "learning_rate": 1.3912033195020749e-05, "loss": 0.6334, "step": 18352 }, { "epoch": 15.23070539419087, "grad_norm": 50.96148681640625, "learning_rate": 1.3911701244813278e-05, "loss": 0.6835, "step": 18353 }, { "epoch": 15.231535269709543, "grad_norm": 
34.938411712646484, "learning_rate": 1.391136929460581e-05, "loss": 0.6654, "step": 18354 }, { "epoch": 15.232365145228215, "grad_norm": 41.8386344909668, "learning_rate": 1.3911037344398342e-05, "loss": 1.1128, "step": 18355 }, { "epoch": 15.233195020746887, "grad_norm": 46.54117965698242, "learning_rate": 1.3910705394190872e-05, "loss": 0.7591, "step": 18356 }, { "epoch": 15.23402489626556, "grad_norm": 34.01176452636719, "learning_rate": 1.3910373443983403e-05, "loss": 1.1244, "step": 18357 }, { "epoch": 15.234854771784232, "grad_norm": 123.98016357421875, "learning_rate": 1.3910041493775935e-05, "loss": 0.8217, "step": 18358 }, { "epoch": 15.235684647302904, "grad_norm": 24.308149337768555, "learning_rate": 1.3909709543568465e-05, "loss": 0.7629, "step": 18359 }, { "epoch": 15.236514522821576, "grad_norm": 80.4778060913086, "learning_rate": 1.3909377593360998e-05, "loss": 1.8851, "step": 18360 }, { "epoch": 15.237344398340248, "grad_norm": 32.866905212402344, "learning_rate": 1.3909045643153526e-05, "loss": 0.857, "step": 18361 }, { "epoch": 15.23817427385892, "grad_norm": 53.111000061035156, "learning_rate": 1.3908713692946058e-05, "loss": 1.6593, "step": 18362 }, { "epoch": 15.239004149377593, "grad_norm": 89.43509674072266, "learning_rate": 1.390838174273859e-05, "loss": 1.1621, "step": 18363 }, { "epoch": 15.239834024896265, "grad_norm": 39.06916046142578, "learning_rate": 1.3908049792531123e-05, "loss": 1.0365, "step": 18364 }, { "epoch": 15.240663900414937, "grad_norm": 77.55420684814453, "learning_rate": 1.3907717842323653e-05, "loss": 0.5774, "step": 18365 }, { "epoch": 15.241493775933609, "grad_norm": 43.51786422729492, "learning_rate": 1.3907385892116183e-05, "loss": 0.8441, "step": 18366 }, { "epoch": 15.242323651452281, "grad_norm": 30.776878356933594, "learning_rate": 1.3907053941908715e-05, "loss": 0.6683, "step": 18367 }, { "epoch": 15.243153526970953, "grad_norm": 60.680076599121094, "learning_rate": 1.3906721991701246e-05, "loss": 0.9375, 
"step": 18368 }, { "epoch": 15.243983402489626, "grad_norm": 28.751312255859375, "learning_rate": 1.3906390041493778e-05, "loss": 0.7208, "step": 18369 }, { "epoch": 15.244813278008298, "grad_norm": 26.5504150390625, "learning_rate": 1.3906058091286307e-05, "loss": 0.6254, "step": 18370 }, { "epoch": 15.24564315352697, "grad_norm": 14.874983787536621, "learning_rate": 1.3905726141078839e-05, "loss": 0.3305, "step": 18371 }, { "epoch": 15.246473029045642, "grad_norm": 38.525238037109375, "learning_rate": 1.3905394190871371e-05, "loss": 0.4583, "step": 18372 }, { "epoch": 15.247302904564314, "grad_norm": 15.245340347290039, "learning_rate": 1.3905062240663903e-05, "loss": 0.3544, "step": 18373 }, { "epoch": 15.248132780082987, "grad_norm": 42.7935676574707, "learning_rate": 1.3904730290456432e-05, "loss": 0.5024, "step": 18374 }, { "epoch": 15.248962655601659, "grad_norm": 59.83258056640625, "learning_rate": 1.3904398340248964e-05, "loss": 1.2986, "step": 18375 }, { "epoch": 15.249792531120331, "grad_norm": 53.697261810302734, "learning_rate": 1.3904066390041496e-05, "loss": 0.8389, "step": 18376 }, { "epoch": 15.250622406639003, "grad_norm": 45.385074615478516, "learning_rate": 1.3903734439834026e-05, "loss": 0.4802, "step": 18377 }, { "epoch": 15.251452282157675, "grad_norm": 42.24882125854492, "learning_rate": 1.3903402489626557e-05, "loss": 0.6847, "step": 18378 }, { "epoch": 15.252282157676348, "grad_norm": 58.20920181274414, "learning_rate": 1.3903070539419087e-05, "loss": 1.1311, "step": 18379 }, { "epoch": 15.25311203319502, "grad_norm": 43.71552658081055, "learning_rate": 1.390273858921162e-05, "loss": 0.908, "step": 18380 }, { "epoch": 15.253941908713692, "grad_norm": 25.726306915283203, "learning_rate": 1.3902406639004151e-05, "loss": 1.0794, "step": 18381 }, { "epoch": 15.254771784232364, "grad_norm": 19.863666534423828, "learning_rate": 1.390207468879668e-05, "loss": 0.6897, "step": 18382 }, { "epoch": 15.255601659751036, "grad_norm": 28.81501579284668, 
"learning_rate": 1.3901742738589212e-05, "loss": 0.8921, "step": 18383 }, { "epoch": 15.256431535269709, "grad_norm": 45.50078201293945, "learning_rate": 1.3901410788381744e-05, "loss": 1.1025, "step": 18384 }, { "epoch": 15.25726141078838, "grad_norm": 55.27637481689453, "learning_rate": 1.3901078838174275e-05, "loss": 0.7054, "step": 18385 }, { "epoch": 15.258091286307055, "grad_norm": 24.411558151245117, "learning_rate": 1.3900746887966805e-05, "loss": 0.3998, "step": 18386 }, { "epoch": 15.258921161825727, "grad_norm": 28.5589656829834, "learning_rate": 1.3900414937759337e-05, "loss": 0.3857, "step": 18387 }, { "epoch": 15.2597510373444, "grad_norm": 12.673486709594727, "learning_rate": 1.3900082987551868e-05, "loss": 0.2801, "step": 18388 }, { "epoch": 15.260580912863071, "grad_norm": 19.63353157043457, "learning_rate": 1.38997510373444e-05, "loss": 0.3898, "step": 18389 }, { "epoch": 15.261410788381744, "grad_norm": 51.15908432006836, "learning_rate": 1.3899419087136932e-05, "loss": 0.9541, "step": 18390 }, { "epoch": 15.262240663900416, "grad_norm": 87.30024719238281, "learning_rate": 1.389908713692946e-05, "loss": 0.7024, "step": 18391 }, { "epoch": 15.263070539419088, "grad_norm": 35.988555908203125, "learning_rate": 1.3898755186721993e-05, "loss": 0.5887, "step": 18392 }, { "epoch": 15.26390041493776, "grad_norm": 71.72148132324219, "learning_rate": 1.3898423236514525e-05, "loss": 0.5818, "step": 18393 }, { "epoch": 15.264730290456432, "grad_norm": 46.604248046875, "learning_rate": 1.3898091286307055e-05, "loss": 0.5001, "step": 18394 }, { "epoch": 15.265560165975105, "grad_norm": 15.86157512664795, "learning_rate": 1.3897759336099586e-05, "loss": 0.3468, "step": 18395 }, { "epoch": 15.266390041493777, "grad_norm": 60.02962875366211, "learning_rate": 1.3897427385892118e-05, "loss": 0.8997, "step": 18396 }, { "epoch": 15.267219917012449, "grad_norm": 49.87171173095703, "learning_rate": 1.3897095435684648e-05, "loss": 1.0348, "step": 18397 }, { "epoch": 
15.268049792531121, "grad_norm": 26.41221046447754, "learning_rate": 1.389676348547718e-05, "loss": 0.561, "step": 18398 }, { "epoch": 15.268879668049793, "grad_norm": 29.86948013305664, "learning_rate": 1.389643153526971e-05, "loss": 0.4186, "step": 18399 }, { "epoch": 15.269709543568466, "grad_norm": 21.96122932434082, "learning_rate": 1.3896099585062241e-05, "loss": 0.2802, "step": 18400 }, { "epoch": 15.270539419087138, "grad_norm": 34.798622131347656, "learning_rate": 1.3895767634854773e-05, "loss": 0.709, "step": 18401 }, { "epoch": 15.27136929460581, "grad_norm": 40.60997772216797, "learning_rate": 1.3895435684647305e-05, "loss": 0.8385, "step": 18402 }, { "epoch": 15.272199170124482, "grad_norm": 69.66165161132812, "learning_rate": 1.3895103734439834e-05, "loss": 0.9425, "step": 18403 }, { "epoch": 15.273029045643154, "grad_norm": 37.79839324951172, "learning_rate": 1.3894771784232366e-05, "loss": 1.3847, "step": 18404 }, { "epoch": 15.273858921161827, "grad_norm": 36.590492248535156, "learning_rate": 1.3894439834024898e-05, "loss": 0.8611, "step": 18405 }, { "epoch": 15.274688796680499, "grad_norm": 73.78792572021484, "learning_rate": 1.3894107883817429e-05, "loss": 0.6235, "step": 18406 }, { "epoch": 15.275518672199171, "grad_norm": 30.444398880004883, "learning_rate": 1.3893775933609959e-05, "loss": 0.4717, "step": 18407 }, { "epoch": 15.276348547717843, "grad_norm": 41.28487777709961, "learning_rate": 1.389344398340249e-05, "loss": 0.6481, "step": 18408 }, { "epoch": 15.277178423236515, "grad_norm": 22.93361473083496, "learning_rate": 1.3893112033195022e-05, "loss": 0.5225, "step": 18409 }, { "epoch": 15.278008298755188, "grad_norm": 41.29789352416992, "learning_rate": 1.3892780082987554e-05, "loss": 1.4446, "step": 18410 }, { "epoch": 15.27883817427386, "grad_norm": 35.23436737060547, "learning_rate": 1.3892448132780082e-05, "loss": 0.5361, "step": 18411 }, { "epoch": 15.279668049792532, "grad_norm": 36.1809196472168, "learning_rate": 
1.3892116182572615e-05, "loss": 0.5712, "step": 18412 }, { "epoch": 15.280497925311204, "grad_norm": 34.9419059753418, "learning_rate": 1.3891784232365147e-05, "loss": 0.8867, "step": 18413 }, { "epoch": 15.281327800829876, "grad_norm": 49.09061050415039, "learning_rate": 1.3891452282157679e-05, "loss": 0.8517, "step": 18414 }, { "epoch": 15.282157676348548, "grad_norm": 92.66098022460938, "learning_rate": 1.389112033195021e-05, "loss": 0.4761, "step": 18415 }, { "epoch": 15.28298755186722, "grad_norm": 66.5853042602539, "learning_rate": 1.389078838174274e-05, "loss": 0.7359, "step": 18416 }, { "epoch": 15.283817427385893, "grad_norm": 17.916624069213867, "learning_rate": 1.389045643153527e-05, "loss": 0.3339, "step": 18417 }, { "epoch": 15.284647302904565, "grad_norm": 32.443946838378906, "learning_rate": 1.3890124481327802e-05, "loss": 0.6945, "step": 18418 }, { "epoch": 15.285477178423237, "grad_norm": 21.997211456298828, "learning_rate": 1.3889792531120334e-05, "loss": 0.3087, "step": 18419 }, { "epoch": 15.28630705394191, "grad_norm": 39.2054443359375, "learning_rate": 1.3889460580912863e-05, "loss": 0.4082, "step": 18420 }, { "epoch": 15.287136929460582, "grad_norm": 37.026187896728516, "learning_rate": 1.3889128630705395e-05, "loss": 0.6562, "step": 18421 }, { "epoch": 15.287966804979254, "grad_norm": 22.46609115600586, "learning_rate": 1.3888796680497927e-05, "loss": 0.4925, "step": 18422 }, { "epoch": 15.288796680497926, "grad_norm": 69.91017150878906, "learning_rate": 1.388846473029046e-05, "loss": 1.7356, "step": 18423 }, { "epoch": 15.289626556016598, "grad_norm": 90.35822296142578, "learning_rate": 1.3888132780082988e-05, "loss": 0.8501, "step": 18424 }, { "epoch": 15.29045643153527, "grad_norm": 86.80455017089844, "learning_rate": 1.388780082987552e-05, "loss": 1.0913, "step": 18425 }, { "epoch": 15.291286307053943, "grad_norm": 42.5941047668457, "learning_rate": 1.388746887966805e-05, "loss": 1.0613, "step": 18426 }, { "epoch": 15.292116182572615, 
"grad_norm": 31.173730850219727, "learning_rate": 1.3887136929460583e-05, "loss": 0.5033, "step": 18427 }, { "epoch": 15.292946058091287, "grad_norm": 45.6299934387207, "learning_rate": 1.3886804979253113e-05, "loss": 0.6167, "step": 18428 }, { "epoch": 15.29377593360996, "grad_norm": 43.98166275024414, "learning_rate": 1.3886473029045643e-05, "loss": 1.3837, "step": 18429 }, { "epoch": 15.294605809128631, "grad_norm": 25.681936264038086, "learning_rate": 1.3886141078838176e-05, "loss": 0.6256, "step": 18430 }, { "epoch": 15.295435684647304, "grad_norm": 14.907707214355469, "learning_rate": 1.3885809128630708e-05, "loss": 0.4351, "step": 18431 }, { "epoch": 15.296265560165976, "grad_norm": 15.570572853088379, "learning_rate": 1.3885477178423236e-05, "loss": 0.5162, "step": 18432 }, { "epoch": 15.297095435684648, "grad_norm": 16.934274673461914, "learning_rate": 1.3885145228215769e-05, "loss": 0.3485, "step": 18433 }, { "epoch": 15.29792531120332, "grad_norm": 33.098388671875, "learning_rate": 1.38848132780083e-05, "loss": 0.8267, "step": 18434 }, { "epoch": 15.298755186721992, "grad_norm": 52.80481719970703, "learning_rate": 1.3884481327800831e-05, "loss": 1.6685, "step": 18435 }, { "epoch": 15.299585062240665, "grad_norm": 43.3015022277832, "learning_rate": 1.3884149377593361e-05, "loss": 0.6208, "step": 18436 }, { "epoch": 15.300414937759337, "grad_norm": 82.85228729248047, "learning_rate": 1.3883817427385894e-05, "loss": 0.6249, "step": 18437 }, { "epoch": 15.301244813278009, "grad_norm": 96.34964752197266, "learning_rate": 1.3883485477178424e-05, "loss": 0.7782, "step": 18438 }, { "epoch": 15.302074688796681, "grad_norm": 66.77535247802734, "learning_rate": 1.3883153526970956e-05, "loss": 0.6891, "step": 18439 }, { "epoch": 15.302904564315353, "grad_norm": 60.2362060546875, "learning_rate": 1.3882821576763485e-05, "loss": 1.198, "step": 18440 }, { "epoch": 15.303734439834026, "grad_norm": 37.241947174072266, "learning_rate": 1.3882489626556017e-05, "loss": 
0.5155, "step": 18441 }, { "epoch": 15.304564315352698, "grad_norm": 34.98218536376953, "learning_rate": 1.3882157676348549e-05, "loss": 0.5617, "step": 18442 }, { "epoch": 15.30539419087137, "grad_norm": 30.51979637145996, "learning_rate": 1.3881825726141081e-05, "loss": 0.8953, "step": 18443 }, { "epoch": 15.306224066390042, "grad_norm": 63.967079162597656, "learning_rate": 1.3881493775933612e-05, "loss": 0.9937, "step": 18444 }, { "epoch": 15.307053941908714, "grad_norm": 14.386248588562012, "learning_rate": 1.3881161825726142e-05, "loss": 0.3398, "step": 18445 }, { "epoch": 15.307883817427387, "grad_norm": 58.87217330932617, "learning_rate": 1.3880829875518674e-05, "loss": 1.1819, "step": 18446 }, { "epoch": 15.308713692946059, "grad_norm": 30.75370979309082, "learning_rate": 1.3880497925311204e-05, "loss": 0.78, "step": 18447 }, { "epoch": 15.309543568464731, "grad_norm": 60.007469177246094, "learning_rate": 1.3880165975103737e-05, "loss": 1.158, "step": 18448 }, { "epoch": 15.310373443983403, "grad_norm": 24.255624771118164, "learning_rate": 1.3879834024896265e-05, "loss": 0.4421, "step": 18449 }, { "epoch": 15.311203319502075, "grad_norm": 39.40778350830078, "learning_rate": 1.3879502074688797e-05, "loss": 0.4832, "step": 18450 }, { "epoch": 15.312033195020748, "grad_norm": 60.26811981201172, "learning_rate": 1.387917012448133e-05, "loss": 0.6805, "step": 18451 }, { "epoch": 15.31286307053942, "grad_norm": 34.42686080932617, "learning_rate": 1.3878838174273862e-05, "loss": 0.7011, "step": 18452 }, { "epoch": 15.313692946058092, "grad_norm": 61.72910690307617, "learning_rate": 1.387850622406639e-05, "loss": 1.5003, "step": 18453 }, { "epoch": 15.314522821576764, "grad_norm": 30.144733428955078, "learning_rate": 1.3878174273858922e-05, "loss": 0.5089, "step": 18454 }, { "epoch": 15.315352697095436, "grad_norm": 50.40978240966797, "learning_rate": 1.3877842323651453e-05, "loss": 0.5177, "step": 18455 }, { "epoch": 15.316182572614109, "grad_norm": 
57.414894104003906, "learning_rate": 1.3877510373443985e-05, "loss": 0.6114, "step": 18456 }, { "epoch": 15.31701244813278, "grad_norm": 73.32588195800781, "learning_rate": 1.3877178423236515e-05, "loss": 0.4944, "step": 18457 }, { "epoch": 15.317842323651453, "grad_norm": 42.747962951660156, "learning_rate": 1.3876846473029046e-05, "loss": 0.9071, "step": 18458 }, { "epoch": 15.318672199170125, "grad_norm": 24.189125061035156, "learning_rate": 1.3876514522821578e-05, "loss": 0.3978, "step": 18459 }, { "epoch": 15.319502074688797, "grad_norm": 36.726898193359375, "learning_rate": 1.387618257261411e-05, "loss": 0.3421, "step": 18460 }, { "epoch": 15.32033195020747, "grad_norm": 67.14862823486328, "learning_rate": 1.3875850622406639e-05, "loss": 1.1164, "step": 18461 }, { "epoch": 15.321161825726142, "grad_norm": 64.14600372314453, "learning_rate": 1.387551867219917e-05, "loss": 1.2619, "step": 18462 }, { "epoch": 15.321991701244814, "grad_norm": 27.01013946533203, "learning_rate": 1.3875186721991703e-05, "loss": 0.4938, "step": 18463 }, { "epoch": 15.322821576763486, "grad_norm": 21.350696563720703, "learning_rate": 1.3874854771784233e-05, "loss": 0.4703, "step": 18464 }, { "epoch": 15.323651452282158, "grad_norm": 92.01130676269531, "learning_rate": 1.3874522821576764e-05, "loss": 1.1067, "step": 18465 }, { "epoch": 15.32448132780083, "grad_norm": 64.91100311279297, "learning_rate": 1.3874190871369296e-05, "loss": 1.2019, "step": 18466 }, { "epoch": 15.325311203319503, "grad_norm": 46.87801742553711, "learning_rate": 1.3873858921161826e-05, "loss": 0.7587, "step": 18467 }, { "epoch": 15.326141078838175, "grad_norm": 48.757225036621094, "learning_rate": 1.3873526970954358e-05, "loss": 0.3443, "step": 18468 }, { "epoch": 15.326970954356847, "grad_norm": 24.088054656982422, "learning_rate": 1.387319502074689e-05, "loss": 0.3364, "step": 18469 }, { "epoch": 15.32780082987552, "grad_norm": 40.221805572509766, "learning_rate": 1.387286307053942e-05, "loss": 0.9166, 
"step": 18470 }, { "epoch": 15.328630705394191, "grad_norm": 31.40512466430664, "learning_rate": 1.3872531120331951e-05, "loss": 0.9349, "step": 18471 }, { "epoch": 15.329460580912864, "grad_norm": 61.20055389404297, "learning_rate": 1.3872199170124483e-05, "loss": 0.933, "step": 18472 }, { "epoch": 15.330290456431536, "grad_norm": 30.88923454284668, "learning_rate": 1.3871867219917014e-05, "loss": 1.2399, "step": 18473 }, { "epoch": 15.331120331950208, "grad_norm": 57.771263122558594, "learning_rate": 1.3871535269709544e-05, "loss": 0.8588, "step": 18474 }, { "epoch": 15.33195020746888, "grad_norm": 45.6231575012207, "learning_rate": 1.3871203319502076e-05, "loss": 1.4375, "step": 18475 }, { "epoch": 15.332780082987552, "grad_norm": 100.75060272216797, "learning_rate": 1.3870871369294607e-05, "loss": 1.0564, "step": 18476 }, { "epoch": 15.333609958506225, "grad_norm": 44.02323913574219, "learning_rate": 1.3870539419087139e-05, "loss": 0.4393, "step": 18477 }, { "epoch": 15.334439834024897, "grad_norm": 47.46158218383789, "learning_rate": 1.3870207468879668e-05, "loss": 0.6896, "step": 18478 }, { "epoch": 15.335269709543569, "grad_norm": 76.5503158569336, "learning_rate": 1.38698755186722e-05, "loss": 1.2919, "step": 18479 }, { "epoch": 15.336099585062241, "grad_norm": 36.414581298828125, "learning_rate": 1.3869543568464732e-05, "loss": 0.7654, "step": 18480 }, { "epoch": 15.336929460580913, "grad_norm": 16.050443649291992, "learning_rate": 1.3869211618257264e-05, "loss": 0.4229, "step": 18481 }, { "epoch": 15.337759336099586, "grad_norm": 22.67620086669922, "learning_rate": 1.3868879668049793e-05, "loss": 0.3989, "step": 18482 }, { "epoch": 15.338589211618258, "grad_norm": 38.525611877441406, "learning_rate": 1.3868547717842325e-05, "loss": 0.8139, "step": 18483 }, { "epoch": 15.33941908713693, "grad_norm": 54.3824577331543, "learning_rate": 1.3868215767634857e-05, "loss": 0.9226, "step": 18484 }, { "epoch": 15.340248962655602, "grad_norm": 40.74042510986328, 
"learning_rate": 1.3867883817427387e-05, "loss": 0.5307, "step": 18485 }, { "epoch": 15.341078838174274, "grad_norm": 42.7231559753418, "learning_rate": 1.3867551867219918e-05, "loss": 0.5594, "step": 18486 }, { "epoch": 15.341908713692947, "grad_norm": 45.01777648925781, "learning_rate": 1.3867219917012448e-05, "loss": 0.6341, "step": 18487 }, { "epoch": 15.342738589211619, "grad_norm": 21.193527221679688, "learning_rate": 1.386688796680498e-05, "loss": 0.5097, "step": 18488 }, { "epoch": 15.343568464730291, "grad_norm": 61.02610778808594, "learning_rate": 1.3866556016597512e-05, "loss": 1.2976, "step": 18489 }, { "epoch": 15.344398340248963, "grad_norm": 34.919189453125, "learning_rate": 1.3866224066390041e-05, "loss": 0.7096, "step": 18490 }, { "epoch": 15.345228215767635, "grad_norm": 27.741029739379883, "learning_rate": 1.3865892116182573e-05, "loss": 0.5017, "step": 18491 }, { "epoch": 15.346058091286308, "grad_norm": 56.90035629272461, "learning_rate": 1.3865560165975105e-05, "loss": 0.9443, "step": 18492 }, { "epoch": 15.34688796680498, "grad_norm": 29.205228805541992, "learning_rate": 1.3865228215767637e-05, "loss": 0.7426, "step": 18493 }, { "epoch": 15.347717842323652, "grad_norm": 30.784229278564453, "learning_rate": 1.3864896265560168e-05, "loss": 0.6059, "step": 18494 }, { "epoch": 15.348547717842324, "grad_norm": 48.03257751464844, "learning_rate": 1.3864564315352698e-05, "loss": 1.4592, "step": 18495 }, { "epoch": 15.349377593360996, "grad_norm": 47.34561538696289, "learning_rate": 1.3864232365145229e-05, "loss": 1.0573, "step": 18496 }, { "epoch": 15.350207468879669, "grad_norm": 41.99517059326172, "learning_rate": 1.386390041493776e-05, "loss": 0.901, "step": 18497 }, { "epoch": 15.35103734439834, "grad_norm": 35.22740173339844, "learning_rate": 1.3863568464730293e-05, "loss": 0.9237, "step": 18498 }, { "epoch": 15.351867219917013, "grad_norm": 30.684070587158203, "learning_rate": 1.3863236514522822e-05, "loss": 0.4292, "step": 18499 }, { "epoch": 
15.352697095435685, "grad_norm": 36.430110931396484, "learning_rate": 1.3862904564315354e-05, "loss": 1.0899, "step": 18500 }, { "epoch": 15.353526970954357, "grad_norm": 63.86764907836914, "learning_rate": 1.3862572614107886e-05, "loss": 0.5402, "step": 18501 }, { "epoch": 15.35435684647303, "grad_norm": 35.06377410888672, "learning_rate": 1.3862240663900416e-05, "loss": 0.4989, "step": 18502 }, { "epoch": 15.355186721991702, "grad_norm": 49.375396728515625, "learning_rate": 1.3861908713692947e-05, "loss": 0.6797, "step": 18503 }, { "epoch": 15.356016597510374, "grad_norm": 54.0859260559082, "learning_rate": 1.3861576763485479e-05, "loss": 0.5687, "step": 18504 }, { "epoch": 15.356846473029046, "grad_norm": 44.40122604370117, "learning_rate": 1.3861244813278009e-05, "loss": 0.6689, "step": 18505 }, { "epoch": 15.357676348547718, "grad_norm": 80.13398742675781, "learning_rate": 1.3860912863070541e-05, "loss": 0.7125, "step": 18506 }, { "epoch": 15.35850622406639, "grad_norm": 40.716949462890625, "learning_rate": 1.3860580912863072e-05, "loss": 1.0503, "step": 18507 }, { "epoch": 15.359336099585063, "grad_norm": 39.39451217651367, "learning_rate": 1.3860248962655602e-05, "loss": 0.4513, "step": 18508 }, { "epoch": 15.360165975103735, "grad_norm": 83.7860336303711, "learning_rate": 1.3859917012448134e-05, "loss": 0.4342, "step": 18509 }, { "epoch": 15.360995850622407, "grad_norm": 32.33136749267578, "learning_rate": 1.3859585062240666e-05, "loss": 0.3988, "step": 18510 }, { "epoch": 15.36182572614108, "grad_norm": 73.24239349365234, "learning_rate": 1.3859253112033195e-05, "loss": 0.6531, "step": 18511 }, { "epoch": 15.362655601659752, "grad_norm": 31.159488677978516, "learning_rate": 1.3858921161825727e-05, "loss": 1.0926, "step": 18512 }, { "epoch": 15.363485477178424, "grad_norm": 42.6546630859375, "learning_rate": 1.385858921161826e-05, "loss": 0.7897, "step": 18513 }, { "epoch": 15.364315352697096, "grad_norm": 30.27620506286621, "learning_rate": 
1.385825726141079e-05, "loss": 0.8867, "step": 18514 }, { "epoch": 15.365145228215768, "grad_norm": 68.06594848632812, "learning_rate": 1.385792531120332e-05, "loss": 0.6765, "step": 18515 }, { "epoch": 15.36597510373444, "grad_norm": 51.275909423828125, "learning_rate": 1.385759336099585e-05, "loss": 0.4482, "step": 18516 }, { "epoch": 15.366804979253113, "grad_norm": 52.36790084838867, "learning_rate": 1.3857261410788383e-05, "loss": 1.082, "step": 18517 }, { "epoch": 15.367634854771785, "grad_norm": 18.226205825805664, "learning_rate": 1.3856929460580915e-05, "loss": 0.6985, "step": 18518 }, { "epoch": 15.368464730290457, "grad_norm": 44.93293762207031, "learning_rate": 1.3856597510373443e-05, "loss": 0.3262, "step": 18519 }, { "epoch": 15.369294605809129, "grad_norm": 38.210609436035156, "learning_rate": 1.3856265560165975e-05, "loss": 0.4684, "step": 18520 }, { "epoch": 15.370124481327801, "grad_norm": 27.1173038482666, "learning_rate": 1.3855933609958508e-05, "loss": 0.5092, "step": 18521 }, { "epoch": 15.370954356846473, "grad_norm": 20.40658187866211, "learning_rate": 1.385560165975104e-05, "loss": 0.4974, "step": 18522 }, { "epoch": 15.371784232365146, "grad_norm": 22.31588363647461, "learning_rate": 1.385526970954357e-05, "loss": 0.6272, "step": 18523 }, { "epoch": 15.372614107883818, "grad_norm": 31.413007736206055, "learning_rate": 1.38549377593361e-05, "loss": 0.5492, "step": 18524 }, { "epoch": 15.37344398340249, "grad_norm": 28.949108123779297, "learning_rate": 1.3854605809128631e-05, "loss": 0.686, "step": 18525 }, { "epoch": 15.374273858921162, "grad_norm": 41.34235763549805, "learning_rate": 1.3854273858921163e-05, "loss": 0.396, "step": 18526 }, { "epoch": 15.375103734439834, "grad_norm": 38.98930740356445, "learning_rate": 1.3853941908713695e-05, "loss": 1.2019, "step": 18527 }, { "epoch": 15.375933609958507, "grad_norm": 37.23221969604492, "learning_rate": 1.3853609958506224e-05, "loss": 0.413, "step": 18528 }, { "epoch": 15.376763485477179, 
"grad_norm": 45.958099365234375, "learning_rate": 1.3853278008298756e-05, "loss": 0.7725, "step": 18529 }, { "epoch": 15.377593360995851, "grad_norm": 23.955425262451172, "learning_rate": 1.3852946058091288e-05, "loss": 0.3849, "step": 18530 }, { "epoch": 15.378423236514523, "grad_norm": 43.750205993652344, "learning_rate": 1.385261410788382e-05, "loss": 0.6265, "step": 18531 }, { "epoch": 15.379253112033195, "grad_norm": 92.05840301513672, "learning_rate": 1.3852282157676349e-05, "loss": 0.8732, "step": 18532 }, { "epoch": 15.380082987551868, "grad_norm": 33.82767105102539, "learning_rate": 1.3851950207468881e-05, "loss": 0.7166, "step": 18533 }, { "epoch": 15.38091286307054, "grad_norm": 53.675926208496094, "learning_rate": 1.3851618257261411e-05, "loss": 0.5697, "step": 18534 }, { "epoch": 15.381742738589212, "grad_norm": 56.42411804199219, "learning_rate": 1.3851286307053944e-05, "loss": 0.7233, "step": 18535 }, { "epoch": 15.382572614107884, "grad_norm": 71.04215240478516, "learning_rate": 1.3850954356846474e-05, "loss": 0.7237, "step": 18536 }, { "epoch": 15.383402489626556, "grad_norm": 89.6199722290039, "learning_rate": 1.3850622406639004e-05, "loss": 0.7406, "step": 18537 }, { "epoch": 15.384232365145229, "grad_norm": 36.32239532470703, "learning_rate": 1.3850290456431536e-05, "loss": 0.8189, "step": 18538 }, { "epoch": 15.3850622406639, "grad_norm": 42.1140251159668, "learning_rate": 1.3849958506224069e-05, "loss": 0.8098, "step": 18539 }, { "epoch": 15.385892116182573, "grad_norm": 78.61566925048828, "learning_rate": 1.3849626556016597e-05, "loss": 1.2201, "step": 18540 }, { "epoch": 15.386721991701245, "grad_norm": 45.57210159301758, "learning_rate": 1.384929460580913e-05, "loss": 0.6797, "step": 18541 }, { "epoch": 15.387551867219917, "grad_norm": 39.374332427978516, "learning_rate": 1.3848962655601662e-05, "loss": 0.8058, "step": 18542 }, { "epoch": 15.38838174273859, "grad_norm": 39.78105545043945, "learning_rate": 1.3848630705394192e-05, "loss": 
0.687, "step": 18543 }, { "epoch": 15.389211618257262, "grad_norm": 25.55030059814453, "learning_rate": 1.3848298755186722e-05, "loss": 0.3392, "step": 18544 }, { "epoch": 15.390041493775934, "grad_norm": 39.69191360473633, "learning_rate": 1.3847966804979254e-05, "loss": 0.5939, "step": 18545 }, { "epoch": 15.390871369294606, "grad_norm": 191.7064666748047, "learning_rate": 1.3847634854771785e-05, "loss": 2.2074, "step": 18546 }, { "epoch": 15.391701244813278, "grad_norm": 21.12916374206543, "learning_rate": 1.3847302904564317e-05, "loss": 0.3427, "step": 18547 }, { "epoch": 15.39253112033195, "grad_norm": 17.84360122680664, "learning_rate": 1.3846970954356849e-05, "loss": 0.2704, "step": 18548 }, { "epoch": 15.393360995850623, "grad_norm": 32.41455078125, "learning_rate": 1.3846639004149378e-05, "loss": 0.5998, "step": 18549 }, { "epoch": 15.394190871369295, "grad_norm": 57.79342269897461, "learning_rate": 1.384630705394191e-05, "loss": 0.8348, "step": 18550 }, { "epoch": 15.395020746887967, "grad_norm": 55.72222900390625, "learning_rate": 1.3845975103734442e-05, "loss": 0.6015, "step": 18551 }, { "epoch": 15.39585062240664, "grad_norm": 23.625717163085938, "learning_rate": 1.3845643153526972e-05, "loss": 0.4187, "step": 18552 }, { "epoch": 15.396680497925312, "grad_norm": 24.624662399291992, "learning_rate": 1.3845311203319503e-05, "loss": 0.4885, "step": 18553 }, { "epoch": 15.397510373443984, "grad_norm": 24.461511611938477, "learning_rate": 1.3844979253112035e-05, "loss": 0.6668, "step": 18554 }, { "epoch": 15.398340248962656, "grad_norm": 23.764833450317383, "learning_rate": 1.3844647302904565e-05, "loss": 0.3727, "step": 18555 }, { "epoch": 15.399170124481328, "grad_norm": 71.16075134277344, "learning_rate": 1.3844315352697097e-05, "loss": 1.1682, "step": 18556 }, { "epoch": 15.4, "grad_norm": 32.794342041015625, "learning_rate": 1.3843983402489626e-05, "loss": 0.4872, "step": 18557 }, { "epoch": 15.400829875518673, "grad_norm": 33.57942581176758, 
"learning_rate": 1.3843651452282158e-05, "loss": 0.6274, "step": 18558 }, { "epoch": 15.401659751037345, "grad_norm": 16.369810104370117, "learning_rate": 1.384331950207469e-05, "loss": 0.3375, "step": 18559 }, { "epoch": 15.402489626556017, "grad_norm": 51.53743362426758, "learning_rate": 1.3842987551867223e-05, "loss": 0.7756, "step": 18560 }, { "epoch": 15.40331950207469, "grad_norm": 42.48311233520508, "learning_rate": 1.3842655601659751e-05, "loss": 0.6166, "step": 18561 }, { "epoch": 15.404149377593361, "grad_norm": 49.79118728637695, "learning_rate": 1.3842323651452283e-05, "loss": 1.5273, "step": 18562 }, { "epoch": 15.404979253112034, "grad_norm": 39.477169036865234, "learning_rate": 1.3841991701244815e-05, "loss": 0.8751, "step": 18563 }, { "epoch": 15.405809128630706, "grad_norm": 33.75798416137695, "learning_rate": 1.3841659751037346e-05, "loss": 0.6447, "step": 18564 }, { "epoch": 15.406639004149378, "grad_norm": 43.65189743041992, "learning_rate": 1.3841327800829876e-05, "loss": 0.5219, "step": 18565 }, { "epoch": 15.40746887966805, "grad_norm": 56.53430938720703, "learning_rate": 1.3840995850622407e-05, "loss": 1.0886, "step": 18566 }, { "epoch": 15.408298755186722, "grad_norm": 39.2552604675293, "learning_rate": 1.3840663900414939e-05, "loss": 0.7263, "step": 18567 }, { "epoch": 15.409128630705395, "grad_norm": 86.34314727783203, "learning_rate": 1.3840331950207471e-05, "loss": 0.6773, "step": 18568 }, { "epoch": 15.409958506224067, "grad_norm": 15.223381996154785, "learning_rate": 1.384e-05, "loss": 0.409, "step": 18569 }, { "epoch": 15.410788381742739, "grad_norm": 23.57506561279297, "learning_rate": 1.3839668049792532e-05, "loss": 0.6265, "step": 18570 }, { "epoch": 15.411618257261411, "grad_norm": 40.3552131652832, "learning_rate": 1.3839336099585064e-05, "loss": 0.7961, "step": 18571 }, { "epoch": 15.412448132780083, "grad_norm": 90.80349731445312, "learning_rate": 1.3839004149377594e-05, "loss": 0.7394, "step": 18572 }, { "epoch": 
15.413278008298755, "grad_norm": 67.57636260986328, "learning_rate": 1.3838672199170126e-05, "loss": 1.1769, "step": 18573 }, { "epoch": 15.414107883817428, "grad_norm": 37.80217742919922, "learning_rate": 1.3838340248962657e-05, "loss": 0.4556, "step": 18574 }, { "epoch": 15.4149377593361, "grad_norm": 37.126224517822266, "learning_rate": 1.3838008298755187e-05, "loss": 0.4068, "step": 18575 }, { "epoch": 15.415767634854772, "grad_norm": 31.291234970092773, "learning_rate": 1.383767634854772e-05, "loss": 0.645, "step": 18576 }, { "epoch": 15.416597510373444, "grad_norm": 19.31110191345215, "learning_rate": 1.3837344398340251e-05, "loss": 0.4229, "step": 18577 }, { "epoch": 15.417427385892116, "grad_norm": 25.58763885498047, "learning_rate": 1.383701244813278e-05, "loss": 0.6721, "step": 18578 }, { "epoch": 15.418257261410789, "grad_norm": 24.7281551361084, "learning_rate": 1.3836680497925312e-05, "loss": 0.5368, "step": 18579 }, { "epoch": 15.41908713692946, "grad_norm": 29.474332809448242, "learning_rate": 1.3836348547717844e-05, "loss": 0.6427, "step": 18580 }, { "epoch": 15.419917012448133, "grad_norm": 41.94707107543945, "learning_rate": 1.3836016597510375e-05, "loss": 0.5845, "step": 18581 }, { "epoch": 15.420746887966805, "grad_norm": 34.39613723754883, "learning_rate": 1.3835684647302905e-05, "loss": 0.5263, "step": 18582 }, { "epoch": 15.421576763485477, "grad_norm": 23.51113510131836, "learning_rate": 1.3835352697095437e-05, "loss": 0.42, "step": 18583 }, { "epoch": 15.42240663900415, "grad_norm": 47.07460021972656, "learning_rate": 1.3835020746887968e-05, "loss": 0.7404, "step": 18584 }, { "epoch": 15.423236514522822, "grad_norm": 82.89775848388672, "learning_rate": 1.38346887966805e-05, "loss": 0.8342, "step": 18585 }, { "epoch": 15.424066390041494, "grad_norm": 32.054935455322266, "learning_rate": 1.3834356846473029e-05, "loss": 0.4638, "step": 18586 }, { "epoch": 15.424896265560166, "grad_norm": 18.4774169921875, "learning_rate": 
1.383402489626556e-05, "loss": 0.4032, "step": 18587 }, { "epoch": 15.425726141078838, "grad_norm": 42.16188049316406, "learning_rate": 1.3833692946058093e-05, "loss": 0.743, "step": 18588 }, { "epoch": 15.42655601659751, "grad_norm": 33.852909088134766, "learning_rate": 1.3833360995850625e-05, "loss": 0.7373, "step": 18589 }, { "epoch": 15.427385892116183, "grad_norm": 49.9056282043457, "learning_rate": 1.3833029045643154e-05, "loss": 0.8144, "step": 18590 }, { "epoch": 15.428215767634855, "grad_norm": 50.39944076538086, "learning_rate": 1.3832697095435686e-05, "loss": 0.843, "step": 18591 }, { "epoch": 15.429045643153527, "grad_norm": 15.618391990661621, "learning_rate": 1.3832365145228218e-05, "loss": 0.2796, "step": 18592 }, { "epoch": 15.4298755186722, "grad_norm": 35.21379470825195, "learning_rate": 1.3832033195020748e-05, "loss": 0.6893, "step": 18593 }, { "epoch": 15.430705394190872, "grad_norm": 77.12654113769531, "learning_rate": 1.3831701244813279e-05, "loss": 1.0872, "step": 18594 }, { "epoch": 15.431535269709544, "grad_norm": 23.15406036376953, "learning_rate": 1.3831369294605809e-05, "loss": 0.4469, "step": 18595 }, { "epoch": 15.432365145228216, "grad_norm": 165.62879943847656, "learning_rate": 1.3831037344398341e-05, "loss": 1.4727, "step": 18596 }, { "epoch": 15.433195020746888, "grad_norm": 96.99134826660156, "learning_rate": 1.3830705394190873e-05, "loss": 0.5778, "step": 18597 }, { "epoch": 15.43402489626556, "grad_norm": 27.200286865234375, "learning_rate": 1.3830373443983402e-05, "loss": 0.8066, "step": 18598 }, { "epoch": 15.434854771784233, "grad_norm": 77.99435424804688, "learning_rate": 1.3830041493775934e-05, "loss": 0.5593, "step": 18599 }, { "epoch": 15.435684647302905, "grad_norm": 34.44944763183594, "learning_rate": 1.3829709543568466e-05, "loss": 0.5058, "step": 18600 }, { "epoch": 15.436514522821577, "grad_norm": 94.32852172851562, "learning_rate": 1.3829377593360998e-05, "loss": 1.2222, "step": 18601 }, { "epoch": 
15.43734439834025, "grad_norm": 53.0340690612793, "learning_rate": 1.3829045643153529e-05, "loss": 1.1626, "step": 18602 }, { "epoch": 15.438174273858921, "grad_norm": 55.45000457763672, "learning_rate": 1.3828713692946059e-05, "loss": 1.0286, "step": 18603 }, { "epoch": 15.439004149377594, "grad_norm": 43.835350036621094, "learning_rate": 1.382838174273859e-05, "loss": 0.8066, "step": 18604 }, { "epoch": 15.439834024896266, "grad_norm": 64.965087890625, "learning_rate": 1.3828049792531122e-05, "loss": 0.6703, "step": 18605 }, { "epoch": 15.440663900414938, "grad_norm": 67.21875762939453, "learning_rate": 1.3827717842323654e-05, "loss": 0.7505, "step": 18606 }, { "epoch": 15.44149377593361, "grad_norm": 57.53106689453125, "learning_rate": 1.3827385892116182e-05, "loss": 0.4683, "step": 18607 }, { "epoch": 15.442323651452282, "grad_norm": 50.59621047973633, "learning_rate": 1.3827053941908715e-05, "loss": 0.5521, "step": 18608 }, { "epoch": 15.443153526970955, "grad_norm": 76.90424346923828, "learning_rate": 1.3826721991701247e-05, "loss": 1.2816, "step": 18609 }, { "epoch": 15.443983402489627, "grad_norm": 25.59779167175293, "learning_rate": 1.3826390041493779e-05, "loss": 0.5259, "step": 18610 }, { "epoch": 15.444813278008299, "grad_norm": 59.62851333618164, "learning_rate": 1.3826058091286307e-05, "loss": 0.6476, "step": 18611 }, { "epoch": 15.445643153526971, "grad_norm": 51.9713134765625, "learning_rate": 1.382572614107884e-05, "loss": 0.6298, "step": 18612 }, { "epoch": 15.446473029045643, "grad_norm": 44.395286560058594, "learning_rate": 1.382539419087137e-05, "loss": 0.7591, "step": 18613 }, { "epoch": 15.447302904564316, "grad_norm": 17.11724090576172, "learning_rate": 1.3825062240663902e-05, "loss": 0.3931, "step": 18614 }, { "epoch": 15.448132780082988, "grad_norm": 95.5618667602539, "learning_rate": 1.3824730290456433e-05, "loss": 1.2862, "step": 18615 }, { "epoch": 15.44896265560166, "grad_norm": 44.575374603271484, "learning_rate": 
1.3824398340248963e-05, "loss": 1.3036, "step": 18616 }, { "epoch": 15.449792531120332, "grad_norm": 75.28148651123047, "learning_rate": 1.3824066390041495e-05, "loss": 1.0584, "step": 18617 }, { "epoch": 15.450622406639004, "grad_norm": 41.2021598815918, "learning_rate": 1.3823734439834027e-05, "loss": 0.5745, "step": 18618 }, { "epoch": 15.451452282157677, "grad_norm": 30.13123893737793, "learning_rate": 1.3823402489626556e-05, "loss": 0.4155, "step": 18619 }, { "epoch": 15.452282157676349, "grad_norm": 38.76001739501953, "learning_rate": 1.3823070539419088e-05, "loss": 0.7931, "step": 18620 }, { "epoch": 15.453112033195021, "grad_norm": 100.04353332519531, "learning_rate": 1.382273858921162e-05, "loss": 0.4667, "step": 18621 }, { "epoch": 15.453941908713693, "grad_norm": 28.62308692932129, "learning_rate": 1.382240663900415e-05, "loss": 0.6453, "step": 18622 }, { "epoch": 15.454771784232365, "grad_norm": 29.17645835876465, "learning_rate": 1.3822074688796681e-05, "loss": 0.6129, "step": 18623 }, { "epoch": 15.455601659751038, "grad_norm": 69.55496978759766, "learning_rate": 1.3821742738589213e-05, "loss": 0.858, "step": 18624 }, { "epoch": 15.45643153526971, "grad_norm": 33.08955764770508, "learning_rate": 1.3821410788381743e-05, "loss": 0.9191, "step": 18625 }, { "epoch": 15.457261410788382, "grad_norm": 41.699668884277344, "learning_rate": 1.3821078838174276e-05, "loss": 0.7268, "step": 18626 }, { "epoch": 15.458091286307054, "grad_norm": 24.076095581054688, "learning_rate": 1.3820746887966808e-05, "loss": 0.4195, "step": 18627 }, { "epoch": 15.458921161825726, "grad_norm": 36.81037139892578, "learning_rate": 1.3820414937759336e-05, "loss": 0.682, "step": 18628 }, { "epoch": 15.459751037344398, "grad_norm": 16.837902069091797, "learning_rate": 1.3820082987551868e-05, "loss": 0.4094, "step": 18629 }, { "epoch": 15.46058091286307, "grad_norm": 34.04557418823242, "learning_rate": 1.38197510373444e-05, "loss": 0.8913, "step": 18630 }, { "epoch": 
15.461410788381743, "grad_norm": 42.24201965332031, "learning_rate": 1.3819419087136931e-05, "loss": 0.8683, "step": 18631 }, { "epoch": 15.462240663900415, "grad_norm": 30.77220916748047, "learning_rate": 1.3819087136929461e-05, "loss": 0.6899, "step": 18632 }, { "epoch": 15.463070539419087, "grad_norm": 34.83113479614258, "learning_rate": 1.3818755186721992e-05, "loss": 1.083, "step": 18633 }, { "epoch": 15.46390041493776, "grad_norm": 28.269256591796875, "learning_rate": 1.3818423236514524e-05, "loss": 0.7559, "step": 18634 }, { "epoch": 15.464730290456432, "grad_norm": 48.16188049316406, "learning_rate": 1.3818091286307056e-05, "loss": 1.6195, "step": 18635 }, { "epoch": 15.465560165975104, "grad_norm": 33.996925354003906, "learning_rate": 1.3817759336099585e-05, "loss": 0.5704, "step": 18636 }, { "epoch": 15.466390041493776, "grad_norm": 32.50754165649414, "learning_rate": 1.3817427385892117e-05, "loss": 0.3998, "step": 18637 }, { "epoch": 15.467219917012448, "grad_norm": 98.60359191894531, "learning_rate": 1.3817095435684649e-05, "loss": 0.8079, "step": 18638 }, { "epoch": 15.46804979253112, "grad_norm": 30.52718162536621, "learning_rate": 1.3816763485477181e-05, "loss": 0.8572, "step": 18639 }, { "epoch": 15.468879668049793, "grad_norm": 43.60757827758789, "learning_rate": 1.381643153526971e-05, "loss": 0.5273, "step": 18640 }, { "epoch": 15.469709543568465, "grad_norm": 95.75257110595703, "learning_rate": 1.3816099585062242e-05, "loss": 1.0222, "step": 18641 }, { "epoch": 15.470539419087137, "grad_norm": 28.381818771362305, "learning_rate": 1.3815767634854772e-05, "loss": 0.3838, "step": 18642 }, { "epoch": 15.47136929460581, "grad_norm": 38.009788513183594, "learning_rate": 1.3815435684647304e-05, "loss": 1.4043, "step": 18643 }, { "epoch": 15.472199170124481, "grad_norm": 27.04263687133789, "learning_rate": 1.3815103734439835e-05, "loss": 0.3631, "step": 18644 }, { "epoch": 15.473029045643154, "grad_norm": 39.5959358215332, "learning_rate": 
1.3814771784232365e-05, "loss": 0.5243, "step": 18645 }, { "epoch": 15.473858921161826, "grad_norm": 61.05318832397461, "learning_rate": 1.3814439834024897e-05, "loss": 1.3828, "step": 18646 }, { "epoch": 15.474688796680498, "grad_norm": 29.61741065979004, "learning_rate": 1.381410788381743e-05, "loss": 0.3815, "step": 18647 }, { "epoch": 15.47551867219917, "grad_norm": 113.41907501220703, "learning_rate": 1.3813775933609958e-05, "loss": 0.8815, "step": 18648 }, { "epoch": 15.476348547717842, "grad_norm": 58.14303970336914, "learning_rate": 1.381344398340249e-05, "loss": 1.1623, "step": 18649 }, { "epoch": 15.477178423236515, "grad_norm": 121.8448486328125, "learning_rate": 1.3813112033195022e-05, "loss": 0.5711, "step": 18650 }, { "epoch": 15.478008298755187, "grad_norm": 60.52627182006836, "learning_rate": 1.3812780082987553e-05, "loss": 0.6112, "step": 18651 }, { "epoch": 15.478838174273859, "grad_norm": 39.772193908691406, "learning_rate": 1.3812448132780083e-05, "loss": 0.6647, "step": 18652 }, { "epoch": 15.479668049792531, "grad_norm": 51.66145706176758, "learning_rate": 1.3812116182572615e-05, "loss": 0.7688, "step": 18653 }, { "epoch": 15.480497925311203, "grad_norm": 41.324920654296875, "learning_rate": 1.3811784232365146e-05, "loss": 1.1319, "step": 18654 }, { "epoch": 15.481327800829876, "grad_norm": 37.903961181640625, "learning_rate": 1.3811452282157678e-05, "loss": 0.7482, "step": 18655 }, { "epoch": 15.482157676348548, "grad_norm": 39.03537368774414, "learning_rate": 1.381112033195021e-05, "loss": 0.7166, "step": 18656 }, { "epoch": 15.48298755186722, "grad_norm": 61.196529388427734, "learning_rate": 1.3810788381742739e-05, "loss": 1.4429, "step": 18657 }, { "epoch": 15.483817427385892, "grad_norm": 38.96015167236328, "learning_rate": 1.381045643153527e-05, "loss": 0.493, "step": 18658 }, { "epoch": 15.484647302904564, "grad_norm": 53.43727111816406, "learning_rate": 1.3810124481327803e-05, "loss": 0.7488, "step": 18659 }, { "epoch": 
15.485477178423237, "grad_norm": 36.44498825073242, "learning_rate": 1.3809792531120333e-05, "loss": 0.8807, "step": 18660 }, { "epoch": 15.486307053941909, "grad_norm": 27.081199645996094, "learning_rate": 1.3809460580912864e-05, "loss": 0.7531, "step": 18661 }, { "epoch": 15.487136929460581, "grad_norm": 47.52208709716797, "learning_rate": 1.3809128630705396e-05, "loss": 1.0138, "step": 18662 }, { "epoch": 15.487966804979253, "grad_norm": 48.15461730957031, "learning_rate": 1.3808796680497926e-05, "loss": 0.902, "step": 18663 }, { "epoch": 15.488796680497925, "grad_norm": 45.34363555908203, "learning_rate": 1.3808464730290458e-05, "loss": 0.9144, "step": 18664 }, { "epoch": 15.489626556016598, "grad_norm": 29.38663673400879, "learning_rate": 1.3808132780082987e-05, "loss": 0.4223, "step": 18665 }, { "epoch": 15.49045643153527, "grad_norm": 21.13532829284668, "learning_rate": 1.380780082987552e-05, "loss": 0.4543, "step": 18666 }, { "epoch": 15.491286307053942, "grad_norm": 75.7364501953125, "learning_rate": 1.3807468879668051e-05, "loss": 1.1074, "step": 18667 }, { "epoch": 15.492116182572614, "grad_norm": 37.334434509277344, "learning_rate": 1.3807136929460583e-05, "loss": 1.0308, "step": 18668 }, { "epoch": 15.492946058091286, "grad_norm": 19.88787841796875, "learning_rate": 1.3806804979253112e-05, "loss": 0.5853, "step": 18669 }, { "epoch": 15.493775933609959, "grad_norm": 16.605684280395508, "learning_rate": 1.3806473029045644e-05, "loss": 0.3324, "step": 18670 }, { "epoch": 15.49460580912863, "grad_norm": 68.0937728881836, "learning_rate": 1.3806141078838176e-05, "loss": 0.7736, "step": 18671 }, { "epoch": 15.495435684647303, "grad_norm": 67.80827331542969, "learning_rate": 1.3805809128630707e-05, "loss": 0.7682, "step": 18672 }, { "epoch": 15.496265560165975, "grad_norm": 38.27519989013672, "learning_rate": 1.3805477178423237e-05, "loss": 0.7722, "step": 18673 }, { "epoch": 15.497095435684647, "grad_norm": 55.53055953979492, "learning_rate": 
1.3805145228215768e-05, "loss": 0.364, "step": 18674 }, { "epoch": 15.49792531120332, "grad_norm": 37.93025588989258, "learning_rate": 1.38048132780083e-05, "loss": 0.7999, "step": 18675 }, { "epoch": 15.498755186721992, "grad_norm": 31.782732009887695, "learning_rate": 1.3804481327800832e-05, "loss": 0.6876, "step": 18676 }, { "epoch": 15.499585062240664, "grad_norm": 37.944496154785156, "learning_rate": 1.380414937759336e-05, "loss": 1.3088, "step": 18677 }, { "epoch": 15.500414937759336, "grad_norm": 91.27904510498047, "learning_rate": 1.3803817427385893e-05, "loss": 0.8938, "step": 18678 }, { "epoch": 15.501244813278008, "grad_norm": 32.64696502685547, "learning_rate": 1.3803485477178425e-05, "loss": 0.7677, "step": 18679 }, { "epoch": 15.50207468879668, "grad_norm": 36.97595977783203, "learning_rate": 1.3803153526970957e-05, "loss": 0.713, "step": 18680 }, { "epoch": 15.502904564315353, "grad_norm": 28.86326026916504, "learning_rate": 1.3802821576763487e-05, "loss": 0.8925, "step": 18681 }, { "epoch": 15.503734439834025, "grad_norm": 61.26229476928711, "learning_rate": 1.3802489626556018e-05, "loss": 0.556, "step": 18682 }, { "epoch": 15.504564315352697, "grad_norm": 51.945247650146484, "learning_rate": 1.3802157676348548e-05, "loss": 0.7949, "step": 18683 }, { "epoch": 15.50539419087137, "grad_norm": 31.25296974182129, "learning_rate": 1.380182572614108e-05, "loss": 0.6793, "step": 18684 }, { "epoch": 15.506224066390041, "grad_norm": 34.94560623168945, "learning_rate": 1.3801493775933612e-05, "loss": 1.3098, "step": 18685 }, { "epoch": 15.507053941908714, "grad_norm": 29.494529724121094, "learning_rate": 1.3801161825726141e-05, "loss": 0.4025, "step": 18686 }, { "epoch": 15.507883817427386, "grad_norm": 27.412511825561523, "learning_rate": 1.3800829875518673e-05, "loss": 0.8341, "step": 18687 }, { "epoch": 15.508713692946058, "grad_norm": 52.944149017333984, "learning_rate": 1.3800497925311205e-05, "loss": 1.0382, "step": 18688 }, { "epoch": 
15.50954356846473, "grad_norm": 35.807838439941406, "learning_rate": 1.3800165975103736e-05, "loss": 1.3534, "step": 18689 }, { "epoch": 15.510373443983402, "grad_norm": 68.82061004638672, "learning_rate": 1.3799834024896266e-05, "loss": 0.7135, "step": 18690 }, { "epoch": 15.511203319502075, "grad_norm": 46.148101806640625, "learning_rate": 1.3799502074688798e-05, "loss": 0.6786, "step": 18691 }, { "epoch": 15.512033195020747, "grad_norm": 40.04324722290039, "learning_rate": 1.3799170124481329e-05, "loss": 0.8518, "step": 18692 }, { "epoch": 15.512863070539419, "grad_norm": 38.30829620361328, "learning_rate": 1.379883817427386e-05, "loss": 0.8795, "step": 18693 }, { "epoch": 15.513692946058091, "grad_norm": 27.263086318969727, "learning_rate": 1.3798506224066391e-05, "loss": 0.6339, "step": 18694 }, { "epoch": 15.514522821576763, "grad_norm": 34.37141036987305, "learning_rate": 1.3798174273858922e-05, "loss": 0.4283, "step": 18695 }, { "epoch": 15.515352697095436, "grad_norm": 17.01752281188965, "learning_rate": 1.3797842323651454e-05, "loss": 0.2365, "step": 18696 }, { "epoch": 15.516182572614108, "grad_norm": 47.81901550292969, "learning_rate": 1.3797510373443986e-05, "loss": 1.0729, "step": 18697 }, { "epoch": 15.51701244813278, "grad_norm": 12.561394691467285, "learning_rate": 1.3797178423236514e-05, "loss": 0.3691, "step": 18698 }, { "epoch": 15.517842323651452, "grad_norm": 35.53403091430664, "learning_rate": 1.3796846473029047e-05, "loss": 0.4674, "step": 18699 }, { "epoch": 15.518672199170124, "grad_norm": 16.965089797973633, "learning_rate": 1.3796514522821579e-05, "loss": 0.3798, "step": 18700 }, { "epoch": 15.519502074688797, "grad_norm": 82.0162582397461, "learning_rate": 1.3796182572614109e-05, "loss": 0.6855, "step": 18701 }, { "epoch": 15.520331950207469, "grad_norm": 53.78080368041992, "learning_rate": 1.379585062240664e-05, "loss": 0.8556, "step": 18702 }, { "epoch": 15.521161825726141, "grad_norm": 22.744413375854492, "learning_rate": 
1.379551867219917e-05, "loss": 0.3826, "step": 18703 }, { "epoch": 15.521991701244813, "grad_norm": 112.0110092163086, "learning_rate": 1.3795186721991702e-05, "loss": 1.0018, "step": 18704 }, { "epoch": 15.522821576763485, "grad_norm": 16.917634963989258, "learning_rate": 1.3794854771784234e-05, "loss": 0.3288, "step": 18705 }, { "epoch": 15.523651452282158, "grad_norm": 72.85472106933594, "learning_rate": 1.3794522821576766e-05, "loss": 0.9818, "step": 18706 }, { "epoch": 15.52448132780083, "grad_norm": 77.97093200683594, "learning_rate": 1.3794190871369295e-05, "loss": 0.7334, "step": 18707 }, { "epoch": 15.525311203319502, "grad_norm": 60.61661911010742, "learning_rate": 1.3793858921161827e-05, "loss": 0.8961, "step": 18708 }, { "epoch": 15.526141078838174, "grad_norm": 21.515602111816406, "learning_rate": 1.379352697095436e-05, "loss": 0.413, "step": 18709 }, { "epoch": 15.526970954356846, "grad_norm": 16.679975509643555, "learning_rate": 1.379319502074689e-05, "loss": 0.3774, "step": 18710 }, { "epoch": 15.527800829875519, "grad_norm": 28.011512756347656, "learning_rate": 1.379286307053942e-05, "loss": 0.4657, "step": 18711 }, { "epoch": 15.52863070539419, "grad_norm": 47.70686721801758, "learning_rate": 1.379253112033195e-05, "loss": 1.0894, "step": 18712 }, { "epoch": 15.529460580912863, "grad_norm": 60.380165100097656, "learning_rate": 1.3792199170124483e-05, "loss": 0.686, "step": 18713 }, { "epoch": 15.530290456431535, "grad_norm": 44.579654693603516, "learning_rate": 1.3791867219917015e-05, "loss": 0.8922, "step": 18714 }, { "epoch": 15.531120331950207, "grad_norm": 18.53616714477539, "learning_rate": 1.3791535269709543e-05, "loss": 0.5277, "step": 18715 }, { "epoch": 15.53195020746888, "grad_norm": 59.6619987487793, "learning_rate": 1.3791203319502075e-05, "loss": 0.661, "step": 18716 }, { "epoch": 15.532780082987552, "grad_norm": 62.3972053527832, "learning_rate": 1.3790871369294608e-05, "loss": 0.6667, "step": 18717 }, { "epoch": 15.533609958506224, 
"grad_norm": 63.11347579956055, "learning_rate": 1.379053941908714e-05, "loss": 0.9924, "step": 18718 }, { "epoch": 15.534439834024896, "grad_norm": 25.396753311157227, "learning_rate": 1.3790207468879668e-05, "loss": 0.4286, "step": 18719 }, { "epoch": 15.535269709543568, "grad_norm": 56.96278762817383, "learning_rate": 1.37898755186722e-05, "loss": 0.5639, "step": 18720 }, { "epoch": 15.53609958506224, "grad_norm": 21.103538513183594, "learning_rate": 1.3789543568464731e-05, "loss": 0.4978, "step": 18721 }, { "epoch": 15.536929460580913, "grad_norm": 20.88692855834961, "learning_rate": 1.3789211618257263e-05, "loss": 0.4245, "step": 18722 }, { "epoch": 15.537759336099585, "grad_norm": 44.12409591674805, "learning_rate": 1.3788879668049793e-05, "loss": 0.8043, "step": 18723 }, { "epoch": 15.538589211618257, "grad_norm": 50.193748474121094, "learning_rate": 1.3788547717842324e-05, "loss": 0.7827, "step": 18724 }, { "epoch": 15.53941908713693, "grad_norm": 46.82112503051758, "learning_rate": 1.3788215767634856e-05, "loss": 0.8355, "step": 18725 }, { "epoch": 15.540248962655602, "grad_norm": 49.801090240478516, "learning_rate": 1.3787883817427388e-05, "loss": 1.0581, "step": 18726 }, { "epoch": 15.541078838174274, "grad_norm": 38.937557220458984, "learning_rate": 1.3787551867219917e-05, "loss": 0.7204, "step": 18727 }, { "epoch": 15.541908713692946, "grad_norm": 36.09785842895508, "learning_rate": 1.3787219917012449e-05, "loss": 0.6726, "step": 18728 }, { "epoch": 15.542738589211618, "grad_norm": 64.38936614990234, "learning_rate": 1.3786887966804981e-05, "loss": 0.9473, "step": 18729 }, { "epoch": 15.54356846473029, "grad_norm": 28.090599060058594, "learning_rate": 1.3786556016597511e-05, "loss": 0.5217, "step": 18730 }, { "epoch": 15.544398340248962, "grad_norm": 86.61302185058594, "learning_rate": 1.3786224066390042e-05, "loss": 0.8362, "step": 18731 }, { "epoch": 15.545228215767635, "grad_norm": 38.236785888671875, "learning_rate": 1.3785892116182574e-05, "loss": 
0.9839, "step": 18732 }, { "epoch": 15.546058091286307, "grad_norm": 53.91276550292969, "learning_rate": 1.3785560165975104e-05, "loss": 0.539, "step": 18733 }, { "epoch": 15.546887966804979, "grad_norm": 82.28461456298828, "learning_rate": 1.3785228215767636e-05, "loss": 1.013, "step": 18734 }, { "epoch": 15.547717842323651, "grad_norm": 26.49997901916504, "learning_rate": 1.3784896265560169e-05, "loss": 0.3453, "step": 18735 }, { "epoch": 15.548547717842323, "grad_norm": 24.62605857849121, "learning_rate": 1.3784564315352697e-05, "loss": 0.8864, "step": 18736 }, { "epoch": 15.549377593360996, "grad_norm": 42.145389556884766, "learning_rate": 1.378423236514523e-05, "loss": 1.212, "step": 18737 }, { "epoch": 15.550207468879668, "grad_norm": 35.88066101074219, "learning_rate": 1.3783900414937761e-05, "loss": 0.4898, "step": 18738 }, { "epoch": 15.55103734439834, "grad_norm": 25.479583740234375, "learning_rate": 1.3783568464730292e-05, "loss": 0.6066, "step": 18739 }, { "epoch": 15.551867219917012, "grad_norm": 33.98544692993164, "learning_rate": 1.3783236514522822e-05, "loss": 0.7324, "step": 18740 }, { "epoch": 15.552697095435684, "grad_norm": 45.55821228027344, "learning_rate": 1.3782904564315354e-05, "loss": 1.0855, "step": 18741 }, { "epoch": 15.553526970954357, "grad_norm": 37.55120086669922, "learning_rate": 1.3782572614107885e-05, "loss": 0.4496, "step": 18742 }, { "epoch": 15.554356846473029, "grad_norm": 65.44532775878906, "learning_rate": 1.3782240663900417e-05, "loss": 0.7639, "step": 18743 }, { "epoch": 15.555186721991701, "grad_norm": 50.871334075927734, "learning_rate": 1.3781908713692946e-05, "loss": 0.9798, "step": 18744 }, { "epoch": 15.556016597510373, "grad_norm": 34.61165237426758, "learning_rate": 1.3781576763485478e-05, "loss": 0.5683, "step": 18745 }, { "epoch": 15.556846473029045, "grad_norm": 23.85079574584961, "learning_rate": 1.378124481327801e-05, "loss": 0.5723, "step": 18746 }, { "epoch": 15.557676348547718, "grad_norm": 
45.63056564331055, "learning_rate": 1.3780912863070542e-05, "loss": 1.2304, "step": 18747 }, { "epoch": 15.55850622406639, "grad_norm": 75.96922302246094, "learning_rate": 1.378058091286307e-05, "loss": 0.8786, "step": 18748 }, { "epoch": 15.559336099585062, "grad_norm": 22.663373947143555, "learning_rate": 1.3780248962655603e-05, "loss": 0.5065, "step": 18749 }, { "epoch": 15.560165975103734, "grad_norm": 43.2859992980957, "learning_rate": 1.3779917012448133e-05, "loss": 0.6693, "step": 18750 }, { "epoch": 15.560995850622406, "grad_norm": 35.56203079223633, "learning_rate": 1.3779585062240665e-05, "loss": 0.6204, "step": 18751 }, { "epoch": 15.561825726141079, "grad_norm": 45.43070602416992, "learning_rate": 1.3779253112033196e-05, "loss": 0.7709, "step": 18752 }, { "epoch": 15.56265560165975, "grad_norm": 24.235931396484375, "learning_rate": 1.3778921161825726e-05, "loss": 0.4362, "step": 18753 }, { "epoch": 15.563485477178423, "grad_norm": 12.841803550720215, "learning_rate": 1.3778589211618258e-05, "loss": 0.2538, "step": 18754 }, { "epoch": 15.564315352697095, "grad_norm": 44.464569091796875, "learning_rate": 1.377825726141079e-05, "loss": 0.6243, "step": 18755 }, { "epoch": 15.565145228215767, "grad_norm": 39.070369720458984, "learning_rate": 1.3777925311203319e-05, "loss": 0.8772, "step": 18756 }, { "epoch": 15.56597510373444, "grad_norm": 44.95233917236328, "learning_rate": 1.3777593360995851e-05, "loss": 0.6244, "step": 18757 }, { "epoch": 15.566804979253112, "grad_norm": 20.889890670776367, "learning_rate": 1.3777261410788383e-05, "loss": 0.4927, "step": 18758 }, { "epoch": 15.567634854771784, "grad_norm": 43.540855407714844, "learning_rate": 1.3776929460580914e-05, "loss": 0.9604, "step": 18759 }, { "epoch": 15.568464730290456, "grad_norm": 56.900665283203125, "learning_rate": 1.3776597510373446e-05, "loss": 1.5277, "step": 18760 }, { "epoch": 15.569294605809128, "grad_norm": 48.3472785949707, "learning_rate": 1.3776265560165976e-05, "loss": 0.6901, 
"step": 18761 }, { "epoch": 15.5701244813278, "grad_norm": 42.54093933105469, "learning_rate": 1.3775933609958507e-05, "loss": 0.7051, "step": 18762 }, { "epoch": 15.570954356846473, "grad_norm": 62.709468841552734, "learning_rate": 1.3775601659751039e-05, "loss": 0.6724, "step": 18763 }, { "epoch": 15.571784232365145, "grad_norm": 35.651702880859375, "learning_rate": 1.3775269709543571e-05, "loss": 0.6605, "step": 18764 }, { "epoch": 15.572614107883817, "grad_norm": 29.7126407623291, "learning_rate": 1.37749377593361e-05, "loss": 0.8404, "step": 18765 }, { "epoch": 15.57344398340249, "grad_norm": 54.74332046508789, "learning_rate": 1.3774605809128632e-05, "loss": 0.7731, "step": 18766 }, { "epoch": 15.574273858921162, "grad_norm": 89.12276458740234, "learning_rate": 1.3774273858921164e-05, "loss": 1.6023, "step": 18767 }, { "epoch": 15.575103734439834, "grad_norm": 28.978771209716797, "learning_rate": 1.3773941908713694e-05, "loss": 0.886, "step": 18768 }, { "epoch": 15.575933609958506, "grad_norm": 16.89887046813965, "learning_rate": 1.3773609958506225e-05, "loss": 0.3359, "step": 18769 }, { "epoch": 15.576763485477178, "grad_norm": 21.524520874023438, "learning_rate": 1.3773278008298757e-05, "loss": 0.6623, "step": 18770 }, { "epoch": 15.57759336099585, "grad_norm": 56.26248550415039, "learning_rate": 1.3772946058091287e-05, "loss": 1.0011, "step": 18771 }, { "epoch": 15.578423236514523, "grad_norm": 59.99872970581055, "learning_rate": 1.377261410788382e-05, "loss": 1.2422, "step": 18772 }, { "epoch": 15.579253112033195, "grad_norm": 36.5223503112793, "learning_rate": 1.3772282157676348e-05, "loss": 1.2501, "step": 18773 }, { "epoch": 15.580082987551867, "grad_norm": 35.707557678222656, "learning_rate": 1.377195020746888e-05, "loss": 0.5596, "step": 18774 }, { "epoch": 15.58091286307054, "grad_norm": 49.003761291503906, "learning_rate": 1.3771618257261412e-05, "loss": 0.9259, "step": 18775 }, { "epoch": 15.581742738589211, "grad_norm": 30.666032791137695, 
"learning_rate": 1.3771286307053944e-05, "loss": 0.5731, "step": 18776 }, { "epoch": 15.582572614107884, "grad_norm": 26.059711456298828, "learning_rate": 1.3770954356846473e-05, "loss": 0.5568, "step": 18777 }, { "epoch": 15.583402489626556, "grad_norm": 26.79836654663086, "learning_rate": 1.3770622406639005e-05, "loss": 0.5798, "step": 18778 }, { "epoch": 15.584232365145228, "grad_norm": 27.527599334716797, "learning_rate": 1.3770290456431537e-05, "loss": 0.3917, "step": 18779 }, { "epoch": 15.5850622406639, "grad_norm": 37.057987213134766, "learning_rate": 1.3769958506224068e-05, "loss": 0.5236, "step": 18780 }, { "epoch": 15.585892116182572, "grad_norm": 43.643001556396484, "learning_rate": 1.3769626556016598e-05, "loss": 1.0826, "step": 18781 }, { "epoch": 15.586721991701245, "grad_norm": 54.092586517333984, "learning_rate": 1.3769294605809128e-05, "loss": 1.8418, "step": 18782 }, { "epoch": 15.587551867219917, "grad_norm": 39.134822845458984, "learning_rate": 1.376896265560166e-05, "loss": 0.5484, "step": 18783 }, { "epoch": 15.588381742738589, "grad_norm": 29.451047897338867, "learning_rate": 1.3768630705394193e-05, "loss": 1.1241, "step": 18784 }, { "epoch": 15.589211618257261, "grad_norm": 42.61357879638672, "learning_rate": 1.3768298755186725e-05, "loss": 0.5796, "step": 18785 }, { "epoch": 15.590041493775933, "grad_norm": 74.52262878417969, "learning_rate": 1.3767966804979254e-05, "loss": 0.7754, "step": 18786 }, { "epoch": 15.590871369294605, "grad_norm": 58.49813461303711, "learning_rate": 1.3767634854771786e-05, "loss": 0.8523, "step": 18787 }, { "epoch": 15.591701244813278, "grad_norm": 55.50067138671875, "learning_rate": 1.3767302904564318e-05, "loss": 0.7873, "step": 18788 }, { "epoch": 15.59253112033195, "grad_norm": 17.698759078979492, "learning_rate": 1.3766970954356848e-05, "loss": 0.4855, "step": 18789 }, { "epoch": 15.593360995850622, "grad_norm": 18.701404571533203, "learning_rate": 1.3766639004149379e-05, "loss": 0.7508, "step": 18790 }, { 
"epoch": 15.594190871369294, "grad_norm": 34.43741226196289, "learning_rate": 1.3766307053941909e-05, "loss": 0.8316, "step": 18791 }, { "epoch": 15.595020746887966, "grad_norm": 57.47854232788086, "learning_rate": 1.3765975103734441e-05, "loss": 0.8639, "step": 18792 }, { "epoch": 15.595850622406639, "grad_norm": 44.67206573486328, "learning_rate": 1.3765643153526973e-05, "loss": 0.6116, "step": 18793 }, { "epoch": 15.59668049792531, "grad_norm": 30.697431564331055, "learning_rate": 1.3765311203319502e-05, "loss": 0.5705, "step": 18794 }, { "epoch": 15.597510373443983, "grad_norm": 39.349822998046875, "learning_rate": 1.3764979253112034e-05, "loss": 0.446, "step": 18795 }, { "epoch": 15.598340248962655, "grad_norm": 27.106796264648438, "learning_rate": 1.3764647302904566e-05, "loss": 1.1521, "step": 18796 }, { "epoch": 15.599170124481327, "grad_norm": 115.38301849365234, "learning_rate": 1.3764315352697098e-05, "loss": 1.8131, "step": 18797 }, { "epoch": 15.6, "grad_norm": 37.883392333984375, "learning_rate": 1.3763983402489627e-05, "loss": 0.6192, "step": 18798 }, { "epoch": 15.600829875518672, "grad_norm": 9.757400512695312, "learning_rate": 1.3763651452282159e-05, "loss": 0.2945, "step": 18799 }, { "epoch": 15.601659751037344, "grad_norm": 94.3349380493164, "learning_rate": 1.376331950207469e-05, "loss": 0.6746, "step": 18800 }, { "epoch": 15.602489626556016, "grad_norm": 18.06053352355957, "learning_rate": 1.3762987551867222e-05, "loss": 0.5184, "step": 18801 }, { "epoch": 15.603319502074688, "grad_norm": 24.761829376220703, "learning_rate": 1.3762655601659752e-05, "loss": 0.4632, "step": 18802 }, { "epoch": 15.60414937759336, "grad_norm": 27.839799880981445, "learning_rate": 1.3762323651452282e-05, "loss": 0.8434, "step": 18803 }, { "epoch": 15.604979253112033, "grad_norm": 26.74380874633789, "learning_rate": 1.3761991701244815e-05, "loss": 0.597, "step": 18804 }, { "epoch": 15.605809128630705, "grad_norm": 28.867435455322266, "learning_rate": 
1.3761659751037347e-05, "loss": 0.8136, "step": 18805 }, { "epoch": 15.606639004149377, "grad_norm": 39.20671463012695, "learning_rate": 1.3761327800829875e-05, "loss": 0.7072, "step": 18806 }, { "epoch": 15.60746887966805, "grad_norm": 51.574710845947266, "learning_rate": 1.3760995850622407e-05, "loss": 1.0167, "step": 18807 }, { "epoch": 15.608298755186722, "grad_norm": 61.881202697753906, "learning_rate": 1.376066390041494e-05, "loss": 0.8052, "step": 18808 }, { "epoch": 15.609128630705394, "grad_norm": 15.471268653869629, "learning_rate": 1.376033195020747e-05, "loss": 0.329, "step": 18809 }, { "epoch": 15.609958506224066, "grad_norm": 49.45948791503906, "learning_rate": 1.376e-05, "loss": 1.3167, "step": 18810 }, { "epoch": 15.610788381742738, "grad_norm": 44.8215446472168, "learning_rate": 1.3759668049792533e-05, "loss": 0.8052, "step": 18811 }, { "epoch": 15.61161825726141, "grad_norm": 44.261817932128906, "learning_rate": 1.3759336099585063e-05, "loss": 0.7547, "step": 18812 }, { "epoch": 15.612448132780083, "grad_norm": 51.15113067626953, "learning_rate": 1.3759004149377595e-05, "loss": 0.9869, "step": 18813 }, { "epoch": 15.613278008298755, "grad_norm": 25.40990447998047, "learning_rate": 1.3758672199170127e-05, "loss": 0.4805, "step": 18814 }, { "epoch": 15.614107883817427, "grad_norm": 11.843338012695312, "learning_rate": 1.3758340248962656e-05, "loss": 0.3508, "step": 18815 }, { "epoch": 15.6149377593361, "grad_norm": 36.925575256347656, "learning_rate": 1.3758008298755188e-05, "loss": 0.7623, "step": 18816 }, { "epoch": 15.615767634854771, "grad_norm": 31.145618438720703, "learning_rate": 1.375767634854772e-05, "loss": 1.1376, "step": 18817 }, { "epoch": 15.616597510373444, "grad_norm": 51.77385330200195, "learning_rate": 1.375734439834025e-05, "loss": 0.9673, "step": 18818 }, { "epoch": 15.617427385892116, "grad_norm": 39.46660232543945, "learning_rate": 1.3757012448132781e-05, "loss": 0.864, "step": 18819 }, { "epoch": 15.618257261410788, 
"grad_norm": 50.850669860839844, "learning_rate": 1.3756680497925311e-05, "loss": 0.554, "step": 18820 }, { "epoch": 15.61908713692946, "grad_norm": 26.09305763244629, "learning_rate": 1.3756348547717843e-05, "loss": 0.5061, "step": 18821 }, { "epoch": 15.619917012448132, "grad_norm": 40.96061325073242, "learning_rate": 1.3756016597510376e-05, "loss": 0.3419, "step": 18822 }, { "epoch": 15.620746887966805, "grad_norm": 16.364919662475586, "learning_rate": 1.3755684647302904e-05, "loss": 0.3634, "step": 18823 }, { "epoch": 15.621576763485477, "grad_norm": 34.81652069091797, "learning_rate": 1.3755352697095436e-05, "loss": 0.9179, "step": 18824 }, { "epoch": 15.622406639004149, "grad_norm": 45.2883186340332, "learning_rate": 1.3755020746887968e-05, "loss": 1.3973, "step": 18825 }, { "epoch": 15.623236514522821, "grad_norm": 22.97898292541504, "learning_rate": 1.37546887966805e-05, "loss": 0.5151, "step": 18826 }, { "epoch": 15.624066390041493, "grad_norm": 45.65333938598633, "learning_rate": 1.375435684647303e-05, "loss": 0.9481, "step": 18827 }, { "epoch": 15.624896265560166, "grad_norm": 25.779022216796875, "learning_rate": 1.3754024896265561e-05, "loss": 0.547, "step": 18828 }, { "epoch": 15.625726141078838, "grad_norm": 37.48727798461914, "learning_rate": 1.3753692946058092e-05, "loss": 0.5286, "step": 18829 }, { "epoch": 15.62655601659751, "grad_norm": 63.02762222290039, "learning_rate": 1.3753360995850624e-05, "loss": 1.0575, "step": 18830 }, { "epoch": 15.627385892116182, "grad_norm": 23.397676467895508, "learning_rate": 1.3753029045643154e-05, "loss": 0.7888, "step": 18831 }, { "epoch": 15.628215767634854, "grad_norm": 17.390369415283203, "learning_rate": 1.3752697095435685e-05, "loss": 0.2953, "step": 18832 }, { "epoch": 15.629045643153527, "grad_norm": 34.22410583496094, "learning_rate": 1.3752365145228217e-05, "loss": 0.5438, "step": 18833 }, { "epoch": 15.629875518672199, "grad_norm": 34.68877029418945, "learning_rate": 1.3752033195020749e-05, "loss": 
0.7106, "step": 18834 }, { "epoch": 15.630705394190871, "grad_norm": 28.232715606689453, "learning_rate": 1.3751701244813278e-05, "loss": 0.6684, "step": 18835 }, { "epoch": 15.631535269709543, "grad_norm": 26.60148048400879, "learning_rate": 1.375136929460581e-05, "loss": 0.7642, "step": 18836 }, { "epoch": 15.632365145228215, "grad_norm": 87.18589782714844, "learning_rate": 1.3751037344398342e-05, "loss": 0.6513, "step": 18837 }, { "epoch": 15.633195020746887, "grad_norm": 72.0682144165039, "learning_rate": 1.3750705394190872e-05, "loss": 1.0287, "step": 18838 }, { "epoch": 15.63402489626556, "grad_norm": 25.49542236328125, "learning_rate": 1.3750373443983404e-05, "loss": 0.5098, "step": 18839 }, { "epoch": 15.634854771784232, "grad_norm": 23.280948638916016, "learning_rate": 1.3750041493775935e-05, "loss": 0.6419, "step": 18840 }, { "epoch": 15.635684647302904, "grad_norm": 36.42230987548828, "learning_rate": 1.3749709543568465e-05, "loss": 0.5673, "step": 18841 }, { "epoch": 15.636514522821576, "grad_norm": 89.07032775878906, "learning_rate": 1.3749377593360997e-05, "loss": 0.7082, "step": 18842 }, { "epoch": 15.637344398340248, "grad_norm": 63.246578216552734, "learning_rate": 1.374904564315353e-05, "loss": 1.1508, "step": 18843 }, { "epoch": 15.63817427385892, "grad_norm": 106.43543243408203, "learning_rate": 1.3748713692946058e-05, "loss": 0.6448, "step": 18844 }, { "epoch": 15.639004149377593, "grad_norm": 73.90364074707031, "learning_rate": 1.374838174273859e-05, "loss": 0.7315, "step": 18845 }, { "epoch": 15.639834024896265, "grad_norm": 20.350040435791016, "learning_rate": 1.3748049792531122e-05, "loss": 0.4066, "step": 18846 }, { "epoch": 15.640663900414937, "grad_norm": 26.005931854248047, "learning_rate": 1.3747717842323653e-05, "loss": 0.7283, "step": 18847 }, { "epoch": 15.64149377593361, "grad_norm": 25.954748153686523, "learning_rate": 1.3747385892116183e-05, "loss": 0.382, "step": 18848 }, { "epoch": 15.642323651452282, "grad_norm": 
27.63299560546875, "learning_rate": 1.3747053941908715e-05, "loss": 0.7632, "step": 18849 }, { "epoch": 15.643153526970954, "grad_norm": 28.504459381103516, "learning_rate": 1.3746721991701246e-05, "loss": 0.4941, "step": 18850 }, { "epoch": 15.643983402489626, "grad_norm": 30.48088836669922, "learning_rate": 1.3746390041493778e-05, "loss": 0.8162, "step": 18851 }, { "epoch": 15.644813278008298, "grad_norm": 22.699018478393555, "learning_rate": 1.3746058091286307e-05, "loss": 0.7059, "step": 18852 }, { "epoch": 15.64564315352697, "grad_norm": 35.386043548583984, "learning_rate": 1.3745726141078839e-05, "loss": 0.8776, "step": 18853 }, { "epoch": 15.646473029045643, "grad_norm": 36.929874420166016, "learning_rate": 1.374539419087137e-05, "loss": 0.4924, "step": 18854 }, { "epoch": 15.647302904564315, "grad_norm": 32.750732421875, "learning_rate": 1.3745062240663903e-05, "loss": 0.4176, "step": 18855 }, { "epoch": 15.648132780082987, "grad_norm": 32.77773666381836, "learning_rate": 1.3744730290456432e-05, "loss": 0.4079, "step": 18856 }, { "epoch": 15.64896265560166, "grad_norm": 77.14512634277344, "learning_rate": 1.3744398340248964e-05, "loss": 0.6697, "step": 18857 }, { "epoch": 15.649792531120331, "grad_norm": 88.34331512451172, "learning_rate": 1.3744066390041496e-05, "loss": 0.5712, "step": 18858 }, { "epoch": 15.650622406639004, "grad_norm": 69.68391418457031, "learning_rate": 1.3743734439834026e-05, "loss": 1.6117, "step": 18859 }, { "epoch": 15.651452282157676, "grad_norm": 26.79482078552246, "learning_rate": 1.3743402489626557e-05, "loss": 0.4518, "step": 18860 }, { "epoch": 15.652282157676348, "grad_norm": 72.01860809326172, "learning_rate": 1.3743070539419087e-05, "loss": 0.9067, "step": 18861 }, { "epoch": 15.65311203319502, "grad_norm": 33.27907943725586, "learning_rate": 1.374273858921162e-05, "loss": 0.4533, "step": 18862 }, { "epoch": 15.653941908713692, "grad_norm": 38.43571090698242, "learning_rate": 1.3742406639004151e-05, "loss": 0.7247, "step": 
18863 }, { "epoch": 15.654771784232365, "grad_norm": 72.81654357910156, "learning_rate": 1.3742074688796683e-05, "loss": 0.6486, "step": 18864 }, { "epoch": 15.655601659751037, "grad_norm": 49.058815002441406, "learning_rate": 1.3741742738589212e-05, "loss": 0.7769, "step": 18865 }, { "epoch": 15.656431535269709, "grad_norm": 35.522804260253906, "learning_rate": 1.3741410788381744e-05, "loss": 0.8864, "step": 18866 }, { "epoch": 15.657261410788381, "grad_norm": 47.91383743286133, "learning_rate": 1.3741078838174275e-05, "loss": 1.352, "step": 18867 }, { "epoch": 15.658091286307053, "grad_norm": 22.51371192932129, "learning_rate": 1.3740746887966807e-05, "loss": 0.2992, "step": 18868 }, { "epoch": 15.658921161825726, "grad_norm": 68.55451965332031, "learning_rate": 1.3740414937759337e-05, "loss": 1.3752, "step": 18869 }, { "epoch": 15.659751037344398, "grad_norm": 48.13986587524414, "learning_rate": 1.3740082987551868e-05, "loss": 1.2555, "step": 18870 }, { "epoch": 15.66058091286307, "grad_norm": 43.45576095581055, "learning_rate": 1.37397510373444e-05, "loss": 0.5613, "step": 18871 }, { "epoch": 15.661410788381742, "grad_norm": 32.580894470214844, "learning_rate": 1.3739419087136932e-05, "loss": 0.7096, "step": 18872 }, { "epoch": 15.662240663900414, "grad_norm": 28.64255142211914, "learning_rate": 1.373908713692946e-05, "loss": 0.7191, "step": 18873 }, { "epoch": 15.663070539419087, "grad_norm": 27.634849548339844, "learning_rate": 1.3738755186721993e-05, "loss": 0.3414, "step": 18874 }, { "epoch": 15.663900414937759, "grad_norm": 29.58416175842285, "learning_rate": 1.3738423236514525e-05, "loss": 0.5292, "step": 18875 }, { "epoch": 15.664730290456431, "grad_norm": 42.34107208251953, "learning_rate": 1.3738091286307055e-05, "loss": 0.8716, "step": 18876 }, { "epoch": 15.665560165975103, "grad_norm": 64.07559967041016, "learning_rate": 1.3737759336099586e-05, "loss": 0.9433, "step": 18877 }, { "epoch": 15.666390041493775, "grad_norm": 32.51591110229492, 
"learning_rate": 1.3737427385892118e-05, "loss": 0.5428, "step": 18878 }, { "epoch": 15.667219917012448, "grad_norm": 23.373674392700195, "learning_rate": 1.3737095435684648e-05, "loss": 1.0176, "step": 18879 }, { "epoch": 15.66804979253112, "grad_norm": 32.30957794189453, "learning_rate": 1.373676348547718e-05, "loss": 0.7146, "step": 18880 }, { "epoch": 15.668879668049792, "grad_norm": 22.429656982421875, "learning_rate": 1.373643153526971e-05, "loss": 0.5699, "step": 18881 }, { "epoch": 15.669709543568464, "grad_norm": 82.00778198242188, "learning_rate": 1.3736099585062241e-05, "loss": 0.7135, "step": 18882 }, { "epoch": 15.670539419087136, "grad_norm": 31.365468978881836, "learning_rate": 1.3735767634854773e-05, "loss": 0.5873, "step": 18883 }, { "epoch": 15.671369294605809, "grad_norm": 30.209835052490234, "learning_rate": 1.3735435684647305e-05, "loss": 0.3603, "step": 18884 }, { "epoch": 15.67219917012448, "grad_norm": 35.390830993652344, "learning_rate": 1.3735103734439834e-05, "loss": 0.7639, "step": 18885 }, { "epoch": 15.673029045643153, "grad_norm": 33.25837707519531, "learning_rate": 1.3734771784232366e-05, "loss": 0.4958, "step": 18886 }, { "epoch": 15.673858921161825, "grad_norm": 58.25687789916992, "learning_rate": 1.3734439834024898e-05, "loss": 1.4262, "step": 18887 }, { "epoch": 15.674688796680497, "grad_norm": 26.799680709838867, "learning_rate": 1.3734107883817429e-05, "loss": 0.966, "step": 18888 }, { "epoch": 15.67551867219917, "grad_norm": 32.61589431762695, "learning_rate": 1.3733775933609959e-05, "loss": 0.4994, "step": 18889 }, { "epoch": 15.676348547717842, "grad_norm": 41.99335861206055, "learning_rate": 1.373344398340249e-05, "loss": 0.7518, "step": 18890 }, { "epoch": 15.677178423236514, "grad_norm": 41.538997650146484, "learning_rate": 1.3733112033195021e-05, "loss": 0.7374, "step": 18891 }, { "epoch": 15.678008298755186, "grad_norm": 26.701763153076172, "learning_rate": 1.3732780082987554e-05, "loss": 0.6009, "step": 18892 }, { 
"epoch": 15.678838174273858, "grad_norm": 58.592594146728516, "learning_rate": 1.3732448132780086e-05, "loss": 1.1426, "step": 18893 }, { "epoch": 15.67966804979253, "grad_norm": 36.50707244873047, "learning_rate": 1.3732116182572614e-05, "loss": 0.9415, "step": 18894 }, { "epoch": 15.680497925311203, "grad_norm": 63.22010803222656, "learning_rate": 1.3731784232365147e-05, "loss": 1.0694, "step": 18895 }, { "epoch": 15.681327800829875, "grad_norm": 41.876129150390625, "learning_rate": 1.3731452282157679e-05, "loss": 0.69, "step": 18896 }, { "epoch": 15.682157676348547, "grad_norm": 70.14097595214844, "learning_rate": 1.3731120331950209e-05, "loss": 0.8331, "step": 18897 }, { "epoch": 15.68298755186722, "grad_norm": 34.71601486206055, "learning_rate": 1.373078838174274e-05, "loss": 1.1948, "step": 18898 }, { "epoch": 15.683817427385891, "grad_norm": 14.759875297546387, "learning_rate": 1.373045643153527e-05, "loss": 0.3739, "step": 18899 }, { "epoch": 15.684647302904564, "grad_norm": 50.56768798828125, "learning_rate": 1.3730124481327802e-05, "loss": 0.6167, "step": 18900 }, { "epoch": 15.685477178423236, "grad_norm": 32.35624313354492, "learning_rate": 1.3729792531120334e-05, "loss": 0.6087, "step": 18901 }, { "epoch": 15.686307053941908, "grad_norm": 120.0516357421875, "learning_rate": 1.3729460580912863e-05, "loss": 0.6457, "step": 18902 }, { "epoch": 15.68713692946058, "grad_norm": 35.539329528808594, "learning_rate": 1.3729128630705395e-05, "loss": 0.9978, "step": 18903 }, { "epoch": 15.687966804979252, "grad_norm": 18.1760311126709, "learning_rate": 1.3728796680497927e-05, "loss": 0.5947, "step": 18904 }, { "epoch": 15.688796680497925, "grad_norm": 25.959142684936523, "learning_rate": 1.3728464730290459e-05, "loss": 0.6112, "step": 18905 }, { "epoch": 15.689626556016597, "grad_norm": 45.875, "learning_rate": 1.3728132780082988e-05, "loss": 0.9773, "step": 18906 }, { "epoch": 15.690456431535269, "grad_norm": 16.77631187438965, "learning_rate": 
1.372780082987552e-05, "loss": 0.3188, "step": 18907 }, { "epoch": 15.691286307053941, "grad_norm": 26.56178092956543, "learning_rate": 1.372746887966805e-05, "loss": 0.4647, "step": 18908 }, { "epoch": 15.692116182572613, "grad_norm": 40.275474548339844, "learning_rate": 1.3727136929460582e-05, "loss": 1.0526, "step": 18909 }, { "epoch": 15.692946058091286, "grad_norm": 37.88353729248047, "learning_rate": 1.3726804979253113e-05, "loss": 0.6354, "step": 18910 }, { "epoch": 15.693775933609958, "grad_norm": 60.2664909362793, "learning_rate": 1.3726473029045643e-05, "loss": 0.9643, "step": 18911 }, { "epoch": 15.69460580912863, "grad_norm": 37.36164474487305, "learning_rate": 1.3726141078838175e-05, "loss": 0.699, "step": 18912 }, { "epoch": 15.695435684647302, "grad_norm": 36.24302673339844, "learning_rate": 1.3725809128630708e-05, "loss": 0.8614, "step": 18913 }, { "epoch": 15.696265560165974, "grad_norm": 51.78007507324219, "learning_rate": 1.3725477178423236e-05, "loss": 0.7088, "step": 18914 }, { "epoch": 15.697095435684647, "grad_norm": 51.695865631103516, "learning_rate": 1.3725145228215768e-05, "loss": 0.4977, "step": 18915 }, { "epoch": 15.697925311203319, "grad_norm": 23.26517105102539, "learning_rate": 1.37248132780083e-05, "loss": 0.5636, "step": 18916 }, { "epoch": 15.698755186721991, "grad_norm": 76.45548248291016, "learning_rate": 1.3724481327800831e-05, "loss": 0.832, "step": 18917 }, { "epoch": 15.699585062240663, "grad_norm": 37.968204498291016, "learning_rate": 1.3724149377593363e-05, "loss": 0.8467, "step": 18918 }, { "epoch": 15.700414937759335, "grad_norm": 42.695648193359375, "learning_rate": 1.3723817427385893e-05, "loss": 0.8115, "step": 18919 }, { "epoch": 15.701244813278008, "grad_norm": 90.21634674072266, "learning_rate": 1.3723485477178424e-05, "loss": 0.8099, "step": 18920 }, { "epoch": 15.70207468879668, "grad_norm": 38.5506706237793, "learning_rate": 1.3723153526970956e-05, "loss": 0.6958, "step": 18921 }, { "epoch": 15.702904564315352, 
"grad_norm": 81.17068481445312, "learning_rate": 1.3722821576763488e-05, "loss": 1.7368, "step": 18922 }, { "epoch": 15.703734439834024, "grad_norm": 50.82227325439453, "learning_rate": 1.3722489626556017e-05, "loss": 0.6876, "step": 18923 }, { "epoch": 15.704564315352696, "grad_norm": 25.550249099731445, "learning_rate": 1.3722157676348549e-05, "loss": 0.3947, "step": 18924 }, { "epoch": 15.705394190871369, "grad_norm": 38.8393440246582, "learning_rate": 1.3721825726141081e-05, "loss": 0.8936, "step": 18925 }, { "epoch": 15.70622406639004, "grad_norm": 58.04832077026367, "learning_rate": 1.3721493775933611e-05, "loss": 1.2528, "step": 18926 }, { "epoch": 15.707053941908713, "grad_norm": 32.31283950805664, "learning_rate": 1.3721161825726142e-05, "loss": 0.8888, "step": 18927 }, { "epoch": 15.707883817427385, "grad_norm": 55.4532356262207, "learning_rate": 1.3720829875518674e-05, "loss": 1.0192, "step": 18928 }, { "epoch": 15.708713692946057, "grad_norm": 80.10224151611328, "learning_rate": 1.3720497925311204e-05, "loss": 1.0888, "step": 18929 }, { "epoch": 15.70954356846473, "grad_norm": 57.593196868896484, "learning_rate": 1.3720165975103736e-05, "loss": 1.0537, "step": 18930 }, { "epoch": 15.710373443983402, "grad_norm": 37.652191162109375, "learning_rate": 1.3719834024896265e-05, "loss": 0.7093, "step": 18931 }, { "epoch": 15.711203319502074, "grad_norm": 44.47477340698242, "learning_rate": 1.3719502074688797e-05, "loss": 0.6841, "step": 18932 }, { "epoch": 15.712033195020746, "grad_norm": 27.443471908569336, "learning_rate": 1.371917012448133e-05, "loss": 0.4368, "step": 18933 }, { "epoch": 15.712863070539418, "grad_norm": 42.527732849121094, "learning_rate": 1.3718838174273861e-05, "loss": 0.8217, "step": 18934 }, { "epoch": 15.71369294605809, "grad_norm": 28.907346725463867, "learning_rate": 1.371850622406639e-05, "loss": 0.7334, "step": 18935 }, { "epoch": 15.714522821576763, "grad_norm": 56.94412612915039, "learning_rate": 1.3718174273858922e-05, "loss": 
0.5285, "step": 18936 }, { "epoch": 15.715352697095435, "grad_norm": 26.444561004638672, "learning_rate": 1.3717842323651453e-05, "loss": 0.3756, "step": 18937 }, { "epoch": 15.716182572614107, "grad_norm": 24.945791244506836, "learning_rate": 1.3717510373443985e-05, "loss": 0.6141, "step": 18938 }, { "epoch": 15.71701244813278, "grad_norm": 124.20863342285156, "learning_rate": 1.3717178423236515e-05, "loss": 0.8449, "step": 18939 }, { "epoch": 15.717842323651452, "grad_norm": 41.853721618652344, "learning_rate": 1.3716846473029046e-05, "loss": 0.4523, "step": 18940 }, { "epoch": 15.718672199170124, "grad_norm": 86.82391357421875, "learning_rate": 1.3716514522821578e-05, "loss": 0.5831, "step": 18941 }, { "epoch": 15.719502074688796, "grad_norm": 38.14246368408203, "learning_rate": 1.371618257261411e-05, "loss": 0.5933, "step": 18942 }, { "epoch": 15.720331950207468, "grad_norm": 20.36490821838379, "learning_rate": 1.3715850622406642e-05, "loss": 0.3782, "step": 18943 }, { "epoch": 15.72116182572614, "grad_norm": 32.75835037231445, "learning_rate": 1.371551867219917e-05, "loss": 0.8819, "step": 18944 }, { "epoch": 15.721991701244812, "grad_norm": 49.04749298095703, "learning_rate": 1.3715186721991703e-05, "loss": 0.573, "step": 18945 }, { "epoch": 15.722821576763485, "grad_norm": 48.904884338378906, "learning_rate": 1.3714854771784233e-05, "loss": 0.5637, "step": 18946 }, { "epoch": 15.723651452282157, "grad_norm": 26.64374542236328, "learning_rate": 1.3714522821576765e-05, "loss": 0.4908, "step": 18947 }, { "epoch": 15.724481327800829, "grad_norm": 69.20774841308594, "learning_rate": 1.3714190871369296e-05, "loss": 0.7986, "step": 18948 }, { "epoch": 15.725311203319501, "grad_norm": 27.25590705871582, "learning_rate": 1.3713858921161826e-05, "loss": 0.6969, "step": 18949 }, { "epoch": 15.726141078838173, "grad_norm": 100.53072357177734, "learning_rate": 1.3713526970954358e-05, "loss": 0.9688, "step": 18950 }, { "epoch": 15.726970954356846, "grad_norm": 
100.57000732421875, "learning_rate": 1.371319502074689e-05, "loss": 1.2418, "step": 18951 }, { "epoch": 15.727800829875518, "grad_norm": 58.070125579833984, "learning_rate": 1.3712863070539419e-05, "loss": 0.7405, "step": 18952 }, { "epoch": 15.72863070539419, "grad_norm": 31.771484375, "learning_rate": 1.3712531120331951e-05, "loss": 0.5847, "step": 18953 }, { "epoch": 15.729460580912862, "grad_norm": 68.7875747680664, "learning_rate": 1.3712199170124483e-05, "loss": 0.8976, "step": 18954 }, { "epoch": 15.730290456431534, "grad_norm": 58.83372497558594, "learning_rate": 1.3711867219917014e-05, "loss": 1.2876, "step": 18955 }, { "epoch": 15.731120331950207, "grad_norm": 35.726356506347656, "learning_rate": 1.3711535269709544e-05, "loss": 0.6414, "step": 18956 }, { "epoch": 15.731950207468879, "grad_norm": 40.59648132324219, "learning_rate": 1.3711203319502076e-05, "loss": 1.2478, "step": 18957 }, { "epoch": 15.732780082987551, "grad_norm": 33.80963134765625, "learning_rate": 1.3710871369294607e-05, "loss": 1.0476, "step": 18958 }, { "epoch": 15.733609958506223, "grad_norm": 35.59269714355469, "learning_rate": 1.3710539419087139e-05, "loss": 0.5301, "step": 18959 }, { "epoch": 15.734439834024897, "grad_norm": 31.859514236450195, "learning_rate": 1.3710207468879667e-05, "loss": 1.0724, "step": 18960 }, { "epoch": 15.73526970954357, "grad_norm": 35.88850784301758, "learning_rate": 1.37098755186722e-05, "loss": 0.365, "step": 18961 }, { "epoch": 15.736099585062242, "grad_norm": 18.927776336669922, "learning_rate": 1.3709543568464732e-05, "loss": 0.4679, "step": 18962 }, { "epoch": 15.736929460580914, "grad_norm": 22.24471664428711, "learning_rate": 1.3709211618257264e-05, "loss": 0.8971, "step": 18963 }, { "epoch": 15.737759336099586, "grad_norm": 18.223840713500977, "learning_rate": 1.3708879668049793e-05, "loss": 0.3147, "step": 18964 }, { "epoch": 15.738589211618258, "grad_norm": 60.08319854736328, "learning_rate": 1.3708547717842325e-05, "loss": 1.1806, "step": 
18965 }, { "epoch": 15.73941908713693, "grad_norm": 97.84393310546875, "learning_rate": 1.3708215767634857e-05, "loss": 1.2173, "step": 18966 }, { "epoch": 15.740248962655603, "grad_norm": 25.874897003173828, "learning_rate": 1.3707883817427387e-05, "loss": 0.6721, "step": 18967 }, { "epoch": 15.741078838174275, "grad_norm": 50.53995895385742, "learning_rate": 1.3707551867219918e-05, "loss": 0.6174, "step": 18968 }, { "epoch": 15.741908713692947, "grad_norm": 17.782943725585938, "learning_rate": 1.3707219917012448e-05, "loss": 0.3954, "step": 18969 }, { "epoch": 15.74273858921162, "grad_norm": 37.3547477722168, "learning_rate": 1.370688796680498e-05, "loss": 0.8933, "step": 18970 }, { "epoch": 15.743568464730291, "grad_norm": 33.34930419921875, "learning_rate": 1.3706556016597512e-05, "loss": 0.8422, "step": 18971 }, { "epoch": 15.744398340248964, "grad_norm": 50.97537612915039, "learning_rate": 1.3706224066390044e-05, "loss": 1.0266, "step": 18972 }, { "epoch": 15.745228215767636, "grad_norm": 67.98883819580078, "learning_rate": 1.3705892116182573e-05, "loss": 0.4734, "step": 18973 }, { "epoch": 15.746058091286308, "grad_norm": 28.486671447753906, "learning_rate": 1.3705560165975105e-05, "loss": 0.5258, "step": 18974 }, { "epoch": 15.74688796680498, "grad_norm": 37.16122817993164, "learning_rate": 1.3705228215767637e-05, "loss": 0.7547, "step": 18975 }, { "epoch": 15.747717842323652, "grad_norm": 54.09028244018555, "learning_rate": 1.3704896265560168e-05, "loss": 0.7502, "step": 18976 }, { "epoch": 15.748547717842325, "grad_norm": 31.385726928710938, "learning_rate": 1.3704564315352698e-05, "loss": 0.9768, "step": 18977 }, { "epoch": 15.749377593360997, "grad_norm": 26.183931350708008, "learning_rate": 1.3704232365145228e-05, "loss": 0.5281, "step": 18978 }, { "epoch": 15.750207468879669, "grad_norm": 22.472515106201172, "learning_rate": 1.370390041493776e-05, "loss": 0.5969, "step": 18979 }, { "epoch": 15.751037344398341, "grad_norm": 34.118873596191406, 
"learning_rate": 1.3703568464730293e-05, "loss": 0.8047, "step": 18980 }, { "epoch": 15.751867219917013, "grad_norm": 43.8685188293457, "learning_rate": 1.3703236514522821e-05, "loss": 0.751, "step": 18981 }, { "epoch": 15.752697095435686, "grad_norm": 18.532054901123047, "learning_rate": 1.3702904564315353e-05, "loss": 0.4849, "step": 18982 }, { "epoch": 15.753526970954358, "grad_norm": 33.79096221923828, "learning_rate": 1.3702572614107886e-05, "loss": 0.927, "step": 18983 }, { "epoch": 15.75435684647303, "grad_norm": 67.51527404785156, "learning_rate": 1.3702240663900416e-05, "loss": 0.376, "step": 18984 }, { "epoch": 15.755186721991702, "grad_norm": 42.290565490722656, "learning_rate": 1.3701908713692946e-05, "loss": 0.7716, "step": 18985 }, { "epoch": 15.756016597510374, "grad_norm": 71.16342163085938, "learning_rate": 1.3701576763485479e-05, "loss": 0.8305, "step": 18986 }, { "epoch": 15.756846473029047, "grad_norm": 53.80695724487305, "learning_rate": 1.3701244813278009e-05, "loss": 0.9147, "step": 18987 }, { "epoch": 15.757676348547719, "grad_norm": 91.81792449951172, "learning_rate": 1.3700912863070541e-05, "loss": 1.33, "step": 18988 }, { "epoch": 15.758506224066391, "grad_norm": 88.75914001464844, "learning_rate": 1.3700580912863071e-05, "loss": 1.1097, "step": 18989 }, { "epoch": 15.759336099585063, "grad_norm": 56.7688102722168, "learning_rate": 1.3700248962655602e-05, "loss": 0.8366, "step": 18990 }, { "epoch": 15.760165975103735, "grad_norm": 49.599002838134766, "learning_rate": 1.3699917012448134e-05, "loss": 0.4493, "step": 18991 }, { "epoch": 15.760995850622407, "grad_norm": 19.998504638671875, "learning_rate": 1.3699585062240666e-05, "loss": 0.3793, "step": 18992 }, { "epoch": 15.76182572614108, "grad_norm": 62.35162353515625, "learning_rate": 1.3699253112033195e-05, "loss": 0.9711, "step": 18993 }, { "epoch": 15.762655601659752, "grad_norm": 35.49908447265625, "learning_rate": 1.3698921161825727e-05, "loss": 0.6681, "step": 18994 }, { "epoch": 
15.763485477178424, "grad_norm": 86.3689193725586, "learning_rate": 1.3698589211618259e-05, "loss": 1.4286, "step": 18995 }, { "epoch": 15.764315352697096, "grad_norm": 29.947696685791016, "learning_rate": 1.369825726141079e-05, "loss": 0.4992, "step": 18996 }, { "epoch": 15.765145228215768, "grad_norm": 34.10737991333008, "learning_rate": 1.3697925311203322e-05, "loss": 0.3269, "step": 18997 }, { "epoch": 15.76597510373444, "grad_norm": 34.666046142578125, "learning_rate": 1.3697593360995852e-05, "loss": 0.5131, "step": 18998 }, { "epoch": 15.766804979253113, "grad_norm": 59.99134826660156, "learning_rate": 1.3697261410788382e-05, "loss": 1.1331, "step": 18999 }, { "epoch": 15.767634854771785, "grad_norm": 42.55265808105469, "learning_rate": 1.3696929460580914e-05, "loss": 0.4624, "step": 19000 }, { "epoch": 15.768464730290457, "grad_norm": 47.59992599487305, "learning_rate": 1.3696597510373447e-05, "loss": 0.6111, "step": 19001 }, { "epoch": 15.76929460580913, "grad_norm": 42.78104019165039, "learning_rate": 1.3696265560165975e-05, "loss": 0.409, "step": 19002 }, { "epoch": 15.770124481327802, "grad_norm": 48.88803482055664, "learning_rate": 1.3695933609958507e-05, "loss": 1.0316, "step": 19003 }, { "epoch": 15.770954356846474, "grad_norm": 32.58818054199219, "learning_rate": 1.369560165975104e-05, "loss": 0.6467, "step": 19004 }, { "epoch": 15.771784232365146, "grad_norm": 22.579553604125977, "learning_rate": 1.369526970954357e-05, "loss": 0.3679, "step": 19005 }, { "epoch": 15.772614107883818, "grad_norm": 30.928451538085938, "learning_rate": 1.36949377593361e-05, "loss": 0.8446, "step": 19006 }, { "epoch": 15.77344398340249, "grad_norm": 43.528480529785156, "learning_rate": 1.369460580912863e-05, "loss": 0.796, "step": 19007 }, { "epoch": 15.774273858921163, "grad_norm": 35.05073928833008, "learning_rate": 1.3694273858921163e-05, "loss": 1.0967, "step": 19008 }, { "epoch": 15.775103734439835, "grad_norm": 188.9298553466797, "learning_rate": 
1.3693941908713695e-05, "loss": 1.1093, "step": 19009 }, { "epoch": 15.775933609958507, "grad_norm": 33.230804443359375, "learning_rate": 1.3693609958506224e-05, "loss": 0.6731, "step": 19010 }, { "epoch": 15.77676348547718, "grad_norm": 26.164636611938477, "learning_rate": 1.3693278008298756e-05, "loss": 0.5929, "step": 19011 }, { "epoch": 15.777593360995851, "grad_norm": 20.747657775878906, "learning_rate": 1.3692946058091288e-05, "loss": 0.4287, "step": 19012 }, { "epoch": 15.778423236514524, "grad_norm": 43.77604293823242, "learning_rate": 1.369261410788382e-05, "loss": 0.7082, "step": 19013 }, { "epoch": 15.779253112033196, "grad_norm": 11.456196784973145, "learning_rate": 1.3692282157676349e-05, "loss": 0.2316, "step": 19014 }, { "epoch": 15.780082987551868, "grad_norm": 20.78323745727539, "learning_rate": 1.3691950207468881e-05, "loss": 0.4484, "step": 19015 }, { "epoch": 15.78091286307054, "grad_norm": 46.076107025146484, "learning_rate": 1.3691618257261411e-05, "loss": 1.1014, "step": 19016 }, { "epoch": 15.781742738589212, "grad_norm": 21.383821487426758, "learning_rate": 1.3691286307053943e-05, "loss": 0.447, "step": 19017 }, { "epoch": 15.782572614107885, "grad_norm": 37.274391174316406, "learning_rate": 1.3690954356846474e-05, "loss": 0.7432, "step": 19018 }, { "epoch": 15.783402489626557, "grad_norm": 39.75529098510742, "learning_rate": 1.3690622406639004e-05, "loss": 1.1681, "step": 19019 }, { "epoch": 15.784232365145229, "grad_norm": 34.17103576660156, "learning_rate": 1.3690290456431536e-05, "loss": 0.7263, "step": 19020 }, { "epoch": 15.785062240663901, "grad_norm": 44.81175994873047, "learning_rate": 1.3689958506224068e-05, "loss": 0.4217, "step": 19021 }, { "epoch": 15.785892116182573, "grad_norm": 53.10309982299805, "learning_rate": 1.36896265560166e-05, "loss": 0.4664, "step": 19022 }, { "epoch": 15.786721991701246, "grad_norm": 39.704681396484375, "learning_rate": 1.368929460580913e-05, "loss": 0.4872, "step": 19023 }, { "epoch": 
15.787551867219918, "grad_norm": 66.90692901611328, "learning_rate": 1.3688962655601661e-05, "loss": 1.0722, "step": 19024 }, { "epoch": 15.78838174273859, "grad_norm": 37.817806243896484, "learning_rate": 1.3688630705394192e-05, "loss": 0.8197, "step": 19025 }, { "epoch": 15.789211618257262, "grad_norm": 30.620603561401367, "learning_rate": 1.3688298755186724e-05, "loss": 0.979, "step": 19026 }, { "epoch": 15.790041493775934, "grad_norm": 25.182119369506836, "learning_rate": 1.3687966804979254e-05, "loss": 0.4986, "step": 19027 }, { "epoch": 15.790871369294607, "grad_norm": 30.28852081298828, "learning_rate": 1.3687634854771785e-05, "loss": 0.8469, "step": 19028 }, { "epoch": 15.791701244813279, "grad_norm": 140.01463317871094, "learning_rate": 1.3687302904564317e-05, "loss": 0.4101, "step": 19029 }, { "epoch": 15.792531120331951, "grad_norm": 68.78626251220703, "learning_rate": 1.3686970954356849e-05, "loss": 1.5297, "step": 19030 }, { "epoch": 15.793360995850623, "grad_norm": 76.40296173095703, "learning_rate": 1.3686639004149378e-05, "loss": 0.4902, "step": 19031 }, { "epoch": 15.794190871369295, "grad_norm": 38.91243362426758, "learning_rate": 1.368630705394191e-05, "loss": 0.8873, "step": 19032 }, { "epoch": 15.795020746887968, "grad_norm": 39.76091766357422, "learning_rate": 1.3685975103734442e-05, "loss": 0.5937, "step": 19033 }, { "epoch": 15.79585062240664, "grad_norm": 85.18645477294922, "learning_rate": 1.3685643153526972e-05, "loss": 1.2835, "step": 19034 }, { "epoch": 15.796680497925312, "grad_norm": 42.561668395996094, "learning_rate": 1.3685311203319503e-05, "loss": 0.6929, "step": 19035 }, { "epoch": 15.797510373443984, "grad_norm": 72.84474182128906, "learning_rate": 1.3684979253112035e-05, "loss": 1.0747, "step": 19036 }, { "epoch": 15.798340248962656, "grad_norm": 16.155241012573242, "learning_rate": 1.3684647302904565e-05, "loss": 0.3036, "step": 19037 }, { "epoch": 15.799170124481329, "grad_norm": 64.9382095336914, "learning_rate": 
1.3684315352697097e-05, "loss": 0.7682, "step": 19038 }, { "epoch": 15.8, "grad_norm": 54.30643844604492, "learning_rate": 1.3683983402489626e-05, "loss": 0.9958, "step": 19039 }, { "epoch": 15.800829875518673, "grad_norm": 53.12626266479492, "learning_rate": 1.3683651452282158e-05, "loss": 0.6245, "step": 19040 }, { "epoch": 15.801659751037345, "grad_norm": 82.19365692138672, "learning_rate": 1.368331950207469e-05, "loss": 0.5097, "step": 19041 }, { "epoch": 15.802489626556017, "grad_norm": 41.42133712768555, "learning_rate": 1.3682987551867222e-05, "loss": 0.2911, "step": 19042 }, { "epoch": 15.80331950207469, "grad_norm": 51.75008773803711, "learning_rate": 1.3682655601659751e-05, "loss": 0.952, "step": 19043 }, { "epoch": 15.804149377593362, "grad_norm": 36.34773254394531, "learning_rate": 1.3682323651452283e-05, "loss": 0.4435, "step": 19044 }, { "epoch": 15.804979253112034, "grad_norm": 27.496679306030273, "learning_rate": 1.3681991701244815e-05, "loss": 0.5814, "step": 19045 }, { "epoch": 15.805809128630706, "grad_norm": 42.44684600830078, "learning_rate": 1.3681659751037346e-05, "loss": 0.6128, "step": 19046 }, { "epoch": 15.806639004149378, "grad_norm": 59.61656188964844, "learning_rate": 1.3681327800829876e-05, "loss": 0.967, "step": 19047 }, { "epoch": 15.80746887966805, "grad_norm": 45.128353118896484, "learning_rate": 1.3680995850622407e-05, "loss": 0.5112, "step": 19048 }, { "epoch": 15.808298755186723, "grad_norm": 73.3160629272461, "learning_rate": 1.3680663900414939e-05, "loss": 1.2541, "step": 19049 }, { "epoch": 15.809128630705395, "grad_norm": 22.22357749938965, "learning_rate": 1.368033195020747e-05, "loss": 0.3185, "step": 19050 }, { "epoch": 15.809958506224067, "grad_norm": 48.330684661865234, "learning_rate": 1.3680000000000003e-05, "loss": 0.5539, "step": 19051 }, { "epoch": 15.81078838174274, "grad_norm": 43.09428024291992, "learning_rate": 1.3679668049792532e-05, "loss": 0.8391, "step": 19052 }, { "epoch": 15.811618257261411, "grad_norm": 
57.82419204711914, "learning_rate": 1.3679336099585064e-05, "loss": 0.8731, "step": 19053 }, { "epoch": 15.812448132780084, "grad_norm": 23.930932998657227, "learning_rate": 1.3679004149377594e-05, "loss": 0.6492, "step": 19054 }, { "epoch": 15.813278008298756, "grad_norm": 35.5737190246582, "learning_rate": 1.3678672199170126e-05, "loss": 0.9127, "step": 19055 }, { "epoch": 15.814107883817428, "grad_norm": 27.7899112701416, "learning_rate": 1.3678340248962657e-05, "loss": 0.8442, "step": 19056 }, { "epoch": 15.8149377593361, "grad_norm": 27.564395904541016, "learning_rate": 1.3678008298755187e-05, "loss": 0.961, "step": 19057 }, { "epoch": 15.815767634854772, "grad_norm": 40.20610809326172, "learning_rate": 1.3677676348547719e-05, "loss": 0.3893, "step": 19058 }, { "epoch": 15.816597510373445, "grad_norm": 25.468347549438477, "learning_rate": 1.3677344398340251e-05, "loss": 0.4466, "step": 19059 }, { "epoch": 15.817427385892117, "grad_norm": 43.90180969238281, "learning_rate": 1.367701244813278e-05, "loss": 0.5385, "step": 19060 }, { "epoch": 15.818257261410789, "grad_norm": 57.63179016113281, "learning_rate": 1.3676680497925312e-05, "loss": 0.7962, "step": 19061 }, { "epoch": 15.819087136929461, "grad_norm": 51.32318115234375, "learning_rate": 1.3676348547717844e-05, "loss": 1.2143, "step": 19062 }, { "epoch": 15.819917012448133, "grad_norm": 26.68981170654297, "learning_rate": 1.3676016597510375e-05, "loss": 0.4099, "step": 19063 }, { "epoch": 15.820746887966806, "grad_norm": 25.559791564941406, "learning_rate": 1.3675684647302905e-05, "loss": 0.6867, "step": 19064 }, { "epoch": 15.821576763485478, "grad_norm": 34.0150032043457, "learning_rate": 1.3675352697095437e-05, "loss": 0.7545, "step": 19065 }, { "epoch": 15.82240663900415, "grad_norm": 46.09016418457031, "learning_rate": 1.3675020746887968e-05, "loss": 0.7518, "step": 19066 }, { "epoch": 15.823236514522822, "grad_norm": 26.988977432250977, "learning_rate": 1.36746887966805e-05, "loss": 0.3798, "step": 
19067 }, { "epoch": 15.824066390041494, "grad_norm": 34.77218246459961, "learning_rate": 1.3674356846473028e-05, "loss": 0.5197, "step": 19068 }, { "epoch": 15.824896265560167, "grad_norm": 28.721017837524414, "learning_rate": 1.367402489626556e-05, "loss": 0.4146, "step": 19069 }, { "epoch": 15.825726141078839, "grad_norm": 44.80170440673828, "learning_rate": 1.3673692946058093e-05, "loss": 1.2758, "step": 19070 }, { "epoch": 15.826556016597511, "grad_norm": 16.86713218688965, "learning_rate": 1.3673360995850625e-05, "loss": 0.2662, "step": 19071 }, { "epoch": 15.827385892116183, "grad_norm": 20.268157958984375, "learning_rate": 1.3673029045643153e-05, "loss": 0.3364, "step": 19072 }, { "epoch": 15.828215767634855, "grad_norm": 156.5028076171875, "learning_rate": 1.3672697095435686e-05, "loss": 1.2458, "step": 19073 }, { "epoch": 15.829045643153528, "grad_norm": 45.62596130371094, "learning_rate": 1.3672365145228218e-05, "loss": 0.9629, "step": 19074 }, { "epoch": 15.8298755186722, "grad_norm": 49.25597381591797, "learning_rate": 1.3672033195020748e-05, "loss": 0.7943, "step": 19075 }, { "epoch": 15.830705394190872, "grad_norm": 47.76519775390625, "learning_rate": 1.367170124481328e-05, "loss": 0.5401, "step": 19076 }, { "epoch": 15.831535269709544, "grad_norm": 43.146888732910156, "learning_rate": 1.3671369294605809e-05, "loss": 0.8476, "step": 19077 }, { "epoch": 15.832365145228216, "grad_norm": 10.572075843811035, "learning_rate": 1.3671037344398341e-05, "loss": 0.2692, "step": 19078 }, { "epoch": 15.833195020746889, "grad_norm": 42.86717987060547, "learning_rate": 1.3670705394190873e-05, "loss": 1.0169, "step": 19079 }, { "epoch": 15.83402489626556, "grad_norm": 48.84684753417969, "learning_rate": 1.3670373443983405e-05, "loss": 1.2002, "step": 19080 }, { "epoch": 15.834854771784233, "grad_norm": 40.24052429199219, "learning_rate": 1.3670041493775934e-05, "loss": 1.1347, "step": 19081 }, { "epoch": 15.835684647302905, "grad_norm": 96.66094970703125, 
"learning_rate": 1.3669709543568466e-05, "loss": 0.7643, "step": 19082 }, { "epoch": 15.836514522821577, "grad_norm": 55.53568649291992, "learning_rate": 1.3669377593360998e-05, "loss": 0.6519, "step": 19083 }, { "epoch": 15.83734439834025, "grad_norm": 47.8858642578125, "learning_rate": 1.3669045643153529e-05, "loss": 0.5662, "step": 19084 }, { "epoch": 15.838174273858922, "grad_norm": 28.18923568725586, "learning_rate": 1.3668713692946059e-05, "loss": 0.415, "step": 19085 }, { "epoch": 15.839004149377594, "grad_norm": 25.920930862426758, "learning_rate": 1.366838174273859e-05, "loss": 0.8153, "step": 19086 }, { "epoch": 15.839834024896266, "grad_norm": 43.50764083862305, "learning_rate": 1.3668049792531121e-05, "loss": 0.3686, "step": 19087 }, { "epoch": 15.840663900414938, "grad_norm": 70.2597427368164, "learning_rate": 1.3667717842323654e-05, "loss": 0.6161, "step": 19088 }, { "epoch": 15.84149377593361, "grad_norm": 26.449853897094727, "learning_rate": 1.3667385892116182e-05, "loss": 0.5684, "step": 19089 }, { "epoch": 15.842323651452283, "grad_norm": 30.53862953186035, "learning_rate": 1.3667053941908714e-05, "loss": 0.6332, "step": 19090 }, { "epoch": 15.843153526970955, "grad_norm": 53.059791564941406, "learning_rate": 1.3666721991701247e-05, "loss": 1.3667, "step": 19091 }, { "epoch": 15.843983402489627, "grad_norm": 39.57944869995117, "learning_rate": 1.3666390041493779e-05, "loss": 1.0136, "step": 19092 }, { "epoch": 15.8448132780083, "grad_norm": 45.67530822753906, "learning_rate": 1.3666058091286307e-05, "loss": 1.1281, "step": 19093 }, { "epoch": 15.845643153526972, "grad_norm": 30.799957275390625, "learning_rate": 1.366572614107884e-05, "loss": 0.6312, "step": 19094 }, { "epoch": 15.846473029045644, "grad_norm": 40.452552795410156, "learning_rate": 1.366539419087137e-05, "loss": 0.66, "step": 19095 }, { "epoch": 15.847302904564316, "grad_norm": 88.52955627441406, "learning_rate": 1.3665062240663902e-05, "loss": 1.0381, "step": 19096 }, { "epoch": 
15.848132780082988, "grad_norm": 34.41126251220703, "learning_rate": 1.3664730290456432e-05, "loss": 0.7017, "step": 19097 }, { "epoch": 15.84896265560166, "grad_norm": 26.131486892700195, "learning_rate": 1.3664398340248963e-05, "loss": 0.6136, "step": 19098 }, { "epoch": 15.849792531120332, "grad_norm": 45.93892288208008, "learning_rate": 1.3664066390041495e-05, "loss": 0.7781, "step": 19099 }, { "epoch": 15.850622406639005, "grad_norm": 71.77468872070312, "learning_rate": 1.3663734439834027e-05, "loss": 1.1795, "step": 19100 }, { "epoch": 15.851452282157677, "grad_norm": 49.79068374633789, "learning_rate": 1.3663402489626557e-05, "loss": 0.5244, "step": 19101 }, { "epoch": 15.852282157676349, "grad_norm": 34.83695602416992, "learning_rate": 1.3663070539419088e-05, "loss": 1.0554, "step": 19102 }, { "epoch": 15.853112033195021, "grad_norm": 36.46366882324219, "learning_rate": 1.366273858921162e-05, "loss": 0.8967, "step": 19103 }, { "epoch": 15.853941908713693, "grad_norm": 43.88405990600586, "learning_rate": 1.366240663900415e-05, "loss": 0.9702, "step": 19104 }, { "epoch": 15.854771784232366, "grad_norm": 38.147708892822266, "learning_rate": 1.3662074688796682e-05, "loss": 0.582, "step": 19105 }, { "epoch": 15.855601659751038, "grad_norm": 28.646963119506836, "learning_rate": 1.3661742738589213e-05, "loss": 0.4276, "step": 19106 }, { "epoch": 15.85643153526971, "grad_norm": 64.2640151977539, "learning_rate": 1.3661410788381743e-05, "loss": 1.0288, "step": 19107 }, { "epoch": 15.857261410788382, "grad_norm": 89.33770751953125, "learning_rate": 1.3661078838174275e-05, "loss": 0.7381, "step": 19108 }, { "epoch": 15.858091286307054, "grad_norm": 39.502986907958984, "learning_rate": 1.3660746887966807e-05, "loss": 1.2342, "step": 19109 }, { "epoch": 15.858921161825727, "grad_norm": 46.189605712890625, "learning_rate": 1.3660414937759336e-05, "loss": 0.7709, "step": 19110 }, { "epoch": 15.859751037344399, "grad_norm": 11.948304176330566, "learning_rate": 
1.3660082987551868e-05, "loss": 0.3129, "step": 19111 }, { "epoch": 15.860580912863071, "grad_norm": 34.0445556640625, "learning_rate": 1.36597510373444e-05, "loss": 1.0628, "step": 19112 }, { "epoch": 15.861410788381743, "grad_norm": 51.72134780883789, "learning_rate": 1.365941908713693e-05, "loss": 1.7841, "step": 19113 }, { "epoch": 15.862240663900415, "grad_norm": 40.74916076660156, "learning_rate": 1.3659087136929461e-05, "loss": 1.2434, "step": 19114 }, { "epoch": 15.863070539419088, "grad_norm": 29.80098533630371, "learning_rate": 1.3658755186721993e-05, "loss": 0.8305, "step": 19115 }, { "epoch": 15.86390041493776, "grad_norm": 24.551538467407227, "learning_rate": 1.3658423236514524e-05, "loss": 0.3378, "step": 19116 }, { "epoch": 15.864730290456432, "grad_norm": 42.5313606262207, "learning_rate": 1.3658091286307056e-05, "loss": 1.399, "step": 19117 }, { "epoch": 15.865560165975104, "grad_norm": 19.679933547973633, "learning_rate": 1.3657759336099585e-05, "loss": 0.4513, "step": 19118 }, { "epoch": 15.866390041493776, "grad_norm": 77.02513122558594, "learning_rate": 1.3657427385892117e-05, "loss": 1.4635, "step": 19119 }, { "epoch": 15.867219917012449, "grad_norm": 29.2365779876709, "learning_rate": 1.3657095435684649e-05, "loss": 0.4132, "step": 19120 }, { "epoch": 15.86804979253112, "grad_norm": 27.269968032836914, "learning_rate": 1.3656763485477181e-05, "loss": 0.4569, "step": 19121 }, { "epoch": 15.868879668049793, "grad_norm": 26.219161987304688, "learning_rate": 1.365643153526971e-05, "loss": 0.6159, "step": 19122 }, { "epoch": 15.869709543568465, "grad_norm": 49.82868576049805, "learning_rate": 1.3656099585062242e-05, "loss": 0.9787, "step": 19123 }, { "epoch": 15.870539419087137, "grad_norm": 31.53469467163086, "learning_rate": 1.3655767634854772e-05, "loss": 1.3839, "step": 19124 }, { "epoch": 15.87136929460581, "grad_norm": 22.532384872436523, "learning_rate": 1.3655435684647304e-05, "loss": 0.4017, "step": 19125 }, { "epoch": 15.872199170124482, 
"grad_norm": 22.946317672729492, "learning_rate": 1.3655103734439835e-05, "loss": 0.4887, "step": 19126 }, { "epoch": 15.873029045643154, "grad_norm": 37.61653137207031, "learning_rate": 1.3654771784232365e-05, "loss": 0.9292, "step": 19127 }, { "epoch": 15.873858921161826, "grad_norm": 25.8229923248291, "learning_rate": 1.3654439834024897e-05, "loss": 1.0416, "step": 19128 }, { "epoch": 15.874688796680498, "grad_norm": 48.33757781982422, "learning_rate": 1.365410788381743e-05, "loss": 1.2305, "step": 19129 }, { "epoch": 15.87551867219917, "grad_norm": 66.76782989501953, "learning_rate": 1.3653775933609961e-05, "loss": 1.2411, "step": 19130 }, { "epoch": 15.876348547717843, "grad_norm": 46.308441162109375, "learning_rate": 1.365344398340249e-05, "loss": 0.7036, "step": 19131 }, { "epoch": 15.877178423236515, "grad_norm": 42.565277099609375, "learning_rate": 1.3653112033195022e-05, "loss": 0.5916, "step": 19132 }, { "epoch": 15.878008298755187, "grad_norm": 48.82793426513672, "learning_rate": 1.3652780082987553e-05, "loss": 1.0741, "step": 19133 }, { "epoch": 15.87883817427386, "grad_norm": 36.373260498046875, "learning_rate": 1.3652448132780085e-05, "loss": 1.2532, "step": 19134 }, { "epoch": 15.879668049792532, "grad_norm": 45.81296920776367, "learning_rate": 1.3652116182572615e-05, "loss": 1.2576, "step": 19135 }, { "epoch": 15.880497925311204, "grad_norm": 46.470909118652344, "learning_rate": 1.3651784232365146e-05, "loss": 0.9045, "step": 19136 }, { "epoch": 15.881327800829876, "grad_norm": 41.853675842285156, "learning_rate": 1.3651452282157678e-05, "loss": 0.4414, "step": 19137 }, { "epoch": 15.882157676348548, "grad_norm": 33.705291748046875, "learning_rate": 1.365112033195021e-05, "loss": 0.3926, "step": 19138 }, { "epoch": 15.88298755186722, "grad_norm": 107.58517456054688, "learning_rate": 1.3650788381742739e-05, "loss": 1.2112, "step": 19139 }, { "epoch": 15.883817427385893, "grad_norm": 62.33340835571289, "learning_rate": 1.365045643153527e-05, "loss": 
1.3217, "step": 19140 }, { "epoch": 15.884647302904565, "grad_norm": 21.037294387817383, "learning_rate": 1.3650124481327803e-05, "loss": 0.8463, "step": 19141 }, { "epoch": 15.885477178423237, "grad_norm": 27.491289138793945, "learning_rate": 1.3649792531120333e-05, "loss": 0.8443, "step": 19142 }, { "epoch": 15.88630705394191, "grad_norm": 20.833086013793945, "learning_rate": 1.3649460580912864e-05, "loss": 0.7236, "step": 19143 }, { "epoch": 15.887136929460581, "grad_norm": 40.85328674316406, "learning_rate": 1.3649128630705396e-05, "loss": 0.5143, "step": 19144 }, { "epoch": 15.887966804979254, "grad_norm": 29.450342178344727, "learning_rate": 1.3648796680497926e-05, "loss": 0.6504, "step": 19145 }, { "epoch": 15.888796680497926, "grad_norm": 26.785261154174805, "learning_rate": 1.3648464730290458e-05, "loss": 0.5911, "step": 19146 }, { "epoch": 15.889626556016598, "grad_norm": 43.42040252685547, "learning_rate": 1.3648132780082987e-05, "loss": 0.7972, "step": 19147 }, { "epoch": 15.89045643153527, "grad_norm": 30.59306526184082, "learning_rate": 1.3647800829875519e-05, "loss": 1.3922, "step": 19148 }, { "epoch": 15.891286307053942, "grad_norm": 12.906402587890625, "learning_rate": 1.3647468879668051e-05, "loss": 0.3926, "step": 19149 }, { "epoch": 15.892116182572614, "grad_norm": 36.21845626831055, "learning_rate": 1.3647136929460583e-05, "loss": 1.0743, "step": 19150 }, { "epoch": 15.892946058091287, "grad_norm": 15.049493789672852, "learning_rate": 1.3646804979253112e-05, "loss": 0.4944, "step": 19151 }, { "epoch": 15.893775933609959, "grad_norm": 26.163021087646484, "learning_rate": 1.3646473029045644e-05, "loss": 0.4887, "step": 19152 }, { "epoch": 15.894605809128631, "grad_norm": 34.81358337402344, "learning_rate": 1.3646141078838176e-05, "loss": 0.9886, "step": 19153 }, { "epoch": 15.895435684647303, "grad_norm": 37.79833984375, "learning_rate": 1.3645809128630707e-05, "loss": 0.6576, "step": 19154 }, { "epoch": 15.896265560165975, "grad_norm": 
24.775850296020508, "learning_rate": 1.3645477178423239e-05, "loss": 0.5746, "step": 19155 }, { "epoch": 15.897095435684648, "grad_norm": 29.00259780883789, "learning_rate": 1.3645145228215767e-05, "loss": 0.5267, "step": 19156 }, { "epoch": 15.89792531120332, "grad_norm": 56.26164627075195, "learning_rate": 1.36448132780083e-05, "loss": 0.7669, "step": 19157 }, { "epoch": 15.898755186721992, "grad_norm": 21.020950317382812, "learning_rate": 1.3644481327800832e-05, "loss": 0.5232, "step": 19158 }, { "epoch": 15.899585062240664, "grad_norm": 43.19089889526367, "learning_rate": 1.3644149377593364e-05, "loss": 0.7731, "step": 19159 }, { "epoch": 15.900414937759336, "grad_norm": 30.606876373291016, "learning_rate": 1.3643817427385892e-05, "loss": 0.8092, "step": 19160 }, { "epoch": 15.901244813278009, "grad_norm": 68.38330841064453, "learning_rate": 1.3643485477178425e-05, "loss": 1.1336, "step": 19161 }, { "epoch": 15.90207468879668, "grad_norm": 43.23575210571289, "learning_rate": 1.3643153526970957e-05, "loss": 0.6948, "step": 19162 }, { "epoch": 15.902904564315353, "grad_norm": 32.56697463989258, "learning_rate": 1.3642821576763487e-05, "loss": 0.7874, "step": 19163 }, { "epoch": 15.903734439834025, "grad_norm": 39.33596420288086, "learning_rate": 1.3642489626556018e-05, "loss": 0.4319, "step": 19164 }, { "epoch": 15.904564315352697, "grad_norm": 45.33266067504883, "learning_rate": 1.3642157676348548e-05, "loss": 1.0088, "step": 19165 }, { "epoch": 15.90539419087137, "grad_norm": 102.86712646484375, "learning_rate": 1.364182572614108e-05, "loss": 0.7285, "step": 19166 }, { "epoch": 15.906224066390042, "grad_norm": 39.19039535522461, "learning_rate": 1.3641493775933612e-05, "loss": 0.9071, "step": 19167 }, { "epoch": 15.907053941908714, "grad_norm": 67.36791229248047, "learning_rate": 1.3641161825726141e-05, "loss": 0.5935, "step": 19168 }, { "epoch": 15.907883817427386, "grad_norm": 23.051950454711914, "learning_rate": 1.3640829875518673e-05, "loss": 0.3806, 
"step": 19169 }, { "epoch": 15.908713692946058, "grad_norm": 22.96479034423828, "learning_rate": 1.3640497925311205e-05, "loss": 0.6194, "step": 19170 }, { "epoch": 15.90954356846473, "grad_norm": 120.38052368164062, "learning_rate": 1.3640165975103735e-05, "loss": 0.9207, "step": 19171 }, { "epoch": 15.910373443983403, "grad_norm": 101.0389404296875, "learning_rate": 1.3639834024896266e-05, "loss": 0.8019, "step": 19172 }, { "epoch": 15.911203319502075, "grad_norm": 33.419219970703125, "learning_rate": 1.3639502074688798e-05, "loss": 0.758, "step": 19173 }, { "epoch": 15.912033195020747, "grad_norm": 41.28553771972656, "learning_rate": 1.3639170124481328e-05, "loss": 0.6738, "step": 19174 }, { "epoch": 15.91286307053942, "grad_norm": 76.00251007080078, "learning_rate": 1.363883817427386e-05, "loss": 1.1204, "step": 19175 }, { "epoch": 15.913692946058092, "grad_norm": 21.792142868041992, "learning_rate": 1.3638506224066391e-05, "loss": 0.5595, "step": 19176 }, { "epoch": 15.914522821576764, "grad_norm": 43.349727630615234, "learning_rate": 1.3638174273858921e-05, "loss": 0.7912, "step": 19177 }, { "epoch": 15.915352697095436, "grad_norm": 51.935081481933594, "learning_rate": 1.3637842323651453e-05, "loss": 0.5663, "step": 19178 }, { "epoch": 15.916182572614108, "grad_norm": 66.86454010009766, "learning_rate": 1.3637510373443986e-05, "loss": 0.6946, "step": 19179 }, { "epoch": 15.91701244813278, "grad_norm": 34.72559356689453, "learning_rate": 1.3637178423236514e-05, "loss": 0.9472, "step": 19180 }, { "epoch": 15.917842323651453, "grad_norm": 29.098127365112305, "learning_rate": 1.3636846473029046e-05, "loss": 0.9378, "step": 19181 }, { "epoch": 15.918672199170125, "grad_norm": 21.81303596496582, "learning_rate": 1.3636514522821579e-05, "loss": 0.3624, "step": 19182 }, { "epoch": 15.919502074688797, "grad_norm": 40.32904815673828, "learning_rate": 1.3636182572614109e-05, "loss": 0.6913, "step": 19183 }, { "epoch": 15.92033195020747, "grad_norm": 67.34618377685547, 
"learning_rate": 1.3635850622406641e-05, "loss": 1.1154, "step": 19184 }, { "epoch": 15.921161825726141, "grad_norm": 66.7310562133789, "learning_rate": 1.363551867219917e-05, "loss": 1.1642, "step": 19185 }, { "epoch": 15.921991701244814, "grad_norm": 25.3960018157959, "learning_rate": 1.3635186721991702e-05, "loss": 0.4586, "step": 19186 }, { "epoch": 15.922821576763486, "grad_norm": 25.081266403198242, "learning_rate": 1.3634854771784234e-05, "loss": 0.9362, "step": 19187 }, { "epoch": 15.923651452282158, "grad_norm": 49.41946792602539, "learning_rate": 1.3634522821576766e-05, "loss": 0.7836, "step": 19188 }, { "epoch": 15.92448132780083, "grad_norm": 24.168691635131836, "learning_rate": 1.3634190871369295e-05, "loss": 0.2974, "step": 19189 }, { "epoch": 15.925311203319502, "grad_norm": 43.046531677246094, "learning_rate": 1.3633858921161827e-05, "loss": 0.7621, "step": 19190 }, { "epoch": 15.926141078838175, "grad_norm": 24.894662857055664, "learning_rate": 1.3633526970954359e-05, "loss": 0.405, "step": 19191 }, { "epoch": 15.926970954356847, "grad_norm": 32.025421142578125, "learning_rate": 1.363319502074689e-05, "loss": 1.0098, "step": 19192 }, { "epoch": 15.927800829875519, "grad_norm": 37.73101043701172, "learning_rate": 1.363286307053942e-05, "loss": 1.0982, "step": 19193 }, { "epoch": 15.928630705394191, "grad_norm": 23.97945785522461, "learning_rate": 1.363253112033195e-05, "loss": 0.5054, "step": 19194 }, { "epoch": 15.929460580912863, "grad_norm": 58.94740676879883, "learning_rate": 1.3632199170124482e-05, "loss": 0.9102, "step": 19195 }, { "epoch": 15.930290456431536, "grad_norm": 24.78770637512207, "learning_rate": 1.3631867219917014e-05, "loss": 0.6163, "step": 19196 }, { "epoch": 15.931120331950208, "grad_norm": 33.4680061340332, "learning_rate": 1.3631535269709543e-05, "loss": 0.7712, "step": 19197 }, { "epoch": 15.93195020746888, "grad_norm": 26.25690269470215, "learning_rate": 1.3631203319502075e-05, "loss": 0.7161, "step": 19198 }, { "epoch": 
15.932780082987552, "grad_norm": 75.09529876708984, "learning_rate": 1.3630871369294607e-05, "loss": 1.1654, "step": 19199 }, { "epoch": 15.933609958506224, "grad_norm": 24.390161514282227, "learning_rate": 1.363053941908714e-05, "loss": 0.5509, "step": 19200 }, { "epoch": 15.934439834024896, "grad_norm": 39.827117919921875, "learning_rate": 1.3630207468879668e-05, "loss": 0.6114, "step": 19201 }, { "epoch": 15.935269709543569, "grad_norm": 42.93565368652344, "learning_rate": 1.36298755186722e-05, "loss": 1.0602, "step": 19202 }, { "epoch": 15.936099585062241, "grad_norm": 32.687255859375, "learning_rate": 1.362954356846473e-05, "loss": 0.6397, "step": 19203 }, { "epoch": 15.936929460580913, "grad_norm": 39.99262237548828, "learning_rate": 1.3629211618257263e-05, "loss": 1.0157, "step": 19204 }, { "epoch": 15.937759336099585, "grad_norm": 28.933961868286133, "learning_rate": 1.3628879668049793e-05, "loss": 0.8812, "step": 19205 }, { "epoch": 15.938589211618257, "grad_norm": 41.39279556274414, "learning_rate": 1.3628547717842324e-05, "loss": 0.8829, "step": 19206 }, { "epoch": 15.93941908713693, "grad_norm": 26.660526275634766, "learning_rate": 1.3628215767634856e-05, "loss": 0.5043, "step": 19207 }, { "epoch": 15.940248962655602, "grad_norm": 54.8006591796875, "learning_rate": 1.3627883817427388e-05, "loss": 0.729, "step": 19208 }, { "epoch": 15.941078838174274, "grad_norm": 24.87523078918457, "learning_rate": 1.362755186721992e-05, "loss": 0.5047, "step": 19209 }, { "epoch": 15.941908713692946, "grad_norm": 25.746150970458984, "learning_rate": 1.3627219917012449e-05, "loss": 0.4595, "step": 19210 }, { "epoch": 15.942738589211618, "grad_norm": 20.673320770263672, "learning_rate": 1.362688796680498e-05, "loss": 0.4291, "step": 19211 }, { "epoch": 15.94356846473029, "grad_norm": 36.336151123046875, "learning_rate": 1.3626556016597511e-05, "loss": 0.4135, "step": 19212 }, { "epoch": 15.944398340248963, "grad_norm": 13.592964172363281, "learning_rate": 
1.3626224066390043e-05, "loss": 0.3734, "step": 19213 }, { "epoch": 15.945228215767635, "grad_norm": 67.50108337402344, "learning_rate": 1.3625892116182574e-05, "loss": 1.0224, "step": 19214 }, { "epoch": 15.946058091286307, "grad_norm": 22.6779727935791, "learning_rate": 1.3625560165975104e-05, "loss": 0.5454, "step": 19215 }, { "epoch": 15.94688796680498, "grad_norm": 40.5654182434082, "learning_rate": 1.3625228215767636e-05, "loss": 0.5129, "step": 19216 }, { "epoch": 15.947717842323652, "grad_norm": 28.625381469726562, "learning_rate": 1.3624896265560168e-05, "loss": 0.8187, "step": 19217 }, { "epoch": 15.948547717842324, "grad_norm": 10.31014633178711, "learning_rate": 1.3624564315352697e-05, "loss": 0.3953, "step": 19218 }, { "epoch": 15.949377593360996, "grad_norm": 21.346529006958008, "learning_rate": 1.362423236514523e-05, "loss": 0.7485, "step": 19219 }, { "epoch": 15.950207468879668, "grad_norm": 29.691904067993164, "learning_rate": 1.3623900414937761e-05, "loss": 0.7704, "step": 19220 }, { "epoch": 15.95103734439834, "grad_norm": 12.081551551818848, "learning_rate": 1.3623568464730292e-05, "loss": 0.261, "step": 19221 }, { "epoch": 15.951867219917013, "grad_norm": 58.61458969116211, "learning_rate": 1.3623236514522822e-05, "loss": 1.6947, "step": 19222 }, { "epoch": 15.952697095435685, "grad_norm": 62.17714309692383, "learning_rate": 1.3622904564315354e-05, "loss": 0.9105, "step": 19223 }, { "epoch": 15.953526970954357, "grad_norm": 38.341495513916016, "learning_rate": 1.3622572614107885e-05, "loss": 0.6063, "step": 19224 }, { "epoch": 15.95435684647303, "grad_norm": 25.62385368347168, "learning_rate": 1.3622240663900417e-05, "loss": 0.749, "step": 19225 }, { "epoch": 15.955186721991701, "grad_norm": 24.359472274780273, "learning_rate": 1.3621908713692946e-05, "loss": 0.3696, "step": 19226 }, { "epoch": 15.956016597510374, "grad_norm": 42.21541213989258, "learning_rate": 1.3621576763485478e-05, "loss": 0.7501, "step": 19227 }, { "epoch": 
15.956846473029046, "grad_norm": 30.632770538330078, "learning_rate": 1.362124481327801e-05, "loss": 0.9641, "step": 19228 }, { "epoch": 15.957676348547718, "grad_norm": 32.91746139526367, "learning_rate": 1.3620912863070542e-05, "loss": 0.741, "step": 19229 }, { "epoch": 15.95850622406639, "grad_norm": 35.51979064941406, "learning_rate": 1.362058091286307e-05, "loss": 0.778, "step": 19230 }, { "epoch": 15.959336099585062, "grad_norm": 84.35643768310547, "learning_rate": 1.3620248962655603e-05, "loss": 0.864, "step": 19231 }, { "epoch": 15.960165975103735, "grad_norm": 42.95826721191406, "learning_rate": 1.3619917012448133e-05, "loss": 0.4816, "step": 19232 }, { "epoch": 15.960995850622407, "grad_norm": 57.42134475708008, "learning_rate": 1.3619585062240665e-05, "loss": 0.7907, "step": 19233 }, { "epoch": 15.961825726141079, "grad_norm": 87.001220703125, "learning_rate": 1.3619253112033197e-05, "loss": 0.8508, "step": 19234 }, { "epoch": 15.962655601659751, "grad_norm": 42.69491195678711, "learning_rate": 1.3618921161825726e-05, "loss": 0.5898, "step": 19235 }, { "epoch": 15.963485477178423, "grad_norm": 34.11935043334961, "learning_rate": 1.3618589211618258e-05, "loss": 0.6008, "step": 19236 }, { "epoch": 15.964315352697096, "grad_norm": 48.35453796386719, "learning_rate": 1.361825726141079e-05, "loss": 0.5101, "step": 19237 }, { "epoch": 15.965145228215768, "grad_norm": 36.40116500854492, "learning_rate": 1.3617925311203322e-05, "loss": 1.1175, "step": 19238 }, { "epoch": 15.96597510373444, "grad_norm": 42.97867202758789, "learning_rate": 1.3617593360995851e-05, "loss": 0.9625, "step": 19239 }, { "epoch": 15.966804979253112, "grad_norm": 25.195289611816406, "learning_rate": 1.3617261410788383e-05, "loss": 0.441, "step": 19240 }, { "epoch": 15.967634854771784, "grad_norm": 29.682125091552734, "learning_rate": 1.3616929460580914e-05, "loss": 0.3864, "step": 19241 }, { "epoch": 15.968464730290457, "grad_norm": 20.854459762573242, "learning_rate": 
1.3616597510373446e-05, "loss": 0.5497, "step": 19242 }, { "epoch": 15.969294605809129, "grad_norm": 38.60763931274414, "learning_rate": 1.3616265560165976e-05, "loss": 0.5741, "step": 19243 }, { "epoch": 15.970124481327801, "grad_norm": 46.16656494140625, "learning_rate": 1.3615933609958506e-05, "loss": 0.8631, "step": 19244 }, { "epoch": 15.970954356846473, "grad_norm": 43.41602325439453, "learning_rate": 1.3615601659751039e-05, "loss": 1.0103, "step": 19245 }, { "epoch": 15.971784232365145, "grad_norm": 36.590755462646484, "learning_rate": 1.361526970954357e-05, "loss": 0.7926, "step": 19246 }, { "epoch": 15.972614107883818, "grad_norm": 31.491771697998047, "learning_rate": 1.36149377593361e-05, "loss": 0.5678, "step": 19247 }, { "epoch": 15.97344398340249, "grad_norm": 43.59025955200195, "learning_rate": 1.3614605809128632e-05, "loss": 0.7411, "step": 19248 }, { "epoch": 15.974273858921162, "grad_norm": 38.49297332763672, "learning_rate": 1.3614273858921164e-05, "loss": 0.4002, "step": 19249 }, { "epoch": 15.975103734439834, "grad_norm": 121.29963684082031, "learning_rate": 1.3613941908713694e-05, "loss": 1.1209, "step": 19250 }, { "epoch": 15.975933609958506, "grad_norm": 47.195735931396484, "learning_rate": 1.3613609958506224e-05, "loss": 0.7883, "step": 19251 }, { "epoch": 15.976763485477179, "grad_norm": 21.757715225219727, "learning_rate": 1.3613278008298757e-05, "loss": 0.4167, "step": 19252 }, { "epoch": 15.97759336099585, "grad_norm": 72.68899536132812, "learning_rate": 1.3612946058091287e-05, "loss": 1.1121, "step": 19253 }, { "epoch": 15.978423236514523, "grad_norm": 32.56765365600586, "learning_rate": 1.3612614107883819e-05, "loss": 0.3815, "step": 19254 }, { "epoch": 15.979253112033195, "grad_norm": 61.48192596435547, "learning_rate": 1.3612282157676348e-05, "loss": 0.5367, "step": 19255 }, { "epoch": 15.980082987551867, "grad_norm": 62.26150894165039, "learning_rate": 1.361195020746888e-05, "loss": 1.2994, "step": 19256 }, { "epoch": 
15.98091286307054, "grad_norm": 33.05610275268555, "learning_rate": 1.3611618257261412e-05, "loss": 0.476, "step": 19257 }, { "epoch": 15.981742738589212, "grad_norm": 44.99143600463867, "learning_rate": 1.3611286307053944e-05, "loss": 1.0099, "step": 19258 }, { "epoch": 15.982572614107884, "grad_norm": 49.90045928955078, "learning_rate": 1.3610954356846473e-05, "loss": 0.5957, "step": 19259 }, { "epoch": 15.983402489626556, "grad_norm": 20.25307846069336, "learning_rate": 1.3610622406639005e-05, "loss": 0.5763, "step": 19260 }, { "epoch": 15.984232365145228, "grad_norm": 69.53738403320312, "learning_rate": 1.3610290456431537e-05, "loss": 0.5436, "step": 19261 }, { "epoch": 15.9850622406639, "grad_norm": 58.13890075683594, "learning_rate": 1.3609958506224067e-05, "loss": 1.1548, "step": 19262 }, { "epoch": 15.985892116182573, "grad_norm": 45.83788299560547, "learning_rate": 1.36096265560166e-05, "loss": 1.0928, "step": 19263 }, { "epoch": 15.986721991701245, "grad_norm": 30.37890625, "learning_rate": 1.3609294605809128e-05, "loss": 0.3267, "step": 19264 }, { "epoch": 15.987551867219917, "grad_norm": 27.679889678955078, "learning_rate": 1.360896265560166e-05, "loss": 0.7915, "step": 19265 }, { "epoch": 15.98838174273859, "grad_norm": 43.612525939941406, "learning_rate": 1.3608630705394193e-05, "loss": 0.983, "step": 19266 }, { "epoch": 15.989211618257261, "grad_norm": 23.721450805664062, "learning_rate": 1.3608298755186725e-05, "loss": 0.4343, "step": 19267 }, { "epoch": 15.990041493775934, "grad_norm": 28.174028396606445, "learning_rate": 1.3607966804979253e-05, "loss": 0.4326, "step": 19268 }, { "epoch": 15.990871369294606, "grad_norm": 43.38072204589844, "learning_rate": 1.3607634854771785e-05, "loss": 0.9258, "step": 19269 }, { "epoch": 15.991701244813278, "grad_norm": 40.856136322021484, "learning_rate": 1.3607302904564318e-05, "loss": 0.9127, "step": 19270 }, { "epoch": 15.99253112033195, "grad_norm": 39.61643600463867, "learning_rate": 1.3606970954356848e-05, 
"loss": 0.5439, "step": 19271 }, { "epoch": 15.993360995850622, "grad_norm": 59.85895919799805, "learning_rate": 1.3606639004149378e-05, "loss": 0.8352, "step": 19272 }, { "epoch": 15.994190871369295, "grad_norm": 39.07185363769531, "learning_rate": 1.3606307053941909e-05, "loss": 0.8874, "step": 19273 }, { "epoch": 15.995020746887967, "grad_norm": 49.16483688354492, "learning_rate": 1.3605975103734441e-05, "loss": 0.9938, "step": 19274 }, { "epoch": 15.995850622406639, "grad_norm": 31.702482223510742, "learning_rate": 1.3605643153526973e-05, "loss": 0.8511, "step": 19275 }, { "epoch": 15.996680497925311, "grad_norm": 31.020610809326172, "learning_rate": 1.3605311203319502e-05, "loss": 0.7085, "step": 19276 }, { "epoch": 15.997510373443983, "grad_norm": 37.47185134887695, "learning_rate": 1.3604979253112034e-05, "loss": 0.7508, "step": 19277 }, { "epoch": 15.998340248962656, "grad_norm": 48.639610290527344, "learning_rate": 1.3604647302904566e-05, "loss": 1.1368, "step": 19278 }, { "epoch": 15.999170124481328, "grad_norm": 47.29324722290039, "learning_rate": 1.3604315352697098e-05, "loss": 1.0723, "step": 19279 }, { "epoch": 16.0, "grad_norm": 21.79613494873047, "learning_rate": 1.3603983402489627e-05, "loss": 0.6264, "step": 19280 }, { "epoch": 16.000829875518672, "grad_norm": 50.70699691772461, "learning_rate": 1.3603651452282159e-05, "loss": 0.5269, "step": 19281 }, { "epoch": 16.001659751037344, "grad_norm": 17.947378158569336, "learning_rate": 1.360331950207469e-05, "loss": 0.3383, "step": 19282 }, { "epoch": 16.002489626556017, "grad_norm": 77.5477066040039, "learning_rate": 1.3602987551867221e-05, "loss": 0.7385, "step": 19283 }, { "epoch": 16.00331950207469, "grad_norm": 37.0363655090332, "learning_rate": 1.3602655601659752e-05, "loss": 1.0956, "step": 19284 }, { "epoch": 16.00414937759336, "grad_norm": 32.1049690246582, "learning_rate": 1.3602323651452282e-05, "loss": 0.4199, "step": 19285 }, { "epoch": 16.004979253112033, "grad_norm": 22.293088912963867, 
"learning_rate": 1.3601991701244814e-05, "loss": 0.6364, "step": 19286 }, { "epoch": 16.005809128630705, "grad_norm": 24.9448299407959, "learning_rate": 1.3601659751037346e-05, "loss": 0.7842, "step": 19287 }, { "epoch": 16.006639004149378, "grad_norm": 34.38740158081055, "learning_rate": 1.3601327800829877e-05, "loss": 0.4743, "step": 19288 }, { "epoch": 16.00746887966805, "grad_norm": 30.117572784423828, "learning_rate": 1.3600995850622407e-05, "loss": 0.7528, "step": 19289 }, { "epoch": 16.008298755186722, "grad_norm": 25.240785598754883, "learning_rate": 1.360066390041494e-05, "loss": 0.5406, "step": 19290 }, { "epoch": 16.009128630705394, "grad_norm": 38.02204132080078, "learning_rate": 1.360033195020747e-05, "loss": 0.351, "step": 19291 }, { "epoch": 16.009958506224066, "grad_norm": 65.0567855834961, "learning_rate": 1.3600000000000002e-05, "loss": 0.846, "step": 19292 }, { "epoch": 16.01078838174274, "grad_norm": 38.80903625488281, "learning_rate": 1.3599668049792532e-05, "loss": 0.5752, "step": 19293 }, { "epoch": 16.01161825726141, "grad_norm": 33.970523834228516, "learning_rate": 1.3599336099585063e-05, "loss": 0.6004, "step": 19294 }, { "epoch": 16.012448132780083, "grad_norm": 18.154375076293945, "learning_rate": 1.3599004149377595e-05, "loss": 0.2738, "step": 19295 }, { "epoch": 16.013278008298755, "grad_norm": 39.702484130859375, "learning_rate": 1.3598672199170127e-05, "loss": 0.8831, "step": 19296 }, { "epoch": 16.014107883817427, "grad_norm": 35.106849670410156, "learning_rate": 1.3598340248962656e-05, "loss": 1.0585, "step": 19297 }, { "epoch": 16.0149377593361, "grad_norm": 19.86592674255371, "learning_rate": 1.3598008298755188e-05, "loss": 0.5377, "step": 19298 }, { "epoch": 16.01576763485477, "grad_norm": 33.80756378173828, "learning_rate": 1.359767634854772e-05, "loss": 0.662, "step": 19299 }, { "epoch": 16.016597510373444, "grad_norm": 18.652408599853516, "learning_rate": 1.359734439834025e-05, "loss": 0.3529, "step": 19300 }, { "epoch": 
16.017427385892116, "grad_norm": 111.1132583618164, "learning_rate": 1.359701244813278e-05, "loss": 0.7243, "step": 19301 }, { "epoch": 16.01825726141079, "grad_norm": 26.107177734375, "learning_rate": 1.3596680497925311e-05, "loss": 0.5126, "step": 19302 }, { "epoch": 16.01908713692946, "grad_norm": 79.73033142089844, "learning_rate": 1.3596348547717843e-05, "loss": 1.2815, "step": 19303 }, { "epoch": 16.019917012448133, "grad_norm": 58.63920974731445, "learning_rate": 1.3596016597510375e-05, "loss": 1.0302, "step": 19304 }, { "epoch": 16.020746887966805, "grad_norm": 34.59926223754883, "learning_rate": 1.3595684647302904e-05, "loss": 0.5517, "step": 19305 }, { "epoch": 16.021576763485477, "grad_norm": 30.526325225830078, "learning_rate": 1.3595352697095436e-05, "loss": 0.4866, "step": 19306 }, { "epoch": 16.02240663900415, "grad_norm": 32.798240661621094, "learning_rate": 1.3595020746887968e-05, "loss": 0.4725, "step": 19307 }, { "epoch": 16.02323651452282, "grad_norm": 75.69757080078125, "learning_rate": 1.35946887966805e-05, "loss": 0.4746, "step": 19308 }, { "epoch": 16.024066390041494, "grad_norm": 169.68162536621094, "learning_rate": 1.3594356846473029e-05, "loss": 0.8017, "step": 19309 }, { "epoch": 16.024896265560166, "grad_norm": 89.23942565917969, "learning_rate": 1.3594024896265561e-05, "loss": 1.8315, "step": 19310 }, { "epoch": 16.025726141078838, "grad_norm": 19.281124114990234, "learning_rate": 1.3593692946058092e-05, "loss": 0.3426, "step": 19311 }, { "epoch": 16.02655601659751, "grad_norm": 44.13534164428711, "learning_rate": 1.3593360995850624e-05, "loss": 0.5711, "step": 19312 }, { "epoch": 16.027385892116182, "grad_norm": 70.8602066040039, "learning_rate": 1.3593029045643156e-05, "loss": 0.4787, "step": 19313 }, { "epoch": 16.028215767634855, "grad_norm": 66.71186065673828, "learning_rate": 1.3592697095435685e-05, "loss": 0.3314, "step": 19314 }, { "epoch": 16.029045643153527, "grad_norm": 17.893016815185547, "learning_rate": 
1.3592365145228217e-05, "loss": 0.2317, "step": 19315 }, { "epoch": 16.0298755186722, "grad_norm": 27.63676643371582, "learning_rate": 1.3592033195020749e-05, "loss": 0.3928, "step": 19316 }, { "epoch": 16.03070539419087, "grad_norm": 21.532085418701172, "learning_rate": 1.3591701244813281e-05, "loss": 0.6591, "step": 19317 }, { "epoch": 16.031535269709543, "grad_norm": 19.774751663208008, "learning_rate": 1.359136929460581e-05, "loss": 0.3255, "step": 19318 }, { "epoch": 16.032365145228216, "grad_norm": 11.689875602722168, "learning_rate": 1.3591037344398342e-05, "loss": 0.2349, "step": 19319 }, { "epoch": 16.033195020746888, "grad_norm": 46.33852005004883, "learning_rate": 1.3590705394190872e-05, "loss": 1.0527, "step": 19320 }, { "epoch": 16.03402489626556, "grad_norm": 41.282012939453125, "learning_rate": 1.3590373443983404e-05, "loss": 0.6145, "step": 19321 }, { "epoch": 16.034854771784232, "grad_norm": 36.67019271850586, "learning_rate": 1.3590041493775935e-05, "loss": 0.722, "step": 19322 }, { "epoch": 16.035684647302904, "grad_norm": 40.536006927490234, "learning_rate": 1.3589709543568465e-05, "loss": 0.9442, "step": 19323 }, { "epoch": 16.036514522821577, "grad_norm": 89.79558563232422, "learning_rate": 1.3589377593360997e-05, "loss": 0.5776, "step": 19324 }, { "epoch": 16.03734439834025, "grad_norm": 33.63444137573242, "learning_rate": 1.358904564315353e-05, "loss": 0.7506, "step": 19325 }, { "epoch": 16.03817427385892, "grad_norm": 39.8585090637207, "learning_rate": 1.3588713692946058e-05, "loss": 0.5688, "step": 19326 }, { "epoch": 16.039004149377593, "grad_norm": 27.924291610717773, "learning_rate": 1.358838174273859e-05, "loss": 0.5144, "step": 19327 }, { "epoch": 16.039834024896265, "grad_norm": 51.38240051269531, "learning_rate": 1.3588049792531122e-05, "loss": 1.2676, "step": 19328 }, { "epoch": 16.040663900414938, "grad_norm": 62.91279983520508, "learning_rate": 1.3587717842323653e-05, "loss": 0.7759, "step": 19329 }, { "epoch": 16.04149377593361, 
"grad_norm": 100.52232360839844, "learning_rate": 1.3587385892116183e-05, "loss": 0.3603, "step": 19330 }, { "epoch": 16.042323651452282, "grad_norm": 33.25191116333008, "learning_rate": 1.3587053941908715e-05, "loss": 0.6478, "step": 19331 }, { "epoch": 16.043153526970954, "grad_norm": 53.903812408447266, "learning_rate": 1.3586721991701246e-05, "loss": 0.8694, "step": 19332 }, { "epoch": 16.043983402489626, "grad_norm": 35.61116027832031, "learning_rate": 1.3586390041493778e-05, "loss": 0.3299, "step": 19333 }, { "epoch": 16.0448132780083, "grad_norm": 51.24192810058594, "learning_rate": 1.3586058091286306e-05, "loss": 0.9677, "step": 19334 }, { "epoch": 16.04564315352697, "grad_norm": 69.76847839355469, "learning_rate": 1.3585726141078839e-05, "loss": 1.0947, "step": 19335 }, { "epoch": 16.046473029045643, "grad_norm": 47.38341522216797, "learning_rate": 1.358539419087137e-05, "loss": 1.2166, "step": 19336 }, { "epoch": 16.047302904564315, "grad_norm": 52.46830368041992, "learning_rate": 1.3585062240663903e-05, "loss": 0.7377, "step": 19337 }, { "epoch": 16.048132780082987, "grad_norm": 31.898412704467773, "learning_rate": 1.3584730290456431e-05, "loss": 0.5071, "step": 19338 }, { "epoch": 16.04896265560166, "grad_norm": 27.067710876464844, "learning_rate": 1.3584398340248964e-05, "loss": 0.5758, "step": 19339 }, { "epoch": 16.04979253112033, "grad_norm": 84.27674865722656, "learning_rate": 1.3584066390041496e-05, "loss": 0.9909, "step": 19340 }, { "epoch": 16.050622406639004, "grad_norm": 26.422618865966797, "learning_rate": 1.3583734439834026e-05, "loss": 0.426, "step": 19341 }, { "epoch": 16.051452282157676, "grad_norm": 30.091768264770508, "learning_rate": 1.3583402489626558e-05, "loss": 0.6878, "step": 19342 }, { "epoch": 16.05228215767635, "grad_norm": 65.11734771728516, "learning_rate": 1.3583070539419087e-05, "loss": 0.9995, "step": 19343 }, { "epoch": 16.05311203319502, "grad_norm": 14.891758918762207, "learning_rate": 1.3582738589211619e-05, "loss": 
0.2728, "step": 19344 }, { "epoch": 16.053941908713693, "grad_norm": 34.84830856323242, "learning_rate": 1.3582406639004151e-05, "loss": 0.5785, "step": 19345 }, { "epoch": 16.054771784232365, "grad_norm": 27.63554573059082, "learning_rate": 1.3582074688796683e-05, "loss": 0.6214, "step": 19346 }, { "epoch": 16.055601659751037, "grad_norm": 25.602876663208008, "learning_rate": 1.3581742738589212e-05, "loss": 0.5244, "step": 19347 }, { "epoch": 16.05643153526971, "grad_norm": 34.51087951660156, "learning_rate": 1.3581410788381744e-05, "loss": 0.4619, "step": 19348 }, { "epoch": 16.05726141078838, "grad_norm": 68.79843139648438, "learning_rate": 1.3581078838174274e-05, "loss": 0.9016, "step": 19349 }, { "epoch": 16.058091286307054, "grad_norm": 26.101030349731445, "learning_rate": 1.3580746887966807e-05, "loss": 0.3685, "step": 19350 }, { "epoch": 16.058921161825726, "grad_norm": 34.58590316772461, "learning_rate": 1.3580414937759337e-05, "loss": 0.4359, "step": 19351 }, { "epoch": 16.059751037344398, "grad_norm": 46.23960494995117, "learning_rate": 1.3580082987551867e-05, "loss": 0.6172, "step": 19352 }, { "epoch": 16.06058091286307, "grad_norm": 126.3233642578125, "learning_rate": 1.35797510373444e-05, "loss": 0.8642, "step": 19353 }, { "epoch": 16.061410788381743, "grad_norm": 51.427337646484375, "learning_rate": 1.3579419087136932e-05, "loss": 0.9888, "step": 19354 }, { "epoch": 16.062240663900415, "grad_norm": 30.781400680541992, "learning_rate": 1.357908713692946e-05, "loss": 0.9528, "step": 19355 }, { "epoch": 16.063070539419087, "grad_norm": 54.679649353027344, "learning_rate": 1.3578755186721992e-05, "loss": 0.5901, "step": 19356 }, { "epoch": 16.06390041493776, "grad_norm": 18.47075843811035, "learning_rate": 1.3578423236514525e-05, "loss": 0.335, "step": 19357 }, { "epoch": 16.06473029045643, "grad_norm": 27.95116424560547, "learning_rate": 1.3578091286307055e-05, "loss": 0.775, "step": 19358 }, { "epoch": 16.065560165975104, "grad_norm": 
52.644588470458984, "learning_rate": 1.3577759336099585e-05, "loss": 0.6136, "step": 19359 }, { "epoch": 16.066390041493776, "grad_norm": 28.775903701782227, "learning_rate": 1.3577427385892117e-05, "loss": 0.8211, "step": 19360 }, { "epoch": 16.067219917012448, "grad_norm": 33.300228118896484, "learning_rate": 1.3577095435684648e-05, "loss": 0.5701, "step": 19361 }, { "epoch": 16.06804979253112, "grad_norm": 82.09136962890625, "learning_rate": 1.357676348547718e-05, "loss": 0.485, "step": 19362 }, { "epoch": 16.068879668049792, "grad_norm": 120.13259887695312, "learning_rate": 1.357643153526971e-05, "loss": 0.8411, "step": 19363 }, { "epoch": 16.069709543568464, "grad_norm": 48.638214111328125, "learning_rate": 1.357609958506224e-05, "loss": 0.4555, "step": 19364 }, { "epoch": 16.070539419087137, "grad_norm": 25.512887954711914, "learning_rate": 1.3575767634854773e-05, "loss": 0.6225, "step": 19365 }, { "epoch": 16.07136929460581, "grad_norm": 26.881765365600586, "learning_rate": 1.3575435684647305e-05, "loss": 0.4974, "step": 19366 }, { "epoch": 16.07219917012448, "grad_norm": 16.922605514526367, "learning_rate": 1.3575103734439835e-05, "loss": 0.382, "step": 19367 }, { "epoch": 16.073029045643153, "grad_norm": 51.62948989868164, "learning_rate": 1.3574771784232366e-05, "loss": 0.7649, "step": 19368 }, { "epoch": 16.073858921161825, "grad_norm": 20.76728630065918, "learning_rate": 1.3574439834024898e-05, "loss": 0.4633, "step": 19369 }, { "epoch": 16.074688796680498, "grad_norm": 79.498046875, "learning_rate": 1.3574107883817428e-05, "loss": 0.3924, "step": 19370 }, { "epoch": 16.07551867219917, "grad_norm": 49.549922943115234, "learning_rate": 1.357377593360996e-05, "loss": 1.1403, "step": 19371 }, { "epoch": 16.076348547717842, "grad_norm": 27.26320457458496, "learning_rate": 1.357344398340249e-05, "loss": 0.5176, "step": 19372 }, { "epoch": 16.077178423236514, "grad_norm": 31.726844787597656, "learning_rate": 1.3573112033195021e-05, "loss": 0.5902, "step": 
19373 }, { "epoch": 16.078008298755186, "grad_norm": 22.63402557373047, "learning_rate": 1.3572780082987553e-05, "loss": 0.5208, "step": 19374 }, { "epoch": 16.07883817427386, "grad_norm": 48.896400451660156, "learning_rate": 1.3572448132780086e-05, "loss": 0.7634, "step": 19375 }, { "epoch": 16.07966804979253, "grad_norm": 29.567556381225586, "learning_rate": 1.3572116182572614e-05, "loss": 0.7027, "step": 19376 }, { "epoch": 16.080497925311203, "grad_norm": 82.20735931396484, "learning_rate": 1.3571784232365146e-05, "loss": 0.8866, "step": 19377 }, { "epoch": 16.081327800829875, "grad_norm": 28.714387893676758, "learning_rate": 1.3571452282157678e-05, "loss": 0.4467, "step": 19378 }, { "epoch": 16.082157676348547, "grad_norm": 23.292470932006836, "learning_rate": 1.3571120331950209e-05, "loss": 0.4789, "step": 19379 }, { "epoch": 16.08298755186722, "grad_norm": 50.922847747802734, "learning_rate": 1.357078838174274e-05, "loss": 0.8286, "step": 19380 }, { "epoch": 16.083817427385892, "grad_norm": 38.39535903930664, "learning_rate": 1.357045643153527e-05, "loss": 0.6729, "step": 19381 }, { "epoch": 16.084647302904564, "grad_norm": 85.9173812866211, "learning_rate": 1.3570124481327802e-05, "loss": 0.8839, "step": 19382 }, { "epoch": 16.085477178423236, "grad_norm": 60.620426177978516, "learning_rate": 1.3569792531120334e-05, "loss": 1.2524, "step": 19383 }, { "epoch": 16.08630705394191, "grad_norm": 35.73016357421875, "learning_rate": 1.3569460580912863e-05, "loss": 0.4009, "step": 19384 }, { "epoch": 16.08713692946058, "grad_norm": 31.5747013092041, "learning_rate": 1.3569128630705395e-05, "loss": 0.5827, "step": 19385 }, { "epoch": 16.087966804979253, "grad_norm": 76.77299499511719, "learning_rate": 1.3568796680497927e-05, "loss": 1.2894, "step": 19386 }, { "epoch": 16.088796680497925, "grad_norm": 16.0306339263916, "learning_rate": 1.3568464730290459e-05, "loss": 0.251, "step": 19387 }, { "epoch": 16.089626556016597, "grad_norm": 53.137062072753906, 
"learning_rate": 1.3568132780082988e-05, "loss": 0.5023, "step": 19388 }, { "epoch": 16.09045643153527, "grad_norm": 26.214235305786133, "learning_rate": 1.356780082987552e-05, "loss": 0.8549, "step": 19389 }, { "epoch": 16.09128630705394, "grad_norm": 90.99063110351562, "learning_rate": 1.356746887966805e-05, "loss": 0.8736, "step": 19390 }, { "epoch": 16.092116182572614, "grad_norm": 30.35910415649414, "learning_rate": 1.3567136929460582e-05, "loss": 0.3651, "step": 19391 }, { "epoch": 16.092946058091286, "grad_norm": 43.738468170166016, "learning_rate": 1.3566804979253114e-05, "loss": 1.0673, "step": 19392 }, { "epoch": 16.093775933609958, "grad_norm": 34.238182067871094, "learning_rate": 1.3566473029045643e-05, "loss": 0.371, "step": 19393 }, { "epoch": 16.09460580912863, "grad_norm": 28.511199951171875, "learning_rate": 1.3566141078838175e-05, "loss": 0.3162, "step": 19394 }, { "epoch": 16.095435684647303, "grad_norm": 44.488861083984375, "learning_rate": 1.3565809128630707e-05, "loss": 1.0021, "step": 19395 }, { "epoch": 16.096265560165975, "grad_norm": 53.786277770996094, "learning_rate": 1.356547717842324e-05, "loss": 0.63, "step": 19396 }, { "epoch": 16.097095435684647, "grad_norm": 33.3431282043457, "learning_rate": 1.3565145228215768e-05, "loss": 0.5171, "step": 19397 }, { "epoch": 16.09792531120332, "grad_norm": 33.75510025024414, "learning_rate": 1.35648132780083e-05, "loss": 0.614, "step": 19398 }, { "epoch": 16.09875518672199, "grad_norm": 38.515953063964844, "learning_rate": 1.356448132780083e-05, "loss": 0.5054, "step": 19399 }, { "epoch": 16.099585062240664, "grad_norm": 36.00232696533203, "learning_rate": 1.3564149377593363e-05, "loss": 0.5953, "step": 19400 }, { "epoch": 16.100414937759336, "grad_norm": 55.60415267944336, "learning_rate": 1.3563817427385893e-05, "loss": 0.7221, "step": 19401 }, { "epoch": 16.101244813278008, "grad_norm": 63.02545928955078, "learning_rate": 1.3563485477178424e-05, "loss": 1.5476, "step": 19402 }, { "epoch": 
16.10207468879668, "grad_norm": 20.62975311279297, "learning_rate": 1.3563153526970956e-05, "loss": 0.339, "step": 19403 }, { "epoch": 16.102904564315352, "grad_norm": 41.246883392333984, "learning_rate": 1.3562821576763488e-05, "loss": 0.7097, "step": 19404 }, { "epoch": 16.103734439834025, "grad_norm": 31.857013702392578, "learning_rate": 1.3562489626556017e-05, "loss": 0.5657, "step": 19405 }, { "epoch": 16.104564315352697, "grad_norm": 30.7247314453125, "learning_rate": 1.3562157676348549e-05, "loss": 0.5303, "step": 19406 }, { "epoch": 16.10539419087137, "grad_norm": 29.61648178100586, "learning_rate": 1.356182572614108e-05, "loss": 0.4924, "step": 19407 }, { "epoch": 16.10622406639004, "grad_norm": 41.76514434814453, "learning_rate": 1.3561493775933611e-05, "loss": 0.4682, "step": 19408 }, { "epoch": 16.107053941908713, "grad_norm": 37.77986145019531, "learning_rate": 1.3561161825726142e-05, "loss": 1.1214, "step": 19409 }, { "epoch": 16.107883817427386, "grad_norm": 38.17340850830078, "learning_rate": 1.3560829875518674e-05, "loss": 0.661, "step": 19410 }, { "epoch": 16.108713692946058, "grad_norm": 16.45071792602539, "learning_rate": 1.3560497925311204e-05, "loss": 0.2628, "step": 19411 }, { "epoch": 16.10954356846473, "grad_norm": 44.99611282348633, "learning_rate": 1.3560165975103736e-05, "loss": 0.3943, "step": 19412 }, { "epoch": 16.110373443983402, "grad_norm": 24.97607421875, "learning_rate": 1.3559834024896265e-05, "loss": 0.3452, "step": 19413 }, { "epoch": 16.111203319502074, "grad_norm": 40.77619552612305, "learning_rate": 1.3559502074688797e-05, "loss": 0.9775, "step": 19414 }, { "epoch": 16.112033195020746, "grad_norm": 40.09857177734375, "learning_rate": 1.355917012448133e-05, "loss": 1.0021, "step": 19415 }, { "epoch": 16.11286307053942, "grad_norm": 23.383039474487305, "learning_rate": 1.3558838174273861e-05, "loss": 0.4136, "step": 19416 }, { "epoch": 16.11369294605809, "grad_norm": 34.76485061645508, "learning_rate": 1.355850622406639e-05, 
"loss": 0.5761, "step": 19417 }, { "epoch": 16.114522821576763, "grad_norm": 19.911893844604492, "learning_rate": 1.3558174273858922e-05, "loss": 0.2403, "step": 19418 }, { "epoch": 16.115352697095435, "grad_norm": 33.1035041809082, "learning_rate": 1.3557842323651453e-05, "loss": 0.7084, "step": 19419 }, { "epoch": 16.116182572614107, "grad_norm": 31.497486114501953, "learning_rate": 1.3557510373443985e-05, "loss": 0.8194, "step": 19420 }, { "epoch": 16.11701244813278, "grad_norm": 29.247032165527344, "learning_rate": 1.3557178423236517e-05, "loss": 0.7794, "step": 19421 }, { "epoch": 16.117842323651452, "grad_norm": 27.29798126220703, "learning_rate": 1.3556846473029045e-05, "loss": 0.5914, "step": 19422 }, { "epoch": 16.118672199170124, "grad_norm": 26.10812759399414, "learning_rate": 1.3556514522821578e-05, "loss": 0.7324, "step": 19423 }, { "epoch": 16.119502074688796, "grad_norm": 40.10273361206055, "learning_rate": 1.355618257261411e-05, "loss": 0.5669, "step": 19424 }, { "epoch": 16.12033195020747, "grad_norm": 21.4647274017334, "learning_rate": 1.3555850622406642e-05, "loss": 0.362, "step": 19425 }, { "epoch": 16.12116182572614, "grad_norm": 60.741031646728516, "learning_rate": 1.355551867219917e-05, "loss": 1.3095, "step": 19426 }, { "epoch": 16.121991701244813, "grad_norm": 25.528778076171875, "learning_rate": 1.3555186721991703e-05, "loss": 0.6033, "step": 19427 }, { "epoch": 16.122821576763485, "grad_norm": 45.64008712768555, "learning_rate": 1.3554854771784233e-05, "loss": 0.9372, "step": 19428 }, { "epoch": 16.123651452282157, "grad_norm": 90.24552154541016, "learning_rate": 1.3554522821576765e-05, "loss": 0.9089, "step": 19429 }, { "epoch": 16.12448132780083, "grad_norm": 26.931406021118164, "learning_rate": 1.3554190871369296e-05, "loss": 0.5804, "step": 19430 }, { "epoch": 16.1253112033195, "grad_norm": 150.9734649658203, "learning_rate": 1.3553858921161826e-05, "loss": 0.5502, "step": 19431 }, { "epoch": 16.126141078838174, "grad_norm": 
46.81064224243164, "learning_rate": 1.3553526970954358e-05, "loss": 0.5434, "step": 19432 }, { "epoch": 16.126970954356846, "grad_norm": 69.77497100830078, "learning_rate": 1.355319502074689e-05, "loss": 0.7358, "step": 19433 }, { "epoch": 16.127800829875518, "grad_norm": 31.199708938598633, "learning_rate": 1.3552863070539419e-05, "loss": 0.5625, "step": 19434 }, { "epoch": 16.12863070539419, "grad_norm": 43.749298095703125, "learning_rate": 1.3552531120331951e-05, "loss": 0.814, "step": 19435 }, { "epoch": 16.129460580912863, "grad_norm": 49.69740676879883, "learning_rate": 1.3552199170124483e-05, "loss": 0.8851, "step": 19436 }, { "epoch": 16.130290456431535, "grad_norm": 29.063817977905273, "learning_rate": 1.3551867219917014e-05, "loss": 0.5384, "step": 19437 }, { "epoch": 16.131120331950207, "grad_norm": 46.54408645629883, "learning_rate": 1.3551535269709544e-05, "loss": 0.6088, "step": 19438 }, { "epoch": 16.13195020746888, "grad_norm": 23.78046226501465, "learning_rate": 1.3551203319502076e-05, "loss": 0.2876, "step": 19439 }, { "epoch": 16.13278008298755, "grad_norm": 42.95933151245117, "learning_rate": 1.3550871369294606e-05, "loss": 0.6324, "step": 19440 }, { "epoch": 16.133609958506224, "grad_norm": 21.042179107666016, "learning_rate": 1.3550539419087139e-05, "loss": 0.3998, "step": 19441 }, { "epoch": 16.134439834024896, "grad_norm": 30.413068771362305, "learning_rate": 1.3550207468879667e-05, "loss": 0.558, "step": 19442 }, { "epoch": 16.135269709543568, "grad_norm": 30.954687118530273, "learning_rate": 1.35498755186722e-05, "loss": 0.5017, "step": 19443 }, { "epoch": 16.13609958506224, "grad_norm": 58.98762130737305, "learning_rate": 1.3549543568464732e-05, "loss": 0.8186, "step": 19444 }, { "epoch": 16.136929460580912, "grad_norm": 35.18097686767578, "learning_rate": 1.3549211618257264e-05, "loss": 0.4566, "step": 19445 }, { "epoch": 16.137759336099585, "grad_norm": 32.00752639770508, "learning_rate": 1.3548879668049794e-05, "loss": 0.5386, "step": 
19446 }, { "epoch": 16.138589211618257, "grad_norm": 47.13591003417969, "learning_rate": 1.3548547717842324e-05, "loss": 1.2342, "step": 19447 }, { "epoch": 16.13941908713693, "grad_norm": 60.18550491333008, "learning_rate": 1.3548215767634857e-05, "loss": 1.1763, "step": 19448 }, { "epoch": 16.1402489626556, "grad_norm": 47.491539001464844, "learning_rate": 1.3547883817427387e-05, "loss": 0.9589, "step": 19449 }, { "epoch": 16.141078838174273, "grad_norm": 61.91720962524414, "learning_rate": 1.3547551867219919e-05, "loss": 1.0701, "step": 19450 }, { "epoch": 16.141908713692946, "grad_norm": 44.417091369628906, "learning_rate": 1.3547219917012448e-05, "loss": 0.4484, "step": 19451 }, { "epoch": 16.142738589211618, "grad_norm": 159.5121307373047, "learning_rate": 1.354688796680498e-05, "loss": 0.618, "step": 19452 }, { "epoch": 16.14356846473029, "grad_norm": 66.49415588378906, "learning_rate": 1.3546556016597512e-05, "loss": 0.9473, "step": 19453 }, { "epoch": 16.144398340248962, "grad_norm": 48.51332092285156, "learning_rate": 1.3546224066390044e-05, "loss": 0.8941, "step": 19454 }, { "epoch": 16.145228215767634, "grad_norm": 30.210023880004883, "learning_rate": 1.3545892116182573e-05, "loss": 0.5726, "step": 19455 }, { "epoch": 16.146058091286307, "grad_norm": 42.07880783081055, "learning_rate": 1.3545560165975105e-05, "loss": 0.5245, "step": 19456 }, { "epoch": 16.14688796680498, "grad_norm": 32.653831481933594, "learning_rate": 1.3545228215767637e-05, "loss": 0.5154, "step": 19457 }, { "epoch": 16.14771784232365, "grad_norm": 37.00153732299805, "learning_rate": 1.3544896265560167e-05, "loss": 0.5151, "step": 19458 }, { "epoch": 16.148547717842323, "grad_norm": 65.68439483642578, "learning_rate": 1.3544564315352698e-05, "loss": 0.8038, "step": 19459 }, { "epoch": 16.149377593360995, "grad_norm": 62.66651153564453, "learning_rate": 1.3544232365145228e-05, "loss": 0.7929, "step": 19460 }, { "epoch": 16.150207468879668, "grad_norm": 66.36579895019531, 
"learning_rate": 1.354390041493776e-05, "loss": 0.9449, "step": 19461 }, { "epoch": 16.15103734439834, "grad_norm": 22.48912811279297, "learning_rate": 1.3543568464730293e-05, "loss": 0.2199, "step": 19462 }, { "epoch": 16.151867219917012, "grad_norm": 38.73810958862305, "learning_rate": 1.3543236514522821e-05, "loss": 0.947, "step": 19463 }, { "epoch": 16.152697095435684, "grad_norm": 31.29863166809082, "learning_rate": 1.3542904564315353e-05, "loss": 0.7081, "step": 19464 }, { "epoch": 16.153526970954356, "grad_norm": 72.88130950927734, "learning_rate": 1.3542572614107885e-05, "loss": 1.2593, "step": 19465 }, { "epoch": 16.15435684647303, "grad_norm": 44.50312805175781, "learning_rate": 1.3542240663900416e-05, "loss": 0.9244, "step": 19466 }, { "epoch": 16.1551867219917, "grad_norm": 86.44933319091797, "learning_rate": 1.3541908713692946e-05, "loss": 0.4887, "step": 19467 }, { "epoch": 16.156016597510373, "grad_norm": 23.744098663330078, "learning_rate": 1.3541576763485478e-05, "loss": 0.7247, "step": 19468 }, { "epoch": 16.156846473029045, "grad_norm": 53.54940414428711, "learning_rate": 1.3541244813278009e-05, "loss": 1.4195, "step": 19469 }, { "epoch": 16.157676348547717, "grad_norm": 21.758155822753906, "learning_rate": 1.3540912863070541e-05, "loss": 0.5745, "step": 19470 }, { "epoch": 16.15850622406639, "grad_norm": 52.444664001464844, "learning_rate": 1.3540580912863073e-05, "loss": 0.9661, "step": 19471 }, { "epoch": 16.15933609958506, "grad_norm": 30.32815933227539, "learning_rate": 1.3540248962655602e-05, "loss": 1.1065, "step": 19472 }, { "epoch": 16.160165975103734, "grad_norm": 25.22618293762207, "learning_rate": 1.3539917012448134e-05, "loss": 0.3405, "step": 19473 }, { "epoch": 16.160995850622406, "grad_norm": 34.405216217041016, "learning_rate": 1.3539585062240666e-05, "loss": 0.8066, "step": 19474 }, { "epoch": 16.16182572614108, "grad_norm": 23.73233985900879, "learning_rate": 1.3539253112033196e-05, "loss": 0.3558, "step": 19475 }, { "epoch": 
16.16265560165975, "grad_norm": 16.19456672668457, "learning_rate": 1.3538921161825727e-05, "loss": 0.3582, "step": 19476 }, { "epoch": 16.163485477178423, "grad_norm": 31.58771324157715, "learning_rate": 1.3538589211618259e-05, "loss": 0.7171, "step": 19477 }, { "epoch": 16.164315352697095, "grad_norm": 42.59714126586914, "learning_rate": 1.353825726141079e-05, "loss": 0.3674, "step": 19478 }, { "epoch": 16.165145228215767, "grad_norm": 30.859434127807617, "learning_rate": 1.3537925311203321e-05, "loss": 0.734, "step": 19479 }, { "epoch": 16.16597510373444, "grad_norm": 19.030315399169922, "learning_rate": 1.3537593360995852e-05, "loss": 0.474, "step": 19480 }, { "epoch": 16.16680497925311, "grad_norm": 35.385948181152344, "learning_rate": 1.3537261410788382e-05, "loss": 0.6183, "step": 19481 }, { "epoch": 16.167634854771784, "grad_norm": 25.84025001525879, "learning_rate": 1.3536929460580914e-05, "loss": 0.5227, "step": 19482 }, { "epoch": 16.168464730290456, "grad_norm": 49.30347442626953, "learning_rate": 1.3536597510373446e-05, "loss": 0.4437, "step": 19483 }, { "epoch": 16.169294605809128, "grad_norm": 35.73270034790039, "learning_rate": 1.3536265560165975e-05, "loss": 0.7223, "step": 19484 }, { "epoch": 16.1701244813278, "grad_norm": 63.02073287963867, "learning_rate": 1.3535933609958507e-05, "loss": 1.042, "step": 19485 }, { "epoch": 16.170954356846472, "grad_norm": 34.7027587890625, "learning_rate": 1.353560165975104e-05, "loss": 0.6676, "step": 19486 }, { "epoch": 16.171784232365145, "grad_norm": 28.137887954711914, "learning_rate": 1.353526970954357e-05, "loss": 0.6443, "step": 19487 }, { "epoch": 16.172614107883817, "grad_norm": 22.674394607543945, "learning_rate": 1.35349377593361e-05, "loss": 0.2932, "step": 19488 }, { "epoch": 16.17344398340249, "grad_norm": 45.12051773071289, "learning_rate": 1.353460580912863e-05, "loss": 0.5294, "step": 19489 }, { "epoch": 16.17427385892116, "grad_norm": 50.27847671508789, "learning_rate": 1.3534273858921163e-05, 
"loss": 0.5776, "step": 19490 }, { "epoch": 16.175103734439833, "grad_norm": 27.12799072265625, "learning_rate": 1.3533941908713695e-05, "loss": 0.5233, "step": 19491 }, { "epoch": 16.175933609958506, "grad_norm": 21.318439483642578, "learning_rate": 1.3533609958506224e-05, "loss": 0.4391, "step": 19492 }, { "epoch": 16.176763485477178, "grad_norm": 29.652223587036133, "learning_rate": 1.3533278008298756e-05, "loss": 0.4317, "step": 19493 }, { "epoch": 16.17759336099585, "grad_norm": 34.2003059387207, "learning_rate": 1.3532946058091288e-05, "loss": 0.8336, "step": 19494 }, { "epoch": 16.178423236514522, "grad_norm": 28.181716918945312, "learning_rate": 1.353261410788382e-05, "loss": 0.5138, "step": 19495 }, { "epoch": 16.179253112033194, "grad_norm": 47.26136016845703, "learning_rate": 1.3532282157676349e-05, "loss": 0.693, "step": 19496 }, { "epoch": 16.180082987551867, "grad_norm": 20.63214874267578, "learning_rate": 1.353195020746888e-05, "loss": 0.4551, "step": 19497 }, { "epoch": 16.18091286307054, "grad_norm": 161.59707641601562, "learning_rate": 1.3531618257261411e-05, "loss": 0.6439, "step": 19498 }, { "epoch": 16.18174273858921, "grad_norm": 24.96455955505371, "learning_rate": 1.3531286307053943e-05, "loss": 0.7313, "step": 19499 }, { "epoch": 16.182572614107883, "grad_norm": 38.46547317504883, "learning_rate": 1.3530954356846475e-05, "loss": 0.841, "step": 19500 }, { "epoch": 16.183402489626555, "grad_norm": 53.785972595214844, "learning_rate": 1.3530622406639004e-05, "loss": 0.4954, "step": 19501 }, { "epoch": 16.184232365145228, "grad_norm": 54.19425964355469, "learning_rate": 1.3530290456431536e-05, "loss": 0.6848, "step": 19502 }, { "epoch": 16.1850622406639, "grad_norm": 62.210121154785156, "learning_rate": 1.3529958506224068e-05, "loss": 0.6462, "step": 19503 }, { "epoch": 16.185892116182572, "grad_norm": 20.051647186279297, "learning_rate": 1.35296265560166e-05, "loss": 0.2856, "step": 19504 }, { "epoch": 16.186721991701244, "grad_norm": 
77.48616027832031, "learning_rate": 1.3529294605809129e-05, "loss": 1.4782, "step": 19505 }, { "epoch": 16.187551867219916, "grad_norm": 12.510072708129883, "learning_rate": 1.3528962655601661e-05, "loss": 0.2966, "step": 19506 }, { "epoch": 16.18838174273859, "grad_norm": 29.965843200683594, "learning_rate": 1.3528630705394192e-05, "loss": 0.5216, "step": 19507 }, { "epoch": 16.18921161825726, "grad_norm": 23.334835052490234, "learning_rate": 1.3528298755186724e-05, "loss": 0.5216, "step": 19508 }, { "epoch": 16.190041493775933, "grad_norm": 22.335739135742188, "learning_rate": 1.3527966804979254e-05, "loss": 0.4644, "step": 19509 }, { "epoch": 16.190871369294605, "grad_norm": 24.198244094848633, "learning_rate": 1.3527634854771785e-05, "loss": 0.3743, "step": 19510 }, { "epoch": 16.191701244813277, "grad_norm": 22.169179916381836, "learning_rate": 1.3527302904564317e-05, "loss": 0.3325, "step": 19511 }, { "epoch": 16.19253112033195, "grad_norm": 36.75680923461914, "learning_rate": 1.3526970954356849e-05, "loss": 0.6427, "step": 19512 }, { "epoch": 16.19336099585062, "grad_norm": 35.284366607666016, "learning_rate": 1.3526639004149377e-05, "loss": 0.5757, "step": 19513 }, { "epoch": 16.194190871369294, "grad_norm": 51.58815002441406, "learning_rate": 1.352630705394191e-05, "loss": 1.41, "step": 19514 }, { "epoch": 16.195020746887966, "grad_norm": 39.813594818115234, "learning_rate": 1.3525975103734442e-05, "loss": 0.5982, "step": 19515 }, { "epoch": 16.19585062240664, "grad_norm": 64.769287109375, "learning_rate": 1.3525643153526972e-05, "loss": 0.9226, "step": 19516 }, { "epoch": 16.19668049792531, "grad_norm": 122.61152648925781, "learning_rate": 1.3525311203319503e-05, "loss": 0.5147, "step": 19517 }, { "epoch": 16.197510373443983, "grad_norm": 52.16667556762695, "learning_rate": 1.3524979253112035e-05, "loss": 0.9302, "step": 19518 }, { "epoch": 16.198340248962655, "grad_norm": 76.79842376708984, "learning_rate": 1.3524647302904565e-05, "loss": 0.9853, "step": 
19519 }, { "epoch": 16.199170124481327, "grad_norm": 52.42169952392578, "learning_rate": 1.3524315352697097e-05, "loss": 1.01, "step": 19520 }, { "epoch": 16.2, "grad_norm": 33.971717834472656, "learning_rate": 1.3523983402489626e-05, "loss": 0.7048, "step": 19521 }, { "epoch": 16.20082987551867, "grad_norm": 57.381587982177734, "learning_rate": 1.3523651452282158e-05, "loss": 0.9566, "step": 19522 }, { "epoch": 16.201659751037344, "grad_norm": 14.348437309265137, "learning_rate": 1.352331950207469e-05, "loss": 0.3655, "step": 19523 }, { "epoch": 16.202489626556016, "grad_norm": 35.129276275634766, "learning_rate": 1.3522987551867222e-05, "loss": 0.5774, "step": 19524 }, { "epoch": 16.203319502074688, "grad_norm": 32.223167419433594, "learning_rate": 1.3522655601659753e-05, "loss": 0.3976, "step": 19525 }, { "epoch": 16.20414937759336, "grad_norm": 26.55182647705078, "learning_rate": 1.3522323651452283e-05, "loss": 0.5335, "step": 19526 }, { "epoch": 16.204979253112032, "grad_norm": 37.64799880981445, "learning_rate": 1.3521991701244815e-05, "loss": 0.7005, "step": 19527 }, { "epoch": 16.205809128630705, "grad_norm": 57.957340240478516, "learning_rate": 1.3521659751037346e-05, "loss": 0.7937, "step": 19528 }, { "epoch": 16.206639004149377, "grad_norm": 36.78163146972656, "learning_rate": 1.3521327800829878e-05, "loss": 0.663, "step": 19529 }, { "epoch": 16.20746887966805, "grad_norm": 21.135080337524414, "learning_rate": 1.3520995850622406e-05, "loss": 0.3413, "step": 19530 }, { "epoch": 16.20829875518672, "grad_norm": 50.792911529541016, "learning_rate": 1.3520663900414938e-05, "loss": 0.8662, "step": 19531 }, { "epoch": 16.209128630705393, "grad_norm": 36.89656448364258, "learning_rate": 1.352033195020747e-05, "loss": 0.6556, "step": 19532 }, { "epoch": 16.209958506224066, "grad_norm": 64.42772674560547, "learning_rate": 1.3520000000000003e-05, "loss": 0.8794, "step": 19533 }, { "epoch": 16.210788381742738, "grad_norm": 26.41282081604004, "learning_rate": 
1.3519668049792531e-05, "loss": 0.5056, "step": 19534 }, { "epoch": 16.21161825726141, "grad_norm": 32.77997970581055, "learning_rate": 1.3519336099585064e-05, "loss": 0.7968, "step": 19535 }, { "epoch": 16.212448132780082, "grad_norm": 31.755455017089844, "learning_rate": 1.3519004149377594e-05, "loss": 0.5356, "step": 19536 }, { "epoch": 16.213278008298754, "grad_norm": 38.44672775268555, "learning_rate": 1.3518672199170126e-05, "loss": 0.5814, "step": 19537 }, { "epoch": 16.214107883817427, "grad_norm": 30.18851089477539, "learning_rate": 1.3518340248962656e-05, "loss": 0.4839, "step": 19538 }, { "epoch": 16.2149377593361, "grad_norm": 13.435155868530273, "learning_rate": 1.3518008298755187e-05, "loss": 0.2493, "step": 19539 }, { "epoch": 16.21576763485477, "grad_norm": 58.256492614746094, "learning_rate": 1.3517676348547719e-05, "loss": 0.9457, "step": 19540 }, { "epoch": 16.216597510373443, "grad_norm": 50.93056106567383, "learning_rate": 1.3517344398340251e-05, "loss": 0.9541, "step": 19541 }, { "epoch": 16.217427385892115, "grad_norm": 31.424631118774414, "learning_rate": 1.351701244813278e-05, "loss": 0.8124, "step": 19542 }, { "epoch": 16.218257261410788, "grad_norm": 40.12428665161133, "learning_rate": 1.3516680497925312e-05, "loss": 0.9411, "step": 19543 }, { "epoch": 16.21908713692946, "grad_norm": 65.227783203125, "learning_rate": 1.3516348547717844e-05, "loss": 0.9784, "step": 19544 }, { "epoch": 16.219917012448132, "grad_norm": 57.36344909667969, "learning_rate": 1.3516016597510374e-05, "loss": 0.647, "step": 19545 }, { "epoch": 16.220746887966804, "grad_norm": 30.26966667175293, "learning_rate": 1.3515684647302905e-05, "loss": 0.5092, "step": 19546 }, { "epoch": 16.221576763485476, "grad_norm": 38.10757827758789, "learning_rate": 1.3515352697095437e-05, "loss": 1.0794, "step": 19547 }, { "epoch": 16.22240663900415, "grad_norm": 43.6043701171875, "learning_rate": 1.3515020746887967e-05, "loss": 0.4768, "step": 19548 }, { "epoch": 16.22323651452282, 
"grad_norm": 79.50538635253906, "learning_rate": 1.35146887966805e-05, "loss": 0.2861, "step": 19549 }, { "epoch": 16.224066390041493, "grad_norm": 90.69949340820312, "learning_rate": 1.3514356846473032e-05, "loss": 0.3815, "step": 19550 }, { "epoch": 16.224896265560165, "grad_norm": 26.302444458007812, "learning_rate": 1.351402489626556e-05, "loss": 0.7464, "step": 19551 }, { "epoch": 16.225726141078837, "grad_norm": 73.32432556152344, "learning_rate": 1.3513692946058092e-05, "loss": 0.5299, "step": 19552 }, { "epoch": 16.22655601659751, "grad_norm": 50.03292465209961, "learning_rate": 1.3513360995850625e-05, "loss": 0.9268, "step": 19553 }, { "epoch": 16.22738589211618, "grad_norm": 51.01246643066406, "learning_rate": 1.3513029045643155e-05, "loss": 0.4978, "step": 19554 }, { "epoch": 16.228215767634854, "grad_norm": 27.045169830322266, "learning_rate": 1.3512697095435685e-05, "loss": 0.5503, "step": 19555 }, { "epoch": 16.229045643153526, "grad_norm": 41.53741455078125, "learning_rate": 1.3512365145228217e-05, "loss": 0.8365, "step": 19556 }, { "epoch": 16.2298755186722, "grad_norm": 31.99352264404297, "learning_rate": 1.3512033195020748e-05, "loss": 0.6386, "step": 19557 }, { "epoch": 16.23070539419087, "grad_norm": 44.24583435058594, "learning_rate": 1.351170124481328e-05, "loss": 0.5534, "step": 19558 }, { "epoch": 16.231535269709543, "grad_norm": 59.681396484375, "learning_rate": 1.3511369294605809e-05, "loss": 0.5351, "step": 19559 }, { "epoch": 16.232365145228215, "grad_norm": 33.28873825073242, "learning_rate": 1.351103734439834e-05, "loss": 0.6114, "step": 19560 }, { "epoch": 16.233195020746887, "grad_norm": 151.39830017089844, "learning_rate": 1.3510705394190873e-05, "loss": 0.9808, "step": 19561 }, { "epoch": 16.23402489626556, "grad_norm": 38.93743896484375, "learning_rate": 1.3510373443983405e-05, "loss": 0.6511, "step": 19562 }, { "epoch": 16.23485477178423, "grad_norm": 68.71793365478516, "learning_rate": 1.3510041493775934e-05, "loss": 0.8853, 
"step": 19563 }, { "epoch": 16.235684647302904, "grad_norm": 23.362905502319336, "learning_rate": 1.3509709543568466e-05, "loss": 0.3835, "step": 19564 }, { "epoch": 16.236514522821576, "grad_norm": 54.23506164550781, "learning_rate": 1.3509377593360998e-05, "loss": 0.6913, "step": 19565 }, { "epoch": 16.237344398340248, "grad_norm": 41.95052719116211, "learning_rate": 1.3509045643153528e-05, "loss": 0.647, "step": 19566 }, { "epoch": 16.23817427385892, "grad_norm": 52.01785659790039, "learning_rate": 1.3508713692946059e-05, "loss": 0.7495, "step": 19567 }, { "epoch": 16.239004149377593, "grad_norm": 41.81587219238281, "learning_rate": 1.350838174273859e-05, "loss": 0.8293, "step": 19568 }, { "epoch": 16.239834024896265, "grad_norm": 57.9051399230957, "learning_rate": 1.3508049792531121e-05, "loss": 0.72, "step": 19569 }, { "epoch": 16.240663900414937, "grad_norm": 42.74135971069336, "learning_rate": 1.3507717842323653e-05, "loss": 0.7182, "step": 19570 }, { "epoch": 16.24149377593361, "grad_norm": 29.956775665283203, "learning_rate": 1.3507385892116182e-05, "loss": 0.654, "step": 19571 }, { "epoch": 16.24232365145228, "grad_norm": 36.648929595947266, "learning_rate": 1.3507053941908714e-05, "loss": 1.1363, "step": 19572 }, { "epoch": 16.243153526970953, "grad_norm": 21.045284271240234, "learning_rate": 1.3506721991701246e-05, "loss": 0.3155, "step": 19573 }, { "epoch": 16.243983402489626, "grad_norm": 124.4712142944336, "learning_rate": 1.3506390041493778e-05, "loss": 0.6749, "step": 19574 }, { "epoch": 16.244813278008298, "grad_norm": 43.25661087036133, "learning_rate": 1.3506058091286307e-05, "loss": 0.9853, "step": 19575 }, { "epoch": 16.24564315352697, "grad_norm": 33.69390106201172, "learning_rate": 1.350572614107884e-05, "loss": 0.8254, "step": 19576 }, { "epoch": 16.246473029045642, "grad_norm": 32.47958755493164, "learning_rate": 1.350539419087137e-05, "loss": 0.971, "step": 19577 }, { "epoch": 16.247302904564314, "grad_norm": 56.79735565185547, 
"learning_rate": 1.3505062240663902e-05, "loss": 1.0756, "step": 19578 }, { "epoch": 16.248132780082987, "grad_norm": 35.2561149597168, "learning_rate": 1.3504730290456434e-05, "loss": 0.5343, "step": 19579 }, { "epoch": 16.24896265560166, "grad_norm": 20.668455123901367, "learning_rate": 1.3504398340248963e-05, "loss": 0.3648, "step": 19580 }, { "epoch": 16.24979253112033, "grad_norm": 56.858707427978516, "learning_rate": 1.3504066390041495e-05, "loss": 1.0832, "step": 19581 }, { "epoch": 16.250622406639003, "grad_norm": 50.6656494140625, "learning_rate": 1.3503734439834027e-05, "loss": 0.5164, "step": 19582 }, { "epoch": 16.251452282157675, "grad_norm": 75.30613708496094, "learning_rate": 1.3503402489626557e-05, "loss": 0.6556, "step": 19583 }, { "epoch": 16.252282157676348, "grad_norm": 19.814489364624023, "learning_rate": 1.3503070539419088e-05, "loss": 0.3419, "step": 19584 }, { "epoch": 16.25311203319502, "grad_norm": 27.096435546875, "learning_rate": 1.350273858921162e-05, "loss": 0.6724, "step": 19585 }, { "epoch": 16.253941908713692, "grad_norm": 32.422489166259766, "learning_rate": 1.350240663900415e-05, "loss": 0.6337, "step": 19586 }, { "epoch": 16.254771784232364, "grad_norm": 13.139772415161133, "learning_rate": 1.3502074688796682e-05, "loss": 0.3094, "step": 19587 }, { "epoch": 16.255601659751036, "grad_norm": 100.32685089111328, "learning_rate": 1.3501742738589213e-05, "loss": 0.6733, "step": 19588 }, { "epoch": 16.25643153526971, "grad_norm": 37.50799560546875, "learning_rate": 1.3501410788381743e-05, "loss": 0.8547, "step": 19589 }, { "epoch": 16.25726141078838, "grad_norm": 35.120121002197266, "learning_rate": 1.3501078838174275e-05, "loss": 0.7155, "step": 19590 }, { "epoch": 16.258091286307053, "grad_norm": 37.36592102050781, "learning_rate": 1.3500746887966807e-05, "loss": 0.543, "step": 19591 }, { "epoch": 16.258921161825725, "grad_norm": 35.65935516357422, "learning_rate": 1.3500414937759336e-05, "loss": 1.5277, "step": 19592 }, { "epoch": 
16.259751037344397, "grad_norm": 28.345062255859375, "learning_rate": 1.3500082987551868e-05, "loss": 0.4564, "step": 19593 }, { "epoch": 16.26058091286307, "grad_norm": 106.33439636230469, "learning_rate": 1.34997510373444e-05, "loss": 0.6605, "step": 19594 }, { "epoch": 16.261410788381742, "grad_norm": 27.989789962768555, "learning_rate": 1.349941908713693e-05, "loss": 0.2093, "step": 19595 }, { "epoch": 16.262240663900414, "grad_norm": 54.1489372253418, "learning_rate": 1.3499087136929461e-05, "loss": 1.1483, "step": 19596 }, { "epoch": 16.263070539419086, "grad_norm": 54.48630905151367, "learning_rate": 1.3498755186721993e-05, "loss": 0.5554, "step": 19597 }, { "epoch": 16.26390041493776, "grad_norm": 36.71306610107422, "learning_rate": 1.3498423236514524e-05, "loss": 0.7497, "step": 19598 }, { "epoch": 16.26473029045643, "grad_norm": 59.96095657348633, "learning_rate": 1.3498091286307056e-05, "loss": 0.8699, "step": 19599 }, { "epoch": 16.265560165975103, "grad_norm": 83.34194946289062, "learning_rate": 1.3497759336099584e-05, "loss": 0.9434, "step": 19600 }, { "epoch": 16.266390041493775, "grad_norm": 13.630558013916016, "learning_rate": 1.3497427385892117e-05, "loss": 0.2469, "step": 19601 }, { "epoch": 16.267219917012447, "grad_norm": 28.573259353637695, "learning_rate": 1.3497095435684649e-05, "loss": 0.3615, "step": 19602 }, { "epoch": 16.26804979253112, "grad_norm": 21.002737045288086, "learning_rate": 1.349676348547718e-05, "loss": 0.4089, "step": 19603 }, { "epoch": 16.26887966804979, "grad_norm": 37.6177978515625, "learning_rate": 1.3496431535269711e-05, "loss": 1.1642, "step": 19604 }, { "epoch": 16.269709543568464, "grad_norm": 26.181915283203125, "learning_rate": 1.3496099585062242e-05, "loss": 0.3191, "step": 19605 }, { "epoch": 16.270539419087136, "grad_norm": 72.66588592529297, "learning_rate": 1.3495767634854772e-05, "loss": 1.4432, "step": 19606 }, { "epoch": 16.271369294605808, "grad_norm": 35.35585403442383, "learning_rate": 
1.3495435684647304e-05, "loss": 0.486, "step": 19607 }, { "epoch": 16.27219917012448, "grad_norm": 30.989774703979492, "learning_rate": 1.3495103734439836e-05, "loss": 0.4635, "step": 19608 }, { "epoch": 16.273029045643153, "grad_norm": 37.74027633666992, "learning_rate": 1.3494771784232365e-05, "loss": 0.5583, "step": 19609 }, { "epoch": 16.273858921161825, "grad_norm": 37.79054260253906, "learning_rate": 1.3494439834024897e-05, "loss": 0.4991, "step": 19610 }, { "epoch": 16.274688796680497, "grad_norm": 32.508235931396484, "learning_rate": 1.349410788381743e-05, "loss": 0.5708, "step": 19611 }, { "epoch": 16.27551867219917, "grad_norm": 22.99188232421875, "learning_rate": 1.3493775933609961e-05, "loss": 0.5016, "step": 19612 }, { "epoch": 16.27634854771784, "grad_norm": 47.92741775512695, "learning_rate": 1.349344398340249e-05, "loss": 0.8998, "step": 19613 }, { "epoch": 16.277178423236514, "grad_norm": 43.064453125, "learning_rate": 1.3493112033195022e-05, "loss": 0.9458, "step": 19614 }, { "epoch": 16.278008298755186, "grad_norm": 79.7968978881836, "learning_rate": 1.3492780082987553e-05, "loss": 0.944, "step": 19615 }, { "epoch": 16.278838174273858, "grad_norm": 33.815101623535156, "learning_rate": 1.3492448132780085e-05, "loss": 1.4571, "step": 19616 }, { "epoch": 16.27966804979253, "grad_norm": 26.338659286499023, "learning_rate": 1.3492116182572615e-05, "loss": 0.4584, "step": 19617 }, { "epoch": 16.280497925311202, "grad_norm": 76.13693237304688, "learning_rate": 1.3491784232365145e-05, "loss": 0.8328, "step": 19618 }, { "epoch": 16.281327800829875, "grad_norm": 21.005292892456055, "learning_rate": 1.3491452282157678e-05, "loss": 0.5236, "step": 19619 }, { "epoch": 16.282157676348547, "grad_norm": 14.448748588562012, "learning_rate": 1.349112033195021e-05, "loss": 0.371, "step": 19620 }, { "epoch": 16.28298755186722, "grad_norm": 76.756103515625, "learning_rate": 1.3490788381742738e-05, "loss": 0.8985, "step": 19621 }, { "epoch": 16.28381742738589, 
"grad_norm": 32.33761215209961, "learning_rate": 1.349045643153527e-05, "loss": 0.6144, "step": 19622 }, { "epoch": 16.284647302904563, "grad_norm": 51.12566375732422, "learning_rate": 1.3490124481327803e-05, "loss": 1.0498, "step": 19623 }, { "epoch": 16.285477178423236, "grad_norm": 22.140422821044922, "learning_rate": 1.3489792531120333e-05, "loss": 0.3719, "step": 19624 }, { "epoch": 16.286307053941908, "grad_norm": 23.520009994506836, "learning_rate": 1.3489460580912863e-05, "loss": 0.4205, "step": 19625 }, { "epoch": 16.28713692946058, "grad_norm": 25.46561622619629, "learning_rate": 1.3489128630705396e-05, "loss": 0.5682, "step": 19626 }, { "epoch": 16.287966804979252, "grad_norm": 46.31325149536133, "learning_rate": 1.3488796680497926e-05, "loss": 0.7843, "step": 19627 }, { "epoch": 16.288796680497924, "grad_norm": 26.678388595581055, "learning_rate": 1.3488464730290458e-05, "loss": 0.4292, "step": 19628 }, { "epoch": 16.289626556016596, "grad_norm": 28.876636505126953, "learning_rate": 1.348813278008299e-05, "loss": 0.3803, "step": 19629 }, { "epoch": 16.29045643153527, "grad_norm": 15.761913299560547, "learning_rate": 1.3487800829875519e-05, "loss": 0.3198, "step": 19630 }, { "epoch": 16.29128630705394, "grad_norm": 32.632720947265625, "learning_rate": 1.3487468879668051e-05, "loss": 0.4891, "step": 19631 }, { "epoch": 16.292116182572613, "grad_norm": 67.60218811035156, "learning_rate": 1.3487136929460583e-05, "loss": 1.423, "step": 19632 }, { "epoch": 16.292946058091285, "grad_norm": 61.752662658691406, "learning_rate": 1.3486804979253114e-05, "loss": 1.2338, "step": 19633 }, { "epoch": 16.293775933609957, "grad_norm": 85.43379974365234, "learning_rate": 1.3486473029045644e-05, "loss": 0.7378, "step": 19634 }, { "epoch": 16.29460580912863, "grad_norm": 18.683218002319336, "learning_rate": 1.3486141078838176e-05, "loss": 0.4914, "step": 19635 }, { "epoch": 16.295435684647302, "grad_norm": 18.177915573120117, "learning_rate": 1.3485809128630706e-05, 
"loss": 0.2933, "step": 19636 }, { "epoch": 16.296265560165974, "grad_norm": 38.38712692260742, "learning_rate": 1.3485477178423239e-05, "loss": 1.0794, "step": 19637 }, { "epoch": 16.297095435684646, "grad_norm": 34.24187469482422, "learning_rate": 1.3485145228215767e-05, "loss": 0.954, "step": 19638 }, { "epoch": 16.29792531120332, "grad_norm": 29.088804244995117, "learning_rate": 1.34848132780083e-05, "loss": 0.5977, "step": 19639 }, { "epoch": 16.29875518672199, "grad_norm": 53.87008285522461, "learning_rate": 1.3484481327800831e-05, "loss": 0.8068, "step": 19640 }, { "epoch": 16.299585062240663, "grad_norm": 60.052650451660156, "learning_rate": 1.3484149377593364e-05, "loss": 1.3781, "step": 19641 }, { "epoch": 16.300414937759335, "grad_norm": 26.092214584350586, "learning_rate": 1.3483817427385892e-05, "loss": 0.675, "step": 19642 }, { "epoch": 16.301244813278007, "grad_norm": 35.66817855834961, "learning_rate": 1.3483485477178424e-05, "loss": 0.8139, "step": 19643 }, { "epoch": 16.30207468879668, "grad_norm": 34.562164306640625, "learning_rate": 1.3483153526970957e-05, "loss": 0.8476, "step": 19644 }, { "epoch": 16.30290456431535, "grad_norm": 25.82488441467285, "learning_rate": 1.3482821576763487e-05, "loss": 0.7458, "step": 19645 }, { "epoch": 16.303734439834024, "grad_norm": 38.28956604003906, "learning_rate": 1.3482489626556017e-05, "loss": 0.3177, "step": 19646 }, { "epoch": 16.304564315352696, "grad_norm": 52.9063720703125, "learning_rate": 1.3482157676348548e-05, "loss": 0.492, "step": 19647 }, { "epoch": 16.305394190871368, "grad_norm": 29.454683303833008, "learning_rate": 1.348182572614108e-05, "loss": 0.4638, "step": 19648 }, { "epoch": 16.30622406639004, "grad_norm": 41.66798400878906, "learning_rate": 1.3481493775933612e-05, "loss": 1.2893, "step": 19649 }, { "epoch": 16.307053941908713, "grad_norm": 48.791812896728516, "learning_rate": 1.348116182572614e-05, "loss": 0.8328, "step": 19650 }, { "epoch": 16.307883817427385, "grad_norm": 
31.407602310180664, "learning_rate": 1.3480829875518673e-05, "loss": 0.6178, "step": 19651 }, { "epoch": 16.308713692946057, "grad_norm": 48.920509338378906, "learning_rate": 1.3480497925311205e-05, "loss": 0.9913, "step": 19652 }, { "epoch": 16.30954356846473, "grad_norm": 29.086992263793945, "learning_rate": 1.3480165975103735e-05, "loss": 0.489, "step": 19653 }, { "epoch": 16.3103734439834, "grad_norm": 41.610931396484375, "learning_rate": 1.3479834024896266e-05, "loss": 0.8642, "step": 19654 }, { "epoch": 16.311203319502074, "grad_norm": 46.58773422241211, "learning_rate": 1.3479502074688798e-05, "loss": 0.8291, "step": 19655 }, { "epoch": 16.312033195020746, "grad_norm": 33.45606994628906, "learning_rate": 1.3479170124481328e-05, "loss": 0.6167, "step": 19656 }, { "epoch": 16.312863070539418, "grad_norm": 24.540369033813477, "learning_rate": 1.347883817427386e-05, "loss": 0.5079, "step": 19657 }, { "epoch": 16.31369294605809, "grad_norm": 26.548498153686523, "learning_rate": 1.3478506224066392e-05, "loss": 0.5088, "step": 19658 }, { "epoch": 16.314522821576762, "grad_norm": 77.89083099365234, "learning_rate": 1.3478174273858921e-05, "loss": 0.5402, "step": 19659 }, { "epoch": 16.315352697095435, "grad_norm": 31.885305404663086, "learning_rate": 1.3477842323651453e-05, "loss": 0.4808, "step": 19660 }, { "epoch": 16.316182572614107, "grad_norm": 32.181453704833984, "learning_rate": 1.3477510373443985e-05, "loss": 0.5483, "step": 19661 }, { "epoch": 16.31701244813278, "grad_norm": 23.734508514404297, "learning_rate": 1.3477178423236516e-05, "loss": 0.4076, "step": 19662 }, { "epoch": 16.31784232365145, "grad_norm": 282.7619323730469, "learning_rate": 1.3476846473029046e-05, "loss": 0.4263, "step": 19663 }, { "epoch": 16.318672199170123, "grad_norm": 95.22969818115234, "learning_rate": 1.3476514522821578e-05, "loss": 1.0941, "step": 19664 }, { "epoch": 16.319502074688796, "grad_norm": 49.9927978515625, "learning_rate": 1.3476182572614109e-05, "loss": 0.4318, 
"step": 19665 }, { "epoch": 16.320331950207468, "grad_norm": 53.74355697631836, "learning_rate": 1.3475850622406641e-05, "loss": 0.4411, "step": 19666 }, { "epoch": 16.32116182572614, "grad_norm": 60.4715576171875, "learning_rate": 1.347551867219917e-05, "loss": 0.8261, "step": 19667 }, { "epoch": 16.321991701244812, "grad_norm": 49.77219772338867, "learning_rate": 1.3475186721991702e-05, "loss": 0.5771, "step": 19668 }, { "epoch": 16.322821576763484, "grad_norm": 38.501930236816406, "learning_rate": 1.3474854771784234e-05, "loss": 0.3895, "step": 19669 }, { "epoch": 16.323651452282157, "grad_norm": 43.838104248046875, "learning_rate": 1.3474522821576766e-05, "loss": 1.1584, "step": 19670 }, { "epoch": 16.32448132780083, "grad_norm": 30.91897964477539, "learning_rate": 1.3474190871369295e-05, "loss": 0.6608, "step": 19671 }, { "epoch": 16.3253112033195, "grad_norm": 67.63502502441406, "learning_rate": 1.3473858921161827e-05, "loss": 0.6644, "step": 19672 }, { "epoch": 16.326141078838173, "grad_norm": 180.10079956054688, "learning_rate": 1.3473526970954359e-05, "loss": 0.704, "step": 19673 }, { "epoch": 16.326970954356845, "grad_norm": 20.640663146972656, "learning_rate": 1.347319502074689e-05, "loss": 0.869, "step": 19674 }, { "epoch": 16.327800829875518, "grad_norm": 24.83824348449707, "learning_rate": 1.347286307053942e-05, "loss": 0.6491, "step": 19675 }, { "epoch": 16.32863070539419, "grad_norm": 28.614599227905273, "learning_rate": 1.347253112033195e-05, "loss": 0.5508, "step": 19676 }, { "epoch": 16.329460580912862, "grad_norm": 21.97637939453125, "learning_rate": 1.3472199170124482e-05, "loss": 0.5048, "step": 19677 }, { "epoch": 16.330290456431534, "grad_norm": 43.737335205078125, "learning_rate": 1.3471867219917014e-05, "loss": 0.6537, "step": 19678 }, { "epoch": 16.331120331950206, "grad_norm": 15.098505020141602, "learning_rate": 1.3471535269709543e-05, "loss": 0.3014, "step": 19679 }, { "epoch": 16.33195020746888, "grad_norm": 49.31706619262695, 
"learning_rate": 1.3471203319502075e-05, "loss": 0.8949, "step": 19680 }, { "epoch": 16.33278008298755, "grad_norm": 25.585182189941406, "learning_rate": 1.3470871369294607e-05, "loss": 0.3879, "step": 19681 }, { "epoch": 16.333609958506223, "grad_norm": 34.66896438598633, "learning_rate": 1.347053941908714e-05, "loss": 0.4958, "step": 19682 }, { "epoch": 16.334439834024895, "grad_norm": 51.645668029785156, "learning_rate": 1.347020746887967e-05, "loss": 0.3145, "step": 19683 }, { "epoch": 16.335269709543567, "grad_norm": 37.804691314697266, "learning_rate": 1.34698755186722e-05, "loss": 0.3693, "step": 19684 }, { "epoch": 16.33609958506224, "grad_norm": 68.06610870361328, "learning_rate": 1.346954356846473e-05, "loss": 0.7962, "step": 19685 }, { "epoch": 16.33692946058091, "grad_norm": 55.536346435546875, "learning_rate": 1.3469211618257263e-05, "loss": 0.7201, "step": 19686 }, { "epoch": 16.337759336099584, "grad_norm": 71.15544128417969, "learning_rate": 1.3468879668049795e-05, "loss": 0.7514, "step": 19687 }, { "epoch": 16.338589211618256, "grad_norm": 22.37129783630371, "learning_rate": 1.3468547717842324e-05, "loss": 0.3192, "step": 19688 }, { "epoch": 16.33941908713693, "grad_norm": 68.69680786132812, "learning_rate": 1.3468215767634856e-05, "loss": 0.9988, "step": 19689 }, { "epoch": 16.3402489626556, "grad_norm": 56.2160530090332, "learning_rate": 1.3467883817427388e-05, "loss": 0.7043, "step": 19690 }, { "epoch": 16.341078838174273, "grad_norm": 60.27069854736328, "learning_rate": 1.346755186721992e-05, "loss": 1.0084, "step": 19691 }, { "epoch": 16.341908713692945, "grad_norm": 39.10106658935547, "learning_rate": 1.3467219917012449e-05, "loss": 0.8679, "step": 19692 }, { "epoch": 16.342738589211617, "grad_norm": 28.300207138061523, "learning_rate": 1.346688796680498e-05, "loss": 0.6285, "step": 19693 }, { "epoch": 16.34356846473029, "grad_norm": 47.926578521728516, "learning_rate": 1.3466556016597511e-05, "loss": 0.5236, "step": 19694 }, { "epoch": 
16.34439834024896, "grad_norm": 42.133949279785156, "learning_rate": 1.3466224066390043e-05, "loss": 0.9315, "step": 19695 }, { "epoch": 16.345228215767634, "grad_norm": 24.72223663330078, "learning_rate": 1.3465892116182574e-05, "loss": 0.3874, "step": 19696 }, { "epoch": 16.346058091286306, "grad_norm": 65.76386260986328, "learning_rate": 1.3465560165975104e-05, "loss": 1.3613, "step": 19697 }, { "epoch": 16.346887966804978, "grad_norm": 47.113101959228516, "learning_rate": 1.3465228215767636e-05, "loss": 0.8689, "step": 19698 }, { "epoch": 16.34771784232365, "grad_norm": 26.657848358154297, "learning_rate": 1.3464896265560168e-05, "loss": 1.0276, "step": 19699 }, { "epoch": 16.348547717842322, "grad_norm": 35.21425247192383, "learning_rate": 1.3464564315352697e-05, "loss": 0.7409, "step": 19700 }, { "epoch": 16.349377593360995, "grad_norm": 32.447025299072266, "learning_rate": 1.3464232365145229e-05, "loss": 0.7619, "step": 19701 }, { "epoch": 16.350207468879667, "grad_norm": 25.79952049255371, "learning_rate": 1.3463900414937761e-05, "loss": 0.4232, "step": 19702 }, { "epoch": 16.35103734439834, "grad_norm": 35.064697265625, "learning_rate": 1.3463568464730292e-05, "loss": 0.8786, "step": 19703 }, { "epoch": 16.35186721991701, "grad_norm": 31.310773849487305, "learning_rate": 1.3463236514522822e-05, "loss": 0.7089, "step": 19704 }, { "epoch": 16.352697095435683, "grad_norm": 63.925113677978516, "learning_rate": 1.3462904564315354e-05, "loss": 1.1763, "step": 19705 }, { "epoch": 16.353526970954356, "grad_norm": 42.48305892944336, "learning_rate": 1.3462572614107885e-05, "loss": 0.6747, "step": 19706 }, { "epoch": 16.354356846473028, "grad_norm": 53.30379104614258, "learning_rate": 1.3462240663900417e-05, "loss": 0.7481, "step": 19707 }, { "epoch": 16.3551867219917, "grad_norm": 52.16838073730469, "learning_rate": 1.3461908713692945e-05, "loss": 0.8785, "step": 19708 }, { "epoch": 16.356016597510372, "grad_norm": 29.594684600830078, "learning_rate": 
1.3461576763485477e-05, "loss": 0.6207, "step": 19709 }, { "epoch": 16.356846473029044, "grad_norm": 20.345916748046875, "learning_rate": 1.346124481327801e-05, "loss": 0.2788, "step": 19710 }, { "epoch": 16.357676348547717, "grad_norm": 48.78744125366211, "learning_rate": 1.3460912863070542e-05, "loss": 0.9055, "step": 19711 }, { "epoch": 16.35850622406639, "grad_norm": 25.335371017456055, "learning_rate": 1.3460580912863072e-05, "loss": 1.1017, "step": 19712 }, { "epoch": 16.35933609958506, "grad_norm": 78.61476135253906, "learning_rate": 1.3460248962655602e-05, "loss": 1.0665, "step": 19713 }, { "epoch": 16.360165975103733, "grad_norm": 37.1080207824707, "learning_rate": 1.3459917012448135e-05, "loss": 0.8786, "step": 19714 }, { "epoch": 16.360995850622405, "grad_norm": 17.469011306762695, "learning_rate": 1.3459585062240665e-05, "loss": 0.4021, "step": 19715 }, { "epoch": 16.361825726141078, "grad_norm": 51.38054656982422, "learning_rate": 1.3459253112033197e-05, "loss": 1.718, "step": 19716 }, { "epoch": 16.36265560165975, "grad_norm": 18.887460708618164, "learning_rate": 1.3458921161825726e-05, "loss": 0.5763, "step": 19717 }, { "epoch": 16.363485477178422, "grad_norm": 33.9042854309082, "learning_rate": 1.3458589211618258e-05, "loss": 0.6355, "step": 19718 }, { "epoch": 16.364315352697094, "grad_norm": 20.2059268951416, "learning_rate": 1.345825726141079e-05, "loss": 0.4424, "step": 19719 }, { "epoch": 16.365145228215766, "grad_norm": 56.487457275390625, "learning_rate": 1.3457925311203322e-05, "loss": 0.6082, "step": 19720 }, { "epoch": 16.36597510373444, "grad_norm": 41.24871063232422, "learning_rate": 1.3457593360995851e-05, "loss": 0.6986, "step": 19721 }, { "epoch": 16.36680497925311, "grad_norm": 108.5700912475586, "learning_rate": 1.3457261410788383e-05, "loss": 1.1924, "step": 19722 }, { "epoch": 16.367634854771783, "grad_norm": 35.84331130981445, "learning_rate": 1.3456929460580913e-05, "loss": 0.7819, "step": 19723 }, { "epoch": 16.368464730290455, 
"grad_norm": 8.744650840759277, "learning_rate": 1.3456597510373446e-05, "loss": 0.2876, "step": 19724 }, { "epoch": 16.369294605809127, "grad_norm": 107.15495300292969, "learning_rate": 1.3456265560165976e-05, "loss": 1.155, "step": 19725 }, { "epoch": 16.3701244813278, "grad_norm": 58.441810607910156, "learning_rate": 1.3455933609958506e-05, "loss": 0.7109, "step": 19726 }, { "epoch": 16.37095435684647, "grad_norm": 29.03992462158203, "learning_rate": 1.3455601659751038e-05, "loss": 0.3893, "step": 19727 }, { "epoch": 16.371784232365144, "grad_norm": 94.5877456665039, "learning_rate": 1.345526970954357e-05, "loss": 0.8083, "step": 19728 }, { "epoch": 16.372614107883816, "grad_norm": 41.38674545288086, "learning_rate": 1.34549377593361e-05, "loss": 0.5674, "step": 19729 }, { "epoch": 16.37344398340249, "grad_norm": 24.140439987182617, "learning_rate": 1.3454605809128631e-05, "loss": 0.549, "step": 19730 }, { "epoch": 16.37427385892116, "grad_norm": 87.03385162353516, "learning_rate": 1.3454273858921163e-05, "loss": 1.1635, "step": 19731 }, { "epoch": 16.375103734439833, "grad_norm": 38.48493576049805, "learning_rate": 1.3453941908713694e-05, "loss": 0.6964, "step": 19732 }, { "epoch": 16.375933609958505, "grad_norm": 36.043704986572266, "learning_rate": 1.3453609958506224e-05, "loss": 0.5077, "step": 19733 }, { "epoch": 16.376763485477177, "grad_norm": 54.486602783203125, "learning_rate": 1.3453278008298756e-05, "loss": 0.6958, "step": 19734 }, { "epoch": 16.37759336099585, "grad_norm": 36.58180618286133, "learning_rate": 1.3452946058091287e-05, "loss": 1.0845, "step": 19735 }, { "epoch": 16.37842323651452, "grad_norm": 101.67440032958984, "learning_rate": 1.3452614107883819e-05, "loss": 0.7737, "step": 19736 }, { "epoch": 16.379253112033194, "grad_norm": 56.31779098510742, "learning_rate": 1.3452282157676351e-05, "loss": 0.7889, "step": 19737 }, { "epoch": 16.380082987551866, "grad_norm": 19.96343994140625, "learning_rate": 1.345195020746888e-05, "loss": 0.4312, 
"step": 19738 }, { "epoch": 16.380912863070538, "grad_norm": 67.53573608398438, "learning_rate": 1.3451618257261412e-05, "loss": 1.1838, "step": 19739 }, { "epoch": 16.38174273858921, "grad_norm": 42.1451530456543, "learning_rate": 1.3451286307053944e-05, "loss": 0.7226, "step": 19740 }, { "epoch": 16.382572614107882, "grad_norm": 49.010108947753906, "learning_rate": 1.3450954356846474e-05, "loss": 0.7924, "step": 19741 }, { "epoch": 16.383402489626555, "grad_norm": 35.72743225097656, "learning_rate": 1.3450622406639005e-05, "loss": 0.6259, "step": 19742 }, { "epoch": 16.384232365145227, "grad_norm": 41.09866714477539, "learning_rate": 1.3450290456431537e-05, "loss": 0.6058, "step": 19743 }, { "epoch": 16.3850622406639, "grad_norm": 57.56957244873047, "learning_rate": 1.3449958506224067e-05, "loss": 0.7552, "step": 19744 }, { "epoch": 16.38589211618257, "grad_norm": 31.713804244995117, "learning_rate": 1.34496265560166e-05, "loss": 0.967, "step": 19745 }, { "epoch": 16.386721991701243, "grad_norm": 58.92781066894531, "learning_rate": 1.3449294605809128e-05, "loss": 0.7055, "step": 19746 }, { "epoch": 16.387551867219916, "grad_norm": 36.44329833984375, "learning_rate": 1.344896265560166e-05, "loss": 0.7704, "step": 19747 }, { "epoch": 16.388381742738588, "grad_norm": 54.54646682739258, "learning_rate": 1.3448630705394192e-05, "loss": 0.958, "step": 19748 }, { "epoch": 16.38921161825726, "grad_norm": 60.65403747558594, "learning_rate": 1.3448298755186724e-05, "loss": 1.0834, "step": 19749 }, { "epoch": 16.390041493775932, "grad_norm": 41.540611267089844, "learning_rate": 1.3447966804979253e-05, "loss": 0.3891, "step": 19750 }, { "epoch": 16.390871369294604, "grad_norm": 19.194942474365234, "learning_rate": 1.3447634854771785e-05, "loss": 0.4059, "step": 19751 }, { "epoch": 16.391701244813277, "grad_norm": 19.248737335205078, "learning_rate": 1.3447302904564317e-05, "loss": 0.3731, "step": 19752 }, { "epoch": 16.39253112033195, "grad_norm": 49.23813247680664, 
"learning_rate": 1.3446970954356848e-05, "loss": 0.9342, "step": 19753 }, { "epoch": 16.39336099585062, "grad_norm": 40.55500030517578, "learning_rate": 1.3446639004149378e-05, "loss": 0.6213, "step": 19754 }, { "epoch": 16.394190871369293, "grad_norm": 39.268802642822266, "learning_rate": 1.3446307053941909e-05, "loss": 0.4236, "step": 19755 }, { "epoch": 16.395020746887965, "grad_norm": 47.54068374633789, "learning_rate": 1.344597510373444e-05, "loss": 0.9823, "step": 19756 }, { "epoch": 16.395850622406638, "grad_norm": 33.84043884277344, "learning_rate": 1.3445643153526973e-05, "loss": 0.6086, "step": 19757 }, { "epoch": 16.39668049792531, "grad_norm": 33.376373291015625, "learning_rate": 1.3445311203319502e-05, "loss": 0.7626, "step": 19758 }, { "epoch": 16.397510373443982, "grad_norm": 28.50605583190918, "learning_rate": 1.3444979253112034e-05, "loss": 0.7329, "step": 19759 }, { "epoch": 16.398340248962654, "grad_norm": 38.759063720703125, "learning_rate": 1.3444647302904566e-05, "loss": 0.8281, "step": 19760 }, { "epoch": 16.399170124481326, "grad_norm": 50.540706634521484, "learning_rate": 1.3444315352697098e-05, "loss": 1.2153, "step": 19761 }, { "epoch": 16.4, "grad_norm": 40.32263946533203, "learning_rate": 1.3443983402489628e-05, "loss": 1.3334, "step": 19762 }, { "epoch": 16.40082987551867, "grad_norm": 58.713871002197266, "learning_rate": 1.3443651452282159e-05, "loss": 0.7037, "step": 19763 }, { "epoch": 16.401659751037343, "grad_norm": 31.75513458251953, "learning_rate": 1.344331950207469e-05, "loss": 0.4582, "step": 19764 }, { "epoch": 16.402489626556015, "grad_norm": 109.40628051757812, "learning_rate": 1.3442987551867221e-05, "loss": 0.5901, "step": 19765 }, { "epoch": 16.403319502074687, "grad_norm": 14.627826690673828, "learning_rate": 1.3442655601659753e-05, "loss": 0.2974, "step": 19766 }, { "epoch": 16.40414937759336, "grad_norm": 42.37529754638672, "learning_rate": 1.3442323651452282e-05, "loss": 0.7162, "step": 19767 }, { "epoch": 
16.40497925311203, "grad_norm": 36.6041374206543, "learning_rate": 1.3441991701244814e-05, "loss": 0.5333, "step": 19768 }, { "epoch": 16.405809128630704, "grad_norm": 15.33155345916748, "learning_rate": 1.3441659751037346e-05, "loss": 0.2836, "step": 19769 }, { "epoch": 16.406639004149376, "grad_norm": 30.00164222717285, "learning_rate": 1.3441327800829877e-05, "loss": 0.549, "step": 19770 }, { "epoch": 16.40746887966805, "grad_norm": 74.2865982055664, "learning_rate": 1.3440995850622407e-05, "loss": 0.8727, "step": 19771 }, { "epoch": 16.40829875518672, "grad_norm": 35.766197204589844, "learning_rate": 1.344066390041494e-05, "loss": 0.4796, "step": 19772 }, { "epoch": 16.409128630705393, "grad_norm": 21.953781127929688, "learning_rate": 1.344033195020747e-05, "loss": 0.4673, "step": 19773 }, { "epoch": 16.409958506224065, "grad_norm": 32.54084777832031, "learning_rate": 1.3440000000000002e-05, "loss": 0.7555, "step": 19774 }, { "epoch": 16.410788381742737, "grad_norm": 38.57712173461914, "learning_rate": 1.3439668049792532e-05, "loss": 0.7946, "step": 19775 }, { "epoch": 16.41161825726141, "grad_norm": 20.239858627319336, "learning_rate": 1.3439336099585063e-05, "loss": 0.3909, "step": 19776 }, { "epoch": 16.41244813278008, "grad_norm": 53.474552154541016, "learning_rate": 1.3439004149377595e-05, "loss": 0.7723, "step": 19777 }, { "epoch": 16.413278008298754, "grad_norm": 98.51432037353516, "learning_rate": 1.3438672199170127e-05, "loss": 0.5106, "step": 19778 }, { "epoch": 16.414107883817426, "grad_norm": 55.7563362121582, "learning_rate": 1.3438340248962656e-05, "loss": 0.408, "step": 19779 }, { "epoch": 16.414937759336098, "grad_norm": 24.892513275146484, "learning_rate": 1.3438008298755188e-05, "loss": 0.6853, "step": 19780 }, { "epoch": 16.41576763485477, "grad_norm": 38.99453353881836, "learning_rate": 1.343767634854772e-05, "loss": 0.7078, "step": 19781 }, { "epoch": 16.416597510373443, "grad_norm": 19.5186710357666, "learning_rate": 1.343734439834025e-05, 
"loss": 0.6534, "step": 19782 }, { "epoch": 16.417427385892115, "grad_norm": 61.451271057128906, "learning_rate": 1.343701244813278e-05, "loss": 1.069, "step": 19783 }, { "epoch": 16.418257261410787, "grad_norm": 39.87184524536133, "learning_rate": 1.3436680497925311e-05, "loss": 0.7331, "step": 19784 }, { "epoch": 16.41908713692946, "grad_norm": 42.15053939819336, "learning_rate": 1.3436348547717843e-05, "loss": 0.9205, "step": 19785 }, { "epoch": 16.41991701244813, "grad_norm": 95.68466186523438, "learning_rate": 1.3436016597510375e-05, "loss": 0.5405, "step": 19786 }, { "epoch": 16.420746887966803, "grad_norm": 54.582489013671875, "learning_rate": 1.3435684647302904e-05, "loss": 1.0185, "step": 19787 }, { "epoch": 16.421576763485476, "grad_norm": 24.953706741333008, "learning_rate": 1.3435352697095436e-05, "loss": 0.3861, "step": 19788 }, { "epoch": 16.422406639004148, "grad_norm": 42.995582580566406, "learning_rate": 1.3435020746887968e-05, "loss": 1.4599, "step": 19789 }, { "epoch": 16.42323651452282, "grad_norm": 36.670936584472656, "learning_rate": 1.34346887966805e-05, "loss": 1.1087, "step": 19790 }, { "epoch": 16.424066390041492, "grad_norm": 79.0429458618164, "learning_rate": 1.343435684647303e-05, "loss": 0.9911, "step": 19791 }, { "epoch": 16.424896265560164, "grad_norm": 31.355844497680664, "learning_rate": 1.3434024896265561e-05, "loss": 0.7076, "step": 19792 }, { "epoch": 16.425726141078837, "grad_norm": 62.115570068359375, "learning_rate": 1.3433692946058091e-05, "loss": 1.1957, "step": 19793 }, { "epoch": 16.42655601659751, "grad_norm": 62.79648208618164, "learning_rate": 1.3433360995850624e-05, "loss": 0.9284, "step": 19794 }, { "epoch": 16.42738589211618, "grad_norm": 25.680971145629883, "learning_rate": 1.3433029045643156e-05, "loss": 0.4113, "step": 19795 }, { "epoch": 16.428215767634853, "grad_norm": 34.5294303894043, "learning_rate": 1.3432697095435684e-05, "loss": 0.6954, "step": 19796 }, { "epoch": 16.429045643153525, "grad_norm": 
26.725963592529297, "learning_rate": 1.3432365145228217e-05, "loss": 0.5638, "step": 19797 }, { "epoch": 16.429875518672198, "grad_norm": 33.177921295166016, "learning_rate": 1.3432033195020749e-05, "loss": 0.5716, "step": 19798 }, { "epoch": 16.43070539419087, "grad_norm": 38.13648986816406, "learning_rate": 1.343170124481328e-05, "loss": 0.6734, "step": 19799 }, { "epoch": 16.431535269709542, "grad_norm": 55.22050476074219, "learning_rate": 1.343136929460581e-05, "loss": 0.7694, "step": 19800 }, { "epoch": 16.432365145228214, "grad_norm": 27.334867477416992, "learning_rate": 1.3431037344398342e-05, "loss": 0.4823, "step": 19801 }, { "epoch": 16.433195020746886, "grad_norm": 84.4629135131836, "learning_rate": 1.3430705394190872e-05, "loss": 0.6189, "step": 19802 }, { "epoch": 16.43402489626556, "grad_norm": 67.25626373291016, "learning_rate": 1.3430373443983404e-05, "loss": 1.1221, "step": 19803 }, { "epoch": 16.43485477178423, "grad_norm": 107.59418487548828, "learning_rate": 1.3430041493775934e-05, "loss": 0.7746, "step": 19804 }, { "epoch": 16.435684647302903, "grad_norm": 53.52480697631836, "learning_rate": 1.3429709543568465e-05, "loss": 0.9304, "step": 19805 }, { "epoch": 16.436514522821575, "grad_norm": 41.18205642700195, "learning_rate": 1.3429377593360997e-05, "loss": 0.6759, "step": 19806 }, { "epoch": 16.437344398340247, "grad_norm": 41.79802322387695, "learning_rate": 1.3429045643153529e-05, "loss": 0.9743, "step": 19807 }, { "epoch": 16.43817427385892, "grad_norm": 35.244651794433594, "learning_rate": 1.3428713692946058e-05, "loss": 0.9687, "step": 19808 }, { "epoch": 16.439004149377592, "grad_norm": 34.61335372924805, "learning_rate": 1.342838174273859e-05, "loss": 0.5849, "step": 19809 }, { "epoch": 16.439834024896264, "grad_norm": 21.633726119995117, "learning_rate": 1.3428049792531122e-05, "loss": 0.3802, "step": 19810 }, { "epoch": 16.440663900414936, "grad_norm": 44.43168258666992, "learning_rate": 1.3427717842323652e-05, "loss": 0.5315, "step": 
19811 }, { "epoch": 16.44149377593361, "grad_norm": 41.6062126159668, "learning_rate": 1.3427385892116183e-05, "loss": 0.5097, "step": 19812 }, { "epoch": 16.44232365145228, "grad_norm": 38.163143157958984, "learning_rate": 1.3427053941908715e-05, "loss": 0.7941, "step": 19813 }, { "epoch": 16.443153526970953, "grad_norm": 23.14753532409668, "learning_rate": 1.3426721991701245e-05, "loss": 0.4795, "step": 19814 }, { "epoch": 16.443983402489625, "grad_norm": 22.188549041748047, "learning_rate": 1.3426390041493778e-05, "loss": 0.4984, "step": 19815 }, { "epoch": 16.444813278008297, "grad_norm": 10.698742866516113, "learning_rate": 1.342605809128631e-05, "loss": 0.3716, "step": 19816 }, { "epoch": 16.44564315352697, "grad_norm": 34.869224548339844, "learning_rate": 1.3425726141078838e-05, "loss": 0.8389, "step": 19817 }, { "epoch": 16.44647302904564, "grad_norm": 45.428462982177734, "learning_rate": 1.342539419087137e-05, "loss": 1.1035, "step": 19818 }, { "epoch": 16.447302904564314, "grad_norm": 22.91371726989746, "learning_rate": 1.3425062240663903e-05, "loss": 0.4184, "step": 19819 }, { "epoch": 16.448132780082986, "grad_norm": 26.418859481811523, "learning_rate": 1.3424730290456433e-05, "loss": 0.6754, "step": 19820 }, { "epoch": 16.448962655601658, "grad_norm": 16.707592010498047, "learning_rate": 1.3424398340248963e-05, "loss": 0.3285, "step": 19821 }, { "epoch": 16.44979253112033, "grad_norm": 96.93152618408203, "learning_rate": 1.3424066390041495e-05, "loss": 1.2038, "step": 19822 }, { "epoch": 16.450622406639003, "grad_norm": 28.803869247436523, "learning_rate": 1.3423734439834026e-05, "loss": 0.3119, "step": 19823 }, { "epoch": 16.451452282157675, "grad_norm": 34.39360809326172, "learning_rate": 1.3423402489626558e-05, "loss": 0.7389, "step": 19824 }, { "epoch": 16.452282157676347, "grad_norm": 16.77461051940918, "learning_rate": 1.3423070539419087e-05, "loss": 0.2397, "step": 19825 }, { "epoch": 16.45311203319502, "grad_norm": 18.397886276245117, 
"learning_rate": 1.3422738589211619e-05, "loss": 0.3524, "step": 19826 }, { "epoch": 16.45394190871369, "grad_norm": 22.982694625854492, "learning_rate": 1.3422406639004151e-05, "loss": 0.469, "step": 19827 }, { "epoch": 16.454771784232364, "grad_norm": 22.75274658203125, "learning_rate": 1.3422074688796683e-05, "loss": 0.4142, "step": 19828 }, { "epoch": 16.455601659751036, "grad_norm": 20.274272918701172, "learning_rate": 1.3421742738589212e-05, "loss": 0.3772, "step": 19829 }, { "epoch": 16.456431535269708, "grad_norm": 29.1684513092041, "learning_rate": 1.3421410788381744e-05, "loss": 0.6912, "step": 19830 }, { "epoch": 16.45726141078838, "grad_norm": 48.510860443115234, "learning_rate": 1.3421078838174276e-05, "loss": 0.8387, "step": 19831 }, { "epoch": 16.458091286307052, "grad_norm": 34.464508056640625, "learning_rate": 1.3420746887966806e-05, "loss": 0.5423, "step": 19832 }, { "epoch": 16.458921161825725, "grad_norm": 49.70706558227539, "learning_rate": 1.3420414937759337e-05, "loss": 0.5371, "step": 19833 }, { "epoch": 16.459751037344397, "grad_norm": 73.0421371459961, "learning_rate": 1.3420082987551867e-05, "loss": 0.6032, "step": 19834 }, { "epoch": 16.46058091286307, "grad_norm": 28.961170196533203, "learning_rate": 1.34197510373444e-05, "loss": 0.3511, "step": 19835 }, { "epoch": 16.46141078838174, "grad_norm": 25.23625373840332, "learning_rate": 1.3419419087136931e-05, "loss": 0.5972, "step": 19836 }, { "epoch": 16.462240663900413, "grad_norm": 65.12357330322266, "learning_rate": 1.341908713692946e-05, "loss": 0.6684, "step": 19837 }, { "epoch": 16.463070539419085, "grad_norm": 44.165645599365234, "learning_rate": 1.3418755186721992e-05, "loss": 1.1693, "step": 19838 }, { "epoch": 16.463900414937758, "grad_norm": 39.740779876708984, "learning_rate": 1.3418423236514524e-05, "loss": 0.7604, "step": 19839 }, { "epoch": 16.46473029045643, "grad_norm": 37.13005828857422, "learning_rate": 1.3418091286307055e-05, "loss": 0.6625, "step": 19840 }, { "epoch": 
16.465560165975102, "grad_norm": 50.056575775146484, "learning_rate": 1.3417759336099587e-05, "loss": 0.3535, "step": 19841 }, { "epoch": 16.466390041493774, "grad_norm": 53.375736236572266, "learning_rate": 1.3417427385892117e-05, "loss": 0.426, "step": 19842 }, { "epoch": 16.467219917012446, "grad_norm": 40.336273193359375, "learning_rate": 1.3417095435684648e-05, "loss": 1.0402, "step": 19843 }, { "epoch": 16.46804979253112, "grad_norm": 46.77277374267578, "learning_rate": 1.341676348547718e-05, "loss": 0.7146, "step": 19844 }, { "epoch": 16.46887966804979, "grad_norm": 57.43928146362305, "learning_rate": 1.3416431535269712e-05, "loss": 0.673, "step": 19845 }, { "epoch": 16.469709543568463, "grad_norm": 49.848297119140625, "learning_rate": 1.341609958506224e-05, "loss": 0.6997, "step": 19846 }, { "epoch": 16.470539419087135, "grad_norm": 29.90823745727539, "learning_rate": 1.3415767634854773e-05, "loss": 0.399, "step": 19847 }, { "epoch": 16.471369294605807, "grad_norm": 59.96108627319336, "learning_rate": 1.3415435684647305e-05, "loss": 0.4976, "step": 19848 }, { "epoch": 16.47219917012448, "grad_norm": 15.290128707885742, "learning_rate": 1.3415103734439835e-05, "loss": 0.3235, "step": 19849 }, { "epoch": 16.473029045643152, "grad_norm": 43.82880401611328, "learning_rate": 1.3414771784232366e-05, "loss": 1.6484, "step": 19850 }, { "epoch": 16.473858921161824, "grad_norm": 16.58974266052246, "learning_rate": 1.3414439834024898e-05, "loss": 0.4931, "step": 19851 }, { "epoch": 16.474688796680496, "grad_norm": 34.43030548095703, "learning_rate": 1.3414107883817428e-05, "loss": 1.0357, "step": 19852 }, { "epoch": 16.47551867219917, "grad_norm": 45.41572952270508, "learning_rate": 1.341377593360996e-05, "loss": 0.7006, "step": 19853 }, { "epoch": 16.47634854771784, "grad_norm": 61.397525787353516, "learning_rate": 1.3413443983402489e-05, "loss": 0.6587, "step": 19854 }, { "epoch": 16.477178423236513, "grad_norm": 33.343563079833984, "learning_rate": 
1.3413112033195021e-05, "loss": 0.5934, "step": 19855 }, { "epoch": 16.478008298755185, "grad_norm": 33.40081024169922, "learning_rate": 1.3412780082987553e-05, "loss": 0.4786, "step": 19856 }, { "epoch": 16.478838174273857, "grad_norm": 32.221282958984375, "learning_rate": 1.3412448132780085e-05, "loss": 0.8312, "step": 19857 }, { "epoch": 16.47966804979253, "grad_norm": 34.5344352722168, "learning_rate": 1.3412116182572614e-05, "loss": 0.9035, "step": 19858 }, { "epoch": 16.4804979253112, "grad_norm": 109.71984100341797, "learning_rate": 1.3411784232365146e-05, "loss": 0.4183, "step": 19859 }, { "epoch": 16.481327800829874, "grad_norm": 64.3835220336914, "learning_rate": 1.3411452282157678e-05, "loss": 1.2375, "step": 19860 }, { "epoch": 16.482157676348546, "grad_norm": 56.848907470703125, "learning_rate": 1.3411120331950209e-05, "loss": 1.3402, "step": 19861 }, { "epoch": 16.482987551867218, "grad_norm": 69.66616821289062, "learning_rate": 1.3410788381742739e-05, "loss": 1.0784, "step": 19862 }, { "epoch": 16.48381742738589, "grad_norm": 31.618009567260742, "learning_rate": 1.341045643153527e-05, "loss": 0.4331, "step": 19863 }, { "epoch": 16.484647302904563, "grad_norm": 64.54484558105469, "learning_rate": 1.3410124481327802e-05, "loss": 1.5712, "step": 19864 }, { "epoch": 16.485477178423235, "grad_norm": 37.636295318603516, "learning_rate": 1.3409792531120334e-05, "loss": 0.3687, "step": 19865 }, { "epoch": 16.486307053941907, "grad_norm": 32.78441619873047, "learning_rate": 1.3409460580912862e-05, "loss": 0.389, "step": 19866 }, { "epoch": 16.48713692946058, "grad_norm": 58.03727340698242, "learning_rate": 1.3409128630705395e-05, "loss": 1.0373, "step": 19867 }, { "epoch": 16.48796680497925, "grad_norm": 58.867069244384766, "learning_rate": 1.3408796680497927e-05, "loss": 0.7438, "step": 19868 }, { "epoch": 16.488796680497924, "grad_norm": 35.59922409057617, "learning_rate": 1.3408464730290459e-05, "loss": 1.2638, "step": 19869 }, { "epoch": 
16.489626556016596, "grad_norm": 48.57460403442383, "learning_rate": 1.340813278008299e-05, "loss": 0.426, "step": 19870 }, { "epoch": 16.490456431535268, "grad_norm": 45.23235321044922, "learning_rate": 1.340780082987552e-05, "loss": 0.5805, "step": 19871 }, { "epoch": 16.49128630705394, "grad_norm": 72.39725494384766, "learning_rate": 1.340746887966805e-05, "loss": 0.9164, "step": 19872 }, { "epoch": 16.492116182572612, "grad_norm": 39.99205780029297, "learning_rate": 1.3407136929460582e-05, "loss": 0.9431, "step": 19873 }, { "epoch": 16.492946058091285, "grad_norm": 36.41365432739258, "learning_rate": 1.3406804979253114e-05, "loss": 0.5562, "step": 19874 }, { "epoch": 16.49377593360996, "grad_norm": 38.83356475830078, "learning_rate": 1.3406473029045643e-05, "loss": 1.2241, "step": 19875 }, { "epoch": 16.49460580912863, "grad_norm": 19.480953216552734, "learning_rate": 1.3406141078838175e-05, "loss": 0.4082, "step": 19876 }, { "epoch": 16.495435684647305, "grad_norm": 34.42509078979492, "learning_rate": 1.3405809128630707e-05, "loss": 0.4483, "step": 19877 }, { "epoch": 16.496265560165973, "grad_norm": 29.383398056030273, "learning_rate": 1.340547717842324e-05, "loss": 0.5904, "step": 19878 }, { "epoch": 16.49709543568465, "grad_norm": 30.809297561645508, "learning_rate": 1.3405145228215768e-05, "loss": 1.0583, "step": 19879 }, { "epoch": 16.497925311203318, "grad_norm": 37.304901123046875, "learning_rate": 1.34048132780083e-05, "loss": 0.6483, "step": 19880 }, { "epoch": 16.498755186721993, "grad_norm": 45.74934387207031, "learning_rate": 1.340448132780083e-05, "loss": 0.6895, "step": 19881 }, { "epoch": 16.499585062240662, "grad_norm": 65.24268341064453, "learning_rate": 1.3404149377593363e-05, "loss": 0.7041, "step": 19882 }, { "epoch": 16.500414937759338, "grad_norm": 22.68982696533203, "learning_rate": 1.3403817427385893e-05, "loss": 0.3257, "step": 19883 }, { "epoch": 16.501244813278007, "grad_norm": 22.01398277282715, "learning_rate": 
1.3403485477178423e-05, "loss": 0.5812, "step": 19884 }, { "epoch": 16.502074688796682, "grad_norm": 32.226463317871094, "learning_rate": 1.3403153526970956e-05, "loss": 0.7378, "step": 19885 }, { "epoch": 16.50290456431535, "grad_norm": 104.66414642333984, "learning_rate": 1.3402821576763488e-05, "loss": 1.5286, "step": 19886 }, { "epoch": 16.503734439834027, "grad_norm": 68.98478698730469, "learning_rate": 1.3402489626556016e-05, "loss": 0.84, "step": 19887 }, { "epoch": 16.504564315352695, "grad_norm": 29.79265594482422, "learning_rate": 1.3402157676348549e-05, "loss": 1.2356, "step": 19888 }, { "epoch": 16.50539419087137, "grad_norm": 32.26980209350586, "learning_rate": 1.340182572614108e-05, "loss": 0.4842, "step": 19889 }, { "epoch": 16.50622406639004, "grad_norm": 69.94686126708984, "learning_rate": 1.3401493775933611e-05, "loss": 1.4856, "step": 19890 }, { "epoch": 16.507053941908715, "grad_norm": 25.883975982666016, "learning_rate": 1.3401161825726141e-05, "loss": 0.4287, "step": 19891 }, { "epoch": 16.507883817427384, "grad_norm": 61.548858642578125, "learning_rate": 1.3400829875518674e-05, "loss": 0.8842, "step": 19892 }, { "epoch": 16.50871369294606, "grad_norm": 58.29625701904297, "learning_rate": 1.3400497925311204e-05, "loss": 0.3779, "step": 19893 }, { "epoch": 16.50954356846473, "grad_norm": 20.62819480895996, "learning_rate": 1.3400165975103736e-05, "loss": 0.3298, "step": 19894 }, { "epoch": 16.510373443983404, "grad_norm": 40.11358642578125, "learning_rate": 1.3399834024896268e-05, "loss": 1.1267, "step": 19895 }, { "epoch": 16.511203319502073, "grad_norm": 27.745075225830078, "learning_rate": 1.3399502074688797e-05, "loss": 0.5341, "step": 19896 }, { "epoch": 16.51203319502075, "grad_norm": 48.09120178222656, "learning_rate": 1.3399170124481329e-05, "loss": 1.1234, "step": 19897 }, { "epoch": 16.512863070539417, "grad_norm": 52.455501556396484, "learning_rate": 1.3398838174273861e-05, "loss": 1.0372, "step": 19898 }, { "epoch": 
16.513692946058093, "grad_norm": 29.420207977294922, "learning_rate": 1.3398506224066392e-05, "loss": 0.7041, "step": 19899 }, { "epoch": 16.51452282157676, "grad_norm": 36.5825080871582, "learning_rate": 1.3398174273858922e-05, "loss": 0.5576, "step": 19900 }, { "epoch": 16.515352697095437, "grad_norm": 34.31532287597656, "learning_rate": 1.3397842323651452e-05, "loss": 0.9328, "step": 19901 }, { "epoch": 16.51618257261411, "grad_norm": 22.55410385131836, "learning_rate": 1.3397510373443984e-05, "loss": 0.4608, "step": 19902 }, { "epoch": 16.517012448132782, "grad_norm": 49.0317268371582, "learning_rate": 1.3397178423236517e-05, "loss": 1.385, "step": 19903 }, { "epoch": 16.517842323651454, "grad_norm": 28.28879165649414, "learning_rate": 1.3396846473029045e-05, "loss": 0.6114, "step": 19904 }, { "epoch": 16.518672199170126, "grad_norm": 56.75046157836914, "learning_rate": 1.3396514522821577e-05, "loss": 0.6404, "step": 19905 }, { "epoch": 16.5195020746888, "grad_norm": 26.83633804321289, "learning_rate": 1.339618257261411e-05, "loss": 0.4895, "step": 19906 }, { "epoch": 16.52033195020747, "grad_norm": 23.032814025878906, "learning_rate": 1.3395850622406642e-05, "loss": 0.6566, "step": 19907 }, { "epoch": 16.521161825726143, "grad_norm": 36.26712417602539, "learning_rate": 1.339551867219917e-05, "loss": 0.7156, "step": 19908 }, { "epoch": 16.521991701244815, "grad_norm": 35.55184555053711, "learning_rate": 1.3395186721991702e-05, "loss": 0.9468, "step": 19909 }, { "epoch": 16.522821576763487, "grad_norm": 16.027812957763672, "learning_rate": 1.3394854771784233e-05, "loss": 0.3891, "step": 19910 }, { "epoch": 16.52365145228216, "grad_norm": 20.17902946472168, "learning_rate": 1.3394522821576765e-05, "loss": 0.465, "step": 19911 }, { "epoch": 16.52448132780083, "grad_norm": 34.901161193847656, "learning_rate": 1.3394190871369295e-05, "loss": 0.4153, "step": 19912 }, { "epoch": 16.525311203319504, "grad_norm": 12.563765525817871, "learning_rate": 
1.3393858921161826e-05, "loss": 0.2821, "step": 19913 }, { "epoch": 16.526141078838176, "grad_norm": 29.7442626953125, "learning_rate": 1.3393526970954358e-05, "loss": 0.4952, "step": 19914 }, { "epoch": 16.526970954356848, "grad_norm": 77.46187591552734, "learning_rate": 1.339319502074689e-05, "loss": 0.93, "step": 19915 }, { "epoch": 16.52780082987552, "grad_norm": 78.38811492919922, "learning_rate": 1.3392863070539419e-05, "loss": 0.4452, "step": 19916 }, { "epoch": 16.528630705394193, "grad_norm": 116.41352081298828, "learning_rate": 1.3392531120331951e-05, "loss": 0.3638, "step": 19917 }, { "epoch": 16.529460580912865, "grad_norm": 37.6656379699707, "learning_rate": 1.3392199170124483e-05, "loss": 0.3466, "step": 19918 }, { "epoch": 16.530290456431537, "grad_norm": 31.041715621948242, "learning_rate": 1.3391867219917013e-05, "loss": 0.5282, "step": 19919 }, { "epoch": 16.53112033195021, "grad_norm": 33.11457061767578, "learning_rate": 1.3391535269709545e-05, "loss": 0.8093, "step": 19920 }, { "epoch": 16.53195020746888, "grad_norm": 25.479494094848633, "learning_rate": 1.3391203319502076e-05, "loss": 0.4129, "step": 19921 }, { "epoch": 16.532780082987554, "grad_norm": 38.30903625488281, "learning_rate": 1.3390871369294606e-05, "loss": 0.3749, "step": 19922 }, { "epoch": 16.533609958506226, "grad_norm": 45.77708053588867, "learning_rate": 1.3390539419087138e-05, "loss": 0.5046, "step": 19923 }, { "epoch": 16.534439834024898, "grad_norm": 33.20881652832031, "learning_rate": 1.339020746887967e-05, "loss": 0.375, "step": 19924 }, { "epoch": 16.53526970954357, "grad_norm": 70.42798614501953, "learning_rate": 1.33898755186722e-05, "loss": 1.05, "step": 19925 }, { "epoch": 16.536099585062242, "grad_norm": 76.49212646484375, "learning_rate": 1.3389543568464731e-05, "loss": 0.6394, "step": 19926 }, { "epoch": 16.536929460580915, "grad_norm": 27.51543617248535, "learning_rate": 1.3389211618257263e-05, "loss": 0.4781, "step": 19927 }, { "epoch": 16.537759336099587, 
"grad_norm": 42.509368896484375, "learning_rate": 1.3388879668049794e-05, "loss": 0.7778, "step": 19928 }, { "epoch": 16.53858921161826, "grad_norm": 42.45976257324219, "learning_rate": 1.3388547717842324e-05, "loss": 0.7172, "step": 19929 }, { "epoch": 16.53941908713693, "grad_norm": 30.739742279052734, "learning_rate": 1.3388215767634856e-05, "loss": 0.4184, "step": 19930 }, { "epoch": 16.540248962655603, "grad_norm": 24.770545959472656, "learning_rate": 1.3387883817427387e-05, "loss": 0.6145, "step": 19931 }, { "epoch": 16.541078838174275, "grad_norm": 30.630657196044922, "learning_rate": 1.3387551867219919e-05, "loss": 0.5175, "step": 19932 }, { "epoch": 16.541908713692948, "grad_norm": 58.74087905883789, "learning_rate": 1.3387219917012448e-05, "loss": 0.6322, "step": 19933 }, { "epoch": 16.54273858921162, "grad_norm": 52.62181854248047, "learning_rate": 1.338688796680498e-05, "loss": 0.9075, "step": 19934 }, { "epoch": 16.543568464730292, "grad_norm": 38.5825309753418, "learning_rate": 1.3386556016597512e-05, "loss": 0.3672, "step": 19935 }, { "epoch": 16.544398340248964, "grad_norm": 83.55912017822266, "learning_rate": 1.3386224066390044e-05, "loss": 0.7398, "step": 19936 }, { "epoch": 16.545228215767636, "grad_norm": 48.478336334228516, "learning_rate": 1.3385892116182573e-05, "loss": 0.8413, "step": 19937 }, { "epoch": 16.54605809128631, "grad_norm": 63.82270431518555, "learning_rate": 1.3385560165975105e-05, "loss": 0.9159, "step": 19938 }, { "epoch": 16.54688796680498, "grad_norm": 32.61478042602539, "learning_rate": 1.3385228215767637e-05, "loss": 0.6793, "step": 19939 }, { "epoch": 16.547717842323653, "grad_norm": 21.700328826904297, "learning_rate": 1.3384896265560167e-05, "loss": 0.5494, "step": 19940 }, { "epoch": 16.548547717842325, "grad_norm": 42.91531753540039, "learning_rate": 1.3384564315352698e-05, "loss": 1.1799, "step": 19941 }, { "epoch": 16.549377593360997, "grad_norm": 46.41109085083008, "learning_rate": 1.3384232365145228e-05, "loss": 
0.9395, "step": 19942 }, { "epoch": 16.55020746887967, "grad_norm": 45.48531723022461, "learning_rate": 1.338390041493776e-05, "loss": 0.614, "step": 19943 }, { "epoch": 16.551037344398342, "grad_norm": 25.824209213256836, "learning_rate": 1.3383568464730292e-05, "loss": 0.324, "step": 19944 }, { "epoch": 16.551867219917014, "grad_norm": 79.08534240722656, "learning_rate": 1.3383236514522821e-05, "loss": 0.7921, "step": 19945 }, { "epoch": 16.552697095435686, "grad_norm": 22.911041259765625, "learning_rate": 1.3382904564315353e-05, "loss": 0.376, "step": 19946 }, { "epoch": 16.55352697095436, "grad_norm": 44.41580581665039, "learning_rate": 1.3382572614107885e-05, "loss": 0.8764, "step": 19947 }, { "epoch": 16.55435684647303, "grad_norm": 23.663917541503906, "learning_rate": 1.3382240663900416e-05, "loss": 0.3897, "step": 19948 }, { "epoch": 16.555186721991703, "grad_norm": 86.13882446289062, "learning_rate": 1.3381908713692948e-05, "loss": 0.6765, "step": 19949 }, { "epoch": 16.556016597510375, "grad_norm": 49.1920166015625, "learning_rate": 1.3381576763485478e-05, "loss": 0.425, "step": 19950 }, { "epoch": 16.556846473029047, "grad_norm": 27.635366439819336, "learning_rate": 1.3381244813278009e-05, "loss": 0.3725, "step": 19951 }, { "epoch": 16.55767634854772, "grad_norm": 47.9204216003418, "learning_rate": 1.338091286307054e-05, "loss": 0.9148, "step": 19952 }, { "epoch": 16.55850622406639, "grad_norm": 59.24076843261719, "learning_rate": 1.3380580912863073e-05, "loss": 1.0056, "step": 19953 }, { "epoch": 16.559336099585064, "grad_norm": 63.322200775146484, "learning_rate": 1.3380248962655602e-05, "loss": 0.8737, "step": 19954 }, { "epoch": 16.560165975103736, "grad_norm": 34.000877380371094, "learning_rate": 1.3379917012448134e-05, "loss": 0.6203, "step": 19955 }, { "epoch": 16.560995850622408, "grad_norm": 49.223045349121094, "learning_rate": 1.3379585062240666e-05, "loss": 1.6551, "step": 19956 }, { "epoch": 16.56182572614108, "grad_norm": 94.61856079101562, 
"learning_rate": 1.3379253112033196e-05, "loss": 0.9833, "step": 19957 }, { "epoch": 16.562655601659753, "grad_norm": 29.39551544189453, "learning_rate": 1.3378921161825727e-05, "loss": 0.7711, "step": 19958 }, { "epoch": 16.563485477178425, "grad_norm": 32.393043518066406, "learning_rate": 1.3378589211618259e-05, "loss": 0.4511, "step": 19959 }, { "epoch": 16.564315352697097, "grad_norm": 44.507896423339844, "learning_rate": 1.3378257261410789e-05, "loss": 0.537, "step": 19960 }, { "epoch": 16.56514522821577, "grad_norm": 47.12276077270508, "learning_rate": 1.3377925311203321e-05, "loss": 1.4276, "step": 19961 }, { "epoch": 16.56597510373444, "grad_norm": 18.276287078857422, "learning_rate": 1.3377593360995852e-05, "loss": 0.2822, "step": 19962 }, { "epoch": 16.566804979253114, "grad_norm": 49.96858215332031, "learning_rate": 1.3377261410788382e-05, "loss": 0.8348, "step": 19963 }, { "epoch": 16.567634854771786, "grad_norm": 65.85921478271484, "learning_rate": 1.3376929460580914e-05, "loss": 1.0585, "step": 19964 }, { "epoch": 16.568464730290458, "grad_norm": 49.34452819824219, "learning_rate": 1.3376597510373446e-05, "loss": 0.7071, "step": 19965 }, { "epoch": 16.56929460580913, "grad_norm": 27.19073486328125, "learning_rate": 1.3376265560165975e-05, "loss": 0.5426, "step": 19966 }, { "epoch": 16.570124481327802, "grad_norm": 74.62936401367188, "learning_rate": 1.3375933609958507e-05, "loss": 0.7524, "step": 19967 }, { "epoch": 16.570954356846475, "grad_norm": 42.282649993896484, "learning_rate": 1.337560165975104e-05, "loss": 0.5689, "step": 19968 }, { "epoch": 16.571784232365147, "grad_norm": 35.067298889160156, "learning_rate": 1.337526970954357e-05, "loss": 0.5911, "step": 19969 }, { "epoch": 16.57261410788382, "grad_norm": 60.679161071777344, "learning_rate": 1.33749377593361e-05, "loss": 0.7014, "step": 19970 }, { "epoch": 16.57344398340249, "grad_norm": 66.77157592773438, "learning_rate": 1.337460580912863e-05, "loss": 1.5502, "step": 19971 }, { "epoch": 
16.574273858921163, "grad_norm": 47.47951889038086, "learning_rate": 1.3374273858921163e-05, "loss": 1.1684, "step": 19972 }, { "epoch": 16.575103734439836, "grad_norm": 35.23896026611328, "learning_rate": 1.3373941908713695e-05, "loss": 0.5662, "step": 19973 }, { "epoch": 16.575933609958508, "grad_norm": 21.11182403564453, "learning_rate": 1.3373609958506227e-05, "loss": 0.463, "step": 19974 }, { "epoch": 16.57676348547718, "grad_norm": 86.20349884033203, "learning_rate": 1.3373278008298755e-05, "loss": 0.7405, "step": 19975 }, { "epoch": 16.577593360995852, "grad_norm": 38.35658264160156, "learning_rate": 1.3372946058091288e-05, "loss": 0.781, "step": 19976 }, { "epoch": 16.578423236514524, "grad_norm": 19.723360061645508, "learning_rate": 1.337261410788382e-05, "loss": 0.3337, "step": 19977 }, { "epoch": 16.579253112033197, "grad_norm": 103.09727478027344, "learning_rate": 1.337228215767635e-05, "loss": 1.0758, "step": 19978 }, { "epoch": 16.58008298755187, "grad_norm": 55.761940002441406, "learning_rate": 1.337195020746888e-05, "loss": 1.3345, "step": 19979 }, { "epoch": 16.58091286307054, "grad_norm": 22.923309326171875, "learning_rate": 1.3371618257261411e-05, "loss": 0.3792, "step": 19980 }, { "epoch": 16.581742738589213, "grad_norm": 50.37618637084961, "learning_rate": 1.3371286307053943e-05, "loss": 1.5331, "step": 19981 }, { "epoch": 16.582572614107885, "grad_norm": 54.781951904296875, "learning_rate": 1.3370954356846475e-05, "loss": 1.3539, "step": 19982 }, { "epoch": 16.583402489626557, "grad_norm": 26.89964485168457, "learning_rate": 1.3370622406639004e-05, "loss": 0.3018, "step": 19983 }, { "epoch": 16.58423236514523, "grad_norm": 28.71274185180664, "learning_rate": 1.3370290456431536e-05, "loss": 0.5079, "step": 19984 }, { "epoch": 16.585062240663902, "grad_norm": 22.00162124633789, "learning_rate": 1.3369958506224068e-05, "loss": 0.3261, "step": 19985 }, { "epoch": 16.585892116182574, "grad_norm": 68.24591064453125, "learning_rate": 
1.33696265560166e-05, "loss": 0.9821, "step": 19986 }, { "epoch": 16.586721991701246, "grad_norm": 39.09504318237305, "learning_rate": 1.3369294605809129e-05, "loss": 1.0951, "step": 19987 }, { "epoch": 16.58755186721992, "grad_norm": 45.40039825439453, "learning_rate": 1.3368962655601661e-05, "loss": 0.7315, "step": 19988 }, { "epoch": 16.58838174273859, "grad_norm": 31.115699768066406, "learning_rate": 1.3368630705394191e-05, "loss": 1.0577, "step": 19989 }, { "epoch": 16.589211618257263, "grad_norm": 35.42839813232422, "learning_rate": 1.3368298755186724e-05, "loss": 0.7578, "step": 19990 }, { "epoch": 16.590041493775935, "grad_norm": 63.877296447753906, "learning_rate": 1.3367966804979254e-05, "loss": 1.0846, "step": 19991 }, { "epoch": 16.590871369294607, "grad_norm": 39.78837585449219, "learning_rate": 1.3367634854771784e-05, "loss": 0.7732, "step": 19992 }, { "epoch": 16.59170124481328, "grad_norm": 36.20892333984375, "learning_rate": 1.3367302904564316e-05, "loss": 0.7145, "step": 19993 }, { "epoch": 16.59253112033195, "grad_norm": 39.88992691040039, "learning_rate": 1.3366970954356849e-05, "loss": 0.7723, "step": 19994 }, { "epoch": 16.593360995850624, "grad_norm": 16.50882911682129, "learning_rate": 1.3366639004149377e-05, "loss": 0.3584, "step": 19995 }, { "epoch": 16.594190871369296, "grad_norm": 44.19966125488281, "learning_rate": 1.336630705394191e-05, "loss": 0.7643, "step": 19996 }, { "epoch": 16.59502074688797, "grad_norm": 75.08779907226562, "learning_rate": 1.3365975103734442e-05, "loss": 0.9902, "step": 19997 }, { "epoch": 16.59585062240664, "grad_norm": 27.525362014770508, "learning_rate": 1.3365643153526972e-05, "loss": 0.6668, "step": 19998 }, { "epoch": 16.596680497925313, "grad_norm": 67.96011352539062, "learning_rate": 1.3365311203319504e-05, "loss": 1.6009, "step": 19999 }, { "epoch": 16.597510373443985, "grad_norm": 40.511680603027344, "learning_rate": 1.3364979253112034e-05, "loss": 1.0228, "step": 20000 }, { "epoch": 
16.598340248962657, "grad_norm": 57.24261474609375, "learning_rate": 1.3364647302904565e-05, "loss": 0.3841, "step": 20001 }, { "epoch": 16.59917012448133, "grad_norm": 20.375537872314453, "learning_rate": 1.3364315352697097e-05, "loss": 0.6409, "step": 20002 }, { "epoch": 16.6, "grad_norm": 23.62830352783203, "learning_rate": 1.3363983402489629e-05, "loss": 0.4288, "step": 20003 }, { "epoch": 16.600829875518674, "grad_norm": 22.610618591308594, "learning_rate": 1.3363651452282158e-05, "loss": 0.368, "step": 20004 }, { "epoch": 16.601659751037346, "grad_norm": 65.74935150146484, "learning_rate": 1.336331950207469e-05, "loss": 1.3156, "step": 20005 }, { "epoch": 16.602489626556018, "grad_norm": 80.50880432128906, "learning_rate": 1.3362987551867222e-05, "loss": 1.1269, "step": 20006 }, { "epoch": 16.60331950207469, "grad_norm": 43.8286018371582, "learning_rate": 1.3362655601659752e-05, "loss": 0.5474, "step": 20007 }, { "epoch": 16.604149377593362, "grad_norm": 58.41742706298828, "learning_rate": 1.3362323651452283e-05, "loss": 0.7491, "step": 20008 }, { "epoch": 16.604979253112035, "grad_norm": 37.35915756225586, "learning_rate": 1.3361991701244815e-05, "loss": 0.6742, "step": 20009 }, { "epoch": 16.605809128630707, "grad_norm": 62.848533630371094, "learning_rate": 1.3361659751037345e-05, "loss": 0.9142, "step": 20010 }, { "epoch": 16.60663900414938, "grad_norm": 35.75340270996094, "learning_rate": 1.3361327800829877e-05, "loss": 0.7308, "step": 20011 }, { "epoch": 16.60746887966805, "grad_norm": 27.61753273010254, "learning_rate": 1.3360995850622406e-05, "loss": 0.5458, "step": 20012 }, { "epoch": 16.608298755186723, "grad_norm": 37.17820739746094, "learning_rate": 1.3360663900414938e-05, "loss": 0.519, "step": 20013 }, { "epoch": 16.609128630705396, "grad_norm": 16.356842041015625, "learning_rate": 1.336033195020747e-05, "loss": 0.4562, "step": 20014 }, { "epoch": 16.609958506224068, "grad_norm": 57.30643844604492, "learning_rate": 1.3360000000000003e-05, "loss": 
0.7469, "step": 20015 }, { "epoch": 16.61078838174274, "grad_norm": 33.08942794799805, "learning_rate": 1.3359668049792531e-05, "loss": 0.7698, "step": 20016 }, { "epoch": 16.611618257261412, "grad_norm": 62.63901901245117, "learning_rate": 1.3359336099585063e-05, "loss": 0.8398, "step": 20017 }, { "epoch": 16.612448132780084, "grad_norm": 32.229286193847656, "learning_rate": 1.3359004149377594e-05, "loss": 0.9507, "step": 20018 }, { "epoch": 16.613278008298757, "grad_norm": 90.09099578857422, "learning_rate": 1.3358672199170126e-05, "loss": 0.9361, "step": 20019 }, { "epoch": 16.61410788381743, "grad_norm": 49.875694274902344, "learning_rate": 1.3358340248962656e-05, "loss": 0.6661, "step": 20020 }, { "epoch": 16.6149377593361, "grad_norm": 18.7353572845459, "learning_rate": 1.3358008298755187e-05, "loss": 0.3498, "step": 20021 }, { "epoch": 16.615767634854773, "grad_norm": 30.408388137817383, "learning_rate": 1.3357676348547719e-05, "loss": 0.8601, "step": 20022 }, { "epoch": 16.616597510373445, "grad_norm": 201.93075561523438, "learning_rate": 1.3357344398340251e-05, "loss": 1.1966, "step": 20023 }, { "epoch": 16.617427385892118, "grad_norm": 25.47352409362793, "learning_rate": 1.335701244813278e-05, "loss": 0.3181, "step": 20024 }, { "epoch": 16.61825726141079, "grad_norm": 44.325340270996094, "learning_rate": 1.3356680497925312e-05, "loss": 0.5971, "step": 20025 }, { "epoch": 16.619087136929462, "grad_norm": 17.87966537475586, "learning_rate": 1.3356348547717844e-05, "loss": 0.4562, "step": 20026 }, { "epoch": 16.619917012448134, "grad_norm": 73.74443817138672, "learning_rate": 1.3356016597510374e-05, "loss": 1.0795, "step": 20027 }, { "epoch": 16.620746887966806, "grad_norm": 69.7632827758789, "learning_rate": 1.3355684647302906e-05, "loss": 1.2174, "step": 20028 }, { "epoch": 16.62157676348548, "grad_norm": 56.19786834716797, "learning_rate": 1.3355352697095437e-05, "loss": 1.1237, "step": 20029 }, { "epoch": 16.62240663900415, "grad_norm": 
24.21681785583496, "learning_rate": 1.3355020746887967e-05, "loss": 0.7402, "step": 20030 }, { "epoch": 16.623236514522823, "grad_norm": 26.510446548461914, "learning_rate": 1.33546887966805e-05, "loss": 0.6172, "step": 20031 }, { "epoch": 16.624066390041495, "grad_norm": 43.63494873046875, "learning_rate": 1.3354356846473031e-05, "loss": 0.9594, "step": 20032 }, { "epoch": 16.624896265560167, "grad_norm": 34.377647399902344, "learning_rate": 1.335402489626556e-05, "loss": 1.0809, "step": 20033 }, { "epoch": 16.62572614107884, "grad_norm": 22.827789306640625, "learning_rate": 1.3353692946058092e-05, "loss": 0.3246, "step": 20034 }, { "epoch": 16.62655601659751, "grad_norm": 29.229610443115234, "learning_rate": 1.3353360995850624e-05, "loss": 1.2736, "step": 20035 }, { "epoch": 16.627385892116184, "grad_norm": 88.05204010009766, "learning_rate": 1.3353029045643155e-05, "loss": 1.202, "step": 20036 }, { "epoch": 16.628215767634856, "grad_norm": 55.867977142333984, "learning_rate": 1.3352697095435685e-05, "loss": 1.3122, "step": 20037 }, { "epoch": 16.62904564315353, "grad_norm": 10.406760215759277, "learning_rate": 1.3352365145228217e-05, "loss": 0.2692, "step": 20038 }, { "epoch": 16.6298755186722, "grad_norm": 62.12839889526367, "learning_rate": 1.3352033195020748e-05, "loss": 0.9356, "step": 20039 }, { "epoch": 16.630705394190873, "grad_norm": 29.659347534179688, "learning_rate": 1.335170124481328e-05, "loss": 0.8651, "step": 20040 }, { "epoch": 16.631535269709545, "grad_norm": 18.346141815185547, "learning_rate": 1.3351369294605809e-05, "loss": 0.3183, "step": 20041 }, { "epoch": 16.632365145228217, "grad_norm": 89.22234344482422, "learning_rate": 1.335103734439834e-05, "loss": 0.9761, "step": 20042 }, { "epoch": 16.63319502074689, "grad_norm": 61.567264556884766, "learning_rate": 1.3350705394190873e-05, "loss": 0.5783, "step": 20043 }, { "epoch": 16.63402489626556, "grad_norm": 71.4827651977539, "learning_rate": 1.3350373443983405e-05, "loss": 0.8849, "step": 
20044 }, { "epoch": 16.634854771784234, "grad_norm": 34.997337341308594, "learning_rate": 1.3350041493775934e-05, "loss": 0.6134, "step": 20045 }, { "epoch": 16.635684647302906, "grad_norm": 34.99873352050781, "learning_rate": 1.3349709543568466e-05, "loss": 0.4086, "step": 20046 }, { "epoch": 16.636514522821578, "grad_norm": 71.46434783935547, "learning_rate": 1.3349377593360998e-05, "loss": 0.4225, "step": 20047 }, { "epoch": 16.63734439834025, "grad_norm": 25.807666778564453, "learning_rate": 1.3349045643153528e-05, "loss": 0.4395, "step": 20048 }, { "epoch": 16.638174273858922, "grad_norm": 23.932695388793945, "learning_rate": 1.3348713692946059e-05, "loss": 0.3296, "step": 20049 }, { "epoch": 16.639004149377595, "grad_norm": 35.01417922973633, "learning_rate": 1.3348381742738589e-05, "loss": 0.3852, "step": 20050 }, { "epoch": 16.639834024896267, "grad_norm": 47.308494567871094, "learning_rate": 1.3348049792531121e-05, "loss": 0.5792, "step": 20051 }, { "epoch": 16.64066390041494, "grad_norm": 40.842105865478516, "learning_rate": 1.3347717842323653e-05, "loss": 0.4701, "step": 20052 }, { "epoch": 16.64149377593361, "grad_norm": 59.330406188964844, "learning_rate": 1.3347385892116185e-05, "loss": 0.5504, "step": 20053 }, { "epoch": 16.642323651452283, "grad_norm": 69.7313003540039, "learning_rate": 1.3347053941908714e-05, "loss": 0.4867, "step": 20054 }, { "epoch": 16.643153526970956, "grad_norm": 22.6375732421875, "learning_rate": 1.3346721991701246e-05, "loss": 0.5085, "step": 20055 }, { "epoch": 16.643983402489628, "grad_norm": 15.524096488952637, "learning_rate": 1.3346390041493778e-05, "loss": 0.3412, "step": 20056 }, { "epoch": 16.6448132780083, "grad_norm": 35.19900131225586, "learning_rate": 1.3346058091286309e-05, "loss": 0.5396, "step": 20057 }, { "epoch": 16.645643153526972, "grad_norm": 35.2786750793457, "learning_rate": 1.3345726141078839e-05, "loss": 0.669, "step": 20058 }, { "epoch": 16.646473029045644, "grad_norm": 97.03438568115234, 
"learning_rate": 1.334539419087137e-05, "loss": 2.0569, "step": 20059 }, { "epoch": 16.647302904564317, "grad_norm": 83.00540161132812, "learning_rate": 1.3345062240663902e-05, "loss": 0.4194, "step": 20060 }, { "epoch": 16.64813278008299, "grad_norm": 43.72213363647461, "learning_rate": 1.3344730290456434e-05, "loss": 0.6235, "step": 20061 }, { "epoch": 16.64896265560166, "grad_norm": 58.3004035949707, "learning_rate": 1.3344398340248962e-05, "loss": 0.9387, "step": 20062 }, { "epoch": 16.649792531120333, "grad_norm": 37.649497985839844, "learning_rate": 1.3344066390041495e-05, "loss": 0.8642, "step": 20063 }, { "epoch": 16.650622406639005, "grad_norm": 75.3789291381836, "learning_rate": 1.3343734439834027e-05, "loss": 0.8812, "step": 20064 }, { "epoch": 16.651452282157678, "grad_norm": 62.81369400024414, "learning_rate": 1.3343402489626557e-05, "loss": 1.1762, "step": 20065 }, { "epoch": 16.65228215767635, "grad_norm": 69.4964828491211, "learning_rate": 1.3343070539419088e-05, "loss": 0.5364, "step": 20066 }, { "epoch": 16.653112033195022, "grad_norm": 47.107723236083984, "learning_rate": 1.334273858921162e-05, "loss": 0.4925, "step": 20067 }, { "epoch": 16.653941908713694, "grad_norm": 38.30704879760742, "learning_rate": 1.334240663900415e-05, "loss": 0.8099, "step": 20068 }, { "epoch": 16.654771784232366, "grad_norm": 22.235427856445312, "learning_rate": 1.3342074688796682e-05, "loss": 0.7217, "step": 20069 }, { "epoch": 16.65560165975104, "grad_norm": 69.16236114501953, "learning_rate": 1.3341742738589213e-05, "loss": 0.5629, "step": 20070 }, { "epoch": 16.65643153526971, "grad_norm": 82.41657257080078, "learning_rate": 1.3341410788381743e-05, "loss": 0.6034, "step": 20071 }, { "epoch": 16.657261410788383, "grad_norm": 26.92138671875, "learning_rate": 1.3341078838174275e-05, "loss": 0.3682, "step": 20072 }, { "epoch": 16.658091286307055, "grad_norm": 32.95354461669922, "learning_rate": 1.3340746887966807e-05, "loss": 0.4734, "step": 20073 }, { "epoch": 
16.658921161825727, "grad_norm": 63.75830841064453, "learning_rate": 1.3340414937759336e-05, "loss": 0.53, "step": 20074 }, { "epoch": 16.6597510373444, "grad_norm": 49.3989372253418, "learning_rate": 1.3340082987551868e-05, "loss": 0.8298, "step": 20075 }, { "epoch": 16.66058091286307, "grad_norm": 33.068992614746094, "learning_rate": 1.33397510373444e-05, "loss": 0.5087, "step": 20076 }, { "epoch": 16.661410788381744, "grad_norm": 101.49363708496094, "learning_rate": 1.333941908713693e-05, "loss": 0.7296, "step": 20077 }, { "epoch": 16.662240663900416, "grad_norm": 48.04365921020508, "learning_rate": 1.3339087136929463e-05, "loss": 0.5254, "step": 20078 }, { "epoch": 16.66307053941909, "grad_norm": 53.21076202392578, "learning_rate": 1.3338755186721993e-05, "loss": 0.6971, "step": 20079 }, { "epoch": 16.66390041493776, "grad_norm": 43.423397064208984, "learning_rate": 1.3338423236514523e-05, "loss": 0.9904, "step": 20080 }, { "epoch": 16.664730290456433, "grad_norm": 54.788639068603516, "learning_rate": 1.3338091286307056e-05, "loss": 1.0941, "step": 20081 }, { "epoch": 16.665560165975105, "grad_norm": 41.57114791870117, "learning_rate": 1.3337759336099588e-05, "loss": 1.0671, "step": 20082 }, { "epoch": 16.666390041493777, "grad_norm": 60.7925910949707, "learning_rate": 1.3337427385892116e-05, "loss": 0.658, "step": 20083 }, { "epoch": 16.66721991701245, "grad_norm": 11.33795166015625, "learning_rate": 1.3337095435684648e-05, "loss": 0.4372, "step": 20084 }, { "epoch": 16.66804979253112, "grad_norm": 48.01856231689453, "learning_rate": 1.333676348547718e-05, "loss": 0.9565, "step": 20085 }, { "epoch": 16.668879668049794, "grad_norm": 34.7841911315918, "learning_rate": 1.3336431535269711e-05, "loss": 1.1467, "step": 20086 }, { "epoch": 16.669709543568466, "grad_norm": 47.3765869140625, "learning_rate": 1.3336099585062241e-05, "loss": 1.0086, "step": 20087 }, { "epoch": 16.670539419087138, "grad_norm": 50.46836853027344, "learning_rate": 1.3335767634854772e-05, 
"loss": 0.8498, "step": 20088 }, { "epoch": 16.67136929460581, "grad_norm": 46.92559051513672, "learning_rate": 1.3335435684647304e-05, "loss": 0.7461, "step": 20089 }, { "epoch": 16.672199170124482, "grad_norm": 42.53823471069336, "learning_rate": 1.3335103734439836e-05, "loss": 0.6513, "step": 20090 }, { "epoch": 16.673029045643155, "grad_norm": 29.645227432250977, "learning_rate": 1.3334771784232365e-05, "loss": 0.2976, "step": 20091 }, { "epoch": 16.673858921161827, "grad_norm": 50.537254333496094, "learning_rate": 1.3334439834024897e-05, "loss": 0.8947, "step": 20092 }, { "epoch": 16.6746887966805, "grad_norm": 44.354217529296875, "learning_rate": 1.3334107883817429e-05, "loss": 0.6385, "step": 20093 }, { "epoch": 16.67551867219917, "grad_norm": 20.096969604492188, "learning_rate": 1.3333775933609961e-05, "loss": 0.4356, "step": 20094 }, { "epoch": 16.676348547717843, "grad_norm": 28.930044174194336, "learning_rate": 1.333344398340249e-05, "loss": 0.852, "step": 20095 }, { "epoch": 16.677178423236516, "grad_norm": 21.402156829833984, "learning_rate": 1.3333112033195022e-05, "loss": 0.5584, "step": 20096 }, { "epoch": 16.678008298755188, "grad_norm": 24.9105224609375, "learning_rate": 1.3332780082987552e-05, "loss": 0.7084, "step": 20097 }, { "epoch": 16.67883817427386, "grad_norm": 26.855655670166016, "learning_rate": 1.3332448132780084e-05, "loss": 0.6401, "step": 20098 }, { "epoch": 16.679668049792532, "grad_norm": 40.641143798828125, "learning_rate": 1.3332116182572615e-05, "loss": 0.9127, "step": 20099 }, { "epoch": 16.680497925311204, "grad_norm": 60.066802978515625, "learning_rate": 1.3331784232365145e-05, "loss": 1.1905, "step": 20100 }, { "epoch": 16.681327800829877, "grad_norm": 120.78718566894531, "learning_rate": 1.3331452282157677e-05, "loss": 1.0649, "step": 20101 }, { "epoch": 16.68215767634855, "grad_norm": 18.062538146972656, "learning_rate": 1.333112033195021e-05, "loss": 0.3383, "step": 20102 }, { "epoch": 16.68298755186722, "grad_norm": 
31.689016342163086, "learning_rate": 1.3330788381742738e-05, "loss": 0.4728, "step": 20103 }, { "epoch": 16.683817427385893, "grad_norm": 38.8204231262207, "learning_rate": 1.333045643153527e-05, "loss": 0.5851, "step": 20104 }, { "epoch": 16.684647302904565, "grad_norm": 36.66726303100586, "learning_rate": 1.3330124481327802e-05, "loss": 1.0546, "step": 20105 }, { "epoch": 16.685477178423238, "grad_norm": 26.421236038208008, "learning_rate": 1.3329792531120333e-05, "loss": 0.4355, "step": 20106 }, { "epoch": 16.68630705394191, "grad_norm": 56.67576217651367, "learning_rate": 1.3329460580912865e-05, "loss": 1.0069, "step": 20107 }, { "epoch": 16.687136929460582, "grad_norm": 27.011333465576172, "learning_rate": 1.3329128630705395e-05, "loss": 0.6096, "step": 20108 }, { "epoch": 16.687966804979254, "grad_norm": 35.81233596801758, "learning_rate": 1.3328796680497926e-05, "loss": 0.5431, "step": 20109 }, { "epoch": 16.688796680497926, "grad_norm": 76.89146423339844, "learning_rate": 1.3328464730290458e-05, "loss": 0.925, "step": 20110 }, { "epoch": 16.6896265560166, "grad_norm": 28.406314849853516, "learning_rate": 1.332813278008299e-05, "loss": 0.5593, "step": 20111 }, { "epoch": 16.69045643153527, "grad_norm": 46.67021942138672, "learning_rate": 1.3327800829875519e-05, "loss": 0.8363, "step": 20112 }, { "epoch": 16.691286307053943, "grad_norm": 29.763019561767578, "learning_rate": 1.332746887966805e-05, "loss": 0.445, "step": 20113 }, { "epoch": 16.692116182572615, "grad_norm": 20.769123077392578, "learning_rate": 1.3327136929460583e-05, "loss": 0.3845, "step": 20114 }, { "epoch": 16.692946058091287, "grad_norm": 39.03623962402344, "learning_rate": 1.3326804979253113e-05, "loss": 0.759, "step": 20115 }, { "epoch": 16.69377593360996, "grad_norm": 55.66388702392578, "learning_rate": 1.3326473029045644e-05, "loss": 1.0156, "step": 20116 }, { "epoch": 16.694605809128632, "grad_norm": 16.958450317382812, "learning_rate": 1.3326141078838176e-05, "loss": 0.3216, "step": 
20117 }, { "epoch": 16.695435684647304, "grad_norm": 42.0082893371582, "learning_rate": 1.3325809128630706e-05, "loss": 0.727, "step": 20118 }, { "epoch": 16.696265560165976, "grad_norm": 53.27704620361328, "learning_rate": 1.3325477178423238e-05, "loss": 0.5959, "step": 20119 }, { "epoch": 16.69709543568465, "grad_norm": 18.513837814331055, "learning_rate": 1.3325145228215767e-05, "loss": 0.359, "step": 20120 }, { "epoch": 16.69792531120332, "grad_norm": 11.609146118164062, "learning_rate": 1.33248132780083e-05, "loss": 0.2463, "step": 20121 }, { "epoch": 16.698755186721993, "grad_norm": 143.62049865722656, "learning_rate": 1.3324481327800831e-05, "loss": 1.414, "step": 20122 }, { "epoch": 16.699585062240665, "grad_norm": 21.424949645996094, "learning_rate": 1.3324149377593363e-05, "loss": 0.4802, "step": 20123 }, { "epoch": 16.700414937759337, "grad_norm": 28.881210327148438, "learning_rate": 1.3323817427385892e-05, "loss": 0.6033, "step": 20124 }, { "epoch": 16.70124481327801, "grad_norm": 59.40174102783203, "learning_rate": 1.3323485477178424e-05, "loss": 0.5567, "step": 20125 }, { "epoch": 16.70207468879668, "grad_norm": 71.67731475830078, "learning_rate": 1.3323153526970956e-05, "loss": 0.7851, "step": 20126 }, { "epoch": 16.702904564315354, "grad_norm": 88.1539535522461, "learning_rate": 1.3322821576763487e-05, "loss": 0.9455, "step": 20127 }, { "epoch": 16.703734439834026, "grad_norm": 55.305240631103516, "learning_rate": 1.3322489626556017e-05, "loss": 1.2097, "step": 20128 }, { "epoch": 16.704564315352698, "grad_norm": 65.77651977539062, "learning_rate": 1.3322157676348548e-05, "loss": 1.127, "step": 20129 }, { "epoch": 16.70539419087137, "grad_norm": 24.650318145751953, "learning_rate": 1.332182572614108e-05, "loss": 0.5314, "step": 20130 }, { "epoch": 16.706224066390043, "grad_norm": 32.537540435791016, "learning_rate": 1.3321493775933612e-05, "loss": 0.6988, "step": 20131 }, { "epoch": 16.707053941908715, "grad_norm": 36.287322998046875, 
"learning_rate": 1.3321161825726144e-05, "loss": 1.1639, "step": 20132 }, { "epoch": 16.707883817427387, "grad_norm": 61.83639907836914, "learning_rate": 1.3320829875518673e-05, "loss": 0.9868, "step": 20133 }, { "epoch": 16.70871369294606, "grad_norm": 42.22099304199219, "learning_rate": 1.3320497925311205e-05, "loss": 0.7818, "step": 20134 }, { "epoch": 16.70954356846473, "grad_norm": 36.70425796508789, "learning_rate": 1.3320165975103735e-05, "loss": 0.3025, "step": 20135 }, { "epoch": 16.710373443983404, "grad_norm": 50.005943298339844, "learning_rate": 1.3319834024896267e-05, "loss": 0.8117, "step": 20136 }, { "epoch": 16.711203319502076, "grad_norm": 21.675521850585938, "learning_rate": 1.3319502074688798e-05, "loss": 0.4316, "step": 20137 }, { "epoch": 16.712033195020748, "grad_norm": 35.53024673461914, "learning_rate": 1.3319170124481328e-05, "loss": 0.6624, "step": 20138 }, { "epoch": 16.71286307053942, "grad_norm": 31.12385368347168, "learning_rate": 1.331883817427386e-05, "loss": 0.5298, "step": 20139 }, { "epoch": 16.713692946058092, "grad_norm": 40.254676818847656, "learning_rate": 1.3318506224066392e-05, "loss": 0.8012, "step": 20140 }, { "epoch": 16.714522821576764, "grad_norm": 67.31124877929688, "learning_rate": 1.3318174273858921e-05, "loss": 0.829, "step": 20141 }, { "epoch": 16.715352697095437, "grad_norm": 43.88144302368164, "learning_rate": 1.3317842323651453e-05, "loss": 1.0013, "step": 20142 }, { "epoch": 16.71618257261411, "grad_norm": 27.20998764038086, "learning_rate": 1.3317510373443985e-05, "loss": 0.5468, "step": 20143 }, { "epoch": 16.71701244813278, "grad_norm": 34.36540222167969, "learning_rate": 1.3317178423236516e-05, "loss": 0.4429, "step": 20144 }, { "epoch": 16.717842323651453, "grad_norm": 17.17654037475586, "learning_rate": 1.3316846473029046e-05, "loss": 0.4319, "step": 20145 }, { "epoch": 16.718672199170125, "grad_norm": 40.08921813964844, "learning_rate": 1.3316514522821578e-05, "loss": 0.6651, "step": 20146 }, { "epoch": 
16.719502074688798, "grad_norm": 30.426668167114258, "learning_rate": 1.3316182572614109e-05, "loss": 0.5464, "step": 20147 }, { "epoch": 16.72033195020747, "grad_norm": 69.02722930908203, "learning_rate": 1.331585062240664e-05, "loss": 0.7479, "step": 20148 }, { "epoch": 16.721161825726142, "grad_norm": 24.42546272277832, "learning_rate": 1.331551867219917e-05, "loss": 0.4115, "step": 20149 }, { "epoch": 16.721991701244814, "grad_norm": 138.74420166015625, "learning_rate": 1.3315186721991702e-05, "loss": 0.5456, "step": 20150 }, { "epoch": 16.722821576763486, "grad_norm": 31.791780471801758, "learning_rate": 1.3314854771784234e-05, "loss": 0.4206, "step": 20151 }, { "epoch": 16.72365145228216, "grad_norm": 44.30790328979492, "learning_rate": 1.3314522821576766e-05, "loss": 0.7067, "step": 20152 }, { "epoch": 16.72448132780083, "grad_norm": 63.40266036987305, "learning_rate": 1.3314190871369294e-05, "loss": 0.5311, "step": 20153 }, { "epoch": 16.725311203319503, "grad_norm": 26.38679313659668, "learning_rate": 1.3313858921161827e-05, "loss": 0.4384, "step": 20154 }, { "epoch": 16.726141078838175, "grad_norm": 29.848979949951172, "learning_rate": 1.3313526970954359e-05, "loss": 0.3654, "step": 20155 }, { "epoch": 16.726970954356847, "grad_norm": 29.004192352294922, "learning_rate": 1.3313195020746889e-05, "loss": 0.8411, "step": 20156 }, { "epoch": 16.72780082987552, "grad_norm": 19.680267333984375, "learning_rate": 1.3312863070539421e-05, "loss": 0.3565, "step": 20157 }, { "epoch": 16.728630705394192, "grad_norm": 17.50577735900879, "learning_rate": 1.331253112033195e-05, "loss": 0.3752, "step": 20158 }, { "epoch": 16.729460580912864, "grad_norm": 28.09739875793457, "learning_rate": 1.3312199170124482e-05, "loss": 0.5063, "step": 20159 }, { "epoch": 16.730290456431536, "grad_norm": 31.29257583618164, "learning_rate": 1.3311867219917014e-05, "loss": 0.4789, "step": 20160 }, { "epoch": 16.73112033195021, "grad_norm": 82.34453582763672, "learning_rate": 
1.3311535269709546e-05, "loss": 0.8102, "step": 20161 }, { "epoch": 16.73195020746888, "grad_norm": 90.98601531982422, "learning_rate": 1.3311203319502075e-05, "loss": 0.6018, "step": 20162 }, { "epoch": 16.732780082987553, "grad_norm": 64.6052474975586, "learning_rate": 1.3310871369294607e-05, "loss": 0.7381, "step": 20163 }, { "epoch": 16.733609958506225, "grad_norm": 22.949115753173828, "learning_rate": 1.331053941908714e-05, "loss": 0.6074, "step": 20164 }, { "epoch": 16.734439834024897, "grad_norm": 34.29581832885742, "learning_rate": 1.331020746887967e-05, "loss": 0.7725, "step": 20165 }, { "epoch": 16.73526970954357, "grad_norm": 43.221744537353516, "learning_rate": 1.33098755186722e-05, "loss": 0.5056, "step": 20166 }, { "epoch": 16.73609958506224, "grad_norm": 21.201805114746094, "learning_rate": 1.330954356846473e-05, "loss": 0.4665, "step": 20167 }, { "epoch": 16.736929460580914, "grad_norm": 58.8960075378418, "learning_rate": 1.3309211618257263e-05, "loss": 0.3975, "step": 20168 }, { "epoch": 16.737759336099586, "grad_norm": 44.95729446411133, "learning_rate": 1.3308879668049795e-05, "loss": 1.0964, "step": 20169 }, { "epoch": 16.738589211618258, "grad_norm": 48.45002365112305, "learning_rate": 1.3308547717842323e-05, "loss": 1.407, "step": 20170 }, { "epoch": 16.73941908713693, "grad_norm": 24.994718551635742, "learning_rate": 1.3308215767634855e-05, "loss": 0.6, "step": 20171 }, { "epoch": 16.740248962655603, "grad_norm": 33.07323455810547, "learning_rate": 1.3307883817427388e-05, "loss": 0.6422, "step": 20172 }, { "epoch": 16.741078838174275, "grad_norm": 48.564170837402344, "learning_rate": 1.330755186721992e-05, "loss": 0.6301, "step": 20173 }, { "epoch": 16.741908713692947, "grad_norm": 19.81848907470703, "learning_rate": 1.3307219917012448e-05, "loss": 0.3951, "step": 20174 }, { "epoch": 16.74273858921162, "grad_norm": 63.37447738647461, "learning_rate": 1.330688796680498e-05, "loss": 0.8993, "step": 20175 }, { "epoch": 16.74356846473029, 
"grad_norm": 37.942481994628906, "learning_rate": 1.3306556016597511e-05, "loss": 0.5215, "step": 20176 }, { "epoch": 16.744398340248964, "grad_norm": 18.206565856933594, "learning_rate": 1.3306224066390043e-05, "loss": 0.3957, "step": 20177 }, { "epoch": 16.745228215767636, "grad_norm": 42.000125885009766, "learning_rate": 1.3305892116182573e-05, "loss": 0.9805, "step": 20178 }, { "epoch": 16.746058091286308, "grad_norm": 69.83853149414062, "learning_rate": 1.3305560165975104e-05, "loss": 0.7152, "step": 20179 }, { "epoch": 16.74688796680498, "grad_norm": 29.241046905517578, "learning_rate": 1.3305228215767636e-05, "loss": 0.4819, "step": 20180 }, { "epoch": 16.747717842323652, "grad_norm": 81.62631225585938, "learning_rate": 1.3304896265560168e-05, "loss": 0.6935, "step": 20181 }, { "epoch": 16.748547717842325, "grad_norm": 29.333877563476562, "learning_rate": 1.3304564315352697e-05, "loss": 0.55, "step": 20182 }, { "epoch": 16.749377593360997, "grad_norm": 20.76583480834961, "learning_rate": 1.3304232365145229e-05, "loss": 0.6259, "step": 20183 }, { "epoch": 16.75020746887967, "grad_norm": 48.74827194213867, "learning_rate": 1.3303900414937761e-05, "loss": 0.5148, "step": 20184 }, { "epoch": 16.75103734439834, "grad_norm": 39.61945343017578, "learning_rate": 1.3303568464730291e-05, "loss": 0.6461, "step": 20185 }, { "epoch": 16.751867219917013, "grad_norm": 51.50303268432617, "learning_rate": 1.3303236514522824e-05, "loss": 0.7773, "step": 20186 }, { "epoch": 16.752697095435686, "grad_norm": 37.91788101196289, "learning_rate": 1.3302904564315354e-05, "loss": 0.5119, "step": 20187 }, { "epoch": 16.753526970954358, "grad_norm": 32.61070251464844, "learning_rate": 1.3302572614107884e-05, "loss": 0.9594, "step": 20188 }, { "epoch": 16.75435684647303, "grad_norm": 44.78034973144531, "learning_rate": 1.3302240663900416e-05, "loss": 1.0863, "step": 20189 }, { "epoch": 16.755186721991702, "grad_norm": 30.67465591430664, "learning_rate": 1.3301908713692949e-05, "loss": 
0.4835, "step": 20190 }, { "epoch": 16.756016597510374, "grad_norm": 69.80274200439453, "learning_rate": 1.3301576763485477e-05, "loss": 1.0459, "step": 20191 }, { "epoch": 16.756846473029047, "grad_norm": 49.08628463745117, "learning_rate": 1.330124481327801e-05, "loss": 0.516, "step": 20192 }, { "epoch": 16.75767634854772, "grad_norm": 27.929052352905273, "learning_rate": 1.3300912863070542e-05, "loss": 0.5881, "step": 20193 }, { "epoch": 16.75850622406639, "grad_norm": 55.691505432128906, "learning_rate": 1.3300580912863072e-05, "loss": 1.0306, "step": 20194 }, { "epoch": 16.759336099585063, "grad_norm": 34.86140823364258, "learning_rate": 1.3300248962655602e-05, "loss": 0.7799, "step": 20195 }, { "epoch": 16.760165975103735, "grad_norm": 25.13848304748535, "learning_rate": 1.3299917012448134e-05, "loss": 0.5274, "step": 20196 }, { "epoch": 16.760995850622407, "grad_norm": 52.179805755615234, "learning_rate": 1.3299585062240665e-05, "loss": 0.6607, "step": 20197 }, { "epoch": 16.76182572614108, "grad_norm": 22.687334060668945, "learning_rate": 1.3299253112033197e-05, "loss": 0.5002, "step": 20198 }, { "epoch": 16.762655601659752, "grad_norm": 22.868555068969727, "learning_rate": 1.3298921161825726e-05, "loss": 0.8293, "step": 20199 }, { "epoch": 16.763485477178424, "grad_norm": 49.72743606567383, "learning_rate": 1.3298589211618258e-05, "loss": 0.7934, "step": 20200 }, { "epoch": 16.764315352697096, "grad_norm": 60.707130432128906, "learning_rate": 1.329825726141079e-05, "loss": 1.0492, "step": 20201 }, { "epoch": 16.76514522821577, "grad_norm": 35.244625091552734, "learning_rate": 1.3297925311203322e-05, "loss": 0.6312, "step": 20202 }, { "epoch": 16.76597510373444, "grad_norm": 28.744369506835938, "learning_rate": 1.329759336099585e-05, "loss": 0.6351, "step": 20203 }, { "epoch": 16.766804979253113, "grad_norm": 24.801034927368164, "learning_rate": 1.3297261410788383e-05, "loss": 0.3988, "step": 20204 }, { "epoch": 16.767634854771785, "grad_norm": 
41.50674819946289, "learning_rate": 1.3296929460580913e-05, "loss": 1.0498, "step": 20205 }, { "epoch": 16.768464730290457, "grad_norm": 32.42502975463867, "learning_rate": 1.3296597510373445e-05, "loss": 0.8469, "step": 20206 }, { "epoch": 16.76929460580913, "grad_norm": 25.999494552612305, "learning_rate": 1.3296265560165976e-05, "loss": 0.5381, "step": 20207 }, { "epoch": 16.7701244813278, "grad_norm": 14.81570053100586, "learning_rate": 1.3295933609958506e-05, "loss": 0.4395, "step": 20208 }, { "epoch": 16.770954356846474, "grad_norm": 69.4969482421875, "learning_rate": 1.3295601659751038e-05, "loss": 0.6983, "step": 20209 }, { "epoch": 16.771784232365146, "grad_norm": 42.3823127746582, "learning_rate": 1.329526970954357e-05, "loss": 0.9473, "step": 20210 }, { "epoch": 16.77261410788382, "grad_norm": 60.64713668823242, "learning_rate": 1.3294937759336102e-05, "loss": 0.5187, "step": 20211 }, { "epoch": 16.77344398340249, "grad_norm": 36.51796340942383, "learning_rate": 1.3294605809128631e-05, "loss": 0.3957, "step": 20212 }, { "epoch": 16.774273858921163, "grad_norm": 101.25218200683594, "learning_rate": 1.3294273858921163e-05, "loss": 1.0793, "step": 20213 }, { "epoch": 16.775103734439835, "grad_norm": 102.62995147705078, "learning_rate": 1.3293941908713694e-05, "loss": 1.7132, "step": 20214 }, { "epoch": 16.775933609958507, "grad_norm": 57.73170852661133, "learning_rate": 1.3293609958506226e-05, "loss": 0.8118, "step": 20215 }, { "epoch": 16.77676348547718, "grad_norm": 34.072017669677734, "learning_rate": 1.3293278008298756e-05, "loss": 1.1696, "step": 20216 }, { "epoch": 16.77759336099585, "grad_norm": 52.86883544921875, "learning_rate": 1.3292946058091287e-05, "loss": 0.7522, "step": 20217 }, { "epoch": 16.778423236514524, "grad_norm": 46.04389190673828, "learning_rate": 1.3292614107883819e-05, "loss": 1.1297, "step": 20218 }, { "epoch": 16.779253112033196, "grad_norm": 64.52745056152344, "learning_rate": 1.3292282157676351e-05, "loss": 0.5803, "step": 
20219 }, { "epoch": 16.780082987551868, "grad_norm": 35.10600280761719, "learning_rate": 1.329195020746888e-05, "loss": 0.7138, "step": 20220 }, { "epoch": 16.78091286307054, "grad_norm": 53.10883712768555, "learning_rate": 1.3291618257261412e-05, "loss": 0.6401, "step": 20221 }, { "epoch": 16.781742738589212, "grad_norm": 30.448678970336914, "learning_rate": 1.3291286307053944e-05, "loss": 0.5882, "step": 20222 }, { "epoch": 16.782572614107885, "grad_norm": 30.478904724121094, "learning_rate": 1.3290954356846474e-05, "loss": 0.4413, "step": 20223 }, { "epoch": 16.783402489626557, "grad_norm": 35.451961517333984, "learning_rate": 1.3290622406639005e-05, "loss": 0.7485, "step": 20224 }, { "epoch": 16.78423236514523, "grad_norm": 28.53379249572754, "learning_rate": 1.3290290456431537e-05, "loss": 0.7195, "step": 20225 }, { "epoch": 16.7850622406639, "grad_norm": 52.63212203979492, "learning_rate": 1.3289958506224067e-05, "loss": 0.7393, "step": 20226 }, { "epoch": 16.785892116182573, "grad_norm": 54.71962356567383, "learning_rate": 1.32896265560166e-05, "loss": 0.6164, "step": 20227 }, { "epoch": 16.786721991701246, "grad_norm": 21.44580841064453, "learning_rate": 1.3289294605809128e-05, "loss": 0.3133, "step": 20228 }, { "epoch": 16.787551867219918, "grad_norm": 51.259361267089844, "learning_rate": 1.328896265560166e-05, "loss": 0.4816, "step": 20229 }, { "epoch": 16.78838174273859, "grad_norm": 23.163101196289062, "learning_rate": 1.3288630705394192e-05, "loss": 0.438, "step": 20230 }, { "epoch": 16.789211618257262, "grad_norm": 53.16592025756836, "learning_rate": 1.3288298755186724e-05, "loss": 0.9232, "step": 20231 }, { "epoch": 16.790041493775934, "grad_norm": 27.031190872192383, "learning_rate": 1.3287966804979253e-05, "loss": 0.7389, "step": 20232 }, { "epoch": 16.790871369294607, "grad_norm": 25.35051918029785, "learning_rate": 1.3287634854771785e-05, "loss": 0.4129, "step": 20233 }, { "epoch": 16.79170124481328, "grad_norm": 39.02009963989258, 
"learning_rate": 1.3287302904564317e-05, "loss": 0.2752, "step": 20234 }, { "epoch": 16.79253112033195, "grad_norm": 77.03926086425781, "learning_rate": 1.3286970954356848e-05, "loss": 0.8224, "step": 20235 }, { "epoch": 16.793360995850623, "grad_norm": 30.466638565063477, "learning_rate": 1.3286639004149378e-05, "loss": 0.6231, "step": 20236 }, { "epoch": 16.794190871369295, "grad_norm": 51.027835845947266, "learning_rate": 1.3286307053941908e-05, "loss": 1.1824, "step": 20237 }, { "epoch": 16.795020746887968, "grad_norm": 27.404348373413086, "learning_rate": 1.328597510373444e-05, "loss": 0.8693, "step": 20238 }, { "epoch": 16.79585062240664, "grad_norm": 43.6989860534668, "learning_rate": 1.3285643153526973e-05, "loss": 0.5737, "step": 20239 }, { "epoch": 16.796680497925312, "grad_norm": 40.03417205810547, "learning_rate": 1.3285311203319505e-05, "loss": 0.8149, "step": 20240 }, { "epoch": 16.797510373443984, "grad_norm": 31.138471603393555, "learning_rate": 1.3284979253112034e-05, "loss": 0.7022, "step": 20241 }, { "epoch": 16.798340248962656, "grad_norm": 121.42173767089844, "learning_rate": 1.3284647302904566e-05, "loss": 1.0853, "step": 20242 }, { "epoch": 16.79917012448133, "grad_norm": 51.90686798095703, "learning_rate": 1.3284315352697098e-05, "loss": 0.6038, "step": 20243 }, { "epoch": 16.8, "grad_norm": 99.62237548828125, "learning_rate": 1.3283983402489628e-05, "loss": 1.1814, "step": 20244 }, { "epoch": 16.800829875518673, "grad_norm": 42.37559127807617, "learning_rate": 1.3283651452282159e-05, "loss": 0.9181, "step": 20245 }, { "epoch": 16.801659751037345, "grad_norm": 15.599336624145508, "learning_rate": 1.3283319502074689e-05, "loss": 0.2643, "step": 20246 }, { "epoch": 16.802489626556017, "grad_norm": 45.70414352416992, "learning_rate": 1.3282987551867221e-05, "loss": 0.5319, "step": 20247 }, { "epoch": 16.80331950207469, "grad_norm": 57.64194107055664, "learning_rate": 1.3282655601659753e-05, "loss": 0.9504, "step": 20248 }, { "epoch": 
16.80414937759336, "grad_norm": 151.73033142089844, "learning_rate": 1.3282323651452282e-05, "loss": 2.1628, "step": 20249 }, { "epoch": 16.804979253112034, "grad_norm": 89.17975616455078, "learning_rate": 1.3281991701244814e-05, "loss": 1.5027, "step": 20250 }, { "epoch": 16.805809128630706, "grad_norm": 25.43655014038086, "learning_rate": 1.3281659751037346e-05, "loss": 0.5667, "step": 20251 }, { "epoch": 16.80663900414938, "grad_norm": 39.82316207885742, "learning_rate": 1.3281327800829877e-05, "loss": 0.8364, "step": 20252 }, { "epoch": 16.80746887966805, "grad_norm": 35.16030502319336, "learning_rate": 1.3280995850622407e-05, "loss": 0.7943, "step": 20253 }, { "epoch": 16.808298755186723, "grad_norm": 23.822561264038086, "learning_rate": 1.3280663900414939e-05, "loss": 0.4195, "step": 20254 }, { "epoch": 16.809128630705395, "grad_norm": 36.01682662963867, "learning_rate": 1.328033195020747e-05, "loss": 0.9899, "step": 20255 }, { "epoch": 16.809958506224067, "grad_norm": 125.50503540039062, "learning_rate": 1.3280000000000002e-05, "loss": 0.5592, "step": 20256 }, { "epoch": 16.81078838174274, "grad_norm": 26.34400177001953, "learning_rate": 1.3279668049792532e-05, "loss": 0.3632, "step": 20257 }, { "epoch": 16.81161825726141, "grad_norm": 29.834548950195312, "learning_rate": 1.3279336099585062e-05, "loss": 0.4658, "step": 20258 }, { "epoch": 16.812448132780084, "grad_norm": 31.111757278442383, "learning_rate": 1.3279004149377595e-05, "loss": 0.7996, "step": 20259 }, { "epoch": 16.813278008298756, "grad_norm": 20.83683967590332, "learning_rate": 1.3278672199170127e-05, "loss": 0.4796, "step": 20260 }, { "epoch": 16.814107883817428, "grad_norm": 16.226173400878906, "learning_rate": 1.3278340248962655e-05, "loss": 0.3444, "step": 20261 }, { "epoch": 16.8149377593361, "grad_norm": 26.221275329589844, "learning_rate": 1.3278008298755187e-05, "loss": 0.7844, "step": 20262 }, { "epoch": 16.815767634854772, "grad_norm": 45.396244049072266, "learning_rate": 
1.327767634854772e-05, "loss": 0.7576, "step": 20263 }, { "epoch": 16.816597510373445, "grad_norm": 27.92645263671875, "learning_rate": 1.327734439834025e-05, "loss": 0.6587, "step": 20264 }, { "epoch": 16.817427385892117, "grad_norm": 40.092533111572266, "learning_rate": 1.3277012448132782e-05, "loss": 0.6453, "step": 20265 }, { "epoch": 16.81825726141079, "grad_norm": 29.06108283996582, "learning_rate": 1.327668049792531e-05, "loss": 0.6152, "step": 20266 }, { "epoch": 16.81908713692946, "grad_norm": 45.08784103393555, "learning_rate": 1.3276348547717843e-05, "loss": 0.6803, "step": 20267 }, { "epoch": 16.819917012448133, "grad_norm": 70.89769744873047, "learning_rate": 1.3276016597510375e-05, "loss": 1.0193, "step": 20268 }, { "epoch": 16.820746887966806, "grad_norm": 45.06898880004883, "learning_rate": 1.3275684647302907e-05, "loss": 0.7905, "step": 20269 }, { "epoch": 16.821576763485478, "grad_norm": 13.968897819519043, "learning_rate": 1.3275352697095436e-05, "loss": 0.3781, "step": 20270 }, { "epoch": 16.82240663900415, "grad_norm": 20.875730514526367, "learning_rate": 1.3275020746887968e-05, "loss": 0.3673, "step": 20271 }, { "epoch": 16.823236514522822, "grad_norm": 21.429418563842773, "learning_rate": 1.32746887966805e-05, "loss": 0.3489, "step": 20272 }, { "epoch": 16.824066390041494, "grad_norm": 32.403377532958984, "learning_rate": 1.327435684647303e-05, "loss": 0.5484, "step": 20273 }, { "epoch": 16.824896265560167, "grad_norm": 53.782630920410156, "learning_rate": 1.3274024896265561e-05, "loss": 0.6518, "step": 20274 }, { "epoch": 16.82572614107884, "grad_norm": 58.354671478271484, "learning_rate": 1.3273692946058091e-05, "loss": 0.6036, "step": 20275 }, { "epoch": 16.82655601659751, "grad_norm": 36.77773666381836, "learning_rate": 1.3273360995850623e-05, "loss": 0.7252, "step": 20276 }, { "epoch": 16.827385892116183, "grad_norm": 29.10407257080078, "learning_rate": 1.3273029045643156e-05, "loss": 0.4467, "step": 20277 }, { "epoch": 
16.828215767634855, "grad_norm": 65.63162994384766, "learning_rate": 1.3272697095435684e-05, "loss": 1.2954, "step": 20278 }, { "epoch": 16.829045643153528, "grad_norm": 79.18243408203125, "learning_rate": 1.3272365145228216e-05, "loss": 0.6224, "step": 20279 }, { "epoch": 16.8298755186722, "grad_norm": 91.08719635009766, "learning_rate": 1.3272033195020748e-05, "loss": 0.8861, "step": 20280 }, { "epoch": 16.830705394190872, "grad_norm": 44.34624099731445, "learning_rate": 1.327170124481328e-05, "loss": 0.5214, "step": 20281 }, { "epoch": 16.831535269709544, "grad_norm": 69.10681915283203, "learning_rate": 1.327136929460581e-05, "loss": 0.8926, "step": 20282 }, { "epoch": 16.832365145228216, "grad_norm": 95.99068450927734, "learning_rate": 1.3271037344398341e-05, "loss": 1.331, "step": 20283 }, { "epoch": 16.83319502074689, "grad_norm": 35.131080627441406, "learning_rate": 1.3270705394190872e-05, "loss": 0.6322, "step": 20284 }, { "epoch": 16.83402489626556, "grad_norm": 45.04826736450195, "learning_rate": 1.3270373443983404e-05, "loss": 0.4244, "step": 20285 }, { "epoch": 16.834854771784233, "grad_norm": 38.5531120300293, "learning_rate": 1.3270041493775934e-05, "loss": 0.5147, "step": 20286 }, { "epoch": 16.835684647302905, "grad_norm": 38.940181732177734, "learning_rate": 1.3269709543568465e-05, "loss": 0.9356, "step": 20287 }, { "epoch": 16.836514522821577, "grad_norm": 28.257667541503906, "learning_rate": 1.3269377593360997e-05, "loss": 0.4066, "step": 20288 }, { "epoch": 16.83734439834025, "grad_norm": 10.613676071166992, "learning_rate": 1.3269045643153529e-05, "loss": 0.2722, "step": 20289 }, { "epoch": 16.83817427385892, "grad_norm": 39.47736358642578, "learning_rate": 1.3268713692946061e-05, "loss": 1.0626, "step": 20290 }, { "epoch": 16.839004149377594, "grad_norm": 21.788299560546875, "learning_rate": 1.326838174273859e-05, "loss": 0.4564, "step": 20291 }, { "epoch": 16.839834024896266, "grad_norm": 33.08158874511719, "learning_rate": 
1.3268049792531122e-05, "loss": 0.6876, "step": 20292 }, { "epoch": 16.84066390041494, "grad_norm": 49.72807312011719, "learning_rate": 1.3267717842323652e-05, "loss": 0.5272, "step": 20293 }, { "epoch": 16.84149377593361, "grad_norm": 30.693525314331055, "learning_rate": 1.3267385892116184e-05, "loss": 0.6035, "step": 20294 }, { "epoch": 16.842323651452283, "grad_norm": 71.66634368896484, "learning_rate": 1.3267053941908715e-05, "loss": 0.5645, "step": 20295 }, { "epoch": 16.843153526970955, "grad_norm": 46.93547821044922, "learning_rate": 1.3266721991701245e-05, "loss": 1.2969, "step": 20296 }, { "epoch": 16.843983402489627, "grad_norm": 28.548633575439453, "learning_rate": 1.3266390041493777e-05, "loss": 0.2798, "step": 20297 }, { "epoch": 16.8448132780083, "grad_norm": 56.29289627075195, "learning_rate": 1.326605809128631e-05, "loss": 0.5783, "step": 20298 }, { "epoch": 16.84564315352697, "grad_norm": 58.904964447021484, "learning_rate": 1.3265726141078838e-05, "loss": 0.6757, "step": 20299 }, { "epoch": 16.846473029045644, "grad_norm": 25.384180068969727, "learning_rate": 1.326539419087137e-05, "loss": 1.0404, "step": 20300 }, { "epoch": 16.847302904564316, "grad_norm": 45.96072006225586, "learning_rate": 1.3265062240663902e-05, "loss": 0.8295, "step": 20301 }, { "epoch": 16.848132780082988, "grad_norm": 69.04408264160156, "learning_rate": 1.3264730290456433e-05, "loss": 0.5275, "step": 20302 }, { "epoch": 16.84896265560166, "grad_norm": 32.815521240234375, "learning_rate": 1.3264398340248963e-05, "loss": 0.7728, "step": 20303 }, { "epoch": 16.849792531120332, "grad_norm": 27.078969955444336, "learning_rate": 1.3264066390041495e-05, "loss": 0.4046, "step": 20304 }, { "epoch": 16.850622406639005, "grad_norm": 29.212984085083008, "learning_rate": 1.3263734439834026e-05, "loss": 0.3509, "step": 20305 }, { "epoch": 16.851452282157677, "grad_norm": 34.052406311035156, "learning_rate": 1.3263402489626558e-05, "loss": 0.6934, "step": 20306 }, { "epoch": 
16.85228215767635, "grad_norm": 54.28562545776367, "learning_rate": 1.3263070539419087e-05, "loss": 0.9368, "step": 20307 }, { "epoch": 16.85311203319502, "grad_norm": 95.58253479003906, "learning_rate": 1.3262738589211619e-05, "loss": 1.3326, "step": 20308 }, { "epoch": 16.853941908713693, "grad_norm": 39.860137939453125, "learning_rate": 1.326240663900415e-05, "loss": 0.3768, "step": 20309 }, { "epoch": 16.854771784232366, "grad_norm": 28.071393966674805, "learning_rate": 1.3262074688796683e-05, "loss": 0.3907, "step": 20310 }, { "epoch": 16.855601659751038, "grad_norm": 48.319923400878906, "learning_rate": 1.3261742738589212e-05, "loss": 0.5716, "step": 20311 }, { "epoch": 16.85643153526971, "grad_norm": 94.34789276123047, "learning_rate": 1.3261410788381744e-05, "loss": 0.8705, "step": 20312 }, { "epoch": 16.857261410788382, "grad_norm": 103.57734680175781, "learning_rate": 1.3261078838174276e-05, "loss": 0.9044, "step": 20313 }, { "epoch": 16.858091286307054, "grad_norm": 64.02953338623047, "learning_rate": 1.3260746887966806e-05, "loss": 1.0361, "step": 20314 }, { "epoch": 16.858921161825727, "grad_norm": 38.342041015625, "learning_rate": 1.3260414937759337e-05, "loss": 0.4387, "step": 20315 }, { "epoch": 16.8597510373444, "grad_norm": 34.91637420654297, "learning_rate": 1.3260082987551867e-05, "loss": 0.5718, "step": 20316 }, { "epoch": 16.86058091286307, "grad_norm": 71.54853057861328, "learning_rate": 1.32597510373444e-05, "loss": 1.2209, "step": 20317 }, { "epoch": 16.861410788381743, "grad_norm": 71.94227600097656, "learning_rate": 1.3259419087136931e-05, "loss": 0.959, "step": 20318 }, { "epoch": 16.862240663900415, "grad_norm": 47.33855438232422, "learning_rate": 1.3259087136929463e-05, "loss": 0.633, "step": 20319 }, { "epoch": 16.863070539419088, "grad_norm": 61.368873596191406, "learning_rate": 1.3258755186721992e-05, "loss": 0.9388, "step": 20320 }, { "epoch": 16.86390041493776, "grad_norm": 49.554317474365234, "learning_rate": 
1.3258423236514524e-05, "loss": 0.8428, "step": 20321 }, { "epoch": 16.864730290456432, "grad_norm": 23.04176902770996, "learning_rate": 1.3258091286307055e-05, "loss": 0.7007, "step": 20322 }, { "epoch": 16.865560165975104, "grad_norm": 68.3257827758789, "learning_rate": 1.3257759336099587e-05, "loss": 0.9035, "step": 20323 }, { "epoch": 16.866390041493776, "grad_norm": 18.61224937438965, "learning_rate": 1.3257427385892117e-05, "loss": 0.5564, "step": 20324 }, { "epoch": 16.86721991701245, "grad_norm": 27.04553985595703, "learning_rate": 1.3257095435684648e-05, "loss": 0.5407, "step": 20325 }, { "epoch": 16.86804979253112, "grad_norm": 45.82027053833008, "learning_rate": 1.325676348547718e-05, "loss": 0.6025, "step": 20326 }, { "epoch": 16.868879668049793, "grad_norm": 28.914525985717773, "learning_rate": 1.3256431535269712e-05, "loss": 0.7361, "step": 20327 }, { "epoch": 16.869709543568465, "grad_norm": 46.0615234375, "learning_rate": 1.325609958506224e-05, "loss": 0.6539, "step": 20328 }, { "epoch": 16.870539419087137, "grad_norm": 27.26583480834961, "learning_rate": 1.3255767634854773e-05, "loss": 0.6546, "step": 20329 }, { "epoch": 16.87136929460581, "grad_norm": 29.590229034423828, "learning_rate": 1.3255435684647305e-05, "loss": 1.0538, "step": 20330 }, { "epoch": 16.872199170124482, "grad_norm": 27.402057647705078, "learning_rate": 1.3255103734439835e-05, "loss": 0.732, "step": 20331 }, { "epoch": 16.873029045643154, "grad_norm": 32.64902877807617, "learning_rate": 1.3254771784232366e-05, "loss": 0.8709, "step": 20332 }, { "epoch": 16.873858921161826, "grad_norm": 28.01475715637207, "learning_rate": 1.3254439834024898e-05, "loss": 0.5832, "step": 20333 }, { "epoch": 16.8746887966805, "grad_norm": 44.38076400756836, "learning_rate": 1.3254107883817428e-05, "loss": 0.4121, "step": 20334 }, { "epoch": 16.87551867219917, "grad_norm": 50.70522689819336, "learning_rate": 1.325377593360996e-05, "loss": 0.6202, "step": 20335 }, { "epoch": 16.876348547717843, 
"grad_norm": 43.651126861572266, "learning_rate": 1.3253443983402489e-05, "loss": 0.4662, "step": 20336 }, { "epoch": 16.877178423236515, "grad_norm": 49.96854782104492, "learning_rate": 1.3253112033195021e-05, "loss": 0.9403, "step": 20337 }, { "epoch": 16.878008298755187, "grad_norm": 16.242412567138672, "learning_rate": 1.3252780082987553e-05, "loss": 0.4047, "step": 20338 }, { "epoch": 16.87883817427386, "grad_norm": 41.917724609375, "learning_rate": 1.3252448132780085e-05, "loss": 0.7382, "step": 20339 }, { "epoch": 16.87966804979253, "grad_norm": 74.43141174316406, "learning_rate": 1.3252116182572614e-05, "loss": 0.7749, "step": 20340 }, { "epoch": 16.880497925311204, "grad_norm": 22.58761215209961, "learning_rate": 1.3251784232365146e-05, "loss": 0.6517, "step": 20341 }, { "epoch": 16.881327800829876, "grad_norm": 43.664825439453125, "learning_rate": 1.3251452282157678e-05, "loss": 1.2113, "step": 20342 }, { "epoch": 16.882157676348548, "grad_norm": 67.22654724121094, "learning_rate": 1.3251120331950209e-05, "loss": 0.504, "step": 20343 }, { "epoch": 16.88298755186722, "grad_norm": 32.008026123046875, "learning_rate": 1.325078838174274e-05, "loss": 0.8953, "step": 20344 }, { "epoch": 16.883817427385893, "grad_norm": 39.80376052856445, "learning_rate": 1.325045643153527e-05, "loss": 0.4236, "step": 20345 }, { "epoch": 16.884647302904565, "grad_norm": 35.156219482421875, "learning_rate": 1.3250124481327802e-05, "loss": 0.9155, "step": 20346 }, { "epoch": 16.885477178423237, "grad_norm": 20.257017135620117, "learning_rate": 1.3249792531120334e-05, "loss": 0.6189, "step": 20347 }, { "epoch": 16.88630705394191, "grad_norm": 27.544368743896484, "learning_rate": 1.3249460580912866e-05, "loss": 0.4385, "step": 20348 }, { "epoch": 16.88713692946058, "grad_norm": 51.05367660522461, "learning_rate": 1.3249128630705394e-05, "loss": 0.7939, "step": 20349 }, { "epoch": 16.887966804979254, "grad_norm": 42.38581085205078, "learning_rate": 1.3248796680497927e-05, "loss": 
0.801, "step": 20350 }, { "epoch": 16.888796680497926, "grad_norm": 38.35100173950195, "learning_rate": 1.3248464730290459e-05, "loss": 0.7407, "step": 20351 }, { "epoch": 16.889626556016598, "grad_norm": 82.83345794677734, "learning_rate": 1.3248132780082989e-05, "loss": 1.5672, "step": 20352 }, { "epoch": 16.89045643153527, "grad_norm": 79.35076141357422, "learning_rate": 1.324780082987552e-05, "loss": 0.6749, "step": 20353 }, { "epoch": 16.891286307053942, "grad_norm": 39.522621154785156, "learning_rate": 1.324746887966805e-05, "loss": 0.8134, "step": 20354 }, { "epoch": 16.892116182572614, "grad_norm": 55.96847915649414, "learning_rate": 1.3247136929460582e-05, "loss": 1.5974, "step": 20355 }, { "epoch": 16.892946058091287, "grad_norm": 61.21510696411133, "learning_rate": 1.3246804979253114e-05, "loss": 0.3932, "step": 20356 }, { "epoch": 16.89377593360996, "grad_norm": 40.51866912841797, "learning_rate": 1.3246473029045643e-05, "loss": 0.7248, "step": 20357 }, { "epoch": 16.89460580912863, "grad_norm": 45.657325744628906, "learning_rate": 1.3246141078838175e-05, "loss": 1.1663, "step": 20358 }, { "epoch": 16.895435684647303, "grad_norm": 77.10726928710938, "learning_rate": 1.3245809128630707e-05, "loss": 1.1976, "step": 20359 }, { "epoch": 16.896265560165975, "grad_norm": 39.10625457763672, "learning_rate": 1.3245477178423239e-05, "loss": 0.7999, "step": 20360 }, { "epoch": 16.897095435684648, "grad_norm": 42.64250183105469, "learning_rate": 1.3245145228215768e-05, "loss": 0.7269, "step": 20361 }, { "epoch": 16.89792531120332, "grad_norm": 43.3442497253418, "learning_rate": 1.32448132780083e-05, "loss": 1.1067, "step": 20362 }, { "epoch": 16.898755186721992, "grad_norm": 80.72984313964844, "learning_rate": 1.324448132780083e-05, "loss": 1.1794, "step": 20363 }, { "epoch": 16.899585062240664, "grad_norm": 19.397891998291016, "learning_rate": 1.3244149377593362e-05, "loss": 0.5285, "step": 20364 }, { "epoch": 16.900414937759336, "grad_norm": 23.790939331054688, 
"learning_rate": 1.3243817427385893e-05, "loss": 0.7324, "step": 20365 }, { "epoch": 16.90124481327801, "grad_norm": 30.46128273010254, "learning_rate": 1.3243485477178423e-05, "loss": 0.5137, "step": 20366 }, { "epoch": 16.90207468879668, "grad_norm": 58.23445510864258, "learning_rate": 1.3243153526970955e-05, "loss": 0.7381, "step": 20367 }, { "epoch": 16.902904564315353, "grad_norm": 27.216384887695312, "learning_rate": 1.3242821576763488e-05, "loss": 1.2561, "step": 20368 }, { "epoch": 16.903734439834025, "grad_norm": 37.28019714355469, "learning_rate": 1.3242489626556018e-05, "loss": 0.5225, "step": 20369 }, { "epoch": 16.904564315352697, "grad_norm": 49.294532775878906, "learning_rate": 1.3242157676348548e-05, "loss": 0.694, "step": 20370 }, { "epoch": 16.90539419087137, "grad_norm": 23.201967239379883, "learning_rate": 1.324182572614108e-05, "loss": 0.3411, "step": 20371 }, { "epoch": 16.906224066390042, "grad_norm": 60.08164596557617, "learning_rate": 1.3241493775933611e-05, "loss": 1.373, "step": 20372 }, { "epoch": 16.907053941908714, "grad_norm": 78.36054229736328, "learning_rate": 1.3241161825726143e-05, "loss": 0.542, "step": 20373 }, { "epoch": 16.907883817427386, "grad_norm": 23.899478912353516, "learning_rate": 1.3240829875518673e-05, "loss": 0.5471, "step": 20374 }, { "epoch": 16.90871369294606, "grad_norm": 117.83311462402344, "learning_rate": 1.3240497925311204e-05, "loss": 0.4152, "step": 20375 }, { "epoch": 16.90954356846473, "grad_norm": 34.12507247924805, "learning_rate": 1.3240165975103736e-05, "loss": 0.9972, "step": 20376 }, { "epoch": 16.910373443983403, "grad_norm": 28.563922882080078, "learning_rate": 1.3239834024896268e-05, "loss": 0.5905, "step": 20377 }, { "epoch": 16.911203319502075, "grad_norm": 27.536314010620117, "learning_rate": 1.3239502074688797e-05, "loss": 0.4454, "step": 20378 }, { "epoch": 16.912033195020747, "grad_norm": 30.194438934326172, "learning_rate": 1.3239170124481329e-05, "loss": 0.8937, "step": 20379 }, { 
"epoch": 16.91286307053942, "grad_norm": 76.69271087646484, "learning_rate": 1.3238838174273861e-05, "loss": 0.6719, "step": 20380 }, { "epoch": 16.91369294605809, "grad_norm": 44.849884033203125, "learning_rate": 1.3238506224066391e-05, "loss": 0.4784, "step": 20381 }, { "epoch": 16.914522821576764, "grad_norm": 26.82001495361328, "learning_rate": 1.3238174273858922e-05, "loss": 0.487, "step": 20382 }, { "epoch": 16.915352697095436, "grad_norm": 59.86610794067383, "learning_rate": 1.3237842323651452e-05, "loss": 0.66, "step": 20383 }, { "epoch": 16.916182572614108, "grad_norm": 31.911483764648438, "learning_rate": 1.3237510373443984e-05, "loss": 0.9142, "step": 20384 }, { "epoch": 16.91701244813278, "grad_norm": 26.304306030273438, "learning_rate": 1.3237178423236516e-05, "loss": 0.4821, "step": 20385 }, { "epoch": 16.917842323651453, "grad_norm": 36.85800552368164, "learning_rate": 1.3236846473029045e-05, "loss": 0.7404, "step": 20386 }, { "epoch": 16.918672199170125, "grad_norm": 85.20454406738281, "learning_rate": 1.3236514522821577e-05, "loss": 0.6796, "step": 20387 }, { "epoch": 16.919502074688797, "grad_norm": 55.24330139160156, "learning_rate": 1.323618257261411e-05, "loss": 0.7304, "step": 20388 }, { "epoch": 16.92033195020747, "grad_norm": 72.91122436523438, "learning_rate": 1.3235850622406641e-05, "loss": 0.9872, "step": 20389 }, { "epoch": 16.92116182572614, "grad_norm": 53.232154846191406, "learning_rate": 1.323551867219917e-05, "loss": 0.4549, "step": 20390 }, { "epoch": 16.921991701244814, "grad_norm": 91.99327850341797, "learning_rate": 1.3235186721991702e-05, "loss": 0.8159, "step": 20391 }, { "epoch": 16.922821576763486, "grad_norm": 25.821958541870117, "learning_rate": 1.3234854771784233e-05, "loss": 0.5439, "step": 20392 }, { "epoch": 16.923651452282158, "grad_norm": 89.62265014648438, "learning_rate": 1.3234522821576765e-05, "loss": 0.9197, "step": 20393 }, { "epoch": 16.92448132780083, "grad_norm": 72.0965576171875, "learning_rate": 
1.3234190871369295e-05, "loss": 1.0075, "step": 20394 }, { "epoch": 16.925311203319502, "grad_norm": 43.62442398071289, "learning_rate": 1.3233858921161826e-05, "loss": 0.9045, "step": 20395 }, { "epoch": 16.926141078838175, "grad_norm": 47.902706146240234, "learning_rate": 1.3233526970954358e-05, "loss": 0.8514, "step": 20396 }, { "epoch": 16.926970954356847, "grad_norm": 29.235593795776367, "learning_rate": 1.323319502074689e-05, "loss": 0.6996, "step": 20397 }, { "epoch": 16.92780082987552, "grad_norm": 34.654273986816406, "learning_rate": 1.3232863070539422e-05, "loss": 0.4927, "step": 20398 }, { "epoch": 16.92863070539419, "grad_norm": 30.901519775390625, "learning_rate": 1.323253112033195e-05, "loss": 0.6443, "step": 20399 }, { "epoch": 16.929460580912863, "grad_norm": 80.74608612060547, "learning_rate": 1.3232199170124483e-05, "loss": 0.457, "step": 20400 }, { "epoch": 16.930290456431536, "grad_norm": 22.265233993530273, "learning_rate": 1.3231867219917013e-05, "loss": 0.5141, "step": 20401 }, { "epoch": 16.931120331950208, "grad_norm": 91.51262664794922, "learning_rate": 1.3231535269709545e-05, "loss": 0.7398, "step": 20402 }, { "epoch": 16.93195020746888, "grad_norm": 34.784339904785156, "learning_rate": 1.3231203319502076e-05, "loss": 0.3612, "step": 20403 }, { "epoch": 16.932780082987552, "grad_norm": 64.33372497558594, "learning_rate": 1.3230871369294606e-05, "loss": 0.5044, "step": 20404 }, { "epoch": 16.933609958506224, "grad_norm": 45.42831039428711, "learning_rate": 1.3230539419087138e-05, "loss": 0.479, "step": 20405 }, { "epoch": 16.934439834024896, "grad_norm": 29.683185577392578, "learning_rate": 1.323020746887967e-05, "loss": 0.6279, "step": 20406 }, { "epoch": 16.93526970954357, "grad_norm": 13.762029647827148, "learning_rate": 1.3229875518672199e-05, "loss": 0.4174, "step": 20407 }, { "epoch": 16.93609958506224, "grad_norm": 19.413015365600586, "learning_rate": 1.3229543568464731e-05, "loss": 0.352, "step": 20408 }, { "epoch": 
16.936929460580913, "grad_norm": 94.42327117919922, "learning_rate": 1.3229211618257263e-05, "loss": 0.5832, "step": 20409 }, { "epoch": 16.937759336099585, "grad_norm": 46.0169792175293, "learning_rate": 1.3228879668049794e-05, "loss": 0.7804, "step": 20410 }, { "epoch": 16.938589211618257, "grad_norm": 29.15230941772461, "learning_rate": 1.3228547717842324e-05, "loss": 0.9236, "step": 20411 }, { "epoch": 16.93941908713693, "grad_norm": 30.783790588378906, "learning_rate": 1.3228215767634856e-05, "loss": 0.8611, "step": 20412 }, { "epoch": 16.940248962655602, "grad_norm": 19.81673240661621, "learning_rate": 1.3227883817427387e-05, "loss": 0.4249, "step": 20413 }, { "epoch": 16.941078838174274, "grad_norm": 33.294742584228516, "learning_rate": 1.3227551867219919e-05, "loss": 0.702, "step": 20414 }, { "epoch": 16.941908713692946, "grad_norm": 24.958993911743164, "learning_rate": 1.3227219917012447e-05, "loss": 0.5584, "step": 20415 }, { "epoch": 16.94273858921162, "grad_norm": 31.484983444213867, "learning_rate": 1.322688796680498e-05, "loss": 0.5404, "step": 20416 }, { "epoch": 16.94356846473029, "grad_norm": 56.79356002807617, "learning_rate": 1.3226556016597512e-05, "loss": 0.8799, "step": 20417 }, { "epoch": 16.944398340248963, "grad_norm": 67.58293151855469, "learning_rate": 1.3226224066390044e-05, "loss": 0.4649, "step": 20418 }, { "epoch": 16.945228215767635, "grad_norm": 81.91764068603516, "learning_rate": 1.3225892116182573e-05, "loss": 1.0292, "step": 20419 }, { "epoch": 16.946058091286307, "grad_norm": 20.458431243896484, "learning_rate": 1.3225560165975105e-05, "loss": 0.2954, "step": 20420 }, { "epoch": 16.94688796680498, "grad_norm": 54.64384460449219, "learning_rate": 1.3225228215767637e-05, "loss": 0.699, "step": 20421 }, { "epoch": 16.94771784232365, "grad_norm": 53.55765914916992, "learning_rate": 1.3224896265560167e-05, "loss": 0.4325, "step": 20422 }, { "epoch": 16.948547717842324, "grad_norm": 24.1865234375, "learning_rate": 
1.32245643153527e-05, "loss": 0.4978, "step": 20423 }, { "epoch": 16.949377593360996, "grad_norm": 46.570106506347656, "learning_rate": 1.3224232365145228e-05, "loss": 0.5923, "step": 20424 }, { "epoch": 16.95020746887967, "grad_norm": 20.866350173950195, "learning_rate": 1.322390041493776e-05, "loss": 0.4559, "step": 20425 }, { "epoch": 16.95103734439834, "grad_norm": 25.328815460205078, "learning_rate": 1.3223568464730292e-05, "loss": 0.3297, "step": 20426 }, { "epoch": 16.951867219917013, "grad_norm": 30.853172302246094, "learning_rate": 1.3223236514522824e-05, "loss": 0.4573, "step": 20427 }, { "epoch": 16.952697095435685, "grad_norm": 51.129093170166016, "learning_rate": 1.3222904564315353e-05, "loss": 0.822, "step": 20428 }, { "epoch": 16.953526970954357, "grad_norm": 118.93649291992188, "learning_rate": 1.3222572614107885e-05, "loss": 1.1176, "step": 20429 }, { "epoch": 16.95435684647303, "grad_norm": 30.020339965820312, "learning_rate": 1.3222240663900417e-05, "loss": 0.9107, "step": 20430 }, { "epoch": 16.9551867219917, "grad_norm": 59.60105514526367, "learning_rate": 1.3221908713692948e-05, "loss": 1.3153, "step": 20431 }, { "epoch": 16.956016597510374, "grad_norm": 24.91193199157715, "learning_rate": 1.3221576763485478e-05, "loss": 0.2888, "step": 20432 }, { "epoch": 16.956846473029046, "grad_norm": 24.616453170776367, "learning_rate": 1.3221244813278008e-05, "loss": 0.4438, "step": 20433 }, { "epoch": 16.957676348547718, "grad_norm": 42.01851272583008, "learning_rate": 1.322091286307054e-05, "loss": 0.4312, "step": 20434 }, { "epoch": 16.95850622406639, "grad_norm": 53.933258056640625, "learning_rate": 1.3220580912863073e-05, "loss": 0.7719, "step": 20435 }, { "epoch": 16.959336099585062, "grad_norm": 41.443199157714844, "learning_rate": 1.3220248962655601e-05, "loss": 0.8421, "step": 20436 }, { "epoch": 16.960165975103735, "grad_norm": 68.36832427978516, "learning_rate": 1.3219917012448134e-05, "loss": 0.9607, "step": 20437 }, { "epoch": 
16.960995850622407, "grad_norm": 38.20642852783203, "learning_rate": 1.3219585062240666e-05, "loss": 0.7485, "step": 20438 }, { "epoch": 16.96182572614108, "grad_norm": 83.29920959472656, "learning_rate": 1.3219253112033196e-05, "loss": 0.7172, "step": 20439 }, { "epoch": 16.96265560165975, "grad_norm": 41.96546173095703, "learning_rate": 1.3218921161825726e-05, "loss": 1.0573, "step": 20440 }, { "epoch": 16.963485477178423, "grad_norm": 41.286067962646484, "learning_rate": 1.3218589211618259e-05, "loss": 0.8343, "step": 20441 }, { "epoch": 16.964315352697096, "grad_norm": 18.167448043823242, "learning_rate": 1.3218257261410789e-05, "loss": 0.2661, "step": 20442 }, { "epoch": 16.965145228215768, "grad_norm": 27.387741088867188, "learning_rate": 1.3217925311203321e-05, "loss": 0.6952, "step": 20443 }, { "epoch": 16.96597510373444, "grad_norm": 15.801067352294922, "learning_rate": 1.3217593360995851e-05, "loss": 0.3, "step": 20444 }, { "epoch": 16.966804979253112, "grad_norm": 27.968915939331055, "learning_rate": 1.3217261410788382e-05, "loss": 0.5161, "step": 20445 }, { "epoch": 16.967634854771784, "grad_norm": 33.67974853515625, "learning_rate": 1.3216929460580914e-05, "loss": 0.5432, "step": 20446 }, { "epoch": 16.968464730290457, "grad_norm": 51.714962005615234, "learning_rate": 1.3216597510373446e-05, "loss": 0.9487, "step": 20447 }, { "epoch": 16.96929460580913, "grad_norm": 12.19399642944336, "learning_rate": 1.3216265560165977e-05, "loss": 0.2506, "step": 20448 }, { "epoch": 16.9701244813278, "grad_norm": 18.467613220214844, "learning_rate": 1.3215933609958507e-05, "loss": 0.5317, "step": 20449 }, { "epoch": 16.970954356846473, "grad_norm": 46.155582427978516, "learning_rate": 1.3215601659751039e-05, "loss": 0.6458, "step": 20450 }, { "epoch": 16.971784232365145, "grad_norm": 67.75735473632812, "learning_rate": 1.321526970954357e-05, "loss": 0.8612, "step": 20451 }, { "epoch": 16.972614107883818, "grad_norm": 29.583152770996094, "learning_rate": 
1.3214937759336102e-05, "loss": 0.6311, "step": 20452 }, { "epoch": 16.97344398340249, "grad_norm": 30.487911224365234, "learning_rate": 1.321460580912863e-05, "loss": 0.558, "step": 20453 }, { "epoch": 16.974273858921162, "grad_norm": 48.976131439208984, "learning_rate": 1.3214273858921162e-05, "loss": 0.7985, "step": 20454 }, { "epoch": 16.975103734439834, "grad_norm": 45.606109619140625, "learning_rate": 1.3213941908713695e-05, "loss": 1.2416, "step": 20455 }, { "epoch": 16.975933609958506, "grad_norm": 118.49442291259766, "learning_rate": 1.3213609958506227e-05, "loss": 0.5603, "step": 20456 }, { "epoch": 16.97676348547718, "grad_norm": 61.92831802368164, "learning_rate": 1.3213278008298755e-05, "loss": 0.5995, "step": 20457 }, { "epoch": 16.97759336099585, "grad_norm": 13.078834533691406, "learning_rate": 1.3212946058091287e-05, "loss": 0.3365, "step": 20458 }, { "epoch": 16.978423236514523, "grad_norm": 38.089683532714844, "learning_rate": 1.321261410788382e-05, "loss": 0.5884, "step": 20459 }, { "epoch": 16.979253112033195, "grad_norm": 62.508216857910156, "learning_rate": 1.321228215767635e-05, "loss": 0.5911, "step": 20460 }, { "epoch": 16.980082987551867, "grad_norm": 36.351356506347656, "learning_rate": 1.321195020746888e-05, "loss": 0.671, "step": 20461 }, { "epoch": 16.98091286307054, "grad_norm": 68.90534210205078, "learning_rate": 1.321161825726141e-05, "loss": 0.9154, "step": 20462 }, { "epoch": 16.98174273858921, "grad_norm": 47.343379974365234, "learning_rate": 1.3211286307053943e-05, "loss": 0.6564, "step": 20463 }, { "epoch": 16.982572614107884, "grad_norm": 22.379030227661133, "learning_rate": 1.3210954356846475e-05, "loss": 0.4252, "step": 20464 }, { "epoch": 16.983402489626556, "grad_norm": 23.89146614074707, "learning_rate": 1.3210622406639004e-05, "loss": 0.3706, "step": 20465 }, { "epoch": 16.98423236514523, "grad_norm": 51.093597412109375, "learning_rate": 1.3210290456431536e-05, "loss": 0.6911, "step": 20466 }, { "epoch": 
16.9850622406639, "grad_norm": 38.72893524169922, "learning_rate": 1.3209958506224068e-05, "loss": 0.4973, "step": 20467 }, { "epoch": 16.985892116182573, "grad_norm": 32.61996078491211, "learning_rate": 1.32096265560166e-05, "loss": 0.5299, "step": 20468 }, { "epoch": 16.986721991701245, "grad_norm": 52.58641815185547, "learning_rate": 1.3209294605809129e-05, "loss": 0.5725, "step": 20469 }, { "epoch": 16.987551867219917, "grad_norm": 43.24815368652344, "learning_rate": 1.3208962655601661e-05, "loss": 0.7, "step": 20470 }, { "epoch": 16.98838174273859, "grad_norm": 40.38703155517578, "learning_rate": 1.3208630705394191e-05, "loss": 0.7421, "step": 20471 }, { "epoch": 16.98921161825726, "grad_norm": 13.581704139709473, "learning_rate": 1.3208298755186723e-05, "loss": 0.3802, "step": 20472 }, { "epoch": 16.990041493775934, "grad_norm": 30.765785217285156, "learning_rate": 1.3207966804979254e-05, "loss": 0.5317, "step": 20473 }, { "epoch": 16.990871369294606, "grad_norm": 44.118202209472656, "learning_rate": 1.3207634854771784e-05, "loss": 0.643, "step": 20474 }, { "epoch": 16.991701244813278, "grad_norm": 27.98017692565918, "learning_rate": 1.3207302904564316e-05, "loss": 0.3842, "step": 20475 }, { "epoch": 16.99253112033195, "grad_norm": 77.01346588134766, "learning_rate": 1.3206970954356848e-05, "loss": 0.4949, "step": 20476 }, { "epoch": 16.993360995850622, "grad_norm": 18.19371223449707, "learning_rate": 1.320663900414938e-05, "loss": 0.4043, "step": 20477 }, { "epoch": 16.994190871369295, "grad_norm": 35.941078186035156, "learning_rate": 1.320630705394191e-05, "loss": 0.5225, "step": 20478 }, { "epoch": 16.995020746887967, "grad_norm": 34.53343963623047, "learning_rate": 1.3205975103734441e-05, "loss": 0.9155, "step": 20479 }, { "epoch": 16.99585062240664, "grad_norm": 59.614810943603516, "learning_rate": 1.3205643153526972e-05, "loss": 0.9153, "step": 20480 }, { "epoch": 16.99668049792531, "grad_norm": 73.27315521240234, "learning_rate": 
1.3205311203319504e-05, "loss": 1.4627, "step": 20481 }, { "epoch": 16.997510373443983, "grad_norm": 84.74464416503906, "learning_rate": 1.3204979253112034e-05, "loss": 0.69, "step": 20482 }, { "epoch": 16.998340248962656, "grad_norm": 52.418373107910156, "learning_rate": 1.3204647302904565e-05, "loss": 0.5487, "step": 20483 }, { "epoch": 16.999170124481328, "grad_norm": 29.13086700439453, "learning_rate": 1.3204315352697097e-05, "loss": 0.5843, "step": 20484 }, { "epoch": 17.0, "grad_norm": 28.12331771850586, "learning_rate": 1.3203983402489629e-05, "loss": 0.4182, "step": 20485 }, { "epoch": 17.000829875518672, "grad_norm": 25.14204978942871, "learning_rate": 1.3203651452282158e-05, "loss": 0.4686, "step": 20486 }, { "epoch": 17.001659751037344, "grad_norm": 24.57921028137207, "learning_rate": 1.320331950207469e-05, "loss": 1.2145, "step": 20487 }, { "epoch": 17.002489626556017, "grad_norm": 54.20383834838867, "learning_rate": 1.3202987551867222e-05, "loss": 0.8299, "step": 20488 }, { "epoch": 17.00331950207469, "grad_norm": 70.96055603027344, "learning_rate": 1.3202655601659752e-05, "loss": 0.779, "step": 20489 }, { "epoch": 17.00414937759336, "grad_norm": 77.22660827636719, "learning_rate": 1.3202323651452283e-05, "loss": 0.8609, "step": 20490 }, { "epoch": 17.004979253112033, "grad_norm": 30.409957885742188, "learning_rate": 1.3201991701244815e-05, "loss": 0.4993, "step": 20491 }, { "epoch": 17.005809128630705, "grad_norm": 44.4742546081543, "learning_rate": 1.3201659751037345e-05, "loss": 0.8195, "step": 20492 }, { "epoch": 17.006639004149378, "grad_norm": 44.48530960083008, "learning_rate": 1.3201327800829877e-05, "loss": 0.6574, "step": 20493 }, { "epoch": 17.00746887966805, "grad_norm": 19.460412979125977, "learning_rate": 1.3200995850622406e-05, "loss": 0.4073, "step": 20494 }, { "epoch": 17.008298755186722, "grad_norm": 45.647682189941406, "learning_rate": 1.3200663900414938e-05, "loss": 0.7588, "step": 20495 }, { "epoch": 17.009128630705394, 
"grad_norm": 94.67152404785156, "learning_rate": 1.320033195020747e-05, "loss": 0.8995, "step": 20496 }, { "epoch": 17.009958506224066, "grad_norm": 31.644386291503906, "learning_rate": 1.3200000000000002e-05, "loss": 0.7778, "step": 20497 }, { "epoch": 17.01078838174274, "grad_norm": 64.48638153076172, "learning_rate": 1.3199668049792531e-05, "loss": 0.7536, "step": 20498 }, { "epoch": 17.01161825726141, "grad_norm": 36.82996368408203, "learning_rate": 1.3199336099585063e-05, "loss": 0.5031, "step": 20499 }, { "epoch": 17.012448132780083, "grad_norm": 28.689790725708008, "learning_rate": 1.3199004149377594e-05, "loss": 0.4274, "step": 20500 }, { "epoch": 17.013278008298755, "grad_norm": 40.06448745727539, "learning_rate": 1.3198672199170126e-05, "loss": 0.4478, "step": 20501 }, { "epoch": 17.014107883817427, "grad_norm": 53.223785400390625, "learning_rate": 1.3198340248962658e-05, "loss": 0.383, "step": 20502 }, { "epoch": 17.0149377593361, "grad_norm": 28.189008712768555, "learning_rate": 1.3198008298755187e-05, "loss": 0.6372, "step": 20503 }, { "epoch": 17.01576763485477, "grad_norm": 30.87715721130371, "learning_rate": 1.3197676348547719e-05, "loss": 0.7702, "step": 20504 }, { "epoch": 17.016597510373444, "grad_norm": 22.88237190246582, "learning_rate": 1.319734439834025e-05, "loss": 0.4101, "step": 20505 }, { "epoch": 17.017427385892116, "grad_norm": 65.93253326416016, "learning_rate": 1.3197012448132783e-05, "loss": 0.8865, "step": 20506 }, { "epoch": 17.01825726141079, "grad_norm": 49.65734100341797, "learning_rate": 1.3196680497925312e-05, "loss": 1.1407, "step": 20507 }, { "epoch": 17.01908713692946, "grad_norm": 24.861309051513672, "learning_rate": 1.3196348547717844e-05, "loss": 0.6174, "step": 20508 }, { "epoch": 17.019917012448133, "grad_norm": 41.400211334228516, "learning_rate": 1.3196016597510374e-05, "loss": 0.3801, "step": 20509 }, { "epoch": 17.020746887966805, "grad_norm": 31.418664932250977, "learning_rate": 1.3195684647302906e-05, "loss": 
0.6652, "step": 20510 }, { "epoch": 17.021576763485477, "grad_norm": 22.800983428955078, "learning_rate": 1.3195352697095437e-05, "loss": 0.519, "step": 20511 }, { "epoch": 17.02240663900415, "grad_norm": 117.21415710449219, "learning_rate": 1.3195020746887967e-05, "loss": 1.0538, "step": 20512 }, { "epoch": 17.02323651452282, "grad_norm": 22.699798583984375, "learning_rate": 1.3194688796680499e-05, "loss": 0.3421, "step": 20513 }, { "epoch": 17.024066390041494, "grad_norm": 37.49604034423828, "learning_rate": 1.3194356846473031e-05, "loss": 0.4938, "step": 20514 }, { "epoch": 17.024896265560166, "grad_norm": 37.02336502075195, "learning_rate": 1.319402489626556e-05, "loss": 0.6713, "step": 20515 }, { "epoch": 17.025726141078838, "grad_norm": 31.845611572265625, "learning_rate": 1.3193692946058092e-05, "loss": 0.6204, "step": 20516 }, { "epoch": 17.02655601659751, "grad_norm": 27.21266746520996, "learning_rate": 1.3193360995850624e-05, "loss": 0.4035, "step": 20517 }, { "epoch": 17.027385892116182, "grad_norm": 17.781837463378906, "learning_rate": 1.3193029045643155e-05, "loss": 0.374, "step": 20518 }, { "epoch": 17.028215767634855, "grad_norm": 31.767908096313477, "learning_rate": 1.3192697095435685e-05, "loss": 0.4149, "step": 20519 }, { "epoch": 17.029045643153527, "grad_norm": 25.127283096313477, "learning_rate": 1.3192365145228217e-05, "loss": 0.3681, "step": 20520 }, { "epoch": 17.0298755186722, "grad_norm": 20.77366828918457, "learning_rate": 1.3192033195020748e-05, "loss": 0.3776, "step": 20521 }, { "epoch": 17.03070539419087, "grad_norm": 19.932409286499023, "learning_rate": 1.319170124481328e-05, "loss": 0.5172, "step": 20522 }, { "epoch": 17.031535269709543, "grad_norm": 75.82686614990234, "learning_rate": 1.3191369294605808e-05, "loss": 0.9428, "step": 20523 }, { "epoch": 17.032365145228216, "grad_norm": 41.74815368652344, "learning_rate": 1.319103734439834e-05, "loss": 1.0131, "step": 20524 }, { "epoch": 17.033195020746888, "grad_norm": 
74.33763885498047, "learning_rate": 1.3190705394190873e-05, "loss": 1.0537, "step": 20525 }, { "epoch": 17.03402489626556, "grad_norm": 37.866966247558594, "learning_rate": 1.3190373443983405e-05, "loss": 0.7362, "step": 20526 }, { "epoch": 17.034854771784232, "grad_norm": 19.72052001953125, "learning_rate": 1.3190041493775935e-05, "loss": 0.6103, "step": 20527 }, { "epoch": 17.035684647302904, "grad_norm": 50.95781707763672, "learning_rate": 1.3189709543568466e-05, "loss": 0.6793, "step": 20528 }, { "epoch": 17.036514522821577, "grad_norm": 59.1302490234375, "learning_rate": 1.3189377593360998e-05, "loss": 1.3761, "step": 20529 }, { "epoch": 17.03734439834025, "grad_norm": 95.3822250366211, "learning_rate": 1.3189045643153528e-05, "loss": 0.4907, "step": 20530 }, { "epoch": 17.03817427385892, "grad_norm": 41.79157257080078, "learning_rate": 1.318871369294606e-05, "loss": 0.349, "step": 20531 }, { "epoch": 17.039004149377593, "grad_norm": 49.673946380615234, "learning_rate": 1.3188381742738589e-05, "loss": 0.761, "step": 20532 }, { "epoch": 17.039834024896265, "grad_norm": 15.069578170776367, "learning_rate": 1.3188049792531121e-05, "loss": 0.3308, "step": 20533 }, { "epoch": 17.040663900414938, "grad_norm": 19.229585647583008, "learning_rate": 1.3187717842323653e-05, "loss": 0.3878, "step": 20534 }, { "epoch": 17.04149377593361, "grad_norm": 45.85938262939453, "learning_rate": 1.3187385892116185e-05, "loss": 0.8148, "step": 20535 }, { "epoch": 17.042323651452282, "grad_norm": 20.645450592041016, "learning_rate": 1.3187053941908714e-05, "loss": 0.4718, "step": 20536 }, { "epoch": 17.043153526970954, "grad_norm": 56.53644943237305, "learning_rate": 1.3186721991701246e-05, "loss": 0.8415, "step": 20537 }, { "epoch": 17.043983402489626, "grad_norm": 19.80898666381836, "learning_rate": 1.3186390041493778e-05, "loss": 0.4238, "step": 20538 }, { "epoch": 17.0448132780083, "grad_norm": 23.433000564575195, "learning_rate": 1.3186058091286309e-05, "loss": 0.515, "step": 
20539 }, { "epoch": 17.04564315352697, "grad_norm": 26.701648712158203, "learning_rate": 1.3185726141078839e-05, "loss": 0.3658, "step": 20540 }, { "epoch": 17.046473029045643, "grad_norm": 33.160858154296875, "learning_rate": 1.318539419087137e-05, "loss": 0.7602, "step": 20541 }, { "epoch": 17.047302904564315, "grad_norm": 46.990028381347656, "learning_rate": 1.3185062240663901e-05, "loss": 0.9953, "step": 20542 }, { "epoch": 17.048132780082987, "grad_norm": 52.733341217041016, "learning_rate": 1.3184730290456434e-05, "loss": 0.7596, "step": 20543 }, { "epoch": 17.04896265560166, "grad_norm": 51.34209442138672, "learning_rate": 1.3184398340248962e-05, "loss": 1.2551, "step": 20544 }, { "epoch": 17.04979253112033, "grad_norm": 36.44776916503906, "learning_rate": 1.3184066390041494e-05, "loss": 0.5162, "step": 20545 }, { "epoch": 17.050622406639004, "grad_norm": 44.66563034057617, "learning_rate": 1.3183734439834027e-05, "loss": 0.8251, "step": 20546 }, { "epoch": 17.051452282157676, "grad_norm": 37.353519439697266, "learning_rate": 1.3183402489626559e-05, "loss": 0.5788, "step": 20547 }, { "epoch": 17.05228215767635, "grad_norm": 51.290584564208984, "learning_rate": 1.3183070539419087e-05, "loss": 0.6718, "step": 20548 }, { "epoch": 17.05311203319502, "grad_norm": 111.52257537841797, "learning_rate": 1.318273858921162e-05, "loss": 0.5949, "step": 20549 }, { "epoch": 17.053941908713693, "grad_norm": 58.436317443847656, "learning_rate": 1.318240663900415e-05, "loss": 0.5964, "step": 20550 }, { "epoch": 17.054771784232365, "grad_norm": 28.75116539001465, "learning_rate": 1.3182074688796682e-05, "loss": 0.9866, "step": 20551 }, { "epoch": 17.055601659751037, "grad_norm": 43.61750030517578, "learning_rate": 1.3181742738589212e-05, "loss": 0.5515, "step": 20552 }, { "epoch": 17.05643153526971, "grad_norm": 25.83440589904785, "learning_rate": 1.3181410788381743e-05, "loss": 0.3957, "step": 20553 }, { "epoch": 17.05726141078838, "grad_norm": 61.362998962402344, 
"learning_rate": 1.3181078838174275e-05, "loss": 0.8841, "step": 20554 }, { "epoch": 17.058091286307054, "grad_norm": 20.38593864440918, "learning_rate": 1.3180746887966807e-05, "loss": 0.3812, "step": 20555 }, { "epoch": 17.058921161825726, "grad_norm": 25.83259391784668, "learning_rate": 1.3180414937759337e-05, "loss": 0.4741, "step": 20556 }, { "epoch": 17.059751037344398, "grad_norm": 29.14516258239746, "learning_rate": 1.3180082987551868e-05, "loss": 0.6982, "step": 20557 }, { "epoch": 17.06058091286307, "grad_norm": 14.8949556350708, "learning_rate": 1.31797510373444e-05, "loss": 0.3053, "step": 20558 }, { "epoch": 17.061410788381743, "grad_norm": 61.5190315246582, "learning_rate": 1.317941908713693e-05, "loss": 0.499, "step": 20559 }, { "epoch": 17.062240663900415, "grad_norm": 24.13422393798828, "learning_rate": 1.3179087136929462e-05, "loss": 0.4118, "step": 20560 }, { "epoch": 17.063070539419087, "grad_norm": 30.99203109741211, "learning_rate": 1.3178755186721993e-05, "loss": 0.5226, "step": 20561 }, { "epoch": 17.06390041493776, "grad_norm": 103.22598266601562, "learning_rate": 1.3178423236514523e-05, "loss": 0.3883, "step": 20562 }, { "epoch": 17.06473029045643, "grad_norm": 72.01307678222656, "learning_rate": 1.3178091286307055e-05, "loss": 1.0214, "step": 20563 }, { "epoch": 17.065560165975104, "grad_norm": 43.70970916748047, "learning_rate": 1.3177759336099588e-05, "loss": 0.8073, "step": 20564 }, { "epoch": 17.066390041493776, "grad_norm": 71.63568878173828, "learning_rate": 1.3177427385892116e-05, "loss": 0.6526, "step": 20565 }, { "epoch": 17.067219917012448, "grad_norm": 40.96821212768555, "learning_rate": 1.3177095435684648e-05, "loss": 0.9299, "step": 20566 }, { "epoch": 17.06804979253112, "grad_norm": 38.47596740722656, "learning_rate": 1.317676348547718e-05, "loss": 0.8287, "step": 20567 }, { "epoch": 17.068879668049792, "grad_norm": 68.86825561523438, "learning_rate": 1.3176431535269711e-05, "loss": 0.9564, "step": 20568 }, { "epoch": 
17.069709543568464, "grad_norm": 30.780139923095703, "learning_rate": 1.3176099585062241e-05, "loss": 0.4454, "step": 20569 }, { "epoch": 17.070539419087137, "grad_norm": 35.259273529052734, "learning_rate": 1.3175767634854772e-05, "loss": 1.1722, "step": 20570 }, { "epoch": 17.07136929460581, "grad_norm": 40.092193603515625, "learning_rate": 1.3175435684647304e-05, "loss": 0.9237, "step": 20571 }, { "epoch": 17.07219917012448, "grad_norm": 34.372352600097656, "learning_rate": 1.3175103734439836e-05, "loss": 0.6624, "step": 20572 }, { "epoch": 17.073029045643153, "grad_norm": 31.925676345825195, "learning_rate": 1.3174771784232365e-05, "loss": 0.8081, "step": 20573 }, { "epoch": 17.073858921161825, "grad_norm": 36.526302337646484, "learning_rate": 1.3174439834024897e-05, "loss": 0.4887, "step": 20574 }, { "epoch": 17.074688796680498, "grad_norm": 43.23468017578125, "learning_rate": 1.3174107883817429e-05, "loss": 0.8912, "step": 20575 }, { "epoch": 17.07551867219917, "grad_norm": 62.81997299194336, "learning_rate": 1.3173775933609961e-05, "loss": 1.5158, "step": 20576 }, { "epoch": 17.076348547717842, "grad_norm": 35.81999206542969, "learning_rate": 1.317344398340249e-05, "loss": 0.4059, "step": 20577 }, { "epoch": 17.077178423236514, "grad_norm": 10.853670120239258, "learning_rate": 1.3173112033195022e-05, "loss": 0.3421, "step": 20578 }, { "epoch": 17.078008298755186, "grad_norm": 60.65732192993164, "learning_rate": 1.3172780082987552e-05, "loss": 0.8306, "step": 20579 }, { "epoch": 17.07883817427386, "grad_norm": 35.40028381347656, "learning_rate": 1.3172448132780084e-05, "loss": 0.3598, "step": 20580 }, { "epoch": 17.07966804979253, "grad_norm": 31.708993911743164, "learning_rate": 1.3172116182572616e-05, "loss": 0.5943, "step": 20581 }, { "epoch": 17.080497925311203, "grad_norm": 46.113521575927734, "learning_rate": 1.3171784232365145e-05, "loss": 0.8401, "step": 20582 }, { "epoch": 17.081327800829875, "grad_norm": 23.743270874023438, "learning_rate": 
1.3171452282157677e-05, "loss": 0.3272, "step": 20583 }, { "epoch": 17.082157676348547, "grad_norm": 53.498634338378906, "learning_rate": 1.317112033195021e-05, "loss": 0.4493, "step": 20584 }, { "epoch": 17.08298755186722, "grad_norm": 25.41541290283203, "learning_rate": 1.3170788381742741e-05, "loss": 0.2805, "step": 20585 }, { "epoch": 17.083817427385892, "grad_norm": 52.757232666015625, "learning_rate": 1.317045643153527e-05, "loss": 0.7231, "step": 20586 }, { "epoch": 17.084647302904564, "grad_norm": 59.78019332885742, "learning_rate": 1.3170124481327802e-05, "loss": 1.5215, "step": 20587 }, { "epoch": 17.085477178423236, "grad_norm": 56.64208984375, "learning_rate": 1.3169792531120333e-05, "loss": 0.7331, "step": 20588 }, { "epoch": 17.08630705394191, "grad_norm": 167.0548095703125, "learning_rate": 1.3169460580912865e-05, "loss": 1.0778, "step": 20589 }, { "epoch": 17.08713692946058, "grad_norm": 49.60323715209961, "learning_rate": 1.3169128630705395e-05, "loss": 0.5391, "step": 20590 }, { "epoch": 17.087966804979253, "grad_norm": 98.2999267578125, "learning_rate": 1.3168796680497926e-05, "loss": 0.809, "step": 20591 }, { "epoch": 17.088796680497925, "grad_norm": 25.797229766845703, "learning_rate": 1.3168464730290458e-05, "loss": 0.4299, "step": 20592 }, { "epoch": 17.089626556016597, "grad_norm": 59.94583511352539, "learning_rate": 1.316813278008299e-05, "loss": 0.4052, "step": 20593 }, { "epoch": 17.09045643153527, "grad_norm": 54.57034683227539, "learning_rate": 1.3167800829875519e-05, "loss": 0.4219, "step": 20594 }, { "epoch": 17.09128630705394, "grad_norm": 48.09050750732422, "learning_rate": 1.316746887966805e-05, "loss": 0.6217, "step": 20595 }, { "epoch": 17.092116182572614, "grad_norm": 91.42047882080078, "learning_rate": 1.3167136929460583e-05, "loss": 0.5512, "step": 20596 }, { "epoch": 17.092946058091286, "grad_norm": 34.527435302734375, "learning_rate": 1.3166804979253113e-05, "loss": 0.6489, "step": 20597 }, { "epoch": 17.093775933609958, 
"grad_norm": 62.86341857910156, "learning_rate": 1.3166473029045644e-05, "loss": 1.0908, "step": 20598 }, { "epoch": 17.09460580912863, "grad_norm": 41.323814392089844, "learning_rate": 1.3166141078838176e-05, "loss": 0.5675, "step": 20599 }, { "epoch": 17.095435684647303, "grad_norm": 30.030385971069336, "learning_rate": 1.3165809128630706e-05, "loss": 0.4585, "step": 20600 }, { "epoch": 17.096265560165975, "grad_norm": 53.752254486083984, "learning_rate": 1.3165477178423238e-05, "loss": 0.5633, "step": 20601 }, { "epoch": 17.097095435684647, "grad_norm": 39.68375015258789, "learning_rate": 1.3165145228215767e-05, "loss": 0.793, "step": 20602 }, { "epoch": 17.09792531120332, "grad_norm": 69.48107147216797, "learning_rate": 1.3164813278008299e-05, "loss": 0.9388, "step": 20603 }, { "epoch": 17.09875518672199, "grad_norm": 50.19795608520508, "learning_rate": 1.3164481327800831e-05, "loss": 0.3453, "step": 20604 }, { "epoch": 17.099585062240664, "grad_norm": 12.221921920776367, "learning_rate": 1.3164149377593363e-05, "loss": 0.2752, "step": 20605 }, { "epoch": 17.100414937759336, "grad_norm": 39.85902786254883, "learning_rate": 1.3163817427385894e-05, "loss": 0.8756, "step": 20606 }, { "epoch": 17.101244813278008, "grad_norm": 46.05659866333008, "learning_rate": 1.3163485477178424e-05, "loss": 0.5414, "step": 20607 }, { "epoch": 17.10207468879668, "grad_norm": 27.040191650390625, "learning_rate": 1.3163153526970956e-05, "loss": 0.448, "step": 20608 }, { "epoch": 17.102904564315352, "grad_norm": 49.63711929321289, "learning_rate": 1.3162821576763487e-05, "loss": 1.0476, "step": 20609 }, { "epoch": 17.103734439834025, "grad_norm": 31.949356079101562, "learning_rate": 1.3162489626556019e-05, "loss": 0.7466, "step": 20610 }, { "epoch": 17.104564315352697, "grad_norm": 36.394813537597656, "learning_rate": 1.3162157676348547e-05, "loss": 0.8164, "step": 20611 }, { "epoch": 17.10539419087137, "grad_norm": 38.145050048828125, "learning_rate": 1.316182572614108e-05, "loss": 
0.5559, "step": 20612 }, { "epoch": 17.10622406639004, "grad_norm": 46.933956146240234, "learning_rate": 1.3161493775933612e-05, "loss": 0.3268, "step": 20613 }, { "epoch": 17.107053941908713, "grad_norm": 32.311683654785156, "learning_rate": 1.3161161825726144e-05, "loss": 0.5879, "step": 20614 }, { "epoch": 17.107883817427386, "grad_norm": 39.98297882080078, "learning_rate": 1.3160829875518672e-05, "loss": 0.4508, "step": 20615 }, { "epoch": 17.108713692946058, "grad_norm": 18.29276466369629, "learning_rate": 1.3160497925311205e-05, "loss": 0.2099, "step": 20616 }, { "epoch": 17.10954356846473, "grad_norm": 36.38954162597656, "learning_rate": 1.3160165975103735e-05, "loss": 0.7229, "step": 20617 }, { "epoch": 17.110373443983402, "grad_norm": 53.17993927001953, "learning_rate": 1.3159834024896267e-05, "loss": 0.38, "step": 20618 }, { "epoch": 17.111203319502074, "grad_norm": 24.76873016357422, "learning_rate": 1.3159502074688798e-05, "loss": 0.4057, "step": 20619 }, { "epoch": 17.112033195020746, "grad_norm": 12.406889915466309, "learning_rate": 1.3159170124481328e-05, "loss": 0.2237, "step": 20620 }, { "epoch": 17.11286307053942, "grad_norm": 12.678866386413574, "learning_rate": 1.315883817427386e-05, "loss": 0.3131, "step": 20621 }, { "epoch": 17.11369294605809, "grad_norm": 138.52850341796875, "learning_rate": 1.3158506224066392e-05, "loss": 1.1122, "step": 20622 }, { "epoch": 17.114522821576763, "grad_norm": 68.67080688476562, "learning_rate": 1.3158174273858921e-05, "loss": 0.6915, "step": 20623 }, { "epoch": 17.115352697095435, "grad_norm": 57.7653694152832, "learning_rate": 1.3157842323651453e-05, "loss": 0.4826, "step": 20624 }, { "epoch": 17.116182572614107, "grad_norm": 35.98318099975586, "learning_rate": 1.3157510373443985e-05, "loss": 1.0456, "step": 20625 }, { "epoch": 17.11701244813278, "grad_norm": 20.165239334106445, "learning_rate": 1.3157178423236516e-05, "loss": 0.3227, "step": 20626 }, { "epoch": 17.117842323651452, "grad_norm": 
36.086368560791016, "learning_rate": 1.3156846473029046e-05, "loss": 0.4698, "step": 20627 }, { "epoch": 17.118672199170124, "grad_norm": 17.040555953979492, "learning_rate": 1.3156514522821578e-05, "loss": 0.3435, "step": 20628 }, { "epoch": 17.119502074688796, "grad_norm": 49.949195861816406, "learning_rate": 1.3156182572614108e-05, "loss": 0.8576, "step": 20629 }, { "epoch": 17.12033195020747, "grad_norm": 90.8026351928711, "learning_rate": 1.315585062240664e-05, "loss": 1.2567, "step": 20630 }, { "epoch": 17.12116182572614, "grad_norm": 38.77274703979492, "learning_rate": 1.3155518672199171e-05, "loss": 0.6388, "step": 20631 }, { "epoch": 17.121991701244813, "grad_norm": 37.037025451660156, "learning_rate": 1.3155186721991701e-05, "loss": 1.0803, "step": 20632 }, { "epoch": 17.122821576763485, "grad_norm": 53.35799026489258, "learning_rate": 1.3154854771784233e-05, "loss": 0.4713, "step": 20633 }, { "epoch": 17.123651452282157, "grad_norm": 36.45729064941406, "learning_rate": 1.3154522821576766e-05, "loss": 1.0664, "step": 20634 }, { "epoch": 17.12448132780083, "grad_norm": 42.27101135253906, "learning_rate": 1.3154190871369296e-05, "loss": 0.8823, "step": 20635 }, { "epoch": 17.1253112033195, "grad_norm": 63.70497512817383, "learning_rate": 1.3153858921161826e-05, "loss": 0.4058, "step": 20636 }, { "epoch": 17.126141078838174, "grad_norm": 23.705028533935547, "learning_rate": 1.3153526970954359e-05, "loss": 0.3158, "step": 20637 }, { "epoch": 17.126970954356846, "grad_norm": 19.497758865356445, "learning_rate": 1.3153195020746889e-05, "loss": 0.3044, "step": 20638 }, { "epoch": 17.127800829875518, "grad_norm": 64.37519836425781, "learning_rate": 1.3152863070539421e-05, "loss": 0.5058, "step": 20639 }, { "epoch": 17.12863070539419, "grad_norm": 21.13979148864746, "learning_rate": 1.315253112033195e-05, "loss": 0.381, "step": 20640 }, { "epoch": 17.129460580912863, "grad_norm": 103.74818420410156, "learning_rate": 1.3152199170124482e-05, "loss": 0.5822, "step": 
20641 }, { "epoch": 17.130290456431535, "grad_norm": 49.08559799194336, "learning_rate": 1.3151867219917014e-05, "loss": 0.5576, "step": 20642 }, { "epoch": 17.131120331950207, "grad_norm": 37.76104736328125, "learning_rate": 1.3151535269709546e-05, "loss": 1.1109, "step": 20643 }, { "epoch": 17.13195020746888, "grad_norm": 26.445960998535156, "learning_rate": 1.3151203319502075e-05, "loss": 0.5109, "step": 20644 }, { "epoch": 17.13278008298755, "grad_norm": 38.09077453613281, "learning_rate": 1.3150871369294607e-05, "loss": 0.9962, "step": 20645 }, { "epoch": 17.133609958506224, "grad_norm": 46.47822189331055, "learning_rate": 1.3150539419087139e-05, "loss": 0.4083, "step": 20646 }, { "epoch": 17.134439834024896, "grad_norm": 79.41284942626953, "learning_rate": 1.315020746887967e-05, "loss": 0.747, "step": 20647 }, { "epoch": 17.135269709543568, "grad_norm": 49.0706787109375, "learning_rate": 1.31498755186722e-05, "loss": 1.1974, "step": 20648 }, { "epoch": 17.13609958506224, "grad_norm": 25.653554916381836, "learning_rate": 1.314954356846473e-05, "loss": 0.3139, "step": 20649 }, { "epoch": 17.136929460580912, "grad_norm": 43.93746566772461, "learning_rate": 1.3149211618257262e-05, "loss": 0.493, "step": 20650 }, { "epoch": 17.137759336099585, "grad_norm": 40.49061584472656, "learning_rate": 1.3148879668049794e-05, "loss": 0.5738, "step": 20651 }, { "epoch": 17.138589211618257, "grad_norm": 21.890291213989258, "learning_rate": 1.3148547717842323e-05, "loss": 0.2241, "step": 20652 }, { "epoch": 17.13941908713693, "grad_norm": 66.26168823242188, "learning_rate": 1.3148215767634855e-05, "loss": 0.5843, "step": 20653 }, { "epoch": 17.1402489626556, "grad_norm": 36.300689697265625, "learning_rate": 1.3147883817427387e-05, "loss": 0.7461, "step": 20654 }, { "epoch": 17.141078838174273, "grad_norm": 92.19920349121094, "learning_rate": 1.314755186721992e-05, "loss": 0.9199, "step": 20655 }, { "epoch": 17.141908713692946, "grad_norm": 114.5192642211914, "learning_rate": 
1.3147219917012448e-05, "loss": 1.0619, "step": 20656 }, { "epoch": 17.142738589211618, "grad_norm": 69.65467071533203, "learning_rate": 1.314688796680498e-05, "loss": 1.2708, "step": 20657 }, { "epoch": 17.14356846473029, "grad_norm": 33.7930908203125, "learning_rate": 1.314655601659751e-05, "loss": 0.3245, "step": 20658 }, { "epoch": 17.144398340248962, "grad_norm": 43.7301139831543, "learning_rate": 1.3146224066390043e-05, "loss": 0.8174, "step": 20659 }, { "epoch": 17.145228215767634, "grad_norm": 26.998207092285156, "learning_rate": 1.3145892116182575e-05, "loss": 0.5159, "step": 20660 }, { "epoch": 17.146058091286307, "grad_norm": 51.620304107666016, "learning_rate": 1.3145560165975104e-05, "loss": 0.5782, "step": 20661 }, { "epoch": 17.14688796680498, "grad_norm": 35.3052864074707, "learning_rate": 1.3145228215767636e-05, "loss": 0.4522, "step": 20662 }, { "epoch": 17.14771784232365, "grad_norm": 14.911306381225586, "learning_rate": 1.3144896265560168e-05, "loss": 0.3784, "step": 20663 }, { "epoch": 17.148547717842323, "grad_norm": 45.77388000488281, "learning_rate": 1.3144564315352698e-05, "loss": 0.6298, "step": 20664 }, { "epoch": 17.149377593360995, "grad_norm": 69.76905059814453, "learning_rate": 1.3144232365145229e-05, "loss": 1.2195, "step": 20665 }, { "epoch": 17.150207468879668, "grad_norm": 16.035228729248047, "learning_rate": 1.314390041493776e-05, "loss": 0.2993, "step": 20666 }, { "epoch": 17.15103734439834, "grad_norm": 15.853296279907227, "learning_rate": 1.3143568464730291e-05, "loss": 0.2804, "step": 20667 }, { "epoch": 17.151867219917012, "grad_norm": 38.74089813232422, "learning_rate": 1.3143236514522823e-05, "loss": 0.304, "step": 20668 }, { "epoch": 17.152697095435684, "grad_norm": 19.459321975708008, "learning_rate": 1.3142904564315354e-05, "loss": 0.2746, "step": 20669 }, { "epoch": 17.153526970954356, "grad_norm": 33.9633903503418, "learning_rate": 1.3142572614107884e-05, "loss": 0.702, "step": 20670 }, { "epoch": 17.15435684647303, 
"grad_norm": 58.79163360595703, "learning_rate": 1.3142240663900416e-05, "loss": 0.6404, "step": 20671 }, { "epoch": 17.1551867219917, "grad_norm": 90.09657287597656, "learning_rate": 1.3141908713692948e-05, "loss": 0.4887, "step": 20672 }, { "epoch": 17.156016597510373, "grad_norm": 32.241764068603516, "learning_rate": 1.3141576763485477e-05, "loss": 0.3243, "step": 20673 }, { "epoch": 17.156846473029045, "grad_norm": 233.6007537841797, "learning_rate": 1.314124481327801e-05, "loss": 0.6512, "step": 20674 }, { "epoch": 17.157676348547717, "grad_norm": 65.97474670410156, "learning_rate": 1.3140912863070541e-05, "loss": 0.7762, "step": 20675 }, { "epoch": 17.15850622406639, "grad_norm": 13.2251615524292, "learning_rate": 1.3140580912863072e-05, "loss": 0.2731, "step": 20676 }, { "epoch": 17.15933609958506, "grad_norm": 72.74412536621094, "learning_rate": 1.3140248962655602e-05, "loss": 0.4965, "step": 20677 }, { "epoch": 17.160165975103734, "grad_norm": 131.10533142089844, "learning_rate": 1.3139917012448134e-05, "loss": 0.6872, "step": 20678 }, { "epoch": 17.160995850622406, "grad_norm": 48.314693450927734, "learning_rate": 1.3139585062240665e-05, "loss": 0.749, "step": 20679 }, { "epoch": 17.16182572614108, "grad_norm": 21.484535217285156, "learning_rate": 1.3139253112033197e-05, "loss": 0.2167, "step": 20680 }, { "epoch": 17.16265560165975, "grad_norm": 84.55299377441406, "learning_rate": 1.3138921161825726e-05, "loss": 0.893, "step": 20681 }, { "epoch": 17.163485477178423, "grad_norm": 38.76819610595703, "learning_rate": 1.3138589211618258e-05, "loss": 0.5131, "step": 20682 }, { "epoch": 17.164315352697095, "grad_norm": 25.03412437438965, "learning_rate": 1.313825726141079e-05, "loss": 0.3607, "step": 20683 }, { "epoch": 17.165145228215767, "grad_norm": 23.07210922241211, "learning_rate": 1.3137925311203322e-05, "loss": 0.6687, "step": 20684 }, { "epoch": 17.16597510373444, "grad_norm": 13.66949462890625, "learning_rate": 1.3137593360995852e-05, "loss": 0.3223, 
"step": 20685 }, { "epoch": 17.16680497925311, "grad_norm": 48.93015670776367, "learning_rate": 1.3137261410788383e-05, "loss": 0.7037, "step": 20686 }, { "epoch": 17.167634854771784, "grad_norm": 25.110553741455078, "learning_rate": 1.3136929460580913e-05, "loss": 0.8116, "step": 20687 }, { "epoch": 17.168464730290456, "grad_norm": 51.65727615356445, "learning_rate": 1.3136597510373445e-05, "loss": 0.7049, "step": 20688 }, { "epoch": 17.169294605809128, "grad_norm": 23.305431365966797, "learning_rate": 1.3136265560165977e-05, "loss": 0.3023, "step": 20689 }, { "epoch": 17.1701244813278, "grad_norm": 28.57677459716797, "learning_rate": 1.3135933609958506e-05, "loss": 0.5951, "step": 20690 }, { "epoch": 17.170954356846472, "grad_norm": 26.5633487701416, "learning_rate": 1.3135601659751038e-05, "loss": 0.9179, "step": 20691 }, { "epoch": 17.171784232365145, "grad_norm": 30.639829635620117, "learning_rate": 1.313526970954357e-05, "loss": 0.7254, "step": 20692 }, { "epoch": 17.172614107883817, "grad_norm": 21.20893669128418, "learning_rate": 1.3134937759336102e-05, "loss": 0.2258, "step": 20693 }, { "epoch": 17.17344398340249, "grad_norm": 28.355836868286133, "learning_rate": 1.3134605809128631e-05, "loss": 0.7271, "step": 20694 }, { "epoch": 17.17427385892116, "grad_norm": 23.785663604736328, "learning_rate": 1.3134273858921163e-05, "loss": 0.3892, "step": 20695 }, { "epoch": 17.175103734439833, "grad_norm": 89.21283721923828, "learning_rate": 1.3133941908713694e-05, "loss": 1.1525, "step": 20696 }, { "epoch": 17.175933609958506, "grad_norm": 9.55291748046875, "learning_rate": 1.3133609958506226e-05, "loss": 0.2417, "step": 20697 }, { "epoch": 17.176763485477178, "grad_norm": 45.75227355957031, "learning_rate": 1.3133278008298756e-05, "loss": 0.5989, "step": 20698 }, { "epoch": 17.17759336099585, "grad_norm": 25.204490661621094, "learning_rate": 1.3132946058091287e-05, "loss": 0.4702, "step": 20699 }, { "epoch": 17.178423236514522, "grad_norm": 16.980571746826172, 
"learning_rate": 1.3132614107883819e-05, "loss": 0.3026, "step": 20700 }, { "epoch": 17.179253112033194, "grad_norm": 35.969573974609375, "learning_rate": 1.313228215767635e-05, "loss": 0.413, "step": 20701 }, { "epoch": 17.180082987551867, "grad_norm": 31.731103897094727, "learning_rate": 1.313195020746888e-05, "loss": 1.0109, "step": 20702 }, { "epoch": 17.18091286307054, "grad_norm": 26.023221969604492, "learning_rate": 1.3131618257261412e-05, "loss": 0.386, "step": 20703 }, { "epoch": 17.18174273858921, "grad_norm": 38.201255798339844, "learning_rate": 1.3131286307053944e-05, "loss": 0.6982, "step": 20704 }, { "epoch": 17.182572614107883, "grad_norm": 46.38979721069336, "learning_rate": 1.3130954356846474e-05, "loss": 0.6731, "step": 20705 }, { "epoch": 17.183402489626555, "grad_norm": 25.842527389526367, "learning_rate": 1.3130622406639004e-05, "loss": 0.4863, "step": 20706 }, { "epoch": 17.184232365145228, "grad_norm": 51.58678436279297, "learning_rate": 1.3130290456431537e-05, "loss": 0.5447, "step": 20707 }, { "epoch": 17.1850622406639, "grad_norm": 51.36558151245117, "learning_rate": 1.3129958506224067e-05, "loss": 0.7577, "step": 20708 }, { "epoch": 17.185892116182572, "grad_norm": 83.50971221923828, "learning_rate": 1.3129626556016599e-05, "loss": 0.8685, "step": 20709 }, { "epoch": 17.186721991701244, "grad_norm": 34.96181106567383, "learning_rate": 1.3129294605809128e-05, "loss": 0.4354, "step": 20710 }, { "epoch": 17.187551867219916, "grad_norm": 35.6521110534668, "learning_rate": 1.312896265560166e-05, "loss": 0.7483, "step": 20711 }, { "epoch": 17.18838174273859, "grad_norm": 54.433265686035156, "learning_rate": 1.3128630705394192e-05, "loss": 0.8337, "step": 20712 }, { "epoch": 17.18921161825726, "grad_norm": 16.089393615722656, "learning_rate": 1.3128298755186724e-05, "loss": 0.4284, "step": 20713 }, { "epoch": 17.190041493775933, "grad_norm": 29.374448776245117, "learning_rate": 1.3127966804979255e-05, "loss": 0.4822, "step": 20714 }, { "epoch": 
17.190871369294605, "grad_norm": 32.07292175292969, "learning_rate": 1.3127634854771785e-05, "loss": 0.4285, "step": 20715 }, { "epoch": 17.191701244813277, "grad_norm": 41.22269058227539, "learning_rate": 1.3127302904564317e-05, "loss": 0.6862, "step": 20716 }, { "epoch": 17.19253112033195, "grad_norm": 49.21648406982422, "learning_rate": 1.3126970954356848e-05, "loss": 1.089, "step": 20717 }, { "epoch": 17.19336099585062, "grad_norm": 43.18610763549805, "learning_rate": 1.312663900414938e-05, "loss": 0.8954, "step": 20718 }, { "epoch": 17.194190871369294, "grad_norm": 10.746736526489258, "learning_rate": 1.3126307053941908e-05, "loss": 0.2557, "step": 20719 }, { "epoch": 17.195020746887966, "grad_norm": 65.34024810791016, "learning_rate": 1.312597510373444e-05, "loss": 0.417, "step": 20720 }, { "epoch": 17.19585062240664, "grad_norm": 33.35895538330078, "learning_rate": 1.3125643153526973e-05, "loss": 0.4171, "step": 20721 }, { "epoch": 17.19668049792531, "grad_norm": 22.42656135559082, "learning_rate": 1.3125311203319505e-05, "loss": 0.5388, "step": 20722 }, { "epoch": 17.197510373443983, "grad_norm": 101.49417877197266, "learning_rate": 1.3124979253112033e-05, "loss": 0.8786, "step": 20723 }, { "epoch": 17.198340248962655, "grad_norm": 158.47705078125, "learning_rate": 1.3124647302904565e-05, "loss": 0.5218, "step": 20724 }, { "epoch": 17.199170124481327, "grad_norm": 113.8860855102539, "learning_rate": 1.3124315352697098e-05, "loss": 0.6108, "step": 20725 }, { "epoch": 17.2, "grad_norm": 53.795265197753906, "learning_rate": 1.3123983402489628e-05, "loss": 0.6684, "step": 20726 }, { "epoch": 17.20082987551867, "grad_norm": 61.015724182128906, "learning_rate": 1.3123651452282158e-05, "loss": 0.8412, "step": 20727 }, { "epoch": 17.201659751037344, "grad_norm": 13.815367698669434, "learning_rate": 1.3123319502074689e-05, "loss": 0.5369, "step": 20728 }, { "epoch": 17.202489626556016, "grad_norm": 21.307924270629883, "learning_rate": 1.3122987551867221e-05, "loss": 
0.3521, "step": 20729 }, { "epoch": 17.203319502074688, "grad_norm": 47.51506805419922, "learning_rate": 1.3122655601659753e-05, "loss": 0.7238, "step": 20730 }, { "epoch": 17.20414937759336, "grad_norm": 32.23807907104492, "learning_rate": 1.3122323651452282e-05, "loss": 0.3349, "step": 20731 }, { "epoch": 17.204979253112032, "grad_norm": 60.25782775878906, "learning_rate": 1.3121991701244814e-05, "loss": 0.6401, "step": 20732 }, { "epoch": 17.205809128630705, "grad_norm": 32.45595169067383, "learning_rate": 1.3121659751037346e-05, "loss": 0.3381, "step": 20733 }, { "epoch": 17.206639004149377, "grad_norm": 43.26120376586914, "learning_rate": 1.3121327800829876e-05, "loss": 0.3648, "step": 20734 }, { "epoch": 17.20746887966805, "grad_norm": 53.303062438964844, "learning_rate": 1.3120995850622407e-05, "loss": 0.4776, "step": 20735 }, { "epoch": 17.20829875518672, "grad_norm": 38.2301139831543, "learning_rate": 1.3120663900414939e-05, "loss": 0.6018, "step": 20736 }, { "epoch": 17.209128630705393, "grad_norm": 30.99861717224121, "learning_rate": 1.312033195020747e-05, "loss": 0.6201, "step": 20737 }, { "epoch": 17.209958506224066, "grad_norm": 40.226566314697266, "learning_rate": 1.3120000000000001e-05, "loss": 0.7262, "step": 20738 }, { "epoch": 17.210788381742738, "grad_norm": 43.68324661254883, "learning_rate": 1.3119668049792534e-05, "loss": 0.5749, "step": 20739 }, { "epoch": 17.21161825726141, "grad_norm": 35.7002067565918, "learning_rate": 1.3119336099585062e-05, "loss": 0.4745, "step": 20740 }, { "epoch": 17.212448132780082, "grad_norm": 31.75673484802246, "learning_rate": 1.3119004149377594e-05, "loss": 0.4726, "step": 20741 }, { "epoch": 17.213278008298754, "grad_norm": 29.631393432617188, "learning_rate": 1.3118672199170126e-05, "loss": 0.5199, "step": 20742 }, { "epoch": 17.214107883817427, "grad_norm": 41.51274490356445, "learning_rate": 1.3118340248962657e-05, "loss": 0.5734, "step": 20743 }, { "epoch": 17.2149377593361, "grad_norm": 
47.055511474609375, "learning_rate": 1.3118008298755187e-05, "loss": 0.8299, "step": 20744 }, { "epoch": 17.21576763485477, "grad_norm": 47.72441482543945, "learning_rate": 1.311767634854772e-05, "loss": 0.9664, "step": 20745 }, { "epoch": 17.216597510373443, "grad_norm": 41.679466247558594, "learning_rate": 1.311734439834025e-05, "loss": 0.589, "step": 20746 }, { "epoch": 17.217427385892115, "grad_norm": 55.67218780517578, "learning_rate": 1.3117012448132782e-05, "loss": 0.8966, "step": 20747 }, { "epoch": 17.218257261410788, "grad_norm": 30.18508529663086, "learning_rate": 1.3116680497925312e-05, "loss": 0.851, "step": 20748 }, { "epoch": 17.21908713692946, "grad_norm": 120.56305694580078, "learning_rate": 1.3116348547717843e-05, "loss": 0.7582, "step": 20749 }, { "epoch": 17.219917012448132, "grad_norm": 72.51031494140625, "learning_rate": 1.3116016597510375e-05, "loss": 0.7833, "step": 20750 }, { "epoch": 17.220746887966804, "grad_norm": 72.12067413330078, "learning_rate": 1.3115684647302907e-05, "loss": 0.4756, "step": 20751 }, { "epoch": 17.221576763485476, "grad_norm": 32.25689697265625, "learning_rate": 1.3115352697095436e-05, "loss": 0.4623, "step": 20752 }, { "epoch": 17.22240663900415, "grad_norm": 50.439918518066406, "learning_rate": 1.3115020746887968e-05, "loss": 0.7881, "step": 20753 }, { "epoch": 17.22323651452282, "grad_norm": 29.223302841186523, "learning_rate": 1.31146887966805e-05, "loss": 0.2733, "step": 20754 }, { "epoch": 17.224066390041493, "grad_norm": 22.959932327270508, "learning_rate": 1.311435684647303e-05, "loss": 0.4728, "step": 20755 }, { "epoch": 17.224896265560165, "grad_norm": 15.463401794433594, "learning_rate": 1.311402489626556e-05, "loss": 0.3009, "step": 20756 }, { "epoch": 17.225726141078837, "grad_norm": 49.32272720336914, "learning_rate": 1.3113692946058091e-05, "loss": 0.7088, "step": 20757 }, { "epoch": 17.22655601659751, "grad_norm": 42.67744064331055, "learning_rate": 1.3113360995850623e-05, "loss": 0.4893, "step": 
20758 }, { "epoch": 17.22738589211618, "grad_norm": 40.0844612121582, "learning_rate": 1.3113029045643155e-05, "loss": 0.8602, "step": 20759 }, { "epoch": 17.228215767634854, "grad_norm": 46.830902099609375, "learning_rate": 1.3112697095435684e-05, "loss": 0.9632, "step": 20760 }, { "epoch": 17.229045643153526, "grad_norm": 25.452951431274414, "learning_rate": 1.3112365145228216e-05, "loss": 0.5882, "step": 20761 }, { "epoch": 17.2298755186722, "grad_norm": 20.81720542907715, "learning_rate": 1.3112033195020748e-05, "loss": 0.387, "step": 20762 }, { "epoch": 17.23070539419087, "grad_norm": 11.652308464050293, "learning_rate": 1.311170124481328e-05, "loss": 0.316, "step": 20763 }, { "epoch": 17.231535269709543, "grad_norm": 32.32045364379883, "learning_rate": 1.3111369294605809e-05, "loss": 0.4454, "step": 20764 }, { "epoch": 17.232365145228215, "grad_norm": 43.51376724243164, "learning_rate": 1.3111037344398341e-05, "loss": 0.8254, "step": 20765 }, { "epoch": 17.233195020746887, "grad_norm": 58.89017105102539, "learning_rate": 1.3110705394190872e-05, "loss": 1.4876, "step": 20766 }, { "epoch": 17.23402489626556, "grad_norm": 18.450471878051758, "learning_rate": 1.3110373443983404e-05, "loss": 0.5638, "step": 20767 }, { "epoch": 17.23485477178423, "grad_norm": 37.663875579833984, "learning_rate": 1.3110041493775936e-05, "loss": 0.5028, "step": 20768 }, { "epoch": 17.235684647302904, "grad_norm": 46.553260803222656, "learning_rate": 1.3109709543568465e-05, "loss": 1.0428, "step": 20769 }, { "epoch": 17.236514522821576, "grad_norm": 57.49300765991211, "learning_rate": 1.3109377593360997e-05, "loss": 0.5745, "step": 20770 }, { "epoch": 17.237344398340248, "grad_norm": 19.37065887451172, "learning_rate": 1.3109045643153529e-05, "loss": 0.4365, "step": 20771 }, { "epoch": 17.23817427385892, "grad_norm": 45.49647521972656, "learning_rate": 1.3108713692946061e-05, "loss": 0.4874, "step": 20772 }, { "epoch": 17.239004149377593, "grad_norm": 72.46519470214844, 
"learning_rate": 1.310838174273859e-05, "loss": 1.0801, "step": 20773 }, { "epoch": 17.239834024896265, "grad_norm": 37.529212951660156, "learning_rate": 1.3108049792531122e-05, "loss": 0.409, "step": 20774 }, { "epoch": 17.240663900414937, "grad_norm": 74.04170227050781, "learning_rate": 1.3107717842323652e-05, "loss": 0.8076, "step": 20775 }, { "epoch": 17.24149377593361, "grad_norm": 120.5694580078125, "learning_rate": 1.3107385892116184e-05, "loss": 0.5553, "step": 20776 }, { "epoch": 17.24232365145228, "grad_norm": 82.22388458251953, "learning_rate": 1.3107053941908715e-05, "loss": 0.5648, "step": 20777 }, { "epoch": 17.243153526970953, "grad_norm": 36.142215728759766, "learning_rate": 1.3106721991701245e-05, "loss": 0.4802, "step": 20778 }, { "epoch": 17.243983402489626, "grad_norm": 72.87101745605469, "learning_rate": 1.3106390041493777e-05, "loss": 0.6435, "step": 20779 }, { "epoch": 17.244813278008298, "grad_norm": 29.3228759765625, "learning_rate": 1.310605809128631e-05, "loss": 0.2626, "step": 20780 }, { "epoch": 17.24564315352697, "grad_norm": 30.37281608581543, "learning_rate": 1.3105726141078838e-05, "loss": 0.4794, "step": 20781 }, { "epoch": 17.246473029045642, "grad_norm": 55.9784049987793, "learning_rate": 1.310539419087137e-05, "loss": 0.7152, "step": 20782 }, { "epoch": 17.247302904564314, "grad_norm": 102.27214813232422, "learning_rate": 1.3105062240663902e-05, "loss": 0.8055, "step": 20783 }, { "epoch": 17.248132780082987, "grad_norm": 40.60019302368164, "learning_rate": 1.3104730290456433e-05, "loss": 0.9467, "step": 20784 }, { "epoch": 17.24896265560166, "grad_norm": 37.46431350708008, "learning_rate": 1.3104398340248963e-05, "loss": 1.0296, "step": 20785 }, { "epoch": 17.24979253112033, "grad_norm": 59.50032043457031, "learning_rate": 1.3104066390041495e-05, "loss": 0.3106, "step": 20786 }, { "epoch": 17.250622406639003, "grad_norm": 26.46089744567871, "learning_rate": 1.3103734439834026e-05, "loss": 0.9827, "step": 20787 }, { "epoch": 
17.251452282157675, "grad_norm": 80.06402587890625, "learning_rate": 1.3103402489626558e-05, "loss": 0.8316, "step": 20788 }, { "epoch": 17.252282157676348, "grad_norm": 52.585594177246094, "learning_rate": 1.3103070539419086e-05, "loss": 0.9177, "step": 20789 }, { "epoch": 17.25311203319502, "grad_norm": 49.46927261352539, "learning_rate": 1.3102738589211619e-05, "loss": 0.9325, "step": 20790 }, { "epoch": 17.253941908713692, "grad_norm": 69.27592468261719, "learning_rate": 1.310240663900415e-05, "loss": 1.4088, "step": 20791 }, { "epoch": 17.254771784232364, "grad_norm": 48.17059326171875, "learning_rate": 1.3102074688796683e-05, "loss": 1.007, "step": 20792 }, { "epoch": 17.255601659751036, "grad_norm": 49.50810241699219, "learning_rate": 1.3101742738589213e-05, "loss": 0.7639, "step": 20793 }, { "epoch": 17.25643153526971, "grad_norm": 44.542118072509766, "learning_rate": 1.3101410788381744e-05, "loss": 0.3972, "step": 20794 }, { "epoch": 17.25726141078838, "grad_norm": 44.273075103759766, "learning_rate": 1.3101078838174276e-05, "loss": 0.5552, "step": 20795 }, { "epoch": 17.258091286307053, "grad_norm": 15.997833251953125, "learning_rate": 1.3100746887966806e-05, "loss": 0.3286, "step": 20796 }, { "epoch": 17.258921161825725, "grad_norm": 17.97207260131836, "learning_rate": 1.3100414937759338e-05, "loss": 0.4432, "step": 20797 }, { "epoch": 17.259751037344397, "grad_norm": 19.266660690307617, "learning_rate": 1.3100082987551867e-05, "loss": 0.4395, "step": 20798 }, { "epoch": 17.26058091286307, "grad_norm": 68.60623931884766, "learning_rate": 1.3099751037344399e-05, "loss": 0.6217, "step": 20799 }, { "epoch": 17.261410788381742, "grad_norm": 70.32320404052734, "learning_rate": 1.3099419087136931e-05, "loss": 0.3823, "step": 20800 }, { "epoch": 17.262240663900414, "grad_norm": 48.15919876098633, "learning_rate": 1.3099087136929463e-05, "loss": 0.3544, "step": 20801 }, { "epoch": 17.263070539419086, "grad_norm": 28.65829849243164, "learning_rate": 
1.3098755186721992e-05, "loss": 0.5459, "step": 20802 }, { "epoch": 17.26390041493776, "grad_norm": 54.52997970581055, "learning_rate": 1.3098423236514524e-05, "loss": 0.8804, "step": 20803 }, { "epoch": 17.26473029045643, "grad_norm": 48.1435661315918, "learning_rate": 1.3098091286307054e-05, "loss": 0.8543, "step": 20804 }, { "epoch": 17.265560165975103, "grad_norm": 30.61232566833496, "learning_rate": 1.3097759336099587e-05, "loss": 0.6147, "step": 20805 }, { "epoch": 17.266390041493775, "grad_norm": 23.355735778808594, "learning_rate": 1.3097427385892117e-05, "loss": 0.3046, "step": 20806 }, { "epoch": 17.267219917012447, "grad_norm": 34.01382827758789, "learning_rate": 1.3097095435684647e-05, "loss": 0.3649, "step": 20807 }, { "epoch": 17.26804979253112, "grad_norm": 24.76443862915039, "learning_rate": 1.309676348547718e-05, "loss": 0.6853, "step": 20808 }, { "epoch": 17.26887966804979, "grad_norm": 31.265958786010742, "learning_rate": 1.3096431535269712e-05, "loss": 0.543, "step": 20809 }, { "epoch": 17.269709543568464, "grad_norm": 15.630785942077637, "learning_rate": 1.309609958506224e-05, "loss": 0.4014, "step": 20810 }, { "epoch": 17.270539419087136, "grad_norm": 61.59348678588867, "learning_rate": 1.3095767634854772e-05, "loss": 0.7142, "step": 20811 }, { "epoch": 17.271369294605808, "grad_norm": 30.715858459472656, "learning_rate": 1.3095435684647305e-05, "loss": 0.4441, "step": 20812 }, { "epoch": 17.27219917012448, "grad_norm": 31.020296096801758, "learning_rate": 1.3095103734439835e-05, "loss": 0.8572, "step": 20813 }, { "epoch": 17.273029045643153, "grad_norm": 58.760398864746094, "learning_rate": 1.3094771784232365e-05, "loss": 1.3738, "step": 20814 }, { "epoch": 17.273858921161825, "grad_norm": 24.42115020751953, "learning_rate": 1.3094439834024897e-05, "loss": 0.6346, "step": 20815 }, { "epoch": 17.274688796680497, "grad_norm": 32.47047805786133, "learning_rate": 1.3094107883817428e-05, "loss": 0.7617, "step": 20816 }, { "epoch": 
17.27551867219917, "grad_norm": 27.37102508544922, "learning_rate": 1.309377593360996e-05, "loss": 0.4905, "step": 20817 }, { "epoch": 17.27634854771784, "grad_norm": 31.594797134399414, "learning_rate": 1.3093443983402492e-05, "loss": 0.5357, "step": 20818 }, { "epoch": 17.277178423236514, "grad_norm": 23.784812927246094, "learning_rate": 1.309311203319502e-05, "loss": 0.3827, "step": 20819 }, { "epoch": 17.278008298755186, "grad_norm": 21.087499618530273, "learning_rate": 1.3092780082987553e-05, "loss": 0.5192, "step": 20820 }, { "epoch": 17.278838174273858, "grad_norm": 43.195003509521484, "learning_rate": 1.3092448132780085e-05, "loss": 0.7348, "step": 20821 }, { "epoch": 17.27966804979253, "grad_norm": 41.754783630371094, "learning_rate": 1.3092116182572615e-05, "loss": 0.898, "step": 20822 }, { "epoch": 17.280497925311202, "grad_norm": 47.65876007080078, "learning_rate": 1.3091784232365146e-05, "loss": 0.3621, "step": 20823 }, { "epoch": 17.281327800829875, "grad_norm": 81.29830169677734, "learning_rate": 1.3091452282157678e-05, "loss": 0.554, "step": 20824 }, { "epoch": 17.282157676348547, "grad_norm": 31.480478286743164, "learning_rate": 1.3091120331950208e-05, "loss": 0.3851, "step": 20825 }, { "epoch": 17.28298755186722, "grad_norm": 52.79515838623047, "learning_rate": 1.309078838174274e-05, "loss": 0.4154, "step": 20826 }, { "epoch": 17.28381742738589, "grad_norm": 44.52351760864258, "learning_rate": 1.309045643153527e-05, "loss": 0.4447, "step": 20827 }, { "epoch": 17.284647302904563, "grad_norm": 73.18864440917969, "learning_rate": 1.3090124481327801e-05, "loss": 0.9074, "step": 20828 }, { "epoch": 17.285477178423236, "grad_norm": 29.016803741455078, "learning_rate": 1.3089792531120333e-05, "loss": 0.5073, "step": 20829 }, { "epoch": 17.286307053941908, "grad_norm": 27.235816955566406, "learning_rate": 1.3089460580912866e-05, "loss": 0.5649, "step": 20830 }, { "epoch": 17.28713692946058, "grad_norm": 36.34536361694336, "learning_rate": 
1.3089128630705394e-05, "loss": 0.3543, "step": 20831 }, { "epoch": 17.287966804979252, "grad_norm": 60.394081115722656, "learning_rate": 1.3088796680497926e-05, "loss": 0.737, "step": 20832 }, { "epoch": 17.288796680497924, "grad_norm": 65.47802734375, "learning_rate": 1.3088464730290458e-05, "loss": 1.5496, "step": 20833 }, { "epoch": 17.289626556016596, "grad_norm": 34.008750915527344, "learning_rate": 1.3088132780082989e-05, "loss": 0.3281, "step": 20834 }, { "epoch": 17.29045643153527, "grad_norm": 23.875234603881836, "learning_rate": 1.308780082987552e-05, "loss": 0.5532, "step": 20835 }, { "epoch": 17.29128630705394, "grad_norm": 65.05403137207031, "learning_rate": 1.308746887966805e-05, "loss": 0.9705, "step": 20836 }, { "epoch": 17.292116182572613, "grad_norm": 71.57181549072266, "learning_rate": 1.3087136929460582e-05, "loss": 0.6134, "step": 20837 }, { "epoch": 17.292946058091285, "grad_norm": 11.00917911529541, "learning_rate": 1.3086804979253114e-05, "loss": 0.2473, "step": 20838 }, { "epoch": 17.293775933609957, "grad_norm": 32.15484619140625, "learning_rate": 1.3086473029045643e-05, "loss": 0.7557, "step": 20839 }, { "epoch": 17.29460580912863, "grad_norm": 32.809165954589844, "learning_rate": 1.3086141078838175e-05, "loss": 0.8758, "step": 20840 }, { "epoch": 17.295435684647302, "grad_norm": 60.46780776977539, "learning_rate": 1.3085809128630707e-05, "loss": 0.9187, "step": 20841 }, { "epoch": 17.296265560165974, "grad_norm": 41.535865783691406, "learning_rate": 1.3085477178423239e-05, "loss": 0.71, "step": 20842 }, { "epoch": 17.297095435684646, "grad_norm": 30.636842727661133, "learning_rate": 1.3085145228215768e-05, "loss": 0.4078, "step": 20843 }, { "epoch": 17.29792531120332, "grad_norm": 38.276851654052734, "learning_rate": 1.30848132780083e-05, "loss": 0.6595, "step": 20844 }, { "epoch": 17.29875518672199, "grad_norm": 92.14677429199219, "learning_rate": 1.308448132780083e-05, "loss": 0.5863, "step": 20845 }, { "epoch": 17.299585062240663, 
"grad_norm": 22.292390823364258, "learning_rate": 1.3084149377593362e-05, "loss": 0.3735, "step": 20846 }, { "epoch": 17.300414937759335, "grad_norm": 23.663610458374023, "learning_rate": 1.3083817427385894e-05, "loss": 0.6818, "step": 20847 }, { "epoch": 17.301244813278007, "grad_norm": 77.13749694824219, "learning_rate": 1.3083485477178423e-05, "loss": 0.8752, "step": 20848 }, { "epoch": 17.30207468879668, "grad_norm": 35.50542449951172, "learning_rate": 1.3083153526970955e-05, "loss": 0.9188, "step": 20849 }, { "epoch": 17.30290456431535, "grad_norm": 20.593170166015625, "learning_rate": 1.3082821576763487e-05, "loss": 0.2709, "step": 20850 }, { "epoch": 17.303734439834024, "grad_norm": 55.9203987121582, "learning_rate": 1.3082489626556018e-05, "loss": 0.8795, "step": 20851 }, { "epoch": 17.304564315352696, "grad_norm": 37.11790084838867, "learning_rate": 1.3082157676348548e-05, "loss": 1.3441, "step": 20852 }, { "epoch": 17.305394190871368, "grad_norm": 36.721004486083984, "learning_rate": 1.308182572614108e-05, "loss": 0.4009, "step": 20853 }, { "epoch": 17.30622406639004, "grad_norm": 55.62468338012695, "learning_rate": 1.308149377593361e-05, "loss": 0.6816, "step": 20854 }, { "epoch": 17.307053941908713, "grad_norm": 16.57520866394043, "learning_rate": 1.3081161825726143e-05, "loss": 0.327, "step": 20855 }, { "epoch": 17.307883817427385, "grad_norm": 13.03477668762207, "learning_rate": 1.3080829875518673e-05, "loss": 0.3099, "step": 20856 }, { "epoch": 17.308713692946057, "grad_norm": 43.86397171020508, "learning_rate": 1.3080497925311204e-05, "loss": 0.8693, "step": 20857 }, { "epoch": 17.30954356846473, "grad_norm": 30.081092834472656, "learning_rate": 1.3080165975103736e-05, "loss": 0.3331, "step": 20858 }, { "epoch": 17.3103734439834, "grad_norm": 20.910722732543945, "learning_rate": 1.3079834024896268e-05, "loss": 0.6606, "step": 20859 }, { "epoch": 17.311203319502074, "grad_norm": 20.934906005859375, "learning_rate": 1.3079502074688797e-05, "loss": 
0.3101, "step": 20860 }, { "epoch": 17.312033195020746, "grad_norm": 44.65210723876953, "learning_rate": 1.3079170124481329e-05, "loss": 0.4646, "step": 20861 }, { "epoch": 17.312863070539418, "grad_norm": 23.305572509765625, "learning_rate": 1.307883817427386e-05, "loss": 0.598, "step": 20862 }, { "epoch": 17.31369294605809, "grad_norm": 26.9007511138916, "learning_rate": 1.3078506224066391e-05, "loss": 0.971, "step": 20863 }, { "epoch": 17.314522821576762, "grad_norm": 29.894662857055664, "learning_rate": 1.3078174273858922e-05, "loss": 0.5527, "step": 20864 }, { "epoch": 17.315352697095435, "grad_norm": 24.036209106445312, "learning_rate": 1.3077842323651452e-05, "loss": 0.4684, "step": 20865 }, { "epoch": 17.316182572614107, "grad_norm": 35.4366455078125, "learning_rate": 1.3077510373443984e-05, "loss": 0.5087, "step": 20866 }, { "epoch": 17.31701244813278, "grad_norm": 27.192922592163086, "learning_rate": 1.3077178423236516e-05, "loss": 0.3992, "step": 20867 }, { "epoch": 17.31784232365145, "grad_norm": 62.059906005859375, "learning_rate": 1.3076846473029045e-05, "loss": 1.0255, "step": 20868 }, { "epoch": 17.318672199170123, "grad_norm": 27.624746322631836, "learning_rate": 1.3076514522821577e-05, "loss": 0.5989, "step": 20869 }, { "epoch": 17.319502074688796, "grad_norm": 91.8294448852539, "learning_rate": 1.307618257261411e-05, "loss": 0.4088, "step": 20870 }, { "epoch": 17.320331950207468, "grad_norm": 52.03795623779297, "learning_rate": 1.3075850622406641e-05, "loss": 1.0078, "step": 20871 }, { "epoch": 17.32116182572614, "grad_norm": 45.63404083251953, "learning_rate": 1.3075518672199172e-05, "loss": 0.5777, "step": 20872 }, { "epoch": 17.321991701244812, "grad_norm": 49.244537353515625, "learning_rate": 1.3075186721991702e-05, "loss": 0.7482, "step": 20873 }, { "epoch": 17.322821576763484, "grad_norm": 27.480485916137695, "learning_rate": 1.3074854771784233e-05, "loss": 0.3641, "step": 20874 }, { "epoch": 17.323651452282157, "grad_norm": 
38.85862731933594, "learning_rate": 1.3074522821576765e-05, "loss": 0.4131, "step": 20875 }, { "epoch": 17.32448132780083, "grad_norm": 26.56351089477539, "learning_rate": 1.3074190871369297e-05, "loss": 0.3708, "step": 20876 }, { "epoch": 17.3253112033195, "grad_norm": 31.613473892211914, "learning_rate": 1.3073858921161825e-05, "loss": 0.7629, "step": 20877 }, { "epoch": 17.326141078838173, "grad_norm": 37.32618713378906, "learning_rate": 1.3073526970954358e-05, "loss": 0.9922, "step": 20878 }, { "epoch": 17.326970954356845, "grad_norm": 78.92891693115234, "learning_rate": 1.307319502074689e-05, "loss": 0.5697, "step": 20879 }, { "epoch": 17.327800829875518, "grad_norm": 52.012516021728516, "learning_rate": 1.3072863070539422e-05, "loss": 0.9952, "step": 20880 }, { "epoch": 17.32863070539419, "grad_norm": 32.87968063354492, "learning_rate": 1.307253112033195e-05, "loss": 0.4611, "step": 20881 }, { "epoch": 17.329460580912862, "grad_norm": 45.30496597290039, "learning_rate": 1.3072199170124483e-05, "loss": 0.8154, "step": 20882 }, { "epoch": 17.330290456431534, "grad_norm": 16.437868118286133, "learning_rate": 1.3071867219917013e-05, "loss": 0.2972, "step": 20883 }, { "epoch": 17.331120331950206, "grad_norm": 47.96906280517578, "learning_rate": 1.3071535269709545e-05, "loss": 1.0251, "step": 20884 }, { "epoch": 17.33195020746888, "grad_norm": 18.3795223236084, "learning_rate": 1.3071203319502076e-05, "loss": 0.3137, "step": 20885 }, { "epoch": 17.33278008298755, "grad_norm": 25.3145809173584, "learning_rate": 1.3070871369294606e-05, "loss": 0.499, "step": 20886 }, { "epoch": 17.333609958506223, "grad_norm": 35.43115234375, "learning_rate": 1.3070539419087138e-05, "loss": 1.0449, "step": 20887 }, { "epoch": 17.334439834024895, "grad_norm": 108.75891876220703, "learning_rate": 1.307020746887967e-05, "loss": 0.4352, "step": 20888 }, { "epoch": 17.335269709543567, "grad_norm": 36.10436248779297, "learning_rate": 1.3069875518672199e-05, "loss": 0.5405, "step": 20889 }, 
{ "epoch": 17.33609958506224, "grad_norm": 47.516849517822266, "learning_rate": 1.3069543568464731e-05, "loss": 0.3945, "step": 20890 }, { "epoch": 17.33692946058091, "grad_norm": 22.121788024902344, "learning_rate": 1.3069211618257263e-05, "loss": 0.3784, "step": 20891 }, { "epoch": 17.337759336099584, "grad_norm": 64.24057006835938, "learning_rate": 1.3068879668049794e-05, "loss": 0.9857, "step": 20892 }, { "epoch": 17.338589211618256, "grad_norm": 89.30256652832031, "learning_rate": 1.3068547717842324e-05, "loss": 0.7966, "step": 20893 }, { "epoch": 17.33941908713693, "grad_norm": 27.52763557434082, "learning_rate": 1.3068215767634856e-05, "loss": 0.5359, "step": 20894 }, { "epoch": 17.3402489626556, "grad_norm": 35.70771789550781, "learning_rate": 1.3067883817427386e-05, "loss": 0.6455, "step": 20895 }, { "epoch": 17.341078838174273, "grad_norm": 55.365234375, "learning_rate": 1.3067551867219919e-05, "loss": 0.6691, "step": 20896 }, { "epoch": 17.341908713692945, "grad_norm": 101.6836166381836, "learning_rate": 1.306721991701245e-05, "loss": 0.8766, "step": 20897 }, { "epoch": 17.342738589211617, "grad_norm": 64.15043640136719, "learning_rate": 1.306688796680498e-05, "loss": 0.4021, "step": 20898 }, { "epoch": 17.34356846473029, "grad_norm": 77.07942199707031, "learning_rate": 1.3066556016597512e-05, "loss": 0.5719, "step": 20899 }, { "epoch": 17.34439834024896, "grad_norm": 43.895301818847656, "learning_rate": 1.3066224066390044e-05, "loss": 1.0483, "step": 20900 }, { "epoch": 17.345228215767634, "grad_norm": 108.28587341308594, "learning_rate": 1.3065892116182574e-05, "loss": 0.8599, "step": 20901 }, { "epoch": 17.346058091286306, "grad_norm": 107.72145080566406, "learning_rate": 1.3065560165975104e-05, "loss": 0.7283, "step": 20902 }, { "epoch": 17.346887966804978, "grad_norm": 44.317012786865234, "learning_rate": 1.3065228215767637e-05, "loss": 0.7532, "step": 20903 }, { "epoch": 17.34771784232365, "grad_norm": 157.3938751220703, "learning_rate": 
1.3064896265560167e-05, "loss": 0.8166, "step": 20904 }, { "epoch": 17.348547717842322, "grad_norm": 27.9775390625, "learning_rate": 1.3064564315352699e-05, "loss": 0.4134, "step": 20905 }, { "epoch": 17.349377593360995, "grad_norm": 32.77178192138672, "learning_rate": 1.3064232365145228e-05, "loss": 0.3453, "step": 20906 }, { "epoch": 17.350207468879667, "grad_norm": 42.231021881103516, "learning_rate": 1.306390041493776e-05, "loss": 0.5801, "step": 20907 }, { "epoch": 17.35103734439834, "grad_norm": 35.10001754760742, "learning_rate": 1.3063568464730292e-05, "loss": 0.4597, "step": 20908 }, { "epoch": 17.35186721991701, "grad_norm": 49.565242767333984, "learning_rate": 1.3063236514522824e-05, "loss": 1.2793, "step": 20909 }, { "epoch": 17.352697095435683, "grad_norm": 24.00005340576172, "learning_rate": 1.3062904564315353e-05, "loss": 0.3933, "step": 20910 }, { "epoch": 17.353526970954356, "grad_norm": 27.892356872558594, "learning_rate": 1.3062572614107885e-05, "loss": 0.5312, "step": 20911 }, { "epoch": 17.354356846473028, "grad_norm": 22.0683536529541, "learning_rate": 1.3062240663900417e-05, "loss": 0.3677, "step": 20912 }, { "epoch": 17.3551867219917, "grad_norm": 44.447998046875, "learning_rate": 1.3061908713692947e-05, "loss": 0.8918, "step": 20913 }, { "epoch": 17.356016597510372, "grad_norm": 39.80817794799805, "learning_rate": 1.3061576763485478e-05, "loss": 0.9065, "step": 20914 }, { "epoch": 17.356846473029044, "grad_norm": 100.00022888183594, "learning_rate": 1.3061244813278008e-05, "loss": 0.5639, "step": 20915 }, { "epoch": 17.357676348547717, "grad_norm": 47.29094696044922, "learning_rate": 1.306091286307054e-05, "loss": 0.3593, "step": 20916 }, { "epoch": 17.35850622406639, "grad_norm": 17.81839370727539, "learning_rate": 1.3060580912863073e-05, "loss": 0.4357, "step": 20917 }, { "epoch": 17.35933609958506, "grad_norm": 45.517967224121094, "learning_rate": 1.3060248962655601e-05, "loss": 0.529, "step": 20918 }, { "epoch": 17.360165975103733, 
"grad_norm": 59.44544982910156, "learning_rate": 1.3059917012448133e-05, "loss": 0.5572, "step": 20919 }, { "epoch": 17.360995850622405, "grad_norm": 40.54777526855469, "learning_rate": 1.3059585062240665e-05, "loss": 0.5191, "step": 20920 }, { "epoch": 17.361825726141078, "grad_norm": 30.887969970703125, "learning_rate": 1.3059253112033196e-05, "loss": 0.6285, "step": 20921 }, { "epoch": 17.36265560165975, "grad_norm": 34.674415588378906, "learning_rate": 1.3058921161825726e-05, "loss": 0.3531, "step": 20922 }, { "epoch": 17.363485477178422, "grad_norm": 47.67985534667969, "learning_rate": 1.3058589211618258e-05, "loss": 0.5309, "step": 20923 }, { "epoch": 17.364315352697094, "grad_norm": 92.55226135253906, "learning_rate": 1.3058257261410789e-05, "loss": 0.4169, "step": 20924 }, { "epoch": 17.365145228215766, "grad_norm": 50.965484619140625, "learning_rate": 1.3057925311203321e-05, "loss": 0.7493, "step": 20925 }, { "epoch": 17.36597510373444, "grad_norm": 60.772857666015625, "learning_rate": 1.3057593360995853e-05, "loss": 0.8281, "step": 20926 }, { "epoch": 17.36680497925311, "grad_norm": 66.74295043945312, "learning_rate": 1.3057261410788382e-05, "loss": 0.6969, "step": 20927 }, { "epoch": 17.367634854771783, "grad_norm": 27.405776977539062, "learning_rate": 1.3056929460580914e-05, "loss": 0.5635, "step": 20928 }, { "epoch": 17.368464730290455, "grad_norm": 26.452125549316406, "learning_rate": 1.3056597510373446e-05, "loss": 0.3327, "step": 20929 }, { "epoch": 17.369294605809127, "grad_norm": 23.672725677490234, "learning_rate": 1.3056265560165976e-05, "loss": 0.6591, "step": 20930 }, { "epoch": 17.3701244813278, "grad_norm": 74.11882019042969, "learning_rate": 1.3055933609958507e-05, "loss": 0.483, "step": 20931 }, { "epoch": 17.37095435684647, "grad_norm": 54.004886627197266, "learning_rate": 1.3055601659751039e-05, "loss": 0.749, "step": 20932 }, { "epoch": 17.371784232365144, "grad_norm": 60.15687942504883, "learning_rate": 1.305526970954357e-05, "loss": 
0.4855, "step": 20933 }, { "epoch": 17.372614107883816, "grad_norm": 88.06634521484375, "learning_rate": 1.3054937759336101e-05, "loss": 0.7697, "step": 20934 }, { "epoch": 17.37344398340249, "grad_norm": 55.841163635253906, "learning_rate": 1.305460580912863e-05, "loss": 0.6086, "step": 20935 }, { "epoch": 17.37427385892116, "grad_norm": 19.773983001708984, "learning_rate": 1.3054273858921162e-05, "loss": 0.4264, "step": 20936 }, { "epoch": 17.375103734439833, "grad_norm": 36.152244567871094, "learning_rate": 1.3053941908713694e-05, "loss": 0.4575, "step": 20937 }, { "epoch": 17.375933609958505, "grad_norm": 33.57899856567383, "learning_rate": 1.3053609958506226e-05, "loss": 0.6567, "step": 20938 }, { "epoch": 17.376763485477177, "grad_norm": 36.244014739990234, "learning_rate": 1.3053278008298755e-05, "loss": 0.7499, "step": 20939 }, { "epoch": 17.37759336099585, "grad_norm": 91.06033325195312, "learning_rate": 1.3052946058091287e-05, "loss": 0.4186, "step": 20940 }, { "epoch": 17.37842323651452, "grad_norm": 36.95408630371094, "learning_rate": 1.305261410788382e-05, "loss": 0.6955, "step": 20941 }, { "epoch": 17.379253112033194, "grad_norm": 25.601335525512695, "learning_rate": 1.305228215767635e-05, "loss": 0.5402, "step": 20942 }, { "epoch": 17.380082987551866, "grad_norm": 22.327468872070312, "learning_rate": 1.305195020746888e-05, "loss": 0.3944, "step": 20943 }, { "epoch": 17.380912863070538, "grad_norm": 19.900972366333008, "learning_rate": 1.305161825726141e-05, "loss": 0.4603, "step": 20944 }, { "epoch": 17.38174273858921, "grad_norm": 49.322940826416016, "learning_rate": 1.3051286307053943e-05, "loss": 1.3464, "step": 20945 }, { "epoch": 17.382572614107882, "grad_norm": 24.971738815307617, "learning_rate": 1.3050954356846475e-05, "loss": 0.5166, "step": 20946 }, { "epoch": 17.383402489626555, "grad_norm": 44.76383972167969, "learning_rate": 1.3050622406639004e-05, "loss": 0.8344, "step": 20947 }, { "epoch": 17.384232365145227, "grad_norm": 
36.2972297668457, "learning_rate": 1.3050290456431536e-05, "loss": 0.6037, "step": 20948 }, { "epoch": 17.3850622406639, "grad_norm": 44.077308654785156, "learning_rate": 1.3049958506224068e-05, "loss": 0.6306, "step": 20949 }, { "epoch": 17.38589211618257, "grad_norm": 43.14393997192383, "learning_rate": 1.30496265560166e-05, "loss": 1.2084, "step": 20950 }, { "epoch": 17.386721991701243, "grad_norm": 90.7895736694336, "learning_rate": 1.304929460580913e-05, "loss": 0.5873, "step": 20951 }, { "epoch": 17.387551867219916, "grad_norm": 43.566768646240234, "learning_rate": 1.304896265560166e-05, "loss": 0.5911, "step": 20952 }, { "epoch": 17.388381742738588, "grad_norm": 29.498537063598633, "learning_rate": 1.3048630705394191e-05, "loss": 0.5435, "step": 20953 }, { "epoch": 17.38921161825726, "grad_norm": 81.77719116210938, "learning_rate": 1.3048298755186723e-05, "loss": 0.6295, "step": 20954 }, { "epoch": 17.390041493775932, "grad_norm": 64.87553405761719, "learning_rate": 1.3047966804979255e-05, "loss": 0.4768, "step": 20955 }, { "epoch": 17.390871369294604, "grad_norm": 37.603187561035156, "learning_rate": 1.3047634854771784e-05, "loss": 0.5892, "step": 20956 }, { "epoch": 17.391701244813277, "grad_norm": 22.61227798461914, "learning_rate": 1.3047302904564316e-05, "loss": 0.5895, "step": 20957 }, { "epoch": 17.39253112033195, "grad_norm": 32.71023941040039, "learning_rate": 1.3046970954356848e-05, "loss": 0.4273, "step": 20958 }, { "epoch": 17.39336099585062, "grad_norm": 26.964900970458984, "learning_rate": 1.304663900414938e-05, "loss": 0.6744, "step": 20959 }, { "epoch": 17.394190871369293, "grad_norm": 19.521345138549805, "learning_rate": 1.3046307053941909e-05, "loss": 0.2801, "step": 20960 }, { "epoch": 17.395020746887965, "grad_norm": 82.29832458496094, "learning_rate": 1.3045975103734441e-05, "loss": 0.8007, "step": 20961 }, { "epoch": 17.395850622406638, "grad_norm": 43.80384826660156, "learning_rate": 1.3045643153526972e-05, "loss": 0.5222, "step": 
20962 }, { "epoch": 17.39668049792531, "grad_norm": 47.34200668334961, "learning_rate": 1.3045311203319504e-05, "loss": 0.8821, "step": 20963 }, { "epoch": 17.397510373443982, "grad_norm": 65.6229476928711, "learning_rate": 1.3044979253112034e-05, "loss": 0.7402, "step": 20964 }, { "epoch": 17.398340248962654, "grad_norm": 30.672639846801758, "learning_rate": 1.3044647302904565e-05, "loss": 0.2888, "step": 20965 }, { "epoch": 17.399170124481326, "grad_norm": 39.57957077026367, "learning_rate": 1.3044315352697097e-05, "loss": 0.498, "step": 20966 }, { "epoch": 17.4, "grad_norm": 89.55846405029297, "learning_rate": 1.3043983402489629e-05, "loss": 1.025, "step": 20967 }, { "epoch": 17.40082987551867, "grad_norm": 32.38731384277344, "learning_rate": 1.3043651452282157e-05, "loss": 0.5457, "step": 20968 }, { "epoch": 17.401659751037343, "grad_norm": 33.444889068603516, "learning_rate": 1.304331950207469e-05, "loss": 0.5663, "step": 20969 }, { "epoch": 17.402489626556015, "grad_norm": 25.6397705078125, "learning_rate": 1.3042987551867222e-05, "loss": 0.4224, "step": 20970 }, { "epoch": 17.403319502074687, "grad_norm": 46.2242317199707, "learning_rate": 1.3042655601659752e-05, "loss": 1.0411, "step": 20971 }, { "epoch": 17.40414937759336, "grad_norm": 58.63471603393555, "learning_rate": 1.3042323651452283e-05, "loss": 0.386, "step": 20972 }, { "epoch": 17.40497925311203, "grad_norm": 57.18737030029297, "learning_rate": 1.3041991701244815e-05, "loss": 0.8926, "step": 20973 }, { "epoch": 17.405809128630704, "grad_norm": 93.65142822265625, "learning_rate": 1.3041659751037345e-05, "loss": 0.9122, "step": 20974 }, { "epoch": 17.406639004149376, "grad_norm": 69.15828704833984, "learning_rate": 1.3041327800829877e-05, "loss": 0.5219, "step": 20975 }, { "epoch": 17.40746887966805, "grad_norm": 35.64369201660156, "learning_rate": 1.304099585062241e-05, "loss": 0.4456, "step": 20976 }, { "epoch": 17.40829875518672, "grad_norm": 27.22960090637207, "learning_rate": 
1.3040663900414938e-05, "loss": 0.449, "step": 20977 }, { "epoch": 17.409128630705393, "grad_norm": 69.75163269042969, "learning_rate": 1.304033195020747e-05, "loss": 1.0147, "step": 20978 }, { "epoch": 17.409958506224065, "grad_norm": 38.12411117553711, "learning_rate": 1.3040000000000002e-05, "loss": 0.6441, "step": 20979 }, { "epoch": 17.410788381742737, "grad_norm": 22.257047653198242, "learning_rate": 1.3039668049792533e-05, "loss": 0.4509, "step": 20980 }, { "epoch": 17.41161825726141, "grad_norm": 67.18852233886719, "learning_rate": 1.3039336099585063e-05, "loss": 0.5518, "step": 20981 }, { "epoch": 17.41244813278008, "grad_norm": 25.027000427246094, "learning_rate": 1.3039004149377593e-05, "loss": 0.2548, "step": 20982 }, { "epoch": 17.413278008298754, "grad_norm": 37.08755874633789, "learning_rate": 1.3038672199170126e-05, "loss": 0.5866, "step": 20983 }, { "epoch": 17.414107883817426, "grad_norm": 80.1197738647461, "learning_rate": 1.3038340248962658e-05, "loss": 1.788, "step": 20984 }, { "epoch": 17.414937759336098, "grad_norm": 64.34912109375, "learning_rate": 1.3038008298755186e-05, "loss": 0.445, "step": 20985 }, { "epoch": 17.41576763485477, "grad_norm": 66.73624420166016, "learning_rate": 1.3037676348547718e-05, "loss": 0.7591, "step": 20986 }, { "epoch": 17.416597510373443, "grad_norm": 26.794355392456055, "learning_rate": 1.303734439834025e-05, "loss": 0.3463, "step": 20987 }, { "epoch": 17.417427385892115, "grad_norm": 15.422075271606445, "learning_rate": 1.3037012448132783e-05, "loss": 0.3025, "step": 20988 }, { "epoch": 17.418257261410787, "grad_norm": 38.58086013793945, "learning_rate": 1.3036680497925311e-05, "loss": 0.9862, "step": 20989 }, { "epoch": 17.41908713692946, "grad_norm": 12.196571350097656, "learning_rate": 1.3036348547717844e-05, "loss": 0.169, "step": 20990 }, { "epoch": 17.41991701244813, "grad_norm": 73.87357330322266, "learning_rate": 1.3036016597510374e-05, "loss": 0.9999, "step": 20991 }, { "epoch": 17.420746887966803, 
"grad_norm": 23.793243408203125, "learning_rate": 1.3035684647302906e-05, "loss": 0.5473, "step": 20992 }, { "epoch": 17.421576763485476, "grad_norm": 40.134193420410156, "learning_rate": 1.3035352697095436e-05, "loss": 0.9739, "step": 20993 }, { "epoch": 17.422406639004148, "grad_norm": 61.78086853027344, "learning_rate": 1.3035020746887967e-05, "loss": 1.123, "step": 20994 }, { "epoch": 17.42323651452282, "grad_norm": 34.47287368774414, "learning_rate": 1.3034688796680499e-05, "loss": 0.5625, "step": 20995 }, { "epoch": 17.424066390041492, "grad_norm": 40.084232330322266, "learning_rate": 1.3034356846473031e-05, "loss": 0.429, "step": 20996 }, { "epoch": 17.424896265560164, "grad_norm": 109.48064422607422, "learning_rate": 1.303402489626556e-05, "loss": 1.1576, "step": 20997 }, { "epoch": 17.425726141078837, "grad_norm": 21.90526580810547, "learning_rate": 1.3033692946058092e-05, "loss": 0.4164, "step": 20998 }, { "epoch": 17.42655601659751, "grad_norm": 16.301664352416992, "learning_rate": 1.3033360995850624e-05, "loss": 0.3054, "step": 20999 }, { "epoch": 17.42738589211618, "grad_norm": 85.72824096679688, "learning_rate": 1.3033029045643154e-05, "loss": 0.6454, "step": 21000 }, { "epoch": 17.428215767634853, "grad_norm": 65.89409637451172, "learning_rate": 1.3032697095435685e-05, "loss": 0.9553, "step": 21001 }, { "epoch": 17.429045643153525, "grad_norm": 8.207706451416016, "learning_rate": 1.3032365145228217e-05, "loss": 0.2355, "step": 21002 }, { "epoch": 17.429875518672198, "grad_norm": 18.57600212097168, "learning_rate": 1.3032033195020747e-05, "loss": 0.4051, "step": 21003 }, { "epoch": 17.43070539419087, "grad_norm": 19.77780532836914, "learning_rate": 1.303170124481328e-05, "loss": 0.3213, "step": 21004 }, { "epoch": 17.431535269709542, "grad_norm": 33.18184280395508, "learning_rate": 1.3031369294605812e-05, "loss": 0.7956, "step": 21005 }, { "epoch": 17.432365145228214, "grad_norm": 36.85219192504883, "learning_rate": 1.303103734439834e-05, "loss": 
0.7749, "step": 21006 }, { "epoch": 17.433195020746886, "grad_norm": 32.695003509521484, "learning_rate": 1.3030705394190872e-05, "loss": 0.677, "step": 21007 }, { "epoch": 17.43402489626556, "grad_norm": 28.470252990722656, "learning_rate": 1.3030373443983405e-05, "loss": 0.8457, "step": 21008 }, { "epoch": 17.43485477178423, "grad_norm": 46.756229400634766, "learning_rate": 1.3030041493775935e-05, "loss": 0.8744, "step": 21009 }, { "epoch": 17.435684647302903, "grad_norm": 33.99361038208008, "learning_rate": 1.3029709543568465e-05, "loss": 0.7565, "step": 21010 }, { "epoch": 17.436514522821575, "grad_norm": 107.90892791748047, "learning_rate": 1.3029377593360997e-05, "loss": 0.5968, "step": 21011 }, { "epoch": 17.437344398340247, "grad_norm": 39.4974250793457, "learning_rate": 1.3029045643153528e-05, "loss": 0.5652, "step": 21012 }, { "epoch": 17.43817427385892, "grad_norm": 25.509672164916992, "learning_rate": 1.302871369294606e-05, "loss": 0.7103, "step": 21013 }, { "epoch": 17.439004149377592, "grad_norm": 29.656368255615234, "learning_rate": 1.3028381742738589e-05, "loss": 0.6001, "step": 21014 }, { "epoch": 17.439834024896264, "grad_norm": 16.217008590698242, "learning_rate": 1.302804979253112e-05, "loss": 0.2834, "step": 21015 }, { "epoch": 17.440663900414936, "grad_norm": 79.50562286376953, "learning_rate": 1.3027717842323653e-05, "loss": 1.0702, "step": 21016 }, { "epoch": 17.44149377593361, "grad_norm": 46.15480041503906, "learning_rate": 1.3027385892116185e-05, "loss": 0.5866, "step": 21017 }, { "epoch": 17.44232365145228, "grad_norm": 89.57884216308594, "learning_rate": 1.3027053941908714e-05, "loss": 1.1007, "step": 21018 }, { "epoch": 17.443153526970953, "grad_norm": 35.05741500854492, "learning_rate": 1.3026721991701246e-05, "loss": 0.4758, "step": 21019 }, { "epoch": 17.443983402489625, "grad_norm": 68.95484924316406, "learning_rate": 1.3026390041493778e-05, "loss": 0.9969, "step": 21020 }, { "epoch": 17.444813278008297, "grad_norm": 
58.42424392700195, "learning_rate": 1.3026058091286308e-05, "loss": 0.7482, "step": 21021 }, { "epoch": 17.44564315352697, "grad_norm": 44.5971794128418, "learning_rate": 1.3025726141078839e-05, "loss": 0.606, "step": 21022 }, { "epoch": 17.44647302904564, "grad_norm": 29.265518188476562, "learning_rate": 1.302539419087137e-05, "loss": 0.2702, "step": 21023 }, { "epoch": 17.447302904564314, "grad_norm": 56.35448455810547, "learning_rate": 1.3025062240663901e-05, "loss": 0.4329, "step": 21024 }, { "epoch": 17.448132780082986, "grad_norm": 17.124616622924805, "learning_rate": 1.3024730290456433e-05, "loss": 0.6843, "step": 21025 }, { "epoch": 17.448962655601658, "grad_norm": 73.41156768798828, "learning_rate": 1.3024398340248962e-05, "loss": 0.759, "step": 21026 }, { "epoch": 17.44979253112033, "grad_norm": 75.01520538330078, "learning_rate": 1.3024066390041494e-05, "loss": 0.5258, "step": 21027 }, { "epoch": 17.450622406639003, "grad_norm": 52.71560287475586, "learning_rate": 1.3023734439834026e-05, "loss": 0.8674, "step": 21028 }, { "epoch": 17.451452282157675, "grad_norm": 45.39691162109375, "learning_rate": 1.3023402489626558e-05, "loss": 0.915, "step": 21029 }, { "epoch": 17.452282157676347, "grad_norm": 27.083606719970703, "learning_rate": 1.3023070539419089e-05, "loss": 0.5231, "step": 21030 }, { "epoch": 17.45311203319502, "grad_norm": 42.65047073364258, "learning_rate": 1.302273858921162e-05, "loss": 0.7458, "step": 21031 }, { "epoch": 17.45394190871369, "grad_norm": 16.858781814575195, "learning_rate": 1.302240663900415e-05, "loss": 0.3469, "step": 21032 }, { "epoch": 17.454771784232364, "grad_norm": 34.577796936035156, "learning_rate": 1.3022074688796682e-05, "loss": 0.637, "step": 21033 }, { "epoch": 17.455601659751036, "grad_norm": 33.55061721801758, "learning_rate": 1.3021742738589214e-05, "loss": 0.7758, "step": 21034 }, { "epoch": 17.456431535269708, "grad_norm": 49.674564361572266, "learning_rate": 1.3021410788381743e-05, "loss": 0.6794, "step": 
21035 }, { "epoch": 17.45726141078838, "grad_norm": 36.33512496948242, "learning_rate": 1.3021078838174275e-05, "loss": 0.9454, "step": 21036 }, { "epoch": 17.458091286307052, "grad_norm": 51.558719635009766, "learning_rate": 1.3020746887966807e-05, "loss": 0.4811, "step": 21037 }, { "epoch": 17.458921161825725, "grad_norm": 22.061548233032227, "learning_rate": 1.3020414937759337e-05, "loss": 0.5016, "step": 21038 }, { "epoch": 17.459751037344397, "grad_norm": 111.0943374633789, "learning_rate": 1.3020082987551868e-05, "loss": 1.8427, "step": 21039 }, { "epoch": 17.46058091286307, "grad_norm": 53.967002868652344, "learning_rate": 1.30197510373444e-05, "loss": 0.9516, "step": 21040 }, { "epoch": 17.46141078838174, "grad_norm": 43.10416030883789, "learning_rate": 1.301941908713693e-05, "loss": 1.002, "step": 21041 }, { "epoch": 17.462240663900413, "grad_norm": 57.569374084472656, "learning_rate": 1.3019087136929462e-05, "loss": 0.7759, "step": 21042 }, { "epoch": 17.463070539419085, "grad_norm": 41.668949127197266, "learning_rate": 1.3018755186721993e-05, "loss": 0.651, "step": 21043 }, { "epoch": 17.463900414937758, "grad_norm": 33.09930419921875, "learning_rate": 1.3018423236514523e-05, "loss": 0.4857, "step": 21044 }, { "epoch": 17.46473029045643, "grad_norm": 56.70696258544922, "learning_rate": 1.3018091286307055e-05, "loss": 0.796, "step": 21045 }, { "epoch": 17.465560165975102, "grad_norm": 26.575546264648438, "learning_rate": 1.3017759336099587e-05, "loss": 0.4524, "step": 21046 }, { "epoch": 17.466390041493774, "grad_norm": 63.181304931640625, "learning_rate": 1.3017427385892116e-05, "loss": 0.5246, "step": 21047 }, { "epoch": 17.467219917012446, "grad_norm": 23.29679298400879, "learning_rate": 1.3017095435684648e-05, "loss": 0.5403, "step": 21048 }, { "epoch": 17.46804979253112, "grad_norm": 30.39276695251465, "learning_rate": 1.301676348547718e-05, "loss": 0.5264, "step": 21049 }, { "epoch": 17.46887966804979, "grad_norm": 73.92263793945312, 
"learning_rate": 1.301643153526971e-05, "loss": 0.4292, "step": 21050 }, { "epoch": 17.469709543568463, "grad_norm": 38.08555221557617, "learning_rate": 1.3016099585062241e-05, "loss": 0.7405, "step": 21051 }, { "epoch": 17.470539419087135, "grad_norm": 54.192352294921875, "learning_rate": 1.3015767634854772e-05, "loss": 0.5656, "step": 21052 }, { "epoch": 17.471369294605807, "grad_norm": 97.5355453491211, "learning_rate": 1.3015435684647304e-05, "loss": 0.715, "step": 21053 }, { "epoch": 17.47219917012448, "grad_norm": 29.373157501220703, "learning_rate": 1.3015103734439836e-05, "loss": 0.6063, "step": 21054 }, { "epoch": 17.473029045643152, "grad_norm": 47.33827209472656, "learning_rate": 1.3014771784232368e-05, "loss": 0.4805, "step": 21055 }, { "epoch": 17.473858921161824, "grad_norm": 32.15505599975586, "learning_rate": 1.3014439834024897e-05, "loss": 0.4266, "step": 21056 }, { "epoch": 17.474688796680496, "grad_norm": 56.55694580078125, "learning_rate": 1.3014107883817429e-05, "loss": 0.6188, "step": 21057 }, { "epoch": 17.47551867219917, "grad_norm": 38.398475646972656, "learning_rate": 1.301377593360996e-05, "loss": 1.0173, "step": 21058 }, { "epoch": 17.47634854771784, "grad_norm": 26.22679328918457, "learning_rate": 1.3013443983402491e-05, "loss": 0.3317, "step": 21059 }, { "epoch": 17.477178423236513, "grad_norm": 33.330413818359375, "learning_rate": 1.3013112033195022e-05, "loss": 1.0221, "step": 21060 }, { "epoch": 17.478008298755185, "grad_norm": 33.038944244384766, "learning_rate": 1.3012780082987552e-05, "loss": 0.5007, "step": 21061 }, { "epoch": 17.478838174273857, "grad_norm": 24.884477615356445, "learning_rate": 1.3012448132780084e-05, "loss": 0.4909, "step": 21062 }, { "epoch": 17.47966804979253, "grad_norm": 86.44589233398438, "learning_rate": 1.3012116182572616e-05, "loss": 0.8794, "step": 21063 }, { "epoch": 17.4804979253112, "grad_norm": 42.796695709228516, "learning_rate": 1.3011784232365145e-05, "loss": 0.3657, "step": 21064 }, { "epoch": 
17.481327800829874, "grad_norm": 18.73896598815918, "learning_rate": 1.3011452282157677e-05, "loss": 0.3756, "step": 21065 }, { "epoch": 17.482157676348546, "grad_norm": 19.15943717956543, "learning_rate": 1.301112033195021e-05, "loss": 0.5396, "step": 21066 }, { "epoch": 17.482987551867218, "grad_norm": 27.257131576538086, "learning_rate": 1.3010788381742741e-05, "loss": 0.4398, "step": 21067 }, { "epoch": 17.48381742738589, "grad_norm": 55.804351806640625, "learning_rate": 1.301045643153527e-05, "loss": 0.5746, "step": 21068 }, { "epoch": 17.484647302904563, "grad_norm": 47.303287506103516, "learning_rate": 1.3010124481327802e-05, "loss": 0.8535, "step": 21069 }, { "epoch": 17.485477178423235, "grad_norm": 71.62890625, "learning_rate": 1.3009792531120333e-05, "loss": 0.8215, "step": 21070 }, { "epoch": 17.486307053941907, "grad_norm": 70.94807434082031, "learning_rate": 1.3009460580912865e-05, "loss": 1.0884, "step": 21071 }, { "epoch": 17.48713692946058, "grad_norm": 29.30878448486328, "learning_rate": 1.3009128630705395e-05, "loss": 0.5243, "step": 21072 }, { "epoch": 17.48796680497925, "grad_norm": 75.34037780761719, "learning_rate": 1.3008796680497925e-05, "loss": 0.7305, "step": 21073 }, { "epoch": 17.488796680497924, "grad_norm": 16.919694900512695, "learning_rate": 1.3008464730290458e-05, "loss": 0.277, "step": 21074 }, { "epoch": 17.489626556016596, "grad_norm": 28.556381225585938, "learning_rate": 1.300813278008299e-05, "loss": 0.4545, "step": 21075 }, { "epoch": 17.490456431535268, "grad_norm": 23.744783401489258, "learning_rate": 1.3007800829875518e-05, "loss": 0.6855, "step": 21076 }, { "epoch": 17.49128630705394, "grad_norm": 37.34202194213867, "learning_rate": 1.300746887966805e-05, "loss": 0.5359, "step": 21077 }, { "epoch": 17.492116182572612, "grad_norm": 42.9633674621582, "learning_rate": 1.3007136929460583e-05, "loss": 0.6856, "step": 21078 }, { "epoch": 17.492946058091285, "grad_norm": 42.76778793334961, "learning_rate": 
1.3006804979253113e-05, "loss": 0.8231, "step": 21079 }, { "epoch": 17.49377593360996, "grad_norm": 33.47832489013672, "learning_rate": 1.3006473029045643e-05, "loss": 0.4375, "step": 21080 }, { "epoch": 17.49460580912863, "grad_norm": 27.229999542236328, "learning_rate": 1.3006141078838176e-05, "loss": 0.5755, "step": 21081 }, { "epoch": 17.495435684647305, "grad_norm": 41.62232208251953, "learning_rate": 1.3005809128630706e-05, "loss": 0.5356, "step": 21082 }, { "epoch": 17.496265560165973, "grad_norm": 48.526336669921875, "learning_rate": 1.3005477178423238e-05, "loss": 0.3858, "step": 21083 }, { "epoch": 17.49709543568465, "grad_norm": 59.016021728515625, "learning_rate": 1.300514522821577e-05, "loss": 0.9585, "step": 21084 }, { "epoch": 17.497925311203318, "grad_norm": 17.577104568481445, "learning_rate": 1.3004813278008299e-05, "loss": 0.3129, "step": 21085 }, { "epoch": 17.498755186721993, "grad_norm": 24.912750244140625, "learning_rate": 1.3004481327800831e-05, "loss": 0.5616, "step": 21086 }, { "epoch": 17.499585062240662, "grad_norm": 49.86006164550781, "learning_rate": 1.3004149377593363e-05, "loss": 0.8075, "step": 21087 }, { "epoch": 17.500414937759338, "grad_norm": 43.00465774536133, "learning_rate": 1.3003817427385894e-05, "loss": 0.5679, "step": 21088 }, { "epoch": 17.501244813278007, "grad_norm": 34.71257019042969, "learning_rate": 1.3003485477178424e-05, "loss": 0.5569, "step": 21089 }, { "epoch": 17.502074688796682, "grad_norm": 25.41893768310547, "learning_rate": 1.3003153526970956e-05, "loss": 0.4756, "step": 21090 }, { "epoch": 17.50290456431535, "grad_norm": 36.30864715576172, "learning_rate": 1.3002821576763486e-05, "loss": 0.7738, "step": 21091 }, { "epoch": 17.503734439834027, "grad_norm": 26.51950454711914, "learning_rate": 1.3002489626556019e-05, "loss": 0.6924, "step": 21092 }, { "epoch": 17.504564315352695, "grad_norm": 59.91584014892578, "learning_rate": 1.3002157676348547e-05, "loss": 0.6976, "step": 21093 }, { "epoch": 
17.50539419087137, "grad_norm": 35.14752960205078, "learning_rate": 1.300182572614108e-05, "loss": 0.5624, "step": 21094 }, { "epoch": 17.50622406639004, "grad_norm": 28.647031784057617, "learning_rate": 1.3001493775933611e-05, "loss": 0.4572, "step": 21095 }, { "epoch": 17.507053941908715, "grad_norm": 131.83103942871094, "learning_rate": 1.3001161825726144e-05, "loss": 1.1803, "step": 21096 }, { "epoch": 17.507883817427384, "grad_norm": 55.464351654052734, "learning_rate": 1.3000829875518672e-05, "loss": 0.865, "step": 21097 }, { "epoch": 17.50871369294606, "grad_norm": 179.6553192138672, "learning_rate": 1.3000497925311204e-05, "loss": 0.9883, "step": 21098 }, { "epoch": 17.50954356846473, "grad_norm": 43.126739501953125, "learning_rate": 1.3000165975103735e-05, "loss": 0.3896, "step": 21099 }, { "epoch": 17.510373443983404, "grad_norm": 37.5101318359375, "learning_rate": 1.2999834024896267e-05, "loss": 0.6483, "step": 21100 }, { "epoch": 17.511203319502073, "grad_norm": 30.38300132751465, "learning_rate": 1.2999502074688797e-05, "loss": 0.4297, "step": 21101 }, { "epoch": 17.51203319502075, "grad_norm": 44.90464401245117, "learning_rate": 1.2999170124481328e-05, "loss": 0.5742, "step": 21102 }, { "epoch": 17.512863070539417, "grad_norm": 36.52410888671875, "learning_rate": 1.299883817427386e-05, "loss": 0.7513, "step": 21103 }, { "epoch": 17.513692946058093, "grad_norm": 31.1029052734375, "learning_rate": 1.2998506224066392e-05, "loss": 0.7603, "step": 21104 }, { "epoch": 17.51452282157676, "grad_norm": 30.322879791259766, "learning_rate": 1.299817427385892e-05, "loss": 0.6422, "step": 21105 }, { "epoch": 17.515352697095437, "grad_norm": 32.24488067626953, "learning_rate": 1.2997842323651453e-05, "loss": 0.8352, "step": 21106 }, { "epoch": 17.51618257261411, "grad_norm": 57.49036407470703, "learning_rate": 1.2997510373443985e-05, "loss": 0.9431, "step": 21107 }, { "epoch": 17.517012448132782, "grad_norm": 31.122215270996094, "learning_rate": 
1.2997178423236515e-05, "loss": 0.6999, "step": 21108 }, { "epoch": 17.517842323651454, "grad_norm": 38.60524368286133, "learning_rate": 1.2996846473029047e-05, "loss": 0.8098, "step": 21109 }, { "epoch": 17.518672199170126, "grad_norm": 36.4984130859375, "learning_rate": 1.2996514522821578e-05, "loss": 0.4381, "step": 21110 }, { "epoch": 17.5195020746888, "grad_norm": 119.06967163085938, "learning_rate": 1.2996182572614108e-05, "loss": 0.8205, "step": 21111 }, { "epoch": 17.52033195020747, "grad_norm": 20.64191436767578, "learning_rate": 1.299585062240664e-05, "loss": 0.4272, "step": 21112 }, { "epoch": 17.521161825726143, "grad_norm": 82.64745330810547, "learning_rate": 1.2995518672199172e-05, "loss": 1.5546, "step": 21113 }, { "epoch": 17.521991701244815, "grad_norm": 56.21860122680664, "learning_rate": 1.2995186721991701e-05, "loss": 0.8719, "step": 21114 }, { "epoch": 17.522821576763487, "grad_norm": 51.90513610839844, "learning_rate": 1.2994854771784233e-05, "loss": 0.4374, "step": 21115 }, { "epoch": 17.52365145228216, "grad_norm": 40.68312072753906, "learning_rate": 1.2994522821576765e-05, "loss": 0.5173, "step": 21116 }, { "epoch": 17.52448132780083, "grad_norm": 36.11193084716797, "learning_rate": 1.2994190871369296e-05, "loss": 0.6189, "step": 21117 }, { "epoch": 17.525311203319504, "grad_norm": 107.25447845458984, "learning_rate": 1.2993858921161826e-05, "loss": 0.9353, "step": 21118 }, { "epoch": 17.526141078838176, "grad_norm": 36.85802459716797, "learning_rate": 1.2993526970954358e-05, "loss": 0.4801, "step": 21119 }, { "epoch": 17.526970954356848, "grad_norm": 89.3031005859375, "learning_rate": 1.2993195020746889e-05, "loss": 0.5219, "step": 21120 }, { "epoch": 17.52780082987552, "grad_norm": 60.54364776611328, "learning_rate": 1.2992863070539421e-05, "loss": 1.1301, "step": 21121 }, { "epoch": 17.528630705394193, "grad_norm": 18.464935302734375, "learning_rate": 1.299253112033195e-05, "loss": 0.4817, "step": 21122 }, { "epoch": 17.529460580912865, 
"grad_norm": 58.23026657104492, "learning_rate": 1.2992199170124482e-05, "loss": 0.8011, "step": 21123 }, { "epoch": 17.530290456431537, "grad_norm": 31.3060245513916, "learning_rate": 1.2991867219917014e-05, "loss": 0.2923, "step": 21124 }, { "epoch": 17.53112033195021, "grad_norm": 39.6594352722168, "learning_rate": 1.2991535269709546e-05, "loss": 0.8062, "step": 21125 }, { "epoch": 17.53195020746888, "grad_norm": 42.1220588684082, "learning_rate": 1.2991203319502075e-05, "loss": 0.7042, "step": 21126 }, { "epoch": 17.532780082987554, "grad_norm": 16.116838455200195, "learning_rate": 1.2990871369294607e-05, "loss": 0.3554, "step": 21127 }, { "epoch": 17.533609958506226, "grad_norm": 46.389644622802734, "learning_rate": 1.2990539419087139e-05, "loss": 0.4781, "step": 21128 }, { "epoch": 17.534439834024898, "grad_norm": 29.811410903930664, "learning_rate": 1.299020746887967e-05, "loss": 0.6596, "step": 21129 }, { "epoch": 17.53526970954357, "grad_norm": 109.2185287475586, "learning_rate": 1.29898755186722e-05, "loss": 0.8636, "step": 21130 }, { "epoch": 17.536099585062242, "grad_norm": 18.74613380432129, "learning_rate": 1.298954356846473e-05, "loss": 0.3442, "step": 21131 }, { "epoch": 17.536929460580915, "grad_norm": 38.18446731567383, "learning_rate": 1.2989211618257262e-05, "loss": 0.4872, "step": 21132 }, { "epoch": 17.537759336099587, "grad_norm": 70.33405303955078, "learning_rate": 1.2988879668049794e-05, "loss": 1.0168, "step": 21133 }, { "epoch": 17.53858921161826, "grad_norm": 117.15707397460938, "learning_rate": 1.2988547717842326e-05, "loss": 1.0065, "step": 21134 }, { "epoch": 17.53941908713693, "grad_norm": 48.594268798828125, "learning_rate": 1.2988215767634855e-05, "loss": 0.4133, "step": 21135 }, { "epoch": 17.540248962655603, "grad_norm": 78.95072174072266, "learning_rate": 1.2987883817427387e-05, "loss": 0.912, "step": 21136 }, { "epoch": 17.541078838174275, "grad_norm": 43.32381820678711, "learning_rate": 1.298755186721992e-05, "loss": 0.3385, 
"step": 21137 }, { "epoch": 17.541908713692948, "grad_norm": 52.850494384765625, "learning_rate": 1.298721991701245e-05, "loss": 1.2284, "step": 21138 }, { "epoch": 17.54273858921162, "grad_norm": 27.228527069091797, "learning_rate": 1.298688796680498e-05, "loss": 0.2811, "step": 21139 }, { "epoch": 17.543568464730292, "grad_norm": 96.84933471679688, "learning_rate": 1.298655601659751e-05, "loss": 0.7844, "step": 21140 }, { "epoch": 17.544398340248964, "grad_norm": 55.81489181518555, "learning_rate": 1.2986224066390043e-05, "loss": 0.4428, "step": 21141 }, { "epoch": 17.545228215767636, "grad_norm": 81.73381805419922, "learning_rate": 1.2985892116182575e-05, "loss": 0.9748, "step": 21142 }, { "epoch": 17.54605809128631, "grad_norm": 39.73832702636719, "learning_rate": 1.2985560165975104e-05, "loss": 0.5753, "step": 21143 }, { "epoch": 17.54688796680498, "grad_norm": 41.572235107421875, "learning_rate": 1.2985228215767636e-05, "loss": 0.5306, "step": 21144 }, { "epoch": 17.547717842323653, "grad_norm": 32.254417419433594, "learning_rate": 1.2984896265560168e-05, "loss": 0.505, "step": 21145 }, { "epoch": 17.548547717842325, "grad_norm": 39.45062255859375, "learning_rate": 1.29845643153527e-05, "loss": 0.784, "step": 21146 }, { "epoch": 17.549377593360997, "grad_norm": 26.590591430664062, "learning_rate": 1.2984232365145229e-05, "loss": 0.501, "step": 21147 }, { "epoch": 17.55020746887967, "grad_norm": 22.81784439086914, "learning_rate": 1.298390041493776e-05, "loss": 0.3218, "step": 21148 }, { "epoch": 17.551037344398342, "grad_norm": 21.968814849853516, "learning_rate": 1.2983568464730291e-05, "loss": 0.3914, "step": 21149 }, { "epoch": 17.551867219917014, "grad_norm": 87.36023712158203, "learning_rate": 1.2983236514522823e-05, "loss": 0.9277, "step": 21150 }, { "epoch": 17.552697095435686, "grad_norm": 50.496315002441406, "learning_rate": 1.2982904564315354e-05, "loss": 0.4124, "step": 21151 }, { "epoch": 17.55352697095436, "grad_norm": 50.86601638793945, 
"learning_rate": 1.2982572614107884e-05, "loss": 0.8384, "step": 21152 }, { "epoch": 17.55435684647303, "grad_norm": 53.01044464111328, "learning_rate": 1.2982240663900416e-05, "loss": 0.5609, "step": 21153 }, { "epoch": 17.555186721991703, "grad_norm": 27.845848083496094, "learning_rate": 1.2981908713692948e-05, "loss": 0.5774, "step": 21154 }, { "epoch": 17.556016597510375, "grad_norm": 40.05671310424805, "learning_rate": 1.2981576763485477e-05, "loss": 0.6167, "step": 21155 }, { "epoch": 17.556846473029047, "grad_norm": 49.45063400268555, "learning_rate": 1.2981244813278009e-05, "loss": 0.7108, "step": 21156 }, { "epoch": 17.55767634854772, "grad_norm": 48.73021697998047, "learning_rate": 1.2980912863070541e-05, "loss": 0.7397, "step": 21157 }, { "epoch": 17.55850622406639, "grad_norm": 50.8307991027832, "learning_rate": 1.2980580912863072e-05, "loss": 0.5015, "step": 21158 }, { "epoch": 17.559336099585064, "grad_norm": 38.744606018066406, "learning_rate": 1.2980248962655602e-05, "loss": 0.5343, "step": 21159 }, { "epoch": 17.560165975103736, "grad_norm": 46.9560546875, "learning_rate": 1.2979917012448134e-05, "loss": 0.7531, "step": 21160 }, { "epoch": 17.560995850622408, "grad_norm": 38.09792709350586, "learning_rate": 1.2979585062240665e-05, "loss": 0.5182, "step": 21161 }, { "epoch": 17.56182572614108, "grad_norm": 30.77906036376953, "learning_rate": 1.2979253112033197e-05, "loss": 0.7523, "step": 21162 }, { "epoch": 17.562655601659753, "grad_norm": 53.6708984375, "learning_rate": 1.2978921161825729e-05, "loss": 0.9935, "step": 21163 }, { "epoch": 17.563485477178425, "grad_norm": 8.381134033203125, "learning_rate": 1.2978589211618257e-05, "loss": 0.3432, "step": 21164 }, { "epoch": 17.564315352697097, "grad_norm": 42.07711410522461, "learning_rate": 1.297825726141079e-05, "loss": 0.4871, "step": 21165 }, { "epoch": 17.56514522821577, "grad_norm": 38.04590606689453, "learning_rate": 1.2977925311203322e-05, "loss": 0.8123, "step": 21166 }, { "epoch": 
17.56597510373444, "grad_norm": 44.054744720458984, "learning_rate": 1.2977593360995852e-05, "loss": 1.1981, "step": 21167 }, { "epoch": 17.566804979253114, "grad_norm": 22.619054794311523, "learning_rate": 1.2977261410788383e-05, "loss": 0.4325, "step": 21168 }, { "epoch": 17.567634854771786, "grad_norm": 39.41309356689453, "learning_rate": 1.2976929460580913e-05, "loss": 1.0569, "step": 21169 }, { "epoch": 17.568464730290458, "grad_norm": 40.587005615234375, "learning_rate": 1.2976597510373445e-05, "loss": 0.5625, "step": 21170 }, { "epoch": 17.56929460580913, "grad_norm": 67.41061401367188, "learning_rate": 1.2976265560165977e-05, "loss": 1.1621, "step": 21171 }, { "epoch": 17.570124481327802, "grad_norm": 65.72297668457031, "learning_rate": 1.2975933609958506e-05, "loss": 0.5945, "step": 21172 }, { "epoch": 17.570954356846475, "grad_norm": 35.542877197265625, "learning_rate": 1.2975601659751038e-05, "loss": 0.9602, "step": 21173 }, { "epoch": 17.571784232365147, "grad_norm": 35.15875244140625, "learning_rate": 1.297526970954357e-05, "loss": 0.3787, "step": 21174 }, { "epoch": 17.57261410788382, "grad_norm": 66.74211120605469, "learning_rate": 1.2974937759336102e-05, "loss": 1.2373, "step": 21175 }, { "epoch": 17.57344398340249, "grad_norm": 34.11564254760742, "learning_rate": 1.2974605809128631e-05, "loss": 0.7063, "step": 21176 }, { "epoch": 17.574273858921163, "grad_norm": 29.415672302246094, "learning_rate": 1.2974273858921163e-05, "loss": 0.6132, "step": 21177 }, { "epoch": 17.575103734439836, "grad_norm": 58.204071044921875, "learning_rate": 1.2973941908713693e-05, "loss": 0.4933, "step": 21178 }, { "epoch": 17.575933609958508, "grad_norm": 50.416473388671875, "learning_rate": 1.2973609958506226e-05, "loss": 1.0961, "step": 21179 }, { "epoch": 17.57676348547718, "grad_norm": 53.50723648071289, "learning_rate": 1.2973278008298756e-05, "loss": 0.9474, "step": 21180 }, { "epoch": 17.577593360995852, "grad_norm": 27.91851234436035, "learning_rate": 
1.2972946058091286e-05, "loss": 0.37, "step": 21181 }, { "epoch": 17.578423236514524, "grad_norm": 16.864158630371094, "learning_rate": 1.2972614107883818e-05, "loss": 0.4794, "step": 21182 }, { "epoch": 17.579253112033197, "grad_norm": 36.781307220458984, "learning_rate": 1.297228215767635e-05, "loss": 0.5207, "step": 21183 }, { "epoch": 17.58008298755187, "grad_norm": 32.131317138671875, "learning_rate": 1.297195020746888e-05, "loss": 0.815, "step": 21184 }, { "epoch": 17.58091286307054, "grad_norm": 90.718505859375, "learning_rate": 1.2971618257261411e-05, "loss": 0.4743, "step": 21185 }, { "epoch": 17.581742738589213, "grad_norm": 131.2760009765625, "learning_rate": 1.2971286307053943e-05, "loss": 1.1576, "step": 21186 }, { "epoch": 17.582572614107885, "grad_norm": 24.526914596557617, "learning_rate": 1.2970954356846474e-05, "loss": 0.3754, "step": 21187 }, { "epoch": 17.583402489626557, "grad_norm": 46.0498161315918, "learning_rate": 1.2970622406639006e-05, "loss": 1.1525, "step": 21188 }, { "epoch": 17.58423236514523, "grad_norm": 25.86608123779297, "learning_rate": 1.2970290456431536e-05, "loss": 0.5365, "step": 21189 }, { "epoch": 17.585062240663902, "grad_norm": 60.02025604248047, "learning_rate": 1.2969958506224067e-05, "loss": 1.1475, "step": 21190 }, { "epoch": 17.585892116182574, "grad_norm": 8.069499015808105, "learning_rate": 1.2969626556016599e-05, "loss": 0.2466, "step": 21191 }, { "epoch": 17.586721991701246, "grad_norm": 35.37647247314453, "learning_rate": 1.2969294605809131e-05, "loss": 0.5589, "step": 21192 }, { "epoch": 17.58755186721992, "grad_norm": 21.596242904663086, "learning_rate": 1.296896265560166e-05, "loss": 0.419, "step": 21193 }, { "epoch": 17.58838174273859, "grad_norm": 28.876623153686523, "learning_rate": 1.2968630705394192e-05, "loss": 0.8481, "step": 21194 }, { "epoch": 17.589211618257263, "grad_norm": 29.74294662475586, "learning_rate": 1.2968298755186724e-05, "loss": 0.4219, "step": 21195 }, { "epoch": 17.590041493775935, 
"grad_norm": 29.72373390197754, "learning_rate": 1.2967966804979254e-05, "loss": 0.5093, "step": 21196 }, { "epoch": 17.590871369294607, "grad_norm": 84.3162612915039, "learning_rate": 1.2967634854771785e-05, "loss": 0.5959, "step": 21197 }, { "epoch": 17.59170124481328, "grad_norm": 41.8980598449707, "learning_rate": 1.2967302904564317e-05, "loss": 0.8447, "step": 21198 }, { "epoch": 17.59253112033195, "grad_norm": 32.37068176269531, "learning_rate": 1.2966970954356847e-05, "loss": 0.6468, "step": 21199 }, { "epoch": 17.593360995850624, "grad_norm": 20.746532440185547, "learning_rate": 1.296663900414938e-05, "loss": 0.3151, "step": 21200 }, { "epoch": 17.594190871369296, "grad_norm": 41.86631774902344, "learning_rate": 1.2966307053941908e-05, "loss": 0.7798, "step": 21201 }, { "epoch": 17.59502074688797, "grad_norm": 34.0682373046875, "learning_rate": 1.296597510373444e-05, "loss": 0.9853, "step": 21202 }, { "epoch": 17.59585062240664, "grad_norm": 30.18744468688965, "learning_rate": 1.2965643153526972e-05, "loss": 0.5359, "step": 21203 }, { "epoch": 17.596680497925313, "grad_norm": 65.12092590332031, "learning_rate": 1.2965311203319504e-05, "loss": 0.6966, "step": 21204 }, { "epoch": 17.597510373443985, "grad_norm": 59.04705047607422, "learning_rate": 1.2964979253112033e-05, "loss": 1.1402, "step": 21205 }, { "epoch": 17.598340248962657, "grad_norm": 63.71128463745117, "learning_rate": 1.2964647302904565e-05, "loss": 1.0339, "step": 21206 }, { "epoch": 17.59917012448133, "grad_norm": 48.264713287353516, "learning_rate": 1.2964315352697097e-05, "loss": 0.8808, "step": 21207 }, { "epoch": 17.6, "grad_norm": 29.3533935546875, "learning_rate": 1.2963983402489628e-05, "loss": 0.4199, "step": 21208 }, { "epoch": 17.600829875518674, "grad_norm": 49.948978424072266, "learning_rate": 1.2963651452282158e-05, "loss": 1.0054, "step": 21209 }, { "epoch": 17.601659751037346, "grad_norm": 42.40326690673828, "learning_rate": 1.2963319502074689e-05, "loss": 0.4616, "step": 21210 
}, { "epoch": 17.602489626556018, "grad_norm": 45.270423889160156, "learning_rate": 1.296298755186722e-05, "loss": 0.5544, "step": 21211 }, { "epoch": 17.60331950207469, "grad_norm": 62.61103057861328, "learning_rate": 1.2962655601659753e-05, "loss": 0.541, "step": 21212 }, { "epoch": 17.604149377593362, "grad_norm": 35.42201614379883, "learning_rate": 1.2962323651452282e-05, "loss": 0.3859, "step": 21213 }, { "epoch": 17.604979253112035, "grad_norm": 25.794631958007812, "learning_rate": 1.2961991701244814e-05, "loss": 0.4198, "step": 21214 }, { "epoch": 17.605809128630707, "grad_norm": 55.019691467285156, "learning_rate": 1.2961659751037346e-05, "loss": 1.1312, "step": 21215 }, { "epoch": 17.60663900414938, "grad_norm": 16.2051944732666, "learning_rate": 1.2961327800829876e-05, "loss": 0.3018, "step": 21216 }, { "epoch": 17.60746887966805, "grad_norm": 38.26075744628906, "learning_rate": 1.2960995850622408e-05, "loss": 0.5091, "step": 21217 }, { "epoch": 17.608298755186723, "grad_norm": 36.1803092956543, "learning_rate": 1.2960663900414939e-05, "loss": 0.5231, "step": 21218 }, { "epoch": 17.609128630705396, "grad_norm": 50.07362747192383, "learning_rate": 1.296033195020747e-05, "loss": 0.6754, "step": 21219 }, { "epoch": 17.609958506224068, "grad_norm": 23.144445419311523, "learning_rate": 1.2960000000000001e-05, "loss": 0.4172, "step": 21220 }, { "epoch": 17.61078838174274, "grad_norm": 48.161495208740234, "learning_rate": 1.2959668049792533e-05, "loss": 0.9599, "step": 21221 }, { "epoch": 17.611618257261412, "grad_norm": 80.68302154541016, "learning_rate": 1.2959336099585062e-05, "loss": 0.9003, "step": 21222 }, { "epoch": 17.612448132780084, "grad_norm": 57.69939422607422, "learning_rate": 1.2959004149377594e-05, "loss": 0.8926, "step": 21223 }, { "epoch": 17.613278008298757, "grad_norm": 86.83167266845703, "learning_rate": 1.2958672199170126e-05, "loss": 0.7982, "step": 21224 }, { "epoch": 17.61410788381743, "grad_norm": 36.414955139160156, "learning_rate": 
1.2958340248962657e-05, "loss": 0.3616, "step": 21225 }, { "epoch": 17.6149377593361, "grad_norm": 17.60011100769043, "learning_rate": 1.2958008298755187e-05, "loss": 0.3129, "step": 21226 }, { "epoch": 17.615767634854773, "grad_norm": 42.23982238769531, "learning_rate": 1.295767634854772e-05, "loss": 1.0612, "step": 21227 }, { "epoch": 17.616597510373445, "grad_norm": 22.841283798217773, "learning_rate": 1.295734439834025e-05, "loss": 0.4679, "step": 21228 }, { "epoch": 17.617427385892118, "grad_norm": 60.61336898803711, "learning_rate": 1.2957012448132782e-05, "loss": 0.5516, "step": 21229 }, { "epoch": 17.61825726141079, "grad_norm": 49.42547607421875, "learning_rate": 1.2956680497925312e-05, "loss": 1.0319, "step": 21230 }, { "epoch": 17.619087136929462, "grad_norm": 61.18234634399414, "learning_rate": 1.2956348547717843e-05, "loss": 1.0129, "step": 21231 }, { "epoch": 17.619917012448134, "grad_norm": 50.44778823852539, "learning_rate": 1.2956016597510375e-05, "loss": 0.9062, "step": 21232 }, { "epoch": 17.620746887966806, "grad_norm": 17.443687438964844, "learning_rate": 1.2955684647302907e-05, "loss": 0.3383, "step": 21233 }, { "epoch": 17.62157676348548, "grad_norm": 38.176456451416016, "learning_rate": 1.2955352697095436e-05, "loss": 0.7034, "step": 21234 }, { "epoch": 17.62240663900415, "grad_norm": 51.460750579833984, "learning_rate": 1.2955020746887968e-05, "loss": 0.5202, "step": 21235 }, { "epoch": 17.623236514522823, "grad_norm": 46.739044189453125, "learning_rate": 1.29546887966805e-05, "loss": 1.2582, "step": 21236 }, { "epoch": 17.624066390041495, "grad_norm": 41.963897705078125, "learning_rate": 1.295435684647303e-05, "loss": 0.5584, "step": 21237 }, { "epoch": 17.624896265560167, "grad_norm": 38.264137268066406, "learning_rate": 1.295402489626556e-05, "loss": 1.2244, "step": 21238 }, { "epoch": 17.62572614107884, "grad_norm": 54.06499099731445, "learning_rate": 1.2953692946058091e-05, "loss": 0.9836, "step": 21239 }, { "epoch": 17.62655601659751, 
"grad_norm": 23.40829086303711, "learning_rate": 1.2953360995850623e-05, "loss": 0.5154, "step": 21240 }, { "epoch": 17.627385892116184, "grad_norm": 88.27009582519531, "learning_rate": 1.2953029045643155e-05, "loss": 0.8766, "step": 21241 }, { "epoch": 17.628215767634856, "grad_norm": 41.839447021484375, "learning_rate": 1.2952697095435687e-05, "loss": 0.47, "step": 21242 }, { "epoch": 17.62904564315353, "grad_norm": 29.509300231933594, "learning_rate": 1.2952365145228216e-05, "loss": 0.5025, "step": 21243 }, { "epoch": 17.6298755186722, "grad_norm": 25.43568229675293, "learning_rate": 1.2952033195020748e-05, "loss": 0.7628, "step": 21244 }, { "epoch": 17.630705394190873, "grad_norm": 45.90725326538086, "learning_rate": 1.295170124481328e-05, "loss": 1.0715, "step": 21245 }, { "epoch": 17.631535269709545, "grad_norm": 43.30559158325195, "learning_rate": 1.295136929460581e-05, "loss": 0.6751, "step": 21246 }, { "epoch": 17.632365145228217, "grad_norm": 78.17112731933594, "learning_rate": 1.2951037344398341e-05, "loss": 0.8324, "step": 21247 }, { "epoch": 17.63319502074689, "grad_norm": 15.719155311584473, "learning_rate": 1.2950705394190871e-05, "loss": 0.365, "step": 21248 }, { "epoch": 17.63402489626556, "grad_norm": 81.19586181640625, "learning_rate": 1.2950373443983404e-05, "loss": 0.9995, "step": 21249 }, { "epoch": 17.634854771784234, "grad_norm": 79.31556701660156, "learning_rate": 1.2950041493775936e-05, "loss": 0.663, "step": 21250 }, { "epoch": 17.635684647302906, "grad_norm": 70.88532257080078, "learning_rate": 1.2949709543568464e-05, "loss": 1.2193, "step": 21251 }, { "epoch": 17.636514522821578, "grad_norm": 44.4582633972168, "learning_rate": 1.2949377593360997e-05, "loss": 0.5157, "step": 21252 }, { "epoch": 17.63734439834025, "grad_norm": 26.20313262939453, "learning_rate": 1.2949045643153529e-05, "loss": 0.4024, "step": 21253 }, { "epoch": 17.638174273858922, "grad_norm": 95.88309478759766, "learning_rate": 1.294871369294606e-05, "loss": 0.7996, 
"step": 21254 }, { "epoch": 17.639004149377595, "grad_norm": 29.796010971069336, "learning_rate": 1.294838174273859e-05, "loss": 0.2918, "step": 21255 }, { "epoch": 17.639834024896267, "grad_norm": 50.654296875, "learning_rate": 1.2948049792531122e-05, "loss": 0.746, "step": 21256 }, { "epoch": 17.64066390041494, "grad_norm": 26.280229568481445, "learning_rate": 1.2947717842323652e-05, "loss": 0.3342, "step": 21257 }, { "epoch": 17.64149377593361, "grad_norm": 73.6968765258789, "learning_rate": 1.2947385892116184e-05, "loss": 0.8271, "step": 21258 }, { "epoch": 17.642323651452283, "grad_norm": 18.60101890563965, "learning_rate": 1.2947053941908715e-05, "loss": 0.5709, "step": 21259 }, { "epoch": 17.643153526970956, "grad_norm": 46.33393478393555, "learning_rate": 1.2946721991701245e-05, "loss": 0.4258, "step": 21260 }, { "epoch": 17.643983402489628, "grad_norm": 43.62903594970703, "learning_rate": 1.2946390041493777e-05, "loss": 0.5524, "step": 21261 }, { "epoch": 17.6448132780083, "grad_norm": 38.58173370361328, "learning_rate": 1.2946058091286309e-05, "loss": 0.6066, "step": 21262 }, { "epoch": 17.645643153526972, "grad_norm": 60.54512023925781, "learning_rate": 1.2945726141078838e-05, "loss": 1.182, "step": 21263 }, { "epoch": 17.646473029045644, "grad_norm": 58.62816619873047, "learning_rate": 1.294539419087137e-05, "loss": 1.0932, "step": 21264 }, { "epoch": 17.647302904564317, "grad_norm": 54.32481384277344, "learning_rate": 1.2945062240663902e-05, "loss": 1.0012, "step": 21265 }, { "epoch": 17.64813278008299, "grad_norm": 50.64128494262695, "learning_rate": 1.2944730290456432e-05, "loss": 0.5423, "step": 21266 }, { "epoch": 17.64896265560166, "grad_norm": 38.65068054199219, "learning_rate": 1.2944398340248965e-05, "loss": 0.4259, "step": 21267 }, { "epoch": 17.649792531120333, "grad_norm": 30.349111557006836, "learning_rate": 1.2944066390041495e-05, "loss": 0.5681, "step": 21268 }, { "epoch": 17.650622406639005, "grad_norm": 30.15411376953125, 
"learning_rate": 1.2943734439834025e-05, "loss": 0.2808, "step": 21269 }, { "epoch": 17.651452282157678, "grad_norm": 18.73928451538086, "learning_rate": 1.2943402489626558e-05, "loss": 0.3045, "step": 21270 }, { "epoch": 17.65228215767635, "grad_norm": 93.68995666503906, "learning_rate": 1.294307053941909e-05, "loss": 0.7247, "step": 21271 }, { "epoch": 17.653112033195022, "grad_norm": 31.146574020385742, "learning_rate": 1.2942738589211618e-05, "loss": 0.6169, "step": 21272 }, { "epoch": 17.653941908713694, "grad_norm": 55.85266876220703, "learning_rate": 1.294240663900415e-05, "loss": 0.732, "step": 21273 }, { "epoch": 17.654771784232366, "grad_norm": 28.189491271972656, "learning_rate": 1.2942074688796683e-05, "loss": 0.5542, "step": 21274 }, { "epoch": 17.65560165975104, "grad_norm": 25.618408203125, "learning_rate": 1.2941742738589213e-05, "loss": 0.971, "step": 21275 }, { "epoch": 17.65643153526971, "grad_norm": 51.485538482666016, "learning_rate": 1.2941410788381743e-05, "loss": 1.2446, "step": 21276 }, { "epoch": 17.657261410788383, "grad_norm": 18.16900634765625, "learning_rate": 1.2941078838174276e-05, "loss": 0.4049, "step": 21277 }, { "epoch": 17.658091286307055, "grad_norm": 25.105241775512695, "learning_rate": 1.2940746887966806e-05, "loss": 0.4963, "step": 21278 }, { "epoch": 17.658921161825727, "grad_norm": 43.698978424072266, "learning_rate": 1.2940414937759338e-05, "loss": 0.7966, "step": 21279 }, { "epoch": 17.6597510373444, "grad_norm": 16.4930362701416, "learning_rate": 1.2940082987551867e-05, "loss": 0.3166, "step": 21280 }, { "epoch": 17.66058091286307, "grad_norm": 30.35564613342285, "learning_rate": 1.2939751037344399e-05, "loss": 0.8691, "step": 21281 }, { "epoch": 17.661410788381744, "grad_norm": 48.535526275634766, "learning_rate": 1.2939419087136931e-05, "loss": 0.7337, "step": 21282 }, { "epoch": 17.662240663900416, "grad_norm": 45.643333435058594, "learning_rate": 1.2939087136929463e-05, "loss": 0.9161, "step": 21283 }, { "epoch": 
17.66307053941909, "grad_norm": 14.239896774291992, "learning_rate": 1.2938755186721992e-05, "loss": 0.3429, "step": 21284 }, { "epoch": 17.66390041493776, "grad_norm": 52.08469772338867, "learning_rate": 1.2938423236514524e-05, "loss": 0.5465, "step": 21285 }, { "epoch": 17.664730290456433, "grad_norm": 47.464263916015625, "learning_rate": 1.2938091286307054e-05, "loss": 1.1919, "step": 21286 }, { "epoch": 17.665560165975105, "grad_norm": 41.166412353515625, "learning_rate": 1.2937759336099586e-05, "loss": 0.5129, "step": 21287 }, { "epoch": 17.666390041493777, "grad_norm": 37.413875579833984, "learning_rate": 1.2937427385892117e-05, "loss": 0.577, "step": 21288 }, { "epoch": 17.66721991701245, "grad_norm": 54.774600982666016, "learning_rate": 1.2937095435684647e-05, "loss": 0.7081, "step": 21289 }, { "epoch": 17.66804979253112, "grad_norm": 30.622949600219727, "learning_rate": 1.293676348547718e-05, "loss": 0.8852, "step": 21290 }, { "epoch": 17.668879668049794, "grad_norm": 47.416873931884766, "learning_rate": 1.2936431535269711e-05, "loss": 0.6942, "step": 21291 }, { "epoch": 17.669709543568466, "grad_norm": 26.267620086669922, "learning_rate": 1.293609958506224e-05, "loss": 0.5271, "step": 21292 }, { "epoch": 17.670539419087138, "grad_norm": 69.85455322265625, "learning_rate": 1.2935767634854772e-05, "loss": 1.0002, "step": 21293 }, { "epoch": 17.67136929460581, "grad_norm": 23.523897171020508, "learning_rate": 1.2935435684647304e-05, "loss": 0.334, "step": 21294 }, { "epoch": 17.672199170124482, "grad_norm": 99.8997802734375, "learning_rate": 1.2935103734439835e-05, "loss": 2.0559, "step": 21295 }, { "epoch": 17.673029045643155, "grad_norm": 26.44883155822754, "learning_rate": 1.2934771784232367e-05, "loss": 0.3593, "step": 21296 }, { "epoch": 17.673858921161827, "grad_norm": 57.07829284667969, "learning_rate": 1.2934439834024897e-05, "loss": 0.7469, "step": 21297 }, { "epoch": 17.6746887966805, "grad_norm": 147.2504425048828, "learning_rate": 
1.2934107883817428e-05, "loss": 1.1366, "step": 21298 }, { "epoch": 17.67551867219917, "grad_norm": 44.2268180847168, "learning_rate": 1.293377593360996e-05, "loss": 0.3501, "step": 21299 }, { "epoch": 17.676348547717843, "grad_norm": 71.97200775146484, "learning_rate": 1.2933443983402492e-05, "loss": 0.5527, "step": 21300 }, { "epoch": 17.677178423236516, "grad_norm": 43.0802116394043, "learning_rate": 1.293311203319502e-05, "loss": 0.3669, "step": 21301 }, { "epoch": 17.678008298755188, "grad_norm": 37.24966812133789, "learning_rate": 1.2932780082987553e-05, "loss": 0.8341, "step": 21302 }, { "epoch": 17.67883817427386, "grad_norm": 42.90779113769531, "learning_rate": 1.2932448132780085e-05, "loss": 0.6285, "step": 21303 }, { "epoch": 17.679668049792532, "grad_norm": 37.32575225830078, "learning_rate": 1.2932116182572615e-05, "loss": 0.7285, "step": 21304 }, { "epoch": 17.680497925311204, "grad_norm": 165.09007263183594, "learning_rate": 1.2931784232365146e-05, "loss": 1.6447, "step": 21305 }, { "epoch": 17.681327800829877, "grad_norm": 44.15494155883789, "learning_rate": 1.2931452282157678e-05, "loss": 0.7355, "step": 21306 }, { "epoch": 17.68215767634855, "grad_norm": 31.454605102539062, "learning_rate": 1.2931120331950208e-05, "loss": 0.5412, "step": 21307 }, { "epoch": 17.68298755186722, "grad_norm": 40.75131607055664, "learning_rate": 1.293078838174274e-05, "loss": 0.3202, "step": 21308 }, { "epoch": 17.683817427385893, "grad_norm": 11.065804481506348, "learning_rate": 1.2930456431535269e-05, "loss": 0.269, "step": 21309 }, { "epoch": 17.684647302904565, "grad_norm": 38.40560531616211, "learning_rate": 1.2930124481327801e-05, "loss": 0.5663, "step": 21310 }, { "epoch": 17.685477178423238, "grad_norm": 33.445648193359375, "learning_rate": 1.2929792531120333e-05, "loss": 0.5836, "step": 21311 }, { "epoch": 17.68630705394191, "grad_norm": 40.37431335449219, "learning_rate": 1.2929460580912865e-05, "loss": 1.0449, "step": 21312 }, { "epoch": 17.687136929460582, 
"grad_norm": 77.7303466796875, "learning_rate": 1.2929128630705394e-05, "loss": 1.2534, "step": 21313 }, { "epoch": 17.687966804979254, "grad_norm": 40.291893005371094, "learning_rate": 1.2928796680497926e-05, "loss": 0.5476, "step": 21314 }, { "epoch": 17.688796680497926, "grad_norm": 10.289053916931152, "learning_rate": 1.2928464730290458e-05, "loss": 0.3703, "step": 21315 }, { "epoch": 17.6896265560166, "grad_norm": 49.72766876220703, "learning_rate": 1.2928132780082989e-05, "loss": 1.3987, "step": 21316 }, { "epoch": 17.69045643153527, "grad_norm": 87.02722930908203, "learning_rate": 1.292780082987552e-05, "loss": 0.5626, "step": 21317 }, { "epoch": 17.691286307053943, "grad_norm": 36.65720748901367, "learning_rate": 1.292746887966805e-05, "loss": 0.6008, "step": 21318 }, { "epoch": 17.692116182572615, "grad_norm": 29.861364364624023, "learning_rate": 1.2927136929460582e-05, "loss": 0.6209, "step": 21319 }, { "epoch": 17.692946058091287, "grad_norm": 73.84566497802734, "learning_rate": 1.2926804979253114e-05, "loss": 0.9026, "step": 21320 }, { "epoch": 17.69377593360996, "grad_norm": 147.99452209472656, "learning_rate": 1.2926473029045646e-05, "loss": 0.7678, "step": 21321 }, { "epoch": 17.694605809128632, "grad_norm": 45.40885925292969, "learning_rate": 1.2926141078838175e-05, "loss": 0.468, "step": 21322 }, { "epoch": 17.695435684647304, "grad_norm": 28.17807388305664, "learning_rate": 1.2925809128630707e-05, "loss": 0.328, "step": 21323 }, { "epoch": 17.696265560165976, "grad_norm": 39.46246337890625, "learning_rate": 1.2925477178423239e-05, "loss": 0.8428, "step": 21324 }, { "epoch": 17.69709543568465, "grad_norm": 47.756778717041016, "learning_rate": 1.292514522821577e-05, "loss": 0.7757, "step": 21325 }, { "epoch": 17.69792531120332, "grad_norm": 56.98556900024414, "learning_rate": 1.29248132780083e-05, "loss": 0.3597, "step": 21326 }, { "epoch": 17.698755186721993, "grad_norm": 41.728912353515625, "learning_rate": 1.292448132780083e-05, "loss": 0.8218, 
"step": 21327 }, { "epoch": 17.699585062240665, "grad_norm": 20.980852127075195, "learning_rate": 1.2924149377593362e-05, "loss": 0.4058, "step": 21328 }, { "epoch": 17.700414937759337, "grad_norm": 20.942947387695312, "learning_rate": 1.2923817427385894e-05, "loss": 0.4935, "step": 21329 }, { "epoch": 17.70124481327801, "grad_norm": 51.590999603271484, "learning_rate": 1.2923485477178423e-05, "loss": 0.6126, "step": 21330 }, { "epoch": 17.70207468879668, "grad_norm": 52.96318817138672, "learning_rate": 1.2923153526970955e-05, "loss": 1.0331, "step": 21331 }, { "epoch": 17.702904564315354, "grad_norm": 26.011157989501953, "learning_rate": 1.2922821576763487e-05, "loss": 0.6688, "step": 21332 }, { "epoch": 17.703734439834026, "grad_norm": 13.50977897644043, "learning_rate": 1.2922489626556018e-05, "loss": 0.3654, "step": 21333 }, { "epoch": 17.704564315352698, "grad_norm": 41.15022277832031, "learning_rate": 1.2922157676348548e-05, "loss": 1.5259, "step": 21334 }, { "epoch": 17.70539419087137, "grad_norm": 47.179195404052734, "learning_rate": 1.292182572614108e-05, "loss": 0.4034, "step": 21335 }, { "epoch": 17.706224066390043, "grad_norm": 124.58143615722656, "learning_rate": 1.292149377593361e-05, "loss": 1.0051, "step": 21336 }, { "epoch": 17.707053941908715, "grad_norm": 70.02265930175781, "learning_rate": 1.2921161825726143e-05, "loss": 0.8146, "step": 21337 }, { "epoch": 17.707883817427387, "grad_norm": 41.46446990966797, "learning_rate": 1.2920829875518673e-05, "loss": 0.756, "step": 21338 }, { "epoch": 17.70871369294606, "grad_norm": 84.22105407714844, "learning_rate": 1.2920497925311203e-05, "loss": 0.5346, "step": 21339 }, { "epoch": 17.70954356846473, "grad_norm": 34.71220779418945, "learning_rate": 1.2920165975103736e-05, "loss": 0.4998, "step": 21340 }, { "epoch": 17.710373443983404, "grad_norm": 44.88175582885742, "learning_rate": 1.2919834024896268e-05, "loss": 0.5868, "step": 21341 }, { "epoch": 17.711203319502076, "grad_norm": 148.56748962402344, 
"learning_rate": 1.2919502074688796e-05, "loss": 1.2508, "step": 21342 }, { "epoch": 17.712033195020748, "grad_norm": 41.07445526123047, "learning_rate": 1.2919170124481329e-05, "loss": 1.3782, "step": 21343 }, { "epoch": 17.71286307053942, "grad_norm": 14.692333221435547, "learning_rate": 1.291883817427386e-05, "loss": 0.322, "step": 21344 }, { "epoch": 17.713692946058092, "grad_norm": 45.43960189819336, "learning_rate": 1.2918506224066391e-05, "loss": 0.6514, "step": 21345 }, { "epoch": 17.714522821576764, "grad_norm": 28.925718307495117, "learning_rate": 1.2918174273858923e-05, "loss": 0.5781, "step": 21346 }, { "epoch": 17.715352697095437, "grad_norm": 12.326144218444824, "learning_rate": 1.2917842323651454e-05, "loss": 0.2758, "step": 21347 }, { "epoch": 17.71618257261411, "grad_norm": 43.264671325683594, "learning_rate": 1.2917510373443984e-05, "loss": 0.5255, "step": 21348 }, { "epoch": 17.71701244813278, "grad_norm": 34.73857116699219, "learning_rate": 1.2917178423236516e-05, "loss": 0.5264, "step": 21349 }, { "epoch": 17.717842323651453, "grad_norm": 34.36304473876953, "learning_rate": 1.2916846473029048e-05, "loss": 0.8691, "step": 21350 }, { "epoch": 17.718672199170125, "grad_norm": 50.293697357177734, "learning_rate": 1.2916514522821577e-05, "loss": 0.5644, "step": 21351 }, { "epoch": 17.719502074688798, "grad_norm": 31.18671989440918, "learning_rate": 1.2916182572614109e-05, "loss": 0.5312, "step": 21352 }, { "epoch": 17.72033195020747, "grad_norm": 19.676193237304688, "learning_rate": 1.2915850622406641e-05, "loss": 0.2913, "step": 21353 }, { "epoch": 17.721161825726142, "grad_norm": 15.615130424499512, "learning_rate": 1.2915518672199172e-05, "loss": 0.3424, "step": 21354 }, { "epoch": 17.721991701244814, "grad_norm": 57.74445724487305, "learning_rate": 1.2915186721991702e-05, "loss": 0.7607, "step": 21355 }, { "epoch": 17.722821576763486, "grad_norm": 52.1844596862793, "learning_rate": 1.2914854771784232e-05, "loss": 0.4872, "step": 21356 }, { 
"epoch": 17.72365145228216, "grad_norm": 18.635190963745117, "learning_rate": 1.2914522821576764e-05, "loss": 0.338, "step": 21357 }, { "epoch": 17.72448132780083, "grad_norm": 31.9537410736084, "learning_rate": 1.2914190871369297e-05, "loss": 0.5823, "step": 21358 }, { "epoch": 17.725311203319503, "grad_norm": 46.53549575805664, "learning_rate": 1.2913858921161825e-05, "loss": 1.1567, "step": 21359 }, { "epoch": 17.726141078838175, "grad_norm": 45.58831024169922, "learning_rate": 1.2913526970954357e-05, "loss": 1.0145, "step": 21360 }, { "epoch": 17.726970954356847, "grad_norm": 89.5632553100586, "learning_rate": 1.291319502074689e-05, "loss": 0.7019, "step": 21361 }, { "epoch": 17.72780082987552, "grad_norm": 62.5386962890625, "learning_rate": 1.2912863070539422e-05, "loss": 0.719, "step": 21362 }, { "epoch": 17.728630705394192, "grad_norm": 27.648038864135742, "learning_rate": 1.291253112033195e-05, "loss": 0.4265, "step": 21363 }, { "epoch": 17.729460580912864, "grad_norm": 57.7659912109375, "learning_rate": 1.2912199170124482e-05, "loss": 1.071, "step": 21364 }, { "epoch": 17.730290456431536, "grad_norm": 29.365365982055664, "learning_rate": 1.2911867219917013e-05, "loss": 0.4601, "step": 21365 }, { "epoch": 17.73112033195021, "grad_norm": 26.01816177368164, "learning_rate": 1.2911535269709545e-05, "loss": 0.4846, "step": 21366 }, { "epoch": 17.73195020746888, "grad_norm": 51.995460510253906, "learning_rate": 1.2911203319502075e-05, "loss": 0.8028, "step": 21367 }, { "epoch": 17.732780082987553, "grad_norm": 56.63298797607422, "learning_rate": 1.2910871369294606e-05, "loss": 1.612, "step": 21368 }, { "epoch": 17.733609958506225, "grad_norm": 22.443004608154297, "learning_rate": 1.2910539419087138e-05, "loss": 0.6474, "step": 21369 }, { "epoch": 17.734439834024897, "grad_norm": 99.46318054199219, "learning_rate": 1.291020746887967e-05, "loss": 0.8603, "step": 21370 }, { "epoch": 17.73526970954357, "grad_norm": 32.039794921875, "learning_rate": 
1.2909875518672199e-05, "loss": 0.5507, "step": 21371 }, { "epoch": 17.73609958506224, "grad_norm": 37.00586700439453, "learning_rate": 1.2909543568464731e-05, "loss": 0.7578, "step": 21372 }, { "epoch": 17.736929460580914, "grad_norm": 14.946846961975098, "learning_rate": 1.2909211618257263e-05, "loss": 0.4268, "step": 21373 }, { "epoch": 17.737759336099586, "grad_norm": 38.13934326171875, "learning_rate": 1.2908879668049793e-05, "loss": 1.299, "step": 21374 }, { "epoch": 17.738589211618258, "grad_norm": 80.81364440917969, "learning_rate": 1.2908547717842325e-05, "loss": 1.2461, "step": 21375 }, { "epoch": 17.73941908713693, "grad_norm": 33.31061935424805, "learning_rate": 1.2908215767634856e-05, "loss": 0.4635, "step": 21376 }, { "epoch": 17.740248962655603, "grad_norm": 37.92790985107422, "learning_rate": 1.2907883817427386e-05, "loss": 0.524, "step": 21377 }, { "epoch": 17.741078838174275, "grad_norm": 39.904727935791016, "learning_rate": 1.2907551867219918e-05, "loss": 0.6259, "step": 21378 }, { "epoch": 17.741908713692947, "grad_norm": 29.540342330932617, "learning_rate": 1.290721991701245e-05, "loss": 0.7273, "step": 21379 }, { "epoch": 17.74273858921162, "grad_norm": 39.15315628051758, "learning_rate": 1.290688796680498e-05, "loss": 0.7671, "step": 21380 }, { "epoch": 17.74356846473029, "grad_norm": 18.056398391723633, "learning_rate": 1.2906556016597511e-05, "loss": 0.4028, "step": 21381 }, { "epoch": 17.744398340248964, "grad_norm": 44.59931945800781, "learning_rate": 1.2906224066390043e-05, "loss": 0.6647, "step": 21382 }, { "epoch": 17.745228215767636, "grad_norm": 39.12834167480469, "learning_rate": 1.2905892116182574e-05, "loss": 1.0183, "step": 21383 }, { "epoch": 17.746058091286308, "grad_norm": 59.96345901489258, "learning_rate": 1.2905560165975104e-05, "loss": 1.4651, "step": 21384 }, { "epoch": 17.74688796680498, "grad_norm": 26.51173973083496, "learning_rate": 1.2905228215767636e-05, "loss": 0.2884, "step": 21385 }, { "epoch": 
17.747717842323652, "grad_norm": 39.39788818359375, "learning_rate": 1.2904896265560167e-05, "loss": 0.556, "step": 21386 }, { "epoch": 17.748547717842325, "grad_norm": 18.52195930480957, "learning_rate": 1.2904564315352699e-05, "loss": 0.4085, "step": 21387 }, { "epoch": 17.749377593360997, "grad_norm": 23.77589225769043, "learning_rate": 1.2904232365145228e-05, "loss": 0.3016, "step": 21388 }, { "epoch": 17.75020746887967, "grad_norm": 20.925012588500977, "learning_rate": 1.290390041493776e-05, "loss": 0.3282, "step": 21389 }, { "epoch": 17.75103734439834, "grad_norm": 154.89959716796875, "learning_rate": 1.2903568464730292e-05, "loss": 1.801, "step": 21390 }, { "epoch": 17.751867219917013, "grad_norm": 70.73370361328125, "learning_rate": 1.2903236514522824e-05, "loss": 0.85, "step": 21391 }, { "epoch": 17.752697095435686, "grad_norm": 36.81129455566406, "learning_rate": 1.2902904564315353e-05, "loss": 0.6084, "step": 21392 }, { "epoch": 17.753526970954358, "grad_norm": 83.59638214111328, "learning_rate": 1.2902572614107885e-05, "loss": 0.6129, "step": 21393 }, { "epoch": 17.75435684647303, "grad_norm": 116.35580444335938, "learning_rate": 1.2902240663900417e-05, "loss": 0.8122, "step": 21394 }, { "epoch": 17.755186721991702, "grad_norm": 30.69695472717285, "learning_rate": 1.2901908713692947e-05, "loss": 0.737, "step": 21395 }, { "epoch": 17.756016597510374, "grad_norm": 57.55897521972656, "learning_rate": 1.2901576763485478e-05, "loss": 0.3913, "step": 21396 }, { "epoch": 17.756846473029047, "grad_norm": 54.780113220214844, "learning_rate": 1.2901244813278008e-05, "loss": 0.6386, "step": 21397 }, { "epoch": 17.75767634854772, "grad_norm": 29.19164276123047, "learning_rate": 1.290091286307054e-05, "loss": 0.4818, "step": 21398 }, { "epoch": 17.75850622406639, "grad_norm": 69.7445297241211, "learning_rate": 1.2900580912863072e-05, "loss": 0.6533, "step": 21399 }, { "epoch": 17.759336099585063, "grad_norm": 21.02471351623535, "learning_rate": 
1.2900248962655604e-05, "loss": 0.5137, "step": 21400 }, { "epoch": 17.760165975103735, "grad_norm": 30.58531379699707, "learning_rate": 1.2899917012448133e-05, "loss": 0.4894, "step": 21401 }, { "epoch": 17.760995850622407, "grad_norm": 60.30454635620117, "learning_rate": 1.2899585062240665e-05, "loss": 0.9818, "step": 21402 }, { "epoch": 17.76182572614108, "grad_norm": 42.02513885498047, "learning_rate": 1.2899253112033196e-05, "loss": 0.7679, "step": 21403 }, { "epoch": 17.762655601659752, "grad_norm": 48.94942855834961, "learning_rate": 1.2898921161825728e-05, "loss": 0.684, "step": 21404 }, { "epoch": 17.763485477178424, "grad_norm": 55.01568603515625, "learning_rate": 1.2898589211618258e-05, "loss": 0.7649, "step": 21405 }, { "epoch": 17.764315352697096, "grad_norm": 21.82683753967285, "learning_rate": 1.2898257261410789e-05, "loss": 0.2452, "step": 21406 }, { "epoch": 17.76514522821577, "grad_norm": 87.0467758178711, "learning_rate": 1.289792531120332e-05, "loss": 0.8947, "step": 21407 }, { "epoch": 17.76597510373444, "grad_norm": 54.969852447509766, "learning_rate": 1.2897593360995853e-05, "loss": 0.4607, "step": 21408 }, { "epoch": 17.766804979253113, "grad_norm": 86.01793670654297, "learning_rate": 1.2897261410788382e-05, "loss": 0.6891, "step": 21409 }, { "epoch": 17.767634854771785, "grad_norm": 92.68199157714844, "learning_rate": 1.2896929460580914e-05, "loss": 0.8609, "step": 21410 }, { "epoch": 17.768464730290457, "grad_norm": 60.91073226928711, "learning_rate": 1.2896597510373446e-05, "loss": 0.739, "step": 21411 }, { "epoch": 17.76929460580913, "grad_norm": 77.59156036376953, "learning_rate": 1.2896265560165976e-05, "loss": 0.7901, "step": 21412 }, { "epoch": 17.7701244813278, "grad_norm": 47.766849517822266, "learning_rate": 1.2895933609958507e-05, "loss": 0.8864, "step": 21413 }, { "epoch": 17.770954356846474, "grad_norm": 54.46124267578125, "learning_rate": 1.2895601659751039e-05, "loss": 0.3907, "step": 21414 }, { "epoch": 17.771784232365146, 
"grad_norm": 40.816123962402344, "learning_rate": 1.2895269709543569e-05, "loss": 0.3793, "step": 21415 }, { "epoch": 17.77261410788382, "grad_norm": 51.602821350097656, "learning_rate": 1.2894937759336101e-05, "loss": 0.8426, "step": 21416 }, { "epoch": 17.77344398340249, "grad_norm": 66.26994323730469, "learning_rate": 1.289460580912863e-05, "loss": 0.6069, "step": 21417 }, { "epoch": 17.774273858921163, "grad_norm": 43.64567947387695, "learning_rate": 1.2894273858921162e-05, "loss": 0.7912, "step": 21418 }, { "epoch": 17.775103734439835, "grad_norm": 60.01079559326172, "learning_rate": 1.2893941908713694e-05, "loss": 0.7861, "step": 21419 }, { "epoch": 17.775933609958507, "grad_norm": 38.986572265625, "learning_rate": 1.2893609958506226e-05, "loss": 0.5949, "step": 21420 }, { "epoch": 17.77676348547718, "grad_norm": 47.31295394897461, "learning_rate": 1.2893278008298755e-05, "loss": 0.9595, "step": 21421 }, { "epoch": 17.77759336099585, "grad_norm": 24.72629737854004, "learning_rate": 1.2892946058091287e-05, "loss": 0.3638, "step": 21422 }, { "epoch": 17.778423236514524, "grad_norm": 31.40552520751953, "learning_rate": 1.289261410788382e-05, "loss": 0.8701, "step": 21423 }, { "epoch": 17.779253112033196, "grad_norm": 32.61851501464844, "learning_rate": 1.289228215767635e-05, "loss": 0.6154, "step": 21424 }, { "epoch": 17.780082987551868, "grad_norm": 17.2307186126709, "learning_rate": 1.2891950207468882e-05, "loss": 0.2665, "step": 21425 }, { "epoch": 17.78091286307054, "grad_norm": 66.53982543945312, "learning_rate": 1.289161825726141e-05, "loss": 1.0403, "step": 21426 }, { "epoch": 17.781742738589212, "grad_norm": 47.97631072998047, "learning_rate": 1.2891286307053943e-05, "loss": 0.788, "step": 21427 }, { "epoch": 17.782572614107885, "grad_norm": 44.071022033691406, "learning_rate": 1.2890954356846475e-05, "loss": 0.5805, "step": 21428 }, { "epoch": 17.783402489626557, "grad_norm": 12.268234252929688, "learning_rate": 1.2890622406639007e-05, "loss": 0.2484, 
"step": 21429 }, { "epoch": 17.78423236514523, "grad_norm": 30.920427322387695, "learning_rate": 1.2890290456431536e-05, "loss": 0.5956, "step": 21430 }, { "epoch": 17.7850622406639, "grad_norm": 76.21016693115234, "learning_rate": 1.2889958506224068e-05, "loss": 1.2728, "step": 21431 }, { "epoch": 17.785892116182573, "grad_norm": 31.939149856567383, "learning_rate": 1.28896265560166e-05, "loss": 0.6355, "step": 21432 }, { "epoch": 17.786721991701246, "grad_norm": 45.490089416503906, "learning_rate": 1.288929460580913e-05, "loss": 0.8016, "step": 21433 }, { "epoch": 17.787551867219918, "grad_norm": 37.52822494506836, "learning_rate": 1.288896265560166e-05, "loss": 0.5806, "step": 21434 }, { "epoch": 17.78838174273859, "grad_norm": 34.447723388671875, "learning_rate": 1.2888630705394191e-05, "loss": 0.7179, "step": 21435 }, { "epoch": 17.789211618257262, "grad_norm": 115.60889434814453, "learning_rate": 1.2888298755186723e-05, "loss": 1.0903, "step": 21436 }, { "epoch": 17.790041493775934, "grad_norm": 38.768245697021484, "learning_rate": 1.2887966804979255e-05, "loss": 0.733, "step": 21437 }, { "epoch": 17.790871369294607, "grad_norm": 53.39358139038086, "learning_rate": 1.2887634854771784e-05, "loss": 1.3423, "step": 21438 }, { "epoch": 17.79170124481328, "grad_norm": 26.393173217773438, "learning_rate": 1.2887302904564316e-05, "loss": 0.6067, "step": 21439 }, { "epoch": 17.79253112033195, "grad_norm": 26.530635833740234, "learning_rate": 1.2886970954356848e-05, "loss": 0.3862, "step": 21440 }, { "epoch": 17.793360995850623, "grad_norm": 115.50618743896484, "learning_rate": 1.288663900414938e-05, "loss": 0.7361, "step": 21441 }, { "epoch": 17.794190871369295, "grad_norm": 39.498695373535156, "learning_rate": 1.2886307053941909e-05, "loss": 1.0912, "step": 21442 }, { "epoch": 17.795020746887968, "grad_norm": 47.5312614440918, "learning_rate": 1.2885975103734441e-05, "loss": 0.6947, "step": 21443 }, { "epoch": 17.79585062240664, "grad_norm": 40.567543029785156, 
"learning_rate": 1.2885643153526971e-05, "loss": 0.8423, "step": 21444 }, { "epoch": 17.796680497925312, "grad_norm": 56.1571159362793, "learning_rate": 1.2885311203319504e-05, "loss": 0.5768, "step": 21445 }, { "epoch": 17.797510373443984, "grad_norm": 24.593076705932617, "learning_rate": 1.2884979253112034e-05, "loss": 0.3993, "step": 21446 }, { "epoch": 17.798340248962656, "grad_norm": 36.87010192871094, "learning_rate": 1.2884647302904564e-05, "loss": 0.6004, "step": 21447 }, { "epoch": 17.79917012448133, "grad_norm": 54.763729095458984, "learning_rate": 1.2884315352697097e-05, "loss": 0.8114, "step": 21448 }, { "epoch": 17.8, "grad_norm": 13.870447158813477, "learning_rate": 1.2883983402489629e-05, "loss": 0.3358, "step": 21449 }, { "epoch": 17.800829875518673, "grad_norm": 37.86174392700195, "learning_rate": 1.2883651452282157e-05, "loss": 0.6014, "step": 21450 }, { "epoch": 17.801659751037345, "grad_norm": 54.99567794799805, "learning_rate": 1.288331950207469e-05, "loss": 0.8543, "step": 21451 }, { "epoch": 17.802489626556017, "grad_norm": 25.608667373657227, "learning_rate": 1.2882987551867222e-05, "loss": 0.4572, "step": 21452 }, { "epoch": 17.80331950207469, "grad_norm": 32.4357795715332, "learning_rate": 1.2882655601659752e-05, "loss": 0.4098, "step": 21453 }, { "epoch": 17.80414937759336, "grad_norm": 102.59024810791016, "learning_rate": 1.2882323651452284e-05, "loss": 1.0616, "step": 21454 }, { "epoch": 17.804979253112034, "grad_norm": 53.9525260925293, "learning_rate": 1.2881991701244814e-05, "loss": 0.7124, "step": 21455 }, { "epoch": 17.805809128630706, "grad_norm": 115.43174743652344, "learning_rate": 1.2881659751037345e-05, "loss": 1.2644, "step": 21456 }, { "epoch": 17.80663900414938, "grad_norm": 42.065242767333984, "learning_rate": 1.2881327800829877e-05, "loss": 0.6328, "step": 21457 }, { "epoch": 17.80746887966805, "grad_norm": 17.58635902404785, "learning_rate": 1.2880995850622409e-05, "loss": 0.3753, "step": 21458 }, { "epoch": 
17.808298755186723, "grad_norm": 74.66314697265625, "learning_rate": 1.2880663900414938e-05, "loss": 0.9451, "step": 21459 }, { "epoch": 17.809128630705395, "grad_norm": 74.30593872070312, "learning_rate": 1.288033195020747e-05, "loss": 0.8205, "step": 21460 }, { "epoch": 17.809958506224067, "grad_norm": 28.44823455810547, "learning_rate": 1.2880000000000002e-05, "loss": 0.6079, "step": 21461 }, { "epoch": 17.81078838174274, "grad_norm": 29.338333129882812, "learning_rate": 1.2879668049792532e-05, "loss": 0.6543, "step": 21462 }, { "epoch": 17.81161825726141, "grad_norm": 52.19349670410156, "learning_rate": 1.2879336099585063e-05, "loss": 0.5669, "step": 21463 }, { "epoch": 17.812448132780084, "grad_norm": 63.5623664855957, "learning_rate": 1.2879004149377595e-05, "loss": 0.3849, "step": 21464 }, { "epoch": 17.813278008298756, "grad_norm": 57.87320327758789, "learning_rate": 1.2878672199170125e-05, "loss": 0.6149, "step": 21465 }, { "epoch": 17.814107883817428, "grad_norm": 18.371395111083984, "learning_rate": 1.2878340248962657e-05, "loss": 0.3348, "step": 21466 }, { "epoch": 17.8149377593361, "grad_norm": 26.30646324157715, "learning_rate": 1.2878008298755186e-05, "loss": 1.1619, "step": 21467 }, { "epoch": 17.815767634854772, "grad_norm": 27.847837448120117, "learning_rate": 1.2877676348547718e-05, "loss": 0.6072, "step": 21468 }, { "epoch": 17.816597510373445, "grad_norm": 77.45082092285156, "learning_rate": 1.287734439834025e-05, "loss": 0.539, "step": 21469 }, { "epoch": 17.817427385892117, "grad_norm": 49.7308235168457, "learning_rate": 1.2877012448132783e-05, "loss": 0.5762, "step": 21470 }, { "epoch": 17.81825726141079, "grad_norm": 48.168514251708984, "learning_rate": 1.2876680497925311e-05, "loss": 0.9481, "step": 21471 }, { "epoch": 17.81908713692946, "grad_norm": 44.344078063964844, "learning_rate": 1.2876348547717843e-05, "loss": 0.4152, "step": 21472 }, { "epoch": 17.819917012448133, "grad_norm": 40.738182067871094, "learning_rate": 
1.2876016597510374e-05, "loss": 0.9325, "step": 21473 }, { "epoch": 17.820746887966806, "grad_norm": 21.27322006225586, "learning_rate": 1.2875684647302906e-05, "loss": 0.8309, "step": 21474 }, { "epoch": 17.821576763485478, "grad_norm": 60.77215576171875, "learning_rate": 1.2875352697095436e-05, "loss": 1.2211, "step": 21475 }, { "epoch": 17.82240663900415, "grad_norm": 28.03347396850586, "learning_rate": 1.2875020746887967e-05, "loss": 0.4636, "step": 21476 }, { "epoch": 17.823236514522822, "grad_norm": 46.17153549194336, "learning_rate": 1.2874688796680499e-05, "loss": 1.1582, "step": 21477 }, { "epoch": 17.824066390041494, "grad_norm": 42.25920104980469, "learning_rate": 1.2874356846473031e-05, "loss": 0.4036, "step": 21478 }, { "epoch": 17.824896265560167, "grad_norm": 40.20829391479492, "learning_rate": 1.2874024896265563e-05, "loss": 1.039, "step": 21479 }, { "epoch": 17.82572614107884, "grad_norm": 231.4873046875, "learning_rate": 1.2873692946058092e-05, "loss": 1.0142, "step": 21480 }, { "epoch": 17.82655601659751, "grad_norm": 32.443912506103516, "learning_rate": 1.2873360995850624e-05, "loss": 0.4834, "step": 21481 }, { "epoch": 17.827385892116183, "grad_norm": 89.49452209472656, "learning_rate": 1.2873029045643154e-05, "loss": 1.0675, "step": 21482 }, { "epoch": 17.828215767634855, "grad_norm": 24.764699935913086, "learning_rate": 1.2872697095435686e-05, "loss": 0.4483, "step": 21483 }, { "epoch": 17.829045643153528, "grad_norm": 51.76800537109375, "learning_rate": 1.2872365145228217e-05, "loss": 0.746, "step": 21484 }, { "epoch": 17.8298755186722, "grad_norm": 20.74468231201172, "learning_rate": 1.2872033195020747e-05, "loss": 0.332, "step": 21485 }, { "epoch": 17.830705394190872, "grad_norm": 56.236785888671875, "learning_rate": 1.287170124481328e-05, "loss": 0.8835, "step": 21486 }, { "epoch": 17.831535269709544, "grad_norm": 25.638059616088867, "learning_rate": 1.2871369294605811e-05, "loss": 0.7073, "step": 21487 }, { "epoch": 17.832365145228216, 
"grad_norm": 28.61624526977539, "learning_rate": 1.287103734439834e-05, "loss": 0.4495, "step": 21488 }, { "epoch": 17.83319502074689, "grad_norm": 19.902057647705078, "learning_rate": 1.2870705394190872e-05, "loss": 0.4236, "step": 21489 }, { "epoch": 17.83402489626556, "grad_norm": 44.02446365356445, "learning_rate": 1.2870373443983404e-05, "loss": 0.7999, "step": 21490 }, { "epoch": 17.834854771784233, "grad_norm": 48.15915298461914, "learning_rate": 1.2870041493775935e-05, "loss": 0.6032, "step": 21491 }, { "epoch": 17.835684647302905, "grad_norm": 25.695615768432617, "learning_rate": 1.2869709543568465e-05, "loss": 0.4174, "step": 21492 }, { "epoch": 17.836514522821577, "grad_norm": 14.905468940734863, "learning_rate": 1.2869377593360997e-05, "loss": 0.4299, "step": 21493 }, { "epoch": 17.83734439834025, "grad_norm": 58.437774658203125, "learning_rate": 1.2869045643153528e-05, "loss": 0.9006, "step": 21494 }, { "epoch": 17.83817427385892, "grad_norm": 42.5013542175293, "learning_rate": 1.286871369294606e-05, "loss": 0.8189, "step": 21495 }, { "epoch": 17.839004149377594, "grad_norm": 69.3091812133789, "learning_rate": 1.2868381742738589e-05, "loss": 0.7482, "step": 21496 }, { "epoch": 17.839834024896266, "grad_norm": 45.7614860534668, "learning_rate": 1.286804979253112e-05, "loss": 0.457, "step": 21497 }, { "epoch": 17.84066390041494, "grad_norm": 61.68111801147461, "learning_rate": 1.2867717842323653e-05, "loss": 0.992, "step": 21498 }, { "epoch": 17.84149377593361, "grad_norm": 38.597755432128906, "learning_rate": 1.2867385892116185e-05, "loss": 0.4234, "step": 21499 }, { "epoch": 17.842323651452283, "grad_norm": 50.06224822998047, "learning_rate": 1.2867053941908714e-05, "loss": 0.7115, "step": 21500 }, { "epoch": 17.843153526970955, "grad_norm": 59.82938766479492, "learning_rate": 1.2866721991701246e-05, "loss": 0.4711, "step": 21501 }, { "epoch": 17.843983402489627, "grad_norm": 47.77835464477539, "learning_rate": 1.2866390041493778e-05, "loss": 1.6038, 
"step": 21502 }, { "epoch": 17.8448132780083, "grad_norm": 29.870742797851562, "learning_rate": 1.2866058091286308e-05, "loss": 0.5188, "step": 21503 }, { "epoch": 17.84564315352697, "grad_norm": 50.15196990966797, "learning_rate": 1.286572614107884e-05, "loss": 0.6998, "step": 21504 }, { "epoch": 17.846473029045644, "grad_norm": 44.897674560546875, "learning_rate": 1.2865394190871369e-05, "loss": 0.4932, "step": 21505 }, { "epoch": 17.847302904564316, "grad_norm": 34.048038482666016, "learning_rate": 1.2865062240663901e-05, "loss": 0.5624, "step": 21506 }, { "epoch": 17.848132780082988, "grad_norm": 26.643646240234375, "learning_rate": 1.2864730290456433e-05, "loss": 0.4555, "step": 21507 }, { "epoch": 17.84896265560166, "grad_norm": 59.03225326538086, "learning_rate": 1.2864398340248965e-05, "loss": 0.7112, "step": 21508 }, { "epoch": 17.849792531120332, "grad_norm": 27.357036590576172, "learning_rate": 1.2864066390041494e-05, "loss": 0.4931, "step": 21509 }, { "epoch": 17.850622406639005, "grad_norm": 16.068702697753906, "learning_rate": 1.2863734439834026e-05, "loss": 0.3286, "step": 21510 }, { "epoch": 17.851452282157677, "grad_norm": 41.72917556762695, "learning_rate": 1.2863402489626558e-05, "loss": 1.124, "step": 21511 }, { "epoch": 17.85228215767635, "grad_norm": 50.80866622924805, "learning_rate": 1.2863070539419089e-05, "loss": 0.8552, "step": 21512 }, { "epoch": 17.85311203319502, "grad_norm": 63.17099380493164, "learning_rate": 1.2862738589211619e-05, "loss": 0.5782, "step": 21513 }, { "epoch": 17.853941908713693, "grad_norm": 32.52210998535156, "learning_rate": 1.286240663900415e-05, "loss": 0.563, "step": 21514 }, { "epoch": 17.854771784232366, "grad_norm": 20.47496795654297, "learning_rate": 1.2862074688796682e-05, "loss": 0.3059, "step": 21515 }, { "epoch": 17.855601659751038, "grad_norm": 109.22159576416016, "learning_rate": 1.2861742738589214e-05, "loss": 0.8322, "step": 21516 }, { "epoch": 17.85643153526971, "grad_norm": 50.274269104003906, 
"learning_rate": 1.2861410788381742e-05, "loss": 0.6788, "step": 21517 }, { "epoch": 17.857261410788382, "grad_norm": 20.075241088867188, "learning_rate": 1.2861078838174275e-05, "loss": 0.3272, "step": 21518 }, { "epoch": 17.858091286307054, "grad_norm": 28.348045349121094, "learning_rate": 1.2860746887966807e-05, "loss": 0.4617, "step": 21519 }, { "epoch": 17.858921161825727, "grad_norm": 46.480430603027344, "learning_rate": 1.2860414937759337e-05, "loss": 0.5949, "step": 21520 }, { "epoch": 17.8597510373444, "grad_norm": 25.766267776489258, "learning_rate": 1.2860082987551868e-05, "loss": 0.4816, "step": 21521 }, { "epoch": 17.86058091286307, "grad_norm": 26.589298248291016, "learning_rate": 1.28597510373444e-05, "loss": 0.7672, "step": 21522 }, { "epoch": 17.861410788381743, "grad_norm": 74.03035736083984, "learning_rate": 1.285941908713693e-05, "loss": 0.6098, "step": 21523 }, { "epoch": 17.862240663900415, "grad_norm": 46.69693374633789, "learning_rate": 1.2859087136929462e-05, "loss": 1.6645, "step": 21524 }, { "epoch": 17.863070539419088, "grad_norm": 30.63718605041504, "learning_rate": 1.2858755186721993e-05, "loss": 0.4807, "step": 21525 }, { "epoch": 17.86390041493776, "grad_norm": 39.016357421875, "learning_rate": 1.2858423236514523e-05, "loss": 1.2682, "step": 21526 }, { "epoch": 17.864730290456432, "grad_norm": 124.29803466796875, "learning_rate": 1.2858091286307055e-05, "loss": 0.537, "step": 21527 }, { "epoch": 17.865560165975104, "grad_norm": 15.323978424072266, "learning_rate": 1.2857759336099587e-05, "loss": 0.3809, "step": 21528 }, { "epoch": 17.866390041493776, "grad_norm": 63.80323791503906, "learning_rate": 1.2857427385892116e-05, "loss": 1.2614, "step": 21529 }, { "epoch": 17.86721991701245, "grad_norm": 64.5934066772461, "learning_rate": 1.2857095435684648e-05, "loss": 0.6968, "step": 21530 }, { "epoch": 17.86804979253112, "grad_norm": 95.12786102294922, "learning_rate": 1.285676348547718e-05, "loss": 0.5283, "step": 21531 }, { "epoch": 
17.868879668049793, "grad_norm": 38.16279983520508, "learning_rate": 1.285643153526971e-05, "loss": 0.6202, "step": 21532 }, { "epoch": 17.869709543568465, "grad_norm": 51.5438232421875, "learning_rate": 1.2856099585062243e-05, "loss": 0.711, "step": 21533 }, { "epoch": 17.870539419087137, "grad_norm": 88.53333282470703, "learning_rate": 1.2855767634854771e-05, "loss": 0.5654, "step": 21534 }, { "epoch": 17.87136929460581, "grad_norm": 85.2795639038086, "learning_rate": 1.2855435684647303e-05, "loss": 1.2519, "step": 21535 }, { "epoch": 17.872199170124482, "grad_norm": 39.20488739013672, "learning_rate": 1.2855103734439836e-05, "loss": 0.4607, "step": 21536 }, { "epoch": 17.873029045643154, "grad_norm": 54.09790802001953, "learning_rate": 1.2854771784232368e-05, "loss": 0.7052, "step": 21537 }, { "epoch": 17.873858921161826, "grad_norm": 90.2610092163086, "learning_rate": 1.2854439834024896e-05, "loss": 0.4486, "step": 21538 }, { "epoch": 17.8746887966805, "grad_norm": 30.840084075927734, "learning_rate": 1.2854107883817429e-05, "loss": 0.7882, "step": 21539 }, { "epoch": 17.87551867219917, "grad_norm": 58.095367431640625, "learning_rate": 1.285377593360996e-05, "loss": 0.9762, "step": 21540 }, { "epoch": 17.876348547717843, "grad_norm": 48.37800598144531, "learning_rate": 1.2853443983402491e-05, "loss": 0.4975, "step": 21541 }, { "epoch": 17.877178423236515, "grad_norm": 34.53758239746094, "learning_rate": 1.2853112033195021e-05, "loss": 0.7066, "step": 21542 }, { "epoch": 17.878008298755187, "grad_norm": 51.52092361450195, "learning_rate": 1.2852780082987552e-05, "loss": 0.7831, "step": 21543 }, { "epoch": 17.87883817427386, "grad_norm": 33.181312561035156, "learning_rate": 1.2852448132780084e-05, "loss": 0.6892, "step": 21544 }, { "epoch": 17.87966804979253, "grad_norm": 50.2013053894043, "learning_rate": 1.2852116182572616e-05, "loss": 1.0011, "step": 21545 }, { "epoch": 17.880497925311204, "grad_norm": 30.015697479248047, "learning_rate": 
1.2851784232365145e-05, "loss": 0.575, "step": 21546 }, { "epoch": 17.881327800829876, "grad_norm": 80.99813079833984, "learning_rate": 1.2851452282157677e-05, "loss": 0.8292, "step": 21547 }, { "epoch": 17.882157676348548, "grad_norm": 35.94968795776367, "learning_rate": 1.2851120331950209e-05, "loss": 0.6682, "step": 21548 }, { "epoch": 17.88298755186722, "grad_norm": 16.837779998779297, "learning_rate": 1.2850788381742741e-05, "loss": 0.377, "step": 21549 }, { "epoch": 17.883817427385893, "grad_norm": 49.12791061401367, "learning_rate": 1.285045643153527e-05, "loss": 0.5134, "step": 21550 }, { "epoch": 17.884647302904565, "grad_norm": 30.6972713470459, "learning_rate": 1.2850124481327802e-05, "loss": 0.9642, "step": 21551 }, { "epoch": 17.885477178423237, "grad_norm": 42.75857925415039, "learning_rate": 1.2849792531120332e-05, "loss": 1.0633, "step": 21552 }, { "epoch": 17.88630705394191, "grad_norm": 48.667423248291016, "learning_rate": 1.2849460580912864e-05, "loss": 0.7689, "step": 21553 }, { "epoch": 17.88713692946058, "grad_norm": 23.176071166992188, "learning_rate": 1.2849128630705395e-05, "loss": 0.3954, "step": 21554 }, { "epoch": 17.887966804979254, "grad_norm": 33.528202056884766, "learning_rate": 1.2848796680497925e-05, "loss": 0.9107, "step": 21555 }, { "epoch": 17.888796680497926, "grad_norm": 47.70392990112305, "learning_rate": 1.2848464730290457e-05, "loss": 1.1362, "step": 21556 }, { "epoch": 17.889626556016598, "grad_norm": 38.68852996826172, "learning_rate": 1.284813278008299e-05, "loss": 0.6926, "step": 21557 }, { "epoch": 17.89045643153527, "grad_norm": 57.714683532714844, "learning_rate": 1.2847800829875522e-05, "loss": 0.9931, "step": 21558 }, { "epoch": 17.891286307053942, "grad_norm": 20.47731590270996, "learning_rate": 1.284746887966805e-05, "loss": 0.4813, "step": 21559 }, { "epoch": 17.892116182572614, "grad_norm": 22.205970764160156, "learning_rate": 1.2847136929460582e-05, "loss": 0.485, "step": 21560 }, { "epoch": 
17.892946058091287, "grad_norm": 33.00054168701172, "learning_rate": 1.2846804979253113e-05, "loss": 0.542, "step": 21561 }, { "epoch": 17.89377593360996, "grad_norm": 39.217899322509766, "learning_rate": 1.2846473029045645e-05, "loss": 0.746, "step": 21562 }, { "epoch": 17.89460580912863, "grad_norm": 23.637413024902344, "learning_rate": 1.2846141078838175e-05, "loss": 0.6402, "step": 21563 }, { "epoch": 17.895435684647303, "grad_norm": 27.39730453491211, "learning_rate": 1.2845809128630706e-05, "loss": 0.5305, "step": 21564 }, { "epoch": 17.896265560165975, "grad_norm": 29.743696212768555, "learning_rate": 1.2845477178423238e-05, "loss": 0.6227, "step": 21565 }, { "epoch": 17.897095435684648, "grad_norm": 24.75309944152832, "learning_rate": 1.284514522821577e-05, "loss": 0.4086, "step": 21566 }, { "epoch": 17.89792531120332, "grad_norm": 25.000253677368164, "learning_rate": 1.2844813278008299e-05, "loss": 0.6685, "step": 21567 }, { "epoch": 17.898755186721992, "grad_norm": 41.37412643432617, "learning_rate": 1.284448132780083e-05, "loss": 1.0261, "step": 21568 }, { "epoch": 17.899585062240664, "grad_norm": 39.494441986083984, "learning_rate": 1.2844149377593363e-05, "loss": 0.617, "step": 21569 }, { "epoch": 17.900414937759336, "grad_norm": 26.759490966796875, "learning_rate": 1.2843817427385893e-05, "loss": 0.4791, "step": 21570 }, { "epoch": 17.90124481327801, "grad_norm": 41.10995864868164, "learning_rate": 1.2843485477178424e-05, "loss": 1.5035, "step": 21571 }, { "epoch": 17.90207468879668, "grad_norm": 48.855735778808594, "learning_rate": 1.2843153526970956e-05, "loss": 0.7863, "step": 21572 }, { "epoch": 17.902904564315353, "grad_norm": 52.458412170410156, "learning_rate": 1.2842821576763486e-05, "loss": 0.6833, "step": 21573 }, { "epoch": 17.903734439834025, "grad_norm": 38.56178283691406, "learning_rate": 1.2842489626556018e-05, "loss": 0.8679, "step": 21574 }, { "epoch": 17.904564315352697, "grad_norm": 38.694419860839844, "learning_rate": 
1.2842157676348547e-05, "loss": 0.4416, "step": 21575 }, { "epoch": 17.90539419087137, "grad_norm": 13.477372169494629, "learning_rate": 1.284182572614108e-05, "loss": 0.2394, "step": 21576 }, { "epoch": 17.906224066390042, "grad_norm": 81.84082794189453, "learning_rate": 1.2841493775933611e-05, "loss": 1.0029, "step": 21577 }, { "epoch": 17.907053941908714, "grad_norm": 35.87731170654297, "learning_rate": 1.2841161825726143e-05, "loss": 0.5656, "step": 21578 }, { "epoch": 17.907883817427386, "grad_norm": 32.46135711669922, "learning_rate": 1.2840829875518672e-05, "loss": 0.6904, "step": 21579 }, { "epoch": 17.90871369294606, "grad_norm": 45.1544189453125, "learning_rate": 1.2840497925311204e-05, "loss": 0.3611, "step": 21580 }, { "epoch": 17.90954356846473, "grad_norm": 32.00068664550781, "learning_rate": 1.2840165975103736e-05, "loss": 0.7835, "step": 21581 }, { "epoch": 17.910373443983403, "grad_norm": 17.54750633239746, "learning_rate": 1.2839834024896267e-05, "loss": 0.3559, "step": 21582 }, { "epoch": 17.911203319502075, "grad_norm": 36.927181243896484, "learning_rate": 1.2839502074688799e-05, "loss": 0.4485, "step": 21583 }, { "epoch": 17.912033195020747, "grad_norm": 98.96454620361328, "learning_rate": 1.2839170124481328e-05, "loss": 1.0588, "step": 21584 }, { "epoch": 17.91286307053942, "grad_norm": 47.07752227783203, "learning_rate": 1.283883817427386e-05, "loss": 0.687, "step": 21585 }, { "epoch": 17.91369294605809, "grad_norm": 48.235103607177734, "learning_rate": 1.2838506224066392e-05, "loss": 0.6169, "step": 21586 }, { "epoch": 17.914522821576764, "grad_norm": 27.46709632873535, "learning_rate": 1.2838174273858924e-05, "loss": 0.3273, "step": 21587 }, { "epoch": 17.915352697095436, "grad_norm": 70.21192169189453, "learning_rate": 1.2837842323651453e-05, "loss": 0.6355, "step": 21588 }, { "epoch": 17.916182572614108, "grad_norm": 62.936622619628906, "learning_rate": 1.2837510373443985e-05, "loss": 1.0479, "step": 21589 }, { "epoch": 17.91701244813278, 
"grad_norm": 93.19879913330078, "learning_rate": 1.2837178423236515e-05, "loss": 1.0122, "step": 21590 }, { "epoch": 17.917842323651453, "grad_norm": 43.531864166259766, "learning_rate": 1.2836846473029047e-05, "loss": 0.5568, "step": 21591 }, { "epoch": 17.918672199170125, "grad_norm": 64.42412567138672, "learning_rate": 1.2836514522821578e-05, "loss": 0.523, "step": 21592 }, { "epoch": 17.919502074688797, "grad_norm": 58.06624221801758, "learning_rate": 1.2836182572614108e-05, "loss": 0.457, "step": 21593 }, { "epoch": 17.92033195020747, "grad_norm": 20.041702270507812, "learning_rate": 1.283585062240664e-05, "loss": 0.4091, "step": 21594 }, { "epoch": 17.92116182572614, "grad_norm": 28.116117477416992, "learning_rate": 1.2835518672199172e-05, "loss": 0.5478, "step": 21595 }, { "epoch": 17.921991701244814, "grad_norm": 34.29698181152344, "learning_rate": 1.2835186721991701e-05, "loss": 0.6354, "step": 21596 }, { "epoch": 17.922821576763486, "grad_norm": 17.631986618041992, "learning_rate": 1.2834854771784233e-05, "loss": 0.3443, "step": 21597 }, { "epoch": 17.923651452282158, "grad_norm": 30.94077491760254, "learning_rate": 1.2834522821576765e-05, "loss": 0.7816, "step": 21598 }, { "epoch": 17.92448132780083, "grad_norm": 45.909854888916016, "learning_rate": 1.2834190871369296e-05, "loss": 0.5289, "step": 21599 }, { "epoch": 17.925311203319502, "grad_norm": 41.05848693847656, "learning_rate": 1.2833858921161826e-05, "loss": 0.6241, "step": 21600 }, { "epoch": 17.926141078838175, "grad_norm": 24.980878829956055, "learning_rate": 1.2833526970954358e-05, "loss": 0.4285, "step": 21601 }, { "epoch": 17.926970954356847, "grad_norm": 13.509607315063477, "learning_rate": 1.2833195020746889e-05, "loss": 0.2519, "step": 21602 }, { "epoch": 17.92780082987552, "grad_norm": 50.329532623291016, "learning_rate": 1.283286307053942e-05, "loss": 0.7294, "step": 21603 }, { "epoch": 17.92863070539419, "grad_norm": 31.344709396362305, "learning_rate": 1.283253112033195e-05, "loss": 
1.1086, "step": 21604 }, { "epoch": 17.929460580912863, "grad_norm": 37.031864166259766, "learning_rate": 1.2832199170124482e-05, "loss": 0.4879, "step": 21605 }, { "epoch": 17.930290456431536, "grad_norm": 43.812782287597656, "learning_rate": 1.2831867219917014e-05, "loss": 0.5384, "step": 21606 }, { "epoch": 17.931120331950208, "grad_norm": 27.772680282592773, "learning_rate": 1.2831535269709546e-05, "loss": 0.7143, "step": 21607 }, { "epoch": 17.93195020746888, "grad_norm": 99.30323028564453, "learning_rate": 1.2831203319502074e-05, "loss": 1.0256, "step": 21608 }, { "epoch": 17.932780082987552, "grad_norm": 79.92074584960938, "learning_rate": 1.2830871369294607e-05, "loss": 0.8188, "step": 21609 }, { "epoch": 17.933609958506224, "grad_norm": 86.1965560913086, "learning_rate": 1.2830539419087139e-05, "loss": 0.9408, "step": 21610 }, { "epoch": 17.934439834024896, "grad_norm": 25.916597366333008, "learning_rate": 1.2830207468879669e-05, "loss": 0.3406, "step": 21611 }, { "epoch": 17.93526970954357, "grad_norm": 45.9475212097168, "learning_rate": 1.2829875518672201e-05, "loss": 0.577, "step": 21612 }, { "epoch": 17.93609958506224, "grad_norm": 40.259708404541016, "learning_rate": 1.282954356846473e-05, "loss": 1.3583, "step": 21613 }, { "epoch": 17.936929460580913, "grad_norm": 32.365264892578125, "learning_rate": 1.2829211618257262e-05, "loss": 0.63, "step": 21614 }, { "epoch": 17.937759336099585, "grad_norm": 69.13970184326172, "learning_rate": 1.2828879668049794e-05, "loss": 0.7108, "step": 21615 }, { "epoch": 17.938589211618257, "grad_norm": 22.95999526977539, "learning_rate": 1.2828547717842326e-05, "loss": 0.3376, "step": 21616 }, { "epoch": 17.93941908713693, "grad_norm": 63.8108024597168, "learning_rate": 1.2828215767634855e-05, "loss": 0.6886, "step": 21617 }, { "epoch": 17.940248962655602, "grad_norm": 37.87937545776367, "learning_rate": 1.2827883817427387e-05, "loss": 0.8427, "step": 21618 }, { "epoch": 17.941078838174274, "grad_norm": 
21.039213180541992, "learning_rate": 1.282755186721992e-05, "loss": 0.5303, "step": 21619 }, { "epoch": 17.941908713692946, "grad_norm": 22.7623348236084, "learning_rate": 1.282721991701245e-05, "loss": 0.3229, "step": 21620 }, { "epoch": 17.94273858921162, "grad_norm": 35.62734603881836, "learning_rate": 1.282688796680498e-05, "loss": 0.6816, "step": 21621 }, { "epoch": 17.94356846473029, "grad_norm": 37.773929595947266, "learning_rate": 1.282655601659751e-05, "loss": 0.7728, "step": 21622 }, { "epoch": 17.944398340248963, "grad_norm": 46.6807861328125, "learning_rate": 1.2826224066390043e-05, "loss": 0.715, "step": 21623 }, { "epoch": 17.945228215767635, "grad_norm": 96.88912200927734, "learning_rate": 1.2825892116182575e-05, "loss": 0.9731, "step": 21624 }, { "epoch": 17.946058091286307, "grad_norm": 30.445783615112305, "learning_rate": 1.2825560165975103e-05, "loss": 0.445, "step": 21625 }, { "epoch": 17.94688796680498, "grad_norm": 35.665077209472656, "learning_rate": 1.2825228215767635e-05, "loss": 0.4037, "step": 21626 }, { "epoch": 17.94771784232365, "grad_norm": 10.410635948181152, "learning_rate": 1.2824896265560168e-05, "loss": 0.2755, "step": 21627 }, { "epoch": 17.948547717842324, "grad_norm": 32.6807975769043, "learning_rate": 1.28245643153527e-05, "loss": 0.6257, "step": 21628 }, { "epoch": 17.949377593360996, "grad_norm": 81.0211410522461, "learning_rate": 1.2824232365145228e-05, "loss": 0.8395, "step": 21629 }, { "epoch": 17.95020746887967, "grad_norm": 22.091718673706055, "learning_rate": 1.282390041493776e-05, "loss": 0.4741, "step": 21630 }, { "epoch": 17.95103734439834, "grad_norm": 34.95893096923828, "learning_rate": 1.2823568464730291e-05, "loss": 0.4874, "step": 21631 }, { "epoch": 17.951867219917013, "grad_norm": 60.13858413696289, "learning_rate": 1.2823236514522823e-05, "loss": 1.1185, "step": 21632 }, { "epoch": 17.952697095435685, "grad_norm": 59.92265319824219, "learning_rate": 1.2822904564315353e-05, "loss": 0.7464, "step": 21633 }, { 
"epoch": 17.953526970954357, "grad_norm": 24.97330665588379, "learning_rate": 1.2822572614107884e-05, "loss": 0.4254, "step": 21634 }, { "epoch": 17.95435684647303, "grad_norm": 19.292831420898438, "learning_rate": 1.2822240663900416e-05, "loss": 0.3279, "step": 21635 }, { "epoch": 17.9551867219917, "grad_norm": 36.55126953125, "learning_rate": 1.2821908713692948e-05, "loss": 0.396, "step": 21636 }, { "epoch": 17.956016597510374, "grad_norm": 41.093021392822266, "learning_rate": 1.2821576763485478e-05, "loss": 0.5703, "step": 21637 }, { "epoch": 17.956846473029046, "grad_norm": 49.17100143432617, "learning_rate": 1.2821244813278009e-05, "loss": 1.1806, "step": 21638 }, { "epoch": 17.957676348547718, "grad_norm": 41.50421905517578, "learning_rate": 1.2820912863070541e-05, "loss": 0.9574, "step": 21639 }, { "epoch": 17.95850622406639, "grad_norm": 19.78026008605957, "learning_rate": 1.2820580912863071e-05, "loss": 0.3587, "step": 21640 }, { "epoch": 17.959336099585062, "grad_norm": 41.068485260009766, "learning_rate": 1.2820248962655604e-05, "loss": 0.7876, "step": 21641 }, { "epoch": 17.960165975103735, "grad_norm": 59.02637481689453, "learning_rate": 1.2819917012448134e-05, "loss": 1.1677, "step": 21642 }, { "epoch": 17.960995850622407, "grad_norm": 13.148202896118164, "learning_rate": 1.2819585062240664e-05, "loss": 0.2967, "step": 21643 }, { "epoch": 17.96182572614108, "grad_norm": 97.76546478271484, "learning_rate": 1.2819253112033196e-05, "loss": 0.9741, "step": 21644 }, { "epoch": 17.96265560165975, "grad_norm": 41.861778259277344, "learning_rate": 1.2818921161825729e-05, "loss": 0.4334, "step": 21645 }, { "epoch": 17.963485477178423, "grad_norm": 13.804153442382812, "learning_rate": 1.2818589211618257e-05, "loss": 0.2836, "step": 21646 }, { "epoch": 17.964315352697096, "grad_norm": 28.928592681884766, "learning_rate": 1.281825726141079e-05, "loss": 0.5099, "step": 21647 }, { "epoch": 17.965145228215768, "grad_norm": 28.587997436523438, "learning_rate": 
1.2817925311203322e-05, "loss": 0.5253, "step": 21648 }, { "epoch": 17.96597510373444, "grad_norm": 41.66322326660156, "learning_rate": 1.2817593360995852e-05, "loss": 0.4103, "step": 21649 }, { "epoch": 17.966804979253112, "grad_norm": 76.46198272705078, "learning_rate": 1.2817261410788382e-05, "loss": 0.884, "step": 21650 }, { "epoch": 17.967634854771784, "grad_norm": 60.72410202026367, "learning_rate": 1.2816929460580913e-05, "loss": 0.7903, "step": 21651 }, { "epoch": 17.968464730290457, "grad_norm": 37.251155853271484, "learning_rate": 1.2816597510373445e-05, "loss": 0.5669, "step": 21652 }, { "epoch": 17.96929460580913, "grad_norm": 30.34373664855957, "learning_rate": 1.2816265560165977e-05, "loss": 0.4963, "step": 21653 }, { "epoch": 17.9701244813278, "grad_norm": 40.35531997680664, "learning_rate": 1.2815933609958506e-05, "loss": 0.9303, "step": 21654 }, { "epoch": 17.970954356846473, "grad_norm": 24.421348571777344, "learning_rate": 1.2815601659751038e-05, "loss": 0.5555, "step": 21655 }, { "epoch": 17.971784232365145, "grad_norm": 49.77437210083008, "learning_rate": 1.281526970954357e-05, "loss": 0.6835, "step": 21656 }, { "epoch": 17.972614107883818, "grad_norm": 17.04315757751465, "learning_rate": 1.2814937759336102e-05, "loss": 0.4466, "step": 21657 }, { "epoch": 17.97344398340249, "grad_norm": 39.280540466308594, "learning_rate": 1.281460580912863e-05, "loss": 0.5382, "step": 21658 }, { "epoch": 17.974273858921162, "grad_norm": 27.99213218688965, "learning_rate": 1.2814273858921163e-05, "loss": 0.4474, "step": 21659 }, { "epoch": 17.975103734439834, "grad_norm": 29.952768325805664, "learning_rate": 1.2813941908713693e-05, "loss": 0.5347, "step": 21660 }, { "epoch": 17.975933609958506, "grad_norm": 22.890777587890625, "learning_rate": 1.2813609958506225e-05, "loss": 0.3971, "step": 21661 }, { "epoch": 17.97676348547718, "grad_norm": 103.6596450805664, "learning_rate": 1.2813278008298757e-05, "loss": 0.6006, "step": 21662 }, { "epoch": 
17.97759336099585, "grad_norm": 20.091054916381836, "learning_rate": 1.2812946058091286e-05, "loss": 0.5953, "step": 21663 }, { "epoch": 17.978423236514523, "grad_norm": 35.2404899597168, "learning_rate": 1.2812614107883818e-05, "loss": 0.4465, "step": 21664 }, { "epoch": 17.979253112033195, "grad_norm": 55.13685989379883, "learning_rate": 1.281228215767635e-05, "loss": 0.8584, "step": 21665 }, { "epoch": 17.980082987551867, "grad_norm": 73.48493957519531, "learning_rate": 1.2811950207468883e-05, "loss": 0.7091, "step": 21666 }, { "epoch": 17.98091286307054, "grad_norm": 31.635969161987305, "learning_rate": 1.2811618257261411e-05, "loss": 0.5058, "step": 21667 }, { "epoch": 17.98174273858921, "grad_norm": 48.356624603271484, "learning_rate": 1.2811286307053943e-05, "loss": 0.894, "step": 21668 }, { "epoch": 17.982572614107884, "grad_norm": 54.149776458740234, "learning_rate": 1.2810954356846474e-05, "loss": 0.8558, "step": 21669 }, { "epoch": 17.983402489626556, "grad_norm": 42.6585693359375, "learning_rate": 1.2810622406639006e-05, "loss": 1.0123, "step": 21670 }, { "epoch": 17.98423236514523, "grad_norm": 161.63510131835938, "learning_rate": 1.2810290456431536e-05, "loss": 0.5297, "step": 21671 }, { "epoch": 17.9850622406639, "grad_norm": 23.223249435424805, "learning_rate": 1.2809958506224067e-05, "loss": 0.5557, "step": 21672 }, { "epoch": 17.985892116182573, "grad_norm": 55.317501068115234, "learning_rate": 1.2809626556016599e-05, "loss": 0.7845, "step": 21673 }, { "epoch": 17.986721991701245, "grad_norm": 20.861902236938477, "learning_rate": 1.2809294605809131e-05, "loss": 0.4819, "step": 21674 }, { "epoch": 17.987551867219917, "grad_norm": 32.302284240722656, "learning_rate": 1.280896265560166e-05, "loss": 0.6698, "step": 21675 }, { "epoch": 17.98838174273859, "grad_norm": 41.01325607299805, "learning_rate": 1.2808630705394192e-05, "loss": 0.7355, "step": 21676 }, { "epoch": 17.98921161825726, "grad_norm": 26.412080764770508, "learning_rate": 
1.2808298755186724e-05, "loss": 0.3727, "step": 21677 }, { "epoch": 17.990041493775934, "grad_norm": 76.91996002197266, "learning_rate": 1.2807966804979254e-05, "loss": 0.5948, "step": 21678 }, { "epoch": 17.990871369294606, "grad_norm": 30.658916473388672, "learning_rate": 1.2807634854771785e-05, "loss": 0.4173, "step": 21679 }, { "epoch": 17.991701244813278, "grad_norm": 115.9312973022461, "learning_rate": 1.2807302904564317e-05, "loss": 0.8023, "step": 21680 }, { "epoch": 17.99253112033195, "grad_norm": 48.18496322631836, "learning_rate": 1.2806970954356847e-05, "loss": 0.9181, "step": 21681 }, { "epoch": 17.993360995850622, "grad_norm": 35.654937744140625, "learning_rate": 1.280663900414938e-05, "loss": 0.8327, "step": 21682 }, { "epoch": 17.994190871369295, "grad_norm": 83.2281265258789, "learning_rate": 1.2806307053941908e-05, "loss": 0.9889, "step": 21683 }, { "epoch": 17.995020746887967, "grad_norm": 36.14139938354492, "learning_rate": 1.280597510373444e-05, "loss": 0.7005, "step": 21684 }, { "epoch": 17.99585062240664, "grad_norm": 25.934654235839844, "learning_rate": 1.2805643153526972e-05, "loss": 0.5339, "step": 21685 }, { "epoch": 17.99668049792531, "grad_norm": 32.22114944458008, "learning_rate": 1.2805311203319504e-05, "loss": 0.8962, "step": 21686 }, { "epoch": 17.997510373443983, "grad_norm": 119.9889144897461, "learning_rate": 1.2804979253112033e-05, "loss": 2.0382, "step": 21687 }, { "epoch": 17.998340248962656, "grad_norm": 57.722957611083984, "learning_rate": 1.2804647302904565e-05, "loss": 0.9837, "step": 21688 }, { "epoch": 17.999170124481328, "grad_norm": 39.15982437133789, "learning_rate": 1.2804315352697097e-05, "loss": 0.5934, "step": 21689 }, { "epoch": 18.0, "grad_norm": 43.29914855957031, "learning_rate": 1.2803983402489628e-05, "loss": 0.6141, "step": 21690 }, { "epoch": 18.000829875518672, "grad_norm": 27.47710418701172, "learning_rate": 1.280365145228216e-05, "loss": 0.833, "step": 21691 }, { "epoch": 18.001659751037344, 
"grad_norm": 11.515726089477539, "learning_rate": 1.2803319502074689e-05, "loss": 0.2415, "step": 21692 }, { "epoch": 18.002489626556017, "grad_norm": 31.281999588012695, "learning_rate": 1.280298755186722e-05, "loss": 0.4645, "step": 21693 }, { "epoch": 18.00331950207469, "grad_norm": 22.20561408996582, "learning_rate": 1.2802655601659753e-05, "loss": 0.3743, "step": 21694 }, { "epoch": 18.00414937759336, "grad_norm": 96.76493835449219, "learning_rate": 1.2802323651452285e-05, "loss": 0.4512, "step": 21695 }, { "epoch": 18.004979253112033, "grad_norm": 27.721515655517578, "learning_rate": 1.2801991701244814e-05, "loss": 0.563, "step": 21696 }, { "epoch": 18.005809128630705, "grad_norm": 34.467926025390625, "learning_rate": 1.2801659751037346e-05, "loss": 1.3292, "step": 21697 }, { "epoch": 18.006639004149378, "grad_norm": 29.380844116210938, "learning_rate": 1.2801327800829876e-05, "loss": 0.4996, "step": 21698 }, { "epoch": 18.00746887966805, "grad_norm": 34.93477249145508, "learning_rate": 1.2800995850622408e-05, "loss": 0.7206, "step": 21699 }, { "epoch": 18.008298755186722, "grad_norm": 49.94742965698242, "learning_rate": 1.2800663900414939e-05, "loss": 0.951, "step": 21700 }, { "epoch": 18.009128630705394, "grad_norm": 28.21578598022461, "learning_rate": 1.2800331950207469e-05, "loss": 0.5227, "step": 21701 }, { "epoch": 18.009958506224066, "grad_norm": 61.5879020690918, "learning_rate": 1.2800000000000001e-05, "loss": 0.8249, "step": 21702 }, { "epoch": 18.01078838174274, "grad_norm": 39.462528228759766, "learning_rate": 1.2799668049792533e-05, "loss": 0.7601, "step": 21703 }, { "epoch": 18.01161825726141, "grad_norm": 144.69239807128906, "learning_rate": 1.2799336099585062e-05, "loss": 0.6438, "step": 21704 }, { "epoch": 18.012448132780083, "grad_norm": 55.08936309814453, "learning_rate": 1.2799004149377594e-05, "loss": 1.1418, "step": 21705 }, { "epoch": 18.013278008298755, "grad_norm": 35.44489288330078, "learning_rate": 1.2798672199170126e-05, "loss": 
0.3364, "step": 21706 }, { "epoch": 18.014107883817427, "grad_norm": 41.931068420410156, "learning_rate": 1.2798340248962657e-05, "loss": 0.737, "step": 21707 }, { "epoch": 18.0149377593361, "grad_norm": 35.4659423828125, "learning_rate": 1.2798008298755187e-05, "loss": 0.6457, "step": 21708 }, { "epoch": 18.01576763485477, "grad_norm": 20.261987686157227, "learning_rate": 1.2797676348547719e-05, "loss": 0.4306, "step": 21709 }, { "epoch": 18.016597510373444, "grad_norm": 45.625911712646484, "learning_rate": 1.279734439834025e-05, "loss": 0.5988, "step": 21710 }, { "epoch": 18.017427385892116, "grad_norm": 40.30323791503906, "learning_rate": 1.2797012448132782e-05, "loss": 0.816, "step": 21711 }, { "epoch": 18.01825726141079, "grad_norm": 43.77588653564453, "learning_rate": 1.2796680497925312e-05, "loss": 0.6662, "step": 21712 }, { "epoch": 18.01908713692946, "grad_norm": 25.498517990112305, "learning_rate": 1.2796348547717842e-05, "loss": 0.495, "step": 21713 }, { "epoch": 18.019917012448133, "grad_norm": 46.05558776855469, "learning_rate": 1.2796016597510375e-05, "loss": 0.4536, "step": 21714 }, { "epoch": 18.020746887966805, "grad_norm": 14.789251327514648, "learning_rate": 1.2795684647302907e-05, "loss": 0.3116, "step": 21715 }, { "epoch": 18.021576763485477, "grad_norm": 26.602638244628906, "learning_rate": 1.2795352697095437e-05, "loss": 0.3907, "step": 21716 }, { "epoch": 18.02240663900415, "grad_norm": 20.142229080200195, "learning_rate": 1.2795020746887967e-05, "loss": 0.2079, "step": 21717 }, { "epoch": 18.02323651452282, "grad_norm": 32.95012664794922, "learning_rate": 1.27946887966805e-05, "loss": 0.7271, "step": 21718 }, { "epoch": 18.024066390041494, "grad_norm": 121.54371643066406, "learning_rate": 1.279435684647303e-05, "loss": 0.8931, "step": 21719 }, { "epoch": 18.024896265560166, "grad_norm": 61.01438903808594, "learning_rate": 1.2794024896265562e-05, "loss": 0.9659, "step": 21720 }, { "epoch": 18.025726141078838, "grad_norm": 40.50257873535156, 
"learning_rate": 1.279369294605809e-05, "loss": 1.1423, "step": 21721 }, { "epoch": 18.02655601659751, "grad_norm": 19.022615432739258, "learning_rate": 1.2793360995850623e-05, "loss": 0.2832, "step": 21722 }, { "epoch": 18.027385892116182, "grad_norm": 83.0019302368164, "learning_rate": 1.2793029045643155e-05, "loss": 0.7713, "step": 21723 }, { "epoch": 18.028215767634855, "grad_norm": 110.87460327148438, "learning_rate": 1.2792697095435687e-05, "loss": 1.1787, "step": 21724 }, { "epoch": 18.029045643153527, "grad_norm": 29.607948303222656, "learning_rate": 1.2792365145228216e-05, "loss": 0.8068, "step": 21725 }, { "epoch": 18.0298755186722, "grad_norm": 32.584739685058594, "learning_rate": 1.2792033195020748e-05, "loss": 0.6791, "step": 21726 }, { "epoch": 18.03070539419087, "grad_norm": 41.0217399597168, "learning_rate": 1.279170124481328e-05, "loss": 0.5988, "step": 21727 }, { "epoch": 18.031535269709543, "grad_norm": 5.7236552238464355, "learning_rate": 1.279136929460581e-05, "loss": 0.1949, "step": 21728 }, { "epoch": 18.032365145228216, "grad_norm": 16.137338638305664, "learning_rate": 1.2791037344398341e-05, "loss": 0.3107, "step": 21729 }, { "epoch": 18.033195020746888, "grad_norm": 105.94750213623047, "learning_rate": 1.2790705394190871e-05, "loss": 1.0704, "step": 21730 }, { "epoch": 18.03402489626556, "grad_norm": 80.62234497070312, "learning_rate": 1.2790373443983403e-05, "loss": 0.4281, "step": 21731 }, { "epoch": 18.034854771784232, "grad_norm": 66.91960144042969, "learning_rate": 1.2790041493775936e-05, "loss": 0.7572, "step": 21732 }, { "epoch": 18.035684647302904, "grad_norm": 137.98809814453125, "learning_rate": 1.2789709543568464e-05, "loss": 0.9264, "step": 21733 }, { "epoch": 18.036514522821577, "grad_norm": 38.05413818359375, "learning_rate": 1.2789377593360996e-05, "loss": 0.356, "step": 21734 }, { "epoch": 18.03734439834025, "grad_norm": 40.73760986328125, "learning_rate": 1.2789045643153528e-05, "loss": 0.8442, "step": 21735 }, { "epoch": 
18.03817427385892, "grad_norm": 23.78729820251465, "learning_rate": 1.278871369294606e-05, "loss": 0.365, "step": 21736 }, { "epoch": 18.039004149377593, "grad_norm": 24.284286499023438, "learning_rate": 1.278838174273859e-05, "loss": 0.2813, "step": 21737 }, { "epoch": 18.039834024896265, "grad_norm": 136.80368041992188, "learning_rate": 1.2788049792531121e-05, "loss": 0.8636, "step": 21738 }, { "epoch": 18.040663900414938, "grad_norm": 16.60312271118164, "learning_rate": 1.2787717842323652e-05, "loss": 0.2874, "step": 21739 }, { "epoch": 18.04149377593361, "grad_norm": 36.99807357788086, "learning_rate": 1.2787385892116184e-05, "loss": 0.5504, "step": 21740 }, { "epoch": 18.042323651452282, "grad_norm": 56.16712951660156, "learning_rate": 1.2787053941908714e-05, "loss": 0.7945, "step": 21741 }, { "epoch": 18.043153526970954, "grad_norm": 27.323884963989258, "learning_rate": 1.2786721991701245e-05, "loss": 0.5412, "step": 21742 }, { "epoch": 18.043983402489626, "grad_norm": 33.50778579711914, "learning_rate": 1.2786390041493777e-05, "loss": 0.6518, "step": 21743 }, { "epoch": 18.0448132780083, "grad_norm": 56.27691650390625, "learning_rate": 1.2786058091286309e-05, "loss": 0.8795, "step": 21744 }, { "epoch": 18.04564315352697, "grad_norm": 61.36296844482422, "learning_rate": 1.2785726141078841e-05, "loss": 0.6878, "step": 21745 }, { "epoch": 18.046473029045643, "grad_norm": 24.279043197631836, "learning_rate": 1.278539419087137e-05, "loss": 0.4383, "step": 21746 }, { "epoch": 18.047302904564315, "grad_norm": 30.34923553466797, "learning_rate": 1.2785062240663902e-05, "loss": 0.7605, "step": 21747 }, { "epoch": 18.048132780082987, "grad_norm": 77.10686492919922, "learning_rate": 1.2784730290456432e-05, "loss": 0.7609, "step": 21748 }, { "epoch": 18.04896265560166, "grad_norm": 24.754810333251953, "learning_rate": 1.2784398340248964e-05, "loss": 0.2723, "step": 21749 }, { "epoch": 18.04979253112033, "grad_norm": 37.74574279785156, "learning_rate": 
1.2784066390041495e-05, "loss": 0.3836, "step": 21750 }, { "epoch": 18.050622406639004, "grad_norm": 26.76319122314453, "learning_rate": 1.2783734439834025e-05, "loss": 0.3123, "step": 21751 }, { "epoch": 18.051452282157676, "grad_norm": 33.040706634521484, "learning_rate": 1.2783402489626557e-05, "loss": 0.7341, "step": 21752 }, { "epoch": 18.05228215767635, "grad_norm": 61.63425064086914, "learning_rate": 1.278307053941909e-05, "loss": 0.7688, "step": 21753 }, { "epoch": 18.05311203319502, "grad_norm": 35.87261199951172, "learning_rate": 1.2782738589211618e-05, "loss": 0.4954, "step": 21754 }, { "epoch": 18.053941908713693, "grad_norm": 41.61762237548828, "learning_rate": 1.278240663900415e-05, "loss": 0.7707, "step": 21755 }, { "epoch": 18.054771784232365, "grad_norm": 33.141319274902344, "learning_rate": 1.2782074688796682e-05, "loss": 0.7725, "step": 21756 }, { "epoch": 18.055601659751037, "grad_norm": 37.39798355102539, "learning_rate": 1.2781742738589213e-05, "loss": 0.436, "step": 21757 }, { "epoch": 18.05643153526971, "grad_norm": 42.45672607421875, "learning_rate": 1.2781410788381743e-05, "loss": 0.6955, "step": 21758 }, { "epoch": 18.05726141078838, "grad_norm": 77.45443725585938, "learning_rate": 1.2781078838174275e-05, "loss": 0.6499, "step": 21759 }, { "epoch": 18.058091286307054, "grad_norm": 20.779319763183594, "learning_rate": 1.2780746887966806e-05, "loss": 0.3366, "step": 21760 }, { "epoch": 18.058921161825726, "grad_norm": 39.5081672668457, "learning_rate": 1.2780414937759338e-05, "loss": 0.4352, "step": 21761 }, { "epoch": 18.059751037344398, "grad_norm": 37.9312858581543, "learning_rate": 1.2780082987551867e-05, "loss": 0.5156, "step": 21762 }, { "epoch": 18.06058091286307, "grad_norm": 34.899471282958984, "learning_rate": 1.2779751037344399e-05, "loss": 0.277, "step": 21763 }, { "epoch": 18.061410788381743, "grad_norm": 65.31411743164062, "learning_rate": 1.277941908713693e-05, "loss": 0.6712, "step": 21764 }, { "epoch": 18.062240663900415, 
"grad_norm": 60.367008209228516, "learning_rate": 1.2779087136929463e-05, "loss": 0.7116, "step": 21765 }, { "epoch": 18.063070539419087, "grad_norm": 21.569019317626953, "learning_rate": 1.2778755186721992e-05, "loss": 0.4266, "step": 21766 }, { "epoch": 18.06390041493776, "grad_norm": 18.73630142211914, "learning_rate": 1.2778423236514524e-05, "loss": 0.2836, "step": 21767 }, { "epoch": 18.06473029045643, "grad_norm": 37.262760162353516, "learning_rate": 1.2778091286307054e-05, "loss": 0.4268, "step": 21768 }, { "epoch": 18.065560165975104, "grad_norm": 46.6523323059082, "learning_rate": 1.2777759336099586e-05, "loss": 0.9289, "step": 21769 }, { "epoch": 18.066390041493776, "grad_norm": 32.13640213012695, "learning_rate": 1.2777427385892118e-05, "loss": 0.3115, "step": 21770 }, { "epoch": 18.067219917012448, "grad_norm": 82.3825912475586, "learning_rate": 1.2777095435684647e-05, "loss": 0.5133, "step": 21771 }, { "epoch": 18.06804979253112, "grad_norm": 44.391273498535156, "learning_rate": 1.277676348547718e-05, "loss": 0.4611, "step": 21772 }, { "epoch": 18.068879668049792, "grad_norm": 17.13840675354004, "learning_rate": 1.2776431535269711e-05, "loss": 0.2457, "step": 21773 }, { "epoch": 18.069709543568464, "grad_norm": 104.71781158447266, "learning_rate": 1.2776099585062243e-05, "loss": 1.0537, "step": 21774 }, { "epoch": 18.070539419087137, "grad_norm": 38.30485153198242, "learning_rate": 1.2775767634854772e-05, "loss": 0.6059, "step": 21775 }, { "epoch": 18.07136929460581, "grad_norm": 32.537960052490234, "learning_rate": 1.2775435684647304e-05, "loss": 0.4729, "step": 21776 }, { "epoch": 18.07219917012448, "grad_norm": 54.21660614013672, "learning_rate": 1.2775103734439835e-05, "loss": 1.0894, "step": 21777 }, { "epoch": 18.073029045643153, "grad_norm": 32.9851188659668, "learning_rate": 1.2774771784232367e-05, "loss": 0.431, "step": 21778 }, { "epoch": 18.073858921161825, "grad_norm": 20.010168075561523, "learning_rate": 1.2774439834024897e-05, "loss": 
0.3157, "step": 21779 }, { "epoch": 18.074688796680498, "grad_norm": 24.07271957397461, "learning_rate": 1.2774107883817428e-05, "loss": 0.3453, "step": 21780 }, { "epoch": 18.07551867219917, "grad_norm": 15.055073738098145, "learning_rate": 1.277377593360996e-05, "loss": 0.3278, "step": 21781 }, { "epoch": 18.076348547717842, "grad_norm": 84.94273376464844, "learning_rate": 1.2773443983402492e-05, "loss": 1.0327, "step": 21782 }, { "epoch": 18.077178423236514, "grad_norm": 28.95423126220703, "learning_rate": 1.277311203319502e-05, "loss": 0.4863, "step": 21783 }, { "epoch": 18.078008298755186, "grad_norm": 41.68378829956055, "learning_rate": 1.2772780082987553e-05, "loss": 0.5896, "step": 21784 }, { "epoch": 18.07883817427386, "grad_norm": 61.463035583496094, "learning_rate": 1.2772448132780085e-05, "loss": 0.5387, "step": 21785 }, { "epoch": 18.07966804979253, "grad_norm": 22.1193790435791, "learning_rate": 1.2772116182572615e-05, "loss": 0.4042, "step": 21786 }, { "epoch": 18.080497925311203, "grad_norm": 109.13855743408203, "learning_rate": 1.2771784232365146e-05, "loss": 0.8503, "step": 21787 }, { "epoch": 18.081327800829875, "grad_norm": 56.5266227722168, "learning_rate": 1.2771452282157678e-05, "loss": 0.7253, "step": 21788 }, { "epoch": 18.082157676348547, "grad_norm": 21.831384658813477, "learning_rate": 1.2771120331950208e-05, "loss": 0.4797, "step": 21789 }, { "epoch": 18.08298755186722, "grad_norm": 48.62618637084961, "learning_rate": 1.277078838174274e-05, "loss": 0.2844, "step": 21790 }, { "epoch": 18.083817427385892, "grad_norm": 25.96116828918457, "learning_rate": 1.2770456431535269e-05, "loss": 0.5817, "step": 21791 }, { "epoch": 18.084647302904564, "grad_norm": 46.51464080810547, "learning_rate": 1.2770124481327801e-05, "loss": 0.6917, "step": 21792 }, { "epoch": 18.085477178423236, "grad_norm": 38.53782272338867, "learning_rate": 1.2769792531120333e-05, "loss": 0.5901, "step": 21793 }, { "epoch": 18.08630705394191, "grad_norm": 34.71221923828125, 
"learning_rate": 1.2769460580912865e-05, "loss": 1.1329, "step": 21794 }, { "epoch": 18.08713692946058, "grad_norm": 101.09912872314453, "learning_rate": 1.2769128630705396e-05, "loss": 0.6029, "step": 21795 }, { "epoch": 18.087966804979253, "grad_norm": 28.12273406982422, "learning_rate": 1.2768796680497926e-05, "loss": 0.4103, "step": 21796 }, { "epoch": 18.088796680497925, "grad_norm": 35.68085479736328, "learning_rate": 1.2768464730290458e-05, "loss": 0.9318, "step": 21797 }, { "epoch": 18.089626556016597, "grad_norm": 11.340202331542969, "learning_rate": 1.2768132780082989e-05, "loss": 0.2524, "step": 21798 }, { "epoch": 18.09045643153527, "grad_norm": 61.26094055175781, "learning_rate": 1.276780082987552e-05, "loss": 0.7565, "step": 21799 }, { "epoch": 18.09128630705394, "grad_norm": 36.39967727661133, "learning_rate": 1.276746887966805e-05, "loss": 0.5404, "step": 21800 }, { "epoch": 18.092116182572614, "grad_norm": NaN, "learning_rate": 1.276746887966805e-05, "loss": 0.7237, "step": 21801 }, { "epoch": 18.092946058091286, "grad_norm": 22.865232467651367, "learning_rate": 1.2767136929460582e-05, "loss": 0.652, "step": 21802 }, { "epoch": 18.093775933609958, "grad_norm": 51.980690002441406, "learning_rate": 1.2766804979253114e-05, "loss": 0.3217, "step": 21803 }, { "epoch": 18.09460580912863, "grad_norm": 27.65752410888672, "learning_rate": 1.2766473029045646e-05, "loss": 0.2224, "step": 21804 }, { "epoch": 18.095435684647303, "grad_norm": 31.93718910217285, "learning_rate": 1.2766141078838174e-05, "loss": 0.4997, "step": 21805 }, { "epoch": 18.096265560165975, "grad_norm": 64.00515747070312, "learning_rate": 1.2765809128630707e-05, "loss": 0.485, "step": 21806 }, { "epoch": 18.097095435684647, "grad_norm": 66.57514190673828, "learning_rate": 1.2765477178423239e-05, "loss": 0.9937, "step": 21807 }, { "epoch": 18.09792531120332, "grad_norm": 49.65084457397461, "learning_rate": 1.2765145228215769e-05, "loss": 1.247, "step": 21808 }, { "epoch": 
18.09875518672199, "grad_norm": 78.51302337646484, "learning_rate": 1.27648132780083e-05, "loss": 0.3456, "step": 21809 }, { "epoch": 18.099585062240664, "grad_norm": 23.717870712280273, "learning_rate": 1.276448132780083e-05, "loss": 0.3754, "step": 21810 }, { "epoch": 18.100414937759336, "grad_norm": 26.603878021240234, "learning_rate": 1.2764149377593362e-05, "loss": 0.6313, "step": 21811 }, { "epoch": 18.101244813278008, "grad_norm": 207.37522888183594, "learning_rate": 1.2763817427385894e-05, "loss": 0.8767, "step": 21812 }, { "epoch": 18.10207468879668, "grad_norm": 59.8862190246582, "learning_rate": 1.2763485477178423e-05, "loss": 0.8298, "step": 21813 }, { "epoch": 18.102904564315352, "grad_norm": 61.720619201660156, "learning_rate": 1.2763153526970955e-05, "loss": 0.9718, "step": 21814 }, { "epoch": 18.103734439834025, "grad_norm": 30.617719650268555, "learning_rate": 1.2762821576763487e-05, "loss": 0.8851, "step": 21815 }, { "epoch": 18.104564315352697, "grad_norm": 63.604522705078125, "learning_rate": 1.2762489626556017e-05, "loss": 0.6723, "step": 21816 }, { "epoch": 18.10539419087137, "grad_norm": 144.12908935546875, "learning_rate": 1.2762157676348548e-05, "loss": 0.6123, "step": 21817 }, { "epoch": 18.10622406639004, "grad_norm": 42.02265548706055, "learning_rate": 1.276182572614108e-05, "loss": 0.5904, "step": 21818 }, { "epoch": 18.107053941908713, "grad_norm": 74.19273376464844, "learning_rate": 1.276149377593361e-05, "loss": 0.913, "step": 21819 }, { "epoch": 18.107883817427386, "grad_norm": 32.30306625366211, "learning_rate": 1.2761161825726143e-05, "loss": 0.9727, "step": 21820 }, { "epoch": 18.108713692946058, "grad_norm": 22.7593994140625, "learning_rate": 1.2760829875518673e-05, "loss": 0.3604, "step": 21821 }, { "epoch": 18.10954356846473, "grad_norm": 107.70619201660156, "learning_rate": 1.2760497925311203e-05, "loss": 1.6392, "step": 21822 }, { "epoch": 18.110373443983402, "grad_norm": 108.8605728149414, "learning_rate": 
1.2760165975103735e-05, "loss": 0.4665, "step": 21823 }, { "epoch": 18.111203319502074, "grad_norm": 21.676618576049805, "learning_rate": 1.2759834024896268e-05, "loss": 0.2129, "step": 21824 }, { "epoch": 18.112033195020746, "grad_norm": 50.21567916870117, "learning_rate": 1.2759502074688798e-05, "loss": 0.7156, "step": 21825 }, { "epoch": 18.11286307053942, "grad_norm": 89.3456802368164, "learning_rate": 1.2759170124481328e-05, "loss": 0.7282, "step": 21826 }, { "epoch": 18.11369294605809, "grad_norm": 38.60289764404297, "learning_rate": 1.275883817427386e-05, "loss": 0.6059, "step": 21827 }, { "epoch": 18.114522821576763, "grad_norm": 35.3555908203125, "learning_rate": 1.2758506224066391e-05, "loss": 0.4583, "step": 21828 }, { "epoch": 18.115352697095435, "grad_norm": 17.49538803100586, "learning_rate": 1.2758174273858923e-05, "loss": 0.3963, "step": 21829 }, { "epoch": 18.116182572614107, "grad_norm": 55.290809631347656, "learning_rate": 1.2757842323651453e-05, "loss": 0.9343, "step": 21830 }, { "epoch": 18.11701244813278, "grad_norm": 11.216927528381348, "learning_rate": 1.2757510373443984e-05, "loss": 0.262, "step": 21831 }, { "epoch": 18.117842323651452, "grad_norm": 50.787349700927734, "learning_rate": 1.2757178423236516e-05, "loss": 0.8774, "step": 21832 }, { "epoch": 18.118672199170124, "grad_norm": 62.92486572265625, "learning_rate": 1.2756846473029048e-05, "loss": 0.7638, "step": 21833 }, { "epoch": 18.119502074688796, "grad_norm": 97.4581069946289, "learning_rate": 1.2756514522821577e-05, "loss": 1.1772, "step": 21834 }, { "epoch": 18.12033195020747, "grad_norm": 50.93906784057617, "learning_rate": 1.2756182572614109e-05, "loss": 0.4739, "step": 21835 }, { "epoch": 18.12116182572614, "grad_norm": 20.42996597290039, "learning_rate": 1.2755850622406641e-05, "loss": 0.3347, "step": 21836 }, { "epoch": 18.121991701244813, "grad_norm": 65.06092834472656, "learning_rate": 1.2755518672199171e-05, "loss": 0.5422, "step": 21837 }, { "epoch": 18.122821576763485, 
"grad_norm": 55.26713943481445, "learning_rate": 1.2755186721991702e-05, "loss": 0.8532, "step": 21838 }, { "epoch": 18.123651452282157, "grad_norm": 58.7275390625, "learning_rate": 1.2754854771784232e-05, "loss": 0.716, "step": 21839 }, { "epoch": 18.12448132780083, "grad_norm": 10.953634262084961, "learning_rate": 1.2754522821576764e-05, "loss": 0.3291, "step": 21840 }, { "epoch": 18.1253112033195, "grad_norm": 71.52224731445312, "learning_rate": 1.2754190871369296e-05, "loss": 0.659, "step": 21841 }, { "epoch": 18.126141078838174, "grad_norm": 40.19197082519531, "learning_rate": 1.2753858921161825e-05, "loss": 1.0462, "step": 21842 }, { "epoch": 18.126970954356846, "grad_norm": 21.41324806213379, "learning_rate": 1.2753526970954357e-05, "loss": 0.4867, "step": 21843 }, { "epoch": 18.127800829875518, "grad_norm": 27.593212127685547, "learning_rate": 1.275319502074689e-05, "loss": 0.4009, "step": 21844 }, { "epoch": 18.12863070539419, "grad_norm": 82.68075561523438, "learning_rate": 1.2752863070539421e-05, "loss": 0.7277, "step": 21845 }, { "epoch": 18.129460580912863, "grad_norm": 46.854835510253906, "learning_rate": 1.275253112033195e-05, "loss": 0.9773, "step": 21846 }, { "epoch": 18.130290456431535, "grad_norm": 50.144989013671875, "learning_rate": 1.2752199170124482e-05, "loss": 0.4472, "step": 21847 }, { "epoch": 18.131120331950207, "grad_norm": 133.5869903564453, "learning_rate": 1.2751867219917013e-05, "loss": 0.8292, "step": 21848 }, { "epoch": 18.13195020746888, "grad_norm": 20.18417739868164, "learning_rate": 1.2751535269709545e-05, "loss": 0.4197, "step": 21849 }, { "epoch": 18.13278008298755, "grad_norm": 31.873571395874023, "learning_rate": 1.2751203319502077e-05, "loss": 0.5603, "step": 21850 }, { "epoch": 18.133609958506224, "grad_norm": 30.140567779541016, "learning_rate": 1.2750871369294606e-05, "loss": 0.4108, "step": 21851 }, { "epoch": 18.134439834024896, "grad_norm": 37.1334342956543, "learning_rate": 1.2750539419087138e-05, "loss": 0.7155, 
"step": 21852 }, { "epoch": 18.135269709543568, "grad_norm": 118.48411560058594, "learning_rate": 1.275020746887967e-05, "loss": 0.4585, "step": 21853 }, { "epoch": 18.13609958506224, "grad_norm": 13.515458106994629, "learning_rate": 1.2749875518672202e-05, "loss": 0.3657, "step": 21854 }, { "epoch": 18.136929460580912, "grad_norm": 39.30359649658203, "learning_rate": 1.274954356846473e-05, "loss": 0.7712, "step": 21855 }, { "epoch": 18.137759336099585, "grad_norm": 23.108654022216797, "learning_rate": 1.2749211618257263e-05, "loss": 0.4476, "step": 21856 }, { "epoch": 18.138589211618257, "grad_norm": 67.97064971923828, "learning_rate": 1.2748879668049793e-05, "loss": 0.8501, "step": 21857 }, { "epoch": 18.13941908713693, "grad_norm": 60.97514343261719, "learning_rate": 1.2748547717842325e-05, "loss": 1.2136, "step": 21858 }, { "epoch": 18.1402489626556, "grad_norm": 19.19842529296875, "learning_rate": 1.2748215767634856e-05, "loss": 0.3514, "step": 21859 }, { "epoch": 18.141078838174273, "grad_norm": 33.575355529785156, "learning_rate": 1.2747883817427386e-05, "loss": 0.3569, "step": 21860 }, { "epoch": 18.141908713692946, "grad_norm": 35.40448760986328, "learning_rate": 1.2747551867219918e-05, "loss": 0.6364, "step": 21861 }, { "epoch": 18.142738589211618, "grad_norm": 74.76557922363281, "learning_rate": 1.274721991701245e-05, "loss": 0.7901, "step": 21862 }, { "epoch": 18.14356846473029, "grad_norm": 107.7401123046875, "learning_rate": 1.2746887966804979e-05, "loss": 1.6038, "step": 21863 }, { "epoch": 18.144398340248962, "grad_norm": 25.981359481811523, "learning_rate": 1.2746556016597511e-05, "loss": 0.3745, "step": 21864 }, { "epoch": 18.145228215767634, "grad_norm": 50.823463439941406, "learning_rate": 1.2746224066390043e-05, "loss": 0.5823, "step": 21865 }, { "epoch": 18.146058091286307, "grad_norm": 112.25994110107422, "learning_rate": 1.2745892116182574e-05, "loss": 0.6034, "step": 21866 }, { "epoch": 18.14688796680498, "grad_norm": 32.82072067260742, 
"learning_rate": 1.2745560165975104e-05, "loss": 0.4831, "step": 21867 }, { "epoch": 18.14771784232365, "grad_norm": 75.19132232666016, "learning_rate": 1.2745228215767636e-05, "loss": 0.7586, "step": 21868 }, { "epoch": 18.148547717842323, "grad_norm": 137.39503479003906, "learning_rate": 1.2744896265560167e-05, "loss": 1.2115, "step": 21869 }, { "epoch": 18.149377593360995, "grad_norm": 37.530601501464844, "learning_rate": 1.2744564315352699e-05, "loss": 0.593, "step": 21870 }, { "epoch": 18.150207468879668, "grad_norm": 25.614227294921875, "learning_rate": 1.2744232365145227e-05, "loss": 0.6229, "step": 21871 }, { "epoch": 18.15103734439834, "grad_norm": 37.73643493652344, "learning_rate": 1.274390041493776e-05, "loss": 1.0686, "step": 21872 }, { "epoch": 18.151867219917012, "grad_norm": 25.598005294799805, "learning_rate": 1.2743568464730292e-05, "loss": 0.6145, "step": 21873 }, { "epoch": 18.152697095435684, "grad_norm": 61.24698257446289, "learning_rate": 1.2743236514522824e-05, "loss": 0.9825, "step": 21874 }, { "epoch": 18.153526970954356, "grad_norm": 19.854961395263672, "learning_rate": 1.2742904564315354e-05, "loss": 0.2651, "step": 21875 }, { "epoch": 18.15435684647303, "grad_norm": 76.58792114257812, "learning_rate": 1.2742572614107885e-05, "loss": 1.065, "step": 21876 }, { "epoch": 18.1551867219917, "grad_norm": 49.842864990234375, "learning_rate": 1.2742240663900417e-05, "loss": 0.7155, "step": 21877 }, { "epoch": 18.156016597510373, "grad_norm": 64.65132141113281, "learning_rate": 1.2741908713692947e-05, "loss": 0.6182, "step": 21878 }, { "epoch": 18.156846473029045, "grad_norm": 10.502608299255371, "learning_rate": 1.274157676348548e-05, "loss": 0.2961, "step": 21879 }, { "epoch": 18.157676348547717, "grad_norm": 16.301753997802734, "learning_rate": 1.2741244813278008e-05, "loss": 0.3897, "step": 21880 }, { "epoch": 18.15850622406639, "grad_norm": 77.50534057617188, "learning_rate": 1.274091286307054e-05, "loss": 0.9544, "step": 21881 }, { "epoch": 
18.15933609958506, "grad_norm": 54.8571891784668, "learning_rate": 1.2740580912863072e-05, "loss": 0.5705, "step": 21882 }, { "epoch": 18.160165975103734, "grad_norm": 67.81861114501953, "learning_rate": 1.2740248962655604e-05, "loss": 0.4315, "step": 21883 }, { "epoch": 18.160995850622406, "grad_norm": 24.6351261138916, "learning_rate": 1.2739917012448133e-05, "loss": 0.5104, "step": 21884 }, { "epoch": 18.16182572614108, "grad_norm": 18.286296844482422, "learning_rate": 1.2739585062240665e-05, "loss": 0.3002, "step": 21885 }, { "epoch": 18.16265560165975, "grad_norm": 71.3202133178711, "learning_rate": 1.2739253112033196e-05, "loss": 0.5812, "step": 21886 }, { "epoch": 18.163485477178423, "grad_norm": 30.371685028076172, "learning_rate": 1.2738921161825728e-05, "loss": 0.4374, "step": 21887 }, { "epoch": 18.164315352697095, "grad_norm": 41.977962493896484, "learning_rate": 1.2738589211618258e-05, "loss": 0.7742, "step": 21888 }, { "epoch": 18.165145228215767, "grad_norm": 51.08816146850586, "learning_rate": 1.2738257261410788e-05, "loss": 0.5118, "step": 21889 }, { "epoch": 18.16597510373444, "grad_norm": 24.844165802001953, "learning_rate": 1.273792531120332e-05, "loss": 0.4246, "step": 21890 }, { "epoch": 18.16680497925311, "grad_norm": 66.28079986572266, "learning_rate": 1.2737593360995853e-05, "loss": 0.4901, "step": 21891 }, { "epoch": 18.167634854771784, "grad_norm": 74.39859008789062, "learning_rate": 1.2737261410788381e-05, "loss": 0.8388, "step": 21892 }, { "epoch": 18.168464730290456, "grad_norm": 27.063308715820312, "learning_rate": 1.2736929460580914e-05, "loss": 0.644, "step": 21893 }, { "epoch": 18.169294605809128, "grad_norm": 24.3509521484375, "learning_rate": 1.2736597510373446e-05, "loss": 0.3176, "step": 21894 }, { "epoch": 18.1701244813278, "grad_norm": 24.439559936523438, "learning_rate": 1.2736265560165976e-05, "loss": 0.3139, "step": 21895 }, { "epoch": 18.170954356846472, "grad_norm": 36.13871383666992, "learning_rate": 
1.2735933609958506e-05, "loss": 0.7312, "step": 21896 }, { "epoch": 18.171784232365145, "grad_norm": 11.57420825958252, "learning_rate": 1.2735601659751039e-05, "loss": 0.361, "step": 21897 }, { "epoch": 18.172614107883817, "grad_norm": 79.78478240966797, "learning_rate": 1.2735269709543569e-05, "loss": 1.2257, "step": 21898 }, { "epoch": 18.17344398340249, "grad_norm": 66.19188690185547, "learning_rate": 1.2734937759336101e-05, "loss": 1.0373, "step": 21899 }, { "epoch": 18.17427385892116, "grad_norm": 41.984596252441406, "learning_rate": 1.273460580912863e-05, "loss": 0.5198, "step": 21900 }, { "epoch": 18.175103734439833, "grad_norm": 69.78650665283203, "learning_rate": 1.2734273858921162e-05, "loss": 0.7842, "step": 21901 }, { "epoch": 18.175933609958506, "grad_norm": 35.34950256347656, "learning_rate": 1.2733941908713694e-05, "loss": 1.1999, "step": 21902 }, { "epoch": 18.176763485477178, "grad_norm": 31.746978759765625, "learning_rate": 1.2733609958506226e-05, "loss": 0.4762, "step": 21903 }, { "epoch": 18.17759336099585, "grad_norm": 230.01954650878906, "learning_rate": 1.2733278008298757e-05, "loss": 0.4669, "step": 21904 }, { "epoch": 18.178423236514522, "grad_norm": 39.95161437988281, "learning_rate": 1.2732946058091287e-05, "loss": 0.8678, "step": 21905 }, { "epoch": 18.179253112033194, "grad_norm": 39.005393981933594, "learning_rate": 1.2732614107883819e-05, "loss": 1.0009, "step": 21906 }, { "epoch": 18.180082987551867, "grad_norm": 43.860374450683594, "learning_rate": 1.273228215767635e-05, "loss": 0.9111, "step": 21907 }, { "epoch": 18.18091286307054, "grad_norm": 40.85447692871094, "learning_rate": 1.2731950207468882e-05, "loss": 0.7232, "step": 21908 }, { "epoch": 18.18174273858921, "grad_norm": 42.65614700317383, "learning_rate": 1.273161825726141e-05, "loss": 0.9215, "step": 21909 }, { "epoch": 18.182572614107883, "grad_norm": 31.533443450927734, "learning_rate": 1.2731286307053942e-05, "loss": 0.4884, "step": 21910 }, { "epoch": 
18.183402489626555, "grad_norm": 64.16384887695312, "learning_rate": 1.2730954356846475e-05, "loss": 0.5822, "step": 21911 }, { "epoch": 18.184232365145228, "grad_norm": 30.41524314880371, "learning_rate": 1.2730622406639007e-05, "loss": 0.8521, "step": 21912 }, { "epoch": 18.1850622406639, "grad_norm": 47.20539474487305, "learning_rate": 1.2730290456431535e-05, "loss": 0.4832, "step": 21913 }, { "epoch": 18.185892116182572, "grad_norm": 36.120262145996094, "learning_rate": 1.2729958506224067e-05, "loss": 0.8904, "step": 21914 }, { "epoch": 18.186721991701244, "grad_norm": 27.598093032836914, "learning_rate": 1.27296265560166e-05, "loss": 0.4951, "step": 21915 }, { "epoch": 18.187551867219916, "grad_norm": 26.754873275756836, "learning_rate": 1.272929460580913e-05, "loss": 0.4382, "step": 21916 }, { "epoch": 18.18838174273859, "grad_norm": 42.60979461669922, "learning_rate": 1.272896265560166e-05, "loss": 0.7842, "step": 21917 }, { "epoch": 18.18921161825726, "grad_norm": 19.319122314453125, "learning_rate": 1.272863070539419e-05, "loss": 0.3565, "step": 21918 }, { "epoch": 18.190041493775933, "grad_norm": 58.77855682373047, "learning_rate": 1.2728298755186723e-05, "loss": 1.287, "step": 21919 }, { "epoch": 18.190871369294605, "grad_norm": 55.888587951660156, "learning_rate": 1.2727966804979255e-05, "loss": 0.4169, "step": 21920 }, { "epoch": 18.191701244813277, "grad_norm": 49.15097427368164, "learning_rate": 1.2727634854771784e-05, "loss": 1.1433, "step": 21921 }, { "epoch": 18.19253112033195, "grad_norm": 50.47346496582031, "learning_rate": 1.2727302904564316e-05, "loss": 0.6698, "step": 21922 }, { "epoch": 18.19336099585062, "grad_norm": 29.558603286743164, "learning_rate": 1.2726970954356848e-05, "loss": 0.3797, "step": 21923 }, { "epoch": 18.194190871369294, "grad_norm": 47.2077522277832, "learning_rate": 1.272663900414938e-05, "loss": 0.5779, "step": 21924 }, { "epoch": 18.195020746887966, "grad_norm": 57.05439758300781, "learning_rate": 
1.2726307053941909e-05, "loss": 0.6159, "step": 21925 }, { "epoch": 18.19585062240664, "grad_norm": 36.24587631225586, "learning_rate": 1.2725975103734441e-05, "loss": 0.5586, "step": 21926 }, { "epoch": 18.19668049792531, "grad_norm": 70.36026000976562, "learning_rate": 1.2725643153526971e-05, "loss": 0.869, "step": 21927 }, { "epoch": 18.197510373443983, "grad_norm": 21.682628631591797, "learning_rate": 1.2725311203319503e-05, "loss": 0.3514, "step": 21928 }, { "epoch": 18.198340248962655, "grad_norm": 49.86571502685547, "learning_rate": 1.2724979253112036e-05, "loss": 0.6173, "step": 21929 }, { "epoch": 18.199170124481327, "grad_norm": 33.667423248291016, "learning_rate": 1.2724647302904564e-05, "loss": 0.5474, "step": 21930 }, { "epoch": 18.2, "grad_norm": 34.72721481323242, "learning_rate": 1.2724315352697096e-05, "loss": 0.6927, "step": 21931 }, { "epoch": 18.20082987551867, "grad_norm": 29.114364624023438, "learning_rate": 1.2723983402489628e-05, "loss": 0.8446, "step": 21932 }, { "epoch": 18.201659751037344, "grad_norm": 34.60280990600586, "learning_rate": 1.2723651452282159e-05, "loss": 0.3867, "step": 21933 }, { "epoch": 18.202489626556016, "grad_norm": 36.37369918823242, "learning_rate": 1.272331950207469e-05, "loss": 0.731, "step": 21934 }, { "epoch": 18.203319502074688, "grad_norm": 65.14712524414062, "learning_rate": 1.2722987551867221e-05, "loss": 0.8864, "step": 21935 }, { "epoch": 18.20414937759336, "grad_norm": 45.7370491027832, "learning_rate": 1.2722655601659752e-05, "loss": 0.5921, "step": 21936 }, { "epoch": 18.204979253112032, "grad_norm": 57.99871063232422, "learning_rate": 1.2722323651452284e-05, "loss": 0.6488, "step": 21937 }, { "epoch": 18.205809128630705, "grad_norm": 20.36517906188965, "learning_rate": 1.2721991701244814e-05, "loss": 0.2543, "step": 21938 }, { "epoch": 18.206639004149377, "grad_norm": 73.54132080078125, "learning_rate": 1.2721659751037345e-05, "loss": 0.5531, "step": 21939 }, { "epoch": 18.20746887966805, "grad_norm": 
27.125690460205078, "learning_rate": 1.2721327800829877e-05, "loss": 0.4389, "step": 21940 }, { "epoch": 18.20829875518672, "grad_norm": 20.635740280151367, "learning_rate": 1.2720995850622409e-05, "loss": 0.4298, "step": 21941 }, { "epoch": 18.209128630705393, "grad_norm": 49.209232330322266, "learning_rate": 1.2720663900414938e-05, "loss": 1.2852, "step": 21942 }, { "epoch": 18.209958506224066, "grad_norm": 22.820743560791016, "learning_rate": 1.272033195020747e-05, "loss": 0.5089, "step": 21943 }, { "epoch": 18.210788381742738, "grad_norm": 12.072704315185547, "learning_rate": 1.2720000000000002e-05, "loss": 0.2307, "step": 21944 }, { "epoch": 18.21161825726141, "grad_norm": 32.23005676269531, "learning_rate": 1.2719668049792532e-05, "loss": 0.4673, "step": 21945 }, { "epoch": 18.212448132780082, "grad_norm": 125.00062561035156, "learning_rate": 1.2719336099585063e-05, "loss": 0.477, "step": 21946 }, { "epoch": 18.213278008298754, "grad_norm": 16.551498413085938, "learning_rate": 1.2719004149377595e-05, "loss": 0.3959, "step": 21947 }, { "epoch": 18.214107883817427, "grad_norm": 33.57807922363281, "learning_rate": 1.2718672199170125e-05, "loss": 0.8539, "step": 21948 }, { "epoch": 18.2149377593361, "grad_norm": 134.73614501953125, "learning_rate": 1.2718340248962657e-05, "loss": 1.3005, "step": 21949 }, { "epoch": 18.21576763485477, "grad_norm": 95.2577896118164, "learning_rate": 1.2718008298755186e-05, "loss": 0.99, "step": 21950 }, { "epoch": 18.216597510373443, "grad_norm": 40.81562042236328, "learning_rate": 1.2717676348547718e-05, "loss": 0.4912, "step": 21951 }, { "epoch": 18.217427385892115, "grad_norm": 56.68117141723633, "learning_rate": 1.271734439834025e-05, "loss": 0.4282, "step": 21952 }, { "epoch": 18.218257261410788, "grad_norm": 42.528526306152344, "learning_rate": 1.2717012448132782e-05, "loss": 0.3986, "step": 21953 }, { "epoch": 18.21908713692946, "grad_norm": 43.883888244628906, "learning_rate": 1.2716680497925313e-05, "loss": 0.6967, "step": 
21954 }, { "epoch": 18.219917012448132, "grad_norm": 50.13715744018555, "learning_rate": 1.2716348547717843e-05, "loss": 0.6212, "step": 21955 }, { "epoch": 18.220746887966804, "grad_norm": 20.707651138305664, "learning_rate": 1.2716016597510374e-05, "loss": 0.3155, "step": 21956 }, { "epoch": 18.221576763485476, "grad_norm": 47.70234298706055, "learning_rate": 1.2715684647302906e-05, "loss": 0.6355, "step": 21957 }, { "epoch": 18.22240663900415, "grad_norm": 37.53256607055664, "learning_rate": 1.2715352697095438e-05, "loss": 0.7283, "step": 21958 }, { "epoch": 18.22323651452282, "grad_norm": 62.424522399902344, "learning_rate": 1.2715020746887967e-05, "loss": 1.0589, "step": 21959 }, { "epoch": 18.224066390041493, "grad_norm": 25.53992462158203, "learning_rate": 1.2714688796680499e-05, "loss": 0.3723, "step": 21960 }, { "epoch": 18.224896265560165, "grad_norm": 13.501631736755371, "learning_rate": 1.271435684647303e-05, "loss": 0.2465, "step": 21961 }, { "epoch": 18.225726141078837, "grad_norm": 26.072980880737305, "learning_rate": 1.2714024896265563e-05, "loss": 0.4474, "step": 21962 }, { "epoch": 18.22655601659751, "grad_norm": 28.040311813354492, "learning_rate": 1.2713692946058092e-05, "loss": 0.4726, "step": 21963 }, { "epoch": 18.22738589211618, "grad_norm": 58.6143798828125, "learning_rate": 1.2713360995850624e-05, "loss": 0.8476, "step": 21964 }, { "epoch": 18.228215767634854, "grad_norm": 35.83305740356445, "learning_rate": 1.2713029045643154e-05, "loss": 0.4563, "step": 21965 }, { "epoch": 18.229045643153526, "grad_norm": 56.40015411376953, "learning_rate": 1.2712697095435686e-05, "loss": 0.4573, "step": 21966 }, { "epoch": 18.2298755186722, "grad_norm": 43.83913803100586, "learning_rate": 1.2712365145228217e-05, "loss": 0.4992, "step": 21967 }, { "epoch": 18.23070539419087, "grad_norm": 31.010242462158203, "learning_rate": 1.2712033195020747e-05, "loss": 0.4692, "step": 21968 }, { "epoch": 18.231535269709543, "grad_norm": 11.791433334350586, 
"learning_rate": 1.271170124481328e-05, "loss": 0.2492, "step": 21969 }, { "epoch": 18.232365145228215, "grad_norm": 37.24704360961914, "learning_rate": 1.2711369294605811e-05, "loss": 0.7182, "step": 21970 }, { "epoch": 18.233195020746887, "grad_norm": 49.08977508544922, "learning_rate": 1.271103734439834e-05, "loss": 0.8202, "step": 21971 }, { "epoch": 18.23402489626556, "grad_norm": 46.691131591796875, "learning_rate": 1.2710705394190872e-05, "loss": 0.4192, "step": 21972 }, { "epoch": 18.23485477178423, "grad_norm": 111.80005645751953, "learning_rate": 1.2710373443983404e-05, "loss": 1.1114, "step": 21973 }, { "epoch": 18.235684647302904, "grad_norm": 19.353599548339844, "learning_rate": 1.2710041493775935e-05, "loss": 0.3432, "step": 21974 }, { "epoch": 18.236514522821576, "grad_norm": 103.16567993164062, "learning_rate": 1.2709709543568465e-05, "loss": 0.9828, "step": 21975 }, { "epoch": 18.237344398340248, "grad_norm": 108.93740844726562, "learning_rate": 1.2709377593360997e-05, "loss": 1.1187, "step": 21976 }, { "epoch": 18.23817427385892, "grad_norm": 35.05026626586914, "learning_rate": 1.2709045643153528e-05, "loss": 0.7752, "step": 21977 }, { "epoch": 18.239004149377593, "grad_norm": 87.18988037109375, "learning_rate": 1.270871369294606e-05, "loss": 0.7004, "step": 21978 }, { "epoch": 18.239834024896265, "grad_norm": 81.56770324707031, "learning_rate": 1.2708381742738588e-05, "loss": 0.4575, "step": 21979 }, { "epoch": 18.240663900414937, "grad_norm": 28.280475616455078, "learning_rate": 1.270804979253112e-05, "loss": 0.9878, "step": 21980 }, { "epoch": 18.24149377593361, "grad_norm": 119.65003204345703, "learning_rate": 1.2707717842323653e-05, "loss": 0.5576, "step": 21981 }, { "epoch": 18.24232365145228, "grad_norm": 23.790857315063477, "learning_rate": 1.2707385892116185e-05, "loss": 0.4073, "step": 21982 }, { "epoch": 18.243153526970953, "grad_norm": 54.31703567504883, "learning_rate": 1.2707053941908715e-05, "loss": 0.9644, "step": 21983 }, { 
"epoch": 18.243983402489626, "grad_norm": 13.550278663635254, "learning_rate": 1.2706721991701246e-05, "loss": 0.3374, "step": 21984 }, { "epoch": 18.244813278008298, "grad_norm": 137.17083740234375, "learning_rate": 1.2706390041493778e-05, "loss": 0.4992, "step": 21985 }, { "epoch": 18.24564315352697, "grad_norm": 59.00633239746094, "learning_rate": 1.2706058091286308e-05, "loss": 0.5945, "step": 21986 }, { "epoch": 18.246473029045642, "grad_norm": 40.52093505859375, "learning_rate": 1.270572614107884e-05, "loss": 0.5082, "step": 21987 }, { "epoch": 18.247302904564314, "grad_norm": 48.01331329345703, "learning_rate": 1.2705394190871369e-05, "loss": 1.0017, "step": 21988 }, { "epoch": 18.248132780082987, "grad_norm": 42.97458267211914, "learning_rate": 1.2705062240663901e-05, "loss": 0.5611, "step": 21989 }, { "epoch": 18.24896265560166, "grad_norm": 31.176315307617188, "learning_rate": 1.2704730290456433e-05, "loss": 0.6779, "step": 21990 }, { "epoch": 18.24979253112033, "grad_norm": 45.11481857299805, "learning_rate": 1.2704398340248965e-05, "loss": 0.4628, "step": 21991 }, { "epoch": 18.250622406639003, "grad_norm": 54.5433235168457, "learning_rate": 1.2704066390041494e-05, "loss": 0.9122, "step": 21992 }, { "epoch": 18.251452282157675, "grad_norm": 73.57144927978516, "learning_rate": 1.2703734439834026e-05, "loss": 0.7788, "step": 21993 }, { "epoch": 18.252282157676348, "grad_norm": 43.72026062011719, "learning_rate": 1.2703402489626558e-05, "loss": 0.6611, "step": 21994 }, { "epoch": 18.25311203319502, "grad_norm": 28.706783294677734, "learning_rate": 1.2703070539419089e-05, "loss": 0.4821, "step": 21995 }, { "epoch": 18.253941908713692, "grad_norm": 42.3671875, "learning_rate": 1.2702738589211619e-05, "loss": 0.7091, "step": 21996 }, { "epoch": 18.254771784232364, "grad_norm": 37.141048431396484, "learning_rate": 1.270240663900415e-05, "loss": 0.6918, "step": 21997 }, { "epoch": 18.255601659751036, "grad_norm": 86.52622985839844, "learning_rate": 
1.2702074688796681e-05, "loss": 0.6834, "step": 21998 }, { "epoch": 18.25643153526971, "grad_norm": 88.9013442993164, "learning_rate": 1.2701742738589214e-05, "loss": 0.8204, "step": 21999 }, { "epoch": 18.25726141078838, "grad_norm": 40.9168586730957, "learning_rate": 1.2701410788381742e-05, "loss": 0.9951, "step": 22000 }, { "epoch": 18.258091286307053, "grad_norm": 36.49919891357422, "learning_rate": 1.2701078838174274e-05, "loss": 0.6979, "step": 22001 }, { "epoch": 18.258921161825725, "grad_norm": 38.38949966430664, "learning_rate": 1.2700746887966807e-05, "loss": 0.8285, "step": 22002 }, { "epoch": 18.259751037344397, "grad_norm": 39.33864212036133, "learning_rate": 1.2700414937759337e-05, "loss": 0.9498, "step": 22003 }, { "epoch": 18.26058091286307, "grad_norm": 59.64594268798828, "learning_rate": 1.2700082987551867e-05, "loss": 0.2175, "step": 22004 }, { "epoch": 18.261410788381742, "grad_norm": 18.998884201049805, "learning_rate": 1.26997510373444e-05, "loss": 0.3475, "step": 22005 }, { "epoch": 18.262240663900414, "grad_norm": 22.8809814453125, "learning_rate": 1.269941908713693e-05, "loss": 0.5837, "step": 22006 }, { "epoch": 18.263070539419086, "grad_norm": 33.21395492553711, "learning_rate": 1.2699087136929462e-05, "loss": 0.2874, "step": 22007 }, { "epoch": 18.26390041493776, "grad_norm": 14.949251174926758, "learning_rate": 1.2698755186721994e-05, "loss": 0.3475, "step": 22008 }, { "epoch": 18.26473029045643, "grad_norm": 45.88800048828125, "learning_rate": 1.2698423236514523e-05, "loss": 1.2223, "step": 22009 }, { "epoch": 18.265560165975103, "grad_norm": 45.94751739501953, "learning_rate": 1.2698091286307055e-05, "loss": 1.0733, "step": 22010 }, { "epoch": 18.266390041493775, "grad_norm": 55.78507614135742, "learning_rate": 1.2697759336099587e-05, "loss": 0.2963, "step": 22011 }, { "epoch": 18.267219917012447, "grad_norm": 16.535362243652344, "learning_rate": 1.2697427385892117e-05, "loss": 0.3217, "step": 22012 }, { "epoch": 18.26804979253112, 
"grad_norm": 15.63805866241455, "learning_rate": 1.2697095435684648e-05, "loss": 0.2668, "step": 22013 }, { "epoch": 18.26887966804979, "grad_norm": 48.63483810424805, "learning_rate": 1.269676348547718e-05, "loss": 1.3543, "step": 22014 }, { "epoch": 18.269709543568464, "grad_norm": 32.59939193725586, "learning_rate": 1.269643153526971e-05, "loss": 0.5628, "step": 22015 }, { "epoch": 18.270539419087136, "grad_norm": 100.7207260131836, "learning_rate": 1.2696099585062242e-05, "loss": 0.4694, "step": 22016 }, { "epoch": 18.271369294605808, "grad_norm": 18.33806037902832, "learning_rate": 1.2695767634854771e-05, "loss": 0.4751, "step": 22017 }, { "epoch": 18.27219917012448, "grad_norm": 46.67856216430664, "learning_rate": 1.2695435684647303e-05, "loss": 0.6021, "step": 22018 }, { "epoch": 18.273029045643153, "grad_norm": 28.079490661621094, "learning_rate": 1.2695103734439835e-05, "loss": 0.3411, "step": 22019 }, { "epoch": 18.273858921161825, "grad_norm": 132.77090454101562, "learning_rate": 1.2694771784232368e-05, "loss": 1.0679, "step": 22020 }, { "epoch": 18.274688796680497, "grad_norm": 63.859683990478516, "learning_rate": 1.2694439834024896e-05, "loss": 0.8888, "step": 22021 }, { "epoch": 18.27551867219917, "grad_norm": 63.837947845458984, "learning_rate": 1.2694107883817428e-05, "loss": 0.8921, "step": 22022 }, { "epoch": 18.27634854771784, "grad_norm": 48.30106735229492, "learning_rate": 1.269377593360996e-05, "loss": 0.7452, "step": 22023 }, { "epoch": 18.277178423236514, "grad_norm": 37.98708724975586, "learning_rate": 1.2693443983402491e-05, "loss": 0.5147, "step": 22024 }, { "epoch": 18.278008298755186, "grad_norm": 65.22298431396484, "learning_rate": 1.2693112033195021e-05, "loss": 0.8524, "step": 22025 }, { "epoch": 18.278838174273858, "grad_norm": 32.244483947753906, "learning_rate": 1.2692780082987552e-05, "loss": 0.59, "step": 22026 }, { "epoch": 18.27966804979253, "grad_norm": 66.54643249511719, "learning_rate": 1.2692448132780084e-05, "loss": 
0.6613, "step": 22027 }, { "epoch": 18.280497925311202, "grad_norm": 116.56388854980469, "learning_rate": 1.2692116182572616e-05, "loss": 0.4935, "step": 22028 }, { "epoch": 18.281327800829875, "grad_norm": 71.7942886352539, "learning_rate": 1.2691784232365145e-05, "loss": 0.819, "step": 22029 }, { "epoch": 18.282157676348547, "grad_norm": 74.62435150146484, "learning_rate": 1.2691452282157677e-05, "loss": 0.8612, "step": 22030 }, { "epoch": 18.28298755186722, "grad_norm": 97.13323974609375, "learning_rate": 1.2691120331950209e-05, "loss": 1.3305, "step": 22031 }, { "epoch": 18.28381742738589, "grad_norm": 47.861610412597656, "learning_rate": 1.2690788381742741e-05, "loss": 1.124, "step": 22032 }, { "epoch": 18.284647302904563, "grad_norm": 71.71715545654297, "learning_rate": 1.2690456431535271e-05, "loss": 0.6587, "step": 22033 }, { "epoch": 18.285477178423236, "grad_norm": 29.29131317138672, "learning_rate": 1.2690124481327802e-05, "loss": 0.6106, "step": 22034 }, { "epoch": 18.286307053941908, "grad_norm": 35.865482330322266, "learning_rate": 1.2689792531120332e-05, "loss": 0.6919, "step": 22035 }, { "epoch": 18.28713692946058, "grad_norm": 14.95226001739502, "learning_rate": 1.2689460580912864e-05, "loss": 0.5046, "step": 22036 }, { "epoch": 18.287966804979252, "grad_norm": 21.430278778076172, "learning_rate": 1.2689128630705396e-05, "loss": 0.2838, "step": 22037 }, { "epoch": 18.288796680497924, "grad_norm": 42.24677276611328, "learning_rate": 1.2688796680497925e-05, "loss": 0.9778, "step": 22038 }, { "epoch": 18.289626556016596, "grad_norm": 55.094261169433594, "learning_rate": 1.2688464730290457e-05, "loss": 0.4856, "step": 22039 }, { "epoch": 18.29045643153527, "grad_norm": 26.66217041015625, "learning_rate": 1.268813278008299e-05, "loss": 0.4847, "step": 22040 }, { "epoch": 18.29128630705394, "grad_norm": 48.63100051879883, "learning_rate": 1.2687800829875521e-05, "loss": 0.7232, "step": 22041 }, { "epoch": 18.292116182572613, "grad_norm": 
190.35751342773438, "learning_rate": 1.268746887966805e-05, "loss": 0.8924, "step": 22042 }, { "epoch": 18.292946058091285, "grad_norm": 122.95187377929688, "learning_rate": 1.2687136929460582e-05, "loss": 0.534, "step": 22043 }, { "epoch": 18.293775933609957, "grad_norm": 12.161294937133789, "learning_rate": 1.2686804979253113e-05, "loss": 0.3443, "step": 22044 }, { "epoch": 18.29460580912863, "grad_norm": 28.00972557067871, "learning_rate": 1.2686473029045645e-05, "loss": 0.3521, "step": 22045 }, { "epoch": 18.295435684647302, "grad_norm": 33.776893615722656, "learning_rate": 1.2686141078838175e-05, "loss": 0.4583, "step": 22046 }, { "epoch": 18.296265560165974, "grad_norm": 44.73988342285156, "learning_rate": 1.2685809128630706e-05, "loss": 1.0483, "step": 22047 }, { "epoch": 18.297095435684646, "grad_norm": 43.01481628417969, "learning_rate": 1.2685477178423238e-05, "loss": 0.5838, "step": 22048 }, { "epoch": 18.29792531120332, "grad_norm": 34.65860366821289, "learning_rate": 1.268514522821577e-05, "loss": 0.6052, "step": 22049 }, { "epoch": 18.29875518672199, "grad_norm": 58.19119644165039, "learning_rate": 1.2684813278008299e-05, "loss": 0.6824, "step": 22050 }, { "epoch": 18.299585062240663, "grad_norm": 39.64234924316406, "learning_rate": 1.268448132780083e-05, "loss": 1.1219, "step": 22051 }, { "epoch": 18.300414937759335, "grad_norm": 36.09959030151367, "learning_rate": 1.2684149377593363e-05, "loss": 0.7027, "step": 22052 }, { "epoch": 18.301244813278007, "grad_norm": 20.750633239746094, "learning_rate": 1.2683817427385893e-05, "loss": 0.4689, "step": 22053 }, { "epoch": 18.30207468879668, "grad_norm": 31.041547775268555, "learning_rate": 1.2683485477178424e-05, "loss": 0.6401, "step": 22054 }, { "epoch": 18.30290456431535, "grad_norm": 71.77845764160156, "learning_rate": 1.2683153526970956e-05, "loss": 0.6688, "step": 22055 }, { "epoch": 18.303734439834024, "grad_norm": 114.575439453125, "learning_rate": 1.2682821576763486e-05, "loss": 0.6683, "step": 
22056 }, { "epoch": 18.304564315352696, "grad_norm": 21.611101150512695, "learning_rate": 1.2682489626556018e-05, "loss": 0.4575, "step": 22057 }, { "epoch": 18.305394190871368, "grad_norm": 40.003578186035156, "learning_rate": 1.2682157676348547e-05, "loss": 1.2442, "step": 22058 }, { "epoch": 18.30622406639004, "grad_norm": 27.825422286987305, "learning_rate": 1.2681825726141079e-05, "loss": 0.432, "step": 22059 }, { "epoch": 18.307053941908713, "grad_norm": 27.871259689331055, "learning_rate": 1.2681493775933611e-05, "loss": 0.6155, "step": 22060 }, { "epoch": 18.307883817427385, "grad_norm": 16.862001419067383, "learning_rate": 1.2681161825726143e-05, "loss": 0.2962, "step": 22061 }, { "epoch": 18.308713692946057, "grad_norm": 36.16237258911133, "learning_rate": 1.2680829875518674e-05, "loss": 0.8371, "step": 22062 }, { "epoch": 18.30954356846473, "grad_norm": 8.6837739944458, "learning_rate": 1.2680497925311204e-05, "loss": 0.2455, "step": 22063 }, { "epoch": 18.3103734439834, "grad_norm": 32.207305908203125, "learning_rate": 1.2680165975103736e-05, "loss": 0.6474, "step": 22064 }, { "epoch": 18.311203319502074, "grad_norm": 24.18539810180664, "learning_rate": 1.2679834024896267e-05, "loss": 0.3061, "step": 22065 }, { "epoch": 18.312033195020746, "grad_norm": 69.23500061035156, "learning_rate": 1.2679502074688799e-05, "loss": 0.6758, "step": 22066 }, { "epoch": 18.312863070539418, "grad_norm": 34.11750411987305, "learning_rate": 1.2679170124481327e-05, "loss": 0.5708, "step": 22067 }, { "epoch": 18.31369294605809, "grad_norm": 54.13932800292969, "learning_rate": 1.267883817427386e-05, "loss": 0.7594, "step": 22068 }, { "epoch": 18.314522821576762, "grad_norm": 55.27436065673828, "learning_rate": 1.2678506224066392e-05, "loss": 1.0113, "step": 22069 }, { "epoch": 18.315352697095435, "grad_norm": 36.59442138671875, "learning_rate": 1.2678174273858924e-05, "loss": 0.7559, "step": 22070 }, { "epoch": 18.316182572614107, "grad_norm": 110.0249252319336, 
"learning_rate": 1.2677842323651452e-05, "loss": 0.8319, "step": 22071 }, { "epoch": 18.31701244813278, "grad_norm": 44.6645622253418, "learning_rate": 1.2677510373443985e-05, "loss": 0.3583, "step": 22072 }, { "epoch": 18.31784232365145, "grad_norm": 20.129030227661133, "learning_rate": 1.2677178423236515e-05, "loss": 0.4064, "step": 22073 }, { "epoch": 18.318672199170123, "grad_norm": 19.91526222229004, "learning_rate": 1.2676846473029047e-05, "loss": 0.7291, "step": 22074 }, { "epoch": 18.319502074688796, "grad_norm": 34.04402542114258, "learning_rate": 1.2676514522821578e-05, "loss": 0.5041, "step": 22075 }, { "epoch": 18.320331950207468, "grad_norm": 42.918487548828125, "learning_rate": 1.2676182572614108e-05, "loss": 0.5487, "step": 22076 }, { "epoch": 18.32116182572614, "grad_norm": 61.991512298583984, "learning_rate": 1.267585062240664e-05, "loss": 0.977, "step": 22077 }, { "epoch": 18.321991701244812, "grad_norm": 72.8436508178711, "learning_rate": 1.2675518672199172e-05, "loss": 0.5939, "step": 22078 }, { "epoch": 18.322821576763484, "grad_norm": 19.71198272705078, "learning_rate": 1.2675186721991701e-05, "loss": 0.6642, "step": 22079 }, { "epoch": 18.323651452282157, "grad_norm": 26.238601684570312, "learning_rate": 1.2674854771784233e-05, "loss": 0.4664, "step": 22080 }, { "epoch": 18.32448132780083, "grad_norm": 49.279441833496094, "learning_rate": 1.2674522821576765e-05, "loss": 0.4888, "step": 22081 }, { "epoch": 18.3253112033195, "grad_norm": 50.15164566040039, "learning_rate": 1.2674190871369296e-05, "loss": 1.0188, "step": 22082 }, { "epoch": 18.326141078838173, "grad_norm": 36.77503967285156, "learning_rate": 1.2673858921161826e-05, "loss": 0.352, "step": 22083 }, { "epoch": 18.326970954356845, "grad_norm": 71.35067749023438, "learning_rate": 1.2673526970954358e-05, "loss": 0.473, "step": 22084 }, { "epoch": 18.327800829875518, "grad_norm": 24.909521102905273, "learning_rate": 1.2673195020746888e-05, "loss": 0.3561, "step": 22085 }, { "epoch": 
18.32863070539419, "grad_norm": 48.013153076171875, "learning_rate": 1.267286307053942e-05, "loss": 0.9002, "step": 22086 }, { "epoch": 18.329460580912862, "grad_norm": 53.25585174560547, "learning_rate": 1.2672531120331953e-05, "loss": 1.1245, "step": 22087 }, { "epoch": 18.330290456431534, "grad_norm": 47.287620544433594, "learning_rate": 1.2672199170124481e-05, "loss": 1.1649, "step": 22088 }, { "epoch": 18.331120331950206, "grad_norm": 76.2594985961914, "learning_rate": 1.2671867219917013e-05, "loss": 0.6109, "step": 22089 }, { "epoch": 18.33195020746888, "grad_norm": 29.997507095336914, "learning_rate": 1.2671535269709546e-05, "loss": 0.2482, "step": 22090 }, { "epoch": 18.33278008298755, "grad_norm": 81.3930892944336, "learning_rate": 1.2671203319502076e-05, "loss": 0.5251, "step": 22091 }, { "epoch": 18.333609958506223, "grad_norm": 133.84738159179688, "learning_rate": 1.2670871369294606e-05, "loss": 0.7017, "step": 22092 }, { "epoch": 18.334439834024895, "grad_norm": 40.5411376953125, "learning_rate": 1.2670539419087139e-05, "loss": 0.9426, "step": 22093 }, { "epoch": 18.335269709543567, "grad_norm": 25.29444122314453, "learning_rate": 1.2670207468879669e-05, "loss": 0.4896, "step": 22094 }, { "epoch": 18.33609958506224, "grad_norm": 46.947967529296875, "learning_rate": 1.2669875518672201e-05, "loss": 0.8902, "step": 22095 }, { "epoch": 18.33692946058091, "grad_norm": 65.91920471191406, "learning_rate": 1.266954356846473e-05, "loss": 0.6229, "step": 22096 }, { "epoch": 18.337759336099584, "grad_norm": 15.604333877563477, "learning_rate": 1.2669211618257262e-05, "loss": 0.2748, "step": 22097 }, { "epoch": 18.338589211618256, "grad_norm": 47.541900634765625, "learning_rate": 1.2668879668049794e-05, "loss": 0.6919, "step": 22098 }, { "epoch": 18.33941908713693, "grad_norm": 14.593671798706055, "learning_rate": 1.2668547717842326e-05, "loss": 0.319, "step": 22099 }, { "epoch": 18.3402489626556, "grad_norm": 23.427453994750977, "learning_rate": 
1.2668215767634855e-05, "loss": 0.607, "step": 22100 }, { "epoch": 18.341078838174273, "grad_norm": 53.09349822998047, "learning_rate": 1.2667883817427387e-05, "loss": 0.7211, "step": 22101 }, { "epoch": 18.341908713692945, "grad_norm": 40.175167083740234, "learning_rate": 1.2667551867219919e-05, "loss": 0.8158, "step": 22102 }, { "epoch": 18.342738589211617, "grad_norm": 28.692441940307617, "learning_rate": 1.266721991701245e-05, "loss": 0.6891, "step": 22103 }, { "epoch": 18.34356846473029, "grad_norm": 47.290470123291016, "learning_rate": 1.266688796680498e-05, "loss": 0.889, "step": 22104 }, { "epoch": 18.34439834024896, "grad_norm": 48.096317291259766, "learning_rate": 1.266655601659751e-05, "loss": 0.7533, "step": 22105 }, { "epoch": 18.345228215767634, "grad_norm": 18.88526153564453, "learning_rate": 1.2666224066390042e-05, "loss": 0.4349, "step": 22106 }, { "epoch": 18.346058091286306, "grad_norm": 37.62629699707031, "learning_rate": 1.2665892116182574e-05, "loss": 0.572, "step": 22107 }, { "epoch": 18.346887966804978, "grad_norm": 33.93010330200195, "learning_rate": 1.2665560165975103e-05, "loss": 0.5721, "step": 22108 }, { "epoch": 18.34771784232365, "grad_norm": 55.415283203125, "learning_rate": 1.2665228215767635e-05, "loss": 0.6637, "step": 22109 }, { "epoch": 18.348547717842322, "grad_norm": 76.55992126464844, "learning_rate": 1.2664896265560167e-05, "loss": 0.7021, "step": 22110 }, { "epoch": 18.349377593360995, "grad_norm": 19.366647720336914, "learning_rate": 1.26645643153527e-05, "loss": 0.44, "step": 22111 }, { "epoch": 18.350207468879667, "grad_norm": 48.568416595458984, "learning_rate": 1.266423236514523e-05, "loss": 1.0294, "step": 22112 }, { "epoch": 18.35103734439834, "grad_norm": 75.25699615478516, "learning_rate": 1.266390041493776e-05, "loss": 0.5007, "step": 22113 }, { "epoch": 18.35186721991701, "grad_norm": 57.33696746826172, "learning_rate": 1.266356846473029e-05, "loss": 0.5551, "step": 22114 }, { "epoch": 18.352697095435683, 
"grad_norm": 25.293006896972656, "learning_rate": 1.2663236514522823e-05, "loss": 0.6177, "step": 22115 }, { "epoch": 18.353526970954356, "grad_norm": 24.93411636352539, "learning_rate": 1.2662904564315355e-05, "loss": 0.5342, "step": 22116 }, { "epoch": 18.354356846473028, "grad_norm": 49.19715118408203, "learning_rate": 1.2662572614107884e-05, "loss": 0.5315, "step": 22117 }, { "epoch": 18.3551867219917, "grad_norm": 73.2127456665039, "learning_rate": 1.2662240663900416e-05, "loss": 0.6221, "step": 22118 }, { "epoch": 18.356016597510372, "grad_norm": 33.843379974365234, "learning_rate": 1.2661908713692948e-05, "loss": 0.6572, "step": 22119 }, { "epoch": 18.356846473029044, "grad_norm": 30.589929580688477, "learning_rate": 1.2661576763485478e-05, "loss": 0.3191, "step": 22120 }, { "epoch": 18.357676348547717, "grad_norm": 76.42770385742188, "learning_rate": 1.2661244813278009e-05, "loss": 0.7289, "step": 22121 }, { "epoch": 18.35850622406639, "grad_norm": 48.83271026611328, "learning_rate": 1.2660912863070541e-05, "loss": 0.5606, "step": 22122 }, { "epoch": 18.35933609958506, "grad_norm": 77.6390609741211, "learning_rate": 1.2660580912863071e-05, "loss": 0.6162, "step": 22123 }, { "epoch": 18.360165975103733, "grad_norm": 64.28099060058594, "learning_rate": 1.2660248962655603e-05, "loss": 0.6645, "step": 22124 }, { "epoch": 18.360995850622405, "grad_norm": 110.24299621582031, "learning_rate": 1.2659917012448134e-05, "loss": 0.4621, "step": 22125 }, { "epoch": 18.361825726141078, "grad_norm": 56.770389556884766, "learning_rate": 1.2659585062240664e-05, "loss": 0.7636, "step": 22126 }, { "epoch": 18.36265560165975, "grad_norm": 21.214263916015625, "learning_rate": 1.2659253112033196e-05, "loss": 0.3821, "step": 22127 }, { "epoch": 18.363485477178422, "grad_norm": 57.895965576171875, "learning_rate": 1.2658921161825728e-05, "loss": 0.4247, "step": 22128 }, { "epoch": 18.364315352697094, "grad_norm": 35.579566955566406, "learning_rate": 1.2658589211618257e-05, "loss": 
1.2116, "step": 22129 }, { "epoch": 18.365145228215766, "grad_norm": 46.532958984375, "learning_rate": 1.265825726141079e-05, "loss": 0.7423, "step": 22130 }, { "epoch": 18.36597510373444, "grad_norm": 29.76235008239746, "learning_rate": 1.2657925311203321e-05, "loss": 0.877, "step": 22131 }, { "epoch": 18.36680497925311, "grad_norm": 53.4117317199707, "learning_rate": 1.2657593360995852e-05, "loss": 0.6678, "step": 22132 }, { "epoch": 18.367634854771783, "grad_norm": 14.461723327636719, "learning_rate": 1.2657261410788382e-05, "loss": 0.4404, "step": 22133 }, { "epoch": 18.368464730290455, "grad_norm": 100.63783264160156, "learning_rate": 1.2656929460580913e-05, "loss": 0.449, "step": 22134 }, { "epoch": 18.369294605809127, "grad_norm": 20.58186149597168, "learning_rate": 1.2656597510373445e-05, "loss": 0.3728, "step": 22135 }, { "epoch": 18.3701244813278, "grad_norm": 90.18828582763672, "learning_rate": 1.2656265560165977e-05, "loss": 1.4009, "step": 22136 }, { "epoch": 18.37095435684647, "grad_norm": 36.14408874511719, "learning_rate": 1.2655933609958506e-05, "loss": 0.4292, "step": 22137 }, { "epoch": 18.371784232365144, "grad_norm": 124.22714233398438, "learning_rate": 1.2655601659751038e-05, "loss": 1.1746, "step": 22138 }, { "epoch": 18.372614107883816, "grad_norm": 58.52836608886719, "learning_rate": 1.265526970954357e-05, "loss": 0.5984, "step": 22139 }, { "epoch": 18.37344398340249, "grad_norm": 11.092010498046875, "learning_rate": 1.2654937759336102e-05, "loss": 0.2365, "step": 22140 }, { "epoch": 18.37427385892116, "grad_norm": 41.059322357177734, "learning_rate": 1.2654605809128632e-05, "loss": 0.9332, "step": 22141 }, { "epoch": 18.375103734439833, "grad_norm": 40.530792236328125, "learning_rate": 1.2654273858921163e-05, "loss": 0.819, "step": 22142 }, { "epoch": 18.375933609958505, "grad_norm": 18.291946411132812, "learning_rate": 1.2653941908713693e-05, "loss": 0.365, "step": 22143 }, { "epoch": 18.376763485477177, "grad_norm": 49.32537078857422, 
"learning_rate": 1.2653609958506225e-05, "loss": 0.528, "step": 22144 }, { "epoch": 18.37759336099585, "grad_norm": 41.64908218383789, "learning_rate": 1.2653278008298757e-05, "loss": 0.9522, "step": 22145 }, { "epoch": 18.37842323651452, "grad_norm": 32.513648986816406, "learning_rate": 1.2652946058091286e-05, "loss": 0.8763, "step": 22146 }, { "epoch": 18.379253112033194, "grad_norm": 101.6010513305664, "learning_rate": 1.2652614107883818e-05, "loss": 0.9298, "step": 22147 }, { "epoch": 18.380082987551866, "grad_norm": 28.643817901611328, "learning_rate": 1.265228215767635e-05, "loss": 0.6001, "step": 22148 }, { "epoch": 18.380912863070538, "grad_norm": 23.81601333618164, "learning_rate": 1.2651950207468882e-05, "loss": 0.4314, "step": 22149 }, { "epoch": 18.38174273858921, "grad_norm": 29.09113311767578, "learning_rate": 1.2651618257261411e-05, "loss": 0.3706, "step": 22150 }, { "epoch": 18.382572614107882, "grad_norm": 41.6602783203125, "learning_rate": 1.2651286307053943e-05, "loss": 0.4611, "step": 22151 }, { "epoch": 18.383402489626555, "grad_norm": 39.74753189086914, "learning_rate": 1.2650954356846474e-05, "loss": 0.6072, "step": 22152 }, { "epoch": 18.384232365145227, "grad_norm": 37.95462417602539, "learning_rate": 1.2650622406639006e-05, "loss": 0.8435, "step": 22153 }, { "epoch": 18.3850622406639, "grad_norm": 109.47852325439453, "learning_rate": 1.2650290456431536e-05, "loss": 0.7826, "step": 22154 }, { "epoch": 18.38589211618257, "grad_norm": 23.759103775024414, "learning_rate": 1.2649958506224067e-05, "loss": 0.3891, "step": 22155 }, { "epoch": 18.386721991701243, "grad_norm": 37.86311340332031, "learning_rate": 1.2649626556016599e-05, "loss": 0.7092, "step": 22156 }, { "epoch": 18.387551867219916, "grad_norm": 81.89030456542969, "learning_rate": 1.264929460580913e-05, "loss": 0.7027, "step": 22157 }, { "epoch": 18.388381742738588, "grad_norm": 10.983985900878906, "learning_rate": 1.264896265560166e-05, "loss": 0.2859, "step": 22158 }, { "epoch": 
18.38921161825726, "grad_norm": 63.865745544433594, "learning_rate": 1.2648630705394192e-05, "loss": 0.7824, "step": 22159 }, { "epoch": 18.390041493775932, "grad_norm": 41.69953918457031, "learning_rate": 1.2648298755186724e-05, "loss": 0.7195, "step": 22160 }, { "epoch": 18.390871369294604, "grad_norm": 25.213783264160156, "learning_rate": 1.2647966804979254e-05, "loss": 0.6346, "step": 22161 }, { "epoch": 18.391701244813277, "grad_norm": 27.052640914916992, "learning_rate": 1.2647634854771785e-05, "loss": 0.3841, "step": 22162 }, { "epoch": 18.39253112033195, "grad_norm": 48.679176330566406, "learning_rate": 1.2647302904564317e-05, "loss": 1.217, "step": 22163 }, { "epoch": 18.39336099585062, "grad_norm": 68.24842071533203, "learning_rate": 1.2646970954356847e-05, "loss": 0.6239, "step": 22164 }, { "epoch": 18.394190871369293, "grad_norm": 20.073408126831055, "learning_rate": 1.2646639004149379e-05, "loss": 0.4361, "step": 22165 }, { "epoch": 18.395020746887965, "grad_norm": 24.281686782836914, "learning_rate": 1.2646307053941911e-05, "loss": 0.4672, "step": 22166 }, { "epoch": 18.395850622406638, "grad_norm": 31.783859252929688, "learning_rate": 1.264597510373444e-05, "loss": 0.9905, "step": 22167 }, { "epoch": 18.39668049792531, "grad_norm": 94.3821792602539, "learning_rate": 1.2645643153526972e-05, "loss": 0.7552, "step": 22168 }, { "epoch": 18.397510373443982, "grad_norm": 50.18841552734375, "learning_rate": 1.2645311203319504e-05, "loss": 0.7055, "step": 22169 }, { "epoch": 18.398340248962654, "grad_norm": 52.354347229003906, "learning_rate": 1.2644979253112035e-05, "loss": 0.6729, "step": 22170 }, { "epoch": 18.399170124481326, "grad_norm": 16.546220779418945, "learning_rate": 1.2644647302904565e-05, "loss": 0.5029, "step": 22171 }, { "epoch": 18.4, "grad_norm": 20.49582290649414, "learning_rate": 1.2644315352697097e-05, "loss": 0.2871, "step": 22172 }, { "epoch": 18.40082987551867, "grad_norm": 27.390731811523438, "learning_rate": 1.2643983402489628e-05, 
"loss": 0.7759, "step": 22173 }, { "epoch": 18.401659751037343, "grad_norm": 81.38341522216797, "learning_rate": 1.264365145228216e-05, "loss": 1.1775, "step": 22174 }, { "epoch": 18.402489626556015, "grad_norm": 51.653053283691406, "learning_rate": 1.2643319502074688e-05, "loss": 0.5462, "step": 22175 }, { "epoch": 18.403319502074687, "grad_norm": 32.49674606323242, "learning_rate": 1.264298755186722e-05, "loss": 0.5174, "step": 22176 }, { "epoch": 18.40414937759336, "grad_norm": 34.68003463745117, "learning_rate": 1.2642655601659753e-05, "loss": 0.6088, "step": 22177 }, { "epoch": 18.40497925311203, "grad_norm": 82.70561218261719, "learning_rate": 1.2642323651452285e-05, "loss": 0.8574, "step": 22178 }, { "epoch": 18.405809128630704, "grad_norm": 13.187626838684082, "learning_rate": 1.2641991701244813e-05, "loss": 0.3361, "step": 22179 }, { "epoch": 18.406639004149376, "grad_norm": 65.67362976074219, "learning_rate": 1.2641659751037345e-05, "loss": 0.3983, "step": 22180 }, { "epoch": 18.40746887966805, "grad_norm": 44.265480041503906, "learning_rate": 1.2641327800829878e-05, "loss": 1.062, "step": 22181 }, { "epoch": 18.40829875518672, "grad_norm": 14.763851165771484, "learning_rate": 1.2640995850622408e-05, "loss": 0.3405, "step": 22182 }, { "epoch": 18.409128630705393, "grad_norm": 36.878639221191406, "learning_rate": 1.2640663900414938e-05, "loss": 0.5229, "step": 22183 }, { "epoch": 18.409958506224065, "grad_norm": 25.619604110717773, "learning_rate": 1.2640331950207469e-05, "loss": 0.3963, "step": 22184 }, { "epoch": 18.410788381742737, "grad_norm": 26.8958797454834, "learning_rate": 1.2640000000000001e-05, "loss": 0.4532, "step": 22185 }, { "epoch": 18.41161825726141, "grad_norm": 30.865646362304688, "learning_rate": 1.2639668049792533e-05, "loss": 0.563, "step": 22186 }, { "epoch": 18.41244813278008, "grad_norm": 29.765466690063477, "learning_rate": 1.2639336099585062e-05, "loss": 0.7499, "step": 22187 }, { "epoch": 18.413278008298754, "grad_norm": 
55.35352325439453, "learning_rate": 1.2639004149377594e-05, "loss": 0.9996, "step": 22188 }, { "epoch": 18.414107883817426, "grad_norm": 25.47221565246582, "learning_rate": 1.2638672199170126e-05, "loss": 0.3636, "step": 22189 }, { "epoch": 18.414937759336098, "grad_norm": 41.03028869628906, "learning_rate": 1.2638340248962656e-05, "loss": 0.698, "step": 22190 }, { "epoch": 18.41576763485477, "grad_norm": 83.67620849609375, "learning_rate": 1.2638008298755189e-05, "loss": 0.5689, "step": 22191 }, { "epoch": 18.416597510373443, "grad_norm": 53.035133361816406, "learning_rate": 1.2637676348547719e-05, "loss": 0.6399, "step": 22192 }, { "epoch": 18.417427385892115, "grad_norm": 51.367530822753906, "learning_rate": 1.263734439834025e-05, "loss": 0.5621, "step": 22193 }, { "epoch": 18.418257261410787, "grad_norm": 31.4771728515625, "learning_rate": 1.2637012448132781e-05, "loss": 0.8507, "step": 22194 }, { "epoch": 18.41908713692946, "grad_norm": 41.2595100402832, "learning_rate": 1.2636680497925314e-05, "loss": 0.4325, "step": 22195 }, { "epoch": 18.41991701244813, "grad_norm": 14.349177360534668, "learning_rate": 1.2636348547717842e-05, "loss": 0.275, "step": 22196 }, { "epoch": 18.420746887966803, "grad_norm": 73.26232147216797, "learning_rate": 1.2636016597510374e-05, "loss": 0.5044, "step": 22197 }, { "epoch": 18.421576763485476, "grad_norm": 81.02835845947266, "learning_rate": 1.2635684647302906e-05, "loss": 0.4436, "step": 22198 }, { "epoch": 18.422406639004148, "grad_norm": 18.733230590820312, "learning_rate": 1.2635352697095437e-05, "loss": 0.4233, "step": 22199 }, { "epoch": 18.42323651452282, "grad_norm": 25.924531936645508, "learning_rate": 1.2635020746887967e-05, "loss": 0.3014, "step": 22200 }, { "epoch": 18.424066390041492, "grad_norm": 19.325464248657227, "learning_rate": 1.26346887966805e-05, "loss": 0.3209, "step": 22201 }, { "epoch": 18.424896265560164, "grad_norm": 50.968841552734375, "learning_rate": 1.263435684647303e-05, "loss": 0.8057, "step": 
22202 }, { "epoch": 18.425726141078837, "grad_norm": 41.0140266418457, "learning_rate": 1.2634024896265562e-05, "loss": 0.5621, "step": 22203 }, { "epoch": 18.42655601659751, "grad_norm": 47.024559020996094, "learning_rate": 1.263369294605809e-05, "loss": 0.5784, "step": 22204 }, { "epoch": 18.42738589211618, "grad_norm": 53.36300277709961, "learning_rate": 1.2633360995850623e-05, "loss": 0.879, "step": 22205 }, { "epoch": 18.428215767634853, "grad_norm": 21.953031539916992, "learning_rate": 1.2633029045643155e-05, "loss": 0.4524, "step": 22206 }, { "epoch": 18.429045643153525, "grad_norm": 80.4747085571289, "learning_rate": 1.2632697095435687e-05, "loss": 0.9469, "step": 22207 }, { "epoch": 18.429875518672198, "grad_norm": 40.962257385253906, "learning_rate": 1.2632365145228216e-05, "loss": 0.3183, "step": 22208 }, { "epoch": 18.43070539419087, "grad_norm": 27.57347869873047, "learning_rate": 1.2632033195020748e-05, "loss": 0.4, "step": 22209 }, { "epoch": 18.431535269709542, "grad_norm": 49.90401840209961, "learning_rate": 1.263170124481328e-05, "loss": 0.4426, "step": 22210 }, { "epoch": 18.432365145228214, "grad_norm": 38.5861930847168, "learning_rate": 1.263136929460581e-05, "loss": 0.4609, "step": 22211 }, { "epoch": 18.433195020746886, "grad_norm": 68.65984344482422, "learning_rate": 1.263103734439834e-05, "loss": 0.6999, "step": 22212 }, { "epoch": 18.43402489626556, "grad_norm": 14.231437683105469, "learning_rate": 1.2630705394190871e-05, "loss": 0.2898, "step": 22213 }, { "epoch": 18.43485477178423, "grad_norm": 18.167503356933594, "learning_rate": 1.2630373443983403e-05, "loss": 0.2846, "step": 22214 }, { "epoch": 18.435684647302903, "grad_norm": 48.25035858154297, "learning_rate": 1.2630041493775935e-05, "loss": 0.4338, "step": 22215 }, { "epoch": 18.436514522821575, "grad_norm": 69.19182586669922, "learning_rate": 1.2629709543568464e-05, "loss": 0.78, "step": 22216 }, { "epoch": 18.437344398340247, "grad_norm": 20.582138061523438, "learning_rate": 
1.2629377593360996e-05, "loss": 0.5313, "step": 22217 }, { "epoch": 18.43817427385892, "grad_norm": 61.61682891845703, "learning_rate": 1.2629045643153528e-05, "loss": 0.5411, "step": 22218 }, { "epoch": 18.439004149377592, "grad_norm": 42.22027587890625, "learning_rate": 1.262871369294606e-05, "loss": 0.6875, "step": 22219 }, { "epoch": 18.439834024896264, "grad_norm": 33.03836441040039, "learning_rate": 1.262838174273859e-05, "loss": 0.468, "step": 22220 }, { "epoch": 18.440663900414936, "grad_norm": 70.06773376464844, "learning_rate": 1.2628049792531121e-05, "loss": 1.1509, "step": 22221 }, { "epoch": 18.44149377593361, "grad_norm": 51.8425178527832, "learning_rate": 1.2627717842323652e-05, "loss": 1.4077, "step": 22222 }, { "epoch": 18.44232365145228, "grad_norm": 56.1898193359375, "learning_rate": 1.2627385892116184e-05, "loss": 0.733, "step": 22223 }, { "epoch": 18.443153526970953, "grad_norm": 37.53912353515625, "learning_rate": 1.2627053941908716e-05, "loss": 0.6226, "step": 22224 }, { "epoch": 18.443983402489625, "grad_norm": 34.86555480957031, "learning_rate": 1.2626721991701245e-05, "loss": 0.4765, "step": 22225 }, { "epoch": 18.444813278008297, "grad_norm": 25.60370635986328, "learning_rate": 1.2626390041493777e-05, "loss": 0.4293, "step": 22226 }, { "epoch": 18.44564315352697, "grad_norm": 62.27621841430664, "learning_rate": 1.2626058091286309e-05, "loss": 0.708, "step": 22227 }, { "epoch": 18.44647302904564, "grad_norm": 59.94643783569336, "learning_rate": 1.2625726141078841e-05, "loss": 0.6099, "step": 22228 }, { "epoch": 18.447302904564314, "grad_norm": 29.96392822265625, "learning_rate": 1.262539419087137e-05, "loss": 1.0317, "step": 22229 }, { "epoch": 18.448132780082986, "grad_norm": 39.32514953613281, "learning_rate": 1.2625062240663902e-05, "loss": 0.8479, "step": 22230 }, { "epoch": 18.448962655601658, "grad_norm": 44.143890380859375, "learning_rate": 1.2624730290456432e-05, "loss": 0.5479, "step": 22231 }, { "epoch": 18.44979253112033, 
"grad_norm": 31.610830307006836, "learning_rate": 1.2624398340248964e-05, "loss": 0.5078, "step": 22232 }, { "epoch": 18.450622406639003, "grad_norm": 45.07810974121094, "learning_rate": 1.2624066390041495e-05, "loss": 0.4931, "step": 22233 }, { "epoch": 18.451452282157675, "grad_norm": 64.44586181640625, "learning_rate": 1.2623734439834025e-05, "loss": 0.9776, "step": 22234 }, { "epoch": 18.452282157676347, "grad_norm": 28.183732986450195, "learning_rate": 1.2623402489626557e-05, "loss": 0.3405, "step": 22235 }, { "epoch": 18.45311203319502, "grad_norm": 50.128570556640625, "learning_rate": 1.262307053941909e-05, "loss": 0.4313, "step": 22236 }, { "epoch": 18.45394190871369, "grad_norm": 58.35454177856445, "learning_rate": 1.2622738589211618e-05, "loss": 0.9487, "step": 22237 }, { "epoch": 18.454771784232364, "grad_norm": 40.92201614379883, "learning_rate": 1.262240663900415e-05, "loss": 0.5346, "step": 22238 }, { "epoch": 18.455601659751036, "grad_norm": 22.31747055053711, "learning_rate": 1.2622074688796682e-05, "loss": 0.4168, "step": 22239 }, { "epoch": 18.456431535269708, "grad_norm": 77.68634796142578, "learning_rate": 1.2621742738589213e-05, "loss": 1.1014, "step": 22240 }, { "epoch": 18.45726141078838, "grad_norm": 51.63520050048828, "learning_rate": 1.2621410788381743e-05, "loss": 0.7299, "step": 22241 }, { "epoch": 18.458091286307052, "grad_norm": 33.6718635559082, "learning_rate": 1.2621078838174275e-05, "loss": 0.34, "step": 22242 }, { "epoch": 18.458921161825725, "grad_norm": 47.39147186279297, "learning_rate": 1.2620746887966806e-05, "loss": 0.4895, "step": 22243 }, { "epoch": 18.459751037344397, "grad_norm": 45.81785583496094, "learning_rate": 1.2620414937759338e-05, "loss": 0.6974, "step": 22244 }, { "epoch": 18.46058091286307, "grad_norm": 59.11335754394531, "learning_rate": 1.262008298755187e-05, "loss": 0.8128, "step": 22245 }, { "epoch": 18.46141078838174, "grad_norm": 61.53804397583008, "learning_rate": 1.2619751037344399e-05, "loss": 0.6089, 
"step": 22246 }, { "epoch": 18.462240663900413, "grad_norm": 50.697906494140625, "learning_rate": 1.261941908713693e-05, "loss": 0.6441, "step": 22247 }, { "epoch": 18.463070539419085, "grad_norm": 61.45114517211914, "learning_rate": 1.2619087136929463e-05, "loss": 0.3358, "step": 22248 }, { "epoch": 18.463900414937758, "grad_norm": 124.95052337646484, "learning_rate": 1.2618755186721993e-05, "loss": 0.885, "step": 22249 }, { "epoch": 18.46473029045643, "grad_norm": 48.443817138671875, "learning_rate": 1.2618423236514524e-05, "loss": 0.9899, "step": 22250 }, { "epoch": 18.465560165975102, "grad_norm": 69.5517807006836, "learning_rate": 1.2618091286307054e-05, "loss": 0.7733, "step": 22251 }, { "epoch": 18.466390041493774, "grad_norm": 33.02997970581055, "learning_rate": 1.2617759336099586e-05, "loss": 0.6516, "step": 22252 }, { "epoch": 18.467219917012446, "grad_norm": 65.85641479492188, "learning_rate": 1.2617427385892118e-05, "loss": 1.0518, "step": 22253 }, { "epoch": 18.46804979253112, "grad_norm": 22.695219039916992, "learning_rate": 1.2617095435684647e-05, "loss": 0.4625, "step": 22254 }, { "epoch": 18.46887966804979, "grad_norm": 20.337139129638672, "learning_rate": 1.2616763485477179e-05, "loss": 0.3356, "step": 22255 }, { "epoch": 18.469709543568463, "grad_norm": 35.04601287841797, "learning_rate": 1.2616431535269711e-05, "loss": 0.4244, "step": 22256 }, { "epoch": 18.470539419087135, "grad_norm": 24.285362243652344, "learning_rate": 1.2616099585062243e-05, "loss": 0.5476, "step": 22257 }, { "epoch": 18.471369294605807, "grad_norm": 38.78068161010742, "learning_rate": 1.2615767634854772e-05, "loss": 0.6916, "step": 22258 }, { "epoch": 18.47219917012448, "grad_norm": 34.10485076904297, "learning_rate": 1.2615435684647304e-05, "loss": 0.3945, "step": 22259 }, { "epoch": 18.473029045643152, "grad_norm": 34.079193115234375, "learning_rate": 1.2615103734439834e-05, "loss": 0.4661, "step": 22260 }, { "epoch": 18.473858921161824, "grad_norm": 21.92348289489746, 
"learning_rate": 1.2614771784232367e-05, "loss": 0.4032, "step": 22261 }, { "epoch": 18.474688796680496, "grad_norm": 28.617727279663086, "learning_rate": 1.2614439834024897e-05, "loss": 0.3873, "step": 22262 }, { "epoch": 18.47551867219917, "grad_norm": 19.713695526123047, "learning_rate": 1.2614107883817427e-05, "loss": 0.3882, "step": 22263 }, { "epoch": 18.47634854771784, "grad_norm": 42.337886810302734, "learning_rate": 1.261377593360996e-05, "loss": 0.7229, "step": 22264 }, { "epoch": 18.477178423236513, "grad_norm": 19.229034423828125, "learning_rate": 1.2613443983402492e-05, "loss": 0.3351, "step": 22265 }, { "epoch": 18.478008298755185, "grad_norm": 46.9174919128418, "learning_rate": 1.261311203319502e-05, "loss": 0.7393, "step": 22266 }, { "epoch": 18.478838174273857, "grad_norm": 37.6333122253418, "learning_rate": 1.2612780082987552e-05, "loss": 0.4976, "step": 22267 }, { "epoch": 18.47966804979253, "grad_norm": 33.160240173339844, "learning_rate": 1.2612448132780085e-05, "loss": 0.4461, "step": 22268 }, { "epoch": 18.4804979253112, "grad_norm": 54.87648391723633, "learning_rate": 1.2612116182572615e-05, "loss": 0.5287, "step": 22269 }, { "epoch": 18.481327800829874, "grad_norm": 44.942054748535156, "learning_rate": 1.2611784232365145e-05, "loss": 0.5721, "step": 22270 }, { "epoch": 18.482157676348546, "grad_norm": 15.410564422607422, "learning_rate": 1.2611452282157678e-05, "loss": 0.3732, "step": 22271 }, { "epoch": 18.482987551867218, "grad_norm": 103.55939483642578, "learning_rate": 1.2611120331950208e-05, "loss": 0.8385, "step": 22272 }, { "epoch": 18.48381742738589, "grad_norm": 30.48946189880371, "learning_rate": 1.261078838174274e-05, "loss": 0.6984, "step": 22273 }, { "epoch": 18.484647302904563, "grad_norm": 31.979671478271484, "learning_rate": 1.2610456431535272e-05, "loss": 0.5523, "step": 22274 }, { "epoch": 18.485477178423235, "grad_norm": 46.237483978271484, "learning_rate": 1.2610124481327801e-05, "loss": 1.3411, "step": 22275 }, { 
"epoch": 18.486307053941907, "grad_norm": 82.5370101928711, "learning_rate": 1.2609792531120333e-05, "loss": 1.1578, "step": 22276 }, { "epoch": 18.48713692946058, "grad_norm": 21.703128814697266, "learning_rate": 1.2609460580912865e-05, "loss": 0.3979, "step": 22277 }, { "epoch": 18.48796680497925, "grad_norm": 51.055965423583984, "learning_rate": 1.2609128630705395e-05, "loss": 0.8695, "step": 22278 }, { "epoch": 18.488796680497924, "grad_norm": 66.9588851928711, "learning_rate": 1.2608796680497926e-05, "loss": 0.6578, "step": 22279 }, { "epoch": 18.489626556016596, "grad_norm": 21.613069534301758, "learning_rate": 1.2608464730290458e-05, "loss": 0.5038, "step": 22280 }, { "epoch": 18.490456431535268, "grad_norm": 22.016992568969727, "learning_rate": 1.2608132780082988e-05, "loss": 0.4653, "step": 22281 }, { "epoch": 18.49128630705394, "grad_norm": 27.990007400512695, "learning_rate": 1.260780082987552e-05, "loss": 0.4029, "step": 22282 }, { "epoch": 18.492116182572612, "grad_norm": 22.232282638549805, "learning_rate": 1.260746887966805e-05, "loss": 0.2144, "step": 22283 }, { "epoch": 18.492946058091285, "grad_norm": 36.39302444458008, "learning_rate": 1.2607136929460581e-05, "loss": 0.9985, "step": 22284 }, { "epoch": 18.49377593360996, "grad_norm": 23.728534698486328, "learning_rate": 1.2606804979253113e-05, "loss": 0.4864, "step": 22285 }, { "epoch": 18.49460580912863, "grad_norm": 42.30455017089844, "learning_rate": 1.2606473029045646e-05, "loss": 0.4494, "step": 22286 }, { "epoch": 18.495435684647305, "grad_norm": 21.60386085510254, "learning_rate": 1.2606141078838174e-05, "loss": 0.5407, "step": 22287 }, { "epoch": 18.496265560165973, "grad_norm": 68.0842056274414, "learning_rate": 1.2605809128630706e-05, "loss": 0.8382, "step": 22288 }, { "epoch": 18.49709543568465, "grad_norm": 53.272708892822266, "learning_rate": 1.2605477178423239e-05, "loss": 0.8476, "step": 22289 }, { "epoch": 18.497925311203318, "grad_norm": 32.47196578979492, "learning_rate": 
1.2605145228215769e-05, "loss": 0.5026, "step": 22290 }, { "epoch": 18.498755186721993, "grad_norm": 30.301084518432617, "learning_rate": 1.26048132780083e-05, "loss": 0.6073, "step": 22291 }, { "epoch": 18.499585062240662, "grad_norm": 202.98507690429688, "learning_rate": 1.260448132780083e-05, "loss": 0.5294, "step": 22292 }, { "epoch": 18.500414937759338, "grad_norm": 17.77593994140625, "learning_rate": 1.2604149377593362e-05, "loss": 0.4363, "step": 22293 }, { "epoch": 18.501244813278007, "grad_norm": 22.938440322875977, "learning_rate": 1.2603817427385894e-05, "loss": 0.4031, "step": 22294 }, { "epoch": 18.502074688796682, "grad_norm": 28.92312240600586, "learning_rate": 1.2603485477178423e-05, "loss": 0.5112, "step": 22295 }, { "epoch": 18.50290456431535, "grad_norm": 43.33734130859375, "learning_rate": 1.2603153526970955e-05, "loss": 0.8409, "step": 22296 }, { "epoch": 18.503734439834027, "grad_norm": 49.06324005126953, "learning_rate": 1.2602821576763487e-05, "loss": 1.1421, "step": 22297 }, { "epoch": 18.504564315352695, "grad_norm": 23.42459487915039, "learning_rate": 1.2602489626556019e-05, "loss": 0.2833, "step": 22298 }, { "epoch": 18.50539419087137, "grad_norm": 59.96842956542969, "learning_rate": 1.260215767634855e-05, "loss": 0.8707, "step": 22299 }, { "epoch": 18.50622406639004, "grad_norm": 42.39360427856445, "learning_rate": 1.260182572614108e-05, "loss": 0.4201, "step": 22300 }, { "epoch": 18.507053941908715, "grad_norm": 39.64201736450195, "learning_rate": 1.260149377593361e-05, "loss": 0.5708, "step": 22301 }, { "epoch": 18.507883817427384, "grad_norm": 39.85029220581055, "learning_rate": 1.2601161825726142e-05, "loss": 0.5727, "step": 22302 }, { "epoch": 18.50871369294606, "grad_norm": 29.902315139770508, "learning_rate": 1.2600829875518674e-05, "loss": 0.8471, "step": 22303 }, { "epoch": 18.50954356846473, "grad_norm": 112.85240173339844, "learning_rate": 1.2600497925311203e-05, "loss": 0.4994, "step": 22304 }, { "epoch": 18.510373443983404, 
"grad_norm": 23.643081665039062, "learning_rate": 1.2600165975103735e-05, "loss": 0.4244, "step": 22305 }, { "epoch": 18.511203319502073, "grad_norm": 52.9122428894043, "learning_rate": 1.2599834024896267e-05, "loss": 0.6563, "step": 22306 }, { "epoch": 18.51203319502075, "grad_norm": 49.037803649902344, "learning_rate": 1.2599502074688798e-05, "loss": 0.4948, "step": 22307 }, { "epoch": 18.512863070539417, "grad_norm": 59.445125579833984, "learning_rate": 1.2599170124481328e-05, "loss": 0.7839, "step": 22308 }, { "epoch": 18.513692946058093, "grad_norm": 33.334930419921875, "learning_rate": 1.259883817427386e-05, "loss": 0.654, "step": 22309 }, { "epoch": 18.51452282157676, "grad_norm": 25.371435165405273, "learning_rate": 1.259850622406639e-05, "loss": 0.6554, "step": 22310 }, { "epoch": 18.515352697095437, "grad_norm": 18.483484268188477, "learning_rate": 1.2598174273858923e-05, "loss": 0.2718, "step": 22311 }, { "epoch": 18.51618257261411, "grad_norm": 29.70159339904785, "learning_rate": 1.2597842323651453e-05, "loss": 0.7205, "step": 22312 }, { "epoch": 18.517012448132782, "grad_norm": 25.74879264831543, "learning_rate": 1.2597510373443984e-05, "loss": 0.4792, "step": 22313 }, { "epoch": 18.517842323651454, "grad_norm": 56.71813201904297, "learning_rate": 1.2597178423236516e-05, "loss": 0.6602, "step": 22314 }, { "epoch": 18.518672199170126, "grad_norm": 31.267019271850586, "learning_rate": 1.2596846473029048e-05, "loss": 0.5173, "step": 22315 }, { "epoch": 18.5195020746888, "grad_norm": 36.650047302246094, "learning_rate": 1.2596514522821577e-05, "loss": 0.3628, "step": 22316 }, { "epoch": 18.52033195020747, "grad_norm": 59.66433334350586, "learning_rate": 1.2596182572614109e-05, "loss": 0.4642, "step": 22317 }, { "epoch": 18.521161825726143, "grad_norm": 68.583251953125, "learning_rate": 1.259585062240664e-05, "loss": 0.925, "step": 22318 }, { "epoch": 18.521991701244815, "grad_norm": 51.749305725097656, "learning_rate": 1.2595518672199171e-05, "loss": 
0.5113, "step": 22319 }, { "epoch": 18.522821576763487, "grad_norm": 41.44965362548828, "learning_rate": 1.2595186721991702e-05, "loss": 0.4502, "step": 22320 }, { "epoch": 18.52365145228216, "grad_norm": 37.758670806884766, "learning_rate": 1.2594854771784232e-05, "loss": 0.3546, "step": 22321 }, { "epoch": 18.52448132780083, "grad_norm": 54.763423919677734, "learning_rate": 1.2594522821576764e-05, "loss": 0.7444, "step": 22322 }, { "epoch": 18.525311203319504, "grad_norm": 30.752079010009766, "learning_rate": 1.2594190871369296e-05, "loss": 0.665, "step": 22323 }, { "epoch": 18.526141078838176, "grad_norm": 23.99835968017578, "learning_rate": 1.2593858921161828e-05, "loss": 0.44, "step": 22324 }, { "epoch": 18.526970954356848, "grad_norm": 27.874073028564453, "learning_rate": 1.2593526970954357e-05, "loss": 0.4693, "step": 22325 }, { "epoch": 18.52780082987552, "grad_norm": 27.593509674072266, "learning_rate": 1.259319502074689e-05, "loss": 0.7422, "step": 22326 }, { "epoch": 18.528630705394193, "grad_norm": 28.583599090576172, "learning_rate": 1.2592863070539421e-05, "loss": 0.426, "step": 22327 }, { "epoch": 18.529460580912865, "grad_norm": 75.29866790771484, "learning_rate": 1.2592531120331952e-05, "loss": 0.7801, "step": 22328 }, { "epoch": 18.530290456431537, "grad_norm": 16.59493064880371, "learning_rate": 1.2592199170124482e-05, "loss": 0.457, "step": 22329 }, { "epoch": 18.53112033195021, "grad_norm": 28.812232971191406, "learning_rate": 1.2591867219917013e-05, "loss": 0.4344, "step": 22330 }, { "epoch": 18.53195020746888, "grad_norm": 43.650691986083984, "learning_rate": 1.2591535269709545e-05, "loss": 0.3727, "step": 22331 }, { "epoch": 18.532780082987554, "grad_norm": 33.87913131713867, "learning_rate": 1.2591203319502077e-05, "loss": 0.4663, "step": 22332 }, { "epoch": 18.533609958506226, "grad_norm": 34.09304428100586, "learning_rate": 1.2590871369294605e-05, "loss": 0.4616, "step": 22333 }, { "epoch": 18.534439834024898, "grad_norm": 
50.23204040527344, "learning_rate": 1.2590539419087138e-05, "loss": 0.6768, "step": 22334 }, { "epoch": 18.53526970954357, "grad_norm": 49.0659294128418, "learning_rate": 1.259020746887967e-05, "loss": 0.473, "step": 22335 }, { "epoch": 18.536099585062242, "grad_norm": 37.73316955566406, "learning_rate": 1.2589875518672202e-05, "loss": 0.6026, "step": 22336 }, { "epoch": 18.536929460580915, "grad_norm": 44.43150329589844, "learning_rate": 1.258954356846473e-05, "loss": 0.6303, "step": 22337 }, { "epoch": 18.537759336099587, "grad_norm": 23.490713119506836, "learning_rate": 1.2589211618257263e-05, "loss": 0.3312, "step": 22338 }, { "epoch": 18.53858921161826, "grad_norm": 34.14905548095703, "learning_rate": 1.2588879668049793e-05, "loss": 0.3522, "step": 22339 }, { "epoch": 18.53941908713693, "grad_norm": 65.82205963134766, "learning_rate": 1.2588547717842325e-05, "loss": 0.4951, "step": 22340 }, { "epoch": 18.540248962655603, "grad_norm": 34.63356018066406, "learning_rate": 1.2588215767634856e-05, "loss": 0.5725, "step": 22341 }, { "epoch": 18.541078838174275, "grad_norm": 125.422607421875, "learning_rate": 1.2587883817427386e-05, "loss": 0.7238, "step": 22342 }, { "epoch": 18.541908713692948, "grad_norm": 71.8851547241211, "learning_rate": 1.2587551867219918e-05, "loss": 1.1043, "step": 22343 }, { "epoch": 18.54273858921162, "grad_norm": 36.333251953125, "learning_rate": 1.258721991701245e-05, "loss": 0.4643, "step": 22344 }, { "epoch": 18.543568464730292, "grad_norm": 38.35822296142578, "learning_rate": 1.2586887966804979e-05, "loss": 0.7144, "step": 22345 }, { "epoch": 18.544398340248964, "grad_norm": 104.65104675292969, "learning_rate": 1.2586556016597511e-05, "loss": 1.7663, "step": 22346 }, { "epoch": 18.545228215767636, "grad_norm": 19.094099044799805, "learning_rate": 1.2586224066390043e-05, "loss": 0.2706, "step": 22347 }, { "epoch": 18.54605809128631, "grad_norm": 71.47735595703125, "learning_rate": 1.2585892116182574e-05, "loss": 0.878, "step": 22348 }, 
{ "epoch": 18.54688796680498, "grad_norm": 39.486175537109375, "learning_rate": 1.2585560165975104e-05, "loss": 0.6818, "step": 22349 }, { "epoch": 18.547717842323653, "grad_norm": 33.54694747924805, "learning_rate": 1.2585228215767636e-05, "loss": 0.4042, "step": 22350 }, { "epoch": 18.548547717842325, "grad_norm": 39.72480010986328, "learning_rate": 1.2584896265560166e-05, "loss": 0.7496, "step": 22351 }, { "epoch": 18.549377593360997, "grad_norm": 78.86054992675781, "learning_rate": 1.2584564315352699e-05, "loss": 0.7722, "step": 22352 }, { "epoch": 18.55020746887967, "grad_norm": 57.41131591796875, "learning_rate": 1.258423236514523e-05, "loss": 0.5431, "step": 22353 }, { "epoch": 18.551037344398342, "grad_norm": 44.703887939453125, "learning_rate": 1.258390041493776e-05, "loss": 0.3783, "step": 22354 }, { "epoch": 18.551867219917014, "grad_norm": 12.10240364074707, "learning_rate": 1.2583568464730292e-05, "loss": 0.2813, "step": 22355 }, { "epoch": 18.552697095435686, "grad_norm": 73.28861236572266, "learning_rate": 1.2583236514522824e-05, "loss": 0.9547, "step": 22356 }, { "epoch": 18.55352697095436, "grad_norm": 52.89291763305664, "learning_rate": 1.2582904564315354e-05, "loss": 0.859, "step": 22357 }, { "epoch": 18.55435684647303, "grad_norm": 18.853580474853516, "learning_rate": 1.2582572614107884e-05, "loss": 0.3957, "step": 22358 }, { "epoch": 18.555186721991703, "grad_norm": 53.599021911621094, "learning_rate": 1.2582240663900417e-05, "loss": 0.821, "step": 22359 }, { "epoch": 18.556016597510375, "grad_norm": 56.28306198120117, "learning_rate": 1.2581908713692947e-05, "loss": 0.5787, "step": 22360 }, { "epoch": 18.556846473029047, "grad_norm": 22.90977668762207, "learning_rate": 1.2581576763485479e-05, "loss": 0.4229, "step": 22361 }, { "epoch": 18.55767634854772, "grad_norm": 22.88443374633789, "learning_rate": 1.2581244813278008e-05, "loss": 0.3525, "step": 22362 }, { "epoch": 18.55850622406639, "grad_norm": 49.41560363769531, "learning_rate": 
1.258091286307054e-05, "loss": 0.6874, "step": 22363 }, { "epoch": 18.559336099585064, "grad_norm": 46.6413459777832, "learning_rate": 1.2580580912863072e-05, "loss": 0.6512, "step": 22364 }, { "epoch": 18.560165975103736, "grad_norm": 28.847253799438477, "learning_rate": 1.2580248962655604e-05, "loss": 0.4224, "step": 22365 }, { "epoch": 18.560995850622408, "grad_norm": 40.14158630371094, "learning_rate": 1.2579917012448133e-05, "loss": 0.5435, "step": 22366 }, { "epoch": 18.56182572614108, "grad_norm": 40.460147857666016, "learning_rate": 1.2579585062240665e-05, "loss": 0.6251, "step": 22367 }, { "epoch": 18.562655601659753, "grad_norm": 45.22329330444336, "learning_rate": 1.2579253112033195e-05, "loss": 0.6305, "step": 22368 }, { "epoch": 18.563485477178425, "grad_norm": 28.59531593322754, "learning_rate": 1.2578921161825727e-05, "loss": 0.4728, "step": 22369 }, { "epoch": 18.564315352697097, "grad_norm": 57.60291290283203, "learning_rate": 1.2578589211618258e-05, "loss": 0.9182, "step": 22370 }, { "epoch": 18.56514522821577, "grad_norm": 40.6146240234375, "learning_rate": 1.2578257261410788e-05, "loss": 0.6748, "step": 22371 }, { "epoch": 18.56597510373444, "grad_norm": 43.99192428588867, "learning_rate": 1.257792531120332e-05, "loss": 0.5527, "step": 22372 }, { "epoch": 18.566804979253114, "grad_norm": 27.44799041748047, "learning_rate": 1.2577593360995853e-05, "loss": 0.6833, "step": 22373 }, { "epoch": 18.567634854771786, "grad_norm": 27.15460777282715, "learning_rate": 1.2577261410788381e-05, "loss": 0.5381, "step": 22374 }, { "epoch": 18.568464730290458, "grad_norm": 15.748169898986816, "learning_rate": 1.2576929460580913e-05, "loss": 0.2426, "step": 22375 }, { "epoch": 18.56929460580913, "grad_norm": 30.659833908081055, "learning_rate": 1.2576597510373445e-05, "loss": 0.6424, "step": 22376 }, { "epoch": 18.570124481327802, "grad_norm": 13.474777221679688, "learning_rate": 1.2576265560165976e-05, "loss": 0.3733, "step": 22377 }, { "epoch": 
18.570954356846475, "grad_norm": 32.16291809082031, "learning_rate": 1.2575933609958508e-05, "loss": 0.4264, "step": 22378 }, { "epoch": 18.571784232365147, "grad_norm": 71.5505599975586, "learning_rate": 1.2575601659751038e-05, "loss": 0.7265, "step": 22379 }, { "epoch": 18.57261410788382, "grad_norm": 58.76626205444336, "learning_rate": 1.2575269709543569e-05, "loss": 0.8838, "step": 22380 }, { "epoch": 18.57344398340249, "grad_norm": 20.57087516784668, "learning_rate": 1.2574937759336101e-05, "loss": 0.3719, "step": 22381 }, { "epoch": 18.574273858921163, "grad_norm": 99.93038940429688, "learning_rate": 1.2574605809128633e-05, "loss": 0.4172, "step": 22382 }, { "epoch": 18.575103734439836, "grad_norm": 51.069847106933594, "learning_rate": 1.2574273858921162e-05, "loss": 0.3996, "step": 22383 }, { "epoch": 18.575933609958508, "grad_norm": 54.140296936035156, "learning_rate": 1.2573941908713694e-05, "loss": 0.3572, "step": 22384 }, { "epoch": 18.57676348547718, "grad_norm": 47.26465606689453, "learning_rate": 1.2573609958506226e-05, "loss": 0.5722, "step": 22385 }, { "epoch": 18.577593360995852, "grad_norm": 39.75824737548828, "learning_rate": 1.2573278008298756e-05, "loss": 0.5383, "step": 22386 }, { "epoch": 18.578423236514524, "grad_norm": 28.983060836791992, "learning_rate": 1.2572946058091287e-05, "loss": 0.2994, "step": 22387 }, { "epoch": 18.579253112033197, "grad_norm": 81.68976593017578, "learning_rate": 1.2572614107883819e-05, "loss": 0.3788, "step": 22388 }, { "epoch": 18.58008298755187, "grad_norm": 32.55586624145508, "learning_rate": 1.257228215767635e-05, "loss": 0.5499, "step": 22389 }, { "epoch": 18.58091286307054, "grad_norm": 60.24423599243164, "learning_rate": 1.2571950207468881e-05, "loss": 0.7293, "step": 22390 }, { "epoch": 18.581742738589213, "grad_norm": 34.956565856933594, "learning_rate": 1.257161825726141e-05, "loss": 0.4094, "step": 22391 }, { "epoch": 18.582572614107885, "grad_norm": 34.045326232910156, "learning_rate": 
1.2571286307053942e-05, "loss": 0.8559, "step": 22392 }, { "epoch": 18.583402489626557, "grad_norm": 29.180391311645508, "learning_rate": 1.2570954356846474e-05, "loss": 0.4854, "step": 22393 }, { "epoch": 18.58423236514523, "grad_norm": 38.61222839355469, "learning_rate": 1.2570622406639006e-05, "loss": 0.6362, "step": 22394 }, { "epoch": 18.585062240663902, "grad_norm": 32.636234283447266, "learning_rate": 1.2570290456431535e-05, "loss": 0.3725, "step": 22395 }, { "epoch": 18.585892116182574, "grad_norm": 24.08544921875, "learning_rate": 1.2569958506224067e-05, "loss": 0.5217, "step": 22396 }, { "epoch": 18.586721991701246, "grad_norm": 22.585128784179688, "learning_rate": 1.25696265560166e-05, "loss": 0.5506, "step": 22397 }, { "epoch": 18.58755186721992, "grad_norm": 115.2188949584961, "learning_rate": 1.256929460580913e-05, "loss": 0.9166, "step": 22398 }, { "epoch": 18.58838174273859, "grad_norm": 69.24939727783203, "learning_rate": 1.256896265560166e-05, "loss": 0.4912, "step": 22399 }, { "epoch": 18.589211618257263, "grad_norm": 40.018959045410156, "learning_rate": 1.256863070539419e-05, "loss": 0.8793, "step": 22400 }, { "epoch": 18.590041493775935, "grad_norm": 249.80386352539062, "learning_rate": 1.2568298755186723e-05, "loss": 0.6811, "step": 22401 }, { "epoch": 18.590871369294607, "grad_norm": 57.194175720214844, "learning_rate": 1.2567966804979255e-05, "loss": 0.4786, "step": 22402 }, { "epoch": 18.59170124481328, "grad_norm": 73.47003936767578, "learning_rate": 1.2567634854771787e-05, "loss": 0.669, "step": 22403 }, { "epoch": 18.59253112033195, "grad_norm": 38.09138870239258, "learning_rate": 1.2567302904564316e-05, "loss": 0.439, "step": 22404 }, { "epoch": 18.593360995850624, "grad_norm": 103.27587127685547, "learning_rate": 1.2566970954356848e-05, "loss": 1.3912, "step": 22405 }, { "epoch": 18.594190871369296, "grad_norm": 48.105613708496094, "learning_rate": 1.256663900414938e-05, "loss": 0.745, "step": 22406 }, { "epoch": 18.59502074688797, 
"grad_norm": 28.50786590576172, "learning_rate": 1.256630705394191e-05, "loss": 0.7049, "step": 22407 }, { "epoch": 18.59585062240664, "grad_norm": 48.18037033081055, "learning_rate": 1.256597510373444e-05, "loss": 0.9708, "step": 22408 }, { "epoch": 18.596680497925313, "grad_norm": 65.03426361083984, "learning_rate": 1.2565643153526971e-05, "loss": 0.8003, "step": 22409 }, { "epoch": 18.597510373443985, "grad_norm": 15.498379707336426, "learning_rate": 1.2565311203319503e-05, "loss": 0.2699, "step": 22410 }, { "epoch": 18.598340248962657, "grad_norm": 30.946918487548828, "learning_rate": 1.2564979253112035e-05, "loss": 0.5249, "step": 22411 }, { "epoch": 18.59917012448133, "grad_norm": 59.91448211669922, "learning_rate": 1.2564647302904564e-05, "loss": 0.7397, "step": 22412 }, { "epoch": 18.6, "grad_norm": 52.785911560058594, "learning_rate": 1.2564315352697096e-05, "loss": 0.713, "step": 22413 }, { "epoch": 18.600829875518674, "grad_norm": 41.36532974243164, "learning_rate": 1.2563983402489628e-05, "loss": 0.6359, "step": 22414 }, { "epoch": 18.601659751037346, "grad_norm": 50.76171112060547, "learning_rate": 1.2563651452282159e-05, "loss": 0.5186, "step": 22415 }, { "epoch": 18.602489626556018, "grad_norm": 16.489959716796875, "learning_rate": 1.2563319502074689e-05, "loss": 0.3434, "step": 22416 }, { "epoch": 18.60331950207469, "grad_norm": 30.201263427734375, "learning_rate": 1.2562987551867221e-05, "loss": 0.3646, "step": 22417 }, { "epoch": 18.604149377593362, "grad_norm": 22.642946243286133, "learning_rate": 1.2562655601659752e-05, "loss": 0.6014, "step": 22418 }, { "epoch": 18.604979253112035, "grad_norm": 18.34067726135254, "learning_rate": 1.2562323651452284e-05, "loss": 0.3616, "step": 22419 }, { "epoch": 18.605809128630707, "grad_norm": 62.181766510009766, "learning_rate": 1.2561991701244814e-05, "loss": 1.1793, "step": 22420 }, { "epoch": 18.60663900414938, "grad_norm": 25.300886154174805, "learning_rate": 1.2561659751037345e-05, "loss": 0.661, 
"step": 22421 }, { "epoch": 18.60746887966805, "grad_norm": 16.6008243560791, "learning_rate": 1.2561327800829877e-05, "loss": 0.3107, "step": 22422 }, { "epoch": 18.608298755186723, "grad_norm": 27.828672409057617, "learning_rate": 1.2560995850622409e-05, "loss": 0.6251, "step": 22423 }, { "epoch": 18.609128630705396, "grad_norm": 55.71732711791992, "learning_rate": 1.2560663900414938e-05, "loss": 0.5647, "step": 22424 }, { "epoch": 18.609958506224068, "grad_norm": 38.54203796386719, "learning_rate": 1.256033195020747e-05, "loss": 0.429, "step": 22425 }, { "epoch": 18.61078838174274, "grad_norm": 17.712444305419922, "learning_rate": 1.2560000000000002e-05, "loss": 0.4416, "step": 22426 }, { "epoch": 18.611618257261412, "grad_norm": 24.997119903564453, "learning_rate": 1.2559668049792532e-05, "loss": 0.6827, "step": 22427 }, { "epoch": 18.612448132780084, "grad_norm": 27.8997859954834, "learning_rate": 1.2559336099585063e-05, "loss": 0.3923, "step": 22428 }, { "epoch": 18.613278008298757, "grad_norm": 39.57330322265625, "learning_rate": 1.2559004149377595e-05, "loss": 0.4735, "step": 22429 }, { "epoch": 18.61410788381743, "grad_norm": 56.00859832763672, "learning_rate": 1.2558672199170125e-05, "loss": 0.4609, "step": 22430 }, { "epoch": 18.6149377593361, "grad_norm": 19.327726364135742, "learning_rate": 1.2558340248962657e-05, "loss": 0.2782, "step": 22431 }, { "epoch": 18.615767634854773, "grad_norm": 59.88347625732422, "learning_rate": 1.255800829875519e-05, "loss": 0.908, "step": 22432 }, { "epoch": 18.616597510373445, "grad_norm": 32.627010345458984, "learning_rate": 1.2557676348547718e-05, "loss": 0.686, "step": 22433 }, { "epoch": 18.617427385892118, "grad_norm": 39.62940216064453, "learning_rate": 1.255734439834025e-05, "loss": 0.9592, "step": 22434 }, { "epoch": 18.61825726141079, "grad_norm": 59.50377655029297, "learning_rate": 1.2557012448132782e-05, "loss": 0.8131, "step": 22435 }, { "epoch": 18.619087136929462, "grad_norm": 12.267131805419922, 
"learning_rate": 1.2556680497925313e-05, "loss": 0.258, "step": 22436 }, { "epoch": 18.619917012448134, "grad_norm": 44.53223419189453, "learning_rate": 1.2556348547717843e-05, "loss": 0.7702, "step": 22437 }, { "epoch": 18.620746887966806, "grad_norm": 74.8986587524414, "learning_rate": 1.2556016597510373e-05, "loss": 0.5327, "step": 22438 }, { "epoch": 18.62157676348548, "grad_norm": 23.1192569732666, "learning_rate": 1.2555684647302906e-05, "loss": 0.4431, "step": 22439 }, { "epoch": 18.62240663900415, "grad_norm": 168.35719299316406, "learning_rate": 1.2555352697095438e-05, "loss": 1.5387, "step": 22440 }, { "epoch": 18.623236514522823, "grad_norm": 21.479394912719727, "learning_rate": 1.2555020746887966e-05, "loss": 0.3514, "step": 22441 }, { "epoch": 18.624066390041495, "grad_norm": 28.001184463500977, "learning_rate": 1.2554688796680498e-05, "loss": 0.4741, "step": 22442 }, { "epoch": 18.624896265560167, "grad_norm": 23.298185348510742, "learning_rate": 1.255435684647303e-05, "loss": 0.3995, "step": 22443 }, { "epoch": 18.62572614107884, "grad_norm": 59.207183837890625, "learning_rate": 1.2554024896265563e-05, "loss": 0.903, "step": 22444 }, { "epoch": 18.62655601659751, "grad_norm": 30.51221466064453, "learning_rate": 1.2553692946058091e-05, "loss": 0.7056, "step": 22445 }, { "epoch": 18.627385892116184, "grad_norm": 51.35770034790039, "learning_rate": 1.2553360995850624e-05, "loss": 0.4847, "step": 22446 }, { "epoch": 18.628215767634856, "grad_norm": 51.676536560058594, "learning_rate": 1.2553029045643154e-05, "loss": 0.7615, "step": 22447 }, { "epoch": 18.62904564315353, "grad_norm": 95.942138671875, "learning_rate": 1.2552697095435686e-05, "loss": 1.252, "step": 22448 }, { "epoch": 18.6298755186722, "grad_norm": 52.87224578857422, "learning_rate": 1.2552365145228216e-05, "loss": 0.3874, "step": 22449 }, { "epoch": 18.630705394190873, "grad_norm": 91.02510070800781, "learning_rate": 1.2552033195020747e-05, "loss": 0.9571, "step": 22450 }, { "epoch": 
18.631535269709545, "grad_norm": 28.690351486206055, "learning_rate": 1.2551701244813279e-05, "loss": 0.415, "step": 22451 }, { "epoch": 18.632365145228217, "grad_norm": 59.13260269165039, "learning_rate": 1.2551369294605811e-05, "loss": 0.6109, "step": 22452 }, { "epoch": 18.63319502074689, "grad_norm": 28.116199493408203, "learning_rate": 1.255103734439834e-05, "loss": 0.4355, "step": 22453 }, { "epoch": 18.63402489626556, "grad_norm": 33.99360275268555, "learning_rate": 1.2550705394190872e-05, "loss": 0.5989, "step": 22454 }, { "epoch": 18.634854771784234, "grad_norm": 32.87229537963867, "learning_rate": 1.2550373443983404e-05, "loss": 0.6713, "step": 22455 }, { "epoch": 18.635684647302906, "grad_norm": 36.31761169433594, "learning_rate": 1.2550041493775934e-05, "loss": 0.5574, "step": 22456 }, { "epoch": 18.636514522821578, "grad_norm": 30.73382568359375, "learning_rate": 1.2549709543568467e-05, "loss": 0.9865, "step": 22457 }, { "epoch": 18.63734439834025, "grad_norm": 36.644710540771484, "learning_rate": 1.2549377593360997e-05, "loss": 0.5483, "step": 22458 }, { "epoch": 18.638174273858922, "grad_norm": 18.343069076538086, "learning_rate": 1.2549045643153527e-05, "loss": 0.3749, "step": 22459 }, { "epoch": 18.639004149377595, "grad_norm": 25.950754165649414, "learning_rate": 1.254871369294606e-05, "loss": 0.4173, "step": 22460 }, { "epoch": 18.639834024896267, "grad_norm": 33.568153381347656, "learning_rate": 1.2548381742738592e-05, "loss": 0.8255, "step": 22461 }, { "epoch": 18.64066390041494, "grad_norm": 19.937772750854492, "learning_rate": 1.254804979253112e-05, "loss": 0.3917, "step": 22462 }, { "epoch": 18.64149377593361, "grad_norm": 22.400096893310547, "learning_rate": 1.2547717842323652e-05, "loss": 0.2874, "step": 22463 }, { "epoch": 18.642323651452283, "grad_norm": 22.506059646606445, "learning_rate": 1.2547385892116185e-05, "loss": 0.3865, "step": 22464 }, { "epoch": 18.643153526970956, "grad_norm": 13.307305335998535, "learning_rate": 
1.2547053941908715e-05, "loss": 0.2341, "step": 22465 }, { "epoch": 18.643983402489628, "grad_norm": 46.998695373535156, "learning_rate": 1.2546721991701245e-05, "loss": 0.4699, "step": 22466 }, { "epoch": 18.6448132780083, "grad_norm": 73.2284164428711, "learning_rate": 1.2546390041493777e-05, "loss": 0.6457, "step": 22467 }, { "epoch": 18.645643153526972, "grad_norm": 55.87373352050781, "learning_rate": 1.2546058091286308e-05, "loss": 0.5464, "step": 22468 }, { "epoch": 18.646473029045644, "grad_norm": 38.524986267089844, "learning_rate": 1.254572614107884e-05, "loss": 0.9516, "step": 22469 }, { "epoch": 18.647302904564317, "grad_norm": 19.996767044067383, "learning_rate": 1.2545394190871369e-05, "loss": 0.4595, "step": 22470 }, { "epoch": 18.64813278008299, "grad_norm": 48.91508865356445, "learning_rate": 1.25450622406639e-05, "loss": 0.7322, "step": 22471 }, { "epoch": 18.64896265560166, "grad_norm": 41.18612289428711, "learning_rate": 1.2544730290456433e-05, "loss": 1.4772, "step": 22472 }, { "epoch": 18.649792531120333, "grad_norm": 21.62267303466797, "learning_rate": 1.2544398340248965e-05, "loss": 0.3149, "step": 22473 }, { "epoch": 18.650622406639005, "grad_norm": 19.476736068725586, "learning_rate": 1.2544066390041494e-05, "loss": 0.3561, "step": 22474 }, { "epoch": 18.651452282157678, "grad_norm": 28.061994552612305, "learning_rate": 1.2543734439834026e-05, "loss": 0.4532, "step": 22475 }, { "epoch": 18.65228215767635, "grad_norm": 36.73426055908203, "learning_rate": 1.2543402489626558e-05, "loss": 0.8285, "step": 22476 }, { "epoch": 18.653112033195022, "grad_norm": 42.52742385864258, "learning_rate": 1.2543070539419088e-05, "loss": 0.428, "step": 22477 }, { "epoch": 18.653941908713694, "grad_norm": 26.441486358642578, "learning_rate": 1.2542738589211619e-05, "loss": 0.3552, "step": 22478 }, { "epoch": 18.654771784232366, "grad_norm": 65.51449584960938, "learning_rate": 1.254240663900415e-05, "loss": 0.4934, "step": 22479 }, { "epoch": 18.65560165975104, 
"grad_norm": 71.43370819091797, "learning_rate": 1.2542074688796681e-05, "loss": 0.8263, "step": 22480 }, { "epoch": 18.65643153526971, "grad_norm": 42.8465576171875, "learning_rate": 1.2541742738589213e-05, "loss": 0.6022, "step": 22481 }, { "epoch": 18.657261410788383, "grad_norm": 37.32240295410156, "learning_rate": 1.2541410788381746e-05, "loss": 0.6344, "step": 22482 }, { "epoch": 18.658091286307055, "grad_norm": 24.333410263061523, "learning_rate": 1.2541078838174274e-05, "loss": 0.4266, "step": 22483 }, { "epoch": 18.658921161825727, "grad_norm": 84.58702850341797, "learning_rate": 1.2540746887966806e-05, "loss": 1.126, "step": 22484 }, { "epoch": 18.6597510373444, "grad_norm": 89.71599578857422, "learning_rate": 1.2540414937759337e-05, "loss": 0.688, "step": 22485 }, { "epoch": 18.66058091286307, "grad_norm": 49.7054443359375, "learning_rate": 1.2540082987551869e-05, "loss": 0.4934, "step": 22486 }, { "epoch": 18.661410788381744, "grad_norm": 41.9663200378418, "learning_rate": 1.25397510373444e-05, "loss": 0.5883, "step": 22487 }, { "epoch": 18.662240663900416, "grad_norm": 18.36931610107422, "learning_rate": 1.253941908713693e-05, "loss": 0.6518, "step": 22488 }, { "epoch": 18.66307053941909, "grad_norm": 43.73106002807617, "learning_rate": 1.2539087136929462e-05, "loss": 0.7108, "step": 22489 }, { "epoch": 18.66390041493776, "grad_norm": 47.76915740966797, "learning_rate": 1.2538755186721994e-05, "loss": 0.5563, "step": 22490 }, { "epoch": 18.664730290456433, "grad_norm": 45.672306060791016, "learning_rate": 1.2538423236514523e-05, "loss": 0.4374, "step": 22491 }, { "epoch": 18.665560165975105, "grad_norm": 61.854087829589844, "learning_rate": 1.2538091286307055e-05, "loss": 0.847, "step": 22492 }, { "epoch": 18.666390041493777, "grad_norm": 72.76595306396484, "learning_rate": 1.2537759336099587e-05, "loss": 1.1497, "step": 22493 }, { "epoch": 18.66721991701245, "grad_norm": 23.645801544189453, "learning_rate": 1.2537427385892117e-05, "loss": 0.4482, 
"step": 22494 }, { "epoch": 18.66804979253112, "grad_norm": 17.863563537597656, "learning_rate": 1.2537095435684648e-05, "loss": 0.4013, "step": 22495 }, { "epoch": 18.668879668049794, "grad_norm": 33.197845458984375, "learning_rate": 1.253676348547718e-05, "loss": 0.5508, "step": 22496 }, { "epoch": 18.669709543568466, "grad_norm": 39.2933349609375, "learning_rate": 1.253643153526971e-05, "loss": 0.5808, "step": 22497 }, { "epoch": 18.670539419087138, "grad_norm": 73.20101928710938, "learning_rate": 1.2536099585062242e-05, "loss": 0.6254, "step": 22498 }, { "epoch": 18.67136929460581, "grad_norm": 58.96604537963867, "learning_rate": 1.2535767634854773e-05, "loss": 0.8729, "step": 22499 }, { "epoch": 18.672199170124482, "grad_norm": 69.40776062011719, "learning_rate": 1.2535435684647303e-05, "loss": 0.6205, "step": 22500 }, { "epoch": 18.673029045643155, "grad_norm": 45.254669189453125, "learning_rate": 1.2535103734439835e-05, "loss": 1.1237, "step": 22501 }, { "epoch": 18.673858921161827, "grad_norm": 41.71721267700195, "learning_rate": 1.2534771784232367e-05, "loss": 0.7211, "step": 22502 }, { "epoch": 18.6746887966805, "grad_norm": 46.89588928222656, "learning_rate": 1.2534439834024896e-05, "loss": 0.7515, "step": 22503 }, { "epoch": 18.67551867219917, "grad_norm": 42.10499572753906, "learning_rate": 1.2534107883817428e-05, "loss": 0.5865, "step": 22504 }, { "epoch": 18.676348547717843, "grad_norm": 25.20686912536621, "learning_rate": 1.253377593360996e-05, "loss": 0.3855, "step": 22505 }, { "epoch": 18.677178423236516, "grad_norm": 26.720144271850586, "learning_rate": 1.253344398340249e-05, "loss": 0.2635, "step": 22506 }, { "epoch": 18.678008298755188, "grad_norm": 55.81528091430664, "learning_rate": 1.2533112033195021e-05, "loss": 0.7491, "step": 22507 }, { "epoch": 18.67883817427386, "grad_norm": 26.34223747253418, "learning_rate": 1.2532780082987552e-05, "loss": 0.4689, "step": 22508 }, { "epoch": 18.679668049792532, "grad_norm": 12.015002250671387, 
"learning_rate": 1.2532448132780084e-05, "loss": 0.3133, "step": 22509 }, { "epoch": 18.680497925311204, "grad_norm": 35.28986358642578, "learning_rate": 1.2532116182572616e-05, "loss": 0.5652, "step": 22510 }, { "epoch": 18.681327800829877, "grad_norm": 35.234249114990234, "learning_rate": 1.2531784232365148e-05, "loss": 0.6165, "step": 22511 }, { "epoch": 18.68215767634855, "grad_norm": 27.367618560791016, "learning_rate": 1.2531452282157677e-05, "loss": 0.3704, "step": 22512 }, { "epoch": 18.68298755186722, "grad_norm": 50.725730895996094, "learning_rate": 1.2531120331950209e-05, "loss": 0.4465, "step": 22513 }, { "epoch": 18.683817427385893, "grad_norm": 30.783456802368164, "learning_rate": 1.253078838174274e-05, "loss": 0.4234, "step": 22514 }, { "epoch": 18.684647302904565, "grad_norm": 31.307748794555664, "learning_rate": 1.2530456431535271e-05, "loss": 0.4992, "step": 22515 }, { "epoch": 18.685477178423238, "grad_norm": 130.5441131591797, "learning_rate": 1.2530124481327802e-05, "loss": 0.6977, "step": 22516 }, { "epoch": 18.68630705394191, "grad_norm": 57.03620147705078, "learning_rate": 1.2529792531120332e-05, "loss": 0.6586, "step": 22517 }, { "epoch": 18.687136929460582, "grad_norm": 100.52288055419922, "learning_rate": 1.2529460580912864e-05, "loss": 0.6404, "step": 22518 }, { "epoch": 18.687966804979254, "grad_norm": 46.33559799194336, "learning_rate": 1.2529128630705396e-05, "loss": 1.2348, "step": 22519 }, { "epoch": 18.688796680497926, "grad_norm": 21.744709014892578, "learning_rate": 1.2528796680497925e-05, "loss": 0.4521, "step": 22520 }, { "epoch": 18.6896265560166, "grad_norm": 19.50412368774414, "learning_rate": 1.2528464730290457e-05, "loss": 0.4494, "step": 22521 }, { "epoch": 18.69045643153527, "grad_norm": 48.2598876953125, "learning_rate": 1.252813278008299e-05, "loss": 0.7065, "step": 22522 }, { "epoch": 18.691286307053943, "grad_norm": 101.56305694580078, "learning_rate": 1.2527800829875521e-05, "loss": 1.6746, "step": 22523 }, { 
"epoch": 18.692116182572615, "grad_norm": 21.9345760345459, "learning_rate": 1.252746887966805e-05, "loss": 0.4304, "step": 22524 }, { "epoch": 18.692946058091287, "grad_norm": 16.4401912689209, "learning_rate": 1.2527136929460582e-05, "loss": 0.3594, "step": 22525 }, { "epoch": 18.69377593360996, "grad_norm": 62.99947738647461, "learning_rate": 1.2526804979253113e-05, "loss": 0.5981, "step": 22526 }, { "epoch": 18.694605809128632, "grad_norm": 26.45430564880371, "learning_rate": 1.2526473029045645e-05, "loss": 0.4615, "step": 22527 }, { "epoch": 18.695435684647304, "grad_norm": 45.82741165161133, "learning_rate": 1.2526141078838175e-05, "loss": 0.514, "step": 22528 }, { "epoch": 18.696265560165976, "grad_norm": 35.452640533447266, "learning_rate": 1.2525809128630705e-05, "loss": 0.3543, "step": 22529 }, { "epoch": 18.69709543568465, "grad_norm": 32.7021369934082, "learning_rate": 1.2525477178423238e-05, "loss": 0.5165, "step": 22530 }, { "epoch": 18.69792531120332, "grad_norm": 40.8073616027832, "learning_rate": 1.252514522821577e-05, "loss": 0.4901, "step": 22531 }, { "epoch": 18.698755186721993, "grad_norm": 78.1683578491211, "learning_rate": 1.2524813278008298e-05, "loss": 0.4078, "step": 22532 }, { "epoch": 18.699585062240665, "grad_norm": 26.74301528930664, "learning_rate": 1.252448132780083e-05, "loss": 0.3277, "step": 22533 }, { "epoch": 18.700414937759337, "grad_norm": 38.48231506347656, "learning_rate": 1.2524149377593363e-05, "loss": 0.6015, "step": 22534 }, { "epoch": 18.70124481327801, "grad_norm": 51.56300735473633, "learning_rate": 1.2523817427385893e-05, "loss": 0.5481, "step": 22535 }, { "epoch": 18.70207468879668, "grad_norm": 61.7424201965332, "learning_rate": 1.2523485477178425e-05, "loss": 0.8817, "step": 22536 }, { "epoch": 18.702904564315354, "grad_norm": 60.04473876953125, "learning_rate": 1.2523153526970956e-05, "loss": 0.6655, "step": 22537 }, { "epoch": 18.703734439834026, "grad_norm": 94.66492462158203, "learning_rate": 
1.2522821576763486e-05, "loss": 0.9421, "step": 22538 }, { "epoch": 18.704564315352698, "grad_norm": 40.40806579589844, "learning_rate": 1.2522489626556018e-05, "loss": 0.6413, "step": 22539 }, { "epoch": 18.70539419087137, "grad_norm": 23.16155242919922, "learning_rate": 1.252215767634855e-05, "loss": 0.5102, "step": 22540 }, { "epoch": 18.706224066390043, "grad_norm": 64.34619140625, "learning_rate": 1.2521825726141079e-05, "loss": 0.9764, "step": 22541 }, { "epoch": 18.707053941908715, "grad_norm": 47.14838790893555, "learning_rate": 1.2521493775933611e-05, "loss": 0.5964, "step": 22542 }, { "epoch": 18.707883817427387, "grad_norm": 66.65117645263672, "learning_rate": 1.2521161825726143e-05, "loss": 0.7605, "step": 22543 }, { "epoch": 18.70871369294606, "grad_norm": 58.949562072753906, "learning_rate": 1.2520829875518674e-05, "loss": 0.5478, "step": 22544 }, { "epoch": 18.70954356846473, "grad_norm": 77.11450958251953, "learning_rate": 1.2520497925311204e-05, "loss": 0.488, "step": 22545 }, { "epoch": 18.710373443983404, "grad_norm": 42.856658935546875, "learning_rate": 1.2520165975103736e-05, "loss": 0.6761, "step": 22546 }, { "epoch": 18.711203319502076, "grad_norm": 25.5737247467041, "learning_rate": 1.2519834024896266e-05, "loss": 0.3981, "step": 22547 }, { "epoch": 18.712033195020748, "grad_norm": 36.574649810791016, "learning_rate": 1.2519502074688799e-05, "loss": 0.4645, "step": 22548 }, { "epoch": 18.71286307053942, "grad_norm": 21.594497680664062, "learning_rate": 1.2519170124481327e-05, "loss": 0.3112, "step": 22549 }, { "epoch": 18.713692946058092, "grad_norm": 48.61298751831055, "learning_rate": 1.251883817427386e-05, "loss": 0.4353, "step": 22550 }, { "epoch": 18.714522821576764, "grad_norm": 76.8025131225586, "learning_rate": 1.2518506224066392e-05, "loss": 0.9502, "step": 22551 }, { "epoch": 18.715352697095437, "grad_norm": 77.5479736328125, "learning_rate": 1.2518174273858924e-05, "loss": 0.5354, "step": 22552 }, { "epoch": 18.71618257261411, 
"grad_norm": 21.451658248901367, "learning_rate": 1.2517842323651452e-05, "loss": 0.3323, "step": 22553 }, { "epoch": 18.71701244813278, "grad_norm": 34.881866455078125, "learning_rate": 1.2517510373443984e-05, "loss": 0.4914, "step": 22554 }, { "epoch": 18.717842323651453, "grad_norm": 39.888465881347656, "learning_rate": 1.2517178423236515e-05, "loss": 0.3714, "step": 22555 }, { "epoch": 18.718672199170125, "grad_norm": 93.1566390991211, "learning_rate": 1.2516846473029047e-05, "loss": 0.9332, "step": 22556 }, { "epoch": 18.719502074688798, "grad_norm": 77.95401000976562, "learning_rate": 1.2516514522821577e-05, "loss": 1.6016, "step": 22557 }, { "epoch": 18.72033195020747, "grad_norm": 64.39596557617188, "learning_rate": 1.2516182572614108e-05, "loss": 0.6814, "step": 22558 }, { "epoch": 18.721161825726142, "grad_norm": 21.41069793701172, "learning_rate": 1.251585062240664e-05, "loss": 0.4186, "step": 22559 }, { "epoch": 18.721991701244814, "grad_norm": 126.21142578125, "learning_rate": 1.2515518672199172e-05, "loss": 0.988, "step": 22560 }, { "epoch": 18.722821576763486, "grad_norm": 15.705763816833496, "learning_rate": 1.2515186721991704e-05, "loss": 0.2909, "step": 22561 }, { "epoch": 18.72365145228216, "grad_norm": 62.251312255859375, "learning_rate": 1.2514854771784233e-05, "loss": 0.9411, "step": 22562 }, { "epoch": 18.72448132780083, "grad_norm": 70.36229705810547, "learning_rate": 1.2514522821576765e-05, "loss": 0.5291, "step": 22563 }, { "epoch": 18.725311203319503, "grad_norm": 48.551246643066406, "learning_rate": 1.2514190871369295e-05, "loss": 0.7145, "step": 22564 }, { "epoch": 18.726141078838175, "grad_norm": 52.363868713378906, "learning_rate": 1.2513858921161827e-05, "loss": 0.4927, "step": 22565 }, { "epoch": 18.726970954356847, "grad_norm": 35.78392791748047, "learning_rate": 1.2513526970954358e-05, "loss": 0.425, "step": 22566 }, { "epoch": 18.72780082987552, "grad_norm": 87.84304809570312, "learning_rate": 1.2513195020746888e-05, "loss": 
0.6064, "step": 22567 }, { "epoch": 18.728630705394192, "grad_norm": 50.92877960205078, "learning_rate": 1.251286307053942e-05, "loss": 0.86, "step": 22568 }, { "epoch": 18.729460580912864, "grad_norm": 47.92180252075195, "learning_rate": 1.2512531120331953e-05, "loss": 0.3105, "step": 22569 }, { "epoch": 18.730290456431536, "grad_norm": 39.00175094604492, "learning_rate": 1.2512199170124481e-05, "loss": 0.3985, "step": 22570 }, { "epoch": 18.73112033195021, "grad_norm": 33.61611557006836, "learning_rate": 1.2511867219917013e-05, "loss": 0.5371, "step": 22571 }, { "epoch": 18.73195020746888, "grad_norm": 32.31096267700195, "learning_rate": 1.2511535269709545e-05, "loss": 0.793, "step": 22572 }, { "epoch": 18.732780082987553, "grad_norm": 55.217010498046875, "learning_rate": 1.2511203319502076e-05, "loss": 0.5938, "step": 22573 }, { "epoch": 18.733609958506225, "grad_norm": 23.722797393798828, "learning_rate": 1.2510871369294606e-05, "loss": 0.3722, "step": 22574 }, { "epoch": 18.734439834024897, "grad_norm": 37.03485870361328, "learning_rate": 1.2510539419087138e-05, "loss": 0.9931, "step": 22575 }, { "epoch": 18.73526970954357, "grad_norm": 39.46662139892578, "learning_rate": 1.2510207468879669e-05, "loss": 0.5947, "step": 22576 }, { "epoch": 18.73609958506224, "grad_norm": 20.165714263916016, "learning_rate": 1.2509875518672201e-05, "loss": 1.0866, "step": 22577 }, { "epoch": 18.736929460580914, "grad_norm": 46.636314392089844, "learning_rate": 1.250954356846473e-05, "loss": 0.5291, "step": 22578 }, { "epoch": 18.737759336099586, "grad_norm": 31.389419555664062, "learning_rate": 1.2509211618257262e-05, "loss": 0.7581, "step": 22579 }, { "epoch": 18.738589211618258, "grad_norm": 31.52849769592285, "learning_rate": 1.2508879668049794e-05, "loss": 0.4941, "step": 22580 }, { "epoch": 18.73941908713693, "grad_norm": 50.22771072387695, "learning_rate": 1.2508547717842326e-05, "loss": 0.5229, "step": 22581 }, { "epoch": 18.740248962655603, "grad_norm": 
38.833457946777344, "learning_rate": 1.2508215767634855e-05, "loss": 0.4877, "step": 22582 }, { "epoch": 18.741078838174275, "grad_norm": 35.200775146484375, "learning_rate": 1.2507883817427387e-05, "loss": 0.6682, "step": 22583 }, { "epoch": 18.741908713692947, "grad_norm": 28.63397979736328, "learning_rate": 1.2507551867219919e-05, "loss": 0.6017, "step": 22584 }, { "epoch": 18.74273858921162, "grad_norm": 77.52361297607422, "learning_rate": 1.250721991701245e-05, "loss": 0.6544, "step": 22585 }, { "epoch": 18.74356846473029, "grad_norm": 39.99318313598633, "learning_rate": 1.250688796680498e-05, "loss": 0.5866, "step": 22586 }, { "epoch": 18.744398340248964, "grad_norm": 19.981128692626953, "learning_rate": 1.250655601659751e-05, "loss": 0.4407, "step": 22587 }, { "epoch": 18.745228215767636, "grad_norm": 22.567363739013672, "learning_rate": 1.2506224066390042e-05, "loss": 0.4009, "step": 22588 }, { "epoch": 18.746058091286308, "grad_norm": 58.818519592285156, "learning_rate": 1.2505892116182574e-05, "loss": 0.789, "step": 22589 }, { "epoch": 18.74688796680498, "grad_norm": 43.95555114746094, "learning_rate": 1.2505560165975106e-05, "loss": 0.993, "step": 22590 }, { "epoch": 18.747717842323652, "grad_norm": 43.4552116394043, "learning_rate": 1.2505228215767635e-05, "loss": 0.5975, "step": 22591 }, { "epoch": 18.748547717842325, "grad_norm": 21.941354751586914, "learning_rate": 1.2504896265560167e-05, "loss": 0.5348, "step": 22592 }, { "epoch": 18.749377593360997, "grad_norm": 37.35282897949219, "learning_rate": 1.25045643153527e-05, "loss": 0.4577, "step": 22593 }, { "epoch": 18.75020746887967, "grad_norm": 163.62847900390625, "learning_rate": 1.250423236514523e-05, "loss": 0.4468, "step": 22594 }, { "epoch": 18.75103734439834, "grad_norm": 57.04153060913086, "learning_rate": 1.250390041493776e-05, "loss": 0.689, "step": 22595 }, { "epoch": 18.751867219917013, "grad_norm": 51.941410064697266, "learning_rate": 1.250356846473029e-05, "loss": 1.0101, "step": 22596 
}, { "epoch": 18.752697095435686, "grad_norm": 40.01025390625, "learning_rate": 1.2503236514522823e-05, "loss": 0.4962, "step": 22597 }, { "epoch": 18.753526970954358, "grad_norm": 102.03974914550781, "learning_rate": 1.2502904564315355e-05, "loss": 0.54, "step": 22598 }, { "epoch": 18.75435684647303, "grad_norm": 64.3177490234375, "learning_rate": 1.2502572614107884e-05, "loss": 0.6212, "step": 22599 }, { "epoch": 18.755186721991702, "grad_norm": 32.47431564331055, "learning_rate": 1.2502240663900416e-05, "loss": 0.5591, "step": 22600 }, { "epoch": 18.756016597510374, "grad_norm": 21.120769500732422, "learning_rate": 1.2501908713692948e-05, "loss": 0.335, "step": 22601 }, { "epoch": 18.756846473029047, "grad_norm": 43.49738311767578, "learning_rate": 1.2501576763485478e-05, "loss": 1.1151, "step": 22602 }, { "epoch": 18.75767634854772, "grad_norm": 26.57257652282715, "learning_rate": 1.2501244813278009e-05, "loss": 0.428, "step": 22603 }, { "epoch": 18.75850622406639, "grad_norm": 40.13313293457031, "learning_rate": 1.250091286307054e-05, "loss": 0.6214, "step": 22604 }, { "epoch": 18.759336099585063, "grad_norm": 26.124113082885742, "learning_rate": 1.2500580912863071e-05, "loss": 0.4498, "step": 22605 }, { "epoch": 18.760165975103735, "grad_norm": 31.35296630859375, "learning_rate": 1.2500248962655603e-05, "loss": 0.4557, "step": 22606 }, { "epoch": 18.760995850622407, "grad_norm": 30.07027244567871, "learning_rate": 1.2499917012448134e-05, "loss": 0.8011, "step": 22607 }, { "epoch": 18.76182572614108, "grad_norm": 13.414512634277344, "learning_rate": 1.2499585062240664e-05, "loss": 0.2627, "step": 22608 }, { "epoch": 18.762655601659752, "grad_norm": 17.46495246887207, "learning_rate": 1.2499253112033196e-05, "loss": 0.273, "step": 22609 }, { "epoch": 18.763485477178424, "grad_norm": 71.76538848876953, "learning_rate": 1.2498921161825728e-05, "loss": 0.8313, "step": 22610 }, { "epoch": 18.764315352697096, "grad_norm": 56.16041946411133, "learning_rate": 
1.2498589211618257e-05, "loss": 0.976, "step": 22611 }, { "epoch": 18.76514522821577, "grad_norm": 30.242616653442383, "learning_rate": 1.2498257261410789e-05, "loss": 0.39, "step": 22612 }, { "epoch": 18.76597510373444, "grad_norm": 10.873265266418457, "learning_rate": 1.2497925311203321e-05, "loss": 0.2916, "step": 22613 }, { "epoch": 18.766804979253113, "grad_norm": 27.037954330444336, "learning_rate": 1.2497593360995852e-05, "loss": 0.5239, "step": 22614 }, { "epoch": 18.767634854771785, "grad_norm": 83.48362731933594, "learning_rate": 1.2497261410788384e-05, "loss": 0.4005, "step": 22615 }, { "epoch": 18.768464730290457, "grad_norm": 49.64830017089844, "learning_rate": 1.2496929460580912e-05, "loss": 0.8713, "step": 22616 }, { "epoch": 18.76929460580913, "grad_norm": 57.233314514160156, "learning_rate": 1.2496597510373445e-05, "loss": 0.7532, "step": 22617 }, { "epoch": 18.7701244813278, "grad_norm": 40.898704528808594, "learning_rate": 1.2496265560165977e-05, "loss": 0.594, "step": 22618 }, { "epoch": 18.770954356846474, "grad_norm": 89.07310485839844, "learning_rate": 1.2495933609958509e-05, "loss": 0.6948, "step": 22619 }, { "epoch": 18.771784232365146, "grad_norm": 33.272674560546875, "learning_rate": 1.2495601659751037e-05, "loss": 0.5401, "step": 22620 }, { "epoch": 18.77261410788382, "grad_norm": 52.22311782836914, "learning_rate": 1.249526970954357e-05, "loss": 0.674, "step": 22621 }, { "epoch": 18.77344398340249, "grad_norm": 35.96990203857422, "learning_rate": 1.2494937759336102e-05, "loss": 0.4551, "step": 22622 }, { "epoch": 18.774273858921163, "grad_norm": 64.61392211914062, "learning_rate": 1.2494605809128632e-05, "loss": 0.2955, "step": 22623 }, { "epoch": 18.775103734439835, "grad_norm": 60.18544006347656, "learning_rate": 1.2494273858921163e-05, "loss": 0.6779, "step": 22624 }, { "epoch": 18.775933609958507, "grad_norm": 51.06595230102539, "learning_rate": 1.2493941908713693e-05, "loss": 0.9134, "step": 22625 }, { "epoch": 18.77676348547718, 
"grad_norm": 45.72135543823242, "learning_rate": 1.2493609958506225e-05, "loss": 0.3353, "step": 22626 }, { "epoch": 18.77759336099585, "grad_norm": 41.12207794189453, "learning_rate": 1.2493278008298757e-05, "loss": 0.6743, "step": 22627 }, { "epoch": 18.778423236514524, "grad_norm": 24.366090774536133, "learning_rate": 1.2492946058091286e-05, "loss": 0.5203, "step": 22628 }, { "epoch": 18.779253112033196, "grad_norm": 21.011775970458984, "learning_rate": 1.2492614107883818e-05, "loss": 0.3802, "step": 22629 }, { "epoch": 18.780082987551868, "grad_norm": 82.54741668701172, "learning_rate": 1.249228215767635e-05, "loss": 0.8237, "step": 22630 }, { "epoch": 18.78091286307054, "grad_norm": 39.69716262817383, "learning_rate": 1.2491950207468882e-05, "loss": 0.6395, "step": 22631 }, { "epoch": 18.781742738589212, "grad_norm": 22.974010467529297, "learning_rate": 1.2491618257261411e-05, "loss": 0.2623, "step": 22632 }, { "epoch": 18.782572614107885, "grad_norm": 58.54909896850586, "learning_rate": 1.2491286307053943e-05, "loss": 0.3587, "step": 22633 }, { "epoch": 18.783402489626557, "grad_norm": 56.70950698852539, "learning_rate": 1.2490954356846473e-05, "loss": 1.4003, "step": 22634 }, { "epoch": 18.78423236514523, "grad_norm": 48.307701110839844, "learning_rate": 1.2490622406639006e-05, "loss": 0.5659, "step": 22635 }, { "epoch": 18.7850622406639, "grad_norm": 105.62401580810547, "learning_rate": 1.2490290456431536e-05, "loss": 0.7855, "step": 22636 }, { "epoch": 18.785892116182573, "grad_norm": 82.31343841552734, "learning_rate": 1.2489958506224066e-05, "loss": 0.6069, "step": 22637 }, { "epoch": 18.786721991701246, "grad_norm": 50.52754592895508, "learning_rate": 1.2489626556016598e-05, "loss": 0.5093, "step": 22638 }, { "epoch": 18.787551867219918, "grad_norm": 89.13587951660156, "learning_rate": 1.248929460580913e-05, "loss": 0.6568, "step": 22639 }, { "epoch": 18.78838174273859, "grad_norm": 40.9959831237793, "learning_rate": 1.2488962655601663e-05, "loss": 
0.4237, "step": 22640 }, { "epoch": 18.789211618257262, "grad_norm": 38.8616943359375, "learning_rate": 1.2488630705394191e-05, "loss": 0.4574, "step": 22641 }, { "epoch": 18.790041493775934, "grad_norm": 38.205081939697266, "learning_rate": 1.2488298755186724e-05, "loss": 0.6392, "step": 22642 }, { "epoch": 18.790871369294607, "grad_norm": 27.108747482299805, "learning_rate": 1.2487966804979254e-05, "loss": 0.6404, "step": 22643 }, { "epoch": 18.79170124481328, "grad_norm": 40.007591247558594, "learning_rate": 1.2487634854771786e-05, "loss": 1.0219, "step": 22644 }, { "epoch": 18.79253112033195, "grad_norm": 39.49176788330078, "learning_rate": 1.2487302904564316e-05, "loss": 0.5596, "step": 22645 }, { "epoch": 18.793360995850623, "grad_norm": 53.041202545166016, "learning_rate": 1.2486970954356847e-05, "loss": 1.1853, "step": 22646 }, { "epoch": 18.794190871369295, "grad_norm": 32.15089797973633, "learning_rate": 1.2486639004149379e-05, "loss": 0.4534, "step": 22647 }, { "epoch": 18.795020746887968, "grad_norm": 75.59168243408203, "learning_rate": 1.2486307053941911e-05, "loss": 0.5447, "step": 22648 }, { "epoch": 18.79585062240664, "grad_norm": 14.652605056762695, "learning_rate": 1.248597510373444e-05, "loss": 0.3126, "step": 22649 }, { "epoch": 18.796680497925312, "grad_norm": 30.244091033935547, "learning_rate": 1.2485643153526972e-05, "loss": 0.7864, "step": 22650 }, { "epoch": 18.797510373443984, "grad_norm": 34.735511779785156, "learning_rate": 1.2485311203319504e-05, "loss": 0.5881, "step": 22651 }, { "epoch": 18.798340248962656, "grad_norm": 49.897911071777344, "learning_rate": 1.2484979253112034e-05, "loss": 0.7337, "step": 22652 }, { "epoch": 18.79917012448133, "grad_norm": 83.88874816894531, "learning_rate": 1.2484647302904565e-05, "loss": 0.8849, "step": 22653 }, { "epoch": 18.8, "grad_norm": 26.617443084716797, "learning_rate": 1.2484315352697097e-05, "loss": 0.3554, "step": 22654 }, { "epoch": 18.800829875518673, "grad_norm": 39.65867233276367, 
"learning_rate": 1.2483983402489627e-05, "loss": 0.7468, "step": 22655 }, { "epoch": 18.801659751037345, "grad_norm": 51.666011810302734, "learning_rate": 1.248365145228216e-05, "loss": 0.4996, "step": 22656 }, { "epoch": 18.802489626556017, "grad_norm": 37.00661849975586, "learning_rate": 1.2483319502074688e-05, "loss": 0.4269, "step": 22657 }, { "epoch": 18.80331950207469, "grad_norm": 19.412784576416016, "learning_rate": 1.248298755186722e-05, "loss": 0.4473, "step": 22658 }, { "epoch": 18.80414937759336, "grad_norm": 49.30656433105469, "learning_rate": 1.2482655601659752e-05, "loss": 0.9129, "step": 22659 }, { "epoch": 18.804979253112034, "grad_norm": 27.6422176361084, "learning_rate": 1.2482323651452285e-05, "loss": 0.391, "step": 22660 }, { "epoch": 18.805809128630706, "grad_norm": 39.095088958740234, "learning_rate": 1.2481991701244813e-05, "loss": 0.672, "step": 22661 }, { "epoch": 18.80663900414938, "grad_norm": 274.49102783203125, "learning_rate": 1.2481659751037345e-05, "loss": 0.7087, "step": 22662 }, { "epoch": 18.80746887966805, "grad_norm": 15.125473976135254, "learning_rate": 1.2481327800829877e-05, "loss": 0.3813, "step": 22663 }, { "epoch": 18.808298755186723, "grad_norm": 32.51805114746094, "learning_rate": 1.2480995850622408e-05, "loss": 0.3027, "step": 22664 }, { "epoch": 18.809128630705395, "grad_norm": 74.59031677246094, "learning_rate": 1.2480663900414938e-05, "loss": 0.8258, "step": 22665 }, { "epoch": 18.809958506224067, "grad_norm": 18.571578979492188, "learning_rate": 1.2480331950207469e-05, "loss": 0.4585, "step": 22666 }, { "epoch": 18.81078838174274, "grad_norm": 57.95769500732422, "learning_rate": 1.248e-05, "loss": 0.7836, "step": 22667 }, { "epoch": 18.81161825726141, "grad_norm": 66.92097473144531, "learning_rate": 1.2479668049792533e-05, "loss": 0.9072, "step": 22668 }, { "epoch": 18.812448132780084, "grad_norm": 12.554108619689941, "learning_rate": 1.2479336099585065e-05, "loss": 0.3494, "step": 22669 }, { "epoch": 
18.813278008298756, "grad_norm": 77.81502532958984, "learning_rate": 1.2479004149377594e-05, "loss": 0.7832, "step": 22670 }, { "epoch": 18.814107883817428, "grad_norm": 35.247413635253906, "learning_rate": 1.2478672199170126e-05, "loss": 0.6542, "step": 22671 }, { "epoch": 18.8149377593361, "grad_norm": 26.221027374267578, "learning_rate": 1.2478340248962656e-05, "loss": 0.8291, "step": 22672 }, { "epoch": 18.815767634854772, "grad_norm": 108.54642486572266, "learning_rate": 1.2478008298755188e-05, "loss": 0.9633, "step": 22673 }, { "epoch": 18.816597510373445, "grad_norm": 14.777688980102539, "learning_rate": 1.2477676348547719e-05, "loss": 0.4547, "step": 22674 }, { "epoch": 18.817427385892117, "grad_norm": 34.74509048461914, "learning_rate": 1.247734439834025e-05, "loss": 0.3802, "step": 22675 }, { "epoch": 18.81825726141079, "grad_norm": 98.78629302978516, "learning_rate": 1.2477012448132781e-05, "loss": 1.1539, "step": 22676 }, { "epoch": 18.81908713692946, "grad_norm": 90.81549072265625, "learning_rate": 1.2476680497925313e-05, "loss": 0.5499, "step": 22677 }, { "epoch": 18.819917012448133, "grad_norm": 75.88184356689453, "learning_rate": 1.2476348547717842e-05, "loss": 1.5147, "step": 22678 }, { "epoch": 18.820746887966806, "grad_norm": 17.60586929321289, "learning_rate": 1.2476016597510374e-05, "loss": 0.2832, "step": 22679 }, { "epoch": 18.821576763485478, "grad_norm": 58.899696350097656, "learning_rate": 1.2475684647302906e-05, "loss": 0.6973, "step": 22680 }, { "epoch": 18.82240663900415, "grad_norm": 32.27702713012695, "learning_rate": 1.2475352697095437e-05, "loss": 0.4471, "step": 22681 }, { "epoch": 18.823236514522822, "grad_norm": 68.9532470703125, "learning_rate": 1.2475020746887967e-05, "loss": 0.6617, "step": 22682 }, { "epoch": 18.824066390041494, "grad_norm": 74.0235824584961, "learning_rate": 1.24746887966805e-05, "loss": 0.8757, "step": 22683 }, { "epoch": 18.824896265560167, "grad_norm": 107.57428741455078, "learning_rate": 
1.247435684647303e-05, "loss": 1.0223, "step": 22684 }, { "epoch": 18.82572614107884, "grad_norm": 26.467313766479492, "learning_rate": 1.2474024896265562e-05, "loss": 0.4066, "step": 22685 }, { "epoch": 18.82655601659751, "grad_norm": 17.41488265991211, "learning_rate": 1.247369294605809e-05, "loss": 0.4627, "step": 22686 }, { "epoch": 18.827385892116183, "grad_norm": 17.79334831237793, "learning_rate": 1.2473360995850623e-05, "loss": 0.2904, "step": 22687 }, { "epoch": 18.828215767634855, "grad_norm": 51.251060485839844, "learning_rate": 1.2473029045643155e-05, "loss": 0.7447, "step": 22688 }, { "epoch": 18.829045643153528, "grad_norm": 41.14412307739258, "learning_rate": 1.2472697095435687e-05, "loss": 0.4038, "step": 22689 }, { "epoch": 18.8298755186722, "grad_norm": 20.476701736450195, "learning_rate": 1.2472365145228216e-05, "loss": 0.3661, "step": 22690 }, { "epoch": 18.830705394190872, "grad_norm": 16.75266456604004, "learning_rate": 1.2472033195020748e-05, "loss": 0.2311, "step": 22691 }, { "epoch": 18.831535269709544, "grad_norm": 43.73716735839844, "learning_rate": 1.247170124481328e-05, "loss": 0.6319, "step": 22692 }, { "epoch": 18.832365145228216, "grad_norm": 23.558639526367188, "learning_rate": 1.247136929460581e-05, "loss": 0.3222, "step": 22693 }, { "epoch": 18.83319502074689, "grad_norm": 119.38928985595703, "learning_rate": 1.2471037344398342e-05, "loss": 0.6891, "step": 22694 }, { "epoch": 18.83402489626556, "grad_norm": 19.402944564819336, "learning_rate": 1.2470705394190871e-05, "loss": 0.4004, "step": 22695 }, { "epoch": 18.834854771784233, "grad_norm": 46.9070930480957, "learning_rate": 1.2470373443983403e-05, "loss": 1.1019, "step": 22696 }, { "epoch": 18.835684647302905, "grad_norm": 78.0602035522461, "learning_rate": 1.2470041493775935e-05, "loss": 0.6233, "step": 22697 }, { "epoch": 18.836514522821577, "grad_norm": 43.6992073059082, "learning_rate": 1.2469709543568467e-05, "loss": 0.6805, "step": 22698 }, { "epoch": 18.83734439834025, 
"grad_norm": 56.60996627807617, "learning_rate": 1.2469377593360996e-05, "loss": 0.9276, "step": 22699 }, { "epoch": 18.83817427385892, "grad_norm": 145.02243041992188, "learning_rate": 1.2469045643153528e-05, "loss": 1.4749, "step": 22700 }, { "epoch": 18.839004149377594, "grad_norm": 87.62638092041016, "learning_rate": 1.246871369294606e-05, "loss": 0.6484, "step": 22701 }, { "epoch": 18.839834024896266, "grad_norm": 98.4533920288086, "learning_rate": 1.246838174273859e-05, "loss": 1.8001, "step": 22702 }, { "epoch": 18.84066390041494, "grad_norm": 48.11466598510742, "learning_rate": 1.2468049792531121e-05, "loss": 1.3022, "step": 22703 }, { "epoch": 18.84149377593361, "grad_norm": 23.798290252685547, "learning_rate": 1.2467717842323652e-05, "loss": 0.838, "step": 22704 }, { "epoch": 18.842323651452283, "grad_norm": 29.99013900756836, "learning_rate": 1.2467385892116184e-05, "loss": 0.57, "step": 22705 }, { "epoch": 18.843153526970955, "grad_norm": 25.937074661254883, "learning_rate": 1.2467053941908716e-05, "loss": 0.4633, "step": 22706 }, { "epoch": 18.843983402489627, "grad_norm": 68.11580657958984, "learning_rate": 1.2466721991701244e-05, "loss": 0.6618, "step": 22707 }, { "epoch": 18.8448132780083, "grad_norm": 21.5389347076416, "learning_rate": 1.2466390041493777e-05, "loss": 0.2656, "step": 22708 }, { "epoch": 18.84564315352697, "grad_norm": 54.87641525268555, "learning_rate": 1.2466058091286309e-05, "loss": 1.3265, "step": 22709 }, { "epoch": 18.846473029045644, "grad_norm": 16.24810218811035, "learning_rate": 1.246572614107884e-05, "loss": 0.2647, "step": 22710 }, { "epoch": 18.847302904564316, "grad_norm": 31.072982788085938, "learning_rate": 1.246539419087137e-05, "loss": 0.5256, "step": 22711 }, { "epoch": 18.848132780082988, "grad_norm": 18.360210418701172, "learning_rate": 1.2465062240663902e-05, "loss": 0.4221, "step": 22712 }, { "epoch": 18.84896265560166, "grad_norm": 25.496746063232422, "learning_rate": 1.2464730290456432e-05, "loss": 0.5185, 
"step": 22713 }, { "epoch": 18.849792531120332, "grad_norm": 78.28866577148438, "learning_rate": 1.2464398340248964e-05, "loss": 0.8763, "step": 22714 }, { "epoch": 18.850622406639005, "grad_norm": 61.52127456665039, "learning_rate": 1.2464066390041495e-05, "loss": 1.3771, "step": 22715 }, { "epoch": 18.851452282157677, "grad_norm": 39.00690841674805, "learning_rate": 1.2463734439834025e-05, "loss": 0.6587, "step": 22716 }, { "epoch": 18.85228215767635, "grad_norm": 53.195587158203125, "learning_rate": 1.2463402489626557e-05, "loss": 0.5762, "step": 22717 }, { "epoch": 18.85311203319502, "grad_norm": 45.684940338134766, "learning_rate": 1.2463070539419089e-05, "loss": 0.7994, "step": 22718 }, { "epoch": 18.853941908713693, "grad_norm": 95.7150650024414, "learning_rate": 1.246273858921162e-05, "loss": 0.6433, "step": 22719 }, { "epoch": 18.854771784232366, "grad_norm": 92.61454010009766, "learning_rate": 1.246240663900415e-05, "loss": 0.8585, "step": 22720 }, { "epoch": 18.855601659751038, "grad_norm": 39.24214172363281, "learning_rate": 1.2462074688796682e-05, "loss": 0.4224, "step": 22721 }, { "epoch": 18.85643153526971, "grad_norm": 31.497880935668945, "learning_rate": 1.2461742738589212e-05, "loss": 0.5009, "step": 22722 }, { "epoch": 18.857261410788382, "grad_norm": 103.20672607421875, "learning_rate": 1.2461410788381745e-05, "loss": 0.7387, "step": 22723 }, { "epoch": 18.858091286307054, "grad_norm": 53.672035217285156, "learning_rate": 1.2461078838174275e-05, "loss": 0.5574, "step": 22724 }, { "epoch": 18.858921161825727, "grad_norm": 28.332168579101562, "learning_rate": 1.2460746887966805e-05, "loss": 0.6515, "step": 22725 }, { "epoch": 18.8597510373444, "grad_norm": 72.10359191894531, "learning_rate": 1.2460414937759338e-05, "loss": 0.3915, "step": 22726 }, { "epoch": 18.86058091286307, "grad_norm": 65.57524871826172, "learning_rate": 1.246008298755187e-05, "loss": 0.526, "step": 22727 }, { "epoch": 18.861410788381743, "grad_norm": 43.21188735961914, 
"learning_rate": 1.2459751037344398e-05, "loss": 0.6098, "step": 22728 }, { "epoch": 18.862240663900415, "grad_norm": 87.71617889404297, "learning_rate": 1.245941908713693e-05, "loss": 0.3969, "step": 22729 }, { "epoch": 18.863070539419088, "grad_norm": 38.81851577758789, "learning_rate": 1.2459087136929463e-05, "loss": 0.842, "step": 22730 }, { "epoch": 18.86390041493776, "grad_norm": 41.45244598388672, "learning_rate": 1.2458755186721993e-05, "loss": 0.4371, "step": 22731 }, { "epoch": 18.864730290456432, "grad_norm": 51.41645431518555, "learning_rate": 1.2458423236514523e-05, "loss": 0.792, "step": 22732 }, { "epoch": 18.865560165975104, "grad_norm": 24.513019561767578, "learning_rate": 1.2458091286307054e-05, "loss": 0.4502, "step": 22733 }, { "epoch": 18.866390041493776, "grad_norm": 82.68944549560547, "learning_rate": 1.2457759336099586e-05, "loss": 0.883, "step": 22734 }, { "epoch": 18.86721991701245, "grad_norm": 77.11145782470703, "learning_rate": 1.2457427385892118e-05, "loss": 0.9504, "step": 22735 }, { "epoch": 18.86804979253112, "grad_norm": 34.85544967651367, "learning_rate": 1.2457095435684647e-05, "loss": 0.3662, "step": 22736 }, { "epoch": 18.868879668049793, "grad_norm": 47.85881805419922, "learning_rate": 1.2456763485477179e-05, "loss": 0.8946, "step": 22737 }, { "epoch": 18.869709543568465, "grad_norm": 66.85548400878906, "learning_rate": 1.2456431535269711e-05, "loss": 0.8613, "step": 22738 }, { "epoch": 18.870539419087137, "grad_norm": 65.68043518066406, "learning_rate": 1.2456099585062243e-05, "loss": 0.5764, "step": 22739 }, { "epoch": 18.87136929460581, "grad_norm": 49.92544174194336, "learning_rate": 1.2455767634854772e-05, "loss": 0.708, "step": 22740 }, { "epoch": 18.872199170124482, "grad_norm": 19.470722198486328, "learning_rate": 1.2455435684647304e-05, "loss": 0.3989, "step": 22741 }, { "epoch": 18.873029045643154, "grad_norm": 68.94100952148438, "learning_rate": 1.2455103734439834e-05, "loss": 1.194, "step": 22742 }, { "epoch": 
18.873858921161826, "grad_norm": 62.38318634033203, "learning_rate": 1.2454771784232366e-05, "loss": 1.1471, "step": 22743 }, { "epoch": 18.8746887966805, "grad_norm": 22.715059280395508, "learning_rate": 1.2454439834024897e-05, "loss": 0.5497, "step": 22744 }, { "epoch": 18.87551867219917, "grad_norm": 55.84169387817383, "learning_rate": 1.2454107883817427e-05, "loss": 0.9589, "step": 22745 }, { "epoch": 18.876348547717843, "grad_norm": 57.23863983154297, "learning_rate": 1.245377593360996e-05, "loss": 0.5402, "step": 22746 }, { "epoch": 18.877178423236515, "grad_norm": 82.72950744628906, "learning_rate": 1.2453443983402491e-05, "loss": 1.2478, "step": 22747 }, { "epoch": 18.878008298755187, "grad_norm": 23.286666870117188, "learning_rate": 1.2453112033195024e-05, "loss": 0.3816, "step": 22748 }, { "epoch": 18.87883817427386, "grad_norm": 46.592586517333984, "learning_rate": 1.2452780082987552e-05, "loss": 1.5886, "step": 22749 }, { "epoch": 18.87966804979253, "grad_norm": 31.54878807067871, "learning_rate": 1.2452448132780084e-05, "loss": 0.3186, "step": 22750 }, { "epoch": 18.880497925311204, "grad_norm": 30.11716651916504, "learning_rate": 1.2452116182572615e-05, "loss": 0.3708, "step": 22751 }, { "epoch": 18.881327800829876, "grad_norm": 35.28388977050781, "learning_rate": 1.2451784232365147e-05, "loss": 0.4904, "step": 22752 }, { "epoch": 18.882157676348548, "grad_norm": 26.83660888671875, "learning_rate": 1.2451452282157677e-05, "loss": 0.3726, "step": 22753 }, { "epoch": 18.88298755186722, "grad_norm": 30.891441345214844, "learning_rate": 1.2451120331950208e-05, "loss": 0.3838, "step": 22754 }, { "epoch": 18.883817427385893, "grad_norm": 33.094505310058594, "learning_rate": 1.245078838174274e-05, "loss": 0.8509, "step": 22755 }, { "epoch": 18.884647302904565, "grad_norm": 62.80251693725586, "learning_rate": 1.2450456431535272e-05, "loss": 0.8218, "step": 22756 }, { "epoch": 18.885477178423237, "grad_norm": 43.692665100097656, "learning_rate": 
1.24501244813278e-05, "loss": 1.1076, "step": 22757 }, { "epoch": 18.88630705394191, "grad_norm": 106.97856140136719, "learning_rate": 1.2449792531120333e-05, "loss": 0.7173, "step": 22758 }, { "epoch": 18.88713692946058, "grad_norm": 61.13669204711914, "learning_rate": 1.2449460580912865e-05, "loss": 0.9369, "step": 22759 }, { "epoch": 18.887966804979254, "grad_norm": 29.328144073486328, "learning_rate": 1.2449128630705395e-05, "loss": 0.5163, "step": 22760 }, { "epoch": 18.888796680497926, "grad_norm": 40.7692985534668, "learning_rate": 1.2448796680497926e-05, "loss": 1.0275, "step": 22761 }, { "epoch": 18.889626556016598, "grad_norm": 57.22111511230469, "learning_rate": 1.2448464730290458e-05, "loss": 1.1963, "step": 22762 }, { "epoch": 18.89045643153527, "grad_norm": 14.809988021850586, "learning_rate": 1.2448132780082988e-05, "loss": 0.237, "step": 22763 }, { "epoch": 18.891286307053942, "grad_norm": 18.755931854248047, "learning_rate": 1.244780082987552e-05, "loss": 0.3508, "step": 22764 }, { "epoch": 18.892116182572614, "grad_norm": 49.7050666809082, "learning_rate": 1.2447468879668049e-05, "loss": 0.4505, "step": 22765 }, { "epoch": 18.892946058091287, "grad_norm": 52.024017333984375, "learning_rate": 1.2447136929460581e-05, "loss": 0.5085, "step": 22766 }, { "epoch": 18.89377593360996, "grad_norm": 34.28717803955078, "learning_rate": 1.2446804979253113e-05, "loss": 0.4829, "step": 22767 }, { "epoch": 18.89460580912863, "grad_norm": 46.44149398803711, "learning_rate": 1.2446473029045645e-05, "loss": 1.3356, "step": 22768 }, { "epoch": 18.895435684647303, "grad_norm": 17.385852813720703, "learning_rate": 1.2446141078838174e-05, "loss": 0.3573, "step": 22769 }, { "epoch": 18.896265560165975, "grad_norm": 53.05583572387695, "learning_rate": 1.2445809128630706e-05, "loss": 0.8365, "step": 22770 }, { "epoch": 18.897095435684648, "grad_norm": 33.49765396118164, "learning_rate": 1.2445477178423238e-05, "loss": 0.3922, "step": 22771 }, { "epoch": 18.89792531120332, 
"grad_norm": 57.86187744140625, "learning_rate": 1.2445145228215769e-05, "loss": 0.5041, "step": 22772 }, { "epoch": 18.898755186721992, "grad_norm": 25.01085662841797, "learning_rate": 1.2444813278008301e-05, "loss": 0.4529, "step": 22773 }, { "epoch": 18.899585062240664, "grad_norm": 84.88636016845703, "learning_rate": 1.244448132780083e-05, "loss": 1.6719, "step": 22774 }, { "epoch": 18.900414937759336, "grad_norm": 21.308883666992188, "learning_rate": 1.2444149377593362e-05, "loss": 0.4805, "step": 22775 }, { "epoch": 18.90124481327801, "grad_norm": 27.567127227783203, "learning_rate": 1.2443817427385894e-05, "loss": 0.7906, "step": 22776 }, { "epoch": 18.90207468879668, "grad_norm": 57.49617385864258, "learning_rate": 1.2443485477178426e-05, "loss": 1.409, "step": 22777 }, { "epoch": 18.902904564315353, "grad_norm": 34.5003547668457, "learning_rate": 1.2443153526970955e-05, "loss": 0.6042, "step": 22778 }, { "epoch": 18.903734439834025, "grad_norm": 46.13072967529297, "learning_rate": 1.2442821576763487e-05, "loss": 0.7203, "step": 22779 }, { "epoch": 18.904564315352697, "grad_norm": 29.933645248413086, "learning_rate": 1.2442489626556019e-05, "loss": 0.5721, "step": 22780 }, { "epoch": 18.90539419087137, "grad_norm": 19.503524780273438, "learning_rate": 1.244215767634855e-05, "loss": 0.286, "step": 22781 }, { "epoch": 18.906224066390042, "grad_norm": 83.28936004638672, "learning_rate": 1.244182572614108e-05, "loss": 0.4571, "step": 22782 }, { "epoch": 18.907053941908714, "grad_norm": 43.257049560546875, "learning_rate": 1.244149377593361e-05, "loss": 0.648, "step": 22783 }, { "epoch": 18.907883817427386, "grad_norm": 43.514984130859375, "learning_rate": 1.2441161825726142e-05, "loss": 0.6896, "step": 22784 }, { "epoch": 18.90871369294606, "grad_norm": 28.331947326660156, "learning_rate": 1.2440829875518674e-05, "loss": 0.5478, "step": 22785 }, { "epoch": 18.90954356846473, "grad_norm": 107.1787338256836, "learning_rate": 1.2440497925311203e-05, "loss": 
0.7095, "step": 22786 }, { "epoch": 18.910373443983403, "grad_norm": 101.37013244628906, "learning_rate": 1.2440165975103735e-05, "loss": 0.5688, "step": 22787 }, { "epoch": 18.911203319502075, "grad_norm": 31.372493743896484, "learning_rate": 1.2439834024896267e-05, "loss": 0.6889, "step": 22788 }, { "epoch": 18.912033195020747, "grad_norm": 60.4506950378418, "learning_rate": 1.2439502074688798e-05, "loss": 0.657, "step": 22789 }, { "epoch": 18.91286307053942, "grad_norm": 42.165653228759766, "learning_rate": 1.2439170124481328e-05, "loss": 0.9124, "step": 22790 }, { "epoch": 18.91369294605809, "grad_norm": 84.47577667236328, "learning_rate": 1.243883817427386e-05, "loss": 0.4872, "step": 22791 }, { "epoch": 18.914522821576764, "grad_norm": 42.824806213378906, "learning_rate": 1.243850622406639e-05, "loss": 0.6875, "step": 22792 }, { "epoch": 18.915352697095436, "grad_norm": 53.26784896850586, "learning_rate": 1.2438174273858923e-05, "loss": 0.6427, "step": 22793 }, { "epoch": 18.916182572614108, "grad_norm": 83.66592407226562, "learning_rate": 1.2437842323651453e-05, "loss": 0.6249, "step": 22794 }, { "epoch": 18.91701244813278, "grad_norm": 18.580921173095703, "learning_rate": 1.2437510373443984e-05, "loss": 0.4001, "step": 22795 }, { "epoch": 18.917842323651453, "grad_norm": 52.7303581237793, "learning_rate": 1.2437178423236516e-05, "loss": 1.0814, "step": 22796 }, { "epoch": 18.918672199170125, "grad_norm": 16.965328216552734, "learning_rate": 1.2436846473029048e-05, "loss": 0.3058, "step": 22797 }, { "epoch": 18.919502074688797, "grad_norm": 20.90243148803711, "learning_rate": 1.2436514522821576e-05, "loss": 0.3491, "step": 22798 }, { "epoch": 18.92033195020747, "grad_norm": 24.79297637939453, "learning_rate": 1.2436182572614109e-05, "loss": 0.5287, "step": 22799 }, { "epoch": 18.92116182572614, "grad_norm": 55.92525100708008, "learning_rate": 1.243585062240664e-05, "loss": 0.5382, "step": 22800 }, { "epoch": 18.921991701244814, "grad_norm": 
86.91161346435547, "learning_rate": 1.2435518672199171e-05, "loss": 0.5481, "step": 22801 }, { "epoch": 18.922821576763486, "grad_norm": 14.372282028198242, "learning_rate": 1.2435186721991703e-05, "loss": 0.3602, "step": 22802 }, { "epoch": 18.923651452282158, "grad_norm": 64.54644775390625, "learning_rate": 1.2434854771784232e-05, "loss": 0.7188, "step": 22803 }, { "epoch": 18.92448132780083, "grad_norm": 18.858196258544922, "learning_rate": 1.2434522821576764e-05, "loss": 0.3485, "step": 22804 }, { "epoch": 18.925311203319502, "grad_norm": 18.272369384765625, "learning_rate": 1.2434190871369296e-05, "loss": 0.3525, "step": 22805 }, { "epoch": 18.926141078838175, "grad_norm": 21.204538345336914, "learning_rate": 1.2433858921161828e-05, "loss": 0.371, "step": 22806 }, { "epoch": 18.926970954356847, "grad_norm": 38.25841522216797, "learning_rate": 1.2433526970954357e-05, "loss": 1.0066, "step": 22807 }, { "epoch": 18.92780082987552, "grad_norm": 54.872249603271484, "learning_rate": 1.2433195020746889e-05, "loss": 1.1422, "step": 22808 }, { "epoch": 18.92863070539419, "grad_norm": 27.223464965820312, "learning_rate": 1.2432863070539421e-05, "loss": 0.6108, "step": 22809 }, { "epoch": 18.929460580912863, "grad_norm": 28.30219841003418, "learning_rate": 1.2432531120331952e-05, "loss": 0.5914, "step": 22810 }, { "epoch": 18.930290456431536, "grad_norm": 65.53679656982422, "learning_rate": 1.2432199170124482e-05, "loss": 0.9996, "step": 22811 }, { "epoch": 18.931120331950208, "grad_norm": 41.024784088134766, "learning_rate": 1.2431867219917012e-05, "loss": 0.8048, "step": 22812 }, { "epoch": 18.93195020746888, "grad_norm": 33.651710510253906, "learning_rate": 1.2431535269709545e-05, "loss": 0.7656, "step": 22813 }, { "epoch": 18.932780082987552, "grad_norm": 53.66952896118164, "learning_rate": 1.2431203319502077e-05, "loss": 0.3813, "step": 22814 }, { "epoch": 18.933609958506224, "grad_norm": 25.967607498168945, "learning_rate": 1.2430871369294605e-05, "loss": 0.429, 
"step": 22815 }, { "epoch": 18.934439834024896, "grad_norm": 32.68497085571289, "learning_rate": 1.2430539419087137e-05, "loss": 0.8215, "step": 22816 }, { "epoch": 18.93526970954357, "grad_norm": 46.47458267211914, "learning_rate": 1.243020746887967e-05, "loss": 0.753, "step": 22817 }, { "epoch": 18.93609958506224, "grad_norm": 37.83744812011719, "learning_rate": 1.2429875518672202e-05, "loss": 0.4548, "step": 22818 }, { "epoch": 18.936929460580913, "grad_norm": 38.47050476074219, "learning_rate": 1.242954356846473e-05, "loss": 0.8039, "step": 22819 }, { "epoch": 18.937759336099585, "grad_norm": 66.34513092041016, "learning_rate": 1.2429211618257262e-05, "loss": 0.7011, "step": 22820 }, { "epoch": 18.938589211618257, "grad_norm": 53.55831527709961, "learning_rate": 1.2428879668049793e-05, "loss": 0.9036, "step": 22821 }, { "epoch": 18.93941908713693, "grad_norm": 37.76398849487305, "learning_rate": 1.2428547717842325e-05, "loss": 1.0401, "step": 22822 }, { "epoch": 18.940248962655602, "grad_norm": 20.25979995727539, "learning_rate": 1.2428215767634855e-05, "loss": 0.3719, "step": 22823 }, { "epoch": 18.941078838174274, "grad_norm": 44.87400817871094, "learning_rate": 1.2427883817427386e-05, "loss": 0.8342, "step": 22824 }, { "epoch": 18.941908713692946, "grad_norm": 22.51711654663086, "learning_rate": 1.2427551867219918e-05, "loss": 0.5399, "step": 22825 }, { "epoch": 18.94273858921162, "grad_norm": 60.463523864746094, "learning_rate": 1.242721991701245e-05, "loss": 0.5851, "step": 22826 }, { "epoch": 18.94356846473029, "grad_norm": 18.832529067993164, "learning_rate": 1.2426887966804982e-05, "loss": 0.2747, "step": 22827 }, { "epoch": 18.944398340248963, "grad_norm": 43.38418197631836, "learning_rate": 1.2426556016597511e-05, "loss": 0.5391, "step": 22828 }, { "epoch": 18.945228215767635, "grad_norm": 50.83173751831055, "learning_rate": 1.2426224066390043e-05, "loss": 0.3019, "step": 22829 }, { "epoch": 18.946058091286307, "grad_norm": 99.94744873046875, 
"learning_rate": 1.2425892116182573e-05, "loss": 1.0795, "step": 22830 }, { "epoch": 18.94688796680498, "grad_norm": 44.028648376464844, "learning_rate": 1.2425560165975106e-05, "loss": 0.6283, "step": 22831 }, { "epoch": 18.94771784232365, "grad_norm": 57.67058181762695, "learning_rate": 1.2425228215767636e-05, "loss": 0.5392, "step": 22832 }, { "epoch": 18.948547717842324, "grad_norm": 109.62345123291016, "learning_rate": 1.2424896265560166e-05, "loss": 0.4286, "step": 22833 }, { "epoch": 18.949377593360996, "grad_norm": 55.6640625, "learning_rate": 1.2424564315352698e-05, "loss": 0.9042, "step": 22834 }, { "epoch": 18.95020746887967, "grad_norm": 23.428186416625977, "learning_rate": 1.242423236514523e-05, "loss": 0.4188, "step": 22835 }, { "epoch": 18.95103734439834, "grad_norm": 26.746295928955078, "learning_rate": 1.242390041493776e-05, "loss": 0.3974, "step": 22836 }, { "epoch": 18.951867219917013, "grad_norm": 39.64632797241211, "learning_rate": 1.2423568464730291e-05, "loss": 0.2613, "step": 22837 }, { "epoch": 18.952697095435685, "grad_norm": 40.739315032958984, "learning_rate": 1.2423236514522823e-05, "loss": 0.5767, "step": 22838 }, { "epoch": 18.953526970954357, "grad_norm": 38.75146484375, "learning_rate": 1.2422904564315354e-05, "loss": 0.9746, "step": 22839 }, { "epoch": 18.95435684647303, "grad_norm": 29.27117156982422, "learning_rate": 1.2422572614107884e-05, "loss": 0.7569, "step": 22840 }, { "epoch": 18.9551867219917, "grad_norm": 34.86811447143555, "learning_rate": 1.2422240663900416e-05, "loss": 0.6832, "step": 22841 }, { "epoch": 18.956016597510374, "grad_norm": 58.96218490600586, "learning_rate": 1.2421908713692947e-05, "loss": 0.877, "step": 22842 }, { "epoch": 18.956846473029046, "grad_norm": 99.0925064086914, "learning_rate": 1.2421576763485479e-05, "loss": 0.6928, "step": 22843 }, { "epoch": 18.957676348547718, "grad_norm": 35.96626663208008, "learning_rate": 1.2421244813278008e-05, "loss": 0.7428, "step": 22844 }, { "epoch": 
18.95850622406639, "grad_norm": 66.49989318847656, "learning_rate": 1.242091286307054e-05, "loss": 0.8177, "step": 22845 }, { "epoch": 18.959336099585062, "grad_norm": 62.484378814697266, "learning_rate": 1.2420580912863072e-05, "loss": 0.8539, "step": 22846 }, { "epoch": 18.960165975103735, "grad_norm": 33.04430389404297, "learning_rate": 1.2420248962655604e-05, "loss": 0.5225, "step": 22847 }, { "epoch": 18.960995850622407, "grad_norm": 70.57593536376953, "learning_rate": 1.2419917012448133e-05, "loss": 0.3423, "step": 22848 }, { "epoch": 18.96182572614108, "grad_norm": 35.25508499145508, "learning_rate": 1.2419585062240665e-05, "loss": 0.4984, "step": 22849 }, { "epoch": 18.96265560165975, "grad_norm": 45.79894256591797, "learning_rate": 1.2419253112033195e-05, "loss": 0.6444, "step": 22850 }, { "epoch": 18.963485477178423, "grad_norm": 20.447546005249023, "learning_rate": 1.2418921161825727e-05, "loss": 0.4105, "step": 22851 }, { "epoch": 18.964315352697096, "grad_norm": 66.64767456054688, "learning_rate": 1.241858921161826e-05, "loss": 0.441, "step": 22852 }, { "epoch": 18.965145228215768, "grad_norm": 31.614974975585938, "learning_rate": 1.2418257261410788e-05, "loss": 0.4305, "step": 22853 }, { "epoch": 18.96597510373444, "grad_norm": 52.25691223144531, "learning_rate": 1.241792531120332e-05, "loss": 0.4046, "step": 22854 }, { "epoch": 18.966804979253112, "grad_norm": 31.956783294677734, "learning_rate": 1.2417593360995852e-05, "loss": 0.2705, "step": 22855 }, { "epoch": 18.967634854771784, "grad_norm": 54.731590270996094, "learning_rate": 1.2417261410788384e-05, "loss": 0.7892, "step": 22856 }, { "epoch": 18.968464730290457, "grad_norm": 47.99906539916992, "learning_rate": 1.2416929460580913e-05, "loss": 0.3614, "step": 22857 }, { "epoch": 18.96929460580913, "grad_norm": 19.71210479736328, "learning_rate": 1.2416597510373445e-05, "loss": 0.3788, "step": 22858 }, { "epoch": 18.9701244813278, "grad_norm": 39.301239013671875, "learning_rate": 
1.2416265560165976e-05, "loss": 0.3841, "step": 22859 }, { "epoch": 18.970954356846473, "grad_norm": 21.75003433227539, "learning_rate": 1.2415933609958508e-05, "loss": 0.4856, "step": 22860 }, { "epoch": 18.971784232365145, "grad_norm": 48.721900939941406, "learning_rate": 1.2415601659751038e-05, "loss": 0.5794, "step": 22861 }, { "epoch": 18.972614107883818, "grad_norm": 45.02616882324219, "learning_rate": 1.2415269709543569e-05, "loss": 0.7362, "step": 22862 }, { "epoch": 18.97344398340249, "grad_norm": 53.779151916503906, "learning_rate": 1.24149377593361e-05, "loss": 0.6123, "step": 22863 }, { "epoch": 18.974273858921162, "grad_norm": 69.57410430908203, "learning_rate": 1.2414605809128633e-05, "loss": 1.3805, "step": 22864 }, { "epoch": 18.975103734439834, "grad_norm": 11.597380638122559, "learning_rate": 1.2414273858921162e-05, "loss": 0.2386, "step": 22865 }, { "epoch": 18.975933609958506, "grad_norm": 42.62143325805664, "learning_rate": 1.2413941908713694e-05, "loss": 0.7828, "step": 22866 }, { "epoch": 18.97676348547718, "grad_norm": 48.96354293823242, "learning_rate": 1.2413609958506226e-05, "loss": 0.5747, "step": 22867 }, { "epoch": 18.97759336099585, "grad_norm": 74.34418487548828, "learning_rate": 1.2413278008298756e-05, "loss": 0.9955, "step": 22868 }, { "epoch": 18.978423236514523, "grad_norm": 22.309478759765625, "learning_rate": 1.2412946058091287e-05, "loss": 0.3349, "step": 22869 }, { "epoch": 18.979253112033195, "grad_norm": 42.699012756347656, "learning_rate": 1.2412614107883819e-05, "loss": 0.3854, "step": 22870 }, { "epoch": 18.980082987551867, "grad_norm": 17.696935653686523, "learning_rate": 1.2412282157676349e-05, "loss": 0.3174, "step": 22871 }, { "epoch": 18.98091286307054, "grad_norm": 81.78433227539062, "learning_rate": 1.2411950207468881e-05, "loss": 1.2542, "step": 22872 }, { "epoch": 18.98174273858921, "grad_norm": 28.675657272338867, "learning_rate": 1.241161825726141e-05, "loss": 0.8201, "step": 22873 }, { "epoch": 
18.982572614107884, "grad_norm": 39.28052520751953, "learning_rate": 1.2411286307053942e-05, "loss": 0.7857, "step": 22874 }, { "epoch": 18.983402489626556, "grad_norm": 64.30847930908203, "learning_rate": 1.2410954356846474e-05, "loss": 0.5479, "step": 22875 }, { "epoch": 18.98423236514523, "grad_norm": 53.431888580322266, "learning_rate": 1.2410622406639006e-05, "loss": 1.0437, "step": 22876 }, { "epoch": 18.9850622406639, "grad_norm": 21.653444290161133, "learning_rate": 1.2410290456431535e-05, "loss": 0.4277, "step": 22877 }, { "epoch": 18.985892116182573, "grad_norm": 36.220088958740234, "learning_rate": 1.2409958506224067e-05, "loss": 0.9399, "step": 22878 }, { "epoch": 18.986721991701245, "grad_norm": 57.49355697631836, "learning_rate": 1.24096265560166e-05, "loss": 1.0465, "step": 22879 }, { "epoch": 18.987551867219917, "grad_norm": 13.661674499511719, "learning_rate": 1.240929460580913e-05, "loss": 0.2863, "step": 22880 }, { "epoch": 18.98838174273859, "grad_norm": 69.0641098022461, "learning_rate": 1.2408962655601662e-05, "loss": 0.3327, "step": 22881 }, { "epoch": 18.98921161825726, "grad_norm": 11.264206886291504, "learning_rate": 1.240863070539419e-05, "loss": 0.2672, "step": 22882 }, { "epoch": 18.990041493775934, "grad_norm": 65.4451904296875, "learning_rate": 1.2408298755186723e-05, "loss": 0.5944, "step": 22883 }, { "epoch": 18.990871369294606, "grad_norm": 33.129520416259766, "learning_rate": 1.2407966804979255e-05, "loss": 0.7635, "step": 22884 }, { "epoch": 18.991701244813278, "grad_norm": 85.66668701171875, "learning_rate": 1.2407634854771787e-05, "loss": 1.0541, "step": 22885 }, { "epoch": 18.99253112033195, "grad_norm": 17.86252212524414, "learning_rate": 1.2407302904564316e-05, "loss": 0.3447, "step": 22886 }, { "epoch": 18.993360995850622, "grad_norm": 72.97108459472656, "learning_rate": 1.2406970954356848e-05, "loss": 1.0075, "step": 22887 }, { "epoch": 18.994190871369295, "grad_norm": 56.036861419677734, "learning_rate": 
1.240663900414938e-05, "loss": 0.622, "step": 22888 }, { "epoch": 18.995020746887967, "grad_norm": 26.961423873901367, "learning_rate": 1.240630705394191e-05, "loss": 0.4067, "step": 22889 }, { "epoch": 18.99585062240664, "grad_norm": 46.56235885620117, "learning_rate": 1.240597510373444e-05, "loss": 0.5304, "step": 22890 }, { "epoch": 18.99668049792531, "grad_norm": 60.707298278808594, "learning_rate": 1.2405643153526971e-05, "loss": 0.6528, "step": 22891 }, { "epoch": 18.997510373443983, "grad_norm": 19.034374237060547, "learning_rate": 1.2405311203319503e-05, "loss": 0.3055, "step": 22892 }, { "epoch": 18.998340248962656, "grad_norm": 72.64897155761719, "learning_rate": 1.2404979253112035e-05, "loss": 0.9879, "step": 22893 }, { "epoch": 18.999170124481328, "grad_norm": 36.03799819946289, "learning_rate": 1.2404647302904564e-05, "loss": 0.5429, "step": 22894 }, { "epoch": 19.0, "grad_norm": 92.94341278076172, "learning_rate": 1.2404315352697096e-05, "loss": 1.2862, "step": 22895 }, { "epoch": 19.000829875518672, "grad_norm": 15.430269241333008, "learning_rate": 1.2403983402489628e-05, "loss": 0.278, "step": 22896 }, { "epoch": 19.001659751037344, "grad_norm": 18.23659324645996, "learning_rate": 1.240365145228216e-05, "loss": 0.3188, "step": 22897 }, { "epoch": 19.002489626556017, "grad_norm": 37.80315017700195, "learning_rate": 1.2403319502074689e-05, "loss": 0.4196, "step": 22898 }, { "epoch": 19.00331950207469, "grad_norm": 31.76215171813965, "learning_rate": 1.2402987551867221e-05, "loss": 0.3819, "step": 22899 }, { "epoch": 19.00414937759336, "grad_norm": 14.774113655090332, "learning_rate": 1.2402655601659751e-05, "loss": 0.3367, "step": 22900 }, { "epoch": 19.004979253112033, "grad_norm": 71.12393188476562, "learning_rate": 1.2402323651452284e-05, "loss": 0.7866, "step": 22901 }, { "epoch": 19.005809128630705, "grad_norm": 21.069969177246094, "learning_rate": 1.2401991701244814e-05, "loss": 0.3944, "step": 22902 }, { "epoch": 19.006639004149378, 
"grad_norm": 45.659820556640625, "learning_rate": 1.2401659751037344e-05, "loss": 0.4452, "step": 22903 }, { "epoch": 19.00746887966805, "grad_norm": 44.02784729003906, "learning_rate": 1.2401327800829877e-05, "loss": 0.6132, "step": 22904 }, { "epoch": 19.008298755186722, "grad_norm": 27.53093147277832, "learning_rate": 1.2400995850622409e-05, "loss": 0.3081, "step": 22905 }, { "epoch": 19.009128630705394, "grad_norm": 81.73992919921875, "learning_rate": 1.2400663900414939e-05, "loss": 0.8068, "step": 22906 }, { "epoch": 19.009958506224066, "grad_norm": 49.042171478271484, "learning_rate": 1.240033195020747e-05, "loss": 0.3984, "step": 22907 }, { "epoch": 19.01078838174274, "grad_norm": 30.9971923828125, "learning_rate": 1.2400000000000002e-05, "loss": 0.5753, "step": 22908 }, { "epoch": 19.01161825726141, "grad_norm": 40.145565032958984, "learning_rate": 1.2399668049792532e-05, "loss": 0.3569, "step": 22909 }, { "epoch": 19.012448132780083, "grad_norm": 33.39128494262695, "learning_rate": 1.2399336099585064e-05, "loss": 0.4301, "step": 22910 }, { "epoch": 19.013278008298755, "grad_norm": 37.85942840576172, "learning_rate": 1.2399004149377594e-05, "loss": 0.5153, "step": 22911 }, { "epoch": 19.014107883817427, "grad_norm": 29.323766708374023, "learning_rate": 1.2398672199170125e-05, "loss": 0.3581, "step": 22912 }, { "epoch": 19.0149377593361, "grad_norm": 45.903411865234375, "learning_rate": 1.2398340248962657e-05, "loss": 0.3325, "step": 22913 }, { "epoch": 19.01576763485477, "grad_norm": 35.08662033081055, "learning_rate": 1.2398008298755189e-05, "loss": 0.8222, "step": 22914 }, { "epoch": 19.016597510373444, "grad_norm": 17.531808853149414, "learning_rate": 1.2397676348547718e-05, "loss": 0.2817, "step": 22915 }, { "epoch": 19.017427385892116, "grad_norm": 169.8944854736328, "learning_rate": 1.239734439834025e-05, "loss": 0.9143, "step": 22916 }, { "epoch": 19.01825726141079, "grad_norm": 52.84943771362305, "learning_rate": 1.2397012448132782e-05, "loss": 
0.4839, "step": 22917 }, { "epoch": 19.01908713692946, "grad_norm": 140.5834503173828, "learning_rate": 1.2396680497925312e-05, "loss": 1.3644, "step": 22918 }, { "epoch": 19.019917012448133, "grad_norm": 9.878242492675781, "learning_rate": 1.2396348547717843e-05, "loss": 0.1957, "step": 22919 }, { "epoch": 19.020746887966805, "grad_norm": 41.13429260253906, "learning_rate": 1.2396016597510373e-05, "loss": 0.5662, "step": 22920 }, { "epoch": 19.021576763485477, "grad_norm": 44.56265640258789, "learning_rate": 1.2395684647302905e-05, "loss": 0.6547, "step": 22921 }, { "epoch": 19.02240663900415, "grad_norm": 21.028310775756836, "learning_rate": 1.2395352697095438e-05, "loss": 0.3402, "step": 22922 }, { "epoch": 19.02323651452282, "grad_norm": 9.542778015136719, "learning_rate": 1.2395020746887966e-05, "loss": 0.2071, "step": 22923 }, { "epoch": 19.024066390041494, "grad_norm": 36.7280158996582, "learning_rate": 1.2394688796680498e-05, "loss": 0.4101, "step": 22924 }, { "epoch": 19.024896265560166, "grad_norm": 46.50445556640625, "learning_rate": 1.239435684647303e-05, "loss": 0.8195, "step": 22925 }, { "epoch": 19.025726141078838, "grad_norm": 30.40775489807129, "learning_rate": 1.2394024896265563e-05, "loss": 0.593, "step": 22926 }, { "epoch": 19.02655601659751, "grad_norm": 56.714813232421875, "learning_rate": 1.2393692946058091e-05, "loss": 0.9829, "step": 22927 }, { "epoch": 19.027385892116182, "grad_norm": 23.189605712890625, "learning_rate": 1.2393360995850623e-05, "loss": 0.2202, "step": 22928 }, { "epoch": 19.028215767634855, "grad_norm": 33.063438415527344, "learning_rate": 1.2393029045643154e-05, "loss": 0.3289, "step": 22929 }, { "epoch": 19.029045643153527, "grad_norm": 28.42108917236328, "learning_rate": 1.2392697095435686e-05, "loss": 0.4683, "step": 22930 }, { "epoch": 19.0298755186722, "grad_norm": 41.19142532348633, "learning_rate": 1.2392365145228218e-05, "loss": 0.7237, "step": 22931 }, { "epoch": 19.03070539419087, "grad_norm": 81.71017456054688, 
"learning_rate": 1.2392033195020747e-05, "loss": 0.26, "step": 22932 }, { "epoch": 19.031535269709543, "grad_norm": 52.274051666259766, "learning_rate": 1.2391701244813279e-05, "loss": 0.7145, "step": 22933 }, { "epoch": 19.032365145228216, "grad_norm": 65.36959838867188, "learning_rate": 1.2391369294605811e-05, "loss": 0.5128, "step": 22934 }, { "epoch": 19.033195020746888, "grad_norm": 71.17911529541016, "learning_rate": 1.2391037344398343e-05, "loss": 0.9774, "step": 22935 }, { "epoch": 19.03402489626556, "grad_norm": 55.24815368652344, "learning_rate": 1.2390705394190872e-05, "loss": 0.4811, "step": 22936 }, { "epoch": 19.034854771784232, "grad_norm": 37.96321105957031, "learning_rate": 1.2390373443983404e-05, "loss": 0.4371, "step": 22937 }, { "epoch": 19.035684647302904, "grad_norm": 40.06789016723633, "learning_rate": 1.2390041493775934e-05, "loss": 0.6163, "step": 22938 }, { "epoch": 19.036514522821577, "grad_norm": 50.62946701049805, "learning_rate": 1.2389709543568466e-05, "loss": 0.547, "step": 22939 }, { "epoch": 19.03734439834025, "grad_norm": 38.58588409423828, "learning_rate": 1.2389377593360997e-05, "loss": 0.4373, "step": 22940 }, { "epoch": 19.03817427385892, "grad_norm": 27.56968116760254, "learning_rate": 1.2389045643153527e-05, "loss": 0.5438, "step": 22941 }, { "epoch": 19.039004149377593, "grad_norm": 21.452640533447266, "learning_rate": 1.238871369294606e-05, "loss": 0.3233, "step": 22942 }, { "epoch": 19.039834024896265, "grad_norm": 91.12928771972656, "learning_rate": 1.2388381742738591e-05, "loss": 0.4436, "step": 22943 }, { "epoch": 19.040663900414938, "grad_norm": 28.574003219604492, "learning_rate": 1.238804979253112e-05, "loss": 0.4259, "step": 22944 }, { "epoch": 19.04149377593361, "grad_norm": 18.402238845825195, "learning_rate": 1.2387717842323652e-05, "loss": 0.3039, "step": 22945 }, { "epoch": 19.042323651452282, "grad_norm": 26.597225189208984, "learning_rate": 1.2387385892116184e-05, "loss": 0.56, "step": 22946 }, { "epoch": 
19.043153526970954, "grad_norm": 63.47224044799805, "learning_rate": 1.2387053941908715e-05, "loss": 0.4491, "step": 22947 }, { "epoch": 19.043983402489626, "grad_norm": 54.10134506225586, "learning_rate": 1.2386721991701245e-05, "loss": 0.7433, "step": 22948 }, { "epoch": 19.0448132780083, "grad_norm": 26.909440994262695, "learning_rate": 1.2386390041493777e-05, "loss": 0.5768, "step": 22949 }, { "epoch": 19.04564315352697, "grad_norm": 62.63896942138672, "learning_rate": 1.2386058091286308e-05, "loss": 0.8864, "step": 22950 }, { "epoch": 19.046473029045643, "grad_norm": 39.54637145996094, "learning_rate": 1.238572614107884e-05, "loss": 0.7338, "step": 22951 }, { "epoch": 19.047302904564315, "grad_norm": 25.107946395874023, "learning_rate": 1.2385394190871369e-05, "loss": 0.3598, "step": 22952 }, { "epoch": 19.048132780082987, "grad_norm": 33.25355911254883, "learning_rate": 1.23850622406639e-05, "loss": 0.63, "step": 22953 }, { "epoch": 19.04896265560166, "grad_norm": 63.03348922729492, "learning_rate": 1.2384730290456433e-05, "loss": 1.246, "step": 22954 }, { "epoch": 19.04979253112033, "grad_norm": 50.734371185302734, "learning_rate": 1.2384398340248965e-05, "loss": 0.7488, "step": 22955 }, { "epoch": 19.050622406639004, "grad_norm": 52.71772766113281, "learning_rate": 1.2384066390041494e-05, "loss": 0.5934, "step": 22956 }, { "epoch": 19.051452282157676, "grad_norm": 65.7840576171875, "learning_rate": 1.2383734439834026e-05, "loss": 0.648, "step": 22957 }, { "epoch": 19.05228215767635, "grad_norm": 41.2092170715332, "learning_rate": 1.2383402489626558e-05, "loss": 0.7281, "step": 22958 }, { "epoch": 19.05311203319502, "grad_norm": 39.45547866821289, "learning_rate": 1.2383070539419088e-05, "loss": 0.733, "step": 22959 }, { "epoch": 19.053941908713693, "grad_norm": 34.94773483276367, "learning_rate": 1.238273858921162e-05, "loss": 0.5985, "step": 22960 }, { "epoch": 19.054771784232365, "grad_norm": 30.645925521850586, "learning_rate": 1.2382406639004149e-05, 
"loss": 0.5327, "step": 22961 }, { "epoch": 19.055601659751037, "grad_norm": 111.03823852539062, "learning_rate": 1.2382074688796681e-05, "loss": 0.9008, "step": 22962 }, { "epoch": 19.05643153526971, "grad_norm": 12.708146095275879, "learning_rate": 1.2381742738589213e-05, "loss": 0.3253, "step": 22963 }, { "epoch": 19.05726141078838, "grad_norm": 51.79788589477539, "learning_rate": 1.2381410788381745e-05, "loss": 0.6638, "step": 22964 }, { "epoch": 19.058091286307054, "grad_norm": 8.980169296264648, "learning_rate": 1.2381078838174274e-05, "loss": 0.2254, "step": 22965 }, { "epoch": 19.058921161825726, "grad_norm": 79.9443130493164, "learning_rate": 1.2380746887966806e-05, "loss": 1.0419, "step": 22966 }, { "epoch": 19.059751037344398, "grad_norm": 46.837486267089844, "learning_rate": 1.2380414937759337e-05, "loss": 0.7611, "step": 22967 }, { "epoch": 19.06058091286307, "grad_norm": 38.29787063598633, "learning_rate": 1.2380082987551869e-05, "loss": 0.5243, "step": 22968 }, { "epoch": 19.061410788381743, "grad_norm": 55.57406234741211, "learning_rate": 1.2379751037344399e-05, "loss": 0.784, "step": 22969 }, { "epoch": 19.062240663900415, "grad_norm": 13.635222434997559, "learning_rate": 1.237941908713693e-05, "loss": 0.2969, "step": 22970 }, { "epoch": 19.063070539419087, "grad_norm": 35.49837875366211, "learning_rate": 1.2379087136929462e-05, "loss": 0.4365, "step": 22971 }, { "epoch": 19.06390041493776, "grad_norm": 34.98203659057617, "learning_rate": 1.2378755186721994e-05, "loss": 0.651, "step": 22972 }, { "epoch": 19.06473029045643, "grad_norm": 49.58867263793945, "learning_rate": 1.2378423236514522e-05, "loss": 0.4749, "step": 22973 }, { "epoch": 19.065560165975104, "grad_norm": 68.69096374511719, "learning_rate": 1.2378091286307055e-05, "loss": 0.9033, "step": 22974 }, { "epoch": 19.066390041493776, "grad_norm": 33.90522003173828, "learning_rate": 1.2377759336099587e-05, "loss": 0.6953, "step": 22975 }, { "epoch": 19.067219917012448, "grad_norm": 
42.88526916503906, "learning_rate": 1.2377427385892117e-05, "loss": 0.3389, "step": 22976 }, { "epoch": 19.06804979253112, "grad_norm": 39.591800689697266, "learning_rate": 1.2377095435684648e-05, "loss": 0.568, "step": 22977 }, { "epoch": 19.068879668049792, "grad_norm": 59.2730827331543, "learning_rate": 1.237676348547718e-05, "loss": 0.5042, "step": 22978 }, { "epoch": 19.069709543568464, "grad_norm": 46.183231353759766, "learning_rate": 1.237643153526971e-05, "loss": 0.8388, "step": 22979 }, { "epoch": 19.070539419087137, "grad_norm": 159.20484924316406, "learning_rate": 1.2376099585062242e-05, "loss": 0.9459, "step": 22980 }, { "epoch": 19.07136929460581, "grad_norm": 17.04379653930664, "learning_rate": 1.2375767634854773e-05, "loss": 0.2507, "step": 22981 }, { "epoch": 19.07219917012448, "grad_norm": 65.99685668945312, "learning_rate": 1.2375435684647303e-05, "loss": 0.966, "step": 22982 }, { "epoch": 19.073029045643153, "grad_norm": 68.99566650390625, "learning_rate": 1.2375103734439835e-05, "loss": 0.9225, "step": 22983 }, { "epoch": 19.073858921161825, "grad_norm": 44.61451721191406, "learning_rate": 1.2374771784232367e-05, "loss": 0.9396, "step": 22984 }, { "epoch": 19.074688796680498, "grad_norm": 30.441179275512695, "learning_rate": 1.2374439834024898e-05, "loss": 0.5886, "step": 22985 }, { "epoch": 19.07551867219917, "grad_norm": 42.87687683105469, "learning_rate": 1.2374107883817428e-05, "loss": 0.6026, "step": 22986 }, { "epoch": 19.076348547717842, "grad_norm": 36.123146057128906, "learning_rate": 1.237377593360996e-05, "loss": 0.3971, "step": 22987 }, { "epoch": 19.077178423236514, "grad_norm": 32.427818298339844, "learning_rate": 1.237344398340249e-05, "loss": 0.5381, "step": 22988 }, { "epoch": 19.078008298755186, "grad_norm": 22.850692749023438, "learning_rate": 1.2373112033195023e-05, "loss": 0.2888, "step": 22989 }, { "epoch": 19.07883817427386, "grad_norm": 27.472503662109375, "learning_rate": 1.2372780082987551e-05, "loss": 0.4677, "step": 
22990 }, { "epoch": 19.07966804979253, "grad_norm": 40.45310974121094, "learning_rate": 1.2372448132780083e-05, "loss": 0.4204, "step": 22991 }, { "epoch": 19.080497925311203, "grad_norm": 19.19632339477539, "learning_rate": 1.2372116182572616e-05, "loss": 0.3722, "step": 22992 }, { "epoch": 19.081327800829875, "grad_norm": 17.933237075805664, "learning_rate": 1.2371784232365148e-05, "loss": 0.1809, "step": 22993 }, { "epoch": 19.082157676348547, "grad_norm": 36.61989212036133, "learning_rate": 1.2371452282157676e-05, "loss": 0.6828, "step": 22994 }, { "epoch": 19.08298755186722, "grad_norm": 44.02252960205078, "learning_rate": 1.2371120331950209e-05, "loss": 0.7788, "step": 22995 }, { "epoch": 19.083817427385892, "grad_norm": 27.659997940063477, "learning_rate": 1.237078838174274e-05, "loss": 0.3587, "step": 22996 }, { "epoch": 19.084647302904564, "grad_norm": 35.646305084228516, "learning_rate": 1.2370456431535271e-05, "loss": 0.708, "step": 22997 }, { "epoch": 19.085477178423236, "grad_norm": 101.78581237792969, "learning_rate": 1.2370124481327801e-05, "loss": 0.9726, "step": 22998 }, { "epoch": 19.08630705394191, "grad_norm": 43.39297103881836, "learning_rate": 1.2369792531120332e-05, "loss": 0.4611, "step": 22999 }, { "epoch": 19.08713692946058, "grad_norm": 38.648502349853516, "learning_rate": 1.2369460580912864e-05, "loss": 0.6297, "step": 23000 }, { "epoch": 19.087966804979253, "grad_norm": 21.4343204498291, "learning_rate": 1.2369128630705396e-05, "loss": 0.3627, "step": 23001 }, { "epoch": 19.088796680497925, "grad_norm": 36.367462158203125, "learning_rate": 1.2368796680497925e-05, "loss": 0.4817, "step": 23002 }, { "epoch": 19.089626556016597, "grad_norm": 45.066925048828125, "learning_rate": 1.2368464730290457e-05, "loss": 1.0709, "step": 23003 }, { "epoch": 19.09045643153527, "grad_norm": 60.30480194091797, "learning_rate": 1.2368132780082989e-05, "loss": 0.6547, "step": 23004 }, { "epoch": 19.09128630705394, "grad_norm": 38.47522735595703, 
"learning_rate": 1.2367800829875521e-05, "loss": 0.4396, "step": 23005 }, { "epoch": 19.092116182572614, "grad_norm": 44.427001953125, "learning_rate": 1.236746887966805e-05, "loss": 0.4537, "step": 23006 }, { "epoch": 19.092946058091286, "grad_norm": 43.762882232666016, "learning_rate": 1.2367136929460582e-05, "loss": 0.7303, "step": 23007 }, { "epoch": 19.093775933609958, "grad_norm": 37.334571838378906, "learning_rate": 1.2366804979253112e-05, "loss": 0.9595, "step": 23008 }, { "epoch": 19.09460580912863, "grad_norm": 40.51959228515625, "learning_rate": 1.2366473029045644e-05, "loss": 0.3362, "step": 23009 }, { "epoch": 19.095435684647303, "grad_norm": 34.587501525878906, "learning_rate": 1.2366141078838177e-05, "loss": 0.2384, "step": 23010 }, { "epoch": 19.096265560165975, "grad_norm": 15.97689151763916, "learning_rate": 1.2365809128630705e-05, "loss": 0.3827, "step": 23011 }, { "epoch": 19.097095435684647, "grad_norm": 30.119752883911133, "learning_rate": 1.2365477178423237e-05, "loss": 0.4377, "step": 23012 }, { "epoch": 19.09792531120332, "grad_norm": 38.03837585449219, "learning_rate": 1.236514522821577e-05, "loss": 0.4274, "step": 23013 }, { "epoch": 19.09875518672199, "grad_norm": 36.2002067565918, "learning_rate": 1.2364813278008302e-05, "loss": 0.5854, "step": 23014 }, { "epoch": 19.099585062240664, "grad_norm": 45.776947021484375, "learning_rate": 1.236448132780083e-05, "loss": 0.3114, "step": 23015 }, { "epoch": 19.100414937759336, "grad_norm": 21.42366600036621, "learning_rate": 1.2364149377593362e-05, "loss": 0.3709, "step": 23016 }, { "epoch": 19.101244813278008, "grad_norm": 20.405336380004883, "learning_rate": 1.2363817427385893e-05, "loss": 0.5062, "step": 23017 }, { "epoch": 19.10207468879668, "grad_norm": 124.33644104003906, "learning_rate": 1.2363485477178425e-05, "loss": 0.6274, "step": 23018 }, { "epoch": 19.102904564315352, "grad_norm": 32.9005012512207, "learning_rate": 1.2363153526970955e-05, "loss": 0.4964, "step": 23019 }, { "epoch": 
19.103734439834025, "grad_norm": 48.84981155395508, "learning_rate": 1.2362821576763486e-05, "loss": 0.7449, "step": 23020 }, { "epoch": 19.104564315352697, "grad_norm": 56.302833557128906, "learning_rate": 1.2362489626556018e-05, "loss": 0.9863, "step": 23021 }, { "epoch": 19.10539419087137, "grad_norm": 40.895606994628906, "learning_rate": 1.236215767634855e-05, "loss": 0.578, "step": 23022 }, { "epoch": 19.10622406639004, "grad_norm": 42.65787887573242, "learning_rate": 1.2361825726141079e-05, "loss": 0.6652, "step": 23023 }, { "epoch": 19.107053941908713, "grad_norm": 126.39088439941406, "learning_rate": 1.236149377593361e-05, "loss": 0.407, "step": 23024 }, { "epoch": 19.107883817427386, "grad_norm": 14.494391441345215, "learning_rate": 1.2361161825726143e-05, "loss": 0.3158, "step": 23025 }, { "epoch": 19.108713692946058, "grad_norm": 26.829734802246094, "learning_rate": 1.2360829875518673e-05, "loss": 0.302, "step": 23026 }, { "epoch": 19.10954356846473, "grad_norm": 28.94752311706543, "learning_rate": 1.2360497925311204e-05, "loss": 0.3992, "step": 23027 }, { "epoch": 19.110373443983402, "grad_norm": 27.167831420898438, "learning_rate": 1.2360165975103736e-05, "loss": 0.3426, "step": 23028 }, { "epoch": 19.111203319502074, "grad_norm": 54.894527435302734, "learning_rate": 1.2359834024896266e-05, "loss": 0.6875, "step": 23029 }, { "epoch": 19.112033195020746, "grad_norm": 24.23451042175293, "learning_rate": 1.2359502074688798e-05, "loss": 0.3454, "step": 23030 }, { "epoch": 19.11286307053942, "grad_norm": 11.493512153625488, "learning_rate": 1.2359170124481327e-05, "loss": 0.236, "step": 23031 }, { "epoch": 19.11369294605809, "grad_norm": 24.004850387573242, "learning_rate": 1.235883817427386e-05, "loss": 0.3213, "step": 23032 }, { "epoch": 19.114522821576763, "grad_norm": 18.767629623413086, "learning_rate": 1.2358506224066391e-05, "loss": 0.4401, "step": 23033 }, { "epoch": 19.115352697095435, "grad_norm": 52.35391616821289, "learning_rate": 
1.2358174273858923e-05, "loss": 0.656, "step": 23034 }, { "epoch": 19.116182572614107, "grad_norm": 68.96157836914062, "learning_rate": 1.2357842323651452e-05, "loss": 1.1058, "step": 23035 }, { "epoch": 19.11701244813278, "grad_norm": 84.0580825805664, "learning_rate": 1.2357510373443984e-05, "loss": 1.067, "step": 23036 }, { "epoch": 19.117842323651452, "grad_norm": 42.060890197753906, "learning_rate": 1.2357178423236515e-05, "loss": 0.7007, "step": 23037 }, { "epoch": 19.118672199170124, "grad_norm": 48.10526657104492, "learning_rate": 1.2356846473029047e-05, "loss": 0.4903, "step": 23038 }, { "epoch": 19.119502074688796, "grad_norm": 31.85093879699707, "learning_rate": 1.2356514522821579e-05, "loss": 0.3174, "step": 23039 }, { "epoch": 19.12033195020747, "grad_norm": 19.19491195678711, "learning_rate": 1.2356182572614108e-05, "loss": 0.4246, "step": 23040 }, { "epoch": 19.12116182572614, "grad_norm": 14.40827465057373, "learning_rate": 1.235585062240664e-05, "loss": 0.3724, "step": 23041 }, { "epoch": 19.121991701244813, "grad_norm": 23.852508544921875, "learning_rate": 1.2355518672199172e-05, "loss": 0.5382, "step": 23042 }, { "epoch": 19.122821576763485, "grad_norm": 36.40558624267578, "learning_rate": 1.2355186721991704e-05, "loss": 0.2685, "step": 23043 }, { "epoch": 19.123651452282157, "grad_norm": 39.82513427734375, "learning_rate": 1.2354854771784233e-05, "loss": 0.6772, "step": 23044 }, { "epoch": 19.12448132780083, "grad_norm": 102.49980926513672, "learning_rate": 1.2354522821576765e-05, "loss": 0.3307, "step": 23045 }, { "epoch": 19.1253112033195, "grad_norm": 16.4138126373291, "learning_rate": 1.2354190871369295e-05, "loss": 0.364, "step": 23046 }, { "epoch": 19.126141078838174, "grad_norm": 14.07610034942627, "learning_rate": 1.2353858921161827e-05, "loss": 0.3449, "step": 23047 }, { "epoch": 19.126970954356846, "grad_norm": 39.83883285522461, "learning_rate": 1.2353526970954358e-05, "loss": 0.9319, "step": 23048 }, { "epoch": 19.127800829875518, 
"grad_norm": 59.419776916503906, "learning_rate": 1.2353195020746888e-05, "loss": 1.1876, "step": 23049 }, { "epoch": 19.12863070539419, "grad_norm": 32.95022964477539, "learning_rate": 1.235286307053942e-05, "loss": 0.7791, "step": 23050 }, { "epoch": 19.129460580912863, "grad_norm": 72.51676940917969, "learning_rate": 1.2352531120331952e-05, "loss": 0.5766, "step": 23051 }, { "epoch": 19.130290456431535, "grad_norm": 11.1727933883667, "learning_rate": 1.2352199170124481e-05, "loss": 0.2464, "step": 23052 }, { "epoch": 19.131120331950207, "grad_norm": 26.416194915771484, "learning_rate": 1.2351867219917013e-05, "loss": 0.3796, "step": 23053 }, { "epoch": 19.13195020746888, "grad_norm": 71.79254913330078, "learning_rate": 1.2351535269709545e-05, "loss": 1.0477, "step": 23054 }, { "epoch": 19.13278008298755, "grad_norm": 12.236696243286133, "learning_rate": 1.2351203319502076e-05, "loss": 0.36, "step": 23055 }, { "epoch": 19.133609958506224, "grad_norm": 103.77215576171875, "learning_rate": 1.2350871369294606e-05, "loss": 1.7871, "step": 23056 }, { "epoch": 19.134439834024896, "grad_norm": 94.50151062011719, "learning_rate": 1.2350539419087138e-05, "loss": 0.5418, "step": 23057 }, { "epoch": 19.135269709543568, "grad_norm": 56.9427490234375, "learning_rate": 1.2350207468879669e-05, "loss": 0.6836, "step": 23058 }, { "epoch": 19.13609958506224, "grad_norm": 9.218589782714844, "learning_rate": 1.23498755186722e-05, "loss": 0.2381, "step": 23059 }, { "epoch": 19.136929460580912, "grad_norm": 54.92517852783203, "learning_rate": 1.234954356846473e-05, "loss": 0.4919, "step": 23060 }, { "epoch": 19.137759336099585, "grad_norm": 38.08631896972656, "learning_rate": 1.2349211618257262e-05, "loss": 0.7513, "step": 23061 }, { "epoch": 19.138589211618257, "grad_norm": 58.49817657470703, "learning_rate": 1.2348879668049794e-05, "loss": 0.5307, "step": 23062 }, { "epoch": 19.13941908713693, "grad_norm": 23.492708206176758, "learning_rate": 1.2348547717842326e-05, "loss": 0.3584, 
"step": 23063 }, { "epoch": 19.1402489626556, "grad_norm": 36.342979431152344, "learning_rate": 1.2348215767634856e-05, "loss": 0.5406, "step": 23064 }, { "epoch": 19.141078838174273, "grad_norm": 42.62749099731445, "learning_rate": 1.2347883817427387e-05, "loss": 0.628, "step": 23065 }, { "epoch": 19.141908713692946, "grad_norm": 41.76641845703125, "learning_rate": 1.2347551867219919e-05, "loss": 0.5771, "step": 23066 }, { "epoch": 19.142738589211618, "grad_norm": 40.49495315551758, "learning_rate": 1.2347219917012449e-05, "loss": 0.4637, "step": 23067 }, { "epoch": 19.14356846473029, "grad_norm": 32.39115905761719, "learning_rate": 1.2346887966804981e-05, "loss": 0.5245, "step": 23068 }, { "epoch": 19.144398340248962, "grad_norm": 39.42250061035156, "learning_rate": 1.234655601659751e-05, "loss": 0.7432, "step": 23069 }, { "epoch": 19.145228215767634, "grad_norm": 38.209407806396484, "learning_rate": 1.2346224066390042e-05, "loss": 0.3463, "step": 23070 }, { "epoch": 19.146058091286307, "grad_norm": 54.80344772338867, "learning_rate": 1.2345892116182574e-05, "loss": 0.6652, "step": 23071 }, { "epoch": 19.14688796680498, "grad_norm": 44.927391052246094, "learning_rate": 1.2345560165975106e-05, "loss": 0.4143, "step": 23072 }, { "epoch": 19.14771784232365, "grad_norm": 18.805767059326172, "learning_rate": 1.2345228215767635e-05, "loss": 0.2941, "step": 23073 }, { "epoch": 19.148547717842323, "grad_norm": 31.524837493896484, "learning_rate": 1.2344896265560167e-05, "loss": 0.5449, "step": 23074 }, { "epoch": 19.149377593360995, "grad_norm": 50.57075500488281, "learning_rate": 1.23445643153527e-05, "loss": 0.3719, "step": 23075 }, { "epoch": 19.150207468879668, "grad_norm": 46.58750534057617, "learning_rate": 1.234423236514523e-05, "loss": 0.5974, "step": 23076 }, { "epoch": 19.15103734439834, "grad_norm": 44.9470100402832, "learning_rate": 1.234390041493776e-05, "loss": 0.8069, "step": 23077 }, { "epoch": 19.151867219917012, "grad_norm": 37.8687629699707, 
"learning_rate": 1.234356846473029e-05, "loss": 0.671, "step": 23078 }, { "epoch": 19.152697095435684, "grad_norm": 45.76518630981445, "learning_rate": 1.2343236514522823e-05, "loss": 0.914, "step": 23079 }, { "epoch": 19.153526970954356, "grad_norm": 45.68893051147461, "learning_rate": 1.2342904564315355e-05, "loss": 0.6023, "step": 23080 }, { "epoch": 19.15435684647303, "grad_norm": 25.910301208496094, "learning_rate": 1.2342572614107883e-05, "loss": 0.4064, "step": 23081 }, { "epoch": 19.1551867219917, "grad_norm": 25.01700782775879, "learning_rate": 1.2342240663900415e-05, "loss": 0.5172, "step": 23082 }, { "epoch": 19.156016597510373, "grad_norm": 12.537373542785645, "learning_rate": 1.2341908713692948e-05, "loss": 0.3124, "step": 23083 }, { "epoch": 19.156846473029045, "grad_norm": 37.06248474121094, "learning_rate": 1.2341576763485478e-05, "loss": 0.5578, "step": 23084 }, { "epoch": 19.157676348547717, "grad_norm": 37.57490539550781, "learning_rate": 1.2341244813278008e-05, "loss": 0.5181, "step": 23085 }, { "epoch": 19.15850622406639, "grad_norm": 22.10820770263672, "learning_rate": 1.234091286307054e-05, "loss": 0.2848, "step": 23086 }, { "epoch": 19.15933609958506, "grad_norm": 29.50404930114746, "learning_rate": 1.2340580912863071e-05, "loss": 0.6688, "step": 23087 }, { "epoch": 19.160165975103734, "grad_norm": 54.129371643066406, "learning_rate": 1.2340248962655603e-05, "loss": 0.7911, "step": 23088 }, { "epoch": 19.160995850622406, "grad_norm": 35.53643035888672, "learning_rate": 1.2339917012448135e-05, "loss": 0.4304, "step": 23089 }, { "epoch": 19.16182572614108, "grad_norm": 38.044883728027344, "learning_rate": 1.2339585062240664e-05, "loss": 0.4568, "step": 23090 }, { "epoch": 19.16265560165975, "grad_norm": 13.277424812316895, "learning_rate": 1.2339253112033196e-05, "loss": 0.2695, "step": 23091 }, { "epoch": 19.163485477178423, "grad_norm": 58.051021575927734, "learning_rate": 1.2338921161825728e-05, "loss": 0.5774, "step": 23092 }, { "epoch": 
19.164315352697095, "grad_norm": 28.54776382446289, "learning_rate": 1.2338589211618259e-05, "loss": 0.314, "step": 23093 }, { "epoch": 19.165145228215767, "grad_norm": 107.42744445800781, "learning_rate": 1.2338257261410789e-05, "loss": 0.5341, "step": 23094 }, { "epoch": 19.16597510373444, "grad_norm": 28.60265350341797, "learning_rate": 1.2337925311203321e-05, "loss": 0.4843, "step": 23095 }, { "epoch": 19.16680497925311, "grad_norm": 40.126644134521484, "learning_rate": 1.2337593360995851e-05, "loss": 1.0651, "step": 23096 }, { "epoch": 19.167634854771784, "grad_norm": 39.83370590209961, "learning_rate": 1.2337261410788384e-05, "loss": 0.4448, "step": 23097 }, { "epoch": 19.168464730290456, "grad_norm": 50.99589920043945, "learning_rate": 1.2336929460580914e-05, "loss": 0.6496, "step": 23098 }, { "epoch": 19.169294605809128, "grad_norm": 50.65076446533203, "learning_rate": 1.2336597510373444e-05, "loss": 0.5796, "step": 23099 }, { "epoch": 19.1701244813278, "grad_norm": 53.1254768371582, "learning_rate": 1.2336265560165976e-05, "loss": 0.6838, "step": 23100 }, { "epoch": 19.170954356846472, "grad_norm": 65.0743179321289, "learning_rate": 1.2335933609958509e-05, "loss": 0.6774, "step": 23101 }, { "epoch": 19.171784232365145, "grad_norm": 58.52358627319336, "learning_rate": 1.2335601659751037e-05, "loss": 0.3977, "step": 23102 }, { "epoch": 19.172614107883817, "grad_norm": 29.71428680419922, "learning_rate": 1.233526970954357e-05, "loss": 0.7232, "step": 23103 }, { "epoch": 19.17344398340249, "grad_norm": 32.9535026550293, "learning_rate": 1.2334937759336102e-05, "loss": 0.5261, "step": 23104 }, { "epoch": 19.17427385892116, "grad_norm": 64.52388000488281, "learning_rate": 1.2334605809128632e-05, "loss": 0.5257, "step": 23105 }, { "epoch": 19.175103734439833, "grad_norm": 63.99011993408203, "learning_rate": 1.2334273858921162e-05, "loss": 0.6171, "step": 23106 }, { "epoch": 19.175933609958506, "grad_norm": 22.927873611450195, "learning_rate": 
1.2333941908713693e-05, "loss": 0.4173, "step": 23107 }, { "epoch": 19.176763485477178, "grad_norm": 60.0294189453125, "learning_rate": 1.2333609958506225e-05, "loss": 0.5158, "step": 23108 }, { "epoch": 19.17759336099585, "grad_norm": 73.07926177978516, "learning_rate": 1.2333278008298757e-05, "loss": 0.9496, "step": 23109 }, { "epoch": 19.178423236514522, "grad_norm": 44.72026062011719, "learning_rate": 1.2332946058091286e-05, "loss": 0.4058, "step": 23110 }, { "epoch": 19.179253112033194, "grad_norm": 17.105947494506836, "learning_rate": 1.2332614107883818e-05, "loss": 0.3754, "step": 23111 }, { "epoch": 19.180082987551867, "grad_norm": 31.941062927246094, "learning_rate": 1.233228215767635e-05, "loss": 0.3138, "step": 23112 }, { "epoch": 19.18091286307054, "grad_norm": 96.25398254394531, "learning_rate": 1.2331950207468882e-05, "loss": 0.8106, "step": 23113 }, { "epoch": 19.18174273858921, "grad_norm": 144.5984344482422, "learning_rate": 1.233161825726141e-05, "loss": 1.2287, "step": 23114 }, { "epoch": 19.182572614107883, "grad_norm": 60.12741470336914, "learning_rate": 1.2331286307053943e-05, "loss": 0.7002, "step": 23115 }, { "epoch": 19.183402489626555, "grad_norm": 94.65859985351562, "learning_rate": 1.2330954356846473e-05, "loss": 0.8017, "step": 23116 }, { "epoch": 19.184232365145228, "grad_norm": 17.739608764648438, "learning_rate": 1.2330622406639005e-05, "loss": 0.3363, "step": 23117 }, { "epoch": 19.1850622406639, "grad_norm": 97.28055572509766, "learning_rate": 1.2330290456431537e-05, "loss": 0.9363, "step": 23118 }, { "epoch": 19.185892116182572, "grad_norm": 20.15096092224121, "learning_rate": 1.2329958506224066e-05, "loss": 0.3889, "step": 23119 }, { "epoch": 19.186721991701244, "grad_norm": 34.94677734375, "learning_rate": 1.2329626556016598e-05, "loss": 0.779, "step": 23120 }, { "epoch": 19.187551867219916, "grad_norm": 34.42756652832031, "learning_rate": 1.232929460580913e-05, "loss": 0.2953, "step": 23121 }, { "epoch": 19.18838174273859, 
"grad_norm": 24.786376953125, "learning_rate": 1.2328962655601663e-05, "loss": 0.3387, "step": 23122 }, { "epoch": 19.18921161825726, "grad_norm": 62.0858039855957, "learning_rate": 1.2328630705394191e-05, "loss": 0.5692, "step": 23123 }, { "epoch": 19.190041493775933, "grad_norm": 24.870363235473633, "learning_rate": 1.2328298755186723e-05, "loss": 0.377, "step": 23124 }, { "epoch": 19.190871369294605, "grad_norm": 53.3299674987793, "learning_rate": 1.2327966804979254e-05, "loss": 0.8337, "step": 23125 }, { "epoch": 19.191701244813277, "grad_norm": 23.67899513244629, "learning_rate": 1.2327634854771786e-05, "loss": 0.2974, "step": 23126 }, { "epoch": 19.19253112033195, "grad_norm": 49.616390228271484, "learning_rate": 1.2327302904564316e-05, "loss": 0.4547, "step": 23127 }, { "epoch": 19.19336099585062, "grad_norm": 28.637195587158203, "learning_rate": 1.2326970954356847e-05, "loss": 0.4605, "step": 23128 }, { "epoch": 19.194190871369294, "grad_norm": 32.461387634277344, "learning_rate": 1.2326639004149379e-05, "loss": 0.3868, "step": 23129 }, { "epoch": 19.195020746887966, "grad_norm": 47.12144088745117, "learning_rate": 1.2326307053941911e-05, "loss": 1.218, "step": 23130 }, { "epoch": 19.19585062240664, "grad_norm": 14.793975830078125, "learning_rate": 1.232597510373444e-05, "loss": 0.4294, "step": 23131 }, { "epoch": 19.19668049792531, "grad_norm": 28.73897361755371, "learning_rate": 1.2325643153526972e-05, "loss": 0.2812, "step": 23132 }, { "epoch": 19.197510373443983, "grad_norm": 47.81282043457031, "learning_rate": 1.2325311203319504e-05, "loss": 0.4087, "step": 23133 }, { "epoch": 19.198340248962655, "grad_norm": 63.25971984863281, "learning_rate": 1.2324979253112034e-05, "loss": 0.4183, "step": 23134 }, { "epoch": 19.199170124481327, "grad_norm": 67.42619323730469, "learning_rate": 1.2324647302904565e-05, "loss": 0.7808, "step": 23135 }, { "epoch": 19.2, "grad_norm": 100.89437866210938, "learning_rate": 1.2324315352697097e-05, "loss": 1.1046, "step": 
23136 }, { "epoch": 19.20082987551867, "grad_norm": 50.828697204589844, "learning_rate": 1.2323983402489627e-05, "loss": 0.5738, "step": 23137 }, { "epoch": 19.201659751037344, "grad_norm": 19.016708374023438, "learning_rate": 1.232365145228216e-05, "loss": 0.379, "step": 23138 }, { "epoch": 19.202489626556016, "grad_norm": 29.538436889648438, "learning_rate": 1.2323319502074688e-05, "loss": 0.329, "step": 23139 }, { "epoch": 19.203319502074688, "grad_norm": 107.26972961425781, "learning_rate": 1.232298755186722e-05, "loss": 0.473, "step": 23140 }, { "epoch": 19.20414937759336, "grad_norm": 23.63834571838379, "learning_rate": 1.2322655601659752e-05, "loss": 0.542, "step": 23141 }, { "epoch": 19.204979253112032, "grad_norm": 34.91853713989258, "learning_rate": 1.2322323651452284e-05, "loss": 0.6305, "step": 23142 }, { "epoch": 19.205809128630705, "grad_norm": 23.09331512451172, "learning_rate": 1.2321991701244815e-05, "loss": 0.579, "step": 23143 }, { "epoch": 19.206639004149377, "grad_norm": 48.229888916015625, "learning_rate": 1.2321659751037345e-05, "loss": 0.3264, "step": 23144 }, { "epoch": 19.20746887966805, "grad_norm": 26.633869171142578, "learning_rate": 1.2321327800829877e-05, "loss": 0.3175, "step": 23145 }, { "epoch": 19.20829875518672, "grad_norm": 60.179935455322266, "learning_rate": 1.2320995850622408e-05, "loss": 0.4661, "step": 23146 }, { "epoch": 19.209128630705393, "grad_norm": 30.610958099365234, "learning_rate": 1.232066390041494e-05, "loss": 0.3437, "step": 23147 }, { "epoch": 19.209958506224066, "grad_norm": 19.473180770874023, "learning_rate": 1.2320331950207469e-05, "loss": 0.1856, "step": 23148 }, { "epoch": 19.210788381742738, "grad_norm": 35.88548278808594, "learning_rate": 1.232e-05, "loss": 0.6, "step": 23149 }, { "epoch": 19.21161825726141, "grad_norm": 58.67755126953125, "learning_rate": 1.2319668049792533e-05, "loss": 1.0367, "step": 23150 }, { "epoch": 19.212448132780082, "grad_norm": 27.86690902709961, "learning_rate": 
1.2319336099585065e-05, "loss": 0.578, "step": 23151 }, { "epoch": 19.213278008298754, "grad_norm": 29.913223266601562, "learning_rate": 1.2319004149377594e-05, "loss": 0.421, "step": 23152 }, { "epoch": 19.214107883817427, "grad_norm": 55.56278991699219, "learning_rate": 1.2318672199170126e-05, "loss": 0.7706, "step": 23153 }, { "epoch": 19.2149377593361, "grad_norm": 45.02007293701172, "learning_rate": 1.2318340248962656e-05, "loss": 0.3959, "step": 23154 }, { "epoch": 19.21576763485477, "grad_norm": 27.869829177856445, "learning_rate": 1.2318008298755188e-05, "loss": 0.9714, "step": 23155 }, { "epoch": 19.216597510373443, "grad_norm": 32.70707321166992, "learning_rate": 1.2317676348547719e-05, "loss": 0.3905, "step": 23156 }, { "epoch": 19.217427385892115, "grad_norm": 48.899818420410156, "learning_rate": 1.2317344398340249e-05, "loss": 0.3452, "step": 23157 }, { "epoch": 19.218257261410788, "grad_norm": 50.18507385253906, "learning_rate": 1.2317012448132781e-05, "loss": 0.9031, "step": 23158 }, { "epoch": 19.21908713692946, "grad_norm": 73.30293273925781, "learning_rate": 1.2316680497925313e-05, "loss": 0.932, "step": 23159 }, { "epoch": 19.219917012448132, "grad_norm": 28.743309020996094, "learning_rate": 1.2316348547717842e-05, "loss": 0.2944, "step": 23160 }, { "epoch": 19.220746887966804, "grad_norm": 97.53907012939453, "learning_rate": 1.2316016597510374e-05, "loss": 0.5635, "step": 23161 }, { "epoch": 19.221576763485476, "grad_norm": 129.7108917236328, "learning_rate": 1.2315684647302906e-05, "loss": 1.0039, "step": 23162 }, { "epoch": 19.22240663900415, "grad_norm": 60.1115608215332, "learning_rate": 1.2315352697095437e-05, "loss": 0.6506, "step": 23163 }, { "epoch": 19.22323651452282, "grad_norm": 70.14756774902344, "learning_rate": 1.2315020746887967e-05, "loss": 0.7443, "step": 23164 }, { "epoch": 19.224066390041493, "grad_norm": 80.65087127685547, "learning_rate": 1.2314688796680499e-05, "loss": 0.6508, "step": 23165 }, { "epoch": 19.224896265560165, 
"grad_norm": 23.030426025390625, "learning_rate": 1.231435684647303e-05, "loss": 0.3129, "step": 23166 }, { "epoch": 19.225726141078837, "grad_norm": 27.56414222717285, "learning_rate": 1.2314024896265562e-05, "loss": 0.3119, "step": 23167 }, { "epoch": 19.22655601659751, "grad_norm": 47.35818099975586, "learning_rate": 1.2313692946058094e-05, "loss": 0.5498, "step": 23168 }, { "epoch": 19.22738589211618, "grad_norm": 52.63026428222656, "learning_rate": 1.2313360995850622e-05, "loss": 0.5738, "step": 23169 }, { "epoch": 19.228215767634854, "grad_norm": 253.192138671875, "learning_rate": 1.2313029045643155e-05, "loss": 0.3768, "step": 23170 }, { "epoch": 19.229045643153526, "grad_norm": 38.34859085083008, "learning_rate": 1.2312697095435687e-05, "loss": 0.5891, "step": 23171 }, { "epoch": 19.2298755186722, "grad_norm": 48.61481475830078, "learning_rate": 1.2312365145228217e-05, "loss": 0.8974, "step": 23172 }, { "epoch": 19.23070539419087, "grad_norm": 16.198993682861328, "learning_rate": 1.2312033195020747e-05, "loss": 0.2757, "step": 23173 }, { "epoch": 19.231535269709543, "grad_norm": 22.120281219482422, "learning_rate": 1.231170124481328e-05, "loss": 0.3833, "step": 23174 }, { "epoch": 19.232365145228215, "grad_norm": 30.141408920288086, "learning_rate": 1.231136929460581e-05, "loss": 0.6061, "step": 23175 }, { "epoch": 19.233195020746887, "grad_norm": 29.912147521972656, "learning_rate": 1.2311037344398342e-05, "loss": 0.3501, "step": 23176 }, { "epoch": 19.23402489626556, "grad_norm": 19.00522232055664, "learning_rate": 1.231070539419087e-05, "loss": 0.3962, "step": 23177 }, { "epoch": 19.23485477178423, "grad_norm": 47.54963302612305, "learning_rate": 1.2310373443983403e-05, "loss": 0.8675, "step": 23178 }, { "epoch": 19.235684647302904, "grad_norm": 40.33854675292969, "learning_rate": 1.2310041493775935e-05, "loss": 0.4776, "step": 23179 }, { "epoch": 19.236514522821576, "grad_norm": 112.31098937988281, "learning_rate": 1.2309709543568467e-05, "loss": 
0.5624, "step": 23180 }, { "epoch": 19.237344398340248, "grad_norm": 59.90228271484375, "learning_rate": 1.2309377593360996e-05, "loss": 0.8873, "step": 23181 }, { "epoch": 19.23817427385892, "grad_norm": 25.6768741607666, "learning_rate": 1.2309045643153528e-05, "loss": 0.9203, "step": 23182 }, { "epoch": 19.239004149377593, "grad_norm": 35.913543701171875, "learning_rate": 1.230871369294606e-05, "loss": 0.5849, "step": 23183 }, { "epoch": 19.239834024896265, "grad_norm": 31.727466583251953, "learning_rate": 1.230838174273859e-05, "loss": 0.5118, "step": 23184 }, { "epoch": 19.240663900414937, "grad_norm": 13.980216979980469, "learning_rate": 1.2308049792531121e-05, "loss": 0.3242, "step": 23185 }, { "epoch": 19.24149377593361, "grad_norm": 17.356117248535156, "learning_rate": 1.2307717842323651e-05, "loss": 0.2861, "step": 23186 }, { "epoch": 19.24232365145228, "grad_norm": 50.8414421081543, "learning_rate": 1.2307385892116183e-05, "loss": 0.7259, "step": 23187 }, { "epoch": 19.243153526970953, "grad_norm": 103.28507995605469, "learning_rate": 1.2307053941908716e-05, "loss": 0.8317, "step": 23188 }, { "epoch": 19.243983402489626, "grad_norm": 64.6327896118164, "learning_rate": 1.2306721991701244e-05, "loss": 0.7318, "step": 23189 }, { "epoch": 19.244813278008298, "grad_norm": 37.62362289428711, "learning_rate": 1.2306390041493776e-05, "loss": 0.2684, "step": 23190 }, { "epoch": 19.24564315352697, "grad_norm": 33.95423889160156, "learning_rate": 1.2306058091286308e-05, "loss": 0.5034, "step": 23191 }, { "epoch": 19.246473029045642, "grad_norm": 47.58420944213867, "learning_rate": 1.230572614107884e-05, "loss": 0.4383, "step": 23192 }, { "epoch": 19.247302904564314, "grad_norm": 96.99313354492188, "learning_rate": 1.230539419087137e-05, "loss": 1.1047, "step": 23193 }, { "epoch": 19.248132780082987, "grad_norm": 41.29703140258789, "learning_rate": 1.2305062240663901e-05, "loss": 0.4495, "step": 23194 }, { "epoch": 19.24896265560166, "grad_norm": 31.116113662719727, 
"learning_rate": 1.2304730290456432e-05, "loss": 0.4322, "step": 23195 }, { "epoch": 19.24979253112033, "grad_norm": 38.25920867919922, "learning_rate": 1.2304398340248964e-05, "loss": 0.8886, "step": 23196 }, { "epoch": 19.250622406639003, "grad_norm": 34.84244155883789, "learning_rate": 1.2304066390041496e-05, "loss": 0.5127, "step": 23197 }, { "epoch": 19.251452282157675, "grad_norm": 69.47974395751953, "learning_rate": 1.2303734439834025e-05, "loss": 0.5329, "step": 23198 }, { "epoch": 19.252282157676348, "grad_norm": 49.11204147338867, "learning_rate": 1.2303402489626557e-05, "loss": 0.4222, "step": 23199 }, { "epoch": 19.25311203319502, "grad_norm": 39.46717834472656, "learning_rate": 1.2303070539419089e-05, "loss": 0.4168, "step": 23200 }, { "epoch": 19.253941908713692, "grad_norm": 53.254364013671875, "learning_rate": 1.230273858921162e-05, "loss": 0.8724, "step": 23201 }, { "epoch": 19.254771784232364, "grad_norm": 22.870634078979492, "learning_rate": 1.230240663900415e-05, "loss": 0.3909, "step": 23202 }, { "epoch": 19.255601659751036, "grad_norm": 70.68101501464844, "learning_rate": 1.2302074688796682e-05, "loss": 0.7018, "step": 23203 }, { "epoch": 19.25643153526971, "grad_norm": 18.490163803100586, "learning_rate": 1.2301742738589212e-05, "loss": 0.2803, "step": 23204 }, { "epoch": 19.25726141078838, "grad_norm": 31.071027755737305, "learning_rate": 1.2301410788381744e-05, "loss": 0.4633, "step": 23205 }, { "epoch": 19.258091286307053, "grad_norm": 30.67660140991211, "learning_rate": 1.2301078838174275e-05, "loss": 0.3953, "step": 23206 }, { "epoch": 19.258921161825725, "grad_norm": 41.29511642456055, "learning_rate": 1.2300746887966805e-05, "loss": 0.4426, "step": 23207 }, { "epoch": 19.259751037344397, "grad_norm": 38.08104705810547, "learning_rate": 1.2300414937759337e-05, "loss": 0.5102, "step": 23208 }, { "epoch": 19.26058091286307, "grad_norm": 41.60642623901367, "learning_rate": 1.230008298755187e-05, "loss": 0.4327, "step": 23209 }, { "epoch": 
19.261410788381742, "grad_norm": 25.091699600219727, "learning_rate": 1.2299751037344398e-05, "loss": 0.4053, "step": 23210 }, { "epoch": 19.262240663900414, "grad_norm": 36.58946228027344, "learning_rate": 1.229941908713693e-05, "loss": 0.5786, "step": 23211 }, { "epoch": 19.263070539419086, "grad_norm": 76.92811584472656, "learning_rate": 1.2299087136929462e-05, "loss": 1.1422, "step": 23212 }, { "epoch": 19.26390041493776, "grad_norm": 56.87721633911133, "learning_rate": 1.2298755186721993e-05, "loss": 0.9578, "step": 23213 }, { "epoch": 19.26473029045643, "grad_norm": 29.097461700439453, "learning_rate": 1.2298423236514523e-05, "loss": 0.6477, "step": 23214 }, { "epoch": 19.265560165975103, "grad_norm": 42.12302780151367, "learning_rate": 1.2298091286307055e-05, "loss": 0.9583, "step": 23215 }, { "epoch": 19.266390041493775, "grad_norm": 21.618558883666992, "learning_rate": 1.2297759336099586e-05, "loss": 0.4371, "step": 23216 }, { "epoch": 19.267219917012447, "grad_norm": 23.755388259887695, "learning_rate": 1.2297427385892118e-05, "loss": 0.4686, "step": 23217 }, { "epoch": 19.26804979253112, "grad_norm": 37.525550842285156, "learning_rate": 1.2297095435684647e-05, "loss": 0.6872, "step": 23218 }, { "epoch": 19.26887966804979, "grad_norm": 15.679490089416504, "learning_rate": 1.2296763485477179e-05, "loss": 0.2468, "step": 23219 }, { "epoch": 19.269709543568464, "grad_norm": 33.64818572998047, "learning_rate": 1.229643153526971e-05, "loss": 0.3596, "step": 23220 }, { "epoch": 19.270539419087136, "grad_norm": 38.683319091796875, "learning_rate": 1.2296099585062243e-05, "loss": 0.368, "step": 23221 }, { "epoch": 19.271369294605808, "grad_norm": 18.927459716796875, "learning_rate": 1.2295767634854773e-05, "loss": 0.3409, "step": 23222 }, { "epoch": 19.27219917012448, "grad_norm": 25.752521514892578, "learning_rate": 1.2295435684647304e-05, "loss": 0.6716, "step": 23223 }, { "epoch": 19.273029045643153, "grad_norm": 22.920175552368164, "learning_rate": 
1.2295103734439834e-05, "loss": 0.353, "step": 23224 }, { "epoch": 19.273858921161825, "grad_norm": 86.58354949951172, "learning_rate": 1.2294771784232366e-05, "loss": 0.6744, "step": 23225 }, { "epoch": 19.274688796680497, "grad_norm": 20.83855628967285, "learning_rate": 1.2294439834024898e-05, "loss": 0.4672, "step": 23226 }, { "epoch": 19.27551867219917, "grad_norm": 170.2249298095703, "learning_rate": 1.2294107883817427e-05, "loss": 1.2259, "step": 23227 }, { "epoch": 19.27634854771784, "grad_norm": 41.17778778076172, "learning_rate": 1.229377593360996e-05, "loss": 0.4829, "step": 23228 }, { "epoch": 19.277178423236514, "grad_norm": 192.0850830078125, "learning_rate": 1.2293443983402491e-05, "loss": 1.0024, "step": 23229 }, { "epoch": 19.278008298755186, "grad_norm": 44.390865325927734, "learning_rate": 1.2293112033195023e-05, "loss": 0.645, "step": 23230 }, { "epoch": 19.278838174273858, "grad_norm": 51.4698371887207, "learning_rate": 1.2292780082987552e-05, "loss": 1.0073, "step": 23231 }, { "epoch": 19.27966804979253, "grad_norm": 28.30317497253418, "learning_rate": 1.2292448132780084e-05, "loss": 0.4226, "step": 23232 }, { "epoch": 19.280497925311202, "grad_norm": 66.53638458251953, "learning_rate": 1.2292116182572615e-05, "loss": 0.6465, "step": 23233 }, { "epoch": 19.281327800829875, "grad_norm": 35.67771911621094, "learning_rate": 1.2291784232365147e-05, "loss": 0.5657, "step": 23234 }, { "epoch": 19.282157676348547, "grad_norm": 70.56864929199219, "learning_rate": 1.2291452282157677e-05, "loss": 1.051, "step": 23235 }, { "epoch": 19.28298755186722, "grad_norm": 53.51493835449219, "learning_rate": 1.2291120331950208e-05, "loss": 1.0354, "step": 23236 }, { "epoch": 19.28381742738589, "grad_norm": 40.897369384765625, "learning_rate": 1.229078838174274e-05, "loss": 0.279, "step": 23237 }, { "epoch": 19.284647302904563, "grad_norm": 36.03170394897461, "learning_rate": 1.2290456431535272e-05, "loss": 0.3601, "step": 23238 }, { "epoch": 19.285477178423236, 
"grad_norm": 25.701753616333008, "learning_rate": 1.22901244813278e-05, "loss": 0.3469, "step": 23239 }, { "epoch": 19.286307053941908, "grad_norm": 42.36179733276367, "learning_rate": 1.2289792531120333e-05, "loss": 0.7363, "step": 23240 }, { "epoch": 19.28713692946058, "grad_norm": 61.20774841308594, "learning_rate": 1.2289460580912865e-05, "loss": 1.1606, "step": 23241 }, { "epoch": 19.287966804979252, "grad_norm": 21.756866455078125, "learning_rate": 1.2289128630705395e-05, "loss": 0.388, "step": 23242 }, { "epoch": 19.288796680497924, "grad_norm": 86.39945983886719, "learning_rate": 1.2288796680497926e-05, "loss": 0.5415, "step": 23243 }, { "epoch": 19.289626556016596, "grad_norm": 26.715349197387695, "learning_rate": 1.2288464730290458e-05, "loss": 0.2276, "step": 23244 }, { "epoch": 19.29045643153527, "grad_norm": 18.034502029418945, "learning_rate": 1.2288132780082988e-05, "loss": 0.3534, "step": 23245 }, { "epoch": 19.29128630705394, "grad_norm": 26.108259201049805, "learning_rate": 1.228780082987552e-05, "loss": 0.402, "step": 23246 }, { "epoch": 19.292116182572613, "grad_norm": 49.9566764831543, "learning_rate": 1.2287468879668049e-05, "loss": 0.762, "step": 23247 }, { "epoch": 19.292946058091285, "grad_norm": 171.3915252685547, "learning_rate": 1.2287136929460581e-05, "loss": 1.9045, "step": 23248 }, { "epoch": 19.293775933609957, "grad_norm": 62.789432525634766, "learning_rate": 1.2286804979253113e-05, "loss": 0.6957, "step": 23249 }, { "epoch": 19.29460580912863, "grad_norm": 53.90299987792969, "learning_rate": 1.2286473029045645e-05, "loss": 0.413, "step": 23250 }, { "epoch": 19.295435684647302, "grad_norm": 57.33953094482422, "learning_rate": 1.2286141078838176e-05, "loss": 0.789, "step": 23251 }, { "epoch": 19.296265560165974, "grad_norm": 48.2227783203125, "learning_rate": 1.2285809128630706e-05, "loss": 0.6131, "step": 23252 }, { "epoch": 19.297095435684646, "grad_norm": 17.348180770874023, "learning_rate": 1.2285477178423238e-05, "loss": 0.3161, 
"step": 23253 }, { "epoch": 19.29792531120332, "grad_norm": 66.25896453857422, "learning_rate": 1.2285145228215769e-05, "loss": 1.1763, "step": 23254 }, { "epoch": 19.29875518672199, "grad_norm": 50.560062408447266, "learning_rate": 1.22848132780083e-05, "loss": 0.3104, "step": 23255 }, { "epoch": 19.299585062240663, "grad_norm": 20.460786819458008, "learning_rate": 1.228448132780083e-05, "loss": 0.4662, "step": 23256 }, { "epoch": 19.300414937759335, "grad_norm": 72.39892578125, "learning_rate": 1.2284149377593362e-05, "loss": 0.9287, "step": 23257 }, { "epoch": 19.301244813278007, "grad_norm": 13.048826217651367, "learning_rate": 1.2283817427385894e-05, "loss": 0.2056, "step": 23258 }, { "epoch": 19.30207468879668, "grad_norm": 31.886457443237305, "learning_rate": 1.2283485477178426e-05, "loss": 0.6966, "step": 23259 }, { "epoch": 19.30290456431535, "grad_norm": 32.27434158325195, "learning_rate": 1.2283153526970954e-05, "loss": 1.0229, "step": 23260 }, { "epoch": 19.303734439834024, "grad_norm": 104.26543426513672, "learning_rate": 1.2282821576763487e-05, "loss": 0.8963, "step": 23261 }, { "epoch": 19.304564315352696, "grad_norm": 45.79526138305664, "learning_rate": 1.2282489626556019e-05, "loss": 0.5271, "step": 23262 }, { "epoch": 19.305394190871368, "grad_norm": 124.48100280761719, "learning_rate": 1.2282157676348549e-05, "loss": 0.9635, "step": 23263 }, { "epoch": 19.30622406639004, "grad_norm": 29.76671028137207, "learning_rate": 1.228182572614108e-05, "loss": 0.4798, "step": 23264 }, { "epoch": 19.307053941908713, "grad_norm": 37.814056396484375, "learning_rate": 1.228149377593361e-05, "loss": 0.907, "step": 23265 }, { "epoch": 19.307883817427385, "grad_norm": 36.43086624145508, "learning_rate": 1.2281161825726142e-05, "loss": 0.7724, "step": 23266 }, { "epoch": 19.308713692946057, "grad_norm": 12.113146781921387, "learning_rate": 1.2280829875518674e-05, "loss": 0.3318, "step": 23267 }, { "epoch": 19.30954356846473, "grad_norm": 36.32527160644531, 
"learning_rate": 1.2280497925311203e-05, "loss": 0.739, "step": 23268 }, { "epoch": 19.3103734439834, "grad_norm": 36.151451110839844, "learning_rate": 1.2280165975103735e-05, "loss": 0.4685, "step": 23269 }, { "epoch": 19.311203319502074, "grad_norm": 26.51133155822754, "learning_rate": 1.2279834024896267e-05, "loss": 0.6248, "step": 23270 }, { "epoch": 19.312033195020746, "grad_norm": 37.6649284362793, "learning_rate": 1.2279502074688797e-05, "loss": 0.3505, "step": 23271 }, { "epoch": 19.312863070539418, "grad_norm": 47.023193359375, "learning_rate": 1.2279170124481328e-05, "loss": 0.333, "step": 23272 }, { "epoch": 19.31369294605809, "grad_norm": 58.25226593017578, "learning_rate": 1.227883817427386e-05, "loss": 0.7951, "step": 23273 }, { "epoch": 19.314522821576762, "grad_norm": 43.50238800048828, "learning_rate": 1.227850622406639e-05, "loss": 0.3345, "step": 23274 }, { "epoch": 19.315352697095435, "grad_norm": 21.774797439575195, "learning_rate": 1.2278174273858923e-05, "loss": 0.6309, "step": 23275 }, { "epoch": 19.316182572614107, "grad_norm": 11.356110572814941, "learning_rate": 1.2277842323651455e-05, "loss": 0.3378, "step": 23276 }, { "epoch": 19.31701244813278, "grad_norm": 43.396732330322266, "learning_rate": 1.2277510373443983e-05, "loss": 0.6798, "step": 23277 }, { "epoch": 19.31784232365145, "grad_norm": 24.821922302246094, "learning_rate": 1.2277178423236515e-05, "loss": 0.5785, "step": 23278 }, { "epoch": 19.318672199170123, "grad_norm": 29.091161727905273, "learning_rate": 1.2276846473029048e-05, "loss": 0.746, "step": 23279 }, { "epoch": 19.319502074688796, "grad_norm": 49.62974548339844, "learning_rate": 1.2276514522821578e-05, "loss": 0.5404, "step": 23280 }, { "epoch": 19.320331950207468, "grad_norm": 25.027902603149414, "learning_rate": 1.2276182572614108e-05, "loss": 0.2963, "step": 23281 }, { "epoch": 19.32116182572614, "grad_norm": 40.85527420043945, "learning_rate": 1.227585062240664e-05, "loss": 0.5961, "step": 23282 }, { "epoch": 
19.321991701244812, "grad_norm": 31.273441314697266, "learning_rate": 1.2275518672199171e-05, "loss": 0.6498, "step": 23283 }, { "epoch": 19.322821576763484, "grad_norm": 22.94335174560547, "learning_rate": 1.2275186721991703e-05, "loss": 0.424, "step": 23284 }, { "epoch": 19.323651452282157, "grad_norm": 56.53791046142578, "learning_rate": 1.2274854771784232e-05, "loss": 0.587, "step": 23285 }, { "epoch": 19.32448132780083, "grad_norm": 36.49992370605469, "learning_rate": 1.2274522821576764e-05, "loss": 0.4423, "step": 23286 }, { "epoch": 19.3253112033195, "grad_norm": 27.52719497680664, "learning_rate": 1.2274190871369296e-05, "loss": 0.4285, "step": 23287 }, { "epoch": 19.326141078838173, "grad_norm": 129.93264770507812, "learning_rate": 1.2273858921161828e-05, "loss": 1.0187, "step": 23288 }, { "epoch": 19.326970954356845, "grad_norm": 34.75791549682617, "learning_rate": 1.2273526970954357e-05, "loss": 0.8506, "step": 23289 }, { "epoch": 19.327800829875518, "grad_norm": 32.06393814086914, "learning_rate": 1.2273195020746889e-05, "loss": 0.4824, "step": 23290 }, { "epoch": 19.32863070539419, "grad_norm": 23.862672805786133, "learning_rate": 1.2272863070539421e-05, "loss": 0.4377, "step": 23291 }, { "epoch": 19.329460580912862, "grad_norm": 57.66351318359375, "learning_rate": 1.2272531120331951e-05, "loss": 0.4709, "step": 23292 }, { "epoch": 19.330290456431534, "grad_norm": 52.07065963745117, "learning_rate": 1.2272199170124482e-05, "loss": 0.5898, "step": 23293 }, { "epoch": 19.331120331950206, "grad_norm": 40.979740142822266, "learning_rate": 1.2271867219917012e-05, "loss": 0.3631, "step": 23294 }, { "epoch": 19.33195020746888, "grad_norm": 20.900146484375, "learning_rate": 1.2271535269709544e-05, "loss": 0.3545, "step": 23295 }, { "epoch": 19.33278008298755, "grad_norm": 15.747137069702148, "learning_rate": 1.2271203319502076e-05, "loss": 0.3992, "step": 23296 }, { "epoch": 19.333609958506223, "grad_norm": 54.841087341308594, "learning_rate": 
1.2270871369294605e-05, "loss": 0.4653, "step": 23297 }, { "epoch": 19.334439834024895, "grad_norm": 40.76340103149414, "learning_rate": 1.2270539419087137e-05, "loss": 0.6605, "step": 23298 }, { "epoch": 19.335269709543567, "grad_norm": 68.10013580322266, "learning_rate": 1.227020746887967e-05, "loss": 0.5568, "step": 23299 }, { "epoch": 19.33609958506224, "grad_norm": 40.29947280883789, "learning_rate": 1.2269875518672201e-05, "loss": 0.3857, "step": 23300 }, { "epoch": 19.33692946058091, "grad_norm": 29.662399291992188, "learning_rate": 1.2269543568464732e-05, "loss": 0.4431, "step": 23301 }, { "epoch": 19.337759336099584, "grad_norm": 90.11341857910156, "learning_rate": 1.2269211618257262e-05, "loss": 0.6187, "step": 23302 }, { "epoch": 19.338589211618256, "grad_norm": 122.20318603515625, "learning_rate": 1.2268879668049793e-05, "loss": 1.0144, "step": 23303 }, { "epoch": 19.33941908713693, "grad_norm": 45.494476318359375, "learning_rate": 1.2268547717842325e-05, "loss": 0.3977, "step": 23304 }, { "epoch": 19.3402489626556, "grad_norm": 66.2430648803711, "learning_rate": 1.2268215767634857e-05, "loss": 0.434, "step": 23305 }, { "epoch": 19.341078838174273, "grad_norm": 30.692413330078125, "learning_rate": 1.2267883817427386e-05, "loss": 0.4601, "step": 23306 }, { "epoch": 19.341908713692945, "grad_norm": 31.727048873901367, "learning_rate": 1.2267551867219918e-05, "loss": 0.4663, "step": 23307 }, { "epoch": 19.342738589211617, "grad_norm": 37.70459747314453, "learning_rate": 1.226721991701245e-05, "loss": 0.4996, "step": 23308 }, { "epoch": 19.34356846473029, "grad_norm": 48.23320007324219, "learning_rate": 1.2266887966804982e-05, "loss": 0.6778, "step": 23309 }, { "epoch": 19.34439834024896, "grad_norm": 80.46619415283203, "learning_rate": 1.226655601659751e-05, "loss": 0.5005, "step": 23310 }, { "epoch": 19.345228215767634, "grad_norm": 19.094907760620117, "learning_rate": 1.2266224066390043e-05, "loss": 0.2547, "step": 23311 }, { "epoch": 19.346058091286306, 
"grad_norm": 37.02641296386719, "learning_rate": 1.2265892116182573e-05, "loss": 0.6271, "step": 23312 }, { "epoch": 19.346887966804978, "grad_norm": 109.31228637695312, "learning_rate": 1.2265560165975105e-05, "loss": 0.7938, "step": 23313 }, { "epoch": 19.34771784232365, "grad_norm": 56.726253509521484, "learning_rate": 1.2265228215767636e-05, "loss": 1.0296, "step": 23314 }, { "epoch": 19.348547717842322, "grad_norm": 26.434595108032227, "learning_rate": 1.2264896265560166e-05, "loss": 0.5013, "step": 23315 }, { "epoch": 19.349377593360995, "grad_norm": 56.382808685302734, "learning_rate": 1.2264564315352698e-05, "loss": 0.6296, "step": 23316 }, { "epoch": 19.350207468879667, "grad_norm": 69.78992462158203, "learning_rate": 1.226423236514523e-05, "loss": 0.4958, "step": 23317 }, { "epoch": 19.35103734439834, "grad_norm": 41.77690887451172, "learning_rate": 1.2263900414937759e-05, "loss": 0.2462, "step": 23318 }, { "epoch": 19.35186721991701, "grad_norm": 68.73931121826172, "learning_rate": 1.2263568464730291e-05, "loss": 1.1082, "step": 23319 }, { "epoch": 19.352697095435683, "grad_norm": 53.46902847290039, "learning_rate": 1.2263236514522823e-05, "loss": 0.7084, "step": 23320 }, { "epoch": 19.353526970954356, "grad_norm": 24.23274803161621, "learning_rate": 1.2262904564315354e-05, "loss": 0.3321, "step": 23321 }, { "epoch": 19.354356846473028, "grad_norm": 42.87570571899414, "learning_rate": 1.2262572614107884e-05, "loss": 0.4736, "step": 23322 }, { "epoch": 19.3551867219917, "grad_norm": 29.33539581298828, "learning_rate": 1.2262240663900416e-05, "loss": 0.5719, "step": 23323 }, { "epoch": 19.356016597510372, "grad_norm": 194.40406799316406, "learning_rate": 1.2261908713692947e-05, "loss": 0.6254, "step": 23324 }, { "epoch": 19.356846473029044, "grad_norm": 65.51812744140625, "learning_rate": 1.2261576763485479e-05, "loss": 1.2547, "step": 23325 }, { "epoch": 19.357676348547717, "grad_norm": 31.05182456970215, "learning_rate": 1.2261244813278007e-05, "loss": 
0.755, "step": 23326 }, { "epoch": 19.35850622406639, "grad_norm": 57.675926208496094, "learning_rate": 1.226091286307054e-05, "loss": 0.9702, "step": 23327 }, { "epoch": 19.35933609958506, "grad_norm": 52.2413330078125, "learning_rate": 1.2260580912863072e-05, "loss": 0.4756, "step": 23328 }, { "epoch": 19.360165975103733, "grad_norm": 14.307769775390625, "learning_rate": 1.2260248962655604e-05, "loss": 0.2083, "step": 23329 }, { "epoch": 19.360995850622405, "grad_norm": 122.40397644042969, "learning_rate": 1.2259917012448134e-05, "loss": 0.8588, "step": 23330 }, { "epoch": 19.361825726141078, "grad_norm": 104.89437103271484, "learning_rate": 1.2259585062240665e-05, "loss": 0.4595, "step": 23331 }, { "epoch": 19.36265560165975, "grad_norm": 45.53318405151367, "learning_rate": 1.2259253112033195e-05, "loss": 0.827, "step": 23332 }, { "epoch": 19.363485477178422, "grad_norm": 12.77584457397461, "learning_rate": 1.2258921161825727e-05, "loss": 0.3095, "step": 23333 }, { "epoch": 19.364315352697094, "grad_norm": 9.883018493652344, "learning_rate": 1.225858921161826e-05, "loss": 0.3713, "step": 23334 }, { "epoch": 19.365145228215766, "grad_norm": 58.45777893066406, "learning_rate": 1.2258257261410788e-05, "loss": 0.372, "step": 23335 }, { "epoch": 19.36597510373444, "grad_norm": 40.72942352294922, "learning_rate": 1.225792531120332e-05, "loss": 0.5377, "step": 23336 }, { "epoch": 19.36680497925311, "grad_norm": 56.12840270996094, "learning_rate": 1.2257593360995852e-05, "loss": 0.9897, "step": 23337 }, { "epoch": 19.367634854771783, "grad_norm": 31.13857650756836, "learning_rate": 1.2257261410788384e-05, "loss": 0.3148, "step": 23338 }, { "epoch": 19.368464730290455, "grad_norm": 58.48500442504883, "learning_rate": 1.2256929460580913e-05, "loss": 0.4643, "step": 23339 }, { "epoch": 19.369294605809127, "grad_norm": 7.539999485015869, "learning_rate": 1.2256597510373445e-05, "loss": 0.2328, "step": 23340 }, { "epoch": 19.3701244813278, "grad_norm": 42.33572769165039, 
"learning_rate": 1.2256265560165976e-05, "loss": 0.538, "step": 23341 }, { "epoch": 19.37095435684647, "grad_norm": 33.01655578613281, "learning_rate": 1.2255933609958508e-05, "loss": 0.6904, "step": 23342 }, { "epoch": 19.371784232365144, "grad_norm": 102.40521240234375, "learning_rate": 1.2255601659751038e-05, "loss": 1.5085, "step": 23343 }, { "epoch": 19.372614107883816, "grad_norm": 30.763315200805664, "learning_rate": 1.2255269709543568e-05, "loss": 0.5192, "step": 23344 }, { "epoch": 19.37344398340249, "grad_norm": 23.70391845703125, "learning_rate": 1.22549377593361e-05, "loss": 0.42, "step": 23345 }, { "epoch": 19.37427385892116, "grad_norm": 19.482473373413086, "learning_rate": 1.2254605809128633e-05, "loss": 0.3505, "step": 23346 }, { "epoch": 19.375103734439833, "grad_norm": 28.007610321044922, "learning_rate": 1.2254273858921161e-05, "loss": 0.5533, "step": 23347 }, { "epoch": 19.375933609958505, "grad_norm": 52.863765716552734, "learning_rate": 1.2253941908713694e-05, "loss": 0.7537, "step": 23348 }, { "epoch": 19.376763485477177, "grad_norm": 32.378379821777344, "learning_rate": 1.2253609958506226e-05, "loss": 0.3176, "step": 23349 }, { "epoch": 19.37759336099585, "grad_norm": 80.46784973144531, "learning_rate": 1.2253278008298756e-05, "loss": 0.83, "step": 23350 }, { "epoch": 19.37842323651452, "grad_norm": 67.7422103881836, "learning_rate": 1.2252946058091286e-05, "loss": 0.4466, "step": 23351 }, { "epoch": 19.379253112033194, "grad_norm": 62.02438735961914, "learning_rate": 1.2252614107883819e-05, "loss": 0.8436, "step": 23352 }, { "epoch": 19.380082987551866, "grad_norm": 50.68901062011719, "learning_rate": 1.2252282157676349e-05, "loss": 0.4973, "step": 23353 }, { "epoch": 19.380912863070538, "grad_norm": 44.226253509521484, "learning_rate": 1.2251950207468881e-05, "loss": 0.8438, "step": 23354 }, { "epoch": 19.38174273858921, "grad_norm": 39.26181411743164, "learning_rate": 1.2251618257261413e-05, "loss": 0.7595, "step": 23355 }, { "epoch": 
19.382572614107882, "grad_norm": 50.82793426513672, "learning_rate": 1.2251286307053942e-05, "loss": 0.5427, "step": 23356 }, { "epoch": 19.383402489626555, "grad_norm": 24.553743362426758, "learning_rate": 1.2250954356846474e-05, "loss": 0.3895, "step": 23357 }, { "epoch": 19.384232365145227, "grad_norm": 85.21342468261719, "learning_rate": 1.2250622406639006e-05, "loss": 1.5537, "step": 23358 }, { "epoch": 19.3850622406639, "grad_norm": 28.679441452026367, "learning_rate": 1.2250290456431537e-05, "loss": 0.6269, "step": 23359 }, { "epoch": 19.38589211618257, "grad_norm": 18.55516815185547, "learning_rate": 1.2249958506224067e-05, "loss": 0.4719, "step": 23360 }, { "epoch": 19.386721991701243, "grad_norm": 43.69548416137695, "learning_rate": 1.2249626556016599e-05, "loss": 0.7898, "step": 23361 }, { "epoch": 19.387551867219916, "grad_norm": 23.814437866210938, "learning_rate": 1.224929460580913e-05, "loss": 0.3617, "step": 23362 }, { "epoch": 19.388381742738588, "grad_norm": 27.40795135498047, "learning_rate": 1.2248962655601662e-05, "loss": 0.5332, "step": 23363 }, { "epoch": 19.38921161825726, "grad_norm": 91.97119140625, "learning_rate": 1.224863070539419e-05, "loss": 0.7714, "step": 23364 }, { "epoch": 19.390041493775932, "grad_norm": 143.1653594970703, "learning_rate": 1.2248298755186722e-05, "loss": 0.6646, "step": 23365 }, { "epoch": 19.390871369294604, "grad_norm": 54.35136032104492, "learning_rate": 1.2247966804979255e-05, "loss": 0.5826, "step": 23366 }, { "epoch": 19.391701244813277, "grad_norm": 30.455793380737305, "learning_rate": 1.2247634854771787e-05, "loss": 0.5317, "step": 23367 }, { "epoch": 19.39253112033195, "grad_norm": 39.85237121582031, "learning_rate": 1.2247302904564315e-05, "loss": 1.4118, "step": 23368 }, { "epoch": 19.39336099585062, "grad_norm": 91.40157318115234, "learning_rate": 1.2246970954356847e-05, "loss": 0.7755, "step": 23369 }, { "epoch": 19.394190871369293, "grad_norm": 37.20486068725586, "learning_rate": 
1.224663900414938e-05, "loss": 0.8356, "step": 23370 }, { "epoch": 19.395020746887965, "grad_norm": 38.01303482055664, "learning_rate": 1.224630705394191e-05, "loss": 0.3402, "step": 23371 }, { "epoch": 19.395850622406638, "grad_norm": 24.234970092773438, "learning_rate": 1.224597510373444e-05, "loss": 0.4147, "step": 23372 }, { "epoch": 19.39668049792531, "grad_norm": 48.1997184753418, "learning_rate": 1.224564315352697e-05, "loss": 1.0553, "step": 23373 }, { "epoch": 19.397510373443982, "grad_norm": 35.25064468383789, "learning_rate": 1.2245311203319503e-05, "loss": 0.742, "step": 23374 }, { "epoch": 19.398340248962654, "grad_norm": 46.5392951965332, "learning_rate": 1.2244979253112035e-05, "loss": 0.5298, "step": 23375 }, { "epoch": 19.399170124481326, "grad_norm": 37.62094497680664, "learning_rate": 1.2244647302904564e-05, "loss": 0.577, "step": 23376 }, { "epoch": 19.4, "grad_norm": 74.65604400634766, "learning_rate": 1.2244315352697096e-05, "loss": 1.2758, "step": 23377 }, { "epoch": 19.40082987551867, "grad_norm": 26.894773483276367, "learning_rate": 1.2243983402489628e-05, "loss": 0.4936, "step": 23378 }, { "epoch": 19.401659751037343, "grad_norm": 34.164955139160156, "learning_rate": 1.224365145228216e-05, "loss": 0.3912, "step": 23379 }, { "epoch": 19.402489626556015, "grad_norm": 29.005205154418945, "learning_rate": 1.224331950207469e-05, "loss": 0.3834, "step": 23380 }, { "epoch": 19.403319502074687, "grad_norm": 52.02354049682617, "learning_rate": 1.2242987551867221e-05, "loss": 0.8988, "step": 23381 }, { "epoch": 19.40414937759336, "grad_norm": 35.13784408569336, "learning_rate": 1.2242655601659751e-05, "loss": 0.5369, "step": 23382 }, { "epoch": 19.40497925311203, "grad_norm": 31.61273765563965, "learning_rate": 1.2242323651452283e-05, "loss": 0.4452, "step": 23383 }, { "epoch": 19.405809128630704, "grad_norm": 14.745405197143555, "learning_rate": 1.2241991701244816e-05, "loss": 0.3473, "step": 23384 }, { "epoch": 19.406639004149376, "grad_norm": 
23.757810592651367, "learning_rate": 1.2241659751037344e-05, "loss": 0.3365, "step": 23385 }, { "epoch": 19.40746887966805, "grad_norm": 24.866899490356445, "learning_rate": 1.2241327800829876e-05, "loss": 0.3966, "step": 23386 }, { "epoch": 19.40829875518672, "grad_norm": 101.48854064941406, "learning_rate": 1.2240995850622408e-05, "loss": 0.6096, "step": 23387 }, { "epoch": 19.409128630705393, "grad_norm": 149.62828063964844, "learning_rate": 1.2240663900414939e-05, "loss": 0.8737, "step": 23388 }, { "epoch": 19.409958506224065, "grad_norm": 37.785789489746094, "learning_rate": 1.224033195020747e-05, "loss": 0.5533, "step": 23389 }, { "epoch": 19.410788381742737, "grad_norm": 45.1309700012207, "learning_rate": 1.2240000000000001e-05, "loss": 0.4822, "step": 23390 }, { "epoch": 19.41161825726141, "grad_norm": 40.961158752441406, "learning_rate": 1.2239668049792532e-05, "loss": 0.5565, "step": 23391 }, { "epoch": 19.41244813278008, "grad_norm": 86.68018341064453, "learning_rate": 1.2239336099585064e-05, "loss": 0.8078, "step": 23392 }, { "epoch": 19.413278008298754, "grad_norm": 41.54923629760742, "learning_rate": 1.2239004149377594e-05, "loss": 0.5356, "step": 23393 }, { "epoch": 19.414107883817426, "grad_norm": 79.7476577758789, "learning_rate": 1.2238672199170125e-05, "loss": 0.7653, "step": 23394 }, { "epoch": 19.414937759336098, "grad_norm": 25.876678466796875, "learning_rate": 1.2238340248962657e-05, "loss": 0.4642, "step": 23395 }, { "epoch": 19.41576763485477, "grad_norm": 56.904109954833984, "learning_rate": 1.2238008298755189e-05, "loss": 0.5379, "step": 23396 }, { "epoch": 19.416597510373443, "grad_norm": 66.33372497558594, "learning_rate": 1.2237676348547718e-05, "loss": 0.569, "step": 23397 }, { "epoch": 19.417427385892115, "grad_norm": 111.21036529541016, "learning_rate": 1.223734439834025e-05, "loss": 0.446, "step": 23398 }, { "epoch": 19.418257261410787, "grad_norm": 21.71167755126953, "learning_rate": 1.2237012448132782e-05, "loss": 0.2996, "step": 
23399 }, { "epoch": 19.41908713692946, "grad_norm": 66.2287826538086, "learning_rate": 1.2236680497925312e-05, "loss": 1.3473, "step": 23400 }, { "epoch": 19.41991701244813, "grad_norm": 98.80620574951172, "learning_rate": 1.2236348547717843e-05, "loss": 0.577, "step": 23401 }, { "epoch": 19.420746887966803, "grad_norm": 22.637027740478516, "learning_rate": 1.2236016597510373e-05, "loss": 0.2464, "step": 23402 }, { "epoch": 19.421576763485476, "grad_norm": 11.850778579711914, "learning_rate": 1.2235684647302905e-05, "loss": 0.36, "step": 23403 }, { "epoch": 19.422406639004148, "grad_norm": 43.86542892456055, "learning_rate": 1.2235352697095437e-05, "loss": 0.8677, "step": 23404 }, { "epoch": 19.42323651452282, "grad_norm": 105.72040557861328, "learning_rate": 1.2235020746887966e-05, "loss": 0.8163, "step": 23405 }, { "epoch": 19.424066390041492, "grad_norm": 90.74398803710938, "learning_rate": 1.2234688796680498e-05, "loss": 0.9181, "step": 23406 }, { "epoch": 19.424896265560164, "grad_norm": 75.33069610595703, "learning_rate": 1.223435684647303e-05, "loss": 0.5234, "step": 23407 }, { "epoch": 19.425726141078837, "grad_norm": 76.55968475341797, "learning_rate": 1.2234024896265562e-05, "loss": 0.6275, "step": 23408 }, { "epoch": 19.42655601659751, "grad_norm": 107.598876953125, "learning_rate": 1.2233692946058093e-05, "loss": 0.7925, "step": 23409 }, { "epoch": 19.42738589211618, "grad_norm": 60.05015182495117, "learning_rate": 1.2233360995850623e-05, "loss": 0.9701, "step": 23410 }, { "epoch": 19.428215767634853, "grad_norm": 35.49273681640625, "learning_rate": 1.2233029045643154e-05, "loss": 0.5892, "step": 23411 }, { "epoch": 19.429045643153525, "grad_norm": 26.785919189453125, "learning_rate": 1.2232697095435686e-05, "loss": 0.3907, "step": 23412 }, { "epoch": 19.429875518672198, "grad_norm": 39.7702522277832, "learning_rate": 1.2232365145228218e-05, "loss": 0.3451, "step": 23413 }, { "epoch": 19.43070539419087, "grad_norm": 17.606739044189453, "learning_rate": 
1.2232033195020747e-05, "loss": 0.3517, "step": 23414 }, { "epoch": 19.431535269709542, "grad_norm": 174.46592712402344, "learning_rate": 1.2231701244813279e-05, "loss": 0.8725, "step": 23415 }, { "epoch": 19.432365145228214, "grad_norm": 14.335762023925781, "learning_rate": 1.223136929460581e-05, "loss": 0.3807, "step": 23416 }, { "epoch": 19.433195020746886, "grad_norm": 58.92921447753906, "learning_rate": 1.2231037344398343e-05, "loss": 0.9916, "step": 23417 }, { "epoch": 19.43402489626556, "grad_norm": 107.68124389648438, "learning_rate": 1.2230705394190872e-05, "loss": 0.7892, "step": 23418 }, { "epoch": 19.43485477178423, "grad_norm": 21.336288452148438, "learning_rate": 1.2230373443983404e-05, "loss": 0.4991, "step": 23419 }, { "epoch": 19.435684647302903, "grad_norm": 57.43265914916992, "learning_rate": 1.2230041493775934e-05, "loss": 0.7741, "step": 23420 }, { "epoch": 19.436514522821575, "grad_norm": 34.40850067138672, "learning_rate": 1.2229709543568466e-05, "loss": 0.3943, "step": 23421 }, { "epoch": 19.437344398340247, "grad_norm": 39.21143341064453, "learning_rate": 1.2229377593360997e-05, "loss": 0.6419, "step": 23422 }, { "epoch": 19.43817427385892, "grad_norm": 116.80950164794922, "learning_rate": 1.2229045643153527e-05, "loss": 1.1344, "step": 23423 }, { "epoch": 19.439004149377592, "grad_norm": 39.3476448059082, "learning_rate": 1.222871369294606e-05, "loss": 0.7178, "step": 23424 }, { "epoch": 19.439834024896264, "grad_norm": 39.00752639770508, "learning_rate": 1.2228381742738591e-05, "loss": 0.318, "step": 23425 }, { "epoch": 19.440663900414936, "grad_norm": 19.29625701904297, "learning_rate": 1.222804979253112e-05, "loss": 0.3229, "step": 23426 }, { "epoch": 19.44149377593361, "grad_norm": 40.88243103027344, "learning_rate": 1.2227717842323652e-05, "loss": 0.8505, "step": 23427 }, { "epoch": 19.44232365145228, "grad_norm": 36.318302154541016, "learning_rate": 1.2227385892116184e-05, "loss": 0.4044, "step": 23428 }, { "epoch": 
19.443153526970953, "grad_norm": 23.57339859008789, "learning_rate": 1.2227053941908715e-05, "loss": 0.3558, "step": 23429 }, { "epoch": 19.443983402489625, "grad_norm": 31.888355255126953, "learning_rate": 1.2226721991701245e-05, "loss": 0.4383, "step": 23430 }, { "epoch": 19.444813278008297, "grad_norm": 108.60382843017578, "learning_rate": 1.2226390041493777e-05, "loss": 0.5965, "step": 23431 }, { "epoch": 19.44564315352697, "grad_norm": 29.236623764038086, "learning_rate": 1.2226058091286308e-05, "loss": 0.5398, "step": 23432 }, { "epoch": 19.44647302904564, "grad_norm": 34.3447380065918, "learning_rate": 1.222572614107884e-05, "loss": 0.3664, "step": 23433 }, { "epoch": 19.447302904564314, "grad_norm": 23.684682846069336, "learning_rate": 1.2225394190871372e-05, "loss": 0.2642, "step": 23434 }, { "epoch": 19.448132780082986, "grad_norm": 57.062862396240234, "learning_rate": 1.22250622406639e-05, "loss": 1.0648, "step": 23435 }, { "epoch": 19.448962655601658, "grad_norm": 61.754783630371094, "learning_rate": 1.2224730290456433e-05, "loss": 1.3499, "step": 23436 }, { "epoch": 19.44979253112033, "grad_norm": 58.26706314086914, "learning_rate": 1.2224398340248965e-05, "loss": 0.6981, "step": 23437 }, { "epoch": 19.450622406639003, "grad_norm": 38.74864196777344, "learning_rate": 1.2224066390041495e-05, "loss": 0.4793, "step": 23438 }, { "epoch": 19.451452282157675, "grad_norm": 49.329872131347656, "learning_rate": 1.2223734439834026e-05, "loss": 0.4319, "step": 23439 }, { "epoch": 19.452282157676347, "grad_norm": 37.591495513916016, "learning_rate": 1.2223402489626558e-05, "loss": 0.603, "step": 23440 }, { "epoch": 19.45311203319502, "grad_norm": 37.31427001953125, "learning_rate": 1.2223070539419088e-05, "loss": 0.579, "step": 23441 }, { "epoch": 19.45394190871369, "grad_norm": 64.62395477294922, "learning_rate": 1.222273858921162e-05, "loss": 0.6336, "step": 23442 }, { "epoch": 19.454771784232364, "grad_norm": 15.836472511291504, "learning_rate": 
1.2222406639004149e-05, "loss": 0.3186, "step": 23443 }, { "epoch": 19.455601659751036, "grad_norm": 39.816959381103516, "learning_rate": 1.2222074688796681e-05, "loss": 0.5028, "step": 23444 }, { "epoch": 19.456431535269708, "grad_norm": 21.23164939880371, "learning_rate": 1.2221742738589213e-05, "loss": 0.53, "step": 23445 }, { "epoch": 19.45726141078838, "grad_norm": 43.321014404296875, "learning_rate": 1.2221410788381745e-05, "loss": 1.1609, "step": 23446 }, { "epoch": 19.458091286307052, "grad_norm": 38.297794342041016, "learning_rate": 1.2221078838174274e-05, "loss": 0.4192, "step": 23447 }, { "epoch": 19.458921161825725, "grad_norm": 37.73292922973633, "learning_rate": 1.2220746887966806e-05, "loss": 0.3083, "step": 23448 }, { "epoch": 19.459751037344397, "grad_norm": 55.975040435791016, "learning_rate": 1.2220414937759336e-05, "loss": 0.5056, "step": 23449 }, { "epoch": 19.46058091286307, "grad_norm": 42.696537017822266, "learning_rate": 1.2220082987551869e-05, "loss": 0.9206, "step": 23450 }, { "epoch": 19.46141078838174, "grad_norm": 23.398557662963867, "learning_rate": 1.2219751037344399e-05, "loss": 0.4134, "step": 23451 }, { "epoch": 19.462240663900413, "grad_norm": 8.445429801940918, "learning_rate": 1.221941908713693e-05, "loss": 0.2417, "step": 23452 }, { "epoch": 19.463070539419085, "grad_norm": 46.86134338378906, "learning_rate": 1.2219087136929461e-05, "loss": 0.4177, "step": 23453 }, { "epoch": 19.463900414937758, "grad_norm": 31.934207916259766, "learning_rate": 1.2218755186721994e-05, "loss": 0.3396, "step": 23454 }, { "epoch": 19.46473029045643, "grad_norm": 48.95780563354492, "learning_rate": 1.2218423236514522e-05, "loss": 0.371, "step": 23455 }, { "epoch": 19.465560165975102, "grad_norm": 10.658434867858887, "learning_rate": 1.2218091286307054e-05, "loss": 0.3357, "step": 23456 }, { "epoch": 19.466390041493774, "grad_norm": 48.28669738769531, "learning_rate": 1.2217759336099587e-05, "loss": 1.1207, "step": 23457 }, { "epoch": 
19.467219917012446, "grad_norm": 57.922821044921875, "learning_rate": 1.2217427385892117e-05, "loss": 0.4623, "step": 23458 }, { "epoch": 19.46804979253112, "grad_norm": 59.038719177246094, "learning_rate": 1.2217095435684649e-05, "loss": 0.8201, "step": 23459 }, { "epoch": 19.46887966804979, "grad_norm": 34.62841796875, "learning_rate": 1.221676348547718e-05, "loss": 0.5981, "step": 23460 }, { "epoch": 19.469709543568463, "grad_norm": 123.21988677978516, "learning_rate": 1.221643153526971e-05, "loss": 0.8036, "step": 23461 }, { "epoch": 19.470539419087135, "grad_norm": 55.36482620239258, "learning_rate": 1.2216099585062242e-05, "loss": 0.6002, "step": 23462 }, { "epoch": 19.471369294605807, "grad_norm": 82.35917663574219, "learning_rate": 1.2215767634854774e-05, "loss": 1.0766, "step": 23463 }, { "epoch": 19.47219917012448, "grad_norm": 41.81862258911133, "learning_rate": 1.2215435684647303e-05, "loss": 0.6784, "step": 23464 }, { "epoch": 19.473029045643152, "grad_norm": 68.05533599853516, "learning_rate": 1.2215103734439835e-05, "loss": 0.6666, "step": 23465 }, { "epoch": 19.473858921161824, "grad_norm": 44.57693099975586, "learning_rate": 1.2214771784232367e-05, "loss": 0.5501, "step": 23466 }, { "epoch": 19.474688796680496, "grad_norm": 34.67146301269531, "learning_rate": 1.2214439834024897e-05, "loss": 0.4013, "step": 23467 }, { "epoch": 19.47551867219917, "grad_norm": 43.352500915527344, "learning_rate": 1.2214107883817428e-05, "loss": 0.7239, "step": 23468 }, { "epoch": 19.47634854771784, "grad_norm": 61.507171630859375, "learning_rate": 1.221377593360996e-05, "loss": 0.6022, "step": 23469 }, { "epoch": 19.477178423236513, "grad_norm": 17.71707534790039, "learning_rate": 1.221344398340249e-05, "loss": 0.2878, "step": 23470 }, { "epoch": 19.478008298755185, "grad_norm": 42.762123107910156, "learning_rate": 1.2213112033195022e-05, "loss": 0.4808, "step": 23471 }, { "epoch": 19.478838174273857, "grad_norm": 22.69872283935547, "learning_rate": 
1.2212780082987551e-05, "loss": 0.3844, "step": 23472 }, { "epoch": 19.47966804979253, "grad_norm": 23.805545806884766, "learning_rate": 1.2212448132780083e-05, "loss": 0.3068, "step": 23473 }, { "epoch": 19.4804979253112, "grad_norm": 56.40873718261719, "learning_rate": 1.2212116182572615e-05, "loss": 0.6431, "step": 23474 }, { "epoch": 19.481327800829874, "grad_norm": 144.44143676757812, "learning_rate": 1.2211784232365148e-05, "loss": 0.6497, "step": 23475 }, { "epoch": 19.482157676348546, "grad_norm": 22.72098731994629, "learning_rate": 1.2211452282157676e-05, "loss": 0.2698, "step": 23476 }, { "epoch": 19.482987551867218, "grad_norm": 44.5533561706543, "learning_rate": 1.2211120331950208e-05, "loss": 0.6481, "step": 23477 }, { "epoch": 19.48381742738589, "grad_norm": 44.08161163330078, "learning_rate": 1.221078838174274e-05, "loss": 1.2062, "step": 23478 }, { "epoch": 19.484647302904563, "grad_norm": 24.02724266052246, "learning_rate": 1.2210456431535271e-05, "loss": 0.402, "step": 23479 }, { "epoch": 19.485477178423235, "grad_norm": 38.38784408569336, "learning_rate": 1.2210124481327801e-05, "loss": 0.526, "step": 23480 }, { "epoch": 19.486307053941907, "grad_norm": 70.07811737060547, "learning_rate": 1.2209792531120332e-05, "loss": 0.9576, "step": 23481 }, { "epoch": 19.48713692946058, "grad_norm": 17.3817195892334, "learning_rate": 1.2209460580912864e-05, "loss": 0.2858, "step": 23482 }, { "epoch": 19.48796680497925, "grad_norm": 24.027341842651367, "learning_rate": 1.2209128630705396e-05, "loss": 0.7034, "step": 23483 }, { "epoch": 19.488796680497924, "grad_norm": 59.884796142578125, "learning_rate": 1.2208796680497925e-05, "loss": 0.7109, "step": 23484 }, { "epoch": 19.489626556016596, "grad_norm": 18.370994567871094, "learning_rate": 1.2208464730290457e-05, "loss": 0.6072, "step": 23485 }, { "epoch": 19.490456431535268, "grad_norm": 28.959257125854492, "learning_rate": 1.2208132780082989e-05, "loss": 0.6469, "step": 23486 }, { "epoch": 19.49128630705394, 
"grad_norm": 12.465886116027832, "learning_rate": 1.2207800829875521e-05, "loss": 0.2935, "step": 23487 }, { "epoch": 19.492116182572612, "grad_norm": 33.87083435058594, "learning_rate": 1.2207468879668051e-05, "loss": 0.7182, "step": 23488 }, { "epoch": 19.492946058091285, "grad_norm": 119.29572296142578, "learning_rate": 1.2207136929460582e-05, "loss": 1.0137, "step": 23489 }, { "epoch": 19.49377593360996, "grad_norm": 28.307979583740234, "learning_rate": 1.2206804979253112e-05, "loss": 0.2706, "step": 23490 }, { "epoch": 19.49460580912863, "grad_norm": 32.87251281738281, "learning_rate": 1.2206473029045644e-05, "loss": 0.3955, "step": 23491 }, { "epoch": 19.495435684647305, "grad_norm": 31.705121994018555, "learning_rate": 1.2206141078838176e-05, "loss": 0.3301, "step": 23492 }, { "epoch": 19.496265560165973, "grad_norm": 19.132381439208984, "learning_rate": 1.2205809128630705e-05, "loss": 0.3525, "step": 23493 }, { "epoch": 19.49709543568465, "grad_norm": 159.5259246826172, "learning_rate": 1.2205477178423237e-05, "loss": 0.8326, "step": 23494 }, { "epoch": 19.497925311203318, "grad_norm": 50.087276458740234, "learning_rate": 1.220514522821577e-05, "loss": 0.5136, "step": 23495 }, { "epoch": 19.498755186721993, "grad_norm": 134.17343139648438, "learning_rate": 1.2204813278008301e-05, "loss": 0.508, "step": 23496 }, { "epoch": 19.499585062240662, "grad_norm": 29.356061935424805, "learning_rate": 1.220448132780083e-05, "loss": 0.3765, "step": 23497 }, { "epoch": 19.500414937759338, "grad_norm": 123.54688262939453, "learning_rate": 1.2204149377593362e-05, "loss": 0.8271, "step": 23498 }, { "epoch": 19.501244813278007, "grad_norm": 75.86539459228516, "learning_rate": 1.2203817427385893e-05, "loss": 0.7319, "step": 23499 }, { "epoch": 19.502074688796682, "grad_norm": 20.195756912231445, "learning_rate": 1.2203485477178425e-05, "loss": 0.3679, "step": 23500 }, { "epoch": 19.50290456431535, "grad_norm": 37.49209213256836, "learning_rate": 1.2203153526970955e-05, 
"loss": 0.4221, "step": 23501 }, { "epoch": 19.503734439834027, "grad_norm": 14.967569351196289, "learning_rate": 1.2202821576763486e-05, "loss": 0.3379, "step": 23502 }, { "epoch": 19.504564315352695, "grad_norm": 34.8475456237793, "learning_rate": 1.2202489626556018e-05, "loss": 0.8335, "step": 23503 }, { "epoch": 19.50539419087137, "grad_norm": 16.84505844116211, "learning_rate": 1.220215767634855e-05, "loss": 0.4219, "step": 23504 }, { "epoch": 19.50622406639004, "grad_norm": 82.28154754638672, "learning_rate": 1.2201825726141079e-05, "loss": 0.6862, "step": 23505 }, { "epoch": 19.507053941908715, "grad_norm": 83.78990936279297, "learning_rate": 1.220149377593361e-05, "loss": 0.3675, "step": 23506 }, { "epoch": 19.507883817427384, "grad_norm": 104.11419677734375, "learning_rate": 1.2201161825726143e-05, "loss": 0.5226, "step": 23507 }, { "epoch": 19.50871369294606, "grad_norm": 41.54027557373047, "learning_rate": 1.2200829875518673e-05, "loss": 0.7731, "step": 23508 }, { "epoch": 19.50954356846473, "grad_norm": 23.33441925048828, "learning_rate": 1.2200497925311204e-05, "loss": 0.4208, "step": 23509 }, { "epoch": 19.510373443983404, "grad_norm": 32.117637634277344, "learning_rate": 1.2200165975103736e-05, "loss": 0.2987, "step": 23510 }, { "epoch": 19.511203319502073, "grad_norm": 36.114410400390625, "learning_rate": 1.2199834024896266e-05, "loss": 0.8285, "step": 23511 }, { "epoch": 19.51203319502075, "grad_norm": 87.29510498046875, "learning_rate": 1.2199502074688798e-05, "loss": 1.0062, "step": 23512 }, { "epoch": 19.512863070539417, "grad_norm": 36.56289291381836, "learning_rate": 1.219917012448133e-05, "loss": 0.6794, "step": 23513 }, { "epoch": 19.513692946058093, "grad_norm": 121.17326354980469, "learning_rate": 1.2198838174273859e-05, "loss": 0.4695, "step": 23514 }, { "epoch": 19.51452282157676, "grad_norm": 36.46261978149414, "learning_rate": 1.2198506224066391e-05, "loss": 0.6655, "step": 23515 }, { "epoch": 19.515352697095437, "grad_norm": 
84.18913269042969, "learning_rate": 1.2198174273858923e-05, "loss": 0.8981, "step": 23516 }, { "epoch": 19.51618257261411, "grad_norm": 90.8463363647461, "learning_rate": 1.2197842323651454e-05, "loss": 0.8084, "step": 23517 }, { "epoch": 19.517012448132782, "grad_norm": 50.939697265625, "learning_rate": 1.2197510373443984e-05, "loss": 0.8726, "step": 23518 }, { "epoch": 19.517842323651454, "grad_norm": 59.32215118408203, "learning_rate": 1.2197178423236515e-05, "loss": 1.0479, "step": 23519 }, { "epoch": 19.518672199170126, "grad_norm": 30.471614837646484, "learning_rate": 1.2196846473029047e-05, "loss": 0.4775, "step": 23520 }, { "epoch": 19.5195020746888, "grad_norm": 56.582515716552734, "learning_rate": 1.2196514522821579e-05, "loss": 0.633, "step": 23521 }, { "epoch": 19.52033195020747, "grad_norm": 63.93098831176758, "learning_rate": 1.2196182572614107e-05, "loss": 0.2625, "step": 23522 }, { "epoch": 19.521161825726143, "grad_norm": 45.809932708740234, "learning_rate": 1.219585062240664e-05, "loss": 0.6219, "step": 23523 }, { "epoch": 19.521991701244815, "grad_norm": 14.547477722167969, "learning_rate": 1.2195518672199172e-05, "loss": 0.3527, "step": 23524 }, { "epoch": 19.522821576763487, "grad_norm": 27.105533599853516, "learning_rate": 1.2195186721991704e-05, "loss": 0.8187, "step": 23525 }, { "epoch": 19.52365145228216, "grad_norm": 109.89376068115234, "learning_rate": 1.2194854771784233e-05, "loss": 0.5878, "step": 23526 }, { "epoch": 19.52448132780083, "grad_norm": 28.009140014648438, "learning_rate": 1.2194522821576765e-05, "loss": 0.5063, "step": 23527 }, { "epoch": 19.525311203319504, "grad_norm": 55.7808837890625, "learning_rate": 1.2194190871369295e-05, "loss": 0.9747, "step": 23528 }, { "epoch": 19.526141078838176, "grad_norm": 62.13850021362305, "learning_rate": 1.2193858921161827e-05, "loss": 0.7598, "step": 23529 }, { "epoch": 19.526970954356848, "grad_norm": 12.397394180297852, "learning_rate": 1.2193526970954358e-05, "loss": 0.2971, "step": 
23530 }, { "epoch": 19.52780082987552, "grad_norm": 88.65164184570312, "learning_rate": 1.2193195020746888e-05, "loss": 0.5215, "step": 23531 }, { "epoch": 19.528630705394193, "grad_norm": 80.06608581542969, "learning_rate": 1.219286307053942e-05, "loss": 0.3787, "step": 23532 }, { "epoch": 19.529460580912865, "grad_norm": 30.19135284423828, "learning_rate": 1.2192531120331952e-05, "loss": 0.4483, "step": 23533 }, { "epoch": 19.530290456431537, "grad_norm": 39.419395446777344, "learning_rate": 1.2192199170124481e-05, "loss": 0.8267, "step": 23534 }, { "epoch": 19.53112033195021, "grad_norm": 58.5561637878418, "learning_rate": 1.2191867219917013e-05, "loss": 0.5067, "step": 23535 }, { "epoch": 19.53195020746888, "grad_norm": 42.77517318725586, "learning_rate": 1.2191535269709545e-05, "loss": 0.5599, "step": 23536 }, { "epoch": 19.532780082987554, "grad_norm": 20.38264274597168, "learning_rate": 1.2191203319502076e-05, "loss": 0.3001, "step": 23537 }, { "epoch": 19.533609958506226, "grad_norm": 100.10595703125, "learning_rate": 1.2190871369294608e-05, "loss": 1.058, "step": 23538 }, { "epoch": 19.534439834024898, "grad_norm": 24.661287307739258, "learning_rate": 1.2190539419087138e-05, "loss": 0.5656, "step": 23539 }, { "epoch": 19.53526970954357, "grad_norm": 24.7049560546875, "learning_rate": 1.2190207468879668e-05, "loss": 0.4186, "step": 23540 }, { "epoch": 19.536099585062242, "grad_norm": 24.902587890625, "learning_rate": 1.21898755186722e-05, "loss": 0.4452, "step": 23541 }, { "epoch": 19.536929460580915, "grad_norm": 47.254051208496094, "learning_rate": 1.2189543568464733e-05, "loss": 0.6455, "step": 23542 }, { "epoch": 19.537759336099587, "grad_norm": 28.47188377380371, "learning_rate": 1.2189211618257261e-05, "loss": 0.5342, "step": 23543 }, { "epoch": 19.53858921161826, "grad_norm": 69.5471420288086, "learning_rate": 1.2188879668049794e-05, "loss": 0.9255, "step": 23544 }, { "epoch": 19.53941908713693, "grad_norm": 44.02875900268555, "learning_rate": 
1.2188547717842326e-05, "loss": 0.4767, "step": 23545 }, { "epoch": 19.540248962655603, "grad_norm": 74.34307861328125, "learning_rate": 1.2188215767634856e-05, "loss": 0.801, "step": 23546 }, { "epoch": 19.541078838174275, "grad_norm": 36.0087776184082, "learning_rate": 1.2187883817427386e-05, "loss": 0.4218, "step": 23547 }, { "epoch": 19.541908713692948, "grad_norm": 17.04020881652832, "learning_rate": 1.2187551867219919e-05, "loss": 0.3181, "step": 23548 }, { "epoch": 19.54273858921162, "grad_norm": 65.45307922363281, "learning_rate": 1.2187219917012449e-05, "loss": 0.5929, "step": 23549 }, { "epoch": 19.543568464730292, "grad_norm": 32.740352630615234, "learning_rate": 1.2186887966804981e-05, "loss": 0.6174, "step": 23550 }, { "epoch": 19.544398340248964, "grad_norm": 10.929577827453613, "learning_rate": 1.218655601659751e-05, "loss": 0.2197, "step": 23551 }, { "epoch": 19.545228215767636, "grad_norm": 34.30669021606445, "learning_rate": 1.2186224066390042e-05, "loss": 0.434, "step": 23552 }, { "epoch": 19.54605809128631, "grad_norm": 56.310176849365234, "learning_rate": 1.2185892116182574e-05, "loss": 0.3835, "step": 23553 }, { "epoch": 19.54688796680498, "grad_norm": 47.43811798095703, "learning_rate": 1.2185560165975106e-05, "loss": 0.5989, "step": 23554 }, { "epoch": 19.547717842323653, "grad_norm": 41.42866897583008, "learning_rate": 1.2185228215767635e-05, "loss": 0.6303, "step": 23555 }, { "epoch": 19.548547717842325, "grad_norm": 122.63723754882812, "learning_rate": 1.2184896265560167e-05, "loss": 0.499, "step": 23556 }, { "epoch": 19.549377593360997, "grad_norm": 101.41272735595703, "learning_rate": 1.2184564315352699e-05, "loss": 0.4979, "step": 23557 }, { "epoch": 19.55020746887967, "grad_norm": 68.9608383178711, "learning_rate": 1.218423236514523e-05, "loss": 0.5935, "step": 23558 }, { "epoch": 19.551037344398342, "grad_norm": 43.84592819213867, "learning_rate": 1.218390041493776e-05, "loss": 0.6112, "step": 23559 }, { "epoch": 19.551867219917014, 
"grad_norm": 31.877439498901367, "learning_rate": 1.218356846473029e-05, "loss": 0.5415, "step": 23560 }, { "epoch": 19.552697095435686, "grad_norm": 21.25442123413086, "learning_rate": 1.2183236514522822e-05, "loss": 0.4257, "step": 23561 }, { "epoch": 19.55352697095436, "grad_norm": 15.157034873962402, "learning_rate": 1.2182904564315354e-05, "loss": 0.3449, "step": 23562 }, { "epoch": 19.55435684647303, "grad_norm": 28.268970489501953, "learning_rate": 1.2182572614107883e-05, "loss": 0.4078, "step": 23563 }, { "epoch": 19.555186721991703, "grad_norm": 69.9295425415039, "learning_rate": 1.2182240663900415e-05, "loss": 0.5841, "step": 23564 }, { "epoch": 19.556016597510375, "grad_norm": 50.55995559692383, "learning_rate": 1.2181908713692947e-05, "loss": 0.4861, "step": 23565 }, { "epoch": 19.556846473029047, "grad_norm": 30.397680282592773, "learning_rate": 1.2181576763485478e-05, "loss": 0.7941, "step": 23566 }, { "epoch": 19.55767634854772, "grad_norm": 25.054588317871094, "learning_rate": 1.218124481327801e-05, "loss": 0.3196, "step": 23567 }, { "epoch": 19.55850622406639, "grad_norm": 51.58271789550781, "learning_rate": 1.218091286307054e-05, "loss": 0.5216, "step": 23568 }, { "epoch": 19.559336099585064, "grad_norm": 51.555667877197266, "learning_rate": 1.218058091286307e-05, "loss": 0.5769, "step": 23569 }, { "epoch": 19.560165975103736, "grad_norm": 255.2854461669922, "learning_rate": 1.2180248962655603e-05, "loss": 0.4778, "step": 23570 }, { "epoch": 19.560995850622408, "grad_norm": 26.513185501098633, "learning_rate": 1.2179917012448135e-05, "loss": 0.3332, "step": 23571 }, { "epoch": 19.56182572614108, "grad_norm": 35.3983268737793, "learning_rate": 1.2179585062240664e-05, "loss": 0.7557, "step": 23572 }, { "epoch": 19.562655601659753, "grad_norm": 62.50508117675781, "learning_rate": 1.2179253112033196e-05, "loss": 1.0397, "step": 23573 }, { "epoch": 19.563485477178425, "grad_norm": 53.35866165161133, "learning_rate": 1.2178921161825728e-05, "loss": 
0.5042, "step": 23574 }, { "epoch": 19.564315352697097, "grad_norm": 69.81379699707031, "learning_rate": 1.2178589211618258e-05, "loss": 0.6216, "step": 23575 }, { "epoch": 19.56514522821577, "grad_norm": 24.52911949157715, "learning_rate": 1.2178257261410789e-05, "loss": 0.339, "step": 23576 }, { "epoch": 19.56597510373444, "grad_norm": 51.70841979980469, "learning_rate": 1.2177925311203321e-05, "loss": 0.5995, "step": 23577 }, { "epoch": 19.566804979253114, "grad_norm": 29.34541893005371, "learning_rate": 1.2177593360995851e-05, "loss": 0.3056, "step": 23578 }, { "epoch": 19.567634854771786, "grad_norm": 61.57466506958008, "learning_rate": 1.2177261410788383e-05, "loss": 0.5211, "step": 23579 }, { "epoch": 19.568464730290458, "grad_norm": 71.35128021240234, "learning_rate": 1.2176929460580914e-05, "loss": 0.4276, "step": 23580 }, { "epoch": 19.56929460580913, "grad_norm": 25.840972900390625, "learning_rate": 1.2176597510373444e-05, "loss": 0.3276, "step": 23581 }, { "epoch": 19.570124481327802, "grad_norm": 80.2052993774414, "learning_rate": 1.2176265560165976e-05, "loss": 0.9039, "step": 23582 }, { "epoch": 19.570954356846475, "grad_norm": 32.839168548583984, "learning_rate": 1.2175933609958508e-05, "loss": 0.3083, "step": 23583 }, { "epoch": 19.571784232365147, "grad_norm": 21.56446075439453, "learning_rate": 1.2175601659751037e-05, "loss": 0.4624, "step": 23584 }, { "epoch": 19.57261410788382, "grad_norm": 37.7010612487793, "learning_rate": 1.217526970954357e-05, "loss": 0.3861, "step": 23585 }, { "epoch": 19.57344398340249, "grad_norm": 35.22346878051758, "learning_rate": 1.2174937759336101e-05, "loss": 0.4141, "step": 23586 }, { "epoch": 19.574273858921163, "grad_norm": 39.863853454589844, "learning_rate": 1.2174605809128632e-05, "loss": 0.4186, "step": 23587 }, { "epoch": 19.575103734439836, "grad_norm": 139.4908905029297, "learning_rate": 1.2174273858921162e-05, "loss": 0.5419, "step": 23588 }, { "epoch": 19.575933609958508, "grad_norm": 42.63896179199219, 
"learning_rate": 1.2173941908713693e-05, "loss": 0.509, "step": 23589 }, { "epoch": 19.57676348547718, "grad_norm": 64.83887481689453, "learning_rate": 1.2173609958506225e-05, "loss": 0.7368, "step": 23590 }, { "epoch": 19.577593360995852, "grad_norm": 52.34571838378906, "learning_rate": 1.2173278008298757e-05, "loss": 0.4787, "step": 23591 }, { "epoch": 19.578423236514524, "grad_norm": 47.9932861328125, "learning_rate": 1.2172946058091289e-05, "loss": 0.3489, "step": 23592 }, { "epoch": 19.579253112033197, "grad_norm": 43.40193176269531, "learning_rate": 1.2172614107883818e-05, "loss": 0.4959, "step": 23593 }, { "epoch": 19.58008298755187, "grad_norm": 48.17012023925781, "learning_rate": 1.217228215767635e-05, "loss": 0.7562, "step": 23594 }, { "epoch": 19.58091286307054, "grad_norm": 52.57120895385742, "learning_rate": 1.2171950207468882e-05, "loss": 0.4781, "step": 23595 }, { "epoch": 19.581742738589213, "grad_norm": 44.100120544433594, "learning_rate": 1.2171618257261412e-05, "loss": 0.6587, "step": 23596 }, { "epoch": 19.582572614107885, "grad_norm": 53.790809631347656, "learning_rate": 1.2171286307053943e-05, "loss": 0.4528, "step": 23597 }, { "epoch": 19.583402489626557, "grad_norm": 53.07167434692383, "learning_rate": 1.2170954356846473e-05, "loss": 0.6346, "step": 23598 }, { "epoch": 19.58423236514523, "grad_norm": 31.948692321777344, "learning_rate": 1.2170622406639005e-05, "loss": 0.3205, "step": 23599 }, { "epoch": 19.585062240663902, "grad_norm": 30.736814498901367, "learning_rate": 1.2170290456431537e-05, "loss": 0.5509, "step": 23600 }, { "epoch": 19.585892116182574, "grad_norm": 54.39112091064453, "learning_rate": 1.2169958506224066e-05, "loss": 0.802, "step": 23601 }, { "epoch": 19.586721991701246, "grad_norm": 15.836645126342773, "learning_rate": 1.2169626556016598e-05, "loss": 0.2775, "step": 23602 }, { "epoch": 19.58755186721992, "grad_norm": 32.30561065673828, "learning_rate": 1.216929460580913e-05, "loss": 0.5061, "step": 23603 }, { "epoch": 
19.58838174273859, "grad_norm": 33.680274963378906, "learning_rate": 1.2168962655601662e-05, "loss": 0.3089, "step": 23604 }, { "epoch": 19.589211618257263, "grad_norm": 37.2278938293457, "learning_rate": 1.2168630705394191e-05, "loss": 0.3538, "step": 23605 }, { "epoch": 19.590041493775935, "grad_norm": 27.96431541442871, "learning_rate": 1.2168298755186723e-05, "loss": 0.3755, "step": 23606 }, { "epoch": 19.590871369294607, "grad_norm": 38.1223258972168, "learning_rate": 1.2167966804979254e-05, "loss": 0.6514, "step": 23607 }, { "epoch": 19.59170124481328, "grad_norm": 74.26069641113281, "learning_rate": 1.2167634854771786e-05, "loss": 0.6302, "step": 23608 }, { "epoch": 19.59253112033195, "grad_norm": 16.316307067871094, "learning_rate": 1.2167302904564316e-05, "loss": 0.2414, "step": 23609 }, { "epoch": 19.593360995850624, "grad_norm": 16.195446014404297, "learning_rate": 1.2166970954356847e-05, "loss": 0.2395, "step": 23610 }, { "epoch": 19.594190871369296, "grad_norm": 85.17546844482422, "learning_rate": 1.2166639004149379e-05, "loss": 0.8692, "step": 23611 }, { "epoch": 19.59502074688797, "grad_norm": 51.5944709777832, "learning_rate": 1.216630705394191e-05, "loss": 0.6781, "step": 23612 }, { "epoch": 19.59585062240664, "grad_norm": 31.429689407348633, "learning_rate": 1.216597510373444e-05, "loss": 0.4926, "step": 23613 }, { "epoch": 19.596680497925313, "grad_norm": 46.3596305847168, "learning_rate": 1.2165643153526972e-05, "loss": 0.8328, "step": 23614 }, { "epoch": 19.597510373443985, "grad_norm": 17.24855613708496, "learning_rate": 1.2165311203319504e-05, "loss": 0.3496, "step": 23615 }, { "epoch": 19.598340248962657, "grad_norm": 24.82795524597168, "learning_rate": 1.2164979253112034e-05, "loss": 0.4515, "step": 23616 }, { "epoch": 19.59917012448133, "grad_norm": 102.66006469726562, "learning_rate": 1.2164647302904566e-05, "loss": 0.5777, "step": 23617 }, { "epoch": 19.6, "grad_norm": 46.15195846557617, "learning_rate": 1.2164315352697097e-05, "loss": 
0.5427, "step": 23618 }, { "epoch": 19.600829875518674, "grad_norm": 45.04432678222656, "learning_rate": 1.2163983402489627e-05, "loss": 0.5895, "step": 23619 }, { "epoch": 19.601659751037346, "grad_norm": 29.37443733215332, "learning_rate": 1.2163651452282159e-05, "loss": 0.462, "step": 23620 }, { "epoch": 19.602489626556018, "grad_norm": 41.971004486083984, "learning_rate": 1.2163319502074691e-05, "loss": 1.1295, "step": 23621 }, { "epoch": 19.60331950207469, "grad_norm": 73.60417938232422, "learning_rate": 1.216298755186722e-05, "loss": 0.8545, "step": 23622 }, { "epoch": 19.604149377593362, "grad_norm": 104.16305541992188, "learning_rate": 1.2162655601659752e-05, "loss": 1.3687, "step": 23623 }, { "epoch": 19.604979253112035, "grad_norm": 26.29170799255371, "learning_rate": 1.2162323651452284e-05, "loss": 0.356, "step": 23624 }, { "epoch": 19.605809128630707, "grad_norm": 50.3004264831543, "learning_rate": 1.2161991701244815e-05, "loss": 1.2193, "step": 23625 }, { "epoch": 19.60663900414938, "grad_norm": 32.955421447753906, "learning_rate": 1.2161659751037345e-05, "loss": 0.6399, "step": 23626 }, { "epoch": 19.60746887966805, "grad_norm": 68.38849639892578, "learning_rate": 1.2161327800829877e-05, "loss": 1.5572, "step": 23627 }, { "epoch": 19.608298755186723, "grad_norm": 39.45838928222656, "learning_rate": 1.2160995850622408e-05, "loss": 0.7566, "step": 23628 }, { "epoch": 19.609128630705396, "grad_norm": 64.83971405029297, "learning_rate": 1.216066390041494e-05, "loss": 1.1638, "step": 23629 }, { "epoch": 19.609958506224068, "grad_norm": 25.374298095703125, "learning_rate": 1.2160331950207468e-05, "loss": 0.5689, "step": 23630 }, { "epoch": 19.61078838174274, "grad_norm": 20.204021453857422, "learning_rate": 1.216e-05, "loss": 0.4122, "step": 23631 }, { "epoch": 19.611618257261412, "grad_norm": 36.981903076171875, "learning_rate": 1.2159668049792533e-05, "loss": 0.8045, "step": 23632 }, { "epoch": 19.612448132780084, "grad_norm": 37.23401641845703, 
"learning_rate": 1.2159336099585065e-05, "loss": 0.6145, "step": 23633 }, { "epoch": 19.613278008298757, "grad_norm": 17.924423217773438, "learning_rate": 1.2159004149377593e-05, "loss": 0.5718, "step": 23634 }, { "epoch": 19.61410788381743, "grad_norm": 80.90381622314453, "learning_rate": 1.2158672199170126e-05, "loss": 0.6691, "step": 23635 }, { "epoch": 19.6149377593361, "grad_norm": 48.0706672668457, "learning_rate": 1.2158340248962656e-05, "loss": 0.5732, "step": 23636 }, { "epoch": 19.615767634854773, "grad_norm": 16.362751007080078, "learning_rate": 1.2158008298755188e-05, "loss": 0.413, "step": 23637 }, { "epoch": 19.616597510373445, "grad_norm": 34.80287551879883, "learning_rate": 1.2157676348547718e-05, "loss": 0.4082, "step": 23638 }, { "epoch": 19.617427385892118, "grad_norm": 37.479331970214844, "learning_rate": 1.2157344398340249e-05, "loss": 0.7662, "step": 23639 }, { "epoch": 19.61825726141079, "grad_norm": 49.92367172241211, "learning_rate": 1.2157012448132781e-05, "loss": 0.5219, "step": 23640 }, { "epoch": 19.619087136929462, "grad_norm": 85.85396575927734, "learning_rate": 1.2156680497925313e-05, "loss": 0.5544, "step": 23641 }, { "epoch": 19.619917012448134, "grad_norm": 24.240598678588867, "learning_rate": 1.2156348547717842e-05, "loss": 0.4952, "step": 23642 }, { "epoch": 19.620746887966806, "grad_norm": 58.73554229736328, "learning_rate": 1.2156016597510374e-05, "loss": 1.1286, "step": 23643 }, { "epoch": 19.62157676348548, "grad_norm": 43.023223876953125, "learning_rate": 1.2155684647302906e-05, "loss": 0.9422, "step": 23644 }, { "epoch": 19.62240663900415, "grad_norm": 23.632205963134766, "learning_rate": 1.2155352697095436e-05, "loss": 0.3898, "step": 23645 }, { "epoch": 19.623236514522823, "grad_norm": 45.50345993041992, "learning_rate": 1.2155020746887969e-05, "loss": 0.8422, "step": 23646 }, { "epoch": 19.624066390041495, "grad_norm": 35.41149139404297, "learning_rate": 1.2154688796680499e-05, "loss": 0.6867, "step": 23647 }, { 
"epoch": 19.624896265560167, "grad_norm": 25.787296295166016, "learning_rate": 1.215435684647303e-05, "loss": 0.4037, "step": 23648 }, { "epoch": 19.62572614107884, "grad_norm": 17.647523880004883, "learning_rate": 1.2154024896265561e-05, "loss": 0.2898, "step": 23649 }, { "epoch": 19.62655601659751, "grad_norm": 15.198878288269043, "learning_rate": 1.2153692946058094e-05, "loss": 0.2598, "step": 23650 }, { "epoch": 19.627385892116184, "grad_norm": 23.112070083618164, "learning_rate": 1.2153360995850622e-05, "loss": 0.4201, "step": 23651 }, { "epoch": 19.628215767634856, "grad_norm": 17.197362899780273, "learning_rate": 1.2153029045643154e-05, "loss": 0.411, "step": 23652 }, { "epoch": 19.62904564315353, "grad_norm": 51.83090591430664, "learning_rate": 1.2152697095435687e-05, "loss": 0.716, "step": 23653 }, { "epoch": 19.6298755186722, "grad_norm": 27.885211944580078, "learning_rate": 1.2152365145228217e-05, "loss": 0.9512, "step": 23654 }, { "epoch": 19.630705394190873, "grad_norm": 58.87021255493164, "learning_rate": 1.2152033195020747e-05, "loss": 0.7142, "step": 23655 }, { "epoch": 19.631535269709545, "grad_norm": 73.809326171875, "learning_rate": 1.215170124481328e-05, "loss": 0.6866, "step": 23656 }, { "epoch": 19.632365145228217, "grad_norm": 29.02088737487793, "learning_rate": 1.215136929460581e-05, "loss": 0.3631, "step": 23657 }, { "epoch": 19.63319502074689, "grad_norm": 43.97351837158203, "learning_rate": 1.2151037344398342e-05, "loss": 0.5978, "step": 23658 }, { "epoch": 19.63402489626556, "grad_norm": 32.79393005371094, "learning_rate": 1.215070539419087e-05, "loss": 0.4634, "step": 23659 }, { "epoch": 19.634854771784234, "grad_norm": 51.287071228027344, "learning_rate": 1.2150373443983403e-05, "loss": 0.5353, "step": 23660 }, { "epoch": 19.635684647302906, "grad_norm": 41.36356735229492, "learning_rate": 1.2150041493775935e-05, "loss": 0.4654, "step": 23661 }, { "epoch": 19.636514522821578, "grad_norm": 32.97511291503906, "learning_rate": 
1.2149709543568467e-05, "loss": 0.8095, "step": 23662 }, { "epoch": 19.63734439834025, "grad_norm": 19.051349639892578, "learning_rate": 1.2149377593360996e-05, "loss": 0.3147, "step": 23663 }, { "epoch": 19.638174273858922, "grad_norm": 24.016752243041992, "learning_rate": 1.2149045643153528e-05, "loss": 0.5048, "step": 23664 }, { "epoch": 19.639004149377595, "grad_norm": 25.804645538330078, "learning_rate": 1.214871369294606e-05, "loss": 0.3566, "step": 23665 }, { "epoch": 19.639834024896267, "grad_norm": 28.41779327392578, "learning_rate": 1.214838174273859e-05, "loss": 0.6622, "step": 23666 }, { "epoch": 19.64066390041494, "grad_norm": 29.506025314331055, "learning_rate": 1.214804979253112e-05, "loss": 0.7788, "step": 23667 }, { "epoch": 19.64149377593361, "grad_norm": 55.45180130004883, "learning_rate": 1.2147717842323651e-05, "loss": 0.383, "step": 23668 }, { "epoch": 19.642323651452283, "grad_norm": 77.80854797363281, "learning_rate": 1.2147385892116183e-05, "loss": 0.6167, "step": 23669 }, { "epoch": 19.643153526970956, "grad_norm": 52.47123718261719, "learning_rate": 1.2147053941908715e-05, "loss": 0.5269, "step": 23670 }, { "epoch": 19.643983402489628, "grad_norm": 42.65156555175781, "learning_rate": 1.2146721991701248e-05, "loss": 1.1545, "step": 23671 }, { "epoch": 19.6448132780083, "grad_norm": 23.665433883666992, "learning_rate": 1.2146390041493776e-05, "loss": 0.3505, "step": 23672 }, { "epoch": 19.645643153526972, "grad_norm": 50.87181091308594, "learning_rate": 1.2146058091286308e-05, "loss": 0.8064, "step": 23673 }, { "epoch": 19.646473029045644, "grad_norm": 39.169002532958984, "learning_rate": 1.214572614107884e-05, "loss": 0.447, "step": 23674 }, { "epoch": 19.647302904564317, "grad_norm": 45.92135238647461, "learning_rate": 1.2145394190871371e-05, "loss": 0.492, "step": 23675 }, { "epoch": 19.64813278008299, "grad_norm": 46.64751434326172, "learning_rate": 1.2145062240663901e-05, "loss": 0.8216, "step": 23676 }, { "epoch": 19.64896265560166, 
"grad_norm": 62.353919982910156, "learning_rate": 1.2144730290456432e-05, "loss": 0.7395, "step": 23677 }, { "epoch": 19.649792531120333, "grad_norm": 29.041112899780273, "learning_rate": 1.2144398340248964e-05, "loss": 0.5981, "step": 23678 }, { "epoch": 19.650622406639005, "grad_norm": 28.784748077392578, "learning_rate": 1.2144066390041496e-05, "loss": 0.7232, "step": 23679 }, { "epoch": 19.651452282157678, "grad_norm": 64.05693817138672, "learning_rate": 1.2143734439834025e-05, "loss": 0.2951, "step": 23680 }, { "epoch": 19.65228215767635, "grad_norm": 29.65126609802246, "learning_rate": 1.2143402489626557e-05, "loss": 0.4519, "step": 23681 }, { "epoch": 19.653112033195022, "grad_norm": 32.74720001220703, "learning_rate": 1.2143070539419089e-05, "loss": 0.3951, "step": 23682 }, { "epoch": 19.653941908713694, "grad_norm": 52.639984130859375, "learning_rate": 1.214273858921162e-05, "loss": 0.5232, "step": 23683 }, { "epoch": 19.654771784232366, "grad_norm": 28.791000366210938, "learning_rate": 1.214240663900415e-05, "loss": 0.5463, "step": 23684 }, { "epoch": 19.65560165975104, "grad_norm": 46.77262496948242, "learning_rate": 1.2142074688796682e-05, "loss": 0.7717, "step": 23685 }, { "epoch": 19.65643153526971, "grad_norm": 34.61600112915039, "learning_rate": 1.2141742738589212e-05, "loss": 0.6466, "step": 23686 }, { "epoch": 19.657261410788383, "grad_norm": 36.41764450073242, "learning_rate": 1.2141410788381744e-05, "loss": 0.5506, "step": 23687 }, { "epoch": 19.658091286307055, "grad_norm": 35.87940979003906, "learning_rate": 1.2141078838174275e-05, "loss": 0.6564, "step": 23688 }, { "epoch": 19.658921161825727, "grad_norm": 29.149255752563477, "learning_rate": 1.2140746887966805e-05, "loss": 0.3316, "step": 23689 }, { "epoch": 19.6597510373444, "grad_norm": 29.48280906677246, "learning_rate": 1.2140414937759337e-05, "loss": 0.6977, "step": 23690 }, { "epoch": 19.66058091286307, "grad_norm": 36.57709884643555, "learning_rate": 1.214008298755187e-05, "loss": 
0.4258, "step": 23691 }, { "epoch": 19.661410788381744, "grad_norm": 31.867904663085938, "learning_rate": 1.2139751037344398e-05, "loss": 0.8157, "step": 23692 }, { "epoch": 19.662240663900416, "grad_norm": 62.88938903808594, "learning_rate": 1.213941908713693e-05, "loss": 0.5353, "step": 23693 }, { "epoch": 19.66307053941909, "grad_norm": 42.50875473022461, "learning_rate": 1.2139087136929462e-05, "loss": 0.7675, "step": 23694 }, { "epoch": 19.66390041493776, "grad_norm": 65.19171905517578, "learning_rate": 1.2138755186721993e-05, "loss": 0.598, "step": 23695 }, { "epoch": 19.664730290456433, "grad_norm": 68.74626159667969, "learning_rate": 1.2138423236514525e-05, "loss": 0.921, "step": 23696 }, { "epoch": 19.665560165975105, "grad_norm": 145.36373901367188, "learning_rate": 1.2138091286307055e-05, "loss": 1.6651, "step": 23697 }, { "epoch": 19.666390041493777, "grad_norm": 47.119014739990234, "learning_rate": 1.2137759336099586e-05, "loss": 0.784, "step": 23698 }, { "epoch": 19.66721991701245, "grad_norm": 48.6309700012207, "learning_rate": 1.2137427385892118e-05, "loss": 0.8736, "step": 23699 }, { "epoch": 19.66804979253112, "grad_norm": 69.06124877929688, "learning_rate": 1.213709543568465e-05, "loss": 0.9336, "step": 23700 }, { "epoch": 19.668879668049794, "grad_norm": 51.737205505371094, "learning_rate": 1.2136763485477179e-05, "loss": 0.8618, "step": 23701 }, { "epoch": 19.669709543568466, "grad_norm": 31.38808822631836, "learning_rate": 1.213643153526971e-05, "loss": 0.4172, "step": 23702 }, { "epoch": 19.670539419087138, "grad_norm": 144.48939514160156, "learning_rate": 1.2136099585062243e-05, "loss": 0.3501, "step": 23703 }, { "epoch": 19.67136929460581, "grad_norm": 46.169254302978516, "learning_rate": 1.2135767634854773e-05, "loss": 0.5924, "step": 23704 }, { "epoch": 19.672199170124482, "grad_norm": 43.78098678588867, "learning_rate": 1.2135435684647304e-05, "loss": 1.1474, "step": 23705 }, { "epoch": 19.673029045643155, "grad_norm": 
36.425174713134766, "learning_rate": 1.2135103734439834e-05, "loss": 0.4156, "step": 23706 }, { "epoch": 19.673858921161827, "grad_norm": 39.701541900634766, "learning_rate": 1.2134771784232366e-05, "loss": 0.665, "step": 23707 }, { "epoch": 19.6746887966805, "grad_norm": 36.485111236572266, "learning_rate": 1.2134439834024898e-05, "loss": 1.1695, "step": 23708 }, { "epoch": 19.67551867219917, "grad_norm": 33.41413116455078, "learning_rate": 1.2134107883817427e-05, "loss": 0.4527, "step": 23709 }, { "epoch": 19.676348547717843, "grad_norm": 49.5323486328125, "learning_rate": 1.2133775933609959e-05, "loss": 0.3999, "step": 23710 }, { "epoch": 19.677178423236516, "grad_norm": 55.019954681396484, "learning_rate": 1.2133443983402491e-05, "loss": 1.158, "step": 23711 }, { "epoch": 19.678008298755188, "grad_norm": 161.44190979003906, "learning_rate": 1.2133112033195023e-05, "loss": 1.0767, "step": 23712 }, { "epoch": 19.67883817427386, "grad_norm": 24.25724983215332, "learning_rate": 1.2132780082987552e-05, "loss": 0.3582, "step": 23713 }, { "epoch": 19.679668049792532, "grad_norm": 48.109310150146484, "learning_rate": 1.2132448132780084e-05, "loss": 1.1054, "step": 23714 }, { "epoch": 19.680497925311204, "grad_norm": 48.463687896728516, "learning_rate": 1.2132116182572614e-05, "loss": 0.6764, "step": 23715 }, { "epoch": 19.681327800829877, "grad_norm": 59.332645416259766, "learning_rate": 1.2131784232365147e-05, "loss": 0.7718, "step": 23716 }, { "epoch": 19.68215767634855, "grad_norm": 49.288230895996094, "learning_rate": 1.2131452282157677e-05, "loss": 0.9015, "step": 23717 }, { "epoch": 19.68298755186722, "grad_norm": 28.600814819335938, "learning_rate": 1.2131120331950207e-05, "loss": 0.4125, "step": 23718 }, { "epoch": 19.683817427385893, "grad_norm": 68.12986755371094, "learning_rate": 1.213078838174274e-05, "loss": 0.8282, "step": 23719 }, { "epoch": 19.684647302904565, "grad_norm": 28.40816879272461, "learning_rate": 1.2130456431535272e-05, "loss": 0.4127, 
"step": 23720 }, { "epoch": 19.685477178423238, "grad_norm": 104.02812957763672, "learning_rate": 1.21301244813278e-05, "loss": 0.782, "step": 23721 }, { "epoch": 19.68630705394191, "grad_norm": 68.76110076904297, "learning_rate": 1.2129792531120332e-05, "loss": 1.1519, "step": 23722 }, { "epoch": 19.687136929460582, "grad_norm": 54.14628982543945, "learning_rate": 1.2129460580912865e-05, "loss": 1.0859, "step": 23723 }, { "epoch": 19.687966804979254, "grad_norm": 117.4521255493164, "learning_rate": 1.2129128630705395e-05, "loss": 1.5132, "step": 23724 }, { "epoch": 19.688796680497926, "grad_norm": 63.83131408691406, "learning_rate": 1.2128796680497927e-05, "loss": 0.6804, "step": 23725 }, { "epoch": 19.6896265560166, "grad_norm": 18.714481353759766, "learning_rate": 1.2128464730290458e-05, "loss": 0.3454, "step": 23726 }, { "epoch": 19.69045643153527, "grad_norm": 27.6943416595459, "learning_rate": 1.2128132780082988e-05, "loss": 0.3586, "step": 23727 }, { "epoch": 19.691286307053943, "grad_norm": 77.70207977294922, "learning_rate": 1.212780082987552e-05, "loss": 0.4866, "step": 23728 }, { "epoch": 19.692116182572615, "grad_norm": 46.18693542480469, "learning_rate": 1.2127468879668052e-05, "loss": 0.4316, "step": 23729 }, { "epoch": 19.692946058091287, "grad_norm": 69.89705657958984, "learning_rate": 1.2127136929460581e-05, "loss": 0.3704, "step": 23730 }, { "epoch": 19.69377593360996, "grad_norm": 35.90583419799805, "learning_rate": 1.2126804979253113e-05, "loss": 0.5796, "step": 23731 }, { "epoch": 19.694605809128632, "grad_norm": 41.74568176269531, "learning_rate": 1.2126473029045645e-05, "loss": 0.5033, "step": 23732 }, { "epoch": 19.695435684647304, "grad_norm": 44.947471618652344, "learning_rate": 1.2126141078838175e-05, "loss": 0.5621, "step": 23733 }, { "epoch": 19.696265560165976, "grad_norm": 36.5411491394043, "learning_rate": 1.2125809128630706e-05, "loss": 0.4982, "step": 23734 }, { "epoch": 19.69709543568465, "grad_norm": 20.69956398010254, 
"learning_rate": 1.2125477178423238e-05, "loss": 0.3216, "step": 23735 }, { "epoch": 19.69792531120332, "grad_norm": 40.23213577270508, "learning_rate": 1.2125145228215768e-05, "loss": 0.3211, "step": 23736 }, { "epoch": 19.698755186721993, "grad_norm": 52.11113739013672, "learning_rate": 1.21248132780083e-05, "loss": 1.4004, "step": 23737 }, { "epoch": 19.699585062240665, "grad_norm": 56.57379913330078, "learning_rate": 1.212448132780083e-05, "loss": 1.2272, "step": 23738 }, { "epoch": 19.700414937759337, "grad_norm": 22.691896438598633, "learning_rate": 1.2124149377593361e-05, "loss": 0.4251, "step": 23739 }, { "epoch": 19.70124481327801, "grad_norm": 29.648080825805664, "learning_rate": 1.2123817427385893e-05, "loss": 0.3954, "step": 23740 }, { "epoch": 19.70207468879668, "grad_norm": 41.134891510009766, "learning_rate": 1.2123485477178426e-05, "loss": 0.7439, "step": 23741 }, { "epoch": 19.702904564315354, "grad_norm": 20.075525283813477, "learning_rate": 1.2123153526970954e-05, "loss": 0.4484, "step": 23742 }, { "epoch": 19.703734439834026, "grad_norm": 67.77556610107422, "learning_rate": 1.2122821576763486e-05, "loss": 0.4724, "step": 23743 }, { "epoch": 19.704564315352698, "grad_norm": 13.353166580200195, "learning_rate": 1.2122489626556019e-05, "loss": 0.208, "step": 23744 }, { "epoch": 19.70539419087137, "grad_norm": 20.779855728149414, "learning_rate": 1.2122157676348549e-05, "loss": 0.2999, "step": 23745 }, { "epoch": 19.706224066390043, "grad_norm": 45.866188049316406, "learning_rate": 1.212182572614108e-05, "loss": 1.1361, "step": 23746 }, { "epoch": 19.707053941908715, "grad_norm": 28.71831512451172, "learning_rate": 1.212149377593361e-05, "loss": 0.6806, "step": 23747 }, { "epoch": 19.707883817427387, "grad_norm": 36.18320083618164, "learning_rate": 1.2121161825726142e-05, "loss": 0.6396, "step": 23748 }, { "epoch": 19.70871369294606, "grad_norm": 81.27330017089844, "learning_rate": 1.2120829875518674e-05, "loss": 0.718, "step": 23749 }, { "epoch": 
19.70954356846473, "grad_norm": 37.631099700927734, "learning_rate": 1.2120497925311206e-05, "loss": 0.4576, "step": 23750 }, { "epoch": 19.710373443983404, "grad_norm": 43.595703125, "learning_rate": 1.2120165975103735e-05, "loss": 0.5415, "step": 23751 }, { "epoch": 19.711203319502076, "grad_norm": 21.549880981445312, "learning_rate": 1.2119834024896267e-05, "loss": 0.3051, "step": 23752 }, { "epoch": 19.712033195020748, "grad_norm": 32.08234405517578, "learning_rate": 1.2119502074688797e-05, "loss": 0.647, "step": 23753 }, { "epoch": 19.71286307053942, "grad_norm": 44.271324157714844, "learning_rate": 1.211917012448133e-05, "loss": 0.7804, "step": 23754 }, { "epoch": 19.713692946058092, "grad_norm": 111.41650390625, "learning_rate": 1.211883817427386e-05, "loss": 0.5533, "step": 23755 }, { "epoch": 19.714522821576764, "grad_norm": 45.87218475341797, "learning_rate": 1.211850622406639e-05, "loss": 0.614, "step": 23756 }, { "epoch": 19.715352697095437, "grad_norm": 92.68588256835938, "learning_rate": 1.2118174273858922e-05, "loss": 0.8369, "step": 23757 }, { "epoch": 19.71618257261411, "grad_norm": 50.69757843017578, "learning_rate": 1.2117842323651454e-05, "loss": 1.0539, "step": 23758 }, { "epoch": 19.71701244813278, "grad_norm": 34.51540756225586, "learning_rate": 1.2117510373443983e-05, "loss": 0.8143, "step": 23759 }, { "epoch": 19.717842323651453, "grad_norm": 50.108219146728516, "learning_rate": 1.2117178423236515e-05, "loss": 0.3639, "step": 23760 }, { "epoch": 19.718672199170125, "grad_norm": 37.06401062011719, "learning_rate": 1.2116846473029047e-05, "loss": 0.5006, "step": 23761 }, { "epoch": 19.719502074688798, "grad_norm": 22.682222366333008, "learning_rate": 1.2116514522821578e-05, "loss": 0.3081, "step": 23762 }, { "epoch": 19.72033195020747, "grad_norm": 37.55036544799805, "learning_rate": 1.2116182572614108e-05, "loss": 0.5485, "step": 23763 }, { "epoch": 19.721161825726142, "grad_norm": 111.61121368408203, "learning_rate": 1.211585062240664e-05, 
"loss": 0.9115, "step": 23764 }, { "epoch": 19.721991701244814, "grad_norm": 30.09785270690918, "learning_rate": 1.211551867219917e-05, "loss": 0.3527, "step": 23765 }, { "epoch": 19.722821576763486, "grad_norm": 45.260169982910156, "learning_rate": 1.2115186721991703e-05, "loss": 0.596, "step": 23766 }, { "epoch": 19.72365145228216, "grad_norm": 119.0989990234375, "learning_rate": 1.2114854771784232e-05, "loss": 0.6606, "step": 23767 }, { "epoch": 19.72448132780083, "grad_norm": 74.04777526855469, "learning_rate": 1.2114522821576764e-05, "loss": 0.5866, "step": 23768 }, { "epoch": 19.725311203319503, "grad_norm": 49.289764404296875, "learning_rate": 1.2114190871369296e-05, "loss": 0.7399, "step": 23769 }, { "epoch": 19.726141078838175, "grad_norm": 52.71346664428711, "learning_rate": 1.2113858921161828e-05, "loss": 0.7545, "step": 23770 }, { "epoch": 19.726970954356847, "grad_norm": 44.681922912597656, "learning_rate": 1.2113526970954357e-05, "loss": 0.3911, "step": 23771 }, { "epoch": 19.72780082987552, "grad_norm": 83.39239501953125, "learning_rate": 1.2113195020746889e-05, "loss": 0.6533, "step": 23772 }, { "epoch": 19.728630705394192, "grad_norm": 35.25242233276367, "learning_rate": 1.211286307053942e-05, "loss": 0.493, "step": 23773 }, { "epoch": 19.729460580912864, "grad_norm": 58.23968505859375, "learning_rate": 1.2112531120331951e-05, "loss": 0.6718, "step": 23774 }, { "epoch": 19.730290456431536, "grad_norm": 28.592004776000977, "learning_rate": 1.2112199170124482e-05, "loss": 0.4532, "step": 23775 }, { "epoch": 19.73112033195021, "grad_norm": 45.837646484375, "learning_rate": 1.2111867219917012e-05, "loss": 0.5216, "step": 23776 }, { "epoch": 19.73195020746888, "grad_norm": 56.86872100830078, "learning_rate": 1.2111535269709544e-05, "loss": 0.5168, "step": 23777 }, { "epoch": 19.732780082987553, "grad_norm": 63.001155853271484, "learning_rate": 1.2111203319502076e-05, "loss": 0.5014, "step": 23778 }, { "epoch": 19.733609958506225, "grad_norm": 
86.83867645263672, "learning_rate": 1.2110871369294608e-05, "loss": 0.6515, "step": 23779 }, { "epoch": 19.734439834024897, "grad_norm": 39.295249938964844, "learning_rate": 1.2110539419087137e-05, "loss": 0.5805, "step": 23780 }, { "epoch": 19.73526970954357, "grad_norm": 67.58926391601562, "learning_rate": 1.211020746887967e-05, "loss": 0.6779, "step": 23781 }, { "epoch": 19.73609958506224, "grad_norm": 69.5451431274414, "learning_rate": 1.2109875518672201e-05, "loss": 1.2717, "step": 23782 }, { "epoch": 19.736929460580914, "grad_norm": 45.15687561035156, "learning_rate": 1.2109543568464732e-05, "loss": 0.3554, "step": 23783 }, { "epoch": 19.737759336099586, "grad_norm": 68.90471649169922, "learning_rate": 1.2109211618257262e-05, "loss": 1.0491, "step": 23784 }, { "epoch": 19.738589211618258, "grad_norm": 83.39823913574219, "learning_rate": 1.2108879668049793e-05, "loss": 0.421, "step": 23785 }, { "epoch": 19.73941908713693, "grad_norm": 65.8620376586914, "learning_rate": 1.2108547717842325e-05, "loss": 0.3969, "step": 23786 }, { "epoch": 19.740248962655603, "grad_norm": 96.26473999023438, "learning_rate": 1.2108215767634857e-05, "loss": 0.6562, "step": 23787 }, { "epoch": 19.741078838174275, "grad_norm": 51.90190887451172, "learning_rate": 1.2107883817427386e-05, "loss": 0.7051, "step": 23788 }, { "epoch": 19.741908713692947, "grad_norm": 45.27952575683594, "learning_rate": 1.2107551867219918e-05, "loss": 0.5685, "step": 23789 }, { "epoch": 19.74273858921162, "grad_norm": 39.167816162109375, "learning_rate": 1.210721991701245e-05, "loss": 0.6303, "step": 23790 }, { "epoch": 19.74356846473029, "grad_norm": 101.99581909179688, "learning_rate": 1.2106887966804982e-05, "loss": 0.8881, "step": 23791 }, { "epoch": 19.744398340248964, "grad_norm": 39.622554779052734, "learning_rate": 1.210655601659751e-05, "loss": 0.4619, "step": 23792 }, { "epoch": 19.745228215767636, "grad_norm": 38.626861572265625, "learning_rate": 1.2106224066390043e-05, "loss": 0.5501, "step": 
23793 }, { "epoch": 19.746058091286308, "grad_norm": 51.81126403808594, "learning_rate": 1.2105892116182573e-05, "loss": 0.4508, "step": 23794 }, { "epoch": 19.74688796680498, "grad_norm": 35.32428741455078, "learning_rate": 1.2105560165975105e-05, "loss": 0.4012, "step": 23795 }, { "epoch": 19.747717842323652, "grad_norm": 43.60474395751953, "learning_rate": 1.2105228215767636e-05, "loss": 0.4393, "step": 23796 }, { "epoch": 19.748547717842325, "grad_norm": 38.572792053222656, "learning_rate": 1.2104896265560166e-05, "loss": 0.2523, "step": 23797 }, { "epoch": 19.749377593360997, "grad_norm": 26.509464263916016, "learning_rate": 1.2104564315352698e-05, "loss": 0.9695, "step": 23798 }, { "epoch": 19.75020746887967, "grad_norm": 27.60357093811035, "learning_rate": 1.210423236514523e-05, "loss": 0.8742, "step": 23799 }, { "epoch": 19.75103734439834, "grad_norm": 33.48333740234375, "learning_rate": 1.2103900414937759e-05, "loss": 0.5728, "step": 23800 }, { "epoch": 19.751867219917013, "grad_norm": 103.18828582763672, "learning_rate": 1.2103568464730291e-05, "loss": 0.6387, "step": 23801 }, { "epoch": 19.752697095435686, "grad_norm": 45.1522216796875, "learning_rate": 1.2103236514522823e-05, "loss": 0.8576, "step": 23802 }, { "epoch": 19.753526970954358, "grad_norm": 23.700092315673828, "learning_rate": 1.2102904564315354e-05, "loss": 0.4624, "step": 23803 }, { "epoch": 19.75435684647303, "grad_norm": 41.33818817138672, "learning_rate": 1.2102572614107886e-05, "loss": 0.3898, "step": 23804 }, { "epoch": 19.755186721991702, "grad_norm": 81.67431640625, "learning_rate": 1.2102240663900416e-05, "loss": 0.5345, "step": 23805 }, { "epoch": 19.756016597510374, "grad_norm": 41.96642303466797, "learning_rate": 1.2101908713692947e-05, "loss": 0.3923, "step": 23806 }, { "epoch": 19.756846473029047, "grad_norm": 29.217777252197266, "learning_rate": 1.2101576763485479e-05, "loss": 0.3959, "step": 23807 }, { "epoch": 19.75767634854772, "grad_norm": 90.2228775024414, 
"learning_rate": 1.210124481327801e-05, "loss": 0.5402, "step": 23808 }, { "epoch": 19.75850622406639, "grad_norm": 44.1225700378418, "learning_rate": 1.210091286307054e-05, "loss": 0.5432, "step": 23809 }, { "epoch": 19.759336099585063, "grad_norm": 36.331050872802734, "learning_rate": 1.2100580912863072e-05, "loss": 0.4948, "step": 23810 }, { "epoch": 19.760165975103735, "grad_norm": 59.570068359375, "learning_rate": 1.2100248962655604e-05, "loss": 0.8825, "step": 23811 }, { "epoch": 19.760995850622407, "grad_norm": 70.82615661621094, "learning_rate": 1.2099917012448134e-05, "loss": 0.8942, "step": 23812 }, { "epoch": 19.76182572614108, "grad_norm": 50.8933219909668, "learning_rate": 1.2099585062240664e-05, "loss": 0.7763, "step": 23813 }, { "epoch": 19.762655601659752, "grad_norm": 51.86668014526367, "learning_rate": 1.2099253112033197e-05, "loss": 0.928, "step": 23814 }, { "epoch": 19.763485477178424, "grad_norm": 34.252723693847656, "learning_rate": 1.2098921161825727e-05, "loss": 0.3605, "step": 23815 }, { "epoch": 19.764315352697096, "grad_norm": 13.799625396728516, "learning_rate": 1.2098589211618259e-05, "loss": 0.3099, "step": 23816 }, { "epoch": 19.76514522821577, "grad_norm": 24.315996170043945, "learning_rate": 1.2098257261410788e-05, "loss": 0.3332, "step": 23817 }, { "epoch": 19.76597510373444, "grad_norm": 38.09517288208008, "learning_rate": 1.209792531120332e-05, "loss": 0.496, "step": 23818 }, { "epoch": 19.766804979253113, "grad_norm": 70.47034454345703, "learning_rate": 1.2097593360995852e-05, "loss": 0.9615, "step": 23819 }, { "epoch": 19.767634854771785, "grad_norm": 41.39888381958008, "learning_rate": 1.2097261410788384e-05, "loss": 0.6124, "step": 23820 }, { "epoch": 19.768464730290457, "grad_norm": 48.83897018432617, "learning_rate": 1.2096929460580913e-05, "loss": 0.6301, "step": 23821 }, { "epoch": 19.76929460580913, "grad_norm": 21.93697738647461, "learning_rate": 1.2096597510373445e-05, "loss": 0.3701, "step": 23822 }, { "epoch": 
19.7701244813278, "grad_norm": 16.26296043395996, "learning_rate": 1.2096265560165975e-05, "loss": 0.4469, "step": 23823 }, { "epoch": 19.770954356846474, "grad_norm": 33.2658576965332, "learning_rate": 1.2095933609958508e-05, "loss": 0.3574, "step": 23824 }, { "epoch": 19.771784232365146, "grad_norm": 28.992156982421875, "learning_rate": 1.2095601659751038e-05, "loss": 0.3995, "step": 23825 }, { "epoch": 19.77261410788382, "grad_norm": 15.619942665100098, "learning_rate": 1.2095269709543568e-05, "loss": 0.4046, "step": 23826 }, { "epoch": 19.77344398340249, "grad_norm": 107.41114044189453, "learning_rate": 1.20949377593361e-05, "loss": 0.3301, "step": 23827 }, { "epoch": 19.774273858921163, "grad_norm": 35.61674118041992, "learning_rate": 1.2094605809128633e-05, "loss": 0.3769, "step": 23828 }, { "epoch": 19.775103734439835, "grad_norm": 48.92934036254883, "learning_rate": 1.2094273858921165e-05, "loss": 0.9922, "step": 23829 }, { "epoch": 19.775933609958507, "grad_norm": 34.22807312011719, "learning_rate": 1.2093941908713693e-05, "loss": 0.5144, "step": 23830 }, { "epoch": 19.77676348547718, "grad_norm": 53.3177604675293, "learning_rate": 1.2093609958506225e-05, "loss": 0.4738, "step": 23831 }, { "epoch": 19.77759336099585, "grad_norm": 59.5179443359375, "learning_rate": 1.2093278008298756e-05, "loss": 0.3941, "step": 23832 }, { "epoch": 19.778423236514524, "grad_norm": 36.69589614868164, "learning_rate": 1.2092946058091288e-05, "loss": 0.5091, "step": 23833 }, { "epoch": 19.779253112033196, "grad_norm": 38.6396369934082, "learning_rate": 1.2092614107883818e-05, "loss": 0.409, "step": 23834 }, { "epoch": 19.780082987551868, "grad_norm": 105.33273315429688, "learning_rate": 1.2092282157676349e-05, "loss": 0.7064, "step": 23835 }, { "epoch": 19.78091286307054, "grad_norm": 80.15524291992188, "learning_rate": 1.2091950207468881e-05, "loss": 0.9036, "step": 23836 }, { "epoch": 19.781742738589212, "grad_norm": 39.619930267333984, "learning_rate": 
1.2091618257261413e-05, "loss": 0.4619, "step": 23837 }, { "epoch": 19.782572614107885, "grad_norm": 44.69241714477539, "learning_rate": 1.2091286307053942e-05, "loss": 0.5318, "step": 23838 }, { "epoch": 19.783402489626557, "grad_norm": 21.607297897338867, "learning_rate": 1.2090954356846474e-05, "loss": 0.4155, "step": 23839 }, { "epoch": 19.78423236514523, "grad_norm": 106.48112487792969, "learning_rate": 1.2090622406639006e-05, "loss": 0.7343, "step": 23840 }, { "epoch": 19.7850622406639, "grad_norm": 115.68765258789062, "learning_rate": 1.2090290456431536e-05, "loss": 0.4868, "step": 23841 }, { "epoch": 19.785892116182573, "grad_norm": 26.508100509643555, "learning_rate": 1.2089958506224067e-05, "loss": 0.3564, "step": 23842 }, { "epoch": 19.786721991701246, "grad_norm": 45.307960510253906, "learning_rate": 1.2089626556016599e-05, "loss": 0.8833, "step": 23843 }, { "epoch": 19.787551867219918, "grad_norm": 58.831878662109375, "learning_rate": 1.208929460580913e-05, "loss": 0.8656, "step": 23844 }, { "epoch": 19.78838174273859, "grad_norm": 85.54783630371094, "learning_rate": 1.2088962655601661e-05, "loss": 0.6162, "step": 23845 }, { "epoch": 19.789211618257262, "grad_norm": 39.71961975097656, "learning_rate": 1.208863070539419e-05, "loss": 0.7512, "step": 23846 }, { "epoch": 19.790041493775934, "grad_norm": 31.727426528930664, "learning_rate": 1.2088298755186722e-05, "loss": 0.4433, "step": 23847 }, { "epoch": 19.790871369294607, "grad_norm": 75.1752700805664, "learning_rate": 1.2087966804979254e-05, "loss": 0.9733, "step": 23848 }, { "epoch": 19.79170124481328, "grad_norm": 24.473989486694336, "learning_rate": 1.2087634854771786e-05, "loss": 0.531, "step": 23849 }, { "epoch": 19.79253112033195, "grad_norm": 46.215911865234375, "learning_rate": 1.2087302904564315e-05, "loss": 0.3725, "step": 23850 }, { "epoch": 19.793360995850623, "grad_norm": 29.38193130493164, "learning_rate": 1.2086970954356847e-05, "loss": 0.8124, "step": 23851 }, { "epoch": 
19.794190871369295, "grad_norm": 54.818817138671875, "learning_rate": 1.208663900414938e-05, "loss": 0.7225, "step": 23852 }, { "epoch": 19.795020746887968, "grad_norm": 71.55036926269531, "learning_rate": 1.208630705394191e-05, "loss": 0.7991, "step": 23853 }, { "epoch": 19.79585062240664, "grad_norm": 48.6226692199707, "learning_rate": 1.208597510373444e-05, "loss": 0.9839, "step": 23854 }, { "epoch": 19.796680497925312, "grad_norm": 36.97178649902344, "learning_rate": 1.208564315352697e-05, "loss": 0.568, "step": 23855 }, { "epoch": 19.797510373443984, "grad_norm": 44.96764373779297, "learning_rate": 1.2085311203319503e-05, "loss": 0.6062, "step": 23856 }, { "epoch": 19.798340248962656, "grad_norm": 126.6003646850586, "learning_rate": 1.2084979253112035e-05, "loss": 0.9763, "step": 23857 }, { "epoch": 19.79917012448133, "grad_norm": 32.59682846069336, "learning_rate": 1.2084647302904567e-05, "loss": 0.5679, "step": 23858 }, { "epoch": 19.8, "grad_norm": 109.81340026855469, "learning_rate": 1.2084315352697096e-05, "loss": 0.7299, "step": 23859 }, { "epoch": 19.800829875518673, "grad_norm": 38.34233856201172, "learning_rate": 1.2083983402489628e-05, "loss": 0.5973, "step": 23860 }, { "epoch": 19.801659751037345, "grad_norm": 47.165496826171875, "learning_rate": 1.208365145228216e-05, "loss": 1.2664, "step": 23861 }, { "epoch": 19.802489626556017, "grad_norm": 21.40834617614746, "learning_rate": 1.208331950207469e-05, "loss": 0.4304, "step": 23862 }, { "epoch": 19.80331950207469, "grad_norm": 80.47618865966797, "learning_rate": 1.208298755186722e-05, "loss": 0.4397, "step": 23863 }, { "epoch": 19.80414937759336, "grad_norm": 79.618408203125, "learning_rate": 1.2082655601659751e-05, "loss": 0.8069, "step": 23864 }, { "epoch": 19.804979253112034, "grad_norm": 145.64920043945312, "learning_rate": 1.2082323651452283e-05, "loss": 0.6494, "step": 23865 }, { "epoch": 19.805809128630706, "grad_norm": 18.77979850769043, "learning_rate": 1.2081991701244815e-05, "loss": 
0.4162, "step": 23866 }, { "epoch": 19.80663900414938, "grad_norm": 39.77259826660156, "learning_rate": 1.2081659751037344e-05, "loss": 0.6191, "step": 23867 }, { "epoch": 19.80746887966805, "grad_norm": 39.36777114868164, "learning_rate": 1.2081327800829876e-05, "loss": 0.3029, "step": 23868 }, { "epoch": 19.808298755186723, "grad_norm": 33.605533599853516, "learning_rate": 1.2080995850622408e-05, "loss": 0.8945, "step": 23869 }, { "epoch": 19.809128630705395, "grad_norm": 22.241065979003906, "learning_rate": 1.2080663900414939e-05, "loss": 0.3737, "step": 23870 }, { "epoch": 19.809958506224067, "grad_norm": 39.06415557861328, "learning_rate": 1.2080331950207469e-05, "loss": 0.5963, "step": 23871 }, { "epoch": 19.81078838174274, "grad_norm": 42.375701904296875, "learning_rate": 1.2080000000000001e-05, "loss": 0.7109, "step": 23872 }, { "epoch": 19.81161825726141, "grad_norm": 39.028099060058594, "learning_rate": 1.2079668049792532e-05, "loss": 0.8058, "step": 23873 }, { "epoch": 19.812448132780084, "grad_norm": 20.702653884887695, "learning_rate": 1.2079336099585064e-05, "loss": 0.3496, "step": 23874 }, { "epoch": 19.813278008298756, "grad_norm": 75.04622650146484, "learning_rate": 1.2079004149377594e-05, "loss": 1.1207, "step": 23875 }, { "epoch": 19.814107883817428, "grad_norm": 79.82191467285156, "learning_rate": 1.2078672199170125e-05, "loss": 0.5463, "step": 23876 }, { "epoch": 19.8149377593361, "grad_norm": 18.649333953857422, "learning_rate": 1.2078340248962657e-05, "loss": 0.5603, "step": 23877 }, { "epoch": 19.815767634854772, "grad_norm": 31.683034896850586, "learning_rate": 1.2078008298755189e-05, "loss": 0.611, "step": 23878 }, { "epoch": 19.816597510373445, "grad_norm": 33.277671813964844, "learning_rate": 1.2077676348547718e-05, "loss": 0.9047, "step": 23879 }, { "epoch": 19.817427385892117, "grad_norm": 24.831371307373047, "learning_rate": 1.207734439834025e-05, "loss": 0.5106, "step": 23880 }, { "epoch": 19.81825726141079, "grad_norm": 
20.55502700805664, "learning_rate": 1.2077012448132782e-05, "loss": 0.2731, "step": 23881 }, { "epoch": 19.81908713692946, "grad_norm": 65.24813842773438, "learning_rate": 1.2076680497925312e-05, "loss": 0.6631, "step": 23882 }, { "epoch": 19.819917012448133, "grad_norm": 29.85175895690918, "learning_rate": 1.2076348547717844e-05, "loss": 0.6242, "step": 23883 }, { "epoch": 19.820746887966806, "grad_norm": 27.654544830322266, "learning_rate": 1.2076016597510373e-05, "loss": 0.3281, "step": 23884 }, { "epoch": 19.821576763485478, "grad_norm": 52.45937728881836, "learning_rate": 1.2075684647302905e-05, "loss": 0.5813, "step": 23885 }, { "epoch": 19.82240663900415, "grad_norm": 64.35527038574219, "learning_rate": 1.2075352697095437e-05, "loss": 0.5066, "step": 23886 }, { "epoch": 19.823236514522822, "grad_norm": 54.093074798583984, "learning_rate": 1.207502074688797e-05, "loss": 0.3849, "step": 23887 }, { "epoch": 19.824066390041494, "grad_norm": 67.76478576660156, "learning_rate": 1.2074688796680498e-05, "loss": 1.3321, "step": 23888 }, { "epoch": 19.824896265560167, "grad_norm": 49.56477355957031, "learning_rate": 1.207435684647303e-05, "loss": 0.5867, "step": 23889 }, { "epoch": 19.82572614107884, "grad_norm": 18.09744644165039, "learning_rate": 1.2074024896265562e-05, "loss": 0.3591, "step": 23890 }, { "epoch": 19.82655601659751, "grad_norm": 32.13579559326172, "learning_rate": 1.2073692946058093e-05, "loss": 0.4216, "step": 23891 }, { "epoch": 19.827385892116183, "grad_norm": 37.31633758544922, "learning_rate": 1.2073360995850623e-05, "loss": 0.4061, "step": 23892 }, { "epoch": 19.828215767634855, "grad_norm": 87.0744857788086, "learning_rate": 1.2073029045643153e-05, "loss": 0.4484, "step": 23893 }, { "epoch": 19.829045643153528, "grad_norm": 51.0653190612793, "learning_rate": 1.2072697095435686e-05, "loss": 0.4359, "step": 23894 }, { "epoch": 19.8298755186722, "grad_norm": 155.84359741210938, "learning_rate": 1.2072365145228218e-05, "loss": 0.8054, "step": 
23895 }, { "epoch": 19.830705394190872, "grad_norm": 30.642858505249023, "learning_rate": 1.2072033195020746e-05, "loss": 0.5692, "step": 23896 }, { "epoch": 19.831535269709544, "grad_norm": 60.54050064086914, "learning_rate": 1.2071701244813279e-05, "loss": 0.3346, "step": 23897 }, { "epoch": 19.832365145228216, "grad_norm": 55.98771286010742, "learning_rate": 1.207136929460581e-05, "loss": 0.9596, "step": 23898 }, { "epoch": 19.83319502074689, "grad_norm": 37.891868591308594, "learning_rate": 1.2071037344398343e-05, "loss": 1.2286, "step": 23899 }, { "epoch": 19.83402489626556, "grad_norm": 22.043851852416992, "learning_rate": 1.2070705394190871e-05, "loss": 0.2823, "step": 23900 }, { "epoch": 19.834854771784233, "grad_norm": 37.50635528564453, "learning_rate": 1.2070373443983404e-05, "loss": 0.3955, "step": 23901 }, { "epoch": 19.835684647302905, "grad_norm": 42.57914733886719, "learning_rate": 1.2070041493775934e-05, "loss": 0.4175, "step": 23902 }, { "epoch": 19.836514522821577, "grad_norm": 87.3003158569336, "learning_rate": 1.2069709543568466e-05, "loss": 0.6344, "step": 23903 }, { "epoch": 19.83734439834025, "grad_norm": 65.0412368774414, "learning_rate": 1.2069377593360996e-05, "loss": 0.9962, "step": 23904 }, { "epoch": 19.83817427385892, "grad_norm": 45.44572067260742, "learning_rate": 1.2069045643153527e-05, "loss": 0.9501, "step": 23905 }, { "epoch": 19.839004149377594, "grad_norm": 30.970067977905273, "learning_rate": 1.2068713692946059e-05, "loss": 0.4647, "step": 23906 }, { "epoch": 19.839834024896266, "grad_norm": 45.237030029296875, "learning_rate": 1.2068381742738591e-05, "loss": 0.4099, "step": 23907 }, { "epoch": 19.84066390041494, "grad_norm": 34.21257781982422, "learning_rate": 1.2068049792531123e-05, "loss": 0.317, "step": 23908 }, { "epoch": 19.84149377593361, "grad_norm": 59.77739715576172, "learning_rate": 1.2067717842323652e-05, "loss": 0.4738, "step": 23909 }, { "epoch": 19.842323651452283, "grad_norm": 39.26498794555664, 
"learning_rate": 1.2067385892116184e-05, "loss": 0.4766, "step": 23910 }, { "epoch": 19.843153526970955, "grad_norm": 26.296031951904297, "learning_rate": 1.2067053941908714e-05, "loss": 0.4252, "step": 23911 }, { "epoch": 19.843983402489627, "grad_norm": 42.407928466796875, "learning_rate": 1.2066721991701247e-05, "loss": 1.121, "step": 23912 }, { "epoch": 19.8448132780083, "grad_norm": 69.61461639404297, "learning_rate": 1.2066390041493777e-05, "loss": 0.8185, "step": 23913 }, { "epoch": 19.84564315352697, "grad_norm": 59.82355499267578, "learning_rate": 1.2066058091286307e-05, "loss": 1.3731, "step": 23914 }, { "epoch": 19.846473029045644, "grad_norm": 39.07449722290039, "learning_rate": 1.206572614107884e-05, "loss": 0.286, "step": 23915 }, { "epoch": 19.847302904564316, "grad_norm": 69.44074249267578, "learning_rate": 1.2065394190871372e-05, "loss": 0.524, "step": 23916 }, { "epoch": 19.848132780082988, "grad_norm": 17.26833152770996, "learning_rate": 1.20650622406639e-05, "loss": 0.3103, "step": 23917 }, { "epoch": 19.84896265560166, "grad_norm": 95.07258605957031, "learning_rate": 1.2064730290456432e-05, "loss": 0.767, "step": 23918 }, { "epoch": 19.849792531120332, "grad_norm": 21.233327865600586, "learning_rate": 1.2064398340248965e-05, "loss": 0.5996, "step": 23919 }, { "epoch": 19.850622406639005, "grad_norm": 39.17354965209961, "learning_rate": 1.2064066390041495e-05, "loss": 0.5135, "step": 23920 }, { "epoch": 19.851452282157677, "grad_norm": 32.63652038574219, "learning_rate": 1.2063734439834025e-05, "loss": 0.6745, "step": 23921 }, { "epoch": 19.85228215767635, "grad_norm": 34.4818000793457, "learning_rate": 1.2063402489626557e-05, "loss": 0.701, "step": 23922 }, { "epoch": 19.85311203319502, "grad_norm": 44.79916763305664, "learning_rate": 1.2063070539419088e-05, "loss": 0.6011, "step": 23923 }, { "epoch": 19.853941908713693, "grad_norm": 60.12187194824219, "learning_rate": 1.206273858921162e-05, "loss": 0.5618, "step": 23924 }, { "epoch": 
19.854771784232366, "grad_norm": 22.250682830810547, "learning_rate": 1.2062406639004149e-05, "loss": 0.3361, "step": 23925 }, { "epoch": 19.855601659751038, "grad_norm": 61.8610954284668, "learning_rate": 1.206207468879668e-05, "loss": 0.9763, "step": 23926 }, { "epoch": 19.85643153526971, "grad_norm": 33.58388900756836, "learning_rate": 1.2061742738589213e-05, "loss": 0.9716, "step": 23927 }, { "epoch": 19.857261410788382, "grad_norm": 41.81687545776367, "learning_rate": 1.2061410788381745e-05, "loss": 0.5594, "step": 23928 }, { "epoch": 19.858091286307054, "grad_norm": 55.04802703857422, "learning_rate": 1.2061078838174274e-05, "loss": 0.7887, "step": 23929 }, { "epoch": 19.858921161825727, "grad_norm": 35.436588287353516, "learning_rate": 1.2060746887966806e-05, "loss": 0.4703, "step": 23930 }, { "epoch": 19.8597510373444, "grad_norm": 27.73816680908203, "learning_rate": 1.2060414937759338e-05, "loss": 0.4414, "step": 23931 }, { "epoch": 19.86058091286307, "grad_norm": 51.39085388183594, "learning_rate": 1.2060082987551868e-05, "loss": 0.5798, "step": 23932 }, { "epoch": 19.861410788381743, "grad_norm": 44.20178985595703, "learning_rate": 1.2059751037344399e-05, "loss": 0.6732, "step": 23933 }, { "epoch": 19.862240663900415, "grad_norm": 44.62611389160156, "learning_rate": 1.205941908713693e-05, "loss": 0.8288, "step": 23934 }, { "epoch": 19.863070539419088, "grad_norm": 70.03857421875, "learning_rate": 1.2059087136929461e-05, "loss": 0.7878, "step": 23935 }, { "epoch": 19.86390041493776, "grad_norm": 34.76892852783203, "learning_rate": 1.2058755186721993e-05, "loss": 0.4579, "step": 23936 }, { "epoch": 19.864730290456432, "grad_norm": 15.774948120117188, "learning_rate": 1.2058423236514526e-05, "loss": 0.2628, "step": 23937 }, { "epoch": 19.865560165975104, "grad_norm": 14.834177017211914, "learning_rate": 1.2058091286307054e-05, "loss": 0.2865, "step": 23938 }, { "epoch": 19.866390041493776, "grad_norm": 42.11200714111328, "learning_rate": 
1.2057759336099586e-05, "loss": 0.8039, "step": 23939 }, { "epoch": 19.86721991701245, "grad_norm": 14.313316345214844, "learning_rate": 1.2057427385892117e-05, "loss": 0.2594, "step": 23940 }, { "epoch": 19.86804979253112, "grad_norm": 17.102195739746094, "learning_rate": 1.2057095435684649e-05, "loss": 0.2992, "step": 23941 }, { "epoch": 19.868879668049793, "grad_norm": 52.015445709228516, "learning_rate": 1.205676348547718e-05, "loss": 0.8303, "step": 23942 }, { "epoch": 19.869709543568465, "grad_norm": 43.0827751159668, "learning_rate": 1.205643153526971e-05, "loss": 0.3912, "step": 23943 }, { "epoch": 19.870539419087137, "grad_norm": 39.220401763916016, "learning_rate": 1.2056099585062242e-05, "loss": 0.659, "step": 23944 }, { "epoch": 19.87136929460581, "grad_norm": 19.078245162963867, "learning_rate": 1.2055767634854774e-05, "loss": 0.2956, "step": 23945 }, { "epoch": 19.872199170124482, "grad_norm": 67.28402709960938, "learning_rate": 1.2055435684647303e-05, "loss": 0.3141, "step": 23946 }, { "epoch": 19.873029045643154, "grad_norm": 41.15301513671875, "learning_rate": 1.2055103734439835e-05, "loss": 0.4683, "step": 23947 }, { "epoch": 19.873858921161826, "grad_norm": 36.29445266723633, "learning_rate": 1.2054771784232367e-05, "loss": 0.7081, "step": 23948 }, { "epoch": 19.8746887966805, "grad_norm": 31.485668182373047, "learning_rate": 1.2054439834024897e-05, "loss": 0.5118, "step": 23949 }, { "epoch": 19.87551867219917, "grad_norm": 27.67552375793457, "learning_rate": 1.2054107883817428e-05, "loss": 0.3516, "step": 23950 }, { "epoch": 19.876348547717843, "grad_norm": 35.62739181518555, "learning_rate": 1.205377593360996e-05, "loss": 0.5882, "step": 23951 }, { "epoch": 19.877178423236515, "grad_norm": 82.82621002197266, "learning_rate": 1.205344398340249e-05, "loss": 0.7856, "step": 23952 }, { "epoch": 19.878008298755187, "grad_norm": 11.572467803955078, "learning_rate": 1.2053112033195022e-05, "loss": 0.2325, "step": 23953 }, { "epoch": 19.87883817427386, 
"grad_norm": 23.167362213134766, "learning_rate": 1.2052780082987551e-05, "loss": 0.4374, "step": 23954 }, { "epoch": 19.87966804979253, "grad_norm": 31.847070693969727, "learning_rate": 1.2052448132780083e-05, "loss": 0.6363, "step": 23955 }, { "epoch": 19.880497925311204, "grad_norm": 42.8136100769043, "learning_rate": 1.2052116182572615e-05, "loss": 0.4269, "step": 23956 }, { "epoch": 19.881327800829876, "grad_norm": 36.32194519042969, "learning_rate": 1.2051784232365147e-05, "loss": 0.4968, "step": 23957 }, { "epoch": 19.882157676348548, "grad_norm": 37.18268966674805, "learning_rate": 1.2051452282157676e-05, "loss": 0.5321, "step": 23958 }, { "epoch": 19.88298755186722, "grad_norm": 82.468017578125, "learning_rate": 1.2051120331950208e-05, "loss": 1.0805, "step": 23959 }, { "epoch": 19.883817427385893, "grad_norm": 32.5939826965332, "learning_rate": 1.205078838174274e-05, "loss": 0.4372, "step": 23960 }, { "epoch": 19.884647302904565, "grad_norm": 68.08867645263672, "learning_rate": 1.205045643153527e-05, "loss": 0.7541, "step": 23961 }, { "epoch": 19.885477178423237, "grad_norm": 90.1001968383789, "learning_rate": 1.2050124481327803e-05, "loss": 0.4172, "step": 23962 }, { "epoch": 19.88630705394191, "grad_norm": 26.831645965576172, "learning_rate": 1.2049792531120332e-05, "loss": 0.3808, "step": 23963 }, { "epoch": 19.88713692946058, "grad_norm": 11.809968948364258, "learning_rate": 1.2049460580912864e-05, "loss": 0.2507, "step": 23964 }, { "epoch": 19.887966804979254, "grad_norm": 19.603124618530273, "learning_rate": 1.2049128630705396e-05, "loss": 0.4781, "step": 23965 }, { "epoch": 19.888796680497926, "grad_norm": 29.331727981567383, "learning_rate": 1.2048796680497928e-05, "loss": 0.5254, "step": 23966 }, { "epoch": 19.889626556016598, "grad_norm": 62.678184509277344, "learning_rate": 1.2048464730290457e-05, "loss": 0.7617, "step": 23967 }, { "epoch": 19.89045643153527, "grad_norm": 34.60889434814453, "learning_rate": 1.2048132780082989e-05, "loss": 
0.5302, "step": 23968 }, { "epoch": 19.891286307053942, "grad_norm": 43.81254196166992, "learning_rate": 1.204780082987552e-05, "loss": 0.7226, "step": 23969 }, { "epoch": 19.892116182572614, "grad_norm": 36.16294479370117, "learning_rate": 1.2047468879668051e-05, "loss": 0.4574, "step": 23970 }, { "epoch": 19.892946058091287, "grad_norm": 226.96734619140625, "learning_rate": 1.2047136929460582e-05, "loss": 1.0648, "step": 23971 }, { "epoch": 19.89377593360996, "grad_norm": 81.75108337402344, "learning_rate": 1.2046804979253112e-05, "loss": 0.5433, "step": 23972 }, { "epoch": 19.89460580912863, "grad_norm": 30.57389259338379, "learning_rate": 1.2046473029045644e-05, "loss": 0.4087, "step": 23973 }, { "epoch": 19.895435684647303, "grad_norm": 39.82106018066406, "learning_rate": 1.2046141078838176e-05, "loss": 0.663, "step": 23974 }, { "epoch": 19.896265560165975, "grad_norm": 24.72063446044922, "learning_rate": 1.2045809128630705e-05, "loss": 0.5944, "step": 23975 }, { "epoch": 19.897095435684648, "grad_norm": 42.63638687133789, "learning_rate": 1.2045477178423237e-05, "loss": 0.7993, "step": 23976 }, { "epoch": 19.89792531120332, "grad_norm": 66.23725891113281, "learning_rate": 1.204514522821577e-05, "loss": 0.6705, "step": 23977 }, { "epoch": 19.898755186721992, "grad_norm": 99.64192962646484, "learning_rate": 1.2044813278008301e-05, "loss": 1.1654, "step": 23978 }, { "epoch": 19.899585062240664, "grad_norm": 21.597352981567383, "learning_rate": 1.204448132780083e-05, "loss": 0.7763, "step": 23979 }, { "epoch": 19.900414937759336, "grad_norm": 115.2398681640625, "learning_rate": 1.2044149377593362e-05, "loss": 0.7649, "step": 23980 }, { "epoch": 19.90124481327801, "grad_norm": 49.17686080932617, "learning_rate": 1.2043817427385893e-05, "loss": 0.4767, "step": 23981 }, { "epoch": 19.90207468879668, "grad_norm": 41.414405822753906, "learning_rate": 1.2043485477178425e-05, "loss": 0.4825, "step": 23982 }, { "epoch": 19.902904564315353, "grad_norm": 38.03223419189453, 
"learning_rate": 1.2043153526970955e-05, "loss": 0.7179, "step": 23983 }, { "epoch": 19.903734439834025, "grad_norm": 57.78040313720703, "learning_rate": 1.2042821576763485e-05, "loss": 0.9748, "step": 23984 }, { "epoch": 19.904564315352697, "grad_norm": 27.657371520996094, "learning_rate": 1.2042489626556018e-05, "loss": 0.5199, "step": 23985 }, { "epoch": 19.90539419087137, "grad_norm": 63.28644561767578, "learning_rate": 1.204215767634855e-05, "loss": 1.1457, "step": 23986 }, { "epoch": 19.906224066390042, "grad_norm": 74.36408996582031, "learning_rate": 1.204182572614108e-05, "loss": 0.3678, "step": 23987 }, { "epoch": 19.907053941908714, "grad_norm": 70.35336303710938, "learning_rate": 1.204149377593361e-05, "loss": 0.5358, "step": 23988 }, { "epoch": 19.907883817427386, "grad_norm": 43.89039993286133, "learning_rate": 1.2041161825726143e-05, "loss": 0.5642, "step": 23989 }, { "epoch": 19.90871369294606, "grad_norm": 76.07522583007812, "learning_rate": 1.2040829875518673e-05, "loss": 0.492, "step": 23990 }, { "epoch": 19.90954356846473, "grad_norm": 24.90438461303711, "learning_rate": 1.2040497925311205e-05, "loss": 0.4132, "step": 23991 }, { "epoch": 19.910373443983403, "grad_norm": 33.11888122558594, "learning_rate": 1.2040165975103736e-05, "loss": 0.8969, "step": 23992 }, { "epoch": 19.911203319502075, "grad_norm": 62.64251708984375, "learning_rate": 1.2039834024896266e-05, "loss": 0.4081, "step": 23993 }, { "epoch": 19.912033195020747, "grad_norm": 47.829593658447266, "learning_rate": 1.2039502074688798e-05, "loss": 0.8562, "step": 23994 }, { "epoch": 19.91286307053942, "grad_norm": 22.464014053344727, "learning_rate": 1.203917012448133e-05, "loss": 0.3985, "step": 23995 }, { "epoch": 19.91369294605809, "grad_norm": 89.20173645019531, "learning_rate": 1.2038838174273859e-05, "loss": 0.5003, "step": 23996 }, { "epoch": 19.914522821576764, "grad_norm": 63.474082946777344, "learning_rate": 1.2038506224066391e-05, "loss": 0.789, "step": 23997 }, { "epoch": 
19.915352697095436, "grad_norm": 27.20584487915039, "learning_rate": 1.2038174273858923e-05, "loss": 0.5109, "step": 23998 }, { "epoch": 19.916182572614108, "grad_norm": 36.41649627685547, "learning_rate": 1.2037842323651454e-05, "loss": 0.6203, "step": 23999 }, { "epoch": 19.91701244813278, "grad_norm": 20.514148712158203, "learning_rate": 1.2037510373443984e-05, "loss": 0.4559, "step": 24000 }, { "epoch": 19.917842323651453, "grad_norm": 28.963014602661133, "learning_rate": 1.2037178423236514e-05, "loss": 0.4238, "step": 24001 }, { "epoch": 19.918672199170125, "grad_norm": 28.859323501586914, "learning_rate": 1.2036846473029046e-05, "loss": 0.4247, "step": 24002 }, { "epoch": 19.919502074688797, "grad_norm": 59.57999038696289, "learning_rate": 1.2036514522821579e-05, "loss": 0.8455, "step": 24003 }, { "epoch": 19.92033195020747, "grad_norm": 43.36153030395508, "learning_rate": 1.2036182572614107e-05, "loss": 0.4034, "step": 24004 }, { "epoch": 19.92116182572614, "grad_norm": 35.9567985534668, "learning_rate": 1.203585062240664e-05, "loss": 0.8795, "step": 24005 }, { "epoch": 19.921991701244814, "grad_norm": 43.30082321166992, "learning_rate": 1.2035518672199172e-05, "loss": 0.7756, "step": 24006 }, { "epoch": 19.922821576763486, "grad_norm": 89.29500579833984, "learning_rate": 1.2035186721991704e-05, "loss": 0.984, "step": 24007 }, { "epoch": 19.923651452282158, "grad_norm": 20.90682029724121, "learning_rate": 1.2034854771784232e-05, "loss": 0.4838, "step": 24008 }, { "epoch": 19.92448132780083, "grad_norm": 14.507314682006836, "learning_rate": 1.2034522821576764e-05, "loss": 0.3635, "step": 24009 }, { "epoch": 19.925311203319502, "grad_norm": 66.08331298828125, "learning_rate": 1.2034190871369295e-05, "loss": 0.532, "step": 24010 }, { "epoch": 19.926141078838175, "grad_norm": 72.11297607421875, "learning_rate": 1.2033858921161827e-05, "loss": 0.8607, "step": 24011 }, { "epoch": 19.926970954356847, "grad_norm": 42.34954833984375, "learning_rate": 
1.2033526970954357e-05, "loss": 0.4802, "step": 24012 }, { "epoch": 19.92780082987552, "grad_norm": 21.865447998046875, "learning_rate": 1.2033195020746888e-05, "loss": 0.432, "step": 24013 }, { "epoch": 19.92863070539419, "grad_norm": 101.93994140625, "learning_rate": 1.203286307053942e-05, "loss": 1.0295, "step": 24014 }, { "epoch": 19.929460580912863, "grad_norm": 120.32237243652344, "learning_rate": 1.2032531120331952e-05, "loss": 0.7632, "step": 24015 }, { "epoch": 19.930290456431536, "grad_norm": 35.42522430419922, "learning_rate": 1.2032199170124484e-05, "loss": 0.3756, "step": 24016 }, { "epoch": 19.931120331950208, "grad_norm": 29.092655181884766, "learning_rate": 1.2031867219917013e-05, "loss": 0.4001, "step": 24017 }, { "epoch": 19.93195020746888, "grad_norm": 38.205322265625, "learning_rate": 1.2031535269709545e-05, "loss": 0.5185, "step": 24018 }, { "epoch": 19.932780082987552, "grad_norm": 49.72969436645508, "learning_rate": 1.2031203319502075e-05, "loss": 0.6692, "step": 24019 }, { "epoch": 19.933609958506224, "grad_norm": 55.621368408203125, "learning_rate": 1.2030871369294607e-05, "loss": 0.8737, "step": 24020 }, { "epoch": 19.934439834024896, "grad_norm": 50.91151428222656, "learning_rate": 1.2030539419087138e-05, "loss": 0.7965, "step": 24021 }, { "epoch": 19.93526970954357, "grad_norm": 14.649812698364258, "learning_rate": 1.2030207468879668e-05, "loss": 0.2938, "step": 24022 }, { "epoch": 19.93609958506224, "grad_norm": 19.089887619018555, "learning_rate": 1.20298755186722e-05, "loss": 0.3663, "step": 24023 }, { "epoch": 19.936929460580913, "grad_norm": 74.35843658447266, "learning_rate": 1.2029543568464733e-05, "loss": 0.5844, "step": 24024 }, { "epoch": 19.937759336099585, "grad_norm": 58.33056640625, "learning_rate": 1.2029211618257261e-05, "loss": 0.8117, "step": 24025 }, { "epoch": 19.938589211618257, "grad_norm": 198.2432403564453, "learning_rate": 1.2028879668049793e-05, "loss": 0.7387, "step": 24026 }, { "epoch": 19.93941908713693, 
"grad_norm": 26.530336380004883, "learning_rate": 1.2028547717842325e-05, "loss": 0.2942, "step": 24027 }, { "epoch": 19.940248962655602, "grad_norm": 57.1782112121582, "learning_rate": 1.2028215767634856e-05, "loss": 1.4348, "step": 24028 }, { "epoch": 19.941078838174274, "grad_norm": 32.642208099365234, "learning_rate": 1.2027883817427386e-05, "loss": 0.5048, "step": 24029 }, { "epoch": 19.941908713692946, "grad_norm": 41.1871223449707, "learning_rate": 1.2027551867219918e-05, "loss": 0.7849, "step": 24030 }, { "epoch": 19.94273858921162, "grad_norm": 45.18282699584961, "learning_rate": 1.2027219917012449e-05, "loss": 0.6476, "step": 24031 }, { "epoch": 19.94356846473029, "grad_norm": 17.02081298828125, "learning_rate": 1.2026887966804981e-05, "loss": 0.3786, "step": 24032 }, { "epoch": 19.944398340248963, "grad_norm": 34.68101501464844, "learning_rate": 1.202655601659751e-05, "loss": 0.5872, "step": 24033 }, { "epoch": 19.945228215767635, "grad_norm": 38.85983657836914, "learning_rate": 1.2026224066390042e-05, "loss": 0.8445, "step": 24034 }, { "epoch": 19.946058091286307, "grad_norm": 31.421598434448242, "learning_rate": 1.2025892116182574e-05, "loss": 0.5109, "step": 24035 }, { "epoch": 19.94688796680498, "grad_norm": 30.480693817138672, "learning_rate": 1.2025560165975106e-05, "loss": 0.3699, "step": 24036 }, { "epoch": 19.94771784232365, "grad_norm": 49.8619384765625, "learning_rate": 1.2025228215767635e-05, "loss": 0.9186, "step": 24037 }, { "epoch": 19.948547717842324, "grad_norm": 47.46535110473633, "learning_rate": 1.2024896265560167e-05, "loss": 0.6714, "step": 24038 }, { "epoch": 19.949377593360996, "grad_norm": 43.281700134277344, "learning_rate": 1.2024564315352699e-05, "loss": 1.1071, "step": 24039 }, { "epoch": 19.95020746887967, "grad_norm": 16.920259475708008, "learning_rate": 1.202423236514523e-05, "loss": 0.2782, "step": 24040 }, { "epoch": 19.95103734439834, "grad_norm": 24.386308670043945, "learning_rate": 1.2023900414937761e-05, "loss": 
0.4008, "step": 24041 }, { "epoch": 19.951867219917013, "grad_norm": 34.13594055175781, "learning_rate": 1.202356846473029e-05, "loss": 0.5453, "step": 24042 }, { "epoch": 19.952697095435685, "grad_norm": 107.86170959472656, "learning_rate": 1.2023236514522822e-05, "loss": 0.5665, "step": 24043 }, { "epoch": 19.953526970954357, "grad_norm": 28.089433670043945, "learning_rate": 1.2022904564315354e-05, "loss": 0.6521, "step": 24044 }, { "epoch": 19.95435684647303, "grad_norm": 91.19698333740234, "learning_rate": 1.2022572614107886e-05, "loss": 0.9042, "step": 24045 }, { "epoch": 19.9551867219917, "grad_norm": 49.68342208862305, "learning_rate": 1.2022240663900415e-05, "loss": 0.4425, "step": 24046 }, { "epoch": 19.956016597510374, "grad_norm": 34.28984832763672, "learning_rate": 1.2021908713692947e-05, "loss": 0.6572, "step": 24047 }, { "epoch": 19.956846473029046, "grad_norm": 79.65203857421875, "learning_rate": 1.2021576763485478e-05, "loss": 0.5534, "step": 24048 }, { "epoch": 19.957676348547718, "grad_norm": 43.57000732421875, "learning_rate": 1.202124481327801e-05, "loss": 1.2066, "step": 24049 }, { "epoch": 19.95850622406639, "grad_norm": 47.00410079956055, "learning_rate": 1.202091286307054e-05, "loss": 0.6341, "step": 24050 }, { "epoch": 19.959336099585062, "grad_norm": 43.0117073059082, "learning_rate": 1.202058091286307e-05, "loss": 0.6889, "step": 24051 }, { "epoch": 19.960165975103735, "grad_norm": 41.53288269042969, "learning_rate": 1.2020248962655603e-05, "loss": 0.6528, "step": 24052 }, { "epoch": 19.960995850622407, "grad_norm": 39.33417892456055, "learning_rate": 1.2019917012448135e-05, "loss": 0.812, "step": 24053 }, { "epoch": 19.96182572614108, "grad_norm": 143.89694213867188, "learning_rate": 1.2019585062240664e-05, "loss": 1.1259, "step": 24054 }, { "epoch": 19.96265560165975, "grad_norm": 81.49195098876953, "learning_rate": 1.2019253112033196e-05, "loss": 0.7074, "step": 24055 }, { "epoch": 19.963485477178423, "grad_norm": 40.946556091308594, 
"learning_rate": 1.2018921161825728e-05, "loss": 0.772, "step": 24056 }, { "epoch": 19.964315352697096, "grad_norm": 20.707672119140625, "learning_rate": 1.2018589211618258e-05, "loss": 0.3875, "step": 24057 }, { "epoch": 19.965145228215768, "grad_norm": 56.87511444091797, "learning_rate": 1.2018257261410789e-05, "loss": 1.0494, "step": 24058 }, { "epoch": 19.96597510373444, "grad_norm": 72.06826782226562, "learning_rate": 1.201792531120332e-05, "loss": 0.4047, "step": 24059 }, { "epoch": 19.966804979253112, "grad_norm": 24.678218841552734, "learning_rate": 1.2017593360995851e-05, "loss": 0.3797, "step": 24060 }, { "epoch": 19.967634854771784, "grad_norm": 28.01926040649414, "learning_rate": 1.2017261410788383e-05, "loss": 0.3238, "step": 24061 }, { "epoch": 19.968464730290457, "grad_norm": 43.69015884399414, "learning_rate": 1.2016929460580914e-05, "loss": 0.5, "step": 24062 }, { "epoch": 19.96929460580913, "grad_norm": 63.96136474609375, "learning_rate": 1.2016597510373444e-05, "loss": 1.1324, "step": 24063 }, { "epoch": 19.9701244813278, "grad_norm": 38.41462707519531, "learning_rate": 1.2016265560165976e-05, "loss": 0.3018, "step": 24064 }, { "epoch": 19.970954356846473, "grad_norm": 67.16752624511719, "learning_rate": 1.2015933609958508e-05, "loss": 1.1525, "step": 24065 }, { "epoch": 19.971784232365145, "grad_norm": 53.076988220214844, "learning_rate": 1.2015601659751039e-05, "loss": 0.4659, "step": 24066 }, { "epoch": 19.972614107883818, "grad_norm": 16.455581665039062, "learning_rate": 1.2015269709543569e-05, "loss": 0.4697, "step": 24067 }, { "epoch": 19.97344398340249, "grad_norm": 37.22460174560547, "learning_rate": 1.2014937759336101e-05, "loss": 0.3926, "step": 24068 }, { "epoch": 19.974273858921162, "grad_norm": 25.900453567504883, "learning_rate": 1.2014605809128632e-05, "loss": 0.5977, "step": 24069 }, { "epoch": 19.975103734439834, "grad_norm": 34.782073974609375, "learning_rate": 1.2014273858921164e-05, "loss": 0.715, "step": 24070 }, { "epoch": 
19.975933609958506, "grad_norm": 52.32095718383789, "learning_rate": 1.2013941908713692e-05, "loss": 0.9101, "step": 24071 }, { "epoch": 19.97676348547718, "grad_norm": 61.77006912231445, "learning_rate": 1.2013609958506225e-05, "loss": 0.5961, "step": 24072 }, { "epoch": 19.97759336099585, "grad_norm": 64.40467834472656, "learning_rate": 1.2013278008298757e-05, "loss": 0.8785, "step": 24073 }, { "epoch": 19.978423236514523, "grad_norm": 48.987060546875, "learning_rate": 1.2012946058091289e-05, "loss": 0.7697, "step": 24074 }, { "epoch": 19.979253112033195, "grad_norm": 34.034637451171875, "learning_rate": 1.2012614107883817e-05, "loss": 0.7981, "step": 24075 }, { "epoch": 19.980082987551867, "grad_norm": 37.69438171386719, "learning_rate": 1.201228215767635e-05, "loss": 0.4891, "step": 24076 }, { "epoch": 19.98091286307054, "grad_norm": 82.78060150146484, "learning_rate": 1.2011950207468882e-05, "loss": 1.0846, "step": 24077 }, { "epoch": 19.98174273858921, "grad_norm": 36.65487289428711, "learning_rate": 1.2011618257261412e-05, "loss": 0.5709, "step": 24078 }, { "epoch": 19.982572614107884, "grad_norm": 38.87962341308594, "learning_rate": 1.2011286307053943e-05, "loss": 0.9024, "step": 24079 }, { "epoch": 19.983402489626556, "grad_norm": 108.13147735595703, "learning_rate": 1.2010954356846473e-05, "loss": 0.5873, "step": 24080 }, { "epoch": 19.98423236514523, "grad_norm": 45.92655944824219, "learning_rate": 1.2010622406639005e-05, "loss": 0.6772, "step": 24081 }, { "epoch": 19.9850622406639, "grad_norm": 34.44564437866211, "learning_rate": 1.2010290456431537e-05, "loss": 0.7186, "step": 24082 }, { "epoch": 19.985892116182573, "grad_norm": 82.91189575195312, "learning_rate": 1.2009958506224066e-05, "loss": 0.3915, "step": 24083 }, { "epoch": 19.986721991701245, "grad_norm": 37.301063537597656, "learning_rate": 1.2009626556016598e-05, "loss": 0.5528, "step": 24084 }, { "epoch": 19.987551867219917, "grad_norm": 20.128276824951172, "learning_rate": 
1.200929460580913e-05, "loss": 0.3224, "step": 24085 }, { "epoch": 19.98838174273859, "grad_norm": 51.115474700927734, "learning_rate": 1.2008962655601662e-05, "loss": 0.7602, "step": 24086 }, { "epoch": 19.98921161825726, "grad_norm": 28.271257400512695, "learning_rate": 1.2008630705394191e-05, "loss": 0.4029, "step": 24087 }, { "epoch": 19.990041493775934, "grad_norm": 24.30863380432129, "learning_rate": 1.2008298755186723e-05, "loss": 0.6747, "step": 24088 }, { "epoch": 19.990871369294606, "grad_norm": 56.355342864990234, "learning_rate": 1.2007966804979253e-05, "loss": 0.9165, "step": 24089 }, { "epoch": 19.991701244813278, "grad_norm": 66.95279693603516, "learning_rate": 1.2007634854771786e-05, "loss": 0.6829, "step": 24090 }, { "epoch": 19.99253112033195, "grad_norm": 77.28800964355469, "learning_rate": 1.2007302904564316e-05, "loss": 0.4624, "step": 24091 }, { "epoch": 19.993360995850622, "grad_norm": 63.118003845214844, "learning_rate": 1.2006970954356846e-05, "loss": 1.0312, "step": 24092 }, { "epoch": 19.994190871369295, "grad_norm": 64.91676330566406, "learning_rate": 1.2006639004149378e-05, "loss": 0.7289, "step": 24093 }, { "epoch": 19.995020746887967, "grad_norm": 44.807403564453125, "learning_rate": 1.200630705394191e-05, "loss": 1.2276, "step": 24094 }, { "epoch": 19.99585062240664, "grad_norm": 72.31671905517578, "learning_rate": 1.2005975103734443e-05, "loss": 0.4793, "step": 24095 }, { "epoch": 19.99668049792531, "grad_norm": 29.60814666748047, "learning_rate": 1.2005643153526971e-05, "loss": 0.6746, "step": 24096 }, { "epoch": 19.997510373443983, "grad_norm": 52.78318405151367, "learning_rate": 1.2005311203319504e-05, "loss": 0.7573, "step": 24097 }, { "epoch": 19.998340248962656, "grad_norm": 46.558746337890625, "learning_rate": 1.2004979253112034e-05, "loss": 0.4411, "step": 24098 }, { "epoch": 19.999170124481328, "grad_norm": 32.91558837890625, "learning_rate": 1.2004647302904566e-05, "loss": 0.5774, "step": 24099 }, { "epoch": 20.0, 
"grad_norm": 39.29780960083008, "learning_rate": 1.2004315352697096e-05, "loss": 0.6438, "step": 24100 }, { "epoch": 20.000829875518672, "grad_norm": 21.295316696166992, "learning_rate": 1.2003983402489627e-05, "loss": 0.5101, "step": 24101 }, { "epoch": 20.001659751037344, "grad_norm": 36.037227630615234, "learning_rate": 1.2003651452282159e-05, "loss": 0.3227, "step": 24102 }, { "epoch": 20.002489626556017, "grad_norm": 67.18720245361328, "learning_rate": 1.2003319502074691e-05, "loss": 0.4633, "step": 24103 }, { "epoch": 20.00331950207469, "grad_norm": 30.191041946411133, "learning_rate": 1.200298755186722e-05, "loss": 0.4284, "step": 24104 }, { "epoch": 20.00414937759336, "grad_norm": 19.09278678894043, "learning_rate": 1.2002655601659752e-05, "loss": 0.2517, "step": 24105 }, { "epoch": 20.004979253112033, "grad_norm": 32.865455627441406, "learning_rate": 1.2002323651452284e-05, "loss": 0.4472, "step": 24106 }, { "epoch": 20.005809128630705, "grad_norm": 84.2457275390625, "learning_rate": 1.2001991701244814e-05, "loss": 0.3772, "step": 24107 }, { "epoch": 20.006639004149378, "grad_norm": 64.85175323486328, "learning_rate": 1.2001659751037345e-05, "loss": 0.2913, "step": 24108 }, { "epoch": 20.00746887966805, "grad_norm": 30.250078201293945, "learning_rate": 1.2001327800829877e-05, "loss": 0.5571, "step": 24109 }, { "epoch": 20.008298755186722, "grad_norm": 60.20288848876953, "learning_rate": 1.2000995850622407e-05, "loss": 1.2354, "step": 24110 }, { "epoch": 20.009128630705394, "grad_norm": 72.56114959716797, "learning_rate": 1.200066390041494e-05, "loss": 0.631, "step": 24111 }, { "epoch": 20.009958506224066, "grad_norm": 21.55632781982422, "learning_rate": 1.2000331950207468e-05, "loss": 0.2626, "step": 24112 }, { "epoch": 20.01078838174274, "grad_norm": 52.234981536865234, "learning_rate": 1.2e-05, "loss": 0.936, "step": 24113 }, { "epoch": 20.01161825726141, "grad_norm": 30.073753356933594, "learning_rate": 1.1999668049792532e-05, "loss": 0.3831, "step": 
24114 }, { "epoch": 20.012448132780083, "grad_norm": 65.38517761230469, "learning_rate": 1.1999336099585065e-05, "loss": 0.3336, "step": 24115 }, { "epoch": 20.013278008298755, "grad_norm": 23.314760208129883, "learning_rate": 1.1999004149377593e-05, "loss": 0.4556, "step": 24116 }, { "epoch": 20.014107883817427, "grad_norm": 35.4423942565918, "learning_rate": 1.1998672199170125e-05, "loss": 0.3792, "step": 24117 }, { "epoch": 20.0149377593361, "grad_norm": 61.15105056762695, "learning_rate": 1.1998340248962656e-05, "loss": 0.6429, "step": 24118 }, { "epoch": 20.01576763485477, "grad_norm": 15.614065170288086, "learning_rate": 1.1998008298755188e-05, "loss": 0.2182, "step": 24119 }, { "epoch": 20.016597510373444, "grad_norm": 75.8851318359375, "learning_rate": 1.199767634854772e-05, "loss": 0.9277, "step": 24120 }, { "epoch": 20.017427385892116, "grad_norm": 99.23001098632812, "learning_rate": 1.1997344398340249e-05, "loss": 0.616, "step": 24121 }, { "epoch": 20.01825726141079, "grad_norm": 37.1620979309082, "learning_rate": 1.199701244813278e-05, "loss": 0.3744, "step": 24122 }, { "epoch": 20.01908713692946, "grad_norm": 113.5456771850586, "learning_rate": 1.1996680497925313e-05, "loss": 0.5305, "step": 24123 }, { "epoch": 20.019917012448133, "grad_norm": 32.74360275268555, "learning_rate": 1.1996348547717845e-05, "loss": 0.3139, "step": 24124 }, { "epoch": 20.020746887966805, "grad_norm": 70.02335357666016, "learning_rate": 1.1996016597510374e-05, "loss": 0.5232, "step": 24125 }, { "epoch": 20.021576763485477, "grad_norm": 20.094995498657227, "learning_rate": 1.1995684647302906e-05, "loss": 0.3051, "step": 24126 }, { "epoch": 20.02240663900415, "grad_norm": 44.14045333862305, "learning_rate": 1.1995352697095436e-05, "loss": 0.653, "step": 24127 }, { "epoch": 20.02323651452282, "grad_norm": 29.798383712768555, "learning_rate": 1.1995020746887968e-05, "loss": 0.5534, "step": 24128 }, { "epoch": 20.024066390041494, "grad_norm": 75.85807800292969, "learning_rate": 
1.1994688796680499e-05, "loss": 0.5001, "step": 24129 }, { "epoch": 20.024896265560166, "grad_norm": 33.86444854736328, "learning_rate": 1.199435684647303e-05, "loss": 0.504, "step": 24130 }, { "epoch": 20.025726141078838, "grad_norm": 125.69618225097656, "learning_rate": 1.1994024896265561e-05, "loss": 0.6209, "step": 24131 }, { "epoch": 20.02655601659751, "grad_norm": 43.629432678222656, "learning_rate": 1.1993692946058093e-05, "loss": 0.8656, "step": 24132 }, { "epoch": 20.027385892116182, "grad_norm": 11.794194221496582, "learning_rate": 1.1993360995850622e-05, "loss": 0.2115, "step": 24133 }, { "epoch": 20.028215767634855, "grad_norm": 20.10560417175293, "learning_rate": 1.1993029045643154e-05, "loss": 0.2921, "step": 24134 }, { "epoch": 20.029045643153527, "grad_norm": 55.903358459472656, "learning_rate": 1.1992697095435686e-05, "loss": 0.6922, "step": 24135 }, { "epoch": 20.0298755186722, "grad_norm": 89.12957000732422, "learning_rate": 1.1992365145228217e-05, "loss": 0.6846, "step": 24136 }, { "epoch": 20.03070539419087, "grad_norm": 48.77976608276367, "learning_rate": 1.1992033195020747e-05, "loss": 0.43, "step": 24137 }, { "epoch": 20.031535269709543, "grad_norm": 68.23426818847656, "learning_rate": 1.199170124481328e-05, "loss": 1.106, "step": 24138 }, { "epoch": 20.032365145228216, "grad_norm": 26.912960052490234, "learning_rate": 1.199136929460581e-05, "loss": 0.4582, "step": 24139 }, { "epoch": 20.033195020746888, "grad_norm": 56.18047332763672, "learning_rate": 1.1991037344398342e-05, "loss": 0.3858, "step": 24140 }, { "epoch": 20.03402489626556, "grad_norm": 32.12263488769531, "learning_rate": 1.199070539419087e-05, "loss": 0.3875, "step": 24141 }, { "epoch": 20.034854771784232, "grad_norm": 33.69205093383789, "learning_rate": 1.1990373443983403e-05, "loss": 0.6504, "step": 24142 }, { "epoch": 20.035684647302904, "grad_norm": 38.834205627441406, "learning_rate": 1.1990041493775935e-05, "loss": 0.4125, "step": 24143 }, { "epoch": 20.036514522821577, 
"grad_norm": 35.864830017089844, "learning_rate": 1.1989709543568467e-05, "loss": 0.2994, "step": 24144 }, { "epoch": 20.03734439834025, "grad_norm": 25.52174949645996, "learning_rate": 1.1989377593360997e-05, "loss": 0.2609, "step": 24145 }, { "epoch": 20.03817427385892, "grad_norm": 53.769962310791016, "learning_rate": 1.1989045643153528e-05, "loss": 1.1479, "step": 24146 }, { "epoch": 20.039004149377593, "grad_norm": 49.49797439575195, "learning_rate": 1.198871369294606e-05, "loss": 0.8756, "step": 24147 }, { "epoch": 20.039834024896265, "grad_norm": 9.781135559082031, "learning_rate": 1.198838174273859e-05, "loss": 0.2094, "step": 24148 }, { "epoch": 20.040663900414938, "grad_norm": 39.76169204711914, "learning_rate": 1.1988049792531122e-05, "loss": 0.4535, "step": 24149 }, { "epoch": 20.04149377593361, "grad_norm": 46.35698318481445, "learning_rate": 1.1987717842323651e-05, "loss": 0.6614, "step": 24150 }, { "epoch": 20.042323651452282, "grad_norm": 33.10051345825195, "learning_rate": 1.1987385892116183e-05, "loss": 0.2812, "step": 24151 }, { "epoch": 20.043153526970954, "grad_norm": 28.595870971679688, "learning_rate": 1.1987053941908715e-05, "loss": 0.471, "step": 24152 }, { "epoch": 20.043983402489626, "grad_norm": 33.28356170654297, "learning_rate": 1.1986721991701247e-05, "loss": 0.5632, "step": 24153 }, { "epoch": 20.0448132780083, "grad_norm": 46.5262451171875, "learning_rate": 1.1986390041493776e-05, "loss": 0.7437, "step": 24154 }, { "epoch": 20.04564315352697, "grad_norm": 116.05364227294922, "learning_rate": 1.1986058091286308e-05, "loss": 0.9271, "step": 24155 }, { "epoch": 20.046473029045643, "grad_norm": 13.566553115844727, "learning_rate": 1.198572614107884e-05, "loss": 0.2554, "step": 24156 }, { "epoch": 20.047302904564315, "grad_norm": 46.530757904052734, "learning_rate": 1.198539419087137e-05, "loss": 0.3364, "step": 24157 }, { "epoch": 20.048132780082987, "grad_norm": 37.78620910644531, "learning_rate": 1.1985062240663901e-05, "loss": 
0.5232, "step": 24158 }, { "epoch": 20.04896265560166, "grad_norm": 18.06694793701172, "learning_rate": 1.1984730290456432e-05, "loss": 0.3642, "step": 24159 }, { "epoch": 20.04979253112033, "grad_norm": 40.3496208190918, "learning_rate": 1.1984398340248964e-05, "loss": 0.4772, "step": 24160 }, { "epoch": 20.050622406639004, "grad_norm": 28.10236358642578, "learning_rate": 1.1984066390041496e-05, "loss": 0.6073, "step": 24161 }, { "epoch": 20.051452282157676, "grad_norm": 50.69245529174805, "learning_rate": 1.1983734439834024e-05, "loss": 0.4268, "step": 24162 }, { "epoch": 20.05228215767635, "grad_norm": 32.70517349243164, "learning_rate": 1.1983402489626557e-05, "loss": 0.4642, "step": 24163 }, { "epoch": 20.05311203319502, "grad_norm": 54.3905143737793, "learning_rate": 1.1983070539419089e-05, "loss": 0.6324, "step": 24164 }, { "epoch": 20.053941908713693, "grad_norm": 25.005695343017578, "learning_rate": 1.1982738589211619e-05, "loss": 0.2519, "step": 24165 }, { "epoch": 20.054771784232365, "grad_norm": 48.78412628173828, "learning_rate": 1.198240663900415e-05, "loss": 0.4632, "step": 24166 }, { "epoch": 20.055601659751037, "grad_norm": 23.96294403076172, "learning_rate": 1.1982074688796682e-05, "loss": 0.3417, "step": 24167 }, { "epoch": 20.05643153526971, "grad_norm": 21.70981788635254, "learning_rate": 1.1981742738589212e-05, "loss": 0.3019, "step": 24168 }, { "epoch": 20.05726141078838, "grad_norm": 64.6502914428711, "learning_rate": 1.1981410788381744e-05, "loss": 0.4493, "step": 24169 }, { "epoch": 20.058091286307054, "grad_norm": 99.87894439697266, "learning_rate": 1.1981078838174275e-05, "loss": 0.6993, "step": 24170 }, { "epoch": 20.058921161825726, "grad_norm": 73.71599578857422, "learning_rate": 1.1980746887966805e-05, "loss": 0.396, "step": 24171 }, { "epoch": 20.059751037344398, "grad_norm": 33.02164840698242, "learning_rate": 1.1980414937759337e-05, "loss": 0.6626, "step": 24172 }, { "epoch": 20.06058091286307, "grad_norm": 8.254907608032227, 
"learning_rate": 1.198008298755187e-05, "loss": 0.2184, "step": 24173 }, { "epoch": 20.061410788381743, "grad_norm": 33.777099609375, "learning_rate": 1.19797510373444e-05, "loss": 0.4881, "step": 24174 }, { "epoch": 20.062240663900415, "grad_norm": 67.07471466064453, "learning_rate": 1.197941908713693e-05, "loss": 0.88, "step": 24175 }, { "epoch": 20.063070539419087, "grad_norm": 41.03260040283203, "learning_rate": 1.1979087136929462e-05, "loss": 0.5844, "step": 24176 }, { "epoch": 20.06390041493776, "grad_norm": 56.84727478027344, "learning_rate": 1.1978755186721993e-05, "loss": 0.8202, "step": 24177 }, { "epoch": 20.06473029045643, "grad_norm": 11.56972885131836, "learning_rate": 1.1978423236514525e-05, "loss": 0.2142, "step": 24178 }, { "epoch": 20.065560165975104, "grad_norm": 11.327668190002441, "learning_rate": 1.1978091286307055e-05, "loss": 0.2659, "step": 24179 }, { "epoch": 20.066390041493776, "grad_norm": 38.42555236816406, "learning_rate": 1.1977759336099585e-05, "loss": 0.6568, "step": 24180 }, { "epoch": 20.067219917012448, "grad_norm": 34.87081527709961, "learning_rate": 1.1977427385892118e-05, "loss": 0.5556, "step": 24181 }, { "epoch": 20.06804979253112, "grad_norm": 39.49409866333008, "learning_rate": 1.197709543568465e-05, "loss": 0.6123, "step": 24182 }, { "epoch": 20.068879668049792, "grad_norm": 15.212105751037598, "learning_rate": 1.1976763485477178e-05, "loss": 0.2957, "step": 24183 }, { "epoch": 20.069709543568464, "grad_norm": 46.50784683227539, "learning_rate": 1.197643153526971e-05, "loss": 0.2734, "step": 24184 }, { "epoch": 20.070539419087137, "grad_norm": 80.69664001464844, "learning_rate": 1.1976099585062243e-05, "loss": 0.5769, "step": 24185 }, { "epoch": 20.07136929460581, "grad_norm": 35.02699661254883, "learning_rate": 1.1975767634854773e-05, "loss": 0.3077, "step": 24186 }, { "epoch": 20.07219917012448, "grad_norm": 36.2513427734375, "learning_rate": 1.1975435684647303e-05, "loss": 0.5623, "step": 24187 }, { "epoch": 
20.073029045643153, "grad_norm": 54.9249267578125, "learning_rate": 1.1975103734439834e-05, "loss": 0.4986, "step": 24188 }, { "epoch": 20.073858921161825, "grad_norm": 31.840959548950195, "learning_rate": 1.1974771784232366e-05, "loss": 0.5199, "step": 24189 }, { "epoch": 20.074688796680498, "grad_norm": 37.36648941040039, "learning_rate": 1.1974439834024898e-05, "loss": 0.5537, "step": 24190 }, { "epoch": 20.07551867219917, "grad_norm": 39.99020004272461, "learning_rate": 1.1974107883817427e-05, "loss": 0.8519, "step": 24191 }, { "epoch": 20.076348547717842, "grad_norm": 40.55805969238281, "learning_rate": 1.1973775933609959e-05, "loss": 0.5036, "step": 24192 }, { "epoch": 20.077178423236514, "grad_norm": 43.661537170410156, "learning_rate": 1.1973443983402491e-05, "loss": 0.7002, "step": 24193 }, { "epoch": 20.078008298755186, "grad_norm": 50.37960433959961, "learning_rate": 1.1973112033195023e-05, "loss": 0.5514, "step": 24194 }, { "epoch": 20.07883817427386, "grad_norm": 12.140332221984863, "learning_rate": 1.1972780082987552e-05, "loss": 0.1764, "step": 24195 }, { "epoch": 20.07966804979253, "grad_norm": 54.12191390991211, "learning_rate": 1.1972448132780084e-05, "loss": 0.7642, "step": 24196 }, { "epoch": 20.080497925311203, "grad_norm": 39.1712532043457, "learning_rate": 1.1972116182572614e-05, "loss": 0.6412, "step": 24197 }, { "epoch": 20.081327800829875, "grad_norm": 11.554394721984863, "learning_rate": 1.1971784232365146e-05, "loss": 0.2902, "step": 24198 }, { "epoch": 20.082157676348547, "grad_norm": 14.402520179748535, "learning_rate": 1.1971452282157679e-05, "loss": 0.2551, "step": 24199 }, { "epoch": 20.08298755186722, "grad_norm": 90.26815032958984, "learning_rate": 1.1971120331950207e-05, "loss": 0.6467, "step": 24200 }, { "epoch": 20.083817427385892, "grad_norm": 22.21133041381836, "learning_rate": 1.197078838174274e-05, "loss": 0.5154, "step": 24201 }, { "epoch": 20.084647302904564, "grad_norm": 39.92775344848633, "learning_rate": 
1.1970456431535271e-05, "loss": 0.2948, "step": 24202 }, { "epoch": 20.085477178423236, "grad_norm": 14.65491008758545, "learning_rate": 1.1970124481327804e-05, "loss": 0.387, "step": 24203 }, { "epoch": 20.08630705394191, "grad_norm": 31.113994598388672, "learning_rate": 1.1969792531120332e-05, "loss": 0.4418, "step": 24204 }, { "epoch": 20.08713692946058, "grad_norm": 53.81993865966797, "learning_rate": 1.1969460580912864e-05, "loss": 0.5099, "step": 24205 }, { "epoch": 20.087966804979253, "grad_norm": 13.47651195526123, "learning_rate": 1.1969128630705395e-05, "loss": 0.3658, "step": 24206 }, { "epoch": 20.088796680497925, "grad_norm": 41.03876876831055, "learning_rate": 1.1968796680497927e-05, "loss": 0.8092, "step": 24207 }, { "epoch": 20.089626556016597, "grad_norm": 124.53321075439453, "learning_rate": 1.1968464730290457e-05, "loss": 1.0423, "step": 24208 }, { "epoch": 20.09045643153527, "grad_norm": 31.268959045410156, "learning_rate": 1.1968132780082988e-05, "loss": 0.887, "step": 24209 }, { "epoch": 20.09128630705394, "grad_norm": 44.90869903564453, "learning_rate": 1.196780082987552e-05, "loss": 0.3779, "step": 24210 }, { "epoch": 20.092116182572614, "grad_norm": 36.931785583496094, "learning_rate": 1.1967468879668052e-05, "loss": 0.3165, "step": 24211 }, { "epoch": 20.092946058091286, "grad_norm": 32.24037170410156, "learning_rate": 1.196713692946058e-05, "loss": 0.3342, "step": 24212 }, { "epoch": 20.093775933609958, "grad_norm": 76.228271484375, "learning_rate": 1.1966804979253113e-05, "loss": 0.9342, "step": 24213 }, { "epoch": 20.09460580912863, "grad_norm": 66.43559265136719, "learning_rate": 1.1966473029045645e-05, "loss": 1.1149, "step": 24214 }, { "epoch": 20.095435684647303, "grad_norm": 36.466896057128906, "learning_rate": 1.1966141078838175e-05, "loss": 0.4512, "step": 24215 }, { "epoch": 20.096265560165975, "grad_norm": 52.11326599121094, "learning_rate": 1.1965809128630706e-05, "loss": 0.7708, "step": 24216 }, { "epoch": 20.097095435684647, 
"grad_norm": 13.166604042053223, "learning_rate": 1.1965477178423238e-05, "loss": 0.2603, "step": 24217 }, { "epoch": 20.09792531120332, "grad_norm": 18.36313819885254, "learning_rate": 1.1965145228215768e-05, "loss": 0.2988, "step": 24218 }, { "epoch": 20.09875518672199, "grad_norm": 52.14692306518555, "learning_rate": 1.19648132780083e-05, "loss": 0.3744, "step": 24219 }, { "epoch": 20.099585062240664, "grad_norm": 49.358985900878906, "learning_rate": 1.1964481327800829e-05, "loss": 0.508, "step": 24220 }, { "epoch": 20.100414937759336, "grad_norm": 44.01761245727539, "learning_rate": 1.1964149377593361e-05, "loss": 1.0224, "step": 24221 }, { "epoch": 20.101244813278008, "grad_norm": 18.657716751098633, "learning_rate": 1.1963817427385893e-05, "loss": 0.5045, "step": 24222 }, { "epoch": 20.10207468879668, "grad_norm": 48.80533218383789, "learning_rate": 1.1963485477178425e-05, "loss": 1.1391, "step": 24223 }, { "epoch": 20.102904564315352, "grad_norm": 76.934326171875, "learning_rate": 1.1963153526970956e-05, "loss": 0.5933, "step": 24224 }, { "epoch": 20.103734439834025, "grad_norm": 16.106849670410156, "learning_rate": 1.1962821576763486e-05, "loss": 0.4022, "step": 24225 }, { "epoch": 20.104564315352697, "grad_norm": 75.54058074951172, "learning_rate": 1.1962489626556018e-05, "loss": 0.3874, "step": 24226 }, { "epoch": 20.10539419087137, "grad_norm": 56.65399932861328, "learning_rate": 1.1962157676348549e-05, "loss": 0.746, "step": 24227 }, { "epoch": 20.10622406639004, "grad_norm": 15.617894172668457, "learning_rate": 1.1961825726141081e-05, "loss": 0.3139, "step": 24228 }, { "epoch": 20.107053941908713, "grad_norm": 64.44649505615234, "learning_rate": 1.196149377593361e-05, "loss": 0.6745, "step": 24229 }, { "epoch": 20.107883817427386, "grad_norm": 40.17529296875, "learning_rate": 1.1961161825726142e-05, "loss": 0.2686, "step": 24230 }, { "epoch": 20.108713692946058, "grad_norm": 14.79927921295166, "learning_rate": 1.1960829875518674e-05, "loss": 0.2047, 
"step": 24231 }, { "epoch": 20.10954356846473, "grad_norm": 85.9955825805664, "learning_rate": 1.1960497925311206e-05, "loss": 0.8052, "step": 24232 }, { "epoch": 20.110373443983402, "grad_norm": 40.09718704223633, "learning_rate": 1.1960165975103735e-05, "loss": 0.8862, "step": 24233 }, { "epoch": 20.111203319502074, "grad_norm": 35.160682678222656, "learning_rate": 1.1959834024896267e-05, "loss": 0.6238, "step": 24234 }, { "epoch": 20.112033195020746, "grad_norm": 69.02357482910156, "learning_rate": 1.1959502074688797e-05, "loss": 0.6305, "step": 24235 }, { "epoch": 20.11286307053942, "grad_norm": 31.149837493896484, "learning_rate": 1.195917012448133e-05, "loss": 0.5362, "step": 24236 }, { "epoch": 20.11369294605809, "grad_norm": 34.96137619018555, "learning_rate": 1.195883817427386e-05, "loss": 0.5294, "step": 24237 }, { "epoch": 20.114522821576763, "grad_norm": 54.80592346191406, "learning_rate": 1.195850622406639e-05, "loss": 0.3606, "step": 24238 }, { "epoch": 20.115352697095435, "grad_norm": 73.08020782470703, "learning_rate": 1.1958174273858922e-05, "loss": 1.0183, "step": 24239 }, { "epoch": 20.116182572614107, "grad_norm": 22.991334915161133, "learning_rate": 1.1957842323651454e-05, "loss": 0.3228, "step": 24240 }, { "epoch": 20.11701244813278, "grad_norm": 92.50263214111328, "learning_rate": 1.1957510373443983e-05, "loss": 0.7844, "step": 24241 }, { "epoch": 20.117842323651452, "grad_norm": 34.191978454589844, "learning_rate": 1.1957178423236515e-05, "loss": 0.307, "step": 24242 }, { "epoch": 20.118672199170124, "grad_norm": 51.31660461425781, "learning_rate": 1.1956846473029047e-05, "loss": 0.3655, "step": 24243 }, { "epoch": 20.119502074688796, "grad_norm": 36.78668212890625, "learning_rate": 1.1956514522821578e-05, "loss": 0.4378, "step": 24244 }, { "epoch": 20.12033195020747, "grad_norm": 73.55619812011719, "learning_rate": 1.1956182572614108e-05, "loss": 0.5597, "step": 24245 }, { "epoch": 20.12116182572614, "grad_norm": 46.65239715576172, 
"learning_rate": 1.195585062240664e-05, "loss": 0.8656, "step": 24246 }, { "epoch": 20.121991701244813, "grad_norm": 28.49964141845703, "learning_rate": 1.195551867219917e-05, "loss": 0.4286, "step": 24247 }, { "epoch": 20.122821576763485, "grad_norm": 47.673606872558594, "learning_rate": 1.1955186721991703e-05, "loss": 0.5339, "step": 24248 }, { "epoch": 20.123651452282157, "grad_norm": 136.23593139648438, "learning_rate": 1.1954854771784231e-05, "loss": 1.083, "step": 24249 }, { "epoch": 20.12448132780083, "grad_norm": 40.378353118896484, "learning_rate": 1.1954522821576764e-05, "loss": 0.8134, "step": 24250 }, { "epoch": 20.1253112033195, "grad_norm": 35.63267135620117, "learning_rate": 1.1954190871369296e-05, "loss": 0.3755, "step": 24251 }, { "epoch": 20.126141078838174, "grad_norm": 38.35243225097656, "learning_rate": 1.1953858921161828e-05, "loss": 0.6831, "step": 24252 }, { "epoch": 20.126970954356846, "grad_norm": 74.30098724365234, "learning_rate": 1.1953526970954358e-05, "loss": 0.5084, "step": 24253 }, { "epoch": 20.127800829875518, "grad_norm": 26.526517868041992, "learning_rate": 1.1953195020746889e-05, "loss": 0.4604, "step": 24254 }, { "epoch": 20.12863070539419, "grad_norm": 106.79206085205078, "learning_rate": 1.195286307053942e-05, "loss": 0.8809, "step": 24255 }, { "epoch": 20.129460580912863, "grad_norm": 22.037492752075195, "learning_rate": 1.1952531120331951e-05, "loss": 0.2727, "step": 24256 }, { "epoch": 20.130290456431535, "grad_norm": 35.90711212158203, "learning_rate": 1.1952199170124483e-05, "loss": 0.5663, "step": 24257 }, { "epoch": 20.131120331950207, "grad_norm": 21.501129150390625, "learning_rate": 1.1951867219917012e-05, "loss": 0.2817, "step": 24258 }, { "epoch": 20.13195020746888, "grad_norm": 41.6331787109375, "learning_rate": 1.1951535269709544e-05, "loss": 0.6132, "step": 24259 }, { "epoch": 20.13278008298755, "grad_norm": 69.12740325927734, "learning_rate": 1.1951203319502076e-05, "loss": 0.3764, "step": 24260 }, { "epoch": 
20.133609958506224, "grad_norm": 53.88090133666992, "learning_rate": 1.1950871369294608e-05, "loss": 0.4052, "step": 24261 }, { "epoch": 20.134439834024896, "grad_norm": 82.83998107910156, "learning_rate": 1.1950539419087137e-05, "loss": 0.5516, "step": 24262 }, { "epoch": 20.135269709543568, "grad_norm": 26.132368087768555, "learning_rate": 1.1950207468879669e-05, "loss": 0.3085, "step": 24263 }, { "epoch": 20.13609958506224, "grad_norm": 24.899494171142578, "learning_rate": 1.1949875518672201e-05, "loss": 0.5051, "step": 24264 }, { "epoch": 20.136929460580912, "grad_norm": 43.91547393798828, "learning_rate": 1.1949543568464732e-05, "loss": 0.5127, "step": 24265 }, { "epoch": 20.137759336099585, "grad_norm": 12.79809856414795, "learning_rate": 1.1949211618257262e-05, "loss": 0.2607, "step": 24266 }, { "epoch": 20.138589211618257, "grad_norm": 40.676414489746094, "learning_rate": 1.1948879668049792e-05, "loss": 0.471, "step": 24267 }, { "epoch": 20.13941908713693, "grad_norm": 25.13785171508789, "learning_rate": 1.1948547717842325e-05, "loss": 0.6068, "step": 24268 }, { "epoch": 20.1402489626556, "grad_norm": 27.421667098999023, "learning_rate": 1.1948215767634857e-05, "loss": 0.3404, "step": 24269 }, { "epoch": 20.141078838174273, "grad_norm": 33.355037689208984, "learning_rate": 1.1947883817427385e-05, "loss": 0.7368, "step": 24270 }, { "epoch": 20.141908713692946, "grad_norm": 22.23029899597168, "learning_rate": 1.1947551867219917e-05, "loss": 0.3018, "step": 24271 }, { "epoch": 20.142738589211618, "grad_norm": 54.320526123046875, "learning_rate": 1.194721991701245e-05, "loss": 0.6582, "step": 24272 }, { "epoch": 20.14356846473029, "grad_norm": 54.7995719909668, "learning_rate": 1.1946887966804982e-05, "loss": 0.6377, "step": 24273 }, { "epoch": 20.144398340248962, "grad_norm": 27.426528930664062, "learning_rate": 1.194655601659751e-05, "loss": 0.6059, "step": 24274 }, { "epoch": 20.145228215767634, "grad_norm": 51.12004089355469, "learning_rate": 
1.1946224066390042e-05, "loss": 0.6135, "step": 24275 }, { "epoch": 20.146058091286307, "grad_norm": 39.51173400878906, "learning_rate": 1.1945892116182573e-05, "loss": 0.5635, "step": 24276 }, { "epoch": 20.14688796680498, "grad_norm": 21.516353607177734, "learning_rate": 1.1945560165975105e-05, "loss": 0.3099, "step": 24277 }, { "epoch": 20.14771784232365, "grad_norm": 40.53506851196289, "learning_rate": 1.1945228215767637e-05, "loss": 0.7039, "step": 24278 }, { "epoch": 20.148547717842323, "grad_norm": 23.53040313720703, "learning_rate": 1.1944896265560166e-05, "loss": 0.3986, "step": 24279 }, { "epoch": 20.149377593360995, "grad_norm": 46.95442581176758, "learning_rate": 1.1944564315352698e-05, "loss": 0.3627, "step": 24280 }, { "epoch": 20.150207468879668, "grad_norm": 11.46721363067627, "learning_rate": 1.194423236514523e-05, "loss": 0.241, "step": 24281 }, { "epoch": 20.15103734439834, "grad_norm": 15.56885051727295, "learning_rate": 1.194390041493776e-05, "loss": 0.2586, "step": 24282 }, { "epoch": 20.151867219917012, "grad_norm": 22.176420211791992, "learning_rate": 1.1943568464730291e-05, "loss": 0.2441, "step": 24283 }, { "epoch": 20.152697095435684, "grad_norm": 39.284976959228516, "learning_rate": 1.1943236514522823e-05, "loss": 1.0187, "step": 24284 }, { "epoch": 20.153526970954356, "grad_norm": 71.14929962158203, "learning_rate": 1.1942904564315353e-05, "loss": 1.0229, "step": 24285 }, { "epoch": 20.15435684647303, "grad_norm": 33.33713912963867, "learning_rate": 1.1942572614107886e-05, "loss": 0.5589, "step": 24286 }, { "epoch": 20.1551867219917, "grad_norm": 59.33892059326172, "learning_rate": 1.1942240663900416e-05, "loss": 0.5443, "step": 24287 }, { "epoch": 20.156016597510373, "grad_norm": 28.952468872070312, "learning_rate": 1.1941908713692946e-05, "loss": 0.4888, "step": 24288 }, { "epoch": 20.156846473029045, "grad_norm": 81.87203979492188, "learning_rate": 1.1941576763485478e-05, "loss": 0.4821, "step": 24289 }, { "epoch": 
20.157676348547717, "grad_norm": 41.94670104980469, "learning_rate": 1.194124481327801e-05, "loss": 0.4121, "step": 24290 }, { "epoch": 20.15850622406639, "grad_norm": 87.57600402832031, "learning_rate": 1.194091286307054e-05, "loss": 0.5293, "step": 24291 }, { "epoch": 20.15933609958506, "grad_norm": 24.603498458862305, "learning_rate": 1.1940580912863071e-05, "loss": 0.2669, "step": 24292 }, { "epoch": 20.160165975103734, "grad_norm": 27.6824951171875, "learning_rate": 1.1940248962655603e-05, "loss": 0.338, "step": 24293 }, { "epoch": 20.160995850622406, "grad_norm": 14.940285682678223, "learning_rate": 1.1939917012448134e-05, "loss": 0.2697, "step": 24294 }, { "epoch": 20.16182572614108, "grad_norm": 34.51017379760742, "learning_rate": 1.1939585062240664e-05, "loss": 0.8855, "step": 24295 }, { "epoch": 20.16265560165975, "grad_norm": 22.4537410736084, "learning_rate": 1.1939253112033196e-05, "loss": 0.3186, "step": 24296 }, { "epoch": 20.163485477178423, "grad_norm": 23.05934715270996, "learning_rate": 1.1938921161825727e-05, "loss": 0.3013, "step": 24297 }, { "epoch": 20.164315352697095, "grad_norm": 41.4232177734375, "learning_rate": 1.1938589211618259e-05, "loss": 0.5393, "step": 24298 }, { "epoch": 20.165145228215767, "grad_norm": 48.757598876953125, "learning_rate": 1.1938257261410788e-05, "loss": 1.377, "step": 24299 }, { "epoch": 20.16597510373444, "grad_norm": 17.1500186920166, "learning_rate": 1.193792531120332e-05, "loss": 0.3331, "step": 24300 }, { "epoch": 20.16680497925311, "grad_norm": 21.16400146484375, "learning_rate": 1.1937593360995852e-05, "loss": 0.5079, "step": 24301 }, { "epoch": 20.167634854771784, "grad_norm": 44.263755798339844, "learning_rate": 1.1937261410788384e-05, "loss": 0.5714, "step": 24302 }, { "epoch": 20.168464730290456, "grad_norm": 55.6775016784668, "learning_rate": 1.1936929460580913e-05, "loss": 0.393, "step": 24303 }, { "epoch": 20.169294605809128, "grad_norm": 59.84079360961914, "learning_rate": 1.1936597510373445e-05, 
"loss": 0.7233, "step": 24304 }, { "epoch": 20.1701244813278, "grad_norm": 33.5845947265625, "learning_rate": 1.1936265560165975e-05, "loss": 0.4978, "step": 24305 }, { "epoch": 20.170954356846472, "grad_norm": 14.368537902832031, "learning_rate": 1.1935933609958507e-05, "loss": 0.3056, "step": 24306 }, { "epoch": 20.171784232365145, "grad_norm": 47.62522888183594, "learning_rate": 1.193560165975104e-05, "loss": 0.5676, "step": 24307 }, { "epoch": 20.172614107883817, "grad_norm": 40.12052917480469, "learning_rate": 1.1935269709543568e-05, "loss": 0.4328, "step": 24308 }, { "epoch": 20.17344398340249, "grad_norm": 70.46415710449219, "learning_rate": 1.19349377593361e-05, "loss": 0.4754, "step": 24309 }, { "epoch": 20.17427385892116, "grad_norm": 35.474884033203125, "learning_rate": 1.1934605809128632e-05, "loss": 0.7468, "step": 24310 }, { "epoch": 20.175103734439833, "grad_norm": 11.340543746948242, "learning_rate": 1.1934273858921164e-05, "loss": 0.2713, "step": 24311 }, { "epoch": 20.175933609958506, "grad_norm": 22.93026351928711, "learning_rate": 1.1933941908713693e-05, "loss": 0.3697, "step": 24312 }, { "epoch": 20.176763485477178, "grad_norm": 17.280872344970703, "learning_rate": 1.1933609958506225e-05, "loss": 0.3458, "step": 24313 }, { "epoch": 20.17759336099585, "grad_norm": 39.27004623413086, "learning_rate": 1.1933278008298756e-05, "loss": 0.6197, "step": 24314 }, { "epoch": 20.178423236514522, "grad_norm": 22.491769790649414, "learning_rate": 1.1932946058091288e-05, "loss": 0.4097, "step": 24315 }, { "epoch": 20.179253112033194, "grad_norm": 41.06846618652344, "learning_rate": 1.1932614107883818e-05, "loss": 0.2226, "step": 24316 }, { "epoch": 20.180082987551867, "grad_norm": 100.2371826171875, "learning_rate": 1.1932282157676349e-05, "loss": 0.6891, "step": 24317 }, { "epoch": 20.18091286307054, "grad_norm": 36.316829681396484, "learning_rate": 1.193195020746888e-05, "loss": 0.3212, "step": 24318 }, { "epoch": 20.18174273858921, "grad_norm": 
51.07666015625, "learning_rate": 1.1931618257261413e-05, "loss": 0.8119, "step": 24319 }, { "epoch": 20.182572614107883, "grad_norm": 20.691381454467773, "learning_rate": 1.1931286307053942e-05, "loss": 0.3522, "step": 24320 }, { "epoch": 20.183402489626555, "grad_norm": 43.839508056640625, "learning_rate": 1.1930954356846474e-05, "loss": 0.4161, "step": 24321 }, { "epoch": 20.184232365145228, "grad_norm": 11.47905158996582, "learning_rate": 1.1930622406639006e-05, "loss": 0.3422, "step": 24322 }, { "epoch": 20.1850622406639, "grad_norm": 13.760342597961426, "learning_rate": 1.1930290456431536e-05, "loss": 0.2999, "step": 24323 }, { "epoch": 20.185892116182572, "grad_norm": 23.090782165527344, "learning_rate": 1.1929958506224067e-05, "loss": 0.3448, "step": 24324 }, { "epoch": 20.186721991701244, "grad_norm": 32.85417556762695, "learning_rate": 1.1929626556016599e-05, "loss": 0.4337, "step": 24325 }, { "epoch": 20.187551867219916, "grad_norm": 23.496509552001953, "learning_rate": 1.192929460580913e-05, "loss": 0.3301, "step": 24326 }, { "epoch": 20.18838174273859, "grad_norm": 50.39927673339844, "learning_rate": 1.1928962655601661e-05, "loss": 0.5819, "step": 24327 }, { "epoch": 20.18921161825726, "grad_norm": 126.3863754272461, "learning_rate": 1.192863070539419e-05, "loss": 0.7926, "step": 24328 }, { "epoch": 20.190041493775933, "grad_norm": 22.5928955078125, "learning_rate": 1.1928298755186722e-05, "loss": 0.8226, "step": 24329 }, { "epoch": 20.190871369294605, "grad_norm": 35.0935173034668, "learning_rate": 1.1927966804979254e-05, "loss": 0.5531, "step": 24330 }, { "epoch": 20.191701244813277, "grad_norm": 87.70077514648438, "learning_rate": 1.1927634854771786e-05, "loss": 0.8398, "step": 24331 }, { "epoch": 20.19253112033195, "grad_norm": 119.8255844116211, "learning_rate": 1.1927302904564317e-05, "loss": 0.6855, "step": 24332 }, { "epoch": 20.19336099585062, "grad_norm": 32.99205780029297, "learning_rate": 1.1926970954356847e-05, "loss": 0.388, "step": 24333 
}, { "epoch": 20.194190871369294, "grad_norm": 18.63002586364746, "learning_rate": 1.192663900414938e-05, "loss": 0.3657, "step": 24334 }, { "epoch": 20.195020746887966, "grad_norm": 22.029268264770508, "learning_rate": 1.192630705394191e-05, "loss": 0.2382, "step": 24335 }, { "epoch": 20.19585062240664, "grad_norm": 66.4735107421875, "learning_rate": 1.1925975103734442e-05, "loss": 0.5186, "step": 24336 }, { "epoch": 20.19668049792531, "grad_norm": 31.845430374145508, "learning_rate": 1.192564315352697e-05, "loss": 0.597, "step": 24337 }, { "epoch": 20.197510373443983, "grad_norm": 25.247697830200195, "learning_rate": 1.1925311203319503e-05, "loss": 0.377, "step": 24338 }, { "epoch": 20.198340248962655, "grad_norm": 41.00906753540039, "learning_rate": 1.1924979253112035e-05, "loss": 0.8232, "step": 24339 }, { "epoch": 20.199170124481327, "grad_norm": 49.191192626953125, "learning_rate": 1.1924647302904567e-05, "loss": 0.7744, "step": 24340 }, { "epoch": 20.2, "grad_norm": 22.857877731323242, "learning_rate": 1.1924315352697096e-05, "loss": 0.3472, "step": 24341 }, { "epoch": 20.20082987551867, "grad_norm": 73.12306213378906, "learning_rate": 1.1923983402489628e-05, "loss": 0.4431, "step": 24342 }, { "epoch": 20.201659751037344, "grad_norm": 37.701316833496094, "learning_rate": 1.192365145228216e-05, "loss": 0.4336, "step": 24343 }, { "epoch": 20.202489626556016, "grad_norm": 72.38722229003906, "learning_rate": 1.192331950207469e-05, "loss": 0.7467, "step": 24344 }, { "epoch": 20.203319502074688, "grad_norm": 111.46485137939453, "learning_rate": 1.192298755186722e-05, "loss": 1.1562, "step": 24345 }, { "epoch": 20.20414937759336, "grad_norm": 87.70343780517578, "learning_rate": 1.1922655601659751e-05, "loss": 0.5891, "step": 24346 }, { "epoch": 20.204979253112032, "grad_norm": 18.77153205871582, "learning_rate": 1.1922323651452283e-05, "loss": 0.2855, "step": 24347 }, { "epoch": 20.205809128630705, "grad_norm": 29.84185218811035, "learning_rate": 
1.1921991701244815e-05, "loss": 0.497, "step": 24348 }, { "epoch": 20.206639004149377, "grad_norm": 37.599124908447266, "learning_rate": 1.1921659751037344e-05, "loss": 0.65, "step": 24349 }, { "epoch": 20.20746887966805, "grad_norm": 17.00146484375, "learning_rate": 1.1921327800829876e-05, "loss": 0.2389, "step": 24350 }, { "epoch": 20.20829875518672, "grad_norm": 24.189359664916992, "learning_rate": 1.1920995850622408e-05, "loss": 0.3781, "step": 24351 }, { "epoch": 20.209128630705393, "grad_norm": 70.26386260986328, "learning_rate": 1.1920663900414939e-05, "loss": 0.8343, "step": 24352 }, { "epoch": 20.209958506224066, "grad_norm": 60.292694091796875, "learning_rate": 1.1920331950207469e-05, "loss": 0.5284, "step": 24353 }, { "epoch": 20.210788381742738, "grad_norm": 80.72036743164062, "learning_rate": 1.1920000000000001e-05, "loss": 1.0642, "step": 24354 }, { "epoch": 20.21161825726141, "grad_norm": 49.67361831665039, "learning_rate": 1.1919668049792531e-05, "loss": 0.579, "step": 24355 }, { "epoch": 20.212448132780082, "grad_norm": 37.754249572753906, "learning_rate": 1.1919336099585064e-05, "loss": 0.68, "step": 24356 }, { "epoch": 20.213278008298754, "grad_norm": 40.486968994140625, "learning_rate": 1.1919004149377596e-05, "loss": 0.4593, "step": 24357 }, { "epoch": 20.214107883817427, "grad_norm": 14.995272636413574, "learning_rate": 1.1918672199170124e-05, "loss": 0.4321, "step": 24358 }, { "epoch": 20.2149377593361, "grad_norm": 33.41884231567383, "learning_rate": 1.1918340248962657e-05, "loss": 0.6144, "step": 24359 }, { "epoch": 20.21576763485477, "grad_norm": 43.82008361816406, "learning_rate": 1.1918008298755189e-05, "loss": 0.4205, "step": 24360 }, { "epoch": 20.216597510373443, "grad_norm": 46.077110290527344, "learning_rate": 1.1917676348547719e-05, "loss": 0.3316, "step": 24361 }, { "epoch": 20.217427385892115, "grad_norm": 96.0908432006836, "learning_rate": 1.191734439834025e-05, "loss": 0.6736, "step": 24362 }, { "epoch": 20.218257261410788, 
"grad_norm": 29.632165908813477, "learning_rate": 1.1917012448132782e-05, "loss": 0.6179, "step": 24363 }, { "epoch": 20.21908713692946, "grad_norm": 53.484703063964844, "learning_rate": 1.1916680497925312e-05, "loss": 0.8809, "step": 24364 }, { "epoch": 20.219917012448132, "grad_norm": 45.881710052490234, "learning_rate": 1.1916348547717844e-05, "loss": 1.1223, "step": 24365 }, { "epoch": 20.220746887966804, "grad_norm": 26.766830444335938, "learning_rate": 1.1916016597510373e-05, "loss": 0.4568, "step": 24366 }, { "epoch": 20.221576763485476, "grad_norm": 29.45906639099121, "learning_rate": 1.1915684647302905e-05, "loss": 0.548, "step": 24367 }, { "epoch": 20.22240663900415, "grad_norm": 40.635284423828125, "learning_rate": 1.1915352697095437e-05, "loss": 0.3428, "step": 24368 }, { "epoch": 20.22323651452282, "grad_norm": 33.68731689453125, "learning_rate": 1.1915020746887969e-05, "loss": 0.3965, "step": 24369 }, { "epoch": 20.224066390041493, "grad_norm": 19.52891731262207, "learning_rate": 1.1914688796680498e-05, "loss": 0.3459, "step": 24370 }, { "epoch": 20.224896265560165, "grad_norm": 59.562522888183594, "learning_rate": 1.191435684647303e-05, "loss": 1.2534, "step": 24371 }, { "epoch": 20.225726141078837, "grad_norm": 99.73126220703125, "learning_rate": 1.1914024896265562e-05, "loss": 1.5589, "step": 24372 }, { "epoch": 20.22655601659751, "grad_norm": 60.325782775878906, "learning_rate": 1.1913692946058092e-05, "loss": 0.4846, "step": 24373 }, { "epoch": 20.22738589211618, "grad_norm": 208.66720581054688, "learning_rate": 1.1913360995850623e-05, "loss": 0.5234, "step": 24374 }, { "epoch": 20.228215767634854, "grad_norm": 22.655838012695312, "learning_rate": 1.1913029045643153e-05, "loss": 0.3886, "step": 24375 }, { "epoch": 20.229045643153526, "grad_norm": 45.27436828613281, "learning_rate": 1.1912697095435685e-05, "loss": 0.4619, "step": 24376 }, { "epoch": 20.2298755186722, "grad_norm": 18.26099395751953, "learning_rate": 1.1912365145228218e-05, "loss": 
0.3316, "step": 24377 }, { "epoch": 20.23070539419087, "grad_norm": 37.95600509643555, "learning_rate": 1.1912033195020746e-05, "loss": 0.7848, "step": 24378 }, { "epoch": 20.231535269709543, "grad_norm": 22.946569442749023, "learning_rate": 1.1911701244813278e-05, "loss": 0.3512, "step": 24379 }, { "epoch": 20.232365145228215, "grad_norm": 14.04300594329834, "learning_rate": 1.191136929460581e-05, "loss": 0.3236, "step": 24380 }, { "epoch": 20.233195020746887, "grad_norm": 16.768095016479492, "learning_rate": 1.1911037344398343e-05, "loss": 0.2743, "step": 24381 }, { "epoch": 20.23402489626556, "grad_norm": 72.10774993896484, "learning_rate": 1.1910705394190871e-05, "loss": 0.4384, "step": 24382 }, { "epoch": 20.23485477178423, "grad_norm": 54.361114501953125, "learning_rate": 1.1910373443983403e-05, "loss": 0.4807, "step": 24383 }, { "epoch": 20.235684647302904, "grad_norm": 186.58290100097656, "learning_rate": 1.1910041493775934e-05, "loss": 0.325, "step": 24384 }, { "epoch": 20.236514522821576, "grad_norm": 71.78955841064453, "learning_rate": 1.1909709543568466e-05, "loss": 0.6556, "step": 24385 }, { "epoch": 20.237344398340248, "grad_norm": 104.44862365722656, "learning_rate": 1.1909377593360998e-05, "loss": 0.8203, "step": 24386 }, { "epoch": 20.23817427385892, "grad_norm": 30.09269142150879, "learning_rate": 1.1909045643153527e-05, "loss": 0.3993, "step": 24387 }, { "epoch": 20.239004149377593, "grad_norm": 58.6926155090332, "learning_rate": 1.1908713692946059e-05, "loss": 0.4913, "step": 24388 }, { "epoch": 20.239834024896265, "grad_norm": 52.90858840942383, "learning_rate": 1.1908381742738591e-05, "loss": 0.6693, "step": 24389 }, { "epoch": 20.240663900414937, "grad_norm": 38.30445861816406, "learning_rate": 1.1908049792531123e-05, "loss": 0.5192, "step": 24390 }, { "epoch": 20.24149377593361, "grad_norm": 98.21353149414062, "learning_rate": 1.1907717842323652e-05, "loss": 0.6008, "step": 24391 }, { "epoch": 20.24232365145228, "grad_norm": 
47.38985061645508, "learning_rate": 1.1907385892116184e-05, "loss": 0.8169, "step": 24392 }, { "epoch": 20.243153526970953, "grad_norm": 39.21809387207031, "learning_rate": 1.1907053941908714e-05, "loss": 0.7456, "step": 24393 }, { "epoch": 20.243983402489626, "grad_norm": 120.25527954101562, "learning_rate": 1.1906721991701246e-05, "loss": 0.5644, "step": 24394 }, { "epoch": 20.244813278008298, "grad_norm": 52.97163772583008, "learning_rate": 1.1906390041493777e-05, "loss": 0.9802, "step": 24395 }, { "epoch": 20.24564315352697, "grad_norm": 39.03596115112305, "learning_rate": 1.1906058091286307e-05, "loss": 0.5243, "step": 24396 }, { "epoch": 20.246473029045642, "grad_norm": 17.918397903442383, "learning_rate": 1.190572614107884e-05, "loss": 0.332, "step": 24397 }, { "epoch": 20.247302904564314, "grad_norm": 115.785888671875, "learning_rate": 1.1905394190871371e-05, "loss": 0.9674, "step": 24398 }, { "epoch": 20.248132780082987, "grad_norm": 21.072437286376953, "learning_rate": 1.19050622406639e-05, "loss": 0.4074, "step": 24399 }, { "epoch": 20.24896265560166, "grad_norm": 66.65349578857422, "learning_rate": 1.1904730290456432e-05, "loss": 0.3609, "step": 24400 }, { "epoch": 20.24979253112033, "grad_norm": 48.38246536254883, "learning_rate": 1.1904398340248964e-05, "loss": 0.824, "step": 24401 }, { "epoch": 20.250622406639003, "grad_norm": 35.15709686279297, "learning_rate": 1.1904066390041495e-05, "loss": 0.8035, "step": 24402 }, { "epoch": 20.251452282157675, "grad_norm": 21.226261138916016, "learning_rate": 1.1903734439834025e-05, "loss": 0.3362, "step": 24403 }, { "epoch": 20.252282157676348, "grad_norm": 131.0150909423828, "learning_rate": 1.1903402489626557e-05, "loss": 0.58, "step": 24404 }, { "epoch": 20.25311203319502, "grad_norm": 19.74151611328125, "learning_rate": 1.1903070539419088e-05, "loss": 0.2887, "step": 24405 }, { "epoch": 20.253941908713692, "grad_norm": 32.359222412109375, "learning_rate": 1.190273858921162e-05, "loss": 0.4965, "step": 24406 
}, { "epoch": 20.254771784232364, "grad_norm": 9.849571228027344, "learning_rate": 1.1902406639004149e-05, "loss": 0.2366, "step": 24407 }, { "epoch": 20.255601659751036, "grad_norm": 43.44038772583008, "learning_rate": 1.190207468879668e-05, "loss": 0.4287, "step": 24408 }, { "epoch": 20.25643153526971, "grad_norm": 21.714874267578125, "learning_rate": 1.1901742738589213e-05, "loss": 0.2762, "step": 24409 }, { "epoch": 20.25726141078838, "grad_norm": 16.09842872619629, "learning_rate": 1.1901410788381745e-05, "loss": 0.3367, "step": 24410 }, { "epoch": 20.258091286307053, "grad_norm": 37.05317687988281, "learning_rate": 1.1901078838174275e-05, "loss": 0.3836, "step": 24411 }, { "epoch": 20.258921161825725, "grad_norm": 15.305907249450684, "learning_rate": 1.1900746887966806e-05, "loss": 0.418, "step": 24412 }, { "epoch": 20.259751037344397, "grad_norm": 34.424827575683594, "learning_rate": 1.1900414937759338e-05, "loss": 0.6366, "step": 24413 }, { "epoch": 20.26058091286307, "grad_norm": 23.662694931030273, "learning_rate": 1.1900082987551868e-05, "loss": 0.3198, "step": 24414 }, { "epoch": 20.261410788381742, "grad_norm": 47.69426345825195, "learning_rate": 1.18997510373444e-05, "loss": 0.47, "step": 24415 }, { "epoch": 20.262240663900414, "grad_norm": 40.85089111328125, "learning_rate": 1.1899419087136929e-05, "loss": 0.4285, "step": 24416 }, { "epoch": 20.263070539419086, "grad_norm": 39.77858352661133, "learning_rate": 1.1899087136929461e-05, "loss": 0.7947, "step": 24417 }, { "epoch": 20.26390041493776, "grad_norm": 47.37696075439453, "learning_rate": 1.1898755186721993e-05, "loss": 0.2902, "step": 24418 }, { "epoch": 20.26473029045643, "grad_norm": 37.43998718261719, "learning_rate": 1.1898423236514525e-05, "loss": 0.4526, "step": 24419 }, { "epoch": 20.265560165975103, "grad_norm": 33.530113220214844, "learning_rate": 1.1898091286307054e-05, "loss": 0.5045, "step": 24420 }, { "epoch": 20.266390041493775, "grad_norm": 24.608013153076172, "learning_rate": 
1.1897759336099586e-05, "loss": 0.2752, "step": 24421 }, { "epoch": 20.267219917012447, "grad_norm": 151.59153747558594, "learning_rate": 1.1897427385892117e-05, "loss": 0.3693, "step": 24422 }, { "epoch": 20.26804979253112, "grad_norm": 44.71160888671875, "learning_rate": 1.1897095435684649e-05, "loss": 0.5177, "step": 24423 }, { "epoch": 20.26887966804979, "grad_norm": 47.53386306762695, "learning_rate": 1.1896763485477179e-05, "loss": 0.438, "step": 24424 }, { "epoch": 20.269709543568464, "grad_norm": 85.50150299072266, "learning_rate": 1.189643153526971e-05, "loss": 0.6874, "step": 24425 }, { "epoch": 20.270539419087136, "grad_norm": 99.28560638427734, "learning_rate": 1.1896099585062242e-05, "loss": 0.8736, "step": 24426 }, { "epoch": 20.271369294605808, "grad_norm": 38.017974853515625, "learning_rate": 1.1895767634854774e-05, "loss": 1.1015, "step": 24427 }, { "epoch": 20.27219917012448, "grad_norm": 44.54716873168945, "learning_rate": 1.1895435684647302e-05, "loss": 0.3319, "step": 24428 }, { "epoch": 20.273029045643153, "grad_norm": 62.89657974243164, "learning_rate": 1.1895103734439835e-05, "loss": 0.636, "step": 24429 }, { "epoch": 20.273858921161825, "grad_norm": 30.275531768798828, "learning_rate": 1.1894771784232367e-05, "loss": 0.5391, "step": 24430 }, { "epoch": 20.274688796680497, "grad_norm": 81.94509887695312, "learning_rate": 1.1894439834024897e-05, "loss": 0.7547, "step": 24431 }, { "epoch": 20.27551867219917, "grad_norm": 57.47829055786133, "learning_rate": 1.1894107883817428e-05, "loss": 0.5719, "step": 24432 }, { "epoch": 20.27634854771784, "grad_norm": 46.66845703125, "learning_rate": 1.189377593360996e-05, "loss": 0.5608, "step": 24433 }, { "epoch": 20.277178423236514, "grad_norm": 68.90406036376953, "learning_rate": 1.189344398340249e-05, "loss": 0.4419, "step": 24434 }, { "epoch": 20.278008298755186, "grad_norm": 44.53969192504883, "learning_rate": 1.1893112033195022e-05, "loss": 0.794, "step": 24435 }, { "epoch": 20.278838174273858, 
"grad_norm": 34.322227478027344, "learning_rate": 1.1892780082987554e-05, "loss": 0.7796, "step": 24436 }, { "epoch": 20.27966804979253, "grad_norm": 58.36095428466797, "learning_rate": 1.1892448132780083e-05, "loss": 1.3459, "step": 24437 }, { "epoch": 20.280497925311202, "grad_norm": 76.97940826416016, "learning_rate": 1.1892116182572615e-05, "loss": 0.6719, "step": 24438 }, { "epoch": 20.281327800829875, "grad_norm": 71.3866958618164, "learning_rate": 1.1891784232365147e-05, "loss": 0.5906, "step": 24439 }, { "epoch": 20.282157676348547, "grad_norm": 34.76941680908203, "learning_rate": 1.1891452282157678e-05, "loss": 0.4021, "step": 24440 }, { "epoch": 20.28298755186722, "grad_norm": 17.371685028076172, "learning_rate": 1.1891120331950208e-05, "loss": 0.3439, "step": 24441 }, { "epoch": 20.28381742738589, "grad_norm": 25.638147354125977, "learning_rate": 1.189078838174274e-05, "loss": 0.5975, "step": 24442 }, { "epoch": 20.284647302904563, "grad_norm": 115.93939208984375, "learning_rate": 1.189045643153527e-05, "loss": 0.6424, "step": 24443 }, { "epoch": 20.285477178423236, "grad_norm": 45.8105583190918, "learning_rate": 1.1890124481327803e-05, "loss": 0.4508, "step": 24444 }, { "epoch": 20.286307053941908, "grad_norm": 47.65932846069336, "learning_rate": 1.1889792531120331e-05, "loss": 0.6642, "step": 24445 }, { "epoch": 20.28713692946058, "grad_norm": 50.91044235229492, "learning_rate": 1.1889460580912863e-05, "loss": 0.5606, "step": 24446 }, { "epoch": 20.287966804979252, "grad_norm": 41.121646881103516, "learning_rate": 1.1889128630705396e-05, "loss": 0.4709, "step": 24447 }, { "epoch": 20.288796680497924, "grad_norm": 24.174898147583008, "learning_rate": 1.1888796680497928e-05, "loss": 0.46, "step": 24448 }, { "epoch": 20.289626556016596, "grad_norm": 18.381572723388672, "learning_rate": 1.1888464730290456e-05, "loss": 0.3039, "step": 24449 }, { "epoch": 20.29045643153527, "grad_norm": 42.7586784362793, "learning_rate": 1.1888132780082989e-05, "loss": 
0.4334, "step": 24450 }, { "epoch": 20.29128630705394, "grad_norm": 21.343727111816406, "learning_rate": 1.188780082987552e-05, "loss": 0.3275, "step": 24451 }, { "epoch": 20.292116182572613, "grad_norm": 30.462387084960938, "learning_rate": 1.1887468879668051e-05, "loss": 0.5198, "step": 24452 }, { "epoch": 20.292946058091285, "grad_norm": 26.713041305541992, "learning_rate": 1.1887136929460581e-05, "loss": 0.5727, "step": 24453 }, { "epoch": 20.293775933609957, "grad_norm": 105.74853515625, "learning_rate": 1.1886804979253112e-05, "loss": 0.7311, "step": 24454 }, { "epoch": 20.29460580912863, "grad_norm": 37.93943405151367, "learning_rate": 1.1886473029045644e-05, "loss": 0.6174, "step": 24455 }, { "epoch": 20.295435684647302, "grad_norm": 26.967500686645508, "learning_rate": 1.1886141078838176e-05, "loss": 0.4432, "step": 24456 }, { "epoch": 20.296265560165974, "grad_norm": 11.802449226379395, "learning_rate": 1.1885809128630705e-05, "loss": 0.4148, "step": 24457 }, { "epoch": 20.297095435684646, "grad_norm": 91.46868896484375, "learning_rate": 1.1885477178423237e-05, "loss": 0.738, "step": 24458 }, { "epoch": 20.29792531120332, "grad_norm": 27.873746871948242, "learning_rate": 1.1885145228215769e-05, "loss": 0.3639, "step": 24459 }, { "epoch": 20.29875518672199, "grad_norm": 45.199886322021484, "learning_rate": 1.1884813278008301e-05, "loss": 0.8587, "step": 24460 }, { "epoch": 20.299585062240663, "grad_norm": 18.848644256591797, "learning_rate": 1.188448132780083e-05, "loss": 0.2439, "step": 24461 }, { "epoch": 20.300414937759335, "grad_norm": 78.73800659179688, "learning_rate": 1.1884149377593362e-05, "loss": 0.9438, "step": 24462 }, { "epoch": 20.301244813278007, "grad_norm": 46.72570037841797, "learning_rate": 1.1883817427385892e-05, "loss": 0.4343, "step": 24463 }, { "epoch": 20.30207468879668, "grad_norm": 142.5006103515625, "learning_rate": 1.1883485477178424e-05, "loss": 0.5375, "step": 24464 }, { "epoch": 20.30290456431535, "grad_norm": 
81.47664642333984, "learning_rate": 1.1883153526970957e-05, "loss": 0.553, "step": 24465 }, { "epoch": 20.303734439834024, "grad_norm": 67.80500030517578, "learning_rate": 1.1882821576763485e-05, "loss": 0.7632, "step": 24466 }, { "epoch": 20.304564315352696, "grad_norm": 31.816436767578125, "learning_rate": 1.1882489626556017e-05, "loss": 0.4507, "step": 24467 }, { "epoch": 20.305394190871368, "grad_norm": 47.29252243041992, "learning_rate": 1.188215767634855e-05, "loss": 1.1117, "step": 24468 }, { "epoch": 20.30622406639004, "grad_norm": 30.318086624145508, "learning_rate": 1.188182572614108e-05, "loss": 0.3962, "step": 24469 }, { "epoch": 20.307053941908713, "grad_norm": 48.32554626464844, "learning_rate": 1.188149377593361e-05, "loss": 0.4343, "step": 24470 }, { "epoch": 20.307883817427385, "grad_norm": 20.222623825073242, "learning_rate": 1.1881161825726142e-05, "loss": 0.424, "step": 24471 }, { "epoch": 20.308713692946057, "grad_norm": 20.871448516845703, "learning_rate": 1.1880829875518673e-05, "loss": 0.2672, "step": 24472 }, { "epoch": 20.30954356846473, "grad_norm": 36.3350830078125, "learning_rate": 1.1880497925311205e-05, "loss": 0.3227, "step": 24473 }, { "epoch": 20.3103734439834, "grad_norm": 43.47929000854492, "learning_rate": 1.1880165975103735e-05, "loss": 0.9078, "step": 24474 }, { "epoch": 20.311203319502074, "grad_norm": 58.83269500732422, "learning_rate": 1.1879834024896266e-05, "loss": 0.4852, "step": 24475 }, { "epoch": 20.312033195020746, "grad_norm": 31.51858139038086, "learning_rate": 1.1879502074688798e-05, "loss": 0.4153, "step": 24476 }, { "epoch": 20.312863070539418, "grad_norm": 155.9599609375, "learning_rate": 1.187917012448133e-05, "loss": 1.7277, "step": 24477 }, { "epoch": 20.31369294605809, "grad_norm": 27.48918914794922, "learning_rate": 1.1878838174273859e-05, "loss": 0.572, "step": 24478 }, { "epoch": 20.314522821576762, "grad_norm": 32.90510559082031, "learning_rate": 1.1878506224066391e-05, "loss": 0.704, "step": 24479 }, { 
"epoch": 20.315352697095435, "grad_norm": 29.023326873779297, "learning_rate": 1.1878174273858923e-05, "loss": 0.4035, "step": 24480 }, { "epoch": 20.316182572614107, "grad_norm": 51.44747543334961, "learning_rate": 1.1877842323651453e-05, "loss": 0.8328, "step": 24481 }, { "epoch": 20.31701244813278, "grad_norm": 47.54240036010742, "learning_rate": 1.1877510373443984e-05, "loss": 0.3994, "step": 24482 }, { "epoch": 20.31784232365145, "grad_norm": 69.10677337646484, "learning_rate": 1.1877178423236514e-05, "loss": 0.3679, "step": 24483 }, { "epoch": 20.318672199170123, "grad_norm": 100.31903839111328, "learning_rate": 1.1876846473029046e-05, "loss": 0.7323, "step": 24484 }, { "epoch": 20.319502074688796, "grad_norm": 14.16309928894043, "learning_rate": 1.1876514522821578e-05, "loss": 0.3067, "step": 24485 }, { "epoch": 20.320331950207468, "grad_norm": 44.69068908691406, "learning_rate": 1.1876182572614107e-05, "loss": 0.4459, "step": 24486 }, { "epoch": 20.32116182572614, "grad_norm": 83.29570770263672, "learning_rate": 1.187585062240664e-05, "loss": 1.0564, "step": 24487 }, { "epoch": 20.321991701244812, "grad_norm": 119.32110595703125, "learning_rate": 1.1875518672199171e-05, "loss": 0.3779, "step": 24488 }, { "epoch": 20.322821576763484, "grad_norm": 30.333505630493164, "learning_rate": 1.1875186721991703e-05, "loss": 0.3798, "step": 24489 }, { "epoch": 20.323651452282157, "grad_norm": 37.89042282104492, "learning_rate": 1.1874854771784234e-05, "loss": 0.3906, "step": 24490 }, { "epoch": 20.32448132780083, "grad_norm": 70.26396179199219, "learning_rate": 1.1874522821576764e-05, "loss": 0.9294, "step": 24491 }, { "epoch": 20.3253112033195, "grad_norm": 30.6326961517334, "learning_rate": 1.1874190871369295e-05, "loss": 0.9985, "step": 24492 }, { "epoch": 20.326141078838173, "grad_norm": 63.93394470214844, "learning_rate": 1.1873858921161827e-05, "loss": 0.6888, "step": 24493 }, { "epoch": 20.326970954356845, "grad_norm": 49.21611022949219, "learning_rate": 
1.1873526970954359e-05, "loss": 0.6029, "step": 24494 }, { "epoch": 20.327800829875518, "grad_norm": 43.46583938598633, "learning_rate": 1.1873195020746888e-05, "loss": 0.4807, "step": 24495 }, { "epoch": 20.32863070539419, "grad_norm": 32.23881149291992, "learning_rate": 1.187286307053942e-05, "loss": 0.353, "step": 24496 }, { "epoch": 20.329460580912862, "grad_norm": 42.25748825073242, "learning_rate": 1.1872531120331952e-05, "loss": 0.5942, "step": 24497 }, { "epoch": 20.330290456431534, "grad_norm": 33.51602554321289, "learning_rate": 1.1872199170124484e-05, "loss": 0.318, "step": 24498 }, { "epoch": 20.331120331950206, "grad_norm": 55.1730842590332, "learning_rate": 1.1871867219917013e-05, "loss": 0.9257, "step": 24499 }, { "epoch": 20.33195020746888, "grad_norm": 26.61240005493164, "learning_rate": 1.1871535269709545e-05, "loss": 0.3006, "step": 24500 }, { "epoch": 20.33278008298755, "grad_norm": 32.12769317626953, "learning_rate": 1.1871203319502075e-05, "loss": 0.4725, "step": 24501 }, { "epoch": 20.333609958506223, "grad_norm": 21.755739212036133, "learning_rate": 1.1870871369294607e-05, "loss": 0.3388, "step": 24502 }, { "epoch": 20.334439834024895, "grad_norm": 14.9909029006958, "learning_rate": 1.1870539419087138e-05, "loss": 0.3637, "step": 24503 }, { "epoch": 20.335269709543567, "grad_norm": 61.67257308959961, "learning_rate": 1.1870207468879668e-05, "loss": 0.6709, "step": 24504 }, { "epoch": 20.33609958506224, "grad_norm": 88.87399291992188, "learning_rate": 1.18698755186722e-05, "loss": 0.5748, "step": 24505 }, { "epoch": 20.33692946058091, "grad_norm": 45.053611755371094, "learning_rate": 1.1869543568464732e-05, "loss": 0.6468, "step": 24506 }, { "epoch": 20.337759336099584, "grad_norm": 29.873031616210938, "learning_rate": 1.1869211618257261e-05, "loss": 0.4617, "step": 24507 }, { "epoch": 20.338589211618256, "grad_norm": 17.140832901000977, "learning_rate": 1.1868879668049793e-05, "loss": 0.2543, "step": 24508 }, { "epoch": 20.33941908713693, 
"grad_norm": 42.806488037109375, "learning_rate": 1.1868547717842325e-05, "loss": 0.5495, "step": 24509 }, { "epoch": 20.3402489626556, "grad_norm": 32.81538772583008, "learning_rate": 1.1868215767634856e-05, "loss": 0.384, "step": 24510 }, { "epoch": 20.341078838174273, "grad_norm": 56.832889556884766, "learning_rate": 1.1867883817427386e-05, "loss": 0.5933, "step": 24511 }, { "epoch": 20.341908713692945, "grad_norm": 50.20028305053711, "learning_rate": 1.1867551867219918e-05, "loss": 0.8372, "step": 24512 }, { "epoch": 20.342738589211617, "grad_norm": 73.12952423095703, "learning_rate": 1.1867219917012449e-05, "loss": 0.4259, "step": 24513 }, { "epoch": 20.34356846473029, "grad_norm": 38.65990447998047, "learning_rate": 1.186688796680498e-05, "loss": 0.4824, "step": 24514 }, { "epoch": 20.34439834024896, "grad_norm": 32.59820556640625, "learning_rate": 1.1866556016597513e-05, "loss": 0.5761, "step": 24515 }, { "epoch": 20.345228215767634, "grad_norm": 32.508811950683594, "learning_rate": 1.1866224066390042e-05, "loss": 0.5766, "step": 24516 }, { "epoch": 20.346058091286306, "grad_norm": 30.50147247314453, "learning_rate": 1.1865892116182574e-05, "loss": 0.4534, "step": 24517 }, { "epoch": 20.346887966804978, "grad_norm": 47.645286560058594, "learning_rate": 1.1865560165975106e-05, "loss": 0.3686, "step": 24518 }, { "epoch": 20.34771784232365, "grad_norm": 41.804229736328125, "learning_rate": 1.1865228215767636e-05, "loss": 0.483, "step": 24519 }, { "epoch": 20.348547717842322, "grad_norm": 52.60273361206055, "learning_rate": 1.1864896265560167e-05, "loss": 0.5284, "step": 24520 }, { "epoch": 20.349377593360995, "grad_norm": 17.03137969970703, "learning_rate": 1.1864564315352699e-05, "loss": 0.4366, "step": 24521 }, { "epoch": 20.350207468879667, "grad_norm": 72.06403350830078, "learning_rate": 1.1864232365145229e-05, "loss": 0.756, "step": 24522 }, { "epoch": 20.35103734439834, "grad_norm": 20.245342254638672, "learning_rate": 1.1863900414937761e-05, "loss": 
0.2827, "step": 24523 }, { "epoch": 20.35186721991701, "grad_norm": 12.077899932861328, "learning_rate": 1.186356846473029e-05, "loss": 0.2343, "step": 24524 }, { "epoch": 20.352697095435683, "grad_norm": 112.4175033569336, "learning_rate": 1.1863236514522822e-05, "loss": 0.9768, "step": 24525 }, { "epoch": 20.353526970954356, "grad_norm": 52.301448822021484, "learning_rate": 1.1862904564315354e-05, "loss": 0.5099, "step": 24526 }, { "epoch": 20.354356846473028, "grad_norm": 22.417037963867188, "learning_rate": 1.1862572614107886e-05, "loss": 0.2971, "step": 24527 }, { "epoch": 20.3551867219917, "grad_norm": 17.41590118408203, "learning_rate": 1.1862240663900415e-05, "loss": 0.2744, "step": 24528 }, { "epoch": 20.356016597510372, "grad_norm": 16.426593780517578, "learning_rate": 1.1861908713692947e-05, "loss": 0.3169, "step": 24529 }, { "epoch": 20.356846473029044, "grad_norm": 33.524627685546875, "learning_rate": 1.186157676348548e-05, "loss": 0.3821, "step": 24530 }, { "epoch": 20.357676348547717, "grad_norm": 117.86685180664062, "learning_rate": 1.186124481327801e-05, "loss": 1.2031, "step": 24531 }, { "epoch": 20.35850622406639, "grad_norm": 23.69708251953125, "learning_rate": 1.186091286307054e-05, "loss": 0.4226, "step": 24532 }, { "epoch": 20.35933609958506, "grad_norm": 10.945131301879883, "learning_rate": 1.186058091286307e-05, "loss": 0.2547, "step": 24533 }, { "epoch": 20.360165975103733, "grad_norm": 83.5906982421875, "learning_rate": 1.1860248962655603e-05, "loss": 0.9602, "step": 24534 }, { "epoch": 20.360995850622405, "grad_norm": 66.52790069580078, "learning_rate": 1.1859917012448135e-05, "loss": 0.475, "step": 24535 }, { "epoch": 20.361825726141078, "grad_norm": 50.15256881713867, "learning_rate": 1.1859585062240663e-05, "loss": 0.6428, "step": 24536 }, { "epoch": 20.36265560165975, "grad_norm": 52.646949768066406, "learning_rate": 1.1859253112033195e-05, "loss": 0.6994, "step": 24537 }, { "epoch": 20.363485477178422, "grad_norm": 
50.428565979003906, "learning_rate": 1.1858921161825728e-05, "loss": 0.91, "step": 24538 }, { "epoch": 20.364315352697094, "grad_norm": 73.51005554199219, "learning_rate": 1.1858589211618258e-05, "loss": 1.0599, "step": 24539 }, { "epoch": 20.365145228215766, "grad_norm": 19.775737762451172, "learning_rate": 1.1858257261410788e-05, "loss": 0.3532, "step": 24540 }, { "epoch": 20.36597510373444, "grad_norm": 47.470394134521484, "learning_rate": 1.185792531120332e-05, "loss": 0.4099, "step": 24541 }, { "epoch": 20.36680497925311, "grad_norm": 87.8204116821289, "learning_rate": 1.1857593360995851e-05, "loss": 0.6051, "step": 24542 }, { "epoch": 20.367634854771783, "grad_norm": 70.74331665039062, "learning_rate": 1.1857261410788383e-05, "loss": 0.71, "step": 24543 }, { "epoch": 20.368464730290455, "grad_norm": 108.64916229248047, "learning_rate": 1.1856929460580915e-05, "loss": 1.6335, "step": 24544 }, { "epoch": 20.369294605809127, "grad_norm": 134.7415771484375, "learning_rate": 1.1856597510373444e-05, "loss": 0.8345, "step": 24545 }, { "epoch": 20.3701244813278, "grad_norm": 30.687511444091797, "learning_rate": 1.1856265560165976e-05, "loss": 0.5252, "step": 24546 }, { "epoch": 20.37095435684647, "grad_norm": 19.394773483276367, "learning_rate": 1.1855933609958508e-05, "loss": 0.3293, "step": 24547 }, { "epoch": 20.371784232365144, "grad_norm": 40.927879333496094, "learning_rate": 1.1855601659751039e-05, "loss": 0.8297, "step": 24548 }, { "epoch": 20.372614107883816, "grad_norm": 30.42609977722168, "learning_rate": 1.1855269709543569e-05, "loss": 0.4165, "step": 24549 }, { "epoch": 20.37344398340249, "grad_norm": 40.46522521972656, "learning_rate": 1.1854937759336101e-05, "loss": 0.5024, "step": 24550 }, { "epoch": 20.37427385892116, "grad_norm": 80.51690673828125, "learning_rate": 1.1854605809128631e-05, "loss": 0.6819, "step": 24551 }, { "epoch": 20.375103734439833, "grad_norm": 33.68494415283203, "learning_rate": 1.1854273858921164e-05, "loss": 0.7082, "step": 
24552 }, { "epoch": 20.375933609958505, "grad_norm": 15.961559295654297, "learning_rate": 1.1853941908713692e-05, "loss": 0.3686, "step": 24553 }, { "epoch": 20.376763485477177, "grad_norm": 35.72149658203125, "learning_rate": 1.1853609958506224e-05, "loss": 0.5366, "step": 24554 }, { "epoch": 20.37759336099585, "grad_norm": 53.284664154052734, "learning_rate": 1.1853278008298756e-05, "loss": 0.7195, "step": 24555 }, { "epoch": 20.37842323651452, "grad_norm": 33.8961181640625, "learning_rate": 1.1852946058091289e-05, "loss": 0.3328, "step": 24556 }, { "epoch": 20.379253112033194, "grad_norm": 27.010297775268555, "learning_rate": 1.1852614107883817e-05, "loss": 0.5552, "step": 24557 }, { "epoch": 20.380082987551866, "grad_norm": 48.74326705932617, "learning_rate": 1.185228215767635e-05, "loss": 0.5178, "step": 24558 }, { "epoch": 20.380912863070538, "grad_norm": 61.434654235839844, "learning_rate": 1.1851950207468882e-05, "loss": 0.667, "step": 24559 }, { "epoch": 20.38174273858921, "grad_norm": 151.1956787109375, "learning_rate": 1.1851618257261412e-05, "loss": 1.1959, "step": 24560 }, { "epoch": 20.382572614107882, "grad_norm": 77.49488067626953, "learning_rate": 1.1851286307053942e-05, "loss": 0.7319, "step": 24561 }, { "epoch": 20.383402489626555, "grad_norm": 22.859113693237305, "learning_rate": 1.1850954356846473e-05, "loss": 0.298, "step": 24562 }, { "epoch": 20.384232365145227, "grad_norm": 9.82752799987793, "learning_rate": 1.1850622406639005e-05, "loss": 0.2812, "step": 24563 }, { "epoch": 20.3850622406639, "grad_norm": 46.66674041748047, "learning_rate": 1.1850290456431537e-05, "loss": 0.656, "step": 24564 }, { "epoch": 20.38589211618257, "grad_norm": 13.086100578308105, "learning_rate": 1.1849958506224066e-05, "loss": 0.4789, "step": 24565 }, { "epoch": 20.386721991701243, "grad_norm": 59.78038024902344, "learning_rate": 1.1849626556016598e-05, "loss": 1.204, "step": 24566 }, { "epoch": 20.387551867219916, "grad_norm": 53.7869873046875, "learning_rate": 
1.184929460580913e-05, "loss": 0.4955, "step": 24567 }, { "epoch": 20.388381742738588, "grad_norm": 42.84028625488281, "learning_rate": 1.1848962655601662e-05, "loss": 0.7802, "step": 24568 }, { "epoch": 20.38921161825726, "grad_norm": 70.88899993896484, "learning_rate": 1.1848630705394192e-05, "loss": 0.7765, "step": 24569 }, { "epoch": 20.390041493775932, "grad_norm": 56.710453033447266, "learning_rate": 1.1848298755186723e-05, "loss": 0.5055, "step": 24570 }, { "epoch": 20.390871369294604, "grad_norm": 31.872058868408203, "learning_rate": 1.1847966804979253e-05, "loss": 0.499, "step": 24571 }, { "epoch": 20.391701244813277, "grad_norm": 58.305015563964844, "learning_rate": 1.1847634854771785e-05, "loss": 0.6287, "step": 24572 }, { "epoch": 20.39253112033195, "grad_norm": 17.40771484375, "learning_rate": 1.1847302904564317e-05, "loss": 0.3626, "step": 24573 }, { "epoch": 20.39336099585062, "grad_norm": 22.87641143798828, "learning_rate": 1.1846970954356846e-05, "loss": 0.2577, "step": 24574 }, { "epoch": 20.394190871369293, "grad_norm": 50.44590759277344, "learning_rate": 1.1846639004149378e-05, "loss": 0.7429, "step": 24575 }, { "epoch": 20.395020746887965, "grad_norm": 61.04876708984375, "learning_rate": 1.184630705394191e-05, "loss": 0.677, "step": 24576 }, { "epoch": 20.395850622406638, "grad_norm": 23.808544158935547, "learning_rate": 1.1845975103734443e-05, "loss": 0.3849, "step": 24577 }, { "epoch": 20.39668049792531, "grad_norm": 72.30059051513672, "learning_rate": 1.1845643153526971e-05, "loss": 0.8409, "step": 24578 }, { "epoch": 20.397510373443982, "grad_norm": 44.84156036376953, "learning_rate": 1.1845311203319503e-05, "loss": 0.9158, "step": 24579 }, { "epoch": 20.398340248962654, "grad_norm": 36.61946487426758, "learning_rate": 1.1844979253112034e-05, "loss": 0.5747, "step": 24580 }, { "epoch": 20.399170124481326, "grad_norm": 56.8428840637207, "learning_rate": 1.1844647302904566e-05, "loss": 0.5913, "step": 24581 }, { "epoch": 20.4, "grad_norm": 
32.18907928466797, "learning_rate": 1.1844315352697096e-05, "loss": 0.2757, "step": 24582 }, { "epoch": 20.40082987551867, "grad_norm": 18.497791290283203, "learning_rate": 1.1843983402489627e-05, "loss": 0.3662, "step": 24583 }, { "epoch": 20.401659751037343, "grad_norm": 16.61855125427246, "learning_rate": 1.1843651452282159e-05, "loss": 0.4171, "step": 24584 }, { "epoch": 20.402489626556015, "grad_norm": 9.850317001342773, "learning_rate": 1.1843319502074691e-05, "loss": 0.2287, "step": 24585 }, { "epoch": 20.403319502074687, "grad_norm": 50.1109619140625, "learning_rate": 1.184298755186722e-05, "loss": 0.9663, "step": 24586 }, { "epoch": 20.40414937759336, "grad_norm": 43.497589111328125, "learning_rate": 1.1842655601659752e-05, "loss": 0.7855, "step": 24587 }, { "epoch": 20.40497925311203, "grad_norm": 89.62254333496094, "learning_rate": 1.1842323651452284e-05, "loss": 0.4598, "step": 24588 }, { "epoch": 20.405809128630704, "grad_norm": 36.30587387084961, "learning_rate": 1.1841991701244814e-05, "loss": 0.4182, "step": 24589 }, { "epoch": 20.406639004149376, "grad_norm": 64.25245666503906, "learning_rate": 1.1841659751037345e-05, "loss": 0.5174, "step": 24590 }, { "epoch": 20.40746887966805, "grad_norm": 100.14486694335938, "learning_rate": 1.1841327800829877e-05, "loss": 0.7568, "step": 24591 }, { "epoch": 20.40829875518672, "grad_norm": 52.77606201171875, "learning_rate": 1.1840995850622407e-05, "loss": 0.6217, "step": 24592 }, { "epoch": 20.409128630705393, "grad_norm": 59.84082794189453, "learning_rate": 1.184066390041494e-05, "loss": 0.5709, "step": 24593 }, { "epoch": 20.409958506224065, "grad_norm": 46.557674407958984, "learning_rate": 1.1840331950207471e-05, "loss": 0.7258, "step": 24594 }, { "epoch": 20.410788381742737, "grad_norm": 46.18434143066406, "learning_rate": 1.184e-05, "loss": 0.4484, "step": 24595 }, { "epoch": 20.41161825726141, "grad_norm": 25.90923309326172, "learning_rate": 1.1839668049792532e-05, "loss": 0.3363, "step": 24596 }, { 
"epoch": 20.41244813278008, "grad_norm": 70.14984893798828, "learning_rate": 1.1839336099585064e-05, "loss": 0.7525, "step": 24597 }, { "epoch": 20.413278008298754, "grad_norm": 51.237308502197266, "learning_rate": 1.1839004149377595e-05, "loss": 0.2426, "step": 24598 }, { "epoch": 20.414107883817426, "grad_norm": 49.21601104736328, "learning_rate": 1.1838672199170125e-05, "loss": 0.7468, "step": 24599 }, { "epoch": 20.414937759336098, "grad_norm": 28.0198974609375, "learning_rate": 1.1838340248962656e-05, "loss": 0.5954, "step": 24600 }, { "epoch": 20.41576763485477, "grad_norm": 20.04579734802246, "learning_rate": 1.1838008298755188e-05, "loss": 0.477, "step": 24601 }, { "epoch": 20.416597510373443, "grad_norm": 35.513092041015625, "learning_rate": 1.183767634854772e-05, "loss": 0.5487, "step": 24602 }, { "epoch": 20.417427385892115, "grad_norm": 46.59294891357422, "learning_rate": 1.1837344398340249e-05, "loss": 0.6515, "step": 24603 }, { "epoch": 20.418257261410787, "grad_norm": 29.9141788482666, "learning_rate": 1.183701244813278e-05, "loss": 0.3085, "step": 24604 }, { "epoch": 20.41908713692946, "grad_norm": 62.71794128417969, "learning_rate": 1.1836680497925313e-05, "loss": 0.6999, "step": 24605 }, { "epoch": 20.41991701244813, "grad_norm": 22.08191680908203, "learning_rate": 1.1836348547717845e-05, "loss": 0.5127, "step": 24606 }, { "epoch": 20.420746887966803, "grad_norm": 33.09006881713867, "learning_rate": 1.1836016597510374e-05, "loss": 0.515, "step": 24607 }, { "epoch": 20.421576763485476, "grad_norm": 30.90471649169922, "learning_rate": 1.1835684647302906e-05, "loss": 0.904, "step": 24608 }, { "epoch": 20.422406639004148, "grad_norm": 103.1650619506836, "learning_rate": 1.1835352697095436e-05, "loss": 0.6341, "step": 24609 }, { "epoch": 20.42323651452282, "grad_norm": 38.98891067504883, "learning_rate": 1.1835020746887968e-05, "loss": 0.4939, "step": 24610 }, { "epoch": 20.424066390041492, "grad_norm": 36.62269973754883, "learning_rate": 
1.1834688796680499e-05, "loss": 0.5172, "step": 24611 }, { "epoch": 20.424896265560164, "grad_norm": 41.021156311035156, "learning_rate": 1.1834356846473029e-05, "loss": 0.501, "step": 24612 }, { "epoch": 20.425726141078837, "grad_norm": 31.524539947509766, "learning_rate": 1.1834024896265561e-05, "loss": 0.3652, "step": 24613 }, { "epoch": 20.42655601659751, "grad_norm": 95.46430206298828, "learning_rate": 1.1833692946058093e-05, "loss": 0.4223, "step": 24614 }, { "epoch": 20.42738589211618, "grad_norm": 55.06877899169922, "learning_rate": 1.1833360995850622e-05, "loss": 0.7072, "step": 24615 }, { "epoch": 20.428215767634853, "grad_norm": 39.301063537597656, "learning_rate": 1.1833029045643154e-05, "loss": 0.6848, "step": 24616 }, { "epoch": 20.429045643153525, "grad_norm": 49.234375, "learning_rate": 1.1832697095435686e-05, "loss": 0.4928, "step": 24617 }, { "epoch": 20.429875518672198, "grad_norm": 115.95672607421875, "learning_rate": 1.1832365145228217e-05, "loss": 0.8983, "step": 24618 }, { "epoch": 20.43070539419087, "grad_norm": 33.95146942138672, "learning_rate": 1.1832033195020747e-05, "loss": 0.6152, "step": 24619 }, { "epoch": 20.431535269709542, "grad_norm": 43.0835075378418, "learning_rate": 1.1831701244813279e-05, "loss": 0.8384, "step": 24620 }, { "epoch": 20.432365145228214, "grad_norm": 27.862545013427734, "learning_rate": 1.183136929460581e-05, "loss": 0.3581, "step": 24621 }, { "epoch": 20.433195020746886, "grad_norm": 30.51797103881836, "learning_rate": 1.1831037344398342e-05, "loss": 0.4487, "step": 24622 }, { "epoch": 20.43402489626556, "grad_norm": 19.773433685302734, "learning_rate": 1.1830705394190874e-05, "loss": 0.3732, "step": 24623 }, { "epoch": 20.43485477178423, "grad_norm": 95.13851165771484, "learning_rate": 1.1830373443983402e-05, "loss": 0.8885, "step": 24624 }, { "epoch": 20.435684647302903, "grad_norm": 54.558719635009766, "learning_rate": 1.1830041493775935e-05, "loss": 0.8057, "step": 24625 }, { "epoch": 20.436514522821575, 
"grad_norm": 20.48946762084961, "learning_rate": 1.1829709543568467e-05, "loss": 0.4909, "step": 24626 }, { "epoch": 20.437344398340247, "grad_norm": 104.91105651855469, "learning_rate": 1.1829377593360997e-05, "loss": 0.697, "step": 24627 }, { "epoch": 20.43817427385892, "grad_norm": 70.50605773925781, "learning_rate": 1.1829045643153528e-05, "loss": 0.6943, "step": 24628 }, { "epoch": 20.439004149377592, "grad_norm": 41.755165100097656, "learning_rate": 1.182871369294606e-05, "loss": 0.3817, "step": 24629 }, { "epoch": 20.439834024896264, "grad_norm": 61.37456130981445, "learning_rate": 1.182838174273859e-05, "loss": 1.232, "step": 24630 }, { "epoch": 20.440663900414936, "grad_norm": 18.138832092285156, "learning_rate": 1.1828049792531122e-05, "loss": 0.3757, "step": 24631 }, { "epoch": 20.44149377593361, "grad_norm": 51.303977966308594, "learning_rate": 1.1827717842323651e-05, "loss": 0.9634, "step": 24632 }, { "epoch": 20.44232365145228, "grad_norm": 35.257545471191406, "learning_rate": 1.1827385892116183e-05, "loss": 0.5189, "step": 24633 }, { "epoch": 20.443153526970953, "grad_norm": 28.97947883605957, "learning_rate": 1.1827053941908715e-05, "loss": 0.5892, "step": 24634 }, { "epoch": 20.443983402489625, "grad_norm": 39.667911529541016, "learning_rate": 1.1826721991701247e-05, "loss": 0.5862, "step": 24635 }, { "epoch": 20.444813278008297, "grad_norm": 63.145912170410156, "learning_rate": 1.1826390041493776e-05, "loss": 0.826, "step": 24636 }, { "epoch": 20.44564315352697, "grad_norm": 33.561805725097656, "learning_rate": 1.1826058091286308e-05, "loss": 0.466, "step": 24637 }, { "epoch": 20.44647302904564, "grad_norm": 15.117536544799805, "learning_rate": 1.182572614107884e-05, "loss": 0.3287, "step": 24638 }, { "epoch": 20.447302904564314, "grad_norm": 49.29261016845703, "learning_rate": 1.182539419087137e-05, "loss": 0.7839, "step": 24639 }, { "epoch": 20.448132780082986, "grad_norm": 90.07080078125, "learning_rate": 1.1825062240663901e-05, "loss": 0.4469, 
"step": 24640 }, { "epoch": 20.448962655601658, "grad_norm": 66.33741760253906, "learning_rate": 1.1824730290456431e-05, "loss": 0.8612, "step": 24641 }, { "epoch": 20.44979253112033, "grad_norm": 62.051212310791016, "learning_rate": 1.1824398340248963e-05, "loss": 0.8952, "step": 24642 }, { "epoch": 20.450622406639003, "grad_norm": 51.289737701416016, "learning_rate": 1.1824066390041496e-05, "loss": 0.8504, "step": 24643 }, { "epoch": 20.451452282157675, "grad_norm": 25.982955932617188, "learning_rate": 1.1823734439834024e-05, "loss": 0.3628, "step": 24644 }, { "epoch": 20.452282157676347, "grad_norm": 21.06883430480957, "learning_rate": 1.1823402489626556e-05, "loss": 0.4845, "step": 24645 }, { "epoch": 20.45311203319502, "grad_norm": 17.90052032470703, "learning_rate": 1.1823070539419089e-05, "loss": 0.3974, "step": 24646 }, { "epoch": 20.45394190871369, "grad_norm": 23.330175399780273, "learning_rate": 1.182273858921162e-05, "loss": 0.3845, "step": 24647 }, { "epoch": 20.454771784232364, "grad_norm": 28.129148483276367, "learning_rate": 1.1822406639004151e-05, "loss": 0.7218, "step": 24648 }, { "epoch": 20.455601659751036, "grad_norm": 35.714298248291016, "learning_rate": 1.1822074688796681e-05, "loss": 0.3256, "step": 24649 }, { "epoch": 20.456431535269708, "grad_norm": 41.5992317199707, "learning_rate": 1.1821742738589212e-05, "loss": 0.8031, "step": 24650 }, { "epoch": 20.45726141078838, "grad_norm": 27.88664436340332, "learning_rate": 1.1821410788381744e-05, "loss": 0.3996, "step": 24651 }, { "epoch": 20.458091286307052, "grad_norm": 101.69792175292969, "learning_rate": 1.1821078838174276e-05, "loss": 0.6454, "step": 24652 }, { "epoch": 20.458921161825725, "grad_norm": 52.62185287475586, "learning_rate": 1.1820746887966805e-05, "loss": 1.0655, "step": 24653 }, { "epoch": 20.459751037344397, "grad_norm": 131.13282775878906, "learning_rate": 1.1820414937759337e-05, "loss": 0.5603, "step": 24654 }, { "epoch": 20.46058091286307, "grad_norm": 26.101215362548828, 
"learning_rate": 1.1820082987551869e-05, "loss": 0.4873, "step": 24655 }, { "epoch": 20.46141078838174, "grad_norm": 37.204795837402344, "learning_rate": 1.18197510373444e-05, "loss": 0.3468, "step": 24656 }, { "epoch": 20.462240663900413, "grad_norm": 45.77617645263672, "learning_rate": 1.181941908713693e-05, "loss": 1.073, "step": 24657 }, { "epoch": 20.463070539419085, "grad_norm": 56.02748489379883, "learning_rate": 1.1819087136929462e-05, "loss": 0.3813, "step": 24658 }, { "epoch": 20.463900414937758, "grad_norm": 73.78326416015625, "learning_rate": 1.1818755186721992e-05, "loss": 0.7803, "step": 24659 }, { "epoch": 20.46473029045643, "grad_norm": 47.408485412597656, "learning_rate": 1.1818423236514524e-05, "loss": 0.4539, "step": 24660 }, { "epoch": 20.465560165975102, "grad_norm": 13.570992469787598, "learning_rate": 1.1818091286307055e-05, "loss": 0.2566, "step": 24661 }, { "epoch": 20.466390041493774, "grad_norm": 60.27466583251953, "learning_rate": 1.1817759336099585e-05, "loss": 0.7959, "step": 24662 }, { "epoch": 20.467219917012446, "grad_norm": 47.742042541503906, "learning_rate": 1.1817427385892117e-05, "loss": 1.1672, "step": 24663 }, { "epoch": 20.46804979253112, "grad_norm": 38.506858825683594, "learning_rate": 1.181709543568465e-05, "loss": 0.5602, "step": 24664 }, { "epoch": 20.46887966804979, "grad_norm": 33.773502349853516, "learning_rate": 1.1816763485477178e-05, "loss": 0.4622, "step": 24665 }, { "epoch": 20.469709543568463, "grad_norm": 33.670875549316406, "learning_rate": 1.181643153526971e-05, "loss": 0.4263, "step": 24666 }, { "epoch": 20.470539419087135, "grad_norm": 41.42301559448242, "learning_rate": 1.1816099585062242e-05, "loss": 0.7597, "step": 24667 }, { "epoch": 20.471369294605807, "grad_norm": 30.6630859375, "learning_rate": 1.1815767634854773e-05, "loss": 0.5, "step": 24668 }, { "epoch": 20.47219917012448, "grad_norm": 16.37602424621582, "learning_rate": 1.1815435684647303e-05, "loss": 0.3282, "step": 24669 }, { "epoch": 
20.473029045643152, "grad_norm": 29.82187271118164, "learning_rate": 1.1815103734439834e-05, "loss": 0.4583, "step": 24670 }, { "epoch": 20.473858921161824, "grad_norm": 18.3013973236084, "learning_rate": 1.1814771784232366e-05, "loss": 0.3035, "step": 24671 }, { "epoch": 20.474688796680496, "grad_norm": 30.11347007751465, "learning_rate": 1.1814439834024898e-05, "loss": 0.6367, "step": 24672 }, { "epoch": 20.47551867219917, "grad_norm": 49.77604675292969, "learning_rate": 1.181410788381743e-05, "loss": 0.6287, "step": 24673 }, { "epoch": 20.47634854771784, "grad_norm": 27.28102684020996, "learning_rate": 1.1813775933609959e-05, "loss": 0.3475, "step": 24674 }, { "epoch": 20.477178423236513, "grad_norm": 62.80278396606445, "learning_rate": 1.181344398340249e-05, "loss": 0.5995, "step": 24675 }, { "epoch": 20.478008298755185, "grad_norm": 21.529674530029297, "learning_rate": 1.1813112033195023e-05, "loss": 0.3828, "step": 24676 }, { "epoch": 20.478838174273857, "grad_norm": 57.84114074707031, "learning_rate": 1.1812780082987553e-05, "loss": 0.4912, "step": 24677 }, { "epoch": 20.47966804979253, "grad_norm": 19.088119506835938, "learning_rate": 1.1812448132780084e-05, "loss": 0.2729, "step": 24678 }, { "epoch": 20.4804979253112, "grad_norm": 39.636329650878906, "learning_rate": 1.1812116182572614e-05, "loss": 0.7827, "step": 24679 }, { "epoch": 20.481327800829874, "grad_norm": 84.40081024169922, "learning_rate": 1.1811784232365146e-05, "loss": 0.6265, "step": 24680 }, { "epoch": 20.482157676348546, "grad_norm": 43.89842224121094, "learning_rate": 1.1811452282157678e-05, "loss": 0.4947, "step": 24681 }, { "epoch": 20.482987551867218, "grad_norm": 22.708751678466797, "learning_rate": 1.1811120331950207e-05, "loss": 0.5644, "step": 24682 }, { "epoch": 20.48381742738589, "grad_norm": 13.37369441986084, "learning_rate": 1.181078838174274e-05, "loss": 0.3019, "step": 24683 }, { "epoch": 20.484647302904563, "grad_norm": 42.59375, "learning_rate": 1.1810456431535271e-05, 
"loss": 0.2999, "step": 24684 }, { "epoch": 20.485477178423235, "grad_norm": 35.489471435546875, "learning_rate": 1.1810124481327803e-05, "loss": 0.5037, "step": 24685 }, { "epoch": 20.486307053941907, "grad_norm": 84.55486297607422, "learning_rate": 1.1809792531120332e-05, "loss": 0.6735, "step": 24686 }, { "epoch": 20.48713692946058, "grad_norm": 26.84174156188965, "learning_rate": 1.1809460580912864e-05, "loss": 0.3472, "step": 24687 }, { "epoch": 20.48796680497925, "grad_norm": 15.990350723266602, "learning_rate": 1.1809128630705395e-05, "loss": 0.3789, "step": 24688 }, { "epoch": 20.488796680497924, "grad_norm": 48.14002227783203, "learning_rate": 1.1808796680497927e-05, "loss": 0.4089, "step": 24689 }, { "epoch": 20.489626556016596, "grad_norm": 45.744407653808594, "learning_rate": 1.1808464730290457e-05, "loss": 0.7319, "step": 24690 }, { "epoch": 20.490456431535268, "grad_norm": 28.395719528198242, "learning_rate": 1.1808132780082988e-05, "loss": 0.5462, "step": 24691 }, { "epoch": 20.49128630705394, "grad_norm": 130.8680419921875, "learning_rate": 1.180780082987552e-05, "loss": 0.4389, "step": 24692 }, { "epoch": 20.492116182572612, "grad_norm": 15.044330596923828, "learning_rate": 1.1807468879668052e-05, "loss": 0.2559, "step": 24693 }, { "epoch": 20.492946058091285, "grad_norm": 123.18375396728516, "learning_rate": 1.180713692946058e-05, "loss": 0.474, "step": 24694 }, { "epoch": 20.49377593360996, "grad_norm": 163.65357971191406, "learning_rate": 1.1806804979253113e-05, "loss": 0.5978, "step": 24695 }, { "epoch": 20.49460580912863, "grad_norm": 47.72071838378906, "learning_rate": 1.1806473029045645e-05, "loss": 0.4225, "step": 24696 }, { "epoch": 20.495435684647305, "grad_norm": 47.329402923583984, "learning_rate": 1.1806141078838175e-05, "loss": 0.8706, "step": 24697 }, { "epoch": 20.496265560165973, "grad_norm": 44.69307327270508, "learning_rate": 1.1805809128630706e-05, "loss": 0.6246, "step": 24698 }, { "epoch": 20.49709543568465, "grad_norm": 
26.4096736907959, "learning_rate": 1.1805477178423238e-05, "loss": 0.4418, "step": 24699 }, { "epoch": 20.497925311203318, "grad_norm": 58.928001403808594, "learning_rate": 1.1805145228215768e-05, "loss": 0.8126, "step": 24700 }, { "epoch": 20.498755186721993, "grad_norm": 113.22323608398438, "learning_rate": 1.18048132780083e-05, "loss": 0.628, "step": 24701 }, { "epoch": 20.499585062240662, "grad_norm": 50.291011810302734, "learning_rate": 1.1804481327800832e-05, "loss": 1.0762, "step": 24702 }, { "epoch": 20.500414937759338, "grad_norm": 54.20252227783203, "learning_rate": 1.1804149377593361e-05, "loss": 0.8612, "step": 24703 }, { "epoch": 20.501244813278007, "grad_norm": 50.64624786376953, "learning_rate": 1.1803817427385893e-05, "loss": 0.4155, "step": 24704 }, { "epoch": 20.502074688796682, "grad_norm": 31.411556243896484, "learning_rate": 1.1803485477178425e-05, "loss": 0.3809, "step": 24705 }, { "epoch": 20.50290456431535, "grad_norm": 65.6673583984375, "learning_rate": 1.1803153526970956e-05, "loss": 0.7221, "step": 24706 }, { "epoch": 20.503734439834027, "grad_norm": 35.03654098510742, "learning_rate": 1.1802821576763486e-05, "loss": 0.3691, "step": 24707 }, { "epoch": 20.504564315352695, "grad_norm": 29.180255889892578, "learning_rate": 1.1802489626556018e-05, "loss": 0.6887, "step": 24708 }, { "epoch": 20.50539419087137, "grad_norm": 32.24498748779297, "learning_rate": 1.1802157676348549e-05, "loss": 0.3474, "step": 24709 }, { "epoch": 20.50622406639004, "grad_norm": 47.284950256347656, "learning_rate": 1.180182572614108e-05, "loss": 1.1521, "step": 24710 }, { "epoch": 20.507053941908715, "grad_norm": 21.364120483398438, "learning_rate": 1.180149377593361e-05, "loss": 0.5152, "step": 24711 }, { "epoch": 20.507883817427384, "grad_norm": 24.566877365112305, "learning_rate": 1.1801161825726142e-05, "loss": 0.5203, "step": 24712 }, { "epoch": 20.50871369294606, "grad_norm": 62.22105407714844, "learning_rate": 1.1800829875518674e-05, "loss": 0.4681, "step": 
24713 }, { "epoch": 20.50954356846473, "grad_norm": 47.052371978759766, "learning_rate": 1.1800497925311206e-05, "loss": 0.4672, "step": 24714 }, { "epoch": 20.510373443983404, "grad_norm": 29.566308975219727, "learning_rate": 1.1800165975103734e-05, "loss": 0.5688, "step": 24715 }, { "epoch": 20.511203319502073, "grad_norm": 42.928443908691406, "learning_rate": 1.1799834024896267e-05, "loss": 0.3988, "step": 24716 }, { "epoch": 20.51203319502075, "grad_norm": 38.21375274658203, "learning_rate": 1.1799502074688797e-05, "loss": 0.7943, "step": 24717 }, { "epoch": 20.512863070539417, "grad_norm": 37.75648880004883, "learning_rate": 1.1799170124481329e-05, "loss": 0.3817, "step": 24718 }, { "epoch": 20.513692946058093, "grad_norm": 28.715761184692383, "learning_rate": 1.179883817427386e-05, "loss": 0.5697, "step": 24719 }, { "epoch": 20.51452282157676, "grad_norm": 69.38085174560547, "learning_rate": 1.179850622406639e-05, "loss": 0.9601, "step": 24720 }, { "epoch": 20.515352697095437, "grad_norm": 48.97321319580078, "learning_rate": 1.1798174273858922e-05, "loss": 0.4317, "step": 24721 }, { "epoch": 20.51618257261411, "grad_norm": 89.73519134521484, "learning_rate": 1.1797842323651454e-05, "loss": 0.4435, "step": 24722 }, { "epoch": 20.517012448132782, "grad_norm": 28.686519622802734, "learning_rate": 1.1797510373443983e-05, "loss": 0.386, "step": 24723 }, { "epoch": 20.517842323651454, "grad_norm": 23.89406394958496, "learning_rate": 1.1797178423236515e-05, "loss": 0.4199, "step": 24724 }, { "epoch": 20.518672199170126, "grad_norm": 45.91604232788086, "learning_rate": 1.1796846473029047e-05, "loss": 0.4804, "step": 24725 }, { "epoch": 20.5195020746888, "grad_norm": 83.05763244628906, "learning_rate": 1.1796514522821577e-05, "loss": 0.5016, "step": 24726 }, { "epoch": 20.52033195020747, "grad_norm": 31.29456901550293, "learning_rate": 1.179618257261411e-05, "loss": 0.4283, "step": 24727 }, { "epoch": 20.521161825726143, "grad_norm": 44.134368896484375, 
"learning_rate": 1.179585062240664e-05, "loss": 0.6526, "step": 24728 }, { "epoch": 20.521991701244815, "grad_norm": 44.25891876220703, "learning_rate": 1.179551867219917e-05, "loss": 0.6409, "step": 24729 }, { "epoch": 20.522821576763487, "grad_norm": 78.46599578857422, "learning_rate": 1.1795186721991703e-05, "loss": 0.8035, "step": 24730 }, { "epoch": 20.52365145228216, "grad_norm": 75.67050170898438, "learning_rate": 1.1794854771784235e-05, "loss": 1.0632, "step": 24731 }, { "epoch": 20.52448132780083, "grad_norm": 20.350805282592773, "learning_rate": 1.1794522821576763e-05, "loss": 0.4088, "step": 24732 }, { "epoch": 20.525311203319504, "grad_norm": 52.70756149291992, "learning_rate": 1.1794190871369295e-05, "loss": 0.6359, "step": 24733 }, { "epoch": 20.526141078838176, "grad_norm": 17.17171859741211, "learning_rate": 1.1793858921161828e-05, "loss": 0.3027, "step": 24734 }, { "epoch": 20.526970954356848, "grad_norm": 17.503154754638672, "learning_rate": 1.1793526970954358e-05, "loss": 0.3602, "step": 24735 }, { "epoch": 20.52780082987552, "grad_norm": 62.1400260925293, "learning_rate": 1.1793195020746888e-05, "loss": 0.5565, "step": 24736 }, { "epoch": 20.528630705394193, "grad_norm": 55.14841842651367, "learning_rate": 1.179286307053942e-05, "loss": 0.4042, "step": 24737 }, { "epoch": 20.529460580912865, "grad_norm": 93.12822723388672, "learning_rate": 1.1792531120331951e-05, "loss": 1.2015, "step": 24738 }, { "epoch": 20.530290456431537, "grad_norm": 30.404521942138672, "learning_rate": 1.1792199170124483e-05, "loss": 0.3889, "step": 24739 }, { "epoch": 20.53112033195021, "grad_norm": 15.63279914855957, "learning_rate": 1.1791867219917012e-05, "loss": 0.3138, "step": 24740 }, { "epoch": 20.53195020746888, "grad_norm": 34.64120101928711, "learning_rate": 1.1791535269709544e-05, "loss": 0.3356, "step": 24741 }, { "epoch": 20.532780082987554, "grad_norm": 24.3779296875, "learning_rate": 1.1791203319502076e-05, "loss": 0.3384, "step": 24742 }, { "epoch": 
20.533609958506226, "grad_norm": 29.633747100830078, "learning_rate": 1.1790871369294608e-05, "loss": 0.3972, "step": 24743 }, { "epoch": 20.534439834024898, "grad_norm": 50.00566864013672, "learning_rate": 1.1790539419087137e-05, "loss": 0.6391, "step": 24744 }, { "epoch": 20.53526970954357, "grad_norm": 60.123680114746094, "learning_rate": 1.1790207468879669e-05, "loss": 0.7421, "step": 24745 }, { "epoch": 20.536099585062242, "grad_norm": 42.535221099853516, "learning_rate": 1.1789875518672201e-05, "loss": 1.0907, "step": 24746 }, { "epoch": 20.536929460580915, "grad_norm": 53.647850036621094, "learning_rate": 1.1789543568464731e-05, "loss": 0.5862, "step": 24747 }, { "epoch": 20.537759336099587, "grad_norm": 23.201091766357422, "learning_rate": 1.1789211618257262e-05, "loss": 0.3564, "step": 24748 }, { "epoch": 20.53858921161826, "grad_norm": 74.95140838623047, "learning_rate": 1.1788879668049792e-05, "loss": 0.4827, "step": 24749 }, { "epoch": 20.53941908713693, "grad_norm": 55.23887634277344, "learning_rate": 1.1788547717842324e-05, "loss": 0.7551, "step": 24750 }, { "epoch": 20.540248962655603, "grad_norm": 14.490676879882812, "learning_rate": 1.1788215767634856e-05, "loss": 0.3155, "step": 24751 }, { "epoch": 20.541078838174275, "grad_norm": 14.004182815551758, "learning_rate": 1.1787883817427389e-05, "loss": 0.2544, "step": 24752 }, { "epoch": 20.541908713692948, "grad_norm": 27.769113540649414, "learning_rate": 1.1787551867219917e-05, "loss": 0.5002, "step": 24753 }, { "epoch": 20.54273858921162, "grad_norm": 25.768917083740234, "learning_rate": 1.178721991701245e-05, "loss": 0.2951, "step": 24754 }, { "epoch": 20.543568464730292, "grad_norm": 38.03419876098633, "learning_rate": 1.1786887966804982e-05, "loss": 0.3421, "step": 24755 }, { "epoch": 20.544398340248964, "grad_norm": 26.43023109436035, "learning_rate": 1.1786556016597512e-05, "loss": 0.3911, "step": 24756 }, { "epoch": 20.545228215767636, "grad_norm": 47.02510452270508, "learning_rate": 
1.1786224066390042e-05, "loss": 0.6515, "step": 24757 }, { "epoch": 20.54605809128631, "grad_norm": 15.374485969543457, "learning_rate": 1.1785892116182573e-05, "loss": 0.2522, "step": 24758 }, { "epoch": 20.54688796680498, "grad_norm": 100.88235473632812, "learning_rate": 1.1785560165975105e-05, "loss": 0.9547, "step": 24759 }, { "epoch": 20.547717842323653, "grad_norm": 114.41175842285156, "learning_rate": 1.1785228215767637e-05, "loss": 0.61, "step": 24760 }, { "epoch": 20.548547717842325, "grad_norm": 105.52751922607422, "learning_rate": 1.1784896265560166e-05, "loss": 0.8592, "step": 24761 }, { "epoch": 20.549377593360997, "grad_norm": 31.39963722229004, "learning_rate": 1.1784564315352698e-05, "loss": 0.5273, "step": 24762 }, { "epoch": 20.55020746887967, "grad_norm": 28.17359161376953, "learning_rate": 1.178423236514523e-05, "loss": 0.4218, "step": 24763 }, { "epoch": 20.551037344398342, "grad_norm": 44.975643157958984, "learning_rate": 1.178390041493776e-05, "loss": 0.5209, "step": 24764 }, { "epoch": 20.551867219917014, "grad_norm": 22.544681549072266, "learning_rate": 1.178356846473029e-05, "loss": 0.4812, "step": 24765 }, { "epoch": 20.552697095435686, "grad_norm": 36.28451919555664, "learning_rate": 1.1783236514522823e-05, "loss": 0.2942, "step": 24766 }, { "epoch": 20.55352697095436, "grad_norm": 35.25014114379883, "learning_rate": 1.1782904564315353e-05, "loss": 0.538, "step": 24767 }, { "epoch": 20.55435684647303, "grad_norm": 54.66896438598633, "learning_rate": 1.1782572614107885e-05, "loss": 0.9578, "step": 24768 }, { "epoch": 20.555186721991703, "grad_norm": 82.74639892578125, "learning_rate": 1.1782240663900416e-05, "loss": 0.4229, "step": 24769 }, { "epoch": 20.556016597510375, "grad_norm": 127.04065704345703, "learning_rate": 1.1781908713692946e-05, "loss": 0.7646, "step": 24770 }, { "epoch": 20.556846473029047, "grad_norm": 133.16909790039062, "learning_rate": 1.1781576763485478e-05, "loss": 0.5241, "step": 24771 }, { "epoch": 
20.55767634854772, "grad_norm": 19.63822364807129, "learning_rate": 1.178124481327801e-05, "loss": 0.5123, "step": 24772 }, { "epoch": 20.55850622406639, "grad_norm": 26.047080993652344, "learning_rate": 1.1780912863070539e-05, "loss": 0.4932, "step": 24773 }, { "epoch": 20.559336099585064, "grad_norm": 101.03227996826172, "learning_rate": 1.1780580912863071e-05, "loss": 0.8316, "step": 24774 }, { "epoch": 20.560165975103736, "grad_norm": 20.45231819152832, "learning_rate": 1.1780248962655603e-05, "loss": 0.3199, "step": 24775 }, { "epoch": 20.560995850622408, "grad_norm": 64.64348602294922, "learning_rate": 1.1779917012448134e-05, "loss": 0.5624, "step": 24776 }, { "epoch": 20.56182572614108, "grad_norm": 59.459686279296875, "learning_rate": 1.1779585062240664e-05, "loss": 1.205, "step": 24777 }, { "epoch": 20.562655601659753, "grad_norm": 49.85456085205078, "learning_rate": 1.1779253112033196e-05, "loss": 0.7586, "step": 24778 }, { "epoch": 20.563485477178425, "grad_norm": 27.413602828979492, "learning_rate": 1.1778921161825727e-05, "loss": 0.4014, "step": 24779 }, { "epoch": 20.564315352697097, "grad_norm": 20.893163681030273, "learning_rate": 1.1778589211618259e-05, "loss": 0.3652, "step": 24780 }, { "epoch": 20.56514522821577, "grad_norm": 21.637699127197266, "learning_rate": 1.1778257261410791e-05, "loss": 0.4562, "step": 24781 }, { "epoch": 20.56597510373444, "grad_norm": 26.00843620300293, "learning_rate": 1.177792531120332e-05, "loss": 0.423, "step": 24782 }, { "epoch": 20.566804979253114, "grad_norm": 27.59236717224121, "learning_rate": 1.1777593360995852e-05, "loss": 0.4572, "step": 24783 }, { "epoch": 20.567634854771786, "grad_norm": 32.70541763305664, "learning_rate": 1.1777261410788384e-05, "loss": 0.4297, "step": 24784 }, { "epoch": 20.568464730290458, "grad_norm": 37.8974723815918, "learning_rate": 1.1776929460580914e-05, "loss": 0.3254, "step": 24785 }, { "epoch": 20.56929460580913, "grad_norm": 52.807037353515625, "learning_rate": 
1.1776597510373445e-05, "loss": 0.6994, "step": 24786 }, { "epoch": 20.570124481327802, "grad_norm": 33.104896545410156, "learning_rate": 1.1776265560165975e-05, "loss": 0.4375, "step": 24787 }, { "epoch": 20.570954356846475, "grad_norm": 130.44667053222656, "learning_rate": 1.1775933609958507e-05, "loss": 0.6631, "step": 24788 }, { "epoch": 20.571784232365147, "grad_norm": 28.26507568359375, "learning_rate": 1.177560165975104e-05, "loss": 0.372, "step": 24789 }, { "epoch": 20.57261410788382, "grad_norm": 26.518457412719727, "learning_rate": 1.1775269709543568e-05, "loss": 0.5128, "step": 24790 }, { "epoch": 20.57344398340249, "grad_norm": 57.51908874511719, "learning_rate": 1.17749377593361e-05, "loss": 1.3996, "step": 24791 }, { "epoch": 20.574273858921163, "grad_norm": 85.30878448486328, "learning_rate": 1.1774605809128632e-05, "loss": 0.7555, "step": 24792 }, { "epoch": 20.575103734439836, "grad_norm": 7.128228664398193, "learning_rate": 1.1774273858921164e-05, "loss": 0.3385, "step": 24793 }, { "epoch": 20.575933609958508, "grad_norm": 75.3912582397461, "learning_rate": 1.1773941908713693e-05, "loss": 0.5745, "step": 24794 }, { "epoch": 20.57676348547718, "grad_norm": 33.8084716796875, "learning_rate": 1.1773609958506225e-05, "loss": 0.4358, "step": 24795 }, { "epoch": 20.577593360995852, "grad_norm": 101.24821472167969, "learning_rate": 1.1773278008298756e-05, "loss": 0.6693, "step": 24796 }, { "epoch": 20.578423236514524, "grad_norm": 9.248444557189941, "learning_rate": 1.1772946058091288e-05, "loss": 0.2285, "step": 24797 }, { "epoch": 20.579253112033197, "grad_norm": 22.932222366333008, "learning_rate": 1.1772614107883818e-05, "loss": 0.3333, "step": 24798 }, { "epoch": 20.58008298755187, "grad_norm": 23.462596893310547, "learning_rate": 1.1772282157676349e-05, "loss": 0.4599, "step": 24799 }, { "epoch": 20.58091286307054, "grad_norm": 48.82775115966797, "learning_rate": 1.177195020746888e-05, "loss": 0.7772, "step": 24800 }, { "epoch": 20.581742738589213, 
"grad_norm": 22.405485153198242, "learning_rate": 1.1771618257261413e-05, "loss": 0.5298, "step": 24801 }, { "epoch": 20.582572614107885, "grad_norm": 72.14303588867188, "learning_rate": 1.1771286307053941e-05, "loss": 0.7592, "step": 24802 }, { "epoch": 20.583402489626557, "grad_norm": 38.26287841796875, "learning_rate": 1.1770954356846474e-05, "loss": 0.2892, "step": 24803 }, { "epoch": 20.58423236514523, "grad_norm": 56.15603256225586, "learning_rate": 1.1770622406639006e-05, "loss": 0.924, "step": 24804 }, { "epoch": 20.585062240663902, "grad_norm": 54.172428131103516, "learning_rate": 1.1770290456431536e-05, "loss": 0.3811, "step": 24805 }, { "epoch": 20.585892116182574, "grad_norm": 75.0380859375, "learning_rate": 1.1769958506224068e-05, "loss": 0.4445, "step": 24806 }, { "epoch": 20.586721991701246, "grad_norm": 48.227291107177734, "learning_rate": 1.1769626556016599e-05, "loss": 0.5976, "step": 24807 }, { "epoch": 20.58755186721992, "grad_norm": 44.62492370605469, "learning_rate": 1.1769294605809129e-05, "loss": 0.7459, "step": 24808 }, { "epoch": 20.58838174273859, "grad_norm": 29.40734100341797, "learning_rate": 1.1768962655601661e-05, "loss": 0.3433, "step": 24809 }, { "epoch": 20.589211618257263, "grad_norm": 37.72382354736328, "learning_rate": 1.1768630705394193e-05, "loss": 0.3622, "step": 24810 }, { "epoch": 20.590041493775935, "grad_norm": 18.15859031677246, "learning_rate": 1.1768298755186722e-05, "loss": 0.362, "step": 24811 }, { "epoch": 20.590871369294607, "grad_norm": 17.073110580444336, "learning_rate": 1.1767966804979254e-05, "loss": 0.3328, "step": 24812 }, { "epoch": 20.59170124481328, "grad_norm": 44.39689254760742, "learning_rate": 1.1767634854771786e-05, "loss": 0.7915, "step": 24813 }, { "epoch": 20.59253112033195, "grad_norm": 17.8062801361084, "learning_rate": 1.1767302904564317e-05, "loss": 0.3039, "step": 24814 }, { "epoch": 20.593360995850624, "grad_norm": 30.173603057861328, "learning_rate": 1.1766970954356847e-05, "loss": 0.3307, 
"step": 24815 }, { "epoch": 20.594190871369296, "grad_norm": 102.44170379638672, "learning_rate": 1.1766639004149379e-05, "loss": 0.8444, "step": 24816 }, { "epoch": 20.59502074688797, "grad_norm": 16.348247528076172, "learning_rate": 1.176630705394191e-05, "loss": 0.283, "step": 24817 }, { "epoch": 20.59585062240664, "grad_norm": 49.16710662841797, "learning_rate": 1.1765975103734442e-05, "loss": 0.4355, "step": 24818 }, { "epoch": 20.596680497925313, "grad_norm": 65.85977172851562, "learning_rate": 1.176564315352697e-05, "loss": 0.8695, "step": 24819 }, { "epoch": 20.597510373443985, "grad_norm": 23.86747169494629, "learning_rate": 1.1765311203319502e-05, "loss": 0.3538, "step": 24820 }, { "epoch": 20.598340248962657, "grad_norm": 21.035255432128906, "learning_rate": 1.1764979253112035e-05, "loss": 0.374, "step": 24821 }, { "epoch": 20.59917012448133, "grad_norm": 39.297088623046875, "learning_rate": 1.1764647302904567e-05, "loss": 1.0457, "step": 24822 }, { "epoch": 20.6, "grad_norm": 43.65879440307617, "learning_rate": 1.1764315352697095e-05, "loss": 0.5643, "step": 24823 }, { "epoch": 20.600829875518674, "grad_norm": 27.984167098999023, "learning_rate": 1.1763983402489627e-05, "loss": 0.273, "step": 24824 }, { "epoch": 20.601659751037346, "grad_norm": 71.06204223632812, "learning_rate": 1.176365145228216e-05, "loss": 0.5377, "step": 24825 }, { "epoch": 20.602489626556018, "grad_norm": 16.183382034301758, "learning_rate": 1.176331950207469e-05, "loss": 0.2932, "step": 24826 }, { "epoch": 20.60331950207469, "grad_norm": 80.90831756591797, "learning_rate": 1.176298755186722e-05, "loss": 0.729, "step": 24827 }, { "epoch": 20.604149377593362, "grad_norm": 18.540502548217773, "learning_rate": 1.176265560165975e-05, "loss": 0.3361, "step": 24828 }, { "epoch": 20.604979253112035, "grad_norm": 82.89427185058594, "learning_rate": 1.1762323651452283e-05, "loss": 1.2071, "step": 24829 }, { "epoch": 20.605809128630707, "grad_norm": 24.124649047851562, "learning_rate": 
1.1761991701244815e-05, "loss": 0.4148, "step": 24830 }, { "epoch": 20.60663900414938, "grad_norm": 29.4971866607666, "learning_rate": 1.1761659751037344e-05, "loss": 0.3495, "step": 24831 }, { "epoch": 20.60746887966805, "grad_norm": 41.08047866821289, "learning_rate": 1.1761327800829876e-05, "loss": 0.5797, "step": 24832 }, { "epoch": 20.608298755186723, "grad_norm": 35.01319122314453, "learning_rate": 1.1760995850622408e-05, "loss": 0.3957, "step": 24833 }, { "epoch": 20.609128630705396, "grad_norm": 33.67498779296875, "learning_rate": 1.1760663900414938e-05, "loss": 0.3847, "step": 24834 }, { "epoch": 20.609958506224068, "grad_norm": 96.52965545654297, "learning_rate": 1.176033195020747e-05, "loss": 0.9359, "step": 24835 }, { "epoch": 20.61078838174274, "grad_norm": 91.7390365600586, "learning_rate": 1.1760000000000001e-05, "loss": 0.8412, "step": 24836 }, { "epoch": 20.611618257261412, "grad_norm": 80.52766418457031, "learning_rate": 1.1759668049792531e-05, "loss": 0.4912, "step": 24837 }, { "epoch": 20.612448132780084, "grad_norm": 28.70573616027832, "learning_rate": 1.1759336099585063e-05, "loss": 0.5167, "step": 24838 }, { "epoch": 20.613278008298757, "grad_norm": 68.64107513427734, "learning_rate": 1.1759004149377596e-05, "loss": 0.9539, "step": 24839 }, { "epoch": 20.61410788381743, "grad_norm": 26.559816360473633, "learning_rate": 1.1758672199170124e-05, "loss": 0.425, "step": 24840 }, { "epoch": 20.6149377593361, "grad_norm": 152.17886352539062, "learning_rate": 1.1758340248962656e-05, "loss": 1.1055, "step": 24841 }, { "epoch": 20.615767634854773, "grad_norm": 49.58354568481445, "learning_rate": 1.1758008298755188e-05, "loss": 0.8822, "step": 24842 }, { "epoch": 20.616597510373445, "grad_norm": 49.92757797241211, "learning_rate": 1.1757676348547719e-05, "loss": 0.688, "step": 24843 }, { "epoch": 20.617427385892118, "grad_norm": 42.87976837158203, "learning_rate": 1.175734439834025e-05, "loss": 0.868, "step": 24844 }, { "epoch": 20.61825726141079, 
"grad_norm": 22.601329803466797, "learning_rate": 1.1757012448132781e-05, "loss": 0.377, "step": 24845 }, { "epoch": 20.619087136929462, "grad_norm": 36.74895095825195, "learning_rate": 1.1756680497925312e-05, "loss": 0.9798, "step": 24846 }, { "epoch": 20.619917012448134, "grad_norm": 37.6053352355957, "learning_rate": 1.1756348547717844e-05, "loss": 0.3501, "step": 24847 }, { "epoch": 20.620746887966806, "grad_norm": 118.45654296875, "learning_rate": 1.1756016597510374e-05, "loss": 0.6495, "step": 24848 }, { "epoch": 20.62157676348548, "grad_norm": 19.724929809570312, "learning_rate": 1.1755684647302905e-05, "loss": 0.4707, "step": 24849 }, { "epoch": 20.62240663900415, "grad_norm": 12.753202438354492, "learning_rate": 1.1755352697095437e-05, "loss": 0.3006, "step": 24850 }, { "epoch": 20.623236514522823, "grad_norm": 61.393558502197266, "learning_rate": 1.1755020746887969e-05, "loss": 1.4193, "step": 24851 }, { "epoch": 20.624066390041495, "grad_norm": 42.606300354003906, "learning_rate": 1.1754688796680498e-05, "loss": 0.4867, "step": 24852 }, { "epoch": 20.624896265560167, "grad_norm": 37.895145416259766, "learning_rate": 1.175435684647303e-05, "loss": 0.4573, "step": 24853 }, { "epoch": 20.62572614107884, "grad_norm": 21.278249740600586, "learning_rate": 1.1754024896265562e-05, "loss": 0.3651, "step": 24854 }, { "epoch": 20.62655601659751, "grad_norm": 32.918209075927734, "learning_rate": 1.1753692946058092e-05, "loss": 0.6261, "step": 24855 }, { "epoch": 20.627385892116184, "grad_norm": 37.87032699584961, "learning_rate": 1.1753360995850623e-05, "loss": 0.5712, "step": 24856 }, { "epoch": 20.628215767634856, "grad_norm": 23.737363815307617, "learning_rate": 1.1753029045643153e-05, "loss": 0.4166, "step": 24857 }, { "epoch": 20.62904564315353, "grad_norm": 24.626293182373047, "learning_rate": 1.1752697095435685e-05, "loss": 0.382, "step": 24858 }, { "epoch": 20.6298755186722, "grad_norm": 12.488759994506836, "learning_rate": 1.1752365145228217e-05, "loss": 
0.3288, "step": 24859 }, { "epoch": 20.630705394190873, "grad_norm": 57.858882904052734, "learning_rate": 1.175203319502075e-05, "loss": 0.6974, "step": 24860 }, { "epoch": 20.631535269709545, "grad_norm": 39.345802307128906, "learning_rate": 1.1751701244813278e-05, "loss": 0.6645, "step": 24861 }, { "epoch": 20.632365145228217, "grad_norm": 91.29238891601562, "learning_rate": 1.175136929460581e-05, "loss": 0.622, "step": 24862 }, { "epoch": 20.63319502074689, "grad_norm": 26.431283950805664, "learning_rate": 1.1751037344398342e-05, "loss": 0.3137, "step": 24863 }, { "epoch": 20.63402489626556, "grad_norm": 63.323707580566406, "learning_rate": 1.1750705394190873e-05, "loss": 0.6747, "step": 24864 }, { "epoch": 20.634854771784234, "grad_norm": 22.016849517822266, "learning_rate": 1.1750373443983403e-05, "loss": 0.3843, "step": 24865 }, { "epoch": 20.635684647302906, "grad_norm": 73.01127624511719, "learning_rate": 1.1750041493775934e-05, "loss": 0.4475, "step": 24866 }, { "epoch": 20.636514522821578, "grad_norm": 37.51280212402344, "learning_rate": 1.1749709543568466e-05, "loss": 0.5198, "step": 24867 }, { "epoch": 20.63734439834025, "grad_norm": 47.78148651123047, "learning_rate": 1.1749377593360998e-05, "loss": 0.7142, "step": 24868 }, { "epoch": 20.638174273858922, "grad_norm": 36.42730712890625, "learning_rate": 1.1749045643153527e-05, "loss": 0.4008, "step": 24869 }, { "epoch": 20.639004149377595, "grad_norm": 42.41849899291992, "learning_rate": 1.1748713692946059e-05, "loss": 0.4895, "step": 24870 }, { "epoch": 20.639834024896267, "grad_norm": 94.16075897216797, "learning_rate": 1.174838174273859e-05, "loss": 0.8578, "step": 24871 }, { "epoch": 20.64066390041494, "grad_norm": 50.96706008911133, "learning_rate": 1.1748049792531123e-05, "loss": 0.5388, "step": 24872 }, { "epoch": 20.64149377593361, "grad_norm": 44.354068756103516, "learning_rate": 1.1747717842323652e-05, "loss": 0.6432, "step": 24873 }, { "epoch": 20.642323651452283, "grad_norm": 
56.56925964355469, "learning_rate": 1.1747385892116184e-05, "loss": 0.4482, "step": 24874 }, { "epoch": 20.643153526970956, "grad_norm": 19.90665054321289, "learning_rate": 1.1747053941908714e-05, "loss": 0.3155, "step": 24875 }, { "epoch": 20.643983402489628, "grad_norm": 21.622737884521484, "learning_rate": 1.1746721991701246e-05, "loss": 0.3006, "step": 24876 }, { "epoch": 20.6448132780083, "grad_norm": 41.50525665283203, "learning_rate": 1.1746390041493777e-05, "loss": 0.468, "step": 24877 }, { "epoch": 20.645643153526972, "grad_norm": 54.85870361328125, "learning_rate": 1.1746058091286307e-05, "loss": 0.4515, "step": 24878 }, { "epoch": 20.646473029045644, "grad_norm": 24.226093292236328, "learning_rate": 1.174572614107884e-05, "loss": 0.573, "step": 24879 }, { "epoch": 20.647302904564317, "grad_norm": 133.86663818359375, "learning_rate": 1.1745394190871371e-05, "loss": 0.4441, "step": 24880 }, { "epoch": 20.64813278008299, "grad_norm": 30.83577537536621, "learning_rate": 1.17450622406639e-05, "loss": 0.3333, "step": 24881 }, { "epoch": 20.64896265560166, "grad_norm": 44.191490173339844, "learning_rate": 1.1744730290456432e-05, "loss": 0.4831, "step": 24882 }, { "epoch": 20.649792531120333, "grad_norm": 13.99752426147461, "learning_rate": 1.1744398340248964e-05, "loss": 0.2801, "step": 24883 }, { "epoch": 20.650622406639005, "grad_norm": 24.287492752075195, "learning_rate": 1.1744066390041495e-05, "loss": 0.505, "step": 24884 }, { "epoch": 20.651452282157678, "grad_norm": 11.536199569702148, "learning_rate": 1.1743734439834027e-05, "loss": 0.3337, "step": 24885 }, { "epoch": 20.65228215767635, "grad_norm": 180.758056640625, "learning_rate": 1.1743402489626557e-05, "loss": 0.7108, "step": 24886 }, { "epoch": 20.653112033195022, "grad_norm": 51.45026779174805, "learning_rate": 1.1743070539419088e-05, "loss": 0.5868, "step": 24887 }, { "epoch": 20.653941908713694, "grad_norm": 14.4135160446167, "learning_rate": 1.174273858921162e-05, "loss": 0.3598, "step": 24888 
}, { "epoch": 20.654771784232366, "grad_norm": 68.3906021118164, "learning_rate": 1.1742406639004152e-05, "loss": 1.125, "step": 24889 }, { "epoch": 20.65560165975104, "grad_norm": 69.49650573730469, "learning_rate": 1.174207468879668e-05, "loss": 0.7204, "step": 24890 }, { "epoch": 20.65643153526971, "grad_norm": 24.19615364074707, "learning_rate": 1.1741742738589213e-05, "loss": 0.4212, "step": 24891 }, { "epoch": 20.657261410788383, "grad_norm": 36.58893966674805, "learning_rate": 1.1741410788381745e-05, "loss": 0.4055, "step": 24892 }, { "epoch": 20.658091286307055, "grad_norm": 47.39741516113281, "learning_rate": 1.1741078838174275e-05, "loss": 0.3822, "step": 24893 }, { "epoch": 20.658921161825727, "grad_norm": 93.55802154541016, "learning_rate": 1.1740746887966806e-05, "loss": 0.4065, "step": 24894 }, { "epoch": 20.6597510373444, "grad_norm": 127.26280212402344, "learning_rate": 1.1740414937759338e-05, "loss": 0.9785, "step": 24895 }, { "epoch": 20.66058091286307, "grad_norm": 83.7123031616211, "learning_rate": 1.1740082987551868e-05, "loss": 0.6307, "step": 24896 }, { "epoch": 20.661410788381744, "grad_norm": 24.903635025024414, "learning_rate": 1.17397510373444e-05, "loss": 0.4007, "step": 24897 }, { "epoch": 20.662240663900416, "grad_norm": 45.222511291503906, "learning_rate": 1.1739419087136929e-05, "loss": 0.8947, "step": 24898 }, { "epoch": 20.66307053941909, "grad_norm": 30.179859161376953, "learning_rate": 1.1739087136929461e-05, "loss": 0.396, "step": 24899 }, { "epoch": 20.66390041493776, "grad_norm": 68.0692367553711, "learning_rate": 1.1738755186721993e-05, "loss": 0.8204, "step": 24900 }, { "epoch": 20.664730290456433, "grad_norm": 15.57373046875, "learning_rate": 1.1738423236514525e-05, "loss": 0.3696, "step": 24901 }, { "epoch": 20.665560165975105, "grad_norm": 31.14904022216797, "learning_rate": 1.1738091286307054e-05, "loss": 0.3448, "step": 24902 }, { "epoch": 20.666390041493777, "grad_norm": 39.7573356628418, "learning_rate": 
1.1737759336099586e-05, "loss": 0.4662, "step": 24903 }, { "epoch": 20.66721991701245, "grad_norm": 25.736665725708008, "learning_rate": 1.1737427385892116e-05, "loss": 0.4624, "step": 24904 }, { "epoch": 20.66804979253112, "grad_norm": 77.87406921386719, "learning_rate": 1.1737095435684649e-05, "loss": 0.4287, "step": 24905 }, { "epoch": 20.668879668049794, "grad_norm": 38.70598602294922, "learning_rate": 1.1736763485477179e-05, "loss": 1.0461, "step": 24906 }, { "epoch": 20.669709543568466, "grad_norm": 38.21788787841797, "learning_rate": 1.173643153526971e-05, "loss": 0.2854, "step": 24907 }, { "epoch": 20.670539419087138, "grad_norm": 35.02672576904297, "learning_rate": 1.1736099585062242e-05, "loss": 0.3266, "step": 24908 }, { "epoch": 20.67136929460581, "grad_norm": 40.5175895690918, "learning_rate": 1.1735767634854774e-05, "loss": 0.9442, "step": 24909 }, { "epoch": 20.672199170124482, "grad_norm": 53.53308868408203, "learning_rate": 1.1735435684647302e-05, "loss": 0.6484, "step": 24910 }, { "epoch": 20.673029045643155, "grad_norm": 41.32924270629883, "learning_rate": 1.1735103734439834e-05, "loss": 0.698, "step": 24911 }, { "epoch": 20.673858921161827, "grad_norm": 13.493617057800293, "learning_rate": 1.1734771784232367e-05, "loss": 0.3583, "step": 24912 }, { "epoch": 20.6746887966805, "grad_norm": 30.30019187927246, "learning_rate": 1.1734439834024897e-05, "loss": 1.0849, "step": 24913 }, { "epoch": 20.67551867219917, "grad_norm": 63.43657684326172, "learning_rate": 1.1734107883817429e-05, "loss": 0.5415, "step": 24914 }, { "epoch": 20.676348547717843, "grad_norm": 31.70017433166504, "learning_rate": 1.173377593360996e-05, "loss": 0.3632, "step": 24915 }, { "epoch": 20.677178423236516, "grad_norm": 39.23539733886719, "learning_rate": 1.173344398340249e-05, "loss": 0.5554, "step": 24916 }, { "epoch": 20.678008298755188, "grad_norm": 24.8907470703125, "learning_rate": 1.1733112033195022e-05, "loss": 0.3497, "step": 24917 }, { "epoch": 20.67883817427386, 
"grad_norm": 21.205280303955078, "learning_rate": 1.1732780082987554e-05, "loss": 0.353, "step": 24918 }, { "epoch": 20.679668049792532, "grad_norm": 28.978126525878906, "learning_rate": 1.1732448132780083e-05, "loss": 0.3175, "step": 24919 }, { "epoch": 20.680497925311204, "grad_norm": 43.16628646850586, "learning_rate": 1.1732116182572615e-05, "loss": 0.6855, "step": 24920 }, { "epoch": 20.681327800829877, "grad_norm": 133.1629638671875, "learning_rate": 1.1731784232365147e-05, "loss": 0.8605, "step": 24921 }, { "epoch": 20.68215767634855, "grad_norm": 34.132049560546875, "learning_rate": 1.1731452282157677e-05, "loss": 0.4946, "step": 24922 }, { "epoch": 20.68298755186722, "grad_norm": 33.12385940551758, "learning_rate": 1.1731120331950208e-05, "loss": 0.6534, "step": 24923 }, { "epoch": 20.683817427385893, "grad_norm": 78.18758392333984, "learning_rate": 1.173078838174274e-05, "loss": 0.576, "step": 24924 }, { "epoch": 20.684647302904565, "grad_norm": 24.79116439819336, "learning_rate": 1.173045643153527e-05, "loss": 0.4614, "step": 24925 }, { "epoch": 20.685477178423238, "grad_norm": 17.657398223876953, "learning_rate": 1.1730124481327803e-05, "loss": 0.2966, "step": 24926 }, { "epoch": 20.68630705394191, "grad_norm": 87.75232696533203, "learning_rate": 1.1729792531120331e-05, "loss": 0.7593, "step": 24927 }, { "epoch": 20.687136929460582, "grad_norm": 10.608316421508789, "learning_rate": 1.1729460580912863e-05, "loss": 0.3624, "step": 24928 }, { "epoch": 20.687966804979254, "grad_norm": 25.601613998413086, "learning_rate": 1.1729128630705395e-05, "loss": 0.5318, "step": 24929 }, { "epoch": 20.688796680497926, "grad_norm": 48.820777893066406, "learning_rate": 1.1728796680497928e-05, "loss": 0.4102, "step": 24930 }, { "epoch": 20.6896265560166, "grad_norm": 178.52671813964844, "learning_rate": 1.1728464730290456e-05, "loss": 2.1831, "step": 24931 }, { "epoch": 20.69045643153527, "grad_norm": 30.640758514404297, "learning_rate": 1.1728132780082988e-05, "loss": 
0.6585, "step": 24932 }, { "epoch": 20.691286307053943, "grad_norm": 38.709712982177734, "learning_rate": 1.172780082987552e-05, "loss": 0.3931, "step": 24933 }, { "epoch": 20.692116182572615, "grad_norm": 64.6128921508789, "learning_rate": 1.1727468879668051e-05, "loss": 0.8701, "step": 24934 }, { "epoch": 20.692946058091287, "grad_norm": 30.653636932373047, "learning_rate": 1.1727136929460581e-05, "loss": 0.5122, "step": 24935 }, { "epoch": 20.69377593360996, "grad_norm": 24.618101119995117, "learning_rate": 1.1726804979253112e-05, "loss": 0.3296, "step": 24936 }, { "epoch": 20.694605809128632, "grad_norm": 38.529109954833984, "learning_rate": 1.1726473029045644e-05, "loss": 0.496, "step": 24937 }, { "epoch": 20.695435684647304, "grad_norm": 13.252232551574707, "learning_rate": 1.1726141078838176e-05, "loss": 0.2497, "step": 24938 }, { "epoch": 20.696265560165976, "grad_norm": 42.927391052246094, "learning_rate": 1.1725809128630708e-05, "loss": 0.5361, "step": 24939 }, { "epoch": 20.69709543568465, "grad_norm": 54.2990837097168, "learning_rate": 1.1725477178423237e-05, "loss": 0.904, "step": 24940 }, { "epoch": 20.69792531120332, "grad_norm": 37.66452407836914, "learning_rate": 1.1725145228215769e-05, "loss": 0.4765, "step": 24941 }, { "epoch": 20.698755186721993, "grad_norm": 30.01953125, "learning_rate": 1.1724813278008301e-05, "loss": 0.2667, "step": 24942 }, { "epoch": 20.699585062240665, "grad_norm": 26.373701095581055, "learning_rate": 1.1724481327800831e-05, "loss": 0.3444, "step": 24943 }, { "epoch": 20.700414937759337, "grad_norm": 51.783241271972656, "learning_rate": 1.1724149377593362e-05, "loss": 0.6417, "step": 24944 }, { "epoch": 20.70124481327801, "grad_norm": 25.160226821899414, "learning_rate": 1.1723817427385892e-05, "loss": 0.3154, "step": 24945 }, { "epoch": 20.70207468879668, "grad_norm": 61.43998336791992, "learning_rate": 1.1723485477178424e-05, "loss": 0.7035, "step": 24946 }, { "epoch": 20.702904564315354, "grad_norm": 32.67695236206055, 
"learning_rate": 1.1723153526970956e-05, "loss": 0.3783, "step": 24947 }, { "epoch": 20.703734439834026, "grad_norm": 37.25923156738281, "learning_rate": 1.1722821576763485e-05, "loss": 0.9583, "step": 24948 }, { "epoch": 20.704564315352698, "grad_norm": 69.7783432006836, "learning_rate": 1.1722489626556017e-05, "loss": 0.9369, "step": 24949 }, { "epoch": 20.70539419087137, "grad_norm": 36.84342956542969, "learning_rate": 1.172215767634855e-05, "loss": 0.3282, "step": 24950 }, { "epoch": 20.706224066390043, "grad_norm": 62.14891052246094, "learning_rate": 1.172182572614108e-05, "loss": 0.7155, "step": 24951 }, { "epoch": 20.707053941908715, "grad_norm": 63.53789138793945, "learning_rate": 1.172149377593361e-05, "loss": 0.9222, "step": 24952 }, { "epoch": 20.707883817427387, "grad_norm": 69.99006652832031, "learning_rate": 1.1721161825726142e-05, "loss": 0.944, "step": 24953 }, { "epoch": 20.70871369294606, "grad_norm": 23.58104133605957, "learning_rate": 1.1720829875518673e-05, "loss": 0.3322, "step": 24954 }, { "epoch": 20.70954356846473, "grad_norm": 32.88851547241211, "learning_rate": 1.1720497925311205e-05, "loss": 0.6685, "step": 24955 }, { "epoch": 20.710373443983404, "grad_norm": 22.735485076904297, "learning_rate": 1.1720165975103735e-05, "loss": 0.3502, "step": 24956 }, { "epoch": 20.711203319502076, "grad_norm": 35.84616470336914, "learning_rate": 1.1719834024896266e-05, "loss": 0.5816, "step": 24957 }, { "epoch": 20.712033195020748, "grad_norm": 23.860715866088867, "learning_rate": 1.1719502074688798e-05, "loss": 0.5224, "step": 24958 }, { "epoch": 20.71286307053942, "grad_norm": 35.65192794799805, "learning_rate": 1.171917012448133e-05, "loss": 0.4307, "step": 24959 }, { "epoch": 20.713692946058092, "grad_norm": 43.924766540527344, "learning_rate": 1.1718838174273859e-05, "loss": 0.9845, "step": 24960 }, { "epoch": 20.714522821576764, "grad_norm": 35.211097717285156, "learning_rate": 1.171850622406639e-05, "loss": 0.3638, "step": 24961 }, { "epoch": 
20.715352697095437, "grad_norm": 31.392580032348633, "learning_rate": 1.1718174273858923e-05, "loss": 0.8741, "step": 24962 }, { "epoch": 20.71618257261411, "grad_norm": 25.41290283203125, "learning_rate": 1.1717842323651453e-05, "loss": 0.2871, "step": 24963 }, { "epoch": 20.71701244813278, "grad_norm": 144.90115356445312, "learning_rate": 1.1717510373443985e-05, "loss": 0.4645, "step": 24964 }, { "epoch": 20.717842323651453, "grad_norm": 21.631078720092773, "learning_rate": 1.1717178423236514e-05, "loss": 0.4474, "step": 24965 }, { "epoch": 20.718672199170125, "grad_norm": 29.146665573120117, "learning_rate": 1.1716846473029046e-05, "loss": 0.4693, "step": 24966 }, { "epoch": 20.719502074688798, "grad_norm": 14.548932075500488, "learning_rate": 1.1716514522821578e-05, "loss": 0.2707, "step": 24967 }, { "epoch": 20.72033195020747, "grad_norm": 81.8696060180664, "learning_rate": 1.171618257261411e-05, "loss": 0.875, "step": 24968 }, { "epoch": 20.721161825726142, "grad_norm": 35.70653533935547, "learning_rate": 1.1715850622406639e-05, "loss": 0.7148, "step": 24969 }, { "epoch": 20.721991701244814, "grad_norm": 79.56747436523438, "learning_rate": 1.1715518672199171e-05, "loss": 0.9115, "step": 24970 }, { "epoch": 20.722821576763486, "grad_norm": 38.670223236083984, "learning_rate": 1.1715186721991703e-05, "loss": 0.9156, "step": 24971 }, { "epoch": 20.72365145228216, "grad_norm": 22.30095863342285, "learning_rate": 1.1714854771784234e-05, "loss": 0.7878, "step": 24972 }, { "epoch": 20.72448132780083, "grad_norm": 43.187191009521484, "learning_rate": 1.1714522821576764e-05, "loss": 0.66, "step": 24973 }, { "epoch": 20.725311203319503, "grad_norm": 33.273414611816406, "learning_rate": 1.1714190871369295e-05, "loss": 0.8001, "step": 24974 }, { "epoch": 20.726141078838175, "grad_norm": 82.20062255859375, "learning_rate": 1.1713858921161827e-05, "loss": 0.798, "step": 24975 }, { "epoch": 20.726970954356847, "grad_norm": 60.444114685058594, "learning_rate": 
1.1713526970954359e-05, "loss": 0.7925, "step": 24976 }, { "epoch": 20.72780082987552, "grad_norm": 60.968658447265625, "learning_rate": 1.1713195020746887e-05, "loss": 0.4772, "step": 24977 }, { "epoch": 20.728630705394192, "grad_norm": 90.5749740600586, "learning_rate": 1.171286307053942e-05, "loss": 0.5922, "step": 24978 }, { "epoch": 20.729460580912864, "grad_norm": 35.514957427978516, "learning_rate": 1.1712531120331952e-05, "loss": 0.4572, "step": 24979 }, { "epoch": 20.730290456431536, "grad_norm": 11.359467506408691, "learning_rate": 1.1712199170124484e-05, "loss": 0.2398, "step": 24980 }, { "epoch": 20.73112033195021, "grad_norm": 56.16737365722656, "learning_rate": 1.1711867219917013e-05, "loss": 0.6688, "step": 24981 }, { "epoch": 20.73195020746888, "grad_norm": 55.200218200683594, "learning_rate": 1.1711535269709545e-05, "loss": 0.4964, "step": 24982 }, { "epoch": 20.732780082987553, "grad_norm": 29.72111701965332, "learning_rate": 1.1711203319502075e-05, "loss": 0.6297, "step": 24983 }, { "epoch": 20.733609958506225, "grad_norm": 60.411460876464844, "learning_rate": 1.1710871369294607e-05, "loss": 0.8056, "step": 24984 }, { "epoch": 20.734439834024897, "grad_norm": 28.494352340698242, "learning_rate": 1.1710539419087138e-05, "loss": 0.5299, "step": 24985 }, { "epoch": 20.73526970954357, "grad_norm": 25.07181739807129, "learning_rate": 1.1710207468879668e-05, "loss": 0.584, "step": 24986 }, { "epoch": 20.73609958506224, "grad_norm": 86.55892181396484, "learning_rate": 1.17098755186722e-05, "loss": 0.5053, "step": 24987 }, { "epoch": 20.736929460580914, "grad_norm": 25.343338012695312, "learning_rate": 1.1709543568464732e-05, "loss": 0.2591, "step": 24988 }, { "epoch": 20.737759336099586, "grad_norm": 13.43091869354248, "learning_rate": 1.1709211618257261e-05, "loss": 0.197, "step": 24989 }, { "epoch": 20.738589211618258, "grad_norm": 49.099632263183594, "learning_rate": 1.1708879668049793e-05, "loss": 0.8141, "step": 24990 }, { "epoch": 
20.73941908713693, "grad_norm": 38.4443359375, "learning_rate": 1.1708547717842325e-05, "loss": 0.7325, "step": 24991 }, { "epoch": 20.740248962655603, "grad_norm": 45.855655670166016, "learning_rate": 1.1708215767634856e-05, "loss": 0.8932, "step": 24992 }, { "epoch": 20.741078838174275, "grad_norm": 45.06761169433594, "learning_rate": 1.1707883817427388e-05, "loss": 0.8129, "step": 24993 }, { "epoch": 20.741908713692947, "grad_norm": 37.15732192993164, "learning_rate": 1.1707551867219918e-05, "loss": 0.7449, "step": 24994 }, { "epoch": 20.74273858921162, "grad_norm": 52.60761260986328, "learning_rate": 1.1707219917012448e-05, "loss": 0.4805, "step": 24995 }, { "epoch": 20.74356846473029, "grad_norm": 17.041860580444336, "learning_rate": 1.170688796680498e-05, "loss": 0.4219, "step": 24996 }, { "epoch": 20.744398340248964, "grad_norm": 55.620548248291016, "learning_rate": 1.1706556016597513e-05, "loss": 0.4504, "step": 24997 }, { "epoch": 20.745228215767636, "grad_norm": 36.23405456542969, "learning_rate": 1.1706224066390041e-05, "loss": 0.4925, "step": 24998 }, { "epoch": 20.746058091286308, "grad_norm": 38.489898681640625, "learning_rate": 1.1705892116182574e-05, "loss": 0.4276, "step": 24999 }, { "epoch": 20.74688796680498, "grad_norm": 35.03873062133789, "learning_rate": 1.1705560165975106e-05, "loss": 0.4589, "step": 25000 }, { "epoch": 20.747717842323652, "grad_norm": 46.38803482055664, "learning_rate": 1.1705228215767636e-05, "loss": 0.5945, "step": 25001 }, { "epoch": 20.748547717842325, "grad_norm": 49.39891815185547, "learning_rate": 1.1704896265560166e-05, "loss": 0.9322, "step": 25002 }, { "epoch": 20.749377593360997, "grad_norm": 67.45137786865234, "learning_rate": 1.1704564315352699e-05, "loss": 0.8141, "step": 25003 }, { "epoch": 20.75020746887967, "grad_norm": 38.85932159423828, "learning_rate": 1.1704232365145229e-05, "loss": 0.8218, "step": 25004 }, { "epoch": 20.75103734439834, "grad_norm": 80.6930160522461, "learning_rate": 
1.1703900414937761e-05, "loss": 0.9295, "step": 25005 }, { "epoch": 20.751867219917013, "grad_norm": 64.17060852050781, "learning_rate": 1.170356846473029e-05, "loss": 0.5025, "step": 25006 }, { "epoch": 20.752697095435686, "grad_norm": 113.9051742553711, "learning_rate": 1.1703236514522822e-05, "loss": 0.6805, "step": 25007 }, { "epoch": 20.753526970954358, "grad_norm": 76.17259216308594, "learning_rate": 1.1702904564315354e-05, "loss": 0.4268, "step": 25008 }, { "epoch": 20.75435684647303, "grad_norm": 70.63532257080078, "learning_rate": 1.1702572614107886e-05, "loss": 0.2974, "step": 25009 }, { "epoch": 20.755186721991702, "grad_norm": 62.36946105957031, "learning_rate": 1.1702240663900415e-05, "loss": 0.6728, "step": 25010 }, { "epoch": 20.756016597510374, "grad_norm": 72.21141052246094, "learning_rate": 1.1701908713692947e-05, "loss": 0.7038, "step": 25011 }, { "epoch": 20.756846473029047, "grad_norm": 48.028594970703125, "learning_rate": 1.1701576763485479e-05, "loss": 0.4993, "step": 25012 }, { "epoch": 20.75767634854772, "grad_norm": 34.68073654174805, "learning_rate": 1.170124481327801e-05, "loss": 0.8321, "step": 25013 }, { "epoch": 20.75850622406639, "grad_norm": 45.7408447265625, "learning_rate": 1.170091286307054e-05, "loss": 0.6011, "step": 25014 }, { "epoch": 20.759336099585063, "grad_norm": 48.67662811279297, "learning_rate": 1.170058091286307e-05, "loss": 0.6127, "step": 25015 }, { "epoch": 20.760165975103735, "grad_norm": 30.952194213867188, "learning_rate": 1.1700248962655602e-05, "loss": 0.7531, "step": 25016 }, { "epoch": 20.760995850622407, "grad_norm": 39.883853912353516, "learning_rate": 1.1699917012448135e-05, "loss": 0.5346, "step": 25017 }, { "epoch": 20.76182572614108, "grad_norm": 65.1518325805664, "learning_rate": 1.1699585062240667e-05, "loss": 0.4948, "step": 25018 }, { "epoch": 20.762655601659752, "grad_norm": 101.40482330322266, "learning_rate": 1.1699253112033195e-05, "loss": 0.9066, "step": 25019 }, { "epoch": 20.763485477178424, 
"grad_norm": 54.85114288330078, "learning_rate": 1.1698921161825727e-05, "loss": 0.5041, "step": 25020 }, { "epoch": 20.764315352697096, "grad_norm": 28.11196517944336, "learning_rate": 1.1698589211618258e-05, "loss": 0.3598, "step": 25021 }, { "epoch": 20.76514522821577, "grad_norm": 56.08853530883789, "learning_rate": 1.169825726141079e-05, "loss": 0.9938, "step": 25022 }, { "epoch": 20.76597510373444, "grad_norm": 37.476409912109375, "learning_rate": 1.169792531120332e-05, "loss": 0.434, "step": 25023 }, { "epoch": 20.766804979253113, "grad_norm": 100.81982421875, "learning_rate": 1.169759336099585e-05, "loss": 0.6792, "step": 25024 }, { "epoch": 20.767634854771785, "grad_norm": 69.73470306396484, "learning_rate": 1.1697261410788383e-05, "loss": 0.5384, "step": 25025 }, { "epoch": 20.768464730290457, "grad_norm": 37.523372650146484, "learning_rate": 1.1696929460580915e-05, "loss": 0.4328, "step": 25026 }, { "epoch": 20.76929460580913, "grad_norm": 59.361663818359375, "learning_rate": 1.1696597510373444e-05, "loss": 0.874, "step": 25027 }, { "epoch": 20.7701244813278, "grad_norm": 69.27915954589844, "learning_rate": 1.1696265560165976e-05, "loss": 0.746, "step": 25028 }, { "epoch": 20.770954356846474, "grad_norm": 43.352752685546875, "learning_rate": 1.1695933609958508e-05, "loss": 0.4884, "step": 25029 }, { "epoch": 20.771784232365146, "grad_norm": 33.834720611572266, "learning_rate": 1.1695601659751038e-05, "loss": 0.6095, "step": 25030 }, { "epoch": 20.77261410788382, "grad_norm": 57.54338455200195, "learning_rate": 1.1695269709543569e-05, "loss": 0.4705, "step": 25031 }, { "epoch": 20.77344398340249, "grad_norm": 82.03052520751953, "learning_rate": 1.1694937759336101e-05, "loss": 0.9318, "step": 25032 }, { "epoch": 20.774273858921163, "grad_norm": 58.76000213623047, "learning_rate": 1.1694605809128631e-05, "loss": 0.5039, "step": 25033 }, { "epoch": 20.775103734439835, "grad_norm": 76.160888671875, "learning_rate": 1.1694273858921163e-05, "loss": 0.9303, 
"step": 25034 }, { "epoch": 20.775933609958507, "grad_norm": 92.8222885131836, "learning_rate": 1.1693941908713692e-05, "loss": 1.0446, "step": 25035 }, { "epoch": 20.77676348547718, "grad_norm": 51.16263961791992, "learning_rate": 1.1693609958506224e-05, "loss": 0.8766, "step": 25036 }, { "epoch": 20.77759336099585, "grad_norm": 16.85626220703125, "learning_rate": 1.1693278008298756e-05, "loss": 0.3602, "step": 25037 }, { "epoch": 20.778423236514524, "grad_norm": 37.174354553222656, "learning_rate": 1.1692946058091288e-05, "loss": 0.7188, "step": 25038 }, { "epoch": 20.779253112033196, "grad_norm": 25.59253692626953, "learning_rate": 1.1692614107883817e-05, "loss": 0.4554, "step": 25039 }, { "epoch": 20.780082987551868, "grad_norm": 46.23347473144531, "learning_rate": 1.169228215767635e-05, "loss": 1.0263, "step": 25040 }, { "epoch": 20.78091286307054, "grad_norm": 22.44211196899414, "learning_rate": 1.1691950207468881e-05, "loss": 0.5756, "step": 25041 }, { "epoch": 20.781742738589212, "grad_norm": 67.59430694580078, "learning_rate": 1.1691618257261412e-05, "loss": 1.4161, "step": 25042 }, { "epoch": 20.782572614107885, "grad_norm": 10.528923988342285, "learning_rate": 1.1691286307053944e-05, "loss": 0.2166, "step": 25043 }, { "epoch": 20.783402489626557, "grad_norm": 12.96828556060791, "learning_rate": 1.1690954356846473e-05, "loss": 0.2249, "step": 25044 }, { "epoch": 20.78423236514523, "grad_norm": 29.564157485961914, "learning_rate": 1.1690622406639005e-05, "loss": 0.7255, "step": 25045 }, { "epoch": 20.7850622406639, "grad_norm": 29.647783279418945, "learning_rate": 1.1690290456431537e-05, "loss": 0.4197, "step": 25046 }, { "epoch": 20.785892116182573, "grad_norm": 13.01944351196289, "learning_rate": 1.1689958506224069e-05, "loss": 0.3161, "step": 25047 }, { "epoch": 20.786721991701246, "grad_norm": 51.23183822631836, "learning_rate": 1.1689626556016598e-05, "loss": 0.5605, "step": 25048 }, { "epoch": 20.787551867219918, "grad_norm": 58.76129150390625, 
"learning_rate": 1.168929460580913e-05, "loss": 1.0126, "step": 25049 }, { "epoch": 20.78838174273859, "grad_norm": 66.97559356689453, "learning_rate": 1.1688962655601662e-05, "loss": 0.6001, "step": 25050 }, { "epoch": 20.789211618257262, "grad_norm": 63.443878173828125, "learning_rate": 1.1688630705394192e-05, "loss": 0.4865, "step": 25051 }, { "epoch": 20.790041493775934, "grad_norm": 52.273433685302734, "learning_rate": 1.1688298755186723e-05, "loss": 0.5159, "step": 25052 }, { "epoch": 20.790871369294607, "grad_norm": 60.25814437866211, "learning_rate": 1.1687966804979253e-05, "loss": 0.586, "step": 25053 }, { "epoch": 20.79170124481328, "grad_norm": 40.82767868041992, "learning_rate": 1.1687634854771785e-05, "loss": 0.6751, "step": 25054 }, { "epoch": 20.79253112033195, "grad_norm": 30.012746810913086, "learning_rate": 1.1687302904564317e-05, "loss": 0.4385, "step": 25055 }, { "epoch": 20.793360995850623, "grad_norm": 28.363725662231445, "learning_rate": 1.1686970954356846e-05, "loss": 0.5799, "step": 25056 }, { "epoch": 20.794190871369295, "grad_norm": 28.285043716430664, "learning_rate": 1.1686639004149378e-05, "loss": 0.3277, "step": 25057 }, { "epoch": 20.795020746887968, "grad_norm": 31.378360748291016, "learning_rate": 1.168630705394191e-05, "loss": 0.4458, "step": 25058 }, { "epoch": 20.79585062240664, "grad_norm": 24.383094787597656, "learning_rate": 1.1685975103734442e-05, "loss": 0.652, "step": 25059 }, { "epoch": 20.796680497925312, "grad_norm": 104.06726837158203, "learning_rate": 1.1685643153526971e-05, "loss": 1.1075, "step": 25060 }, { "epoch": 20.797510373443984, "grad_norm": 29.290206909179688, "learning_rate": 1.1685311203319503e-05, "loss": 0.7644, "step": 25061 }, { "epoch": 20.798340248962656, "grad_norm": 23.52834129333496, "learning_rate": 1.1684979253112034e-05, "loss": 0.2862, "step": 25062 }, { "epoch": 20.79917012448133, "grad_norm": 57.40923309326172, "learning_rate": 1.1684647302904566e-05, "loss": 1.0097, "step": 25063 }, { 
"epoch": 20.8, "grad_norm": 55.74185562133789, "learning_rate": 1.1684315352697096e-05, "loss": 0.4269, "step": 25064 }, { "epoch": 20.800829875518673, "grad_norm": 130.60704040527344, "learning_rate": 1.1683983402489627e-05, "loss": 0.3608, "step": 25065 }, { "epoch": 20.801659751037345, "grad_norm": 44.7193489074707, "learning_rate": 1.1683651452282159e-05, "loss": 0.49, "step": 25066 }, { "epoch": 20.802489626556017, "grad_norm": 45.589176177978516, "learning_rate": 1.168331950207469e-05, "loss": 0.6509, "step": 25067 }, { "epoch": 20.80331950207469, "grad_norm": 24.473344802856445, "learning_rate": 1.168298755186722e-05, "loss": 0.3413, "step": 25068 }, { "epoch": 20.80414937759336, "grad_norm": 32.61430358886719, "learning_rate": 1.1682655601659752e-05, "loss": 0.3397, "step": 25069 }, { "epoch": 20.804979253112034, "grad_norm": 49.05080032348633, "learning_rate": 1.1682323651452284e-05, "loss": 0.6047, "step": 25070 }, { "epoch": 20.805809128630706, "grad_norm": 18.552932739257812, "learning_rate": 1.1681991701244814e-05, "loss": 0.2936, "step": 25071 }, { "epoch": 20.80663900414938, "grad_norm": 50.64704895019531, "learning_rate": 1.1681659751037346e-05, "loss": 0.9153, "step": 25072 }, { "epoch": 20.80746887966805, "grad_norm": 69.82466888427734, "learning_rate": 1.1681327800829877e-05, "loss": 0.4965, "step": 25073 }, { "epoch": 20.808298755186723, "grad_norm": 102.67527770996094, "learning_rate": 1.1680995850622407e-05, "loss": 0.897, "step": 25074 }, { "epoch": 20.809128630705395, "grad_norm": 27.375329971313477, "learning_rate": 1.1680663900414939e-05, "loss": 0.4882, "step": 25075 }, { "epoch": 20.809958506224067, "grad_norm": 45.72307586669922, "learning_rate": 1.1680331950207471e-05, "loss": 0.8046, "step": 25076 }, { "epoch": 20.81078838174274, "grad_norm": 50.871910095214844, "learning_rate": 1.168e-05, "loss": 1.3616, "step": 25077 }, { "epoch": 20.81161825726141, "grad_norm": 61.09267044067383, "learning_rate": 1.1679668049792532e-05, "loss": 
0.5556, "step": 25078 }, { "epoch": 20.812448132780084, "grad_norm": 29.62167739868164, "learning_rate": 1.1679336099585064e-05, "loss": 0.7354, "step": 25079 }, { "epoch": 20.813278008298756, "grad_norm": 37.09503173828125, "learning_rate": 1.1679004149377595e-05, "loss": 1.0303, "step": 25080 }, { "epoch": 20.814107883817428, "grad_norm": 98.99404907226562, "learning_rate": 1.1678672199170125e-05, "loss": 0.5171, "step": 25081 }, { "epoch": 20.8149377593361, "grad_norm": 60.19041061401367, "learning_rate": 1.1678340248962655e-05, "loss": 0.5795, "step": 25082 }, { "epoch": 20.815767634854772, "grad_norm": 14.551288604736328, "learning_rate": 1.1678008298755188e-05, "loss": 0.2254, "step": 25083 }, { "epoch": 20.816597510373445, "grad_norm": 32.60315704345703, "learning_rate": 1.167767634854772e-05, "loss": 0.3863, "step": 25084 }, { "epoch": 20.817427385892117, "grad_norm": 44.22785568237305, "learning_rate": 1.1677344398340248e-05, "loss": 0.3954, "step": 25085 }, { "epoch": 20.81825726141079, "grad_norm": 32.137760162353516, "learning_rate": 1.167701244813278e-05, "loss": 0.7926, "step": 25086 }, { "epoch": 20.81908713692946, "grad_norm": 35.05803680419922, "learning_rate": 1.1676680497925313e-05, "loss": 0.7493, "step": 25087 }, { "epoch": 20.819917012448133, "grad_norm": 108.73709869384766, "learning_rate": 1.1676348547717845e-05, "loss": 0.5183, "step": 25088 }, { "epoch": 20.820746887966806, "grad_norm": 23.06017303466797, "learning_rate": 1.1676016597510373e-05, "loss": 0.3625, "step": 25089 }, { "epoch": 20.821576763485478, "grad_norm": 33.92000961303711, "learning_rate": 1.1675684647302906e-05, "loss": 0.5413, "step": 25090 }, { "epoch": 20.82240663900415, "grad_norm": 28.77426528930664, "learning_rate": 1.1675352697095436e-05, "loss": 0.6614, "step": 25091 }, { "epoch": 20.823236514522822, "grad_norm": 34.454750061035156, "learning_rate": 1.1675020746887968e-05, "loss": 0.4744, "step": 25092 }, { "epoch": 20.824066390041494, "grad_norm": 
30.598066329956055, "learning_rate": 1.1674688796680498e-05, "loss": 0.3146, "step": 25093 }, { "epoch": 20.824896265560167, "grad_norm": 19.89686393737793, "learning_rate": 1.1674356846473029e-05, "loss": 0.3276, "step": 25094 }, { "epoch": 20.82572614107884, "grad_norm": 48.894081115722656, "learning_rate": 1.1674024896265561e-05, "loss": 0.78, "step": 25095 }, { "epoch": 20.82655601659751, "grad_norm": 68.44365692138672, "learning_rate": 1.1673692946058093e-05, "loss": 0.8214, "step": 25096 }, { "epoch": 20.827385892116183, "grad_norm": 46.34104919433594, "learning_rate": 1.1673360995850625e-05, "loss": 0.4475, "step": 25097 }, { "epoch": 20.828215767634855, "grad_norm": 20.566343307495117, "learning_rate": 1.1673029045643154e-05, "loss": 0.3807, "step": 25098 }, { "epoch": 20.829045643153528, "grad_norm": 36.8885383605957, "learning_rate": 1.1672697095435686e-05, "loss": 0.5176, "step": 25099 }, { "epoch": 20.8298755186722, "grad_norm": 25.516130447387695, "learning_rate": 1.1672365145228216e-05, "loss": 0.3376, "step": 25100 }, { "epoch": 20.830705394190872, "grad_norm": 80.88500213623047, "learning_rate": 1.1672033195020749e-05, "loss": 0.5785, "step": 25101 }, { "epoch": 20.831535269709544, "grad_norm": 29.20416831970215, "learning_rate": 1.1671701244813279e-05, "loss": 0.4296, "step": 25102 }, { "epoch": 20.832365145228216, "grad_norm": 70.63905334472656, "learning_rate": 1.167136929460581e-05, "loss": 0.364, "step": 25103 }, { "epoch": 20.83319502074689, "grad_norm": 33.16685104370117, "learning_rate": 1.1671037344398341e-05, "loss": 0.4612, "step": 25104 }, { "epoch": 20.83402489626556, "grad_norm": 25.57594108581543, "learning_rate": 1.1670705394190874e-05, "loss": 0.3353, "step": 25105 }, { "epoch": 20.834854771784233, "grad_norm": 41.93861770629883, "learning_rate": 1.1670373443983402e-05, "loss": 0.9659, "step": 25106 }, { "epoch": 20.835684647302905, "grad_norm": 78.65459442138672, "learning_rate": 1.1670041493775934e-05, "loss": 0.5347, "step": 
25107 }, { "epoch": 20.836514522821577, "grad_norm": 21.260427474975586, "learning_rate": 1.1669709543568467e-05, "loss": 0.3434, "step": 25108 }, { "epoch": 20.83734439834025, "grad_norm": 11.93016242980957, "learning_rate": 1.1669377593360997e-05, "loss": 0.3132, "step": 25109 }, { "epoch": 20.83817427385892, "grad_norm": 40.88826370239258, "learning_rate": 1.1669045643153527e-05, "loss": 0.4789, "step": 25110 }, { "epoch": 20.839004149377594, "grad_norm": 25.00521469116211, "learning_rate": 1.166871369294606e-05, "loss": 0.5046, "step": 25111 }, { "epoch": 20.839834024896266, "grad_norm": 87.37606048583984, "learning_rate": 1.166838174273859e-05, "loss": 0.5703, "step": 25112 }, { "epoch": 20.84066390041494, "grad_norm": 55.374755859375, "learning_rate": 1.1668049792531122e-05, "loss": 0.7039, "step": 25113 }, { "epoch": 20.84149377593361, "grad_norm": 51.719608306884766, "learning_rate": 1.166771784232365e-05, "loss": 0.4633, "step": 25114 }, { "epoch": 20.842323651452283, "grad_norm": 79.04248046875, "learning_rate": 1.1667385892116183e-05, "loss": 0.6812, "step": 25115 }, { "epoch": 20.843153526970955, "grad_norm": 13.268340110778809, "learning_rate": 1.1667053941908715e-05, "loss": 0.2677, "step": 25116 }, { "epoch": 20.843983402489627, "grad_norm": 77.0388412475586, "learning_rate": 1.1666721991701247e-05, "loss": 1.1494, "step": 25117 }, { "epoch": 20.8448132780083, "grad_norm": 86.99472045898438, "learning_rate": 1.1666390041493776e-05, "loss": 0.4647, "step": 25118 }, { "epoch": 20.84564315352697, "grad_norm": 38.28798294067383, "learning_rate": 1.1666058091286308e-05, "loss": 1.0491, "step": 25119 }, { "epoch": 20.846473029045644, "grad_norm": 10.764754295349121, "learning_rate": 1.166572614107884e-05, "loss": 0.352, "step": 25120 }, { "epoch": 20.847302904564316, "grad_norm": 50.04196548461914, "learning_rate": 1.166539419087137e-05, "loss": 0.3374, "step": 25121 }, { "epoch": 20.848132780082988, "grad_norm": 25.632965087890625, "learning_rate": 
1.1665062240663902e-05, "loss": 0.4601, "step": 25122 }, { "epoch": 20.84896265560166, "grad_norm": 114.75946807861328, "learning_rate": 1.1664730290456431e-05, "loss": 1.6619, "step": 25123 }, { "epoch": 20.849792531120332, "grad_norm": 28.943889617919922, "learning_rate": 1.1664398340248963e-05, "loss": 0.3585, "step": 25124 }, { "epoch": 20.850622406639005, "grad_norm": 26.896728515625, "learning_rate": 1.1664066390041495e-05, "loss": 0.3236, "step": 25125 }, { "epoch": 20.851452282157677, "grad_norm": 52.6418571472168, "learning_rate": 1.1663734439834028e-05, "loss": 0.9897, "step": 25126 }, { "epoch": 20.85228215767635, "grad_norm": 39.79808044433594, "learning_rate": 1.1663402489626556e-05, "loss": 0.5557, "step": 25127 }, { "epoch": 20.85311203319502, "grad_norm": 35.64667892456055, "learning_rate": 1.1663070539419088e-05, "loss": 0.3518, "step": 25128 }, { "epoch": 20.853941908713693, "grad_norm": 37.399627685546875, "learning_rate": 1.166273858921162e-05, "loss": 0.4374, "step": 25129 }, { "epoch": 20.854771784232366, "grad_norm": 23.87605857849121, "learning_rate": 1.1662406639004151e-05, "loss": 0.3761, "step": 25130 }, { "epoch": 20.855601659751038, "grad_norm": 35.6091194152832, "learning_rate": 1.1662074688796681e-05, "loss": 0.412, "step": 25131 }, { "epoch": 20.85643153526971, "grad_norm": 54.78323745727539, "learning_rate": 1.1661742738589212e-05, "loss": 0.728, "step": 25132 }, { "epoch": 20.857261410788382, "grad_norm": 26.240888595581055, "learning_rate": 1.1661410788381744e-05, "loss": 0.3686, "step": 25133 }, { "epoch": 20.858091286307054, "grad_norm": 52.51986312866211, "learning_rate": 1.1661078838174276e-05, "loss": 0.651, "step": 25134 }, { "epoch": 20.858921161825727, "grad_norm": 31.410810470581055, "learning_rate": 1.1660746887966805e-05, "loss": 0.36, "step": 25135 }, { "epoch": 20.8597510373444, "grad_norm": 23.983333587646484, "learning_rate": 1.1660414937759337e-05, "loss": 0.3562, "step": 25136 }, { "epoch": 20.86058091286307, 
"grad_norm": 22.723966598510742, "learning_rate": 1.1660082987551869e-05, "loss": 0.7092, "step": 25137 }, { "epoch": 20.861410788381743, "grad_norm": 23.488962173461914, "learning_rate": 1.16597510373444e-05, "loss": 0.4362, "step": 25138 }, { "epoch": 20.862240663900415, "grad_norm": 37.62904739379883, "learning_rate": 1.165941908713693e-05, "loss": 0.4558, "step": 25139 }, { "epoch": 20.863070539419088, "grad_norm": 36.76832580566406, "learning_rate": 1.1659087136929462e-05, "loss": 0.8055, "step": 25140 }, { "epoch": 20.86390041493776, "grad_norm": 18.58598518371582, "learning_rate": 1.1658755186721992e-05, "loss": 0.2555, "step": 25141 }, { "epoch": 20.864730290456432, "grad_norm": 37.20444869995117, "learning_rate": 1.1658423236514524e-05, "loss": 0.4875, "step": 25142 }, { "epoch": 20.865560165975104, "grad_norm": 88.03718566894531, "learning_rate": 1.1658091286307055e-05, "loss": 0.2897, "step": 25143 }, { "epoch": 20.866390041493776, "grad_norm": 26.607519149780273, "learning_rate": 1.1657759336099585e-05, "loss": 0.2997, "step": 25144 }, { "epoch": 20.86721991701245, "grad_norm": 77.65941619873047, "learning_rate": 1.1657427385892117e-05, "loss": 0.7913, "step": 25145 }, { "epoch": 20.86804979253112, "grad_norm": 97.93586730957031, "learning_rate": 1.165709543568465e-05, "loss": 0.8157, "step": 25146 }, { "epoch": 20.868879668049793, "grad_norm": 105.9333267211914, "learning_rate": 1.1656763485477178e-05, "loss": 0.8283, "step": 25147 }, { "epoch": 20.869709543568465, "grad_norm": 20.062580108642578, "learning_rate": 1.165643153526971e-05, "loss": 0.5016, "step": 25148 }, { "epoch": 20.870539419087137, "grad_norm": 15.000041007995605, "learning_rate": 1.1656099585062242e-05, "loss": 0.3946, "step": 25149 }, { "epoch": 20.87136929460581, "grad_norm": 58.77934265136719, "learning_rate": 1.1655767634854773e-05, "loss": 0.8493, "step": 25150 }, { "epoch": 20.872199170124482, "grad_norm": 29.816530227661133, "learning_rate": 1.1655435684647305e-05, "loss": 
0.4452, "step": 25151 }, { "epoch": 20.873029045643154, "grad_norm": 17.70279312133789, "learning_rate": 1.1655103734439834e-05, "loss": 0.2337, "step": 25152 }, { "epoch": 20.873858921161826, "grad_norm": 67.64055633544922, "learning_rate": 1.1654771784232366e-05, "loss": 0.5096, "step": 25153 }, { "epoch": 20.8746887966805, "grad_norm": 24.61114501953125, "learning_rate": 1.1654439834024898e-05, "loss": 0.404, "step": 25154 }, { "epoch": 20.87551867219917, "grad_norm": 82.74783325195312, "learning_rate": 1.165410788381743e-05, "loss": 0.6269, "step": 25155 }, { "epoch": 20.876348547717843, "grad_norm": 122.69214630126953, "learning_rate": 1.1653775933609959e-05, "loss": 1.0053, "step": 25156 }, { "epoch": 20.877178423236515, "grad_norm": 49.51277160644531, "learning_rate": 1.165344398340249e-05, "loss": 0.5741, "step": 25157 }, { "epoch": 20.878008298755187, "grad_norm": 104.4039077758789, "learning_rate": 1.1653112033195023e-05, "loss": 0.9147, "step": 25158 }, { "epoch": 20.87883817427386, "grad_norm": 26.315824508666992, "learning_rate": 1.1652780082987553e-05, "loss": 0.4102, "step": 25159 }, { "epoch": 20.87966804979253, "grad_norm": 27.68229866027832, "learning_rate": 1.1652448132780084e-05, "loss": 0.6997, "step": 25160 }, { "epoch": 20.880497925311204, "grad_norm": 68.77510833740234, "learning_rate": 1.1652116182572614e-05, "loss": 0.7571, "step": 25161 }, { "epoch": 20.881327800829876, "grad_norm": 93.3858413696289, "learning_rate": 1.1651784232365146e-05, "loss": 0.5315, "step": 25162 }, { "epoch": 20.882157676348548, "grad_norm": 44.94486999511719, "learning_rate": 1.1651452282157678e-05, "loss": 0.4654, "step": 25163 }, { "epoch": 20.88298755186722, "grad_norm": 53.56572723388672, "learning_rate": 1.1651120331950207e-05, "loss": 0.5494, "step": 25164 }, { "epoch": 20.883817427385893, "grad_norm": 87.51129913330078, "learning_rate": 1.1650788381742739e-05, "loss": 0.5865, "step": 25165 }, { "epoch": 20.884647302904565, "grad_norm": 22.00396728515625, 
"learning_rate": 1.1650456431535271e-05, "loss": 0.3262, "step": 25166 }, { "epoch": 20.885477178423237, "grad_norm": 55.80290985107422, "learning_rate": 1.1650124481327803e-05, "loss": 1.1082, "step": 25167 }, { "epoch": 20.88630705394191, "grad_norm": 92.361572265625, "learning_rate": 1.1649792531120332e-05, "loss": 1.4266, "step": 25168 }, { "epoch": 20.88713692946058, "grad_norm": 15.884233474731445, "learning_rate": 1.1649460580912864e-05, "loss": 0.374, "step": 25169 }, { "epoch": 20.887966804979254, "grad_norm": 37.023521423339844, "learning_rate": 1.1649128630705395e-05, "loss": 0.6777, "step": 25170 }, { "epoch": 20.888796680497926, "grad_norm": 146.027587890625, "learning_rate": 1.1648796680497927e-05, "loss": 0.9287, "step": 25171 }, { "epoch": 20.889626556016598, "grad_norm": 57.973018646240234, "learning_rate": 1.1648464730290457e-05, "loss": 0.5075, "step": 25172 }, { "epoch": 20.89045643153527, "grad_norm": 117.72924041748047, "learning_rate": 1.1648132780082987e-05, "loss": 0.7414, "step": 25173 }, { "epoch": 20.891286307053942, "grad_norm": 30.81612777709961, "learning_rate": 1.164780082987552e-05, "loss": 0.4893, "step": 25174 }, { "epoch": 20.892116182572614, "grad_norm": 67.286865234375, "learning_rate": 1.1647468879668052e-05, "loss": 0.8865, "step": 25175 }, { "epoch": 20.892946058091287, "grad_norm": 21.614776611328125, "learning_rate": 1.1647136929460584e-05, "loss": 0.3453, "step": 25176 }, { "epoch": 20.89377593360996, "grad_norm": 54.44887161254883, "learning_rate": 1.1646804979253112e-05, "loss": 0.5693, "step": 25177 }, { "epoch": 20.89460580912863, "grad_norm": 70.82007598876953, "learning_rate": 1.1646473029045645e-05, "loss": 0.8361, "step": 25178 }, { "epoch": 20.895435684647303, "grad_norm": 38.505836486816406, "learning_rate": 1.1646141078838175e-05, "loss": 0.698, "step": 25179 }, { "epoch": 20.896265560165975, "grad_norm": 45.84160232543945, "learning_rate": 1.1645809128630707e-05, "loss": 0.58, "step": 25180 }, { "epoch": 
20.897095435684648, "grad_norm": 46.90790557861328, "learning_rate": 1.1645477178423238e-05, "loss": 0.602, "step": 25181 }, { "epoch": 20.89792531120332, "grad_norm": 30.554656982421875, "learning_rate": 1.1645145228215768e-05, "loss": 0.6166, "step": 25182 }, { "epoch": 20.898755186721992, "grad_norm": 25.55599594116211, "learning_rate": 1.16448132780083e-05, "loss": 0.4751, "step": 25183 }, { "epoch": 20.899585062240664, "grad_norm": 18.573816299438477, "learning_rate": 1.1644481327800832e-05, "loss": 0.4482, "step": 25184 }, { "epoch": 20.900414937759336, "grad_norm": 70.39969635009766, "learning_rate": 1.1644149377593361e-05, "loss": 0.4349, "step": 25185 }, { "epoch": 20.90124481327801, "grad_norm": 23.023908615112305, "learning_rate": 1.1643817427385893e-05, "loss": 0.3602, "step": 25186 }, { "epoch": 20.90207468879668, "grad_norm": 57.693145751953125, "learning_rate": 1.1643485477178425e-05, "loss": 0.8124, "step": 25187 }, { "epoch": 20.902904564315353, "grad_norm": 43.47038269042969, "learning_rate": 1.1643153526970956e-05, "loss": 0.7264, "step": 25188 }, { "epoch": 20.903734439834025, "grad_norm": 32.126747131347656, "learning_rate": 1.1642821576763486e-05, "loss": 0.5623, "step": 25189 }, { "epoch": 20.904564315352697, "grad_norm": 58.06911087036133, "learning_rate": 1.1642489626556018e-05, "loss": 0.4024, "step": 25190 }, { "epoch": 20.90539419087137, "grad_norm": 18.90534210205078, "learning_rate": 1.1642157676348548e-05, "loss": 0.6734, "step": 25191 }, { "epoch": 20.906224066390042, "grad_norm": 34.3247184753418, "learning_rate": 1.164182572614108e-05, "loss": 0.3737, "step": 25192 }, { "epoch": 20.907053941908714, "grad_norm": 59.141441345214844, "learning_rate": 1.164149377593361e-05, "loss": 0.5577, "step": 25193 }, { "epoch": 20.907883817427386, "grad_norm": 41.02766799926758, "learning_rate": 1.1641161825726141e-05, "loss": 0.4509, "step": 25194 }, { "epoch": 20.90871369294606, "grad_norm": 39.01387023925781, "learning_rate": 
1.1640829875518673e-05, "loss": 0.5018, "step": 25195 }, { "epoch": 20.90954356846473, "grad_norm": 34.671932220458984, "learning_rate": 1.1640497925311206e-05, "loss": 0.4667, "step": 25196 }, { "epoch": 20.910373443983403, "grad_norm": 47.4550666809082, "learning_rate": 1.1640165975103734e-05, "loss": 1.176, "step": 25197 }, { "epoch": 20.911203319502075, "grad_norm": 20.7603702545166, "learning_rate": 1.1639834024896266e-05, "loss": 0.3676, "step": 25198 }, { "epoch": 20.912033195020747, "grad_norm": 39.57686996459961, "learning_rate": 1.1639502074688797e-05, "loss": 0.4159, "step": 25199 }, { "epoch": 20.91286307053942, "grad_norm": 50.221397399902344, "learning_rate": 1.1639170124481329e-05, "loss": 1.0374, "step": 25200 }, { "epoch": 20.91369294605809, "grad_norm": 39.53728485107422, "learning_rate": 1.1638838174273861e-05, "loss": 0.4757, "step": 25201 }, { "epoch": 20.914522821576764, "grad_norm": 44.41738510131836, "learning_rate": 1.163850622406639e-05, "loss": 0.3752, "step": 25202 }, { "epoch": 20.915352697095436, "grad_norm": 33.76614761352539, "learning_rate": 1.1638174273858922e-05, "loss": 0.5013, "step": 25203 }, { "epoch": 20.916182572614108, "grad_norm": 22.80520248413086, "learning_rate": 1.1637842323651454e-05, "loss": 0.6462, "step": 25204 }, { "epoch": 20.91701244813278, "grad_norm": 46.99990463256836, "learning_rate": 1.1637510373443986e-05, "loss": 0.5734, "step": 25205 }, { "epoch": 20.917842323651453, "grad_norm": 54.83530044555664, "learning_rate": 1.1637178423236515e-05, "loss": 0.7415, "step": 25206 }, { "epoch": 20.918672199170125, "grad_norm": 120.4239730834961, "learning_rate": 1.1636846473029047e-05, "loss": 0.5904, "step": 25207 }, { "epoch": 20.919502074688797, "grad_norm": 35.59958267211914, "learning_rate": 1.1636514522821577e-05, "loss": 0.5416, "step": 25208 }, { "epoch": 20.92033195020747, "grad_norm": 28.096576690673828, "learning_rate": 1.163618257261411e-05, "loss": 0.7297, "step": 25209 }, { "epoch": 20.92116182572614, 
"grad_norm": 33.42837905883789, "learning_rate": 1.163585062240664e-05, "loss": 0.366, "step": 25210 }, { "epoch": 20.921991701244814, "grad_norm": 89.60366821289062, "learning_rate": 1.163551867219917e-05, "loss": 1.0907, "step": 25211 }, { "epoch": 20.922821576763486, "grad_norm": 18.782304763793945, "learning_rate": 1.1635186721991702e-05, "loss": 0.3265, "step": 25212 }, { "epoch": 20.923651452282158, "grad_norm": 25.913734436035156, "learning_rate": 1.1634854771784234e-05, "loss": 0.5472, "step": 25213 }, { "epoch": 20.92448132780083, "grad_norm": 42.77021789550781, "learning_rate": 1.1634522821576763e-05, "loss": 1.0047, "step": 25214 }, { "epoch": 20.925311203319502, "grad_norm": 32.57242202758789, "learning_rate": 1.1634190871369295e-05, "loss": 0.5022, "step": 25215 }, { "epoch": 20.926141078838175, "grad_norm": 25.560102462768555, "learning_rate": 1.1633858921161827e-05, "loss": 0.4383, "step": 25216 }, { "epoch": 20.926970954356847, "grad_norm": 54.74795913696289, "learning_rate": 1.1633526970954358e-05, "loss": 0.8871, "step": 25217 }, { "epoch": 20.92780082987552, "grad_norm": 37.530906677246094, "learning_rate": 1.1633195020746888e-05, "loss": 0.6424, "step": 25218 }, { "epoch": 20.92863070539419, "grad_norm": 33.73419952392578, "learning_rate": 1.163286307053942e-05, "loss": 0.4001, "step": 25219 }, { "epoch": 20.929460580912863, "grad_norm": 43.320098876953125, "learning_rate": 1.163253112033195e-05, "loss": 0.7905, "step": 25220 }, { "epoch": 20.930290456431536, "grad_norm": 44.24805450439453, "learning_rate": 1.1632199170124483e-05, "loss": 0.6495, "step": 25221 }, { "epoch": 20.931120331950208, "grad_norm": 23.110767364501953, "learning_rate": 1.1631867219917012e-05, "loss": 0.3588, "step": 25222 }, { "epoch": 20.93195020746888, "grad_norm": 206.93460083007812, "learning_rate": 1.1631535269709544e-05, "loss": 0.8279, "step": 25223 }, { "epoch": 20.932780082987552, "grad_norm": 19.015043258666992, "learning_rate": 1.1631203319502076e-05, "loss": 
0.3626, "step": 25224 }, { "epoch": 20.933609958506224, "grad_norm": 62.337703704833984, "learning_rate": 1.1630871369294608e-05, "loss": 1.009, "step": 25225 }, { "epoch": 20.934439834024896, "grad_norm": 21.917747497558594, "learning_rate": 1.1630539419087137e-05, "loss": 0.48, "step": 25226 }, { "epoch": 20.93526970954357, "grad_norm": 60.692230224609375, "learning_rate": 1.1630207468879669e-05, "loss": 1.3832, "step": 25227 }, { "epoch": 20.93609958506224, "grad_norm": 22.534648895263672, "learning_rate": 1.16298755186722e-05, "loss": 0.3337, "step": 25228 }, { "epoch": 20.936929460580913, "grad_norm": 27.620208740234375, "learning_rate": 1.1629543568464731e-05, "loss": 0.4041, "step": 25229 }, { "epoch": 20.937759336099585, "grad_norm": 48.58393096923828, "learning_rate": 1.1629211618257263e-05, "loss": 0.3233, "step": 25230 }, { "epoch": 20.938589211618257, "grad_norm": 70.29842376708984, "learning_rate": 1.1628879668049792e-05, "loss": 0.7281, "step": 25231 }, { "epoch": 20.93941908713693, "grad_norm": 29.902238845825195, "learning_rate": 1.1628547717842324e-05, "loss": 0.5539, "step": 25232 }, { "epoch": 20.940248962655602, "grad_norm": 28.872098922729492, "learning_rate": 1.1628215767634856e-05, "loss": 0.3752, "step": 25233 }, { "epoch": 20.941078838174274, "grad_norm": 52.694122314453125, "learning_rate": 1.1627883817427388e-05, "loss": 0.9564, "step": 25234 }, { "epoch": 20.941908713692946, "grad_norm": 49.7904167175293, "learning_rate": 1.1627551867219917e-05, "loss": 0.6185, "step": 25235 }, { "epoch": 20.94273858921162, "grad_norm": 66.59615325927734, "learning_rate": 1.162721991701245e-05, "loss": 0.4269, "step": 25236 }, { "epoch": 20.94356846473029, "grad_norm": 59.572113037109375, "learning_rate": 1.1626887966804981e-05, "loss": 0.7774, "step": 25237 }, { "epoch": 20.944398340248963, "grad_norm": 18.412769317626953, "learning_rate": 1.1626556016597512e-05, "loss": 0.3177, "step": 25238 }, { "epoch": 20.945228215767635, "grad_norm": 
24.217241287231445, "learning_rate": 1.1626224066390042e-05, "loss": 0.2799, "step": 25239 }, { "epoch": 20.946058091286307, "grad_norm": 30.23943328857422, "learning_rate": 1.1625892116182573e-05, "loss": 0.4455, "step": 25240 }, { "epoch": 20.94688796680498, "grad_norm": 39.91646957397461, "learning_rate": 1.1625560165975105e-05, "loss": 0.3403, "step": 25241 }, { "epoch": 20.94771784232365, "grad_norm": 65.66001892089844, "learning_rate": 1.1625228215767637e-05, "loss": 0.6989, "step": 25242 }, { "epoch": 20.948547717842324, "grad_norm": 42.60157775878906, "learning_rate": 1.1624896265560166e-05, "loss": 0.464, "step": 25243 }, { "epoch": 20.949377593360996, "grad_norm": 66.14363098144531, "learning_rate": 1.1624564315352698e-05, "loss": 0.7608, "step": 25244 }, { "epoch": 20.95020746887967, "grad_norm": 13.793923377990723, "learning_rate": 1.162423236514523e-05, "loss": 0.3355, "step": 25245 }, { "epoch": 20.95103734439834, "grad_norm": 126.59746551513672, "learning_rate": 1.1623900414937762e-05, "loss": 1.0988, "step": 25246 }, { "epoch": 20.951867219917013, "grad_norm": 23.147172927856445, "learning_rate": 1.162356846473029e-05, "loss": 0.3343, "step": 25247 }, { "epoch": 20.952697095435685, "grad_norm": 15.827230453491211, "learning_rate": 1.1623236514522823e-05, "loss": 0.3748, "step": 25248 }, { "epoch": 20.953526970954357, "grad_norm": 39.48566436767578, "learning_rate": 1.1622904564315353e-05, "loss": 0.6682, "step": 25249 }, { "epoch": 20.95435684647303, "grad_norm": 42.836204528808594, "learning_rate": 1.1622572614107885e-05, "loss": 0.7182, "step": 25250 }, { "epoch": 20.9551867219917, "grad_norm": 58.357669830322266, "learning_rate": 1.1622240663900416e-05, "loss": 0.8121, "step": 25251 }, { "epoch": 20.956016597510374, "grad_norm": 28.384624481201172, "learning_rate": 1.1621908713692946e-05, "loss": 0.6954, "step": 25252 }, { "epoch": 20.956846473029046, "grad_norm": 46.95656204223633, "learning_rate": 1.1621576763485478e-05, "loss": 0.9655, "step": 
25253 }, { "epoch": 20.957676348547718, "grad_norm": 31.347766876220703, "learning_rate": 1.162124481327801e-05, "loss": 0.4866, "step": 25254 }, { "epoch": 20.95850622406639, "grad_norm": 98.81822204589844, "learning_rate": 1.162091286307054e-05, "loss": 0.8021, "step": 25255 }, { "epoch": 20.959336099585062, "grad_norm": 27.848346710205078, "learning_rate": 1.1620580912863071e-05, "loss": 0.4723, "step": 25256 }, { "epoch": 20.960165975103735, "grad_norm": 17.82355308532715, "learning_rate": 1.1620248962655603e-05, "loss": 0.3018, "step": 25257 }, { "epoch": 20.960995850622407, "grad_norm": 33.3406867980957, "learning_rate": 1.1619917012448134e-05, "loss": 0.7108, "step": 25258 }, { "epoch": 20.96182572614108, "grad_norm": 45.818477630615234, "learning_rate": 1.1619585062240666e-05, "loss": 1.1207, "step": 25259 }, { "epoch": 20.96265560165975, "grad_norm": 16.167341232299805, "learning_rate": 1.1619253112033196e-05, "loss": 0.3494, "step": 25260 }, { "epoch": 20.963485477178423, "grad_norm": 28.44134521484375, "learning_rate": 1.1618921161825727e-05, "loss": 0.5372, "step": 25261 }, { "epoch": 20.964315352697096, "grad_norm": 31.36061668395996, "learning_rate": 1.1618589211618259e-05, "loss": 0.3723, "step": 25262 }, { "epoch": 20.965145228215768, "grad_norm": 24.29277801513672, "learning_rate": 1.161825726141079e-05, "loss": 0.4585, "step": 25263 }, { "epoch": 20.96597510373444, "grad_norm": 80.69928741455078, "learning_rate": 1.161792531120332e-05, "loss": 0.7638, "step": 25264 }, { "epoch": 20.966804979253112, "grad_norm": 74.3637924194336, "learning_rate": 1.1617593360995852e-05, "loss": 0.6544, "step": 25265 }, { "epoch": 20.967634854771784, "grad_norm": 99.56444549560547, "learning_rate": 1.1617261410788384e-05, "loss": 0.5298, "step": 25266 }, { "epoch": 20.968464730290457, "grad_norm": 34.74372863769531, "learning_rate": 1.1616929460580914e-05, "loss": 0.4739, "step": 25267 }, { "epoch": 20.96929460580913, "grad_norm": 30.94198989868164, "learning_rate": 
1.1616597510373444e-05, "loss": 0.3513, "step": 25268 }, { "epoch": 20.9701244813278, "grad_norm": 127.11957550048828, "learning_rate": 1.1616265560165975e-05, "loss": 0.6249, "step": 25269 }, { "epoch": 20.970954356846473, "grad_norm": 17.14002227783203, "learning_rate": 1.1615933609958507e-05, "loss": 0.5796, "step": 25270 }, { "epoch": 20.971784232365145, "grad_norm": 34.02305221557617, "learning_rate": 1.1615601659751039e-05, "loss": 0.5192, "step": 25271 }, { "epoch": 20.972614107883818, "grad_norm": 53.06755828857422, "learning_rate": 1.1615269709543568e-05, "loss": 0.5117, "step": 25272 }, { "epoch": 20.97344398340249, "grad_norm": 46.61006164550781, "learning_rate": 1.16149377593361e-05, "loss": 0.4091, "step": 25273 }, { "epoch": 20.974273858921162, "grad_norm": 47.43490219116211, "learning_rate": 1.1614605809128632e-05, "loss": 0.5899, "step": 25274 }, { "epoch": 20.975103734439834, "grad_norm": 40.95315933227539, "learning_rate": 1.1614273858921164e-05, "loss": 0.6699, "step": 25275 }, { "epoch": 20.975933609958506, "grad_norm": 24.301803588867188, "learning_rate": 1.1613941908713693e-05, "loss": 0.3356, "step": 25276 }, { "epoch": 20.97676348547718, "grad_norm": 34.79703903198242, "learning_rate": 1.1613609958506225e-05, "loss": 0.6928, "step": 25277 }, { "epoch": 20.97759336099585, "grad_norm": 34.91092300415039, "learning_rate": 1.1613278008298755e-05, "loss": 0.311, "step": 25278 }, { "epoch": 20.978423236514523, "grad_norm": 16.052677154541016, "learning_rate": 1.1612946058091288e-05, "loss": 0.3031, "step": 25279 }, { "epoch": 20.979253112033195, "grad_norm": 85.29935455322266, "learning_rate": 1.161261410788382e-05, "loss": 0.5229, "step": 25280 }, { "epoch": 20.980082987551867, "grad_norm": 86.01295471191406, "learning_rate": 1.1612282157676348e-05, "loss": 0.5818, "step": 25281 }, { "epoch": 20.98091286307054, "grad_norm": 28.97834014892578, "learning_rate": 1.161195020746888e-05, "loss": 0.4666, "step": 25282 }, { "epoch": 20.98174273858921, 
"grad_norm": 83.02983856201172, "learning_rate": 1.1611618257261413e-05, "loss": 0.5833, "step": 25283 }, { "epoch": 20.982572614107884, "grad_norm": 143.98626708984375, "learning_rate": 1.1611286307053945e-05, "loss": 0.4158, "step": 25284 }, { "epoch": 20.983402489626556, "grad_norm": 42.15449142456055, "learning_rate": 1.1610954356846473e-05, "loss": 0.6902, "step": 25285 }, { "epoch": 20.98423236514523, "grad_norm": 34.209434509277344, "learning_rate": 1.1610622406639005e-05, "loss": 0.4722, "step": 25286 }, { "epoch": 20.9850622406639, "grad_norm": 33.551673889160156, "learning_rate": 1.1610290456431536e-05, "loss": 0.4278, "step": 25287 }, { "epoch": 20.985892116182573, "grad_norm": 58.98248291015625, "learning_rate": 1.1609958506224068e-05, "loss": 0.7928, "step": 25288 }, { "epoch": 20.986721991701245, "grad_norm": 33.07848358154297, "learning_rate": 1.1609626556016598e-05, "loss": 0.5683, "step": 25289 }, { "epoch": 20.987551867219917, "grad_norm": 35.791053771972656, "learning_rate": 1.1609294605809129e-05, "loss": 0.7825, "step": 25290 }, { "epoch": 20.98838174273859, "grad_norm": 51.95216751098633, "learning_rate": 1.1608962655601661e-05, "loss": 0.9005, "step": 25291 }, { "epoch": 20.98921161825726, "grad_norm": 45.63948440551758, "learning_rate": 1.1608630705394193e-05, "loss": 0.8207, "step": 25292 }, { "epoch": 20.990041493775934, "grad_norm": 212.4920196533203, "learning_rate": 1.1608298755186722e-05, "loss": 0.5836, "step": 25293 }, { "epoch": 20.990871369294606, "grad_norm": 40.72311782836914, "learning_rate": 1.1607966804979254e-05, "loss": 0.4831, "step": 25294 }, { "epoch": 20.991701244813278, "grad_norm": 90.12165069580078, "learning_rate": 1.1607634854771786e-05, "loss": 0.8922, "step": 25295 }, { "epoch": 20.99253112033195, "grad_norm": 73.68183135986328, "learning_rate": 1.1607302904564316e-05, "loss": 0.8405, "step": 25296 }, { "epoch": 20.993360995850622, "grad_norm": 30.408700942993164, "learning_rate": 1.1606970954356847e-05, "loss": 
0.4863, "step": 25297 }, { "epoch": 20.994190871369295, "grad_norm": 27.193437576293945, "learning_rate": 1.1606639004149379e-05, "loss": 0.2891, "step": 25298 }, { "epoch": 20.995020746887967, "grad_norm": 35.859615325927734, "learning_rate": 1.160630705394191e-05, "loss": 0.5105, "step": 25299 }, { "epoch": 20.99585062240664, "grad_norm": 25.72071647644043, "learning_rate": 1.1605975103734441e-05, "loss": 0.5613, "step": 25300 }, { "epoch": 20.99668049792531, "grad_norm": 48.78190612792969, "learning_rate": 1.160564315352697e-05, "loss": 0.634, "step": 25301 }, { "epoch": 20.997510373443983, "grad_norm": 64.23766326904297, "learning_rate": 1.1605311203319502e-05, "loss": 0.4079, "step": 25302 }, { "epoch": 20.998340248962656, "grad_norm": 24.731828689575195, "learning_rate": 1.1604979253112034e-05, "loss": 0.374, "step": 25303 }, { "epoch": 20.999170124481328, "grad_norm": 42.62525177001953, "learning_rate": 1.1604647302904566e-05, "loss": 0.5979, "step": 25304 }, { "epoch": 21.0, "grad_norm": 78.90017700195312, "learning_rate": 1.1604315352697095e-05, "loss": 1.0498, "step": 25305 }, { "epoch": 21.000829875518672, "grad_norm": 27.47809410095215, "learning_rate": 1.1603983402489627e-05, "loss": 0.4049, "step": 25306 }, { "epoch": 21.001659751037344, "grad_norm": 58.3680305480957, "learning_rate": 1.160365145228216e-05, "loss": 0.656, "step": 25307 }, { "epoch": 21.002489626556017, "grad_norm": 37.96755599975586, "learning_rate": 1.160331950207469e-05, "loss": 0.9562, "step": 25308 }, { "epoch": 21.00331950207469, "grad_norm": 35.80086135864258, "learning_rate": 1.1602987551867222e-05, "loss": 0.4785, "step": 25309 }, { "epoch": 21.00414937759336, "grad_norm": 27.627294540405273, "learning_rate": 1.160265560165975e-05, "loss": 0.3674, "step": 25310 }, { "epoch": 21.004979253112033, "grad_norm": 20.031494140625, "learning_rate": 1.1602323651452283e-05, "loss": 0.3956, "step": 25311 }, { "epoch": 21.005809128630705, "grad_norm": 72.3814697265625, "learning_rate": 
1.1601991701244815e-05, "loss": 0.6918, "step": 25312 }, { "epoch": 21.006639004149378, "grad_norm": 27.890518188476562, "learning_rate": 1.1601659751037347e-05, "loss": 1.0261, "step": 25313 }, { "epoch": 21.00746887966805, "grad_norm": 80.97357177734375, "learning_rate": 1.1601327800829876e-05, "loss": 0.4242, "step": 25314 }, { "epoch": 21.008298755186722, "grad_norm": 57.18412399291992, "learning_rate": 1.1600995850622408e-05, "loss": 0.399, "step": 25315 }, { "epoch": 21.009128630705394, "grad_norm": 37.72225570678711, "learning_rate": 1.1600663900414938e-05, "loss": 0.8894, "step": 25316 }, { "epoch": 21.009958506224066, "grad_norm": 48.604400634765625, "learning_rate": 1.160033195020747e-05, "loss": 0.3535, "step": 25317 }, { "epoch": 21.01078838174274, "grad_norm": 26.129316329956055, "learning_rate": 1.16e-05, "loss": 0.4461, "step": 25318 }, { "epoch": 21.01161825726141, "grad_norm": 60.550941467285156, "learning_rate": 1.1599668049792531e-05, "loss": 0.781, "step": 25319 }, { "epoch": 21.012448132780083, "grad_norm": 22.55280113220215, "learning_rate": 1.1599336099585063e-05, "loss": 0.4307, "step": 25320 }, { "epoch": 21.013278008298755, "grad_norm": 35.11656951904297, "learning_rate": 1.1599004149377595e-05, "loss": 0.4035, "step": 25321 }, { "epoch": 21.014107883817427, "grad_norm": 45.99323654174805, "learning_rate": 1.1598672199170124e-05, "loss": 0.3783, "step": 25322 }, { "epoch": 21.0149377593361, "grad_norm": 35.36077880859375, "learning_rate": 1.1598340248962656e-05, "loss": 0.468, "step": 25323 }, { "epoch": 21.01576763485477, "grad_norm": 18.203571319580078, "learning_rate": 1.1598008298755188e-05, "loss": 0.2934, "step": 25324 }, { "epoch": 21.016597510373444, "grad_norm": 37.29098129272461, "learning_rate": 1.1597676348547719e-05, "loss": 0.5602, "step": 25325 }, { "epoch": 21.017427385892116, "grad_norm": 37.65028762817383, "learning_rate": 1.1597344398340249e-05, "loss": 0.6416, "step": 25326 }, { "epoch": 21.01825726141079, "grad_norm": 
58.28018569946289, "learning_rate": 1.1597012448132781e-05, "loss": 0.4025, "step": 25327 }, { "epoch": 21.01908713692946, "grad_norm": 49.02104949951172, "learning_rate": 1.1596680497925312e-05, "loss": 0.5493, "step": 25328 }, { "epoch": 21.019917012448133, "grad_norm": 31.140214920043945, "learning_rate": 1.1596348547717844e-05, "loss": 0.4514, "step": 25329 }, { "epoch": 21.020746887966805, "grad_norm": 37.86643981933594, "learning_rate": 1.1596016597510374e-05, "loss": 0.4735, "step": 25330 }, { "epoch": 21.021576763485477, "grad_norm": 25.8784122467041, "learning_rate": 1.1595684647302905e-05, "loss": 0.4623, "step": 25331 }, { "epoch": 21.02240663900415, "grad_norm": 12.575567245483398, "learning_rate": 1.1595352697095437e-05, "loss": 0.3563, "step": 25332 }, { "epoch": 21.02323651452282, "grad_norm": 22.468339920043945, "learning_rate": 1.1595020746887969e-05, "loss": 0.3177, "step": 25333 }, { "epoch": 21.024066390041494, "grad_norm": 13.185990333557129, "learning_rate": 1.15946887966805e-05, "loss": 0.3236, "step": 25334 }, { "epoch": 21.024896265560166, "grad_norm": 22.123611450195312, "learning_rate": 1.159435684647303e-05, "loss": 0.285, "step": 25335 }, { "epoch": 21.025726141078838, "grad_norm": 16.271821975708008, "learning_rate": 1.1594024896265562e-05, "loss": 0.1979, "step": 25336 }, { "epoch": 21.02655601659751, "grad_norm": 117.41093444824219, "learning_rate": 1.1593692946058092e-05, "loss": 0.6229, "step": 25337 }, { "epoch": 21.027385892116182, "grad_norm": 113.37994384765625, "learning_rate": 1.1593360995850624e-05, "loss": 0.6447, "step": 25338 }, { "epoch": 21.028215767634855, "grad_norm": 33.31106948852539, "learning_rate": 1.1593029045643153e-05, "loss": 0.6273, "step": 25339 }, { "epoch": 21.029045643153527, "grad_norm": 33.348960876464844, "learning_rate": 1.1592697095435685e-05, "loss": 0.2633, "step": 25340 }, { "epoch": 21.0298755186722, "grad_norm": 30.384265899658203, "learning_rate": 1.1592365145228217e-05, "loss": 0.3893, 
"step": 25341 }, { "epoch": 21.03070539419087, "grad_norm": 34.10078811645508, "learning_rate": 1.159203319502075e-05, "loss": 0.3414, "step": 25342 }, { "epoch": 21.031535269709543, "grad_norm": 30.762493133544922, "learning_rate": 1.1591701244813278e-05, "loss": 0.3174, "step": 25343 }, { "epoch": 21.032365145228216, "grad_norm": 22.54483985900879, "learning_rate": 1.159136929460581e-05, "loss": 0.3046, "step": 25344 }, { "epoch": 21.033195020746888, "grad_norm": 40.186187744140625, "learning_rate": 1.1591037344398342e-05, "loss": 0.4387, "step": 25345 }, { "epoch": 21.03402489626556, "grad_norm": 41.79429244995117, "learning_rate": 1.1590705394190873e-05, "loss": 0.4881, "step": 25346 }, { "epoch": 21.034854771784232, "grad_norm": 63.494102478027344, "learning_rate": 1.1590373443983403e-05, "loss": 0.826, "step": 25347 }, { "epoch": 21.035684647302904, "grad_norm": 68.32001495361328, "learning_rate": 1.1590041493775933e-05, "loss": 0.8986, "step": 25348 }, { "epoch": 21.036514522821577, "grad_norm": 54.09483337402344, "learning_rate": 1.1589709543568466e-05, "loss": 0.6374, "step": 25349 }, { "epoch": 21.03734439834025, "grad_norm": 41.789405822753906, "learning_rate": 1.1589377593360998e-05, "loss": 0.4006, "step": 25350 }, { "epoch": 21.03817427385892, "grad_norm": 23.74015998840332, "learning_rate": 1.1589045643153526e-05, "loss": 0.2736, "step": 25351 }, { "epoch": 21.039004149377593, "grad_norm": 90.94388580322266, "learning_rate": 1.1588713692946059e-05, "loss": 0.5177, "step": 25352 }, { "epoch": 21.039834024896265, "grad_norm": 24.198877334594727, "learning_rate": 1.158838174273859e-05, "loss": 0.3791, "step": 25353 }, { "epoch": 21.040663900414938, "grad_norm": 49.59156036376953, "learning_rate": 1.1588049792531123e-05, "loss": 0.4808, "step": 25354 }, { "epoch": 21.04149377593361, "grad_norm": 17.46357536315918, "learning_rate": 1.1587717842323651e-05, "loss": 0.3318, "step": 25355 }, { "epoch": 21.042323651452282, "grad_norm": 14.900073051452637, 
"learning_rate": 1.1587385892116184e-05, "loss": 0.2577, "step": 25356 }, { "epoch": 21.043153526970954, "grad_norm": 55.19319534301758, "learning_rate": 1.1587053941908714e-05, "loss": 0.2656, "step": 25357 }, { "epoch": 21.043983402489626, "grad_norm": 25.04404640197754, "learning_rate": 1.1586721991701246e-05, "loss": 0.2335, "step": 25358 }, { "epoch": 21.0448132780083, "grad_norm": 89.92153930664062, "learning_rate": 1.1586390041493777e-05, "loss": 0.5223, "step": 25359 }, { "epoch": 21.04564315352697, "grad_norm": 58.49444580078125, "learning_rate": 1.1586058091286307e-05, "loss": 0.3555, "step": 25360 }, { "epoch": 21.046473029045643, "grad_norm": 63.349693298339844, "learning_rate": 1.1585726141078839e-05, "loss": 0.2102, "step": 25361 }, { "epoch": 21.047302904564315, "grad_norm": 26.777189254760742, "learning_rate": 1.1585394190871371e-05, "loss": 0.2821, "step": 25362 }, { "epoch": 21.048132780082987, "grad_norm": 40.87884521484375, "learning_rate": 1.1585062240663903e-05, "loss": 0.325, "step": 25363 }, { "epoch": 21.04896265560166, "grad_norm": 95.65440368652344, "learning_rate": 1.1584730290456432e-05, "loss": 0.7506, "step": 25364 }, { "epoch": 21.04979253112033, "grad_norm": 12.339609146118164, "learning_rate": 1.1584398340248964e-05, "loss": 0.2596, "step": 25365 }, { "epoch": 21.050622406639004, "grad_norm": 49.96308517456055, "learning_rate": 1.1584066390041494e-05, "loss": 0.6369, "step": 25366 }, { "epoch": 21.051452282157676, "grad_norm": 29.61524772644043, "learning_rate": 1.1583734439834027e-05, "loss": 0.4704, "step": 25367 }, { "epoch": 21.05228215767635, "grad_norm": 42.65925598144531, "learning_rate": 1.1583402489626557e-05, "loss": 0.3673, "step": 25368 }, { "epoch": 21.05311203319502, "grad_norm": 46.202388763427734, "learning_rate": 1.1583070539419087e-05, "loss": 0.3809, "step": 25369 }, { "epoch": 21.053941908713693, "grad_norm": 16.888208389282227, "learning_rate": 1.158273858921162e-05, "loss": 0.381, "step": 25370 }, { "epoch": 
21.054771784232365, "grad_norm": 36.16720199584961, "learning_rate": 1.1582406639004152e-05, "loss": 0.5407, "step": 25371 }, { "epoch": 21.055601659751037, "grad_norm": 81.15313720703125, "learning_rate": 1.158207468879668e-05, "loss": 0.4712, "step": 25372 }, { "epoch": 21.05643153526971, "grad_norm": 50.38385009765625, "learning_rate": 1.1581742738589212e-05, "loss": 0.6165, "step": 25373 }, { "epoch": 21.05726141078838, "grad_norm": 70.2305908203125, "learning_rate": 1.1581410788381745e-05, "loss": 0.8139, "step": 25374 }, { "epoch": 21.058091286307054, "grad_norm": 13.543917655944824, "learning_rate": 1.1581078838174275e-05, "loss": 0.2069, "step": 25375 }, { "epoch": 21.058921161825726, "grad_norm": 23.90308952331543, "learning_rate": 1.1580746887966805e-05, "loss": 0.3661, "step": 25376 }, { "epoch": 21.059751037344398, "grad_norm": 32.91831588745117, "learning_rate": 1.1580414937759337e-05, "loss": 0.6187, "step": 25377 }, { "epoch": 21.06058091286307, "grad_norm": 33.43762969970703, "learning_rate": 1.1580082987551868e-05, "loss": 0.4269, "step": 25378 }, { "epoch": 21.061410788381743, "grad_norm": 56.70680618286133, "learning_rate": 1.15797510373444e-05, "loss": 1.0516, "step": 25379 }, { "epoch": 21.062240663900415, "grad_norm": 47.517967224121094, "learning_rate": 1.1579419087136929e-05, "loss": 0.272, "step": 25380 }, { "epoch": 21.063070539419087, "grad_norm": 50.014102935791016, "learning_rate": 1.157908713692946e-05, "loss": 0.4395, "step": 25381 }, { "epoch": 21.06390041493776, "grad_norm": 15.216620445251465, "learning_rate": 1.1578755186721993e-05, "loss": 0.1919, "step": 25382 }, { "epoch": 21.06473029045643, "grad_norm": 51.30482864379883, "learning_rate": 1.1578423236514525e-05, "loss": 0.5134, "step": 25383 }, { "epoch": 21.065560165975104, "grad_norm": 23.328018188476562, "learning_rate": 1.1578091286307054e-05, "loss": 0.414, "step": 25384 }, { "epoch": 21.066390041493776, "grad_norm": 41.65214920043945, "learning_rate": 
1.1577759336099586e-05, "loss": 0.44, "step": 25385 }, { "epoch": 21.067219917012448, "grad_norm": 85.43733978271484, "learning_rate": 1.1577427385892116e-05, "loss": 0.7023, "step": 25386 }, { "epoch": 21.06804979253112, "grad_norm": 132.9402313232422, "learning_rate": 1.1577095435684648e-05, "loss": 0.3572, "step": 25387 }, { "epoch": 21.068879668049792, "grad_norm": 44.53165817260742, "learning_rate": 1.157676348547718e-05, "loss": 0.4133, "step": 25388 }, { "epoch": 21.069709543568464, "grad_norm": 15.957858085632324, "learning_rate": 1.157643153526971e-05, "loss": 0.3321, "step": 25389 }, { "epoch": 21.070539419087137, "grad_norm": 25.962360382080078, "learning_rate": 1.1576099585062241e-05, "loss": 0.4934, "step": 25390 }, { "epoch": 21.07136929460581, "grad_norm": 14.683563232421875, "learning_rate": 1.1575767634854773e-05, "loss": 0.417, "step": 25391 }, { "epoch": 21.07219917012448, "grad_norm": 54.67500305175781, "learning_rate": 1.1575435684647306e-05, "loss": 0.9944, "step": 25392 }, { "epoch": 21.073029045643153, "grad_norm": 32.27696228027344, "learning_rate": 1.1575103734439834e-05, "loss": 0.3691, "step": 25393 }, { "epoch": 21.073858921161825, "grad_norm": 92.78306579589844, "learning_rate": 1.1574771784232366e-05, "loss": 0.3486, "step": 25394 }, { "epoch": 21.074688796680498, "grad_norm": 28.202749252319336, "learning_rate": 1.1574439834024897e-05, "loss": 0.8182, "step": 25395 }, { "epoch": 21.07551867219917, "grad_norm": 22.173551559448242, "learning_rate": 1.1574107883817429e-05, "loss": 0.3387, "step": 25396 }, { "epoch": 21.076348547717842, "grad_norm": 40.90922546386719, "learning_rate": 1.157377593360996e-05, "loss": 0.3688, "step": 25397 }, { "epoch": 21.077178423236514, "grad_norm": 68.36106872558594, "learning_rate": 1.157344398340249e-05, "loss": 0.6415, "step": 25398 }, { "epoch": 21.078008298755186, "grad_norm": 17.208669662475586, "learning_rate": 1.1573112033195022e-05, "loss": 0.3857, "step": 25399 }, { "epoch": 21.07883817427386, 
"grad_norm": 35.87124252319336, "learning_rate": 1.1572780082987554e-05, "loss": 0.3227, "step": 25400 }, { "epoch": 21.07966804979253, "grad_norm": 9.622591972351074, "learning_rate": 1.1572448132780083e-05, "loss": 0.271, "step": 25401 }, { "epoch": 21.080497925311203, "grad_norm": 20.985782623291016, "learning_rate": 1.1572116182572615e-05, "loss": 0.3442, "step": 25402 }, { "epoch": 21.081327800829875, "grad_norm": 27.295705795288086, "learning_rate": 1.1571784232365147e-05, "loss": 0.9942, "step": 25403 }, { "epoch": 21.082157676348547, "grad_norm": 43.436283111572266, "learning_rate": 1.1571452282157677e-05, "loss": 0.6729, "step": 25404 }, { "epoch": 21.08298755186722, "grad_norm": 31.285066604614258, "learning_rate": 1.1571120331950208e-05, "loss": 0.7113, "step": 25405 }, { "epoch": 21.083817427385892, "grad_norm": 41.62228012084961, "learning_rate": 1.157078838174274e-05, "loss": 0.5733, "step": 25406 }, { "epoch": 21.084647302904564, "grad_norm": 24.321807861328125, "learning_rate": 1.157045643153527e-05, "loss": 0.3151, "step": 25407 }, { "epoch": 21.085477178423236, "grad_norm": 47.372459411621094, "learning_rate": 1.1570124481327802e-05, "loss": 0.5387, "step": 25408 }, { "epoch": 21.08630705394191, "grad_norm": 54.56550979614258, "learning_rate": 1.1569792531120331e-05, "loss": 1.0605, "step": 25409 }, { "epoch": 21.08713692946058, "grad_norm": 18.768692016601562, "learning_rate": 1.1569460580912863e-05, "loss": 0.3716, "step": 25410 }, { "epoch": 21.087966804979253, "grad_norm": 34.166629791259766, "learning_rate": 1.1569128630705395e-05, "loss": 0.5964, "step": 25411 }, { "epoch": 21.088796680497925, "grad_norm": 139.42601013183594, "learning_rate": 1.1568796680497927e-05, "loss": 1.0069, "step": 25412 }, { "epoch": 21.089626556016597, "grad_norm": 38.229862213134766, "learning_rate": 1.1568464730290458e-05, "loss": 0.7572, "step": 25413 }, { "epoch": 21.09045643153527, "grad_norm": 90.72972869873047, "learning_rate": 1.1568132780082988e-05, 
"loss": 0.4653, "step": 25414 }, { "epoch": 21.09128630705394, "grad_norm": 68.3828353881836, "learning_rate": 1.156780082987552e-05, "loss": 1.1008, "step": 25415 }, { "epoch": 21.092116182572614, "grad_norm": 39.551570892333984, "learning_rate": 1.156746887966805e-05, "loss": 0.5084, "step": 25416 }, { "epoch": 21.092946058091286, "grad_norm": 25.34058380126953, "learning_rate": 1.1567136929460583e-05, "loss": 0.3278, "step": 25417 }, { "epoch": 21.093775933609958, "grad_norm": 101.13787078857422, "learning_rate": 1.1566804979253112e-05, "loss": 0.6786, "step": 25418 }, { "epoch": 21.09460580912863, "grad_norm": 59.42803192138672, "learning_rate": 1.1566473029045644e-05, "loss": 0.7005, "step": 25419 }, { "epoch": 21.095435684647303, "grad_norm": 120.1617202758789, "learning_rate": 1.1566141078838176e-05, "loss": 0.3357, "step": 25420 }, { "epoch": 21.096265560165975, "grad_norm": 37.51751708984375, "learning_rate": 1.1565809128630708e-05, "loss": 0.587, "step": 25421 }, { "epoch": 21.097095435684647, "grad_norm": 46.15898513793945, "learning_rate": 1.1565477178423237e-05, "loss": 0.4741, "step": 25422 }, { "epoch": 21.09792531120332, "grad_norm": 46.52073287963867, "learning_rate": 1.1565145228215769e-05, "loss": 0.3325, "step": 25423 }, { "epoch": 21.09875518672199, "grad_norm": 44.14128112792969, "learning_rate": 1.15648132780083e-05, "loss": 0.7437, "step": 25424 }, { "epoch": 21.099585062240664, "grad_norm": 21.220827102661133, "learning_rate": 1.1564481327800831e-05, "loss": 0.255, "step": 25425 }, { "epoch": 21.100414937759336, "grad_norm": 114.89814758300781, "learning_rate": 1.1564149377593362e-05, "loss": 0.498, "step": 25426 }, { "epoch": 21.101244813278008, "grad_norm": 53.237552642822266, "learning_rate": 1.1563817427385892e-05, "loss": 0.5429, "step": 25427 }, { "epoch": 21.10207468879668, "grad_norm": 14.899142265319824, "learning_rate": 1.1563485477178424e-05, "loss": 0.2603, "step": 25428 }, { "epoch": 21.102904564315352, "grad_norm": 
48.07515335083008, "learning_rate": 1.1563153526970956e-05, "loss": 0.4219, "step": 25429 }, { "epoch": 21.103734439834025, "grad_norm": 36.58616638183594, "learning_rate": 1.1562821576763485e-05, "loss": 0.381, "step": 25430 }, { "epoch": 21.104564315352697, "grad_norm": 21.250308990478516, "learning_rate": 1.1562489626556017e-05, "loss": 0.3912, "step": 25431 }, { "epoch": 21.10539419087137, "grad_norm": 92.14789581298828, "learning_rate": 1.156215767634855e-05, "loss": 0.9124, "step": 25432 }, { "epoch": 21.10622406639004, "grad_norm": 35.707130432128906, "learning_rate": 1.156182572614108e-05, "loss": 0.7891, "step": 25433 }, { "epoch": 21.107053941908713, "grad_norm": 41.91947937011719, "learning_rate": 1.156149377593361e-05, "loss": 0.5444, "step": 25434 }, { "epoch": 21.107883817427386, "grad_norm": 42.942359924316406, "learning_rate": 1.1561161825726142e-05, "loss": 0.6398, "step": 25435 }, { "epoch": 21.108713692946058, "grad_norm": 17.321426391601562, "learning_rate": 1.1560829875518673e-05, "loss": 0.2625, "step": 25436 }, { "epoch": 21.10954356846473, "grad_norm": 54.51597595214844, "learning_rate": 1.1560497925311205e-05, "loss": 0.3389, "step": 25437 }, { "epoch": 21.110373443983402, "grad_norm": 111.02061462402344, "learning_rate": 1.1560165975103735e-05, "loss": 0.8135, "step": 25438 }, { "epoch": 21.111203319502074, "grad_norm": 19.46518898010254, "learning_rate": 1.1559834024896265e-05, "loss": 0.3121, "step": 25439 }, { "epoch": 21.112033195020746, "grad_norm": 19.282026290893555, "learning_rate": 1.1559502074688798e-05, "loss": 0.4143, "step": 25440 }, { "epoch": 21.11286307053942, "grad_norm": 34.33344268798828, "learning_rate": 1.155917012448133e-05, "loss": 0.4394, "step": 25441 }, { "epoch": 21.11369294605809, "grad_norm": 39.95151901245117, "learning_rate": 1.155883817427386e-05, "loss": 0.5092, "step": 25442 }, { "epoch": 21.114522821576763, "grad_norm": 11.972146034240723, "learning_rate": 1.155850622406639e-05, "loss": 0.3213, "step": 
25443 }, { "epoch": 21.115352697095435, "grad_norm": 54.05147171020508, "learning_rate": 1.1558174273858923e-05, "loss": 0.7729, "step": 25444 }, { "epoch": 21.116182572614107, "grad_norm": 58.82754135131836, "learning_rate": 1.1557842323651453e-05, "loss": 0.7073, "step": 25445 }, { "epoch": 21.11701244813278, "grad_norm": 25.564048767089844, "learning_rate": 1.1557510373443985e-05, "loss": 0.467, "step": 25446 }, { "epoch": 21.117842323651452, "grad_norm": 66.75028228759766, "learning_rate": 1.1557178423236516e-05, "loss": 1.0319, "step": 25447 }, { "epoch": 21.118672199170124, "grad_norm": 49.73345947265625, "learning_rate": 1.1556846473029046e-05, "loss": 0.9804, "step": 25448 }, { "epoch": 21.119502074688796, "grad_norm": 51.49262619018555, "learning_rate": 1.1556514522821578e-05, "loss": 0.5165, "step": 25449 }, { "epoch": 21.12033195020747, "grad_norm": 54.311336517333984, "learning_rate": 1.155618257261411e-05, "loss": 0.4145, "step": 25450 }, { "epoch": 21.12116182572614, "grad_norm": 24.869365692138672, "learning_rate": 1.1555850622406639e-05, "loss": 0.2941, "step": 25451 }, { "epoch": 21.121991701244813, "grad_norm": 79.96624755859375, "learning_rate": 1.1555518672199171e-05, "loss": 0.5842, "step": 25452 }, { "epoch": 21.122821576763485, "grad_norm": 19.487625122070312, "learning_rate": 1.1555186721991703e-05, "loss": 0.3039, "step": 25453 }, { "epoch": 21.123651452282157, "grad_norm": 29.44561195373535, "learning_rate": 1.1554854771784234e-05, "loss": 0.3972, "step": 25454 }, { "epoch": 21.12448132780083, "grad_norm": 61.953495025634766, "learning_rate": 1.1554522821576764e-05, "loss": 0.6187, "step": 25455 }, { "epoch": 21.1253112033195, "grad_norm": 92.1985855102539, "learning_rate": 1.1554190871369294e-05, "loss": 0.4775, "step": 25456 }, { "epoch": 21.126141078838174, "grad_norm": 34.24755096435547, "learning_rate": 1.1553858921161826e-05, "loss": 0.4083, "step": 25457 }, { "epoch": 21.126970954356846, "grad_norm": 39.69024658203125, 
"learning_rate": 1.1553526970954359e-05, "loss": 0.864, "step": 25458 }, { "epoch": 21.127800829875518, "grad_norm": 20.25880241394043, "learning_rate": 1.1553195020746887e-05, "loss": 0.343, "step": 25459 }, { "epoch": 21.12863070539419, "grad_norm": 103.02143096923828, "learning_rate": 1.155286307053942e-05, "loss": 0.2806, "step": 25460 }, { "epoch": 21.129460580912863, "grad_norm": 13.985121726989746, "learning_rate": 1.1552531120331952e-05, "loss": 0.2367, "step": 25461 }, { "epoch": 21.130290456431535, "grad_norm": 50.936134338378906, "learning_rate": 1.1552199170124484e-05, "loss": 0.6083, "step": 25462 }, { "epoch": 21.131120331950207, "grad_norm": 22.498279571533203, "learning_rate": 1.1551867219917012e-05, "loss": 0.3514, "step": 25463 }, { "epoch": 21.13195020746888, "grad_norm": 25.60974884033203, "learning_rate": 1.1551535269709544e-05, "loss": 0.4728, "step": 25464 }, { "epoch": 21.13278008298755, "grad_norm": 51.91370391845703, "learning_rate": 1.1551203319502075e-05, "loss": 0.7522, "step": 25465 }, { "epoch": 21.133609958506224, "grad_norm": 42.381126403808594, "learning_rate": 1.1550871369294607e-05, "loss": 0.3252, "step": 25466 }, { "epoch": 21.134439834024896, "grad_norm": 118.43680572509766, "learning_rate": 1.1550539419087139e-05, "loss": 0.6483, "step": 25467 }, { "epoch": 21.135269709543568, "grad_norm": 25.913854598999023, "learning_rate": 1.1550207468879668e-05, "loss": 0.3405, "step": 25468 }, { "epoch": 21.13609958506224, "grad_norm": 44.907569885253906, "learning_rate": 1.15498755186722e-05, "loss": 0.5123, "step": 25469 }, { "epoch": 21.136929460580912, "grad_norm": 24.819366455078125, "learning_rate": 1.1549543568464732e-05, "loss": 0.4708, "step": 25470 }, { "epoch": 21.137759336099585, "grad_norm": 83.4653549194336, "learning_rate": 1.1549211618257264e-05, "loss": 0.4168, "step": 25471 }, { "epoch": 21.138589211618257, "grad_norm": 30.83622932434082, "learning_rate": 1.1548879668049793e-05, "loss": 0.579, "step": 25472 }, { 
"epoch": 21.13941908713693, "grad_norm": 21.693828582763672, "learning_rate": 1.1548547717842325e-05, "loss": 0.3049, "step": 25473 }, { "epoch": 21.1402489626556, "grad_norm": 35.82440948486328, "learning_rate": 1.1548215767634855e-05, "loss": 0.5106, "step": 25474 }, { "epoch": 21.141078838174273, "grad_norm": 46.8291130065918, "learning_rate": 1.1547883817427387e-05, "loss": 0.4978, "step": 25475 }, { "epoch": 21.141908713692946, "grad_norm": 49.966064453125, "learning_rate": 1.1547551867219918e-05, "loss": 0.7121, "step": 25476 }, { "epoch": 21.142738589211618, "grad_norm": 56.476661682128906, "learning_rate": 1.1547219917012448e-05, "loss": 0.6889, "step": 25477 }, { "epoch": 21.14356846473029, "grad_norm": 23.051013946533203, "learning_rate": 1.154688796680498e-05, "loss": 0.3134, "step": 25478 }, { "epoch": 21.144398340248962, "grad_norm": 22.22833824157715, "learning_rate": 1.1546556016597513e-05, "loss": 0.6445, "step": 25479 }, { "epoch": 21.145228215767634, "grad_norm": 23.13085174560547, "learning_rate": 1.1546224066390041e-05, "loss": 0.3349, "step": 25480 }, { "epoch": 21.146058091286307, "grad_norm": 31.335899353027344, "learning_rate": 1.1545892116182573e-05, "loss": 0.2929, "step": 25481 }, { "epoch": 21.14688796680498, "grad_norm": 23.301889419555664, "learning_rate": 1.1545560165975105e-05, "loss": 0.3236, "step": 25482 }, { "epoch": 21.14771784232365, "grad_norm": 99.00537872314453, "learning_rate": 1.1545228215767636e-05, "loss": 1.2558, "step": 25483 }, { "epoch": 21.148547717842323, "grad_norm": 40.99671173095703, "learning_rate": 1.1544896265560166e-05, "loss": 0.4083, "step": 25484 }, { "epoch": 21.149377593360995, "grad_norm": 72.49638366699219, "learning_rate": 1.1544564315352698e-05, "loss": 0.8495, "step": 25485 }, { "epoch": 21.150207468879668, "grad_norm": 35.30818176269531, "learning_rate": 1.1544232365145229e-05, "loss": 0.6571, "step": 25486 }, { "epoch": 21.15103734439834, "grad_norm": 46.18763732910156, "learning_rate": 
1.1543900414937761e-05, "loss": 0.6666, "step": 25487 }, { "epoch": 21.151867219917012, "grad_norm": 9.816570281982422, "learning_rate": 1.154356846473029e-05, "loss": 0.2099, "step": 25488 }, { "epoch": 21.152697095435684, "grad_norm": 30.531841278076172, "learning_rate": 1.1543236514522822e-05, "loss": 0.382, "step": 25489 }, { "epoch": 21.153526970954356, "grad_norm": 92.81005096435547, "learning_rate": 1.1542904564315354e-05, "loss": 0.8776, "step": 25490 }, { "epoch": 21.15435684647303, "grad_norm": 88.39568328857422, "learning_rate": 1.1542572614107886e-05, "loss": 0.4305, "step": 25491 }, { "epoch": 21.1551867219917, "grad_norm": 55.85063934326172, "learning_rate": 1.1542240663900416e-05, "loss": 0.4269, "step": 25492 }, { "epoch": 21.156016597510373, "grad_norm": 12.843761444091797, "learning_rate": 1.1541908713692947e-05, "loss": 0.2179, "step": 25493 }, { "epoch": 21.156846473029045, "grad_norm": 32.27701950073242, "learning_rate": 1.1541576763485479e-05, "loss": 0.5533, "step": 25494 }, { "epoch": 21.157676348547717, "grad_norm": 72.1642837524414, "learning_rate": 1.154124481327801e-05, "loss": 0.6468, "step": 25495 }, { "epoch": 21.15850622406639, "grad_norm": 65.04100036621094, "learning_rate": 1.1540912863070541e-05, "loss": 0.8659, "step": 25496 }, { "epoch": 21.15933609958506, "grad_norm": 48.805389404296875, "learning_rate": 1.154058091286307e-05, "loss": 0.3736, "step": 25497 }, { "epoch": 21.160165975103734, "grad_norm": 40.18868637084961, "learning_rate": 1.1540248962655602e-05, "loss": 0.3833, "step": 25498 }, { "epoch": 21.160995850622406, "grad_norm": 32.48764419555664, "learning_rate": 1.1539917012448134e-05, "loss": 0.308, "step": 25499 }, { "epoch": 21.16182572614108, "grad_norm": 45.530731201171875, "learning_rate": 1.1539585062240666e-05, "loss": 0.6637, "step": 25500 }, { "epoch": 21.16265560165975, "grad_norm": 46.74814224243164, "learning_rate": 1.1539253112033195e-05, "loss": 0.5162, "step": 25501 }, { "epoch": 21.163485477178423, 
"grad_norm": 19.63306427001953, "learning_rate": 1.1538921161825727e-05, "loss": 0.3023, "step": 25502 }, { "epoch": 21.164315352697095, "grad_norm": 90.77879333496094, "learning_rate": 1.1538589211618258e-05, "loss": 0.7279, "step": 25503 }, { "epoch": 21.165145228215767, "grad_norm": 44.563201904296875, "learning_rate": 1.153825726141079e-05, "loss": 0.8817, "step": 25504 }, { "epoch": 21.16597510373444, "grad_norm": 151.55967712402344, "learning_rate": 1.153792531120332e-05, "loss": 0.4762, "step": 25505 }, { "epoch": 21.16680497925311, "grad_norm": 69.95704650878906, "learning_rate": 1.153759336099585e-05, "loss": 0.5466, "step": 25506 }, { "epoch": 21.167634854771784, "grad_norm": 133.2249755859375, "learning_rate": 1.1537261410788383e-05, "loss": 0.7248, "step": 25507 }, { "epoch": 21.168464730290456, "grad_norm": 24.778234481811523, "learning_rate": 1.1536929460580915e-05, "loss": 0.3503, "step": 25508 }, { "epoch": 21.169294605809128, "grad_norm": 30.35495376586914, "learning_rate": 1.1536597510373444e-05, "loss": 0.5664, "step": 25509 }, { "epoch": 21.1701244813278, "grad_norm": 12.201026916503906, "learning_rate": 1.1536265560165976e-05, "loss": 0.3241, "step": 25510 }, { "epoch": 21.170954356846472, "grad_norm": 99.0828857421875, "learning_rate": 1.1535933609958508e-05, "loss": 0.4455, "step": 25511 }, { "epoch": 21.171784232365145, "grad_norm": 30.732830047607422, "learning_rate": 1.1535601659751038e-05, "loss": 0.5381, "step": 25512 }, { "epoch": 21.172614107883817, "grad_norm": 30.132253646850586, "learning_rate": 1.1535269709543569e-05, "loss": 0.3504, "step": 25513 }, { "epoch": 21.17344398340249, "grad_norm": 20.087303161621094, "learning_rate": 1.15349377593361e-05, "loss": 0.3417, "step": 25514 }, { "epoch": 21.17427385892116, "grad_norm": 34.57375717163086, "learning_rate": 1.1534605809128631e-05, "loss": 0.4858, "step": 25515 }, { "epoch": 21.175103734439833, "grad_norm": 28.995532989501953, "learning_rate": 1.1534273858921163e-05, "loss": 
0.56, "step": 25516 }, { "epoch": 21.175933609958506, "grad_norm": 41.3264045715332, "learning_rate": 1.1533941908713692e-05, "loss": 0.6338, "step": 25517 }, { "epoch": 21.176763485477178, "grad_norm": 15.304985046386719, "learning_rate": 1.1533609958506224e-05, "loss": 0.3296, "step": 25518 }, { "epoch": 21.17759336099585, "grad_norm": 38.69384765625, "learning_rate": 1.1533278008298756e-05, "loss": 0.6447, "step": 25519 }, { "epoch": 21.178423236514522, "grad_norm": 46.99766159057617, "learning_rate": 1.1532946058091288e-05, "loss": 0.5593, "step": 25520 }, { "epoch": 21.179253112033194, "grad_norm": 51.987247467041016, "learning_rate": 1.1532614107883819e-05, "loss": 1.0232, "step": 25521 }, { "epoch": 21.180082987551867, "grad_norm": 26.287996292114258, "learning_rate": 1.1532282157676349e-05, "loss": 0.3735, "step": 25522 }, { "epoch": 21.18091286307054, "grad_norm": 49.77684020996094, "learning_rate": 1.1531950207468881e-05, "loss": 0.925, "step": 25523 }, { "epoch": 21.18174273858921, "grad_norm": 34.93507766723633, "learning_rate": 1.1531618257261412e-05, "loss": 0.776, "step": 25524 }, { "epoch": 21.182572614107883, "grad_norm": 14.159074783325195, "learning_rate": 1.1531286307053944e-05, "loss": 0.2683, "step": 25525 }, { "epoch": 21.183402489626555, "grad_norm": 17.098180770874023, "learning_rate": 1.1530954356846472e-05, "loss": 0.2994, "step": 25526 }, { "epoch": 21.184232365145228, "grad_norm": 22.150381088256836, "learning_rate": 1.1530622406639005e-05, "loss": 0.3615, "step": 25527 }, { "epoch": 21.1850622406639, "grad_norm": 38.40119171142578, "learning_rate": 1.1530290456431537e-05, "loss": 0.6742, "step": 25528 }, { "epoch": 21.185892116182572, "grad_norm": 32.902069091796875, "learning_rate": 1.1529958506224069e-05, "loss": 0.7017, "step": 25529 }, { "epoch": 21.186721991701244, "grad_norm": 43.08153533935547, "learning_rate": 1.1529626556016597e-05, "loss": 0.3316, "step": 25530 }, { "epoch": 21.187551867219916, "grad_norm": 32.3986701965332, 
"learning_rate": 1.152929460580913e-05, "loss": 0.5264, "step": 25531 }, { "epoch": 21.18838174273859, "grad_norm": 53.25448989868164, "learning_rate": 1.1528962655601662e-05, "loss": 0.295, "step": 25532 }, { "epoch": 21.18921161825726, "grad_norm": 48.7035026550293, "learning_rate": 1.1528630705394192e-05, "loss": 0.4231, "step": 25533 }, { "epoch": 21.190041493775933, "grad_norm": 38.75851058959961, "learning_rate": 1.1528298755186723e-05, "loss": 0.5872, "step": 25534 }, { "epoch": 21.190871369294605, "grad_norm": 10.098859786987305, "learning_rate": 1.1527966804979253e-05, "loss": 0.3018, "step": 25535 }, { "epoch": 21.191701244813277, "grad_norm": 19.316404342651367, "learning_rate": 1.1527634854771785e-05, "loss": 0.2567, "step": 25536 }, { "epoch": 21.19253112033195, "grad_norm": 50.35611343383789, "learning_rate": 1.1527302904564317e-05, "loss": 0.9363, "step": 25537 }, { "epoch": 21.19336099585062, "grad_norm": 62.824581146240234, "learning_rate": 1.1526970954356846e-05, "loss": 0.5661, "step": 25538 }, { "epoch": 21.194190871369294, "grad_norm": 24.939239501953125, "learning_rate": 1.1526639004149378e-05, "loss": 0.2831, "step": 25539 }, { "epoch": 21.195020746887966, "grad_norm": 108.82495880126953, "learning_rate": 1.152630705394191e-05, "loss": 0.5014, "step": 25540 }, { "epoch": 21.19585062240664, "grad_norm": 26.38905143737793, "learning_rate": 1.1525975103734442e-05, "loss": 0.322, "step": 25541 }, { "epoch": 21.19668049792531, "grad_norm": 19.760765075683594, "learning_rate": 1.1525643153526971e-05, "loss": 0.3115, "step": 25542 }, { "epoch": 21.197510373443983, "grad_norm": 36.32806396484375, "learning_rate": 1.1525311203319503e-05, "loss": 0.8328, "step": 25543 }, { "epoch": 21.198340248962655, "grad_norm": 87.53820037841797, "learning_rate": 1.1524979253112033e-05, "loss": 0.3665, "step": 25544 }, { "epoch": 21.199170124481327, "grad_norm": 35.95024108886719, "learning_rate": 1.1524647302904566e-05, "loss": 0.8633, "step": 25545 }, { "epoch": 
21.2, "grad_norm": 44.20783615112305, "learning_rate": 1.1524315352697098e-05, "loss": 0.76, "step": 25546 }, { "epoch": 21.20082987551867, "grad_norm": 17.05226707458496, "learning_rate": 1.1523983402489626e-05, "loss": 0.2347, "step": 25547 }, { "epoch": 21.201659751037344, "grad_norm": 90.84508514404297, "learning_rate": 1.1523651452282158e-05, "loss": 0.6686, "step": 25548 }, { "epoch": 21.202489626556016, "grad_norm": 87.94279479980469, "learning_rate": 1.152331950207469e-05, "loss": 0.7728, "step": 25549 }, { "epoch": 21.203319502074688, "grad_norm": 74.03512573242188, "learning_rate": 1.1522987551867221e-05, "loss": 0.549, "step": 25550 }, { "epoch": 21.20414937759336, "grad_norm": 21.34310531616211, "learning_rate": 1.1522655601659751e-05, "loss": 0.3319, "step": 25551 }, { "epoch": 21.204979253112032, "grad_norm": 46.15092849731445, "learning_rate": 1.1522323651452284e-05, "loss": 0.5578, "step": 25552 }, { "epoch": 21.205809128630705, "grad_norm": 44.962650299072266, "learning_rate": 1.1521991701244814e-05, "loss": 0.4168, "step": 25553 }, { "epoch": 21.206639004149377, "grad_norm": 90.37028503417969, "learning_rate": 1.1521659751037346e-05, "loss": 1.1519, "step": 25554 }, { "epoch": 21.20746887966805, "grad_norm": 88.47205352783203, "learning_rate": 1.1521327800829876e-05, "loss": 0.959, "step": 25555 }, { "epoch": 21.20829875518672, "grad_norm": 24.771530151367188, "learning_rate": 1.1520995850622407e-05, "loss": 0.3936, "step": 25556 }, { "epoch": 21.209128630705393, "grad_norm": 51.29042434692383, "learning_rate": 1.1520663900414939e-05, "loss": 0.9291, "step": 25557 }, { "epoch": 21.209958506224066, "grad_norm": 31.239391326904297, "learning_rate": 1.1520331950207471e-05, "loss": 0.4457, "step": 25558 }, { "epoch": 21.210788381742738, "grad_norm": 33.155975341796875, "learning_rate": 1.152e-05, "loss": 0.4026, "step": 25559 }, { "epoch": 21.21161825726141, "grad_norm": 13.237593650817871, "learning_rate": 1.1519668049792532e-05, "loss": 0.2762, 
"step": 25560 }, { "epoch": 21.212448132780082, "grad_norm": 39.061241149902344, "learning_rate": 1.1519336099585064e-05, "loss": 0.4682, "step": 25561 }, { "epoch": 21.213278008298754, "grad_norm": 45.97096633911133, "learning_rate": 1.1519004149377594e-05, "loss": 0.3522, "step": 25562 }, { "epoch": 21.214107883817427, "grad_norm": 52.55274963378906, "learning_rate": 1.1518672199170125e-05, "loss": 0.6575, "step": 25563 }, { "epoch": 21.2149377593361, "grad_norm": 128.2119140625, "learning_rate": 1.1518340248962657e-05, "loss": 1.7313, "step": 25564 }, { "epoch": 21.21576763485477, "grad_norm": 15.552796363830566, "learning_rate": 1.1518008298755187e-05, "loss": 0.2227, "step": 25565 }, { "epoch": 21.216597510373443, "grad_norm": 30.899150848388672, "learning_rate": 1.151767634854772e-05, "loss": 0.4154, "step": 25566 }, { "epoch": 21.217427385892115, "grad_norm": 95.06758117675781, "learning_rate": 1.1517344398340248e-05, "loss": 0.5715, "step": 25567 }, { "epoch": 21.218257261410788, "grad_norm": 27.404979705810547, "learning_rate": 1.151701244813278e-05, "loss": 0.4241, "step": 25568 }, { "epoch": 21.21908713692946, "grad_norm": 32.54257583618164, "learning_rate": 1.1516680497925312e-05, "loss": 0.5123, "step": 25569 }, { "epoch": 21.219917012448132, "grad_norm": 79.03857421875, "learning_rate": 1.1516348547717845e-05, "loss": 0.6824, "step": 25570 }, { "epoch": 21.220746887966804, "grad_norm": 61.98723220825195, "learning_rate": 1.1516016597510375e-05, "loss": 0.5613, "step": 25571 }, { "epoch": 21.221576763485476, "grad_norm": 41.79189682006836, "learning_rate": 1.1515684647302905e-05, "loss": 0.4707, "step": 25572 }, { "epoch": 21.22240663900415, "grad_norm": 44.79808807373047, "learning_rate": 1.1515352697095436e-05, "loss": 0.4869, "step": 25573 }, { "epoch": 21.22323651452282, "grad_norm": 44.838993072509766, "learning_rate": 1.1515020746887968e-05, "loss": 0.3107, "step": 25574 }, { "epoch": 21.224066390041493, "grad_norm": 47.0797233581543, 
"learning_rate": 1.15146887966805e-05, "loss": 0.4488, "step": 25575 }, { "epoch": 21.224896265560165, "grad_norm": 61.30029296875, "learning_rate": 1.1514356846473029e-05, "loss": 0.9781, "step": 25576 }, { "epoch": 21.225726141078837, "grad_norm": 30.10378646850586, "learning_rate": 1.151402489626556e-05, "loss": 0.6512, "step": 25577 }, { "epoch": 21.22655601659751, "grad_norm": 78.4212875366211, "learning_rate": 1.1513692946058093e-05, "loss": 0.7044, "step": 25578 }, { "epoch": 21.22738589211618, "grad_norm": 36.3818359375, "learning_rate": 1.1513360995850625e-05, "loss": 0.5539, "step": 25579 }, { "epoch": 21.228215767634854, "grad_norm": 20.949506759643555, "learning_rate": 1.1513029045643154e-05, "loss": 0.3421, "step": 25580 }, { "epoch": 21.229045643153526, "grad_norm": 34.670310974121094, "learning_rate": 1.1512697095435686e-05, "loss": 0.8362, "step": 25581 }, { "epoch": 21.2298755186722, "grad_norm": 31.8924617767334, "learning_rate": 1.1512365145228216e-05, "loss": 0.4463, "step": 25582 }, { "epoch": 21.23070539419087, "grad_norm": 33.81136703491211, "learning_rate": 1.1512033195020748e-05, "loss": 0.3667, "step": 25583 }, { "epoch": 21.231535269709543, "grad_norm": 27.349754333496094, "learning_rate": 1.1511701244813279e-05, "loss": 0.51, "step": 25584 }, { "epoch": 21.232365145228215, "grad_norm": 40.81522750854492, "learning_rate": 1.151136929460581e-05, "loss": 0.4021, "step": 25585 }, { "epoch": 21.233195020746887, "grad_norm": 19.46596336364746, "learning_rate": 1.1511037344398341e-05, "loss": 0.3163, "step": 25586 }, { "epoch": 21.23402489626556, "grad_norm": 43.61155700683594, "learning_rate": 1.1510705394190873e-05, "loss": 0.3953, "step": 25587 }, { "epoch": 21.23485477178423, "grad_norm": 42.112300872802734, "learning_rate": 1.1510373443983402e-05, "loss": 0.5371, "step": 25588 }, { "epoch": 21.235684647302904, "grad_norm": 20.738290786743164, "learning_rate": 1.1510041493775934e-05, "loss": 0.3834, "step": 25589 }, { "epoch": 
21.236514522821576, "grad_norm": 25.36322593688965, "learning_rate": 1.1509709543568466e-05, "loss": 0.5523, "step": 25590 }, { "epoch": 21.237344398340248, "grad_norm": 40.205196380615234, "learning_rate": 1.1509377593360997e-05, "loss": 0.2389, "step": 25591 }, { "epoch": 21.23817427385892, "grad_norm": 43.85402297973633, "learning_rate": 1.1509045643153527e-05, "loss": 0.7125, "step": 25592 }, { "epoch": 21.239004149377593, "grad_norm": 41.10438537597656, "learning_rate": 1.150871369294606e-05, "loss": 1.0106, "step": 25593 }, { "epoch": 21.239834024896265, "grad_norm": 25.020313262939453, "learning_rate": 1.150838174273859e-05, "loss": 0.6384, "step": 25594 }, { "epoch": 21.240663900414937, "grad_norm": 25.554244995117188, "learning_rate": 1.1508049792531122e-05, "loss": 0.4282, "step": 25595 }, { "epoch": 21.24149377593361, "grad_norm": 38.46757888793945, "learning_rate": 1.150771784232365e-05, "loss": 0.7073, "step": 25596 }, { "epoch": 21.24232365145228, "grad_norm": 51.02778244018555, "learning_rate": 1.1507385892116183e-05, "loss": 0.5276, "step": 25597 }, { "epoch": 21.243153526970953, "grad_norm": 37.80741882324219, "learning_rate": 1.1507053941908715e-05, "loss": 0.4746, "step": 25598 }, { "epoch": 21.243983402489626, "grad_norm": 22.11552619934082, "learning_rate": 1.1506721991701247e-05, "loss": 0.2428, "step": 25599 }, { "epoch": 21.244813278008298, "grad_norm": 95.015869140625, "learning_rate": 1.1506390041493777e-05, "loss": 0.5648, "step": 25600 }, { "epoch": 21.24564315352697, "grad_norm": 55.909996032714844, "learning_rate": 1.1506058091286308e-05, "loss": 0.6883, "step": 25601 }, { "epoch": 21.246473029045642, "grad_norm": 34.99110794067383, "learning_rate": 1.150572614107884e-05, "loss": 0.3561, "step": 25602 }, { "epoch": 21.247302904564314, "grad_norm": 68.34549713134766, "learning_rate": 1.150539419087137e-05, "loss": 0.4477, "step": 25603 }, { "epoch": 21.248132780082987, "grad_norm": 43.82637023925781, "learning_rate": 
1.1505062240663902e-05, "loss": 1.1645, "step": 25604 }, { "epoch": 21.24896265560166, "grad_norm": 60.36518859863281, "learning_rate": 1.1504730290456431e-05, "loss": 0.7828, "step": 25605 }, { "epoch": 21.24979253112033, "grad_norm": 47.82870101928711, "learning_rate": 1.1504398340248963e-05, "loss": 0.4877, "step": 25606 }, { "epoch": 21.250622406639003, "grad_norm": 24.69027328491211, "learning_rate": 1.1504066390041495e-05, "loss": 0.3354, "step": 25607 }, { "epoch": 21.251452282157675, "grad_norm": 38.069759368896484, "learning_rate": 1.1503734439834027e-05, "loss": 0.4126, "step": 25608 }, { "epoch": 21.252282157676348, "grad_norm": 18.55552864074707, "learning_rate": 1.1503402489626556e-05, "loss": 0.2506, "step": 25609 }, { "epoch": 21.25311203319502, "grad_norm": 25.947376251220703, "learning_rate": 1.1503070539419088e-05, "loss": 0.4073, "step": 25610 }, { "epoch": 21.253941908713692, "grad_norm": 54.31667709350586, "learning_rate": 1.150273858921162e-05, "loss": 0.453, "step": 25611 }, { "epoch": 21.254771784232364, "grad_norm": 27.883275985717773, "learning_rate": 1.150240663900415e-05, "loss": 0.3404, "step": 25612 }, { "epoch": 21.255601659751036, "grad_norm": 34.78464889526367, "learning_rate": 1.1502074688796681e-05, "loss": 0.3405, "step": 25613 }, { "epoch": 21.25643153526971, "grad_norm": 8.465738296508789, "learning_rate": 1.1501742738589212e-05, "loss": 0.2199, "step": 25614 }, { "epoch": 21.25726141078838, "grad_norm": 27.881500244140625, "learning_rate": 1.1501410788381744e-05, "loss": 0.4137, "step": 25615 }, { "epoch": 21.258091286307053, "grad_norm": 32.39488220214844, "learning_rate": 1.1501078838174276e-05, "loss": 0.4104, "step": 25616 }, { "epoch": 21.258921161825725, "grad_norm": 46.10076141357422, "learning_rate": 1.1500746887966804e-05, "loss": 0.5054, "step": 25617 }, { "epoch": 21.259751037344397, "grad_norm": 103.24410247802734, "learning_rate": 1.1500414937759337e-05, "loss": 1.69, "step": 25618 }, { "epoch": 21.26058091286307, 
"grad_norm": 44.79558563232422, "learning_rate": 1.1500082987551869e-05, "loss": 0.4054, "step": 25619 }, { "epoch": 21.261410788381742, "grad_norm": 34.903995513916016, "learning_rate": 1.1499751037344399e-05, "loss": 0.5322, "step": 25620 }, { "epoch": 21.262240663900414, "grad_norm": 28.871017456054688, "learning_rate": 1.149941908713693e-05, "loss": 0.4971, "step": 25621 }, { "epoch": 21.263070539419086, "grad_norm": 51.081844329833984, "learning_rate": 1.1499087136929462e-05, "loss": 0.2968, "step": 25622 }, { "epoch": 21.26390041493776, "grad_norm": 170.27096557617188, "learning_rate": 1.1498755186721992e-05, "loss": 0.5453, "step": 25623 }, { "epoch": 21.26473029045643, "grad_norm": 46.27880859375, "learning_rate": 1.1498423236514524e-05, "loss": 1.057, "step": 25624 }, { "epoch": 21.265560165975103, "grad_norm": 78.80746459960938, "learning_rate": 1.1498091286307056e-05, "loss": 1.1486, "step": 25625 }, { "epoch": 21.266390041493775, "grad_norm": 40.99831008911133, "learning_rate": 1.1497759336099585e-05, "loss": 0.4472, "step": 25626 }, { "epoch": 21.267219917012447, "grad_norm": 317.51519775390625, "learning_rate": 1.1497427385892117e-05, "loss": 0.6516, "step": 25627 }, { "epoch": 21.26804979253112, "grad_norm": 36.82558059692383, "learning_rate": 1.149709543568465e-05, "loss": 0.7921, "step": 25628 }, { "epoch": 21.26887966804979, "grad_norm": 32.939979553222656, "learning_rate": 1.149676348547718e-05, "loss": 0.3497, "step": 25629 }, { "epoch": 21.269709543568464, "grad_norm": 35.33080291748047, "learning_rate": 1.149643153526971e-05, "loss": 0.4364, "step": 25630 }, { "epoch": 21.270539419087136, "grad_norm": 80.58748626708984, "learning_rate": 1.1496099585062242e-05, "loss": 0.9502, "step": 25631 }, { "epoch": 21.271369294605808, "grad_norm": 42.897499084472656, "learning_rate": 1.1495767634854773e-05, "loss": 0.7322, "step": 25632 }, { "epoch": 21.27219917012448, "grad_norm": 34.90504837036133, "learning_rate": 1.1495435684647305e-05, "loss": 
0.6124, "step": 25633 }, { "epoch": 21.273029045643153, "grad_norm": 17.555049896240234, "learning_rate": 1.1495103734439833e-05, "loss": 0.4087, "step": 25634 }, { "epoch": 21.273858921161825, "grad_norm": 62.87807083129883, "learning_rate": 1.1494771784232365e-05, "loss": 0.5428, "step": 25635 }, { "epoch": 21.274688796680497, "grad_norm": 21.546430587768555, "learning_rate": 1.1494439834024898e-05, "loss": 0.3041, "step": 25636 }, { "epoch": 21.27551867219917, "grad_norm": 44.727962493896484, "learning_rate": 1.149410788381743e-05, "loss": 0.4101, "step": 25637 }, { "epoch": 21.27634854771784, "grad_norm": 53.21896743774414, "learning_rate": 1.1493775933609958e-05, "loss": 0.4187, "step": 25638 }, { "epoch": 21.277178423236514, "grad_norm": 30.64746856689453, "learning_rate": 1.149344398340249e-05, "loss": 0.7006, "step": 25639 }, { "epoch": 21.278008298755186, "grad_norm": 122.28244018554688, "learning_rate": 1.1493112033195023e-05, "loss": 0.6632, "step": 25640 }, { "epoch": 21.278838174273858, "grad_norm": 66.56785583496094, "learning_rate": 1.1492780082987553e-05, "loss": 0.4915, "step": 25641 }, { "epoch": 21.27966804979253, "grad_norm": 28.5828857421875, "learning_rate": 1.1492448132780083e-05, "loss": 0.3572, "step": 25642 }, { "epoch": 21.280497925311202, "grad_norm": 63.117881774902344, "learning_rate": 1.1492116182572614e-05, "loss": 0.5642, "step": 25643 }, { "epoch": 21.281327800829875, "grad_norm": 59.047142028808594, "learning_rate": 1.1491784232365146e-05, "loss": 0.8366, "step": 25644 }, { "epoch": 21.282157676348547, "grad_norm": 37.74636459350586, "learning_rate": 1.1491452282157678e-05, "loss": 0.6363, "step": 25645 }, { "epoch": 21.28298755186722, "grad_norm": 50.81167221069336, "learning_rate": 1.1491120331950207e-05, "loss": 0.698, "step": 25646 }, { "epoch": 21.28381742738589, "grad_norm": 64.57793426513672, "learning_rate": 1.1490788381742739e-05, "loss": 0.8365, "step": 25647 }, { "epoch": 21.284647302904563, "grad_norm": 
69.25404357910156, "learning_rate": 1.1490456431535271e-05, "loss": 0.3418, "step": 25648 }, { "epoch": 21.285477178423236, "grad_norm": 63.29838562011719, "learning_rate": 1.1490124481327803e-05, "loss": 0.6624, "step": 25649 }, { "epoch": 21.286307053941908, "grad_norm": 71.08541107177734, "learning_rate": 1.1489792531120334e-05, "loss": 0.541, "step": 25650 }, { "epoch": 21.28713692946058, "grad_norm": 56.57718276977539, "learning_rate": 1.1489460580912864e-05, "loss": 0.6109, "step": 25651 }, { "epoch": 21.287966804979252, "grad_norm": 47.34943771362305, "learning_rate": 1.1489128630705394e-05, "loss": 0.7658, "step": 25652 }, { "epoch": 21.288796680497924, "grad_norm": 34.49358367919922, "learning_rate": 1.1488796680497926e-05, "loss": 0.6782, "step": 25653 }, { "epoch": 21.289626556016596, "grad_norm": 65.8954849243164, "learning_rate": 1.1488464730290459e-05, "loss": 0.7116, "step": 25654 }, { "epoch": 21.29045643153527, "grad_norm": 13.047417640686035, "learning_rate": 1.1488132780082987e-05, "loss": 0.2894, "step": 25655 }, { "epoch": 21.29128630705394, "grad_norm": 34.782752990722656, "learning_rate": 1.148780082987552e-05, "loss": 0.4667, "step": 25656 }, { "epoch": 21.292116182572613, "grad_norm": 38.44356155395508, "learning_rate": 1.1487468879668051e-05, "loss": 0.531, "step": 25657 }, { "epoch": 21.292946058091285, "grad_norm": 20.73521614074707, "learning_rate": 1.1487136929460584e-05, "loss": 0.2923, "step": 25658 }, { "epoch": 21.293775933609957, "grad_norm": 70.89274597167969, "learning_rate": 1.1486804979253112e-05, "loss": 1.1551, "step": 25659 }, { "epoch": 21.29460580912863, "grad_norm": 15.841334342956543, "learning_rate": 1.1486473029045644e-05, "loss": 0.2911, "step": 25660 }, { "epoch": 21.295435684647302, "grad_norm": 47.989444732666016, "learning_rate": 1.1486141078838175e-05, "loss": 0.7241, "step": 25661 }, { "epoch": 21.296265560165974, "grad_norm": 83.71199035644531, "learning_rate": 1.1485809128630707e-05, "loss": 0.6152, "step": 
25662 }, { "epoch": 21.297095435684646, "grad_norm": 30.888877868652344, "learning_rate": 1.1485477178423237e-05, "loss": 0.5145, "step": 25663 }, { "epoch": 21.29792531120332, "grad_norm": 44.577266693115234, "learning_rate": 1.1485145228215768e-05, "loss": 0.366, "step": 25664 }, { "epoch": 21.29875518672199, "grad_norm": 60.577003479003906, "learning_rate": 1.14848132780083e-05, "loss": 0.3957, "step": 25665 }, { "epoch": 21.299585062240663, "grad_norm": 35.03719711303711, "learning_rate": 1.1484481327800832e-05, "loss": 0.3943, "step": 25666 }, { "epoch": 21.300414937759335, "grad_norm": 54.391780853271484, "learning_rate": 1.148414937759336e-05, "loss": 0.5452, "step": 25667 }, { "epoch": 21.301244813278007, "grad_norm": 103.62513732910156, "learning_rate": 1.1483817427385893e-05, "loss": 0.66, "step": 25668 }, { "epoch": 21.30207468879668, "grad_norm": 32.92561721801758, "learning_rate": 1.1483485477178425e-05, "loss": 0.456, "step": 25669 }, { "epoch": 21.30290456431535, "grad_norm": 61.59231185913086, "learning_rate": 1.1483153526970955e-05, "loss": 0.4073, "step": 25670 }, { "epoch": 21.303734439834024, "grad_norm": 20.34712028503418, "learning_rate": 1.1482821576763486e-05, "loss": 0.6232, "step": 25671 }, { "epoch": 21.304564315352696, "grad_norm": 63.576820373535156, "learning_rate": 1.1482489626556018e-05, "loss": 0.5335, "step": 25672 }, { "epoch": 21.305394190871368, "grad_norm": 119.74589538574219, "learning_rate": 1.1482157676348548e-05, "loss": 0.7268, "step": 25673 }, { "epoch": 21.30622406639004, "grad_norm": 79.40808868408203, "learning_rate": 1.148182572614108e-05, "loss": 0.4638, "step": 25674 }, { "epoch": 21.307053941908713, "grad_norm": 19.35064697265625, "learning_rate": 1.1481493775933609e-05, "loss": 0.3349, "step": 25675 }, { "epoch": 21.307883817427385, "grad_norm": 69.07066345214844, "learning_rate": 1.1481161825726141e-05, "loss": 0.3873, "step": 25676 }, { "epoch": 21.308713692946057, "grad_norm": 74.97744750976562, 
"learning_rate": 1.1480829875518673e-05, "loss": 0.587, "step": 25677 }, { "epoch": 21.30954356846473, "grad_norm": 23.024965286254883, "learning_rate": 1.1480497925311205e-05, "loss": 0.3867, "step": 25678 }, { "epoch": 21.3103734439834, "grad_norm": 33.99810791015625, "learning_rate": 1.1480165975103736e-05, "loss": 0.5306, "step": 25679 }, { "epoch": 21.311203319502074, "grad_norm": 31.603038787841797, "learning_rate": 1.1479834024896266e-05, "loss": 0.4054, "step": 25680 }, { "epoch": 21.312033195020746, "grad_norm": 44.117759704589844, "learning_rate": 1.1479502074688797e-05, "loss": 0.8816, "step": 25681 }, { "epoch": 21.312863070539418, "grad_norm": 36.082786560058594, "learning_rate": 1.1479170124481329e-05, "loss": 0.3243, "step": 25682 }, { "epoch": 21.31369294605809, "grad_norm": 59.23830032348633, "learning_rate": 1.1478838174273861e-05, "loss": 0.8717, "step": 25683 }, { "epoch": 21.314522821576762, "grad_norm": 27.559946060180664, "learning_rate": 1.147850622406639e-05, "loss": 0.3387, "step": 25684 }, { "epoch": 21.315352697095435, "grad_norm": 43.65193557739258, "learning_rate": 1.1478174273858922e-05, "loss": 0.6541, "step": 25685 }, { "epoch": 21.316182572614107, "grad_norm": 13.79195499420166, "learning_rate": 1.1477842323651454e-05, "loss": 0.2315, "step": 25686 }, { "epoch": 21.31701244813278, "grad_norm": 13.884286880493164, "learning_rate": 1.1477510373443986e-05, "loss": 0.2901, "step": 25687 }, { "epoch": 21.31784232365145, "grad_norm": 121.14578247070312, "learning_rate": 1.1477178423236515e-05, "loss": 0.7079, "step": 25688 }, { "epoch": 21.318672199170123, "grad_norm": 21.82366943359375, "learning_rate": 1.1476846473029047e-05, "loss": 0.363, "step": 25689 }, { "epoch": 21.319502074688796, "grad_norm": 46.804656982421875, "learning_rate": 1.1476514522821577e-05, "loss": 0.6442, "step": 25690 }, { "epoch": 21.320331950207468, "grad_norm": 80.94834899902344, "learning_rate": 1.147618257261411e-05, "loss": 0.7917, "step": 25691 }, { 
"epoch": 21.32116182572614, "grad_norm": 39.901588439941406, "learning_rate": 1.147585062240664e-05, "loss": 0.4507, "step": 25692 }, { "epoch": 21.321991701244812, "grad_norm": 46.607933044433594, "learning_rate": 1.147551867219917e-05, "loss": 0.5745, "step": 25693 }, { "epoch": 21.322821576763484, "grad_norm": 26.754243850708008, "learning_rate": 1.1475186721991702e-05, "loss": 0.4872, "step": 25694 }, { "epoch": 21.323651452282157, "grad_norm": 114.14580535888672, "learning_rate": 1.1474854771784234e-05, "loss": 0.7986, "step": 25695 }, { "epoch": 21.32448132780083, "grad_norm": 27.48232650756836, "learning_rate": 1.1474522821576763e-05, "loss": 0.5644, "step": 25696 }, { "epoch": 21.3253112033195, "grad_norm": 54.21882247924805, "learning_rate": 1.1474190871369295e-05, "loss": 0.843, "step": 25697 }, { "epoch": 21.326141078838173, "grad_norm": 25.18439483642578, "learning_rate": 1.1473858921161827e-05, "loss": 0.5658, "step": 25698 }, { "epoch": 21.326970954356845, "grad_norm": 27.459590911865234, "learning_rate": 1.1473526970954358e-05, "loss": 0.7691, "step": 25699 }, { "epoch": 21.327800829875518, "grad_norm": 26.888320922851562, "learning_rate": 1.1473195020746888e-05, "loss": 0.4395, "step": 25700 }, { "epoch": 21.32863070539419, "grad_norm": 30.87481117248535, "learning_rate": 1.147286307053942e-05, "loss": 0.4909, "step": 25701 }, { "epoch": 21.329460580912862, "grad_norm": 60.307498931884766, "learning_rate": 1.147253112033195e-05, "loss": 0.8051, "step": 25702 }, { "epoch": 21.330290456431534, "grad_norm": 54.66697692871094, "learning_rate": 1.1472199170124483e-05, "loss": 0.3735, "step": 25703 }, { "epoch": 21.331120331950206, "grad_norm": 33.19649124145508, "learning_rate": 1.1471867219917015e-05, "loss": 0.7271, "step": 25704 }, { "epoch": 21.33195020746888, "grad_norm": 73.45278930664062, "learning_rate": 1.1471535269709544e-05, "loss": 0.6343, "step": 25705 }, { "epoch": 21.33278008298755, "grad_norm": 48.49762725830078, "learning_rate": 
1.1471203319502076e-05, "loss": 0.5535, "step": 25706 }, { "epoch": 21.333609958506223, "grad_norm": 45.573760986328125, "learning_rate": 1.1470871369294608e-05, "loss": 0.3662, "step": 25707 }, { "epoch": 21.334439834024895, "grad_norm": 13.563490867614746, "learning_rate": 1.1470539419087138e-05, "loss": 0.3195, "step": 25708 }, { "epoch": 21.335269709543567, "grad_norm": 75.0583724975586, "learning_rate": 1.1470207468879669e-05, "loss": 0.4896, "step": 25709 }, { "epoch": 21.33609958506224, "grad_norm": 11.104483604431152, "learning_rate": 1.14698755186722e-05, "loss": 0.3192, "step": 25710 }, { "epoch": 21.33692946058091, "grad_norm": 16.329469680786133, "learning_rate": 1.1469543568464731e-05, "loss": 0.3079, "step": 25711 }, { "epoch": 21.337759336099584, "grad_norm": 28.5367374420166, "learning_rate": 1.1469211618257263e-05, "loss": 0.6499, "step": 25712 }, { "epoch": 21.338589211618256, "grad_norm": 51.699981689453125, "learning_rate": 1.1468879668049792e-05, "loss": 0.45, "step": 25713 }, { "epoch": 21.33941908713693, "grad_norm": 58.44026565551758, "learning_rate": 1.1468547717842324e-05, "loss": 0.6056, "step": 25714 }, { "epoch": 21.3402489626556, "grad_norm": 31.0120849609375, "learning_rate": 1.1468215767634856e-05, "loss": 0.3002, "step": 25715 }, { "epoch": 21.341078838174273, "grad_norm": 23.016277313232422, "learning_rate": 1.1467883817427388e-05, "loss": 0.4917, "step": 25716 }, { "epoch": 21.341908713692945, "grad_norm": 59.025367736816406, "learning_rate": 1.1467551867219917e-05, "loss": 0.4583, "step": 25717 }, { "epoch": 21.342738589211617, "grad_norm": 48.5986328125, "learning_rate": 1.1467219917012449e-05, "loss": 1.0808, "step": 25718 }, { "epoch": 21.34356846473029, "grad_norm": 18.503711700439453, "learning_rate": 1.1466887966804981e-05, "loss": 0.2072, "step": 25719 }, { "epoch": 21.34439834024896, "grad_norm": 58.05311584472656, "learning_rate": 1.1466556016597512e-05, "loss": 0.9059, "step": 25720 }, { "epoch": 21.345228215767634, 
"grad_norm": 78.25617980957031, "learning_rate": 1.1466224066390042e-05, "loss": 0.6462, "step": 25721 }, { "epoch": 21.346058091286306, "grad_norm": 46.66209030151367, "learning_rate": 1.1465892116182572e-05, "loss": 0.4211, "step": 25722 }, { "epoch": 21.346887966804978, "grad_norm": 59.316619873046875, "learning_rate": 1.1465560165975105e-05, "loss": 0.9791, "step": 25723 }, { "epoch": 21.34771784232365, "grad_norm": 67.05609893798828, "learning_rate": 1.1465228215767637e-05, "loss": 0.7338, "step": 25724 }, { "epoch": 21.348547717842322, "grad_norm": 70.21663665771484, "learning_rate": 1.1464896265560165e-05, "loss": 0.5597, "step": 25725 }, { "epoch": 21.349377593360995, "grad_norm": 28.629138946533203, "learning_rate": 1.1464564315352697e-05, "loss": 0.4324, "step": 25726 }, { "epoch": 21.350207468879667, "grad_norm": 83.61737060546875, "learning_rate": 1.146423236514523e-05, "loss": 1.0056, "step": 25727 }, { "epoch": 21.35103734439834, "grad_norm": 39.64639663696289, "learning_rate": 1.1463900414937762e-05, "loss": 0.4411, "step": 25728 }, { "epoch": 21.35186721991701, "grad_norm": 28.71026611328125, "learning_rate": 1.1463568464730292e-05, "loss": 0.5634, "step": 25729 }, { "epoch": 21.352697095435683, "grad_norm": 32.56791687011719, "learning_rate": 1.1463236514522823e-05, "loss": 0.365, "step": 25730 }, { "epoch": 21.353526970954356, "grad_norm": 39.03995132446289, "learning_rate": 1.1462904564315353e-05, "loss": 0.4931, "step": 25731 }, { "epoch": 21.354356846473028, "grad_norm": 76.16114807128906, "learning_rate": 1.1462572614107885e-05, "loss": 0.7087, "step": 25732 }, { "epoch": 21.3551867219917, "grad_norm": 27.77016258239746, "learning_rate": 1.1462240663900417e-05, "loss": 0.485, "step": 25733 }, { "epoch": 21.356016597510372, "grad_norm": 93.28404998779297, "learning_rate": 1.1461908713692946e-05, "loss": 0.787, "step": 25734 }, { "epoch": 21.356846473029044, "grad_norm": 45.396881103515625, "learning_rate": 1.1461576763485478e-05, "loss": 
0.4919, "step": 25735 }, { "epoch": 21.357676348547717, "grad_norm": 20.066347122192383, "learning_rate": 1.146124481327801e-05, "loss": 0.3644, "step": 25736 }, { "epoch": 21.35850622406639, "grad_norm": 10.643595695495605, "learning_rate": 1.146091286307054e-05, "loss": 0.2737, "step": 25737 }, { "epoch": 21.35933609958506, "grad_norm": 30.923906326293945, "learning_rate": 1.1460580912863071e-05, "loss": 0.5799, "step": 25738 }, { "epoch": 21.360165975103733, "grad_norm": 58.335487365722656, "learning_rate": 1.1460248962655603e-05, "loss": 0.6714, "step": 25739 }, { "epoch": 21.360995850622405, "grad_norm": 23.69747543334961, "learning_rate": 1.1459917012448133e-05, "loss": 0.3365, "step": 25740 }, { "epoch": 21.361825726141078, "grad_norm": 21.841413497924805, "learning_rate": 1.1459585062240666e-05, "loss": 0.5817, "step": 25741 }, { "epoch": 21.36265560165975, "grad_norm": 21.566417694091797, "learning_rate": 1.1459253112033196e-05, "loss": 0.4202, "step": 25742 }, { "epoch": 21.363485477178422, "grad_norm": 38.5838737487793, "learning_rate": 1.1458921161825726e-05, "loss": 0.407, "step": 25743 }, { "epoch": 21.364315352697094, "grad_norm": 48.35762023925781, "learning_rate": 1.1458589211618258e-05, "loss": 0.6024, "step": 25744 }, { "epoch": 21.365145228215766, "grad_norm": 57.608760833740234, "learning_rate": 1.145825726141079e-05, "loss": 0.4084, "step": 25745 }, { "epoch": 21.36597510373444, "grad_norm": 26.6330509185791, "learning_rate": 1.145792531120332e-05, "loss": 0.3386, "step": 25746 }, { "epoch": 21.36680497925311, "grad_norm": 55.48109817504883, "learning_rate": 1.1457593360995851e-05, "loss": 0.3464, "step": 25747 }, { "epoch": 21.367634854771783, "grad_norm": 51.52326965332031, "learning_rate": 1.1457261410788384e-05, "loss": 0.6909, "step": 25748 }, { "epoch": 21.368464730290455, "grad_norm": 120.7615737915039, "learning_rate": 1.1456929460580914e-05, "loss": 0.8933, "step": 25749 }, { "epoch": 21.369294605809127, "grad_norm": 
67.56309509277344, "learning_rate": 1.1456597510373444e-05, "loss": 1.3755, "step": 25750 }, { "epoch": 21.3701244813278, "grad_norm": 33.4235954284668, "learning_rate": 1.1456265560165975e-05, "loss": 0.4192, "step": 25751 }, { "epoch": 21.37095435684647, "grad_norm": 26.900936126708984, "learning_rate": 1.1455933609958507e-05, "loss": 0.332, "step": 25752 }, { "epoch": 21.371784232365144, "grad_norm": 29.731569290161133, "learning_rate": 1.1455601659751039e-05, "loss": 0.4791, "step": 25753 }, { "epoch": 21.372614107883816, "grad_norm": 44.450050354003906, "learning_rate": 1.1455269709543568e-05, "loss": 0.4679, "step": 25754 }, { "epoch": 21.37344398340249, "grad_norm": 39.448875427246094, "learning_rate": 1.14549377593361e-05, "loss": 0.4756, "step": 25755 }, { "epoch": 21.37427385892116, "grad_norm": 24.143428802490234, "learning_rate": 1.1454605809128632e-05, "loss": 0.4403, "step": 25756 }, { "epoch": 21.375103734439833, "grad_norm": 37.874637603759766, "learning_rate": 1.1454273858921164e-05, "loss": 0.6235, "step": 25757 }, { "epoch": 21.375933609958505, "grad_norm": 68.52141571044922, "learning_rate": 1.1453941908713694e-05, "loss": 0.7256, "step": 25758 }, { "epoch": 21.376763485477177, "grad_norm": 75.61100006103516, "learning_rate": 1.1453609958506225e-05, "loss": 1.0519, "step": 25759 }, { "epoch": 21.37759336099585, "grad_norm": 51.71034240722656, "learning_rate": 1.1453278008298755e-05, "loss": 0.4671, "step": 25760 }, { "epoch": 21.37842323651452, "grad_norm": 17.687685012817383, "learning_rate": 1.1452946058091287e-05, "loss": 0.2844, "step": 25761 }, { "epoch": 21.379253112033194, "grad_norm": 32.698177337646484, "learning_rate": 1.145261410788382e-05, "loss": 0.4055, "step": 25762 }, { "epoch": 21.380082987551866, "grad_norm": 18.61746597290039, "learning_rate": 1.1452282157676348e-05, "loss": 0.2393, "step": 25763 }, { "epoch": 21.380912863070538, "grad_norm": 35.24666976928711, "learning_rate": 1.145195020746888e-05, "loss": 0.4128, "step": 
25764 }, { "epoch": 21.38174273858921, "grad_norm": 58.49177932739258, "learning_rate": 1.1451618257261412e-05, "loss": 0.4026, "step": 25765 }, { "epoch": 21.382572614107882, "grad_norm": 26.299001693725586, "learning_rate": 1.1451286307053944e-05, "loss": 0.5126, "step": 25766 }, { "epoch": 21.383402489626555, "grad_norm": 43.282508850097656, "learning_rate": 1.1450954356846473e-05, "loss": 0.9274, "step": 25767 }, { "epoch": 21.384232365145227, "grad_norm": 21.48047637939453, "learning_rate": 1.1450622406639005e-05, "loss": 0.5512, "step": 25768 }, { "epoch": 21.3850622406639, "grad_norm": 76.26415252685547, "learning_rate": 1.1450290456431536e-05, "loss": 0.8069, "step": 25769 }, { "epoch": 21.38589211618257, "grad_norm": 85.43486785888672, "learning_rate": 1.1449958506224068e-05, "loss": 0.4585, "step": 25770 }, { "epoch": 21.386721991701243, "grad_norm": 57.64588165283203, "learning_rate": 1.1449626556016598e-05, "loss": 0.7379, "step": 25771 }, { "epoch": 21.387551867219916, "grad_norm": 27.675830841064453, "learning_rate": 1.1449294605809129e-05, "loss": 0.3933, "step": 25772 }, { "epoch": 21.388381742738588, "grad_norm": 25.35345458984375, "learning_rate": 1.144896265560166e-05, "loss": 0.2739, "step": 25773 }, { "epoch": 21.38921161825726, "grad_norm": 28.288928985595703, "learning_rate": 1.1448630705394193e-05, "loss": 0.5684, "step": 25774 }, { "epoch": 21.390041493775932, "grad_norm": 28.015060424804688, "learning_rate": 1.1448298755186722e-05, "loss": 0.5306, "step": 25775 }, { "epoch": 21.390871369294604, "grad_norm": 57.94776916503906, "learning_rate": 1.1447966804979254e-05, "loss": 0.8321, "step": 25776 }, { "epoch": 21.391701244813277, "grad_norm": 53.305084228515625, "learning_rate": 1.1447634854771786e-05, "loss": 0.4214, "step": 25777 }, { "epoch": 21.39253112033195, "grad_norm": 70.1392822265625, "learning_rate": 1.1447302904564316e-05, "loss": 0.697, "step": 25778 }, { "epoch": 21.39336099585062, "grad_norm": 119.64268493652344, 
"learning_rate": 1.1446970954356847e-05, "loss": 0.9685, "step": 25779 }, { "epoch": 21.394190871369293, "grad_norm": 72.35359191894531, "learning_rate": 1.1446639004149379e-05, "loss": 0.3744, "step": 25780 }, { "epoch": 21.395020746887965, "grad_norm": 30.17313575744629, "learning_rate": 1.144630705394191e-05, "loss": 0.5406, "step": 25781 }, { "epoch": 21.395850622406638, "grad_norm": 23.216032028198242, "learning_rate": 1.1445975103734441e-05, "loss": 0.3328, "step": 25782 }, { "epoch": 21.39668049792531, "grad_norm": 39.85845184326172, "learning_rate": 1.1445643153526973e-05, "loss": 0.6832, "step": 25783 }, { "epoch": 21.397510373443982, "grad_norm": 50.17258834838867, "learning_rate": 1.1445311203319502e-05, "loss": 0.556, "step": 25784 }, { "epoch": 21.398340248962654, "grad_norm": 52.09583282470703, "learning_rate": 1.1444979253112034e-05, "loss": 0.3722, "step": 25785 }, { "epoch": 21.399170124481326, "grad_norm": 28.693647384643555, "learning_rate": 1.1444647302904566e-05, "loss": 0.4675, "step": 25786 }, { "epoch": 21.4, "grad_norm": 14.333069801330566, "learning_rate": 1.1444315352697097e-05, "loss": 0.3349, "step": 25787 }, { "epoch": 21.40082987551867, "grad_norm": 57.99890899658203, "learning_rate": 1.1443983402489627e-05, "loss": 0.7379, "step": 25788 }, { "epoch": 21.401659751037343, "grad_norm": 62.74544906616211, "learning_rate": 1.144365145228216e-05, "loss": 0.5432, "step": 25789 }, { "epoch": 21.402489626556015, "grad_norm": 25.4346866607666, "learning_rate": 1.144331950207469e-05, "loss": 0.2858, "step": 25790 }, { "epoch": 21.403319502074687, "grad_norm": 67.15023040771484, "learning_rate": 1.1442987551867222e-05, "loss": 0.7368, "step": 25791 }, { "epoch": 21.40414937759336, "grad_norm": 49.7350959777832, "learning_rate": 1.144265560165975e-05, "loss": 0.8597, "step": 25792 }, { "epoch": 21.40497925311203, "grad_norm": 67.63037109375, "learning_rate": 1.1442323651452283e-05, "loss": 0.3661, "step": 25793 }, { "epoch": 21.405809128630704, 
"grad_norm": 54.43012237548828, "learning_rate": 1.1441991701244815e-05, "loss": 0.3979, "step": 25794 }, { "epoch": 21.406639004149376, "grad_norm": 39.215232849121094, "learning_rate": 1.1441659751037347e-05, "loss": 0.6953, "step": 25795 }, { "epoch": 21.40746887966805, "grad_norm": 15.350724220275879, "learning_rate": 1.1441327800829876e-05, "loss": 0.4258, "step": 25796 }, { "epoch": 21.40829875518672, "grad_norm": 34.29384231567383, "learning_rate": 1.1440995850622408e-05, "loss": 0.4504, "step": 25797 }, { "epoch": 21.409128630705393, "grad_norm": 15.229174613952637, "learning_rate": 1.1440663900414938e-05, "loss": 0.2878, "step": 25798 }, { "epoch": 21.409958506224065, "grad_norm": 16.523273468017578, "learning_rate": 1.144033195020747e-05, "loss": 0.3339, "step": 25799 }, { "epoch": 21.410788381742737, "grad_norm": 60.92072296142578, "learning_rate": 1.144e-05, "loss": 0.4485, "step": 25800 }, { "epoch": 21.41161825726141, "grad_norm": 132.91297912597656, "learning_rate": 1.1439668049792531e-05, "loss": 0.411, "step": 25801 }, { "epoch": 21.41244813278008, "grad_norm": 25.457294464111328, "learning_rate": 1.1439336099585063e-05, "loss": 0.4097, "step": 25802 }, { "epoch": 21.413278008298754, "grad_norm": 40.61455535888672, "learning_rate": 1.1439004149377595e-05, "loss": 0.6539, "step": 25803 }, { "epoch": 21.414107883817426, "grad_norm": 26.830965042114258, "learning_rate": 1.1438672199170124e-05, "loss": 0.5335, "step": 25804 }, { "epoch": 21.414937759336098, "grad_norm": 17.47569465637207, "learning_rate": 1.1438340248962656e-05, "loss": 0.3317, "step": 25805 }, { "epoch": 21.41576763485477, "grad_norm": 38.02692794799805, "learning_rate": 1.1438008298755188e-05, "loss": 0.4835, "step": 25806 }, { "epoch": 21.416597510373443, "grad_norm": 16.00167465209961, "learning_rate": 1.1437676348547719e-05, "loss": 0.3456, "step": 25807 }, { "epoch": 21.417427385892115, "grad_norm": 42.07611083984375, "learning_rate": 1.1437344398340249e-05, "loss": 0.4236, 
"step": 25808 }, { "epoch": 21.418257261410787, "grad_norm": 21.923681259155273, "learning_rate": 1.1437012448132781e-05, "loss": 0.2857, "step": 25809 }, { "epoch": 21.41908713692946, "grad_norm": 60.041194915771484, "learning_rate": 1.1436680497925311e-05, "loss": 0.973, "step": 25810 }, { "epoch": 21.41991701244813, "grad_norm": 36.37641525268555, "learning_rate": 1.1436348547717844e-05, "loss": 0.3167, "step": 25811 }, { "epoch": 21.420746887966803, "grad_norm": 42.93668746948242, "learning_rate": 1.1436016597510376e-05, "loss": 0.4011, "step": 25812 }, { "epoch": 21.421576763485476, "grad_norm": 36.580970764160156, "learning_rate": 1.1435684647302904e-05, "loss": 0.6693, "step": 25813 }, { "epoch": 21.422406639004148, "grad_norm": 15.800600051879883, "learning_rate": 1.1435352697095437e-05, "loss": 0.2439, "step": 25814 }, { "epoch": 21.42323651452282, "grad_norm": 94.58758544921875, "learning_rate": 1.1435020746887969e-05, "loss": 0.543, "step": 25815 }, { "epoch": 21.424066390041492, "grad_norm": 97.68802642822266, "learning_rate": 1.1434688796680499e-05, "loss": 0.623, "step": 25816 }, { "epoch": 21.424896265560164, "grad_norm": 34.68812561035156, "learning_rate": 1.143435684647303e-05, "loss": 0.4649, "step": 25817 }, { "epoch": 21.425726141078837, "grad_norm": 76.17527770996094, "learning_rate": 1.1434024896265562e-05, "loss": 0.6446, "step": 25818 }, { "epoch": 21.42655601659751, "grad_norm": 62.35527038574219, "learning_rate": 1.1433692946058092e-05, "loss": 0.5844, "step": 25819 }, { "epoch": 21.42738589211618, "grad_norm": 41.024070739746094, "learning_rate": 1.1433360995850624e-05, "loss": 0.4123, "step": 25820 }, { "epoch": 21.428215767634853, "grad_norm": 33.614112854003906, "learning_rate": 1.1433029045643153e-05, "loss": 0.4761, "step": 25821 }, { "epoch": 21.429045643153525, "grad_norm": 163.86082458496094, "learning_rate": 1.1432697095435685e-05, "loss": 0.8893, "step": 25822 }, { "epoch": 21.429875518672198, "grad_norm": 98.40367126464844, 
"learning_rate": 1.1432365145228217e-05, "loss": 0.3831, "step": 25823 }, { "epoch": 21.43070539419087, "grad_norm": 55.39850616455078, "learning_rate": 1.1432033195020749e-05, "loss": 0.5667, "step": 25824 }, { "epoch": 21.431535269709542, "grad_norm": 38.74867630004883, "learning_rate": 1.1431701244813278e-05, "loss": 0.4597, "step": 25825 }, { "epoch": 21.432365145228214, "grad_norm": 17.776573181152344, "learning_rate": 1.143136929460581e-05, "loss": 0.4527, "step": 25826 }, { "epoch": 21.433195020746886, "grad_norm": 20.739131927490234, "learning_rate": 1.1431037344398342e-05, "loss": 0.4308, "step": 25827 }, { "epoch": 21.43402489626556, "grad_norm": 35.60910415649414, "learning_rate": 1.1430705394190872e-05, "loss": 0.4681, "step": 25828 }, { "epoch": 21.43485477178423, "grad_norm": 77.30363464355469, "learning_rate": 1.1430373443983403e-05, "loss": 0.5372, "step": 25829 }, { "epoch": 21.435684647302903, "grad_norm": 31.175779342651367, "learning_rate": 1.1430041493775933e-05, "loss": 0.2808, "step": 25830 }, { "epoch": 21.436514522821575, "grad_norm": 33.9113883972168, "learning_rate": 1.1429709543568465e-05, "loss": 0.6463, "step": 25831 }, { "epoch": 21.437344398340247, "grad_norm": 88.44576263427734, "learning_rate": 1.1429377593360998e-05, "loss": 0.6556, "step": 25832 }, { "epoch": 21.43817427385892, "grad_norm": 60.78281784057617, "learning_rate": 1.1429045643153526e-05, "loss": 1.102, "step": 25833 }, { "epoch": 21.439004149377592, "grad_norm": 15.334708213806152, "learning_rate": 1.1428713692946058e-05, "loss": 0.3024, "step": 25834 }, { "epoch": 21.439834024896264, "grad_norm": 20.056529998779297, "learning_rate": 1.142838174273859e-05, "loss": 0.4123, "step": 25835 }, { "epoch": 21.440663900414936, "grad_norm": 69.36893463134766, "learning_rate": 1.1428049792531123e-05, "loss": 0.8921, "step": 25836 }, { "epoch": 21.44149377593361, "grad_norm": 39.936767578125, "learning_rate": 1.1427717842323653e-05, "loss": 0.5231, "step": 25837 }, { "epoch": 
21.44232365145228, "grad_norm": 159.1621856689453, "learning_rate": 1.1427385892116183e-05, "loss": 0.5689, "step": 25838 }, { "epoch": 21.443153526970953, "grad_norm": 64.87776184082031, "learning_rate": 1.1427053941908714e-05, "loss": 0.2828, "step": 25839 }, { "epoch": 21.443983402489625, "grad_norm": 26.316814422607422, "learning_rate": 1.1426721991701246e-05, "loss": 0.2875, "step": 25840 }, { "epoch": 21.444813278008297, "grad_norm": 19.263439178466797, "learning_rate": 1.1426390041493778e-05, "loss": 0.338, "step": 25841 }, { "epoch": 21.44564315352697, "grad_norm": 54.84700012207031, "learning_rate": 1.1426058091286307e-05, "loss": 0.7233, "step": 25842 }, { "epoch": 21.44647302904564, "grad_norm": 25.081832885742188, "learning_rate": 1.1425726141078839e-05, "loss": 0.3898, "step": 25843 }, { "epoch": 21.447302904564314, "grad_norm": 48.10673904418945, "learning_rate": 1.1425394190871371e-05, "loss": 0.9391, "step": 25844 }, { "epoch": 21.448132780082986, "grad_norm": 52.14925765991211, "learning_rate": 1.1425062240663903e-05, "loss": 0.329, "step": 25845 }, { "epoch": 21.448962655601658, "grad_norm": 38.94580078125, "learning_rate": 1.1424730290456432e-05, "loss": 0.4158, "step": 25846 }, { "epoch": 21.44979253112033, "grad_norm": 43.376224517822266, "learning_rate": 1.1424398340248964e-05, "loss": 1.1543, "step": 25847 }, { "epoch": 21.450622406639003, "grad_norm": 22.473112106323242, "learning_rate": 1.1424066390041494e-05, "loss": 0.3595, "step": 25848 }, { "epoch": 21.451452282157675, "grad_norm": 24.51230239868164, "learning_rate": 1.1423734439834026e-05, "loss": 0.3419, "step": 25849 }, { "epoch": 21.452282157676347, "grad_norm": 27.117053985595703, "learning_rate": 1.1423402489626557e-05, "loss": 0.8111, "step": 25850 }, { "epoch": 21.45311203319502, "grad_norm": 94.03575134277344, "learning_rate": 1.1423070539419087e-05, "loss": 1.0471, "step": 25851 }, { "epoch": 21.45394190871369, "grad_norm": 27.753162384033203, "learning_rate": 
1.142273858921162e-05, "loss": 0.3092, "step": 25852 }, { "epoch": 21.454771784232364, "grad_norm": 31.633258819580078, "learning_rate": 1.1422406639004151e-05, "loss": 0.4571, "step": 25853 }, { "epoch": 21.455601659751036, "grad_norm": 67.70983123779297, "learning_rate": 1.142207468879668e-05, "loss": 1.0031, "step": 25854 }, { "epoch": 21.456431535269708, "grad_norm": 30.234773635864258, "learning_rate": 1.1421742738589212e-05, "loss": 0.2974, "step": 25855 }, { "epoch": 21.45726141078838, "grad_norm": 36.02634048461914, "learning_rate": 1.1421410788381744e-05, "loss": 0.7086, "step": 25856 }, { "epoch": 21.458091286307052, "grad_norm": 105.80828094482422, "learning_rate": 1.1421078838174275e-05, "loss": 0.8836, "step": 25857 }, { "epoch": 21.458921161825725, "grad_norm": 45.164039611816406, "learning_rate": 1.1420746887966805e-05, "loss": 0.8828, "step": 25858 }, { "epoch": 21.459751037344397, "grad_norm": 82.89012145996094, "learning_rate": 1.1420414937759337e-05, "loss": 0.3936, "step": 25859 }, { "epoch": 21.46058091286307, "grad_norm": 41.51997756958008, "learning_rate": 1.1420082987551868e-05, "loss": 0.4412, "step": 25860 }, { "epoch": 21.46141078838174, "grad_norm": 21.762584686279297, "learning_rate": 1.14197510373444e-05, "loss": 0.3733, "step": 25861 }, { "epoch": 21.462240663900413, "grad_norm": 44.8016357421875, "learning_rate": 1.1419419087136932e-05, "loss": 0.4257, "step": 25862 }, { "epoch": 21.463070539419085, "grad_norm": 47.76319885253906, "learning_rate": 1.141908713692946e-05, "loss": 0.9835, "step": 25863 }, { "epoch": 21.463900414937758, "grad_norm": 98.69668579101562, "learning_rate": 1.1418755186721993e-05, "loss": 0.772, "step": 25864 }, { "epoch": 21.46473029045643, "grad_norm": 11.969298362731934, "learning_rate": 1.1418423236514525e-05, "loss": 0.2587, "step": 25865 }, { "epoch": 21.465560165975102, "grad_norm": 31.514408111572266, "learning_rate": 1.1418091286307055e-05, "loss": 0.7197, "step": 25866 }, { "epoch": 
21.466390041493774, "grad_norm": 43.03926086425781, "learning_rate": 1.1417759336099586e-05, "loss": 0.7316, "step": 25867 }, { "epoch": 21.467219917012446, "grad_norm": 34.252296447753906, "learning_rate": 1.1417427385892116e-05, "loss": 0.8964, "step": 25868 }, { "epoch": 21.46804979253112, "grad_norm": 45.727943420410156, "learning_rate": 1.1417095435684648e-05, "loss": 0.5264, "step": 25869 }, { "epoch": 21.46887966804979, "grad_norm": 21.040761947631836, "learning_rate": 1.141676348547718e-05, "loss": 0.3556, "step": 25870 }, { "epoch": 21.469709543568463, "grad_norm": 26.52816390991211, "learning_rate": 1.1416431535269709e-05, "loss": 0.3476, "step": 25871 }, { "epoch": 21.470539419087135, "grad_norm": 110.03876495361328, "learning_rate": 1.1416099585062241e-05, "loss": 0.4115, "step": 25872 }, { "epoch": 21.471369294605807, "grad_norm": 22.956207275390625, "learning_rate": 1.1415767634854773e-05, "loss": 0.3409, "step": 25873 }, { "epoch": 21.47219917012448, "grad_norm": 33.04431915283203, "learning_rate": 1.1415435684647305e-05, "loss": 0.5635, "step": 25874 }, { "epoch": 21.473029045643152, "grad_norm": 119.54164123535156, "learning_rate": 1.1415103734439834e-05, "loss": 0.4613, "step": 25875 }, { "epoch": 21.473858921161824, "grad_norm": 52.8806266784668, "learning_rate": 1.1414771784232366e-05, "loss": 0.9595, "step": 25876 }, { "epoch": 21.474688796680496, "grad_norm": 27.858654022216797, "learning_rate": 1.1414439834024897e-05, "loss": 0.4246, "step": 25877 }, { "epoch": 21.47551867219917, "grad_norm": 39.27986526489258, "learning_rate": 1.1414107883817429e-05, "loss": 0.4525, "step": 25878 }, { "epoch": 21.47634854771784, "grad_norm": 42.40468215942383, "learning_rate": 1.141377593360996e-05, "loss": 0.5721, "step": 25879 }, { "epoch": 21.477178423236513, "grad_norm": 37.91190719604492, "learning_rate": 1.141344398340249e-05, "loss": 0.4405, "step": 25880 }, { "epoch": 21.478008298755185, "grad_norm": 40.427433013916016, "learning_rate": 
1.1413112033195022e-05, "loss": 0.5109, "step": 25881 }, { "epoch": 21.478838174273857, "grad_norm": 49.8599853515625, "learning_rate": 1.1412780082987554e-05, "loss": 0.2706, "step": 25882 }, { "epoch": 21.47966804979253, "grad_norm": NaN, "learning_rate": 1.1412780082987554e-05, "loss": 1.4277, "step": 25883 }, { "epoch": 21.4804979253112, "grad_norm": 125.02332305908203, "learning_rate": 1.1412448132780083e-05, "loss": 0.6179, "step": 25884 }, { "epoch": 21.481327800829874, "grad_norm": 15.519370079040527, "learning_rate": 1.1412116182572615e-05, "loss": 0.2466, "step": 25885 }, { "epoch": 21.482157676348546, "grad_norm": 21.67671012878418, "learning_rate": 1.1411784232365147e-05, "loss": 0.3592, "step": 25886 }, { "epoch": 21.482987551867218, "grad_norm": 62.27842330932617, "learning_rate": 1.1411452282157677e-05, "loss": 0.739, "step": 25887 }, { "epoch": 21.48381742738589, "grad_norm": 39.923431396484375, "learning_rate": 1.1411120331950208e-05, "loss": 0.8216, "step": 25888 }, { "epoch": 21.484647302904563, "grad_norm": 27.113922119140625, "learning_rate": 1.141078838174274e-05, "loss": 0.6627, "step": 25889 }, { "epoch": 21.485477178423235, "grad_norm": 37.949989318847656, "learning_rate": 1.141045643153527e-05, "loss": 0.5721, "step": 25890 }, { "epoch": 21.486307053941907, "grad_norm": 117.49629211425781, "learning_rate": 1.1410124481327802e-05, "loss": 1.0585, "step": 25891 }, { "epoch": 21.48713692946058, "grad_norm": 63.071075439453125, "learning_rate": 1.1409792531120334e-05, "loss": 0.3445, "step": 25892 }, { "epoch": 21.48796680497925, "grad_norm": 33.756858825683594, "learning_rate": 1.1409460580912863e-05, "loss": 0.3878, "step": 25893 }, { "epoch": 21.488796680497924, "grad_norm": 56.52239990234375, "learning_rate": 1.1409128630705395e-05, "loss": 0.7109, "step": 25894 }, { "epoch": 21.489626556016596, "grad_norm": 45.495391845703125, "learning_rate": 1.1408796680497927e-05, "loss": 0.2587, "step": 25895 }, { "epoch": 21.490456431535268, 
"grad_norm": 61.98740768432617, "learning_rate": 1.1408464730290458e-05, "loss": 0.6338, "step": 25896 }, { "epoch": 21.49128630705394, "grad_norm": 17.863143920898438, "learning_rate": 1.1408132780082988e-05, "loss": 0.5008, "step": 25897 }, { "epoch": 21.492116182572612, "grad_norm": 21.07445526123047, "learning_rate": 1.140780082987552e-05, "loss": 0.2288, "step": 25898 }, { "epoch": 21.492946058091285, "grad_norm": 19.70798683166504, "learning_rate": 1.140746887966805e-05, "loss": 0.3034, "step": 25899 }, { "epoch": 21.49377593360996, "grad_norm": 50.055599212646484, "learning_rate": 1.1407136929460583e-05, "loss": 0.4625, "step": 25900 }, { "epoch": 21.49460580912863, "grad_norm": 44.07292175292969, "learning_rate": 1.1406804979253111e-05, "loss": 0.3937, "step": 25901 }, { "epoch": 21.495435684647305, "grad_norm": 102.68704986572266, "learning_rate": 1.1406473029045644e-05, "loss": 0.7324, "step": 25902 }, { "epoch": 21.496265560165973, "grad_norm": 20.81400489807129, "learning_rate": 1.1406141078838176e-05, "loss": 0.4012, "step": 25903 }, { "epoch": 21.49709543568465, "grad_norm": 45.844947814941406, "learning_rate": 1.1405809128630708e-05, "loss": 0.5282, "step": 25904 }, { "epoch": 21.497925311203318, "grad_norm": 38.63576889038086, "learning_rate": 1.1405477178423236e-05, "loss": 0.6354, "step": 25905 }, { "epoch": 21.498755186721993, "grad_norm": 96.61444854736328, "learning_rate": 1.1405145228215769e-05, "loss": 0.4344, "step": 25906 }, { "epoch": 21.499585062240662, "grad_norm": 70.04296875, "learning_rate": 1.14048132780083e-05, "loss": 0.5851, "step": 25907 }, { "epoch": 21.500414937759338, "grad_norm": 46.70365905761719, "learning_rate": 1.1404481327800831e-05, "loss": 0.5444, "step": 25908 }, { "epoch": 21.501244813278007, "grad_norm": 63.03081512451172, "learning_rate": 1.1404149377593361e-05, "loss": 0.664, "step": 25909 }, { "epoch": 21.502074688796682, "grad_norm": 70.0023193359375, "learning_rate": 1.1403817427385892e-05, "loss": 0.4328, 
"step": 25910 }, { "epoch": 21.50290456431535, "grad_norm": 35.20443344116211, "learning_rate": 1.1403485477178424e-05, "loss": 0.4904, "step": 25911 }, { "epoch": 21.503734439834027, "grad_norm": 140.9884490966797, "learning_rate": 1.1403153526970956e-05, "loss": 0.524, "step": 25912 }, { "epoch": 21.504564315352695, "grad_norm": 19.693811416625977, "learning_rate": 1.1402821576763485e-05, "loss": 0.2456, "step": 25913 }, { "epoch": 21.50539419087137, "grad_norm": 20.30451202392578, "learning_rate": 1.1402489626556017e-05, "loss": 0.4114, "step": 25914 }, { "epoch": 21.50622406639004, "grad_norm": 28.589902877807617, "learning_rate": 1.1402157676348549e-05, "loss": 0.6526, "step": 25915 }, { "epoch": 21.507053941908715, "grad_norm": 52.07041931152344, "learning_rate": 1.140182572614108e-05, "loss": 0.5052, "step": 25916 }, { "epoch": 21.507883817427384, "grad_norm": 14.4224853515625, "learning_rate": 1.1401493775933612e-05, "loss": 0.3167, "step": 25917 }, { "epoch": 21.50871369294606, "grad_norm": 19.795799255371094, "learning_rate": 1.1401161825726142e-05, "loss": 0.2879, "step": 25918 }, { "epoch": 21.50954356846473, "grad_norm": 50.08063507080078, "learning_rate": 1.1400829875518672e-05, "loss": 0.9291, "step": 25919 }, { "epoch": 21.510373443983404, "grad_norm": 86.68479919433594, "learning_rate": 1.1400497925311204e-05, "loss": 0.5291, "step": 25920 }, { "epoch": 21.511203319502073, "grad_norm": 29.337793350219727, "learning_rate": 1.1400165975103737e-05, "loss": 0.2852, "step": 25921 }, { "epoch": 21.51203319502075, "grad_norm": 55.179115295410156, "learning_rate": 1.1399834024896265e-05, "loss": 0.7996, "step": 25922 }, { "epoch": 21.512863070539417, "grad_norm": 31.493133544921875, "learning_rate": 1.1399502074688797e-05, "loss": 0.4861, "step": 25923 }, { "epoch": 21.513692946058093, "grad_norm": 17.755075454711914, "learning_rate": 1.139917012448133e-05, "loss": 0.3258, "step": 25924 }, { "epoch": 21.51452282157676, "grad_norm": 23.883953094482422, 
"learning_rate": 1.139883817427386e-05, "loss": 0.3863, "step": 25925 }, { "epoch": 21.515352697095437, "grad_norm": 41.49729537963867, "learning_rate": 1.139850622406639e-05, "loss": 0.6839, "step": 25926 }, { "epoch": 21.51618257261411, "grad_norm": 19.40155029296875, "learning_rate": 1.1398174273858922e-05, "loss": 0.2903, "step": 25927 }, { "epoch": 21.517012448132782, "grad_norm": 59.2266960144043, "learning_rate": 1.1397842323651453e-05, "loss": 1.0713, "step": 25928 }, { "epoch": 21.517842323651454, "grad_norm": 45.784141540527344, "learning_rate": 1.1397510373443985e-05, "loss": 0.8694, "step": 25929 }, { "epoch": 21.518672199170126, "grad_norm": 22.703651428222656, "learning_rate": 1.1397178423236515e-05, "loss": 0.3716, "step": 25930 }, { "epoch": 21.5195020746888, "grad_norm": 10.581153869628906, "learning_rate": 1.1396846473029046e-05, "loss": 0.2742, "step": 25931 }, { "epoch": 21.52033195020747, "grad_norm": 62.77629470825195, "learning_rate": 1.1396514522821578e-05, "loss": 0.5113, "step": 25932 }, { "epoch": 21.521161825726143, "grad_norm": 25.252513885498047, "learning_rate": 1.139618257261411e-05, "loss": 0.3905, "step": 25933 }, { "epoch": 21.521991701244815, "grad_norm": 112.01090240478516, "learning_rate": 1.1395850622406639e-05, "loss": 0.5017, "step": 25934 }, { "epoch": 21.522821576763487, "grad_norm": 75.17662811279297, "learning_rate": 1.1395518672199171e-05, "loss": 0.7842, "step": 25935 }, { "epoch": 21.52365145228216, "grad_norm": 29.753400802612305, "learning_rate": 1.1395186721991703e-05, "loss": 0.3857, "step": 25936 }, { "epoch": 21.52448132780083, "grad_norm": 19.563953399658203, "learning_rate": 1.1394854771784233e-05, "loss": 0.3417, "step": 25937 }, { "epoch": 21.525311203319504, "grad_norm": 15.805208206176758, "learning_rate": 1.1394522821576764e-05, "loss": 0.3199, "step": 25938 }, { "epoch": 21.526141078838176, "grad_norm": 92.53033447265625, "learning_rate": 1.1394190871369294e-05, "loss": 1.1878, "step": 25939 }, { 
"epoch": 21.526970954356848, "grad_norm": 41.94819259643555, "learning_rate": 1.1393858921161826e-05, "loss": 0.8883, "step": 25940 }, { "epoch": 21.52780082987552, "grad_norm": 15.14614200592041, "learning_rate": 1.1393526970954358e-05, "loss": 0.4722, "step": 25941 }, { "epoch": 21.528630705394193, "grad_norm": 44.15739822387695, "learning_rate": 1.139319502074689e-05, "loss": 0.5584, "step": 25942 }, { "epoch": 21.529460580912865, "grad_norm": 62.13141632080078, "learning_rate": 1.139286307053942e-05, "loss": 0.6324, "step": 25943 }, { "epoch": 21.530290456431537, "grad_norm": 35.288360595703125, "learning_rate": 1.1392531120331951e-05, "loss": 0.9586, "step": 25944 }, { "epoch": 21.53112033195021, "grad_norm": 14.380349159240723, "learning_rate": 1.1392199170124483e-05, "loss": 0.2608, "step": 25945 }, { "epoch": 21.53195020746888, "grad_norm": 30.070188522338867, "learning_rate": 1.1391867219917014e-05, "loss": 0.3608, "step": 25946 }, { "epoch": 21.532780082987554, "grad_norm": 22.043188095092773, "learning_rate": 1.1391535269709544e-05, "loss": 0.272, "step": 25947 }, { "epoch": 21.533609958506226, "grad_norm": 41.5482292175293, "learning_rate": 1.1391203319502075e-05, "loss": 0.6722, "step": 25948 }, { "epoch": 21.534439834024898, "grad_norm": 62.41926193237305, "learning_rate": 1.1390871369294607e-05, "loss": 0.3361, "step": 25949 }, { "epoch": 21.53526970954357, "grad_norm": 45.03071594238281, "learning_rate": 1.1390539419087139e-05, "loss": 0.49, "step": 25950 }, { "epoch": 21.536099585062242, "grad_norm": 28.65850257873535, "learning_rate": 1.1390207468879668e-05, "loss": 0.2728, "step": 25951 }, { "epoch": 21.536929460580915, "grad_norm": 31.221586227416992, "learning_rate": 1.13898755186722e-05, "loss": 0.4704, "step": 25952 }, { "epoch": 21.537759336099587, "grad_norm": 69.90868377685547, "learning_rate": 1.1389543568464732e-05, "loss": 0.5085, "step": 25953 }, { "epoch": 21.53858921161826, "grad_norm": 34.00309371948242, "learning_rate": 
1.1389211618257264e-05, "loss": 0.4264, "step": 25954 }, { "epoch": 21.53941908713693, "grad_norm": 57.227638244628906, "learning_rate": 1.1388879668049793e-05, "loss": 1.4157, "step": 25955 }, { "epoch": 21.540248962655603, "grad_norm": 25.953752517700195, "learning_rate": 1.1388547717842325e-05, "loss": 0.3426, "step": 25956 }, { "epoch": 21.541078838174275, "grad_norm": 45.72493362426758, "learning_rate": 1.1388215767634855e-05, "loss": 0.4778, "step": 25957 }, { "epoch": 21.541908713692948, "grad_norm": 63.44757843017578, "learning_rate": 1.1387883817427387e-05, "loss": 0.4896, "step": 25958 }, { "epoch": 21.54273858921162, "grad_norm": 29.801607131958008, "learning_rate": 1.1387551867219918e-05, "loss": 0.6471, "step": 25959 }, { "epoch": 21.543568464730292, "grad_norm": 111.78459930419922, "learning_rate": 1.1387219917012448e-05, "loss": 0.4565, "step": 25960 }, { "epoch": 21.544398340248964, "grad_norm": 23.06440544128418, "learning_rate": 1.138688796680498e-05, "loss": 0.4451, "step": 25961 }, { "epoch": 21.545228215767636, "grad_norm": 53.559593200683594, "learning_rate": 1.1386556016597512e-05, "loss": 0.501, "step": 25962 }, { "epoch": 21.54605809128631, "grad_norm": 34.70667266845703, "learning_rate": 1.1386224066390041e-05, "loss": 0.3498, "step": 25963 }, { "epoch": 21.54688796680498, "grad_norm": 12.173117637634277, "learning_rate": 1.1385892116182573e-05, "loss": 0.2332, "step": 25964 }, { "epoch": 21.547717842323653, "grad_norm": 198.35797119140625, "learning_rate": 1.1385560165975105e-05, "loss": 0.7061, "step": 25965 }, { "epoch": 21.548547717842325, "grad_norm": 19.67919921875, "learning_rate": 1.1385228215767636e-05, "loss": 0.3027, "step": 25966 }, { "epoch": 21.549377593360997, "grad_norm": 37.47937774658203, "learning_rate": 1.1384896265560166e-05, "loss": 0.3748, "step": 25967 }, { "epoch": 21.55020746887967, "grad_norm": 23.28627586364746, "learning_rate": 1.1384564315352698e-05, "loss": 0.327, "step": 25968 }, { "epoch": 
21.551037344398342, "grad_norm": 11.632109642028809, "learning_rate": 1.1384232365145229e-05, "loss": 0.2485, "step": 25969 }, { "epoch": 21.551867219917014, "grad_norm": 60.51052474975586, "learning_rate": 1.138390041493776e-05, "loss": 0.614, "step": 25970 }, { "epoch": 21.552697095435686, "grad_norm": 61.115577697753906, "learning_rate": 1.1383568464730293e-05, "loss": 0.8077, "step": 25971 }, { "epoch": 21.55352697095436, "grad_norm": 18.366342544555664, "learning_rate": 1.1383236514522822e-05, "loss": 0.2976, "step": 25972 }, { "epoch": 21.55435684647303, "grad_norm": 5.489126205444336, "learning_rate": 1.1382904564315354e-05, "loss": 0.2081, "step": 25973 }, { "epoch": 21.555186721991703, "grad_norm": 70.69461822509766, "learning_rate": 1.1382572614107886e-05, "loss": 0.4452, "step": 25974 }, { "epoch": 21.556016597510375, "grad_norm": 56.8210334777832, "learning_rate": 1.1382240663900416e-05, "loss": 1.1222, "step": 25975 }, { "epoch": 21.556846473029047, "grad_norm": 70.70710754394531, "learning_rate": 1.1381908713692947e-05, "loss": 0.9583, "step": 25976 }, { "epoch": 21.55767634854772, "grad_norm": 20.39558219909668, "learning_rate": 1.1381576763485479e-05, "loss": 0.3294, "step": 25977 }, { "epoch": 21.55850622406639, "grad_norm": 35.70943069458008, "learning_rate": 1.1381244813278009e-05, "loss": 0.4274, "step": 25978 }, { "epoch": 21.559336099585064, "grad_norm": 26.07836151123047, "learning_rate": 1.1380912863070541e-05, "loss": 0.4584, "step": 25979 }, { "epoch": 21.560165975103736, "grad_norm": 164.41195678710938, "learning_rate": 1.138058091286307e-05, "loss": 0.8161, "step": 25980 }, { "epoch": 21.560995850622408, "grad_norm": 32.40061950683594, "learning_rate": 1.1380248962655602e-05, "loss": 0.6296, "step": 25981 }, { "epoch": 21.56182572614108, "grad_norm": 22.03688621520996, "learning_rate": 1.1379917012448134e-05, "loss": 0.4649, "step": 25982 }, { "epoch": 21.562655601659753, "grad_norm": 22.319841384887695, "learning_rate": 
1.1379585062240666e-05, "loss": 0.2961, "step": 25983 }, { "epoch": 21.563485477178425, "grad_norm": 26.175071716308594, "learning_rate": 1.1379253112033195e-05, "loss": 0.561, "step": 25984 }, { "epoch": 21.564315352697097, "grad_norm": 41.45686340332031, "learning_rate": 1.1378921161825727e-05, "loss": 1.0313, "step": 25985 }, { "epoch": 21.56514522821577, "grad_norm": 52.72477340698242, "learning_rate": 1.1378589211618258e-05, "loss": 0.508, "step": 25986 }, { "epoch": 21.56597510373444, "grad_norm": 52.42531967163086, "learning_rate": 1.137825726141079e-05, "loss": 0.5997, "step": 25987 }, { "epoch": 21.566804979253114, "grad_norm": 31.563003540039062, "learning_rate": 1.137792531120332e-05, "loss": 0.4898, "step": 25988 }, { "epoch": 21.567634854771786, "grad_norm": 196.7426300048828, "learning_rate": 1.137759336099585e-05, "loss": 0.7321, "step": 25989 }, { "epoch": 21.568464730290458, "grad_norm": 44.819950103759766, "learning_rate": 1.1377261410788383e-05, "loss": 0.2974, "step": 25990 }, { "epoch": 21.56929460580913, "grad_norm": 41.248779296875, "learning_rate": 1.1376929460580915e-05, "loss": 0.6589, "step": 25991 }, { "epoch": 21.570124481327802, "grad_norm": 48.52645492553711, "learning_rate": 1.1376597510373443e-05, "loss": 0.3656, "step": 25992 }, { "epoch": 21.570954356846475, "grad_norm": 34.82981872558594, "learning_rate": 1.1376265560165976e-05, "loss": 0.5823, "step": 25993 }, { "epoch": 21.571784232365147, "grad_norm": 27.545488357543945, "learning_rate": 1.1375933609958508e-05, "loss": 0.5458, "step": 25994 }, { "epoch": 21.57261410788382, "grad_norm": 30.020286560058594, "learning_rate": 1.1375601659751038e-05, "loss": 0.2362, "step": 25995 }, { "epoch": 21.57344398340249, "grad_norm": 43.39513397216797, "learning_rate": 1.137526970954357e-05, "loss": 0.791, "step": 25996 }, { "epoch": 21.574273858921163, "grad_norm": 35.4140739440918, "learning_rate": 1.13749377593361e-05, "loss": 0.6063, "step": 25997 }, { "epoch": 21.575103734439836, 
"grad_norm": 61.94881820678711, "learning_rate": 1.1374605809128631e-05, "loss": 0.5481, "step": 25998 }, { "epoch": 21.575933609958508, "grad_norm": 26.922195434570312, "learning_rate": 1.1374273858921163e-05, "loss": 0.3284, "step": 25999 }, { "epoch": 21.57676348547718, "grad_norm": 13.367977142333984, "learning_rate": 1.1373941908713695e-05, "loss": 0.2499, "step": 26000 }, { "epoch": 21.577593360995852, "grad_norm": 44.59806442260742, "learning_rate": 1.1373609958506224e-05, "loss": 0.6971, "step": 26001 }, { "epoch": 21.578423236514524, "grad_norm": 27.931577682495117, "learning_rate": 1.1373278008298756e-05, "loss": 0.6074, "step": 26002 }, { "epoch": 21.579253112033197, "grad_norm": 52.51653289794922, "learning_rate": 1.1372946058091288e-05, "loss": 0.7069, "step": 26003 }, { "epoch": 21.58008298755187, "grad_norm": 34.37213134765625, "learning_rate": 1.1372614107883819e-05, "loss": 0.3774, "step": 26004 }, { "epoch": 21.58091286307054, "grad_norm": 52.818912506103516, "learning_rate": 1.1372282157676349e-05, "loss": 0.6119, "step": 26005 }, { "epoch": 21.581742738589213, "grad_norm": 34.439781188964844, "learning_rate": 1.1371950207468881e-05, "loss": 0.462, "step": 26006 }, { "epoch": 21.582572614107885, "grad_norm": 23.150644302368164, "learning_rate": 1.1371618257261411e-05, "loss": 0.3139, "step": 26007 }, { "epoch": 21.583402489626557, "grad_norm": 33.529659271240234, "learning_rate": 1.1371286307053944e-05, "loss": 0.5374, "step": 26008 }, { "epoch": 21.58423236514523, "grad_norm": 49.911903381347656, "learning_rate": 1.1370954356846472e-05, "loss": 0.5354, "step": 26009 }, { "epoch": 21.585062240663902, "grad_norm": 87.70604705810547, "learning_rate": 1.1370622406639004e-05, "loss": 0.4138, "step": 26010 }, { "epoch": 21.585892116182574, "grad_norm": 45.207332611083984, "learning_rate": 1.1370290456431537e-05, "loss": 0.5731, "step": 26011 }, { "epoch": 21.586721991701246, "grad_norm": 31.472747802734375, "learning_rate": 1.1369958506224069e-05, 
"loss": 0.4579, "step": 26012 }, { "epoch": 21.58755186721992, "grad_norm": 85.62655639648438, "learning_rate": 1.1369626556016597e-05, "loss": 0.5377, "step": 26013 }, { "epoch": 21.58838174273859, "grad_norm": 67.6600112915039, "learning_rate": 1.136929460580913e-05, "loss": 1.2889, "step": 26014 }, { "epoch": 21.589211618257263, "grad_norm": 57.24384689331055, "learning_rate": 1.1368962655601662e-05, "loss": 1.065, "step": 26015 }, { "epoch": 21.590041493775935, "grad_norm": 18.538148880004883, "learning_rate": 1.1368630705394192e-05, "loss": 0.339, "step": 26016 }, { "epoch": 21.590871369294607, "grad_norm": 18.771636962890625, "learning_rate": 1.1368298755186722e-05, "loss": 0.2615, "step": 26017 }, { "epoch": 21.59170124481328, "grad_norm": 44.07067108154297, "learning_rate": 1.1367966804979253e-05, "loss": 1.1227, "step": 26018 }, { "epoch": 21.59253112033195, "grad_norm": 36.308555603027344, "learning_rate": 1.1367634854771785e-05, "loss": 0.5971, "step": 26019 }, { "epoch": 21.593360995850624, "grad_norm": 40.062477111816406, "learning_rate": 1.1367302904564317e-05, "loss": 0.4394, "step": 26020 }, { "epoch": 21.594190871369296, "grad_norm": 26.439388275146484, "learning_rate": 1.1366970954356849e-05, "loss": 0.4619, "step": 26021 }, { "epoch": 21.59502074688797, "grad_norm": 27.83189582824707, "learning_rate": 1.1366639004149378e-05, "loss": 0.5424, "step": 26022 }, { "epoch": 21.59585062240664, "grad_norm": 33.88246536254883, "learning_rate": 1.136630705394191e-05, "loss": 0.3579, "step": 26023 }, { "epoch": 21.596680497925313, "grad_norm": 37.60092544555664, "learning_rate": 1.1365975103734442e-05, "loss": 0.9264, "step": 26024 }, { "epoch": 21.597510373443985, "grad_norm": 50.45857238769531, "learning_rate": 1.1365643153526972e-05, "loss": 0.2982, "step": 26025 }, { "epoch": 21.598340248962657, "grad_norm": 13.010392189025879, "learning_rate": 1.1365311203319503e-05, "loss": 0.2545, "step": 26026 }, { "epoch": 21.59917012448133, "grad_norm": 
20.545106887817383, "learning_rate": 1.1364979253112033e-05, "loss": 0.432, "step": 26027 }, { "epoch": 21.6, "grad_norm": 29.79067039489746, "learning_rate": 1.1364647302904565e-05, "loss": 0.2254, "step": 26028 }, { "epoch": 21.600829875518674, "grad_norm": 73.38346099853516, "learning_rate": 1.1364315352697098e-05, "loss": 0.6534, "step": 26029 }, { "epoch": 21.601659751037346, "grad_norm": 40.42399978637695, "learning_rate": 1.1363983402489626e-05, "loss": 0.6761, "step": 26030 }, { "epoch": 21.602489626556018, "grad_norm": 35.23045349121094, "learning_rate": 1.1363651452282158e-05, "loss": 0.3709, "step": 26031 }, { "epoch": 21.60331950207469, "grad_norm": 57.1207160949707, "learning_rate": 1.136331950207469e-05, "loss": 0.4032, "step": 26032 }, { "epoch": 21.604149377593362, "grad_norm": 146.8502655029297, "learning_rate": 1.1362987551867221e-05, "loss": 0.6469, "step": 26033 }, { "epoch": 21.604979253112035, "grad_norm": 40.89263916015625, "learning_rate": 1.1362655601659751e-05, "loss": 0.7216, "step": 26034 }, { "epoch": 21.605809128630707, "grad_norm": 44.246089935302734, "learning_rate": 1.1362323651452283e-05, "loss": 0.3495, "step": 26035 }, { "epoch": 21.60663900414938, "grad_norm": 89.93120574951172, "learning_rate": 1.1361991701244814e-05, "loss": 0.5009, "step": 26036 }, { "epoch": 21.60746887966805, "grad_norm": 55.47216033935547, "learning_rate": 1.1361659751037346e-05, "loss": 0.817, "step": 26037 }, { "epoch": 21.608298755186723, "grad_norm": 26.319595336914062, "learning_rate": 1.1361327800829876e-05, "loss": 0.3375, "step": 26038 }, { "epoch": 21.609128630705396, "grad_norm": 20.127349853515625, "learning_rate": 1.1360995850622407e-05, "loss": 0.3197, "step": 26039 }, { "epoch": 21.609958506224068, "grad_norm": 31.21625328063965, "learning_rate": 1.1360663900414939e-05, "loss": 0.3175, "step": 26040 }, { "epoch": 21.61078838174274, "grad_norm": 54.51359939575195, "learning_rate": 1.1360331950207471e-05, "loss": 0.7174, "step": 26041 }, { 
"epoch": 21.611618257261412, "grad_norm": 40.65071487426758, "learning_rate": 1.136e-05, "loss": 0.6232, "step": 26042 }, { "epoch": 21.612448132780084, "grad_norm": 50.080387115478516, "learning_rate": 1.1359668049792532e-05, "loss": 0.8415, "step": 26043 }, { "epoch": 21.613278008298757, "grad_norm": 33.93007278442383, "learning_rate": 1.1359336099585064e-05, "loss": 0.4326, "step": 26044 }, { "epoch": 21.61410788381743, "grad_norm": 70.3097915649414, "learning_rate": 1.1359004149377594e-05, "loss": 0.8526, "step": 26045 }, { "epoch": 21.6149377593361, "grad_norm": 25.793346405029297, "learning_rate": 1.1358672199170125e-05, "loss": 0.6385, "step": 26046 }, { "epoch": 21.615767634854773, "grad_norm": 34.04240417480469, "learning_rate": 1.1358340248962657e-05, "loss": 0.709, "step": 26047 }, { "epoch": 21.616597510373445, "grad_norm": 74.21245574951172, "learning_rate": 1.1358008298755187e-05, "loss": 0.6794, "step": 26048 }, { "epoch": 21.617427385892118, "grad_norm": 14.116747856140137, "learning_rate": 1.135767634854772e-05, "loss": 0.2907, "step": 26049 }, { "epoch": 21.61825726141079, "grad_norm": 50.19921875, "learning_rate": 1.1357344398340251e-05, "loss": 0.7737, "step": 26050 }, { "epoch": 21.619087136929462, "grad_norm": 113.1417465209961, "learning_rate": 1.135701244813278e-05, "loss": 0.5912, "step": 26051 }, { "epoch": 21.619917012448134, "grad_norm": 109.92446899414062, "learning_rate": 1.1356680497925312e-05, "loss": 0.6517, "step": 26052 }, { "epoch": 21.620746887966806, "grad_norm": 36.12004852294922, "learning_rate": 1.1356348547717844e-05, "loss": 0.5015, "step": 26053 }, { "epoch": 21.62157676348548, "grad_norm": 73.51214599609375, "learning_rate": 1.1356016597510375e-05, "loss": 0.6388, "step": 26054 }, { "epoch": 21.62240663900415, "grad_norm": 151.83242797851562, "learning_rate": 1.1355684647302905e-05, "loss": 0.5835, "step": 26055 }, { "epoch": 21.623236514522823, "grad_norm": 34.49605178833008, "learning_rate": 1.1355352697095436e-05, 
"loss": 0.4241, "step": 26056 }, { "epoch": 21.624066390041495, "grad_norm": 42.81278610229492, "learning_rate": 1.1355020746887968e-05, "loss": 0.6279, "step": 26057 }, { "epoch": 21.624896265560167, "grad_norm": 33.21476364135742, "learning_rate": 1.13546887966805e-05, "loss": 0.3582, "step": 26058 }, { "epoch": 21.62572614107884, "grad_norm": 47.614810943603516, "learning_rate": 1.1354356846473029e-05, "loss": 0.373, "step": 26059 }, { "epoch": 21.62655601659751, "grad_norm": 26.362619400024414, "learning_rate": 1.135402489626556e-05, "loss": 0.2893, "step": 26060 }, { "epoch": 21.627385892116184, "grad_norm": 22.43590545654297, "learning_rate": 1.1353692946058093e-05, "loss": 0.3737, "step": 26061 }, { "epoch": 21.628215767634856, "grad_norm": 38.828468322753906, "learning_rate": 1.1353360995850625e-05, "loss": 0.6718, "step": 26062 }, { "epoch": 21.62904564315353, "grad_norm": 93.5279312133789, "learning_rate": 1.1353029045643154e-05, "loss": 0.3071, "step": 26063 }, { "epoch": 21.6298755186722, "grad_norm": 34.510955810546875, "learning_rate": 1.1352697095435686e-05, "loss": 0.3695, "step": 26064 }, { "epoch": 21.630705394190873, "grad_norm": 63.207054138183594, "learning_rate": 1.1352365145228216e-05, "loss": 0.5442, "step": 26065 }, { "epoch": 21.631535269709545, "grad_norm": 10.170367240905762, "learning_rate": 1.1352033195020748e-05, "loss": 0.2427, "step": 26066 }, { "epoch": 21.632365145228217, "grad_norm": 19.423294067382812, "learning_rate": 1.1351701244813279e-05, "loss": 0.4463, "step": 26067 }, { "epoch": 21.63319502074689, "grad_norm": 18.101293563842773, "learning_rate": 1.1351369294605809e-05, "loss": 0.3115, "step": 26068 }, { "epoch": 21.63402489626556, "grad_norm": 24.264789581298828, "learning_rate": 1.1351037344398341e-05, "loss": 0.4213, "step": 26069 }, { "epoch": 21.634854771784234, "grad_norm": 64.99349212646484, "learning_rate": 1.1350705394190873e-05, "loss": 1.1758, "step": 26070 }, { "epoch": 21.635684647302906, "grad_norm": 
21.247087478637695, "learning_rate": 1.1350373443983402e-05, "loss": 0.3006, "step": 26071 }, { "epoch": 21.636514522821578, "grad_norm": 49.710716247558594, "learning_rate": 1.1350041493775934e-05, "loss": 0.4577, "step": 26072 }, { "epoch": 21.63734439834025, "grad_norm": 27.1028995513916, "learning_rate": 1.1349709543568466e-05, "loss": 0.5827, "step": 26073 }, { "epoch": 21.638174273858922, "grad_norm": 114.73340606689453, "learning_rate": 1.1349377593360997e-05, "loss": 1.6961, "step": 26074 }, { "epoch": 21.639004149377595, "grad_norm": 22.662939071655273, "learning_rate": 1.1349045643153529e-05, "loss": 0.3553, "step": 26075 }, { "epoch": 21.639834024896267, "grad_norm": 66.10657501220703, "learning_rate": 1.1348713692946059e-05, "loss": 0.6147, "step": 26076 }, { "epoch": 21.64066390041494, "grad_norm": 44.31841278076172, "learning_rate": 1.134838174273859e-05, "loss": 0.5861, "step": 26077 }, { "epoch": 21.64149377593361, "grad_norm": 45.15155029296875, "learning_rate": 1.1348049792531122e-05, "loss": 0.6241, "step": 26078 }, { "epoch": 21.642323651452283, "grad_norm": 26.079458236694336, "learning_rate": 1.1347717842323654e-05, "loss": 0.2916, "step": 26079 }, { "epoch": 21.643153526970956, "grad_norm": 71.21021270751953, "learning_rate": 1.1347385892116182e-05, "loss": 0.8446, "step": 26080 }, { "epoch": 21.643983402489628, "grad_norm": 39.44365692138672, "learning_rate": 1.1347053941908715e-05, "loss": 0.5869, "step": 26081 }, { "epoch": 21.6448132780083, "grad_norm": 30.068206787109375, "learning_rate": 1.1346721991701247e-05, "loss": 0.5571, "step": 26082 }, { "epoch": 21.645643153526972, "grad_norm": 89.25739288330078, "learning_rate": 1.1346390041493777e-05, "loss": 0.933, "step": 26083 }, { "epoch": 21.646473029045644, "grad_norm": 18.10310173034668, "learning_rate": 1.1346058091286308e-05, "loss": 0.401, "step": 26084 }, { "epoch": 21.647302904564317, "grad_norm": 22.0028133392334, "learning_rate": 1.134572614107884e-05, "loss": 0.6293, "step": 
26085 }, { "epoch": 21.64813278008299, "grad_norm": 83.2748794555664, "learning_rate": 1.134539419087137e-05, "loss": 0.4345, "step": 26086 }, { "epoch": 21.64896265560166, "grad_norm": 30.205047607421875, "learning_rate": 1.1345062240663902e-05, "loss": 0.4385, "step": 26087 }, { "epoch": 21.649792531120333, "grad_norm": 26.460065841674805, "learning_rate": 1.1344730290456431e-05, "loss": 0.3858, "step": 26088 }, { "epoch": 21.650622406639005, "grad_norm": 52.883724212646484, "learning_rate": 1.1344398340248963e-05, "loss": 0.4114, "step": 26089 }, { "epoch": 21.651452282157678, "grad_norm": 35.83620834350586, "learning_rate": 1.1344066390041495e-05, "loss": 0.4092, "step": 26090 }, { "epoch": 21.65228215767635, "grad_norm": 41.3284912109375, "learning_rate": 1.1343734439834027e-05, "loss": 0.7077, "step": 26091 }, { "epoch": 21.653112033195022, "grad_norm": 11.729889869689941, "learning_rate": 1.1343402489626556e-05, "loss": 0.3562, "step": 26092 }, { "epoch": 21.653941908713694, "grad_norm": 34.74364471435547, "learning_rate": 1.1343070539419088e-05, "loss": 0.3615, "step": 26093 }, { "epoch": 21.654771784232366, "grad_norm": 177.16786193847656, "learning_rate": 1.134273858921162e-05, "loss": 0.8068, "step": 26094 }, { "epoch": 21.65560165975104, "grad_norm": 52.3229866027832, "learning_rate": 1.134240663900415e-05, "loss": 0.5117, "step": 26095 }, { "epoch": 21.65643153526971, "grad_norm": 26.03535270690918, "learning_rate": 1.1342074688796681e-05, "loss": 0.4814, "step": 26096 }, { "epoch": 21.657261410788383, "grad_norm": 44.81240463256836, "learning_rate": 1.1341742738589211e-05, "loss": 0.3684, "step": 26097 }, { "epoch": 21.658091286307055, "grad_norm": 40.0223503112793, "learning_rate": 1.1341410788381743e-05, "loss": 0.8393, "step": 26098 }, { "epoch": 21.658921161825727, "grad_norm": 66.27838897705078, "learning_rate": 1.1341078838174276e-05, "loss": 0.5136, "step": 26099 }, { "epoch": 21.6597510373444, "grad_norm": 62.08788299560547, "learning_rate": 
1.1340746887966808e-05, "loss": 0.6151, "step": 26100 }, { "epoch": 21.66058091286307, "grad_norm": 115.28477478027344, "learning_rate": 1.1340414937759336e-05, "loss": 0.882, "step": 26101 }, { "epoch": 21.661410788381744, "grad_norm": 30.60395622253418, "learning_rate": 1.1340082987551869e-05, "loss": 0.4569, "step": 26102 }, { "epoch": 21.662240663900416, "grad_norm": 14.28762149810791, "learning_rate": 1.1339751037344399e-05, "loss": 0.2776, "step": 26103 }, { "epoch": 21.66307053941909, "grad_norm": 19.794044494628906, "learning_rate": 1.1339419087136931e-05, "loss": 0.4859, "step": 26104 }, { "epoch": 21.66390041493776, "grad_norm": 20.745161056518555, "learning_rate": 1.1339087136929461e-05, "loss": 0.5146, "step": 26105 }, { "epoch": 21.664730290456433, "grad_norm": 54.261592864990234, "learning_rate": 1.1338755186721992e-05, "loss": 0.9359, "step": 26106 }, { "epoch": 21.665560165975105, "grad_norm": 68.01815795898438, "learning_rate": 1.1338423236514524e-05, "loss": 0.7079, "step": 26107 }, { "epoch": 21.666390041493777, "grad_norm": 50.82231521606445, "learning_rate": 1.1338091286307056e-05, "loss": 1.0877, "step": 26108 }, { "epoch": 21.66721991701245, "grad_norm": 24.955280303955078, "learning_rate": 1.1337759336099585e-05, "loss": 0.3614, "step": 26109 }, { "epoch": 21.66804979253112, "grad_norm": 50.33469009399414, "learning_rate": 1.1337427385892117e-05, "loss": 0.8802, "step": 26110 }, { "epoch": 21.668879668049794, "grad_norm": 15.74454402923584, "learning_rate": 1.1337095435684649e-05, "loss": 0.225, "step": 26111 }, { "epoch": 21.669709543568466, "grad_norm": 53.93056106567383, "learning_rate": 1.133676348547718e-05, "loss": 0.6802, "step": 26112 }, { "epoch": 21.670539419087138, "grad_norm": 73.34294128417969, "learning_rate": 1.133643153526971e-05, "loss": 0.5822, "step": 26113 }, { "epoch": 21.67136929460581, "grad_norm": 145.23910522460938, "learning_rate": 1.1336099585062242e-05, "loss": 0.6225, "step": 26114 }, { "epoch": 
21.672199170124482, "grad_norm": 41.007164001464844, "learning_rate": 1.1335767634854772e-05, "loss": 0.4292, "step": 26115 }, { "epoch": 21.673029045643155, "grad_norm": 37.2729377746582, "learning_rate": 1.1335435684647304e-05, "loss": 0.2659, "step": 26116 }, { "epoch": 21.673858921161827, "grad_norm": 51.9808235168457, "learning_rate": 1.1335103734439833e-05, "loss": 0.2789, "step": 26117 }, { "epoch": 21.6746887966805, "grad_norm": 50.962318420410156, "learning_rate": 1.1334771784232365e-05, "loss": 0.7204, "step": 26118 }, { "epoch": 21.67551867219917, "grad_norm": 42.79018020629883, "learning_rate": 1.1334439834024897e-05, "loss": 0.8682, "step": 26119 }, { "epoch": 21.676348547717843, "grad_norm": 19.089632034301758, "learning_rate": 1.133410788381743e-05, "loss": 0.3177, "step": 26120 }, { "epoch": 21.677178423236516, "grad_norm": 76.90782165527344, "learning_rate": 1.1333775933609958e-05, "loss": 0.9686, "step": 26121 }, { "epoch": 21.678008298755188, "grad_norm": 79.19862365722656, "learning_rate": 1.133344398340249e-05, "loss": 0.6431, "step": 26122 }, { "epoch": 21.67883817427386, "grad_norm": 100.34236145019531, "learning_rate": 1.1333112033195022e-05, "loss": 0.9567, "step": 26123 }, { "epoch": 21.679668049792532, "grad_norm": 75.25839233398438, "learning_rate": 1.1332780082987553e-05, "loss": 1.1758, "step": 26124 }, { "epoch": 21.680497925311204, "grad_norm": 15.32469654083252, "learning_rate": 1.1332448132780083e-05, "loss": 0.2622, "step": 26125 }, { "epoch": 21.681327800829877, "grad_norm": 24.885047912597656, "learning_rate": 1.1332116182572614e-05, "loss": 0.3385, "step": 26126 }, { "epoch": 21.68215767634855, "grad_norm": 79.5130844116211, "learning_rate": 1.1331784232365146e-05, "loss": 0.6617, "step": 26127 }, { "epoch": 21.68298755186722, "grad_norm": 21.909568786621094, "learning_rate": 1.1331452282157678e-05, "loss": 0.3348, "step": 26128 }, { "epoch": 21.683817427385893, "grad_norm": 26.88265037536621, "learning_rate": 
1.133112033195021e-05, "loss": 0.7084, "step": 26129 }, { "epoch": 21.684647302904565, "grad_norm": 33.4954948425293, "learning_rate": 1.1330788381742739e-05, "loss": 0.7696, "step": 26130 }, { "epoch": 21.685477178423238, "grad_norm": 17.52581787109375, "learning_rate": 1.133045643153527e-05, "loss": 0.4097, "step": 26131 }, { "epoch": 21.68630705394191, "grad_norm": 43.61897277832031, "learning_rate": 1.1330124481327803e-05, "loss": 0.4275, "step": 26132 }, { "epoch": 21.687136929460582, "grad_norm": 31.703683853149414, "learning_rate": 1.1329792531120333e-05, "loss": 0.2876, "step": 26133 }, { "epoch": 21.687966804979254, "grad_norm": 17.139562606811523, "learning_rate": 1.1329460580912864e-05, "loss": 0.3854, "step": 26134 }, { "epoch": 21.688796680497926, "grad_norm": 49.08472442626953, "learning_rate": 1.1329128630705394e-05, "loss": 0.5115, "step": 26135 }, { "epoch": 21.6896265560166, "grad_norm": 32.83544921875, "learning_rate": 1.1328796680497926e-05, "loss": 0.351, "step": 26136 }, { "epoch": 21.69045643153527, "grad_norm": 40.55097579956055, "learning_rate": 1.1328464730290458e-05, "loss": 0.6768, "step": 26137 }, { "epoch": 21.691286307053943, "grad_norm": 50.28491973876953, "learning_rate": 1.1328132780082987e-05, "loss": 0.4884, "step": 26138 }, { "epoch": 21.692116182572615, "grad_norm": 24.90961456298828, "learning_rate": 1.132780082987552e-05, "loss": 0.3893, "step": 26139 }, { "epoch": 21.692946058091287, "grad_norm": 37.48246765136719, "learning_rate": 1.1327468879668051e-05, "loss": 0.4488, "step": 26140 }, { "epoch": 21.69377593360996, "grad_norm": 64.3577880859375, "learning_rate": 1.1327136929460583e-05, "loss": 0.6736, "step": 26141 }, { "epoch": 21.694605809128632, "grad_norm": 61.43791198730469, "learning_rate": 1.1326804979253112e-05, "loss": 0.5013, "step": 26142 }, { "epoch": 21.695435684647304, "grad_norm": 74.09346008300781, "learning_rate": 1.1326473029045644e-05, "loss": 1.0336, "step": 26143 }, { "epoch": 21.696265560165976, 
"grad_norm": 29.16767120361328, "learning_rate": 1.1326141078838175e-05, "loss": 0.5401, "step": 26144 }, { "epoch": 21.69709543568465, "grad_norm": 42.85404968261719, "learning_rate": 1.1325809128630707e-05, "loss": 0.6497, "step": 26145 }, { "epoch": 21.69792531120332, "grad_norm": 19.179964065551758, "learning_rate": 1.1325477178423237e-05, "loss": 0.338, "step": 26146 }, { "epoch": 21.698755186721993, "grad_norm": 47.189693450927734, "learning_rate": 1.1325145228215768e-05, "loss": 0.6011, "step": 26147 }, { "epoch": 21.699585062240665, "grad_norm": 41.54281234741211, "learning_rate": 1.13248132780083e-05, "loss": 0.5944, "step": 26148 }, { "epoch": 21.700414937759337, "grad_norm": 56.28897476196289, "learning_rate": 1.1324481327800832e-05, "loss": 0.4279, "step": 26149 }, { "epoch": 21.70124481327801, "grad_norm": 26.469066619873047, "learning_rate": 1.132414937759336e-05, "loss": 0.4539, "step": 26150 }, { "epoch": 21.70207468879668, "grad_norm": 53.57976150512695, "learning_rate": 1.1323817427385893e-05, "loss": 0.4948, "step": 26151 }, { "epoch": 21.702904564315354, "grad_norm": 22.19947624206543, "learning_rate": 1.1323485477178425e-05, "loss": 0.3897, "step": 26152 }, { "epoch": 21.703734439834026, "grad_norm": 64.67990112304688, "learning_rate": 1.1323153526970955e-05, "loss": 1.0079, "step": 26153 }, { "epoch": 21.704564315352698, "grad_norm": 21.41146469116211, "learning_rate": 1.1322821576763487e-05, "loss": 0.3949, "step": 26154 }, { "epoch": 21.70539419087137, "grad_norm": 58.353782653808594, "learning_rate": 1.1322489626556018e-05, "loss": 0.537, "step": 26155 }, { "epoch": 21.706224066390043, "grad_norm": 80.74235534667969, "learning_rate": 1.1322157676348548e-05, "loss": 0.4707, "step": 26156 }, { "epoch": 21.707053941908715, "grad_norm": 90.02297973632812, "learning_rate": 1.132182572614108e-05, "loss": 0.4995, "step": 26157 }, { "epoch": 21.707883817427387, "grad_norm": 40.410545349121094, "learning_rate": 1.1321493775933612e-05, "loss": 
0.3461, "step": 26158 }, { "epoch": 21.70871369294606, "grad_norm": 126.37127685546875, "learning_rate": 1.1321161825726141e-05, "loss": 0.696, "step": 26159 }, { "epoch": 21.70954356846473, "grad_norm": 29.82927894592285, "learning_rate": 1.1320829875518673e-05, "loss": 0.3987, "step": 26160 }, { "epoch": 21.710373443983404, "grad_norm": 20.540517807006836, "learning_rate": 1.1320497925311205e-05, "loss": 0.3877, "step": 26161 }, { "epoch": 21.711203319502076, "grad_norm": 13.951318740844727, "learning_rate": 1.1320165975103736e-05, "loss": 0.2907, "step": 26162 }, { "epoch": 21.712033195020748, "grad_norm": 63.63444137573242, "learning_rate": 1.1319834024896266e-05, "loss": 0.9044, "step": 26163 }, { "epoch": 21.71286307053942, "grad_norm": 34.40925598144531, "learning_rate": 1.1319502074688798e-05, "loss": 0.4663, "step": 26164 }, { "epoch": 21.713692946058092, "grad_norm": 45.06084442138672, "learning_rate": 1.1319170124481329e-05, "loss": 0.4324, "step": 26165 }, { "epoch": 21.714522821576764, "grad_norm": 66.99051666259766, "learning_rate": 1.131883817427386e-05, "loss": 0.6286, "step": 26166 }, { "epoch": 21.715352697095437, "grad_norm": 41.13205337524414, "learning_rate": 1.131850622406639e-05, "loss": 0.5694, "step": 26167 }, { "epoch": 21.71618257261411, "grad_norm": 20.40243911743164, "learning_rate": 1.1318174273858922e-05, "loss": 0.3007, "step": 26168 }, { "epoch": 21.71701244813278, "grad_norm": 34.06222915649414, "learning_rate": 1.1317842323651454e-05, "loss": 0.4996, "step": 26169 }, { "epoch": 21.717842323651453, "grad_norm": 20.972763061523438, "learning_rate": 1.1317510373443986e-05, "loss": 0.2919, "step": 26170 }, { "epoch": 21.718672199170125, "grad_norm": 28.167890548706055, "learning_rate": 1.1317178423236514e-05, "loss": 0.4963, "step": 26171 }, { "epoch": 21.719502074688798, "grad_norm": 19.98497200012207, "learning_rate": 1.1316846473029047e-05, "loss": 0.2983, "step": 26172 }, { "epoch": 21.72033195020747, "grad_norm": 
73.54435729980469, "learning_rate": 1.1316514522821577e-05, "loss": 0.8311, "step": 26173 }, { "epoch": 21.721161825726142, "grad_norm": 67.40089416503906, "learning_rate": 1.1316182572614109e-05, "loss": 0.8526, "step": 26174 }, { "epoch": 21.721991701244814, "grad_norm": 44.018856048583984, "learning_rate": 1.131585062240664e-05, "loss": 0.7611, "step": 26175 }, { "epoch": 21.722821576763486, "grad_norm": 27.27278709411621, "learning_rate": 1.131551867219917e-05, "loss": 0.4514, "step": 26176 }, { "epoch": 21.72365145228216, "grad_norm": 36.21201705932617, "learning_rate": 1.1315186721991702e-05, "loss": 0.3675, "step": 26177 }, { "epoch": 21.72448132780083, "grad_norm": 62.59806823730469, "learning_rate": 1.1314854771784234e-05, "loss": 0.3969, "step": 26178 }, { "epoch": 21.725311203319503, "grad_norm": 43.20895767211914, "learning_rate": 1.1314522821576766e-05, "loss": 0.5412, "step": 26179 }, { "epoch": 21.726141078838175, "grad_norm": 46.5211296081543, "learning_rate": 1.1314190871369295e-05, "loss": 0.6257, "step": 26180 }, { "epoch": 21.726970954356847, "grad_norm": 48.52793502807617, "learning_rate": 1.1313858921161827e-05, "loss": 0.6012, "step": 26181 }, { "epoch": 21.72780082987552, "grad_norm": 51.30620193481445, "learning_rate": 1.1313526970954358e-05, "loss": 0.5597, "step": 26182 }, { "epoch": 21.728630705394192, "grad_norm": 56.204856872558594, "learning_rate": 1.131319502074689e-05, "loss": 0.5673, "step": 26183 }, { "epoch": 21.729460580912864, "grad_norm": 64.62336730957031, "learning_rate": 1.131286307053942e-05, "loss": 0.8356, "step": 26184 }, { "epoch": 21.730290456431536, "grad_norm": 53.255863189697266, "learning_rate": 1.131253112033195e-05, "loss": 0.5845, "step": 26185 }, { "epoch": 21.73112033195021, "grad_norm": 37.408477783203125, "learning_rate": 1.1312199170124483e-05, "loss": 0.489, "step": 26186 }, { "epoch": 21.73195020746888, "grad_norm": 34.86164855957031, "learning_rate": 1.1311867219917015e-05, "loss": 0.6425, "step": 26187 
}, { "epoch": 21.732780082987553, "grad_norm": 50.649742126464844, "learning_rate": 1.1311535269709543e-05, "loss": 0.4395, "step": 26188 }, { "epoch": 21.733609958506225, "grad_norm": 26.622398376464844, "learning_rate": 1.1311203319502075e-05, "loss": 0.3187, "step": 26189 }, { "epoch": 21.734439834024897, "grad_norm": 40.876346588134766, "learning_rate": 1.1310871369294608e-05, "loss": 0.5628, "step": 26190 }, { "epoch": 21.73526970954357, "grad_norm": 21.63742446899414, "learning_rate": 1.1310539419087138e-05, "loss": 0.3855, "step": 26191 }, { "epoch": 21.73609958506224, "grad_norm": 62.749000549316406, "learning_rate": 1.1310207468879668e-05, "loss": 0.8341, "step": 26192 }, { "epoch": 21.736929460580914, "grad_norm": 36.5645751953125, "learning_rate": 1.13098755186722e-05, "loss": 0.3445, "step": 26193 }, { "epoch": 21.737759336099586, "grad_norm": 46.18015670776367, "learning_rate": 1.1309543568464731e-05, "loss": 0.3396, "step": 26194 }, { "epoch": 21.738589211618258, "grad_norm": 18.344383239746094, "learning_rate": 1.1309211618257263e-05, "loss": 0.3745, "step": 26195 }, { "epoch": 21.73941908713693, "grad_norm": 41.53495407104492, "learning_rate": 1.1308879668049792e-05, "loss": 0.4532, "step": 26196 }, { "epoch": 21.740248962655603, "grad_norm": 53.96687316894531, "learning_rate": 1.1308547717842324e-05, "loss": 0.8382, "step": 26197 }, { "epoch": 21.741078838174275, "grad_norm": 19.71198272705078, "learning_rate": 1.1308215767634856e-05, "loss": 0.416, "step": 26198 }, { "epoch": 21.741908713692947, "grad_norm": 54.903289794921875, "learning_rate": 1.1307883817427388e-05, "loss": 0.7288, "step": 26199 }, { "epoch": 21.74273858921162, "grad_norm": 28.61815643310547, "learning_rate": 1.1307551867219917e-05, "loss": 0.328, "step": 26200 }, { "epoch": 21.74356846473029, "grad_norm": 33.47589111328125, "learning_rate": 1.1307219917012449e-05, "loss": 0.4804, "step": 26201 }, { "epoch": 21.744398340248964, "grad_norm": 31.31746482849121, "learning_rate": 
1.1306887966804981e-05, "loss": 0.3533, "step": 26202 }, { "epoch": 21.745228215767636, "grad_norm": 93.08092498779297, "learning_rate": 1.1306556016597511e-05, "loss": 0.9473, "step": 26203 }, { "epoch": 21.746058091286308, "grad_norm": 60.806884765625, "learning_rate": 1.1306224066390042e-05, "loss": 0.3935, "step": 26204 }, { "epoch": 21.74688796680498, "grad_norm": 27.334754943847656, "learning_rate": 1.1305892116182572e-05, "loss": 0.3586, "step": 26205 }, { "epoch": 21.747717842323652, "grad_norm": 34.062705993652344, "learning_rate": 1.1305560165975104e-05, "loss": 0.8388, "step": 26206 }, { "epoch": 21.748547717842325, "grad_norm": 49.534114837646484, "learning_rate": 1.1305228215767636e-05, "loss": 0.7158, "step": 26207 }, { "epoch": 21.749377593360997, "grad_norm": 30.620635986328125, "learning_rate": 1.1304896265560169e-05, "loss": 0.994, "step": 26208 }, { "epoch": 21.75020746887967, "grad_norm": 40.865394592285156, "learning_rate": 1.1304564315352697e-05, "loss": 0.3947, "step": 26209 }, { "epoch": 21.75103734439834, "grad_norm": 28.460140228271484, "learning_rate": 1.130423236514523e-05, "loss": 0.2987, "step": 26210 }, { "epoch": 21.751867219917013, "grad_norm": 78.66209411621094, "learning_rate": 1.1303900414937762e-05, "loss": 0.7374, "step": 26211 }, { "epoch": 21.752697095435686, "grad_norm": 70.68002319335938, "learning_rate": 1.1303568464730292e-05, "loss": 0.3692, "step": 26212 }, { "epoch": 21.753526970954358, "grad_norm": 24.953115463256836, "learning_rate": 1.1303236514522822e-05, "loss": 0.343, "step": 26213 }, { "epoch": 21.75435684647303, "grad_norm": 54.03120803833008, "learning_rate": 1.1302904564315353e-05, "loss": 0.4579, "step": 26214 }, { "epoch": 21.755186721991702, "grad_norm": 133.2027587890625, "learning_rate": 1.1302572614107885e-05, "loss": 0.8941, "step": 26215 }, { "epoch": 21.756016597510374, "grad_norm": 21.453214645385742, "learning_rate": 1.1302240663900417e-05, "loss": 0.3471, "step": 26216 }, { "epoch": 
21.756846473029047, "grad_norm": 16.88976287841797, "learning_rate": 1.1301908713692946e-05, "loss": 0.2794, "step": 26217 }, { "epoch": 21.75767634854772, "grad_norm": 73.68092346191406, "learning_rate": 1.1301576763485478e-05, "loss": 0.8615, "step": 26218 }, { "epoch": 21.75850622406639, "grad_norm": 41.885982513427734, "learning_rate": 1.130124481327801e-05, "loss": 0.6778, "step": 26219 }, { "epoch": 21.759336099585063, "grad_norm": 114.37252044677734, "learning_rate": 1.130091286307054e-05, "loss": 0.4569, "step": 26220 }, { "epoch": 21.760165975103735, "grad_norm": 42.437564849853516, "learning_rate": 1.130058091286307e-05, "loss": 0.5482, "step": 26221 }, { "epoch": 21.760995850622407, "grad_norm": 49.19046401977539, "learning_rate": 1.1300248962655603e-05, "loss": 0.5893, "step": 26222 }, { "epoch": 21.76182572614108, "grad_norm": 43.2248649597168, "learning_rate": 1.1299917012448133e-05, "loss": 0.5269, "step": 26223 }, { "epoch": 21.762655601659752, "grad_norm": 36.67241668701172, "learning_rate": 1.1299585062240665e-05, "loss": 0.4101, "step": 26224 }, { "epoch": 21.763485477178424, "grad_norm": 74.23462677001953, "learning_rate": 1.1299253112033196e-05, "loss": 0.4902, "step": 26225 }, { "epoch": 21.764315352697096, "grad_norm": 32.6046028137207, "learning_rate": 1.1298921161825726e-05, "loss": 0.3465, "step": 26226 }, { "epoch": 21.76514522821577, "grad_norm": 76.10845947265625, "learning_rate": 1.1298589211618258e-05, "loss": 0.4244, "step": 26227 }, { "epoch": 21.76597510373444, "grad_norm": 35.025177001953125, "learning_rate": 1.129825726141079e-05, "loss": 0.6687, "step": 26228 }, { "epoch": 21.766804979253113, "grad_norm": 19.89748191833496, "learning_rate": 1.1297925311203319e-05, "loss": 0.3762, "step": 26229 }, { "epoch": 21.767634854771785, "grad_norm": 20.464004516601562, "learning_rate": 1.1297593360995851e-05, "loss": 0.4379, "step": 26230 }, { "epoch": 21.768464730290457, "grad_norm": 12.606396675109863, "learning_rate": 
1.1297261410788383e-05, "loss": 0.2867, "step": 26231 }, { "epoch": 21.76929460580913, "grad_norm": 26.274499893188477, "learning_rate": 1.1296929460580914e-05, "loss": 0.324, "step": 26232 }, { "epoch": 21.7701244813278, "grad_norm": 61.27977752685547, "learning_rate": 1.1296597510373446e-05, "loss": 0.7721, "step": 26233 }, { "epoch": 21.770954356846474, "grad_norm": 20.3458309173584, "learning_rate": 1.1296265560165975e-05, "loss": 0.4641, "step": 26234 }, { "epoch": 21.771784232365146, "grad_norm": 19.441843032836914, "learning_rate": 1.1295933609958507e-05, "loss": 0.2901, "step": 26235 }, { "epoch": 21.77261410788382, "grad_norm": 22.614665985107422, "learning_rate": 1.1295601659751039e-05, "loss": 0.2784, "step": 26236 }, { "epoch": 21.77344398340249, "grad_norm": 20.544939041137695, "learning_rate": 1.1295269709543571e-05, "loss": 0.4763, "step": 26237 }, { "epoch": 21.774273858921163, "grad_norm": 22.32743263244629, "learning_rate": 1.12949377593361e-05, "loss": 0.3211, "step": 26238 }, { "epoch": 21.775103734439835, "grad_norm": 20.610157012939453, "learning_rate": 1.1294605809128632e-05, "loss": 0.2831, "step": 26239 }, { "epoch": 21.775933609958507, "grad_norm": 25.715595245361328, "learning_rate": 1.1294273858921164e-05, "loss": 0.4306, "step": 26240 }, { "epoch": 21.77676348547718, "grad_norm": 18.662002563476562, "learning_rate": 1.1293941908713694e-05, "loss": 0.3011, "step": 26241 }, { "epoch": 21.77759336099585, "grad_norm": 53.77528381347656, "learning_rate": 1.1293609958506225e-05, "loss": 0.7732, "step": 26242 }, { "epoch": 21.778423236514524, "grad_norm": 33.491676330566406, "learning_rate": 1.1293278008298755e-05, "loss": 0.4588, "step": 26243 }, { "epoch": 21.779253112033196, "grad_norm": 196.46185302734375, "learning_rate": 1.1292946058091287e-05, "loss": 0.9397, "step": 26244 }, { "epoch": 21.780082987551868, "grad_norm": 36.54098129272461, "learning_rate": 1.129261410788382e-05, "loss": 0.6077, "step": 26245 }, { "epoch": 
21.78091286307054, "grad_norm": 72.06798553466797, "learning_rate": 1.1292282157676348e-05, "loss": 0.3103, "step": 26246 }, { "epoch": 21.781742738589212, "grad_norm": 38.60950469970703, "learning_rate": 1.129195020746888e-05, "loss": 0.7026, "step": 26247 }, { "epoch": 21.782572614107885, "grad_norm": 54.33112716674805, "learning_rate": 1.1291618257261412e-05, "loss": 0.6643, "step": 26248 }, { "epoch": 21.783402489626557, "grad_norm": 88.6744384765625, "learning_rate": 1.1291286307053944e-05, "loss": 0.6534, "step": 26249 }, { "epoch": 21.78423236514523, "grad_norm": 53.99535369873047, "learning_rate": 1.1290954356846473e-05, "loss": 0.3094, "step": 26250 }, { "epoch": 21.7850622406639, "grad_norm": 39.018184661865234, "learning_rate": 1.1290622406639005e-05, "loss": 0.4616, "step": 26251 }, { "epoch": 21.785892116182573, "grad_norm": 37.32588195800781, "learning_rate": 1.1290290456431536e-05, "loss": 0.8228, "step": 26252 }, { "epoch": 21.786721991701246, "grad_norm": 66.66576385498047, "learning_rate": 1.1289958506224068e-05, "loss": 0.5347, "step": 26253 }, { "epoch": 21.787551867219918, "grad_norm": 14.111302375793457, "learning_rate": 1.1289626556016598e-05, "loss": 0.2362, "step": 26254 }, { "epoch": 21.78838174273859, "grad_norm": 46.586238861083984, "learning_rate": 1.1289294605809129e-05, "loss": 0.5778, "step": 26255 }, { "epoch": 21.789211618257262, "grad_norm": 53.951168060302734, "learning_rate": 1.128896265560166e-05, "loss": 0.483, "step": 26256 }, { "epoch": 21.790041493775934, "grad_norm": 109.51647186279297, "learning_rate": 1.1288630705394193e-05, "loss": 0.7752, "step": 26257 }, { "epoch": 21.790871369294607, "grad_norm": 37.704097747802734, "learning_rate": 1.1288298755186725e-05, "loss": 0.472, "step": 26258 }, { "epoch": 21.79170124481328, "grad_norm": 40.900970458984375, "learning_rate": 1.1287966804979254e-05, "loss": 0.4182, "step": 26259 }, { "epoch": 21.79253112033195, "grad_norm": 28.673154830932617, "learning_rate": 
1.1287634854771786e-05, "loss": 0.3996, "step": 26260 }, { "epoch": 21.793360995850623, "grad_norm": NaN, "learning_rate": 1.1287634854771786e-05, "loss": 0.6733, "step": 26261 }, { "epoch": 21.794190871369295, "grad_norm": 48.49523162841797, "learning_rate": 1.1287302904564316e-05, "loss": 0.5884, "step": 26262 }, { "epoch": 21.795020746887968, "grad_norm": 39.80149841308594, "learning_rate": 1.1286970954356848e-05, "loss": 0.3672, "step": 26263 }, { "epoch": 21.79585062240664, "grad_norm": 40.64930725097656, "learning_rate": 1.1286639004149379e-05, "loss": 0.5671, "step": 26264 }, { "epoch": 21.796680497925312, "grad_norm": 112.39363861083984, "learning_rate": 1.1286307053941909e-05, "loss": 0.2952, "step": 26265 }, { "epoch": 21.797510373443984, "grad_norm": 98.92317199707031, "learning_rate": 1.1285975103734441e-05, "loss": 0.6299, "step": 26266 }, { "epoch": 21.798340248962656, "grad_norm": 44.925575256347656, "learning_rate": 1.1285643153526973e-05, "loss": 0.4652, "step": 26267 }, { "epoch": 21.79917012448133, "grad_norm": 15.94404411315918, "learning_rate": 1.1285311203319502e-05, "loss": 0.2796, "step": 26268 }, { "epoch": 21.8, "grad_norm": 61.743465423583984, "learning_rate": 1.1284979253112034e-05, "loss": 0.347, "step": 26269 }, { "epoch": 21.800829875518673, "grad_norm": 26.011455535888672, "learning_rate": 1.1284647302904566e-05, "loss": 0.3485, "step": 26270 }, { "epoch": 21.801659751037345, "grad_norm": 65.73572540283203, "learning_rate": 1.1284315352697097e-05, "loss": 0.4321, "step": 26271 }, { "epoch": 21.802489626556017, "grad_norm": 28.764381408691406, "learning_rate": 1.1283983402489627e-05, "loss": 0.4728, "step": 26272 }, { "epoch": 21.80331950207469, "grad_norm": 26.95849609375, "learning_rate": 1.1283651452282159e-05, "loss": 0.5692, "step": 26273 }, { "epoch": 21.80414937759336, "grad_norm": 77.09007263183594, "learning_rate": 1.128331950207469e-05, "loss": 0.5809, "step": 26274 }, { "epoch": 21.804979253112034, "grad_norm": 
34.23556900024414, "learning_rate": 1.1282987551867222e-05, "loss": 0.6825, "step": 26275 }, { "epoch": 21.805809128630706, "grad_norm": 57.09517288208008, "learning_rate": 1.128265560165975e-05, "loss": 0.5012, "step": 26276 }, { "epoch": 21.80663900414938, "grad_norm": 28.545475006103516, "learning_rate": 1.1282323651452282e-05, "loss": 0.5278, "step": 26277 }, { "epoch": 21.80746887966805, "grad_norm": 48.298484802246094, "learning_rate": 1.1281991701244815e-05, "loss": 1.2888, "step": 26278 }, { "epoch": 21.808298755186723, "grad_norm": 32.41231918334961, "learning_rate": 1.1281659751037347e-05, "loss": 0.7323, "step": 26279 }, { "epoch": 21.809128630705395, "grad_norm": 61.882728576660156, "learning_rate": 1.1281327800829875e-05, "loss": 0.3533, "step": 26280 }, { "epoch": 21.809958506224067, "grad_norm": 73.59203338623047, "learning_rate": 1.1280995850622407e-05, "loss": 0.5343, "step": 26281 }, { "epoch": 21.81078838174274, "grad_norm": 84.56230163574219, "learning_rate": 1.128066390041494e-05, "loss": 0.3493, "step": 26282 }, { "epoch": 21.81161825726141, "grad_norm": 18.449235916137695, "learning_rate": 1.128033195020747e-05, "loss": 0.3657, "step": 26283 }, { "epoch": 21.812448132780084, "grad_norm": 44.028709411621094, "learning_rate": 1.128e-05, "loss": 0.9319, "step": 26284 }, { "epoch": 21.813278008298756, "grad_norm": 43.43344497680664, "learning_rate": 1.127966804979253e-05, "loss": 0.2988, "step": 26285 }, { "epoch": 21.814107883817428, "grad_norm": 32.86611557006836, "learning_rate": 1.1279336099585063e-05, "loss": 1.2941, "step": 26286 }, { "epoch": 21.8149377593361, "grad_norm": 129.95281982421875, "learning_rate": 1.1279004149377595e-05, "loss": 0.7946, "step": 26287 }, { "epoch": 21.815767634854772, "grad_norm": 73.72151947021484, "learning_rate": 1.1278672199170127e-05, "loss": 0.5296, "step": 26288 }, { "epoch": 21.816597510373445, "grad_norm": 90.95185089111328, "learning_rate": 1.1278340248962656e-05, "loss": 0.426, "step": 26289 }, { 
"epoch": 21.817427385892117, "grad_norm": 24.539173126220703, "learning_rate": 1.1278008298755188e-05, "loss": 0.2515, "step": 26290 }, { "epoch": 21.81825726141079, "grad_norm": 22.855501174926758, "learning_rate": 1.1277676348547718e-05, "loss": 0.3068, "step": 26291 }, { "epoch": 21.81908713692946, "grad_norm": 9.403514862060547, "learning_rate": 1.127734439834025e-05, "loss": 0.2157, "step": 26292 }, { "epoch": 21.819917012448133, "grad_norm": 79.88577270507812, "learning_rate": 1.1277012448132781e-05, "loss": 0.6553, "step": 26293 }, { "epoch": 21.820746887966806, "grad_norm": 58.81678771972656, "learning_rate": 1.1276680497925311e-05, "loss": 0.5997, "step": 26294 }, { "epoch": 21.821576763485478, "grad_norm": 29.803497314453125, "learning_rate": 1.1276348547717843e-05, "loss": 0.4648, "step": 26295 }, { "epoch": 21.82240663900415, "grad_norm": 38.2263298034668, "learning_rate": 1.1276016597510376e-05, "loss": 0.7231, "step": 26296 }, { "epoch": 21.823236514522822, "grad_norm": 19.07099723815918, "learning_rate": 1.1275684647302904e-05, "loss": 0.3548, "step": 26297 }, { "epoch": 21.824066390041494, "grad_norm": 66.84144592285156, "learning_rate": 1.1275352697095436e-05, "loss": 0.6947, "step": 26298 }, { "epoch": 21.824896265560167, "grad_norm": 33.39281463623047, "learning_rate": 1.1275020746887968e-05, "loss": 0.5017, "step": 26299 }, { "epoch": 21.82572614107884, "grad_norm": 16.45626449584961, "learning_rate": 1.1274688796680499e-05, "loss": 0.3189, "step": 26300 }, { "epoch": 21.82655601659751, "grad_norm": 67.10497283935547, "learning_rate": 1.127435684647303e-05, "loss": 0.5227, "step": 26301 }, { "epoch": 21.827385892116183, "grad_norm": 223.90567016601562, "learning_rate": 1.1274024896265561e-05, "loss": 0.5656, "step": 26302 }, { "epoch": 21.828215767634855, "grad_norm": 25.756650924682617, "learning_rate": 1.1273692946058092e-05, "loss": 0.3569, "step": 26303 }, { "epoch": 21.829045643153528, "grad_norm": 17.0816707611084, "learning_rate": 
1.1273360995850624e-05, "loss": 0.2818, "step": 26304 }, { "epoch": 21.8298755186722, "grad_norm": 50.33679962158203, "learning_rate": 1.1273029045643153e-05, "loss": 0.6634, "step": 26305 }, { "epoch": 21.830705394190872, "grad_norm": 84.47122955322266, "learning_rate": 1.1272697095435685e-05, "loss": 0.4086, "step": 26306 }, { "epoch": 21.831535269709544, "grad_norm": 55.383419036865234, "learning_rate": 1.1272365145228217e-05, "loss": 1.0621, "step": 26307 }, { "epoch": 21.832365145228216, "grad_norm": 41.5577392578125, "learning_rate": 1.1272033195020749e-05, "loss": 0.8246, "step": 26308 }, { "epoch": 21.83319502074689, "grad_norm": 20.223697662353516, "learning_rate": 1.1271701244813278e-05, "loss": 0.4221, "step": 26309 }, { "epoch": 21.83402489626556, "grad_norm": 48.115970611572266, "learning_rate": 1.127136929460581e-05, "loss": 1.1042, "step": 26310 }, { "epoch": 21.834854771784233, "grad_norm": 49.30908966064453, "learning_rate": 1.1271037344398342e-05, "loss": 0.2216, "step": 26311 }, { "epoch": 21.835684647302905, "grad_norm": 20.353933334350586, "learning_rate": 1.1270705394190872e-05, "loss": 0.3369, "step": 26312 }, { "epoch": 21.836514522821577, "grad_norm": 38.98200607299805, "learning_rate": 1.1270373443983404e-05, "loss": 0.4316, "step": 26313 }, { "epoch": 21.83734439834025, "grad_norm": 39.90802001953125, "learning_rate": 1.1270041493775933e-05, "loss": 0.8896, "step": 26314 }, { "epoch": 21.83817427385892, "grad_norm": 66.38333129882812, "learning_rate": 1.1269709543568465e-05, "loss": 1.3194, "step": 26315 }, { "epoch": 21.839004149377594, "grad_norm": 79.71503448486328, "learning_rate": 1.1269377593360997e-05, "loss": 0.8484, "step": 26316 }, { "epoch": 21.839834024896266, "grad_norm": 31.47638511657715, "learning_rate": 1.126904564315353e-05, "loss": 0.7763, "step": 26317 }, { "epoch": 21.84066390041494, "grad_norm": 50.791927337646484, "learning_rate": 1.1268713692946058e-05, "loss": 0.486, "step": 26318 }, { "epoch": 21.84149377593361, 
"grad_norm": 36.65383529663086, "learning_rate": 1.126838174273859e-05, "loss": 0.4046, "step": 26319 }, { "epoch": 21.842323651452283, "grad_norm": 57.95174026489258, "learning_rate": 1.1268049792531122e-05, "loss": 0.8229, "step": 26320 }, { "epoch": 21.843153526970955, "grad_norm": 32.144649505615234, "learning_rate": 1.1267717842323653e-05, "loss": 0.5425, "step": 26321 }, { "epoch": 21.843983402489627, "grad_norm": 37.689064025878906, "learning_rate": 1.1267385892116183e-05, "loss": 0.7276, "step": 26322 }, { "epoch": 21.8448132780083, "grad_norm": 35.547752380371094, "learning_rate": 1.1267053941908714e-05, "loss": 0.5205, "step": 26323 }, { "epoch": 21.84564315352697, "grad_norm": 79.14089965820312, "learning_rate": 1.1266721991701246e-05, "loss": 0.7446, "step": 26324 }, { "epoch": 21.846473029045644, "grad_norm": 50.43354415893555, "learning_rate": 1.1266390041493778e-05, "loss": 0.4301, "step": 26325 }, { "epoch": 21.847302904564316, "grad_norm": 17.507211685180664, "learning_rate": 1.1266058091286307e-05, "loss": 0.3106, "step": 26326 }, { "epoch": 21.848132780082988, "grad_norm": 45.02713394165039, "learning_rate": 1.1265726141078839e-05, "loss": 0.4853, "step": 26327 }, { "epoch": 21.84896265560166, "grad_norm": 23.095458984375, "learning_rate": 1.126539419087137e-05, "loss": 0.3435, "step": 26328 }, { "epoch": 21.849792531120332, "grad_norm": 12.391121864318848, "learning_rate": 1.1265062240663903e-05, "loss": 0.2911, "step": 26329 }, { "epoch": 21.850622406639005, "grad_norm": 13.784778594970703, "learning_rate": 1.1264730290456432e-05, "loss": 0.3531, "step": 26330 }, { "epoch": 21.851452282157677, "grad_norm": 11.720929145812988, "learning_rate": 1.1264398340248964e-05, "loss": 0.3011, "step": 26331 }, { "epoch": 21.85228215767635, "grad_norm": 25.797016143798828, "learning_rate": 1.1264066390041494e-05, "loss": 0.3706, "step": 26332 }, { "epoch": 21.85311203319502, "grad_norm": 31.511186599731445, "learning_rate": 1.1263734439834026e-05, "loss": 
0.347, "step": 26333 }, { "epoch": 21.853941908713693, "grad_norm": 41.44806671142578, "learning_rate": 1.1263402489626557e-05, "loss": 0.6376, "step": 26334 }, { "epoch": 21.854771784232366, "grad_norm": 31.591007232666016, "learning_rate": 1.1263070539419087e-05, "loss": 0.5515, "step": 26335 }, { "epoch": 21.855601659751038, "grad_norm": 28.3624267578125, "learning_rate": 1.126273858921162e-05, "loss": 0.3637, "step": 26336 }, { "epoch": 21.85643153526971, "grad_norm": 57.78643035888672, "learning_rate": 1.1262406639004151e-05, "loss": 0.8281, "step": 26337 }, { "epoch": 21.857261410788382, "grad_norm": 18.353107452392578, "learning_rate": 1.126207468879668e-05, "loss": 0.4253, "step": 26338 }, { "epoch": 21.858091286307054, "grad_norm": 58.95690155029297, "learning_rate": 1.1261742738589212e-05, "loss": 1.1231, "step": 26339 }, { "epoch": 21.858921161825727, "grad_norm": 35.64271545410156, "learning_rate": 1.1261410788381744e-05, "loss": 0.5446, "step": 26340 }, { "epoch": 21.8597510373444, "grad_norm": 49.83145523071289, "learning_rate": 1.1261078838174275e-05, "loss": 0.5469, "step": 26341 }, { "epoch": 21.86058091286307, "grad_norm": 25.68242073059082, "learning_rate": 1.1260746887966807e-05, "loss": 0.3572, "step": 26342 }, { "epoch": 21.861410788381743, "grad_norm": 67.97101593017578, "learning_rate": 1.1260414937759337e-05, "loss": 0.4129, "step": 26343 }, { "epoch": 21.862240663900415, "grad_norm": 20.855199813842773, "learning_rate": 1.1260082987551868e-05, "loss": 0.414, "step": 26344 }, { "epoch": 21.863070539419088, "grad_norm": 57.217803955078125, "learning_rate": 1.12597510373444e-05, "loss": 0.4029, "step": 26345 }, { "epoch": 21.86390041493776, "grad_norm": 50.04058074951172, "learning_rate": 1.1259419087136932e-05, "loss": 0.7992, "step": 26346 }, { "epoch": 21.864730290456432, "grad_norm": 13.484389305114746, "learning_rate": 1.125908713692946e-05, "loss": 0.2855, "step": 26347 }, { "epoch": 21.865560165975104, "grad_norm": 36.1651496887207, 
"learning_rate": 1.1258755186721993e-05, "loss": 0.7907, "step": 26348 }, { "epoch": 21.866390041493776, "grad_norm": 58.11738967895508, "learning_rate": 1.1258423236514525e-05, "loss": 0.9291, "step": 26349 }, { "epoch": 21.86721991701245, "grad_norm": 46.727821350097656, "learning_rate": 1.1258091286307055e-05, "loss": 0.7951, "step": 26350 }, { "epoch": 21.86804979253112, "grad_norm": 31.843215942382812, "learning_rate": 1.1257759336099586e-05, "loss": 0.4565, "step": 26351 }, { "epoch": 21.868879668049793, "grad_norm": 18.92868995666504, "learning_rate": 1.1257427385892116e-05, "loss": 0.4453, "step": 26352 }, { "epoch": 21.869709543568465, "grad_norm": 26.78330421447754, "learning_rate": 1.1257095435684648e-05, "loss": 0.4328, "step": 26353 }, { "epoch": 21.870539419087137, "grad_norm": 86.46538543701172, "learning_rate": 1.125676348547718e-05, "loss": 0.6438, "step": 26354 }, { "epoch": 21.87136929460581, "grad_norm": 30.671756744384766, "learning_rate": 1.1256431535269709e-05, "loss": 0.3959, "step": 26355 }, { "epoch": 21.872199170124482, "grad_norm": 71.68174743652344, "learning_rate": 1.1256099585062241e-05, "loss": 0.7307, "step": 26356 }, { "epoch": 21.873029045643154, "grad_norm": 43.54423904418945, "learning_rate": 1.1255767634854773e-05, "loss": 1.1314, "step": 26357 }, { "epoch": 21.873858921161826, "grad_norm": 76.79622650146484, "learning_rate": 1.1255435684647305e-05, "loss": 0.6126, "step": 26358 }, { "epoch": 21.8746887966805, "grad_norm": 35.023223876953125, "learning_rate": 1.1255103734439834e-05, "loss": 0.5176, "step": 26359 }, { "epoch": 21.87551867219917, "grad_norm": 43.92792510986328, "learning_rate": 1.1254771784232366e-05, "loss": 0.3895, "step": 26360 }, { "epoch": 21.876348547717843, "grad_norm": 84.84140014648438, "learning_rate": 1.1254439834024896e-05, "loss": 0.4738, "step": 26361 }, { "epoch": 21.877178423236515, "grad_norm": 44.70022201538086, "learning_rate": 1.1254107883817429e-05, "loss": 0.5108, "step": 26362 }, { "epoch": 
21.878008298755187, "grad_norm": 59.51811981201172, "learning_rate": 1.1253775933609959e-05, "loss": 0.8326, "step": 26363 }, { "epoch": 21.87883817427386, "grad_norm": 41.571632385253906, "learning_rate": 1.125344398340249e-05, "loss": 0.7783, "step": 26364 }, { "epoch": 21.87966804979253, "grad_norm": 150.427734375, "learning_rate": 1.1253112033195022e-05, "loss": 0.6689, "step": 26365 }, { "epoch": 21.880497925311204, "grad_norm": 19.794321060180664, "learning_rate": 1.1252780082987554e-05, "loss": 0.5864, "step": 26366 }, { "epoch": 21.881327800829876, "grad_norm": 66.0368423461914, "learning_rate": 1.1252448132780086e-05, "loss": 0.6855, "step": 26367 }, { "epoch": 21.882157676348548, "grad_norm": 55.82532501220703, "learning_rate": 1.1252116182572614e-05, "loss": 0.4262, "step": 26368 }, { "epoch": 21.88298755186722, "grad_norm": 36.01459503173828, "learning_rate": 1.1251784232365147e-05, "loss": 0.4494, "step": 26369 }, { "epoch": 21.883817427385893, "grad_norm": 86.69307708740234, "learning_rate": 1.1251452282157677e-05, "loss": 0.9138, "step": 26370 }, { "epoch": 21.884647302904565, "grad_norm": 39.707096099853516, "learning_rate": 1.1251120331950209e-05, "loss": 0.6383, "step": 26371 }, { "epoch": 21.885477178423237, "grad_norm": 28.509485244750977, "learning_rate": 1.125078838174274e-05, "loss": 0.6891, "step": 26372 }, { "epoch": 21.88630705394191, "grad_norm": 47.314002990722656, "learning_rate": 1.125045643153527e-05, "loss": 0.6821, "step": 26373 }, { "epoch": 21.88713692946058, "grad_norm": 63.69619369506836, "learning_rate": 1.1250124481327802e-05, "loss": 1.0535, "step": 26374 }, { "epoch": 21.887966804979254, "grad_norm": 39.91340637207031, "learning_rate": 1.1249792531120334e-05, "loss": 0.3965, "step": 26375 }, { "epoch": 21.888796680497926, "grad_norm": 44.74382019042969, "learning_rate": 1.1249460580912863e-05, "loss": 0.3451, "step": 26376 }, { "epoch": 21.889626556016598, "grad_norm": 15.155364990234375, "learning_rate": 
1.1249128630705395e-05, "loss": 0.2908, "step": 26377 }, { "epoch": 21.89045643153527, "grad_norm": 39.16278839111328, "learning_rate": 1.1248796680497927e-05, "loss": 0.5139, "step": 26378 }, { "epoch": 21.891286307053942, "grad_norm": 25.857866287231445, "learning_rate": 1.1248464730290457e-05, "loss": 0.2809, "step": 26379 }, { "epoch": 21.892116182572614, "grad_norm": 21.74839210510254, "learning_rate": 1.1248132780082988e-05, "loss": 0.3715, "step": 26380 }, { "epoch": 21.892946058091287, "grad_norm": 44.79844284057617, "learning_rate": 1.124780082987552e-05, "loss": 0.483, "step": 26381 }, { "epoch": 21.89377593360996, "grad_norm": 56.693687438964844, "learning_rate": 1.124746887966805e-05, "loss": 0.3455, "step": 26382 }, { "epoch": 21.89460580912863, "grad_norm": 62.4193000793457, "learning_rate": 1.1247136929460583e-05, "loss": 0.5056, "step": 26383 }, { "epoch": 21.895435684647303, "grad_norm": 88.14012908935547, "learning_rate": 1.1246804979253111e-05, "loss": 0.7761, "step": 26384 }, { "epoch": 21.896265560165975, "grad_norm": 50.373321533203125, "learning_rate": 1.1246473029045643e-05, "loss": 0.8243, "step": 26385 }, { "epoch": 21.897095435684648, "grad_norm": 79.13117980957031, "learning_rate": 1.1246141078838175e-05, "loss": 0.556, "step": 26386 }, { "epoch": 21.89792531120332, "grad_norm": 35.51622772216797, "learning_rate": 1.1245809128630708e-05, "loss": 0.3201, "step": 26387 }, { "epoch": 21.898755186721992, "grad_norm": 66.17959594726562, "learning_rate": 1.1245477178423236e-05, "loss": 0.623, "step": 26388 }, { "epoch": 21.899585062240664, "grad_norm": 26.043643951416016, "learning_rate": 1.1245145228215768e-05, "loss": 0.4497, "step": 26389 }, { "epoch": 21.900414937759336, "grad_norm": 70.99488067626953, "learning_rate": 1.12448132780083e-05, "loss": 0.3546, "step": 26390 }, { "epoch": 21.90124481327801, "grad_norm": 12.88261604309082, "learning_rate": 1.1244481327800831e-05, "loss": 0.2719, "step": 26391 }, { "epoch": 21.90207468879668, 
"grad_norm": 20.778661727905273, "learning_rate": 1.1244149377593363e-05, "loss": 0.2819, "step": 26392 }, { "epoch": 21.902904564315353, "grad_norm": 55.311851501464844, "learning_rate": 1.1243817427385892e-05, "loss": 0.8061, "step": 26393 }, { "epoch": 21.903734439834025, "grad_norm": 29.665691375732422, "learning_rate": 1.1243485477178424e-05, "loss": 0.2835, "step": 26394 }, { "epoch": 21.904564315352697, "grad_norm": 37.7624397277832, "learning_rate": 1.1243153526970956e-05, "loss": 0.4916, "step": 26395 }, { "epoch": 21.90539419087137, "grad_norm": 35.7885856628418, "learning_rate": 1.1242821576763488e-05, "loss": 0.4214, "step": 26396 }, { "epoch": 21.906224066390042, "grad_norm": 15.906312942504883, "learning_rate": 1.1242489626556017e-05, "loss": 0.2547, "step": 26397 }, { "epoch": 21.907053941908714, "grad_norm": 101.42210388183594, "learning_rate": 1.1242157676348549e-05, "loss": 0.9123, "step": 26398 }, { "epoch": 21.907883817427386, "grad_norm": 31.455766677856445, "learning_rate": 1.1241825726141081e-05, "loss": 0.5076, "step": 26399 }, { "epoch": 21.90871369294606, "grad_norm": 14.50820255279541, "learning_rate": 1.1241493775933611e-05, "loss": 0.2539, "step": 26400 }, { "epoch": 21.90954356846473, "grad_norm": 63.33625411987305, "learning_rate": 1.1241161825726142e-05, "loss": 0.822, "step": 26401 }, { "epoch": 21.910373443983403, "grad_norm": 48.019134521484375, "learning_rate": 1.1240829875518672e-05, "loss": 0.3297, "step": 26402 }, { "epoch": 21.911203319502075, "grad_norm": 50.73992919921875, "learning_rate": 1.1240497925311204e-05, "loss": 0.7495, "step": 26403 }, { "epoch": 21.912033195020747, "grad_norm": 35.49448776245117, "learning_rate": 1.1240165975103736e-05, "loss": 0.3745, "step": 26404 }, { "epoch": 21.91286307053942, "grad_norm": 19.69711685180664, "learning_rate": 1.1239834024896265e-05, "loss": 0.4002, "step": 26405 }, { "epoch": 21.91369294605809, "grad_norm": 45.95336151123047, "learning_rate": 1.1239502074688797e-05, "loss": 
0.8209, "step": 26406 }, { "epoch": 21.914522821576764, "grad_norm": 34.670982360839844, "learning_rate": 1.123917012448133e-05, "loss": 0.5366, "step": 26407 }, { "epoch": 21.915352697095436, "grad_norm": 37.692359924316406, "learning_rate": 1.123883817427386e-05, "loss": 0.4443, "step": 26408 }, { "epoch": 21.916182572614108, "grad_norm": 54.62002182006836, "learning_rate": 1.123850622406639e-05, "loss": 0.5701, "step": 26409 }, { "epoch": 21.91701244813278, "grad_norm": 70.88227081298828, "learning_rate": 1.1238174273858922e-05, "loss": 0.5691, "step": 26410 }, { "epoch": 21.917842323651453, "grad_norm": 40.499237060546875, "learning_rate": 1.1237842323651453e-05, "loss": 0.5437, "step": 26411 }, { "epoch": 21.918672199170125, "grad_norm": 118.13095092773438, "learning_rate": 1.1237510373443985e-05, "loss": 0.46, "step": 26412 }, { "epoch": 21.919502074688797, "grad_norm": 28.833866119384766, "learning_rate": 1.1237178423236515e-05, "loss": 0.557, "step": 26413 }, { "epoch": 21.92033195020747, "grad_norm": 21.41427993774414, "learning_rate": 1.1236846473029046e-05, "loss": 0.2696, "step": 26414 }, { "epoch": 21.92116182572614, "grad_norm": 16.353601455688477, "learning_rate": 1.1236514522821578e-05, "loss": 0.2696, "step": 26415 }, { "epoch": 21.921991701244814, "grad_norm": 58.74138641357422, "learning_rate": 1.123618257261411e-05, "loss": 0.5726, "step": 26416 }, { "epoch": 21.922821576763486, "grad_norm": 67.1031723022461, "learning_rate": 1.1235850622406639e-05, "loss": 0.6893, "step": 26417 }, { "epoch": 21.923651452282158, "grad_norm": 142.05511474609375, "learning_rate": 1.123551867219917e-05, "loss": 0.9364, "step": 26418 }, { "epoch": 21.92448132780083, "grad_norm": 30.89107894897461, "learning_rate": 1.1235186721991703e-05, "loss": 0.3513, "step": 26419 }, { "epoch": 21.925311203319502, "grad_norm": 45.52817916870117, "learning_rate": 1.1234854771784233e-05, "loss": 0.356, "step": 26420 }, { "epoch": 21.926141078838175, "grad_norm": 26.774566650390625, 
"learning_rate": 1.1234522821576765e-05, "loss": 0.2487, "step": 26421 }, { "epoch": 21.926970954356847, "grad_norm": 41.981483459472656, "learning_rate": 1.1234190871369294e-05, "loss": 0.4077, "step": 26422 }, { "epoch": 21.92780082987552, "grad_norm": 56.117523193359375, "learning_rate": 1.1233858921161826e-05, "loss": 0.6474, "step": 26423 }, { "epoch": 21.92863070539419, "grad_norm": 49.75731658935547, "learning_rate": 1.1233526970954358e-05, "loss": 0.6242, "step": 26424 }, { "epoch": 21.929460580912863, "grad_norm": 85.57865142822266, "learning_rate": 1.123319502074689e-05, "loss": 0.7006, "step": 26425 }, { "epoch": 21.930290456431536, "grad_norm": 135.9221649169922, "learning_rate": 1.1232863070539419e-05, "loss": 0.9974, "step": 26426 }, { "epoch": 21.931120331950208, "grad_norm": 62.70806884765625, "learning_rate": 1.1232531120331951e-05, "loss": 0.8927, "step": 26427 }, { "epoch": 21.93195020746888, "grad_norm": 35.866127014160156, "learning_rate": 1.1232199170124483e-05, "loss": 0.473, "step": 26428 }, { "epoch": 21.932780082987552, "grad_norm": 81.59196472167969, "learning_rate": 1.1231867219917014e-05, "loss": 0.3436, "step": 26429 }, { "epoch": 21.933609958506224, "grad_norm": 70.02749633789062, "learning_rate": 1.1231535269709544e-05, "loss": 1.1157, "step": 26430 }, { "epoch": 21.934439834024896, "grad_norm": 17.699119567871094, "learning_rate": 1.1231203319502075e-05, "loss": 0.3621, "step": 26431 }, { "epoch": 21.93526970954357, "grad_norm": 24.377243041992188, "learning_rate": 1.1230871369294607e-05, "loss": 0.4771, "step": 26432 }, { "epoch": 21.93609958506224, "grad_norm": 38.86798858642578, "learning_rate": 1.1230539419087139e-05, "loss": 0.4944, "step": 26433 }, { "epoch": 21.936929460580913, "grad_norm": 46.23577880859375, "learning_rate": 1.1230207468879667e-05, "loss": 0.8253, "step": 26434 }, { "epoch": 21.937759336099585, "grad_norm": 36.293941497802734, "learning_rate": 1.12298755186722e-05, "loss": 0.6111, "step": 26435 }, { "epoch": 
21.938589211618257, "grad_norm": 72.27902221679688, "learning_rate": 1.1229543568464732e-05, "loss": 0.6068, "step": 26436 }, { "epoch": 21.93941908713693, "grad_norm": 18.476293563842773, "learning_rate": 1.1229211618257264e-05, "loss": 0.3048, "step": 26437 }, { "epoch": 21.940248962655602, "grad_norm": 10.692895889282227, "learning_rate": 1.1228879668049793e-05, "loss": 0.2517, "step": 26438 }, { "epoch": 21.941078838174274, "grad_norm": 32.31937026977539, "learning_rate": 1.1228547717842325e-05, "loss": 0.625, "step": 26439 }, { "epoch": 21.941908713692946, "grad_norm": 55.906978607177734, "learning_rate": 1.1228215767634855e-05, "loss": 0.5003, "step": 26440 }, { "epoch": 21.94273858921162, "grad_norm": 51.16488265991211, "learning_rate": 1.1227883817427387e-05, "loss": 0.6553, "step": 26441 }, { "epoch": 21.94356846473029, "grad_norm": 47.0910530090332, "learning_rate": 1.1227551867219918e-05, "loss": 0.8747, "step": 26442 }, { "epoch": 21.944398340248963, "grad_norm": 95.94300842285156, "learning_rate": 1.1227219917012448e-05, "loss": 0.6376, "step": 26443 }, { "epoch": 21.945228215767635, "grad_norm": 24.779436111450195, "learning_rate": 1.122688796680498e-05, "loss": 0.3527, "step": 26444 }, { "epoch": 21.946058091286307, "grad_norm": 49.1644401550293, "learning_rate": 1.1226556016597512e-05, "loss": 0.816, "step": 26445 }, { "epoch": 21.94688796680498, "grad_norm": 46.79328918457031, "learning_rate": 1.1226224066390044e-05, "loss": 0.8207, "step": 26446 }, { "epoch": 21.94771784232365, "grad_norm": 155.1556854248047, "learning_rate": 1.1225892116182573e-05, "loss": 0.9624, "step": 26447 }, { "epoch": 21.948547717842324, "grad_norm": 45.32674789428711, "learning_rate": 1.1225560165975105e-05, "loss": 0.5943, "step": 26448 }, { "epoch": 21.949377593360996, "grad_norm": 56.65719223022461, "learning_rate": 1.1225228215767636e-05, "loss": 0.891, "step": 26449 }, { "epoch": 21.95020746887967, "grad_norm": 20.148792266845703, "learning_rate": 
1.1224896265560168e-05, "loss": 0.3615, "step": 26450 }, { "epoch": 21.95103734439834, "grad_norm": 26.664831161499023, "learning_rate": 1.1224564315352698e-05, "loss": 0.3941, "step": 26451 }, { "epoch": 21.951867219917013, "grad_norm": 9.9756441116333, "learning_rate": 1.1224232365145228e-05, "loss": 0.236, "step": 26452 }, { "epoch": 21.952697095435685, "grad_norm": 80.10942840576172, "learning_rate": 1.122390041493776e-05, "loss": 0.8132, "step": 26453 }, { "epoch": 21.953526970954357, "grad_norm": 42.46894454956055, "learning_rate": 1.1223568464730293e-05, "loss": 0.7104, "step": 26454 }, { "epoch": 21.95435684647303, "grad_norm": 17.274032592773438, "learning_rate": 1.1223236514522821e-05, "loss": 0.3662, "step": 26455 }, { "epoch": 21.9551867219917, "grad_norm": 84.93476104736328, "learning_rate": 1.1222904564315354e-05, "loss": 0.9519, "step": 26456 }, { "epoch": 21.956016597510374, "grad_norm": 54.18985366821289, "learning_rate": 1.1222572614107886e-05, "loss": 0.7618, "step": 26457 }, { "epoch": 21.956846473029046, "grad_norm": 26.04379653930664, "learning_rate": 1.1222240663900416e-05, "loss": 0.2765, "step": 26458 }, { "epoch": 21.957676348547718, "grad_norm": 71.61229705810547, "learning_rate": 1.1221908713692946e-05, "loss": 1.373, "step": 26459 }, { "epoch": 21.95850622406639, "grad_norm": 63.921356201171875, "learning_rate": 1.1221576763485479e-05, "loss": 0.5361, "step": 26460 }, { "epoch": 21.959336099585062, "grad_norm": 23.944507598876953, "learning_rate": 1.1221244813278009e-05, "loss": 0.4106, "step": 26461 }, { "epoch": 21.960165975103735, "grad_norm": 28.755538940429688, "learning_rate": 1.1220912863070541e-05, "loss": 0.4141, "step": 26462 }, { "epoch": 21.960995850622407, "grad_norm": 19.13408660888672, "learning_rate": 1.122058091286307e-05, "loss": 0.2892, "step": 26463 }, { "epoch": 21.96182572614108, "grad_norm": 27.938310623168945, "learning_rate": 1.1220248962655602e-05, "loss": 0.5129, "step": 26464 }, { "epoch": 21.96265560165975, 
"grad_norm": 41.46916961669922, "learning_rate": 1.1219917012448134e-05, "loss": 0.7174, "step": 26465 }, { "epoch": 21.963485477178423, "grad_norm": 85.10164642333984, "learning_rate": 1.1219585062240666e-05, "loss": 0.9323, "step": 26466 }, { "epoch": 21.964315352697096, "grad_norm": 18.481433868408203, "learning_rate": 1.1219253112033195e-05, "loss": 0.3112, "step": 26467 }, { "epoch": 21.965145228215768, "grad_norm": 103.90399932861328, "learning_rate": 1.1218921161825727e-05, "loss": 0.3714, "step": 26468 }, { "epoch": 21.96597510373444, "grad_norm": 47.869991302490234, "learning_rate": 1.1218589211618257e-05, "loss": 1.1702, "step": 26469 }, { "epoch": 21.966804979253112, "grad_norm": 54.13930892944336, "learning_rate": 1.121825726141079e-05, "loss": 0.7916, "step": 26470 }, { "epoch": 21.967634854771784, "grad_norm": 48.13210678100586, "learning_rate": 1.1217925311203322e-05, "loss": 1.059, "step": 26471 }, { "epoch": 21.968464730290457, "grad_norm": 59.25819778442383, "learning_rate": 1.121759336099585e-05, "loss": 0.409, "step": 26472 }, { "epoch": 21.96929460580913, "grad_norm": 41.04233169555664, "learning_rate": 1.1217261410788382e-05, "loss": 0.4344, "step": 26473 }, { "epoch": 21.9701244813278, "grad_norm": 47.036827087402344, "learning_rate": 1.1216929460580915e-05, "loss": 0.6818, "step": 26474 }, { "epoch": 21.970954356846473, "grad_norm": 24.545055389404297, "learning_rate": 1.1216597510373447e-05, "loss": 0.3121, "step": 26475 }, { "epoch": 21.971784232365145, "grad_norm": 38.11118698120117, "learning_rate": 1.1216265560165975e-05, "loss": 0.4826, "step": 26476 }, { "epoch": 21.972614107883818, "grad_norm": 43.70600128173828, "learning_rate": 1.1215933609958507e-05, "loss": 0.3996, "step": 26477 }, { "epoch": 21.97344398340249, "grad_norm": 66.86857604980469, "learning_rate": 1.1215601659751038e-05, "loss": 0.7167, "step": 26478 }, { "epoch": 21.974273858921162, "grad_norm": 38.806453704833984, "learning_rate": 1.121526970954357e-05, "loss": 
0.5278, "step": 26479 }, { "epoch": 21.975103734439834, "grad_norm": 40.344364166259766, "learning_rate": 1.12149377593361e-05, "loss": 0.6998, "step": 26480 }, { "epoch": 21.975933609958506, "grad_norm": 67.72611999511719, "learning_rate": 1.121460580912863e-05, "loss": 0.5455, "step": 26481 }, { "epoch": 21.97676348547718, "grad_norm": 50.293663024902344, "learning_rate": 1.1214273858921163e-05, "loss": 0.4309, "step": 26482 }, { "epoch": 21.97759336099585, "grad_norm": 36.3729362487793, "learning_rate": 1.1213941908713695e-05, "loss": 0.4749, "step": 26483 }, { "epoch": 21.978423236514523, "grad_norm": 10.194684028625488, "learning_rate": 1.1213609958506224e-05, "loss": 0.3368, "step": 26484 }, { "epoch": 21.979253112033195, "grad_norm": 58.04129409790039, "learning_rate": 1.1213278008298756e-05, "loss": 0.7325, "step": 26485 }, { "epoch": 21.980082987551867, "grad_norm": 146.81468200683594, "learning_rate": 1.1212946058091288e-05, "loss": 0.8263, "step": 26486 }, { "epoch": 21.98091286307054, "grad_norm": 50.56584930419922, "learning_rate": 1.1212614107883818e-05, "loss": 0.5081, "step": 26487 }, { "epoch": 21.98174273858921, "grad_norm": 94.09440612792969, "learning_rate": 1.1212282157676349e-05, "loss": 0.8202, "step": 26488 }, { "epoch": 21.982572614107884, "grad_norm": 14.185315132141113, "learning_rate": 1.1211950207468881e-05, "loss": 0.5009, "step": 26489 }, { "epoch": 21.983402489626556, "grad_norm": 37.557167053222656, "learning_rate": 1.1211618257261411e-05, "loss": 0.8244, "step": 26490 }, { "epoch": 21.98423236514523, "grad_norm": 69.97173309326172, "learning_rate": 1.1211286307053943e-05, "loss": 0.4974, "step": 26491 }, { "epoch": 21.9850622406639, "grad_norm": 52.64583969116211, "learning_rate": 1.1210954356846472e-05, "loss": 0.5344, "step": 26492 }, { "epoch": 21.985892116182573, "grad_norm": 19.35471534729004, "learning_rate": 1.1210622406639004e-05, "loss": 0.3319, "step": 26493 }, { "epoch": 21.986721991701245, "grad_norm": 
75.85889434814453, "learning_rate": 1.1210290456431536e-05, "loss": 1.1434, "step": 26494 }, { "epoch": 21.987551867219917, "grad_norm": 30.78802490234375, "learning_rate": 1.1209958506224068e-05, "loss": 0.5369, "step": 26495 }, { "epoch": 21.98838174273859, "grad_norm": 19.513525009155273, "learning_rate": 1.1209626556016597e-05, "loss": 0.2847, "step": 26496 }, { "epoch": 21.98921161825726, "grad_norm": 72.36087799072266, "learning_rate": 1.120929460580913e-05, "loss": 0.6249, "step": 26497 }, { "epoch": 21.990041493775934, "grad_norm": 78.78465270996094, "learning_rate": 1.1208962655601661e-05, "loss": 0.6825, "step": 26498 }, { "epoch": 21.990871369294606, "grad_norm": 21.954771041870117, "learning_rate": 1.1208630705394192e-05, "loss": 0.5005, "step": 26499 }, { "epoch": 21.991701244813278, "grad_norm": 29.73467445373535, "learning_rate": 1.1208298755186724e-05, "loss": 0.3218, "step": 26500 }, { "epoch": 21.99253112033195, "grad_norm": 61.218624114990234, "learning_rate": 1.1207966804979253e-05, "loss": 0.5867, "step": 26501 }, { "epoch": 21.993360995850622, "grad_norm": 30.28488540649414, "learning_rate": 1.1207634854771785e-05, "loss": 0.3543, "step": 26502 }, { "epoch": 21.994190871369295, "grad_norm": 21.861083984375, "learning_rate": 1.1207302904564317e-05, "loss": 0.3008, "step": 26503 }, { "epoch": 21.995020746887967, "grad_norm": 52.18992233276367, "learning_rate": 1.1206970954356849e-05, "loss": 0.6714, "step": 26504 }, { "epoch": 21.99585062240664, "grad_norm": 30.13749885559082, "learning_rate": 1.1206639004149378e-05, "loss": 0.5482, "step": 26505 }, { "epoch": 21.99668049792531, "grad_norm": 54.70965576171875, "learning_rate": 1.120630705394191e-05, "loss": 0.361, "step": 26506 }, { "epoch": 21.997510373443983, "grad_norm": 41.674766540527344, "learning_rate": 1.1205975103734442e-05, "loss": 0.6063, "step": 26507 }, { "epoch": 21.998340248962656, "grad_norm": 35.61292266845703, "learning_rate": 1.1205643153526972e-05, "loss": 0.5638, "step": 
26508 }, { "epoch": 21.999170124481328, "grad_norm": 43.66392517089844, "learning_rate": 1.1205311203319503e-05, "loss": 0.4836, "step": 26509 }, { "epoch": 22.0, "grad_norm": 26.680862426757812, "learning_rate": 1.1204979253112033e-05, "loss": 0.7925, "step": 26510 }, { "epoch": 22.000829875518672, "grad_norm": 41.567626953125, "learning_rate": 1.1204647302904565e-05, "loss": 0.3007, "step": 26511 }, { "epoch": 22.001659751037344, "grad_norm": 19.820667266845703, "learning_rate": 1.1204315352697097e-05, "loss": 0.338, "step": 26512 }, { "epoch": 22.002489626556017, "grad_norm": 15.304309844970703, "learning_rate": 1.1203983402489626e-05, "loss": 0.2093, "step": 26513 }, { "epoch": 22.00331950207469, "grad_norm": 32.213863372802734, "learning_rate": 1.1203651452282158e-05, "loss": 0.3411, "step": 26514 }, { "epoch": 22.00414937759336, "grad_norm": 34.608665466308594, "learning_rate": 1.120331950207469e-05, "loss": 0.3546, "step": 26515 }, { "epoch": 22.004979253112033, "grad_norm": 35.68654251098633, "learning_rate": 1.120298755186722e-05, "loss": 0.3307, "step": 26516 }, { "epoch": 22.005809128630705, "grad_norm": 26.887128829956055, "learning_rate": 1.1202655601659751e-05, "loss": 0.1878, "step": 26517 }, { "epoch": 22.006639004149378, "grad_norm": 60.99256896972656, "learning_rate": 1.1202323651452283e-05, "loss": 0.6817, "step": 26518 }, { "epoch": 22.00746887966805, "grad_norm": 74.03702545166016, "learning_rate": 1.1201991701244814e-05, "loss": 0.2713, "step": 26519 }, { "epoch": 22.008298755186722, "grad_norm": 25.48088264465332, "learning_rate": 1.1201659751037346e-05, "loss": 0.3888, "step": 26520 }, { "epoch": 22.009128630705394, "grad_norm": 48.722171783447266, "learning_rate": 1.1201327800829876e-05, "loss": 0.5738, "step": 26521 }, { "epoch": 22.009958506224066, "grad_norm": 65.11437225341797, "learning_rate": 1.1200995850622407e-05, "loss": 0.8908, "step": 26522 }, { "epoch": 22.01078838174274, "grad_norm": 31.792863845825195, "learning_rate": 
1.1200663900414939e-05, "loss": 0.5831, "step": 26523 }, { "epoch": 22.01161825726141, "grad_norm": 113.76936340332031, "learning_rate": 1.120033195020747e-05, "loss": 0.5601, "step": 26524 }, { "epoch": 22.012448132780083, "grad_norm": 17.770854949951172, "learning_rate": 1.1200000000000001e-05, "loss": 0.2571, "step": 26525 }, { "epoch": 22.013278008298755, "grad_norm": 13.876374244689941, "learning_rate": 1.1199668049792532e-05, "loss": 0.3676, "step": 26526 }, { "epoch": 22.014107883817427, "grad_norm": 34.31577682495117, "learning_rate": 1.1199336099585064e-05, "loss": 0.2999, "step": 26527 }, { "epoch": 22.0149377593361, "grad_norm": 57.64371871948242, "learning_rate": 1.1199004149377594e-05, "loss": 1.0103, "step": 26528 }, { "epoch": 22.01576763485477, "grad_norm": 99.46343231201172, "learning_rate": 1.1198672199170126e-05, "loss": 0.857, "step": 26529 }, { "epoch": 22.016597510373444, "grad_norm": 64.09217071533203, "learning_rate": 1.1198340248962657e-05, "loss": 0.9197, "step": 26530 }, { "epoch": 22.017427385892116, "grad_norm": 37.69758224487305, "learning_rate": 1.1198008298755187e-05, "loss": 0.5204, "step": 26531 }, { "epoch": 22.01825726141079, "grad_norm": 25.265722274780273, "learning_rate": 1.119767634854772e-05, "loss": 0.2778, "step": 26532 }, { "epoch": 22.01908713692946, "grad_norm": 31.626691818237305, "learning_rate": 1.1197344398340251e-05, "loss": 0.6683, "step": 26533 }, { "epoch": 22.019917012448133, "grad_norm": 34.0341682434082, "learning_rate": 1.119701244813278e-05, "loss": 0.5601, "step": 26534 }, { "epoch": 22.020746887966805, "grad_norm": 21.994352340698242, "learning_rate": 1.1196680497925312e-05, "loss": 0.3193, "step": 26535 }, { "epoch": 22.021576763485477, "grad_norm": 44.97774887084961, "learning_rate": 1.1196348547717844e-05, "loss": 0.5198, "step": 26536 }, { "epoch": 22.02240663900415, "grad_norm": 12.386519432067871, "learning_rate": 1.1196016597510375e-05, "loss": 0.3369, "step": 26537 }, { "epoch": 22.02323651452282, 
"grad_norm": 33.985164642333984, "learning_rate": 1.1195684647302905e-05, "loss": 0.2864, "step": 26538 }, { "epoch": 22.024066390041494, "grad_norm": 35.6396484375, "learning_rate": 1.1195352697095435e-05, "loss": 0.3035, "step": 26539 }, { "epoch": 22.024896265560166, "grad_norm": 34.175445556640625, "learning_rate": 1.1195020746887968e-05, "loss": 0.3489, "step": 26540 }, { "epoch": 22.025726141078838, "grad_norm": 32.57848358154297, "learning_rate": 1.11946887966805e-05, "loss": 0.3153, "step": 26541 }, { "epoch": 22.02655601659751, "grad_norm": 69.92996978759766, "learning_rate": 1.1194356846473028e-05, "loss": 0.7746, "step": 26542 }, { "epoch": 22.027385892116182, "grad_norm": 44.961666107177734, "learning_rate": 1.119402489626556e-05, "loss": 0.7422, "step": 26543 }, { "epoch": 22.028215767634855, "grad_norm": 17.782365798950195, "learning_rate": 1.1193692946058093e-05, "loss": 0.2697, "step": 26544 }, { "epoch": 22.029045643153527, "grad_norm": 77.47152709960938, "learning_rate": 1.1193360995850625e-05, "loss": 0.5309, "step": 26545 }, { "epoch": 22.0298755186722, "grad_norm": 49.02627944946289, "learning_rate": 1.1193029045643153e-05, "loss": 0.6932, "step": 26546 }, { "epoch": 22.03070539419087, "grad_norm": 29.298917770385742, "learning_rate": 1.1192697095435686e-05, "loss": 0.4972, "step": 26547 }, { "epoch": 22.031535269709543, "grad_norm": 50.59614181518555, "learning_rate": 1.1192365145228216e-05, "loss": 0.7262, "step": 26548 }, { "epoch": 22.032365145228216, "grad_norm": 27.628501892089844, "learning_rate": 1.1192033195020748e-05, "loss": 0.2938, "step": 26549 }, { "epoch": 22.033195020746888, "grad_norm": 21.26498031616211, "learning_rate": 1.119170124481328e-05, "loss": 0.3129, "step": 26550 }, { "epoch": 22.03402489626556, "grad_norm": 20.839218139648438, "learning_rate": 1.1191369294605809e-05, "loss": 0.5163, "step": 26551 }, { "epoch": 22.034854771784232, "grad_norm": 115.09748077392578, "learning_rate": 1.1191037344398341e-05, "loss": 
0.4627, "step": 26552 }, { "epoch": 22.035684647302904, "grad_norm": 21.858612060546875, "learning_rate": 1.1190705394190873e-05, "loss": 0.4154, "step": 26553 }, { "epoch": 22.036514522821577, "grad_norm": 73.96235656738281, "learning_rate": 1.1190373443983405e-05, "loss": 0.3969, "step": 26554 }, { "epoch": 22.03734439834025, "grad_norm": 29.628089904785156, "learning_rate": 1.1190041493775934e-05, "loss": 0.6665, "step": 26555 }, { "epoch": 22.03817427385892, "grad_norm": 23.474950790405273, "learning_rate": 1.1189709543568466e-05, "loss": 0.4517, "step": 26556 }, { "epoch": 22.039004149377593, "grad_norm": 77.03372192382812, "learning_rate": 1.1189377593360996e-05, "loss": 0.8712, "step": 26557 }, { "epoch": 22.039834024896265, "grad_norm": 38.18049240112305, "learning_rate": 1.1189045643153529e-05, "loss": 0.5549, "step": 26558 }, { "epoch": 22.040663900414938, "grad_norm": 41.87954330444336, "learning_rate": 1.1188713692946059e-05, "loss": 0.4725, "step": 26559 }, { "epoch": 22.04149377593361, "grad_norm": 38.78015899658203, "learning_rate": 1.118838174273859e-05, "loss": 0.2631, "step": 26560 }, { "epoch": 22.042323651452282, "grad_norm": 93.6047134399414, "learning_rate": 1.1188049792531121e-05, "loss": 0.9143, "step": 26561 }, { "epoch": 22.043153526970954, "grad_norm": 21.412433624267578, "learning_rate": 1.1187717842323654e-05, "loss": 0.3313, "step": 26562 }, { "epoch": 22.043983402489626, "grad_norm": 41.55926513671875, "learning_rate": 1.1187385892116182e-05, "loss": 0.9758, "step": 26563 }, { "epoch": 22.0448132780083, "grad_norm": 36.527503967285156, "learning_rate": 1.1187053941908714e-05, "loss": 0.5149, "step": 26564 }, { "epoch": 22.04564315352697, "grad_norm": 31.064180374145508, "learning_rate": 1.1186721991701247e-05, "loss": 0.5566, "step": 26565 }, { "epoch": 22.046473029045643, "grad_norm": 43.08561706542969, "learning_rate": 1.1186390041493777e-05, "loss": 0.5791, "step": 26566 }, { "epoch": 22.047302904564315, "grad_norm": 
17.3187255859375, "learning_rate": 1.1186058091286307e-05, "loss": 0.4085, "step": 26567 }, { "epoch": 22.048132780082987, "grad_norm": 11.295594215393066, "learning_rate": 1.118572614107884e-05, "loss": 0.2233, "step": 26568 }, { "epoch": 22.04896265560166, "grad_norm": 13.226165771484375, "learning_rate": 1.118539419087137e-05, "loss": 0.2457, "step": 26569 }, { "epoch": 22.04979253112033, "grad_norm": 34.238975524902344, "learning_rate": 1.1185062240663902e-05, "loss": 0.4899, "step": 26570 }, { "epoch": 22.050622406639004, "grad_norm": 58.722747802734375, "learning_rate": 1.118473029045643e-05, "loss": 0.4443, "step": 26571 }, { "epoch": 22.051452282157676, "grad_norm": 211.56622314453125, "learning_rate": 1.1184398340248963e-05, "loss": 0.5608, "step": 26572 }, { "epoch": 22.05228215767635, "grad_norm": 126.80657196044922, "learning_rate": 1.1184066390041495e-05, "loss": 0.2618, "step": 26573 }, { "epoch": 22.05311203319502, "grad_norm": 32.44389343261719, "learning_rate": 1.1183734439834027e-05, "loss": 0.3216, "step": 26574 }, { "epoch": 22.053941908713693, "grad_norm": 30.15544891357422, "learning_rate": 1.1183402489626556e-05, "loss": 0.4165, "step": 26575 }, { "epoch": 22.054771784232365, "grad_norm": 59.964210510253906, "learning_rate": 1.1183070539419088e-05, "loss": 0.4807, "step": 26576 }, { "epoch": 22.055601659751037, "grad_norm": 51.355613708496094, "learning_rate": 1.118273858921162e-05, "loss": 0.8676, "step": 26577 }, { "epoch": 22.05643153526971, "grad_norm": 130.42372131347656, "learning_rate": 1.118240663900415e-05, "loss": 0.863, "step": 26578 }, { "epoch": 22.05726141078838, "grad_norm": 66.85857391357422, "learning_rate": 1.1182074688796682e-05, "loss": 0.5195, "step": 26579 }, { "epoch": 22.058091286307054, "grad_norm": 36.09563446044922, "learning_rate": 1.1181742738589211e-05, "loss": 0.6332, "step": 26580 }, { "epoch": 22.058921161825726, "grad_norm": 31.981151580810547, "learning_rate": 1.1181410788381743e-05, "loss": 0.4834, "step": 
26581 }, { "epoch": 22.059751037344398, "grad_norm": 36.3726692199707, "learning_rate": 1.1181078838174275e-05, "loss": 0.6891, "step": 26582 }, { "epoch": 22.06058091286307, "grad_norm": 14.19552993774414, "learning_rate": 1.1180746887966808e-05, "loss": 0.3049, "step": 26583 }, { "epoch": 22.061410788381743, "grad_norm": 35.3481559753418, "learning_rate": 1.1180414937759336e-05, "loss": 0.6113, "step": 26584 }, { "epoch": 22.062240663900415, "grad_norm": 38.320919036865234, "learning_rate": 1.1180082987551868e-05, "loss": 0.2188, "step": 26585 }, { "epoch": 22.063070539419087, "grad_norm": 75.07867431640625, "learning_rate": 1.1179751037344399e-05, "loss": 0.7785, "step": 26586 }, { "epoch": 22.06390041493776, "grad_norm": 31.961761474609375, "learning_rate": 1.1179419087136931e-05, "loss": 0.3482, "step": 26587 }, { "epoch": 22.06473029045643, "grad_norm": 43.685089111328125, "learning_rate": 1.1179087136929461e-05, "loss": 0.4703, "step": 26588 }, { "epoch": 22.065560165975104, "grad_norm": 50.415218353271484, "learning_rate": 1.1178755186721992e-05, "loss": 0.6095, "step": 26589 }, { "epoch": 22.066390041493776, "grad_norm": 16.14346694946289, "learning_rate": 1.1178423236514524e-05, "loss": 0.2609, "step": 26590 }, { "epoch": 22.067219917012448, "grad_norm": 41.65348815917969, "learning_rate": 1.1178091286307056e-05, "loss": 0.4075, "step": 26591 }, { "epoch": 22.06804979253112, "grad_norm": 22.280820846557617, "learning_rate": 1.1177759336099585e-05, "loss": 0.2855, "step": 26592 }, { "epoch": 22.068879668049792, "grad_norm": 29.088417053222656, "learning_rate": 1.1177427385892117e-05, "loss": 0.4429, "step": 26593 }, { "epoch": 22.069709543568464, "grad_norm": 105.26921844482422, "learning_rate": 1.1177095435684649e-05, "loss": 0.7708, "step": 26594 }, { "epoch": 22.070539419087137, "grad_norm": 75.1776123046875, "learning_rate": 1.117676348547718e-05, "loss": 0.5212, "step": 26595 }, { "epoch": 22.07136929460581, "grad_norm": 30.16138458251953, 
"learning_rate": 1.117643153526971e-05, "loss": 0.3917, "step": 26596 }, { "epoch": 22.07219917012448, "grad_norm": 33.19521713256836, "learning_rate": 1.1176099585062242e-05, "loss": 0.4139, "step": 26597 }, { "epoch": 22.073029045643153, "grad_norm": 93.63212585449219, "learning_rate": 1.1175767634854772e-05, "loss": 0.3352, "step": 26598 }, { "epoch": 22.073858921161825, "grad_norm": 51.29605484008789, "learning_rate": 1.1175435684647304e-05, "loss": 0.4753, "step": 26599 }, { "epoch": 22.074688796680498, "grad_norm": 30.460296630859375, "learning_rate": 1.1175103734439835e-05, "loss": 0.4872, "step": 26600 }, { "epoch": 22.07551867219917, "grad_norm": 21.2217960357666, "learning_rate": 1.1174771784232365e-05, "loss": 0.2813, "step": 26601 }, { "epoch": 22.076348547717842, "grad_norm": 16.109935760498047, "learning_rate": 1.1174439834024897e-05, "loss": 0.3178, "step": 26602 }, { "epoch": 22.077178423236514, "grad_norm": 40.85850524902344, "learning_rate": 1.117410788381743e-05, "loss": 1.0188, "step": 26603 }, { "epoch": 22.078008298755186, "grad_norm": 55.839447021484375, "learning_rate": 1.117377593360996e-05, "loss": 0.4011, "step": 26604 }, { "epoch": 22.07883817427386, "grad_norm": 99.33685302734375, "learning_rate": 1.117344398340249e-05, "loss": 0.7108, "step": 26605 }, { "epoch": 22.07966804979253, "grad_norm": 45.521183013916016, "learning_rate": 1.1173112033195022e-05, "loss": 0.4355, "step": 26606 }, { "epoch": 22.080497925311203, "grad_norm": 23.370676040649414, "learning_rate": 1.1172780082987553e-05, "loss": 0.389, "step": 26607 }, { "epoch": 22.081327800829875, "grad_norm": 31.365604400634766, "learning_rate": 1.1172448132780085e-05, "loss": 0.3778, "step": 26608 }, { "epoch": 22.082157676348547, "grad_norm": 58.69792556762695, "learning_rate": 1.1172116182572614e-05, "loss": 0.6916, "step": 26609 }, { "epoch": 22.08298755186722, "grad_norm": 12.434764862060547, "learning_rate": 1.1171784232365146e-05, "loss": 0.2346, "step": 26610 }, { "epoch": 
22.083817427385892, "grad_norm": 33.053375244140625, "learning_rate": 1.1171452282157678e-05, "loss": 0.4984, "step": 26611 }, { "epoch": 22.084647302904564, "grad_norm": 32.75045394897461, "learning_rate": 1.117112033195021e-05, "loss": 0.3835, "step": 26612 }, { "epoch": 22.085477178423236, "grad_norm": 124.89019775390625, "learning_rate": 1.1170788381742739e-05, "loss": 0.4216, "step": 26613 }, { "epoch": 22.08630705394191, "grad_norm": 95.96635437011719, "learning_rate": 1.117045643153527e-05, "loss": 0.3721, "step": 26614 }, { "epoch": 22.08713692946058, "grad_norm": 77.11275482177734, "learning_rate": 1.1170124481327803e-05, "loss": 0.7229, "step": 26615 }, { "epoch": 22.087966804979253, "grad_norm": 47.65522003173828, "learning_rate": 1.1169792531120333e-05, "loss": 0.7096, "step": 26616 }, { "epoch": 22.088796680497925, "grad_norm": 45.77704620361328, "learning_rate": 1.1169460580912864e-05, "loss": 0.6242, "step": 26617 }, { "epoch": 22.089626556016597, "grad_norm": 27.161334991455078, "learning_rate": 1.1169128630705394e-05, "loss": 0.3903, "step": 26618 }, { "epoch": 22.09045643153527, "grad_norm": 100.26119232177734, "learning_rate": 1.1168796680497926e-05, "loss": 0.979, "step": 26619 }, { "epoch": 22.09128630705394, "grad_norm": 17.669694900512695, "learning_rate": 1.1168464730290458e-05, "loss": 0.3227, "step": 26620 }, { "epoch": 22.092116182572614, "grad_norm": 25.86861228942871, "learning_rate": 1.1168132780082987e-05, "loss": 0.3724, "step": 26621 }, { "epoch": 22.092946058091286, "grad_norm": 25.727476119995117, "learning_rate": 1.1167800829875519e-05, "loss": 0.4283, "step": 26622 }, { "epoch": 22.093775933609958, "grad_norm": 30.21375274658203, "learning_rate": 1.1167468879668051e-05, "loss": 0.4189, "step": 26623 }, { "epoch": 22.09460580912863, "grad_norm": 30.142839431762695, "learning_rate": 1.1167136929460583e-05, "loss": 0.3344, "step": 26624 }, { "epoch": 22.095435684647303, "grad_norm": 10.915499687194824, "learning_rate": 
1.1166804979253112e-05, "loss": 0.3225, "step": 26625 }, { "epoch": 22.096265560165975, "grad_norm": 34.83531951904297, "learning_rate": 1.1166473029045644e-05, "loss": 0.4919, "step": 26626 }, { "epoch": 22.097095435684647, "grad_norm": 60.32775115966797, "learning_rate": 1.1166141078838175e-05, "loss": 0.6421, "step": 26627 }, { "epoch": 22.09792531120332, "grad_norm": 24.171531677246094, "learning_rate": 1.1165809128630707e-05, "loss": 0.2608, "step": 26628 }, { "epoch": 22.09875518672199, "grad_norm": 68.69241333007812, "learning_rate": 1.1165477178423239e-05, "loss": 0.91, "step": 26629 }, { "epoch": 22.099585062240664, "grad_norm": 21.037029266357422, "learning_rate": 1.1165145228215767e-05, "loss": 0.5555, "step": 26630 }, { "epoch": 22.100414937759336, "grad_norm": 16.129222869873047, "learning_rate": 1.11648132780083e-05, "loss": 0.2415, "step": 26631 }, { "epoch": 22.101244813278008, "grad_norm": 37.14611053466797, "learning_rate": 1.1164481327800832e-05, "loss": 0.2865, "step": 26632 }, { "epoch": 22.10207468879668, "grad_norm": 52.14533233642578, "learning_rate": 1.1164149377593362e-05, "loss": 0.7474, "step": 26633 }, { "epoch": 22.102904564315352, "grad_norm": 30.070554733276367, "learning_rate": 1.1163817427385892e-05, "loss": 0.261, "step": 26634 }, { "epoch": 22.103734439834025, "grad_norm": 49.86593246459961, "learning_rate": 1.1163485477178425e-05, "loss": 0.7986, "step": 26635 }, { "epoch": 22.104564315352697, "grad_norm": 34.44419860839844, "learning_rate": 1.1163153526970955e-05, "loss": 0.4362, "step": 26636 }, { "epoch": 22.10539419087137, "grad_norm": 110.96945190429688, "learning_rate": 1.1162821576763487e-05, "loss": 0.5484, "step": 26637 }, { "epoch": 22.10622406639004, "grad_norm": 25.9169979095459, "learning_rate": 1.1162489626556018e-05, "loss": 0.4367, "step": 26638 }, { "epoch": 22.107053941908713, "grad_norm": 44.64999771118164, "learning_rate": 1.1162157676348548e-05, "loss": 0.5395, "step": 26639 }, { "epoch": 22.107883817427386, 
"grad_norm": 11.8391752243042, "learning_rate": 1.116182572614108e-05, "loss": 0.3433, "step": 26640 }, { "epoch": 22.108713692946058, "grad_norm": 56.99834060668945, "learning_rate": 1.1161493775933612e-05, "loss": 0.5836, "step": 26641 }, { "epoch": 22.10954356846473, "grad_norm": 136.03280639648438, "learning_rate": 1.1161161825726141e-05, "loss": 1.3454, "step": 26642 }, { "epoch": 22.110373443983402, "grad_norm": 75.60426330566406, "learning_rate": 1.1160829875518673e-05, "loss": 0.5957, "step": 26643 }, { "epoch": 22.111203319502074, "grad_norm": 25.28122329711914, "learning_rate": 1.1160497925311205e-05, "loss": 0.3688, "step": 26644 }, { "epoch": 22.112033195020746, "grad_norm": 44.84495162963867, "learning_rate": 1.1160165975103736e-05, "loss": 0.4169, "step": 26645 }, { "epoch": 22.11286307053942, "grad_norm": 39.32244873046875, "learning_rate": 1.1159834024896266e-05, "loss": 0.4693, "step": 26646 }, { "epoch": 22.11369294605809, "grad_norm": 65.0054931640625, "learning_rate": 1.1159502074688798e-05, "loss": 0.4183, "step": 26647 }, { "epoch": 22.114522821576763, "grad_norm": 43.788082122802734, "learning_rate": 1.1159170124481328e-05, "loss": 0.3937, "step": 26648 }, { "epoch": 22.115352697095435, "grad_norm": 140.1504669189453, "learning_rate": 1.115883817427386e-05, "loss": 0.6942, "step": 26649 }, { "epoch": 22.116182572614107, "grad_norm": 17.579957962036133, "learning_rate": 1.115850622406639e-05, "loss": 0.2984, "step": 26650 }, { "epoch": 22.11701244813278, "grad_norm": 91.18191528320312, "learning_rate": 1.1158174273858921e-05, "loss": 0.5092, "step": 26651 }, { "epoch": 22.117842323651452, "grad_norm": 18.02741813659668, "learning_rate": 1.1157842323651453e-05, "loss": 0.3308, "step": 26652 }, { "epoch": 22.118672199170124, "grad_norm": 56.82871627807617, "learning_rate": 1.1157510373443986e-05, "loss": 0.7299, "step": 26653 }, { "epoch": 22.119502074688796, "grad_norm": 28.408231735229492, "learning_rate": 1.1157178423236514e-05, "loss": 
0.5662, "step": 26654 }, { "epoch": 22.12033195020747, "grad_norm": 40.893592834472656, "learning_rate": 1.1156846473029046e-05, "loss": 0.4565, "step": 26655 }, { "epoch": 22.12116182572614, "grad_norm": 83.9028549194336, "learning_rate": 1.1156514522821577e-05, "loss": 0.7082, "step": 26656 }, { "epoch": 22.121991701244813, "grad_norm": 21.64937400817871, "learning_rate": 1.1156182572614109e-05, "loss": 0.3863, "step": 26657 }, { "epoch": 22.122821576763485, "grad_norm": 38.93084716796875, "learning_rate": 1.1155850622406641e-05, "loss": 0.4289, "step": 26658 }, { "epoch": 22.123651452282157, "grad_norm": 40.950775146484375, "learning_rate": 1.115551867219917e-05, "loss": 0.4246, "step": 26659 }, { "epoch": 22.12448132780083, "grad_norm": 29.283981323242188, "learning_rate": 1.1155186721991702e-05, "loss": 0.3887, "step": 26660 }, { "epoch": 22.1253112033195, "grad_norm": 47.99026870727539, "learning_rate": 1.1154854771784234e-05, "loss": 0.9032, "step": 26661 }, { "epoch": 22.126141078838174, "grad_norm": 41.254730224609375, "learning_rate": 1.1154522821576766e-05, "loss": 0.445, "step": 26662 }, { "epoch": 22.126970954356846, "grad_norm": 31.18075942993164, "learning_rate": 1.1154190871369295e-05, "loss": 0.354, "step": 26663 }, { "epoch": 22.127800829875518, "grad_norm": 44.771751403808594, "learning_rate": 1.1153858921161827e-05, "loss": 0.3992, "step": 26664 }, { "epoch": 22.12863070539419, "grad_norm": 33.39854431152344, "learning_rate": 1.1153526970954357e-05, "loss": 0.6357, "step": 26665 }, { "epoch": 22.129460580912863, "grad_norm": 74.53475952148438, "learning_rate": 1.115319502074689e-05, "loss": 0.3822, "step": 26666 }, { "epoch": 22.130290456431535, "grad_norm": 70.8080825805664, "learning_rate": 1.115286307053942e-05, "loss": 0.7661, "step": 26667 }, { "epoch": 22.131120331950207, "grad_norm": 21.339311599731445, "learning_rate": 1.115253112033195e-05, "loss": 0.4258, "step": 26668 }, { "epoch": 22.13195020746888, "grad_norm": 53.48051834106445, 
"learning_rate": 1.1152199170124482e-05, "loss": 0.8871, "step": 26669 }, { "epoch": 22.13278008298755, "grad_norm": 17.34941864013672, "learning_rate": 1.1151867219917014e-05, "loss": 0.3051, "step": 26670 }, { "epoch": 22.133609958506224, "grad_norm": 28.09271240234375, "learning_rate": 1.1151535269709543e-05, "loss": 0.384, "step": 26671 }, { "epoch": 22.134439834024896, "grad_norm": 59.96942901611328, "learning_rate": 1.1151203319502075e-05, "loss": 0.6456, "step": 26672 }, { "epoch": 22.135269709543568, "grad_norm": 106.53512573242188, "learning_rate": 1.1150871369294607e-05, "loss": 0.9075, "step": 26673 }, { "epoch": 22.13609958506224, "grad_norm": 82.08002471923828, "learning_rate": 1.1150539419087138e-05, "loss": 0.6377, "step": 26674 }, { "epoch": 22.136929460580912, "grad_norm": 52.436683654785156, "learning_rate": 1.1150207468879668e-05, "loss": 0.5976, "step": 26675 }, { "epoch": 22.137759336099585, "grad_norm": 29.982324600219727, "learning_rate": 1.11498755186722e-05, "loss": 0.4395, "step": 26676 }, { "epoch": 22.138589211618257, "grad_norm": 110.60018157958984, "learning_rate": 1.114954356846473e-05, "loss": 0.6491, "step": 26677 }, { "epoch": 22.13941908713693, "grad_norm": 47.7744140625, "learning_rate": 1.1149211618257263e-05, "loss": 0.5425, "step": 26678 }, { "epoch": 22.1402489626556, "grad_norm": 54.75325393676758, "learning_rate": 1.1148879668049792e-05, "loss": 0.5745, "step": 26679 }, { "epoch": 22.141078838174273, "grad_norm": 32.56810760498047, "learning_rate": 1.1148547717842324e-05, "loss": 0.7537, "step": 26680 }, { "epoch": 22.141908713692946, "grad_norm": 28.64600944519043, "learning_rate": 1.1148215767634856e-05, "loss": 0.3475, "step": 26681 }, { "epoch": 22.142738589211618, "grad_norm": 13.74524211883545, "learning_rate": 1.1147883817427388e-05, "loss": 0.1852, "step": 26682 }, { "epoch": 22.14356846473029, "grad_norm": 40.36956787109375, "learning_rate": 1.1147551867219918e-05, "loss": 0.6895, "step": 26683 }, { "epoch": 
22.144398340248962, "grad_norm": 48.238590240478516, "learning_rate": 1.1147219917012449e-05, "loss": 0.6473, "step": 26684 }, { "epoch": 22.145228215767634, "grad_norm": 69.66387176513672, "learning_rate": 1.1146887966804981e-05, "loss": 1.0809, "step": 26685 }, { "epoch": 22.146058091286307, "grad_norm": 11.214597702026367, "learning_rate": 1.1146556016597511e-05, "loss": 0.2925, "step": 26686 }, { "epoch": 22.14688796680498, "grad_norm": 33.4437370300293, "learning_rate": 1.1146224066390043e-05, "loss": 0.4711, "step": 26687 }, { "epoch": 22.14771784232365, "grad_norm": 41.67251205444336, "learning_rate": 1.1145892116182572e-05, "loss": 0.5003, "step": 26688 }, { "epoch": 22.148547717842323, "grad_norm": 113.08242797851562, "learning_rate": 1.1145560165975104e-05, "loss": 0.9207, "step": 26689 }, { "epoch": 22.149377593360995, "grad_norm": 116.30181884765625, "learning_rate": 1.1145228215767636e-05, "loss": 0.8913, "step": 26690 }, { "epoch": 22.150207468879668, "grad_norm": 13.328124046325684, "learning_rate": 1.1144896265560168e-05, "loss": 0.2637, "step": 26691 }, { "epoch": 22.15103734439834, "grad_norm": 50.73066711425781, "learning_rate": 1.1144564315352697e-05, "loss": 1.1102, "step": 26692 }, { "epoch": 22.151867219917012, "grad_norm": 55.11412048339844, "learning_rate": 1.114423236514523e-05, "loss": 0.4162, "step": 26693 }, { "epoch": 22.152697095435684, "grad_norm": 64.64773559570312, "learning_rate": 1.1143900414937761e-05, "loss": 0.9302, "step": 26694 }, { "epoch": 22.153526970954356, "grad_norm": 32.504608154296875, "learning_rate": 1.1143568464730292e-05, "loss": 0.3168, "step": 26695 }, { "epoch": 22.15435684647303, "grad_norm": 52.621131896972656, "learning_rate": 1.1143236514522822e-05, "loss": 0.4566, "step": 26696 }, { "epoch": 22.1551867219917, "grad_norm": 36.488258361816406, "learning_rate": 1.1142904564315353e-05, "loss": 0.8962, "step": 26697 }, { "epoch": 22.156016597510373, "grad_norm": 21.061037063598633, "learning_rate": 
1.1142572614107885e-05, "loss": 0.2357, "step": 26698 }, { "epoch": 22.156846473029045, "grad_norm": 14.655131340026855, "learning_rate": 1.1142240663900417e-05, "loss": 0.2874, "step": 26699 }, { "epoch": 22.157676348547717, "grad_norm": 40.96707534790039, "learning_rate": 1.1141908713692946e-05, "loss": 0.3731, "step": 26700 }, { "epoch": 22.15850622406639, "grad_norm": 66.18376159667969, "learning_rate": 1.1141576763485478e-05, "loss": 0.7342, "step": 26701 }, { "epoch": 22.15933609958506, "grad_norm": 46.699188232421875, "learning_rate": 1.114124481327801e-05, "loss": 0.4732, "step": 26702 }, { "epoch": 22.160165975103734, "grad_norm": 9.891133308410645, "learning_rate": 1.114091286307054e-05, "loss": 0.3139, "step": 26703 }, { "epoch": 22.160995850622406, "grad_norm": 54.861263275146484, "learning_rate": 1.114058091286307e-05, "loss": 0.5391, "step": 26704 }, { "epoch": 22.16182572614108, "grad_norm": 16.90540313720703, "learning_rate": 1.1140248962655603e-05, "loss": 0.2289, "step": 26705 }, { "epoch": 22.16265560165975, "grad_norm": 65.85689544677734, "learning_rate": 1.1139917012448133e-05, "loss": 0.5495, "step": 26706 }, { "epoch": 22.163485477178423, "grad_norm": 47.83174514770508, "learning_rate": 1.1139585062240665e-05, "loss": 0.594, "step": 26707 }, { "epoch": 22.164315352697095, "grad_norm": 135.16441345214844, "learning_rate": 1.1139253112033197e-05, "loss": 0.4067, "step": 26708 }, { "epoch": 22.165145228215767, "grad_norm": 32.05537796020508, "learning_rate": 1.1138921161825726e-05, "loss": 0.4035, "step": 26709 }, { "epoch": 22.16597510373444, "grad_norm": 11.935115814208984, "learning_rate": 1.1138589211618258e-05, "loss": 0.2979, "step": 26710 }, { "epoch": 22.16680497925311, "grad_norm": 31.77410316467285, "learning_rate": 1.113825726141079e-05, "loss": 0.3398, "step": 26711 }, { "epoch": 22.167634854771784, "grad_norm": 82.62274169921875, "learning_rate": 1.113792531120332e-05, "loss": 0.4545, "step": 26712 }, { "epoch": 22.168464730290456, 
"grad_norm": 86.31169891357422, "learning_rate": 1.1137593360995851e-05, "loss": 0.8867, "step": 26713 }, { "epoch": 22.169294605809128, "grad_norm": 11.685254096984863, "learning_rate": 1.1137261410788383e-05, "loss": 0.2487, "step": 26714 }, { "epoch": 22.1701244813278, "grad_norm": 59.229942321777344, "learning_rate": 1.1136929460580914e-05, "loss": 0.6975, "step": 26715 }, { "epoch": 22.170954356846472, "grad_norm": 52.85774230957031, "learning_rate": 1.1136597510373446e-05, "loss": 0.5979, "step": 26716 }, { "epoch": 22.171784232365145, "grad_norm": 94.75883483886719, "learning_rate": 1.1136265560165974e-05, "loss": 0.7553, "step": 26717 }, { "epoch": 22.172614107883817, "grad_norm": 45.592864990234375, "learning_rate": 1.1135933609958507e-05, "loss": 0.6051, "step": 26718 }, { "epoch": 22.17344398340249, "grad_norm": 63.733497619628906, "learning_rate": 1.1135601659751039e-05, "loss": 0.6979, "step": 26719 }, { "epoch": 22.17427385892116, "grad_norm": 70.31209564208984, "learning_rate": 1.113526970954357e-05, "loss": 0.4339, "step": 26720 }, { "epoch": 22.175103734439833, "grad_norm": 43.285438537597656, "learning_rate": 1.11349377593361e-05, "loss": 0.8684, "step": 26721 }, { "epoch": 22.175933609958506, "grad_norm": 11.727350234985352, "learning_rate": 1.1134605809128632e-05, "loss": 0.2627, "step": 26722 }, { "epoch": 22.176763485477178, "grad_norm": 39.86345672607422, "learning_rate": 1.1134273858921164e-05, "loss": 0.5319, "step": 26723 }, { "epoch": 22.17759336099585, "grad_norm": 25.365175247192383, "learning_rate": 1.1133941908713694e-05, "loss": 0.3157, "step": 26724 }, { "epoch": 22.178423236514522, "grad_norm": 28.376148223876953, "learning_rate": 1.1133609958506225e-05, "loss": 0.3314, "step": 26725 }, { "epoch": 22.179253112033194, "grad_norm": 19.348913192749023, "learning_rate": 1.1133278008298755e-05, "loss": 0.3008, "step": 26726 }, { "epoch": 22.180082987551867, "grad_norm": 20.01818084716797, "learning_rate": 1.1132946058091287e-05, "loss": 
0.3288, "step": 26727 }, { "epoch": 22.18091286307054, "grad_norm": 88.53036499023438, "learning_rate": 1.1132614107883819e-05, "loss": 0.9506, "step": 26728 }, { "epoch": 22.18174273858921, "grad_norm": 31.897445678710938, "learning_rate": 1.1132282157676348e-05, "loss": 0.408, "step": 26729 }, { "epoch": 22.182572614107883, "grad_norm": 83.69638061523438, "learning_rate": 1.113195020746888e-05, "loss": 0.4104, "step": 26730 }, { "epoch": 22.183402489626555, "grad_norm": 146.2113800048828, "learning_rate": 1.1131618257261412e-05, "loss": 0.376, "step": 26731 }, { "epoch": 22.184232365145228, "grad_norm": 45.70953369140625, "learning_rate": 1.1131286307053944e-05, "loss": 0.3031, "step": 26732 }, { "epoch": 22.1850622406639, "grad_norm": 135.3907928466797, "learning_rate": 1.1130954356846473e-05, "loss": 1.0418, "step": 26733 }, { "epoch": 22.185892116182572, "grad_norm": 21.791837692260742, "learning_rate": 1.1130622406639005e-05, "loss": 0.2556, "step": 26734 }, { "epoch": 22.186721991701244, "grad_norm": 57.43016815185547, "learning_rate": 1.1130290456431535e-05, "loss": 0.6973, "step": 26735 }, { "epoch": 22.187551867219916, "grad_norm": 68.00945281982422, "learning_rate": 1.1129958506224068e-05, "loss": 0.7125, "step": 26736 }, { "epoch": 22.18838174273859, "grad_norm": 80.98255157470703, "learning_rate": 1.11296265560166e-05, "loss": 1.6463, "step": 26737 }, { "epoch": 22.18921161825726, "grad_norm": 21.919986724853516, "learning_rate": 1.1129294605809128e-05, "loss": 0.3323, "step": 26738 }, { "epoch": 22.190041493775933, "grad_norm": 28.957109451293945, "learning_rate": 1.112896265560166e-05, "loss": 0.4735, "step": 26739 }, { "epoch": 22.190871369294605, "grad_norm": 62.25115203857422, "learning_rate": 1.1128630705394193e-05, "loss": 0.3603, "step": 26740 }, { "epoch": 22.191701244813277, "grad_norm": 61.57569122314453, "learning_rate": 1.1128298755186725e-05, "loss": 0.3826, "step": 26741 }, { "epoch": 22.19253112033195, "grad_norm": 15.049283981323242, 
"learning_rate": 1.1127966804979253e-05, "loss": 0.2505, "step": 26742 }, { "epoch": 22.19336099585062, "grad_norm": 84.93519592285156, "learning_rate": 1.1127634854771786e-05, "loss": 0.7886, "step": 26743 }, { "epoch": 22.194190871369294, "grad_norm": 66.53797912597656, "learning_rate": 1.1127302904564316e-05, "loss": 0.6337, "step": 26744 }, { "epoch": 22.195020746887966, "grad_norm": 28.487293243408203, "learning_rate": 1.1126970954356848e-05, "loss": 0.2959, "step": 26745 }, { "epoch": 22.19585062240664, "grad_norm": 56.849815368652344, "learning_rate": 1.1126639004149378e-05, "loss": 0.6887, "step": 26746 }, { "epoch": 22.19668049792531, "grad_norm": 22.748550415039062, "learning_rate": 1.1126307053941909e-05, "loss": 0.3914, "step": 26747 }, { "epoch": 22.197510373443983, "grad_norm": 41.33086013793945, "learning_rate": 1.1125975103734441e-05, "loss": 0.6448, "step": 26748 }, { "epoch": 22.198340248962655, "grad_norm": 26.613462448120117, "learning_rate": 1.1125643153526973e-05, "loss": 0.3305, "step": 26749 }, { "epoch": 22.199170124481327, "grad_norm": 36.16169357299805, "learning_rate": 1.1125311203319502e-05, "loss": 0.3799, "step": 26750 }, { "epoch": 22.2, "grad_norm": 40.569374084472656, "learning_rate": 1.1124979253112034e-05, "loss": 0.3552, "step": 26751 }, { "epoch": 22.20082987551867, "grad_norm": 23.274600982666016, "learning_rate": 1.1124647302904566e-05, "loss": 0.2798, "step": 26752 }, { "epoch": 22.201659751037344, "grad_norm": 26.275598526000977, "learning_rate": 1.1124315352697096e-05, "loss": 0.3611, "step": 26753 }, { "epoch": 22.202489626556016, "grad_norm": 15.037940979003906, "learning_rate": 1.1123983402489627e-05, "loss": 0.2683, "step": 26754 }, { "epoch": 22.203319502074688, "grad_norm": 74.26943969726562, "learning_rate": 1.1123651452282159e-05, "loss": 0.396, "step": 26755 }, { "epoch": 22.20414937759336, "grad_norm": 47.431026458740234, "learning_rate": 1.112331950207469e-05, "loss": 0.5188, "step": 26756 }, { "epoch": 
22.204979253112032, "grad_norm": 52.162818908691406, "learning_rate": 1.1122987551867221e-05, "loss": 0.6648, "step": 26757 }, { "epoch": 22.205809128630705, "grad_norm": 65.91805267333984, "learning_rate": 1.112265560165975e-05, "loss": 0.3645, "step": 26758 }, { "epoch": 22.206639004149377, "grad_norm": 30.090312957763672, "learning_rate": 1.1122323651452282e-05, "loss": 0.2826, "step": 26759 }, { "epoch": 22.20746887966805, "grad_norm": 15.088776588439941, "learning_rate": 1.1121991701244814e-05, "loss": 0.2751, "step": 26760 }, { "epoch": 22.20829875518672, "grad_norm": 27.132234573364258, "learning_rate": 1.1121659751037346e-05, "loss": 0.3939, "step": 26761 }, { "epoch": 22.209128630705393, "grad_norm": 160.6686248779297, "learning_rate": 1.1121327800829877e-05, "loss": 0.7501, "step": 26762 }, { "epoch": 22.209958506224066, "grad_norm": 35.573089599609375, "learning_rate": 1.1120995850622407e-05, "loss": 0.452, "step": 26763 }, { "epoch": 22.210788381742738, "grad_norm": 94.62017822265625, "learning_rate": 1.112066390041494e-05, "loss": 1.7405, "step": 26764 }, { "epoch": 22.21161825726141, "grad_norm": 58.95885467529297, "learning_rate": 1.112033195020747e-05, "loss": 0.6106, "step": 26765 }, { "epoch": 22.212448132780082, "grad_norm": 79.0636215209961, "learning_rate": 1.1120000000000002e-05, "loss": 0.9529, "step": 26766 }, { "epoch": 22.213278008298754, "grad_norm": 28.656686782836914, "learning_rate": 1.111966804979253e-05, "loss": 0.3366, "step": 26767 }, { "epoch": 22.214107883817427, "grad_norm": 81.75463104248047, "learning_rate": 1.1119336099585063e-05, "loss": 0.8264, "step": 26768 }, { "epoch": 22.2149377593361, "grad_norm": 52.74163055419922, "learning_rate": 1.1119004149377595e-05, "loss": 1.0708, "step": 26769 }, { "epoch": 22.21576763485477, "grad_norm": 20.390600204467773, "learning_rate": 1.1118672199170127e-05, "loss": 0.2945, "step": 26770 }, { "epoch": 22.216597510373443, "grad_norm": 67.35359191894531, "learning_rate": 
1.1118340248962656e-05, "loss": 0.5739, "step": 26771 }, { "epoch": 22.217427385892115, "grad_norm": 30.264694213867188, "learning_rate": 1.1118008298755188e-05, "loss": 0.3248, "step": 26772 }, { "epoch": 22.218257261410788, "grad_norm": 58.664546966552734, "learning_rate": 1.1117676348547718e-05, "loss": 0.5104, "step": 26773 }, { "epoch": 22.21908713692946, "grad_norm": 24.950559616088867, "learning_rate": 1.111734439834025e-05, "loss": 0.3506, "step": 26774 }, { "epoch": 22.219917012448132, "grad_norm": 50.9984245300293, "learning_rate": 1.111701244813278e-05, "loss": 0.6106, "step": 26775 }, { "epoch": 22.220746887966804, "grad_norm": 101.28441619873047, "learning_rate": 1.1116680497925311e-05, "loss": 0.5619, "step": 26776 }, { "epoch": 22.221576763485476, "grad_norm": 27.34362030029297, "learning_rate": 1.1116348547717843e-05, "loss": 0.6633, "step": 26777 }, { "epoch": 22.22240663900415, "grad_norm": 9.561736106872559, "learning_rate": 1.1116016597510375e-05, "loss": 0.2305, "step": 26778 }, { "epoch": 22.22323651452282, "grad_norm": 35.345489501953125, "learning_rate": 1.1115684647302904e-05, "loss": 0.5399, "step": 26779 }, { "epoch": 22.224066390041493, "grad_norm": 79.41685485839844, "learning_rate": 1.1115352697095436e-05, "loss": 0.5711, "step": 26780 }, { "epoch": 22.224896265560165, "grad_norm": 10.345274925231934, "learning_rate": 1.1115020746887968e-05, "loss": 0.2553, "step": 26781 }, { "epoch": 22.225726141078837, "grad_norm": 19.920639038085938, "learning_rate": 1.1114688796680499e-05, "loss": 0.3622, "step": 26782 }, { "epoch": 22.22655601659751, "grad_norm": 43.632362365722656, "learning_rate": 1.1114356846473029e-05, "loss": 0.711, "step": 26783 }, { "epoch": 22.22738589211618, "grad_norm": 22.14846420288086, "learning_rate": 1.1114024896265561e-05, "loss": 0.3832, "step": 26784 }, { "epoch": 22.228215767634854, "grad_norm": 51.43025588989258, "learning_rate": 1.1113692946058092e-05, "loss": 0.5956, "step": 26785 }, { "epoch": 
22.229045643153526, "grad_norm": 42.43967056274414, "learning_rate": 1.1113360995850624e-05, "loss": 0.5809, "step": 26786 }, { "epoch": 22.2298755186722, "grad_norm": 22.804662704467773, "learning_rate": 1.1113029045643156e-05, "loss": 0.3294, "step": 26787 }, { "epoch": 22.23070539419087, "grad_norm": 37.91945266723633, "learning_rate": 1.1112697095435685e-05, "loss": 0.8607, "step": 26788 }, { "epoch": 22.231535269709543, "grad_norm": 41.265438079833984, "learning_rate": 1.1112365145228217e-05, "loss": 0.4746, "step": 26789 }, { "epoch": 22.232365145228215, "grad_norm": 40.80720520019531, "learning_rate": 1.1112033195020749e-05, "loss": 0.411, "step": 26790 }, { "epoch": 22.233195020746887, "grad_norm": 22.743412017822266, "learning_rate": 1.111170124481328e-05, "loss": 0.2365, "step": 26791 }, { "epoch": 22.23402489626556, "grad_norm": 20.18680763244629, "learning_rate": 1.111136929460581e-05, "loss": 0.2993, "step": 26792 }, { "epoch": 22.23485477178423, "grad_norm": 37.07846450805664, "learning_rate": 1.1111037344398342e-05, "loss": 0.5477, "step": 26793 }, { "epoch": 22.235684647302904, "grad_norm": 28.858322143554688, "learning_rate": 1.1110705394190872e-05, "loss": 0.3967, "step": 26794 }, { "epoch": 22.236514522821576, "grad_norm": 71.1828384399414, "learning_rate": 1.1110373443983404e-05, "loss": 0.4545, "step": 26795 }, { "epoch": 22.237344398340248, "grad_norm": 39.98051834106445, "learning_rate": 1.1110041493775933e-05, "loss": 0.487, "step": 26796 }, { "epoch": 22.23817427385892, "grad_norm": 8.93027114868164, "learning_rate": 1.1109709543568465e-05, "loss": 0.1886, "step": 26797 }, { "epoch": 22.239004149377593, "grad_norm": 48.107173919677734, "learning_rate": 1.1109377593360997e-05, "loss": 0.7796, "step": 26798 }, { "epoch": 22.239834024896265, "grad_norm": 114.49828338623047, "learning_rate": 1.110904564315353e-05, "loss": 0.7012, "step": 26799 }, { "epoch": 22.240663900414937, "grad_norm": 26.43545150756836, "learning_rate": 
1.1108713692946058e-05, "loss": 0.4435, "step": 26800 }, { "epoch": 22.24149377593361, "grad_norm": 69.3517074584961, "learning_rate": 1.110838174273859e-05, "loss": 0.6943, "step": 26801 }, { "epoch": 22.24232365145228, "grad_norm": 60.85002136230469, "learning_rate": 1.1108049792531122e-05, "loss": 0.4072, "step": 26802 }, { "epoch": 22.243153526970953, "grad_norm": 33.955352783203125, "learning_rate": 1.1107717842323653e-05, "loss": 0.5402, "step": 26803 }, { "epoch": 22.243983402489626, "grad_norm": 97.50037384033203, "learning_rate": 1.1107385892116183e-05, "loss": 0.7361, "step": 26804 }, { "epoch": 22.244813278008298, "grad_norm": 20.663869857788086, "learning_rate": 1.1107053941908713e-05, "loss": 0.2815, "step": 26805 }, { "epoch": 22.24564315352697, "grad_norm": 22.21554183959961, "learning_rate": 1.1106721991701246e-05, "loss": 0.3584, "step": 26806 }, { "epoch": 22.246473029045642, "grad_norm": 55.19511795043945, "learning_rate": 1.1106390041493778e-05, "loss": 0.3106, "step": 26807 }, { "epoch": 22.247302904564314, "grad_norm": 111.92243957519531, "learning_rate": 1.1106058091286306e-05, "loss": 0.4022, "step": 26808 }, { "epoch": 22.248132780082987, "grad_norm": 57.1818733215332, "learning_rate": 1.1105726141078839e-05, "loss": 0.6666, "step": 26809 }, { "epoch": 22.24896265560166, "grad_norm": 33.878841400146484, "learning_rate": 1.110539419087137e-05, "loss": 0.4891, "step": 26810 }, { "epoch": 22.24979253112033, "grad_norm": 34.22093200683594, "learning_rate": 1.1105062240663903e-05, "loss": 0.6656, "step": 26811 }, { "epoch": 22.250622406639003, "grad_norm": 13.760396003723145, "learning_rate": 1.1104730290456431e-05, "loss": 0.2278, "step": 26812 }, { "epoch": 22.251452282157675, "grad_norm": 27.894987106323242, "learning_rate": 1.1104398340248964e-05, "loss": 0.3081, "step": 26813 }, { "epoch": 22.252282157676348, "grad_norm": 39.51520919799805, "learning_rate": 1.1104066390041494e-05, "loss": 0.3664, "step": 26814 }, { "epoch": 
22.25311203319502, "grad_norm": 37.9908447265625, "learning_rate": 1.1103734439834026e-05, "loss": 0.5704, "step": 26815 }, { "epoch": 22.253941908713692, "grad_norm": 25.9239559173584, "learning_rate": 1.1103402489626558e-05, "loss": 0.4036, "step": 26816 }, { "epoch": 22.254771784232364, "grad_norm": 54.062015533447266, "learning_rate": 1.1103070539419087e-05, "loss": 0.6887, "step": 26817 }, { "epoch": 22.255601659751036, "grad_norm": 41.769046783447266, "learning_rate": 1.1102738589211619e-05, "loss": 0.3817, "step": 26818 }, { "epoch": 22.25643153526971, "grad_norm": 68.81334686279297, "learning_rate": 1.1102406639004151e-05, "loss": 0.849, "step": 26819 }, { "epoch": 22.25726141078838, "grad_norm": 53.433197021484375, "learning_rate": 1.1102074688796682e-05, "loss": 0.5169, "step": 26820 }, { "epoch": 22.258091286307053, "grad_norm": 34.20256805419922, "learning_rate": 1.1101742738589212e-05, "loss": 0.6829, "step": 26821 }, { "epoch": 22.258921161825725, "grad_norm": 84.16650390625, "learning_rate": 1.1101410788381744e-05, "loss": 0.9655, "step": 26822 }, { "epoch": 22.259751037344397, "grad_norm": 48.8539924621582, "learning_rate": 1.1101078838174274e-05, "loss": 0.5884, "step": 26823 }, { "epoch": 22.26058091286307, "grad_norm": 52.151885986328125, "learning_rate": 1.1100746887966807e-05, "loss": 1.0382, "step": 26824 }, { "epoch": 22.261410788381742, "grad_norm": 15.62128734588623, "learning_rate": 1.1100414937759337e-05, "loss": 0.2496, "step": 26825 }, { "epoch": 22.262240663900414, "grad_norm": 105.44745635986328, "learning_rate": 1.1100082987551867e-05, "loss": 0.7852, "step": 26826 }, { "epoch": 22.263070539419086, "grad_norm": 109.65994262695312, "learning_rate": 1.10997510373444e-05, "loss": 0.8703, "step": 26827 }, { "epoch": 22.26390041493776, "grad_norm": 78.74614715576172, "learning_rate": 1.1099419087136932e-05, "loss": 0.8064, "step": 26828 }, { "epoch": 22.26473029045643, "grad_norm": 28.77756690979004, "learning_rate": 
1.109908713692946e-05, "loss": 0.5136, "step": 26829 }, { "epoch": 22.265560165975103, "grad_norm": 13.251489639282227, "learning_rate": 1.1098755186721992e-05, "loss": 0.1928, "step": 26830 }, { "epoch": 22.266390041493775, "grad_norm": 112.80977630615234, "learning_rate": 1.1098423236514525e-05, "loss": 0.7304, "step": 26831 }, { "epoch": 22.267219917012447, "grad_norm": 76.83110046386719, "learning_rate": 1.1098091286307055e-05, "loss": 0.747, "step": 26832 }, { "epoch": 22.26804979253112, "grad_norm": 38.904396057128906, "learning_rate": 1.1097759336099585e-05, "loss": 0.3714, "step": 26833 }, { "epoch": 22.26887966804979, "grad_norm": 45.06443405151367, "learning_rate": 1.1097427385892116e-05, "loss": 0.6373, "step": 26834 }, { "epoch": 22.269709543568464, "grad_norm": 39.716121673583984, "learning_rate": 1.1097095435684648e-05, "loss": 0.7114, "step": 26835 }, { "epoch": 22.270539419087136, "grad_norm": 49.64698791503906, "learning_rate": 1.109676348547718e-05, "loss": 1.0031, "step": 26836 }, { "epoch": 22.271369294605808, "grad_norm": 19.132217407226562, "learning_rate": 1.1096431535269709e-05, "loss": 0.4068, "step": 26837 }, { "epoch": 22.27219917012448, "grad_norm": 50.3077392578125, "learning_rate": 1.1096099585062241e-05, "loss": 0.2884, "step": 26838 }, { "epoch": 22.273029045643153, "grad_norm": 51.33248519897461, "learning_rate": 1.1095767634854773e-05, "loss": 0.5716, "step": 26839 }, { "epoch": 22.273858921161825, "grad_norm": 122.24544525146484, "learning_rate": 1.1095435684647305e-05, "loss": 0.9475, "step": 26840 }, { "epoch": 22.274688796680497, "grad_norm": 61.664974212646484, "learning_rate": 1.1095103734439835e-05, "loss": 0.5464, "step": 26841 }, { "epoch": 22.27551867219917, "grad_norm": 32.926605224609375, "learning_rate": 1.1094771784232366e-05, "loss": 0.32, "step": 26842 }, { "epoch": 22.27634854771784, "grad_norm": 31.537662506103516, "learning_rate": 1.1094439834024896e-05, "loss": 0.5249, "step": 26843 }, { "epoch": 
22.277178423236514, "grad_norm": 41.282447814941406, "learning_rate": 1.1094107883817428e-05, "loss": 0.887, "step": 26844 }, { "epoch": 22.278008298755186, "grad_norm": 17.55438995361328, "learning_rate": 1.109377593360996e-05, "loss": 0.28, "step": 26845 }, { "epoch": 22.278838174273858, "grad_norm": 22.650028228759766, "learning_rate": 1.109344398340249e-05, "loss": 0.4474, "step": 26846 }, { "epoch": 22.27966804979253, "grad_norm": 35.29264831542969, "learning_rate": 1.1093112033195021e-05, "loss": 0.5022, "step": 26847 }, { "epoch": 22.280497925311202, "grad_norm": 86.48147583007812, "learning_rate": 1.1092780082987553e-05, "loss": 0.4182, "step": 26848 }, { "epoch": 22.281327800829875, "grad_norm": 29.35234832763672, "learning_rate": 1.1092448132780086e-05, "loss": 0.4116, "step": 26849 }, { "epoch": 22.282157676348547, "grad_norm": 24.545042037963867, "learning_rate": 1.1092116182572614e-05, "loss": 0.3696, "step": 26850 }, { "epoch": 22.28298755186722, "grad_norm": 20.802867889404297, "learning_rate": 1.1091784232365146e-05, "loss": 0.3566, "step": 26851 }, { "epoch": 22.28381742738589, "grad_norm": 16.60425567626953, "learning_rate": 1.1091452282157677e-05, "loss": 0.4769, "step": 26852 }, { "epoch": 22.284647302904563, "grad_norm": 12.495013236999512, "learning_rate": 1.1091120331950209e-05, "loss": 0.3724, "step": 26853 }, { "epoch": 22.285477178423236, "grad_norm": 46.607460021972656, "learning_rate": 1.109078838174274e-05, "loss": 0.324, "step": 26854 }, { "epoch": 22.286307053941908, "grad_norm": 45.62388610839844, "learning_rate": 1.109045643153527e-05, "loss": 1.0688, "step": 26855 }, { "epoch": 22.28713692946058, "grad_norm": 121.47708892822266, "learning_rate": 1.1090124481327802e-05, "loss": 0.7676, "step": 26856 }, { "epoch": 22.287966804979252, "grad_norm": 76.14944458007812, "learning_rate": 1.1089792531120334e-05, "loss": 0.4124, "step": 26857 }, { "epoch": 22.288796680497924, "grad_norm": 19.720190048217773, "learning_rate": 
1.1089460580912863e-05, "loss": 0.5175, "step": 26858 }, { "epoch": 22.289626556016596, "grad_norm": 51.02611541748047, "learning_rate": 1.1089128630705395e-05, "loss": 0.4828, "step": 26859 }, { "epoch": 22.29045643153527, "grad_norm": 10.096549987792969, "learning_rate": 1.1088796680497927e-05, "loss": 0.2134, "step": 26860 }, { "epoch": 22.29128630705394, "grad_norm": 14.619719505310059, "learning_rate": 1.1088464730290457e-05, "loss": 0.2827, "step": 26861 }, { "epoch": 22.292116182572613, "grad_norm": 18.559324264526367, "learning_rate": 1.1088132780082988e-05, "loss": 0.3507, "step": 26862 }, { "epoch": 22.292946058091285, "grad_norm": 37.718406677246094, "learning_rate": 1.108780082987552e-05, "loss": 0.4289, "step": 26863 }, { "epoch": 22.293775933609957, "grad_norm": 26.71547508239746, "learning_rate": 1.108746887966805e-05, "loss": 0.3992, "step": 26864 }, { "epoch": 22.29460580912863, "grad_norm": 49.20730209350586, "learning_rate": 1.1087136929460582e-05, "loss": 0.8252, "step": 26865 }, { "epoch": 22.295435684647302, "grad_norm": 29.9924259185791, "learning_rate": 1.1086804979253111e-05, "loss": 0.3931, "step": 26866 }, { "epoch": 22.296265560165974, "grad_norm": 25.067861557006836, "learning_rate": 1.1086473029045643e-05, "loss": 0.3006, "step": 26867 }, { "epoch": 22.297095435684646, "grad_norm": 9.895247459411621, "learning_rate": 1.1086141078838175e-05, "loss": 0.2262, "step": 26868 }, { "epoch": 22.29792531120332, "grad_norm": 31.001379013061523, "learning_rate": 1.1085809128630707e-05, "loss": 0.3821, "step": 26869 }, { "epoch": 22.29875518672199, "grad_norm": 83.14481353759766, "learning_rate": 1.1085477178423238e-05, "loss": 0.4096, "step": 26870 }, { "epoch": 22.299585062240663, "grad_norm": 52.48903274536133, "learning_rate": 1.1085145228215768e-05, "loss": 0.6452, "step": 26871 }, { "epoch": 22.300414937759335, "grad_norm": 17.65705680847168, "learning_rate": 1.10848132780083e-05, "loss": 0.3978, "step": 26872 }, { "epoch": 
22.301244813278007, "grad_norm": 69.34737396240234, "learning_rate": 1.108448132780083e-05, "loss": 0.6323, "step": 26873 }, { "epoch": 22.30207468879668, "grad_norm": 131.30010986328125, "learning_rate": 1.1084149377593363e-05, "loss": 0.5701, "step": 26874 }, { "epoch": 22.30290456431535, "grad_norm": 50.52722930908203, "learning_rate": 1.1083817427385892e-05, "loss": 0.5165, "step": 26875 }, { "epoch": 22.303734439834024, "grad_norm": 53.77658462524414, "learning_rate": 1.1083485477178424e-05, "loss": 0.7988, "step": 26876 }, { "epoch": 22.304564315352696, "grad_norm": 47.519683837890625, "learning_rate": 1.1083153526970956e-05, "loss": 0.3646, "step": 26877 }, { "epoch": 22.305394190871368, "grad_norm": 14.968464851379395, "learning_rate": 1.1082821576763488e-05, "loss": 0.287, "step": 26878 }, { "epoch": 22.30622406639004, "grad_norm": 14.56408405303955, "learning_rate": 1.1082489626556017e-05, "loss": 0.2523, "step": 26879 }, { "epoch": 22.307053941908713, "grad_norm": 30.65369415283203, "learning_rate": 1.1082157676348549e-05, "loss": 0.3903, "step": 26880 }, { "epoch": 22.307883817427385, "grad_norm": 32.30799102783203, "learning_rate": 1.108182572614108e-05, "loss": 0.4515, "step": 26881 }, { "epoch": 22.308713692946057, "grad_norm": 23.37839126586914, "learning_rate": 1.1081493775933611e-05, "loss": 0.2889, "step": 26882 }, { "epoch": 22.30954356846473, "grad_norm": 52.10930633544922, "learning_rate": 1.1081161825726142e-05, "loss": 0.4913, "step": 26883 }, { "epoch": 22.3103734439834, "grad_norm": 98.41741180419922, "learning_rate": 1.1080829875518672e-05, "loss": 0.801, "step": 26884 }, { "epoch": 22.311203319502074, "grad_norm": 23.91714859008789, "learning_rate": 1.1080497925311204e-05, "loss": 0.4905, "step": 26885 }, { "epoch": 22.312033195020746, "grad_norm": 69.44463348388672, "learning_rate": 1.1080165975103736e-05, "loss": 0.5502, "step": 26886 }, { "epoch": 22.312863070539418, "grad_norm": 36.643394470214844, "learning_rate": 
1.1079834024896265e-05, "loss": 0.4973, "step": 26887 }, { "epoch": 22.31369294605809, "grad_norm": 36.02939987182617, "learning_rate": 1.1079502074688797e-05, "loss": 0.3233, "step": 26888 }, { "epoch": 22.314522821576762, "grad_norm": 11.632492065429688, "learning_rate": 1.107917012448133e-05, "loss": 0.3594, "step": 26889 }, { "epoch": 22.315352697095435, "grad_norm": 87.36299896240234, "learning_rate": 1.107883817427386e-05, "loss": 0.4566, "step": 26890 }, { "epoch": 22.316182572614107, "grad_norm": 20.58204460144043, "learning_rate": 1.107850622406639e-05, "loss": 0.2645, "step": 26891 }, { "epoch": 22.31701244813278, "grad_norm": 176.0205078125, "learning_rate": 1.1078174273858922e-05, "loss": 0.553, "step": 26892 }, { "epoch": 22.31784232365145, "grad_norm": 148.90206909179688, "learning_rate": 1.1077842323651453e-05, "loss": 0.6456, "step": 26893 }, { "epoch": 22.318672199170123, "grad_norm": 40.95825958251953, "learning_rate": 1.1077510373443985e-05, "loss": 0.745, "step": 26894 }, { "epoch": 22.319502074688796, "grad_norm": 30.083728790283203, "learning_rate": 1.1077178423236517e-05, "loss": 0.6085, "step": 26895 }, { "epoch": 22.320331950207468, "grad_norm": 34.160518646240234, "learning_rate": 1.1076846473029046e-05, "loss": 0.4497, "step": 26896 }, { "epoch": 22.32116182572614, "grad_norm": 57.273468017578125, "learning_rate": 1.1076514522821578e-05, "loss": 0.846, "step": 26897 }, { "epoch": 22.321991701244812, "grad_norm": 55.066593170166016, "learning_rate": 1.107618257261411e-05, "loss": 0.4982, "step": 26898 }, { "epoch": 22.322821576763484, "grad_norm": 35.91872024536133, "learning_rate": 1.107585062240664e-05, "loss": 0.5303, "step": 26899 }, { "epoch": 22.323651452282157, "grad_norm": 22.17915916442871, "learning_rate": 1.107551867219917e-05, "loss": 0.2981, "step": 26900 }, { "epoch": 22.32448132780083, "grad_norm": 61.72651672363281, "learning_rate": 1.1075186721991703e-05, "loss": 0.6475, "step": 26901 }, { "epoch": 22.3253112033195, 
"grad_norm": 20.950197219848633, "learning_rate": 1.1074854771784233e-05, "loss": 0.2794, "step": 26902 }, { "epoch": 22.326141078838173, "grad_norm": 41.430301666259766, "learning_rate": 1.1074522821576765e-05, "loss": 1.1381, "step": 26903 }, { "epoch": 22.326970954356845, "grad_norm": 53.704185485839844, "learning_rate": 1.1074190871369294e-05, "loss": 0.7755, "step": 26904 }, { "epoch": 22.327800829875518, "grad_norm": 15.304749488830566, "learning_rate": 1.1073858921161826e-05, "loss": 0.2274, "step": 26905 }, { "epoch": 22.32863070539419, "grad_norm": 64.77588653564453, "learning_rate": 1.1073526970954358e-05, "loss": 0.8718, "step": 26906 }, { "epoch": 22.329460580912862, "grad_norm": 20.59052276611328, "learning_rate": 1.107319502074689e-05, "loss": 0.4022, "step": 26907 }, { "epoch": 22.330290456431534, "grad_norm": 18.671875, "learning_rate": 1.1072863070539419e-05, "loss": 0.2169, "step": 26908 }, { "epoch": 22.331120331950206, "grad_norm": 81.74609375, "learning_rate": 1.1072531120331951e-05, "loss": 0.7236, "step": 26909 }, { "epoch": 22.33195020746888, "grad_norm": 62.067718505859375, "learning_rate": 1.1072199170124483e-05, "loss": 0.7609, "step": 26910 }, { "epoch": 22.33278008298755, "grad_norm": 35.2537956237793, "learning_rate": 1.1071867219917014e-05, "loss": 0.4726, "step": 26911 }, { "epoch": 22.333609958506223, "grad_norm": 173.87510681152344, "learning_rate": 1.1071535269709544e-05, "loss": 0.5987, "step": 26912 }, { "epoch": 22.334439834024895, "grad_norm": 47.89902114868164, "learning_rate": 1.1071203319502074e-05, "loss": 0.3958, "step": 26913 }, { "epoch": 22.335269709543567, "grad_norm": 23.41399574279785, "learning_rate": 1.1070871369294606e-05, "loss": 0.4513, "step": 26914 }, { "epoch": 22.33609958506224, "grad_norm": 27.203746795654297, "learning_rate": 1.1070539419087139e-05, "loss": 0.2888, "step": 26915 }, { "epoch": 22.33692946058091, "grad_norm": 72.32716369628906, "learning_rate": 1.1070207468879667e-05, "loss": 0.5925, 
"step": 26916 }, { "epoch": 22.337759336099584, "grad_norm": 39.291316986083984, "learning_rate": 1.10698755186722e-05, "loss": 0.427, "step": 26917 }, { "epoch": 22.338589211618256, "grad_norm": 90.59574890136719, "learning_rate": 1.1069543568464732e-05, "loss": 0.696, "step": 26918 }, { "epoch": 22.33941908713693, "grad_norm": 35.233604431152344, "learning_rate": 1.1069211618257264e-05, "loss": 0.6105, "step": 26919 }, { "epoch": 22.3402489626556, "grad_norm": 25.65847396850586, "learning_rate": 1.1068879668049794e-05, "loss": 0.3959, "step": 26920 }, { "epoch": 22.341078838174273, "grad_norm": 67.40283203125, "learning_rate": 1.1068547717842324e-05, "loss": 0.3548, "step": 26921 }, { "epoch": 22.341908713692945, "grad_norm": 44.40415573120117, "learning_rate": 1.1068215767634855e-05, "loss": 0.4224, "step": 26922 }, { "epoch": 22.342738589211617, "grad_norm": 72.4872055053711, "learning_rate": 1.1067883817427387e-05, "loss": 0.3296, "step": 26923 }, { "epoch": 22.34356846473029, "grad_norm": 36.26885223388672, "learning_rate": 1.1067551867219919e-05, "loss": 0.3445, "step": 26924 }, { "epoch": 22.34439834024896, "grad_norm": 12.615951538085938, "learning_rate": 1.1067219917012448e-05, "loss": 0.2728, "step": 26925 }, { "epoch": 22.345228215767634, "grad_norm": 17.70139503479004, "learning_rate": 1.106688796680498e-05, "loss": 0.2681, "step": 26926 }, { "epoch": 22.346058091286306, "grad_norm": 37.65168762207031, "learning_rate": 1.1066556016597512e-05, "loss": 0.5076, "step": 26927 }, { "epoch": 22.346887966804978, "grad_norm": 63.17028045654297, "learning_rate": 1.1066224066390044e-05, "loss": 0.5417, "step": 26928 }, { "epoch": 22.34771784232365, "grad_norm": 44.04053497314453, "learning_rate": 1.1065892116182573e-05, "loss": 0.5453, "step": 26929 }, { "epoch": 22.348547717842322, "grad_norm": 60.62773895263672, "learning_rate": 1.1065560165975105e-05, "loss": 1.3, "step": 26930 }, { "epoch": 22.349377593360995, "grad_norm": 36.56423568725586, "learning_rate": 
1.1065228215767635e-05, "loss": 0.376, "step": 26931 }, { "epoch": 22.350207468879667, "grad_norm": 213.14651489257812, "learning_rate": 1.1064896265560167e-05, "loss": 1.0199, "step": 26932 }, { "epoch": 22.35103734439834, "grad_norm": 26.55589485168457, "learning_rate": 1.1064564315352698e-05, "loss": 0.2776, "step": 26933 }, { "epoch": 22.35186721991701, "grad_norm": 51.69675064086914, "learning_rate": 1.1064232365145228e-05, "loss": 0.5607, "step": 26934 }, { "epoch": 22.352697095435683, "grad_norm": 31.228017807006836, "learning_rate": 1.106390041493776e-05, "loss": 0.6598, "step": 26935 }, { "epoch": 22.353526970954356, "grad_norm": 28.513538360595703, "learning_rate": 1.1063568464730293e-05, "loss": 0.3479, "step": 26936 }, { "epoch": 22.354356846473028, "grad_norm": 52.28803634643555, "learning_rate": 1.1063236514522821e-05, "loss": 0.5921, "step": 26937 }, { "epoch": 22.3551867219917, "grad_norm": 177.8579864501953, "learning_rate": 1.1062904564315353e-05, "loss": 0.6477, "step": 26938 }, { "epoch": 22.356016597510372, "grad_norm": 220.0169219970703, "learning_rate": 1.1062572614107885e-05, "loss": 0.7494, "step": 26939 }, { "epoch": 22.356846473029044, "grad_norm": 44.44844436645508, "learning_rate": 1.1062240663900416e-05, "loss": 0.5878, "step": 26940 }, { "epoch": 22.357676348547717, "grad_norm": 37.1986083984375, "learning_rate": 1.1061908713692946e-05, "loss": 0.4041, "step": 26941 }, { "epoch": 22.35850622406639, "grad_norm": 58.27280807495117, "learning_rate": 1.1061576763485478e-05, "loss": 0.5599, "step": 26942 }, { "epoch": 22.35933609958506, "grad_norm": 32.883419036865234, "learning_rate": 1.1061244813278009e-05, "loss": 0.3426, "step": 26943 }, { "epoch": 22.360165975103733, "grad_norm": 40.549591064453125, "learning_rate": 1.1060912863070541e-05, "loss": 0.4588, "step": 26944 }, { "epoch": 22.360995850622405, "grad_norm": 104.17626190185547, "learning_rate": 1.106058091286307e-05, "loss": 1.0024, "step": 26945 }, { "epoch": 
22.361825726141078, "grad_norm": 70.68225860595703, "learning_rate": 1.1060248962655602e-05, "loss": 0.7164, "step": 26946 }, { "epoch": 22.36265560165975, "grad_norm": 45.863590240478516, "learning_rate": 1.1059917012448134e-05, "loss": 0.5846, "step": 26947 }, { "epoch": 22.363485477178422, "grad_norm": 75.9477310180664, "learning_rate": 1.1059585062240666e-05, "loss": 0.5711, "step": 26948 }, { "epoch": 22.364315352697094, "grad_norm": 21.035890579223633, "learning_rate": 1.1059253112033196e-05, "loss": 0.2354, "step": 26949 }, { "epoch": 22.365145228215766, "grad_norm": 14.0142240524292, "learning_rate": 1.1058921161825727e-05, "loss": 0.2917, "step": 26950 }, { "epoch": 22.36597510373444, "grad_norm": 34.228355407714844, "learning_rate": 1.1058589211618257e-05, "loss": 0.5285, "step": 26951 }, { "epoch": 22.36680497925311, "grad_norm": 40.65362548828125, "learning_rate": 1.105825726141079e-05, "loss": 0.7481, "step": 26952 }, { "epoch": 22.367634854771783, "grad_norm": 31.179677963256836, "learning_rate": 1.1057925311203321e-05, "loss": 0.3416, "step": 26953 }, { "epoch": 22.368464730290455, "grad_norm": 17.34305191040039, "learning_rate": 1.105759336099585e-05, "loss": 0.2245, "step": 26954 }, { "epoch": 22.369294605809127, "grad_norm": 35.64728546142578, "learning_rate": 1.1057261410788382e-05, "loss": 0.4398, "step": 26955 }, { "epoch": 22.3701244813278, "grad_norm": 65.93538665771484, "learning_rate": 1.1056929460580914e-05, "loss": 0.6035, "step": 26956 }, { "epoch": 22.37095435684647, "grad_norm": 29.31764793395996, "learning_rate": 1.1056597510373446e-05, "loss": 0.3983, "step": 26957 }, { "epoch": 22.371784232365144, "grad_norm": 20.345273971557617, "learning_rate": 1.1056265560165975e-05, "loss": 0.2421, "step": 26958 }, { "epoch": 22.372614107883816, "grad_norm": 35.726356506347656, "learning_rate": 1.1055933609958507e-05, "loss": 0.6022, "step": 26959 }, { "epoch": 22.37344398340249, "grad_norm": 38.68943786621094, "learning_rate": 
1.1055601659751038e-05, "loss": 0.9242, "step": 26960 }, { "epoch": 22.37427385892116, "grad_norm": 27.665773391723633, "learning_rate": 1.105526970954357e-05, "loss": 0.3796, "step": 26961 }, { "epoch": 22.375103734439833, "grad_norm": 40.05266189575195, "learning_rate": 1.10549377593361e-05, "loss": 0.3078, "step": 26962 }, { "epoch": 22.375933609958505, "grad_norm": 12.506561279296875, "learning_rate": 1.105460580912863e-05, "loss": 0.2946, "step": 26963 }, { "epoch": 22.376763485477177, "grad_norm": 35.54806137084961, "learning_rate": 1.1054273858921163e-05, "loss": 0.462, "step": 26964 }, { "epoch": 22.37759336099585, "grad_norm": 33.21256637573242, "learning_rate": 1.1053941908713695e-05, "loss": 0.4491, "step": 26965 }, { "epoch": 22.37842323651452, "grad_norm": 90.00910949707031, "learning_rate": 1.1053609958506224e-05, "loss": 1.0235, "step": 26966 }, { "epoch": 22.379253112033194, "grad_norm": 52.435726165771484, "learning_rate": 1.1053278008298756e-05, "loss": 0.3829, "step": 26967 }, { "epoch": 22.380082987551866, "grad_norm": 36.62078857421875, "learning_rate": 1.1052946058091288e-05, "loss": 0.7243, "step": 26968 }, { "epoch": 22.380912863070538, "grad_norm": 34.54512023925781, "learning_rate": 1.1052614107883818e-05, "loss": 0.3498, "step": 26969 }, { "epoch": 22.38174273858921, "grad_norm": 85.10074615478516, "learning_rate": 1.1052282157676349e-05, "loss": 0.6938, "step": 26970 }, { "epoch": 22.382572614107882, "grad_norm": 35.44481658935547, "learning_rate": 1.105195020746888e-05, "loss": 0.4739, "step": 26971 }, { "epoch": 22.383402489626555, "grad_norm": 34.700721740722656, "learning_rate": 1.1051618257261411e-05, "loss": 0.457, "step": 26972 }, { "epoch": 22.384232365145227, "grad_norm": 89.84226989746094, "learning_rate": 1.1051286307053943e-05, "loss": 0.6773, "step": 26973 }, { "epoch": 22.3850622406639, "grad_norm": 11.960957527160645, "learning_rate": 1.1050954356846475e-05, "loss": 0.2582, "step": 26974 }, { "epoch": 22.38589211618257, 
"grad_norm": 28.967947006225586, "learning_rate": 1.1050622406639004e-05, "loss": 0.3972, "step": 26975 }, { "epoch": 22.386721991701243, "grad_norm": 27.693714141845703, "learning_rate": 1.1050290456431536e-05, "loss": 0.4015, "step": 26976 }, { "epoch": 22.387551867219916, "grad_norm": 39.86179733276367, "learning_rate": 1.1049958506224068e-05, "loss": 0.455, "step": 26977 }, { "epoch": 22.388381742738588, "grad_norm": 71.96147155761719, "learning_rate": 1.1049626556016599e-05, "loss": 0.6547, "step": 26978 }, { "epoch": 22.38921161825726, "grad_norm": 37.620059967041016, "learning_rate": 1.1049294605809129e-05, "loss": 0.3396, "step": 26979 }, { "epoch": 22.390041493775932, "grad_norm": 52.77022933959961, "learning_rate": 1.1048962655601661e-05, "loss": 0.7369, "step": 26980 }, { "epoch": 22.390871369294604, "grad_norm": 35.180355072021484, "learning_rate": 1.1048630705394192e-05, "loss": 0.3772, "step": 26981 }, { "epoch": 22.391701244813277, "grad_norm": 10.124897003173828, "learning_rate": 1.1048298755186724e-05, "loss": 0.2225, "step": 26982 }, { "epoch": 22.39253112033195, "grad_norm": 12.340503692626953, "learning_rate": 1.1047966804979252e-05, "loss": 0.412, "step": 26983 }, { "epoch": 22.39336099585062, "grad_norm": 35.967041015625, "learning_rate": 1.1047634854771785e-05, "loss": 0.7623, "step": 26984 }, { "epoch": 22.394190871369293, "grad_norm": 20.73988914489746, "learning_rate": 1.1047302904564317e-05, "loss": 0.3725, "step": 26985 }, { "epoch": 22.395020746887965, "grad_norm": 68.0448989868164, "learning_rate": 1.1046970954356849e-05, "loss": 0.5007, "step": 26986 }, { "epoch": 22.395850622406638, "grad_norm": 68.01226806640625, "learning_rate": 1.1046639004149378e-05, "loss": 0.9957, "step": 26987 }, { "epoch": 22.39668049792531, "grad_norm": 48.46000671386719, "learning_rate": 1.104630705394191e-05, "loss": 0.6348, "step": 26988 }, { "epoch": 22.397510373443982, "grad_norm": 81.33531188964844, "learning_rate": 1.1045975103734442e-05, "loss": 
0.4751, "step": 26989 }, { "epoch": 22.398340248962654, "grad_norm": 29.34208106994629, "learning_rate": 1.1045643153526972e-05, "loss": 0.368, "step": 26990 }, { "epoch": 22.399170124481326, "grad_norm": 20.278778076171875, "learning_rate": 1.1045311203319503e-05, "loss": 0.3274, "step": 26991 }, { "epoch": 22.4, "grad_norm": 40.2118034362793, "learning_rate": 1.1044979253112033e-05, "loss": 0.4952, "step": 26992 }, { "epoch": 22.40082987551867, "grad_norm": 60.693023681640625, "learning_rate": 1.1044647302904565e-05, "loss": 0.8721, "step": 26993 }, { "epoch": 22.401659751037343, "grad_norm": 21.486501693725586, "learning_rate": 1.1044315352697097e-05, "loss": 0.5998, "step": 26994 }, { "epoch": 22.402489626556015, "grad_norm": 42.900760650634766, "learning_rate": 1.1043983402489626e-05, "loss": 0.5879, "step": 26995 }, { "epoch": 22.403319502074687, "grad_norm": 135.47021484375, "learning_rate": 1.1043651452282158e-05, "loss": 0.4914, "step": 26996 }, { "epoch": 22.40414937759336, "grad_norm": 89.1722412109375, "learning_rate": 1.104331950207469e-05, "loss": 0.7408, "step": 26997 }, { "epoch": 22.40497925311203, "grad_norm": 37.66767501831055, "learning_rate": 1.1042987551867222e-05, "loss": 0.6149, "step": 26998 }, { "epoch": 22.405809128630704, "grad_norm": 70.79127502441406, "learning_rate": 1.1042655601659753e-05, "loss": 0.2959, "step": 26999 }, { "epoch": 22.406639004149376, "grad_norm": 30.698204040527344, "learning_rate": 1.1042323651452283e-05, "loss": 0.4691, "step": 27000 }, { "epoch": 22.40746887966805, "grad_norm": 32.3969612121582, "learning_rate": 1.1041991701244813e-05, "loss": 0.2697, "step": 27001 }, { "epoch": 22.40829875518672, "grad_norm": 39.63120651245117, "learning_rate": 1.1041659751037346e-05, "loss": 0.6073, "step": 27002 }, { "epoch": 22.409128630705393, "grad_norm": 81.49504852294922, "learning_rate": 1.1041327800829878e-05, "loss": 0.9339, "step": 27003 }, { "epoch": 22.409958506224065, "grad_norm": 28.968185424804688, 
"learning_rate": 1.1040995850622406e-05, "loss": 0.3085, "step": 27004 }, { "epoch": 22.410788381742737, "grad_norm": 27.164386749267578, "learning_rate": 1.1040663900414939e-05, "loss": 0.422, "step": 27005 }, { "epoch": 22.41161825726141, "grad_norm": 31.565656661987305, "learning_rate": 1.104033195020747e-05, "loss": 0.4115, "step": 27006 }, { "epoch": 22.41244813278008, "grad_norm": 22.82258415222168, "learning_rate": 1.1040000000000001e-05, "loss": 0.3336, "step": 27007 }, { "epoch": 22.413278008298754, "grad_norm": 46.27510452270508, "learning_rate": 1.1039668049792531e-05, "loss": 0.5249, "step": 27008 }, { "epoch": 22.414107883817426, "grad_norm": 25.48164939880371, "learning_rate": 1.1039336099585064e-05, "loss": 0.4893, "step": 27009 }, { "epoch": 22.414937759336098, "grad_norm": 29.50849723815918, "learning_rate": 1.1039004149377594e-05, "loss": 0.4159, "step": 27010 }, { "epoch": 22.41576763485477, "grad_norm": 40.684085845947266, "learning_rate": 1.1038672199170126e-05, "loss": 0.5414, "step": 27011 }, { "epoch": 22.416597510373443, "grad_norm": 57.08254623413086, "learning_rate": 1.1038340248962656e-05, "loss": 0.7932, "step": 27012 }, { "epoch": 22.417427385892115, "grad_norm": 21.493267059326172, "learning_rate": 1.1038008298755187e-05, "loss": 0.3573, "step": 27013 }, { "epoch": 22.418257261410787, "grad_norm": 17.654489517211914, "learning_rate": 1.1037676348547719e-05, "loss": 0.2466, "step": 27014 }, { "epoch": 22.41908713692946, "grad_norm": 67.23263549804688, "learning_rate": 1.1037344398340251e-05, "loss": 0.7529, "step": 27015 }, { "epoch": 22.41991701244813, "grad_norm": 78.9031982421875, "learning_rate": 1.103701244813278e-05, "loss": 1.0241, "step": 27016 }, { "epoch": 22.420746887966803, "grad_norm": 94.94276428222656, "learning_rate": 1.1036680497925312e-05, "loss": 0.3633, "step": 27017 }, { "epoch": 22.421576763485476, "grad_norm": 25.54494857788086, "learning_rate": 1.1036348547717844e-05, "loss": 0.3937, "step": 27018 }, { "epoch": 
22.422406639004148, "grad_norm": 10.311392784118652, "learning_rate": 1.1036016597510374e-05, "loss": 0.3279, "step": 27019 }, { "epoch": 22.42323651452282, "grad_norm": 80.39287567138672, "learning_rate": 1.1035684647302905e-05, "loss": 0.3398, "step": 27020 }, { "epoch": 22.424066390041492, "grad_norm": 19.569448471069336, "learning_rate": 1.1035352697095435e-05, "loss": 0.6382, "step": 27021 }, { "epoch": 22.424896265560164, "grad_norm": 11.40552043914795, "learning_rate": 1.1035020746887967e-05, "loss": 0.2314, "step": 27022 }, { "epoch": 22.425726141078837, "grad_norm": 59.71768569946289, "learning_rate": 1.10346887966805e-05, "loss": 0.6347, "step": 27023 }, { "epoch": 22.42655601659751, "grad_norm": 53.95490264892578, "learning_rate": 1.1034356846473028e-05, "loss": 0.7591, "step": 27024 }, { "epoch": 22.42738589211618, "grad_norm": 75.94567108154297, "learning_rate": 1.103402489626556e-05, "loss": 0.5103, "step": 27025 }, { "epoch": 22.428215767634853, "grad_norm": 67.19770050048828, "learning_rate": 1.1033692946058092e-05, "loss": 0.7044, "step": 27026 }, { "epoch": 22.429045643153525, "grad_norm": 58.651145935058594, "learning_rate": 1.1033360995850625e-05, "loss": 0.6192, "step": 27027 }, { "epoch": 22.429875518672198, "grad_norm": 121.70081329345703, "learning_rate": 1.1033029045643155e-05, "loss": 0.9705, "step": 27028 }, { "epoch": 22.43070539419087, "grad_norm": 91.83265686035156, "learning_rate": 1.1032697095435685e-05, "loss": 0.5592, "step": 27029 }, { "epoch": 22.431535269709542, "grad_norm": 59.565391540527344, "learning_rate": 1.1032365145228216e-05, "loss": 0.5264, "step": 27030 }, { "epoch": 22.432365145228214, "grad_norm": 155.01763916015625, "learning_rate": 1.1032033195020748e-05, "loss": 0.498, "step": 27031 }, { "epoch": 22.433195020746886, "grad_norm": 9.436530113220215, "learning_rate": 1.103170124481328e-05, "loss": 0.2886, "step": 27032 }, { "epoch": 22.43402489626556, "grad_norm": 65.77422332763672, "learning_rate": 
1.1031369294605809e-05, "loss": 0.8798, "step": 27033 }, { "epoch": 22.43485477178423, "grad_norm": 13.432525634765625, "learning_rate": 1.103103734439834e-05, "loss": 0.2406, "step": 27034 }, { "epoch": 22.435684647302903, "grad_norm": 85.56647491455078, "learning_rate": 1.1030705394190873e-05, "loss": 0.4664, "step": 27035 }, { "epoch": 22.436514522821575, "grad_norm": 22.767488479614258, "learning_rate": 1.1030373443983405e-05, "loss": 0.3443, "step": 27036 }, { "epoch": 22.437344398340247, "grad_norm": 108.09986877441406, "learning_rate": 1.1030041493775934e-05, "loss": 0.5565, "step": 27037 }, { "epoch": 22.43817427385892, "grad_norm": 65.5685806274414, "learning_rate": 1.1029709543568466e-05, "loss": 0.9807, "step": 27038 }, { "epoch": 22.439004149377592, "grad_norm": 65.6343765258789, "learning_rate": 1.1029377593360996e-05, "loss": 0.3721, "step": 27039 }, { "epoch": 22.439834024896264, "grad_norm": 31.078523635864258, "learning_rate": 1.1029045643153528e-05, "loss": 0.7086, "step": 27040 }, { "epoch": 22.440663900414936, "grad_norm": 34.73902893066406, "learning_rate": 1.1028713692946059e-05, "loss": 0.2869, "step": 27041 }, { "epoch": 22.44149377593361, "grad_norm": 13.821929931640625, "learning_rate": 1.102838174273859e-05, "loss": 0.2754, "step": 27042 }, { "epoch": 22.44232365145228, "grad_norm": 26.76251792907715, "learning_rate": 1.1028049792531121e-05, "loss": 0.2748, "step": 27043 }, { "epoch": 22.443153526970953, "grad_norm": 64.99674224853516, "learning_rate": 1.1027717842323653e-05, "loss": 0.6134, "step": 27044 }, { "epoch": 22.443983402489625, "grad_norm": 71.83116912841797, "learning_rate": 1.1027385892116182e-05, "loss": 0.4725, "step": 27045 }, { "epoch": 22.444813278008297, "grad_norm": 19.05030632019043, "learning_rate": 1.1027053941908714e-05, "loss": 0.2503, "step": 27046 }, { "epoch": 22.44564315352697, "grad_norm": 27.53327751159668, "learning_rate": 1.1026721991701246e-05, "loss": 0.4374, "step": 27047 }, { "epoch": 
22.44647302904564, "grad_norm": 59.26901626586914, "learning_rate": 1.1026390041493777e-05, "loss": 0.7697, "step": 27048 }, { "epoch": 22.447302904564314, "grad_norm": 112.67743682861328, "learning_rate": 1.1026058091286307e-05, "loss": 0.909, "step": 27049 }, { "epoch": 22.448132780082986, "grad_norm": 15.348289489746094, "learning_rate": 1.102572614107884e-05, "loss": 0.467, "step": 27050 }, { "epoch": 22.448962655601658, "grad_norm": 55.727684020996094, "learning_rate": 1.102539419087137e-05, "loss": 0.4549, "step": 27051 }, { "epoch": 22.44979253112033, "grad_norm": 42.62931442260742, "learning_rate": 1.1025062240663902e-05, "loss": 0.4129, "step": 27052 }, { "epoch": 22.450622406639003, "grad_norm": 26.559459686279297, "learning_rate": 1.1024730290456434e-05, "loss": 0.5, "step": 27053 }, { "epoch": 22.451452282157675, "grad_norm": 68.71794891357422, "learning_rate": 1.1024398340248963e-05, "loss": 0.8074, "step": 27054 }, { "epoch": 22.452282157676347, "grad_norm": 9.652064323425293, "learning_rate": 1.1024066390041495e-05, "loss": 0.2401, "step": 27055 }, { "epoch": 22.45311203319502, "grad_norm": 58.87194061279297, "learning_rate": 1.1023734439834027e-05, "loss": 0.5288, "step": 27056 }, { "epoch": 22.45394190871369, "grad_norm": 24.987613677978516, "learning_rate": 1.1023402489626557e-05, "loss": 0.3795, "step": 27057 }, { "epoch": 22.454771784232364, "grad_norm": 29.550434112548828, "learning_rate": 1.1023070539419088e-05, "loss": 0.6326, "step": 27058 }, { "epoch": 22.455601659751036, "grad_norm": 16.34823989868164, "learning_rate": 1.102273858921162e-05, "loss": 0.2586, "step": 27059 }, { "epoch": 22.456431535269708, "grad_norm": 25.632497787475586, "learning_rate": 1.102240663900415e-05, "loss": 0.2367, "step": 27060 }, { "epoch": 22.45726141078838, "grad_norm": 74.60249328613281, "learning_rate": 1.1022074688796682e-05, "loss": 0.6264, "step": 27061 }, { "epoch": 22.458091286307052, "grad_norm": 38.736846923828125, "learning_rate": 
1.1021742738589211e-05, "loss": 0.3381, "step": 27062 }, { "epoch": 22.458921161825725, "grad_norm": 67.02947998046875, "learning_rate": 1.1021410788381743e-05, "loss": 0.6941, "step": 27063 }, { "epoch": 22.459751037344397, "grad_norm": 37.88038635253906, "learning_rate": 1.1021078838174275e-05, "loss": 0.3067, "step": 27064 }, { "epoch": 22.46058091286307, "grad_norm": 74.72759246826172, "learning_rate": 1.1020746887966807e-05, "loss": 0.8701, "step": 27065 }, { "epoch": 22.46141078838174, "grad_norm": 47.14675521850586, "learning_rate": 1.1020414937759336e-05, "loss": 0.5047, "step": 27066 }, { "epoch": 22.462240663900413, "grad_norm": 26.433914184570312, "learning_rate": 1.1020082987551868e-05, "loss": 0.3038, "step": 27067 }, { "epoch": 22.463070539419085, "grad_norm": 30.900230407714844, "learning_rate": 1.1019751037344399e-05, "loss": 0.3565, "step": 27068 }, { "epoch": 22.463900414937758, "grad_norm": 49.13224792480469, "learning_rate": 1.101941908713693e-05, "loss": 0.8135, "step": 27069 }, { "epoch": 22.46473029045643, "grad_norm": 68.79354095458984, "learning_rate": 1.1019087136929461e-05, "loss": 0.5539, "step": 27070 }, { "epoch": 22.465560165975102, "grad_norm": 97.08210754394531, "learning_rate": 1.1018755186721992e-05, "loss": 0.5366, "step": 27071 }, { "epoch": 22.466390041493774, "grad_norm": 57.695552825927734, "learning_rate": 1.1018423236514524e-05, "loss": 0.4259, "step": 27072 }, { "epoch": 22.467219917012446, "grad_norm": 153.89920043945312, "learning_rate": 1.1018091286307056e-05, "loss": 0.72, "step": 27073 }, { "epoch": 22.46804979253112, "grad_norm": 22.176172256469727, "learning_rate": 1.1017759336099584e-05, "loss": 0.3145, "step": 27074 }, { "epoch": 22.46887966804979, "grad_norm": 49.561363220214844, "learning_rate": 1.1017427385892117e-05, "loss": 0.9327, "step": 27075 }, { "epoch": 22.469709543568463, "grad_norm": 49.365501403808594, "learning_rate": 1.1017095435684649e-05, "loss": 0.7221, "step": 27076 }, { "epoch": 
22.470539419087135, "grad_norm": 17.427499771118164, "learning_rate": 1.1016763485477179e-05, "loss": 0.2758, "step": 27077 }, { "epoch": 22.471369294605807, "grad_norm": 29.570392608642578, "learning_rate": 1.1016431535269711e-05, "loss": 0.4589, "step": 27078 }, { "epoch": 22.47219917012448, "grad_norm": 31.78345489501953, "learning_rate": 1.1016099585062242e-05, "loss": 0.4794, "step": 27079 }, { "epoch": 22.473029045643152, "grad_norm": 44.353912353515625, "learning_rate": 1.1015767634854772e-05, "loss": 0.4448, "step": 27080 }, { "epoch": 22.473858921161824, "grad_norm": 34.35829162597656, "learning_rate": 1.1015435684647304e-05, "loss": 0.6504, "step": 27081 }, { "epoch": 22.474688796680496, "grad_norm": 43.62752151489258, "learning_rate": 1.1015103734439836e-05, "loss": 0.4254, "step": 27082 }, { "epoch": 22.47551867219917, "grad_norm": 32.523826599121094, "learning_rate": 1.1014771784232365e-05, "loss": 0.3162, "step": 27083 }, { "epoch": 22.47634854771784, "grad_norm": 56.289085388183594, "learning_rate": 1.1014439834024897e-05, "loss": 0.6559, "step": 27084 }, { "epoch": 22.477178423236513, "grad_norm": 35.23044967651367, "learning_rate": 1.101410788381743e-05, "loss": 0.3341, "step": 27085 }, { "epoch": 22.478008298755185, "grad_norm": 46.19120407104492, "learning_rate": 1.101377593360996e-05, "loss": 0.3818, "step": 27086 }, { "epoch": 22.478838174273857, "grad_norm": 51.46610641479492, "learning_rate": 1.101344398340249e-05, "loss": 0.3683, "step": 27087 }, { "epoch": 22.47966804979253, "grad_norm": 116.83953857421875, "learning_rate": 1.1013112033195022e-05, "loss": 0.8208, "step": 27088 }, { "epoch": 22.4804979253112, "grad_norm": 43.20766830444336, "learning_rate": 1.1012780082987553e-05, "loss": 0.6836, "step": 27089 }, { "epoch": 22.481327800829874, "grad_norm": 86.35359191894531, "learning_rate": 1.1012448132780085e-05, "loss": 0.8961, "step": 27090 }, { "epoch": 22.482157676348546, "grad_norm": 21.713544845581055, "learning_rate": 
1.1012116182572613e-05, "loss": 0.7113, "step": 27091 }, { "epoch": 22.482987551867218, "grad_norm": 20.88368034362793, "learning_rate": 1.1011784232365145e-05, "loss": 0.2912, "step": 27092 }, { "epoch": 22.48381742738589, "grad_norm": 20.996810913085938, "learning_rate": 1.1011452282157678e-05, "loss": 0.3512, "step": 27093 }, { "epoch": 22.484647302904563, "grad_norm": 50.247528076171875, "learning_rate": 1.101112033195021e-05, "loss": 0.682, "step": 27094 }, { "epoch": 22.485477178423235, "grad_norm": 90.21945190429688, "learning_rate": 1.1010788381742738e-05, "loss": 0.5093, "step": 27095 }, { "epoch": 22.486307053941907, "grad_norm": 65.45696258544922, "learning_rate": 1.101045643153527e-05, "loss": 0.5948, "step": 27096 }, { "epoch": 22.48713692946058, "grad_norm": 39.381553649902344, "learning_rate": 1.1010124481327803e-05, "loss": 0.3766, "step": 27097 }, { "epoch": 22.48796680497925, "grad_norm": 36.19365692138672, "learning_rate": 1.1009792531120333e-05, "loss": 0.458, "step": 27098 }, { "epoch": 22.488796680497924, "grad_norm": 134.4999542236328, "learning_rate": 1.1009460580912863e-05, "loss": 0.4499, "step": 27099 }, { "epoch": 22.489626556016596, "grad_norm": 18.558393478393555, "learning_rate": 1.1009128630705394e-05, "loss": 0.2741, "step": 27100 }, { "epoch": 22.490456431535268, "grad_norm": 43.88621520996094, "learning_rate": 1.1008796680497926e-05, "loss": 0.8659, "step": 27101 }, { "epoch": 22.49128630705394, "grad_norm": 31.649200439453125, "learning_rate": 1.1008464730290458e-05, "loss": 0.3745, "step": 27102 }, { "epoch": 22.492116182572612, "grad_norm": 36.986244201660156, "learning_rate": 1.1008132780082987e-05, "loss": 0.5995, "step": 27103 }, { "epoch": 22.492946058091285, "grad_norm": 52.02304458618164, "learning_rate": 1.1007800829875519e-05, "loss": 0.5529, "step": 27104 }, { "epoch": 22.49377593360996, "grad_norm": 36.328369140625, "learning_rate": 1.1007468879668051e-05, "loss": 0.4874, "step": 27105 }, { "epoch": 22.49460580912863, 
"grad_norm": 32.80244445800781, "learning_rate": 1.1007136929460583e-05, "loss": 0.4638, "step": 27106 }, { "epoch": 22.495435684647305, "grad_norm": 71.24725341796875, "learning_rate": 1.1006804979253114e-05, "loss": 0.6839, "step": 27107 }, { "epoch": 22.496265560165973, "grad_norm": 53.87470245361328, "learning_rate": 1.1006473029045644e-05, "loss": 0.5588, "step": 27108 }, { "epoch": 22.49709543568465, "grad_norm": 19.886930465698242, "learning_rate": 1.1006141078838174e-05, "loss": 0.4135, "step": 27109 }, { "epoch": 22.497925311203318, "grad_norm": 50.86083221435547, "learning_rate": 1.1005809128630706e-05, "loss": 0.5052, "step": 27110 }, { "epoch": 22.498755186721993, "grad_norm": 94.36767578125, "learning_rate": 1.1005477178423239e-05, "loss": 0.5593, "step": 27111 }, { "epoch": 22.499585062240662, "grad_norm": 27.216026306152344, "learning_rate": 1.1005145228215767e-05, "loss": 0.472, "step": 27112 }, { "epoch": 22.500414937759338, "grad_norm": 63.45839309692383, "learning_rate": 1.10048132780083e-05, "loss": 1.0839, "step": 27113 }, { "epoch": 22.501244813278007, "grad_norm": 24.437999725341797, "learning_rate": 1.1004481327800832e-05, "loss": 0.2424, "step": 27114 }, { "epoch": 22.502074688796682, "grad_norm": 29.953134536743164, "learning_rate": 1.1004149377593364e-05, "loss": 0.4449, "step": 27115 }, { "epoch": 22.50290456431535, "grad_norm": 24.611059188842773, "learning_rate": 1.1003817427385892e-05, "loss": 0.3489, "step": 27116 }, { "epoch": 22.503734439834027, "grad_norm": 56.93659591674805, "learning_rate": 1.1003485477178424e-05, "loss": 0.4487, "step": 27117 }, { "epoch": 22.504564315352695, "grad_norm": 40.14197540283203, "learning_rate": 1.1003153526970955e-05, "loss": 0.8901, "step": 27118 }, { "epoch": 22.50539419087137, "grad_norm": 36.70778274536133, "learning_rate": 1.1002821576763487e-05, "loss": 0.3386, "step": 27119 }, { "epoch": 22.50622406639004, "grad_norm": 25.171051025390625, "learning_rate": 1.1002489626556017e-05, "loss": 
0.3533, "step": 27120 }, { "epoch": 22.507053941908715, "grad_norm": 10.087875366210938, "learning_rate": 1.1002157676348548e-05, "loss": 0.2432, "step": 27121 }, { "epoch": 22.507883817427384, "grad_norm": 18.08323097229004, "learning_rate": 1.100182572614108e-05, "loss": 0.2158, "step": 27122 }, { "epoch": 22.50871369294606, "grad_norm": 23.087791442871094, "learning_rate": 1.1001493775933612e-05, "loss": 0.3592, "step": 27123 }, { "epoch": 22.50954356846473, "grad_norm": 50.269710540771484, "learning_rate": 1.100116182572614e-05, "loss": 0.5829, "step": 27124 }, { "epoch": 22.510373443983404, "grad_norm": 60.38079833984375, "learning_rate": 1.1000829875518673e-05, "loss": 0.4415, "step": 27125 }, { "epoch": 22.511203319502073, "grad_norm": 51.58705520629883, "learning_rate": 1.1000497925311205e-05, "loss": 0.6254, "step": 27126 }, { "epoch": 22.51203319502075, "grad_norm": 23.69816780090332, "learning_rate": 1.1000165975103735e-05, "loss": 0.4155, "step": 27127 }, { "epoch": 22.512863070539417, "grad_norm": 20.70624351501465, "learning_rate": 1.0999834024896266e-05, "loss": 0.4358, "step": 27128 }, { "epoch": 22.513692946058093, "grad_norm": 24.088199615478516, "learning_rate": 1.0999502074688798e-05, "loss": 0.3093, "step": 27129 }, { "epoch": 22.51452282157676, "grad_norm": 33.79780578613281, "learning_rate": 1.0999170124481328e-05, "loss": 0.6601, "step": 27130 }, { "epoch": 22.515352697095437, "grad_norm": 25.56694793701172, "learning_rate": 1.099883817427386e-05, "loss": 0.2725, "step": 27131 }, { "epoch": 22.51618257261411, "grad_norm": 78.67990112304688, "learning_rate": 1.0998506224066393e-05, "loss": 0.6817, "step": 27132 }, { "epoch": 22.517012448132782, "grad_norm": 53.57571029663086, "learning_rate": 1.0998174273858921e-05, "loss": 0.5652, "step": 27133 }, { "epoch": 22.517842323651454, "grad_norm": 62.228843688964844, "learning_rate": 1.0997842323651453e-05, "loss": 0.7322, "step": 27134 }, { "epoch": 22.518672199170126, "grad_norm": 
20.660079956054688, "learning_rate": 1.0997510373443985e-05, "loss": 0.3235, "step": 27135 }, { "epoch": 22.5195020746888, "grad_norm": 16.608856201171875, "learning_rate": 1.0997178423236516e-05, "loss": 0.2281, "step": 27136 }, { "epoch": 22.52033195020747, "grad_norm": 71.18537902832031, "learning_rate": 1.0996846473029046e-05, "loss": 0.4265, "step": 27137 }, { "epoch": 22.521161825726143, "grad_norm": 41.46337890625, "learning_rate": 1.0996514522821577e-05, "loss": 0.4468, "step": 27138 }, { "epoch": 22.521991701244815, "grad_norm": 53.224342346191406, "learning_rate": 1.0996182572614109e-05, "loss": 0.2553, "step": 27139 }, { "epoch": 22.522821576763487, "grad_norm": 28.21185874938965, "learning_rate": 1.0995850622406641e-05, "loss": 0.39, "step": 27140 }, { "epoch": 22.52365145228216, "grad_norm": 33.45106887817383, "learning_rate": 1.099551867219917e-05, "loss": 0.4124, "step": 27141 }, { "epoch": 22.52448132780083, "grad_norm": 46.41177749633789, "learning_rate": 1.0995186721991702e-05, "loss": 0.9742, "step": 27142 }, { "epoch": 22.525311203319504, "grad_norm": 26.642982482910156, "learning_rate": 1.0994854771784234e-05, "loss": 0.3057, "step": 27143 }, { "epoch": 22.526141078838176, "grad_norm": 12.343782424926758, "learning_rate": 1.0994522821576766e-05, "loss": 0.304, "step": 27144 }, { "epoch": 22.526970954356848, "grad_norm": 26.391845703125, "learning_rate": 1.0994190871369295e-05, "loss": 0.3118, "step": 27145 }, { "epoch": 22.52780082987552, "grad_norm": 14.765473365783691, "learning_rate": 1.0993858921161827e-05, "loss": 0.2877, "step": 27146 }, { "epoch": 22.528630705394193, "grad_norm": 16.022212982177734, "learning_rate": 1.0993526970954357e-05, "loss": 0.2876, "step": 27147 }, { "epoch": 22.529460580912865, "grad_norm": 28.97901153564453, "learning_rate": 1.099319502074689e-05, "loss": 0.5644, "step": 27148 }, { "epoch": 22.530290456431537, "grad_norm": 14.060639381408691, "learning_rate": 1.099286307053942e-05, "loss": 0.2308, "step": 27149 
}, { "epoch": 22.53112033195021, "grad_norm": 25.33557891845703, "learning_rate": 1.099253112033195e-05, "loss": 0.447, "step": 27150 }, { "epoch": 22.53195020746888, "grad_norm": 56.4300422668457, "learning_rate": 1.0992199170124482e-05, "loss": 0.897, "step": 27151 }, { "epoch": 22.532780082987554, "grad_norm": 12.053899765014648, "learning_rate": 1.0991867219917014e-05, "loss": 0.2719, "step": 27152 }, { "epoch": 22.533609958506226, "grad_norm": 46.022029876708984, "learning_rate": 1.0991535269709543e-05, "loss": 0.4619, "step": 27153 }, { "epoch": 22.534439834024898, "grad_norm": 22.09272003173828, "learning_rate": 1.0991203319502075e-05, "loss": 0.3413, "step": 27154 }, { "epoch": 22.53526970954357, "grad_norm": 40.08763885498047, "learning_rate": 1.0990871369294607e-05, "loss": 0.626, "step": 27155 }, { "epoch": 22.536099585062242, "grad_norm": 13.356978416442871, "learning_rate": 1.0990539419087138e-05, "loss": 0.3165, "step": 27156 }, { "epoch": 22.536929460580915, "grad_norm": 21.501384735107422, "learning_rate": 1.099020746887967e-05, "loss": 0.3538, "step": 27157 }, { "epoch": 22.537759336099587, "grad_norm": 39.4523811340332, "learning_rate": 1.09898755186722e-05, "loss": 0.4048, "step": 27158 }, { "epoch": 22.53858921161826, "grad_norm": 51.502410888671875, "learning_rate": 1.098954356846473e-05, "loss": 0.7014, "step": 27159 }, { "epoch": 22.53941908713693, "grad_norm": 58.556549072265625, "learning_rate": 1.0989211618257263e-05, "loss": 0.5437, "step": 27160 }, { "epoch": 22.540248962655603, "grad_norm": 18.5617618560791, "learning_rate": 1.0988879668049795e-05, "loss": 0.2737, "step": 27161 }, { "epoch": 22.541078838174275, "grad_norm": 13.94631576538086, "learning_rate": 1.0988547717842324e-05, "loss": 0.3245, "step": 27162 }, { "epoch": 22.541908713692948, "grad_norm": 32.25921630859375, "learning_rate": 1.0988215767634856e-05, "loss": 0.3672, "step": 27163 }, { "epoch": 22.54273858921162, "grad_norm": 62.395057678222656, "learning_rate": 
1.0987883817427388e-05, "loss": 0.4462, "step": 27164 }, { "epoch": 22.543568464730292, "grad_norm": 48.325279235839844, "learning_rate": 1.0987551867219918e-05, "loss": 0.3649, "step": 27165 }, { "epoch": 22.544398340248964, "grad_norm": 91.45620727539062, "learning_rate": 1.0987219917012449e-05, "loss": 0.9579, "step": 27166 }, { "epoch": 22.545228215767636, "grad_norm": 29.487829208374023, "learning_rate": 1.098688796680498e-05, "loss": 0.3735, "step": 27167 }, { "epoch": 22.54605809128631, "grad_norm": 43.337684631347656, "learning_rate": 1.0986556016597511e-05, "loss": 0.6449, "step": 27168 }, { "epoch": 22.54688796680498, "grad_norm": 68.73362731933594, "learning_rate": 1.0986224066390043e-05, "loss": 0.3336, "step": 27169 }, { "epoch": 22.547717842323653, "grad_norm": 36.749446868896484, "learning_rate": 1.0985892116182572e-05, "loss": 0.4263, "step": 27170 }, { "epoch": 22.548547717842325, "grad_norm": 42.8079948425293, "learning_rate": 1.0985560165975104e-05, "loss": 0.4927, "step": 27171 }, { "epoch": 22.549377593360997, "grad_norm": 29.46644401550293, "learning_rate": 1.0985228215767636e-05, "loss": 0.4064, "step": 27172 }, { "epoch": 22.55020746887967, "grad_norm": 57.937164306640625, "learning_rate": 1.0984896265560168e-05, "loss": 0.3638, "step": 27173 }, { "epoch": 22.551037344398342, "grad_norm": 74.37094116210938, "learning_rate": 1.0984564315352697e-05, "loss": 0.6563, "step": 27174 }, { "epoch": 22.551867219917014, "grad_norm": 14.43530559539795, "learning_rate": 1.0984232365145229e-05, "loss": 0.3196, "step": 27175 }, { "epoch": 22.552697095435686, "grad_norm": 34.49555587768555, "learning_rate": 1.0983900414937761e-05, "loss": 0.3441, "step": 27176 }, { "epoch": 22.55352697095436, "grad_norm": 51.1314582824707, "learning_rate": 1.0983568464730292e-05, "loss": 0.462, "step": 27177 }, { "epoch": 22.55435684647303, "grad_norm": 47.26592254638672, "learning_rate": 1.0983236514522822e-05, "loss": 0.5874, "step": 27178 }, { "epoch": 
22.555186721991703, "grad_norm": 53.36486053466797, "learning_rate": 1.0982904564315352e-05, "loss": 0.4179, "step": 27179 }, { "epoch": 22.556016597510375, "grad_norm": 39.7987174987793, "learning_rate": 1.0982572614107885e-05, "loss": 0.5379, "step": 27180 }, { "epoch": 22.556846473029047, "grad_norm": 81.80008697509766, "learning_rate": 1.0982240663900417e-05, "loss": 0.3806, "step": 27181 }, { "epoch": 22.55767634854772, "grad_norm": 30.27479362487793, "learning_rate": 1.0981908713692945e-05, "loss": 0.8605, "step": 27182 }, { "epoch": 22.55850622406639, "grad_norm": 25.11113929748535, "learning_rate": 1.0981576763485477e-05, "loss": 0.5952, "step": 27183 }, { "epoch": 22.559336099585064, "grad_norm": 22.120683670043945, "learning_rate": 1.098124481327801e-05, "loss": 0.3787, "step": 27184 }, { "epoch": 22.560165975103736, "grad_norm": 74.35063934326172, "learning_rate": 1.098091286307054e-05, "loss": 1.1838, "step": 27185 }, { "epoch": 22.560995850622408, "grad_norm": 68.78347778320312, "learning_rate": 1.0980580912863072e-05, "loss": 1.297, "step": 27186 }, { "epoch": 22.56182572614108, "grad_norm": 27.26466178894043, "learning_rate": 1.0980248962655603e-05, "loss": 0.618, "step": 27187 }, { "epoch": 22.562655601659753, "grad_norm": 77.51875305175781, "learning_rate": 1.0979917012448133e-05, "loss": 0.6514, "step": 27188 }, { "epoch": 22.563485477178425, "grad_norm": 19.00820541381836, "learning_rate": 1.0979585062240665e-05, "loss": 0.3378, "step": 27189 }, { "epoch": 22.564315352697097, "grad_norm": 47.135562896728516, "learning_rate": 1.0979253112033197e-05, "loss": 0.525, "step": 27190 }, { "epoch": 22.56514522821577, "grad_norm": 47.30787658691406, "learning_rate": 1.0978921161825726e-05, "loss": 0.5498, "step": 27191 }, { "epoch": 22.56597510373444, "grad_norm": 59.36552810668945, "learning_rate": 1.0978589211618258e-05, "loss": 1.1148, "step": 27192 }, { "epoch": 22.566804979253114, "grad_norm": 25.509620666503906, "learning_rate": 
1.097825726141079e-05, "loss": 0.5498, "step": 27193 }, { "epoch": 22.567634854771786, "grad_norm": 21.897693634033203, "learning_rate": 1.097792531120332e-05, "loss": 0.4424, "step": 27194 }, { "epoch": 22.568464730290458, "grad_norm": 45.819244384765625, "learning_rate": 1.0977593360995851e-05, "loss": 0.7721, "step": 27195 }, { "epoch": 22.56929460580913, "grad_norm": 28.27320671081543, "learning_rate": 1.0977261410788383e-05, "loss": 0.4836, "step": 27196 }, { "epoch": 22.570124481327802, "grad_norm": 66.41573333740234, "learning_rate": 1.0976929460580913e-05, "loss": 0.7712, "step": 27197 }, { "epoch": 22.570954356846475, "grad_norm": 63.45866775512695, "learning_rate": 1.0976597510373446e-05, "loss": 0.8073, "step": 27198 }, { "epoch": 22.571784232365147, "grad_norm": 15.543278694152832, "learning_rate": 1.0976265560165976e-05, "loss": 0.362, "step": 27199 }, { "epoch": 22.57261410788382, "grad_norm": 15.738134384155273, "learning_rate": 1.0975933609958506e-05, "loss": 0.5079, "step": 27200 }, { "epoch": 22.57344398340249, "grad_norm": 61.454166412353516, "learning_rate": 1.0975601659751038e-05, "loss": 0.6093, "step": 27201 }, { "epoch": 22.574273858921163, "grad_norm": 29.701547622680664, "learning_rate": 1.097526970954357e-05, "loss": 0.6436, "step": 27202 }, { "epoch": 22.575103734439836, "grad_norm": 42.45293045043945, "learning_rate": 1.09749377593361e-05, "loss": 0.3578, "step": 27203 }, { "epoch": 22.575933609958508, "grad_norm": 97.58045959472656, "learning_rate": 1.0974605809128631e-05, "loss": 0.6197, "step": 27204 }, { "epoch": 22.57676348547718, "grad_norm": 35.871337890625, "learning_rate": 1.0974273858921164e-05, "loss": 0.2754, "step": 27205 }, { "epoch": 22.577593360995852, "grad_norm": 11.086763381958008, "learning_rate": 1.0973941908713694e-05, "loss": 0.2675, "step": 27206 }, { "epoch": 22.578423236514524, "grad_norm": 73.5465316772461, "learning_rate": 1.0973609958506224e-05, "loss": 0.7457, "step": 27207 }, { "epoch": 22.579253112033197, 
"grad_norm": 94.80001068115234, "learning_rate": 1.0973278008298755e-05, "loss": 0.4189, "step": 27208 }, { "epoch": 22.58008298755187, "grad_norm": 19.89422035217285, "learning_rate": 1.0972946058091287e-05, "loss": 0.3413, "step": 27209 }, { "epoch": 22.58091286307054, "grad_norm": 29.838150024414062, "learning_rate": 1.0972614107883819e-05, "loss": 0.6191, "step": 27210 }, { "epoch": 22.581742738589213, "grad_norm": 36.1160888671875, "learning_rate": 1.0972282157676351e-05, "loss": 0.3866, "step": 27211 }, { "epoch": 22.582572614107885, "grad_norm": 14.290101051330566, "learning_rate": 1.097195020746888e-05, "loss": 0.268, "step": 27212 }, { "epoch": 22.583402489626557, "grad_norm": 138.01168823242188, "learning_rate": 1.0971618257261412e-05, "loss": 0.5667, "step": 27213 }, { "epoch": 22.58423236514523, "grad_norm": 80.1600341796875, "learning_rate": 1.0971286307053944e-05, "loss": 0.638, "step": 27214 }, { "epoch": 22.585062240663902, "grad_norm": 71.13658142089844, "learning_rate": 1.0970954356846474e-05, "loss": 0.3639, "step": 27215 }, { "epoch": 22.585892116182574, "grad_norm": 41.70537567138672, "learning_rate": 1.0970622406639005e-05, "loss": 0.5087, "step": 27216 }, { "epoch": 22.586721991701246, "grad_norm": 51.04821014404297, "learning_rate": 1.0970290456431535e-05, "loss": 0.715, "step": 27217 }, { "epoch": 22.58755186721992, "grad_norm": 44.7675666809082, "learning_rate": 1.0969958506224067e-05, "loss": 0.6691, "step": 27218 }, { "epoch": 22.58838174273859, "grad_norm": 36.556007385253906, "learning_rate": 1.09696265560166e-05, "loss": 0.3537, "step": 27219 }, { "epoch": 22.589211618257263, "grad_norm": 42.89326858520508, "learning_rate": 1.0969294605809128e-05, "loss": 0.7832, "step": 27220 }, { "epoch": 22.590041493775935, "grad_norm": 77.0140609741211, "learning_rate": 1.096896265560166e-05, "loss": 0.5091, "step": 27221 }, { "epoch": 22.590871369294607, "grad_norm": 22.538063049316406, "learning_rate": 1.0968630705394192e-05, "loss": 0.3285, 
"step": 27222 }, { "epoch": 22.59170124481328, "grad_norm": 46.63908004760742, "learning_rate": 1.0968298755186725e-05, "loss": 0.7684, "step": 27223 }, { "epoch": 22.59253112033195, "grad_norm": 24.141708374023438, "learning_rate": 1.0967966804979253e-05, "loss": 0.3367, "step": 27224 }, { "epoch": 22.593360995850624, "grad_norm": 125.37223815917969, "learning_rate": 1.0967634854771785e-05, "loss": 1.7703, "step": 27225 }, { "epoch": 22.594190871369296, "grad_norm": 42.40823745727539, "learning_rate": 1.0967302904564316e-05, "loss": 0.4862, "step": 27226 }, { "epoch": 22.59502074688797, "grad_norm": 16.62904167175293, "learning_rate": 1.0966970954356848e-05, "loss": 0.4161, "step": 27227 }, { "epoch": 22.59585062240664, "grad_norm": 29.401330947875977, "learning_rate": 1.0966639004149378e-05, "loss": 0.2949, "step": 27228 }, { "epoch": 22.596680497925313, "grad_norm": 34.4020881652832, "learning_rate": 1.0966307053941909e-05, "loss": 0.49, "step": 27229 }, { "epoch": 22.597510373443985, "grad_norm": 98.53567504882812, "learning_rate": 1.096597510373444e-05, "loss": 0.8755, "step": 27230 }, { "epoch": 22.598340248962657, "grad_norm": 23.06536102294922, "learning_rate": 1.0965643153526973e-05, "loss": 0.3788, "step": 27231 }, { "epoch": 22.59917012448133, "grad_norm": 47.285160064697266, "learning_rate": 1.0965311203319502e-05, "loss": 0.4926, "step": 27232 }, { "epoch": 22.6, "grad_norm": 50.9954719543457, "learning_rate": 1.0964979253112034e-05, "loss": 0.6729, "step": 27233 }, { "epoch": 22.600829875518674, "grad_norm": 81.37124633789062, "learning_rate": 1.0964647302904566e-05, "loss": 0.4632, "step": 27234 }, { "epoch": 22.601659751037346, "grad_norm": 72.31385040283203, "learning_rate": 1.0964315352697096e-05, "loss": 0.9734, "step": 27235 }, { "epoch": 22.602489626556018, "grad_norm": 22.247188568115234, "learning_rate": 1.0963983402489628e-05, "loss": 0.2499, "step": 27236 }, { "epoch": 22.60331950207469, "grad_norm": 123.31168365478516, "learning_rate": 
1.0963651452282159e-05, "loss": 0.404, "step": 27237 }, { "epoch": 22.604149377593362, "grad_norm": 56.74541091918945, "learning_rate": 1.096331950207469e-05, "loss": 0.3949, "step": 27238 }, { "epoch": 22.604979253112035, "grad_norm": 109.46978759765625, "learning_rate": 1.0962987551867221e-05, "loss": 0.5872, "step": 27239 }, { "epoch": 22.605809128630707, "grad_norm": 56.06785583496094, "learning_rate": 1.0962655601659753e-05, "loss": 0.7007, "step": 27240 }, { "epoch": 22.60663900414938, "grad_norm": 92.4220199584961, "learning_rate": 1.0962323651452282e-05, "loss": 0.3022, "step": 27241 }, { "epoch": 22.60746887966805, "grad_norm": 24.602359771728516, "learning_rate": 1.0961991701244814e-05, "loss": 0.4186, "step": 27242 }, { "epoch": 22.608298755186723, "grad_norm": 25.5273380279541, "learning_rate": 1.0961659751037346e-05, "loss": 0.2944, "step": 27243 }, { "epoch": 22.609128630705396, "grad_norm": 105.42144012451172, "learning_rate": 1.0961327800829877e-05, "loss": 0.6807, "step": 27244 }, { "epoch": 22.609958506224068, "grad_norm": 67.5830078125, "learning_rate": 1.0960995850622407e-05, "loss": 0.3502, "step": 27245 }, { "epoch": 22.61078838174274, "grad_norm": 16.968338012695312, "learning_rate": 1.096066390041494e-05, "loss": 0.4056, "step": 27246 }, { "epoch": 22.611618257261412, "grad_norm": 68.83295440673828, "learning_rate": 1.096033195020747e-05, "loss": 0.9102, "step": 27247 }, { "epoch": 22.612448132780084, "grad_norm": 49.522972106933594, "learning_rate": 1.0960000000000002e-05, "loss": 0.5998, "step": 27248 }, { "epoch": 22.613278008298757, "grad_norm": 80.71790313720703, "learning_rate": 1.095966804979253e-05, "loss": 0.6813, "step": 27249 }, { "epoch": 22.61410788381743, "grad_norm": 22.62244987487793, "learning_rate": 1.0959336099585063e-05, "loss": 0.3281, "step": 27250 }, { "epoch": 22.6149377593361, "grad_norm": 21.09403419494629, "learning_rate": 1.0959004149377595e-05, "loss": 0.3154, "step": 27251 }, { "epoch": 22.615767634854773, 
"grad_norm": 18.296815872192383, "learning_rate": 1.0958672199170127e-05, "loss": 0.3023, "step": 27252 }, { "epoch": 22.616597510373445, "grad_norm": 35.82799530029297, "learning_rate": 1.0958340248962656e-05, "loss": 0.2985, "step": 27253 }, { "epoch": 22.617427385892118, "grad_norm": 94.80946350097656, "learning_rate": 1.0958008298755188e-05, "loss": 0.3625, "step": 27254 }, { "epoch": 22.61825726141079, "grad_norm": 37.81945037841797, "learning_rate": 1.0957676348547718e-05, "loss": 0.8389, "step": 27255 }, { "epoch": 22.619087136929462, "grad_norm": 19.74094581604004, "learning_rate": 1.095734439834025e-05, "loss": 0.3067, "step": 27256 }, { "epoch": 22.619917012448134, "grad_norm": 25.887086868286133, "learning_rate": 1.095701244813278e-05, "loss": 0.3717, "step": 27257 }, { "epoch": 22.620746887966806, "grad_norm": 17.02127456665039, "learning_rate": 1.0956680497925311e-05, "loss": 0.236, "step": 27258 }, { "epoch": 22.62157676348548, "grad_norm": 15.122154235839844, "learning_rate": 1.0956348547717843e-05, "loss": 0.3598, "step": 27259 }, { "epoch": 22.62240663900415, "grad_norm": 66.53215026855469, "learning_rate": 1.0956016597510375e-05, "loss": 0.7161, "step": 27260 }, { "epoch": 22.623236514522823, "grad_norm": 14.893364906311035, "learning_rate": 1.0955684647302904e-05, "loss": 0.3622, "step": 27261 }, { "epoch": 22.624066390041495, "grad_norm": 17.955913543701172, "learning_rate": 1.0955352697095436e-05, "loss": 0.4202, "step": 27262 }, { "epoch": 22.624896265560167, "grad_norm": 35.264591217041016, "learning_rate": 1.0955020746887968e-05, "loss": 0.6024, "step": 27263 }, { "epoch": 22.62572614107884, "grad_norm": 30.699987411499023, "learning_rate": 1.0954688796680499e-05, "loss": 0.3167, "step": 27264 }, { "epoch": 22.62655601659751, "grad_norm": 41.16413497924805, "learning_rate": 1.095435684647303e-05, "loss": 0.3143, "step": 27265 }, { "epoch": 22.627385892116184, "grad_norm": 18.54714584350586, "learning_rate": 1.0954024896265561e-05, "loss": 
0.2919, "step": 27266 }, { "epoch": 22.628215767634856, "grad_norm": 55.244808197021484, "learning_rate": 1.0953692946058092e-05, "loss": 0.7043, "step": 27267 }, { "epoch": 22.62904564315353, "grad_norm": 41.869869232177734, "learning_rate": 1.0953360995850624e-05, "loss": 0.4852, "step": 27268 }, { "epoch": 22.6298755186722, "grad_norm": 21.098209381103516, "learning_rate": 1.0953029045643156e-05, "loss": 0.3537, "step": 27269 }, { "epoch": 22.630705394190873, "grad_norm": 92.97581481933594, "learning_rate": 1.0952697095435684e-05, "loss": 0.6402, "step": 27270 }, { "epoch": 22.631535269709545, "grad_norm": 39.24649429321289, "learning_rate": 1.0952365145228217e-05, "loss": 0.8208, "step": 27271 }, { "epoch": 22.632365145228217, "grad_norm": 46.35082244873047, "learning_rate": 1.0952033195020749e-05, "loss": 0.7242, "step": 27272 }, { "epoch": 22.63319502074689, "grad_norm": 35.286537170410156, "learning_rate": 1.0951701244813279e-05, "loss": 0.427, "step": 27273 }, { "epoch": 22.63402489626556, "grad_norm": 36.54173278808594, "learning_rate": 1.095136929460581e-05, "loss": 0.495, "step": 27274 }, { "epoch": 22.634854771784234, "grad_norm": 28.293800354003906, "learning_rate": 1.0951037344398342e-05, "loss": 0.5143, "step": 27275 }, { "epoch": 22.635684647302906, "grad_norm": 78.2873306274414, "learning_rate": 1.0950705394190872e-05, "loss": 0.621, "step": 27276 }, { "epoch": 22.636514522821578, "grad_norm": 66.5467758178711, "learning_rate": 1.0950373443983404e-05, "loss": 0.9488, "step": 27277 }, { "epoch": 22.63734439834025, "grad_norm": 28.841123580932617, "learning_rate": 1.0950041493775933e-05, "loss": 0.594, "step": 27278 }, { "epoch": 22.638174273858922, "grad_norm": 31.693645477294922, "learning_rate": 1.0949709543568465e-05, "loss": 0.7458, "step": 27279 }, { "epoch": 22.639004149377595, "grad_norm": 56.517520904541016, "learning_rate": 1.0949377593360997e-05, "loss": 0.4394, "step": 27280 }, { "epoch": 22.639834024896267, "grad_norm": 
24.031009674072266, "learning_rate": 1.094904564315353e-05, "loss": 0.4019, "step": 27281 }, { "epoch": 22.64066390041494, "grad_norm": 54.12074279785156, "learning_rate": 1.0948713692946058e-05, "loss": 0.7058, "step": 27282 }, { "epoch": 22.64149377593361, "grad_norm": 24.00156021118164, "learning_rate": 1.094838174273859e-05, "loss": 0.3969, "step": 27283 }, { "epoch": 22.642323651452283, "grad_norm": 11.553816795349121, "learning_rate": 1.0948049792531122e-05, "loss": 0.2287, "step": 27284 }, { "epoch": 22.643153526970956, "grad_norm": 102.1556167602539, "learning_rate": 1.0947717842323653e-05, "loss": 0.4095, "step": 27285 }, { "epoch": 22.643983402489628, "grad_norm": 52.32367706298828, "learning_rate": 1.0947385892116183e-05, "loss": 0.9828, "step": 27286 }, { "epoch": 22.6448132780083, "grad_norm": 33.9377326965332, "learning_rate": 1.0947053941908713e-05, "loss": 0.6296, "step": 27287 }, { "epoch": 22.645643153526972, "grad_norm": 54.85330581665039, "learning_rate": 1.0946721991701245e-05, "loss": 0.4216, "step": 27288 }, { "epoch": 22.646473029045644, "grad_norm": 33.344337463378906, "learning_rate": 1.0946390041493778e-05, "loss": 0.3459, "step": 27289 }, { "epoch": 22.647302904564317, "grad_norm": 34.81095504760742, "learning_rate": 1.094605809128631e-05, "loss": 0.469, "step": 27290 }, { "epoch": 22.64813278008299, "grad_norm": 24.574167251586914, "learning_rate": 1.0945726141078838e-05, "loss": 0.3877, "step": 27291 }, { "epoch": 22.64896265560166, "grad_norm": 28.286266326904297, "learning_rate": 1.094539419087137e-05, "loss": 0.4387, "step": 27292 }, { "epoch": 22.649792531120333, "grad_norm": 41.9683723449707, "learning_rate": 1.0945062240663903e-05, "loss": 0.2698, "step": 27293 }, { "epoch": 22.650622406639005, "grad_norm": 75.75882720947266, "learning_rate": 1.0944730290456433e-05, "loss": 0.6917, "step": 27294 }, { "epoch": 22.651452282157678, "grad_norm": 26.74294662475586, "learning_rate": 1.0944398340248963e-05, "loss": 0.6024, "step": 27295 
}, { "epoch": 22.65228215767635, "grad_norm": 49.539615631103516, "learning_rate": 1.0944066390041494e-05, "loss": 0.2392, "step": 27296 }, { "epoch": 22.653112033195022, "grad_norm": 35.972686767578125, "learning_rate": 1.0943734439834026e-05, "loss": 0.4271, "step": 27297 }, { "epoch": 22.653941908713694, "grad_norm": 49.866512298583984, "learning_rate": 1.0943402489626558e-05, "loss": 0.4137, "step": 27298 }, { "epoch": 22.654771784232366, "grad_norm": 17.16720962524414, "learning_rate": 1.0943070539419087e-05, "loss": 0.2926, "step": 27299 }, { "epoch": 22.65560165975104, "grad_norm": 48.95500946044922, "learning_rate": 1.0942738589211619e-05, "loss": 0.3166, "step": 27300 }, { "epoch": 22.65643153526971, "grad_norm": 61.80128860473633, "learning_rate": 1.0942406639004151e-05, "loss": 0.607, "step": 27301 }, { "epoch": 22.657261410788383, "grad_norm": 57.90021514892578, "learning_rate": 1.0942074688796681e-05, "loss": 0.3572, "step": 27302 }, { "epoch": 22.658091286307055, "grad_norm": 40.89360809326172, "learning_rate": 1.0941742738589212e-05, "loss": 0.656, "step": 27303 }, { "epoch": 22.658921161825727, "grad_norm": 84.79811096191406, "learning_rate": 1.0941410788381744e-05, "loss": 0.6623, "step": 27304 }, { "epoch": 22.6597510373444, "grad_norm": 132.5213165283203, "learning_rate": 1.0941078838174274e-05, "loss": 0.5566, "step": 27305 }, { "epoch": 22.66058091286307, "grad_norm": 32.53748321533203, "learning_rate": 1.0940746887966806e-05, "loss": 0.4147, "step": 27306 }, { "epoch": 22.661410788381744, "grad_norm": 103.51455688476562, "learning_rate": 1.0940414937759337e-05, "loss": 0.7831, "step": 27307 }, { "epoch": 22.662240663900416, "grad_norm": 35.59442901611328, "learning_rate": 1.0940082987551867e-05, "loss": 0.2907, "step": 27308 }, { "epoch": 22.66307053941909, "grad_norm": 10.004154205322266, "learning_rate": 1.09397510373444e-05, "loss": 0.288, "step": 27309 }, { "epoch": 22.66390041493776, "grad_norm": 37.63007354736328, "learning_rate": 
1.0939419087136931e-05, "loss": 0.6963, "step": 27310 }, { "epoch": 22.664730290456433, "grad_norm": 29.43246078491211, "learning_rate": 1.093908713692946e-05, "loss": 0.3209, "step": 27311 }, { "epoch": 22.665560165975105, "grad_norm": 28.595951080322266, "learning_rate": 1.0938755186721992e-05, "loss": 0.4543, "step": 27312 }, { "epoch": 22.666390041493777, "grad_norm": 35.534542083740234, "learning_rate": 1.0938423236514524e-05, "loss": 1.0451, "step": 27313 }, { "epoch": 22.66721991701245, "grad_norm": 76.54388427734375, "learning_rate": 1.0938091286307055e-05, "loss": 0.7253, "step": 27314 }, { "epoch": 22.66804979253112, "grad_norm": 30.235260009765625, "learning_rate": 1.0937759336099587e-05, "loss": 0.4173, "step": 27315 }, { "epoch": 22.668879668049794, "grad_norm": 83.66963195800781, "learning_rate": 1.0937427385892117e-05, "loss": 0.3908, "step": 27316 }, { "epoch": 22.669709543568466, "grad_norm": 53.016963958740234, "learning_rate": 1.0937095435684648e-05, "loss": 0.5816, "step": 27317 }, { "epoch": 22.670539419087138, "grad_norm": 49.76845932006836, "learning_rate": 1.093676348547718e-05, "loss": 0.7505, "step": 27318 }, { "epoch": 22.67136929460581, "grad_norm": 20.8433780670166, "learning_rate": 1.0936431535269712e-05, "loss": 0.4033, "step": 27319 }, { "epoch": 22.672199170124482, "grad_norm": 106.99980163574219, "learning_rate": 1.093609958506224e-05, "loss": 0.8408, "step": 27320 }, { "epoch": 22.673029045643155, "grad_norm": 220.35763549804688, "learning_rate": 1.0935767634854773e-05, "loss": 0.9843, "step": 27321 }, { "epoch": 22.673858921161827, "grad_norm": 13.972627639770508, "learning_rate": 1.0935435684647305e-05, "loss": 0.2669, "step": 27322 }, { "epoch": 22.6746887966805, "grad_norm": 95.87451934814453, "learning_rate": 1.0935103734439835e-05, "loss": 1.0753, "step": 27323 }, { "epoch": 22.67551867219917, "grad_norm": 60.0553092956543, "learning_rate": 1.0934771784232366e-05, "loss": 0.8664, "step": 27324 }, { "epoch": 
22.676348547717843, "grad_norm": 61.2842903137207, "learning_rate": 1.0934439834024896e-05, "loss": 0.9372, "step": 27325 }, { "epoch": 22.677178423236516, "grad_norm": 43.017967224121094, "learning_rate": 1.0934107883817428e-05, "loss": 1.0062, "step": 27326 }, { "epoch": 22.678008298755188, "grad_norm": 40.19314193725586, "learning_rate": 1.093377593360996e-05, "loss": 0.7067, "step": 27327 }, { "epoch": 22.67883817427386, "grad_norm": 25.13450813293457, "learning_rate": 1.0933443983402489e-05, "loss": 0.3431, "step": 27328 }, { "epoch": 22.679668049792532, "grad_norm": 24.497419357299805, "learning_rate": 1.0933112033195021e-05, "loss": 0.2903, "step": 27329 }, { "epoch": 22.680497925311204, "grad_norm": 42.81596755981445, "learning_rate": 1.0932780082987553e-05, "loss": 0.9391, "step": 27330 }, { "epoch": 22.681327800829877, "grad_norm": 37.06427001953125, "learning_rate": 1.0932448132780085e-05, "loss": 0.7931, "step": 27331 }, { "epoch": 22.68215767634855, "grad_norm": 60.52860641479492, "learning_rate": 1.0932116182572614e-05, "loss": 0.5311, "step": 27332 }, { "epoch": 22.68298755186722, "grad_norm": 30.438753128051758, "learning_rate": 1.0931784232365146e-05, "loss": 0.4655, "step": 27333 }, { "epoch": 22.683817427385893, "grad_norm": 28.534900665283203, "learning_rate": 1.0931452282157677e-05, "loss": 0.3035, "step": 27334 }, { "epoch": 22.684647302904565, "grad_norm": 166.41758728027344, "learning_rate": 1.0931120331950209e-05, "loss": 0.7037, "step": 27335 }, { "epoch": 22.685477178423238, "grad_norm": 22.177885055541992, "learning_rate": 1.093078838174274e-05, "loss": 0.4021, "step": 27336 }, { "epoch": 22.68630705394191, "grad_norm": 18.509387969970703, "learning_rate": 1.093045643153527e-05, "loss": 0.2579, "step": 27337 }, { "epoch": 22.687136929460582, "grad_norm": 31.14584732055664, "learning_rate": 1.0930124481327802e-05, "loss": 0.5914, "step": 27338 }, { "epoch": 22.687966804979254, "grad_norm": 37.87395477294922, "learning_rate": 
1.0929792531120334e-05, "loss": 0.5459, "step": 27339 }, { "epoch": 22.688796680497926, "grad_norm": 125.76714324951172, "learning_rate": 1.0929460580912863e-05, "loss": 0.4043, "step": 27340 }, { "epoch": 22.6896265560166, "grad_norm": 26.870742797851562, "learning_rate": 1.0929128630705395e-05, "loss": 0.4893, "step": 27341 }, { "epoch": 22.69045643153527, "grad_norm": 53.298248291015625, "learning_rate": 1.0928796680497927e-05, "loss": 0.3556, "step": 27342 }, { "epoch": 22.691286307053943, "grad_norm": 32.80570602416992, "learning_rate": 1.0928464730290457e-05, "loss": 0.4718, "step": 27343 }, { "epoch": 22.692116182572615, "grad_norm": 41.44635009765625, "learning_rate": 1.092813278008299e-05, "loss": 0.404, "step": 27344 }, { "epoch": 22.692946058091287, "grad_norm": 54.40409469604492, "learning_rate": 1.092780082987552e-05, "loss": 0.7044, "step": 27345 }, { "epoch": 22.69377593360996, "grad_norm": 70.12017822265625, "learning_rate": 1.092746887966805e-05, "loss": 0.6219, "step": 27346 }, { "epoch": 22.694605809128632, "grad_norm": 45.760013580322266, "learning_rate": 1.0927136929460582e-05, "loss": 0.4956, "step": 27347 }, { "epoch": 22.695435684647304, "grad_norm": 34.63163375854492, "learning_rate": 1.0926804979253114e-05, "loss": 0.3192, "step": 27348 }, { "epoch": 22.696265560165976, "grad_norm": 28.908039093017578, "learning_rate": 1.0926473029045643e-05, "loss": 0.546, "step": 27349 }, { "epoch": 22.69709543568465, "grad_norm": 91.7754898071289, "learning_rate": 1.0926141078838175e-05, "loss": 0.607, "step": 27350 }, { "epoch": 22.69792531120332, "grad_norm": 38.67006301879883, "learning_rate": 1.0925809128630707e-05, "loss": 0.3869, "step": 27351 }, { "epoch": 22.698755186721993, "grad_norm": 56.73286056518555, "learning_rate": 1.0925477178423238e-05, "loss": 0.4235, "step": 27352 }, { "epoch": 22.699585062240665, "grad_norm": 27.15918731689453, "learning_rate": 1.0925145228215768e-05, "loss": 0.4303, "step": 27353 }, { "epoch": 22.700414937759337, 
"grad_norm": 50.18073272705078, "learning_rate": 1.09248132780083e-05, "loss": 1.2629, "step": 27354 }, { "epoch": 22.70124481327801, "grad_norm": 13.833874702453613, "learning_rate": 1.092448132780083e-05, "loss": 0.2044, "step": 27355 }, { "epoch": 22.70207468879668, "grad_norm": 18.561378479003906, "learning_rate": 1.0924149377593363e-05, "loss": 0.3485, "step": 27356 }, { "epoch": 22.702904564315354, "grad_norm": 51.37594985961914, "learning_rate": 1.0923817427385891e-05, "loss": 0.3979, "step": 27357 }, { "epoch": 22.703734439834026, "grad_norm": 29.12676239013672, "learning_rate": 1.0923485477178424e-05, "loss": 0.4618, "step": 27358 }, { "epoch": 22.704564315352698, "grad_norm": 18.366640090942383, "learning_rate": 1.0923153526970956e-05, "loss": 0.2719, "step": 27359 }, { "epoch": 22.70539419087137, "grad_norm": 60.22834396362305, "learning_rate": 1.0922821576763488e-05, "loss": 0.5215, "step": 27360 }, { "epoch": 22.706224066390043, "grad_norm": 30.17478370666504, "learning_rate": 1.0922489626556016e-05, "loss": 0.5807, "step": 27361 }, { "epoch": 22.707053941908715, "grad_norm": 52.696475982666016, "learning_rate": 1.0922157676348549e-05, "loss": 1.0651, "step": 27362 }, { "epoch": 22.707883817427387, "grad_norm": 36.43706512451172, "learning_rate": 1.092182572614108e-05, "loss": 0.4738, "step": 27363 }, { "epoch": 22.70871369294606, "grad_norm": 48.954994201660156, "learning_rate": 1.0921493775933611e-05, "loss": 0.7766, "step": 27364 }, { "epoch": 22.70954356846473, "grad_norm": 100.83035278320312, "learning_rate": 1.0921161825726141e-05, "loss": 0.3308, "step": 27365 }, { "epoch": 22.710373443983404, "grad_norm": 119.70349884033203, "learning_rate": 1.0920829875518672e-05, "loss": 0.5962, "step": 27366 }, { "epoch": 22.711203319502076, "grad_norm": 25.132461547851562, "learning_rate": 1.0920497925311204e-05, "loss": 0.3658, "step": 27367 }, { "epoch": 22.712033195020748, "grad_norm": 78.621826171875, "learning_rate": 1.0920165975103736e-05, "loss": 
0.8164, "step": 27368 }, { "epoch": 22.71286307053942, "grad_norm": 24.970657348632812, "learning_rate": 1.0919834024896268e-05, "loss": 0.3468, "step": 27369 }, { "epoch": 22.713692946058092, "grad_norm": 39.37042236328125, "learning_rate": 1.0919502074688797e-05, "loss": 0.6359, "step": 27370 }, { "epoch": 22.714522821576764, "grad_norm": 43.303955078125, "learning_rate": 1.0919170124481329e-05, "loss": 0.6335, "step": 27371 }, { "epoch": 22.715352697095437, "grad_norm": 7.151737213134766, "learning_rate": 1.091883817427386e-05, "loss": 0.2184, "step": 27372 }, { "epoch": 22.71618257261411, "grad_norm": 23.45165252685547, "learning_rate": 1.0918506224066392e-05, "loss": 0.261, "step": 27373 }, { "epoch": 22.71701244813278, "grad_norm": 20.762813568115234, "learning_rate": 1.0918174273858922e-05, "loss": 0.3326, "step": 27374 }, { "epoch": 22.717842323651453, "grad_norm": 21.315317153930664, "learning_rate": 1.0917842323651452e-05, "loss": 0.2997, "step": 27375 }, { "epoch": 22.718672199170125, "grad_norm": 18.102834701538086, "learning_rate": 1.0917510373443985e-05, "loss": 0.3268, "step": 27376 }, { "epoch": 22.719502074688798, "grad_norm": 20.211986541748047, "learning_rate": 1.0917178423236517e-05, "loss": 0.3555, "step": 27377 }, { "epoch": 22.72033195020747, "grad_norm": 23.100936889648438, "learning_rate": 1.0916846473029045e-05, "loss": 0.2731, "step": 27378 }, { "epoch": 22.721161825726142, "grad_norm": 81.869140625, "learning_rate": 1.0916514522821577e-05, "loss": 0.6003, "step": 27379 }, { "epoch": 22.721991701244814, "grad_norm": 30.02184295654297, "learning_rate": 1.091618257261411e-05, "loss": 0.5179, "step": 27380 }, { "epoch": 22.722821576763486, "grad_norm": 43.48445510864258, "learning_rate": 1.091585062240664e-05, "loss": 0.8814, "step": 27381 }, { "epoch": 22.72365145228216, "grad_norm": 123.77729797363281, "learning_rate": 1.091551867219917e-05, "loss": 0.6826, "step": 27382 }, { "epoch": 22.72448132780083, "grad_norm": 46.597415924072266, 
"learning_rate": 1.0915186721991702e-05, "loss": 0.5953, "step": 27383 }, { "epoch": 22.725311203319503, "grad_norm": 7.646053791046143, "learning_rate": 1.0914854771784233e-05, "loss": 0.2367, "step": 27384 }, { "epoch": 22.726141078838175, "grad_norm": 15.529035568237305, "learning_rate": 1.0914522821576765e-05, "loss": 0.2877, "step": 27385 }, { "epoch": 22.726970954356847, "grad_norm": 59.88909149169922, "learning_rate": 1.0914190871369294e-05, "loss": 0.3524, "step": 27386 }, { "epoch": 22.72780082987552, "grad_norm": 88.72749328613281, "learning_rate": 1.0913858921161826e-05, "loss": 0.5048, "step": 27387 }, { "epoch": 22.728630705394192, "grad_norm": 48.94685363769531, "learning_rate": 1.0913526970954358e-05, "loss": 0.4412, "step": 27388 }, { "epoch": 22.729460580912864, "grad_norm": 16.202116012573242, "learning_rate": 1.091319502074689e-05, "loss": 0.4257, "step": 27389 }, { "epoch": 22.730290456431536, "grad_norm": 19.31667137145996, "learning_rate": 1.0912863070539419e-05, "loss": 0.3165, "step": 27390 }, { "epoch": 22.73112033195021, "grad_norm": 116.04334259033203, "learning_rate": 1.0912531120331951e-05, "loss": 0.6299, "step": 27391 }, { "epoch": 22.73195020746888, "grad_norm": 24.21115493774414, "learning_rate": 1.0912199170124483e-05, "loss": 0.4844, "step": 27392 }, { "epoch": 22.732780082987553, "grad_norm": 53.953956604003906, "learning_rate": 1.0911867219917013e-05, "loss": 0.6697, "step": 27393 }, { "epoch": 22.733609958506225, "grad_norm": 62.91774368286133, "learning_rate": 1.0911535269709544e-05, "loss": 0.621, "step": 27394 }, { "epoch": 22.734439834024897, "grad_norm": 61.93149185180664, "learning_rate": 1.0911203319502074e-05, "loss": 0.7515, "step": 27395 }, { "epoch": 22.73526970954357, "grad_norm": 33.69450378417969, "learning_rate": 1.0910871369294606e-05, "loss": 1.055, "step": 27396 }, { "epoch": 22.73609958506224, "grad_norm": 58.12762451171875, "learning_rate": 1.0910539419087138e-05, "loss": 0.3892, "step": 27397 }, { "epoch": 
22.736929460580914, "grad_norm": 77.5643539428711, "learning_rate": 1.091020746887967e-05, "loss": 0.4525, "step": 27398 }, { "epoch": 22.737759336099586, "grad_norm": 85.36882781982422, "learning_rate": 1.09098755186722e-05, "loss": 0.5756, "step": 27399 }, { "epoch": 22.738589211618258, "grad_norm": 26.387428283691406, "learning_rate": 1.0909543568464731e-05, "loss": 0.5942, "step": 27400 }, { "epoch": 22.73941908713693, "grad_norm": 43.57817459106445, "learning_rate": 1.0909211618257263e-05, "loss": 0.3356, "step": 27401 }, { "epoch": 22.740248962655603, "grad_norm": 33.91224670410156, "learning_rate": 1.0908879668049794e-05, "loss": 0.6222, "step": 27402 }, { "epoch": 22.741078838174275, "grad_norm": 44.36394119262695, "learning_rate": 1.0908547717842324e-05, "loss": 0.5269, "step": 27403 }, { "epoch": 22.741908713692947, "grad_norm": 136.30088806152344, "learning_rate": 1.0908215767634855e-05, "loss": 0.6948, "step": 27404 }, { "epoch": 22.74273858921162, "grad_norm": 80.82100677490234, "learning_rate": 1.0907883817427387e-05, "loss": 0.5643, "step": 27405 }, { "epoch": 22.74356846473029, "grad_norm": 29.032732009887695, "learning_rate": 1.0907551867219919e-05, "loss": 0.4435, "step": 27406 }, { "epoch": 22.744398340248964, "grad_norm": 56.5186653137207, "learning_rate": 1.0907219917012448e-05, "loss": 0.8003, "step": 27407 }, { "epoch": 22.745228215767636, "grad_norm": 33.71805953979492, "learning_rate": 1.090688796680498e-05, "loss": 0.3785, "step": 27408 }, { "epoch": 22.746058091286308, "grad_norm": 36.0050048828125, "learning_rate": 1.0906556016597512e-05, "loss": 0.5043, "step": 27409 }, { "epoch": 22.74688796680498, "grad_norm": 63.354949951171875, "learning_rate": 1.0906224066390044e-05, "loss": 0.9055, "step": 27410 }, { "epoch": 22.747717842323652, "grad_norm": 22.14582061767578, "learning_rate": 1.0905892116182573e-05, "loss": 0.3871, "step": 27411 }, { "epoch": 22.748547717842325, "grad_norm": 30.74986457824707, "learning_rate": 
1.0905560165975105e-05, "loss": 0.5923, "step": 27412 }, { "epoch": 22.749377593360997, "grad_norm": 9.36317253112793, "learning_rate": 1.0905228215767635e-05, "loss": 0.3152, "step": 27413 }, { "epoch": 22.75020746887967, "grad_norm": 146.87428283691406, "learning_rate": 1.0904896265560167e-05, "loss": 0.4274, "step": 27414 }, { "epoch": 22.75103734439834, "grad_norm": 66.96772766113281, "learning_rate": 1.0904564315352698e-05, "loss": 0.4916, "step": 27415 }, { "epoch": 22.751867219917013, "grad_norm": 31.202749252319336, "learning_rate": 1.0904232365145228e-05, "loss": 0.4002, "step": 27416 }, { "epoch": 22.752697095435686, "grad_norm": 69.46781158447266, "learning_rate": 1.090390041493776e-05, "loss": 0.5866, "step": 27417 }, { "epoch": 22.753526970954358, "grad_norm": 8.875670433044434, "learning_rate": 1.0903568464730292e-05, "loss": 0.3078, "step": 27418 }, { "epoch": 22.75435684647303, "grad_norm": 29.13992691040039, "learning_rate": 1.0903236514522821e-05, "loss": 0.9225, "step": 27419 }, { "epoch": 22.755186721991702, "grad_norm": 53.275604248046875, "learning_rate": 1.0902904564315353e-05, "loss": 0.481, "step": 27420 }, { "epoch": 22.756016597510374, "grad_norm": 37.62055206298828, "learning_rate": 1.0902572614107885e-05, "loss": 0.6666, "step": 27421 }, { "epoch": 22.756846473029047, "grad_norm": 20.789440155029297, "learning_rate": 1.0902240663900416e-05, "loss": 0.4166, "step": 27422 }, { "epoch": 22.75767634854772, "grad_norm": 60.9166374206543, "learning_rate": 1.0901908713692948e-05, "loss": 1.1983, "step": 27423 }, { "epoch": 22.75850622406639, "grad_norm": 30.056840896606445, "learning_rate": 1.0901576763485478e-05, "loss": 0.34, "step": 27424 }, { "epoch": 22.759336099585063, "grad_norm": 108.01356506347656, "learning_rate": 1.0901244813278009e-05, "loss": 0.8266, "step": 27425 }, { "epoch": 22.760165975103735, "grad_norm": 17.982141494750977, "learning_rate": 1.090091286307054e-05, "loss": 0.2536, "step": 27426 }, { "epoch": 
22.760995850622407, "grad_norm": 85.41487884521484, "learning_rate": 1.0900580912863073e-05, "loss": 0.5278, "step": 27427 }, { "epoch": 22.76182572614108, "grad_norm": 22.977069854736328, "learning_rate": 1.0900248962655602e-05, "loss": 0.2808, "step": 27428 }, { "epoch": 22.762655601659752, "grad_norm": 30.110326766967773, "learning_rate": 1.0899917012448134e-05, "loss": 0.504, "step": 27429 }, { "epoch": 22.763485477178424, "grad_norm": 22.327316284179688, "learning_rate": 1.0899585062240666e-05, "loss": 0.3847, "step": 27430 }, { "epoch": 22.764315352697096, "grad_norm": 70.64862060546875, "learning_rate": 1.0899253112033196e-05, "loss": 0.718, "step": 27431 }, { "epoch": 22.76514522821577, "grad_norm": 27.94772720336914, "learning_rate": 1.0898921161825727e-05, "loss": 0.6554, "step": 27432 }, { "epoch": 22.76597510373444, "grad_norm": 39.59661102294922, "learning_rate": 1.0898589211618257e-05, "loss": 0.4027, "step": 27433 }, { "epoch": 22.766804979253113, "grad_norm": 11.09809684753418, "learning_rate": 1.089825726141079e-05, "loss": 0.3727, "step": 27434 }, { "epoch": 22.767634854771785, "grad_norm": 46.62281036376953, "learning_rate": 1.0897925311203321e-05, "loss": 0.5389, "step": 27435 }, { "epoch": 22.768464730290457, "grad_norm": 42.34102249145508, "learning_rate": 1.089759336099585e-05, "loss": 0.4856, "step": 27436 }, { "epoch": 22.76929460580913, "grad_norm": 58.47883605957031, "learning_rate": 1.0897261410788382e-05, "loss": 0.6264, "step": 27437 }, { "epoch": 22.7701244813278, "grad_norm": 278.18426513671875, "learning_rate": 1.0896929460580914e-05, "loss": 0.8021, "step": 27438 }, { "epoch": 22.770954356846474, "grad_norm": 103.5914535522461, "learning_rate": 1.0896597510373446e-05, "loss": 0.4364, "step": 27439 }, { "epoch": 22.771784232365146, "grad_norm": 67.0809555053711, "learning_rate": 1.0896265560165975e-05, "loss": 0.4314, "step": 27440 }, { "epoch": 22.77261410788382, "grad_norm": 45.54756546020508, "learning_rate": 
1.0895933609958507e-05, "loss": 0.4489, "step": 27441 }, { "epoch": 22.77344398340249, "grad_norm": 51.896453857421875, "learning_rate": 1.0895601659751038e-05, "loss": 0.4462, "step": 27442 }, { "epoch": 22.774273858921163, "grad_norm": 84.96440887451172, "learning_rate": 1.089526970954357e-05, "loss": 1.1186, "step": 27443 }, { "epoch": 22.775103734439835, "grad_norm": 41.16475296020508, "learning_rate": 1.08949377593361e-05, "loss": 0.6037, "step": 27444 }, { "epoch": 22.775933609958507, "grad_norm": 45.727882385253906, "learning_rate": 1.089460580912863e-05, "loss": 0.4571, "step": 27445 }, { "epoch": 22.77676348547718, "grad_norm": 34.230079650878906, "learning_rate": 1.0894273858921163e-05, "loss": 0.4595, "step": 27446 }, { "epoch": 22.77759336099585, "grad_norm": 36.48562240600586, "learning_rate": 1.0893941908713695e-05, "loss": 0.488, "step": 27447 }, { "epoch": 22.778423236514524, "grad_norm": 24.705583572387695, "learning_rate": 1.0893609958506227e-05, "loss": 0.3997, "step": 27448 }, { "epoch": 22.779253112033196, "grad_norm": 73.68858337402344, "learning_rate": 1.0893278008298756e-05, "loss": 0.7334, "step": 27449 }, { "epoch": 22.780082987551868, "grad_norm": 27.146453857421875, "learning_rate": 1.0892946058091288e-05, "loss": 0.3359, "step": 27450 }, { "epoch": 22.78091286307054, "grad_norm": 23.44013214111328, "learning_rate": 1.0892614107883818e-05, "loss": 0.4014, "step": 27451 }, { "epoch": 22.781742738589212, "grad_norm": 59.249324798583984, "learning_rate": 1.089228215767635e-05, "loss": 0.5535, "step": 27452 }, { "epoch": 22.782572614107885, "grad_norm": 88.52357482910156, "learning_rate": 1.089195020746888e-05, "loss": 0.4659, "step": 27453 }, { "epoch": 22.783402489626557, "grad_norm": 38.01971435546875, "learning_rate": 1.0891618257261411e-05, "loss": 0.6717, "step": 27454 }, { "epoch": 22.78423236514523, "grad_norm": 9.256851196289062, "learning_rate": 1.0891286307053943e-05, "loss": 0.2125, "step": 27455 }, { "epoch": 22.7850622406639, 
"grad_norm": 70.7963638305664, "learning_rate": 1.0890954356846475e-05, "loss": 0.6671, "step": 27456 }, { "epoch": 22.785892116182573, "grad_norm": 13.449507713317871, "learning_rate": 1.0890622406639004e-05, "loss": 0.281, "step": 27457 }, { "epoch": 22.786721991701246, "grad_norm": 181.57908630371094, "learning_rate": 1.0890290456431536e-05, "loss": 0.7371, "step": 27458 }, { "epoch": 22.787551867219918, "grad_norm": 26.802898406982422, "learning_rate": 1.0889958506224068e-05, "loss": 0.2721, "step": 27459 }, { "epoch": 22.78838174273859, "grad_norm": 78.6727523803711, "learning_rate": 1.0889626556016599e-05, "loss": 0.9194, "step": 27460 }, { "epoch": 22.789211618257262, "grad_norm": 56.190677642822266, "learning_rate": 1.0889294605809129e-05, "loss": 0.3842, "step": 27461 }, { "epoch": 22.790041493775934, "grad_norm": 16.815343856811523, "learning_rate": 1.0888962655601661e-05, "loss": 0.4042, "step": 27462 }, { "epoch": 22.790871369294607, "grad_norm": 55.42281723022461, "learning_rate": 1.0888630705394191e-05, "loss": 0.4675, "step": 27463 }, { "epoch": 22.79170124481328, "grad_norm": 39.27227783203125, "learning_rate": 1.0888298755186724e-05, "loss": 0.5201, "step": 27464 }, { "epoch": 22.79253112033195, "grad_norm": 40.325927734375, "learning_rate": 1.0887966804979252e-05, "loss": 0.4639, "step": 27465 }, { "epoch": 22.793360995850623, "grad_norm": 24.56337547302246, "learning_rate": 1.0887634854771784e-05, "loss": 0.3099, "step": 27466 }, { "epoch": 22.794190871369295, "grad_norm": 37.22782897949219, "learning_rate": 1.0887302904564317e-05, "loss": 0.3222, "step": 27467 }, { "epoch": 22.795020746887968, "grad_norm": 50.546775817871094, "learning_rate": 1.0886970954356849e-05, "loss": 0.6708, "step": 27468 }, { "epoch": 22.79585062240664, "grad_norm": 34.53950119018555, "learning_rate": 1.0886639004149377e-05, "loss": 0.386, "step": 27469 }, { "epoch": 22.796680497925312, "grad_norm": 70.68292999267578, "learning_rate": 1.088630705394191e-05, "loss": 
0.8026, "step": 27470 }, { "epoch": 22.797510373443984, "grad_norm": 82.3423080444336, "learning_rate": 1.0885975103734442e-05, "loss": 0.8443, "step": 27471 }, { "epoch": 22.798340248962656, "grad_norm": 27.971237182617188, "learning_rate": 1.0885643153526972e-05, "loss": 0.3273, "step": 27472 }, { "epoch": 22.79917012448133, "grad_norm": 77.6996841430664, "learning_rate": 1.0885311203319502e-05, "loss": 0.6055, "step": 27473 }, { "epoch": 22.8, "grad_norm": 33.25875473022461, "learning_rate": 1.0884979253112033e-05, "loss": 0.5316, "step": 27474 }, { "epoch": 22.800829875518673, "grad_norm": 122.9992904663086, "learning_rate": 1.0884647302904565e-05, "loss": 0.9034, "step": 27475 }, { "epoch": 22.801659751037345, "grad_norm": 64.01358032226562, "learning_rate": 1.0884315352697097e-05, "loss": 0.6697, "step": 27476 }, { "epoch": 22.802489626556017, "grad_norm": 53.936378479003906, "learning_rate": 1.0883983402489629e-05, "loss": 0.499, "step": 27477 }, { "epoch": 22.80331950207469, "grad_norm": 71.11842346191406, "learning_rate": 1.0883651452282158e-05, "loss": 0.4069, "step": 27478 }, { "epoch": 22.80414937759336, "grad_norm": 61.458473205566406, "learning_rate": 1.088331950207469e-05, "loss": 0.5633, "step": 27479 }, { "epoch": 22.804979253112034, "grad_norm": 24.29892921447754, "learning_rate": 1.0882987551867222e-05, "loss": 0.3798, "step": 27480 }, { "epoch": 22.805809128630706, "grad_norm": 97.76576232910156, "learning_rate": 1.0882655601659752e-05, "loss": 0.5195, "step": 27481 }, { "epoch": 22.80663900414938, "grad_norm": 17.93785858154297, "learning_rate": 1.0882323651452283e-05, "loss": 0.2821, "step": 27482 }, { "epoch": 22.80746887966805, "grad_norm": 16.807886123657227, "learning_rate": 1.0881991701244813e-05, "loss": 0.3339, "step": 27483 }, { "epoch": 22.808298755186723, "grad_norm": 57.04560089111328, "learning_rate": 1.0881659751037345e-05, "loss": 0.4295, "step": 27484 }, { "epoch": 22.809128630705395, "grad_norm": 35.87451171875, 
"learning_rate": 1.0881327800829878e-05, "loss": 0.8536, "step": 27485 }, { "epoch": 22.809958506224067, "grad_norm": 95.24270629882812, "learning_rate": 1.0880995850622406e-05, "loss": 0.5845, "step": 27486 }, { "epoch": 22.81078838174274, "grad_norm": 44.04648971557617, "learning_rate": 1.0880663900414938e-05, "loss": 0.3378, "step": 27487 }, { "epoch": 22.81161825726141, "grad_norm": 33.0692024230957, "learning_rate": 1.088033195020747e-05, "loss": 0.3103, "step": 27488 }, { "epoch": 22.812448132780084, "grad_norm": 86.0978775024414, "learning_rate": 1.0880000000000001e-05, "loss": 0.5751, "step": 27489 }, { "epoch": 22.813278008298756, "grad_norm": 32.418800354003906, "learning_rate": 1.0879668049792531e-05, "loss": 0.4777, "step": 27490 }, { "epoch": 22.814107883817428, "grad_norm": 49.78883743286133, "learning_rate": 1.0879336099585063e-05, "loss": 0.478, "step": 27491 }, { "epoch": 22.8149377593361, "grad_norm": 20.21338653564453, "learning_rate": 1.0879004149377594e-05, "loss": 0.2961, "step": 27492 }, { "epoch": 22.815767634854772, "grad_norm": 13.665745735168457, "learning_rate": 1.0878672199170126e-05, "loss": 0.2412, "step": 27493 }, { "epoch": 22.816597510373445, "grad_norm": 37.88259506225586, "learning_rate": 1.0878340248962656e-05, "loss": 0.4731, "step": 27494 }, { "epoch": 22.817427385892117, "grad_norm": 67.56553649902344, "learning_rate": 1.0878008298755187e-05, "loss": 0.6784, "step": 27495 }, { "epoch": 22.81825726141079, "grad_norm": 88.62098693847656, "learning_rate": 1.0877676348547719e-05, "loss": 0.6011, "step": 27496 }, { "epoch": 22.81908713692946, "grad_norm": 51.25907897949219, "learning_rate": 1.0877344398340251e-05, "loss": 0.6047, "step": 27497 }, { "epoch": 22.819917012448133, "grad_norm": 135.29859924316406, "learning_rate": 1.087701244813278e-05, "loss": 0.4343, "step": 27498 }, { "epoch": 22.820746887966806, "grad_norm": 45.811317443847656, "learning_rate": 1.0876680497925312e-05, "loss": 0.6561, "step": 27499 }, { "epoch": 
22.821576763485478, "grad_norm": 20.47279930114746, "learning_rate": 1.0876348547717844e-05, "loss": 0.2515, "step": 27500 }, { "epoch": 22.82240663900415, "grad_norm": 62.20319747924805, "learning_rate": 1.0876016597510374e-05, "loss": 0.7535, "step": 27501 }, { "epoch": 22.823236514522822, "grad_norm": 51.93734359741211, "learning_rate": 1.0875684647302906e-05, "loss": 0.5071, "step": 27502 }, { "epoch": 22.824066390041494, "grad_norm": 19.8725643157959, "learning_rate": 1.0875352697095435e-05, "loss": 0.3417, "step": 27503 }, { "epoch": 22.824896265560167, "grad_norm": 46.77581787109375, "learning_rate": 1.0875020746887967e-05, "loss": 0.3974, "step": 27504 }, { "epoch": 22.82572614107884, "grad_norm": 45.3446044921875, "learning_rate": 1.08746887966805e-05, "loss": 0.4202, "step": 27505 }, { "epoch": 22.82655601659751, "grad_norm": 12.993513107299805, "learning_rate": 1.0874356846473031e-05, "loss": 0.298, "step": 27506 }, { "epoch": 22.827385892116183, "grad_norm": 22.932952880859375, "learning_rate": 1.087402489626556e-05, "loss": 0.2811, "step": 27507 }, { "epoch": 22.828215767634855, "grad_norm": 36.54844284057617, "learning_rate": 1.0873692946058092e-05, "loss": 0.4438, "step": 27508 }, { "epoch": 22.829045643153528, "grad_norm": 104.60520935058594, "learning_rate": 1.0873360995850624e-05, "loss": 0.7259, "step": 27509 }, { "epoch": 22.8298755186722, "grad_norm": 25.52685546875, "learning_rate": 1.0873029045643155e-05, "loss": 0.3967, "step": 27510 }, { "epoch": 22.830705394190872, "grad_norm": 37.82320022583008, "learning_rate": 1.0872697095435685e-05, "loss": 0.4882, "step": 27511 }, { "epoch": 22.831535269709544, "grad_norm": 87.20307922363281, "learning_rate": 1.0872365145228216e-05, "loss": 0.8637, "step": 27512 }, { "epoch": 22.832365145228216, "grad_norm": 97.3503646850586, "learning_rate": 1.0872033195020748e-05, "loss": 0.5916, "step": 27513 }, { "epoch": 22.83319502074689, "grad_norm": 42.50737380981445, "learning_rate": 1.087170124481328e-05, 
"loss": 0.7115, "step": 27514 }, { "epoch": 22.83402489626556, "grad_norm": 67.91722106933594, "learning_rate": 1.0871369294605809e-05, "loss": 0.4005, "step": 27515 }, { "epoch": 22.834854771784233, "grad_norm": 72.0320053100586, "learning_rate": 1.087103734439834e-05, "loss": 0.8958, "step": 27516 }, { "epoch": 22.835684647302905, "grad_norm": 65.38187408447266, "learning_rate": 1.0870705394190873e-05, "loss": 0.5825, "step": 27517 }, { "epoch": 22.836514522821577, "grad_norm": 12.815274238586426, "learning_rate": 1.0870373443983405e-05, "loss": 0.3074, "step": 27518 }, { "epoch": 22.83734439834025, "grad_norm": 12.000838279724121, "learning_rate": 1.0870041493775934e-05, "loss": 0.2866, "step": 27519 }, { "epoch": 22.83817427385892, "grad_norm": 45.31966018676758, "learning_rate": 1.0869709543568466e-05, "loss": 0.5824, "step": 27520 }, { "epoch": 22.839004149377594, "grad_norm": 42.718482971191406, "learning_rate": 1.0869377593360996e-05, "loss": 0.7751, "step": 27521 }, { "epoch": 22.839834024896266, "grad_norm": 58.433494567871094, "learning_rate": 1.0869045643153528e-05, "loss": 1.2974, "step": 27522 }, { "epoch": 22.84066390041494, "grad_norm": 49.01630401611328, "learning_rate": 1.0868713692946059e-05, "loss": 0.4162, "step": 27523 }, { "epoch": 22.84149377593361, "grad_norm": 34.141029357910156, "learning_rate": 1.0868381742738589e-05, "loss": 0.5227, "step": 27524 }, { "epoch": 22.842323651452283, "grad_norm": 25.86969757080078, "learning_rate": 1.0868049792531121e-05, "loss": 0.3043, "step": 27525 }, { "epoch": 22.843153526970955, "grad_norm": 33.641971588134766, "learning_rate": 1.0867717842323653e-05, "loss": 0.3377, "step": 27526 }, { "epoch": 22.843983402489627, "grad_norm": 25.05722427368164, "learning_rate": 1.0867385892116185e-05, "loss": 0.3442, "step": 27527 }, { "epoch": 22.8448132780083, "grad_norm": 9.833176612854004, "learning_rate": 1.0867053941908714e-05, "loss": 0.2842, "step": 27528 }, { "epoch": 22.84564315352697, "grad_norm": 
33.44649887084961, "learning_rate": 1.0866721991701246e-05, "loss": 0.3749, "step": 27529 }, { "epoch": 22.846473029045644, "grad_norm": 42.39020919799805, "learning_rate": 1.0866390041493777e-05, "loss": 0.5141, "step": 27530 }, { "epoch": 22.847302904564316, "grad_norm": 74.77312469482422, "learning_rate": 1.0866058091286309e-05, "loss": 0.4202, "step": 27531 }, { "epoch": 22.848132780082988, "grad_norm": 17.921419143676758, "learning_rate": 1.0865726141078839e-05, "loss": 0.2784, "step": 27532 }, { "epoch": 22.84896265560166, "grad_norm": 35.999332427978516, "learning_rate": 1.086539419087137e-05, "loss": 0.3721, "step": 27533 }, { "epoch": 22.849792531120332, "grad_norm": 16.338075637817383, "learning_rate": 1.0865062240663902e-05, "loss": 0.2692, "step": 27534 }, { "epoch": 22.850622406639005, "grad_norm": 92.68344116210938, "learning_rate": 1.0864730290456434e-05, "loss": 0.9899, "step": 27535 }, { "epoch": 22.851452282157677, "grad_norm": 73.24723052978516, "learning_rate": 1.0864398340248962e-05, "loss": 0.4032, "step": 27536 }, { "epoch": 22.85228215767635, "grad_norm": 12.76147747039795, "learning_rate": 1.0864066390041495e-05, "loss": 0.3359, "step": 27537 }, { "epoch": 22.85311203319502, "grad_norm": 36.82875061035156, "learning_rate": 1.0863734439834027e-05, "loss": 0.7729, "step": 27538 }, { "epoch": 22.853941908713693, "grad_norm": 24.810829162597656, "learning_rate": 1.0863402489626557e-05, "loss": 0.4694, "step": 27539 }, { "epoch": 22.854771784232366, "grad_norm": 46.84334182739258, "learning_rate": 1.0863070539419088e-05, "loss": 0.8976, "step": 27540 }, { "epoch": 22.855601659751038, "grad_norm": 119.86681365966797, "learning_rate": 1.086273858921162e-05, "loss": 0.9377, "step": 27541 }, { "epoch": 22.85643153526971, "grad_norm": 28.107452392578125, "learning_rate": 1.086240663900415e-05, "loss": 0.4922, "step": 27542 }, { "epoch": 22.857261410788382, "grad_norm": 169.62538146972656, "learning_rate": 1.0862074688796682e-05, "loss": 1.098, 
"step": 27543 }, { "epoch": 22.858091286307054, "grad_norm": 34.8230094909668, "learning_rate": 1.0861742738589211e-05, "loss": 0.3341, "step": 27544 }, { "epoch": 22.858921161825727, "grad_norm": 48.842987060546875, "learning_rate": 1.0861410788381743e-05, "loss": 0.7523, "step": 27545 }, { "epoch": 22.8597510373444, "grad_norm": 19.024978637695312, "learning_rate": 1.0861078838174275e-05, "loss": 0.3521, "step": 27546 }, { "epoch": 22.86058091286307, "grad_norm": 102.1211929321289, "learning_rate": 1.0860746887966807e-05, "loss": 0.5713, "step": 27547 }, { "epoch": 22.861410788381743, "grad_norm": 42.60988235473633, "learning_rate": 1.0860414937759336e-05, "loss": 0.4705, "step": 27548 }, { "epoch": 22.862240663900415, "grad_norm": 61.632686614990234, "learning_rate": 1.0860082987551868e-05, "loss": 0.87, "step": 27549 }, { "epoch": 22.863070539419088, "grad_norm": 22.552845001220703, "learning_rate": 1.0859751037344398e-05, "loss": 0.2802, "step": 27550 }, { "epoch": 22.86390041493776, "grad_norm": 126.84344482421875, "learning_rate": 1.085941908713693e-05, "loss": 0.6349, "step": 27551 }, { "epoch": 22.864730290456432, "grad_norm": 195.5984344482422, "learning_rate": 1.0859087136929461e-05, "loss": 0.7295, "step": 27552 }, { "epoch": 22.865560165975104, "grad_norm": 69.5362548828125, "learning_rate": 1.0858755186721991e-05, "loss": 0.8599, "step": 27553 }, { "epoch": 22.866390041493776, "grad_norm": 39.24658966064453, "learning_rate": 1.0858423236514523e-05, "loss": 0.4605, "step": 27554 }, { "epoch": 22.86721991701245, "grad_norm": 24.868932723999023, "learning_rate": 1.0858091286307056e-05, "loss": 0.3768, "step": 27555 }, { "epoch": 22.86804979253112, "grad_norm": 32.82416534423828, "learning_rate": 1.0857759336099588e-05, "loss": 0.3637, "step": 27556 }, { "epoch": 22.868879668049793, "grad_norm": 50.62226486206055, "learning_rate": 1.0857427385892116e-05, "loss": 0.5365, "step": 27557 }, { "epoch": 22.869709543568465, "grad_norm": 49.27974319458008, 
"learning_rate": 1.0857095435684649e-05, "loss": 0.7993, "step": 27558 }, { "epoch": 22.870539419087137, "grad_norm": 21.48384666442871, "learning_rate": 1.0856763485477179e-05, "loss": 0.3971, "step": 27559 }, { "epoch": 22.87136929460581, "grad_norm": 20.687007904052734, "learning_rate": 1.0856431535269711e-05, "loss": 0.4199, "step": 27560 }, { "epoch": 22.872199170124482, "grad_norm": 30.621074676513672, "learning_rate": 1.0856099585062241e-05, "loss": 0.5034, "step": 27561 }, { "epoch": 22.873029045643154, "grad_norm": 31.5576229095459, "learning_rate": 1.0855767634854772e-05, "loss": 0.4397, "step": 27562 }, { "epoch": 22.873858921161826, "grad_norm": 150.24061584472656, "learning_rate": 1.0855435684647304e-05, "loss": 0.4348, "step": 27563 }, { "epoch": 22.8746887966805, "grad_norm": 46.91141891479492, "learning_rate": 1.0855103734439836e-05, "loss": 0.4457, "step": 27564 }, { "epoch": 22.87551867219917, "grad_norm": 50.622127532958984, "learning_rate": 1.0854771784232365e-05, "loss": 0.676, "step": 27565 }, { "epoch": 22.876348547717843, "grad_norm": 8.956730842590332, "learning_rate": 1.0854439834024897e-05, "loss": 0.2522, "step": 27566 }, { "epoch": 22.877178423236515, "grad_norm": 46.50922775268555, "learning_rate": 1.0854107883817429e-05, "loss": 0.5531, "step": 27567 }, { "epoch": 22.878008298755187, "grad_norm": 23.182100296020508, "learning_rate": 1.085377593360996e-05, "loss": 0.3452, "step": 27568 }, { "epoch": 22.87883817427386, "grad_norm": 29.382251739501953, "learning_rate": 1.085344398340249e-05, "loss": 0.4329, "step": 27569 }, { "epoch": 22.87966804979253, "grad_norm": 41.87749099731445, "learning_rate": 1.0853112033195022e-05, "loss": 0.5888, "step": 27570 }, { "epoch": 22.880497925311204, "grad_norm": 87.98974609375, "learning_rate": 1.0852780082987552e-05, "loss": 0.5184, "step": 27571 }, { "epoch": 22.881327800829876, "grad_norm": 38.00360870361328, "learning_rate": 1.0852448132780084e-05, "loss": 0.4118, "step": 27572 }, { "epoch": 
22.882157676348548, "grad_norm": 26.571659088134766, "learning_rate": 1.0852116182572613e-05, "loss": 0.6165, "step": 27573 }, { "epoch": 22.88298755186722, "grad_norm": 37.83060073852539, "learning_rate": 1.0851784232365145e-05, "loss": 0.7977, "step": 27574 }, { "epoch": 22.883817427385893, "grad_norm": 48.649715423583984, "learning_rate": 1.0851452282157677e-05, "loss": 0.638, "step": 27575 }, { "epoch": 22.884647302904565, "grad_norm": 23.39130401611328, "learning_rate": 1.085112033195021e-05, "loss": 0.3978, "step": 27576 }, { "epoch": 22.885477178423237, "grad_norm": 63.63467788696289, "learning_rate": 1.0850788381742738e-05, "loss": 0.6893, "step": 27577 }, { "epoch": 22.88630705394191, "grad_norm": 68.22511291503906, "learning_rate": 1.085045643153527e-05, "loss": 0.799, "step": 27578 }, { "epoch": 22.88713692946058, "grad_norm": 29.60308074951172, "learning_rate": 1.0850124481327802e-05, "loss": 0.3575, "step": 27579 }, { "epoch": 22.887966804979254, "grad_norm": 51.69557571411133, "learning_rate": 1.0849792531120333e-05, "loss": 0.3661, "step": 27580 }, { "epoch": 22.888796680497926, "grad_norm": 33.0111083984375, "learning_rate": 1.0849460580912865e-05, "loss": 0.3551, "step": 27581 }, { "epoch": 22.889626556016598, "grad_norm": 50.465065002441406, "learning_rate": 1.0849128630705394e-05, "loss": 0.632, "step": 27582 }, { "epoch": 22.89045643153527, "grad_norm": 20.722551345825195, "learning_rate": 1.0848796680497926e-05, "loss": 0.5449, "step": 27583 }, { "epoch": 22.891286307053942, "grad_norm": 22.920713424682617, "learning_rate": 1.0848464730290458e-05, "loss": 0.3257, "step": 27584 }, { "epoch": 22.892116182572614, "grad_norm": 19.5692138671875, "learning_rate": 1.084813278008299e-05, "loss": 0.2245, "step": 27585 }, { "epoch": 22.892946058091287, "grad_norm": 51.84464645385742, "learning_rate": 1.0847800829875519e-05, "loss": 0.5728, "step": 27586 }, { "epoch": 22.89377593360996, "grad_norm": 55.805076599121094, "learning_rate": 
1.0847468879668051e-05, "loss": 1.3565, "step": 27587 }, { "epoch": 22.89460580912863, "grad_norm": 24.339101791381836, "learning_rate": 1.0847136929460583e-05, "loss": 0.2898, "step": 27588 }, { "epoch": 22.895435684647303, "grad_norm": 47.76133346557617, "learning_rate": 1.0846804979253113e-05, "loss": 0.4979, "step": 27589 }, { "epoch": 22.896265560165975, "grad_norm": 22.714805603027344, "learning_rate": 1.0846473029045644e-05, "loss": 0.4683, "step": 27590 }, { "epoch": 22.897095435684648, "grad_norm": 55.523250579833984, "learning_rate": 1.0846141078838174e-05, "loss": 1.1388, "step": 27591 }, { "epoch": 22.89792531120332, "grad_norm": 15.375689506530762, "learning_rate": 1.0845809128630706e-05, "loss": 0.2852, "step": 27592 }, { "epoch": 22.898755186721992, "grad_norm": 53.73299789428711, "learning_rate": 1.0845477178423238e-05, "loss": 0.6426, "step": 27593 }, { "epoch": 22.899585062240664, "grad_norm": 85.02761840820312, "learning_rate": 1.0845145228215767e-05, "loss": 0.8931, "step": 27594 }, { "epoch": 22.900414937759336, "grad_norm": 47.96609878540039, "learning_rate": 1.08448132780083e-05, "loss": 0.342, "step": 27595 }, { "epoch": 22.90124481327801, "grad_norm": 34.17170715332031, "learning_rate": 1.0844481327800831e-05, "loss": 0.3412, "step": 27596 }, { "epoch": 22.90207468879668, "grad_norm": 28.27878761291504, "learning_rate": 1.0844149377593363e-05, "loss": 0.2109, "step": 27597 }, { "epoch": 22.902904564315353, "grad_norm": 87.06365203857422, "learning_rate": 1.0843817427385892e-05, "loss": 0.8147, "step": 27598 }, { "epoch": 22.903734439834025, "grad_norm": 32.97849655151367, "learning_rate": 1.0843485477178424e-05, "loss": 0.3674, "step": 27599 }, { "epoch": 22.904564315352697, "grad_norm": 54.83064270019531, "learning_rate": 1.0843153526970955e-05, "loss": 0.7309, "step": 27600 }, { "epoch": 22.90539419087137, "grad_norm": 18.121793746948242, "learning_rate": 1.0842821576763487e-05, "loss": 0.3693, "step": 27601 }, { "epoch": 
22.906224066390042, "grad_norm": 23.230228424072266, "learning_rate": 1.0842489626556017e-05, "loss": 0.3311, "step": 27602 }, { "epoch": 22.907053941908714, "grad_norm": 97.52947998046875, "learning_rate": 1.0842157676348548e-05, "loss": 0.9631, "step": 27603 }, { "epoch": 22.907883817427386, "grad_norm": 90.65189361572266, "learning_rate": 1.084182572614108e-05, "loss": 0.3512, "step": 27604 }, { "epoch": 22.90871369294606, "grad_norm": 64.82653045654297, "learning_rate": 1.0841493775933612e-05, "loss": 0.6467, "step": 27605 }, { "epoch": 22.90954356846473, "grad_norm": 39.48921203613281, "learning_rate": 1.0841161825726142e-05, "loss": 0.6258, "step": 27606 }, { "epoch": 22.910373443983403, "grad_norm": 88.02430725097656, "learning_rate": 1.0840829875518673e-05, "loss": 0.5964, "step": 27607 }, { "epoch": 22.911203319502075, "grad_norm": 17.37238121032715, "learning_rate": 1.0840497925311205e-05, "loss": 0.319, "step": 27608 }, { "epoch": 22.912033195020747, "grad_norm": 59.223182678222656, "learning_rate": 1.0840165975103735e-05, "loss": 0.8689, "step": 27609 }, { "epoch": 22.91286307053942, "grad_norm": 87.47801971435547, "learning_rate": 1.0839834024896267e-05, "loss": 0.4176, "step": 27610 }, { "epoch": 22.91369294605809, "grad_norm": 49.800193786621094, "learning_rate": 1.0839502074688798e-05, "loss": 0.5137, "step": 27611 }, { "epoch": 22.914522821576764, "grad_norm": 31.164400100708008, "learning_rate": 1.0839170124481328e-05, "loss": 0.5668, "step": 27612 }, { "epoch": 22.915352697095436, "grad_norm": 60.03696823120117, "learning_rate": 1.083883817427386e-05, "loss": 0.9034, "step": 27613 }, { "epoch": 22.916182572614108, "grad_norm": 33.039573669433594, "learning_rate": 1.0838506224066392e-05, "loss": 0.3396, "step": 27614 }, { "epoch": 22.91701244813278, "grad_norm": 139.68222045898438, "learning_rate": 1.0838174273858921e-05, "loss": 0.3875, "step": 27615 }, { "epoch": 22.917842323651453, "grad_norm": 27.12410545349121, "learning_rate": 
1.0837842323651453e-05, "loss": 0.3899, "step": 27616 }, { "epoch": 22.918672199170125, "grad_norm": 30.213058471679688, "learning_rate": 1.0837510373443985e-05, "loss": 0.3454, "step": 27617 }, { "epoch": 22.919502074688797, "grad_norm": 65.13359069824219, "learning_rate": 1.0837178423236516e-05, "loss": 0.6835, "step": 27618 }, { "epoch": 22.92033195020747, "grad_norm": 28.529129028320312, "learning_rate": 1.0836846473029046e-05, "loss": 0.3633, "step": 27619 }, { "epoch": 22.92116182572614, "grad_norm": 24.696443557739258, "learning_rate": 1.0836514522821577e-05, "loss": 0.3738, "step": 27620 }, { "epoch": 22.921991701244814, "grad_norm": 22.862031936645508, "learning_rate": 1.0836182572614109e-05, "loss": 0.3532, "step": 27621 }, { "epoch": 22.922821576763486, "grad_norm": 18.80437660217285, "learning_rate": 1.083585062240664e-05, "loss": 0.2936, "step": 27622 }, { "epoch": 22.923651452282158, "grad_norm": 77.1911392211914, "learning_rate": 1.083551867219917e-05, "loss": 0.411, "step": 27623 }, { "epoch": 22.92448132780083, "grad_norm": 72.2973403930664, "learning_rate": 1.0835186721991702e-05, "loss": 0.6883, "step": 27624 }, { "epoch": 22.925311203319502, "grad_norm": 35.38170623779297, "learning_rate": 1.0834854771784234e-05, "loss": 0.4557, "step": 27625 }, { "epoch": 22.926141078838175, "grad_norm": 25.377212524414062, "learning_rate": 1.0834522821576766e-05, "loss": 0.3819, "step": 27626 }, { "epoch": 22.926970954356847, "grad_norm": 81.11737823486328, "learning_rate": 1.0834190871369294e-05, "loss": 0.8827, "step": 27627 }, { "epoch": 22.92780082987552, "grad_norm": 20.979724884033203, "learning_rate": 1.0833858921161827e-05, "loss": 0.3982, "step": 27628 }, { "epoch": 22.92863070539419, "grad_norm": 37.19240951538086, "learning_rate": 1.0833526970954357e-05, "loss": 0.3852, "step": 27629 }, { "epoch": 22.929460580912863, "grad_norm": 32.284366607666016, "learning_rate": 1.0833195020746889e-05, "loss": 0.3842, "step": 27630 }, { "epoch": 
22.930290456431536, "grad_norm": 56.248321533203125, "learning_rate": 1.083286307053942e-05, "loss": 0.3826, "step": 27631 }, { "epoch": 22.931120331950208, "grad_norm": 11.15469741821289, "learning_rate": 1.083253112033195e-05, "loss": 0.3039, "step": 27632 }, { "epoch": 22.93195020746888, "grad_norm": 60.448028564453125, "learning_rate": 1.0832199170124482e-05, "loss": 1.1025, "step": 27633 }, { "epoch": 22.932780082987552, "grad_norm": 42.81474685668945, "learning_rate": 1.0831867219917014e-05, "loss": 0.5038, "step": 27634 }, { "epoch": 22.933609958506224, "grad_norm": 39.47327423095703, "learning_rate": 1.0831535269709546e-05, "loss": 0.3103, "step": 27635 }, { "epoch": 22.934439834024896, "grad_norm": 113.72481536865234, "learning_rate": 1.0831203319502075e-05, "loss": 0.9464, "step": 27636 }, { "epoch": 22.93526970954357, "grad_norm": 51.9305419921875, "learning_rate": 1.0830871369294607e-05, "loss": 0.6258, "step": 27637 }, { "epoch": 22.93609958506224, "grad_norm": 33.79261016845703, "learning_rate": 1.0830539419087138e-05, "loss": 0.3462, "step": 27638 }, { "epoch": 22.936929460580913, "grad_norm": 39.77766418457031, "learning_rate": 1.083020746887967e-05, "loss": 0.3733, "step": 27639 }, { "epoch": 22.937759336099585, "grad_norm": 45.3614501953125, "learning_rate": 1.08298755186722e-05, "loss": 0.3049, "step": 27640 }, { "epoch": 22.938589211618257, "grad_norm": 53.92017364501953, "learning_rate": 1.082954356846473e-05, "loss": 0.4491, "step": 27641 }, { "epoch": 22.93941908713693, "grad_norm": 27.264873504638672, "learning_rate": 1.0829211618257263e-05, "loss": 0.3755, "step": 27642 }, { "epoch": 22.940248962655602, "grad_norm": 16.030916213989258, "learning_rate": 1.0828879668049795e-05, "loss": 0.3324, "step": 27643 }, { "epoch": 22.941078838174274, "grad_norm": 30.042144775390625, "learning_rate": 1.0828547717842323e-05, "loss": 0.6216, "step": 27644 }, { "epoch": 22.941908713692946, "grad_norm": 94.45796966552734, "learning_rate": 
1.0828215767634855e-05, "loss": 0.651, "step": 27645 }, { "epoch": 22.94273858921162, "grad_norm": 58.90956497192383, "learning_rate": 1.0827883817427388e-05, "loss": 0.7243, "step": 27646 }, { "epoch": 22.94356846473029, "grad_norm": 59.76679611206055, "learning_rate": 1.0827551867219918e-05, "loss": 0.6432, "step": 27647 }, { "epoch": 22.944398340248963, "grad_norm": 23.090240478515625, "learning_rate": 1.0827219917012448e-05, "loss": 0.3206, "step": 27648 }, { "epoch": 22.945228215767635, "grad_norm": 81.47774505615234, "learning_rate": 1.082688796680498e-05, "loss": 0.4732, "step": 27649 }, { "epoch": 22.946058091286307, "grad_norm": 19.768173217773438, "learning_rate": 1.0826556016597511e-05, "loss": 0.3513, "step": 27650 }, { "epoch": 22.94688796680498, "grad_norm": 49.144439697265625, "learning_rate": 1.0826224066390043e-05, "loss": 0.4472, "step": 27651 }, { "epoch": 22.94771784232365, "grad_norm": 38.88420486450195, "learning_rate": 1.0825892116182572e-05, "loss": 0.3129, "step": 27652 }, { "epoch": 22.948547717842324, "grad_norm": 37.46050262451172, "learning_rate": 1.0825560165975104e-05, "loss": 0.6419, "step": 27653 }, { "epoch": 22.949377593360996, "grad_norm": 84.9189453125, "learning_rate": 1.0825228215767636e-05, "loss": 0.9571, "step": 27654 }, { "epoch": 22.95020746887967, "grad_norm": 16.47054672241211, "learning_rate": 1.0824896265560168e-05, "loss": 0.2964, "step": 27655 }, { "epoch": 22.95103734439834, "grad_norm": 43.89266586303711, "learning_rate": 1.0824564315352697e-05, "loss": 0.5669, "step": 27656 }, { "epoch": 22.951867219917013, "grad_norm": 52.6298828125, "learning_rate": 1.0824232365145229e-05, "loss": 0.6247, "step": 27657 }, { "epoch": 22.952697095435685, "grad_norm": 83.00016784667969, "learning_rate": 1.0823900414937761e-05, "loss": 0.432, "step": 27658 }, { "epoch": 22.953526970954357, "grad_norm": 46.22380828857422, "learning_rate": 1.0823568464730291e-05, "loss": 0.5994, "step": 27659 }, { "epoch": 22.95435684647303, 
"grad_norm": 24.806562423706055, "learning_rate": 1.0823236514522824e-05, "loss": 0.4017, "step": 27660 }, { "epoch": 22.9551867219917, "grad_norm": 34.409645080566406, "learning_rate": 1.0822904564315352e-05, "loss": 0.4564, "step": 27661 }, { "epoch": 22.956016597510374, "grad_norm": 26.363679885864258, "learning_rate": 1.0822572614107884e-05, "loss": 0.6953, "step": 27662 }, { "epoch": 22.956846473029046, "grad_norm": 29.099262237548828, "learning_rate": 1.0822240663900416e-05, "loss": 0.4202, "step": 27663 }, { "epoch": 22.957676348547718, "grad_norm": 36.560359954833984, "learning_rate": 1.0821908713692949e-05, "loss": 0.3288, "step": 27664 }, { "epoch": 22.95850622406639, "grad_norm": 60.119991302490234, "learning_rate": 1.0821576763485477e-05, "loss": 0.7056, "step": 27665 }, { "epoch": 22.959336099585062, "grad_norm": 82.9259033203125, "learning_rate": 1.082124481327801e-05, "loss": 0.4893, "step": 27666 }, { "epoch": 22.960165975103735, "grad_norm": 34.81927490234375, "learning_rate": 1.082091286307054e-05, "loss": 0.6893, "step": 27667 }, { "epoch": 22.960995850622407, "grad_norm": 78.26909637451172, "learning_rate": 1.0820580912863072e-05, "loss": 0.4958, "step": 27668 }, { "epoch": 22.96182572614108, "grad_norm": 38.34768295288086, "learning_rate": 1.0820248962655602e-05, "loss": 0.6094, "step": 27669 }, { "epoch": 22.96265560165975, "grad_norm": 66.22840118408203, "learning_rate": 1.0819917012448133e-05, "loss": 1.0416, "step": 27670 }, { "epoch": 22.963485477178423, "grad_norm": 44.00266647338867, "learning_rate": 1.0819585062240665e-05, "loss": 0.5371, "step": 27671 }, { "epoch": 22.964315352697096, "grad_norm": 37.80497360229492, "learning_rate": 1.0819253112033197e-05, "loss": 0.3375, "step": 27672 }, { "epoch": 22.965145228215768, "grad_norm": 11.016257286071777, "learning_rate": 1.0818921161825726e-05, "loss": 0.344, "step": 27673 }, { "epoch": 22.96597510373444, "grad_norm": 70.73480987548828, "learning_rate": 1.0818589211618258e-05, "loss": 
0.8513, "step": 27674 }, { "epoch": 22.966804979253112, "grad_norm": 92.67378997802734, "learning_rate": 1.081825726141079e-05, "loss": 0.4583, "step": 27675 }, { "epoch": 22.967634854771784, "grad_norm": 62.46595001220703, "learning_rate": 1.081792531120332e-05, "loss": 0.5973, "step": 27676 }, { "epoch": 22.968464730290457, "grad_norm": 42.943763732910156, "learning_rate": 1.081759336099585e-05, "loss": 0.5962, "step": 27677 }, { "epoch": 22.96929460580913, "grad_norm": 101.13761901855469, "learning_rate": 1.0817261410788383e-05, "loss": 0.3441, "step": 27678 }, { "epoch": 22.9701244813278, "grad_norm": 25.947933197021484, "learning_rate": 1.0816929460580913e-05, "loss": 0.3905, "step": 27679 }, { "epoch": 22.970954356846473, "grad_norm": 79.27378845214844, "learning_rate": 1.0816597510373445e-05, "loss": 1.2938, "step": 27680 }, { "epoch": 22.971784232365145, "grad_norm": 63.31394577026367, "learning_rate": 1.0816265560165976e-05, "loss": 0.8134, "step": 27681 }, { "epoch": 22.972614107883818, "grad_norm": 27.87053108215332, "learning_rate": 1.0815933609958506e-05, "loss": 0.4339, "step": 27682 }, { "epoch": 22.97344398340249, "grad_norm": 75.73780059814453, "learning_rate": 1.0815601659751038e-05, "loss": 0.5799, "step": 27683 }, { "epoch": 22.974273858921162, "grad_norm": 46.996742248535156, "learning_rate": 1.081526970954357e-05, "loss": 0.666, "step": 27684 }, { "epoch": 22.975103734439834, "grad_norm": 28.9917049407959, "learning_rate": 1.08149377593361e-05, "loss": 0.4759, "step": 27685 }, { "epoch": 22.975933609958506, "grad_norm": 106.9627914428711, "learning_rate": 1.0814605809128631e-05, "loss": 0.7212, "step": 27686 }, { "epoch": 22.97676348547718, "grad_norm": 23.571880340576172, "learning_rate": 1.0814273858921163e-05, "loss": 0.292, "step": 27687 }, { "epoch": 22.97759336099585, "grad_norm": 59.10599136352539, "learning_rate": 1.0813941908713694e-05, "loss": 0.3457, "step": 27688 }, { "epoch": 22.978423236514523, "grad_norm": 21.876243591308594, 
"learning_rate": 1.0813609958506226e-05, "loss": 0.2216, "step": 27689 }, { "epoch": 22.979253112033195, "grad_norm": 78.90577697753906, "learning_rate": 1.0813278008298755e-05, "loss": 1.2091, "step": 27690 }, { "epoch": 22.980082987551867, "grad_norm": 29.431970596313477, "learning_rate": 1.0812946058091287e-05, "loss": 0.4042, "step": 27691 }, { "epoch": 22.98091286307054, "grad_norm": 58.6685905456543, "learning_rate": 1.0812614107883819e-05, "loss": 0.3249, "step": 27692 }, { "epoch": 22.98174273858921, "grad_norm": 56.137508392333984, "learning_rate": 1.0812282157676351e-05, "loss": 0.864, "step": 27693 }, { "epoch": 22.982572614107884, "grad_norm": 52.573631286621094, "learning_rate": 1.081195020746888e-05, "loss": 0.5211, "step": 27694 }, { "epoch": 22.983402489626556, "grad_norm": 54.94719314575195, "learning_rate": 1.0811618257261412e-05, "loss": 0.5138, "step": 27695 }, { "epoch": 22.98423236514523, "grad_norm": 33.935367584228516, "learning_rate": 1.0811286307053944e-05, "loss": 0.3184, "step": 27696 }, { "epoch": 22.9850622406639, "grad_norm": 91.14511108398438, "learning_rate": 1.0810954356846474e-05, "loss": 0.9172, "step": 27697 }, { "epoch": 22.985892116182573, "grad_norm": 54.110836029052734, "learning_rate": 1.0810622406639005e-05, "loss": 0.6227, "step": 27698 }, { "epoch": 22.986721991701245, "grad_norm": 42.387943267822266, "learning_rate": 1.0810290456431535e-05, "loss": 0.4291, "step": 27699 }, { "epoch": 22.987551867219917, "grad_norm": 32.61524200439453, "learning_rate": 1.0809958506224067e-05, "loss": 0.4201, "step": 27700 }, { "epoch": 22.98838174273859, "grad_norm": 55.151790618896484, "learning_rate": 1.08096265560166e-05, "loss": 0.576, "step": 27701 }, { "epoch": 22.98921161825726, "grad_norm": 14.1367769241333, "learning_rate": 1.0809294605809128e-05, "loss": 0.3042, "step": 27702 }, { "epoch": 22.990041493775934, "grad_norm": 28.502267837524414, "learning_rate": 1.080896265560166e-05, "loss": 0.3251, "step": 27703 }, { "epoch": 
22.990871369294606, "grad_norm": 20.03140640258789, "learning_rate": 1.0808630705394192e-05, "loss": 0.357, "step": 27704 }, { "epoch": 22.991701244813278, "grad_norm": 35.375648498535156, "learning_rate": 1.0808298755186724e-05, "loss": 0.8317, "step": 27705 }, { "epoch": 22.99253112033195, "grad_norm": 54.54022979736328, "learning_rate": 1.0807966804979253e-05, "loss": 0.4566, "step": 27706 }, { "epoch": 22.993360995850622, "grad_norm": 9.096813201904297, "learning_rate": 1.0807634854771785e-05, "loss": 0.2607, "step": 27707 }, { "epoch": 22.994190871369295, "grad_norm": 90.71009063720703, "learning_rate": 1.0807302904564316e-05, "loss": 1.2065, "step": 27708 }, { "epoch": 22.995020746887967, "grad_norm": 34.65411376953125, "learning_rate": 1.0806970954356848e-05, "loss": 0.8291, "step": 27709 }, { "epoch": 22.99585062240664, "grad_norm": 31.388120651245117, "learning_rate": 1.0806639004149378e-05, "loss": 0.4303, "step": 27710 }, { "epoch": 22.99668049792531, "grad_norm": 17.149328231811523, "learning_rate": 1.0806307053941909e-05, "loss": 0.3287, "step": 27711 }, { "epoch": 22.997510373443983, "grad_norm": 28.54160499572754, "learning_rate": 1.080597510373444e-05, "loss": 0.3518, "step": 27712 }, { "epoch": 22.998340248962656, "grad_norm": 82.73332214355469, "learning_rate": 1.0805643153526973e-05, "loss": 0.7406, "step": 27713 }, { "epoch": 22.999170124481328, "grad_norm": 43.424922943115234, "learning_rate": 1.0805311203319505e-05, "loss": 0.4531, "step": 27714 }, { "epoch": 23.0, "grad_norm": 49.56441879272461, "learning_rate": 1.0804979253112034e-05, "loss": 0.5162, "step": 27715 }, { "epoch": 23.000829875518672, "grad_norm": 86.12910461425781, "learning_rate": 1.0804647302904566e-05, "loss": 1.5389, "step": 27716 }, { "epoch": 23.001659751037344, "grad_norm": 36.79762649536133, "learning_rate": 1.0804315352697096e-05, "loss": 0.6383, "step": 27717 }, { "epoch": 23.002489626556017, "grad_norm": 14.261954307556152, "learning_rate": 1.0803983402489628e-05, 
"loss": 0.2492, "step": 27718 }, { "epoch": 23.00331950207469, "grad_norm": 62.50841522216797, "learning_rate": 1.0803651452282159e-05, "loss": 0.6806, "step": 27719 }, { "epoch": 23.00414937759336, "grad_norm": 63.8154411315918, "learning_rate": 1.0803319502074689e-05, "loss": 0.3633, "step": 27720 }, { "epoch": 23.004979253112033, "grad_norm": 89.88677978515625, "learning_rate": 1.0802987551867221e-05, "loss": 0.6783, "step": 27721 }, { "epoch": 23.005809128630705, "grad_norm": 14.389925003051758, "learning_rate": 1.0802655601659753e-05, "loss": 0.2729, "step": 27722 }, { "epoch": 23.006639004149378, "grad_norm": 11.274077415466309, "learning_rate": 1.0802323651452282e-05, "loss": 0.2334, "step": 27723 }, { "epoch": 23.00746887966805, "grad_norm": 29.73119354248047, "learning_rate": 1.0801991701244814e-05, "loss": 0.4097, "step": 27724 }, { "epoch": 23.008298755186722, "grad_norm": 69.77518463134766, "learning_rate": 1.0801659751037346e-05, "loss": 0.2921, "step": 27725 }, { "epoch": 23.009128630705394, "grad_norm": 17.096342086791992, "learning_rate": 1.0801327800829877e-05, "loss": 0.1896, "step": 27726 }, { "epoch": 23.009958506224066, "grad_norm": 18.77680015563965, "learning_rate": 1.0800995850622407e-05, "loss": 0.3893, "step": 27727 }, { "epoch": 23.01078838174274, "grad_norm": 66.13846588134766, "learning_rate": 1.0800663900414939e-05, "loss": 0.6824, "step": 27728 }, { "epoch": 23.01161825726141, "grad_norm": 28.45172882080078, "learning_rate": 1.080033195020747e-05, "loss": 0.3032, "step": 27729 }, { "epoch": 23.012448132780083, "grad_norm": 12.904623031616211, "learning_rate": 1.0800000000000002e-05, "loss": 0.3005, "step": 27730 }, { "epoch": 23.013278008298755, "grad_norm": 36.51353073120117, "learning_rate": 1.079966804979253e-05, "loss": 0.3931, "step": 27731 }, { "epoch": 23.014107883817427, "grad_norm": 40.66524124145508, "learning_rate": 1.0799336099585062e-05, "loss": 0.782, "step": 27732 }, { "epoch": 23.0149377593361, "grad_norm": 
65.74989318847656, "learning_rate": 1.0799004149377595e-05, "loss": 0.6389, "step": 27733 }, { "epoch": 23.01576763485477, "grad_norm": 49.692630767822266, "learning_rate": 1.0798672199170127e-05, "loss": 0.299, "step": 27734 }, { "epoch": 23.016597510373444, "grad_norm": 39.64755630493164, "learning_rate": 1.0798340248962655e-05, "loss": 0.6177, "step": 27735 }, { "epoch": 23.017427385892116, "grad_norm": 32.04318618774414, "learning_rate": 1.0798008298755187e-05, "loss": 0.5884, "step": 27736 }, { "epoch": 23.01825726141079, "grad_norm": 92.49195098876953, "learning_rate": 1.0797676348547718e-05, "loss": 0.3316, "step": 27737 }, { "epoch": 23.01908713692946, "grad_norm": 31.13711166381836, "learning_rate": 1.079734439834025e-05, "loss": 0.4208, "step": 27738 }, { "epoch": 23.019917012448133, "grad_norm": 40.98609161376953, "learning_rate": 1.0797012448132782e-05, "loss": 0.2884, "step": 27739 }, { "epoch": 23.020746887966805, "grad_norm": 30.91007423400879, "learning_rate": 1.079668049792531e-05, "loss": 0.2977, "step": 27740 }, { "epoch": 23.021576763485477, "grad_norm": 11.978038787841797, "learning_rate": 1.0796348547717843e-05, "loss": 0.2396, "step": 27741 }, { "epoch": 23.02240663900415, "grad_norm": 13.634562492370605, "learning_rate": 1.0796016597510375e-05, "loss": 0.3255, "step": 27742 }, { "epoch": 23.02323651452282, "grad_norm": 44.15032196044922, "learning_rate": 1.0795684647302907e-05, "loss": 0.2598, "step": 27743 }, { "epoch": 23.024066390041494, "grad_norm": 19.609535217285156, "learning_rate": 1.0795352697095436e-05, "loss": 0.2337, "step": 27744 }, { "epoch": 23.024896265560166, "grad_norm": 29.43521499633789, "learning_rate": 1.0795020746887968e-05, "loss": 0.3487, "step": 27745 }, { "epoch": 23.025726141078838, "grad_norm": 52.96370315551758, "learning_rate": 1.0794688796680498e-05, "loss": 0.7245, "step": 27746 }, { "epoch": 23.02655601659751, "grad_norm": 90.06755828857422, "learning_rate": 1.079435684647303e-05, "loss": 0.469, "step": 
27747 }, { "epoch": 23.027385892116182, "grad_norm": 43.519588470458984, "learning_rate": 1.0794024896265561e-05, "loss": 0.4519, "step": 27748 }, { "epoch": 23.028215767634855, "grad_norm": 73.03327941894531, "learning_rate": 1.0793692946058091e-05, "loss": 0.3891, "step": 27749 }, { "epoch": 23.029045643153527, "grad_norm": 48.21709442138672, "learning_rate": 1.0793360995850623e-05, "loss": 0.4619, "step": 27750 }, { "epoch": 23.0298755186722, "grad_norm": 61.7421760559082, "learning_rate": 1.0793029045643156e-05, "loss": 0.7929, "step": 27751 }, { "epoch": 23.03070539419087, "grad_norm": 78.56317901611328, "learning_rate": 1.0792697095435684e-05, "loss": 0.7066, "step": 27752 }, { "epoch": 23.031535269709543, "grad_norm": 107.74986267089844, "learning_rate": 1.0792365145228216e-05, "loss": 0.7859, "step": 27753 }, { "epoch": 23.032365145228216, "grad_norm": 14.925601959228516, "learning_rate": 1.0792033195020748e-05, "loss": 0.2376, "step": 27754 }, { "epoch": 23.033195020746888, "grad_norm": 77.40013122558594, "learning_rate": 1.0791701244813279e-05, "loss": 0.7022, "step": 27755 }, { "epoch": 23.03402489626556, "grad_norm": 51.54484939575195, "learning_rate": 1.079136929460581e-05, "loss": 0.4186, "step": 27756 }, { "epoch": 23.034854771784232, "grad_norm": 28.73230743408203, "learning_rate": 1.0791037344398341e-05, "loss": 0.2191, "step": 27757 }, { "epoch": 23.035684647302904, "grad_norm": 31.21853256225586, "learning_rate": 1.0790705394190872e-05, "loss": 0.3355, "step": 27758 }, { "epoch": 23.036514522821577, "grad_norm": 129.99652099609375, "learning_rate": 1.0790373443983404e-05, "loss": 0.4152, "step": 27759 }, { "epoch": 23.03734439834025, "grad_norm": 105.36507415771484, "learning_rate": 1.0790041493775933e-05, "loss": 0.7567, "step": 27760 }, { "epoch": 23.03817427385892, "grad_norm": 35.459659576416016, "learning_rate": 1.0789709543568465e-05, "loss": 0.4045, "step": 27761 }, { "epoch": 23.039004149377593, "grad_norm": 34.193389892578125, 
"learning_rate": 1.0789377593360997e-05, "loss": 0.51, "step": 27762 }, { "epoch": 23.039834024896265, "grad_norm": 38.29327392578125, "learning_rate": 1.0789045643153529e-05, "loss": 0.4593, "step": 27763 }, { "epoch": 23.040663900414938, "grad_norm": 51.97279357910156, "learning_rate": 1.078871369294606e-05, "loss": 0.6372, "step": 27764 }, { "epoch": 23.04149377593361, "grad_norm": 35.69084930419922, "learning_rate": 1.078838174273859e-05, "loss": 0.5508, "step": 27765 }, { "epoch": 23.042323651452282, "grad_norm": 32.92876052856445, "learning_rate": 1.0788049792531122e-05, "loss": 0.2373, "step": 27766 }, { "epoch": 23.043153526970954, "grad_norm": 15.990301132202148, "learning_rate": 1.0787717842323652e-05, "loss": 0.2281, "step": 27767 }, { "epoch": 23.043983402489626, "grad_norm": 81.83512878417969, "learning_rate": 1.0787385892116184e-05, "loss": 0.4794, "step": 27768 }, { "epoch": 23.0448132780083, "grad_norm": 20.036375045776367, "learning_rate": 1.0787053941908713e-05, "loss": 0.4149, "step": 27769 }, { "epoch": 23.04564315352697, "grad_norm": 70.83141326904297, "learning_rate": 1.0786721991701245e-05, "loss": 0.405, "step": 27770 }, { "epoch": 23.046473029045643, "grad_norm": 30.57321548461914, "learning_rate": 1.0786390041493777e-05, "loss": 0.5122, "step": 27771 }, { "epoch": 23.047302904564315, "grad_norm": 42.54560852050781, "learning_rate": 1.078605809128631e-05, "loss": 0.5386, "step": 27772 }, { "epoch": 23.048132780082987, "grad_norm": 25.475135803222656, "learning_rate": 1.0785726141078838e-05, "loss": 0.36, "step": 27773 }, { "epoch": 23.04896265560166, "grad_norm": 40.9141845703125, "learning_rate": 1.078539419087137e-05, "loss": 0.5573, "step": 27774 }, { "epoch": 23.04979253112033, "grad_norm": 44.66957092285156, "learning_rate": 1.0785062240663902e-05, "loss": 0.4564, "step": 27775 }, { "epoch": 23.050622406639004, "grad_norm": 60.43536376953125, "learning_rate": 1.0784730290456433e-05, "loss": 0.6732, "step": 27776 }, { "epoch": 
23.051452282157676, "grad_norm": 56.61857604980469, "learning_rate": 1.0784398340248963e-05, "loss": 0.3284, "step": 27777 }, { "epoch": 23.05228215767635, "grad_norm": 24.483938217163086, "learning_rate": 1.0784066390041494e-05, "loss": 0.3416, "step": 27778 }, { "epoch": 23.05311203319502, "grad_norm": 28.66509437561035, "learning_rate": 1.0783734439834026e-05, "loss": 0.383, "step": 27779 }, { "epoch": 23.053941908713693, "grad_norm": 84.50426483154297, "learning_rate": 1.0783402489626558e-05, "loss": 0.6204, "step": 27780 }, { "epoch": 23.054771784232365, "grad_norm": 54.663753509521484, "learning_rate": 1.0783070539419087e-05, "loss": 0.5632, "step": 27781 }, { "epoch": 23.055601659751037, "grad_norm": 29.5584774017334, "learning_rate": 1.0782738589211619e-05, "loss": 0.4355, "step": 27782 }, { "epoch": 23.05643153526971, "grad_norm": 52.947784423828125, "learning_rate": 1.078240663900415e-05, "loss": 0.3823, "step": 27783 }, { "epoch": 23.05726141078838, "grad_norm": 33.19316101074219, "learning_rate": 1.0782074688796681e-05, "loss": 0.3178, "step": 27784 }, { "epoch": 23.058091286307054, "grad_norm": 24.008485794067383, "learning_rate": 1.0781742738589212e-05, "loss": 0.5247, "step": 27785 }, { "epoch": 23.058921161825726, "grad_norm": 60.85203552246094, "learning_rate": 1.0781410788381744e-05, "loss": 0.3214, "step": 27786 }, { "epoch": 23.059751037344398, "grad_norm": 36.060543060302734, "learning_rate": 1.0781078838174274e-05, "loss": 0.3105, "step": 27787 }, { "epoch": 23.06058091286307, "grad_norm": 39.68279266357422, "learning_rate": 1.0780746887966806e-05, "loss": 0.5279, "step": 27788 }, { "epoch": 23.061410788381743, "grad_norm": 20.002578735351562, "learning_rate": 1.0780414937759337e-05, "loss": 0.3536, "step": 27789 }, { "epoch": 23.062240663900415, "grad_norm": 39.6356201171875, "learning_rate": 1.0780082987551867e-05, "loss": 0.7344, "step": 27790 }, { "epoch": 23.063070539419087, "grad_norm": 30.065797805786133, "learning_rate": 
1.07797510373444e-05, "loss": 0.3446, "step": 27791 }, { "epoch": 23.06390041493776, "grad_norm": 77.58645629882812, "learning_rate": 1.0779419087136931e-05, "loss": 0.5302, "step": 27792 }, { "epoch": 23.06473029045643, "grad_norm": 15.553903579711914, "learning_rate": 1.0779087136929462e-05, "loss": 0.294, "step": 27793 }, { "epoch": 23.065560165975104, "grad_norm": 28.028453826904297, "learning_rate": 1.0778755186721992e-05, "loss": 0.5751, "step": 27794 }, { "epoch": 23.066390041493776, "grad_norm": 30.152267456054688, "learning_rate": 1.0778423236514524e-05, "loss": 0.4678, "step": 27795 }, { "epoch": 23.067219917012448, "grad_norm": 34.22262954711914, "learning_rate": 1.0778091286307055e-05, "loss": 0.2813, "step": 27796 }, { "epoch": 23.06804979253112, "grad_norm": 97.34691619873047, "learning_rate": 1.0777759336099587e-05, "loss": 0.2865, "step": 27797 }, { "epoch": 23.068879668049792, "grad_norm": 85.99105072021484, "learning_rate": 1.0777427385892117e-05, "loss": 1.1024, "step": 27798 }, { "epoch": 23.069709543568464, "grad_norm": 12.201375961303711, "learning_rate": 1.0777095435684648e-05, "loss": 0.3579, "step": 27799 }, { "epoch": 23.070539419087137, "grad_norm": 84.32242584228516, "learning_rate": 1.077676348547718e-05, "loss": 0.9219, "step": 27800 }, { "epoch": 23.07136929460581, "grad_norm": 111.10873413085938, "learning_rate": 1.0776431535269712e-05, "loss": 0.6778, "step": 27801 }, { "epoch": 23.07219917012448, "grad_norm": 33.935325622558594, "learning_rate": 1.077609958506224e-05, "loss": 0.5012, "step": 27802 }, { "epoch": 23.073029045643153, "grad_norm": 46.416038513183594, "learning_rate": 1.0775767634854773e-05, "loss": 0.3386, "step": 27803 }, { "epoch": 23.073858921161825, "grad_norm": 45.99101257324219, "learning_rate": 1.0775435684647305e-05, "loss": 0.9869, "step": 27804 }, { "epoch": 23.074688796680498, "grad_norm": 38.713008880615234, "learning_rate": 1.0775103734439835e-05, "loss": 0.3459, "step": 27805 }, { "epoch": 
23.07551867219917, "grad_norm": 71.79966735839844, "learning_rate": 1.0774771784232366e-05, "loss": 0.6011, "step": 27806 }, { "epoch": 23.076348547717842, "grad_norm": 21.350791931152344, "learning_rate": 1.0774439834024896e-05, "loss": 0.2579, "step": 27807 }, { "epoch": 23.077178423236514, "grad_norm": 31.963788986206055, "learning_rate": 1.0774107883817428e-05, "loss": 0.7024, "step": 27808 }, { "epoch": 23.078008298755186, "grad_norm": 32.966163635253906, "learning_rate": 1.077377593360996e-05, "loss": 0.4061, "step": 27809 }, { "epoch": 23.07883817427386, "grad_norm": 37.02391052246094, "learning_rate": 1.0773443983402489e-05, "loss": 0.6555, "step": 27810 }, { "epoch": 23.07966804979253, "grad_norm": 59.65336608886719, "learning_rate": 1.0773112033195021e-05, "loss": 0.4053, "step": 27811 }, { "epoch": 23.080497925311203, "grad_norm": 15.941292762756348, "learning_rate": 1.0772780082987553e-05, "loss": 0.2572, "step": 27812 }, { "epoch": 23.081327800829875, "grad_norm": 40.064449310302734, "learning_rate": 1.0772448132780085e-05, "loss": 0.7146, "step": 27813 }, { "epoch": 23.082157676348547, "grad_norm": 52.983272552490234, "learning_rate": 1.0772116182572614e-05, "loss": 0.44, "step": 27814 }, { "epoch": 23.08298755186722, "grad_norm": 44.374202728271484, "learning_rate": 1.0771784232365146e-05, "loss": 0.599, "step": 27815 }, { "epoch": 23.083817427385892, "grad_norm": 45.0340461730957, "learning_rate": 1.0771452282157676e-05, "loss": 0.9104, "step": 27816 }, { "epoch": 23.084647302904564, "grad_norm": 29.120140075683594, "learning_rate": 1.0771120331950209e-05, "loss": 0.3656, "step": 27817 }, { "epoch": 23.085477178423236, "grad_norm": 81.87581634521484, "learning_rate": 1.077078838174274e-05, "loss": 0.366, "step": 27818 }, { "epoch": 23.08630705394191, "grad_norm": 25.890676498413086, "learning_rate": 1.077045643153527e-05, "loss": 0.4951, "step": 27819 }, { "epoch": 23.08713692946058, "grad_norm": 46.178035736083984, "learning_rate": 
1.0770124481327802e-05, "loss": 0.4021, "step": 27820 }, { "epoch": 23.087966804979253, "grad_norm": 14.848709106445312, "learning_rate": 1.0769792531120334e-05, "loss": 0.2823, "step": 27821 }, { "epoch": 23.088796680497925, "grad_norm": 28.35845375061035, "learning_rate": 1.0769460580912866e-05, "loss": 0.6835, "step": 27822 }, { "epoch": 23.089626556016597, "grad_norm": 59.641441345214844, "learning_rate": 1.0769128630705394e-05, "loss": 0.6381, "step": 27823 }, { "epoch": 23.09045643153527, "grad_norm": 34.28036880493164, "learning_rate": 1.0768796680497927e-05, "loss": 0.4252, "step": 27824 }, { "epoch": 23.09128630705394, "grad_norm": 34.1357421875, "learning_rate": 1.0768464730290457e-05, "loss": 0.5207, "step": 27825 }, { "epoch": 23.092116182572614, "grad_norm": 25.942668914794922, "learning_rate": 1.0768132780082989e-05, "loss": 0.3427, "step": 27826 }, { "epoch": 23.092946058091286, "grad_norm": 140.5738067626953, "learning_rate": 1.076780082987552e-05, "loss": 0.5949, "step": 27827 }, { "epoch": 23.093775933609958, "grad_norm": 11.56480884552002, "learning_rate": 1.076746887966805e-05, "loss": 0.2612, "step": 27828 }, { "epoch": 23.09460580912863, "grad_norm": 44.432037353515625, "learning_rate": 1.0767136929460582e-05, "loss": 0.515, "step": 27829 }, { "epoch": 23.095435684647303, "grad_norm": 19.782604217529297, "learning_rate": 1.0766804979253114e-05, "loss": 0.2688, "step": 27830 }, { "epoch": 23.096265560165975, "grad_norm": 27.41851234436035, "learning_rate": 1.0766473029045643e-05, "loss": 0.3931, "step": 27831 }, { "epoch": 23.097095435684647, "grad_norm": 50.718265533447266, "learning_rate": 1.0766141078838175e-05, "loss": 0.8532, "step": 27832 }, { "epoch": 23.09792531120332, "grad_norm": 58.65928649902344, "learning_rate": 1.0765809128630707e-05, "loss": 0.4632, "step": 27833 }, { "epoch": 23.09875518672199, "grad_norm": 33.900672912597656, "learning_rate": 1.0765477178423237e-05, "loss": 0.2727, "step": 27834 }, { "epoch": 
23.099585062240664, "grad_norm": 35.62792205810547, "learning_rate": 1.0765145228215768e-05, "loss": 0.6356, "step": 27835 }, { "epoch": 23.100414937759336, "grad_norm": 11.631148338317871, "learning_rate": 1.07648132780083e-05, "loss": 0.2906, "step": 27836 }, { "epoch": 23.101244813278008, "grad_norm": 43.03756332397461, "learning_rate": 1.076448132780083e-05, "loss": 0.5402, "step": 27837 }, { "epoch": 23.10207468879668, "grad_norm": 33.745819091796875, "learning_rate": 1.0764149377593363e-05, "loss": 0.3688, "step": 27838 }, { "epoch": 23.102904564315352, "grad_norm": 29.986263275146484, "learning_rate": 1.0763817427385891e-05, "loss": 0.7143, "step": 27839 }, { "epoch": 23.103734439834025, "grad_norm": 16.09942054748535, "learning_rate": 1.0763485477178423e-05, "loss": 0.2348, "step": 27840 }, { "epoch": 23.104564315352697, "grad_norm": 27.668798446655273, "learning_rate": 1.0763153526970955e-05, "loss": 0.5815, "step": 27841 }, { "epoch": 23.10539419087137, "grad_norm": 6.523832321166992, "learning_rate": 1.0762821576763488e-05, "loss": 0.298, "step": 27842 }, { "epoch": 23.10622406639004, "grad_norm": 37.340755462646484, "learning_rate": 1.0762489626556016e-05, "loss": 0.538, "step": 27843 }, { "epoch": 23.107053941908713, "grad_norm": 55.218162536621094, "learning_rate": 1.0762157676348548e-05, "loss": 0.5776, "step": 27844 }, { "epoch": 23.107883817427386, "grad_norm": 9.477734565734863, "learning_rate": 1.076182572614108e-05, "loss": 0.303, "step": 27845 }, { "epoch": 23.108713692946058, "grad_norm": 72.60018157958984, "learning_rate": 1.0761493775933611e-05, "loss": 1.0488, "step": 27846 }, { "epoch": 23.10954356846473, "grad_norm": 56.20881271362305, "learning_rate": 1.0761161825726143e-05, "loss": 0.4119, "step": 27847 }, { "epoch": 23.110373443983402, "grad_norm": 10.925545692443848, "learning_rate": 1.0760829875518672e-05, "loss": 0.2169, "step": 27848 }, { "epoch": 23.111203319502074, "grad_norm": 56.70981216430664, "learning_rate": 
1.0760497925311204e-05, "loss": 0.4692, "step": 27849 }, { "epoch": 23.112033195020746, "grad_norm": 12.294378280639648, "learning_rate": 1.0760165975103736e-05, "loss": 0.2874, "step": 27850 }, { "epoch": 23.11286307053942, "grad_norm": 29.05654525756836, "learning_rate": 1.0759834024896268e-05, "loss": 0.3152, "step": 27851 }, { "epoch": 23.11369294605809, "grad_norm": 21.9305477142334, "learning_rate": 1.0759502074688797e-05, "loss": 0.5089, "step": 27852 }, { "epoch": 23.114522821576763, "grad_norm": 75.34049224853516, "learning_rate": 1.0759170124481329e-05, "loss": 0.3442, "step": 27853 }, { "epoch": 23.115352697095435, "grad_norm": 16.419612884521484, "learning_rate": 1.075883817427386e-05, "loss": 0.3996, "step": 27854 }, { "epoch": 23.116182572614107, "grad_norm": 44.18374252319336, "learning_rate": 1.0758506224066391e-05, "loss": 0.4525, "step": 27855 }, { "epoch": 23.11701244813278, "grad_norm": 35.50876235961914, "learning_rate": 1.0758174273858922e-05, "loss": 0.5919, "step": 27856 }, { "epoch": 23.117842323651452, "grad_norm": 38.97549057006836, "learning_rate": 1.0757842323651452e-05, "loss": 0.4851, "step": 27857 }, { "epoch": 23.118672199170124, "grad_norm": 33.078834533691406, "learning_rate": 1.0757510373443984e-05, "loss": 0.4503, "step": 27858 }, { "epoch": 23.119502074688796, "grad_norm": 43.5363655090332, "learning_rate": 1.0757178423236516e-05, "loss": 0.7097, "step": 27859 }, { "epoch": 23.12033195020747, "grad_norm": 40.65872573852539, "learning_rate": 1.0756846473029045e-05, "loss": 0.289, "step": 27860 }, { "epoch": 23.12116182572614, "grad_norm": 34.01559066772461, "learning_rate": 1.0756514522821577e-05, "loss": 0.6328, "step": 27861 }, { "epoch": 23.121991701244813, "grad_norm": 20.33979034423828, "learning_rate": 1.075618257261411e-05, "loss": 0.2736, "step": 27862 }, { "epoch": 23.122821576763485, "grad_norm": 42.73506164550781, "learning_rate": 1.075585062240664e-05, "loss": 0.311, "step": 27863 }, { "epoch": 23.123651452282157, 
"grad_norm": 52.92985153198242, "learning_rate": 1.075551867219917e-05, "loss": 0.892, "step": 27864 }, { "epoch": 23.12448132780083, "grad_norm": 75.47035217285156, "learning_rate": 1.0755186721991702e-05, "loss": 1.0402, "step": 27865 }, { "epoch": 23.1253112033195, "grad_norm": 22.82916831970215, "learning_rate": 1.0754854771784233e-05, "loss": 0.4455, "step": 27866 }, { "epoch": 23.126141078838174, "grad_norm": 43.052608489990234, "learning_rate": 1.0754522821576765e-05, "loss": 0.9651, "step": 27867 }, { "epoch": 23.126970954356846, "grad_norm": 64.85749816894531, "learning_rate": 1.0754190871369294e-05, "loss": 0.7005, "step": 27868 }, { "epoch": 23.127800829875518, "grad_norm": 38.121246337890625, "learning_rate": 1.0753858921161826e-05, "loss": 0.3835, "step": 27869 }, { "epoch": 23.12863070539419, "grad_norm": 71.14432525634766, "learning_rate": 1.0753526970954358e-05, "loss": 0.3925, "step": 27870 }, { "epoch": 23.129460580912863, "grad_norm": 84.03265380859375, "learning_rate": 1.075319502074689e-05, "loss": 0.5538, "step": 27871 }, { "epoch": 23.130290456431535, "grad_norm": 61.97333526611328, "learning_rate": 1.075286307053942e-05, "loss": 0.8746, "step": 27872 }, { "epoch": 23.131120331950207, "grad_norm": 24.178451538085938, "learning_rate": 1.075253112033195e-05, "loss": 0.345, "step": 27873 }, { "epoch": 23.13195020746888, "grad_norm": 39.762603759765625, "learning_rate": 1.0752199170124483e-05, "loss": 0.5456, "step": 27874 }, { "epoch": 23.13278008298755, "grad_norm": 127.3565902709961, "learning_rate": 1.0751867219917013e-05, "loss": 0.7239, "step": 27875 }, { "epoch": 23.133609958506224, "grad_norm": 39.64067077636719, "learning_rate": 1.0751535269709545e-05, "loss": 0.5945, "step": 27876 }, { "epoch": 23.134439834024896, "grad_norm": 9.79001522064209, "learning_rate": 1.0751203319502074e-05, "loss": 0.2115, "step": 27877 }, { "epoch": 23.135269709543568, "grad_norm": 87.93310546875, "learning_rate": 1.0750871369294606e-05, "loss": 0.393, 
"step": 27878 }, { "epoch": 23.13609958506224, "grad_norm": 35.77907180786133, "learning_rate": 1.0750539419087138e-05, "loss": 0.4787, "step": 27879 }, { "epoch": 23.136929460580912, "grad_norm": 15.66431713104248, "learning_rate": 1.075020746887967e-05, "loss": 0.2675, "step": 27880 }, { "epoch": 23.137759336099585, "grad_norm": 17.608217239379883, "learning_rate": 1.0749875518672199e-05, "loss": 0.2983, "step": 27881 }, { "epoch": 23.138589211618257, "grad_norm": 17.894655227661133, "learning_rate": 1.0749543568464731e-05, "loss": 0.3304, "step": 27882 }, { "epoch": 23.13941908713693, "grad_norm": 19.163545608520508, "learning_rate": 1.0749211618257263e-05, "loss": 0.3012, "step": 27883 }, { "epoch": 23.1402489626556, "grad_norm": 33.46098709106445, "learning_rate": 1.0748879668049794e-05, "loss": 0.2309, "step": 27884 }, { "epoch": 23.141078838174273, "grad_norm": 39.639713287353516, "learning_rate": 1.0748547717842324e-05, "loss": 0.4524, "step": 27885 }, { "epoch": 23.141908713692946, "grad_norm": 73.81329345703125, "learning_rate": 1.0748215767634855e-05, "loss": 0.8944, "step": 27886 }, { "epoch": 23.142738589211618, "grad_norm": 52.119590759277344, "learning_rate": 1.0747883817427387e-05, "loss": 0.3787, "step": 27887 }, { "epoch": 23.14356846473029, "grad_norm": 12.350263595581055, "learning_rate": 1.0747551867219919e-05, "loss": 0.3359, "step": 27888 }, { "epoch": 23.144398340248962, "grad_norm": 23.022504806518555, "learning_rate": 1.0747219917012447e-05, "loss": 0.2572, "step": 27889 }, { "epoch": 23.145228215767634, "grad_norm": 40.230533599853516, "learning_rate": 1.074688796680498e-05, "loss": 0.6999, "step": 27890 }, { "epoch": 23.146058091286307, "grad_norm": 13.518887519836426, "learning_rate": 1.0746556016597512e-05, "loss": 0.2848, "step": 27891 }, { "epoch": 23.14688796680498, "grad_norm": 72.33234405517578, "learning_rate": 1.0746224066390044e-05, "loss": 0.5447, "step": 27892 }, { "epoch": 23.14771784232365, "grad_norm": 45.182003021240234, 
"learning_rate": 1.0745892116182573e-05, "loss": 0.5247, "step": 27893 }, { "epoch": 23.148547717842323, "grad_norm": 17.04625129699707, "learning_rate": 1.0745560165975105e-05, "loss": 0.2552, "step": 27894 }, { "epoch": 23.149377593360995, "grad_norm": 50.879085540771484, "learning_rate": 1.0745228215767635e-05, "loss": 0.3543, "step": 27895 }, { "epoch": 23.150207468879668, "grad_norm": 43.434326171875, "learning_rate": 1.0744896265560167e-05, "loss": 0.6193, "step": 27896 }, { "epoch": 23.15103734439834, "grad_norm": 41.14540481567383, "learning_rate": 1.07445643153527e-05, "loss": 0.3533, "step": 27897 }, { "epoch": 23.151867219917012, "grad_norm": 30.706253051757812, "learning_rate": 1.0744232365145228e-05, "loss": 0.3136, "step": 27898 }, { "epoch": 23.152697095435684, "grad_norm": 44.92188262939453, "learning_rate": 1.074390041493776e-05, "loss": 0.5093, "step": 27899 }, { "epoch": 23.153526970954356, "grad_norm": 43.93152618408203, "learning_rate": 1.0743568464730292e-05, "loss": 0.3872, "step": 27900 }, { "epoch": 23.15435684647303, "grad_norm": 61.78009796142578, "learning_rate": 1.0743236514522823e-05, "loss": 0.7581, "step": 27901 }, { "epoch": 23.1551867219917, "grad_norm": 39.80339813232422, "learning_rate": 1.0742904564315353e-05, "loss": 1.0518, "step": 27902 }, { "epoch": 23.156016597510373, "grad_norm": 102.57745361328125, "learning_rate": 1.0742572614107885e-05, "loss": 0.7103, "step": 27903 }, { "epoch": 23.156846473029045, "grad_norm": 79.61945343017578, "learning_rate": 1.0742240663900416e-05, "loss": 0.7689, "step": 27904 }, { "epoch": 23.157676348547717, "grad_norm": 23.062095642089844, "learning_rate": 1.0741908713692948e-05, "loss": 0.284, "step": 27905 }, { "epoch": 23.15850622406639, "grad_norm": 59.92941665649414, "learning_rate": 1.0741576763485478e-05, "loss": 0.4032, "step": 27906 }, { "epoch": 23.15933609958506, "grad_norm": 43.793487548828125, "learning_rate": 1.0741244813278008e-05, "loss": 0.23, "step": 27907 }, { "epoch": 
23.160165975103734, "grad_norm": 48.110069274902344, "learning_rate": 1.074091286307054e-05, "loss": 0.8226, "step": 27908 }, { "epoch": 23.160995850622406, "grad_norm": 59.964752197265625, "learning_rate": 1.0740580912863073e-05, "loss": 0.6839, "step": 27909 }, { "epoch": 23.16182572614108, "grad_norm": 27.764978408813477, "learning_rate": 1.0740248962655601e-05, "loss": 0.368, "step": 27910 }, { "epoch": 23.16265560165975, "grad_norm": 53.05888748168945, "learning_rate": 1.0739917012448134e-05, "loss": 0.3019, "step": 27911 }, { "epoch": 23.163485477178423, "grad_norm": 60.44914245605469, "learning_rate": 1.0739585062240666e-05, "loss": 0.3014, "step": 27912 }, { "epoch": 23.164315352697095, "grad_norm": 49.033416748046875, "learning_rate": 1.0739253112033196e-05, "loss": 0.5658, "step": 27913 }, { "epoch": 23.165145228215767, "grad_norm": 23.90863037109375, "learning_rate": 1.0738921161825726e-05, "loss": 0.3961, "step": 27914 }, { "epoch": 23.16597510373444, "grad_norm": 92.72743225097656, "learning_rate": 1.0738589211618259e-05, "loss": 0.3923, "step": 27915 }, { "epoch": 23.16680497925311, "grad_norm": 36.73960494995117, "learning_rate": 1.0738257261410789e-05, "loss": 0.5011, "step": 27916 }, { "epoch": 23.167634854771784, "grad_norm": 33.31509017944336, "learning_rate": 1.0737925311203321e-05, "loss": 0.473, "step": 27917 }, { "epoch": 23.168464730290456, "grad_norm": 18.097505569458008, "learning_rate": 1.073759336099585e-05, "loss": 0.2395, "step": 27918 }, { "epoch": 23.169294605809128, "grad_norm": 17.61530303955078, "learning_rate": 1.0737261410788382e-05, "loss": 0.2203, "step": 27919 }, { "epoch": 23.1701244813278, "grad_norm": 15.402989387512207, "learning_rate": 1.0736929460580914e-05, "loss": 0.2653, "step": 27920 }, { "epoch": 23.170954356846472, "grad_norm": 42.67588806152344, "learning_rate": 1.0736597510373446e-05, "loss": 0.8189, "step": 27921 }, { "epoch": 23.171784232365145, "grad_norm": 86.48255920410156, "learning_rate": 
1.0736265560165975e-05, "loss": 0.4163, "step": 27922 }, { "epoch": 23.172614107883817, "grad_norm": 41.34053421020508, "learning_rate": 1.0735933609958507e-05, "loss": 0.5346, "step": 27923 }, { "epoch": 23.17344398340249, "grad_norm": 18.49396324157715, "learning_rate": 1.0735601659751037e-05, "loss": 0.413, "step": 27924 }, { "epoch": 23.17427385892116, "grad_norm": 52.18812561035156, "learning_rate": 1.073526970954357e-05, "loss": 0.8236, "step": 27925 }, { "epoch": 23.175103734439833, "grad_norm": 20.05001449584961, "learning_rate": 1.0734937759336102e-05, "loss": 0.3843, "step": 27926 }, { "epoch": 23.175933609958506, "grad_norm": 44.03919982910156, "learning_rate": 1.073460580912863e-05, "loss": 0.4803, "step": 27927 }, { "epoch": 23.176763485477178, "grad_norm": 17.666648864746094, "learning_rate": 1.0734273858921162e-05, "loss": 0.3047, "step": 27928 }, { "epoch": 23.17759336099585, "grad_norm": 27.67658805847168, "learning_rate": 1.0733941908713695e-05, "loss": 0.4763, "step": 27929 }, { "epoch": 23.178423236514522, "grad_norm": 31.659101486206055, "learning_rate": 1.0733609958506227e-05, "loss": 0.3141, "step": 27930 }, { "epoch": 23.179253112033194, "grad_norm": 59.21427536010742, "learning_rate": 1.0733278008298755e-05, "loss": 0.4818, "step": 27931 }, { "epoch": 23.180082987551867, "grad_norm": 53.750099182128906, "learning_rate": 1.0732946058091287e-05, "loss": 1.0257, "step": 27932 }, { "epoch": 23.18091286307054, "grad_norm": 118.30733489990234, "learning_rate": 1.0732614107883818e-05, "loss": 0.464, "step": 27933 }, { "epoch": 23.18174273858921, "grad_norm": 30.563058853149414, "learning_rate": 1.073228215767635e-05, "loss": 0.5832, "step": 27934 }, { "epoch": 23.182572614107883, "grad_norm": 25.381103515625, "learning_rate": 1.073195020746888e-05, "loss": 0.3242, "step": 27935 }, { "epoch": 23.183402489626555, "grad_norm": 18.81141471862793, "learning_rate": 1.073161825726141e-05, "loss": 0.3017, "step": 27936 }, { "epoch": 23.184232365145228, 
"grad_norm": 25.408512115478516, "learning_rate": 1.0731286307053943e-05, "loss": 0.2939, "step": 27937 }, { "epoch": 23.1850622406639, "grad_norm": 54.53825759887695, "learning_rate": 1.0730954356846475e-05, "loss": 0.8901, "step": 27938 }, { "epoch": 23.185892116182572, "grad_norm": 19.320606231689453, "learning_rate": 1.0730622406639004e-05, "loss": 0.3348, "step": 27939 }, { "epoch": 23.186721991701244, "grad_norm": 48.046051025390625, "learning_rate": 1.0730290456431536e-05, "loss": 0.6546, "step": 27940 }, { "epoch": 23.187551867219916, "grad_norm": 41.44841766357422, "learning_rate": 1.0729958506224068e-05, "loss": 0.405, "step": 27941 }, { "epoch": 23.18838174273859, "grad_norm": 18.521482467651367, "learning_rate": 1.0729626556016598e-05, "loss": 0.5736, "step": 27942 }, { "epoch": 23.18921161825726, "grad_norm": 49.801597595214844, "learning_rate": 1.0729294605809129e-05, "loss": 0.4721, "step": 27943 }, { "epoch": 23.190041493775933, "grad_norm": 25.68506622314453, "learning_rate": 1.0728962655601661e-05, "loss": 0.3568, "step": 27944 }, { "epoch": 23.190871369294605, "grad_norm": 75.06429290771484, "learning_rate": 1.0728630705394191e-05, "loss": 0.6715, "step": 27945 }, { "epoch": 23.191701244813277, "grad_norm": 8.595128059387207, "learning_rate": 1.0728298755186723e-05, "loss": 0.2191, "step": 27946 }, { "epoch": 23.19253112033195, "grad_norm": 73.0920639038086, "learning_rate": 1.0727966804979252e-05, "loss": 0.7983, "step": 27947 }, { "epoch": 23.19336099585062, "grad_norm": 20.661651611328125, "learning_rate": 1.0727634854771784e-05, "loss": 0.3163, "step": 27948 }, { "epoch": 23.194190871369294, "grad_norm": 24.33747673034668, "learning_rate": 1.0727302904564316e-05, "loss": 0.4473, "step": 27949 }, { "epoch": 23.195020746887966, "grad_norm": 47.70480728149414, "learning_rate": 1.0726970954356848e-05, "loss": 0.5139, "step": 27950 }, { "epoch": 23.19585062240664, "grad_norm": 38.981971740722656, "learning_rate": 1.0726639004149379e-05, "loss": 
0.3744, "step": 27951 }, { "epoch": 23.19668049792531, "grad_norm": 18.80390167236328, "learning_rate": 1.072630705394191e-05, "loss": 0.3111, "step": 27952 }, { "epoch": 23.197510373443983, "grad_norm": 43.776771545410156, "learning_rate": 1.0725975103734441e-05, "loss": 0.502, "step": 27953 }, { "epoch": 23.198340248962655, "grad_norm": 23.231800079345703, "learning_rate": 1.0725643153526972e-05, "loss": 0.2899, "step": 27954 }, { "epoch": 23.199170124481327, "grad_norm": 54.66719436645508, "learning_rate": 1.0725311203319504e-05, "loss": 0.4548, "step": 27955 }, { "epoch": 23.2, "grad_norm": 46.87645721435547, "learning_rate": 1.0724979253112033e-05, "loss": 0.3927, "step": 27956 }, { "epoch": 23.20082987551867, "grad_norm": 50.63694763183594, "learning_rate": 1.0724647302904565e-05, "loss": 0.4085, "step": 27957 }, { "epoch": 23.201659751037344, "grad_norm": 42.45140075683594, "learning_rate": 1.0724315352697097e-05, "loss": 0.5943, "step": 27958 }, { "epoch": 23.202489626556016, "grad_norm": 34.250518798828125, "learning_rate": 1.0723983402489629e-05, "loss": 0.5631, "step": 27959 }, { "epoch": 23.203319502074688, "grad_norm": 49.60831069946289, "learning_rate": 1.0723651452282158e-05, "loss": 0.2553, "step": 27960 }, { "epoch": 23.20414937759336, "grad_norm": 13.241844177246094, "learning_rate": 1.072331950207469e-05, "loss": 0.2903, "step": 27961 }, { "epoch": 23.204979253112032, "grad_norm": 34.59405517578125, "learning_rate": 1.0722987551867222e-05, "loss": 0.3251, "step": 27962 }, { "epoch": 23.205809128630705, "grad_norm": 21.573331832885742, "learning_rate": 1.0722655601659752e-05, "loss": 0.6222, "step": 27963 }, { "epoch": 23.206639004149377, "grad_norm": 56.572452545166016, "learning_rate": 1.0722323651452283e-05, "loss": 0.346, "step": 27964 }, { "epoch": 23.20746887966805, "grad_norm": 38.77033996582031, "learning_rate": 1.0721991701244813e-05, "loss": 0.4013, "step": 27965 }, { "epoch": 23.20829875518672, "grad_norm": 8.623095512390137, 
"learning_rate": 1.0721659751037345e-05, "loss": 0.2434, "step": 27966 }, { "epoch": 23.209128630705393, "grad_norm": 13.682673454284668, "learning_rate": 1.0721327800829877e-05, "loss": 0.3458, "step": 27967 }, { "epoch": 23.209958506224066, "grad_norm": 54.970829010009766, "learning_rate": 1.0720995850622406e-05, "loss": 0.46, "step": 27968 }, { "epoch": 23.210788381742738, "grad_norm": 19.622085571289062, "learning_rate": 1.0720663900414938e-05, "loss": 0.1959, "step": 27969 }, { "epoch": 23.21161825726141, "grad_norm": 23.74482536315918, "learning_rate": 1.072033195020747e-05, "loss": 0.3359, "step": 27970 }, { "epoch": 23.212448132780082, "grad_norm": 42.67356872558594, "learning_rate": 1.072e-05, "loss": 0.7908, "step": 27971 }, { "epoch": 23.213278008298754, "grad_norm": 18.812108993530273, "learning_rate": 1.0719668049792531e-05, "loss": 0.2433, "step": 27972 }, { "epoch": 23.214107883817427, "grad_norm": 69.1335220336914, "learning_rate": 1.0719336099585063e-05, "loss": 0.387, "step": 27973 }, { "epoch": 23.2149377593361, "grad_norm": 49.1724967956543, "learning_rate": 1.0719004149377594e-05, "loss": 0.4141, "step": 27974 }, { "epoch": 23.21576763485477, "grad_norm": 17.417236328125, "learning_rate": 1.0718672199170126e-05, "loss": 0.2643, "step": 27975 }, { "epoch": 23.216597510373443, "grad_norm": 41.33802032470703, "learning_rate": 1.0718340248962658e-05, "loss": 0.5575, "step": 27976 }, { "epoch": 23.217427385892115, "grad_norm": 59.760093688964844, "learning_rate": 1.0718008298755187e-05, "loss": 0.372, "step": 27977 }, { "epoch": 23.218257261410788, "grad_norm": 38.565940856933594, "learning_rate": 1.0717676348547719e-05, "loss": 0.5565, "step": 27978 }, { "epoch": 23.21908713692946, "grad_norm": 40.313411712646484, "learning_rate": 1.071734439834025e-05, "loss": 0.3424, "step": 27979 }, { "epoch": 23.219917012448132, "grad_norm": 21.300708770751953, "learning_rate": 1.0717012448132781e-05, "loss": 0.2707, "step": 27980 }, { "epoch": 
23.220746887966804, "grad_norm": 35.448974609375, "learning_rate": 1.0716680497925312e-05, "loss": 0.3265, "step": 27981 }, { "epoch": 23.221576763485476, "grad_norm": 19.94744873046875, "learning_rate": 1.0716348547717844e-05, "loss": 0.3154, "step": 27982 }, { "epoch": 23.22240663900415, "grad_norm": 32.937557220458984, "learning_rate": 1.0716016597510374e-05, "loss": 0.4098, "step": 27983 }, { "epoch": 23.22323651452282, "grad_norm": 36.86116027832031, "learning_rate": 1.0715684647302906e-05, "loss": 0.4277, "step": 27984 }, { "epoch": 23.224066390041493, "grad_norm": 20.489675521850586, "learning_rate": 1.0715352697095435e-05, "loss": 0.3553, "step": 27985 }, { "epoch": 23.224896265560165, "grad_norm": 34.06732940673828, "learning_rate": 1.0715020746887967e-05, "loss": 0.4257, "step": 27986 }, { "epoch": 23.225726141078837, "grad_norm": 98.4453353881836, "learning_rate": 1.07146887966805e-05, "loss": 0.3836, "step": 27987 }, { "epoch": 23.22655601659751, "grad_norm": 60.16402053833008, "learning_rate": 1.0714356846473031e-05, "loss": 0.2704, "step": 27988 }, { "epoch": 23.22738589211618, "grad_norm": 108.8296890258789, "learning_rate": 1.071402489626556e-05, "loss": 0.5975, "step": 27989 }, { "epoch": 23.228215767634854, "grad_norm": 46.1231689453125, "learning_rate": 1.0713692946058092e-05, "loss": 0.9526, "step": 27990 }, { "epoch": 23.229045643153526, "grad_norm": 54.95124435424805, "learning_rate": 1.0713360995850624e-05, "loss": 0.3845, "step": 27991 }, { "epoch": 23.2298755186722, "grad_norm": 76.21037292480469, "learning_rate": 1.0713029045643155e-05, "loss": 0.5914, "step": 27992 }, { "epoch": 23.23070539419087, "grad_norm": 111.3685302734375, "learning_rate": 1.0712697095435685e-05, "loss": 0.4789, "step": 27993 }, { "epoch": 23.231535269709543, "grad_norm": 51.51655960083008, "learning_rate": 1.0712365145228215e-05, "loss": 0.5239, "step": 27994 }, { "epoch": 23.232365145228215, "grad_norm": 65.9014892578125, "learning_rate": 1.0712033195020748e-05, 
"loss": 0.7673, "step": 27995 }, { "epoch": 23.233195020746887, "grad_norm": 29.043712615966797, "learning_rate": 1.071170124481328e-05, "loss": 0.3369, "step": 27996 }, { "epoch": 23.23402489626556, "grad_norm": 54.697513580322266, "learning_rate": 1.0711369294605808e-05, "loss": 0.5298, "step": 27997 }, { "epoch": 23.23485477178423, "grad_norm": 19.153030395507812, "learning_rate": 1.071103734439834e-05, "loss": 0.395, "step": 27998 }, { "epoch": 23.235684647302904, "grad_norm": 31.08536720275879, "learning_rate": 1.0710705394190873e-05, "loss": 0.3231, "step": 27999 }, { "epoch": 23.236514522821576, "grad_norm": 27.873567581176758, "learning_rate": 1.0710373443983405e-05, "loss": 0.277, "step": 28000 }, { "epoch": 23.237344398340248, "grad_norm": 78.95533752441406, "learning_rate": 1.0710041493775933e-05, "loss": 0.725, "step": 28001 }, { "epoch": 23.23817427385892, "grad_norm": 64.22943878173828, "learning_rate": 1.0709709543568466e-05, "loss": 0.3943, "step": 28002 }, { "epoch": 23.239004149377593, "grad_norm": 83.16536712646484, "learning_rate": 1.0709377593360996e-05, "loss": 0.7524, "step": 28003 }, { "epoch": 23.239834024896265, "grad_norm": 24.80068588256836, "learning_rate": 1.0709045643153528e-05, "loss": 0.4916, "step": 28004 }, { "epoch": 23.240663900414937, "grad_norm": 44.940765380859375, "learning_rate": 1.070871369294606e-05, "loss": 0.858, "step": 28005 }, { "epoch": 23.24149377593361, "grad_norm": 42.462520599365234, "learning_rate": 1.0708381742738589e-05, "loss": 0.5966, "step": 28006 }, { "epoch": 23.24232365145228, "grad_norm": 35.60562515258789, "learning_rate": 1.0708049792531121e-05, "loss": 0.3417, "step": 28007 }, { "epoch": 23.243153526970953, "grad_norm": 57.94914245605469, "learning_rate": 1.0707717842323653e-05, "loss": 0.6072, "step": 28008 }, { "epoch": 23.243983402489626, "grad_norm": 42.934627532958984, "learning_rate": 1.0707385892116185e-05, "loss": 0.4859, "step": 28009 }, { "epoch": 23.244813278008298, "grad_norm": 
28.249019622802734, "learning_rate": 1.0707053941908714e-05, "loss": 0.5676, "step": 28010 }, { "epoch": 23.24564315352697, "grad_norm": 24.30139923095703, "learning_rate": 1.0706721991701246e-05, "loss": 0.3228, "step": 28011 }, { "epoch": 23.246473029045642, "grad_norm": 58.74265670776367, "learning_rate": 1.0706390041493776e-05, "loss": 0.6088, "step": 28012 }, { "epoch": 23.247302904564314, "grad_norm": 33.39267349243164, "learning_rate": 1.0706058091286309e-05, "loss": 0.4088, "step": 28013 }, { "epoch": 23.248132780082987, "grad_norm": 33.61754608154297, "learning_rate": 1.0705726141078839e-05, "loss": 0.2762, "step": 28014 }, { "epoch": 23.24896265560166, "grad_norm": 33.17902755737305, "learning_rate": 1.070539419087137e-05, "loss": 0.3188, "step": 28015 }, { "epoch": 23.24979253112033, "grad_norm": 32.46514892578125, "learning_rate": 1.0705062240663901e-05, "loss": 0.6694, "step": 28016 }, { "epoch": 23.250622406639003, "grad_norm": 76.24152374267578, "learning_rate": 1.0704730290456434e-05, "loss": 0.393, "step": 28017 }, { "epoch": 23.251452282157675, "grad_norm": 53.35352325439453, "learning_rate": 1.0704398340248962e-05, "loss": 0.5774, "step": 28018 }, { "epoch": 23.252282157676348, "grad_norm": 36.40756607055664, "learning_rate": 1.0704066390041494e-05, "loss": 0.5177, "step": 28019 }, { "epoch": 23.25311203319502, "grad_norm": 22.34984588623047, "learning_rate": 1.0703734439834027e-05, "loss": 0.3956, "step": 28020 }, { "epoch": 23.253941908713692, "grad_norm": 18.48417091369629, "learning_rate": 1.0703402489626557e-05, "loss": 0.3199, "step": 28021 }, { "epoch": 23.254771784232364, "grad_norm": 30.05733299255371, "learning_rate": 1.0703070539419087e-05, "loss": 0.3152, "step": 28022 }, { "epoch": 23.255601659751036, "grad_norm": 48.14085006713867, "learning_rate": 1.070273858921162e-05, "loss": 0.584, "step": 28023 }, { "epoch": 23.25643153526971, "grad_norm": 24.4505558013916, "learning_rate": 1.070240663900415e-05, "loss": 0.4296, "step": 28024 
}, { "epoch": 23.25726141078838, "grad_norm": 34.20676040649414, "learning_rate": 1.0702074688796682e-05, "loss": 0.4916, "step": 28025 }, { "epoch": 23.258091286307053, "grad_norm": 12.83983039855957, "learning_rate": 1.070174273858921e-05, "loss": 0.2914, "step": 28026 }, { "epoch": 23.258921161825725, "grad_norm": 43.580196380615234, "learning_rate": 1.0701410788381743e-05, "loss": 0.5068, "step": 28027 }, { "epoch": 23.259751037344397, "grad_norm": 58.91569900512695, "learning_rate": 1.0701078838174275e-05, "loss": 0.6631, "step": 28028 }, { "epoch": 23.26058091286307, "grad_norm": 23.334306716918945, "learning_rate": 1.0700746887966807e-05, "loss": 0.4217, "step": 28029 }, { "epoch": 23.261410788381742, "grad_norm": 107.47772216796875, "learning_rate": 1.0700414937759337e-05, "loss": 0.6238, "step": 28030 }, { "epoch": 23.262240663900414, "grad_norm": 22.73587417602539, "learning_rate": 1.0700082987551868e-05, "loss": 0.2743, "step": 28031 }, { "epoch": 23.263070539419086, "grad_norm": 16.07663345336914, "learning_rate": 1.06997510373444e-05, "loss": 0.2289, "step": 28032 }, { "epoch": 23.26390041493776, "grad_norm": 41.55402374267578, "learning_rate": 1.069941908713693e-05, "loss": 0.5204, "step": 28033 }, { "epoch": 23.26473029045643, "grad_norm": 40.49006652832031, "learning_rate": 1.0699087136929462e-05, "loss": 0.5657, "step": 28034 }, { "epoch": 23.265560165975103, "grad_norm": 52.65394973754883, "learning_rate": 1.0698755186721991e-05, "loss": 0.2986, "step": 28035 }, { "epoch": 23.266390041493775, "grad_norm": 148.6103973388672, "learning_rate": 1.0698423236514523e-05, "loss": 0.826, "step": 28036 }, { "epoch": 23.267219917012447, "grad_norm": 23.6069393157959, "learning_rate": 1.0698091286307055e-05, "loss": 0.357, "step": 28037 }, { "epoch": 23.26804979253112, "grad_norm": 9.738588333129883, "learning_rate": 1.0697759336099588e-05, "loss": 0.2669, "step": 28038 }, { "epoch": 23.26887966804979, "grad_norm": 42.20587921142578, "learning_rate": 
1.0697427385892116e-05, "loss": 0.4902, "step": 28039 }, { "epoch": 23.269709543568464, "grad_norm": 32.39096450805664, "learning_rate": 1.0697095435684648e-05, "loss": 0.288, "step": 28040 }, { "epoch": 23.270539419087136, "grad_norm": 17.657922744750977, "learning_rate": 1.0696763485477179e-05, "loss": 0.3124, "step": 28041 }, { "epoch": 23.271369294605808, "grad_norm": 21.66230010986328, "learning_rate": 1.0696431535269711e-05, "loss": 0.2944, "step": 28042 }, { "epoch": 23.27219917012448, "grad_norm": 99.50345611572266, "learning_rate": 1.0696099585062241e-05, "loss": 0.817, "step": 28043 }, { "epoch": 23.273029045643153, "grad_norm": 55.227195739746094, "learning_rate": 1.0695767634854772e-05, "loss": 0.3711, "step": 28044 }, { "epoch": 23.273858921161825, "grad_norm": 104.298828125, "learning_rate": 1.0695435684647304e-05, "loss": 0.4662, "step": 28045 }, { "epoch": 23.274688796680497, "grad_norm": 48.0404052734375, "learning_rate": 1.0695103734439836e-05, "loss": 0.3157, "step": 28046 }, { "epoch": 23.27551867219917, "grad_norm": 55.60112762451172, "learning_rate": 1.0694771784232365e-05, "loss": 0.5759, "step": 28047 }, { "epoch": 23.27634854771784, "grad_norm": 86.88318634033203, "learning_rate": 1.0694439834024897e-05, "loss": 1.1976, "step": 28048 }, { "epoch": 23.277178423236514, "grad_norm": 90.34098815917969, "learning_rate": 1.0694107883817429e-05, "loss": 0.5711, "step": 28049 }, { "epoch": 23.278008298755186, "grad_norm": 100.65440368652344, "learning_rate": 1.069377593360996e-05, "loss": 0.6119, "step": 28050 }, { "epoch": 23.278838174273858, "grad_norm": 21.712282180786133, "learning_rate": 1.069344398340249e-05, "loss": 0.3583, "step": 28051 }, { "epoch": 23.27966804979253, "grad_norm": 18.467859268188477, "learning_rate": 1.0693112033195022e-05, "loss": 0.3169, "step": 28052 }, { "epoch": 23.280497925311202, "grad_norm": 12.996496200561523, "learning_rate": 1.0692780082987552e-05, "loss": 0.193, "step": 28053 }, { "epoch": 23.281327800829875, 
"grad_norm": 97.5396957397461, "learning_rate": 1.0692448132780084e-05, "loss": 0.5676, "step": 28054 }, { "epoch": 23.282157676348547, "grad_norm": 18.001087188720703, "learning_rate": 1.0692116182572616e-05, "loss": 0.2908, "step": 28055 }, { "epoch": 23.28298755186722, "grad_norm": 40.016502380371094, "learning_rate": 1.0691784232365145e-05, "loss": 0.9518, "step": 28056 }, { "epoch": 23.28381742738589, "grad_norm": 16.282358169555664, "learning_rate": 1.0691452282157677e-05, "loss": 0.3364, "step": 28057 }, { "epoch": 23.284647302904563, "grad_norm": 16.917272567749023, "learning_rate": 1.069112033195021e-05, "loss": 0.3264, "step": 28058 }, { "epoch": 23.285477178423236, "grad_norm": 50.68459701538086, "learning_rate": 1.069078838174274e-05, "loss": 0.4798, "step": 28059 }, { "epoch": 23.286307053941908, "grad_norm": 26.07304573059082, "learning_rate": 1.069045643153527e-05, "loss": 0.3326, "step": 28060 }, { "epoch": 23.28713692946058, "grad_norm": 45.14698028564453, "learning_rate": 1.0690124481327802e-05, "loss": 0.3973, "step": 28061 }, { "epoch": 23.287966804979252, "grad_norm": 38.941810607910156, "learning_rate": 1.0689792531120333e-05, "loss": 0.5249, "step": 28062 }, { "epoch": 23.288796680497924, "grad_norm": 86.43437194824219, "learning_rate": 1.0689460580912865e-05, "loss": 0.6998, "step": 28063 }, { "epoch": 23.289626556016596, "grad_norm": 44.570919036865234, "learning_rate": 1.0689128630705394e-05, "loss": 0.6388, "step": 28064 }, { "epoch": 23.29045643153527, "grad_norm": 30.185274124145508, "learning_rate": 1.0688796680497926e-05, "loss": 0.4348, "step": 28065 }, { "epoch": 23.29128630705394, "grad_norm": 51.573020935058594, "learning_rate": 1.0688464730290458e-05, "loss": 0.6424, "step": 28066 }, { "epoch": 23.292116182572613, "grad_norm": 28.208829879760742, "learning_rate": 1.068813278008299e-05, "loss": 0.3307, "step": 28067 }, { "epoch": 23.292946058091285, "grad_norm": 15.685709953308105, "learning_rate": 1.0687800829875519e-05, "loss": 
0.2464, "step": 28068 }, { "epoch": 23.293775933609957, "grad_norm": 37.0412483215332, "learning_rate": 1.068746887966805e-05, "loss": 0.4829, "step": 28069 }, { "epoch": 23.29460580912863, "grad_norm": 21.2952880859375, "learning_rate": 1.0687136929460583e-05, "loss": 0.2038, "step": 28070 }, { "epoch": 23.295435684647302, "grad_norm": 91.51876831054688, "learning_rate": 1.0686804979253113e-05, "loss": 0.5152, "step": 28071 }, { "epoch": 23.296265560165974, "grad_norm": 65.32322692871094, "learning_rate": 1.0686473029045644e-05, "loss": 0.6414, "step": 28072 }, { "epoch": 23.297095435684646, "grad_norm": 50.64914321899414, "learning_rate": 1.0686141078838174e-05, "loss": 0.4734, "step": 28073 }, { "epoch": 23.29792531120332, "grad_norm": 64.38713836669922, "learning_rate": 1.0685809128630706e-05, "loss": 0.5313, "step": 28074 }, { "epoch": 23.29875518672199, "grad_norm": 14.181396484375, "learning_rate": 1.0685477178423238e-05, "loss": 0.2481, "step": 28075 }, { "epoch": 23.299585062240663, "grad_norm": 65.48686218261719, "learning_rate": 1.0685145228215767e-05, "loss": 0.4056, "step": 28076 }, { "epoch": 23.300414937759335, "grad_norm": 288.764892578125, "learning_rate": 1.0684813278008299e-05, "loss": 0.6864, "step": 28077 }, { "epoch": 23.301244813278007, "grad_norm": 54.10238265991211, "learning_rate": 1.0684481327800831e-05, "loss": 0.2301, "step": 28078 }, { "epoch": 23.30207468879668, "grad_norm": 184.2733154296875, "learning_rate": 1.0684149377593363e-05, "loss": 0.7698, "step": 28079 }, { "epoch": 23.30290456431535, "grad_norm": 12.962462425231934, "learning_rate": 1.0683817427385892e-05, "loss": 0.2868, "step": 28080 }, { "epoch": 23.303734439834024, "grad_norm": 20.28892707824707, "learning_rate": 1.0683485477178424e-05, "loss": 0.2512, "step": 28081 }, { "epoch": 23.304564315352696, "grad_norm": 41.35421371459961, "learning_rate": 1.0683153526970955e-05, "loss": 0.3409, "step": 28082 }, { "epoch": 23.305394190871368, "grad_norm": 61.5411491394043, 
"learning_rate": 1.0682821576763487e-05, "loss": 0.2373, "step": 28083 }, { "epoch": 23.30622406639004, "grad_norm": 34.4407844543457, "learning_rate": 1.0682489626556019e-05, "loss": 0.3324, "step": 28084 }, { "epoch": 23.307053941908713, "grad_norm": 47.525901794433594, "learning_rate": 1.0682157676348547e-05, "loss": 0.5759, "step": 28085 }, { "epoch": 23.307883817427385, "grad_norm": 24.744415283203125, "learning_rate": 1.068182572614108e-05, "loss": 0.398, "step": 28086 }, { "epoch": 23.308713692946057, "grad_norm": 178.48568725585938, "learning_rate": 1.0681493775933612e-05, "loss": 0.6156, "step": 28087 }, { "epoch": 23.30954356846473, "grad_norm": 44.41511535644531, "learning_rate": 1.0681161825726142e-05, "loss": 0.3219, "step": 28088 }, { "epoch": 23.3103734439834, "grad_norm": 22.66537857055664, "learning_rate": 1.0680829875518673e-05, "loss": 0.2625, "step": 28089 }, { "epoch": 23.311203319502074, "grad_norm": 63.59382247924805, "learning_rate": 1.0680497925311205e-05, "loss": 0.4357, "step": 28090 }, { "epoch": 23.312033195020746, "grad_norm": 25.705547332763672, "learning_rate": 1.0680165975103735e-05, "loss": 0.4789, "step": 28091 }, { "epoch": 23.312863070539418, "grad_norm": 49.507659912109375, "learning_rate": 1.0679834024896267e-05, "loss": 0.7348, "step": 28092 }, { "epoch": 23.31369294605809, "grad_norm": 47.57033920288086, "learning_rate": 1.0679502074688798e-05, "loss": 0.5901, "step": 28093 }, { "epoch": 23.314522821576762, "grad_norm": 137.71975708007812, "learning_rate": 1.0679170124481328e-05, "loss": 0.4638, "step": 28094 }, { "epoch": 23.315352697095435, "grad_norm": 34.57432174682617, "learning_rate": 1.067883817427386e-05, "loss": 0.3846, "step": 28095 }, { "epoch": 23.316182572614107, "grad_norm": 46.09068298339844, "learning_rate": 1.0678506224066392e-05, "loss": 0.5832, "step": 28096 }, { "epoch": 23.31701244813278, "grad_norm": 43.123809814453125, "learning_rate": 1.0678174273858921e-05, "loss": 0.6471, "step": 28097 }, { "epoch": 
23.31784232365145, "grad_norm": 25.106813430786133, "learning_rate": 1.0677842323651453e-05, "loss": 0.3256, "step": 28098 }, { "epoch": 23.318672199170123, "grad_norm": 72.03942108154297, "learning_rate": 1.0677510373443985e-05, "loss": 0.4647, "step": 28099 }, { "epoch": 23.319502074688796, "grad_norm": 8.93327522277832, "learning_rate": 1.0677178423236516e-05, "loss": 0.2978, "step": 28100 }, { "epoch": 23.320331950207468, "grad_norm": 16.163837432861328, "learning_rate": 1.0676846473029046e-05, "loss": 0.3269, "step": 28101 }, { "epoch": 23.32116182572614, "grad_norm": 11.086896896362305, "learning_rate": 1.0676514522821576e-05, "loss": 0.2332, "step": 28102 }, { "epoch": 23.321991701244812, "grad_norm": 32.54924774169922, "learning_rate": 1.0676182572614108e-05, "loss": 0.7232, "step": 28103 }, { "epoch": 23.322821576763484, "grad_norm": 100.60527038574219, "learning_rate": 1.067585062240664e-05, "loss": 1.2892, "step": 28104 }, { "epoch": 23.323651452282157, "grad_norm": 51.945133209228516, "learning_rate": 1.067551867219917e-05, "loss": 0.67, "step": 28105 }, { "epoch": 23.32448132780083, "grad_norm": 41.11182403564453, "learning_rate": 1.0675186721991701e-05, "loss": 0.4883, "step": 28106 }, { "epoch": 23.3253112033195, "grad_norm": 61.59062576293945, "learning_rate": 1.0674854771784234e-05, "loss": 0.6665, "step": 28107 }, { "epoch": 23.326141078838173, "grad_norm": 29.35215187072754, "learning_rate": 1.0674522821576766e-05, "loss": 0.5964, "step": 28108 }, { "epoch": 23.326970954356845, "grad_norm": 37.01003646850586, "learning_rate": 1.0674190871369296e-05, "loss": 0.6923, "step": 28109 }, { "epoch": 23.327800829875518, "grad_norm": 27.635976791381836, "learning_rate": 1.0673858921161826e-05, "loss": 0.5088, "step": 28110 }, { "epoch": 23.32863070539419, "grad_norm": 25.23535919189453, "learning_rate": 1.0673526970954357e-05, "loss": 0.5048, "step": 28111 }, { "epoch": 23.329460580912862, "grad_norm": 15.077406883239746, "learning_rate": 
1.0673195020746889e-05, "loss": 0.389, "step": 28112 }, { "epoch": 23.330290456431534, "grad_norm": 52.474945068359375, "learning_rate": 1.0672863070539421e-05, "loss": 0.4127, "step": 28113 }, { "epoch": 23.331120331950206, "grad_norm": 14.407707214355469, "learning_rate": 1.067253112033195e-05, "loss": 0.2488, "step": 28114 }, { "epoch": 23.33195020746888, "grad_norm": 24.814395904541016, "learning_rate": 1.0672199170124482e-05, "loss": 0.2805, "step": 28115 }, { "epoch": 23.33278008298755, "grad_norm": 34.58351135253906, "learning_rate": 1.0671867219917014e-05, "loss": 0.5759, "step": 28116 }, { "epoch": 23.333609958506223, "grad_norm": 18.571430206298828, "learning_rate": 1.0671535269709546e-05, "loss": 0.3495, "step": 28117 }, { "epoch": 23.334439834024895, "grad_norm": 70.15143585205078, "learning_rate": 1.0671203319502075e-05, "loss": 0.8731, "step": 28118 }, { "epoch": 23.335269709543567, "grad_norm": 50.1343879699707, "learning_rate": 1.0670871369294607e-05, "loss": 0.3395, "step": 28119 }, { "epoch": 23.33609958506224, "grad_norm": 34.425331115722656, "learning_rate": 1.0670539419087137e-05, "loss": 0.4945, "step": 28120 }, { "epoch": 23.33692946058091, "grad_norm": 53.422584533691406, "learning_rate": 1.067020746887967e-05, "loss": 0.5098, "step": 28121 }, { "epoch": 23.337759336099584, "grad_norm": 34.96653366088867, "learning_rate": 1.06698755186722e-05, "loss": 0.4837, "step": 28122 }, { "epoch": 23.338589211618256, "grad_norm": 40.03837585449219, "learning_rate": 1.066954356846473e-05, "loss": 0.3775, "step": 28123 }, { "epoch": 23.33941908713693, "grad_norm": 30.789613723754883, "learning_rate": 1.0669211618257262e-05, "loss": 0.4545, "step": 28124 }, { "epoch": 23.3402489626556, "grad_norm": 44.34955978393555, "learning_rate": 1.0668879668049795e-05, "loss": 0.5753, "step": 28125 }, { "epoch": 23.341078838174273, "grad_norm": 32.129112243652344, "learning_rate": 1.0668547717842323e-05, "loss": 0.3356, "step": 28126 }, { "epoch": 23.341908713692945, 
"grad_norm": 24.775156021118164, "learning_rate": 1.0668215767634855e-05, "loss": 0.6478, "step": 28127 }, { "epoch": 23.342738589211617, "grad_norm": 59.28578186035156, "learning_rate": 1.0667883817427387e-05, "loss": 0.4952, "step": 28128 }, { "epoch": 23.34356846473029, "grad_norm": 24.7280330657959, "learning_rate": 1.0667551867219918e-05, "loss": 0.5105, "step": 28129 }, { "epoch": 23.34439834024896, "grad_norm": 55.012142181396484, "learning_rate": 1.0667219917012448e-05, "loss": 0.8512, "step": 28130 }, { "epoch": 23.345228215767634, "grad_norm": 71.87937927246094, "learning_rate": 1.066688796680498e-05, "loss": 0.4001, "step": 28131 }, { "epoch": 23.346058091286306, "grad_norm": 44.29928970336914, "learning_rate": 1.066655601659751e-05, "loss": 0.554, "step": 28132 }, { "epoch": 23.346887966804978, "grad_norm": 41.22327423095703, "learning_rate": 1.0666224066390043e-05, "loss": 0.6048, "step": 28133 }, { "epoch": 23.34771784232365, "grad_norm": 53.37971115112305, "learning_rate": 1.0665892116182575e-05, "loss": 0.7578, "step": 28134 }, { "epoch": 23.348547717842322, "grad_norm": 42.538333892822266, "learning_rate": 1.0665560165975104e-05, "loss": 0.5341, "step": 28135 }, { "epoch": 23.349377593360995, "grad_norm": 11.488569259643555, "learning_rate": 1.0665228215767636e-05, "loss": 0.362, "step": 28136 }, { "epoch": 23.350207468879667, "grad_norm": 41.39192199707031, "learning_rate": 1.0664896265560168e-05, "loss": 0.2885, "step": 28137 }, { "epoch": 23.35103734439834, "grad_norm": 25.092016220092773, "learning_rate": 1.0664564315352698e-05, "loss": 0.2969, "step": 28138 }, { "epoch": 23.35186721991701, "grad_norm": 12.398268699645996, "learning_rate": 1.0664232365145229e-05, "loss": 0.1923, "step": 28139 }, { "epoch": 23.352697095435683, "grad_norm": 24.672588348388672, "learning_rate": 1.0663900414937761e-05, "loss": 0.3378, "step": 28140 }, { "epoch": 23.353526970954356, "grad_norm": 30.80741310119629, "learning_rate": 1.0663568464730291e-05, "loss": 
0.4495, "step": 28141 }, { "epoch": 23.354356846473028, "grad_norm": 13.184920310974121, "learning_rate": 1.0663236514522823e-05, "loss": 0.235, "step": 28142 }, { "epoch": 23.3551867219917, "grad_norm": 18.613012313842773, "learning_rate": 1.0662904564315352e-05, "loss": 0.2621, "step": 28143 }, { "epoch": 23.356016597510372, "grad_norm": 43.966896057128906, "learning_rate": 1.0662572614107884e-05, "loss": 0.8868, "step": 28144 }, { "epoch": 23.356846473029044, "grad_norm": 37.69795608520508, "learning_rate": 1.0662240663900416e-05, "loss": 0.8252, "step": 28145 }, { "epoch": 23.357676348547717, "grad_norm": 47.73774337768555, "learning_rate": 1.0661908713692948e-05, "loss": 0.8525, "step": 28146 }, { "epoch": 23.35850622406639, "grad_norm": 24.1431827545166, "learning_rate": 1.0661576763485477e-05, "loss": 0.4236, "step": 28147 }, { "epoch": 23.35933609958506, "grad_norm": 22.825233459472656, "learning_rate": 1.066124481327801e-05, "loss": 0.3711, "step": 28148 }, { "epoch": 23.360165975103733, "grad_norm": 29.49214744567871, "learning_rate": 1.066091286307054e-05, "loss": 0.2882, "step": 28149 }, { "epoch": 23.360995850622405, "grad_norm": 21.16193199157715, "learning_rate": 1.0660580912863072e-05, "loss": 0.4896, "step": 28150 }, { "epoch": 23.361825726141078, "grad_norm": 73.12174987792969, "learning_rate": 1.0660248962655602e-05, "loss": 0.6156, "step": 28151 }, { "epoch": 23.36265560165975, "grad_norm": 50.89143753051758, "learning_rate": 1.0659917012448133e-05, "loss": 0.4074, "step": 28152 }, { "epoch": 23.363485477178422, "grad_norm": 19.47438621520996, "learning_rate": 1.0659585062240665e-05, "loss": 0.2547, "step": 28153 }, { "epoch": 23.364315352697094, "grad_norm": 48.440555572509766, "learning_rate": 1.0659253112033197e-05, "loss": 0.3556, "step": 28154 }, { "epoch": 23.365145228215766, "grad_norm": 40.39576721191406, "learning_rate": 1.0658921161825726e-05, "loss": 0.3797, "step": 28155 }, { "epoch": 23.36597510373444, "grad_norm": 
117.3065414428711, "learning_rate": 1.0658589211618258e-05, "loss": 0.609, "step": 28156 }, { "epoch": 23.36680497925311, "grad_norm": 23.984453201293945, "learning_rate": 1.065825726141079e-05, "loss": 0.4907, "step": 28157 }, { "epoch": 23.367634854771783, "grad_norm": 60.30372619628906, "learning_rate": 1.065792531120332e-05, "loss": 0.3434, "step": 28158 }, { "epoch": 23.368464730290455, "grad_norm": 57.71030807495117, "learning_rate": 1.065759336099585e-05, "loss": 0.3993, "step": 28159 }, { "epoch": 23.369294605809127, "grad_norm": 78.01469421386719, "learning_rate": 1.0657261410788383e-05, "loss": 0.4774, "step": 28160 }, { "epoch": 23.3701244813278, "grad_norm": 53.77218246459961, "learning_rate": 1.0656929460580913e-05, "loss": 0.3283, "step": 28161 }, { "epoch": 23.37095435684647, "grad_norm": 53.29444122314453, "learning_rate": 1.0656597510373445e-05, "loss": 0.65, "step": 28162 }, { "epoch": 23.371784232365144, "grad_norm": 34.40705108642578, "learning_rate": 1.0656265560165977e-05, "loss": 0.5681, "step": 28163 }, { "epoch": 23.372614107883816, "grad_norm": 72.58857727050781, "learning_rate": 1.0655933609958506e-05, "loss": 0.9604, "step": 28164 }, { "epoch": 23.37344398340249, "grad_norm": 17.42185401916504, "learning_rate": 1.0655601659751038e-05, "loss": 0.3012, "step": 28165 }, { "epoch": 23.37427385892116, "grad_norm": 60.93709945678711, "learning_rate": 1.065526970954357e-05, "loss": 0.3346, "step": 28166 }, { "epoch": 23.375103734439833, "grad_norm": 58.94135284423828, "learning_rate": 1.06549377593361e-05, "loss": 0.8566, "step": 28167 }, { "epoch": 23.375933609958505, "grad_norm": 107.89613342285156, "learning_rate": 1.0654605809128631e-05, "loss": 0.4598, "step": 28168 }, { "epoch": 23.376763485477177, "grad_norm": 39.46042251586914, "learning_rate": 1.0654273858921163e-05, "loss": 0.7118, "step": 28169 }, { "epoch": 23.37759336099585, "grad_norm": 54.34628677368164, "learning_rate": 1.0653941908713694e-05, "loss": 0.4896, "step": 28170 }, { 
"epoch": 23.37842323651452, "grad_norm": 61.022789001464844, "learning_rate": 1.0653609958506226e-05, "loss": 0.6356, "step": 28171 }, { "epoch": 23.379253112033194, "grad_norm": 27.591880798339844, "learning_rate": 1.0653278008298754e-05, "loss": 0.3714, "step": 28172 }, { "epoch": 23.380082987551866, "grad_norm": 127.64363098144531, "learning_rate": 1.0652946058091287e-05, "loss": 0.6401, "step": 28173 }, { "epoch": 23.380912863070538, "grad_norm": 111.78471374511719, "learning_rate": 1.0652614107883819e-05, "loss": 0.899, "step": 28174 }, { "epoch": 23.38174273858921, "grad_norm": 43.67997741699219, "learning_rate": 1.065228215767635e-05, "loss": 0.3734, "step": 28175 }, { "epoch": 23.382572614107882, "grad_norm": 24.07756805419922, "learning_rate": 1.065195020746888e-05, "loss": 0.421, "step": 28176 }, { "epoch": 23.383402489626555, "grad_norm": 50.596614837646484, "learning_rate": 1.0651618257261412e-05, "loss": 0.6434, "step": 28177 }, { "epoch": 23.384232365145227, "grad_norm": 29.718454360961914, "learning_rate": 1.0651286307053944e-05, "loss": 0.3497, "step": 28178 }, { "epoch": 23.3850622406639, "grad_norm": 18.216848373413086, "learning_rate": 1.0650954356846474e-05, "loss": 0.431, "step": 28179 }, { "epoch": 23.38589211618257, "grad_norm": 45.56576919555664, "learning_rate": 1.0650622406639005e-05, "loss": 0.3946, "step": 28180 }, { "epoch": 23.386721991701243, "grad_norm": 29.099647521972656, "learning_rate": 1.0650290456431535e-05, "loss": 0.4567, "step": 28181 }, { "epoch": 23.387551867219916, "grad_norm": 50.99724197387695, "learning_rate": 1.0649958506224067e-05, "loss": 0.5181, "step": 28182 }, { "epoch": 23.388381742738588, "grad_norm": 43.59888458251953, "learning_rate": 1.0649626556016599e-05, "loss": 0.6512, "step": 28183 }, { "epoch": 23.38921161825726, "grad_norm": 68.39552307128906, "learning_rate": 1.0649294605809128e-05, "loss": 0.4801, "step": 28184 }, { "epoch": 23.390041493775932, "grad_norm": 28.14064598083496, "learning_rate": 
1.064896265560166e-05, "loss": 0.545, "step": 28185 }, { "epoch": 23.390871369294604, "grad_norm": 27.930456161499023, "learning_rate": 1.0648630705394192e-05, "loss": 0.399, "step": 28186 }, { "epoch": 23.391701244813277, "grad_norm": 40.72842788696289, "learning_rate": 1.0648298755186724e-05, "loss": 0.6382, "step": 28187 }, { "epoch": 23.39253112033195, "grad_norm": 98.51367950439453, "learning_rate": 1.0647966804979255e-05, "loss": 0.3418, "step": 28188 }, { "epoch": 23.39336099585062, "grad_norm": 61.683536529541016, "learning_rate": 1.0647634854771785e-05, "loss": 0.4275, "step": 28189 }, { "epoch": 23.394190871369293, "grad_norm": 45.089202880859375, "learning_rate": 1.0647302904564315e-05, "loss": 0.8425, "step": 28190 }, { "epoch": 23.395020746887965, "grad_norm": 17.132522583007812, "learning_rate": 1.0646970954356848e-05, "loss": 0.2125, "step": 28191 }, { "epoch": 23.395850622406638, "grad_norm": 39.84508514404297, "learning_rate": 1.064663900414938e-05, "loss": 0.3845, "step": 28192 }, { "epoch": 23.39668049792531, "grad_norm": 31.870237350463867, "learning_rate": 1.0646307053941908e-05, "loss": 0.3596, "step": 28193 }, { "epoch": 23.397510373443982, "grad_norm": 69.44148254394531, "learning_rate": 1.064597510373444e-05, "loss": 0.496, "step": 28194 }, { "epoch": 23.398340248962654, "grad_norm": 31.043161392211914, "learning_rate": 1.0645643153526973e-05, "loss": 0.3915, "step": 28195 }, { "epoch": 23.399170124481326, "grad_norm": 67.81697845458984, "learning_rate": 1.0645311203319505e-05, "loss": 0.8641, "step": 28196 }, { "epoch": 23.4, "grad_norm": 26.19744873046875, "learning_rate": 1.0644979253112033e-05, "loss": 0.2526, "step": 28197 }, { "epoch": 23.40082987551867, "grad_norm": 133.5641326904297, "learning_rate": 1.0644647302904566e-05, "loss": 0.3959, "step": 28198 }, { "epoch": 23.401659751037343, "grad_norm": 56.97390365600586, "learning_rate": 1.0644315352697096e-05, "loss": 0.334, "step": 28199 }, { "epoch": 23.402489626556015, "grad_norm": 
19.868661880493164, "learning_rate": 1.0643983402489628e-05, "loss": 0.2933, "step": 28200 }, { "epoch": 23.403319502074687, "grad_norm": 65.04692077636719, "learning_rate": 1.0643651452282158e-05, "loss": 0.5233, "step": 28201 }, { "epoch": 23.40414937759336, "grad_norm": 81.15592956542969, "learning_rate": 1.0643319502074689e-05, "loss": 0.7711, "step": 28202 }, { "epoch": 23.40497925311203, "grad_norm": 38.16195297241211, "learning_rate": 1.0642987551867221e-05, "loss": 0.4434, "step": 28203 }, { "epoch": 23.405809128630704, "grad_norm": 10.59709358215332, "learning_rate": 1.0642655601659753e-05, "loss": 0.2271, "step": 28204 }, { "epoch": 23.406639004149376, "grad_norm": 52.730369567871094, "learning_rate": 1.0642323651452282e-05, "loss": 0.8554, "step": 28205 }, { "epoch": 23.40746887966805, "grad_norm": 43.337432861328125, "learning_rate": 1.0641991701244814e-05, "loss": 0.8059, "step": 28206 }, { "epoch": 23.40829875518672, "grad_norm": 102.15595245361328, "learning_rate": 1.0641659751037346e-05, "loss": 0.3861, "step": 28207 }, { "epoch": 23.409128630705393, "grad_norm": 43.862239837646484, "learning_rate": 1.0641327800829876e-05, "loss": 0.7393, "step": 28208 }, { "epoch": 23.409958506224065, "grad_norm": 71.99832153320312, "learning_rate": 1.0640995850622407e-05, "loss": 0.6671, "step": 28209 }, { "epoch": 23.410788381742737, "grad_norm": 50.28156661987305, "learning_rate": 1.0640663900414939e-05, "loss": 0.6715, "step": 28210 }, { "epoch": 23.41161825726141, "grad_norm": 14.595813751220703, "learning_rate": 1.064033195020747e-05, "loss": 0.272, "step": 28211 }, { "epoch": 23.41244813278008, "grad_norm": 31.032516479492188, "learning_rate": 1.0640000000000001e-05, "loss": 0.3978, "step": 28212 }, { "epoch": 23.413278008298754, "grad_norm": 13.079211235046387, "learning_rate": 1.0639668049792534e-05, "loss": 0.2622, "step": 28213 }, { "epoch": 23.414107883817426, "grad_norm": 38.099639892578125, "learning_rate": 1.0639336099585062e-05, "loss": 0.4455, 
"step": 28214 }, { "epoch": 23.414937759336098, "grad_norm": 25.269649505615234, "learning_rate": 1.0639004149377594e-05, "loss": 0.3277, "step": 28215 }, { "epoch": 23.41576763485477, "grad_norm": 21.88813591003418, "learning_rate": 1.0638672199170127e-05, "loss": 0.2609, "step": 28216 }, { "epoch": 23.416597510373443, "grad_norm": 34.97628402709961, "learning_rate": 1.0638340248962657e-05, "loss": 0.6855, "step": 28217 }, { "epoch": 23.417427385892115, "grad_norm": 28.722747802734375, "learning_rate": 1.0638008298755187e-05, "loss": 0.3781, "step": 28218 }, { "epoch": 23.418257261410787, "grad_norm": 58.74469757080078, "learning_rate": 1.0637676348547718e-05, "loss": 0.7232, "step": 28219 }, { "epoch": 23.41908713692946, "grad_norm": 39.23983383178711, "learning_rate": 1.063734439834025e-05, "loss": 0.4231, "step": 28220 }, { "epoch": 23.41991701244813, "grad_norm": 14.842290878295898, "learning_rate": 1.0637012448132782e-05, "loss": 0.2901, "step": 28221 }, { "epoch": 23.420746887966803, "grad_norm": 52.893428802490234, "learning_rate": 1.063668049792531e-05, "loss": 0.8477, "step": 28222 }, { "epoch": 23.421576763485476, "grad_norm": 31.846355438232422, "learning_rate": 1.0636348547717843e-05, "loss": 0.4888, "step": 28223 }, { "epoch": 23.422406639004148, "grad_norm": 42.44023895263672, "learning_rate": 1.0636016597510375e-05, "loss": 0.4529, "step": 28224 }, { "epoch": 23.42323651452282, "grad_norm": 17.344406127929688, "learning_rate": 1.0635684647302907e-05, "loss": 0.271, "step": 28225 }, { "epoch": 23.424066390041492, "grad_norm": 39.04631042480469, "learning_rate": 1.0635352697095436e-05, "loss": 0.3297, "step": 28226 }, { "epoch": 23.424896265560164, "grad_norm": 47.92350387573242, "learning_rate": 1.0635020746887968e-05, "loss": 0.6298, "step": 28227 }, { "epoch": 23.425726141078837, "grad_norm": 23.351167678833008, "learning_rate": 1.0634688796680498e-05, "loss": 0.3621, "step": 28228 }, { "epoch": 23.42655601659751, "grad_norm": 46.972129821777344, 
"learning_rate": 1.063435684647303e-05, "loss": 0.4029, "step": 28229 }, { "epoch": 23.42738589211618, "grad_norm": 13.523698806762695, "learning_rate": 1.063402489626556e-05, "loss": 0.2771, "step": 28230 }, { "epoch": 23.428215767634853, "grad_norm": 57.26242446899414, "learning_rate": 1.0633692946058091e-05, "loss": 1.1628, "step": 28231 }, { "epoch": 23.429045643153525, "grad_norm": 36.790584564208984, "learning_rate": 1.0633360995850623e-05, "loss": 0.5293, "step": 28232 }, { "epoch": 23.429875518672198, "grad_norm": 27.897003173828125, "learning_rate": 1.0633029045643155e-05, "loss": 0.4581, "step": 28233 }, { "epoch": 23.43070539419087, "grad_norm": 88.84564208984375, "learning_rate": 1.0632697095435684e-05, "loss": 0.5118, "step": 28234 }, { "epoch": 23.431535269709542, "grad_norm": 30.089447021484375, "learning_rate": 1.0632365145228216e-05, "loss": 0.4649, "step": 28235 }, { "epoch": 23.432365145228214, "grad_norm": 71.36152648925781, "learning_rate": 1.0632033195020748e-05, "loss": 0.8466, "step": 28236 }, { "epoch": 23.433195020746886, "grad_norm": 59.79035568237305, "learning_rate": 1.0631701244813279e-05, "loss": 0.4321, "step": 28237 }, { "epoch": 23.43402489626556, "grad_norm": 45.77546310424805, "learning_rate": 1.063136929460581e-05, "loss": 0.4301, "step": 28238 }, { "epoch": 23.43485477178423, "grad_norm": 12.04668140411377, "learning_rate": 1.0631037344398341e-05, "loss": 0.2136, "step": 28239 }, { "epoch": 23.435684647302903, "grad_norm": 16.89421272277832, "learning_rate": 1.0630705394190872e-05, "loss": 0.3288, "step": 28240 }, { "epoch": 23.436514522821575, "grad_norm": 80.6964111328125, "learning_rate": 1.0630373443983404e-05, "loss": 0.4687, "step": 28241 }, { "epoch": 23.437344398340247, "grad_norm": 15.131956100463867, "learning_rate": 1.0630041493775936e-05, "loss": 0.2478, "step": 28242 }, { "epoch": 23.43817427385892, "grad_norm": 32.23851013183594, "learning_rate": 1.0629709543568465e-05, "loss": 0.4265, "step": 28243 }, { "epoch": 
23.439004149377592, "grad_norm": 16.771320343017578, "learning_rate": 1.0629377593360997e-05, "loss": 0.3034, "step": 28244 }, { "epoch": 23.439834024896264, "grad_norm": 42.51218795776367, "learning_rate": 1.0629045643153529e-05, "loss": 0.5521, "step": 28245 }, { "epoch": 23.440663900414936, "grad_norm": 53.53926086425781, "learning_rate": 1.062871369294606e-05, "loss": 0.5728, "step": 28246 }, { "epoch": 23.44149377593361, "grad_norm": 26.27942657470703, "learning_rate": 1.062838174273859e-05, "loss": 0.5338, "step": 28247 }, { "epoch": 23.44232365145228, "grad_norm": 14.10272216796875, "learning_rate": 1.0628049792531122e-05, "loss": 0.2814, "step": 28248 }, { "epoch": 23.443153526970953, "grad_norm": 71.8402328491211, "learning_rate": 1.0627717842323652e-05, "loss": 0.3148, "step": 28249 }, { "epoch": 23.443983402489625, "grad_norm": 116.32501220703125, "learning_rate": 1.0627385892116184e-05, "loss": 0.4948, "step": 28250 }, { "epoch": 23.444813278008297, "grad_norm": 51.48579025268555, "learning_rate": 1.0627053941908713e-05, "loss": 0.5621, "step": 28251 }, { "epoch": 23.44564315352697, "grad_norm": 61.353614807128906, "learning_rate": 1.0626721991701245e-05, "loss": 0.7511, "step": 28252 }, { "epoch": 23.44647302904564, "grad_norm": 107.67402648925781, "learning_rate": 1.0626390041493777e-05, "loss": 0.8154, "step": 28253 }, { "epoch": 23.447302904564314, "grad_norm": 20.29522705078125, "learning_rate": 1.062605809128631e-05, "loss": 0.3057, "step": 28254 }, { "epoch": 23.448132780082986, "grad_norm": 54.283416748046875, "learning_rate": 1.0625726141078838e-05, "loss": 0.361, "step": 28255 }, { "epoch": 23.448962655601658, "grad_norm": 16.972034454345703, "learning_rate": 1.062539419087137e-05, "loss": 0.3644, "step": 28256 }, { "epoch": 23.44979253112033, "grad_norm": 10.059671401977539, "learning_rate": 1.0625062240663902e-05, "loss": 0.2596, "step": 28257 }, { "epoch": 23.450622406639003, "grad_norm": 71.48289489746094, "learning_rate": 
1.0624730290456433e-05, "loss": 0.5787, "step": 28258 }, { "epoch": 23.451452282157675, "grad_norm": 61.47054672241211, "learning_rate": 1.0624398340248963e-05, "loss": 1.2665, "step": 28259 }, { "epoch": 23.452282157676347, "grad_norm": 19.522947311401367, "learning_rate": 1.0624066390041494e-05, "loss": 0.3404, "step": 28260 }, { "epoch": 23.45311203319502, "grad_norm": 81.14774322509766, "learning_rate": 1.0623734439834026e-05, "loss": 0.531, "step": 28261 }, { "epoch": 23.45394190871369, "grad_norm": 24.916004180908203, "learning_rate": 1.0623402489626558e-05, "loss": 0.3053, "step": 28262 }, { "epoch": 23.454771784232364, "grad_norm": 38.479312896728516, "learning_rate": 1.0623070539419086e-05, "loss": 0.4661, "step": 28263 }, { "epoch": 23.455601659751036, "grad_norm": 79.41800689697266, "learning_rate": 1.0622738589211619e-05, "loss": 0.675, "step": 28264 }, { "epoch": 23.456431535269708, "grad_norm": 15.825111389160156, "learning_rate": 1.062240663900415e-05, "loss": 0.2965, "step": 28265 }, { "epoch": 23.45726141078838, "grad_norm": 43.035945892333984, "learning_rate": 1.0622074688796681e-05, "loss": 0.7811, "step": 28266 }, { "epoch": 23.458091286307052, "grad_norm": 16.2401180267334, "learning_rate": 1.0621742738589213e-05, "loss": 0.3066, "step": 28267 }, { "epoch": 23.458921161825725, "grad_norm": 85.91749572753906, "learning_rate": 1.0621410788381744e-05, "loss": 0.4415, "step": 28268 }, { "epoch": 23.459751037344397, "grad_norm": 143.4710693359375, "learning_rate": 1.0621078838174274e-05, "loss": 1.0038, "step": 28269 }, { "epoch": 23.46058091286307, "grad_norm": 36.56217575073242, "learning_rate": 1.0620746887966806e-05, "loss": 0.4513, "step": 28270 }, { "epoch": 23.46141078838174, "grad_norm": 11.390599250793457, "learning_rate": 1.0620414937759338e-05, "loss": 0.195, "step": 28271 }, { "epoch": 23.462240663900413, "grad_norm": 54.48493576049805, "learning_rate": 1.0620082987551867e-05, "loss": 0.5, "step": 28272 }, { "epoch": 23.463070539419085, 
"grad_norm": 39.51022720336914, "learning_rate": 1.0619751037344399e-05, "loss": 0.4949, "step": 28273 }, { "epoch": 23.463900414937758, "grad_norm": 40.66103744506836, "learning_rate": 1.0619419087136931e-05, "loss": 0.3514, "step": 28274 }, { "epoch": 23.46473029045643, "grad_norm": 57.93920135498047, "learning_rate": 1.0619087136929462e-05, "loss": 0.5915, "step": 28275 }, { "epoch": 23.465560165975102, "grad_norm": 16.439498901367188, "learning_rate": 1.0618755186721992e-05, "loss": 0.3855, "step": 28276 }, { "epoch": 23.466390041493774, "grad_norm": 27.752710342407227, "learning_rate": 1.0618423236514524e-05, "loss": 0.3341, "step": 28277 }, { "epoch": 23.467219917012446, "grad_norm": 26.72471809387207, "learning_rate": 1.0618091286307055e-05, "loss": 0.3646, "step": 28278 }, { "epoch": 23.46804979253112, "grad_norm": 102.77103424072266, "learning_rate": 1.0617759336099587e-05, "loss": 1.3627, "step": 28279 }, { "epoch": 23.46887966804979, "grad_norm": 22.035524368286133, "learning_rate": 1.0617427385892117e-05, "loss": 0.2725, "step": 28280 }, { "epoch": 23.469709543568463, "grad_norm": 105.48863983154297, "learning_rate": 1.0617095435684647e-05, "loss": 0.5527, "step": 28281 }, { "epoch": 23.470539419087135, "grad_norm": 81.55166625976562, "learning_rate": 1.061676348547718e-05, "loss": 0.8161, "step": 28282 }, { "epoch": 23.471369294605807, "grad_norm": 63.09757614135742, "learning_rate": 1.0616431535269712e-05, "loss": 0.2191, "step": 28283 }, { "epoch": 23.47219917012448, "grad_norm": 60.408164978027344, "learning_rate": 1.061609958506224e-05, "loss": 0.7773, "step": 28284 }, { "epoch": 23.473029045643152, "grad_norm": 15.369781494140625, "learning_rate": 1.0615767634854772e-05, "loss": 0.2706, "step": 28285 }, { "epoch": 23.473858921161824, "grad_norm": 73.59441375732422, "learning_rate": 1.0615435684647305e-05, "loss": 0.5986, "step": 28286 }, { "epoch": 23.474688796680496, "grad_norm": 54.96472930908203, "learning_rate": 1.0615103734439835e-05, "loss": 
0.4672, "step": 28287 }, { "epoch": 23.47551867219917, "grad_norm": 34.58168029785156, "learning_rate": 1.0614771784232365e-05, "loss": 0.4439, "step": 28288 }, { "epoch": 23.47634854771784, "grad_norm": 47.83342742919922, "learning_rate": 1.0614439834024896e-05, "loss": 0.57, "step": 28289 }, { "epoch": 23.477178423236513, "grad_norm": 23.043312072753906, "learning_rate": 1.0614107883817428e-05, "loss": 0.3379, "step": 28290 }, { "epoch": 23.478008298755185, "grad_norm": 26.973176956176758, "learning_rate": 1.061377593360996e-05, "loss": 0.4993, "step": 28291 }, { "epoch": 23.478838174273857, "grad_norm": 23.89747428894043, "learning_rate": 1.0613443983402492e-05, "loss": 0.2471, "step": 28292 }, { "epoch": 23.47966804979253, "grad_norm": 27.375591278076172, "learning_rate": 1.0613112033195021e-05, "loss": 0.4336, "step": 28293 }, { "epoch": 23.4804979253112, "grad_norm": 219.10447692871094, "learning_rate": 1.0612780082987553e-05, "loss": 0.6488, "step": 28294 }, { "epoch": 23.481327800829874, "grad_norm": 101.12503814697266, "learning_rate": 1.0612448132780085e-05, "loss": 0.3602, "step": 28295 }, { "epoch": 23.482157676348546, "grad_norm": 35.10409927368164, "learning_rate": 1.0612116182572615e-05, "loss": 0.4289, "step": 28296 }, { "epoch": 23.482987551867218, "grad_norm": 61.537742614746094, "learning_rate": 1.0611784232365146e-05, "loss": 0.7173, "step": 28297 }, { "epoch": 23.48381742738589, "grad_norm": 31.50737762451172, "learning_rate": 1.0611452282157676e-05, "loss": 0.3156, "step": 28298 }, { "epoch": 23.484647302904563, "grad_norm": 31.658048629760742, "learning_rate": 1.0611120331950208e-05, "loss": 0.5856, "step": 28299 }, { "epoch": 23.485477178423235, "grad_norm": 12.581448554992676, "learning_rate": 1.061078838174274e-05, "loss": 0.2924, "step": 28300 }, { "epoch": 23.486307053941907, "grad_norm": 27.672073364257812, "learning_rate": 1.061045643153527e-05, "loss": 0.399, "step": 28301 }, { "epoch": 23.48713692946058, "grad_norm": 
89.38491821289062, "learning_rate": 1.0610124481327801e-05, "loss": 1.0884, "step": 28302 }, { "epoch": 23.48796680497925, "grad_norm": 36.39669418334961, "learning_rate": 1.0609792531120333e-05, "loss": 0.3082, "step": 28303 }, { "epoch": 23.488796680497924, "grad_norm": 27.721837997436523, "learning_rate": 1.0609460580912866e-05, "loss": 0.2766, "step": 28304 }, { "epoch": 23.489626556016596, "grad_norm": 77.66189575195312, "learning_rate": 1.0609128630705394e-05, "loss": 1.0331, "step": 28305 }, { "epoch": 23.490456431535268, "grad_norm": 65.28570556640625, "learning_rate": 1.0608796680497926e-05, "loss": 0.4609, "step": 28306 }, { "epoch": 23.49128630705394, "grad_norm": 34.847801208496094, "learning_rate": 1.0608464730290457e-05, "loss": 0.6017, "step": 28307 }, { "epoch": 23.492116182572612, "grad_norm": 85.52220916748047, "learning_rate": 1.0608132780082989e-05, "loss": 0.4617, "step": 28308 }, { "epoch": 23.492946058091285, "grad_norm": 96.60321044921875, "learning_rate": 1.060780082987552e-05, "loss": 0.507, "step": 28309 }, { "epoch": 23.49377593360996, "grad_norm": 19.687768936157227, "learning_rate": 1.060746887966805e-05, "loss": 0.4423, "step": 28310 }, { "epoch": 23.49460580912863, "grad_norm": 51.53606033325195, "learning_rate": 1.0607136929460582e-05, "loss": 0.495, "step": 28311 }, { "epoch": 23.495435684647305, "grad_norm": 37.56342315673828, "learning_rate": 1.0606804979253114e-05, "loss": 0.3846, "step": 28312 }, { "epoch": 23.496265560165973, "grad_norm": 27.519012451171875, "learning_rate": 1.0606473029045643e-05, "loss": 0.4093, "step": 28313 }, { "epoch": 23.49709543568465, "grad_norm": 54.88157272338867, "learning_rate": 1.0606141078838175e-05, "loss": 0.7257, "step": 28314 }, { "epoch": 23.497925311203318, "grad_norm": 29.49013328552246, "learning_rate": 1.0605809128630707e-05, "loss": 0.5012, "step": 28315 }, { "epoch": 23.498755186721993, "grad_norm": 38.21876907348633, "learning_rate": 1.0605477178423237e-05, "loss": 0.6284, "step": 
28316 }, { "epoch": 23.499585062240662, "grad_norm": 191.2943572998047, "learning_rate": 1.0605145228215768e-05, "loss": 0.4665, "step": 28317 }, { "epoch": 23.500414937759338, "grad_norm": 70.73542785644531, "learning_rate": 1.06048132780083e-05, "loss": 0.7011, "step": 28318 }, { "epoch": 23.501244813278007, "grad_norm": 19.056766510009766, "learning_rate": 1.060448132780083e-05, "loss": 0.3169, "step": 28319 }, { "epoch": 23.502074688796682, "grad_norm": 41.962337493896484, "learning_rate": 1.0604149377593362e-05, "loss": 0.7436, "step": 28320 }, { "epoch": 23.50290456431535, "grad_norm": 23.425077438354492, "learning_rate": 1.0603817427385894e-05, "loss": 0.4464, "step": 28321 }, { "epoch": 23.503734439834027, "grad_norm": 88.73613739013672, "learning_rate": 1.0603485477178423e-05, "loss": 1.1766, "step": 28322 }, { "epoch": 23.504564315352695, "grad_norm": 6.599068641662598, "learning_rate": 1.0603153526970955e-05, "loss": 0.2068, "step": 28323 }, { "epoch": 23.50539419087137, "grad_norm": 35.85247802734375, "learning_rate": 1.0602821576763487e-05, "loss": 0.4457, "step": 28324 }, { "epoch": 23.50622406639004, "grad_norm": 72.95858001708984, "learning_rate": 1.0602489626556018e-05, "loss": 0.5227, "step": 28325 }, { "epoch": 23.507053941908715, "grad_norm": 45.54335021972656, "learning_rate": 1.0602157676348548e-05, "loss": 0.2751, "step": 28326 }, { "epoch": 23.507883817427384, "grad_norm": 128.41744995117188, "learning_rate": 1.060182572614108e-05, "loss": 1.7513, "step": 28327 }, { "epoch": 23.50871369294606, "grad_norm": 10.847060203552246, "learning_rate": 1.060149377593361e-05, "loss": 0.3086, "step": 28328 }, { "epoch": 23.50954356846473, "grad_norm": 15.640275955200195, "learning_rate": 1.0601161825726143e-05, "loss": 0.4137, "step": 28329 }, { "epoch": 23.510373443983404, "grad_norm": 65.62957763671875, "learning_rate": 1.0600829875518672e-05, "loss": 0.6565, "step": 28330 }, { "epoch": 23.511203319502073, "grad_norm": 26.43541717529297, 
"learning_rate": 1.0600497925311204e-05, "loss": 0.2862, "step": 28331 }, { "epoch": 23.51203319502075, "grad_norm": 52.76491165161133, "learning_rate": 1.0600165975103736e-05, "loss": 0.4657, "step": 28332 }, { "epoch": 23.512863070539417, "grad_norm": 51.19832229614258, "learning_rate": 1.0599834024896268e-05, "loss": 0.5922, "step": 28333 }, { "epoch": 23.513692946058093, "grad_norm": 29.2532901763916, "learning_rate": 1.0599502074688797e-05, "loss": 0.4833, "step": 28334 }, { "epoch": 23.51452282157676, "grad_norm": 56.29335021972656, "learning_rate": 1.0599170124481329e-05, "loss": 0.7076, "step": 28335 }, { "epoch": 23.515352697095437, "grad_norm": 52.841339111328125, "learning_rate": 1.0598838174273859e-05, "loss": 0.572, "step": 28336 }, { "epoch": 23.51618257261411, "grad_norm": 104.65593719482422, "learning_rate": 1.0598506224066391e-05, "loss": 0.5225, "step": 28337 }, { "epoch": 23.517012448132782, "grad_norm": 42.428504943847656, "learning_rate": 1.0598174273858922e-05, "loss": 0.3498, "step": 28338 }, { "epoch": 23.517842323651454, "grad_norm": 54.46934509277344, "learning_rate": 1.0597842323651452e-05, "loss": 0.3794, "step": 28339 }, { "epoch": 23.518672199170126, "grad_norm": 73.147705078125, "learning_rate": 1.0597510373443984e-05, "loss": 0.9005, "step": 28340 }, { "epoch": 23.5195020746888, "grad_norm": 30.270273208618164, "learning_rate": 1.0597178423236516e-05, "loss": 0.3227, "step": 28341 }, { "epoch": 23.52033195020747, "grad_norm": 54.05704116821289, "learning_rate": 1.0596846473029045e-05, "loss": 0.3949, "step": 28342 }, { "epoch": 23.521161825726143, "grad_norm": 47.421268463134766, "learning_rate": 1.0596514522821577e-05, "loss": 0.2988, "step": 28343 }, { "epoch": 23.521991701244815, "grad_norm": 36.427284240722656, "learning_rate": 1.059618257261411e-05, "loss": 0.3678, "step": 28344 }, { "epoch": 23.522821576763487, "grad_norm": 40.54206466674805, "learning_rate": 1.059585062240664e-05, "loss": 0.563, "step": 28345 }, { "epoch": 
23.52365145228216, "grad_norm": 54.01309585571289, "learning_rate": 1.0595518672199172e-05, "loss": 0.429, "step": 28346 }, { "epoch": 23.52448132780083, "grad_norm": 31.59520149230957, "learning_rate": 1.0595186721991702e-05, "loss": 0.3547, "step": 28347 }, { "epoch": 23.525311203319504, "grad_norm": 19.209999084472656, "learning_rate": 1.0594854771784233e-05, "loss": 0.2625, "step": 28348 }, { "epoch": 23.526141078838176, "grad_norm": 36.083675384521484, "learning_rate": 1.0594522821576765e-05, "loss": 0.4643, "step": 28349 }, { "epoch": 23.526970954356848, "grad_norm": 56.860130310058594, "learning_rate": 1.0594190871369297e-05, "loss": 0.4589, "step": 28350 }, { "epoch": 23.52780082987552, "grad_norm": 20.298051834106445, "learning_rate": 1.0593858921161826e-05, "loss": 0.4286, "step": 28351 }, { "epoch": 23.528630705394193, "grad_norm": 55.31494903564453, "learning_rate": 1.0593526970954358e-05, "loss": 0.5796, "step": 28352 }, { "epoch": 23.529460580912865, "grad_norm": 19.49172019958496, "learning_rate": 1.059319502074689e-05, "loss": 0.3331, "step": 28353 }, { "epoch": 23.530290456431537, "grad_norm": 29.409395217895508, "learning_rate": 1.059286307053942e-05, "loss": 0.4651, "step": 28354 }, { "epoch": 23.53112033195021, "grad_norm": 13.81529426574707, "learning_rate": 1.059253112033195e-05, "loss": 0.3271, "step": 28355 }, { "epoch": 23.53195020746888, "grad_norm": 68.0404052734375, "learning_rate": 1.0592199170124483e-05, "loss": 0.3784, "step": 28356 }, { "epoch": 23.532780082987554, "grad_norm": 28.80341339111328, "learning_rate": 1.0591867219917013e-05, "loss": 0.4761, "step": 28357 }, { "epoch": 23.533609958506226, "grad_norm": 34.843502044677734, "learning_rate": 1.0591535269709545e-05, "loss": 0.412, "step": 28358 }, { "epoch": 23.534439834024898, "grad_norm": 64.35335540771484, "learning_rate": 1.0591203319502074e-05, "loss": 0.466, "step": 28359 }, { "epoch": 23.53526970954357, "grad_norm": 83.06724548339844, "learning_rate": 
1.0590871369294606e-05, "loss": 0.4894, "step": 28360 }, { "epoch": 23.536099585062242, "grad_norm": 29.272369384765625, "learning_rate": 1.0590539419087138e-05, "loss": 0.351, "step": 28361 }, { "epoch": 23.536929460580915, "grad_norm": 32.83324432373047, "learning_rate": 1.059020746887967e-05, "loss": 0.3057, "step": 28362 }, { "epoch": 23.537759336099587, "grad_norm": 26.70243263244629, "learning_rate": 1.0589875518672199e-05, "loss": 0.3625, "step": 28363 }, { "epoch": 23.53858921161826, "grad_norm": 148.85150146484375, "learning_rate": 1.0589543568464731e-05, "loss": 0.4446, "step": 28364 }, { "epoch": 23.53941908713693, "grad_norm": 12.793055534362793, "learning_rate": 1.0589211618257263e-05, "loss": 0.2725, "step": 28365 }, { "epoch": 23.540248962655603, "grad_norm": 26.75998306274414, "learning_rate": 1.0588879668049794e-05, "loss": 0.2861, "step": 28366 }, { "epoch": 23.541078838174275, "grad_norm": 16.866596221923828, "learning_rate": 1.0588547717842324e-05, "loss": 0.2532, "step": 28367 }, { "epoch": 23.541908713692948, "grad_norm": 54.638301849365234, "learning_rate": 1.0588215767634854e-05, "loss": 0.4319, "step": 28368 }, { "epoch": 23.54273858921162, "grad_norm": 37.095603942871094, "learning_rate": 1.0587883817427387e-05, "loss": 0.587, "step": 28369 }, { "epoch": 23.543568464730292, "grad_norm": 47.44214630126953, "learning_rate": 1.0587551867219919e-05, "loss": 0.5975, "step": 28370 }, { "epoch": 23.544398340248964, "grad_norm": 38.884315490722656, "learning_rate": 1.0587219917012447e-05, "loss": 0.5857, "step": 28371 }, { "epoch": 23.545228215767636, "grad_norm": 28.154870986938477, "learning_rate": 1.058688796680498e-05, "loss": 0.7329, "step": 28372 }, { "epoch": 23.54605809128631, "grad_norm": 71.34797668457031, "learning_rate": 1.0586556016597512e-05, "loss": 0.8922, "step": 28373 }, { "epoch": 23.54688796680498, "grad_norm": 35.68897247314453, "learning_rate": 1.0586224066390044e-05, "loss": 0.6229, "step": 28374 }, { "epoch": 
23.547717842323653, "grad_norm": 31.195079803466797, "learning_rate": 1.0585892116182574e-05, "loss": 0.5485, "step": 28375 }, { "epoch": 23.548547717842325, "grad_norm": 18.259122848510742, "learning_rate": 1.0585560165975104e-05, "loss": 0.269, "step": 28376 }, { "epoch": 23.549377593360997, "grad_norm": 11.612186431884766, "learning_rate": 1.0585228215767635e-05, "loss": 0.2693, "step": 28377 }, { "epoch": 23.55020746887967, "grad_norm": 103.2024917602539, "learning_rate": 1.0584896265560167e-05, "loss": 0.9005, "step": 28378 }, { "epoch": 23.551037344398342, "grad_norm": 22.47696876525879, "learning_rate": 1.0584564315352699e-05, "loss": 0.2865, "step": 28379 }, { "epoch": 23.551867219917014, "grad_norm": 15.766617774963379, "learning_rate": 1.0584232365145228e-05, "loss": 0.2829, "step": 28380 }, { "epoch": 23.552697095435686, "grad_norm": 39.051841735839844, "learning_rate": 1.058390041493776e-05, "loss": 0.6857, "step": 28381 }, { "epoch": 23.55352697095436, "grad_norm": 34.163490295410156, "learning_rate": 1.0583568464730292e-05, "loss": 0.5571, "step": 28382 }, { "epoch": 23.55435684647303, "grad_norm": 142.85305786132812, "learning_rate": 1.0583236514522822e-05, "loss": 0.6901, "step": 28383 }, { "epoch": 23.555186721991703, "grad_norm": 40.36769104003906, "learning_rate": 1.0582904564315353e-05, "loss": 0.2998, "step": 28384 }, { "epoch": 23.556016597510375, "grad_norm": 25.34677505493164, "learning_rate": 1.0582572614107885e-05, "loss": 0.5978, "step": 28385 }, { "epoch": 23.556846473029047, "grad_norm": 32.41386032104492, "learning_rate": 1.0582240663900415e-05, "loss": 0.6512, "step": 28386 }, { "epoch": 23.55767634854772, "grad_norm": 20.941823959350586, "learning_rate": 1.0581908713692948e-05, "loss": 0.4172, "step": 28387 }, { "epoch": 23.55850622406639, "grad_norm": 175.50039672851562, "learning_rate": 1.0581576763485478e-05, "loss": 0.4803, "step": 28388 }, { "epoch": 23.559336099585064, "grad_norm": 109.71296691894531, "learning_rate": 
1.0581244813278008e-05, "loss": 1.1814, "step": 28389 }, { "epoch": 23.560165975103736, "grad_norm": 30.289457321166992, "learning_rate": 1.058091286307054e-05, "loss": 0.2917, "step": 28390 }, { "epoch": 23.560995850622408, "grad_norm": 14.953166007995605, "learning_rate": 1.0580580912863073e-05, "loss": 0.3001, "step": 28391 }, { "epoch": 23.56182572614108, "grad_norm": 35.89872741699219, "learning_rate": 1.0580248962655601e-05, "loss": 0.3789, "step": 28392 }, { "epoch": 23.562655601659753, "grad_norm": 22.909751892089844, "learning_rate": 1.0579917012448133e-05, "loss": 0.5394, "step": 28393 }, { "epoch": 23.563485477178425, "grad_norm": 78.92263793945312, "learning_rate": 1.0579585062240665e-05, "loss": 1.3085, "step": 28394 }, { "epoch": 23.564315352697097, "grad_norm": 14.500053405761719, "learning_rate": 1.0579253112033196e-05, "loss": 0.2729, "step": 28395 }, { "epoch": 23.56514522821577, "grad_norm": 39.52083206176758, "learning_rate": 1.0578921161825726e-05, "loss": 0.4067, "step": 28396 }, { "epoch": 23.56597510373444, "grad_norm": 28.3655948638916, "learning_rate": 1.0578589211618258e-05, "loss": 0.3756, "step": 28397 }, { "epoch": 23.566804979253114, "grad_norm": 64.29268646240234, "learning_rate": 1.0578257261410789e-05, "loss": 0.3908, "step": 28398 }, { "epoch": 23.567634854771786, "grad_norm": 34.82027053833008, "learning_rate": 1.0577925311203321e-05, "loss": 0.4618, "step": 28399 }, { "epoch": 23.568464730290458, "grad_norm": 108.6723403930664, "learning_rate": 1.0577593360995853e-05, "loss": 0.5699, "step": 28400 }, { "epoch": 23.56929460580913, "grad_norm": 55.34499740600586, "learning_rate": 1.0577261410788382e-05, "loss": 0.7792, "step": 28401 }, { "epoch": 23.570124481327802, "grad_norm": 48.84685516357422, "learning_rate": 1.0576929460580914e-05, "loss": 0.9469, "step": 28402 }, { "epoch": 23.570954356846475, "grad_norm": 64.41632843017578, "learning_rate": 1.0576597510373446e-05, "loss": 0.742, "step": 28403 }, { "epoch": 
23.571784232365147, "grad_norm": 15.631187438964844, "learning_rate": 1.0576265560165976e-05, "loss": 0.4149, "step": 28404 }, { "epoch": 23.57261410788382, "grad_norm": 63.13266372680664, "learning_rate": 1.0575933609958507e-05, "loss": 0.9743, "step": 28405 }, { "epoch": 23.57344398340249, "grad_norm": 27.568674087524414, "learning_rate": 1.0575601659751037e-05, "loss": 0.4865, "step": 28406 }, { "epoch": 23.574273858921163, "grad_norm": 82.66500091552734, "learning_rate": 1.057526970954357e-05, "loss": 0.6105, "step": 28407 }, { "epoch": 23.575103734439836, "grad_norm": 45.474979400634766, "learning_rate": 1.0574937759336101e-05, "loss": 0.4601, "step": 28408 }, { "epoch": 23.575933609958508, "grad_norm": 17.069555282592773, "learning_rate": 1.057460580912863e-05, "loss": 0.3, "step": 28409 }, { "epoch": 23.57676348547718, "grad_norm": 61.05656433105469, "learning_rate": 1.0574273858921162e-05, "loss": 0.7969, "step": 28410 }, { "epoch": 23.577593360995852, "grad_norm": 67.97705078125, "learning_rate": 1.0573941908713694e-05, "loss": 0.3918, "step": 28411 }, { "epoch": 23.578423236514524, "grad_norm": 14.180342674255371, "learning_rate": 1.0573609958506226e-05, "loss": 0.2823, "step": 28412 }, { "epoch": 23.579253112033197, "grad_norm": 22.426578521728516, "learning_rate": 1.0573278008298755e-05, "loss": 0.3577, "step": 28413 }, { "epoch": 23.58008298755187, "grad_norm": 91.35912322998047, "learning_rate": 1.0572946058091287e-05, "loss": 0.7396, "step": 28414 }, { "epoch": 23.58091286307054, "grad_norm": 78.24507141113281, "learning_rate": 1.0572614107883818e-05, "loss": 0.5984, "step": 28415 }, { "epoch": 23.581742738589213, "grad_norm": 121.0467529296875, "learning_rate": 1.057228215767635e-05, "loss": 0.6829, "step": 28416 }, { "epoch": 23.582572614107885, "grad_norm": 26.513221740722656, "learning_rate": 1.057195020746888e-05, "loss": 0.3041, "step": 28417 }, { "epoch": 23.583402489626557, "grad_norm": 35.40699005126953, "learning_rate": 
1.057161825726141e-05, "loss": 0.3795, "step": 28418 }, { "epoch": 23.58423236514523, "grad_norm": 44.42613983154297, "learning_rate": 1.0571286307053943e-05, "loss": 0.8573, "step": 28419 }, { "epoch": 23.585062240663902, "grad_norm": 26.276803970336914, "learning_rate": 1.0570954356846475e-05, "loss": 0.3304, "step": 28420 }, { "epoch": 23.585892116182574, "grad_norm": 50.82539749145508, "learning_rate": 1.0570622406639004e-05, "loss": 0.784, "step": 28421 }, { "epoch": 23.586721991701246, "grad_norm": 182.2230987548828, "learning_rate": 1.0570290456431536e-05, "loss": 0.6651, "step": 28422 }, { "epoch": 23.58755186721992, "grad_norm": 26.331989288330078, "learning_rate": 1.0569958506224068e-05, "loss": 0.437, "step": 28423 }, { "epoch": 23.58838174273859, "grad_norm": 31.106525421142578, "learning_rate": 1.0569626556016598e-05, "loss": 0.5482, "step": 28424 }, { "epoch": 23.589211618257263, "grad_norm": 32.49524688720703, "learning_rate": 1.056929460580913e-05, "loss": 0.3603, "step": 28425 }, { "epoch": 23.590041493775935, "grad_norm": 47.58671951293945, "learning_rate": 1.056896265560166e-05, "loss": 0.6142, "step": 28426 }, { "epoch": 23.590871369294607, "grad_norm": 26.206838607788086, "learning_rate": 1.0568630705394191e-05, "loss": 0.2858, "step": 28427 }, { "epoch": 23.59170124481328, "grad_norm": 35.08416748046875, "learning_rate": 1.0568298755186723e-05, "loss": 0.8006, "step": 28428 }, { "epoch": 23.59253112033195, "grad_norm": 21.29720687866211, "learning_rate": 1.0567966804979255e-05, "loss": 0.3794, "step": 28429 }, { "epoch": 23.593360995850624, "grad_norm": 35.30762481689453, "learning_rate": 1.0567634854771784e-05, "loss": 0.3987, "step": 28430 }, { "epoch": 23.594190871369296, "grad_norm": 22.35689353942871, "learning_rate": 1.0567302904564316e-05, "loss": 0.4034, "step": 28431 }, { "epoch": 23.59502074688797, "grad_norm": 47.886619567871094, "learning_rate": 1.0566970954356848e-05, "loss": 0.7289, "step": 28432 }, { "epoch": 23.59585062240664, 
"grad_norm": 122.34854888916016, "learning_rate": 1.0566639004149379e-05, "loss": 0.5312, "step": 28433 }, { "epoch": 23.596680497925313, "grad_norm": 23.66934585571289, "learning_rate": 1.0566307053941909e-05, "loss": 0.3915, "step": 28434 }, { "epoch": 23.597510373443985, "grad_norm": 47.52165985107422, "learning_rate": 1.0565975103734441e-05, "loss": 0.7576, "step": 28435 }, { "epoch": 23.598340248962657, "grad_norm": 45.06499481201172, "learning_rate": 1.0565643153526972e-05, "loss": 0.3676, "step": 28436 }, { "epoch": 23.59917012448133, "grad_norm": 57.29947280883789, "learning_rate": 1.0565311203319504e-05, "loss": 0.503, "step": 28437 }, { "epoch": 23.6, "grad_norm": 45.55440902709961, "learning_rate": 1.0564979253112032e-05, "loss": 0.457, "step": 28438 }, { "epoch": 23.600829875518674, "grad_norm": 117.33165740966797, "learning_rate": 1.0564647302904565e-05, "loss": 0.8574, "step": 28439 }, { "epoch": 23.601659751037346, "grad_norm": 37.69651794433594, "learning_rate": 1.0564315352697097e-05, "loss": 0.4669, "step": 28440 }, { "epoch": 23.602489626556018, "grad_norm": 8.612407684326172, "learning_rate": 1.0563983402489629e-05, "loss": 0.242, "step": 28441 }, { "epoch": 23.60331950207469, "grad_norm": 27.032394409179688, "learning_rate": 1.0563651452282158e-05, "loss": 0.3669, "step": 28442 }, { "epoch": 23.604149377593362, "grad_norm": 44.33055114746094, "learning_rate": 1.056331950207469e-05, "loss": 0.6011, "step": 28443 }, { "epoch": 23.604979253112035, "grad_norm": 20.762317657470703, "learning_rate": 1.0562987551867222e-05, "loss": 0.3272, "step": 28444 }, { "epoch": 23.605809128630707, "grad_norm": 26.827550888061523, "learning_rate": 1.0562655601659752e-05, "loss": 0.413, "step": 28445 }, { "epoch": 23.60663900414938, "grad_norm": 45.406272888183594, "learning_rate": 1.0562323651452283e-05, "loss": 0.6767, "step": 28446 }, { "epoch": 23.60746887966805, "grad_norm": 129.3284149169922, "learning_rate": 1.0561991701244813e-05, "loss": 0.4809, "step": 
28447 }, { "epoch": 23.608298755186723, "grad_norm": 27.225635528564453, "learning_rate": 1.0561659751037345e-05, "loss": 0.3485, "step": 28448 }, { "epoch": 23.609128630705396, "grad_norm": 49.11704635620117, "learning_rate": 1.0561327800829877e-05, "loss": 0.9512, "step": 28449 }, { "epoch": 23.609958506224068, "grad_norm": 13.488654136657715, "learning_rate": 1.0560995850622406e-05, "loss": 0.2083, "step": 28450 }, { "epoch": 23.61078838174274, "grad_norm": 16.42527961730957, "learning_rate": 1.0560663900414938e-05, "loss": 0.3236, "step": 28451 }, { "epoch": 23.611618257261412, "grad_norm": 36.828853607177734, "learning_rate": 1.056033195020747e-05, "loss": 1.137, "step": 28452 }, { "epoch": 23.612448132780084, "grad_norm": 22.69255256652832, "learning_rate": 1.056e-05, "loss": 0.3107, "step": 28453 }, { "epoch": 23.613278008298757, "grad_norm": 38.98335266113281, "learning_rate": 1.0559668049792533e-05, "loss": 0.4775, "step": 28454 }, { "epoch": 23.61410788381743, "grad_norm": 26.877456665039062, "learning_rate": 1.0559336099585063e-05, "loss": 0.4797, "step": 28455 }, { "epoch": 23.6149377593361, "grad_norm": 33.178653717041016, "learning_rate": 1.0559004149377593e-05, "loss": 0.7336, "step": 28456 }, { "epoch": 23.615767634854773, "grad_norm": 32.89647674560547, "learning_rate": 1.0558672199170126e-05, "loss": 0.5363, "step": 28457 }, { "epoch": 23.616597510373445, "grad_norm": 80.91108703613281, "learning_rate": 1.0558340248962658e-05, "loss": 0.3957, "step": 28458 }, { "epoch": 23.617427385892118, "grad_norm": 60.79650115966797, "learning_rate": 1.0558008298755186e-05, "loss": 0.6162, "step": 28459 }, { "epoch": 23.61825726141079, "grad_norm": 75.95001220703125, "learning_rate": 1.0557676348547719e-05, "loss": 0.8426, "step": 28460 }, { "epoch": 23.619087136929462, "grad_norm": 30.980953216552734, "learning_rate": 1.055734439834025e-05, "loss": 0.5508, "step": 28461 }, { "epoch": 23.619917012448134, "grad_norm": 104.47982788085938, "learning_rate": 
1.0557012448132781e-05, "loss": 0.5586, "step": 28462 }, { "epoch": 23.620746887966806, "grad_norm": 29.478395462036133, "learning_rate": 1.0556680497925311e-05, "loss": 0.4422, "step": 28463 }, { "epoch": 23.62157676348548, "grad_norm": 69.59278106689453, "learning_rate": 1.0556348547717844e-05, "loss": 0.5214, "step": 28464 }, { "epoch": 23.62240663900415, "grad_norm": 31.912303924560547, "learning_rate": 1.0556016597510374e-05, "loss": 0.5102, "step": 28465 }, { "epoch": 23.623236514522823, "grad_norm": 22.37748908996582, "learning_rate": 1.0555684647302906e-05, "loss": 0.3943, "step": 28466 }, { "epoch": 23.624066390041495, "grad_norm": 26.095094680786133, "learning_rate": 1.0555352697095435e-05, "loss": 0.451, "step": 28467 }, { "epoch": 23.624896265560167, "grad_norm": 124.7624282836914, "learning_rate": 1.0555020746887967e-05, "loss": 0.6351, "step": 28468 }, { "epoch": 23.62572614107884, "grad_norm": 30.02752113342285, "learning_rate": 1.0554688796680499e-05, "loss": 0.3587, "step": 28469 }, { "epoch": 23.62655601659751, "grad_norm": 47.02216339111328, "learning_rate": 1.0554356846473031e-05, "loss": 0.3946, "step": 28470 }, { "epoch": 23.627385892116184, "grad_norm": 48.367897033691406, "learning_rate": 1.055402489626556e-05, "loss": 0.7272, "step": 28471 }, { "epoch": 23.628215767634856, "grad_norm": 17.133033752441406, "learning_rate": 1.0553692946058092e-05, "loss": 0.2603, "step": 28472 }, { "epoch": 23.62904564315353, "grad_norm": 52.91028594970703, "learning_rate": 1.0553360995850624e-05, "loss": 0.4317, "step": 28473 }, { "epoch": 23.6298755186722, "grad_norm": 43.41705322265625, "learning_rate": 1.0553029045643154e-05, "loss": 0.4599, "step": 28474 }, { "epoch": 23.630705394190873, "grad_norm": 62.37445068359375, "learning_rate": 1.0552697095435685e-05, "loss": 1.1737, "step": 28475 }, { "epoch": 23.631535269709545, "grad_norm": 44.72018051147461, "learning_rate": 1.0552365145228215e-05, "loss": 0.6413, "step": 28476 }, { "epoch": 
23.632365145228217, "grad_norm": 30.81740379333496, "learning_rate": 1.0552033195020747e-05, "loss": 0.4729, "step": 28477 }, { "epoch": 23.63319502074689, "grad_norm": 61.68098068237305, "learning_rate": 1.055170124481328e-05, "loss": 0.4472, "step": 28478 }, { "epoch": 23.63402489626556, "grad_norm": 64.82416534423828, "learning_rate": 1.0551369294605812e-05, "loss": 0.818, "step": 28479 }, { "epoch": 23.634854771784234, "grad_norm": 14.520450592041016, "learning_rate": 1.055103734439834e-05, "loss": 0.3358, "step": 28480 }, { "epoch": 23.635684647302906, "grad_norm": 28.39021873474121, "learning_rate": 1.0550705394190872e-05, "loss": 0.3318, "step": 28481 }, { "epoch": 23.636514522821578, "grad_norm": 36.15203094482422, "learning_rate": 1.0550373443983405e-05, "loss": 0.4817, "step": 28482 }, { "epoch": 23.63734439834025, "grad_norm": 44.27267837524414, "learning_rate": 1.0550041493775935e-05, "loss": 0.4652, "step": 28483 }, { "epoch": 23.638174273858922, "grad_norm": 17.4567813873291, "learning_rate": 1.0549709543568465e-05, "loss": 0.1815, "step": 28484 }, { "epoch": 23.639004149377595, "grad_norm": 44.7297477722168, "learning_rate": 1.0549377593360996e-05, "loss": 0.3719, "step": 28485 }, { "epoch": 23.639834024896267, "grad_norm": 52.65373611450195, "learning_rate": 1.0549045643153528e-05, "loss": 0.3054, "step": 28486 }, { "epoch": 23.64066390041494, "grad_norm": 139.986328125, "learning_rate": 1.054871369294606e-05, "loss": 0.4505, "step": 28487 }, { "epoch": 23.64149377593361, "grad_norm": 21.993791580200195, "learning_rate": 1.0548381742738589e-05, "loss": 0.3849, "step": 28488 }, { "epoch": 23.642323651452283, "grad_norm": 35.00051498413086, "learning_rate": 1.054804979253112e-05, "loss": 0.2461, "step": 28489 }, { "epoch": 23.643153526970956, "grad_norm": 147.2768096923828, "learning_rate": 1.0547717842323653e-05, "loss": 0.5056, "step": 28490 }, { "epoch": 23.643983402489628, "grad_norm": 52.23707962036133, "learning_rate": 1.0547385892116185e-05, 
"loss": 0.4259, "step": 28491 }, { "epoch": 23.6448132780083, "grad_norm": 39.300315856933594, "learning_rate": 1.0547053941908714e-05, "loss": 0.4816, "step": 28492 }, { "epoch": 23.645643153526972, "grad_norm": 22.59990882873535, "learning_rate": 1.0546721991701246e-05, "loss": 0.2668, "step": 28493 }, { "epoch": 23.646473029045644, "grad_norm": 25.21150779724121, "learning_rate": 1.0546390041493776e-05, "loss": 0.2796, "step": 28494 }, { "epoch": 23.647302904564317, "grad_norm": 50.78819274902344, "learning_rate": 1.0546058091286308e-05, "loss": 0.6666, "step": 28495 }, { "epoch": 23.64813278008299, "grad_norm": 25.861345291137695, "learning_rate": 1.0545726141078839e-05, "loss": 0.3701, "step": 28496 }, { "epoch": 23.64896265560166, "grad_norm": 30.060426712036133, "learning_rate": 1.054539419087137e-05, "loss": 0.306, "step": 28497 }, { "epoch": 23.649792531120333, "grad_norm": 38.163902282714844, "learning_rate": 1.0545062240663901e-05, "loss": 0.3641, "step": 28498 }, { "epoch": 23.650622406639005, "grad_norm": 21.587343215942383, "learning_rate": 1.0544730290456433e-05, "loss": 0.4253, "step": 28499 }, { "epoch": 23.651452282157678, "grad_norm": 24.998754501342773, "learning_rate": 1.0544398340248962e-05, "loss": 0.3708, "step": 28500 }, { "epoch": 23.65228215767635, "grad_norm": 15.01718807220459, "learning_rate": 1.0544066390041494e-05, "loss": 0.4821, "step": 28501 }, { "epoch": 23.653112033195022, "grad_norm": 17.76329231262207, "learning_rate": 1.0543734439834026e-05, "loss": 0.2642, "step": 28502 }, { "epoch": 23.653941908713694, "grad_norm": 26.20064926147461, "learning_rate": 1.0543402489626557e-05, "loss": 0.3274, "step": 28503 }, { "epoch": 23.654771784232366, "grad_norm": 24.291688919067383, "learning_rate": 1.0543070539419089e-05, "loss": 0.3352, "step": 28504 }, { "epoch": 23.65560165975104, "grad_norm": 139.1817169189453, "learning_rate": 1.054273858921162e-05, "loss": 0.6547, "step": 28505 }, { "epoch": 23.65643153526971, "grad_norm": 
28.820083618164062, "learning_rate": 1.054240663900415e-05, "loss": 0.2927, "step": 28506 }, { "epoch": 23.657261410788383, "grad_norm": 45.0560417175293, "learning_rate": 1.0542074688796682e-05, "loss": 0.6894, "step": 28507 }, { "epoch": 23.658091286307055, "grad_norm": 46.64976501464844, "learning_rate": 1.0541742738589214e-05, "loss": 0.5828, "step": 28508 }, { "epoch": 23.658921161825727, "grad_norm": 50.09987258911133, "learning_rate": 1.0541410788381743e-05, "loss": 0.4413, "step": 28509 }, { "epoch": 23.6597510373444, "grad_norm": 25.639625549316406, "learning_rate": 1.0541078838174275e-05, "loss": 0.5273, "step": 28510 }, { "epoch": 23.66058091286307, "grad_norm": 22.79353904724121, "learning_rate": 1.0540746887966807e-05, "loss": 0.289, "step": 28511 }, { "epoch": 23.661410788381744, "grad_norm": 65.58199310302734, "learning_rate": 1.0540414937759337e-05, "loss": 0.5689, "step": 28512 }, { "epoch": 23.662240663900416, "grad_norm": 16.907548904418945, "learning_rate": 1.0540082987551868e-05, "loss": 0.2439, "step": 28513 }, { "epoch": 23.66307053941909, "grad_norm": 63.50191879272461, "learning_rate": 1.05397510373444e-05, "loss": 0.6151, "step": 28514 }, { "epoch": 23.66390041493776, "grad_norm": 74.04498291015625, "learning_rate": 1.053941908713693e-05, "loss": 0.689, "step": 28515 }, { "epoch": 23.664730290456433, "grad_norm": 70.49718475341797, "learning_rate": 1.0539087136929462e-05, "loss": 0.528, "step": 28516 }, { "epoch": 23.665560165975105, "grad_norm": 17.321073532104492, "learning_rate": 1.0538755186721991e-05, "loss": 0.343, "step": 28517 }, { "epoch": 23.666390041493777, "grad_norm": 30.01891326904297, "learning_rate": 1.0538423236514523e-05, "loss": 0.36, "step": 28518 }, { "epoch": 23.66721991701245, "grad_norm": 18.04359245300293, "learning_rate": 1.0538091286307055e-05, "loss": 0.4251, "step": 28519 }, { "epoch": 23.66804979253112, "grad_norm": 60.96731948852539, "learning_rate": 1.0537759336099587e-05, "loss": 0.6183, "step": 28520 }, { 
"epoch": 23.668879668049794, "grad_norm": 56.49092483520508, "learning_rate": 1.0537427385892116e-05, "loss": 0.5318, "step": 28521 }, { "epoch": 23.669709543568466, "grad_norm": 64.78032684326172, "learning_rate": 1.0537095435684648e-05, "loss": 0.4203, "step": 28522 }, { "epoch": 23.670539419087138, "grad_norm": 42.69552993774414, "learning_rate": 1.0536763485477179e-05, "loss": 0.5314, "step": 28523 }, { "epoch": 23.67136929460581, "grad_norm": 68.28518676757812, "learning_rate": 1.053643153526971e-05, "loss": 0.9602, "step": 28524 }, { "epoch": 23.672199170124482, "grad_norm": 23.232099533081055, "learning_rate": 1.0536099585062241e-05, "loss": 0.542, "step": 28525 }, { "epoch": 23.673029045643155, "grad_norm": 53.989646911621094, "learning_rate": 1.0535767634854772e-05, "loss": 0.6725, "step": 28526 }, { "epoch": 23.673858921161827, "grad_norm": 21.297321319580078, "learning_rate": 1.0535435684647304e-05, "loss": 0.3149, "step": 28527 }, { "epoch": 23.6746887966805, "grad_norm": 61.20879364013672, "learning_rate": 1.0535103734439836e-05, "loss": 0.5069, "step": 28528 }, { "epoch": 23.67551867219917, "grad_norm": 35.35726547241211, "learning_rate": 1.0534771784232364e-05, "loss": 0.3144, "step": 28529 }, { "epoch": 23.676348547717843, "grad_norm": 55.8321533203125, "learning_rate": 1.0534439834024897e-05, "loss": 0.6957, "step": 28530 }, { "epoch": 23.677178423236516, "grad_norm": 75.43807220458984, "learning_rate": 1.0534107883817429e-05, "loss": 0.3625, "step": 28531 }, { "epoch": 23.678008298755188, "grad_norm": 66.08940887451172, "learning_rate": 1.0533775933609959e-05, "loss": 0.8228, "step": 28532 }, { "epoch": 23.67883817427386, "grad_norm": 18.953311920166016, "learning_rate": 1.0533443983402491e-05, "loss": 0.2548, "step": 28533 }, { "epoch": 23.679668049792532, "grad_norm": 39.330509185791016, "learning_rate": 1.0533112033195022e-05, "loss": 0.8406, "step": 28534 }, { "epoch": 23.680497925311204, "grad_norm": 92.5636215209961, "learning_rate": 
1.0532780082987552e-05, "loss": 1.0294, "step": 28535 }, { "epoch": 23.681327800829877, "grad_norm": 21.716806411743164, "learning_rate": 1.0532448132780084e-05, "loss": 0.6416, "step": 28536 }, { "epoch": 23.68215767634855, "grad_norm": 63.77283477783203, "learning_rate": 1.0532116182572616e-05, "loss": 0.3918, "step": 28537 }, { "epoch": 23.68298755186722, "grad_norm": 22.771995544433594, "learning_rate": 1.0531784232365145e-05, "loss": 0.3148, "step": 28538 }, { "epoch": 23.683817427385893, "grad_norm": 38.62345504760742, "learning_rate": 1.0531452282157677e-05, "loss": 0.7613, "step": 28539 }, { "epoch": 23.684647302904565, "grad_norm": 44.58639144897461, "learning_rate": 1.053112033195021e-05, "loss": 0.4481, "step": 28540 }, { "epoch": 23.685477178423238, "grad_norm": 27.717947006225586, "learning_rate": 1.053078838174274e-05, "loss": 0.3768, "step": 28541 }, { "epoch": 23.68630705394191, "grad_norm": 38.498199462890625, "learning_rate": 1.053045643153527e-05, "loss": 0.593, "step": 28542 }, { "epoch": 23.687136929460582, "grad_norm": 38.89572525024414, "learning_rate": 1.0530124481327802e-05, "loss": 0.2904, "step": 28543 }, { "epoch": 23.687966804979254, "grad_norm": 17.439308166503906, "learning_rate": 1.0529792531120333e-05, "loss": 0.356, "step": 28544 }, { "epoch": 23.688796680497926, "grad_norm": 84.83905029296875, "learning_rate": 1.0529460580912865e-05, "loss": 0.8414, "step": 28545 }, { "epoch": 23.6896265560166, "grad_norm": 30.131553649902344, "learning_rate": 1.0529128630705393e-05, "loss": 0.3948, "step": 28546 }, { "epoch": 23.69045643153527, "grad_norm": 47.455650329589844, "learning_rate": 1.0528796680497925e-05, "loss": 0.5413, "step": 28547 }, { "epoch": 23.691286307053943, "grad_norm": 16.794363021850586, "learning_rate": 1.0528464730290458e-05, "loss": 0.2405, "step": 28548 }, { "epoch": 23.692116182572615, "grad_norm": 45.20697021484375, "learning_rate": 1.052813278008299e-05, "loss": 0.7071, "step": 28549 }, { "epoch": 
23.692946058091287, "grad_norm": 78.05217742919922, "learning_rate": 1.0527800829875518e-05, "loss": 0.418, "step": 28550 }, { "epoch": 23.69377593360996, "grad_norm": 103.70759582519531, "learning_rate": 1.052746887966805e-05, "loss": 0.8014, "step": 28551 }, { "epoch": 23.694605809128632, "grad_norm": 78.39244079589844, "learning_rate": 1.0527136929460583e-05, "loss": 0.481, "step": 28552 }, { "epoch": 23.695435684647304, "grad_norm": 22.222366333007812, "learning_rate": 1.0526804979253113e-05, "loss": 0.3107, "step": 28553 }, { "epoch": 23.696265560165976, "grad_norm": 59.48821258544922, "learning_rate": 1.0526473029045643e-05, "loss": 0.7942, "step": 28554 }, { "epoch": 23.69709543568465, "grad_norm": 98.31096649169922, "learning_rate": 1.0526141078838174e-05, "loss": 0.7221, "step": 28555 }, { "epoch": 23.69792531120332, "grad_norm": 34.36119079589844, "learning_rate": 1.0525809128630706e-05, "loss": 0.5128, "step": 28556 }, { "epoch": 23.698755186721993, "grad_norm": 41.61423110961914, "learning_rate": 1.0525477178423238e-05, "loss": 0.534, "step": 28557 }, { "epoch": 23.699585062240665, "grad_norm": 69.10513305664062, "learning_rate": 1.052514522821577e-05, "loss": 0.8057, "step": 28558 }, { "epoch": 23.700414937759337, "grad_norm": 25.124956130981445, "learning_rate": 1.0524813278008299e-05, "loss": 0.4011, "step": 28559 }, { "epoch": 23.70124481327801, "grad_norm": 59.269508361816406, "learning_rate": 1.0524481327800831e-05, "loss": 0.7001, "step": 28560 }, { "epoch": 23.70207468879668, "grad_norm": 98.05469512939453, "learning_rate": 1.0524149377593363e-05, "loss": 0.4737, "step": 28561 }, { "epoch": 23.702904564315354, "grad_norm": 29.925155639648438, "learning_rate": 1.0523817427385894e-05, "loss": 0.481, "step": 28562 }, { "epoch": 23.703734439834026, "grad_norm": 11.907979965209961, "learning_rate": 1.0523485477178424e-05, "loss": 0.3115, "step": 28563 }, { "epoch": 23.704564315352698, "grad_norm": 44.5025634765625, "learning_rate": 
1.0523153526970954e-05, "loss": 0.5326, "step": 28564 }, { "epoch": 23.70539419087137, "grad_norm": 73.6332778930664, "learning_rate": 1.0522821576763486e-05, "loss": 0.5076, "step": 28565 }, { "epoch": 23.706224066390043, "grad_norm": 28.11773109436035, "learning_rate": 1.0522489626556019e-05, "loss": 0.5088, "step": 28566 }, { "epoch": 23.707053941908715, "grad_norm": 58.90182113647461, "learning_rate": 1.0522157676348547e-05, "loss": 0.5262, "step": 28567 }, { "epoch": 23.707883817427387, "grad_norm": 13.719740867614746, "learning_rate": 1.052182572614108e-05, "loss": 0.2984, "step": 28568 }, { "epoch": 23.70871369294606, "grad_norm": 43.866336822509766, "learning_rate": 1.0521493775933612e-05, "loss": 0.4184, "step": 28569 }, { "epoch": 23.70954356846473, "grad_norm": 141.3211212158203, "learning_rate": 1.0521161825726142e-05, "loss": 0.7659, "step": 28570 }, { "epoch": 23.710373443983404, "grad_norm": 34.838233947753906, "learning_rate": 1.0520829875518672e-05, "loss": 0.4133, "step": 28571 }, { "epoch": 23.711203319502076, "grad_norm": 80.92899322509766, "learning_rate": 1.0520497925311204e-05, "loss": 0.6448, "step": 28572 }, { "epoch": 23.712033195020748, "grad_norm": 11.640910148620605, "learning_rate": 1.0520165975103735e-05, "loss": 0.2854, "step": 28573 }, { "epoch": 23.71286307053942, "grad_norm": 54.5671501159668, "learning_rate": 1.0519834024896267e-05, "loss": 0.5406, "step": 28574 }, { "epoch": 23.713692946058092, "grad_norm": 23.582408905029297, "learning_rate": 1.0519502074688797e-05, "loss": 0.3751, "step": 28575 }, { "epoch": 23.714522821576764, "grad_norm": 117.08888244628906, "learning_rate": 1.0519170124481328e-05, "loss": 0.4682, "step": 28576 }, { "epoch": 23.715352697095437, "grad_norm": 35.321327209472656, "learning_rate": 1.051883817427386e-05, "loss": 0.576, "step": 28577 }, { "epoch": 23.71618257261411, "grad_norm": 35.4051513671875, "learning_rate": 1.0518506224066392e-05, "loss": 1.2325, "step": 28578 }, { "epoch": 
23.71701244813278, "grad_norm": 33.431541442871094, "learning_rate": 1.051817427385892e-05, "loss": 0.3797, "step": 28579 }, { "epoch": 23.717842323651453, "grad_norm": 33.95174026489258, "learning_rate": 1.0517842323651453e-05, "loss": 0.749, "step": 28580 }, { "epoch": 23.718672199170125, "grad_norm": 29.475242614746094, "learning_rate": 1.0517510373443985e-05, "loss": 0.5744, "step": 28581 }, { "epoch": 23.719502074688798, "grad_norm": 55.75644302368164, "learning_rate": 1.0517178423236515e-05, "loss": 0.8259, "step": 28582 }, { "epoch": 23.72033195020747, "grad_norm": 158.37225341796875, "learning_rate": 1.0516846473029047e-05, "loss": 0.9781, "step": 28583 }, { "epoch": 23.721161825726142, "grad_norm": 23.957887649536133, "learning_rate": 1.0516514522821576e-05, "loss": 0.5152, "step": 28584 }, { "epoch": 23.721991701244814, "grad_norm": 42.73679733276367, "learning_rate": 1.0516182572614108e-05, "loss": 1.0258, "step": 28585 }, { "epoch": 23.722821576763486, "grad_norm": 9.219752311706543, "learning_rate": 1.051585062240664e-05, "loss": 0.3372, "step": 28586 }, { "epoch": 23.72365145228216, "grad_norm": 108.04424285888672, "learning_rate": 1.0515518672199173e-05, "loss": 0.6538, "step": 28587 }, { "epoch": 23.72448132780083, "grad_norm": 45.46092987060547, "learning_rate": 1.0515186721991701e-05, "loss": 0.8829, "step": 28588 }, { "epoch": 23.725311203319503, "grad_norm": 28.198928833007812, "learning_rate": 1.0514854771784233e-05, "loss": 0.3719, "step": 28589 }, { "epoch": 23.726141078838175, "grad_norm": 36.594505310058594, "learning_rate": 1.0514522821576765e-05, "loss": 0.3614, "step": 28590 }, { "epoch": 23.726970954356847, "grad_norm": 83.30986785888672, "learning_rate": 1.0514190871369296e-05, "loss": 0.9087, "step": 28591 }, { "epoch": 23.72780082987552, "grad_norm": 50.976829528808594, "learning_rate": 1.0513858921161826e-05, "loss": 0.4539, "step": 28592 }, { "epoch": 23.728630705394192, "grad_norm": 23.006601333618164, "learning_rate": 
1.0513526970954357e-05, "loss": 0.3273, "step": 28593 }, { "epoch": 23.729460580912864, "grad_norm": 36.81516647338867, "learning_rate": 1.0513195020746889e-05, "loss": 0.7625, "step": 28594 }, { "epoch": 23.730290456431536, "grad_norm": 34.321556091308594, "learning_rate": 1.0512863070539421e-05, "loss": 0.379, "step": 28595 }, { "epoch": 23.73112033195021, "grad_norm": 26.268884658813477, "learning_rate": 1.051253112033195e-05, "loss": 0.3833, "step": 28596 }, { "epoch": 23.73195020746888, "grad_norm": 46.949867248535156, "learning_rate": 1.0512199170124482e-05, "loss": 0.8344, "step": 28597 }, { "epoch": 23.732780082987553, "grad_norm": 146.8853759765625, "learning_rate": 1.0511867219917014e-05, "loss": 0.8442, "step": 28598 }, { "epoch": 23.733609958506225, "grad_norm": 27.20708465576172, "learning_rate": 1.0511535269709546e-05, "loss": 0.4688, "step": 28599 }, { "epoch": 23.734439834024897, "grad_norm": 29.342113494873047, "learning_rate": 1.0511203319502075e-05, "loss": 0.6296, "step": 28600 }, { "epoch": 23.73526970954357, "grad_norm": 54.467899322509766, "learning_rate": 1.0510871369294607e-05, "loss": 0.6373, "step": 28601 }, { "epoch": 23.73609958506224, "grad_norm": 99.59923553466797, "learning_rate": 1.0510539419087137e-05, "loss": 0.8791, "step": 28602 }, { "epoch": 23.736929460580914, "grad_norm": 18.617942810058594, "learning_rate": 1.051020746887967e-05, "loss": 0.3267, "step": 28603 }, { "epoch": 23.737759336099586, "grad_norm": 28.917600631713867, "learning_rate": 1.05098755186722e-05, "loss": 0.4845, "step": 28604 }, { "epoch": 23.738589211618258, "grad_norm": 27.04266357421875, "learning_rate": 1.050954356846473e-05, "loss": 0.3082, "step": 28605 }, { "epoch": 23.73941908713693, "grad_norm": 25.382122039794922, "learning_rate": 1.0509211618257262e-05, "loss": 0.3153, "step": 28606 }, { "epoch": 23.740248962655603, "grad_norm": 72.42326354980469, "learning_rate": 1.0508879668049794e-05, "loss": 0.8578, "step": 28607 }, { "epoch": 
23.741078838174275, "grad_norm": 21.501283645629883, "learning_rate": 1.0508547717842323e-05, "loss": 0.4057, "step": 28608 }, { "epoch": 23.741908713692947, "grad_norm": 46.26237869262695, "learning_rate": 1.0508215767634855e-05, "loss": 0.4326, "step": 28609 }, { "epoch": 23.74273858921162, "grad_norm": 32.73575973510742, "learning_rate": 1.0507883817427387e-05, "loss": 0.2894, "step": 28610 }, { "epoch": 23.74356846473029, "grad_norm": 46.041141510009766, "learning_rate": 1.0507551867219918e-05, "loss": 0.7725, "step": 28611 }, { "epoch": 23.744398340248964, "grad_norm": 37.643898010253906, "learning_rate": 1.050721991701245e-05, "loss": 0.6563, "step": 28612 }, { "epoch": 23.745228215767636, "grad_norm": 28.84851837158203, "learning_rate": 1.050688796680498e-05, "loss": 0.3023, "step": 28613 }, { "epoch": 23.746058091286308, "grad_norm": 57.02701187133789, "learning_rate": 1.050655601659751e-05, "loss": 0.3695, "step": 28614 }, { "epoch": 23.74688796680498, "grad_norm": 34.81699752807617, "learning_rate": 1.0506224066390043e-05, "loss": 0.2824, "step": 28615 }, { "epoch": 23.747717842323652, "grad_norm": 54.25108337402344, "learning_rate": 1.0505892116182575e-05, "loss": 0.3479, "step": 28616 }, { "epoch": 23.748547717842325, "grad_norm": 16.692873001098633, "learning_rate": 1.0505560165975104e-05, "loss": 0.5259, "step": 28617 }, { "epoch": 23.749377593360997, "grad_norm": 57.51014709472656, "learning_rate": 1.0505228215767636e-05, "loss": 0.7007, "step": 28618 }, { "epoch": 23.75020746887967, "grad_norm": 20.48287582397461, "learning_rate": 1.0504896265560168e-05, "loss": 0.3326, "step": 28619 }, { "epoch": 23.75103734439834, "grad_norm": 86.2122573852539, "learning_rate": 1.0504564315352698e-05, "loss": 0.4156, "step": 28620 }, { "epoch": 23.751867219917013, "grad_norm": 28.25285530090332, "learning_rate": 1.0504232365145229e-05, "loss": 0.415, "step": 28621 }, { "epoch": 23.752697095435686, "grad_norm": 39.530799865722656, "learning_rate": 
1.050390041493776e-05, "loss": 0.3629, "step": 28622 }, { "epoch": 23.753526970954358, "grad_norm": 48.21879959106445, "learning_rate": 1.0503568464730291e-05, "loss": 0.6347, "step": 28623 }, { "epoch": 23.75435684647303, "grad_norm": 91.33848571777344, "learning_rate": 1.0503236514522823e-05, "loss": 1.0046, "step": 28624 }, { "epoch": 23.755186721991702, "grad_norm": 16.700742721557617, "learning_rate": 1.0502904564315352e-05, "loss": 0.4371, "step": 28625 }, { "epoch": 23.756016597510374, "grad_norm": 33.65321350097656, "learning_rate": 1.0502572614107884e-05, "loss": 0.4777, "step": 28626 }, { "epoch": 23.756846473029047, "grad_norm": 41.13576126098633, "learning_rate": 1.0502240663900416e-05, "loss": 0.5662, "step": 28627 }, { "epoch": 23.75767634854772, "grad_norm": 15.500099182128906, "learning_rate": 1.0501908713692948e-05, "loss": 0.2969, "step": 28628 }, { "epoch": 23.75850622406639, "grad_norm": 28.27553939819336, "learning_rate": 1.0501576763485477e-05, "loss": 0.5685, "step": 28629 }, { "epoch": 23.759336099585063, "grad_norm": 53.63425827026367, "learning_rate": 1.0501244813278009e-05, "loss": 0.4436, "step": 28630 }, { "epoch": 23.760165975103735, "grad_norm": 133.56932067871094, "learning_rate": 1.0500912863070541e-05, "loss": 0.7511, "step": 28631 }, { "epoch": 23.760995850622407, "grad_norm": 40.274959564208984, "learning_rate": 1.0500580912863072e-05, "loss": 0.4243, "step": 28632 }, { "epoch": 23.76182572614108, "grad_norm": 63.58279800415039, "learning_rate": 1.0500248962655602e-05, "loss": 1.0636, "step": 28633 }, { "epoch": 23.762655601659752, "grad_norm": 46.029048919677734, "learning_rate": 1.0499917012448132e-05, "loss": 0.5797, "step": 28634 }, { "epoch": 23.763485477178424, "grad_norm": 35.49668884277344, "learning_rate": 1.0499585062240665e-05, "loss": 1.1113, "step": 28635 }, { "epoch": 23.764315352697096, "grad_norm": 30.390424728393555, "learning_rate": 1.0499253112033197e-05, "loss": 0.4322, "step": 28636 }, { "epoch": 
23.76514522821577, "grad_norm": 26.910139083862305, "learning_rate": 1.0498921161825729e-05, "loss": 0.3018, "step": 28637 }, { "epoch": 23.76597510373444, "grad_norm": 44.81931686401367, "learning_rate": 1.0498589211618257e-05, "loss": 0.437, "step": 28638 }, { "epoch": 23.766804979253113, "grad_norm": 33.70487976074219, "learning_rate": 1.049825726141079e-05, "loss": 0.2921, "step": 28639 }, { "epoch": 23.767634854771785, "grad_norm": 23.44938850402832, "learning_rate": 1.049792531120332e-05, "loss": 0.5221, "step": 28640 }, { "epoch": 23.768464730290457, "grad_norm": 20.066139221191406, "learning_rate": 1.0497593360995852e-05, "loss": 0.3273, "step": 28641 }, { "epoch": 23.76929460580913, "grad_norm": 19.719221115112305, "learning_rate": 1.0497261410788383e-05, "loss": 0.3806, "step": 28642 }, { "epoch": 23.7701244813278, "grad_norm": 46.66595458984375, "learning_rate": 1.0496929460580913e-05, "loss": 0.4791, "step": 28643 }, { "epoch": 23.770954356846474, "grad_norm": 14.35488224029541, "learning_rate": 1.0496597510373445e-05, "loss": 0.2409, "step": 28644 }, { "epoch": 23.771784232365146, "grad_norm": 58.013458251953125, "learning_rate": 1.0496265560165977e-05, "loss": 0.8258, "step": 28645 }, { "epoch": 23.77261410788382, "grad_norm": 30.002195358276367, "learning_rate": 1.0495933609958506e-05, "loss": 0.4199, "step": 28646 }, { "epoch": 23.77344398340249, "grad_norm": 51.21616744995117, "learning_rate": 1.0495601659751038e-05, "loss": 0.3506, "step": 28647 }, { "epoch": 23.774273858921163, "grad_norm": 52.506771087646484, "learning_rate": 1.049526970954357e-05, "loss": 0.5452, "step": 28648 }, { "epoch": 23.775103734439835, "grad_norm": 31.52512550354004, "learning_rate": 1.04949377593361e-05, "loss": 0.5627, "step": 28649 }, { "epoch": 23.775933609958507, "grad_norm": 63.53253936767578, "learning_rate": 1.0494605809128631e-05, "loss": 0.5514, "step": 28650 }, { "epoch": 23.77676348547718, "grad_norm": 56.13243865966797, "learning_rate": 
1.0494273858921163e-05, "loss": 0.8093, "step": 28651 }, { "epoch": 23.77759336099585, "grad_norm": 46.52476501464844, "learning_rate": 1.0493941908713693e-05, "loss": 0.2737, "step": 28652 }, { "epoch": 23.778423236514524, "grad_norm": 29.222686767578125, "learning_rate": 1.0493609958506226e-05, "loss": 0.4142, "step": 28653 }, { "epoch": 23.779253112033196, "grad_norm": 22.44294548034668, "learning_rate": 1.0493278008298754e-05, "loss": 0.2617, "step": 28654 }, { "epoch": 23.780082987551868, "grad_norm": 12.652368545532227, "learning_rate": 1.0492946058091286e-05, "loss": 0.2887, "step": 28655 }, { "epoch": 23.78091286307054, "grad_norm": 22.10243034362793, "learning_rate": 1.0492614107883818e-05, "loss": 0.3363, "step": 28656 }, { "epoch": 23.781742738589212, "grad_norm": 39.23120880126953, "learning_rate": 1.049228215767635e-05, "loss": 0.8539, "step": 28657 }, { "epoch": 23.782572614107885, "grad_norm": 87.80575561523438, "learning_rate": 1.049195020746888e-05, "loss": 0.3569, "step": 28658 }, { "epoch": 23.783402489626557, "grad_norm": 87.52467346191406, "learning_rate": 1.0491618257261411e-05, "loss": 0.8944, "step": 28659 }, { "epoch": 23.78423236514523, "grad_norm": 28.66638946533203, "learning_rate": 1.0491286307053944e-05, "loss": 0.4649, "step": 28660 }, { "epoch": 23.7850622406639, "grad_norm": 54.8919792175293, "learning_rate": 1.0490954356846474e-05, "loss": 0.6643, "step": 28661 }, { "epoch": 23.785892116182573, "grad_norm": 55.24736785888672, "learning_rate": 1.0490622406639006e-05, "loss": 0.7568, "step": 28662 }, { "epoch": 23.786721991701246, "grad_norm": 50.58272933959961, "learning_rate": 1.0490290456431535e-05, "loss": 0.3071, "step": 28663 }, { "epoch": 23.787551867219918, "grad_norm": 34.630653381347656, "learning_rate": 1.0489958506224067e-05, "loss": 0.4623, "step": 28664 }, { "epoch": 23.78838174273859, "grad_norm": 30.544017791748047, "learning_rate": 1.0489626556016599e-05, "loss": 0.526, "step": 28665 }, { "epoch": 23.789211618257262, 
"grad_norm": 67.84329986572266, "learning_rate": 1.0489294605809131e-05, "loss": 0.5868, "step": 28666 }, { "epoch": 23.790041493775934, "grad_norm": 18.80967903137207, "learning_rate": 1.048896265560166e-05, "loss": 0.4055, "step": 28667 }, { "epoch": 23.790871369294607, "grad_norm": 39.48723220825195, "learning_rate": 1.0488630705394192e-05, "loss": 0.3323, "step": 28668 }, { "epoch": 23.79170124481328, "grad_norm": 31.742685317993164, "learning_rate": 1.0488298755186724e-05, "loss": 0.4631, "step": 28669 }, { "epoch": 23.79253112033195, "grad_norm": 64.689697265625, "learning_rate": 1.0487966804979254e-05, "loss": 0.7001, "step": 28670 }, { "epoch": 23.793360995850623, "grad_norm": 74.35224914550781, "learning_rate": 1.0487634854771785e-05, "loss": 0.6337, "step": 28671 }, { "epoch": 23.794190871369295, "grad_norm": 24.504911422729492, "learning_rate": 1.0487302904564315e-05, "loss": 0.3779, "step": 28672 }, { "epoch": 23.795020746887968, "grad_norm": 49.61223602294922, "learning_rate": 1.0486970954356847e-05, "loss": 0.6555, "step": 28673 }, { "epoch": 23.79585062240664, "grad_norm": 79.19664001464844, "learning_rate": 1.048663900414938e-05, "loss": 0.5858, "step": 28674 }, { "epoch": 23.796680497925312, "grad_norm": 78.48644256591797, "learning_rate": 1.0486307053941908e-05, "loss": 0.3481, "step": 28675 }, { "epoch": 23.797510373443984, "grad_norm": 70.13385772705078, "learning_rate": 1.048597510373444e-05, "loss": 0.5612, "step": 28676 }, { "epoch": 23.798340248962656, "grad_norm": 53.88031005859375, "learning_rate": 1.0485643153526972e-05, "loss": 0.6738, "step": 28677 }, { "epoch": 23.79917012448133, "grad_norm": 81.45802307128906, "learning_rate": 1.0485311203319505e-05, "loss": 0.4588, "step": 28678 }, { "epoch": 23.8, "grad_norm": 58.152400970458984, "learning_rate": 1.0484979253112033e-05, "loss": 0.4788, "step": 28679 }, { "epoch": 23.800829875518673, "grad_norm": 104.18917846679688, "learning_rate": 1.0484647302904565e-05, "loss": 0.4521, "step": 
28680 }, { "epoch": 23.801659751037345, "grad_norm": 32.02531814575195, "learning_rate": 1.0484315352697096e-05, "loss": 0.5208, "step": 28681 }, { "epoch": 23.802489626556017, "grad_norm": 128.01150512695312, "learning_rate": 1.0483983402489628e-05, "loss": 0.4578, "step": 28682 }, { "epoch": 23.80331950207469, "grad_norm": 17.164098739624023, "learning_rate": 1.0483651452282158e-05, "loss": 0.3344, "step": 28683 }, { "epoch": 23.80414937759336, "grad_norm": 54.1053581237793, "learning_rate": 1.0483319502074689e-05, "loss": 0.7543, "step": 28684 }, { "epoch": 23.804979253112034, "grad_norm": 40.41267776489258, "learning_rate": 1.048298755186722e-05, "loss": 0.5989, "step": 28685 }, { "epoch": 23.805809128630706, "grad_norm": 47.276973724365234, "learning_rate": 1.0482655601659753e-05, "loss": 0.6248, "step": 28686 }, { "epoch": 23.80663900414938, "grad_norm": 61.35164260864258, "learning_rate": 1.0482323651452282e-05, "loss": 0.5821, "step": 28687 }, { "epoch": 23.80746887966805, "grad_norm": 137.07333374023438, "learning_rate": 1.0481991701244814e-05, "loss": 0.4164, "step": 28688 }, { "epoch": 23.808298755186723, "grad_norm": 66.49781036376953, "learning_rate": 1.0481659751037346e-05, "loss": 0.71, "step": 28689 }, { "epoch": 23.809128630705395, "grad_norm": 28.226991653442383, "learning_rate": 1.0481327800829876e-05, "loss": 0.4305, "step": 28690 }, { "epoch": 23.809958506224067, "grad_norm": 78.64330291748047, "learning_rate": 1.0480995850622408e-05, "loss": 0.7315, "step": 28691 }, { "epoch": 23.81078838174274, "grad_norm": 105.26332092285156, "learning_rate": 1.0480663900414939e-05, "loss": 0.8198, "step": 28692 }, { "epoch": 23.81161825726141, "grad_norm": 73.90377044677734, "learning_rate": 1.048033195020747e-05, "loss": 0.4447, "step": 28693 }, { "epoch": 23.812448132780084, "grad_norm": 14.647697448730469, "learning_rate": 1.0480000000000001e-05, "loss": 0.2872, "step": 28694 }, { "epoch": 23.813278008298756, "grad_norm": 54.54517364501953, 
"learning_rate": 1.0479668049792533e-05, "loss": 0.413, "step": 28695 }, { "epoch": 23.814107883817428, "grad_norm": 92.63419342041016, "learning_rate": 1.0479336099585062e-05, "loss": 0.8325, "step": 28696 }, { "epoch": 23.8149377593361, "grad_norm": 58.89577102661133, "learning_rate": 1.0479004149377594e-05, "loss": 0.894, "step": 28697 }, { "epoch": 23.815767634854772, "grad_norm": 19.17176055908203, "learning_rate": 1.0478672199170126e-05, "loss": 0.3452, "step": 28698 }, { "epoch": 23.816597510373445, "grad_norm": 40.41216278076172, "learning_rate": 1.0478340248962657e-05, "loss": 0.6413, "step": 28699 }, { "epoch": 23.817427385892117, "grad_norm": 36.1971435546875, "learning_rate": 1.0478008298755187e-05, "loss": 0.4429, "step": 28700 }, { "epoch": 23.81825726141079, "grad_norm": 63.647029876708984, "learning_rate": 1.0477676348547718e-05, "loss": 0.9538, "step": 28701 }, { "epoch": 23.81908713692946, "grad_norm": 27.27663803100586, "learning_rate": 1.047734439834025e-05, "loss": 0.437, "step": 28702 }, { "epoch": 23.819917012448133, "grad_norm": 51.14152908325195, "learning_rate": 1.0477012448132782e-05, "loss": 0.5302, "step": 28703 }, { "epoch": 23.820746887966806, "grad_norm": 69.0108642578125, "learning_rate": 1.047668049792531e-05, "loss": 1.331, "step": 28704 }, { "epoch": 23.821576763485478, "grad_norm": 15.254044532775879, "learning_rate": 1.0476348547717843e-05, "loss": 0.2395, "step": 28705 }, { "epoch": 23.82240663900415, "grad_norm": 32.91254806518555, "learning_rate": 1.0476016597510375e-05, "loss": 0.2641, "step": 28706 }, { "epoch": 23.823236514522822, "grad_norm": 44.411155700683594, "learning_rate": 1.0475684647302907e-05, "loss": 0.6155, "step": 28707 }, { "epoch": 23.824066390041494, "grad_norm": 55.25845718383789, "learning_rate": 1.0475352697095436e-05, "loss": 0.4119, "step": 28708 }, { "epoch": 23.824896265560167, "grad_norm": 100.45915985107422, "learning_rate": 1.0475020746887968e-05, "loss": 0.481, "step": 28709 }, { "epoch": 
23.82572614107884, "grad_norm": 30.607118606567383, "learning_rate": 1.0474688796680498e-05, "loss": 0.434, "step": 28710 }, { "epoch": 23.82655601659751, "grad_norm": 22.727067947387695, "learning_rate": 1.047435684647303e-05, "loss": 0.3964, "step": 28711 }, { "epoch": 23.827385892116183, "grad_norm": 26.452945709228516, "learning_rate": 1.047402489626556e-05, "loss": 0.3922, "step": 28712 }, { "epoch": 23.828215767634855, "grad_norm": 34.71005630493164, "learning_rate": 1.0473692946058091e-05, "loss": 0.4267, "step": 28713 }, { "epoch": 23.829045643153528, "grad_norm": 130.82620239257812, "learning_rate": 1.0473360995850623e-05, "loss": 0.4614, "step": 28714 }, { "epoch": 23.8298755186722, "grad_norm": 57.55595397949219, "learning_rate": 1.0473029045643155e-05, "loss": 0.892, "step": 28715 }, { "epoch": 23.830705394190872, "grad_norm": 84.2421646118164, "learning_rate": 1.0472697095435687e-05, "loss": 0.2816, "step": 28716 }, { "epoch": 23.831535269709544, "grad_norm": 68.71576690673828, "learning_rate": 1.0472365145228216e-05, "loss": 0.4428, "step": 28717 }, { "epoch": 23.832365145228216, "grad_norm": 82.01176452636719, "learning_rate": 1.0472033195020748e-05, "loss": 0.5064, "step": 28718 }, { "epoch": 23.83319502074689, "grad_norm": 47.796295166015625, "learning_rate": 1.0471701244813279e-05, "loss": 0.6234, "step": 28719 }, { "epoch": 23.83402489626556, "grad_norm": 35.1744499206543, "learning_rate": 1.047136929460581e-05, "loss": 0.4407, "step": 28720 }, { "epoch": 23.834854771784233, "grad_norm": 11.437312126159668, "learning_rate": 1.0471037344398341e-05, "loss": 0.28, "step": 28721 }, { "epoch": 23.835684647302905, "grad_norm": 85.42522430419922, "learning_rate": 1.0470705394190872e-05, "loss": 1.1474, "step": 28722 }, { "epoch": 23.836514522821577, "grad_norm": 91.04330444335938, "learning_rate": 1.0470373443983404e-05, "loss": 0.3746, "step": 28723 }, { "epoch": 23.83734439834025, "grad_norm": 47.38300323486328, "learning_rate": 
1.0470041493775936e-05, "loss": 0.4065, "step": 28724 }, { "epoch": 23.83817427385892, "grad_norm": 74.67879486083984, "learning_rate": 1.0469709543568464e-05, "loss": 0.7411, "step": 28725 }, { "epoch": 23.839004149377594, "grad_norm": 213.8955078125, "learning_rate": 1.0469377593360997e-05, "loss": 0.9881, "step": 28726 }, { "epoch": 23.839834024896266, "grad_norm": 38.048744201660156, "learning_rate": 1.0469045643153529e-05, "loss": 0.5711, "step": 28727 }, { "epoch": 23.84066390041494, "grad_norm": 29.83339500427246, "learning_rate": 1.0468713692946059e-05, "loss": 0.4696, "step": 28728 }, { "epoch": 23.84149377593361, "grad_norm": 54.90969467163086, "learning_rate": 1.046838174273859e-05, "loss": 0.6131, "step": 28729 }, { "epoch": 23.842323651452283, "grad_norm": 45.354164123535156, "learning_rate": 1.0468049792531122e-05, "loss": 0.3988, "step": 28730 }, { "epoch": 23.843153526970955, "grad_norm": 50.72807693481445, "learning_rate": 1.0467717842323652e-05, "loss": 0.8094, "step": 28731 }, { "epoch": 23.843983402489627, "grad_norm": 69.07130432128906, "learning_rate": 1.0467385892116184e-05, "loss": 0.6699, "step": 28732 }, { "epoch": 23.8448132780083, "grad_norm": 38.4970588684082, "learning_rate": 1.0467053941908713e-05, "loss": 0.4839, "step": 28733 }, { "epoch": 23.84564315352697, "grad_norm": 119.53893280029297, "learning_rate": 1.0466721991701245e-05, "loss": 0.3569, "step": 28734 }, { "epoch": 23.846473029045644, "grad_norm": 23.482507705688477, "learning_rate": 1.0466390041493777e-05, "loss": 0.354, "step": 28735 }, { "epoch": 23.847302904564316, "grad_norm": 43.908294677734375, "learning_rate": 1.046605809128631e-05, "loss": 0.5609, "step": 28736 }, { "epoch": 23.848132780082988, "grad_norm": 45.89064407348633, "learning_rate": 1.0465726141078838e-05, "loss": 0.9156, "step": 28737 }, { "epoch": 23.84896265560166, "grad_norm": 165.51629638671875, "learning_rate": 1.046539419087137e-05, "loss": 0.7715, "step": 28738 }, { "epoch": 23.849792531120332, 
"grad_norm": 44.16664505004883, "learning_rate": 1.0465062240663902e-05, "loss": 0.5757, "step": 28739 }, { "epoch": 23.850622406639005, "grad_norm": 28.63067054748535, "learning_rate": 1.0464730290456433e-05, "loss": 0.587, "step": 28740 }, { "epoch": 23.851452282157677, "grad_norm": 39.890846252441406, "learning_rate": 1.0464398340248965e-05, "loss": 0.4808, "step": 28741 }, { "epoch": 23.85228215767635, "grad_norm": 26.790803909301758, "learning_rate": 1.0464066390041493e-05, "loss": 0.3482, "step": 28742 }, { "epoch": 23.85311203319502, "grad_norm": 15.069096565246582, "learning_rate": 1.0463734439834025e-05, "loss": 0.2891, "step": 28743 }, { "epoch": 23.853941908713693, "grad_norm": 29.5406551361084, "learning_rate": 1.0463402489626558e-05, "loss": 0.3748, "step": 28744 }, { "epoch": 23.854771784232366, "grad_norm": 37.959651947021484, "learning_rate": 1.046307053941909e-05, "loss": 0.3055, "step": 28745 }, { "epoch": 23.855601659751038, "grad_norm": 30.06818389892578, "learning_rate": 1.0462738589211618e-05, "loss": 0.4658, "step": 28746 }, { "epoch": 23.85643153526971, "grad_norm": 24.862001419067383, "learning_rate": 1.046240663900415e-05, "loss": 0.2638, "step": 28747 }, { "epoch": 23.857261410788382, "grad_norm": 49.864013671875, "learning_rate": 1.0462074688796683e-05, "loss": 0.3383, "step": 28748 }, { "epoch": 23.858091286307054, "grad_norm": 57.209075927734375, "learning_rate": 1.0461742738589213e-05, "loss": 0.8513, "step": 28749 }, { "epoch": 23.858921161825727, "grad_norm": 13.302837371826172, "learning_rate": 1.0461410788381743e-05, "loss": 0.2422, "step": 28750 }, { "epoch": 23.8597510373444, "grad_norm": 14.011520385742188, "learning_rate": 1.0461078838174274e-05, "loss": 0.3022, "step": 28751 }, { "epoch": 23.86058091286307, "grad_norm": 56.17552185058594, "learning_rate": 1.0460746887966806e-05, "loss": 0.9268, "step": 28752 }, { "epoch": 23.861410788381743, "grad_norm": 61.84261703491211, "learning_rate": 1.0460414937759338e-05, "loss": 
0.5629, "step": 28753 }, { "epoch": 23.862240663900415, "grad_norm": 51.821014404296875, "learning_rate": 1.0460082987551867e-05, "loss": 0.9152, "step": 28754 }, { "epoch": 23.863070539419088, "grad_norm": 28.774322509765625, "learning_rate": 1.0459751037344399e-05, "loss": 0.5029, "step": 28755 }, { "epoch": 23.86390041493776, "grad_norm": 51.809635162353516, "learning_rate": 1.0459419087136931e-05, "loss": 1.0233, "step": 28756 }, { "epoch": 23.864730290456432, "grad_norm": 20.178424835205078, "learning_rate": 1.0459087136929461e-05, "loss": 0.2983, "step": 28757 }, { "epoch": 23.865560165975104, "grad_norm": 44.975406646728516, "learning_rate": 1.0458755186721992e-05, "loss": 0.4624, "step": 28758 }, { "epoch": 23.866390041493776, "grad_norm": 61.66842269897461, "learning_rate": 1.0458423236514524e-05, "loss": 0.4431, "step": 28759 }, { "epoch": 23.86721991701245, "grad_norm": 30.609500885009766, "learning_rate": 1.0458091286307054e-05, "loss": 0.5164, "step": 28760 }, { "epoch": 23.86804979253112, "grad_norm": 34.33560562133789, "learning_rate": 1.0457759336099586e-05, "loss": 0.6285, "step": 28761 }, { "epoch": 23.868879668049793, "grad_norm": 45.022830963134766, "learning_rate": 1.0457427385892117e-05, "loss": 0.4718, "step": 28762 }, { "epoch": 23.869709543568465, "grad_norm": 40.96895217895508, "learning_rate": 1.0457095435684647e-05, "loss": 0.3369, "step": 28763 }, { "epoch": 23.870539419087137, "grad_norm": 23.85353660583496, "learning_rate": 1.045676348547718e-05, "loss": 0.3238, "step": 28764 }, { "epoch": 23.87136929460581, "grad_norm": 53.08283996582031, "learning_rate": 1.0456431535269711e-05, "loss": 0.5182, "step": 28765 }, { "epoch": 23.872199170124482, "grad_norm": 57.582008361816406, "learning_rate": 1.045609958506224e-05, "loss": 0.4702, "step": 28766 }, { "epoch": 23.873029045643154, "grad_norm": 29.408550262451172, "learning_rate": 1.0455767634854772e-05, "loss": 0.3447, "step": 28767 }, { "epoch": 23.873858921161826, "grad_norm": 
47.602027893066406, "learning_rate": 1.0455435684647304e-05, "loss": 0.3729, "step": 28768 }, { "epoch": 23.8746887966805, "grad_norm": 66.48628997802734, "learning_rate": 1.0455103734439835e-05, "loss": 0.3944, "step": 28769 }, { "epoch": 23.87551867219917, "grad_norm": 116.9640121459961, "learning_rate": 1.0454771784232367e-05, "loss": 0.5292, "step": 28770 }, { "epoch": 23.876348547717843, "grad_norm": 35.912994384765625, "learning_rate": 1.0454439834024896e-05, "loss": 0.2637, "step": 28771 }, { "epoch": 23.877178423236515, "grad_norm": 11.782111167907715, "learning_rate": 1.0454107883817428e-05, "loss": 0.2608, "step": 28772 }, { "epoch": 23.878008298755187, "grad_norm": 14.104676246643066, "learning_rate": 1.045377593360996e-05, "loss": 0.2524, "step": 28773 }, { "epoch": 23.87883817427386, "grad_norm": 32.005821228027344, "learning_rate": 1.0453443983402492e-05, "loss": 0.4328, "step": 28774 }, { "epoch": 23.87966804979253, "grad_norm": 33.273990631103516, "learning_rate": 1.045311203319502e-05, "loss": 0.5034, "step": 28775 }, { "epoch": 23.880497925311204, "grad_norm": 22.606689453125, "learning_rate": 1.0452780082987553e-05, "loss": 0.3641, "step": 28776 }, { "epoch": 23.881327800829876, "grad_norm": 80.17716217041016, "learning_rate": 1.0452448132780085e-05, "loss": 0.7532, "step": 28777 }, { "epoch": 23.882157676348548, "grad_norm": 29.1519718170166, "learning_rate": 1.0452116182572615e-05, "loss": 0.3671, "step": 28778 }, { "epoch": 23.88298755186722, "grad_norm": 15.914841651916504, "learning_rate": 1.0451784232365146e-05, "loss": 0.2714, "step": 28779 }, { "epoch": 23.883817427385893, "grad_norm": 41.71342849731445, "learning_rate": 1.0451452282157676e-05, "loss": 0.5261, "step": 28780 }, { "epoch": 23.884647302904565, "grad_norm": 32.6083869934082, "learning_rate": 1.0451120331950208e-05, "loss": 0.3862, "step": 28781 }, { "epoch": 23.885477178423237, "grad_norm": 74.58705139160156, "learning_rate": 1.045078838174274e-05, "loss": 0.493, "step": 
28782 }, { "epoch": 23.88630705394191, "grad_norm": 43.70199203491211, "learning_rate": 1.0450456431535269e-05, "loss": 0.3458, "step": 28783 }, { "epoch": 23.88713692946058, "grad_norm": 23.841758728027344, "learning_rate": 1.0450124481327801e-05, "loss": 0.3961, "step": 28784 }, { "epoch": 23.887966804979254, "grad_norm": 34.579715728759766, "learning_rate": 1.0449792531120333e-05, "loss": 0.4053, "step": 28785 }, { "epoch": 23.888796680497926, "grad_norm": 48.740455627441406, "learning_rate": 1.0449460580912865e-05, "loss": 0.4381, "step": 28786 }, { "epoch": 23.889626556016598, "grad_norm": 93.27164459228516, "learning_rate": 1.0449128630705394e-05, "loss": 0.6978, "step": 28787 }, { "epoch": 23.89045643153527, "grad_norm": 9.679335594177246, "learning_rate": 1.0448796680497926e-05, "loss": 0.2301, "step": 28788 }, { "epoch": 23.891286307053942, "grad_norm": 93.77117156982422, "learning_rate": 1.0448464730290457e-05, "loss": 0.7472, "step": 28789 }, { "epoch": 23.892116182572614, "grad_norm": 65.40564727783203, "learning_rate": 1.0448132780082989e-05, "loss": 0.2807, "step": 28790 }, { "epoch": 23.892946058091287, "grad_norm": 65.85477447509766, "learning_rate": 1.044780082987552e-05, "loss": 0.3124, "step": 28791 }, { "epoch": 23.89377593360996, "grad_norm": 28.965242385864258, "learning_rate": 1.044746887966805e-05, "loss": 0.281, "step": 28792 }, { "epoch": 23.89460580912863, "grad_norm": 26.222930908203125, "learning_rate": 1.0447136929460582e-05, "loss": 0.3041, "step": 28793 }, { "epoch": 23.895435684647303, "grad_norm": 70.26414489746094, "learning_rate": 1.0446804979253114e-05, "loss": 0.5596, "step": 28794 }, { "epoch": 23.896265560165975, "grad_norm": 10.003329277038574, "learning_rate": 1.0446473029045646e-05, "loss": 0.2454, "step": 28795 }, { "epoch": 23.897095435684648, "grad_norm": 40.00601577758789, "learning_rate": 1.0446141078838175e-05, "loss": 0.4934, "step": 28796 }, { "epoch": 23.89792531120332, "grad_norm": 23.749847412109375, 
"learning_rate": 1.0445809128630707e-05, "loss": 0.3223, "step": 28797 }, { "epoch": 23.898755186721992, "grad_norm": 44.74702072143555, "learning_rate": 1.0445477178423237e-05, "loss": 0.6557, "step": 28798 }, { "epoch": 23.899585062240664, "grad_norm": 118.54838562011719, "learning_rate": 1.044514522821577e-05, "loss": 0.3944, "step": 28799 }, { "epoch": 23.900414937759336, "grad_norm": 38.67271423339844, "learning_rate": 1.04448132780083e-05, "loss": 0.4865, "step": 28800 }, { "epoch": 23.90124481327801, "grad_norm": 17.348316192626953, "learning_rate": 1.044448132780083e-05, "loss": 0.3025, "step": 28801 }, { "epoch": 23.90207468879668, "grad_norm": 167.49232482910156, "learning_rate": 1.0444149377593362e-05, "loss": 0.5911, "step": 28802 }, { "epoch": 23.902904564315353, "grad_norm": 54.78078842163086, "learning_rate": 1.0443817427385894e-05, "loss": 0.408, "step": 28803 }, { "epoch": 23.903734439834025, "grad_norm": 27.62749481201172, "learning_rate": 1.0443485477178423e-05, "loss": 0.3608, "step": 28804 }, { "epoch": 23.904564315352697, "grad_norm": 28.295236587524414, "learning_rate": 1.0443153526970955e-05, "loss": 0.4591, "step": 28805 }, { "epoch": 23.90539419087137, "grad_norm": 64.85149383544922, "learning_rate": 1.0442821576763487e-05, "loss": 0.6388, "step": 28806 }, { "epoch": 23.906224066390042, "grad_norm": 41.055274963378906, "learning_rate": 1.0442489626556018e-05, "loss": 0.5996, "step": 28807 }, { "epoch": 23.907053941908714, "grad_norm": 82.60255432128906, "learning_rate": 1.0442157676348548e-05, "loss": 0.7859, "step": 28808 }, { "epoch": 23.907883817427386, "grad_norm": 19.634572982788086, "learning_rate": 1.044182572614108e-05, "loss": 0.2404, "step": 28809 }, { "epoch": 23.90871369294606, "grad_norm": 29.51589012145996, "learning_rate": 1.044149377593361e-05, "loss": 0.3163, "step": 28810 }, { "epoch": 23.90954356846473, "grad_norm": 57.61912536621094, "learning_rate": 1.0441161825726143e-05, "loss": 0.749, "step": 28811 }, { "epoch": 
23.910373443983403, "grad_norm": 16.31926918029785, "learning_rate": 1.0440829875518671e-05, "loss": 0.2883, "step": 28812 }, { "epoch": 23.911203319502075, "grad_norm": 122.7461166381836, "learning_rate": 1.0440497925311204e-05, "loss": 0.8487, "step": 28813 }, { "epoch": 23.912033195020747, "grad_norm": 42.279170989990234, "learning_rate": 1.0440165975103736e-05, "loss": 0.4966, "step": 28814 }, { "epoch": 23.91286307053942, "grad_norm": 43.711151123046875, "learning_rate": 1.0439834024896268e-05, "loss": 0.4096, "step": 28815 }, { "epoch": 23.91369294605809, "grad_norm": 32.591182708740234, "learning_rate": 1.0439502074688796e-05, "loss": 0.3006, "step": 28816 }, { "epoch": 23.914522821576764, "grad_norm": 14.228717803955078, "learning_rate": 1.0439170124481329e-05, "loss": 0.2927, "step": 28817 }, { "epoch": 23.915352697095436, "grad_norm": 26.359230041503906, "learning_rate": 1.0438838174273859e-05, "loss": 0.3285, "step": 28818 }, { "epoch": 23.916182572614108, "grad_norm": 77.86827087402344, "learning_rate": 1.0438506224066391e-05, "loss": 1.2125, "step": 28819 }, { "epoch": 23.91701244813278, "grad_norm": 12.305808067321777, "learning_rate": 1.0438174273858923e-05, "loss": 0.3725, "step": 28820 }, { "epoch": 23.917842323651453, "grad_norm": 162.82943725585938, "learning_rate": 1.0437842323651452e-05, "loss": 1.0005, "step": 28821 }, { "epoch": 23.918672199170125, "grad_norm": 49.83736038208008, "learning_rate": 1.0437510373443984e-05, "loss": 0.6479, "step": 28822 }, { "epoch": 23.919502074688797, "grad_norm": 26.74863624572754, "learning_rate": 1.0437178423236516e-05, "loss": 0.4223, "step": 28823 }, { "epoch": 23.92033195020747, "grad_norm": 25.931581497192383, "learning_rate": 1.0436846473029048e-05, "loss": 0.2616, "step": 28824 }, { "epoch": 23.92116182572614, "grad_norm": 34.72332763671875, "learning_rate": 1.0436514522821577e-05, "loss": 0.6865, "step": 28825 }, { "epoch": 23.921991701244814, "grad_norm": 21.053924560546875, "learning_rate": 
1.0436182572614109e-05, "loss": 0.3283, "step": 28826 }, { "epoch": 23.922821576763486, "grad_norm": 40.24312973022461, "learning_rate": 1.043585062240664e-05, "loss": 0.7099, "step": 28827 }, { "epoch": 23.923651452282158, "grad_norm": 19.73390007019043, "learning_rate": 1.0435518672199172e-05, "loss": 0.3438, "step": 28828 }, { "epoch": 23.92448132780083, "grad_norm": 47.36387252807617, "learning_rate": 1.0435186721991702e-05, "loss": 0.3866, "step": 28829 }, { "epoch": 23.925311203319502, "grad_norm": 84.02635192871094, "learning_rate": 1.0434854771784232e-05, "loss": 0.395, "step": 28830 }, { "epoch": 23.926141078838175, "grad_norm": 49.460147857666016, "learning_rate": 1.0434522821576765e-05, "loss": 0.4426, "step": 28831 }, { "epoch": 23.926970954356847, "grad_norm": 54.296207427978516, "learning_rate": 1.0434190871369297e-05, "loss": 0.3373, "step": 28832 }, { "epoch": 23.92780082987552, "grad_norm": 47.077877044677734, "learning_rate": 1.0433858921161825e-05, "loss": 0.3717, "step": 28833 }, { "epoch": 23.92863070539419, "grad_norm": 73.77875518798828, "learning_rate": 1.0433526970954357e-05, "loss": 0.81, "step": 28834 }, { "epoch": 23.929460580912863, "grad_norm": 68.61871337890625, "learning_rate": 1.043319502074689e-05, "loss": 0.4414, "step": 28835 }, { "epoch": 23.930290456431536, "grad_norm": 10.63660717010498, "learning_rate": 1.043286307053942e-05, "loss": 0.3231, "step": 28836 }, { "epoch": 23.931120331950208, "grad_norm": 33.76466751098633, "learning_rate": 1.043253112033195e-05, "loss": 0.372, "step": 28837 }, { "epoch": 23.93195020746888, "grad_norm": 23.557649612426758, "learning_rate": 1.0432199170124482e-05, "loss": 0.2634, "step": 28838 }, { "epoch": 23.932780082987552, "grad_norm": 27.79631996154785, "learning_rate": 1.0431867219917013e-05, "loss": 0.4349, "step": 28839 }, { "epoch": 23.933609958506224, "grad_norm": 22.106130599975586, "learning_rate": 1.0431535269709545e-05, "loss": 0.3954, "step": 28840 }, { "epoch": 23.934439834024896, 
"grad_norm": 74.17286682128906, "learning_rate": 1.0431203319502074e-05, "loss": 0.8287, "step": 28841 }, { "epoch": 23.93526970954357, "grad_norm": 46.33122634887695, "learning_rate": 1.0430871369294606e-05, "loss": 0.444, "step": 28842 }, { "epoch": 23.93609958506224, "grad_norm": 35.60693359375, "learning_rate": 1.0430539419087138e-05, "loss": 0.3378, "step": 28843 }, { "epoch": 23.936929460580913, "grad_norm": 18.22658348083496, "learning_rate": 1.043020746887967e-05, "loss": 0.2816, "step": 28844 }, { "epoch": 23.937759336099585, "grad_norm": 67.81179809570312, "learning_rate": 1.0429875518672199e-05, "loss": 0.4702, "step": 28845 }, { "epoch": 23.938589211618257, "grad_norm": 47.08677291870117, "learning_rate": 1.0429543568464731e-05, "loss": 0.6369, "step": 28846 }, { "epoch": 23.93941908713693, "grad_norm": 30.545503616333008, "learning_rate": 1.0429211618257263e-05, "loss": 0.2713, "step": 28847 }, { "epoch": 23.940248962655602, "grad_norm": 15.5072021484375, "learning_rate": 1.0428879668049793e-05, "loss": 0.3257, "step": 28848 }, { "epoch": 23.941078838174274, "grad_norm": 23.88286018371582, "learning_rate": 1.0428547717842326e-05, "loss": 0.3946, "step": 28849 }, { "epoch": 23.941908713692946, "grad_norm": 25.93494987487793, "learning_rate": 1.0428215767634854e-05, "loss": 0.3121, "step": 28850 }, { "epoch": 23.94273858921162, "grad_norm": 45.9074821472168, "learning_rate": 1.0427883817427386e-05, "loss": 0.5866, "step": 28851 }, { "epoch": 23.94356846473029, "grad_norm": 30.345300674438477, "learning_rate": 1.0427551867219918e-05, "loss": 0.2535, "step": 28852 }, { "epoch": 23.944398340248963, "grad_norm": 27.482534408569336, "learning_rate": 1.042721991701245e-05, "loss": 0.451, "step": 28853 }, { "epoch": 23.945228215767635, "grad_norm": 12.193585395812988, "learning_rate": 1.042688796680498e-05, "loss": 0.2919, "step": 28854 }, { "epoch": 23.946058091286307, "grad_norm": 45.09153747558594, "learning_rate": 1.0426556016597511e-05, "loss": 0.4126, 
"step": 28855 }, { "epoch": 23.94688796680498, "grad_norm": 141.4405059814453, "learning_rate": 1.0426224066390043e-05, "loss": 0.8683, "step": 28856 }, { "epoch": 23.94771784232365, "grad_norm": 37.69644546508789, "learning_rate": 1.0425892116182574e-05, "loss": 0.4276, "step": 28857 }, { "epoch": 23.948547717842324, "grad_norm": 35.622840881347656, "learning_rate": 1.0425560165975104e-05, "loss": 0.4549, "step": 28858 }, { "epoch": 23.949377593360996, "grad_norm": 26.207857131958008, "learning_rate": 1.0425228215767635e-05, "loss": 0.3786, "step": 28859 }, { "epoch": 23.95020746887967, "grad_norm": 51.71885299682617, "learning_rate": 1.0424896265560167e-05, "loss": 0.6623, "step": 28860 }, { "epoch": 23.95103734439834, "grad_norm": 73.63811492919922, "learning_rate": 1.0424564315352699e-05, "loss": 0.4956, "step": 28861 }, { "epoch": 23.951867219917013, "grad_norm": 49.1778450012207, "learning_rate": 1.0424232365145228e-05, "loss": 0.4286, "step": 28862 }, { "epoch": 23.952697095435685, "grad_norm": 28.26068878173828, "learning_rate": 1.042390041493776e-05, "loss": 0.3949, "step": 28863 }, { "epoch": 23.953526970954357, "grad_norm": 83.24263763427734, "learning_rate": 1.0423568464730292e-05, "loss": 0.55, "step": 28864 }, { "epoch": 23.95435684647303, "grad_norm": 35.8311882019043, "learning_rate": 1.0423236514522822e-05, "loss": 0.6305, "step": 28865 }, { "epoch": 23.9551867219917, "grad_norm": 43.963565826416016, "learning_rate": 1.0422904564315353e-05, "loss": 1.023, "step": 28866 }, { "epoch": 23.956016597510374, "grad_norm": 14.830660820007324, "learning_rate": 1.0422572614107885e-05, "loss": 0.3522, "step": 28867 }, { "epoch": 23.956846473029046, "grad_norm": 31.3945255279541, "learning_rate": 1.0422240663900415e-05, "loss": 0.7774, "step": 28868 }, { "epoch": 23.957676348547718, "grad_norm": 41.864471435546875, "learning_rate": 1.0421908713692947e-05, "loss": 0.4746, "step": 28869 }, { "epoch": 23.95850622406639, "grad_norm": 36.44379425048828, 
"learning_rate": 1.0421576763485478e-05, "loss": 0.3795, "step": 28870 }, { "epoch": 23.959336099585062, "grad_norm": 26.411876678466797, "learning_rate": 1.0421244813278008e-05, "loss": 0.4479, "step": 28871 }, { "epoch": 23.960165975103735, "grad_norm": 66.09217834472656, "learning_rate": 1.042091286307054e-05, "loss": 0.7357, "step": 28872 }, { "epoch": 23.960995850622407, "grad_norm": 59.165611267089844, "learning_rate": 1.0420580912863072e-05, "loss": 0.3696, "step": 28873 }, { "epoch": 23.96182572614108, "grad_norm": 29.437429428100586, "learning_rate": 1.0420248962655603e-05, "loss": 0.4258, "step": 28874 }, { "epoch": 23.96265560165975, "grad_norm": 16.057092666625977, "learning_rate": 1.0419917012448133e-05, "loss": 0.3223, "step": 28875 }, { "epoch": 23.963485477178423, "grad_norm": 70.04505920410156, "learning_rate": 1.0419585062240665e-05, "loss": 0.4337, "step": 28876 }, { "epoch": 23.964315352697096, "grad_norm": 57.42743682861328, "learning_rate": 1.0419253112033196e-05, "loss": 0.798, "step": 28877 }, { "epoch": 23.965145228215768, "grad_norm": 44.5604362487793, "learning_rate": 1.0418921161825728e-05, "loss": 0.3906, "step": 28878 }, { "epoch": 23.96597510373444, "grad_norm": 12.90249252319336, "learning_rate": 1.0418589211618258e-05, "loss": 0.2747, "step": 28879 }, { "epoch": 23.966804979253112, "grad_norm": 83.33706665039062, "learning_rate": 1.0418257261410789e-05, "loss": 0.597, "step": 28880 }, { "epoch": 23.967634854771784, "grad_norm": 30.62457275390625, "learning_rate": 1.041792531120332e-05, "loss": 0.4158, "step": 28881 }, { "epoch": 23.968464730290457, "grad_norm": 34.469581604003906, "learning_rate": 1.0417593360995853e-05, "loss": 0.3486, "step": 28882 }, { "epoch": 23.96929460580913, "grad_norm": 114.9518051147461, "learning_rate": 1.0417261410788382e-05, "loss": 0.8822, "step": 28883 }, { "epoch": 23.9701244813278, "grad_norm": 28.049640655517578, "learning_rate": 1.0416929460580914e-05, "loss": 0.3213, "step": 28884 }, { "epoch": 
23.970954356846473, "grad_norm": 24.73187828063965, "learning_rate": 1.0416597510373446e-05, "loss": 0.4895, "step": 28885 }, { "epoch": 23.971784232365145, "grad_norm": 62.280029296875, "learning_rate": 1.0416265560165976e-05, "loss": 0.5768, "step": 28886 }, { "epoch": 23.972614107883818, "grad_norm": 8.77784538269043, "learning_rate": 1.0415933609958507e-05, "loss": 0.271, "step": 28887 }, { "epoch": 23.97344398340249, "grad_norm": 31.847576141357422, "learning_rate": 1.0415601659751037e-05, "loss": 0.3649, "step": 28888 }, { "epoch": 23.974273858921162, "grad_norm": 31.42720603942871, "learning_rate": 1.041526970954357e-05, "loss": 0.3148, "step": 28889 }, { "epoch": 23.975103734439834, "grad_norm": 38.233089447021484, "learning_rate": 1.0414937759336101e-05, "loss": 0.7662, "step": 28890 }, { "epoch": 23.975933609958506, "grad_norm": 61.290260314941406, "learning_rate": 1.041460580912863e-05, "loss": 1.0761, "step": 28891 }, { "epoch": 23.97676348547718, "grad_norm": 10.237103462219238, "learning_rate": 1.0414273858921162e-05, "loss": 0.2508, "step": 28892 }, { "epoch": 23.97759336099585, "grad_norm": 40.12974548339844, "learning_rate": 1.0413941908713694e-05, "loss": 0.7347, "step": 28893 }, { "epoch": 23.978423236514523, "grad_norm": 38.92969512939453, "learning_rate": 1.0413609958506226e-05, "loss": 0.362, "step": 28894 }, { "epoch": 23.979253112033195, "grad_norm": 27.002878189086914, "learning_rate": 1.0413278008298755e-05, "loss": 0.3365, "step": 28895 }, { "epoch": 23.980082987551867, "grad_norm": 16.494369506835938, "learning_rate": 1.0412946058091287e-05, "loss": 0.3237, "step": 28896 }, { "epoch": 23.98091286307054, "grad_norm": 56.043697357177734, "learning_rate": 1.0412614107883818e-05, "loss": 0.462, "step": 28897 }, { "epoch": 23.98174273858921, "grad_norm": 123.13021087646484, "learning_rate": 1.041228215767635e-05, "loss": 0.5717, "step": 28898 }, { "epoch": 23.982572614107884, "grad_norm": 25.81494903564453, "learning_rate": 
1.041195020746888e-05, "loss": 0.3651, "step": 28899 }, { "epoch": 23.983402489626556, "grad_norm": 70.79026794433594, "learning_rate": 1.041161825726141e-05, "loss": 0.5475, "step": 28900 }, { "epoch": 23.98423236514523, "grad_norm": 114.35098266601562, "learning_rate": 1.0411286307053943e-05, "loss": 0.6188, "step": 28901 }, { "epoch": 23.9850622406639, "grad_norm": 57.2332878112793, "learning_rate": 1.0410954356846475e-05, "loss": 0.4523, "step": 28902 }, { "epoch": 23.985892116182573, "grad_norm": 26.015119552612305, "learning_rate": 1.0410622406639007e-05, "loss": 0.3427, "step": 28903 }, { "epoch": 23.986721991701245, "grad_norm": 19.809965133666992, "learning_rate": 1.0410290456431536e-05, "loss": 0.3642, "step": 28904 }, { "epoch": 23.987551867219917, "grad_norm": 41.13433837890625, "learning_rate": 1.0409958506224068e-05, "loss": 0.5769, "step": 28905 }, { "epoch": 23.98838174273859, "grad_norm": 113.00499725341797, "learning_rate": 1.0409626556016598e-05, "loss": 0.3946, "step": 28906 }, { "epoch": 23.98921161825726, "grad_norm": 63.874549865722656, "learning_rate": 1.040929460580913e-05, "loss": 0.7538, "step": 28907 }, { "epoch": 23.990041493775934, "grad_norm": 28.057024002075195, "learning_rate": 1.040896265560166e-05, "loss": 0.4448, "step": 28908 }, { "epoch": 23.990871369294606, "grad_norm": 47.515647888183594, "learning_rate": 1.0408630705394191e-05, "loss": 0.4623, "step": 28909 }, { "epoch": 23.991701244813278, "grad_norm": 47.97981643676758, "learning_rate": 1.0408298755186723e-05, "loss": 0.4566, "step": 28910 }, { "epoch": 23.99253112033195, "grad_norm": 43.84368133544922, "learning_rate": 1.0407966804979255e-05, "loss": 0.3758, "step": 28911 }, { "epoch": 23.993360995850622, "grad_norm": 81.07012176513672, "learning_rate": 1.0407634854771784e-05, "loss": 0.8854, "step": 28912 }, { "epoch": 23.994190871369295, "grad_norm": 42.003692626953125, "learning_rate": 1.0407302904564316e-05, "loss": 0.5402, "step": 28913 }, { "epoch": 
23.995020746887967, "grad_norm": 17.52581024169922, "learning_rate": 1.0406970954356848e-05, "loss": 0.2046, "step": 28914 }, { "epoch": 23.99585062240664, "grad_norm": 50.403263092041016, "learning_rate": 1.0406639004149379e-05, "loss": 0.7875, "step": 28915 }, { "epoch": 23.99668049792531, "grad_norm": 28.480148315429688, "learning_rate": 1.0406307053941909e-05, "loss": 0.5276, "step": 28916 }, { "epoch": 23.997510373443983, "grad_norm": 22.4476261138916, "learning_rate": 1.0405975103734441e-05, "loss": 0.398, "step": 28917 }, { "epoch": 23.998340248962656, "grad_norm": 47.32040023803711, "learning_rate": 1.0405643153526971e-05, "loss": 0.978, "step": 28918 }, { "epoch": 23.999170124481328, "grad_norm": 48.516117095947266, "learning_rate": 1.0405311203319504e-05, "loss": 0.6798, "step": 28919 }, { "epoch": 24.0, "grad_norm": 38.34917449951172, "learning_rate": 1.0404979253112032e-05, "loss": 0.4168, "step": 28920 }, { "epoch": 24.000829875518672, "grad_norm": 22.81572723388672, "learning_rate": 1.0404647302904564e-05, "loss": 0.371, "step": 28921 }, { "epoch": 24.001659751037344, "grad_norm": 24.8248348236084, "learning_rate": 1.0404315352697097e-05, "loss": 0.4205, "step": 28922 }, { "epoch": 24.002489626556017, "grad_norm": 26.254993438720703, "learning_rate": 1.0403983402489629e-05, "loss": 0.4096, "step": 28923 }, { "epoch": 24.00331950207469, "grad_norm": 15.541062355041504, "learning_rate": 1.0403651452282157e-05, "loss": 0.3598, "step": 28924 }, { "epoch": 24.00414937759336, "grad_norm": 10.052359580993652, "learning_rate": 1.040331950207469e-05, "loss": 0.2201, "step": 28925 }, { "epoch": 24.004979253112033, "grad_norm": 23.758676528930664, "learning_rate": 1.0402987551867222e-05, "loss": 0.2752, "step": 28926 }, { "epoch": 24.005809128630705, "grad_norm": 20.412778854370117, "learning_rate": 1.0402655601659752e-05, "loss": 0.2937, "step": 28927 }, { "epoch": 24.006639004149378, "grad_norm": 34.1256217956543, "learning_rate": 1.0402323651452284e-05, 
"loss": 0.6676, "step": 28928 }, { "epoch": 24.00746887966805, "grad_norm": 10.796616554260254, "learning_rate": 1.0401991701244813e-05, "loss": 0.2567, "step": 28929 }, { "epoch": 24.008298755186722, "grad_norm": 41.158775329589844, "learning_rate": 1.0401659751037345e-05, "loss": 0.3071, "step": 28930 }, { "epoch": 24.009128630705394, "grad_norm": 53.88058090209961, "learning_rate": 1.0401327800829877e-05, "loss": 0.5682, "step": 28931 }, { "epoch": 24.009958506224066, "grad_norm": 79.14286804199219, "learning_rate": 1.0400995850622409e-05, "loss": 0.4135, "step": 28932 }, { "epoch": 24.01078838174274, "grad_norm": 81.23846435546875, "learning_rate": 1.0400663900414938e-05, "loss": 1.2845, "step": 28933 }, { "epoch": 24.01161825726141, "grad_norm": 13.61093807220459, "learning_rate": 1.040033195020747e-05, "loss": 0.4246, "step": 28934 }, { "epoch": 24.012448132780083, "grad_norm": 46.929012298583984, "learning_rate": 1.04e-05, "loss": 0.3243, "step": 28935 }, { "epoch": 24.013278008298755, "grad_norm": 127.34793853759766, "learning_rate": 1.0399668049792532e-05, "loss": 0.4309, "step": 28936 }, { "epoch": 24.014107883817427, "grad_norm": 38.461631774902344, "learning_rate": 1.0399336099585063e-05, "loss": 0.5748, "step": 28937 }, { "epoch": 24.0149377593361, "grad_norm": 34.732234954833984, "learning_rate": 1.0399004149377593e-05, "loss": 0.6052, "step": 28938 }, { "epoch": 24.01576763485477, "grad_norm": 34.36310958862305, "learning_rate": 1.0398672199170125e-05, "loss": 0.3368, "step": 28939 }, { "epoch": 24.016597510373444, "grad_norm": 59.788238525390625, "learning_rate": 1.0398340248962658e-05, "loss": 0.455, "step": 28940 }, { "epoch": 24.017427385892116, "grad_norm": 18.282943725585938, "learning_rate": 1.0398008298755186e-05, "loss": 0.3403, "step": 28941 }, { "epoch": 24.01825726141079, "grad_norm": 21.469989776611328, "learning_rate": 1.0397676348547718e-05, "loss": 0.2913, "step": 28942 }, { "epoch": 24.01908713692946, "grad_norm": 27.151941299438477, 
"learning_rate": 1.039734439834025e-05, "loss": 0.5142, "step": 28943 }, { "epoch": 24.019917012448133, "grad_norm": 100.99739837646484, "learning_rate": 1.0397012448132781e-05, "loss": 0.3879, "step": 28944 }, { "epoch": 24.020746887966805, "grad_norm": 99.88959503173828, "learning_rate": 1.0396680497925311e-05, "loss": 0.6753, "step": 28945 }, { "epoch": 24.021576763485477, "grad_norm": 13.635512351989746, "learning_rate": 1.0396348547717843e-05, "loss": 0.3334, "step": 28946 }, { "epoch": 24.02240663900415, "grad_norm": 12.73128604888916, "learning_rate": 1.0396016597510374e-05, "loss": 0.2913, "step": 28947 }, { "epoch": 24.02323651452282, "grad_norm": 71.98736572265625, "learning_rate": 1.0395684647302906e-05, "loss": 0.7496, "step": 28948 }, { "epoch": 24.024066390041494, "grad_norm": 28.427200317382812, "learning_rate": 1.0395352697095436e-05, "loss": 0.3175, "step": 28949 }, { "epoch": 24.024896265560166, "grad_norm": 39.972145080566406, "learning_rate": 1.0395020746887967e-05, "loss": 0.5993, "step": 28950 }, { "epoch": 24.025726141078838, "grad_norm": 14.433891296386719, "learning_rate": 1.0394688796680499e-05, "loss": 0.2389, "step": 28951 }, { "epoch": 24.02655601659751, "grad_norm": 48.04685592651367, "learning_rate": 1.0394356846473031e-05, "loss": 0.5683, "step": 28952 }, { "epoch": 24.027385892116182, "grad_norm": 20.95355224609375, "learning_rate": 1.0394024896265561e-05, "loss": 0.2842, "step": 28953 }, { "epoch": 24.028215767634855, "grad_norm": 18.522043228149414, "learning_rate": 1.0393692946058092e-05, "loss": 0.2669, "step": 28954 }, { "epoch": 24.029045643153527, "grad_norm": 82.41358184814453, "learning_rate": 1.0393360995850624e-05, "loss": 0.704, "step": 28955 }, { "epoch": 24.0298755186722, "grad_norm": 18.697513580322266, "learning_rate": 1.0393029045643154e-05, "loss": 0.2476, "step": 28956 }, { "epoch": 24.03070539419087, "grad_norm": 21.6533145904541, "learning_rate": 1.0392697095435686e-05, "loss": 0.2866, "step": 28957 }, { 
"epoch": 24.031535269709543, "grad_norm": 18.263566970825195, "learning_rate": 1.0392365145228215e-05, "loss": 0.2848, "step": 28958 }, { "epoch": 24.032365145228216, "grad_norm": 16.091806411743164, "learning_rate": 1.0392033195020747e-05, "loss": 0.3109, "step": 28959 }, { "epoch": 24.033195020746888, "grad_norm": 57.69033432006836, "learning_rate": 1.039170124481328e-05, "loss": 0.2792, "step": 28960 }, { "epoch": 24.03402489626556, "grad_norm": 27.261960983276367, "learning_rate": 1.0391369294605811e-05, "loss": 0.4196, "step": 28961 }, { "epoch": 24.034854771784232, "grad_norm": 76.25448608398438, "learning_rate": 1.039103734439834e-05, "loss": 0.7749, "step": 28962 }, { "epoch": 24.035684647302904, "grad_norm": 65.57658386230469, "learning_rate": 1.0390705394190872e-05, "loss": 0.5848, "step": 28963 }, { "epoch": 24.036514522821577, "grad_norm": 86.58838653564453, "learning_rate": 1.0390373443983404e-05, "loss": 0.6659, "step": 28964 }, { "epoch": 24.03734439834025, "grad_norm": 22.76245880126953, "learning_rate": 1.0390041493775935e-05, "loss": 0.2741, "step": 28965 }, { "epoch": 24.03817427385892, "grad_norm": 43.0577278137207, "learning_rate": 1.0389709543568465e-05, "loss": 0.4081, "step": 28966 }, { "epoch": 24.039004149377593, "grad_norm": 132.29055786132812, "learning_rate": 1.0389377593360996e-05, "loss": 0.7334, "step": 28967 }, { "epoch": 24.039834024896265, "grad_norm": 20.944154739379883, "learning_rate": 1.0389045643153528e-05, "loss": 0.356, "step": 28968 }, { "epoch": 24.040663900414938, "grad_norm": 14.543899536132812, "learning_rate": 1.038871369294606e-05, "loss": 0.2632, "step": 28969 }, { "epoch": 24.04149377593361, "grad_norm": 47.08913040161133, "learning_rate": 1.0388381742738589e-05, "loss": 0.3564, "step": 28970 }, { "epoch": 24.042323651452282, "grad_norm": 40.564971923828125, "learning_rate": 1.038804979253112e-05, "loss": 0.3043, "step": 28971 }, { "epoch": 24.043153526970954, "grad_norm": 24.382469177246094, "learning_rate": 
1.0387717842323653e-05, "loss": 0.3977, "step": 28972 }, { "epoch": 24.043983402489626, "grad_norm": 87.58051300048828, "learning_rate": 1.0387385892116185e-05, "loss": 0.5368, "step": 28973 }, { "epoch": 24.0448132780083, "grad_norm": 13.838716506958008, "learning_rate": 1.0387053941908714e-05, "loss": 0.3236, "step": 28974 }, { "epoch": 24.04564315352697, "grad_norm": 137.02914428710938, "learning_rate": 1.0386721991701246e-05, "loss": 0.3147, "step": 28975 }, { "epoch": 24.046473029045643, "grad_norm": 16.91850471496582, "learning_rate": 1.0386390041493776e-05, "loss": 0.3999, "step": 28976 }, { "epoch": 24.047302904564315, "grad_norm": 28.78518295288086, "learning_rate": 1.0386058091286308e-05, "loss": 0.4286, "step": 28977 }, { "epoch": 24.048132780082987, "grad_norm": 39.13704299926758, "learning_rate": 1.0385726141078839e-05, "loss": 0.4786, "step": 28978 }, { "epoch": 24.04896265560166, "grad_norm": 18.084505081176758, "learning_rate": 1.0385394190871369e-05, "loss": 0.2763, "step": 28979 }, { "epoch": 24.04979253112033, "grad_norm": 18.73065757751465, "learning_rate": 1.0385062240663901e-05, "loss": 0.2517, "step": 28980 }, { "epoch": 24.050622406639004, "grad_norm": 20.666418075561523, "learning_rate": 1.0384730290456433e-05, "loss": 0.4154, "step": 28981 }, { "epoch": 24.051452282157676, "grad_norm": 65.36351013183594, "learning_rate": 1.0384398340248964e-05, "loss": 0.5868, "step": 28982 }, { "epoch": 24.05228215767635, "grad_norm": 45.483070373535156, "learning_rate": 1.0384066390041494e-05, "loss": 0.3743, "step": 28983 }, { "epoch": 24.05311203319502, "grad_norm": 82.32733917236328, "learning_rate": 1.0383734439834026e-05, "loss": 0.3767, "step": 28984 }, { "epoch": 24.053941908713693, "grad_norm": 28.523351669311523, "learning_rate": 1.0383402489626557e-05, "loss": 0.3922, "step": 28985 }, { "epoch": 24.054771784232365, "grad_norm": 15.524325370788574, "learning_rate": 1.0383070539419089e-05, "loss": 0.3082, "step": 28986 }, { "epoch": 
24.055601659751037, "grad_norm": 75.12833404541016, "learning_rate": 1.0382738589211619e-05, "loss": 0.3413, "step": 28987 }, { "epoch": 24.05643153526971, "grad_norm": 78.73564910888672, "learning_rate": 1.038240663900415e-05, "loss": 0.8997, "step": 28988 }, { "epoch": 24.05726141078838, "grad_norm": 76.9568099975586, "learning_rate": 1.0382074688796682e-05, "loss": 0.5247, "step": 28989 }, { "epoch": 24.058091286307054, "grad_norm": 29.369163513183594, "learning_rate": 1.0381742738589214e-05, "loss": 0.3924, "step": 28990 }, { "epoch": 24.058921161825726, "grad_norm": 118.54328155517578, "learning_rate": 1.0381410788381742e-05, "loss": 0.745, "step": 28991 }, { "epoch": 24.059751037344398, "grad_norm": 20.261920928955078, "learning_rate": 1.0381078838174275e-05, "loss": 0.4979, "step": 28992 }, { "epoch": 24.06058091286307, "grad_norm": 37.814754486083984, "learning_rate": 1.0380746887966807e-05, "loss": 0.3778, "step": 28993 }, { "epoch": 24.061410788381743, "grad_norm": 18.81315803527832, "learning_rate": 1.0380414937759337e-05, "loss": 0.308, "step": 28994 }, { "epoch": 24.062240663900415, "grad_norm": 65.08274841308594, "learning_rate": 1.0380082987551868e-05, "loss": 0.5965, "step": 28995 }, { "epoch": 24.063070539419087, "grad_norm": 14.926651000976562, "learning_rate": 1.03797510373444e-05, "loss": 0.2301, "step": 28996 }, { "epoch": 24.06390041493776, "grad_norm": 47.99458312988281, "learning_rate": 1.037941908713693e-05, "loss": 0.414, "step": 28997 }, { "epoch": 24.06473029045643, "grad_norm": 24.797992706298828, "learning_rate": 1.0379087136929462e-05, "loss": 0.4723, "step": 28998 }, { "epoch": 24.065560165975104, "grad_norm": 16.36507225036621, "learning_rate": 1.0378755186721991e-05, "loss": 0.2584, "step": 28999 }, { "epoch": 24.066390041493776, "grad_norm": 56.534297943115234, "learning_rate": 1.0378423236514523e-05, "loss": 0.2995, "step": 29000 }, { "epoch": 24.067219917012448, "grad_norm": 65.7926254272461, "learning_rate": 
1.0378091286307055e-05, "loss": 0.5714, "step": 29001 }, { "epoch": 24.06804979253112, "grad_norm": 40.80432891845703, "learning_rate": 1.0377759336099587e-05, "loss": 0.3866, "step": 29002 }, { "epoch": 24.068879668049792, "grad_norm": 14.645777702331543, "learning_rate": 1.0377427385892116e-05, "loss": 0.2393, "step": 29003 }, { "epoch": 24.069709543568464, "grad_norm": 46.28572463989258, "learning_rate": 1.0377095435684648e-05, "loss": 0.5265, "step": 29004 }, { "epoch": 24.070539419087137, "grad_norm": 22.030302047729492, "learning_rate": 1.0376763485477178e-05, "loss": 0.2599, "step": 29005 }, { "epoch": 24.07136929460581, "grad_norm": 45.080535888671875, "learning_rate": 1.037643153526971e-05, "loss": 0.2584, "step": 29006 }, { "epoch": 24.07219917012448, "grad_norm": 87.8809585571289, "learning_rate": 1.0376099585062243e-05, "loss": 0.7617, "step": 29007 }, { "epoch": 24.073029045643153, "grad_norm": 37.97475051879883, "learning_rate": 1.0375767634854771e-05, "loss": 0.3171, "step": 29008 }, { "epoch": 24.073858921161825, "grad_norm": 31.343738555908203, "learning_rate": 1.0375435684647303e-05, "loss": 0.3182, "step": 29009 }, { "epoch": 24.074688796680498, "grad_norm": 10.851924896240234, "learning_rate": 1.0375103734439836e-05, "loss": 0.2583, "step": 29010 }, { "epoch": 24.07551867219917, "grad_norm": 70.83467102050781, "learning_rate": 1.0374771784232368e-05, "loss": 0.6126, "step": 29011 }, { "epoch": 24.076348547717842, "grad_norm": 17.323219299316406, "learning_rate": 1.0374439834024896e-05, "loss": 0.2307, "step": 29012 }, { "epoch": 24.077178423236514, "grad_norm": 53.844112396240234, "learning_rate": 1.0374107883817429e-05, "loss": 0.3757, "step": 29013 }, { "epoch": 24.078008298755186, "grad_norm": 13.420313835144043, "learning_rate": 1.0373775933609959e-05, "loss": 0.259, "step": 29014 }, { "epoch": 24.07883817427386, "grad_norm": 162.15151977539062, "learning_rate": 1.0373443983402491e-05, "loss": 0.8551, "step": 29015 }, { "epoch": 
24.07966804979253, "grad_norm": 32.8790397644043, "learning_rate": 1.0373112033195021e-05, "loss": 0.2688, "step": 29016 }, { "epoch": 24.080497925311203, "grad_norm": 21.63648796081543, "learning_rate": 1.0372780082987552e-05, "loss": 0.3136, "step": 29017 }, { "epoch": 24.081327800829875, "grad_norm": 72.98580932617188, "learning_rate": 1.0372448132780084e-05, "loss": 0.4849, "step": 29018 }, { "epoch": 24.082157676348547, "grad_norm": 25.909439086914062, "learning_rate": 1.0372116182572616e-05, "loss": 0.265, "step": 29019 }, { "epoch": 24.08298755186722, "grad_norm": 34.93948745727539, "learning_rate": 1.0371784232365145e-05, "loss": 0.4113, "step": 29020 }, { "epoch": 24.083817427385892, "grad_norm": 17.223201751708984, "learning_rate": 1.0371452282157677e-05, "loss": 0.3213, "step": 29021 }, { "epoch": 24.084647302904564, "grad_norm": 22.1031436920166, "learning_rate": 1.0371120331950209e-05, "loss": 0.3168, "step": 29022 }, { "epoch": 24.085477178423236, "grad_norm": 12.1842041015625, "learning_rate": 1.037078838174274e-05, "loss": 0.2329, "step": 29023 }, { "epoch": 24.08630705394191, "grad_norm": 29.11429214477539, "learning_rate": 1.037045643153527e-05, "loss": 0.5561, "step": 29024 }, { "epoch": 24.08713692946058, "grad_norm": 15.331332206726074, "learning_rate": 1.0370124481327802e-05, "loss": 0.3327, "step": 29025 }, { "epoch": 24.087966804979253, "grad_norm": 40.66472244262695, "learning_rate": 1.0369792531120332e-05, "loss": 0.4677, "step": 29026 }, { "epoch": 24.088796680497925, "grad_norm": 32.343360900878906, "learning_rate": 1.0369460580912864e-05, "loss": 0.4133, "step": 29027 }, { "epoch": 24.089626556016597, "grad_norm": 71.64383697509766, "learning_rate": 1.0369128630705393e-05, "loss": 0.4891, "step": 29028 }, { "epoch": 24.09045643153527, "grad_norm": 22.381637573242188, "learning_rate": 1.0368796680497925e-05, "loss": 0.2478, "step": 29029 }, { "epoch": 24.09128630705394, "grad_norm": 83.90869140625, "learning_rate": 
1.0368464730290457e-05, "loss": 0.3901, "step": 29030 }, { "epoch": 24.092116182572614, "grad_norm": 144.0170135498047, "learning_rate": 1.036813278008299e-05, "loss": 0.6779, "step": 29031 }, { "epoch": 24.092946058091286, "grad_norm": 20.600767135620117, "learning_rate": 1.036780082987552e-05, "loss": 0.3399, "step": 29032 }, { "epoch": 24.093775933609958, "grad_norm": 66.81385803222656, "learning_rate": 1.036746887966805e-05, "loss": 0.3941, "step": 29033 }, { "epoch": 24.09460580912863, "grad_norm": 212.2354278564453, "learning_rate": 1.0367136929460582e-05, "loss": 0.7386, "step": 29034 }, { "epoch": 24.095435684647303, "grad_norm": 28.243207931518555, "learning_rate": 1.0366804979253113e-05, "loss": 0.2964, "step": 29035 }, { "epoch": 24.096265560165975, "grad_norm": 20.04915428161621, "learning_rate": 1.0366473029045645e-05, "loss": 0.2604, "step": 29036 }, { "epoch": 24.097095435684647, "grad_norm": 17.399707794189453, "learning_rate": 1.0366141078838174e-05, "loss": 0.4239, "step": 29037 }, { "epoch": 24.09792531120332, "grad_norm": 21.665645599365234, "learning_rate": 1.0365809128630706e-05, "loss": 0.4626, "step": 29038 }, { "epoch": 24.09875518672199, "grad_norm": 94.54462432861328, "learning_rate": 1.0365477178423238e-05, "loss": 0.658, "step": 29039 }, { "epoch": 24.099585062240664, "grad_norm": 25.87480354309082, "learning_rate": 1.036514522821577e-05, "loss": 0.3419, "step": 29040 }, { "epoch": 24.100414937759336, "grad_norm": 86.00003051757812, "learning_rate": 1.0364813278008299e-05, "loss": 1.3665, "step": 29041 }, { "epoch": 24.101244813278008, "grad_norm": 26.375574111938477, "learning_rate": 1.0364481327800831e-05, "loss": 0.2927, "step": 29042 }, { "epoch": 24.10207468879668, "grad_norm": 10.023313522338867, "learning_rate": 1.0364149377593363e-05, "loss": 0.1894, "step": 29043 }, { "epoch": 24.102904564315352, "grad_norm": 38.47138595581055, "learning_rate": 1.0363817427385893e-05, "loss": 0.4221, "step": 29044 }, { "epoch": 
24.103734439834025, "grad_norm": 39.97119140625, "learning_rate": 1.0363485477178424e-05, "loss": 0.3282, "step": 29045 }, { "epoch": 24.104564315352697, "grad_norm": 117.12885284423828, "learning_rate": 1.0363153526970954e-05, "loss": 0.9325, "step": 29046 }, { "epoch": 24.10539419087137, "grad_norm": 19.750444412231445, "learning_rate": 1.0362821576763486e-05, "loss": 0.2896, "step": 29047 }, { "epoch": 24.10622406639004, "grad_norm": 22.91856575012207, "learning_rate": 1.0362489626556018e-05, "loss": 0.3322, "step": 29048 }, { "epoch": 24.107053941908713, "grad_norm": 9.383075714111328, "learning_rate": 1.0362157676348547e-05, "loss": 0.1921, "step": 29049 }, { "epoch": 24.107883817427386, "grad_norm": 26.79589080810547, "learning_rate": 1.036182572614108e-05, "loss": 0.3165, "step": 29050 }, { "epoch": 24.108713692946058, "grad_norm": 18.809059143066406, "learning_rate": 1.0361493775933611e-05, "loss": 0.2518, "step": 29051 }, { "epoch": 24.10954356846473, "grad_norm": 14.740804672241211, "learning_rate": 1.0361161825726142e-05, "loss": 0.2732, "step": 29052 }, { "epoch": 24.110373443983402, "grad_norm": 19.485370635986328, "learning_rate": 1.0360829875518672e-05, "loss": 0.3597, "step": 29053 }, { "epoch": 24.111203319502074, "grad_norm": 37.23759841918945, "learning_rate": 1.0360497925311204e-05, "loss": 0.362, "step": 29054 }, { "epoch": 24.112033195020746, "grad_norm": 23.55154037475586, "learning_rate": 1.0360165975103735e-05, "loss": 0.3566, "step": 29055 }, { "epoch": 24.11286307053942, "grad_norm": 68.66582489013672, "learning_rate": 1.0359834024896267e-05, "loss": 0.3885, "step": 29056 }, { "epoch": 24.11369294605809, "grad_norm": 41.35734939575195, "learning_rate": 1.0359502074688797e-05, "loss": 0.6434, "step": 29057 }, { "epoch": 24.114522821576763, "grad_norm": 153.0497589111328, "learning_rate": 1.0359170124481328e-05, "loss": 1.0374, "step": 29058 }, { "epoch": 24.115352697095435, "grad_norm": 26.003625869750977, "learning_rate": 
1.035883817427386e-05, "loss": 0.4607, "step": 29059 }, { "epoch": 24.116182572614107, "grad_norm": 24.82961654663086, "learning_rate": 1.0358506224066392e-05, "loss": 0.3309, "step": 29060 }, { "epoch": 24.11701244813278, "grad_norm": 26.032821655273438, "learning_rate": 1.0358174273858922e-05, "loss": 0.4461, "step": 29061 }, { "epoch": 24.117842323651452, "grad_norm": 46.95365905761719, "learning_rate": 1.0357842323651453e-05, "loss": 0.9841, "step": 29062 }, { "epoch": 24.118672199170124, "grad_norm": 34.61841583251953, "learning_rate": 1.0357510373443985e-05, "loss": 0.5218, "step": 29063 }, { "epoch": 24.119502074688796, "grad_norm": 32.6524772644043, "learning_rate": 1.0357178423236515e-05, "loss": 0.6382, "step": 29064 }, { "epoch": 24.12033195020747, "grad_norm": 34.23725891113281, "learning_rate": 1.0356846473029047e-05, "loss": 0.4245, "step": 29065 }, { "epoch": 24.12116182572614, "grad_norm": 29.59595489501953, "learning_rate": 1.0356514522821576e-05, "loss": 0.3082, "step": 29066 }, { "epoch": 24.121991701244813, "grad_norm": 40.69774627685547, "learning_rate": 1.0356182572614108e-05, "loss": 0.8245, "step": 29067 }, { "epoch": 24.122821576763485, "grad_norm": 39.85627746582031, "learning_rate": 1.035585062240664e-05, "loss": 0.4302, "step": 29068 }, { "epoch": 24.123651452282157, "grad_norm": 71.94113159179688, "learning_rate": 1.0355518672199172e-05, "loss": 0.5177, "step": 29069 }, { "epoch": 24.12448132780083, "grad_norm": 60.216575622558594, "learning_rate": 1.0355186721991701e-05, "loss": 0.7252, "step": 29070 }, { "epoch": 24.1253112033195, "grad_norm": 45.16213607788086, "learning_rate": 1.0354854771784233e-05, "loss": 0.4159, "step": 29071 }, { "epoch": 24.126141078838174, "grad_norm": 28.063451766967773, "learning_rate": 1.0354522821576765e-05, "loss": 0.4261, "step": 29072 }, { "epoch": 24.126970954356846, "grad_norm": 33.699005126953125, "learning_rate": 1.0354190871369296e-05, "loss": 0.5262, "step": 29073 }, { "epoch": 
24.127800829875518, "grad_norm": 17.29785919189453, "learning_rate": 1.0353858921161826e-05, "loss": 0.3328, "step": 29074 }, { "epoch": 24.12863070539419, "grad_norm": 73.89204406738281, "learning_rate": 1.0353526970954357e-05, "loss": 0.6149, "step": 29075 }, { "epoch": 24.129460580912863, "grad_norm": 25.614238739013672, "learning_rate": 1.0353195020746889e-05, "loss": 0.5481, "step": 29076 }, { "epoch": 24.130290456431535, "grad_norm": 36.58578109741211, "learning_rate": 1.035286307053942e-05, "loss": 0.7486, "step": 29077 }, { "epoch": 24.131120331950207, "grad_norm": 50.65105056762695, "learning_rate": 1.035253112033195e-05, "loss": 0.4024, "step": 29078 }, { "epoch": 24.13195020746888, "grad_norm": 41.40503692626953, "learning_rate": 1.0352199170124482e-05, "loss": 0.3065, "step": 29079 }, { "epoch": 24.13278008298755, "grad_norm": 33.65528106689453, "learning_rate": 1.0351867219917014e-05, "loss": 0.3072, "step": 29080 }, { "epoch": 24.133609958506224, "grad_norm": 52.37525177001953, "learning_rate": 1.0351535269709546e-05, "loss": 0.6792, "step": 29081 }, { "epoch": 24.134439834024896, "grad_norm": 89.00145721435547, "learning_rate": 1.0351203319502075e-05, "loss": 0.3759, "step": 29082 }, { "epoch": 24.135269709543568, "grad_norm": 151.86444091796875, "learning_rate": 1.0350871369294607e-05, "loss": 0.3274, "step": 29083 }, { "epoch": 24.13609958506224, "grad_norm": 42.0855827331543, "learning_rate": 1.0350539419087137e-05, "loss": 0.671, "step": 29084 }, { "epoch": 24.136929460580912, "grad_norm": 8.623092651367188, "learning_rate": 1.0350207468879669e-05, "loss": 0.3022, "step": 29085 }, { "epoch": 24.137759336099585, "grad_norm": 53.8520622253418, "learning_rate": 1.0349875518672201e-05, "loss": 0.6613, "step": 29086 }, { "epoch": 24.138589211618257, "grad_norm": 28.36780548095703, "learning_rate": 1.034954356846473e-05, "loss": 0.2828, "step": 29087 }, { "epoch": 24.13941908713693, "grad_norm": 56.09452438354492, "learning_rate": 
1.0349211618257262e-05, "loss": 0.6631, "step": 29088 }, { "epoch": 24.1402489626556, "grad_norm": 22.535932540893555, "learning_rate": 1.0348879668049794e-05, "loss": 0.287, "step": 29089 }, { "epoch": 24.141078838174273, "grad_norm": 24.842220306396484, "learning_rate": 1.0348547717842326e-05, "loss": 0.4195, "step": 29090 }, { "epoch": 24.141908713692946, "grad_norm": 76.20726776123047, "learning_rate": 1.0348215767634855e-05, "loss": 0.7441, "step": 29091 }, { "epoch": 24.142738589211618, "grad_norm": 28.93206214904785, "learning_rate": 1.0347883817427387e-05, "loss": 0.237, "step": 29092 }, { "epoch": 24.14356846473029, "grad_norm": 46.43232345581055, "learning_rate": 1.0347551867219918e-05, "loss": 0.4664, "step": 29093 }, { "epoch": 24.144398340248962, "grad_norm": 42.38467025756836, "learning_rate": 1.034721991701245e-05, "loss": 0.3014, "step": 29094 }, { "epoch": 24.145228215767634, "grad_norm": 39.52705001831055, "learning_rate": 1.034688796680498e-05, "loss": 0.5764, "step": 29095 }, { "epoch": 24.146058091286307, "grad_norm": 30.995914459228516, "learning_rate": 1.034655601659751e-05, "loss": 0.3446, "step": 29096 }, { "epoch": 24.14688796680498, "grad_norm": 43.42519760131836, "learning_rate": 1.0346224066390043e-05, "loss": 0.3976, "step": 29097 }, { "epoch": 24.14771784232365, "grad_norm": 55.250244140625, "learning_rate": 1.0345892116182575e-05, "loss": 0.5985, "step": 29098 }, { "epoch": 24.148547717842323, "grad_norm": 43.083316802978516, "learning_rate": 1.0345560165975103e-05, "loss": 0.3682, "step": 29099 }, { "epoch": 24.149377593360995, "grad_norm": 23.718605041503906, "learning_rate": 1.0345228215767636e-05, "loss": 0.3353, "step": 29100 }, { "epoch": 24.150207468879668, "grad_norm": 15.817431449890137, "learning_rate": 1.0344896265560168e-05, "loss": 0.4781, "step": 29101 }, { "epoch": 24.15103734439834, "grad_norm": 33.783836364746094, "learning_rate": 1.0344564315352698e-05, "loss": 0.4819, "step": 29102 }, { "epoch": 24.151867219917012, 
"grad_norm": 21.590476989746094, "learning_rate": 1.0344232365145228e-05, "loss": 0.2298, "step": 29103 }, { "epoch": 24.152697095435684, "grad_norm": 96.33084869384766, "learning_rate": 1.034390041493776e-05, "loss": 0.9418, "step": 29104 }, { "epoch": 24.153526970954356, "grad_norm": 37.74424362182617, "learning_rate": 1.0343568464730291e-05, "loss": 0.4494, "step": 29105 }, { "epoch": 24.15435684647303, "grad_norm": 45.35783767700195, "learning_rate": 1.0343236514522823e-05, "loss": 0.34, "step": 29106 }, { "epoch": 24.1551867219917, "grad_norm": 29.279085159301758, "learning_rate": 1.0342904564315352e-05, "loss": 0.4178, "step": 29107 }, { "epoch": 24.156016597510373, "grad_norm": 14.632285118103027, "learning_rate": 1.0342572614107884e-05, "loss": 0.2718, "step": 29108 }, { "epoch": 24.156846473029045, "grad_norm": 142.14364624023438, "learning_rate": 1.0342240663900416e-05, "loss": 0.3052, "step": 29109 }, { "epoch": 24.157676348547717, "grad_norm": 31.61603546142578, "learning_rate": 1.0341908713692948e-05, "loss": 0.3694, "step": 29110 }, { "epoch": 24.15850622406639, "grad_norm": 13.220647811889648, "learning_rate": 1.0341576763485479e-05, "loss": 0.2436, "step": 29111 }, { "epoch": 24.15933609958506, "grad_norm": 90.77957153320312, "learning_rate": 1.0341244813278009e-05, "loss": 0.427, "step": 29112 }, { "epoch": 24.160165975103734, "grad_norm": 105.21551513671875, "learning_rate": 1.0340912863070541e-05, "loss": 0.5194, "step": 29113 }, { "epoch": 24.160995850622406, "grad_norm": 21.36522674560547, "learning_rate": 1.0340580912863071e-05, "loss": 0.3488, "step": 29114 }, { "epoch": 24.16182572614108, "grad_norm": 16.645750045776367, "learning_rate": 1.0340248962655604e-05, "loss": 0.3698, "step": 29115 }, { "epoch": 24.16265560165975, "grad_norm": 40.56373977661133, "learning_rate": 1.0339917012448132e-05, "loss": 0.4439, "step": 29116 }, { "epoch": 24.163485477178423, "grad_norm": 16.114683151245117, "learning_rate": 1.0339585062240664e-05, "loss": 
0.2978, "step": 29117 }, { "epoch": 24.164315352697095, "grad_norm": 57.948829650878906, "learning_rate": 1.0339253112033196e-05, "loss": 0.5608, "step": 29118 }, { "epoch": 24.165145228215767, "grad_norm": 38.37825393676758, "learning_rate": 1.0338921161825729e-05, "loss": 0.7198, "step": 29119 }, { "epoch": 24.16597510373444, "grad_norm": 13.467967987060547, "learning_rate": 1.0338589211618257e-05, "loss": 0.3335, "step": 29120 }, { "epoch": 24.16680497925311, "grad_norm": 17.311201095581055, "learning_rate": 1.033825726141079e-05, "loss": 0.2685, "step": 29121 }, { "epoch": 24.167634854771784, "grad_norm": 53.60755920410156, "learning_rate": 1.033792531120332e-05, "loss": 0.3757, "step": 29122 }, { "epoch": 24.168464730290456, "grad_norm": 39.25142288208008, "learning_rate": 1.0337593360995852e-05, "loss": 0.3577, "step": 29123 }, { "epoch": 24.169294605809128, "grad_norm": 31.70055389404297, "learning_rate": 1.0337261410788382e-05, "loss": 0.2829, "step": 29124 }, { "epoch": 24.1701244813278, "grad_norm": 51.85874557495117, "learning_rate": 1.0336929460580913e-05, "loss": 0.5768, "step": 29125 }, { "epoch": 24.170954356846472, "grad_norm": 13.348200798034668, "learning_rate": 1.0336597510373445e-05, "loss": 0.2988, "step": 29126 }, { "epoch": 24.171784232365145, "grad_norm": 59.58420944213867, "learning_rate": 1.0336265560165977e-05, "loss": 0.566, "step": 29127 }, { "epoch": 24.172614107883817, "grad_norm": 55.70257568359375, "learning_rate": 1.0335933609958506e-05, "loss": 0.9624, "step": 29128 }, { "epoch": 24.17344398340249, "grad_norm": 26.45342254638672, "learning_rate": 1.0335601659751038e-05, "loss": 0.4209, "step": 29129 }, { "epoch": 24.17427385892116, "grad_norm": 46.63818359375, "learning_rate": 1.033526970954357e-05, "loss": 0.3123, "step": 29130 }, { "epoch": 24.175103734439833, "grad_norm": 53.92099380493164, "learning_rate": 1.03349377593361e-05, "loss": 0.3598, "step": 29131 }, { "epoch": 24.175933609958506, "grad_norm": 54.81077575683594, 
"learning_rate": 1.033460580912863e-05, "loss": 0.7719, "step": 29132 }, { "epoch": 24.176763485477178, "grad_norm": 46.891334533691406, "learning_rate": 1.0334273858921163e-05, "loss": 0.7501, "step": 29133 }, { "epoch": 24.17759336099585, "grad_norm": 23.22966957092285, "learning_rate": 1.0333941908713693e-05, "loss": 0.4833, "step": 29134 }, { "epoch": 24.178423236514522, "grad_norm": 6.995962142944336, "learning_rate": 1.0333609958506225e-05, "loss": 0.2139, "step": 29135 }, { "epoch": 24.179253112033194, "grad_norm": 33.87348937988281, "learning_rate": 1.0333278008298754e-05, "loss": 0.3432, "step": 29136 }, { "epoch": 24.180082987551867, "grad_norm": 57.125404357910156, "learning_rate": 1.0332946058091286e-05, "loss": 0.396, "step": 29137 }, { "epoch": 24.18091286307054, "grad_norm": 98.0413818359375, "learning_rate": 1.0332614107883818e-05, "loss": 0.466, "step": 29138 }, { "epoch": 24.18174273858921, "grad_norm": 91.97891998291016, "learning_rate": 1.033228215767635e-05, "loss": 0.933, "step": 29139 }, { "epoch": 24.182572614107883, "grad_norm": 33.49000549316406, "learning_rate": 1.033195020746888e-05, "loss": 0.5412, "step": 29140 }, { "epoch": 24.183402489626555, "grad_norm": 108.93319702148438, "learning_rate": 1.0331618257261411e-05, "loss": 0.675, "step": 29141 }, { "epoch": 24.184232365145228, "grad_norm": 32.47894287109375, "learning_rate": 1.0331286307053943e-05, "loss": 0.4137, "step": 29142 }, { "epoch": 24.1850622406639, "grad_norm": 59.41010665893555, "learning_rate": 1.0330954356846474e-05, "loss": 0.7862, "step": 29143 }, { "epoch": 24.185892116182572, "grad_norm": 85.69358825683594, "learning_rate": 1.0330622406639006e-05, "loss": 0.9265, "step": 29144 }, { "epoch": 24.186721991701244, "grad_norm": 18.676294326782227, "learning_rate": 1.0330290456431535e-05, "loss": 0.3581, "step": 29145 }, { "epoch": 24.187551867219916, "grad_norm": 47.66694641113281, "learning_rate": 1.0329958506224067e-05, "loss": 0.6594, "step": 29146 }, { "epoch": 
24.18838174273859, "grad_norm": 65.96978759765625, "learning_rate": 1.0329626556016599e-05, "loss": 0.5439, "step": 29147 }, { "epoch": 24.18921161825726, "grad_norm": 44.343997955322266, "learning_rate": 1.0329294605809131e-05, "loss": 0.8392, "step": 29148 }, { "epoch": 24.190041493775933, "grad_norm": 60.63993453979492, "learning_rate": 1.032896265560166e-05, "loss": 0.5627, "step": 29149 }, { "epoch": 24.190871369294605, "grad_norm": 30.815692901611328, "learning_rate": 1.0328630705394192e-05, "loss": 0.3122, "step": 29150 }, { "epoch": 24.191701244813277, "grad_norm": 20.518413543701172, "learning_rate": 1.0328298755186724e-05, "loss": 0.3094, "step": 29151 }, { "epoch": 24.19253112033195, "grad_norm": 40.57992935180664, "learning_rate": 1.0327966804979254e-05, "loss": 0.4179, "step": 29152 }, { "epoch": 24.19336099585062, "grad_norm": 40.49757385253906, "learning_rate": 1.0327634854771785e-05, "loss": 0.7417, "step": 29153 }, { "epoch": 24.194190871369294, "grad_norm": 17.962562561035156, "learning_rate": 1.0327302904564315e-05, "loss": 0.362, "step": 29154 }, { "epoch": 24.195020746887966, "grad_norm": 20.794645309448242, "learning_rate": 1.0326970954356847e-05, "loss": 0.3237, "step": 29155 }, { "epoch": 24.19585062240664, "grad_norm": 26.976104736328125, "learning_rate": 1.032663900414938e-05, "loss": 0.3854, "step": 29156 }, { "epoch": 24.19668049792531, "grad_norm": 32.10861587524414, "learning_rate": 1.0326307053941908e-05, "loss": 0.3535, "step": 29157 }, { "epoch": 24.197510373443983, "grad_norm": 15.362870216369629, "learning_rate": 1.032597510373444e-05, "loss": 0.232, "step": 29158 }, { "epoch": 24.198340248962655, "grad_norm": 53.07827377319336, "learning_rate": 1.0325643153526972e-05, "loss": 0.3171, "step": 29159 }, { "epoch": 24.199170124481327, "grad_norm": 15.399740219116211, "learning_rate": 1.0325311203319504e-05, "loss": 0.2727, "step": 29160 }, { "epoch": 24.2, "grad_norm": 27.17668342590332, "learning_rate": 1.0324979253112033e-05, 
"loss": 0.384, "step": 29161 }, { "epoch": 24.20082987551867, "grad_norm": 25.904996871948242, "learning_rate": 1.0324647302904565e-05, "loss": 0.2769, "step": 29162 }, { "epoch": 24.201659751037344, "grad_norm": 31.95066261291504, "learning_rate": 1.0324315352697096e-05, "loss": 0.322, "step": 29163 }, { "epoch": 24.202489626556016, "grad_norm": 59.090858459472656, "learning_rate": 1.0323983402489628e-05, "loss": 0.4399, "step": 29164 }, { "epoch": 24.203319502074688, "grad_norm": 16.58562469482422, "learning_rate": 1.032365145228216e-05, "loss": 0.3287, "step": 29165 }, { "epoch": 24.20414937759336, "grad_norm": 34.854366302490234, "learning_rate": 1.0323319502074689e-05, "loss": 0.506, "step": 29166 }, { "epoch": 24.204979253112032, "grad_norm": 47.00818634033203, "learning_rate": 1.032298755186722e-05, "loss": 0.4084, "step": 29167 }, { "epoch": 24.205809128630705, "grad_norm": 147.11282348632812, "learning_rate": 1.0322655601659753e-05, "loss": 0.7887, "step": 29168 }, { "epoch": 24.206639004149377, "grad_norm": 64.50194549560547, "learning_rate": 1.0322323651452283e-05, "loss": 0.6062, "step": 29169 }, { "epoch": 24.20746887966805, "grad_norm": 55.5433464050293, "learning_rate": 1.0321991701244814e-05, "loss": 0.7928, "step": 29170 }, { "epoch": 24.20829875518672, "grad_norm": 11.564773559570312, "learning_rate": 1.0321659751037346e-05, "loss": 0.2354, "step": 29171 }, { "epoch": 24.209128630705393, "grad_norm": 78.79219818115234, "learning_rate": 1.0321327800829876e-05, "loss": 0.2811, "step": 29172 }, { "epoch": 24.209958506224066, "grad_norm": 44.014564514160156, "learning_rate": 1.0320995850622408e-05, "loss": 0.4431, "step": 29173 }, { "epoch": 24.210788381742738, "grad_norm": 22.415504455566406, "learning_rate": 1.0320663900414939e-05, "loss": 0.2262, "step": 29174 }, { "epoch": 24.21161825726141, "grad_norm": 29.296096801757812, "learning_rate": 1.0320331950207469e-05, "loss": 0.499, "step": 29175 }, { "epoch": 24.212448132780082, "grad_norm": 
45.9676628112793, "learning_rate": 1.0320000000000001e-05, "loss": 0.8126, "step": 29176 }, { "epoch": 24.213278008298754, "grad_norm": 48.91256332397461, "learning_rate": 1.0319668049792533e-05, "loss": 0.6728, "step": 29177 }, { "epoch": 24.214107883817427, "grad_norm": 30.04269790649414, "learning_rate": 1.0319336099585062e-05, "loss": 0.4778, "step": 29178 }, { "epoch": 24.2149377593361, "grad_norm": 108.71492767333984, "learning_rate": 1.0319004149377594e-05, "loss": 0.3053, "step": 29179 }, { "epoch": 24.21576763485477, "grad_norm": 55.54078674316406, "learning_rate": 1.0318672199170126e-05, "loss": 0.4195, "step": 29180 }, { "epoch": 24.216597510373443, "grad_norm": 42.5993537902832, "learning_rate": 1.0318340248962657e-05, "loss": 0.6394, "step": 29181 }, { "epoch": 24.217427385892115, "grad_norm": 81.27457427978516, "learning_rate": 1.0318008298755187e-05, "loss": 0.4884, "step": 29182 }, { "epoch": 24.218257261410788, "grad_norm": 27.202238082885742, "learning_rate": 1.0317676348547717e-05, "loss": 0.3568, "step": 29183 }, { "epoch": 24.21908713692946, "grad_norm": 23.676742553710938, "learning_rate": 1.031734439834025e-05, "loss": 0.3611, "step": 29184 }, { "epoch": 24.219917012448132, "grad_norm": 145.3368682861328, "learning_rate": 1.0317012448132782e-05, "loss": 0.7245, "step": 29185 }, { "epoch": 24.220746887966804, "grad_norm": 157.32701110839844, "learning_rate": 1.031668049792531e-05, "loss": 0.6174, "step": 29186 }, { "epoch": 24.221576763485476, "grad_norm": 25.982839584350586, "learning_rate": 1.0316348547717842e-05, "loss": 0.3654, "step": 29187 }, { "epoch": 24.22240663900415, "grad_norm": 34.49528884887695, "learning_rate": 1.0316016597510375e-05, "loss": 0.4652, "step": 29188 }, { "epoch": 24.22323651452282, "grad_norm": 43.098846435546875, "learning_rate": 1.0315684647302907e-05, "loss": 0.3905, "step": 29189 }, { "epoch": 24.224066390041493, "grad_norm": 80.24557495117188, "learning_rate": 1.0315352697095437e-05, "loss": 0.4296, "step": 
29190 }, { "epoch": 24.224896265560165, "grad_norm": 73.81038665771484, "learning_rate": 1.0315020746887968e-05, "loss": 1.0707, "step": 29191 }, { "epoch": 24.225726141078837, "grad_norm": 39.47231674194336, "learning_rate": 1.0314688796680498e-05, "loss": 0.3298, "step": 29192 }, { "epoch": 24.22655601659751, "grad_norm": 34.78350830078125, "learning_rate": 1.031435684647303e-05, "loss": 0.3652, "step": 29193 }, { "epoch": 24.22738589211618, "grad_norm": 55.75163269042969, "learning_rate": 1.0314024896265562e-05, "loss": 0.4956, "step": 29194 }, { "epoch": 24.228215767634854, "grad_norm": 91.46090698242188, "learning_rate": 1.0313692946058091e-05, "loss": 0.3703, "step": 29195 }, { "epoch": 24.229045643153526, "grad_norm": 66.95240783691406, "learning_rate": 1.0313360995850623e-05, "loss": 0.7534, "step": 29196 }, { "epoch": 24.2298755186722, "grad_norm": 22.263208389282227, "learning_rate": 1.0313029045643155e-05, "loss": 0.2777, "step": 29197 }, { "epoch": 24.23070539419087, "grad_norm": 43.628082275390625, "learning_rate": 1.0312697095435687e-05, "loss": 0.6876, "step": 29198 }, { "epoch": 24.231535269709543, "grad_norm": 52.216102600097656, "learning_rate": 1.0312365145228216e-05, "loss": 1.1663, "step": 29199 }, { "epoch": 24.232365145228215, "grad_norm": 45.11167526245117, "learning_rate": 1.0312033195020748e-05, "loss": 0.8341, "step": 29200 }, { "epoch": 24.233195020746887, "grad_norm": 22.97369384765625, "learning_rate": 1.0311701244813278e-05, "loss": 0.4064, "step": 29201 }, { "epoch": 24.23402489626556, "grad_norm": 32.422882080078125, "learning_rate": 1.031136929460581e-05, "loss": 0.3344, "step": 29202 }, { "epoch": 24.23485477178423, "grad_norm": 61.068145751953125, "learning_rate": 1.0311037344398341e-05, "loss": 0.4877, "step": 29203 }, { "epoch": 24.235684647302904, "grad_norm": 25.446683883666992, "learning_rate": 1.0310705394190871e-05, "loss": 0.2869, "step": 29204 }, { "epoch": 24.236514522821576, "grad_norm": 56.21031951904297, 
"learning_rate": 1.0310373443983403e-05, "loss": 0.5817, "step": 29205 }, { "epoch": 24.237344398340248, "grad_norm": 32.07741165161133, "learning_rate": 1.0310041493775936e-05, "loss": 0.4383, "step": 29206 }, { "epoch": 24.23817427385892, "grad_norm": 85.45014190673828, "learning_rate": 1.0309709543568464e-05, "loss": 1.3168, "step": 29207 }, { "epoch": 24.239004149377593, "grad_norm": 14.675970077514648, "learning_rate": 1.0309377593360996e-05, "loss": 0.1995, "step": 29208 }, { "epoch": 24.239834024896265, "grad_norm": 25.72536849975586, "learning_rate": 1.0309045643153529e-05, "loss": 0.3064, "step": 29209 }, { "epoch": 24.240663900414937, "grad_norm": 36.18853759765625, "learning_rate": 1.0308713692946059e-05, "loss": 0.3667, "step": 29210 }, { "epoch": 24.24149377593361, "grad_norm": 95.2410659790039, "learning_rate": 1.030838174273859e-05, "loss": 0.8747, "step": 29211 }, { "epoch": 24.24232365145228, "grad_norm": 39.87651824951172, "learning_rate": 1.0308049792531121e-05, "loss": 0.6085, "step": 29212 }, { "epoch": 24.243153526970953, "grad_norm": 16.682985305786133, "learning_rate": 1.0307717842323652e-05, "loss": 0.2557, "step": 29213 }, { "epoch": 24.243983402489626, "grad_norm": 73.14250946044922, "learning_rate": 1.0307385892116184e-05, "loss": 0.6285, "step": 29214 }, { "epoch": 24.244813278008298, "grad_norm": 70.92437744140625, "learning_rate": 1.0307053941908713e-05, "loss": 0.6916, "step": 29215 }, { "epoch": 24.24564315352697, "grad_norm": 20.900104522705078, "learning_rate": 1.0306721991701245e-05, "loss": 0.294, "step": 29216 }, { "epoch": 24.246473029045642, "grad_norm": 15.767337799072266, "learning_rate": 1.0306390041493777e-05, "loss": 0.3338, "step": 29217 }, { "epoch": 24.247302904564314, "grad_norm": 15.926127433776855, "learning_rate": 1.0306058091286309e-05, "loss": 0.3397, "step": 29218 }, { "epoch": 24.248132780082987, "grad_norm": 22.22250747680664, "learning_rate": 1.030572614107884e-05, "loss": 0.4031, "step": 29219 }, { "epoch": 
24.24896265560166, "grad_norm": 36.858097076416016, "learning_rate": 1.030539419087137e-05, "loss": 0.5267, "step": 29220 }, { "epoch": 24.24979253112033, "grad_norm": 97.88043975830078, "learning_rate": 1.0305062240663902e-05, "loss": 0.7144, "step": 29221 }, { "epoch": 24.250622406639003, "grad_norm": 24.526676177978516, "learning_rate": 1.0304730290456432e-05, "loss": 0.365, "step": 29222 }, { "epoch": 24.251452282157675, "grad_norm": 69.64215850830078, "learning_rate": 1.0304398340248964e-05, "loss": 0.3932, "step": 29223 }, { "epoch": 24.252282157676348, "grad_norm": 21.35567855834961, "learning_rate": 1.0304066390041493e-05, "loss": 0.2891, "step": 29224 }, { "epoch": 24.25311203319502, "grad_norm": 21.273149490356445, "learning_rate": 1.0303734439834025e-05, "loss": 0.3828, "step": 29225 }, { "epoch": 24.253941908713692, "grad_norm": 26.12552261352539, "learning_rate": 1.0303402489626557e-05, "loss": 0.4641, "step": 29226 }, { "epoch": 24.254771784232364, "grad_norm": 50.220733642578125, "learning_rate": 1.030307053941909e-05, "loss": 0.6331, "step": 29227 }, { "epoch": 24.255601659751036, "grad_norm": 36.031646728515625, "learning_rate": 1.0302738589211618e-05, "loss": 0.7717, "step": 29228 }, { "epoch": 24.25643153526971, "grad_norm": 40.31781005859375, "learning_rate": 1.030240663900415e-05, "loss": 0.2937, "step": 29229 }, { "epoch": 24.25726141078838, "grad_norm": 25.165971755981445, "learning_rate": 1.0302074688796682e-05, "loss": 0.375, "step": 29230 }, { "epoch": 24.258091286307053, "grad_norm": 70.68949127197266, "learning_rate": 1.0301742738589213e-05, "loss": 0.5699, "step": 29231 }, { "epoch": 24.258921161825725, "grad_norm": 36.79307174682617, "learning_rate": 1.0301410788381743e-05, "loss": 0.5197, "step": 29232 }, { "epoch": 24.259751037344397, "grad_norm": 31.46193504333496, "learning_rate": 1.0301078838174274e-05, "loss": 0.464, "step": 29233 }, { "epoch": 24.26058091286307, "grad_norm": 48.453731536865234, "learning_rate": 
1.0300746887966806e-05, "loss": 0.3916, "step": 29234 }, { "epoch": 24.261410788381742, "grad_norm": 42.98725509643555, "learning_rate": 1.0300414937759338e-05, "loss": 0.5622, "step": 29235 }, { "epoch": 24.262240663900414, "grad_norm": 26.416013717651367, "learning_rate": 1.0300082987551867e-05, "loss": 0.3703, "step": 29236 }, { "epoch": 24.263070539419086, "grad_norm": 35.74443054199219, "learning_rate": 1.0299751037344399e-05, "loss": 0.3619, "step": 29237 }, { "epoch": 24.26390041493776, "grad_norm": 45.46574020385742, "learning_rate": 1.029941908713693e-05, "loss": 0.4669, "step": 29238 }, { "epoch": 24.26473029045643, "grad_norm": 39.01839828491211, "learning_rate": 1.0299087136929461e-05, "loss": 0.5156, "step": 29239 }, { "epoch": 24.265560165975103, "grad_norm": 70.66098022460938, "learning_rate": 1.0298755186721992e-05, "loss": 0.6673, "step": 29240 }, { "epoch": 24.266390041493775, "grad_norm": 46.63118362426758, "learning_rate": 1.0298423236514524e-05, "loss": 0.5262, "step": 29241 }, { "epoch": 24.267219917012447, "grad_norm": 72.03112030029297, "learning_rate": 1.0298091286307054e-05, "loss": 0.3608, "step": 29242 }, { "epoch": 24.26804979253112, "grad_norm": 21.579608917236328, "learning_rate": 1.0297759336099586e-05, "loss": 0.4641, "step": 29243 }, { "epoch": 24.26887966804979, "grad_norm": 20.090595245361328, "learning_rate": 1.0297427385892118e-05, "loss": 0.6041, "step": 29244 }, { "epoch": 24.269709543568464, "grad_norm": 48.5574836730957, "learning_rate": 1.0297095435684647e-05, "loss": 0.5387, "step": 29245 }, { "epoch": 24.270539419087136, "grad_norm": 10.863561630249023, "learning_rate": 1.029676348547718e-05, "loss": 0.3469, "step": 29246 }, { "epoch": 24.271369294605808, "grad_norm": 21.13663673400879, "learning_rate": 1.0296431535269711e-05, "loss": 0.4666, "step": 29247 }, { "epoch": 24.27219917012448, "grad_norm": 41.080318450927734, "learning_rate": 1.0296099585062242e-05, "loss": 0.4101, "step": 29248 }, { "epoch": 
24.273029045643153, "grad_norm": 14.473878860473633, "learning_rate": 1.0295767634854772e-05, "loss": 0.3024, "step": 29249 }, { "epoch": 24.273858921161825, "grad_norm": 33.996856689453125, "learning_rate": 1.0295435684647304e-05, "loss": 0.3925, "step": 29250 }, { "epoch": 24.274688796680497, "grad_norm": 21.344148635864258, "learning_rate": 1.0295103734439835e-05, "loss": 0.3436, "step": 29251 }, { "epoch": 24.27551867219917, "grad_norm": 31.540388107299805, "learning_rate": 1.0294771784232367e-05, "loss": 0.3592, "step": 29252 }, { "epoch": 24.27634854771784, "grad_norm": 92.93193054199219, "learning_rate": 1.0294439834024896e-05, "loss": 0.8155, "step": 29253 }, { "epoch": 24.277178423236514, "grad_norm": 19.029531478881836, "learning_rate": 1.0294107883817428e-05, "loss": 0.3062, "step": 29254 }, { "epoch": 24.278008298755186, "grad_norm": 71.2977066040039, "learning_rate": 1.029377593360996e-05, "loss": 0.536, "step": 29255 }, { "epoch": 24.278838174273858, "grad_norm": 53.80153274536133, "learning_rate": 1.0293443983402492e-05, "loss": 0.4554, "step": 29256 }, { "epoch": 24.27966804979253, "grad_norm": 12.77558422088623, "learning_rate": 1.029311203319502e-05, "loss": 0.2355, "step": 29257 }, { "epoch": 24.280497925311202, "grad_norm": 39.09322738647461, "learning_rate": 1.0292780082987553e-05, "loss": 0.2974, "step": 29258 }, { "epoch": 24.281327800829875, "grad_norm": 11.353007316589355, "learning_rate": 1.0292448132780085e-05, "loss": 0.2372, "step": 29259 }, { "epoch": 24.282157676348547, "grad_norm": 26.487506866455078, "learning_rate": 1.0292116182572615e-05, "loss": 0.2688, "step": 29260 }, { "epoch": 24.28298755186722, "grad_norm": 33.156036376953125, "learning_rate": 1.0291784232365146e-05, "loss": 0.7999, "step": 29261 }, { "epoch": 24.28381742738589, "grad_norm": 33.81487274169922, "learning_rate": 1.0291452282157676e-05, "loss": 0.4583, "step": 29262 }, { "epoch": 24.284647302904563, "grad_norm": 33.283138275146484, "learning_rate": 
1.0291120331950208e-05, "loss": 0.4701, "step": 29263 }, { "epoch": 24.285477178423236, "grad_norm": 21.651762008666992, "learning_rate": 1.029078838174274e-05, "loss": 0.2461, "step": 29264 }, { "epoch": 24.286307053941908, "grad_norm": 15.738907814025879, "learning_rate": 1.0290456431535269e-05, "loss": 0.2236, "step": 29265 }, { "epoch": 24.28713692946058, "grad_norm": 21.468320846557617, "learning_rate": 1.0290124481327801e-05, "loss": 0.3588, "step": 29266 }, { "epoch": 24.287966804979252, "grad_norm": 106.1802978515625, "learning_rate": 1.0289792531120333e-05, "loss": 0.9045, "step": 29267 }, { "epoch": 24.288796680497924, "grad_norm": 209.75753784179688, "learning_rate": 1.0289460580912865e-05, "loss": 0.5197, "step": 29268 }, { "epoch": 24.289626556016596, "grad_norm": 255.61070251464844, "learning_rate": 1.0289128630705396e-05, "loss": 0.5279, "step": 29269 }, { "epoch": 24.29045643153527, "grad_norm": 36.972042083740234, "learning_rate": 1.0288796680497926e-05, "loss": 0.3791, "step": 29270 }, { "epoch": 24.29128630705394, "grad_norm": 32.255680084228516, "learning_rate": 1.0288464730290456e-05, "loss": 0.3404, "step": 29271 }, { "epoch": 24.292116182572613, "grad_norm": 39.58708953857422, "learning_rate": 1.0288132780082989e-05, "loss": 0.3549, "step": 29272 }, { "epoch": 24.292946058091285, "grad_norm": 98.59032440185547, "learning_rate": 1.028780082987552e-05, "loss": 0.5012, "step": 29273 }, { "epoch": 24.293775933609957, "grad_norm": 88.4173812866211, "learning_rate": 1.028746887966805e-05, "loss": 0.8279, "step": 29274 }, { "epoch": 24.29460580912863, "grad_norm": 51.62995910644531, "learning_rate": 1.0287136929460582e-05, "loss": 0.3842, "step": 29275 }, { "epoch": 24.295435684647302, "grad_norm": 73.28113555908203, "learning_rate": 1.0286804979253114e-05, "loss": 0.4523, "step": 29276 }, { "epoch": 24.296265560165974, "grad_norm": 33.59001922607422, "learning_rate": 1.0286473029045646e-05, "loss": 0.4585, "step": 29277 }, { "epoch": 
24.297095435684646, "grad_norm": 15.718735694885254, "learning_rate": 1.0286141078838174e-05, "loss": 0.2913, "step": 29278 }, { "epoch": 24.29792531120332, "grad_norm": 34.48872756958008, "learning_rate": 1.0285809128630707e-05, "loss": 0.5644, "step": 29279 }, { "epoch": 24.29875518672199, "grad_norm": 20.527896881103516, "learning_rate": 1.0285477178423237e-05, "loss": 0.2703, "step": 29280 }, { "epoch": 24.299585062240663, "grad_norm": 106.3126220703125, "learning_rate": 1.0285145228215769e-05, "loss": 0.7315, "step": 29281 }, { "epoch": 24.300414937759335, "grad_norm": 34.950157165527344, "learning_rate": 1.02848132780083e-05, "loss": 0.5825, "step": 29282 }, { "epoch": 24.301244813278007, "grad_norm": 44.26004409790039, "learning_rate": 1.028448132780083e-05, "loss": 1.1129, "step": 29283 }, { "epoch": 24.30207468879668, "grad_norm": 29.70172691345215, "learning_rate": 1.0284149377593362e-05, "loss": 0.298, "step": 29284 }, { "epoch": 24.30290456431535, "grad_norm": 41.285316467285156, "learning_rate": 1.0283817427385894e-05, "loss": 0.4147, "step": 29285 }, { "epoch": 24.303734439834024, "grad_norm": 70.86206817626953, "learning_rate": 1.0283485477178423e-05, "loss": 0.3064, "step": 29286 }, { "epoch": 24.304564315352696, "grad_norm": 51.76995849609375, "learning_rate": 1.0283153526970955e-05, "loss": 0.6325, "step": 29287 }, { "epoch": 24.305394190871368, "grad_norm": 25.821460723876953, "learning_rate": 1.0282821576763487e-05, "loss": 0.4007, "step": 29288 }, { "epoch": 24.30622406639004, "grad_norm": 40.00942611694336, "learning_rate": 1.0282489626556017e-05, "loss": 0.896, "step": 29289 }, { "epoch": 24.307053941908713, "grad_norm": 36.154144287109375, "learning_rate": 1.0282157676348548e-05, "loss": 0.2821, "step": 29290 }, { "epoch": 24.307883817427385, "grad_norm": 25.05402183532715, "learning_rate": 1.028182572614108e-05, "loss": 0.3078, "step": 29291 }, { "epoch": 24.308713692946057, "grad_norm": 28.516633987426758, "learning_rate": 
1.028149377593361e-05, "loss": 0.3938, "step": 29292 }, { "epoch": 24.30954356846473, "grad_norm": 16.740619659423828, "learning_rate": 1.0281161825726143e-05, "loss": 0.257, "step": 29293 }, { "epoch": 24.3103734439834, "grad_norm": 91.88928985595703, "learning_rate": 1.0280829875518671e-05, "loss": 0.4591, "step": 29294 }, { "epoch": 24.311203319502074, "grad_norm": 20.16200828552246, "learning_rate": 1.0280497925311203e-05, "loss": 0.3963, "step": 29295 }, { "epoch": 24.312033195020746, "grad_norm": 22.073095321655273, "learning_rate": 1.0280165975103735e-05, "loss": 0.2656, "step": 29296 }, { "epoch": 24.312863070539418, "grad_norm": 18.05280876159668, "learning_rate": 1.0279834024896268e-05, "loss": 0.2953, "step": 29297 }, { "epoch": 24.31369294605809, "grad_norm": 67.98213958740234, "learning_rate": 1.0279502074688798e-05, "loss": 0.777, "step": 29298 }, { "epoch": 24.314522821576762, "grad_norm": 33.52250671386719, "learning_rate": 1.0279170124481328e-05, "loss": 0.3839, "step": 29299 }, { "epoch": 24.315352697095435, "grad_norm": 35.126068115234375, "learning_rate": 1.0278838174273859e-05, "loss": 0.5267, "step": 29300 }, { "epoch": 24.316182572614107, "grad_norm": 15.86550521850586, "learning_rate": 1.0278506224066391e-05, "loss": 0.366, "step": 29301 }, { "epoch": 24.31701244813278, "grad_norm": 25.943988800048828, "learning_rate": 1.0278174273858923e-05, "loss": 0.6082, "step": 29302 }, { "epoch": 24.31784232365145, "grad_norm": 78.89200592041016, "learning_rate": 1.0277842323651452e-05, "loss": 0.4809, "step": 29303 }, { "epoch": 24.318672199170123, "grad_norm": 29.66168212890625, "learning_rate": 1.0277510373443984e-05, "loss": 0.4352, "step": 29304 }, { "epoch": 24.319502074688796, "grad_norm": 35.12288284301758, "learning_rate": 1.0277178423236516e-05, "loss": 0.5308, "step": 29305 }, { "epoch": 24.320331950207468, "grad_norm": NaN, "learning_rate": 1.0277178423236516e-05, "loss": 1.0341, "step": 29306 }, { "epoch": 24.32116182572614, "grad_norm": 
88.72903442382812, "learning_rate": 1.0276846473029048e-05, "loss": 0.4592, "step": 29307 }, { "epoch": 24.321991701244812, "grad_norm": 41.24958419799805, "learning_rate": 1.0276514522821577e-05, "loss": 0.4136, "step": 29308 }, { "epoch": 24.322821576763484, "grad_norm": 35.58343505859375, "learning_rate": 1.0276182572614109e-05, "loss": 0.5246, "step": 29309 }, { "epoch": 24.323651452282157, "grad_norm": 30.30681610107422, "learning_rate": 1.027585062240664e-05, "loss": 0.4179, "step": 29310 }, { "epoch": 24.32448132780083, "grad_norm": 23.658788681030273, "learning_rate": 1.0275518672199171e-05, "loss": 0.3011, "step": 29311 }, { "epoch": 24.3253112033195, "grad_norm": 24.893098831176758, "learning_rate": 1.0275186721991702e-05, "loss": 0.2347, "step": 29312 }, { "epoch": 24.326141078838173, "grad_norm": 56.10066604614258, "learning_rate": 1.0274854771784232e-05, "loss": 0.6551, "step": 29313 }, { "epoch": 24.326970954356845, "grad_norm": 67.35997009277344, "learning_rate": 1.0274522821576764e-05, "loss": 0.615, "step": 29314 }, { "epoch": 24.327800829875518, "grad_norm": 15.83449649810791, "learning_rate": 1.0274190871369296e-05, "loss": 0.2569, "step": 29315 }, { "epoch": 24.32863070539419, "grad_norm": 17.770780563354492, "learning_rate": 1.0273858921161825e-05, "loss": 0.2857, "step": 29316 }, { "epoch": 24.329460580912862, "grad_norm": 147.33665466308594, "learning_rate": 1.0273526970954357e-05, "loss": 0.868, "step": 29317 }, { "epoch": 24.330290456431534, "grad_norm": 11.076504707336426, "learning_rate": 1.027319502074689e-05, "loss": 0.3479, "step": 29318 }, { "epoch": 24.331120331950206, "grad_norm": 53.86740493774414, "learning_rate": 1.027286307053942e-05, "loss": 0.8089, "step": 29319 }, { "epoch": 24.33195020746888, "grad_norm": 18.54978370666504, "learning_rate": 1.027253112033195e-05, "loss": 0.2531, "step": 29320 }, { "epoch": 24.33278008298755, "grad_norm": 37.25910568237305, "learning_rate": 1.0272199170124482e-05, "loss": 0.6975, "step": 
29321 }, { "epoch": 24.333609958506223, "grad_norm": 45.69072341918945, "learning_rate": 1.0271867219917013e-05, "loss": 0.6338, "step": 29322 }, { "epoch": 24.334439834024895, "grad_norm": 41.60890579223633, "learning_rate": 1.0271535269709545e-05, "loss": 0.4328, "step": 29323 }, { "epoch": 24.335269709543567, "grad_norm": 84.25578308105469, "learning_rate": 1.0271203319502077e-05, "loss": 0.8111, "step": 29324 }, { "epoch": 24.33609958506224, "grad_norm": 17.036245346069336, "learning_rate": 1.0270871369294606e-05, "loss": 0.2876, "step": 29325 }, { "epoch": 24.33692946058091, "grad_norm": 69.99956512451172, "learning_rate": 1.0270539419087138e-05, "loss": 0.4868, "step": 29326 }, { "epoch": 24.337759336099584, "grad_norm": 19.444496154785156, "learning_rate": 1.027020746887967e-05, "loss": 0.2249, "step": 29327 }, { "epoch": 24.338589211618256, "grad_norm": 36.83536148071289, "learning_rate": 1.02698755186722e-05, "loss": 0.8704, "step": 29328 }, { "epoch": 24.33941908713693, "grad_norm": 105.85684967041016, "learning_rate": 1.026954356846473e-05, "loss": 0.5455, "step": 29329 }, { "epoch": 24.3402489626556, "grad_norm": 50.13836669921875, "learning_rate": 1.0269211618257263e-05, "loss": 0.4705, "step": 29330 }, { "epoch": 24.341078838174273, "grad_norm": 19.906753540039062, "learning_rate": 1.0268879668049793e-05, "loss": 0.2858, "step": 29331 }, { "epoch": 24.341908713692945, "grad_norm": 69.9188461303711, "learning_rate": 1.0268547717842325e-05, "loss": 0.6868, "step": 29332 }, { "epoch": 24.342738589211617, "grad_norm": 31.941015243530273, "learning_rate": 1.0268215767634854e-05, "loss": 0.4726, "step": 29333 }, { "epoch": 24.34356846473029, "grad_norm": 33.83638000488281, "learning_rate": 1.0267883817427386e-05, "loss": 0.3785, "step": 29334 }, { "epoch": 24.34439834024896, "grad_norm": 43.56340789794922, "learning_rate": 1.0267551867219918e-05, "loss": 0.9811, "step": 29335 }, { "epoch": 24.345228215767634, "grad_norm": 29.083677291870117, 
"learning_rate": 1.026721991701245e-05, "loss": 0.3753, "step": 29336 }, { "epoch": 24.346058091286306, "grad_norm": 86.13972473144531, "learning_rate": 1.0266887966804979e-05, "loss": 0.9868, "step": 29337 }, { "epoch": 24.346887966804978, "grad_norm": 17.359207153320312, "learning_rate": 1.0266556016597511e-05, "loss": 0.2621, "step": 29338 }, { "epoch": 24.34771784232365, "grad_norm": 31.55787467956543, "learning_rate": 1.0266224066390043e-05, "loss": 0.6416, "step": 29339 }, { "epoch": 24.348547717842322, "grad_norm": 21.838245391845703, "learning_rate": 1.0265892116182574e-05, "loss": 0.3408, "step": 29340 }, { "epoch": 24.349377593360995, "grad_norm": 16.19984245300293, "learning_rate": 1.0265560165975104e-05, "loss": 0.3899, "step": 29341 }, { "epoch": 24.350207468879667, "grad_norm": 22.354372024536133, "learning_rate": 1.0265228215767635e-05, "loss": 0.263, "step": 29342 }, { "epoch": 24.35103734439834, "grad_norm": 137.60586547851562, "learning_rate": 1.0264896265560167e-05, "loss": 0.4497, "step": 29343 }, { "epoch": 24.35186721991701, "grad_norm": 97.41902923583984, "learning_rate": 1.0264564315352699e-05, "loss": 0.5029, "step": 29344 }, { "epoch": 24.352697095435683, "grad_norm": 18.057994842529297, "learning_rate": 1.0264232365145228e-05, "loss": 0.3229, "step": 29345 }, { "epoch": 24.353526970954356, "grad_norm": 16.077129364013672, "learning_rate": 1.026390041493776e-05, "loss": 0.3003, "step": 29346 }, { "epoch": 24.354356846473028, "grad_norm": 76.29313659667969, "learning_rate": 1.0263568464730292e-05, "loss": 0.7873, "step": 29347 }, { "epoch": 24.3551867219917, "grad_norm": 19.766630172729492, "learning_rate": 1.0263236514522824e-05, "loss": 0.2558, "step": 29348 }, { "epoch": 24.356016597510372, "grad_norm": 53.47211456298828, "learning_rate": 1.0262904564315354e-05, "loss": 0.5539, "step": 29349 }, { "epoch": 24.356846473029044, "grad_norm": 11.920936584472656, "learning_rate": 1.0262572614107885e-05, "loss": 0.2689, "step": 29350 }, { 
"epoch": 24.357676348547717, "grad_norm": 46.84453582763672, "learning_rate": 1.0262240663900415e-05, "loss": 0.6937, "step": 29351 }, { "epoch": 24.35850622406639, "grad_norm": 50.90858459472656, "learning_rate": 1.0261908713692947e-05, "loss": 0.7391, "step": 29352 }, { "epoch": 24.35933609958506, "grad_norm": 64.4754638671875, "learning_rate": 1.026157676348548e-05, "loss": 0.5285, "step": 29353 }, { "epoch": 24.360165975103733, "grad_norm": 15.102849960327148, "learning_rate": 1.0261244813278008e-05, "loss": 0.2554, "step": 29354 }, { "epoch": 24.360995850622405, "grad_norm": 20.200719833374023, "learning_rate": 1.026091286307054e-05, "loss": 0.3294, "step": 29355 }, { "epoch": 24.361825726141078, "grad_norm": 40.35336685180664, "learning_rate": 1.0260580912863072e-05, "loss": 0.5323, "step": 29356 }, { "epoch": 24.36265560165975, "grad_norm": 57.075557708740234, "learning_rate": 1.0260248962655603e-05, "loss": 0.7278, "step": 29357 }, { "epoch": 24.363485477178422, "grad_norm": 61.5333137512207, "learning_rate": 1.0259917012448133e-05, "loss": 0.2561, "step": 29358 }, { "epoch": 24.364315352697094, "grad_norm": 30.933610916137695, "learning_rate": 1.0259585062240665e-05, "loss": 0.2994, "step": 29359 }, { "epoch": 24.365145228215766, "grad_norm": 29.863462448120117, "learning_rate": 1.0259253112033196e-05, "loss": 0.4833, "step": 29360 }, { "epoch": 24.36597510373444, "grad_norm": 68.34970092773438, "learning_rate": 1.0258921161825728e-05, "loss": 0.6188, "step": 29361 }, { "epoch": 24.36680497925311, "grad_norm": 14.168465614318848, "learning_rate": 1.0258589211618258e-05, "loss": 0.3108, "step": 29362 }, { "epoch": 24.367634854771783, "grad_norm": 28.154996871948242, "learning_rate": 1.0258257261410789e-05, "loss": 0.311, "step": 29363 }, { "epoch": 24.368464730290455, "grad_norm": 61.82691955566406, "learning_rate": 1.025792531120332e-05, "loss": 0.6213, "step": 29364 }, { "epoch": 24.369294605809127, "grad_norm": 30.3546142578125, "learning_rate": 
1.0257593360995853e-05, "loss": 0.4499, "step": 29365 }, { "epoch": 24.3701244813278, "grad_norm": 17.76034927368164, "learning_rate": 1.0257261410788381e-05, "loss": 0.3034, "step": 29366 }, { "epoch": 24.37095435684647, "grad_norm": 90.93858337402344, "learning_rate": 1.0256929460580914e-05, "loss": 0.5267, "step": 29367 }, { "epoch": 24.371784232365144, "grad_norm": 64.02205657958984, "learning_rate": 1.0256597510373446e-05, "loss": 0.6337, "step": 29368 }, { "epoch": 24.372614107883816, "grad_norm": 43.611751556396484, "learning_rate": 1.0256265560165976e-05, "loss": 0.7827, "step": 29369 }, { "epoch": 24.37344398340249, "grad_norm": 45.952247619628906, "learning_rate": 1.0255933609958506e-05, "loss": 0.5127, "step": 29370 }, { "epoch": 24.37427385892116, "grad_norm": 68.99617004394531, "learning_rate": 1.0255601659751037e-05, "loss": 0.7316, "step": 29371 }, { "epoch": 24.375103734439833, "grad_norm": 52.200050354003906, "learning_rate": 1.0255269709543569e-05, "loss": 0.3586, "step": 29372 }, { "epoch": 24.375933609958505, "grad_norm": 61.024925231933594, "learning_rate": 1.0254937759336101e-05, "loss": 0.8802, "step": 29373 }, { "epoch": 24.376763485477177, "grad_norm": 51.240928649902344, "learning_rate": 1.025460580912863e-05, "loss": 0.8385, "step": 29374 }, { "epoch": 24.37759336099585, "grad_norm": 25.875934600830078, "learning_rate": 1.0254273858921162e-05, "loss": 0.4795, "step": 29375 }, { "epoch": 24.37842323651452, "grad_norm": 61.356449127197266, "learning_rate": 1.0253941908713694e-05, "loss": 0.4071, "step": 29376 }, { "epoch": 24.379253112033194, "grad_norm": 26.78290367126465, "learning_rate": 1.0253609958506226e-05, "loss": 0.3343, "step": 29377 }, { "epoch": 24.380082987551866, "grad_norm": 36.05710220336914, "learning_rate": 1.0253278008298757e-05, "loss": 0.457, "step": 29378 }, { "epoch": 24.380912863070538, "grad_norm": 116.71695709228516, "learning_rate": 1.0252946058091287e-05, "loss": 0.4884, "step": 29379 }, { "epoch": 
24.38174273858921, "grad_norm": 34.79357147216797, "learning_rate": 1.0252614107883817e-05, "loss": 0.6021, "step": 29380 }, { "epoch": 24.382572614107882, "grad_norm": 13.625340461730957, "learning_rate": 1.025228215767635e-05, "loss": 0.2971, "step": 29381 }, { "epoch": 24.383402489626555, "grad_norm": 17.17007827758789, "learning_rate": 1.0251950207468882e-05, "loss": 0.2681, "step": 29382 }, { "epoch": 24.384232365145227, "grad_norm": 25.3267822265625, "learning_rate": 1.025161825726141e-05, "loss": 0.2657, "step": 29383 }, { "epoch": 24.3850622406639, "grad_norm": 32.610984802246094, "learning_rate": 1.0251286307053942e-05, "loss": 0.4849, "step": 29384 }, { "epoch": 24.38589211618257, "grad_norm": 29.48428726196289, "learning_rate": 1.0250954356846475e-05, "loss": 0.4452, "step": 29385 }, { "epoch": 24.386721991701243, "grad_norm": 19.803056716918945, "learning_rate": 1.0250622406639007e-05, "loss": 0.4152, "step": 29386 }, { "epoch": 24.387551867219916, "grad_norm": 27.840662002563477, "learning_rate": 1.0250290456431535e-05, "loss": 0.6215, "step": 29387 }, { "epoch": 24.388381742738588, "grad_norm": 85.93460845947266, "learning_rate": 1.0249958506224067e-05, "loss": 0.4545, "step": 29388 }, { "epoch": 24.38921161825726, "grad_norm": 15.774852752685547, "learning_rate": 1.0249626556016598e-05, "loss": 0.1994, "step": 29389 }, { "epoch": 24.390041493775932, "grad_norm": 30.674440383911133, "learning_rate": 1.024929460580913e-05, "loss": 0.3126, "step": 29390 }, { "epoch": 24.390871369294604, "grad_norm": 18.964752197265625, "learning_rate": 1.024896265560166e-05, "loss": 0.2775, "step": 29391 }, { "epoch": 24.391701244813277, "grad_norm": 24.623876571655273, "learning_rate": 1.024863070539419e-05, "loss": 0.2515, "step": 29392 }, { "epoch": 24.39253112033195, "grad_norm": 85.89751434326172, "learning_rate": 1.0248298755186723e-05, "loss": 0.4243, "step": 29393 }, { "epoch": 24.39336099585062, "grad_norm": 56.60064697265625, "learning_rate": 
1.0247966804979255e-05, "loss": 0.4162, "step": 29394 }, { "epoch": 24.394190871369293, "grad_norm": 44.85444259643555, "learning_rate": 1.0247634854771784e-05, "loss": 0.5782, "step": 29395 }, { "epoch": 24.395020746887965, "grad_norm": 65.27881622314453, "learning_rate": 1.0247302904564316e-05, "loss": 0.4251, "step": 29396 }, { "epoch": 24.395850622406638, "grad_norm": 133.54959106445312, "learning_rate": 1.0246970954356848e-05, "loss": 0.8311, "step": 29397 }, { "epoch": 24.39668049792531, "grad_norm": 102.58549499511719, "learning_rate": 1.0246639004149378e-05, "loss": 0.5405, "step": 29398 }, { "epoch": 24.397510373443982, "grad_norm": 74.78736877441406, "learning_rate": 1.0246307053941909e-05, "loss": 0.383, "step": 29399 }, { "epoch": 24.398340248962654, "grad_norm": 93.5360336303711, "learning_rate": 1.0245975103734441e-05, "loss": 0.7794, "step": 29400 }, { "epoch": 24.399170124481326, "grad_norm": 102.83042907714844, "learning_rate": 1.0245643153526971e-05, "loss": 0.3784, "step": 29401 }, { "epoch": 24.4, "grad_norm": 28.900920867919922, "learning_rate": 1.0245311203319503e-05, "loss": 0.4217, "step": 29402 }, { "epoch": 24.40082987551867, "grad_norm": 98.14400482177734, "learning_rate": 1.0244979253112036e-05, "loss": 0.5083, "step": 29403 }, { "epoch": 24.401659751037343, "grad_norm": 30.99965476989746, "learning_rate": 1.0244647302904564e-05, "loss": 0.6742, "step": 29404 }, { "epoch": 24.402489626556015, "grad_norm": 124.41175842285156, "learning_rate": 1.0244315352697096e-05, "loss": 0.7605, "step": 29405 }, { "epoch": 24.403319502074687, "grad_norm": 24.24708366394043, "learning_rate": 1.0243983402489628e-05, "loss": 0.2271, "step": 29406 }, { "epoch": 24.40414937759336, "grad_norm": 111.09545135498047, "learning_rate": 1.0243651452282159e-05, "loss": 0.8506, "step": 29407 }, { "epoch": 24.40497925311203, "grad_norm": 49.59520721435547, "learning_rate": 1.024331950207469e-05, "loss": 0.453, "step": 29408 }, { "epoch": 24.405809128630704, 
"grad_norm": 64.57487487792969, "learning_rate": 1.0242987551867221e-05, "loss": 0.4207, "step": 29409 }, { "epoch": 24.406639004149376, "grad_norm": 68.93682861328125, "learning_rate": 1.0242655601659752e-05, "loss": 0.3269, "step": 29410 }, { "epoch": 24.40746887966805, "grad_norm": 44.66211700439453, "learning_rate": 1.0242323651452284e-05, "loss": 0.5046, "step": 29411 }, { "epoch": 24.40829875518672, "grad_norm": 30.47736167907715, "learning_rate": 1.0241991701244813e-05, "loss": 0.3922, "step": 29412 }, { "epoch": 24.409128630705393, "grad_norm": 52.340370178222656, "learning_rate": 1.0241659751037345e-05, "loss": 0.6124, "step": 29413 }, { "epoch": 24.409958506224065, "grad_norm": 26.354883193969727, "learning_rate": 1.0241327800829877e-05, "loss": 0.3424, "step": 29414 }, { "epoch": 24.410788381742737, "grad_norm": 66.12109375, "learning_rate": 1.0240995850622409e-05, "loss": 0.4285, "step": 29415 }, { "epoch": 24.41161825726141, "grad_norm": 64.91026306152344, "learning_rate": 1.0240663900414938e-05, "loss": 0.4433, "step": 29416 }, { "epoch": 24.41244813278008, "grad_norm": 13.94571304321289, "learning_rate": 1.024033195020747e-05, "loss": 0.2498, "step": 29417 }, { "epoch": 24.413278008298754, "grad_norm": 16.58430290222168, "learning_rate": 1.024e-05, "loss": 0.3351, "step": 29418 }, { "epoch": 24.414107883817426, "grad_norm": 42.55348587036133, "learning_rate": 1.0239668049792532e-05, "loss": 0.4104, "step": 29419 }, { "epoch": 24.414937759336098, "grad_norm": 38.99840545654297, "learning_rate": 1.0239336099585063e-05, "loss": 0.4678, "step": 29420 }, { "epoch": 24.41576763485477, "grad_norm": 42.76053237915039, "learning_rate": 1.0239004149377593e-05, "loss": 0.4745, "step": 29421 }, { "epoch": 24.416597510373443, "grad_norm": 53.119178771972656, "learning_rate": 1.0238672199170125e-05, "loss": 0.5103, "step": 29422 }, { "epoch": 24.417427385892115, "grad_norm": 29.10416603088379, "learning_rate": 1.0238340248962657e-05, "loss": 0.3909, "step": 29423 
}, { "epoch": 24.418257261410787, "grad_norm": 11.75797176361084, "learning_rate": 1.0238008298755186e-05, "loss": 0.2518, "step": 29424 }, { "epoch": 24.41908713692946, "grad_norm": 49.9010009765625, "learning_rate": 1.0237676348547718e-05, "loss": 0.6685, "step": 29425 }, { "epoch": 24.41991701244813, "grad_norm": 51.96294021606445, "learning_rate": 1.023734439834025e-05, "loss": 0.7962, "step": 29426 }, { "epoch": 24.420746887966803, "grad_norm": 40.65452575683594, "learning_rate": 1.023701244813278e-05, "loss": 0.3857, "step": 29427 }, { "epoch": 24.421576763485476, "grad_norm": 36.961185455322266, "learning_rate": 1.0236680497925311e-05, "loss": 0.4086, "step": 29428 }, { "epoch": 24.422406639004148, "grad_norm": 14.76266098022461, "learning_rate": 1.0236348547717843e-05, "loss": 0.2427, "step": 29429 }, { "epoch": 24.42323651452282, "grad_norm": 21.332622528076172, "learning_rate": 1.0236016597510374e-05, "loss": 0.3008, "step": 29430 }, { "epoch": 24.424066390041492, "grad_norm": 30.443275451660156, "learning_rate": 1.0235684647302906e-05, "loss": 0.4447, "step": 29431 }, { "epoch": 24.424896265560164, "grad_norm": 14.691320419311523, "learning_rate": 1.0235352697095438e-05, "loss": 0.2695, "step": 29432 }, { "epoch": 24.425726141078837, "grad_norm": 38.36577224731445, "learning_rate": 1.0235020746887967e-05, "loss": 0.5307, "step": 29433 }, { "epoch": 24.42655601659751, "grad_norm": 30.74454689025879, "learning_rate": 1.0234688796680499e-05, "loss": 0.5585, "step": 29434 }, { "epoch": 24.42738589211618, "grad_norm": 54.48752212524414, "learning_rate": 1.023435684647303e-05, "loss": 0.3815, "step": 29435 }, { "epoch": 24.428215767634853, "grad_norm": 90.88748931884766, "learning_rate": 1.0234024896265561e-05, "loss": 1.2196, "step": 29436 }, { "epoch": 24.429045643153525, "grad_norm": 34.823307037353516, "learning_rate": 1.0233692946058092e-05, "loss": 0.4484, "step": 29437 }, { "epoch": 24.429875518672198, "grad_norm": 44.19801330566406, "learning_rate": 
1.0233360995850624e-05, "loss": 0.3471, "step": 29438 }, { "epoch": 24.43070539419087, "grad_norm": 87.71623992919922, "learning_rate": 1.0233029045643154e-05, "loss": 0.3502, "step": 29439 }, { "epoch": 24.431535269709542, "grad_norm": 8.995797157287598, "learning_rate": 1.0232697095435686e-05, "loss": 0.193, "step": 29440 }, { "epoch": 24.432365145228214, "grad_norm": 23.135780334472656, "learning_rate": 1.0232365145228215e-05, "loss": 0.3177, "step": 29441 }, { "epoch": 24.433195020746886, "grad_norm": 53.01078414916992, "learning_rate": 1.0232033195020747e-05, "loss": 0.7541, "step": 29442 }, { "epoch": 24.43402489626556, "grad_norm": 54.62691879272461, "learning_rate": 1.023170124481328e-05, "loss": 0.5009, "step": 29443 }, { "epoch": 24.43485477178423, "grad_norm": 33.19381332397461, "learning_rate": 1.0231369294605811e-05, "loss": 0.3181, "step": 29444 }, { "epoch": 24.435684647302903, "grad_norm": 31.520551681518555, "learning_rate": 1.023103734439834e-05, "loss": 0.2867, "step": 29445 }, { "epoch": 24.436514522821575, "grad_norm": 33.6527099609375, "learning_rate": 1.0230705394190872e-05, "loss": 0.4078, "step": 29446 }, { "epoch": 24.437344398340247, "grad_norm": 42.15654373168945, "learning_rate": 1.0230373443983404e-05, "loss": 0.4024, "step": 29447 }, { "epoch": 24.43817427385892, "grad_norm": 109.07896423339844, "learning_rate": 1.0230041493775935e-05, "loss": 0.3527, "step": 29448 }, { "epoch": 24.439004149377592, "grad_norm": 20.68003273010254, "learning_rate": 1.0229709543568465e-05, "loss": 0.2955, "step": 29449 }, { "epoch": 24.439834024896264, "grad_norm": 10.309353828430176, "learning_rate": 1.0229377593360995e-05, "loss": 0.2381, "step": 29450 }, { "epoch": 24.440663900414936, "grad_norm": 26.917463302612305, "learning_rate": 1.0229045643153528e-05, "loss": 0.2296, "step": 29451 }, { "epoch": 24.44149377593361, "grad_norm": 90.76424407958984, "learning_rate": 1.022871369294606e-05, "loss": 0.6052, "step": 29452 }, { "epoch": 24.44232365145228, 
"grad_norm": 95.58516693115234, "learning_rate": 1.0228381742738588e-05, "loss": 0.7262, "step": 29453 }, { "epoch": 24.443153526970953, "grad_norm": 106.61475372314453, "learning_rate": 1.022804979253112e-05, "loss": 0.7789, "step": 29454 }, { "epoch": 24.443983402489625, "grad_norm": 106.23249816894531, "learning_rate": 1.0227717842323653e-05, "loss": 0.5453, "step": 29455 }, { "epoch": 24.444813278008297, "grad_norm": 10.276704788208008, "learning_rate": 1.0227385892116185e-05, "loss": 0.1908, "step": 29456 }, { "epoch": 24.44564315352697, "grad_norm": 92.47560119628906, "learning_rate": 1.0227053941908715e-05, "loss": 0.7534, "step": 29457 }, { "epoch": 24.44647302904564, "grad_norm": 36.71144104003906, "learning_rate": 1.0226721991701246e-05, "loss": 0.3975, "step": 29458 }, { "epoch": 24.447302904564314, "grad_norm": 33.925048828125, "learning_rate": 1.0226390041493776e-05, "loss": 0.4126, "step": 29459 }, { "epoch": 24.448132780082986, "grad_norm": 60.57362365722656, "learning_rate": 1.0226058091286308e-05, "loss": 0.9019, "step": 29460 }, { "epoch": 24.448962655601658, "grad_norm": 75.96438598632812, "learning_rate": 1.022572614107884e-05, "loss": 0.4934, "step": 29461 }, { "epoch": 24.44979253112033, "grad_norm": 32.852073669433594, "learning_rate": 1.0225394190871369e-05, "loss": 0.4278, "step": 29462 }, { "epoch": 24.450622406639003, "grad_norm": 74.6810531616211, "learning_rate": 1.0225062240663901e-05, "loss": 0.8883, "step": 29463 }, { "epoch": 24.451452282157675, "grad_norm": 42.83915710449219, "learning_rate": 1.0224730290456433e-05, "loss": 0.4074, "step": 29464 }, { "epoch": 24.452282157676347, "grad_norm": 49.92988967895508, "learning_rate": 1.0224398340248965e-05, "loss": 0.7503, "step": 29465 }, { "epoch": 24.45311203319502, "grad_norm": 40.79990005493164, "learning_rate": 1.0224066390041494e-05, "loss": 0.4539, "step": 29466 }, { "epoch": 24.45394190871369, "grad_norm": 38.540000915527344, "learning_rate": 1.0223734439834026e-05, "loss": 
0.6561, "step": 29467 }, { "epoch": 24.454771784232364, "grad_norm": 18.761756896972656, "learning_rate": 1.0223402489626556e-05, "loss": 0.3362, "step": 29468 }, { "epoch": 24.455601659751036, "grad_norm": 29.022092819213867, "learning_rate": 1.0223070539419089e-05, "loss": 0.3403, "step": 29469 }, { "epoch": 24.456431535269708, "grad_norm": 37.15983581542969, "learning_rate": 1.0222738589211619e-05, "loss": 0.2365, "step": 29470 }, { "epoch": 24.45726141078838, "grad_norm": 43.795318603515625, "learning_rate": 1.022240663900415e-05, "loss": 0.4646, "step": 29471 }, { "epoch": 24.458091286307052, "grad_norm": 17.32964515686035, "learning_rate": 1.0222074688796682e-05, "loss": 0.3283, "step": 29472 }, { "epoch": 24.458921161825725, "grad_norm": 39.02896499633789, "learning_rate": 1.0221742738589214e-05, "loss": 0.3479, "step": 29473 }, { "epoch": 24.459751037344397, "grad_norm": 26.073841094970703, "learning_rate": 1.0221410788381742e-05, "loss": 0.3841, "step": 29474 }, { "epoch": 24.46058091286307, "grad_norm": 19.682321548461914, "learning_rate": 1.0221078838174274e-05, "loss": 0.3255, "step": 29475 }, { "epoch": 24.46141078838174, "grad_norm": 37.86764144897461, "learning_rate": 1.0220746887966807e-05, "loss": 0.5113, "step": 29476 }, { "epoch": 24.462240663900413, "grad_norm": 29.01715660095215, "learning_rate": 1.0220414937759337e-05, "loss": 0.5173, "step": 29477 }, { "epoch": 24.463070539419085, "grad_norm": 67.40391540527344, "learning_rate": 1.0220082987551867e-05, "loss": 0.4355, "step": 29478 }, { "epoch": 24.463900414937758, "grad_norm": 27.924551010131836, "learning_rate": 1.02197510373444e-05, "loss": 0.5382, "step": 29479 }, { "epoch": 24.46473029045643, "grad_norm": 48.940635681152344, "learning_rate": 1.021941908713693e-05, "loss": 0.7676, "step": 29480 }, { "epoch": 24.465560165975102, "grad_norm": 26.705215454101562, "learning_rate": 1.0219087136929462e-05, "loss": 0.2772, "step": 29481 }, { "epoch": 24.466390041493774, "grad_norm": 
56.15868377685547, "learning_rate": 1.0218755186721994e-05, "loss": 0.5121, "step": 29482 }, { "epoch": 24.467219917012446, "grad_norm": 31.845962524414062, "learning_rate": 1.0218423236514523e-05, "loss": 0.3298, "step": 29483 }, { "epoch": 24.46804979253112, "grad_norm": 29.481054306030273, "learning_rate": 1.0218091286307055e-05, "loss": 0.3479, "step": 29484 }, { "epoch": 24.46887966804979, "grad_norm": 28.47068214416504, "learning_rate": 1.0217759336099587e-05, "loss": 0.4484, "step": 29485 }, { "epoch": 24.469709543568463, "grad_norm": 54.09294128417969, "learning_rate": 1.0217427385892117e-05, "loss": 0.4846, "step": 29486 }, { "epoch": 24.470539419087135, "grad_norm": 24.768754959106445, "learning_rate": 1.0217095435684648e-05, "loss": 0.3533, "step": 29487 }, { "epoch": 24.471369294605807, "grad_norm": 18.92304801940918, "learning_rate": 1.0216763485477178e-05, "loss": 0.3577, "step": 29488 }, { "epoch": 24.47219917012448, "grad_norm": 50.57278060913086, "learning_rate": 1.021643153526971e-05, "loss": 0.6615, "step": 29489 }, { "epoch": 24.473029045643152, "grad_norm": 39.68144989013672, "learning_rate": 1.0216099585062243e-05, "loss": 0.8306, "step": 29490 }, { "epoch": 24.473858921161824, "grad_norm": 81.7396240234375, "learning_rate": 1.0215767634854771e-05, "loss": 0.291, "step": 29491 }, { "epoch": 24.474688796680496, "grad_norm": 64.18373107910156, "learning_rate": 1.0215435684647303e-05, "loss": 0.3878, "step": 29492 }, { "epoch": 24.47551867219917, "grad_norm": 111.57213592529297, "learning_rate": 1.0215103734439835e-05, "loss": 0.6692, "step": 29493 }, { "epoch": 24.47634854771784, "grad_norm": 64.83464813232422, "learning_rate": 1.0214771784232368e-05, "loss": 0.3705, "step": 29494 }, { "epoch": 24.477178423236513, "grad_norm": 72.6670913696289, "learning_rate": 1.0214439834024896e-05, "loss": 0.5092, "step": 29495 }, { "epoch": 24.478008298755185, "grad_norm": 48.773990631103516, "learning_rate": 1.0214107883817428e-05, "loss": 0.5995, "step": 
29496 }, { "epoch": 24.478838174273857, "grad_norm": 88.0572509765625, "learning_rate": 1.0213775933609959e-05, "loss": 1.3357, "step": 29497 }, { "epoch": 24.47966804979253, "grad_norm": 97.7303695678711, "learning_rate": 1.0213443983402491e-05, "loss": 0.6606, "step": 29498 }, { "epoch": 24.4804979253112, "grad_norm": 42.02129364013672, "learning_rate": 1.0213112033195021e-05, "loss": 0.3544, "step": 29499 }, { "epoch": 24.481327800829874, "grad_norm": 74.84292602539062, "learning_rate": 1.0212780082987552e-05, "loss": 0.9001, "step": 29500 }, { "epoch": 24.482157676348546, "grad_norm": 37.81673049926758, "learning_rate": 1.0212448132780084e-05, "loss": 0.6464, "step": 29501 }, { "epoch": 24.482987551867218, "grad_norm": 7.138749122619629, "learning_rate": 1.0212116182572616e-05, "loss": 0.2144, "step": 29502 }, { "epoch": 24.48381742738589, "grad_norm": 57.3221435546875, "learning_rate": 1.0211784232365145e-05, "loss": 0.5489, "step": 29503 }, { "epoch": 24.484647302904563, "grad_norm": 16.164897918701172, "learning_rate": 1.0211452282157677e-05, "loss": 0.3189, "step": 29504 }, { "epoch": 24.485477178423235, "grad_norm": 139.82363891601562, "learning_rate": 1.0211120331950209e-05, "loss": 0.6549, "step": 29505 }, { "epoch": 24.486307053941907, "grad_norm": 75.89739227294922, "learning_rate": 1.021078838174274e-05, "loss": 0.3825, "step": 29506 }, { "epoch": 24.48713692946058, "grad_norm": 46.38182830810547, "learning_rate": 1.021045643153527e-05, "loss": 0.2927, "step": 29507 }, { "epoch": 24.48796680497925, "grad_norm": 23.455549240112305, "learning_rate": 1.0210124481327802e-05, "loss": 0.2818, "step": 29508 }, { "epoch": 24.488796680497924, "grad_norm": 36.75157928466797, "learning_rate": 1.0209792531120332e-05, "loss": 0.6631, "step": 29509 }, { "epoch": 24.489626556016596, "grad_norm": 50.222312927246094, "learning_rate": 1.0209460580912864e-05, "loss": 0.4635, "step": 29510 }, { "epoch": 24.490456431535268, "grad_norm": 10.376285552978516, 
"learning_rate": 1.0209128630705396e-05, "loss": 0.2642, "step": 29511 }, { "epoch": 24.49128630705394, "grad_norm": 73.48016357421875, "learning_rate": 1.0208796680497925e-05, "loss": 0.3078, "step": 29512 }, { "epoch": 24.492116182572612, "grad_norm": 12.698549270629883, "learning_rate": 1.0208464730290457e-05, "loss": 0.221, "step": 29513 }, { "epoch": 24.492946058091285, "grad_norm": 21.406965255737305, "learning_rate": 1.020813278008299e-05, "loss": 0.2732, "step": 29514 }, { "epoch": 24.49377593360996, "grad_norm": 40.35942840576172, "learning_rate": 1.020780082987552e-05, "loss": 0.588, "step": 29515 }, { "epoch": 24.49460580912863, "grad_norm": 86.42007446289062, "learning_rate": 1.020746887966805e-05, "loss": 0.628, "step": 29516 }, { "epoch": 24.495435684647305, "grad_norm": 30.34659194946289, "learning_rate": 1.0207136929460582e-05, "loss": 0.2932, "step": 29517 }, { "epoch": 24.496265560165973, "grad_norm": 16.70465660095215, "learning_rate": 1.0206804979253113e-05, "loss": 0.3065, "step": 29518 }, { "epoch": 24.49709543568465, "grad_norm": 17.405139923095703, "learning_rate": 1.0206473029045645e-05, "loss": 0.2454, "step": 29519 }, { "epoch": 24.497925311203318, "grad_norm": 52.25577926635742, "learning_rate": 1.0206141078838174e-05, "loss": 0.5364, "step": 29520 }, { "epoch": 24.498755186721993, "grad_norm": 30.38412094116211, "learning_rate": 1.0205809128630706e-05, "loss": 0.6215, "step": 29521 }, { "epoch": 24.499585062240662, "grad_norm": 61.782649993896484, "learning_rate": 1.0205477178423238e-05, "loss": 0.6621, "step": 29522 }, { "epoch": 24.500414937759338, "grad_norm": 132.93807983398438, "learning_rate": 1.020514522821577e-05, "loss": 0.4854, "step": 29523 }, { "epoch": 24.501244813278007, "grad_norm": 34.431297302246094, "learning_rate": 1.0204813278008299e-05, "loss": 0.508, "step": 29524 }, { "epoch": 24.502074688796682, "grad_norm": 58.04227828979492, "learning_rate": 1.020448132780083e-05, "loss": 0.5784, "step": 29525 }, { "epoch": 
24.50290456431535, "grad_norm": 70.9825668334961, "learning_rate": 1.0204149377593363e-05, "loss": 1.344, "step": 29526 }, { "epoch": 24.503734439834027, "grad_norm": 55.94693374633789, "learning_rate": 1.0203817427385893e-05, "loss": 0.5019, "step": 29527 }, { "epoch": 24.504564315352695, "grad_norm": 136.05340576171875, "learning_rate": 1.0203485477178424e-05, "loss": 0.7399, "step": 29528 }, { "epoch": 24.50539419087137, "grad_norm": 10.583559036254883, "learning_rate": 1.0203153526970954e-05, "loss": 0.2788, "step": 29529 }, { "epoch": 24.50622406639004, "grad_norm": 16.55122947692871, "learning_rate": 1.0202821576763486e-05, "loss": 0.3958, "step": 29530 }, { "epoch": 24.507053941908715, "grad_norm": 52.1572151184082, "learning_rate": 1.0202489626556018e-05, "loss": 0.5163, "step": 29531 }, { "epoch": 24.507883817427384, "grad_norm": 57.50188064575195, "learning_rate": 1.0202157676348547e-05, "loss": 0.3864, "step": 29532 }, { "epoch": 24.50871369294606, "grad_norm": 24.332189559936523, "learning_rate": 1.0201825726141079e-05, "loss": 0.4075, "step": 29533 }, { "epoch": 24.50954356846473, "grad_norm": 14.622060775756836, "learning_rate": 1.0201493775933611e-05, "loss": 0.3137, "step": 29534 }, { "epoch": 24.510373443983404, "grad_norm": 32.44838333129883, "learning_rate": 1.0201161825726142e-05, "loss": 0.5133, "step": 29535 }, { "epoch": 24.511203319502073, "grad_norm": 60.89099884033203, "learning_rate": 1.0200829875518674e-05, "loss": 0.3428, "step": 29536 }, { "epoch": 24.51203319502075, "grad_norm": 68.9828872680664, "learning_rate": 1.0200497925311204e-05, "loss": 0.4007, "step": 29537 }, { "epoch": 24.512863070539417, "grad_norm": 33.33755874633789, "learning_rate": 1.0200165975103735e-05, "loss": 0.3358, "step": 29538 }, { "epoch": 24.513692946058093, "grad_norm": 51.97766876220703, "learning_rate": 1.0199834024896267e-05, "loss": 0.5683, "step": 29539 }, { "epoch": 24.51452282157676, "grad_norm": 20.920879364013672, "learning_rate": 
1.0199502074688799e-05, "loss": 0.3646, "step": 29540 }, { "epoch": 24.515352697095437, "grad_norm": 12.280304908752441, "learning_rate": 1.0199170124481327e-05, "loss": 0.2652, "step": 29541 }, { "epoch": 24.51618257261411, "grad_norm": 33.045528411865234, "learning_rate": 1.019883817427386e-05, "loss": 0.5787, "step": 29542 }, { "epoch": 24.517012448132782, "grad_norm": 19.547874450683594, "learning_rate": 1.0198506224066392e-05, "loss": 0.4426, "step": 29543 }, { "epoch": 24.517842323651454, "grad_norm": 35.35740661621094, "learning_rate": 1.0198174273858922e-05, "loss": 0.6075, "step": 29544 }, { "epoch": 24.518672199170126, "grad_norm": 120.22216033935547, "learning_rate": 1.0197842323651453e-05, "loss": 0.4975, "step": 29545 }, { "epoch": 24.5195020746888, "grad_norm": 10.175267219543457, "learning_rate": 1.0197510373443985e-05, "loss": 0.2778, "step": 29546 }, { "epoch": 24.52033195020747, "grad_norm": 60.78571701049805, "learning_rate": 1.0197178423236515e-05, "loss": 0.3446, "step": 29547 }, { "epoch": 24.521161825726143, "grad_norm": 14.662349700927734, "learning_rate": 1.0196846473029047e-05, "loss": 0.2782, "step": 29548 }, { "epoch": 24.521991701244815, "grad_norm": 31.957977294921875, "learning_rate": 1.0196514522821578e-05, "loss": 0.4905, "step": 29549 }, { "epoch": 24.522821576763487, "grad_norm": 35.85513687133789, "learning_rate": 1.0196182572614108e-05, "loss": 0.4119, "step": 29550 }, { "epoch": 24.52365145228216, "grad_norm": 29.0678653717041, "learning_rate": 1.019585062240664e-05, "loss": 0.3745, "step": 29551 }, { "epoch": 24.52448132780083, "grad_norm": 15.261528968811035, "learning_rate": 1.0195518672199172e-05, "loss": 0.2942, "step": 29552 }, { "epoch": 24.525311203319504, "grad_norm": 18.276649475097656, "learning_rate": 1.0195186721991701e-05, "loss": 0.364, "step": 29553 }, { "epoch": 24.526141078838176, "grad_norm": 46.177425384521484, "learning_rate": 1.0194854771784233e-05, "loss": 0.5644, "step": 29554 }, { "epoch": 
24.526970954356848, "grad_norm": 75.71987915039062, "learning_rate": 1.0194522821576765e-05, "loss": 0.6341, "step": 29555 }, { "epoch": 24.52780082987552, "grad_norm": 12.368648529052734, "learning_rate": 1.0194190871369296e-05, "loss": 0.2746, "step": 29556 }, { "epoch": 24.528630705394193, "grad_norm": 29.568134307861328, "learning_rate": 1.0193858921161826e-05, "loss": 0.2865, "step": 29557 }, { "epoch": 24.529460580912865, "grad_norm": 33.499263763427734, "learning_rate": 1.0193526970954356e-05, "loss": 0.4244, "step": 29558 }, { "epoch": 24.530290456431537, "grad_norm": 27.52541732788086, "learning_rate": 1.0193195020746888e-05, "loss": 0.3696, "step": 29559 }, { "epoch": 24.53112033195021, "grad_norm": 44.00815963745117, "learning_rate": 1.019286307053942e-05, "loss": 0.5075, "step": 29560 }, { "epoch": 24.53195020746888, "grad_norm": 39.167884826660156, "learning_rate": 1.0192531120331953e-05, "loss": 0.4142, "step": 29561 }, { "epoch": 24.532780082987554, "grad_norm": 36.9039306640625, "learning_rate": 1.0192199170124481e-05, "loss": 0.4407, "step": 29562 }, { "epoch": 24.533609958506226, "grad_norm": 89.91248321533203, "learning_rate": 1.0191867219917014e-05, "loss": 0.6363, "step": 29563 }, { "epoch": 24.534439834024898, "grad_norm": 90.45143127441406, "learning_rate": 1.0191535269709546e-05, "loss": 0.3277, "step": 29564 }, { "epoch": 24.53526970954357, "grad_norm": 67.29720306396484, "learning_rate": 1.0191203319502076e-05, "loss": 0.5968, "step": 29565 }, { "epoch": 24.536099585062242, "grad_norm": 23.887975692749023, "learning_rate": 1.0190871369294606e-05, "loss": 0.314, "step": 29566 }, { "epoch": 24.536929460580915, "grad_norm": 90.72895050048828, "learning_rate": 1.0190539419087137e-05, "loss": 0.522, "step": 29567 }, { "epoch": 24.537759336099587, "grad_norm": 24.63248634338379, "learning_rate": 1.0190207468879669e-05, "loss": 0.3457, "step": 29568 }, { "epoch": 24.53858921161826, "grad_norm": 18.09902572631836, "learning_rate": 
1.0189875518672201e-05, "loss": 0.2753, "step": 29569 }, { "epoch": 24.53941908713693, "grad_norm": 119.30469512939453, "learning_rate": 1.018954356846473e-05, "loss": 0.8725, "step": 29570 }, { "epoch": 24.540248962655603, "grad_norm": 62.50178146362305, "learning_rate": 1.0189211618257262e-05, "loss": 0.5722, "step": 29571 }, { "epoch": 24.541078838174275, "grad_norm": 43.14565658569336, "learning_rate": 1.0188879668049794e-05, "loss": 0.3267, "step": 29572 }, { "epoch": 24.541908713692948, "grad_norm": 74.3918228149414, "learning_rate": 1.0188547717842326e-05, "loss": 0.755, "step": 29573 }, { "epoch": 24.54273858921162, "grad_norm": 71.3382339477539, "learning_rate": 1.0188215767634855e-05, "loss": 0.7264, "step": 29574 }, { "epoch": 24.543568464730292, "grad_norm": 44.000553131103516, "learning_rate": 1.0187883817427387e-05, "loss": 0.4209, "step": 29575 }, { "epoch": 24.544398340248964, "grad_norm": 10.747091293334961, "learning_rate": 1.0187551867219917e-05, "loss": 0.2765, "step": 29576 }, { "epoch": 24.545228215767636, "grad_norm": 96.78874206542969, "learning_rate": 1.018721991701245e-05, "loss": 0.4716, "step": 29577 }, { "epoch": 24.54605809128631, "grad_norm": 50.234336853027344, "learning_rate": 1.018688796680498e-05, "loss": 0.4703, "step": 29578 }, { "epoch": 24.54688796680498, "grad_norm": 55.014007568359375, "learning_rate": 1.018655601659751e-05, "loss": 0.6347, "step": 29579 }, { "epoch": 24.547717842323653, "grad_norm": 38.41750717163086, "learning_rate": 1.0186224066390042e-05, "loss": 0.3816, "step": 29580 }, { "epoch": 24.548547717842325, "grad_norm": 47.933536529541016, "learning_rate": 1.0185892116182575e-05, "loss": 0.457, "step": 29581 }, { "epoch": 24.549377593360997, "grad_norm": 24.49500274658203, "learning_rate": 1.0185560165975103e-05, "loss": 0.2541, "step": 29582 }, { "epoch": 24.55020746887967, "grad_norm": 27.400575637817383, "learning_rate": 1.0185228215767635e-05, "loss": 0.3082, "step": 29583 }, { "epoch": 24.551037344398342, 
"grad_norm": 57.561092376708984, "learning_rate": 1.0184896265560167e-05, "loss": 0.8837, "step": 29584 }, { "epoch": 24.551867219917014, "grad_norm": 31.35222053527832, "learning_rate": 1.0184564315352698e-05, "loss": 0.2729, "step": 29585 }, { "epoch": 24.552697095435686, "grad_norm": 22.93169593811035, "learning_rate": 1.0184232365145228e-05, "loss": 0.3148, "step": 29586 }, { "epoch": 24.55352697095436, "grad_norm": 50.37188720703125, "learning_rate": 1.018390041493776e-05, "loss": 0.3527, "step": 29587 }, { "epoch": 24.55435684647303, "grad_norm": 62.110198974609375, "learning_rate": 1.018356846473029e-05, "loss": 0.6594, "step": 29588 }, { "epoch": 24.555186721991703, "grad_norm": 49.00288009643555, "learning_rate": 1.0183236514522823e-05, "loss": 0.8227, "step": 29589 }, { "epoch": 24.556016597510375, "grad_norm": 32.179046630859375, "learning_rate": 1.0182904564315355e-05, "loss": 0.3734, "step": 29590 }, { "epoch": 24.556846473029047, "grad_norm": 32.69839096069336, "learning_rate": 1.0182572614107884e-05, "loss": 0.4391, "step": 29591 }, { "epoch": 24.55767634854772, "grad_norm": 69.22076416015625, "learning_rate": 1.0182240663900416e-05, "loss": 0.4049, "step": 29592 }, { "epoch": 24.55850622406639, "grad_norm": 32.055694580078125, "learning_rate": 1.0181908713692948e-05, "loss": 0.5285, "step": 29593 }, { "epoch": 24.559336099585064, "grad_norm": 65.94578552246094, "learning_rate": 1.0181576763485478e-05, "loss": 0.5306, "step": 29594 }, { "epoch": 24.560165975103736, "grad_norm": 57.3612174987793, "learning_rate": 1.0181244813278009e-05, "loss": 0.7096, "step": 29595 }, { "epoch": 24.560995850622408, "grad_norm": 50.97969055175781, "learning_rate": 1.0180912863070541e-05, "loss": 0.3622, "step": 29596 }, { "epoch": 24.56182572614108, "grad_norm": 21.950746536254883, "learning_rate": 1.0180580912863071e-05, "loss": 0.5456, "step": 29597 }, { "epoch": 24.562655601659753, "grad_norm": 34.63375473022461, "learning_rate": 1.0180248962655603e-05, "loss": 
0.4208, "step": 29598 }, { "epoch": 24.563485477178425, "grad_norm": 16.616605758666992, "learning_rate": 1.0179917012448132e-05, "loss": 0.3937, "step": 29599 }, { "epoch": 24.564315352697097, "grad_norm": 16.376375198364258, "learning_rate": 1.0179585062240664e-05, "loss": 0.2978, "step": 29600 }, { "epoch": 24.56514522821577, "grad_norm": 16.099872589111328, "learning_rate": 1.0179253112033196e-05, "loss": 0.352, "step": 29601 }, { "epoch": 24.56597510373444, "grad_norm": 13.126104354858398, "learning_rate": 1.0178921161825728e-05, "loss": 0.2736, "step": 29602 }, { "epoch": 24.566804979253114, "grad_norm": 28.585521697998047, "learning_rate": 1.0178589211618257e-05, "loss": 0.6486, "step": 29603 }, { "epoch": 24.567634854771786, "grad_norm": 111.94556427001953, "learning_rate": 1.017825726141079e-05, "loss": 0.4466, "step": 29604 }, { "epoch": 24.568464730290458, "grad_norm": 20.1383113861084, "learning_rate": 1.017792531120332e-05, "loss": 0.2925, "step": 29605 }, { "epoch": 24.56929460580913, "grad_norm": 28.472402572631836, "learning_rate": 1.0177593360995852e-05, "loss": 0.3857, "step": 29606 }, { "epoch": 24.570124481327802, "grad_norm": 29.00782585144043, "learning_rate": 1.0177261410788382e-05, "loss": 0.4159, "step": 29607 }, { "epoch": 24.570954356846475, "grad_norm": 211.01805114746094, "learning_rate": 1.0176929460580913e-05, "loss": 0.705, "step": 29608 }, { "epoch": 24.571784232365147, "grad_norm": 38.71738815307617, "learning_rate": 1.0176597510373445e-05, "loss": 0.5664, "step": 29609 }, { "epoch": 24.57261410788382, "grad_norm": 46.41061782836914, "learning_rate": 1.0176265560165977e-05, "loss": 0.5702, "step": 29610 }, { "epoch": 24.57344398340249, "grad_norm": 10.905795097351074, "learning_rate": 1.0175933609958506e-05, "loss": 0.2639, "step": 29611 }, { "epoch": 24.574273858921163, "grad_norm": 63.56511306762695, "learning_rate": 1.0175601659751038e-05, "loss": 0.4062, "step": 29612 }, { "epoch": 24.575103734439836, "grad_norm": 
34.61848449707031, "learning_rate": 1.017526970954357e-05, "loss": 0.4131, "step": 29613 }, { "epoch": 24.575933609958508, "grad_norm": 162.52418518066406, "learning_rate": 1.01749377593361e-05, "loss": 0.6394, "step": 29614 }, { "epoch": 24.57676348547718, "grad_norm": 32.32004928588867, "learning_rate": 1.0174605809128632e-05, "loss": 0.7591, "step": 29615 }, { "epoch": 24.577593360995852, "grad_norm": 27.9491024017334, "learning_rate": 1.0174273858921163e-05, "loss": 0.2787, "step": 29616 }, { "epoch": 24.578423236514524, "grad_norm": 32.055747985839844, "learning_rate": 1.0173941908713693e-05, "loss": 0.4354, "step": 29617 }, { "epoch": 24.579253112033197, "grad_norm": 120.798095703125, "learning_rate": 1.0173609958506225e-05, "loss": 0.4064, "step": 29618 }, { "epoch": 24.58008298755187, "grad_norm": 49.56792449951172, "learning_rate": 1.0173278008298757e-05, "loss": 0.5604, "step": 29619 }, { "epoch": 24.58091286307054, "grad_norm": 31.17536163330078, "learning_rate": 1.0172946058091286e-05, "loss": 0.2065, "step": 29620 }, { "epoch": 24.581742738589213, "grad_norm": 36.415016174316406, "learning_rate": 1.0172614107883818e-05, "loss": 0.3155, "step": 29621 }, { "epoch": 24.582572614107885, "grad_norm": 68.90156555175781, "learning_rate": 1.017228215767635e-05, "loss": 0.4547, "step": 29622 }, { "epoch": 24.583402489626557, "grad_norm": 154.14263916015625, "learning_rate": 1.017195020746888e-05, "loss": 0.638, "step": 29623 }, { "epoch": 24.58423236514523, "grad_norm": 24.925012588500977, "learning_rate": 1.0171618257261411e-05, "loss": 0.4632, "step": 29624 }, { "epoch": 24.585062240663902, "grad_norm": 62.291988372802734, "learning_rate": 1.0171286307053943e-05, "loss": 0.4698, "step": 29625 }, { "epoch": 24.585892116182574, "grad_norm": 81.1778793334961, "learning_rate": 1.0170954356846474e-05, "loss": 0.8243, "step": 29626 }, { "epoch": 24.586721991701246, "grad_norm": 25.769973754882812, "learning_rate": 1.0170622406639006e-05, "loss": 0.3559, "step": 
29627 }, { "epoch": 24.58755186721992, "grad_norm": 46.126922607421875, "learning_rate": 1.0170290456431534e-05, "loss": 0.6989, "step": 29628 }, { "epoch": 24.58838174273859, "grad_norm": 31.970123291015625, "learning_rate": 1.0169958506224067e-05, "loss": 0.3249, "step": 29629 }, { "epoch": 24.589211618257263, "grad_norm": 73.68534088134766, "learning_rate": 1.0169626556016599e-05, "loss": 0.6807, "step": 29630 }, { "epoch": 24.590041493775935, "grad_norm": 19.1049861907959, "learning_rate": 1.016929460580913e-05, "loss": 0.3014, "step": 29631 }, { "epoch": 24.590871369294607, "grad_norm": 43.4966926574707, "learning_rate": 1.016896265560166e-05, "loss": 0.4026, "step": 29632 }, { "epoch": 24.59170124481328, "grad_norm": 29.368684768676758, "learning_rate": 1.0168630705394192e-05, "loss": 0.3623, "step": 29633 }, { "epoch": 24.59253112033195, "grad_norm": 74.8700180053711, "learning_rate": 1.0168298755186724e-05, "loss": 0.525, "step": 29634 }, { "epoch": 24.593360995850624, "grad_norm": 34.46477127075195, "learning_rate": 1.0167966804979254e-05, "loss": 0.2395, "step": 29635 }, { "epoch": 24.594190871369296, "grad_norm": 131.29376220703125, "learning_rate": 1.0167634854771785e-05, "loss": 0.4797, "step": 29636 }, { "epoch": 24.59502074688797, "grad_norm": 160.3023681640625, "learning_rate": 1.0167302904564315e-05, "loss": 0.8224, "step": 29637 }, { "epoch": 24.59585062240664, "grad_norm": 130.9024658203125, "learning_rate": 1.0166970954356847e-05, "loss": 0.2949, "step": 29638 }, { "epoch": 24.596680497925313, "grad_norm": 65.2197265625, "learning_rate": 1.016663900414938e-05, "loss": 0.6178, "step": 29639 }, { "epoch": 24.597510373443985, "grad_norm": 9.448450088500977, "learning_rate": 1.0166307053941911e-05, "loss": 0.2507, "step": 29640 }, { "epoch": 24.598340248962657, "grad_norm": 36.82083511352539, "learning_rate": 1.016597510373444e-05, "loss": 0.6369, "step": 29641 }, { "epoch": 24.59917012448133, "grad_norm": 47.042049407958984, "learning_rate": 
1.0165643153526972e-05, "loss": 0.6153, "step": 29642 }, { "epoch": 24.6, "grad_norm": 47.258453369140625, "learning_rate": 1.0165311203319504e-05, "loss": 0.5667, "step": 29643 }, { "epoch": 24.600829875518674, "grad_norm": 54.10397720336914, "learning_rate": 1.0164979253112035e-05, "loss": 0.4724, "step": 29644 }, { "epoch": 24.601659751037346, "grad_norm": 54.67192077636719, "learning_rate": 1.0164647302904565e-05, "loss": 0.3527, "step": 29645 }, { "epoch": 24.602489626556018, "grad_norm": 32.12190628051758, "learning_rate": 1.0164315352697095e-05, "loss": 0.5446, "step": 29646 }, { "epoch": 24.60331950207469, "grad_norm": 23.933029174804688, "learning_rate": 1.0163983402489628e-05, "loss": 0.3001, "step": 29647 }, { "epoch": 24.604149377593362, "grad_norm": 41.658775329589844, "learning_rate": 1.016365145228216e-05, "loss": 0.3093, "step": 29648 }, { "epoch": 24.604979253112035, "grad_norm": 53.201377868652344, "learning_rate": 1.0163319502074688e-05, "loss": 0.4088, "step": 29649 }, { "epoch": 24.605809128630707, "grad_norm": 21.27678108215332, "learning_rate": 1.016298755186722e-05, "loss": 0.3235, "step": 29650 }, { "epoch": 24.60663900414938, "grad_norm": 28.358592987060547, "learning_rate": 1.0162655601659753e-05, "loss": 0.3711, "step": 29651 }, { "epoch": 24.60746887966805, "grad_norm": 87.57684326171875, "learning_rate": 1.0162323651452283e-05, "loss": 0.5902, "step": 29652 }, { "epoch": 24.608298755186723, "grad_norm": 48.83927536010742, "learning_rate": 1.0161991701244813e-05, "loss": 0.517, "step": 29653 }, { "epoch": 24.609128630705396, "grad_norm": 81.73109436035156, "learning_rate": 1.0161659751037346e-05, "loss": 0.3356, "step": 29654 }, { "epoch": 24.609958506224068, "grad_norm": 56.191162109375, "learning_rate": 1.0161327800829876e-05, "loss": 0.3718, "step": 29655 }, { "epoch": 24.61078838174274, "grad_norm": 18.757633209228516, "learning_rate": 1.0160995850622408e-05, "loss": 0.333, "step": 29656 }, { "epoch": 24.611618257261412, 
"grad_norm": 31.869531631469727, "learning_rate": 1.0160663900414938e-05, "loss": 0.3107, "step": 29657 }, { "epoch": 24.612448132780084, "grad_norm": 26.25031089782715, "learning_rate": 1.0160331950207469e-05, "loss": 0.2656, "step": 29658 }, { "epoch": 24.613278008298757, "grad_norm": 26.74568748474121, "learning_rate": 1.0160000000000001e-05, "loss": 0.404, "step": 29659 }, { "epoch": 24.61410788381743, "grad_norm": 61.5103759765625, "learning_rate": 1.0159668049792533e-05, "loss": 0.3708, "step": 29660 }, { "epoch": 24.6149377593361, "grad_norm": 37.049320220947266, "learning_rate": 1.0159336099585062e-05, "loss": 0.4518, "step": 29661 }, { "epoch": 24.615767634854773, "grad_norm": 43.73556900024414, "learning_rate": 1.0159004149377594e-05, "loss": 0.4105, "step": 29662 }, { "epoch": 24.616597510373445, "grad_norm": 61.62445068359375, "learning_rate": 1.0158672199170126e-05, "loss": 0.9019, "step": 29663 }, { "epoch": 24.617427385892118, "grad_norm": 69.4674301147461, "learning_rate": 1.0158340248962656e-05, "loss": 0.4137, "step": 29664 }, { "epoch": 24.61825726141079, "grad_norm": 16.437158584594727, "learning_rate": 1.0158008298755187e-05, "loss": 0.3055, "step": 29665 }, { "epoch": 24.619087136929462, "grad_norm": 19.464555740356445, "learning_rate": 1.0157676348547719e-05, "loss": 0.2631, "step": 29666 }, { "epoch": 24.619917012448134, "grad_norm": 60.46693801879883, "learning_rate": 1.015734439834025e-05, "loss": 0.5002, "step": 29667 }, { "epoch": 24.620746887966806, "grad_norm": 48.089412689208984, "learning_rate": 1.0157012448132781e-05, "loss": 0.7157, "step": 29668 }, { "epoch": 24.62157676348548, "grad_norm": 45.08681106567383, "learning_rate": 1.0156680497925314e-05, "loss": 0.4929, "step": 29669 }, { "epoch": 24.62240663900415, "grad_norm": 28.387832641601562, "learning_rate": 1.0156348547717842e-05, "loss": 0.3736, "step": 29670 }, { "epoch": 24.623236514522823, "grad_norm": 60.784454345703125, "learning_rate": 1.0156016597510374e-05, "loss": 
0.3536, "step": 29671 }, { "epoch": 24.624066390041495, "grad_norm": 33.36442565917969, "learning_rate": 1.0155684647302907e-05, "loss": 0.4117, "step": 29672 }, { "epoch": 24.624896265560167, "grad_norm": 65.7335433959961, "learning_rate": 1.0155352697095437e-05, "loss": 0.6608, "step": 29673 }, { "epoch": 24.62572614107884, "grad_norm": 17.29707908630371, "learning_rate": 1.0155020746887967e-05, "loss": 0.4408, "step": 29674 }, { "epoch": 24.62655601659751, "grad_norm": 19.09546661376953, "learning_rate": 1.0154688796680498e-05, "loss": 0.3294, "step": 29675 }, { "epoch": 24.627385892116184, "grad_norm": 21.535865783691406, "learning_rate": 1.015435684647303e-05, "loss": 0.4016, "step": 29676 }, { "epoch": 24.628215767634856, "grad_norm": 39.65067672729492, "learning_rate": 1.0154024896265562e-05, "loss": 0.3087, "step": 29677 }, { "epoch": 24.62904564315353, "grad_norm": 107.10922241210938, "learning_rate": 1.015369294605809e-05, "loss": 0.5405, "step": 29678 }, { "epoch": 24.6298755186722, "grad_norm": 46.956932067871094, "learning_rate": 1.0153360995850623e-05, "loss": 0.7383, "step": 29679 }, { "epoch": 24.630705394190873, "grad_norm": 95.54121398925781, "learning_rate": 1.0153029045643155e-05, "loss": 0.4803, "step": 29680 }, { "epoch": 24.631535269709545, "grad_norm": 190.00552368164062, "learning_rate": 1.0152697095435687e-05, "loss": 0.6064, "step": 29681 }, { "epoch": 24.632365145228217, "grad_norm": 24.31388282775879, "learning_rate": 1.0152365145228216e-05, "loss": 0.3995, "step": 29682 }, { "epoch": 24.63319502074689, "grad_norm": 58.692684173583984, "learning_rate": 1.0152033195020748e-05, "loss": 0.63, "step": 29683 }, { "epoch": 24.63402489626556, "grad_norm": 40.95592498779297, "learning_rate": 1.0151701244813278e-05, "loss": 0.4713, "step": 29684 }, { "epoch": 24.634854771784234, "grad_norm": 18.872957229614258, "learning_rate": 1.015136929460581e-05, "loss": 0.2488, "step": 29685 }, { "epoch": 24.635684647302906, "grad_norm": 22.984643936157227, 
"learning_rate": 1.015103734439834e-05, "loss": 0.2064, "step": 29686 }, { "epoch": 24.636514522821578, "grad_norm": 58.520606994628906, "learning_rate": 1.0150705394190871e-05, "loss": 0.3663, "step": 29687 }, { "epoch": 24.63734439834025, "grad_norm": 9.07871150970459, "learning_rate": 1.0150373443983403e-05, "loss": 0.1928, "step": 29688 }, { "epoch": 24.638174273858922, "grad_norm": 101.63036346435547, "learning_rate": 1.0150041493775935e-05, "loss": 0.8876, "step": 29689 }, { "epoch": 24.639004149377595, "grad_norm": 63.22736358642578, "learning_rate": 1.0149709543568464e-05, "loss": 1.2132, "step": 29690 }, { "epoch": 24.639834024896267, "grad_norm": 28.372194290161133, "learning_rate": 1.0149377593360996e-05, "loss": 0.2967, "step": 29691 }, { "epoch": 24.64066390041494, "grad_norm": 60.95387268066406, "learning_rate": 1.0149045643153528e-05, "loss": 0.5684, "step": 29692 }, { "epoch": 24.64149377593361, "grad_norm": 32.832908630371094, "learning_rate": 1.0148713692946059e-05, "loss": 0.4694, "step": 29693 }, { "epoch": 24.642323651452283, "grad_norm": 11.76799201965332, "learning_rate": 1.0148381742738591e-05, "loss": 0.2621, "step": 29694 }, { "epoch": 24.643153526970956, "grad_norm": 17.113922119140625, "learning_rate": 1.0148049792531121e-05, "loss": 0.2585, "step": 29695 }, { "epoch": 24.643983402489628, "grad_norm": 25.227270126342773, "learning_rate": 1.0147717842323652e-05, "loss": 0.4764, "step": 29696 }, { "epoch": 24.6448132780083, "grad_norm": 47.73503112792969, "learning_rate": 1.0147385892116184e-05, "loss": 0.6964, "step": 29697 }, { "epoch": 24.645643153526972, "grad_norm": 61.52064895629883, "learning_rate": 1.0147053941908716e-05, "loss": 0.7466, "step": 29698 }, { "epoch": 24.646473029045644, "grad_norm": 52.569034576416016, "learning_rate": 1.0146721991701245e-05, "loss": 0.2523, "step": 29699 }, { "epoch": 24.647302904564317, "grad_norm": 36.4296760559082, "learning_rate": 1.0146390041493777e-05, "loss": 0.7685, "step": 29700 }, { 
"epoch": 24.64813278008299, "grad_norm": 104.13616180419922, "learning_rate": 1.0146058091286309e-05, "loss": 0.526, "step": 29701 }, { "epoch": 24.64896265560166, "grad_norm": 37.433250427246094, "learning_rate": 1.014572614107884e-05, "loss": 0.5045, "step": 29702 }, { "epoch": 24.649792531120333, "grad_norm": 61.911067962646484, "learning_rate": 1.014539419087137e-05, "loss": 0.7874, "step": 29703 }, { "epoch": 24.650622406639005, "grad_norm": 12.979331016540527, "learning_rate": 1.0145062240663902e-05, "loss": 0.2244, "step": 29704 }, { "epoch": 24.651452282157678, "grad_norm": 13.929401397705078, "learning_rate": 1.0144730290456432e-05, "loss": 0.2544, "step": 29705 }, { "epoch": 24.65228215767635, "grad_norm": 18.40077018737793, "learning_rate": 1.0144398340248964e-05, "loss": 0.2848, "step": 29706 }, { "epoch": 24.653112033195022, "grad_norm": 19.420780181884766, "learning_rate": 1.0144066390041493e-05, "loss": 0.2443, "step": 29707 }, { "epoch": 24.653941908713694, "grad_norm": 50.73994064331055, "learning_rate": 1.0143734439834025e-05, "loss": 0.4973, "step": 29708 }, { "epoch": 24.654771784232366, "grad_norm": 19.186872482299805, "learning_rate": 1.0143402489626557e-05, "loss": 0.414, "step": 29709 }, { "epoch": 24.65560165975104, "grad_norm": 36.613502502441406, "learning_rate": 1.014307053941909e-05, "loss": 0.4778, "step": 29710 }, { "epoch": 24.65643153526971, "grad_norm": 118.09738159179688, "learning_rate": 1.0142738589211618e-05, "loss": 0.4407, "step": 29711 }, { "epoch": 24.657261410788383, "grad_norm": 49.05266189575195, "learning_rate": 1.014240663900415e-05, "loss": 0.3939, "step": 29712 }, { "epoch": 24.658091286307055, "grad_norm": 44.0567741394043, "learning_rate": 1.0142074688796682e-05, "loss": 0.511, "step": 29713 }, { "epoch": 24.658921161825727, "grad_norm": 12.348260879516602, "learning_rate": 1.0141742738589213e-05, "loss": 0.2241, "step": 29714 }, { "epoch": 24.6597510373444, "grad_norm": 76.92552947998047, "learning_rate": 
1.0141410788381743e-05, "loss": 0.9107, "step": 29715 }, { "epoch": 24.66058091286307, "grad_norm": 34.12227249145508, "learning_rate": 1.0141078838174274e-05, "loss": 0.3524, "step": 29716 }, { "epoch": 24.661410788381744, "grad_norm": 45.286285400390625, "learning_rate": 1.0140746887966806e-05, "loss": 0.6366, "step": 29717 }, { "epoch": 24.662240663900416, "grad_norm": 41.45177459716797, "learning_rate": 1.0140414937759338e-05, "loss": 1.0597, "step": 29718 }, { "epoch": 24.66307053941909, "grad_norm": 62.72953796386719, "learning_rate": 1.014008298755187e-05, "loss": 0.8148, "step": 29719 }, { "epoch": 24.66390041493776, "grad_norm": 15.231568336486816, "learning_rate": 1.0139751037344399e-05, "loss": 0.3028, "step": 29720 }, { "epoch": 24.664730290456433, "grad_norm": 11.33016300201416, "learning_rate": 1.013941908713693e-05, "loss": 0.3056, "step": 29721 }, { "epoch": 24.665560165975105, "grad_norm": 41.33242416381836, "learning_rate": 1.0139087136929461e-05, "loss": 0.7019, "step": 29722 }, { "epoch": 24.666390041493777, "grad_norm": 49.31591796875, "learning_rate": 1.0138755186721993e-05, "loss": 0.7003, "step": 29723 }, { "epoch": 24.66721991701245, "grad_norm": 25.319406509399414, "learning_rate": 1.0138423236514524e-05, "loss": 0.6732, "step": 29724 }, { "epoch": 24.66804979253112, "grad_norm": 21.984838485717773, "learning_rate": 1.0138091286307054e-05, "loss": 0.3661, "step": 29725 }, { "epoch": 24.668879668049794, "grad_norm": 33.092979431152344, "learning_rate": 1.0137759336099586e-05, "loss": 0.269, "step": 29726 }, { "epoch": 24.669709543568466, "grad_norm": 18.358402252197266, "learning_rate": 1.0137427385892118e-05, "loss": 0.3854, "step": 29727 }, { "epoch": 24.670539419087138, "grad_norm": 58.39529037475586, "learning_rate": 1.0137095435684647e-05, "loss": 0.5814, "step": 29728 }, { "epoch": 24.67136929460581, "grad_norm": 38.90755081176758, "learning_rate": 1.0136763485477179e-05, "loss": 0.5611, "step": 29729 }, { "epoch": 24.672199170124482, 
"grad_norm": 122.59916687011719, "learning_rate": 1.0136431535269711e-05, "loss": 0.6602, "step": 29730 }, { "epoch": 24.673029045643155, "grad_norm": 19.838525772094727, "learning_rate": 1.0136099585062242e-05, "loss": 0.3562, "step": 29731 }, { "epoch": 24.673858921161827, "grad_norm": 39.10328674316406, "learning_rate": 1.0135767634854772e-05, "loss": 0.5981, "step": 29732 }, { "epoch": 24.6746887966805, "grad_norm": 42.780479431152344, "learning_rate": 1.0135435684647304e-05, "loss": 0.423, "step": 29733 }, { "epoch": 24.67551867219917, "grad_norm": 42.20641326904297, "learning_rate": 1.0135103734439835e-05, "loss": 0.8093, "step": 29734 }, { "epoch": 24.676348547717843, "grad_norm": 85.2676773071289, "learning_rate": 1.0134771784232367e-05, "loss": 1.0566, "step": 29735 }, { "epoch": 24.677178423236516, "grad_norm": 15.221427917480469, "learning_rate": 1.0134439834024895e-05, "loss": 0.2807, "step": 29736 }, { "epoch": 24.678008298755188, "grad_norm": 29.411563873291016, "learning_rate": 1.0134107883817427e-05, "loss": 0.5066, "step": 29737 }, { "epoch": 24.67883817427386, "grad_norm": 12.795074462890625, "learning_rate": 1.013377593360996e-05, "loss": 0.2739, "step": 29738 }, { "epoch": 24.679668049792532, "grad_norm": 31.098819732666016, "learning_rate": 1.0133443983402492e-05, "loss": 0.3505, "step": 29739 }, { "epoch": 24.680497925311204, "grad_norm": 45.538387298583984, "learning_rate": 1.013311203319502e-05, "loss": 0.4433, "step": 29740 }, { "epoch": 24.681327800829877, "grad_norm": 31.05735206604004, "learning_rate": 1.0132780082987552e-05, "loss": 0.4633, "step": 29741 }, { "epoch": 24.68215767634855, "grad_norm": 33.21820068359375, "learning_rate": 1.0132448132780085e-05, "loss": 0.5181, "step": 29742 }, { "epoch": 24.68298755186722, "grad_norm": 68.37236022949219, "learning_rate": 1.0132116182572615e-05, "loss": 0.631, "step": 29743 }, { "epoch": 24.683817427385893, "grad_norm": 19.671730041503906, "learning_rate": 1.0131784232365145e-05, "loss": 
0.2915, "step": 29744 }, { "epoch": 24.684647302904565, "grad_norm": 9.567628860473633, "learning_rate": 1.0131452282157676e-05, "loss": 0.2334, "step": 29745 }, { "epoch": 24.685477178423238, "grad_norm": 21.404356002807617, "learning_rate": 1.0131120331950208e-05, "loss": 0.2213, "step": 29746 }, { "epoch": 24.68630705394191, "grad_norm": 58.449501037597656, "learning_rate": 1.013078838174274e-05, "loss": 0.8031, "step": 29747 }, { "epoch": 24.687136929460582, "grad_norm": 88.40367126464844, "learning_rate": 1.0130456431535272e-05, "loss": 1.0502, "step": 29748 }, { "epoch": 24.687966804979254, "grad_norm": 34.99409484863281, "learning_rate": 1.0130124481327801e-05, "loss": 0.4225, "step": 29749 }, { "epoch": 24.688796680497926, "grad_norm": 109.33627319335938, "learning_rate": 1.0129792531120333e-05, "loss": 0.4602, "step": 29750 }, { "epoch": 24.6896265560166, "grad_norm": 35.68037414550781, "learning_rate": 1.0129460580912865e-05, "loss": 0.3661, "step": 29751 }, { "epoch": 24.69045643153527, "grad_norm": 14.130305290222168, "learning_rate": 1.0129128630705396e-05, "loss": 0.4229, "step": 29752 }, { "epoch": 24.691286307053943, "grad_norm": 15.918549537658691, "learning_rate": 1.0128796680497926e-05, "loss": 0.245, "step": 29753 }, { "epoch": 24.692116182572615, "grad_norm": 35.220340728759766, "learning_rate": 1.0128464730290456e-05, "loss": 0.4303, "step": 29754 }, { "epoch": 24.692946058091287, "grad_norm": 85.1912841796875, "learning_rate": 1.0128132780082988e-05, "loss": 1.1252, "step": 29755 }, { "epoch": 24.69377593360996, "grad_norm": 19.92206573486328, "learning_rate": 1.012780082987552e-05, "loss": 0.3082, "step": 29756 }, { "epoch": 24.694605809128632, "grad_norm": 26.057395935058594, "learning_rate": 1.012746887966805e-05, "loss": 0.3014, "step": 29757 }, { "epoch": 24.695435684647304, "grad_norm": 29.69559669494629, "learning_rate": 1.0127136929460581e-05, "loss": 0.3657, "step": 29758 }, { "epoch": 24.696265560165976, "grad_norm": 
71.36882019042969, "learning_rate": 1.0126804979253113e-05, "loss": 0.3786, "step": 29759 }, { "epoch": 24.69709543568465, "grad_norm": 32.97492980957031, "learning_rate": 1.0126473029045646e-05, "loss": 0.6103, "step": 29760 }, { "epoch": 24.69792531120332, "grad_norm": 15.565787315368652, "learning_rate": 1.0126141078838174e-05, "loss": 0.25, "step": 29761 }, { "epoch": 24.698755186721993, "grad_norm": 88.50902557373047, "learning_rate": 1.0125809128630706e-05, "loss": 0.8378, "step": 29762 }, { "epoch": 24.699585062240665, "grad_norm": 146.81231689453125, "learning_rate": 1.0125477178423237e-05, "loss": 0.6236, "step": 29763 }, { "epoch": 24.700414937759337, "grad_norm": 25.35987091064453, "learning_rate": 1.0125145228215769e-05, "loss": 0.3644, "step": 29764 }, { "epoch": 24.70124481327801, "grad_norm": 9.21705150604248, "learning_rate": 1.01248132780083e-05, "loss": 0.2535, "step": 29765 }, { "epoch": 24.70207468879668, "grad_norm": 34.355438232421875, "learning_rate": 1.012448132780083e-05, "loss": 0.3055, "step": 29766 }, { "epoch": 24.702904564315354, "grad_norm": 48.63287353515625, "learning_rate": 1.0124149377593362e-05, "loss": 0.9155, "step": 29767 }, { "epoch": 24.703734439834026, "grad_norm": 114.10157775878906, "learning_rate": 1.0123817427385894e-05, "loss": 0.7631, "step": 29768 }, { "epoch": 24.704564315352698, "grad_norm": 48.70670700073242, "learning_rate": 1.0123485477178423e-05, "loss": 0.485, "step": 29769 }, { "epoch": 24.70539419087137, "grad_norm": 58.790103912353516, "learning_rate": 1.0123153526970955e-05, "loss": 0.5218, "step": 29770 }, { "epoch": 24.706224066390043, "grad_norm": 26.587343215942383, "learning_rate": 1.0122821576763487e-05, "loss": 0.3929, "step": 29771 }, { "epoch": 24.707053941908715, "grad_norm": 69.86328125, "learning_rate": 1.0122489626556017e-05, "loss": 0.8466, "step": 29772 }, { "epoch": 24.707883817427387, "grad_norm": 140.3831329345703, "learning_rate": 1.012215767634855e-05, "loss": 0.6767, "step": 29773 }, { 
"epoch": 24.70871369294606, "grad_norm": 26.536590576171875, "learning_rate": 1.012182572614108e-05, "loss": 0.2899, "step": 29774 }, { "epoch": 24.70954356846473, "grad_norm": 20.488948822021484, "learning_rate": 1.012149377593361e-05, "loss": 0.3689, "step": 29775 }, { "epoch": 24.710373443983404, "grad_norm": 23.802724838256836, "learning_rate": 1.0121161825726142e-05, "loss": 0.282, "step": 29776 }, { "epoch": 24.711203319502076, "grad_norm": 65.10986328125, "learning_rate": 1.0120829875518674e-05, "loss": 0.3413, "step": 29777 }, { "epoch": 24.712033195020748, "grad_norm": 51.918495178222656, "learning_rate": 1.0120497925311203e-05, "loss": 0.9971, "step": 29778 }, { "epoch": 24.71286307053942, "grad_norm": 143.81300354003906, "learning_rate": 1.0120165975103735e-05, "loss": 0.4967, "step": 29779 }, { "epoch": 24.713692946058092, "grad_norm": 39.25100326538086, "learning_rate": 1.0119834024896267e-05, "loss": 0.3447, "step": 29780 }, { "epoch": 24.714522821576764, "grad_norm": 110.0602798461914, "learning_rate": 1.0119502074688798e-05, "loss": 0.7482, "step": 29781 }, { "epoch": 24.715352697095437, "grad_norm": 24.61634635925293, "learning_rate": 1.0119170124481328e-05, "loss": 0.3625, "step": 29782 }, { "epoch": 24.71618257261411, "grad_norm": 97.25870513916016, "learning_rate": 1.0118838174273859e-05, "loss": 0.8594, "step": 29783 }, { "epoch": 24.71701244813278, "grad_norm": 23.779987335205078, "learning_rate": 1.011850622406639e-05, "loss": 0.2471, "step": 29784 }, { "epoch": 24.717842323651453, "grad_norm": 17.89961051940918, "learning_rate": 1.0118174273858923e-05, "loss": 0.281, "step": 29785 }, { "epoch": 24.718672199170125, "grad_norm": 24.714075088500977, "learning_rate": 1.0117842323651452e-05, "loss": 0.6185, "step": 29786 }, { "epoch": 24.719502074688798, "grad_norm": 19.026092529296875, "learning_rate": 1.0117510373443984e-05, "loss": 0.2925, "step": 29787 }, { "epoch": 24.72033195020747, "grad_norm": 37.63215255737305, "learning_rate": 
1.0117178423236516e-05, "loss": 0.2698, "step": 29788 }, { "epoch": 24.721161825726142, "grad_norm": 72.62701416015625, "learning_rate": 1.0116846473029048e-05, "loss": 1.0365, "step": 29789 }, { "epoch": 24.721991701244814, "grad_norm": 35.52559280395508, "learning_rate": 1.0116514522821577e-05, "loss": 0.4566, "step": 29790 }, { "epoch": 24.722821576763486, "grad_norm": 15.914318084716797, "learning_rate": 1.0116182572614109e-05, "loss": 0.3175, "step": 29791 }, { "epoch": 24.72365145228216, "grad_norm": 19.65308380126953, "learning_rate": 1.011585062240664e-05, "loss": 0.3882, "step": 29792 }, { "epoch": 24.72448132780083, "grad_norm": 33.36296463012695, "learning_rate": 1.0115518672199171e-05, "loss": 0.5369, "step": 29793 }, { "epoch": 24.725311203319503, "grad_norm": 10.958285331726074, "learning_rate": 1.0115186721991702e-05, "loss": 0.2999, "step": 29794 }, { "epoch": 24.726141078838175, "grad_norm": 41.79685592651367, "learning_rate": 1.0114854771784232e-05, "loss": 0.6288, "step": 29795 }, { "epoch": 24.726970954356847, "grad_norm": 76.65272521972656, "learning_rate": 1.0114522821576764e-05, "loss": 0.7682, "step": 29796 }, { "epoch": 24.72780082987552, "grad_norm": 24.954910278320312, "learning_rate": 1.0114190871369296e-05, "loss": 0.3926, "step": 29797 }, { "epoch": 24.728630705394192, "grad_norm": 22.88335609436035, "learning_rate": 1.0113858921161828e-05, "loss": 0.3501, "step": 29798 }, { "epoch": 24.729460580912864, "grad_norm": 78.46527099609375, "learning_rate": 1.0113526970954357e-05, "loss": 0.7533, "step": 29799 }, { "epoch": 24.730290456431536, "grad_norm": 23.402236938476562, "learning_rate": 1.011319502074689e-05, "loss": 0.2936, "step": 29800 }, { "epoch": 24.73112033195021, "grad_norm": 51.38286590576172, "learning_rate": 1.011286307053942e-05, "loss": 0.2951, "step": 29801 }, { "epoch": 24.73195020746888, "grad_norm": 11.393288612365723, "learning_rate": 1.0112531120331952e-05, "loss": 0.2197, "step": 29802 }, { "epoch": 
24.732780082987553, "grad_norm": 89.11702728271484, "learning_rate": 1.0112199170124482e-05, "loss": 0.3713, "step": 29803 }, { "epoch": 24.733609958506225, "grad_norm": 76.13772583007812, "learning_rate": 1.0111867219917013e-05, "loss": 0.5073, "step": 29804 }, { "epoch": 24.734439834024897, "grad_norm": 74.34931945800781, "learning_rate": 1.0111535269709545e-05, "loss": 0.6508, "step": 29805 }, { "epoch": 24.73526970954357, "grad_norm": 37.117698669433594, "learning_rate": 1.0111203319502077e-05, "loss": 0.3266, "step": 29806 }, { "epoch": 24.73609958506224, "grad_norm": 101.55233764648438, "learning_rate": 1.0110871369294606e-05, "loss": 0.8253, "step": 29807 }, { "epoch": 24.736929460580914, "grad_norm": 54.731727600097656, "learning_rate": 1.0110539419087138e-05, "loss": 1.0142, "step": 29808 }, { "epoch": 24.737759336099586, "grad_norm": 81.75320434570312, "learning_rate": 1.011020746887967e-05, "loss": 0.7171, "step": 29809 }, { "epoch": 24.738589211618258, "grad_norm": 56.80038070678711, "learning_rate": 1.01098755186722e-05, "loss": 0.4998, "step": 29810 }, { "epoch": 24.73941908713693, "grad_norm": 38.87818145751953, "learning_rate": 1.010954356846473e-05, "loss": 0.5759, "step": 29811 }, { "epoch": 24.740248962655603, "grad_norm": 25.588117599487305, "learning_rate": 1.0109211618257263e-05, "loss": 0.2991, "step": 29812 }, { "epoch": 24.741078838174275, "grad_norm": 58.76685333251953, "learning_rate": 1.0108879668049793e-05, "loss": 0.7417, "step": 29813 }, { "epoch": 24.741908713692947, "grad_norm": 24.12584686279297, "learning_rate": 1.0108547717842325e-05, "loss": 0.4563, "step": 29814 }, { "epoch": 24.74273858921162, "grad_norm": 29.002750396728516, "learning_rate": 1.0108215767634854e-05, "loss": 0.6218, "step": 29815 }, { "epoch": 24.74356846473029, "grad_norm": 27.98679542541504, "learning_rate": 1.0107883817427386e-05, "loss": 0.3317, "step": 29816 }, { "epoch": 24.744398340248964, "grad_norm": 14.668451309204102, "learning_rate": 
1.0107551867219918e-05, "loss": 0.2444, "step": 29817 }, { "epoch": 24.745228215767636, "grad_norm": 53.7201042175293, "learning_rate": 1.010721991701245e-05, "loss": 0.6304, "step": 29818 }, { "epoch": 24.746058091286308, "grad_norm": 23.624414443969727, "learning_rate": 1.0106887966804979e-05, "loss": 0.4183, "step": 29819 }, { "epoch": 24.74688796680498, "grad_norm": 80.74622344970703, "learning_rate": 1.0106556016597511e-05, "loss": 0.4262, "step": 29820 }, { "epoch": 24.747717842323652, "grad_norm": 170.94837951660156, "learning_rate": 1.0106224066390043e-05, "loss": 1.8941, "step": 29821 }, { "epoch": 24.748547717842325, "grad_norm": 74.09191131591797, "learning_rate": 1.0105892116182574e-05, "loss": 0.7071, "step": 29822 }, { "epoch": 24.749377593360997, "grad_norm": 62.19439697265625, "learning_rate": 1.0105560165975104e-05, "loss": 0.6136, "step": 29823 }, { "epoch": 24.75020746887967, "grad_norm": 86.99563598632812, "learning_rate": 1.0105228215767634e-05, "loss": 0.7387, "step": 29824 }, { "epoch": 24.75103734439834, "grad_norm": 18.017290115356445, "learning_rate": 1.0104896265560167e-05, "loss": 0.4222, "step": 29825 }, { "epoch": 24.751867219917013, "grad_norm": 25.165639877319336, "learning_rate": 1.0104564315352699e-05, "loss": 0.2919, "step": 29826 }, { "epoch": 24.752697095435686, "grad_norm": 23.120800018310547, "learning_rate": 1.010423236514523e-05, "loss": 0.3585, "step": 29827 }, { "epoch": 24.753526970954358, "grad_norm": 11.531076431274414, "learning_rate": 1.010390041493776e-05, "loss": 0.2719, "step": 29828 }, { "epoch": 24.75435684647303, "grad_norm": 29.15733528137207, "learning_rate": 1.0103568464730292e-05, "loss": 0.4813, "step": 29829 }, { "epoch": 24.755186721991702, "grad_norm": 48.50836944580078, "learning_rate": 1.0103236514522824e-05, "loss": 0.6171, "step": 29830 }, { "epoch": 24.756016597510374, "grad_norm": 55.656036376953125, "learning_rate": 1.0102904564315354e-05, "loss": 0.8136, "step": 29831 }, { "epoch": 
24.756846473029047, "grad_norm": 57.6859016418457, "learning_rate": 1.0102572614107884e-05, "loss": 0.6853, "step": 29832 }, { "epoch": 24.75767634854772, "grad_norm": 26.160253524780273, "learning_rate": 1.0102240663900415e-05, "loss": 0.3209, "step": 29833 }, { "epoch": 24.75850622406639, "grad_norm": 28.459930419921875, "learning_rate": 1.0101908713692947e-05, "loss": 0.2799, "step": 29834 }, { "epoch": 24.759336099585063, "grad_norm": 18.368623733520508, "learning_rate": 1.0101576763485479e-05, "loss": 0.3252, "step": 29835 }, { "epoch": 24.760165975103735, "grad_norm": 13.036918640136719, "learning_rate": 1.0101244813278008e-05, "loss": 0.2695, "step": 29836 }, { "epoch": 24.760995850622407, "grad_norm": 53.05314636230469, "learning_rate": 1.010091286307054e-05, "loss": 0.6195, "step": 29837 }, { "epoch": 24.76182572614108, "grad_norm": 50.7888298034668, "learning_rate": 1.0100580912863072e-05, "loss": 0.7677, "step": 29838 }, { "epoch": 24.762655601659752, "grad_norm": 37.1356086730957, "learning_rate": 1.0100248962655602e-05, "loss": 0.5413, "step": 29839 }, { "epoch": 24.763485477178424, "grad_norm": 69.83379364013672, "learning_rate": 1.0099917012448133e-05, "loss": 0.7876, "step": 29840 }, { "epoch": 24.764315352697096, "grad_norm": 25.567480087280273, "learning_rate": 1.0099585062240665e-05, "loss": 0.4193, "step": 29841 }, { "epoch": 24.76514522821577, "grad_norm": 18.765392303466797, "learning_rate": 1.0099253112033195e-05, "loss": 0.2673, "step": 29842 }, { "epoch": 24.76597510373444, "grad_norm": 96.90048217773438, "learning_rate": 1.0098921161825728e-05, "loss": 0.5767, "step": 29843 }, { "epoch": 24.766804979253113, "grad_norm": 63.2583122253418, "learning_rate": 1.0098589211618258e-05, "loss": 0.8821, "step": 29844 }, { "epoch": 24.767634854771785, "grad_norm": 31.49423599243164, "learning_rate": 1.0098257261410788e-05, "loss": 0.4812, "step": 29845 }, { "epoch": 24.768464730290457, "grad_norm": 10.667871475219727, "learning_rate": 
1.009792531120332e-05, "loss": 0.3078, "step": 29846 }, { "epoch": 24.76929460580913, "grad_norm": 32.07032012939453, "learning_rate": 1.0097593360995853e-05, "loss": 0.6386, "step": 29847 }, { "epoch": 24.7701244813278, "grad_norm": 59.719093322753906, "learning_rate": 1.0097261410788381e-05, "loss": 0.4543, "step": 29848 }, { "epoch": 24.770954356846474, "grad_norm": 53.263790130615234, "learning_rate": 1.0096929460580913e-05, "loss": 0.5082, "step": 29849 }, { "epoch": 24.771784232365146, "grad_norm": 81.00688171386719, "learning_rate": 1.0096597510373445e-05, "loss": 0.6535, "step": 29850 }, { "epoch": 24.77261410788382, "grad_norm": 25.979019165039062, "learning_rate": 1.0096265560165976e-05, "loss": 0.4159, "step": 29851 }, { "epoch": 24.77344398340249, "grad_norm": 25.583818435668945, "learning_rate": 1.0095933609958508e-05, "loss": 0.2138, "step": 29852 }, { "epoch": 24.774273858921163, "grad_norm": 76.38148498535156, "learning_rate": 1.0095601659751037e-05, "loss": 0.4396, "step": 29853 }, { "epoch": 24.775103734439835, "grad_norm": 13.856512069702148, "learning_rate": 1.0095269709543569e-05, "loss": 0.2523, "step": 29854 }, { "epoch": 24.775933609958507, "grad_norm": 14.840944290161133, "learning_rate": 1.0094937759336101e-05, "loss": 0.4578, "step": 29855 }, { "epoch": 24.77676348547718, "grad_norm": 29.336679458618164, "learning_rate": 1.0094605809128633e-05, "loss": 0.3696, "step": 29856 }, { "epoch": 24.77759336099585, "grad_norm": 55.59708786010742, "learning_rate": 1.0094273858921162e-05, "loss": 0.6284, "step": 29857 }, { "epoch": 24.778423236514524, "grad_norm": 21.873502731323242, "learning_rate": 1.0093941908713694e-05, "loss": 0.3859, "step": 29858 }, { "epoch": 24.779253112033196, "grad_norm": 30.947956085205078, "learning_rate": 1.0093609958506226e-05, "loss": 0.2753, "step": 29859 }, { "epoch": 24.780082987551868, "grad_norm": 61.178775787353516, "learning_rate": 1.0093278008298756e-05, "loss": 0.3526, "step": 29860 }, { "epoch": 
24.78091286307054, "grad_norm": 53.25162887573242, "learning_rate": 1.0092946058091287e-05, "loss": 0.5634, "step": 29861 }, { "epoch": 24.781742738589212, "grad_norm": 33.331451416015625, "learning_rate": 1.0092614107883817e-05, "loss": 0.399, "step": 29862 }, { "epoch": 24.782572614107885, "grad_norm": 33.566829681396484, "learning_rate": 1.009228215767635e-05, "loss": 0.5124, "step": 29863 }, { "epoch": 24.783402489626557, "grad_norm": 48.68564224243164, "learning_rate": 1.0091950207468881e-05, "loss": 0.4535, "step": 29864 }, { "epoch": 24.78423236514523, "grad_norm": 39.72431182861328, "learning_rate": 1.009161825726141e-05, "loss": 0.3492, "step": 29865 }, { "epoch": 24.7850622406639, "grad_norm": 25.901634216308594, "learning_rate": 1.0091286307053942e-05, "loss": 0.4327, "step": 29866 }, { "epoch": 24.785892116182573, "grad_norm": 83.77841186523438, "learning_rate": 1.0090954356846474e-05, "loss": 0.3906, "step": 29867 }, { "epoch": 24.786721991701246, "grad_norm": 88.24906921386719, "learning_rate": 1.0090622406639006e-05, "loss": 0.7305, "step": 29868 }, { "epoch": 24.787551867219918, "grad_norm": 11.797333717346191, "learning_rate": 1.0090290456431535e-05, "loss": 0.2624, "step": 29869 }, { "epoch": 24.78838174273859, "grad_norm": 17.77037239074707, "learning_rate": 1.0089958506224067e-05, "loss": 0.2824, "step": 29870 }, { "epoch": 24.789211618257262, "grad_norm": 31.20946502685547, "learning_rate": 1.0089626556016598e-05, "loss": 0.3309, "step": 29871 }, { "epoch": 24.790041493775934, "grad_norm": 36.71151351928711, "learning_rate": 1.008929460580913e-05, "loss": 0.3126, "step": 29872 }, { "epoch": 24.790871369294607, "grad_norm": 67.78643798828125, "learning_rate": 1.008896265560166e-05, "loss": 0.5677, "step": 29873 }, { "epoch": 24.79170124481328, "grad_norm": 47.84162902832031, "learning_rate": 1.008863070539419e-05, "loss": 0.6642, "step": 29874 }, { "epoch": 24.79253112033195, "grad_norm": 51.20436096191406, "learning_rate": 
1.0088298755186723e-05, "loss": 0.6154, "step": 29875 }, { "epoch": 24.793360995850623, "grad_norm": 19.7888240814209, "learning_rate": 1.0087966804979255e-05, "loss": 0.2324, "step": 29876 }, { "epoch": 24.794190871369295, "grad_norm": 28.17568016052246, "learning_rate": 1.0087634854771787e-05, "loss": 0.5127, "step": 29877 }, { "epoch": 24.795020746887968, "grad_norm": 10.227570533752441, "learning_rate": 1.0087302904564316e-05, "loss": 0.3145, "step": 29878 }, { "epoch": 24.79585062240664, "grad_norm": 10.351804733276367, "learning_rate": 1.0086970954356848e-05, "loss": 0.2499, "step": 29879 }, { "epoch": 24.796680497925312, "grad_norm": 50.641632080078125, "learning_rate": 1.0086639004149378e-05, "loss": 0.422, "step": 29880 }, { "epoch": 24.797510373443984, "grad_norm": 35.95967102050781, "learning_rate": 1.008630705394191e-05, "loss": 0.3137, "step": 29881 }, { "epoch": 24.798340248962656, "grad_norm": 24.06287956237793, "learning_rate": 1.008597510373444e-05, "loss": 0.2093, "step": 29882 }, { "epoch": 24.79917012448133, "grad_norm": 34.47779846191406, "learning_rate": 1.0085643153526971e-05, "loss": 0.3737, "step": 29883 }, { "epoch": 24.8, "grad_norm": 24.051362991333008, "learning_rate": 1.0085311203319503e-05, "loss": 0.3296, "step": 29884 }, { "epoch": 24.800829875518673, "grad_norm": 20.41474723815918, "learning_rate": 1.0084979253112035e-05, "loss": 0.2748, "step": 29885 }, { "epoch": 24.801659751037345, "grad_norm": 30.365755081176758, "learning_rate": 1.0084647302904564e-05, "loss": 0.3501, "step": 29886 }, { "epoch": 24.802489626556017, "grad_norm": 45.02622604370117, "learning_rate": 1.0084315352697096e-05, "loss": 0.5194, "step": 29887 }, { "epoch": 24.80331950207469, "grad_norm": 47.26333236694336, "learning_rate": 1.0083983402489628e-05, "loss": 0.3109, "step": 29888 }, { "epoch": 24.80414937759336, "grad_norm": 39.92514419555664, "learning_rate": 1.0083651452282159e-05, "loss": 0.5974, "step": 29889 }, { "epoch": 24.804979253112034, 
"grad_norm": 19.864465713500977, "learning_rate": 1.0083319502074689e-05, "loss": 0.3376, "step": 29890 }, { "epoch": 24.805809128630706, "grad_norm": 51.645931243896484, "learning_rate": 1.0082987551867221e-05, "loss": 0.2432, "step": 29891 }, { "epoch": 24.80663900414938, "grad_norm": 97.89018249511719, "learning_rate": 1.0082655601659752e-05, "loss": 0.6133, "step": 29892 }, { "epoch": 24.80746887966805, "grad_norm": 43.43604278564453, "learning_rate": 1.0082323651452284e-05, "loss": 0.7987, "step": 29893 }, { "epoch": 24.808298755186723, "grad_norm": 23.409208297729492, "learning_rate": 1.0081991701244812e-05, "loss": 0.3492, "step": 29894 }, { "epoch": 24.809128630705395, "grad_norm": 26.13093376159668, "learning_rate": 1.0081659751037345e-05, "loss": 0.4283, "step": 29895 }, { "epoch": 24.809958506224067, "grad_norm": 99.23326873779297, "learning_rate": 1.0081327800829877e-05, "loss": 0.4139, "step": 29896 }, { "epoch": 24.81078838174274, "grad_norm": 147.8282470703125, "learning_rate": 1.0080995850622409e-05, "loss": 0.5528, "step": 29897 }, { "epoch": 24.81161825726141, "grad_norm": 14.466923713684082, "learning_rate": 1.0080663900414938e-05, "loss": 0.3417, "step": 29898 }, { "epoch": 24.812448132780084, "grad_norm": 119.42160034179688, "learning_rate": 1.008033195020747e-05, "loss": 0.359, "step": 29899 }, { "epoch": 24.813278008298756, "grad_norm": 169.97607421875, "learning_rate": 1.008e-05, "loss": 0.5914, "step": 29900 }, { "epoch": 24.814107883817428, "grad_norm": 13.544194221496582, "learning_rate": 1.0079668049792532e-05, "loss": 0.3403, "step": 29901 }, { "epoch": 24.8149377593361, "grad_norm": 39.936317443847656, "learning_rate": 1.0079336099585063e-05, "loss": 0.5092, "step": 29902 }, { "epoch": 24.815767634854772, "grad_norm": 29.338184356689453, "learning_rate": 1.0079004149377593e-05, "loss": 0.4384, "step": 29903 }, { "epoch": 24.816597510373445, "grad_norm": 50.68689727783203, "learning_rate": 1.0078672199170125e-05, "loss": 0.377, "step": 
29904 }, { "epoch": 24.817427385892117, "grad_norm": 48.50374984741211, "learning_rate": 1.0078340248962657e-05, "loss": 0.3412, "step": 29905 }, { "epoch": 24.81825726141079, "grad_norm": 35.7901496887207, "learning_rate": 1.007800829875519e-05, "loss": 0.5316, "step": 29906 }, { "epoch": 24.81908713692946, "grad_norm": 44.71971893310547, "learning_rate": 1.0077676348547718e-05, "loss": 0.4877, "step": 29907 }, { "epoch": 24.819917012448133, "grad_norm": 43.50142288208008, "learning_rate": 1.007734439834025e-05, "loss": 0.6544, "step": 29908 }, { "epoch": 24.820746887966806, "grad_norm": 56.67888259887695, "learning_rate": 1.007701244813278e-05, "loss": 0.5196, "step": 29909 }, { "epoch": 24.821576763485478, "grad_norm": 64.46318817138672, "learning_rate": 1.0076680497925313e-05, "loss": 0.5534, "step": 29910 }, { "epoch": 24.82240663900415, "grad_norm": 24.222043991088867, "learning_rate": 1.0076348547717843e-05, "loss": 0.3838, "step": 29911 }, { "epoch": 24.823236514522822, "grad_norm": 43.805580139160156, "learning_rate": 1.0076016597510373e-05, "loss": 0.525, "step": 29912 }, { "epoch": 24.824066390041494, "grad_norm": 25.918006896972656, "learning_rate": 1.0075684647302906e-05, "loss": 0.4055, "step": 29913 }, { "epoch": 24.824896265560167, "grad_norm": 56.3419303894043, "learning_rate": 1.0075352697095438e-05, "loss": 0.7073, "step": 29914 }, { "epoch": 24.82572614107884, "grad_norm": 75.5934066772461, "learning_rate": 1.0075020746887966e-05, "loss": 0.4476, "step": 29915 }, { "epoch": 24.82655601659751, "grad_norm": 51.5816764831543, "learning_rate": 1.0074688796680499e-05, "loss": 0.5426, "step": 29916 }, { "epoch": 24.827385892116183, "grad_norm": 30.03826141357422, "learning_rate": 1.007435684647303e-05, "loss": 0.3203, "step": 29917 }, { "epoch": 24.828215767634855, "grad_norm": 10.498743057250977, "learning_rate": 1.0074024896265561e-05, "loss": 0.2316, "step": 29918 }, { "epoch": 24.829045643153528, "grad_norm": 27.32986068725586, "learning_rate": 
1.0073692946058091e-05, "loss": 0.3669, "step": 29919 }, { "epoch": 24.8298755186722, "grad_norm": 33.18516159057617, "learning_rate": 1.0073360995850624e-05, "loss": 0.5301, "step": 29920 }, { "epoch": 24.830705394190872, "grad_norm": 116.50535583496094, "learning_rate": 1.0073029045643154e-05, "loss": 1.1051, "step": 29921 }, { "epoch": 24.831535269709544, "grad_norm": 44.06476974487305, "learning_rate": 1.0072697095435686e-05, "loss": 0.4689, "step": 29922 }, { "epoch": 24.832365145228216, "grad_norm": 155.6912841796875, "learning_rate": 1.0072365145228215e-05, "loss": 1.1286, "step": 29923 }, { "epoch": 24.83319502074689, "grad_norm": 36.129112243652344, "learning_rate": 1.0072033195020747e-05, "loss": 0.5562, "step": 29924 }, { "epoch": 24.83402489626556, "grad_norm": 72.6073989868164, "learning_rate": 1.0071701244813279e-05, "loss": 0.5472, "step": 29925 }, { "epoch": 24.834854771784233, "grad_norm": 40.589324951171875, "learning_rate": 1.0071369294605811e-05, "loss": 0.3052, "step": 29926 }, { "epoch": 24.835684647302905, "grad_norm": 123.30042266845703, "learning_rate": 1.007103734439834e-05, "loss": 0.3832, "step": 29927 }, { "epoch": 24.836514522821577, "grad_norm": 27.714920043945312, "learning_rate": 1.0070705394190872e-05, "loss": 0.5648, "step": 29928 }, { "epoch": 24.83734439834025, "grad_norm": 112.54155731201172, "learning_rate": 1.0070373443983404e-05, "loss": 0.6905, "step": 29929 }, { "epoch": 24.83817427385892, "grad_norm": 132.8011932373047, "learning_rate": 1.0070041493775934e-05, "loss": 0.4689, "step": 29930 }, { "epoch": 24.839004149377594, "grad_norm": 64.64674377441406, "learning_rate": 1.0069709543568467e-05, "loss": 0.8015, "step": 29931 }, { "epoch": 24.839834024896266, "grad_norm": 42.23360061645508, "learning_rate": 1.0069377593360995e-05, "loss": 0.3384, "step": 29932 }, { "epoch": 24.84066390041494, "grad_norm": 77.53955841064453, "learning_rate": 1.0069045643153527e-05, "loss": 0.3644, "step": 29933 }, { "epoch": 
24.84149377593361, "grad_norm": 80.85653686523438, "learning_rate": 1.006871369294606e-05, "loss": 0.6271, "step": 29934 }, { "epoch": 24.842323651452283, "grad_norm": 79.250244140625, "learning_rate": 1.0068381742738592e-05, "loss": 0.4366, "step": 29935 }, { "epoch": 24.843153526970955, "grad_norm": 22.346176147460938, "learning_rate": 1.006804979253112e-05, "loss": 0.2725, "step": 29936 }, { "epoch": 24.843983402489627, "grad_norm": 36.987632751464844, "learning_rate": 1.0067717842323652e-05, "loss": 0.5119, "step": 29937 }, { "epoch": 24.8448132780083, "grad_norm": 45.044864654541016, "learning_rate": 1.0067385892116185e-05, "loss": 0.3598, "step": 29938 }, { "epoch": 24.84564315352697, "grad_norm": 65.1939926147461, "learning_rate": 1.0067053941908715e-05, "loss": 1.0503, "step": 29939 }, { "epoch": 24.846473029045644, "grad_norm": 50.84165573120117, "learning_rate": 1.0066721991701245e-05, "loss": 0.3623, "step": 29940 }, { "epoch": 24.847302904564316, "grad_norm": 33.172935485839844, "learning_rate": 1.0066390041493776e-05, "loss": 0.5276, "step": 29941 }, { "epoch": 24.848132780082988, "grad_norm": 19.39116668701172, "learning_rate": 1.0066058091286308e-05, "loss": 0.3097, "step": 29942 }, { "epoch": 24.84896265560166, "grad_norm": 21.353294372558594, "learning_rate": 1.006572614107884e-05, "loss": 0.2691, "step": 29943 }, { "epoch": 24.849792531120332, "grad_norm": 18.365001678466797, "learning_rate": 1.0065394190871369e-05, "loss": 0.2615, "step": 29944 }, { "epoch": 24.850622406639005, "grad_norm": 51.8045539855957, "learning_rate": 1.0065062240663901e-05, "loss": 0.5743, "step": 29945 }, { "epoch": 24.851452282157677, "grad_norm": 26.82406997680664, "learning_rate": 1.0064730290456433e-05, "loss": 0.3899, "step": 29946 }, { "epoch": 24.85228215767635, "grad_norm": 27.116365432739258, "learning_rate": 1.0064398340248965e-05, "loss": 0.4864, "step": 29947 }, { "epoch": 24.85311203319502, "grad_norm": 80.53800201416016, "learning_rate": 
1.0064066390041494e-05, "loss": 0.5457, "step": 29948 }, { "epoch": 24.853941908713693, "grad_norm": 51.14010238647461, "learning_rate": 1.0063734439834026e-05, "loss": 0.4881, "step": 29949 }, { "epoch": 24.854771784232366, "grad_norm": 59.88478469848633, "learning_rate": 1.0063402489626556e-05, "loss": 0.316, "step": 29950 }, { "epoch": 24.855601659751038, "grad_norm": 54.64695739746094, "learning_rate": 1.0063070539419088e-05, "loss": 0.5095, "step": 29951 }, { "epoch": 24.85643153526971, "grad_norm": 13.839530944824219, "learning_rate": 1.0062738589211619e-05, "loss": 0.2493, "step": 29952 }, { "epoch": 24.857261410788382, "grad_norm": 26.105810165405273, "learning_rate": 1.006240663900415e-05, "loss": 0.2918, "step": 29953 }, { "epoch": 24.858091286307054, "grad_norm": 21.881729125976562, "learning_rate": 1.0062074688796681e-05, "loss": 0.448, "step": 29954 }, { "epoch": 24.858921161825727, "grad_norm": 51.25105285644531, "learning_rate": 1.0061742738589213e-05, "loss": 0.3292, "step": 29955 }, { "epoch": 24.8597510373444, "grad_norm": 26.810346603393555, "learning_rate": 1.0061410788381742e-05, "loss": 0.2276, "step": 29956 }, { "epoch": 24.86058091286307, "grad_norm": 23.365676879882812, "learning_rate": 1.0061078838174274e-05, "loss": 0.2665, "step": 29957 }, { "epoch": 24.861410788381743, "grad_norm": 80.52296447753906, "learning_rate": 1.0060746887966806e-05, "loss": 0.4289, "step": 29958 }, { "epoch": 24.862240663900415, "grad_norm": 49.321311950683594, "learning_rate": 1.0060414937759337e-05, "loss": 1.2308, "step": 29959 }, { "epoch": 24.863070539419088, "grad_norm": 50.64446258544922, "learning_rate": 1.0060082987551869e-05, "loss": 0.5164, "step": 29960 }, { "epoch": 24.86390041493776, "grad_norm": 79.80158233642578, "learning_rate": 1.00597510373444e-05, "loss": 0.9147, "step": 29961 }, { "epoch": 24.864730290456432, "grad_norm": 68.36514282226562, "learning_rate": 1.005941908713693e-05, "loss": 0.6049, "step": 29962 }, { "epoch": 
24.865560165975104, "grad_norm": 45.54256057739258, "learning_rate": 1.0059087136929462e-05, "loss": 0.4048, "step": 29963 }, { "epoch": 24.866390041493776, "grad_norm": 33.14835739135742, "learning_rate": 1.0058755186721994e-05, "loss": 0.5219, "step": 29964 }, { "epoch": 24.86721991701245, "grad_norm": 39.04981994628906, "learning_rate": 1.0058423236514523e-05, "loss": 0.2993, "step": 29965 }, { "epoch": 24.86804979253112, "grad_norm": 57.11409378051758, "learning_rate": 1.0058091286307055e-05, "loss": 0.7651, "step": 29966 }, { "epoch": 24.868879668049793, "grad_norm": 47.5802001953125, "learning_rate": 1.0057759336099587e-05, "loss": 0.5952, "step": 29967 }, { "epoch": 24.869709543568465, "grad_norm": 31.818775177001953, "learning_rate": 1.0057427385892117e-05, "loss": 0.6393, "step": 29968 }, { "epoch": 24.870539419087137, "grad_norm": 87.59078979492188, "learning_rate": 1.0057095435684648e-05, "loss": 0.4969, "step": 29969 }, { "epoch": 24.87136929460581, "grad_norm": 17.74541473388672, "learning_rate": 1.0056763485477178e-05, "loss": 0.316, "step": 29970 }, { "epoch": 24.872199170124482, "grad_norm": 8.343498229980469, "learning_rate": 1.005643153526971e-05, "loss": 0.2307, "step": 29971 }, { "epoch": 24.873029045643154, "grad_norm": 53.07183837890625, "learning_rate": 1.0056099585062242e-05, "loss": 1.0319, "step": 29972 }, { "epoch": 24.873858921161826, "grad_norm": 42.013511657714844, "learning_rate": 1.0055767634854771e-05, "loss": 0.5615, "step": 29973 }, { "epoch": 24.8746887966805, "grad_norm": 41.90845489501953, "learning_rate": 1.0055435684647303e-05, "loss": 1.0455, "step": 29974 }, { "epoch": 24.87551867219917, "grad_norm": 50.598670959472656, "learning_rate": 1.0055103734439835e-05, "loss": 0.3414, "step": 29975 }, { "epoch": 24.876348547717843, "grad_norm": 47.83329391479492, "learning_rate": 1.0054771784232367e-05, "loss": 0.8567, "step": 29976 }, { "epoch": 24.877178423236515, "grad_norm": 48.70429611206055, "learning_rate": 
1.0054439834024896e-05, "loss": 0.7898, "step": 29977 }, { "epoch": 24.878008298755187, "grad_norm": 28.568696975708008, "learning_rate": 1.0054107883817428e-05, "loss": 0.2931, "step": 29978 }, { "epoch": 24.87883817427386, "grad_norm": 31.378450393676758, "learning_rate": 1.0053775933609959e-05, "loss": 0.5991, "step": 29979 }, { "epoch": 24.87966804979253, "grad_norm": 57.637908935546875, "learning_rate": 1.005344398340249e-05, "loss": 0.4428, "step": 29980 }, { "epoch": 24.880497925311204, "grad_norm": 24.711746215820312, "learning_rate": 1.0053112033195021e-05, "loss": 0.3659, "step": 29981 }, { "epoch": 24.881327800829876, "grad_norm": 46.72895431518555, "learning_rate": 1.0052780082987552e-05, "loss": 0.5758, "step": 29982 }, { "epoch": 24.882157676348548, "grad_norm": 55.25111389160156, "learning_rate": 1.0052448132780084e-05, "loss": 0.409, "step": 29983 }, { "epoch": 24.88298755186722, "grad_norm": 79.71065521240234, "learning_rate": 1.0052116182572616e-05, "loss": 0.5005, "step": 29984 }, { "epoch": 24.883817427385893, "grad_norm": 8.703709602355957, "learning_rate": 1.0051784232365148e-05, "loss": 0.2269, "step": 29985 }, { "epoch": 24.884647302904565, "grad_norm": 13.581185340881348, "learning_rate": 1.0051452282157677e-05, "loss": 0.2572, "step": 29986 }, { "epoch": 24.885477178423237, "grad_norm": 60.60272216796875, "learning_rate": 1.0051120331950209e-05, "loss": 0.877, "step": 29987 }, { "epoch": 24.88630705394191, "grad_norm": 45.62736129760742, "learning_rate": 1.0050788381742739e-05, "loss": 0.5091, "step": 29988 }, { "epoch": 24.88713692946058, "grad_norm": 16.571470260620117, "learning_rate": 1.0050456431535271e-05, "loss": 0.2302, "step": 29989 }, { "epoch": 24.887966804979254, "grad_norm": 45.696109771728516, "learning_rate": 1.0050124481327802e-05, "loss": 0.7571, "step": 29990 }, { "epoch": 24.888796680497926, "grad_norm": 35.71884536743164, "learning_rate": 1.0049792531120332e-05, "loss": 0.4098, "step": 29991 }, { "epoch": 
24.889626556016598, "grad_norm": 16.30367088317871, "learning_rate": 1.0049460580912864e-05, "loss": 0.2781, "step": 29992 }, { "epoch": 24.89045643153527, "grad_norm": 40.12759017944336, "learning_rate": 1.0049128630705396e-05, "loss": 0.498, "step": 29993 }, { "epoch": 24.891286307053942, "grad_norm": 13.081814765930176, "learning_rate": 1.0048796680497925e-05, "loss": 0.271, "step": 29994 }, { "epoch": 24.892116182572614, "grad_norm": 44.14216232299805, "learning_rate": 1.0048464730290457e-05, "loss": 0.6029, "step": 29995 }, { "epoch": 24.892946058091287, "grad_norm": 42.798545837402344, "learning_rate": 1.004813278008299e-05, "loss": 0.4096, "step": 29996 }, { "epoch": 24.89377593360996, "grad_norm": 38.365901947021484, "learning_rate": 1.004780082987552e-05, "loss": 0.404, "step": 29997 }, { "epoch": 24.89460580912863, "grad_norm": 36.77751922607422, "learning_rate": 1.004746887966805e-05, "loss": 0.4015, "step": 29998 }, { "epoch": 24.895435684647303, "grad_norm": 59.40140151977539, "learning_rate": 1.0047136929460582e-05, "loss": 0.5633, "step": 29999 }, { "epoch": 24.896265560165975, "grad_norm": 38.523067474365234, "learning_rate": 1.0046804979253113e-05, "loss": 0.4566, "step": 30000 } ], "logging_steps": 1, "max_steps": 60250, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.9196792725504e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }