{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.998968008255934, "eval_steps": 500, "global_step": 6540, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00045866299736268775, "grad_norm": 11.983423233032227, "learning_rate": 5.076142131979695e-06, "loss": 3.3181, "step": 1 }, { "epoch": 0.0009173259947253755, "grad_norm": 20.939693450927734, "learning_rate": 1.015228426395939e-05, "loss": 4.644, "step": 2 }, { "epoch": 0.0013759889920880633, "grad_norm": 15.526450157165527, "learning_rate": 1.5228426395939088e-05, "loss": 3.3677, "step": 3 }, { "epoch": 0.001834651989450751, "grad_norm": 15.404317855834961, "learning_rate": 2.030456852791878e-05, "loss": 4.1666, "step": 4 }, { "epoch": 0.002293314986813439, "grad_norm": 7.165552616119385, "learning_rate": 2.5380710659898476e-05, "loss": 2.5469, "step": 5 }, { "epoch": 0.0027519779841761265, "grad_norm": 2.2743093967437744, "learning_rate": 3.0456852791878175e-05, "loss": 1.4774, "step": 6 }, { "epoch": 0.0032106409815388145, "grad_norm": 12.052650451660156, "learning_rate": 3.553299492385787e-05, "loss": 3.9692, "step": 7 }, { "epoch": 0.003669303978901502, "grad_norm": 3.099673271179199, "learning_rate": 4.060913705583756e-05, "loss": 1.4465, "step": 8 }, { "epoch": 0.0041279669762641896, "grad_norm": 5.82925271987915, "learning_rate": 4.568527918781726e-05, "loss": 3.3387, "step": 9 }, { "epoch": 0.004586629973626878, "grad_norm": 4.784450531005859, "learning_rate": 5.076142131979695e-05, "loss": 2.6028, "step": 10 }, { "epoch": 0.0050452929709895655, "grad_norm": 6.859081745147705, "learning_rate": 5.583756345177665e-05, "loss": 3.8062, "step": 11 }, { "epoch": 0.005503955968352253, "grad_norm": 2.7776687145233154, "learning_rate": 6.091370558375635e-05, "loss": 2.3401, "step": 12 }, { "epoch": 0.0059626189657149406, "grad_norm": 3.001207113265991, "learning_rate": 6.598984771573605e-05, "loss": 2.3286, "step": 13 }, { "epoch": 0.006421281963077629, "grad_norm": 4.640460968017578, "learning_rate": 7.106598984771574e-05, "loss": 2.7322, "step": 14 }, { "epoch": 0.0068799449604403165, "grad_norm": 3.0203094482421875, "learning_rate": 7.614213197969543e-05, "loss": 1.9691, "step": 15 }, { "epoch": 0.007338607957803004, "grad_norm": 4.914385795593262, "learning_rate": 8.121827411167512e-05, "loss": 3.811, "step": 16 }, { "epoch": 0.007797270955165692, "grad_norm": 1.8599748611450195, "learning_rate": 8.629441624365482e-05, "loss": 1.743, "step": 17 }, { "epoch": 0.008255933952528379, "grad_norm": 4.672982215881348, "learning_rate": 9.137055837563452e-05, "loss": 2.9532, "step": 18 }, { "epoch": 0.008714596949891068, "grad_norm": 2.165299654006958, "learning_rate": 9.644670050761421e-05, "loss": 2.6724, "step": 19 }, { "epoch": 0.009173259947253756, "grad_norm": 2.6288881301879883, "learning_rate": 0.0001015228426395939, "loss": 3.1768, "step": 20 }, { "epoch": 0.009631922944616443, "grad_norm": 3.7706544399261475, "learning_rate": 0.00010659898477157361, "loss": 2.6769, "step": 21 }, { "epoch": 0.010090585941979131, "grad_norm": 1.690014362335205, "learning_rate": 0.0001116751269035533, "loss": 2.0408, "step": 22 }, { "epoch": 0.010549248939341819, "grad_norm": 1.388680100440979, "learning_rate": 0.000116751269035533, "loss": 1.7454, "step": 23 }, { "epoch": 0.011007911936704506, "grad_norm": 1.0925803184509277, "learning_rate": 0.0001218274111675127, "loss": 1.7225, "step": 24 }, { "epoch": 0.011466574934067194, "grad_norm": 1.5495200157165527, "learning_rate": 0.0001269035532994924, "loss": 2.7841, "step": 25 }, { "epoch": 0.011925237931429881, "grad_norm": 0.8375623822212219, "learning_rate": 0.0001319796954314721, "loss": 1.4273, "step": 26 }, { "epoch": 0.01238390092879257, "grad_norm": 1.5786348581314087, "learning_rate": 0.00013705583756345178, "loss": 2.4528, "step": 27 }, { "epoch": 0.012842563926155258, "grad_norm": 0.9938832521438599, "learning_rate": 0.00014213197969543148, "loss": 1.9099, "step": 28 }, { "epoch": 0.013301226923517945, "grad_norm": 1.1450473070144653, "learning_rate": 0.00014720812182741116, "loss": 2.4707, "step": 29 }, { "epoch": 0.013759889920880633, "grad_norm": 0.5717598795890808, "learning_rate": 0.00015228426395939087, "loss": 1.4649, "step": 30 }, { "epoch": 0.01421855291824332, "grad_norm": 1.1892627477645874, "learning_rate": 0.00015736040609137057, "loss": 2.762, "step": 31 }, { "epoch": 0.014677215915606008, "grad_norm": 0.8061103224754333, "learning_rate": 0.00016243654822335025, "loss": 2.5979, "step": 32 }, { "epoch": 0.015135878912968696, "grad_norm": 1.37034010887146, "learning_rate": 0.00016751269035532995, "loss": 2.8462, "step": 33 }, { "epoch": 0.015594541910331383, "grad_norm": 5.973114967346191, "learning_rate": 0.00017258883248730963, "loss": 1.8631, "step": 34 }, { "epoch": 0.01605320490769407, "grad_norm": 1.1910300254821777, "learning_rate": 0.00017766497461928934, "loss": 2.1458, "step": 35 }, { "epoch": 0.016511867905056758, "grad_norm": 0.8178476691246033, "learning_rate": 0.00018274111675126904, "loss": 2.6722, "step": 36 }, { "epoch": 0.016970530902419446, "grad_norm": 0.6606747508049011, "learning_rate": 0.00018781725888324875, "loss": 2.0547, "step": 37 }, { "epoch": 0.017429193899782137, "grad_norm": 0.5291973352432251, "learning_rate": 0.00019289340101522843, "loss": 1.2715, "step": 38 }, { "epoch": 0.017887856897144824, "grad_norm": 1.221416711807251, "learning_rate": 0.00019796954314720813, "loss": 3.2524, "step": 39 }, { "epoch": 0.018346519894507512, "grad_norm": 0.751660168170929, "learning_rate": 0.0002030456852791878, "loss": 2.2274, "step": 40 }, { "epoch": 0.0188051828918702, "grad_norm": 0.6112833023071289, "learning_rate": 0.00020812182741116754, "loss": 1.6378, "step": 41 }, { "epoch": 0.019263845889232887, "grad_norm": 0.40640926361083984, "learning_rate": 0.00021319796954314722, "loss": 1.395, "step": 42 }, { "epoch": 0.019722508886595574, "grad_norm": 0.3838483989238739, "learning_rate": 0.0002182741116751269, "loss": 1.3842, "step": 43 }, { "epoch": 0.020181171883958262, "grad_norm": 0.5843013525009155, "learning_rate": 0.0002233502538071066, "loss": 2.1636, "step": 44 }, { "epoch": 0.02063983488132095, "grad_norm": 0.5792063474655151, "learning_rate": 0.00022842639593908628, "loss": 2.4495, "step": 45 }, { "epoch": 0.021098497878683637, "grad_norm": 0.48895737528800964, "learning_rate": 0.000233502538071066, "loss": 2.0795, "step": 46 }, { "epoch": 0.021557160876046325, "grad_norm": 0.6037505865097046, "learning_rate": 0.0002385786802030457, "loss": 2.6077, "step": 47 }, { "epoch": 0.022015823873409012, "grad_norm": 0.11669456958770752, "learning_rate": 0.0002436548223350254, "loss": 0.535, "step": 48 }, { "epoch": 0.0224744868707717, "grad_norm": 0.5221993327140808, "learning_rate": 0.0002487309644670051, "loss": 1.9393, "step": 49 }, { "epoch": 0.022933149868134387, "grad_norm": 0.6841984391212463, "learning_rate": 0.0002538071065989848, "loss": 2.3887, "step": 50 }, { "epoch": 0.023391812865497075, "grad_norm": 0.5550611019134521, "learning_rate": 0.0002588832487309645, "loss": 2.0587, "step": 51 }, { "epoch": 0.023850475862859762, "grad_norm": 0.5314993858337402, "learning_rate": 0.0002639593908629442, "loss": 2.7202, "step": 52 }, { "epoch": 0.024309138860222453, "grad_norm": 0.6494979858398438, "learning_rate": 0.00026903553299492385, "loss": 2.7383, "step": 53 }, { "epoch": 0.02476780185758514, "grad_norm": 0.6311330795288086, "learning_rate": 0.00027411167512690355, "loss": 2.7266, "step": 54 }, { "epoch": 0.02522646485494783, "grad_norm": 0.6023324728012085, "learning_rate": 0.00027918781725888326, "loss": 2.8926, "step": 55 }, { "epoch": 0.025685127852310516, "grad_norm": 0.4306228458881378, "learning_rate": 0.00028426395939086296, "loss": 1.8117, "step": 56 }, { "epoch": 0.026143790849673203, "grad_norm": 0.4600723385810852, "learning_rate": 0.0002893401015228426, "loss": 1.4924, "step": 57 }, { "epoch": 0.02660245384703589, "grad_norm": 0.5145777463912964, "learning_rate": 0.0002944162436548223, "loss": 1.8133, "step": 58 }, { "epoch": 0.02706111684439858, "grad_norm": 0.4451518654823303, "learning_rate": 0.000299492385786802, "loss": 1.453, "step": 59 }, { "epoch": 0.027519779841761266, "grad_norm": 0.47992944717407227, "learning_rate": 0.00030456852791878173, "loss": 2.0383, "step": 60 }, { "epoch": 0.027978442839123954, "grad_norm": 0.49620696902275085, "learning_rate": 0.00030964467005076144, "loss": 1.7496, "step": 61 }, { "epoch": 0.02843710583648664, "grad_norm": 0.6371163725852966, "learning_rate": 0.00031472081218274114, "loss": 2.4441, "step": 62 }, { "epoch": 0.02889576883384933, "grad_norm": 0.38818782567977905, "learning_rate": 0.00031979695431472085, "loss": 1.4037, "step": 63 }, { "epoch": 0.029354431831212016, "grad_norm": 1.4600757360458374, "learning_rate": 0.0003248730964467005, "loss": 2.6814, "step": 64 }, { "epoch": 0.029813094828574704, "grad_norm": 0.394088476896286, "learning_rate": 0.0003299492385786802, "loss": 1.3568, "step": 65 }, { "epoch": 0.03027175782593739, "grad_norm": 1.3067290782928467, "learning_rate": 0.0003350253807106599, "loss": 2.6444, "step": 66 }, { "epoch": 0.03073042082330008, "grad_norm": 1.0073161125183105, "learning_rate": 0.0003401015228426396, "loss": 2.074, "step": 67 }, { "epoch": 0.031189083820662766, "grad_norm": 0.4958036243915558, "learning_rate": 0.00034517766497461927, "loss": 1.9275, "step": 68 }, { "epoch": 0.03164774681802546, "grad_norm": 0.6386379599571228, "learning_rate": 0.00035025380710659897, "loss": 2.5089, "step": 69 }, { "epoch": 0.03210640981538814, "grad_norm": 0.3506165146827698, "learning_rate": 0.0003553299492385787, "loss": 1.1556, "step": 70 }, { "epoch": 0.03256507281275083, "grad_norm": 0.511293888092041, "learning_rate": 0.0003604060913705584, "loss": 1.7764, "step": 71 }, { "epoch": 0.033023735810113516, "grad_norm": 0.5064987540245056, "learning_rate": 0.0003654822335025381, "loss": 2.0087, "step": 72 }, { "epoch": 0.03348239880747621, "grad_norm": 0.6523762345314026, "learning_rate": 0.0003705583756345178, "loss": 2.1339, "step": 73 }, { "epoch": 0.03394106180483889, "grad_norm": 0.7642560005187988, "learning_rate": 0.0003756345177664975, "loss": 2.7424, "step": 74 }, { "epoch": 0.03439972480220158, "grad_norm": 0.8428514003753662, "learning_rate": 0.00038071065989847715, "loss": 3.062, "step": 75 }, { "epoch": 0.034858387799564274, "grad_norm": 0.5961275696754456, "learning_rate": 0.00038578680203045685, "loss": 1.8469, "step": 76 }, { "epoch": 0.03531705079692696, "grad_norm": 0.7156499624252319, "learning_rate": 0.00039086294416243656, "loss": 2.3454, "step": 77 }, { "epoch": 0.03577571379428965, "grad_norm": 0.4211914837360382, "learning_rate": 0.00039593908629441627, "loss": 1.3773, "step": 78 }, { "epoch": 0.03623437679165233, "grad_norm": 0.25154200196266174, "learning_rate": 0.0004010152284263959, "loss": 0.834, "step": 79 }, { "epoch": 0.036693039789015024, "grad_norm": 0.7084411978721619, "learning_rate": 0.0004060913705583756, "loss": 2.5184, "step": 80 }, { "epoch": 0.03715170278637771, "grad_norm": 0.18035106360912323, "learning_rate": 0.00041116751269035533, "loss": 0.7498, "step": 81 }, { "epoch": 0.0376103657837404, "grad_norm": 0.6386528611183167, "learning_rate": 0.0004162436548223351, "loss": 2.762, "step": 82 }, { "epoch": 0.03806902878110308, "grad_norm": 0.5148580074310303, "learning_rate": 0.00042131979695431474, "loss": 2.1558, "step": 83 }, { "epoch": 0.038527691778465774, "grad_norm": 0.5644561052322388, "learning_rate": 0.00042639593908629444, "loss": 2.4407, "step": 84 }, { "epoch": 0.03898635477582846, "grad_norm": 0.21473997831344604, "learning_rate": 0.00043147208121827415, "loss": 0.9734, "step": 85 }, { "epoch": 0.03944501777319115, "grad_norm": 0.29117411375045776, "learning_rate": 0.0004365482233502538, "loss": 0.8773, "step": 86 }, { "epoch": 0.03990368077055383, "grad_norm": 0.5848171710968018, "learning_rate": 0.0004416243654822335, "loss": 1.5979, "step": 87 }, { "epoch": 0.040362343767916524, "grad_norm": 0.7745836973190308, "learning_rate": 0.0004467005076142132, "loss": 2.6626, "step": 88 }, { "epoch": 0.04082100676527921, "grad_norm": 0.6489730477333069, "learning_rate": 0.0004517766497461929, "loss": 2.2454, "step": 89 }, { "epoch": 0.0412796697626419, "grad_norm": 0.38306012749671936, "learning_rate": 0.00045685279187817257, "loss": 1.1562, "step": 90 }, { "epoch": 0.04173833276000459, "grad_norm": 0.6314185261726379, "learning_rate": 0.0004619289340101523, "loss": 2.3352, "step": 91 }, { "epoch": 0.042196995757367274, "grad_norm": 0.5400259494781494, "learning_rate": 0.000467005076142132, "loss": 2.261, "step": 92 }, { "epoch": 0.042655658754729965, "grad_norm": 0.5282353758811951, "learning_rate": 0.00047208121827411174, "loss": 1.9561, "step": 93 }, { "epoch": 0.04311432175209265, "grad_norm": 0.6686978936195374, "learning_rate": 0.0004771573604060914, "loss": 2.3158, "step": 94 }, { "epoch": 0.04357298474945534, "grad_norm": 0.5369504690170288, "learning_rate": 0.0004822335025380711, "loss": 2.0291, "step": 95 }, { "epoch": 0.044031647746818024, "grad_norm": 0.34599074721336365, "learning_rate": 0.0004873096446700508, "loss": 1.1915, "step": 96 }, { "epoch": 0.044490310744180715, "grad_norm": 0.45278364419937134, "learning_rate": 0.0004923857868020305, "loss": 2.0, "step": 97 }, { "epoch": 0.0449489737415434, "grad_norm": 0.7745117545127869, "learning_rate": 0.0004974619289340102, "loss": 2.5948, "step": 98 }, { "epoch": 0.04540763673890609, "grad_norm": 0.3228628635406494, "learning_rate": 0.0005025380710659899, "loss": 1.236, "step": 99 }, { "epoch": 0.045866299736268774, "grad_norm": 0.49457353353500366, "learning_rate": 0.0005076142131979696, "loss": 1.9651, "step": 100 }, { "epoch": 0.046324962733631465, "grad_norm": 0.6033787727355957, "learning_rate": 0.0005126903553299493, "loss": 2.657, "step": 101 }, { "epoch": 0.04678362573099415, "grad_norm": 0.529002845287323, "learning_rate": 0.000517766497461929, "loss": 2.249, "step": 102 }, { "epoch": 0.04724228872835684, "grad_norm": 0.3869992792606354, "learning_rate": 0.0005228426395939087, "loss": 1.3741, "step": 103 }, { "epoch": 0.047700951725719525, "grad_norm": 0.4783024787902832, "learning_rate": 0.0005279187817258884, "loss": 1.8258, "step": 104 }, { "epoch": 0.048159614723082216, "grad_norm": 0.5782725214958191, "learning_rate": 0.0005329949238578681, "loss": 2.4186, "step": 105 }, { "epoch": 0.04861827772044491, "grad_norm": 0.5999312400817871, "learning_rate": 0.0005380710659898477, "loss": 2.6179, "step": 106 }, { "epoch": 0.04907694071780759, "grad_norm": 0.6067392826080322, "learning_rate": 0.0005431472081218274, "loss": 2.1979, "step": 107 }, { "epoch": 0.04953560371517028, "grad_norm": 0.5408378839492798, "learning_rate": 0.0005482233502538071, "loss": 1.9679, "step": 108 }, { "epoch": 0.049994266712532966, "grad_norm": 0.8069329857826233, "learning_rate": 0.0005532994923857868, "loss": 2.8796, "step": 109 }, { "epoch": 0.05045292970989566, "grad_norm": 0.7999544143676758, "learning_rate": 0.0005583756345177665, "loss": 2.3101, "step": 110 }, { "epoch": 0.05091159270725834, "grad_norm": 0.8346303701400757, "learning_rate": 0.0005634517766497462, "loss": 3.0054, "step": 111 }, { "epoch": 0.05137025570462103, "grad_norm": 0.4418480396270752, "learning_rate": 0.0005685279187817259, "loss": 1.3883, "step": 112 }, { "epoch": 0.051828918701983716, "grad_norm": 0.6058046221733093, "learning_rate": 0.0005736040609137056, "loss": 1.7998, "step": 113 }, { "epoch": 0.05228758169934641, "grad_norm": 0.4952194392681122, "learning_rate": 0.0005786802030456852, "loss": 1.7445, "step": 114 }, { "epoch": 0.05274624469670909, "grad_norm": 0.6057190895080566, "learning_rate": 0.0005837563451776649, "loss": 1.9283, "step": 115 }, { "epoch": 0.05320490769407178, "grad_norm": 0.4411911964416504, "learning_rate": 0.0005888324873096446, "loss": 1.3921, "step": 116 }, { "epoch": 0.053663570691434466, "grad_norm": 0.7016524076461792, "learning_rate": 0.0005939086294416243, "loss": 2.9248, "step": 117 }, { "epoch": 0.05412223368879716, "grad_norm": 0.7117334008216858, "learning_rate": 0.000598984771573604, "loss": 2.7302, "step": 118 }, { "epoch": 0.05458089668615984, "grad_norm": 0.49954333901405334, "learning_rate": 0.0006040609137055838, "loss": 1.8414, "step": 119 }, { "epoch": 0.05503955968352253, "grad_norm": 0.6712334156036377, "learning_rate": 0.0006091370558375635, "loss": 2.4685, "step": 120 }, { "epoch": 0.05549822268088522, "grad_norm": 0.37909215688705444, "learning_rate": 0.0006142131979695432, "loss": 1.1743, "step": 121 }, { "epoch": 0.05595688567824791, "grad_norm": 0.42544159293174744, "learning_rate": 0.0006192893401015229, "loss": 1.4775, "step": 122 }, { "epoch": 0.0564155486756106, "grad_norm": 0.5652533769607544, "learning_rate": 0.0006243654822335026, "loss": 2.1067, "step": 123 }, { "epoch": 0.05687421167297328, "grad_norm": 0.4516564607620239, "learning_rate": 0.0006294416243654823, "loss": 1.7084, "step": 124 }, { "epoch": 0.05733287467033597, "grad_norm": 0.6015462279319763, "learning_rate": 0.000634517766497462, "loss": 2.3374, "step": 125 }, { "epoch": 0.05779153766769866, "grad_norm": 0.5467323660850525, "learning_rate": 0.0006395939086294417, "loss": 2.2559, "step": 126 }, { "epoch": 0.05825020066506135, "grad_norm": 0.5923335552215576, "learning_rate": 0.0006446700507614214, "loss": 2.4232, "step": 127 }, { "epoch": 0.05870886366242403, "grad_norm": 0.6827653050422668, "learning_rate": 0.000649746192893401, "loss": 2.7278, "step": 128 }, { "epoch": 0.05916752665978672, "grad_norm": 0.30980947613716125, "learning_rate": 0.0006548223350253807, "loss": 1.019, "step": 129 }, { "epoch": 0.05962618965714941, "grad_norm": 0.6105501651763916, "learning_rate": 0.0006598984771573604, "loss": 2.1824, "step": 130 }, { "epoch": 0.0600848526545121, "grad_norm": 0.292199969291687, "learning_rate": 0.0006649746192893401, "loss": 1.0942, "step": 131 }, { "epoch": 0.06054351565187478, "grad_norm": 0.6059619188308716, "learning_rate": 0.0006700507614213198, "loss": 2.2701, "step": 132 }, { "epoch": 0.06100217864923747, "grad_norm": 0.27790480852127075, "learning_rate": 0.0006751269035532995, "loss": 1.1452, "step": 133 }, { "epoch": 0.06146084164660016, "grad_norm": 0.47722020745277405, "learning_rate": 0.0006802030456852792, "loss": 2.2821, "step": 134 }, { "epoch": 0.06191950464396285, "grad_norm": 0.6286266446113586, "learning_rate": 0.0006852791878172588, "loss": 2.3686, "step": 135 }, { "epoch": 0.06237816764132553, "grad_norm": 0.6163983345031738, "learning_rate": 0.0006903553299492385, "loss": 2.3126, "step": 136 }, { "epoch": 0.06283683063868822, "grad_norm": 0.5008450150489807, "learning_rate": 0.0006954314720812182, "loss": 1.8563, "step": 137 }, { "epoch": 0.06329549363605091, "grad_norm": 0.6025516986846924, "learning_rate": 0.0007005076142131979, "loss": 2.11, "step": 138 }, { "epoch": 0.0637541566334136, "grad_norm": 0.4481363892555237, "learning_rate": 0.0007055837563451776, "loss": 1.8222, "step": 139 }, { "epoch": 0.06421281963077628, "grad_norm": 0.5932562351226807, "learning_rate": 0.0007106598984771574, "loss": 2.28, "step": 140 }, { "epoch": 0.06467148262813897, "grad_norm": 0.3764328062534332, "learning_rate": 0.0007157360406091371, "loss": 1.6057, "step": 141 }, { "epoch": 0.06513014562550166, "grad_norm": 0.5842316746711731, "learning_rate": 0.0007208121827411168, "loss": 2.1699, "step": 142 }, { "epoch": 0.06558880862286436, "grad_norm": 0.3900845944881439, "learning_rate": 0.0007258883248730965, "loss": 1.5842, "step": 143 }, { "epoch": 0.06604747162022703, "grad_norm": 0.19777247309684753, "learning_rate": 0.0007309644670050762, "loss": 0.7936, "step": 144 }, { "epoch": 0.06650613461758972, "grad_norm": 0.1068076640367508, "learning_rate": 0.0007360406091370559, "loss": 0.4987, "step": 145 }, { "epoch": 0.06696479761495241, "grad_norm": 0.5474305152893066, "learning_rate": 0.0007411167512690356, "loss": 2.032, "step": 146 }, { "epoch": 0.0674234606123151, "grad_norm": 0.6003263592720032, "learning_rate": 0.0007461928934010153, "loss": 2.3611, "step": 147 }, { "epoch": 0.06788212360967778, "grad_norm": 0.3899264633655548, "learning_rate": 0.000751269035532995, "loss": 1.4559, "step": 148 }, { "epoch": 0.06834078660704047, "grad_norm": 0.4279090464115143, "learning_rate": 0.0007563451776649747, "loss": 1.479, "step": 149 }, { "epoch": 0.06879944960440317, "grad_norm": 0.4912429749965668, "learning_rate": 0.0007614213197969543, "loss": 1.6797, "step": 150 }, { "epoch": 0.06925811260176586, "grad_norm": 0.23800012469291687, "learning_rate": 0.000766497461928934, "loss": 0.8729, "step": 151 }, { "epoch": 0.06971677559912855, "grad_norm": 0.6027427315711975, "learning_rate": 0.0007715736040609137, "loss": 1.8105, "step": 152 }, { "epoch": 0.07017543859649122, "grad_norm": 0.6931683421134949, "learning_rate": 0.0007766497461928934, "loss": 2.2175, "step": 153 }, { "epoch": 0.07063410159385392, "grad_norm": 0.4811568558216095, "learning_rate": 0.0007817258883248731, "loss": 1.689, "step": 154 }, { "epoch": 0.0710927645912166, "grad_norm": 0.6901246905326843, "learning_rate": 0.0007868020304568528, "loss": 1.6627, "step": 155 }, { "epoch": 0.0715514275885793, "grad_norm": 12.634415626525879, "learning_rate": 0.0007918781725888325, "loss": 2.9146, "step": 156 }, { "epoch": 0.07201009058594197, "grad_norm": 0.3894835114479065, "learning_rate": 0.0007969543147208121, "loss": 1.461, "step": 157 }, { "epoch": 0.07246875358330467, "grad_norm": 0.4583110511302948, "learning_rate": 0.0008020304568527918, "loss": 1.3597, "step": 158 }, { "epoch": 0.07292741658066736, "grad_norm": 0.5084859728813171, "learning_rate": 0.0008071065989847715, "loss": 2.131, "step": 159 }, { "epoch": 0.07338607957803005, "grad_norm": 0.6448908448219299, "learning_rate": 0.0008121827411167512, "loss": 2.3026, "step": 160 }, { "epoch": 0.07384474257539272, "grad_norm": 0.4896964728832245, "learning_rate": 0.000817258883248731, "loss": 2.0397, "step": 161 }, { "epoch": 0.07430340557275542, "grad_norm": 0.6146819591522217, "learning_rate": 0.0008223350253807107, "loss": 2.1047, "step": 162 }, { "epoch": 0.0747620685701181, "grad_norm": 0.2950705587863922, "learning_rate": 0.0008274111675126904, "loss": 1.1177, "step": 163 }, { "epoch": 0.0752207315674808, "grad_norm": 0.39602458477020264, "learning_rate": 0.0008324873096446702, "loss": 1.6326, "step": 164 }, { "epoch": 0.07567939456484347, "grad_norm": 0.46519267559051514, "learning_rate": 0.0008375634517766498, "loss": 1.7606, "step": 165 }, { "epoch": 0.07613805756220617, "grad_norm": 0.31276392936706543, "learning_rate": 0.0008426395939086295, "loss": 1.2241, "step": 166 }, { "epoch": 0.07659672055956886, "grad_norm": 0.48499414324760437, "learning_rate": 0.0008477157360406092, "loss": 1.9176, "step": 167 }, { "epoch": 0.07705538355693155, "grad_norm": 0.43687963485717773, "learning_rate": 0.0008527918781725889, "loss": 1.6424, "step": 168 }, { "epoch": 0.07751404655429424, "grad_norm": 0.4287634491920471, "learning_rate": 0.0008578680203045686, "loss": 1.8438, "step": 169 }, { "epoch": 0.07797270955165692, "grad_norm": 0.28460821509361267, "learning_rate": 0.0008629441624365483, "loss": 1.1942, "step": 170 }, { "epoch": 0.0784313725490196, "grad_norm": 0.30606964230537415, "learning_rate": 0.000868020304568528, "loss": 1.1019, "step": 171 }, { "epoch": 0.0788900355463823, "grad_norm": 0.38955485820770264, "learning_rate": 0.0008730964467005076, "loss": 1.7985, "step": 172 }, { "epoch": 0.07934869854374499, "grad_norm": 0.41820240020751953, "learning_rate": 0.0008781725888324873, "loss": 1.6667, "step": 173 }, { "epoch": 0.07980736154110767, "grad_norm": 0.47719693183898926, "learning_rate": 0.000883248730964467, "loss": 2.0426, "step": 174 }, { "epoch": 0.08026602453847036, "grad_norm": 0.49073362350463867, "learning_rate": 0.0008883248730964467, "loss": 2.0519, "step": 175 }, { "epoch": 0.08072468753583305, "grad_norm": 0.4831169843673706, "learning_rate": 0.0008934010152284264, "loss": 2.511, "step": 176 }, { "epoch": 0.08118335053319574, "grad_norm": 0.549760103225708, "learning_rate": 0.0008984771573604061, "loss": 2.2359, "step": 177 }, { "epoch": 0.08164201353055842, "grad_norm": 0.5303396582603455, "learning_rate": 0.0009035532994923858, "loss": 2.5146, "step": 178 }, { "epoch": 0.08210067652792111, "grad_norm": 0.47503864765167236, "learning_rate": 0.0009086294416243654, "loss": 2.0219, "step": 179 }, { "epoch": 0.0825593395252838, "grad_norm": 0.4175845682621002, "learning_rate": 0.0009137055837563451, "loss": 1.9651, "step": 180 }, { "epoch": 0.08301800252264649, "grad_norm": 0.536845862865448, "learning_rate": 0.0009187817258883248, "loss": 2.4031, "step": 181 }, { "epoch": 0.08347666552000918, "grad_norm": 0.6141538023948669, "learning_rate": 0.0009238578680203045, "loss": 2.6653, "step": 182 }, { "epoch": 0.08393532851737186, "grad_norm": 0.31810981035232544, "learning_rate": 0.0009289340101522843, "loss": 1.1417, "step": 183 }, { "epoch": 0.08439399151473455, "grad_norm": 0.4755139648914337, "learning_rate": 0.000934010152284264, "loss": 2.1119, "step": 184 }, { "epoch": 0.08485265451209724, "grad_norm": 0.14865882694721222, "learning_rate": 0.0009390862944162437, "loss": 0.6605, "step": 185 }, { "epoch": 0.08531131750945993, "grad_norm": 0.31906870007514954, "learning_rate": 0.0009441624365482235, "loss": 1.2437, "step": 186 }, { "epoch": 0.08576998050682261, "grad_norm": 0.5705398917198181, "learning_rate": 0.0009492385786802031, "loss": 2.3323, "step": 187 }, { "epoch": 0.0862286435041853, "grad_norm": 0.3601418137550354, "learning_rate": 0.0009543147208121828, "loss": 1.5806, "step": 188 }, { "epoch": 0.08668730650154799, "grad_norm": 0.4881735146045685, "learning_rate": 0.0009593908629441625, "loss": 1.9485, "step": 189 }, { "epoch": 0.08714596949891068, "grad_norm": 0.15371385216712952, "learning_rate": 0.0009644670050761422, "loss": 0.683, "step": 190 }, { "epoch": 0.08760463249627336, "grad_norm": 0.41688668727874756, "learning_rate": 0.0009695431472081219, "loss": 1.6802, "step": 191 }, { "epoch": 0.08806329549363605, "grad_norm": 0.429606556892395, "learning_rate": 0.0009746192893401016, "loss": 1.9552, "step": 192 }, { "epoch": 0.08852195849099874, "grad_norm": 0.549892008304596, "learning_rate": 0.0009796954314720812, "loss": 2.1486, "step": 193 }, { "epoch": 0.08898062148836143, "grad_norm": 0.3614405393600464, "learning_rate": 0.000984771573604061, "loss": 1.3867, "step": 194 }, { "epoch": 0.08943928448572411, "grad_norm": 0.43819788098335266, "learning_rate": 0.0009898477157360406, "loss": 1.8317, "step": 195 }, { "epoch": 0.0898979474830868, "grad_norm": 0.14601579308509827, "learning_rate": 0.0009949238578680203, "loss": 0.6363, "step": 196 }, { "epoch": 0.09035661048044949, "grad_norm": 0.2820906937122345, "learning_rate": 0.001, "loss": 1.0806, "step": 197 }, { "epoch": 0.09081527347781218, "grad_norm": 0.41677334904670715, "learning_rate": 0.0009999999386731974, "loss": 1.79, "step": 198 }, { "epoch": 0.09127393647517487, "grad_norm": 0.3319721519947052, "learning_rate": 0.0009999997546928047, "loss": 1.4927, "step": 199 }, { "epoch": 0.09173259947253755, "grad_norm": 0.37898340821266174, "learning_rate": 0.000999999448058867, "loss": 1.6826, "step": 200 }, { "epoch": 0.09219126246990024, "grad_norm": 0.5117692351341248, "learning_rate": 0.0009999990187714593, "loss": 2.368, "step": 201 }, { "epoch": 0.09264992546726293, "grad_norm": 0.44219571352005005, "learning_rate": 0.0009999984668306874, "loss": 2.3706, "step": 202 }, { "epoch": 0.09310858846462562, "grad_norm": 0.38528987765312195, "learning_rate": 0.0009999977922366863, "loss": 1.6523, "step": 203 }, { "epoch": 0.0935672514619883, "grad_norm": 0.5601181983947754, "learning_rate": 0.0009999969949896215, "loss": 2.0021, "step": 204 }, { "epoch": 0.09402591445935099, "grad_norm": 0.34919580817222595, "learning_rate": 0.0009999960750896888, "loss": 1.5776, "step": 205 }, { "epoch": 0.09448457745671368, "grad_norm": 0.5601129531860352, "learning_rate": 0.0009999950325371137, "loss": 2.25, "step": 206 }, { "epoch": 0.09494324045407637, "grad_norm": 0.5060522556304932, "learning_rate": 0.0009999938673321519, "loss": 2.4441, "step": 207 }, { "epoch": 0.09540190345143905, "grad_norm": 0.44873717427253723, "learning_rate": 0.0009999925794750893, "loss": 1.6166, "step": 208 }, { "epoch": 0.09586056644880174, "grad_norm": 0.1810014545917511, "learning_rate": 0.0009999911689662418, "loss": 0.7684, "step": 209 }, { "epoch": 0.09631922944616443, "grad_norm": 0.3902459144592285, "learning_rate": 0.0009999896358059556, "loss": 1.5992, "step": 210 }, { "epoch": 0.09677789244352712, "grad_norm": 0.6398706436157227, "learning_rate": 0.0009999879799946067, "loss": 2.5659, "step": 211 }, { "epoch": 0.09723655544088981, "grad_norm": 0.6179486513137817, "learning_rate": 0.000999986201532601, "loss": 2.6672, "step": 212 }, { "epoch": 0.09769521843825249, "grad_norm": 0.46587255597114563, "learning_rate": 0.0009999843004203748, "loss": 1.9471, "step": 213 }, { "epoch": 0.09815388143561518, "grad_norm": 0.5521425604820251, "learning_rate": 0.0009999822766583947, "loss": 2.51, "step": 214 }, { "epoch": 0.09861254443297787, "grad_norm": 0.43424972891807556, "learning_rate": 0.0009999801302471574, "loss": 1.8136, "step": 215 }, { "epoch": 0.09907120743034056, "grad_norm": 0.3763306140899658, "learning_rate": 0.0009999778611871888, "loss": 1.3743, "step": 216 }, { "epoch": 0.09952987042770324, "grad_norm": 0.34321171045303345, "learning_rate": 0.0009999754694790459, "loss": 1.4966, "step": 217 }, { "epoch": 0.09998853342506593, "grad_norm": 0.46348294615745544, "learning_rate": 0.0009999729551233155, "loss": 1.6602, "step": 218 }, { "epoch": 0.10044719642242862, "grad_norm": 0.5280048847198486, "learning_rate": 0.000999970318120614, "loss": 2.2937, "step": 219 }, { "epoch": 0.10090585941979131, "grad_norm": 0.4680062234401703, "learning_rate": 0.0009999675584715887, "loss": 1.999, "step": 220 }, { "epoch": 0.10136452241715399, "grad_norm": 0.48422807455062866, "learning_rate": 0.0009999646761769162, "loss": 2.1548, "step": 221 }, { "epoch": 0.10182318541451668, "grad_norm": 0.30261266231536865, "learning_rate": 0.0009999616712373035, "loss": 1.3619, "step": 222 }, { "epoch": 0.10228184841187937, "grad_norm": 0.549519956111908, "learning_rate": 0.0009999585436534883, "loss": 2.4467, "step": 223 }, { "epoch": 0.10274051140924206, "grad_norm": 0.4387172758579254, "learning_rate": 0.0009999552934262374, "loss": 2.0479, "step": 224 }, { "epoch": 0.10319917440660474, "grad_norm": 0.49094218015670776, "learning_rate": 0.0009999519205563482, "loss": 2.4268, "step": 225 }, { "epoch": 0.10365783740396743, "grad_norm": 0.5436589121818542, "learning_rate": 0.0009999484250446478, "loss": 2.5486, "step": 226 }, { "epoch": 0.10411650040133012, "grad_norm": 0.5063910484313965, "learning_rate": 0.000999944806891994, "loss": 2.364, "step": 227 }, { "epoch": 0.10457516339869281, "grad_norm": 0.5463952422142029, "learning_rate": 0.0009999410660992743, "loss": 2.6033, "step": 228 }, { "epoch": 0.1050338263960555, "grad_norm": 0.507409393787384, "learning_rate": 0.0009999372026674063, "loss": 2.32, "step": 229 }, { "epoch": 0.10549248939341818, "grad_norm": 0.5893042087554932, "learning_rate": 0.0009999332165973379, "loss": 2.7153, "step": 230 }, { "epoch": 0.10595115239078087, "grad_norm": 0.284311980009079, "learning_rate": 0.0009999291078900466, "loss": 0.9774, "step": 231 }, { "epoch": 0.10640981538814356, "grad_norm": 0.42511793971061707, "learning_rate": 0.0009999248765465406, "loss": 1.924, "step": 232 }, { "epoch": 0.10686847838550625, "grad_norm": 0.6010558605194092, "learning_rate": 0.0009999205225678575, "loss": 2.7175, "step": 233 }, { "epoch": 0.10732714138286893, "grad_norm": 0.3992350697517395, "learning_rate": 0.000999916045955066, "loss": 1.9003, "step": 234 }, { "epoch": 0.10778580438023162, "grad_norm": 0.3539673388004303, "learning_rate": 0.0009999114467092636, "loss": 1.4302, "step": 235 }, { "epoch": 0.10824446737759431, "grad_norm": 0.3749694228172302, "learning_rate": 0.0009999067248315787, "loss": 1.7714, "step": 236 }, { "epoch": 0.108703130374957, "grad_norm": 0.47365519404411316, "learning_rate": 0.0009999018803231697, "loss": 1.9969, "step": 237 }, { "epoch": 0.10916179337231968, "grad_norm": 0.3063698709011078, "learning_rate": 0.000999896913185225, "loss": 1.3087, "step": 238 }, { "epoch": 0.10962045636968237, "grad_norm": 0.2695063352584839, "learning_rate": 0.0009998918234189632, "loss": 1.203, "step": 239 }, { "epoch": 0.11007911936704506, "grad_norm": 0.3138861060142517, "learning_rate": 0.0009998866110256326, "loss": 1.1181, "step": 240 }, { "epoch": 0.11053778236440776, "grad_norm": 0.4734935462474823, "learning_rate": 0.000999881276006512, "loss": 2.2582, "step": 241 }, { "epoch": 0.11099644536177045, "grad_norm": 0.3687781095504761, "learning_rate": 0.0009998758183629099, "loss": 1.6593, "step": 242 }, { "epoch": 0.11145510835913312, "grad_norm": 0.33596163988113403, "learning_rate": 0.0009998702380961655, "loss": 1.4227, "step": 243 }, { "epoch": 0.11191377135649581, "grad_norm": 0.37901684641838074, "learning_rate": 0.0009998645352076471, "loss": 1.6273, "step": 244 }, { "epoch": 0.1123724343538585, "grad_norm": 0.30980101227760315, "learning_rate": 0.0009998587096987544, "loss": 1.0549, "step": 245 }, { "epoch": 0.1128310973512212, "grad_norm": 0.4566017687320709, "learning_rate": 0.0009998527615709158, "loss": 1.9674, "step": 246 }, { "epoch": 0.11328976034858387, "grad_norm": 0.4420877695083618, "learning_rate": 0.0009998466908255907, "loss": 2.1819, "step": 247 }, { "epoch": 0.11374842334594656, "grad_norm": 0.29177185893058777, "learning_rate": 0.0009998404974642684, "loss": 1.3174, "step": 248 }, { "epoch": 0.11420708634330926, "grad_norm": 0.2630375623703003, "learning_rate": 0.000999834181488468, "loss": 1.0881, "step": 249 }, { "epoch": 0.11466574934067195, "grad_norm": 0.36166948080062866, "learning_rate": 0.0009998277428997387, "loss": 1.725, "step": 250 }, { "epoch": 0.11512441233803462, "grad_norm": 0.4986696243286133, "learning_rate": 0.0009998211816996602, "loss": 2.5789, "step": 251 }, { "epoch": 0.11558307533539731, "grad_norm": 0.3712925612926483, "learning_rate": 0.0009998144978898421, "loss": 2.1168, "step": 252 }, { "epoch": 0.11604173833276, "grad_norm": 0.29228702187538147, "learning_rate": 0.0009998076914719237, "loss": 1.5106, "step": 253 }, { "epoch": 0.1165004013301227, "grad_norm": 0.41084229946136475, "learning_rate": 0.0009998007624475746, "loss": 1.99, "step": 254 }, { "epoch": 0.11695906432748537, "grad_norm": 0.394399493932724, "learning_rate": 0.0009997937108184951, "loss": 2.3965, "step": 255 }, { "epoch": 0.11741772732484806, "grad_norm": 0.27776357531547546, "learning_rate": 0.0009997865365864143, "loss": 1.3273, "step": 256 }, { "epoch": 0.11787639032221076, "grad_norm": 0.23769012093544006, "learning_rate": 0.0009997792397530925, "loss": 1.1331, "step": 257 }, { "epoch": 0.11833505331957345, "grad_norm": 0.3943609297275543, "learning_rate": 0.0009997718203203197, "loss": 2.2263, "step": 258 }, { "epoch": 0.11879371631693614, "grad_norm": 0.3729076087474823, "learning_rate": 0.0009997642782899158, "loss": 1.9142, "step": 259 }, { "epoch": 0.11925237931429881, "grad_norm": 0.4220196604728699, "learning_rate": 0.000999756613663731, "loss": 2.4495, "step": 260 }, { "epoch": 0.1197110423116615, "grad_norm": 0.43063583970069885, "learning_rate": 0.0009997488264436455, "loss": 2.1042, "step": 261 }, { "epoch": 0.1201697053090242, "grad_norm": 0.31665846705436707, "learning_rate": 0.000999740916631569, "loss": 1.3811, "step": 262 }, { "epoch": 0.12062836830638689, "grad_norm": 0.5183250904083252, "learning_rate": 0.0009997328842294428, "loss": 2.5891, "step": 263 }, { "epoch": 0.12108703130374956, "grad_norm": 0.5458334684371948, "learning_rate": 0.000999724729239237, "loss": 2.8789, "step": 264 }, { "epoch": 0.12154569430111226, "grad_norm": 0.32633325457572937, "learning_rate": 0.0009997164516629515, "loss": 1.5408, "step": 265 }, { "epoch": 0.12200435729847495, "grad_norm": 0.3069404661655426, "learning_rate": 0.0009997080515026175, "loss": 1.4044, "step": 266 }, { "epoch": 0.12246302029583764, "grad_norm": 0.3677540719509125, "learning_rate": 0.0009996995287602953, "loss": 2.0308, "step": 267 }, { "epoch": 0.12292168329320032, "grad_norm": 0.37254124879837036, "learning_rate": 0.0009996908834380756, "loss": 1.7371, "step": 268 }, { "epoch": 0.123380346290563, "grad_norm": 0.351275771856308, "learning_rate": 0.0009996821155380793, "loss": 1.7554, "step": 269 }, { "epoch": 0.1238390092879257, "grad_norm": 0.45772960782051086, "learning_rate": 0.0009996732250624571, "loss": 2.5547, "step": 270 }, { "epoch": 0.12429767228528839, "grad_norm": 0.3280022144317627, "learning_rate": 0.00099966421201339, "loss": 1.5541, "step": 271 }, { "epoch": 0.12475633528265107, "grad_norm": 0.5368868708610535, "learning_rate": 0.0009996550763930892, "loss": 2.1713, "step": 272 }, { "epoch": 0.12521499828001376, "grad_norm": 0.2520690858364105, "learning_rate": 0.000999645818203795, "loss": 0.9838, "step": 273 }, { "epoch": 0.12567366127737645, "grad_norm": 0.3174075186252594, "learning_rate": 0.0009996364374477793, "loss": 1.5059, "step": 274 }, { "epoch": 0.12613232427473914, "grad_norm": 0.26668769121170044, "learning_rate": 0.0009996269341273427, "loss": 1.3316, "step": 275 }, { "epoch": 0.12659098727210183, "grad_norm": 0.3275998532772064, "learning_rate": 0.000999617308244817, "loss": 1.717, "step": 276 }, { "epoch": 0.12704965026946452, "grad_norm": 0.4276018738746643, "learning_rate": 0.0009996075598025628, "loss": 2.5447, "step": 277 }, { "epoch": 0.1275083132668272, "grad_norm": 0.3532467782497406, "learning_rate": 0.000999597688802972, "loss": 1.939, "step": 278 }, { "epoch": 0.12796697626418987, "grad_norm": 0.3706674575805664, "learning_rate": 0.000999587695248466, "loss": 1.9899, "step": 279 }, { "epoch": 0.12842563926155257, "grad_norm": 0.38228681683540344, "learning_rate": 0.0009995775791414958, "loss": 2.1842, "step": 280 }, { "epoch": 0.12888430225891526, "grad_norm": 0.4526541829109192, "learning_rate": 0.0009995673404845434, "loss": 2.1307, "step": 281 }, { "epoch": 0.12934296525627795, "grad_norm": 0.44101178646087646, "learning_rate": 0.0009995569792801205, "loss": 2.179, "step": 282 }, { "epoch": 0.12980162825364064, "grad_norm": 0.15456262230873108, "learning_rate": 0.0009995464955307684, "loss": 0.6752, "step": 283 }, { "epoch": 0.13026029125100333, "grad_norm": 0.4431912302970886, "learning_rate": 0.000999535889239059, "loss": 2.4602, "step": 284 }, { "epoch": 0.13071895424836602, "grad_norm": 0.4478715658187866, "learning_rate": 0.0009995251604075943, "loss": 2.3623, "step": 285 }, { "epoch": 0.1311776172457287, "grad_norm": 0.332527756690979, "learning_rate": 0.000999514309039006, "loss": 1.4232, "step": 286 }, { "epoch": 0.13163628024309137, "grad_norm": 0.3733260929584503, "learning_rate": 0.0009995033351359558, "loss": 1.7972, "step": 287 }, { "epoch": 0.13209494324045407, "grad_norm": 0.4460364282131195, "learning_rate": 0.000999492238701136, "loss": 2.227, "step": 288 }, { "epoch": 0.13255360623781676, "grad_norm": 0.35513222217559814, "learning_rate": 0.0009994810197372684, "loss": 2.0901, "step": 289 }, { "epoch": 0.13301226923517945, "grad_norm": 0.3551701009273529, "learning_rate": 0.0009994696782471054, "loss": 1.7401, "step": 290 }, { "epoch": 0.13347093223254214, "grad_norm": 0.37001582980155945, "learning_rate": 0.000999458214233429, "loss": 2.0237, "step": 291 }, { "epoch": 0.13392959522990483, "grad_norm": 0.39629852771759033, "learning_rate": 0.0009994466276990511, "loss": 1.9617, "step": 292 }, { "epoch": 0.13438825822726752, "grad_norm": 0.30655738711357117, "learning_rate": 0.0009994349186468144, "loss": 1.4343, "step": 293 }, { "epoch": 0.1348469212246302, "grad_norm": 0.3813563585281372, "learning_rate": 0.000999423087079591, "loss": 2.0818, "step": 294 }, { "epoch": 0.1353055842219929, "grad_norm": 0.4179360866546631, "learning_rate": 0.0009994111330002835, "loss": 2.1136, "step": 295 }, { "epoch": 0.13576424721935557, "grad_norm": 0.341799259185791, "learning_rate": 0.000999399056411824, "loss": 1.9364, "step": 296 }, { "epoch": 0.13622291021671826, "grad_norm": 0.3983851969242096, "learning_rate": 0.0009993868573171748, "loss": 1.4841, "step": 297 }, { "epoch": 0.13668157321408095, "grad_norm": 0.32653865218162537, "learning_rate": 0.0009993745357193293, "loss": 1.5058, "step": 298 }, { "epoch": 0.13714023621144364, "grad_norm": 0.5542972087860107, "learning_rate": 0.0009993620916213092, "loss": 2.5276, "step": 299 }, { "epoch": 0.13759889920880633, "grad_norm": 0.4614558517932892, "learning_rate": 0.0009993495250261676, "loss": 2.0603, "step": 300 }, { "epoch": 0.13805756220616902, "grad_norm": 0.1725156605243683, "learning_rate": 0.0009993368359369867, "loss": 0.7159, "step": 301 }, { "epoch": 0.1385162252035317, "grad_norm": 0.466024786233902, "learning_rate": 0.0009993240243568798, "loss": 2.0669, "step": 302 }, { "epoch": 0.1389748882008944, "grad_norm": 0.5493818521499634, "learning_rate": 0.0009993110902889893, "loss": 2.6599, "step": 303 }, { "epoch": 0.1394335511982571, "grad_norm": 0.4970023036003113, "learning_rate": 0.0009992980337364882, "loss": 2.4265, "step": 304 }, { "epoch": 0.13989221419561976, "grad_norm": 0.37026146054267883, "learning_rate": 0.0009992848547025791, "loss": 1.4705, "step": 305 }, { "epoch": 0.14035087719298245, "grad_norm": 0.4260435402393341, "learning_rate": 0.0009992715531904955, "loss": 1.6935, "step": 306 }, { "epoch": 0.14080954019034514, "grad_norm": 0.08674909919500351, "learning_rate": 0.0009992581292034997, "loss": 0.4348, "step": 307 }, { "epoch": 0.14126820318770783, "grad_norm": 0.3797175884246826, "learning_rate": 0.0009992445827448852, "loss": 1.7039, "step": 308 }, { "epoch": 0.14172686618507052, "grad_norm": 0.24065154790878296, "learning_rate": 0.0009992309138179745, "loss": 1.0594, "step": 309 }, { "epoch": 0.1421855291824332, "grad_norm": 0.4619709849357605, "learning_rate": 0.000999217122426121, "loss": 2.0485, "step": 310 }, { "epoch": 0.1426441921797959, "grad_norm": 0.4283906817436218, "learning_rate": 0.000999203208572708, "loss": 2.2198, "step": 311 }, { "epoch": 0.1431028551771586, "grad_norm": 0.39942678809165955, "learning_rate": 0.0009991891722611484, "loss": 1.8766, "step": 312 }, { "epoch": 0.14356151817452126, "grad_norm": 0.37499570846557617, "learning_rate": 0.0009991750134948857, "loss": 1.4081, "step": 313 }, { "epoch": 0.14402018117188395, "grad_norm": 0.4207157790660858, "learning_rate": 0.0009991607322773928, "loss": 2.1097, "step": 314 }, { "epoch": 0.14447884416924664, "grad_norm": 0.39866119623184204, "learning_rate": 0.000999146328612173, "loss": 1.8325, "step": 315 }, { "epoch": 0.14493750716660933, "grad_norm": 0.507705807685852, "learning_rate": 0.00099913180250276, "loss": 2.0061, "step": 316 }, { "epoch": 0.14539617016397202, "grad_norm": 0.4097268283367157, "learning_rate": 0.0009991171539527168, "loss": 1.9861, "step": 317 }, { "epoch": 0.1458548331613347, "grad_norm": 0.36734098196029663, "learning_rate": 0.000999102382965637, "loss": 1.6204, "step": 318 }, { "epoch": 0.1463134961586974, "grad_norm": 0.36929383873939514, "learning_rate": 0.0009990874895451439, "loss": 1.9098, "step": 319 }, { "epoch": 0.1467721591560601, "grad_norm": 0.40100765228271484, "learning_rate": 0.000999072473694891, "loss": 1.9401, "step": 320 }, { "epoch": 0.14723082215342279, "grad_norm": 0.38179731369018555, "learning_rate": 0.0009990573354185617, "loss": 2.0457, "step": 321 }, { "epoch": 0.14768948515078545, "grad_norm": 0.4014952480792999, "learning_rate": 0.0009990420747198697, "loss": 1.5037, "step": 322 }, { "epoch": 0.14814814814814814, "grad_norm": 0.2382311075925827, "learning_rate": 0.0009990266916025585, "loss": 1.1044, "step": 323 }, { "epoch": 0.14860681114551083, "grad_norm": 0.40865546464920044, "learning_rate": 0.0009990111860704019, "loss": 1.8592, "step": 324 }, { "epoch": 0.14906547414287352, "grad_norm": 0.41064581274986267, "learning_rate": 0.000998995558127203, "loss": 1.9371, "step": 325 }, { "epoch": 0.1495241371402362, "grad_norm": 0.29274582862854004, "learning_rate": 0.0009989798077767959, "loss": 1.3743, "step": 326 }, { "epoch": 0.1499828001375989, "grad_norm": 0.3344130218029022, "learning_rate": 0.0009989639350230439, "loss": 1.3143, "step": 327 }, { "epoch": 0.1504414631349616, "grad_norm": 0.25171759724617004, "learning_rate": 0.0009989479398698413, "loss": 0.9927, "step": 328 }, { "epoch": 0.15090012613232429, "grad_norm": 0.13870039582252502, "learning_rate": 0.0009989318223211112, "loss": 0.7593, "step": 329 }, { "epoch": 0.15135878912968695, "grad_norm": 0.5177074074745178, "learning_rate": 0.0009989155823808076, "loss": 2.2173, "step": 330 }, { "epoch": 0.15181745212704964, "grad_norm": 0.3356158435344696, "learning_rate": 0.0009988992200529144, "loss": 1.5492, "step": 331 }, { "epoch": 0.15227611512441233, "grad_norm": 0.44678938388824463, "learning_rate": 0.000998882735341445, "loss": 1.7162, "step": 332 }, { "epoch": 0.15273477812177502, "grad_norm": 0.31965455412864685, "learning_rate": 0.0009988661282504438, "loss": 1.2234, "step": 333 }, { "epoch": 0.1531934411191377, "grad_norm": 0.4562229812145233, "learning_rate": 0.0009988493987839841, "loss": 2.0798, "step": 334 }, { "epoch": 0.1536521041165004, "grad_norm": 0.36107146739959717, "learning_rate": 0.00099883254694617, "loss": 1.7864, "step": 335 }, { "epoch": 0.1541107671138631, "grad_norm": 0.3504132330417633, "learning_rate": 0.0009988155727411357, "loss": 1.6267, "step": 336 }, { "epoch": 0.1545694301112258, "grad_norm": 0.28946036100387573, "learning_rate": 0.0009987984761730445, "loss": 1.319, "step": 337 }, { "epoch": 0.15502809310858848, "grad_norm": 0.49577978253364563, "learning_rate": 0.0009987812572460905, "loss": 1.9709, "step": 338 }, { "epoch": 0.15548675610595114, "grad_norm": 0.49071890115737915, "learning_rate": 0.000998763915964498, "loss": 2.1024, "step": 339 }, { "epoch": 0.15594541910331383, "grad_norm": 0.3110016882419586, "learning_rate": 0.0009987464523325202, "loss": 1.5035, "step": 340 }, { "epoch": 0.15640408210067652, "grad_norm": 0.3991573452949524, "learning_rate": 0.000998728866354442, "loss": 1.731, "step": 341 }, { "epoch": 0.1568627450980392, "grad_norm": 0.3533564805984497, "learning_rate": 0.0009987111580345763, "loss": 1.319, "step": 342 }, { "epoch": 0.1573214080954019, "grad_norm": 0.4693927466869354, "learning_rate": 0.000998693327377268, "loss": 2.17, "step": 343 }, { "epoch": 0.1577800710927646, "grad_norm": 0.4726630747318268, "learning_rate": 0.0009986753743868905, "loss": 2.1395, "step": 344 }, { "epoch": 0.1582387340901273, "grad_norm": 0.27608227729797363, "learning_rate": 0.0009986572990678482, "loss": 1.3976, "step": 345 }, { "epoch": 0.15869739708748998, "grad_norm": 0.34242314100265503, "learning_rate": 0.000998639101424575, "loss": 1.614, "step": 346 }, { "epoch": 0.15915606008485264, "grad_norm": 0.41097334027290344, "learning_rate": 0.0009986207814615348, "loss": 2.111, "step": 347 }, { "epoch": 0.15961472308221533, "grad_norm": 0.44249075651168823, "learning_rate": 0.0009986023391832216, "loss": 2.4585, "step": 348 }, { "epoch": 0.16007338607957802, "grad_norm": 0.412911593914032, "learning_rate": 0.0009985837745941596, "loss": 1.9757, "step": 349 }, { "epoch": 0.1605320490769407, "grad_norm": 0.22264568507671356, "learning_rate": 0.0009985650876989025, "loss": 0.9562, "step": 350 }, { "epoch": 0.1609907120743034, "grad_norm": 0.449796199798584, "learning_rate": 0.0009985462785020347, "loss": 2.0573, "step": 351 }, { "epoch": 0.1614493750716661, "grad_norm": 0.7880814671516418, "learning_rate": 0.0009985273470081702, "loss": 2.3027, "step": 352 }, { "epoch": 0.1619080380690288, "grad_norm": 10.907687187194824, "learning_rate": 0.0009985082932219527, "loss": 2.3361, "step": 353 }, { "epoch": 0.16236670106639148, "grad_norm": 2.09915828704834, "learning_rate": 0.0009984891171480566, "loss": 1.6033, "step": 354 }, { "epoch": 0.16282536406375417, "grad_norm": 0.3674648106098175, "learning_rate": 0.0009984698187911856, "loss": 1.2239, "step": 355 }, { "epoch": 0.16328402706111683, "grad_norm": 0.5814857482910156, "learning_rate": 0.000998450398156074, "loss": 1.4328, "step": 356 }, { "epoch": 0.16374269005847952, "grad_norm": 0.46351417899131775, "learning_rate": 0.0009984308552474857, "loss": 2.0938, "step": 357 }, { "epoch": 0.16420135305584221, "grad_norm": 0.5045230388641357, "learning_rate": 0.0009984111900702146, "loss": 2.2004, "step": 358 }, { "epoch": 0.1646600160532049, "grad_norm": 0.311603844165802, "learning_rate": 0.000998391402629085, "loss": 1.5096, "step": 359 }, { "epoch": 0.1651186790505676, "grad_norm": 0.40799856185913086, "learning_rate": 0.0009983714929289508, "loss": 2.1035, "step": 360 }, { "epoch": 0.1655773420479303, "grad_norm": 0.1845995932817459, "learning_rate": 0.0009983514609746959, "loss": 0.8358, "step": 361 }, { "epoch": 0.16603600504529298, "grad_norm": 0.36411669850349426, "learning_rate": 0.0009983313067712344, "loss": 1.5809, "step": 362 }, { "epoch": 0.16649466804265567, "grad_norm": 0.3270757794380188, "learning_rate": 0.00099831103032351, "loss": 1.4844, "step": 363 }, { "epoch": 0.16695333104001836, "grad_norm": 0.24170421063899994, "learning_rate": 0.0009982906316364968, "loss": 0.9695, "step": 364 }, { "epoch": 0.16741199403738102, "grad_norm": 0.3997127413749695, "learning_rate": 0.000998270110715199, "loss": 2.1677, "step": 365 }, { "epoch": 0.16787065703474371, "grad_norm": 0.251727432012558, "learning_rate": 0.0009982494675646503, "loss": 1.3295, "step": 366 }, { "epoch": 0.1683293200321064, "grad_norm": 0.27435481548309326, "learning_rate": 0.0009982287021899146, "loss": 1.1655, "step": 367 }, { "epoch": 0.1687879830294691, "grad_norm": 0.3682950437068939, "learning_rate": 0.0009982078145960858, "loss": 1.6586, "step": 368 }, { "epoch": 0.1692466460268318, "grad_norm": 0.3245248794555664, "learning_rate": 0.0009981868047882877, "loss": 1.2151, "step": 369 }, { "epoch": 0.16970530902419448, "grad_norm": 0.22152075171470642, "learning_rate": 0.0009981656727716746, "loss": 1.0169, "step": 370 }, { "epoch": 0.17016397202155717, "grad_norm": 0.3736206293106079, "learning_rate": 0.00099814441855143, "loss": 1.5577, "step": 371 }, { "epoch": 0.17062263501891986, "grad_norm": 0.3287566602230072, "learning_rate": 0.0009981230421327674, "loss": 1.4915, "step": 372 }, { "epoch": 0.17108129801628252, "grad_norm": 0.26681220531463623, "learning_rate": 0.000998101543520931, "loss": 1.1304, "step": 373 }, { "epoch": 0.17153996101364521, "grad_norm": 0.32426249980926514, "learning_rate": 0.0009980799227211946, "loss": 1.4148, "step": 374 }, { "epoch": 0.1719986240110079, "grad_norm": 0.38737696409225464, "learning_rate": 0.0009980581797388616, "loss": 1.6913, "step": 375 }, { "epoch": 0.1724572870083706, "grad_norm": 0.5705090761184692, "learning_rate": 0.0009980363145792662, "loss": 2.7029, "step": 376 }, { "epoch": 0.1729159500057333, "grad_norm": 0.4684537649154663, "learning_rate": 0.0009980143272477718, "loss": 2.1586, "step": 377 }, { "epoch": 0.17337461300309598, "grad_norm": 0.386222779750824, "learning_rate": 0.0009979922177497718, "loss": 2.2925, "step": 378 }, { "epoch": 0.17383327600045867, "grad_norm": 0.2883818447589874, "learning_rate": 0.0009979699860906902, "loss": 1.1465, "step": 379 }, { "epoch": 0.17429193899782136, "grad_norm": 0.30754733085632324, "learning_rate": 0.0009979476322759806, "loss": 0.9909, "step": 380 }, { "epoch": 0.17475060199518405, "grad_norm": 0.42961201071739197, "learning_rate": 0.0009979251563111261, "loss": 2.0005, "step": 381 }, { "epoch": 0.17520926499254671, "grad_norm": 0.41431599855422974, "learning_rate": 0.0009979025582016409, "loss": 1.6347, "step": 382 }, { "epoch": 0.1756679279899094, "grad_norm": 0.39737799763679504, "learning_rate": 0.000997879837953068, "loss": 2.2396, "step": 383 }, { "epoch": 0.1761265909872721, "grad_norm": 0.2799502909183502, "learning_rate": 0.0009978569955709808, "loss": 1.3289, "step": 384 }, { "epoch": 0.1765852539846348, "grad_norm": 0.4221355617046356, "learning_rate": 0.000997834031060983, "loss": 2.1155, "step": 385 }, { "epoch": 0.17704391698199748, "grad_norm": 0.48007211089134216, "learning_rate": 0.0009978109444287078, "loss": 2.677, "step": 386 }, { "epoch": 0.17750257997936017, "grad_norm": 0.3006044626235962, "learning_rate": 0.0009977877356798185, "loss": 1.4993, "step": 387 }, { "epoch": 0.17796124297672286, "grad_norm": 0.30362898111343384, "learning_rate": 0.0009977644048200084, "loss": 1.5726, "step": 388 }, { "epoch": 0.17841990597408555, "grad_norm": 0.25762712955474854, "learning_rate": 0.0009977409518550006, "loss": 1.117, "step": 389 }, { "epoch": 0.17887856897144822, "grad_norm": 0.2970077693462372, "learning_rate": 0.0009977173767905486, "loss": 1.5483, "step": 390 }, { "epoch": 0.1793372319688109, "grad_norm": 0.40200671553611755, "learning_rate": 0.0009976936796324352, "loss": 2.1389, "step": 391 }, { "epoch": 0.1797958949661736, "grad_norm": 0.4564474821090698, "learning_rate": 0.0009976698603864737, "loss": 2.4102, "step": 392 }, { "epoch": 0.1802545579635363, "grad_norm": 0.5073770880699158, "learning_rate": 0.0009976459190585072, "loss": 2.1866, "step": 393 }, { "epoch": 0.18071322096089898, "grad_norm": 0.24557775259017944, "learning_rate": 0.0009976218556544084, "loss": 1.0652, "step": 394 }, { "epoch": 0.18117188395826167, "grad_norm": 0.3804593086242676, "learning_rate": 0.0009975976701800803, "loss": 1.7112, "step": 395 }, { "epoch": 0.18163054695562436, "grad_norm": 0.414326012134552, "learning_rate": 0.000997573362641456, "loss": 1.8792, "step": 396 }, { "epoch": 0.18208920995298705, "grad_norm": 0.34744778275489807, "learning_rate": 0.000997548933044498, "loss": 1.4276, "step": 397 }, { "epoch": 0.18254787295034974, "grad_norm": 0.3834390640258789, "learning_rate": 0.0009975243813951993, "loss": 1.8641, "step": 398 }, { "epoch": 0.1830065359477124, "grad_norm": 0.40305501222610474, "learning_rate": 0.0009974997076995825, "loss": 1.9985, "step": 399 }, { "epoch": 0.1834651989450751, "grad_norm": 0.3667886555194855, "learning_rate": 0.0009974749119637002, "loss": 1.7085, "step": 400 }, { "epoch": 0.1839238619424378, "grad_norm": 0.28065481781959534, "learning_rate": 0.000997449994193635, "loss": 1.42, "step": 401 }, { "epoch": 0.18438252493980048, "grad_norm": 0.37087318301200867, "learning_rate": 0.0009974249543954995, "loss": 1.5901, "step": 402 }, { "epoch": 0.18484118793716317, "grad_norm": 0.3824957013130188, "learning_rate": 0.0009973997925754362, "loss": 2.0566, "step": 403 }, { "epoch": 0.18529985093452586, "grad_norm": 0.3663063049316406, "learning_rate": 0.0009973745087396171, "loss": 2.079, "step": 404 }, { "epoch": 0.18575851393188855, "grad_norm": 0.30660781264305115, "learning_rate": 0.0009973491028942448, "loss": 1.5338, "step": 405 }, { "epoch": 0.18621717692925124, "grad_norm": 0.25807487964630127, "learning_rate": 0.0009973235750455516, "loss": 1.2258, "step": 406 }, { "epoch": 0.1866758399266139, "grad_norm": 0.288686066865921, "learning_rate": 0.0009972979251997994, "loss": 1.6471, "step": 407 }, { "epoch": 0.1871345029239766, "grad_norm": 0.34755846858024597, "learning_rate": 0.0009972721533632804, "loss": 2.1007, "step": 408 }, { "epoch": 0.1875931659213393, "grad_norm": 0.22822384536266327, "learning_rate": 0.000997246259542317, "loss": 1.1318, "step": 409 }, { "epoch": 0.18805182891870198, "grad_norm": 0.12297138571739197, "learning_rate": 0.0009972202437432604, "loss": 0.6918, "step": 410 }, { "epoch": 0.18851049191606467, "grad_norm": 0.3044014871120453, "learning_rate": 0.000997194105972493, "loss": 1.4567, "step": 411 }, { "epoch": 0.18896915491342736, "grad_norm": 0.20039816200733185, "learning_rate": 0.0009971678462364266, "loss": 1.1349, "step": 412 }, { "epoch": 0.18942781791079005, "grad_norm": 0.36312806606292725, "learning_rate": 0.0009971414645415027, "loss": 2.0817, "step": 413 }, { "epoch": 0.18988648090815274, "grad_norm": 0.30179518461227417, "learning_rate": 0.0009971149608941929, "loss": 1.5486, "step": 414 }, { "epoch": 0.19034514390551543, "grad_norm": 0.3225315809249878, "learning_rate": 0.0009970883353009987, "loss": 1.3529, "step": 415 }, { "epoch": 0.1908038069028781, "grad_norm": 0.3565948009490967, "learning_rate": 0.000997061587768452, "loss": 1.9429, "step": 416 }, { "epoch": 0.1912624699002408, "grad_norm": 0.6202376484870911, "learning_rate": 0.0009970347183031133, "loss": 1.3034, "step": 417 }, { "epoch": 0.19172113289760348, "grad_norm": 0.31976190209388733, "learning_rate": 0.0009970077269115748, "loss": 1.833, "step": 418 }, { "epoch": 0.19217979589496617, "grad_norm": 0.4025329649448395, "learning_rate": 0.0009969806136004573, "loss": 1.7521, "step": 419 }, { "epoch": 0.19263845889232886, "grad_norm": 0.20742328464984894, "learning_rate": 0.0009969533783764117, "loss": 1.1387, "step": 420 }, { "epoch": 0.19309712188969155, "grad_norm": 0.2832637429237366, "learning_rate": 0.0009969260212461192, "loss": 1.5901, "step": 421 }, { "epoch": 0.19355578488705424, "grad_norm": 0.21540392935276031, "learning_rate": 0.0009968985422162907, "loss": 1.0676, "step": 422 }, { "epoch": 0.19401444788441694, "grad_norm": 0.31164097785949707, "learning_rate": 0.0009968709412936669, "loss": 1.7977, "step": 423 }, { "epoch": 0.19447311088177963, "grad_norm": 0.3007836937904358, "learning_rate": 0.0009968432184850188, "loss": 1.9207, "step": 424 }, { "epoch": 0.1949317738791423, "grad_norm": 0.34054356813430786, "learning_rate": 0.0009968153737971463, "loss": 2.281, "step": 425 }, { "epoch": 0.19539043687650498, "grad_norm": 0.3539496660232544, "learning_rate": 0.0009967874072368808, "loss": 2.0088, "step": 426 }, { "epoch": 0.19584909987386767, "grad_norm": 0.4037904441356659, "learning_rate": 0.0009967593188110822, "loss": 2.0894, "step": 427 }, { "epoch": 0.19630776287123036, "grad_norm": 0.36563950777053833, "learning_rate": 0.0009967311085266408, "loss": 1.7298, "step": 428 }, { "epoch": 0.19676642586859305, "grad_norm": 0.4053013324737549, "learning_rate": 0.000996702776390477, "loss": 2.3652, "step": 429 }, { "epoch": 0.19722508886595574, "grad_norm": 0.24620242416858673, "learning_rate": 0.0009966743224095406, "loss": 1.1821, "step": 430 }, { "epoch": 0.19768375186331844, "grad_norm": 0.4127357006072998, "learning_rate": 0.0009966457465908117, "loss": 1.9592, "step": 431 }, { "epoch": 0.19814241486068113, "grad_norm": 0.13345493376255035, "learning_rate": 0.0009966170489413002, "loss": 0.6845, "step": 432 }, { "epoch": 0.1986010778580438, "grad_norm": 0.46981745958328247, "learning_rate": 0.000996588229468046, "loss": 2.3521, "step": 433 }, { "epoch": 0.19905974085540648, "grad_norm": 0.414408802986145, "learning_rate": 0.0009965592881781182, "loss": 1.7859, "step": 434 }, { "epoch": 0.19951840385276917, "grad_norm": 0.36975857615470886, "learning_rate": 0.0009965302250786168, "loss": 1.8119, "step": 435 }, { "epoch": 0.19997706685013186, "grad_norm": 0.29966479539871216, "learning_rate": 0.000996501040176671, "loss": 1.6156, "step": 436 }, { "epoch": 0.20043572984749455, "grad_norm": 0.3034079074859619, "learning_rate": 0.00099647173347944, "loss": 1.4443, "step": 437 }, { "epoch": 0.20089439284485724, "grad_norm": 0.23707164824008942, "learning_rate": 0.0009964423049941132, "loss": 1.1766, "step": 438 }, { "epoch": 0.20135305584221994, "grad_norm": 0.37824898958206177, "learning_rate": 0.0009964127547279094, "loss": 2.0259, "step": 439 }, { "epoch": 0.20181171883958263, "grad_norm": 0.26142311096191406, "learning_rate": 0.0009963830826880775, "loss": 1.3159, "step": 440 }, { "epoch": 0.20227038183694532, "grad_norm": 0.34730425477027893, "learning_rate": 0.0009963532888818962, "loss": 1.8903, "step": 441 }, { "epoch": 0.20272904483430798, "grad_norm": 0.505499541759491, "learning_rate": 0.0009963233733166744, "loss": 2.4998, "step": 442 }, { "epoch": 0.20318770783167067, "grad_norm": 0.38120755553245544, "learning_rate": 0.0009962933359997505, "loss": 1.8554, "step": 443 }, { "epoch": 0.20364637082903336, "grad_norm": 0.2800237536430359, "learning_rate": 0.0009962631769384928, "loss": 1.4182, "step": 444 }, { "epoch": 0.20410503382639605, "grad_norm": 0.35025089979171753, "learning_rate": 0.0009962328961402992, "loss": 1.7761, "step": 445 }, { "epoch": 0.20456369682375874, "grad_norm": 0.446393221616745, "learning_rate": 0.0009962024936125986, "loss": 2.626, "step": 446 }, { "epoch": 0.20502235982112144, "grad_norm": 0.26646509766578674, "learning_rate": 0.0009961719693628484, "loss": 1.2512, "step": 447 }, { "epoch": 0.20548102281848413, "grad_norm": 0.4339228868484497, "learning_rate": 0.0009961413233985363, "loss": 2.1897, "step": 448 }, { "epoch": 0.20593968581584682, "grad_norm": 0.31140047311782837, "learning_rate": 0.0009961105557271803, "loss": 1.3533, "step": 449 }, { "epoch": 0.20639834881320948, "grad_norm": 0.34898841381073, "learning_rate": 0.0009960796663563282, "loss": 1.6303, "step": 450 }, { "epoch": 0.20685701181057217, "grad_norm": 0.3299868106842041, "learning_rate": 0.0009960486552935566, "loss": 1.5021, "step": 451 }, { "epoch": 0.20731567480793486, "grad_norm": 0.3481556177139282, "learning_rate": 0.0009960175225464732, "loss": 1.6241, "step": 452 }, { "epoch": 0.20777433780529755, "grad_norm": 0.2556866407394409, "learning_rate": 0.0009959862681227151, "loss": 1.2629, "step": 453 }, { "epoch": 0.20823300080266025, "grad_norm": 0.35603269934654236, "learning_rate": 0.0009959548920299493, "loss": 1.5148, "step": 454 }, { "epoch": 0.20869166380002294, "grad_norm": 0.4437220096588135, "learning_rate": 0.0009959233942758725, "loss": 2.4865, "step": 455 }, { "epoch": 0.20915032679738563, "grad_norm": 0.5016736388206482, "learning_rate": 0.000995891774868211, "loss": 2.6392, "step": 456 }, { "epoch": 0.20960898979474832, "grad_norm": 0.32121479511260986, "learning_rate": 0.000995860033814722, "loss": 1.7887, "step": 457 }, { "epoch": 0.210067652792111, "grad_norm": 0.316918283700943, "learning_rate": 0.000995828171123191, "loss": 1.5659, "step": 458 }, { "epoch": 0.21052631578947367, "grad_norm": 0.2690139710903168, "learning_rate": 0.0009957961868014347, "loss": 1.4294, "step": 459 }, { "epoch": 0.21098497878683636, "grad_norm": 0.3223515450954437, "learning_rate": 0.0009957640808572988, "loss": 1.7502, "step": 460 }, { "epoch": 0.21144364178419905, "grad_norm": 0.267553448677063, "learning_rate": 0.0009957318532986593, "loss": 1.1748, "step": 461 }, { "epoch": 0.21190230478156175, "grad_norm": 0.30810901522636414, "learning_rate": 0.0009956995041334215, "loss": 1.6794, "step": 462 }, { "epoch": 0.21236096777892444, "grad_norm": 0.33142194151878357, "learning_rate": 0.0009956670333695215, "loss": 1.7014, "step": 463 }, { "epoch": 0.21281963077628713, "grad_norm": 0.270020455121994, "learning_rate": 0.0009956344410149242, "loss": 1.2939, "step": 464 }, { "epoch": 0.21327829377364982, "grad_norm": 0.39550352096557617, "learning_rate": 0.0009956017270776247, "loss": 1.9753, "step": 465 }, { "epoch": 0.2137369567710125, "grad_norm": 0.3066462278366089, "learning_rate": 0.0009955688915656482, "loss": 1.4247, "step": 466 }, { "epoch": 0.21419561976837517, "grad_norm": 0.3493700325489044, "learning_rate": 0.0009955359344870495, "loss": 1.6978, "step": 467 }, { "epoch": 0.21465428276573786, "grad_norm": 0.2585456073284149, "learning_rate": 0.0009955028558499125, "loss": 1.2059, "step": 468 }, { "epoch": 0.21511294576310055, "grad_norm": 0.31552475690841675, "learning_rate": 0.0009954696556623529, "loss": 1.6361, "step": 469 }, { "epoch": 0.21557160876046325, "grad_norm": 0.31127098202705383, "learning_rate": 0.0009954363339325137, "loss": 1.5196, "step": 470 }, { "epoch": 0.21603027175782594, "grad_norm": 0.4300401210784912, "learning_rate": 0.0009954028906685698, "loss": 2.5085, "step": 471 }, { "epoch": 0.21648893475518863, "grad_norm": 0.08116165548563004, "learning_rate": 0.0009953693258787247, "loss": 0.5775, "step": 472 }, { "epoch": 0.21694759775255132, "grad_norm": 0.3769618272781372, "learning_rate": 0.0009953356395712121, "loss": 2.0813, "step": 473 }, { "epoch": 0.217406260749914, "grad_norm": 0.3906650245189667, "learning_rate": 0.0009953018317542957, "loss": 2.0854, "step": 474 }, { "epoch": 0.2178649237472767, "grad_norm": 0.24557356536388397, "learning_rate": 0.0009952679024362685, "loss": 1.2442, "step": 475 }, { "epoch": 0.21832358674463936, "grad_norm": 0.3962481915950775, "learning_rate": 0.000995233851625454, "loss": 2.356, "step": 476 }, { "epoch": 0.21878224974200206, "grad_norm": 0.2204056829214096, "learning_rate": 0.0009951996793302047, "loss": 0.9941, "step": 477 }, { "epoch": 0.21924091273936475, "grad_norm": 0.07944503426551819, "learning_rate": 0.0009951653855589035, "loss": 0.4608, "step": 478 }, { "epoch": 0.21969957573672744, "grad_norm": 0.2788356840610504, "learning_rate": 0.000995130970319963, "loss": 1.2136, "step": 479 }, { "epoch": 0.22015823873409013, "grad_norm": 0.10177599638700485, "learning_rate": 0.000995096433621825, "loss": 0.5138, "step": 480 }, { "epoch": 0.22061690173145282, "grad_norm": 0.19846868515014648, "learning_rate": 0.0009950617754729625, "loss": 1.0419, "step": 481 }, { "epoch": 0.2210755647288155, "grad_norm": 0.3579308092594147, "learning_rate": 0.0009950269958818767, "loss": 1.4526, "step": 482 }, { "epoch": 0.2215342277261782, "grad_norm": 0.36856117844581604, "learning_rate": 0.0009949920948570995, "loss": 1.7526, "step": 483 }, { "epoch": 0.2219928907235409, "grad_norm": 0.32889747619628906, "learning_rate": 0.0009949570724071923, "loss": 1.4898, "step": 484 }, { "epoch": 0.22245155372090356, "grad_norm": 0.4301432967185974, "learning_rate": 0.0009949219285407464, "loss": 1.9407, "step": 485 }, { "epoch": 0.22291021671826625, "grad_norm": 0.3047052323818207, "learning_rate": 0.000994886663266383, "loss": 1.3376, "step": 486 }, { "epoch": 0.22336887971562894, "grad_norm": 0.27812737226486206, "learning_rate": 0.0009948512765927525, "loss": 1.2351, "step": 487 }, { "epoch": 0.22382754271299163, "grad_norm": 0.34248921275138855, "learning_rate": 0.0009948157685285362, "loss": 1.8019, "step": 488 }, { "epoch": 0.22428620571035432, "grad_norm": 0.295585036277771, "learning_rate": 0.0009947801390824437, "loss": 1.5117, "step": 489 }, { "epoch": 0.224744868707717, "grad_norm": 0.3885442912578583, "learning_rate": 0.000994744388263216, "loss": 1.9371, "step": 490 }, { "epoch": 0.2252035317050797, "grad_norm": 0.5128207206726074, "learning_rate": 0.0009947085160796221, "loss": 1.957, "step": 491 }, { "epoch": 0.2256621947024424, "grad_norm": 0.3117857873439789, "learning_rate": 0.0009946725225404623, "loss": 1.6161, "step": 492 }, { "epoch": 0.22612085769980506, "grad_norm": 0.3748393952846527, "learning_rate": 0.000994636407654566, "loss": 2.0451, "step": 493 }, { "epoch": 0.22657952069716775, "grad_norm": 0.4382317364215851, "learning_rate": 0.0009946001714307923, "loss": 2.3245, "step": 494 }, { "epoch": 0.22703818369453044, "grad_norm": 0.4299793243408203, "learning_rate": 0.0009945638138780305, "loss": 2.5256, "step": 495 }, { "epoch": 0.22749684669189313, "grad_norm": 0.36116522550582886, "learning_rate": 0.000994527335005199, "loss": 1.8933, "step": 496 }, { "epoch": 0.22795550968925582, "grad_norm": 0.39556559920310974, "learning_rate": 0.0009944907348212464, "loss": 1.9866, "step": 497 }, { "epoch": 0.2284141726866185, "grad_norm": 0.2519306242465973, "learning_rate": 0.0009944540133351512, "loss": 1.0477, "step": 498 }, { "epoch": 0.2288728356839812, "grad_norm": 0.23983240127563477, "learning_rate": 0.0009944171705559214, "loss": 0.9459, "step": 499 }, { "epoch": 0.2293314986813439, "grad_norm": 0.2413322627544403, "learning_rate": 0.0009943802064925947, "loss": 0.9827, "step": 500 }, { "epoch": 0.22979016167870658, "grad_norm": 0.48764920234680176, "learning_rate": 0.0009943431211542387, "loss": 2.16, "step": 501 }, { "epoch": 0.23024882467606925, "grad_norm": 0.13628147542476654, "learning_rate": 0.0009943059145499508, "loss": 0.7079, "step": 502 }, { "epoch": 0.23070748767343194, "grad_norm": 0.3403797447681427, "learning_rate": 0.0009942685866888576, "loss": 1.6746, "step": 503 }, { "epoch": 0.23116615067079463, "grad_norm": 0.4511934518814087, "learning_rate": 0.0009942311375801163, "loss": 1.9286, "step": 504 }, { "epoch": 0.23162481366815732, "grad_norm": 0.3697836399078369, "learning_rate": 0.0009941935672329136, "loss": 1.4048, "step": 505 }, { "epoch": 0.23208347666552, "grad_norm": 0.30644330382347107, "learning_rate": 0.0009941558756564653, "loss": 1.3854, "step": 506 }, { "epoch": 0.2325421396628827, "grad_norm": 0.2954138517379761, "learning_rate": 0.0009941180628600178, "loss": 1.3652, "step": 507 }, { "epoch": 0.2330008026602454, "grad_norm": 0.30790993571281433, "learning_rate": 0.0009940801288528466, "loss": 1.447, "step": 508 }, { "epoch": 0.23345946565760808, "grad_norm": 0.37096887826919556, "learning_rate": 0.0009940420736442575, "loss": 1.4563, "step": 509 }, { "epoch": 0.23391812865497075, "grad_norm": 0.32588744163513184, "learning_rate": 0.0009940038972435852, "loss": 1.4545, "step": 510 }, { "epoch": 0.23437679165233344, "grad_norm": 0.30430012941360474, "learning_rate": 0.000993965599660195, "loss": 1.509, "step": 511 }, { "epoch": 0.23483545464969613, "grad_norm": 0.3035714626312256, "learning_rate": 0.0009939271809034818, "loss": 1.5743, "step": 512 }, { "epoch": 0.23529411764705882, "grad_norm": 0.21552062034606934, "learning_rate": 0.0009938886409828695, "loss": 0.9872, "step": 513 }, { "epoch": 0.2357527806444215, "grad_norm": 0.30967265367507935, "learning_rate": 0.0009938499799078124, "loss": 1.5457, "step": 514 }, { "epoch": 0.2362114436417842, "grad_norm": 0.47051116824150085, "learning_rate": 0.0009938111976877946, "loss": 2.3921, "step": 515 }, { "epoch": 0.2366701066391469, "grad_norm": 0.24845226109027863, "learning_rate": 0.0009937722943323293, "loss": 1.0681, "step": 516 }, { "epoch": 0.23712876963650958, "grad_norm": 0.28985902667045593, "learning_rate": 0.0009937332698509598, "loss": 1.3932, "step": 517 }, { "epoch": 0.23758743263387228, "grad_norm": 0.37183916568756104, "learning_rate": 0.0009936941242532594, "loss": 1.9424, "step": 518 }, { "epoch": 0.23804609563123494, "grad_norm": 0.32926613092422485, "learning_rate": 0.0009936548575488306, "loss": 1.8109, "step": 519 }, { "epoch": 0.23850475862859763, "grad_norm": 0.3398732542991638, "learning_rate": 0.0009936154697473057, "loss": 1.7718, "step": 520 }, { "epoch": 0.23896342162596032, "grad_norm": 0.36996906995773315, "learning_rate": 0.0009935759608583471, "loss": 1.7703, "step": 521 }, { "epoch": 0.239422084623323, "grad_norm": 0.10577350109815598, "learning_rate": 0.0009935363308916463, "loss": 0.5741, "step": 522 }, { "epoch": 0.2398807476206857, "grad_norm": 0.4257866144180298, "learning_rate": 0.0009934965798569248, "loss": 2.071, "step": 523 }, { "epoch": 0.2403394106180484, "grad_norm": 0.23754611611366272, "learning_rate": 0.0009934567077639342, "loss": 1.2965, "step": 524 }, { "epoch": 0.24079807361541108, "grad_norm": 0.2694760859012604, "learning_rate": 0.0009934167146224552, "loss": 1.4501, "step": 525 }, { "epoch": 0.24125673661277378, "grad_norm": 0.35148951411247253, "learning_rate": 0.0009933766004422983, "loss": 1.674, "step": 526 }, { "epoch": 0.24171539961013644, "grad_norm": 0.3345791697502136, "learning_rate": 0.000993336365233304, "loss": 1.4195, "step": 527 }, { "epoch": 0.24217406260749913, "grad_norm": 0.15900827944278717, "learning_rate": 0.0009932960090053417, "loss": 0.7744, "step": 528 }, { "epoch": 0.24263272560486182, "grad_norm": 0.358396977186203, "learning_rate": 0.000993255531768312, "loss": 1.766, "step": 529 }, { "epoch": 0.2430913886022245, "grad_norm": 0.35677099227905273, "learning_rate": 0.0009932149335321438, "loss": 1.7595, "step": 530 }, { "epoch": 0.2435500515995872, "grad_norm": 0.19825054705142975, "learning_rate": 0.000993174214306796, "loss": 0.913, "step": 531 }, { "epoch": 0.2440087145969499, "grad_norm": 0.2745889723300934, "learning_rate": 0.0009931333741022574, "loss": 1.4666, "step": 532 }, { "epoch": 0.24446737759431258, "grad_norm": 0.2890368700027466, "learning_rate": 0.0009930924129285465, "loss": 1.6223, "step": 533 }, { "epoch": 0.24492604059167528, "grad_norm": 0.21782201528549194, "learning_rate": 0.0009930513307957114, "loss": 1.2415, "step": 534 }, { "epoch": 0.24538470358903797, "grad_norm": 0.3002353310585022, "learning_rate": 0.0009930101277138299, "loss": 1.6741, "step": 535 }, { "epoch": 0.24584336658640063, "grad_norm": 0.21679870784282684, "learning_rate": 0.000992968803693009, "loss": 1.2902, "step": 536 }, { "epoch": 0.24630202958376332, "grad_norm": 0.4101315140724182, "learning_rate": 0.0009929273587433863, "loss": 1.9695, "step": 537 }, { "epoch": 0.246760692581126, "grad_norm": 0.355189710855484, "learning_rate": 0.0009928857928751282, "loss": 1.7978, "step": 538 }, { "epoch": 0.2472193555784887, "grad_norm": 0.2238903045654297, "learning_rate": 0.0009928441060984311, "loss": 1.2032, "step": 539 }, { "epoch": 0.2476780185758514, "grad_norm": 0.31160059571266174, "learning_rate": 0.0009928022984235214, "loss": 1.6759, "step": 540 }, { "epoch": 0.24813668157321409, "grad_norm": 0.3302862346172333, "learning_rate": 0.0009927603698606543, "loss": 1.7676, "step": 541 }, { "epoch": 0.24859534457057678, "grad_norm": 0.27699384093284607, "learning_rate": 0.0009927183204201159, "loss": 1.348, "step": 542 }, { "epoch": 0.24905400756793947, "grad_norm": 0.33498769998550415, "learning_rate": 0.0009926761501122205, "loss": 1.6743, "step": 543 }, { "epoch": 0.24951267056530213, "grad_norm": 0.2707799971103668, "learning_rate": 0.0009926338589473134, "loss": 1.3558, "step": 544 }, { "epoch": 0.24997133356266482, "grad_norm": 0.2782471776008606, "learning_rate": 0.0009925914469357685, "loss": 1.2665, "step": 545 }, { "epoch": 0.2504299965600275, "grad_norm": 0.4235142171382904, "learning_rate": 0.0009925489140879898, "loss": 2.0011, "step": 546 }, { "epoch": 0.2508886595573902, "grad_norm": 0.3951006233692169, "learning_rate": 0.000992506260414411, "loss": 1.7933, "step": 547 }, { "epoch": 0.2513473225547529, "grad_norm": 0.26279470324516296, "learning_rate": 0.0009924634859254952, "loss": 1.0981, "step": 548 }, { "epoch": 0.2518059855521156, "grad_norm": 0.3145405650138855, "learning_rate": 0.000992420590631736, "loss": 1.5393, "step": 549 }, { "epoch": 0.2522646485494783, "grad_norm": 0.2764291763305664, "learning_rate": 0.0009923775745436549, "loss": 1.2642, "step": 550 }, { "epoch": 0.25272331154684097, "grad_norm": 0.3677400052547455, "learning_rate": 0.0009923344376718047, "loss": 1.1807, "step": 551 }, { "epoch": 0.25318197454420366, "grad_norm": 0.4418051242828369, "learning_rate": 0.000992291180026767, "loss": 2.4766, "step": 552 }, { "epoch": 0.25364063754156635, "grad_norm": 0.3161998391151428, "learning_rate": 0.0009922478016191531, "loss": 1.3802, "step": 553 }, { "epoch": 0.25409930053892904, "grad_norm": 0.39167219400405884, "learning_rate": 0.0009922043024596045, "loss": 2.0244, "step": 554 }, { "epoch": 0.25455796353629173, "grad_norm": 0.3040946125984192, "learning_rate": 0.0009921606825587913, "loss": 1.6199, "step": 555 }, { "epoch": 0.2550166265336544, "grad_norm": 0.22090859711170197, "learning_rate": 0.000992116941927414, "loss": 1.0001, "step": 556 }, { "epoch": 0.25547528953101706, "grad_norm": 0.29208970069885254, "learning_rate": 0.0009920730805762026, "loss": 1.3642, "step": 557 }, { "epoch": 0.25593395252837975, "grad_norm": 0.3597604036331177, "learning_rate": 0.0009920290985159165, "loss": 1.6378, "step": 558 }, { "epoch": 0.25639261552574244, "grad_norm": 0.36907076835632324, "learning_rate": 0.0009919849957573449, "loss": 2.0097, "step": 559 }, { "epoch": 0.25685127852310513, "grad_norm": 0.5593513250350952, "learning_rate": 0.0009919407723113062, "loss": 2.4558, "step": 560 }, { "epoch": 0.2573099415204678, "grad_norm": 0.38623642921447754, "learning_rate": 0.0009918964281886492, "loss": 1.9032, "step": 561 }, { "epoch": 0.2577686045178305, "grad_norm": 0.3310829997062683, "learning_rate": 0.0009918519634002515, "loss": 1.403, "step": 562 }, { "epoch": 0.2582272675151932, "grad_norm": 0.3338778018951416, "learning_rate": 0.0009918073779570206, "loss": 1.7481, "step": 563 }, { "epoch": 0.2586859305125559, "grad_norm": 0.32507607340812683, "learning_rate": 0.000991762671869894, "loss": 1.4885, "step": 564 }, { "epoch": 0.2591445935099186, "grad_norm": 0.13469289243221283, "learning_rate": 0.0009917178451498382, "loss": 0.7271, "step": 565 }, { "epoch": 0.2596032565072813, "grad_norm": 0.28605028986930847, "learning_rate": 0.0009916728978078494, "loss": 1.3297, "step": 566 }, { "epoch": 0.26006191950464397, "grad_norm": 0.4288422465324402, "learning_rate": 0.0009916278298549538, "loss": 2.4978, "step": 567 }, { "epoch": 0.26052058250200666, "grad_norm": 0.2774169445037842, "learning_rate": 0.0009915826413022064, "loss": 1.5432, "step": 568 }, { "epoch": 0.26097924549936935, "grad_norm": 0.23148323595523834, "learning_rate": 0.0009915373321606928, "loss": 1.2222, "step": 569 }, { "epoch": 0.26143790849673204, "grad_norm": 0.4910981059074402, "learning_rate": 0.0009914919024415275, "loss": 2.5752, "step": 570 }, { "epoch": 0.26189657149409473, "grad_norm": 0.2878377139568329, "learning_rate": 0.0009914463521558546, "loss": 1.4696, "step": 571 }, { "epoch": 0.2623552344914574, "grad_norm": 0.24708285927772522, "learning_rate": 0.000991400681314848, "loss": 1.3876, "step": 572 }, { "epoch": 0.2628138974888201, "grad_norm": 0.38844871520996094, "learning_rate": 0.0009913548899297111, "loss": 2.4133, "step": 573 }, { "epoch": 0.26327256048618275, "grad_norm": 0.3721879720687866, "learning_rate": 0.0009913089780116768, "loss": 1.9523, "step": 574 }, { "epoch": 0.26373122348354544, "grad_norm": 0.304868221282959, "learning_rate": 0.000991262945572008, "loss": 1.7794, "step": 575 }, { "epoch": 0.26418988648090813, "grad_norm": 0.24861812591552734, "learning_rate": 0.000991216792621996, "loss": 1.2682, "step": 576 }, { "epoch": 0.2646485494782708, "grad_norm": 0.44407734274864197, "learning_rate": 0.0009911705191729633, "loss": 2.2016, "step": 577 }, { "epoch": 0.2651072124756335, "grad_norm": 0.33282753825187683, "learning_rate": 0.0009911241252362603, "loss": 1.6449, "step": 578 }, { "epoch": 0.2655658754729962, "grad_norm": 0.3493805527687073, "learning_rate": 0.0009910776108232686, "loss": 1.6497, "step": 579 }, { "epoch": 0.2660245384703589, "grad_norm": 0.36523211002349854, "learning_rate": 0.000991030975945398, "loss": 2.0684, "step": 580 }, { "epoch": 0.2664832014677216, "grad_norm": 0.5898191928863525, "learning_rate": 0.0009909842206140883, "loss": 1.5042, "step": 581 }, { "epoch": 0.2669418644650843, "grad_norm": 0.14400826394557953, "learning_rate": 0.0009909373448408092, "loss": 0.7091, "step": 582 }, { "epoch": 0.26740052746244697, "grad_norm": 0.36130577325820923, "learning_rate": 0.0009908903486370597, "loss": 1.7273, "step": 583 }, { "epoch": 0.26785919045980966, "grad_norm": 0.2697531282901764, "learning_rate": 0.000990843232014368, "loss": 1.4963, "step": 584 }, { "epoch": 0.26831785345717235, "grad_norm": 0.34319204092025757, "learning_rate": 0.0009907959949842925, "loss": 1.804, "step": 585 }, { "epoch": 0.26877651645453504, "grad_norm": 0.19325220584869385, "learning_rate": 0.0009907486375584204, "loss": 0.9886, "step": 586 }, { "epoch": 0.26923517945189773, "grad_norm": 0.23447225987911224, "learning_rate": 0.0009907011597483691, "loss": 1.2031, "step": 587 }, { "epoch": 0.2696938424492604, "grad_norm": 0.32570499181747437, "learning_rate": 0.0009906535615657852, "loss": 1.8141, "step": 588 }, { "epoch": 0.2701525054466231, "grad_norm": 0.40676188468933105, "learning_rate": 0.0009906058430223447, "loss": 2.1973, "step": 589 }, { "epoch": 0.2706111684439858, "grad_norm": 0.3603213429450989, "learning_rate": 0.0009905580041297537, "loss": 1.9151, "step": 590 }, { "epoch": 0.2710698314413485, "grad_norm": 0.3043905794620514, "learning_rate": 0.0009905100448997471, "loss": 1.5438, "step": 591 }, { "epoch": 0.27152849443871113, "grad_norm": 0.1927344799041748, "learning_rate": 0.0009904619653440898, "loss": 0.9269, "step": 592 }, { "epoch": 0.2719871574360738, "grad_norm": 0.29657939076423645, "learning_rate": 0.000990413765474576, "loss": 1.4634, "step": 593 }, { "epoch": 0.2724458204334365, "grad_norm": 0.1932910680770874, "learning_rate": 0.0009903654453030293, "loss": 1.029, "step": 594 }, { "epoch": 0.2729044834307992, "grad_norm": 0.21558885276317596, "learning_rate": 0.0009903170048413032, "loss": 1.0732, "step": 595 }, { "epoch": 0.2733631464281619, "grad_norm": 0.32209712266921997, "learning_rate": 0.0009902684441012804, "loss": 1.5956, "step": 596 }, { "epoch": 0.2738218094255246, "grad_norm": 0.36815398931503296, "learning_rate": 0.0009902197630948734, "loss": 1.8898, "step": 597 }, { "epoch": 0.2742804724228873, "grad_norm": 0.28202107548713684, "learning_rate": 0.0009901709618340237, "loss": 1.4518, "step": 598 }, { "epoch": 0.27473913542024997, "grad_norm": 0.36555325984954834, "learning_rate": 0.0009901220403307027, "loss": 1.9685, "step": 599 }, { "epoch": 0.27519779841761266, "grad_norm": 0.35351619124412537, "learning_rate": 0.0009900729985969114, "loss": 2.0413, "step": 600 }, { "epoch": 0.27565646141497535, "grad_norm": 0.2880600690841675, "learning_rate": 0.0009900238366446798, "loss": 1.527, "step": 601 }, { "epoch": 0.27611512441233804, "grad_norm": 0.24242261052131653, "learning_rate": 0.000989974554486068, "loss": 1.2521, "step": 602 }, { "epoch": 0.27657378740970073, "grad_norm": 0.3457260727882385, "learning_rate": 0.000989925152133165, "loss": 1.9675, "step": 603 }, { "epoch": 0.2770324504070634, "grad_norm": 0.337833046913147, "learning_rate": 0.0009898756295980897, "loss": 1.7602, "step": 604 }, { "epoch": 0.2774911134044261, "grad_norm": 0.6493378281593323, "learning_rate": 0.00098982598689299, "loss": 1.5513, "step": 605 }, { "epoch": 0.2779497764017888, "grad_norm": 0.34193724393844604, "learning_rate": 0.0009897762240300442, "loss": 1.8746, "step": 606 }, { "epoch": 0.2784084393991515, "grad_norm": 0.33631783723831177, "learning_rate": 0.000989726341021459, "loss": 1.6999, "step": 607 }, { "epoch": 0.2788671023965142, "grad_norm": 0.13958394527435303, "learning_rate": 0.0009896763378794712, "loss": 0.6808, "step": 608 }, { "epoch": 0.2793257653938768, "grad_norm": 0.2809588313102722, "learning_rate": 0.0009896262146163472, "loss": 1.3181, "step": 609 }, { "epoch": 0.2797844283912395, "grad_norm": 0.3490467965602875, "learning_rate": 0.0009895759712443821, "loss": 2.0132, "step": 610 }, { "epoch": 0.2802430913886022, "grad_norm": 0.3802107274532318, "learning_rate": 0.0009895256077759014, "loss": 2.2368, "step": 611 }, { "epoch": 0.2807017543859649, "grad_norm": 0.2700074017047882, "learning_rate": 0.0009894751242232596, "loss": 1.6661, "step": 612 }, { "epoch": 0.2811604173833276, "grad_norm": 0.3280056416988373, "learning_rate": 0.0009894245205988401, "loss": 1.6721, "step": 613 }, { "epoch": 0.2816190803806903, "grad_norm": 0.31935131549835205, "learning_rate": 0.0009893737969150572, "loss": 1.3081, "step": 614 }, { "epoch": 0.28207774337805297, "grad_norm": 0.4210816025733948, "learning_rate": 0.000989322953184353, "loss": 2.1636, "step": 615 }, { "epoch": 0.28253640637541566, "grad_norm": 0.38598984479904175, "learning_rate": 0.0009892719894192003, "loss": 2.2451, "step": 616 }, { "epoch": 0.28299506937277835, "grad_norm": 0.28689202666282654, "learning_rate": 0.0009892209056321009, "loss": 1.0069, "step": 617 }, { "epoch": 0.28345373237014104, "grad_norm": 0.23823553323745728, "learning_rate": 0.0009891697018355858, "loss": 1.0115, "step": 618 }, { "epoch": 0.28391239536750373, "grad_norm": 0.4259023666381836, "learning_rate": 0.0009891183780422158, "loss": 1.9298, "step": 619 }, { "epoch": 0.2843710583648664, "grad_norm": 0.3770276606082916, "learning_rate": 0.0009890669342645807, "loss": 1.7634, "step": 620 }, { "epoch": 0.2848297213622291, "grad_norm": 0.42291995882987976, "learning_rate": 0.0009890153705153005, "loss": 1.9407, "step": 621 }, { "epoch": 0.2852883843595918, "grad_norm": 0.35341787338256836, "learning_rate": 0.0009889636868070238, "loss": 1.7191, "step": 622 }, { "epoch": 0.2857470473569545, "grad_norm": 0.3987595736980438, "learning_rate": 0.000988911883152429, "loss": 2.0328, "step": 623 }, { "epoch": 0.2862057103543172, "grad_norm": 0.30864641070365906, "learning_rate": 0.0009888599595642244, "loss": 1.351, "step": 624 }, { "epoch": 0.2866643733516799, "grad_norm": 0.25297942757606506, "learning_rate": 0.0009888079160551466, "loss": 1.2824, "step": 625 }, { "epoch": 0.2871230363490425, "grad_norm": 0.4757532477378845, "learning_rate": 0.0009887557526379626, "loss": 2.8047, "step": 626 }, { "epoch": 0.2875816993464052, "grad_norm": 0.3067730665206909, "learning_rate": 0.0009887034693254683, "loss": 1.437, "step": 627 }, { "epoch": 0.2880403623437679, "grad_norm": 0.3403368294239044, "learning_rate": 0.0009886510661304892, "loss": 1.9881, "step": 628 }, { "epoch": 0.2884990253411306, "grad_norm": 0.21943096816539764, "learning_rate": 0.0009885985430658803, "loss": 1.1086, "step": 629 }, { "epoch": 0.2889576883384933, "grad_norm": 0.19929127395153046, "learning_rate": 0.000988545900144526, "loss": 1.0739, "step": 630 }, { "epoch": 0.28941635133585597, "grad_norm": 0.30291882157325745, "learning_rate": 0.0009884931373793395, "loss": 1.7326, "step": 631 }, { "epoch": 0.28987501433321866, "grad_norm": 0.23580683767795563, "learning_rate": 0.0009884402547832643, "loss": 1.3195, "step": 632 }, { "epoch": 0.29033367733058135, "grad_norm": 0.24452835321426392, "learning_rate": 0.0009883872523692727, "loss": 1.2024, "step": 633 }, { "epoch": 0.29079234032794404, "grad_norm": 0.3366321325302124, "learning_rate": 0.0009883341301503666, "loss": 1.7041, "step": 634 }, { "epoch": 0.29125100332530673, "grad_norm": 0.23907805979251862, "learning_rate": 0.0009882808881395773, "loss": 1.2118, "step": 635 }, { "epoch": 0.2917096663226694, "grad_norm": 0.28174489736557007, "learning_rate": 0.0009882275263499655, "loss": 1.494, "step": 636 }, { "epoch": 0.2921683293200321, "grad_norm": 0.2500317394733429, "learning_rate": 0.0009881740447946212, "loss": 1.379, "step": 637 }, { "epoch": 0.2926269923173948, "grad_norm": 0.3939465880393982, "learning_rate": 0.000988120443486664, "loss": 1.8527, "step": 638 }, { "epoch": 0.2930856553147575, "grad_norm": 0.2760303318500519, "learning_rate": 0.0009880667224392422, "loss": 1.5243, "step": 639 }, { "epoch": 0.2935443183121202, "grad_norm": 0.252908855676651, "learning_rate": 0.0009880128816655343, "loss": 1.0428, "step": 640 }, { "epoch": 0.2940029813094829, "grad_norm": 0.5029557943344116, "learning_rate": 0.0009879589211787478, "loss": 2.1211, "step": 641 }, { "epoch": 0.29446164430684557, "grad_norm": 0.3588217794895172, "learning_rate": 0.0009879048409921196, "loss": 1.4391, "step": 642 }, { "epoch": 0.2949203073042082, "grad_norm": 0.28557729721069336, "learning_rate": 0.000987850641118916, "loss": 1.3001, "step": 643 }, { "epoch": 0.2953789703015709, "grad_norm": 0.42318931221961975, "learning_rate": 0.0009877963215724323, "loss": 1.9521, "step": 644 }, { "epoch": 0.2958376332989336, "grad_norm": 0.22379803657531738, "learning_rate": 0.000987741882365994, "loss": 1.101, "step": 645 }, { "epoch": 0.2962962962962963, "grad_norm": 0.31854167580604553, "learning_rate": 0.000987687323512955, "loss": 1.4523, "step": 646 }, { "epoch": 0.29675495929365897, "grad_norm": 0.2017553597688675, "learning_rate": 0.0009876326450266992, "loss": 0.9974, "step": 647 }, { "epoch": 0.29721362229102166, "grad_norm": 0.19907866418361664, "learning_rate": 0.0009875778469206396, "loss": 0.9805, "step": 648 }, { "epoch": 0.29767228528838435, "grad_norm": 0.36655089259147644, "learning_rate": 0.0009875229292082184, "loss": 2.0823, "step": 649 }, { "epoch": 0.29813094828574704, "grad_norm": 0.30942440032958984, "learning_rate": 0.0009874678919029073, "loss": 1.5203, "step": 650 }, { "epoch": 0.29858961128310973, "grad_norm": 0.3907017707824707, "learning_rate": 0.0009874127350182078, "loss": 2.2189, "step": 651 }, { "epoch": 0.2990482742804724, "grad_norm": 0.3971099555492401, "learning_rate": 0.00098735745856765, "loss": 2.0348, "step": 652 }, { "epoch": 0.2995069372778351, "grad_norm": 0.24980095028877258, "learning_rate": 0.0009873020625647934, "loss": 1.2255, "step": 653 }, { "epoch": 0.2999656002751978, "grad_norm": 0.21592852473258972, "learning_rate": 0.0009872465470232273, "loss": 1.0114, "step": 654 }, { "epoch": 0.3004242632725605, "grad_norm": 0.3222113847732544, "learning_rate": 0.00098719091195657, "loss": 1.4415, "step": 655 }, { "epoch": 0.3008829262699232, "grad_norm": 0.3443264067173004, "learning_rate": 0.000987135157378469, "loss": 1.6269, "step": 656 }, { "epoch": 0.3013415892672859, "grad_norm": 0.32391127943992615, "learning_rate": 0.0009870792833026017, "loss": 1.5997, "step": 657 }, { "epoch": 0.30180025226464857, "grad_norm": 0.20386099815368652, "learning_rate": 0.000987023289742674, "loss": 1.1451, "step": 658 }, { "epoch": 0.30225891526201126, "grad_norm": 0.24229376018047333, "learning_rate": 0.0009869671767124217, "loss": 1.2804, "step": 659 }, { "epoch": 0.3027175782593739, "grad_norm": 0.3021294176578522, "learning_rate": 0.00098691094422561, "loss": 1.8578, "step": 660 }, { "epoch": 0.3031762412567366, "grad_norm": 0.33602842688560486, "learning_rate": 0.0009868545922960326, "loss": 1.9222, "step": 661 }, { "epoch": 0.3036349042540993, "grad_norm": 0.2946925759315491, "learning_rate": 0.0009867981209375134, "loss": 1.6532, "step": 662 }, { "epoch": 0.30409356725146197, "grad_norm": 0.2836867570877075, "learning_rate": 0.0009867415301639051, "loss": 1.5413, "step": 663 }, { "epoch": 0.30455223024882466, "grad_norm": 0.3318963050842285, "learning_rate": 0.00098668481998909, "loss": 1.9834, "step": 664 }, { "epoch": 0.30501089324618735, "grad_norm": 0.2361811101436615, "learning_rate": 0.0009866279904269793, "loss": 1.1826, "step": 665 }, { "epoch": 0.30546955624355004, "grad_norm": 0.20993195474147797, "learning_rate": 0.0009865710414915137, "loss": 1.0908, "step": 666 }, { "epoch": 0.30592821924091274, "grad_norm": 0.11743535846471786, "learning_rate": 0.0009865139731966633, "loss": 0.6957, "step": 667 }, { "epoch": 0.3063868822382754, "grad_norm": 0.4895835816860199, "learning_rate": 0.0009864567855564273, "loss": 2.6428, "step": 668 }, { "epoch": 0.3068455452356381, "grad_norm": 0.3493427038192749, "learning_rate": 0.0009863994785848343, "loss": 1.5343, "step": 669 }, { "epoch": 0.3073042082330008, "grad_norm": 0.36788055300712585, "learning_rate": 0.0009863420522959422, "loss": 1.7888, "step": 670 }, { "epoch": 0.3077628712303635, "grad_norm": 0.3058752715587616, "learning_rate": 0.0009862845067038378, "loss": 1.4943, "step": 671 }, { "epoch": 0.3082215342277262, "grad_norm": 0.29403895139694214, "learning_rate": 0.0009862268418226376, "loss": 1.4614, "step": 672 }, { "epoch": 0.3086801972250889, "grad_norm": 0.3854977488517761, "learning_rate": 0.0009861690576664873, "loss": 1.4861, "step": 673 }, { "epoch": 0.3091388602224516, "grad_norm": 0.404697984457016, "learning_rate": 0.0009861111542495617, "loss": 2.0234, "step": 674 }, { "epoch": 0.30959752321981426, "grad_norm": 0.3911391496658325, "learning_rate": 0.000986053131586065, "loss": 2.011, "step": 675 }, { "epoch": 0.31005618621717695, "grad_norm": 0.2827008068561554, "learning_rate": 0.0009859949896902304, "loss": 1.0028, "step": 676 }, { "epoch": 0.3105148492145396, "grad_norm": 0.3550345003604889, "learning_rate": 0.0009859367285763206, "loss": 1.6724, "step": 677 }, { "epoch": 0.3109735122119023, "grad_norm": 0.33875733613967896, "learning_rate": 0.0009858783482586276, "loss": 1.8332, "step": 678 }, { "epoch": 0.31143217520926497, "grad_norm": 0.3210250437259674, "learning_rate": 0.0009858198487514723, "loss": 1.4062, "step": 679 }, { "epoch": 0.31189083820662766, "grad_norm": 0.24080604314804077, "learning_rate": 0.0009857612300692052, "loss": 1.1792, "step": 680 }, { "epoch": 0.31234950120399035, "grad_norm": 0.2961960434913635, "learning_rate": 0.0009857024922262057, "loss": 1.3523, "step": 681 }, { "epoch": 0.31280816420135305, "grad_norm": 0.32249388098716736, "learning_rate": 0.0009856436352368829, "loss": 1.6275, "step": 682 }, { "epoch": 0.31326682719871574, "grad_norm": 0.23962727189064026, "learning_rate": 0.0009855846591156747, "loss": 1.3343, "step": 683 }, { "epoch": 0.3137254901960784, "grad_norm": 0.24461761116981506, "learning_rate": 0.0009855255638770485, "loss": 1.4974, "step": 684 }, { "epoch": 0.3141841531934411, "grad_norm": 0.36309170722961426, "learning_rate": 0.0009854663495355005, "loss": 1.7183, "step": 685 }, { "epoch": 0.3146428161908038, "grad_norm": 0.35972607135772705, "learning_rate": 0.0009854070161055566, "loss": 1.9509, "step": 686 }, { "epoch": 0.3151014791881665, "grad_norm": 0.3379707932472229, "learning_rate": 0.0009853475636017717, "loss": 1.9371, "step": 687 }, { "epoch": 0.3155601421855292, "grad_norm": 0.3258635401725769, "learning_rate": 0.0009852879920387299, "loss": 1.567, "step": 688 }, { "epoch": 0.3160188051828919, "grad_norm": 0.36305832862854004, "learning_rate": 0.0009852283014310445, "loss": 2.1206, "step": 689 }, { "epoch": 0.3164774681802546, "grad_norm": 0.3250496983528137, "learning_rate": 0.000985168491793358, "loss": 1.7597, "step": 690 }, { "epoch": 0.31693613117761726, "grad_norm": 0.39209747314453125, "learning_rate": 0.0009851085631403425, "loss": 2.2836, "step": 691 }, { "epoch": 0.31739479417497996, "grad_norm": 0.3178749978542328, "learning_rate": 0.0009850485154866983, "loss": 1.8535, "step": 692 }, { "epoch": 0.31785345717234265, "grad_norm": 0.39518505334854126, "learning_rate": 0.0009849883488471562, "loss": 1.8896, "step": 693 }, { "epoch": 0.3183121201697053, "grad_norm": 0.49922698736190796, "learning_rate": 0.000984928063236475, "loss": 1.8992, "step": 694 }, { "epoch": 0.318770783167068, "grad_norm": 0.3633042871952057, "learning_rate": 0.0009848676586694434, "loss": 1.8554, "step": 695 }, { "epoch": 0.31922944616443066, "grad_norm": 0.4339780807495117, "learning_rate": 0.000984807135160879, "loss": 2.4392, "step": 696 }, { "epoch": 0.31968810916179335, "grad_norm": 0.2694472074508667, "learning_rate": 0.0009847464927256288, "loss": 1.1097, "step": 697 }, { "epoch": 0.32014677215915605, "grad_norm": 0.3319615125656128, "learning_rate": 0.000984685731378569, "loss": 1.5832, "step": 698 }, { "epoch": 0.32060543515651874, "grad_norm": 0.39388328790664673, "learning_rate": 0.0009846248511346041, "loss": 2.2849, "step": 699 }, { "epoch": 0.3210640981538814, "grad_norm": 0.42792367935180664, "learning_rate": 0.0009845638520086688, "loss": 2.3538, "step": 700 }, { "epoch": 0.3215227611512441, "grad_norm": 0.4416462182998657, "learning_rate": 0.000984502734015727, "loss": 2.5254, "step": 701 }, { "epoch": 0.3219814241486068, "grad_norm": 0.3952227532863617, "learning_rate": 0.000984441497170771, "loss": 2.0428, "step": 702 }, { "epoch": 0.3224400871459695, "grad_norm": 0.2395290583372116, "learning_rate": 0.000984380141488823, "loss": 0.9864, "step": 703 }, { "epoch": 0.3228987501433322, "grad_norm": 0.4042597711086273, "learning_rate": 0.0009843186669849333, "loss": 1.9032, "step": 704 }, { "epoch": 0.3233574131406949, "grad_norm": 0.3238537013530731, "learning_rate": 0.0009842570736741827, "loss": 1.6617, "step": 705 }, { "epoch": 0.3238160761380576, "grad_norm": 0.3476428985595703, "learning_rate": 0.0009841953615716804, "loss": 1.761, "step": 706 }, { "epoch": 0.32427473913542026, "grad_norm": 0.47932443022727966, "learning_rate": 0.0009841335306925644, "loss": 2.4644, "step": 707 }, { "epoch": 0.32473340213278296, "grad_norm": 0.39882582426071167, "learning_rate": 0.0009840715810520027, "loss": 2.0681, "step": 708 }, { "epoch": 0.32519206513014565, "grad_norm": 0.3567412793636322, "learning_rate": 0.0009840095126651919, "loss": 1.4626, "step": 709 }, { "epoch": 0.32565072812750834, "grad_norm": 0.29540038108825684, "learning_rate": 0.0009839473255473575, "loss": 1.2916, "step": 710 }, { "epoch": 0.32610939112487103, "grad_norm": 0.35967186093330383, "learning_rate": 0.0009838850197137548, "loss": 1.5812, "step": 711 }, { "epoch": 0.32656805412223366, "grad_norm": 0.4076613485813141, "learning_rate": 0.0009838225951796678, "loss": 1.8511, "step": 712 }, { "epoch": 0.32702671711959636, "grad_norm": 0.37241417169570923, "learning_rate": 0.0009837600519604097, "loss": 1.3398, "step": 713 }, { "epoch": 0.32748538011695905, "grad_norm": 0.3827233612537384, "learning_rate": 0.0009836973900713226, "loss": 1.9355, "step": 714 }, { "epoch": 0.32794404311432174, "grad_norm": 0.3503935635089874, "learning_rate": 0.0009836346095277782, "loss": 1.5865, "step": 715 }, { "epoch": 0.32840270611168443, "grad_norm": 0.346055269241333, "learning_rate": 0.000983571710345177, "loss": 1.879, "step": 716 }, { "epoch": 0.3288613691090471, "grad_norm": 0.3340051770210266, "learning_rate": 0.0009835086925389484, "loss": 1.531, "step": 717 }, { "epoch": 0.3293200321064098, "grad_norm": 0.3591856360435486, "learning_rate": 0.0009834455561245512, "loss": 1.5611, "step": 718 }, { "epoch": 0.3297786951037725, "grad_norm": 0.08197461068630219, "learning_rate": 0.0009833823011174735, "loss": 0.45, "step": 719 }, { "epoch": 0.3302373581011352, "grad_norm": 0.35696059465408325, "learning_rate": 0.0009833189275332318, "loss": 2.2035, "step": 720 }, { "epoch": 0.3306960210984979, "grad_norm": 0.21588194370269775, "learning_rate": 0.0009832554353873721, "loss": 1.1911, "step": 721 }, { "epoch": 0.3311546840958606, "grad_norm": 0.313678503036499, "learning_rate": 0.00098319182469547, "loss": 1.956, "step": 722 }, { "epoch": 0.33161334709322327, "grad_norm": 0.3138997554779053, "learning_rate": 0.000983128095473129, "loss": 1.5287, "step": 723 }, { "epoch": 0.33207201009058596, "grad_norm": 0.38969993591308594, "learning_rate": 0.0009830642477359828, "loss": 2.4346, "step": 724 }, { "epoch": 0.33253067308794865, "grad_norm": 0.21810193359851837, "learning_rate": 0.0009830002814996935, "loss": 1.0224, "step": 725 }, { "epoch": 0.33298933608531134, "grad_norm": 0.11237790435552597, "learning_rate": 0.0009829361967799526, "loss": 0.6925, "step": 726 }, { "epoch": 0.33344799908267403, "grad_norm": 0.31989771127700806, "learning_rate": 0.0009828719935924804, "loss": 1.4384, "step": 727 }, { "epoch": 0.3339066620800367, "grad_norm": 0.4660812318325043, "learning_rate": 0.0009828076719530265, "loss": 2.5269, "step": 728 }, { "epoch": 0.33436532507739936, "grad_norm": 0.37333112955093384, "learning_rate": 0.0009827432318773694, "loss": 2.13, "step": 729 }, { "epoch": 0.33482398807476205, "grad_norm": 0.32906556129455566, "learning_rate": 0.000982678673381317, "loss": 1.5657, "step": 730 }, { "epoch": 0.33528265107212474, "grad_norm": 0.3006758689880371, "learning_rate": 0.0009826139964807055, "loss": 1.4011, "step": 731 }, { "epoch": 0.33574131406948743, "grad_norm": 0.3466986119747162, "learning_rate": 0.0009825492011914009, "loss": 2.008, "step": 732 }, { "epoch": 0.3361999770668501, "grad_norm": 0.29313912987709045, "learning_rate": 0.0009824842875292977, "loss": 1.337, "step": 733 }, { "epoch": 0.3366586400642128, "grad_norm": 0.3280053436756134, "learning_rate": 0.0009824192555103202, "loss": 1.6918, "step": 734 }, { "epoch": 0.3371173030615755, "grad_norm": 0.22816585004329681, "learning_rate": 0.0009823541051504207, "loss": 1.1973, "step": 735 }, { "epoch": 0.3375759660589382, "grad_norm": 0.3135545551776886, "learning_rate": 0.0009822888364655813, "loss": 1.6632, "step": 736 }, { "epoch": 0.3380346290563009, "grad_norm": 0.30442047119140625, "learning_rate": 0.0009822234494718128, "loss": 1.3535, "step": 737 }, { "epoch": 0.3384932920536636, "grad_norm": 0.2706790268421173, "learning_rate": 0.0009821579441851552, "loss": 1.0, "step": 738 }, { "epoch": 0.33895195505102627, "grad_norm": 0.39678725600242615, "learning_rate": 0.0009820923206216774, "loss": 1.9822, "step": 739 }, { "epoch": 0.33941061804838896, "grad_norm": 0.38112956285476685, "learning_rate": 0.0009820265787974772, "loss": 2.0559, "step": 740 }, { "epoch": 0.33986928104575165, "grad_norm": 0.32253459095954895, "learning_rate": 0.0009819607187286816, "loss": 1.389, "step": 741 }, { "epoch": 0.34032794404311434, "grad_norm": 0.34095335006713867, "learning_rate": 0.0009818947404314465, "loss": 1.7615, "step": 742 }, { "epoch": 0.34078660704047703, "grad_norm": 0.36802440881729126, "learning_rate": 0.0009818286439219571, "loss": 1.8613, "step": 743 }, { "epoch": 0.3412452700378397, "grad_norm": 0.415279746055603, "learning_rate": 0.0009817624292164271, "loss": 1.9945, "step": 744 }, { "epoch": 0.3417039330352024, "grad_norm": 0.3048365116119385, "learning_rate": 0.0009816960963310998, "loss": 1.6786, "step": 745 }, { "epoch": 0.34216259603256505, "grad_norm": 0.23415561020374298, "learning_rate": 0.0009816296452822464, "loss": 1.0875, "step": 746 }, { "epoch": 0.34262125902992774, "grad_norm": 0.3987458348274231, "learning_rate": 0.0009815630760861684, "loss": 1.94, "step": 747 }, { "epoch": 0.34307992202729043, "grad_norm": 0.31393012404441833, "learning_rate": 0.000981496388759196, "loss": 1.4587, "step": 748 }, { "epoch": 0.3435385850246531, "grad_norm": 0.29491788148880005, "learning_rate": 0.0009814295833176871, "loss": 1.3318, "step": 749 }, { "epoch": 0.3439972480220158, "grad_norm": 0.23884756863117218, "learning_rate": 0.0009813626597780305, "loss": 0.908, "step": 750 }, { "epoch": 0.3444559110193785, "grad_norm": 0.3067830502986908, "learning_rate": 0.0009812956181566425, "loss": 1.4077, "step": 751 }, { "epoch": 0.3449145740167412, "grad_norm": 0.42813050746917725, "learning_rate": 0.000981228458469969, "loss": 2.1209, "step": 752 }, { "epoch": 0.3453732370141039, "grad_norm": 0.20336052775382996, "learning_rate": 0.0009811611807344848, "loss": 0.9812, "step": 753 }, { "epoch": 0.3458319000114666, "grad_norm": 0.35509541630744934, "learning_rate": 0.0009810937849666938, "loss": 2.0477, "step": 754 }, { "epoch": 0.34629056300882927, "grad_norm": 0.3570297360420227, "learning_rate": 0.0009810262711831283, "loss": 2.0165, "step": 755 }, { "epoch": 0.34674922600619196, "grad_norm": 0.3037537932395935, "learning_rate": 0.0009809586394003502, "loss": 1.6942, "step": 756 }, { "epoch": 0.34720788900355465, "grad_norm": 0.30765625834465027, "learning_rate": 0.00098089088963495, "loss": 1.7054, "step": 757 }, { "epoch": 0.34766655200091734, "grad_norm": 0.3930741250514984, "learning_rate": 0.0009808230219035471, "loss": 2.2827, "step": 758 }, { "epoch": 0.34812521499828003, "grad_norm": 0.34178081154823303, "learning_rate": 0.00098075503622279, "loss": 1.7407, "step": 759 }, { "epoch": 0.3485838779956427, "grad_norm": 0.2623182237148285, "learning_rate": 0.0009806869326093562, "loss": 1.2565, "step": 760 }, { "epoch": 0.3490425409930054, "grad_norm": 0.4342869520187378, "learning_rate": 0.0009806187110799518, "loss": 2.2317, "step": 761 }, { "epoch": 0.3495012039903681, "grad_norm": 0.11686166375875473, "learning_rate": 0.0009805503716513121, "loss": 0.5233, "step": 762 }, { "epoch": 0.34995986698773074, "grad_norm": 0.3906189799308777, "learning_rate": 0.0009804819143402013, "loss": 1.9533, "step": 763 }, { "epoch": 0.35041852998509343, "grad_norm": 0.4378143548965454, "learning_rate": 0.0009804133391634125, "loss": 2.1091, "step": 764 }, { "epoch": 0.3508771929824561, "grad_norm": 0.2528671622276306, "learning_rate": 0.0009803446461377676, "loss": 1.0801, "step": 765 }, { "epoch": 0.3513358559798188, "grad_norm": 0.2157050520181656, "learning_rate": 0.0009802758352801177, "loss": 1.1054, "step": 766 }, { "epoch": 0.3517945189771815, "grad_norm": 0.4617224335670471, "learning_rate": 0.0009802069066073423, "loss": 2.3428, "step": 767 }, { "epoch": 0.3522531819745442, "grad_norm": 0.3335023820400238, "learning_rate": 0.0009801378601363502, "loss": 1.601, "step": 768 }, { "epoch": 0.3527118449719069, "grad_norm": 0.3270067870616913, "learning_rate": 0.000980068695884079, "loss": 1.4522, "step": 769 }, { "epoch": 0.3531705079692696, "grad_norm": 0.2701742947101593, "learning_rate": 0.0009799994138674955, "loss": 1.1839, "step": 770 }, { "epoch": 0.35362917096663227, "grad_norm": 0.3580704629421234, "learning_rate": 0.0009799300141035945, "loss": 1.8822, "step": 771 }, { "epoch": 0.35408783396399496, "grad_norm": 0.37125471234321594, "learning_rate": 0.0009798604966094007, "loss": 2.0035, "step": 772 }, { "epoch": 0.35454649696135765, "grad_norm": 0.3791847229003906, "learning_rate": 0.000979790861401967, "loss": 1.8893, "step": 773 }, { "epoch": 0.35500515995872034, "grad_norm": 0.3457506000995636, "learning_rate": 0.0009797211084983757, "loss": 1.3708, "step": 774 }, { "epoch": 0.35546382295608303, "grad_norm": 0.4049646854400635, "learning_rate": 0.0009796512379157375, "loss": 1.923, "step": 775 }, { "epoch": 0.3559224859534457, "grad_norm": 0.41086849570274353, "learning_rate": 0.000979581249671192, "loss": 1.848, "step": 776 }, { "epoch": 0.3563811489508084, "grad_norm": 0.32881003618240356, "learning_rate": 0.000979511143781908, "loss": 1.7467, "step": 777 }, { "epoch": 0.3568398119481711, "grad_norm": 0.3037266135215759, "learning_rate": 0.0009794409202650831, "loss": 1.6274, "step": 778 }, { "epoch": 0.3572984749455338, "grad_norm": 0.34774550795555115, "learning_rate": 0.0009793705791379436, "loss": 1.6964, "step": 779 }, { "epoch": 0.35775713794289643, "grad_norm": 0.19517025351524353, "learning_rate": 0.0009793001204177444, "loss": 1.1024, "step": 780 }, { "epoch": 0.3582158009402591, "grad_norm": 0.25842177867889404, "learning_rate": 0.0009792295441217699, "loss": 1.3525, "step": 781 }, { "epoch": 0.3586744639376218, "grad_norm": 0.2821122407913208, "learning_rate": 0.0009791588502673326, "loss": 1.2322, "step": 782 }, { "epoch": 0.3591331269349845, "grad_norm": 0.40477707982063293, "learning_rate": 0.0009790880388717746, "loss": 1.7263, "step": 783 }, { "epoch": 0.3595917899323472, "grad_norm": 0.39657363295555115, "learning_rate": 0.0009790171099524662, "loss": 2.3909, "step": 784 }, { "epoch": 0.3600504529297099, "grad_norm": 0.31579139828681946, "learning_rate": 0.0009789460635268067, "loss": 1.5805, "step": 785 }, { "epoch": 0.3605091159270726, "grad_norm": 0.4094010889530182, "learning_rate": 0.0009788748996122246, "loss": 2.2917, "step": 786 }, { "epoch": 0.36096777892443527, "grad_norm": 0.31713518500328064, "learning_rate": 0.0009788036182261767, "loss": 1.483, "step": 787 }, { "epoch": 0.36142644192179796, "grad_norm": 0.38277119398117065, "learning_rate": 0.000978732219386149, "loss": 2.1552, "step": 788 }, { "epoch": 0.36188510491916065, "grad_norm": 0.2237308770418167, "learning_rate": 0.000978660703109656, "loss": 1.2989, "step": 789 }, { "epoch": 0.36234376791652334, "grad_norm": 0.32253265380859375, "learning_rate": 0.0009785890694142413, "loss": 1.4193, "step": 790 }, { "epoch": 0.36280243091388603, "grad_norm": 0.28654542565345764, "learning_rate": 0.0009785173183174767, "loss": 0.8873, "step": 791 }, { "epoch": 0.3632610939112487, "grad_norm": 0.313298761844635, "learning_rate": 0.000978445449836964, "loss": 1.4503, "step": 792 }, { "epoch": 0.3637197569086114, "grad_norm": 0.3323945701122284, "learning_rate": 0.0009783734639903325, "loss": 1.4465, "step": 793 }, { "epoch": 0.3641784199059741, "grad_norm": 0.34506741166114807, "learning_rate": 0.0009783013607952411, "loss": 1.4567, "step": 794 }, { "epoch": 0.3646370829033368, "grad_norm": 0.36164945363998413, "learning_rate": 0.000978229140269377, "loss": 1.8277, "step": 795 }, { "epoch": 0.3650957459006995, "grad_norm": 0.40301600098609924, "learning_rate": 0.0009781568024304567, "loss": 1.7805, "step": 796 }, { "epoch": 0.3655544088980621, "grad_norm": 0.2755573093891144, "learning_rate": 0.0009780843472962251, "loss": 1.461, "step": 797 }, { "epoch": 0.3660130718954248, "grad_norm": 0.3744322657585144, "learning_rate": 0.000978011774884456, "loss": 2.009, "step": 798 }, { "epoch": 0.3664717348927875, "grad_norm": 0.4854291081428528, "learning_rate": 0.0009779390852129519, "loss": 1.8751, "step": 799 }, { "epoch": 0.3669303978901502, "grad_norm": 0.21938319504261017, "learning_rate": 0.000977866278299544, "loss": 0.996, "step": 800 }, { "epoch": 0.3673890608875129, "grad_norm": 0.3286993205547333, "learning_rate": 0.0009777933541620925, "loss": 1.4633, "step": 801 }, { "epoch": 0.3678477238848756, "grad_norm": 0.3668621778488159, "learning_rate": 0.000977720312818486, "loss": 1.9308, "step": 802 }, { "epoch": 0.36830638688223827, "grad_norm": 0.36969929933547974, "learning_rate": 0.0009776471542866424, "loss": 1.9707, "step": 803 }, { "epoch": 0.36876504987960096, "grad_norm": 0.22716723382472992, "learning_rate": 0.0009775738785845077, "loss": 0.9617, "step": 804 }, { "epoch": 0.36922371287696365, "grad_norm": 0.1840977519750595, "learning_rate": 0.0009775004857300572, "loss": 1.0723, "step": 805 }, { "epoch": 0.36968237587432634, "grad_norm": 0.2663559019565582, "learning_rate": 0.0009774269757412944, "loss": 1.6182, "step": 806 }, { "epoch": 0.37014103887168903, "grad_norm": 0.22786211967468262, "learning_rate": 0.0009773533486362523, "loss": 1.11, "step": 807 }, { "epoch": 0.3705997018690517, "grad_norm": 0.10840432345867157, "learning_rate": 0.0009772796044329917, "loss": 0.6749, "step": 808 }, { "epoch": 0.3710583648664144, "grad_norm": 0.3396431803703308, "learning_rate": 0.000977205743149603, "loss": 2.0026, "step": 809 }, { "epoch": 0.3715170278637771, "grad_norm": 0.34386134147644043, "learning_rate": 0.0009771317648042043, "loss": 2.0295, "step": 810 }, { "epoch": 0.3719756908611398, "grad_norm": 0.22869761288166046, "learning_rate": 0.0009770576694149435, "loss": 1.061, "step": 811 }, { "epoch": 0.3724343538585025, "grad_norm": 0.20139172673225403, "learning_rate": 0.000976983456999997, "loss": 0.9954, "step": 812 }, { "epoch": 0.3728930168558652, "grad_norm": 0.21274548768997192, "learning_rate": 0.0009769091275775687, "loss": 1.0131, "step": 813 }, { "epoch": 0.3733516798532278, "grad_norm": 0.4451286196708679, "learning_rate": 0.0009768346811658932, "loss": 2.5039, "step": 814 }, { "epoch": 0.3738103428505905, "grad_norm": 0.2519133388996124, "learning_rate": 0.0009767601177832319, "loss": 1.1851, "step": 815 }, { "epoch": 0.3742690058479532, "grad_norm": 0.36184462904930115, "learning_rate": 0.0009766854374478765, "loss": 1.9926, "step": 816 }, { "epoch": 0.3747276688453159, "grad_norm": 0.2603318393230438, "learning_rate": 0.0009766106401781459, "loss": 1.3791, "step": 817 }, { "epoch": 0.3751863318426786, "grad_norm": 0.26677289605140686, "learning_rate": 0.0009765357259923885, "loss": 1.2404, "step": 818 }, { "epoch": 0.37564499484004127, "grad_norm": 0.1966245174407959, "learning_rate": 0.000976460694908982, "loss": 1.0012, "step": 819 }, { "epoch": 0.37610365783740396, "grad_norm": 0.24759866297245026, "learning_rate": 0.0009763855469463314, "loss": 1.4005, "step": 820 }, { "epoch": 0.37656232083476665, "grad_norm": 0.387797474861145, "learning_rate": 0.000976310282122871, "loss": 2.082, "step": 821 }, { "epoch": 0.37702098383212934, "grad_norm": 0.3424549698829651, "learning_rate": 0.0009762349004570644, "loss": 1.84, "step": 822 }, { "epoch": 0.37747964682949203, "grad_norm": 0.35073143243789673, "learning_rate": 0.0009761594019674027, "loss": 1.981, "step": 823 }, { "epoch": 0.3779383098268547, "grad_norm": 0.11143548041582108, "learning_rate": 0.0009760837866724063, "loss": 0.6522, "step": 824 }, { "epoch": 0.3783969728242174, "grad_norm": 0.2800116539001465, "learning_rate": 0.0009760080545906244, "loss": 1.4713, "step": 825 }, { "epoch": 0.3788556358215801, "grad_norm": 0.365592896938324, "learning_rate": 0.0009759322057406346, "loss": 2.3906, "step": 826 }, { "epoch": 0.3793142988189428, "grad_norm": 0.3204135000705719, "learning_rate": 0.0009758562401410429, "loss": 1.6213, "step": 827 }, { "epoch": 0.3797729618163055, "grad_norm": 0.32533779740333557, "learning_rate": 0.0009757801578104846, "loss": 1.8047, "step": 828 }, { "epoch": 0.3802316248136682, "grad_norm": 0.30973535776138306, "learning_rate": 0.0009757039587676229, "loss": 1.8927, "step": 829 }, { "epoch": 0.38069028781103087, "grad_norm": 0.36675432324409485, "learning_rate": 0.0009756276430311501, "loss": 1.9817, "step": 830 }, { "epoch": 0.3811489508083935, "grad_norm": 0.5217725038528442, "learning_rate": 0.000975551210619787, "loss": 1.6754, "step": 831 }, { "epoch": 0.3816076138057562, "grad_norm": 0.22404442727565765, "learning_rate": 0.0009754746615522832, "loss": 1.0305, "step": 832 }, { "epoch": 0.3820662768031189, "grad_norm": 1.0014019012451172, "learning_rate": 0.0009753979958474164, "loss": 1.6633, "step": 833 }, { "epoch": 0.3825249398004816, "grad_norm": 6.931515216827393, "learning_rate": 0.0009753212135239935, "loss": 2.2435, "step": 834 }, { "epoch": 0.38298360279784427, "grad_norm": 4.728778839111328, "learning_rate": 0.0009752443146008496, "loss": 2.3665, "step": 835 }, { "epoch": 0.38344226579520696, "grad_norm": 0.5018776655197144, "learning_rate": 0.0009751672990968486, "loss": 1.4681, "step": 836 }, { "epoch": 0.38390092879256965, "grad_norm": 1.0198359489440918, "learning_rate": 0.0009750901670308831, "loss": 2.5583, "step": 837 }, { "epoch": 0.38435959178993234, "grad_norm": 0.3947281241416931, "learning_rate": 0.0009750129184218741, "loss": 1.788, "step": 838 }, { "epoch": 0.38481825478729503, "grad_norm": 0.4941509962081909, "learning_rate": 0.0009749355532887712, "loss": 1.4786, "step": 839 }, { "epoch": 0.3852769177846577, "grad_norm": 0.20619411766529083, "learning_rate": 0.0009748580716505523, "loss": 0.8842, "step": 840 }, { "epoch": 0.3857355807820204, "grad_norm": 0.4519241750240326, "learning_rate": 0.0009747804735262249, "loss": 1.1354, "step": 841 }, { "epoch": 0.3861942437793831, "grad_norm": 0.5304630398750305, "learning_rate": 0.0009747027589348239, "loss": 2.087, "step": 842 }, { "epoch": 0.3866529067767458, "grad_norm": 0.4589567184448242, "learning_rate": 0.0009746249278954134, "loss": 1.955, "step": 843 }, { "epoch": 0.3871115697741085, "grad_norm": 0.4906143248081207, "learning_rate": 0.0009745469804270857, "loss": 2.0344, "step": 844 }, { "epoch": 0.3875702327714712, "grad_norm": 0.330793172121048, "learning_rate": 0.0009744689165489621, "loss": 1.4565, "step": 845 }, { "epoch": 0.38802889576883387, "grad_norm": 0.3725048899650574, "learning_rate": 0.0009743907362801923, "loss": 1.9084, "step": 846 }, { "epoch": 0.38848755876619656, "grad_norm": 0.5816589593887329, "learning_rate": 0.0009743124396399541, "loss": 2.4114, "step": 847 }, { "epoch": 0.38894622176355925, "grad_norm": 0.2848202586174011, "learning_rate": 0.0009742340266474547, "loss": 1.4275, "step": 848 }, { "epoch": 0.3894048847609219, "grad_norm": 0.28104984760284424, "learning_rate": 0.0009741554973219291, "loss": 1.0759, "step": 849 }, { "epoch": 0.3898635477582846, "grad_norm": 0.39782270789146423, "learning_rate": 0.000974076851682641, "loss": 1.6635, "step": 850 }, { "epoch": 0.39032221075564727, "grad_norm": 0.25713643431663513, "learning_rate": 0.0009739980897488831, "loss": 1.3452, "step": 851 }, { "epoch": 0.39078087375300996, "grad_norm": 0.12562298774719238, "learning_rate": 0.0009739192115399762, "loss": 0.65, "step": 852 }, { "epoch": 0.39123953675037265, "grad_norm": 0.22468183934688568, "learning_rate": 0.0009738402170752693, "loss": 1.1257, "step": 853 }, { "epoch": 0.39169819974773534, "grad_norm": 0.34269779920578003, "learning_rate": 0.0009737611063741407, "loss": 1.5716, "step": 854 }, { "epoch": 0.39215686274509803, "grad_norm": 0.3019089102745056, "learning_rate": 0.0009736818794559967, "loss": 1.5173, "step": 855 }, { "epoch": 0.3926155257424607, "grad_norm": 0.19614103436470032, "learning_rate": 0.0009736025363402723, "loss": 0.6453, "step": 856 }, { "epoch": 0.3930741887398234, "grad_norm": 0.21685221791267395, "learning_rate": 0.000973523077046431, "loss": 1.0789, "step": 857 }, { "epoch": 0.3935328517371861, "grad_norm": 0.30863749980926514, "learning_rate": 0.0009734435015939644, "loss": 1.4852, "step": 858 }, { "epoch": 0.3939915147345488, "grad_norm": 0.1502714902162552, "learning_rate": 0.0009733638100023932, "loss": 0.7736, "step": 859 }, { "epoch": 0.3944501777319115, "grad_norm": 0.3218334913253784, "learning_rate": 0.0009732840022912664, "loss": 1.662, "step": 860 }, { "epoch": 0.3949088407292742, "grad_norm": 0.23470452427864075, "learning_rate": 0.0009732040784801611, "loss": 1.1917, "step": 861 }, { "epoch": 0.39536750372663687, "grad_norm": 0.3882734775543213, "learning_rate": 0.0009731240385886835, "loss": 1.7597, "step": 862 }, { "epoch": 0.39582616672399956, "grad_norm": 0.41674375534057617, "learning_rate": 0.0009730438826364679, "loss": 1.887, "step": 863 }, { "epoch": 0.39628482972136225, "grad_norm": 0.3275260329246521, "learning_rate": 0.0009729636106431768, "loss": 1.5059, "step": 864 }, { "epoch": 0.39674349271872494, "grad_norm": 0.262455016374588, "learning_rate": 0.000972883222628502, "loss": 1.4503, "step": 865 }, { "epoch": 0.3972021557160876, "grad_norm": 0.3371032774448395, "learning_rate": 0.0009728027186121629, "loss": 1.7706, "step": 866 }, { "epoch": 0.39766081871345027, "grad_norm": 0.4705641269683838, "learning_rate": 0.0009727220986139079, "loss": 2.3818, "step": 867 }, { "epoch": 0.39811948171081296, "grad_norm": 0.37057843804359436, "learning_rate": 0.0009726413626535136, "loss": 1.912, "step": 868 }, { "epoch": 0.39857814470817565, "grad_norm": 0.19416803121566772, "learning_rate": 0.0009725605107507851, "loss": 0.9414, "step": 869 }, { "epoch": 0.39903680770553834, "grad_norm": 0.26515352725982666, "learning_rate": 0.0009724795429255559, "loss": 1.5286, "step": 870 }, { "epoch": 0.39949547070290103, "grad_norm": 0.33706986904144287, "learning_rate": 0.0009723984591976882, "loss": 1.4502, "step": 871 }, { "epoch": 0.3999541337002637, "grad_norm": 0.0702093318104744, "learning_rate": 0.0009723172595870724, "loss": 0.4485, "step": 872 }, { "epoch": 0.4004127966976264, "grad_norm": 0.376376211643219, "learning_rate": 0.000972235944113627, "loss": 2.1362, "step": 873 }, { "epoch": 0.4008714596949891, "grad_norm": 0.3718342185020447, "learning_rate": 0.0009721545127972998, "loss": 2.3489, "step": 874 }, { "epoch": 0.4013301226923518, "grad_norm": 0.41915109753608704, "learning_rate": 0.0009720729656580658, "loss": 1.9443, "step": 875 }, { "epoch": 0.4017887856897145, "grad_norm": 0.2805352210998535, "learning_rate": 0.0009719913027159298, "loss": 1.397, "step": 876 }, { "epoch": 0.4022474486870772, "grad_norm": 0.23363643884658813, "learning_rate": 0.000971909523990924, "loss": 0.9596, "step": 877 }, { "epoch": 0.40270611168443987, "grad_norm": 0.41541317105293274, "learning_rate": 0.0009718276295031091, "loss": 1.8851, "step": 878 }, { "epoch": 0.40316477468180256, "grad_norm": 0.37006086111068726, "learning_rate": 0.0009717456192725747, "loss": 1.9034, "step": 879 }, { "epoch": 0.40362343767916525, "grad_norm": 0.334375262260437, "learning_rate": 0.0009716634933194386, "loss": 1.5997, "step": 880 }, { "epoch": 0.40408210067652794, "grad_norm": 0.3790551424026489, "learning_rate": 0.0009715812516638466, "loss": 1.7657, "step": 881 }, { "epoch": 0.40454076367389064, "grad_norm": 0.3914640247821808, "learning_rate": 0.0009714988943259733, "loss": 1.7908, "step": 882 }, { "epoch": 0.40499942667125327, "grad_norm": 0.2521947920322418, "learning_rate": 0.0009714164213260215, "loss": 1.1801, "step": 883 }, { "epoch": 0.40545808966861596, "grad_norm": 0.3940063714981079, "learning_rate": 0.0009713338326842225, "loss": 1.5258, "step": 884 }, { "epoch": 0.40591675266597865, "grad_norm": 0.41144511103630066, "learning_rate": 0.0009712511284208358, "loss": 2.3174, "step": 885 }, { "epoch": 0.40637541566334134, "grad_norm": 0.31135112047195435, "learning_rate": 0.0009711683085561496, "loss": 1.8175, "step": 886 }, { "epoch": 0.40683407866070403, "grad_norm": 0.27680450677871704, "learning_rate": 0.0009710853731104798, "loss": 1.4197, "step": 887 }, { "epoch": 0.4072927416580667, "grad_norm": 0.3753867745399475, "learning_rate": 0.0009710023221041712, "loss": 1.8083, "step": 888 }, { "epoch": 0.4077514046554294, "grad_norm": 0.39393678307533264, "learning_rate": 0.0009709191555575972, "loss": 1.7822, "step": 889 }, { "epoch": 0.4082100676527921, "grad_norm": 0.24905888736248016, "learning_rate": 0.0009708358734911586, "loss": 1.3235, "step": 890 }, { "epoch": 0.4086687306501548, "grad_norm": 0.2505285143852234, "learning_rate": 0.0009707524759252855, "loss": 1.2241, "step": 891 }, { "epoch": 0.4091273936475175, "grad_norm": 0.39112672209739685, "learning_rate": 0.0009706689628804355, "loss": 1.9813, "step": 892 }, { "epoch": 0.4095860566448802, "grad_norm": 0.3590749502182007, "learning_rate": 0.0009705853343770954, "loss": 1.4428, "step": 893 }, { "epoch": 0.41004471964224287, "grad_norm": 0.4543074369430542, "learning_rate": 0.0009705015904357797, "loss": 2.1425, "step": 894 }, { "epoch": 0.41050338263960556, "grad_norm": 0.5381583571434021, "learning_rate": 0.0009704177310770313, "loss": 1.4238, "step": 895 }, { "epoch": 0.41096204563696825, "grad_norm": 0.5682449340820312, "learning_rate": 0.0009703337563214216, "loss": 0.8868, "step": 896 }, { "epoch": 0.41142070863433094, "grad_norm": 0.8263031244277954, "learning_rate": 0.0009702496661895501, "loss": 2.1425, "step": 897 }, { "epoch": 0.41187937163169364, "grad_norm": 0.07518908381462097, "learning_rate": 0.0009701654607020449, "loss": 0.4335, "step": 898 }, { "epoch": 0.4123380346290563, "grad_norm": 0.43035972118377686, "learning_rate": 0.0009700811398795622, "loss": 1.9609, "step": 899 }, { "epoch": 0.41279669762641896, "grad_norm": 0.33483147621154785, "learning_rate": 0.0009699967037427864, "loss": 1.614, "step": 900 }, { "epoch": 0.41325536062378165, "grad_norm": 0.20277926325798035, "learning_rate": 0.0009699121523124301, "loss": 0.9445, "step": 901 }, { "epoch": 0.41371402362114434, "grad_norm": 0.20781390368938446, "learning_rate": 0.0009698274856092348, "loss": 1.0036, "step": 902 }, { "epoch": 0.41417268661850704, "grad_norm": 0.3994620740413666, "learning_rate": 0.0009697427036539696, "loss": 2.8057, "step": 903 }, { "epoch": 0.4146313496158697, "grad_norm": 0.20834676921367645, "learning_rate": 0.0009696578064674322, "loss": 1.2151, "step": 904 }, { "epoch": 0.4150900126132324, "grad_norm": 0.21917149424552917, "learning_rate": 0.0009695727940704484, "loss": 1.2262, "step": 905 }, { "epoch": 0.4155486756105951, "grad_norm": 0.21042834222316742, "learning_rate": 0.0009694876664838725, "loss": 1.0464, "step": 906 }, { "epoch": 0.4160073386079578, "grad_norm": 0.2943893074989319, "learning_rate": 0.0009694024237285868, "loss": 1.5404, "step": 907 }, { "epoch": 0.4164660016053205, "grad_norm": 0.24305228888988495, "learning_rate": 0.000969317065825502, "loss": 1.4753, "step": 908 }, { "epoch": 0.4169246646026832, "grad_norm": 0.29043078422546387, "learning_rate": 0.000969231592795557, "loss": 1.8857, "step": 909 }, { "epoch": 0.4173833276000459, "grad_norm": 0.2063802182674408, "learning_rate": 0.000969146004659719, "loss": 1.133, "step": 910 }, { "epoch": 0.41784199059740856, "grad_norm": 0.38645628094673157, "learning_rate": 0.0009690603014389831, "loss": 2.2798, "step": 911 }, { "epoch": 0.41830065359477125, "grad_norm": 0.2220807522535324, "learning_rate": 0.0009689744831543734, "loss": 0.8575, "step": 912 }, { "epoch": 0.41875931659213395, "grad_norm": 0.4347355365753174, "learning_rate": 0.0009688885498269416, "loss": 2.3926, "step": 913 }, { "epoch": 0.41921797958949664, "grad_norm": 0.45031097531318665, "learning_rate": 0.0009688025014777672, "loss": 2.4253, "step": 914 }, { "epoch": 0.4196766425868593, "grad_norm": 0.19847296178340912, "learning_rate": 0.0009687163381279593, "loss": 0.8247, "step": 915 }, { "epoch": 0.420135305584222, "grad_norm": 0.3677137792110443, "learning_rate": 0.0009686300597986541, "loss": 1.9451, "step": 916 }, { "epoch": 0.42059396858158465, "grad_norm": 0.2694656550884247, "learning_rate": 0.0009685436665110161, "loss": 1.5455, "step": 917 }, { "epoch": 0.42105263157894735, "grad_norm": 0.21316292881965637, "learning_rate": 0.0009684571582862382, "loss": 1.0842, "step": 918 }, { "epoch": 0.42151129457631004, "grad_norm": 0.32941651344299316, "learning_rate": 0.0009683705351455419, "loss": 1.709, "step": 919 }, { "epoch": 0.4219699575736727, "grad_norm": 0.3653143644332886, "learning_rate": 0.0009682837971101762, "loss": 1.8724, "step": 920 }, { "epoch": 0.4224286205710354, "grad_norm": 0.37952494621276855, "learning_rate": 0.0009681969442014183, "loss": 1.7211, "step": 921 }, { "epoch": 0.4228872835683981, "grad_norm": 0.3981945216655731, "learning_rate": 0.0009681099764405743, "loss": 2.2534, "step": 922 }, { "epoch": 0.4233459465657608, "grad_norm": 0.0702991932630539, "learning_rate": 0.0009680228938489777, "loss": 0.4506, "step": 923 }, { "epoch": 0.4238046095631235, "grad_norm": 0.2196217030286789, "learning_rate": 0.0009679356964479908, "loss": 1.088, "step": 924 }, { "epoch": 0.4242632725604862, "grad_norm": 0.2972463071346283, "learning_rate": 0.0009678483842590034, "loss": 1.4828, "step": 925 }, { "epoch": 0.4247219355578489, "grad_norm": 0.10213679820299149, "learning_rate": 0.000967760957303434, "loss": 0.628, "step": 926 }, { "epoch": 0.42518059855521156, "grad_norm": 0.294698029756546, "learning_rate": 0.0009676734156027292, "loss": 1.504, "step": 927 }, { "epoch": 0.42563926155257426, "grad_norm": 0.4071488380432129, "learning_rate": 0.0009675857591783634, "loss": 2.2822, "step": 928 }, { "epoch": 0.42609792454993695, "grad_norm": 0.16480673849582672, "learning_rate": 0.0009674979880518393, "loss": 0.8937, "step": 929 }, { "epoch": 0.42655658754729964, "grad_norm": 0.2734246253967285, "learning_rate": 0.0009674101022446879, "loss": 1.451, "step": 930 }, { "epoch": 0.42701525054466233, "grad_norm": 0.3893333971500397, "learning_rate": 0.0009673221017784683, "loss": 1.9644, "step": 931 }, { "epoch": 0.427473913542025, "grad_norm": 0.2807949483394623, "learning_rate": 0.0009672339866747675, "loss": 1.5402, "step": 932 }, { "epoch": 0.4279325765393877, "grad_norm": 0.37752094864845276, "learning_rate": 0.0009671457569552009, "loss": 2.1098, "step": 933 }, { "epoch": 0.42839123953675035, "grad_norm": 0.29235443472862244, "learning_rate": 0.0009670574126414118, "loss": 1.3341, "step": 934 }, { "epoch": 0.42884990253411304, "grad_norm": 0.3632213771343231, "learning_rate": 0.0009669689537550717, "loss": 1.5638, "step": 935 }, { "epoch": 0.4293085655314757, "grad_norm": 0.30011284351348877, "learning_rate": 0.0009668803803178803, "loss": 1.6048, "step": 936 }, { "epoch": 0.4297672285288384, "grad_norm": 0.21687191724777222, "learning_rate": 0.0009667916923515651, "loss": 0.9312, "step": 937 }, { "epoch": 0.4302258915262011, "grad_norm": 0.2912392020225525, "learning_rate": 0.0009667028898778822, "loss": 1.5125, "step": 938 }, { "epoch": 0.4306845545235638, "grad_norm": 0.3935389816761017, "learning_rate": 0.0009666139729186152, "loss": 2.1857, "step": 939 }, { "epoch": 0.4311432175209265, "grad_norm": 0.4185419976711273, "learning_rate": 0.0009665249414955761, "loss": 2.4136, "step": 940 }, { "epoch": 0.4316018805182892, "grad_norm": 0.10364656150341034, "learning_rate": 0.0009664357956306051, "loss": 0.6298, "step": 941 }, { "epoch": 0.4320605435156519, "grad_norm": 0.2796267867088318, "learning_rate": 0.0009663465353455703, "loss": 1.5578, "step": 942 }, { "epoch": 0.43251920651301456, "grad_norm": 0.2670913338661194, "learning_rate": 0.0009662571606623678, "loss": 1.1976, "step": 943 }, { "epoch": 0.43297786951037726, "grad_norm": 0.41438791155815125, "learning_rate": 0.0009661676716029219, "loss": 2.4133, "step": 944 }, { "epoch": 0.43343653250773995, "grad_norm": 0.31299319863319397, "learning_rate": 0.000966078068189185, "loss": 1.5249, "step": 945 }, { "epoch": 0.43389519550510264, "grad_norm": 0.3784157931804657, "learning_rate": 0.0009659883504431373, "loss": 1.8156, "step": 946 }, { "epoch": 0.43435385850246533, "grad_norm": 0.3633187413215637, "learning_rate": 0.0009658985183867872, "loss": 2.0233, "step": 947 }, { "epoch": 0.434812521499828, "grad_norm": 0.29936981201171875, "learning_rate": 0.0009658085720421714, "loss": 1.4108, "step": 948 }, { "epoch": 0.4352711844971907, "grad_norm": 0.24031949043273926, "learning_rate": 0.0009657185114313541, "loss": 0.9012, "step": 949 }, { "epoch": 0.4357298474945534, "grad_norm": 0.30812880396842957, "learning_rate": 0.000965628336576428, "loss": 1.739, "step": 950 }, { "epoch": 0.43618851049191604, "grad_norm": 0.27589166164398193, "learning_rate": 0.0009655380474995137, "loss": 1.387, "step": 951 }, { "epoch": 0.43664717348927873, "grad_norm": 0.3418666422367096, "learning_rate": 0.0009654476442227595, "loss": 1.8832, "step": 952 }, { "epoch": 0.4371058364866414, "grad_norm": 0.18190939724445343, "learning_rate": 0.000965357126768342, "loss": 0.8432, "step": 953 }, { "epoch": 0.4375644994840041, "grad_norm": 0.336883544921875, "learning_rate": 0.0009652664951584662, "loss": 1.9048, "step": 954 }, { "epoch": 0.4380231624813668, "grad_norm": 0.2961789667606354, "learning_rate": 0.0009651757494153642, "loss": 1.473, "step": 955 }, { "epoch": 0.4384818254787295, "grad_norm": 0.2900097370147705, "learning_rate": 0.0009650848895612969, "loss": 1.2979, "step": 956 }, { "epoch": 0.4389404884760922, "grad_norm": 0.13419854640960693, "learning_rate": 0.0009649939156185526, "loss": 0.8149, "step": 957 }, { "epoch": 0.4393991514734549, "grad_norm": 0.2546023428440094, "learning_rate": 0.000964902827609448, "loss": 1.2925, "step": 958 }, { "epoch": 0.43985781447081757, "grad_norm": 0.28633105754852295, "learning_rate": 0.0009648116255563279, "loss": 1.4249, "step": 959 }, { "epoch": 0.44031647746818026, "grad_norm": 0.34882158041000366, "learning_rate": 0.0009647203094815644, "loss": 1.8254, "step": 960 }, { "epoch": 0.44077514046554295, "grad_norm": 0.2623085677623749, "learning_rate": 0.0009646288794075582, "loss": 1.2006, "step": 961 }, { "epoch": 0.44123380346290564, "grad_norm": 0.3005825877189636, "learning_rate": 0.0009645373353567377, "loss": 1.4745, "step": 962 }, { "epoch": 0.44169246646026833, "grad_norm": 0.4481685757637024, "learning_rate": 0.0009644456773515595, "loss": 1.6587, "step": 963 }, { "epoch": 0.442151129457631, "grad_norm": 0.34620991349220276, "learning_rate": 0.0009643539054145077, "loss": 2.0671, "step": 964 }, { "epoch": 0.4426097924549937, "grad_norm": 0.10259232670068741, "learning_rate": 0.0009642620195680948, "loss": 0.6523, "step": 965 }, { "epoch": 0.4430684554523564, "grad_norm": 0.2266424596309662, "learning_rate": 0.0009641700198348608, "loss": 1.033, "step": 966 }, { "epoch": 0.4435271184497191, "grad_norm": 0.2258816808462143, "learning_rate": 0.0009640779062373743, "loss": 1.3325, "step": 967 }, { "epoch": 0.4439857814470818, "grad_norm": 0.3065774142742157, "learning_rate": 0.0009639856787982313, "loss": 1.7267, "step": 968 }, { "epoch": 0.4444444444444444, "grad_norm": 0.2785046696662903, "learning_rate": 0.0009638933375400555, "loss": 1.4358, "step": 969 }, { "epoch": 0.4449031074418071, "grad_norm": 0.40370169281959534, "learning_rate": 0.0009638008824854995, "loss": 2.2593, "step": 970 }, { "epoch": 0.4453617704391698, "grad_norm": 0.2614261507987976, "learning_rate": 0.0009637083136572426, "loss": 1.4521, "step": 971 }, { "epoch": 0.4458204334365325, "grad_norm": 0.3734627068042755, "learning_rate": 0.0009636156310779928, "loss": 2.2771, "step": 972 }, { "epoch": 0.4462790964338952, "grad_norm": 0.3273562490940094, "learning_rate": 0.0009635228347704861, "loss": 1.7556, "step": 973 }, { "epoch": 0.4467377594312579, "grad_norm": 0.28261798620224, "learning_rate": 0.0009634299247574858, "loss": 1.409, "step": 974 }, { "epoch": 0.44719642242862057, "grad_norm": 0.409447580575943, "learning_rate": 0.0009633369010617834, "loss": 2.3015, "step": 975 }, { "epoch": 0.44765508542598326, "grad_norm": 0.44366922974586487, "learning_rate": 0.0009632437637061982, "loss": 2.4729, "step": 976 }, { "epoch": 0.44811374842334595, "grad_norm": 0.35576725006103516, "learning_rate": 0.0009631505127135778, "loss": 2.0571, "step": 977 }, { "epoch": 0.44857241142070864, "grad_norm": 0.2606424391269684, "learning_rate": 0.000963057148106797, "loss": 1.4125, "step": 978 }, { "epoch": 0.44903107441807133, "grad_norm": 0.3204779028892517, "learning_rate": 0.0009629636699087591, "loss": 1.6771, "step": 979 }, { "epoch": 0.449489737415434, "grad_norm": 0.3472746014595032, "learning_rate": 0.0009628700781423947, "loss": 1.7863, "step": 980 }, { "epoch": 0.4499484004127967, "grad_norm": 0.2507071793079376, "learning_rate": 0.0009627763728306626, "loss": 1.2164, "step": 981 }, { "epoch": 0.4504070634101594, "grad_norm": 0.284404993057251, "learning_rate": 0.0009626825539965497, "loss": 1.3602, "step": 982 }, { "epoch": 0.4508657264075221, "grad_norm": 0.23801104724407196, "learning_rate": 0.00096258862166307, "loss": 1.1913, "step": 983 }, { "epoch": 0.4513243894048848, "grad_norm": 0.08279567956924438, "learning_rate": 0.0009624945758532662, "loss": 0.5228, "step": 984 }, { "epoch": 0.4517830524022475, "grad_norm": 0.31264549493789673, "learning_rate": 0.0009624004165902078, "loss": 1.3771, "step": 985 }, { "epoch": 0.4522417153996101, "grad_norm": 0.290018767118454, "learning_rate": 0.0009623061438969934, "loss": 1.4374, "step": 986 }, { "epoch": 0.4527003783969728, "grad_norm": 0.30359646677970886, "learning_rate": 0.0009622117577967486, "loss": 1.7618, "step": 987 }, { "epoch": 0.4531590413943355, "grad_norm": 0.3439119756221771, "learning_rate": 0.0009621172583126267, "loss": 1.7233, "step": 988 }, { "epoch": 0.4536177043916982, "grad_norm": 0.3586854636669159, "learning_rate": 0.0009620226454678093, "loss": 2.0002, "step": 989 }, { "epoch": 0.4540763673890609, "grad_norm": 0.3362513780593872, "learning_rate": 0.0009619279192855056, "loss": 2.1243, "step": 990 }, { "epoch": 0.45453503038642357, "grad_norm": 0.2629103362560272, "learning_rate": 0.0009618330797889527, "loss": 1.3026, "step": 991 }, { "epoch": 0.45499369338378626, "grad_norm": 0.3186989426612854, "learning_rate": 0.0009617381270014154, "loss": 1.8407, "step": 992 }, { "epoch": 0.45545235638114895, "grad_norm": 0.40748247504234314, "learning_rate": 0.0009616430609461861, "loss": 1.9366, "step": 993 }, { "epoch": 0.45591101937851164, "grad_norm": 0.37845298647880554, "learning_rate": 0.0009615478816465854, "loss": 1.8237, "step": 994 }, { "epoch": 0.45636968237587433, "grad_norm": 0.3755371570587158, "learning_rate": 0.0009614525891259613, "loss": 1.8331, "step": 995 }, { "epoch": 0.456828345373237, "grad_norm": 0.28780463337898254, "learning_rate": 0.00096135718340769, "loss": 1.2989, "step": 996 }, { "epoch": 0.4572870083705997, "grad_norm": 0.3778688609600067, "learning_rate": 0.000961261664515175, "loss": 1.7416, "step": 997 }, { "epoch": 0.4577456713679624, "grad_norm": 0.2470480501651764, "learning_rate": 0.0009611660324718479, "loss": 0.9676, "step": 998 }, { "epoch": 0.4582043343653251, "grad_norm": 0.5491411685943604, "learning_rate": 0.0009610702873011676, "loss": 1.8815, "step": 999 }, { "epoch": 0.4586629973626878, "grad_norm": 0.38404297828674316, "learning_rate": 0.0009609744290266216, "loss": 1.8572, "step": 1000 }, { "epoch": 0.4591216603600505, "grad_norm": 0.4080640971660614, "learning_rate": 0.0009608784576717242, "loss": 2.2996, "step": 1001 }, { "epoch": 0.45958032335741317, "grad_norm": 0.39164450764656067, "learning_rate": 0.000960782373260018, "loss": 1.6304, "step": 1002 }, { "epoch": 0.4600389863547758, "grad_norm": 0.2890069782733917, "learning_rate": 0.0009606861758150733, "loss": 1.2357, "step": 1003 }, { "epoch": 0.4604976493521385, "grad_norm": 0.08920864015817642, "learning_rate": 0.0009605898653604881, "loss": 0.5615, "step": 1004 }, { "epoch": 0.4609563123495012, "grad_norm": 0.13560301065444946, "learning_rate": 0.0009604934419198877, "loss": 0.7203, "step": 1005 }, { "epoch": 0.4614149753468639, "grad_norm": 0.27159520983695984, "learning_rate": 0.0009603969055169258, "loss": 1.6746, "step": 1006 }, { "epoch": 0.46187363834422657, "grad_norm": 0.19911514222621918, "learning_rate": 0.0009603002561752832, "loss": 1.0421, "step": 1007 }, { "epoch": 0.46233230134158926, "grad_norm": 0.3992009162902832, "learning_rate": 0.0009602034939186691, "loss": 2.3098, "step": 1008 }, { "epoch": 0.46279096433895195, "grad_norm": 0.28715813159942627, "learning_rate": 0.0009601066187708194, "loss": 1.3643, "step": 1009 }, { "epoch": 0.46324962733631464, "grad_norm": 0.3220798671245575, "learning_rate": 0.0009600096307554987, "loss": 1.6344, "step": 1010 }, { "epoch": 0.46370829033367733, "grad_norm": 0.3547840416431427, "learning_rate": 0.0009599125298964987, "loss": 1.7168, "step": 1011 }, { "epoch": 0.46416695333104, "grad_norm": 0.29928240180015564, "learning_rate": 0.000959815316217639, "loss": 1.4865, "step": 1012 }, { "epoch": 0.4646256163284027, "grad_norm": 0.32575827836990356, "learning_rate": 0.0009597179897427668, "loss": 1.4995, "step": 1013 }, { "epoch": 0.4650842793257654, "grad_norm": 0.36625438928604126, "learning_rate": 0.000959620550495757, "loss": 1.8375, "step": 1014 }, { "epoch": 0.4655429423231281, "grad_norm": 0.3549942970275879, "learning_rate": 0.0009595229985005121, "loss": 1.7886, "step": 1015 }, { "epoch": 0.4660016053204908, "grad_norm": 0.33019477128982544, "learning_rate": 0.0009594253337809621, "loss": 1.618, "step": 1016 }, { "epoch": 0.4664602683178535, "grad_norm": 0.31643784046173096, "learning_rate": 0.0009593275563610655, "loss": 1.5107, "step": 1017 }, { "epoch": 0.46691893131521617, "grad_norm": 0.33537599444389343, "learning_rate": 0.0009592296662648072, "loss": 1.6339, "step": 1018 }, { "epoch": 0.46737759431257886, "grad_norm": 0.3272285759449005, "learning_rate": 0.0009591316635162006, "loss": 1.4943, "step": 1019 }, { "epoch": 0.4678362573099415, "grad_norm": 0.2963853180408478, "learning_rate": 0.0009590335481392863, "loss": 1.5295, "step": 1020 }, { "epoch": 0.4682949203073042, "grad_norm": 0.38083258271217346, "learning_rate": 0.000958935320158133, "loss": 2.0518, "step": 1021 }, { "epoch": 0.4687535833046669, "grad_norm": 0.2800465524196625, "learning_rate": 0.0009588369795968366, "loss": 1.386, "step": 1022 }, { "epoch": 0.46921224630202957, "grad_norm": 0.31138157844543457, "learning_rate": 0.0009587385264795206, "loss": 1.2996, "step": 1023 }, { "epoch": 0.46967090929939226, "grad_norm": 0.27252325415611267, "learning_rate": 0.0009586399608303364, "loss": 1.4318, "step": 1024 }, { "epoch": 0.47012957229675495, "grad_norm": 0.2721007764339447, "learning_rate": 0.0009585412826734627, "loss": 1.3777, "step": 1025 }, { "epoch": 0.47058823529411764, "grad_norm": 0.28045183420181274, "learning_rate": 0.0009584424920331063, "loss": 1.5048, "step": 1026 }, { "epoch": 0.47104689829148033, "grad_norm": 0.3029325604438782, "learning_rate": 0.000958343588933501, "loss": 1.6938, "step": 1027 }, { "epoch": 0.471505561288843, "grad_norm": 0.2408834993839264, "learning_rate": 0.0009582445733989086, "loss": 1.4271, "step": 1028 }, { "epoch": 0.4719642242862057, "grad_norm": 0.10467322915792465, "learning_rate": 0.0009581454454536182, "loss": 0.6104, "step": 1029 }, { "epoch": 0.4724228872835684, "grad_norm": 0.3287009000778198, "learning_rate": 0.0009580462051219465, "loss": 1.9726, "step": 1030 }, { "epoch": 0.4728815502809311, "grad_norm": 0.37531885504722595, "learning_rate": 0.0009579468524282381, "loss": 2.3677, "step": 1031 }, { "epoch": 0.4733402132782938, "grad_norm": 0.4152393937110901, "learning_rate": 0.0009578473873968649, "loss": 2.207, "step": 1032 }, { "epoch": 0.4737988762756565, "grad_norm": 0.26491448283195496, "learning_rate": 0.0009577478100522261, "loss": 1.406, "step": 1033 }, { "epoch": 0.47425753927301917, "grad_norm": 0.3497820198535919, "learning_rate": 0.0009576481204187492, "loss": 1.9156, "step": 1034 }, { "epoch": 0.47471620227038186, "grad_norm": 0.3127298951148987, "learning_rate": 0.0009575483185208884, "loss": 1.3849, "step": 1035 }, { "epoch": 0.47517486526774455, "grad_norm": 0.386515736579895, "learning_rate": 0.000957448404383126, "loss": 1.9648, "step": 1036 }, { "epoch": 0.4756335282651072, "grad_norm": 0.30607542395591736, "learning_rate": 0.0009573483780299717, "loss": 1.5166, "step": 1037 }, { "epoch": 0.4760921912624699, "grad_norm": 0.3027437627315521, "learning_rate": 0.0009572482394859625, "loss": 1.6056, "step": 1038 }, { "epoch": 0.47655085425983257, "grad_norm": 0.3490167558193207, "learning_rate": 0.0009571479887756633, "loss": 1.7711, "step": 1039 }, { "epoch": 0.47700951725719526, "grad_norm": 0.3557809293270111, "learning_rate": 0.0009570476259236662, "loss": 1.7281, "step": 1040 }, { "epoch": 0.47746818025455795, "grad_norm": 0.3299933075904846, "learning_rate": 0.000956947150954591, "loss": 1.3677, "step": 1041 }, { "epoch": 0.47792684325192064, "grad_norm": 0.4228198528289795, "learning_rate": 0.000956846563893085, "loss": 1.8817, "step": 1042 }, { "epoch": 0.47838550624928333, "grad_norm": 0.40233686566352844, "learning_rate": 0.0009567458647638228, "loss": 2.2048, "step": 1043 }, { "epoch": 0.478844169246646, "grad_norm": 0.3554650843143463, "learning_rate": 0.0009566450535915066, "loss": 1.8723, "step": 1044 }, { "epoch": 0.4793028322440087, "grad_norm": 0.33305272459983826, "learning_rate": 0.0009565441304008661, "loss": 1.67, "step": 1045 }, { "epoch": 0.4797614952413714, "grad_norm": 0.35247936844825745, "learning_rate": 0.0009564430952166587, "loss": 1.9382, "step": 1046 }, { "epoch": 0.4802201582387341, "grad_norm": 0.23185671865940094, "learning_rate": 0.0009563419480636689, "loss": 0.9469, "step": 1047 }, { "epoch": 0.4806788212360968, "grad_norm": 0.29733264446258545, "learning_rate": 0.0009562406889667088, "loss": 1.4415, "step": 1048 }, { "epoch": 0.4811374842334595, "grad_norm": 0.2600105106830597, "learning_rate": 0.0009561393179506181, "loss": 1.2259, "step": 1049 }, { "epoch": 0.48159614723082217, "grad_norm": 0.36176133155822754, "learning_rate": 0.0009560378350402637, "loss": 1.6957, "step": 1050 }, { "epoch": 0.48205481022818486, "grad_norm": 0.29688432812690735, "learning_rate": 0.0009559362402605403, "loss": 1.3275, "step": 1051 }, { "epoch": 0.48251347322554755, "grad_norm": 0.2176387459039688, "learning_rate": 0.0009558345336363695, "loss": 0.9416, "step": 1052 }, { "epoch": 0.48297213622291024, "grad_norm": 0.35077396035194397, "learning_rate": 0.0009557327151927009, "loss": 1.6619, "step": 1053 }, { "epoch": 0.4834307992202729, "grad_norm": 0.35128337144851685, "learning_rate": 0.0009556307849545114, "loss": 1.5695, "step": 1054 }, { "epoch": 0.48388946221763557, "grad_norm": 0.37428727746009827, "learning_rate": 0.000955528742946805, "loss": 1.9629, "step": 1055 }, { "epoch": 0.48434812521499826, "grad_norm": 0.28633585572242737, "learning_rate": 0.0009554265891946133, "loss": 1.4113, "step": 1056 }, { "epoch": 0.48480678821236095, "grad_norm": 0.25998106598854065, "learning_rate": 0.0009553243237229956, "loss": 1.1547, "step": 1057 }, { "epoch": 0.48526545120972364, "grad_norm": 0.4127240478992462, "learning_rate": 0.0009552219465570382, "loss": 2.1592, "step": 1058 }, { "epoch": 0.48572411420708633, "grad_norm": 0.23034657537937164, "learning_rate": 0.000955119457721855, "loss": 1.0124, "step": 1059 }, { "epoch": 0.486182777204449, "grad_norm": 0.4003170430660248, "learning_rate": 0.0009550168572425873, "loss": 1.8563, "step": 1060 }, { "epoch": 0.4866414402018117, "grad_norm": 0.34863001108169556, "learning_rate": 0.0009549141451444036, "loss": 1.8716, "step": 1061 }, { "epoch": 0.4871001031991744, "grad_norm": 0.34055593609809875, "learning_rate": 0.0009548113214525, "loss": 1.8115, "step": 1062 }, { "epoch": 0.4875587661965371, "grad_norm": 0.2745518684387207, "learning_rate": 0.0009547083861921, "loss": 1.3137, "step": 1063 }, { "epoch": 0.4880174291938998, "grad_norm": 0.19711460173130035, "learning_rate": 0.0009546053393884542, "loss": 0.9982, "step": 1064 }, { "epoch": 0.4884760921912625, "grad_norm": 0.2679455280303955, "learning_rate": 0.0009545021810668406, "loss": 1.0912, "step": 1065 }, { "epoch": 0.48893475518862517, "grad_norm": 0.2457709014415741, "learning_rate": 0.000954398911252565, "loss": 0.9668, "step": 1066 }, { "epoch": 0.48939341818598786, "grad_norm": 0.22733516991138458, "learning_rate": 0.0009542955299709601, "loss": 0.9916, "step": 1067 }, { "epoch": 0.48985208118335055, "grad_norm": 0.3566371500492096, "learning_rate": 0.000954192037247386, "loss": 1.8339, "step": 1068 }, { "epoch": 0.49031074418071324, "grad_norm": 0.40455371141433716, "learning_rate": 0.0009540884331072304, "loss": 2.1667, "step": 1069 }, { "epoch": 0.49076940717807593, "grad_norm": 0.43050408363342285, "learning_rate": 0.0009539847175759077, "loss": 2.3535, "step": 1070 }, { "epoch": 0.49122807017543857, "grad_norm": 0.3837912380695343, "learning_rate": 0.0009538808906788608, "loss": 1.995, "step": 1071 }, { "epoch": 0.49168673317280126, "grad_norm": 0.28267940878868103, "learning_rate": 0.0009537769524415585, "loss": 1.4414, "step": 1072 }, { "epoch": 0.49214539617016395, "grad_norm": 0.43233123421669006, "learning_rate": 0.0009536729028894979, "loss": 2.0576, "step": 1073 }, { "epoch": 0.49260405916752664, "grad_norm": 0.27310910820961, "learning_rate": 0.0009535687420482031, "loss": 1.0809, "step": 1074 }, { "epoch": 0.49306272216488933, "grad_norm": 0.2695041596889496, "learning_rate": 0.0009534644699432254, "loss": 1.2148, "step": 1075 }, { "epoch": 0.493521385162252, "grad_norm": 0.4794595241546631, "learning_rate": 0.0009533600866001437, "loss": 2.3083, "step": 1076 }, { "epoch": 0.4939800481596147, "grad_norm": 0.2349144071340561, "learning_rate": 0.0009532555920445638, "loss": 0.9781, "step": 1077 }, { "epoch": 0.4944387111569774, "grad_norm": 0.4289840757846832, "learning_rate": 0.000953150986302119, "loss": 1.9989, "step": 1078 }, { "epoch": 0.4948973741543401, "grad_norm": 0.4208228588104248, "learning_rate": 0.0009530462693984698, "loss": 2.0288, "step": 1079 }, { "epoch": 0.4953560371517028, "grad_norm": 0.34878844022750854, "learning_rate": 0.0009529414413593043, "loss": 1.4258, "step": 1080 }, { "epoch": 0.4958147001490655, "grad_norm": 0.23603618144989014, "learning_rate": 0.0009528365022103371, "loss": 1.1298, "step": 1081 }, { "epoch": 0.49627336314642817, "grad_norm": 0.27409201860427856, "learning_rate": 0.000952731451977311, "loss": 1.2806, "step": 1082 }, { "epoch": 0.49673202614379086, "grad_norm": 0.37235966324806213, "learning_rate": 0.0009526262906859953, "loss": 1.9988, "step": 1083 }, { "epoch": 0.49719068914115355, "grad_norm": 0.3669985830783844, "learning_rate": 0.0009525210183621869, "loss": 1.7487, "step": 1084 }, { "epoch": 0.49764935213851624, "grad_norm": 0.3774386942386627, "learning_rate": 0.0009524156350317099, "loss": 1.0475, "step": 1085 }, { "epoch": 0.49810801513587893, "grad_norm": 0.30981799960136414, "learning_rate": 0.0009523101407204154, "loss": 1.3907, "step": 1086 }, { "epoch": 0.4985666781332416, "grad_norm": 0.3435545861721039, "learning_rate": 0.0009522045354541822, "loss": 1.6863, "step": 1087 }, { "epoch": 0.49902534113060426, "grad_norm": 0.23086117208003998, "learning_rate": 0.0009520988192589158, "loss": 1.0635, "step": 1088 }, { "epoch": 0.49948400412796695, "grad_norm": 0.21928969025611877, "learning_rate": 0.0009519929921605493, "loss": 0.9675, "step": 1089 }, { "epoch": 0.49994266712532964, "grad_norm": 0.2767171263694763, "learning_rate": 0.0009518870541850426, "loss": 1.4333, "step": 1090 }, { "epoch": 0.5004013301226924, "grad_norm": 0.288322776556015, "learning_rate": 0.0009517810053583835, "loss": 1.6043, "step": 1091 }, { "epoch": 0.500859993120055, "grad_norm": 0.32782241702079773, "learning_rate": 0.0009516748457065862, "loss": 1.8718, "step": 1092 }, { "epoch": 0.5013186561174178, "grad_norm": 0.2817911207675934, "learning_rate": 0.0009515685752556924, "loss": 1.5955, "step": 1093 }, { "epoch": 0.5017773191147804, "grad_norm": 0.3410284221172333, "learning_rate": 0.0009514621940317712, "loss": 1.8585, "step": 1094 }, { "epoch": 0.5022359821121432, "grad_norm": 0.24062463641166687, "learning_rate": 0.0009513557020609185, "loss": 1.2591, "step": 1095 }, { "epoch": 0.5026946451095058, "grad_norm": 0.26802435517311096, "learning_rate": 0.0009512490993692578, "loss": 1.3988, "step": 1096 }, { "epoch": 0.5031533081068684, "grad_norm": 0.3186005651950836, "learning_rate": 0.0009511423859829392, "loss": 1.3655, "step": 1097 }, { "epoch": 0.5036119711042312, "grad_norm": 0.378814160823822, "learning_rate": 0.0009510355619281403, "loss": 1.9003, "step": 1098 }, { "epoch": 0.5040706341015938, "grad_norm": 0.12915131449699402, "learning_rate": 0.0009509286272310662, "loss": 0.7532, "step": 1099 }, { "epoch": 0.5045292970989566, "grad_norm": 0.3437194228172302, "learning_rate": 0.0009508215819179484, "loss": 1.8945, "step": 1100 }, { "epoch": 0.5049879600963192, "grad_norm": 0.3381500542163849, "learning_rate": 0.0009507144260150459, "loss": 1.8967, "step": 1101 }, { "epoch": 0.5054466230936819, "grad_norm": 0.31556692719459534, "learning_rate": 0.0009506071595486449, "loss": 1.6499, "step": 1102 }, { "epoch": 0.5059052860910446, "grad_norm": 0.19522079825401306, "learning_rate": 0.0009504997825450586, "loss": 1.1095, "step": 1103 }, { "epoch": 0.5063639490884073, "grad_norm": 0.2746993601322174, "learning_rate": 0.0009503922950306274, "loss": 1.3311, "step": 1104 }, { "epoch": 0.50682261208577, "grad_norm": 0.191048264503479, "learning_rate": 0.0009502846970317187, "loss": 0.9343, "step": 1105 }, { "epoch": 0.5072812750831327, "grad_norm": 0.24246980249881744, "learning_rate": 0.0009501769885747271, "loss": 0.9377, "step": 1106 }, { "epoch": 0.5077399380804953, "grad_norm": 0.2772439122200012, "learning_rate": 0.0009500691696860743, "loss": 1.4328, "step": 1107 }, { "epoch": 0.5081986010778581, "grad_norm": 0.11574332416057587, "learning_rate": 0.0009499612403922091, "loss": 0.7223, "step": 1108 }, { "epoch": 0.5086572640752207, "grad_norm": 0.2554071247577667, "learning_rate": 0.0009498532007196071, "loss": 1.3056, "step": 1109 }, { "epoch": 0.5091159270725835, "grad_norm": 0.2135448306798935, "learning_rate": 0.0009497450506947714, "loss": 1.161, "step": 1110 }, { "epoch": 0.5095745900699461, "grad_norm": 0.1852402538061142, "learning_rate": 0.000949636790344232, "loss": 1.002, "step": 1111 }, { "epoch": 0.5100332530673088, "grad_norm": 0.3562169373035431, "learning_rate": 0.0009495284196945458, "loss": 1.8675, "step": 1112 }, { "epoch": 0.5104919160646715, "grad_norm": 0.336135596036911, "learning_rate": 0.0009494199387722969, "loss": 1.9324, "step": 1113 }, { "epoch": 0.5109505790620341, "grad_norm": 0.38499322533607483, "learning_rate": 0.0009493113476040966, "loss": 2.3125, "step": 1114 }, { "epoch": 0.5114092420593969, "grad_norm": 0.20560574531555176, "learning_rate": 0.0009492026462165831, "loss": 0.9785, "step": 1115 }, { "epoch": 0.5118679050567595, "grad_norm": 0.36569714546203613, "learning_rate": 0.0009490938346364215, "loss": 1.9463, "step": 1116 }, { "epoch": 0.5123265680541222, "grad_norm": 0.2573243975639343, "learning_rate": 0.0009489849128903041, "loss": 0.9407, "step": 1117 }, { "epoch": 0.5127852310514849, "grad_norm": 0.3620086908340454, "learning_rate": 0.0009488758810049503, "loss": 2.1328, "step": 1118 }, { "epoch": 0.5132438940488476, "grad_norm": 0.14623965322971344, "learning_rate": 0.0009487667390071064, "loss": 0.777, "step": 1119 }, { "epoch": 0.5137025570462103, "grad_norm": 0.2728992998600006, "learning_rate": 0.0009486574869235453, "loss": 1.5247, "step": 1120 }, { "epoch": 0.514161220043573, "grad_norm": 0.379833459854126, "learning_rate": 0.0009485481247810681, "loss": 1.8743, "step": 1121 }, { "epoch": 0.5146198830409356, "grad_norm": 0.1932978332042694, "learning_rate": 0.0009484386526065014, "loss": 0.795, "step": 1122 }, { "epoch": 0.5150785460382984, "grad_norm": 0.3890305459499359, "learning_rate": 0.0009483290704266999, "loss": 2.0994, "step": 1123 }, { "epoch": 0.515537209035661, "grad_norm": 0.2669411897659302, "learning_rate": 0.0009482193782685449, "loss": 1.5108, "step": 1124 }, { "epoch": 0.5159958720330238, "grad_norm": 0.2894323766231537, "learning_rate": 0.0009481095761589445, "loss": 1.3339, "step": 1125 }, { "epoch": 0.5164545350303864, "grad_norm": 0.32780617475509644, "learning_rate": 0.0009479996641248339, "loss": 1.2261, "step": 1126 }, { "epoch": 0.5169131980277492, "grad_norm": 0.2773579955101013, "learning_rate": 0.0009478896421931755, "loss": 1.3108, "step": 1127 }, { "epoch": 0.5173718610251118, "grad_norm": 0.25270065665245056, "learning_rate": 0.0009477795103909586, "loss": 1.2581, "step": 1128 }, { "epoch": 0.5178305240224745, "grad_norm": 0.34188172221183777, "learning_rate": 0.000947669268745199, "loss": 1.7618, "step": 1129 }, { "epoch": 0.5182891870198372, "grad_norm": 0.30379557609558105, "learning_rate": 0.00094755891728294, "loss": 1.5002, "step": 1130 }, { "epoch": 0.5187478500171998, "grad_norm": 0.29648905992507935, "learning_rate": 0.0009474484560312514, "loss": 1.1797, "step": 1131 }, { "epoch": 0.5192065130145626, "grad_norm": 0.3193150758743286, "learning_rate": 0.0009473378850172303, "loss": 1.899, "step": 1132 }, { "epoch": 0.5196651760119252, "grad_norm": 0.3248406946659088, "learning_rate": 0.0009472272042680005, "loss": 1.7289, "step": 1133 }, { "epoch": 0.5201238390092879, "grad_norm": 0.26000529527664185, "learning_rate": 0.000947116413810713, "loss": 1.4107, "step": 1134 }, { "epoch": 0.5205825020066506, "grad_norm": 0.27710065245628357, "learning_rate": 0.0009470055136725451, "loss": 1.3737, "step": 1135 }, { "epoch": 0.5210411650040133, "grad_norm": 0.3785195052623749, "learning_rate": 0.0009468945038807018, "loss": 1.7585, "step": 1136 }, { "epoch": 0.521499828001376, "grad_norm": 0.3857591450214386, "learning_rate": 0.0009467833844624142, "loss": 2.188, "step": 1137 }, { "epoch": 0.5219584909987387, "grad_norm": 0.30060064792633057, "learning_rate": 0.0009466721554449412, "loss": 1.424, "step": 1138 }, { "epoch": 0.5224171539961013, "grad_norm": 0.29208019375801086, "learning_rate": 0.0009465608168555677, "loss": 1.2755, "step": 1139 }, { "epoch": 0.5228758169934641, "grad_norm": 5.316793441772461, "learning_rate": 0.0009464493687216058, "loss": 2.1242, "step": 1140 }, { "epoch": 0.5233344799908267, "grad_norm": 0.2195115089416504, "learning_rate": 0.0009463378110703949, "loss": 1.0337, "step": 1141 }, { "epoch": 0.5237931429881895, "grad_norm": 0.24998384714126587, "learning_rate": 0.0009462261439293005, "loss": 1.4111, "step": 1142 }, { "epoch": 0.5242518059855521, "grad_norm": 0.3403361737728119, "learning_rate": 0.0009461143673257156, "loss": 1.9103, "step": 1143 }, { "epoch": 0.5247104689829148, "grad_norm": 0.17883948981761932, "learning_rate": 0.0009460024812870598, "loss": 0.9974, "step": 1144 }, { "epoch": 0.5251691319802775, "grad_norm": 0.24725860357284546, "learning_rate": 0.0009458904858407794, "loss": 1.4016, "step": 1145 }, { "epoch": 0.5256277949776402, "grad_norm": 0.29898151755332947, "learning_rate": 0.0009457783810143479, "loss": 1.701, "step": 1146 }, { "epoch": 0.5260864579750029, "grad_norm": 0.27674156427383423, "learning_rate": 0.0009456661668352652, "loss": 1.3168, "step": 1147 }, { "epoch": 0.5265451209723655, "grad_norm": 0.2375446856021881, "learning_rate": 0.0009455538433310584, "loss": 1.0746, "step": 1148 }, { "epoch": 0.5270037839697282, "grad_norm": 0.3496798574924469, "learning_rate": 0.0009454414105292812, "loss": 1.881, "step": 1149 }, { "epoch": 0.5274624469670909, "grad_norm": 0.3575010895729065, "learning_rate": 0.0009453288684575143, "loss": 1.9427, "step": 1150 }, { "epoch": 0.5279211099644536, "grad_norm": 0.3948631286621094, "learning_rate": 0.0009452162171433648, "loss": 2.2327, "step": 1151 }, { "epoch": 0.5283797729618163, "grad_norm": 0.1527937948703766, "learning_rate": 0.0009451034566144671, "loss": 0.8336, "step": 1152 }, { "epoch": 0.528838435959179, "grad_norm": 0.3415633738040924, "learning_rate": 0.0009449905868984822, "loss": 1.9399, "step": 1153 }, { "epoch": 0.5292970989565416, "grad_norm": 0.2828967571258545, "learning_rate": 0.0009448776080230979, "loss": 1.3918, "step": 1154 }, { "epoch": 0.5297557619539044, "grad_norm": 0.20446327328681946, "learning_rate": 0.0009447645200160285, "loss": 1.025, "step": 1155 }, { "epoch": 0.530214424951267, "grad_norm": 0.3204951286315918, "learning_rate": 0.0009446513229050154, "loss": 1.8108, "step": 1156 }, { "epoch": 0.5306730879486298, "grad_norm": 0.38850653171539307, "learning_rate": 0.0009445380167178266, "loss": 2.2024, "step": 1157 }, { "epoch": 0.5311317509459924, "grad_norm": 0.22903253138065338, "learning_rate": 0.0009444246014822571, "loss": 1.0826, "step": 1158 }, { "epoch": 0.5315904139433552, "grad_norm": 0.2147892266511917, "learning_rate": 0.0009443110772261286, "loss": 1.0546, "step": 1159 }, { "epoch": 0.5320490769407178, "grad_norm": 0.34580257534980774, "learning_rate": 0.0009441974439772889, "loss": 1.7944, "step": 1160 }, { "epoch": 0.5325077399380805, "grad_norm": 0.2733863592147827, "learning_rate": 0.0009440837017636134, "loss": 1.3638, "step": 1161 }, { "epoch": 0.5329664029354432, "grad_norm": 0.3368569016456604, "learning_rate": 0.000943969850613004, "loss": 1.8119, "step": 1162 }, { "epoch": 0.5334250659328059, "grad_norm": 0.25083136558532715, "learning_rate": 0.0009438558905533889, "loss": 1.414, "step": 1163 }, { "epoch": 0.5338837289301686, "grad_norm": 0.3574562966823578, "learning_rate": 0.0009437418216127236, "loss": 2.1129, "step": 1164 }, { "epoch": 0.5343423919275313, "grad_norm": 0.2785975933074951, "learning_rate": 0.0009436276438189899, "loss": 1.5375, "step": 1165 }, { "epoch": 0.5348010549248939, "grad_norm": 0.34642040729522705, "learning_rate": 0.0009435133572001965, "loss": 1.8992, "step": 1166 }, { "epoch": 0.5352597179222566, "grad_norm": 0.4057437479496002, "learning_rate": 0.0009433989617843786, "loss": 2.4216, "step": 1167 }, { "epoch": 0.5357183809196193, "grad_norm": 0.2808201313018799, "learning_rate": 0.0009432844575995983, "loss": 1.3363, "step": 1168 }, { "epoch": 0.536177043916982, "grad_norm": 0.29180237650871277, "learning_rate": 0.0009431698446739443, "loss": 1.7092, "step": 1169 }, { "epoch": 0.5366357069143447, "grad_norm": 0.28107771277427673, "learning_rate": 0.000943055123035532, "loss": 1.2791, "step": 1170 }, { "epoch": 0.5370943699117073, "grad_norm": 0.17355158925056458, "learning_rate": 0.0009429402927125035, "loss": 0.8209, "step": 1171 }, { "epoch": 0.5375530329090701, "grad_norm": 0.31418073177337646, "learning_rate": 0.0009428253537330272, "loss": 1.6716, "step": 1172 }, { "epoch": 0.5380116959064327, "grad_norm": 0.3649182617664337, "learning_rate": 0.0009427103061252989, "loss": 1.8427, "step": 1173 }, { "epoch": 0.5384703589037955, "grad_norm": 0.41031497716903687, "learning_rate": 0.0009425951499175404, "loss": 2.2205, "step": 1174 }, { "epoch": 0.5389290219011581, "grad_norm": 0.2333560585975647, "learning_rate": 0.0009424798851380003, "loss": 0.9493, "step": 1175 }, { "epoch": 0.5393876848985208, "grad_norm": 0.31085172295570374, "learning_rate": 0.0009423645118149539, "loss": 1.4224, "step": 1176 }, { "epoch": 0.5398463478958835, "grad_norm": 0.41655832529067993, "learning_rate": 0.0009422490299767032, "loss": 2.1355, "step": 1177 }, { "epoch": 0.5403050108932462, "grad_norm": 0.390559583902359, "learning_rate": 0.0009421334396515766, "loss": 1.7881, "step": 1178 }, { "epoch": 0.5407636738906089, "grad_norm": 0.3651013970375061, "learning_rate": 0.0009420177408679294, "loss": 1.9978, "step": 1179 }, { "epoch": 0.5412223368879716, "grad_norm": 0.2856077551841736, "learning_rate": 0.0009419019336541431, "loss": 1.3818, "step": 1180 }, { "epoch": 0.5416809998853342, "grad_norm": 0.2839828431606293, "learning_rate": 0.0009417860180386264, "loss": 1.3959, "step": 1181 }, { "epoch": 0.542139662882697, "grad_norm": 0.338052898645401, "learning_rate": 0.0009416699940498139, "loss": 1.8724, "step": 1182 }, { "epoch": 0.5425983258800596, "grad_norm": 0.26623108983039856, "learning_rate": 0.0009415538617161672, "loss": 1.5506, "step": 1183 }, { "epoch": 0.5430569888774223, "grad_norm": 0.3088790774345398, "learning_rate": 0.0009414376210661746, "loss": 1.7001, "step": 1184 }, { "epoch": 0.543515651874785, "grad_norm": 0.2527974545955658, "learning_rate": 0.0009413212721283505, "loss": 1.5046, "step": 1185 }, { "epoch": 0.5439743148721476, "grad_norm": 0.29992103576660156, "learning_rate": 0.0009412048149312364, "loss": 1.405, "step": 1186 }, { "epoch": 0.5444329778695104, "grad_norm": 0.26170700788497925, "learning_rate": 0.0009410882495033998, "loss": 1.1761, "step": 1187 }, { "epoch": 0.544891640866873, "grad_norm": 0.1813340187072754, "learning_rate": 0.0009409715758734352, "loss": 0.9035, "step": 1188 }, { "epoch": 0.5453503038642358, "grad_norm": 0.3251456916332245, "learning_rate": 0.0009408547940699634, "loss": 1.5636, "step": 1189 }, { "epoch": 0.5458089668615984, "grad_norm": 0.3176114857196808, "learning_rate": 0.0009407379041216321, "loss": 1.402, "step": 1190 }, { "epoch": 0.5462676298589612, "grad_norm": 0.21962450444698334, "learning_rate": 0.0009406209060571149, "loss": 1.009, "step": 1191 }, { "epoch": 0.5467262928563238, "grad_norm": 0.25622203946113586, "learning_rate": 0.0009405037999051125, "loss": 0.9907, "step": 1192 }, { "epoch": 0.5471849558536865, "grad_norm": 0.4034391939640045, "learning_rate": 0.0009403865856943516, "loss": 2.1548, "step": 1193 }, { "epoch": 0.5476436188510492, "grad_norm": 0.10982491821050644, "learning_rate": 0.0009402692634535861, "loss": 0.5598, "step": 1194 }, { "epoch": 0.5481022818484119, "grad_norm": 0.21411536633968353, "learning_rate": 0.0009401518332115957, "loss": 0.9482, "step": 1195 }, { "epoch": 0.5485609448457746, "grad_norm": 0.35385677218437195, "learning_rate": 0.0009400342949971868, "loss": 1.8314, "step": 1196 }, { "epoch": 0.5490196078431373, "grad_norm": 0.3468371033668518, "learning_rate": 0.0009399166488391927, "loss": 2.0469, "step": 1197 }, { "epoch": 0.5494782708404999, "grad_norm": 0.315140038728714, "learning_rate": 0.0009397988947664727, "loss": 1.8289, "step": 1198 }, { "epoch": 0.5499369338378627, "grad_norm": 0.2434529811143875, "learning_rate": 0.0009396810328079126, "loss": 1.2443, "step": 1199 }, { "epoch": 0.5503955968352253, "grad_norm": 0.24115651845932007, "learning_rate": 0.0009395630629924248, "loss": 1.2274, "step": 1200 }, { "epoch": 0.550854259832588, "grad_norm": 0.12398785352706909, "learning_rate": 0.0009394449853489484, "loss": 0.6473, "step": 1201 }, { "epoch": 0.5513129228299507, "grad_norm": 0.3112662732601166, "learning_rate": 0.0009393267999064486, "loss": 1.5162, "step": 1202 }, { "epoch": 0.5517715858273133, "grad_norm": 0.36855632066726685, "learning_rate": 0.0009392085066939169, "loss": 1.9268, "step": 1203 }, { "epoch": 0.5522302488246761, "grad_norm": 0.24338075518608093, "learning_rate": 0.0009390901057403716, "loss": 1.1956, "step": 1204 }, { "epoch": 0.5526889118220387, "grad_norm": 0.3083001375198364, "learning_rate": 0.0009389715970748575, "loss": 1.5538, "step": 1205 }, { "epoch": 0.5531475748194015, "grad_norm": 0.2836032807826996, "learning_rate": 0.0009388529807264455, "loss": 1.4889, "step": 1206 }, { "epoch": 0.5536062378167641, "grad_norm": 0.2570768892765045, "learning_rate": 0.000938734256724233, "loss": 1.3693, "step": 1207 }, { "epoch": 0.5540649008141268, "grad_norm": 0.3241545855998993, "learning_rate": 0.0009386154250973438, "loss": 1.6445, "step": 1208 }, { "epoch": 0.5545235638114895, "grad_norm": 0.29821258783340454, "learning_rate": 0.0009384964858749283, "loss": 1.4775, "step": 1209 }, { "epoch": 0.5549822268088522, "grad_norm": 0.22773027420043945, "learning_rate": 0.000938377439086163, "loss": 1.2509, "step": 1210 }, { "epoch": 0.5554408898062149, "grad_norm": 0.31152674555778503, "learning_rate": 0.0009382582847602512, "loss": 1.6638, "step": 1211 }, { "epoch": 0.5558995528035776, "grad_norm": 0.11729778349399567, "learning_rate": 0.0009381390229264221, "loss": 0.5536, "step": 1212 }, { "epoch": 0.5563582158009402, "grad_norm": 0.1624806672334671, "learning_rate": 0.0009380196536139315, "loss": 0.7977, "step": 1213 }, { "epoch": 0.556816878798303, "grad_norm": 0.32244980335235596, "learning_rate": 0.0009379001768520615, "loss": 1.5859, "step": 1214 }, { "epoch": 0.5572755417956656, "grad_norm": 0.2572208344936371, "learning_rate": 0.0009377805926701208, "loss": 1.2651, "step": 1215 }, { "epoch": 0.5577342047930284, "grad_norm": 0.34207504987716675, "learning_rate": 0.0009376609010974442, "loss": 1.817, "step": 1216 }, { "epoch": 0.558192867790391, "grad_norm": 0.33612552285194397, "learning_rate": 0.0009375411021633927, "loss": 1.7496, "step": 1217 }, { "epoch": 0.5586515307877536, "grad_norm": 0.291958212852478, "learning_rate": 0.0009374211958973542, "loss": 1.6475, "step": 1218 }, { "epoch": 0.5591101937851164, "grad_norm": 0.28733986616134644, "learning_rate": 0.0009373011823287422, "loss": 1.434, "step": 1219 }, { "epoch": 0.559568856782479, "grad_norm": 0.2283989042043686, "learning_rate": 0.0009371810614869971, "loss": 0.9546, "step": 1220 }, { "epoch": 0.5600275197798418, "grad_norm": 0.330026239156723, "learning_rate": 0.0009370608334015856, "loss": 1.5756, "step": 1221 }, { "epoch": 0.5604861827772044, "grad_norm": 0.37356528639793396, "learning_rate": 0.000936940498102, "loss": 2.1412, "step": 1222 }, { "epoch": 0.5609448457745672, "grad_norm": 0.35292860865592957, "learning_rate": 0.0009368200556177598, "loss": 1.697, "step": 1223 }, { "epoch": 0.5614035087719298, "grad_norm": 0.30706706643104553, "learning_rate": 0.0009366995059784104, "loss": 1.2438, "step": 1224 }, { "epoch": 0.5618621717692925, "grad_norm": 0.3499191999435425, "learning_rate": 0.0009365788492135235, "loss": 1.5356, "step": 1225 }, { "epoch": 0.5623208347666552, "grad_norm": 0.16763897240161896, "learning_rate": 0.0009364580853526967, "loss": 0.8041, "step": 1226 }, { "epoch": 0.5627794977640179, "grad_norm": 0.2519925534725189, "learning_rate": 0.0009363372144255548, "loss": 1.1698, "step": 1227 }, { "epoch": 0.5632381607613806, "grad_norm": 0.17550460994243622, "learning_rate": 0.0009362162364617479, "loss": 0.9668, "step": 1228 }, { "epoch": 0.5636968237587433, "grad_norm": 0.31531476974487305, "learning_rate": 0.0009360951514909529, "loss": 1.7645, "step": 1229 }, { "epoch": 0.5641554867561059, "grad_norm": 0.26998183131217957, "learning_rate": 0.0009359739595428729, "loss": 1.194, "step": 1230 }, { "epoch": 0.5646141497534687, "grad_norm": 0.3305562734603882, "learning_rate": 0.000935852660647237, "loss": 1.6608, "step": 1231 }, { "epoch": 0.5650728127508313, "grad_norm": 0.3273736536502838, "learning_rate": 0.000935731254833801, "loss": 1.4538, "step": 1232 }, { "epoch": 0.5655314757481941, "grad_norm": 0.3271407186985016, "learning_rate": 0.0009356097421323461, "loss": 1.8257, "step": 1233 }, { "epoch": 0.5659901387455567, "grad_norm": 0.1826079934835434, "learning_rate": 0.0009354881225726808, "loss": 0.8564, "step": 1234 }, { "epoch": 0.5664488017429193, "grad_norm": 0.255459725856781, "learning_rate": 0.0009353663961846389, "loss": 1.4302, "step": 1235 }, { "epoch": 0.5669074647402821, "grad_norm": 0.2576141357421875, "learning_rate": 0.0009352445629980809, "loss": 1.3641, "step": 1236 }, { "epoch": 0.5673661277376447, "grad_norm": 0.31090545654296875, "learning_rate": 0.0009351226230428934, "loss": 1.6024, "step": 1237 }, { "epoch": 0.5678247907350075, "grad_norm": 0.2738056778907776, "learning_rate": 0.0009350005763489888, "loss": 1.4413, "step": 1238 }, { "epoch": 0.5682834537323701, "grad_norm": 0.34732604026794434, "learning_rate": 0.0009348784229463065, "loss": 2.0205, "step": 1239 }, { "epoch": 0.5687421167297328, "grad_norm": 0.2433692067861557, "learning_rate": 0.0009347561628648115, "loss": 1.4734, "step": 1240 }, { "epoch": 0.5692007797270955, "grad_norm": 0.2669577896595001, "learning_rate": 0.0009346337961344948, "loss": 1.3418, "step": 1241 }, { "epoch": 0.5696594427244582, "grad_norm": 0.2639061510562897, "learning_rate": 0.0009345113227853741, "loss": 1.4639, "step": 1242 }, { "epoch": 0.5701181057218209, "grad_norm": 0.29163965582847595, "learning_rate": 0.000934388742847493, "loss": 1.4735, "step": 1243 }, { "epoch": 0.5705767687191836, "grad_norm": 0.1659363955259323, "learning_rate": 0.0009342660563509211, "loss": 0.8228, "step": 1244 }, { "epoch": 0.5710354317165462, "grad_norm": 0.2903018295764923, "learning_rate": 0.0009341432633257543, "loss": 1.3949, "step": 1245 }, { "epoch": 0.571494094713909, "grad_norm": 0.39374253153800964, "learning_rate": 0.0009340203638021149, "loss": 2.1997, "step": 1246 }, { "epoch": 0.5719527577112716, "grad_norm": 0.31729447841644287, "learning_rate": 0.0009338973578101506, "loss": 1.4435, "step": 1247 }, { "epoch": 0.5724114207086344, "grad_norm": 0.12577004730701447, "learning_rate": 0.000933774245380036, "loss": 0.6955, "step": 1248 }, { "epoch": 0.572870083705997, "grad_norm": 0.3122677206993103, "learning_rate": 0.0009336510265419712, "loss": 1.5015, "step": 1249 }, { "epoch": 0.5733287467033598, "grad_norm": 0.2251952886581421, "learning_rate": 0.000933527701326183, "loss": 0.9793, "step": 1250 }, { "epoch": 0.5737874097007224, "grad_norm": 0.2624088227748871, "learning_rate": 0.0009334042697629235, "loss": 1.2029, "step": 1251 }, { "epoch": 0.574246072698085, "grad_norm": 0.2972775399684906, "learning_rate": 0.0009332807318824717, "loss": 1.3044, "step": 1252 }, { "epoch": 0.5747047356954478, "grad_norm": 0.6643463969230652, "learning_rate": 0.0009331570877151324, "loss": 1.9626, "step": 1253 }, { "epoch": 0.5751633986928104, "grad_norm": 0.25192347168922424, "learning_rate": 0.0009330333372912361, "loss": 1.2851, "step": 1254 }, { "epoch": 0.5756220616901732, "grad_norm": 0.28251874446868896, "learning_rate": 0.0009329094806411401, "loss": 1.4486, "step": 1255 }, { "epoch": 0.5760807246875358, "grad_norm": 0.3544906675815582, "learning_rate": 0.0009327855177952267, "loss": 1.7761, "step": 1256 }, { "epoch": 0.5765393876848985, "grad_norm": 0.18498443067073822, "learning_rate": 0.0009326614487839053, "loss": 0.8643, "step": 1257 }, { "epoch": 0.5769980506822612, "grad_norm": 0.2767449617385864, "learning_rate": 0.0009325372736376109, "loss": 1.3204, "step": 1258 }, { "epoch": 0.5774567136796239, "grad_norm": 0.32360634207725525, "learning_rate": 0.0009324129923868048, "loss": 1.555, "step": 1259 }, { "epoch": 0.5779153766769866, "grad_norm": 0.09593556076288223, "learning_rate": 0.0009322886050619735, "loss": 0.5922, "step": 1260 }, { "epoch": 0.5783740396743493, "grad_norm": 0.2054739147424698, "learning_rate": 0.0009321641116936306, "loss": 0.8165, "step": 1261 }, { "epoch": 0.5788327026717119, "grad_norm": 0.6027858257293701, "learning_rate": 0.0009320395123123149, "loss": 1.8843, "step": 1262 }, { "epoch": 0.5792913656690747, "grad_norm": 0.31743377447128296, "learning_rate": 0.0009319148069485917, "loss": 1.2451, "step": 1263 }, { "epoch": 0.5797500286664373, "grad_norm": 0.2888292670249939, "learning_rate": 0.0009317899956330522, "loss": 1.4385, "step": 1264 }, { "epoch": 0.5802086916638001, "grad_norm": 0.17408441007137299, "learning_rate": 0.0009316650783963132, "loss": 0.7488, "step": 1265 }, { "epoch": 0.5806673546611627, "grad_norm": 0.3600836396217346, "learning_rate": 0.0009315400552690181, "loss": 1.5487, "step": 1266 }, { "epoch": 0.5811260176585255, "grad_norm": 0.2011416256427765, "learning_rate": 0.0009314149262818358, "loss": 0.825, "step": 1267 }, { "epoch": 0.5815846806558881, "grad_norm": 0.37469393014907837, "learning_rate": 0.0009312896914654616, "loss": 1.4067, "step": 1268 }, { "epoch": 0.5820433436532507, "grad_norm": 0.2877853512763977, "learning_rate": 0.0009311643508506162, "loss": 1.4527, "step": 1269 }, { "epoch": 0.5825020066506135, "grad_norm": 0.3054576814174652, "learning_rate": 0.0009310389044680467, "loss": 1.39, "step": 1270 }, { "epoch": 0.5829606696479761, "grad_norm": 0.09884677827358246, "learning_rate": 0.000930913352348526, "loss": 0.6243, "step": 1271 }, { "epoch": 0.5834193326453389, "grad_norm": 0.25772392749786377, "learning_rate": 0.0009307876945228528, "loss": 1.4088, "step": 1272 }, { "epoch": 0.5838779956427015, "grad_norm": 0.3259493112564087, "learning_rate": 0.0009306619310218521, "loss": 1.6538, "step": 1273 }, { "epoch": 0.5843366586400642, "grad_norm": 0.09127166122198105, "learning_rate": 0.0009305360618763745, "loss": 0.5361, "step": 1274 }, { "epoch": 0.5847953216374269, "grad_norm": 0.18113906681537628, "learning_rate": 0.0009304100871172967, "loss": 0.9267, "step": 1275 }, { "epoch": 0.5852539846347896, "grad_norm": 0.32655301690101624, "learning_rate": 0.000930284006775521, "loss": 1.6382, "step": 1276 }, { "epoch": 0.5857126476321523, "grad_norm": 0.19575290381908417, "learning_rate": 0.0009301578208819758, "loss": 0.9375, "step": 1277 }, { "epoch": 0.586171310629515, "grad_norm": 0.3941885828971863, "learning_rate": 0.0009300315294676158, "loss": 2.1287, "step": 1278 }, { "epoch": 0.5866299736268776, "grad_norm": 0.28158482909202576, "learning_rate": 0.0009299051325634208, "loss": 1.3167, "step": 1279 }, { "epoch": 0.5870886366242404, "grad_norm": 0.1906464397907257, "learning_rate": 0.000929778630200397, "loss": 0.8625, "step": 1280 }, { "epoch": 0.587547299621603, "grad_norm": 0.1061529740691185, "learning_rate": 0.0009296520224095764, "loss": 0.6863, "step": 1281 }, { "epoch": 0.5880059626189658, "grad_norm": 0.26948490738868713, "learning_rate": 0.0009295253092220166, "loss": 0.886, "step": 1282 }, { "epoch": 0.5884646256163284, "grad_norm": 0.36432507634162903, "learning_rate": 0.0009293984906688016, "loss": 1.8413, "step": 1283 }, { "epoch": 0.5889232886136911, "grad_norm": 0.5231337547302246, "learning_rate": 0.0009292715667810406, "loss": 1.7709, "step": 1284 }, { "epoch": 0.5893819516110538, "grad_norm": 0.3480357825756073, "learning_rate": 0.000929144537589869, "loss": 1.9674, "step": 1285 }, { "epoch": 0.5898406146084164, "grad_norm": 0.24957574903964996, "learning_rate": 0.0009290174031264482, "loss": 1.2784, "step": 1286 }, { "epoch": 0.5902992776057792, "grad_norm": 0.33761247992515564, "learning_rate": 0.000928890163421965, "loss": 1.9197, "step": 1287 }, { "epoch": 0.5907579406031418, "grad_norm": 0.35372909903526306, "learning_rate": 0.0009287628185076322, "loss": 2.0242, "step": 1288 }, { "epoch": 0.5912166036005045, "grad_norm": 0.21915297210216522, "learning_rate": 0.0009286353684146884, "loss": 0.9164, "step": 1289 }, { "epoch": 0.5916752665978672, "grad_norm": 0.3058471083641052, "learning_rate": 0.0009285078131743982, "loss": 1.5614, "step": 1290 }, { "epoch": 0.5921339295952299, "grad_norm": 0.1828153282403946, "learning_rate": 0.0009283801528180517, "loss": 0.8684, "step": 1291 }, { "epoch": 0.5925925925925926, "grad_norm": 0.1839916706085205, "learning_rate": 0.000928252387376965, "loss": 0.8804, "step": 1292 }, { "epoch": 0.5930512555899553, "grad_norm": 0.21669816970825195, "learning_rate": 0.0009281245168824799, "loss": 1.1169, "step": 1293 }, { "epoch": 0.5935099185873179, "grad_norm": 0.2863485813140869, "learning_rate": 0.0009279965413659637, "loss": 1.5664, "step": 1294 }, { "epoch": 0.5939685815846807, "grad_norm": 0.22065885365009308, "learning_rate": 0.00092786846085881, "loss": 1.0018, "step": 1295 }, { "epoch": 0.5944272445820433, "grad_norm": 0.3145039975643158, "learning_rate": 0.0009277402753924376, "loss": 1.4487, "step": 1296 }, { "epoch": 0.5948859075794061, "grad_norm": 0.2897392809391022, "learning_rate": 0.0009276119849982917, "loss": 1.384, "step": 1297 }, { "epoch": 0.5953445705767687, "grad_norm": 0.3938884139060974, "learning_rate": 0.0009274835897078425, "loss": 2.1997, "step": 1298 }, { "epoch": 0.5958032335741315, "grad_norm": 0.17570100724697113, "learning_rate": 0.0009273550895525864, "loss": 0.8196, "step": 1299 }, { "epoch": 0.5962618965714941, "grad_norm": 0.32507872581481934, "learning_rate": 0.0009272264845640455, "loss": 1.4766, "step": 1300 }, { "epoch": 0.5967205595688568, "grad_norm": 0.19803722202777863, "learning_rate": 0.0009270977747737675, "loss": 0.9604, "step": 1301 }, { "epoch": 0.5971792225662195, "grad_norm": 0.32639971375465393, "learning_rate": 0.0009269689602133258, "loss": 1.7936, "step": 1302 }, { "epoch": 0.5976378855635821, "grad_norm": 0.35615068674087524, "learning_rate": 0.0009268400409143195, "loss": 1.7267, "step": 1303 }, { "epoch": 0.5980965485609449, "grad_norm": 0.33645865321159363, "learning_rate": 0.0009267110169083734, "loss": 1.6238, "step": 1304 }, { "epoch": 0.5985552115583075, "grad_norm": 0.29043132066726685, "learning_rate": 0.0009265818882271384, "loss": 1.2636, "step": 1305 }, { "epoch": 0.5990138745556702, "grad_norm": 0.41903120279312134, "learning_rate": 0.0009264526549022903, "loss": 2.3948, "step": 1306 }, { "epoch": 0.5994725375530329, "grad_norm": 0.2502308189868927, "learning_rate": 0.0009263233169655309, "loss": 1.1672, "step": 1307 }, { "epoch": 0.5999312005503956, "grad_norm": 0.1772756576538086, "learning_rate": 0.000926193874448588, "loss": 0.8739, "step": 1308 }, { "epoch": 0.6003898635477583, "grad_norm": 0.24992649257183075, "learning_rate": 0.0009260643273832147, "loss": 1.4184, "step": 1309 }, { "epoch": 0.600848526545121, "grad_norm": 0.1002533808350563, "learning_rate": 0.0009259346758011898, "loss": 0.6456, "step": 1310 }, { "epoch": 0.6013071895424836, "grad_norm": 0.3838384449481964, "learning_rate": 0.0009258049197343177, "loss": 2.1184, "step": 1311 }, { "epoch": 0.6017658525398464, "grad_norm": 0.1766338050365448, "learning_rate": 0.0009256750592144287, "loss": 0.8759, "step": 1312 }, { "epoch": 0.602224515537209, "grad_norm": 0.37328869104385376, "learning_rate": 0.0009255450942733783, "loss": 1.9109, "step": 1313 }, { "epoch": 0.6026831785345718, "grad_norm": 0.19689656794071198, "learning_rate": 0.0009254150249430479, "loss": 0.9383, "step": 1314 }, { "epoch": 0.6031418415319344, "grad_norm": 0.0794055312871933, "learning_rate": 0.0009252848512553447, "loss": 0.4766, "step": 1315 }, { "epoch": 0.6036005045292971, "grad_norm": 0.277407705783844, "learning_rate": 0.0009251545732422009, "loss": 1.7058, "step": 1316 }, { "epoch": 0.6040591675266598, "grad_norm": 0.28111398220062256, "learning_rate": 0.0009250241909355746, "loss": 1.8057, "step": 1317 }, { "epoch": 0.6045178305240225, "grad_norm": 0.2518330514431, "learning_rate": 0.0009248937043674499, "loss": 1.3856, "step": 1318 }, { "epoch": 0.6049764935213852, "grad_norm": 0.3877377510070801, "learning_rate": 0.0009247631135698358, "loss": 2.1638, "step": 1319 }, { "epoch": 0.6054351565187478, "grad_norm": 0.2325863391160965, "learning_rate": 0.0009246324185747672, "loss": 1.3607, "step": 1320 }, { "epoch": 0.6058938195161105, "grad_norm": 0.23192419111728668, "learning_rate": 0.0009245016194143047, "loss": 1.1667, "step": 1321 }, { "epoch": 0.6063524825134732, "grad_norm": 0.0964299887418747, "learning_rate": 0.000924370716120534, "loss": 0.623, "step": 1322 }, { "epoch": 0.6068111455108359, "grad_norm": 0.23401454091072083, "learning_rate": 0.0009242397087255667, "loss": 1.2682, "step": 1323 }, { "epoch": 0.6072698085081986, "grad_norm": 0.30539819598197937, "learning_rate": 0.0009241085972615401, "loss": 1.9392, "step": 1324 }, { "epoch": 0.6077284715055613, "grad_norm": 0.1904824823141098, "learning_rate": 0.0009239773817606165, "loss": 0.8026, "step": 1325 }, { "epoch": 0.6081871345029239, "grad_norm": 0.31935641169548035, "learning_rate": 0.0009238460622549842, "loss": 1.7756, "step": 1326 }, { "epoch": 0.6086457975002867, "grad_norm": 0.232622429728508, "learning_rate": 0.0009237146387768567, "loss": 1.2202, "step": 1327 }, { "epoch": 0.6091044604976493, "grad_norm": 0.22386907041072845, "learning_rate": 0.0009235831113584732, "loss": 1.0802, "step": 1328 }, { "epoch": 0.6095631234950121, "grad_norm": 0.2252710908651352, "learning_rate": 0.0009234514800320983, "loss": 0.9511, "step": 1329 }, { "epoch": 0.6100217864923747, "grad_norm": 0.3520384430885315, "learning_rate": 0.0009233197448300221, "loss": 1.898, "step": 1330 }, { "epoch": 0.6104804494897375, "grad_norm": 0.3444898724555969, "learning_rate": 0.0009231879057845601, "loss": 1.77, "step": 1331 }, { "epoch": 0.6109391124871001, "grad_norm": 0.24436596035957336, "learning_rate": 0.0009230559629280535, "loss": 1.054, "step": 1332 }, { "epoch": 0.6113977754844628, "grad_norm": 0.11772722005844116, "learning_rate": 0.0009229239162928689, "loss": 0.6621, "step": 1333 }, { "epoch": 0.6118564384818255, "grad_norm": 0.2954722046852112, "learning_rate": 0.0009227917659113982, "loss": 1.5235, "step": 1334 }, { "epoch": 0.6123151014791882, "grad_norm": 0.2937714755535126, "learning_rate": 0.0009226595118160588, "loss": 1.3464, "step": 1335 }, { "epoch": 0.6127737644765509, "grad_norm": 0.27231964468955994, "learning_rate": 0.0009225271540392934, "loss": 1.2777, "step": 1336 }, { "epoch": 0.6132324274739135, "grad_norm": 0.19506557285785675, "learning_rate": 0.0009223946926135709, "loss": 0.8657, "step": 1337 }, { "epoch": 0.6136910904712762, "grad_norm": 0.39066511392593384, "learning_rate": 0.0009222621275713844, "loss": 1.8091, "step": 1338 }, { "epoch": 0.6141497534686389, "grad_norm": 0.4800299108028412, "learning_rate": 0.0009221294589452535, "loss": 2.0739, "step": 1339 }, { "epoch": 0.6146084164660016, "grad_norm": 0.3111729621887207, "learning_rate": 0.0009219966867677226, "loss": 1.5457, "step": 1340 }, { "epoch": 0.6150670794633643, "grad_norm": 0.30846676230430603, "learning_rate": 0.0009218638110713615, "loss": 1.4479, "step": 1341 }, { "epoch": 0.615525742460727, "grad_norm": 0.3237318992614746, "learning_rate": 0.0009217308318887659, "loss": 1.5574, "step": 1342 }, { "epoch": 0.6159844054580896, "grad_norm": 0.2283388376235962, "learning_rate": 0.0009215977492525565, "loss": 1.1732, "step": 1343 }, { "epoch": 0.6164430684554524, "grad_norm": 0.3451511263847351, "learning_rate": 0.0009214645631953791, "loss": 1.7261, "step": 1344 }, { "epoch": 0.616901731452815, "grad_norm": 0.3029400110244751, "learning_rate": 0.0009213312737499055, "loss": 1.3227, "step": 1345 }, { "epoch": 0.6173603944501778, "grad_norm": 0.17842943966388702, "learning_rate": 0.0009211978809488327, "loss": 0.5785, "step": 1346 }, { "epoch": 0.6178190574475404, "grad_norm": 0.3857697546482086, "learning_rate": 0.0009210643848248824, "loss": 1.8008, "step": 1347 }, { "epoch": 0.6182777204449031, "grad_norm": 0.23998984694480896, "learning_rate": 0.0009209307854108026, "loss": 1.1037, "step": 1348 }, { "epoch": 0.6187363834422658, "grad_norm": 0.2030552476644516, "learning_rate": 0.0009207970827393661, "loss": 0.8847, "step": 1349 }, { "epoch": 0.6191950464396285, "grad_norm": 0.17879889905452728, "learning_rate": 0.0009206632768433711, "loss": 0.938, "step": 1350 }, { "epoch": 0.6196537094369912, "grad_norm": 0.30431312322616577, "learning_rate": 0.0009205293677556413, "loss": 1.2913, "step": 1351 }, { "epoch": 0.6201123724343539, "grad_norm": 0.09835697710514069, "learning_rate": 0.0009203953555090252, "loss": 0.4719, "step": 1352 }, { "epoch": 0.6205710354317165, "grad_norm": 0.3493775427341461, "learning_rate": 0.0009202612401363972, "loss": 1.7051, "step": 1353 }, { "epoch": 0.6210296984290792, "grad_norm": 0.35240983963012695, "learning_rate": 0.0009201270216706568, "loss": 1.2707, "step": 1354 }, { "epoch": 0.6214883614264419, "grad_norm": 0.08556913584470749, "learning_rate": 0.0009199927001447287, "loss": 0.4551, "step": 1355 }, { "epoch": 0.6219470244238046, "grad_norm": 0.18923942744731903, "learning_rate": 0.000919858275591563, "loss": 0.9558, "step": 1356 }, { "epoch": 0.6224056874211673, "grad_norm": 0.22347323596477509, "learning_rate": 0.000919723748044135, "loss": 1.0806, "step": 1357 }, { "epoch": 0.6228643504185299, "grad_norm": 1.7624882459640503, "learning_rate": 0.0009195891175354451, "loss": 1.8876, "step": 1358 }, { "epoch": 0.6233230134158927, "grad_norm": 0.20518071949481964, "learning_rate": 0.0009194543840985193, "loss": 0.9335, "step": 1359 }, { "epoch": 0.6237816764132553, "grad_norm": 0.24788199365139008, "learning_rate": 0.0009193195477664087, "loss": 1.2626, "step": 1360 }, { "epoch": 0.6242403394106181, "grad_norm": 0.1483655720949173, "learning_rate": 0.0009191846085721896, "loss": 0.8627, "step": 1361 }, { "epoch": 0.6246990024079807, "grad_norm": 0.42687055468559265, "learning_rate": 0.0009190495665489635, "loss": 1.7659, "step": 1362 }, { "epoch": 0.6251576654053435, "grad_norm": 0.23796336352825165, "learning_rate": 0.0009189144217298571, "loss": 0.9816, "step": 1363 }, { "epoch": 0.6256163284027061, "grad_norm": 0.30603915452957153, "learning_rate": 0.0009187791741480227, "loss": 1.3668, "step": 1364 }, { "epoch": 0.6260749914000688, "grad_norm": 0.2691842019557953, "learning_rate": 0.0009186438238366373, "loss": 1.4046, "step": 1365 }, { "epoch": 0.6265336543974315, "grad_norm": 0.3278261125087738, "learning_rate": 0.0009185083708289032, "loss": 1.6694, "step": 1366 }, { "epoch": 0.6269923173947942, "grad_norm": 0.319321870803833, "learning_rate": 0.0009183728151580484, "loss": 1.8997, "step": 1367 }, { "epoch": 0.6274509803921569, "grad_norm": 0.2504161596298218, "learning_rate": 0.0009182371568573252, "loss": 1.1439, "step": 1368 }, { "epoch": 0.6279096433895196, "grad_norm": 0.35240206122398376, "learning_rate": 0.0009181013959600119, "loss": 1.7751, "step": 1369 }, { "epoch": 0.6283683063868822, "grad_norm": 0.30558404326438904, "learning_rate": 0.0009179655324994114, "loss": 1.4472, "step": 1370 }, { "epoch": 0.6288269693842449, "grad_norm": 0.3027516305446625, "learning_rate": 0.0009178295665088522, "loss": 1.431, "step": 1371 }, { "epoch": 0.6292856323816076, "grad_norm": 0.1590060144662857, "learning_rate": 0.0009176934980216876, "loss": 0.7231, "step": 1372 }, { "epoch": 0.6297442953789703, "grad_norm": 0.280381441116333, "learning_rate": 0.0009175573270712961, "loss": 1.2524, "step": 1373 }, { "epoch": 0.630202958376333, "grad_norm": 0.2787134051322937, "learning_rate": 0.0009174210536910816, "loss": 1.3318, "step": 1374 }, { "epoch": 0.6306616213736956, "grad_norm": 0.24251703917980194, "learning_rate": 0.0009172846779144729, "loss": 1.301, "step": 1375 }, { "epoch": 0.6311202843710584, "grad_norm": 0.20021305978298187, "learning_rate": 0.0009171481997749239, "loss": 1.0351, "step": 1376 }, { "epoch": 0.631578947368421, "grad_norm": 0.19602487981319427, "learning_rate": 0.0009170116193059138, "loss": 0.8918, "step": 1377 }, { "epoch": 0.6320376103657838, "grad_norm": 0.2832145690917969, "learning_rate": 0.0009168749365409466, "loss": 1.3265, "step": 1378 }, { "epoch": 0.6324962733631464, "grad_norm": 0.2144933044910431, "learning_rate": 0.0009167381515135515, "loss": 0.8716, "step": 1379 }, { "epoch": 0.6329549363605091, "grad_norm": 0.17951545119285583, "learning_rate": 0.0009166012642572832, "loss": 0.7958, "step": 1380 }, { "epoch": 0.6334135993578718, "grad_norm": 0.06861251592636108, "learning_rate": 0.0009164642748057208, "loss": 0.403, "step": 1381 }, { "epoch": 0.6338722623552345, "grad_norm": 0.08268727362155914, "learning_rate": 0.0009163271831924689, "loss": 0.4848, "step": 1382 }, { "epoch": 0.6343309253525972, "grad_norm": 0.26671648025512695, "learning_rate": 0.0009161899894511572, "loss": 1.2793, "step": 1383 }, { "epoch": 0.6347895883499599, "grad_norm": 0.31374937295913696, "learning_rate": 0.00091605269361544, "loss": 1.6044, "step": 1384 }, { "epoch": 0.6352482513473225, "grad_norm": 0.3432426452636719, "learning_rate": 0.0009159152957189975, "loss": 1.7935, "step": 1385 }, { "epoch": 0.6357069143446853, "grad_norm": 0.3372509479522705, "learning_rate": 0.0009157777957955337, "loss": 1.8975, "step": 1386 }, { "epoch": 0.6361655773420479, "grad_norm": 0.3484379053115845, "learning_rate": 0.000915640193878779, "loss": 1.6467, "step": 1387 }, { "epoch": 0.6366242403394106, "grad_norm": 0.268187016248703, "learning_rate": 0.0009155024900024877, "loss": 1.3937, "step": 1388 }, { "epoch": 0.6370829033367733, "grad_norm": 0.27969205379486084, "learning_rate": 0.0009153646842004396, "loss": 1.2155, "step": 1389 }, { "epoch": 0.637541566334136, "grad_norm": 0.22392551600933075, "learning_rate": 0.0009152267765064395, "loss": 0.8904, "step": 1390 }, { "epoch": 0.6380002293314987, "grad_norm": 0.3370068371295929, "learning_rate": 0.0009150887669543173, "loss": 1.496, "step": 1391 }, { "epoch": 0.6384588923288613, "grad_norm": 0.4173247814178467, "learning_rate": 0.0009149506555779277, "loss": 2.1025, "step": 1392 }, { "epoch": 0.6389175553262241, "grad_norm": 0.3257886469364166, "learning_rate": 0.0009148124424111501, "loss": 1.2627, "step": 1393 }, { "epoch": 0.6393762183235867, "grad_norm": 0.34521573781967163, "learning_rate": 0.0009146741274878896, "loss": 1.6971, "step": 1394 }, { "epoch": 0.6398348813209495, "grad_norm": 0.43050798773765564, "learning_rate": 0.0009145357108420756, "loss": 1.8064, "step": 1395 }, { "epoch": 0.6402935443183121, "grad_norm": 0.31571662425994873, "learning_rate": 0.0009143971925076629, "loss": 1.3123, "step": 1396 }, { "epoch": 0.6407522073156748, "grad_norm": 0.3214929401874542, "learning_rate": 0.0009142585725186307, "loss": 1.5576, "step": 1397 }, { "epoch": 0.6412108703130375, "grad_norm": 0.3679821491241455, "learning_rate": 0.0009141198509089838, "loss": 1.677, "step": 1398 }, { "epoch": 0.6416695333104002, "grad_norm": 0.3775405287742615, "learning_rate": 0.0009139810277127516, "loss": 1.8577, "step": 1399 }, { "epoch": 0.6421281963077629, "grad_norm": 0.12690681219100952, "learning_rate": 0.0009138421029639882, "loss": 0.6524, "step": 1400 }, { "epoch": 0.6425868593051256, "grad_norm": 0.22937628626823425, "learning_rate": 0.0009137030766967731, "loss": 0.9138, "step": 1401 }, { "epoch": 0.6430455223024882, "grad_norm": 0.29358240962028503, "learning_rate": 0.0009135639489452103, "loss": 1.4675, "step": 1402 }, { "epoch": 0.643504185299851, "grad_norm": 0.30726784467697144, "learning_rate": 0.0009134247197434288, "loss": 1.5634, "step": 1403 }, { "epoch": 0.6439628482972136, "grad_norm": 0.2912197411060333, "learning_rate": 0.0009132853891255827, "loss": 1.3088, "step": 1404 }, { "epoch": 0.6444215112945763, "grad_norm": 0.2756345868110657, "learning_rate": 0.0009131459571258507, "loss": 1.5412, "step": 1405 }, { "epoch": 0.644880174291939, "grad_norm": 0.41657257080078125, "learning_rate": 0.0009130064237784364, "loss": 1.8962, "step": 1406 }, { "epoch": 0.6453388372893016, "grad_norm": 0.270457923412323, "learning_rate": 0.0009128667891175685, "loss": 1.441, "step": 1407 }, { "epoch": 0.6457975002866644, "grad_norm": 0.3133867383003235, "learning_rate": 0.0009127270531775003, "loss": 1.7017, "step": 1408 }, { "epoch": 0.646256163284027, "grad_norm": 0.44148820638656616, "learning_rate": 0.00091258721599251, "loss": 1.6197, "step": 1409 }, { "epoch": 0.6467148262813898, "grad_norm": 0.33237019181251526, "learning_rate": 0.0009124472775969006, "loss": 1.6628, "step": 1410 }, { "epoch": 0.6471734892787524, "grad_norm": 0.3485216796398163, "learning_rate": 0.0009123072380250003, "loss": 0.9962, "step": 1411 }, { "epoch": 0.6476321522761151, "grad_norm": 0.25593242049217224, "learning_rate": 0.0009121670973111616, "loss": 1.3634, "step": 1412 }, { "epoch": 0.6480908152734778, "grad_norm": 0.20914927124977112, "learning_rate": 0.000912026855489762, "loss": 0.9994, "step": 1413 }, { "epoch": 0.6485494782708405, "grad_norm": 0.2085810899734497, "learning_rate": 0.0009118865125952038, "loss": 1.0074, "step": 1414 }, { "epoch": 0.6490081412682032, "grad_norm": 0.1792818307876587, "learning_rate": 0.0009117460686619143, "loss": 0.774, "step": 1415 }, { "epoch": 0.6494668042655659, "grad_norm": 0.24848847091197968, "learning_rate": 0.0009116055237243454, "loss": 1.1238, "step": 1416 }, { "epoch": 0.6499254672629285, "grad_norm": 0.2472166121006012, "learning_rate": 0.0009114648778169735, "loss": 1.1925, "step": 1417 }, { "epoch": 0.6503841302602913, "grad_norm": 0.30023932456970215, "learning_rate": 0.0009113241309743003, "loss": 1.6653, "step": 1418 }, { "epoch": 0.6508427932576539, "grad_norm": 0.2693098783493042, "learning_rate": 0.0009111832832308522, "loss": 1.4279, "step": 1419 }, { "epoch": 0.6513014562550167, "grad_norm": 0.2827019691467285, "learning_rate": 0.0009110423346211797, "loss": 1.5595, "step": 1420 }, { "epoch": 0.6517601192523793, "grad_norm": 0.31124547123908997, "learning_rate": 0.0009109012851798588, "loss": 1.7781, "step": 1421 }, { "epoch": 0.6522187822497421, "grad_norm": 0.23482073843479156, "learning_rate": 0.0009107601349414899, "loss": 1.1257, "step": 1422 }, { "epoch": 0.6526774452471047, "grad_norm": 0.34504595398902893, "learning_rate": 0.0009106188839406982, "loss": 1.9086, "step": 1423 }, { "epoch": 0.6531361082444673, "grad_norm": 0.1269492506980896, "learning_rate": 0.0009104775322121334, "loss": 0.7008, "step": 1424 }, { "epoch": 0.6535947712418301, "grad_norm": 0.2349303960800171, "learning_rate": 0.0009103360797904705, "loss": 1.3405, "step": 1425 }, { "epoch": 0.6540534342391927, "grad_norm": 0.27141550183296204, "learning_rate": 0.0009101945267104084, "loss": 1.4506, "step": 1426 }, { "epoch": 0.6545120972365555, "grad_norm": 0.28015798330307007, "learning_rate": 0.0009100528730066712, "loss": 1.3356, "step": 1427 }, { "epoch": 0.6549707602339181, "grad_norm": 0.07972334325313568, "learning_rate": 0.0009099111187140078, "loss": 0.5118, "step": 1428 }, { "epoch": 0.6554294232312808, "grad_norm": 0.2610866129398346, "learning_rate": 0.0009097692638671913, "loss": 1.1424, "step": 1429 }, { "epoch": 0.6558880862286435, "grad_norm": 0.46445217728614807, "learning_rate": 0.0009096273085010197, "loss": 2.2559, "step": 1430 }, { "epoch": 0.6563467492260062, "grad_norm": 0.2951407730579376, "learning_rate": 0.0009094852526503158, "loss": 1.4451, "step": 1431 }, { "epoch": 0.6568054122233689, "grad_norm": 0.11164001375436783, "learning_rate": 0.0009093430963499269, "loss": 0.6334, "step": 1432 }, { "epoch": 0.6572640752207316, "grad_norm": 0.3511189818382263, "learning_rate": 0.0009092008396347249, "loss": 1.946, "step": 1433 }, { "epoch": 0.6577227382180942, "grad_norm": 0.29423317313194275, "learning_rate": 0.0009090584825396064, "loss": 1.4645, "step": 1434 }, { "epoch": 0.658181401215457, "grad_norm": 0.2262077033519745, "learning_rate": 0.0009089160250994928, "loss": 1.0862, "step": 1435 }, { "epoch": 0.6586400642128196, "grad_norm": 0.2860635221004486, "learning_rate": 0.0009087734673493298, "loss": 1.3347, "step": 1436 }, { "epoch": 0.6590987272101824, "grad_norm": 0.3139696717262268, "learning_rate": 0.0009086308093240878, "loss": 1.4883, "step": 1437 }, { "epoch": 0.659557390207545, "grad_norm": 0.3125250041484833, "learning_rate": 0.0009084880510587619, "loss": 1.6454, "step": 1438 }, { "epoch": 0.6600160532049077, "grad_norm": 0.2761354446411133, "learning_rate": 0.0009083451925883716, "loss": 1.4274, "step": 1439 }, { "epoch": 0.6604747162022704, "grad_norm": 0.20550890266895294, "learning_rate": 0.0009082022339479615, "loss": 0.9324, "step": 1440 }, { "epoch": 0.660933379199633, "grad_norm": 0.18471978604793549, "learning_rate": 0.0009080591751726, "loss": 0.9747, "step": 1441 }, { "epoch": 0.6613920421969958, "grad_norm": 0.23342064023017883, "learning_rate": 0.0009079160162973805, "loss": 1.2667, "step": 1442 }, { "epoch": 0.6618507051943584, "grad_norm": 0.4921042025089264, "learning_rate": 0.0009077727573574211, "loss": 2.2029, "step": 1443 }, { "epoch": 0.6623093681917211, "grad_norm": 0.2813926935195923, "learning_rate": 0.000907629398387864, "loss": 1.328, "step": 1444 }, { "epoch": 0.6627680311890838, "grad_norm": 0.18378609418869019, "learning_rate": 0.0009074859394238763, "loss": 0.8692, "step": 1445 }, { "epoch": 0.6632266941864465, "grad_norm": 0.11556479334831238, "learning_rate": 0.0009073423805006495, "loss": 0.556, "step": 1446 }, { "epoch": 0.6636853571838092, "grad_norm": 0.28978320956230164, "learning_rate": 0.0009071987216533999, "loss": 0.9043, "step": 1447 }, { "epoch": 0.6641440201811719, "grad_norm": 0.41565844416618347, "learning_rate": 0.0009070549629173677, "loss": 1.9819, "step": 1448 }, { "epoch": 0.6646026831785345, "grad_norm": 0.3662017285823822, "learning_rate": 0.0009069111043278181, "loss": 1.5942, "step": 1449 }, { "epoch": 0.6650613461758973, "grad_norm": 0.31095772981643677, "learning_rate": 0.0009067671459200406, "loss": 1.3741, "step": 1450 }, { "epoch": 0.6655200091732599, "grad_norm": 0.33298200368881226, "learning_rate": 0.0009066230877293493, "loss": 1.777, "step": 1451 }, { "epoch": 0.6659786721706227, "grad_norm": 0.28431054949760437, "learning_rate": 0.0009064789297910826, "loss": 1.4286, "step": 1452 }, { "epoch": 0.6664373351679853, "grad_norm": 0.32433438301086426, "learning_rate": 0.0009063346721406037, "loss": 1.8176, "step": 1453 }, { "epoch": 0.6668959981653481, "grad_norm": 0.24182525277137756, "learning_rate": 0.0009061903148132997, "loss": 1.2091, "step": 1454 }, { "epoch": 0.6673546611627107, "grad_norm": 0.21921652555465698, "learning_rate": 0.0009060458578445829, "loss": 1.0043, "step": 1455 }, { "epoch": 0.6678133241600734, "grad_norm": 0.3465001881122589, "learning_rate": 0.0009059013012698892, "loss": 1.97, "step": 1456 }, { "epoch": 0.6682719871574361, "grad_norm": 0.3193022310733795, "learning_rate": 0.0009057566451246797, "loss": 1.6008, "step": 1457 }, { "epoch": 0.6687306501547987, "grad_norm": 0.261091947555542, "learning_rate": 0.0009056118894444396, "loss": 1.1853, "step": 1458 }, { "epoch": 0.6691893131521615, "grad_norm": 0.21906504034996033, "learning_rate": 0.0009054670342646782, "loss": 0.846, "step": 1459 }, { "epoch": 0.6696479761495241, "grad_norm": 0.37668684124946594, "learning_rate": 0.0009053220796209298, "loss": 1.3909, "step": 1460 }, { "epoch": 0.6701066391468868, "grad_norm": 0.38679805397987366, "learning_rate": 0.000905177025548753, "loss": 1.8932, "step": 1461 }, { "epoch": 0.6705653021442495, "grad_norm": 0.09460335969924927, "learning_rate": 0.00090503187208373, "loss": 0.5168, "step": 1462 }, { "epoch": 0.6710239651416122, "grad_norm": 0.1873873472213745, "learning_rate": 0.0009048866192614685, "loss": 1.0623, "step": 1463 }, { "epoch": 0.6714826281389749, "grad_norm": 0.34352460503578186, "learning_rate": 0.0009047412671175999, "loss": 1.9176, "step": 1464 }, { "epoch": 0.6719412911363376, "grad_norm": 0.36623653769493103, "learning_rate": 0.0009045958156877801, "loss": 1.3452, "step": 1465 }, { "epoch": 0.6723999541337002, "grad_norm": 0.2123580127954483, "learning_rate": 0.0009044502650076895, "loss": 0.9063, "step": 1466 }, { "epoch": 0.672858617131063, "grad_norm": 0.3140578866004944, "learning_rate": 0.0009043046151130326, "loss": 1.7998, "step": 1467 }, { "epoch": 0.6733172801284256, "grad_norm": 0.32382750511169434, "learning_rate": 0.0009041588660395385, "loss": 1.5197, "step": 1468 }, { "epoch": 0.6737759431257884, "grad_norm": 0.3363771140575409, "learning_rate": 0.0009040130178229604, "loss": 2.1333, "step": 1469 }, { "epoch": 0.674234606123151, "grad_norm": 0.3622763156890869, "learning_rate": 0.0009038670704990759, "loss": 1.6564, "step": 1470 }, { "epoch": 0.6746932691205138, "grad_norm": 0.3316439092159271, "learning_rate": 0.000903721024103687, "loss": 1.965, "step": 1471 }, { "epoch": 0.6751519321178764, "grad_norm": 0.28195494413375854, "learning_rate": 0.0009035748786726199, "loss": 1.2933, "step": 1472 }, { "epoch": 0.6756105951152391, "grad_norm": 0.26023930311203003, "learning_rate": 0.0009034286342417251, "loss": 1.2734, "step": 1473 }, { "epoch": 0.6760692581126018, "grad_norm": 0.39502447843551636, "learning_rate": 0.0009032822908468775, "loss": 2.2822, "step": 1474 }, { "epoch": 0.6765279211099644, "grad_norm": 0.20976006984710693, "learning_rate": 0.0009031358485239761, "loss": 1.0251, "step": 1475 }, { "epoch": 0.6769865841073271, "grad_norm": 0.3310341238975525, "learning_rate": 0.0009029893073089443, "loss": 1.9667, "step": 1476 }, { "epoch": 0.6774452471046898, "grad_norm": 0.2633422911167145, "learning_rate": 0.0009028426672377297, "loss": 1.5052, "step": 1477 }, { "epoch": 0.6779039101020525, "grad_norm": 0.20135562121868134, "learning_rate": 0.0009026959283463044, "loss": 1.1064, "step": 1478 }, { "epoch": 0.6783625730994152, "grad_norm": 0.43766430020332336, "learning_rate": 0.000902549090670664, "loss": 2.6384, "step": 1479 }, { "epoch": 0.6788212360967779, "grad_norm": 0.35967737436294556, "learning_rate": 0.0009024021542468292, "loss": 2.2778, "step": 1480 }, { "epoch": 0.6792798990941405, "grad_norm": 0.2625736892223358, "learning_rate": 0.0009022551191108446, "loss": 1.4241, "step": 1481 }, { "epoch": 0.6797385620915033, "grad_norm": 0.41545170545578003, "learning_rate": 0.0009021079852987788, "loss": 2.2012, "step": 1482 }, { "epoch": 0.6801972250888659, "grad_norm": 0.2915220856666565, "learning_rate": 0.0009019607528467249, "loss": 1.3696, "step": 1483 }, { "epoch": 0.6806558880862287, "grad_norm": 0.274554967880249, "learning_rate": 0.0009018134217907999, "loss": 1.3186, "step": 1484 }, { "epoch": 0.6811145510835913, "grad_norm": 0.2678084671497345, "learning_rate": 0.0009016659921671454, "loss": 1.5616, "step": 1485 }, { "epoch": 0.6815732140809541, "grad_norm": 0.22896720468997955, "learning_rate": 0.000901518464011927, "loss": 1.2181, "step": 1486 }, { "epoch": 0.6820318770783167, "grad_norm": 0.3303893506526947, "learning_rate": 0.0009013708373613341, "loss": 1.9144, "step": 1487 }, { "epoch": 0.6824905400756794, "grad_norm": 0.34536314010620117, "learning_rate": 0.0009012231122515807, "loss": 1.5248, "step": 1488 }, { "epoch": 0.6829492030730421, "grad_norm": 0.2860594093799591, "learning_rate": 0.0009010752887189051, "loss": 1.5133, "step": 1489 }, { "epoch": 0.6834078660704048, "grad_norm": 0.15731200575828552, "learning_rate": 0.0009009273667995691, "loss": 0.7388, "step": 1490 }, { "epoch": 0.6838665290677675, "grad_norm": 0.2773778438568115, "learning_rate": 0.0009007793465298593, "loss": 1.5673, "step": 1491 }, { "epoch": 0.6843251920651301, "grad_norm": 0.3588542342185974, "learning_rate": 0.000900631227946086, "loss": 1.9658, "step": 1492 }, { "epoch": 0.6847838550624928, "grad_norm": 0.3078388273715973, "learning_rate": 0.0009004830110845838, "loss": 1.25, "step": 1493 }, { "epoch": 0.6852425180598555, "grad_norm": 0.34084051847457886, "learning_rate": 0.0009003346959817113, "loss": 1.7067, "step": 1494 }, { "epoch": 0.6857011810572182, "grad_norm": 0.34470683336257935, "learning_rate": 0.0009001862826738514, "loss": 1.8342, "step": 1495 }, { "epoch": 0.6861598440545809, "grad_norm": 0.281382292509079, "learning_rate": 0.0009000377711974109, "loss": 1.2902, "step": 1496 }, { "epoch": 0.6866185070519436, "grad_norm": 0.2875047028064728, "learning_rate": 0.0008998891615888205, "loss": 1.4417, "step": 1497 }, { "epoch": 0.6870771700493062, "grad_norm": 0.37324872612953186, "learning_rate": 0.0008997404538845355, "loss": 1.9172, "step": 1498 }, { "epoch": 0.687535833046669, "grad_norm": 0.35222485661506653, "learning_rate": 0.0008995916481210349, "loss": 1.7999, "step": 1499 }, { "epoch": 0.6879944960440316, "grad_norm": 0.28432413935661316, "learning_rate": 0.0008994427443348217, "loss": 1.3091, "step": 1500 }, { "epoch": 0.6884531590413944, "grad_norm": 0.3234626054763794, "learning_rate": 0.0008992937425624235, "loss": 1.6951, "step": 1501 }, { "epoch": 0.688911822038757, "grad_norm": 0.3846684396266937, "learning_rate": 0.0008991446428403909, "loss": 2.1714, "step": 1502 }, { "epoch": 0.6893704850361198, "grad_norm": 0.3131919205188751, "learning_rate": 0.0008989954452052995, "loss": 1.3457, "step": 1503 }, { "epoch": 0.6898291480334824, "grad_norm": 0.19373203814029694, "learning_rate": 0.0008988461496937485, "loss": 0.9649, "step": 1504 }, { "epoch": 0.6902878110308451, "grad_norm": 0.41193410754203796, "learning_rate": 0.0008986967563423612, "loss": 1.9963, "step": 1505 }, { "epoch": 0.6907464740282078, "grad_norm": 0.3604571223258972, "learning_rate": 0.0008985472651877847, "loss": 1.7517, "step": 1506 }, { "epoch": 0.6912051370255705, "grad_norm": 0.3667939007282257, "learning_rate": 0.0008983976762666905, "loss": 1.6728, "step": 1507 }, { "epoch": 0.6916638000229332, "grad_norm": 0.3759375214576721, "learning_rate": 0.0008982479896157737, "loss": 2.1001, "step": 1508 }, { "epoch": 0.6921224630202958, "grad_norm": 0.37497055530548096, "learning_rate": 0.0008980982052717534, "loss": 1.7802, "step": 1509 }, { "epoch": 0.6925811260176585, "grad_norm": 0.23320765793323517, "learning_rate": 0.0008979483232713731, "loss": 1.096, "step": 1510 }, { "epoch": 0.6930397890150212, "grad_norm": 0.27966445684432983, "learning_rate": 0.0008977983436513997, "loss": 1.3906, "step": 1511 }, { "epoch": 0.6934984520123839, "grad_norm": 0.2946244180202484, "learning_rate": 0.0008976482664486241, "loss": 1.5397, "step": 1512 }, { "epoch": 0.6939571150097466, "grad_norm": 0.3360169231891632, "learning_rate": 0.0008974980916998618, "loss": 1.6011, "step": 1513 }, { "epoch": 0.6944157780071093, "grad_norm": 0.28576546907424927, "learning_rate": 0.0008973478194419515, "loss": 1.4437, "step": 1514 }, { "epoch": 0.6948744410044719, "grad_norm": 0.25533977150917053, "learning_rate": 0.000897197449711756, "loss": 1.0789, "step": 1515 }, { "epoch": 0.6953331040018347, "grad_norm": 0.08754102140665054, "learning_rate": 0.000897046982546162, "loss": 0.4806, "step": 1516 }, { "epoch": 0.6957917669991973, "grad_norm": 0.2931134104728699, "learning_rate": 0.0008968964179820806, "loss": 1.8093, "step": 1517 }, { "epoch": 0.6962504299965601, "grad_norm": 0.10447783023118973, "learning_rate": 0.0008967457560564459, "loss": 0.5872, "step": 1518 }, { "epoch": 0.6967090929939227, "grad_norm": 0.1723310351371765, "learning_rate": 0.0008965949968062166, "loss": 0.758, "step": 1519 }, { "epoch": 0.6971677559912854, "grad_norm": 0.27590087056159973, "learning_rate": 0.000896444140268375, "loss": 1.2286, "step": 1520 }, { "epoch": 0.6976264189886481, "grad_norm": 0.3477742671966553, "learning_rate": 0.0008962931864799272, "loss": 1.5517, "step": 1521 }, { "epoch": 0.6980850819860108, "grad_norm": 0.34647393226623535, "learning_rate": 0.0008961421354779036, "loss": 1.5421, "step": 1522 }, { "epoch": 0.6985437449833735, "grad_norm": 0.36403578519821167, "learning_rate": 0.0008959909872993574, "loss": 1.9447, "step": 1523 }, { "epoch": 0.6990024079807362, "grad_norm": 0.33395373821258545, "learning_rate": 0.0008958397419813671, "loss": 1.6851, "step": 1524 }, { "epoch": 0.6994610709780988, "grad_norm": 0.23508980870246887, "learning_rate": 0.0008956883995610338, "loss": 1.063, "step": 1525 }, { "epoch": 0.6999197339754615, "grad_norm": 0.29683560132980347, "learning_rate": 0.0008955369600754831, "loss": 1.3033, "step": 1526 }, { "epoch": 0.7003783969728242, "grad_norm": 0.238669291138649, "learning_rate": 0.0008953854235618641, "loss": 1.0421, "step": 1527 }, { "epoch": 0.7008370599701869, "grad_norm": 0.31371814012527466, "learning_rate": 0.00089523379005735, "loss": 1.0029, "step": 1528 }, { "epoch": 0.7012957229675496, "grad_norm": 0.2695467472076416, "learning_rate": 0.0008950820595991371, "loss": 1.3376, "step": 1529 }, { "epoch": 0.7017543859649122, "grad_norm": 0.17910736799240112, "learning_rate": 0.0008949302322244465, "loss": 0.9264, "step": 1530 }, { "epoch": 0.702213048962275, "grad_norm": 0.4682910144329071, "learning_rate": 0.0008947783079705223, "loss": 1.4216, "step": 1531 }, { "epoch": 0.7026717119596376, "grad_norm": 0.37215444445610046, "learning_rate": 0.0008946262868746327, "loss": 2.3613, "step": 1532 }, { "epoch": 0.7031303749570004, "grad_norm": 0.25320783257484436, "learning_rate": 0.0008944741689740695, "loss": 1.2519, "step": 1533 }, { "epoch": 0.703589037954363, "grad_norm": 0.24361571669578552, "learning_rate": 0.0008943219543061481, "loss": 1.191, "step": 1534 }, { "epoch": 0.7040477009517258, "grad_norm": 0.3469868004322052, "learning_rate": 0.0008941696429082084, "loss": 1.9496, "step": 1535 }, { "epoch": 0.7045063639490884, "grad_norm": 0.3765385150909424, "learning_rate": 0.0008940172348176132, "loss": 2.0288, "step": 1536 }, { "epoch": 0.7049650269464511, "grad_norm": 0.23416545987129211, "learning_rate": 0.0008938647300717491, "loss": 0.9575, "step": 1537 }, { "epoch": 0.7054236899438138, "grad_norm": 0.3623865246772766, "learning_rate": 0.0008937121287080268, "loss": 1.8331, "step": 1538 }, { "epoch": 0.7058823529411765, "grad_norm": 0.20962877571582794, "learning_rate": 0.0008935594307638806, "loss": 1.1107, "step": 1539 }, { "epoch": 0.7063410159385392, "grad_norm": 0.24036376178264618, "learning_rate": 0.0008934066362767684, "loss": 1.1372, "step": 1540 }, { "epoch": 0.7067996789359019, "grad_norm": 0.0797274112701416, "learning_rate": 0.0008932537452841716, "loss": 0.4638, "step": 1541 }, { "epoch": 0.7072583419332645, "grad_norm": 0.32924991846084595, "learning_rate": 0.0008931007578235957, "loss": 1.4589, "step": 1542 }, { "epoch": 0.7077170049306272, "grad_norm": 0.3090713322162628, "learning_rate": 0.0008929476739325694, "loss": 1.2155, "step": 1543 }, { "epoch": 0.7081756679279899, "grad_norm": 0.32841405272483826, "learning_rate": 0.0008927944936486454, "loss": 1.3251, "step": 1544 }, { "epoch": 0.7086343309253526, "grad_norm": 0.2854306697845459, "learning_rate": 0.0008926412170093998, "loss": 1.2567, "step": 1545 }, { "epoch": 0.7090929939227153, "grad_norm": 0.3006875514984131, "learning_rate": 0.0008924878440524326, "loss": 1.7976, "step": 1546 }, { "epoch": 0.7095516569200779, "grad_norm": 0.42627203464508057, "learning_rate": 0.0008923343748153674, "loss": 1.429, "step": 1547 }, { "epoch": 0.7100103199174407, "grad_norm": 0.36591845750808716, "learning_rate": 0.0008921808093358512, "loss": 2.0798, "step": 1548 }, { "epoch": 0.7104689829148033, "grad_norm": 0.2793489694595337, "learning_rate": 0.0008920271476515547, "loss": 1.7866, "step": 1549 }, { "epoch": 0.7109276459121661, "grad_norm": 0.24239283800125122, "learning_rate": 0.0008918733898001721, "loss": 1.1033, "step": 1550 }, { "epoch": 0.7113863089095287, "grad_norm": 0.42722949385643005, "learning_rate": 0.0008917195358194214, "loss": 1.3158, "step": 1551 }, { "epoch": 0.7118449719068914, "grad_norm": 0.10515722632408142, "learning_rate": 0.0008915655857470443, "loss": 0.5974, "step": 1552 }, { "epoch": 0.7123036349042541, "grad_norm": 0.3216477334499359, "learning_rate": 0.0008914115396208056, "loss": 1.8257, "step": 1553 }, { "epoch": 0.7127622979016168, "grad_norm": 0.27405673265457153, "learning_rate": 0.000891257397478494, "loss": 1.3561, "step": 1554 }, { "epoch": 0.7132209608989795, "grad_norm": 0.2392241358757019, "learning_rate": 0.0008911031593579217, "loss": 1.3283, "step": 1555 }, { "epoch": 0.7136796238963422, "grad_norm": 0.3045576810836792, "learning_rate": 0.0008909488252969244, "loss": 1.3752, "step": 1556 }, { "epoch": 0.7141382868937048, "grad_norm": 0.34698760509490967, "learning_rate": 0.0008907943953333613, "loss": 1.8883, "step": 1557 }, { "epoch": 0.7145969498910676, "grad_norm": 0.3657462000846863, "learning_rate": 0.0008906398695051153, "loss": 2.2324, "step": 1558 }, { "epoch": 0.7150556128884302, "grad_norm": 0.2741200029850006, "learning_rate": 0.0008904852478500927, "loss": 1.3587, "step": 1559 }, { "epoch": 0.7155142758857929, "grad_norm": 0.3094598352909088, "learning_rate": 0.0008903305304062232, "loss": 1.7721, "step": 1560 }, { "epoch": 0.7159729388831556, "grad_norm": 0.45374658703804016, "learning_rate": 0.0008901757172114601, "loss": 2.1138, "step": 1561 }, { "epoch": 0.7164316018805182, "grad_norm": 0.6525793671607971, "learning_rate": 0.0008900208083037804, "loss": 1.7445, "step": 1562 }, { "epoch": 0.716890264877881, "grad_norm": 0.39578354358673096, "learning_rate": 0.0008898658037211842, "loss": 1.2509, "step": 1563 }, { "epoch": 0.7173489278752436, "grad_norm": 0.348197340965271, "learning_rate": 0.0008897107035016952, "loss": 1.8569, "step": 1564 }, { "epoch": 0.7178075908726064, "grad_norm": 0.3598276972770691, "learning_rate": 0.0008895555076833607, "loss": 1.7867, "step": 1565 }, { "epoch": 0.718266253869969, "grad_norm": 0.2803894579410553, "learning_rate": 0.0008894002163042514, "loss": 1.3177, "step": 1566 }, { "epoch": 0.7187249168673318, "grad_norm": 0.3473854064941406, "learning_rate": 0.0008892448294024612, "loss": 1.8008, "step": 1567 }, { "epoch": 0.7191835798646944, "grad_norm": 0.2351778894662857, "learning_rate": 0.0008890893470161078, "loss": 0.7619, "step": 1568 }, { "epoch": 0.7196422428620571, "grad_norm": 0.3453526794910431, "learning_rate": 0.0008889337691833321, "loss": 1.9944, "step": 1569 }, { "epoch": 0.7201009058594198, "grad_norm": 0.40439310669898987, "learning_rate": 0.0008887780959422984, "loss": 1.496, "step": 1570 }, { "epoch": 0.7205595688567825, "grad_norm": 0.3145516514778137, "learning_rate": 0.0008886223273311946, "loss": 1.8382, "step": 1571 }, { "epoch": 0.7210182318541452, "grad_norm": 0.33060380816459656, "learning_rate": 0.0008884664633882317, "loss": 1.4937, "step": 1572 }, { "epoch": 0.7214768948515079, "grad_norm": 0.11799333989620209, "learning_rate": 0.0008883105041516445, "loss": 0.6943, "step": 1573 }, { "epoch": 0.7219355578488705, "grad_norm": 0.3208516538143158, "learning_rate": 0.0008881544496596907, "loss": 1.3055, "step": 1574 }, { "epoch": 0.7223942208462333, "grad_norm": 0.29803258180618286, "learning_rate": 0.0008879982999506518, "loss": 1.3234, "step": 1575 }, { "epoch": 0.7228528838435959, "grad_norm": 0.2925770580768585, "learning_rate": 0.000887842055062832, "loss": 1.3806, "step": 1576 }, { "epoch": 0.7233115468409586, "grad_norm": 0.3469628095626831, "learning_rate": 0.0008876857150345598, "loss": 1.9561, "step": 1577 }, { "epoch": 0.7237702098383213, "grad_norm": 0.2605293393135071, "learning_rate": 0.0008875292799041863, "loss": 1.2745, "step": 1578 }, { "epoch": 0.7242288728356839, "grad_norm": 0.16196803748607635, "learning_rate": 0.0008873727497100862, "loss": 0.758, "step": 1579 }, { "epoch": 0.7246875358330467, "grad_norm": 0.35878610610961914, "learning_rate": 0.0008872161244906576, "loss": 1.5375, "step": 1580 }, { "epoch": 0.7251461988304093, "grad_norm": 0.43217015266418457, "learning_rate": 0.0008870594042843216, "loss": 2.2036, "step": 1581 }, { "epoch": 0.7256048618277721, "grad_norm": 0.42131683230400085, "learning_rate": 0.0008869025891295228, "loss": 0.9508, "step": 1582 }, { "epoch": 0.7260635248251347, "grad_norm": 0.36816248297691345, "learning_rate": 0.0008867456790647292, "loss": 2.1968, "step": 1583 }, { "epoch": 0.7265221878224974, "grad_norm": 0.30012619495391846, "learning_rate": 0.0008865886741284321, "loss": 1.7261, "step": 1584 }, { "epoch": 0.7269808508198601, "grad_norm": 0.29164719581604004, "learning_rate": 0.0008864315743591457, "loss": 1.5405, "step": 1585 }, { "epoch": 0.7274395138172228, "grad_norm": 0.3047489821910858, "learning_rate": 0.0008862743797954078, "loss": 1.7762, "step": 1586 }, { "epoch": 0.7278981768145855, "grad_norm": 0.25507205724716187, "learning_rate": 0.0008861170904757794, "loss": 1.3858, "step": 1587 }, { "epoch": 0.7283568398119482, "grad_norm": 0.21503114700317383, "learning_rate": 0.0008859597064388445, "loss": 1.0641, "step": 1588 }, { "epoch": 0.7288155028093108, "grad_norm": 0.2100914567708969, "learning_rate": 0.0008858022277232107, "loss": 1.0745, "step": 1589 }, { "epoch": 0.7292741658066736, "grad_norm": 0.20805799961090088, "learning_rate": 0.0008856446543675088, "loss": 1.007, "step": 1590 }, { "epoch": 0.7297328288040362, "grad_norm": 0.34034040570259094, "learning_rate": 0.0008854869864103925, "loss": 1.625, "step": 1591 }, { "epoch": 0.730191491801399, "grad_norm": 0.3298024535179138, "learning_rate": 0.000885329223890539, "loss": 1.6735, "step": 1592 }, { "epoch": 0.7306501547987616, "grad_norm": 0.33766958117485046, "learning_rate": 0.0008851713668466484, "loss": 1.7477, "step": 1593 }, { "epoch": 0.7311088177961242, "grad_norm": 0.27652397751808167, "learning_rate": 0.0008850134153174443, "loss": 0.8909, "step": 1594 }, { "epoch": 0.731567480793487, "grad_norm": 0.40618276596069336, "learning_rate": 0.0008848553693416734, "loss": 1.8905, "step": 1595 }, { "epoch": 0.7320261437908496, "grad_norm": 0.3916473090648651, "learning_rate": 0.0008846972289581053, "loss": 1.6833, "step": 1596 }, { "epoch": 0.7324848067882124, "grad_norm": 0.3562237024307251, "learning_rate": 0.0008845389942055333, "loss": 1.2002, "step": 1597 }, { "epoch": 0.732943469785575, "grad_norm": 0.2905832827091217, "learning_rate": 0.0008843806651227733, "loss": 1.2439, "step": 1598 }, { "epoch": 0.7334021327829378, "grad_norm": 0.2873367369174957, "learning_rate": 0.0008842222417486646, "loss": 1.3354, "step": 1599 }, { "epoch": 0.7338607957803004, "grad_norm": 0.26076123118400574, "learning_rate": 0.0008840637241220696, "loss": 1.2548, "step": 1600 }, { "epoch": 0.7343194587776631, "grad_norm": 0.2180601954460144, "learning_rate": 0.0008839051122818737, "loss": 1.0288, "step": 1601 }, { "epoch": 0.7347781217750258, "grad_norm": 0.3309728801250458, "learning_rate": 0.000883746406266986, "loss": 1.4196, "step": 1602 }, { "epoch": 0.7352367847723885, "grad_norm": 0.403773695230484, "learning_rate": 0.0008835876061163377, "loss": 2.1072, "step": 1603 }, { "epoch": 0.7356954477697512, "grad_norm": 0.2361578643321991, "learning_rate": 0.0008834287118688837, "loss": 1.1724, "step": 1604 }, { "epoch": 0.7361541107671139, "grad_norm": 0.3139882981777191, "learning_rate": 0.0008832697235636023, "loss": 1.5528, "step": 1605 }, { "epoch": 0.7366127737644765, "grad_norm": 0.32773545384407043, "learning_rate": 0.0008831106412394938, "loss": 1.7954, "step": 1606 }, { "epoch": 0.7370714367618393, "grad_norm": 0.36932218074798584, "learning_rate": 0.0008829514649355829, "loss": 2.0931, "step": 1607 }, { "epoch": 0.7375300997592019, "grad_norm": 0.3764024078845978, "learning_rate": 0.0008827921946909164, "loss": 1.7544, "step": 1608 }, { "epoch": 0.7379887627565647, "grad_norm": 0.3229760527610779, "learning_rate": 0.0008826328305445644, "loss": 1.8318, "step": 1609 }, { "epoch": 0.7384474257539273, "grad_norm": 0.26198244094848633, "learning_rate": 0.0008824733725356202, "loss": 1.2097, "step": 1610 }, { "epoch": 0.7389060887512899, "grad_norm": 0.24832715094089508, "learning_rate": 0.0008823138207031999, "loss": 1.4565, "step": 1611 }, { "epoch": 0.7393647517486527, "grad_norm": 0.29865747690200806, "learning_rate": 0.0008821541750864428, "loss": 1.2469, "step": 1612 }, { "epoch": 0.7398234147460153, "grad_norm": 0.3033508062362671, "learning_rate": 0.0008819944357245111, "loss": 1.3326, "step": 1613 }, { "epoch": 0.7402820777433781, "grad_norm": 0.2659357190132141, "learning_rate": 0.0008818346026565897, "loss": 1.2563, "step": 1614 }, { "epoch": 0.7407407407407407, "grad_norm": 0.3529365360736847, "learning_rate": 0.0008816746759218874, "loss": 1.9199, "step": 1615 }, { "epoch": 0.7411994037381034, "grad_norm": 0.20898482203483582, "learning_rate": 0.0008815146555596351, "loss": 0.8429, "step": 1616 }, { "epoch": 0.7416580667354661, "grad_norm": 0.39400166273117065, "learning_rate": 0.0008813545416090869, "loss": 1.9886, "step": 1617 }, { "epoch": 0.7421167297328288, "grad_norm": 0.2804252803325653, "learning_rate": 0.0008811943341095199, "loss": 1.4959, "step": 1618 }, { "epoch": 0.7425753927301915, "grad_norm": 0.24878603219985962, "learning_rate": 0.0008810340331002341, "loss": 1.1033, "step": 1619 }, { "epoch": 0.7430340557275542, "grad_norm": 0.3392964005470276, "learning_rate": 0.0008808736386205527, "loss": 1.5595, "step": 1620 }, { "epoch": 0.7434927187249168, "grad_norm": 0.25439828634262085, "learning_rate": 0.0008807131507098213, "loss": 1.127, "step": 1621 }, { "epoch": 0.7439513817222796, "grad_norm": 0.2508736252784729, "learning_rate": 0.0008805525694074093, "loss": 1.2366, "step": 1622 }, { "epoch": 0.7444100447196422, "grad_norm": 0.21780788898468018, "learning_rate": 0.0008803918947527079, "loss": 1.1689, "step": 1623 }, { "epoch": 0.744868707717005, "grad_norm": 0.22065167129039764, "learning_rate": 0.000880231126785132, "loss": 0.8861, "step": 1624 }, { "epoch": 0.7453273707143676, "grad_norm": 0.3314412236213684, "learning_rate": 0.000880070265544119, "loss": 1.9618, "step": 1625 }, { "epoch": 0.7457860337117304, "grad_norm": 0.2850133180618286, "learning_rate": 0.0008799093110691294, "loss": 1.5959, "step": 1626 }, { "epoch": 0.746244696709093, "grad_norm": 0.3236753046512604, "learning_rate": 0.0008797482633996466, "loss": 1.6779, "step": 1627 }, { "epoch": 0.7467033597064556, "grad_norm": 0.29706433415412903, "learning_rate": 0.0008795871225751766, "loss": 1.6093, "step": 1628 }, { "epoch": 0.7471620227038184, "grad_norm": 0.3429460823535919, "learning_rate": 0.0008794258886352485, "loss": 1.5433, "step": 1629 }, { "epoch": 0.747620685701181, "grad_norm": 0.32680225372314453, "learning_rate": 0.0008792645616194141, "loss": 1.8085, "step": 1630 }, { "epoch": 0.7480793486985438, "grad_norm": 0.2640790641307831, "learning_rate": 0.0008791031415672482, "loss": 1.277, "step": 1631 }, { "epoch": 0.7485380116959064, "grad_norm": 0.3999061584472656, "learning_rate": 0.000878941628518348, "loss": 1.9884, "step": 1632 }, { "epoch": 0.7489966746932691, "grad_norm": 0.28930339217185974, "learning_rate": 0.0008787800225123341, "loss": 1.2634, "step": 1633 }, { "epoch": 0.7494553376906318, "grad_norm": 0.25914284586906433, "learning_rate": 0.0008786183235888497, "loss": 1.2084, "step": 1634 }, { "epoch": 0.7499140006879945, "grad_norm": 0.3261665403842926, "learning_rate": 0.0008784565317875604, "loss": 1.5663, "step": 1635 }, { "epoch": 0.7503726636853572, "grad_norm": 0.23450294137001038, "learning_rate": 0.000878294647148155, "loss": 1.1509, "step": 1636 }, { "epoch": 0.7508313266827199, "grad_norm": 0.2073303908109665, "learning_rate": 0.000878132669710345, "loss": 0.9553, "step": 1637 }, { "epoch": 0.7512899896800825, "grad_norm": 0.18613280355930328, "learning_rate": 0.0008779705995138647, "loss": 0.8871, "step": 1638 }, { "epoch": 0.7517486526774453, "grad_norm": 0.3788055181503296, "learning_rate": 0.000877808436598471, "loss": 1.5571, "step": 1639 }, { "epoch": 0.7522073156748079, "grad_norm": 0.2849988639354706, "learning_rate": 0.0008776461810039437, "loss": 1.4867, "step": 1640 }, { "epoch": 0.7526659786721707, "grad_norm": 0.3063529133796692, "learning_rate": 0.0008774838327700852, "loss": 1.3499, "step": 1641 }, { "epoch": 0.7531246416695333, "grad_norm": 0.35424184799194336, "learning_rate": 0.0008773213919367206, "loss": 1.7198, "step": 1642 }, { "epoch": 0.753583304666896, "grad_norm": 0.32375872135162354, "learning_rate": 0.0008771588585436982, "loss": 1.8474, "step": 1643 }, { "epoch": 0.7540419676642587, "grad_norm": 0.3555268943309784, "learning_rate": 0.0008769962326308882, "loss": 1.7086, "step": 1644 }, { "epoch": 0.7545006306616213, "grad_norm": 0.32062697410583496, "learning_rate": 0.000876833514238184, "loss": 1.4244, "step": 1645 }, { "epoch": 0.7549592936589841, "grad_norm": 0.5851942896842957, "learning_rate": 0.0008766707034055017, "loss": 1.8285, "step": 1646 }, { "epoch": 0.7554179566563467, "grad_norm": 0.39790624380111694, "learning_rate": 0.0008765078001727799, "loss": 1.5838, "step": 1647 }, { "epoch": 0.7558766196537094, "grad_norm": 0.22544077038764954, "learning_rate": 0.00087634480457998, "loss": 1.0953, "step": 1648 }, { "epoch": 0.7563352826510721, "grad_norm": 0.4409390985965729, "learning_rate": 0.000876181716667086, "loss": 2.1729, "step": 1649 }, { "epoch": 0.7567939456484348, "grad_norm": 0.09505387395620346, "learning_rate": 0.0008760185364741045, "loss": 0.6027, "step": 1650 }, { "epoch": 0.7572526086457975, "grad_norm": 0.18144452571868896, "learning_rate": 0.0008758552640410647, "loss": 0.7814, "step": 1651 }, { "epoch": 0.7577112716431602, "grad_norm": 0.25609463453292847, "learning_rate": 0.0008756918994080184, "loss": 1.2465, "step": 1652 }, { "epoch": 0.7581699346405228, "grad_norm": 0.3209281265735626, "learning_rate": 0.0008755284426150405, "loss": 1.327, "step": 1653 }, { "epoch": 0.7586285976378856, "grad_norm": 0.28706008195877075, "learning_rate": 0.0008753648937022278, "loss": 1.5922, "step": 1654 }, { "epoch": 0.7590872606352482, "grad_norm": 0.32143130898475647, "learning_rate": 0.0008752012527097003, "loss": 1.2776, "step": 1655 }, { "epoch": 0.759545923632611, "grad_norm": 0.29344642162323, "learning_rate": 0.0008750375196776002, "loss": 1.5062, "step": 1656 }, { "epoch": 0.7600045866299736, "grad_norm": 0.38162222504615784, "learning_rate": 0.0008748736946460922, "loss": 1.892, "step": 1657 }, { "epoch": 0.7604632496273364, "grad_norm": 0.28159940242767334, "learning_rate": 0.0008747097776553639, "loss": 1.3286, "step": 1658 }, { "epoch": 0.760921912624699, "grad_norm": 0.09235129505395889, "learning_rate": 0.0008745457687456255, "loss": 0.5844, "step": 1659 }, { "epoch": 0.7613805756220617, "grad_norm": 0.3361958861351013, "learning_rate": 0.0008743816679571094, "loss": 1.7726, "step": 1660 }, { "epoch": 0.7618392386194244, "grad_norm": 0.31122922897338867, "learning_rate": 0.0008742174753300707, "loss": 1.3808, "step": 1661 }, { "epoch": 0.762297901616787, "grad_norm": 0.3450722396373749, "learning_rate": 0.000874053190904787, "loss": 1.8993, "step": 1662 }, { "epoch": 0.7627565646141498, "grad_norm": 0.3466764986515045, "learning_rate": 0.0008738888147215584, "loss": 1.9905, "step": 1663 }, { "epoch": 0.7632152276115124, "grad_norm": 0.13895724713802338, "learning_rate": 0.0008737243468207079, "loss": 0.6572, "step": 1664 }, { "epoch": 0.7636738906088751, "grad_norm": 0.32592159509658813, "learning_rate": 0.0008735597872425804, "loss": 1.7026, "step": 1665 }, { "epoch": 0.7641325536062378, "grad_norm": 0.2743968367576599, "learning_rate": 0.0008733951360275434, "loss": 1.1798, "step": 1666 }, { "epoch": 0.7645912166036005, "grad_norm": 0.1800556182861328, "learning_rate": 0.0008732303932159873, "loss": 0.8318, "step": 1667 }, { "epoch": 0.7650498796009632, "grad_norm": 0.35121503472328186, "learning_rate": 0.0008730655588483247, "loss": 1.9238, "step": 1668 }, { "epoch": 0.7655085425983259, "grad_norm": 0.31581225991249084, "learning_rate": 0.0008729006329649906, "loss": 1.5878, "step": 1669 }, { "epoch": 0.7659672055956885, "grad_norm": 0.34449562430381775, "learning_rate": 0.0008727356156064424, "loss": 1.348, "step": 1670 }, { "epoch": 0.7664258685930513, "grad_norm": 0.2677781581878662, "learning_rate": 0.0008725705068131599, "loss": 1.2385, "step": 1671 }, { "epoch": 0.7668845315904139, "grad_norm": 0.3324863314628601, "learning_rate": 0.0008724053066256461, "loss": 1.827, "step": 1672 }, { "epoch": 0.7673431945877767, "grad_norm": 0.32999104261398315, "learning_rate": 0.0008722400150844252, "loss": 1.4756, "step": 1673 }, { "epoch": 0.7678018575851393, "grad_norm": 0.15800195932388306, "learning_rate": 0.0008720746322300447, "loss": 0.7594, "step": 1674 }, { "epoch": 0.768260520582502, "grad_norm": 0.3041391968727112, "learning_rate": 0.0008719091581030741, "loss": 1.4358, "step": 1675 }, { "epoch": 0.7687191835798647, "grad_norm": 0.3658222258090973, "learning_rate": 0.0008717435927441053, "loss": 1.684, "step": 1676 }, { "epoch": 0.7691778465772274, "grad_norm": 0.3466409146785736, "learning_rate": 0.0008715779361937528, "loss": 1.8845, "step": 1677 }, { "epoch": 0.7696365095745901, "grad_norm": 0.32669052481651306, "learning_rate": 0.0008714121884926536, "loss": 1.6675, "step": 1678 }, { "epoch": 0.7700951725719528, "grad_norm": 0.35387712717056274, "learning_rate": 0.0008712463496814662, "loss": 1.9104, "step": 1679 }, { "epoch": 0.7705538355693154, "grad_norm": 0.3723506033420563, "learning_rate": 0.0008710804198008727, "loss": 1.8469, "step": 1680 }, { "epoch": 0.7710124985666781, "grad_norm": 0.19466747343540192, "learning_rate": 0.0008709143988915763, "loss": 0.8387, "step": 1681 }, { "epoch": 0.7714711615640408, "grad_norm": 0.33413955569267273, "learning_rate": 0.0008707482869943035, "loss": 1.6544, "step": 1682 }, { "epoch": 0.7719298245614035, "grad_norm": 0.29037922620773315, "learning_rate": 0.0008705820841498029, "loss": 1.5114, "step": 1683 }, { "epoch": 0.7723884875587662, "grad_norm": 0.3319530785083771, "learning_rate": 0.0008704157903988448, "loss": 1.61, "step": 1684 }, { "epoch": 0.7728471505561288, "grad_norm": 0.37831956148147583, "learning_rate": 0.0008702494057822224, "loss": 1.8689, "step": 1685 }, { "epoch": 0.7733058135534916, "grad_norm": 0.24735870957374573, "learning_rate": 0.0008700829303407514, "loss": 1.0308, "step": 1686 }, { "epoch": 0.7737644765508542, "grad_norm": 0.09444022178649902, "learning_rate": 0.000869916364115269, "loss": 0.5718, "step": 1687 }, { "epoch": 0.774223139548217, "grad_norm": 0.4078546166419983, "learning_rate": 0.0008697497071466351, "loss": 1.4986, "step": 1688 }, { "epoch": 0.7746818025455796, "grad_norm": 0.3958517611026764, "learning_rate": 0.0008695829594757323, "loss": 1.8461, "step": 1689 }, { "epoch": 0.7751404655429424, "grad_norm": 0.2555053234100342, "learning_rate": 0.0008694161211434645, "loss": 1.2473, "step": 1690 }, { "epoch": 0.775599128540305, "grad_norm": 0.30251985788345337, "learning_rate": 0.0008692491921907586, "loss": 1.225, "step": 1691 }, { "epoch": 0.7760577915376677, "grad_norm": 0.2819230258464813, "learning_rate": 0.0008690821726585634, "loss": 1.0078, "step": 1692 }, { "epoch": 0.7765164545350304, "grad_norm": 0.32672372460365295, "learning_rate": 0.0008689150625878501, "loss": 1.4543, "step": 1693 }, { "epoch": 0.7769751175323931, "grad_norm": 0.282443642616272, "learning_rate": 0.000868747862019612, "loss": 1.412, "step": 1694 }, { "epoch": 0.7774337805297558, "grad_norm": 0.33812659978866577, "learning_rate": 0.0008685805709948644, "loss": 1.8073, "step": 1695 }, { "epoch": 0.7778924435271185, "grad_norm": 0.29638344049453735, "learning_rate": 0.0008684131895546453, "loss": 1.5007, "step": 1696 }, { "epoch": 0.7783511065244811, "grad_norm": 0.36412540078163147, "learning_rate": 0.0008682457177400141, "loss": 1.8148, "step": 1697 }, { "epoch": 0.7788097695218438, "grad_norm": 0.1184324324131012, "learning_rate": 0.0008680781555920533, "loss": 0.6136, "step": 1698 }, { "epoch": 0.7792684325192065, "grad_norm": 0.32119160890579224, "learning_rate": 0.000867910503151867, "loss": 1.8235, "step": 1699 }, { "epoch": 0.7797270955165692, "grad_norm": 0.36955249309539795, "learning_rate": 0.0008677427604605816, "loss": 2.0277, "step": 1700 }, { "epoch": 0.7801857585139319, "grad_norm": 0.3423328697681427, "learning_rate": 0.0008675749275593454, "loss": 1.6159, "step": 1701 }, { "epoch": 0.7806444215112945, "grad_norm": 0.2848832905292511, "learning_rate": 0.000867407004489329, "loss": 1.5393, "step": 1702 }, { "epoch": 0.7811030845086573, "grad_norm": 0.31208136677742004, "learning_rate": 0.0008672389912917253, "loss": 1.3461, "step": 1703 }, { "epoch": 0.7815617475060199, "grad_norm": 0.35254040360450745, "learning_rate": 0.0008670708880077492, "loss": 1.6527, "step": 1704 }, { "epoch": 0.7820204105033827, "grad_norm": 0.2813601493835449, "learning_rate": 0.0008669026946786375, "loss": 1.372, "step": 1705 }, { "epoch": 0.7824790735007453, "grad_norm": 0.29800623655319214, "learning_rate": 0.0008667344113456495, "loss": 1.4514, "step": 1706 }, { "epoch": 0.782937736498108, "grad_norm": 0.3098766505718231, "learning_rate": 0.0008665660380500657, "loss": 1.6478, "step": 1707 }, { "epoch": 0.7833963994954707, "grad_norm": 0.4111390709877014, "learning_rate": 0.00086639757483319, "loss": 2.3545, "step": 1708 }, { "epoch": 0.7838550624928334, "grad_norm": 0.2358768880367279, "learning_rate": 0.0008662290217363474, "loss": 0.8783, "step": 1709 }, { "epoch": 0.7843137254901961, "grad_norm": 0.3664570152759552, "learning_rate": 0.0008660603788008847, "loss": 2.2107, "step": 1710 }, { "epoch": 0.7847723884875588, "grad_norm": 0.22186599671840668, "learning_rate": 0.0008658916460681721, "loss": 1.1024, "step": 1711 }, { "epoch": 0.7852310514849214, "grad_norm": 0.36577606201171875, "learning_rate": 0.0008657228235796002, "loss": 1.7358, "step": 1712 }, { "epoch": 0.7856897144822842, "grad_norm": 0.2555186450481415, "learning_rate": 0.0008655539113765828, "loss": 0.8061, "step": 1713 }, { "epoch": 0.7861483774796468, "grad_norm": 0.24842973053455353, "learning_rate": 0.0008653849095005551, "loss": 0.882, "step": 1714 }, { "epoch": 0.7866070404770095, "grad_norm": 0.37208500504493713, "learning_rate": 0.0008652158179929746, "loss": 1.7593, "step": 1715 }, { "epoch": 0.7870657034743722, "grad_norm": 0.3234862685203552, "learning_rate": 0.0008650466368953206, "loss": 1.5933, "step": 1716 }, { "epoch": 0.7875243664717348, "grad_norm": 0.36344775557518005, "learning_rate": 0.0008648773662490944, "loss": 1.6957, "step": 1717 }, { "epoch": 0.7879830294690976, "grad_norm": 0.29362058639526367, "learning_rate": 0.0008647080060958194, "loss": 1.246, "step": 1718 }, { "epoch": 0.7884416924664602, "grad_norm": 0.48402926325798035, "learning_rate": 0.0008645385564770409, "loss": 1.8261, "step": 1719 }, { "epoch": 0.788900355463823, "grad_norm": 0.36820700764656067, "learning_rate": 0.0008643690174343258, "loss": 1.7743, "step": 1720 }, { "epoch": 0.7893590184611856, "grad_norm": 0.15446464717388153, "learning_rate": 0.0008641993890092637, "loss": 0.8386, "step": 1721 }, { "epoch": 0.7898176814585484, "grad_norm": 0.3198365271091461, "learning_rate": 0.0008640296712434654, "loss": 1.6002, "step": 1722 }, { "epoch": 0.790276344455911, "grad_norm": 0.15601693093776703, "learning_rate": 0.000863859864178564, "loss": 0.7734, "step": 1723 }, { "epoch": 0.7907350074532737, "grad_norm": 0.38354170322418213, "learning_rate": 0.0008636899678562143, "loss": 1.7987, "step": 1724 }, { "epoch": 0.7911936704506364, "grad_norm": 0.07455827295780182, "learning_rate": 0.0008635199823180931, "loss": 0.4332, "step": 1725 }, { "epoch": 0.7916523334479991, "grad_norm": 0.3557904064655304, "learning_rate": 0.0008633499076058992, "loss": 1.8337, "step": 1726 }, { "epoch": 0.7921109964453618, "grad_norm": 0.20361699163913727, "learning_rate": 0.000863179743761353, "loss": 0.9175, "step": 1727 }, { "epoch": 0.7925696594427245, "grad_norm": 0.2946905493736267, "learning_rate": 0.000863009490826197, "loss": 1.3851, "step": 1728 }, { "epoch": 0.7930283224400871, "grad_norm": 0.26087355613708496, "learning_rate": 0.0008628391488421956, "loss": 1.1733, "step": 1729 }, { "epoch": 0.7934869854374499, "grad_norm": 0.33554574847221375, "learning_rate": 0.0008626687178511346, "loss": 1.6946, "step": 1730 }, { "epoch": 0.7939456484348125, "grad_norm": 0.08757393062114716, "learning_rate": 0.0008624981978948223, "loss": 0.4806, "step": 1731 }, { "epoch": 0.7944043114321752, "grad_norm": 0.3665269911289215, "learning_rate": 0.0008623275890150882, "loss": 1.4377, "step": 1732 }, { "epoch": 0.7948629744295379, "grad_norm": 0.2906312346458435, "learning_rate": 0.0008621568912537841, "loss": 1.0789, "step": 1733 }, { "epoch": 0.7953216374269005, "grad_norm": 0.28736838698387146, "learning_rate": 0.0008619861046527832, "loss": 0.922, "step": 1734 }, { "epoch": 0.7957803004242633, "grad_norm": 0.37983497977256775, "learning_rate": 0.0008618152292539807, "loss": 1.8016, "step": 1735 }, { "epoch": 0.7962389634216259, "grad_norm": 0.347023606300354, "learning_rate": 0.0008616442650992937, "loss": 1.9502, "step": 1736 }, { "epoch": 0.7966976264189887, "grad_norm": 0.3550783097743988, "learning_rate": 0.000861473212230661, "loss": 1.7224, "step": 1737 }, { "epoch": 0.7971562894163513, "grad_norm": 0.3077528774738312, "learning_rate": 0.0008613020706900429, "loss": 0.9033, "step": 1738 }, { "epoch": 0.797614952413714, "grad_norm": 0.4047601819038391, "learning_rate": 0.0008611308405194218, "loss": 2.2925, "step": 1739 }, { "epoch": 0.7980736154110767, "grad_norm": 0.4590601623058319, "learning_rate": 0.0008609595217608015, "loss": 1.0867, "step": 1740 }, { "epoch": 0.7985322784084394, "grad_norm": 0.23011907935142517, "learning_rate": 0.000860788114456208, "loss": 0.8041, "step": 1741 }, { "epoch": 0.7989909414058021, "grad_norm": 0.3282072842121124, "learning_rate": 0.0008606166186476884, "loss": 1.417, "step": 1742 }, { "epoch": 0.7994496044031648, "grad_norm": 0.3150783181190491, "learning_rate": 0.0008604450343773122, "loss": 1.3964, "step": 1743 }, { "epoch": 0.7999082674005275, "grad_norm": 0.268691748380661, "learning_rate": 0.0008602733616871701, "loss": 1.0648, "step": 1744 }, { "epoch": 0.8003669303978902, "grad_norm": 0.31134241819381714, "learning_rate": 0.0008601016006193747, "loss": 0.8968, "step": 1745 }, { "epoch": 0.8008255933952528, "grad_norm": 0.2897193729877472, "learning_rate": 0.0008599297512160602, "loss": 1.3438, "step": 1746 }, { "epoch": 0.8012842563926156, "grad_norm": 0.3896466791629791, "learning_rate": 0.0008597578135193826, "loss": 1.8129, "step": 1747 }, { "epoch": 0.8017429193899782, "grad_norm": 0.34135007858276367, "learning_rate": 0.0008595857875715191, "loss": 1.311, "step": 1748 }, { "epoch": 0.8022015823873409, "grad_norm": 0.49784937500953674, "learning_rate": 0.0008594136734146694, "loss": 1.9587, "step": 1749 }, { "epoch": 0.8026602453847036, "grad_norm": 0.3799792230129242, "learning_rate": 0.0008592414710910541, "loss": 1.6164, "step": 1750 }, { "epoch": 0.8031189083820662, "grad_norm": 0.4176270067691803, "learning_rate": 0.0008590691806429155, "loss": 1.7939, "step": 1751 }, { "epoch": 0.803577571379429, "grad_norm": 0.31819960474967957, "learning_rate": 0.000858896802112518, "loss": 1.2059, "step": 1752 }, { "epoch": 0.8040362343767916, "grad_norm": 0.34135720133781433, "learning_rate": 0.000858724335542147, "loss": 1.4622, "step": 1753 }, { "epoch": 0.8044948973741544, "grad_norm": 0.33655521273612976, "learning_rate": 0.0008585517809741102, "loss": 1.8534, "step": 1754 }, { "epoch": 0.804953560371517, "grad_norm": 0.24837389588356018, "learning_rate": 0.000858379138450736, "loss": 1.2936, "step": 1755 }, { "epoch": 0.8054122233688797, "grad_norm": 0.302501380443573, "learning_rate": 0.0008582064080143752, "loss": 1.5486, "step": 1756 }, { "epoch": 0.8058708863662424, "grad_norm": 0.38520947098731995, "learning_rate": 0.0008580335897073999, "loss": 2.1995, "step": 1757 }, { "epoch": 0.8063295493636051, "grad_norm": 0.05348968505859375, "learning_rate": 0.0008578606835722032, "loss": 0.3607, "step": 1758 }, { "epoch": 0.8067882123609678, "grad_norm": 0.2687481939792633, "learning_rate": 0.0008576876896512006, "loss": 1.4349, "step": 1759 }, { "epoch": 0.8072468753583305, "grad_norm": 0.2733592689037323, "learning_rate": 0.0008575146079868287, "loss": 1.4257, "step": 1760 }, { "epoch": 0.8077055383556931, "grad_norm": 0.27336040139198303, "learning_rate": 0.0008573414386215457, "loss": 1.2039, "step": 1761 }, { "epoch": 0.8081642013530559, "grad_norm": 0.27744030952453613, "learning_rate": 0.000857168181597831, "loss": 1.4286, "step": 1762 }, { "epoch": 0.8086228643504185, "grad_norm": 0.2781740725040436, "learning_rate": 0.0008569948369581864, "loss": 1.3666, "step": 1763 }, { "epoch": 0.8090815273477813, "grad_norm": 0.2565990686416626, "learning_rate": 0.0008568214047451339, "loss": 1.2767, "step": 1764 }, { "epoch": 0.8095401903451439, "grad_norm": 0.34465324878692627, "learning_rate": 0.000856647885001218, "loss": 1.6884, "step": 1765 }, { "epoch": 0.8099988533425065, "grad_norm": 0.3289906978607178, "learning_rate": 0.0008564742777690047, "loss": 1.5647, "step": 1766 }, { "epoch": 0.8104575163398693, "grad_norm": 0.46371495723724365, "learning_rate": 0.0008563005830910804, "loss": 2.2827, "step": 1767 }, { "epoch": 0.8109161793372319, "grad_norm": 0.3772294521331787, "learning_rate": 0.0008561268010100541, "loss": 1.7618, "step": 1768 }, { "epoch": 0.8113748423345947, "grad_norm": 0.3338968753814697, "learning_rate": 0.0008559529315685558, "loss": 1.4556, "step": 1769 }, { "epoch": 0.8118335053319573, "grad_norm": 0.39005744457244873, "learning_rate": 0.0008557789748092369, "loss": 1.8439, "step": 1770 }, { "epoch": 0.81229216832932, "grad_norm": 0.2635294497013092, "learning_rate": 0.00085560493077477, "loss": 0.9517, "step": 1771 }, { "epoch": 0.8127508313266827, "grad_norm": 0.3014441430568695, "learning_rate": 0.0008554307995078495, "loss": 1.2068, "step": 1772 }, { "epoch": 0.8132094943240454, "grad_norm": 0.297055721282959, "learning_rate": 0.0008552565810511912, "loss": 1.3474, "step": 1773 }, { "epoch": 0.8136681573214081, "grad_norm": 0.39429694414138794, "learning_rate": 0.000855082275447532, "loss": 1.3822, "step": 1774 }, { "epoch": 0.8141268203187708, "grad_norm": 0.4196150302886963, "learning_rate": 0.0008549078827396304, "loss": 1.9164, "step": 1775 }, { "epoch": 0.8145854833161335, "grad_norm": 0.33066174387931824, "learning_rate": 0.0008547334029702661, "loss": 1.3359, "step": 1776 }, { "epoch": 0.8150441463134962, "grad_norm": 0.5004546642303467, "learning_rate": 0.0008545588361822403, "loss": 1.6743, "step": 1777 }, { "epoch": 0.8155028093108588, "grad_norm": 0.3157084882259369, "learning_rate": 0.0008543841824183756, "loss": 1.2809, "step": 1778 }, { "epoch": 0.8159614723082216, "grad_norm": 0.4495697021484375, "learning_rate": 0.0008542094417215156, "loss": 2.0178, "step": 1779 }, { "epoch": 0.8164201353055842, "grad_norm": 0.40004342794418335, "learning_rate": 0.0008540346141345254, "loss": 1.8539, "step": 1780 }, { "epoch": 0.816878798302947, "grad_norm": 0.3421812355518341, "learning_rate": 0.0008538596997002918, "loss": 1.2512, "step": 1781 }, { "epoch": 0.8173374613003096, "grad_norm": 0.28738144040107727, "learning_rate": 0.0008536846984617224, "loss": 1.2953, "step": 1782 }, { "epoch": 0.8177961242976722, "grad_norm": 0.40851840376853943, "learning_rate": 0.0008535096104617464, "loss": 2.1592, "step": 1783 }, { "epoch": 0.818254787295035, "grad_norm": 0.2928701937198639, "learning_rate": 0.0008533344357433138, "loss": 1.3396, "step": 1784 }, { "epoch": 0.8187134502923976, "grad_norm": 0.08765708655118942, "learning_rate": 0.0008531591743493966, "loss": 0.4814, "step": 1785 }, { "epoch": 0.8191721132897604, "grad_norm": 0.3306613564491272, "learning_rate": 0.0008529838263229874, "loss": 1.3589, "step": 1786 }, { "epoch": 0.819630776287123, "grad_norm": 0.29500478506088257, "learning_rate": 0.0008528083917071006, "loss": 1.3019, "step": 1787 }, { "epoch": 0.8200894392844857, "grad_norm": 0.1884388029575348, "learning_rate": 0.0008526328705447712, "loss": 0.7717, "step": 1788 }, { "epoch": 0.8205481022818484, "grad_norm": 0.31622594594955444, "learning_rate": 0.0008524572628790562, "loss": 1.669, "step": 1789 }, { "epoch": 0.8210067652792111, "grad_norm": 0.4221099615097046, "learning_rate": 0.0008522815687530333, "loss": 1.6276, "step": 1790 }, { "epoch": 0.8214654282765738, "grad_norm": 0.3094578981399536, "learning_rate": 0.0008521057882098015, "loss": 1.3534, "step": 1791 }, { "epoch": 0.8219240912739365, "grad_norm": 0.2990223169326782, "learning_rate": 0.000851929921292481, "loss": 1.2487, "step": 1792 }, { "epoch": 0.8223827542712991, "grad_norm": 1.4571868181228638, "learning_rate": 0.0008517539680442133, "loss": 1.2966, "step": 1793 }, { "epoch": 0.8228414172686619, "grad_norm": 0.22885964810848236, "learning_rate": 0.0008515779285081608, "loss": 0.8865, "step": 1794 }, { "epoch": 0.8233000802660245, "grad_norm": 0.3707837164402008, "learning_rate": 0.0008514018027275074, "loss": 1.4297, "step": 1795 }, { "epoch": 0.8237587432633873, "grad_norm": 0.34915557503700256, "learning_rate": 0.0008512255907454584, "loss": 1.7339, "step": 1796 }, { "epoch": 0.8242174062607499, "grad_norm": 0.2898045480251312, "learning_rate": 0.0008510492926052393, "loss": 1.4886, "step": 1797 }, { "epoch": 0.8246760692581127, "grad_norm": 0.3200799822807312, "learning_rate": 0.0008508729083500974, "loss": 1.5057, "step": 1798 }, { "epoch": 0.8251347322554753, "grad_norm": 0.23627309501171112, "learning_rate": 0.0008506964380233014, "loss": 1.0652, "step": 1799 }, { "epoch": 0.8255933952528379, "grad_norm": 0.19365330040454865, "learning_rate": 0.0008505198816681403, "loss": 0.8976, "step": 1800 }, { "epoch": 0.8260520582502007, "grad_norm": 0.30963438749313354, "learning_rate": 0.0008503432393279251, "loss": 1.4713, "step": 1801 }, { "epoch": 0.8265107212475633, "grad_norm": 0.30070507526397705, "learning_rate": 0.000850166511045987, "loss": 1.3842, "step": 1802 }, { "epoch": 0.826969384244926, "grad_norm": 0.23169678449630737, "learning_rate": 0.0008499896968656789, "loss": 1.0347, "step": 1803 }, { "epoch": 0.8274280472422887, "grad_norm": 0.27118271589279175, "learning_rate": 0.0008498127968303747, "loss": 1.2278, "step": 1804 }, { "epoch": 0.8278867102396514, "grad_norm": 0.28942152857780457, "learning_rate": 0.0008496358109834691, "loss": 1.2407, "step": 1805 }, { "epoch": 0.8283453732370141, "grad_norm": 0.34731897711753845, "learning_rate": 0.000849458739368378, "loss": 1.7978, "step": 1806 }, { "epoch": 0.8288040362343768, "grad_norm": 0.33974307775497437, "learning_rate": 0.0008492815820285384, "loss": 1.8225, "step": 1807 }, { "epoch": 0.8292626992317395, "grad_norm": 0.36200281977653503, "learning_rate": 0.0008491043390074085, "loss": 1.8848, "step": 1808 }, { "epoch": 0.8297213622291022, "grad_norm": 0.3786148130893707, "learning_rate": 0.0008489270103484668, "loss": 1.8229, "step": 1809 }, { "epoch": 0.8301800252264648, "grad_norm": 0.3229668140411377, "learning_rate": 0.0008487495960952139, "loss": 1.4528, "step": 1810 }, { "epoch": 0.8306386882238276, "grad_norm": 0.34114035964012146, "learning_rate": 0.00084857209629117, "loss": 1.3704, "step": 1811 }, { "epoch": 0.8310973512211902, "grad_norm": 0.22234633564949036, "learning_rate": 0.0008483945109798778, "loss": 0.8441, "step": 1812 }, { "epoch": 0.831556014218553, "grad_norm": 0.2830922305583954, "learning_rate": 0.0008482168402049001, "loss": 1.3244, "step": 1813 }, { "epoch": 0.8320146772159156, "grad_norm": 0.30978924036026, "learning_rate": 0.0008480390840098207, "loss": 1.2952, "step": 1814 }, { "epoch": 0.8324733402132783, "grad_norm": 0.3014751076698303, "learning_rate": 0.0008478612424382444, "loss": 1.5973, "step": 1815 }, { "epoch": 0.832932003210641, "grad_norm": 0.1848728209733963, "learning_rate": 0.0008476833155337971, "loss": 0.7438, "step": 1816 }, { "epoch": 0.8333906662080036, "grad_norm": 0.18665684759616852, "learning_rate": 0.0008475053033401256, "loss": 0.854, "step": 1817 }, { "epoch": 0.8338493292053664, "grad_norm": 0.36908119916915894, "learning_rate": 0.0008473272059008976, "loss": 1.7524, "step": 1818 }, { "epoch": 0.834307992202729, "grad_norm": 0.2789961099624634, "learning_rate": 0.0008471490232598016, "loss": 1.1132, "step": 1819 }, { "epoch": 0.8347666552000917, "grad_norm": 0.3717726171016693, "learning_rate": 0.000846970755460547, "loss": 1.8761, "step": 1820 }, { "epoch": 0.8352253181974544, "grad_norm": 0.38276535272598267, "learning_rate": 0.0008467924025468645, "loss": 2.0549, "step": 1821 }, { "epoch": 0.8356839811948171, "grad_norm": 0.3366381824016571, "learning_rate": 0.000846613964562505, "loss": 1.5439, "step": 1822 }, { "epoch": 0.8361426441921798, "grad_norm": 0.21815823018550873, "learning_rate": 0.0008464354415512409, "loss": 1.1877, "step": 1823 }, { "epoch": 0.8366013071895425, "grad_norm": 0.3503132462501526, "learning_rate": 0.000846256833556865, "loss": 1.9175, "step": 1824 }, { "epoch": 0.8370599701869051, "grad_norm": 0.2623533010482788, "learning_rate": 0.0008460781406231913, "loss": 1.3655, "step": 1825 }, { "epoch": 0.8375186331842679, "grad_norm": 0.3180966377258301, "learning_rate": 0.0008458993627940541, "loss": 1.6582, "step": 1826 }, { "epoch": 0.8379772961816305, "grad_norm": 0.19976475834846497, "learning_rate": 0.0008457205001133093, "loss": 0.8719, "step": 1827 }, { "epoch": 0.8384359591789933, "grad_norm": 0.2790711224079132, "learning_rate": 0.000845541552624833, "loss": 1.736, "step": 1828 }, { "epoch": 0.8388946221763559, "grad_norm": 0.25180596113204956, "learning_rate": 0.0008453625203725224, "loss": 1.3915, "step": 1829 }, { "epoch": 0.8393532851737187, "grad_norm": 0.35062751173973083, "learning_rate": 0.0008451834034002954, "loss": 1.631, "step": 1830 }, { "epoch": 0.8398119481710813, "grad_norm": 0.20606881380081177, "learning_rate": 0.0008450042017520905, "loss": 0.9485, "step": 1831 }, { "epoch": 0.840270611168444, "grad_norm": 0.34081435203552246, "learning_rate": 0.0008448249154718675, "loss": 2.0344, "step": 1832 }, { "epoch": 0.8407292741658067, "grad_norm": 0.2597459852695465, "learning_rate": 0.0008446455446036063, "loss": 1.5004, "step": 1833 }, { "epoch": 0.8411879371631693, "grad_norm": 0.19320417940616608, "learning_rate": 0.0008444660891913079, "loss": 0.9119, "step": 1834 }, { "epoch": 0.841646600160532, "grad_norm": 0.3378828763961792, "learning_rate": 0.0008442865492789943, "loss": 1.5369, "step": 1835 }, { "epoch": 0.8421052631578947, "grad_norm": 0.2489069253206253, "learning_rate": 0.0008441069249107076, "loss": 1.3823, "step": 1836 }, { "epoch": 0.8425639261552574, "grad_norm": 0.299431174993515, "learning_rate": 0.000843927216130511, "loss": 1.6409, "step": 1837 }, { "epoch": 0.8430225891526201, "grad_norm": 0.17401325702667236, "learning_rate": 0.0008437474229824886, "loss": 0.735, "step": 1838 }, { "epoch": 0.8434812521499828, "grad_norm": 0.267111212015152, "learning_rate": 0.0008435675455107448, "loss": 1.342, "step": 1839 }, { "epoch": 0.8439399151473455, "grad_norm": 0.17649328708648682, "learning_rate": 0.0008433875837594048, "loss": 0.9951, "step": 1840 }, { "epoch": 0.8443985781447082, "grad_norm": 0.24475933611392975, "learning_rate": 0.0008432075377726144, "loss": 1.2884, "step": 1841 }, { "epoch": 0.8448572411420708, "grad_norm": 0.2003568857908249, "learning_rate": 0.0008430274075945405, "loss": 1.0649, "step": 1842 }, { "epoch": 0.8453159041394336, "grad_norm": 0.26941803097724915, "learning_rate": 0.0008428471932693701, "loss": 1.4079, "step": 1843 }, { "epoch": 0.8457745671367962, "grad_norm": 0.2373347133398056, "learning_rate": 0.0008426668948413111, "loss": 1.1529, "step": 1844 }, { "epoch": 0.846233230134159, "grad_norm": 0.2616652846336365, "learning_rate": 0.0008424865123545921, "loss": 1.233, "step": 1845 }, { "epoch": 0.8466918931315216, "grad_norm": 0.28413131833076477, "learning_rate": 0.0008423060458534621, "loss": 1.3795, "step": 1846 }, { "epoch": 0.8471505561288843, "grad_norm": 0.2537064552307129, "learning_rate": 0.000842125495382191, "loss": 1.3186, "step": 1847 }, { "epoch": 0.847609219126247, "grad_norm": 0.21368277072906494, "learning_rate": 0.0008419448609850689, "loss": 0.9202, "step": 1848 }, { "epoch": 0.8480678821236097, "grad_norm": 0.2566043734550476, "learning_rate": 0.000841764142706407, "loss": 1.2372, "step": 1849 }, { "epoch": 0.8485265451209724, "grad_norm": 0.34998470544815063, "learning_rate": 0.0008415833405905366, "loss": 1.5085, "step": 1850 }, { "epoch": 0.848985208118335, "grad_norm": 0.13233597576618195, "learning_rate": 0.0008414024546818098, "loss": 0.6812, "step": 1851 }, { "epoch": 0.8494438711156977, "grad_norm": 0.21131940186023712, "learning_rate": 0.0008412214850245991, "loss": 1.2487, "step": 1852 }, { "epoch": 0.8499025341130604, "grad_norm": 0.35627928376197815, "learning_rate": 0.000841040431663298, "loss": 1.9961, "step": 1853 }, { "epoch": 0.8503611971104231, "grad_norm": 0.37637192010879517, "learning_rate": 0.0008408592946423198, "loss": 2.4197, "step": 1854 }, { "epoch": 0.8508198601077858, "grad_norm": 0.12372195720672607, "learning_rate": 0.0008406780740060991, "loss": 0.7192, "step": 1855 }, { "epoch": 0.8512785231051485, "grad_norm": 0.24412862956523895, "learning_rate": 0.0008404967697990902, "loss": 1.1282, "step": 1856 }, { "epoch": 0.8517371861025111, "grad_norm": 0.2743844985961914, "learning_rate": 0.0008403153820657688, "loss": 1.4214, "step": 1857 }, { "epoch": 0.8521958490998739, "grad_norm": 0.3629717230796814, "learning_rate": 0.0008401339108506302, "loss": 1.7277, "step": 1858 }, { "epoch": 0.8526545120972365, "grad_norm": 0.3425946533679962, "learning_rate": 0.0008399523561981908, "loss": 1.9844, "step": 1859 }, { "epoch": 0.8531131750945993, "grad_norm": 0.284390926361084, "learning_rate": 0.0008397707181529873, "loss": 1.2471, "step": 1860 }, { "epoch": 0.8535718380919619, "grad_norm": 0.2141527682542801, "learning_rate": 0.0008395889967595766, "loss": 0.8869, "step": 1861 }, { "epoch": 0.8540305010893247, "grad_norm": 0.29586008191108704, "learning_rate": 0.0008394071920625366, "loss": 1.6176, "step": 1862 }, { "epoch": 0.8544891640866873, "grad_norm": 0.25857093930244446, "learning_rate": 0.0008392253041064652, "loss": 1.2783, "step": 1863 }, { "epoch": 0.85494782708405, "grad_norm": 0.33708396553993225, "learning_rate": 0.0008390433329359806, "loss": 1.4294, "step": 1864 }, { "epoch": 0.8554064900814127, "grad_norm": 0.18086546659469604, "learning_rate": 0.0008388612785957219, "loss": 0.9876, "step": 1865 }, { "epoch": 0.8558651530787754, "grad_norm": 0.2990560531616211, "learning_rate": 0.0008386791411303482, "loss": 1.2799, "step": 1866 }, { "epoch": 0.8563238160761381, "grad_norm": 0.3266335129737854, "learning_rate": 0.0008384969205845391, "loss": 1.6877, "step": 1867 }, { "epoch": 0.8567824790735007, "grad_norm": 0.21711353957653046, "learning_rate": 0.0008383146170029949, "loss": 0.8325, "step": 1868 }, { "epoch": 0.8572411420708634, "grad_norm": 0.33946093916893005, "learning_rate": 0.0008381322304304356, "loss": 1.637, "step": 1869 }, { "epoch": 0.8576998050682261, "grad_norm": 0.35655567049980164, "learning_rate": 0.0008379497609116021, "loss": 1.988, "step": 1870 }, { "epoch": 0.8581584680655888, "grad_norm": 0.29446879029273987, "learning_rate": 0.0008377672084912556, "loss": 1.4559, "step": 1871 }, { "epoch": 0.8586171310629515, "grad_norm": 0.30917033553123474, "learning_rate": 0.0008375845732141773, "loss": 1.2666, "step": 1872 }, { "epoch": 0.8590757940603142, "grad_norm": 0.2967301905155182, "learning_rate": 0.0008374018551251693, "loss": 1.3149, "step": 1873 }, { "epoch": 0.8595344570576768, "grad_norm": 0.37791574001312256, "learning_rate": 0.0008372190542690534, "loss": 1.9565, "step": 1874 }, { "epoch": 0.8599931200550396, "grad_norm": 0.4147866368293762, "learning_rate": 0.0008370361706906719, "loss": 2.095, "step": 1875 }, { "epoch": 0.8604517830524022, "grad_norm": 0.31991320848464966, "learning_rate": 0.0008368532044348876, "loss": 1.0417, "step": 1876 }, { "epoch": 0.860910446049765, "grad_norm": 0.3214443624019623, "learning_rate": 0.0008366701555465834, "loss": 1.3086, "step": 1877 }, { "epoch": 0.8613691090471276, "grad_norm": 0.32853764295578003, "learning_rate": 0.0008364870240706626, "loss": 1.4961, "step": 1878 }, { "epoch": 0.8618277720444903, "grad_norm": 0.3698137700557709, "learning_rate": 0.0008363038100520485, "loss": 1.6169, "step": 1879 }, { "epoch": 0.862286435041853, "grad_norm": 0.3349319100379944, "learning_rate": 0.000836120513535685, "loss": 1.6381, "step": 1880 }, { "epoch": 0.8627450980392157, "grad_norm": 0.354120135307312, "learning_rate": 0.0008359371345665359, "loss": 1.7372, "step": 1881 }, { "epoch": 0.8632037610365784, "grad_norm": 0.31368064880371094, "learning_rate": 0.0008357536731895855, "loss": 1.7236, "step": 1882 }, { "epoch": 0.8636624240339411, "grad_norm": 0.33393827080726624, "learning_rate": 0.0008355701294498381, "loss": 1.7104, "step": 1883 }, { "epoch": 0.8641210870313037, "grad_norm": 0.26583972573280334, "learning_rate": 0.0008353865033923185, "loss": 1.2689, "step": 1884 }, { "epoch": 0.8645797500286664, "grad_norm": 0.33901429176330566, "learning_rate": 0.0008352027950620714, "loss": 1.652, "step": 1885 }, { "epoch": 0.8650384130260291, "grad_norm": 0.371894508600235, "learning_rate": 0.0008350190045041615, "loss": 1.7666, "step": 1886 }, { "epoch": 0.8654970760233918, "grad_norm": 0.19485050439834595, "learning_rate": 0.0008348351317636742, "loss": 0.8739, "step": 1887 }, { "epoch": 0.8659557390207545, "grad_norm": 0.308272123336792, "learning_rate": 0.000834651176885715, "loss": 1.4182, "step": 1888 }, { "epoch": 0.8664144020181171, "grad_norm": 0.3224412500858307, "learning_rate": 0.000834467139915409, "loss": 1.9642, "step": 1889 }, { "epoch": 0.8668730650154799, "grad_norm": 0.24551476538181305, "learning_rate": 0.000834283020897902, "loss": 1.1545, "step": 1890 }, { "epoch": 0.8673317280128425, "grad_norm": 0.3001308739185333, "learning_rate": 0.0008340988198783597, "loss": 1.6565, "step": 1891 }, { "epoch": 0.8677903910102053, "grad_norm": 0.36925092339515686, "learning_rate": 0.0008339145369019678, "loss": 2.0125, "step": 1892 }, { "epoch": 0.8682490540075679, "grad_norm": 0.3273385465145111, "learning_rate": 0.0008337301720139323, "loss": 1.3712, "step": 1893 }, { "epoch": 0.8687077170049307, "grad_norm": 0.3309119939804077, "learning_rate": 0.0008335457252594795, "loss": 1.6698, "step": 1894 }, { "epoch": 0.8691663800022933, "grad_norm": 0.23802553117275238, "learning_rate": 0.0008333611966838552, "loss": 0.9141, "step": 1895 }, { "epoch": 0.869625042999656, "grad_norm": 0.26225998997688293, "learning_rate": 0.0008331765863323255, "loss": 1.04, "step": 1896 }, { "epoch": 0.8700837059970187, "grad_norm": 0.33123326301574707, "learning_rate": 0.0008329918942501772, "loss": 1.7666, "step": 1897 }, { "epoch": 0.8705423689943814, "grad_norm": 0.238576740026474, "learning_rate": 0.000832807120482716, "loss": 1.0255, "step": 1898 }, { "epoch": 0.8710010319917441, "grad_norm": 0.3323553204536438, "learning_rate": 0.0008326222650752686, "loss": 1.4065, "step": 1899 }, { "epoch": 0.8714596949891068, "grad_norm": 0.18547557294368744, "learning_rate": 0.0008324373280731811, "loss": 0.8987, "step": 1900 }, { "epoch": 0.8719183579864694, "grad_norm": 0.3150063455104828, "learning_rate": 0.0008322523095218202, "loss": 1.2314, "step": 1901 }, { "epoch": 0.8723770209838321, "grad_norm": 0.3147270977497101, "learning_rate": 0.0008320672094665722, "loss": 1.2318, "step": 1902 }, { "epoch": 0.8728356839811948, "grad_norm": 0.4290883541107178, "learning_rate": 0.0008318820279528432, "loss": 2.0358, "step": 1903 }, { "epoch": 0.8732943469785575, "grad_norm": 0.4178559482097626, "learning_rate": 0.0008316967650260596, "loss": 2.2466, "step": 1904 }, { "epoch": 0.8737530099759202, "grad_norm": 0.18568667769432068, "learning_rate": 0.0008315114207316682, "loss": 0.9221, "step": 1905 }, { "epoch": 0.8742116729732828, "grad_norm": 0.14113818109035492, "learning_rate": 0.0008313259951151349, "loss": 0.7635, "step": 1906 }, { "epoch": 0.8746703359706456, "grad_norm": 0.3099023103713989, "learning_rate": 0.0008311404882219458, "loss": 1.3216, "step": 1907 }, { "epoch": 0.8751289989680082, "grad_norm": 0.2004273533821106, "learning_rate": 0.0008309549000976075, "loss": 1.1125, "step": 1908 }, { "epoch": 0.875587661965371, "grad_norm": 0.2782037556171417, "learning_rate": 0.0008307692307876458, "loss": 1.4631, "step": 1909 }, { "epoch": 0.8760463249627336, "grad_norm": 0.29525527358055115, "learning_rate": 0.0008305834803376069, "loss": 1.3377, "step": 1910 }, { "epoch": 0.8765049879600963, "grad_norm": 0.28939589858055115, "learning_rate": 0.0008303976487930567, "loss": 1.2637, "step": 1911 }, { "epoch": 0.876963650957459, "grad_norm": 0.24675489962100983, "learning_rate": 0.0008302117361995808, "loss": 1.3734, "step": 1912 }, { "epoch": 0.8774223139548217, "grad_norm": 0.2444259226322174, "learning_rate": 0.0008300257426027851, "loss": 1.0965, "step": 1913 }, { "epoch": 0.8778809769521844, "grad_norm": 0.17775723338127136, "learning_rate": 0.0008298396680482951, "loss": 0.9796, "step": 1914 }, { "epoch": 0.8783396399495471, "grad_norm": 0.24205970764160156, "learning_rate": 0.0008296535125817564, "loss": 1.2806, "step": 1915 }, { "epoch": 0.8787983029469097, "grad_norm": 0.47772538661956787, "learning_rate": 0.000829467276248834, "loss": 1.7538, "step": 1916 }, { "epoch": 0.8792569659442725, "grad_norm": 0.30592289566993713, "learning_rate": 0.0008292809590952132, "loss": 1.6431, "step": 1917 }, { "epoch": 0.8797156289416351, "grad_norm": 0.2858329713344574, "learning_rate": 0.000829094561166599, "loss": 1.5801, "step": 1918 }, { "epoch": 0.8801742919389978, "grad_norm": 0.28482332825660706, "learning_rate": 0.0008289080825087158, "loss": 1.093, "step": 1919 }, { "epoch": 0.8806329549363605, "grad_norm": 0.35098031163215637, "learning_rate": 0.0008287215231673087, "loss": 1.6113, "step": 1920 }, { "epoch": 0.8810916179337231, "grad_norm": 0.15006844699382782, "learning_rate": 0.0008285348831881417, "loss": 0.6972, "step": 1921 }, { "epoch": 0.8815502809310859, "grad_norm": 0.393998384475708, "learning_rate": 0.0008283481626169989, "loss": 1.7005, "step": 1922 }, { "epoch": 0.8820089439284485, "grad_norm": 0.2868679165840149, "learning_rate": 0.0008281613614996842, "loss": 0.8066, "step": 1923 }, { "epoch": 0.8824676069258113, "grad_norm": 0.20780214667320251, "learning_rate": 0.0008279744798820216, "loss": 0.973, "step": 1924 }, { "epoch": 0.8829262699231739, "grad_norm": 0.34180140495300293, "learning_rate": 0.0008277875178098541, "loss": 1.4466, "step": 1925 }, { "epoch": 0.8833849329205367, "grad_norm": 0.229378804564476, "learning_rate": 0.0008276004753290451, "loss": 1.1042, "step": 1926 }, { "epoch": 0.8838435959178993, "grad_norm": 0.27841222286224365, "learning_rate": 0.0008274133524854773, "loss": 1.3994, "step": 1927 }, { "epoch": 0.884302258915262, "grad_norm": 0.21060162782669067, "learning_rate": 0.0008272261493250533, "loss": 1.0967, "step": 1928 }, { "epoch": 0.8847609219126247, "grad_norm": 0.37685856223106384, "learning_rate": 0.0008270388658936956, "loss": 1.8954, "step": 1929 }, { "epoch": 0.8852195849099874, "grad_norm": 0.44438236951828003, "learning_rate": 0.000826851502237346, "loss": 2.2446, "step": 1930 }, { "epoch": 0.8856782479073501, "grad_norm": 0.4858162999153137, "learning_rate": 0.0008266640584019662, "loss": 2.2056, "step": 1931 }, { "epoch": 0.8861369109047128, "grad_norm": 0.2732187509536743, "learning_rate": 0.0008264765344335373, "loss": 1.4948, "step": 1932 }, { "epoch": 0.8865955739020754, "grad_norm": 0.2720052897930145, "learning_rate": 0.0008262889303780607, "loss": 1.3649, "step": 1933 }, { "epoch": 0.8870542368994382, "grad_norm": 0.250740110874176, "learning_rate": 0.0008261012462815567, "loss": 1.2161, "step": 1934 }, { "epoch": 0.8875128998968008, "grad_norm": 0.20551849901676178, "learning_rate": 0.0008259134821900658, "loss": 1.0547, "step": 1935 }, { "epoch": 0.8879715628941636, "grad_norm": 0.23970794677734375, "learning_rate": 0.0008257256381496476, "loss": 1.1497, "step": 1936 }, { "epoch": 0.8884302258915262, "grad_norm": 0.357015997171402, "learning_rate": 0.0008255377142063819, "loss": 1.4954, "step": 1937 }, { "epoch": 0.8888888888888888, "grad_norm": 0.3159123659133911, "learning_rate": 0.0008253497104063676, "loss": 1.3619, "step": 1938 }, { "epoch": 0.8893475518862516, "grad_norm": 0.2619781196117401, "learning_rate": 0.0008251616267957234, "loss": 1.0236, "step": 1939 }, { "epoch": 0.8898062148836142, "grad_norm": 0.2195945680141449, "learning_rate": 0.0008249734634205876, "loss": 0.9583, "step": 1940 }, { "epoch": 0.890264877880977, "grad_norm": 0.11660360544919968, "learning_rate": 0.0008247852203271182, "loss": 0.6041, "step": 1941 }, { "epoch": 0.8907235408783396, "grad_norm": 0.33388739824295044, "learning_rate": 0.0008245968975614924, "loss": 1.6398, "step": 1942 }, { "epoch": 0.8911822038757024, "grad_norm": 0.24743525683879852, "learning_rate": 0.0008244084951699071, "loss": 1.4167, "step": 1943 }, { "epoch": 0.891640866873065, "grad_norm": 0.2996421456336975, "learning_rate": 0.0008242200131985789, "loss": 1.6158, "step": 1944 }, { "epoch": 0.8920995298704277, "grad_norm": 0.24171142280101776, "learning_rate": 0.0008240314516937435, "loss": 1.1656, "step": 1945 }, { "epoch": 0.8925581928677904, "grad_norm": 0.5590919256210327, "learning_rate": 0.0008238428107016568, "loss": 1.809, "step": 1946 }, { "epoch": 0.8930168558651531, "grad_norm": 0.16760781407356262, "learning_rate": 0.0008236540902685937, "loss": 0.8538, "step": 1947 }, { "epoch": 0.8934755188625157, "grad_norm": 0.2322719246149063, "learning_rate": 0.0008234652904408482, "loss": 1.0128, "step": 1948 }, { "epoch": 0.8939341818598785, "grad_norm": 0.28307685256004333, "learning_rate": 0.000823276411264735, "loss": 1.377, "step": 1949 }, { "epoch": 0.8943928448572411, "grad_norm": 0.27569177746772766, "learning_rate": 0.000823087452786587, "loss": 1.4318, "step": 1950 }, { "epoch": 0.8948515078546039, "grad_norm": 0.2631165087223053, "learning_rate": 0.0008228984150527574, "loss": 1.4855, "step": 1951 }, { "epoch": 0.8953101708519665, "grad_norm": 0.18486323952674866, "learning_rate": 0.0008227092981096183, "loss": 0.7875, "step": 1952 }, { "epoch": 0.8957688338493293, "grad_norm": 0.20646964013576508, "learning_rate": 0.0008225201020035615, "loss": 0.9301, "step": 1953 }, { "epoch": 0.8962274968466919, "grad_norm": 0.2068818062543869, "learning_rate": 0.0008223308267809982, "loss": 1.1328, "step": 1954 }, { "epoch": 0.8966861598440545, "grad_norm": 0.1731792837381363, "learning_rate": 0.000822141472488359, "loss": 0.8079, "step": 1955 }, { "epoch": 0.8971448228414173, "grad_norm": 0.36508551239967346, "learning_rate": 0.0008219520391720938, "loss": 2.1365, "step": 1956 }, { "epoch": 0.8976034858387799, "grad_norm": 0.29660528898239136, "learning_rate": 0.000821762526878672, "loss": 1.6558, "step": 1957 }, { "epoch": 0.8980621488361427, "grad_norm": 0.20313714444637299, "learning_rate": 0.0008215729356545823, "loss": 0.9536, "step": 1958 }, { "epoch": 0.8985208118335053, "grad_norm": 0.24836888909339905, "learning_rate": 0.0008213832655463329, "loss": 1.2862, "step": 1959 }, { "epoch": 0.898979474830868, "grad_norm": 0.26236483454704285, "learning_rate": 0.000821193516600451, "loss": 1.5496, "step": 1960 }, { "epoch": 0.8994381378282307, "grad_norm": 0.2642938196659088, "learning_rate": 0.0008210036888634838, "loss": 1.2446, "step": 1961 }, { "epoch": 0.8998968008255934, "grad_norm": 0.2847262918949127, "learning_rate": 0.000820813782381997, "loss": 1.3784, "step": 1962 }, { "epoch": 0.9003554638229561, "grad_norm": 0.3096359372138977, "learning_rate": 0.0008206237972025761, "loss": 1.4408, "step": 1963 }, { "epoch": 0.9008141268203188, "grad_norm": 0.18418483436107635, "learning_rate": 0.0008204337333718261, "loss": 0.8153, "step": 1964 }, { "epoch": 0.9012727898176814, "grad_norm": 0.24402552843093872, "learning_rate": 0.0008202435909363708, "loss": 1.1557, "step": 1965 }, { "epoch": 0.9017314528150442, "grad_norm": 0.2893608808517456, "learning_rate": 0.0008200533699428536, "loss": 1.2294, "step": 1966 }, { "epoch": 0.9021901158124068, "grad_norm": 0.1799740344285965, "learning_rate": 0.000819863070437937, "loss": 0.9913, "step": 1967 }, { "epoch": 0.9026487788097696, "grad_norm": 0.26435887813568115, "learning_rate": 0.0008196726924683029, "loss": 1.302, "step": 1968 }, { "epoch": 0.9031074418071322, "grad_norm": 0.2572958767414093, "learning_rate": 0.0008194822360806525, "loss": 1.5272, "step": 1969 }, { "epoch": 0.903566104804495, "grad_norm": 0.3217867612838745, "learning_rate": 0.0008192917013217059, "loss": 1.6104, "step": 1970 }, { "epoch": 0.9040247678018576, "grad_norm": 0.34428954124450684, "learning_rate": 0.0008191010882382027, "loss": 1.8489, "step": 1971 }, { "epoch": 0.9044834307992202, "grad_norm": 0.23394142091274261, "learning_rate": 0.0008189103968769018, "loss": 1.1929, "step": 1972 }, { "epoch": 0.904942093796583, "grad_norm": 0.21039988100528717, "learning_rate": 0.0008187196272845811, "loss": 1.2102, "step": 1973 }, { "epoch": 0.9054007567939456, "grad_norm": 0.36821311712265015, "learning_rate": 0.0008185287795080377, "loss": 2.1794, "step": 1974 }, { "epoch": 0.9058594197913084, "grad_norm": 0.47129011154174805, "learning_rate": 0.0008183378535940879, "loss": 1.7034, "step": 1975 }, { "epoch": 0.906318082788671, "grad_norm": 0.28976601362228394, "learning_rate": 0.0008181468495895674, "loss": 1.404, "step": 1976 }, { "epoch": 0.9067767457860337, "grad_norm": 0.416605681180954, "learning_rate": 0.0008179557675413307, "loss": 1.8235, "step": 1977 }, { "epoch": 0.9072354087833964, "grad_norm": 0.3279779255390167, "learning_rate": 0.0008177646074962517, "loss": 1.7274, "step": 1978 }, { "epoch": 0.9076940717807591, "grad_norm": 0.2587118148803711, "learning_rate": 0.0008175733695012231, "loss": 1.1779, "step": 1979 }, { "epoch": 0.9081527347781218, "grad_norm": 0.33901700377464294, "learning_rate": 0.0008173820536031574, "loss": 2.0381, "step": 1980 }, { "epoch": 0.9086113977754845, "grad_norm": 0.22714871168136597, "learning_rate": 0.0008171906598489853, "loss": 1.3698, "step": 1981 }, { "epoch": 0.9090700607728471, "grad_norm": 0.32320547103881836, "learning_rate": 0.0008169991882856574, "loss": 1.7041, "step": 1982 }, { "epoch": 0.9095287237702099, "grad_norm": 0.3090137541294098, "learning_rate": 0.0008168076389601427, "loss": 1.3677, "step": 1983 }, { "epoch": 0.9099873867675725, "grad_norm": 0.25512242317199707, "learning_rate": 0.0008166160119194301, "loss": 1.3021, "step": 1984 }, { "epoch": 0.9104460497649353, "grad_norm": 0.328730970621109, "learning_rate": 0.0008164243072105267, "loss": 1.4302, "step": 1985 }, { "epoch": 0.9109047127622979, "grad_norm": 0.30282872915267944, "learning_rate": 0.0008162325248804594, "loss": 1.5295, "step": 1986 }, { "epoch": 0.9113633757596606, "grad_norm": 0.3073786795139313, "learning_rate": 0.0008160406649762735, "loss": 1.605, "step": 1987 }, { "epoch": 0.9118220387570233, "grad_norm": 0.5435524582862854, "learning_rate": 0.0008158487275450335, "loss": 1.8843, "step": 1988 }, { "epoch": 0.9122807017543859, "grad_norm": 0.3203667104244232, "learning_rate": 0.0008156567126338236, "loss": 0.826, "step": 1989 }, { "epoch": 0.9127393647517487, "grad_norm": 0.2148309201002121, "learning_rate": 0.0008154646202897459, "loss": 0.9943, "step": 1990 }, { "epoch": 0.9131980277491113, "grad_norm": 0.2307606041431427, "learning_rate": 0.0008152724505599223, "loss": 1.1053, "step": 1991 }, { "epoch": 0.913656690746474, "grad_norm": 0.30222174525260925, "learning_rate": 0.0008150802034914932, "loss": 1.1675, "step": 1992 }, { "epoch": 0.9141153537438367, "grad_norm": 0.3027079105377197, "learning_rate": 0.0008148878791316184, "loss": 1.2104, "step": 1993 }, { "epoch": 0.9145740167411994, "grad_norm": 0.2728803753852844, "learning_rate": 0.0008146954775274764, "loss": 1.3286, "step": 1994 }, { "epoch": 0.9150326797385621, "grad_norm": 0.413828045129776, "learning_rate": 0.0008145029987262649, "loss": 2.343, "step": 1995 }, { "epoch": 0.9154913427359248, "grad_norm": 0.125292107462883, "learning_rate": 0.0008143104427751998, "loss": 0.7368, "step": 1996 }, { "epoch": 0.9159500057332874, "grad_norm": 0.22482484579086304, "learning_rate": 0.000814117809721517, "loss": 1.1551, "step": 1997 }, { "epoch": 0.9164086687306502, "grad_norm": 0.2546938955783844, "learning_rate": 0.0008139250996124706, "loss": 1.3978, "step": 1998 }, { "epoch": 0.9168673317280128, "grad_norm": 0.31431275606155396, "learning_rate": 0.0008137323124953335, "loss": 1.5389, "step": 1999 }, { "epoch": 0.9173259947253756, "grad_norm": 0.3111874759197235, "learning_rate": 0.0008135394484173981, "loss": 1.564, "step": 2000 }, { "epoch": 0.9177846577227382, "grad_norm": 0.36270517110824585, "learning_rate": 0.0008133465074259754, "loss": 2.1987, "step": 2001 }, { "epoch": 0.918243320720101, "grad_norm": 0.34382519125938416, "learning_rate": 0.000813153489568395, "loss": 2.0159, "step": 2002 }, { "epoch": 0.9187019837174636, "grad_norm": 0.39224573969841003, "learning_rate": 0.0008129603948920056, "loss": 1.6236, "step": 2003 }, { "epoch": 0.9191606467148263, "grad_norm": 0.2750489115715027, "learning_rate": 0.000812767223444175, "loss": 1.2472, "step": 2004 }, { "epoch": 0.919619309712189, "grad_norm": 0.26714855432510376, "learning_rate": 0.0008125739752722891, "loss": 1.5997, "step": 2005 }, { "epoch": 0.9200779727095516, "grad_norm": 0.24979934096336365, "learning_rate": 0.0008123806504237533, "loss": 1.3241, "step": 2006 }, { "epoch": 0.9205366357069144, "grad_norm": 0.08660929650068283, "learning_rate": 0.0008121872489459916, "loss": 0.5286, "step": 2007 }, { "epoch": 0.920995298704277, "grad_norm": 0.13258817791938782, "learning_rate": 0.0008119937708864469, "loss": 0.6686, "step": 2008 }, { "epoch": 0.9214539617016397, "grad_norm": 0.18847809731960297, "learning_rate": 0.0008118002162925804, "loss": 0.839, "step": 2009 }, { "epoch": 0.9219126246990024, "grad_norm": 0.059659842401742935, "learning_rate": 0.0008116065852118728, "loss": 0.3741, "step": 2010 }, { "epoch": 0.9223712876963651, "grad_norm": 0.33892372250556946, "learning_rate": 0.0008114128776918229, "loss": 1.7498, "step": 2011 }, { "epoch": 0.9228299506937278, "grad_norm": 0.2842087149620056, "learning_rate": 0.0008112190937799488, "loss": 1.4104, "step": 2012 }, { "epoch": 0.9232886136910905, "grad_norm": 0.18660642206668854, "learning_rate": 0.0008110252335237868, "loss": 0.9644, "step": 2013 }, { "epoch": 0.9237472766884531, "grad_norm": 0.1834762841463089, "learning_rate": 0.0008108312969708928, "loss": 0.8232, "step": 2014 }, { "epoch": 0.9242059396858159, "grad_norm": 0.3446117639541626, "learning_rate": 0.0008106372841688401, "loss": 2.0298, "step": 2015 }, { "epoch": 0.9246646026831785, "grad_norm": 0.3695407807826996, "learning_rate": 0.0008104431951652219, "loss": 1.6854, "step": 2016 }, { "epoch": 0.9251232656805413, "grad_norm": 0.26748037338256836, "learning_rate": 0.0008102490300076496, "loss": 1.3953, "step": 2017 }, { "epoch": 0.9255819286779039, "grad_norm": 0.08492042124271393, "learning_rate": 0.0008100547887437531, "loss": 0.5242, "step": 2018 }, { "epoch": 0.9260405916752666, "grad_norm": 0.2597675919532776, "learning_rate": 0.0008098604714211813, "loss": 1.1575, "step": 2019 }, { "epoch": 0.9264992546726293, "grad_norm": 0.29596418142318726, "learning_rate": 0.0008096660780876017, "loss": 1.4683, "step": 2020 }, { "epoch": 0.926957917669992, "grad_norm": 0.2640438973903656, "learning_rate": 0.0008094716087907003, "loss": 1.4299, "step": 2021 }, { "epoch": 0.9274165806673547, "grad_norm": 0.1778401881456375, "learning_rate": 0.0008092770635781821, "loss": 0.8381, "step": 2022 }, { "epoch": 0.9278752436647173, "grad_norm": 0.2718510329723358, "learning_rate": 0.0008090824424977699, "loss": 1.5079, "step": 2023 }, { "epoch": 0.92833390666208, "grad_norm": 0.2521864175796509, "learning_rate": 0.0008088877455972062, "loss": 1.0537, "step": 2024 }, { "epoch": 0.9287925696594427, "grad_norm": 0.2869158685207367, "learning_rate": 0.000808692972924251, "loss": 1.6425, "step": 2025 }, { "epoch": 0.9292512326568054, "grad_norm": 0.33233222365379333, "learning_rate": 0.000808498124526684, "loss": 1.9667, "step": 2026 }, { "epoch": 0.9297098956541681, "grad_norm": 0.34734752774238586, "learning_rate": 0.0008083032004523026, "loss": 1.9368, "step": 2027 }, { "epoch": 0.9301685586515308, "grad_norm": 0.37285783886909485, "learning_rate": 0.0008081082007489231, "loss": 1.7972, "step": 2028 }, { "epoch": 0.9306272216488934, "grad_norm": 0.19466300308704376, "learning_rate": 0.0008079131254643804, "loss": 0.8481, "step": 2029 }, { "epoch": 0.9310858846462562, "grad_norm": 0.28476589918136597, "learning_rate": 0.0008077179746465278, "loss": 1.4938, "step": 2030 }, { "epoch": 0.9315445476436188, "grad_norm": 0.22528983652591705, "learning_rate": 0.0008075227483432374, "loss": 1.0093, "step": 2031 }, { "epoch": 0.9320032106409816, "grad_norm": 0.33551692962646484, "learning_rate": 0.0008073274466023994, "loss": 1.4329, "step": 2032 }, { "epoch": 0.9324618736383442, "grad_norm": 0.40014681220054626, "learning_rate": 0.0008071320694719226, "loss": 1.513, "step": 2033 }, { "epoch": 0.932920536635707, "grad_norm": 0.35248538851737976, "learning_rate": 0.000806936616999735, "loss": 1.8031, "step": 2034 }, { "epoch": 0.9333791996330696, "grad_norm": 0.20108796656131744, "learning_rate": 0.0008067410892337819, "loss": 0.9306, "step": 2035 }, { "epoch": 0.9338378626304323, "grad_norm": 0.2716940939426422, "learning_rate": 0.000806545486222028, "loss": 1.2828, "step": 2036 }, { "epoch": 0.934296525627795, "grad_norm": 0.3034631311893463, "learning_rate": 0.0008063498080124559, "loss": 1.4306, "step": 2037 }, { "epoch": 0.9347551886251577, "grad_norm": 0.3705253005027771, "learning_rate": 0.000806154054653067, "loss": 1.8486, "step": 2038 }, { "epoch": 0.9352138516225204, "grad_norm": 0.25508952140808105, "learning_rate": 0.0008059582261918812, "loss": 1.2751, "step": 2039 }, { "epoch": 0.935672514619883, "grad_norm": 0.39250513911247253, "learning_rate": 0.0008057623226769362, "loss": 2.0559, "step": 2040 }, { "epoch": 0.9361311776172457, "grad_norm": 0.2672523558139801, "learning_rate": 0.0008055663441562889, "loss": 1.037, "step": 2041 }, { "epoch": 0.9365898406146084, "grad_norm": 0.19914977252483368, "learning_rate": 0.0008053702906780142, "loss": 0.812, "step": 2042 }, { "epoch": 0.9370485036119711, "grad_norm": 0.20971417427062988, "learning_rate": 0.0008051741622902052, "loss": 0.8383, "step": 2043 }, { "epoch": 0.9375071666093338, "grad_norm": 0.3085592985153198, "learning_rate": 0.0008049779590409739, "loss": 1.4219, "step": 2044 }, { "epoch": 0.9379658296066965, "grad_norm": 0.361795574426651, "learning_rate": 0.0008047816809784502, "loss": 1.9539, "step": 2045 }, { "epoch": 0.9384244926040591, "grad_norm": 0.37029165029525757, "learning_rate": 0.0008045853281507827, "loss": 1.7736, "step": 2046 }, { "epoch": 0.9388831556014219, "grad_norm": 0.3089427053928375, "learning_rate": 0.0008043889006061378, "loss": 1.5316, "step": 2047 }, { "epoch": 0.9393418185987845, "grad_norm": 0.22889067232608795, "learning_rate": 0.0008041923983927009, "loss": 0.8428, "step": 2048 }, { "epoch": 0.9398004815961473, "grad_norm": 0.3076949417591095, "learning_rate": 0.0008039958215586753, "loss": 1.6298, "step": 2049 }, { "epoch": 0.9402591445935099, "grad_norm": 0.3263643682003021, "learning_rate": 0.0008037991701522829, "loss": 1.8137, "step": 2050 }, { "epoch": 0.9407178075908726, "grad_norm": 0.3142676055431366, "learning_rate": 0.0008036024442217636, "loss": 1.4615, "step": 2051 }, { "epoch": 0.9411764705882353, "grad_norm": 0.27744585275650024, "learning_rate": 0.0008034056438153753, "loss": 1.2395, "step": 2052 }, { "epoch": 0.941635133585598, "grad_norm": 0.21574388444423676, "learning_rate": 0.0008032087689813952, "loss": 0.914, "step": 2053 }, { "epoch": 0.9420937965829607, "grad_norm": 0.1983022391796112, "learning_rate": 0.0008030118197681178, "loss": 1.0479, "step": 2054 }, { "epoch": 0.9425524595803234, "grad_norm": 0.2801907956600189, "learning_rate": 0.0008028147962238561, "loss": 1.3931, "step": 2055 }, { "epoch": 0.943011122577686, "grad_norm": 0.10424070060253143, "learning_rate": 0.0008026176983969415, "loss": 0.5858, "step": 2056 }, { "epoch": 0.9434697855750487, "grad_norm": 0.260968953371048, "learning_rate": 0.0008024205263357234, "loss": 1.2817, "step": 2057 }, { "epoch": 0.9439284485724114, "grad_norm": 0.24895592033863068, "learning_rate": 0.0008022232800885697, "loss": 1.2282, "step": 2058 }, { "epoch": 0.9443871115697741, "grad_norm": 0.35649770498275757, "learning_rate": 0.0008020259597038663, "loss": 1.7548, "step": 2059 }, { "epoch": 0.9448457745671368, "grad_norm": 0.3545326292514801, "learning_rate": 0.0008018285652300173, "loss": 1.819, "step": 2060 }, { "epoch": 0.9453044375644994, "grad_norm": 0.35756614804267883, "learning_rate": 0.0008016310967154448, "loss": 1.8008, "step": 2061 }, { "epoch": 0.9457631005618622, "grad_norm": 0.12270601838827133, "learning_rate": 0.0008014335542085896, "loss": 0.6562, "step": 2062 }, { "epoch": 0.9462217635592248, "grad_norm": 0.3161708414554596, "learning_rate": 0.0008012359377579099, "loss": 1.5311, "step": 2063 }, { "epoch": 0.9466804265565876, "grad_norm": 0.20928043127059937, "learning_rate": 0.0008010382474118827, "loss": 0.9492, "step": 2064 }, { "epoch": 0.9471390895539502, "grad_norm": 0.3087616562843323, "learning_rate": 0.0008008404832190028, "loss": 1.3438, "step": 2065 }, { "epoch": 0.947597752551313, "grad_norm": 0.24473027884960175, "learning_rate": 0.0008006426452277833, "loss": 1.151, "step": 2066 }, { "epoch": 0.9480564155486756, "grad_norm": 0.08595964312553406, "learning_rate": 0.0008004447334867551, "loss": 0.43, "step": 2067 }, { "epoch": 0.9485150785460383, "grad_norm": 0.5434077382087708, "learning_rate": 0.0008002467480444675, "loss": 2.1346, "step": 2068 }, { "epoch": 0.948973741543401, "grad_norm": 0.32616284489631653, "learning_rate": 0.0008000486889494877, "loss": 1.3558, "step": 2069 }, { "epoch": 0.9494324045407637, "grad_norm": 0.24117501080036163, "learning_rate": 0.000799850556250401, "loss": 1.0023, "step": 2070 }, { "epoch": 0.9498910675381264, "grad_norm": 0.26819151639938354, "learning_rate": 0.0007996523499958109, "loss": 1.0614, "step": 2071 }, { "epoch": 0.9503497305354891, "grad_norm": 0.28361374139785767, "learning_rate": 0.0007994540702343386, "loss": 1.374, "step": 2072 }, { "epoch": 0.9508083935328517, "grad_norm": 0.4279637038707733, "learning_rate": 0.000799255717014624, "loss": 1.9542, "step": 2073 }, { "epoch": 0.9512670565302144, "grad_norm": 0.4314320981502533, "learning_rate": 0.0007990572903853239, "loss": 1.9646, "step": 2074 }, { "epoch": 0.9517257195275771, "grad_norm": 0.3635926842689514, "learning_rate": 0.0007988587903951143, "loss": 1.699, "step": 2075 }, { "epoch": 0.9521843825249398, "grad_norm": 0.3035918176174164, "learning_rate": 0.0007986602170926885, "loss": 1.4184, "step": 2076 }, { "epoch": 0.9526430455223025, "grad_norm": 0.38614422082901, "learning_rate": 0.0007984615705267581, "loss": 2.1599, "step": 2077 }, { "epoch": 0.9531017085196651, "grad_norm": 0.3324802815914154, "learning_rate": 0.0007982628507460523, "loss": 1.2087, "step": 2078 }, { "epoch": 0.9535603715170279, "grad_norm": 0.243191197514534, "learning_rate": 0.0007980640577993187, "loss": 0.9754, "step": 2079 }, { "epoch": 0.9540190345143905, "grad_norm": 0.2958884537220001, "learning_rate": 0.0007978651917353225, "loss": 1.3487, "step": 2080 }, { "epoch": 0.9544776975117533, "grad_norm": 0.17807622253894806, "learning_rate": 0.0007976662526028473, "loss": 0.9265, "step": 2081 }, { "epoch": 0.9549363605091159, "grad_norm": 0.20209459960460663, "learning_rate": 0.0007974672404506937, "loss": 0.9037, "step": 2082 }, { "epoch": 0.9553950235064786, "grad_norm": 0.31564533710479736, "learning_rate": 0.0007972681553276813, "loss": 1.7772, "step": 2083 }, { "epoch": 0.9558536865038413, "grad_norm": 0.4125016927719116, "learning_rate": 0.0007970689972826471, "loss": 2.0496, "step": 2084 }, { "epoch": 0.956312349501204, "grad_norm": 0.49603113532066345, "learning_rate": 0.0007968697663644457, "loss": 1.1933, "step": 2085 }, { "epoch": 0.9567710124985667, "grad_norm": 0.2608060836791992, "learning_rate": 0.0007966704626219503, "loss": 1.3788, "step": 2086 }, { "epoch": 0.9572296754959294, "grad_norm": 0.3525409400463104, "learning_rate": 0.0007964710861040512, "loss": 1.7322, "step": 2087 }, { "epoch": 0.957688338493292, "grad_norm": 0.3033929169178009, "learning_rate": 0.000796271636859657, "loss": 1.6537, "step": 2088 }, { "epoch": 0.9581470014906548, "grad_norm": 0.24828091263771057, "learning_rate": 0.000796072114937694, "loss": 1.1296, "step": 2089 }, { "epoch": 0.9586056644880174, "grad_norm": 0.2196667641401291, "learning_rate": 0.0007958725203871064, "loss": 1.0247, "step": 2090 }, { "epoch": 0.9590643274853801, "grad_norm": 0.2858245074748993, "learning_rate": 0.0007956728532568563, "loss": 1.3901, "step": 2091 }, { "epoch": 0.9595229904827428, "grad_norm": 0.2958674430847168, "learning_rate": 0.0007954731135959235, "loss": 1.3847, "step": 2092 }, { "epoch": 0.9599816534801054, "grad_norm": 0.25934916734695435, "learning_rate": 0.0007952733014533051, "loss": 1.3095, "step": 2093 }, { "epoch": 0.9604403164774682, "grad_norm": 0.26100558042526245, "learning_rate": 0.0007950734168780171, "loss": 1.519, "step": 2094 }, { "epoch": 0.9608989794748308, "grad_norm": 0.18241111934185028, "learning_rate": 0.0007948734599190924, "loss": 0.9885, "step": 2095 }, { "epoch": 0.9613576424721936, "grad_norm": 1.0444318056106567, "learning_rate": 0.0007946734306255816, "loss": 1.6812, "step": 2096 }, { "epoch": 0.9618163054695562, "grad_norm": 0.31861168146133423, "learning_rate": 0.0007944733290465535, "loss": 2.0449, "step": 2097 }, { "epoch": 0.962274968466919, "grad_norm": 0.28558486700057983, "learning_rate": 0.0007942731552310949, "loss": 1.6525, "step": 2098 }, { "epoch": 0.9627336314642816, "grad_norm": 0.23924612998962402, "learning_rate": 0.0007940729092283092, "loss": 1.2884, "step": 2099 }, { "epoch": 0.9631922944616443, "grad_norm": 0.2011316865682602, "learning_rate": 0.0007938725910873186, "loss": 1.3885, "step": 2100 }, { "epoch": 0.963650957459007, "grad_norm": 0.31433144211769104, "learning_rate": 0.0007936722008572625, "loss": 1.9054, "step": 2101 }, { "epoch": 0.9641096204563697, "grad_norm": 0.26311978697776794, "learning_rate": 0.000793471738587298, "loss": 1.584, "step": 2102 }, { "epoch": 0.9645682834537324, "grad_norm": 0.27694377303123474, "learning_rate": 0.0007932712043266, "loss": 1.2675, "step": 2103 }, { "epoch": 0.9650269464510951, "grad_norm": 0.15894590318202972, "learning_rate": 0.000793070598124361, "loss": 0.8647, "step": 2104 }, { "epoch": 0.9654856094484577, "grad_norm": 0.3218410909175873, "learning_rate": 0.000792869920029791, "loss": 1.8047, "step": 2105 }, { "epoch": 0.9659442724458205, "grad_norm": 0.18074116110801697, "learning_rate": 0.0007926691700921181, "loss": 1.0374, "step": 2106 }, { "epoch": 0.9664029354431831, "grad_norm": 0.3340591490268707, "learning_rate": 0.0007924683483605875, "loss": 1.8395, "step": 2107 }, { "epoch": 0.9668615984405458, "grad_norm": 0.30720779299736023, "learning_rate": 0.0007922674548844622, "loss": 1.6614, "step": 2108 }, { "epoch": 0.9673202614379085, "grad_norm": 0.275790810585022, "learning_rate": 0.0007920664897130228, "loss": 1.6523, "step": 2109 }, { "epoch": 0.9677789244352711, "grad_norm": 0.0825454369187355, "learning_rate": 0.0007918654528955675, "loss": 0.4651, "step": 2110 }, { "epoch": 0.9682375874326339, "grad_norm": 0.31684210896492004, "learning_rate": 0.0007916643444814123, "loss": 1.9248, "step": 2111 }, { "epoch": 0.9686962504299965, "grad_norm": 0.18997028470039368, "learning_rate": 0.0007914631645198904, "loss": 1.0344, "step": 2112 }, { "epoch": 0.9691549134273593, "grad_norm": 0.24045085906982422, "learning_rate": 0.0007912619130603527, "loss": 1.2285, "step": 2113 }, { "epoch": 0.9696135764247219, "grad_norm": 0.3353755474090576, "learning_rate": 0.0007910605901521675, "loss": 1.7612, "step": 2114 }, { "epoch": 0.9700722394220846, "grad_norm": 0.32669898867607117, "learning_rate": 0.0007908591958447209, "loss": 1.4136, "step": 2115 }, { "epoch": 0.9705309024194473, "grad_norm": 0.37192410230636597, "learning_rate": 0.0007906577301874163, "loss": 1.8505, "step": 2116 }, { "epoch": 0.97098956541681, "grad_norm": 0.2398930788040161, "learning_rate": 0.000790456193229675, "loss": 1.0386, "step": 2117 }, { "epoch": 0.9714482284141727, "grad_norm": 0.3819679319858551, "learning_rate": 0.0007902545850209349, "loss": 1.7427, "step": 2118 }, { "epoch": 0.9719068914115354, "grad_norm": 0.2345220297574997, "learning_rate": 0.0007900529056106525, "loss": 1.2595, "step": 2119 }, { "epoch": 0.972365554408898, "grad_norm": 0.3607707917690277, "learning_rate": 0.0007898511550483007, "loss": 2.0488, "step": 2120 }, { "epoch": 0.9728242174062608, "grad_norm": 0.33080729842185974, "learning_rate": 0.0007896493333833707, "loss": 1.4388, "step": 2121 }, { "epoch": 0.9732828804036234, "grad_norm": 0.2641333341598511, "learning_rate": 0.0007894474406653709, "loss": 1.5211, "step": 2122 }, { "epoch": 0.9737415434009862, "grad_norm": 0.18903343379497528, "learning_rate": 0.0007892454769438268, "loss": 0.7437, "step": 2123 }, { "epoch": 0.9742002063983488, "grad_norm": 0.2841821014881134, "learning_rate": 0.0007890434422682817, "loss": 1.4099, "step": 2124 }, { "epoch": 0.9746588693957114, "grad_norm": 0.18609897792339325, "learning_rate": 0.0007888413366882959, "loss": 0.9775, "step": 2125 }, { "epoch": 0.9751175323930742, "grad_norm": 0.3263588845729828, "learning_rate": 0.0007886391602534477, "loss": 1.9374, "step": 2126 }, { "epoch": 0.9755761953904368, "grad_norm": 0.29043829441070557, "learning_rate": 0.0007884369130133325, "loss": 1.6285, "step": 2127 }, { "epoch": 0.9760348583877996, "grad_norm": 0.23733757436275482, "learning_rate": 0.0007882345950175624, "loss": 1.009, "step": 2128 }, { "epoch": 0.9764935213851622, "grad_norm": 0.28636664152145386, "learning_rate": 0.0007880322063157681, "loss": 1.4238, "step": 2129 }, { "epoch": 0.976952184382525, "grad_norm": 0.24077756702899933, "learning_rate": 0.0007878297469575967, "loss": 1.3356, "step": 2130 }, { "epoch": 0.9774108473798876, "grad_norm": 0.21748611330986023, "learning_rate": 0.000787627216992713, "loss": 1.0264, "step": 2131 }, { "epoch": 0.9778695103772503, "grad_norm": 0.17146669328212738, "learning_rate": 0.0007874246164707991, "loss": 0.9994, "step": 2132 }, { "epoch": 0.978328173374613, "grad_norm": 0.28764429688453674, "learning_rate": 0.0007872219454415543, "loss": 1.8247, "step": 2133 }, { "epoch": 0.9787868363719757, "grad_norm": 0.3070955276489258, "learning_rate": 0.0007870192039546954, "loss": 1.7511, "step": 2134 }, { "epoch": 0.9792454993693384, "grad_norm": 0.24031320214271545, "learning_rate": 0.0007868163920599563, "loss": 1.4128, "step": 2135 }, { "epoch": 0.9797041623667011, "grad_norm": 0.260254442691803, "learning_rate": 0.000786613509807088, "loss": 1.5773, "step": 2136 }, { "epoch": 0.9801628253640637, "grad_norm": 0.25486305356025696, "learning_rate": 0.0007864105572458592, "loss": 1.146, "step": 2137 }, { "epoch": 0.9806214883614265, "grad_norm": 0.2558663785457611, "learning_rate": 0.0007862075344260555, "loss": 1.3716, "step": 2138 }, { "epoch": 0.9810801513587891, "grad_norm": 0.2597788870334625, "learning_rate": 0.0007860044413974801, "loss": 1.4079, "step": 2139 }, { "epoch": 0.9815388143561519, "grad_norm": 0.2434176504611969, "learning_rate": 0.000785801278209953, "loss": 1.3001, "step": 2140 }, { "epoch": 0.9819974773535145, "grad_norm": 0.24413499236106873, "learning_rate": 0.0007855980449133116, "loss": 1.4407, "step": 2141 }, { "epoch": 0.9824561403508771, "grad_norm": 0.24809010326862335, "learning_rate": 0.0007853947415574106, "loss": 1.393, "step": 2142 }, { "epoch": 0.9829148033482399, "grad_norm": 0.2992349863052368, "learning_rate": 0.0007851913681921214, "loss": 1.4247, "step": 2143 }, { "epoch": 0.9833734663456025, "grad_norm": 0.28984925150871277, "learning_rate": 0.0007849879248673336, "loss": 1.7083, "step": 2144 }, { "epoch": 0.9838321293429653, "grad_norm": 0.2734730839729309, "learning_rate": 0.0007847844116329527, "loss": 1.2769, "step": 2145 }, { "epoch": 0.9842907923403279, "grad_norm": 0.23517167568206787, "learning_rate": 0.0007845808285389024, "loss": 1.291, "step": 2146 }, { "epoch": 0.9847494553376906, "grad_norm": 0.09210627526044846, "learning_rate": 0.0007843771756351228, "loss": 0.5888, "step": 2147 }, { "epoch": 0.9852081183350533, "grad_norm": 0.25133419036865234, "learning_rate": 0.0007841734529715717, "loss": 1.1768, "step": 2148 }, { "epoch": 0.985666781332416, "grad_norm": 0.2581869065761566, "learning_rate": 0.0007839696605982235, "loss": 1.5143, "step": 2149 }, { "epoch": 0.9861254443297787, "grad_norm": 0.2132362276315689, "learning_rate": 0.00078376579856507, "loss": 0.9654, "step": 2150 }, { "epoch": 0.9865841073271414, "grad_norm": 0.3260076344013214, "learning_rate": 0.00078356186692212, "loss": 1.6854, "step": 2151 }, { "epoch": 0.987042770324504, "grad_norm": 0.1947498768568039, "learning_rate": 0.0007833578657193996, "loss": 0.923, "step": 2152 }, { "epoch": 0.9875014333218668, "grad_norm": 0.25844818353652954, "learning_rate": 0.0007831537950069516, "loss": 1.3283, "step": 2153 }, { "epoch": 0.9879600963192294, "grad_norm": 0.3847392499446869, "learning_rate": 0.000782949654834836, "loss": 1.8961, "step": 2154 }, { "epoch": 0.9884187593165922, "grad_norm": 0.376164048910141, "learning_rate": 0.00078274544525313, "loss": 2.1665, "step": 2155 }, { "epoch": 0.9888774223139548, "grad_norm": 0.14740854501724243, "learning_rate": 0.0007825411663119274, "loss": 0.7855, "step": 2156 }, { "epoch": 0.9893360853113176, "grad_norm": 0.3205507695674896, "learning_rate": 0.0007823368180613395, "loss": 1.5332, "step": 2157 }, { "epoch": 0.9897947483086802, "grad_norm": 0.2271261066198349, "learning_rate": 0.0007821324005514945, "loss": 0.9458, "step": 2158 }, { "epoch": 0.9902534113060428, "grad_norm": 0.2652105987071991, "learning_rate": 0.0007819279138325373, "loss": 1.389, "step": 2159 }, { "epoch": 0.9907120743034056, "grad_norm": 0.2753114104270935, "learning_rate": 0.00078172335795463, "loss": 1.2935, "step": 2160 }, { "epoch": 0.9911707373007682, "grad_norm": 0.10524280369281769, "learning_rate": 0.0007815187329679517, "loss": 0.6075, "step": 2161 }, { "epoch": 0.991629400298131, "grad_norm": 0.22543789446353912, "learning_rate": 0.0007813140389226984, "loss": 1.2902, "step": 2162 }, { "epoch": 0.9920880632954936, "grad_norm": 0.31716790795326233, "learning_rate": 0.0007811092758690829, "loss": 1.8091, "step": 2163 }, { "epoch": 0.9925467262928563, "grad_norm": 0.28479596972465515, "learning_rate": 0.000780904443857335, "loss": 1.4618, "step": 2164 }, { "epoch": 0.993005389290219, "grad_norm": 0.18860182166099548, "learning_rate": 0.0007806995429377019, "loss": 0.9116, "step": 2165 }, { "epoch": 0.9934640522875817, "grad_norm": 0.24793879687786102, "learning_rate": 0.0007804945731604467, "loss": 1.2674, "step": 2166 }, { "epoch": 0.9939227152849444, "grad_norm": 0.2623526453971863, "learning_rate": 0.0007802895345758503, "loss": 1.4075, "step": 2167 }, { "epoch": 0.9943813782823071, "grad_norm": 0.3471762239933014, "learning_rate": 0.0007800844272342103, "loss": 2.0403, "step": 2168 }, { "epoch": 0.9948400412796697, "grad_norm": 0.29183194041252136, "learning_rate": 0.0007798792511858404, "loss": 1.6896, "step": 2169 }, { "epoch": 0.9952987042770325, "grad_norm": 0.30205872654914856, "learning_rate": 0.0007796740064810724, "loss": 1.6495, "step": 2170 }, { "epoch": 0.9957573672743951, "grad_norm": 0.2651442289352417, "learning_rate": 0.000779468693170254, "loss": 1.3665, "step": 2171 }, { "epoch": 0.9962160302717579, "grad_norm": 0.29746365547180176, "learning_rate": 0.0007792633113037501, "loss": 1.4551, "step": 2172 }, { "epoch": 0.9966746932691205, "grad_norm": 0.30460765957832336, "learning_rate": 0.0007790578609319424, "loss": 1.6595, "step": 2173 }, { "epoch": 0.9971333562664833, "grad_norm": 0.2733561396598816, "learning_rate": 0.0007788523421052291, "loss": 0.9736, "step": 2174 }, { "epoch": 0.9975920192638459, "grad_norm": 0.3780529499053955, "learning_rate": 0.0007786467548740259, "loss": 1.7864, "step": 2175 }, { "epoch": 0.9980506822612085, "grad_norm": 0.16774733364582062, "learning_rate": 0.0007784410992887645, "loss": 0.9591, "step": 2176 }, { "epoch": 0.9985093452585713, "grad_norm": 0.31271597743034363, "learning_rate": 0.0007782353753998936, "loss": 1.7136, "step": 2177 }, { "epoch": 0.9989680082559339, "grad_norm": 0.298076331615448, "learning_rate": 0.0007780295832578792, "loss": 1.2218, "step": 2178 }, { "epoch": 0.9994266712532967, "grad_norm": 0.37182536721229553, "learning_rate": 0.0007778237229132032, "loss": 1.7005, "step": 2179 }, { "epoch": 0.9998853342506593, "grad_norm": 0.2658548355102539, "learning_rate": 0.0007776177944163648, "loss": 1.2988, "step": 2180 }, { "epoch": 1.0, "grad_norm": 0.2658548355102539, "learning_rate": 0.0007776177944163648, "loss": 0.5049, "step": 2181 }, { "epoch": 1.0004586629973626, "grad_norm": 0.3670017421245575, "learning_rate": 0.0007774117978178797, "loss": 1.684, "step": 2182 }, { "epoch": 1.0009173259947253, "grad_norm": 0.281902939081192, "learning_rate": 0.0007772057331682802, "loss": 1.1651, "step": 2183 }, { "epoch": 1.0013759889920881, "grad_norm": 0.21292293071746826, "learning_rate": 0.0007769996005181159, "loss": 1.2873, "step": 2184 }, { "epoch": 1.0018346519894508, "grad_norm": 0.3060021996498108, "learning_rate": 0.0007767933999179521, "loss": 1.6379, "step": 2185 }, { "epoch": 1.0022933149868134, "grad_norm": 0.6159801483154297, "learning_rate": 0.0007765871314183715, "loss": 1.8337, "step": 2186 }, { "epoch": 1.002751977984176, "grad_norm": 0.27383625507354736, "learning_rate": 0.0007763807950699734, "loss": 1.1533, "step": 2187 }, { "epoch": 1.003210640981539, "grad_norm": 0.26541003584861755, "learning_rate": 0.0007761743909233733, "loss": 1.7667, "step": 2188 }, { "epoch": 1.0036693039789015, "grad_norm": 0.2529599070549011, "learning_rate": 0.0007759679190292039, "loss": 1.2758, "step": 2189 }, { "epoch": 1.0041279669762642, "grad_norm": 0.3762117326259613, "learning_rate": 0.0007757613794381142, "loss": 1.4408, "step": 2190 }, { "epoch": 1.0045866299736268, "grad_norm": 0.17387861013412476, "learning_rate": 0.0007755547722007696, "loss": 1.2704, "step": 2191 }, { "epoch": 1.0050452929709897, "grad_norm": 0.3292040228843689, "learning_rate": 0.0007753480973678527, "loss": 1.5397, "step": 2192 }, { "epoch": 1.0055039559683523, "grad_norm": 0.2807086408138275, "learning_rate": 0.0007751413549900621, "loss": 1.2153, "step": 2193 }, { "epoch": 1.005962618965715, "grad_norm": 0.3431303799152374, "learning_rate": 0.0007749345451181132, "loss": 1.6964, "step": 2194 }, { "epoch": 1.0064212819630776, "grad_norm": 0.32860153913497925, "learning_rate": 0.0007747276678027379, "loss": 2.2512, "step": 2195 }, { "epoch": 1.0068799449604404, "grad_norm": 0.4267536401748657, "learning_rate": 0.000774520723094685, "loss": 2.2029, "step": 2196 }, { "epoch": 1.007338607957803, "grad_norm": 0.3573954403400421, "learning_rate": 0.0007743137110447194, "loss": 1.3235, "step": 2197 }, { "epoch": 1.0077972709551657, "grad_norm": 0.31035539507865906, "learning_rate": 0.0007741066317036222, "loss": 1.5737, "step": 2198 }, { "epoch": 1.0082559339525283, "grad_norm": 0.43037089705467224, "learning_rate": 0.0007738994851221921, "loss": 1.7621, "step": 2199 }, { "epoch": 1.008714596949891, "grad_norm": 0.3466168940067291, "learning_rate": 0.0007736922713512434, "loss": 2.3148, "step": 2200 }, { "epoch": 1.0091732599472538, "grad_norm": 0.3968213200569153, "learning_rate": 0.000773484990441607, "loss": 2.1924, "step": 2201 }, { "epoch": 1.0096319229446165, "grad_norm": 0.3038836419582367, "learning_rate": 0.0007732776424441307, "loss": 0.9113, "step": 2202 }, { "epoch": 1.010090585941979, "grad_norm": 0.29169583320617676, "learning_rate": 0.0007730702274096782, "loss": 1.7387, "step": 2203 }, { "epoch": 1.0105492489393417, "grad_norm": 0.37365761399269104, "learning_rate": 0.0007728627453891297, "loss": 2.0283, "step": 2204 }, { "epoch": 1.0110079119367046, "grad_norm": 0.35608237981796265, "learning_rate": 0.0007726551964333827, "loss": 0.9617, "step": 2205 }, { "epoch": 1.0114665749340672, "grad_norm": 0.3315924406051636, "learning_rate": 0.0007724475805933498, "loss": 1.8494, "step": 2206 }, { "epoch": 1.0119252379314299, "grad_norm": 0.31007975339889526, "learning_rate": 0.000772239897919961, "loss": 1.2838, "step": 2207 }, { "epoch": 1.0123839009287925, "grad_norm": 0.4036838710308075, "learning_rate": 0.0007720321484641625, "loss": 1.7657, "step": 2208 }, { "epoch": 1.0128425639261553, "grad_norm": 0.32576027512550354, "learning_rate": 0.0007718243322769163, "loss": 1.1251, "step": 2209 }, { "epoch": 1.013301226923518, "grad_norm": 0.3255734145641327, "learning_rate": 0.0007716164494092014, "loss": 1.7505, "step": 2210 }, { "epoch": 1.0137598899208806, "grad_norm": 0.2708061635494232, "learning_rate": 0.0007714084999120132, "loss": 1.2809, "step": 2211 }, { "epoch": 1.0142185529182433, "grad_norm": 0.3722837269306183, "learning_rate": 0.0007712004838363629, "loss": 0.8855, "step": 2212 }, { "epoch": 1.0146772159156061, "grad_norm": 0.19533422589302063, "learning_rate": 0.0007709924012332784, "loss": 0.9815, "step": 2213 }, { "epoch": 1.0151358789129687, "grad_norm": 0.14171691238880157, "learning_rate": 0.000770784252153804, "loss": 1.119, "step": 2214 }, { "epoch": 1.0155945419103314, "grad_norm": 0.4291399121284485, "learning_rate": 0.000770576036649, "loss": 1.8247, "step": 2215 }, { "epoch": 1.016053204907694, "grad_norm": 0.35333549976348877, "learning_rate": 0.0007703677547699435, "loss": 1.5409, "step": 2216 }, { "epoch": 1.0165118679050567, "grad_norm": 0.6529225707054138, "learning_rate": 0.000770159406567727, "loss": 1.7983, "step": 2217 }, { "epoch": 1.0169705309024195, "grad_norm": 0.1469832956790924, "learning_rate": 0.0007699509920934603, "loss": 0.5275, "step": 2218 }, { "epoch": 1.0174291938997821, "grad_norm": 0.35177433490753174, "learning_rate": 0.0007697425113982688, "loss": 2.0906, "step": 2219 }, { "epoch": 1.0178878568971448, "grad_norm": 0.3718166649341583, "learning_rate": 0.0007695339645332943, "loss": 1.5574, "step": 2220 }, { "epoch": 1.0183465198945074, "grad_norm": 0.4496723413467407, "learning_rate": 0.0007693253515496947, "loss": 1.6213, "step": 2221 }, { "epoch": 1.0188051828918703, "grad_norm": 0.3293614089488983, "learning_rate": 0.0007691166724986447, "loss": 1.3462, "step": 2222 }, { "epoch": 1.019263845889233, "grad_norm": 0.2451786994934082, "learning_rate": 0.0007689079274313342, "loss": 1.3479, "step": 2223 }, { "epoch": 1.0197225088865955, "grad_norm": 0.33322182297706604, "learning_rate": 0.0007686991163989704, "loss": 2.0833, "step": 2224 }, { "epoch": 1.0201811718839582, "grad_norm": 0.18467935919761658, "learning_rate": 0.000768490239452776, "loss": 0.8448, "step": 2225 }, { "epoch": 1.020639834881321, "grad_norm": 0.21402765810489655, "learning_rate": 0.0007682812966439896, "loss": 0.7428, "step": 2226 }, { "epoch": 1.0210984978786837, "grad_norm": 0.2612096667289734, "learning_rate": 0.0007680722880238669, "loss": 1.5559, "step": 2227 }, { "epoch": 1.0215571608760463, "grad_norm": 0.2521737217903137, "learning_rate": 0.0007678632136436792, "loss": 1.6891, "step": 2228 }, { "epoch": 1.022015823873409, "grad_norm": 0.2869427502155304, "learning_rate": 0.0007676540735547136, "loss": 0.9844, "step": 2229 }, { "epoch": 1.0224744868707718, "grad_norm": 0.17099249362945557, "learning_rate": 0.0007674448678082741, "loss": 1.3577, "step": 2230 }, { "epoch": 1.0229331498681344, "grad_norm": 0.3532930612564087, "learning_rate": 0.0007672355964556799, "loss": 1.4924, "step": 2231 }, { "epoch": 1.023391812865497, "grad_norm": 0.22827592492103577, "learning_rate": 0.000767026259548267, "loss": 0.8428, "step": 2232 }, { "epoch": 1.0238504758628597, "grad_norm": 0.19980807602405548, "learning_rate": 0.0007668168571373875, "loss": 1.0052, "step": 2233 }, { "epoch": 1.0243091388602223, "grad_norm": 0.1704528033733368, "learning_rate": 0.000766607389274409, "loss": 0.8841, "step": 2234 }, { "epoch": 1.0247678018575852, "grad_norm": 0.28992947936058044, "learning_rate": 0.0007663978560107155, "loss": 1.5616, "step": 2235 }, { "epoch": 1.0252264648549478, "grad_norm": 0.29282909631729126, "learning_rate": 0.000766188257397707, "loss": 1.7693, "step": 2236 }, { "epoch": 1.0256851278523105, "grad_norm": 0.4391837418079376, "learning_rate": 0.0007659785934867999, "loss": 2.3022, "step": 2237 }, { "epoch": 1.026143790849673, "grad_norm": 0.3151383399963379, "learning_rate": 0.0007657688643294259, "loss": 1.6134, "step": 2238 }, { "epoch": 1.026602453847036, "grad_norm": 0.264639288187027, "learning_rate": 0.0007655590699770331, "loss": 1.0672, "step": 2239 }, { "epoch": 1.0270611168443986, "grad_norm": 0.24356485903263092, "learning_rate": 0.0007653492104810858, "loss": 1.1269, "step": 2240 }, { "epoch": 1.0275197798417612, "grad_norm": 0.191898375749588, "learning_rate": 0.000765139285893064, "loss": 0.8673, "step": 2241 }, { "epoch": 1.0279784428391239, "grad_norm": 0.2663523256778717, "learning_rate": 0.0007649292962644634, "loss": 1.2664, "step": 2242 }, { "epoch": 1.0284371058364867, "grad_norm": 0.24687746167182922, "learning_rate": 0.0007647192416467962, "loss": 1.3866, "step": 2243 }, { "epoch": 1.0288957688338494, "grad_norm": 0.26128000020980835, "learning_rate": 0.0007645091220915904, "loss": 1.1718, "step": 2244 }, { "epoch": 1.029354431831212, "grad_norm": 0.3191836476325989, "learning_rate": 0.0007642989376503897, "loss": 1.4741, "step": 2245 }, { "epoch": 1.0298130948285746, "grad_norm": 0.11389821767807007, "learning_rate": 0.0007640886883747539, "loss": 1.0261, "step": 2246 }, { "epoch": 1.0302717578259375, "grad_norm": 0.4371493458747864, "learning_rate": 0.0007638783743162586, "loss": 1.3102, "step": 2247 }, { "epoch": 1.0307304208233001, "grad_norm": 0.2924779951572418, "learning_rate": 0.0007636679955264954, "loss": 1.5544, "step": 2248 }, { "epoch": 1.0311890838206628, "grad_norm": 0.29356637597084045, "learning_rate": 0.0007634575520570719, "loss": 1.4492, "step": 2249 }, { "epoch": 1.0316477468180254, "grad_norm": 0.41005346179008484, "learning_rate": 0.0007632470439596113, "loss": 1.8448, "step": 2250 }, { "epoch": 1.032106409815388, "grad_norm": 0.3051200807094574, "learning_rate": 0.0007630364712857525, "loss": 1.9664, "step": 2251 }, { "epoch": 1.032565072812751, "grad_norm": 0.29568442702293396, "learning_rate": 0.0007628258340871507, "loss": 1.519, "step": 2252 }, { "epoch": 1.0330237358101135, "grad_norm": 0.34316301345825195, "learning_rate": 0.0007626151324154768, "loss": 1.6171, "step": 2253 }, { "epoch": 1.0334823988074762, "grad_norm": 0.34773850440979004, "learning_rate": 0.0007624043663224173, "loss": 1.3993, "step": 2254 }, { "epoch": 1.0339410618048388, "grad_norm": 0.4576634466648102, "learning_rate": 0.000762193535859675, "loss": 1.3873, "step": 2255 }, { "epoch": 1.0343997248022017, "grad_norm": 0.3191813826560974, "learning_rate": 0.0007619826410789676, "loss": 1.6381, "step": 2256 }, { "epoch": 1.0348583877995643, "grad_norm": 0.29539400339126587, "learning_rate": 0.0007617716820320293, "loss": 1.3324, "step": 2257 }, { "epoch": 1.035317050796927, "grad_norm": 0.18559886515140533, "learning_rate": 0.0007615606587706101, "loss": 1.2413, "step": 2258 }, { "epoch": 1.0357757137942896, "grad_norm": 0.33654916286468506, "learning_rate": 0.0007613495713464752, "loss": 1.215, "step": 2259 }, { "epoch": 1.0362343767916524, "grad_norm": 0.2632749378681183, "learning_rate": 0.0007611384198114061, "loss": 1.8086, "step": 2260 }, { "epoch": 1.036693039789015, "grad_norm": 0.36246350407600403, "learning_rate": 0.0007609272042171998, "loss": 1.32, "step": 2261 }, { "epoch": 1.0371517027863777, "grad_norm": 0.21696679294109344, "learning_rate": 0.0007607159246156688, "loss": 1.3752, "step": 2262 }, { "epoch": 1.0376103657837403, "grad_norm": 0.20672181248664856, "learning_rate": 0.0007605045810586415, "loss": 0.3842, "step": 2263 }, { "epoch": 1.0380690287811032, "grad_norm": 0.22013358771800995, "learning_rate": 0.0007602931735979624, "loss": 0.9232, "step": 2264 }, { "epoch": 1.0385276917784658, "grad_norm": 0.2625974118709564, "learning_rate": 0.0007600817022854908, "loss": 1.6846, "step": 2265 }, { "epoch": 1.0389863547758285, "grad_norm": 0.2270309031009674, "learning_rate": 0.0007598701671731025, "loss": 0.6877, "step": 2266 }, { "epoch": 1.039445017773191, "grad_norm": 0.35904768109321594, "learning_rate": 0.0007596585683126883, "loss": 1.9496, "step": 2267 }, { "epoch": 1.0399036807705537, "grad_norm": 0.31437742710113525, "learning_rate": 0.0007594469057561551, "loss": 1.4388, "step": 2268 }, { "epoch": 1.0403623437679166, "grad_norm": 0.31415170431137085, "learning_rate": 0.0007592351795554254, "loss": 2.0281, "step": 2269 }, { "epoch": 1.0408210067652792, "grad_norm": 0.19663023948669434, "learning_rate": 0.0007590233897624367, "loss": 0.5938, "step": 2270 }, { "epoch": 1.0412796697626419, "grad_norm": 0.16666308045387268, "learning_rate": 0.0007588115364291429, "loss": 1.0012, "step": 2271 }, { "epoch": 1.0417383327600045, "grad_norm": 0.10785413533449173, "learning_rate": 0.0007585996196075131, "loss": 0.913, "step": 2272 }, { "epoch": 1.0421969957573674, "grad_norm": 0.34523460268974304, "learning_rate": 0.000758387639349532, "loss": 1.8508, "step": 2273 }, { "epoch": 1.04265565875473, "grad_norm": 0.25581368803977966, "learning_rate": 0.0007581755957072, "loss": 1.0048, "step": 2274 }, { "epoch": 1.0431143217520926, "grad_norm": 0.2878674864768982, "learning_rate": 0.0007579634887325328, "loss": 1.2516, "step": 2275 }, { "epoch": 1.0435729847494553, "grad_norm": 0.43240469694137573, "learning_rate": 0.0007577513184775617, "loss": 1.5632, "step": 2276 }, { "epoch": 1.0440316477468181, "grad_norm": 0.25301140546798706, "learning_rate": 0.0007575390849943337, "loss": 1.3549, "step": 2277 }, { "epoch": 1.0444903107441807, "grad_norm": 0.31979095935821533, "learning_rate": 0.0007573267883349114, "loss": 1.1457, "step": 2278 }, { "epoch": 1.0449489737415434, "grad_norm": 1.364774465560913, "learning_rate": 0.0007571144285513723, "loss": 2.123, "step": 2279 }, { "epoch": 1.045407636738906, "grad_norm": 0.30994194746017456, "learning_rate": 0.00075690200569581, "loss": 1.4648, "step": 2280 }, { "epoch": 1.0458662997362689, "grad_norm": 0.3474901616573334, "learning_rate": 0.0007566895198203334, "loss": 1.8733, "step": 2281 }, { "epoch": 1.0463249627336315, "grad_norm": 0.23491498827934265, "learning_rate": 0.0007564769709770667, "loss": 1.5338, "step": 2282 }, { "epoch": 1.0467836257309941, "grad_norm": 0.25132763385772705, "learning_rate": 0.0007562643592181498, "loss": 1.3498, "step": 2283 }, { "epoch": 1.0472422887283568, "grad_norm": 0.33392879366874695, "learning_rate": 0.0007560516845957377, "loss": 1.4968, "step": 2284 }, { "epoch": 1.0477009517257194, "grad_norm": 0.3323450982570648, "learning_rate": 0.0007558389471620013, "loss": 1.1117, "step": 2285 }, { "epoch": 1.0481596147230823, "grad_norm": 0.2603180408477783, "learning_rate": 0.0007556261469691264, "loss": 1.6954, "step": 2286 }, { "epoch": 1.048618277720445, "grad_norm": 0.35003596544265747, "learning_rate": 0.0007554132840693145, "loss": 1.53, "step": 2287 }, { "epoch": 1.0490769407178075, "grad_norm": 0.3515700101852417, "learning_rate": 0.0007552003585147823, "loss": 1.696, "step": 2288 }, { "epoch": 1.0495356037151702, "grad_norm": 0.2830311059951782, "learning_rate": 0.0007549873703577622, "loss": 1.8374, "step": 2289 }, { "epoch": 1.049994266712533, "grad_norm": 0.2728405296802521, "learning_rate": 0.0007547743196505014, "loss": 0.9555, "step": 2290 }, { "epoch": 1.0504529297098957, "grad_norm": 0.3142251968383789, "learning_rate": 0.0007545612064452632, "loss": 1.657, "step": 2291 }, { "epoch": 1.0509115927072583, "grad_norm": 0.2731315791606903, "learning_rate": 0.0007543480307943256, "loss": 0.8105, "step": 2292 }, { "epoch": 1.051370255704621, "grad_norm": 0.2212861180305481, "learning_rate": 0.0007541347927499818, "loss": 1.3339, "step": 2293 }, { "epoch": 1.0518289187019838, "grad_norm": 0.29469791054725647, "learning_rate": 0.0007539214923645412, "loss": 1.0019, "step": 2294 }, { "epoch": 1.0522875816993464, "grad_norm": 0.20634829998016357, "learning_rate": 0.0007537081296903277, "loss": 1.3472, "step": 2295 }, { "epoch": 1.052746244696709, "grad_norm": 0.3181491792201996, "learning_rate": 0.0007534947047796805, "loss": 2.062, "step": 2296 }, { "epoch": 1.0532049076940717, "grad_norm": 0.31653597950935364, "learning_rate": 0.0007532812176849545, "loss": 1.8488, "step": 2297 }, { "epoch": 1.0536635706914346, "grad_norm": 0.3137333393096924, "learning_rate": 0.0007530676684585194, "loss": 1.6195, "step": 2298 }, { "epoch": 1.0541222336887972, "grad_norm": 0.3669377565383911, "learning_rate": 0.0007528540571527607, "loss": 1.928, "step": 2299 }, { "epoch": 1.0545808966861598, "grad_norm": 0.35595008730888367, "learning_rate": 0.0007526403838200786, "loss": 2.1411, "step": 2300 }, { "epoch": 1.0550395596835225, "grad_norm": 0.4003475606441498, "learning_rate": 0.0007524266485128885, "loss": 1.6777, "step": 2301 }, { "epoch": 1.0554982226808853, "grad_norm": 0.2703254818916321, "learning_rate": 0.0007522128512836217, "loss": 1.7441, "step": 2302 }, { "epoch": 1.055956885678248, "grad_norm": 0.3241822123527527, "learning_rate": 0.0007519989921847236, "loss": 1.4656, "step": 2303 }, { "epoch": 1.0564155486756106, "grad_norm": 0.38216519355773926, "learning_rate": 0.0007517850712686561, "loss": 1.7497, "step": 2304 }, { "epoch": 1.0568742116729732, "grad_norm": 0.24903887510299683, "learning_rate": 0.0007515710885878948, "loss": 1.7455, "step": 2305 }, { "epoch": 1.0573328746703359, "grad_norm": 0.36446714401245117, "learning_rate": 0.0007513570441949319, "loss": 2.2563, "step": 2306 }, { "epoch": 1.0577915376676987, "grad_norm": 0.23342153429985046, "learning_rate": 0.0007511429381422734, "loss": 0.7739, "step": 2307 }, { "epoch": 1.0582502006650614, "grad_norm": 0.3443793058395386, "learning_rate": 0.0007509287704824415, "loss": 0.9252, "step": 2308 }, { "epoch": 1.058708863662424, "grad_norm": 0.4298572540283203, "learning_rate": 0.0007507145412679728, "loss": 2.2942, "step": 2309 }, { "epoch": 1.0591675266597866, "grad_norm": 0.2859693765640259, "learning_rate": 0.0007505002505514194, "loss": 1.3583, "step": 2310 }, { "epoch": 1.0596261896571495, "grad_norm": 0.35848772525787354, "learning_rate": 0.0007502858983853485, "loss": 1.5612, "step": 2311 }, { "epoch": 1.0600848526545121, "grad_norm": 0.1740964949131012, "learning_rate": 0.000750071484822342, "loss": 1.1107, "step": 2312 }, { "epoch": 1.0605435156518748, "grad_norm": 0.24920308589935303, "learning_rate": 0.000749857009914997, "loss": 0.8362, "step": 2313 }, { "epoch": 1.0610021786492374, "grad_norm": 0.07761970907449722, "learning_rate": 0.000749642473715926, "loss": 0.4695, "step": 2314 }, { "epoch": 1.0614608416466003, "grad_norm": 0.17175906896591187, "learning_rate": 0.0007494278762777562, "loss": 1.2153, "step": 2315 }, { "epoch": 1.061919504643963, "grad_norm": 0.20324799418449402, "learning_rate": 0.0007492132176531299, "loss": 0.6542, "step": 2316 }, { "epoch": 1.0623781676413255, "grad_norm": 0.3163895308971405, "learning_rate": 0.0007489984978947044, "loss": 2.1238, "step": 2317 }, { "epoch": 1.0628368306386882, "grad_norm": 0.36171120405197144, "learning_rate": 0.000748783717055152, "loss": 1.8789, "step": 2318 }, { "epoch": 1.0632954936360508, "grad_norm": 0.6624128222465515, "learning_rate": 0.0007485688751871597, "loss": 2.0256, "step": 2319 }, { "epoch": 1.0637541566334137, "grad_norm": 0.3431372046470642, "learning_rate": 0.0007483539723434305, "loss": 1.7518, "step": 2320 }, { "epoch": 1.0642128196307763, "grad_norm": 0.3786202371120453, "learning_rate": 0.0007481390085766808, "loss": 1.8253, "step": 2321 }, { "epoch": 1.064671482628139, "grad_norm": 0.30203521251678467, "learning_rate": 0.0007479239839396434, "loss": 1.3458, "step": 2322 }, { "epoch": 1.0651301456255016, "grad_norm": 0.2725338637828827, "learning_rate": 0.000747708898485065, "loss": 1.3139, "step": 2323 }, { "epoch": 1.0655888086228644, "grad_norm": 0.2869611084461212, "learning_rate": 0.0007474937522657076, "loss": 0.9489, "step": 2324 }, { "epoch": 1.066047471620227, "grad_norm": 0.4075610637664795, "learning_rate": 0.0007472785453343485, "loss": 1.8008, "step": 2325 }, { "epoch": 1.0665061346175897, "grad_norm": 0.16893206536769867, "learning_rate": 0.000747063277743779, "loss": 1.1216, "step": 2326 }, { "epoch": 1.0669647976149523, "grad_norm": 0.3267589211463928, "learning_rate": 0.0007468479495468061, "loss": 1.4798, "step": 2327 }, { "epoch": 1.0674234606123152, "grad_norm": 0.3620133101940155, "learning_rate": 0.0007466325607962516, "loss": 1.8303, "step": 2328 }, { "epoch": 1.0678821236096778, "grad_norm": 0.3057517409324646, "learning_rate": 0.0007464171115449512, "loss": 1.6888, "step": 2329 }, { "epoch": 1.0683407866070405, "grad_norm": 0.2740839719772339, "learning_rate": 0.0007462016018457568, "loss": 1.4534, "step": 2330 }, { "epoch": 1.068799449604403, "grad_norm": 0.20154106616973877, "learning_rate": 0.0007459860317515344, "loss": 1.3666, "step": 2331 }, { "epoch": 1.069258112601766, "grad_norm": 0.24330544471740723, "learning_rate": 0.0007457704013151645, "loss": 0.856, "step": 2332 }, { "epoch": 1.0697167755991286, "grad_norm": 0.23237231373786926, "learning_rate": 0.0007455547105895432, "loss": 1.5328, "step": 2333 }, { "epoch": 1.0701754385964912, "grad_norm": 0.24399161338806152, "learning_rate": 0.0007453389596275808, "loss": 1.0707, "step": 2334 }, { "epoch": 1.0706341015938539, "grad_norm": 0.2570146322250366, "learning_rate": 0.0007451231484822025, "loss": 0.9601, "step": 2335 }, { "epoch": 1.0710927645912167, "grad_norm": 0.33579733967781067, "learning_rate": 0.0007449072772063486, "loss": 1.7599, "step": 2336 }, { "epoch": 1.0715514275885794, "grad_norm": 0.3293912410736084, "learning_rate": 0.0007446913458529738, "loss": 1.2646, "step": 2337 }, { "epoch": 1.072010090585942, "grad_norm": 0.19118434190750122, "learning_rate": 0.0007444753544750475, "loss": 0.8713, "step": 2338 }, { "epoch": 1.0724687535833046, "grad_norm": 0.1878962367773056, "learning_rate": 0.0007442593031255539, "loss": 1.1241, "step": 2339 }, { "epoch": 1.0729274165806673, "grad_norm": 0.3359505534172058, "learning_rate": 0.000744043191857492, "loss": 1.3387, "step": 2340 }, { "epoch": 1.0733860795780301, "grad_norm": 0.2292228788137436, "learning_rate": 0.0007438270207238756, "loss": 1.3343, "step": 2341 }, { "epoch": 1.0738447425753928, "grad_norm": 0.41298791766166687, "learning_rate": 0.000743610789777733, "loss": 2.1614, "step": 2342 }, { "epoch": 1.0743034055727554, "grad_norm": 0.28897812962532043, "learning_rate": 0.0007433944990721071, "loss": 1.3058, "step": 2343 }, { "epoch": 1.074762068570118, "grad_norm": 0.13999620079994202, "learning_rate": 0.0007431781486600556, "loss": 0.7708, "step": 2344 }, { "epoch": 1.0752207315674809, "grad_norm": 0.259048193693161, "learning_rate": 0.0007429617385946507, "loss": 1.1908, "step": 2345 }, { "epoch": 1.0756793945648435, "grad_norm": 0.18018385767936707, "learning_rate": 0.0007427452689289795, "loss": 0.8871, "step": 2346 }, { "epoch": 1.0761380575622062, "grad_norm": 0.2923835217952728, "learning_rate": 0.0007425287397161437, "loss": 1.6891, "step": 2347 }, { "epoch": 1.0765967205595688, "grad_norm": 0.3825288414955139, "learning_rate": 0.0007423121510092593, "loss": 1.6979, "step": 2348 }, { "epoch": 1.0770553835569316, "grad_norm": 0.3247275948524475, "learning_rate": 0.000742095502861457, "loss": 1.6702, "step": 2349 }, { "epoch": 1.0775140465542943, "grad_norm": 0.30282968282699585, "learning_rate": 0.0007418787953258822, "loss": 1.756, "step": 2350 }, { "epoch": 1.077972709551657, "grad_norm": 0.3590962588787079, "learning_rate": 0.000741662028455695, "loss": 2.1335, "step": 2351 }, { "epoch": 1.0784313725490196, "grad_norm": 0.3158145546913147, "learning_rate": 0.0007414452023040697, "loss": 1.6022, "step": 2352 }, { "epoch": 1.0788900355463822, "grad_norm": 0.26424726843833923, "learning_rate": 0.0007412283169241955, "loss": 1.3064, "step": 2353 }, { "epoch": 1.079348698543745, "grad_norm": 0.3655887544155121, "learning_rate": 0.0007410113723692756, "loss": 1.7661, "step": 2354 }, { "epoch": 1.0798073615411077, "grad_norm": 0.22848142683506012, "learning_rate": 0.0007407943686925282, "loss": 0.8983, "step": 2355 }, { "epoch": 1.0802660245384703, "grad_norm": 0.2924310863018036, "learning_rate": 0.0007405773059471863, "loss": 1.9707, "step": 2356 }, { "epoch": 1.080724687535833, "grad_norm": 0.27466198801994324, "learning_rate": 0.0007403601841864964, "loss": 1.3336, "step": 2357 }, { "epoch": 1.0811833505331958, "grad_norm": 0.2992285490036011, "learning_rate": 0.0007401430034637202, "loss": 0.8565, "step": 2358 }, { "epoch": 1.0816420135305584, "grad_norm": 0.27274230122566223, "learning_rate": 0.0007399257638321338, "loss": 1.6908, "step": 2359 }, { "epoch": 1.082100676527921, "grad_norm": 0.2696456015110016, "learning_rate": 0.0007397084653450274, "loss": 0.8956, "step": 2360 }, { "epoch": 1.0825593395252837, "grad_norm": 0.18956485390663147, "learning_rate": 0.0007394911080557063, "loss": 1.3373, "step": 2361 }, { "epoch": 1.0830180025226466, "grad_norm": 0.36281585693359375, "learning_rate": 0.0007392736920174895, "loss": 1.6471, "step": 2362 }, { "epoch": 1.0834766655200092, "grad_norm": 0.30485787987709045, "learning_rate": 0.0007390562172837108, "loss": 1.2273, "step": 2363 }, { "epoch": 1.0839353285173718, "grad_norm": 0.4604003131389618, "learning_rate": 0.0007388386839077182, "loss": 1.9656, "step": 2364 }, { "epoch": 1.0843939915147345, "grad_norm": 0.21141566336154938, "learning_rate": 0.0007386210919428744, "loss": 1.2972, "step": 2365 }, { "epoch": 1.0848526545120973, "grad_norm": 0.26035094261169434, "learning_rate": 0.0007384034414425562, "loss": 1.1507, "step": 2366 }, { "epoch": 1.08531131750946, "grad_norm": 0.2898489832878113, "learning_rate": 0.000738185732460155, "loss": 1.4745, "step": 2367 }, { "epoch": 1.0857699805068226, "grad_norm": 0.3080480992794037, "learning_rate": 0.000737967965049076, "loss": 1.7976, "step": 2368 }, { "epoch": 1.0862286435041852, "grad_norm": 0.2572938799858093, "learning_rate": 0.0007377501392627394, "loss": 1.108, "step": 2369 }, { "epoch": 1.086687306501548, "grad_norm": 0.2855343520641327, "learning_rate": 0.0007375322551545794, "loss": 1.2778, "step": 2370 }, { "epoch": 1.0871459694989107, "grad_norm": 0.22971484065055847, "learning_rate": 0.0007373143127780444, "loss": 1.0277, "step": 2371 }, { "epoch": 1.0876046324962734, "grad_norm": 0.41709139943122864, "learning_rate": 0.0007370963121865974, "loss": 2.3818, "step": 2372 }, { "epoch": 1.088063295493636, "grad_norm": 0.21090003848075867, "learning_rate": 0.0007368782534337156, "loss": 1.0302, "step": 2373 }, { "epoch": 1.0885219584909986, "grad_norm": 0.2724030315876007, "learning_rate": 0.0007366601365728902, "loss": 1.4246, "step": 2374 }, { "epoch": 1.0889806214883615, "grad_norm": 0.45919355750083923, "learning_rate": 0.0007364419616576268, "loss": 2.0112, "step": 2375 }, { "epoch": 1.0894392844857241, "grad_norm": 0.4057621657848358, "learning_rate": 0.0007362237287414455, "loss": 2.0664, "step": 2376 }, { "epoch": 1.0898979474830868, "grad_norm": 0.2805062532424927, "learning_rate": 0.0007360054378778801, "loss": 1.3488, "step": 2377 }, { "epoch": 1.0903566104804494, "grad_norm": 0.38183027505874634, "learning_rate": 0.0007357870891204792, "loss": 1.8075, "step": 2378 }, { "epoch": 1.0908152734778123, "grad_norm": 0.22754809260368347, "learning_rate": 0.0007355686825228053, "loss": 0.8548, "step": 2379 }, { "epoch": 1.091273936475175, "grad_norm": 0.3018571138381958, "learning_rate": 0.0007353502181384349, "loss": 1.9176, "step": 2380 }, { "epoch": 1.0917325994725375, "grad_norm": 0.3934653103351593, "learning_rate": 0.0007351316960209591, "loss": 2.1248, "step": 2381 }, { "epoch": 1.0921912624699002, "grad_norm": 0.25040295720100403, "learning_rate": 0.0007349131162239828, "loss": 1.211, "step": 2382 }, { "epoch": 1.092649925467263, "grad_norm": 0.3955865204334259, "learning_rate": 0.0007346944788011254, "loss": 1.103, "step": 2383 }, { "epoch": 1.0931085884646257, "grad_norm": 0.406610906124115, "learning_rate": 0.0007344757838060203, "loss": 2.2019, "step": 2384 }, { "epoch": 1.0935672514619883, "grad_norm": 0.45408493280410767, "learning_rate": 0.0007342570312923143, "loss": 1.9214, "step": 2385 }, { "epoch": 1.094025914459351, "grad_norm": 0.3225853741168976, "learning_rate": 0.0007340382213136695, "loss": 1.437, "step": 2386 }, { "epoch": 1.0944845774567136, "grad_norm": 0.2830888330936432, "learning_rate": 0.0007338193539237619, "loss": 1.6927, "step": 2387 }, { "epoch": 1.0949432404540764, "grad_norm": 0.3365802466869354, "learning_rate": 0.0007336004291762807, "loss": 1.3085, "step": 2388 }, { "epoch": 1.095401903451439, "grad_norm": 0.2103971689939499, "learning_rate": 0.0007333814471249298, "loss": 1.3113, "step": 2389 }, { "epoch": 1.0958605664488017, "grad_norm": 0.3487602472305298, "learning_rate": 0.0007331624078234272, "loss": 1.2643, "step": 2390 }, { "epoch": 1.0963192294461643, "grad_norm": 0.29860758781433105, "learning_rate": 0.0007329433113255047, "loss": 1.6295, "step": 2391 }, { "epoch": 1.0967778924435272, "grad_norm": 0.21186383068561554, "learning_rate": 0.0007327241576849083, "loss": 1.4821, "step": 2392 }, { "epoch": 1.0972365554408898, "grad_norm": 0.2788501977920532, "learning_rate": 0.0007325049469553981, "loss": 1.2378, "step": 2393 }, { "epoch": 1.0976952184382525, "grad_norm": 0.19782112538814545, "learning_rate": 0.000732285679190748, "loss": 0.8517, "step": 2394 }, { "epoch": 1.098153881435615, "grad_norm": 0.3813456594944, "learning_rate": 0.0007320663544447459, "loss": 1.818, "step": 2395 }, { "epoch": 1.098612544432978, "grad_norm": 0.29733943939208984, "learning_rate": 0.0007318469727711936, "loss": 0.8673, "step": 2396 }, { "epoch": 1.0990712074303406, "grad_norm": 0.18940015137195587, "learning_rate": 0.0007316275342239074, "loss": 0.8093, "step": 2397 }, { "epoch": 1.0995298704277032, "grad_norm": 0.3412417769432068, "learning_rate": 0.0007314080388567168, "loss": 1.7512, "step": 2398 }, { "epoch": 1.0999885334250659, "grad_norm": 0.09079885482788086, "learning_rate": 0.0007311884867234658, "loss": 0.9669, "step": 2399 }, { "epoch": 1.1004471964224287, "grad_norm": 0.34120801091194153, "learning_rate": 0.0007309688778780121, "loss": 1.6829, "step": 2400 }, { "epoch": 1.1009058594197914, "grad_norm": 0.31036293506622314, "learning_rate": 0.0007307492123742271, "loss": 1.2819, "step": 2401 }, { "epoch": 1.101364522417154, "grad_norm": 0.32201525568962097, "learning_rate": 0.0007305294902659967, "loss": 1.3809, "step": 2402 }, { "epoch": 1.1018231854145166, "grad_norm": 0.09801112860441208, "learning_rate": 0.0007303097116072199, "loss": 1.0084, "step": 2403 }, { "epoch": 1.1022818484118795, "grad_norm": 0.28749385476112366, "learning_rate": 0.0007300898764518105, "loss": 1.0966, "step": 2404 }, { "epoch": 1.1027405114092421, "grad_norm": 0.22139480710029602, "learning_rate": 0.0007298699848536953, "loss": 1.0551, "step": 2405 }, { "epoch": 1.1031991744066048, "grad_norm": 0.24073892831802368, "learning_rate": 0.0007296500368668154, "loss": 1.7311, "step": 2406 }, { "epoch": 1.1036578374039674, "grad_norm": 0.3581311106681824, "learning_rate": 0.0007294300325451253, "loss": 1.576, "step": 2407 }, { "epoch": 1.10411650040133, "grad_norm": 0.2959865927696228, "learning_rate": 0.0007292099719425942, "loss": 1.2758, "step": 2408 }, { "epoch": 1.1045751633986929, "grad_norm": 0.24298930168151855, "learning_rate": 0.0007289898551132044, "loss": 1.2407, "step": 2409 }, { "epoch": 1.1050338263960555, "grad_norm": 0.24619142711162567, "learning_rate": 0.0007287696821109517, "loss": 1.2528, "step": 2410 }, { "epoch": 1.1054924893934182, "grad_norm": 0.28459224104881287, "learning_rate": 0.0007285494529898468, "loss": 2.0142, "step": 2411 }, { "epoch": 1.1059511523907808, "grad_norm": 0.295964777469635, "learning_rate": 0.0007283291678039129, "loss": 1.2126, "step": 2412 }, { "epoch": 1.1064098153881436, "grad_norm": 0.23746977746486664, "learning_rate": 0.000728108826607188, "loss": 1.3847, "step": 2413 }, { "epoch": 1.1068684783855063, "grad_norm": 0.38938137888908386, "learning_rate": 0.0007278884294537229, "loss": 2.1191, "step": 2414 }, { "epoch": 1.107327141382869, "grad_norm": 0.1922319382429123, "learning_rate": 0.0007276679763975832, "loss": 0.5424, "step": 2415 }, { "epoch": 1.1077858043802316, "grad_norm": 0.2816941738128662, "learning_rate": 0.0007274474674928472, "loss": 1.7775, "step": 2416 }, { "epoch": 1.1082444673775944, "grad_norm": 0.25834113359451294, "learning_rate": 0.0007272269027936073, "loss": 0.9534, "step": 2417 }, { "epoch": 1.108703130374957, "grad_norm": 0.49492350220680237, "learning_rate": 0.00072700628235397, "loss": 1.386, "step": 2418 }, { "epoch": 1.1091617933723197, "grad_norm": 0.18091417849063873, "learning_rate": 0.0007267856062280547, "loss": 0.86, "step": 2419 }, { "epoch": 1.1096204563696823, "grad_norm": 0.19237549602985382, "learning_rate": 0.0007265648744699951, "loss": 1.0081, "step": 2420 }, { "epoch": 1.110079119367045, "grad_norm": 0.17153310775756836, "learning_rate": 0.0007263440871339382, "loss": 0.7964, "step": 2421 }, { "epoch": 1.1105377823644078, "grad_norm": 0.22889000177383423, "learning_rate": 0.0007261232442740444, "loss": 1.6779, "step": 2422 }, { "epoch": 1.1109964453617704, "grad_norm": 0.3227975070476532, "learning_rate": 0.0007259023459444887, "loss": 1.6245, "step": 2423 }, { "epoch": 1.111455108359133, "grad_norm": 0.38517293334007263, "learning_rate": 0.0007256813921994585, "loss": 1.1709, "step": 2424 }, { "epoch": 1.1119137713564957, "grad_norm": 0.19292643666267395, "learning_rate": 0.0007254603830931555, "loss": 0.8391, "step": 2425 }, { "epoch": 1.1123724343538586, "grad_norm": 0.18455813825130463, "learning_rate": 0.000725239318679795, "loss": 0.7338, "step": 2426 }, { "epoch": 1.1128310973512212, "grad_norm": 0.27097609639167786, "learning_rate": 0.0007250181990136054, "loss": 1.8622, "step": 2427 }, { "epoch": 1.1132897603485838, "grad_norm": 0.30490532517433167, "learning_rate": 0.0007247970241488293, "loss": 1.6636, "step": 2428 }, { "epoch": 1.1137484233459465, "grad_norm": 0.27548128366470337, "learning_rate": 0.0007245757941397223, "loss": 1.4517, "step": 2429 }, { "epoch": 1.1142070863433093, "grad_norm": 0.28075456619262695, "learning_rate": 0.0007243545090405537, "loss": 1.1777, "step": 2430 }, { "epoch": 1.114665749340672, "grad_norm": 0.2504705488681793, "learning_rate": 0.0007241331689056064, "loss": 1.1601, "step": 2431 }, { "epoch": 1.1151244123380346, "grad_norm": 0.25834277272224426, "learning_rate": 0.0007239117737891765, "loss": 1.1205, "step": 2432 }, { "epoch": 1.1155830753353972, "grad_norm": 0.35737091302871704, "learning_rate": 0.0007236903237455741, "loss": 1.629, "step": 2433 }, { "epoch": 1.11604173833276, "grad_norm": 0.3298056423664093, "learning_rate": 0.0007234688188291226, "loss": 1.9195, "step": 2434 }, { "epoch": 1.1165004013301227, "grad_norm": 0.4346219003200531, "learning_rate": 0.0007232472590941582, "loss": 2.2007, "step": 2435 }, { "epoch": 1.1169590643274854, "grad_norm": 0.3693140745162964, "learning_rate": 0.0007230256445950316, "loss": 1.0917, "step": 2436 }, { "epoch": 1.117417727324848, "grad_norm": 0.3174065947532654, "learning_rate": 0.0007228039753861062, "loss": 2.0214, "step": 2437 }, { "epoch": 1.1178763903222109, "grad_norm": 0.37119260430336, "learning_rate": 0.0007225822515217593, "loss": 1.564, "step": 2438 }, { "epoch": 1.1183350533195735, "grad_norm": 0.22383145987987518, "learning_rate": 0.0007223604730563811, "loss": 1.2421, "step": 2439 }, { "epoch": 1.1187937163169361, "grad_norm": 0.34915295243263245, "learning_rate": 0.0007221386400443757, "loss": 1.7604, "step": 2440 }, { "epoch": 1.1192523793142988, "grad_norm": 0.31573063135147095, "learning_rate": 0.00072191675254016, "loss": 1.6093, "step": 2441 }, { "epoch": 1.1197110423116614, "grad_norm": 0.3130987882614136, "learning_rate": 0.0007216948105981649, "loss": 1.3731, "step": 2442 }, { "epoch": 1.1201697053090243, "grad_norm": 0.25755324959754944, "learning_rate": 0.0007214728142728342, "loss": 1.4297, "step": 2443 }, { "epoch": 1.120628368306387, "grad_norm": 0.12649127840995789, "learning_rate": 0.0007212507636186251, "loss": 0.6196, "step": 2444 }, { "epoch": 1.1210870313037495, "grad_norm": 0.20406757295131683, "learning_rate": 0.0007210286586900086, "loss": 1.4123, "step": 2445 }, { "epoch": 1.1215456943011122, "grad_norm": 0.3635023236274719, "learning_rate": 0.0007208064995414686, "loss": 1.8994, "step": 2446 }, { "epoch": 1.122004357298475, "grad_norm": 0.27390024065971375, "learning_rate": 0.0007205842862275019, "loss": 0.8153, "step": 2447 }, { "epoch": 1.1224630202958377, "grad_norm": 0.18709996342658997, "learning_rate": 0.0007203620188026193, "loss": 1.1956, "step": 2448 }, { "epoch": 1.1229216832932003, "grad_norm": 0.2258118987083435, "learning_rate": 0.0007201396973213446, "loss": 1.261, "step": 2449 }, { "epoch": 1.123380346290563, "grad_norm": 0.3747474253177643, "learning_rate": 0.000719917321838215, "loss": 1.8525, "step": 2450 }, { "epoch": 1.1238390092879258, "grad_norm": 0.4060818552970886, "learning_rate": 0.0007196948924077806, "loss": 1.8212, "step": 2451 }, { "epoch": 1.1242976722852884, "grad_norm": 0.35225608944892883, "learning_rate": 0.000719472409084605, "loss": 1.3362, "step": 2452 }, { "epoch": 1.124756335282651, "grad_norm": 0.29107534885406494, "learning_rate": 0.0007192498719232649, "loss": 1.9098, "step": 2453 }, { "epoch": 1.1252149982800137, "grad_norm": 0.35325366258621216, "learning_rate": 0.0007190272809783504, "loss": 1.4185, "step": 2454 }, { "epoch": 1.1256736612773763, "grad_norm": 0.3643795847892761, "learning_rate": 0.0007188046363044646, "loss": 1.0865, "step": 2455 }, { "epoch": 1.1261323242747392, "grad_norm": 0.10707731544971466, "learning_rate": 0.0007185819379562238, "loss": 0.8521, "step": 2456 }, { "epoch": 1.1265909872721018, "grad_norm": 0.9587938189506531, "learning_rate": 0.0007183591859882578, "loss": 2.2844, "step": 2457 }, { "epoch": 1.1270496502694645, "grad_norm": 0.2738874554634094, "learning_rate": 0.0007181363804552086, "loss": 0.9316, "step": 2458 }, { "epoch": 1.1275083132668273, "grad_norm": 0.1969241052865982, "learning_rate": 0.0007179135214117327, "loss": 1.2078, "step": 2459 }, { "epoch": 1.12796697626419, "grad_norm": 0.2931908965110779, "learning_rate": 0.0007176906089124989, "loss": 0.901, "step": 2460 }, { "epoch": 1.1284256392615526, "grad_norm": 0.26238539814949036, "learning_rate": 0.0007174676430121889, "loss": 1.839, "step": 2461 }, { "epoch": 1.1288843022589152, "grad_norm": 0.2989310026168823, "learning_rate": 0.0007172446237654981, "loss": 1.1135, "step": 2462 }, { "epoch": 1.1293429652562779, "grad_norm": 0.42721855640411377, "learning_rate": 0.0007170215512271347, "loss": 1.2623, "step": 2463 }, { "epoch": 1.1298016282536407, "grad_norm": 0.29083552956581116, "learning_rate": 0.0007167984254518199, "loss": 1.5911, "step": 2464 }, { "epoch": 1.1302602912510034, "grad_norm": 0.32871025800704956, "learning_rate": 0.0007165752464942882, "loss": 1.3376, "step": 2465 }, { "epoch": 1.130718954248366, "grad_norm": 0.28994280099868774, "learning_rate": 0.000716352014409287, "loss": 1.7526, "step": 2466 }, { "epoch": 1.1311776172457286, "grad_norm": 0.30776262283325195, "learning_rate": 0.0007161287292515766, "loss": 1.5325, "step": 2467 }, { "epoch": 1.1316362802430913, "grad_norm": 0.22005335986614227, "learning_rate": 0.0007159053910759304, "loss": 1.5527, "step": 2468 }, { "epoch": 1.1320949432404541, "grad_norm": 0.38737305998802185, "learning_rate": 0.0007156819999371353, "loss": 2.2419, "step": 2469 }, { "epoch": 1.1325536062378168, "grad_norm": 0.243751659989357, "learning_rate": 0.0007154585558899902, "loss": 0.8154, "step": 2470 }, { "epoch": 1.1330122692351794, "grad_norm": 0.18171143531799316, "learning_rate": 0.0007152350589893081, "loss": 1.5097, "step": 2471 }, { "epoch": 1.1334709322325422, "grad_norm": 0.2935083508491516, "learning_rate": 0.0007150115092899138, "loss": 1.6631, "step": 2472 }, { "epoch": 1.1339295952299049, "grad_norm": 0.2859863340854645, "learning_rate": 0.0007147879068466462, "loss": 1.7206, "step": 2473 }, { "epoch": 1.1343882582272675, "grad_norm": 0.3790501058101654, "learning_rate": 0.0007145642517143563, "loss": 1.9156, "step": 2474 }, { "epoch": 1.1348469212246302, "grad_norm": 0.23689134418964386, "learning_rate": 0.0007143405439479082, "loss": 0.707, "step": 2475 }, { "epoch": 1.1353055842219928, "grad_norm": 0.3226507008075714, "learning_rate": 0.0007141167836021793, "loss": 1.9152, "step": 2476 }, { "epoch": 1.1357642472193556, "grad_norm": 0.3560091257095337, "learning_rate": 0.0007138929707320596, "loss": 1.97, "step": 2477 }, { "epoch": 1.1362229102167183, "grad_norm": 0.3632325828075409, "learning_rate": 0.0007136691053924519, "loss": 0.7859, "step": 2478 }, { "epoch": 1.136681573214081, "grad_norm": 0.11780796200037003, "learning_rate": 0.0007134451876382719, "loss": 1.0535, "step": 2479 }, { "epoch": 1.1371402362114436, "grad_norm": 0.3758339583873749, "learning_rate": 0.0007132212175244484, "loss": 1.8105, "step": 2480 }, { "epoch": 1.1375988992088064, "grad_norm": 0.3397655785083771, "learning_rate": 0.0007129971951059229, "loss": 1.3989, "step": 2481 }, { "epoch": 1.138057562206169, "grad_norm": 0.3613055944442749, "learning_rate": 0.0007127731204376497, "loss": 1.5942, "step": 2482 }, { "epoch": 1.1385162252035317, "grad_norm": 0.3114750385284424, "learning_rate": 0.0007125489935745958, "loss": 1.447, "step": 2483 }, { "epoch": 1.1389748882008943, "grad_norm": 0.4733560085296631, "learning_rate": 0.0007123248145717412, "loss": 2.2578, "step": 2484 }, { "epoch": 1.1394335511982572, "grad_norm": 0.28705328702926636, "learning_rate": 0.0007121005834840786, "loss": 1.2581, "step": 2485 }, { "epoch": 1.1398922141956198, "grad_norm": 0.22501793503761292, "learning_rate": 0.0007118763003666137, "loss": 1.1338, "step": 2486 }, { "epoch": 1.1403508771929824, "grad_norm": 0.24394629895687103, "learning_rate": 0.0007116519652743644, "loss": 1.2005, "step": 2487 }, { "epoch": 1.140809540190345, "grad_norm": 0.32340019941329956, "learning_rate": 0.0007114275782623622, "loss": 1.2241, "step": 2488 }, { "epoch": 1.1412682031877077, "grad_norm": 0.29200488328933716, "learning_rate": 0.0007112031393856504, "loss": 0.9522, "step": 2489 }, { "epoch": 1.1417268661850706, "grad_norm": 0.11450646817684174, "learning_rate": 0.0007109786486992856, "loss": 0.9667, "step": 2490 }, { "epoch": 1.1421855291824332, "grad_norm": 0.32306355237960815, "learning_rate": 0.0007107541062583372, "loss": 0.917, "step": 2491 }, { "epoch": 1.1426441921797958, "grad_norm": 0.2934865355491638, "learning_rate": 0.0007105295121178867, "loss": 1.5807, "step": 2492 }, { "epoch": 1.1431028551771587, "grad_norm": 0.3221622705459595, "learning_rate": 0.0007103048663330291, "loss": 1.895, "step": 2493 }, { "epoch": 1.1435615181745213, "grad_norm": 0.3509625792503357, "learning_rate": 0.0007100801689588714, "loss": 1.4817, "step": 2494 }, { "epoch": 1.144020181171884, "grad_norm": 0.255881130695343, "learning_rate": 0.0007098554200505334, "loss": 1.564, "step": 2495 }, { "epoch": 1.1444788441692466, "grad_norm": 0.33859267830848694, "learning_rate": 0.0007096306196631478, "loss": 1.1945, "step": 2496 }, { "epoch": 1.1449375071666092, "grad_norm": 0.2730942964553833, "learning_rate": 0.0007094057678518597, "loss": 1.1724, "step": 2497 }, { "epoch": 1.145396170163972, "grad_norm": 0.12389812618494034, "learning_rate": 0.0007091808646718268, "loss": 1.1204, "step": 2498 }, { "epoch": 1.1458548331613347, "grad_norm": 0.3796531558036804, "learning_rate": 0.0007089559101782195, "loss": 1.622, "step": 2499 }, { "epoch": 1.1463134961586974, "grad_norm": 0.2889677584171295, "learning_rate": 0.0007087309044262206, "loss": 0.8866, "step": 2500 }, { "epoch": 1.14677215915606, "grad_norm": 0.3515912890434265, "learning_rate": 0.0007085058474710261, "loss": 1.2305, "step": 2501 }, { "epoch": 1.1472308221534229, "grad_norm": 0.2346973717212677, "learning_rate": 0.0007082807393678439, "loss": 1.3991, "step": 2502 }, { "epoch": 1.1476894851507855, "grad_norm": 0.26319047808647156, "learning_rate": 0.0007080555801718943, "loss": 1.3355, "step": 2503 }, { "epoch": 1.1481481481481481, "grad_norm": 0.38340768218040466, "learning_rate": 0.0007078303699384107, "loss": 1.7297, "step": 2504 }, { "epoch": 1.1486068111455108, "grad_norm": 0.27420321106910706, "learning_rate": 0.0007076051087226389, "loss": 1.7687, "step": 2505 }, { "epoch": 1.1490654741428736, "grad_norm": 0.36036863923072815, "learning_rate": 0.0007073797965798371, "loss": 1.2501, "step": 2506 }, { "epoch": 1.1495241371402363, "grad_norm": 0.2933008074760437, "learning_rate": 0.000707154433565276, "loss": 1.762, "step": 2507 }, { "epoch": 1.149982800137599, "grad_norm": 0.6776466369628906, "learning_rate": 0.0007069290197342385, "loss": 1.1041, "step": 2508 }, { "epoch": 1.1504414631349615, "grad_norm": 0.16171732544898987, "learning_rate": 0.0007067035551420205, "loss": 0.8364, "step": 2509 }, { "epoch": 1.1509001261323242, "grad_norm": 0.28582391142845154, "learning_rate": 0.0007064780398439299, "loss": 1.4814, "step": 2510 }, { "epoch": 1.151358789129687, "grad_norm": 0.31193310022354126, "learning_rate": 0.0007062524738952875, "loss": 1.2991, "step": 2511 }, { "epoch": 1.1518174521270497, "grad_norm": 0.28195393085479736, "learning_rate": 0.0007060268573514259, "loss": 1.8887, "step": 2512 }, { "epoch": 1.1522761151244123, "grad_norm": 0.30946487188339233, "learning_rate": 0.0007058011902676909, "loss": 1.2166, "step": 2513 }, { "epoch": 1.152734778121775, "grad_norm": 0.3007708787918091, "learning_rate": 0.0007055754726994399, "loss": 1.4958, "step": 2514 }, { "epoch": 1.1531934411191378, "grad_norm": 0.21293707191944122, "learning_rate": 0.0007053497047020432, "loss": 1.0453, "step": 2515 }, { "epoch": 1.1536521041165004, "grad_norm": 0.32221704721450806, "learning_rate": 0.0007051238863308832, "loss": 1.4675, "step": 2516 }, { "epoch": 1.154110767113863, "grad_norm": 0.2842558026313782, "learning_rate": 0.0007048980176413549, "loss": 1.9645, "step": 2517 }, { "epoch": 1.1545694301112257, "grad_norm": 0.3128332793712616, "learning_rate": 0.0007046720986888656, "loss": 1.3461, "step": 2518 }, { "epoch": 1.1550280931085886, "grad_norm": 0.3141840994358063, "learning_rate": 0.0007044461295288347, "loss": 2.0339, "step": 2519 }, { "epoch": 1.1554867561059512, "grad_norm": 0.39887315034866333, "learning_rate": 0.0007042201102166939, "loss": 2.0115, "step": 2520 }, { "epoch": 1.1559454191033138, "grad_norm": 0.3459334075450897, "learning_rate": 0.0007039940408078878, "loss": 1.922, "step": 2521 }, { "epoch": 1.1564040821006765, "grad_norm": 0.31680259108543396, "learning_rate": 0.0007037679213578725, "loss": 1.6561, "step": 2522 }, { "epoch": 1.156862745098039, "grad_norm": 0.27092963457107544, "learning_rate": 0.0007035417519221168, "loss": 1.2754, "step": 2523 }, { "epoch": 1.157321408095402, "grad_norm": 0.23691010475158691, "learning_rate": 0.0007033155325561018, "loss": 1.4512, "step": 2524 }, { "epoch": 1.1577800710927646, "grad_norm": 0.3520990014076233, "learning_rate": 0.0007030892633153208, "loss": 1.3797, "step": 2525 }, { "epoch": 1.1582387340901272, "grad_norm": 0.19260728359222412, "learning_rate": 0.0007028629442552788, "loss": 1.3934, "step": 2526 }, { "epoch": 1.15869739708749, "grad_norm": 0.31958216428756714, "learning_rate": 0.0007026365754314943, "loss": 1.1669, "step": 2527 }, { "epoch": 1.1591560600848527, "grad_norm": 0.1669234186410904, "learning_rate": 0.0007024101568994965, "loss": 1.2692, "step": 2528 }, { "epoch": 1.1596147230822154, "grad_norm": 0.3692995011806488, "learning_rate": 0.0007021836887148278, "loss": 1.7561, "step": 2529 }, { "epoch": 1.160073386079578, "grad_norm": 0.21594902873039246, "learning_rate": 0.0007019571709330425, "loss": 1.4732, "step": 2530 }, { "epoch": 1.1605320490769406, "grad_norm": 0.3213531970977783, "learning_rate": 0.0007017306036097068, "loss": 1.6332, "step": 2531 }, { "epoch": 1.1609907120743035, "grad_norm": 0.34258440136909485, "learning_rate": 0.0007015039868003998, "loss": 1.9807, "step": 2532 }, { "epoch": 1.1614493750716661, "grad_norm": 0.4435928165912628, "learning_rate": 0.0007012773205607117, "loss": 1.7593, "step": 2533 }, { "epoch": 1.1619080380690288, "grad_norm": 0.2438933551311493, "learning_rate": 0.0007010506049462456, "loss": 1.1445, "step": 2534 }, { "epoch": 1.1623667010663914, "grad_norm": 0.32633253931999207, "learning_rate": 0.0007008238400126165, "loss": 2.1003, "step": 2535 }, { "epoch": 1.1628253640637543, "grad_norm": 0.3241181969642639, "learning_rate": 0.0007005970258154514, "loss": 1.5915, "step": 2536 }, { "epoch": 1.1632840270611169, "grad_norm": 0.2975384593009949, "learning_rate": 0.0007003701624103895, "loss": 1.5494, "step": 2537 }, { "epoch": 1.1637426900584795, "grad_norm": 0.26780200004577637, "learning_rate": 0.000700143249853082, "loss": 1.3053, "step": 2538 }, { "epoch": 1.1642013530558422, "grad_norm": 0.26362505555152893, "learning_rate": 0.0006999162881991922, "loss": 1.3801, "step": 2539 }, { "epoch": 1.164660016053205, "grad_norm": 0.21280516684055328, "learning_rate": 0.0006996892775043955, "loss": 1.0339, "step": 2540 }, { "epoch": 1.1651186790505677, "grad_norm": 0.2693462073802948, "learning_rate": 0.0006994622178243792, "loss": 1.1786, "step": 2541 }, { "epoch": 1.1655773420479303, "grad_norm": 0.18628975749015808, "learning_rate": 0.0006992351092148426, "loss": 1.4404, "step": 2542 }, { "epoch": 1.166036005045293, "grad_norm": 0.41851601004600525, "learning_rate": 0.0006990079517314971, "loss": 1.853, "step": 2543 }, { "epoch": 1.1664946680426556, "grad_norm": 0.3848247826099396, "learning_rate": 0.0006987807454300662, "loss": 1.8879, "step": 2544 }, { "epoch": 1.1669533310400184, "grad_norm": 0.258331835269928, "learning_rate": 0.0006985534903662851, "loss": 1.3988, "step": 2545 }, { "epoch": 1.167411994037381, "grad_norm": 0.30951839685440063, "learning_rate": 0.0006983261865959011, "loss": 1.3336, "step": 2546 }, { "epoch": 1.1678706570347437, "grad_norm": 0.2942619025707245, "learning_rate": 0.0006980988341746737, "loss": 1.7388, "step": 2547 }, { "epoch": 1.1683293200321063, "grad_norm": 0.2846639156341553, "learning_rate": 0.0006978714331583739, "loss": 1.2854, "step": 2548 }, { "epoch": 1.1687879830294692, "grad_norm": 0.3299280107021332, "learning_rate": 0.0006976439836027848, "loss": 2.272, "step": 2549 }, { "epoch": 1.1692466460268318, "grad_norm": 0.2958674132823944, "learning_rate": 0.0006974164855637015, "loss": 1.6334, "step": 2550 }, { "epoch": 1.1697053090241945, "grad_norm": 0.43015730381011963, "learning_rate": 0.0006971889390969307, "loss": 1.6784, "step": 2551 }, { "epoch": 1.170163972021557, "grad_norm": 0.1926140934228897, "learning_rate": 0.0006969613442582914, "loss": 1.0963, "step": 2552 }, { "epoch": 1.17062263501892, "grad_norm": 0.26699936389923096, "learning_rate": 0.0006967337011036141, "loss": 1.2809, "step": 2553 }, { "epoch": 1.1710812980162826, "grad_norm": 0.34958434104919434, "learning_rate": 0.0006965060096887414, "loss": 2.0635, "step": 2554 }, { "epoch": 1.1715399610136452, "grad_norm": 0.3384473919868469, "learning_rate": 0.0006962782700695278, "loss": 1.3294, "step": 2555 }, { "epoch": 1.1719986240110079, "grad_norm": 0.3625926375389099, "learning_rate": 0.0006960504823018392, "loss": 1.9546, "step": 2556 }, { "epoch": 1.1724572870083705, "grad_norm": 0.23835191130638123, "learning_rate": 0.0006958226464415537, "loss": 1.3571, "step": 2557 }, { "epoch": 1.1729159500057333, "grad_norm": 0.38838183879852295, "learning_rate": 0.0006955947625445611, "loss": 1.9731, "step": 2558 }, { "epoch": 1.173374613003096, "grad_norm": 0.40480363368988037, "learning_rate": 0.000695366830666763, "loss": 2.1235, "step": 2559 }, { "epoch": 1.1738332760004586, "grad_norm": 0.32704851031303406, "learning_rate": 0.0006951388508640725, "loss": 1.7461, "step": 2560 }, { "epoch": 1.1742919389978215, "grad_norm": 0.2514033615589142, "learning_rate": 0.000694910823192415, "loss": 0.8705, "step": 2561 }, { "epoch": 1.174750601995184, "grad_norm": 0.20851144194602966, "learning_rate": 0.0006946827477077271, "loss": 1.0728, "step": 2562 }, { "epoch": 1.1752092649925467, "grad_norm": 0.1491597592830658, "learning_rate": 0.0006944546244659575, "loss": 0.7351, "step": 2563 }, { "epoch": 1.1756679279899094, "grad_norm": 0.2879053056240082, "learning_rate": 0.0006942264535230665, "loss": 1.2752, "step": 2564 }, { "epoch": 1.176126590987272, "grad_norm": 0.4318799078464508, "learning_rate": 0.0006939982349350259, "loss": 1.8137, "step": 2565 }, { "epoch": 1.1765852539846349, "grad_norm": 0.23562239110469818, "learning_rate": 0.0006937699687578195, "loss": 1.4907, "step": 2566 }, { "epoch": 1.1770439169819975, "grad_norm": 0.22170042991638184, "learning_rate": 0.0006935416550474426, "loss": 1.2719, "step": 2567 }, { "epoch": 1.1775025799793601, "grad_norm": 0.3226730227470398, "learning_rate": 0.0006933132938599022, "loss": 1.6284, "step": 2568 }, { "epoch": 1.1779612429767228, "grad_norm": 0.2588571310043335, "learning_rate": 0.000693084885251217, "loss": 0.9474, "step": 2569 }, { "epoch": 1.1784199059740856, "grad_norm": 0.331263929605484, "learning_rate": 0.0006928564292774173, "loss": 2.2437, "step": 2570 }, { "epoch": 1.1788785689714483, "grad_norm": 0.37038788199424744, "learning_rate": 0.0006926279259945447, "loss": 1.5394, "step": 2571 }, { "epoch": 1.179337231968811, "grad_norm": 0.21661770343780518, "learning_rate": 0.0006923993754586532, "loss": 1.4075, "step": 2572 }, { "epoch": 1.1797958949661735, "grad_norm": 0.24760504066944122, "learning_rate": 0.0006921707777258073, "loss": 0.8289, "step": 2573 }, { "epoch": 1.1802545579635364, "grad_norm": 0.29299482703208923, "learning_rate": 0.0006919421328520844, "loss": 1.6813, "step": 2574 }, { "epoch": 1.180713220960899, "grad_norm": 0.21871504187583923, "learning_rate": 0.0006917134408935721, "loss": 1.2732, "step": 2575 }, { "epoch": 1.1811718839582617, "grad_norm": 0.31473806500434875, "learning_rate": 0.0006914847019063705, "loss": 0.5663, "step": 2576 }, { "epoch": 1.1816305469556243, "grad_norm": 0.2423599660396576, "learning_rate": 0.0006912559159465908, "loss": 0.8704, "step": 2577 }, { "epoch": 1.182089209952987, "grad_norm": 0.3309333026409149, "learning_rate": 0.0006910270830703559, "loss": 1.4814, "step": 2578 }, { "epoch": 1.1825478729503498, "grad_norm": 0.281486451625824, "learning_rate": 0.0006907982033338001, "loss": 1.5132, "step": 2579 }, { "epoch": 1.1830065359477124, "grad_norm": 0.3493801951408386, "learning_rate": 0.0006905692767930695, "loss": 1.7191, "step": 2580 }, { "epoch": 1.183465198945075, "grad_norm": 0.3665405213832855, "learning_rate": 0.000690340303504321, "loss": 2.2778, "step": 2581 }, { "epoch": 1.1839238619424377, "grad_norm": 0.36640089750289917, "learning_rate": 0.0006901112835237237, "loss": 1.8275, "step": 2582 }, { "epoch": 1.1843825249398006, "grad_norm": 0.33539673686027527, "learning_rate": 0.0006898822169074577, "loss": 1.6515, "step": 2583 }, { "epoch": 1.1848411879371632, "grad_norm": 0.3045608401298523, "learning_rate": 0.0006896531037117148, "loss": 1.5253, "step": 2584 }, { "epoch": 1.1852998509345258, "grad_norm": 0.18760670721530914, "learning_rate": 0.0006894239439926981, "loss": 0.6896, "step": 2585 }, { "epoch": 1.1857585139318885, "grad_norm": 0.11621358245611191, "learning_rate": 0.0006891947378066223, "loss": 0.916, "step": 2586 }, { "epoch": 1.1862171769292513, "grad_norm": 0.24042093753814697, "learning_rate": 0.000688965485209713, "loss": 0.7641, "step": 2587 }, { "epoch": 1.186675839926614, "grad_norm": 0.1797315925359726, "learning_rate": 0.0006887361862582077, "loss": 1.1713, "step": 2588 }, { "epoch": 1.1871345029239766, "grad_norm": 0.305295467376709, "learning_rate": 0.000688506841008355, "loss": 1.2055, "step": 2589 }, { "epoch": 1.1875931659213392, "grad_norm": 0.18166819214820862, "learning_rate": 0.000688277449516415, "loss": 1.0945, "step": 2590 }, { "epoch": 1.1880518289187019, "grad_norm": 0.23136307299137115, "learning_rate": 0.0006880480118386592, "loss": 0.8324, "step": 2591 }, { "epoch": 1.1885104919160647, "grad_norm": 0.22618937492370605, "learning_rate": 0.00068781852803137, "loss": 1.1397, "step": 2592 }, { "epoch": 1.1889691549134274, "grad_norm": 0.30645951628685, "learning_rate": 0.0006875889981508416, "loss": 1.4277, "step": 2593 }, { "epoch": 1.18942781791079, "grad_norm": 0.3529110848903656, "learning_rate": 0.0006873594222533796, "loss": 1.6997, "step": 2594 }, { "epoch": 1.1898864809081529, "grad_norm": 0.32532528042793274, "learning_rate": 0.0006871298003953004, "loss": 1.8083, "step": 2595 }, { "epoch": 1.1903451439055155, "grad_norm": 0.3486868143081665, "learning_rate": 0.0006869001326329317, "loss": 0.871, "step": 2596 }, { "epoch": 1.1908038069028781, "grad_norm": 0.2799234092235565, "learning_rate": 0.0006866704190226131, "loss": 1.4758, "step": 2597 }, { "epoch": 1.1912624699002408, "grad_norm": 0.17606878280639648, "learning_rate": 0.0006864406596206945, "loss": 1.1527, "step": 2598 }, { "epoch": 1.1917211328976034, "grad_norm": 0.32030197978019714, "learning_rate": 0.0006862108544835379, "loss": 1.5411, "step": 2599 }, { "epoch": 1.1921797958949663, "grad_norm": 0.2235921025276184, "learning_rate": 0.000685981003667516, "loss": 0.9424, "step": 2600 }, { "epoch": 1.192638458892329, "grad_norm": 0.39481407403945923, "learning_rate": 0.0006857511072290128, "loss": 2.1982, "step": 2601 }, { "epoch": 1.1930971218896915, "grad_norm": 0.2962379455566406, "learning_rate": 0.0006855211652244238, "loss": 1.0606, "step": 2602 }, { "epoch": 1.1935557848870542, "grad_norm": 0.2588828504085541, "learning_rate": 0.0006852911777101553, "loss": 1.2921, "step": 2603 }, { "epoch": 1.194014447884417, "grad_norm": 0.09400691092014313, "learning_rate": 0.0006850611447426248, "loss": 0.9105, "step": 2604 }, { "epoch": 1.1944731108817797, "grad_norm": 0.299527645111084, "learning_rate": 0.0006848310663782613, "loss": 1.4135, "step": 2605 }, { "epoch": 1.1949317738791423, "grad_norm": 0.33210527896881104, "learning_rate": 0.0006846009426735045, "loss": 1.6572, "step": 2606 }, { "epoch": 1.195390436876505, "grad_norm": 0.3115682303905487, "learning_rate": 0.0006843707736848052, "loss": 1.3729, "step": 2607 }, { "epoch": 1.1958490998738678, "grad_norm": 0.356736958026886, "learning_rate": 0.0006841405594686259, "loss": 1.7662, "step": 2608 }, { "epoch": 1.1963077628712304, "grad_norm": 0.2792001962661743, "learning_rate": 0.0006839103000814397, "loss": 1.5021, "step": 2609 }, { "epoch": 1.196766425868593, "grad_norm": 0.35029587149620056, "learning_rate": 0.0006836799955797306, "loss": 2.1519, "step": 2610 }, { "epoch": 1.1972250888659557, "grad_norm": 0.24187533557415009, "learning_rate": 0.0006834496460199944, "loss": 0.7456, "step": 2611 }, { "epoch": 1.1976837518633183, "grad_norm": 0.12775690853595734, "learning_rate": 0.0006832192514587372, "loss": 0.6849, "step": 2612 }, { "epoch": 1.1981424148606812, "grad_norm": 0.23146137595176697, "learning_rate": 0.0006829888119524765, "loss": 1.2602, "step": 2613 }, { "epoch": 1.1986010778580438, "grad_norm": 0.3723481595516205, "learning_rate": 0.0006827583275577409, "loss": 1.2093, "step": 2614 }, { "epoch": 1.1990597408554065, "grad_norm": 0.2874572277069092, "learning_rate": 0.0006825277983310697, "loss": 1.7144, "step": 2615 }, { "epoch": 1.199518403852769, "grad_norm": 0.32732078433036804, "learning_rate": 0.0006822972243290136, "loss": 1.6793, "step": 2616 }, { "epoch": 1.199977066850132, "grad_norm": 0.28288617730140686, "learning_rate": 0.0006820666056081339, "loss": 1.5604, "step": 2617 }, { "epoch": 1.2004357298474946, "grad_norm": 0.18358604609966278, "learning_rate": 0.000681835942225003, "loss": 0.4566, "step": 2618 }, { "epoch": 1.2008943928448572, "grad_norm": 0.260851114988327, "learning_rate": 0.0006816052342362045, "loss": 0.9449, "step": 2619 }, { "epoch": 1.2013530558422199, "grad_norm": 0.22726760804653168, "learning_rate": 0.0006813744816983324, "loss": 0.879, "step": 2620 }, { "epoch": 1.2018117188395827, "grad_norm": 0.229889914393425, "learning_rate": 0.0006811436846679923, "loss": 0.899, "step": 2621 }, { "epoch": 1.2022703818369453, "grad_norm": 0.30879998207092285, "learning_rate": 0.0006809128432018003, "loss": 1.7426, "step": 2622 }, { "epoch": 1.202729044834308, "grad_norm": 0.1792958378791809, "learning_rate": 0.0006806819573563832, "loss": 0.9025, "step": 2623 }, { "epoch": 1.2031877078316706, "grad_norm": 0.4172597825527191, "learning_rate": 0.0006804510271883793, "loss": 2.02, "step": 2624 }, { "epoch": 1.2036463708290333, "grad_norm": 0.26834097504615784, "learning_rate": 0.0006802200527544374, "loss": 1.1736, "step": 2625 }, { "epoch": 1.204105033826396, "grad_norm": 0.2656348645687103, "learning_rate": 0.000679989034111217, "loss": 1.2722, "step": 2626 }, { "epoch": 1.2045636968237587, "grad_norm": 0.22897177934646606, "learning_rate": 0.0006797579713153888, "loss": 1.1406, "step": 2627 }, { "epoch": 1.2050223598211214, "grad_norm": 0.2911318838596344, "learning_rate": 0.0006795268644236341, "loss": 0.839, "step": 2628 }, { "epoch": 1.2054810228184842, "grad_norm": 1.6645565032958984, "learning_rate": 0.000679295713492645, "loss": 1.858, "step": 2629 }, { "epoch": 1.2059396858158469, "grad_norm": 0.32228031754493713, "learning_rate": 0.0006790645185791247, "loss": 2.0117, "step": 2630 }, { "epoch": 1.2063983488132095, "grad_norm": 0.1701250672340393, "learning_rate": 0.0006788332797397868, "loss": 0.4305, "step": 2631 }, { "epoch": 1.2068570118105721, "grad_norm": 0.3705667555332184, "learning_rate": 0.0006786019970313559, "loss": 1.7415, "step": 2632 }, { "epoch": 1.2073156748079348, "grad_norm": 0.12469058483839035, "learning_rate": 0.0006783706705105675, "loss": 1.3766, "step": 2633 }, { "epoch": 1.2077743378052976, "grad_norm": 0.5770121216773987, "learning_rate": 0.0006781393002341674, "loss": 1.9946, "step": 2634 }, { "epoch": 1.2082330008026603, "grad_norm": 0.2683452069759369, "learning_rate": 0.0006779078862589126, "loss": 1.5091, "step": 2635 }, { "epoch": 1.208691663800023, "grad_norm": 0.3430030643939972, "learning_rate": 0.0006776764286415704, "loss": 1.6704, "step": 2636 }, { "epoch": 1.2091503267973855, "grad_norm": 0.3244686424732208, "learning_rate": 0.0006774449274389192, "loss": 1.5453, "step": 2637 }, { "epoch": 1.2096089897947484, "grad_norm": 0.31932759284973145, "learning_rate": 0.0006772133827077478, "loss": 1.8912, "step": 2638 }, { "epoch": 1.210067652792111, "grad_norm": 0.3642272353172302, "learning_rate": 0.0006769817945048558, "loss": 1.7648, "step": 2639 }, { "epoch": 1.2105263157894737, "grad_norm": 0.40499117970466614, "learning_rate": 0.0006767501628870536, "loss": 1.9568, "step": 2640 }, { "epoch": 1.2109849787868363, "grad_norm": 0.2826281189918518, "learning_rate": 0.0006765184879111621, "loss": 1.1599, "step": 2641 }, { "epoch": 1.2114436417841992, "grad_norm": 0.2369105964899063, "learning_rate": 0.0006762867696340126, "loss": 1.1869, "step": 2642 }, { "epoch": 1.2119023047815618, "grad_norm": 0.34914588928222656, "learning_rate": 0.0006760550081124475, "loss": 1.6072, "step": 2643 }, { "epoch": 1.2123609677789244, "grad_norm": 0.8818534016609192, "learning_rate": 0.0006758232034033193, "loss": 2.145, "step": 2644 }, { "epoch": 1.212819630776287, "grad_norm": 0.3968670070171356, "learning_rate": 0.0006755913555634918, "loss": 1.8677, "step": 2645 }, { "epoch": 1.2132782937736497, "grad_norm": 0.20291972160339355, "learning_rate": 0.0006753594646498384, "loss": 0.8382, "step": 2646 }, { "epoch": 1.2137369567710126, "grad_norm": 0.3209551274776459, "learning_rate": 0.0006751275307192442, "loss": 1.3622, "step": 2647 }, { "epoch": 1.2141956197683752, "grad_norm": 0.12200617045164108, "learning_rate": 0.000674895553828604, "loss": 0.7502, "step": 2648 }, { "epoch": 1.2146542827657378, "grad_norm": 0.3413892090320587, "learning_rate": 0.0006746635340348232, "loss": 2.4104, "step": 2649 }, { "epoch": 1.2151129457631005, "grad_norm": 0.3582800626754761, "learning_rate": 0.0006744314713948182, "loss": 1.6539, "step": 2650 }, { "epoch": 1.2155716087604633, "grad_norm": 0.386416494846344, "learning_rate": 0.0006741993659655155, "loss": 1.6932, "step": 2651 }, { "epoch": 1.216030271757826, "grad_norm": 0.3171131908893585, "learning_rate": 0.0006739672178038522, "loss": 1.5858, "step": 2652 }, { "epoch": 1.2164889347551886, "grad_norm": 0.35482868552207947, "learning_rate": 0.0006737350269667763, "loss": 1.8237, "step": 2653 }, { "epoch": 1.2169475977525512, "grad_norm": 0.3112960457801819, "learning_rate": 0.0006735027935112454, "loss": 1.3225, "step": 2654 }, { "epoch": 1.217406260749914, "grad_norm": 0.3752141296863556, "learning_rate": 0.0006732705174942283, "loss": 1.3334, "step": 2655 }, { "epoch": 1.2178649237472767, "grad_norm": 0.29117441177368164, "learning_rate": 0.0006730381989727038, "loss": 1.148, "step": 2656 }, { "epoch": 1.2183235867446394, "grad_norm": 0.19909793138504028, "learning_rate": 0.0006728058380036614, "loss": 1.4814, "step": 2657 }, { "epoch": 1.218782249742002, "grad_norm": 0.2802788317203522, "learning_rate": 0.0006725734346441013, "loss": 1.1915, "step": 2658 }, { "epoch": 1.2192409127393646, "grad_norm": 0.34183627367019653, "learning_rate": 0.000672340988951033, "loss": 1.6281, "step": 2659 }, { "epoch": 1.2196995757367275, "grad_norm": 0.09204282611608505, "learning_rate": 0.0006721085009814775, "loss": 0.5768, "step": 2660 }, { "epoch": 1.2201582387340901, "grad_norm": 0.20829735696315765, "learning_rate": 0.0006718759707924658, "loss": 1.2005, "step": 2661 }, { "epoch": 1.2206169017314528, "grad_norm": 0.23320595920085907, "learning_rate": 0.0006716433984410391, "loss": 1.4799, "step": 2662 }, { "epoch": 1.2210755647288156, "grad_norm": 0.446114718914032, "learning_rate": 0.0006714107839842493, "loss": 1.2414, "step": 2663 }, { "epoch": 1.2215342277261783, "grad_norm": 0.26882967352867126, "learning_rate": 0.0006711781274791582, "loss": 1.344, "step": 2664 }, { "epoch": 1.221992890723541, "grad_norm": 0.20352056622505188, "learning_rate": 0.000670945428982838, "loss": 1.0079, "step": 2665 }, { "epoch": 1.2224515537209035, "grad_norm": 0.23894716799259186, "learning_rate": 0.0006707126885523717, "loss": 1.8065, "step": 2666 }, { "epoch": 1.2229102167182662, "grad_norm": 0.3363592028617859, "learning_rate": 0.000670479906244852, "loss": 1.2569, "step": 2667 }, { "epoch": 1.223368879715629, "grad_norm": 0.30896368622779846, "learning_rate": 0.000670247082117382, "loss": 1.9127, "step": 2668 }, { "epoch": 1.2238275427129917, "grad_norm": 0.21522556245326996, "learning_rate": 0.0006700142162270753, "loss": 1.5338, "step": 2669 }, { "epoch": 1.2242862057103543, "grad_norm": 0.30787453055381775, "learning_rate": 0.0006697813086310553, "loss": 1.3445, "step": 2670 }, { "epoch": 1.224744868707717, "grad_norm": 0.2880922853946686, "learning_rate": 0.0006695483593864562, "loss": 1.2805, "step": 2671 }, { "epoch": 1.2252035317050798, "grad_norm": 0.2538045346736908, "learning_rate": 0.0006693153685504221, "loss": 1.4706, "step": 2672 }, { "epoch": 1.2256621947024424, "grad_norm": 0.2669422924518585, "learning_rate": 0.0006690823361801073, "loss": 0.8488, "step": 2673 }, { "epoch": 1.226120857699805, "grad_norm": 0.09968861192464828, "learning_rate": 0.0006688492623326762, "loss": 0.6257, "step": 2674 }, { "epoch": 1.2265795206971677, "grad_norm": 0.06321458518505096, "learning_rate": 0.0006686161470653036, "loss": 0.8595, "step": 2675 }, { "epoch": 1.2270381836945305, "grad_norm": 0.3460922837257385, "learning_rate": 0.0006683829904351742, "loss": 2.0936, "step": 2676 }, { "epoch": 1.2274968466918932, "grad_norm": 0.28384023904800415, "learning_rate": 0.0006681497924994834, "loss": 0.7923, "step": 2677 }, { "epoch": 1.2279555096892558, "grad_norm": 0.22630473971366882, "learning_rate": 0.000667916553315436, "loss": 1.5095, "step": 2678 }, { "epoch": 1.2284141726866185, "grad_norm": 0.3462603688240051, "learning_rate": 0.0006676832729402472, "loss": 1.7954, "step": 2679 }, { "epoch": 1.228872835683981, "grad_norm": 0.18489515781402588, "learning_rate": 0.0006674499514311426, "loss": 0.5131, "step": 2680 }, { "epoch": 1.229331498681344, "grad_norm": 0.2563524544239044, "learning_rate": 0.0006672165888453576, "loss": 1.455, "step": 2681 }, { "epoch": 1.2297901616787066, "grad_norm": 0.28500133752822876, "learning_rate": 0.0006669831852401375, "loss": 1.4278, "step": 2682 }, { "epoch": 1.2302488246760692, "grad_norm": 0.21729940176010132, "learning_rate": 0.0006667497406727382, "loss": 1.6448, "step": 2683 }, { "epoch": 1.2307074876734319, "grad_norm": 0.32196274399757385, "learning_rate": 0.0006665162552004251, "loss": 1.5613, "step": 2684 }, { "epoch": 1.2311661506707947, "grad_norm": 0.2639254927635193, "learning_rate": 0.0006662827288804739, "loss": 1.1826, "step": 2685 }, { "epoch": 1.2316248136681573, "grad_norm": 0.3229156732559204, "learning_rate": 0.0006660491617701704, "loss": 2.0408, "step": 2686 }, { "epoch": 1.23208347666552, "grad_norm": 0.3566708266735077, "learning_rate": 0.0006658155539268103, "loss": 1.7527, "step": 2687 }, { "epoch": 1.2325421396628826, "grad_norm": 0.09745471924543381, "learning_rate": 0.0006655819054076991, "loss": 0.9554, "step": 2688 }, { "epoch": 1.2330008026602455, "grad_norm": 0.37350574135780334, "learning_rate": 0.0006653482162701528, "loss": 1.5167, "step": 2689 }, { "epoch": 1.2334594656576081, "grad_norm": 0.3267020285129547, "learning_rate": 0.0006651144865714966, "loss": 2.0956, "step": 2690 }, { "epoch": 1.2339181286549707, "grad_norm": 0.32065388560295105, "learning_rate": 0.0006648807163690664, "loss": 1.6762, "step": 2691 }, { "epoch": 1.2343767916523334, "grad_norm": 0.5290999412536621, "learning_rate": 0.0006646469057202076, "loss": 1.6888, "step": 2692 }, { "epoch": 1.234835454649696, "grad_norm": 0.3192092478275299, "learning_rate": 0.0006644130546822757, "loss": 1.5339, "step": 2693 }, { "epoch": 1.2352941176470589, "grad_norm": 0.3551962077617645, "learning_rate": 0.000664179163312636, "loss": 1.5905, "step": 2694 }, { "epoch": 1.2357527806444215, "grad_norm": 0.2710552513599396, "learning_rate": 0.0006639452316686638, "loss": 1.394, "step": 2695 }, { "epoch": 1.2362114436417841, "grad_norm": 0.3438358008861542, "learning_rate": 0.0006637112598077441, "loss": 1.373, "step": 2696 }, { "epoch": 1.236670106639147, "grad_norm": 0.33121544122695923, "learning_rate": 0.0006634772477872719, "loss": 1.8278, "step": 2697 }, { "epoch": 1.2371287696365096, "grad_norm": 0.2892778217792511, "learning_rate": 0.0006632431956646522, "loss": 1.3161, "step": 2698 }, { "epoch": 1.2375874326338723, "grad_norm": 0.3857962191104889, "learning_rate": 0.0006630091034972995, "loss": 1.9475, "step": 2699 }, { "epoch": 1.238046095631235, "grad_norm": 0.2920449376106262, "learning_rate": 0.0006627749713426385, "loss": 1.6476, "step": 2700 }, { "epoch": 1.2385047586285975, "grad_norm": 0.29545149207115173, "learning_rate": 0.000662540799258103, "loss": 1.2706, "step": 2701 }, { "epoch": 1.2389634216259604, "grad_norm": 0.32311832904815674, "learning_rate": 0.0006623065873011378, "loss": 0.8628, "step": 2702 }, { "epoch": 1.239422084623323, "grad_norm": 0.20221565663814545, "learning_rate": 0.0006620723355291963, "loss": 0.8695, "step": 2703 }, { "epoch": 1.2398807476206857, "grad_norm": 0.3797789216041565, "learning_rate": 0.0006618380439997423, "loss": 1.8011, "step": 2704 }, { "epoch": 1.2403394106180483, "grad_norm": 0.27936404943466187, "learning_rate": 0.0006616037127702493, "loss": 1.7474, "step": 2705 }, { "epoch": 1.2407980736154112, "grad_norm": 0.305891752243042, "learning_rate": 0.0006613693418982002, "loss": 1.4064, "step": 2706 }, { "epoch": 1.2412567366127738, "grad_norm": 0.22525465488433838, "learning_rate": 0.0006611349314410881, "loss": 1.634, "step": 2707 }, { "epoch": 1.2417153996101364, "grad_norm": 0.2716008126735687, "learning_rate": 0.0006609004814564154, "loss": 1.3457, "step": 2708 }, { "epoch": 1.242174062607499, "grad_norm": 0.28558602929115295, "learning_rate": 0.0006606659920016945, "loss": 1.4251, "step": 2709 }, { "epoch": 1.242632725604862, "grad_norm": 0.36208319664001465, "learning_rate": 0.0006604314631344472, "loss": 1.4049, "step": 2710 }, { "epoch": 1.2430913886022246, "grad_norm": 0.3267166316509247, "learning_rate": 0.0006601968949122053, "loss": 1.9824, "step": 2711 }, { "epoch": 1.2435500515995872, "grad_norm": 0.22049808502197266, "learning_rate": 0.00065996228739251, "loss": 0.8636, "step": 2712 }, { "epoch": 1.2440087145969498, "grad_norm": 0.17521077394485474, "learning_rate": 0.0006597276406329122, "loss": 1.3998, "step": 2713 }, { "epoch": 1.2444673775943125, "grad_norm": 0.4500933289527893, "learning_rate": 0.0006594929546909725, "loss": 1.4056, "step": 2714 }, { "epoch": 1.2449260405916753, "grad_norm": 0.37240904569625854, "learning_rate": 0.0006592582296242609, "loss": 1.4709, "step": 2715 }, { "epoch": 1.245384703589038, "grad_norm": 0.3110402226448059, "learning_rate": 0.0006590234654903574, "loss": 1.8608, "step": 2716 }, { "epoch": 1.2458433665864006, "grad_norm": 0.2775774896144867, "learning_rate": 0.0006587886623468511, "loss": 1.3253, "step": 2717 }, { "epoch": 1.2463020295837632, "grad_norm": 0.2369251698255539, "learning_rate": 0.000658553820251341, "loss": 0.7903, "step": 2718 }, { "epoch": 1.246760692581126, "grad_norm": 0.25416409969329834, "learning_rate": 0.0006583189392614356, "loss": 1.2426, "step": 2719 }, { "epoch": 1.2472193555784887, "grad_norm": 0.36366981267929077, "learning_rate": 0.0006580840194347529, "loss": 2.1185, "step": 2720 }, { "epoch": 1.2476780185758514, "grad_norm": 0.21823568642139435, "learning_rate": 0.0006578490608289204, "loss": 0.8336, "step": 2721 }, { "epoch": 1.248136681573214, "grad_norm": 0.2580203115940094, "learning_rate": 0.0006576140635015749, "loss": 1.243, "step": 2722 }, { "epoch": 1.2485953445705769, "grad_norm": 0.19676536321640015, "learning_rate": 0.0006573790275103635, "loss": 1.3928, "step": 2723 }, { "epoch": 1.2490540075679395, "grad_norm": 0.3913279175758362, "learning_rate": 0.0006571439529129417, "loss": 1.6071, "step": 2724 }, { "epoch": 1.2495126705653021, "grad_norm": 0.33274513483047485, "learning_rate": 0.0006569088397669752, "loss": 1.9368, "step": 2725 }, { "epoch": 1.2499713335626648, "grad_norm": 0.3166465759277344, "learning_rate": 0.0006566736881301389, "loss": 1.193, "step": 2726 }, { "epoch": 1.2504299965600274, "grad_norm": 0.12209037691354752, "learning_rate": 0.0006564384980601172, "loss": 0.7718, "step": 2727 }, { "epoch": 1.2508886595573903, "grad_norm": 0.19902729988098145, "learning_rate": 0.000656203269614604, "loss": 0.83, "step": 2728 }, { "epoch": 1.251347322554753, "grad_norm": 0.23850427567958832, "learning_rate": 0.0006559680028513027, "loss": 1.1411, "step": 2729 }, { "epoch": 1.2518059855521155, "grad_norm": 0.22274386882781982, "learning_rate": 0.0006557326978279255, "loss": 1.2014, "step": 2730 }, { "epoch": 1.2522646485494784, "grad_norm": 0.2648008465766907, "learning_rate": 0.0006554973546021946, "loss": 1.1293, "step": 2731 }, { "epoch": 1.252723311546841, "grad_norm": 0.20041297376155853, "learning_rate": 0.0006552619732318414, "loss": 0.9521, "step": 2732 }, { "epoch": 1.2531819745442037, "grad_norm": 0.3079836666584015, "learning_rate": 0.0006550265537746068, "loss": 1.9308, "step": 2733 }, { "epoch": 1.2536406375415663, "grad_norm": 0.3721216917037964, "learning_rate": 0.0006547910962882407, "loss": 1.7333, "step": 2734 }, { "epoch": 1.254099300538929, "grad_norm": 0.37531670928001404, "learning_rate": 0.0006545556008305025, "loss": 1.4659, "step": 2735 }, { "epoch": 1.2545579635362918, "grad_norm": 0.24703913927078247, "learning_rate": 0.0006543200674591611, "loss": 1.6251, "step": 2736 }, { "epoch": 1.2550166265336544, "grad_norm": 0.3122749924659729, "learning_rate": 0.0006540844962319944, "loss": 0.7767, "step": 2737 }, { "epoch": 1.255475289531017, "grad_norm": 0.41236305236816406, "learning_rate": 0.0006538488872067899, "loss": 2.0791, "step": 2738 }, { "epoch": 1.2559339525283797, "grad_norm": 0.3451601266860962, "learning_rate": 0.000653613240441344, "loss": 1.5688, "step": 2739 }, { "epoch": 1.2563926155257423, "grad_norm": 0.27274802327156067, "learning_rate": 0.0006533775559934624, "loss": 1.2577, "step": 2740 }, { "epoch": 1.2568512785231052, "grad_norm": 0.42158329486846924, "learning_rate": 0.0006531418339209607, "loss": 1.9504, "step": 2741 }, { "epoch": 1.2573099415204678, "grad_norm": 0.336901992559433, "learning_rate": 0.0006529060742816627, "loss": 1.6164, "step": 2742 }, { "epoch": 1.2577686045178305, "grad_norm": 0.2214047908782959, "learning_rate": 0.0006526702771334023, "loss": 1.2476, "step": 2743 }, { "epoch": 1.2582272675151933, "grad_norm": 0.2765026092529297, "learning_rate": 0.000652434442534022, "loss": 1.2798, "step": 2744 }, { "epoch": 1.258685930512556, "grad_norm": 0.4691556692123413, "learning_rate": 0.0006521985705413741, "loss": 2.1313, "step": 2745 }, { "epoch": 1.2591445935099186, "grad_norm": 0.2610776126384735, "learning_rate": 0.0006519626612133192, "loss": 0.9233, "step": 2746 }, { "epoch": 1.2596032565072812, "grad_norm": 0.26943257451057434, "learning_rate": 0.0006517267146077279, "loss": 1.618, "step": 2747 }, { "epoch": 1.2600619195046439, "grad_norm": 0.3554578125476837, "learning_rate": 0.0006514907307824794, "loss": 1.9045, "step": 2748 }, { "epoch": 1.2605205825020067, "grad_norm": 0.28835994005203247, "learning_rate": 0.0006512547097954624, "loss": 0.8852, "step": 2749 }, { "epoch": 1.2609792454993694, "grad_norm": 0.12801244854927063, "learning_rate": 0.0006510186517045744, "loss": 0.7682, "step": 2750 }, { "epoch": 1.261437908496732, "grad_norm": 0.3506546914577484, "learning_rate": 0.0006507825565677225, "loss": 2.2786, "step": 2751 }, { "epoch": 1.2618965714940948, "grad_norm": 0.36096346378326416, "learning_rate": 0.0006505464244428219, "loss": 1.8541, "step": 2752 }, { "epoch": 1.2623552344914575, "grad_norm": 0.18552398681640625, "learning_rate": 0.0006503102553877982, "loss": 0.4271, "step": 2753 }, { "epoch": 1.2628138974888201, "grad_norm": 0.11175768822431564, "learning_rate": 0.0006500740494605848, "loss": 0.7008, "step": 2754 }, { "epoch": 1.2632725604861827, "grad_norm": 0.34600672125816345, "learning_rate": 0.0006498378067191252, "loss": 2.1968, "step": 2755 }, { "epoch": 1.2637312234835454, "grad_norm": 0.32166171073913574, "learning_rate": 0.0006496015272213711, "loss": 1.4522, "step": 2756 }, { "epoch": 1.2641898864809082, "grad_norm": 0.3662795126438141, "learning_rate": 0.0006493652110252838, "loss": 2.3257, "step": 2757 }, { "epoch": 1.2646485494782709, "grad_norm": 0.2909261882305145, "learning_rate": 0.000649128858188833, "loss": 1.1611, "step": 2758 }, { "epoch": 1.2651072124756335, "grad_norm": 0.4382424056529999, "learning_rate": 0.0006488924687699983, "loss": 1.4101, "step": 2759 }, { "epoch": 1.2655658754729961, "grad_norm": 0.17091111838817596, "learning_rate": 0.0006486560428267674, "loss": 0.8147, "step": 2760 }, { "epoch": 1.2660245384703588, "grad_norm": 0.3439600169658661, "learning_rate": 0.0006484195804171371, "loss": 1.7435, "step": 2761 }, { "epoch": 1.2664832014677216, "grad_norm": 0.3564503490924835, "learning_rate": 0.0006481830815991138, "loss": 1.1747, "step": 2762 }, { "epoch": 1.2669418644650843, "grad_norm": 0.2912631034851074, "learning_rate": 0.000647946546430712, "loss": 1.6711, "step": 2763 }, { "epoch": 1.267400527462447, "grad_norm": 0.4116171598434448, "learning_rate": 0.0006477099749699557, "loss": 2.1626, "step": 2764 }, { "epoch": 1.2678591904598098, "grad_norm": 0.2863385081291199, "learning_rate": 0.0006474733672748775, "loss": 0.9852, "step": 2765 }, { "epoch": 1.2683178534571724, "grad_norm": 0.30499163269996643, "learning_rate": 0.0006472367234035189, "loss": 1.5042, "step": 2766 }, { "epoch": 1.268776516454535, "grad_norm": 0.20567883551120758, "learning_rate": 0.0006470000434139306, "loss": 0.7004, "step": 2767 }, { "epoch": 1.2692351794518977, "grad_norm": 0.1941119283437729, "learning_rate": 0.0006467633273641714, "loss": 1.2625, "step": 2768 }, { "epoch": 1.2696938424492603, "grad_norm": 0.33352068066596985, "learning_rate": 0.00064652657531231, "loss": 1.3027, "step": 2769 }, { "epoch": 1.2701525054466232, "grad_norm": 0.23452205955982208, "learning_rate": 0.0006462897873164232, "loss": 1.4857, "step": 2770 }, { "epoch": 1.2706111684439858, "grad_norm": 0.3402770757675171, "learning_rate": 0.0006460529634345967, "loss": 1.4496, "step": 2771 }, { "epoch": 1.2710698314413484, "grad_norm": 0.25416603684425354, "learning_rate": 0.000645816103724925, "loss": 1.1967, "step": 2772 }, { "epoch": 1.271528494438711, "grad_norm": 0.29919010400772095, "learning_rate": 0.0006455792082455118, "loss": 1.3418, "step": 2773 }, { "epoch": 1.2719871574360737, "grad_norm": 0.2903623580932617, "learning_rate": 0.0006453422770544692, "loss": 1.3553, "step": 2774 }, { "epoch": 1.2724458204334366, "grad_norm": 0.478496253490448, "learning_rate": 0.0006451053102099181, "loss": 2.0164, "step": 2775 }, { "epoch": 1.2729044834307992, "grad_norm": 0.2910837233066559, "learning_rate": 0.000644868307769988, "loss": 1.1557, "step": 2776 }, { "epoch": 1.2733631464281618, "grad_norm": 0.19006802141666412, "learning_rate": 0.0006446312697928176, "loss": 1.3286, "step": 2777 }, { "epoch": 1.2738218094255247, "grad_norm": 0.23939000070095062, "learning_rate": 0.0006443941963365539, "loss": 0.9206, "step": 2778 }, { "epoch": 1.2742804724228873, "grad_norm": 0.34608039259910583, "learning_rate": 0.0006441570874593524, "loss": 2.2271, "step": 2779 }, { "epoch": 1.27473913542025, "grad_norm": 0.3570116460323334, "learning_rate": 0.0006439199432193782, "loss": 1.7172, "step": 2780 }, { "epoch": 1.2751977984176126, "grad_norm": 0.21899329125881195, "learning_rate": 0.0006436827636748042, "loss": 1.1881, "step": 2781 }, { "epoch": 1.2756564614149752, "grad_norm": 0.2552841305732727, "learning_rate": 0.0006434455488838121, "loss": 1.5012, "step": 2782 }, { "epoch": 1.276115124412338, "grad_norm": 0.3439258635044098, "learning_rate": 0.0006432082989045926, "loss": 1.7224, "step": 2783 }, { "epoch": 1.2765737874097007, "grad_norm": 0.12296123802661896, "learning_rate": 0.000642971013795345, "loss": 1.0084, "step": 2784 }, { "epoch": 1.2770324504070634, "grad_norm": 0.37332117557525635, "learning_rate": 0.0006427336936142766, "loss": 1.8006, "step": 2785 }, { "epoch": 1.2774911134044262, "grad_norm": 0.1939849704504013, "learning_rate": 0.0006424963384196041, "loss": 0.8541, "step": 2786 }, { "epoch": 1.2779497764017889, "grad_norm": 0.3263346552848816, "learning_rate": 0.0006422589482695523, "loss": 1.8838, "step": 2787 }, { "epoch": 1.2784084393991515, "grad_norm": 0.07417495548725128, "learning_rate": 0.0006420215232223548, "loss": 0.3335, "step": 2788 }, { "epoch": 1.2788671023965141, "grad_norm": 0.27612733840942383, "learning_rate": 0.0006417840633362535, "loss": 1.3939, "step": 2789 }, { "epoch": 1.2793257653938768, "grad_norm": 0.31349727511405945, "learning_rate": 0.0006415465686694993, "loss": 1.7898, "step": 2790 }, { "epoch": 1.2797844283912396, "grad_norm": 0.3115302622318268, "learning_rate": 0.0006413090392803511, "loss": 1.4323, "step": 2791 }, { "epoch": 1.2802430913886023, "grad_norm": 0.29694634675979614, "learning_rate": 0.0006410714752270769, "loss": 1.7373, "step": 2792 }, { "epoch": 1.280701754385965, "grad_norm": 0.2605624496936798, "learning_rate": 0.0006408338765679525, "loss": 1.2168, "step": 2793 }, { "epoch": 1.2811604173833275, "grad_norm": 0.4056708514690399, "learning_rate": 0.0006405962433612625, "loss": 1.701, "step": 2794 }, { "epoch": 1.2816190803806902, "grad_norm": 0.30486351251602173, "learning_rate": 0.0006403585756653004, "loss": 0.8638, "step": 2795 }, { "epoch": 1.282077743378053, "grad_norm": 0.27140724658966064, "learning_rate": 0.0006401208735383677, "loss": 1.5908, "step": 2796 }, { "epoch": 1.2825364063754157, "grad_norm": 0.30335965752601624, "learning_rate": 0.0006398831370387744, "loss": 1.1826, "step": 2797 }, { "epoch": 1.2829950693727783, "grad_norm": 0.22256600856781006, "learning_rate": 0.0006396453662248391, "loss": 1.0292, "step": 2798 }, { "epoch": 1.2834537323701412, "grad_norm": 0.34704825282096863, "learning_rate": 0.0006394075611548882, "loss": 1.5649, "step": 2799 }, { "epoch": 1.2839123953675038, "grad_norm": 0.4331229031085968, "learning_rate": 0.0006391697218872576, "loss": 1.5117, "step": 2800 }, { "epoch": 1.2843710583648664, "grad_norm": 0.25863364338874817, "learning_rate": 0.0006389318484802908, "loss": 1.5621, "step": 2801 }, { "epoch": 1.284829721362229, "grad_norm": 0.31081312894821167, "learning_rate": 0.0006386939409923398, "loss": 1.7057, "step": 2802 }, { "epoch": 1.2852883843595917, "grad_norm": 0.3220570683479309, "learning_rate": 0.0006384559994817649, "loss": 0.9301, "step": 2803 }, { "epoch": 1.2857470473569546, "grad_norm": 0.32526329159736633, "learning_rate": 0.0006382180240069352, "loss": 2.1208, "step": 2804 }, { "epoch": 1.2862057103543172, "grad_norm": 0.33022022247314453, "learning_rate": 0.0006379800146262274, "loss": 1.1948, "step": 2805 }, { "epoch": 1.2866643733516798, "grad_norm": 0.28297072649002075, "learning_rate": 0.0006377419713980274, "loss": 1.351, "step": 2806 }, { "epoch": 1.2871230363490425, "grad_norm": 0.23854339122772217, "learning_rate": 0.0006375038943807284, "loss": 1.46, "step": 2807 }, { "epoch": 1.287581699346405, "grad_norm": 0.2918485999107361, "learning_rate": 0.0006372657836327328, "loss": 0.8088, "step": 2808 }, { "epoch": 1.288040362343768, "grad_norm": 0.19841410219669342, "learning_rate": 0.0006370276392124506, "loss": 1.2504, "step": 2809 }, { "epoch": 1.2884990253411306, "grad_norm": 0.28576546907424927, "learning_rate": 0.0006367894611783006, "loss": 1.4295, "step": 2810 }, { "epoch": 1.2889576883384932, "grad_norm": 0.32226133346557617, "learning_rate": 0.0006365512495887094, "loss": 1.6712, "step": 2811 }, { "epoch": 1.289416351335856, "grad_norm": 0.29555824398994446, "learning_rate": 0.0006363130045021121, "loss": 1.2164, "step": 2812 }, { "epoch": 1.2898750143332187, "grad_norm": 0.17073696851730347, "learning_rate": 0.0006360747259769521, "loss": 0.9707, "step": 2813 }, { "epoch": 1.2903336773305814, "grad_norm": 0.19965320825576782, "learning_rate": 0.0006358364140716805, "loss": 1.3948, "step": 2814 }, { "epoch": 1.290792340327944, "grad_norm": 0.2776814103126526, "learning_rate": 0.0006355980688447571, "loss": 1.6216, "step": 2815 }, { "epoch": 1.2912510033253066, "grad_norm": 0.29604098200798035, "learning_rate": 0.0006353596903546497, "loss": 1.0638, "step": 2816 }, { "epoch": 1.2917096663226695, "grad_norm": 0.2199414223432541, "learning_rate": 0.0006351212786598341, "loss": 1.8243, "step": 2817 }, { "epoch": 1.2921683293200321, "grad_norm": 0.4345381259918213, "learning_rate": 0.0006348828338187949, "loss": 2.1404, "step": 2818 }, { "epoch": 1.2926269923173948, "grad_norm": 0.303279310464859, "learning_rate": 0.0006346443558900239, "loss": 1.6801, "step": 2819 }, { "epoch": 1.2930856553147576, "grad_norm": 0.32503482699394226, "learning_rate": 0.0006344058449320215, "loss": 1.7594, "step": 2820 }, { "epoch": 1.2935443183121202, "grad_norm": 0.2687481641769409, "learning_rate": 0.0006341673010032962, "loss": 0.9268, "step": 2821 }, { "epoch": 1.2940029813094829, "grad_norm": 0.21548904478549957, "learning_rate": 0.0006339287241623646, "loss": 1.2141, "step": 2822 }, { "epoch": 1.2944616443068455, "grad_norm": 0.2961852550506592, "learning_rate": 0.0006336901144677514, "loss": 1.4321, "step": 2823 }, { "epoch": 1.2949203073042082, "grad_norm": 0.3688824474811554, "learning_rate": 0.0006334514719779891, "loss": 1.7928, "step": 2824 }, { "epoch": 1.295378970301571, "grad_norm": 0.3493098318576813, "learning_rate": 0.0006332127967516182, "loss": 1.9301, "step": 2825 }, { "epoch": 1.2958376332989336, "grad_norm": 0.344375878572464, "learning_rate": 0.0006329740888471881, "loss": 1.4666, "step": 2826 }, { "epoch": 1.2962962962962963, "grad_norm": 0.36221787333488464, "learning_rate": 0.000632735348323255, "loss": 1.4453, "step": 2827 }, { "epoch": 1.296754959293659, "grad_norm": 0.3239736557006836, "learning_rate": 0.000632496575238384, "loss": 1.926, "step": 2828 }, { "epoch": 1.2972136222910216, "grad_norm": 0.20216989517211914, "learning_rate": 0.0006322577696511479, "loss": 0.4995, "step": 2829 }, { "epoch": 1.2976722852883844, "grad_norm": 0.713234007358551, "learning_rate": 0.000632018931620127, "loss": 1.2388, "step": 2830 }, { "epoch": 1.298130948285747, "grad_norm": 0.39462095499038696, "learning_rate": 0.0006317800612039103, "loss": 1.3626, "step": 2831 }, { "epoch": 1.2985896112831097, "grad_norm": 0.3811722993850708, "learning_rate": 0.0006315411584610946, "loss": 1.5548, "step": 2832 }, { "epoch": 1.2990482742804725, "grad_norm": 0.1306716352701187, "learning_rate": 0.0006313022234502841, "loss": 1.1012, "step": 2833 }, { "epoch": 1.2995069372778352, "grad_norm": 0.37927016615867615, "learning_rate": 0.0006310632562300917, "loss": 1.6273, "step": 2834 }, { "epoch": 1.2999656002751978, "grad_norm": 0.19377583265304565, "learning_rate": 0.0006308242568591371, "loss": 1.2111, "step": 2835 }, { "epoch": 1.3004242632725604, "grad_norm": 0.3456363379955292, "learning_rate": 0.0006305852253960493, "loss": 2.2551, "step": 2836 }, { "epoch": 1.300882926269923, "grad_norm": 0.3247735798358917, "learning_rate": 0.0006303461618994642, "loss": 1.5282, "step": 2837 }, { "epoch": 1.301341589267286, "grad_norm": 0.22547945380210876, "learning_rate": 0.0006301070664280255, "loss": 1.5934, "step": 2838 }, { "epoch": 1.3018002522646486, "grad_norm": 0.26010963320732117, "learning_rate": 0.0006298679390403854, "loss": 1.6635, "step": 2839 }, { "epoch": 1.3022589152620112, "grad_norm": 0.2988232672214508, "learning_rate": 0.0006296287797952034, "loss": 1.1585, "step": 2840 }, { "epoch": 1.3027175782593738, "grad_norm": 0.3130618631839752, "learning_rate": 0.0006293895887511472, "loss": 2.3552, "step": 2841 }, { "epoch": 1.3031762412567365, "grad_norm": 0.40085339546203613, "learning_rate": 0.0006291503659668916, "loss": 1.7498, "step": 2842 }, { "epoch": 1.3036349042540993, "grad_norm": 0.3021315336227417, "learning_rate": 0.0006289111115011204, "loss": 1.6539, "step": 2843 }, { "epoch": 1.304093567251462, "grad_norm": 0.32519641518592834, "learning_rate": 0.0006286718254125238, "loss": 1.626, "step": 2844 }, { "epoch": 1.3045522302488246, "grad_norm": 0.34999242424964905, "learning_rate": 0.0006284325077598008, "loss": 1.4192, "step": 2845 }, { "epoch": 1.3050108932461875, "grad_norm": 0.48127058148384094, "learning_rate": 0.0006281931586016576, "loss": 1.2933, "step": 2846 }, { "epoch": 1.30546955624355, "grad_norm": 0.3110017776489258, "learning_rate": 0.0006279537779968082, "loss": 1.4093, "step": 2847 }, { "epoch": 1.3059282192409127, "grad_norm": 0.28830286860466003, "learning_rate": 0.0006277143660039746, "loss": 1.553, "step": 2848 }, { "epoch": 1.3063868822382754, "grad_norm": 0.29233160614967346, "learning_rate": 0.000627474922681886, "loss": 1.988, "step": 2849 }, { "epoch": 1.306845545235638, "grad_norm": 0.3408198058605194, "learning_rate": 0.0006272354480892797, "loss": 1.4908, "step": 2850 }, { "epoch": 1.3073042082330009, "grad_norm": 0.3165978491306305, "learning_rate": 0.0006269959422849007, "loss": 1.6967, "step": 2851 }, { "epoch": 1.3077628712303635, "grad_norm": 0.2932069003582001, "learning_rate": 0.0006267564053275012, "loss": 1.9072, "step": 2852 }, { "epoch": 1.3082215342277261, "grad_norm": 0.35463714599609375, "learning_rate": 0.0006265168372758417, "loss": 1.5883, "step": 2853 }, { "epoch": 1.308680197225089, "grad_norm": 0.32562148571014404, "learning_rate": 0.0006262772381886896, "loss": 1.4199, "step": 2854 }, { "epoch": 1.3091388602224516, "grad_norm": 0.3598746955394745, "learning_rate": 0.0006260376081248205, "loss": 1.2422, "step": 2855 }, { "epoch": 1.3095975232198143, "grad_norm": 0.23722459375858307, "learning_rate": 0.0006257979471430174, "loss": 1.5364, "step": 2856 }, { "epoch": 1.310056186217177, "grad_norm": 0.25034475326538086, "learning_rate": 0.0006255582553020708, "loss": 1.2079, "step": 2857 }, { "epoch": 1.3105148492145395, "grad_norm": 0.3492996096611023, "learning_rate": 0.000625318532660779, "loss": 1.614, "step": 2858 }, { "epoch": 1.3109735122119024, "grad_norm": 0.3420841097831726, "learning_rate": 0.0006250787792779473, "loss": 1.7344, "step": 2859 }, { "epoch": 1.311432175209265, "grad_norm": 0.3418578505516052, "learning_rate": 0.0006248389952123895, "loss": 2.0039, "step": 2860 }, { "epoch": 1.3118908382066277, "grad_norm": 0.4057673513889313, "learning_rate": 0.0006245991805229256, "loss": 1.4351, "step": 2861 }, { "epoch": 1.3123495012039903, "grad_norm": 0.1766156405210495, "learning_rate": 0.0006243593352683848, "loss": 0.7945, "step": 2862 }, { "epoch": 1.312808164201353, "grad_norm": 0.32402530312538147, "learning_rate": 0.0006241194595076019, "loss": 1.3118, "step": 2863 }, { "epoch": 1.3132668271987158, "grad_norm": 0.24747517704963684, "learning_rate": 0.000623879553299421, "loss": 1.4999, "step": 2864 }, { "epoch": 1.3137254901960784, "grad_norm": 0.4034758508205414, "learning_rate": 0.0006236396167026923, "loss": 1.2577, "step": 2865 }, { "epoch": 1.314184153193441, "grad_norm": 0.2698371410369873, "learning_rate": 0.0006233996497762741, "loss": 1.4708, "step": 2866 }, { "epoch": 1.314642816190804, "grad_norm": 0.26158544421195984, "learning_rate": 0.000623159652579032, "loss": 1.4747, "step": 2867 }, { "epoch": 1.3151014791881666, "grad_norm": 0.3202878534793854, "learning_rate": 0.0006229196251698394, "loss": 1.4017, "step": 2868 }, { "epoch": 1.3155601421855292, "grad_norm": 0.33199355006217957, "learning_rate": 0.0006226795676075762, "loss": 1.305, "step": 2869 }, { "epoch": 1.3160188051828918, "grad_norm": 0.29111596941947937, "learning_rate": 0.0006224394799511306, "loss": 1.4327, "step": 2870 }, { "epoch": 1.3164774681802545, "grad_norm": 0.2958594858646393, "learning_rate": 0.0006221993622593975, "loss": 1.2742, "step": 2871 }, { "epoch": 1.3169361311776173, "grad_norm": 0.08179383724927902, "learning_rate": 0.00062195921459128, "loss": 0.8383, "step": 2872 }, { "epoch": 1.31739479417498, "grad_norm": 0.3127470314502716, "learning_rate": 0.0006217190370056876, "loss": 1.7184, "step": 2873 }, { "epoch": 1.3178534571723426, "grad_norm": 0.2972666621208191, "learning_rate": 0.0006214788295615381, "loss": 1.6077, "step": 2874 }, { "epoch": 1.3183121201697052, "grad_norm": 0.3870674669742584, "learning_rate": 0.0006212385923177556, "loss": 1.2744, "step": 2875 }, { "epoch": 1.3187707831670679, "grad_norm": 0.2808718979358673, "learning_rate": 0.0006209983253332721, "loss": 1.2932, "step": 2876 }, { "epoch": 1.3192294461644307, "grad_norm": 0.2264634668827057, "learning_rate": 0.0006207580286670271, "loss": 1.3213, "step": 2877 }, { "epoch": 1.3196881091617934, "grad_norm": 0.3030881881713867, "learning_rate": 0.0006205177023779669, "loss": 1.1255, "step": 2878 }, { "epoch": 1.320146772159156, "grad_norm": 0.3098306953907013, "learning_rate": 0.0006202773465250456, "loss": 1.3304, "step": 2879 }, { "epoch": 1.3206054351565188, "grad_norm": 0.26796406507492065, "learning_rate": 0.0006200369611672236, "loss": 1.2411, "step": 2880 }, { "epoch": 1.3210640981538815, "grad_norm": 0.20453116297721863, "learning_rate": 0.0006197965463634696, "loss": 0.9436, "step": 2881 }, { "epoch": 1.3215227611512441, "grad_norm": 0.22560381889343262, "learning_rate": 0.000619556102172759, "loss": 1.673, "step": 2882 }, { "epoch": 1.3219814241486068, "grad_norm": 0.24689380824565887, "learning_rate": 0.0006193156286540745, "loss": 0.8455, "step": 2883 }, { "epoch": 1.3224400871459694, "grad_norm": 0.24661903083324432, "learning_rate": 0.000619075125866406, "loss": 0.9642, "step": 2884 }, { "epoch": 1.3228987501433322, "grad_norm": 0.2800322473049164, "learning_rate": 0.0006188345938687506, "loss": 1.3403, "step": 2885 }, { "epoch": 1.3233574131406949, "grad_norm": 0.21044687926769257, "learning_rate": 0.0006185940327201125, "loss": 0.7411, "step": 2886 }, { "epoch": 1.3238160761380575, "grad_norm": 0.319131463766098, "learning_rate": 0.000618353442479503, "loss": 1.756, "step": 2887 }, { "epoch": 1.3242747391354204, "grad_norm": 0.37177905440330505, "learning_rate": 0.0006181128232059407, "loss": 1.2766, "step": 2888 }, { "epoch": 1.324733402132783, "grad_norm": 0.30548402667045593, "learning_rate": 0.0006178721749584512, "loss": 1.2389, "step": 2889 }, { "epoch": 1.3251920651301456, "grad_norm": 0.30189475417137146, "learning_rate": 0.0006176314977960673, "loss": 1.2825, "step": 2890 }, { "epoch": 1.3256507281275083, "grad_norm": 0.11458157002925873, "learning_rate": 0.0006173907917778288, "loss": 0.5948, "step": 2891 }, { "epoch": 1.326109391124871, "grad_norm": 0.2670215666294098, "learning_rate": 0.0006171500569627827, "loss": 0.8963, "step": 2892 }, { "epoch": 1.3265680541222338, "grad_norm": 0.23610422015190125, "learning_rate": 0.0006169092934099828, "loss": 1.4451, "step": 2893 }, { "epoch": 1.3270267171195964, "grad_norm": 0.3228674530982971, "learning_rate": 0.0006166685011784904, "loss": 1.6901, "step": 2894 }, { "epoch": 1.327485380116959, "grad_norm": 0.323810875415802, "learning_rate": 0.0006164276803273734, "loss": 1.6255, "step": 2895 }, { "epoch": 1.3279440431143217, "grad_norm": 0.2585465908050537, "learning_rate": 0.000616186830915707, "loss": 0.8431, "step": 2896 }, { "epoch": 1.3284027061116843, "grad_norm": 0.2836966812610626, "learning_rate": 0.000615945953002573, "loss": 1.4153, "step": 2897 }, { "epoch": 1.3288613691090472, "grad_norm": 0.3975668251514435, "learning_rate": 0.0006157050466470608, "loss": 1.78, "step": 2898 }, { "epoch": 1.3293200321064098, "grad_norm": 0.3772067129611969, "learning_rate": 0.0006154641119082666, "loss": 2.2615, "step": 2899 }, { "epoch": 1.3297786951037724, "grad_norm": 0.39629077911376953, "learning_rate": 0.0006152231488452931, "loss": 1.9524, "step": 2900 }, { "epoch": 1.3302373581011353, "grad_norm": 0.26282697916030884, "learning_rate": 0.0006149821575172502, "loss": 0.9766, "step": 2901 }, { "epoch": 1.330696021098498, "grad_norm": 0.22965925931930542, "learning_rate": 0.0006147411379832553, "loss": 1.179, "step": 2902 }, { "epoch": 1.3311546840958606, "grad_norm": 0.2938666045665741, "learning_rate": 0.0006145000903024317, "loss": 1.5908, "step": 2903 }, { "epoch": 1.3316133470932232, "grad_norm": 0.35874783992767334, "learning_rate": 0.0006142590145339106, "loss": 1.3643, "step": 2904 }, { "epoch": 1.3320720100905858, "grad_norm": 0.3151531219482422, "learning_rate": 0.0006140179107368291, "loss": 1.5058, "step": 2905 }, { "epoch": 1.3325306730879487, "grad_norm": 0.3742069602012634, "learning_rate": 0.0006137767789703322, "loss": 1.6921, "step": 2906 }, { "epoch": 1.3329893360853113, "grad_norm": 0.19927988946437836, "learning_rate": 0.0006135356192935709, "loss": 1.2602, "step": 2907 }, { "epoch": 1.333447999082674, "grad_norm": 0.31676074862480164, "learning_rate": 0.0006132944317657035, "loss": 1.7969, "step": 2908 }, { "epoch": 1.3339066620800368, "grad_norm": 0.2802976965904236, "learning_rate": 0.0006130532164458952, "loss": 1.3497, "step": 2909 }, { "epoch": 1.3343653250773992, "grad_norm": 0.3053966760635376, "learning_rate": 0.0006128119733933179, "loss": 1.7183, "step": 2910 }, { "epoch": 1.334823988074762, "grad_norm": 0.3425951302051544, "learning_rate": 0.0006125707026671498, "loss": 1.333, "step": 2911 }, { "epoch": 1.3352826510721247, "grad_norm": 0.33375629782676697, "learning_rate": 0.0006123294043265768, "loss": 2.2371, "step": 2912 }, { "epoch": 1.3357413140694874, "grad_norm": 0.358460396528244, "learning_rate": 0.000612088078430791, "loss": 1.4041, "step": 2913 }, { "epoch": 1.3361999770668502, "grad_norm": 0.30156955122947693, "learning_rate": 0.0006118467250389914, "loss": 2.1921, "step": 2914 }, { "epoch": 1.3366586400642129, "grad_norm": 0.32735612988471985, "learning_rate": 0.0006116053442103836, "loss": 1.3602, "step": 2915 }, { "epoch": 1.3371173030615755, "grad_norm": 0.2227545529603958, "learning_rate": 0.0006113639360041803, "loss": 0.9218, "step": 2916 }, { "epoch": 1.3375759660589381, "grad_norm": 0.28981199860572815, "learning_rate": 0.0006111225004796004, "loss": 1.1524, "step": 2917 }, { "epoch": 1.3380346290563008, "grad_norm": 0.2632843554019928, "learning_rate": 0.0006108810376958699, "loss": 1.2719, "step": 2918 }, { "epoch": 1.3384932920536636, "grad_norm": 0.3473341166973114, "learning_rate": 0.0006106395477122213, "loss": 1.3489, "step": 2919 }, { "epoch": 1.3389519550510263, "grad_norm": 0.10266976803541183, "learning_rate": 0.000610398030587894, "loss": 0.5534, "step": 2920 }, { "epoch": 1.339410618048389, "grad_norm": 0.29631996154785156, "learning_rate": 0.0006101564863821338, "loss": 1.2847, "step": 2921 }, { "epoch": 1.3398692810457518, "grad_norm": 0.2845175266265869, "learning_rate": 0.0006099149151541931, "loss": 1.7554, "step": 2922 }, { "epoch": 1.3403279440431144, "grad_norm": 0.2774314284324646, "learning_rate": 0.0006096733169633314, "loss": 1.5494, "step": 2923 }, { "epoch": 1.340786607040477, "grad_norm": 0.31548067927360535, "learning_rate": 0.0006094316918688141, "loss": 1.1424, "step": 2924 }, { "epoch": 1.3412452700378397, "grad_norm": 0.357732892036438, "learning_rate": 0.0006091900399299138, "loss": 1.3782, "step": 2925 }, { "epoch": 1.3417039330352023, "grad_norm": 0.2636417746543884, "learning_rate": 0.0006089483612059093, "loss": 1.6639, "step": 2926 }, { "epoch": 1.3421625960325652, "grad_norm": 0.32517683506011963, "learning_rate": 0.0006087066557560865, "loss": 1.743, "step": 2927 }, { "epoch": 1.3426212590299278, "grad_norm": 0.3475479185581207, "learning_rate": 0.0006084649236397369, "loss": 1.6309, "step": 2928 }, { "epoch": 1.3430799220272904, "grad_norm": 0.3126490116119385, "learning_rate": 0.0006082231649161595, "loss": 1.4764, "step": 2929 }, { "epoch": 1.343538585024653, "grad_norm": 0.24782423675060272, "learning_rate": 0.0006079813796446596, "loss": 0.7667, "step": 2930 }, { "epoch": 1.3439972480220157, "grad_norm": 0.28448745608329773, "learning_rate": 0.0006077395678845486, "loss": 1.4008, "step": 2931 }, { "epoch": 1.3444559110193786, "grad_norm": 0.1474987417459488, "learning_rate": 0.0006074977296951448, "loss": 0.5947, "step": 2932 }, { "epoch": 1.3449145740167412, "grad_norm": 0.20980101823806763, "learning_rate": 0.0006072558651357727, "loss": 1.2209, "step": 2933 }, { "epoch": 1.3453732370141038, "grad_norm": 0.377865731716156, "learning_rate": 0.0006070139742657635, "loss": 2.0645, "step": 2934 }, { "epoch": 1.3458319000114667, "grad_norm": 0.47778239846229553, "learning_rate": 0.0006067720571444551, "loss": 1.5841, "step": 2935 }, { "epoch": 1.3462905630088293, "grad_norm": 0.3798399865627289, "learning_rate": 0.000606530113831191, "loss": 1.7783, "step": 2936 }, { "epoch": 1.346749226006192, "grad_norm": 0.21647045016288757, "learning_rate": 0.0006062881443853218, "loss": 0.4051, "step": 2937 }, { "epoch": 1.3472078890035546, "grad_norm": 0.3337489664554596, "learning_rate": 0.0006060461488662044, "loss": 2.0281, "step": 2938 }, { "epoch": 1.3476665520009172, "grad_norm": 0.3525756895542145, "learning_rate": 0.0006058041273332021, "loss": 1.5044, "step": 2939 }, { "epoch": 1.34812521499828, "grad_norm": 0.24019671976566315, "learning_rate": 0.0006055620798456845, "loss": 1.3129, "step": 2940 }, { "epoch": 1.3485838779956427, "grad_norm": 0.37379053235054016, "learning_rate": 0.0006053200064630276, "loss": 2.0564, "step": 2941 }, { "epoch": 1.3490425409930054, "grad_norm": 0.2772229015827179, "learning_rate": 0.0006050779072446137, "loss": 1.0508, "step": 2942 }, { "epoch": 1.3495012039903682, "grad_norm": 0.15392114222049713, "learning_rate": 0.0006048357822498315, "loss": 1.0146, "step": 2943 }, { "epoch": 1.3499598669877306, "grad_norm": 0.3035467267036438, "learning_rate": 0.000604593631538076, "loss": 1.5189, "step": 2944 }, { "epoch": 1.3504185299850935, "grad_norm": 0.3849334120750427, "learning_rate": 0.0006043514551687484, "loss": 1.7883, "step": 2945 }, { "epoch": 1.3508771929824561, "grad_norm": 0.36944082379341125, "learning_rate": 0.0006041092532012566, "loss": 1.8156, "step": 2946 }, { "epoch": 1.3513358559798188, "grad_norm": 0.5630089044570923, "learning_rate": 0.0006038670256950144, "loss": 1.1769, "step": 2947 }, { "epoch": 1.3517945189771816, "grad_norm": 0.22693516314029694, "learning_rate": 0.0006036247727094418, "loss": 1.3827, "step": 2948 }, { "epoch": 1.3522531819745442, "grad_norm": 0.2991299033164978, "learning_rate": 0.0006033824943039651, "loss": 0.9545, "step": 2949 }, { "epoch": 1.3527118449719069, "grad_norm": 0.21848253905773163, "learning_rate": 0.0006031401905380173, "loss": 0.9471, "step": 2950 }, { "epoch": 1.3531705079692695, "grad_norm": 0.2379998415708542, "learning_rate": 0.000602897861471037, "loss": 1.1672, "step": 2951 }, { "epoch": 1.3536291709666322, "grad_norm": 0.28074660897254944, "learning_rate": 0.0006026555071624694, "loss": 1.7654, "step": 2952 }, { "epoch": 1.354087833963995, "grad_norm": 0.4257287383079529, "learning_rate": 0.0006024131276717657, "loss": 2.002, "step": 2953 }, { "epoch": 1.3545464969613576, "grad_norm": 0.9441617131233215, "learning_rate": 0.0006021707230583834, "loss": 1.7286, "step": 2954 }, { "epoch": 1.3550051599587203, "grad_norm": 0.24226488173007965, "learning_rate": 0.0006019282933817859, "loss": 1.2914, "step": 2955 }, { "epoch": 1.3554638229560831, "grad_norm": 0.3882712125778198, "learning_rate": 0.0006016858387014432, "loss": 2.168, "step": 2956 }, { "epoch": 1.3559224859534458, "grad_norm": 0.2512166202068329, "learning_rate": 0.0006014433590768311, "loss": 1.1456, "step": 2957 }, { "epoch": 1.3563811489508084, "grad_norm": 0.17546239495277405, "learning_rate": 0.0006012008545674316, "loss": 1.2354, "step": 2958 }, { "epoch": 1.356839811948171, "grad_norm": 0.35493841767311096, "learning_rate": 0.0006009583252327326, "loss": 1.7339, "step": 2959 }, { "epoch": 1.3572984749455337, "grad_norm": 0.381168931722641, "learning_rate": 0.0006007157711322286, "loss": 2.2961, "step": 2960 }, { "epoch": 1.3577571379428965, "grad_norm": 0.229428231716156, "learning_rate": 0.0006004731923254197, "loss": 0.6636, "step": 2961 }, { "epoch": 1.3582158009402592, "grad_norm": 0.3353422284126282, "learning_rate": 0.0006002305888718123, "loss": 1.9835, "step": 2962 }, { "epoch": 1.3586744639376218, "grad_norm": 0.32611238956451416, "learning_rate": 0.0005999879608309189, "loss": 1.601, "step": 2963 }, { "epoch": 1.3591331269349844, "grad_norm": 0.29757001996040344, "learning_rate": 0.0005997453082622575, "loss": 1.6636, "step": 2964 }, { "epoch": 1.359591789932347, "grad_norm": 0.2990017235279083, "learning_rate": 0.0005995026312253527, "loss": 1.4233, "step": 2965 }, { "epoch": 1.36005045292971, "grad_norm": 0.3065161108970642, "learning_rate": 0.0005992599297797354, "loss": 1.7319, "step": 2966 }, { "epoch": 1.3605091159270726, "grad_norm": 0.2563233971595764, "learning_rate": 0.0005990172039849413, "loss": 0.6647, "step": 2967 }, { "epoch": 1.3609677789244352, "grad_norm": 0.12097487598657608, "learning_rate": 0.0005987744539005134, "loss": 0.9222, "step": 2968 }, { "epoch": 1.361426441921798, "grad_norm": 0.2720169723033905, "learning_rate": 0.0005985316795859994, "loss": 0.8245, "step": 2969 }, { "epoch": 1.3618851049191607, "grad_norm": 0.2813059985637665, "learning_rate": 0.0005982888811009541, "loss": 1.371, "step": 2970 }, { "epoch": 1.3623437679165233, "grad_norm": 0.32221513986587524, "learning_rate": 0.0005980460585049377, "loss": 1.9143, "step": 2971 }, { "epoch": 1.362802430913886, "grad_norm": 0.3392675817012787, "learning_rate": 0.0005978032118575162, "loss": 1.5422, "step": 2972 }, { "epoch": 1.3632610939112486, "grad_norm": 0.26565980911254883, "learning_rate": 0.0005975603412182615, "loss": 1.222, "step": 2973 }, { "epoch": 1.3637197569086115, "grad_norm": 0.32214289903640747, "learning_rate": 0.0005973174466467516, "loss": 2.3055, "step": 2974 }, { "epoch": 1.364178419905974, "grad_norm": 0.3520325720310211, "learning_rate": 0.0005970745282025705, "loss": 1.8467, "step": 2975 }, { "epoch": 1.3646370829033367, "grad_norm": 0.42569711804389954, "learning_rate": 0.0005968315859453075, "loss": 2.2639, "step": 2976 }, { "epoch": 1.3650957459006996, "grad_norm": 0.32188594341278076, "learning_rate": 0.0005965886199345584, "loss": 1.6467, "step": 2977 }, { "epoch": 1.365554408898062, "grad_norm": 0.3434820771217346, "learning_rate": 0.0005963456302299245, "loss": 1.8241, "step": 2978 }, { "epoch": 1.3660130718954249, "grad_norm": 0.4579332172870636, "learning_rate": 0.0005961026168910126, "loss": 1.7689, "step": 2979 }, { "epoch": 1.3664717348927875, "grad_norm": 0.32917508482933044, "learning_rate": 0.000595859579977436, "loss": 1.7894, "step": 2980 }, { "epoch": 1.3669303978901501, "grad_norm": 0.2983636260032654, "learning_rate": 0.0005956165195488131, "loss": 1.6581, "step": 2981 }, { "epoch": 1.367389060887513, "grad_norm": 0.23157159984111786, "learning_rate": 0.0005953734356647686, "loss": 0.804, "step": 2982 }, { "epoch": 1.3678477238848756, "grad_norm": 0.2740468680858612, "learning_rate": 0.0005951303283849329, "loss": 1.2906, "step": 2983 }, { "epoch": 1.3683063868822383, "grad_norm": 0.15333612263202667, "learning_rate": 0.0005948871977689414, "loss": 0.7368, "step": 2984 }, { "epoch": 1.368765049879601, "grad_norm": 0.3173515796661377, "learning_rate": 0.0005946440438764362, "loss": 2.1804, "step": 2985 }, { "epoch": 1.3692237128769635, "grad_norm": 0.3675207495689392, "learning_rate": 0.0005944008667670646, "loss": 1.7012, "step": 2986 }, { "epoch": 1.3696823758743264, "grad_norm": 0.42129290103912354, "learning_rate": 0.0005941576665004798, "loss": 1.2498, "step": 2987 }, { "epoch": 1.370141038871689, "grad_norm": 0.24997709691524506, "learning_rate": 0.0005939144431363404, "loss": 1.4775, "step": 2988 }, { "epoch": 1.3705997018690517, "grad_norm": 0.2527235448360443, "learning_rate": 0.0005936711967343111, "loss": 1.2377, "step": 2989 }, { "epoch": 1.3710583648664145, "grad_norm": 0.2272566556930542, "learning_rate": 0.0005934279273540616, "loss": 1.5359, "step": 2990 }, { "epoch": 1.3715170278637772, "grad_norm": 0.30363374948501587, "learning_rate": 0.000593184635055268, "loss": 1.6325, "step": 2991 }, { "epoch": 1.3719756908611398, "grad_norm": 0.352542519569397, "learning_rate": 0.0005929413198976115, "loss": 1.656, "step": 2992 }, { "epoch": 1.3724343538585024, "grad_norm": 0.3105866611003876, "learning_rate": 0.0005926979819407791, "loss": 1.9192, "step": 2993 }, { "epoch": 1.372893016855865, "grad_norm": 0.2809374928474426, "learning_rate": 0.0005924546212444634, "loss": 1.3723, "step": 2994 }, { "epoch": 1.373351679853228, "grad_norm": 0.32282719016075134, "learning_rate": 0.0005922112378683624, "loss": 1.6761, "step": 2995 }, { "epoch": 1.3738103428505906, "grad_norm": 0.2368834912776947, "learning_rate": 0.0005919678318721797, "loss": 1.1937, "step": 2996 }, { "epoch": 1.3742690058479532, "grad_norm": 0.2217353880405426, "learning_rate": 0.000591724403315625, "loss": 1.2874, "step": 2997 }, { "epoch": 1.3747276688453158, "grad_norm": 0.2690431773662567, "learning_rate": 0.0005914809522584127, "loss": 1.4406, "step": 2998 }, { "epoch": 1.3751863318426785, "grad_norm": 0.20234516263008118, "learning_rate": 0.0005912374787602632, "loss": 1.3164, "step": 2999 }, { "epoch": 1.3756449948400413, "grad_norm": 0.265828937292099, "learning_rate": 0.0005909939828809024, "loss": 1.2651, "step": 3000 }, { "epoch": 1.376103657837404, "grad_norm": 0.28443530201911926, "learning_rate": 0.0005907504646800613, "loss": 1.1567, "step": 3001 }, { "epoch": 1.3765623208347666, "grad_norm": 0.2254990190267563, "learning_rate": 0.0005905069242174769, "loss": 1.4349, "step": 3002 }, { "epoch": 1.3770209838321295, "grad_norm": 0.2833521366119385, "learning_rate": 0.0005902633615528916, "loss": 1.8909, "step": 3003 }, { "epoch": 1.377479646829492, "grad_norm": 0.3044750392436981, "learning_rate": 0.0005900197767460527, "loss": 1.5297, "step": 3004 }, { "epoch": 1.3779383098268547, "grad_norm": 0.29396411776542664, "learning_rate": 0.0005897761698567135, "loss": 1.2523, "step": 3005 }, { "epoch": 1.3783969728242174, "grad_norm": 0.24863453209400177, "learning_rate": 0.0005895325409446327, "loss": 1.5853, "step": 3006 }, { "epoch": 1.37885563582158, "grad_norm": 0.31140193343162537, "learning_rate": 0.0005892888900695738, "loss": 1.259, "step": 3007 }, { "epoch": 1.3793142988189429, "grad_norm": 0.21979323029518127, "learning_rate": 0.0005890452172913065, "loss": 1.415, "step": 3008 }, { "epoch": 1.3797729618163055, "grad_norm": 0.3025084435939789, "learning_rate": 0.0005888015226696053, "loss": 0.7619, "step": 3009 }, { "epoch": 1.3802316248136681, "grad_norm": 0.11269453167915344, "learning_rate": 0.0005885578062642502, "loss": 1.0517, "step": 3010 }, { "epoch": 1.380690287811031, "grad_norm": 0.2220744490623474, "learning_rate": 0.0005883140681350269, "loss": 0.8101, "step": 3011 }, { "epoch": 1.3811489508083934, "grad_norm": 0.25374385714530945, "learning_rate": 0.0005880703083417258, "loss": 1.2319, "step": 3012 }, { "epoch": 1.3816076138057563, "grad_norm": 0.26675471663475037, "learning_rate": 0.0005878265269441432, "loss": 1.4619, "step": 3013 }, { "epoch": 1.3820662768031189, "grad_norm": 0.09885770082473755, "learning_rate": 0.00058758272400208, "loss": 0.6359, "step": 3014 }, { "epoch": 1.3825249398004815, "grad_norm": 0.16511155664920807, "learning_rate": 0.0005873388995753432, "loss": 1.0938, "step": 3015 }, { "epoch": 1.3829836027978444, "grad_norm": 0.3333290219306946, "learning_rate": 0.0005870950537237446, "loss": 1.7571, "step": 3016 }, { "epoch": 1.383442265795207, "grad_norm": 0.26791778206825256, "learning_rate": 0.0005868511865071013, "loss": 0.9278, "step": 3017 }, { "epoch": 1.3839009287925697, "grad_norm": 0.2120082974433899, "learning_rate": 0.0005866072979852358, "loss": 1.2115, "step": 3018 }, { "epoch": 1.3843595917899323, "grad_norm": 0.41435691714286804, "learning_rate": 0.0005863633882179758, "loss": 2.1711, "step": 3019 }, { "epoch": 1.384818254787295, "grad_norm": 0.2728036046028137, "learning_rate": 0.0005861194572651537, "loss": 1.5656, "step": 3020 }, { "epoch": 1.3852769177846578, "grad_norm": 0.3878629803657532, "learning_rate": 0.0005858755051866078, "loss": 1.8513, "step": 3021 }, { "epoch": 1.3857355807820204, "grad_norm": 0.3821435570716858, "learning_rate": 0.0005856315320421812, "loss": 1.7039, "step": 3022 }, { "epoch": 1.386194243779383, "grad_norm": 0.28816846013069153, "learning_rate": 0.0005853875378917225, "loss": 1.8958, "step": 3023 }, { "epoch": 1.386652906776746, "grad_norm": 0.24760308861732483, "learning_rate": 0.0005851435227950851, "loss": 0.9471, "step": 3024 }, { "epoch": 1.3871115697741085, "grad_norm": 0.20163798332214355, "learning_rate": 0.0005848994868121277, "loss": 1.524, "step": 3025 }, { "epoch": 1.3875702327714712, "grad_norm": 0.6160727143287659, "learning_rate": 0.0005846554300027138, "loss": 1.6717, "step": 3026 }, { "epoch": 1.3880288957688338, "grad_norm": 0.2782137095928192, "learning_rate": 0.0005844113524267128, "loss": 0.8874, "step": 3027 }, { "epoch": 1.3884875587661965, "grad_norm": 0.2697048783302307, "learning_rate": 0.0005841672541439982, "loss": 1.3064, "step": 3028 }, { "epoch": 1.3889462217635593, "grad_norm": 0.2576363980770111, "learning_rate": 0.0005839231352144493, "loss": 1.8171, "step": 3029 }, { "epoch": 1.389404884760922, "grad_norm": 0.34383904933929443, "learning_rate": 0.0005836789956979503, "loss": 1.5734, "step": 3030 }, { "epoch": 1.3898635477582846, "grad_norm": 0.2631419003009796, "learning_rate": 0.0005834348356543903, "loss": 1.2598, "step": 3031 }, { "epoch": 1.3903222107556472, "grad_norm": 0.3006002604961395, "learning_rate": 0.0005831906551436633, "loss": 1.4291, "step": 3032 }, { "epoch": 1.3907808737530098, "grad_norm": 0.13184750080108643, "learning_rate": 0.0005829464542256691, "loss": 0.9445, "step": 3033 }, { "epoch": 1.3912395367503727, "grad_norm": 0.38121703267097473, "learning_rate": 0.0005827022329603114, "loss": 1.3728, "step": 3034 }, { "epoch": 1.3916981997477353, "grad_norm": 0.3366886079311371, "learning_rate": 0.0005824579914074996, "loss": 2.0142, "step": 3035 }, { "epoch": 1.392156862745098, "grad_norm": 0.2520803213119507, "learning_rate": 0.0005822137296271481, "loss": 1.1912, "step": 3036 }, { "epoch": 1.3926155257424608, "grad_norm": 0.2898450493812561, "learning_rate": 0.0005819694476791757, "loss": 1.5584, "step": 3037 }, { "epoch": 1.3930741887398235, "grad_norm": 0.33184388279914856, "learning_rate": 0.000581725145623507, "loss": 1.6064, "step": 3038 }, { "epoch": 1.393532851737186, "grad_norm": 0.40687495470046997, "learning_rate": 0.0005814808235200708, "loss": 2.0935, "step": 3039 }, { "epoch": 1.3939915147345487, "grad_norm": 0.30613189935684204, "learning_rate": 0.000581236481428801, "loss": 1.0827, "step": 3040 }, { "epoch": 1.3944501777319114, "grad_norm": 0.350396066904068, "learning_rate": 0.0005809921194096365, "loss": 1.9321, "step": 3041 }, { "epoch": 1.3949088407292742, "grad_norm": 0.2401767075061798, "learning_rate": 0.0005807477375225212, "loss": 1.1565, "step": 3042 }, { "epoch": 1.3953675037266369, "grad_norm": 0.29170459508895874, "learning_rate": 0.0005805033358274037, "loss": 1.3792, "step": 3043 }, { "epoch": 1.3958261667239995, "grad_norm": 0.3244495689868927, "learning_rate": 0.0005802589143842374, "loss": 1.5734, "step": 3044 }, { "epoch": 1.3962848297213624, "grad_norm": 0.35495296120643616, "learning_rate": 0.0005800144732529808, "loss": 1.4654, "step": 3045 }, { "epoch": 1.396743492718725, "grad_norm": 0.31566861271858215, "learning_rate": 0.0005797700124935968, "loss": 2.1155, "step": 3046 }, { "epoch": 1.3972021557160876, "grad_norm": 0.35331422090530396, "learning_rate": 0.0005795255321660536, "loss": 1.4407, "step": 3047 }, { "epoch": 1.3976608187134503, "grad_norm": 0.46943673491477966, "learning_rate": 0.000579281032330324, "loss": 0.9726, "step": 3048 }, { "epoch": 1.398119481710813, "grad_norm": 0.3449120819568634, "learning_rate": 0.0005790365130463857, "loss": 1.9149, "step": 3049 }, { "epoch": 1.3985781447081758, "grad_norm": 0.45382195711135864, "learning_rate": 0.0005787919743742207, "loss": 1.5399, "step": 3050 }, { "epoch": 1.3990368077055384, "grad_norm": 0.2743489742279053, "learning_rate": 0.0005785474163738163, "loss": 1.3011, "step": 3051 }, { "epoch": 1.399495470702901, "grad_norm": 0.24921023845672607, "learning_rate": 0.0005783028391051642, "loss": 1.4218, "step": 3052 }, { "epoch": 1.3999541337002637, "grad_norm": 0.25878721475601196, "learning_rate": 0.0005780582426282611, "loss": 1.3025, "step": 3053 }, { "epoch": 1.4004127966976263, "grad_norm": 0.24198046326637268, "learning_rate": 0.0005778136270031083, "loss": 0.8168, "step": 3054 }, { "epoch": 1.4008714596949892, "grad_norm": 0.259734183549881, "learning_rate": 0.0005775689922897119, "loss": 0.9978, "step": 3055 }, { "epoch": 1.4013301226923518, "grad_norm": 0.2518146336078644, "learning_rate": 0.0005773243385480822, "loss": 1.4738, "step": 3056 }, { "epoch": 1.4017887856897144, "grad_norm": 0.16398538649082184, "learning_rate": 0.0005770796658382347, "loss": 1.2174, "step": 3057 }, { "epoch": 1.4022474486870773, "grad_norm": 0.2645060122013092, "learning_rate": 0.0005768349742201895, "loss": 1.6903, "step": 3058 }, { "epoch": 1.40270611168444, "grad_norm": 0.30692818760871887, "learning_rate": 0.0005765902637539709, "loss": 1.3245, "step": 3059 }, { "epoch": 1.4031647746818026, "grad_norm": 0.19634264707565308, "learning_rate": 0.0005763455344996086, "loss": 1.1637, "step": 3060 }, { "epoch": 1.4036234376791652, "grad_norm": 0.2435206174850464, "learning_rate": 0.0005761007865171361, "loss": 1.6165, "step": 3061 }, { "epoch": 1.4040821006765278, "grad_norm": 0.17027845978736877, "learning_rate": 0.0005758560198665921, "loss": 0.8738, "step": 3062 }, { "epoch": 1.4045407636738907, "grad_norm": 0.23753295838832855, "learning_rate": 0.0005756112346080192, "loss": 1.6266, "step": 3063 }, { "epoch": 1.4049994266712533, "grad_norm": 0.26798611879348755, "learning_rate": 0.0005753664308014655, "loss": 1.7124, "step": 3064 }, { "epoch": 1.405458089668616, "grad_norm": 0.4665045738220215, "learning_rate": 0.0005751216085069828, "loss": 2.0098, "step": 3065 }, { "epoch": 1.4059167526659786, "grad_norm": 0.279823362827301, "learning_rate": 0.0005748767677846279, "loss": 1.3131, "step": 3066 }, { "epoch": 1.4063754156633412, "grad_norm": 0.27207767963409424, "learning_rate": 0.000574631908694462, "loss": 1.1439, "step": 3067 }, { "epoch": 1.406834078660704, "grad_norm": 0.30169880390167236, "learning_rate": 0.0005743870312965506, "loss": 1.2415, "step": 3068 }, { "epoch": 1.4072927416580667, "grad_norm": 0.20790642499923706, "learning_rate": 0.0005741421356509645, "loss": 1.0963, "step": 3069 }, { "epoch": 1.4077514046554294, "grad_norm": 0.3159151077270508, "learning_rate": 0.0005738972218177774, "loss": 1.3106, "step": 3070 }, { "epoch": 1.4082100676527922, "grad_norm": 0.2372143566608429, "learning_rate": 0.0005736522898570694, "loss": 1.2921, "step": 3071 }, { "epoch": 1.4086687306501549, "grad_norm": 0.37024182081222534, "learning_rate": 0.0005734073398289234, "loss": 2.0061, "step": 3072 }, { "epoch": 1.4091273936475175, "grad_norm": 0.38484346866607666, "learning_rate": 0.0005731623717934275, "loss": 1.5103, "step": 3073 }, { "epoch": 1.4095860566448801, "grad_norm": 0.33676987886428833, "learning_rate": 0.0005729173858106746, "loss": 1.8154, "step": 3074 }, { "epoch": 1.4100447196422428, "grad_norm": 0.35367336869239807, "learning_rate": 0.0005726723819407611, "loss": 1.9507, "step": 3075 }, { "epoch": 1.4105033826396056, "grad_norm": 0.3594896197319031, "learning_rate": 0.0005724273602437882, "loss": 1.793, "step": 3076 }, { "epoch": 1.4109620456369683, "grad_norm": 0.3166644275188446, "learning_rate": 0.0005721823207798616, "loss": 1.2424, "step": 3077 }, { "epoch": 1.411420708634331, "grad_norm": 0.30905553698539734, "learning_rate": 0.0005719372636090914, "loss": 1.4936, "step": 3078 }, { "epoch": 1.4118793716316937, "grad_norm": 0.3719440698623657, "learning_rate": 0.0005716921887915916, "loss": 1.8947, "step": 3079 }, { "epoch": 1.4123380346290564, "grad_norm": 0.35400110483169556, "learning_rate": 0.000571447096387481, "loss": 1.325, "step": 3080 }, { "epoch": 1.412796697626419, "grad_norm": 0.2209610939025879, "learning_rate": 0.0005712019864568827, "loss": 1.1089, "step": 3081 }, { "epoch": 1.4132553606237817, "grad_norm": 0.3398480713367462, "learning_rate": 0.0005709568590599234, "loss": 1.7584, "step": 3082 }, { "epoch": 1.4137140236211443, "grad_norm": 0.3765453100204468, "learning_rate": 0.0005707117142567351, "loss": 2.1033, "step": 3083 }, { "epoch": 1.4141726866185071, "grad_norm": 0.2562631070613861, "learning_rate": 0.0005704665521074534, "loss": 1.5005, "step": 3084 }, { "epoch": 1.4146313496158698, "grad_norm": 0.32559189200401306, "learning_rate": 0.0005702213726722185, "loss": 1.1359, "step": 3085 }, { "epoch": 1.4150900126132324, "grad_norm": 0.2984718084335327, "learning_rate": 0.0005699761760111745, "loss": 1.4721, "step": 3086 }, { "epoch": 1.415548675610595, "grad_norm": 0.3191785216331482, "learning_rate": 0.00056973096218447, "loss": 1.773, "step": 3087 }, { "epoch": 1.4160073386079577, "grad_norm": 0.3590591251850128, "learning_rate": 0.0005694857312522576, "loss": 1.7094, "step": 3088 }, { "epoch": 1.4164660016053205, "grad_norm": 0.23591914772987366, "learning_rate": 0.0005692404832746944, "loss": 1.0517, "step": 3089 }, { "epoch": 1.4169246646026832, "grad_norm": 0.28452879190444946, "learning_rate": 0.0005689952183119413, "loss": 1.6881, "step": 3090 }, { "epoch": 1.4173833276000458, "grad_norm": 0.32875385880470276, "learning_rate": 0.0005687499364241637, "loss": 1.7948, "step": 3091 }, { "epoch": 1.4178419905974087, "grad_norm": 0.3080821931362152, "learning_rate": 0.0005685046376715311, "loss": 1.4707, "step": 3092 }, { "epoch": 1.4183006535947713, "grad_norm": 0.3038639724254608, "learning_rate": 0.0005682593221142168, "loss": 1.2142, "step": 3093 }, { "epoch": 1.418759316592134, "grad_norm": 0.3059542775154114, "learning_rate": 0.0005680139898123985, "loss": 1.2471, "step": 3094 }, { "epoch": 1.4192179795894966, "grad_norm": 0.2539036273956299, "learning_rate": 0.0005677686408262583, "loss": 1.058, "step": 3095 }, { "epoch": 1.4196766425868592, "grad_norm": 0.17899896204471588, "learning_rate": 0.0005675232752159818, "loss": 1.3064, "step": 3096 }, { "epoch": 1.420135305584222, "grad_norm": 0.3230034112930298, "learning_rate": 0.0005672778930417592, "loss": 1.6761, "step": 3097 }, { "epoch": 1.4205939685815847, "grad_norm": 0.2745686173439026, "learning_rate": 0.0005670324943637842, "loss": 0.7442, "step": 3098 }, { "epoch": 1.4210526315789473, "grad_norm": 0.2400350570678711, "learning_rate": 0.0005667870792422547, "loss": 0.937, "step": 3099 }, { "epoch": 1.42151129457631, "grad_norm": 0.3091734051704407, "learning_rate": 0.0005665416477373734, "loss": 1.6765, "step": 3100 }, { "epoch": 1.4219699575736726, "grad_norm": 0.217657670378685, "learning_rate": 0.0005662961999093461, "loss": 0.8566, "step": 3101 }, { "epoch": 1.4224286205710355, "grad_norm": 0.18862880766391754, "learning_rate": 0.0005660507358183829, "loss": 1.1898, "step": 3102 }, { "epoch": 1.422887283568398, "grad_norm": 0.3786904513835907, "learning_rate": 0.000565805255524698, "loss": 1.9863, "step": 3103 }, { "epoch": 1.4233459465657607, "grad_norm": 0.3076726496219635, "learning_rate": 0.0005655597590885095, "loss": 1.2256, "step": 3104 }, { "epoch": 1.4238046095631236, "grad_norm": 0.24448339641094208, "learning_rate": 0.0005653142465700392, "loss": 1.7419, "step": 3105 }, { "epoch": 1.4242632725604862, "grad_norm": 0.28757891058921814, "learning_rate": 0.0005650687180295134, "loss": 1.0779, "step": 3106 }, { "epoch": 1.4247219355578489, "grad_norm": 0.0921797975897789, "learning_rate": 0.0005648231735271619, "loss": 0.5927, "step": 3107 }, { "epoch": 1.4251805985552115, "grad_norm": 0.2944778800010681, "learning_rate": 0.0005645776131232186, "loss": 1.6512, "step": 3108 }, { "epoch": 1.4256392615525741, "grad_norm": 0.24034199118614197, "learning_rate": 0.000564332036877921, "loss": 1.505, "step": 3109 }, { "epoch": 1.426097924549937, "grad_norm": 0.245818093419075, "learning_rate": 0.000564086444851511, "loss": 1.2884, "step": 3110 }, { "epoch": 1.4265565875472996, "grad_norm": 0.29974740743637085, "learning_rate": 0.000563840837104234, "loss": 0.8771, "step": 3111 }, { "epoch": 1.4270152505446623, "grad_norm": 0.2733640968799591, "learning_rate": 0.0005635952136963393, "loss": 1.5035, "step": 3112 }, { "epoch": 1.4274739135420251, "grad_norm": 0.2323792427778244, "learning_rate": 0.0005633495746880801, "loss": 1.1803, "step": 3113 }, { "epoch": 1.4279325765393878, "grad_norm": 0.438142329454422, "learning_rate": 0.0005631039201397136, "loss": 2.2593, "step": 3114 }, { "epoch": 1.4283912395367504, "grad_norm": 0.2616313099861145, "learning_rate": 0.0005628582501115004, "loss": 1.3368, "step": 3115 }, { "epoch": 1.428849902534113, "grad_norm": 0.2123333215713501, "learning_rate": 0.0005626125646637051, "loss": 1.0031, "step": 3116 }, { "epoch": 1.4293085655314757, "grad_norm": 0.32749924063682556, "learning_rate": 0.0005623668638565964, "loss": 1.4506, "step": 3117 }, { "epoch": 1.4297672285288385, "grad_norm": 0.2560507655143738, "learning_rate": 0.0005621211477504463, "loss": 1.7554, "step": 3118 }, { "epoch": 1.4302258915262012, "grad_norm": 0.3207387924194336, "learning_rate": 0.0005618754164055306, "loss": 1.4721, "step": 3119 }, { "epoch": 1.4306845545235638, "grad_norm": 0.3025607764720917, "learning_rate": 0.0005616296698821293, "loss": 0.993, "step": 3120 }, { "epoch": 1.4311432175209264, "grad_norm": 0.22865654528141022, "learning_rate": 0.0005613839082405255, "loss": 1.5714, "step": 3121 }, { "epoch": 1.431601880518289, "grad_norm": 0.31752631068229675, "learning_rate": 0.0005611381315410065, "loss": 1.4345, "step": 3122 }, { "epoch": 1.432060543515652, "grad_norm": 0.29477816820144653, "learning_rate": 0.0005608923398438628, "loss": 1.9834, "step": 3123 }, { "epoch": 1.4325192065130146, "grad_norm": 0.30475521087646484, "learning_rate": 0.0005606465332093892, "loss": 1.5315, "step": 3124 }, { "epoch": 1.4329778695103772, "grad_norm": 0.3452279567718506, "learning_rate": 0.0005604007116978836, "loss": 1.2822, "step": 3125 }, { "epoch": 1.43343653250774, "grad_norm": 0.3388407230377197, "learning_rate": 0.0005601548753696481, "loss": 1.6831, "step": 3126 }, { "epoch": 1.4338951955051027, "grad_norm": 0.3091072142124176, "learning_rate": 0.0005599090242849878, "loss": 0.8168, "step": 3127 }, { "epoch": 1.4343538585024653, "grad_norm": 0.27518755197525024, "learning_rate": 0.000559663158504212, "loss": 1.9319, "step": 3128 }, { "epoch": 1.434812521499828, "grad_norm": 0.2800983488559723, "learning_rate": 0.0005594172780876332, "loss": 1.3327, "step": 3129 }, { "epoch": 1.4352711844971906, "grad_norm": 0.34306713938713074, "learning_rate": 0.0005591713830955674, "loss": 1.2191, "step": 3130 }, { "epoch": 1.4357298474945535, "grad_norm": 0.19693058729171753, "learning_rate": 0.000558925473588335, "loss": 1.3058, "step": 3131 }, { "epoch": 1.436188510491916, "grad_norm": 0.42898425459861755, "learning_rate": 0.000558679549626259, "loss": 1.7591, "step": 3132 }, { "epoch": 1.4366471734892787, "grad_norm": 0.3185971975326538, "learning_rate": 0.0005584336112696663, "loss": 2.0237, "step": 3133 }, { "epoch": 1.4371058364866414, "grad_norm": 0.18667162954807281, "learning_rate": 0.0005581876585788875, "loss": 0.9062, "step": 3134 }, { "epoch": 1.437564499484004, "grad_norm": 0.2841646671295166, "learning_rate": 0.0005579416916142564, "loss": 1.2529, "step": 3135 }, { "epoch": 1.4380231624813669, "grad_norm": 0.27004119753837585, "learning_rate": 0.0005576957104361106, "loss": 1.3765, "step": 3136 }, { "epoch": 1.4384818254787295, "grad_norm": 0.2476406842470169, "learning_rate": 0.0005574497151047911, "loss": 1.1895, "step": 3137 }, { "epoch": 1.4389404884760921, "grad_norm": 0.25219327211380005, "learning_rate": 0.0005572037056806421, "loss": 1.1415, "step": 3138 }, { "epoch": 1.439399151473455, "grad_norm": 0.31320807337760925, "learning_rate": 0.0005569576822240118, "loss": 2.229, "step": 3139 }, { "epoch": 1.4398578144708176, "grad_norm": 0.3259999752044678, "learning_rate": 0.0005567116447952513, "loss": 1.7386, "step": 3140 }, { "epoch": 1.4403164774681803, "grad_norm": 0.1600116491317749, "learning_rate": 0.0005564655934547154, "loss": 0.7964, "step": 3141 }, { "epoch": 1.440775140465543, "grad_norm": 0.2874891757965088, "learning_rate": 0.0005562195282627624, "loss": 1.3448, "step": 3142 }, { "epoch": 1.4412338034629055, "grad_norm": 0.29961147904396057, "learning_rate": 0.0005559734492797536, "loss": 1.353, "step": 3143 }, { "epoch": 1.4416924664602684, "grad_norm": 0.4548627734184265, "learning_rate": 0.0005557273565660541, "loss": 1.9115, "step": 3144 }, { "epoch": 1.442151129457631, "grad_norm": 0.25489258766174316, "learning_rate": 0.0005554812501820322, "loss": 1.1127, "step": 3145 }, { "epoch": 1.4426097924549937, "grad_norm": 0.3416697680950165, "learning_rate": 0.0005552351301880597, "loss": 2.1523, "step": 3146 }, { "epoch": 1.4430684554523565, "grad_norm": 0.29285651445388794, "learning_rate": 0.0005549889966445115, "loss": 1.4188, "step": 3147 }, { "epoch": 1.4435271184497191, "grad_norm": 0.3054216206073761, "learning_rate": 0.0005547428496117659, "loss": 1.0746, "step": 3148 }, { "epoch": 1.4439857814470818, "grad_norm": 0.2599756717681885, "learning_rate": 0.0005544966891502046, "loss": 1.3993, "step": 3149 }, { "epoch": 1.4444444444444444, "grad_norm": 0.3153897821903229, "learning_rate": 0.0005542505153202124, "loss": 1.8159, "step": 3150 }, { "epoch": 1.444903107441807, "grad_norm": 0.5788086652755737, "learning_rate": 0.0005540043281821777, "loss": 2.1663, "step": 3151 }, { "epoch": 1.44536177043917, "grad_norm": 0.2490013986825943, "learning_rate": 0.0005537581277964919, "loss": 1.3651, "step": 3152 }, { "epoch": 1.4458204334365325, "grad_norm": 0.28390905261039734, "learning_rate": 0.0005535119142235499, "loss": 0.8854, "step": 3153 }, { "epoch": 1.4462790964338952, "grad_norm": 0.31385406851768494, "learning_rate": 0.0005532656875237492, "loss": 1.4232, "step": 3154 }, { "epoch": 1.4467377594312578, "grad_norm": 0.3778940439224243, "learning_rate": 0.0005530194477574914, "loss": 1.9998, "step": 3155 }, { "epoch": 1.4471964224286205, "grad_norm": 0.18103332817554474, "learning_rate": 0.0005527731949851809, "loss": 0.7658, "step": 3156 }, { "epoch": 1.4476550854259833, "grad_norm": 0.26208803057670593, "learning_rate": 0.000552526929267225, "loss": 1.3012, "step": 3157 }, { "epoch": 1.448113748423346, "grad_norm": 0.23250733315944672, "learning_rate": 0.0005522806506640346, "loss": 1.4842, "step": 3158 }, { "epoch": 1.4485724114207086, "grad_norm": 0.38623639941215515, "learning_rate": 0.0005520343592360238, "loss": 2.0955, "step": 3159 }, { "epoch": 1.4490310744180714, "grad_norm": 0.30866172909736633, "learning_rate": 0.0005517880550436094, "loss": 1.6685, "step": 3160 }, { "epoch": 1.449489737415434, "grad_norm": 0.1992468386888504, "learning_rate": 0.0005515417381472117, "loss": 0.969, "step": 3161 }, { "epoch": 1.4499484004127967, "grad_norm": 0.29209160804748535, "learning_rate": 0.000551295408607254, "loss": 2.0913, "step": 3162 }, { "epoch": 1.4504070634101593, "grad_norm": 0.19212748110294342, "learning_rate": 0.0005510490664841626, "loss": 0.8058, "step": 3163 }, { "epoch": 1.450865726407522, "grad_norm": 0.31732991337776184, "learning_rate": 0.0005508027118383673, "loss": 1.6986, "step": 3164 }, { "epoch": 1.4513243894048848, "grad_norm": 0.23571406304836273, "learning_rate": 0.0005505563447303004, "loss": 1.1407, "step": 3165 }, { "epoch": 1.4517830524022475, "grad_norm": 0.24879354238510132, "learning_rate": 0.0005503099652203974, "loss": 1.7388, "step": 3166 }, { "epoch": 1.4522417153996101, "grad_norm": 0.28732791543006897, "learning_rate": 0.0005500635733690975, "loss": 1.2711, "step": 3167 }, { "epoch": 1.4527003783969727, "grad_norm": 0.2248416393995285, "learning_rate": 0.0005498171692368419, "loss": 1.1397, "step": 3168 }, { "epoch": 1.4531590413943354, "grad_norm": 0.34044063091278076, "learning_rate": 0.0005495707528840755, "loss": 1.819, "step": 3169 }, { "epoch": 1.4536177043916982, "grad_norm": 0.19185033440589905, "learning_rate": 0.0005493243243712461, "loss": 1.2428, "step": 3170 }, { "epoch": 1.4540763673890609, "grad_norm": 0.2840229570865631, "learning_rate": 0.0005490778837588041, "loss": 1.1735, "step": 3171 }, { "epoch": 1.4545350303864235, "grad_norm": 0.3059384822845459, "learning_rate": 0.0005488314311072035, "loss": 1.9674, "step": 3172 }, { "epoch": 1.4549936933837864, "grad_norm": 0.27836617827415466, "learning_rate": 0.0005485849664769008, "loss": 0.7415, "step": 3173 }, { "epoch": 1.455452356381149, "grad_norm": 0.21108005940914154, "learning_rate": 0.0005483384899283554, "loss": 1.1039, "step": 3174 }, { "epoch": 1.4559110193785116, "grad_norm": 0.24077321588993073, "learning_rate": 0.0005480920015220298, "loss": 1.6875, "step": 3175 }, { "epoch": 1.4563696823758743, "grad_norm": 0.3374916911125183, "learning_rate": 0.0005478455013183895, "loss": 1.938, "step": 3176 }, { "epoch": 1.456828345373237, "grad_norm": 0.3088974356651306, "learning_rate": 0.0005475989893779028, "loss": 1.232, "step": 3177 }, { "epoch": 1.4572870083705998, "grad_norm": 0.3156466782093048, "learning_rate": 0.0005473524657610409, "loss": 2.2339, "step": 3178 }, { "epoch": 1.4577456713679624, "grad_norm": 0.373761922121048, "learning_rate": 0.0005471059305282776, "loss": 1.6581, "step": 3179 }, { "epoch": 1.458204334365325, "grad_norm": 0.18877850472927094, "learning_rate": 0.00054685938374009, "loss": 0.885, "step": 3180 }, { "epoch": 1.458662997362688, "grad_norm": 0.2712680995464325, "learning_rate": 0.0005466128254569575, "loss": 1.3269, "step": 3181 }, { "epoch": 1.4591216603600505, "grad_norm": 0.17114239931106567, "learning_rate": 0.0005463662557393628, "loss": 0.8921, "step": 3182 }, { "epoch": 1.4595803233574132, "grad_norm": 0.22322210669517517, "learning_rate": 0.0005461196746477915, "loss": 1.4297, "step": 3183 }, { "epoch": 1.4600389863547758, "grad_norm": 0.24018515646457672, "learning_rate": 0.0005458730822427314, "loss": 0.8356, "step": 3184 }, { "epoch": 1.4604976493521384, "grad_norm": 0.18391193449497223, "learning_rate": 0.0005456264785846733, "loss": 1.4681, "step": 3185 }, { "epoch": 1.4609563123495013, "grad_norm": 0.33580252528190613, "learning_rate": 0.0005453798637341112, "loss": 1.371, "step": 3186 }, { "epoch": 1.461414975346864, "grad_norm": 0.2702253460884094, "learning_rate": 0.0005451332377515412, "loss": 1.6213, "step": 3187 }, { "epoch": 1.4618736383442266, "grad_norm": 0.32494962215423584, "learning_rate": 0.0005448866006974625, "loss": 1.9729, "step": 3188 }, { "epoch": 1.4623323013415892, "grad_norm": 0.27085816860198975, "learning_rate": 0.000544639952632377, "loss": 1.0135, "step": 3189 }, { "epoch": 1.4627909643389518, "grad_norm": 0.20625822246074677, "learning_rate": 0.0005443932936167895, "loss": 1.3878, "step": 3190 }, { "epoch": 1.4632496273363147, "grad_norm": 0.38351795077323914, "learning_rate": 0.0005441466237112068, "loss": 1.9377, "step": 3191 }, { "epoch": 1.4637082903336773, "grad_norm": 0.20434589684009552, "learning_rate": 0.000543899942976139, "loss": 1.1232, "step": 3192 }, { "epoch": 1.46416695333104, "grad_norm": 0.28791090846061707, "learning_rate": 0.0005436532514720986, "loss": 0.877, "step": 3193 }, { "epoch": 1.4646256163284028, "grad_norm": 0.27952685952186584, "learning_rate": 0.000543406549259601, "loss": 1.4366, "step": 3194 }, { "epoch": 1.4650842793257655, "grad_norm": 0.34718945622444153, "learning_rate": 0.0005431598363991639, "loss": 1.9646, "step": 3195 }, { "epoch": 1.465542942323128, "grad_norm": 0.3882228434085846, "learning_rate": 0.0005429131129513076, "loss": 1.6616, "step": 3196 }, { "epoch": 1.4660016053204907, "grad_norm": 0.2647295296192169, "learning_rate": 0.0005426663789765552, "loss": 1.4854, "step": 3197 }, { "epoch": 1.4664602683178534, "grad_norm": 0.28624817728996277, "learning_rate": 0.0005424196345354326, "loss": 1.9561, "step": 3198 }, { "epoch": 1.4669189313152162, "grad_norm": 0.3558148741722107, "learning_rate": 0.0005421728796884677, "loss": 1.7657, "step": 3199 }, { "epoch": 1.4673775943125789, "grad_norm": 0.29009684920310974, "learning_rate": 0.0005419261144961914, "loss": 1.6457, "step": 3200 }, { "epoch": 1.4678362573099415, "grad_norm": 0.3886021375656128, "learning_rate": 0.0005416793390191369, "loss": 1.6, "step": 3201 }, { "epoch": 1.4682949203073041, "grad_norm": 0.14990092813968658, "learning_rate": 0.0005414325533178398, "loss": 0.8758, "step": 3202 }, { "epoch": 1.4687535833046668, "grad_norm": 0.21671035885810852, "learning_rate": 0.0005411857574528389, "loss": 0.9272, "step": 3203 }, { "epoch": 1.4692122463020296, "grad_norm": 0.2458488643169403, "learning_rate": 0.0005409389514846746, "loss": 1.6566, "step": 3204 }, { "epoch": 1.4696709092993923, "grad_norm": 0.38461047410964966, "learning_rate": 0.0005406921354738904, "loss": 1.5311, "step": 3205 }, { "epoch": 1.470129572296755, "grad_norm": 0.31382817029953003, "learning_rate": 0.000540445309481032, "loss": 1.6455, "step": 3206 }, { "epoch": 1.4705882352941178, "grad_norm": 0.23861967027187347, "learning_rate": 0.0005401984735666474, "loss": 1.2702, "step": 3207 }, { "epoch": 1.4710468982914804, "grad_norm": 0.4511847496032715, "learning_rate": 0.0005399516277912873, "loss": 1.2289, "step": 3208 }, { "epoch": 1.471505561288843, "grad_norm": 0.3328205645084381, "learning_rate": 0.0005397047722155051, "loss": 1.4151, "step": 3209 }, { "epoch": 1.4719642242862057, "grad_norm": 0.10846666991710663, "learning_rate": 0.0005394579068998559, "loss": 0.8251, "step": 3210 }, { "epoch": 1.4724228872835683, "grad_norm": 0.3210654854774475, "learning_rate": 0.0005392110319048975, "loss": 1.7081, "step": 3211 }, { "epoch": 1.4728815502809312, "grad_norm": 0.1869962215423584, "learning_rate": 0.0005389641472911904, "loss": 1.0442, "step": 3212 }, { "epoch": 1.4733402132782938, "grad_norm": 0.3858005404472351, "learning_rate": 0.000538717253119297, "loss": 1.7048, "step": 3213 }, { "epoch": 1.4737988762756564, "grad_norm": 0.22492477297782898, "learning_rate": 0.0005384703494497821, "loss": 1.5041, "step": 3214 }, { "epoch": 1.4742575392730193, "grad_norm": 0.3225257694721222, "learning_rate": 0.0005382234363432134, "loss": 1.8363, "step": 3215 }, { "epoch": 1.474716202270382, "grad_norm": 0.37609943747520447, "learning_rate": 0.0005379765138601598, "loss": 2.0479, "step": 3216 }, { "epoch": 1.4751748652677446, "grad_norm": 0.38782861828804016, "learning_rate": 0.0005377295820611939, "loss": 1.3388, "step": 3217 }, { "epoch": 1.4756335282651072, "grad_norm": 0.40626588463783264, "learning_rate": 0.0005374826410068891, "loss": 1.9222, "step": 3218 }, { "epoch": 1.4760921912624698, "grad_norm": 0.4013312757015228, "learning_rate": 0.0005372356907578224, "loss": 2.2166, "step": 3219 }, { "epoch": 1.4765508542598327, "grad_norm": 0.2755921483039856, "learning_rate": 0.0005369887313745723, "loss": 1.2296, "step": 3220 }, { "epoch": 1.4770095172571953, "grad_norm": 0.17257989943027496, "learning_rate": 0.0005367417629177196, "loss": 0.9679, "step": 3221 }, { "epoch": 1.477468180254558, "grad_norm": 0.28084853291511536, "learning_rate": 0.0005364947854478476, "loss": 1.3068, "step": 3222 }, { "epoch": 1.4779268432519206, "grad_norm": 0.24307353794574738, "learning_rate": 0.0005362477990255416, "loss": 0.8342, "step": 3223 }, { "epoch": 1.4783855062492832, "grad_norm": 0.22293055057525635, "learning_rate": 0.000536000803711389, "loss": 1.3204, "step": 3224 }, { "epoch": 1.478844169246646, "grad_norm": 0.2704020142555237, "learning_rate": 0.0005357537995659798, "loss": 1.0284, "step": 3225 }, { "epoch": 1.4793028322440087, "grad_norm": 0.22363778948783875, "learning_rate": 0.0005355067866499059, "loss": 1.4955, "step": 3226 }, { "epoch": 1.4797614952413713, "grad_norm": 0.3845861256122589, "learning_rate": 0.000535259765023761, "loss": 1.5793, "step": 3227 }, { "epoch": 1.4802201582387342, "grad_norm": 0.2982635498046875, "learning_rate": 0.0005350127347481414, "loss": 1.2065, "step": 3228 }, { "epoch": 1.4806788212360968, "grad_norm": 0.18739233911037445, "learning_rate": 0.0005347656958836457, "loss": 1.3103, "step": 3229 }, { "epoch": 1.4811374842334595, "grad_norm": 0.34681299328804016, "learning_rate": 0.0005345186484908741, "loss": 1.4734, "step": 3230 }, { "epoch": 1.4815961472308221, "grad_norm": 0.3014410734176636, "learning_rate": 0.0005342715926304291, "loss": 1.6099, "step": 3231 }, { "epoch": 1.4820548102281847, "grad_norm": 0.4709073603153229, "learning_rate": 0.0005340245283629155, "loss": 1.7924, "step": 3232 }, { "epoch": 1.4825134732255476, "grad_norm": 0.37753811478614807, "learning_rate": 0.0005337774557489394, "loss": 1.3316, "step": 3233 }, { "epoch": 1.4829721362229102, "grad_norm": 0.31126853823661804, "learning_rate": 0.0005335303748491101, "loss": 0.7645, "step": 3234 }, { "epoch": 1.4834307992202729, "grad_norm": 0.10572401434183121, "learning_rate": 0.0005332832857240381, "loss": 0.9926, "step": 3235 }, { "epoch": 1.4838894622176355, "grad_norm": 0.38704633712768555, "learning_rate": 0.0005330361884343361, "loss": 2.0361, "step": 3236 }, { "epoch": 1.4843481252149981, "grad_norm": 0.2931767702102661, "learning_rate": 0.0005327890830406189, "loss": 0.9363, "step": 3237 }, { "epoch": 1.484806788212361, "grad_norm": 0.11565959453582764, "learning_rate": 0.000532541969603503, "loss": 0.9478, "step": 3238 }, { "epoch": 1.4852654512097236, "grad_norm": 0.3610280156135559, "learning_rate": 0.0005322948481836075, "loss": 1.6384, "step": 3239 }, { "epoch": 1.4857241142070863, "grad_norm": 0.32327285408973694, "learning_rate": 0.0005320477188415529, "loss": 0.8816, "step": 3240 }, { "epoch": 1.4861827772044491, "grad_norm": 0.20238777995109558, "learning_rate": 0.0005318005816379618, "loss": 1.2931, "step": 3241 }, { "epoch": 1.4866414402018118, "grad_norm": 0.29169613122940063, "learning_rate": 0.0005315534366334587, "loss": 1.1745, "step": 3242 }, { "epoch": 1.4871001031991744, "grad_norm": 0.34999263286590576, "learning_rate": 0.00053130628388867, "loss": 1.7833, "step": 3243 }, { "epoch": 1.487558766196537, "grad_norm": 0.25474709272384644, "learning_rate": 0.0005310591234642242, "loss": 1.2222, "step": 3244 }, { "epoch": 1.4880174291938997, "grad_norm": 0.333258718252182, "learning_rate": 0.0005308119554207515, "loss": 1.3405, "step": 3245 }, { "epoch": 1.4884760921912625, "grad_norm": 0.19669318199157715, "learning_rate": 0.0005305647798188839, "loss": 1.2637, "step": 3246 }, { "epoch": 1.4889347551886252, "grad_norm": 0.25908851623535156, "learning_rate": 0.0005303175967192555, "loss": 1.5399, "step": 3247 }, { "epoch": 1.4893934181859878, "grad_norm": 0.32331693172454834, "learning_rate": 0.0005300704061825019, "loss": 1.6418, "step": 3248 }, { "epoch": 1.4898520811833507, "grad_norm": 0.2600635588169098, "learning_rate": 0.0005298232082692609, "loss": 1.0823, "step": 3249 }, { "epoch": 1.4903107441807133, "grad_norm": 0.2755780518054962, "learning_rate": 0.000529576003040172, "loss": 1.6127, "step": 3250 }, { "epoch": 1.490769407178076, "grad_norm": 0.33199846744537354, "learning_rate": 0.0005293287905558762, "loss": 1.6889, "step": 3251 }, { "epoch": 1.4912280701754386, "grad_norm": 0.24848337471485138, "learning_rate": 0.0005290815708770166, "loss": 0.8654, "step": 3252 }, { "epoch": 1.4916867331728012, "grad_norm": 0.2537640333175659, "learning_rate": 0.0005288343440642379, "loss": 1.316, "step": 3253 }, { "epoch": 1.492145396170164, "grad_norm": 0.22786585986614227, "learning_rate": 0.0005285871101781868, "loss": 1.1967, "step": 3254 }, { "epoch": 1.4926040591675267, "grad_norm": 0.30544570088386536, "learning_rate": 0.0005283398692795114, "loss": 1.6742, "step": 3255 }, { "epoch": 1.4930627221648893, "grad_norm": 1.4452085494995117, "learning_rate": 0.0005280926214288617, "loss": 0.8225, "step": 3256 }, { "epoch": 1.493521385162252, "grad_norm": 0.2699876129627228, "learning_rate": 0.0005278453666868896, "loss": 1.3572, "step": 3257 }, { "epoch": 1.4939800481596146, "grad_norm": 0.35777711868286133, "learning_rate": 0.0005275981051142481, "loss": 2.1099, "step": 3258 }, { "epoch": 1.4944387111569775, "grad_norm": 0.19279718399047852, "learning_rate": 0.0005273508367715923, "loss": 0.8453, "step": 3259 }, { "epoch": 1.49489737415434, "grad_norm": 0.29106536507606506, "learning_rate": 0.0005271035617195793, "loss": 1.2269, "step": 3260 }, { "epoch": 1.4953560371517027, "grad_norm": 0.286878377199173, "learning_rate": 0.0005268562800188671, "loss": 1.6506, "step": 3261 }, { "epoch": 1.4958147001490656, "grad_norm": 0.3785189688205719, "learning_rate": 0.0005266089917301158, "loss": 1.5022, "step": 3262 }, { "epoch": 1.4962733631464282, "grad_norm": 0.25897216796875, "learning_rate": 0.0005263616969139868, "loss": 1.6508, "step": 3263 }, { "epoch": 1.4967320261437909, "grad_norm": 0.3450983166694641, "learning_rate": 0.0005261143956311435, "loss": 1.7249, "step": 3264 }, { "epoch": 1.4971906891411535, "grad_norm": 0.28310757875442505, "learning_rate": 0.0005258670879422508, "loss": 1.2171, "step": 3265 }, { "epoch": 1.4976493521385161, "grad_norm": 0.3658524751663208, "learning_rate": 0.0005256197739079749, "loss": 1.3394, "step": 3266 }, { "epoch": 1.498108015135879, "grad_norm": 0.3043535053730011, "learning_rate": 0.0005253724535889836, "loss": 1.3353, "step": 3267 }, { "epoch": 1.4985666781332416, "grad_norm": 0.2485763430595398, "learning_rate": 0.0005251251270459468, "loss": 1.0306, "step": 3268 }, { "epoch": 1.4990253411306043, "grad_norm": 0.26514461636543274, "learning_rate": 0.0005248777943395347, "loss": 1.0359, "step": 3269 }, { "epoch": 1.499484004127967, "grad_norm": 0.26036179065704346, "learning_rate": 0.0005246304555304205, "loss": 1.3007, "step": 3270 }, { "epoch": 1.4999426671253295, "grad_norm": 0.20205332338809967, "learning_rate": 0.000524383110679278, "loss": 0.61, "step": 3271 }, { "epoch": 1.5004013301226924, "grad_norm": 0.36065131425857544, "learning_rate": 0.0005241357598467826, "loss": 2.1804, "step": 3272 }, { "epoch": 1.500859993120055, "grad_norm": 0.3411425054073334, "learning_rate": 0.0005238884030936112, "loss": 1.4267, "step": 3273 }, { "epoch": 1.5013186561174177, "grad_norm": 0.45014554262161255, "learning_rate": 0.0005236410404804425, "loss": 2.0813, "step": 3274 }, { "epoch": 1.5017773191147805, "grad_norm": 0.4235081076622009, "learning_rate": 0.000523393672067956, "loss": 1.2112, "step": 3275 }, { "epoch": 1.5022359821121432, "grad_norm": 0.26777932047843933, "learning_rate": 0.0005231462979168331, "loss": 0.8293, "step": 3276 }, { "epoch": 1.5026946451095058, "grad_norm": 0.20920783281326294, "learning_rate": 0.0005228989180877564, "loss": 1.123, "step": 3277 }, { "epoch": 1.5031533081068684, "grad_norm": 0.27009251713752747, "learning_rate": 0.0005226515326414099, "loss": 1.7735, "step": 3278 }, { "epoch": 1.503611971104231, "grad_norm": 0.3320116698741913, "learning_rate": 0.0005224041416384791, "loss": 1.6422, "step": 3279 }, { "epoch": 1.504070634101594, "grad_norm": 0.2971821129322052, "learning_rate": 0.0005221567451396509, "loss": 0.9693, "step": 3280 }, { "epoch": 1.5045292970989566, "grad_norm": 0.13652589917182922, "learning_rate": 0.0005219093432056133, "loss": 1.0251, "step": 3281 }, { "epoch": 1.5049879600963192, "grad_norm": 0.31048429012298584, "learning_rate": 0.000521661935897056, "loss": 1.3032, "step": 3282 }, { "epoch": 1.505446623093682, "grad_norm": 0.2567446827888489, "learning_rate": 0.0005214145232746696, "loss": 0.8873, "step": 3283 }, { "epoch": 1.5059052860910445, "grad_norm": 0.2890402674674988, "learning_rate": 0.000521167105399146, "loss": 1.3135, "step": 3284 }, { "epoch": 1.5063639490884073, "grad_norm": 0.3106888234615326, "learning_rate": 0.0005209196823311791, "loss": 1.3594, "step": 3285 }, { "epoch": 1.50682261208577, "grad_norm": 0.28741034865379333, "learning_rate": 0.0005206722541314631, "loss": 1.7812, "step": 3286 }, { "epoch": 1.5072812750831326, "grad_norm": 0.32389602065086365, "learning_rate": 0.0005204248208606942, "loss": 1.4911, "step": 3287 }, { "epoch": 1.5077399380804954, "grad_norm": 0.26342904567718506, "learning_rate": 0.0005201773825795694, "loss": 1.6143, "step": 3288 }, { "epoch": 1.508198601077858, "grad_norm": 0.21466577053070068, "learning_rate": 0.0005199299393487872, "loss": 0.3381, "step": 3289 }, { "epoch": 1.5086572640752207, "grad_norm": 0.3118995428085327, "learning_rate": 0.0005196824912290472, "loss": 1.2741, "step": 3290 }, { "epoch": 1.5091159270725836, "grad_norm": 0.3177085220813751, "learning_rate": 0.00051943503828105, "loss": 1.3895, "step": 3291 }, { "epoch": 1.509574590069946, "grad_norm": 0.14354930818080902, "learning_rate": 0.0005191875805654981, "loss": 1.14, "step": 3292 }, { "epoch": 1.5100332530673088, "grad_norm": 0.256320983171463, "learning_rate": 0.0005189401181430941, "loss": 0.592, "step": 3293 }, { "epoch": 1.5104919160646715, "grad_norm": 0.33321627974510193, "learning_rate": 0.0005186926510745427, "loss": 2.0759, "step": 3294 }, { "epoch": 1.5109505790620341, "grad_norm": 0.26053670048713684, "learning_rate": 0.0005184451794205491, "loss": 1.1047, "step": 3295 }, { "epoch": 1.511409242059397, "grad_norm": 0.408119261264801, "learning_rate": 0.00051819770324182, "loss": 1.6132, "step": 3296 }, { "epoch": 1.5118679050567594, "grad_norm": 0.2578348219394684, "learning_rate": 0.0005179502225990632, "loss": 0.9136, "step": 3297 }, { "epoch": 1.5123265680541222, "grad_norm": 0.2745778560638428, "learning_rate": 0.0005177027375529872, "loss": 1.7166, "step": 3298 }, { "epoch": 1.5127852310514849, "grad_norm": 0.3158656656742096, "learning_rate": 0.0005174552481643023, "loss": 1.1242, "step": 3299 }, { "epoch": 1.5132438940488475, "grad_norm": 0.270859032869339, "learning_rate": 0.0005172077544937188, "loss": 1.9039, "step": 3300 }, { "epoch": 1.5137025570462104, "grad_norm": 0.4259583652019501, "learning_rate": 0.0005169602566019493, "loss": 2.1599, "step": 3301 }, { "epoch": 1.514161220043573, "grad_norm": 0.33669549226760864, "learning_rate": 0.0005167127545497066, "loss": 2.24, "step": 3302 }, { "epoch": 1.5146198830409356, "grad_norm": 0.3532778024673462, "learning_rate": 0.0005164652483977044, "loss": 1.3746, "step": 3303 }, { "epoch": 1.5150785460382985, "grad_norm": 0.22524480521678925, "learning_rate": 0.0005162177382066584, "loss": 0.8802, "step": 3304 }, { "epoch": 1.515537209035661, "grad_norm": 0.21874026954174042, "learning_rate": 0.000515970224037284, "loss": 0.9228, "step": 3305 }, { "epoch": 1.5159958720330238, "grad_norm": 0.2486424446105957, "learning_rate": 0.0005157227059502987, "loss": 1.7778, "step": 3306 }, { "epoch": 1.5164545350303864, "grad_norm": 0.28031882643699646, "learning_rate": 0.0005154751840064203, "loss": 1.2578, "step": 3307 }, { "epoch": 1.516913198027749, "grad_norm": 0.4096969664096832, "learning_rate": 0.0005152276582663676, "loss": 2.0068, "step": 3308 }, { "epoch": 1.517371861025112, "grad_norm": 0.3127899467945099, "learning_rate": 0.0005149801287908604, "loss": 1.2456, "step": 3309 }, { "epoch": 1.5178305240224745, "grad_norm": 0.325255811214447, "learning_rate": 0.0005147325956406197, "loss": 1.6535, "step": 3310 }, { "epoch": 1.5182891870198372, "grad_norm": 0.06586357206106186, "learning_rate": 0.000514485058876367, "loss": 0.9711, "step": 3311 }, { "epoch": 1.5187478500171998, "grad_norm": 0.31240516901016235, "learning_rate": 0.000514237518558825, "loss": 1.6185, "step": 3312 }, { "epoch": 1.5192065130145624, "grad_norm": 0.29829588532447815, "learning_rate": 0.0005139899747487171, "loss": 1.2696, "step": 3313 }, { "epoch": 1.5196651760119253, "grad_norm": 0.25990763306617737, "learning_rate": 0.0005137424275067674, "loss": 1.3779, "step": 3314 }, { "epoch": 1.520123839009288, "grad_norm": 0.32825708389282227, "learning_rate": 0.0005134948768937011, "loss": 1.3303, "step": 3315 }, { "epoch": 1.5205825020066506, "grad_norm": 0.2681593894958496, "learning_rate": 0.0005132473229702444, "loss": 1.9851, "step": 3316 }, { "epoch": 1.5210411650040134, "grad_norm": 0.2627713680267334, "learning_rate": 0.0005129997657971236, "loss": 1.3954, "step": 3317 }, { "epoch": 1.5214998280013758, "grad_norm": 0.22389861941337585, "learning_rate": 0.0005127522054350667, "loss": 0.7996, "step": 3318 }, { "epoch": 1.5219584909987387, "grad_norm": 0.06943022459745407, "learning_rate": 0.0005125046419448019, "loss": 0.4316, "step": 3319 }, { "epoch": 1.5224171539961013, "grad_norm": 0.16455991566181183, "learning_rate": 0.0005122570753870582, "loss": 0.9303, "step": 3320 }, { "epoch": 1.522875816993464, "grad_norm": 0.22131837904453278, "learning_rate": 0.0005120095058225654, "loss": 1.1393, "step": 3321 }, { "epoch": 1.5233344799908268, "grad_norm": 0.2995634973049164, "learning_rate": 0.0005117619333120544, "loss": 1.0248, "step": 3322 }, { "epoch": 1.5237931429881895, "grad_norm": 0.2665209472179413, "learning_rate": 0.0005115143579162561, "loss": 1.1959, "step": 3323 }, { "epoch": 1.524251805985552, "grad_norm": 0.41921502351760864, "learning_rate": 0.000511266779695903, "loss": 1.6565, "step": 3324 }, { "epoch": 1.524710468982915, "grad_norm": 0.3324905037879944, "learning_rate": 0.0005110191987117277, "loss": 2.2944, "step": 3325 }, { "epoch": 1.5251691319802774, "grad_norm": 0.22564005851745605, "learning_rate": 0.0005107716150244634, "loss": 0.761, "step": 3326 }, { "epoch": 1.5256277949776402, "grad_norm": 0.25976645946502686, "learning_rate": 0.0005105240286948442, "loss": 1.4489, "step": 3327 }, { "epoch": 1.5260864579750029, "grad_norm": 0.3188987076282501, "learning_rate": 0.0005102764397836049, "loss": 1.4666, "step": 3328 }, { "epoch": 1.5265451209723655, "grad_norm": 0.1813998967409134, "learning_rate": 0.0005100288483514809, "loss": 0.9902, "step": 3329 }, { "epoch": 1.5270037839697284, "grad_norm": 0.24019388854503632, "learning_rate": 0.000509781254459208, "loss": 1.1389, "step": 3330 }, { "epoch": 1.5274624469670908, "grad_norm": 0.27883660793304443, "learning_rate": 0.0005095336581675228, "loss": 1.225, "step": 3331 }, { "epoch": 1.5279211099644536, "grad_norm": 0.3314226269721985, "learning_rate": 0.0005092860595371627, "loss": 2.3223, "step": 3332 }, { "epoch": 1.5283797729618163, "grad_norm": 0.3248099982738495, "learning_rate": 0.000509038458628865, "loss": 1.6426, "step": 3333 }, { "epoch": 1.528838435959179, "grad_norm": 0.3302631378173828, "learning_rate": 0.0005087908555033683, "loss": 1.1074, "step": 3334 }, { "epoch": 1.5292970989565418, "grad_norm": 0.2534301280975342, "learning_rate": 0.0005085432502214116, "loss": 1.6361, "step": 3335 }, { "epoch": 1.5297557619539044, "grad_norm": 0.4103301167488098, "learning_rate": 0.0005082956428437337, "loss": 1.5873, "step": 3336 }, { "epoch": 1.530214424951267, "grad_norm": 0.17628952860832214, "learning_rate": 0.000508048033431075, "loss": 0.789, "step": 3337 }, { "epoch": 1.5306730879486299, "grad_norm": 0.28603991866111755, "learning_rate": 0.0005078004220441756, "loss": 1.2981, "step": 3338 }, { "epoch": 1.5311317509459923, "grad_norm": 0.30459219217300415, "learning_rate": 0.0005075528087437764, "loss": 1.3026, "step": 3339 }, { "epoch": 1.5315904139433552, "grad_norm": 0.09923279285430908, "learning_rate": 0.0005073051935906188, "loss": 0.6987, "step": 3340 }, { "epoch": 1.5320490769407178, "grad_norm": 0.28860870003700256, "learning_rate": 0.0005070575766454445, "loss": 0.8572, "step": 3341 }, { "epoch": 1.5325077399380804, "grad_norm": 0.2449285089969635, "learning_rate": 0.0005068099579689958, "loss": 1.2982, "step": 3342 }, { "epoch": 1.5329664029354433, "grad_norm": 0.26312926411628723, "learning_rate": 0.0005065623376220154, "loss": 1.7064, "step": 3343 }, { "epoch": 1.533425065932806, "grad_norm": 0.36778807640075684, "learning_rate": 0.0005063147156652461, "loss": 1.5251, "step": 3344 }, { "epoch": 1.5338837289301686, "grad_norm": 0.2748435437679291, "learning_rate": 0.0005060670921594316, "loss": 1.427, "step": 3345 }, { "epoch": 1.5343423919275314, "grad_norm": 0.18528130650520325, "learning_rate": 0.0005058194671653156, "loss": 0.4789, "step": 3346 }, { "epoch": 1.5348010549248938, "grad_norm": 0.3140660524368286, "learning_rate": 0.0005055718407436424, "loss": 1.6775, "step": 3347 }, { "epoch": 1.5352597179222567, "grad_norm": 0.37981608510017395, "learning_rate": 0.0005053242129551564, "loss": 1.5612, "step": 3348 }, { "epoch": 1.5357183809196193, "grad_norm": 0.2672194242477417, "learning_rate": 0.0005050765838606027, "loss": 1.0255, "step": 3349 }, { "epoch": 1.536177043916982, "grad_norm": 0.08587975054979324, "learning_rate": 0.0005048289535207264, "loss": 0.9292, "step": 3350 }, { "epoch": 1.5366357069143448, "grad_norm": 0.3729512691497803, "learning_rate": 0.0005045813219962728, "loss": 1.7538, "step": 3351 }, { "epoch": 1.5370943699117072, "grad_norm": 0.29773613810539246, "learning_rate": 0.0005043336893479879, "loss": 1.9409, "step": 3352 }, { "epoch": 1.53755303290907, "grad_norm": 0.27870243787765503, "learning_rate": 0.0005040860556366179, "loss": 0.8977, "step": 3353 }, { "epoch": 1.5380116959064327, "grad_norm": 0.13409045338630676, "learning_rate": 0.0005038384209229089, "loss": 0.6826, "step": 3354 }, { "epoch": 1.5384703589037954, "grad_norm": 0.22798749804496765, "learning_rate": 0.0005035907852676076, "loss": 1.1876, "step": 3355 }, { "epoch": 1.5389290219011582, "grad_norm": 0.3014310300350189, "learning_rate": 0.0005033431487314608, "loss": 1.7822, "step": 3356 }, { "epoch": 1.5393876848985208, "grad_norm": 0.19987566769123077, "learning_rate": 0.0005030955113752155, "loss": 0.8876, "step": 3357 }, { "epoch": 1.5398463478958835, "grad_norm": 0.2765864431858063, "learning_rate": 0.0005028478732596189, "loss": 1.7389, "step": 3358 }, { "epoch": 1.5403050108932463, "grad_norm": 0.26134082674980164, "learning_rate": 0.0005026002344454184, "loss": 0.8819, "step": 3359 }, { "epoch": 1.5407636738906088, "grad_norm": 0.1979461908340454, "learning_rate": 0.0005023525949933618, "loss": 1.2951, "step": 3360 }, { "epoch": 1.5412223368879716, "grad_norm": 0.5640215873718262, "learning_rate": 0.0005021049549641967, "loss": 1.6044, "step": 3361 }, { "epoch": 1.5416809998853342, "grad_norm": 0.4211662709712982, "learning_rate": 0.0005018573144186708, "loss": 1.8833, "step": 3362 }, { "epoch": 1.5421396628826969, "grad_norm": 0.28429386019706726, "learning_rate": 0.0005016096734175324, "loss": 1.2702, "step": 3363 }, { "epoch": 1.5425983258800597, "grad_norm": 0.20547764003276825, "learning_rate": 0.0005013620320215294, "loss": 1.0284, "step": 3364 }, { "epoch": 1.5430569888774222, "grad_norm": 0.3609144985675812, "learning_rate": 0.0005011143902914102, "loss": 1.1247, "step": 3365 }, { "epoch": 1.543515651874785, "grad_norm": 0.3178929388523102, "learning_rate": 0.000500866748287923, "loss": 1.6366, "step": 3366 }, { "epoch": 1.5439743148721476, "grad_norm": 0.22963233292102814, "learning_rate": 0.0005006191060718163, "loss": 1.7095, "step": 3367 }, { "epoch": 1.5444329778695103, "grad_norm": 0.29375985264778137, "learning_rate": 0.0005003714637038381, "loss": 1.2357, "step": 3368 }, { "epoch": 1.5448916408668731, "grad_norm": 0.26059386134147644, "learning_rate": 0.0005001238212447376, "loss": 1.4654, "step": 3369 }, { "epoch": 1.5453503038642358, "grad_norm": 0.2658666670322418, "learning_rate": 0.0004998761787552626, "loss": 1.481, "step": 3370 }, { "epoch": 1.5458089668615984, "grad_norm": 0.26609566807746887, "learning_rate": 0.0004996285362961619, "loss": 1.1959, "step": 3371 }, { "epoch": 1.5462676298589613, "grad_norm": 0.2814446985721588, "learning_rate": 0.0004993808939281839, "loss": 1.764, "step": 3372 }, { "epoch": 1.5467262928563237, "grad_norm": 0.35369497537612915, "learning_rate": 0.0004991332517120771, "loss": 1.0037, "step": 3373 }, { "epoch": 1.5471849558536865, "grad_norm": 0.2843606173992157, "learning_rate": 0.00049888560970859, "loss": 1.7404, "step": 3374 }, { "epoch": 1.5476436188510492, "grad_norm": 0.20866908133029938, "learning_rate": 0.0004986379679784707, "loss": 0.5552, "step": 3375 }, { "epoch": 1.5481022818484118, "grad_norm": 0.25553098320961, "learning_rate": 0.0004983903265824677, "loss": 1.1967, "step": 3376 }, { "epoch": 1.5485609448457747, "grad_norm": 0.26248809695243835, "learning_rate": 0.0004981426855813293, "loss": 1.6772, "step": 3377 }, { "epoch": 1.5490196078431373, "grad_norm": 0.35352829098701477, "learning_rate": 0.0004978950450358036, "loss": 1.5785, "step": 3378 }, { "epoch": 1.5494782708405, "grad_norm": 0.26842719316482544, "learning_rate": 0.0004976474050066384, "loss": 1.6119, "step": 3379 }, { "epoch": 1.5499369338378628, "grad_norm": 0.17810232937335968, "learning_rate": 0.0004973997655545817, "loss": 1.106, "step": 3380 }, { "epoch": 1.5503955968352252, "grad_norm": 0.2810850143432617, "learning_rate": 0.0004971521267403812, "loss": 1.1019, "step": 3381 }, { "epoch": 1.550854259832588, "grad_norm": 0.34839344024658203, "learning_rate": 0.0004969044886247846, "loss": 1.548, "step": 3382 }, { "epoch": 1.5513129228299507, "grad_norm": 0.2651059925556183, "learning_rate": 0.0004966568512685392, "loss": 1.3282, "step": 3383 }, { "epoch": 1.5517715858273133, "grad_norm": 0.2800137996673584, "learning_rate": 0.0004964092147323925, "loss": 2.1348, "step": 3384 }, { "epoch": 1.5522302488246762, "grad_norm": 0.223674938082695, "learning_rate": 0.0004961615790770912, "loss": 0.8656, "step": 3385 }, { "epoch": 1.5526889118220386, "grad_norm": 0.3796307146549225, "learning_rate": 0.0004959139443633823, "loss": 2.2166, "step": 3386 }, { "epoch": 1.5531475748194015, "grad_norm": 0.3234909176826477, "learning_rate": 0.0004956663106520121, "loss": 1.7649, "step": 3387 }, { "epoch": 1.553606237816764, "grad_norm": 0.3189575672149658, "learning_rate": 0.0004954186780037273, "loss": 1.5074, "step": 3388 }, { "epoch": 1.5540649008141267, "grad_norm": 0.2766447365283966, "learning_rate": 0.0004951710464792736, "loss": 1.6479, "step": 3389 }, { "epoch": 1.5545235638114896, "grad_norm": 0.35292649269104004, "learning_rate": 0.0004949234161393974, "loss": 1.7307, "step": 3390 }, { "epoch": 1.5549822268088522, "grad_norm": 0.24963252246379852, "learning_rate": 0.0004946757870448437, "loss": 0.7278, "step": 3391 }, { "epoch": 1.5554408898062149, "grad_norm": 0.17716290056705475, "learning_rate": 0.0004944281592563577, "loss": 0.7671, "step": 3392 }, { "epoch": 1.5558995528035777, "grad_norm": 0.234075665473938, "learning_rate": 0.0004941805328346845, "loss": 1.2981, "step": 3393 }, { "epoch": 1.5563582158009401, "grad_norm": 0.4235064685344696, "learning_rate": 0.0004939329078405683, "loss": 2.1018, "step": 3394 }, { "epoch": 1.556816878798303, "grad_norm": 0.31396913528442383, "learning_rate": 0.0004936852843347541, "loss": 1.6549, "step": 3395 }, { "epoch": 1.5572755417956656, "grad_norm": 0.275888592004776, "learning_rate": 0.0004934376623779848, "loss": 1.3459, "step": 3396 }, { "epoch": 1.5577342047930283, "grad_norm": 0.36937257647514343, "learning_rate": 0.0004931900420310042, "loss": 2.2034, "step": 3397 }, { "epoch": 1.5581928677903911, "grad_norm": 0.45881137251853943, "learning_rate": 0.0004929424233545556, "loss": 1.7052, "step": 3398 }, { "epoch": 1.5586515307877535, "grad_norm": 0.26412880420684814, "learning_rate": 0.0004926948064093811, "loss": 1.5784, "step": 3399 }, { "epoch": 1.5591101937851164, "grad_norm": 0.35005372762680054, "learning_rate": 0.0004924471912562236, "loss": 1.5, "step": 3400 }, { "epoch": 1.559568856782479, "grad_norm": 0.27285388112068176, "learning_rate": 0.0004921995779558246, "loss": 0.7449, "step": 3401 }, { "epoch": 1.5600275197798417, "grad_norm": 0.35796064138412476, "learning_rate": 0.0004919519665689252, "loss": 1.644, "step": 3402 }, { "epoch": 1.5604861827772045, "grad_norm": 0.25865140557289124, "learning_rate": 0.0004917043571562664, "loss": 1.5549, "step": 3403 }, { "epoch": 1.5609448457745672, "grad_norm": 0.3593021631240845, "learning_rate": 0.0004914567497785885, "loss": 1.1608, "step": 3404 }, { "epoch": 1.5614035087719298, "grad_norm": 0.2613557279109955, "learning_rate": 0.0004912091444966316, "loss": 1.0942, "step": 3405 }, { "epoch": 1.5618621717692927, "grad_norm": 0.20529672503471375, "learning_rate": 0.0004909615413711351, "loss": 1.1398, "step": 3406 }, { "epoch": 1.562320834766655, "grad_norm": 0.3400242030620575, "learning_rate": 0.0004907139404628375, "loss": 1.6556, "step": 3407 }, { "epoch": 1.562779497764018, "grad_norm": 0.19008702039718628, "learning_rate": 0.0004904663418324772, "loss": 0.3512, "step": 3408 }, { "epoch": 1.5632381607613806, "grad_norm": 0.28319889307022095, "learning_rate": 0.0004902187455407921, "loss": 1.2299, "step": 3409 }, { "epoch": 1.5636968237587432, "grad_norm": 0.19672320783138275, "learning_rate": 0.0004899711516485192, "loss": 1.1458, "step": 3410 }, { "epoch": 1.564155486756106, "grad_norm": 0.38868579268455505, "learning_rate": 0.0004897235602163952, "loss": 1.691, "step": 3411 }, { "epoch": 1.5646141497534687, "grad_norm": 0.33131128549575806, "learning_rate": 0.0004894759713051561, "loss": 1.6458, "step": 3412 }, { "epoch": 1.5650728127508313, "grad_norm": 0.31250569224357605, "learning_rate": 0.0004892283849755368, "loss": 1.0369, "step": 3413 }, { "epoch": 1.5655314757481942, "grad_norm": 0.3426419198513031, "learning_rate": 0.0004889808012882725, "loss": 1.7825, "step": 3414 }, { "epoch": 1.5659901387455566, "grad_norm": 0.10699386149644852, "learning_rate": 0.000488733220304097, "loss": 0.5646, "step": 3415 }, { "epoch": 1.5664488017429194, "grad_norm": 0.22663241624832153, "learning_rate": 0.0004884856420837438, "loss": 1.1749, "step": 3416 }, { "epoch": 1.566907464740282, "grad_norm": 0.21530061960220337, "learning_rate": 0.0004882380666879457, "loss": 1.2376, "step": 3417 }, { "epoch": 1.5673661277376447, "grad_norm": 0.4145222306251526, "learning_rate": 0.0004879904941774347, "loss": 1.5345, "step": 3418 }, { "epoch": 1.5678247907350076, "grad_norm": 0.28199657797813416, "learning_rate": 0.00048774292461294203, "loss": 1.5359, "step": 3419 }, { "epoch": 1.56828345373237, "grad_norm": 0.35698872804641724, "learning_rate": 0.00048749535805519824, "loss": 1.5125, "step": 3420 }, { "epoch": 1.5687421167297328, "grad_norm": 0.08986271172761917, "learning_rate": 0.00048724779456493333, "loss": 0.5488, "step": 3421 }, { "epoch": 1.5692007797270955, "grad_norm": 0.28814515471458435, "learning_rate": 0.00048700023420287635, "loss": 1.4119, "step": 3422 }, { "epoch": 1.5696594427244581, "grad_norm": 0.33969786763191223, "learning_rate": 0.0004867526770297558, "loss": 2.0642, "step": 3423 }, { "epoch": 1.570118105721821, "grad_norm": 0.18406681716442108, "learning_rate": 0.00048650512310629895, "loss": 0.7826, "step": 3424 }, { "epoch": 1.5705767687191836, "grad_norm": 0.2164044976234436, "learning_rate": 0.0004862575724932327, "loss": 1.1195, "step": 3425 }, { "epoch": 1.5710354317165462, "grad_norm": 0.3818514943122864, "learning_rate": 0.00048601002525128304, "loss": 1.6297, "step": 3426 }, { "epoch": 1.571494094713909, "grad_norm": 0.27068087458610535, "learning_rate": 0.000485762481441175, "loss": 1.4949, "step": 3427 }, { "epoch": 1.5719527577112715, "grad_norm": 0.2720354497432709, "learning_rate": 0.000485514941123633, "loss": 1.3561, "step": 3428 }, { "epoch": 1.5724114207086344, "grad_norm": 0.273271769285202, "learning_rate": 0.00048526740435938045, "loss": 1.4606, "step": 3429 }, { "epoch": 1.572870083705997, "grad_norm": 0.06212342157959938, "learning_rate": 0.0004850198712091397, "loss": 0.6363, "step": 3430 }, { "epoch": 1.5733287467033596, "grad_norm": 0.17056238651275635, "learning_rate": 0.0004847723417336326, "loss": 0.4197, "step": 3431 }, { "epoch": 1.5737874097007225, "grad_norm": 0.3277488648891449, "learning_rate": 0.00048452481599357985, "loss": 1.6985, "step": 3432 }, { "epoch": 1.574246072698085, "grad_norm": 0.29063600301742554, "learning_rate": 0.00048427729404970133, "loss": 1.446, "step": 3433 }, { "epoch": 1.5747047356954478, "grad_norm": 0.2664231061935425, "learning_rate": 0.00048402977596271604, "loss": 1.5747, "step": 3434 }, { "epoch": 1.5751633986928104, "grad_norm": 0.28931164741516113, "learning_rate": 0.0004837822617933417, "loss": 1.275, "step": 3435 }, { "epoch": 1.575622061690173, "grad_norm": 0.3391111195087433, "learning_rate": 0.0004835347516022956, "loss": 2.0002, "step": 3436 }, { "epoch": 1.576080724687536, "grad_norm": 0.3661150336265564, "learning_rate": 0.00048328724545029355, "loss": 1.0988, "step": 3437 }, { "epoch": 1.5765393876848985, "grad_norm": 0.1353694349527359, "learning_rate": 0.00048303974339805074, "loss": 0.6863, "step": 3438 }, { "epoch": 1.5769980506822612, "grad_norm": 0.14725974202156067, "learning_rate": 0.00048279224550628117, "loss": 1.3112, "step": 3439 }, { "epoch": 1.577456713679624, "grad_norm": 0.2503822147846222, "learning_rate": 0.000482544751835698, "loss": 1.1244, "step": 3440 }, { "epoch": 1.5779153766769864, "grad_norm": 0.55783611536026, "learning_rate": 0.0004822972624470128, "loss": 1.9937, "step": 3441 }, { "epoch": 1.5783740396743493, "grad_norm": 0.2923833429813385, "learning_rate": 0.00048204977740093694, "loss": 1.0448, "step": 3442 }, { "epoch": 1.578832702671712, "grad_norm": 0.09032203257083893, "learning_rate": 0.00048180229675817997, "loss": 0.529, "step": 3443 }, { "epoch": 1.5792913656690746, "grad_norm": 0.12156753242015839, "learning_rate": 0.00048155482057945094, "loss": 0.9733, "step": 3444 }, { "epoch": 1.5797500286664374, "grad_norm": 0.3344416916370392, "learning_rate": 0.00048130734892545737, "loss": 1.7934, "step": 3445 }, { "epoch": 1.5802086916638, "grad_norm": 0.35299161076545715, "learning_rate": 0.00048105988185690596, "loss": 1.5703, "step": 3446 }, { "epoch": 1.5806673546611627, "grad_norm": 0.22280535101890564, "learning_rate": 0.00048081241943450205, "loss": 1.6415, "step": 3447 }, { "epoch": 1.5811260176585256, "grad_norm": 0.34450089931488037, "learning_rate": 0.00048056496171895, "loss": 1.4304, "step": 3448 }, { "epoch": 1.581584680655888, "grad_norm": 0.2558358311653137, "learning_rate": 0.0004803175087709529, "loss": 1.5538, "step": 3449 }, { "epoch": 1.5820433436532508, "grad_norm": 0.2851111888885498, "learning_rate": 0.00048007006065121287, "loss": 1.8889, "step": 3450 }, { "epoch": 1.5825020066506135, "grad_norm": 0.21149995923042297, "learning_rate": 0.0004798226174204308, "loss": 0.9199, "step": 3451 }, { "epoch": 1.582960669647976, "grad_norm": 0.31764793395996094, "learning_rate": 0.00047957517913930597, "loss": 1.7222, "step": 3452 }, { "epoch": 1.583419332645339, "grad_norm": 0.27936065196990967, "learning_rate": 0.00047932774586853706, "loss": 1.4536, "step": 3453 }, { "epoch": 1.5838779956427014, "grad_norm": 0.35680124163627625, "learning_rate": 0.0004790803176688211, "loss": 1.7658, "step": 3454 }, { "epoch": 1.5843366586400642, "grad_norm": 0.2759742736816406, "learning_rate": 0.00047883289460085406, "loss": 1.122, "step": 3455 }, { "epoch": 1.5847953216374269, "grad_norm": 0.36273276805877686, "learning_rate": 0.0004785854767253305, "loss": 1.7786, "step": 3456 }, { "epoch": 1.5852539846347895, "grad_norm": 0.2069203108549118, "learning_rate": 0.00047833806410294417, "loss": 0.7401, "step": 3457 }, { "epoch": 1.5857126476321524, "grad_norm": 0.2959374487400055, "learning_rate": 0.00047809065679438675, "loss": 1.3228, "step": 3458 }, { "epoch": 1.586171310629515, "grad_norm": 0.25699517130851746, "learning_rate": 0.00047784325486034917, "loss": 1.2399, "step": 3459 }, { "epoch": 1.5866299736268776, "grad_norm": 0.22081950306892395, "learning_rate": 0.00047759585836152095, "loss": 1.1349, "step": 3460 }, { "epoch": 1.5870886366242405, "grad_norm": 0.304353266954422, "learning_rate": 0.0004773484673585901, "loss": 1.9224, "step": 3461 }, { "epoch": 1.587547299621603, "grad_norm": 0.3038237392902374, "learning_rate": 0.00047710108191224387, "loss": 1.6517, "step": 3462 }, { "epoch": 1.5880059626189658, "grad_norm": 0.24518848955631256, "learning_rate": 0.00047685370208316717, "loss": 0.8, "step": 3463 }, { "epoch": 1.5884646256163284, "grad_norm": 0.2411755472421646, "learning_rate": 0.0004766063279320442, "loss": 1.7107, "step": 3464 }, { "epoch": 1.588923288613691, "grad_norm": 0.31453174352645874, "learning_rate": 0.0004763589595195576, "loss": 1.9206, "step": 3465 }, { "epoch": 1.589381951611054, "grad_norm": 0.36513078212738037, "learning_rate": 0.00047611159690638867, "loss": 1.6099, "step": 3466 }, { "epoch": 1.5898406146084163, "grad_norm": 0.2547532916069031, "learning_rate": 0.00047586424015321735, "loss": 1.4818, "step": 3467 }, { "epoch": 1.5902992776057792, "grad_norm": 0.35492321848869324, "learning_rate": 0.00047561688932072215, "loss": 1.6364, "step": 3468 }, { "epoch": 1.5907579406031418, "grad_norm": 0.2952052652835846, "learning_rate": 0.00047536954446957957, "loss": 1.6503, "step": 3469 }, { "epoch": 1.5912166036005044, "grad_norm": 0.3726816773414612, "learning_rate": 0.0004751222056604654, "loss": 2.0603, "step": 3470 }, { "epoch": 1.5916752665978673, "grad_norm": 0.33943477272987366, "learning_rate": 0.0004748748729540534, "loss": 1.558, "step": 3471 }, { "epoch": 1.59213392959523, "grad_norm": 0.35351845622062683, "learning_rate": 0.00047462754641101635, "loss": 1.6615, "step": 3472 }, { "epoch": 1.5925925925925926, "grad_norm": 0.40344950556755066, "learning_rate": 0.00047438022609202536, "loss": 1.6789, "step": 3473 }, { "epoch": 1.5930512555899554, "grad_norm": 0.16976001858711243, "learning_rate": 0.0004741329120577494, "loss": 0.945, "step": 3474 }, { "epoch": 1.5935099185873178, "grad_norm": 0.25279539823532104, "learning_rate": 0.00047388560436885656, "loss": 1.0529, "step": 3475 }, { "epoch": 1.5939685815846807, "grad_norm": 0.22543933987617493, "learning_rate": 0.0004736383030860132, "loss": 1.2178, "step": 3476 }, { "epoch": 1.5944272445820433, "grad_norm": 0.33394721150398254, "learning_rate": 0.00047339100826988427, "loss": 1.3074, "step": 3477 }, { "epoch": 1.594885907579406, "grad_norm": 0.30631691217422485, "learning_rate": 0.0004731437199811329, "loss": 1.2521, "step": 3478 }, { "epoch": 1.5953445705767688, "grad_norm": 0.20698705315589905, "learning_rate": 0.0004728964382804209, "loss": 1.071, "step": 3479 }, { "epoch": 1.5958032335741315, "grad_norm": 0.5738452076911926, "learning_rate": 0.00047264916322840774, "loss": 1.965, "step": 3480 }, { "epoch": 1.596261896571494, "grad_norm": 0.42928802967071533, "learning_rate": 0.000472401894885752, "loss": 1.6219, "step": 3481 }, { "epoch": 1.596720559568857, "grad_norm": 0.30077821016311646, "learning_rate": 0.00047215463331311047, "loss": 1.8263, "step": 3482 }, { "epoch": 1.5971792225662194, "grad_norm": 0.27442824840545654, "learning_rate": 0.00047190737857113823, "loss": 1.48, "step": 3483 }, { "epoch": 1.5976378855635822, "grad_norm": 0.3932221531867981, "learning_rate": 0.00047166013072048857, "loss": 1.9976, "step": 3484 }, { "epoch": 1.5980965485609449, "grad_norm": 0.30163654685020447, "learning_rate": 0.0004714128898218133, "loss": 1.4284, "step": 3485 }, { "epoch": 1.5985552115583075, "grad_norm": 0.29906541109085083, "learning_rate": 0.0004711656559357621, "loss": 1.1145, "step": 3486 }, { "epoch": 1.5990138745556703, "grad_norm": 0.2641544044017792, "learning_rate": 0.0004709184291229835, "loss": 1.3258, "step": 3487 }, { "epoch": 1.5994725375530328, "grad_norm": 0.2394290268421173, "learning_rate": 0.0004706712094441239, "loss": 1.6715, "step": 3488 }, { "epoch": 1.5999312005503956, "grad_norm": 0.33737921714782715, "learning_rate": 0.0004704239969598281, "loss": 1.3931, "step": 3489 }, { "epoch": 1.6003898635477583, "grad_norm": 0.2610185444355011, "learning_rate": 0.0004701767917307391, "loss": 1.7404, "step": 3490 }, { "epoch": 1.6008485265451209, "grad_norm": 0.22963584959506989, "learning_rate": 0.00046992959381749816, "loss": 1.2682, "step": 3491 }, { "epoch": 1.6013071895424837, "grad_norm": 0.28074997663497925, "learning_rate": 0.00046968240328074465, "loss": 1.2201, "step": 3492 }, { "epoch": 1.6017658525398464, "grad_norm": 0.29938602447509766, "learning_rate": 0.00046943522018111616, "loss": 1.4655, "step": 3493 }, { "epoch": 1.602224515537209, "grad_norm": 0.2991687059402466, "learning_rate": 0.0004691880445792486, "loss": 2.1379, "step": 3494 }, { "epoch": 1.6026831785345719, "grad_norm": 0.2304016351699829, "learning_rate": 0.0004689408765357758, "loss": 0.7851, "step": 3495 }, { "epoch": 1.6031418415319343, "grad_norm": 0.3397028148174286, "learning_rate": 0.0004686937161113301, "loss": 1.3103, "step": 3496 }, { "epoch": 1.6036005045292971, "grad_norm": 0.23544049263000488, "learning_rate": 0.0004684465633665415, "loss": 1.593, "step": 3497 }, { "epoch": 1.6040591675266598, "grad_norm": 0.2129555195569992, "learning_rate": 0.0004681994183620383, "loss": 0.743, "step": 3498 }, { "epoch": 1.6045178305240224, "grad_norm": 0.4064857065677643, "learning_rate": 0.0004679522811584471, "loss": 1.6642, "step": 3499 }, { "epoch": 1.6049764935213853, "grad_norm": 0.1782543659210205, "learning_rate": 0.0004677051518163925, "loss": 0.8695, "step": 3500 }, { "epoch": 1.6054351565187477, "grad_norm": 0.21518990397453308, "learning_rate": 0.00046745803039649703, "loss": 0.8586, "step": 3501 }, { "epoch": 1.6058938195161105, "grad_norm": 0.3452260494232178, "learning_rate": 0.0004672109169593813, "loss": 1.7291, "step": 3502 }, { "epoch": 1.6063524825134732, "grad_norm": 0.20058414340019226, "learning_rate": 0.00046696381156566405, "loss": 1.2682, "step": 3503 }, { "epoch": 1.6068111455108358, "grad_norm": 0.2975291609764099, "learning_rate": 0.00046671671427596194, "loss": 1.2484, "step": 3504 }, { "epoch": 1.6072698085081987, "grad_norm": 0.6816866397857666, "learning_rate": 0.0004664696251508899, "loss": 1.4316, "step": 3505 }, { "epoch": 1.6077284715055613, "grad_norm": 0.27953004837036133, "learning_rate": 0.00046622254425106053, "loss": 1.7111, "step": 3506 }, { "epoch": 1.608187134502924, "grad_norm": 0.34763795137405396, "learning_rate": 0.0004659754716370848, "loss": 1.1754, "step": 3507 }, { "epoch": 1.6086457975002868, "grad_norm": 0.2617149353027344, "learning_rate": 0.00046572840736957096, "loss": 1.636, "step": 3508 }, { "epoch": 1.6091044604976492, "grad_norm": 0.3082357943058014, "learning_rate": 0.00046548135150912596, "loss": 1.7413, "step": 3509 }, { "epoch": 1.609563123495012, "grad_norm": 0.2684236764907837, "learning_rate": 0.00046523430411635436, "loss": 0.8466, "step": 3510 }, { "epoch": 1.6100217864923747, "grad_norm": 0.2572011649608612, "learning_rate": 0.00046498726525185866, "loss": 1.6624, "step": 3511 }, { "epoch": 1.6104804494897373, "grad_norm": 0.27827000617980957, "learning_rate": 0.0004647402349762392, "loss": 1.3497, "step": 3512 }, { "epoch": 1.6109391124871002, "grad_norm": 0.2422563135623932, "learning_rate": 0.00046449321335009444, "loss": 0.9238, "step": 3513 }, { "epoch": 1.6113977754844628, "grad_norm": 0.2140582948923111, "learning_rate": 0.0004642462004340203, "loss": 1.4498, "step": 3514 }, { "epoch": 1.6118564384818255, "grad_norm": 0.3195685148239136, "learning_rate": 0.0004639991962886111, "loss": 1.6652, "step": 3515 }, { "epoch": 1.6123151014791883, "grad_norm": 0.40083667635917664, "learning_rate": 0.0004637522009744586, "loss": 1.7421, "step": 3516 }, { "epoch": 1.6127737644765507, "grad_norm": 0.2781684398651123, "learning_rate": 0.0004635052145521525, "loss": 1.3621, "step": 3517 }, { "epoch": 1.6132324274739136, "grad_norm": 0.3488773703575134, "learning_rate": 0.00046325823708228054, "loss": 1.6398, "step": 3518 }, { "epoch": 1.6136910904712762, "grad_norm": 0.3113149106502533, "learning_rate": 0.0004630112686254279, "loss": 1.9727, "step": 3519 }, { "epoch": 1.6141497534686389, "grad_norm": 0.3670024871826172, "learning_rate": 0.0004627643092421777, "loss": 1.7104, "step": 3520 }, { "epoch": 1.6146084164660017, "grad_norm": 0.2058316022157669, "learning_rate": 0.00046251735899311097, "loss": 1.31, "step": 3521 }, { "epoch": 1.6150670794633641, "grad_norm": 0.2428571581840515, "learning_rate": 0.0004622704179388063, "loss": 1.3736, "step": 3522 }, { "epoch": 1.615525742460727, "grad_norm": 0.4996251165866852, "learning_rate": 0.00046202348613984007, "loss": 1.9026, "step": 3523 }, { "epoch": 1.6159844054580896, "grad_norm": 0.33074381947517395, "learning_rate": 0.0004617765636567869, "loss": 1.9189, "step": 3524 }, { "epoch": 1.6164430684554523, "grad_norm": 0.2520156800746918, "learning_rate": 0.00046152965055021794, "loss": 0.8857, "step": 3525 }, { "epoch": 1.6169017314528151, "grad_norm": 0.28665515780448914, "learning_rate": 0.00046128274688070315, "loss": 1.8541, "step": 3526 }, { "epoch": 1.6173603944501778, "grad_norm": 0.18662500381469727, "learning_rate": 0.0004610358527088097, "loss": 0.7944, "step": 3527 }, { "epoch": 1.6178190574475404, "grad_norm": 0.3536602258682251, "learning_rate": 0.00046078896809510246, "loss": 1.3347, "step": 3528 }, { "epoch": 1.6182777204449033, "grad_norm": 0.3583596348762512, "learning_rate": 0.00046054209310014433, "loss": 1.6628, "step": 3529 }, { "epoch": 1.6187363834422657, "grad_norm": 0.6140498518943787, "learning_rate": 0.0004602952277844951, "loss": 2.1606, "step": 3530 }, { "epoch": 1.6191950464396285, "grad_norm": 1.5802074670791626, "learning_rate": 0.00046004837220871276, "loss": 1.8225, "step": 3531 }, { "epoch": 1.6196537094369912, "grad_norm": 0.33315399289131165, "learning_rate": 0.0004598015264333528, "loss": 1.4759, "step": 3532 }, { "epoch": 1.6201123724343538, "grad_norm": 0.24571913480758667, "learning_rate": 0.00045955469051896816, "loss": 1.5706, "step": 3533 }, { "epoch": 1.6205710354317167, "grad_norm": 0.33706846833229065, "learning_rate": 0.0004593078645261096, "loss": 1.7424, "step": 3534 }, { "epoch": 1.621029698429079, "grad_norm": 0.29877063632011414, "learning_rate": 0.0004590610485153255, "loss": 1.397, "step": 3535 }, { "epoch": 1.621488361426442, "grad_norm": 0.33204445242881775, "learning_rate": 0.00045881424254716127, "loss": 1.1624, "step": 3536 }, { "epoch": 1.6219470244238046, "grad_norm": 0.2581148147583008, "learning_rate": 0.00045856744668216023, "loss": 1.7374, "step": 3537 }, { "epoch": 1.6224056874211672, "grad_norm": 0.33581775426864624, "learning_rate": 0.00045832066098086316, "loss": 1.9, "step": 3538 }, { "epoch": 1.62286435041853, "grad_norm": 0.1745557337999344, "learning_rate": 0.00045807388550380855, "loss": 0.8279, "step": 3539 }, { "epoch": 1.6233230134158927, "grad_norm": 0.31032976508140564, "learning_rate": 0.00045782712031153223, "loss": 1.6848, "step": 3540 }, { "epoch": 1.6237816764132553, "grad_norm": 0.1794162392616272, "learning_rate": 0.0004575803654645675, "loss": 0.5406, "step": 3541 }, { "epoch": 1.6242403394106182, "grad_norm": 0.19714608788490295, "learning_rate": 0.00045733362102344483, "loss": 1.3489, "step": 3542 }, { "epoch": 1.6246990024079806, "grad_norm": 0.3311532139778137, "learning_rate": 0.0004570868870486924, "loss": 1.7387, "step": 3543 }, { "epoch": 1.6251576654053435, "grad_norm": 0.3105451762676239, "learning_rate": 0.00045684016360083614, "loss": 1.4056, "step": 3544 }, { "epoch": 1.625616328402706, "grad_norm": 0.14965184032917023, "learning_rate": 0.00045659345074039893, "loss": 0.7161, "step": 3545 }, { "epoch": 1.6260749914000687, "grad_norm": 0.2359105944633484, "learning_rate": 0.0004563467485279015, "loss": 1.7198, "step": 3546 }, { "epoch": 1.6265336543974316, "grad_norm": 0.3810403645038605, "learning_rate": 0.00045610005702386114, "loss": 1.8206, "step": 3547 }, { "epoch": 1.6269923173947942, "grad_norm": 0.2406771034002304, "learning_rate": 0.0004558533762887932, "loss": 1.6469, "step": 3548 }, { "epoch": 1.6274509803921569, "grad_norm": 0.29384976625442505, "learning_rate": 0.00045560670638321055, "loss": 1.3338, "step": 3549 }, { "epoch": 1.6279096433895197, "grad_norm": 0.3127608597278595, "learning_rate": 0.0004553600473676229, "loss": 1.8749, "step": 3550 }, { "epoch": 1.6283683063868821, "grad_norm": 0.24576979875564575, "learning_rate": 0.0004551133993025374, "loss": 0.7694, "step": 3551 }, { "epoch": 1.628826969384245, "grad_norm": 0.315415620803833, "learning_rate": 0.000454866762248459, "loss": 1.6774, "step": 3552 }, { "epoch": 1.6292856323816076, "grad_norm": 0.2339719533920288, "learning_rate": 0.00045462013626588896, "loss": 0.7991, "step": 3553 }, { "epoch": 1.6297442953789703, "grad_norm": 0.2772282361984253, "learning_rate": 0.0004543735214153267, "loss": 1.3975, "step": 3554 }, { "epoch": 1.6302029583763331, "grad_norm": 0.2241169661283493, "learning_rate": 0.00045412691775726874, "loss": 0.8122, "step": 3555 }, { "epoch": 1.6306616213736955, "grad_norm": 0.10823415219783783, "learning_rate": 0.0004538803253522086, "loss": 0.9379, "step": 3556 }, { "epoch": 1.6311202843710584, "grad_norm": 0.3783659040927887, "learning_rate": 0.00045363374426063717, "loss": 2.1521, "step": 3557 }, { "epoch": 1.631578947368421, "grad_norm": 0.3071146309375763, "learning_rate": 0.00045338717454304265, "loss": 0.8087, "step": 3558 }, { "epoch": 1.6320376103657837, "grad_norm": 0.2720809876918793, "learning_rate": 0.0004531406162599102, "loss": 1.369, "step": 3559 }, { "epoch": 1.6324962733631465, "grad_norm": 0.21303652226924896, "learning_rate": 0.0004528940694717225, "loss": 1.4075, "step": 3560 }, { "epoch": 1.6329549363605091, "grad_norm": 0.2808886766433716, "learning_rate": 0.0004526475342389592, "loss": 1.2719, "step": 3561 }, { "epoch": 1.6334135993578718, "grad_norm": 0.21272645890712738, "learning_rate": 0.0004524010106220972, "loss": 1.0988, "step": 3562 }, { "epoch": 1.6338722623552346, "grad_norm": 0.49326127767562866, "learning_rate": 0.00045215449868161057, "loss": 1.6325, "step": 3563 }, { "epoch": 1.634330925352597, "grad_norm": 0.19443345069885254, "learning_rate": 0.0004519079984779703, "loss": 0.8355, "step": 3564 }, { "epoch": 1.63478958834996, "grad_norm": 0.2636580169200897, "learning_rate": 0.0004516615100716448, "loss": 1.3528, "step": 3565 }, { "epoch": 1.6352482513473225, "grad_norm": 0.2543386220932007, "learning_rate": 0.0004514150335230994, "loss": 1.15, "step": 3566 }, { "epoch": 1.6357069143446852, "grad_norm": 0.18656910955905914, "learning_rate": 0.0004511685688927966, "loss": 1.1378, "step": 3567 }, { "epoch": 1.636165577342048, "grad_norm": 0.2844998836517334, "learning_rate": 0.0004509221162411959, "loss": 1.1173, "step": 3568 }, { "epoch": 1.6366242403394105, "grad_norm": 0.1812886893749237, "learning_rate": 0.00045067567562875405, "loss": 1.0365, "step": 3569 }, { "epoch": 1.6370829033367733, "grad_norm": 0.20662900805473328, "learning_rate": 0.0004504292471159246, "loss": 1.2693, "step": 3570 }, { "epoch": 1.637541566334136, "grad_norm": 0.2970770299434662, "learning_rate": 0.00045018283076315817, "loss": 1.7665, "step": 3571 }, { "epoch": 1.6380002293314986, "grad_norm": 0.22958998382091522, "learning_rate": 0.0004499364266309026, "loss": 0.7569, "step": 3572 }, { "epoch": 1.6384588923288614, "grad_norm": 0.22331257164478302, "learning_rate": 0.0004496900347796025, "loss": 1.5889, "step": 3573 }, { "epoch": 1.638917555326224, "grad_norm": 0.2571808695793152, "learning_rate": 0.00044944365526969983, "loss": 0.8337, "step": 3574 }, { "epoch": 1.6393762183235867, "grad_norm": 0.17887678742408752, "learning_rate": 0.0004491972881616329, "loss": 0.6978, "step": 3575 }, { "epoch": 1.6398348813209496, "grad_norm": 0.24759553372859955, "learning_rate": 0.00044895093351583743, "loss": 1.2387, "step": 3576 }, { "epoch": 1.640293544318312, "grad_norm": 0.3115570545196533, "learning_rate": 0.0004487045913927461, "loss": 1.6693, "step": 3577 }, { "epoch": 1.6407522073156748, "grad_norm": 0.32540813088417053, "learning_rate": 0.00044845826185278836, "loss": 1.0903, "step": 3578 }, { "epoch": 1.6412108703130375, "grad_norm": 0.08525694906711578, "learning_rate": 0.0004482119449563906, "loss": 0.5742, "step": 3579 }, { "epoch": 1.6416695333104, "grad_norm": 0.12443814426660538, "learning_rate": 0.0004479656407639763, "loss": 1.0961, "step": 3580 }, { "epoch": 1.642128196307763, "grad_norm": 0.2620854079723358, "learning_rate": 0.00044771934933596544, "loss": 1.0662, "step": 3581 }, { "epoch": 1.6425868593051256, "grad_norm": 0.20561723411083221, "learning_rate": 0.00044747307073277504, "loss": 1.4257, "step": 3582 }, { "epoch": 1.6430455223024882, "grad_norm": 0.42892375588417053, "learning_rate": 0.00044722680501481924, "loss": 1.651, "step": 3583 }, { "epoch": 1.643504185299851, "grad_norm": 0.18103362619876862, "learning_rate": 0.00044698055224250854, "loss": 1.4236, "step": 3584 }, { "epoch": 1.6439628482972135, "grad_norm": 0.3172457218170166, "learning_rate": 0.0004467343124762509, "loss": 1.7086, "step": 3585 }, { "epoch": 1.6444215112945764, "grad_norm": 0.3391329348087311, "learning_rate": 0.0004464880857764503, "loss": 1.2327, "step": 3586 }, { "epoch": 1.644880174291939, "grad_norm": 0.2603316307067871, "learning_rate": 0.00044624187220350815, "loss": 1.8528, "step": 3587 }, { "epoch": 1.6453388372893016, "grad_norm": 0.2531551122665405, "learning_rate": 0.0004459956718178224, "loss": 1.1763, "step": 3588 }, { "epoch": 1.6457975002866645, "grad_norm": 0.3372352719306946, "learning_rate": 0.00044574948467978763, "loss": 1.3344, "step": 3589 }, { "epoch": 1.646256163284027, "grad_norm": 0.2342667281627655, "learning_rate": 0.0004455033108497954, "loss": 1.1017, "step": 3590 }, { "epoch": 1.6467148262813898, "grad_norm": 0.21960115432739258, "learning_rate": 0.0004452571503882342, "loss": 0.8768, "step": 3591 }, { "epoch": 1.6471734892787524, "grad_norm": 0.24046221375465393, "learning_rate": 0.0004450110033554886, "loss": 1.0574, "step": 3592 }, { "epoch": 1.647632152276115, "grad_norm": 0.22882205247879028, "learning_rate": 0.00044476486981194035, "loss": 1.2656, "step": 3593 }, { "epoch": 1.648090815273478, "grad_norm": 0.21356913447380066, "learning_rate": 0.0004445187498179678, "loss": 0.9254, "step": 3594 }, { "epoch": 1.6485494782708405, "grad_norm": 0.2324090301990509, "learning_rate": 0.00044427264343394583, "loss": 1.6883, "step": 3595 }, { "epoch": 1.6490081412682032, "grad_norm": 0.19994021952152252, "learning_rate": 0.0004440265507202464, "loss": 0.803, "step": 3596 }, { "epoch": 1.649466804265566, "grad_norm": 0.22320851683616638, "learning_rate": 0.0004437804717372378, "loss": 1.3083, "step": 3597 }, { "epoch": 1.6499254672629284, "grad_norm": 0.3010443150997162, "learning_rate": 0.0004435344065452847, "loss": 1.4539, "step": 3598 }, { "epoch": 1.6503841302602913, "grad_norm": 0.27338162064552307, "learning_rate": 0.0004432883552047488, "loss": 1.2612, "step": 3599 }, { "epoch": 1.650842793257654, "grad_norm": 0.2307090312242508, "learning_rate": 0.0004430423177759882, "loss": 0.721, "step": 3600 }, { "epoch": 1.6513014562550166, "grad_norm": 0.26010721921920776, "learning_rate": 0.0004427962943193578, "loss": 1.3625, "step": 3601 }, { "epoch": 1.6517601192523794, "grad_norm": 0.302643746137619, "learning_rate": 0.0004425502848952091, "loss": 1.3484, "step": 3602 }, { "epoch": 1.652218782249742, "grad_norm": 0.32459530234336853, "learning_rate": 0.0004423042895638895, "loss": 1.6387, "step": 3603 }, { "epoch": 1.6526774452471047, "grad_norm": 0.1907692402601242, "learning_rate": 0.0004420583083857437, "loss": 0.7533, "step": 3604 }, { "epoch": 1.6531361082444673, "grad_norm": 0.26839619874954224, "learning_rate": 0.00044181234142111255, "loss": 1.3954, "step": 3605 }, { "epoch": 1.65359477124183, "grad_norm": 0.09783615916967392, "learning_rate": 0.0004415663887303337, "loss": 0.8584, "step": 3606 }, { "epoch": 1.6540534342391928, "grad_norm": 0.42067283391952515, "learning_rate": 0.00044132045037374094, "loss": 1.8727, "step": 3607 }, { "epoch": 1.6545120972365555, "grad_norm": 0.32053786516189575, "learning_rate": 0.00044107452641166514, "loss": 1.4919, "step": 3608 }, { "epoch": 1.654970760233918, "grad_norm": 0.2961629331111908, "learning_rate": 0.0004408286169044326, "loss": 1.5599, "step": 3609 }, { "epoch": 1.655429423231281, "grad_norm": 0.16102895140647888, "learning_rate": 0.0004405827219123669, "loss": 0.568, "step": 3610 }, { "epoch": 1.6558880862286434, "grad_norm": 0.2084360271692276, "learning_rate": 0.0004403368414957881, "loss": 1.0012, "step": 3611 }, { "epoch": 1.6563467492260062, "grad_norm": 0.222111776471138, "learning_rate": 0.00044009097571501217, "loss": 1.2141, "step": 3612 }, { "epoch": 1.6568054122233689, "grad_norm": 0.3039361536502838, "learning_rate": 0.0004398451246303521, "loss": 1.1281, "step": 3613 }, { "epoch": 1.6572640752207315, "grad_norm": 0.19220460951328278, "learning_rate": 0.00043959928830211655, "loss": 1.2178, "step": 3614 }, { "epoch": 1.6577227382180943, "grad_norm": 0.23995845019817352, "learning_rate": 0.0004393534667906109, "loss": 0.944, "step": 3615 }, { "epoch": 1.658181401215457, "grad_norm": 0.2802288830280304, "learning_rate": 0.00043910766015613727, "loss": 0.8565, "step": 3616 }, { "epoch": 1.6586400642128196, "grad_norm": 0.17965088784694672, "learning_rate": 0.00043886186845899366, "loss": 0.9316, "step": 3617 }, { "epoch": 1.6590987272101825, "grad_norm": 0.33390891551971436, "learning_rate": 0.0004386160917594746, "loss": 1.7971, "step": 3618 }, { "epoch": 1.659557390207545, "grad_norm": 0.19103044271469116, "learning_rate": 0.00043837033011787097, "loss": 0.9313, "step": 3619 }, { "epoch": 1.6600160532049077, "grad_norm": 0.23847994208335876, "learning_rate": 0.00043812458359446943, "loss": 1.623, "step": 3620 }, { "epoch": 1.6604747162022704, "grad_norm": 0.26979711651802063, "learning_rate": 0.0004378788522495538, "loss": 1.0571, "step": 3621 }, { "epoch": 1.660933379199633, "grad_norm": 0.2273758053779602, "learning_rate": 0.0004376331361434036, "loss": 1.2567, "step": 3622 }, { "epoch": 1.6613920421969959, "grad_norm": 0.3926750123500824, "learning_rate": 0.00043738743533629486, "loss": 2.0264, "step": 3623 }, { "epoch": 1.6618507051943583, "grad_norm": 0.3111841380596161, "learning_rate": 0.00043714174988849965, "loss": 1.1513, "step": 3624 }, { "epoch": 1.6623093681917211, "grad_norm": 0.12631827592849731, "learning_rate": 0.0004368960798602865, "loss": 0.8732, "step": 3625 }, { "epoch": 1.6627680311890838, "grad_norm": 0.18259353935718536, "learning_rate": 0.0004366504253119199, "loss": 0.5492, "step": 3626 }, { "epoch": 1.6632266941864464, "grad_norm": 0.27222898602485657, "learning_rate": 0.00043640478630366074, "loss": 1.4242, "step": 3627 }, { "epoch": 1.6636853571838093, "grad_norm": 0.31462812423706055, "learning_rate": 0.0004361591628957661, "loss": 1.7146, "step": 3628 }, { "epoch": 1.664144020181172, "grad_norm": 0.28236204385757446, "learning_rate": 0.00043591355514848904, "loss": 1.4069, "step": 3629 }, { "epoch": 1.6646026831785345, "grad_norm": 0.23949186503887177, "learning_rate": 0.0004356679631220791, "loss": 0.7886, "step": 3630 }, { "epoch": 1.6650613461758974, "grad_norm": 0.06866566091775894, "learning_rate": 0.00043542238687678157, "loss": 0.4383, "step": 3631 }, { "epoch": 1.6655200091732598, "grad_norm": 0.2557963728904724, "learning_rate": 0.00043517682647283815, "loss": 1.4226, "step": 3632 }, { "epoch": 1.6659786721706227, "grad_norm": 0.19061607122421265, "learning_rate": 0.00043493128197048657, "loss": 1.2208, "step": 3633 }, { "epoch": 1.6664373351679853, "grad_norm": 0.3818952143192291, "learning_rate": 0.00043468575342996076, "loss": 2.0298, "step": 3634 }, { "epoch": 1.666895998165348, "grad_norm": 0.3634778559207916, "learning_rate": 0.0004344402409114906, "loss": 1.5644, "step": 3635 }, { "epoch": 1.6673546611627108, "grad_norm": 0.17684818804264069, "learning_rate": 0.00043419474447530204, "loss": 1.0081, "step": 3636 }, { "epoch": 1.6678133241600734, "grad_norm": 0.2824413478374481, "learning_rate": 0.0004339492641816171, "loss": 1.5297, "step": 3637 }, { "epoch": 1.668271987157436, "grad_norm": 0.34963420033454895, "learning_rate": 0.00043370380009065396, "loss": 1.3151, "step": 3638 }, { "epoch": 1.6687306501547987, "grad_norm": 0.1694229692220688, "learning_rate": 0.00043345835226262663, "loss": 1.2376, "step": 3639 }, { "epoch": 1.6691893131521613, "grad_norm": 0.26671671867370605, "learning_rate": 0.00043321292075774526, "loss": 1.6826, "step": 3640 }, { "epoch": 1.6696479761495242, "grad_norm": 0.3384977877140045, "learning_rate": 0.00043296750563621614, "loss": 1.525, "step": 3641 }, { "epoch": 1.6701066391468868, "grad_norm": 0.2929702699184418, "learning_rate": 0.0004327221069582411, "loss": 2.0645, "step": 3642 }, { "epoch": 1.6705653021442495, "grad_norm": 0.3001018762588501, "learning_rate": 0.0004324767247840183, "loss": 1.6188, "step": 3643 }, { "epoch": 1.6710239651416123, "grad_norm": 0.2780381441116333, "learning_rate": 0.0004322313591737418, "loss": 1.1631, "step": 3644 }, { "epoch": 1.6714826281389747, "grad_norm": 0.19090348482131958, "learning_rate": 0.00043198601018760145, "loss": 0.9705, "step": 3645 }, { "epoch": 1.6719412911363376, "grad_norm": 0.17542727291584015, "learning_rate": 0.0004317406778857833, "loss": 1.0067, "step": 3646 }, { "epoch": 1.6723999541337002, "grad_norm": 0.3193509876728058, "learning_rate": 0.00043149536232846915, "loss": 2.0522, "step": 3647 }, { "epoch": 1.6728586171310629, "grad_norm": 0.262650728225708, "learning_rate": 0.00043125006357583643, "loss": 1.2916, "step": 3648 }, { "epoch": 1.6733172801284257, "grad_norm": 0.2200448364019394, "learning_rate": 0.0004310047816880588, "loss": 1.0105, "step": 3649 }, { "epoch": 1.6737759431257884, "grad_norm": 0.31807059049606323, "learning_rate": 0.00043075951672530573, "loss": 1.375, "step": 3650 }, { "epoch": 1.674234606123151, "grad_norm": 0.23543456196784973, "learning_rate": 0.0004305142687477425, "loss": 1.4635, "step": 3651 }, { "epoch": 1.6746932691205139, "grad_norm": 0.7807444930076599, "learning_rate": 0.00043026903781553016, "loss": 0.9573, "step": 3652 }, { "epoch": 1.6751519321178763, "grad_norm": 0.18116839230060577, "learning_rate": 0.0004300238239888256, "loss": 1.1865, "step": 3653 }, { "epoch": 1.6756105951152391, "grad_norm": 0.32931381464004517, "learning_rate": 0.00042977862732778154, "loss": 1.6169, "step": 3654 }, { "epoch": 1.6760692581126018, "grad_norm": 0.32929980754852295, "learning_rate": 0.0004295334478925466, "loss": 1.5129, "step": 3655 }, { "epoch": 1.6765279211099644, "grad_norm": 0.26823946833610535, "learning_rate": 0.0004292882857432649, "loss": 1.6238, "step": 3656 }, { "epoch": 1.6769865841073273, "grad_norm": 0.3574983775615692, "learning_rate": 0.00042904314094007655, "loss": 2.0161, "step": 3657 }, { "epoch": 1.6774452471046897, "grad_norm": 0.45830219984054565, "learning_rate": 0.0004287980135431175, "loss": 1.4208, "step": 3658 }, { "epoch": 1.6779039101020525, "grad_norm": 0.3208571672439575, "learning_rate": 0.000428552903612519, "loss": 1.8867, "step": 3659 }, { "epoch": 1.6783625730994152, "grad_norm": 0.11624974012374878, "learning_rate": 0.00042830781120840845, "loss": 1.0681, "step": 3660 }, { "epoch": 1.6788212360967778, "grad_norm": 0.2735399901866913, "learning_rate": 0.0004280627363909087, "loss": 0.9048, "step": 3661 }, { "epoch": 1.6792798990941407, "grad_norm": 0.31739717721939087, "learning_rate": 0.0004278176792201383, "loss": 1.7313, "step": 3662 }, { "epoch": 1.6797385620915033, "grad_norm": 0.3343389332294464, "learning_rate": 0.00042757263975621174, "loss": 1.8636, "step": 3663 }, { "epoch": 1.680197225088866, "grad_norm": 0.35586023330688477, "learning_rate": 0.0004273276180592391, "loss": 1.7568, "step": 3664 }, { "epoch": 1.6806558880862288, "grad_norm": 0.31270912289619446, "learning_rate": 0.0004270826141893256, "loss": 1.6099, "step": 3665 }, { "epoch": 1.6811145510835912, "grad_norm": 0.4064270555973053, "learning_rate": 0.0004268376282065725, "loss": 1.5682, "step": 3666 }, { "epoch": 1.681573214080954, "grad_norm": 0.24555212259292603, "learning_rate": 0.00042659266017107666, "loss": 0.9243, "step": 3667 }, { "epoch": 1.6820318770783167, "grad_norm": 0.37791720032691956, "learning_rate": 0.0004263477101429307, "loss": 1.3367, "step": 3668 }, { "epoch": 1.6824905400756793, "grad_norm": 0.25306499004364014, "learning_rate": 0.0004261027781822227, "loss": 1.5412, "step": 3669 }, { "epoch": 1.6829492030730422, "grad_norm": 0.23409046232700348, "learning_rate": 0.00042585786434903584, "loss": 0.8784, "step": 3670 }, { "epoch": 1.6834078660704048, "grad_norm": 0.23634447157382965, "learning_rate": 0.00042561296870344945, "loss": 1.4553, "step": 3671 }, { "epoch": 1.6838665290677675, "grad_norm": 0.29898911714553833, "learning_rate": 0.0004253680913055381, "loss": 1.2293, "step": 3672 }, { "epoch": 1.68432519206513, "grad_norm": 0.20857423543930054, "learning_rate": 0.00042512323221537206, "loss": 1.5924, "step": 3673 }, { "epoch": 1.6847838550624927, "grad_norm": 0.4438040852546692, "learning_rate": 0.0004248783914930172, "loss": 1.8789, "step": 3674 }, { "epoch": 1.6852425180598556, "grad_norm": 0.33179306983947754, "learning_rate": 0.0004246335691985347, "loss": 1.343, "step": 3675 }, { "epoch": 1.6857011810572182, "grad_norm": 0.3002280294895172, "learning_rate": 0.0004243887653919809, "loss": 1.6851, "step": 3676 }, { "epoch": 1.6861598440545809, "grad_norm": 0.2567557692527771, "learning_rate": 0.00042414398013340806, "loss": 1.5004, "step": 3677 }, { "epoch": 1.6866185070519437, "grad_norm": 0.28894615173339844, "learning_rate": 0.00042389921348286386, "loss": 1.1944, "step": 3678 }, { "epoch": 1.6870771700493061, "grad_norm": 0.30856838822364807, "learning_rate": 0.00042365446550039136, "loss": 1.746, "step": 3679 }, { "epoch": 1.687535833046669, "grad_norm": 0.24894581735134125, "learning_rate": 0.0004234097362460292, "loss": 0.8261, "step": 3680 }, { "epoch": 1.6879944960440316, "grad_norm": 0.16843147575855255, "learning_rate": 0.00042316502577981077, "loss": 1.1577, "step": 3681 }, { "epoch": 1.6884531590413943, "grad_norm": 0.37144291400909424, "learning_rate": 0.00042292033416176534, "loss": 1.601, "step": 3682 }, { "epoch": 1.6889118220387571, "grad_norm": 0.28090983629226685, "learning_rate": 0.0004226756614519179, "loss": 1.4488, "step": 3683 }, { "epoch": 1.6893704850361198, "grad_norm": 0.2437400072813034, "learning_rate": 0.0004224310077102882, "loss": 1.2493, "step": 3684 }, { "epoch": 1.6898291480334824, "grad_norm": 0.25606828927993774, "learning_rate": 0.0004221863729968917, "loss": 1.2882, "step": 3685 }, { "epoch": 1.6902878110308452, "grad_norm": 0.1753174066543579, "learning_rate": 0.00042194175737173906, "loss": 0.7293, "step": 3686 }, { "epoch": 1.6907464740282077, "grad_norm": 0.2955300211906433, "learning_rate": 0.0004216971608948359, "loss": 1.5758, "step": 3687 }, { "epoch": 1.6912051370255705, "grad_norm": 0.3101601004600525, "learning_rate": 0.00042145258362618384, "loss": 1.5141, "step": 3688 }, { "epoch": 1.6916638000229332, "grad_norm": 0.3169883191585541, "learning_rate": 0.00042120802562577945, "loss": 1.5436, "step": 3689 }, { "epoch": 1.6921224630202958, "grad_norm": 0.3298269510269165, "learning_rate": 0.00042096348695361437, "loss": 1.8237, "step": 3690 }, { "epoch": 1.6925811260176586, "grad_norm": 0.3161281645298004, "learning_rate": 0.0004207189676696759, "loss": 1.5731, "step": 3691 }, { "epoch": 1.693039789015021, "grad_norm": 0.3234587609767914, "learning_rate": 0.0004204744678339464, "loss": 1.3514, "step": 3692 }, { "epoch": 1.693498452012384, "grad_norm": 0.19994625449180603, "learning_rate": 0.0004202299875064033, "loss": 0.9323, "step": 3693 }, { "epoch": 1.6939571150097466, "grad_norm": 0.2861116826534271, "learning_rate": 0.0004199855267470193, "loss": 1.4347, "step": 3694 }, { "epoch": 1.6944157780071092, "grad_norm": 0.32218021154403687, "learning_rate": 0.00041974108561576264, "loss": 1.743, "step": 3695 }, { "epoch": 1.694874441004472, "grad_norm": 0.33372506499290466, "learning_rate": 0.00041949666417259637, "loss": 1.1621, "step": 3696 }, { "epoch": 1.6953331040018347, "grad_norm": 0.37846240401268005, "learning_rate": 0.0004192522624774789, "loss": 1.1866, "step": 3697 }, { "epoch": 1.6957917669991973, "grad_norm": 0.2000287026166916, "learning_rate": 0.00041900788059036354, "loss": 1.3976, "step": 3698 }, { "epoch": 1.6962504299965602, "grad_norm": 0.37890589237213135, "learning_rate": 0.0004187635185711991, "loss": 1.7291, "step": 3699 }, { "epoch": 1.6967090929939226, "grad_norm": 0.30530160665512085, "learning_rate": 0.0004185191764799293, "loss": 1.0033, "step": 3700 }, { "epoch": 1.6971677559912854, "grad_norm": 0.24736268818378448, "learning_rate": 0.00041827485437649306, "loss": 1.6701, "step": 3701 }, { "epoch": 1.697626418988648, "grad_norm": 0.2429756373167038, "learning_rate": 0.00041803055232082423, "loss": 0.8325, "step": 3702 }, { "epoch": 1.6980850819860107, "grad_norm": 0.297656774520874, "learning_rate": 0.00041778627037285205, "loss": 1.4572, "step": 3703 }, { "epoch": 1.6985437449833736, "grad_norm": 0.26800161600112915, "learning_rate": 0.0004175420085925005, "loss": 1.7028, "step": 3704 }, { "epoch": 1.6990024079807362, "grad_norm": 0.27825766801834106, "learning_rate": 0.0004172977670396887, "loss": 1.6484, "step": 3705 }, { "epoch": 1.6994610709780988, "grad_norm": 0.2302626520395279, "learning_rate": 0.0004170535457743311, "loss": 0.9787, "step": 3706 }, { "epoch": 1.6999197339754615, "grad_norm": 0.22059178352355957, "learning_rate": 0.0004168093448563367, "loss": 0.9163, "step": 3707 }, { "epoch": 1.7003783969728241, "grad_norm": 0.28476452827453613, "learning_rate": 0.0004165651643456099, "loss": 1.9269, "step": 3708 }, { "epoch": 1.700837059970187, "grad_norm": 0.3191598057746887, "learning_rate": 0.0004163210043020499, "loss": 1.7429, "step": 3709 }, { "epoch": 1.7012957229675496, "grad_norm": 0.5909731984138489, "learning_rate": 0.00041607686478555076, "loss": 1.2906, "step": 3710 }, { "epoch": 1.7017543859649122, "grad_norm": 0.2239387482404709, "learning_rate": 0.00041583274585600195, "loss": 0.8284, "step": 3711 }, { "epoch": 1.702213048962275, "grad_norm": 0.3220686912536621, "learning_rate": 0.0004155886475732874, "loss": 1.9756, "step": 3712 }, { "epoch": 1.7026717119596375, "grad_norm": 0.23089122772216797, "learning_rate": 0.0004153445699972862, "loss": 1.3217, "step": 3713 }, { "epoch": 1.7031303749570004, "grad_norm": 0.2528243064880371, "learning_rate": 0.0004151005131878725, "loss": 0.8455, "step": 3714 }, { "epoch": 1.703589037954363, "grad_norm": 0.10433992743492126, "learning_rate": 0.00041485647720491503, "loss": 1.0346, "step": 3715 }, { "epoch": 1.7040477009517256, "grad_norm": 0.26862263679504395, "learning_rate": 0.0004146124621082775, "loss": 1.2124, "step": 3716 }, { "epoch": 1.7045063639490885, "grad_norm": 0.3269628882408142, "learning_rate": 0.0004143684679578188, "loss": 1.3434, "step": 3717 }, { "epoch": 1.7049650269464511, "grad_norm": 0.2566255033016205, "learning_rate": 0.00041412449481339233, "loss": 1.7516, "step": 3718 }, { "epoch": 1.7054236899438138, "grad_norm": 0.2328820526599884, "learning_rate": 0.0004138805427348464, "loss": 1.1428, "step": 3719 }, { "epoch": 1.7058823529411766, "grad_norm": 0.41982701420783997, "learning_rate": 0.0004136366117820245, "loss": 1.6322, "step": 3720 }, { "epoch": 1.706341015938539, "grad_norm": 0.2226077914237976, "learning_rate": 0.00041339270201476425, "loss": 1.2604, "step": 3721 }, { "epoch": 1.706799678935902, "grad_norm": 0.2776189148426056, "learning_rate": 0.0004131488134928987, "loss": 1.2419, "step": 3722 }, { "epoch": 1.7072583419332645, "grad_norm": 0.39088577032089233, "learning_rate": 0.0004129049462762554, "loss": 1.9712, "step": 3723 }, { "epoch": 1.7077170049306272, "grad_norm": 0.28871458768844604, "learning_rate": 0.0004126611004246568, "loss": 1.5851, "step": 3724 }, { "epoch": 1.70817566792799, "grad_norm": 0.2573470175266266, "learning_rate": 0.00041241727599792015, "loss": 1.0604, "step": 3725 }, { "epoch": 1.7086343309253524, "grad_norm": 0.257318913936615, "learning_rate": 0.00041217347305585707, "loss": 1.5261, "step": 3726 }, { "epoch": 1.7090929939227153, "grad_norm": 0.32521334290504456, "learning_rate": 0.00041192969165827433, "loss": 1.9292, "step": 3727 }, { "epoch": 1.709551656920078, "grad_norm": 0.2845672369003296, "learning_rate": 0.00041168593186497317, "loss": 1.2908, "step": 3728 }, { "epoch": 1.7100103199174406, "grad_norm": 0.3328210115432739, "learning_rate": 0.00041144219373574976, "loss": 2.0022, "step": 3729 }, { "epoch": 1.7104689829148034, "grad_norm": 0.3225518763065338, "learning_rate": 0.0004111984773303946, "loss": 1.4616, "step": 3730 }, { "epoch": 1.710927645912166, "grad_norm": 0.14123696088790894, "learning_rate": 0.0004109547827086937, "loss": 0.4822, "step": 3731 }, { "epoch": 1.7113863089095287, "grad_norm": 0.08275225758552551, "learning_rate": 0.00041071110993042627, "loss": 0.8438, "step": 3732 }, { "epoch": 1.7118449719068916, "grad_norm": 0.2467496544122696, "learning_rate": 0.0004104674590553675, "loss": 1.2802, "step": 3733 }, { "epoch": 1.712303634904254, "grad_norm": 0.25316253304481506, "learning_rate": 0.0004102238301432865, "loss": 0.9283, "step": 3734 }, { "epoch": 1.7127622979016168, "grad_norm": 0.23193414509296417, "learning_rate": 0.00040998022325394723, "loss": 1.4131, "step": 3735 }, { "epoch": 1.7132209608989795, "grad_norm": 0.22135387361049652, "learning_rate": 0.0004097366384471086, "loss": 0.8179, "step": 3736 }, { "epoch": 1.713679623896342, "grad_norm": 0.26501017808914185, "learning_rate": 0.00040949307578252314, "loss": 1.5214, "step": 3737 }, { "epoch": 1.714138286893705, "grad_norm": 0.2112581878900528, "learning_rate": 0.0004092495353199388, "loss": 1.0725, "step": 3738 }, { "epoch": 1.7145969498910676, "grad_norm": 0.21010689437389374, "learning_rate": 0.0004090060171190977, "loss": 0.6734, "step": 3739 }, { "epoch": 1.7150556128884302, "grad_norm": 0.3008427321910858, "learning_rate": 0.00040876252123973677, "loss": 2.1113, "step": 3740 }, { "epoch": 1.7155142758857929, "grad_norm": 0.33156007528305054, "learning_rate": 0.00040851904774158725, "loss": 1.8834, "step": 3741 }, { "epoch": 1.7159729388831555, "grad_norm": 0.2164911925792694, "learning_rate": 0.0004082755966843752, "loss": 0.832, "step": 3742 }, { "epoch": 1.7164316018805184, "grad_norm": 0.2621867060661316, "learning_rate": 0.0004080321681278204, "loss": 0.9056, "step": 3743 }, { "epoch": 1.716890264877881, "grad_norm": 0.2789194881916046, "learning_rate": 0.0004077887621316377, "loss": 1.7653, "step": 3744 }, { "epoch": 1.7173489278752436, "grad_norm": 0.27011173963546753, "learning_rate": 0.0004075453787555367, "loss": 1.3661, "step": 3745 }, { "epoch": 1.7178075908726065, "grad_norm": 0.29901739954948425, "learning_rate": 0.00040730201805922096, "loss": 1.5548, "step": 3746 }, { "epoch": 1.718266253869969, "grad_norm": 0.3142026662826538, "learning_rate": 0.0004070586801023885, "loss": 1.79, "step": 3747 }, { "epoch": 1.7187249168673318, "grad_norm": 0.26456326246261597, "learning_rate": 0.00040681536494473224, "loss": 1.305, "step": 3748 }, { "epoch": 1.7191835798646944, "grad_norm": 0.35430580377578735, "learning_rate": 0.0004065720726459385, "loss": 1.6096, "step": 3749 }, { "epoch": 1.719642242862057, "grad_norm": 0.31563955545425415, "learning_rate": 0.0004063288032656891, "loss": 1.6702, "step": 3750 }, { "epoch": 1.7201009058594199, "grad_norm": 0.31197500228881836, "learning_rate": 0.00040608555686365966, "loss": 0.9615, "step": 3751 }, { "epoch": 1.7205595688567825, "grad_norm": 0.26841220259666443, "learning_rate": 0.00040584233349952027, "loss": 1.7595, "step": 3752 }, { "epoch": 1.7210182318541452, "grad_norm": 0.3827781677246094, "learning_rate": 0.0004055991332329356, "loss": 1.309, "step": 3753 }, { "epoch": 1.721476894851508, "grad_norm": 0.30011874437332153, "learning_rate": 0.00040535595612356393, "loss": 1.0917, "step": 3754 }, { "epoch": 1.7219355578488704, "grad_norm": 0.23119381070137024, "learning_rate": 0.00040511280223105866, "loss": 0.9667, "step": 3755 }, { "epoch": 1.7223942208462333, "grad_norm": 0.3517080545425415, "learning_rate": 0.00040486967161506725, "loss": 1.3582, "step": 3756 }, { "epoch": 1.722852883843596, "grad_norm": 0.12287653982639313, "learning_rate": 0.0004046265643352313, "loss": 0.7969, "step": 3757 }, { "epoch": 1.7233115468409586, "grad_norm": 0.22328488528728485, "learning_rate": 0.0004043834804511868, "loss": 1.2675, "step": 3758 }, { "epoch": 1.7237702098383214, "grad_norm": 0.3529665768146515, "learning_rate": 0.0004041404200225641, "loss": 1.1346, "step": 3759 }, { "epoch": 1.7242288728356838, "grad_norm": 0.283286452293396, "learning_rate": 0.00040389738310898736, "loss": 1.2228, "step": 3760 }, { "epoch": 1.7246875358330467, "grad_norm": 0.181251659989357, "learning_rate": 0.0004036543697700756, "loss": 1.1888, "step": 3761 }, { "epoch": 1.7251461988304093, "grad_norm": 0.2636159062385559, "learning_rate": 0.0004034113800654415, "loss": 0.6633, "step": 3762 }, { "epoch": 1.725604861827772, "grad_norm": 0.2084595412015915, "learning_rate": 0.0004031684140546924, "loss": 1.3781, "step": 3763 }, { "epoch": 1.7260635248251348, "grad_norm": 0.3345155715942383, "learning_rate": 0.0004029254717974297, "loss": 1.3012, "step": 3764 }, { "epoch": 1.7265221878224974, "grad_norm": 0.3305032551288605, "learning_rate": 0.00040268255335324844, "loss": 1.6357, "step": 3765 }, { "epoch": 1.72698085081986, "grad_norm": 0.33602216839790344, "learning_rate": 0.0004024396587817386, "loss": 1.7511, "step": 3766 }, { "epoch": 1.727439513817223, "grad_norm": 0.3443238437175751, "learning_rate": 0.00040219678814248396, "loss": 1.7075, "step": 3767 }, { "epoch": 1.7278981768145854, "grad_norm": 0.3635205924510956, "learning_rate": 0.00040195394149506234, "loss": 1.413, "step": 3768 }, { "epoch": 1.7283568398119482, "grad_norm": 0.2028258591890335, "learning_rate": 0.00040171111889904584, "loss": 0.8682, "step": 3769 }, { "epoch": 1.7288155028093108, "grad_norm": 0.26344314217567444, "learning_rate": 0.0004014683204140006, "loss": 1.6746, "step": 3770 }, { "epoch": 1.7292741658066735, "grad_norm": 0.36593350768089294, "learning_rate": 0.0004012255460994868, "loss": 1.3928, "step": 3771 }, { "epoch": 1.7297328288040363, "grad_norm": 0.2758760154247284, "learning_rate": 0.0004009827960150587, "loss": 0.9172, "step": 3772 }, { "epoch": 1.730191491801399, "grad_norm": 0.348369836807251, "learning_rate": 0.00040074007022026473, "loss": 2.3096, "step": 3773 }, { "epoch": 1.7306501547987616, "grad_norm": 0.3024252653121948, "learning_rate": 0.0004004973687746472, "loss": 1.2795, "step": 3774 }, { "epoch": 1.7311088177961242, "grad_norm": 0.3365992307662964, "learning_rate": 0.00040025469173774256, "loss": 1.1777, "step": 3775 }, { "epoch": 1.7315674807934869, "grad_norm": 0.2750336229801178, "learning_rate": 0.0004000120391690814, "loss": 1.3168, "step": 3776 }, { "epoch": 1.7320261437908497, "grad_norm": 0.32935917377471924, "learning_rate": 0.00039976941112818777, "loss": 1.9238, "step": 3777 }, { "epoch": 1.7324848067882124, "grad_norm": 0.3010925054550171, "learning_rate": 0.00039952680767458036, "loss": 1.0422, "step": 3778 }, { "epoch": 1.732943469785575, "grad_norm": 0.29777830839157104, "learning_rate": 0.0003992842288677715, "loss": 1.7227, "step": 3779 }, { "epoch": 1.7334021327829379, "grad_norm": 0.3348708748817444, "learning_rate": 0.00039904167476726744, "loss": 1.6201, "step": 3780 }, { "epoch": 1.7338607957803003, "grad_norm": 0.2975271940231323, "learning_rate": 0.00039879914543256863, "loss": 1.2951, "step": 3781 }, { "epoch": 1.7343194587776631, "grad_norm": 0.2370220124721527, "learning_rate": 0.000398556640923169, "loss": 0.8168, "step": 3782 }, { "epoch": 1.7347781217750258, "grad_norm": 0.3307121992111206, "learning_rate": 0.0003983141612985569, "loss": 1.6048, "step": 3783 }, { "epoch": 1.7352367847723884, "grad_norm": 0.357295960187912, "learning_rate": 0.00039807170661821414, "loss": 1.0067, "step": 3784 }, { "epoch": 1.7356954477697513, "grad_norm": 0.28571048378944397, "learning_rate": 0.0003978292769416167, "loss": 0.9839, "step": 3785 }, { "epoch": 1.736154110767114, "grad_norm": 0.35537266731262207, "learning_rate": 0.00039758687232823434, "loss": 1.6417, "step": 3786 }, { "epoch": 1.7366127737644765, "grad_norm": 0.28138241171836853, "learning_rate": 0.0003973444928375307, "loss": 1.1609, "step": 3787 }, { "epoch": 1.7370714367618394, "grad_norm": 0.24497286975383759, "learning_rate": 0.0003971021385289631, "loss": 1.7524, "step": 3788 }, { "epoch": 1.7375300997592018, "grad_norm": 0.289495587348938, "learning_rate": 0.0003968598094619828, "loss": 1.1777, "step": 3789 }, { "epoch": 1.7379887627565647, "grad_norm": 0.2697417736053467, "learning_rate": 0.00039661750569603495, "loss": 1.42, "step": 3790 }, { "epoch": 1.7384474257539273, "grad_norm": 0.3958907127380371, "learning_rate": 0.00039637522729055836, "loss": 1.7687, "step": 3791 }, { "epoch": 1.73890608875129, "grad_norm": 0.23908482491970062, "learning_rate": 0.00039613297430498586, "loss": 0.9498, "step": 3792 }, { "epoch": 1.7393647517486528, "grad_norm": 0.262422651052475, "learning_rate": 0.0003958907467987435, "loss": 1.0038, "step": 3793 }, { "epoch": 1.7398234147460152, "grad_norm": 0.3177359998226166, "learning_rate": 0.00039564854483125164, "loss": 1.9203, "step": 3794 }, { "epoch": 1.740282077743378, "grad_norm": 0.3895881772041321, "learning_rate": 0.0003954063684619241, "loss": 1.788, "step": 3795 }, { "epoch": 1.7407407407407407, "grad_norm": 0.2772713005542755, "learning_rate": 0.00039516421775016863, "loss": 1.3488, "step": 3796 }, { "epoch": 1.7411994037381033, "grad_norm": 0.2620038390159607, "learning_rate": 0.00039492209275538624, "loss": 1.4497, "step": 3797 }, { "epoch": 1.7416580667354662, "grad_norm": 0.21062573790550232, "learning_rate": 0.0003946799935369726, "loss": 0.8097, "step": 3798 }, { "epoch": 1.7421167297328288, "grad_norm": 0.313883900642395, "learning_rate": 0.0003944379201543156, "loss": 1.6286, "step": 3799 }, { "epoch": 1.7425753927301915, "grad_norm": 0.28428205847740173, "learning_rate": 0.000394195872666798, "loss": 1.1285, "step": 3800 }, { "epoch": 1.7430340557275543, "grad_norm": 0.33933934569358826, "learning_rate": 0.00039395385113379566, "loss": 1.5898, "step": 3801 }, { "epoch": 1.7434927187249167, "grad_norm": 0.3156382143497467, "learning_rate": 0.00039371185561467827, "loss": 0.9087, "step": 3802 }, { "epoch": 1.7439513817222796, "grad_norm": 0.38781848549842834, "learning_rate": 0.0003934698861688093, "loss": 1.8621, "step": 3803 }, { "epoch": 1.7444100447196422, "grad_norm": 0.2756793200969696, "learning_rate": 0.0003932279428555452, "loss": 1.5569, "step": 3804 }, { "epoch": 1.7448687077170049, "grad_norm": 0.2981017231941223, "learning_rate": 0.0003929860257342366, "loss": 1.1586, "step": 3805 }, { "epoch": 1.7453273707143677, "grad_norm": 0.32057425379753113, "learning_rate": 0.0003927441348642274, "loss": 1.6129, "step": 3806 }, { "epoch": 1.7457860337117304, "grad_norm": 0.359298437833786, "learning_rate": 0.0003925022703048553, "loss": 1.2742, "step": 3807 }, { "epoch": 1.746244696709093, "grad_norm": 0.21599337458610535, "learning_rate": 0.0003922604321154514, "loss": 1.2625, "step": 3808 }, { "epoch": 1.7467033597064556, "grad_norm": 0.25992512702941895, "learning_rate": 0.00039201862035534066, "loss": 1.3849, "step": 3809 }, { "epoch": 1.7471620227038183, "grad_norm": 0.342759907245636, "learning_rate": 0.0003917768350838406, "loss": 1.7155, "step": 3810 }, { "epoch": 1.7476206857011811, "grad_norm": 0.22882609069347382, "learning_rate": 0.0003915350763602632, "loss": 1.1857, "step": 3811 }, { "epoch": 1.7480793486985438, "grad_norm": 0.2731204032897949, "learning_rate": 0.0003912933442439137, "loss": 1.0107, "step": 3812 }, { "epoch": 1.7485380116959064, "grad_norm": 0.21852166950702667, "learning_rate": 0.00039105163879409066, "loss": 1.2848, "step": 3813 }, { "epoch": 1.7489966746932692, "grad_norm": 0.6290088295936584, "learning_rate": 0.00039080996007008625, "loss": 1.4191, "step": 3814 }, { "epoch": 1.7494553376906317, "grad_norm": 0.2989594042301178, "learning_rate": 0.0003905683081311861, "loss": 1.5646, "step": 3815 }, { "epoch": 1.7499140006879945, "grad_norm": 0.256778746843338, "learning_rate": 0.00039032668303666876, "loss": 1.4916, "step": 3816 }, { "epoch": 1.7503726636853572, "grad_norm": 0.19242458045482635, "learning_rate": 0.00039008508484580684, "loss": 0.338, "step": 3817 }, { "epoch": 1.7508313266827198, "grad_norm": 0.24674712121486664, "learning_rate": 0.0003898435136178662, "loss": 1.1446, "step": 3818 }, { "epoch": 1.7512899896800826, "grad_norm": 0.2414608746767044, "learning_rate": 0.000389601969412106, "loss": 1.5443, "step": 3819 }, { "epoch": 1.7517486526774453, "grad_norm": 0.8330361247062683, "learning_rate": 0.00038936045228777884, "loss": 1.5558, "step": 3820 }, { "epoch": 1.752207315674808, "grad_norm": 0.2938498258590698, "learning_rate": 0.0003891189623041302, "loss": 1.0142, "step": 3821 }, { "epoch": 1.7526659786721708, "grad_norm": 0.3147178888320923, "learning_rate": 0.0003888774995203997, "loss": 1.2997, "step": 3822 }, { "epoch": 1.7531246416695332, "grad_norm": 0.26122626662254333, "learning_rate": 0.0003886360639958198, "loss": 1.3746, "step": 3823 }, { "epoch": 1.753583304666896, "grad_norm": 0.08440568298101425, "learning_rate": 0.00038839465578961637, "loss": 0.9028, "step": 3824 }, { "epoch": 1.7540419676642587, "grad_norm": 0.27365854382514954, "learning_rate": 0.00038815327496100863, "loss": 1.2714, "step": 3825 }, { "epoch": 1.7545006306616213, "grad_norm": 0.2890813648700714, "learning_rate": 0.0003879119215692091, "loss": 1.3066, "step": 3826 }, { "epoch": 1.7549592936589842, "grad_norm": 0.315480500459671, "learning_rate": 0.00038767059567342325, "loss": 1.3728, "step": 3827 }, { "epoch": 1.7554179566563466, "grad_norm": 0.22030934691429138, "learning_rate": 0.0003874292973328502, "loss": 1.0422, "step": 3828 }, { "epoch": 1.7558766196537094, "grad_norm": 0.21673201024532318, "learning_rate": 0.0003871880266066823, "loss": 1.2688, "step": 3829 }, { "epoch": 1.756335282651072, "grad_norm": 0.31848639249801636, "learning_rate": 0.0003869467835541048, "loss": 1.376, "step": 3830 }, { "epoch": 1.7567939456484347, "grad_norm": 0.08145318180322647, "learning_rate": 0.0003867055682342966, "loss": 0.4348, "step": 3831 }, { "epoch": 1.7572526086457976, "grad_norm": 0.1764577478170395, "learning_rate": 0.00038646438070642926, "loss": 0.7916, "step": 3832 }, { "epoch": 1.7577112716431602, "grad_norm": 0.32474926114082336, "learning_rate": 0.00038622322102966803, "loss": 1.7656, "step": 3833 }, { "epoch": 1.7581699346405228, "grad_norm": 0.2409229725599289, "learning_rate": 0.00038598208926317096, "loss": 1.5365, "step": 3834 }, { "epoch": 1.7586285976378857, "grad_norm": 0.2720002830028534, "learning_rate": 0.00038574098546608957, "loss": 0.9022, "step": 3835 }, { "epoch": 1.7590872606352481, "grad_norm": 1.557668924331665, "learning_rate": 0.0003854999096975683, "loss": 1.754, "step": 3836 }, { "epoch": 1.759545923632611, "grad_norm": 0.3512636721134186, "learning_rate": 0.00038525886201674485, "loss": 1.4061, "step": 3837 }, { "epoch": 1.7600045866299736, "grad_norm": 0.29787155985832214, "learning_rate": 0.0003850178424827497, "loss": 1.8271, "step": 3838 }, { "epoch": 1.7604632496273362, "grad_norm": 0.3310008645057678, "learning_rate": 0.000384776851154707, "loss": 1.6361, "step": 3839 }, { "epoch": 1.760921912624699, "grad_norm": 0.42053020000457764, "learning_rate": 0.00038453588809173343, "loss": 1.8173, "step": 3840 }, { "epoch": 1.7613805756220617, "grad_norm": 0.41074061393737793, "learning_rate": 0.00038429495335293905, "loss": 1.7546, "step": 3841 }, { "epoch": 1.7618392386194244, "grad_norm": 0.27962028980255127, "learning_rate": 0.00038405404699742694, "loss": 1.4692, "step": 3842 }, { "epoch": 1.762297901616787, "grad_norm": 0.3928588628768921, "learning_rate": 0.0003838131690842932, "loss": 1.9565, "step": 3843 }, { "epoch": 1.7627565646141496, "grad_norm": 0.4200422465801239, "learning_rate": 0.0003835723196726267, "loss": 1.8337, "step": 3844 }, { "epoch": 1.7632152276115125, "grad_norm": 0.3624529540538788, "learning_rate": 0.0003833314988215097, "loss": 1.7949, "step": 3845 }, { "epoch": 1.7636738906088751, "grad_norm": 0.4131540358066559, "learning_rate": 0.00038309070659001723, "loss": 1.7876, "step": 3846 }, { "epoch": 1.7641325536062378, "grad_norm": 0.26267632842063904, "learning_rate": 0.00038284994303721743, "loss": 1.3583, "step": 3847 }, { "epoch": 1.7645912166036006, "grad_norm": 0.31181371212005615, "learning_rate": 0.0003826092082221714, "loss": 1.2529, "step": 3848 }, { "epoch": 1.765049879600963, "grad_norm": 0.2337476760149002, "learning_rate": 0.00038236850220393285, "loss": 1.1989, "step": 3849 }, { "epoch": 1.765508542598326, "grad_norm": 0.2774173319339752, "learning_rate": 0.000382127825041549, "loss": 1.6288, "step": 3850 }, { "epoch": 1.7659672055956885, "grad_norm": 0.3347560465335846, "learning_rate": 0.0003818871767940595, "loss": 1.3356, "step": 3851 }, { "epoch": 1.7664258685930512, "grad_norm": 0.339616596698761, "learning_rate": 0.00038164655752049713, "loss": 1.9014, "step": 3852 }, { "epoch": 1.766884531590414, "grad_norm": 0.3301674425601959, "learning_rate": 0.0003814059672798876, "loss": 1.2728, "step": 3853 }, { "epoch": 1.7673431945877767, "grad_norm": 0.2250106781721115, "learning_rate": 0.0003811654061312495, "loss": 1.7408, "step": 3854 }, { "epoch": 1.7678018575851393, "grad_norm": 0.252518892288208, "learning_rate": 0.00038092487413359405, "loss": 1.2438, "step": 3855 }, { "epoch": 1.7682605205825022, "grad_norm": 0.2107822746038437, "learning_rate": 0.00038068437134592553, "loss": 1.3066, "step": 3856 }, { "epoch": 1.7687191835798646, "grad_norm": 0.33197417855262756, "learning_rate": 0.0003804438978272411, "loss": 1.2697, "step": 3857 }, { "epoch": 1.7691778465772274, "grad_norm": 0.22738035023212433, "learning_rate": 0.0003802034536365305, "loss": 0.8106, "step": 3858 }, { "epoch": 1.76963650957459, "grad_norm": 0.22326631844043732, "learning_rate": 0.0003799630388327766, "loss": 1.5657, "step": 3859 }, { "epoch": 1.7700951725719527, "grad_norm": 0.30757084488868713, "learning_rate": 0.00037972265347495474, "loss": 1.4067, "step": 3860 }, { "epoch": 1.7705538355693156, "grad_norm": 0.22894053161144257, "learning_rate": 0.00037948229762203313, "loss": 0.7838, "step": 3861 }, { "epoch": 1.771012498566678, "grad_norm": 0.3080291152000427, "learning_rate": 0.0003792419713329729, "loss": 1.3486, "step": 3862 }, { "epoch": 1.7714711615640408, "grad_norm": 0.21413278579711914, "learning_rate": 0.00037900167466672793, "loss": 1.0098, "step": 3863 }, { "epoch": 1.7719298245614035, "grad_norm": 0.2879568040370941, "learning_rate": 0.00037876140768224444, "loss": 1.5512, "step": 3864 }, { "epoch": 1.772388487558766, "grad_norm": 0.28043875098228455, "learning_rate": 0.0003785211704384621, "loss": 1.2938, "step": 3865 }, { "epoch": 1.772847150556129, "grad_norm": 0.3483384847640991, "learning_rate": 0.0003782809629943124, "loss": 1.7574, "step": 3866 }, { "epoch": 1.7733058135534916, "grad_norm": 0.33012205362319946, "learning_rate": 0.00037804078540872005, "loss": 1.2911, "step": 3867 }, { "epoch": 1.7737644765508542, "grad_norm": 0.2982887327671051, "learning_rate": 0.0003778006377406025, "loss": 1.8078, "step": 3868 }, { "epoch": 1.774223139548217, "grad_norm": 0.19081206619739532, "learning_rate": 0.0003775605200488694, "loss": 0.4456, "step": 3869 }, { "epoch": 1.7746818025455795, "grad_norm": 0.3630902171134949, "learning_rate": 0.00037732043239242373, "loss": 1.98, "step": 3870 }, { "epoch": 1.7751404655429424, "grad_norm": 0.25359615683555603, "learning_rate": 0.00037708037483016085, "loss": 0.9155, "step": 3871 }, { "epoch": 1.775599128540305, "grad_norm": 0.3517308533191681, "learning_rate": 0.000376840347420968, "loss": 1.8185, "step": 3872 }, { "epoch": 1.7760577915376676, "grad_norm": 0.23731131851673126, "learning_rate": 0.00037660035022372604, "loss": 0.7537, "step": 3873 }, { "epoch": 1.7765164545350305, "grad_norm": 0.2584936320781708, "learning_rate": 0.0003763603832973077, "loss": 1.7855, "step": 3874 }, { "epoch": 1.7769751175323931, "grad_norm": 0.42461255192756653, "learning_rate": 0.00037612044670057906, "loss": 1.1224, "step": 3875 }, { "epoch": 1.7774337805297558, "grad_norm": 0.2801918089389801, "learning_rate": 0.00037588054049239817, "loss": 1.6647, "step": 3876 }, { "epoch": 1.7778924435271186, "grad_norm": 0.3888150751590729, "learning_rate": 0.0003756406647316155, "loss": 1.7823, "step": 3877 }, { "epoch": 1.778351106524481, "grad_norm": 0.4158638119697571, "learning_rate": 0.00037540081947707443, "loss": 1.3085, "step": 3878 }, { "epoch": 1.7788097695218439, "grad_norm": 0.055091023445129395, "learning_rate": 0.0003751610047876106, "loss": 0.7616, "step": 3879 }, { "epoch": 1.7792684325192065, "grad_norm": 0.2197464108467102, "learning_rate": 0.00037492122072205257, "loss": 0.7161, "step": 3880 }, { "epoch": 1.7797270955165692, "grad_norm": 0.39263108372688293, "learning_rate": 0.00037468146733922106, "loss": 1.9539, "step": 3881 }, { "epoch": 1.780185758513932, "grad_norm": 0.43456920981407166, "learning_rate": 0.0003744417446979293, "loss": 1.8441, "step": 3882 }, { "epoch": 1.7806444215112944, "grad_norm": 0.3044719099998474, "learning_rate": 0.0003742020528569827, "loss": 1.7751, "step": 3883 }, { "epoch": 1.7811030845086573, "grad_norm": 0.44964897632598877, "learning_rate": 0.0003739623918751795, "loss": 1.338, "step": 3884 }, { "epoch": 1.78156174750602, "grad_norm": 0.3019511103630066, "learning_rate": 0.00037372276181131043, "loss": 1.298, "step": 3885 }, { "epoch": 1.7820204105033826, "grad_norm": 0.33377906680107117, "learning_rate": 0.0003734831627241584, "loss": 2.1521, "step": 3886 }, { "epoch": 1.7824790735007454, "grad_norm": 0.4243369996547699, "learning_rate": 0.000373243594672499, "loss": 2.0967, "step": 3887 }, { "epoch": 1.782937736498108, "grad_norm": 0.41644036769866943, "learning_rate": 0.0003730040577150995, "loss": 1.89, "step": 3888 }, { "epoch": 1.7833963994954707, "grad_norm": 0.2524632215499878, "learning_rate": 0.0003727645519107204, "loss": 1.5137, "step": 3889 }, { "epoch": 1.7838550624928335, "grad_norm": 0.4522644281387329, "learning_rate": 0.0003725250773181141, "loss": 1.5999, "step": 3890 }, { "epoch": 1.784313725490196, "grad_norm": 0.25750890374183655, "learning_rate": 0.0003722856339960256, "loss": 1.3307, "step": 3891 }, { "epoch": 1.7847723884875588, "grad_norm": 0.37072017788887024, "learning_rate": 0.0003720462220031918, "loss": 1.6609, "step": 3892 }, { "epoch": 1.7852310514849214, "grad_norm": 0.35205331444740295, "learning_rate": 0.0003718068413983425, "loss": 1.0562, "step": 3893 }, { "epoch": 1.785689714482284, "grad_norm": 0.05115019530057907, "learning_rate": 0.00037156749224019923, "loss": 0.6781, "step": 3894 }, { "epoch": 1.786148377479647, "grad_norm": 0.25191041827201843, "learning_rate": 0.00037132817458747613, "loss": 1.4272, "step": 3895 }, { "epoch": 1.7866070404770094, "grad_norm": 0.27612894773483276, "learning_rate": 0.00037108888849887966, "loss": 0.9952, "step": 3896 }, { "epoch": 1.7870657034743722, "grad_norm": 0.2937767803668976, "learning_rate": 0.0003708496340331082, "loss": 1.6864, "step": 3897 }, { "epoch": 1.7875243664717348, "grad_norm": 0.3436873257160187, "learning_rate": 0.00037061041124885285, "loss": 1.9355, "step": 3898 }, { "epoch": 1.7879830294690975, "grad_norm": 0.35292357206344604, "learning_rate": 0.00037037122020479665, "loss": 1.7389, "step": 3899 }, { "epoch": 1.7884416924664603, "grad_norm": 0.3429259955883026, "learning_rate": 0.0003701320609596147, "loss": 1.8025, "step": 3900 }, { "epoch": 1.788900355463823, "grad_norm": 0.38343197107315063, "learning_rate": 0.00036989293357197464, "loss": 2.0798, "step": 3901 }, { "epoch": 1.7893590184611856, "grad_norm": 0.30104926228523254, "learning_rate": 0.00036965383810053597, "loss": 1.3101, "step": 3902 }, { "epoch": 1.7898176814585485, "grad_norm": 0.2541302740573883, "learning_rate": 0.00036941477460395074, "loss": 1.0455, "step": 3903 }, { "epoch": 1.7902763444559109, "grad_norm": 0.2968154847621918, "learning_rate": 0.000369175743140863, "loss": 1.9026, "step": 3904 }, { "epoch": 1.7907350074532737, "grad_norm": 0.33623620867729187, "learning_rate": 0.0003689367437699086, "loss": 1.1528, "step": 3905 }, { "epoch": 1.7911936704506364, "grad_norm": 0.3630521297454834, "learning_rate": 0.00036869777654971594, "loss": 1.7005, "step": 3906 }, { "epoch": 1.791652333447999, "grad_norm": 0.38895827531814575, "learning_rate": 0.0003684588415389055, "loss": 1.7465, "step": 3907 }, { "epoch": 1.7921109964453619, "grad_norm": 0.38369861245155334, "learning_rate": 0.0003682199387960896, "loss": 1.2702, "step": 3908 }, { "epoch": 1.7925696594427245, "grad_norm": 0.222330242395401, "learning_rate": 0.00036798106837987297, "loss": 1.2868, "step": 3909 }, { "epoch": 1.7930283224400871, "grad_norm": 0.2768653631210327, "learning_rate": 0.00036774223034885236, "loss": 1.0325, "step": 3910 }, { "epoch": 1.79348698543745, "grad_norm": 0.4517102837562561, "learning_rate": 0.000367503424761616, "loss": 1.3302, "step": 3911 }, { "epoch": 1.7939456484348124, "grad_norm": 0.31973719596862793, "learning_rate": 0.000367264651676745, "loss": 1.7516, "step": 3912 }, { "epoch": 1.7944043114321753, "grad_norm": 0.36346325278282166, "learning_rate": 0.000367025911152812, "loss": 1.7175, "step": 3913 }, { "epoch": 1.794862974429538, "grad_norm": 0.3458804190158844, "learning_rate": 0.00036678720324838176, "loss": 1.3469, "step": 3914 }, { "epoch": 1.7953216374269005, "grad_norm": 0.3012971580028534, "learning_rate": 0.0003665485280220112, "loss": 1.5734, "step": 3915 }, { "epoch": 1.7957803004242634, "grad_norm": 0.3793402910232544, "learning_rate": 0.00036630988553224887, "loss": 1.6147, "step": 3916 }, { "epoch": 1.7962389634216258, "grad_norm": 0.4072246253490448, "learning_rate": 0.00036607127583763554, "loss": 1.4536, "step": 3917 }, { "epoch": 1.7966976264189887, "grad_norm": 0.32000985741615295, "learning_rate": 0.0003658326989967039, "loss": 1.438, "step": 3918 }, { "epoch": 1.7971562894163513, "grad_norm": 0.2759474217891693, "learning_rate": 0.00036559415506797865, "loss": 1.33, "step": 3919 }, { "epoch": 1.797614952413714, "grad_norm": 0.32986119389533997, "learning_rate": 0.0003653556441099762, "loss": 1.3684, "step": 3920 }, { "epoch": 1.7980736154110768, "grad_norm": 0.23913626372814178, "learning_rate": 0.0003651171661812053, "loss": 1.3252, "step": 3921 }, { "epoch": 1.7985322784084394, "grad_norm": 0.33937355875968933, "learning_rate": 0.0003648787213401659, "loss": 1.4227, "step": 3922 }, { "epoch": 1.798990941405802, "grad_norm": 0.2144535481929779, "learning_rate": 0.00036464030964535044, "loss": 0.9513, "step": 3923 }, { "epoch": 1.799449604403165, "grad_norm": 0.2545612156391144, "learning_rate": 0.00036440193115524306, "loss": 1.1305, "step": 3924 }, { "epoch": 1.7999082674005273, "grad_norm": 0.33486199378967285, "learning_rate": 0.0003641635859283197, "loss": 1.4782, "step": 3925 }, { "epoch": 1.8003669303978902, "grad_norm": 0.16233354806900024, "learning_rate": 0.000363925274023048, "loss": 0.611, "step": 3926 }, { "epoch": 1.8008255933952528, "grad_norm": 0.33829358220100403, "learning_rate": 0.00036368699549788795, "loss": 1.8271, "step": 3927 }, { "epoch": 1.8012842563926155, "grad_norm": 0.2723900079727173, "learning_rate": 0.00036344875041129066, "loss": 1.1435, "step": 3928 }, { "epoch": 1.8017429193899783, "grad_norm": 0.2544368803501129, "learning_rate": 0.00036321053882169954, "loss": 0.8193, "step": 3929 }, { "epoch": 1.8022015823873407, "grad_norm": 0.2767103910446167, "learning_rate": 0.00036297236078754945, "loss": 1.3754, "step": 3930 }, { "epoch": 1.8026602453847036, "grad_norm": 0.1679062396287918, "learning_rate": 0.00036273421636726723, "loss": 0.8304, "step": 3931 }, { "epoch": 1.8031189083820662, "grad_norm": 0.32275891304016113, "learning_rate": 0.0003624961056192717, "loss": 1.2045, "step": 3932 }, { "epoch": 1.8035775713794289, "grad_norm": 0.1948852390050888, "learning_rate": 0.00036225802860197275, "loss": 0.963, "step": 3933 }, { "epoch": 1.8040362343767917, "grad_norm": 0.39679884910583496, "learning_rate": 0.00036201998537377264, "loss": 1.5401, "step": 3934 }, { "epoch": 1.8044948973741544, "grad_norm": 0.2812747657299042, "learning_rate": 0.00036178197599306494, "loss": 2.0823, "step": 3935 }, { "epoch": 1.804953560371517, "grad_norm": 0.42034879326820374, "learning_rate": 0.00036154400051823497, "loss": 2.0599, "step": 3936 }, { "epoch": 1.8054122233688799, "grad_norm": 0.4247654676437378, "learning_rate": 0.00036130605900766024, "loss": 1.8334, "step": 3937 }, { "epoch": 1.8058708863662423, "grad_norm": 0.35334038734436035, "learning_rate": 0.0003610681515197094, "loss": 2.0579, "step": 3938 }, { "epoch": 1.8063295493636051, "grad_norm": 0.3598625063896179, "learning_rate": 0.0003608302781127425, "loss": 2.2363, "step": 3939 }, { "epoch": 1.8067882123609678, "grad_norm": 0.29301023483276367, "learning_rate": 0.00036059243884511185, "loss": 1.0839, "step": 3940 }, { "epoch": 1.8072468753583304, "grad_norm": 0.281200110912323, "learning_rate": 0.0003603546337751611, "loss": 1.0665, "step": 3941 }, { "epoch": 1.8077055383556933, "grad_norm": 0.16020144522190094, "learning_rate": 0.0003601168629612256, "loss": 1.1812, "step": 3942 }, { "epoch": 1.8081642013530559, "grad_norm": 0.3196254372596741, "learning_rate": 0.00035987912646163247, "loss": 1.4895, "step": 3943 }, { "epoch": 1.8086228643504185, "grad_norm": 0.34358012676239014, "learning_rate": 0.0003596414243346997, "loss": 1.4952, "step": 3944 }, { "epoch": 1.8090815273477814, "grad_norm": 0.2578059136867523, "learning_rate": 0.00035940375663873767, "loss": 1.4763, "step": 3945 }, { "epoch": 1.8095401903451438, "grad_norm": 0.33023521304130554, "learning_rate": 0.0003591661234320477, "loss": 1.0975, "step": 3946 }, { "epoch": 1.8099988533425067, "grad_norm": 0.16670942306518555, "learning_rate": 0.00035892852477292325, "loss": 1.1774, "step": 3947 }, { "epoch": 1.8104575163398693, "grad_norm": 0.3831387758255005, "learning_rate": 0.00035869096071964885, "loss": 2.0361, "step": 3948 }, { "epoch": 1.810916179337232, "grad_norm": 0.3592255413532257, "learning_rate": 0.0003584534313305009, "loss": 1.2978, "step": 3949 }, { "epoch": 1.8113748423345948, "grad_norm": 0.05875217914581299, "learning_rate": 0.0003582159366637466, "loss": 0.4578, "step": 3950 }, { "epoch": 1.8118335053319572, "grad_norm": 0.2751762270927429, "learning_rate": 0.00035797847677764526, "loss": 1.256, "step": 3951 }, { "epoch": 1.81229216832932, "grad_norm": 0.20929132401943207, "learning_rate": 0.0003577410517304477, "loss": 1.1679, "step": 3952 }, { "epoch": 1.8127508313266827, "grad_norm": 0.25931280851364136, "learning_rate": 0.00035750366158039594, "loss": 0.9565, "step": 3953 }, { "epoch": 1.8132094943240453, "grad_norm": 0.20596548914909363, "learning_rate": 0.0003572663063857234, "loss": 1.2343, "step": 3954 }, { "epoch": 1.8136681573214082, "grad_norm": 0.2564990818500519, "learning_rate": 0.0003570289862046553, "loss": 1.6389, "step": 3955 }, { "epoch": 1.8141268203187708, "grad_norm": 0.3115670382976532, "learning_rate": 0.0003567917010954074, "loss": 1.24, "step": 3956 }, { "epoch": 1.8145854833161335, "grad_norm": 0.29912760853767395, "learning_rate": 0.0003565544511161879, "loss": 1.6694, "step": 3957 }, { "epoch": 1.8150441463134963, "grad_norm": 0.37836483120918274, "learning_rate": 0.00035631723632519594, "loss": 2.1592, "step": 3958 }, { "epoch": 1.8155028093108587, "grad_norm": 0.3637576401233673, "learning_rate": 0.0003560800567806218, "loss": 1.2883, "step": 3959 }, { "epoch": 1.8159614723082216, "grad_norm": 0.2932051420211792, "learning_rate": 0.0003558429125406476, "loss": 1.4525, "step": 3960 }, { "epoch": 1.8164201353055842, "grad_norm": 0.3286663591861725, "learning_rate": 0.0003556058036634463, "loss": 1.7548, "step": 3961 }, { "epoch": 1.8168787983029469, "grad_norm": 0.3011263310909271, "learning_rate": 0.00035536873020718254, "loss": 1.6652, "step": 3962 }, { "epoch": 1.8173374613003097, "grad_norm": 0.381115585565567, "learning_rate": 0.000355131692230012, "loss": 1.6675, "step": 3963 }, { "epoch": 1.8177961242976721, "grad_norm": 0.2726176083087921, "learning_rate": 0.00035489468979008195, "loss": 1.4564, "step": 3964 }, { "epoch": 1.818254787295035, "grad_norm": 0.32600149512290955, "learning_rate": 0.0003546577229455308, "loss": 1.9954, "step": 3965 }, { "epoch": 1.8187134502923976, "grad_norm": 0.2766259014606476, "learning_rate": 0.0003544207917544882, "loss": 1.4604, "step": 3966 }, { "epoch": 1.8191721132897603, "grad_norm": 0.3894624710083008, "learning_rate": 0.000354183896275075, "loss": 0.8529, "step": 3967 }, { "epoch": 1.819630776287123, "grad_norm": 0.2296787053346634, "learning_rate": 0.00035394703656540345, "loss": 1.3822, "step": 3968 }, { "epoch": 1.8200894392844857, "grad_norm": 0.3591334819793701, "learning_rate": 0.00035371021268357694, "loss": 1.9995, "step": 3969 }, { "epoch": 1.8205481022818484, "grad_norm": 0.33714228868484497, "learning_rate": 0.00035347342468769, "loss": 1.4948, "step": 3970 }, { "epoch": 1.8210067652792112, "grad_norm": 0.2692127227783203, "learning_rate": 0.00035323667263582865, "loss": 1.1647, "step": 3971 }, { "epoch": 1.8214654282765737, "grad_norm": 0.1590389609336853, "learning_rate": 0.00035299995658606963, "loss": 0.9988, "step": 3972 }, { "epoch": 1.8219240912739365, "grad_norm": 0.25324851274490356, "learning_rate": 0.0003527632765964811, "loss": 1.0857, "step": 3973 }, { "epoch": 1.8223827542712991, "grad_norm": 0.29076528549194336, "learning_rate": 0.00035252663272512255, "loss": 1.6969, "step": 3974 }, { "epoch": 1.8228414172686618, "grad_norm": 0.2527695894241333, "learning_rate": 0.0003522900250300443, "loss": 1.2706, "step": 3975 }, { "epoch": 1.8233000802660246, "grad_norm": 0.34270545840263367, "learning_rate": 0.000352053453569288, "loss": 1.6218, "step": 3976 }, { "epoch": 1.8237587432633873, "grad_norm": 0.2784097194671631, "learning_rate": 0.00035181691840088623, "loss": 1.166, "step": 3977 }, { "epoch": 1.82421740626075, "grad_norm": 0.30808866024017334, "learning_rate": 0.0003515804195828629, "loss": 1.2634, "step": 3978 }, { "epoch": 1.8246760692581128, "grad_norm": 0.16882003843784332, "learning_rate": 0.00035134395717323276, "loss": 0.8922, "step": 3979 }, { "epoch": 1.8251347322554752, "grad_norm": 0.1895928829908371, "learning_rate": 0.0003511075312300018, "loss": 0.895, "step": 3980 }, { "epoch": 1.825593395252838, "grad_norm": 0.21428383886814117, "learning_rate": 0.00035087114181116697, "loss": 1.3183, "step": 3981 }, { "epoch": 1.8260520582502007, "grad_norm": 0.28207674622535706, "learning_rate": 0.0003506347889747164, "loss": 0.9283, "step": 3982 }, { "epoch": 1.8265107212475633, "grad_norm": 0.24229072034358978, "learning_rate": 0.000350398472778629, "loss": 1.783, "step": 3983 }, { "epoch": 1.8269693842449262, "grad_norm": 0.309413880109787, "learning_rate": 0.000350162193280875, "loss": 1.2617, "step": 3984 }, { "epoch": 1.8274280472422886, "grad_norm": 0.2385943979024887, "learning_rate": 0.00034992595053941525, "loss": 1.2587, "step": 3985 }, { "epoch": 1.8278867102396514, "grad_norm": 0.31749603152275085, "learning_rate": 0.00034968974461220195, "loss": 1.8345, "step": 3986 }, { "epoch": 1.828345373237014, "grad_norm": 0.28810974955558777, "learning_rate": 0.0003494535755571781, "loss": 1.673, "step": 3987 }, { "epoch": 1.8288040362343767, "grad_norm": 0.310904324054718, "learning_rate": 0.0003492174434322778, "loss": 1.5857, "step": 3988 }, { "epoch": 1.8292626992317396, "grad_norm": 0.20826320350170135, "learning_rate": 0.00034898134829542565, "loss": 0.8103, "step": 3989 }, { "epoch": 1.8297213622291022, "grad_norm": 0.3267036974430084, "learning_rate": 0.0003487452902045377, "loss": 1.3232, "step": 3990 }, { "epoch": 1.8301800252264648, "grad_norm": 0.24371105432510376, "learning_rate": 0.00034850926921752067, "loss": 1.2933, "step": 3991 }, { "epoch": 1.8306386882238277, "grad_norm": 0.3523206412792206, "learning_rate": 0.0003482732853922722, "loss": 1.2762, "step": 3992 }, { "epoch": 1.83109735122119, "grad_norm": 0.5423739552497864, "learning_rate": 0.00034803733878668077, "loss": 0.9312, "step": 3993 }, { "epoch": 1.831556014218553, "grad_norm": 0.31650495529174805, "learning_rate": 0.0003478014294586261, "loss": 2.0525, "step": 3994 }, { "epoch": 1.8320146772159156, "grad_norm": 0.3087564706802368, "learning_rate": 0.000347565557465978, "loss": 1.3784, "step": 3995 }, { "epoch": 1.8324733402132782, "grad_norm": 0.2533722519874573, "learning_rate": 0.0003473297228665978, "loss": 1.3376, "step": 3996 }, { "epoch": 1.832932003210641, "grad_norm": 0.3357933759689331, "learning_rate": 0.0003470939257183373, "loss": 1.7561, "step": 3997 }, { "epoch": 1.8333906662080035, "grad_norm": 0.36861729621887207, "learning_rate": 0.0003468581660790393, "loss": 1.7476, "step": 3998 }, { "epoch": 1.8338493292053664, "grad_norm": 0.26520270109176636, "learning_rate": 0.0003466224440065377, "loss": 1.5038, "step": 3999 }, { "epoch": 1.834307992202729, "grad_norm": 0.3509536385536194, "learning_rate": 0.0003463867595586562, "loss": 1.8193, "step": 4000 }, { "epoch": 1.8347666552000916, "grad_norm": 0.2574576139450073, "learning_rate": 0.0003461511127932103, "loss": 1.2697, "step": 4001 }, { "epoch": 1.8352253181974545, "grad_norm": 0.3160921037197113, "learning_rate": 0.00034591550376800563, "loss": 1.655, "step": 4002 }, { "epoch": 1.8356839811948171, "grad_norm": 0.34427279233932495, "learning_rate": 0.00034567993254083887, "loss": 1.3817, "step": 4003 }, { "epoch": 1.8361426441921798, "grad_norm": 0.31001847982406616, "learning_rate": 0.00034544439916949746, "loss": 1.1495, "step": 4004 }, { "epoch": 1.8366013071895426, "grad_norm": 0.23114445805549622, "learning_rate": 0.0003452089037117595, "loss": 1.3843, "step": 4005 }, { "epoch": 1.837059970186905, "grad_norm": 0.2845550775527954, "learning_rate": 0.0003449734462253934, "loss": 1.0899, "step": 4006 }, { "epoch": 1.837518633184268, "grad_norm": 0.368184894323349, "learning_rate": 0.0003447380267681587, "loss": 1.9531, "step": 4007 }, { "epoch": 1.8379772961816305, "grad_norm": 0.27855613827705383, "learning_rate": 0.0003445026453978054, "loss": 0.8792, "step": 4008 }, { "epoch": 1.8384359591789932, "grad_norm": 0.29736289381980896, "learning_rate": 0.00034426730217207457, "loss": 1.3437, "step": 4009 }, { "epoch": 1.838894622176356, "grad_norm": 0.26978176832199097, "learning_rate": 0.0003440319971486976, "loss": 0.9284, "step": 4010 }, { "epoch": 1.8393532851737187, "grad_norm": 0.24370868504047394, "learning_rate": 0.00034379673038539604, "loss": 1.8301, "step": 4011 }, { "epoch": 1.8398119481710813, "grad_norm": 0.31381699442863464, "learning_rate": 0.0003435615019398828, "loss": 1.2786, "step": 4012 }, { "epoch": 1.8402706111684441, "grad_norm": 0.36527925729751587, "learning_rate": 0.00034332631186986107, "loss": 1.5788, "step": 4013 }, { "epoch": 1.8407292741658066, "grad_norm": 0.29045170545578003, "learning_rate": 0.0003430911602330248, "loss": 1.7421, "step": 4014 }, { "epoch": 1.8411879371631694, "grad_norm": 0.515656590461731, "learning_rate": 0.0003428560470870583, "loss": 2.0176, "step": 4015 }, { "epoch": 1.841646600160532, "grad_norm": 0.2787885367870331, "learning_rate": 0.00034262097248963675, "loss": 1.4663, "step": 4016 }, { "epoch": 1.8421052631578947, "grad_norm": 0.2862381637096405, "learning_rate": 0.00034238593649842515, "loss": 0.9792, "step": 4017 }, { "epoch": 1.8425639261552575, "grad_norm": 0.30698710680007935, "learning_rate": 0.0003421509391710797, "loss": 1.7229, "step": 4018 }, { "epoch": 1.84302258915262, "grad_norm": 0.3188919723033905, "learning_rate": 0.0003419159805652471, "loss": 1.5143, "step": 4019 }, { "epoch": 1.8434812521499828, "grad_norm": 0.3322117328643799, "learning_rate": 0.0003416810607385644, "loss": 1.9916, "step": 4020 }, { "epoch": 1.8439399151473455, "grad_norm": 0.3120747208595276, "learning_rate": 0.00034144617974865896, "loss": 1.4741, "step": 4021 }, { "epoch": 1.844398578144708, "grad_norm": 0.2903960049152374, "learning_rate": 0.00034121133765314905, "loss": 1.6743, "step": 4022 }, { "epoch": 1.844857241142071, "grad_norm": 0.40729033946990967, "learning_rate": 0.00034097653450964265, "loss": 1.9248, "step": 4023 }, { "epoch": 1.8453159041394336, "grad_norm": 0.3693930208683014, "learning_rate": 0.00034074177037573904, "loss": 2.1533, "step": 4024 }, { "epoch": 1.8457745671367962, "grad_norm": 0.2894419729709625, "learning_rate": 0.00034050704530902756, "loss": 1.4656, "step": 4025 }, { "epoch": 1.846233230134159, "grad_norm": 0.32523804903030396, "learning_rate": 0.00034027235936708776, "loss": 1.505, "step": 4026 }, { "epoch": 1.8466918931315215, "grad_norm": 0.2615148723125458, "learning_rate": 0.00034003771260749017, "loss": 1.1829, "step": 4027 }, { "epoch": 1.8471505561288843, "grad_norm": 0.34713688492774963, "learning_rate": 0.00033980310508779476, "loss": 1.1238, "step": 4028 }, { "epoch": 1.847609219126247, "grad_norm": 0.2878703773021698, "learning_rate": 0.0003395685368655528, "loss": 1.0275, "step": 4029 }, { "epoch": 1.8480678821236096, "grad_norm": 0.2368079125881195, "learning_rate": 0.00033933400799830563, "loss": 1.4276, "step": 4030 }, { "epoch": 1.8485265451209725, "grad_norm": 0.20480850338935852, "learning_rate": 0.0003390995185435847, "loss": 1.324, "step": 4031 }, { "epoch": 1.8489852081183349, "grad_norm": 0.27292463183403015, "learning_rate": 0.00033886506855891195, "loss": 1.3118, "step": 4032 }, { "epoch": 1.8494438711156977, "grad_norm": 0.2694958448410034, "learning_rate": 0.00033863065810179986, "loss": 1.5169, "step": 4033 }, { "epoch": 1.8499025341130604, "grad_norm": 0.3515981137752533, "learning_rate": 0.0003383962872297508, "loss": 2.0571, "step": 4034 }, { "epoch": 1.850361197110423, "grad_norm": 0.3751930594444275, "learning_rate": 0.0003381619560002577, "loss": 1.3073, "step": 4035 }, { "epoch": 1.8508198601077859, "grad_norm": 0.3277819752693176, "learning_rate": 0.0003379276644708037, "loss": 1.6505, "step": 4036 }, { "epoch": 1.8512785231051485, "grad_norm": 0.17637912929058075, "learning_rate": 0.00033769341269886225, "loss": 0.4892, "step": 4037 }, { "epoch": 1.8517371861025111, "grad_norm": 0.2396342158317566, "learning_rate": 0.00033745920074189703, "loss": 1.3491, "step": 4038 }, { "epoch": 1.852195849099874, "grad_norm": 0.3997967541217804, "learning_rate": 0.0003372250286573617, "loss": 2.1116, "step": 4039 }, { "epoch": 1.8526545120972364, "grad_norm": 0.27099528908729553, "learning_rate": 0.00033699089650270054, "loss": 1.0897, "step": 4040 }, { "epoch": 1.8531131750945993, "grad_norm": 0.29823604226112366, "learning_rate": 0.00033675680433534785, "loss": 1.5873, "step": 4041 }, { "epoch": 1.853571838091962, "grad_norm": 0.30324751138687134, "learning_rate": 0.0003365227522127281, "loss": 1.3735, "step": 4042 }, { "epoch": 1.8540305010893245, "grad_norm": 0.2946029305458069, "learning_rate": 0.000336288740192256, "loss": 1.4114, "step": 4043 }, { "epoch": 1.8544891640866874, "grad_norm": 0.2893146872520447, "learning_rate": 0.0003360547683313363, "loss": 2.0024, "step": 4044 }, { "epoch": 1.85494782708405, "grad_norm": 0.37933430075645447, "learning_rate": 0.00033582083668736405, "loss": 1.7002, "step": 4045 }, { "epoch": 1.8554064900814127, "grad_norm": 0.27654582262039185, "learning_rate": 0.0003355869453177244, "loss": 0.7906, "step": 4046 }, { "epoch": 1.8558651530787755, "grad_norm": 0.09229593724012375, "learning_rate": 0.00033535309427979245, "loss": 1.09, "step": 4047 }, { "epoch": 1.856323816076138, "grad_norm": 0.3510340750217438, "learning_rate": 0.0003351192836309336, "loss": 1.3514, "step": 4048 }, { "epoch": 1.8567824790735008, "grad_norm": 0.2260064333677292, "learning_rate": 0.0003348855134285034, "loss": 0.8711, "step": 4049 }, { "epoch": 1.8572411420708634, "grad_norm": 0.27447935938835144, "learning_rate": 0.0003346517837298474, "loss": 1.7773, "step": 4050 }, { "epoch": 1.857699805068226, "grad_norm": 0.2299553006887436, "learning_rate": 0.000334418094592301, "loss": 0.9055, "step": 4051 }, { "epoch": 1.858158468065589, "grad_norm": 0.2812572121620178, "learning_rate": 0.0003341844460731899, "loss": 1.3986, "step": 4052 }, { "epoch": 1.8586171310629513, "grad_norm": 0.18786439299583435, "learning_rate": 0.0003339508382298297, "loss": 0.7558, "step": 4053 }, { "epoch": 1.8590757940603142, "grad_norm": 0.06244681030511856, "learning_rate": 0.0003337172711195262, "loss": 0.9041, "step": 4054 }, { "epoch": 1.8595344570576768, "grad_norm": 0.3086715042591095, "learning_rate": 0.00033348374479957513, "loss": 1.2524, "step": 4055 }, { "epoch": 1.8599931200550395, "grad_norm": 0.1853373497724533, "learning_rate": 0.000333250259327262, "loss": 0.4612, "step": 4056 }, { "epoch": 1.8604517830524023, "grad_norm": 0.24042657017707825, "learning_rate": 0.0003330168147598626, "loss": 1.0255, "step": 4057 }, { "epoch": 1.860910446049765, "grad_norm": 0.36561837792396545, "learning_rate": 0.00033278341115464263, "loss": 1.3232, "step": 4058 }, { "epoch": 1.8613691090471276, "grad_norm": 0.2835361361503601, "learning_rate": 0.0003325500485688575, "loss": 1.1592, "step": 4059 }, { "epoch": 1.8618277720444905, "grad_norm": 0.3773711919784546, "learning_rate": 0.0003323167270597528, "loss": 1.4202, "step": 4060 }, { "epoch": 1.8622864350418529, "grad_norm": 0.28732502460479736, "learning_rate": 0.00033208344668456417, "loss": 1.6362, "step": 4061 }, { "epoch": 1.8627450980392157, "grad_norm": 0.35971352458000183, "learning_rate": 0.00033185020750051673, "loss": 1.4979, "step": 4062 }, { "epoch": 1.8632037610365784, "grad_norm": 0.22411774098873138, "learning_rate": 0.00033161700956482574, "loss": 0.8292, "step": 4063 }, { "epoch": 1.863662424033941, "grad_norm": 0.2379181832075119, "learning_rate": 0.00033138385293469654, "loss": 1.1412, "step": 4064 }, { "epoch": 1.8641210870313039, "grad_norm": 0.2763615548610687, "learning_rate": 0.00033115073766732376, "loss": 1.2769, "step": 4065 }, { "epoch": 1.8645797500286663, "grad_norm": 0.23881085216999054, "learning_rate": 0.0003309176638198929, "loss": 0.9071, "step": 4066 }, { "epoch": 1.8650384130260291, "grad_norm": 0.3810456097126007, "learning_rate": 0.000330684631449578, "loss": 1.6372, "step": 4067 }, { "epoch": 1.8654970760233918, "grad_norm": 0.310472309589386, "learning_rate": 0.0003304516406135438, "loss": 1.0613, "step": 4068 }, { "epoch": 1.8659557390207544, "grad_norm": 0.15861466526985168, "learning_rate": 0.0003302186913689448, "loss": 0.6324, "step": 4069 }, { "epoch": 1.8664144020181173, "grad_norm": 0.22283266484737396, "learning_rate": 0.00032998578377292474, "loss": 0.9156, "step": 4070 }, { "epoch": 1.86687306501548, "grad_norm": 1.139766812324524, "learning_rate": 0.00032975291788261794, "loss": 1.3506, "step": 4071 }, { "epoch": 1.8673317280128425, "grad_norm": 0.41252392530441284, "learning_rate": 0.00032952009375514815, "loss": 1.6586, "step": 4072 }, { "epoch": 1.8677903910102054, "grad_norm": 0.1836547702550888, "learning_rate": 0.00032928731144762837, "loss": 0.9196, "step": 4073 }, { "epoch": 1.8682490540075678, "grad_norm": 0.32911989092826843, "learning_rate": 0.00032905457101716195, "loss": 1.861, "step": 4074 }, { "epoch": 1.8687077170049307, "grad_norm": 0.3695148825645447, "learning_rate": 0.00032882187252084185, "loss": 2.0391, "step": 4075 }, { "epoch": 1.8691663800022933, "grad_norm": 0.4213230311870575, "learning_rate": 0.0003285892160157507, "loss": 1.6511, "step": 4076 }, { "epoch": 1.869625042999656, "grad_norm": 0.24618668854236603, "learning_rate": 0.0003283566015589608, "loss": 1.6064, "step": 4077 }, { "epoch": 1.8700837059970188, "grad_norm": 0.43131116032600403, "learning_rate": 0.00032812402920753434, "loss": 1.8835, "step": 4078 }, { "epoch": 1.8705423689943814, "grad_norm": 0.3532734811306, "learning_rate": 0.00032789149901852265, "loss": 0.9022, "step": 4079 }, { "epoch": 1.871001031991744, "grad_norm": 0.21644118428230286, "learning_rate": 0.00032765901104896714, "loss": 1.0367, "step": 4080 }, { "epoch": 1.871459694989107, "grad_norm": 0.13179314136505127, "learning_rate": 0.0003274265653558989, "loss": 1.123, "step": 4081 }, { "epoch": 1.8719183579864693, "grad_norm": 0.3340977430343628, "learning_rate": 0.00032719416199633843, "loss": 1.6467, "step": 4082 }, { "epoch": 1.8723770209838322, "grad_norm": 0.3839796185493469, "learning_rate": 0.0003269618010272963, "loss": 2.147, "step": 4083 }, { "epoch": 1.8728356839811948, "grad_norm": 0.42947497963905334, "learning_rate": 0.0003267294825057719, "loss": 1.448, "step": 4084 }, { "epoch": 1.8732943469785575, "grad_norm": 0.3551345765590668, "learning_rate": 0.0003264972064887546, "loss": 1.4087, "step": 4085 }, { "epoch": 1.8737530099759203, "grad_norm": 0.20159225165843964, "learning_rate": 0.0003262649730332237, "loss": 0.8688, "step": 4086 }, { "epoch": 1.8742116729732827, "grad_norm": 0.17780308425426483, "learning_rate": 0.0003260327821961476, "loss": 0.6803, "step": 4087 }, { "epoch": 1.8746703359706456, "grad_norm": 0.27476388216018677, "learning_rate": 0.0003258006340344845, "loss": 1.5988, "step": 4088 }, { "epoch": 1.8751289989680082, "grad_norm": 0.2936633229255676, "learning_rate": 0.000325568528605182, "loss": 1.5938, "step": 4089 }, { "epoch": 1.8755876619653709, "grad_norm": 0.24134798347949982, "learning_rate": 0.00032533646596517683, "loss": 0.5465, "step": 4090 }, { "epoch": 1.8760463249627337, "grad_norm": 0.23964928090572357, "learning_rate": 0.0003251044461713961, "loss": 1.5688, "step": 4091 }, { "epoch": 1.8765049879600963, "grad_norm": 0.4487531781196594, "learning_rate": 0.0003248724692807558, "loss": 1.3783, "step": 4092 }, { "epoch": 1.876963650957459, "grad_norm": 0.2621656358242035, "learning_rate": 0.00032464053535016145, "loss": 1.6387, "step": 4093 }, { "epoch": 1.8774223139548218, "grad_norm": 0.5254698395729065, "learning_rate": 0.0003244086444365085, "loss": 1.5309, "step": 4094 }, { "epoch": 1.8778809769521843, "grad_norm": 0.4006554186344147, "learning_rate": 0.00032417679659668073, "loss": 1.6442, "step": 4095 }, { "epoch": 1.8783396399495471, "grad_norm": 0.44737085700035095, "learning_rate": 0.00032394499188755267, "loss": 1.443, "step": 4096 }, { "epoch": 1.8787983029469097, "grad_norm": 0.5100827217102051, "learning_rate": 0.0003237132303659875, "loss": 2.3247, "step": 4097 }, { "epoch": 1.8792569659442724, "grad_norm": 0.43226632475852966, "learning_rate": 0.00032348151208883805, "loss": 2.1605, "step": 4098 }, { "epoch": 1.8797156289416352, "grad_norm": 0.35672900080680847, "learning_rate": 0.0003232498371129464, "loss": 1.7168, "step": 4099 }, { "epoch": 1.8801742919389977, "grad_norm": 0.3012166917324066, "learning_rate": 0.0003230182054951443, "loss": 1.6538, "step": 4100 }, { "epoch": 1.8806329549363605, "grad_norm": 0.31628987193107605, "learning_rate": 0.00032278661729225234, "loss": 0.8449, "step": 4101 }, { "epoch": 1.8810916179337231, "grad_norm": 0.25368666648864746, "learning_rate": 0.000322555072561081, "loss": 1.324, "step": 4102 }, { "epoch": 1.8815502809310858, "grad_norm": 0.21288840472698212, "learning_rate": 0.0003223235713584297, "loss": 0.9169, "step": 4103 }, { "epoch": 1.8820089439284486, "grad_norm": 0.24230335652828217, "learning_rate": 0.00032209211374108746, "loss": 1.0717, "step": 4104 }, { "epoch": 1.8824676069258113, "grad_norm": 0.24073179066181183, "learning_rate": 0.0003218606997658326, "loss": 1.5495, "step": 4105 }, { "epoch": 1.882926269923174, "grad_norm": 0.2884657680988312, "learning_rate": 0.00032162932948943257, "loss": 0.7274, "step": 4106 }, { "epoch": 1.8833849329205368, "grad_norm": 0.2240462303161621, "learning_rate": 0.0003213980029686441, "loss": 1.6514, "step": 4107 }, { "epoch": 1.8838435959178992, "grad_norm": 0.3176068663597107, "learning_rate": 0.0003211667202602132, "loss": 1.1339, "step": 4108 }, { "epoch": 1.884302258915262, "grad_norm": 0.2350878119468689, "learning_rate": 0.0003209354814208754, "loss": 0.9783, "step": 4109 }, { "epoch": 1.8847609219126247, "grad_norm": 0.16942141950130463, "learning_rate": 0.00032070428650735506, "loss": 1.2051, "step": 4110 }, { "epoch": 1.8852195849099873, "grad_norm": 0.2659427523612976, "learning_rate": 0.0003204731355763661, "loss": 1.3427, "step": 4111 }, { "epoch": 1.8856782479073502, "grad_norm": 0.2091580033302307, "learning_rate": 0.00032024202868461137, "loss": 1.1494, "step": 4112 }, { "epoch": 1.8861369109047128, "grad_norm": 0.3054332733154297, "learning_rate": 0.0003200109658887831, "loss": 1.0687, "step": 4113 }, { "epoch": 1.8865955739020754, "grad_norm": 0.23581813275814056, "learning_rate": 0.0003197799472455627, "loss": 1.0554, "step": 4114 }, { "epoch": 1.8870542368994383, "grad_norm": 0.3387794494628906, "learning_rate": 0.0003195489728116207, "loss": 2.1521, "step": 4115 }, { "epoch": 1.8875128998968007, "grad_norm": 0.29953324794769287, "learning_rate": 0.00031931804264361674, "loss": 0.8916, "step": 4116 }, { "epoch": 1.8879715628941636, "grad_norm": 0.2123539000749588, "learning_rate": 0.0003190871567981999, "loss": 1.2987, "step": 4117 }, { "epoch": 1.8884302258915262, "grad_norm": 0.40169602632522583, "learning_rate": 0.00031885631533200775, "loss": 1.4036, "step": 4118 }, { "epoch": 1.8888888888888888, "grad_norm": 0.2332458198070526, "learning_rate": 0.00031862551830166765, "loss": 1.162, "step": 4119 }, { "epoch": 1.8893475518862517, "grad_norm": 0.27471116185188293, "learning_rate": 0.0003183947657637957, "loss": 1.3871, "step": 4120 }, { "epoch": 1.889806214883614, "grad_norm": 0.3699858784675598, "learning_rate": 0.00031816405777499704, "loss": 1.5227, "step": 4121 }, { "epoch": 1.890264877880977, "grad_norm": 0.27009811997413635, "learning_rate": 0.0003179333943918663, "loss": 1.3455, "step": 4122 }, { "epoch": 1.8907235408783396, "grad_norm": 0.2861959934234619, "learning_rate": 0.00031770277567098654, "loss": 1.7307, "step": 4123 }, { "epoch": 1.8911822038757022, "grad_norm": 0.31758224964141846, "learning_rate": 0.0003174722016689303, "loss": 1.1325, "step": 4124 }, { "epoch": 1.891640866873065, "grad_norm": 0.32737424969673157, "learning_rate": 0.0003172416724422592, "loss": 1.3532, "step": 4125 }, { "epoch": 1.8920995298704277, "grad_norm": 0.14036279916763306, "learning_rate": 0.00031701118804752353, "loss": 0.6664, "step": 4126 }, { "epoch": 1.8925581928677904, "grad_norm": 0.19789431989192963, "learning_rate": 0.0003167807485412629, "loss": 1.0829, "step": 4127 }, { "epoch": 1.8930168558651532, "grad_norm": 0.2573801577091217, "learning_rate": 0.00031655035398000576, "loss": 1.0196, "step": 4128 }, { "epoch": 1.8934755188625156, "grad_norm": 0.20172810554504395, "learning_rate": 0.00031632000442026947, "loss": 1.053, "step": 4129 }, { "epoch": 1.8939341818598785, "grad_norm": 0.33292561769485474, "learning_rate": 0.00031608969991856053, "loss": 1.9376, "step": 4130 }, { "epoch": 1.8943928448572411, "grad_norm": 0.2850032150745392, "learning_rate": 0.00031585944053137417, "loss": 1.5627, "step": 4131 }, { "epoch": 1.8948515078546038, "grad_norm": 0.46692344546318054, "learning_rate": 0.0003156292263151949, "loss": 1.7141, "step": 4132 }, { "epoch": 1.8953101708519666, "grad_norm": 0.39476048946380615, "learning_rate": 0.00031539905732649555, "loss": 2.0542, "step": 4133 }, { "epoch": 1.8957688338493293, "grad_norm": 0.4325892925262451, "learning_rate": 0.00031516893362173884, "loss": 1.1792, "step": 4134 }, { "epoch": 1.896227496846692, "grad_norm": 0.22258636355400085, "learning_rate": 0.0003149388552573752, "loss": 1.4821, "step": 4135 }, { "epoch": 1.8966861598440545, "grad_norm": 0.47724971175193787, "learning_rate": 0.00031470882228984475, "loss": 2.2678, "step": 4136 }, { "epoch": 1.8971448228414172, "grad_norm": 0.26698240637779236, "learning_rate": 0.0003144788347755763, "loss": 1.0363, "step": 4137 }, { "epoch": 1.89760348583878, "grad_norm": 0.3450622856616974, "learning_rate": 0.0003142488927709871, "loss": 2.0267, "step": 4138 }, { "epoch": 1.8980621488361427, "grad_norm": 0.2508101463317871, "learning_rate": 0.0003140189963324842, "loss": 0.9557, "step": 4139 }, { "epoch": 1.8985208118335053, "grad_norm": 0.2404715120792389, "learning_rate": 0.0003137891455164623, "loss": 1.6057, "step": 4140 }, { "epoch": 1.8989794748308682, "grad_norm": 0.3351982533931732, "learning_rate": 0.00031355934037930567, "loss": 1.451, "step": 4141 }, { "epoch": 1.8994381378282306, "grad_norm": 0.3819112479686737, "learning_rate": 0.00031332958097738707, "loss": 1.3151, "step": 4142 }, { "epoch": 1.8998968008255934, "grad_norm": 0.25655218958854675, "learning_rate": 0.00031309986736706826, "loss": 1.1487, "step": 4143 }, { "epoch": 1.900355463822956, "grad_norm": 0.18810351192951202, "learning_rate": 0.00031287019960469966, "loss": 1.4159, "step": 4144 }, { "epoch": 1.9008141268203187, "grad_norm": 0.3258521854877472, "learning_rate": 0.00031264057774662044, "loss": 1.6307, "step": 4145 }, { "epoch": 1.9012727898176816, "grad_norm": 0.309843510389328, "learning_rate": 0.0003124110018491584, "loss": 1.011, "step": 4146 }, { "epoch": 1.9017314528150442, "grad_norm": 0.2843288481235504, "learning_rate": 0.00031218147196863, "loss": 1.2536, "step": 4147 }, { "epoch": 1.9021901158124068, "grad_norm": 0.08047755807638168, "learning_rate": 0.00031195198816134093, "loss": 0.6959, "step": 4148 }, { "epoch": 1.9026487788097697, "grad_norm": 0.4606052339076996, "learning_rate": 0.000311722550483585, "loss": 1.2046, "step": 4149 }, { "epoch": 1.903107441807132, "grad_norm": 0.2677907645702362, "learning_rate": 0.0003114931589916452, "loss": 1.6278, "step": 4150 }, { "epoch": 1.903566104804495, "grad_norm": 0.32580938935279846, "learning_rate": 0.0003112638137417925, "loss": 1.4391, "step": 4151 }, { "epoch": 1.9040247678018576, "grad_norm": 0.23725193738937378, "learning_rate": 0.00031103451479028713, "loss": 1.2067, "step": 4152 }, { "epoch": 1.9044834307992202, "grad_norm": 0.3534078299999237, "learning_rate": 0.0003108052621933778, "loss": 1.437, "step": 4153 }, { "epoch": 1.904942093796583, "grad_norm": 0.395913690328598, "learning_rate": 0.0003105760560073018, "loss": 2.239, "step": 4154 }, { "epoch": 1.9054007567939455, "grad_norm": 0.38620731234550476, "learning_rate": 0.0003103468962882851, "loss": 1.0384, "step": 4155 }, { "epoch": 1.9058594197913084, "grad_norm": 0.27703335881233215, "learning_rate": 0.00031011778309254247, "loss": 1.6416, "step": 4156 }, { "epoch": 1.906318082788671, "grad_norm": 0.23753665387630463, "learning_rate": 0.00030988871647627645, "loss": 0.7991, "step": 4157 }, { "epoch": 1.9067767457860336, "grad_norm": 0.39035359025001526, "learning_rate": 0.0003096596964956791, "loss": 1.59, "step": 4158 }, { "epoch": 1.9072354087833965, "grad_norm": 0.22406819462776184, "learning_rate": 0.00030943072320693067, "loss": 0.8857, "step": 4159 }, { "epoch": 1.9076940717807591, "grad_norm": 0.36969655752182007, "learning_rate": 0.00030920179666619986, "loss": 1.712, "step": 4160 }, { "epoch": 1.9081527347781218, "grad_norm": 0.3277752697467804, "learning_rate": 0.0003089729169296444, "loss": 1.5408, "step": 4161 }, { "epoch": 1.9086113977754846, "grad_norm": 0.4305424392223358, "learning_rate": 0.0003087440840534093, "loss": 1.7606, "step": 4162 }, { "epoch": 1.909070060772847, "grad_norm": 0.2649680972099304, "learning_rate": 0.0003085152980936296, "loss": 1.4221, "step": 4163 }, { "epoch": 1.9095287237702099, "grad_norm": 0.34129345417022705, "learning_rate": 0.00030828655910642794, "loss": 1.2801, "step": 4164 }, { "epoch": 1.9099873867675725, "grad_norm": 0.16658979654312134, "learning_rate": 0.0003080578671479157, "loss": 0.7952, "step": 4165 }, { "epoch": 1.9104460497649352, "grad_norm": 0.18683701753616333, "learning_rate": 0.0003078292222741925, "loss": 1.1949, "step": 4166 }, { "epoch": 1.910904712762298, "grad_norm": 0.25396600365638733, "learning_rate": 0.00030760062454134697, "loss": 0.8212, "step": 4167 }, { "epoch": 1.9113633757596606, "grad_norm": 0.2024562507867813, "learning_rate": 0.0003073720740054553, "loss": 1.2571, "step": 4168 }, { "epoch": 1.9118220387570233, "grad_norm": 0.3227396309375763, "learning_rate": 0.0003071435707225828, "loss": 1.4965, "step": 4169 }, { "epoch": 1.912280701754386, "grad_norm": 0.29104283452033997, "learning_rate": 0.000306915114748783, "loss": 1.8234, "step": 4170 }, { "epoch": 1.9127393647517485, "grad_norm": 0.3544504642486572, "learning_rate": 0.00030668670614009775, "loss": 1.3737, "step": 4171 }, { "epoch": 1.9131980277491114, "grad_norm": 0.14929145574569702, "learning_rate": 0.0003064583449525574, "loss": 1.0649, "step": 4172 }, { "epoch": 1.913656690746474, "grad_norm": 0.2932626008987427, "learning_rate": 0.0003062300312421806, "loss": 1.3504, "step": 4173 }, { "epoch": 1.9141153537438367, "grad_norm": 0.3478389084339142, "learning_rate": 0.0003060017650649742, "loss": 1.5471, "step": 4174 }, { "epoch": 1.9145740167411995, "grad_norm": 0.30949702858924866, "learning_rate": 0.00030577354647693354, "loss": 1.1508, "step": 4175 }, { "epoch": 1.915032679738562, "grad_norm": 0.2801455855369568, "learning_rate": 0.0003055453755340425, "loss": 1.7807, "step": 4176 }, { "epoch": 1.9154913427359248, "grad_norm": 0.360272616147995, "learning_rate": 0.0003053172522922729, "loss": 1.7485, "step": 4177 }, { "epoch": 1.9159500057332874, "grad_norm": 0.34427425265312195, "learning_rate": 0.0003050891768075851, "loss": 1.1602, "step": 4178 }, { "epoch": 1.91640866873065, "grad_norm": 0.3189522922039032, "learning_rate": 0.00030486114913592753, "loss": 1.6614, "step": 4179 }, { "epoch": 1.916867331728013, "grad_norm": 0.3376857340335846, "learning_rate": 0.00030463316933323717, "loss": 1.2032, "step": 4180 }, { "epoch": 1.9173259947253756, "grad_norm": 0.36120155453681946, "learning_rate": 0.00030440523745543893, "loss": 1.8868, "step": 4181 }, { "epoch": 1.9177846577227382, "grad_norm": 0.3722556531429291, "learning_rate": 0.00030417735355844634, "loss": 1.3034, "step": 4182 }, { "epoch": 1.918243320720101, "grad_norm": 0.27059370279312134, "learning_rate": 0.00030394951769816084, "loss": 0.9223, "step": 4183 }, { "epoch": 1.9187019837174635, "grad_norm": 0.3722488582134247, "learning_rate": 0.0003037217299304723, "loss": 2.0471, "step": 4184 }, { "epoch": 1.9191606467148263, "grad_norm": 0.42110106348991394, "learning_rate": 0.00030349399031125856, "loss": 1.8804, "step": 4185 }, { "epoch": 1.919619309712189, "grad_norm": 0.33694103360176086, "learning_rate": 0.00030326629889638595, "loss": 1.3992, "step": 4186 }, { "epoch": 1.9200779727095516, "grad_norm": 0.26390311121940613, "learning_rate": 0.00030303865574170876, "loss": 1.6186, "step": 4187 }, { "epoch": 1.9205366357069145, "grad_norm": 0.2693273723125458, "learning_rate": 0.0003028110609030694, "loss": 1.4586, "step": 4188 }, { "epoch": 1.9209952987042769, "grad_norm": 0.4307970702648163, "learning_rate": 0.0003025835144362987, "loss": 1.1955, "step": 4189 }, { "epoch": 1.9214539617016397, "grad_norm": 0.11909201741218567, "learning_rate": 0.00030235601639721534, "loss": 1.0518, "step": 4190 }, { "epoch": 1.9219126246990024, "grad_norm": 0.36401188373565674, "learning_rate": 0.00030212856684162613, "loss": 0.9025, "step": 4191 }, { "epoch": 1.922371287696365, "grad_norm": 0.22211536765098572, "learning_rate": 0.00030190116582532627, "loss": 1.1021, "step": 4192 }, { "epoch": 1.9228299506937279, "grad_norm": 0.310380220413208, "learning_rate": 0.0003016738134040988, "loss": 1.709, "step": 4193 }, { "epoch": 1.9232886136910905, "grad_norm": 0.36067163944244385, "learning_rate": 0.00030144650963371487, "loss": 1.153, "step": 4194 }, { "epoch": 1.9237472766884531, "grad_norm": 0.2253832072019577, "learning_rate": 0.00030121925456993396, "loss": 1.1205, "step": 4195 }, { "epoch": 1.924205939685816, "grad_norm": 0.3536555767059326, "learning_rate": 0.000300992048268503, "loss": 1.7184, "step": 4196 }, { "epoch": 1.9246646026831784, "grad_norm": 0.0917428508400917, "learning_rate": 0.0003007648907851576, "loss": 1.0483, "step": 4197 }, { "epoch": 1.9251232656805413, "grad_norm": 0.36443841457366943, "learning_rate": 0.000300537782175621, "loss": 1.7299, "step": 4198 }, { "epoch": 1.925581928677904, "grad_norm": 0.33077698945999146, "learning_rate": 0.0003003107224956046, "loss": 1.6121, "step": 4199 }, { "epoch": 1.9260405916752665, "grad_norm": 0.40068572759628296, "learning_rate": 0.00030008371180080773, "loss": 1.2148, "step": 4200 }, { "epoch": 1.9264992546726294, "grad_norm": 0.363941490650177, "learning_rate": 0.00029985675014691814, "loss": 1.9509, "step": 4201 }, { "epoch": 1.926957917669992, "grad_norm": 0.1912655234336853, "learning_rate": 0.00029962983758961067, "loss": 0.9309, "step": 4202 }, { "epoch": 1.9274165806673547, "grad_norm": 0.2495778501033783, "learning_rate": 0.0002994029741845488, "loss": 1.5195, "step": 4203 }, { "epoch": 1.9278752436647173, "grad_norm": 0.29733824729919434, "learning_rate": 0.00029917615998738365, "loss": 1.0684, "step": 4204 }, { "epoch": 1.92833390666208, "grad_norm": 0.36461958289146423, "learning_rate": 0.0002989493950537544, "loss": 1.8616, "step": 4205 }, { "epoch": 1.9287925696594428, "grad_norm": 0.3024968206882477, "learning_rate": 0.0002987226794392885, "loss": 1.166, "step": 4206 }, { "epoch": 1.9292512326568054, "grad_norm": 0.32046911120414734, "learning_rate": 0.0002984960131996004, "loss": 1.0956, "step": 4207 }, { "epoch": 1.929709895654168, "grad_norm": 0.1437264233827591, "learning_rate": 0.00029826939639029324, "loss": 1.0188, "step": 4208 }, { "epoch": 1.930168558651531, "grad_norm": 0.342047780752182, "learning_rate": 0.00029804282906695765, "loss": 1.4002, "step": 4209 }, { "epoch": 1.9306272216488933, "grad_norm": 0.27232903242111206, "learning_rate": 0.0002978163112851722, "loss": 1.7488, "step": 4210 }, { "epoch": 1.9310858846462562, "grad_norm": 0.30967265367507935, "learning_rate": 0.00029758984310050354, "loss": 1.4791, "step": 4211 }, { "epoch": 1.9315445476436188, "grad_norm": 0.3274848759174347, "learning_rate": 0.000297363424568506, "loss": 1.6986, "step": 4212 }, { "epoch": 1.9320032106409815, "grad_norm": 0.3882467746734619, "learning_rate": 0.0002971370557447213, "loss": 1.7115, "step": 4213 }, { "epoch": 1.9324618736383443, "grad_norm": 0.35071897506713867, "learning_rate": 0.0002969107366846794, "loss": 1.2036, "step": 4214 }, { "epoch": 1.932920536635707, "grad_norm": 0.3669132590293884, "learning_rate": 0.0002966844674438982, "loss": 1.8774, "step": 4215 }, { "epoch": 1.9333791996330696, "grad_norm": 0.3220601975917816, "learning_rate": 0.00029645824807788325, "loss": 1.8994, "step": 4216 }, { "epoch": 1.9338378626304324, "grad_norm": 0.3539913594722748, "learning_rate": 0.00029623207864212775, "loss": 1.5546, "step": 4217 }, { "epoch": 1.9342965256277949, "grad_norm": 0.2368205189704895, "learning_rate": 0.00029600595919211247, "loss": 1.272, "step": 4218 }, { "epoch": 1.9347551886251577, "grad_norm": 0.38960981369018555, "learning_rate": 0.00029577988978330615, "loss": 2.0374, "step": 4219 }, { "epoch": 1.9352138516225204, "grad_norm": 0.3929775059223175, "learning_rate": 0.00029555387047116547, "loss": 2.1378, "step": 4220 }, { "epoch": 1.935672514619883, "grad_norm": 0.34435439109802246, "learning_rate": 0.00029532790131113446, "loss": 1.444, "step": 4221 }, { "epoch": 1.9361311776172458, "grad_norm": 0.27234023809432983, "learning_rate": 0.00029510198235864504, "loss": 1.2824, "step": 4222 }, { "epoch": 1.9365898406146083, "grad_norm": 0.30018359422683716, "learning_rate": 0.000294876113669117, "loss": 1.8326, "step": 4223 }, { "epoch": 1.9370485036119711, "grad_norm": 0.46480369567871094, "learning_rate": 0.00029465029529795696, "loss": 1.6801, "step": 4224 }, { "epoch": 1.9375071666093338, "grad_norm": 0.4117535948753357, "learning_rate": 0.0002944245273005602, "loss": 1.7745, "step": 4225 }, { "epoch": 1.9379658296066964, "grad_norm": 0.19903844594955444, "learning_rate": 0.00029419880973230916, "loss": 0.8529, "step": 4226 }, { "epoch": 1.9384244926040592, "grad_norm": 0.3728976249694824, "learning_rate": 0.00029397314264857405, "loss": 2.2271, "step": 4227 }, { "epoch": 1.9388831556014219, "grad_norm": 0.4130072593688965, "learning_rate": 0.00029374752610471255, "loss": 1.9492, "step": 4228 }, { "epoch": 1.9393418185987845, "grad_norm": 0.09748774021863937, "learning_rate": 0.00029352196015607014, "loss": 0.4857, "step": 4229 }, { "epoch": 1.9398004815961474, "grad_norm": 0.289890319108963, "learning_rate": 0.00029329644485797963, "loss": 1.7548, "step": 4230 }, { "epoch": 1.9402591445935098, "grad_norm": 0.33195921778678894, "learning_rate": 0.00029307098026576156, "loss": 1.767, "step": 4231 }, { "epoch": 1.9407178075908726, "grad_norm": 0.30616116523742676, "learning_rate": 0.0002928455664347241, "loss": 1.5225, "step": 4232 }, { "epoch": 1.9411764705882353, "grad_norm": 0.3174733519554138, "learning_rate": 0.0002926202034201628, "loss": 1.3201, "step": 4233 }, { "epoch": 1.941635133585598, "grad_norm": 0.3208681643009186, "learning_rate": 0.00029239489127736107, "loss": 1.0652, "step": 4234 }, { "epoch": 1.9420937965829608, "grad_norm": 0.21303045749664307, "learning_rate": 0.0002921696300615893, "loss": 1.0555, "step": 4235 }, { "epoch": 1.9425524595803234, "grad_norm": 0.2299261838197708, "learning_rate": 0.0002919444198281058, "loss": 1.4859, "step": 4236 }, { "epoch": 1.943011122577686, "grad_norm": 0.38345012068748474, "learning_rate": 0.0002917192606321563, "loss": 1.7592, "step": 4237 }, { "epoch": 1.9434697855750487, "grad_norm": 0.36465907096862793, "learning_rate": 0.0002914941525289739, "loss": 1.1496, "step": 4238 }, { "epoch": 1.9439284485724113, "grad_norm": 0.4449915885925293, "learning_rate": 0.00029126909557377923, "loss": 1.3264, "step": 4239 }, { "epoch": 1.9443871115697742, "grad_norm": 0.31518861651420593, "learning_rate": 0.0002910440898217808, "loss": 1.6181, "step": 4240 }, { "epoch": 1.9448457745671368, "grad_norm": 0.3502573072910309, "learning_rate": 0.0002908191353281735, "loss": 1.5842, "step": 4241 }, { "epoch": 1.9453044375644994, "grad_norm": 0.3748527467250824, "learning_rate": 0.00029059423214814053, "loss": 1.6724, "step": 4242 }, { "epoch": 1.9457631005618623, "grad_norm": 0.39455685019493103, "learning_rate": 0.0002903693803368522, "loss": 1.4643, "step": 4243 }, { "epoch": 1.9462217635592247, "grad_norm": 0.14820732176303864, "learning_rate": 0.00029014457994946654, "loss": 0.9309, "step": 4244 }, { "epoch": 1.9466804265565876, "grad_norm": 0.18288370966911316, "learning_rate": 0.00028991983104112874, "loss": 0.9465, "step": 4245 }, { "epoch": 1.9471390895539502, "grad_norm": 0.38087254762649536, "learning_rate": 0.00028969513366697096, "loss": 1.2691, "step": 4246 }, { "epoch": 1.9475977525513128, "grad_norm": 0.14294399321079254, "learning_rate": 0.0002894704878821133, "loss": 1.094, "step": 4247 }, { "epoch": 1.9480564155486757, "grad_norm": 0.4238864779472351, "learning_rate": 0.00028924589374166286, "loss": 1.7693, "step": 4248 }, { "epoch": 1.9485150785460383, "grad_norm": 0.2929639220237732, "learning_rate": 0.0002890213513007144, "loss": 1.2274, "step": 4249 }, { "epoch": 1.948973741543401, "grad_norm": 0.25756093859672546, "learning_rate": 0.00028879686061434966, "loss": 1.2784, "step": 4250 }, { "epoch": 1.9494324045407638, "grad_norm": 0.3350880742073059, "learning_rate": 0.0002885724217376381, "loss": 2.0134, "step": 4251 }, { "epoch": 1.9498910675381262, "grad_norm": 0.2928470969200134, "learning_rate": 0.00028834803472563574, "loss": 0.9233, "step": 4252 }, { "epoch": 1.950349730535489, "grad_norm": 0.35262230038642883, "learning_rate": 0.0002881236996333864, "loss": 2.0552, "step": 4253 }, { "epoch": 1.9508083935328517, "grad_norm": 0.3223377466201782, "learning_rate": 0.00028789941651592135, "loss": 1.6767, "step": 4254 }, { "epoch": 1.9512670565302144, "grad_norm": 0.3945949375629425, "learning_rate": 0.0002876751854282588, "loss": 1.5204, "step": 4255 }, { "epoch": 1.9517257195275772, "grad_norm": 0.25893688201904297, "learning_rate": 0.00028745100642540425, "loss": 1.2777, "step": 4256 }, { "epoch": 1.9521843825249396, "grad_norm": 0.34024423360824585, "learning_rate": 0.0002872268795623505, "loss": 1.7372, "step": 4257 }, { "epoch": 1.9526430455223025, "grad_norm": 0.2754332423210144, "learning_rate": 0.00028700280489407713, "loss": 1.1841, "step": 4258 }, { "epoch": 1.9531017085196651, "grad_norm": 0.3347947299480438, "learning_rate": 0.0002867787824755516, "loss": 1.3132, "step": 4259 }, { "epoch": 1.9535603715170278, "grad_norm": 0.21994498372077942, "learning_rate": 0.00028655481236172813, "loss": 1.2384, "step": 4260 }, { "epoch": 1.9540190345143906, "grad_norm": 0.2327728271484375, "learning_rate": 0.00028633089460754826, "loss": 1.3356, "step": 4261 }, { "epoch": 1.9544776975117533, "grad_norm": 0.2511197030544281, "learning_rate": 0.00028610702926794063, "loss": 0.728, "step": 4262 }, { "epoch": 1.954936360509116, "grad_norm": 0.2759820222854614, "learning_rate": 0.0002858832163978207, "loss": 1.6011, "step": 4263 }, { "epoch": 1.9553950235064788, "grad_norm": 0.3019004166126251, "learning_rate": 0.00028565945605209177, "loss": 1.7163, "step": 4264 }, { "epoch": 1.9558536865038412, "grad_norm": 0.3434310555458069, "learning_rate": 0.0002854357482856438, "loss": 1.338, "step": 4265 }, { "epoch": 1.956312349501204, "grad_norm": 0.2556118369102478, "learning_rate": 0.0002852120931533538, "loss": 1.6398, "step": 4266 }, { "epoch": 1.9567710124985667, "grad_norm": 0.39801543951034546, "learning_rate": 0.0002849884907100861, "loss": 1.4964, "step": 4267 }, { "epoch": 1.9572296754959293, "grad_norm": 0.30779194831848145, "learning_rate": 0.0002847649410106921, "loss": 1.7112, "step": 4268 }, { "epoch": 1.9576883384932922, "grad_norm": 0.29870155453681946, "learning_rate": 0.0002845414441100098, "loss": 1.6331, "step": 4269 }, { "epoch": 1.9581470014906548, "grad_norm": 0.2507461905479431, "learning_rate": 0.0002843180000628649, "loss": 1.0191, "step": 4270 }, { "epoch": 1.9586056644880174, "grad_norm": 0.25291094183921814, "learning_rate": 0.00028409460892406957, "loss": 0.802, "step": 4271 }, { "epoch": 1.95906432748538, "grad_norm": 0.06607484817504883, "learning_rate": 0.00028387127074842357, "loss": 0.6714, "step": 4272 }, { "epoch": 1.9595229904827427, "grad_norm": 0.30349382758140564, "learning_rate": 0.00028364798559071315, "loss": 1.3984, "step": 4273 }, { "epoch": 1.9599816534801056, "grad_norm": 0.23646502196788788, "learning_rate": 0.0002834247535057118, "loss": 1.5465, "step": 4274 }, { "epoch": 1.9604403164774682, "grad_norm": 0.32397279143333435, "learning_rate": 0.00028320157454818015, "loss": 1.2772, "step": 4275 }, { "epoch": 1.9608989794748308, "grad_norm": 0.2407788336277008, "learning_rate": 0.00028297844877286536, "loss": 1.336, "step": 4276 }, { "epoch": 1.9613576424721937, "grad_norm": 0.3070877194404602, "learning_rate": 0.00028275537623450187, "loss": 1.8708, "step": 4277 }, { "epoch": 1.961816305469556, "grad_norm": 0.3111575245857239, "learning_rate": 0.0002825323569878111, "loss": 1.2437, "step": 4278 }, { "epoch": 1.962274968466919, "grad_norm": 0.1594800353050232, "learning_rate": 0.0002823093910875013, "loss": 0.7383, "step": 4279 }, { "epoch": 1.9627336314642816, "grad_norm": 0.23517662286758423, "learning_rate": 0.0002820864785882673, "loss": 1.308, "step": 4280 }, { "epoch": 1.9631922944616442, "grad_norm": 0.2781253755092621, "learning_rate": 0.0002818636195447913, "loss": 1.3627, "step": 4281 }, { "epoch": 1.963650957459007, "grad_norm": 0.2628926634788513, "learning_rate": 0.0002816408140117424, "loss": 1.2191, "step": 4282 }, { "epoch": 1.9641096204563697, "grad_norm": 0.2107929289340973, "learning_rate": 0.00028141806204377617, "loss": 1.2504, "step": 4283 }, { "epoch": 1.9645682834537324, "grad_norm": 0.27736082673072815, "learning_rate": 0.0002811953636955354, "loss": 1.0677, "step": 4284 }, { "epoch": 1.9650269464510952, "grad_norm": 0.23589631915092468, "learning_rate": 0.00028097271902164967, "loss": 1.6243, "step": 4285 }, { "epoch": 1.9654856094484576, "grad_norm": 0.458365261554718, "learning_rate": 0.00028075012807673516, "loss": 1.6266, "step": 4286 }, { "epoch": 1.9659442724458205, "grad_norm": 0.25476112961769104, "learning_rate": 0.0002805275909153951, "loss": 1.7578, "step": 4287 }, { "epoch": 1.9664029354431831, "grad_norm": 0.3246137797832489, "learning_rate": 0.00028030510759221943, "loss": 1.1602, "step": 4288 }, { "epoch": 1.9668615984405458, "grad_norm": 0.3675731420516968, "learning_rate": 0.000280082678161785, "loss": 1.2238, "step": 4289 }, { "epoch": 1.9673202614379086, "grad_norm": 0.3241674304008484, "learning_rate": 0.00027986030267865546, "loss": 1.2205, "step": 4290 }, { "epoch": 1.967778924435271, "grad_norm": 0.25529178977012634, "learning_rate": 0.00027963798119738084, "loss": 1.7133, "step": 4291 }, { "epoch": 1.9682375874326339, "grad_norm": 0.2785083055496216, "learning_rate": 0.0002794157137724983, "loss": 1.4641, "step": 4292 }, { "epoch": 1.9686962504299965, "grad_norm": 0.39693623781204224, "learning_rate": 0.00027919350045853167, "loss": 2.1711, "step": 4293 }, { "epoch": 1.9691549134273592, "grad_norm": 0.3063134253025055, "learning_rate": 0.0002789713413099914, "loss": 1.2238, "step": 4294 }, { "epoch": 1.969613576424722, "grad_norm": 0.21434158086776733, "learning_rate": 0.0002787492363813748, "loss": 1.2683, "step": 4295 }, { "epoch": 1.9700722394220846, "grad_norm": 0.26843902468681335, "learning_rate": 0.00027852718572716594, "loss": 1.1436, "step": 4296 }, { "epoch": 1.9705309024194473, "grad_norm": 0.26620641350746155, "learning_rate": 0.00027830518940183527, "loss": 1.2504, "step": 4297 }, { "epoch": 1.9709895654168101, "grad_norm": 0.4168534278869629, "learning_rate": 0.0002780832474598401, "loss": 1.4581, "step": 4298 }, { "epoch": 1.9714482284141726, "grad_norm": 0.5389671325683594, "learning_rate": 0.00027786135995562446, "loss": 1.738, "step": 4299 }, { "epoch": 1.9719068914115354, "grad_norm": 0.18300795555114746, "learning_rate": 0.00027763952694361894, "loss": 0.9293, "step": 4300 }, { "epoch": 1.972365554408898, "grad_norm": 0.18415942788124084, "learning_rate": 0.00027741774847824094, "loss": 1.2104, "step": 4301 }, { "epoch": 1.9728242174062607, "grad_norm": 0.3821175992488861, "learning_rate": 0.00027719602461389394, "loss": 1.6418, "step": 4302 }, { "epoch": 1.9732828804036235, "grad_norm": 0.34809380769729614, "learning_rate": 0.0002769743554049686, "loss": 1.3178, "step": 4303 }, { "epoch": 1.9737415434009862, "grad_norm": 0.34898874163627625, "learning_rate": 0.00027675274090584195, "loss": 0.9742, "step": 4304 }, { "epoch": 1.9742002063983488, "grad_norm": 0.30163076519966125, "learning_rate": 0.0002765311811708775, "loss": 1.6653, "step": 4305 }, { "epoch": 1.9746588693957114, "grad_norm": 0.37521055340766907, "learning_rate": 0.0002763096762544258, "loss": 1.4052, "step": 4306 }, { "epoch": 1.975117532393074, "grad_norm": 0.36267635226249695, "learning_rate": 0.0002760882262108236, "loss": 1.627, "step": 4307 }, { "epoch": 1.975576195390437, "grad_norm": 0.37913310527801514, "learning_rate": 0.0002758668310943938, "loss": 1.8204, "step": 4308 }, { "epoch": 1.9760348583877996, "grad_norm": 0.29660242795944214, "learning_rate": 0.00027564549095944636, "loss": 1.3698, "step": 4309 }, { "epoch": 1.9764935213851622, "grad_norm": 0.3772091865539551, "learning_rate": 0.00027542420586027774, "loss": 1.7451, "step": 4310 }, { "epoch": 1.976952184382525, "grad_norm": 0.32186251878738403, "learning_rate": 0.0002752029758511707, "loss": 1.7236, "step": 4311 }, { "epoch": 1.9774108473798875, "grad_norm": 0.2753252685070038, "learning_rate": 0.0002749818009863945, "loss": 1.0989, "step": 4312 }, { "epoch": 1.9778695103772503, "grad_norm": 0.30564045906066895, "learning_rate": 0.0002747606813202052, "loss": 1.3082, "step": 4313 }, { "epoch": 1.978328173374613, "grad_norm": 0.24932487308979034, "learning_rate": 0.0002745396169068447, "loss": 0.9138, "step": 4314 }, { "epoch": 1.9787868363719756, "grad_norm": 0.26185494661331177, "learning_rate": 0.0002743186078005415, "loss": 1.2475, "step": 4315 }, { "epoch": 1.9792454993693385, "grad_norm": 0.2203855961561203, "learning_rate": 0.00027409765405551136, "loss": 1.3713, "step": 4316 }, { "epoch": 1.979704162366701, "grad_norm": 0.3790103495121002, "learning_rate": 0.0002738767557259555, "loss": 1.4261, "step": 4317 }, { "epoch": 1.9801628253640637, "grad_norm": 0.3733806312084198, "learning_rate": 0.0002736559128660621, "loss": 2.1038, "step": 4318 }, { "epoch": 1.9806214883614266, "grad_norm": 0.20003962516784668, "learning_rate": 0.00027343512553000505, "loss": 0.7565, "step": 4319 }, { "epoch": 1.981080151358789, "grad_norm": 0.2958119511604309, "learning_rate": 0.00027321439377194537, "loss": 0.9704, "step": 4320 }, { "epoch": 1.9815388143561519, "grad_norm": 0.2841302752494812, "learning_rate": 0.00027299371764603, "loss": 1.327, "step": 4321 }, { "epoch": 1.9819974773535145, "grad_norm": 0.3038768172264099, "learning_rate": 0.00027277309720639265, "loss": 1.2805, "step": 4322 }, { "epoch": 1.9824561403508771, "grad_norm": 0.3406788110733032, "learning_rate": 0.00027255253250715286, "loss": 1.4304, "step": 4323 }, { "epoch": 1.98291480334824, "grad_norm": 0.2855389416217804, "learning_rate": 0.000272332023602417, "loss": 1.6539, "step": 4324 }, { "epoch": 1.9833734663456024, "grad_norm": 0.23860915005207062, "learning_rate": 0.000272111570546277, "loss": 1.3198, "step": 4325 }, { "epoch": 1.9838321293429653, "grad_norm": 0.3688766360282898, "learning_rate": 0.0002718911733928121, "loss": 1.1564, "step": 4326 }, { "epoch": 1.984290792340328, "grad_norm": 0.15739460289478302, "learning_rate": 0.00027167083219608706, "loss": 0.6855, "step": 4327 }, { "epoch": 1.9847494553376905, "grad_norm": 0.2910972535610199, "learning_rate": 0.0002714505470101533, "loss": 1.7732, "step": 4328 }, { "epoch": 1.9852081183350534, "grad_norm": 0.3392382562160492, "learning_rate": 0.0002712303178890484, "loss": 1.2592, "step": 4329 }, { "epoch": 1.985666781332416, "grad_norm": 0.27205178141593933, "learning_rate": 0.0002710101448867959, "loss": 1.304, "step": 4330 }, { "epoch": 1.9861254443297787, "grad_norm": 0.31658264994621277, "learning_rate": 0.0002707900280574059, "loss": 1.6132, "step": 4331 }, { "epoch": 1.9865841073271415, "grad_norm": 0.34457242488861084, "learning_rate": 0.00027056996745487475, "loss": 1.6532, "step": 4332 }, { "epoch": 1.987042770324504, "grad_norm": 0.27797314524650574, "learning_rate": 0.00027034996313318483, "loss": 1.3838, "step": 4333 }, { "epoch": 1.9875014333218668, "grad_norm": 0.2671056091785431, "learning_rate": 0.00027013001514630483, "loss": 1.34, "step": 4334 }, { "epoch": 1.9879600963192294, "grad_norm": 0.2947138845920563, "learning_rate": 0.0002699101235481896, "loss": 1.0969, "step": 4335 }, { "epoch": 1.988418759316592, "grad_norm": 0.3621201515197754, "learning_rate": 0.00026969028839278, "loss": 1.7797, "step": 4336 }, { "epoch": 1.988877422313955, "grad_norm": 0.2773393988609314, "learning_rate": 0.00026947050973400333, "loss": 1.5847, "step": 4337 }, { "epoch": 1.9893360853113176, "grad_norm": 0.24329693615436554, "learning_rate": 0.00026925078762577283, "loss": 1.2757, "step": 4338 }, { "epoch": 1.9897947483086802, "grad_norm": 0.3237564265727997, "learning_rate": 0.00026903112212198796, "loss": 1.1135, "step": 4339 }, { "epoch": 1.9902534113060428, "grad_norm": 0.21693646907806396, "learning_rate": 0.0002688115132765344, "loss": 1.2356, "step": 4340 }, { "epoch": 1.9907120743034055, "grad_norm": 0.27011099457740784, "learning_rate": 0.00026859196114328333, "loss": 1.6289, "step": 4341 }, { "epoch": 1.9911707373007683, "grad_norm": 0.34364983439445496, "learning_rate": 0.0002683724657760928, "loss": 1.2663, "step": 4342 }, { "epoch": 1.991629400298131, "grad_norm": 0.189200758934021, "learning_rate": 0.00026815302722880643, "loss": 0.7483, "step": 4343 }, { "epoch": 1.9920880632954936, "grad_norm": 0.23307786881923676, "learning_rate": 0.00026793364555525426, "loss": 0.8417, "step": 4344 }, { "epoch": 1.9925467262928565, "grad_norm": 0.23828265070915222, "learning_rate": 0.00026771432080925205, "loss": 0.7974, "step": 4345 }, { "epoch": 1.9930053892902189, "grad_norm": 0.22147458791732788, "learning_rate": 0.0002674950530446019, "loss": 0.8344, "step": 4346 }, { "epoch": 1.9934640522875817, "grad_norm": 0.2830180525779724, "learning_rate": 0.0002672758423150916, "loss": 1.2985, "step": 4347 }, { "epoch": 1.9939227152849444, "grad_norm": 0.3324924409389496, "learning_rate": 0.0002670566886744953, "loss": 1.7325, "step": 4348 }, { "epoch": 1.994381378282307, "grad_norm": 0.18930895626544952, "learning_rate": 0.0002668375921765728, "loss": 0.7449, "step": 4349 }, { "epoch": 1.9948400412796699, "grad_norm": 0.28275609016418457, "learning_rate": 0.0002666185528750702, "loss": 1.7032, "step": 4350 }, { "epoch": 1.9952987042770325, "grad_norm": 0.2890438735485077, "learning_rate": 0.00026639957082371936, "loss": 1.2205, "step": 4351 }, { "epoch": 1.9957573672743951, "grad_norm": 0.36931562423706055, "learning_rate": 0.0002661806460762383, "loss": 1.7939, "step": 4352 }, { "epoch": 1.996216030271758, "grad_norm": 0.3492710590362549, "learning_rate": 0.0002659617786863304, "loss": 1.6342, "step": 4353 }, { "epoch": 1.9966746932691204, "grad_norm": 0.29765257239341736, "learning_rate": 0.00026574296870768575, "loss": 1.7397, "step": 4354 }, { "epoch": 1.9971333562664833, "grad_norm": 0.37656140327453613, "learning_rate": 0.00026552421619398004, "loss": 2.0061, "step": 4355 }, { "epoch": 1.9975920192638459, "grad_norm": 0.29920002818107605, "learning_rate": 0.0002653055211988746, "loss": 1.6586, "step": 4356 }, { "epoch": 1.9980506822612085, "grad_norm": 0.34368252754211426, "learning_rate": 0.0002650868837760172, "loss": 1.3087, "step": 4357 }, { "epoch": 1.9985093452585714, "grad_norm": 0.26091626286506653, "learning_rate": 0.0002648683039790409, "loss": 1.244, "step": 4358 }, { "epoch": 1.9989680082559338, "grad_norm": 0.18554215133190155, "learning_rate": 0.0002646497818615651, "loss": 1.1157, "step": 4359 }, { "epoch": 1.9994266712532967, "grad_norm": 0.23204344511032104, "learning_rate": 0.00026443131747719474, "loss": 1.2117, "step": 4360 }, { "epoch": 1.9998853342506593, "grad_norm": 0.25021374225616455, "learning_rate": 0.00026421291087952084, "loss": 1.3657, "step": 4361 }, { "epoch": 2.0, "grad_norm": 0.25021374225616455, "learning_rate": 0.00026421291087952084, "loss": 0.5557, "step": 4362 }, { "epoch": 2.000458662997363, "grad_norm": 0.31650856137275696, "learning_rate": 0.0002639945621221199, "loss": 1.4053, "step": 4363 }, { "epoch": 2.0009173259947253, "grad_norm": 0.3828623592853546, "learning_rate": 0.00026377627125855475, "loss": 1.5593, "step": 4364 }, { "epoch": 2.001375988992088, "grad_norm": 0.3297773003578186, "learning_rate": 0.00026355803834237333, "loss": 1.6141, "step": 4365 }, { "epoch": 2.0018346519894505, "grad_norm": 0.20084944367408752, "learning_rate": 0.00026333986342711, "loss": 0.831, "step": 4366 }, { "epoch": 2.0022933149868134, "grad_norm": 0.21461676061153412, "learning_rate": 0.0002631217465662845, "loss": 1.5271, "step": 4367 }, { "epoch": 2.0027519779841763, "grad_norm": 0.3193070590496063, "learning_rate": 0.00026290368781340263, "loss": 1.1464, "step": 4368 }, { "epoch": 2.0032106409815387, "grad_norm": 0.26611328125, "learning_rate": 0.00026268568722195564, "loss": 0.8123, "step": 4369 }, { "epoch": 2.0036693039789015, "grad_norm": 0.07296108454465866, "learning_rate": 0.0002624677448454207, "loss": 0.8224, "step": 4370 }, { "epoch": 2.0041279669762644, "grad_norm": 0.36370766162872314, "learning_rate": 0.00026224986073726064, "loss": 1.5906, "step": 4371 }, { "epoch": 2.004586629973627, "grad_norm": 0.3442339301109314, "learning_rate": 0.0002620320349509241, "loss": 1.3951, "step": 4372 }, { "epoch": 2.0050452929709897, "grad_norm": 0.2694573700428009, "learning_rate": 0.0002618142675398451, "loss": 0.851, "step": 4373 }, { "epoch": 2.005503955968352, "grad_norm": 0.22100800275802612, "learning_rate": 0.00026159655855744374, "loss": 1.3334, "step": 4374 }, { "epoch": 2.005962618965715, "grad_norm": 0.3315063714981079, "learning_rate": 0.00026137890805712574, "loss": 1.7025, "step": 4375 }, { "epoch": 2.006421281963078, "grad_norm": 0.5389212369918823, "learning_rate": 0.0002611613160922819, "loss": 1.4756, "step": 4376 }, { "epoch": 2.00687994496044, "grad_norm": 0.3037826120853424, "learning_rate": 0.0002609437827162894, "loss": 1.66, "step": 4377 }, { "epoch": 2.007338607957803, "grad_norm": 0.3313737213611603, "learning_rate": 0.0002607263079825106, "loss": 1.8962, "step": 4378 }, { "epoch": 2.007797270955166, "grad_norm": 0.3897867500782013, "learning_rate": 0.0002605088919442936, "loss": 1.5619, "step": 4379 }, { "epoch": 2.0082559339525283, "grad_norm": 0.24746671319007874, "learning_rate": 0.00026029153465497243, "loss": 0.7236, "step": 4380 }, { "epoch": 2.008714596949891, "grad_norm": 0.30122828483581543, "learning_rate": 0.0002600742361678663, "loss": 1.7644, "step": 4381 }, { "epoch": 2.0091732599472536, "grad_norm": 0.32251477241516113, "learning_rate": 0.0002598569965362799, "loss": 1.2259, "step": 4382 }, { "epoch": 2.0096319229446165, "grad_norm": 0.219418004155159, "learning_rate": 0.0002596398158135037, "loss": 0.5097, "step": 4383 }, { "epoch": 2.0100905859419793, "grad_norm": 0.3621459901332855, "learning_rate": 0.00025942269405281385, "loss": 1.3002, "step": 4384 }, { "epoch": 2.0105492489393417, "grad_norm": 0.06939633190631866, "learning_rate": 0.00025920563130747167, "loss": 0.3688, "step": 4385 }, { "epoch": 2.0110079119367046, "grad_norm": 0.2349678874015808, "learning_rate": 0.0002589886276307246, "loss": 1.1327, "step": 4386 }, { "epoch": 2.011466574934067, "grad_norm": 0.3204725384712219, "learning_rate": 0.0002587716830758048, "loss": 1.3671, "step": 4387 }, { "epoch": 2.01192523793143, "grad_norm": 0.22198036313056946, "learning_rate": 0.0002585547976959303, "loss": 1.0752, "step": 4388 }, { "epoch": 2.0123839009287927, "grad_norm": 0.30143803358078003, "learning_rate": 0.00025833797154430494, "loss": 1.792, "step": 4389 }, { "epoch": 2.012842563926155, "grad_norm": 0.3181972801685333, "learning_rate": 0.0002581212046741177, "loss": 1.5635, "step": 4390 }, { "epoch": 2.013301226923518, "grad_norm": 0.3738367557525635, "learning_rate": 0.000257904497138543, "loss": 1.2866, "step": 4391 }, { "epoch": 2.013759889920881, "grad_norm": 0.256151020526886, "learning_rate": 0.00025768784899074087, "loss": 1.8867, "step": 4392 }, { "epoch": 2.0142185529182433, "grad_norm": 0.28460898995399475, "learning_rate": 0.00025747126028385643, "loss": 0.8942, "step": 4393 }, { "epoch": 2.014677215915606, "grad_norm": 0.27417483925819397, "learning_rate": 0.0002572547310710205, "loss": 1.547, "step": 4394 }, { "epoch": 2.0151358789129685, "grad_norm": 0.3943521976470947, "learning_rate": 0.00025703826140534937, "loss": 1.222, "step": 4395 }, { "epoch": 2.0155945419103314, "grad_norm": 0.3956080377101898, "learning_rate": 0.00025682185133994457, "loss": 1.679, "step": 4396 }, { "epoch": 2.0160532049076942, "grad_norm": 0.41523680090904236, "learning_rate": 0.0002566055009278932, "loss": 0.6923, "step": 4397 }, { "epoch": 2.0165118679050567, "grad_norm": 0.228208526968956, "learning_rate": 0.00025638921022226704, "loss": 1.4105, "step": 4398 }, { "epoch": 2.0169705309024195, "grad_norm": 0.27708700299263, "learning_rate": 0.00025617297927612437, "loss": 1.4982, "step": 4399 }, { "epoch": 2.017429193899782, "grad_norm": 0.34748536348342896, "learning_rate": 0.0002559568081425079, "loss": 1.3942, "step": 4400 }, { "epoch": 2.017887856897145, "grad_norm": 0.25076958537101746, "learning_rate": 0.00025574069687444613, "loss": 1.3903, "step": 4401 }, { "epoch": 2.0183465198945076, "grad_norm": 0.34007957577705383, "learning_rate": 0.00025552464552495253, "loss": 1.5933, "step": 4402 }, { "epoch": 2.01880518289187, "grad_norm": 0.31931763887405396, "learning_rate": 0.0002553086541470263, "loss": 0.8987, "step": 4403 }, { "epoch": 2.019263845889233, "grad_norm": 0.2598975598812103, "learning_rate": 0.0002550927227936515, "loss": 1.6906, "step": 4404 }, { "epoch": 2.0197225088865958, "grad_norm": 0.3859318196773529, "learning_rate": 0.0002548768515177975, "loss": 2.1326, "step": 4405 }, { "epoch": 2.020181171883958, "grad_norm": 0.35015788674354553, "learning_rate": 0.0002546610403724193, "loss": 0.8087, "step": 4406 }, { "epoch": 2.020639834881321, "grad_norm": 0.25756731629371643, "learning_rate": 0.0002544452894104569, "loss": 1.2472, "step": 4407 }, { "epoch": 2.0210984978786835, "grad_norm": 0.3442479372024536, "learning_rate": 0.0002542295986848355, "loss": 1.7928, "step": 4408 }, { "epoch": 2.0215571608760463, "grad_norm": 0.41881293058395386, "learning_rate": 0.00025401396824846576, "loss": 1.7277, "step": 4409 }, { "epoch": 2.022015823873409, "grad_norm": 0.28389236330986023, "learning_rate": 0.0002537983981542432, "loss": 1.1472, "step": 4410 }, { "epoch": 2.0224744868707716, "grad_norm": 0.19957682490348816, "learning_rate": 0.0002535828884550487, "loss": 0.9713, "step": 4411 }, { "epoch": 2.0229331498681344, "grad_norm": 0.20129139721393585, "learning_rate": 0.0002533674392037485, "loss": 1.2563, "step": 4412 }, { "epoch": 2.023391812865497, "grad_norm": 0.39847332239151, "learning_rate": 0.0002531520504531938, "loss": 1.3053, "step": 4413 }, { "epoch": 2.0238504758628597, "grad_norm": 0.3507583439350128, "learning_rate": 0.00025293672225622113, "loss": 1.7719, "step": 4414 }, { "epoch": 2.0243091388602226, "grad_norm": 0.34539008140563965, "learning_rate": 0.0002527214546656517, "loss": 1.5484, "step": 4415 }, { "epoch": 2.024767801857585, "grad_norm": 0.3002701997756958, "learning_rate": 0.0002525062477342925, "loss": 1.8398, "step": 4416 }, { "epoch": 2.025226464854948, "grad_norm": 0.39402061700820923, "learning_rate": 0.00025229110151493516, "loss": 1.6705, "step": 4417 }, { "epoch": 2.0256851278523107, "grad_norm": 0.28095006942749023, "learning_rate": 0.0002520760160603567, "loss": 1.7056, "step": 4418 }, { "epoch": 2.026143790849673, "grad_norm": 0.33673858642578125, "learning_rate": 0.0002518609914233192, "loss": 1.4712, "step": 4419 }, { "epoch": 2.026602453847036, "grad_norm": 0.3398224711418152, "learning_rate": 0.00025164602765656964, "loss": 1.0774, "step": 4420 }, { "epoch": 2.0270611168443984, "grad_norm": 0.3440173268318176, "learning_rate": 0.00025143112481284017, "loss": 1.2208, "step": 4421 }, { "epoch": 2.0275197798417612, "grad_norm": 0.30769407749176025, "learning_rate": 0.0002512162829448481, "loss": 1.2709, "step": 4422 }, { "epoch": 2.027978442839124, "grad_norm": 0.2576143443584442, "learning_rate": 0.00025100150210529565, "loss": 1.1309, "step": 4423 }, { "epoch": 2.0284371058364865, "grad_norm": 0.17252714931964874, "learning_rate": 0.00025078678234687014, "loss": 0.61, "step": 4424 }, { "epoch": 2.0288957688338494, "grad_norm": 0.21611100435256958, "learning_rate": 0.000250572123722244, "loss": 1.1369, "step": 4425 }, { "epoch": 2.0293544318312122, "grad_norm": 0.3421436846256256, "learning_rate": 0.00025035752628407414, "loss": 1.5566, "step": 4426 }, { "epoch": 2.0298130948285746, "grad_norm": 0.29357588291168213, "learning_rate": 0.00025014299008500315, "loss": 1.1662, "step": 4427 }, { "epoch": 2.0302717578259375, "grad_norm": 0.20734919607639313, "learning_rate": 0.00024992851517765825, "loss": 1.5304, "step": 4428 }, { "epoch": 2.0307304208233, "grad_norm": 0.41112732887268066, "learning_rate": 0.0002497141016146517, "loss": 1.8782, "step": 4429 }, { "epoch": 2.0311890838206628, "grad_norm": 0.33853259682655334, "learning_rate": 0.0002494997494485806, "loss": 1.2181, "step": 4430 }, { "epoch": 2.0316477468180256, "grad_norm": 0.4708898067474365, "learning_rate": 0.0002492854587320272, "loss": 1.8342, "step": 4431 }, { "epoch": 2.032106409815388, "grad_norm": 0.3891645073890686, "learning_rate": 0.00024907122951755856, "loss": 0.9312, "step": 4432 }, { "epoch": 2.032565072812751, "grad_norm": 0.304002583026886, "learning_rate": 0.00024885706185772663, "loss": 1.071, "step": 4433 }, { "epoch": 2.0330237358101133, "grad_norm": 0.26506680250167847, "learning_rate": 0.00024864295580506816, "loss": 1.4494, "step": 4434 }, { "epoch": 2.033482398807476, "grad_norm": 0.33976414799690247, "learning_rate": 0.0002484289114121051, "loss": 1.2593, "step": 4435 }, { "epoch": 2.033941061804839, "grad_norm": 0.2222852259874344, "learning_rate": 0.0002482149287313439, "loss": 0.7598, "step": 4436 }, { "epoch": 2.0343997248022014, "grad_norm": 0.34920746088027954, "learning_rate": 0.00024800100781527645, "loss": 1.8142, "step": 4437 }, { "epoch": 2.0348583877995643, "grad_norm": 0.3795028328895569, "learning_rate": 0.00024778714871637853, "loss": 1.2554, "step": 4438 }, { "epoch": 2.035317050796927, "grad_norm": 0.0961453840136528, "learning_rate": 0.0002475733514871116, "loss": 0.9809, "step": 4439 }, { "epoch": 2.0357757137942896, "grad_norm": 0.2711848020553589, "learning_rate": 0.00024735961617992165, "loss": 0.9706, "step": 4440 }, { "epoch": 2.0362343767916524, "grad_norm": 0.3385700583457947, "learning_rate": 0.0002471459428472393, "loss": 1.4021, "step": 4441 }, { "epoch": 2.036693039789015, "grad_norm": 0.2203783392906189, "learning_rate": 0.00024693233154148063, "loss": 0.8029, "step": 4442 }, { "epoch": 2.0371517027863777, "grad_norm": 0.3024193346500397, "learning_rate": 0.0002467187823150457, "loss": 1.1432, "step": 4443 }, { "epoch": 2.0376103657837406, "grad_norm": 0.3142333924770355, "learning_rate": 0.0002465052952203196, "loss": 1.7221, "step": 4444 }, { "epoch": 2.038069028781103, "grad_norm": 0.3084831237792969, "learning_rate": 0.0002462918703096724, "loss": 1.2093, "step": 4445 }, { "epoch": 2.038527691778466, "grad_norm": 0.38534000515937805, "learning_rate": 0.0002460785076354588, "loss": 1.4895, "step": 4446 }, { "epoch": 2.0389863547758287, "grad_norm": 0.303943008184433, "learning_rate": 0.0002458652072500181, "loss": 0.9302, "step": 4447 }, { "epoch": 2.039445017773191, "grad_norm": 0.26607340574264526, "learning_rate": 0.0002456519692056747, "loss": 1.4809, "step": 4448 }, { "epoch": 2.039903680770554, "grad_norm": 0.3666565418243408, "learning_rate": 0.0002454387935547369, "loss": 1.2939, "step": 4449 }, { "epoch": 2.0403623437679164, "grad_norm": 0.24224300682544708, "learning_rate": 0.00024522568034949865, "loss": 1.0746, "step": 4450 }, { "epoch": 2.040821006765279, "grad_norm": 0.2701547145843506, "learning_rate": 0.0002450126296422377, "loss": 1.1681, "step": 4451 }, { "epoch": 2.041279669762642, "grad_norm": 0.19419100880622864, "learning_rate": 0.0002447996414852176, "loss": 1.3045, "step": 4452 }, { "epoch": 2.0417383327600045, "grad_norm": 0.30444204807281494, "learning_rate": 0.00024458671593068564, "loss": 1.4119, "step": 4453 }, { "epoch": 2.0421969957573674, "grad_norm": 0.3836541473865509, "learning_rate": 0.00024437385303087373, "loss": 1.7348, "step": 4454 }, { "epoch": 2.0426556587547298, "grad_norm": 0.312198281288147, "learning_rate": 0.0002441610528379988, "loss": 1.2589, "step": 4455 }, { "epoch": 2.0431143217520926, "grad_norm": 0.29707929491996765, "learning_rate": 0.00024394831540426232, "loss": 0.8035, "step": 4456 }, { "epoch": 2.0435729847494555, "grad_norm": 0.2303507924079895, "learning_rate": 0.0002437356407818503, "loss": 1.088, "step": 4457 }, { "epoch": 2.044031647746818, "grad_norm": 0.3469330072402954, "learning_rate": 0.00024352302902293333, "loss": 1.6418, "step": 4458 }, { "epoch": 2.0444903107441807, "grad_norm": 0.21516135334968567, "learning_rate": 0.00024331048017966683, "loss": 0.9941, "step": 4459 }, { "epoch": 2.0449489737415436, "grad_norm": 0.38784271478652954, "learning_rate": 0.00024309799430419, "loss": 2.2803, "step": 4460 }, { "epoch": 2.045407636738906, "grad_norm": 0.4177115261554718, "learning_rate": 0.0002428855714486277, "loss": 1.9731, "step": 4461 }, { "epoch": 2.045866299736269, "grad_norm": 0.3393002152442932, "learning_rate": 0.00024267321166508867, "loss": 1.2382, "step": 4462 }, { "epoch": 2.0463249627336313, "grad_norm": 0.28032952547073364, "learning_rate": 0.00024246091500566619, "loss": 1.1954, "step": 4463 }, { "epoch": 2.046783625730994, "grad_norm": 0.28215348720550537, "learning_rate": 0.00024224868152243823, "loss": 0.8485, "step": 4464 }, { "epoch": 2.047242288728357, "grad_norm": 0.26953309774398804, "learning_rate": 0.0002420365112674674, "loss": 1.4311, "step": 4465 }, { "epoch": 2.0477009517257194, "grad_norm": 0.20407378673553467, "learning_rate": 0.0002418244042928001, "loss": 0.9565, "step": 4466 }, { "epoch": 2.0481596147230823, "grad_norm": 0.27266445755958557, "learning_rate": 0.00024161236065046806, "loss": 1.2134, "step": 4467 }, { "epoch": 2.0486182777204447, "grad_norm": 0.2968637943267822, "learning_rate": 0.00024140038039248697, "loss": 1.1802, "step": 4468 }, { "epoch": 2.0490769407178075, "grad_norm": 0.34877461194992065, "learning_rate": 0.00024118846357085717, "loss": 1.5968, "step": 4469 }, { "epoch": 2.0495356037151704, "grad_norm": 0.3103572726249695, "learning_rate": 0.0002409766102375634, "loss": 1.218, "step": 4470 }, { "epoch": 2.049994266712533, "grad_norm": 0.33419668674468994, "learning_rate": 0.00024076482044457477, "loss": 1.781, "step": 4471 }, { "epoch": 2.0504529297098957, "grad_norm": 0.3635629415512085, "learning_rate": 0.00024055309424384486, "loss": 1.4742, "step": 4472 }, { "epoch": 2.0509115927072585, "grad_norm": 0.336867094039917, "learning_rate": 0.00024034143168731172, "loss": 1.2226, "step": 4473 }, { "epoch": 2.051370255704621, "grad_norm": 0.2931895852088928, "learning_rate": 0.00024012983282689754, "loss": 1.1535, "step": 4474 }, { "epoch": 2.051828918701984, "grad_norm": 0.36698460578918457, "learning_rate": 0.00023991829771450912, "loss": 1.2108, "step": 4475 }, { "epoch": 2.052287581699346, "grad_norm": 0.306598037481308, "learning_rate": 0.00023970682640203782, "loss": 1.5089, "step": 4476 }, { "epoch": 2.052746244696709, "grad_norm": 0.37377047538757324, "learning_rate": 0.00023949541894135857, "loss": 1.7544, "step": 4477 }, { "epoch": 2.053204907694072, "grad_norm": 0.3563328981399536, "learning_rate": 0.00023928407538433138, "loss": 1.3446, "step": 4478 }, { "epoch": 2.0536635706914343, "grad_norm": 0.29219722747802734, "learning_rate": 0.0002390727957828004, "loss": 1.7413, "step": 4479 }, { "epoch": 2.054122233688797, "grad_norm": 0.49362048506736755, "learning_rate": 0.000238861580188594, "loss": 1.6321, "step": 4480 }, { "epoch": 2.0545808966861596, "grad_norm": 0.4204590916633606, "learning_rate": 0.00023865042865352487, "loss": 2.0813, "step": 4481 }, { "epoch": 2.0550395596835225, "grad_norm": 0.33447083830833435, "learning_rate": 0.00023843934122938997, "loss": 1.4967, "step": 4482 }, { "epoch": 2.0554982226808853, "grad_norm": 0.3546378016471863, "learning_rate": 0.0002382283179679707, "loss": 0.8393, "step": 4483 }, { "epoch": 2.0559568856782477, "grad_norm": 0.2834445536136627, "learning_rate": 0.00023801735892103244, "loss": 1.6294, "step": 4484 }, { "epoch": 2.0564155486756106, "grad_norm": 0.46338558197021484, "learning_rate": 0.0002378064641403251, "loss": 1.1402, "step": 4485 }, { "epoch": 2.0568742116729735, "grad_norm": 0.14141368865966797, "learning_rate": 0.00023759563367758252, "loss": 1.6619, "step": 4486 }, { "epoch": 2.057332874670336, "grad_norm": 0.35884973406791687, "learning_rate": 0.00023738486758452326, "loss": 1.1481, "step": 4487 }, { "epoch": 2.0577915376676987, "grad_norm": 0.35921135544776917, "learning_rate": 0.0002371741659128494, "loss": 1.7199, "step": 4488 }, { "epoch": 2.058250200665061, "grad_norm": 0.35381534695625305, "learning_rate": 0.00023696352871424765, "loss": 0.8198, "step": 4489 }, { "epoch": 2.058708863662424, "grad_norm": 0.29065418243408203, "learning_rate": 0.00023675295604038893, "loss": 1.0642, "step": 4490 }, { "epoch": 2.059167526659787, "grad_norm": 0.30102601647377014, "learning_rate": 0.00023654244794292823, "loss": 1.451, "step": 4491 }, { "epoch": 2.0596261896571493, "grad_norm": 0.37403160333633423, "learning_rate": 0.00023633200447350462, "loss": 1.8662, "step": 4492 }, { "epoch": 2.060084852654512, "grad_norm": 0.45195087790489197, "learning_rate": 0.00023612162568374147, "loss": 0.8842, "step": 4493 }, { "epoch": 2.060543515651875, "grad_norm": 0.06819970905780792, "learning_rate": 0.0002359113116252462, "loss": 0.9856, "step": 4494 }, { "epoch": 2.0610021786492374, "grad_norm": 0.31288787722587585, "learning_rate": 0.00023570106234961036, "loss": 1.5755, "step": 4495 }, { "epoch": 2.0614608416466003, "grad_norm": 0.38856643438339233, "learning_rate": 0.00023549087790840966, "loss": 1.3749, "step": 4496 }, { "epoch": 2.0619195046439627, "grad_norm": 0.2518812119960785, "learning_rate": 0.00023528075835320378, "loss": 1.7864, "step": 4497 }, { "epoch": 2.0623781676413255, "grad_norm": 0.4028247892856598, "learning_rate": 0.0002350707037355368, "loss": 1.551, "step": 4498 }, { "epoch": 2.0628368306386884, "grad_norm": 0.34255698323249817, "learning_rate": 0.00023486071410693627, "loss": 1.0178, "step": 4499 }, { "epoch": 2.063295493636051, "grad_norm": 0.08957389742136002, "learning_rate": 0.0002346507895189143, "loss": 1.2612, "step": 4500 }, { "epoch": 2.0637541566334137, "grad_norm": 0.3247790038585663, "learning_rate": 0.0002344409300229669, "loss": 1.258, "step": 4501 }, { "epoch": 2.064212819630776, "grad_norm": 0.39857447147369385, "learning_rate": 0.0002342311356705742, "loss": 1.3753, "step": 4502 }, { "epoch": 2.064671482628139, "grad_norm": 0.13461144268512726, "learning_rate": 0.00023402140651320003, "loss": 0.8576, "step": 4503 }, { "epoch": 2.065130145625502, "grad_norm": 0.33944034576416016, "learning_rate": 0.000233811742602293, "loss": 1.5578, "step": 4504 }, { "epoch": 2.065588808622864, "grad_norm": 0.3041727542877197, "learning_rate": 0.0002336021439892846, "loss": 1.278, "step": 4505 }, { "epoch": 2.066047471620227, "grad_norm": 0.28007856011390686, "learning_rate": 0.00023339261072559116, "loss": 1.2006, "step": 4506 }, { "epoch": 2.06650613461759, "grad_norm": 0.2758924663066864, "learning_rate": 0.00023318314286261262, "loss": 1.5737, "step": 4507 }, { "epoch": 2.0669647976149523, "grad_norm": 0.318847119808197, "learning_rate": 0.00023297374045173298, "loss": 1.3716, "step": 4508 }, { "epoch": 2.067423460612315, "grad_norm": 0.3623434007167816, "learning_rate": 0.00023276440354432038, "loss": 1.8606, "step": 4509 }, { "epoch": 2.0678821236096776, "grad_norm": 0.43447864055633545, "learning_rate": 0.00023255513219172625, "loss": 1.9836, "step": 4510 }, { "epoch": 2.0683407866070405, "grad_norm": 0.38707858324050903, "learning_rate": 0.00023234592644528657, "loss": 1.4044, "step": 4511 }, { "epoch": 2.0687994496044033, "grad_norm": 0.43568772077560425, "learning_rate": 0.00023213678635632102, "loss": 2.0554, "step": 4512 }, { "epoch": 2.0692581126017657, "grad_norm": 0.3717525601387024, "learning_rate": 0.000231927711976133, "loss": 1.493, "step": 4513 }, { "epoch": 2.0697167755991286, "grad_norm": 0.21989569067955017, "learning_rate": 0.0002317187033560103, "loss": 0.6022, "step": 4514 }, { "epoch": 2.0701754385964914, "grad_norm": 0.10017600655555725, "learning_rate": 0.0002315097605472243, "loss": 1.3052, "step": 4515 }, { "epoch": 2.070634101593854, "grad_norm": 0.3896404802799225, "learning_rate": 0.00023130088360102968, "loss": 1.0817, "step": 4516 }, { "epoch": 2.0710927645912167, "grad_norm": 0.07273133844137192, "learning_rate": 0.00023109207256866583, "loss": 0.5404, "step": 4517 }, { "epoch": 2.071551427588579, "grad_norm": 0.35691580176353455, "learning_rate": 0.00023088332750135544, "loss": 1.6287, "step": 4518 }, { "epoch": 2.072010090585942, "grad_norm": 0.42362430691719055, "learning_rate": 0.00023067464845030527, "loss": 1.7539, "step": 4519 }, { "epoch": 2.072468753583305, "grad_norm": 0.5343214273452759, "learning_rate": 0.00023046603546670596, "loss": 2.1514, "step": 4520 }, { "epoch": 2.0729274165806673, "grad_norm": 0.4141870439052582, "learning_rate": 0.0002302574886017314, "loss": 1.6187, "step": 4521 }, { "epoch": 2.07338607957803, "grad_norm": 0.2982878088951111, "learning_rate": 0.00023004900790653986, "loss": 1.6875, "step": 4522 }, { "epoch": 2.0738447425753925, "grad_norm": 0.28776493668556213, "learning_rate": 0.00022984059343227292, "loss": 0.642, "step": 4523 }, { "epoch": 2.0743034055727554, "grad_norm": 0.2196163535118103, "learning_rate": 0.00022963224523005654, "loss": 1.3851, "step": 4524 }, { "epoch": 2.0747620685701182, "grad_norm": 0.3807169795036316, "learning_rate": 0.00022942396335099986, "loss": 1.2697, "step": 4525 }, { "epoch": 2.0752207315674807, "grad_norm": 0.3772948086261749, "learning_rate": 0.00022921574784619608, "loss": 1.7243, "step": 4526 }, { "epoch": 2.0756793945648435, "grad_norm": 0.532681405544281, "learning_rate": 0.00022900759876672168, "loss": 1.4512, "step": 4527 }, { "epoch": 2.0761380575622064, "grad_norm": 0.36544162034988403, "learning_rate": 0.00022879951616363727, "loss": 1.5479, "step": 4528 }, { "epoch": 2.076596720559569, "grad_norm": 0.2846382260322571, "learning_rate": 0.0002285915000879869, "loss": 1.2101, "step": 4529 }, { "epoch": 2.0770553835569316, "grad_norm": 0.3465891480445862, "learning_rate": 0.00022838355059079862, "loss": 1.9592, "step": 4530 }, { "epoch": 2.077514046554294, "grad_norm": 0.3925153315067291, "learning_rate": 0.00022817566772308378, "loss": 2.1309, "step": 4531 }, { "epoch": 2.077972709551657, "grad_norm": 0.3107575476169586, "learning_rate": 0.0002279678515358376, "loss": 0.5652, "step": 4532 }, { "epoch": 2.0784313725490198, "grad_norm": 0.19689220190048218, "learning_rate": 0.00022776010208003895, "loss": 1.6448, "step": 4533 }, { "epoch": 2.078890035546382, "grad_norm": 0.3134257197380066, "learning_rate": 0.00022755241940665018, "loss": 1.3657, "step": 4534 }, { "epoch": 2.079348698543745, "grad_norm": 0.4296601116657257, "learning_rate": 0.00022734480356661736, "loss": 1.34, "step": 4535 }, { "epoch": 2.0798073615411075, "grad_norm": 0.3112638294696808, "learning_rate": 0.00022713725461087015, "loss": 1.282, "step": 4536 }, { "epoch": 2.0802660245384703, "grad_norm": 0.13700056076049805, "learning_rate": 0.00022692977259032205, "loss": 1.3737, "step": 4537 }, { "epoch": 2.080724687535833, "grad_norm": 0.4667096436023712, "learning_rate": 0.00022672235755586952, "loss": 1.8356, "step": 4538 }, { "epoch": 2.0811833505331956, "grad_norm": 0.31745702028274536, "learning_rate": 0.00022651500955839305, "loss": 0.7115, "step": 4539 }, { "epoch": 2.0816420135305584, "grad_norm": 0.3241938352584839, "learning_rate": 0.0002263077286487567, "loss": 1.7333, "step": 4540 }, { "epoch": 2.0821006765279213, "grad_norm": 0.2541601359844208, "learning_rate": 0.00022610051487780792, "loss": 1.3767, "step": 4541 }, { "epoch": 2.0825593395252837, "grad_norm": 0.22772157192230225, "learning_rate": 0.00022589336829637776, "loss": 0.4693, "step": 4542 }, { "epoch": 2.0830180025226466, "grad_norm": 0.283128947019577, "learning_rate": 0.00022568628895528077, "loss": 1.7429, "step": 4543 }, { "epoch": 2.083476665520009, "grad_norm": 0.4075316786766052, "learning_rate": 0.000225479276905315, "loss": 1.6674, "step": 4544 }, { "epoch": 2.083935328517372, "grad_norm": 0.36636197566986084, "learning_rate": 0.00022527233219726202, "loss": 1.7224, "step": 4545 }, { "epoch": 2.0843939915147347, "grad_norm": 0.29077982902526855, "learning_rate": 0.00022506545488188678, "loss": 1.3293, "step": 4546 }, { "epoch": 2.084852654512097, "grad_norm": 0.2742144465446472, "learning_rate": 0.0002248586450099379, "loss": 0.9725, "step": 4547 }, { "epoch": 2.08531131750946, "grad_norm": 0.2022017389535904, "learning_rate": 0.00022465190263214747, "loss": 1.2256, "step": 4548 }, { "epoch": 2.0857699805068224, "grad_norm": 0.2726742625236511, "learning_rate": 0.00022444522779923044, "loss": 1.079, "step": 4549 }, { "epoch": 2.0862286435041852, "grad_norm": 0.23730774223804474, "learning_rate": 0.00022423862056188593, "loss": 1.5887, "step": 4550 }, { "epoch": 2.086687306501548, "grad_norm": 1.3285748958587646, "learning_rate": 0.00022403208097079613, "loss": 1.9584, "step": 4551 }, { "epoch": 2.0871459694989105, "grad_norm": 0.32901179790496826, "learning_rate": 0.00022382560907662668, "loss": 1.2246, "step": 4552 }, { "epoch": 2.0876046324962734, "grad_norm": 0.260644793510437, "learning_rate": 0.00022361920493002669, "loss": 1.4697, "step": 4553 }, { "epoch": 2.0880632954936362, "grad_norm": 0.35175642371177673, "learning_rate": 0.0002234128685816285, "loss": 1.5698, "step": 4554 }, { "epoch": 2.0885219584909986, "grad_norm": 0.44502708315849304, "learning_rate": 0.00022320660008204795, "loss": 2.0159, "step": 4555 }, { "epoch": 2.0889806214883615, "grad_norm": 0.37410539388656616, "learning_rate": 0.00022300039948188418, "loss": 1.7451, "step": 4556 }, { "epoch": 2.089439284485724, "grad_norm": 0.4350254237651825, "learning_rate": 0.0002227942668317197, "loss": 1.2582, "step": 4557 }, { "epoch": 2.0898979474830868, "grad_norm": 0.23693208396434784, "learning_rate": 0.00022258820218212035, "loss": 1.162, "step": 4558 }, { "epoch": 2.0903566104804496, "grad_norm": 0.33850157260894775, "learning_rate": 0.0002223822055836352, "loss": 1.2121, "step": 4559 }, { "epoch": 2.090815273477812, "grad_norm": 0.2646215260028839, "learning_rate": 0.00022217627708679693, "loss": 0.4118, "step": 4560 }, { "epoch": 2.091273936475175, "grad_norm": 0.26649442315101624, "learning_rate": 0.00022197041674212092, "loss": 2.1147, "step": 4561 }, { "epoch": 2.0917325994725378, "grad_norm": 0.27695992588996887, "learning_rate": 0.0002217646246001064, "loss": 1.0989, "step": 4562 }, { "epoch": 2.0921912624699, "grad_norm": 0.34253594279289246, "learning_rate": 0.00022155890071123564, "loss": 1.6344, "step": 4563 }, { "epoch": 2.092649925467263, "grad_norm": 0.32384851574897766, "learning_rate": 0.0002213532451259742, "loss": 1.7167, "step": 4564 }, { "epoch": 2.0931085884646254, "grad_norm": 0.35032370686531067, "learning_rate": 0.00022114765789477088, "loss": 1.7271, "step": 4565 }, { "epoch": 2.0935672514619883, "grad_norm": 0.2640658915042877, "learning_rate": 0.0002209421390680577, "loss": 1.6772, "step": 4566 }, { "epoch": 2.094025914459351, "grad_norm": 0.33520472049713135, "learning_rate": 0.00022073668869624995, "loss": 1.4508, "step": 4567 }, { "epoch": 2.0944845774567136, "grad_norm": 0.31736597418785095, "learning_rate": 0.00022053130682974604, "loss": 1.4152, "step": 4568 }, { "epoch": 2.0949432404540764, "grad_norm": 0.31707099080085754, "learning_rate": 0.00022032599351892764, "loss": 1.6255, "step": 4569 }, { "epoch": 2.095401903451439, "grad_norm": 0.4904292821884155, "learning_rate": 0.00022012074881415955, "loss": 1.9502, "step": 4570 }, { "epoch": 2.0958605664488017, "grad_norm": 0.3095618188381195, "learning_rate": 0.00021991557276579, "loss": 1.5995, "step": 4571 }, { "epoch": 2.0963192294461646, "grad_norm": 0.41936981678009033, "learning_rate": 0.0002197104654241498, "loss": 1.7164, "step": 4572 }, { "epoch": 2.096777892443527, "grad_norm": 0.34843704104423523, "learning_rate": 0.00021950542683955344, "loss": 1.2392, "step": 4573 }, { "epoch": 2.09723655544089, "grad_norm": 0.19430193305015564, "learning_rate": 0.00021930045706229835, "loss": 0.9546, "step": 4574 }, { "epoch": 2.0976952184382527, "grad_norm": 0.36258745193481445, "learning_rate": 0.00021909555614266484, "loss": 1.447, "step": 4575 }, { "epoch": 2.098153881435615, "grad_norm": 0.21773214638233185, "learning_rate": 0.00021889072413091727, "loss": 1.3679, "step": 4576 }, { "epoch": 2.098612544432978, "grad_norm": 0.2897671163082123, "learning_rate": 0.00021868596107730176, "loss": 0.8525, "step": 4577 }, { "epoch": 2.0990712074303404, "grad_norm": 0.22055676579475403, "learning_rate": 0.0002184812670320484, "loss": 1.4193, "step": 4578 }, { "epoch": 2.0995298704277032, "grad_norm": 0.24514396488666534, "learning_rate": 0.00021827664204537007, "loss": 0.5204, "step": 4579 }, { "epoch": 2.099988533425066, "grad_norm": 0.11148292571306229, "learning_rate": 0.00021807208616746277, "loss": 0.7519, "step": 4580 }, { "epoch": 2.1004471964224285, "grad_norm": 0.35977640748023987, "learning_rate": 0.00021786759944850554, "loss": 1.3285, "step": 4581 }, { "epoch": 2.1009058594197914, "grad_norm": 0.33894822001457214, "learning_rate": 0.00021766318193866064, "loss": 1.6344, "step": 4582 }, { "epoch": 2.101364522417154, "grad_norm": 0.1890052706003189, "learning_rate": 0.00021745883368807278, "loss": 0.8127, "step": 4583 }, { "epoch": 2.1018231854145166, "grad_norm": 0.3900175094604492, "learning_rate": 0.00021725455474687027, "loss": 1.8716, "step": 4584 }, { "epoch": 2.1022818484118795, "grad_norm": 0.2661358714103699, "learning_rate": 0.00021705034516516396, "loss": 0.8125, "step": 4585 }, { "epoch": 2.102740511409242, "grad_norm": 0.34905606508255005, "learning_rate": 0.00021684620499304836, "loss": 1.2191, "step": 4586 }, { "epoch": 2.1031991744066048, "grad_norm": 0.2521708905696869, "learning_rate": 0.0002166421342806003, "loss": 1.4932, "step": 4587 }, { "epoch": 2.1036578374039676, "grad_norm": 0.31396129727363586, "learning_rate": 0.00021643813307788002, "loss": 0.9659, "step": 4588 }, { "epoch": 2.10411650040133, "grad_norm": 0.22702209651470184, "learning_rate": 0.00021623420143493006, "loss": 0.6938, "step": 4589 }, { "epoch": 2.104575163398693, "grad_norm": 0.10886117815971375, "learning_rate": 0.00021603033940177657, "loss": 0.9339, "step": 4590 }, { "epoch": 2.1050338263960553, "grad_norm": 0.262203186750412, "learning_rate": 0.00021582654702842835, "loss": 1.6595, "step": 4591 }, { "epoch": 2.105492489393418, "grad_norm": 0.5163483023643494, "learning_rate": 0.00021562282436487717, "loss": 1.8124, "step": 4592 }, { "epoch": 2.105951152390781, "grad_norm": 0.31572282314300537, "learning_rate": 0.0002154191714610978, "loss": 1.6561, "step": 4593 }, { "epoch": 2.1064098153881434, "grad_norm": 0.5046913623809814, "learning_rate": 0.0002152155883670474, "loss": 1.4269, "step": 4594 }, { "epoch": 2.1068684783855063, "grad_norm": 0.27989131212234497, "learning_rate": 0.0002150120751326664, "loss": 0.3858, "step": 4595 }, { "epoch": 2.107327141382869, "grad_norm": 0.8256828784942627, "learning_rate": 0.00021480863180787846, "loss": 1.5546, "step": 4596 }, { "epoch": 2.1077858043802316, "grad_norm": 0.2901069223880768, "learning_rate": 0.00021460525844258944, "loss": 1.1568, "step": 4597 }, { "epoch": 2.1082444673775944, "grad_norm": 0.49482643604278564, "learning_rate": 0.00021440195508668836, "loss": 1.4728, "step": 4598 }, { "epoch": 2.108703130374957, "grad_norm": 0.34062066674232483, "learning_rate": 0.00021419872179004714, "loss": 1.5667, "step": 4599 }, { "epoch": 2.1091617933723197, "grad_norm": 0.3349739909172058, "learning_rate": 0.00021399555860251995, "loss": 1.2008, "step": 4600 }, { "epoch": 2.1096204563696825, "grad_norm": 0.26807984709739685, "learning_rate": 0.0002137924655739445, "loss": 0.8011, "step": 4601 }, { "epoch": 2.110079119367045, "grad_norm": 0.17875202000141144, "learning_rate": 0.0002135894427541409, "loss": 1.6011, "step": 4602 }, { "epoch": 2.110537782364408, "grad_norm": 0.3981230854988098, "learning_rate": 0.00021338649019291212, "loss": 1.1253, "step": 4603 }, { "epoch": 2.1109964453617707, "grad_norm": 0.24843645095825195, "learning_rate": 0.00021318360794004388, "loss": 1.1508, "step": 4604 }, { "epoch": 2.111455108359133, "grad_norm": 0.25917336344718933, "learning_rate": 0.00021298079604530464, "loss": 1.8315, "step": 4605 }, { "epoch": 2.111913771356496, "grad_norm": 0.36518481373786926, "learning_rate": 0.00021277805455844568, "loss": 1.3478, "step": 4606 }, { "epoch": 2.1123724343538584, "grad_norm": 0.4103847146034241, "learning_rate": 0.00021257538352920091, "loss": 1.0914, "step": 4607 }, { "epoch": 2.112831097351221, "grad_norm": 0.31975114345550537, "learning_rate": 0.00021237278300728697, "loss": 1.1174, "step": 4608 }, { "epoch": 2.113289760348584, "grad_norm": 0.23576036095619202, "learning_rate": 0.00021217025304240327, "loss": 1.0789, "step": 4609 }, { "epoch": 2.1137484233459465, "grad_norm": 0.3097432255744934, "learning_rate": 0.00021196779368423208, "loss": 0.7991, "step": 4610 }, { "epoch": 2.1142070863433093, "grad_norm": 0.1049700602889061, "learning_rate": 0.00021176540498243768, "loss": 1.429, "step": 4611 }, { "epoch": 2.1146657493406718, "grad_norm": 0.3536173403263092, "learning_rate": 0.00021156308698666777, "loss": 1.2648, "step": 4612 }, { "epoch": 2.1151244123380346, "grad_norm": 0.26619645953178406, "learning_rate": 0.00021136083974655236, "loss": 1.4557, "step": 4613 }, { "epoch": 2.1155830753353975, "grad_norm": 0.308630108833313, "learning_rate": 0.0002111586633117041, "loss": 0.9568, "step": 4614 }, { "epoch": 2.11604173833276, "grad_norm": 0.26167064905166626, "learning_rate": 0.0002109565577317184, "loss": 1.3277, "step": 4615 }, { "epoch": 2.1165004013301227, "grad_norm": 0.26667726039886475, "learning_rate": 0.0002107545230561732, "loss": 1.1866, "step": 4616 }, { "epoch": 2.116959064327485, "grad_norm": 0.35242959856987, "learning_rate": 0.00021055255933462912, "loss": 1.6272, "step": 4617 }, { "epoch": 2.117417727324848, "grad_norm": 0.3653687536716461, "learning_rate": 0.0002103506666166292, "loss": 1.4598, "step": 4618 }, { "epoch": 2.117876390322211, "grad_norm": 0.3237496614456177, "learning_rate": 0.00021014884495169927, "loss": 1.6118, "step": 4619 }, { "epoch": 2.1183350533195733, "grad_norm": 0.4205828011035919, "learning_rate": 0.00020994709438934756, "loss": 1.92, "step": 4620 }, { "epoch": 2.118793716316936, "grad_norm": 0.3911856710910797, "learning_rate": 0.00020974541497906525, "loss": 1.6688, "step": 4621 }, { "epoch": 2.119252379314299, "grad_norm": 0.345805823802948, "learning_rate": 0.00020954380677032526, "loss": 0.8207, "step": 4622 }, { "epoch": 2.1197110423116614, "grad_norm": 0.2564966082572937, "learning_rate": 0.00020934226981258376, "loss": 1.2849, "step": 4623 }, { "epoch": 2.1201697053090243, "grad_norm": 0.18908265233039856, "learning_rate": 0.0002091408041552792, "loss": 1.1646, "step": 4624 }, { "epoch": 2.1206283683063867, "grad_norm": 0.32488518953323364, "learning_rate": 0.00020893940984783262, "loss": 1.7903, "step": 4625 }, { "epoch": 2.1210870313037495, "grad_norm": 0.25629740953445435, "learning_rate": 0.00020873808693964746, "loss": 0.9413, "step": 4626 }, { "epoch": 2.1215456943011124, "grad_norm": 0.2971414029598236, "learning_rate": 0.00020853683548010965, "loss": 1.7205, "step": 4627 }, { "epoch": 2.122004357298475, "grad_norm": 0.2897069454193115, "learning_rate": 0.00020833565551858768, "loss": 1.1088, "step": 4628 }, { "epoch": 2.1224630202958377, "grad_norm": 0.21978043019771576, "learning_rate": 0.0002081345471044324, "loss": 1.2353, "step": 4629 }, { "epoch": 2.1229216832932005, "grad_norm": 0.3789485692977905, "learning_rate": 0.0002079335102869772, "loss": 1.7919, "step": 4630 }, { "epoch": 2.123380346290563, "grad_norm": 0.27863040566444397, "learning_rate": 0.00020773254511553786, "loss": 0.7957, "step": 4631 }, { "epoch": 2.123839009287926, "grad_norm": 0.2726113796234131, "learning_rate": 0.00020753165163941273, "loss": 2.3215, "step": 4632 }, { "epoch": 2.124297672285288, "grad_norm": 0.3731834888458252, "learning_rate": 0.00020733082990788204, "loss": 2.0028, "step": 4633 }, { "epoch": 2.124756335282651, "grad_norm": 0.3649563789367676, "learning_rate": 0.00020713007997020906, "loss": 1.4407, "step": 4634 }, { "epoch": 2.125214998280014, "grad_norm": 0.2045055776834488, "learning_rate": 0.00020692940187563914, "loss": 0.942, "step": 4635 }, { "epoch": 2.1256736612773763, "grad_norm": 0.39572790265083313, "learning_rate": 0.0002067287956734001, "loss": 2.0579, "step": 4636 }, { "epoch": 2.126132324274739, "grad_norm": 0.47954314947128296, "learning_rate": 0.00020652826141270194, "loss": 1.1333, "step": 4637 }, { "epoch": 2.1265909872721016, "grad_norm": 0.30008918046951294, "learning_rate": 0.00020632779914273757, "loss": 0.9181, "step": 4638 }, { "epoch": 2.1270496502694645, "grad_norm": 0.19218164682388306, "learning_rate": 0.00020612740891268145, "loss": 1.7064, "step": 4639 }, { "epoch": 2.1275083132668273, "grad_norm": 0.36220747232437134, "learning_rate": 0.00020592709077169082, "loss": 1.2349, "step": 4640 }, { "epoch": 2.1279669762641897, "grad_norm": 0.2821071445941925, "learning_rate": 0.00020572684476890518, "loss": 1.5732, "step": 4641 }, { "epoch": 2.1284256392615526, "grad_norm": 0.3287753760814667, "learning_rate": 0.00020552667095344635, "loss": 1.9521, "step": 4642 }, { "epoch": 2.1288843022589155, "grad_norm": 0.29272401332855225, "learning_rate": 0.00020532656937441841, "loss": 1.1461, "step": 4643 }, { "epoch": 2.129342965256278, "grad_norm": 0.4012896716594696, "learning_rate": 0.00020512654008090792, "loss": 1.6783, "step": 4644 }, { "epoch": 2.1298016282536407, "grad_norm": 0.2766939401626587, "learning_rate": 0.00020492658312198304, "loss": 0.8703, "step": 4645 }, { "epoch": 2.130260291251003, "grad_norm": 0.20289787650108337, "learning_rate": 0.00020472669854669495, "loss": 1.5026, "step": 4646 }, { "epoch": 2.130718954248366, "grad_norm": 0.37175294756889343, "learning_rate": 0.00020452688640407656, "loss": 1.6858, "step": 4647 }, { "epoch": 2.131177617245729, "grad_norm": 0.28814128041267395, "learning_rate": 0.00020432714674314362, "loss": 1.4595, "step": 4648 }, { "epoch": 2.1316362802430913, "grad_norm": 0.4047505557537079, "learning_rate": 0.00020412747961289364, "loss": 0.8171, "step": 4649 }, { "epoch": 2.132094943240454, "grad_norm": 0.23079468309879303, "learning_rate": 0.0002039278850623061, "loss": 0.9852, "step": 4650 }, { "epoch": 2.132553606237817, "grad_norm": 0.3248424232006073, "learning_rate": 0.0002037283631403431, "loss": 1.979, "step": 4651 }, { "epoch": 2.1330122692351794, "grad_norm": 0.4883231222629547, "learning_rate": 0.0002035289138959489, "loss": 0.9462, "step": 4652 }, { "epoch": 2.1334709322325422, "grad_norm": 0.09132955968379974, "learning_rate": 0.00020332953737804978, "loss": 0.8115, "step": 4653 }, { "epoch": 2.1339295952299047, "grad_norm": 0.31292226910591125, "learning_rate": 0.00020313023363555422, "loss": 1.2776, "step": 4654 }, { "epoch": 2.1343882582272675, "grad_norm": 0.32427236437797546, "learning_rate": 0.00020293100271735303, "loss": 1.2678, "step": 4655 }, { "epoch": 2.1348469212246304, "grad_norm": 0.2879985272884369, "learning_rate": 0.00020273184467231876, "loss": 0.938, "step": 4656 }, { "epoch": 2.135305584221993, "grad_norm": 0.2097647488117218, "learning_rate": 0.00020253275954930621, "loss": 1.1378, "step": 4657 }, { "epoch": 2.1357642472193556, "grad_norm": 0.3992927670478821, "learning_rate": 0.00020233374739715276, "loss": 1.2597, "step": 4658 }, { "epoch": 2.136222910216718, "grad_norm": 0.2745298147201538, "learning_rate": 0.00020213480826467733, "loss": 1.3282, "step": 4659 }, { "epoch": 2.136681573214081, "grad_norm": 0.3620222806930542, "learning_rate": 0.00020193594220068134, "loss": 1.2953, "step": 4660 }, { "epoch": 2.1371402362114438, "grad_norm": 0.4318819046020508, "learning_rate": 0.00020173714925394775, "loss": 2.0383, "step": 4661 }, { "epoch": 2.137598899208806, "grad_norm": 0.4898906946182251, "learning_rate": 0.00020153842947324196, "loss": 1.1289, "step": 4662 }, { "epoch": 2.138057562206169, "grad_norm": 0.32130667567253113, "learning_rate": 0.00020133978290731152, "loss": 1.5143, "step": 4663 }, { "epoch": 2.138516225203532, "grad_norm": 0.196794331073761, "learning_rate": 0.00020114120960488575, "loss": 1.2338, "step": 4664 }, { "epoch": 2.1389748882008943, "grad_norm": 0.3671046197414398, "learning_rate": 0.00020094270961467614, "loss": 0.8875, "step": 4665 }, { "epoch": 2.139433551198257, "grad_norm": 0.3376992344856262, "learning_rate": 0.00020074428298537633, "loss": 1.4139, "step": 4666 }, { "epoch": 2.1398922141956196, "grad_norm": 0.24704553186893463, "learning_rate": 0.00020054592976566132, "loss": 1.4949, "step": 4667 }, { "epoch": 2.1403508771929824, "grad_norm": 0.21429981291294098, "learning_rate": 0.00020034765000418914, "loss": 0.863, "step": 4668 }, { "epoch": 2.1408095401903453, "grad_norm": 0.2140214592218399, "learning_rate": 0.0002001494437495989, "loss": 0.8906, "step": 4669 }, { "epoch": 2.1412682031877077, "grad_norm": 0.27707546949386597, "learning_rate": 0.00019995131105051228, "loss": 1.3032, "step": 4670 }, { "epoch": 2.1417268661850706, "grad_norm": 0.7792873382568359, "learning_rate": 0.00019975325195553263, "loss": 1.3045, "step": 4671 }, { "epoch": 2.1421855291824334, "grad_norm": 0.3791343569755554, "learning_rate": 0.00019955526651324495, "loss": 1.6859, "step": 4672 }, { "epoch": 2.142644192179796, "grad_norm": 1.3164491653442383, "learning_rate": 0.00019935735477221678, "loss": 1.0486, "step": 4673 }, { "epoch": 2.1431028551771587, "grad_norm": 0.318463534116745, "learning_rate": 0.0001991595167809972, "loss": 1.8174, "step": 4674 }, { "epoch": 2.143561518174521, "grad_norm": 0.4365736246109009, "learning_rate": 0.00019896175258811734, "loss": 0.6856, "step": 4675 }, { "epoch": 2.144020181171884, "grad_norm": 0.2647281289100647, "learning_rate": 0.00019876406224209015, "loss": 1.6011, "step": 4676 }, { "epoch": 2.144478844169247, "grad_norm": 0.3120388090610504, "learning_rate": 0.00019856644579141052, "loss": 1.074, "step": 4677 }, { "epoch": 2.1449375071666092, "grad_norm": 0.3103806674480438, "learning_rate": 0.0001983689032845552, "loss": 1.7906, "step": 4678 }, { "epoch": 2.145396170163972, "grad_norm": 0.36245760321617126, "learning_rate": 0.0001981714347699828, "loss": 1.4742, "step": 4679 }, { "epoch": 2.1458548331613345, "grad_norm": 0.36996036767959595, "learning_rate": 0.00019797404029613368, "loss": 1.2744, "step": 4680 }, { "epoch": 2.1463134961586974, "grad_norm": 0.28153517842292786, "learning_rate": 0.00019777671991143026, "loss": 1.4012, "step": 4681 }, { "epoch": 2.1467721591560602, "grad_norm": 0.3398634195327759, "learning_rate": 0.00019757947366427653, "loss": 1.4422, "step": 4682 }, { "epoch": 2.1472308221534226, "grad_norm": 0.17714032530784607, "learning_rate": 0.0001973823016030587, "loss": 0.4065, "step": 4683 }, { "epoch": 2.1476894851507855, "grad_norm": 0.21079567074775696, "learning_rate": 0.00019718520377614407, "loss": 1.353, "step": 4684 }, { "epoch": 2.148148148148148, "grad_norm": 0.46706634759902954, "learning_rate": 0.00019698818023188236, "loss": 1.7202, "step": 4685 }, { "epoch": 2.1486068111455108, "grad_norm": 0.2902261018753052, "learning_rate": 0.00019679123101860491, "loss": 1.7689, "step": 4686 }, { "epoch": 2.1490654741428736, "grad_norm": 0.4393427073955536, "learning_rate": 0.00019659435618462473, "loss": 2.2651, "step": 4687 }, { "epoch": 2.149524137140236, "grad_norm": 0.3855237364768982, "learning_rate": 0.0001963975557782366, "loss": 1.5914, "step": 4688 }, { "epoch": 2.149982800137599, "grad_norm": 0.6791412234306335, "learning_rate": 0.00019620082984771715, "loss": 1.9954, "step": 4689 }, { "epoch": 2.1504414631349618, "grad_norm": 0.38182568550109863, "learning_rate": 0.00019600417844132463, "loss": 1.1696, "step": 4690 }, { "epoch": 2.150900126132324, "grad_norm": 0.2527628540992737, "learning_rate": 0.0001958076016072991, "loss": 0.83, "step": 4691 }, { "epoch": 2.151358789129687, "grad_norm": 0.29251906275749207, "learning_rate": 0.00019561109939386217, "loss": 1.1837, "step": 4692 }, { "epoch": 2.1518174521270494, "grad_norm": 0.3355877995491028, "learning_rate": 0.0001954146718492174, "loss": 1.3884, "step": 4693 }, { "epoch": 2.1522761151244123, "grad_norm": 0.2906731367111206, "learning_rate": 0.0001952183190215499, "loss": 1.2325, "step": 4694 }, { "epoch": 2.152734778121775, "grad_norm": 0.2701873779296875, "learning_rate": 0.0001950220409590262, "loss": 1.6547, "step": 4695 }, { "epoch": 2.1531934411191376, "grad_norm": 0.3364790976047516, "learning_rate": 0.00019482583770979485, "loss": 1.2307, "step": 4696 }, { "epoch": 2.1536521041165004, "grad_norm": 0.22372028231620789, "learning_rate": 0.00019462970932198592, "loss": 1.2701, "step": 4697 }, { "epoch": 2.1541107671138633, "grad_norm": 0.34942564368247986, "learning_rate": 0.00019443365584371114, "loss": 1.3538, "step": 4698 }, { "epoch": 2.1545694301112257, "grad_norm": 0.21902301907539368, "learning_rate": 0.0001942376773230638, "loss": 0.9939, "step": 4699 }, { "epoch": 2.1550280931085886, "grad_norm": 0.3378940224647522, "learning_rate": 0.00019404177380811895, "loss": 1.5621, "step": 4700 }, { "epoch": 2.155486756105951, "grad_norm": 0.44091933965682983, "learning_rate": 0.00019384594534693295, "loss": 1.6776, "step": 4701 }, { "epoch": 2.155945419103314, "grad_norm": 0.3093583285808563, "learning_rate": 0.00019365019198754413, "loss": 1.556, "step": 4702 }, { "epoch": 2.1564040821006767, "grad_norm": 0.30539020895957947, "learning_rate": 0.00019345451377797207, "loss": 0.9653, "step": 4703 }, { "epoch": 2.156862745098039, "grad_norm": 0.2688155174255371, "learning_rate": 0.0001932589107662181, "loss": 1.2151, "step": 4704 }, { "epoch": 2.157321408095402, "grad_norm": 0.29725632071495056, "learning_rate": 0.0001930633830002652, "loss": 1.8734, "step": 4705 }, { "epoch": 2.1577800710927644, "grad_norm": 0.3029474914073944, "learning_rate": 0.00019286793052807744, "loss": 0.8146, "step": 4706 }, { "epoch": 2.1582387340901272, "grad_norm": 0.4033948481082916, "learning_rate": 0.00019267255339760082, "loss": 2.1886, "step": 4707 }, { "epoch": 2.15869739708749, "grad_norm": 0.4187861979007721, "learning_rate": 0.00019247725165676276, "loss": 1.5054, "step": 4708 }, { "epoch": 2.1591560600848525, "grad_norm": 0.22975748777389526, "learning_rate": 0.00019228202535347212, "loss": 1.0834, "step": 4709 }, { "epoch": 2.1596147230822154, "grad_norm": 0.4031769633293152, "learning_rate": 0.00019208687453561957, "loss": 1.534, "step": 4710 }, { "epoch": 2.160073386079578, "grad_norm": 0.36004751920700073, "learning_rate": 0.00019189179925107702, "loss": 1.5185, "step": 4711 }, { "epoch": 2.1605320490769406, "grad_norm": 0.230648472905159, "learning_rate": 0.00019169679954769754, "loss": 0.6958, "step": 4712 }, { "epoch": 2.1609907120743035, "grad_norm": 0.38399839401245117, "learning_rate": 0.0001915018754733161, "loss": 1.9727, "step": 4713 }, { "epoch": 2.161449375071666, "grad_norm": 1.3798997402191162, "learning_rate": 0.00019130702707574905, "loss": 1.0898, "step": 4714 }, { "epoch": 2.1619080380690288, "grad_norm": 0.3109949231147766, "learning_rate": 0.00019111225440279395, "loss": 1.5253, "step": 4715 }, { "epoch": 2.1623667010663916, "grad_norm": 0.24511021375656128, "learning_rate": 0.00019091755750223028, "loss": 1.6026, "step": 4716 }, { "epoch": 2.162825364063754, "grad_norm": 0.4369632601737976, "learning_rate": 0.00019072293642181815, "loss": 1.6895, "step": 4717 }, { "epoch": 2.163284027061117, "grad_norm": 0.4405476748943329, "learning_rate": 0.00019052839120929977, "loss": 1.3796, "step": 4718 }, { "epoch": 2.1637426900584797, "grad_norm": 0.31771278381347656, "learning_rate": 0.00019033392191239817, "loss": 1.1156, "step": 4719 }, { "epoch": 2.164201353055842, "grad_norm": 0.28369247913360596, "learning_rate": 0.0001901395285788186, "loss": 2.0259, "step": 4720 }, { "epoch": 2.164660016053205, "grad_norm": 0.39868542551994324, "learning_rate": 0.0001899452112562468, "loss": 1.8127, "step": 4721 }, { "epoch": 2.1651186790505674, "grad_norm": 0.24997852742671967, "learning_rate": 0.00018975096999235052, "loss": 1.468, "step": 4722 }, { "epoch": 2.1655773420479303, "grad_norm": 0.3659355342388153, "learning_rate": 0.0001895568048347781, "loss": 1.2206, "step": 4723 }, { "epoch": 2.166036005045293, "grad_norm": 0.20338742434978485, "learning_rate": 0.00018936271583115994, "loss": 1.3026, "step": 4724 }, { "epoch": 2.1664946680426556, "grad_norm": 0.39747223258018494, "learning_rate": 0.00018916870302910732, "loss": 1.6281, "step": 4725 }, { "epoch": 2.1669533310400184, "grad_norm": 0.33269479870796204, "learning_rate": 0.00018897476647621308, "loss": 1.9271, "step": 4726 }, { "epoch": 2.167411994037381, "grad_norm": 0.2740723788738251, "learning_rate": 0.00018878090622005138, "loss": 1.1646, "step": 4727 }, { "epoch": 2.1678706570347437, "grad_norm": 0.3122137188911438, "learning_rate": 0.00018858712230817727, "loss": 1.6887, "step": 4728 }, { "epoch": 2.1683293200321065, "grad_norm": 0.3537803590297699, "learning_rate": 0.00018839341478812726, "loss": 1.1824, "step": 4729 }, { "epoch": 2.168787983029469, "grad_norm": 0.273580402135849, "learning_rate": 0.00018819978370741958, "loss": 0.7199, "step": 4730 }, { "epoch": 2.169246646026832, "grad_norm": 0.25088873505592346, "learning_rate": 0.00018800622911355314, "loss": 1.5972, "step": 4731 }, { "epoch": 2.1697053090241947, "grad_norm": 0.38248297572135925, "learning_rate": 0.0001878127510540083, "loss": 1.4888, "step": 4732 }, { "epoch": 2.170163972021557, "grad_norm": 0.23572513461112976, "learning_rate": 0.00018761934957624675, "loss": 1.2231, "step": 4733 }, { "epoch": 2.17062263501892, "grad_norm": 0.35793864727020264, "learning_rate": 0.00018742602472771104, "loss": 1.3505, "step": 4734 }, { "epoch": 2.1710812980162824, "grad_norm": 0.5893045663833618, "learning_rate": 0.00018723277655582516, "loss": 1.1632, "step": 4735 }, { "epoch": 2.171539961013645, "grad_norm": 0.18537980318069458, "learning_rate": 0.0001870396051079944, "loss": 0.7823, "step": 4736 }, { "epoch": 2.171998624011008, "grad_norm": 0.31003275513648987, "learning_rate": 0.00018684651043160506, "loss": 1.2098, "step": 4737 }, { "epoch": 2.1724572870083705, "grad_norm": 0.40928885340690613, "learning_rate": 0.00018665349257402465, "loss": 1.1833, "step": 4738 }, { "epoch": 2.1729159500057333, "grad_norm": 0.27787014842033386, "learning_rate": 0.00018646055158260189, "loss": 1.4373, "step": 4739 }, { "epoch": 2.173374613003096, "grad_norm": 0.2921246886253357, "learning_rate": 0.00018626768750466656, "loss": 1.1901, "step": 4740 }, { "epoch": 2.1738332760004586, "grad_norm": 0.36071351170539856, "learning_rate": 0.00018607490038752956, "loss": 1.4543, "step": 4741 }, { "epoch": 2.1742919389978215, "grad_norm": 0.2633201777935028, "learning_rate": 0.00018588219027848303, "loss": 1.1895, "step": 4742 }, { "epoch": 2.174750601995184, "grad_norm": 0.30791527032852173, "learning_rate": 0.0001856895572248002, "loss": 1.2403, "step": 4743 }, { "epoch": 2.1752092649925467, "grad_norm": 0.2025088369846344, "learning_rate": 0.00018549700127373537, "loss": 1.7025, "step": 4744 }, { "epoch": 2.1756679279899096, "grad_norm": 0.34419453144073486, "learning_rate": 0.00018530452247252367, "loss": 1.7213, "step": 4745 }, { "epoch": 2.176126590987272, "grad_norm": 0.3504865765571594, "learning_rate": 0.00018511212086838163, "loss": 1.312, "step": 4746 }, { "epoch": 2.176585253984635, "grad_norm": 0.2538597881793976, "learning_rate": 0.00018491979650850688, "loss": 0.6609, "step": 4747 }, { "epoch": 2.1770439169819973, "grad_norm": 0.226425439119339, "learning_rate": 0.00018472754944007786, "loss": 1.7527, "step": 4748 }, { "epoch": 2.17750257997936, "grad_norm": 0.3634589910507202, "learning_rate": 0.0001845353797102542, "loss": 0.7849, "step": 4749 }, { "epoch": 2.177961242976723, "grad_norm": 0.19483381509780884, "learning_rate": 0.00018434328736617652, "loss": 1.1454, "step": 4750 }, { "epoch": 2.1784199059740854, "grad_norm": 0.2180272936820984, "learning_rate": 0.00018415127245496643, "loss": 1.18, "step": 4751 }, { "epoch": 2.1788785689714483, "grad_norm": 0.36905816197395325, "learning_rate": 0.0001839593350237266, "loss": 1.0663, "step": 4752 }, { "epoch": 2.1793372319688107, "grad_norm": 0.23431743681430817, "learning_rate": 0.00018376747511954068, "loss": 1.481, "step": 4753 }, { "epoch": 2.1797958949661735, "grad_norm": 0.2120712399482727, "learning_rate": 0.00018357569278947323, "loss": 0.5578, "step": 4754 }, { "epoch": 2.1802545579635364, "grad_norm": 0.29886701703071594, "learning_rate": 0.00018338398808057004, "loss": 1.2928, "step": 4755 }, { "epoch": 2.180713220960899, "grad_norm": 0.09233911335468292, "learning_rate": 0.00018319236103985737, "loss": 1.2831, "step": 4756 }, { "epoch": 2.1811718839582617, "grad_norm": 0.33295729756355286, "learning_rate": 0.00018300081171434285, "loss": 1.4729, "step": 4757 }, { "epoch": 2.1816305469556245, "grad_norm": 0.42028337717056274, "learning_rate": 0.00018280934015101486, "loss": 1.1091, "step": 4758 }, { "epoch": 2.182089209952987, "grad_norm": 0.21170879900455475, "learning_rate": 0.00018261794639684283, "loss": 0.8437, "step": 4759 }, { "epoch": 2.18254787295035, "grad_norm": 0.22131375968456268, "learning_rate": 0.00018242663049877696, "loss": 0.491, "step": 4760 }, { "epoch": 2.183006535947712, "grad_norm": 0.22741609811782837, "learning_rate": 0.00018223539250374844, "loss": 1.4339, "step": 4761 }, { "epoch": 2.183465198945075, "grad_norm": 0.353257417678833, "learning_rate": 0.00018204423245866936, "loss": 1.5006, "step": 4762 }, { "epoch": 2.183923861942438, "grad_norm": 0.21219374239444733, "learning_rate": 0.00018185315041043267, "loss": 0.8634, "step": 4763 }, { "epoch": 2.1843825249398003, "grad_norm": 0.3175276815891266, "learning_rate": 0.00018166214640591205, "loss": 1.6262, "step": 4764 }, { "epoch": 2.184841187937163, "grad_norm": 0.4105018377304077, "learning_rate": 0.0001814712204919623, "loss": 1.631, "step": 4765 }, { "epoch": 2.185299850934526, "grad_norm": 0.36518141627311707, "learning_rate": 0.0001812803727154189, "loss": 1.6072, "step": 4766 }, { "epoch": 2.1857585139318885, "grad_norm": 0.2514982521533966, "learning_rate": 0.0001810896031230983, "loss": 1.1576, "step": 4767 }, { "epoch": 2.1862171769292513, "grad_norm": 0.33633479475975037, "learning_rate": 0.0001808989117617974, "loss": 1.3829, "step": 4768 }, { "epoch": 2.1866758399266137, "grad_norm": 0.2530759274959564, "learning_rate": 0.00018070829867829425, "loss": 1.0331, "step": 4769 }, { "epoch": 2.1871345029239766, "grad_norm": 0.25773900747299194, "learning_rate": 0.0001805177639193476, "loss": 0.5553, "step": 4770 }, { "epoch": 2.1875931659213395, "grad_norm": 0.28829655051231384, "learning_rate": 0.00018032730753169714, "loss": 2.219, "step": 4771 }, { "epoch": 2.188051828918702, "grad_norm": 0.36159905791282654, "learning_rate": 0.00018013692956206302, "loss": 1.0755, "step": 4772 }, { "epoch": 2.1885104919160647, "grad_norm": 0.40641242265701294, "learning_rate": 0.00017994663005714646, "loss": 1.019, "step": 4773 }, { "epoch": 2.188969154913427, "grad_norm": 0.2221248745918274, "learning_rate": 0.00017975640906362923, "loss": 1.5312, "step": 4774 }, { "epoch": 2.18942781791079, "grad_norm": 0.23253923654556274, "learning_rate": 0.00017956626662817387, "loss": 0.9588, "step": 4775 }, { "epoch": 2.189886480908153, "grad_norm": 0.3233194947242737, "learning_rate": 0.00017937620279742384, "loss": 1.1987, "step": 4776 }, { "epoch": 2.1903451439055153, "grad_norm": 0.4366668164730072, "learning_rate": 0.0001791862176180031, "loss": 1.6294, "step": 4777 }, { "epoch": 2.190803806902878, "grad_norm": 0.4189610481262207, "learning_rate": 0.00017899631113651643, "loss": 1.3567, "step": 4778 }, { "epoch": 2.191262469900241, "grad_norm": 0.28253674507141113, "learning_rate": 0.00017880648339954914, "loss": 1.637, "step": 4779 }, { "epoch": 2.1917211328976034, "grad_norm": 0.43727073073387146, "learning_rate": 0.00017861673445366733, "loss": 1.8063, "step": 4780 }, { "epoch": 2.1921797958949663, "grad_norm": 0.2974712550640106, "learning_rate": 0.00017842706434541785, "loss": 1.1469, "step": 4781 }, { "epoch": 2.1926384588923287, "grad_norm": 0.3751980662345886, "learning_rate": 0.00017823747312132798, "loss": 0.908, "step": 4782 }, { "epoch": 2.1930971218896915, "grad_norm": 0.15295343101024628, "learning_rate": 0.00017804796082790636, "loss": 0.8782, "step": 4783 }, { "epoch": 2.1935557848870544, "grad_norm": 0.33498823642730713, "learning_rate": 0.00017785852751164117, "loss": 1.3965, "step": 4784 }, { "epoch": 2.194014447884417, "grad_norm": 0.3449128568172455, "learning_rate": 0.0001776691732190019, "loss": 1.7836, "step": 4785 }, { "epoch": 2.1944731108817797, "grad_norm": 0.3249196708202362, "learning_rate": 0.0001774798979964386, "loss": 1.1383, "step": 4786 }, { "epoch": 2.1949317738791425, "grad_norm": 0.3362160325050354, "learning_rate": 0.00017729070189038176, "loss": 1.3211, "step": 4787 }, { "epoch": 2.195390436876505, "grad_norm": 0.32171592116355896, "learning_rate": 0.00017710158494724265, "loss": 1.9521, "step": 4788 }, { "epoch": 2.195849099873868, "grad_norm": 0.39043116569519043, "learning_rate": 0.0001769125472134131, "loss": 1.2834, "step": 4789 }, { "epoch": 2.19630776287123, "grad_norm": 0.29517433047294617, "learning_rate": 0.00017672358873526518, "loss": 1.4635, "step": 4790 }, { "epoch": 2.196766425868593, "grad_norm": 0.4014551639556885, "learning_rate": 0.0001765347095591517, "loss": 1.3101, "step": 4791 }, { "epoch": 2.197225088865956, "grad_norm": 0.247705340385437, "learning_rate": 0.0001763459097314064, "loss": 1.5868, "step": 4792 }, { "epoch": 2.1976837518633183, "grad_norm": 0.2868292033672333, "learning_rate": 0.00017615718929834317, "loss": 1.5704, "step": 4793 }, { "epoch": 2.198142414860681, "grad_norm": 0.24143101274967194, "learning_rate": 0.00017596854830625642, "loss": 1.4587, "step": 4794 }, { "epoch": 2.1986010778580436, "grad_norm": 0.2784029245376587, "learning_rate": 0.00017577998680142132, "loss": 1.2386, "step": 4795 }, { "epoch": 2.1990597408554065, "grad_norm": 0.336953729391098, "learning_rate": 0.00017559150483009302, "loss": 1.296, "step": 4796 }, { "epoch": 2.1995184038527693, "grad_norm": 0.39816170930862427, "learning_rate": 0.0001754031024385077, "loss": 1.5718, "step": 4797 }, { "epoch": 2.1999770668501317, "grad_norm": 0.34481489658355713, "learning_rate": 0.0001752147796728818, "loss": 2.1771, "step": 4798 }, { "epoch": 2.2004357298474946, "grad_norm": 0.3532600998878479, "learning_rate": 0.0001750265365794123, "loss": 1.6757, "step": 4799 }, { "epoch": 2.2008943928448574, "grad_norm": 0.4431455135345459, "learning_rate": 0.0001748383732042767, "loss": 1.632, "step": 4800 }, { "epoch": 2.20135305584222, "grad_norm": 0.3615950047969818, "learning_rate": 0.00017465028959363238, "loss": 1.4655, "step": 4801 }, { "epoch": 2.2018117188395827, "grad_norm": 0.21566683053970337, "learning_rate": 0.00017446228579361806, "loss": 0.9168, "step": 4802 }, { "epoch": 2.202270381836945, "grad_norm": 0.3532969057559967, "learning_rate": 0.00017427436185035234, "loss": 2.1003, "step": 4803 }, { "epoch": 2.202729044834308, "grad_norm": 0.4000427722930908, "learning_rate": 0.00017408651780993417, "loss": 2.0688, "step": 4804 }, { "epoch": 2.203187707831671, "grad_norm": 0.2674494981765747, "learning_rate": 0.0001738987537184432, "loss": 0.7786, "step": 4805 }, { "epoch": 2.2036463708290333, "grad_norm": 0.2524220049381256, "learning_rate": 0.00017371106962193938, "loss": 1.5247, "step": 4806 }, { "epoch": 2.204105033826396, "grad_norm": 0.3025605082511902, "learning_rate": 0.00017352346556646277, "loss": 0.6016, "step": 4807 }, { "epoch": 2.204563696823759, "grad_norm": 0.07593965530395508, "learning_rate": 0.00017333594159803397, "loss": 0.8819, "step": 4808 }, { "epoch": 2.2050223598211214, "grad_norm": 0.322405606508255, "learning_rate": 0.00017314849776265412, "loss": 0.8851, "step": 4809 }, { "epoch": 2.2054810228184842, "grad_norm": 0.25816667079925537, "learning_rate": 0.0001729611341063045, "loss": 1.9113, "step": 4810 }, { "epoch": 2.2059396858158467, "grad_norm": 0.31706973910331726, "learning_rate": 0.00017277385067494672, "loss": 0.929, "step": 4811 }, { "epoch": 2.2063983488132095, "grad_norm": 0.16823376715183258, "learning_rate": 0.0001725866475145228, "loss": 1.0099, "step": 4812 }, { "epoch": 2.2068570118105724, "grad_norm": 0.28849852085113525, "learning_rate": 0.00017239952467095498, "loss": 1.4562, "step": 4813 }, { "epoch": 2.207315674807935, "grad_norm": 0.3034904897212982, "learning_rate": 0.00017221248219014595, "loss": 1.6464, "step": 4814 }, { "epoch": 2.2077743378052976, "grad_norm": 0.31821388006210327, "learning_rate": 0.00017202552011797852, "loss": 1.2505, "step": 4815 }, { "epoch": 2.20823300080266, "grad_norm": 0.3559507429599762, "learning_rate": 0.00017183863850031572, "loss": 1.3757, "step": 4816 }, { "epoch": 2.208691663800023, "grad_norm": 0.20952913165092468, "learning_rate": 0.00017165183738300133, "loss": 0.3598, "step": 4817 }, { "epoch": 2.2091503267973858, "grad_norm": 0.0632583424448967, "learning_rate": 0.0001714651168118585, "loss": 0.3411, "step": 4818 }, { "epoch": 2.209608989794748, "grad_norm": 0.13204383850097656, "learning_rate": 0.00017127847683269144, "loss": 1.1918, "step": 4819 }, { "epoch": 2.210067652792111, "grad_norm": 0.30054593086242676, "learning_rate": 0.00017109191749128418, "loss": 1.1966, "step": 4820 }, { "epoch": 2.2105263157894735, "grad_norm": 0.35834217071533203, "learning_rate": 0.00017090543883340115, "loss": 2.2354, "step": 4821 }, { "epoch": 2.2109849787868363, "grad_norm": 0.3150874674320221, "learning_rate": 0.00017071904090478686, "loss": 0.3315, "step": 4822 }, { "epoch": 2.211443641784199, "grad_norm": 0.2139630913734436, "learning_rate": 0.00017053272375116603, "loss": 1.3162, "step": 4823 }, { "epoch": 2.2119023047815616, "grad_norm": 0.35393640398979187, "learning_rate": 0.00017034648741824366, "loss": 2.103, "step": 4824 }, { "epoch": 2.2123609677789244, "grad_norm": 0.43089064955711365, "learning_rate": 0.00017016033195170488, "loss": 1.7612, "step": 4825 }, { "epoch": 2.2128196307762873, "grad_norm": 0.3777117431163788, "learning_rate": 0.00016997425739721488, "loss": 1.6729, "step": 4826 }, { "epoch": 2.2132782937736497, "grad_norm": 0.3245319426059723, "learning_rate": 0.00016978826380041923, "loss": 1.1172, "step": 4827 }, { "epoch": 2.2137369567710126, "grad_norm": 0.2487240731716156, "learning_rate": 0.0001696023512069435, "loss": 1.145, "step": 4828 }, { "epoch": 2.214195619768375, "grad_norm": 0.1767636239528656, "learning_rate": 0.00016941651966239325, "loss": 0.94, "step": 4829 }, { "epoch": 2.214654282765738, "grad_norm": 0.2936687171459198, "learning_rate": 0.00016923076921235424, "loss": 1.3027, "step": 4830 }, { "epoch": 2.2151129457631007, "grad_norm": 0.5650012493133545, "learning_rate": 0.00016904509990239258, "loss": 1.048, "step": 4831 }, { "epoch": 2.215571608760463, "grad_norm": 0.266467809677124, "learning_rate": 0.00016885951177805425, "loss": 1.7801, "step": 4832 }, { "epoch": 2.216030271757826, "grad_norm": 0.39118704199790955, "learning_rate": 0.00016867400488486528, "loss": 1.2689, "step": 4833 }, { "epoch": 2.216488934755189, "grad_norm": 0.3572444021701813, "learning_rate": 0.0001684885792683319, "loss": 2.2139, "step": 4834 }, { "epoch": 2.2169475977525512, "grad_norm": 0.351784348487854, "learning_rate": 0.00016830323497394033, "loss": 1.1197, "step": 4835 }, { "epoch": 2.217406260749914, "grad_norm": 0.19013693928718567, "learning_rate": 0.0001681179720471569, "loss": 1.0192, "step": 4836 }, { "epoch": 2.2178649237472765, "grad_norm": 0.35627466440200806, "learning_rate": 0.00016793279053342792, "loss": 1.6245, "step": 4837 }, { "epoch": 2.2183235867446394, "grad_norm": 0.34376806020736694, "learning_rate": 0.00016774769047817978, "loss": 1.575, "step": 4838 }, { "epoch": 2.218782249742002, "grad_norm": 0.24683015048503876, "learning_rate": 0.00016756267192681896, "loss": 0.9045, "step": 4839 }, { "epoch": 2.2192409127393646, "grad_norm": 0.19168370962142944, "learning_rate": 0.00016737773492473152, "loss": 0.8613, "step": 4840 }, { "epoch": 2.2196995757367275, "grad_norm": 0.29889073967933655, "learning_rate": 0.00016719287951728407, "loss": 0.8257, "step": 4841 }, { "epoch": 2.22015823873409, "grad_norm": 0.0963655561208725, "learning_rate": 0.00016700810574982294, "loss": 0.5904, "step": 4842 }, { "epoch": 2.2206169017314528, "grad_norm": 0.2194061130285263, "learning_rate": 0.00016682341366767444, "loss": 1.1664, "step": 4843 }, { "epoch": 2.2210755647288156, "grad_norm": 0.20055797696113586, "learning_rate": 0.0001666388033161448, "loss": 1.1049, "step": 4844 }, { "epoch": 2.221534227726178, "grad_norm": 0.2792820334434509, "learning_rate": 0.0001664542747405206, "loss": 1.011, "step": 4845 }, { "epoch": 2.221992890723541, "grad_norm": 0.243804931640625, "learning_rate": 0.0001662698279860677, "loss": 1.5193, "step": 4846 }, { "epoch": 2.2224515537209037, "grad_norm": 0.42623358964920044, "learning_rate": 0.00016608546309803229, "loss": 1.7656, "step": 4847 }, { "epoch": 2.222910216718266, "grad_norm": 0.5009300112724304, "learning_rate": 0.00016590118012164046, "loss": 1.7704, "step": 4848 }, { "epoch": 2.223368879715629, "grad_norm": 0.20968489348888397, "learning_rate": 0.0001657169791020981, "loss": 0.8382, "step": 4849 }, { "epoch": 2.2238275427129914, "grad_norm": 0.3352392017841339, "learning_rate": 0.00016553286008459117, "loss": 1.5792, "step": 4850 }, { "epoch": 2.2242862057103543, "grad_norm": 0.27464622259140015, "learning_rate": 0.00016534882311428523, "loss": 0.928, "step": 4851 }, { "epoch": 2.224744868707717, "grad_norm": 0.1357714831829071, "learning_rate": 0.00016516486823632586, "loss": 0.797, "step": 4852 }, { "epoch": 2.2252035317050796, "grad_norm": 0.36050960421562195, "learning_rate": 0.00016498099549583866, "loss": 1.7069, "step": 4853 }, { "epoch": 2.2256621947024424, "grad_norm": 0.367965430021286, "learning_rate": 0.00016479720493792872, "loss": 1.2835, "step": 4854 }, { "epoch": 2.2261208576998053, "grad_norm": 0.2768895626068115, "learning_rate": 0.00016461349660768144, "loss": 1.2358, "step": 4855 }, { "epoch": 2.2265795206971677, "grad_norm": 0.3254320025444031, "learning_rate": 0.00016442987055016194, "loss": 0.8148, "step": 4856 }, { "epoch": 2.2270381836945305, "grad_norm": 0.21533845365047455, "learning_rate": 0.00016424632681041456, "loss": 1.7542, "step": 4857 }, { "epoch": 2.227496846691893, "grad_norm": 0.2936849594116211, "learning_rate": 0.00016406286543346415, "loss": 0.4233, "step": 4858 }, { "epoch": 2.227955509689256, "grad_norm": 0.5081586241722107, "learning_rate": 0.0001638794864643151, "loss": 1.5443, "step": 4859 }, { "epoch": 2.2284141726866187, "grad_norm": 0.24850165843963623, "learning_rate": 0.00016369618994795156, "loss": 0.9991, "step": 4860 }, { "epoch": 2.228872835683981, "grad_norm": 0.3500426113605499, "learning_rate": 0.0001635129759293375, "loss": 1.6708, "step": 4861 }, { "epoch": 2.229331498681344, "grad_norm": 0.25747743248939514, "learning_rate": 0.00016332984445341681, "loss": 1.2169, "step": 4862 }, { "epoch": 2.2297901616787064, "grad_norm": 0.31107261776924133, "learning_rate": 0.0001631467955651124, "loss": 1.2371, "step": 4863 }, { "epoch": 2.230248824676069, "grad_norm": 0.25597137212753296, "learning_rate": 0.00016296382930932812, "loss": 0.7018, "step": 4864 }, { "epoch": 2.230707487673432, "grad_norm": 0.21790584921836853, "learning_rate": 0.00016278094573094666, "loss": 1.2848, "step": 4865 }, { "epoch": 2.2311661506707945, "grad_norm": 0.30176883935928345, "learning_rate": 0.00016259814487483066, "loss": 1.7701, "step": 4866 }, { "epoch": 2.2316248136681573, "grad_norm": 0.3356066644191742, "learning_rate": 0.00016241542678582268, "loss": 1.2003, "step": 4867 }, { "epoch": 2.23208347666552, "grad_norm": 0.3005014955997467, "learning_rate": 0.00016223279150874448, "loss": 1.0873, "step": 4868 }, { "epoch": 2.2325421396628826, "grad_norm": 0.2405785322189331, "learning_rate": 0.00016205023908839793, "loss": 1.3601, "step": 4869 }, { "epoch": 2.2330008026602455, "grad_norm": 0.3136742115020752, "learning_rate": 0.00016186776956956451, "loss": 1.1295, "step": 4870 }, { "epoch": 2.233459465657608, "grad_norm": 0.18356893956661224, "learning_rate": 0.00016168538299700519, "loss": 1.1707, "step": 4871 }, { "epoch": 2.2339181286549707, "grad_norm": 0.2986769676208496, "learning_rate": 0.00016150307941546088, "loss": 0.9671, "step": 4872 }, { "epoch": 2.2343767916523336, "grad_norm": 0.2441634237766266, "learning_rate": 0.00016132085886965187, "loss": 0.5982, "step": 4873 }, { "epoch": 2.234835454649696, "grad_norm": 0.18692080676555634, "learning_rate": 0.00016113872140427815, "loss": 1.5326, "step": 4874 }, { "epoch": 2.235294117647059, "grad_norm": 0.3282175362110138, "learning_rate": 0.00016095666706401941, "loss": 1.2403, "step": 4875 }, { "epoch": 2.2357527806444217, "grad_norm": 0.33327755331993103, "learning_rate": 0.00016077469589353483, "loss": 1.1533, "step": 4876 }, { "epoch": 2.236211443641784, "grad_norm": 0.274987131357193, "learning_rate": 0.00016059280793746333, "loss": 1.6538, "step": 4877 }, { "epoch": 2.236670106639147, "grad_norm": 0.43245795369148254, "learning_rate": 0.00016041100324042345, "loss": 1.9604, "step": 4878 }, { "epoch": 2.2371287696365094, "grad_norm": 0.3761482238769531, "learning_rate": 0.00016022928184701286, "loss": 1.5767, "step": 4879 }, { "epoch": 2.2375874326338723, "grad_norm": 0.3432115316390991, "learning_rate": 0.0001600476438018093, "loss": 1.6666, "step": 4880 }, { "epoch": 2.238046095631235, "grad_norm": 0.3119170367717743, "learning_rate": 0.00015986608914936995, "loss": 1.251, "step": 4881 }, { "epoch": 2.2385047586285975, "grad_norm": 0.4060390591621399, "learning_rate": 0.0001596846179342314, "loss": 0.4879, "step": 4882 }, { "epoch": 2.2389634216259604, "grad_norm": 0.20119361579418182, "learning_rate": 0.00015950323020090984, "loss": 1.4327, "step": 4883 }, { "epoch": 2.239422084623323, "grad_norm": 0.21751080453395844, "learning_rate": 0.00015932192599390105, "loss": 1.6125, "step": 4884 }, { "epoch": 2.2398807476206857, "grad_norm": 0.40151306986808777, "learning_rate": 0.00015914070535768022, "loss": 1.9309, "step": 4885 }, { "epoch": 2.2403394106180485, "grad_norm": 0.35669711232185364, "learning_rate": 0.00015895956833670205, "loss": 1.8298, "step": 4886 }, { "epoch": 2.240798073615411, "grad_norm": 0.3669508993625641, "learning_rate": 0.00015877851497540085, "loss": 1.6097, "step": 4887 }, { "epoch": 2.241256736612774, "grad_norm": 0.2940203845500946, "learning_rate": 0.00015859754531819028, "loss": 0.3973, "step": 4888 }, { "epoch": 2.241715399610136, "grad_norm": 0.19039645791053772, "learning_rate": 0.00015841665940946343, "loss": 1.3714, "step": 4889 }, { "epoch": 2.242174062607499, "grad_norm": 0.3116855025291443, "learning_rate": 0.00015823585729359314, "loss": 1.6032, "step": 4890 }, { "epoch": 2.242632725604862, "grad_norm": 0.32782799005508423, "learning_rate": 0.00015805513901493118, "loss": 1.1902, "step": 4891 }, { "epoch": 2.2430913886022243, "grad_norm": 0.1876303255558014, "learning_rate": 0.0001578745046178091, "loss": 0.6099, "step": 4892 }, { "epoch": 2.243550051599587, "grad_norm": 0.32714807987213135, "learning_rate": 0.00015769395414653797, "loss": 1.6815, "step": 4893 }, { "epoch": 2.24400871459695, "grad_norm": 0.2183062732219696, "learning_rate": 0.00015751348764540802, "loss": 0.6329, "step": 4894 }, { "epoch": 2.2444673775943125, "grad_norm": 0.12229790538549423, "learning_rate": 0.00015733310515868897, "loss": 1.269, "step": 4895 }, { "epoch": 2.2449260405916753, "grad_norm": 0.38500019907951355, "learning_rate": 0.00015715280673062997, "loss": 1.7126, "step": 4896 }, { "epoch": 2.245384703589038, "grad_norm": 0.33836498856544495, "learning_rate": 0.00015697259240545958, "loss": 1.7375, "step": 4897 }, { "epoch": 2.2458433665864006, "grad_norm": 0.4215908646583557, "learning_rate": 0.00015679246222738562, "loss": 2.0007, "step": 4898 }, { "epoch": 2.2463020295837635, "grad_norm": 0.38347312808036804, "learning_rate": 0.0001566124162405953, "loss": 1.9561, "step": 4899 }, { "epoch": 2.246760692581126, "grad_norm": 0.34876549243927, "learning_rate": 0.0001564324544892553, "loss": 1.7222, "step": 4900 }, { "epoch": 2.2472193555784887, "grad_norm": 0.29349571466445923, "learning_rate": 0.00015625257701751155, "loss": 1.0293, "step": 4901 }, { "epoch": 2.2476780185758516, "grad_norm": 0.3368821442127228, "learning_rate": 0.00015607278386948909, "loss": 1.2804, "step": 4902 }, { "epoch": 2.248136681573214, "grad_norm": 0.3242160975933075, "learning_rate": 0.00015589307508929258, "loss": 1.6011, "step": 4903 }, { "epoch": 2.248595344570577, "grad_norm": 0.2980966567993164, "learning_rate": 0.0001557134507210059, "loss": 1.2891, "step": 4904 }, { "epoch": 2.2490540075679393, "grad_norm": 0.32442331314086914, "learning_rate": 0.00015553391080869218, "loss": 1.694, "step": 4905 }, { "epoch": 2.249512670565302, "grad_norm": 0.328112930059433, "learning_rate": 0.00015535445539639382, "loss": 1.6802, "step": 4906 }, { "epoch": 2.249971333562665, "grad_norm": 0.275643914937973, "learning_rate": 0.0001551750845281326, "loss": 1.1635, "step": 4907 }, { "epoch": 2.2504299965600274, "grad_norm": 0.31056198477745056, "learning_rate": 0.00015499579824790948, "loss": 1.6135, "step": 4908 }, { "epoch": 2.2508886595573903, "grad_norm": 0.31692421436309814, "learning_rate": 0.0001548165965997047, "loss": 1.5069, "step": 4909 }, { "epoch": 2.2513473225547527, "grad_norm": 0.3236372768878937, "learning_rate": 0.00015463747962747766, "loss": 1.6261, "step": 4910 }, { "epoch": 2.2518059855521155, "grad_norm": 0.3909819722175598, "learning_rate": 0.000154458447375167, "loss": 2.105, "step": 4911 }, { "epoch": 2.2522646485494784, "grad_norm": 0.3079122304916382, "learning_rate": 0.00015427949988669088, "loss": 1.2053, "step": 4912 }, { "epoch": 2.252723311546841, "grad_norm": 0.3336406946182251, "learning_rate": 0.00015410063720594603, "loss": 1.9375, "step": 4913 }, { "epoch": 2.2531819745442037, "grad_norm": 0.38423246145248413, "learning_rate": 0.00015392185937680898, "loss": 1.4344, "step": 4914 }, { "epoch": 2.2536406375415665, "grad_norm": 0.3887491822242737, "learning_rate": 0.00015374316644313512, "loss": 1.4943, "step": 4915 }, { "epoch": 2.254099300538929, "grad_norm": 0.21756817400455475, "learning_rate": 0.00015356455844875905, "loss": 0.9165, "step": 4916 }, { "epoch": 2.254557963536292, "grad_norm": 0.4763137102127075, "learning_rate": 0.0001533860354374949, "loss": 1.7384, "step": 4917 }, { "epoch": 2.2550166265336546, "grad_norm": 0.3908590078353882, "learning_rate": 0.00015320759745313562, "loss": 1.2142, "step": 4918 }, { "epoch": 2.255475289531017, "grad_norm": 0.38368988037109375, "learning_rate": 0.000153029244539453, "loss": 1.4749, "step": 4919 }, { "epoch": 2.25593395252838, "grad_norm": 0.1392853558063507, "learning_rate": 0.0001528509767401985, "loss": 0.4752, "step": 4920 }, { "epoch": 2.2563926155257423, "grad_norm": 0.2166876643896103, "learning_rate": 0.00015267279409910252, "loss": 1.1606, "step": 4921 }, { "epoch": 2.256851278523105, "grad_norm": 0.2793666124343872, "learning_rate": 0.0001524946966598744, "loss": 1.2599, "step": 4922 }, { "epoch": 2.257309941520468, "grad_norm": 0.2081069052219391, "learning_rate": 0.0001523166844662031, "loss": 1.1284, "step": 4923 }, { "epoch": 2.2577686045178305, "grad_norm": 0.2551545202732086, "learning_rate": 0.00015213875756175583, "loss": 1.6474, "step": 4924 }, { "epoch": 2.2582272675151933, "grad_norm": 0.412919819355011, "learning_rate": 0.00015196091599017951, "loss": 1.2211, "step": 4925 }, { "epoch": 2.2586859305125557, "grad_norm": 0.5170382261276245, "learning_rate": 0.00015178315979509988, "loss": 1.3411, "step": 4926 }, { "epoch": 2.2591445935099186, "grad_norm": 0.23865391314029694, "learning_rate": 0.00015160548902012205, "loss": 1.4869, "step": 4927 }, { "epoch": 2.2596032565072814, "grad_norm": 0.31659218668937683, "learning_rate": 0.00015142790370882987, "loss": 0.9003, "step": 4928 }, { "epoch": 2.260061919504644, "grad_norm": 0.09430176764726639, "learning_rate": 0.00015125040390478634, "loss": 0.4091, "step": 4929 }, { "epoch": 2.2605205825020067, "grad_norm": 0.2541019320487976, "learning_rate": 0.0001510729896515332, "loss": 1.455, "step": 4930 }, { "epoch": 2.260979245499369, "grad_norm": 0.25227102637290955, "learning_rate": 0.00015089566099259162, "loss": 1.4478, "step": 4931 }, { "epoch": 2.261437908496732, "grad_norm": 0.37687572836875916, "learning_rate": 0.00015071841797146152, "loss": 1.6046, "step": 4932 }, { "epoch": 2.261896571494095, "grad_norm": 0.25936609506607056, "learning_rate": 0.000150541260631622, "loss": 1.2579, "step": 4933 }, { "epoch": 2.2623552344914573, "grad_norm": 0.3484478294849396, "learning_rate": 0.0001503641890165311, "loss": 1.8318, "step": 4934 }, { "epoch": 2.26281389748882, "grad_norm": 0.33855557441711426, "learning_rate": 0.00015018720316962536, "loss": 1.6309, "step": 4935 }, { "epoch": 2.2632725604861825, "grad_norm": 0.3302558958530426, "learning_rate": 0.00015001030313432107, "loss": 1.1351, "step": 4936 }, { "epoch": 2.2637312234835454, "grad_norm": 0.18633894622325897, "learning_rate": 0.000149833488954013, "loss": 1.4874, "step": 4937 }, { "epoch": 2.2641898864809082, "grad_norm": 0.3469991981983185, "learning_rate": 0.00014965676067207496, "loss": 1.4942, "step": 4938 }, { "epoch": 2.2646485494782707, "grad_norm": 0.39552754163742065, "learning_rate": 0.0001494801183318596, "loss": 1.8694, "step": 4939 }, { "epoch": 2.2651072124756335, "grad_norm": 0.26547738909721375, "learning_rate": 0.0001493035619766987, "loss": 0.8757, "step": 4940 }, { "epoch": 2.2655658754729964, "grad_norm": 0.39476221799850464, "learning_rate": 0.00014912709164990263, "loss": 1.9812, "step": 4941 }, { "epoch": 2.266024538470359, "grad_norm": 0.37706848978996277, "learning_rate": 0.00014895070739476087, "loss": 0.931, "step": 4942 }, { "epoch": 2.2664832014677216, "grad_norm": 0.36269137263298035, "learning_rate": 0.00014877440925454172, "loss": 1.2654, "step": 4943 }, { "epoch": 2.2669418644650845, "grad_norm": 0.23204423487186432, "learning_rate": 0.0001485981972724925, "loss": 1.6955, "step": 4944 }, { "epoch": 2.267400527462447, "grad_norm": 0.4347366392612457, "learning_rate": 0.00014842207149183922, "loss": 1.5891, "step": 4945 }, { "epoch": 2.2678591904598098, "grad_norm": 0.2783931493759155, "learning_rate": 0.00014824603195578683, "loss": 1.1357, "step": 4946 }, { "epoch": 2.268317853457172, "grad_norm": 0.31447914242744446, "learning_rate": 0.00014807007870751908, "loss": 1.8933, "step": 4947 }, { "epoch": 2.268776516454535, "grad_norm": 0.41312742233276367, "learning_rate": 0.00014789421179019858, "loss": 1.8218, "step": 4948 }, { "epoch": 2.269235179451898, "grad_norm": 0.36053013801574707, "learning_rate": 0.0001477184312469667, "loss": 1.2607, "step": 4949 }, { "epoch": 2.2696938424492603, "grad_norm": 0.28373968601226807, "learning_rate": 0.00014754273712094373, "loss": 1.4863, "step": 4950 }, { "epoch": 2.270152505446623, "grad_norm": 0.3220798671245575, "learning_rate": 0.00014736712945522884, "loss": 1.7179, "step": 4951 }, { "epoch": 2.2706111684439856, "grad_norm": 0.29745230078697205, "learning_rate": 0.00014719160829289958, "loss": 1.5435, "step": 4952 }, { "epoch": 2.2710698314413484, "grad_norm": 0.32250505685806274, "learning_rate": 0.0001470161736770127, "loss": 1.0081, "step": 4953 }, { "epoch": 2.2715284944387113, "grad_norm": 0.29016774892807007, "learning_rate": 0.00014684082565060352, "loss": 1.2257, "step": 4954 }, { "epoch": 2.2719871574360737, "grad_norm": 0.3230065107345581, "learning_rate": 0.00014666556425668625, "loss": 1.8289, "step": 4955 }, { "epoch": 2.2724458204334366, "grad_norm": 0.2680938243865967, "learning_rate": 0.00014649038953825372, "loss": 0.6338, "step": 4956 }, { "epoch": 2.272904483430799, "grad_norm": 0.1309625804424286, "learning_rate": 0.00014631530153827755, "loss": 1.0811, "step": 4957 }, { "epoch": 2.273363146428162, "grad_norm": 0.36883193254470825, "learning_rate": 0.00014614030029970815, "loss": 1.3616, "step": 4958 }, { "epoch": 2.2738218094255247, "grad_norm": 0.2308487594127655, "learning_rate": 0.00014596538586547454, "loss": 1.6417, "step": 4959 }, { "epoch": 2.274280472422887, "grad_norm": 0.38618066906929016, "learning_rate": 0.00014579055827848448, "loss": 1.0165, "step": 4960 }, { "epoch": 2.27473913542025, "grad_norm": 0.22514605522155762, "learning_rate": 0.0001456158175816245, "loss": 1.1064, "step": 4961 }, { "epoch": 2.275197798417613, "grad_norm": 0.34494972229003906, "learning_rate": 0.00014544116381775985, "loss": 1.1996, "step": 4962 }, { "epoch": 2.2756564614149752, "grad_norm": 0.2711491882801056, "learning_rate": 0.000145266597029734, "loss": 1.4014, "step": 4963 }, { "epoch": 2.276115124412338, "grad_norm": 0.2802002727985382, "learning_rate": 0.00014509211726036975, "loss": 1.1682, "step": 4964 }, { "epoch": 2.276573787409701, "grad_norm": 0.2963082194328308, "learning_rate": 0.0001449177245524681, "loss": 1.387, "step": 4965 }, { "epoch": 2.2770324504070634, "grad_norm": 0.33113130927085876, "learning_rate": 0.00014474341894880888, "loss": 1.2594, "step": 4966 }, { "epoch": 2.2774911134044262, "grad_norm": 0.37912243604660034, "learning_rate": 0.00014456920049215054, "loss": 1.7935, "step": 4967 }, { "epoch": 2.2779497764017886, "grad_norm": 0.3738153278827667, "learning_rate": 0.00014439506922523016, "loss": 1.233, "step": 4968 }, { "epoch": 2.2784084393991515, "grad_norm": 0.280269980430603, "learning_rate": 0.0001442210251907633, "loss": 1.2351, "step": 4969 }, { "epoch": 2.2788671023965144, "grad_norm": 0.3059045374393463, "learning_rate": 0.00014404706843144423, "loss": 1.5538, "step": 4970 }, { "epoch": 2.2793257653938768, "grad_norm": 0.22319234907627106, "learning_rate": 0.0001438731989899459, "loss": 0.9915, "step": 4971 }, { "epoch": 2.2797844283912396, "grad_norm": 0.2013273686170578, "learning_rate": 0.00014369941690891959, "loss": 0.5437, "step": 4972 }, { "epoch": 2.280243091388602, "grad_norm": 0.33141711354255676, "learning_rate": 0.00014352572223099542, "loss": 1.1842, "step": 4973 }, { "epoch": 2.280701754385965, "grad_norm": 0.24733178317546844, "learning_rate": 0.00014335211499878203, "loss": 1.5018, "step": 4974 }, { "epoch": 2.2811604173833278, "grad_norm": 0.32721972465515137, "learning_rate": 0.00014317859525486625, "loss": 1.2133, "step": 4975 }, { "epoch": 2.28161908038069, "grad_norm": 0.3684854209423065, "learning_rate": 0.00014300516304181389, "loss": 1.7316, "step": 4976 }, { "epoch": 2.282077743378053, "grad_norm": 0.22372929751873016, "learning_rate": 0.0001428318184021691, "loss": 0.6982, "step": 4977 }, { "epoch": 2.2825364063754154, "grad_norm": 0.31195321679115295, "learning_rate": 0.00014265856137845434, "loss": 1.6587, "step": 4978 }, { "epoch": 2.2829950693727783, "grad_norm": 0.2723373472690582, "learning_rate": 0.0001424853920131714, "loss": 1.2561, "step": 4979 }, { "epoch": 2.283453732370141, "grad_norm": 0.4274025559425354, "learning_rate": 0.0001423123103487995, "loss": 2.2205, "step": 4980 }, { "epoch": 2.2839123953675036, "grad_norm": 0.3065417408943176, "learning_rate": 0.00014213931642779686, "loss": 0.9494, "step": 4981 }, { "epoch": 2.2843710583648664, "grad_norm": 0.21199515461921692, "learning_rate": 0.00014196641029260026, "loss": 0.624, "step": 4982 }, { "epoch": 2.2848297213622293, "grad_norm": 0.23199045658111572, "learning_rate": 0.00014179359198562475, "loss": 1.1652, "step": 4983 }, { "epoch": 2.2852883843595917, "grad_norm": 0.29177722334861755, "learning_rate": 0.00014162086154926397, "loss": 1.4468, "step": 4984 }, { "epoch": 2.2857470473569546, "grad_norm": 0.37286636233329773, "learning_rate": 0.00014144821902589, "loss": 1.9711, "step": 4985 }, { "epoch": 2.2862057103543174, "grad_norm": 0.4224682152271271, "learning_rate": 0.00014127566445785306, "loss": 1.3417, "step": 4986 }, { "epoch": 2.28666437335168, "grad_norm": 0.3329372704029083, "learning_rate": 0.00014110319788748215, "loss": 1.4227, "step": 4987 }, { "epoch": 2.2871230363490427, "grad_norm": 0.30778220295906067, "learning_rate": 0.00014093081935708445, "loss": 1.3458, "step": 4988 }, { "epoch": 2.287581699346405, "grad_norm": 0.34832048416137695, "learning_rate": 0.0001407585289089459, "loss": 1.2986, "step": 4989 }, { "epoch": 2.288040362343768, "grad_norm": 0.32428908348083496, "learning_rate": 0.00014058632658533072, "loss": 1.9873, "step": 4990 }, { "epoch": 2.288499025341131, "grad_norm": 0.39319339394569397, "learning_rate": 0.0001404142124284809, "loss": 1.6913, "step": 4991 }, { "epoch": 2.2889576883384932, "grad_norm": 0.3040050268173218, "learning_rate": 0.00014024218648061755, "loss": 1.1154, "step": 4992 }, { "epoch": 2.289416351335856, "grad_norm": 0.4540446102619171, "learning_rate": 0.00014007024878393983, "loss": 1.297, "step": 4993 }, { "epoch": 2.2898750143332185, "grad_norm": 0.29186972975730896, "learning_rate": 0.0001398983993806253, "loss": 1.6358, "step": 4994 }, { "epoch": 2.2903336773305814, "grad_norm": 0.25946202874183655, "learning_rate": 0.0001397266383128299, "loss": 1.522, "step": 4995 }, { "epoch": 2.290792340327944, "grad_norm": 0.27808359265327454, "learning_rate": 0.00013955496562268794, "loss": 1.1537, "step": 4996 }, { "epoch": 2.2912510033253066, "grad_norm": 0.34630027413368225, "learning_rate": 0.0001393833813523117, "loss": 1.5734, "step": 4997 }, { "epoch": 2.2917096663226695, "grad_norm": 0.2861468195915222, "learning_rate": 0.0001392118855437921, "loss": 0.5451, "step": 4998 }, { "epoch": 2.292168329320032, "grad_norm": 0.3561016917228699, "learning_rate": 0.0001390404782391985, "loss": 1.2978, "step": 4999 }, { "epoch": 2.2926269923173948, "grad_norm": 0.2726607024669647, "learning_rate": 0.00013886915948057825, "loss": 1.256, "step": 5000 }, { "epoch": 2.2930856553147576, "grad_norm": 0.41800275444984436, "learning_rate": 0.00013869792930995724, "loss": 1.5729, "step": 5001 }, { "epoch": 2.29354431831212, "grad_norm": 0.3518107533454895, "learning_rate": 0.00013852678776933914, "loss": 1.1977, "step": 5002 }, { "epoch": 2.294002981309483, "grad_norm": 0.14994703233242035, "learning_rate": 0.0001383557349007063, "loss": 0.6909, "step": 5003 }, { "epoch": 2.2944616443068457, "grad_norm": 0.2874891459941864, "learning_rate": 0.00013818477074601933, "loss": 2.2263, "step": 5004 }, { "epoch": 2.294920307304208, "grad_norm": 0.41052594780921936, "learning_rate": 0.00013801389534721692, "loss": 1.6545, "step": 5005 }, { "epoch": 2.295378970301571, "grad_norm": 0.6844239830970764, "learning_rate": 0.00013784310874621604, "loss": 1.9092, "step": 5006 }, { "epoch": 2.2958376332989334, "grad_norm": 0.21883147954940796, "learning_rate": 0.00013767241098491185, "loss": 0.4407, "step": 5007 }, { "epoch": 2.2962962962962963, "grad_norm": 0.06891786307096481, "learning_rate": 0.00013750180210517777, "loss": 0.7355, "step": 5008 }, { "epoch": 2.296754959293659, "grad_norm": 0.2810133099555969, "learning_rate": 0.00013733128214886536, "loss": 0.9875, "step": 5009 }, { "epoch": 2.2972136222910216, "grad_norm": 0.28389817476272583, "learning_rate": 0.00013716085115780447, "loss": 1.6359, "step": 5010 }, { "epoch": 2.2976722852883844, "grad_norm": 0.43318691849708557, "learning_rate": 0.00013699050917380295, "loss": 1.6229, "step": 5011 }, { "epoch": 2.2981309482857473, "grad_norm": 0.36424124240875244, "learning_rate": 0.00013682025623864698, "loss": 1.8982, "step": 5012 }, { "epoch": 2.2985896112831097, "grad_norm": 0.3689767122268677, "learning_rate": 0.00013665009239410098, "loss": 1.2045, "step": 5013 }, { "epoch": 2.2990482742804725, "grad_norm": 0.28414589166641235, "learning_rate": 0.00013648001768190699, "loss": 1.5854, "step": 5014 }, { "epoch": 2.299506937277835, "grad_norm": 0.3238101601600647, "learning_rate": 0.00013631003214378584, "loss": 0.8048, "step": 5015 }, { "epoch": 2.299965600275198, "grad_norm": 0.25493547320365906, "learning_rate": 0.00013614013582143614, "loss": 1.5388, "step": 5016 }, { "epoch": 2.3004242632725607, "grad_norm": 0.3472574055194855, "learning_rate": 0.0001359703287565347, "loss": 1.3253, "step": 5017 }, { "epoch": 2.300882926269923, "grad_norm": 0.21940115094184875, "learning_rate": 0.00013580061099073638, "loss": 1.473, "step": 5018 }, { "epoch": 2.301341589267286, "grad_norm": 0.33092114329338074, "learning_rate": 0.0001356309825656742, "loss": 1.5323, "step": 5019 }, { "epoch": 2.3018002522646483, "grad_norm": 0.3832414150238037, "learning_rate": 0.0001354614435229592, "loss": 1.3107, "step": 5020 }, { "epoch": 2.302258915262011, "grad_norm": 0.27697277069091797, "learning_rate": 0.0001352919939041806, "loss": 1.5306, "step": 5021 }, { "epoch": 2.302717578259374, "grad_norm": 0.30245643854141235, "learning_rate": 0.00013512263375090562, "loss": 1.4718, "step": 5022 }, { "epoch": 2.3031762412567365, "grad_norm": 0.3190750777721405, "learning_rate": 0.00013495336310467943, "loss": 1.4829, "step": 5023 }, { "epoch": 2.3036349042540993, "grad_norm": 0.2573975622653961, "learning_rate": 0.00013478418200702552, "loss": 0.5235, "step": 5024 }, { "epoch": 2.3040935672514617, "grad_norm": 0.13736076653003693, "learning_rate": 0.00013461509049944497, "loss": 0.5812, "step": 5025 }, { "epoch": 2.3045522302488246, "grad_norm": 0.19605790078639984, "learning_rate": 0.00013444608862341734, "loss": 1.2568, "step": 5026 }, { "epoch": 2.3050108932461875, "grad_norm": 0.25617191195487976, "learning_rate": 0.00013427717642039988, "loss": 1.5799, "step": 5027 }, { "epoch": 2.30546955624355, "grad_norm": 0.3717952370643616, "learning_rate": 0.00013410835393182807, "loss": 1.3069, "step": 5028 }, { "epoch": 2.3059282192409127, "grad_norm": 0.37081441283226013, "learning_rate": 0.00013393962119911528, "loss": 1.7627, "step": 5029 }, { "epoch": 2.3063868822382756, "grad_norm": 0.3451120853424072, "learning_rate": 0.0001337709782636528, "loss": 1.7584, "step": 5030 }, { "epoch": 2.306845545235638, "grad_norm": 0.326267272233963, "learning_rate": 0.00013360242516681004, "loss": 1.0427, "step": 5031 }, { "epoch": 2.307304208233001, "grad_norm": 0.2787007987499237, "learning_rate": 0.00013343396194993423, "loss": 1.3569, "step": 5032 }, { "epoch": 2.3077628712303637, "grad_norm": 0.2687190771102905, "learning_rate": 0.0001332655886543506, "loss": 1.8645, "step": 5033 }, { "epoch": 2.308221534227726, "grad_norm": 0.3988434970378876, "learning_rate": 0.00013309730532136245, "loss": 2.0012, "step": 5034 }, { "epoch": 2.308680197225089, "grad_norm": 0.1921907663345337, "learning_rate": 0.0001329291119922509, "loss": 1.2577, "step": 5035 }, { "epoch": 2.3091388602224514, "grad_norm": 0.4280322790145874, "learning_rate": 0.00013276100870827473, "loss": 1.693, "step": 5036 }, { "epoch": 2.3095975232198143, "grad_norm": 0.34726113080978394, "learning_rate": 0.00013259299551067106, "loss": 0.8334, "step": 5037 }, { "epoch": 2.310056186217177, "grad_norm": 0.1919124275445938, "learning_rate": 0.00013242507244065477, "loss": 1.77, "step": 5038 }, { "epoch": 2.3105148492145395, "grad_norm": 0.3817395865917206, "learning_rate": 0.00013225723953941854, "loss": 1.9272, "step": 5039 }, { "epoch": 2.3109735122119024, "grad_norm": 0.41375109553337097, "learning_rate": 0.00013208949684813287, "loss": 1.5177, "step": 5040 }, { "epoch": 2.311432175209265, "grad_norm": 0.29578983783721924, "learning_rate": 0.00013192184440794668, "loss": 1.3288, "step": 5041 }, { "epoch": 2.3118908382066277, "grad_norm": 0.19934503734111786, "learning_rate": 0.00013175428225998593, "loss": 0.9299, "step": 5042 }, { "epoch": 2.3123495012039905, "grad_norm": 0.2743481397628784, "learning_rate": 0.00013158681044535487, "loss": 1.5061, "step": 5043 }, { "epoch": 2.312808164201353, "grad_norm": 0.44560080766677856, "learning_rate": 0.00013141942900513564, "loss": 0.9513, "step": 5044 }, { "epoch": 2.313266827198716, "grad_norm": 0.21826134622097015, "learning_rate": 0.0001312521379803881, "loss": 0.941, "step": 5045 }, { "epoch": 2.313725490196078, "grad_norm": 0.16389012336730957, "learning_rate": 0.00013108493741215, "loss": 1.3627, "step": 5046 }, { "epoch": 2.314184153193441, "grad_norm": 0.3645002543926239, "learning_rate": 0.00013091782734143671, "loss": 1.5707, "step": 5047 }, { "epoch": 2.314642816190804, "grad_norm": 0.3624444901943207, "learning_rate": 0.00013075080780924154, "loss": 2.1348, "step": 5048 }, { "epoch": 2.3151014791881663, "grad_norm": 0.3850797712802887, "learning_rate": 0.00013058387885653562, "loss": 1.4394, "step": 5049 }, { "epoch": 2.315560142185529, "grad_norm": 0.281230092048645, "learning_rate": 0.00013041704052426772, "loss": 1.6204, "step": 5050 }, { "epoch": 2.316018805182892, "grad_norm": 0.2876693308353424, "learning_rate": 0.00013025029285336476, "loss": 1.1374, "step": 5051 }, { "epoch": 2.3164774681802545, "grad_norm": 0.3920785188674927, "learning_rate": 0.00013008363588473115, "loss": 1.639, "step": 5052 }, { "epoch": 2.3169361311776173, "grad_norm": 0.2921794652938843, "learning_rate": 0.00012991706965924876, "loss": 1.5347, "step": 5053 }, { "epoch": 2.31739479417498, "grad_norm": 0.26020175218582153, "learning_rate": 0.00012975059421777759, "loss": 1.1937, "step": 5054 }, { "epoch": 2.3178534571723426, "grad_norm": 0.36081263422966003, "learning_rate": 0.0001295842096011553, "loss": 1.3285, "step": 5055 }, { "epoch": 2.3183121201697054, "grad_norm": 0.48956963419914246, "learning_rate": 0.00012941791585019725, "loss": 1.4641, "step": 5056 }, { "epoch": 2.318770783167068, "grad_norm": 0.1845049411058426, "learning_rate": 0.0001292517130056966, "loss": 1.1712, "step": 5057 }, { "epoch": 2.3192294461644307, "grad_norm": 0.35714197158813477, "learning_rate": 0.00012908560110842383, "loss": 1.4028, "step": 5058 }, { "epoch": 2.3196881091617936, "grad_norm": 0.2765589654445648, "learning_rate": 0.00012891958019912758, "loss": 0.852, "step": 5059 }, { "epoch": 2.320146772159156, "grad_norm": 0.32246285676956177, "learning_rate": 0.00012875365031853376, "loss": 1.725, "step": 5060 }, { "epoch": 2.320605435156519, "grad_norm": 0.3229038715362549, "learning_rate": 0.0001285878115073465, "loss": 1.4902, "step": 5061 }, { "epoch": 2.3210640981538813, "grad_norm": 0.24635767936706543, "learning_rate": 0.0001284220638062471, "loss": 0.9351, "step": 5062 }, { "epoch": 2.321522761151244, "grad_norm": 0.2740919589996338, "learning_rate": 0.00012825640725589477, "loss": 1.7063, "step": 5063 }, { "epoch": 2.321981424148607, "grad_norm": 0.3636001646518707, "learning_rate": 0.00012809084189692604, "loss": 1.3257, "step": 5064 }, { "epoch": 2.3224400871459694, "grad_norm": 0.27342599630355835, "learning_rate": 0.0001279253677699554, "loss": 1.2865, "step": 5065 }, { "epoch": 2.3228987501433322, "grad_norm": 0.3146760165691376, "learning_rate": 0.00012775998491557485, "loss": 1.28, "step": 5066 }, { "epoch": 2.3233574131406947, "grad_norm": 0.22414685785770416, "learning_rate": 0.00012759469337435397, "loss": 1.2673, "step": 5067 }, { "epoch": 2.3238160761380575, "grad_norm": 0.42230096459388733, "learning_rate": 0.00012742949318684, "loss": 1.0508, "step": 5068 }, { "epoch": 2.3242747391354204, "grad_norm": 0.38766834139823914, "learning_rate": 0.00012726438439355787, "loss": 1.6228, "step": 5069 }, { "epoch": 2.324733402132783, "grad_norm": 0.37745577096939087, "learning_rate": 0.00012709936703500947, "loss": 1.8116, "step": 5070 }, { "epoch": 2.3251920651301456, "grad_norm": 0.4257441759109497, "learning_rate": 0.0001269344411516753, "loss": 1.1618, "step": 5071 }, { "epoch": 2.3256507281275085, "grad_norm": 0.3758509159088135, "learning_rate": 0.00012676960678401262, "loss": 1.6102, "step": 5072 }, { "epoch": 2.326109391124871, "grad_norm": 0.27729111909866333, "learning_rate": 0.0001266048639724565, "loss": 0.6324, "step": 5073 }, { "epoch": 2.3265680541222338, "grad_norm": 0.27736055850982666, "learning_rate": 0.0001264402127574198, "loss": 1.5171, "step": 5074 }, { "epoch": 2.327026717119596, "grad_norm": 0.2820827066898346, "learning_rate": 0.0001262756531792922, "loss": 1.6448, "step": 5075 }, { "epoch": 2.327485380116959, "grad_norm": 0.31878018379211426, "learning_rate": 0.0001261111852784416, "loss": 0.9581, "step": 5076 }, { "epoch": 2.327944043114322, "grad_norm": 0.32956355810165405, "learning_rate": 0.0001259468090952131, "loss": 1.3363, "step": 5077 }, { "epoch": 2.3284027061116843, "grad_norm": 0.33068135380744934, "learning_rate": 0.0001257825246699294, "loss": 0.8995, "step": 5078 }, { "epoch": 2.328861369109047, "grad_norm": 0.2901584506034851, "learning_rate": 0.0001256183320428907, "loss": 1.6243, "step": 5079 }, { "epoch": 2.32932003210641, "grad_norm": 0.2884989380836487, "learning_rate": 0.0001254542312543745, "loss": 1.683, "step": 5080 }, { "epoch": 2.3297786951037724, "grad_norm": 0.38220369815826416, "learning_rate": 0.00012529022234463604, "loss": 1.7488, "step": 5081 }, { "epoch": 2.3302373581011353, "grad_norm": 0.3322943150997162, "learning_rate": 0.00012512630535390783, "loss": 1.9446, "step": 5082 }, { "epoch": 2.3306960210984977, "grad_norm": 0.3463505804538727, "learning_rate": 0.00012496248032239988, "loss": 1.0688, "step": 5083 }, { "epoch": 2.3311546840958606, "grad_norm": 0.240933358669281, "learning_rate": 0.00012479874729029968, "loss": 1.6491, "step": 5084 }, { "epoch": 2.3316133470932234, "grad_norm": 0.4334370195865631, "learning_rate": 0.00012463510629777226, "loss": 1.3899, "step": 5085 }, { "epoch": 2.332072010090586, "grad_norm": 0.2391756922006607, "learning_rate": 0.00012447155738495963, "loss": 0.608, "step": 5086 }, { "epoch": 2.3325306730879487, "grad_norm": 0.2066376507282257, "learning_rate": 0.00012430810059198166, "loss": 1.1892, "step": 5087 }, { "epoch": 2.332989336085311, "grad_norm": 0.33126476407051086, "learning_rate": 0.0001241447359589355, "loss": 1.22, "step": 5088 }, { "epoch": 2.333447999082674, "grad_norm": 0.29312899708747864, "learning_rate": 0.00012398146352589568, "loss": 1.2554, "step": 5089 }, { "epoch": 2.333906662080037, "grad_norm": 0.24564377963542938, "learning_rate": 0.0001238182833329141, "loss": 1.3128, "step": 5090 }, { "epoch": 2.3343653250773992, "grad_norm": 0.3976064920425415, "learning_rate": 0.00012365519542002, "loss": 1.4039, "step": 5091 }, { "epoch": 2.334823988074762, "grad_norm": 0.31696027517318726, "learning_rate": 0.0001234921998272201, "loss": 2.1157, "step": 5092 }, { "epoch": 2.3352826510721245, "grad_norm": 0.3862619400024414, "learning_rate": 0.00012332929659449827, "loss": 1.1534, "step": 5093 }, { "epoch": 2.3357413140694874, "grad_norm": 0.2641201317310333, "learning_rate": 0.000123166485761816, "loss": 0.9848, "step": 5094 }, { "epoch": 2.3361999770668502, "grad_norm": 0.3019377291202545, "learning_rate": 0.00012300376736911183, "loss": 1.233, "step": 5095 }, { "epoch": 2.3366586400642126, "grad_norm": 0.280788779258728, "learning_rate": 0.00012284114145630183, "loss": 1.6123, "step": 5096 }, { "epoch": 2.3371173030615755, "grad_norm": 0.3243066370487213, "learning_rate": 0.0001226786080632794, "loss": 0.5487, "step": 5097 }, { "epoch": 2.3375759660589384, "grad_norm": 0.18734464049339294, "learning_rate": 0.00012251616722991492, "loss": 0.9184, "step": 5098 }, { "epoch": 2.3380346290563008, "grad_norm": 0.17624834179878235, "learning_rate": 0.0001223538189960564, "loss": 1.5011, "step": 5099 }, { "epoch": 2.3384932920536636, "grad_norm": 0.2665694057941437, "learning_rate": 0.00012219156340152908, "loss": 0.8928, "step": 5100 }, { "epoch": 2.3389519550510265, "grad_norm": 0.2912820279598236, "learning_rate": 0.00012202940048613542, "loss": 1.5328, "step": 5101 }, { "epoch": 2.339410618048389, "grad_norm": 0.3863449692726135, "learning_rate": 0.00012186733028965507, "loss": 1.4951, "step": 5102 }, { "epoch": 2.3398692810457518, "grad_norm": 0.40611687302589417, "learning_rate": 0.00012170535285184509, "loss": 2.1716, "step": 5103 }, { "epoch": 2.340327944043114, "grad_norm": 0.4273900091648102, "learning_rate": 0.00012154346821243972, "loss": 1.21, "step": 5104 }, { "epoch": 2.340786607040477, "grad_norm": 0.3556171953678131, "learning_rate": 0.0001213816764111504, "loss": 1.3158, "step": 5105 }, { "epoch": 2.34124527003784, "grad_norm": 0.28761526942253113, "learning_rate": 0.00012121997748766583, "loss": 1.6339, "step": 5106 }, { "epoch": 2.3417039330352023, "grad_norm": 0.2865532338619232, "learning_rate": 0.00012105837148165194, "loss": 1.3912, "step": 5107 }, { "epoch": 2.342162596032565, "grad_norm": 0.30358991026878357, "learning_rate": 0.000120896858432752, "loss": 1.1263, "step": 5108 }, { "epoch": 2.3426212590299276, "grad_norm": 0.07187612354755402, "learning_rate": 0.00012073543838058598, "loss": 1.1824, "step": 5109 }, { "epoch": 2.3430799220272904, "grad_norm": 0.2775716483592987, "learning_rate": 0.00012057411136475161, "loss": 1.1338, "step": 5110 }, { "epoch": 2.3435385850246533, "grad_norm": 0.370094358921051, "learning_rate": 0.00012041287742482348, "loss": 0.8284, "step": 5111 }, { "epoch": 2.3439972480220157, "grad_norm": 0.5262327790260315, "learning_rate": 0.00012025173660035338, "loss": 1.6775, "step": 5112 }, { "epoch": 2.3444559110193786, "grad_norm": 0.36328423023223877, "learning_rate": 0.00012009068893087067, "loss": 1.453, "step": 5113 }, { "epoch": 2.344914574016741, "grad_norm": 1.2442697286605835, "learning_rate": 0.00011992973445588112, "loss": 2.0186, "step": 5114 }, { "epoch": 2.345373237014104, "grad_norm": 0.26644742488861084, "learning_rate": 0.00011976887321486813, "loss": 0.9504, "step": 5115 }, { "epoch": 2.3458319000114667, "grad_norm": 0.31766730546951294, "learning_rate": 0.00011960810524729222, "loss": 1.277, "step": 5116 }, { "epoch": 2.346290563008829, "grad_norm": 0.3006284236907959, "learning_rate": 0.00011944743059259078, "loss": 0.7456, "step": 5117 }, { "epoch": 2.346749226006192, "grad_norm": 0.2082628756761551, "learning_rate": 0.00011928684929017859, "loss": 0.9155, "step": 5118 }, { "epoch": 2.347207889003555, "grad_norm": 0.2231052815914154, "learning_rate": 0.0001191263613794475, "loss": 0.882, "step": 5119 }, { "epoch": 2.3476665520009172, "grad_norm": 0.2642306983470917, "learning_rate": 0.00011896596689976602, "loss": 1.3383, "step": 5120 }, { "epoch": 2.34812521499828, "grad_norm": 0.26055455207824707, "learning_rate": 0.00011880566589048031, "loss": 1.2426, "step": 5121 }, { "epoch": 2.348583877995643, "grad_norm": 0.20619313418865204, "learning_rate": 0.00011864545839091312, "loss": 0.7382, "step": 5122 }, { "epoch": 2.3490425409930054, "grad_norm": 0.19531986117362976, "learning_rate": 0.00011848534444036485, "loss": 1.091, "step": 5123 }, { "epoch": 2.349501203990368, "grad_norm": 0.3989899158477783, "learning_rate": 0.00011832532407811247, "loss": 0.8412, "step": 5124 }, { "epoch": 2.3499598669877306, "grad_norm": 0.2260282337665558, "learning_rate": 0.00011816539734341025, "loss": 1.4639, "step": 5125 }, { "epoch": 2.3504185299850935, "grad_norm": 0.26231110095977783, "learning_rate": 0.00011800556427548908, "loss": 1.1874, "step": 5126 }, { "epoch": 2.3508771929824563, "grad_norm": 0.19405876100063324, "learning_rate": 0.00011784582491355727, "loss": 0.5713, "step": 5127 }, { "epoch": 2.3513358559798188, "grad_norm": 0.2558598220348358, "learning_rate": 0.00011768617929680014, "loss": 1.4579, "step": 5128 }, { "epoch": 2.3517945189771816, "grad_norm": 0.2595175504684448, "learning_rate": 0.00011752662746437986, "loss": 1.4692, "step": 5129 }, { "epoch": 2.352253181974544, "grad_norm": 0.4527866244316101, "learning_rate": 0.0001173671694554358, "loss": 1.7926, "step": 5130 }, { "epoch": 2.352711844971907, "grad_norm": 0.24965371191501617, "learning_rate": 0.00011720780530908381, "loss": 0.972, "step": 5131 }, { "epoch": 2.3531705079692697, "grad_norm": 0.21504342555999756, "learning_rate": 0.0001170485350644171, "loss": 0.7318, "step": 5132 }, { "epoch": 2.353629170966632, "grad_norm": 0.46009358763694763, "learning_rate": 0.00011688935876050616, "loss": 1.8917, "step": 5133 }, { "epoch": 2.354087833963995, "grad_norm": 0.35450685024261475, "learning_rate": 0.00011673027643639784, "loss": 0.7528, "step": 5134 }, { "epoch": 2.3545464969613574, "grad_norm": 0.10655711591243744, "learning_rate": 0.00011657128813111622, "loss": 0.8409, "step": 5135 }, { "epoch": 2.3550051599587203, "grad_norm": 0.40161076188087463, "learning_rate": 0.00011641239388366249, "loss": 1.6137, "step": 5136 }, { "epoch": 2.355463822956083, "grad_norm": 0.34276244044303894, "learning_rate": 0.00011625359373301414, "loss": 0.9094, "step": 5137 }, { "epoch": 2.3559224859534456, "grad_norm": 0.3367175757884979, "learning_rate": 0.00011609488771812621, "loss": 1.2706, "step": 5138 }, { "epoch": 2.3563811489508084, "grad_norm": 0.27182361483573914, "learning_rate": 0.00011593627587793043, "loss": 1.489, "step": 5139 }, { "epoch": 2.3568398119481713, "grad_norm": 0.3773139715194702, "learning_rate": 0.00011577775825133546, "loss": 1.0298, "step": 5140 }, { "epoch": 2.3572984749455337, "grad_norm": 0.11614841967821121, "learning_rate": 0.00011561933487722687, "loss": 0.7662, "step": 5141 }, { "epoch": 2.3577571379428965, "grad_norm": 0.24605746567249298, "learning_rate": 0.00011546100579446672, "loss": 0.8525, "step": 5142 }, { "epoch": 2.358215800940259, "grad_norm": 0.4239916503429413, "learning_rate": 0.00011530277104189463, "loss": 1.5721, "step": 5143 }, { "epoch": 2.358674463937622, "grad_norm": 0.2336055487394333, "learning_rate": 0.00011514463065832665, "loss": 1.2539, "step": 5144 }, { "epoch": 2.3591331269349847, "grad_norm": 0.333873987197876, "learning_rate": 0.00011498658468255568, "loss": 1.3782, "step": 5145 }, { "epoch": 2.359591789932347, "grad_norm": 0.35513144731521606, "learning_rate": 0.00011482863315335157, "loss": 1.3682, "step": 5146 }, { "epoch": 2.36005045292971, "grad_norm": 0.2521505057811737, "learning_rate": 0.00011467077610946113, "loss": 1.2587, "step": 5147 }, { "epoch": 2.360509115927073, "grad_norm": 0.30736634135246277, "learning_rate": 0.00011451301358960758, "loss": 1.3173, "step": 5148 }, { "epoch": 2.360967778924435, "grad_norm": 0.3339071273803711, "learning_rate": 0.00011435534563249123, "loss": 1.3926, "step": 5149 }, { "epoch": 2.361426441921798, "grad_norm": 0.3305213749408722, "learning_rate": 0.00011419777227678929, "loss": 1.4092, "step": 5150 }, { "epoch": 2.3618851049191605, "grad_norm": 0.30224788188934326, "learning_rate": 0.00011404029356115558, "loss": 1.4058, "step": 5151 }, { "epoch": 2.3623437679165233, "grad_norm": 0.3267022967338562, "learning_rate": 0.00011388290952422075, "loss": 1.3408, "step": 5152 }, { "epoch": 2.362802430913886, "grad_norm": 0.17730341851711273, "learning_rate": 0.00011372562020459231, "loss": 0.8203, "step": 5153 }, { "epoch": 2.3632610939112486, "grad_norm": 0.2721312344074249, "learning_rate": 0.00011356842564085434, "loss": 0.8142, "step": 5154 }, { "epoch": 2.3637197569086115, "grad_norm": 0.34186965227127075, "learning_rate": 0.00011341132587156793, "loss": 1.9951, "step": 5155 }, { "epoch": 2.364178419905974, "grad_norm": 0.36209332942962646, "learning_rate": 0.00011325432093527077, "loss": 2.0428, "step": 5156 }, { "epoch": 2.3646370829033367, "grad_norm": 0.3819536864757538, "learning_rate": 0.0001130974108704772, "loss": 1.578, "step": 5157 }, { "epoch": 2.3650957459006996, "grad_norm": 0.36597928404808044, "learning_rate": 0.00011294059571567861, "loss": 1.5025, "step": 5158 }, { "epoch": 2.365554408898062, "grad_norm": 0.328401654958725, "learning_rate": 0.0001127838755093426, "loss": 0.9453, "step": 5159 }, { "epoch": 2.366013071895425, "grad_norm": 0.29384055733680725, "learning_rate": 0.00011262725028991388, "loss": 1.6815, "step": 5160 }, { "epoch": 2.3664717348927873, "grad_norm": 0.3777378499507904, "learning_rate": 0.00011247072009581383, "loss": 1.2265, "step": 5161 }, { "epoch": 2.36693039789015, "grad_norm": 0.7261742949485779, "learning_rate": 0.00011231428496544033, "loss": 1.8345, "step": 5162 }, { "epoch": 2.367389060887513, "grad_norm": 0.2863733172416687, "learning_rate": 0.00011215794493716808, "loss": 0.9066, "step": 5163 }, { "epoch": 2.3678477238848754, "grad_norm": 0.3603268265724182, "learning_rate": 0.00011200170004934839, "loss": 1.6718, "step": 5164 }, { "epoch": 2.3683063868822383, "grad_norm": 0.2777169346809387, "learning_rate": 0.00011184555034030936, "loss": 1.1851, "step": 5165 }, { "epoch": 2.368765049879601, "grad_norm": 0.24623733758926392, "learning_rate": 0.00011168949584835553, "loss": 1.5477, "step": 5166 }, { "epoch": 2.3692237128769635, "grad_norm": 0.2616453766822815, "learning_rate": 0.00011153353661176824, "loss": 0.8338, "step": 5167 }, { "epoch": 2.3696823758743264, "grad_norm": 0.24914288520812988, "learning_rate": 0.00011137767266880538, "loss": 0.859, "step": 5168 }, { "epoch": 2.3701410388716893, "grad_norm": 0.21932218968868256, "learning_rate": 0.00011122190405770172, "loss": 1.8062, "step": 5169 }, { "epoch": 2.3705997018690517, "grad_norm": 0.38597074151039124, "learning_rate": 0.00011106623081666806, "loss": 1.8027, "step": 5170 }, { "epoch": 2.3710583648664145, "grad_norm": 0.33088138699531555, "learning_rate": 0.0001109106529838923, "loss": 1.2009, "step": 5171 }, { "epoch": 2.371517027863777, "grad_norm": 0.2947171628475189, "learning_rate": 0.00011075517059753892, "loss": 1.1661, "step": 5172 }, { "epoch": 2.37197569086114, "grad_norm": 0.26863881945610046, "learning_rate": 0.00011059978369574875, "loss": 1.145, "step": 5173 }, { "epoch": 2.3724343538585027, "grad_norm": 0.2579619586467743, "learning_rate": 0.00011044449231663939, "loss": 1.1743, "step": 5174 }, { "epoch": 2.372893016855865, "grad_norm": 0.4377444088459015, "learning_rate": 0.00011028929649830489, "loss": 1.72, "step": 5175 }, { "epoch": 2.373351679853228, "grad_norm": 0.6905438303947449, "learning_rate": 0.00011013419627881587, "loss": 0.7064, "step": 5176 }, { "epoch": 2.3738103428505903, "grad_norm": 0.1522587239742279, "learning_rate": 0.00010997919169621962, "loss": 1.4874, "step": 5177 }, { "epoch": 2.374269005847953, "grad_norm": 0.3481024205684662, "learning_rate": 0.00010982428278853984, "loss": 0.9126, "step": 5178 }, { "epoch": 2.374727668845316, "grad_norm": 0.24200594425201416, "learning_rate": 0.00010966946959377682, "loss": 1.7862, "step": 5179 }, { "epoch": 2.3751863318426785, "grad_norm": 0.40860190987586975, "learning_rate": 0.00010951475214990747, "loss": 1.0853, "step": 5180 }, { "epoch": 2.3756449948400413, "grad_norm": 0.09399288147687912, "learning_rate": 0.0001093601304948848, "loss": 1.2584, "step": 5181 }, { "epoch": 2.3761036578374037, "grad_norm": 0.31636422872543335, "learning_rate": 0.00010920560466663882, "loss": 0.8112, "step": 5182 }, { "epoch": 2.3765623208347666, "grad_norm": 0.3569824695587158, "learning_rate": 0.00010905117470307573, "loss": 1.3749, "step": 5183 }, { "epoch": 2.3770209838321295, "grad_norm": 0.07064448297023773, "learning_rate": 0.00010889684064207845, "loss": 0.731, "step": 5184 }, { "epoch": 2.377479646829492, "grad_norm": 0.2785203456878662, "learning_rate": 0.00010874260252150598, "loss": 1.1004, "step": 5185 }, { "epoch": 2.3779383098268547, "grad_norm": 0.32780954241752625, "learning_rate": 0.00010858846037919451, "loss": 1.3779, "step": 5186 }, { "epoch": 2.3783969728242176, "grad_norm": 0.23089632391929626, "learning_rate": 0.00010843441425295575, "loss": 0.9364, "step": 5187 }, { "epoch": 2.37885563582158, "grad_norm": 0.2271614968776703, "learning_rate": 0.00010828046418057858, "loss": 1.3, "step": 5188 }, { "epoch": 2.379314298818943, "grad_norm": 0.32289209961891174, "learning_rate": 0.00010812661019982795, "loss": 1.4732, "step": 5189 }, { "epoch": 2.3797729618163057, "grad_norm": 0.2017325609922409, "learning_rate": 0.00010797285234844539, "loss": 0.9344, "step": 5190 }, { "epoch": 2.380231624813668, "grad_norm": 0.2721211910247803, "learning_rate": 0.00010781919066414881, "loss": 1.6788, "step": 5191 }, { "epoch": 2.380690287811031, "grad_norm": 0.3644427955150604, "learning_rate": 0.00010766562518463268, "loss": 1.307, "step": 5192 }, { "epoch": 2.3811489508083934, "grad_norm": 0.35269877314567566, "learning_rate": 0.00010751215594756741, "loss": 1.3381, "step": 5193 }, { "epoch": 2.3816076138057563, "grad_norm": 0.3497646450996399, "learning_rate": 0.00010735878299060014, "loss": 1.5481, "step": 5194 }, { "epoch": 2.382066276803119, "grad_norm": 0.28884097933769226, "learning_rate": 0.00010720550635135467, "loss": 1.1808, "step": 5195 }, { "epoch": 2.3825249398004815, "grad_norm": 0.43060022592544556, "learning_rate": 0.00010705232606743066, "loss": 1.9484, "step": 5196 }, { "epoch": 2.3829836027978444, "grad_norm": 0.25279608368873596, "learning_rate": 0.00010689924217640445, "loss": 1.1418, "step": 5197 }, { "epoch": 2.383442265795207, "grad_norm": 0.36996960639953613, "learning_rate": 0.00010674625471582849, "loss": 1.4287, "step": 5198 }, { "epoch": 2.3839009287925697, "grad_norm": 0.3359069228172302, "learning_rate": 0.0001065933637232317, "loss": 1.0623, "step": 5199 }, { "epoch": 2.3843595917899325, "grad_norm": 0.2985363006591797, "learning_rate": 0.00010644056923611939, "loss": 1.366, "step": 5200 }, { "epoch": 2.384818254787295, "grad_norm": 0.5279024243354797, "learning_rate": 0.0001062878712919732, "loss": 2.0491, "step": 5201 }, { "epoch": 2.385276917784658, "grad_norm": 0.3357357680797577, "learning_rate": 0.00010613526992825096, "loss": 1.1142, "step": 5202 }, { "epoch": 2.38573558078202, "grad_norm": 0.2649446427822113, "learning_rate": 0.00010598276518238709, "loss": 0.9735, "step": 5203 }, { "epoch": 2.386194243779383, "grad_norm": 0.23111626505851746, "learning_rate": 0.00010583035709179162, "loss": 0.9959, "step": 5204 }, { "epoch": 2.386652906776746, "grad_norm": 0.18565475940704346, "learning_rate": 0.0001056780456938518, "loss": 1.4027, "step": 5205 }, { "epoch": 2.3871115697741083, "grad_norm": 0.3480309247970581, "learning_rate": 0.00010552583102593061, "loss": 2.1028, "step": 5206 }, { "epoch": 2.387570232771471, "grad_norm": 0.2654421329498291, "learning_rate": 0.00010537371312536736, "loss": 1.1671, "step": 5207 }, { "epoch": 2.388028895768834, "grad_norm": 0.4123840928077698, "learning_rate": 0.00010522169202947784, "loss": 1.4777, "step": 5208 }, { "epoch": 2.3884875587661965, "grad_norm": 0.28358355164527893, "learning_rate": 0.0001050697677755536, "loss": 0.8817, "step": 5209 }, { "epoch": 2.3889462217635593, "grad_norm": 0.29822635650634766, "learning_rate": 0.00010491794040086289, "loss": 1.6391, "step": 5210 }, { "epoch": 2.3894048847609217, "grad_norm": 0.34754717350006104, "learning_rate": 0.00010476620994265013, "loss": 0.8802, "step": 5211 }, { "epoch": 2.3898635477582846, "grad_norm": 0.08379904180765152, "learning_rate": 0.00010461457643813588, "loss": 0.3658, "step": 5212 }, { "epoch": 2.3903222107556474, "grad_norm": 0.06315892189741135, "learning_rate": 0.00010446303992451689, "loss": 0.8891, "step": 5213 }, { "epoch": 2.39078087375301, "grad_norm": 0.31999555230140686, "learning_rate": 0.00010431160043896615, "loss": 1.1747, "step": 5214 }, { "epoch": 2.3912395367503727, "grad_norm": 0.2988559901714325, "learning_rate": 0.00010416025801863289, "loss": 1.2582, "step": 5215 }, { "epoch": 2.3916981997477356, "grad_norm": 0.342385858297348, "learning_rate": 0.0001040090127006425, "loss": 1.0219, "step": 5216 }, { "epoch": 2.392156862745098, "grad_norm": 0.21291863918304443, "learning_rate": 0.00010385786452209656, "loss": 1.1656, "step": 5217 }, { "epoch": 2.392615525742461, "grad_norm": 0.29049792885780334, "learning_rate": 0.00010370681352007272, "loss": 1.4101, "step": 5218 }, { "epoch": 2.3930741887398232, "grad_norm": 0.26569485664367676, "learning_rate": 0.00010355585973162501, "loss": 1.0166, "step": 5219 }, { "epoch": 2.393532851737186, "grad_norm": 0.325291246175766, "learning_rate": 0.00010340500319378348, "loss": 1.544, "step": 5220 }, { "epoch": 2.393991514734549, "grad_norm": 0.30292245745658875, "learning_rate": 0.00010325424394355421, "loss": 1.1687, "step": 5221 }, { "epoch": 2.3944501777319114, "grad_norm": 0.28847742080688477, "learning_rate": 0.00010310358201791953, "loss": 1.7224, "step": 5222 }, { "epoch": 2.3949088407292742, "grad_norm": 0.36918914318084717, "learning_rate": 0.000102953017453838, "loss": 1.5133, "step": 5223 }, { "epoch": 2.3953675037266366, "grad_norm": 0.32062050700187683, "learning_rate": 0.00010280255028824414, "loss": 1.5388, "step": 5224 }, { "epoch": 2.3958261667239995, "grad_norm": 0.4374137222766876, "learning_rate": 0.0001026521805580486, "loss": 1.0036, "step": 5225 }, { "epoch": 2.3962848297213624, "grad_norm": 0.24663524329662323, "learning_rate": 0.00010250190830013823, "loss": 1.4458, "step": 5226 }, { "epoch": 2.3967434927187248, "grad_norm": 0.39408281445503235, "learning_rate": 0.00010235173355137583, "loss": 1.0691, "step": 5227 }, { "epoch": 2.3972021557160876, "grad_norm": 0.33270618319511414, "learning_rate": 0.00010220165634860041, "loss": 1.4243, "step": 5228 }, { "epoch": 2.39766081871345, "grad_norm": 0.4075172543525696, "learning_rate": 0.00010205167672862691, "loss": 1.5062, "step": 5229 }, { "epoch": 2.398119481710813, "grad_norm": 0.28742605447769165, "learning_rate": 0.00010190179472824651, "loss": 1.4183, "step": 5230 }, { "epoch": 2.3985781447081758, "grad_norm": 0.35692864656448364, "learning_rate": 0.00010175201038422644, "loss": 1.2006, "step": 5231 }, { "epoch": 2.399036807705538, "grad_norm": 0.20757196843624115, "learning_rate": 0.00010160232373330963, "loss": 0.9988, "step": 5232 }, { "epoch": 2.399495470702901, "grad_norm": 0.3633536696434021, "learning_rate": 0.00010145273481221534, "loss": 1.744, "step": 5233 }, { "epoch": 2.399954133700264, "grad_norm": 0.3276228904724121, "learning_rate": 0.00010130324365763894, "loss": 0.7654, "step": 5234 }, { "epoch": 2.4004127966976263, "grad_norm": 0.2344532608985901, "learning_rate": 0.00010115385030625157, "loss": 1.82, "step": 5235 }, { "epoch": 2.400871459694989, "grad_norm": 0.4215475022792816, "learning_rate": 0.00010100455479470055, "loss": 1.3953, "step": 5236 }, { "epoch": 2.401330122692352, "grad_norm": 0.3673091232776642, "learning_rate": 0.0001008553571596092, "loss": 2.074, "step": 5237 }, { "epoch": 2.4017887856897144, "grad_norm": 0.34490370750427246, "learning_rate": 0.00010070625743757666, "loss": 1.7086, "step": 5238 }, { "epoch": 2.4022474486870773, "grad_norm": 0.35969072580337524, "learning_rate": 0.00010055725566517826, "loss": 1.1673, "step": 5239 }, { "epoch": 2.4027061116844397, "grad_norm": 0.36554619669914246, "learning_rate": 0.00010040835187896513, "loss": 1.6442, "step": 5240 }, { "epoch": 2.4031647746818026, "grad_norm": 0.39475345611572266, "learning_rate": 0.00010025954611546457, "loss": 0.4012, "step": 5241 }, { "epoch": 2.4036234376791654, "grad_norm": 0.1467778980731964, "learning_rate": 0.00010011083841117968, "loss": 0.8082, "step": 5242 }, { "epoch": 2.404082100676528, "grad_norm": 0.13162125647068024, "learning_rate": 9.996222880258937e-05, "loss": 0.8181, "step": 5243 }, { "epoch": 2.4045407636738907, "grad_norm": 0.20284073054790497, "learning_rate": 9.98137173261488e-05, "loss": 1.213, "step": 5244 }, { "epoch": 2.404999426671253, "grad_norm": 0.303290456533432, "learning_rate": 9.966530401828883e-05, "loss": 1.3629, "step": 5245 }, { "epoch": 2.405458089668616, "grad_norm": 0.4616430997848511, "learning_rate": 9.951698891541633e-05, "loss": 1.5586, "step": 5246 }, { "epoch": 2.405916752665979, "grad_norm": 0.3878755569458008, "learning_rate": 9.936877205391398e-05, "loss": 1.2528, "step": 5247 }, { "epoch": 2.4063754156633412, "grad_norm": 0.29884567856788635, "learning_rate": 9.922065347014075e-05, "loss": 0.9254, "step": 5248 }, { "epoch": 2.406834078660704, "grad_norm": 0.2206033170223236, "learning_rate": 9.907263320043092e-05, "loss": 1.3166, "step": 5249 }, { "epoch": 2.4072927416580665, "grad_norm": 0.3127782642841339, "learning_rate": 9.892471128109498e-05, "loss": 1.0651, "step": 5250 }, { "epoch": 2.4077514046554294, "grad_norm": 0.3565352261066437, "learning_rate": 9.877688774841931e-05, "loss": 1.317, "step": 5251 }, { "epoch": 2.408210067652792, "grad_norm": 0.35081884264945984, "learning_rate": 9.8629162638666e-05, "loss": 0.9689, "step": 5252 }, { "epoch": 2.4086687306501546, "grad_norm": 0.2946298122406006, "learning_rate": 9.848153598807324e-05, "loss": 2.0137, "step": 5253 }, { "epoch": 2.4091273936475175, "grad_norm": 0.3904908001422882, "learning_rate": 9.833400783285474e-05, "loss": 1.0215, "step": 5254 }, { "epoch": 2.4095860566448803, "grad_norm": 0.4543945789337158, "learning_rate": 9.818657820920019e-05, "loss": 1.4427, "step": 5255 }, { "epoch": 2.4100447196422428, "grad_norm": 0.2723967730998993, "learning_rate": 9.803924715327528e-05, "loss": 1.3513, "step": 5256 }, { "epoch": 2.4105033826396056, "grad_norm": 0.2895095646381378, "learning_rate": 9.789201470122121e-05, "loss": 0.9585, "step": 5257 }, { "epoch": 2.4109620456369685, "grad_norm": 0.27330470085144043, "learning_rate": 9.774488088915539e-05, "loss": 1.174, "step": 5258 }, { "epoch": 2.411420708634331, "grad_norm": 0.31435030698776245, "learning_rate": 9.759784575317082e-05, "loss": 1.9413, "step": 5259 }, { "epoch": 2.4118793716316937, "grad_norm": 0.34424829483032227, "learning_rate": 9.74509093293361e-05, "loss": 1.6773, "step": 5260 }, { "epoch": 2.412338034629056, "grad_norm": 0.37348756194114685, "learning_rate": 9.730407165369576e-05, "loss": 1.3452, "step": 5261 }, { "epoch": 2.412796697626419, "grad_norm": 0.3043566346168518, "learning_rate": 9.715733276227029e-05, "loss": 1.5723, "step": 5262 }, { "epoch": 2.413255360623782, "grad_norm": 0.3890668451786041, "learning_rate": 9.701069269105567e-05, "loss": 1.2312, "step": 5263 }, { "epoch": 2.4137140236211443, "grad_norm": 0.30525264143943787, "learning_rate": 9.686415147602401e-05, "loss": 1.858, "step": 5264 }, { "epoch": 2.414172686618507, "grad_norm": 0.33891257643699646, "learning_rate": 9.671770915312267e-05, "loss": 0.9398, "step": 5265 }, { "epoch": 2.4146313496158696, "grad_norm": 0.2957022488117218, "learning_rate": 9.657136575827491e-05, "loss": 1.884, "step": 5266 }, { "epoch": 2.4150900126132324, "grad_norm": 0.5537558197975159, "learning_rate": 9.642512132738012e-05, "loss": 1.1761, "step": 5267 }, { "epoch": 2.4155486756105953, "grad_norm": 0.39335304498672485, "learning_rate": 9.627897589631301e-05, "loss": 1.5337, "step": 5268 }, { "epoch": 2.4160073386079577, "grad_norm": 0.23258255422115326, "learning_rate": 9.613292950092406e-05, "loss": 1.4903, "step": 5269 }, { "epoch": 2.4164660016053205, "grad_norm": 0.39902931451797485, "learning_rate": 9.598698217703972e-05, "loss": 1.5256, "step": 5270 }, { "epoch": 2.416924664602683, "grad_norm": 0.3279167711734772, "learning_rate": 9.584113396046157e-05, "loss": 1.2243, "step": 5271 }, { "epoch": 2.417383327600046, "grad_norm": 0.44774487614631653, "learning_rate": 9.569538488696744e-05, "loss": 1.1367, "step": 5272 }, { "epoch": 2.4178419905974087, "grad_norm": 0.29976382851600647, "learning_rate": 9.55497349923105e-05, "loss": 1.3715, "step": 5273 }, { "epoch": 2.418300653594771, "grad_norm": 0.3518228232860565, "learning_rate": 9.540418431221986e-05, "loss": 1.4482, "step": 5274 }, { "epoch": 2.418759316592134, "grad_norm": 0.3567742109298706, "learning_rate": 9.525873288240011e-05, "loss": 1.937, "step": 5275 }, { "epoch": 2.419217979589497, "grad_norm": 0.35031193494796753, "learning_rate": 9.511338073853149e-05, "loss": 1.551, "step": 5276 }, { "epoch": 2.419676642586859, "grad_norm": 0.1989692896604538, "learning_rate": 9.496812791626996e-05, "loss": 1.1373, "step": 5277 }, { "epoch": 2.420135305584222, "grad_norm": 0.3707393705844879, "learning_rate": 9.48229744512471e-05, "loss": 1.5035, "step": 5278 }, { "epoch": 2.4205939685815845, "grad_norm": 0.3178713023662567, "learning_rate": 9.467792037907008e-05, "loss": 1.8657, "step": 5279 }, { "epoch": 2.4210526315789473, "grad_norm": 0.30721351504325867, "learning_rate": 9.453296573532172e-05, "loss": 1.3541, "step": 5280 }, { "epoch": 2.42151129457631, "grad_norm": 0.4439692199230194, "learning_rate": 9.438811055556057e-05, "loss": 1.243, "step": 5281 }, { "epoch": 2.4219699575736726, "grad_norm": 0.206447571516037, "learning_rate": 9.424335487532037e-05, "loss": 0.8625, "step": 5282 }, { "epoch": 2.4224286205710355, "grad_norm": 0.3932112157344818, "learning_rate": 9.40986987301109e-05, "loss": 1.208, "step": 5283 }, { "epoch": 2.4228872835683983, "grad_norm": 0.28267911076545715, "learning_rate": 9.395414215541731e-05, "loss": 1.3602, "step": 5284 }, { "epoch": 2.4233459465657607, "grad_norm": 0.3156595528125763, "learning_rate": 9.380968518670036e-05, "loss": 1.6733, "step": 5285 }, { "epoch": 2.4238046095631236, "grad_norm": 0.3946542739868164, "learning_rate": 9.366532785939647e-05, "loss": 1.978, "step": 5286 }, { "epoch": 2.424263272560486, "grad_norm": 0.4107709527015686, "learning_rate": 9.352107020891743e-05, "loss": 2.0332, "step": 5287 }, { "epoch": 2.424721935557849, "grad_norm": 0.3392653167247772, "learning_rate": 9.337691227065075e-05, "loss": 1.1757, "step": 5288 }, { "epoch": 2.4251805985552117, "grad_norm": 0.3073945939540863, "learning_rate": 9.32328540799594e-05, "loss": 1.5089, "step": 5289 }, { "epoch": 2.425639261552574, "grad_norm": 0.37497302889823914, "learning_rate": 9.308889567218193e-05, "loss": 1.3739, "step": 5290 }, { "epoch": 2.426097924549937, "grad_norm": 0.2676681578159332, "learning_rate": 9.29450370826323e-05, "loss": 0.9049, "step": 5291 }, { "epoch": 2.4265565875472994, "grad_norm": 0.3354664742946625, "learning_rate": 9.280127834660019e-05, "loss": 1.2768, "step": 5292 }, { "epoch": 2.4270152505446623, "grad_norm": 0.2157263159751892, "learning_rate": 9.265761949935048e-05, "loss": 1.1697, "step": 5293 }, { "epoch": 2.427473913542025, "grad_norm": 0.323390394449234, "learning_rate": 9.251406057612378e-05, "loss": 1.2113, "step": 5294 }, { "epoch": 2.4279325765393875, "grad_norm": 0.2757461965084076, "learning_rate": 9.237060161213612e-05, "loss": 1.727, "step": 5295 }, { "epoch": 2.4283912395367504, "grad_norm": 0.33096444606781006, "learning_rate": 9.222724264257904e-05, "loss": 1.4423, "step": 5296 }, { "epoch": 2.428849902534113, "grad_norm": 0.31503209471702576, "learning_rate": 9.208398370261956e-05, "loss": 1.6621, "step": 5297 }, { "epoch": 2.4293085655314757, "grad_norm": 0.41407549381256104, "learning_rate": 9.194082482740012e-05, "loss": 1.9802, "step": 5298 }, { "epoch": 2.4297672285288385, "grad_norm": 0.31064707040786743, "learning_rate": 9.17977660520386e-05, "loss": 0.7767, "step": 5299 }, { "epoch": 2.430225891526201, "grad_norm": 0.26733309030532837, "learning_rate": 9.165480741162829e-05, "loss": 1.0861, "step": 5300 }, { "epoch": 2.430684554523564, "grad_norm": 0.19575932621955872, "learning_rate": 9.151194894123815e-05, "loss": 1.223, "step": 5301 }, { "epoch": 2.4311432175209267, "grad_norm": 0.22654877603054047, "learning_rate": 9.13691906759122e-05, "loss": 0.4944, "step": 5302 }, { "epoch": 2.431601880518289, "grad_norm": 0.35672181844711304, "learning_rate": 9.122653265067022e-05, "loss": 1.9861, "step": 5303 }, { "epoch": 2.432060543515652, "grad_norm": 0.32909905910491943, "learning_rate": 9.10839749005073e-05, "loss": 0.9556, "step": 5304 }, { "epoch": 2.432519206513015, "grad_norm": 0.32880130410194397, "learning_rate": 9.094151746039364e-05, "loss": 1.7028, "step": 5305 }, { "epoch": 2.432977869510377, "grad_norm": 0.3756924867630005, "learning_rate": 9.079916036527519e-05, "loss": 1.2567, "step": 5306 }, { "epoch": 2.43343653250774, "grad_norm": 0.39229616522789, "learning_rate": 9.065690365007323e-05, "loss": 1.4464, "step": 5307 }, { "epoch": 2.4338951955051025, "grad_norm": 0.19489213824272156, "learning_rate": 9.051474734968429e-05, "loss": 1.6805, "step": 5308 }, { "epoch": 2.4343538585024653, "grad_norm": 0.35982856154441833, "learning_rate": 9.037269149898036e-05, "loss": 0.8735, "step": 5309 }, { "epoch": 2.434812521499828, "grad_norm": 0.30394065380096436, "learning_rate": 9.02307361328088e-05, "loss": 1.3712, "step": 5310 }, { "epoch": 2.4352711844971906, "grad_norm": 0.28870540857315063, "learning_rate": 9.008888128599224e-05, "loss": 2.0938, "step": 5311 }, { "epoch": 2.4357298474945535, "grad_norm": 0.37780362367630005, "learning_rate": 8.994712699332875e-05, "loss": 1.53, "step": 5312 }, { "epoch": 2.436188510491916, "grad_norm": 0.41944536566734314, "learning_rate": 8.98054732895916e-05, "loss": 1.4752, "step": 5313 }, { "epoch": 2.4366471734892787, "grad_norm": 0.2813193202018738, "learning_rate": 8.966392020952952e-05, "loss": 1.2044, "step": 5314 }, { "epoch": 2.4371058364866416, "grad_norm": 0.38517066836357117, "learning_rate": 8.95224677878666e-05, "loss": 1.5339, "step": 5315 }, { "epoch": 2.437564499484004, "grad_norm": 0.38846316933631897, "learning_rate": 8.938111605930194e-05, "loss": 0.9716, "step": 5316 }, { "epoch": 2.438023162481367, "grad_norm": 0.30371803045272827, "learning_rate": 8.923986505851023e-05, "loss": 1.2411, "step": 5317 }, { "epoch": 2.4384818254787293, "grad_norm": 0.28628620505332947, "learning_rate": 8.909871482014132e-05, "loss": 0.8832, "step": 5318 }, { "epoch": 2.438940488476092, "grad_norm": 0.17390215396881104, "learning_rate": 8.895766537882027e-05, "loss": 1.0545, "step": 5319 }, { "epoch": 2.439399151473455, "grad_norm": 0.3960818946361542, "learning_rate": 8.8816716769148e-05, "loss": 1.9539, "step": 5320 }, { "epoch": 2.4398578144708174, "grad_norm": 0.4454388916492462, "learning_rate": 8.867586902569968e-05, "loss": 1.5828, "step": 5321 }, { "epoch": 2.4403164774681803, "grad_norm": 0.33164292573928833, "learning_rate": 8.85351221830265e-05, "loss": 1.5117, "step": 5322 }, { "epoch": 2.440775140465543, "grad_norm": 0.31885433197021484, "learning_rate": 8.839447627565472e-05, "loss": 1.6736, "step": 5323 }, { "epoch": 2.4412338034629055, "grad_norm": 0.28047341108322144, "learning_rate": 8.825393133808574e-05, "loss": 0.3669, "step": 5324 }, { "epoch": 2.4416924664602684, "grad_norm": 0.09215112030506134, "learning_rate": 8.811348740479619e-05, "loss": 1.2191, "step": 5325 }, { "epoch": 2.4421511294576312, "grad_norm": 0.33795610070228577, "learning_rate": 8.797314451023819e-05, "loss": 1.0369, "step": 5326 }, { "epoch": 2.4426097924549937, "grad_norm": 0.34105727076530457, "learning_rate": 8.783290268883859e-05, "loss": 1.9551, "step": 5327 }, { "epoch": 2.4430684554523565, "grad_norm": 0.2725188732147217, "learning_rate": 8.76927619749998e-05, "loss": 0.8592, "step": 5328 }, { "epoch": 2.443527118449719, "grad_norm": 0.36431559920310974, "learning_rate": 8.75527224030993e-05, "loss": 1.7697, "step": 5329 }, { "epoch": 2.443985781447082, "grad_norm": 0.38820746541023254, "learning_rate": 8.741278400749003e-05, "loss": 0.8046, "step": 5330 }, { "epoch": 2.4444444444444446, "grad_norm": 0.25825342535972595, "learning_rate": 8.72729468224997e-05, "loss": 1.3749, "step": 5331 }, { "epoch": 2.444903107441807, "grad_norm": 0.300819993019104, "learning_rate": 8.713321088243159e-05, "loss": 0.8929, "step": 5332 }, { "epoch": 2.44536177043917, "grad_norm": 0.0780341699719429, "learning_rate": 8.699357622156368e-05, "loss": 0.6362, "step": 5333 }, { "epoch": 2.4458204334365323, "grad_norm": 0.28823330998420715, "learning_rate": 8.685404287414939e-05, "loss": 1.5396, "step": 5334 }, { "epoch": 2.446279096433895, "grad_norm": 0.3040931522846222, "learning_rate": 8.671461087441735e-05, "loss": 0.73, "step": 5335 }, { "epoch": 2.446737759431258, "grad_norm": 0.4103958010673523, "learning_rate": 8.657528025657118e-05, "loss": 2.0576, "step": 5336 }, { "epoch": 2.4471964224286205, "grad_norm": 0.33495497703552246, "learning_rate": 8.643605105478986e-05, "loss": 0.8664, "step": 5337 }, { "epoch": 2.4476550854259833, "grad_norm": 0.2784813344478607, "learning_rate": 8.629692330322691e-05, "loss": 1.8088, "step": 5338 }, { "epoch": 2.4481137484233457, "grad_norm": 0.3490314185619354, "learning_rate": 8.615789703601179e-05, "loss": 1.2238, "step": 5339 }, { "epoch": 2.4485724114207086, "grad_norm": 0.2916855812072754, "learning_rate": 8.601897228724842e-05, "loss": 1.0304, "step": 5340 }, { "epoch": 2.4490310744180714, "grad_norm": 0.3161444365978241, "learning_rate": 8.588014909101616e-05, "loss": 1.833, "step": 5341 }, { "epoch": 2.449489737415434, "grad_norm": 0.3349079191684723, "learning_rate": 8.574142748136926e-05, "loss": 0.7148, "step": 5342 }, { "epoch": 2.4499484004127967, "grad_norm": 0.19072647392749786, "learning_rate": 8.560280749233729e-05, "loss": 0.9673, "step": 5343 }, { "epoch": 2.4504070634101596, "grad_norm": 0.09794793277978897, "learning_rate": 8.546428915792449e-05, "loss": 0.7698, "step": 5344 }, { "epoch": 2.450865726407522, "grad_norm": 0.209795743227005, "learning_rate": 8.53258725121105e-05, "loss": 1.0825, "step": 5345 }, { "epoch": 2.451324389404885, "grad_norm": 0.39282190799713135, "learning_rate": 8.518755758884988e-05, "loss": 2.1384, "step": 5346 }, { "epoch": 2.4517830524022477, "grad_norm": 0.3572220504283905, "learning_rate": 8.504934442207241e-05, "loss": 0.8121, "step": 5347 }, { "epoch": 2.45224171539961, "grad_norm": 0.20404981076717377, "learning_rate": 8.491123304568271e-05, "loss": 0.3737, "step": 5348 }, { "epoch": 2.452700378396973, "grad_norm": 0.1945522129535675, "learning_rate": 8.477322349356042e-05, "loss": 1.6915, "step": 5349 }, { "epoch": 2.4531590413943354, "grad_norm": 0.4479086101055145, "learning_rate": 8.46353157995604e-05, "loss": 2.0293, "step": 5350 }, { "epoch": 2.4536177043916982, "grad_norm": 0.3536776602268219, "learning_rate": 8.449750999751238e-05, "loss": 1.1595, "step": 5351 }, { "epoch": 2.454076367389061, "grad_norm": 0.23635387420654297, "learning_rate": 8.435980612122101e-05, "loss": 0.7237, "step": 5352 }, { "epoch": 2.4545350303864235, "grad_norm": 0.32154580950737, "learning_rate": 8.422220420446613e-05, "loss": 1.7249, "step": 5353 }, { "epoch": 2.4549936933837864, "grad_norm": 0.34665876626968384, "learning_rate": 8.408470428100262e-05, "loss": 1.2853, "step": 5354 }, { "epoch": 2.455452356381149, "grad_norm": 0.5096401572227478, "learning_rate": 8.394730638455994e-05, "loss": 0.9551, "step": 5355 }, { "epoch": 2.4559110193785116, "grad_norm": 0.28207308053970337, "learning_rate": 8.381001054884291e-05, "loss": 1.1176, "step": 5356 }, { "epoch": 2.4563696823758745, "grad_norm": 0.23310762643814087, "learning_rate": 8.367281680753114e-05, "loss": 0.9396, "step": 5357 }, { "epoch": 2.456828345373237, "grad_norm": 0.29276517033576965, "learning_rate": 8.353572519427932e-05, "loss": 1.1319, "step": 5358 }, { "epoch": 2.4572870083705998, "grad_norm": 0.18109527230262756, "learning_rate": 8.339873574271694e-05, "loss": 0.9314, "step": 5359 }, { "epoch": 2.457745671367962, "grad_norm": 0.26322728395462036, "learning_rate": 8.326184848644852e-05, "loss": 1.3008, "step": 5360 }, { "epoch": 2.458204334365325, "grad_norm": 0.39521169662475586, "learning_rate": 8.312506345905358e-05, "loss": 1.3013, "step": 5361 }, { "epoch": 2.458662997362688, "grad_norm": 0.2466443032026291, "learning_rate": 8.298838069408632e-05, "loss": 1.2078, "step": 5362 }, { "epoch": 2.4591216603600503, "grad_norm": 0.31641414761543274, "learning_rate": 8.28518002250761e-05, "loss": 1.4739, "step": 5363 }, { "epoch": 2.459580323357413, "grad_norm": 0.34621062874794006, "learning_rate": 8.271532208552712e-05, "loss": 1.1296, "step": 5364 }, { "epoch": 2.4600389863547756, "grad_norm": 0.2962487041950226, "learning_rate": 8.25789463089185e-05, "loss": 1.2483, "step": 5365 }, { "epoch": 2.4604976493521384, "grad_norm": 0.1835666447877884, "learning_rate": 8.2442672928704e-05, "loss": 0.8519, "step": 5366 }, { "epoch": 2.4609563123495013, "grad_norm": 0.290197491645813, "learning_rate": 8.230650197831252e-05, "loss": 0.886, "step": 5367 }, { "epoch": 2.4614149753468637, "grad_norm": 0.27010378241539, "learning_rate": 8.217043349114789e-05, "loss": 1.2361, "step": 5368 }, { "epoch": 2.4618736383442266, "grad_norm": 0.5618923306465149, "learning_rate": 8.203446750058862e-05, "loss": 2.2034, "step": 5369 }, { "epoch": 2.4623323013415894, "grad_norm": 0.3638753294944763, "learning_rate": 8.189860403998816e-05, "loss": 1.4188, "step": 5370 }, { "epoch": 2.462790964338952, "grad_norm": 0.22352086007595062, "learning_rate": 8.176284314267479e-05, "loss": 0.8657, "step": 5371 }, { "epoch": 2.4632496273363147, "grad_norm": 0.32366839051246643, "learning_rate": 8.162718484195169e-05, "loss": 1.392, "step": 5372 }, { "epoch": 2.4637082903336776, "grad_norm": 0.25489479303359985, "learning_rate": 8.14916291710967e-05, "loss": 1.4828, "step": 5373 }, { "epoch": 2.46416695333104, "grad_norm": 0.34520161151885986, "learning_rate": 8.135617616336272e-05, "loss": 1.5386, "step": 5374 }, { "epoch": 2.464625616328403, "grad_norm": 0.4345172643661499, "learning_rate": 8.122082585197732e-05, "loss": 0.7834, "step": 5375 }, { "epoch": 2.4650842793257652, "grad_norm": 0.24464713037014008, "learning_rate": 8.108557827014295e-05, "loss": 0.9882, "step": 5376 }, { "epoch": 2.465542942323128, "grad_norm": 0.2222648561000824, "learning_rate": 8.09504334510367e-05, "loss": 1.5798, "step": 5377 }, { "epoch": 2.466001605320491, "grad_norm": 0.2998507618904114, "learning_rate": 8.081539142781058e-05, "loss": 1.2544, "step": 5378 }, { "epoch": 2.4664602683178534, "grad_norm": 0.3242281973361969, "learning_rate": 8.068045223359144e-05, "loss": 0.8808, "step": 5379 }, { "epoch": 2.4669189313152162, "grad_norm": 0.30032265186309814, "learning_rate": 8.054561590148085e-05, "loss": 1.9047, "step": 5380 }, { "epoch": 2.4673775943125786, "grad_norm": 0.3668994903564453, "learning_rate": 8.041088246455492e-05, "loss": 0.7418, "step": 5381 }, { "epoch": 2.4678362573099415, "grad_norm": 0.18076832592487335, "learning_rate": 8.027625195586519e-05, "loss": 1.2765, "step": 5382 }, { "epoch": 2.4682949203073044, "grad_norm": 0.32863399386405945, "learning_rate": 8.014172440843714e-05, "loss": 1.4399, "step": 5383 }, { "epoch": 2.4687535833046668, "grad_norm": 0.27445659041404724, "learning_rate": 8.000729985527139e-05, "loss": 0.8303, "step": 5384 }, { "epoch": 2.4692122463020296, "grad_norm": 0.23894087970256805, "learning_rate": 7.987297832934326e-05, "loss": 1.2864, "step": 5385 }, { "epoch": 2.469670909299392, "grad_norm": 0.2814047336578369, "learning_rate": 7.973875986360285e-05, "loss": 1.5797, "step": 5386 }, { "epoch": 2.470129572296755, "grad_norm": 0.4583660364151001, "learning_rate": 7.960464449097498e-05, "loss": 1.522, "step": 5387 }, { "epoch": 2.4705882352941178, "grad_norm": 0.3099602460861206, "learning_rate": 7.947063224435897e-05, "loss": 1.2623, "step": 5388 }, { "epoch": 2.47104689829148, "grad_norm": 0.2514623701572418, "learning_rate": 7.933672315662898e-05, "loss": 1.2364, "step": 5389 }, { "epoch": 2.471505561288843, "grad_norm": 0.27281129360198975, "learning_rate": 7.920291726063395e-05, "loss": 1.3347, "step": 5390 }, { "epoch": 2.471964224286206, "grad_norm": 0.2575758099555969, "learning_rate": 7.906921458919731e-05, "loss": 1.0359, "step": 5391 }, { "epoch": 2.4724228872835683, "grad_norm": 0.3928264081478119, "learning_rate": 7.893561517511754e-05, "loss": 1.5787, "step": 5392 }, { "epoch": 2.472881550280931, "grad_norm": 0.2101087123155594, "learning_rate": 7.880211905116747e-05, "loss": 0.6325, "step": 5393 }, { "epoch": 2.473340213278294, "grad_norm": 0.22780464589595795, "learning_rate": 7.866872625009453e-05, "loss": 1.2313, "step": 5394 }, { "epoch": 2.4737988762756564, "grad_norm": 0.19158299267292023, "learning_rate": 7.853543680462094e-05, "loss": 0.5635, "step": 5395 }, { "epoch": 2.4742575392730193, "grad_norm": 0.14477090537548065, "learning_rate": 7.840225074744367e-05, "loss": 1.1157, "step": 5396 }, { "epoch": 2.4747162022703817, "grad_norm": 0.34960320591926575, "learning_rate": 7.826916811123414e-05, "loss": 1.5063, "step": 5397 }, { "epoch": 2.4751748652677446, "grad_norm": 0.510556161403656, "learning_rate": 7.813618892863849e-05, "loss": 1.4665, "step": 5398 }, { "epoch": 2.4756335282651074, "grad_norm": 0.31196311116218567, "learning_rate": 7.80033132322776e-05, "loss": 1.184, "step": 5399 }, { "epoch": 2.47609219126247, "grad_norm": 0.3243480920791626, "learning_rate": 7.787054105474667e-05, "loss": 1.3827, "step": 5400 }, { "epoch": 2.4765508542598327, "grad_norm": 0.21474260091781616, "learning_rate": 7.773787242861557e-05, "loss": 0.7858, "step": 5401 }, { "epoch": 2.477009517257195, "grad_norm": 0.07359279692173004, "learning_rate": 7.760530738642918e-05, "loss": 0.9702, "step": 5402 }, { "epoch": 2.477468180254558, "grad_norm": 0.34640324115753174, "learning_rate": 7.747284596070647e-05, "loss": 1.6594, "step": 5403 }, { "epoch": 2.477926843251921, "grad_norm": 0.217728853225708, "learning_rate": 7.734048818394141e-05, "loss": 1.1982, "step": 5404 }, { "epoch": 2.478385506249283, "grad_norm": 0.3077888488769531, "learning_rate": 7.720823408860195e-05, "loss": 1.1781, "step": 5405 }, { "epoch": 2.478844169246646, "grad_norm": 0.39549943804740906, "learning_rate": 7.707608370713116e-05, "loss": 1.9365, "step": 5406 }, { "epoch": 2.4793028322440085, "grad_norm": 0.3485013246536255, "learning_rate": 7.694403707194647e-05, "loss": 1.3334, "step": 5407 }, { "epoch": 2.4797614952413713, "grad_norm": 0.290386438369751, "learning_rate": 7.681209421543994e-05, "loss": 1.2493, "step": 5408 }, { "epoch": 2.480220158238734, "grad_norm": 0.3037647306919098, "learning_rate": 7.668025516997795e-05, "loss": 1.694, "step": 5409 }, { "epoch": 2.4806788212360966, "grad_norm": 0.4137989282608032, "learning_rate": 7.654851996790174e-05, "loss": 2.085, "step": 5410 }, { "epoch": 2.4811374842334595, "grad_norm": 0.3738190531730652, "learning_rate": 7.64168886415268e-05, "loss": 1.1558, "step": 5411 }, { "epoch": 2.4815961472308223, "grad_norm": 0.3116551339626312, "learning_rate": 7.628536122314328e-05, "loss": 1.9504, "step": 5412 }, { "epoch": 2.4820548102281847, "grad_norm": 0.30240756273269653, "learning_rate": 7.615393774501578e-05, "loss": 0.7979, "step": 5413 }, { "epoch": 2.4825134732255476, "grad_norm": 0.29710403084754944, "learning_rate": 7.602261823938339e-05, "loss": 1.5649, "step": 5414 }, { "epoch": 2.4829721362229105, "grad_norm": 0.3587922155857086, "learning_rate": 7.589140273845995e-05, "loss": 1.4416, "step": 5415 }, { "epoch": 2.483430799220273, "grad_norm": 0.3009377121925354, "learning_rate": 7.576029127443329e-05, "loss": 1.2025, "step": 5416 }, { "epoch": 2.4838894622176357, "grad_norm": 0.3110191226005554, "learning_rate": 7.562928387946611e-05, "loss": 1.5738, "step": 5417 }, { "epoch": 2.484348125214998, "grad_norm": 0.5595056414604187, "learning_rate": 7.549838058569542e-05, "loss": 1.5994, "step": 5418 }, { "epoch": 2.484806788212361, "grad_norm": 0.37332621216773987, "learning_rate": 7.536758142523281e-05, "loss": 1.8284, "step": 5419 }, { "epoch": 2.485265451209724, "grad_norm": 0.34219086170196533, "learning_rate": 7.523688643016424e-05, "loss": 0.7883, "step": 5420 }, { "epoch": 2.4857241142070863, "grad_norm": 0.2129797488451004, "learning_rate": 7.510629563255017e-05, "loss": 1.3984, "step": 5421 }, { "epoch": 2.486182777204449, "grad_norm": 0.2923283874988556, "learning_rate": 7.497580906442537e-05, "loss": 1.4247, "step": 5422 }, { "epoch": 2.4866414402018115, "grad_norm": 0.3322497606277466, "learning_rate": 7.48454267577992e-05, "loss": 1.6427, "step": 5423 }, { "epoch": 2.4871001031991744, "grad_norm": 0.3204174041748047, "learning_rate": 7.471514874465535e-05, "loss": 0.9109, "step": 5424 }, { "epoch": 2.4875587661965373, "grad_norm": 0.23341026902198792, "learning_rate": 7.4584975056952e-05, "loss": 0.7723, "step": 5425 }, { "epoch": 2.4880174291938997, "grad_norm": 0.29962819814682007, "learning_rate": 7.445490572662168e-05, "loss": 1.5294, "step": 5426 }, { "epoch": 2.4884760921912625, "grad_norm": 0.30557799339294434, "learning_rate": 7.432494078557145e-05, "loss": 0.6204, "step": 5427 }, { "epoch": 2.488934755188625, "grad_norm": 0.14737457036972046, "learning_rate": 7.41950802656824e-05, "loss": 1.0, "step": 5428 }, { "epoch": 2.489393418185988, "grad_norm": 0.29757335782051086, "learning_rate": 7.406532419881035e-05, "loss": 1.3304, "step": 5429 }, { "epoch": 2.4898520811833507, "grad_norm": 0.3414275348186493, "learning_rate": 7.39356726167854e-05, "loss": 1.3593, "step": 5430 }, { "epoch": 2.490310744180713, "grad_norm": 0.1296243965625763, "learning_rate": 7.380612555141209e-05, "loss": 0.3876, "step": 5431 }, { "epoch": 2.490769407178076, "grad_norm": 0.2734202444553375, "learning_rate": 7.367668303446917e-05, "loss": 1.3786, "step": 5432 }, { "epoch": 2.4912280701754383, "grad_norm": 0.35316675901412964, "learning_rate": 7.354734509770983e-05, "loss": 1.3578, "step": 5433 }, { "epoch": 2.491686733172801, "grad_norm": 0.33932507038116455, "learning_rate": 7.341811177286167e-05, "loss": 1.4916, "step": 5434 }, { "epoch": 2.492145396170164, "grad_norm": 0.535605788230896, "learning_rate": 7.328898309162652e-05, "loss": 0.992, "step": 5435 }, { "epoch": 2.4926040591675265, "grad_norm": 0.31163036823272705, "learning_rate": 7.315995908568051e-05, "loss": 1.804, "step": 5436 }, { "epoch": 2.4930627221648893, "grad_norm": 0.29368847608566284, "learning_rate": 7.303103978667425e-05, "loss": 1.1411, "step": 5437 }, { "epoch": 2.493521385162252, "grad_norm": 0.40923842787742615, "learning_rate": 7.290222522623263e-05, "loss": 1.6904, "step": 5438 }, { "epoch": 2.4939800481596146, "grad_norm": 0.18885545432567596, "learning_rate": 7.277351543595457e-05, "loss": 0.9873, "step": 5439 }, { "epoch": 2.4944387111569775, "grad_norm": 0.31630420684814453, "learning_rate": 7.264491044741367e-05, "loss": 0.9632, "step": 5440 }, { "epoch": 2.4948973741543403, "grad_norm": 0.0943269282579422, "learning_rate": 7.251641029215761e-05, "loss": 0.6806, "step": 5441 }, { "epoch": 2.4953560371517027, "grad_norm": 0.38258495926856995, "learning_rate": 7.238801500170838e-05, "loss": 1.7886, "step": 5442 }, { "epoch": 2.4958147001490656, "grad_norm": 0.2949416935443878, "learning_rate": 7.225972460756236e-05, "loss": 1.0203, "step": 5443 }, { "epoch": 2.496273363146428, "grad_norm": 0.3290545344352722, "learning_rate": 7.213153914119008e-05, "loss": 1.5944, "step": 5444 }, { "epoch": 2.496732026143791, "grad_norm": 0.19472959637641907, "learning_rate": 7.200345863403629e-05, "loss": 1.22, "step": 5445 }, { "epoch": 2.4971906891411537, "grad_norm": 0.27619174122810364, "learning_rate": 7.187548311752012e-05, "loss": 1.4291, "step": 5446 }, { "epoch": 2.497649352138516, "grad_norm": 0.3692777156829834, "learning_rate": 7.174761262303492e-05, "loss": 1.534, "step": 5447 }, { "epoch": 2.498108015135879, "grad_norm": 0.4094838798046112, "learning_rate": 7.161984718194819e-05, "loss": 1.1218, "step": 5448 }, { "epoch": 2.4985666781332414, "grad_norm": 0.4053116738796234, "learning_rate": 7.149218682560182e-05, "loss": 1.2167, "step": 5449 }, { "epoch": 2.4990253411306043, "grad_norm": 0.32053858041763306, "learning_rate": 7.136463158531159e-05, "loss": 1.58, "step": 5450 }, { "epoch": 2.499484004127967, "grad_norm": 0.287165105342865, "learning_rate": 7.123718149236791e-05, "loss": 1.4514, "step": 5451 }, { "epoch": 2.4999426671253295, "grad_norm": 0.2848725914955139, "learning_rate": 7.11098365780351e-05, "loss": 1.2621, "step": 5452 }, { "epoch": 2.5004013301226924, "grad_norm": 0.398199200630188, "learning_rate": 7.098259687355174e-05, "loss": 1.1001, "step": 5453 }, { "epoch": 2.500859993120055, "grad_norm": 0.29740533232688904, "learning_rate": 7.085546241013085e-05, "loss": 1.9265, "step": 5454 }, { "epoch": 2.5013186561174177, "grad_norm": 0.25716161727905273, "learning_rate": 7.07284332189595e-05, "loss": 0.3225, "step": 5455 }, { "epoch": 2.5017773191147805, "grad_norm": 0.19730542600154877, "learning_rate": 7.060150933119852e-05, "loss": 1.6379, "step": 5456 }, { "epoch": 2.5022359821121434, "grad_norm": 0.3309273421764374, "learning_rate": 7.04746907779834e-05, "loss": 1.517, "step": 5457 }, { "epoch": 2.502694645109506, "grad_norm": 0.32735908031463623, "learning_rate": 7.034797759042371e-05, "loss": 1.1667, "step": 5458 }, { "epoch": 2.503153308106868, "grad_norm": 0.3031359910964966, "learning_rate": 7.022136979960303e-05, "loss": 1.3101, "step": 5459 }, { "epoch": 2.503611971104231, "grad_norm": 0.18799692392349243, "learning_rate": 7.009486743657934e-05, "loss": 0.7399, "step": 5460 }, { "epoch": 2.504070634101594, "grad_norm": 0.3134501576423645, "learning_rate": 6.996847053238437e-05, "loss": 1.1102, "step": 5461 }, { "epoch": 2.5045292970989568, "grad_norm": 0.2856585681438446, "learning_rate": 6.98421791180242e-05, "loss": 1.4576, "step": 5462 }, { "epoch": 2.504987960096319, "grad_norm": 0.34674420952796936, "learning_rate": 6.971599322447903e-05, "loss": 1.7148, "step": 5463 }, { "epoch": 2.505446623093682, "grad_norm": 0.3734150528907776, "learning_rate": 6.958991288270334e-05, "loss": 0.9374, "step": 5464 }, { "epoch": 2.5059052860910445, "grad_norm": 0.27259308099746704, "learning_rate": 6.94639381236254e-05, "loss": 1.8166, "step": 5465 }, { "epoch": 2.5063639490884073, "grad_norm": 0.3674778640270233, "learning_rate": 6.933806897814787e-05, "loss": 1.4604, "step": 5466 }, { "epoch": 2.50682261208577, "grad_norm": 0.45165976881980896, "learning_rate": 6.921230547714719e-05, "loss": 1.4612, "step": 5467 }, { "epoch": 2.5072812750831326, "grad_norm": 0.2723102271556854, "learning_rate": 6.90866476514741e-05, "loss": 0.8123, "step": 5468 }, { "epoch": 2.5077399380804954, "grad_norm": 0.27866679430007935, "learning_rate": 6.896109553195334e-05, "loss": 1.6746, "step": 5469 }, { "epoch": 2.508198601077858, "grad_norm": 0.4871840476989746, "learning_rate": 6.883564914938384e-05, "loss": 2.0107, "step": 5470 }, { "epoch": 2.5086572640752207, "grad_norm": 0.27734771370887756, "learning_rate": 6.871030853453857e-05, "loss": 0.4191, "step": 5471 }, { "epoch": 2.5091159270725836, "grad_norm": 0.3640304207801819, "learning_rate": 6.858507371816424e-05, "loss": 1.7487, "step": 5472 }, { "epoch": 2.509574590069946, "grad_norm": 0.42865926027297974, "learning_rate": 6.845994473098194e-05, "loss": 1.7012, "step": 5473 }, { "epoch": 2.510033253067309, "grad_norm": 0.27769333124160767, "learning_rate": 6.833492160368681e-05, "loss": 0.8013, "step": 5474 }, { "epoch": 2.5104919160646713, "grad_norm": 0.08679754287004471, "learning_rate": 6.821000436694791e-05, "loss": 1.0261, "step": 5475 }, { "epoch": 2.510950579062034, "grad_norm": 0.41627833247184753, "learning_rate": 6.808519305140831e-05, "loss": 1.1582, "step": 5476 }, { "epoch": 2.511409242059397, "grad_norm": 0.19535160064697266, "learning_rate": 6.79604876876852e-05, "loss": 1.0494, "step": 5477 }, { "epoch": 2.5118679050567594, "grad_norm": 0.31459343433380127, "learning_rate": 6.783588830636956e-05, "loss": 1.9631, "step": 5478 }, { "epoch": 2.5123265680541222, "grad_norm": 0.3410641551017761, "learning_rate": 6.771139493802653e-05, "loss": 1.2182, "step": 5479 }, { "epoch": 2.5127852310514847, "grad_norm": 0.3488866090774536, "learning_rate": 6.75870076131953e-05, "loss": 1.1641, "step": 5480 }, { "epoch": 2.5132438940488475, "grad_norm": 0.34782347083091736, "learning_rate": 6.746272636238898e-05, "loss": 1.5015, "step": 5481 }, { "epoch": 2.5137025570462104, "grad_norm": 0.41583070158958435, "learning_rate": 6.733855121609467e-05, "loss": 1.5594, "step": 5482 }, { "epoch": 2.5141612200435732, "grad_norm": 0.3468913435935974, "learning_rate": 6.721448220477333e-05, "loss": 1.8156, "step": 5483 }, { "epoch": 2.5146198830409356, "grad_norm": 0.324745237827301, "learning_rate": 6.709051935886007e-05, "loss": 1.3654, "step": 5484 }, { "epoch": 2.5150785460382985, "grad_norm": 0.31781327724456787, "learning_rate": 6.696666270876389e-05, "loss": 1.3649, "step": 5485 }, { "epoch": 2.515537209035661, "grad_norm": 0.3324166238307953, "learning_rate": 6.684291228486761e-05, "loss": 0.8118, "step": 5486 }, { "epoch": 2.5159958720330238, "grad_norm": 0.3521929085254669, "learning_rate": 6.67192681175282e-05, "loss": 2.1743, "step": 5487 }, { "epoch": 2.5164545350303866, "grad_norm": 0.43346625566482544, "learning_rate": 6.65957302370766e-05, "loss": 1.2647, "step": 5488 }, { "epoch": 2.516913198027749, "grad_norm": 0.10324136167764664, "learning_rate": 6.647229867381722e-05, "loss": 0.815, "step": 5489 }, { "epoch": 2.517371861025112, "grad_norm": 0.23839758336544037, "learning_rate": 6.634897345802888e-05, "loss": 1.7404, "step": 5490 }, { "epoch": 2.5178305240224743, "grad_norm": 0.39846575260162354, "learning_rate": 6.62257546199641e-05, "loss": 0.859, "step": 5491 }, { "epoch": 2.518289187019837, "grad_norm": 0.2599179148674011, "learning_rate": 6.610264218984946e-05, "loss": 1.4614, "step": 5492 }, { "epoch": 2.5187478500172, "grad_norm": 0.3531731963157654, "learning_rate": 6.597963619788521e-05, "loss": 1.4722, "step": 5493 }, { "epoch": 2.5192065130145624, "grad_norm": 0.28518784046173096, "learning_rate": 6.585673667424563e-05, "loss": 0.9644, "step": 5494 }, { "epoch": 2.5196651760119253, "grad_norm": 0.1243680790066719, "learning_rate": 6.573394364907892e-05, "loss": 0.4391, "step": 5495 }, { "epoch": 2.5201238390092877, "grad_norm": 0.11469584703445435, "learning_rate": 6.561125715250704e-05, "loss": 1.0112, "step": 5496 }, { "epoch": 2.5205825020066506, "grad_norm": 0.3533564507961273, "learning_rate": 6.548867721462587e-05, "loss": 1.1036, "step": 5497 }, { "epoch": 2.5210411650040134, "grad_norm": 0.23048144578933716, "learning_rate": 6.536620386550518e-05, "loss": 1.0601, "step": 5498 }, { "epoch": 2.521499828001376, "grad_norm": 0.20298047363758087, "learning_rate": 6.524383713518867e-05, "loss": 0.7747, "step": 5499 }, { "epoch": 2.5219584909987387, "grad_norm": 0.3512571156024933, "learning_rate": 6.512157705369354e-05, "loss": 1.4583, "step": 5500 }, { "epoch": 2.522417153996101, "grad_norm": 0.3375270962715149, "learning_rate": 6.49994236510112e-05, "loss": 1.4264, "step": 5501 }, { "epoch": 2.522875816993464, "grad_norm": 0.27176031470298767, "learning_rate": 6.487737695710677e-05, "loss": 1.6927, "step": 5502 }, { "epoch": 2.523334479990827, "grad_norm": 0.3321675956249237, "learning_rate": 6.475543700191916e-05, "loss": 0.9206, "step": 5503 }, { "epoch": 2.5237931429881897, "grad_norm": 0.19732095301151276, "learning_rate": 6.46336038153611e-05, "loss": 1.333, "step": 5504 }, { "epoch": 2.524251805985552, "grad_norm": 0.3470333218574524, "learning_rate": 6.451187742731923e-05, "loss": 1.6497, "step": 5505 }, { "epoch": 2.524710468982915, "grad_norm": 0.3208187222480774, "learning_rate": 6.43902578676538e-05, "loss": 1.3061, "step": 5506 }, { "epoch": 2.5251691319802774, "grad_norm": 0.353569358587265, "learning_rate": 6.426874516619907e-05, "loss": 2.2356, "step": 5507 }, { "epoch": 2.5256277949776402, "grad_norm": 0.3554728031158447, "learning_rate": 6.41473393527629e-05, "loss": 1.2403, "step": 5508 }, { "epoch": 2.526086457975003, "grad_norm": 0.3198685050010681, "learning_rate": 6.402604045712707e-05, "loss": 0.8093, "step": 5509 }, { "epoch": 2.5265451209723655, "grad_norm": 0.20116372406482697, "learning_rate": 6.39048485090471e-05, "loss": 1.1317, "step": 5510 }, { "epoch": 2.5270037839697284, "grad_norm": 0.33361703157424927, "learning_rate": 6.378376353825222e-05, "loss": 2.0852, "step": 5511 }, { "epoch": 2.5274624469670908, "grad_norm": 0.3906664252281189, "learning_rate": 6.366278557444538e-05, "loss": 1.202, "step": 5512 }, { "epoch": 2.5279211099644536, "grad_norm": 0.2128971815109253, "learning_rate": 6.354191464730335e-05, "loss": 0.8657, "step": 5513 }, { "epoch": 2.5283797729618165, "grad_norm": 0.2909519672393799, "learning_rate": 6.34211507864767e-05, "loss": 1.7382, "step": 5514 }, { "epoch": 2.528838435959179, "grad_norm": 0.4050029218196869, "learning_rate": 6.330049402158955e-05, "loss": 1.5528, "step": 5515 }, { "epoch": 2.5292970989565418, "grad_norm": 0.27755218744277954, "learning_rate": 6.317994438224023e-05, "loss": 1.2415, "step": 5516 }, { "epoch": 2.529755761953904, "grad_norm": 0.3861069083213806, "learning_rate": 6.305950189800003e-05, "loss": 2.1422, "step": 5517 }, { "epoch": 2.530214424951267, "grad_norm": 0.5897046327590942, "learning_rate": 6.293916659841454e-05, "loss": 2.0242, "step": 5518 }, { "epoch": 2.53067308794863, "grad_norm": 0.27490806579589844, "learning_rate": 6.281893851300285e-05, "loss": 0.8, "step": 5519 }, { "epoch": 2.5311317509459923, "grad_norm": 0.35904571413993835, "learning_rate": 6.269881767125778e-05, "loss": 1.6329, "step": 5520 }, { "epoch": 2.531590413943355, "grad_norm": 0.3965526819229126, "learning_rate": 6.257880410264589e-05, "loss": 1.6531, "step": 5521 }, { "epoch": 2.5320490769407176, "grad_norm": 0.3628177046775818, "learning_rate": 6.245889783660735e-05, "loss": 1.5156, "step": 5522 }, { "epoch": 2.5325077399380804, "grad_norm": 0.19028045237064362, "learning_rate": 6.233909890255596e-05, "loss": 0.7489, "step": 5523 }, { "epoch": 2.5329664029354433, "grad_norm": 0.30474984645843506, "learning_rate": 6.22194073298793e-05, "loss": 1.6149, "step": 5524 }, { "epoch": 2.533425065932806, "grad_norm": 0.3913409411907196, "learning_rate": 6.209982314793845e-05, "loss": 1.6556, "step": 5525 }, { "epoch": 2.5338837289301686, "grad_norm": 0.3876248896121979, "learning_rate": 6.198034638606854e-05, "loss": 1.5082, "step": 5526 }, { "epoch": 2.5343423919275314, "grad_norm": 0.324945867061615, "learning_rate": 6.186097707357802e-05, "loss": 1.6067, "step": 5527 }, { "epoch": 2.534801054924894, "grad_norm": 0.26667168736457825, "learning_rate": 6.174171523974886e-05, "loss": 1.201, "step": 5528 }, { "epoch": 2.5352597179222567, "grad_norm": 0.2764081656932831, "learning_rate": 6.1622560913837e-05, "loss": 1.2445, "step": 5529 }, { "epoch": 2.5357183809196195, "grad_norm": 0.39166367053985596, "learning_rate": 6.150351412507176e-05, "loss": 1.3779, "step": 5530 }, { "epoch": 2.536177043916982, "grad_norm": 0.2029440701007843, "learning_rate": 6.138457490265625e-05, "loss": 1.1893, "step": 5531 }, { "epoch": 2.536635706914345, "grad_norm": 0.3429381847381592, "learning_rate": 6.126574327576711e-05, "loss": 1.2501, "step": 5532 }, { "epoch": 2.5370943699117072, "grad_norm": 0.41793501377105713, "learning_rate": 6.11470192735547e-05, "loss": 1.7427, "step": 5533 }, { "epoch": 2.53755303290907, "grad_norm": 0.3555110692977905, "learning_rate": 6.1028402925142665e-05, "loss": 1.5948, "step": 5534 }, { "epoch": 2.538011695906433, "grad_norm": 0.26082339882850647, "learning_rate": 6.090989425962839e-05, "loss": 1.725, "step": 5535 }, { "epoch": 2.5384703589037954, "grad_norm": 0.3338443338871002, "learning_rate": 6.079149330608319e-05, "loss": 1.4976, "step": 5536 }, { "epoch": 2.538929021901158, "grad_norm": 0.35809195041656494, "learning_rate": 6.067320009355148e-05, "loss": 1.532, "step": 5537 }, { "epoch": 2.5393876848985206, "grad_norm": 0.29855048656463623, "learning_rate": 6.055501465105168e-05, "loss": 1.3347, "step": 5538 }, { "epoch": 2.5398463478958835, "grad_norm": 0.3272113800048828, "learning_rate": 6.043693700757519e-05, "loss": 0.8456, "step": 5539 }, { "epoch": 2.5403050108932463, "grad_norm": 0.06610861420631409, "learning_rate": 6.0318967192087516e-05, "loss": 1.141, "step": 5540 }, { "epoch": 2.5407636738906088, "grad_norm": 0.3634183406829834, "learning_rate": 6.0201105233527453e-05, "loss": 0.9582, "step": 5541 }, { "epoch": 2.5412223368879716, "grad_norm": 0.13566255569458008, "learning_rate": 6.008335116080732e-05, "loss": 1.0286, "step": 5542 }, { "epoch": 2.541680999885334, "grad_norm": 0.33389878273010254, "learning_rate": 5.9965705002813174e-05, "loss": 1.2472, "step": 5543 }, { "epoch": 2.542139662882697, "grad_norm": 0.3005581796169281, "learning_rate": 5.984816678840449e-05, "loss": 1.3406, "step": 5544 }, { "epoch": 2.5425983258800597, "grad_norm": 0.44604259729385376, "learning_rate": 5.973073654641392e-05, "loss": 1.6216, "step": 5545 }, { "epoch": 2.543056988877422, "grad_norm": 0.365333616733551, "learning_rate": 5.9613414305648314e-05, "loss": 1.1402, "step": 5546 }, { "epoch": 2.543515651874785, "grad_norm": 0.32972466945648193, "learning_rate": 5.9496200094887546e-05, "loss": 1.101, "step": 5547 }, { "epoch": 2.5439743148721474, "grad_norm": 0.24824364483356476, "learning_rate": 5.937909394288504e-05, "loss": 1.7358, "step": 5548 }, { "epoch": 2.5444329778695103, "grad_norm": 0.433464378118515, "learning_rate": 5.92620958783679e-05, "loss": 1.2081, "step": 5549 }, { "epoch": 2.544891640866873, "grad_norm": 0.07062580436468124, "learning_rate": 5.9145205930036583e-05, "loss": 0.7539, "step": 5550 }, { "epoch": 2.545350303864236, "grad_norm": 0.3250384032726288, "learning_rate": 5.9028424126564894e-05, "loss": 0.9884, "step": 5551 }, { "epoch": 2.5458089668615984, "grad_norm": 0.3027406930923462, "learning_rate": 5.89117504966003e-05, "loss": 1.7088, "step": 5552 }, { "epoch": 2.5462676298589613, "grad_norm": 0.23532333970069885, "learning_rate": 5.879518506876375e-05, "loss": 0.5515, "step": 5553 }, { "epoch": 2.5467262928563237, "grad_norm": 0.25035008788108826, "learning_rate": 5.867872787164952e-05, "loss": 1.5544, "step": 5554 }, { "epoch": 2.5471849558536865, "grad_norm": 0.38063761591911316, "learning_rate": 5.8562378933825486e-05, "loss": 1.6796, "step": 5555 }, { "epoch": 2.5476436188510494, "grad_norm": 0.35923126339912415, "learning_rate": 5.8446138283832826e-05, "loss": 1.0818, "step": 5556 }, { "epoch": 2.548102281848412, "grad_norm": 0.19541509449481964, "learning_rate": 5.833000595018617e-05, "loss": 1.6249, "step": 5557 }, { "epoch": 2.5485609448457747, "grad_norm": 0.38436034321784973, "learning_rate": 5.821398196137368e-05, "loss": 1.3776, "step": 5558 }, { "epoch": 2.549019607843137, "grad_norm": 0.2822701930999756, "learning_rate": 5.8098066345856846e-05, "loss": 0.4237, "step": 5559 }, { "epoch": 2.5494782708405, "grad_norm": 0.15349756181240082, "learning_rate": 5.798225913207067e-05, "loss": 0.5825, "step": 5560 }, { "epoch": 2.549936933837863, "grad_norm": 0.13313986361026764, "learning_rate": 5.786656034842347e-05, "loss": 1.3864, "step": 5561 }, { "epoch": 2.550395596835225, "grad_norm": 0.45757579803466797, "learning_rate": 5.775097002329693e-05, "loss": 0.76, "step": 5562 }, { "epoch": 2.550854259832588, "grad_norm": 0.1999332308769226, "learning_rate": 5.763548818504616e-05, "loss": 1.2987, "step": 5563 }, { "epoch": 2.5513129228299505, "grad_norm": 0.30660200119018555, "learning_rate": 5.752011486199982e-05, "loss": 1.3302, "step": 5564 }, { "epoch": 2.5517715858273133, "grad_norm": 0.27582883834838867, "learning_rate": 5.740485008245966e-05, "loss": 1.0434, "step": 5565 }, { "epoch": 2.552230248824676, "grad_norm": 0.24701207876205444, "learning_rate": 5.728969387470112e-05, "loss": 0.4937, "step": 5566 }, { "epoch": 2.5526889118220386, "grad_norm": 0.25263074040412903, "learning_rate": 5.7174646266972776e-05, "loss": 1.2341, "step": 5567 }, { "epoch": 2.5531475748194015, "grad_norm": 0.3954515755176544, "learning_rate": 5.70597072874966e-05, "loss": 1.4972, "step": 5568 }, { "epoch": 2.553606237816764, "grad_norm": 0.4183090031147003, "learning_rate": 5.694487696446804e-05, "loss": 2.063, "step": 5569 }, { "epoch": 2.5540649008141267, "grad_norm": 0.3172152638435364, "learning_rate": 5.683015532605573e-05, "loss": 0.9099, "step": 5570 }, { "epoch": 2.5545235638114896, "grad_norm": 0.3616786301136017, "learning_rate": 5.671554240040172e-05, "loss": 1.481, "step": 5571 }, { "epoch": 2.5549822268088525, "grad_norm": 0.30609598755836487, "learning_rate": 5.660103821562151e-05, "loss": 1.6557, "step": 5572 }, { "epoch": 2.555440889806215, "grad_norm": 0.4670886993408203, "learning_rate": 5.648664279980365e-05, "loss": 1.2957, "step": 5573 }, { "epoch": 2.5558995528035777, "grad_norm": 0.2856581211090088, "learning_rate": 5.6372356181010164e-05, "loss": 1.1547, "step": 5574 }, { "epoch": 2.55635821580094, "grad_norm": 0.21106205880641937, "learning_rate": 5.625817838727643e-05, "loss": 1.188, "step": 5575 }, { "epoch": 2.556816878798303, "grad_norm": 0.1674429029226303, "learning_rate": 5.614410944661108e-05, "loss": 0.5747, "step": 5576 }, { "epoch": 2.557275541795666, "grad_norm": 0.15634135901927948, "learning_rate": 5.603014938699602e-05, "loss": 1.5959, "step": 5577 }, { "epoch": 2.5577342047930283, "grad_norm": 0.2876671552658081, "learning_rate": 5.591629823638655e-05, "loss": 0.8638, "step": 5578 }, { "epoch": 2.558192867790391, "grad_norm": 0.3428886830806732, "learning_rate": 5.5802556022711115e-05, "loss": 1.4761, "step": 5579 }, { "epoch": 2.5586515307877535, "grad_norm": 0.33712685108184814, "learning_rate": 5.5688922773871555e-05, "loss": 1.5991, "step": 5580 }, { "epoch": 2.5591101937851164, "grad_norm": 0.3278370797634125, "learning_rate": 5.557539851774285e-05, "loss": 1.3031, "step": 5581 }, { "epoch": 2.5595688567824793, "grad_norm": 0.30777499079704285, "learning_rate": 5.546198328217333e-05, "loss": 1.088, "step": 5582 }, { "epoch": 2.5600275197798417, "grad_norm": 0.16568246483802795, "learning_rate": 5.5348677094984755e-05, "loss": 0.9488, "step": 5583 }, { "epoch": 2.5604861827772045, "grad_norm": 0.3086935877799988, "learning_rate": 5.523547998397166e-05, "loss": 1.6018, "step": 5584 }, { "epoch": 2.560944845774567, "grad_norm": 0.3688352406024933, "learning_rate": 5.512239197690222e-05, "loss": 1.5702, "step": 5585 }, { "epoch": 2.56140350877193, "grad_norm": 0.34201377630233765, "learning_rate": 5.5009413101517804e-05, "loss": 1.51, "step": 5586 }, { "epoch": 2.5618621717692927, "grad_norm": 0.5058449506759644, "learning_rate": 5.4896543385532904e-05, "loss": 2.0178, "step": 5587 }, { "epoch": 2.562320834766655, "grad_norm": 0.34568533301353455, "learning_rate": 5.4783782856635156e-05, "loss": 1.5913, "step": 5588 }, { "epoch": 2.562779497764018, "grad_norm": 0.27001887559890747, "learning_rate": 5.467113154248582e-05, "loss": 0.7074, "step": 5589 }, { "epoch": 2.5632381607613803, "grad_norm": 0.3905376195907593, "learning_rate": 5.455858947071885e-05, "loss": 2.2896, "step": 5590 }, { "epoch": 2.563696823758743, "grad_norm": 0.34636062383651733, "learning_rate": 5.444615666894165e-05, "loss": 1.4116, "step": 5591 }, { "epoch": 2.564155486756106, "grad_norm": 0.3349077105522156, "learning_rate": 5.433383316473484e-05, "loss": 0.8179, "step": 5592 }, { "epoch": 2.564614149753469, "grad_norm": 0.20421668887138367, "learning_rate": 5.42216189856522e-05, "loss": 1.3837, "step": 5593 }, { "epoch": 2.5650728127508313, "grad_norm": 0.36774739623069763, "learning_rate": 5.410951415922072e-05, "loss": 1.0772, "step": 5594 }, { "epoch": 2.565531475748194, "grad_norm": 0.27548274397850037, "learning_rate": 5.399751871294034e-05, "loss": 0.7288, "step": 5595 }, { "epoch": 2.5659901387455566, "grad_norm": 0.27638116478919983, "learning_rate": 5.388563267428448e-05, "loss": 1.6965, "step": 5596 }, { "epoch": 2.5664488017429194, "grad_norm": 0.3485362231731415, "learning_rate": 5.377385607069951e-05, "loss": 1.7012, "step": 5597 }, { "epoch": 2.5669074647402823, "grad_norm": 0.4059440493583679, "learning_rate": 5.366218892960517e-05, "loss": 1.7341, "step": 5598 }, { "epoch": 2.5673661277376447, "grad_norm": 0.3992423415184021, "learning_rate": 5.3550631278394156e-05, "loss": 1.6401, "step": 5599 }, { "epoch": 2.5678247907350076, "grad_norm": 0.34082409739494324, "learning_rate": 5.3439183144432414e-05, "loss": 1.1353, "step": 5600 }, { "epoch": 2.56828345373237, "grad_norm": 0.2575600743293762, "learning_rate": 5.33278445550589e-05, "loss": 0.4625, "step": 5601 }, { "epoch": 2.568742116729733, "grad_norm": 0.30971759557724, "learning_rate": 5.321661553758572e-05, "loss": 1.5112, "step": 5602 }, { "epoch": 2.5692007797270957, "grad_norm": 0.16957223415374756, "learning_rate": 5.3105496119298266e-05, "loss": 0.7928, "step": 5603 }, { "epoch": 2.569659442724458, "grad_norm": 0.22056140005588531, "learning_rate": 5.299448632745485e-05, "loss": 0.8559, "step": 5604 }, { "epoch": 2.570118105721821, "grad_norm": 0.2757347524166107, "learning_rate": 5.288358618928707e-05, "loss": 1.0507, "step": 5605 }, { "epoch": 2.5705767687191834, "grad_norm": 0.2410385012626648, "learning_rate": 5.27727957319995e-05, "loss": 1.358, "step": 5606 }, { "epoch": 2.5710354317165462, "grad_norm": 0.39340847730636597, "learning_rate": 5.266211498276968e-05, "loss": 1.5098, "step": 5607 }, { "epoch": 2.571494094713909, "grad_norm": 0.23687437176704407, "learning_rate": 5.2551543968748564e-05, "loss": 1.2888, "step": 5608 }, { "epoch": 2.5719527577112715, "grad_norm": 0.32233813405036926, "learning_rate": 5.244108271706005e-05, "loss": 1.6709, "step": 5609 }, { "epoch": 2.5724114207086344, "grad_norm": 0.40911930799484253, "learning_rate": 5.233073125480098e-05, "loss": 1.2322, "step": 5610 }, { "epoch": 2.572870083705997, "grad_norm": 0.24620671570301056, "learning_rate": 5.2220489609041466e-05, "loss": 0.8972, "step": 5611 }, { "epoch": 2.5733287467033596, "grad_norm": 0.2210768610239029, "learning_rate": 5.2110357806824494e-05, "loss": 1.2861, "step": 5612 }, { "epoch": 2.5737874097007225, "grad_norm": 0.9936086535453796, "learning_rate": 5.2000335875166117e-05, "loss": 1.4037, "step": 5613 }, { "epoch": 2.574246072698085, "grad_norm": 0.22487474977970123, "learning_rate": 5.1890423841055636e-05, "loss": 1.1986, "step": 5614 }, { "epoch": 2.5747047356954478, "grad_norm": 0.36320793628692627, "learning_rate": 5.17806217314552e-05, "loss": 1.7546, "step": 5615 }, { "epoch": 2.57516339869281, "grad_norm": 0.3797517418861389, "learning_rate": 5.167092957330011e-05, "loss": 0.8319, "step": 5616 }, { "epoch": 2.575622061690173, "grad_norm": 0.1391095519065857, "learning_rate": 5.15613473934986e-05, "loss": 0.9581, "step": 5617 }, { "epoch": 2.576080724687536, "grad_norm": 0.3511468470096588, "learning_rate": 5.145187521893202e-05, "loss": 1.5688, "step": 5618 }, { "epoch": 2.5765393876848988, "grad_norm": 0.33020275831222534, "learning_rate": 5.13425130764546e-05, "loss": 1.8931, "step": 5619 }, { "epoch": 2.576998050682261, "grad_norm": 0.3568331003189087, "learning_rate": 5.1233260992893715e-05, "loss": 1.1766, "step": 5620 }, { "epoch": 2.577456713679624, "grad_norm": 0.3786485195159912, "learning_rate": 5.112411899504971e-05, "loss": 1.5772, "step": 5621 }, { "epoch": 2.5779153766769864, "grad_norm": 0.3674635887145996, "learning_rate": 5.1015087109695934e-05, "loss": 2.0232, "step": 5622 }, { "epoch": 2.5783740396743493, "grad_norm": 0.428074449300766, "learning_rate": 5.090616536357856e-05, "loss": 0.9435, "step": 5623 }, { "epoch": 2.578832702671712, "grad_norm": 0.10987308621406555, "learning_rate": 5.0797353783416986e-05, "loss": 0.4183, "step": 5624 }, { "epoch": 2.5792913656690746, "grad_norm": 0.29732468724250793, "learning_rate": 5.068865239590342e-05, "loss": 0.8677, "step": 5625 }, { "epoch": 2.5797500286664374, "grad_norm": 0.272393137216568, "learning_rate": 5.0580061227703145e-05, "loss": 0.9888, "step": 5626 }, { "epoch": 2.5802086916638, "grad_norm": 0.23563776910305023, "learning_rate": 5.047158030545434e-05, "loss": 1.5809, "step": 5627 }, { "epoch": 2.5806673546611627, "grad_norm": 0.4073021411895752, "learning_rate": 5.036320965576813e-05, "loss": 1.1274, "step": 5628 }, { "epoch": 2.5811260176585256, "grad_norm": 0.4311738610267639, "learning_rate": 5.0254949305228614e-05, "loss": 0.9954, "step": 5629 }, { "epoch": 2.581584680655888, "grad_norm": 0.2439291924238205, "learning_rate": 5.01467992803929e-05, "loss": 1.2027, "step": 5630 }, { "epoch": 2.582043343653251, "grad_norm": 0.241475909948349, "learning_rate": 5.003875960779097e-05, "loss": 1.0108, "step": 5631 }, { "epoch": 2.5825020066506132, "grad_norm": 0.25994980335235596, "learning_rate": 4.9930830313925645e-05, "loss": 1.0152, "step": 5632 }, { "epoch": 2.582960669647976, "grad_norm": 0.234562948346138, "learning_rate": 4.982301142527279e-05, "loss": 0.7739, "step": 5633 }, { "epoch": 2.583419332645339, "grad_norm": 0.2599080801010132, "learning_rate": 4.971530296828131e-05, "loss": 1.2073, "step": 5634 }, { "epoch": 2.5838779956427014, "grad_norm": 0.3854179382324219, "learning_rate": 4.960770496937267e-05, "loss": 1.1806, "step": 5635 }, { "epoch": 2.5843366586400642, "grad_norm": 0.21886670589447021, "learning_rate": 4.9500217454941434e-05, "loss": 1.6209, "step": 5636 }, { "epoch": 2.5847953216374266, "grad_norm": 0.4453312158584595, "learning_rate": 4.939284045135517e-05, "loss": 1.5825, "step": 5637 }, { "epoch": 2.5852539846347895, "grad_norm": 0.24376074969768524, "learning_rate": 4.9285573984954144e-05, "loss": 1.1272, "step": 5638 }, { "epoch": 2.5857126476321524, "grad_norm": 0.3159853219985962, "learning_rate": 4.91784180820517e-05, "loss": 0.9751, "step": 5639 }, { "epoch": 2.586171310629515, "grad_norm": 0.39112159609794617, "learning_rate": 4.907137276893381e-05, "loss": 1.7409, "step": 5640 }, { "epoch": 2.5866299736268776, "grad_norm": 0.39343714714050293, "learning_rate": 4.896443807185963e-05, "loss": 1.5584, "step": 5641 }, { "epoch": 2.5870886366242405, "grad_norm": 0.361751526594162, "learning_rate": 4.88576140170609e-05, "loss": 1.7249, "step": 5642 }, { "epoch": 2.587547299621603, "grad_norm": 0.273359090089798, "learning_rate": 4.875090063074233e-05, "loss": 0.8665, "step": 5643 }, { "epoch": 2.5880059626189658, "grad_norm": 0.11545918881893158, "learning_rate": 4.864429793908154e-05, "loss": 1.1998, "step": 5644 }, { "epoch": 2.5884646256163286, "grad_norm": 1.5149635076522827, "learning_rate": 4.8537805968228985e-05, "loss": 1.7397, "step": 5645 }, { "epoch": 2.588923288613691, "grad_norm": 0.37379226088523865, "learning_rate": 4.843142474430773e-05, "loss": 1.7961, "step": 5646 }, { "epoch": 2.589381951611054, "grad_norm": 0.41186100244522095, "learning_rate": 4.8325154293414e-05, "loss": 1.182, "step": 5647 }, { "epoch": 2.5898406146084163, "grad_norm": 0.23608848452568054, "learning_rate": 4.8218994641616606e-05, "loss": 1.4241, "step": 5648 }, { "epoch": 2.590299277605779, "grad_norm": 0.2904841899871826, "learning_rate": 4.8112945814957396e-05, "loss": 0.8572, "step": 5649 }, { "epoch": 2.590757940603142, "grad_norm": 0.20750272274017334, "learning_rate": 4.800700783945083e-05, "loss": 1.0704, "step": 5650 }, { "epoch": 2.5912166036005044, "grad_norm": 0.2611089050769806, "learning_rate": 4.790118074108429e-05, "loss": 1.7778, "step": 5651 }, { "epoch": 2.5916752665978673, "grad_norm": 0.4068661332130432, "learning_rate": 4.77954645458179e-05, "loss": 1.3848, "step": 5652 }, { "epoch": 2.5921339295952297, "grad_norm": 0.2891179919242859, "learning_rate": 4.768985927958464e-05, "loss": 1.014, "step": 5653 }, { "epoch": 2.5925925925925926, "grad_norm": 0.3051091432571411, "learning_rate": 4.758436496829016e-05, "loss": 1.1055, "step": 5654 }, { "epoch": 2.5930512555899554, "grad_norm": 0.3727298080921173, "learning_rate": 4.747898163781311e-05, "loss": 1.7048, "step": 5655 }, { "epoch": 2.593509918587318, "grad_norm": 0.26991981267929077, "learning_rate": 4.737370931400475e-05, "loss": 0.955, "step": 5656 }, { "epoch": 2.5939685815846807, "grad_norm": 0.29294559359550476, "learning_rate": 4.726854802268909e-05, "loss": 0.7957, "step": 5657 }, { "epoch": 2.594427244582043, "grad_norm": 0.28209206461906433, "learning_rate": 4.716349778966289e-05, "loss": 1.6527, "step": 5658 }, { "epoch": 2.594885907579406, "grad_norm": 0.2640170454978943, "learning_rate": 4.7058558640695805e-05, "loss": 0.8565, "step": 5659 }, { "epoch": 2.595344570576769, "grad_norm": 0.13128653168678284, "learning_rate": 4.695373060153013e-05, "loss": 0.6372, "step": 5660 }, { "epoch": 2.5958032335741317, "grad_norm": 0.22628602385520935, "learning_rate": 4.684901369788097e-05, "loss": 1.334, "step": 5661 }, { "epoch": 2.596261896571494, "grad_norm": 0.33850499987602234, "learning_rate": 4.674440795543633e-05, "loss": 1.685, "step": 5662 }, { "epoch": 2.596720559568857, "grad_norm": 0.3774524927139282, "learning_rate": 4.663991339985641e-05, "loss": 1.4136, "step": 5663 }, { "epoch": 2.5971792225662194, "grad_norm": 0.38456571102142334, "learning_rate": 4.653553005677463e-05, "loss": 1.7705, "step": 5664 }, { "epoch": 2.597637885563582, "grad_norm": 0.2402615249156952, "learning_rate": 4.643125795179698e-05, "loss": 0.9066, "step": 5665 }, { "epoch": 2.598096548560945, "grad_norm": 0.31829625368118286, "learning_rate": 4.632709711050215e-05, "loss": 1.3141, "step": 5666 }, { "epoch": 2.5985552115583075, "grad_norm": 0.2783154547214508, "learning_rate": 4.622304755844164e-05, "loss": 1.2182, "step": 5667 }, { "epoch": 2.5990138745556703, "grad_norm": 0.2189006507396698, "learning_rate": 4.6119109321139384e-05, "loss": 0.6295, "step": 5668 }, { "epoch": 2.5994725375530328, "grad_norm": 0.20123249292373657, "learning_rate": 4.6015282424092196e-05, "loss": 1.3485, "step": 5669 }, { "epoch": 2.5999312005503956, "grad_norm": 0.4019489884376526, "learning_rate": 4.591156689276971e-05, "loss": 1.5854, "step": 5670 }, { "epoch": 2.6003898635477585, "grad_norm": 0.335580438375473, "learning_rate": 4.580796275261395e-05, "loss": 1.5316, "step": 5671 }, { "epoch": 2.600848526545121, "grad_norm": 0.31390464305877686, "learning_rate": 4.570447002903988e-05, "loss": 1.3737, "step": 5672 }, { "epoch": 2.6013071895424837, "grad_norm": 0.34774190187454224, "learning_rate": 4.560108874743507e-05, "loss": 1.2221, "step": 5673 }, { "epoch": 2.601765852539846, "grad_norm": 0.13695718348026276, "learning_rate": 4.5497818933159406e-05, "loss": 1.0302, "step": 5674 }, { "epoch": 2.602224515537209, "grad_norm": 0.28607991337776184, "learning_rate": 4.5394660611545955e-05, "loss": 0.8901, "step": 5675 }, { "epoch": 2.602683178534572, "grad_norm": 0.2588864266872406, "learning_rate": 4.529161380790009e-05, "loss": 1.4887, "step": 5676 }, { "epoch": 2.6031418415319343, "grad_norm": 0.2673766314983368, "learning_rate": 4.5188678547499976e-05, "loss": 1.3279, "step": 5677 }, { "epoch": 2.603600504529297, "grad_norm": 0.22360926866531372, "learning_rate": 4.508585485559652e-05, "loss": 1.3532, "step": 5678 }, { "epoch": 2.6040591675266596, "grad_norm": 0.35530173778533936, "learning_rate": 4.498314275741272e-05, "loss": 1.1174, "step": 5679 }, { "epoch": 2.6045178305240224, "grad_norm": 0.20542040467262268, "learning_rate": 4.488054227814497e-05, "loss": 1.0656, "step": 5680 }, { "epoch": 2.6049764935213853, "grad_norm": 0.3311353325843811, "learning_rate": 4.4778053442961764e-05, "loss": 0.7792, "step": 5681 }, { "epoch": 2.6054351565187477, "grad_norm": 0.09570083022117615, "learning_rate": 4.467567627700436e-05, "loss": 1.1345, "step": 5682 }, { "epoch": 2.6058938195161105, "grad_norm": 0.26272979378700256, "learning_rate": 4.4573410805386624e-05, "loss": 1.1146, "step": 5683 }, { "epoch": 2.606352482513473, "grad_norm": 0.368933767080307, "learning_rate": 4.447125705319516e-05, "loss": 1.2422, "step": 5684 }, { "epoch": 2.606811145510836, "grad_norm": 0.3198513686656952, "learning_rate": 4.436921504548874e-05, "loss": 1.2469, "step": 5685 }, { "epoch": 2.6072698085081987, "grad_norm": 0.3850035071372986, "learning_rate": 4.426728480729914e-05, "loss": 1.6652, "step": 5686 }, { "epoch": 2.6077284715055615, "grad_norm": 0.35521817207336426, "learning_rate": 4.4165466363630556e-05, "loss": 1.4731, "step": 5687 }, { "epoch": 2.608187134502924, "grad_norm": 0.2965492308139801, "learning_rate": 4.406375973945981e-05, "loss": 1.5766, "step": 5688 }, { "epoch": 2.608645797500287, "grad_norm": 0.3511826694011688, "learning_rate": 4.396216495973632e-05, "loss": 1.9038, "step": 5689 }, { "epoch": 2.609104460497649, "grad_norm": 0.3754914104938507, "learning_rate": 4.386068204938193e-05, "loss": 1.5048, "step": 5690 }, { "epoch": 2.609563123495012, "grad_norm": 0.374553382396698, "learning_rate": 4.375931103329117e-05, "loss": 1.7387, "step": 5691 }, { "epoch": 2.610021786492375, "grad_norm": 0.3759397268295288, "learning_rate": 4.36580519363311e-05, "loss": 1.6533, "step": 5692 }, { "epoch": 2.6104804494897373, "grad_norm": 0.3354843258857727, "learning_rate": 4.355690478334129e-05, "loss": 0.8031, "step": 5693 }, { "epoch": 2.6109391124871, "grad_norm": 0.209818497300148, "learning_rate": 4.3455869599133834e-05, "loss": 1.2783, "step": 5694 }, { "epoch": 2.6113977754844626, "grad_norm": 0.36011189222335815, "learning_rate": 4.3354946408493524e-05, "loss": 2.0714, "step": 5695 }, { "epoch": 2.6118564384818255, "grad_norm": 0.2897484302520752, "learning_rate": 4.325413523617733e-05, "loss": 1.3004, "step": 5696 }, { "epoch": 2.6123151014791883, "grad_norm": 0.4306742548942566, "learning_rate": 4.315343610691508e-05, "loss": 1.3275, "step": 5697 }, { "epoch": 2.6127737644765507, "grad_norm": 0.14981167018413544, "learning_rate": 4.305284904540901e-05, "loss": 1.0299, "step": 5698 }, { "epoch": 2.6132324274739136, "grad_norm": 0.3096173405647278, "learning_rate": 4.295237407633379e-05, "loss": 1.6353, "step": 5699 }, { "epoch": 2.613691090471276, "grad_norm": 0.33780768513679504, "learning_rate": 4.285201122433674e-05, "loss": 0.4902, "step": 5700 }, { "epoch": 2.614149753468639, "grad_norm": 0.06309907138347626, "learning_rate": 4.2751760514037506e-05, "loss": 1.1826, "step": 5701 }, { "epoch": 2.6146084164660017, "grad_norm": 0.39696255326271057, "learning_rate": 4.2651621970028366e-05, "loss": 1.5219, "step": 5702 }, { "epoch": 2.615067079463364, "grad_norm": 0.4694381058216095, "learning_rate": 4.2551595616874e-05, "loss": 1.8362, "step": 5703 }, { "epoch": 2.615525742460727, "grad_norm": 0.29091694951057434, "learning_rate": 4.24516814791116e-05, "loss": 1.6509, "step": 5704 }, { "epoch": 2.6159844054580894, "grad_norm": 0.40303540229797363, "learning_rate": 4.235187958125086e-05, "loss": 1.3851, "step": 5705 }, { "epoch": 2.6164430684554523, "grad_norm": 0.3498489558696747, "learning_rate": 4.225218994777397e-05, "loss": 1.513, "step": 5706 }, { "epoch": 2.616901731452815, "grad_norm": 0.3207775354385376, "learning_rate": 4.2152612603135244e-05, "loss": 1.484, "step": 5707 }, { "epoch": 2.617360394450178, "grad_norm": 0.2937675416469574, "learning_rate": 4.2053147571761985e-05, "loss": 1.2656, "step": 5708 }, { "epoch": 2.6178190574475404, "grad_norm": 0.3286793529987335, "learning_rate": 4.1953794878053565e-05, "loss": 1.202, "step": 5709 }, { "epoch": 2.6182777204449033, "grad_norm": 0.22947460412979126, "learning_rate": 4.185455454638193e-05, "loss": 1.2922, "step": 5710 }, { "epoch": 2.6187363834422657, "grad_norm": 0.22816713154315948, "learning_rate": 4.17554266010915e-05, "loss": 0.9779, "step": 5711 }, { "epoch": 2.6191950464396285, "grad_norm": 0.34349822998046875, "learning_rate": 4.165641106649898e-05, "loss": 1.5968, "step": 5712 }, { "epoch": 2.6196537094369914, "grad_norm": 0.3354540169239044, "learning_rate": 4.155750796689373e-05, "loss": 0.8374, "step": 5713 }, { "epoch": 2.620112372434354, "grad_norm": 0.3276011645793915, "learning_rate": 4.1458717326537276e-05, "loss": 1.5134, "step": 5714 }, { "epoch": 2.6205710354317167, "grad_norm": 0.3458406925201416, "learning_rate": 4.1360039169663685e-05, "loss": 1.4265, "step": 5715 }, { "epoch": 2.621029698429079, "grad_norm": 0.3710658550262451, "learning_rate": 4.1261473520479483e-05, "loss": 1.4791, "step": 5716 }, { "epoch": 2.621488361426442, "grad_norm": 0.3279964029788971, "learning_rate": 4.116302040316361e-05, "loss": 1.191, "step": 5717 }, { "epoch": 2.621947024423805, "grad_norm": 0.2667905390262604, "learning_rate": 4.106467984186707e-05, "loss": 1.1352, "step": 5718 }, { "epoch": 2.622405687421167, "grad_norm": 0.3724762201309204, "learning_rate": 4.0966451860713736e-05, "loss": 1.4844, "step": 5719 }, { "epoch": 2.62286435041853, "grad_norm": 0.3980143070220947, "learning_rate": 4.0868336483799505e-05, "loss": 1.5843, "step": 5720 }, { "epoch": 2.6233230134158925, "grad_norm": 0.2862567603588104, "learning_rate": 4.07703337351929e-05, "loss": 1.5024, "step": 5721 }, { "epoch": 2.6237816764132553, "grad_norm": 0.3262464106082916, "learning_rate": 4.067244363893452e-05, "loss": 1.1802, "step": 5722 }, { "epoch": 2.624240339410618, "grad_norm": 0.3230985999107361, "learning_rate": 4.0574666219037824e-05, "loss": 1.7435, "step": 5723 }, { "epoch": 2.6246990024079806, "grad_norm": 0.3618335425853729, "learning_rate": 4.047700149948802e-05, "loss": 1.3806, "step": 5724 }, { "epoch": 2.6251576654053435, "grad_norm": 0.33226069808006287, "learning_rate": 4.037944950424311e-05, "loss": 1.197, "step": 5725 }, { "epoch": 2.625616328402706, "grad_norm": 0.22331880033016205, "learning_rate": 4.028201025723327e-05, "loss": 1.3807, "step": 5726 }, { "epoch": 2.6260749914000687, "grad_norm": 0.27581825852394104, "learning_rate": 4.018468378236106e-05, "loss": 1.0424, "step": 5727 }, { "epoch": 2.6265336543974316, "grad_norm": 0.3891744315624237, "learning_rate": 4.008747010350133e-05, "loss": 1.9856, "step": 5728 }, { "epoch": 2.6269923173947944, "grad_norm": 0.3294212222099304, "learning_rate": 3.9990369244501445e-05, "loss": 1.2145, "step": 5729 }, { "epoch": 2.627450980392157, "grad_norm": 0.35058024525642395, "learning_rate": 3.989338122918068e-05, "loss": 1.1814, "step": 5730 }, { "epoch": 2.6279096433895197, "grad_norm": 0.3072217106819153, "learning_rate": 3.979650608133112e-05, "loss": 1.6852, "step": 5731 }, { "epoch": 2.628368306386882, "grad_norm": 0.3541480004787445, "learning_rate": 3.969974382471669e-05, "loss": 1.1888, "step": 5732 }, { "epoch": 2.628826969384245, "grad_norm": 0.26909348368644714, "learning_rate": 3.96030944830742e-05, "loss": 1.2612, "step": 5733 }, { "epoch": 2.629285632381608, "grad_norm": 0.33335986733436584, "learning_rate": 3.950655808011233e-05, "loss": 1.7208, "step": 5734 }, { "epoch": 2.6297442953789703, "grad_norm": 0.3109550178050995, "learning_rate": 3.9410134639511986e-05, "loss": 1.0018, "step": 5735 }, { "epoch": 2.630202958376333, "grad_norm": 0.3982895612716675, "learning_rate": 3.931382418492663e-05, "loss": 1.1909, "step": 5736 }, { "epoch": 2.6306616213736955, "grad_norm": 0.31409791111946106, "learning_rate": 3.921762673998197e-05, "loss": 1.0483, "step": 5737 }, { "epoch": 2.6311202843710584, "grad_norm": 0.24062570929527283, "learning_rate": 3.912154232827581e-05, "loss": 0.5005, "step": 5738 }, { "epoch": 2.6315789473684212, "grad_norm": 0.33497846126556396, "learning_rate": 3.90255709733785e-05, "loss": 1.5151, "step": 5739 }, { "epoch": 2.6320376103657837, "grad_norm": 0.2941315770149231, "learning_rate": 3.8929712698832445e-05, "loss": 0.8872, "step": 5740 }, { "epoch": 2.6324962733631465, "grad_norm": 0.23103603720664978, "learning_rate": 3.883396752815221e-05, "loss": 0.6996, "step": 5741 }, { "epoch": 2.632954936360509, "grad_norm": 0.2940506935119629, "learning_rate": 3.873833548482503e-05, "loss": 1.7249, "step": 5742 }, { "epoch": 2.633413599357872, "grad_norm": 0.24942685663700104, "learning_rate": 3.864281659231001e-05, "loss": 0.5719, "step": 5743 }, { "epoch": 2.6338722623552346, "grad_norm": 0.1974136382341385, "learning_rate": 3.8547410874038625e-05, "loss": 1.5941, "step": 5744 }, { "epoch": 2.634330925352597, "grad_norm": 0.31970781087875366, "learning_rate": 3.845211835341472e-05, "loss": 1.0022, "step": 5745 }, { "epoch": 2.63478958834996, "grad_norm": 0.32989394664764404, "learning_rate": 3.835693905381399e-05, "loss": 1.4321, "step": 5746 }, { "epoch": 2.6352482513473223, "grad_norm": 0.3487594723701477, "learning_rate": 3.826187299858474e-05, "loss": 1.421, "step": 5747 }, { "epoch": 2.635706914344685, "grad_norm": 0.2510157823562622, "learning_rate": 3.816692021104734e-05, "loss": 0.8154, "step": 5748 }, { "epoch": 2.636165577342048, "grad_norm": 0.10467905551195145, "learning_rate": 3.807208071449442e-05, "loss": 1.0183, "step": 5749 }, { "epoch": 2.6366242403394105, "grad_norm": 0.34240782260894775, "learning_rate": 3.7977354532190754e-05, "loss": 1.3793, "step": 5750 }, { "epoch": 2.6370829033367733, "grad_norm": 0.33239027857780457, "learning_rate": 3.788274168737338e-05, "loss": 1.5253, "step": 5751 }, { "epoch": 2.6375415663341357, "grad_norm": 0.3215150535106659, "learning_rate": 3.77882422032515e-05, "loss": 1.173, "step": 5752 }, { "epoch": 2.6380002293314986, "grad_norm": 0.3687779903411865, "learning_rate": 3.7693856103006574e-05, "loss": 1.6017, "step": 5753 }, { "epoch": 2.6384588923288614, "grad_norm": 0.3637235164642334, "learning_rate": 3.759958340979208e-05, "loss": 1.7386, "step": 5754 }, { "epoch": 2.6389175553262243, "grad_norm": 0.3436271846294403, "learning_rate": 3.750542414673391e-05, "loss": 0.7148, "step": 5755 }, { "epoch": 2.6393762183235867, "grad_norm": 0.10530371963977814, "learning_rate": 3.7411378336929925e-05, "loss": 1.4356, "step": 5756 }, { "epoch": 2.6398348813209496, "grad_norm": 0.4608929753303528, "learning_rate": 3.731744600345038e-05, "loss": 2.1367, "step": 5757 }, { "epoch": 2.640293544318312, "grad_norm": 0.31003305315971375, "learning_rate": 3.722362716933736e-05, "loss": 1.2718, "step": 5758 }, { "epoch": 2.640752207315675, "grad_norm": 0.34914591908454895, "learning_rate": 3.712992185760533e-05, "loss": 1.3382, "step": 5759 }, { "epoch": 2.6412108703130377, "grad_norm": 0.4381144344806671, "learning_rate": 3.703633009124102e-05, "loss": 1.5305, "step": 5760 }, { "epoch": 2.6416695333104, "grad_norm": 0.20248155295848846, "learning_rate": 3.694285189320301e-05, "loss": 1.1224, "step": 5761 }, { "epoch": 2.642128196307763, "grad_norm": 0.3816075325012207, "learning_rate": 3.684948728642229e-05, "loss": 1.5889, "step": 5762 }, { "epoch": 2.6425868593051254, "grad_norm": 0.3584928810596466, "learning_rate": 3.675623629380181e-05, "loss": 1.4312, "step": 5763 }, { "epoch": 2.6430455223024882, "grad_norm": 0.4676406681537628, "learning_rate": 3.6663098938216734e-05, "loss": 1.2666, "step": 5764 }, { "epoch": 2.643504185299851, "grad_norm": 0.2964949309825897, "learning_rate": 3.657007524251427e-05, "loss": 1.8092, "step": 5765 }, { "epoch": 2.6439628482972135, "grad_norm": 0.28791379928588867, "learning_rate": 3.6477165229513885e-05, "loss": 0.9804, "step": 5766 }, { "epoch": 2.6444215112945764, "grad_norm": 0.3603127598762512, "learning_rate": 3.638436892200708e-05, "loss": 1.5343, "step": 5767 }, { "epoch": 2.644880174291939, "grad_norm": 0.28618577122688293, "learning_rate": 3.629168634275748e-05, "loss": 0.933, "step": 5768 }, { "epoch": 2.6453388372893016, "grad_norm": 0.31322944164276123, "learning_rate": 3.619911751450061e-05, "loss": 1.6573, "step": 5769 }, { "epoch": 2.6457975002866645, "grad_norm": 0.3839041590690613, "learning_rate": 3.610666245994443e-05, "loss": 0.8116, "step": 5770 }, { "epoch": 2.646256163284027, "grad_norm": 0.11088380962610245, "learning_rate": 3.601432120176879e-05, "loss": 0.9275, "step": 5771 }, { "epoch": 2.6467148262813898, "grad_norm": 0.29426756501197815, "learning_rate": 3.5922093762625674e-05, "loss": 0.9716, "step": 5772 }, { "epoch": 2.647173489278752, "grad_norm": 0.21911796927452087, "learning_rate": 3.582998016513911e-05, "loss": 1.243, "step": 5773 }, { "epoch": 2.647632152276115, "grad_norm": 0.40464484691619873, "learning_rate": 3.573798043190524e-05, "loss": 1.4725, "step": 5774 }, { "epoch": 2.648090815273478, "grad_norm": 0.32122787833213806, "learning_rate": 3.564609458549234e-05, "loss": 1.5024, "step": 5775 }, { "epoch": 2.6485494782708408, "grad_norm": 0.28623196482658386, "learning_rate": 3.555432264844055e-05, "loss": 1.4161, "step": 5776 }, { "epoch": 2.649008141268203, "grad_norm": 0.28445377945899963, "learning_rate": 3.5462664643262255e-05, "loss": 1.1441, "step": 5777 }, { "epoch": 2.649466804265566, "grad_norm": 0.27459508180618286, "learning_rate": 3.53711205924418e-05, "loss": 1.0607, "step": 5778 }, { "epoch": 2.6499254672629284, "grad_norm": 0.3308379352092743, "learning_rate": 3.5279690518435656e-05, "loss": 0.8458, "step": 5779 }, { "epoch": 2.6503841302602913, "grad_norm": 0.23437727987766266, "learning_rate": 3.518837444367223e-05, "loss": 1.3428, "step": 5780 }, { "epoch": 2.650842793257654, "grad_norm": 0.30645015835762024, "learning_rate": 3.509717239055199e-05, "loss": 0.9823, "step": 5781 }, { "epoch": 2.6513014562550166, "grad_norm": 0.20318692922592163, "learning_rate": 3.500608438144748e-05, "loss": 1.3997, "step": 5782 }, { "epoch": 2.6517601192523794, "grad_norm": 0.3624488115310669, "learning_rate": 3.491511043870321e-05, "loss": 0.991, "step": 5783 }, { "epoch": 2.652218782249742, "grad_norm": 0.3760741949081421, "learning_rate": 3.482425058463579e-05, "loss": 1.937, "step": 5784 }, { "epoch": 2.6526774452471047, "grad_norm": 0.4173126816749573, "learning_rate": 3.473350484153387e-05, "loss": 1.5058, "step": 5785 }, { "epoch": 2.6531361082444676, "grad_norm": 0.2894061803817749, "learning_rate": 3.4642873231657934e-05, "loss": 1.5995, "step": 5786 }, { "epoch": 2.65359477124183, "grad_norm": 0.34869733452796936, "learning_rate": 3.4552355777240585e-05, "loss": 1.4747, "step": 5787 }, { "epoch": 2.654053434239193, "grad_norm": 0.3714545965194702, "learning_rate": 3.446195250048639e-05, "loss": 1.4193, "step": 5788 }, { "epoch": 2.6545120972365552, "grad_norm": 0.28376656770706177, "learning_rate": 3.437166342357195e-05, "loss": 1.3256, "step": 5789 }, { "epoch": 2.654970760233918, "grad_norm": 0.37970438599586487, "learning_rate": 3.4281488568645934e-05, "loss": 1.584, "step": 5790 }, { "epoch": 2.655429423231281, "grad_norm": 0.3399360179901123, "learning_rate": 3.4191427957828705e-05, "loss": 1.4299, "step": 5791 }, { "epoch": 2.6558880862286434, "grad_norm": 0.4417398273944855, "learning_rate": 3.41014816132128e-05, "loss": 1.6428, "step": 5792 }, { "epoch": 2.656346749226006, "grad_norm": 0.3878077268600464, "learning_rate": 3.401164955686281e-05, "loss": 1.587, "step": 5793 }, { "epoch": 2.6568054122233686, "grad_norm": 0.5019458532333374, "learning_rate": 3.392193181081504e-05, "loss": 1.2179, "step": 5794 }, { "epoch": 2.6572640752207315, "grad_norm": 0.3489948809146881, "learning_rate": 3.383232839707806e-05, "loss": 1.608, "step": 5795 }, { "epoch": 2.6577227382180943, "grad_norm": 0.43875566124916077, "learning_rate": 3.3742839337632223e-05, "loss": 1.3773, "step": 5796 }, { "epoch": 2.658181401215457, "grad_norm": 0.2078205943107605, "learning_rate": 3.365346465442976e-05, "loss": 1.2278, "step": 5797 }, { "epoch": 2.6586400642128196, "grad_norm": 0.36126843094825745, "learning_rate": 3.356420436939489e-05, "loss": 1.799, "step": 5798 }, { "epoch": 2.6590987272101825, "grad_norm": 0.40285730361938477, "learning_rate": 3.347505850442395e-05, "loss": 1.5258, "step": 5799 }, { "epoch": 2.659557390207545, "grad_norm": 0.29165950417518616, "learning_rate": 3.3386027081384886e-05, "loss": 1.3616, "step": 5800 }, { "epoch": 2.6600160532049077, "grad_norm": 0.2867632210254669, "learning_rate": 3.3297110122118005e-05, "loss": 1.2755, "step": 5801 }, { "epoch": 2.6604747162022706, "grad_norm": 0.33592167496681213, "learning_rate": 3.3208307648434964e-05, "loss": 1.5153, "step": 5802 }, { "epoch": 2.660933379199633, "grad_norm": 0.2837589979171753, "learning_rate": 3.311961968211979e-05, "loss": 1.204, "step": 5803 }, { "epoch": 2.661392042196996, "grad_norm": 0.2693648040294647, "learning_rate": 3.303104624492825e-05, "loss": 1.2056, "step": 5804 }, { "epoch": 2.6618507051943583, "grad_norm": 0.206869438290596, "learning_rate": 3.294258735858818e-05, "loss": 0.9919, "step": 5805 }, { "epoch": 2.662309368191721, "grad_norm": 0.3209756314754486, "learning_rate": 3.2854243044799056e-05, "loss": 1.6161, "step": 5806 }, { "epoch": 2.662768031189084, "grad_norm": 0.301139235496521, "learning_rate": 3.276601332523249e-05, "loss": 1.2684, "step": 5807 }, { "epoch": 2.6632266941864464, "grad_norm": 0.2972935140132904, "learning_rate": 3.267789822153172e-05, "loss": 1.5424, "step": 5808 }, { "epoch": 2.6636853571838093, "grad_norm": 0.3825678527355194, "learning_rate": 3.258989775531212e-05, "loss": 1.5092, "step": 5809 }, { "epoch": 2.6641440201811717, "grad_norm": 0.22569845616817474, "learning_rate": 3.250201194816077e-05, "loss": 1.0415, "step": 5810 }, { "epoch": 2.6646026831785345, "grad_norm": 0.2539108991622925, "learning_rate": 3.2414240821636685e-05, "loss": 1.3038, "step": 5811 }, { "epoch": 2.6650613461758974, "grad_norm": 0.3397371470928192, "learning_rate": 3.232658439727082e-05, "loss": 1.1019, "step": 5812 }, { "epoch": 2.66552000917326, "grad_norm": 0.35667502880096436, "learning_rate": 3.2239042696565915e-05, "loss": 1.9956, "step": 5813 }, { "epoch": 2.6659786721706227, "grad_norm": 0.37884020805358887, "learning_rate": 3.2151615740996565e-05, "loss": 1.2626, "step": 5814 }, { "epoch": 2.666437335167985, "grad_norm": 0.38707906007766724, "learning_rate": 3.2064303552009236e-05, "loss": 1.2571, "step": 5815 }, { "epoch": 2.666895998165348, "grad_norm": 0.3102846145629883, "learning_rate": 3.1977106151022226e-05, "loss": 1.2002, "step": 5816 }, { "epoch": 2.667354661162711, "grad_norm": 0.28075534105300903, "learning_rate": 3.189002355942572e-05, "loss": 1.3118, "step": 5817 }, { "epoch": 2.6678133241600737, "grad_norm": 0.2957073748111725, "learning_rate": 3.1803055798581725e-05, "loss": 1.1588, "step": 5818 }, { "epoch": 2.668271987157436, "grad_norm": 0.24751010537147522, "learning_rate": 3.1716202889823966e-05, "loss": 0.9748, "step": 5819 }, { "epoch": 2.6687306501547985, "grad_norm": 0.301859587430954, "learning_rate": 3.162946485445817e-05, "loss": 1.3465, "step": 5820 }, { "epoch": 2.6691893131521613, "grad_norm": 0.39302027225494385, "learning_rate": 3.15428417137617e-05, "loss": 1.4904, "step": 5821 }, { "epoch": 2.669647976149524, "grad_norm": 0.27386486530303955, "learning_rate": 3.145633348898397e-05, "loss": 1.6713, "step": 5822 }, { "epoch": 2.670106639146887, "grad_norm": 0.33279913663864136, "learning_rate": 3.136994020134598e-05, "loss": 1.2917, "step": 5823 }, { "epoch": 2.6705653021442495, "grad_norm": 0.3797074258327484, "learning_rate": 3.1283661872040626e-05, "loss": 1.4031, "step": 5824 }, { "epoch": 2.6710239651416123, "grad_norm": 0.3788340091705322, "learning_rate": 3.119749852223269e-05, "loss": 1.2273, "step": 5825 }, { "epoch": 2.6714826281389747, "grad_norm": 0.2667097747325897, "learning_rate": 3.1111450173058553e-05, "loss": 0.9149, "step": 5826 }, { "epoch": 2.6719412911363376, "grad_norm": 0.3300883173942566, "learning_rate": 3.10255168456266e-05, "loss": 1.7291, "step": 5827 }, { "epoch": 2.6723999541337005, "grad_norm": 0.33332276344299316, "learning_rate": 3.093969856101686e-05, "loss": 1.4482, "step": 5828 }, { "epoch": 2.672858617131063, "grad_norm": 0.27086135745048523, "learning_rate": 3.085399534028116e-05, "loss": 0.9669, "step": 5829 }, { "epoch": 2.6733172801284257, "grad_norm": 0.10022089630365372, "learning_rate": 3.0768407204443126e-05, "loss": 0.5217, "step": 5830 }, { "epoch": 2.673775943125788, "grad_norm": 0.1253892332315445, "learning_rate": 3.068293417449808e-05, "loss": 0.8534, "step": 5831 }, { "epoch": 2.674234606123151, "grad_norm": 0.3180748224258423, "learning_rate": 3.05975762714133e-05, "loss": 1.609, "step": 5832 }, { "epoch": 2.674693269120514, "grad_norm": 0.3440166413784027, "learning_rate": 3.051233351612759e-05, "loss": 0.7612, "step": 5833 }, { "epoch": 2.6751519321178763, "grad_norm": 0.3188319802284241, "learning_rate": 3.0427205929551615e-05, "loss": 2.0381, "step": 5834 }, { "epoch": 2.675610595115239, "grad_norm": 0.30568626523017883, "learning_rate": 3.0342193532567842e-05, "loss": 0.9399, "step": 5835 }, { "epoch": 2.6760692581126015, "grad_norm": 0.373977392911911, "learning_rate": 3.0257296346030416e-05, "loss": 1.6466, "step": 5836 }, { "epoch": 2.6765279211099644, "grad_norm": 0.372895210981369, "learning_rate": 3.0172514390765238e-05, "loss": 1.7245, "step": 5837 }, { "epoch": 2.6769865841073273, "grad_norm": 0.37782806158065796, "learning_rate": 3.0087847687569893e-05, "loss": 1.6417, "step": 5838 }, { "epoch": 2.6774452471046897, "grad_norm": 0.3993247449398041, "learning_rate": 3.0003296257213708e-05, "loss": 1.9526, "step": 5839 }, { "epoch": 2.6779039101020525, "grad_norm": 0.3877170979976654, "learning_rate": 2.9918860120437873e-05, "loss": 1.5638, "step": 5840 }, { "epoch": 2.678362573099415, "grad_norm": 0.38056743144989014, "learning_rate": 2.9834539297955156e-05, "loss": 1.2691, "step": 5841 }, { "epoch": 2.678821236096778, "grad_norm": 0.3735964298248291, "learning_rate": 2.975033381044995e-05, "loss": 2.0476, "step": 5842 }, { "epoch": 2.6792798990941407, "grad_norm": 0.37376493215560913, "learning_rate": 2.966624367857851e-05, "loss": 1.5699, "step": 5843 }, { "epoch": 2.6797385620915035, "grad_norm": 0.3251396119594574, "learning_rate": 2.958226892296878e-05, "loss": 0.6622, "step": 5844 }, { "epoch": 2.680197225088866, "grad_norm": 0.20608735084533691, "learning_rate": 2.9498409564220396e-05, "loss": 0.7508, "step": 5845 }, { "epoch": 2.680655888086229, "grad_norm": 0.2004690021276474, "learning_rate": 2.9414665622904623e-05, "loss": 0.8976, "step": 5846 }, { "epoch": 2.681114551083591, "grad_norm": 0.30501410365104675, "learning_rate": 2.9331037119564473e-05, "loss": 1.0064, "step": 5847 }, { "epoch": 2.681573214080954, "grad_norm": 0.2096373289823532, "learning_rate": 2.9247524074714594e-05, "loss": 1.6792, "step": 5848 }, { "epoch": 2.682031877078317, "grad_norm": 0.4180261790752411, "learning_rate": 2.9164126508841428e-05, "loss": 1.6902, "step": 5849 }, { "epoch": 2.6824905400756793, "grad_norm": 0.2524012327194214, "learning_rate": 2.9080844442402887e-05, "loss": 0.4348, "step": 5850 }, { "epoch": 2.682949203073042, "grad_norm": 0.2333446592092514, "learning_rate": 2.8997677895828688e-05, "loss": 1.4499, "step": 5851 }, { "epoch": 2.6834078660704046, "grad_norm": 0.4264712631702423, "learning_rate": 2.891462688952029e-05, "loss": 1.3736, "step": 5852 }, { "epoch": 2.6838665290677675, "grad_norm": 0.3079976737499237, "learning_rate": 2.883169144385056e-05, "loss": 1.2166, "step": 5853 }, { "epoch": 2.6843251920651303, "grad_norm": 0.18179012835025787, "learning_rate": 2.874887157916417e-05, "loss": 0.6641, "step": 5854 }, { "epoch": 2.6847838550624927, "grad_norm": 0.32719552516937256, "learning_rate": 2.8666167315777535e-05, "loss": 1.2209, "step": 5855 }, { "epoch": 2.6852425180598556, "grad_norm": 0.30747801065444946, "learning_rate": 2.8583578673978483e-05, "loss": 1.7491, "step": 5856 }, { "epoch": 2.685701181057218, "grad_norm": 0.39954906702041626, "learning_rate": 2.8501105674026808e-05, "loss": 1.2025, "step": 5857 }, { "epoch": 2.686159844054581, "grad_norm": 0.29626455903053284, "learning_rate": 2.841874833615349e-05, "loss": 1.8599, "step": 5858 }, { "epoch": 2.6866185070519437, "grad_norm": 0.44469526410102844, "learning_rate": 2.833650668056148e-05, "loss": 1.537, "step": 5859 }, { "epoch": 2.687077170049306, "grad_norm": 0.325447142124176, "learning_rate": 2.8254380727425255e-05, "loss": 1.2097, "step": 5860 }, { "epoch": 2.687535833046669, "grad_norm": 0.32134348154067993, "learning_rate": 2.817237049689092e-05, "loss": 1.6572, "step": 5861 }, { "epoch": 2.6879944960440314, "grad_norm": 0.2648102045059204, "learning_rate": 2.809047600907616e-05, "loss": 0.9762, "step": 5862 }, { "epoch": 2.6884531590413943, "grad_norm": 0.3223074972629547, "learning_rate": 2.800869728407035e-05, "loss": 1.5692, "step": 5863 }, { "epoch": 2.688911822038757, "grad_norm": 0.2671777307987213, "learning_rate": 2.792703434193422e-05, "loss": 1.4353, "step": 5864 }, { "epoch": 2.68937048503612, "grad_norm": 0.3055770695209503, "learning_rate": 2.7845487202700416e-05, "loss": 0.8293, "step": 5865 }, { "epoch": 2.6898291480334824, "grad_norm": 0.2750590145587921, "learning_rate": 2.7764055886372987e-05, "loss": 1.4748, "step": 5866 }, { "epoch": 2.6902878110308452, "grad_norm": 0.3781544268131256, "learning_rate": 2.768274041292762e-05, "loss": 0.7972, "step": 5867 }, { "epoch": 2.6907464740282077, "grad_norm": 0.18787944316864014, "learning_rate": 2.760154080231175e-05, "loss": 1.6368, "step": 5868 }, { "epoch": 2.6912051370255705, "grad_norm": 0.41110900044441223, "learning_rate": 2.7520457074444005e-05, "loss": 1.1579, "step": 5869 }, { "epoch": 2.6916638000229334, "grad_norm": 0.28122058510780334, "learning_rate": 2.743948924921491e-05, "loss": 0.9766, "step": 5870 }, { "epoch": 2.692122463020296, "grad_norm": 0.1648864597082138, "learning_rate": 2.7358637346486414e-05, "loss": 1.1961, "step": 5871 }, { "epoch": 2.6925811260176586, "grad_norm": 0.45734626054763794, "learning_rate": 2.7277901386092096e-05, "loss": 1.5893, "step": 5872 }, { "epoch": 2.693039789015021, "grad_norm": 0.31361323595046997, "learning_rate": 2.7197281387837114e-05, "loss": 0.8256, "step": 5873 }, { "epoch": 2.693498452012384, "grad_norm": 0.22070448100566864, "learning_rate": 2.7116777371498145e-05, "loss": 1.4343, "step": 5874 }, { "epoch": 2.6939571150097468, "grad_norm": 0.428138792514801, "learning_rate": 2.703638935682323e-05, "loss": 1.6013, "step": 5875 }, { "epoch": 2.694415778007109, "grad_norm": 0.3450068235397339, "learning_rate": 2.6956117363532207e-05, "loss": 1.2018, "step": 5876 }, { "epoch": 2.694874441004472, "grad_norm": 0.34051015973091125, "learning_rate": 2.687596141131654e-05, "loss": 1.6506, "step": 5877 }, { "epoch": 2.6953331040018345, "grad_norm": 0.31475841999053955, "learning_rate": 2.6795921519838895e-05, "loss": 1.5417, "step": 5878 }, { "epoch": 2.6957917669991973, "grad_norm": 0.216132253408432, "learning_rate": 2.6715997708733675e-05, "loss": 0.3737, "step": 5879 }, { "epoch": 2.69625042999656, "grad_norm": 0.2257094830274582, "learning_rate": 2.6636189997606864e-05, "loss": 1.4002, "step": 5880 }, { "epoch": 2.6967090929939226, "grad_norm": 0.3286268413066864, "learning_rate": 2.655649840603569e-05, "loss": 1.5927, "step": 5881 }, { "epoch": 2.6971677559912854, "grad_norm": 0.32692962884902954, "learning_rate": 2.6476922953569127e-05, "loss": 0.7494, "step": 5882 }, { "epoch": 2.697626418988648, "grad_norm": 0.29675719141960144, "learning_rate": 2.6397463659727672e-05, "loss": 1.6597, "step": 5883 }, { "epoch": 2.6980850819860107, "grad_norm": 0.3485512137413025, "learning_rate": 2.6318120544003234e-05, "loss": 1.2645, "step": 5884 }, { "epoch": 2.6985437449833736, "grad_norm": 0.41130709648132324, "learning_rate": 2.623889362585924e-05, "loss": 1.9763, "step": 5885 }, { "epoch": 2.6990024079807364, "grad_norm": 0.2589356005191803, "learning_rate": 2.6159782924730647e-05, "loss": 0.3383, "step": 5886 }, { "epoch": 2.699461070978099, "grad_norm": 0.15324239432811737, "learning_rate": 2.6080788460023875e-05, "loss": 1.1049, "step": 5887 }, { "epoch": 2.6999197339754613, "grad_norm": 0.2607985734939575, "learning_rate": 2.6001910251116812e-05, "loss": 1.1836, "step": 5888 }, { "epoch": 2.700378396972824, "grad_norm": 0.29069429636001587, "learning_rate": 2.5923148317358926e-05, "loss": 0.8517, "step": 5889 }, { "epoch": 2.700837059970187, "grad_norm": 0.2796778976917267, "learning_rate": 2.584450267807098e-05, "loss": 0.8593, "step": 5890 }, { "epoch": 2.70129572296755, "grad_norm": 0.2640216648578644, "learning_rate": 2.5765973352545436e-05, "loss": 1.5907, "step": 5891 }, { "epoch": 2.7017543859649122, "grad_norm": 0.39561182260513306, "learning_rate": 2.5687560360045935e-05, "loss": 1.3107, "step": 5892 }, { "epoch": 2.702213048962275, "grad_norm": 0.3599591553211212, "learning_rate": 2.5609263719807875e-05, "loss": 1.8921, "step": 5893 }, { "epoch": 2.7026717119596375, "grad_norm": 0.37715888023376465, "learning_rate": 2.553108345103794e-05, "loss": 1.3452, "step": 5894 }, { "epoch": 2.7031303749570004, "grad_norm": 0.3551565110683441, "learning_rate": 2.545301957291435e-05, "loss": 0.9477, "step": 5895 }, { "epoch": 2.7035890379543632, "grad_norm": 0.12380225211381912, "learning_rate": 2.5375072104586726e-05, "loss": 0.5937, "step": 5896 }, { "epoch": 2.7040477009517256, "grad_norm": 0.21938225626945496, "learning_rate": 2.5297241065176168e-05, "loss": 1.0974, "step": 5897 }, { "epoch": 2.7045063639490885, "grad_norm": 0.3853704333305359, "learning_rate": 2.5219526473775122e-05, "loss": 1.6407, "step": 5898 }, { "epoch": 2.704965026946451, "grad_norm": 0.2925291657447815, "learning_rate": 2.5141928349447563e-05, "loss": 1.2876, "step": 5899 }, { "epoch": 2.7054236899438138, "grad_norm": 0.21706360578536987, "learning_rate": 2.5064446711228872e-05, "loss": 0.7466, "step": 5900 }, { "epoch": 2.7058823529411766, "grad_norm": 0.32736656069755554, "learning_rate": 2.4987081578125904e-05, "loss": 2.0911, "step": 5901 }, { "epoch": 2.706341015938539, "grad_norm": 0.4860953390598297, "learning_rate": 2.4909832969116918e-05, "loss": 1.9189, "step": 5902 }, { "epoch": 2.706799678935902, "grad_norm": 0.3114350736141205, "learning_rate": 2.4832700903151374e-05, "loss": 0.5055, "step": 5903 }, { "epoch": 2.7072583419332643, "grad_norm": 0.12829411029815674, "learning_rate": 2.4755685399150463e-05, "loss": 0.6061, "step": 5904 }, { "epoch": 2.707717004930627, "grad_norm": 0.3164483308792114, "learning_rate": 2.467878647600663e-05, "loss": 1.7654, "step": 5905 }, { "epoch": 2.70817566792799, "grad_norm": 0.37488433718681335, "learning_rate": 2.460200415258368e-05, "loss": 1.6296, "step": 5906 }, { "epoch": 2.7086343309253524, "grad_norm": 0.28757867217063904, "learning_rate": 2.4525338447716928e-05, "loss": 1.1265, "step": 5907 }, { "epoch": 2.7090929939227153, "grad_norm": 0.32620272040367126, "learning_rate": 2.4448789380213e-05, "loss": 1.1835, "step": 5908 }, { "epoch": 2.7095516569200777, "grad_norm": 0.293837308883667, "learning_rate": 2.437235696884993e-05, "loss": 1.1319, "step": 5909 }, { "epoch": 2.7100103199174406, "grad_norm": 0.1874661147594452, "learning_rate": 2.4296041232377165e-05, "loss": 0.5175, "step": 5910 }, { "epoch": 2.7104689829148034, "grad_norm": 0.2423352599143982, "learning_rate": 2.421984218951545e-05, "loss": 1.2948, "step": 5911 }, { "epoch": 2.7109276459121663, "grad_norm": 0.32054775953292847, "learning_rate": 2.414375985895706e-05, "loss": 1.2799, "step": 5912 }, { "epoch": 2.7113863089095287, "grad_norm": 0.3801712989807129, "learning_rate": 2.4067794259365504e-05, "loss": 1.63, "step": 5913 }, { "epoch": 2.7118449719068916, "grad_norm": 0.29861852526664734, "learning_rate": 2.3991945409375604e-05, "loss": 1.3125, "step": 5914 }, { "epoch": 2.712303634904254, "grad_norm": 0.3143099248409271, "learning_rate": 2.3916213327593694e-05, "loss": 0.8409, "step": 5915 }, { "epoch": 2.712762297901617, "grad_norm": 0.09029946476221085, "learning_rate": 2.3840598032597417e-05, "loss": 0.7455, "step": 5916 }, { "epoch": 2.7132209608989797, "grad_norm": 0.4967743158340454, "learning_rate": 2.376509954293571e-05, "loss": 1.5552, "step": 5917 }, { "epoch": 2.713679623896342, "grad_norm": 0.37226957082748413, "learning_rate": 2.3689717877128815e-05, "loss": 1.7788, "step": 5918 }, { "epoch": 2.714138286893705, "grad_norm": 0.41915664076805115, "learning_rate": 2.3614453053668716e-05, "loss": 1.508, "step": 5919 }, { "epoch": 2.7145969498910674, "grad_norm": 0.30586564540863037, "learning_rate": 2.3539305091018038e-05, "loss": 1.7762, "step": 5920 }, { "epoch": 2.7150556128884302, "grad_norm": 0.4077855944633484, "learning_rate": 2.3464274007611364e-05, "loss": 1.6046, "step": 5921 }, { "epoch": 2.715514275885793, "grad_norm": 4.286491870880127, "learning_rate": 2.338935982185425e-05, "loss": 1.3848, "step": 5922 }, { "epoch": 2.7159729388831555, "grad_norm": 0.2541790306568146, "learning_rate": 2.3314562552123663e-05, "loss": 1.7035, "step": 5923 }, { "epoch": 2.7164316018805184, "grad_norm": 0.4110478162765503, "learning_rate": 2.3239882216768093e-05, "loss": 2.1335, "step": 5924 }, { "epoch": 2.7168902648778808, "grad_norm": 0.4424358606338501, "learning_rate": 2.3165318834106942e-05, "loss": 1.9265, "step": 5925 }, { "epoch": 2.7173489278752436, "grad_norm": 0.3610834777355194, "learning_rate": 2.309087242243124e-05, "loss": 1.1285, "step": 5926 }, { "epoch": 2.7178075908726065, "grad_norm": 0.2572648525238037, "learning_rate": 2.3016543000003222e-05, "loss": 0.8428, "step": 5927 }, { "epoch": 2.718266253869969, "grad_norm": 0.36045536398887634, "learning_rate": 2.2942330585056347e-05, "loss": 1.8323, "step": 5928 }, { "epoch": 2.7187249168673318, "grad_norm": 0.21581125259399414, "learning_rate": 2.2868235195795672e-05, "loss": 1.3904, "step": 5929 }, { "epoch": 2.719183579864694, "grad_norm": 0.38794851303100586, "learning_rate": 2.279425685039721e-05, "loss": 1.0128, "step": 5930 }, { "epoch": 2.719642242862057, "grad_norm": 0.3242373466491699, "learning_rate": 2.2720395567008334e-05, "loss": 1.4001, "step": 5931 }, { "epoch": 2.72010090585942, "grad_norm": 0.130849227309227, "learning_rate": 2.2646651363747773e-05, "loss": 1.1965, "step": 5932 }, { "epoch": 2.7205595688567827, "grad_norm": 0.3577413856983185, "learning_rate": 2.2573024258705554e-05, "loss": 0.9914, "step": 5933 }, { "epoch": 2.721018231854145, "grad_norm": 0.2112027406692505, "learning_rate": 2.24995142699429e-05, "loss": 1.7675, "step": 5934 }, { "epoch": 2.721476894851508, "grad_norm": 0.28808677196502686, "learning_rate": 2.242612141549233e-05, "loss": 0.3857, "step": 5935 }, { "epoch": 2.7219355578488704, "grad_norm": 0.2923356294631958, "learning_rate": 2.2352845713357772e-05, "loss": 1.5071, "step": 5936 }, { "epoch": 2.7223942208462333, "grad_norm": 0.24712756276130676, "learning_rate": 2.2279687181514076e-05, "loss": 1.0919, "step": 5937 }, { "epoch": 2.722852883843596, "grad_norm": 0.3377127945423126, "learning_rate": 2.2206645837907602e-05, "loss": 1.8531, "step": 5938 }, { "epoch": 2.7233115468409586, "grad_norm": 0.5375214219093323, "learning_rate": 2.2133721700456022e-05, "loss": 1.3341, "step": 5939 }, { "epoch": 2.7237702098383214, "grad_norm": 0.37174880504608154, "learning_rate": 2.2060914787048136e-05, "loss": 1.8732, "step": 5940 }, { "epoch": 2.724228872835684, "grad_norm": 0.319526344537735, "learning_rate": 2.198822511554399e-05, "loss": 0.9168, "step": 5941 }, { "epoch": 2.7246875358330467, "grad_norm": 0.32301315665245056, "learning_rate": 2.1915652703774824e-05, "loss": 1.5364, "step": 5942 }, { "epoch": 2.7251461988304095, "grad_norm": 0.2971673309803009, "learning_rate": 2.184319756954323e-05, "loss": 0.6956, "step": 5943 }, { "epoch": 2.725604861827772, "grad_norm": 0.1800968199968338, "learning_rate": 2.177085973062293e-05, "loss": 0.9738, "step": 5944 }, { "epoch": 2.726063524825135, "grad_norm": 0.3155420124530792, "learning_rate": 2.1698639204759006e-05, "loss": 1.2188, "step": 5945 }, { "epoch": 2.7265221878224972, "grad_norm": 0.2110133171081543, "learning_rate": 2.1626536009667575e-05, "loss": 1.0003, "step": 5946 }, { "epoch": 2.72698085081986, "grad_norm": 0.45827966928482056, "learning_rate": 2.1554550163036145e-05, "loss": 1.7184, "step": 5947 }, { "epoch": 2.727439513817223, "grad_norm": 0.25445568561553955, "learning_rate": 2.1482681682523263e-05, "loss": 0.6922, "step": 5948 }, { "epoch": 2.7278981768145854, "grad_norm": 0.3437955379486084, "learning_rate": 2.141093058575888e-05, "loss": 1.2655, "step": 5949 }, { "epoch": 2.728356839811948, "grad_norm": 0.12270909547805786, "learning_rate": 2.133929689034403e-05, "loss": 0.6745, "step": 5950 }, { "epoch": 2.7288155028093106, "grad_norm": 0.31163090467453003, "learning_rate": 2.1267780613850986e-05, "loss": 2.0465, "step": 5951 }, { "epoch": 2.7292741658066735, "grad_norm": 0.2993125021457672, "learning_rate": 2.119638177382327e-05, "loss": 1.076, "step": 5952 }, { "epoch": 2.7297328288040363, "grad_norm": 0.3309202790260315, "learning_rate": 2.1125100387775375e-05, "loss": 0.924, "step": 5953 }, { "epoch": 2.730191491801399, "grad_norm": 0.35655951499938965, "learning_rate": 2.105393647319326e-05, "loss": 1.8235, "step": 5954 }, { "epoch": 2.7306501547987616, "grad_norm": 0.3149603307247162, "learning_rate": 2.0982890047533898e-05, "loss": 1.5205, "step": 5955 }, { "epoch": 2.731108817796124, "grad_norm": 0.43384552001953125, "learning_rate": 2.0911961128225466e-05, "loss": 1.6622, "step": 5956 }, { "epoch": 2.731567480793487, "grad_norm": 0.30674198269844055, "learning_rate": 2.0841149732667375e-05, "loss": 1.2803, "step": 5957 }, { "epoch": 2.7320261437908497, "grad_norm": 0.2231689691543579, "learning_rate": 2.0770455878230178e-05, "loss": 0.6584, "step": 5958 }, { "epoch": 2.7324848067882126, "grad_norm": 0.23369106650352478, "learning_rate": 2.069987958225561e-05, "loss": 1.7311, "step": 5959 }, { "epoch": 2.732943469785575, "grad_norm": 0.4220520853996277, "learning_rate": 2.062942086205649e-05, "loss": 1.9563, "step": 5960 }, { "epoch": 2.733402132782938, "grad_norm": 0.4430352747440338, "learning_rate": 2.055907973491683e-05, "loss": 1.5979, "step": 5961 }, { "epoch": 2.7338607957803003, "grad_norm": 0.22902828454971313, "learning_rate": 2.0488856218091935e-05, "loss": 1.2991, "step": 5962 }, { "epoch": 2.734319458777663, "grad_norm": 0.2558428943157196, "learning_rate": 2.0418750328808024e-05, "loss": 1.2835, "step": 5963 }, { "epoch": 2.734778121775026, "grad_norm": 0.5350182056427002, "learning_rate": 2.034876208426267e-05, "loss": 0.9716, "step": 5964 }, { "epoch": 2.7352367847723884, "grad_norm": 0.13574962317943573, "learning_rate": 2.0278891501624375e-05, "loss": 1.4147, "step": 5965 }, { "epoch": 2.7356954477697513, "grad_norm": 0.3386705815792084, "learning_rate": 2.0209138598033026e-05, "loss": 1.1691, "step": 5966 }, { "epoch": 2.7361541107671137, "grad_norm": 0.34220030903816223, "learning_rate": 2.013950339059939e-05, "loss": 2.25, "step": 5967 }, { "epoch": 2.7366127737644765, "grad_norm": 0.47397106885910034, "learning_rate": 2.0069985896405574e-05, "loss": 1.6315, "step": 5968 }, { "epoch": 2.7370714367618394, "grad_norm": 0.3405340015888214, "learning_rate": 2.0000586132504662e-05, "loss": 1.6804, "step": 5969 }, { "epoch": 2.737530099759202, "grad_norm": 0.3932589292526245, "learning_rate": 1.993130411592098e-05, "loss": 1.1376, "step": 5970 }, { "epoch": 2.7379887627565647, "grad_norm": 0.3252299427986145, "learning_rate": 1.986213986364982e-05, "loss": 1.9539, "step": 5971 }, { "epoch": 2.738447425753927, "grad_norm": 0.38866326212882996, "learning_rate": 1.979309339265778e-05, "loss": 1.2502, "step": 5972 }, { "epoch": 2.73890608875129, "grad_norm": 0.2286403626203537, "learning_rate": 1.9724164719882367e-05, "loss": 1.0532, "step": 5973 }, { "epoch": 2.739364751748653, "grad_norm": 0.35474932193756104, "learning_rate": 1.9655353862232326e-05, "loss": 2.0171, "step": 5974 }, { "epoch": 2.739823414746015, "grad_norm": 0.3784157633781433, "learning_rate": 1.9586660836587554e-05, "loss": 1.5926, "step": 5975 }, { "epoch": 2.740282077743378, "grad_norm": 0.4135417640209198, "learning_rate": 1.9518085659798734e-05, "loss": 1.5249, "step": 5976 }, { "epoch": 2.7407407407407405, "grad_norm": 0.37199753522872925, "learning_rate": 1.9449628348687964e-05, "loss": 1.5783, "step": 5977 }, { "epoch": 2.7411994037381033, "grad_norm": 0.29575780034065247, "learning_rate": 1.9381288920048255e-05, "loss": 1.1942, "step": 5978 }, { "epoch": 2.741658066735466, "grad_norm": 0.2783096730709076, "learning_rate": 1.9313067390643866e-05, "loss": 1.7344, "step": 5979 }, { "epoch": 2.742116729732829, "grad_norm": 0.37723588943481445, "learning_rate": 1.9244963777209967e-05, "loss": 1.3625, "step": 5980 }, { "epoch": 2.7425753927301915, "grad_norm": 0.26153478026390076, "learning_rate": 1.917697809645291e-05, "loss": 0.8029, "step": 5981 }, { "epoch": 2.7430340557275543, "grad_norm": 0.28612053394317627, "learning_rate": 1.910911036505003e-05, "loss": 1.6521, "step": 5982 }, { "epoch": 2.7434927187249167, "grad_norm": 0.23514768481254578, "learning_rate": 1.9041360599649725e-05, "loss": 1.1054, "step": 5983 }, { "epoch": 2.7439513817222796, "grad_norm": 0.44542109966278076, "learning_rate": 1.8973728816871592e-05, "loss": 1.3016, "step": 5984 }, { "epoch": 2.7444100447196424, "grad_norm": 0.19931384921073914, "learning_rate": 1.8906215033306196e-05, "loss": 1.5551, "step": 5985 }, { "epoch": 2.744868707717005, "grad_norm": 0.2757704555988312, "learning_rate": 1.8838819265515117e-05, "loss": 1.2303, "step": 5986 }, { "epoch": 2.7453273707143677, "grad_norm": 0.39500927925109863, "learning_rate": 1.8771541530031023e-05, "loss": 1.5353, "step": 5987 }, { "epoch": 2.74578603371173, "grad_norm": 0.350881963968277, "learning_rate": 1.8704381843357598e-05, "loss": 1.248, "step": 5988 }, { "epoch": 2.746244696709093, "grad_norm": 0.30257681012153625, "learning_rate": 1.8637340221969613e-05, "loss": 0.8883, "step": 5989 }, { "epoch": 2.746703359706456, "grad_norm": 0.258948415517807, "learning_rate": 1.8570416682312908e-05, "loss": 1.0292, "step": 5990 }, { "epoch": 2.7471620227038183, "grad_norm": 0.16734470427036285, "learning_rate": 1.8503611240804186e-05, "loss": 1.1654, "step": 5991 }, { "epoch": 2.747620685701181, "grad_norm": 0.2848394811153412, "learning_rate": 1.8436923913831506e-05, "loss": 1.6372, "step": 5992 }, { "epoch": 2.7480793486985435, "grad_norm": 0.3137653172016144, "learning_rate": 1.8370354717753612e-05, "loss": 0.653, "step": 5993 }, { "epoch": 2.7485380116959064, "grad_norm": 0.21349652111530304, "learning_rate": 1.8303903668900446e-05, "loss": 1.3882, "step": 5994 }, { "epoch": 2.7489966746932692, "grad_norm": 0.3199220895767212, "learning_rate": 1.823757078357291e-05, "loss": 0.9641, "step": 5995 }, { "epoch": 2.7494553376906317, "grad_norm": 0.31056568026542664, "learning_rate": 1.8171356078042932e-05, "loss": 1.7107, "step": 5996 }, { "epoch": 2.7499140006879945, "grad_norm": 0.3857615888118744, "learning_rate": 1.8105259568553524e-05, "loss": 1.0108, "step": 5997 }, { "epoch": 2.750372663685357, "grad_norm": 0.2920821011066437, "learning_rate": 1.803928127131854e-05, "loss": 1.8806, "step": 5998 }, { "epoch": 2.75083132668272, "grad_norm": 0.28004190325737, "learning_rate": 1.797342120252299e-05, "loss": 0.4187, "step": 5999 }, { "epoch": 2.7512899896800826, "grad_norm": 0.26144489645957947, "learning_rate": 1.7907679378322716e-05, "loss": 1.446, "step": 6000 }, { "epoch": 2.7517486526774455, "grad_norm": 0.2627753019332886, "learning_rate": 1.7842055814844828e-05, "loss": 1.212, "step": 6001 }, { "epoch": 2.752207315674808, "grad_norm": 0.3469284772872925, "learning_rate": 1.777655052818722e-05, "loss": 0.7107, "step": 6002 }, { "epoch": 2.7526659786721708, "grad_norm": 0.06406933069229126, "learning_rate": 1.771116353441876e-05, "loss": 0.7278, "step": 6003 }, { "epoch": 2.753124641669533, "grad_norm": 0.2524787485599518, "learning_rate": 1.7645894849579403e-05, "loss": 0.8452, "step": 6004 }, { "epoch": 2.753583304666896, "grad_norm": 0.2804563343524933, "learning_rate": 1.7580744489679945e-05, "loss": 0.7473, "step": 6005 }, { "epoch": 2.754041967664259, "grad_norm": 0.2500944435596466, "learning_rate": 1.7515712470702272e-05, "loss": 1.0894, "step": 6006 }, { "epoch": 2.7545006306616213, "grad_norm": 0.2973553538322449, "learning_rate": 1.7450798808599234e-05, "loss": 1.3237, "step": 6007 }, { "epoch": 2.754959293658984, "grad_norm": 0.201811283826828, "learning_rate": 1.73860035192947e-05, "loss": 1.2607, "step": 6008 }, { "epoch": 2.7554179566563466, "grad_norm": 0.4674420952796936, "learning_rate": 1.7321326618683243e-05, "loss": 1.6664, "step": 6009 }, { "epoch": 2.7558766196537094, "grad_norm": 0.37687787413597107, "learning_rate": 1.7256768122630607e-05, "loss": 1.213, "step": 6010 }, { "epoch": 2.7563352826510723, "grad_norm": 0.20660851895809174, "learning_rate": 1.7192328046973572e-05, "loss": 0.5505, "step": 6011 }, { "epoch": 2.7567939456484347, "grad_norm": 0.573073148727417, "learning_rate": 1.712800640751966e-05, "loss": 1.4566, "step": 6012 }, { "epoch": 2.7572526086457976, "grad_norm": 0.20739376544952393, "learning_rate": 1.7063803220047524e-05, "loss": 1.18, "step": 6013 }, { "epoch": 2.75771127164316, "grad_norm": 0.3631076514720917, "learning_rate": 1.6999718500306626e-05, "loss": 1.2751, "step": 6014 }, { "epoch": 2.758169934640523, "grad_norm": 0.3393692076206207, "learning_rate": 1.6935752264017334e-05, "loss": 1.3281, "step": 6015 }, { "epoch": 2.7586285976378857, "grad_norm": 0.435544490814209, "learning_rate": 1.6871904526871096e-05, "loss": 1.7262, "step": 6016 }, { "epoch": 2.759087260635248, "grad_norm": 0.20737430453300476, "learning_rate": 1.680817530453016e-05, "loss": 0.6926, "step": 6017 }, { "epoch": 2.759545923632611, "grad_norm": 0.2829097807407379, "learning_rate": 1.674456461262791e-05, "loss": 1.6014, "step": 6018 }, { "epoch": 2.7600045866299734, "grad_norm": 0.3296710252761841, "learning_rate": 1.6681072466768367e-05, "loss": 2.1915, "step": 6019 }, { "epoch": 2.7604632496273362, "grad_norm": 0.3263249099254608, "learning_rate": 1.6617698882526623e-05, "loss": 1.3439, "step": 6020 }, { "epoch": 2.760921912624699, "grad_norm": 0.34194350242614746, "learning_rate": 1.6554443875448744e-05, "loss": 1.4912, "step": 6021 }, { "epoch": 2.761380575622062, "grad_norm": 0.36899879574775696, "learning_rate": 1.6491307461051595e-05, "loss": 1.7578, "step": 6022 }, { "epoch": 2.7618392386194244, "grad_norm": 0.3341805934906006, "learning_rate": 1.6428289654823014e-05, "loss": 1.1788, "step": 6023 }, { "epoch": 2.762297901616787, "grad_norm": 0.32973915338516235, "learning_rate": 1.6365390472221742e-05, "loss": 1.2469, "step": 6024 }, { "epoch": 2.7627565646141496, "grad_norm": 0.463471919298172, "learning_rate": 1.6302609928677382e-05, "loss": 1.6849, "step": 6025 }, { "epoch": 2.7632152276115125, "grad_norm": 0.40361595153808594, "learning_rate": 1.6239948039590393e-05, "loss": 1.0661, "step": 6026 }, { "epoch": 2.7636738906088754, "grad_norm": 0.2925053834915161, "learning_rate": 1.6177404820332253e-05, "loss": 1.342, "step": 6027 }, { "epoch": 2.7641325536062378, "grad_norm": 0.30632370710372925, "learning_rate": 1.611498028624525e-05, "loss": 1.151, "step": 6028 }, { "epoch": 2.7645912166036006, "grad_norm": 0.22111092507839203, "learning_rate": 1.605267445264258e-05, "loss": 0.4576, "step": 6029 }, { "epoch": 2.765049879600963, "grad_norm": 0.14749830961227417, "learning_rate": 1.5990487334808292e-05, "loss": 1.0352, "step": 6030 }, { "epoch": 2.765508542598326, "grad_norm": 0.2582720220088959, "learning_rate": 1.59284189479974e-05, "loss": 1.6917, "step": 6031 }, { "epoch": 2.7659672055956888, "grad_norm": 0.41496822237968445, "learning_rate": 1.5866469307435626e-05, "loss": 1.5566, "step": 6032 }, { "epoch": 2.766425868593051, "grad_norm": 0.3769316077232361, "learning_rate": 1.5804638428319694e-05, "loss": 1.65, "step": 6033 }, { "epoch": 2.766884531590414, "grad_norm": 0.23421497642993927, "learning_rate": 1.5742926325817253e-05, "loss": 1.1492, "step": 6034 }, { "epoch": 2.7673431945877764, "grad_norm": 0.33073118329048157, "learning_rate": 1.5681333015066635e-05, "loss": 1.5674, "step": 6035 }, { "epoch": 2.7678018575851393, "grad_norm": 0.48587143421173096, "learning_rate": 1.56198585111772e-05, "loss": 0.745, "step": 6036 }, { "epoch": 2.768260520582502, "grad_norm": 0.20380260050296783, "learning_rate": 1.5558502829228937e-05, "loss": 0.9425, "step": 6037 }, { "epoch": 2.7687191835798646, "grad_norm": 0.2204909473657608, "learning_rate": 1.549726598427298e-05, "loss": 1.633, "step": 6038 }, { "epoch": 2.7691778465772274, "grad_norm": 0.34915247559547424, "learning_rate": 1.5436147991331083e-05, "loss": 0.5378, "step": 6039 }, { "epoch": 2.76963650957459, "grad_norm": 0.24728168547153473, "learning_rate": 1.5375148865396038e-05, "loss": 1.2788, "step": 6040 }, { "epoch": 2.7700951725719527, "grad_norm": 0.24167189002037048, "learning_rate": 1.531426862143126e-05, "loss": 1.3767, "step": 6041 }, { "epoch": 2.7705538355693156, "grad_norm": 0.3961116075515747, "learning_rate": 1.5253507274371137e-05, "loss": 1.0845, "step": 6042 }, { "epoch": 2.771012498566678, "grad_norm": 0.28995588421821594, "learning_rate": 1.5192864839120912e-05, "loss": 1.2602, "step": 6043 }, { "epoch": 2.771471161564041, "grad_norm": 0.2699783742427826, "learning_rate": 1.5132341330556576e-05, "loss": 0.9651, "step": 6044 }, { "epoch": 2.7719298245614032, "grad_norm": 0.2684682011604309, "learning_rate": 1.5071936763524974e-05, "loss": 1.4155, "step": 6045 }, { "epoch": 2.772388487558766, "grad_norm": 0.27157121896743774, "learning_rate": 1.5011651152843809e-05, "loss": 1.6052, "step": 6046 }, { "epoch": 2.772847150556129, "grad_norm": 0.3431642949581146, "learning_rate": 1.4951484513301583e-05, "loss": 1.454, "step": 6047 }, { "epoch": 2.773305813553492, "grad_norm": 0.32993775606155396, "learning_rate": 1.4891436859657604e-05, "loss": 0.9063, "step": 6048 }, { "epoch": 2.7737644765508542, "grad_norm": 0.28404250741004944, "learning_rate": 1.483150820664192e-05, "loss": 1.5258, "step": 6049 }, { "epoch": 2.774223139548217, "grad_norm": 0.4539187252521515, "learning_rate": 1.477169856895555e-05, "loss": 1.155, "step": 6050 }, { "epoch": 2.7746818025455795, "grad_norm": 0.20708614587783813, "learning_rate": 1.4712007961270146e-05, "loss": 0.9088, "step": 6051 }, { "epoch": 2.7751404655429424, "grad_norm": 0.09351902455091476, "learning_rate": 1.4652436398228385e-05, "loss": 0.8234, "step": 6052 }, { "epoch": 2.775599128540305, "grad_norm": 0.2884185016155243, "learning_rate": 1.4592983894443468e-05, "loss": 1.8303, "step": 6053 }, { "epoch": 2.7760577915376676, "grad_norm": 0.3106091618537903, "learning_rate": 1.4533650464499559e-05, "loss": 1.0048, "step": 6054 }, { "epoch": 2.7765164545350305, "grad_norm": 0.42278361320495605, "learning_rate": 1.4474436122951572e-05, "loss": 1.63, "step": 6055 }, { "epoch": 2.776975117532393, "grad_norm": 0.37238556146621704, "learning_rate": 1.4415340884325223e-05, "loss": 1.6514, "step": 6056 }, { "epoch": 2.7774337805297558, "grad_norm": 0.28060218691825867, "learning_rate": 1.4356364763117024e-05, "loss": 0.8222, "step": 6057 }, { "epoch": 2.7778924435271186, "grad_norm": 0.27165743708610535, "learning_rate": 1.4297507773794239e-05, "loss": 0.7359, "step": 6058 }, { "epoch": 2.778351106524481, "grad_norm": 0.22083696722984314, "learning_rate": 1.4238769930794926e-05, "loss": 1.2175, "step": 6059 }, { "epoch": 2.778809769521844, "grad_norm": 0.3628489375114441, "learning_rate": 1.4180151248527784e-05, "loss": 2.0398, "step": 6060 }, { "epoch": 2.7792684325192063, "grad_norm": 0.36967116594314575, "learning_rate": 1.4121651741372533e-05, "loss": 1.1404, "step": 6061 }, { "epoch": 2.779727095516569, "grad_norm": 0.30434438586235046, "learning_rate": 1.4063271423679469e-05, "loss": 1.418, "step": 6062 }, { "epoch": 2.780185758513932, "grad_norm": 0.34148743748664856, "learning_rate": 1.4005010309769638e-05, "loss": 1.3456, "step": 6063 }, { "epoch": 2.7806444215112944, "grad_norm": 0.2640988528728485, "learning_rate": 1.3946868413935055e-05, "loss": 1.0164, "step": 6064 }, { "epoch": 2.7811030845086573, "grad_norm": 0.2465277761220932, "learning_rate": 1.3888845750438306e-05, "loss": 0.7947, "step": 6065 }, { "epoch": 2.7815617475060197, "grad_norm": 0.35097736120224, "learning_rate": 1.3830942333512675e-05, "loss": 1.9631, "step": 6066 }, { "epoch": 2.7820204105033826, "grad_norm": 0.409355103969574, "learning_rate": 1.3773158177362356e-05, "loss": 1.48, "step": 6067 }, { "epoch": 2.7824790735007454, "grad_norm": 0.324785441160202, "learning_rate": 1.371549329616223e-05, "loss": 1.4276, "step": 6068 }, { "epoch": 2.7829377364981083, "grad_norm": 0.372164249420166, "learning_rate": 1.3657947704057872e-05, "loss": 1.144, "step": 6069 }, { "epoch": 2.7833963994954707, "grad_norm": 0.160455122590065, "learning_rate": 1.3600521415165712e-05, "loss": 1.3252, "step": 6070 }, { "epoch": 2.7838550624928335, "grad_norm": 0.3352322280406952, "learning_rate": 1.354321444357276e-05, "loss": 1.2853, "step": 6071 }, { "epoch": 2.784313725490196, "grad_norm": 0.2882045805454254, "learning_rate": 1.3486026803336715e-05, "loss": 1.1088, "step": 6072 }, { "epoch": 2.784772388487559, "grad_norm": 0.28998827934265137, "learning_rate": 1.3428958508486355e-05, "loss": 1.6669, "step": 6073 }, { "epoch": 2.7852310514849217, "grad_norm": 0.6466904282569885, "learning_rate": 1.3372009573020816e-05, "loss": 1.6118, "step": 6074 }, { "epoch": 2.785689714482284, "grad_norm": 0.3447767496109009, "learning_rate": 1.3315180010910145e-05, "loss": 1.3989, "step": 6075 }, { "epoch": 2.786148377479647, "grad_norm": 0.24244464933872223, "learning_rate": 1.3258469836094911e-05, "loss": 0.4131, "step": 6076 }, { "epoch": 2.7866070404770094, "grad_norm": 0.264919251203537, "learning_rate": 1.3201879062486655e-05, "loss": 1.6393, "step": 6077 }, { "epoch": 2.787065703474372, "grad_norm": 0.266245037317276, "learning_rate": 1.3145407703967438e-05, "loss": 1.474, "step": 6078 }, { "epoch": 2.787524366471735, "grad_norm": 0.3023591935634613, "learning_rate": 1.3089055774390124e-05, "loss": 1.1628, "step": 6079 }, { "epoch": 2.7879830294690975, "grad_norm": 0.28299427032470703, "learning_rate": 1.3032823287578266e-05, "loss": 1.1443, "step": 6080 }, { "epoch": 2.7884416924664603, "grad_norm": 0.30756518244743347, "learning_rate": 1.2976710257326053e-05, "loss": 1.1689, "step": 6081 }, { "epoch": 2.7889003554638228, "grad_norm": 0.2923178970813751, "learning_rate": 1.2920716697398416e-05, "loss": 1.3088, "step": 6082 }, { "epoch": 2.7893590184611856, "grad_norm": 0.3415941894054413, "learning_rate": 1.2864842621530982e-05, "loss": 1.1528, "step": 6083 }, { "epoch": 2.7898176814585485, "grad_norm": 0.37786003947257996, "learning_rate": 1.2809088043430116e-05, "loss": 1.2742, "step": 6084 }, { "epoch": 2.790276344455911, "grad_norm": 0.27256593108177185, "learning_rate": 1.2753452976772773e-05, "loss": 1.4647, "step": 6085 }, { "epoch": 2.7907350074532737, "grad_norm": 0.3987545073032379, "learning_rate": 1.2697937435206642e-05, "loss": 1.9492, "step": 6086 }, { "epoch": 2.791193670450636, "grad_norm": 0.36392930150032043, "learning_rate": 1.2642541432350108e-05, "loss": 1.2538, "step": 6087 }, { "epoch": 2.791652333447999, "grad_norm": 0.31756749749183655, "learning_rate": 1.258726498179219e-05, "loss": 0.9692, "step": 6088 }, { "epoch": 2.792110996445362, "grad_norm": 0.20181064307689667, "learning_rate": 1.2532108097092598e-05, "loss": 0.8022, "step": 6089 }, { "epoch": 2.7925696594427247, "grad_norm": 0.1365479826927185, "learning_rate": 1.2477070791781675e-05, "loss": 0.9823, "step": 6090 }, { "epoch": 2.793028322440087, "grad_norm": 0.20917746424674988, "learning_rate": 1.242215307936051e-05, "loss": 0.7463, "step": 6091 }, { "epoch": 2.79348698543745, "grad_norm": 0.3799673318862915, "learning_rate": 1.2367354973300881e-05, "loss": 1.5071, "step": 6092 }, { "epoch": 2.7939456484348124, "grad_norm": 0.3432566523551941, "learning_rate": 1.2312676487045038e-05, "loss": 1.736, "step": 6093 }, { "epoch": 2.7944043114321753, "grad_norm": 0.33574342727661133, "learning_rate": 1.2258117634006028e-05, "loss": 0.94, "step": 6094 }, { "epoch": 2.794862974429538, "grad_norm": 0.35387691855430603, "learning_rate": 1.2203678427567588e-05, "loss": 1.8061, "step": 6095 }, { "epoch": 2.7953216374269005, "grad_norm": 0.2692587971687317, "learning_rate": 1.2149358881084039e-05, "loss": 1.1821, "step": 6096 }, { "epoch": 2.7957803004242634, "grad_norm": 0.36804336309432983, "learning_rate": 1.2095159007880385e-05, "loss": 1.5841, "step": 6097 }, { "epoch": 2.796238963421626, "grad_norm": 0.36808767914772034, "learning_rate": 1.204107882125216e-05, "loss": 1.8313, "step": 6098 }, { "epoch": 2.7966976264189887, "grad_norm": 0.33622780442237854, "learning_rate": 1.1987118334465696e-05, "loss": 1.2008, "step": 6099 }, { "epoch": 2.7971562894163515, "grad_norm": 0.27367860078811646, "learning_rate": 1.1933277560757793e-05, "loss": 1.3219, "step": 6100 }, { "epoch": 2.797614952413714, "grad_norm": 0.28815925121307373, "learning_rate": 1.187955651333611e-05, "loss": 0.7678, "step": 6101 }, { "epoch": 2.798073615411077, "grad_norm": 0.27640852332115173, "learning_rate": 1.1825955205378713e-05, "loss": 1.75, "step": 6102 }, { "epoch": 2.798532278408439, "grad_norm": 0.32954832911491394, "learning_rate": 1.1772473650034421e-05, "loss": 1.0438, "step": 6103 }, { "epoch": 2.798990941405802, "grad_norm": 0.3560253381729126, "learning_rate": 1.1719111860422627e-05, "loss": 1.1265, "step": 6104 }, { "epoch": 2.799449604403165, "grad_norm": 0.11905957758426666, "learning_rate": 1.1665869849633414e-05, "loss": 1.2286, "step": 6105 }, { "epoch": 2.7999082674005273, "grad_norm": 0.2836627960205078, "learning_rate": 1.1612747630727394e-05, "loss": 1.1157, "step": 6106 }, { "epoch": 2.80036693039789, "grad_norm": 0.33813512325286865, "learning_rate": 1.1559745216735806e-05, "loss": 0.8221, "step": 6107 }, { "epoch": 2.8008255933952526, "grad_norm": 0.24317127466201782, "learning_rate": 1.1506862620660586e-05, "loss": 1.5408, "step": 6108 }, { "epoch": 2.8012842563926155, "grad_norm": 0.33810973167419434, "learning_rate": 1.1454099855474242e-05, "loss": 1.2968, "step": 6109 }, { "epoch": 2.8017429193899783, "grad_norm": 0.31549063324928284, "learning_rate": 1.1401456934119703e-05, "loss": 1.26, "step": 6110 }, { "epoch": 2.8022015823873407, "grad_norm": 0.2274772822856903, "learning_rate": 1.1348933869510802e-05, "loss": 0.9997, "step": 6111 }, { "epoch": 2.8026602453847036, "grad_norm": 0.3890027403831482, "learning_rate": 1.1296530674531735e-05, "loss": 1.1818, "step": 6112 }, { "epoch": 2.803118908382066, "grad_norm": 0.2179887443780899, "learning_rate": 1.1244247362037496e-05, "loss": 1.7811, "step": 6113 }, { "epoch": 2.803577571379429, "grad_norm": 0.3312167227268219, "learning_rate": 1.1192083944853438e-05, "loss": 1.1515, "step": 6114 }, { "epoch": 2.8040362343767917, "grad_norm": 0.24871665239334106, "learning_rate": 1.1140040435775655e-05, "loss": 1.3264, "step": 6115 }, { "epoch": 2.8044948973741546, "grad_norm": 0.30252861976623535, "learning_rate": 1.1088116847570885e-05, "loss": 1.4769, "step": 6116 }, { "epoch": 2.804953560371517, "grad_norm": 0.31379151344299316, "learning_rate": 1.1036313192976266e-05, "loss": 1.2689, "step": 6117 }, { "epoch": 2.80541222336888, "grad_norm": 0.30472010374069214, "learning_rate": 1.0984629484699582e-05, "loss": 0.8448, "step": 6118 }, { "epoch": 2.8058708863662423, "grad_norm": 0.3143567144870758, "learning_rate": 1.09330657354193e-05, "loss": 1.4705, "step": 6119 }, { "epoch": 2.806329549363605, "grad_norm": 0.34770259261131287, "learning_rate": 1.0881621957784416e-05, "loss": 1.1561, "step": 6120 }, { "epoch": 2.806788212360968, "grad_norm": 0.09801855683326721, "learning_rate": 1.0830298164414331e-05, "loss": 0.8337, "step": 6121 }, { "epoch": 2.8072468753583304, "grad_norm": 0.2806740701198578, "learning_rate": 1.0779094367899201e-05, "loss": 1.6565, "step": 6122 }, { "epoch": 2.8077055383556933, "grad_norm": 0.7073934674263, "learning_rate": 1.0728010580799696e-05, "loss": 1.5767, "step": 6123 }, { "epoch": 2.8081642013530557, "grad_norm": 0.32200196385383606, "learning_rate": 1.0677046815647018e-05, "loss": 0.7768, "step": 6124 }, { "epoch": 2.8086228643504185, "grad_norm": 0.08174686133861542, "learning_rate": 1.0626203084942886e-05, "loss": 1.385, "step": 6125 }, { "epoch": 2.8090815273477814, "grad_norm": 0.3870648741722107, "learning_rate": 1.0575479401159827e-05, "loss": 1.4463, "step": 6126 }, { "epoch": 2.809540190345144, "grad_norm": 0.289341539144516, "learning_rate": 1.052487577674055e-05, "loss": 1.167, "step": 6127 }, { "epoch": 2.8099988533425067, "grad_norm": 0.31475889682769775, "learning_rate": 1.0474392224098572e-05, "loss": 0.841, "step": 6128 }, { "epoch": 2.810457516339869, "grad_norm": 0.29922276735305786, "learning_rate": 1.042402875561782e-05, "loss": 2.0278, "step": 6129 }, { "epoch": 2.810916179337232, "grad_norm": 0.4082167148590088, "learning_rate": 1.0373785383652856e-05, "loss": 1.6776, "step": 6130 }, { "epoch": 2.811374842334595, "grad_norm": 0.3589903712272644, "learning_rate": 1.0323662120528765e-05, "loss": 1.1015, "step": 6131 }, { "epoch": 2.811833505331957, "grad_norm": 0.24516572058200836, "learning_rate": 1.0273658978541044e-05, "loss": 1.232, "step": 6132 }, { "epoch": 2.81229216832932, "grad_norm": 0.21401365101337433, "learning_rate": 1.0223775969955883e-05, "loss": 1.2669, "step": 6133 }, { "epoch": 2.8127508313266825, "grad_norm": 0.4826924800872803, "learning_rate": 1.0174013107009938e-05, "loss": 1.4506, "step": 6134 }, { "epoch": 2.8132094943240453, "grad_norm": 0.5444715023040771, "learning_rate": 1.0124370401910388e-05, "loss": 1.0889, "step": 6135 }, { "epoch": 2.813668157321408, "grad_norm": 0.32295656204223633, "learning_rate": 1.0074847866834991e-05, "loss": 1.4926, "step": 6136 }, { "epoch": 2.814126820318771, "grad_norm": 0.3280322253704071, "learning_rate": 1.002544551393203e-05, "loss": 1.8701, "step": 6137 }, { "epoch": 2.8145854833161335, "grad_norm": 0.3465189039707184, "learning_rate": 9.976163355320089e-06, "loss": 1.6473, "step": 6138 }, { "epoch": 2.8150441463134963, "grad_norm": 0.406650573015213, "learning_rate": 9.92700140308861e-06, "loss": 1.2048, "step": 6139 }, { "epoch": 2.8155028093108587, "grad_norm": 0.32793253660202026, "learning_rate": 9.87795966929722e-06, "loss": 1.0226, "step": 6140 }, { "epoch": 2.8159614723082216, "grad_norm": 0.24950462579727173, "learning_rate": 9.8290381659763e-06, "loss": 1.614, "step": 6141 }, { "epoch": 2.8164201353055844, "grad_norm": 0.27943155169487, "learning_rate": 9.780236905126694e-06, "loss": 1.1877, "step": 6142 }, { "epoch": 2.816878798302947, "grad_norm": 0.33849433064460754, "learning_rate": 9.731555898719601e-06, "loss": 1.5759, "step": 6143 }, { "epoch": 2.8173374613003097, "grad_norm": 0.28267404437065125, "learning_rate": 9.682995158696806e-06, "loss": 0.7854, "step": 6144 }, { "epoch": 2.817796124297672, "grad_norm": 0.28812381625175476, "learning_rate": 9.63455469697072e-06, "loss": 1.5585, "step": 6145 }, { "epoch": 2.818254787295035, "grad_norm": 0.3284319341182709, "learning_rate": 9.58623452542412e-06, "loss": 1.6185, "step": 6146 }, { "epoch": 2.818713450292398, "grad_norm": 0.391230046749115, "learning_rate": 9.538034655910189e-06, "loss": 1.822, "step": 6147 }, { "epoch": 2.8191721132897603, "grad_norm": 0.38372287154197693, "learning_rate": 9.489955100252855e-06, "loss": 1.2379, "step": 6148 }, { "epoch": 2.819630776287123, "grad_norm": 0.37333911657333374, "learning_rate": 9.441995870246244e-06, "loss": 1.638, "step": 6149 }, { "epoch": 2.8200894392844855, "grad_norm": 0.33912524580955505, "learning_rate": 9.394156977655165e-06, "loss": 1.0467, "step": 6150 }, { "epoch": 2.8205481022818484, "grad_norm": 0.24765615165233612, "learning_rate": 9.346438434214843e-06, "loss": 1.575, "step": 6151 }, { "epoch": 2.8210067652792112, "grad_norm": 0.6433941125869751, "learning_rate": 9.298840251630913e-06, "loss": 1.5814, "step": 6152 }, { "epoch": 2.8214654282765737, "grad_norm": 0.2799464464187622, "learning_rate": 9.251362441579646e-06, "loss": 1.0583, "step": 6153 }, { "epoch": 2.8219240912739365, "grad_norm": 0.21018120646476746, "learning_rate": 9.204005015707673e-06, "loss": 1.2, "step": 6154 }, { "epoch": 2.822382754271299, "grad_norm": 0.2773264944553375, "learning_rate": 9.15676798563203e-06, "loss": 1.0197, "step": 6155 }, { "epoch": 2.822841417268662, "grad_norm": 0.34957289695739746, "learning_rate": 9.109651362940397e-06, "loss": 1.3116, "step": 6156 }, { "epoch": 2.8233000802660246, "grad_norm": 0.3279551863670349, "learning_rate": 9.062655159190802e-06, "loss": 2.0532, "step": 6157 }, { "epoch": 2.8237587432633875, "grad_norm": 0.4215746521949768, "learning_rate": 9.015779385911748e-06, "loss": 1.4172, "step": 6158 }, { "epoch": 2.82421740626075, "grad_norm": 0.41953399777412415, "learning_rate": 8.969024054602204e-06, "loss": 1.0516, "step": 6159 }, { "epoch": 2.8246760692581128, "grad_norm": 0.08408641070127487, "learning_rate": 8.922389176731549e-06, "loss": 1.189, "step": 6160 }, { "epoch": 2.825134732255475, "grad_norm": 0.3760605752468109, "learning_rate": 8.875874763739633e-06, "loss": 1.4748, "step": 6161 }, { "epoch": 2.825593395252838, "grad_norm": 0.21087396144866943, "learning_rate": 8.829480827036884e-06, "loss": 0.7344, "step": 6162 }, { "epoch": 2.826052058250201, "grad_norm": 0.19473138451576233, "learning_rate": 8.783207378003977e-06, "loss": 0.9306, "step": 6163 }, { "epoch": 2.8265107212475633, "grad_norm": 0.3104762136936188, "learning_rate": 8.737054427992164e-06, "loss": 1.9001, "step": 6164 }, { "epoch": 2.826969384244926, "grad_norm": 0.4818089008331299, "learning_rate": 8.691021988323111e-06, "loss": 1.6207, "step": 6165 }, { "epoch": 2.8274280472422886, "grad_norm": 0.31389549374580383, "learning_rate": 8.645110070288897e-06, "loss": 1.3694, "step": 6166 }, { "epoch": 2.8278867102396514, "grad_norm": 0.352982759475708, "learning_rate": 8.599318685152014e-06, "loss": 1.1738, "step": 6167 }, { "epoch": 2.8283453732370143, "grad_norm": 0.20497311651706696, "learning_rate": 8.553647844145418e-06, "loss": 0.7585, "step": 6168 }, { "epoch": 2.8288040362343767, "grad_norm": 0.30926766991615295, "learning_rate": 8.508097558472538e-06, "loss": 1.0177, "step": 6169 }, { "epoch": 2.8292626992317396, "grad_norm": 0.3021996319293976, "learning_rate": 8.462667839307159e-06, "loss": 1.1897, "step": 6170 }, { "epoch": 2.829721362229102, "grad_norm": 0.2463819533586502, "learning_rate": 8.417358697793587e-06, "loss": 1.6089, "step": 6171 }, { "epoch": 2.830180025226465, "grad_norm": 0.3797639012336731, "learning_rate": 8.372170145046376e-06, "loss": 1.718, "step": 6172 }, { "epoch": 2.8306386882238277, "grad_norm": 0.26537811756134033, "learning_rate": 8.327102192150604e-06, "loss": 1.5723, "step": 6173 }, { "epoch": 2.83109735122119, "grad_norm": 0.26074454188346863, "learning_rate": 8.282154850161871e-06, "loss": 0.9977, "step": 6174 }, { "epoch": 2.831556014218553, "grad_norm": 0.29502686858177185, "learning_rate": 8.23732813010597e-06, "loss": 1.4637, "step": 6175 }, { "epoch": 2.8320146772159154, "grad_norm": 0.42862004041671753, "learning_rate": 8.192622042979325e-06, "loss": 1.2069, "step": 6176 }, { "epoch": 2.8324733402132782, "grad_norm": 0.197200745344162, "learning_rate": 8.148036599748554e-06, "loss": 1.193, "step": 6177 }, { "epoch": 2.832932003210641, "grad_norm": 0.34668564796447754, "learning_rate": 8.103571811350851e-06, "loss": 1.3786, "step": 6178 }, { "epoch": 2.8333906662080035, "grad_norm": 0.22151388227939606, "learning_rate": 8.059227688693771e-06, "loss": 1.4785, "step": 6179 }, { "epoch": 2.8338493292053664, "grad_norm": 0.38517090678215027, "learning_rate": 8.015004242655222e-06, "loss": 1.0836, "step": 6180 }, { "epoch": 2.8343079922027288, "grad_norm": 0.40751755237579346, "learning_rate": 7.970901484083471e-06, "loss": 2.0908, "step": 6181 }, { "epoch": 2.8347666552000916, "grad_norm": 0.32812926173210144, "learning_rate": 7.926919423797362e-06, "loss": 0.83, "step": 6182 }, { "epoch": 2.8352253181974545, "grad_norm": 0.36086252331733704, "learning_rate": 7.883058072585935e-06, "loss": 1.5337, "step": 6183 }, { "epoch": 2.8356839811948173, "grad_norm": 0.3034031391143799, "learning_rate": 7.839317441208693e-06, "loss": 1.334, "step": 6184 }, { "epoch": 2.8361426441921798, "grad_norm": 0.16254828870296478, "learning_rate": 7.795697540395552e-06, "loss": 0.5699, "step": 6185 }, { "epoch": 2.8366013071895426, "grad_norm": 0.30455073714256287, "learning_rate": 7.752198380846787e-06, "loss": 1.2536, "step": 6186 }, { "epoch": 2.837059970186905, "grad_norm": 0.21652016043663025, "learning_rate": 7.708819973233028e-06, "loss": 1.4794, "step": 6187 }, { "epoch": 2.837518633184268, "grad_norm": 0.2286727875471115, "learning_rate": 7.665562328195375e-06, "loss": 0.6623, "step": 6188 }, { "epoch": 2.8379772961816307, "grad_norm": 0.31880277395248413, "learning_rate": 7.622425456345172e-06, "loss": 1.2148, "step": 6189 }, { "epoch": 2.838435959178993, "grad_norm": 0.2918204367160797, "learning_rate": 7.5794093682641785e-06, "loss": 1.5525, "step": 6190 }, { "epoch": 2.838894622176356, "grad_norm": 0.3420119881629944, "learning_rate": 7.536514074504675e-06, "loss": 1.3251, "step": 6191 }, { "epoch": 2.8393532851737184, "grad_norm": 0.3030669391155243, "learning_rate": 7.4937395855890765e-06, "loss": 1.1624, "step": 6192 }, { "epoch": 2.8398119481710813, "grad_norm": 0.22030462324619293, "learning_rate": 7.45108591201038e-06, "loss": 1.4805, "step": 6193 }, { "epoch": 2.840270611168444, "grad_norm": 0.2560308873653412, "learning_rate": 7.408553064231716e-06, "loss": 0.6927, "step": 6194 }, { "epoch": 2.8407292741658066, "grad_norm": 0.28317221999168396, "learning_rate": 7.366141052686737e-06, "loss": 1.3214, "step": 6195 }, { "epoch": 2.8411879371631694, "grad_norm": 0.29174163937568665, "learning_rate": 7.3238498877794544e-06, "loss": 1.5961, "step": 6196 }, { "epoch": 2.841646600160532, "grad_norm": 0.4265110492706299, "learning_rate": 7.281679579884126e-06, "loss": 1.8915, "step": 6197 }, { "epoch": 2.8421052631578947, "grad_norm": 0.36179837584495544, "learning_rate": 7.239630139345532e-06, "loss": 0.9835, "step": 6198 }, { "epoch": 2.8425639261552575, "grad_norm": 0.1963047832250595, "learning_rate": 7.197701576478699e-06, "loss": 1.0536, "step": 6199 }, { "epoch": 2.84302258915262, "grad_norm": 0.26563286781311035, "learning_rate": 7.1558939015689e-06, "loss": 1.058, "step": 6200 }, { "epoch": 2.843481252149983, "grad_norm": 0.295461505651474, "learning_rate": 7.114207124871874e-06, "loss": 1.3314, "step": 6201 }, { "epoch": 2.8439399151473452, "grad_norm": 0.287113219499588, "learning_rate": 7.072641256613777e-06, "loss": 0.7959, "step": 6202 }, { "epoch": 2.844398578144708, "grad_norm": 0.2837468385696411, "learning_rate": 7.031196306991005e-06, "loss": 1.5568, "step": 6203 }, { "epoch": 2.844857241142071, "grad_norm": 0.2658827602863312, "learning_rate": 6.989872286170262e-06, "loss": 0.6964, "step": 6204 }, { "epoch": 2.845315904139434, "grad_norm": 0.3154827654361725, "learning_rate": 6.948669204288604e-06, "loss": 1.7673, "step": 6205 }, { "epoch": 2.845774567136796, "grad_norm": 0.3644176423549652, "learning_rate": 6.907587071453447e-06, "loss": 0.5235, "step": 6206 }, { "epoch": 2.846233230134159, "grad_norm": 0.2641429603099823, "learning_rate": 6.866625897742562e-06, "loss": 1.2719, "step": 6207 }, { "epoch": 2.8466918931315215, "grad_norm": 0.34545841813087463, "learning_rate": 6.825785693204023e-06, "loss": 1.8203, "step": 6208 }, { "epoch": 2.8471505561288843, "grad_norm": 0.34669947624206543, "learning_rate": 6.78506646785626e-06, "loss": 1.2168, "step": 6209 }, { "epoch": 2.847609219126247, "grad_norm": 0.2993127405643463, "learning_rate": 6.744468231688006e-06, "loss": 1.5106, "step": 6210 }, { "epoch": 2.8480678821236096, "grad_norm": 0.36481523513793945, "learning_rate": 6.7039909946581825e-06, "loss": 0.9908, "step": 6211 }, { "epoch": 2.8485265451209725, "grad_norm": 0.24906761944293976, "learning_rate": 6.663634766696236e-06, "loss": 1.5602, "step": 6212 }, { "epoch": 2.848985208118335, "grad_norm": 0.3981691002845764, "learning_rate": 6.623399557701803e-06, "loss": 1.237, "step": 6213 }, { "epoch": 2.8494438711156977, "grad_norm": 0.25347137451171875, "learning_rate": 6.5832853775448784e-06, "loss": 1.5596, "step": 6214 }, { "epoch": 2.8499025341130606, "grad_norm": 0.33858397603034973, "learning_rate": 6.543292236065812e-06, "loss": 1.1393, "step": 6215 }, { "epoch": 2.850361197110423, "grad_norm": 0.31756460666656494, "learning_rate": 6.50342014307509e-06, "loss": 2.2029, "step": 6216 }, { "epoch": 2.850819860107786, "grad_norm": 0.4458785653114319, "learning_rate": 6.463669108353776e-06, "loss": 1.5674, "step": 6217 }, { "epoch": 2.8512785231051483, "grad_norm": 0.3402140438556671, "learning_rate": 6.42403914165296e-06, "loss": 1.3496, "step": 6218 }, { "epoch": 2.851737186102511, "grad_norm": 0.3040880858898163, "learning_rate": 6.384530252694254e-06, "loss": 1.3495, "step": 6219 }, { "epoch": 2.852195849099874, "grad_norm": 0.4113820493221283, "learning_rate": 6.345142451169405e-06, "loss": 1.2961, "step": 6220 }, { "epoch": 2.8526545120972364, "grad_norm": 0.3598925471305847, "learning_rate": 6.305875746740574e-06, "loss": 1.4615, "step": 6221 }, { "epoch": 2.8531131750945993, "grad_norm": 0.3074336051940918, "learning_rate": 6.266730149040112e-06, "loss": 1.2253, "step": 6222 }, { "epoch": 2.8535718380919617, "grad_norm": 0.3465123474597931, "learning_rate": 6.22770566767078e-06, "loss": 1.5122, "step": 6223 }, { "epoch": 2.8540305010893245, "grad_norm": 0.3549046218395233, "learning_rate": 6.188802312205477e-06, "loss": 1.9436, "step": 6224 }, { "epoch": 2.8544891640866874, "grad_norm": 0.30973848700523376, "learning_rate": 6.1500200921875695e-06, "loss": 0.8639, "step": 6225 }, { "epoch": 2.8549478270840503, "grad_norm": 0.21493889391422272, "learning_rate": 6.111359017130558e-06, "loss": 1.3644, "step": 6226 }, { "epoch": 2.8554064900814127, "grad_norm": 0.2825007140636444, "learning_rate": 6.072819096518301e-06, "loss": 1.3103, "step": 6227 }, { "epoch": 2.8558651530787755, "grad_norm": 0.3511542081832886, "learning_rate": 6.034400339804902e-06, "loss": 1.46, "step": 6228 }, { "epoch": 2.856323816076138, "grad_norm": 0.27357274293899536, "learning_rate": 5.996102756414823e-06, "loss": 0.7502, "step": 6229 }, { "epoch": 2.856782479073501, "grad_norm": 0.22305287420749664, "learning_rate": 5.95792635574266e-06, "loss": 1.3862, "step": 6230 }, { "epoch": 2.8572411420708637, "grad_norm": 0.3165452778339386, "learning_rate": 5.919871147153422e-06, "loss": 1.2199, "step": 6231 }, { "epoch": 2.857699805068226, "grad_norm": 0.2813020646572113, "learning_rate": 5.881937139982307e-06, "loss": 0.8148, "step": 6232 }, { "epoch": 2.858158468065589, "grad_norm": 0.32087957859039307, "learning_rate": 5.844124343534707e-06, "loss": 1.5054, "step": 6233 }, { "epoch": 2.8586171310629513, "grad_norm": 0.22067861258983612, "learning_rate": 5.806432767086534e-06, "loss": 0.9947, "step": 6234 }, { "epoch": 2.859075794060314, "grad_norm": 0.28397810459136963, "learning_rate": 5.768862419883669e-06, "loss": 1.1807, "step": 6235 }, { "epoch": 2.859534457057677, "grad_norm": 0.28341740369796753, "learning_rate": 5.731413311142464e-06, "loss": 0.7978, "step": 6236 }, { "epoch": 2.8599931200550395, "grad_norm": 0.30934879183769226, "learning_rate": 5.694085450049402e-06, "loss": 1.3267, "step": 6237 }, { "epoch": 2.8604517830524023, "grad_norm": 0.32292166352272034, "learning_rate": 5.656878845761326e-06, "loss": 1.7622, "step": 6238 }, { "epoch": 2.8609104460497647, "grad_norm": 0.41772255301475525, "learning_rate": 5.619793507405324e-06, "loss": 1.4691, "step": 6239 }, { "epoch": 2.8613691090471276, "grad_norm": 0.3389241695404053, "learning_rate": 5.582829444078563e-06, "loss": 1.1824, "step": 6240 }, { "epoch": 2.8618277720444905, "grad_norm": 0.15310680866241455, "learning_rate": 5.5459866648487345e-06, "loss": 0.7682, "step": 6241 }, { "epoch": 2.862286435041853, "grad_norm": 0.33451229333877563, "learning_rate": 5.509265178753497e-06, "loss": 1.092, "step": 6242 }, { "epoch": 2.8627450980392157, "grad_norm": 0.23663324117660522, "learning_rate": 5.472664994801091e-06, "loss": 1.3781, "step": 6243 }, { "epoch": 2.863203761036578, "grad_norm": 0.3743632137775421, "learning_rate": 5.436186121969611e-06, "loss": 1.5989, "step": 6244 }, { "epoch": 2.863662424033941, "grad_norm": 0.3161230683326721, "learning_rate": 5.3998285692076765e-06, "loss": 1.1666, "step": 6245 }, { "epoch": 2.864121087031304, "grad_norm": 0.16574889421463013, "learning_rate": 5.363592345434043e-06, "loss": 1.4562, "step": 6246 }, { "epoch": 2.8645797500286663, "grad_norm": 0.35472193360328674, "learning_rate": 5.327477459537711e-06, "loss": 0.7368, "step": 6247 }, { "epoch": 2.865038413026029, "grad_norm": 0.2824570834636688, "learning_rate": 5.291483920377926e-06, "loss": 1.7105, "step": 6248 }, { "epoch": 2.8654970760233915, "grad_norm": 0.2135975956916809, "learning_rate": 5.255611736784183e-06, "loss": 1.493, "step": 6249 }, { "epoch": 2.8659557390207544, "grad_norm": 0.29121702909469604, "learning_rate": 5.219860917556163e-06, "loss": 1.2036, "step": 6250 }, { "epoch": 2.8664144020181173, "grad_norm": 0.43849441409111023, "learning_rate": 5.184231471463852e-06, "loss": 1.9182, "step": 6251 }, { "epoch": 2.86687306501548, "grad_norm": 0.3722185492515564, "learning_rate": 5.1487234072473135e-06, "loss": 1.6022, "step": 6252 }, { "epoch": 2.8673317280128425, "grad_norm": 0.3918931782245636, "learning_rate": 5.1133367336170245e-06, "loss": 1.9431, "step": 6253 }, { "epoch": 2.8677903910102054, "grad_norm": 0.4557853937149048, "learning_rate": 5.078071459253541e-06, "loss": 1.2453, "step": 6254 }, { "epoch": 2.868249054007568, "grad_norm": 0.11661559343338013, "learning_rate": 5.042927592807722e-06, "loss": 0.6514, "step": 6255 }, { "epoch": 2.8687077170049307, "grad_norm": 0.3350408673286438, "learning_rate": 5.00790514290056e-06, "loss": 1.8324, "step": 6256 }, { "epoch": 2.8691663800022935, "grad_norm": 0.37260258197784424, "learning_rate": 4.973004118123348e-06, "loss": 2.1526, "step": 6257 }, { "epoch": 2.869625042999656, "grad_norm": 0.6885807514190674, "learning_rate": 4.938224527037516e-06, "loss": 1.2802, "step": 6258 }, { "epoch": 2.870083705997019, "grad_norm": 0.27935993671417236, "learning_rate": 4.903566378174795e-06, "loss": 1.3413, "step": 6259 }, { "epoch": 2.870542368994381, "grad_norm": 0.3012205958366394, "learning_rate": 4.869029680037162e-06, "loss": 1.4973, "step": 6260 }, { "epoch": 2.871001031991744, "grad_norm": 0.4477826654911041, "learning_rate": 4.834614441096563e-06, "loss": 1.712, "step": 6261 }, { "epoch": 2.871459694989107, "grad_norm": 0.3502928912639618, "learning_rate": 4.800320669795355e-06, "loss": 1.2868, "step": 6262 }, { "epoch": 2.8719183579864693, "grad_norm": 0.19043685495853424, "learning_rate": 4.766148374546087e-06, "loss": 1.1846, "step": 6263 }, { "epoch": 2.872377020983832, "grad_norm": 0.27802857756614685, "learning_rate": 4.7320975637314415e-06, "loss": 1.6084, "step": 6264 }, { "epoch": 2.8728356839811946, "grad_norm": 0.3813520669937134, "learning_rate": 4.698168245704349e-06, "loss": 1.7174, "step": 6265 }, { "epoch": 2.8732943469785575, "grad_norm": 0.3918079137802124, "learning_rate": 4.6643604287878726e-06, "loss": 1.5721, "step": 6266 }, { "epoch": 2.8737530099759203, "grad_norm": 0.2761102020740509, "learning_rate": 4.630674121275325e-06, "loss": 1.1836, "step": 6267 }, { "epoch": 2.8742116729732827, "grad_norm": 0.33837902545928955, "learning_rate": 4.59710933143026e-06, "loss": 1.2286, "step": 6268 }, { "epoch": 2.8746703359706456, "grad_norm": 0.24910743534564972, "learning_rate": 4.56366606748626e-06, "loss": 1.6427, "step": 6269 }, { "epoch": 2.875128998968008, "grad_norm": 0.37364476919174194, "learning_rate": 4.5303443376472635e-06, "loss": 1.169, "step": 6270 }, { "epoch": 2.875587661965371, "grad_norm": 0.38252532482147217, "learning_rate": 4.49714415008734e-06, "loss": 1.5502, "step": 6271 }, { "epoch": 2.8760463249627337, "grad_norm": 0.35861456394195557, "learning_rate": 4.464065512950754e-06, "loss": 1.1111, "step": 6272 }, { "epoch": 2.8765049879600966, "grad_norm": 0.3056200444698334, "learning_rate": 4.4311084343518496e-06, "loss": 1.8339, "step": 6273 }, { "epoch": 2.876963650957459, "grad_norm": 0.2743782103061676, "learning_rate": 4.398272922375268e-06, "loss": 1.0009, "step": 6274 }, { "epoch": 2.877422313954822, "grad_norm": 0.3063773214817047, "learning_rate": 4.365558985075846e-06, "loss": 1.2168, "step": 6275 }, { "epoch": 2.8778809769521843, "grad_norm": 0.2390686422586441, "learning_rate": 4.332966630478497e-06, "loss": 0.8118, "step": 6276 }, { "epoch": 2.878339639949547, "grad_norm": 0.3186044991016388, "learning_rate": 4.300495866578435e-06, "loss": 1.162, "step": 6277 }, { "epoch": 2.87879830294691, "grad_norm": 0.32464686036109924, "learning_rate": 4.268146701340847e-06, "loss": 1.9541, "step": 6278 }, { "epoch": 2.8792569659442724, "grad_norm": 0.3670261800289154, "learning_rate": 4.235919142701272e-06, "loss": 1.6052, "step": 6279 }, { "epoch": 2.8797156289416352, "grad_norm": 0.2734290361404419, "learning_rate": 4.203813198565387e-06, "loss": 0.9119, "step": 6280 }, { "epoch": 2.8801742919389977, "grad_norm": 0.20262974500656128, "learning_rate": 4.171828876809003e-06, "loss": 1.2017, "step": 6281 }, { "epoch": 2.8806329549363605, "grad_norm": 0.3488999307155609, "learning_rate": 4.1399661852781764e-06, "loss": 1.2915, "step": 6282 }, { "epoch": 2.8810916179337234, "grad_norm": 0.39077028632164, "learning_rate": 4.108225131788934e-06, "loss": 2.073, "step": 6283 }, { "epoch": 2.881550280931086, "grad_norm": 0.3423274755477905, "learning_rate": 4.076605724127602e-06, "loss": 1.2798, "step": 6284 }, { "epoch": 2.8820089439284486, "grad_norm": 0.3173692226409912, "learning_rate": 4.045107970050699e-06, "loss": 1.0493, "step": 6285 }, { "epoch": 2.882467606925811, "grad_norm": 0.3519616425037384, "learning_rate": 4.0137318772848205e-06, "loss": 1.5614, "step": 6286 }, { "epoch": 2.882926269923174, "grad_norm": 0.3195357024669647, "learning_rate": 3.982477453526756e-06, "loss": 1.1449, "step": 6287 }, { "epoch": 2.8833849329205368, "grad_norm": 0.21202579140663147, "learning_rate": 3.951344706443427e-06, "loss": 1.2399, "step": 6288 }, { "epoch": 2.883843595917899, "grad_norm": 0.28453898429870605, "learning_rate": 3.920333643672003e-06, "loss": 1.2978, "step": 6289 }, { "epoch": 2.884302258915262, "grad_norm": 0.3411079943180084, "learning_rate": 3.88944427281962e-06, "loss": 1.5203, "step": 6290 }, { "epoch": 2.8847609219126245, "grad_norm": 0.32296404242515564, "learning_rate": 3.858676601463662e-06, "loss": 1.1908, "step": 6291 }, { "epoch": 2.8852195849099873, "grad_norm": 0.3648724853992462, "learning_rate": 3.828030637151758e-06, "loss": 1.6689, "step": 6292 }, { "epoch": 2.88567824790735, "grad_norm": 0.26379936933517456, "learning_rate": 3.797506387401506e-06, "loss": 1.2486, "step": 6293 }, { "epoch": 2.886136910904713, "grad_norm": 0.3017299175262451, "learning_rate": 3.767103859700749e-06, "loss": 0.9213, "step": 6294 }, { "epoch": 2.8865955739020754, "grad_norm": 0.3231751322746277, "learning_rate": 3.7368230615074105e-06, "loss": 1.9851, "step": 6295 }, { "epoch": 2.8870542368994383, "grad_norm": 0.401769757270813, "learning_rate": 3.70666400024966e-06, "loss": 1.3682, "step": 6296 }, { "epoch": 2.8875128998968007, "grad_norm": 0.3910331428050995, "learning_rate": 3.6766266833256346e-06, "loss": 1.2714, "step": 6297 }, { "epoch": 2.8879715628941636, "grad_norm": 0.3329823911190033, "learning_rate": 3.646711118103774e-06, "loss": 1.8126, "step": 6298 }, { "epoch": 2.8884302258915264, "grad_norm": 0.4459758996963501, "learning_rate": 3.616917311922596e-06, "loss": 1.793, "step": 6299 }, { "epoch": 2.888888888888889, "grad_norm": 0.4213806986808777, "learning_rate": 3.5872452720907e-06, "loss": 1.9231, "step": 6300 }, { "epoch": 2.8893475518862517, "grad_norm": 0.4440397620201111, "learning_rate": 3.557695005886874e-06, "loss": 1.5308, "step": 6301 }, { "epoch": 2.889806214883614, "grad_norm": 0.263126939535141, "learning_rate": 3.5282665205599306e-06, "loss": 0.9987, "step": 6302 }, { "epoch": 2.890264877880977, "grad_norm": 0.2880323529243469, "learning_rate": 3.49895982332904e-06, "loss": 1.6559, "step": 6303 }, { "epoch": 2.89072354087834, "grad_norm": 0.9521461725234985, "learning_rate": 3.4697749213832284e-06, "loss": 1.5062, "step": 6304 }, { "epoch": 2.8911822038757022, "grad_norm": 0.28019800782203674, "learning_rate": 3.4407118218818256e-06, "loss": 1.2073, "step": 6305 }, { "epoch": 2.891640866873065, "grad_norm": 0.35234352946281433, "learning_rate": 3.411770531954128e-06, "loss": 1.5915, "step": 6306 }, { "epoch": 2.8920995298704275, "grad_norm": 0.37154096364974976, "learning_rate": 3.382951058699735e-06, "loss": 0.9094, "step": 6307 }, { "epoch": 2.8925581928677904, "grad_norm": 0.08573618531227112, "learning_rate": 3.354253409188268e-06, "loss": 1.1077, "step": 6308 }, { "epoch": 2.8930168558651532, "grad_norm": 0.3723006844520569, "learning_rate": 3.3256775904594307e-06, "loss": 0.845, "step": 6309 }, { "epoch": 2.8934755188625156, "grad_norm": 0.10502085089683533, "learning_rate": 3.297223609523059e-06, "loss": 1.2752, "step": 6310 }, { "epoch": 2.8939341818598785, "grad_norm": 0.4659182131290436, "learning_rate": 3.2688914733591814e-06, "loss": 0.976, "step": 6311 }, { "epoch": 2.894392844857241, "grad_norm": 0.3109420835971832, "learning_rate": 3.2406811889177933e-06, "loss": 0.9553, "step": 6312 }, { "epoch": 2.8948515078546038, "grad_norm": 0.2608180344104767, "learning_rate": 3.2125927631191933e-06, "loss": 0.8474, "step": 6313 }, { "epoch": 2.8953101708519666, "grad_norm": 0.21614636480808258, "learning_rate": 3.184626202853591e-06, "loss": 1.3879, "step": 6314 }, { "epoch": 2.8957688338493295, "grad_norm": 0.3109268546104431, "learning_rate": 3.1567815149813885e-06, "loss": 1.1218, "step": 6315 }, { "epoch": 2.896227496846692, "grad_norm": 0.2876843214035034, "learning_rate": 3.129058706333121e-06, "loss": 1.6882, "step": 6316 }, { "epoch": 2.8966861598440543, "grad_norm": 0.38865727186203003, "learning_rate": 3.1014577837093496e-06, "loss": 0.799, "step": 6317 }, { "epoch": 2.897144822841417, "grad_norm": 0.21412920951843262, "learning_rate": 3.073978753880824e-06, "loss": 1.5184, "step": 6318 }, { "epoch": 2.89760348583878, "grad_norm": 0.4019043743610382, "learning_rate": 3.046621623588375e-06, "loss": 1.6321, "step": 6319 }, { "epoch": 2.898062148836143, "grad_norm": 0.18866117298603058, "learning_rate": 3.0193863995428005e-06, "loss": 1.1956, "step": 6320 }, { "epoch": 2.8985208118335053, "grad_norm": 0.3983370065689087, "learning_rate": 2.992273088425146e-06, "loss": 0.9323, "step": 6321 }, { "epoch": 2.898979474830868, "grad_norm": 0.17893198132514954, "learning_rate": 2.96528169688659e-06, "loss": 1.5333, "step": 6322 }, { "epoch": 2.8994381378282306, "grad_norm": 0.3908742368221283, "learning_rate": 2.938412231548171e-06, "loss": 1.3581, "step": 6323 }, { "epoch": 2.8998968008255934, "grad_norm": 0.28429678082466125, "learning_rate": 2.911664699001282e-06, "loss": 1.7076, "step": 6324 }, { "epoch": 2.9003554638229563, "grad_norm": 0.36891356110572815, "learning_rate": 2.8850391058071747e-06, "loss": 1.2607, "step": 6325 }, { "epoch": 2.9008141268203187, "grad_norm": 0.2793447971343994, "learning_rate": 2.8585354584974022e-06, "loss": 1.29, "step": 6326 }, { "epoch": 2.9012727898176816, "grad_norm": 0.29457083344459534, "learning_rate": 2.832153763573486e-06, "loss": 0.7886, "step": 6327 }, { "epoch": 2.901731452815044, "grad_norm": 0.3590116798877716, "learning_rate": 2.8058940275069722e-06, "loss": 1.5443, "step": 6328 }, { "epoch": 2.902190115812407, "grad_norm": 0.43476438522338867, "learning_rate": 2.7797562567395963e-06, "loss": 1.8802, "step": 6329 }, { "epoch": 2.9026487788097697, "grad_norm": 0.3066580891609192, "learning_rate": 2.7537404576831737e-06, "loss": 0.789, "step": 6330 }, { "epoch": 2.903107441807132, "grad_norm": 0.07547781616449356, "learning_rate": 2.727846636719544e-06, "loss": 0.803, "step": 6331 }, { "epoch": 2.903566104804495, "grad_norm": 0.38495829701423645, "learning_rate": 2.7020748002006824e-06, "loss": 2.0801, "step": 6332 }, { "epoch": 2.9040247678018574, "grad_norm": 0.5015888810157776, "learning_rate": 2.676424954448531e-06, "loss": 1.3515, "step": 6333 }, { "epoch": 2.9044834307992202, "grad_norm": 0.4295569658279419, "learning_rate": 2.650897105755279e-06, "loss": 1.6154, "step": 6334 }, { "epoch": 2.904942093796583, "grad_norm": 0.24834929406642914, "learning_rate": 2.625491260382973e-06, "loss": 0.7925, "step": 6335 }, { "epoch": 2.9054007567939455, "grad_norm": 0.3073466420173645, "learning_rate": 2.600207424563961e-06, "loss": 1.0556, "step": 6336 }, { "epoch": 2.9058594197913084, "grad_norm": 0.21239478886127472, "learning_rate": 2.5750456045005035e-06, "loss": 1.2195, "step": 6337 }, { "epoch": 2.9063180827886708, "grad_norm": 0.32616496086120605, "learning_rate": 2.5500058063649965e-06, "loss": 0.7554, "step": 6338 }, { "epoch": 2.9067767457860336, "grad_norm": 0.08231624960899353, "learning_rate": 2.5250880362998607e-06, "loss": 1.3146, "step": 6339 }, { "epoch": 2.9072354087833965, "grad_norm": 0.4305974245071411, "learning_rate": 2.500292300417595e-06, "loss": 1.6447, "step": 6340 }, { "epoch": 2.9076940717807593, "grad_norm": 0.27547237277030945, "learning_rate": 2.4756186048007225e-06, "loss": 1.3123, "step": 6341 }, { "epoch": 2.9081527347781218, "grad_norm": 0.25122833251953125, "learning_rate": 2.4510669555020125e-06, "loss": 1.1544, "step": 6342 }, { "epoch": 2.9086113977754846, "grad_norm": 0.27697306871414185, "learning_rate": 2.4266373585440924e-06, "loss": 0.4727, "step": 6343 }, { "epoch": 2.909070060772847, "grad_norm": 0.3418446481227875, "learning_rate": 2.402329819919724e-06, "loss": 1.6129, "step": 6344 }, { "epoch": 2.90952872377021, "grad_norm": 0.4216833710670471, "learning_rate": 2.3781443455916927e-06, "loss": 1.2566, "step": 6345 }, { "epoch": 2.9099873867675727, "grad_norm": 0.3174903690814972, "learning_rate": 2.3540809414929196e-06, "loss": 1.3995, "step": 6346 }, { "epoch": 2.910446049764935, "grad_norm": 0.2971450984477997, "learning_rate": 2.3301396135262387e-06, "loss": 1.087, "step": 6347 }, { "epoch": 2.910904712762298, "grad_norm": 0.41538989543914795, "learning_rate": 2.3063203675647848e-06, "loss": 1.3513, "step": 6348 }, { "epoch": 2.9113633757596604, "grad_norm": 0.36953091621398926, "learning_rate": 2.28262320945144e-06, "loss": 1.7742, "step": 6349 }, { "epoch": 2.9118220387570233, "grad_norm": 0.31279754638671875, "learning_rate": 2.259048144999387e-06, "loss": 1.3542, "step": 6350 }, { "epoch": 2.912280701754386, "grad_norm": 0.2468571960926056, "learning_rate": 2.2355951799916674e-06, "loss": 1.3647, "step": 6351 }, { "epoch": 2.9127393647517485, "grad_norm": 0.577455461025238, "learning_rate": 2.212264320181567e-06, "loss": 1.2788, "step": 6352 }, { "epoch": 2.9131980277491114, "grad_norm": 0.3301372230052948, "learning_rate": 2.1890555712922313e-06, "loss": 1.6881, "step": 6353 }, { "epoch": 2.913656690746474, "grad_norm": 0.3213047385215759, "learning_rate": 2.1659689390169934e-06, "loss": 1.3643, "step": 6354 }, { "epoch": 2.9141153537438367, "grad_norm": 0.3952171504497528, "learning_rate": 2.1430044290191573e-06, "loss": 1.7974, "step": 6355 }, { "epoch": 2.9145740167411995, "grad_norm": 0.3204817771911621, "learning_rate": 2.1201620469320503e-06, "loss": 1.4843, "step": 6356 }, { "epoch": 2.915032679738562, "grad_norm": 0.4052104949951172, "learning_rate": 2.0974417983590787e-06, "loss": 1.7517, "step": 6357 }, { "epoch": 2.915491342735925, "grad_norm": 0.28286999464035034, "learning_rate": 2.0748436888737286e-06, "loss": 1.0675, "step": 6358 }, { "epoch": 2.915950005733287, "grad_norm": 0.4025648534297943, "learning_rate": 2.0523677240193994e-06, "loss": 1.6526, "step": 6359 }, { "epoch": 2.91640866873065, "grad_norm": 0.41483741998672485, "learning_rate": 2.030013909309736e-06, "loss": 2.1431, "step": 6360 }, { "epoch": 2.916867331728013, "grad_norm": 0.36023521423339844, "learning_rate": 2.0077822502281295e-06, "loss": 1.3888, "step": 6361 }, { "epoch": 2.917325994725376, "grad_norm": 0.3408292233943939, "learning_rate": 1.9856727522282734e-06, "loss": 1.2604, "step": 6362 }, { "epoch": 2.917784657722738, "grad_norm": 0.2545235753059387, "learning_rate": 1.963685420733774e-06, "loss": 1.4949, "step": 6363 }, { "epoch": 2.918243320720101, "grad_norm": 0.27326932549476624, "learning_rate": 1.9418202611382607e-06, "loss": 0.7846, "step": 6364 }, { "epoch": 2.9187019837174635, "grad_norm": 0.33998316526412964, "learning_rate": 1.920077278805443e-06, "loss": 1.5005, "step": 6365 }, { "epoch": 2.9191606467148263, "grad_norm": 0.3685235381126404, "learning_rate": 1.8984564790689996e-06, "loss": 1.4185, "step": 6366 }, { "epoch": 2.919619309712189, "grad_norm": 0.18980297446250916, "learning_rate": 1.8769578672326316e-06, "loss": 1.2739, "step": 6367 }, { "epoch": 2.9200779727095516, "grad_norm": 0.30294135212898254, "learning_rate": 1.8555814485702316e-06, "loss": 1.1948, "step": 6368 }, { "epoch": 2.9205366357069145, "grad_norm": 0.3762916922569275, "learning_rate": 1.8343272283254386e-06, "loss": 1.7822, "step": 6369 }, { "epoch": 2.920995298704277, "grad_norm": 0.3765801787376404, "learning_rate": 1.813195211712193e-06, "loss": 1.1016, "step": 6370 }, { "epoch": 2.9214539617016397, "grad_norm": 0.22130340337753296, "learning_rate": 1.792185403914237e-06, "loss": 1.6736, "step": 6371 }, { "epoch": 2.9219126246990026, "grad_norm": 0.33724433183670044, "learning_rate": 1.7712978100854482e-06, "loss": 1.2495, "step": 6372 }, { "epoch": 2.922371287696365, "grad_norm": 0.30301743745803833, "learning_rate": 1.7505324353497831e-06, "loss": 1.7449, "step": 6373 }, { "epoch": 2.922829950693728, "grad_norm": 0.2647053003311157, "learning_rate": 1.7298892848010006e-06, "loss": 1.3491, "step": 6374 }, { "epoch": 2.9232886136910903, "grad_norm": 0.47797808051109314, "learning_rate": 1.7093683635031609e-06, "loss": 1.2687, "step": 6375 }, { "epoch": 2.923747276688453, "grad_norm": 0.32065433263778687, "learning_rate": 1.6889696764900708e-06, "loss": 1.3747, "step": 6376 }, { "epoch": 2.924205939685816, "grad_norm": 0.24311882257461548, "learning_rate": 1.668693228765783e-06, "loss": 0.6366, "step": 6377 }, { "epoch": 2.9246646026831784, "grad_norm": 0.15887467563152313, "learning_rate": 1.6485390253041521e-06, "loss": 1.2029, "step": 6378 }, { "epoch": 2.9251232656805413, "grad_norm": 0.33973756432533264, "learning_rate": 1.6285070710492233e-06, "loss": 1.6896, "step": 6379 }, { "epoch": 2.9255819286779037, "grad_norm": 0.38102298974990845, "learning_rate": 1.6085973709149548e-06, "loss": 1.0677, "step": 6380 }, { "epoch": 2.9260405916752665, "grad_norm": 0.3507750332355499, "learning_rate": 1.5888099297853288e-06, "loss": 1.2086, "step": 6381 }, { "epoch": 2.9264992546726294, "grad_norm": 0.1782960146665573, "learning_rate": 1.5691447525143509e-06, "loss": 1.05, "step": 6382 }, { "epoch": 2.9269579176699922, "grad_norm": 0.25808510184288025, "learning_rate": 1.5496018439260518e-06, "loss": 0.8826, "step": 6383 }, { "epoch": 2.9274165806673547, "grad_norm": 0.2699086368083954, "learning_rate": 1.5301812088144296e-06, "loss": 0.7635, "step": 6384 }, { "epoch": 2.927875243664717, "grad_norm": 0.12165196985006332, "learning_rate": 1.510882851943507e-06, "loss": 1.2339, "step": 6385 }, { "epoch": 2.92833390666208, "grad_norm": 0.27769702672958374, "learning_rate": 1.4917067780473304e-06, "loss": 0.799, "step": 6386 }, { "epoch": 2.928792569659443, "grad_norm": 0.29953867197036743, "learning_rate": 1.4726529918299148e-06, "loss": 1.9141, "step": 6387 }, { "epoch": 2.9292512326568056, "grad_norm": 0.39282310009002686, "learning_rate": 1.4537214979652435e-06, "loss": 1.6138, "step": 6388 }, { "epoch": 2.929709895654168, "grad_norm": 0.28682559728622437, "learning_rate": 1.4349123010974908e-06, "loss": 0.9948, "step": 6389 }, { "epoch": 2.930168558651531, "grad_norm": 0.3742808699607849, "learning_rate": 1.416225405840521e-06, "loss": 1.5709, "step": 6390 }, { "epoch": 2.9306272216488933, "grad_norm": 0.36167392134666443, "learning_rate": 1.39766081677839e-06, "loss": 1.3771, "step": 6391 }, { "epoch": 2.931085884646256, "grad_norm": 0.33344385027885437, "learning_rate": 1.3792185384652322e-06, "loss": 1.4467, "step": 6392 }, { "epoch": 2.931544547643619, "grad_norm": 0.3055780231952667, "learning_rate": 1.3608985754249848e-06, "loss": 1.4431, "step": 6393 }, { "epoch": 2.9320032106409815, "grad_norm": 0.39428263902664185, "learning_rate": 1.3427009321517191e-06, "loss": 2.0354, "step": 6394 }, { "epoch": 2.9324618736383443, "grad_norm": 0.4126293361186981, "learning_rate": 1.3246256131093647e-06, "loss": 1.7002, "step": 6395 }, { "epoch": 2.9329205366357067, "grad_norm": 0.2903102934360504, "learning_rate": 1.3066726227320414e-06, "loss": 1.148, "step": 6396 }, { "epoch": 2.9333791996330696, "grad_norm": 0.35992351174354553, "learning_rate": 1.2888419654236706e-06, "loss": 1.39, "step": 6397 }, { "epoch": 2.9338378626304324, "grad_norm": 0.37300676107406616, "learning_rate": 1.2711336455582533e-06, "loss": 1.2245, "step": 6398 }, { "epoch": 2.934296525627795, "grad_norm": 0.21308903396129608, "learning_rate": 1.2535476674797596e-06, "loss": 1.2061, "step": 6399 }, { "epoch": 2.9347551886251577, "grad_norm": 0.2780902683734894, "learning_rate": 1.2360840355022386e-06, "loss": 1.5712, "step": 6400 }, { "epoch": 2.93521385162252, "grad_norm": 0.36661916971206665, "learning_rate": 1.2187427539094854e-06, "loss": 1.1625, "step": 6401 }, { "epoch": 2.935672514619883, "grad_norm": 0.28895169496536255, "learning_rate": 1.2015238269555972e-06, "loss": 0.7739, "step": 6402 }, { "epoch": 2.936131177617246, "grad_norm": 0.2886188328266144, "learning_rate": 1.184427258864418e-06, "loss": 1.7198, "step": 6403 }, { "epoch": 2.9365898406146083, "grad_norm": 0.36322250962257385, "learning_rate": 1.16745305382987e-06, "loss": 0.7532, "step": 6404 }, { "epoch": 2.937048503611971, "grad_norm": 0.3152855634689331, "learning_rate": 1.1506012160158452e-06, "loss": 1.7031, "step": 6405 }, { "epoch": 2.9375071666093335, "grad_norm": 0.39350810647010803, "learning_rate": 1.133871749556259e-06, "loss": 1.6578, "step": 6406 }, { "epoch": 2.9379658296066964, "grad_norm": 0.23590238392353058, "learning_rate": 1.1172646585549396e-06, "loss": 1.6073, "step": 6407 }, { "epoch": 2.9384244926040592, "grad_norm": 0.3189743161201477, "learning_rate": 1.1007799470857393e-06, "loss": 1.1992, "step": 6408 }, { "epoch": 2.938883155601422, "grad_norm": 0.45767655968666077, "learning_rate": 1.0844176191924237e-06, "loss": 1.8853, "step": 6409 }, { "epoch": 2.9393418185987845, "grad_norm": 0.3135172128677368, "learning_rate": 1.068177678888893e-06, "loss": 1.7242, "step": 6410 }, { "epoch": 2.9398004815961474, "grad_norm": 0.3678114712238312, "learning_rate": 1.0520601301588494e-06, "loss": 1.4546, "step": 6411 }, { "epoch": 2.94025914459351, "grad_norm": 0.33061683177948, "learning_rate": 1.0360649769560193e-06, "loss": 1.214, "step": 6412 }, { "epoch": 2.9407178075908726, "grad_norm": 0.31986305117607117, "learning_rate": 1.0201922232041528e-06, "loss": 1.6663, "step": 6413 }, { "epoch": 2.9411764705882355, "grad_norm": 0.4389913082122803, "learning_rate": 1.0044418727970238e-06, "loss": 1.4183, "step": 6414 }, { "epoch": 2.941635133585598, "grad_norm": 0.22724516689777374, "learning_rate": 9.88813929598209e-07, "loss": 1.3397, "step": 6415 }, { "epoch": 2.9420937965829608, "grad_norm": 0.41958609223365784, "learning_rate": 9.733083974414193e-07, "loss": 1.3082, "step": 6416 }, { "epoch": 2.942552459580323, "grad_norm": 0.3317757546901703, "learning_rate": 9.579252801302785e-07, "loss": 1.5914, "step": 6417 }, { "epoch": 2.943011122577686, "grad_norm": 0.2724321782588959, "learning_rate": 9.426645814382683e-07, "loss": 1.1695, "step": 6418 }, { "epoch": 2.943469785575049, "grad_norm": 0.3204740285873413, "learning_rate": 9.27526305109061e-07, "loss": 1.5457, "step": 6419 }, { "epoch": 2.9439284485724113, "grad_norm": 0.34586411714553833, "learning_rate": 9.125104548561857e-07, "loss": 1.2657, "step": 6420 }, { "epoch": 2.944387111569774, "grad_norm": 0.3183816075325012, "learning_rate": 8.976170343630297e-07, "loss": 0.9479, "step": 6421 }, { "epoch": 2.9448457745671366, "grad_norm": 0.23148377239704132, "learning_rate": 8.828460472832256e-07, "loss": 1.7264, "step": 6422 }, { "epoch": 2.9453044375644994, "grad_norm": 0.37857529520988464, "learning_rate": 8.68197497240042e-07, "loss": 0.9766, "step": 6423 }, { "epoch": 2.9457631005618623, "grad_norm": 0.10237754881381989, "learning_rate": 8.536713878269376e-07, "loss": 0.4709, "step": 6424 }, { "epoch": 2.9462217635592247, "grad_norm": 0.21351243555545807, "learning_rate": 8.392677226072843e-07, "loss": 1.2244, "step": 6425 }, { "epoch": 2.9466804265565876, "grad_norm": 0.2921064794063568, "learning_rate": 8.249865051143668e-07, "loss": 1.2155, "step": 6426 }, { "epoch": 2.94713908955395, "grad_norm": 0.3035675585269928, "learning_rate": 8.108277388515495e-07, "loss": 1.0669, "step": 6427 }, { "epoch": 2.947597752551313, "grad_norm": 0.2804781198501587, "learning_rate": 7.967914272919985e-07, "loss": 0.9862, "step": 6428 }, { "epoch": 2.9480564155486757, "grad_norm": 0.34943005442619324, "learning_rate": 7.828775738789595e-07, "loss": 1.194, "step": 6429 }, { "epoch": 2.9485150785460386, "grad_norm": 0.13486827909946442, "learning_rate": 7.690861820255912e-07, "loss": 0.9005, "step": 6430 }, { "epoch": 2.948973741543401, "grad_norm": 0.3738475739955902, "learning_rate": 7.554172551150206e-07, "loss": 1.5659, "step": 6431 }, { "epoch": 2.949432404540764, "grad_norm": 0.2654702961444855, "learning_rate": 7.418707965003435e-07, "loss": 1.1194, "step": 6432 }, { "epoch": 2.9498910675381262, "grad_norm": 0.36275702714920044, "learning_rate": 7.284468095045682e-07, "loss": 1.5873, "step": 6433 }, { "epoch": 2.950349730535489, "grad_norm": 0.386787474155426, "learning_rate": 7.151452974207828e-07, "loss": 1.2502, "step": 6434 }, { "epoch": 2.950808393532852, "grad_norm": 0.1252823770046234, "learning_rate": 7.019662635118218e-07, "loss": 0.8411, "step": 6435 }, { "epoch": 2.9512670565302144, "grad_norm": 0.24427084624767303, "learning_rate": 6.8890971101071e-07, "loss": 0.7842, "step": 6436 }, { "epoch": 2.9517257195275772, "grad_norm": 0.26967909932136536, "learning_rate": 6.759756431202746e-07, "loss": 1.2764, "step": 6437 }, { "epoch": 2.9521843825249396, "grad_norm": 0.3875824511051178, "learning_rate": 6.631640630133106e-07, "loss": 1.6353, "step": 6438 }, { "epoch": 2.9526430455223025, "grad_norm": 0.3367568850517273, "learning_rate": 6.504749738325822e-07, "loss": 1.6357, "step": 6439 }, { "epoch": 2.9531017085196654, "grad_norm": 0.36924004554748535, "learning_rate": 6.379083786908768e-07, "loss": 1.1762, "step": 6440 }, { "epoch": 2.9535603715170278, "grad_norm": 0.09012544900178909, "learning_rate": 6.254642806707845e-07, "loss": 0.7179, "step": 6441 }, { "epoch": 2.9540190345143906, "grad_norm": 0.40828028321266174, "learning_rate": 6.131426828250297e-07, "loss": 2.0659, "step": 6442 }, { "epoch": 2.954477697511753, "grad_norm": 0.7955999374389648, "learning_rate": 6.009435881760838e-07, "loss": 1.0877, "step": 6443 }, { "epoch": 2.954936360509116, "grad_norm": 0.1659400314092636, "learning_rate": 5.888669997165529e-07, "loss": 1.1334, "step": 6444 }, { "epoch": 2.9553950235064788, "grad_norm": 0.3118785619735718, "learning_rate": 5.769129204089007e-07, "loss": 1.781, "step": 6445 }, { "epoch": 2.955853686503841, "grad_norm": 0.4559882879257202, "learning_rate": 5.650813531855592e-07, "loss": 2.0151, "step": 6446 }, { "epoch": 2.956312349501204, "grad_norm": 0.4162604808807373, "learning_rate": 5.533723009488734e-07, "loss": 1.5276, "step": 6447 }, { "epoch": 2.9567710124985664, "grad_norm": 0.3055538237094879, "learning_rate": 5.417857665711012e-07, "loss": 0.8876, "step": 6448 }, { "epoch": 2.9572296754959293, "grad_norm": 0.28854599595069885, "learning_rate": 5.303217528945802e-07, "loss": 1.6246, "step": 6449 }, { "epoch": 2.957688338493292, "grad_norm": 0.3452959358692169, "learning_rate": 5.18980262731561e-07, "loss": 0.9966, "step": 6450 }, { "epoch": 2.958147001490655, "grad_norm": 0.16069626808166504, "learning_rate": 5.077612988640401e-07, "loss": 0.626, "step": 6451 }, { "epoch": 2.9586056644880174, "grad_norm": 0.2646504342556, "learning_rate": 4.966648640442606e-07, "loss": 1.5773, "step": 6452 }, { "epoch": 2.95906432748538, "grad_norm": 0.40957656502723694, "learning_rate": 4.856909609941562e-07, "loss": 1.1276, "step": 6453 }, { "epoch": 2.9595229904827427, "grad_norm": 0.27915382385253906, "learning_rate": 4.7483959240574025e-07, "loss": 1.4488, "step": 6454 }, { "epoch": 2.9599816534801056, "grad_norm": 0.36528947949409485, "learning_rate": 4.6411076094099447e-07, "loss": 1.6633, "step": 6455 }, { "epoch": 2.9604403164774684, "grad_norm": 0.3213283121585846, "learning_rate": 4.53504469231647e-07, "loss": 1.697, "step": 6456 }, { "epoch": 2.960898979474831, "grad_norm": 0.3525945842266083, "learning_rate": 4.430207198796166e-07, "loss": 1.1332, "step": 6457 }, { "epoch": 2.9613576424721937, "grad_norm": 0.3523452877998352, "learning_rate": 4.3265951545656823e-07, "loss": 1.9775, "step": 6458 }, { "epoch": 2.961816305469556, "grad_norm": 0.3788760006427765, "learning_rate": 4.224208585042466e-07, "loss": 1.3544, "step": 6459 }, { "epoch": 2.962274968466919, "grad_norm": 0.3480757772922516, "learning_rate": 4.123047515341982e-07, "loss": 1.3392, "step": 6460 }, { "epoch": 2.962733631464282, "grad_norm": 0.33295026421546936, "learning_rate": 4.0231119702799355e-07, "loss": 1.4835, "step": 6461 }, { "epoch": 2.9631922944616442, "grad_norm": 0.32605263590812683, "learning_rate": 3.924401974371716e-07, "loss": 1.1706, "step": 6462 }, { "epoch": 2.963650957459007, "grad_norm": 0.3713498115539551, "learning_rate": 3.826917551831288e-07, "loss": 1.5077, "step": 6463 }, { "epoch": 2.9641096204563695, "grad_norm": 0.41880208253860474, "learning_rate": 3.7306587265717453e-07, "loss": 1.3178, "step": 6464 }, { "epoch": 2.9645682834537324, "grad_norm": 0.19970370829105377, "learning_rate": 3.6356255222069756e-07, "loss": 0.8712, "step": 6465 }, { "epoch": 2.965026946451095, "grad_norm": 0.27967116236686707, "learning_rate": 3.5418179620488876e-07, "loss": 1.4177, "step": 6466 }, { "epoch": 2.9654856094484576, "grad_norm": 0.43477344512939453, "learning_rate": 3.449236069109074e-07, "loss": 1.1676, "step": 6467 }, { "epoch": 2.9659442724458205, "grad_norm": 0.274976909160614, "learning_rate": 3.357879866098812e-07, "loss": 1.6171, "step": 6468 }, { "epoch": 2.966402935443183, "grad_norm": 0.28893306851387024, "learning_rate": 3.267749375427953e-07, "loss": 0.8573, "step": 6469 }, { "epoch": 2.9668615984405458, "grad_norm": 0.12806662917137146, "learning_rate": 3.178844619207144e-07, "loss": 0.8589, "step": 6470 }, { "epoch": 2.9673202614379086, "grad_norm": 0.24989382922649384, "learning_rate": 3.091165619243941e-07, "loss": 1.3807, "step": 6471 }, { "epoch": 2.967778924435271, "grad_norm": 0.2839709520339966, "learning_rate": 3.004712397047804e-07, "loss": 1.2, "step": 6472 }, { "epoch": 2.968237587432634, "grad_norm": 0.4096415638923645, "learning_rate": 2.919484973826214e-07, "loss": 1.9077, "step": 6473 }, { "epoch": 2.9686962504299963, "grad_norm": 0.32572391629219055, "learning_rate": 2.8354833704852257e-07, "loss": 1.0639, "step": 6474 }, { "epoch": 2.969154913427359, "grad_norm": 0.3826916515827179, "learning_rate": 2.752707607631688e-07, "loss": 1.9023, "step": 6475 }, { "epoch": 2.969613576424722, "grad_norm": 0.48622024059295654, "learning_rate": 2.6711577055710255e-07, "loss": 1.61, "step": 6476 }, { "epoch": 2.970072239422085, "grad_norm": 0.262005478143692, "learning_rate": 2.590833684307792e-07, "loss": 1.0063, "step": 6477 }, { "epoch": 2.9705309024194473, "grad_norm": 0.21147648990154266, "learning_rate": 2.51173556354678e-07, "loss": 0.8564, "step": 6478 }, { "epoch": 2.97098956541681, "grad_norm": 0.34185197949409485, "learning_rate": 2.433863362690247e-07, "loss": 1.8838, "step": 6479 }, { "epoch": 2.9714482284141726, "grad_norm": 0.334324449300766, "learning_rate": 2.357217100841802e-07, "loss": 0.827, "step": 6480 }, { "epoch": 2.9719068914115354, "grad_norm": 0.28424781560897827, "learning_rate": 2.2817967968025155e-07, "loss": 1.6332, "step": 6481 }, { "epoch": 2.9723655544088983, "grad_norm": 0.31317272782325745, "learning_rate": 2.2076024690742546e-07, "loss": 1.1931, "step": 6482 }, { "epoch": 2.9728242174062607, "grad_norm": 0.24583421647548676, "learning_rate": 2.1346341358569054e-07, "loss": 0.6048, "step": 6483 }, { "epoch": 2.9732828804036235, "grad_norm": 0.38179898262023926, "learning_rate": 2.0628918150500387e-07, "loss": 1.2948, "step": 6484 }, { "epoch": 2.973741543400986, "grad_norm": 0.18613789975643158, "learning_rate": 1.9923755242529094e-07, "loss": 1.0493, "step": 6485 }, { "epoch": 2.974200206398349, "grad_norm": 0.25649017095565796, "learning_rate": 1.9230852807639031e-07, "loss": 1.1181, "step": 6486 }, { "epoch": 2.9746588693957117, "grad_norm": 0.30037227272987366, "learning_rate": 1.8550211015794239e-07, "loss": 1.7121, "step": 6487 }, { "epoch": 2.975117532393074, "grad_norm": 0.31024396419525146, "learning_rate": 1.7881830033972258e-07, "loss": 1.5497, "step": 6488 }, { "epoch": 2.975576195390437, "grad_norm": 0.4017513692378998, "learning_rate": 1.7225710026125275e-07, "loss": 1.1522, "step": 6489 }, { "epoch": 2.9760348583877994, "grad_norm": 0.34826523065567017, "learning_rate": 1.658185115320787e-07, "loss": 1.6506, "step": 6490 }, { "epoch": 2.976493521385162, "grad_norm": 0.36130425333976746, "learning_rate": 1.5950253573160377e-07, "loss": 1.4875, "step": 6491 }, { "epoch": 2.976952184382525, "grad_norm": 0.31285083293914795, "learning_rate": 1.5330917440919967e-07, "loss": 1.5449, "step": 6492 }, { "epoch": 2.9774108473798875, "grad_norm": 0.3980162441730499, "learning_rate": 1.472384290841511e-07, "loss": 1.8258, "step": 6493 }, { "epoch": 2.9778695103772503, "grad_norm": 0.3458695709705353, "learning_rate": 1.4129030124560016e-07, "loss": 0.7553, "step": 6494 }, { "epoch": 2.9783281733746128, "grad_norm": 0.2076312005519867, "learning_rate": 1.3546479235276854e-07, "loss": 1.4727, "step": 6495 }, { "epoch": 2.9787868363719756, "grad_norm": 0.3255481421947479, "learning_rate": 1.2976190383456876e-07, "loss": 0.87, "step": 6496 }, { "epoch": 2.9792454993693385, "grad_norm": 0.2653532326221466, "learning_rate": 1.2418163709004836e-07, "loss": 1.4395, "step": 6497 }, { "epoch": 2.9797041623667013, "grad_norm": 0.22563296556472778, "learning_rate": 1.1872399348805685e-07, "loss": 1.1741, "step": 6498 }, { "epoch": 2.9801628253640637, "grad_norm": 0.3683829605579376, "learning_rate": 1.1338897436741213e-07, "loss": 0.9402, "step": 6499 }, { "epoch": 2.9806214883614266, "grad_norm": 0.10145165026187897, "learning_rate": 1.0817658103684513e-07, "loss": 0.6497, "step": 6500 }, { "epoch": 2.981080151358789, "grad_norm": 0.24345941841602325, "learning_rate": 1.030868147749442e-07, "loss": 1.7396, "step": 6501 }, { "epoch": 2.981538814356152, "grad_norm": 0.37198102474212646, "learning_rate": 9.811967683026613e-08, "loss": 1.5526, "step": 6502 }, { "epoch": 2.9819974773535147, "grad_norm": 0.3141441345214844, "learning_rate": 9.32751684213362e-08, "loss": 1.2018, "step": 6503 }, { "epoch": 2.982456140350877, "grad_norm": 0.40361887216567993, "learning_rate": 8.855329073653717e-08, "loss": 1.7252, "step": 6504 }, { "epoch": 2.98291480334824, "grad_norm": 0.4085117280483246, "learning_rate": 8.395404493410919e-08, "loss": 1.1441, "step": 6505 }, { "epoch": 2.9833734663456024, "grad_norm": 0.20205476880073547, "learning_rate": 7.947743214237191e-08, "loss": 0.9854, "step": 6506 }, { "epoch": 2.9838321293429653, "grad_norm": 0.31978803873062134, "learning_rate": 7.512345345944693e-08, "loss": 1.3225, "step": 6507 }, { "epoch": 2.984290792340328, "grad_norm": 0.30714917182922363, "learning_rate": 7.089210995336881e-08, "loss": 1.1396, "step": 6508 }, { "epoch": 2.9847494553376905, "grad_norm": 0.07466486096382141, "learning_rate": 6.678340266214056e-08, "loss": 1.0693, "step": 6509 }, { "epoch": 2.9852081183350534, "grad_norm": 0.3598880469799042, "learning_rate": 6.279733259362264e-08, "loss": 0.9477, "step": 6510 }, { "epoch": 2.985666781332416, "grad_norm": 0.12736907601356506, "learning_rate": 5.8933900725699486e-08, "loss": 0.9232, "step": 6511 }, { "epoch": 2.9861254443297787, "grad_norm": 0.20195496082305908, "learning_rate": 5.519310800600197e-08, "loss": 0.8135, "step": 6512 }, { "epoch": 2.9865841073271415, "grad_norm": 0.2941180169582367, "learning_rate": 5.157495535229595e-08, "loss": 1.6885, "step": 6513 }, { "epoch": 2.987042770324504, "grad_norm": 0.5080846548080444, "learning_rate": 4.807944365198269e-08, "loss": 1.3666, "step": 6514 }, { "epoch": 2.987501433321867, "grad_norm": 0.2823677659034729, "learning_rate": 4.470657376265397e-08, "loss": 0.9075, "step": 6515 }, { "epoch": 2.987960096319229, "grad_norm": 0.18029870092868805, "learning_rate": 4.145634651170349e-08, "loss": 0.8622, "step": 6516 }, { "epoch": 2.988418759316592, "grad_norm": 0.267039954662323, "learning_rate": 3.83287626963269e-08, "loss": 1.145, "step": 6517 }, { "epoch": 2.988877422313955, "grad_norm": 0.28512319922447205, "learning_rate": 3.5323823083854845e-08, "loss": 1.2058, "step": 6518 }, { "epoch": 2.989336085311318, "grad_norm": 0.27092644572257996, "learning_rate": 3.24415284114199e-08, "loss": 1.3474, "step": 6519 }, { "epoch": 2.98979474830868, "grad_norm": 0.34065067768096924, "learning_rate": 2.9681879385956566e-08, "loss": 2.1089, "step": 6520 }, { "epoch": 2.9902534113060426, "grad_norm": 0.4673296809196472, "learning_rate": 2.704487668453437e-08, "loss": 2.0068, "step": 6521 }, { "epoch": 2.9907120743034055, "grad_norm": 0.2979157865047455, "learning_rate": 2.4530520954024748e-08, "loss": 0.7067, "step": 6522 }, { "epoch": 2.9911707373007683, "grad_norm": 0.2104611098766327, "learning_rate": 2.2138812811156596e-08, "loss": 1.7247, "step": 6523 }, { "epoch": 2.991629400298131, "grad_norm": 0.4868721067905426, "learning_rate": 1.9869752842682777e-08, "loss": 1.6491, "step": 6524 }, { "epoch": 2.9920880632954936, "grad_norm": 0.33822518587112427, "learning_rate": 1.7723341605158095e-08, "loss": 2.0029, "step": 6525 }, { "epoch": 2.9925467262928565, "grad_norm": 0.3121846616268158, "learning_rate": 1.5699579625216842e-08, "loss": 1.4298, "step": 6526 }, { "epoch": 2.993005389290219, "grad_norm": 0.3756405711174011, "learning_rate": 1.3798467399184222e-08, "loss": 1.7493, "step": 6527 }, { "epoch": 2.9934640522875817, "grad_norm": 0.37064328789711, "learning_rate": 1.2020005393520439e-08, "loss": 1.5358, "step": 6528 }, { "epoch": 2.9939227152849446, "grad_norm": 0.2886911928653717, "learning_rate": 1.0364194044432117e-08, "loss": 1.3199, "step": 6529 }, { "epoch": 2.994381378282307, "grad_norm": 0.3759532868862152, "learning_rate": 8.831033758149865e-09, "loss": 1.3483, "step": 6530 }, { "epoch": 2.99484004127967, "grad_norm": 0.21223415434360504, "learning_rate": 7.420524910706217e-09, "loss": 0.9441, "step": 6531 }, { "epoch": 2.9952987042770323, "grad_norm": 0.2493351846933365, "learning_rate": 6.13266784810218e-09, "loss": 0.9465, "step": 6532 }, { "epoch": 2.995757367274395, "grad_norm": 0.27161523699760437, "learning_rate": 4.967462886362739e-09, "loss": 1.5159, "step": 6533 }, { "epoch": 2.996216030271758, "grad_norm": 0.40845414996147156, "learning_rate": 3.924910311203789e-09, "loss": 1.468, "step": 6534 }, { "epoch": 2.9966746932691204, "grad_norm": 0.281252920627594, "learning_rate": 3.005010378476225e-09, "loss": 0.8934, "step": 6535 }, { "epoch": 2.9971333562664833, "grad_norm": 0.27226901054382324, "learning_rate": 2.207763313777367e-09, "loss": 0.8065, "step": 6536 }, { "epoch": 2.9975920192638457, "grad_norm": 0.0754655972123146, "learning_rate": 1.5331693126174884e-09, "loss": 0.4985, "step": 6537 }, { "epoch": 2.9980506822612085, "grad_norm": 0.20777086913585663, "learning_rate": 9.812285406418653e-10, "loss": 1.425, "step": 6538 }, { "epoch": 2.9985093452585714, "grad_norm": 0.3428349196910858, "learning_rate": 5.519411330756619e-10, "loss": 1.5652, "step": 6539 }, { "epoch": 2.998968008255934, "grad_norm": 0.3928774297237396, "learning_rate": 2.4530719533455425e-10, "loss": 1.4103, "step": 6540 }, { "epoch": 2.998968008255934, "step": 6540, "total_flos": 4.3151621005372293e+18, "train_loss": 1.4276331583658854, "train_runtime": 125872.0926, "train_samples_per_second": 13.302, "train_steps_per_second": 0.052 } ], "logging_steps": 1.0, "max_steps": 6540, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.3151621005372293e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }