{ "best_metric": 1.4838331937789917, "best_model_checkpoint": "miner_id_24/checkpoint-3800", "epoch": 0.6108097247337754, "eval_steps": 200, "global_step": 3800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016073940124573035, "grad_norm": 0.6146062612533569, "learning_rate": 6.666666666666667e-06, "loss": 2.4562, "step": 1 }, { "epoch": 0.00016073940124573035, "eval_loss": 1.8800766468048096, "eval_runtime": 46.1041, "eval_samples_per_second": 5.444, "eval_steps_per_second": 2.733, "step": 1 }, { "epoch": 0.0003214788024914607, "grad_norm": 0.5687244534492493, "learning_rate": 1.3333333333333333e-05, "loss": 1.9791, "step": 2 }, { "epoch": 0.0004822182037371911, "grad_norm": 0.5558443665504456, "learning_rate": 2e-05, "loss": 2.0161, "step": 3 }, { "epoch": 0.0006429576049829214, "grad_norm": 0.5337421298027039, "learning_rate": 2.6666666666666667e-05, "loss": 1.9071, "step": 4 }, { "epoch": 0.0008036970062286518, "grad_norm": 0.5502483248710632, "learning_rate": 3.3333333333333335e-05, "loss": 2.0349, "step": 5 }, { "epoch": 0.0009644364074743822, "grad_norm": 0.546974778175354, "learning_rate": 4e-05, "loss": 2.1145, "step": 6 }, { "epoch": 0.0011251758087201125, "grad_norm": 0.6227491497993469, "learning_rate": 4.666666666666667e-05, "loss": 2.0561, "step": 7 }, { "epoch": 0.0012859152099658428, "grad_norm": 0.5480761528015137, "learning_rate": 5.333333333333333e-05, "loss": 1.7662, "step": 8 }, { "epoch": 0.0014466546112115732, "grad_norm": 0.5654782652854919, "learning_rate": 6e-05, "loss": 1.842, "step": 9 }, { "epoch": 0.0016073940124573037, "grad_norm": 0.5720047950744629, "learning_rate": 6.666666666666667e-05, "loss": 1.7238, "step": 10 }, { "epoch": 0.001768133413703034, "grad_norm": 0.488892138004303, "learning_rate": 7.333333333333333e-05, "loss": 1.5829, "step": 11 }, { "epoch": 0.0019288728149487644, "grad_norm": 0.5832931995391846, "learning_rate": 8e-05, "loss": 1.9504, "step": 12 }, { "epoch": 0.002089612216194495, "grad_norm": 0.5047398805618286, "learning_rate": 8.666666666666667e-05, "loss": 1.6885, "step": 13 }, { "epoch": 0.002250351617440225, "grad_norm": 0.5305578112602234, "learning_rate": 9.333333333333334e-05, "loss": 1.6642, "step": 14 }, { "epoch": 0.0024110910186859553, "grad_norm": 0.57685786485672, "learning_rate": 0.0001, "loss": 1.8461, "step": 15 }, { "epoch": 0.0025718304199316856, "grad_norm": 0.654364287853241, "learning_rate": 0.00010666666666666667, "loss": 1.7632, "step": 16 }, { "epoch": 0.0027325698211774162, "grad_norm": 0.6196379661560059, "learning_rate": 0.00011333333333333334, "loss": 1.6611, "step": 17 }, { "epoch": 0.0028933092224231465, "grad_norm": 0.6956469416618347, "learning_rate": 0.00012, "loss": 1.795, "step": 18 }, { "epoch": 0.0030540486236688767, "grad_norm": 0.8601729273796082, "learning_rate": 0.00012666666666666666, "loss": 2.1705, "step": 19 }, { "epoch": 0.0032147880249146074, "grad_norm": 0.6868798136711121, "learning_rate": 0.00013333333333333334, "loss": 1.7211, "step": 20 }, { "epoch": 0.0033755274261603376, "grad_norm": 0.6202433705329895, "learning_rate": 0.00014, "loss": 1.5298, "step": 21 }, { "epoch": 0.003536266827406068, "grad_norm": 0.6545760035514832, "learning_rate": 0.00014666666666666666, "loss": 1.9106, "step": 22 }, { "epoch": 0.003697006228651798, "grad_norm": 0.6812022924423218, "learning_rate": 0.00015333333333333334, "loss": 1.7376, "step": 23 }, { "epoch": 0.0038577456298975288, "grad_norm": 0.6827796697616577, "learning_rate": 0.00016, "loss": 1.6954, "step": 24 }, { "epoch": 0.004018485031143259, "grad_norm": 0.6145235300064087, "learning_rate": 0.0001666666666666667, "loss": 1.963, "step": 25 }, { "epoch": 0.00417922443238899, "grad_norm": 0.6488357186317444, "learning_rate": 0.00017333333333333334, "loss": 1.8421, "step": 26 }, { "epoch": 0.00433996383363472, "grad_norm": 0.6817061901092529, "learning_rate": 0.00018, "loss": 1.7975, "step": 27 }, { "epoch": 0.00450070323488045, "grad_norm": 0.7456655502319336, "learning_rate": 0.0001866666666666667, "loss": 1.614, "step": 28 }, { "epoch": 0.00466144263612618, "grad_norm": 0.5470588207244873, "learning_rate": 0.00019333333333333333, "loss": 1.4163, "step": 29 }, { "epoch": 0.004822182037371911, "grad_norm": 0.7085727453231812, "learning_rate": 0.0002, "loss": 1.6551, "step": 30 }, { "epoch": 0.004982921438617641, "grad_norm": 0.8203132152557373, "learning_rate": 0.00019999999857863972, "loss": 1.7143, "step": 31 }, { "epoch": 0.005143660839863371, "grad_norm": 0.6805604696273804, "learning_rate": 0.00019999999431455888, "loss": 1.8898, "step": 32 }, { "epoch": 0.005304400241109102, "grad_norm": 0.6662057638168335, "learning_rate": 0.00019999998720775754, "loss": 1.5896, "step": 33 }, { "epoch": 0.0054651396423548325, "grad_norm": 0.7231802344322205, "learning_rate": 0.00019999997725823603, "loss": 1.7113, "step": 34 }, { "epoch": 0.005625879043600563, "grad_norm": 0.7186694145202637, "learning_rate": 0.00019999996446599453, "loss": 1.7156, "step": 35 }, { "epoch": 0.005786618444846293, "grad_norm": 0.5861710906028748, "learning_rate": 0.00019999994883103348, "loss": 1.7798, "step": 36 }, { "epoch": 0.005947357846092023, "grad_norm": 0.622209906578064, "learning_rate": 0.00019999993035335327, "loss": 1.4324, "step": 37 }, { "epoch": 0.006108097247337753, "grad_norm": 0.6815835237503052, "learning_rate": 0.00019999990903295443, "loss": 1.8903, "step": 38 }, { "epoch": 0.006268836648583484, "grad_norm": 0.6280589699745178, "learning_rate": 0.00019999988486983758, "loss": 1.7587, "step": 39 }, { "epoch": 0.006429576049829215, "grad_norm": 0.5866552591323853, "learning_rate": 0.00019999985786400343, "loss": 1.533, "step": 40 }, { "epoch": 0.006590315451074945, "grad_norm": 0.6842410564422607, "learning_rate": 0.0001999998280154527, "loss": 1.6896, "step": 41 }, { "epoch": 0.006751054852320675, "grad_norm": 0.6225414276123047, "learning_rate": 0.00019999979532418625, "loss": 1.7056, "step": 42 }, { "epoch": 0.0069117942535664055, "grad_norm": 0.5919414758682251, "learning_rate": 0.00019999975979020506, "loss": 1.6127, "step": 43 }, { "epoch": 0.007072533654812136, "grad_norm": 0.7076655030250549, "learning_rate": 0.00019999972141351003, "loss": 1.7316, "step": 44 }, { "epoch": 0.007233273056057866, "grad_norm": 0.693417489528656, "learning_rate": 0.00019999968019410237, "loss": 1.6352, "step": 45 }, { "epoch": 0.007394012457303596, "grad_norm": 0.640809178352356, "learning_rate": 0.00019999963613198324, "loss": 2.0136, "step": 46 }, { "epoch": 0.007554751858549327, "grad_norm": 0.6811522841453552, "learning_rate": 0.00019999958922715377, "loss": 1.9979, "step": 47 }, { "epoch": 0.0077154912597950575, "grad_norm": 0.6250655055046082, "learning_rate": 0.00019999953947961543, "loss": 1.8884, "step": 48 }, { "epoch": 0.007876230661040788, "grad_norm": 0.6390895843505859, "learning_rate": 0.00019999948688936958, "loss": 1.8005, "step": 49 }, { "epoch": 0.008036970062286517, "grad_norm": 0.7204399704933167, "learning_rate": 0.0001999994314564177, "loss": 1.848, "step": 50 }, { "epoch": 0.008197709463532248, "grad_norm": 0.6431483626365662, "learning_rate": 0.0001999993731807614, "loss": 1.5865, "step": 51 }, { "epoch": 0.00835844886477798, "grad_norm": 0.6491577625274658, "learning_rate": 0.00019999931206240233, "loss": 1.5799, "step": 52 }, { "epoch": 0.008519188266023709, "grad_norm": 0.6422392129898071, "learning_rate": 0.00019999924810134217, "loss": 1.8216, "step": 53 }, { "epoch": 0.00867992766726944, "grad_norm": 0.6124479174613953, "learning_rate": 0.00019999918129758284, "loss": 1.499, "step": 54 }, { "epoch": 0.00884066706851517, "grad_norm": 0.5777962803840637, "learning_rate": 0.00019999911165112616, "loss": 1.5374, "step": 55 }, { "epoch": 0.0090014064697609, "grad_norm": 0.5487947463989258, "learning_rate": 0.00019999903916197417, "loss": 1.8562, "step": 56 }, { "epoch": 0.00916214587100663, "grad_norm": 0.5540621876716614, "learning_rate": 0.00019999896383012883, "loss": 1.4139, "step": 57 }, { "epoch": 0.00932288527225236, "grad_norm": 0.5495641231536865, "learning_rate": 0.00019999888565559238, "loss": 1.7241, "step": 58 }, { "epoch": 0.009483624673498092, "grad_norm": 0.5788256525993347, "learning_rate": 0.000199998804638367, "loss": 1.5788, "step": 59 }, { "epoch": 0.009644364074743821, "grad_norm": 0.5524935126304626, "learning_rate": 0.00019999872077845502, "loss": 1.6883, "step": 60 }, { "epoch": 0.009805103475989552, "grad_norm": 0.5422489643096924, "learning_rate": 0.0001999986340758588, "loss": 1.5146, "step": 61 }, { "epoch": 0.009965842877235282, "grad_norm": 0.559472918510437, "learning_rate": 0.00019999854453058081, "loss": 1.6218, "step": 62 }, { "epoch": 0.010126582278481013, "grad_norm": 0.6507068872451782, "learning_rate": 0.0001999984521426236, "loss": 1.5814, "step": 63 }, { "epoch": 0.010287321679726742, "grad_norm": 0.5180988311767578, "learning_rate": 0.00019999835691198977, "loss": 1.4473, "step": 64 }, { "epoch": 0.010448061080972473, "grad_norm": 0.6105542778968811, "learning_rate": 0.00019999825883868207, "loss": 1.3512, "step": 65 }, { "epoch": 0.010608800482218204, "grad_norm": 0.6106534004211426, "learning_rate": 0.0001999981579227033, "loss": 1.7397, "step": 66 }, { "epoch": 0.010769539883463934, "grad_norm": 0.5836473107337952, "learning_rate": 0.00019999805416405625, "loss": 1.6683, "step": 67 }, { "epoch": 0.010930279284709665, "grad_norm": 0.5084603428840637, "learning_rate": 0.00019999794756274394, "loss": 1.391, "step": 68 }, { "epoch": 0.011091018685955394, "grad_norm": 0.5803837776184082, "learning_rate": 0.00019999783811876936, "loss": 1.5025, "step": 69 }, { "epoch": 0.011251758087201125, "grad_norm": 0.6616635322570801, "learning_rate": 0.00019999772583213562, "loss": 1.7606, "step": 70 }, { "epoch": 0.011412497488446855, "grad_norm": 0.5798701643943787, "learning_rate": 0.00019999761070284598, "loss": 1.6723, "step": 71 }, { "epoch": 0.011573236889692586, "grad_norm": 0.5627177953720093, "learning_rate": 0.00019999749273090362, "loss": 1.599, "step": 72 }, { "epoch": 0.011733976290938317, "grad_norm": 0.5469239950180054, "learning_rate": 0.00019999737191631197, "loss": 1.4389, "step": 73 }, { "epoch": 0.011894715692184046, "grad_norm": 0.6494703888893127, "learning_rate": 0.0001999972482590744, "loss": 1.5761, "step": 74 }, { "epoch": 0.012055455093429777, "grad_norm": 0.6485840678215027, "learning_rate": 0.00019999712175919449, "loss": 1.6965, "step": 75 }, { "epoch": 0.012216194494675507, "grad_norm": 0.5390245318412781, "learning_rate": 0.00019999699241667575, "loss": 1.4385, "step": 76 }, { "epoch": 0.012376933895921238, "grad_norm": 0.5867407321929932, "learning_rate": 0.00019999686023152195, "loss": 1.6617, "step": 77 }, { "epoch": 0.012537673297166967, "grad_norm": 0.5673894882202148, "learning_rate": 0.0001999967252037368, "loss": 1.6431, "step": 78 }, { "epoch": 0.012698412698412698, "grad_norm": 0.6245514750480652, "learning_rate": 0.00019999658733332415, "loss": 1.7781, "step": 79 }, { "epoch": 0.01285915209965843, "grad_norm": 0.5269338488578796, "learning_rate": 0.0001999964466202879, "loss": 1.4988, "step": 80 }, { "epoch": 0.013019891500904159, "grad_norm": 0.64048832654953, "learning_rate": 0.0001999963030646321, "loss": 1.4648, "step": 81 }, { "epoch": 0.01318063090214989, "grad_norm": 0.6036165356636047, "learning_rate": 0.00019999615666636074, "loss": 1.5798, "step": 82 }, { "epoch": 0.01334137030339562, "grad_norm": 0.5646976828575134, "learning_rate": 0.00019999600742547807, "loss": 1.796, "step": 83 }, { "epoch": 0.01350210970464135, "grad_norm": 0.6046854853630066, "learning_rate": 0.00019999585534198828, "loss": 1.6529, "step": 84 }, { "epoch": 0.01366284910588708, "grad_norm": 0.6673730611801147, "learning_rate": 0.0001999957004158957, "loss": 1.6296, "step": 85 }, { "epoch": 0.013823588507132811, "grad_norm": 0.5602283477783203, "learning_rate": 0.0001999955426472048, "loss": 1.7198, "step": 86 }, { "epoch": 0.013984327908378542, "grad_norm": 0.6285644769668579, "learning_rate": 0.00019999538203591997, "loss": 1.5761, "step": 87 }, { "epoch": 0.014145067309624271, "grad_norm": 0.6506261825561523, "learning_rate": 0.0001999952185820458, "loss": 1.9915, "step": 88 }, { "epoch": 0.014305806710870003, "grad_norm": 0.6009521484375, "learning_rate": 0.000199995052285587, "loss": 1.5645, "step": 89 }, { "epoch": 0.014466546112115732, "grad_norm": 0.6257418990135193, "learning_rate": 0.00019999488314654827, "loss": 1.6225, "step": 90 }, { "epoch": 0.014627285513361463, "grad_norm": 0.5345893502235413, "learning_rate": 0.00019999471116493434, "loss": 1.4062, "step": 91 }, { "epoch": 0.014788024914607192, "grad_norm": 0.5701767802238464, "learning_rate": 0.00019999453634075018, "loss": 1.6436, "step": 92 }, { "epoch": 0.014948764315852923, "grad_norm": 0.6432588696479797, "learning_rate": 0.00019999435867400073, "loss": 1.5179, "step": 93 }, { "epoch": 0.015109503717098655, "grad_norm": 0.5169962644577026, "learning_rate": 0.00019999417816469106, "loss": 1.6126, "step": 94 }, { "epoch": 0.015270243118344384, "grad_norm": 0.5223938226699829, "learning_rate": 0.0001999939948128263, "loss": 1.5733, "step": 95 }, { "epoch": 0.015430982519590115, "grad_norm": 0.5852014422416687, "learning_rate": 0.00019999380861841163, "loss": 1.7466, "step": 96 }, { "epoch": 0.015591721920835844, "grad_norm": 0.5293176174163818, "learning_rate": 0.00019999361958145237, "loss": 1.5117, "step": 97 }, { "epoch": 0.015752461322081576, "grad_norm": 0.5117928981781006, "learning_rate": 0.0001999934277019539, "loss": 1.429, "step": 98 }, { "epoch": 0.015913200723327307, "grad_norm": 0.4751776158809662, "learning_rate": 0.00019999323297992169, "loss": 1.3713, "step": 99 }, { "epoch": 0.016073940124573034, "grad_norm": 0.5721314549446106, "learning_rate": 0.0001999930354153612, "loss": 1.3649, "step": 100 }, { "epoch": 0.016234679525818765, "grad_norm": 0.5435962677001953, "learning_rate": 0.00019999283500827812, "loss": 1.54, "step": 101 }, { "epoch": 0.016395418927064496, "grad_norm": 0.4738254249095917, "learning_rate": 0.0001999926317586781, "loss": 1.2795, "step": 102 }, { "epoch": 0.016556158328310228, "grad_norm": 0.5297900438308716, "learning_rate": 0.00019999242566656698, "loss": 1.309, "step": 103 }, { "epoch": 0.01671689772955596, "grad_norm": 0.5342819690704346, "learning_rate": 0.00019999221673195052, "loss": 1.7379, "step": 104 }, { "epoch": 0.016877637130801686, "grad_norm": 0.5869962573051453, "learning_rate": 0.00019999200495483477, "loss": 1.5729, "step": 105 }, { "epoch": 0.017038376532047417, "grad_norm": 0.5355800986289978, "learning_rate": 0.00019999179033522565, "loss": 1.5899, "step": 106 }, { "epoch": 0.01719911593329315, "grad_norm": 0.6515146493911743, "learning_rate": 0.00019999157287312936, "loss": 1.7698, "step": 107 }, { "epoch": 0.01735985533453888, "grad_norm": 0.5469892024993896, "learning_rate": 0.00019999135256855202, "loss": 1.4373, "step": 108 }, { "epoch": 0.01752059473578461, "grad_norm": 0.572085976600647, "learning_rate": 0.0001999911294214999, "loss": 1.5545, "step": 109 }, { "epoch": 0.01768133413703034, "grad_norm": 0.6045322418212891, "learning_rate": 0.00019999090343197936, "loss": 1.6601, "step": 110 }, { "epoch": 0.01784207353827607, "grad_norm": 0.5288821458816528, "learning_rate": 0.00019999067459999676, "loss": 1.4338, "step": 111 }, { "epoch": 0.0180028129395218, "grad_norm": 0.6269596815109253, "learning_rate": 0.0001999904429255587, "loss": 1.7966, "step": 112 }, { "epoch": 0.01816355234076753, "grad_norm": 0.6155001521110535, "learning_rate": 0.0001999902084086717, "loss": 1.4822, "step": 113 }, { "epoch": 0.01832429174201326, "grad_norm": 0.5912888050079346, "learning_rate": 0.00019998997104934247, "loss": 1.5701, "step": 114 }, { "epoch": 0.01848503114325899, "grad_norm": 0.5434785485267639, "learning_rate": 0.00019998973084757771, "loss": 1.5136, "step": 115 }, { "epoch": 0.01864577054450472, "grad_norm": 0.6013758182525635, "learning_rate": 0.0001999894878033843, "loss": 1.6653, "step": 116 }, { "epoch": 0.018806509945750453, "grad_norm": 0.5760058760643005, "learning_rate": 0.00019998924191676914, "loss": 1.5809, "step": 117 }, { "epoch": 0.018967249346996184, "grad_norm": 0.5517994165420532, "learning_rate": 0.00019998899318773917, "loss": 1.598, "step": 118 }, { "epoch": 0.01912798874824191, "grad_norm": 0.6126778721809387, "learning_rate": 0.0001999887416163015, "loss": 1.6497, "step": 119 }, { "epoch": 0.019288728149487643, "grad_norm": 0.5817735195159912, "learning_rate": 0.00019998848720246328, "loss": 1.3496, "step": 120 }, { "epoch": 0.019449467550733374, "grad_norm": 0.5723729729652405, "learning_rate": 0.00019998822994623174, "loss": 1.3819, "step": 121 }, { "epoch": 0.019610206951979105, "grad_norm": 0.6477592587471008, "learning_rate": 0.0001999879698476142, "loss": 1.6774, "step": 122 }, { "epoch": 0.019770946353224836, "grad_norm": 0.6544284820556641, "learning_rate": 0.00019998770690661803, "loss": 1.8183, "step": 123 }, { "epoch": 0.019931685754470563, "grad_norm": 0.5253116488456726, "learning_rate": 0.00019998744112325074, "loss": 1.3606, "step": 124 }, { "epoch": 0.020092425155716295, "grad_norm": 0.5207403302192688, "learning_rate": 0.00019998717249751984, "loss": 1.3521, "step": 125 }, { "epoch": 0.020253164556962026, "grad_norm": 0.6499771475791931, "learning_rate": 0.000199986901029433, "loss": 1.7398, "step": 126 }, { "epoch": 0.020413903958207757, "grad_norm": 0.5993442535400391, "learning_rate": 0.00019998662671899794, "loss": 1.6894, "step": 127 }, { "epoch": 0.020574643359453484, "grad_norm": 0.5501679182052612, "learning_rate": 0.0001999863495662224, "loss": 1.658, "step": 128 }, { "epoch": 0.020735382760699216, "grad_norm": 0.5819227695465088, "learning_rate": 0.0001999860695711144, "loss": 1.7308, "step": 129 }, { "epoch": 0.020896122161944947, "grad_norm": 0.5369065403938293, "learning_rate": 0.00019998578673368172, "loss": 1.5093, "step": 130 }, { "epoch": 0.021056861563190678, "grad_norm": 0.5569128394126892, "learning_rate": 0.00019998550105393252, "loss": 1.5935, "step": 131 }, { "epoch": 0.02121760096443641, "grad_norm": 0.5772825479507446, "learning_rate": 0.00019998521253187487, "loss": 1.6905, "step": 132 }, { "epoch": 0.021378340365682136, "grad_norm": 0.5244413018226624, "learning_rate": 0.00019998492116751702, "loss": 1.5972, "step": 133 }, { "epoch": 0.021539079766927868, "grad_norm": 0.5664756298065186, "learning_rate": 0.00019998462696086718, "loss": 1.5403, "step": 134 }, { "epoch": 0.0216998191681736, "grad_norm": 0.5392235517501831, "learning_rate": 0.00019998432991193377, "loss": 1.6381, "step": 135 }, { "epoch": 0.02186055856941933, "grad_norm": 0.586540162563324, "learning_rate": 0.00019998403002072522, "loss": 1.5034, "step": 136 }, { "epoch": 0.02202129797066506, "grad_norm": 0.6401904225349426, "learning_rate": 0.00019998372728725005, "loss": 1.4966, "step": 137 }, { "epoch": 0.02218203737191079, "grad_norm": 0.8401621580123901, "learning_rate": 0.0001999834217115169, "loss": 1.6566, "step": 138 }, { "epoch": 0.02234277677315652, "grad_norm": 0.6305237412452698, "learning_rate": 0.00019998311329353437, "loss": 1.5463, "step": 139 }, { "epoch": 0.02250351617440225, "grad_norm": 0.584318220615387, "learning_rate": 0.00019998280203331132, "loss": 1.9107, "step": 140 }, { "epoch": 0.022664255575647982, "grad_norm": 0.6372249126434326, "learning_rate": 0.00019998248793085655, "loss": 1.7393, "step": 141 }, { "epoch": 0.02282499497689371, "grad_norm": 0.6097559332847595, "learning_rate": 0.00019998217098617902, "loss": 1.6399, "step": 142 }, { "epoch": 0.02298573437813944, "grad_norm": 0.5757220983505249, "learning_rate": 0.00019998185119928769, "loss": 1.5608, "step": 143 }, { "epoch": 0.02314647377938517, "grad_norm": 0.6350814700126648, "learning_rate": 0.0001999815285701917, "loss": 1.819, "step": 144 }, { "epoch": 0.023307213180630903, "grad_norm": 0.582371711730957, "learning_rate": 0.0001999812030989002, "loss": 1.6436, "step": 145 }, { "epoch": 0.023467952581876634, "grad_norm": 0.5847258567810059, "learning_rate": 0.00019998087478542245, "loss": 1.5109, "step": 146 }, { "epoch": 0.02362869198312236, "grad_norm": 0.5616840720176697, "learning_rate": 0.00019998054362976777, "loss": 1.5682, "step": 147 }, { "epoch": 0.023789431384368093, "grad_norm": 0.45165371894836426, "learning_rate": 0.00019998020963194558, "loss": 1.306, "step": 148 }, { "epoch": 0.023950170785613824, "grad_norm": 0.5621107220649719, "learning_rate": 0.00019997987279196536, "loss": 1.6454, "step": 149 }, { "epoch": 0.024110910186859555, "grad_norm": 0.5720399022102356, "learning_rate": 0.00019997953310983674, "loss": 1.5393, "step": 150 }, { "epoch": 0.024271649588105286, "grad_norm": 0.5469704866409302, "learning_rate": 0.00019997919058556933, "loss": 1.5437, "step": 151 }, { "epoch": 0.024432388989351014, "grad_norm": 0.607407808303833, "learning_rate": 0.00019997884521917288, "loss": 1.4615, "step": 152 }, { "epoch": 0.024593128390596745, "grad_norm": 0.5676476955413818, "learning_rate": 0.00019997849701065717, "loss": 1.5862, "step": 153 }, { "epoch": 0.024753867791842476, "grad_norm": 0.5178918838500977, "learning_rate": 0.00019997814596003214, "loss": 1.4128, "step": 154 }, { "epoch": 0.024914607193088207, "grad_norm": 0.555792510509491, "learning_rate": 0.00019997779206730777, "loss": 1.3723, "step": 155 }, { "epoch": 0.025075346594333935, "grad_norm": 0.5667513608932495, "learning_rate": 0.00019997743533249412, "loss": 1.6411, "step": 156 }, { "epoch": 0.025236085995579666, "grad_norm": 0.6014764904975891, "learning_rate": 0.00019997707575560128, "loss": 1.7638, "step": 157 }, { "epoch": 0.025396825396825397, "grad_norm": 0.6081196665763855, "learning_rate": 0.00019997671333663958, "loss": 1.6497, "step": 158 }, { "epoch": 0.025557564798071128, "grad_norm": 0.6969033479690552, "learning_rate": 0.00019997634807561923, "loss": 1.8579, "step": 159 }, { "epoch": 0.02571830419931686, "grad_norm": 0.6027001738548279, "learning_rate": 0.0001999759799725506, "loss": 1.4378, "step": 160 }, { "epoch": 0.025879043600562587, "grad_norm": 0.5889347791671753, "learning_rate": 0.00019997560902744425, "loss": 1.5519, "step": 161 }, { "epoch": 0.026039783001808318, "grad_norm": 0.551889181137085, "learning_rate": 0.00019997523524031062, "loss": 1.4729, "step": 162 }, { "epoch": 0.02620052240305405, "grad_norm": 0.6988946795463562, "learning_rate": 0.00019997485861116042, "loss": 1.8647, "step": 163 }, { "epoch": 0.02636126180429978, "grad_norm": 0.5777518153190613, "learning_rate": 0.00019997447914000432, "loss": 1.5285, "step": 164 }, { "epoch": 0.02652200120554551, "grad_norm": 0.6907120943069458, "learning_rate": 0.0001999740968268531, "loss": 1.7111, "step": 165 }, { "epoch": 0.02668274060679124, "grad_norm": 0.6089081168174744, "learning_rate": 0.00019997371167171767, "loss": 1.621, "step": 166 }, { "epoch": 0.02684348000803697, "grad_norm": 0.6227884292602539, "learning_rate": 0.0001999733236746089, "loss": 1.6838, "step": 167 }, { "epoch": 0.0270042194092827, "grad_norm": 0.6011916995048523, "learning_rate": 0.00019997293283553788, "loss": 1.661, "step": 168 }, { "epoch": 0.027164958810528432, "grad_norm": 0.5606417059898376, "learning_rate": 0.00019997253915451569, "loss": 1.4559, "step": 169 }, { "epoch": 0.02732569821177416, "grad_norm": 0.6192370653152466, "learning_rate": 0.00019997214263155354, "loss": 1.5256, "step": 170 }, { "epoch": 0.02748643761301989, "grad_norm": 0.6023882627487183, "learning_rate": 0.0001999717432666627, "loss": 1.6291, "step": 171 }, { "epoch": 0.027647177014265622, "grad_norm": 0.5901930928230286, "learning_rate": 0.00019997134105985453, "loss": 1.5135, "step": 172 }, { "epoch": 0.027807916415511353, "grad_norm": 0.5880249738693237, "learning_rate": 0.00019997093601114045, "loss": 1.4506, "step": 173 }, { "epoch": 0.027968655816757084, "grad_norm": 0.5684868097305298, "learning_rate": 0.00019997052812053198, "loss": 1.6294, "step": 174 }, { "epoch": 0.02812939521800281, "grad_norm": 0.5687061548233032, "learning_rate": 0.00019997011738804072, "loss": 1.5584, "step": 175 }, { "epoch": 0.028290134619248543, "grad_norm": 0.48914963006973267, "learning_rate": 0.00019996970381367835, "loss": 1.3336, "step": 176 }, { "epoch": 0.028450874020494274, "grad_norm": 0.6474471092224121, "learning_rate": 0.00019996928739745662, "loss": 1.6332, "step": 177 }, { "epoch": 0.028611613421740005, "grad_norm": 0.5832691192626953, "learning_rate": 0.00019996886813938733, "loss": 1.4721, "step": 178 }, { "epoch": 0.028772352822985733, "grad_norm": 0.527232825756073, "learning_rate": 0.00019996844603948247, "loss": 1.4806, "step": 179 }, { "epoch": 0.028933092224231464, "grad_norm": 0.634027898311615, "learning_rate": 0.00019996802109775396, "loss": 1.7349, "step": 180 }, { "epoch": 0.029093831625477195, "grad_norm": 0.5957772731781006, "learning_rate": 0.00019996759331421395, "loss": 1.649, "step": 181 }, { "epoch": 0.029254571026722926, "grad_norm": 0.5642923712730408, "learning_rate": 0.00019996716268887458, "loss": 1.6033, "step": 182 }, { "epoch": 0.029415310427968657, "grad_norm": 0.6112290024757385, "learning_rate": 0.00019996672922174808, "loss": 1.5424, "step": 183 }, { "epoch": 0.029576049829214385, "grad_norm": 0.5684464573860168, "learning_rate": 0.0001999662929128468, "loss": 1.5925, "step": 184 }, { "epoch": 0.029736789230460116, "grad_norm": 0.6065992116928101, "learning_rate": 0.00019996585376218308, "loss": 1.7697, "step": 185 }, { "epoch": 0.029897528631705847, "grad_norm": 0.5482378005981445, "learning_rate": 0.00019996541176976943, "loss": 1.5841, "step": 186 }, { "epoch": 0.030058268032951578, "grad_norm": 0.5863565802574158, "learning_rate": 0.00019996496693561845, "loss": 1.5255, "step": 187 }, { "epoch": 0.03021900743419731, "grad_norm": 0.5618999600410461, "learning_rate": 0.0001999645192597428, "loss": 1.4487, "step": 188 }, { "epoch": 0.030379746835443037, "grad_norm": 0.6198532581329346, "learning_rate": 0.00019996406874215513, "loss": 1.7207, "step": 189 }, { "epoch": 0.030540486236688768, "grad_norm": 0.7240882515907288, "learning_rate": 0.0001999636153828683, "loss": 2.0277, "step": 190 }, { "epoch": 0.0307012256379345, "grad_norm": 0.5573434233665466, "learning_rate": 0.00019996315918189517, "loss": 1.4414, "step": 191 }, { "epoch": 0.03086196503918023, "grad_norm": 0.5780049562454224, "learning_rate": 0.00019996270013924875, "loss": 1.5145, "step": 192 }, { "epoch": 0.031022704440425958, "grad_norm": 0.6340463757514954, "learning_rate": 0.00019996223825494201, "loss": 1.5458, "step": 193 }, { "epoch": 0.03118344384167169, "grad_norm": 0.5492103099822998, "learning_rate": 0.0001999617735289882, "loss": 1.3887, "step": 194 }, { "epoch": 0.03134418324291742, "grad_norm": 0.6537782549858093, "learning_rate": 0.0001999613059614004, "loss": 1.4066, "step": 195 }, { "epoch": 0.03150492264416315, "grad_norm": 0.5743723511695862, "learning_rate": 0.00019996083555219203, "loss": 1.7008, "step": 196 }, { "epoch": 0.03166566204540888, "grad_norm": 0.5864811539649963, "learning_rate": 0.00019996036230137635, "loss": 1.4182, "step": 197 }, { "epoch": 0.03182640144665461, "grad_norm": 0.5790386199951172, "learning_rate": 0.00019995988620896687, "loss": 1.3932, "step": 198 }, { "epoch": 0.031987140847900344, "grad_norm": 0.5721076130867004, "learning_rate": 0.00019995940727497713, "loss": 1.6413, "step": 199 }, { "epoch": 0.03214788024914607, "grad_norm": 0.6769488453865051, "learning_rate": 0.0001999589254994207, "loss": 1.6489, "step": 200 }, { "epoch": 0.03214788024914607, "eval_loss": 1.6234830617904663, "eval_runtime": 46.2592, "eval_samples_per_second": 5.426, "eval_steps_per_second": 2.724, "step": 200 }, { "epoch": 0.0323086196503918, "grad_norm": 0.5356457829475403, "learning_rate": 0.00019995844088231132, "loss": 1.5644, "step": 201 }, { "epoch": 0.03246935905163753, "grad_norm": 0.5797910690307617, "learning_rate": 0.00019995795342366276, "loss": 1.2978, "step": 202 }, { "epoch": 0.03263009845288326, "grad_norm": 0.538854718208313, "learning_rate": 0.0001999574631234889, "loss": 1.4656, "step": 203 }, { "epoch": 0.03279083785412899, "grad_norm": 0.925990879535675, "learning_rate": 0.00019995696998180359, "loss": 1.7461, "step": 204 }, { "epoch": 0.032951577255374724, "grad_norm": 0.6368277072906494, "learning_rate": 0.00019995647399862092, "loss": 1.5181, "step": 205 }, { "epoch": 0.033112316656620455, "grad_norm": 0.7856408953666687, "learning_rate": 0.000199955975173955, "loss": 1.764, "step": 206 }, { "epoch": 0.033273056057866186, "grad_norm": 0.49969542026519775, "learning_rate": 0.00019995547350781993, "loss": 1.3169, "step": 207 }, { "epoch": 0.03343379545911192, "grad_norm": 0.6214454770088196, "learning_rate": 0.00019995496900023006, "loss": 1.3759, "step": 208 }, { "epoch": 0.03359453486035765, "grad_norm": 0.5567578673362732, "learning_rate": 0.00019995446165119968, "loss": 1.4833, "step": 209 }, { "epoch": 0.03375527426160337, "grad_norm": 0.5712791085243225, "learning_rate": 0.00019995395146074328, "loss": 1.3142, "step": 210 }, { "epoch": 0.033916013662849104, "grad_norm": 0.614951491355896, "learning_rate": 0.00019995343842887526, "loss": 1.7391, "step": 211 }, { "epoch": 0.034076753064094835, "grad_norm": 0.6302178502082825, "learning_rate": 0.00019995292255561026, "loss": 1.6737, "step": 212 }, { "epoch": 0.034237492465340566, "grad_norm": 0.5917184948921204, "learning_rate": 0.00019995240384096296, "loss": 1.6743, "step": 213 }, { "epoch": 0.0343982318665863, "grad_norm": 0.6026787161827087, "learning_rate": 0.0001999518822849481, "loss": 1.5202, "step": 214 }, { "epoch": 0.03455897126783203, "grad_norm": 0.5704429745674133, "learning_rate": 0.00019995135788758046, "loss": 1.6485, "step": 215 }, { "epoch": 0.03471971066907776, "grad_norm": 0.7044981122016907, "learning_rate": 0.00019995083064887502, "loss": 1.449, "step": 216 }, { "epoch": 0.03488045007032349, "grad_norm": 0.6931586861610413, "learning_rate": 0.0001999503005688467, "loss": 1.5014, "step": 217 }, { "epoch": 0.03504118947156922, "grad_norm": 0.6349878907203674, "learning_rate": 0.0001999497676475106, "loss": 1.5019, "step": 218 }, { "epoch": 0.035201928872814946, "grad_norm": 0.8351258039474487, "learning_rate": 0.00019994923188488187, "loss": 1.4662, "step": 219 }, { "epoch": 0.03536266827406068, "grad_norm": 0.7162835597991943, "learning_rate": 0.0001999486932809757, "loss": 1.7242, "step": 220 }, { "epoch": 0.03552340767530641, "grad_norm": 0.599843442440033, "learning_rate": 0.0001999481518358075, "loss": 1.6287, "step": 221 }, { "epoch": 0.03568414707655214, "grad_norm": 0.6619487404823303, "learning_rate": 0.0001999476075493926, "loss": 1.9685, "step": 222 }, { "epoch": 0.03584488647779787, "grad_norm": 0.6789863705635071, "learning_rate": 0.00019994706042174642, "loss": 1.6707, "step": 223 }, { "epoch": 0.0360056258790436, "grad_norm": 0.5666345357894897, "learning_rate": 0.0001999465104528846, "loss": 1.5432, "step": 224 }, { "epoch": 0.03616636528028933, "grad_norm": 0.5827881693840027, "learning_rate": 0.00019994595764282275, "loss": 1.5185, "step": 225 }, { "epoch": 0.03632710468153506, "grad_norm": 0.5503188967704773, "learning_rate": 0.00019994540199157657, "loss": 1.5328, "step": 226 }, { "epoch": 0.036487844082780795, "grad_norm": 0.615378201007843, "learning_rate": 0.00019994484349916182, "loss": 1.6027, "step": 227 }, { "epoch": 0.03664858348402652, "grad_norm": 0.5945441722869873, "learning_rate": 0.00019994428216559443, "loss": 1.5582, "step": 228 }, { "epoch": 0.03680932288527225, "grad_norm": 0.5576664805412292, "learning_rate": 0.00019994371799089038, "loss": 1.1833, "step": 229 }, { "epoch": 0.03697006228651798, "grad_norm": 0.5956182479858398, "learning_rate": 0.00019994315097506565, "loss": 1.3828, "step": 230 }, { "epoch": 0.03713080168776371, "grad_norm": 0.5349995493888855, "learning_rate": 0.0001999425811181364, "loss": 1.3876, "step": 231 }, { "epoch": 0.03729154108900944, "grad_norm": 0.5421299934387207, "learning_rate": 0.0001999420084201188, "loss": 1.5232, "step": 232 }, { "epoch": 0.037452280490255174, "grad_norm": 0.6363821625709534, "learning_rate": 0.00019994143288102912, "loss": 1.611, "step": 233 }, { "epoch": 0.037613019891500905, "grad_norm": 0.5957868099212646, "learning_rate": 0.00019994085450088375, "loss": 1.7261, "step": 234 }, { "epoch": 0.037773759292746636, "grad_norm": 0.6640423536300659, "learning_rate": 0.0001999402732796991, "loss": 1.4984, "step": 235 }, { "epoch": 0.03793449869399237, "grad_norm": 0.5804933309555054, "learning_rate": 0.00019993968921749175, "loss": 1.7019, "step": 236 }, { "epoch": 0.0380952380952381, "grad_norm": 0.707933247089386, "learning_rate": 0.00019993910231427828, "loss": 1.7083, "step": 237 }, { "epoch": 0.03825597749648382, "grad_norm": 0.5395154356956482, "learning_rate": 0.00019993851257007534, "loss": 1.3753, "step": 238 }, { "epoch": 0.038416716897729554, "grad_norm": 0.5755935907363892, "learning_rate": 0.00019993791998489972, "loss": 1.4126, "step": 239 }, { "epoch": 0.038577456298975285, "grad_norm": 0.6702831387519836, "learning_rate": 0.00019993732455876826, "loss": 1.8517, "step": 240 }, { "epoch": 0.038738195700221016, "grad_norm": 0.6203488111495972, "learning_rate": 0.00019993672629169788, "loss": 1.5618, "step": 241 }, { "epoch": 0.03889893510146675, "grad_norm": 0.5781505107879639, "learning_rate": 0.0001999361251837056, "loss": 1.5662, "step": 242 }, { "epoch": 0.03905967450271248, "grad_norm": 1.7474713325500488, "learning_rate": 0.00019993552123480853, "loss": 1.4853, "step": 243 }, { "epoch": 0.03922041390395821, "grad_norm": 0.597203254699707, "learning_rate": 0.00019993491444502383, "loss": 1.6702, "step": 244 }, { "epoch": 0.03938115330520394, "grad_norm": 0.6134242415428162, "learning_rate": 0.00019993430481436865, "loss": 1.5502, "step": 245 }, { "epoch": 0.03954189270644967, "grad_norm": 0.5579332709312439, "learning_rate": 0.0001999336923428605, "loss": 1.4967, "step": 246 }, { "epoch": 0.039702632107695396, "grad_norm": 0.5886154770851135, "learning_rate": 0.0001999330770305166, "loss": 1.5669, "step": 247 }, { "epoch": 0.03986337150894113, "grad_norm": 0.6750704646110535, "learning_rate": 0.0001999324588773546, "loss": 1.7451, "step": 248 }, { "epoch": 0.04002411091018686, "grad_norm": 0.5973857045173645, "learning_rate": 0.00019993183788339196, "loss": 1.6935, "step": 249 }, { "epoch": 0.04018485031143259, "grad_norm": 0.584227442741394, "learning_rate": 0.0001999312140486464, "loss": 1.6541, "step": 250 }, { "epoch": 0.04034558971267832, "grad_norm": 0.5845524668693542, "learning_rate": 0.00019993058737313566, "loss": 1.5829, "step": 251 }, { "epoch": 0.04050632911392405, "grad_norm": 0.646384060382843, "learning_rate": 0.0001999299578568775, "loss": 1.5758, "step": 252 }, { "epoch": 0.04066706851516978, "grad_norm": 0.5996095538139343, "learning_rate": 0.00019992932549988985, "loss": 1.6093, "step": 253 }, { "epoch": 0.040827807916415514, "grad_norm": 0.6200376152992249, "learning_rate": 0.00019992869030219067, "loss": 1.6895, "step": 254 }, { "epoch": 0.040988547317661245, "grad_norm": 0.6104289293289185, "learning_rate": 0.00019992805226379804, "loss": 1.8817, "step": 255 }, { "epoch": 0.04114928671890697, "grad_norm": 0.6010470390319824, "learning_rate": 0.00019992741138473007, "loss": 1.5524, "step": 256 }, { "epoch": 0.0413100261201527, "grad_norm": 0.5624374151229858, "learning_rate": 0.00019992676766500503, "loss": 1.4278, "step": 257 }, { "epoch": 0.04147076552139843, "grad_norm": 0.651246964931488, "learning_rate": 0.00019992612110464116, "loss": 1.595, "step": 258 }, { "epoch": 0.04163150492264416, "grad_norm": 0.6038187742233276, "learning_rate": 0.00019992547170365684, "loss": 1.5336, "step": 259 }, { "epoch": 0.04179224432388989, "grad_norm": 0.5885361433029175, "learning_rate": 0.0001999248194620706, "loss": 1.5408, "step": 260 }, { "epoch": 0.041952983725135624, "grad_norm": 0.6367528438568115, "learning_rate": 0.0001999241643799009, "loss": 1.5367, "step": 261 }, { "epoch": 0.042113723126381356, "grad_norm": 0.7345449924468994, "learning_rate": 0.0001999235064571664, "loss": 1.6964, "step": 262 }, { "epoch": 0.04227446252762709, "grad_norm": 0.6258195042610168, "learning_rate": 0.0001999228456938858, "loss": 1.5711, "step": 263 }, { "epoch": 0.04243520192887282, "grad_norm": 0.6058852672576904, "learning_rate": 0.00019992218209007788, "loss": 1.7711, "step": 264 }, { "epoch": 0.04259594133011854, "grad_norm": 0.6320091485977173, "learning_rate": 0.0001999215156457615, "loss": 1.6628, "step": 265 }, { "epoch": 0.04275668073136427, "grad_norm": 0.751410722732544, "learning_rate": 0.00019992084636095565, "loss": 1.5991, "step": 266 }, { "epoch": 0.042917420132610004, "grad_norm": 0.5646713376045227, "learning_rate": 0.0001999201742356793, "loss": 1.4752, "step": 267 }, { "epoch": 0.043078159533855735, "grad_norm": 0.5128264427185059, "learning_rate": 0.00019991949926995156, "loss": 1.4058, "step": 268 }, { "epoch": 0.043238898935101466, "grad_norm": 0.6299881339073181, "learning_rate": 0.00019991882146379165, "loss": 1.4386, "step": 269 }, { "epoch": 0.0433996383363472, "grad_norm": 0.6468172669410706, "learning_rate": 0.0001999181408172188, "loss": 1.7494, "step": 270 }, { "epoch": 0.04356037773759293, "grad_norm": 0.5506256222724915, "learning_rate": 0.00019991745733025237, "loss": 1.5738, "step": 271 }, { "epoch": 0.04372111713883866, "grad_norm": 0.5802133083343506, "learning_rate": 0.00019991677100291184, "loss": 1.4441, "step": 272 }, { "epoch": 0.04388185654008439, "grad_norm": 0.6318967938423157, "learning_rate": 0.00019991608183521665, "loss": 1.6202, "step": 273 }, { "epoch": 0.04404259594133012, "grad_norm": 0.705115556716919, "learning_rate": 0.00019991538982718643, "loss": 1.8007, "step": 274 }, { "epoch": 0.044203335342575846, "grad_norm": 0.5781870484352112, "learning_rate": 0.00019991469497884084, "loss": 1.7901, "step": 275 }, { "epoch": 0.04436407474382158, "grad_norm": 0.529068648815155, "learning_rate": 0.00019991399729019964, "loss": 1.4107, "step": 276 }, { "epoch": 0.04452481414506731, "grad_norm": 0.6441764831542969, "learning_rate": 0.00019991329676128267, "loss": 1.7896, "step": 277 }, { "epoch": 0.04468555354631304, "grad_norm": 0.5982893705368042, "learning_rate": 0.00019991259339210982, "loss": 1.5343, "step": 278 }, { "epoch": 0.04484629294755877, "grad_norm": 0.6560002565383911, "learning_rate": 0.00019991188718270107, "loss": 1.6739, "step": 279 }, { "epoch": 0.0450070323488045, "grad_norm": 0.6477662324905396, "learning_rate": 0.00019991117813307656, "loss": 1.6007, "step": 280 }, { "epoch": 0.04516777175005023, "grad_norm": 0.6429880261421204, "learning_rate": 0.0001999104662432564, "loss": 1.6164, "step": 281 }, { "epoch": 0.045328511151295964, "grad_norm": 0.6076593399047852, "learning_rate": 0.00019990975151326084, "loss": 1.5689, "step": 282 }, { "epoch": 0.045489250552541695, "grad_norm": 0.6441612839698792, "learning_rate": 0.0001999090339431102, "loss": 1.6559, "step": 283 }, { "epoch": 0.04564998995378742, "grad_norm": 0.6516476273536682, "learning_rate": 0.00019990831353282484, "loss": 1.7818, "step": 284 }, { "epoch": 0.04581072935503315, "grad_norm": 0.6648668646812439, "learning_rate": 0.00019990759028242527, "loss": 1.5688, "step": 285 }, { "epoch": 0.04597146875627888, "grad_norm": 0.6995611190795898, "learning_rate": 0.00019990686419193207, "loss": 1.7994, "step": 286 }, { "epoch": 0.04613220815752461, "grad_norm": 0.5029324889183044, "learning_rate": 0.00019990613526136588, "loss": 1.4403, "step": 287 }, { "epoch": 0.04629294755877034, "grad_norm": 0.6697545647621155, "learning_rate": 0.0001999054034907474, "loss": 1.9321, "step": 288 }, { "epoch": 0.046453686960016075, "grad_norm": 0.5747056603431702, "learning_rate": 0.0001999046688800974, "loss": 1.6158, "step": 289 }, { "epoch": 0.046614426361261806, "grad_norm": 0.6005735993385315, "learning_rate": 0.00019990393142943683, "loss": 1.6345, "step": 290 }, { "epoch": 0.04677516576250754, "grad_norm": 0.5443317890167236, "learning_rate": 0.00019990319113878663, "loss": 1.4447, "step": 291 }, { "epoch": 0.04693590516375327, "grad_norm": 0.5290477871894836, "learning_rate": 0.00019990244800816782, "loss": 1.4554, "step": 292 }, { "epoch": 0.04709664456499899, "grad_norm": 0.5552376508712769, "learning_rate": 0.00019990170203760155, "loss": 1.5626, "step": 293 }, { "epoch": 0.04725738396624472, "grad_norm": 0.6352964043617249, "learning_rate": 0.000199900953227109, "loss": 1.722, "step": 294 }, { "epoch": 0.047418123367490454, "grad_norm": 0.547515869140625, "learning_rate": 0.0001999002015767115, "loss": 1.4364, "step": 295 }, { "epoch": 0.047578862768736185, "grad_norm": 0.594281792640686, "learning_rate": 0.00019989944708643037, "loss": 1.6468, "step": 296 }, { "epoch": 0.047739602169981916, "grad_norm": 0.5578073859214783, "learning_rate": 0.00019989868975628708, "loss": 1.491, "step": 297 }, { "epoch": 0.04790034157122765, "grad_norm": 0.586048424243927, "learning_rate": 0.00019989792958630318, "loss": 1.6453, "step": 298 }, { "epoch": 0.04806108097247338, "grad_norm": 0.5096251368522644, "learning_rate": 0.00019989716657650025, "loss": 1.3068, "step": 299 }, { "epoch": 0.04822182037371911, "grad_norm": 0.6047227382659912, "learning_rate": 0.00019989640072689996, "loss": 1.4892, "step": 300 }, { "epoch": 0.04838255977496484, "grad_norm": 0.6483644843101501, "learning_rate": 0.00019989563203752416, "loss": 1.541, "step": 301 }, { "epoch": 0.04854329917621057, "grad_norm": 0.5434318780899048, "learning_rate": 0.00019989486050839462, "loss": 1.3414, "step": 302 }, { "epoch": 0.048704038577456296, "grad_norm": 0.6717758178710938, "learning_rate": 0.0001998940861395333, "loss": 1.8509, "step": 303 }, { "epoch": 0.04886477797870203, "grad_norm": 0.5458473563194275, "learning_rate": 0.00019989330893096222, "loss": 1.2812, "step": 304 }, { "epoch": 0.04902551737994776, "grad_norm": 0.6006124019622803, "learning_rate": 0.0001998925288827035, "loss": 1.4206, "step": 305 }, { "epoch": 0.04918625678119349, "grad_norm": 0.6227914690971375, "learning_rate": 0.00019989174599477924, "loss": 1.5814, "step": 306 }, { "epoch": 0.04934699618243922, "grad_norm": 0.6855230331420898, "learning_rate": 0.00019989096026721176, "loss": 1.6188, "step": 307 }, { "epoch": 0.04950773558368495, "grad_norm": 0.5799712538719177, "learning_rate": 0.0001998901717000234, "loss": 1.4838, "step": 308 }, { "epoch": 0.04966847498493068, "grad_norm": 0.5779968500137329, "learning_rate": 0.00019988938029323651, "loss": 1.6156, "step": 309 }, { "epoch": 0.049829214386176414, "grad_norm": 0.6195431351661682, "learning_rate": 0.00019988858604687366, "loss": 1.4551, "step": 310 }, { "epoch": 0.049989953787422145, "grad_norm": 0.6116679906845093, "learning_rate": 0.0001998877889609574, "loss": 1.5327, "step": 311 }, { "epoch": 0.05015069318866787, "grad_norm": 3.5499792098999023, "learning_rate": 0.00019988698903551037, "loss": 1.5691, "step": 312 }, { "epoch": 0.0503114325899136, "grad_norm": 0.6839916706085205, "learning_rate": 0.00019988618627055535, "loss": 1.4663, "step": 313 }, { "epoch": 0.05047217199115933, "grad_norm": 0.5719752311706543, "learning_rate": 0.00019988538066611513, "loss": 1.6389, "step": 314 }, { "epoch": 0.05063291139240506, "grad_norm": 0.691422700881958, "learning_rate": 0.0001998845722222126, "loss": 1.4551, "step": 315 }, { "epoch": 0.050793650793650794, "grad_norm": 0.7763272523880005, "learning_rate": 0.00019988376093887076, "loss": 1.5663, "step": 316 }, { "epoch": 0.050954390194896525, "grad_norm": 0.6143157482147217, "learning_rate": 0.0001998829468161127, "loss": 1.7976, "step": 317 }, { "epoch": 0.051115129596142256, "grad_norm": 0.6413480639457703, "learning_rate": 0.00019988212985396154, "loss": 1.5625, "step": 318 }, { "epoch": 0.05127586899738799, "grad_norm": 0.6182782649993896, "learning_rate": 0.00019988131005244046, "loss": 1.6025, "step": 319 }, { "epoch": 0.05143660839863372, "grad_norm": 0.5921430587768555, "learning_rate": 0.00019988048741157284, "loss": 1.5781, "step": 320 }, { "epoch": 0.05159734779987944, "grad_norm": 0.7113355398178101, "learning_rate": 0.00019987966193138202, "loss": 1.771, "step": 321 }, { "epoch": 0.05175808720112517, "grad_norm": 0.5943952798843384, "learning_rate": 0.00019987883361189147, "loss": 1.5874, "step": 322 }, { "epoch": 0.051918826602370904, "grad_norm": 0.57746422290802, "learning_rate": 0.00019987800245312474, "loss": 1.4783, "step": 323 }, { "epoch": 0.052079566003616636, "grad_norm": 0.7863821983337402, "learning_rate": 0.00019987716845510547, "loss": 1.5036, "step": 324 }, { "epoch": 0.05224030540486237, "grad_norm": 0.6647295951843262, "learning_rate": 0.00019987633161785737, "loss": 1.8746, "step": 325 }, { "epoch": 0.0524010448061081, "grad_norm": 0.716094970703125, "learning_rate": 0.00019987549194140422, "loss": 1.7538, "step": 326 }, { "epoch": 0.05256178420735383, "grad_norm": 0.692552924156189, "learning_rate": 0.00019987464942576987, "loss": 1.7466, "step": 327 }, { "epoch": 0.05272252360859956, "grad_norm": 0.6179649233818054, "learning_rate": 0.0001998738040709783, "loss": 1.5899, "step": 328 }, { "epoch": 0.05288326300984529, "grad_norm": 0.626240611076355, "learning_rate": 0.00019987295587705354, "loss": 1.5672, "step": 329 }, { "epoch": 0.05304400241109102, "grad_norm": 0.6765881180763245, "learning_rate": 0.00019987210484401967, "loss": 1.7949, "step": 330 }, { "epoch": 0.053204741812336746, "grad_norm": 0.6223539113998413, "learning_rate": 0.00019987125097190088, "loss": 1.4808, "step": 331 }, { "epoch": 0.05336548121358248, "grad_norm": 0.6364530920982361, "learning_rate": 0.0001998703942607215, "loss": 1.6427, "step": 332 }, { "epoch": 0.05352622061482821, "grad_norm": 0.6387436389923096, "learning_rate": 0.00019986953471050583, "loss": 1.6342, "step": 333 }, { "epoch": 0.05368696001607394, "grad_norm": 0.7294160723686218, "learning_rate": 0.00019986867232127834, "loss": 1.8639, "step": 334 }, { "epoch": 0.05384769941731967, "grad_norm": 0.6104435920715332, "learning_rate": 0.00019986780709306355, "loss": 1.6161, "step": 335 }, { "epoch": 0.0540084388185654, "grad_norm": 0.7539715766906738, "learning_rate": 0.000199866939025886, "loss": 1.8329, "step": 336 }, { "epoch": 0.05416917821981113, "grad_norm": 0.5917460322380066, "learning_rate": 0.00019986606811977042, "loss": 1.596, "step": 337 }, { "epoch": 0.054329917621056864, "grad_norm": 0.6303766965866089, "learning_rate": 0.00019986519437474156, "loss": 1.7071, "step": 338 }, { "epoch": 0.054490657022302595, "grad_norm": 0.6537228226661682, "learning_rate": 0.00019986431779082424, "loss": 1.5729, "step": 339 }, { "epoch": 0.05465139642354832, "grad_norm": 0.6939976215362549, "learning_rate": 0.00019986343836804338, "loss": 1.7603, "step": 340 }, { "epoch": 0.05481213582479405, "grad_norm": 0.6060411334037781, "learning_rate": 0.00019986255610642402, "loss": 1.7272, "step": 341 }, { "epoch": 0.05497287522603978, "grad_norm": 0.5748594403266907, "learning_rate": 0.00019986167100599116, "loss": 1.5214, "step": 342 }, { "epoch": 0.05513361462728551, "grad_norm": 0.6385700106620789, "learning_rate": 0.00019986078306677004, "loss": 1.5505, "step": 343 }, { "epoch": 0.055294354028531244, "grad_norm": 0.5272520184516907, "learning_rate": 0.00019985989228878587, "loss": 1.4574, "step": 344 }, { "epoch": 0.055455093429776975, "grad_norm": 0.5139601230621338, "learning_rate": 0.00019985899867206393, "loss": 1.4295, "step": 345 }, { "epoch": 0.055615832831022706, "grad_norm": 0.7300357222557068, "learning_rate": 0.00019985810221662972, "loss": 1.6096, "step": 346 }, { "epoch": 0.05577657223226844, "grad_norm": 0.5310646295547485, "learning_rate": 0.00019985720292250864, "loss": 1.2839, "step": 347 }, { "epoch": 0.05593731163351417, "grad_norm": 0.6625987887382507, "learning_rate": 0.00019985630078972632, "loss": 1.4928, "step": 348 }, { "epoch": 0.05609805103475989, "grad_norm": 0.6264753341674805, "learning_rate": 0.0001998553958183083, "loss": 1.6288, "step": 349 }, { "epoch": 0.05625879043600562, "grad_norm": 0.5819182395935059, "learning_rate": 0.00019985448800828045, "loss": 1.5008, "step": 350 }, { "epoch": 0.056419529837251355, "grad_norm": 0.577702522277832, "learning_rate": 0.00019985357735966846, "loss": 1.3536, "step": 351 }, { "epoch": 0.056580269238497086, "grad_norm": 0.5564020276069641, "learning_rate": 0.00019985266387249824, "loss": 1.3574, "step": 352 }, { "epoch": 0.05674100863974282, "grad_norm": 0.561368465423584, "learning_rate": 0.0001998517475467958, "loss": 1.4779, "step": 353 }, { "epoch": 0.05690174804098855, "grad_norm": 0.5645065903663635, "learning_rate": 0.00019985082838258717, "loss": 1.2618, "step": 354 }, { "epoch": 0.05706248744223428, "grad_norm": 0.6145490407943726, "learning_rate": 0.00019984990637989845, "loss": 1.5856, "step": 355 }, { "epoch": 0.05722322684348001, "grad_norm": 0.6135921478271484, "learning_rate": 0.0001998489815387559, "loss": 1.4448, "step": 356 }, { "epoch": 0.05738396624472574, "grad_norm": 0.6801473498344421, "learning_rate": 0.0001998480538591858, "loss": 1.6004, "step": 357 }, { "epoch": 0.057544705645971465, "grad_norm": 0.5556286573410034, "learning_rate": 0.00019984712334121448, "loss": 1.5283, "step": 358 }, { "epoch": 0.057705445047217196, "grad_norm": 0.6407539248466492, "learning_rate": 0.00019984618998486838, "loss": 1.6432, "step": 359 }, { "epoch": 0.05786618444846293, "grad_norm": 0.606516420841217, "learning_rate": 0.00019984525379017413, "loss": 1.5866, "step": 360 }, { "epoch": 0.05802692384970866, "grad_norm": 0.6309589743614197, "learning_rate": 0.00019984431475715826, "loss": 1.5989, "step": 361 }, { "epoch": 0.05818766325095439, "grad_norm": 0.5992937684059143, "learning_rate": 0.0001998433728858475, "loss": 1.4856, "step": 362 }, { "epoch": 0.05834840265220012, "grad_norm": 0.7079967856407166, "learning_rate": 0.00019984242817626863, "loss": 1.2697, "step": 363 }, { "epoch": 0.05850914205344585, "grad_norm": 0.6007310152053833, "learning_rate": 0.00019984148062844844, "loss": 1.3473, "step": 364 }, { "epoch": 0.05866988145469158, "grad_norm": 0.6346818208694458, "learning_rate": 0.00019984053024241393, "loss": 1.531, "step": 365 }, { "epoch": 0.058830620855937314, "grad_norm": 0.5969633460044861, "learning_rate": 0.00019983957701819212, "loss": 1.3058, "step": 366 }, { "epoch": 0.058991360257183045, "grad_norm": 0.8155881762504578, "learning_rate": 0.00019983862095581008, "loss": 1.8501, "step": 367 }, { "epoch": 0.05915209965842877, "grad_norm": 0.6024547815322876, "learning_rate": 0.000199837662055295, "loss": 1.4207, "step": 368 }, { "epoch": 0.0593128390596745, "grad_norm": 0.6766681671142578, "learning_rate": 0.00019983670031667412, "loss": 1.7069, "step": 369 }, { "epoch": 0.05947357846092023, "grad_norm": 0.5584944486618042, "learning_rate": 0.00019983573573997482, "loss": 1.3881, "step": 370 }, { "epoch": 0.05963431786216596, "grad_norm": 0.5835119485855103, "learning_rate": 0.00019983476832522446, "loss": 1.3361, "step": 371 }, { "epoch": 0.059795057263411694, "grad_norm": 0.6722255945205688, "learning_rate": 0.00019983379807245058, "loss": 1.5121, "step": 372 }, { "epoch": 0.059955796664657425, "grad_norm": 0.667070746421814, "learning_rate": 0.00019983282498168078, "loss": 1.6108, "step": 373 }, { "epoch": 0.060116536065903156, "grad_norm": 0.5611769556999207, "learning_rate": 0.0001998318490529427, "loss": 1.7019, "step": 374 }, { "epoch": 0.06027727546714889, "grad_norm": 0.5403540134429932, "learning_rate": 0.00019983087028626408, "loss": 1.4892, "step": 375 }, { "epoch": 0.06043801486839462, "grad_norm": 0.6327553391456604, "learning_rate": 0.00019982988868167274, "loss": 1.5433, "step": 376 }, { "epoch": 0.06059875426964034, "grad_norm": 0.6810361742973328, "learning_rate": 0.0001998289042391966, "loss": 1.7237, "step": 377 }, { "epoch": 0.060759493670886074, "grad_norm": 0.606086790561676, "learning_rate": 0.00019982791695886363, "loss": 1.4027, "step": 378 }, { "epoch": 0.060920233072131805, "grad_norm": 0.6024448275566101, "learning_rate": 0.0001998269268407019, "loss": 1.3602, "step": 379 }, { "epoch": 0.061080972473377536, "grad_norm": 0.587409496307373, "learning_rate": 0.00019982593388473955, "loss": 1.6551, "step": 380 }, { "epoch": 0.06124171187462327, "grad_norm": 0.6076338291168213, "learning_rate": 0.00019982493809100484, "loss": 1.5792, "step": 381 }, { "epoch": 0.061402451275869, "grad_norm": 0.6409688591957092, "learning_rate": 0.00019982393945952606, "loss": 1.4956, "step": 382 }, { "epoch": 0.06156319067711473, "grad_norm": 0.6293362379074097, "learning_rate": 0.00019982293799033154, "loss": 1.4316, "step": 383 }, { "epoch": 0.06172393007836046, "grad_norm": 0.6377556324005127, "learning_rate": 0.00019982193368344985, "loss": 1.7162, "step": 384 }, { "epoch": 0.06188466947960619, "grad_norm": 0.6847701668739319, "learning_rate": 0.00019982092653890945, "loss": 1.8226, "step": 385 }, { "epoch": 0.062045408880851916, "grad_norm": 0.5529607534408569, "learning_rate": 0.00019981991655673904, "loss": 1.4147, "step": 386 }, { "epoch": 0.06220614828209765, "grad_norm": 0.5428644418716431, "learning_rate": 0.00019981890373696728, "loss": 1.4556, "step": 387 }, { "epoch": 0.06236688768334338, "grad_norm": 0.6835181713104248, "learning_rate": 0.000199817888079623, "loss": 1.6396, "step": 388 }, { "epoch": 0.06252762708458912, "grad_norm": 0.5669711232185364, "learning_rate": 0.00019981686958473504, "loss": 1.4008, "step": 389 }, { "epoch": 0.06268836648583484, "grad_norm": 0.5712982416152954, "learning_rate": 0.00019981584825233236, "loss": 1.3887, "step": 390 }, { "epoch": 0.06284910588708056, "grad_norm": 5.4191107749938965, "learning_rate": 0.00019981482408244403, "loss": 1.5087, "step": 391 }, { "epoch": 0.0630098452883263, "grad_norm": 0.5237439274787903, "learning_rate": 0.0001998137970750991, "loss": 1.3722, "step": 392 }, { "epoch": 0.06317058468957203, "grad_norm": 0.556926429271698, "learning_rate": 0.00019981276723032685, "loss": 1.3679, "step": 393 }, { "epoch": 0.06333132409081776, "grad_norm": 0.6172389984130859, "learning_rate": 0.00019981173454815647, "loss": 1.613, "step": 394 }, { "epoch": 0.06349206349206349, "grad_norm": 0.5963334441184998, "learning_rate": 0.00019981069902861735, "loss": 1.4822, "step": 395 }, { "epoch": 0.06365280289330923, "grad_norm": 0.6886758208274841, "learning_rate": 0.00019980966067173895, "loss": 1.64, "step": 396 }, { "epoch": 0.06381354229455495, "grad_norm": 0.6077172160148621, "learning_rate": 0.00019980861947755077, "loss": 1.5326, "step": 397 }, { "epoch": 0.06397428169580069, "grad_norm": 0.6141343712806702, "learning_rate": 0.00019980757544608237, "loss": 1.5118, "step": 398 }, { "epoch": 0.06413502109704641, "grad_norm": 0.6389530301094055, "learning_rate": 0.00019980652857736348, "loss": 1.5656, "step": 399 }, { "epoch": 0.06429576049829214, "grad_norm": 0.5553394556045532, "learning_rate": 0.00019980547887142388, "loss": 1.355, "step": 400 }, { "epoch": 0.06429576049829214, "eval_loss": 1.587803602218628, "eval_runtime": 46.2276, "eval_samples_per_second": 5.43, "eval_steps_per_second": 2.726, "step": 400 }, { "epoch": 0.06445649989953788, "grad_norm": 0.7059817314147949, "learning_rate": 0.0001998044263282933, "loss": 1.5423, "step": 401 }, { "epoch": 0.0646172393007836, "grad_norm": 0.7183067202568054, "learning_rate": 0.00019980337094800178, "loss": 1.5851, "step": 402 }, { "epoch": 0.06477797870202934, "grad_norm": 0.6690342426300049, "learning_rate": 0.0001998023127305793, "loss": 1.8248, "step": 403 }, { "epoch": 0.06493871810327506, "grad_norm": 0.779579222202301, "learning_rate": 0.00019980125167605588, "loss": 1.6004, "step": 404 }, { "epoch": 0.0650994575045208, "grad_norm": 0.7198390364646912, "learning_rate": 0.00019980018778446173, "loss": 1.5453, "step": 405 }, { "epoch": 0.06526019690576652, "grad_norm": 0.5720181465148926, "learning_rate": 0.00019979912105582708, "loss": 1.4679, "step": 406 }, { "epoch": 0.06542093630701226, "grad_norm": 0.575020968914032, "learning_rate": 0.00019979805149018226, "loss": 1.5673, "step": 407 }, { "epoch": 0.06558167570825799, "grad_norm": 0.6905900239944458, "learning_rate": 0.0001997969790875577, "loss": 1.6503, "step": 408 }, { "epoch": 0.06574241510950372, "grad_norm": 0.741786539554596, "learning_rate": 0.00019979590384798382, "loss": 1.4486, "step": 409 }, { "epoch": 0.06590315451074945, "grad_norm": 0.6186914443969727, "learning_rate": 0.00019979482577149124, "loss": 1.6426, "step": 410 }, { "epoch": 0.06606389391199517, "grad_norm": 0.6028152108192444, "learning_rate": 0.0001997937448581106, "loss": 1.6345, "step": 411 }, { "epoch": 0.06622463331324091, "grad_norm": 0.6358482837677002, "learning_rate": 0.00019979266110787262, "loss": 1.2546, "step": 412 }, { "epoch": 0.06638537271448663, "grad_norm": 0.7118525505065918, "learning_rate": 0.0001997915745208081, "loss": 1.7311, "step": 413 }, { "epoch": 0.06654611211573237, "grad_norm": 0.6252378225326538, "learning_rate": 0.00019979048509694792, "loss": 1.8325, "step": 414 }, { "epoch": 0.0667068515169781, "grad_norm": 0.7109782695770264, "learning_rate": 0.00019978939283632306, "loss": 1.5203, "step": 415 }, { "epoch": 0.06686759091822383, "grad_norm": 0.569715678691864, "learning_rate": 0.00019978829773896462, "loss": 1.3094, "step": 416 }, { "epoch": 0.06702833031946956, "grad_norm": 0.8700321316719055, "learning_rate": 0.00019978719980490365, "loss": 1.631, "step": 417 }, { "epoch": 0.0671890697207153, "grad_norm": 0.6423460245132446, "learning_rate": 0.00019978609903417144, "loss": 1.6771, "step": 418 }, { "epoch": 0.06734980912196102, "grad_norm": 0.6819444894790649, "learning_rate": 0.00019978499542679917, "loss": 1.556, "step": 419 }, { "epoch": 0.06751054852320675, "grad_norm": 0.6211190223693848, "learning_rate": 0.00019978388898281833, "loss": 1.5882, "step": 420 }, { "epoch": 0.06767128792445248, "grad_norm": 0.623430609703064, "learning_rate": 0.00019978277970226032, "loss": 1.4186, "step": 421 }, { "epoch": 0.06783202732569821, "grad_norm": 0.7197887301445007, "learning_rate": 0.00019978166758515667, "loss": 1.7932, "step": 422 }, { "epoch": 0.06799276672694395, "grad_norm": 0.7138811945915222, "learning_rate": 0.00019978055263153896, "loss": 1.5363, "step": 423 }, { "epoch": 0.06815350612818967, "grad_norm": 0.6475142240524292, "learning_rate": 0.00019977943484143898, "loss": 1.3738, "step": 424 }, { "epoch": 0.06831424552943541, "grad_norm": 0.6482046842575073, "learning_rate": 0.00019977831421488844, "loss": 1.4641, "step": 425 }, { "epoch": 0.06847498493068113, "grad_norm": 0.5909495949745178, "learning_rate": 0.00019977719075191922, "loss": 1.3673, "step": 426 }, { "epoch": 0.06863572433192687, "grad_norm": 0.6355801820755005, "learning_rate": 0.00019977606445256322, "loss": 1.5666, "step": 427 }, { "epoch": 0.0687964637331726, "grad_norm": 0.6076622605323792, "learning_rate": 0.0001997749353168525, "loss": 1.4744, "step": 428 }, { "epoch": 0.06895720313441832, "grad_norm": 0.5940968990325928, "learning_rate": 0.00019977380334481915, "loss": 1.5617, "step": 429 }, { "epoch": 0.06911794253566406, "grad_norm": 0.6179224252700806, "learning_rate": 0.0001997726685364953, "loss": 1.3521, "step": 430 }, { "epoch": 0.06927868193690978, "grad_norm": 0.6652457118034363, "learning_rate": 0.00019977153089191328, "loss": 1.5025, "step": 431 }, { "epoch": 0.06943942133815552, "grad_norm": 0.7741058468818665, "learning_rate": 0.0001997703904111054, "loss": 1.906, "step": 432 }, { "epoch": 0.06960016073940124, "grad_norm": 0.5475508570671082, "learning_rate": 0.0001997692470941041, "loss": 1.3717, "step": 433 }, { "epoch": 0.06976090014064698, "grad_norm": 0.6023842692375183, "learning_rate": 0.00019976810094094185, "loss": 1.4897, "step": 434 }, { "epoch": 0.0699216395418927, "grad_norm": 0.6700923442840576, "learning_rate": 0.00019976695195165123, "loss": 1.7758, "step": 435 }, { "epoch": 0.07008237894313844, "grad_norm": 0.5874378085136414, "learning_rate": 0.0001997658001262649, "loss": 1.4047, "step": 436 }, { "epoch": 0.07024311834438417, "grad_norm": 0.580768883228302, "learning_rate": 0.00019976464546481566, "loss": 1.5148, "step": 437 }, { "epoch": 0.07040385774562989, "grad_norm": 0.6560660004615784, "learning_rate": 0.0001997634879673363, "loss": 1.7059, "step": 438 }, { "epoch": 0.07056459714687563, "grad_norm": 0.6333310008049011, "learning_rate": 0.00019976232763385966, "loss": 1.5655, "step": 439 }, { "epoch": 0.07072533654812135, "grad_norm": 0.6392038464546204, "learning_rate": 0.00019976116446441885, "loss": 1.4994, "step": 440 }, { "epoch": 0.07088607594936709, "grad_norm": 0.5628023743629456, "learning_rate": 0.00019975999845904682, "loss": 1.5268, "step": 441 }, { "epoch": 0.07104681535061282, "grad_norm": 0.5980152487754822, "learning_rate": 0.00019975882961777676, "loss": 1.3754, "step": 442 }, { "epoch": 0.07120755475185855, "grad_norm": 0.6738753914833069, "learning_rate": 0.00019975765794064194, "loss": 1.6101, "step": 443 }, { "epoch": 0.07136829415310428, "grad_norm": 0.7291564345359802, "learning_rate": 0.0001997564834276756, "loss": 1.7236, "step": 444 }, { "epoch": 0.07152903355435002, "grad_norm": 0.6501093506813049, "learning_rate": 0.0001997553060789112, "loss": 1.4152, "step": 445 }, { "epoch": 0.07168977295559574, "grad_norm": 0.6595898270606995, "learning_rate": 0.00019975412589438212, "loss": 1.875, "step": 446 }, { "epoch": 0.07185051235684146, "grad_norm": 0.6450464725494385, "learning_rate": 0.00019975294287412197, "loss": 1.6641, "step": 447 }, { "epoch": 0.0720112517580872, "grad_norm": 0.6315407156944275, "learning_rate": 0.00019975175701816435, "loss": 1.3332, "step": 448 }, { "epoch": 0.07217199115933293, "grad_norm": 0.5908608436584473, "learning_rate": 0.000199750568326543, "loss": 1.3999, "step": 449 }, { "epoch": 0.07233273056057866, "grad_norm": 0.5615253448486328, "learning_rate": 0.00019974937679929168, "loss": 1.4268, "step": 450 }, { "epoch": 0.07249346996182439, "grad_norm": 0.651449978351593, "learning_rate": 0.00019974818243644431, "loss": 1.5633, "step": 451 }, { "epoch": 0.07265420936307013, "grad_norm": 0.6930869817733765, "learning_rate": 0.0001997469852380348, "loss": 1.7563, "step": 452 }, { "epoch": 0.07281494876431585, "grad_norm": 0.671590268611908, "learning_rate": 0.0001997457852040972, "loss": 1.8498, "step": 453 }, { "epoch": 0.07297568816556159, "grad_norm": 0.6522193551063538, "learning_rate": 0.00019974458233466559, "loss": 1.5533, "step": 454 }, { "epoch": 0.07313642756680731, "grad_norm": 0.6381151080131531, "learning_rate": 0.00019974337662977422, "loss": 1.4845, "step": 455 }, { "epoch": 0.07329716696805304, "grad_norm": 0.7573384046554565, "learning_rate": 0.00019974216808945735, "loss": 2.0107, "step": 456 }, { "epoch": 0.07345790636929878, "grad_norm": 0.6075372695922852, "learning_rate": 0.00019974095671374927, "loss": 1.4005, "step": 457 }, { "epoch": 0.0736186457705445, "grad_norm": 0.6831279993057251, "learning_rate": 0.00019973974250268452, "loss": 1.6679, "step": 458 }, { "epoch": 0.07377938517179024, "grad_norm": 0.6422876715660095, "learning_rate": 0.0001997385254562975, "loss": 1.6774, "step": 459 }, { "epoch": 0.07394012457303596, "grad_norm": 0.5633629560470581, "learning_rate": 0.00019973730557462297, "loss": 1.2682, "step": 460 }, { "epoch": 0.0741008639742817, "grad_norm": 0.614403486251831, "learning_rate": 0.00019973608285769542, "loss": 1.4848, "step": 461 }, { "epoch": 0.07426160337552742, "grad_norm": 0.6385796070098877, "learning_rate": 0.00019973485730554976, "loss": 1.5976, "step": 462 }, { "epoch": 0.07442234277677316, "grad_norm": 0.6652841567993164, "learning_rate": 0.00019973362891822073, "loss": 1.6646, "step": 463 }, { "epoch": 0.07458308217801889, "grad_norm": 0.6633283495903015, "learning_rate": 0.0001997323976957433, "loss": 1.5313, "step": 464 }, { "epoch": 0.07474382157926461, "grad_norm": 0.6783115863800049, "learning_rate": 0.00019973116363815248, "loss": 1.8874, "step": 465 }, { "epoch": 0.07490456098051035, "grad_norm": 0.5849350690841675, "learning_rate": 0.0001997299267454833, "loss": 1.4148, "step": 466 }, { "epoch": 0.07506530038175607, "grad_norm": 0.5903291702270508, "learning_rate": 0.00019972868701777094, "loss": 1.429, "step": 467 }, { "epoch": 0.07522603978300181, "grad_norm": 0.5895156860351562, "learning_rate": 0.00019972744445505066, "loss": 1.4765, "step": 468 }, { "epoch": 0.07538677918424753, "grad_norm": 0.8161036968231201, "learning_rate": 0.0001997261990573578, "loss": 1.841, "step": 469 }, { "epoch": 0.07554751858549327, "grad_norm": 0.637617826461792, "learning_rate": 0.0001997249508247277, "loss": 1.3648, "step": 470 }, { "epoch": 0.075708257986739, "grad_norm": 0.6305023431777954, "learning_rate": 0.0001997236997571959, "loss": 1.5196, "step": 471 }, { "epoch": 0.07586899738798474, "grad_norm": 0.6307979226112366, "learning_rate": 0.00019972244585479796, "loss": 1.3995, "step": 472 }, { "epoch": 0.07602973678923046, "grad_norm": 0.5925201177597046, "learning_rate": 0.00019972118911756949, "loss": 1.4853, "step": 473 }, { "epoch": 0.0761904761904762, "grad_norm": 0.6347175240516663, "learning_rate": 0.00019971992954554625, "loss": 1.6311, "step": 474 }, { "epoch": 0.07635121559172192, "grad_norm": 0.7065414786338806, "learning_rate": 0.000199718667138764, "loss": 1.7222, "step": 475 }, { "epoch": 0.07651195499296765, "grad_norm": 0.5543337464332581, "learning_rate": 0.0001997174018972587, "loss": 1.1992, "step": 476 }, { "epoch": 0.07667269439421338, "grad_norm": 0.6446788311004639, "learning_rate": 0.00019971613382106624, "loss": 1.6094, "step": 477 }, { "epoch": 0.07683343379545911, "grad_norm": 0.6127315759658813, "learning_rate": 0.00019971486291022275, "loss": 1.5303, "step": 478 }, { "epoch": 0.07699417319670485, "grad_norm": 0.6123241782188416, "learning_rate": 0.00019971358916476425, "loss": 1.601, "step": 479 }, { "epoch": 0.07715491259795057, "grad_norm": 0.7107446193695068, "learning_rate": 0.00019971231258472707, "loss": 1.8143, "step": 480 }, { "epoch": 0.07731565199919631, "grad_norm": 0.5746397376060486, "learning_rate": 0.00019971103317014741, "loss": 1.2907, "step": 481 }, { "epoch": 0.07747639140044203, "grad_norm": 1.5981017351150513, "learning_rate": 0.00019970975092106167, "loss": 1.9001, "step": 482 }, { "epoch": 0.07763713080168777, "grad_norm": 0.617061197757721, "learning_rate": 0.0001997084658375063, "loss": 1.7105, "step": 483 }, { "epoch": 0.0777978702029335, "grad_norm": 0.6086183190345764, "learning_rate": 0.00019970717791951788, "loss": 1.5589, "step": 484 }, { "epoch": 0.07795860960417922, "grad_norm": 0.7108407616615295, "learning_rate": 0.00019970588716713292, "loss": 1.7767, "step": 485 }, { "epoch": 0.07811934900542496, "grad_norm": 0.7608966827392578, "learning_rate": 0.0001997045935803882, "loss": 2.0693, "step": 486 }, { "epoch": 0.07828008840667068, "grad_norm": 0.6452834010124207, "learning_rate": 0.00019970329715932045, "loss": 1.5442, "step": 487 }, { "epoch": 0.07844082780791642, "grad_norm": 0.6057897210121155, "learning_rate": 0.00019970199790396654, "loss": 1.6126, "step": 488 }, { "epoch": 0.07860156720916214, "grad_norm": 0.5783177018165588, "learning_rate": 0.00019970069581436338, "loss": 1.3875, "step": 489 }, { "epoch": 0.07876230661040788, "grad_norm": 0.6528737545013428, "learning_rate": 0.000199699390890548, "loss": 1.5366, "step": 490 }, { "epoch": 0.0789230460116536, "grad_norm": 0.6061338186264038, "learning_rate": 0.0001996980831325575, "loss": 1.5588, "step": 491 }, { "epoch": 0.07908378541289934, "grad_norm": 0.6319064497947693, "learning_rate": 0.00019969677254042906, "loss": 1.593, "step": 492 }, { "epoch": 0.07924452481414507, "grad_norm": 0.6446017026901245, "learning_rate": 0.00019969545911419995, "loss": 1.6094, "step": 493 }, { "epoch": 0.07940526421539079, "grad_norm": 0.662975549697876, "learning_rate": 0.00019969414285390746, "loss": 1.4766, "step": 494 }, { "epoch": 0.07956600361663653, "grad_norm": 0.757055401802063, "learning_rate": 0.00019969282375958906, "loss": 1.7831, "step": 495 }, { "epoch": 0.07972674301788225, "grad_norm": 0.6217721104621887, "learning_rate": 0.0001996915018312822, "loss": 1.5658, "step": 496 }, { "epoch": 0.07988748241912799, "grad_norm": 0.6112630367279053, "learning_rate": 0.00019969017706902454, "loss": 1.6268, "step": 497 }, { "epoch": 0.08004822182037372, "grad_norm": 0.5972138047218323, "learning_rate": 0.00019968884947285363, "loss": 1.4105, "step": 498 }, { "epoch": 0.08020896122161945, "grad_norm": 0.5799359083175659, "learning_rate": 0.00019968751904280726, "loss": 1.514, "step": 499 }, { "epoch": 0.08036970062286518, "grad_norm": 0.701223611831665, "learning_rate": 0.00019968618577892325, "loss": 1.82, "step": 500 }, { "epoch": 0.08053044002411092, "grad_norm": 0.5340350270271301, "learning_rate": 0.00019968484968123953, "loss": 1.3407, "step": 501 }, { "epoch": 0.08069117942535664, "grad_norm": 0.7034162878990173, "learning_rate": 0.00019968351074979403, "loss": 1.6271, "step": 502 }, { "epoch": 0.08085191882660236, "grad_norm": 0.6257761716842651, "learning_rate": 0.00019968216898462484, "loss": 1.4905, "step": 503 }, { "epoch": 0.0810126582278481, "grad_norm": 0.6463956832885742, "learning_rate": 0.0001996808243857701, "loss": 1.4385, "step": 504 }, { "epoch": 0.08117339762909383, "grad_norm": 0.6040690541267395, "learning_rate": 0.00019967947695326806, "loss": 1.5325, "step": 505 }, { "epoch": 0.08133413703033956, "grad_norm": 0.630862295627594, "learning_rate": 0.00019967812668715698, "loss": 1.5697, "step": 506 }, { "epoch": 0.08149487643158529, "grad_norm": 0.6305744647979736, "learning_rate": 0.00019967677358747525, "loss": 1.4222, "step": 507 }, { "epoch": 0.08165561583283103, "grad_norm": 0.7834799885749817, "learning_rate": 0.00019967541765426134, "loss": 1.6415, "step": 508 }, { "epoch": 0.08181635523407675, "grad_norm": 0.6755993962287903, "learning_rate": 0.00019967405888755382, "loss": 1.5845, "step": 509 }, { "epoch": 0.08197709463532249, "grad_norm": 0.7624217867851257, "learning_rate": 0.0001996726972873913, "loss": 1.5399, "step": 510 }, { "epoch": 0.08213783403656821, "grad_norm": 0.6393696069717407, "learning_rate": 0.00019967133285381244, "loss": 1.6329, "step": 511 }, { "epoch": 0.08229857343781394, "grad_norm": 0.5832926630973816, "learning_rate": 0.0001996699655868561, "loss": 1.3829, "step": 512 }, { "epoch": 0.08245931283905968, "grad_norm": 0.6505093574523926, "learning_rate": 0.00019966859548656114, "loss": 1.3813, "step": 513 }, { "epoch": 0.0826200522403054, "grad_norm": 0.6465361714363098, "learning_rate": 0.00019966722255296645, "loss": 1.8458, "step": 514 }, { "epoch": 0.08278079164155114, "grad_norm": 0.634793758392334, "learning_rate": 0.0001996658467861111, "loss": 1.6937, "step": 515 }, { "epoch": 0.08294153104279686, "grad_norm": 0.6424897313117981, "learning_rate": 0.00019966446818603422, "loss": 1.7386, "step": 516 }, { "epoch": 0.0831022704440426, "grad_norm": 0.7349180579185486, "learning_rate": 0.00019966308675277493, "loss": 1.6977, "step": 517 }, { "epoch": 0.08326300984528832, "grad_norm": 0.8031671643257141, "learning_rate": 0.0001996617024863726, "loss": 1.4675, "step": 518 }, { "epoch": 0.08342374924653406, "grad_norm": 0.5658145546913147, "learning_rate": 0.0001996603153868665, "loss": 1.3471, "step": 519 }, { "epoch": 0.08358448864777979, "grad_norm": 0.7613710761070251, "learning_rate": 0.0001996589254542961, "loss": 1.5777, "step": 520 }, { "epoch": 0.08374522804902551, "grad_norm": 0.6134573221206665, "learning_rate": 0.00019965753268870085, "loss": 1.181, "step": 521 }, { "epoch": 0.08390596745027125, "grad_norm": 0.6931266188621521, "learning_rate": 0.00019965613709012044, "loss": 1.7091, "step": 522 }, { "epoch": 0.08406670685151697, "grad_norm": 0.5839043855667114, "learning_rate": 0.00019965473865859445, "loss": 1.5548, "step": 523 }, { "epoch": 0.08422744625276271, "grad_norm": 0.6097489595413208, "learning_rate": 0.0001996533373941627, "loss": 1.3886, "step": 524 }, { "epoch": 0.08438818565400844, "grad_norm": 0.6436520218849182, "learning_rate": 0.000199651933296865, "loss": 1.5111, "step": 525 }, { "epoch": 0.08454892505525417, "grad_norm": 0.7383518218994141, "learning_rate": 0.00019965052636674126, "loss": 1.6701, "step": 526 }, { "epoch": 0.0847096644564999, "grad_norm": 0.6361198425292969, "learning_rate": 0.0001996491166038315, "loss": 1.5444, "step": 527 }, { "epoch": 0.08487040385774564, "grad_norm": 0.6361669898033142, "learning_rate": 0.00019964770400817577, "loss": 1.6683, "step": 528 }, { "epoch": 0.08503114325899136, "grad_norm": 0.6143077611923218, "learning_rate": 0.00019964628857981423, "loss": 1.341, "step": 529 }, { "epoch": 0.08519188266023708, "grad_norm": 0.7071183323860168, "learning_rate": 0.00019964487031878708, "loss": 1.776, "step": 530 }, { "epoch": 0.08535262206148282, "grad_norm": 0.760584831237793, "learning_rate": 0.0001996434492251347, "loss": 1.6837, "step": 531 }, { "epoch": 0.08551336146272855, "grad_norm": 0.7462098002433777, "learning_rate": 0.00019964202529889748, "loss": 1.8215, "step": 532 }, { "epoch": 0.08567410086397428, "grad_norm": 0.6214757561683655, "learning_rate": 0.0001996405985401159, "loss": 1.5357, "step": 533 }, { "epoch": 0.08583484026522001, "grad_norm": 0.6516343355178833, "learning_rate": 0.00019963916894883048, "loss": 1.6333, "step": 534 }, { "epoch": 0.08599557966646575, "grad_norm": 0.6190442442893982, "learning_rate": 0.00019963773652508188, "loss": 1.4878, "step": 535 }, { "epoch": 0.08615631906771147, "grad_norm": 0.6450837850570679, "learning_rate": 0.00019963630126891083, "loss": 1.459, "step": 536 }, { "epoch": 0.08631705846895721, "grad_norm": 0.5769215822219849, "learning_rate": 0.0001996348631803581, "loss": 1.4613, "step": 537 }, { "epoch": 0.08647779787020293, "grad_norm": 0.619052529335022, "learning_rate": 0.00019963342225946457, "loss": 1.3725, "step": 538 }, { "epoch": 0.08663853727144867, "grad_norm": 0.5731704831123352, "learning_rate": 0.00019963197850627124, "loss": 1.5319, "step": 539 }, { "epoch": 0.0867992766726944, "grad_norm": 0.6295083165168762, "learning_rate": 0.00019963053192081914, "loss": 1.5521, "step": 540 }, { "epoch": 0.08696001607394012, "grad_norm": 0.6366294622421265, "learning_rate": 0.0001996290825031494, "loss": 1.6139, "step": 541 }, { "epoch": 0.08712075547518586, "grad_norm": 0.8283463716506958, "learning_rate": 0.00019962763025330323, "loss": 1.6223, "step": 542 }, { "epoch": 0.08728149487643158, "grad_norm": 0.5769314765930176, "learning_rate": 0.00019962617517132184, "loss": 1.4057, "step": 543 }, { "epoch": 0.08744223427767732, "grad_norm": 0.6063826680183411, "learning_rate": 0.0001996247172572467, "loss": 1.5547, "step": 544 }, { "epoch": 0.08760297367892304, "grad_norm": 0.6279756426811218, "learning_rate": 0.00019962325651111919, "loss": 1.4748, "step": 545 }, { "epoch": 0.08776371308016878, "grad_norm": 0.6309914588928223, "learning_rate": 0.0001996217929329808, "loss": 1.4919, "step": 546 }, { "epoch": 0.0879244524814145, "grad_norm": 0.6625733375549316, "learning_rate": 0.00019962032652287318, "loss": 1.4905, "step": 547 }, { "epoch": 0.08808519188266024, "grad_norm": 0.6475465297698975, "learning_rate": 0.0001996188572808381, "loss": 1.5906, "step": 548 }, { "epoch": 0.08824593128390597, "grad_norm": 0.7302984595298767, "learning_rate": 0.00019961738520691714, "loss": 1.7626, "step": 549 }, { "epoch": 0.08840667068515169, "grad_norm": 0.575646162033081, "learning_rate": 0.00019961591030115235, "loss": 1.4858, "step": 550 }, { "epoch": 0.08856741008639743, "grad_norm": 0.6621109247207642, "learning_rate": 0.00019961443256358549, "loss": 1.5332, "step": 551 }, { "epoch": 0.08872814948764315, "grad_norm": 0.619243860244751, "learning_rate": 0.00019961295199425868, "loss": 1.6195, "step": 552 }, { "epoch": 0.08888888888888889, "grad_norm": 0.8868658542633057, "learning_rate": 0.0001996114685932139, "loss": 1.3383, "step": 553 }, { "epoch": 0.08904962829013462, "grad_norm": 0.6067079901695251, "learning_rate": 0.00019960998236049347, "loss": 1.4275, "step": 554 }, { "epoch": 0.08921036769138035, "grad_norm": 0.6056930422782898, "learning_rate": 0.00019960849329613952, "loss": 1.3311, "step": 555 }, { "epoch": 0.08937110709262608, "grad_norm": 0.6663597226142883, "learning_rate": 0.00019960700140019436, "loss": 1.5162, "step": 556 }, { "epoch": 0.08953184649387182, "grad_norm": 0.6127684116363525, "learning_rate": 0.0001996055066727005, "loss": 1.6339, "step": 557 }, { "epoch": 0.08969258589511754, "grad_norm": 0.6266404390335083, "learning_rate": 0.00019960400911370042, "loss": 1.7908, "step": 558 }, { "epoch": 0.08985332529636327, "grad_norm": 0.8539959788322449, "learning_rate": 0.00019960250872323658, "loss": 1.6815, "step": 559 }, { "epoch": 0.090014064697609, "grad_norm": 0.6749199032783508, "learning_rate": 0.00019960100550135178, "loss": 1.407, "step": 560 }, { "epoch": 0.09017480409885473, "grad_norm": 0.6766108274459839, "learning_rate": 0.0001995994994480886, "loss": 1.6166, "step": 561 }, { "epoch": 0.09033554350010047, "grad_norm": 0.6386959552764893, "learning_rate": 0.00019959799056349002, "loss": 1.5815, "step": 562 }, { "epoch": 0.09049628290134619, "grad_norm": 0.5865458846092224, "learning_rate": 0.0001995964788475988, "loss": 1.5828, "step": 563 }, { "epoch": 0.09065702230259193, "grad_norm": 0.5895900726318359, "learning_rate": 0.00019959496430045797, "loss": 1.5472, "step": 564 }, { "epoch": 0.09081776170383765, "grad_norm": 0.6310082077980042, "learning_rate": 0.00019959344692211056, "loss": 1.545, "step": 565 }, { "epoch": 0.09097850110508339, "grad_norm": 0.6117277145385742, "learning_rate": 0.00019959192671259975, "loss": 1.5038, "step": 566 }, { "epoch": 0.09113924050632911, "grad_norm": 0.7194087505340576, "learning_rate": 0.0001995904036719687, "loss": 1.427, "step": 567 }, { "epoch": 0.09129997990757484, "grad_norm": 0.6708573698997498, "learning_rate": 0.00019958887780026073, "loss": 1.552, "step": 568 }, { "epoch": 0.09146071930882058, "grad_norm": 0.5400412082672119, "learning_rate": 0.00019958734909751921, "loss": 1.3284, "step": 569 }, { "epoch": 0.0916214587100663, "grad_norm": 0.6532981395721436, "learning_rate": 0.00019958581756378765, "loss": 1.5654, "step": 570 }, { "epoch": 0.09178219811131204, "grad_norm": 0.6110222339630127, "learning_rate": 0.0001995842831991095, "loss": 1.4931, "step": 571 }, { "epoch": 0.09194293751255776, "grad_norm": 0.649152934551239, "learning_rate": 0.00019958274600352842, "loss": 1.3625, "step": 572 }, { "epoch": 0.0921036769138035, "grad_norm": 0.5515139102935791, "learning_rate": 0.00019958120597708811, "loss": 1.5149, "step": 573 }, { "epoch": 0.09226441631504922, "grad_norm": 0.7164463996887207, "learning_rate": 0.00019957966311983234, "loss": 1.659, "step": 574 }, { "epoch": 0.09242515571629496, "grad_norm": 0.632805347442627, "learning_rate": 0.00019957811743180498, "loss": 1.4248, "step": 575 }, { "epoch": 0.09258589511754069, "grad_norm": 0.6712649464607239, "learning_rate": 0.00019957656891305, "loss": 1.3908, "step": 576 }, { "epoch": 0.09274663451878641, "grad_norm": 0.6602980494499207, "learning_rate": 0.00019957501756361133, "loss": 1.491, "step": 577 }, { "epoch": 0.09290737392003215, "grad_norm": 0.6166489720344543, "learning_rate": 0.00019957346338353316, "loss": 1.3428, "step": 578 }, { "epoch": 0.09306811332127787, "grad_norm": 0.5745552778244019, "learning_rate": 0.0001995719063728596, "loss": 1.5109, "step": 579 }, { "epoch": 0.09322885272252361, "grad_norm": 0.6544909477233887, "learning_rate": 0.00019957034653163497, "loss": 1.4802, "step": 580 }, { "epoch": 0.09338959212376934, "grad_norm": 0.6837307214736938, "learning_rate": 0.00019956878385990362, "loss": 1.5512, "step": 581 }, { "epoch": 0.09355033152501507, "grad_norm": 0.6078664064407349, "learning_rate": 0.00019956721835770992, "loss": 1.514, "step": 582 }, { "epoch": 0.0937110709262608, "grad_norm": 0.5998193025588989, "learning_rate": 0.00019956565002509838, "loss": 1.657, "step": 583 }, { "epoch": 0.09387181032750654, "grad_norm": 0.6141971945762634, "learning_rate": 0.0001995640788621136, "loss": 1.5525, "step": 584 }, { "epoch": 0.09403254972875226, "grad_norm": 0.6732306480407715, "learning_rate": 0.00019956250486880025, "loss": 1.7539, "step": 585 }, { "epoch": 0.09419328912999798, "grad_norm": 0.6068583130836487, "learning_rate": 0.00019956092804520304, "loss": 1.3999, "step": 586 }, { "epoch": 0.09435402853124372, "grad_norm": 0.7253121733665466, "learning_rate": 0.00019955934839136684, "loss": 1.6497, "step": 587 }, { "epoch": 0.09451476793248945, "grad_norm": 0.5836654305458069, "learning_rate": 0.00019955776590733655, "loss": 1.3454, "step": 588 }, { "epoch": 0.09467550733373518, "grad_norm": 0.6415271162986755, "learning_rate": 0.00019955618059315714, "loss": 1.545, "step": 589 }, { "epoch": 0.09483624673498091, "grad_norm": 0.6690226197242737, "learning_rate": 0.00019955459244887366, "loss": 1.7013, "step": 590 }, { "epoch": 0.09499698613622665, "grad_norm": 0.7103183269500732, "learning_rate": 0.00019955300147453126, "loss": 1.4967, "step": 591 }, { "epoch": 0.09515772553747237, "grad_norm": 0.9099798798561096, "learning_rate": 0.00019955140767017522, "loss": 1.4393, "step": 592 }, { "epoch": 0.09531846493871811, "grad_norm": 0.5896414518356323, "learning_rate": 0.00019954981103585077, "loss": 1.294, "step": 593 }, { "epoch": 0.09547920433996383, "grad_norm": 0.6813402771949768, "learning_rate": 0.00019954821157160338, "loss": 1.5865, "step": 594 }, { "epoch": 0.09563994374120956, "grad_norm": 0.6607721447944641, "learning_rate": 0.00019954660927747845, "loss": 1.4448, "step": 595 }, { "epoch": 0.0958006831424553, "grad_norm": 0.6820605993270874, "learning_rate": 0.00019954500415352152, "loss": 1.3699, "step": 596 }, { "epoch": 0.09596142254370102, "grad_norm": 0.7431395649909973, "learning_rate": 0.0001995433961997783, "loss": 1.5895, "step": 597 }, { "epoch": 0.09612216194494676, "grad_norm": 0.7309276461601257, "learning_rate": 0.0001995417854162944, "loss": 1.6496, "step": 598 }, { "epoch": 0.09628290134619248, "grad_norm": 0.6555226445198059, "learning_rate": 0.00019954017180311574, "loss": 1.6926, "step": 599 }, { "epoch": 0.09644364074743822, "grad_norm": 0.763521134853363, "learning_rate": 0.00019953855536028808, "loss": 1.5393, "step": 600 }, { "epoch": 0.09644364074743822, "eval_loss": 1.5743680000305176, "eval_runtime": 46.1932, "eval_samples_per_second": 5.434, "eval_steps_per_second": 2.728, "step": 600 }, { "epoch": 0.09660438014868394, "grad_norm": 0.5218728184700012, "learning_rate": 0.00019953693608785737, "loss": 1.2607, "step": 601 }, { "epoch": 0.09676511954992968, "grad_norm": 0.6307021975517273, "learning_rate": 0.0001995353139858697, "loss": 1.5492, "step": 602 }, { "epoch": 0.0969258589511754, "grad_norm": 0.6398903727531433, "learning_rate": 0.00019953368905437115, "loss": 1.4776, "step": 603 }, { "epoch": 0.09708659835242114, "grad_norm": 0.6246693730354309, "learning_rate": 0.00019953206129340792, "loss": 1.396, "step": 604 }, { "epoch": 0.09724733775366687, "grad_norm": 0.6644021272659302, "learning_rate": 0.0001995304307030263, "loss": 1.4333, "step": 605 }, { "epoch": 0.09740807715491259, "grad_norm": 0.5822995901107788, "learning_rate": 0.0001995287972832726, "loss": 1.454, "step": 606 }, { "epoch": 0.09756881655615833, "grad_norm": 0.6700337529182434, "learning_rate": 0.0001995271610341933, "loss": 1.5318, "step": 607 }, { "epoch": 0.09772955595740405, "grad_norm": 0.5907222628593445, "learning_rate": 0.00019952552195583487, "loss": 1.291, "step": 608 }, { "epoch": 0.09789029535864979, "grad_norm": 0.6502540111541748, "learning_rate": 0.00019952388004824395, "loss": 1.649, "step": 609 }, { "epoch": 0.09805103475989552, "grad_norm": 0.6386947631835938, "learning_rate": 0.00019952223531146716, "loss": 1.3696, "step": 610 }, { "epoch": 0.09821177416114125, "grad_norm": 0.6419175267219543, "learning_rate": 0.00019952058774555133, "loss": 1.5512, "step": 611 }, { "epoch": 0.09837251356238698, "grad_norm": 0.6677020788192749, "learning_rate": 0.00019951893735054322, "loss": 1.6689, "step": 612 }, { "epoch": 0.09853325296363272, "grad_norm": 0.6155129671096802, "learning_rate": 0.00019951728412648977, "loss": 1.4741, "step": 613 }, { "epoch": 0.09869399236487844, "grad_norm": 0.7248343825340271, "learning_rate": 0.000199515628073438, "loss": 1.738, "step": 614 }, { "epoch": 0.09885473176612417, "grad_norm": 0.6911106705665588, "learning_rate": 0.000199513969191435, "loss": 1.458, "step": 615 }, { "epoch": 0.0990154711673699, "grad_norm": 0.7387794256210327, "learning_rate": 0.00019951230748052785, "loss": 1.6454, "step": 616 }, { "epoch": 0.09917621056861563, "grad_norm": 0.6735286116600037, "learning_rate": 0.00019951064294076388, "loss": 1.427, "step": 617 }, { "epoch": 0.09933694996986137, "grad_norm": 0.6684578061103821, "learning_rate": 0.00019950897557219032, "loss": 1.5539, "step": 618 }, { "epoch": 0.09949768937110709, "grad_norm": 0.629774808883667, "learning_rate": 0.00019950730537485463, "loss": 1.5368, "step": 619 }, { "epoch": 0.09965842877235283, "grad_norm": 0.6495603919029236, "learning_rate": 0.0001995056323488043, "loss": 1.4604, "step": 620 }, { "epoch": 0.09981916817359855, "grad_norm": 0.6542315483093262, "learning_rate": 0.00019950395649408683, "loss": 1.6887, "step": 621 }, { "epoch": 0.09997990757484429, "grad_norm": 0.647136390209198, "learning_rate": 0.0001995022778107499, "loss": 1.4804, "step": 622 }, { "epoch": 0.10014064697609001, "grad_norm": 0.6931506991386414, "learning_rate": 0.00019950059629884125, "loss": 1.6062, "step": 623 }, { "epoch": 0.10030138637733574, "grad_norm": 0.6098160147666931, "learning_rate": 0.00019949891195840863, "loss": 1.5434, "step": 624 }, { "epoch": 0.10046212577858148, "grad_norm": 0.5653876662254333, "learning_rate": 0.00019949722478949995, "loss": 1.2769, "step": 625 }, { "epoch": 0.1006228651798272, "grad_norm": 0.6484172344207764, "learning_rate": 0.00019949553479216316, "loss": 1.5125, "step": 626 }, { "epoch": 0.10078360458107294, "grad_norm": 0.6980167031288147, "learning_rate": 0.00019949384196644632, "loss": 1.4447, "step": 627 }, { "epoch": 0.10094434398231866, "grad_norm": 0.7955688834190369, "learning_rate": 0.00019949214631239753, "loss": 1.5048, "step": 628 }, { "epoch": 0.1011050833835644, "grad_norm": 0.7239707112312317, "learning_rate": 0.000199490447830065, "loss": 1.4537, "step": 629 }, { "epoch": 0.10126582278481013, "grad_norm": 0.7593023777008057, "learning_rate": 0.00019948874651949701, "loss": 1.3794, "step": 630 }, { "epoch": 0.10142656218605586, "grad_norm": 0.6131907105445862, "learning_rate": 0.00019948704238074196, "loss": 1.4368, "step": 631 }, { "epoch": 0.10158730158730159, "grad_norm": 0.7544803619384766, "learning_rate": 0.00019948533541384823, "loss": 1.8243, "step": 632 }, { "epoch": 0.10174804098854731, "grad_norm": 0.6097567677497864, "learning_rate": 0.0001994836256188644, "loss": 1.3235, "step": 633 }, { "epoch": 0.10190878038979305, "grad_norm": 0.6920642852783203, "learning_rate": 0.00019948191299583906, "loss": 1.5069, "step": 634 }, { "epoch": 0.10206951979103877, "grad_norm": 0.6324285268783569, "learning_rate": 0.00019948019754482087, "loss": 1.5447, "step": 635 }, { "epoch": 0.10223025919228451, "grad_norm": 0.638663649559021, "learning_rate": 0.0001994784792658586, "loss": 1.3485, "step": 636 }, { "epoch": 0.10239099859353024, "grad_norm": 0.58576899766922, "learning_rate": 0.00019947675815900113, "loss": 1.4652, "step": 637 }, { "epoch": 0.10255173799477597, "grad_norm": 0.6359608769416809, "learning_rate": 0.00019947503422429737, "loss": 1.5481, "step": 638 }, { "epoch": 0.1027124773960217, "grad_norm": 0.6485562324523926, "learning_rate": 0.00019947330746179632, "loss": 1.3471, "step": 639 }, { "epoch": 0.10287321679726744, "grad_norm": 0.6607482433319092, "learning_rate": 0.00019947157787154706, "loss": 1.6094, "step": 640 }, { "epoch": 0.10303395619851316, "grad_norm": 0.7602476477622986, "learning_rate": 0.00019946984545359876, "loss": 1.6346, "step": 641 }, { "epoch": 0.10319469559975888, "grad_norm": 0.605478048324585, "learning_rate": 0.00019946811020800067, "loss": 1.5363, "step": 642 }, { "epoch": 0.10335543500100462, "grad_norm": 0.7752895951271057, "learning_rate": 0.00019946637213480212, "loss": 1.7284, "step": 643 }, { "epoch": 0.10351617440225035, "grad_norm": 0.7096112370491028, "learning_rate": 0.00019946463123405255, "loss": 1.7381, "step": 644 }, { "epoch": 0.10367691380349608, "grad_norm": 0.7004408836364746, "learning_rate": 0.00019946288750580138, "loss": 1.4747, "step": 645 }, { "epoch": 0.10383765320474181, "grad_norm": 0.826971173286438, "learning_rate": 0.00019946114095009825, "loss": 1.8057, "step": 646 }, { "epoch": 0.10399839260598755, "grad_norm": 4.103292942047119, "learning_rate": 0.00019945939156699275, "loss": 1.8826, "step": 647 }, { "epoch": 0.10415913200723327, "grad_norm": 0.7409517765045166, "learning_rate": 0.0001994576393565347, "loss": 1.7937, "step": 648 }, { "epoch": 0.10431987140847901, "grad_norm": 0.6974817514419556, "learning_rate": 0.0001994558843187738, "loss": 1.705, "step": 649 }, { "epoch": 0.10448061080972473, "grad_norm": 0.617879331111908, "learning_rate": 0.00019945412645375998, "loss": 1.513, "step": 650 }, { "epoch": 0.10464135021097046, "grad_norm": 0.8045983910560608, "learning_rate": 0.00019945236576154326, "loss": 1.6674, "step": 651 }, { "epoch": 0.1048020896122162, "grad_norm": 0.6745765209197998, "learning_rate": 0.00019945060224217363, "loss": 1.6048, "step": 652 }, { "epoch": 0.10496282901346192, "grad_norm": 0.6924740672111511, "learning_rate": 0.00019944883589570125, "loss": 1.5982, "step": 653 }, { "epoch": 0.10512356841470766, "grad_norm": 0.6305418610572815, "learning_rate": 0.00019944706672217632, "loss": 1.6551, "step": 654 }, { "epoch": 0.10528430781595338, "grad_norm": 0.7626046538352966, "learning_rate": 0.00019944529472164916, "loss": 1.4724, "step": 655 }, { "epoch": 0.10544504721719912, "grad_norm": 0.5870013236999512, "learning_rate": 0.00019944351989417008, "loss": 1.3176, "step": 656 }, { "epoch": 0.10560578661844484, "grad_norm": 0.594842255115509, "learning_rate": 0.0001994417422397896, "loss": 1.534, "step": 657 }, { "epoch": 0.10576652601969058, "grad_norm": 0.6321095824241638, "learning_rate": 0.00019943996175855822, "loss": 1.5502, "step": 658 }, { "epoch": 0.1059272654209363, "grad_norm": 0.6540301442146301, "learning_rate": 0.0001994381784505266, "loss": 1.6229, "step": 659 }, { "epoch": 0.10608800482218204, "grad_norm": 0.7605463862419128, "learning_rate": 0.00019943639231574537, "loss": 1.9393, "step": 660 }, { "epoch": 0.10624874422342777, "grad_norm": 0.7090712785720825, "learning_rate": 0.00019943460335426533, "loss": 1.7994, "step": 661 }, { "epoch": 0.10640948362467349, "grad_norm": 0.6289608478546143, "learning_rate": 0.00019943281156613738, "loss": 1.4138, "step": 662 }, { "epoch": 0.10657022302591923, "grad_norm": 0.7806804776191711, "learning_rate": 0.00019943101695141238, "loss": 1.7935, "step": 663 }, { "epoch": 0.10673096242716495, "grad_norm": 0.6812963485717773, "learning_rate": 0.00019942921951014136, "loss": 1.8051, "step": 664 }, { "epoch": 0.10689170182841069, "grad_norm": 0.7252092957496643, "learning_rate": 0.00019942741924237548, "loss": 1.7485, "step": 665 }, { "epoch": 0.10705244122965642, "grad_norm": 0.688176691532135, "learning_rate": 0.00019942561614816584, "loss": 1.5778, "step": 666 }, { "epoch": 0.10721318063090216, "grad_norm": 0.5974223613739014, "learning_rate": 0.00019942381022756373, "loss": 1.4269, "step": 667 }, { "epoch": 0.10737392003214788, "grad_norm": 0.6459078192710876, "learning_rate": 0.00019942200148062052, "loss": 1.7232, "step": 668 }, { "epoch": 0.10753465943339362, "grad_norm": 0.6913591623306274, "learning_rate": 0.00019942018990738754, "loss": 1.903, "step": 669 }, { "epoch": 0.10769539883463934, "grad_norm": 0.7316176891326904, "learning_rate": 0.00019941837550791638, "loss": 1.6828, "step": 670 }, { "epoch": 0.10785613823588507, "grad_norm": 0.685525119304657, "learning_rate": 0.0001994165582822586, "loss": 1.6852, "step": 671 }, { "epoch": 0.1080168776371308, "grad_norm": 0.6922297477722168, "learning_rate": 0.0001994147382304658, "loss": 1.6955, "step": 672 }, { "epoch": 0.10817761703837653, "grad_norm": 0.6421271562576294, "learning_rate": 0.00019941291535258978, "loss": 1.5839, "step": 673 }, { "epoch": 0.10833835643962227, "grad_norm": 0.6666224598884583, "learning_rate": 0.0001994110896486823, "loss": 1.4503, "step": 674 }, { "epoch": 0.10849909584086799, "grad_norm": 0.566118597984314, "learning_rate": 0.00019940926111879534, "loss": 1.4208, "step": 675 }, { "epoch": 0.10865983524211373, "grad_norm": 0.6604273915290833, "learning_rate": 0.00019940742976298084, "loss": 1.515, "step": 676 }, { "epoch": 0.10882057464335945, "grad_norm": 0.6288338303565979, "learning_rate": 0.00019940559558129085, "loss": 1.2276, "step": 677 }, { "epoch": 0.10898131404460519, "grad_norm": 0.8125700354576111, "learning_rate": 0.00019940375857377748, "loss": 2.1139, "step": 678 }, { "epoch": 0.10914205344585091, "grad_norm": 0.6770904064178467, "learning_rate": 0.00019940191874049304, "loss": 1.3178, "step": 679 }, { "epoch": 0.10930279284709664, "grad_norm": 0.657373309135437, "learning_rate": 0.00019940007608148976, "loss": 1.5786, "step": 680 }, { "epoch": 0.10946353224834238, "grad_norm": 0.682761549949646, "learning_rate": 0.00019939823059682003, "loss": 1.6035, "step": 681 }, { "epoch": 0.1096242716495881, "grad_norm": 0.7806504964828491, "learning_rate": 0.00019939638228653634, "loss": 1.7045, "step": 682 }, { "epoch": 0.10978501105083384, "grad_norm": 0.7440876364707947, "learning_rate": 0.00019939453115069123, "loss": 1.6677, "step": 683 }, { "epoch": 0.10994575045207956, "grad_norm": 0.610008180141449, "learning_rate": 0.0001993926771893373, "loss": 1.2616, "step": 684 }, { "epoch": 0.1101064898533253, "grad_norm": 0.6119703054428101, "learning_rate": 0.00019939082040252724, "loss": 1.3262, "step": 685 }, { "epoch": 0.11026722925457103, "grad_norm": 0.6153454184532166, "learning_rate": 0.0001993889607903139, "loss": 1.4028, "step": 686 }, { "epoch": 0.11042796865581676, "grad_norm": 0.656809389591217, "learning_rate": 0.00019938709835275002, "loss": 1.5955, "step": 687 }, { "epoch": 0.11058870805706249, "grad_norm": 0.7600885033607483, "learning_rate": 0.0001993852330898887, "loss": 1.7876, "step": 688 }, { "epoch": 0.11074944745830821, "grad_norm": 0.6269267201423645, "learning_rate": 0.00019938336500178284, "loss": 1.6224, "step": 689 }, { "epoch": 0.11091018685955395, "grad_norm": 0.7481472492218018, "learning_rate": 0.00019938149408848564, "loss": 1.7385, "step": 690 }, { "epoch": 0.11107092626079967, "grad_norm": 0.623175323009491, "learning_rate": 0.0001993796203500502, "loss": 1.3764, "step": 691 }, { "epoch": 0.11123166566204541, "grad_norm": 0.6218768954277039, "learning_rate": 0.00019937774378652983, "loss": 1.4884, "step": 692 }, { "epoch": 0.11139240506329114, "grad_norm": 0.709305465221405, "learning_rate": 0.00019937586439797787, "loss": 1.4289, "step": 693 }, { "epoch": 0.11155314446453687, "grad_norm": 0.7094429731369019, "learning_rate": 0.0001993739821844477, "loss": 1.5149, "step": 694 }, { "epoch": 0.1117138838657826, "grad_norm": 0.648709774017334, "learning_rate": 0.00019937209714599295, "loss": 1.5662, "step": 695 }, { "epoch": 0.11187462326702834, "grad_norm": 0.6053886413574219, "learning_rate": 0.00019937020928266708, "loss": 1.3132, "step": 696 }, { "epoch": 0.11203536266827406, "grad_norm": 0.6875995993614197, "learning_rate": 0.0001993683185945238, "loss": 1.3922, "step": 697 }, { "epoch": 0.11219610206951978, "grad_norm": 1.0409282445907593, "learning_rate": 0.00019936642508161688, "loss": 1.5644, "step": 698 }, { "epoch": 0.11235684147076552, "grad_norm": 0.7041425108909607, "learning_rate": 0.0001993645287440001, "loss": 1.7987, "step": 699 }, { "epoch": 0.11251758087201125, "grad_norm": 0.6613534092903137, "learning_rate": 0.0001993626295817274, "loss": 1.6016, "step": 700 }, { "epoch": 0.11267832027325698, "grad_norm": 0.6498948931694031, "learning_rate": 0.00019936072759485276, "loss": 1.5736, "step": 701 }, { "epoch": 0.11283905967450271, "grad_norm": 0.7355228662490845, "learning_rate": 0.00019935882278343027, "loss": 1.6047, "step": 702 }, { "epoch": 0.11299979907574845, "grad_norm": 0.5751492977142334, "learning_rate": 0.00019935691514751404, "loss": 1.3861, "step": 703 }, { "epoch": 0.11316053847699417, "grad_norm": 0.6898406744003296, "learning_rate": 0.0001993550046871583, "loss": 1.6372, "step": 704 }, { "epoch": 0.11332127787823991, "grad_norm": 0.7032646536827087, "learning_rate": 0.00019935309140241741, "loss": 1.6963, "step": 705 }, { "epoch": 0.11348201727948563, "grad_norm": 0.6693851947784424, "learning_rate": 0.00019935117529334574, "loss": 1.5671, "step": 706 }, { "epoch": 0.11364275668073136, "grad_norm": 0.663316011428833, "learning_rate": 0.00019934925635999772, "loss": 1.4841, "step": 707 }, { "epoch": 0.1138034960819771, "grad_norm": 0.7034291625022888, "learning_rate": 0.0001993473346024279, "loss": 1.6643, "step": 708 }, { "epoch": 0.11396423548322282, "grad_norm": 0.6770520210266113, "learning_rate": 0.00019934541002069098, "loss": 1.5274, "step": 709 }, { "epoch": 0.11412497488446856, "grad_norm": 0.7061966061592102, "learning_rate": 0.00019934348261484158, "loss": 1.6196, "step": 710 }, { "epoch": 0.11428571428571428, "grad_norm": 0.5612440705299377, "learning_rate": 0.00019934155238493457, "loss": 1.2691, "step": 711 }, { "epoch": 0.11444645368696002, "grad_norm": 5.862006664276123, "learning_rate": 0.0001993396193310248, "loss": 1.5635, "step": 712 }, { "epoch": 0.11460719308820574, "grad_norm": 0.7458844780921936, "learning_rate": 0.00019933768345316717, "loss": 1.5509, "step": 713 }, { "epoch": 0.11476793248945148, "grad_norm": 0.6545864343643188, "learning_rate": 0.00019933574475141676, "loss": 1.439, "step": 714 }, { "epoch": 0.1149286718906972, "grad_norm": 0.6924874782562256, "learning_rate": 0.00019933380322582867, "loss": 1.5073, "step": 715 }, { "epoch": 0.11508941129194293, "grad_norm": 0.6511202454566956, "learning_rate": 0.00019933185887645812, "loss": 1.201, "step": 716 }, { "epoch": 0.11525015069318867, "grad_norm": 0.604914665222168, "learning_rate": 0.00019932991170336034, "loss": 1.3925, "step": 717 }, { "epoch": 0.11541089009443439, "grad_norm": 0.6269446015357971, "learning_rate": 0.0001993279617065907, "loss": 1.4341, "step": 718 }, { "epoch": 0.11557162949568013, "grad_norm": 1.745778203010559, "learning_rate": 0.00019932600888620465, "loss": 1.3766, "step": 719 }, { "epoch": 0.11573236889692586, "grad_norm": 0.6639474630355835, "learning_rate": 0.00019932405324225768, "loss": 1.5567, "step": 720 }, { "epoch": 0.1158931082981716, "grad_norm": 0.6309482455253601, "learning_rate": 0.00019932209477480537, "loss": 1.3647, "step": 721 }, { "epoch": 0.11605384769941732, "grad_norm": 0.7566007375717163, "learning_rate": 0.0001993201334839034, "loss": 1.7892, "step": 722 }, { "epoch": 0.11621458710066306, "grad_norm": 0.7342691421508789, "learning_rate": 0.00019931816936960756, "loss": 1.6885, "step": 723 }, { "epoch": 0.11637532650190878, "grad_norm": 0.7034913897514343, "learning_rate": 0.0001993162024319737, "loss": 1.6636, "step": 724 }, { "epoch": 0.11653606590315452, "grad_norm": 0.7233126759529114, "learning_rate": 0.00019931423267105763, "loss": 1.7815, "step": 725 }, { "epoch": 0.11669680530440024, "grad_norm": 0.6273208260536194, "learning_rate": 0.00019931226008691543, "loss": 1.2798, "step": 726 }, { "epoch": 0.11685754470564597, "grad_norm": 0.7143549919128418, "learning_rate": 0.00019931028467960317, "loss": 1.4996, "step": 727 }, { "epoch": 0.1170182841068917, "grad_norm": 0.7304338812828064, "learning_rate": 0.00019930830644917697, "loss": 1.6491, "step": 728 }, { "epoch": 0.11717902350813743, "grad_norm": 0.6778528094291687, "learning_rate": 0.00019930632539569312, "loss": 1.6225, "step": 729 }, { "epoch": 0.11733976290938317, "grad_norm": 0.6520379781723022, "learning_rate": 0.00019930434151920788, "loss": 1.613, "step": 730 }, { "epoch": 0.11750050231062889, "grad_norm": 0.5926263928413391, "learning_rate": 0.00019930235481977767, "loss": 1.3785, "step": 731 }, { "epoch": 0.11766124171187463, "grad_norm": 0.5851619243621826, "learning_rate": 0.00019930036529745897, "loss": 1.322, "step": 732 }, { "epoch": 0.11782198111312035, "grad_norm": 0.6841761469841003, "learning_rate": 0.00019929837295230832, "loss": 1.61, "step": 733 }, { "epoch": 0.11798272051436609, "grad_norm": 0.6961073875427246, "learning_rate": 0.00019929637778438237, "loss": 1.6523, "step": 734 }, { "epoch": 0.11814345991561181, "grad_norm": 0.7046864628791809, "learning_rate": 0.00019929437979373783, "loss": 1.8082, "step": 735 }, { "epoch": 0.11830419931685754, "grad_norm": 1.2361716032028198, "learning_rate": 0.00019929237898043154, "loss": 1.5248, "step": 736 }, { "epoch": 0.11846493871810328, "grad_norm": 0.6281763315200806, "learning_rate": 0.0001992903753445203, "loss": 1.4235, "step": 737 }, { "epoch": 0.118625678119349, "grad_norm": 0.8867708444595337, "learning_rate": 0.00019928836888606112, "loss": 1.6512, "step": 738 }, { "epoch": 0.11878641752059474, "grad_norm": 0.6271432638168335, "learning_rate": 0.00019928635960511103, "loss": 1.366, "step": 739 }, { "epoch": 0.11894715692184046, "grad_norm": 0.6667734980583191, "learning_rate": 0.00019928434750172717, "loss": 1.5137, "step": 740 }, { "epoch": 0.1191078963230862, "grad_norm": 0.6987286806106567, "learning_rate": 0.00019928233257596664, "loss": 1.5225, "step": 741 }, { "epoch": 0.11926863572433193, "grad_norm": 0.6297409534454346, "learning_rate": 0.00019928031482788688, "loss": 1.6279, "step": 742 }, { "epoch": 0.11942937512557766, "grad_norm": 0.8042590022087097, "learning_rate": 0.0001992782942575451, "loss": 1.5192, "step": 743 }, { "epoch": 0.11959011452682339, "grad_norm": 0.6817503571510315, "learning_rate": 0.0001992762708649988, "loss": 1.4542, "step": 744 }, { "epoch": 0.11975085392806911, "grad_norm": 0.6983264088630676, "learning_rate": 0.00019927424465030554, "loss": 1.6687, "step": 745 }, { "epoch": 0.11991159332931485, "grad_norm": 0.6430609226226807, "learning_rate": 0.00019927221561352282, "loss": 1.3681, "step": 746 }, { "epoch": 0.12007233273056057, "grad_norm": 0.739104688167572, "learning_rate": 0.0001992701837547084, "loss": 1.5199, "step": 747 }, { "epoch": 0.12023307213180631, "grad_norm": 0.6807653307914734, "learning_rate": 0.00019926814907392007, "loss": 1.6988, "step": 748 }, { "epoch": 0.12039381153305204, "grad_norm": 0.6290826201438904, "learning_rate": 0.00019926611157121556, "loss": 1.7055, "step": 749 }, { "epoch": 0.12055455093429777, "grad_norm": 0.7281848788261414, "learning_rate": 0.00019926407124665286, "loss": 1.5902, "step": 750 }, { "epoch": 0.1207152903355435, "grad_norm": 0.6773608326911926, "learning_rate": 0.00019926202810029, "loss": 1.4593, "step": 751 }, { "epoch": 0.12087602973678924, "grad_norm": 0.8636390566825867, "learning_rate": 0.00019925998213218496, "loss": 1.3525, "step": 752 }, { "epoch": 0.12103676913803496, "grad_norm": 0.6255557537078857, "learning_rate": 0.00019925793334239598, "loss": 1.5722, "step": 753 }, { "epoch": 0.12119750853928069, "grad_norm": 0.6483657360076904, "learning_rate": 0.00019925588173098132, "loss": 1.4566, "step": 754 }, { "epoch": 0.12135824794052642, "grad_norm": 0.6870735287666321, "learning_rate": 0.00019925382729799922, "loss": 1.5257, "step": 755 }, { "epoch": 0.12151898734177215, "grad_norm": 0.6138218641281128, "learning_rate": 0.00019925177004350816, "loss": 1.4891, "step": 756 }, { "epoch": 0.12167972674301789, "grad_norm": 0.6980711221694946, "learning_rate": 0.00019924970996756654, "loss": 1.8002, "step": 757 }, { "epoch": 0.12184046614426361, "grad_norm": 0.7765911817550659, "learning_rate": 0.000199247647070233, "loss": 1.6597, "step": 758 }, { "epoch": 0.12200120554550935, "grad_norm": 0.6895900964736938, "learning_rate": 0.00019924558135156616, "loss": 1.7125, "step": 759 }, { "epoch": 0.12216194494675507, "grad_norm": 0.6932492256164551, "learning_rate": 0.00019924351281162475, "loss": 1.687, "step": 760 }, { "epoch": 0.12232268434800081, "grad_norm": 0.5935447812080383, "learning_rate": 0.00019924144145046753, "loss": 1.4544, "step": 761 }, { "epoch": 0.12248342374924653, "grad_norm": 0.8339934945106506, "learning_rate": 0.0001992393672681534, "loss": 1.733, "step": 762 }, { "epoch": 0.12264416315049226, "grad_norm": 0.5683078765869141, "learning_rate": 0.0001992372902647414, "loss": 1.5128, "step": 763 }, { "epoch": 0.122804902551738, "grad_norm": 0.6973103284835815, "learning_rate": 0.00019923521044029044, "loss": 1.752, "step": 764 }, { "epoch": 0.12296564195298372, "grad_norm": 0.5890379548072815, "learning_rate": 0.00019923312779485973, "loss": 1.2009, "step": 765 }, { "epoch": 0.12312638135422946, "grad_norm": 0.6498704552650452, "learning_rate": 0.00019923104232850847, "loss": 1.5988, "step": 766 }, { "epoch": 0.12328712075547518, "grad_norm": 0.7161079049110413, "learning_rate": 0.00019922895404129594, "loss": 1.548, "step": 767 }, { "epoch": 0.12344786015672092, "grad_norm": 0.7079645991325378, "learning_rate": 0.00019922686293328146, "loss": 1.5902, "step": 768 }, { "epoch": 0.12360859955796664, "grad_norm": 0.6773389577865601, "learning_rate": 0.00019922476900452452, "loss": 1.3929, "step": 769 }, { "epoch": 0.12376933895921238, "grad_norm": 0.6970142722129822, "learning_rate": 0.00019922267225508465, "loss": 1.4485, "step": 770 }, { "epoch": 0.1239300783604581, "grad_norm": 0.6700478792190552, "learning_rate": 0.0001992205726850214, "loss": 1.4602, "step": 771 }, { "epoch": 0.12409081776170383, "grad_norm": 0.6463156938552856, "learning_rate": 0.00019921847029439452, "loss": 1.3614, "step": 772 }, { "epoch": 0.12425155716294957, "grad_norm": 0.7356074452400208, "learning_rate": 0.00019921636508326375, "loss": 1.5735, "step": 773 }, { "epoch": 0.1244122965641953, "grad_norm": 0.6936205625534058, "learning_rate": 0.00019921425705168893, "loss": 1.7772, "step": 774 }, { "epoch": 0.12457303596544103, "grad_norm": 0.698839545249939, "learning_rate": 0.00019921214619973, "loss": 1.7214, "step": 775 }, { "epoch": 0.12473377536668676, "grad_norm": 0.7005491256713867, "learning_rate": 0.00019921003252744692, "loss": 1.9014, "step": 776 }, { "epoch": 0.1248945147679325, "grad_norm": 0.6561487317085266, "learning_rate": 0.00019920791603489984, "loss": 1.5546, "step": 777 }, { "epoch": 0.12505525416917823, "grad_norm": 0.7013425230979919, "learning_rate": 0.00019920579672214888, "loss": 1.5653, "step": 778 }, { "epoch": 0.12521599357042396, "grad_norm": 0.658262312412262, "learning_rate": 0.0001992036745892543, "loss": 1.3685, "step": 779 }, { "epoch": 0.12537673297166968, "grad_norm": 0.6645136475563049, "learning_rate": 0.0001992015496362764, "loss": 1.7553, "step": 780 }, { "epoch": 0.1255374723729154, "grad_norm": 0.7373960614204407, "learning_rate": 0.0001991994218632757, "loss": 1.7487, "step": 781 }, { "epoch": 0.12569821177416113, "grad_norm": 0.6117732524871826, "learning_rate": 0.00019919729127031253, "loss": 1.3395, "step": 782 }, { "epoch": 0.12585895117540688, "grad_norm": 0.5637193918228149, "learning_rate": 0.00019919515785744757, "loss": 1.3608, "step": 783 }, { "epoch": 0.1260196905766526, "grad_norm": 0.6707691550254822, "learning_rate": 0.00019919302162474137, "loss": 1.4832, "step": 784 }, { "epoch": 0.12618042997789833, "grad_norm": 0.6191288828849792, "learning_rate": 0.00019919088257225475, "loss": 1.4879, "step": 785 }, { "epoch": 0.12634116937914405, "grad_norm": 0.7111828327178955, "learning_rate": 0.00019918874070004846, "loss": 1.637, "step": 786 }, { "epoch": 0.1265019087803898, "grad_norm": 0.5899950265884399, "learning_rate": 0.00019918659600818344, "loss": 1.3288, "step": 787 }, { "epoch": 0.12666264818163553, "grad_norm": 0.7809910774230957, "learning_rate": 0.0001991844484967206, "loss": 1.5941, "step": 788 }, { "epoch": 0.12682338758288125, "grad_norm": 0.7662188410758972, "learning_rate": 0.000199182298165721, "loss": 1.623, "step": 789 }, { "epoch": 0.12698412698412698, "grad_norm": 0.8164969682693481, "learning_rate": 0.00019918014501524582, "loss": 1.5462, "step": 790 }, { "epoch": 0.1271448663853727, "grad_norm": 0.6697986721992493, "learning_rate": 0.0001991779890453562, "loss": 1.5047, "step": 791 }, { "epoch": 0.12730560578661845, "grad_norm": 0.6984097957611084, "learning_rate": 0.00019917583025611348, "loss": 1.5293, "step": 792 }, { "epoch": 0.12746634518786418, "grad_norm": 0.6618137955665588, "learning_rate": 0.00019917366864757895, "loss": 1.501, "step": 793 }, { "epoch": 0.1276270845891099, "grad_norm": 0.6388329863548279, "learning_rate": 0.00019917150421981417, "loss": 1.5704, "step": 794 }, { "epoch": 0.12778782399035563, "grad_norm": 0.649771511554718, "learning_rate": 0.0001991693369728806, "loss": 1.4984, "step": 795 }, { "epoch": 0.12794856339160138, "grad_norm": 0.6515612006187439, "learning_rate": 0.00019916716690683984, "loss": 1.5405, "step": 796 }, { "epoch": 0.1281093027928471, "grad_norm": 0.5527495741844177, "learning_rate": 0.00019916499402175363, "loss": 1.3137, "step": 797 }, { "epoch": 0.12827004219409283, "grad_norm": 0.774154543876648, "learning_rate": 0.00019916281831768368, "loss": 1.8923, "step": 798 }, { "epoch": 0.12843078159533855, "grad_norm": 0.6038574576377869, "learning_rate": 0.00019916063979469187, "loss": 1.501, "step": 799 }, { "epoch": 0.12859152099658427, "grad_norm": 0.7038726806640625, "learning_rate": 0.00019915845845284012, "loss": 1.4288, "step": 800 }, { "epoch": 0.12859152099658427, "eval_loss": 1.5651003122329712, "eval_runtime": 46.229, "eval_samples_per_second": 5.429, "eval_steps_per_second": 2.726, "step": 800 }, { "epoch": 0.12875226039783003, "grad_norm": 0.7549550533294678, "learning_rate": 0.00019915627429219047, "loss": 1.787, "step": 801 }, { "epoch": 0.12891299979907575, "grad_norm": 0.7718234062194824, "learning_rate": 0.000199154087312805, "loss": 1.6984, "step": 802 }, { "epoch": 0.12907373920032147, "grad_norm": 0.6238852739334106, "learning_rate": 0.00019915189751474582, "loss": 1.3803, "step": 803 }, { "epoch": 0.1292344786015672, "grad_norm": 0.6319724917411804, "learning_rate": 0.00019914970489807524, "loss": 1.5266, "step": 804 }, { "epoch": 0.12939521800281295, "grad_norm": 0.5758875012397766, "learning_rate": 0.0001991475094628556, "loss": 1.3069, "step": 805 }, { "epoch": 0.12955595740405867, "grad_norm": 0.8921329975128174, "learning_rate": 0.00019914531120914926, "loss": 1.7309, "step": 806 }, { "epoch": 0.1297166968053044, "grad_norm": 0.6804137825965881, "learning_rate": 0.00019914311013701874, "loss": 1.3945, "step": 807 }, { "epoch": 0.12987743620655012, "grad_norm": 0.7251747250556946, "learning_rate": 0.00019914090624652663, "loss": 1.7814, "step": 808 }, { "epoch": 0.13003817560779585, "grad_norm": 0.6967126727104187, "learning_rate": 0.0001991386995377355, "loss": 1.5222, "step": 809 }, { "epoch": 0.1301989150090416, "grad_norm": 0.6057139039039612, "learning_rate": 0.0001991364900107082, "loss": 1.456, "step": 810 }, { "epoch": 0.13035965441028732, "grad_norm": 0.5635544657707214, "learning_rate": 0.00019913427766550746, "loss": 1.2696, "step": 811 }, { "epoch": 0.13052039381153305, "grad_norm": 0.721896231174469, "learning_rate": 0.00019913206250219618, "loss": 1.4851, "step": 812 }, { "epoch": 0.13068113321277877, "grad_norm": 0.8013779520988464, "learning_rate": 0.00019912984452083733, "loss": 1.3169, "step": 813 }, { "epoch": 0.13084187261402452, "grad_norm": 0.6125296354293823, "learning_rate": 0.00019912762372149397, "loss": 1.3577, "step": 814 }, { "epoch": 0.13100261201527025, "grad_norm": 0.6918944716453552, "learning_rate": 0.00019912540010422926, "loss": 1.4831, "step": 815 }, { "epoch": 0.13116335141651597, "grad_norm": 0.6841842532157898, "learning_rate": 0.00019912317366910638, "loss": 1.465, "step": 816 }, { "epoch": 0.1313240908177617, "grad_norm": 0.6134369969367981, "learning_rate": 0.0001991209444161886, "loss": 1.3842, "step": 817 }, { "epoch": 0.13148483021900745, "grad_norm": 0.7412641048431396, "learning_rate": 0.00019911871234553933, "loss": 1.6958, "step": 818 }, { "epoch": 0.13164556962025317, "grad_norm": 0.657463788986206, "learning_rate": 0.00019911647745722203, "loss": 1.5505, "step": 819 }, { "epoch": 0.1318063090214989, "grad_norm": 0.6422082781791687, "learning_rate": 0.00019911423975130016, "loss": 1.4468, "step": 820 }, { "epoch": 0.13196704842274462, "grad_norm": 0.6404008865356445, "learning_rate": 0.0001991119992278374, "loss": 1.3953, "step": 821 }, { "epoch": 0.13212778782399034, "grad_norm": 0.6203340888023376, "learning_rate": 0.00019910975588689742, "loss": 1.4017, "step": 822 }, { "epoch": 0.1322885272252361, "grad_norm": 0.6254974007606506, "learning_rate": 0.000199107509728544, "loss": 1.3452, "step": 823 }, { "epoch": 0.13244926662648182, "grad_norm": 0.7361843585968018, "learning_rate": 0.000199105260752841, "loss": 1.5013, "step": 824 }, { "epoch": 0.13261000602772754, "grad_norm": 0.7060967683792114, "learning_rate": 0.00019910300895985232, "loss": 1.7722, "step": 825 }, { "epoch": 0.13277074542897327, "grad_norm": 0.5935456156730652, "learning_rate": 0.00019910075434964198, "loss": 1.2428, "step": 826 }, { "epoch": 0.13293148483021902, "grad_norm": 0.6876825094223022, "learning_rate": 0.00019909849692227412, "loss": 1.3956, "step": 827 }, { "epoch": 0.13309222423146475, "grad_norm": 0.645142674446106, "learning_rate": 0.00019909623667781285, "loss": 1.57, "step": 828 }, { "epoch": 0.13325296363271047, "grad_norm": 0.7231163382530212, "learning_rate": 0.00019909397361632245, "loss": 1.4789, "step": 829 }, { "epoch": 0.1334137030339562, "grad_norm": 0.7301658987998962, "learning_rate": 0.00019909170773786726, "loss": 1.8114, "step": 830 }, { "epoch": 0.13357444243520192, "grad_norm": 0.6705829501152039, "learning_rate": 0.00019908943904251166, "loss": 1.5723, "step": 831 }, { "epoch": 0.13373518183644767, "grad_norm": 0.6201861500740051, "learning_rate": 0.00019908716753032018, "loss": 1.4384, "step": 832 }, { "epoch": 0.1338959212376934, "grad_norm": 0.6643863320350647, "learning_rate": 0.00019908489320135737, "loss": 1.4677, "step": 833 }, { "epoch": 0.13405666063893912, "grad_norm": 0.6056952476501465, "learning_rate": 0.0001990826160556879, "loss": 1.4499, "step": 834 }, { "epoch": 0.13421740004018484, "grad_norm": 0.6622712016105652, "learning_rate": 0.0001990803360933765, "loss": 1.4957, "step": 835 }, { "epoch": 0.1343781394414306, "grad_norm": 0.6943784952163696, "learning_rate": 0.00019907805331448796, "loss": 1.361, "step": 836 }, { "epoch": 0.13453887884267632, "grad_norm": 0.671981155872345, "learning_rate": 0.0001990757677190872, "loss": 1.4373, "step": 837 }, { "epoch": 0.13469961824392204, "grad_norm": 0.6549744606018066, "learning_rate": 0.00019907347930723919, "loss": 1.4115, "step": 838 }, { "epoch": 0.13486035764516777, "grad_norm": 0.6217756271362305, "learning_rate": 0.00019907118807900896, "loss": 1.4239, "step": 839 }, { "epoch": 0.1350210970464135, "grad_norm": 0.744147539138794, "learning_rate": 0.00019906889403446167, "loss": 1.3963, "step": 840 }, { "epoch": 0.13518183644765924, "grad_norm": 0.7378054261207581, "learning_rate": 0.00019906659717366254, "loss": 1.4283, "step": 841 }, { "epoch": 0.13534257584890497, "grad_norm": 0.65228670835495, "learning_rate": 0.0001990642974966768, "loss": 1.3339, "step": 842 }, { "epoch": 0.1355033152501507, "grad_norm": 0.6627105474472046, "learning_rate": 0.0001990619950035699, "loss": 1.4416, "step": 843 }, { "epoch": 0.13566405465139642, "grad_norm": 0.774928867816925, "learning_rate": 0.00019905968969440724, "loss": 1.4592, "step": 844 }, { "epoch": 0.13582479405264217, "grad_norm": 0.6546207070350647, "learning_rate": 0.00019905738156925443, "loss": 1.4172, "step": 845 }, { "epoch": 0.1359855334538879, "grad_norm": 0.6330466270446777, "learning_rate": 0.00019905507062817698, "loss": 1.3778, "step": 846 }, { "epoch": 0.13614627285513362, "grad_norm": 0.7801320552825928, "learning_rate": 0.00019905275687124064, "loss": 1.8104, "step": 847 }, { "epoch": 0.13630701225637934, "grad_norm": 0.5979370474815369, "learning_rate": 0.0001990504402985112, "loss": 1.5713, "step": 848 }, { "epoch": 0.13646775165762506, "grad_norm": 0.72013258934021, "learning_rate": 0.00019904812091005447, "loss": 1.4486, "step": 849 }, { "epoch": 0.13662849105887082, "grad_norm": 0.6431047916412354, "learning_rate": 0.00019904579870593642, "loss": 1.5675, "step": 850 }, { "epoch": 0.13678923046011654, "grad_norm": 0.63852858543396, "learning_rate": 0.00019904347368622302, "loss": 1.485, "step": 851 }, { "epoch": 0.13694996986136226, "grad_norm": 0.6229876279830933, "learning_rate": 0.00019904114585098042, "loss": 1.3739, "step": 852 }, { "epoch": 0.137110709262608, "grad_norm": 0.6466566324234009, "learning_rate": 0.00019903881520027476, "loss": 1.409, "step": 853 }, { "epoch": 0.13727144866385374, "grad_norm": 0.6603915095329285, "learning_rate": 0.00019903648173417228, "loss": 1.6853, "step": 854 }, { "epoch": 0.13743218806509946, "grad_norm": 0.6589502692222595, "learning_rate": 0.00019903414545273935, "loss": 1.4606, "step": 855 }, { "epoch": 0.1375929274663452, "grad_norm": 0.6473212838172913, "learning_rate": 0.0001990318063560424, "loss": 1.4626, "step": 856 }, { "epoch": 0.1377536668675909, "grad_norm": 0.7595370411872864, "learning_rate": 0.00019902946444414786, "loss": 1.6761, "step": 857 }, { "epoch": 0.13791440626883664, "grad_norm": 0.6858698129653931, "learning_rate": 0.00019902711971712231, "loss": 1.6205, "step": 858 }, { "epoch": 0.1380751456700824, "grad_norm": 0.6466687917709351, "learning_rate": 0.00019902477217503245, "loss": 1.5295, "step": 859 }, { "epoch": 0.1382358850713281, "grad_norm": 0.660130500793457, "learning_rate": 0.00019902242181794503, "loss": 1.4437, "step": 860 }, { "epoch": 0.13839662447257384, "grad_norm": 0.7279735803604126, "learning_rate": 0.00019902006864592678, "loss": 1.6526, "step": 861 }, { "epoch": 0.13855736387381956, "grad_norm": 0.7440725564956665, "learning_rate": 0.00019901771265904466, "loss": 1.6403, "step": 862 }, { "epoch": 0.1387181032750653, "grad_norm": 0.8062863945960999, "learning_rate": 0.00019901535385736561, "loss": 1.7975, "step": 863 }, { "epoch": 0.13887884267631104, "grad_norm": 0.6700236797332764, "learning_rate": 0.00019901299224095673, "loss": 1.6256, "step": 864 }, { "epoch": 0.13903958207755676, "grad_norm": 0.8144811391830444, "learning_rate": 0.0001990106278098851, "loss": 1.5578, "step": 865 }, { "epoch": 0.13920032147880249, "grad_norm": 0.7370176315307617, "learning_rate": 0.00019900826056421796, "loss": 1.7076, "step": 866 }, { "epoch": 0.1393610608800482, "grad_norm": 0.8470463156700134, "learning_rate": 0.00019900589050402264, "loss": 1.6687, "step": 867 }, { "epoch": 0.13952180028129396, "grad_norm": 0.7004806399345398, "learning_rate": 0.00019900351762936643, "loss": 1.6447, "step": 868 }, { "epoch": 0.13968253968253969, "grad_norm": 0.6967143416404724, "learning_rate": 0.00019900114194031684, "loss": 1.6395, "step": 869 }, { "epoch": 0.1398432790837854, "grad_norm": 0.7178137302398682, "learning_rate": 0.0001989987634369414, "loss": 1.632, "step": 870 }, { "epoch": 0.14000401848503113, "grad_norm": 0.7159430384635925, "learning_rate": 0.00019899638211930774, "loss": 1.5624, "step": 871 }, { "epoch": 0.1401647578862769, "grad_norm": 0.687585175037384, "learning_rate": 0.00019899399798748352, "loss": 1.3897, "step": 872 }, { "epoch": 0.1403254972875226, "grad_norm": 0.6372073888778687, "learning_rate": 0.00019899161104153652, "loss": 1.5096, "step": 873 }, { "epoch": 0.14048623668876833, "grad_norm": 0.6880161166191101, "learning_rate": 0.0001989892212815346, "loss": 1.5196, "step": 874 }, { "epoch": 0.14064697609001406, "grad_norm": 0.6887964606285095, "learning_rate": 0.00019898682870754574, "loss": 1.5457, "step": 875 }, { "epoch": 0.14080771549125978, "grad_norm": 0.7862657904624939, "learning_rate": 0.00019898443331963785, "loss": 1.7016, "step": 876 }, { "epoch": 0.14096845489250553, "grad_norm": 0.6612958312034607, "learning_rate": 0.0001989820351178791, "loss": 1.6106, "step": 877 }, { "epoch": 0.14112919429375126, "grad_norm": 0.625680148601532, "learning_rate": 0.00019897963410233767, "loss": 1.3734, "step": 878 }, { "epoch": 0.14128993369499698, "grad_norm": 0.7089071273803711, "learning_rate": 0.00019897723027308177, "loss": 1.634, "step": 879 }, { "epoch": 0.1414506730962427, "grad_norm": 0.7161985635757446, "learning_rate": 0.00019897482363017977, "loss": 1.5666, "step": 880 }, { "epoch": 0.14161141249748846, "grad_norm": 0.6604340672492981, "learning_rate": 0.00019897241417370006, "loss": 1.5332, "step": 881 }, { "epoch": 0.14177215189873418, "grad_norm": 0.6513153314590454, "learning_rate": 0.00019897000190371118, "loss": 1.4327, "step": 882 }, { "epoch": 0.1419328912999799, "grad_norm": 0.6528116464614868, "learning_rate": 0.00019896758682028164, "loss": 1.3382, "step": 883 }, { "epoch": 0.14209363070122563, "grad_norm": 0.6076498031616211, "learning_rate": 0.0001989651689234801, "loss": 1.3119, "step": 884 }, { "epoch": 0.14225437010247136, "grad_norm": 0.6551336646080017, "learning_rate": 0.00019896274821337538, "loss": 1.3221, "step": 885 }, { "epoch": 0.1424151095037171, "grad_norm": 0.6853318214416504, "learning_rate": 0.00019896032469003618, "loss": 1.5869, "step": 886 }, { "epoch": 0.14257584890496283, "grad_norm": 0.7239701747894287, "learning_rate": 0.0001989578983535315, "loss": 1.6037, "step": 887 }, { "epoch": 0.14273658830620856, "grad_norm": 0.6677623391151428, "learning_rate": 0.0001989554692039302, "loss": 1.3876, "step": 888 }, { "epoch": 0.14289732770745428, "grad_norm": 0.6452102661132812, "learning_rate": 0.0001989530372413014, "loss": 1.6994, "step": 889 }, { "epoch": 0.14305806710870003, "grad_norm": 0.6314398646354675, "learning_rate": 0.00019895060246571426, "loss": 1.5786, "step": 890 }, { "epoch": 0.14321880650994576, "grad_norm": 0.6119491457939148, "learning_rate": 0.00019894816487723795, "loss": 1.2975, "step": 891 }, { "epoch": 0.14337954591119148, "grad_norm": 0.6476348638534546, "learning_rate": 0.00019894572447594176, "loss": 1.475, "step": 892 }, { "epoch": 0.1435402853124372, "grad_norm": 0.6522913575172424, "learning_rate": 0.0001989432812618951, "loss": 1.5774, "step": 893 }, { "epoch": 0.14370102471368293, "grad_norm": 0.6449733972549438, "learning_rate": 0.0001989408352351674, "loss": 1.5462, "step": 894 }, { "epoch": 0.14386176411492868, "grad_norm": 1.6922575235366821, "learning_rate": 0.00019893838639582821, "loss": 1.4942, "step": 895 }, { "epoch": 0.1440225035161744, "grad_norm": 0.6645534634590149, "learning_rate": 0.00019893593474394709, "loss": 1.3943, "step": 896 }, { "epoch": 0.14418324291742013, "grad_norm": 0.669668436050415, "learning_rate": 0.00019893348027959378, "loss": 1.5218, "step": 897 }, { "epoch": 0.14434398231866585, "grad_norm": 0.7632279396057129, "learning_rate": 0.00019893102300283806, "loss": 1.6226, "step": 898 }, { "epoch": 0.1445047217199116, "grad_norm": 0.7309955954551697, "learning_rate": 0.00019892856291374978, "loss": 1.5699, "step": 899 }, { "epoch": 0.14466546112115733, "grad_norm": 0.6028813719749451, "learning_rate": 0.00019892610001239887, "loss": 1.3368, "step": 900 }, { "epoch": 0.14482620052240305, "grad_norm": 0.76590496301651, "learning_rate": 0.00019892363429885532, "loss": 1.7323, "step": 901 }, { "epoch": 0.14498693992364878, "grad_norm": 0.6376199722290039, "learning_rate": 0.00019892116577318924, "loss": 1.3932, "step": 902 }, { "epoch": 0.1451476793248945, "grad_norm": 0.658080518245697, "learning_rate": 0.0001989186944354708, "loss": 1.5432, "step": 903 }, { "epoch": 0.14530841872614025, "grad_norm": 0.6664960384368896, "learning_rate": 0.00019891622028577025, "loss": 1.5179, "step": 904 }, { "epoch": 0.14546915812738598, "grad_norm": 0.6579408645629883, "learning_rate": 0.00019891374332415797, "loss": 1.659, "step": 905 }, { "epoch": 0.1456298975286317, "grad_norm": 0.7166823744773865, "learning_rate": 0.0001989112635507043, "loss": 1.7993, "step": 906 }, { "epoch": 0.14579063692987743, "grad_norm": 0.6960421800613403, "learning_rate": 0.0001989087809654798, "loss": 1.6346, "step": 907 }, { "epoch": 0.14595137633112318, "grad_norm": 0.6491423845291138, "learning_rate": 0.00019890629556855498, "loss": 1.4787, "step": 908 }, { "epoch": 0.1461121157323689, "grad_norm": 0.7200158834457397, "learning_rate": 0.00019890380736000053, "loss": 1.5186, "step": 909 }, { "epoch": 0.14627285513361463, "grad_norm": 0.6660404801368713, "learning_rate": 0.00019890131633988717, "loss": 1.4879, "step": 910 }, { "epoch": 0.14643359453486035, "grad_norm": 0.6411269307136536, "learning_rate": 0.00019889882250828574, "loss": 1.6159, "step": 911 }, { "epoch": 0.14659433393610607, "grad_norm": 0.5771762728691101, "learning_rate": 0.0001988963258652671, "loss": 1.2783, "step": 912 }, { "epoch": 0.14675507333735183, "grad_norm": 0.5841484665870667, "learning_rate": 0.00019889382641090223, "loss": 1.1437, "step": 913 }, { "epoch": 0.14691581273859755, "grad_norm": 0.7764959931373596, "learning_rate": 0.00019889132414526218, "loss": 1.7029, "step": 914 }, { "epoch": 0.14707655213984328, "grad_norm": 0.6933385133743286, "learning_rate": 0.00019888881906841808, "loss": 1.7896, "step": 915 }, { "epoch": 0.147237291541089, "grad_norm": 0.6109792590141296, "learning_rate": 0.00019888631118044118, "loss": 1.2823, "step": 916 }, { "epoch": 0.14739803094233475, "grad_norm": 0.7449894547462463, "learning_rate": 0.00019888380048140272, "loss": 1.604, "step": 917 }, { "epoch": 0.14755877034358048, "grad_norm": 0.6307390332221985, "learning_rate": 0.00019888128697137408, "loss": 1.6751, "step": 918 }, { "epoch": 0.1477195097448262, "grad_norm": 0.6786504983901978, "learning_rate": 0.00019887877065042676, "loss": 1.5239, "step": 919 }, { "epoch": 0.14788024914607192, "grad_norm": 0.6443576812744141, "learning_rate": 0.00019887625151863224, "loss": 1.2494, "step": 920 }, { "epoch": 0.14804098854731765, "grad_norm": 0.7144865393638611, "learning_rate": 0.00019887372957606218, "loss": 1.4065, "step": 921 }, { "epoch": 0.1482017279485634, "grad_norm": 0.8856385946273804, "learning_rate": 0.00019887120482278822, "loss": 1.8236, "step": 922 }, { "epoch": 0.14836246734980912, "grad_norm": 0.6051225066184998, "learning_rate": 0.00019886867725888216, "loss": 1.2579, "step": 923 }, { "epoch": 0.14852320675105485, "grad_norm": 0.6743772625923157, "learning_rate": 0.00019886614688441586, "loss": 1.5979, "step": 924 }, { "epoch": 0.14868394615230057, "grad_norm": 0.6850600242614746, "learning_rate": 0.0001988636136994612, "loss": 1.1798, "step": 925 }, { "epoch": 0.14884468555354632, "grad_norm": 0.7614739537239075, "learning_rate": 0.00019886107770409025, "loss": 1.5913, "step": 926 }, { "epoch": 0.14900542495479205, "grad_norm": 0.6173290014266968, "learning_rate": 0.0001988585388983751, "loss": 1.3682, "step": 927 }, { "epoch": 0.14916616435603777, "grad_norm": 0.6632800102233887, "learning_rate": 0.0001988559972823879, "loss": 1.5841, "step": 928 }, { "epoch": 0.1493269037572835, "grad_norm": 0.7230693697929382, "learning_rate": 0.00019885345285620084, "loss": 1.7003, "step": 929 }, { "epoch": 0.14948764315852922, "grad_norm": 0.6311423182487488, "learning_rate": 0.00019885090561988638, "loss": 1.362, "step": 930 }, { "epoch": 0.14964838255977497, "grad_norm": 0.7760431170463562, "learning_rate": 0.00019884835557351682, "loss": 1.7574, "step": 931 }, { "epoch": 0.1498091219610207, "grad_norm": 0.6092554926872253, "learning_rate": 0.00019884580271716472, "loss": 1.5864, "step": 932 }, { "epoch": 0.14996986136226642, "grad_norm": 0.649867057800293, "learning_rate": 0.00019884324705090264, "loss": 1.4144, "step": 933 }, { "epoch": 0.15013060076351215, "grad_norm": 0.7480477690696716, "learning_rate": 0.00019884068857480317, "loss": 1.6461, "step": 934 }, { "epoch": 0.1502913401647579, "grad_norm": 0.6854903101921082, "learning_rate": 0.00019883812728893914, "loss": 1.8292, "step": 935 }, { "epoch": 0.15045207956600362, "grad_norm": 0.5948757529258728, "learning_rate": 0.00019883556319338328, "loss": 1.2011, "step": 936 }, { "epoch": 0.15061281896724935, "grad_norm": 0.7343953847885132, "learning_rate": 0.0001988329962882085, "loss": 1.5302, "step": 937 }, { "epoch": 0.15077355836849507, "grad_norm": 0.6474173665046692, "learning_rate": 0.0001988304265734878, "loss": 1.5246, "step": 938 }, { "epoch": 0.1509342977697408, "grad_norm": 0.6967902779579163, "learning_rate": 0.00019882785404929418, "loss": 1.4229, "step": 939 }, { "epoch": 0.15109503717098655, "grad_norm": 0.7217636704444885, "learning_rate": 0.0001988252787157008, "loss": 1.8352, "step": 940 }, { "epoch": 0.15125577657223227, "grad_norm": 0.6979879140853882, "learning_rate": 0.00019882270057278086, "loss": 1.6971, "step": 941 }, { "epoch": 0.151416515973478, "grad_norm": 0.621940016746521, "learning_rate": 0.0001988201196206077, "loss": 1.4267, "step": 942 }, { "epoch": 0.15157725537472372, "grad_norm": 0.6317644119262695, "learning_rate": 0.0001988175358592546, "loss": 1.3395, "step": 943 }, { "epoch": 0.15173799477596947, "grad_norm": 0.5996818542480469, "learning_rate": 0.00019881494928879504, "loss": 1.217, "step": 944 }, { "epoch": 0.1518987341772152, "grad_norm": 0.6294602751731873, "learning_rate": 0.0001988123599093026, "loss": 1.4057, "step": 945 }, { "epoch": 0.15205947357846092, "grad_norm": 0.6475471258163452, "learning_rate": 0.00019880976772085083, "loss": 1.4866, "step": 946 }, { "epoch": 0.15222021297970664, "grad_norm": 0.6705943942070007, "learning_rate": 0.00019880717272351345, "loss": 1.5145, "step": 947 }, { "epoch": 0.1523809523809524, "grad_norm": 0.6845306754112244, "learning_rate": 0.00019880457491736422, "loss": 1.3694, "step": 948 }, { "epoch": 0.15254169178219812, "grad_norm": 0.7190065979957581, "learning_rate": 0.000198801974302477, "loss": 1.6911, "step": 949 }, { "epoch": 0.15270243118344384, "grad_norm": 0.6512144207954407, "learning_rate": 0.00019879937087892565, "loss": 1.4382, "step": 950 }, { "epoch": 0.15286317058468957, "grad_norm": 0.6675736308097839, "learning_rate": 0.00019879676464678428, "loss": 1.5352, "step": 951 }, { "epoch": 0.1530239099859353, "grad_norm": 0.7176043391227722, "learning_rate": 0.00019879415560612694, "loss": 1.6525, "step": 952 }, { "epoch": 0.15318464938718104, "grad_norm": 0.7029895186424255, "learning_rate": 0.00019879154375702774, "loss": 1.3181, "step": 953 }, { "epoch": 0.15334538878842677, "grad_norm": 0.6634067893028259, "learning_rate": 0.00019878892909956102, "loss": 1.403, "step": 954 }, { "epoch": 0.1535061281896725, "grad_norm": 0.679446280002594, "learning_rate": 0.00019878631163380104, "loss": 1.4203, "step": 955 }, { "epoch": 0.15366686759091822, "grad_norm": 4.015530109405518, "learning_rate": 0.00019878369135982224, "loss": 1.6058, "step": 956 }, { "epoch": 0.15382760699216397, "grad_norm": 0.7728213667869568, "learning_rate": 0.00019878106827769912, "loss": 1.7615, "step": 957 }, { "epoch": 0.1539883463934097, "grad_norm": 0.6806004643440247, "learning_rate": 0.00019877844238750617, "loss": 1.6045, "step": 958 }, { "epoch": 0.15414908579465542, "grad_norm": 0.7532920837402344, "learning_rate": 0.00019877581368931812, "loss": 1.4933, "step": 959 }, { "epoch": 0.15430982519590114, "grad_norm": 0.7204965949058533, "learning_rate": 0.0001987731821832097, "loss": 1.3613, "step": 960 }, { "epoch": 0.15447056459714686, "grad_norm": 0.6429884433746338, "learning_rate": 0.00019877054786925563, "loss": 1.6066, "step": 961 }, { "epoch": 0.15463130399839262, "grad_norm": 0.6855032444000244, "learning_rate": 0.00019876791074753086, "loss": 1.6921, "step": 962 }, { "epoch": 0.15479204339963834, "grad_norm": 0.6783963441848755, "learning_rate": 0.0001987652708181104, "loss": 1.4775, "step": 963 }, { "epoch": 0.15495278280088406, "grad_norm": 0.6645909547805786, "learning_rate": 0.00019876262808106917, "loss": 1.3109, "step": 964 }, { "epoch": 0.1551135222021298, "grad_norm": 0.5964141488075256, "learning_rate": 0.00019875998253648242, "loss": 1.2871, "step": 965 }, { "epoch": 0.15527426160337554, "grad_norm": 0.639638364315033, "learning_rate": 0.00019875733418442528, "loss": 1.5462, "step": 966 }, { "epoch": 0.15543500100462126, "grad_norm": 0.7185594439506531, "learning_rate": 0.00019875468302497308, "loss": 1.5771, "step": 967 }, { "epoch": 0.155595740405867, "grad_norm": 0.6564749479293823, "learning_rate": 0.00019875202905820114, "loss": 1.3989, "step": 968 }, { "epoch": 0.1557564798071127, "grad_norm": 0.8153662085533142, "learning_rate": 0.00019874937228418494, "loss": 1.7867, "step": 969 }, { "epoch": 0.15591721920835844, "grad_norm": 0.7256589531898499, "learning_rate": 0.000198746712703, "loss": 1.7138, "step": 970 }, { "epoch": 0.1560779586096042, "grad_norm": 0.6659133434295654, "learning_rate": 0.0001987440503147219, "loss": 1.4592, "step": 971 }, { "epoch": 0.1562386980108499, "grad_norm": 0.66960209608078, "learning_rate": 0.00019874138511942634, "loss": 1.5041, "step": 972 }, { "epoch": 0.15639943741209564, "grad_norm": 0.7306337356567383, "learning_rate": 0.0001987387171171891, "loss": 1.4215, "step": 973 }, { "epoch": 0.15656017681334136, "grad_norm": 0.6185557842254639, "learning_rate": 0.00019873604630808603, "loss": 1.4233, "step": 974 }, { "epoch": 0.1567209162145871, "grad_norm": 0.7348726987838745, "learning_rate": 0.00019873337269219298, "loss": 1.6125, "step": 975 }, { "epoch": 0.15688165561583284, "grad_norm": 0.6273520588874817, "learning_rate": 0.00019873069626958606, "loss": 1.322, "step": 976 }, { "epoch": 0.15704239501707856, "grad_norm": 0.6084796786308289, "learning_rate": 0.00019872801704034126, "loss": 1.5506, "step": 977 }, { "epoch": 0.1572031344183243, "grad_norm": 0.7158278226852417, "learning_rate": 0.0001987253350045348, "loss": 1.4674, "step": 978 }, { "epoch": 0.15736387381957, "grad_norm": 0.7221397161483765, "learning_rate": 0.00019872265016224292, "loss": 1.5394, "step": 979 }, { "epoch": 0.15752461322081576, "grad_norm": 0.7340632677078247, "learning_rate": 0.0001987199625135419, "loss": 1.2534, "step": 980 }, { "epoch": 0.1576853526220615, "grad_norm": 0.7043496370315552, "learning_rate": 0.00019871727205850818, "loss": 1.5201, "step": 981 }, { "epoch": 0.1578460920233072, "grad_norm": 0.7329210638999939, "learning_rate": 0.00019871457879721825, "loss": 1.6347, "step": 982 }, { "epoch": 0.15800683142455293, "grad_norm": 0.6648560166358948, "learning_rate": 0.00019871188272974863, "loss": 1.5331, "step": 983 }, { "epoch": 0.1581675708257987, "grad_norm": 0.7216227054595947, "learning_rate": 0.000198709183856176, "loss": 1.6288, "step": 984 }, { "epoch": 0.1583283102270444, "grad_norm": 0.7017501592636108, "learning_rate": 0.00019870648217657709, "loss": 1.4392, "step": 985 }, { "epoch": 0.15848904962829014, "grad_norm": 0.6124290823936462, "learning_rate": 0.00019870377769102862, "loss": 1.4824, "step": 986 }, { "epoch": 0.15864978902953586, "grad_norm": 0.7193689942359924, "learning_rate": 0.0001987010703996076, "loss": 1.5512, "step": 987 }, { "epoch": 0.15881052843078158, "grad_norm": 0.6952188611030579, "learning_rate": 0.00019869836030239087, "loss": 1.4627, "step": 988 }, { "epoch": 0.15897126783202734, "grad_norm": 0.6692978143692017, "learning_rate": 0.00019869564739945553, "loss": 1.5379, "step": 989 }, { "epoch": 0.15913200723327306, "grad_norm": 0.7201446890830994, "learning_rate": 0.0001986929316908787, "loss": 1.5616, "step": 990 }, { "epoch": 0.15929274663451878, "grad_norm": 0.7659215331077576, "learning_rate": 0.00019869021317673758, "loss": 1.6914, "step": 991 }, { "epoch": 0.1594534860357645, "grad_norm": 0.7293421030044556, "learning_rate": 0.00019868749185710942, "loss": 1.6955, "step": 992 }, { "epoch": 0.15961422543701026, "grad_norm": 0.7558579444885254, "learning_rate": 0.0001986847677320716, "loss": 1.6968, "step": 993 }, { "epoch": 0.15977496483825598, "grad_norm": 0.6371296048164368, "learning_rate": 0.0001986820408017016, "loss": 1.2852, "step": 994 }, { "epoch": 0.1599357042395017, "grad_norm": 0.6267312169075012, "learning_rate": 0.00019867931106607687, "loss": 1.5069, "step": 995 }, { "epoch": 0.16009644364074743, "grad_norm": 0.6306993961334229, "learning_rate": 0.00019867657852527503, "loss": 1.4435, "step": 996 }, { "epoch": 0.16025718304199316, "grad_norm": 0.6783279776573181, "learning_rate": 0.00019867384317937377, "loss": 1.5285, "step": 997 }, { "epoch": 0.1604179224432389, "grad_norm": 0.6650464534759521, "learning_rate": 0.00019867110502845087, "loss": 1.5271, "step": 998 }, { "epoch": 0.16057866184448463, "grad_norm": 0.7117879986763, "learning_rate": 0.0001986683640725841, "loss": 1.5345, "step": 999 }, { "epoch": 0.16073940124573036, "grad_norm": 0.6345593929290771, "learning_rate": 0.00019866562031185147, "loss": 1.4072, "step": 1000 }, { "epoch": 0.16073940124573036, "eval_loss": 1.551641821861267, "eval_runtime": 46.2232, "eval_samples_per_second": 5.43, "eval_steps_per_second": 2.726, "step": 1000 }, { "epoch": 0.16090014064697608, "grad_norm": 0.7225771546363831, "learning_rate": 0.0001986628737463309, "loss": 1.8378, "step": 1001 }, { "epoch": 0.16106088004822183, "grad_norm": 0.7000516057014465, "learning_rate": 0.0001986601243761005, "loss": 1.5117, "step": 1002 }, { "epoch": 0.16122161944946756, "grad_norm": 0.6626244187355042, "learning_rate": 0.00019865737220123842, "loss": 1.3532, "step": 1003 }, { "epoch": 0.16138235885071328, "grad_norm": 0.5916775465011597, "learning_rate": 0.0001986546172218229, "loss": 1.4413, "step": 1004 }, { "epoch": 0.161543098251959, "grad_norm": 0.9063904881477356, "learning_rate": 0.00019865185943793225, "loss": 1.749, "step": 1005 }, { "epoch": 0.16170383765320473, "grad_norm": 0.7838590145111084, "learning_rate": 0.00019864909884964487, "loss": 1.5255, "step": 1006 }, { "epoch": 0.16186457705445048, "grad_norm": 0.7252520322799683, "learning_rate": 0.00019864633545703923, "loss": 1.4801, "step": 1007 }, { "epoch": 0.1620253164556962, "grad_norm": 0.6418090462684631, "learning_rate": 0.00019864356926019392, "loss": 1.5304, "step": 1008 }, { "epoch": 0.16218605585694193, "grad_norm": 0.6760355830192566, "learning_rate": 0.0001986408002591875, "loss": 1.5612, "step": 1009 }, { "epoch": 0.16234679525818765, "grad_norm": 0.6813499331474304, "learning_rate": 0.00019863802845409878, "loss": 1.4838, "step": 1010 }, { "epoch": 0.1625075346594334, "grad_norm": 0.6252709031105042, "learning_rate": 0.00019863525384500652, "loss": 1.5444, "step": 1011 }, { "epoch": 0.16266827406067913, "grad_norm": 0.8622866868972778, "learning_rate": 0.00019863247643198953, "loss": 1.5366, "step": 1012 }, { "epoch": 0.16282901346192485, "grad_norm": 0.7004911303520203, "learning_rate": 0.00019862969621512684, "loss": 1.4161, "step": 1013 }, { "epoch": 0.16298975286317058, "grad_norm": 0.5497576594352722, "learning_rate": 0.00019862691319449746, "loss": 1.2008, "step": 1014 }, { "epoch": 0.1631504922644163, "grad_norm": 0.6949265003204346, "learning_rate": 0.0001986241273701805, "loss": 1.4692, "step": 1015 }, { "epoch": 0.16331123166566205, "grad_norm": 0.6392108201980591, "learning_rate": 0.00019862133874225517, "loss": 1.4214, "step": 1016 }, { "epoch": 0.16347197106690778, "grad_norm": 0.6645724177360535, "learning_rate": 0.0001986185473108007, "loss": 1.4427, "step": 1017 }, { "epoch": 0.1636327104681535, "grad_norm": 0.7121353149414062, "learning_rate": 0.00019861575307589648, "loss": 1.4942, "step": 1018 }, { "epoch": 0.16379344986939923, "grad_norm": 0.7042354941368103, "learning_rate": 0.00019861295603762195, "loss": 1.4549, "step": 1019 }, { "epoch": 0.16395418927064498, "grad_norm": 0.7140592336654663, "learning_rate": 0.00019861015619605657, "loss": 1.5206, "step": 1020 }, { "epoch": 0.1641149286718907, "grad_norm": 0.6991243958473206, "learning_rate": 0.00019860735355128003, "loss": 1.4777, "step": 1021 }, { "epoch": 0.16427566807313643, "grad_norm": 0.7442986369132996, "learning_rate": 0.0001986045481033719, "loss": 1.7236, "step": 1022 }, { "epoch": 0.16443640747438215, "grad_norm": 0.7165995836257935, "learning_rate": 0.00019860173985241197, "loss": 1.5731, "step": 1023 }, { "epoch": 0.16459714687562788, "grad_norm": 0.6917805671691895, "learning_rate": 0.00019859892879848007, "loss": 1.3567, "step": 1024 }, { "epoch": 0.16475788627687363, "grad_norm": 0.6753543615341187, "learning_rate": 0.00019859611494165612, "loss": 1.574, "step": 1025 }, { "epoch": 0.16491862567811935, "grad_norm": 0.6695388555526733, "learning_rate": 0.0001985932982820201, "loss": 1.6975, "step": 1026 }, { "epoch": 0.16507936507936508, "grad_norm": 0.6854733228683472, "learning_rate": 0.00019859047881965207, "loss": 1.5108, "step": 1027 }, { "epoch": 0.1652401044806108, "grad_norm": 0.7704520225524902, "learning_rate": 0.0001985876565546322, "loss": 1.7854, "step": 1028 }, { "epoch": 0.16540084388185655, "grad_norm": 0.6610824465751648, "learning_rate": 0.0001985848314870407, "loss": 1.5834, "step": 1029 }, { "epoch": 0.16556158328310228, "grad_norm": 0.6934378743171692, "learning_rate": 0.0001985820036169579, "loss": 1.5005, "step": 1030 }, { "epoch": 0.165722322684348, "grad_norm": 0.6758600473403931, "learning_rate": 0.0001985791729444642, "loss": 1.3293, "step": 1031 }, { "epoch": 0.16588306208559372, "grad_norm": 0.7267760634422302, "learning_rate": 0.00019857633946964, "loss": 1.5037, "step": 1032 }, { "epoch": 0.16604380148683945, "grad_norm": 0.7609508633613586, "learning_rate": 0.00019857350319256591, "loss": 1.5267, "step": 1033 }, { "epoch": 0.1662045408880852, "grad_norm": 0.6085457801818848, "learning_rate": 0.00019857066411332254, "loss": 1.3881, "step": 1034 }, { "epoch": 0.16636528028933092, "grad_norm": 0.6737141013145447, "learning_rate": 0.0001985678222319906, "loss": 1.3852, "step": 1035 }, { "epoch": 0.16652601969057665, "grad_norm": 0.7095346450805664, "learning_rate": 0.0001985649775486509, "loss": 1.8289, "step": 1036 }, { "epoch": 0.16668675909182237, "grad_norm": 0.6328777074813843, "learning_rate": 0.00019856213006338424, "loss": 1.5646, "step": 1037 }, { "epoch": 0.16684749849306812, "grad_norm": 0.6994189620018005, "learning_rate": 0.0001985592797762716, "loss": 1.5283, "step": 1038 }, { "epoch": 0.16700823789431385, "grad_norm": 0.7421449422836304, "learning_rate": 0.00019855642668739404, "loss": 1.4818, "step": 1039 }, { "epoch": 0.16716897729555957, "grad_norm": 0.7323822379112244, "learning_rate": 0.00019855357079683265, "loss": 1.3528, "step": 1040 }, { "epoch": 0.1673297166968053, "grad_norm": 0.9041878581047058, "learning_rate": 0.00019855071210466858, "loss": 1.6483, "step": 1041 }, { "epoch": 0.16749045609805102, "grad_norm": 0.6512113809585571, "learning_rate": 0.0001985478506109831, "loss": 1.449, "step": 1042 }, { "epoch": 0.16765119549929677, "grad_norm": 0.659520149230957, "learning_rate": 0.0001985449863158576, "loss": 1.5318, "step": 1043 }, { "epoch": 0.1678119349005425, "grad_norm": 0.7753905653953552, "learning_rate": 0.00019854211921937345, "loss": 1.5051, "step": 1044 }, { "epoch": 0.16797267430178822, "grad_norm": 0.7129497528076172, "learning_rate": 0.00019853924932161217, "loss": 1.3628, "step": 1045 }, { "epoch": 0.16813341370303395, "grad_norm": 0.7877184748649597, "learning_rate": 0.0001985363766226554, "loss": 1.7946, "step": 1046 }, { "epoch": 0.1682941531042797, "grad_norm": 0.7665697932243347, "learning_rate": 0.00019853350112258473, "loss": 1.8454, "step": 1047 }, { "epoch": 0.16845489250552542, "grad_norm": 0.687444806098938, "learning_rate": 0.00019853062282148193, "loss": 1.5665, "step": 1048 }, { "epoch": 0.16861563190677115, "grad_norm": 0.7188471555709839, "learning_rate": 0.00019852774171942881, "loss": 1.4008, "step": 1049 }, { "epoch": 0.16877637130801687, "grad_norm": 0.6884022951126099, "learning_rate": 0.0001985248578165073, "loss": 1.7111, "step": 1050 }, { "epoch": 0.1689371107092626, "grad_norm": 0.7210416197776794, "learning_rate": 0.00019852197111279937, "loss": 1.4486, "step": 1051 }, { "epoch": 0.16909785011050835, "grad_norm": 0.71469646692276, "learning_rate": 0.00019851908160838704, "loss": 1.5063, "step": 1052 }, { "epoch": 0.16925858951175407, "grad_norm": 0.7382471561431885, "learning_rate": 0.00019851618930335246, "loss": 1.8837, "step": 1053 }, { "epoch": 0.1694193289129998, "grad_norm": 0.684877872467041, "learning_rate": 0.00019851329419777794, "loss": 1.5668, "step": 1054 }, { "epoch": 0.16958006831424552, "grad_norm": 0.7576097249984741, "learning_rate": 0.00019851039629174567, "loss": 1.6739, "step": 1055 }, { "epoch": 0.16974080771549127, "grad_norm": 0.6636502742767334, "learning_rate": 0.0001985074955853381, "loss": 1.3393, "step": 1056 }, { "epoch": 0.169901547116737, "grad_norm": 0.6560344099998474, "learning_rate": 0.00019850459207863765, "loss": 1.3216, "step": 1057 }, { "epoch": 0.17006228651798272, "grad_norm": 0.729120135307312, "learning_rate": 0.00019850168577172688, "loss": 1.5237, "step": 1058 }, { "epoch": 0.17022302591922844, "grad_norm": 0.7053074836730957, "learning_rate": 0.00019849877666468837, "loss": 1.6883, "step": 1059 }, { "epoch": 0.17038376532047417, "grad_norm": 0.6908878684043884, "learning_rate": 0.00019849586475760485, "loss": 1.5082, "step": 1060 }, { "epoch": 0.17054450472171992, "grad_norm": 0.7621721625328064, "learning_rate": 0.00019849295005055915, "loss": 1.6633, "step": 1061 }, { "epoch": 0.17070524412296564, "grad_norm": 0.7849704027175903, "learning_rate": 0.00019849003254363403, "loss": 1.6213, "step": 1062 }, { "epoch": 0.17086598352421137, "grad_norm": 0.6999383568763733, "learning_rate": 0.00019848711223691246, "loss": 1.6254, "step": 1063 }, { "epoch": 0.1710267229254571, "grad_norm": 0.7956300973892212, "learning_rate": 0.00019848418913047747, "loss": 1.7525, "step": 1064 }, { "epoch": 0.17118746232670284, "grad_norm": 0.770805299282074, "learning_rate": 0.00019848126322441213, "loss": 1.593, "step": 1065 }, { "epoch": 0.17134820172794857, "grad_norm": 0.7180036902427673, "learning_rate": 0.00019847833451879966, "loss": 1.8319, "step": 1066 }, { "epoch": 0.1715089411291943, "grad_norm": 0.6324453949928284, "learning_rate": 0.0001984754030137233, "loss": 1.5374, "step": 1067 }, { "epoch": 0.17166968053044002, "grad_norm": 0.7412252426147461, "learning_rate": 0.00019847246870926638, "loss": 1.773, "step": 1068 }, { "epoch": 0.17183041993168577, "grad_norm": 0.7266322374343872, "learning_rate": 0.00019846953160551224, "loss": 1.6866, "step": 1069 }, { "epoch": 0.1719911593329315, "grad_norm": 0.5894017815589905, "learning_rate": 0.00019846659170254453, "loss": 1.3229, "step": 1070 }, { "epoch": 0.17215189873417722, "grad_norm": 0.6515121459960938, "learning_rate": 0.00019846364900044666, "loss": 1.6382, "step": 1071 }, { "epoch": 0.17231263813542294, "grad_norm": 0.6758244633674622, "learning_rate": 0.00019846070349930243, "loss": 1.4756, "step": 1072 }, { "epoch": 0.17247337753666866, "grad_norm": 0.6360596418380737, "learning_rate": 0.00019845775519919542, "loss": 1.3721, "step": 1073 }, { "epoch": 0.17263411693791442, "grad_norm": 0.6617791056632996, "learning_rate": 0.00019845480410020957, "loss": 1.6511, "step": 1074 }, { "epoch": 0.17279485633916014, "grad_norm": 0.8265364766120911, "learning_rate": 0.00019845185020242874, "loss": 1.7422, "step": 1075 }, { "epoch": 0.17295559574040587, "grad_norm": 0.6883860230445862, "learning_rate": 0.00019844889350593685, "loss": 1.4726, "step": 1076 }, { "epoch": 0.1731163351416516, "grad_norm": 0.6648023724555969, "learning_rate": 0.00019844593401081802, "loss": 1.4124, "step": 1077 }, { "epoch": 0.17327707454289734, "grad_norm": 0.7084590196609497, "learning_rate": 0.00019844297171715632, "loss": 1.6625, "step": 1078 }, { "epoch": 0.17343781394414307, "grad_norm": 0.6979174613952637, "learning_rate": 0.000198440006625036, "loss": 1.5694, "step": 1079 }, { "epoch": 0.1735985533453888, "grad_norm": 0.7147260904312134, "learning_rate": 0.0001984370387345413, "loss": 1.6325, "step": 1080 }, { "epoch": 0.1737592927466345, "grad_norm": 0.7963399887084961, "learning_rate": 0.00019843406804575663, "loss": 1.7035, "step": 1081 }, { "epoch": 0.17392003214788024, "grad_norm": 0.6787152886390686, "learning_rate": 0.00019843109455876644, "loss": 1.6148, "step": 1082 }, { "epoch": 0.174080771549126, "grad_norm": 0.7645273208618164, "learning_rate": 0.00019842811827365528, "loss": 1.4212, "step": 1083 }, { "epoch": 0.17424151095037171, "grad_norm": 0.678469181060791, "learning_rate": 0.0001984251391905077, "loss": 1.4622, "step": 1084 }, { "epoch": 0.17440225035161744, "grad_norm": 0.6680808663368225, "learning_rate": 0.00019842215730940844, "loss": 1.6335, "step": 1085 }, { "epoch": 0.17456298975286316, "grad_norm": 0.7212764620780945, "learning_rate": 0.0001984191726304422, "loss": 1.6698, "step": 1086 }, { "epoch": 0.17472372915410891, "grad_norm": 0.7381146550178528, "learning_rate": 0.00019841618515369392, "loss": 1.5218, "step": 1087 }, { "epoch": 0.17488446855535464, "grad_norm": 0.5814352035522461, "learning_rate": 0.00019841319487924844, "loss": 1.2711, "step": 1088 }, { "epoch": 0.17504520795660036, "grad_norm": 0.7143793106079102, "learning_rate": 0.00019841020180719077, "loss": 1.5597, "step": 1089 }, { "epoch": 0.1752059473578461, "grad_norm": 0.6894948482513428, "learning_rate": 0.00019840720593760605, "loss": 1.5295, "step": 1090 }, { "epoch": 0.1753666867590918, "grad_norm": 0.8058619499206543, "learning_rate": 0.00019840420727057942, "loss": 1.5008, "step": 1091 }, { "epoch": 0.17552742616033756, "grad_norm": 0.7381027936935425, "learning_rate": 0.0001984012058061961, "loss": 1.5549, "step": 1092 }, { "epoch": 0.1756881655615833, "grad_norm": 0.7720760107040405, "learning_rate": 0.00019839820154454144, "loss": 1.5733, "step": 1093 }, { "epoch": 0.175848904962829, "grad_norm": 0.6476446390151978, "learning_rate": 0.00019839519448570087, "loss": 1.5706, "step": 1094 }, { "epoch": 0.17600964436407474, "grad_norm": 0.6488350033760071, "learning_rate": 0.0001983921846297598, "loss": 1.4877, "step": 1095 }, { "epoch": 0.1761703837653205, "grad_norm": 0.7370501160621643, "learning_rate": 0.00019838917197680385, "loss": 1.7007, "step": 1096 }, { "epoch": 0.1763311231665662, "grad_norm": 0.6938601136207581, "learning_rate": 0.00019838615652691865, "loss": 1.5086, "step": 1097 }, { "epoch": 0.17649186256781194, "grad_norm": 0.6535276174545288, "learning_rate": 0.0001983831382801899, "loss": 1.3049, "step": 1098 }, { "epoch": 0.17665260196905766, "grad_norm": 0.7703623175621033, "learning_rate": 0.0001983801172367034, "loss": 1.6853, "step": 1099 }, { "epoch": 0.17681334137030338, "grad_norm": 0.6503168940544128, "learning_rate": 0.00019837709339654507, "loss": 1.519, "step": 1100 }, { "epoch": 0.17697408077154914, "grad_norm": 0.6881818175315857, "learning_rate": 0.0001983740667598008, "loss": 1.35, "step": 1101 }, { "epoch": 0.17713482017279486, "grad_norm": 0.7774071097373962, "learning_rate": 0.0001983710373265567, "loss": 1.7335, "step": 1102 }, { "epoch": 0.17729555957404058, "grad_norm": 0.6751627326011658, "learning_rate": 0.00019836800509689888, "loss": 1.5279, "step": 1103 }, { "epoch": 0.1774562989752863, "grad_norm": 0.6278048157691956, "learning_rate": 0.0001983649700709135, "loss": 1.1899, "step": 1104 }, { "epoch": 0.17761703837653206, "grad_norm": 0.8055717945098877, "learning_rate": 0.0001983619322486868, "loss": 1.4201, "step": 1105 }, { "epoch": 0.17777777777777778, "grad_norm": 0.6947066187858582, "learning_rate": 0.00019835889163030525, "loss": 1.5135, "step": 1106 }, { "epoch": 0.1779385171790235, "grad_norm": 0.687809944152832, "learning_rate": 0.00019835584821585521, "loss": 1.5144, "step": 1107 }, { "epoch": 0.17809925658026923, "grad_norm": 0.7255254983901978, "learning_rate": 0.00019835280200542323, "loss": 1.4453, "step": 1108 }, { "epoch": 0.17825999598151496, "grad_norm": 0.6668116450309753, "learning_rate": 0.00019834975299909586, "loss": 1.4498, "step": 1109 }, { "epoch": 0.1784207353827607, "grad_norm": 0.8271561861038208, "learning_rate": 0.00019834670119695983, "loss": 1.7276, "step": 1110 }, { "epoch": 0.17858147478400643, "grad_norm": 0.7470852732658386, "learning_rate": 0.00019834364659910184, "loss": 1.6124, "step": 1111 }, { "epoch": 0.17874221418525216, "grad_norm": 0.6716722249984741, "learning_rate": 0.00019834058920560878, "loss": 1.3291, "step": 1112 }, { "epoch": 0.17890295358649788, "grad_norm": 0.6623573899269104, "learning_rate": 0.0001983375290165675, "loss": 1.4264, "step": 1113 }, { "epoch": 0.17906369298774363, "grad_norm": 0.6230528950691223, "learning_rate": 0.00019833446603206506, "loss": 1.4637, "step": 1114 }, { "epoch": 0.17922443238898936, "grad_norm": 0.728524386882782, "learning_rate": 0.0001983314002521885, "loss": 1.6136, "step": 1115 }, { "epoch": 0.17938517179023508, "grad_norm": 0.7206562161445618, "learning_rate": 0.00019832833167702496, "loss": 1.6077, "step": 1116 }, { "epoch": 0.1795459111914808, "grad_norm": 0.6815255880355835, "learning_rate": 0.00019832526030666168, "loss": 1.5656, "step": 1117 }, { "epoch": 0.17970665059272653, "grad_norm": 0.7240070700645447, "learning_rate": 0.00019832218614118595, "loss": 1.5702, "step": 1118 }, { "epoch": 0.17986738999397228, "grad_norm": 0.6560856103897095, "learning_rate": 0.00019831910918068521, "loss": 1.5864, "step": 1119 }, { "epoch": 0.180028129395218, "grad_norm": 0.6097758412361145, "learning_rate": 0.00019831602942524688, "loss": 1.5383, "step": 1120 }, { "epoch": 0.18018886879646373, "grad_norm": 0.6129916906356812, "learning_rate": 0.00019831294687495856, "loss": 1.5235, "step": 1121 }, { "epoch": 0.18034960819770945, "grad_norm": 0.7319486737251282, "learning_rate": 0.00019830986152990783, "loss": 1.613, "step": 1122 }, { "epoch": 0.1805103475989552, "grad_norm": 0.6205679774284363, "learning_rate": 0.0001983067733901824, "loss": 1.2797, "step": 1123 }, { "epoch": 0.18067108700020093, "grad_norm": 0.7159222960472107, "learning_rate": 0.0001983036824558701, "loss": 1.4252, "step": 1124 }, { "epoch": 0.18083182640144665, "grad_norm": 0.643804669380188, "learning_rate": 0.00019830058872705877, "loss": 1.4383, "step": 1125 }, { "epoch": 0.18099256580269238, "grad_norm": 0.7257186770439148, "learning_rate": 0.00019829749220383634, "loss": 1.6941, "step": 1126 }, { "epoch": 0.1811533052039381, "grad_norm": 0.6857256889343262, "learning_rate": 0.00019829439288629087, "loss": 1.495, "step": 1127 }, { "epoch": 0.18131404460518386, "grad_norm": 0.6572636365890503, "learning_rate": 0.00019829129077451044, "loss": 1.5921, "step": 1128 }, { "epoch": 0.18147478400642958, "grad_norm": 0.6171921491622925, "learning_rate": 0.00019828818586858322, "loss": 1.2297, "step": 1129 }, { "epoch": 0.1816355234076753, "grad_norm": 0.5797328352928162, "learning_rate": 0.0001982850781685975, "loss": 1.2571, "step": 1130 }, { "epoch": 0.18179626280892103, "grad_norm": 0.6669569611549377, "learning_rate": 0.0001982819676746416, "loss": 1.5222, "step": 1131 }, { "epoch": 0.18195700221016678, "grad_norm": 0.7394613027572632, "learning_rate": 0.00019827885438680404, "loss": 1.5659, "step": 1132 }, { "epoch": 0.1821177416114125, "grad_norm": 0.6819843649864197, "learning_rate": 0.00019827573830517316, "loss": 1.5453, "step": 1133 }, { "epoch": 0.18227848101265823, "grad_norm": 0.7327653169631958, "learning_rate": 0.00019827261942983765, "loss": 1.512, "step": 1134 }, { "epoch": 0.18243922041390395, "grad_norm": 0.6030712127685547, "learning_rate": 0.00019826949776088613, "loss": 1.4482, "step": 1135 }, { "epoch": 0.18259995981514968, "grad_norm": 0.624788224697113, "learning_rate": 0.0001982663732984074, "loss": 1.3857, "step": 1136 }, { "epoch": 0.18276069921639543, "grad_norm": 0.7882757782936096, "learning_rate": 0.0001982632460424902, "loss": 1.7236, "step": 1137 }, { "epoch": 0.18292143861764115, "grad_norm": 0.6593135595321655, "learning_rate": 0.00019826011599322348, "loss": 1.3985, "step": 1138 }, { "epoch": 0.18308217801888688, "grad_norm": 0.6808831095695496, "learning_rate": 0.0001982569831506962, "loss": 1.6841, "step": 1139 }, { "epoch": 0.1832429174201326, "grad_norm": 0.6774009466171265, "learning_rate": 0.0001982538475149974, "loss": 1.4947, "step": 1140 }, { "epoch": 0.18340365682137835, "grad_norm": 0.8102741837501526, "learning_rate": 0.00019825070908621622, "loss": 1.5518, "step": 1141 }, { "epoch": 0.18356439622262408, "grad_norm": 0.6931270360946655, "learning_rate": 0.00019824756786444194, "loss": 1.5949, "step": 1142 }, { "epoch": 0.1837251356238698, "grad_norm": 0.7466008067131042, "learning_rate": 0.00019824442384976383, "loss": 1.7412, "step": 1143 }, { "epoch": 0.18388587502511552, "grad_norm": 0.6525118947029114, "learning_rate": 0.0001982412770422712, "loss": 1.6031, "step": 1144 }, { "epoch": 0.18404661442636125, "grad_norm": 0.6427403092384338, "learning_rate": 0.00019823812744205354, "loss": 1.3683, "step": 1145 }, { "epoch": 0.184207353827607, "grad_norm": 0.9868360757827759, "learning_rate": 0.00019823497504920044, "loss": 1.4138, "step": 1146 }, { "epoch": 0.18436809322885273, "grad_norm": 0.7683231830596924, "learning_rate": 0.00019823181986380144, "loss": 1.6514, "step": 1147 }, { "epoch": 0.18452883263009845, "grad_norm": 0.747455358505249, "learning_rate": 0.00019822866188594628, "loss": 1.6209, "step": 1148 }, { "epoch": 0.18468957203134417, "grad_norm": 0.7727821469306946, "learning_rate": 0.00019822550111572472, "loss": 1.8704, "step": 1149 }, { "epoch": 0.18485031143258993, "grad_norm": 0.720368504524231, "learning_rate": 0.0001982223375532266, "loss": 1.5895, "step": 1150 }, { "epoch": 0.18501105083383565, "grad_norm": 0.7449154853820801, "learning_rate": 0.00019821917119854185, "loss": 1.8602, "step": 1151 }, { "epoch": 0.18517179023508137, "grad_norm": 0.7370375990867615, "learning_rate": 0.0001982160020517605, "loss": 1.9699, "step": 1152 }, { "epoch": 0.1853325296363271, "grad_norm": 0.8645741939544678, "learning_rate": 0.0001982128301129726, "loss": 1.7505, "step": 1153 }, { "epoch": 0.18549326903757282, "grad_norm": 0.7449392676353455, "learning_rate": 0.00019820965538226837, "loss": 1.3975, "step": 1154 }, { "epoch": 0.18565400843881857, "grad_norm": 0.6876583099365234, "learning_rate": 0.00019820647785973803, "loss": 1.3119, "step": 1155 }, { "epoch": 0.1858147478400643, "grad_norm": 0.698652446269989, "learning_rate": 0.00019820329754547192, "loss": 1.1955, "step": 1156 }, { "epoch": 0.18597548724131002, "grad_norm": 0.6823303699493408, "learning_rate": 0.00019820011443956044, "loss": 1.528, "step": 1157 }, { "epoch": 0.18613622664255575, "grad_norm": 0.6354515552520752, "learning_rate": 0.00019819692854209408, "loss": 1.3939, "step": 1158 }, { "epoch": 0.1862969660438015, "grad_norm": 0.6599262356758118, "learning_rate": 0.00019819373985316343, "loss": 1.4588, "step": 1159 }, { "epoch": 0.18645770544504722, "grad_norm": 0.6898670196533203, "learning_rate": 0.00019819054837285908, "loss": 1.7867, "step": 1160 }, { "epoch": 0.18661844484629295, "grad_norm": 0.7092880010604858, "learning_rate": 0.00019818735410127177, "loss": 1.588, "step": 1161 }, { "epoch": 0.18677918424753867, "grad_norm": 1.602591872215271, "learning_rate": 0.00019818415703849236, "loss": 1.573, "step": 1162 }, { "epoch": 0.1869399236487844, "grad_norm": 0.6434016227722168, "learning_rate": 0.00019818095718461166, "loss": 1.5382, "step": 1163 }, { "epoch": 0.18710066305003015, "grad_norm": 0.7199103832244873, "learning_rate": 0.0001981777545397207, "loss": 1.616, "step": 1164 }, { "epoch": 0.18726140245127587, "grad_norm": 0.7093287110328674, "learning_rate": 0.00019817454910391047, "loss": 1.619, "step": 1165 }, { "epoch": 0.1874221418525216, "grad_norm": 0.6770147681236267, "learning_rate": 0.00019817134087727213, "loss": 1.5449, "step": 1166 }, { "epoch": 0.18758288125376732, "grad_norm": 0.6199904680252075, "learning_rate": 0.00019816812985989685, "loss": 1.4052, "step": 1167 }, { "epoch": 0.18774362065501307, "grad_norm": 0.6689044833183289, "learning_rate": 0.00019816491605187587, "loss": 1.4155, "step": 1168 }, { "epoch": 0.1879043600562588, "grad_norm": 0.6551892161369324, "learning_rate": 0.00019816169945330065, "loss": 1.3969, "step": 1169 }, { "epoch": 0.18806509945750452, "grad_norm": 0.6753897666931152, "learning_rate": 0.00019815848006426258, "loss": 1.5734, "step": 1170 }, { "epoch": 0.18822583885875024, "grad_norm": 0.7320741415023804, "learning_rate": 0.0001981552578848532, "loss": 1.5605, "step": 1171 }, { "epoch": 0.18838657825999597, "grad_norm": 0.8213657736778259, "learning_rate": 0.00019815203291516405, "loss": 1.3277, "step": 1172 }, { "epoch": 0.18854731766124172, "grad_norm": 0.7016989588737488, "learning_rate": 0.00019814880515528683, "loss": 1.6446, "step": 1173 }, { "epoch": 0.18870805706248744, "grad_norm": 0.7025212645530701, "learning_rate": 0.00019814557460531336, "loss": 1.3588, "step": 1174 }, { "epoch": 0.18886879646373317, "grad_norm": 0.6115427017211914, "learning_rate": 0.00019814234126533538, "loss": 1.3844, "step": 1175 }, { "epoch": 0.1890295358649789, "grad_norm": 0.7857159376144409, "learning_rate": 0.00019813910513544487, "loss": 1.5626, "step": 1176 }, { "epoch": 0.18919027526622464, "grad_norm": 0.7048535346984863, "learning_rate": 0.00019813586621573377, "loss": 1.5825, "step": 1177 }, { "epoch": 0.18935101466747037, "grad_norm": 0.7286770343780518, "learning_rate": 0.0001981326245062942, "loss": 1.621, "step": 1178 }, { "epoch": 0.1895117540687161, "grad_norm": 0.7552583813667297, "learning_rate": 0.00019812938000721833, "loss": 1.4786, "step": 1179 }, { "epoch": 0.18967249346996182, "grad_norm": 0.7632306814193726, "learning_rate": 0.00019812613271859837, "loss": 1.513, "step": 1180 }, { "epoch": 0.18983323287120754, "grad_norm": 0.7808113098144531, "learning_rate": 0.0001981228826405266, "loss": 1.5381, "step": 1181 }, { "epoch": 0.1899939722724533, "grad_norm": 0.7085968852043152, "learning_rate": 0.0001981196297730954, "loss": 1.7298, "step": 1182 }, { "epoch": 0.19015471167369902, "grad_norm": 0.6686164140701294, "learning_rate": 0.00019811637411639728, "loss": 1.4052, "step": 1183 }, { "epoch": 0.19031545107494474, "grad_norm": 0.6762080192565918, "learning_rate": 0.00019811311567052484, "loss": 1.4973, "step": 1184 }, { "epoch": 0.19047619047619047, "grad_norm": 0.7162718772888184, "learning_rate": 0.0001981098544355706, "loss": 1.6384, "step": 1185 }, { "epoch": 0.19063692987743622, "grad_norm": 0.7517910599708557, "learning_rate": 0.00019810659041162736, "loss": 1.5907, "step": 1186 }, { "epoch": 0.19079766927868194, "grad_norm": 0.599751353263855, "learning_rate": 0.00019810332359878783, "loss": 1.3331, "step": 1187 }, { "epoch": 0.19095840867992767, "grad_norm": 0.817899227142334, "learning_rate": 0.00019810005399714493, "loss": 1.5899, "step": 1188 }, { "epoch": 0.1911191480811734, "grad_norm": 0.6834189295768738, "learning_rate": 0.00019809678160679156, "loss": 1.5459, "step": 1189 }, { "epoch": 0.19127988748241911, "grad_norm": 0.673085629940033, "learning_rate": 0.0001980935064278208, "loss": 1.3817, "step": 1190 }, { "epoch": 0.19144062688366487, "grad_norm": 0.6122509837150574, "learning_rate": 0.00019809022846032574, "loss": 1.339, "step": 1191 }, { "epoch": 0.1916013662849106, "grad_norm": 0.701103150844574, "learning_rate": 0.00019808694770439954, "loss": 1.5397, "step": 1192 }, { "epoch": 0.19176210568615631, "grad_norm": 0.6010926961898804, "learning_rate": 0.00019808366416013544, "loss": 1.3007, "step": 1193 }, { "epoch": 0.19192284508740204, "grad_norm": 0.6925703287124634, "learning_rate": 0.00019808037782762685, "loss": 1.5775, "step": 1194 }, { "epoch": 0.1920835844886478, "grad_norm": 0.7055982947349548, "learning_rate": 0.00019807708870696713, "loss": 1.4841, "step": 1195 }, { "epoch": 0.19224432388989351, "grad_norm": 0.6629607677459717, "learning_rate": 0.00019807379679824983, "loss": 1.2402, "step": 1196 }, { "epoch": 0.19240506329113924, "grad_norm": 0.7545508146286011, "learning_rate": 0.00019807050210156853, "loss": 1.6469, "step": 1197 }, { "epoch": 0.19256580269238496, "grad_norm": 0.6428886651992798, "learning_rate": 0.0001980672046170168, "loss": 1.3159, "step": 1198 }, { "epoch": 0.19272654209363071, "grad_norm": 0.730990469455719, "learning_rate": 0.0001980639043446885, "loss": 1.4165, "step": 1199 }, { "epoch": 0.19288728149487644, "grad_norm": 0.7445430159568787, "learning_rate": 0.0001980606012846774, "loss": 1.756, "step": 1200 }, { "epoch": 0.19288728149487644, "eval_loss": 1.545569658279419, "eval_runtime": 46.2382, "eval_samples_per_second": 5.428, "eval_steps_per_second": 2.725, "step": 1200 }, { "epoch": 0.19304802089612216, "grad_norm": 0.67179274559021, "learning_rate": 0.00019805729543707733, "loss": 1.6741, "step": 1201 }, { "epoch": 0.1932087602973679, "grad_norm": 0.6980414390563965, "learning_rate": 0.00019805398680198234, "loss": 1.4657, "step": 1202 }, { "epoch": 0.1933694996986136, "grad_norm": 0.6632696390151978, "learning_rate": 0.00019805067537948648, "loss": 1.4316, "step": 1203 }, { "epoch": 0.19353023909985936, "grad_norm": 0.6613320708274841, "learning_rate": 0.0001980473611696839, "loss": 1.3664, "step": 1204 }, { "epoch": 0.1936909785011051, "grad_norm": 0.7007024884223938, "learning_rate": 0.00019804404417266875, "loss": 1.7447, "step": 1205 }, { "epoch": 0.1938517179023508, "grad_norm": 0.638676643371582, "learning_rate": 0.00019804072438853537, "loss": 1.3621, "step": 1206 }, { "epoch": 0.19401245730359654, "grad_norm": 0.6111052632331848, "learning_rate": 0.00019803740181737815, "loss": 1.3634, "step": 1207 }, { "epoch": 0.1941731967048423, "grad_norm": 0.7045163512229919, "learning_rate": 0.0001980340764592915, "loss": 1.4668, "step": 1208 }, { "epoch": 0.194333936106088, "grad_norm": 0.6874828338623047, "learning_rate": 0.00019803074831436997, "loss": 1.3373, "step": 1209 }, { "epoch": 0.19449467550733374, "grad_norm": 0.7531641125679016, "learning_rate": 0.00019802741738270813, "loss": 1.5993, "step": 1210 }, { "epoch": 0.19465541490857946, "grad_norm": 0.7526398301124573, "learning_rate": 0.00019802408366440072, "loss": 1.5801, "step": 1211 }, { "epoch": 0.19481615430982518, "grad_norm": 0.8172652721405029, "learning_rate": 0.00019802074715954252, "loss": 1.6697, "step": 1212 }, { "epoch": 0.19497689371107094, "grad_norm": 0.667203962802887, "learning_rate": 0.00019801740786822833, "loss": 1.5597, "step": 1213 }, { "epoch": 0.19513763311231666, "grad_norm": 0.65141361951828, "learning_rate": 0.0001980140657905531, "loss": 1.529, "step": 1214 }, { "epoch": 0.19529837251356238, "grad_norm": 0.6994457244873047, "learning_rate": 0.00019801072092661182, "loss": 1.5217, "step": 1215 }, { "epoch": 0.1954591119148081, "grad_norm": 0.6590254306793213, "learning_rate": 0.0001980073732764996, "loss": 1.1239, "step": 1216 }, { "epoch": 0.19561985131605386, "grad_norm": 0.7323867678642273, "learning_rate": 0.0001980040228403116, "loss": 1.6854, "step": 1217 }, { "epoch": 0.19578059071729959, "grad_norm": 0.7700620889663696, "learning_rate": 0.00019800066961814304, "loss": 1.8292, "step": 1218 }, { "epoch": 0.1959413301185453, "grad_norm": 0.6915645003318787, "learning_rate": 0.00019799731361008925, "loss": 1.5719, "step": 1219 }, { "epoch": 0.19610206951979103, "grad_norm": 0.6746589541435242, "learning_rate": 0.00019799395481624564, "loss": 1.5518, "step": 1220 }, { "epoch": 0.19626280892103676, "grad_norm": 0.6694447994232178, "learning_rate": 0.0001979905932367077, "loss": 1.284, "step": 1221 }, { "epoch": 0.1964235483222825, "grad_norm": 0.7434717416763306, "learning_rate": 0.000197987228871571, "loss": 1.6464, "step": 1222 }, { "epoch": 0.19658428772352823, "grad_norm": 0.769331157207489, "learning_rate": 0.00019798386172093112, "loss": 1.6859, "step": 1223 }, { "epoch": 0.19674502712477396, "grad_norm": 0.6608198881149292, "learning_rate": 0.00019798049178488384, "loss": 1.4835, "step": 1224 }, { "epoch": 0.19690576652601968, "grad_norm": 0.6092655658721924, "learning_rate": 0.00019797711906352497, "loss": 1.3608, "step": 1225 }, { "epoch": 0.19706650592726543, "grad_norm": 0.9822006821632385, "learning_rate": 0.00019797374355695033, "loss": 1.4617, "step": 1226 }, { "epoch": 0.19722724532851116, "grad_norm": 0.6498011946678162, "learning_rate": 0.0001979703652652559, "loss": 1.5431, "step": 1227 }, { "epoch": 0.19738798472975688, "grad_norm": 0.7252870798110962, "learning_rate": 0.0001979669841885377, "loss": 1.5367, "step": 1228 }, { "epoch": 0.1975487241310026, "grad_norm": 0.6279309391975403, "learning_rate": 0.00019796360032689188, "loss": 1.4126, "step": 1229 }, { "epoch": 0.19770946353224833, "grad_norm": 0.8412936925888062, "learning_rate": 0.00019796021368041463, "loss": 1.8251, "step": 1230 }, { "epoch": 0.19787020293349408, "grad_norm": 0.7239282131195068, "learning_rate": 0.00019795682424920218, "loss": 1.6416, "step": 1231 }, { "epoch": 0.1980309423347398, "grad_norm": 0.7501507997512817, "learning_rate": 0.00019795343203335091, "loss": 1.5328, "step": 1232 }, { "epoch": 0.19819168173598553, "grad_norm": 0.7320225834846497, "learning_rate": 0.00019795003703295725, "loss": 1.5339, "step": 1233 }, { "epoch": 0.19835242113723126, "grad_norm": 0.7265242338180542, "learning_rate": 0.00019794663924811772, "loss": 1.5287, "step": 1234 }, { "epoch": 0.198513160538477, "grad_norm": 0.6104831099510193, "learning_rate": 0.00019794323867892887, "loss": 1.4941, "step": 1235 }, { "epoch": 0.19867389993972273, "grad_norm": 0.6700667142868042, "learning_rate": 0.00019793983532548747, "loss": 1.4621, "step": 1236 }, { "epoch": 0.19883463934096846, "grad_norm": 0.7081518173217773, "learning_rate": 0.00019793642918789015, "loss": 1.6547, "step": 1237 }, { "epoch": 0.19899537874221418, "grad_norm": 0.7635113000869751, "learning_rate": 0.00019793302026623378, "loss": 1.6588, "step": 1238 }, { "epoch": 0.1991561181434599, "grad_norm": 0.7206833362579346, "learning_rate": 0.00019792960856061527, "loss": 1.4002, "step": 1239 }, { "epoch": 0.19931685754470566, "grad_norm": 0.6874716281890869, "learning_rate": 0.00019792619407113163, "loss": 1.4115, "step": 1240 }, { "epoch": 0.19947759694595138, "grad_norm": 0.7777805924415588, "learning_rate": 0.00019792277679787988, "loss": 1.5639, "step": 1241 }, { "epoch": 0.1996383363471971, "grad_norm": 0.6741442084312439, "learning_rate": 0.00019791935674095718, "loss": 1.4002, "step": 1242 }, { "epoch": 0.19979907574844283, "grad_norm": 0.7018924951553345, "learning_rate": 0.00019791593390046077, "loss": 1.5663, "step": 1243 }, { "epoch": 0.19995981514968858, "grad_norm": 0.7381893396377563, "learning_rate": 0.00019791250827648794, "loss": 1.5898, "step": 1244 }, { "epoch": 0.2001205545509343, "grad_norm": 0.6832000613212585, "learning_rate": 0.00019790907986913604, "loss": 1.4411, "step": 1245 }, { "epoch": 0.20028129395218003, "grad_norm": 0.7954955697059631, "learning_rate": 0.00019790564867850258, "loss": 1.6312, "step": 1246 }, { "epoch": 0.20044203335342575, "grad_norm": 0.7542526721954346, "learning_rate": 0.0001979022147046851, "loss": 1.6512, "step": 1247 }, { "epoch": 0.20060277275467148, "grad_norm": 0.7277452945709229, "learning_rate": 0.00019789877794778115, "loss": 1.4357, "step": 1248 }, { "epoch": 0.20076351215591723, "grad_norm": 0.6967638731002808, "learning_rate": 0.0001978953384078885, "loss": 1.6104, "step": 1249 }, { "epoch": 0.20092425155716295, "grad_norm": 0.7819350957870483, "learning_rate": 0.0001978918960851049, "loss": 1.6361, "step": 1250 }, { "epoch": 0.20108499095840868, "grad_norm": 0.6336327195167542, "learning_rate": 0.0001978884509795282, "loss": 1.3227, "step": 1251 }, { "epoch": 0.2012457303596544, "grad_norm": 0.7807596325874329, "learning_rate": 0.00019788500309125636, "loss": 1.5108, "step": 1252 }, { "epoch": 0.20140646976090015, "grad_norm": 0.665412187576294, "learning_rate": 0.00019788155242038736, "loss": 1.3882, "step": 1253 }, { "epoch": 0.20156720916214588, "grad_norm": 0.791504979133606, "learning_rate": 0.0001978780989670193, "loss": 1.5171, "step": 1254 }, { "epoch": 0.2017279485633916, "grad_norm": 0.828032910823822, "learning_rate": 0.00019787464273125037, "loss": 1.6291, "step": 1255 }, { "epoch": 0.20188868796463733, "grad_norm": 0.6686581969261169, "learning_rate": 0.0001978711837131788, "loss": 1.4995, "step": 1256 }, { "epoch": 0.20204942736588305, "grad_norm": 0.6874831318855286, "learning_rate": 0.00019786772191290292, "loss": 1.2959, "step": 1257 }, { "epoch": 0.2022101667671288, "grad_norm": 0.7285637259483337, "learning_rate": 0.00019786425733052116, "loss": 1.4933, "step": 1258 }, { "epoch": 0.20237090616837453, "grad_norm": 0.7068295478820801, "learning_rate": 0.000197860789966132, "loss": 1.57, "step": 1259 }, { "epoch": 0.20253164556962025, "grad_norm": 0.793444037437439, "learning_rate": 0.00019785731981983404, "loss": 1.5983, "step": 1260 }, { "epoch": 0.20269238497086597, "grad_norm": 0.8472902178764343, "learning_rate": 0.00019785384689172583, "loss": 1.541, "step": 1261 }, { "epoch": 0.20285312437211173, "grad_norm": 0.606543779373169, "learning_rate": 0.0001978503711819062, "loss": 1.2982, "step": 1262 }, { "epoch": 0.20301386377335745, "grad_norm": 0.7543560266494751, "learning_rate": 0.00019784689269047393, "loss": 1.5784, "step": 1263 }, { "epoch": 0.20317460317460317, "grad_norm": 0.746442437171936, "learning_rate": 0.00019784341141752785, "loss": 1.561, "step": 1264 }, { "epoch": 0.2033353425758489, "grad_norm": 0.6784173250198364, "learning_rate": 0.00019783992736316697, "loss": 1.2906, "step": 1265 }, { "epoch": 0.20349608197709462, "grad_norm": 0.7834222912788391, "learning_rate": 0.0001978364405274903, "loss": 1.4911, "step": 1266 }, { "epoch": 0.20365682137834037, "grad_norm": 0.6738572120666504, "learning_rate": 0.000197832950910597, "loss": 1.3069, "step": 1267 }, { "epoch": 0.2038175607795861, "grad_norm": 0.7621511816978455, "learning_rate": 0.00019782945851258628, "loss": 1.5429, "step": 1268 }, { "epoch": 0.20397830018083182, "grad_norm": 0.70794677734375, "learning_rate": 0.00019782596333355737, "loss": 1.6054, "step": 1269 }, { "epoch": 0.20413903958207755, "grad_norm": 0.7890564799308777, "learning_rate": 0.00019782246537360963, "loss": 1.9386, "step": 1270 }, { "epoch": 0.2042997789833233, "grad_norm": 0.7436431050300598, "learning_rate": 0.00019781896463284254, "loss": 1.6176, "step": 1271 }, { "epoch": 0.20446051838456902, "grad_norm": 0.8092471361160278, "learning_rate": 0.0001978154611113556, "loss": 1.659, "step": 1272 }, { "epoch": 0.20462125778581475, "grad_norm": 0.7299943566322327, "learning_rate": 0.00019781195480924837, "loss": 1.4607, "step": 1273 }, { "epoch": 0.20478199718706047, "grad_norm": 0.6902081966400146, "learning_rate": 0.00019780844572662057, "loss": 1.4626, "step": 1274 }, { "epoch": 0.2049427365883062, "grad_norm": 0.6944059133529663, "learning_rate": 0.00019780493386357194, "loss": 1.1627, "step": 1275 }, { "epoch": 0.20510347598955195, "grad_norm": 0.7431588768959045, "learning_rate": 0.00019780141922020228, "loss": 1.5735, "step": 1276 }, { "epoch": 0.20526421539079767, "grad_norm": 0.7686641812324524, "learning_rate": 0.00019779790179661158, "loss": 1.8253, "step": 1277 }, { "epoch": 0.2054249547920434, "grad_norm": 0.7286105751991272, "learning_rate": 0.00019779438159289975, "loss": 1.3048, "step": 1278 }, { "epoch": 0.20558569419328912, "grad_norm": 0.7042977809906006, "learning_rate": 0.00019779085860916688, "loss": 1.4048, "step": 1279 }, { "epoch": 0.20574643359453487, "grad_norm": 0.6236233711242676, "learning_rate": 0.00019778733284551314, "loss": 1.518, "step": 1280 }, { "epoch": 0.2059071729957806, "grad_norm": 0.7067104578018188, "learning_rate": 0.00019778380430203875, "loss": 1.5732, "step": 1281 }, { "epoch": 0.20606791239702632, "grad_norm": 0.7254002690315247, "learning_rate": 0.000197780272978844, "loss": 1.6661, "step": 1282 }, { "epoch": 0.20622865179827204, "grad_norm": 0.7225091457366943, "learning_rate": 0.00019777673887602933, "loss": 1.57, "step": 1283 }, { "epoch": 0.20638939119951777, "grad_norm": 0.908635139465332, "learning_rate": 0.00019777320199369515, "loss": 1.4183, "step": 1284 }, { "epoch": 0.20655013060076352, "grad_norm": 0.6938445568084717, "learning_rate": 0.00019776966233194196, "loss": 1.316, "step": 1285 }, { "epoch": 0.20671087000200924, "grad_norm": 0.7093690037727356, "learning_rate": 0.0001977661198908705, "loss": 1.4816, "step": 1286 }, { "epoch": 0.20687160940325497, "grad_norm": 0.7317521572113037, "learning_rate": 0.0001977625746705814, "loss": 1.7116, "step": 1287 }, { "epoch": 0.2070323488045007, "grad_norm": 0.755088746547699, "learning_rate": 0.00019775902667117543, "loss": 1.6772, "step": 1288 }, { "epoch": 0.20719308820574645, "grad_norm": 0.7315171957015991, "learning_rate": 0.0001977554758927535, "loss": 1.4317, "step": 1289 }, { "epoch": 0.20735382760699217, "grad_norm": 0.6786336302757263, "learning_rate": 0.0001977519223354165, "loss": 1.6495, "step": 1290 }, { "epoch": 0.2075145670082379, "grad_norm": 0.8056592345237732, "learning_rate": 0.00019774836599926546, "loss": 1.9436, "step": 1291 }, { "epoch": 0.20767530640948362, "grad_norm": 0.6870949268341064, "learning_rate": 0.00019774480688440152, "loss": 1.4769, "step": 1292 }, { "epoch": 0.20783604581072934, "grad_norm": 0.7655941247940063, "learning_rate": 0.00019774124499092582, "loss": 1.5108, "step": 1293 }, { "epoch": 0.2079967852119751, "grad_norm": 0.7392338514328003, "learning_rate": 0.0001977376803189396, "loss": 1.52, "step": 1294 }, { "epoch": 0.20815752461322082, "grad_norm": 0.6936318278312683, "learning_rate": 0.00019773411286854418, "loss": 1.5916, "step": 1295 }, { "epoch": 0.20831826401446654, "grad_norm": 0.6606235504150391, "learning_rate": 0.00019773054263984104, "loss": 1.4436, "step": 1296 }, { "epoch": 0.20847900341571227, "grad_norm": 0.7673588991165161, "learning_rate": 0.00019772696963293163, "loss": 1.4643, "step": 1297 }, { "epoch": 0.20863974281695802, "grad_norm": 0.6829037070274353, "learning_rate": 0.00019772339384791749, "loss": 1.4748, "step": 1298 }, { "epoch": 0.20880048221820374, "grad_norm": 0.7443533539772034, "learning_rate": 0.00019771981528490034, "loss": 1.6911, "step": 1299 }, { "epoch": 0.20896122161944947, "grad_norm": 0.7020978927612305, "learning_rate": 0.00019771623394398188, "loss": 1.5838, "step": 1300 }, { "epoch": 0.2091219610206952, "grad_norm": 0.722138524055481, "learning_rate": 0.0001977126498252639, "loss": 1.2501, "step": 1301 }, { "epoch": 0.20928270042194091, "grad_norm": 0.7305482625961304, "learning_rate": 0.0001977090629288483, "loss": 1.4785, "step": 1302 }, { "epoch": 0.20944343982318667, "grad_norm": 0.7888860106468201, "learning_rate": 0.000197705473254837, "loss": 1.5229, "step": 1303 }, { "epoch": 0.2096041792244324, "grad_norm": 0.7461264133453369, "learning_rate": 0.00019770188080333212, "loss": 1.5321, "step": 1304 }, { "epoch": 0.20976491862567812, "grad_norm": 0.7168543934822083, "learning_rate": 0.00019769828557443574, "loss": 1.4967, "step": 1305 }, { "epoch": 0.20992565802692384, "grad_norm": 0.7029650807380676, "learning_rate": 0.00019769468756825008, "loss": 1.793, "step": 1306 }, { "epoch": 0.2100863974281696, "grad_norm": 0.7450799942016602, "learning_rate": 0.0001976910867848774, "loss": 1.7109, "step": 1307 }, { "epoch": 0.21024713682941532, "grad_norm": 0.6896770596504211, "learning_rate": 0.00019768748322442008, "loss": 1.428, "step": 1308 }, { "epoch": 0.21040787623066104, "grad_norm": 0.6107721328735352, "learning_rate": 0.00019768387688698055, "loss": 1.3899, "step": 1309 }, { "epoch": 0.21056861563190676, "grad_norm": 0.6338157057762146, "learning_rate": 0.00019768026777266132, "loss": 1.3852, "step": 1310 }, { "epoch": 0.2107293550331525, "grad_norm": 0.8358496427536011, "learning_rate": 0.00019767665588156502, "loss": 1.5284, "step": 1311 }, { "epoch": 0.21089009443439824, "grad_norm": 0.6763738989830017, "learning_rate": 0.00019767304121379433, "loss": 1.3295, "step": 1312 }, { "epoch": 0.21105083383564396, "grad_norm": 0.7071878910064697, "learning_rate": 0.00019766942376945194, "loss": 1.4394, "step": 1313 }, { "epoch": 0.2112115732368897, "grad_norm": 0.7351413369178772, "learning_rate": 0.00019766580354864074, "loss": 1.5911, "step": 1314 }, { "epoch": 0.2113723126381354, "grad_norm": 0.7541038990020752, "learning_rate": 0.00019766218055146357, "loss": 1.5336, "step": 1315 }, { "epoch": 0.21153305203938116, "grad_norm": 1.0637314319610596, "learning_rate": 0.00019765855477802354, "loss": 1.5893, "step": 1316 }, { "epoch": 0.2116937914406269, "grad_norm": 0.6544129848480225, "learning_rate": 0.00019765492622842362, "loss": 1.4508, "step": 1317 }, { "epoch": 0.2118545308418726, "grad_norm": 0.656480610370636, "learning_rate": 0.00019765129490276702, "loss": 1.3438, "step": 1318 }, { "epoch": 0.21201527024311834, "grad_norm": 0.6886926889419556, "learning_rate": 0.00019764766080115694, "loss": 1.1569, "step": 1319 }, { "epoch": 0.2121760096443641, "grad_norm": 0.781882643699646, "learning_rate": 0.0001976440239236967, "loss": 1.7614, "step": 1320 }, { "epoch": 0.2123367490456098, "grad_norm": 0.7606453895568848, "learning_rate": 0.00019764038427048965, "loss": 1.569, "step": 1321 }, { "epoch": 0.21249748844685554, "grad_norm": 0.6708787083625793, "learning_rate": 0.00019763674184163928, "loss": 1.3577, "step": 1322 }, { "epoch": 0.21265822784810126, "grad_norm": 0.6980111002922058, "learning_rate": 0.00019763309663724918, "loss": 1.7473, "step": 1323 }, { "epoch": 0.21281896724934699, "grad_norm": 0.7390298843383789, "learning_rate": 0.00019762944865742293, "loss": 1.4104, "step": 1324 }, { "epoch": 0.21297970665059274, "grad_norm": 0.7732172012329102, "learning_rate": 0.00019762579790226422, "loss": 1.6043, "step": 1325 }, { "epoch": 0.21314044605183846, "grad_norm": 0.7688336372375488, "learning_rate": 0.00019762214437187684, "loss": 1.7178, "step": 1326 }, { "epoch": 0.21330118545308419, "grad_norm": 0.6932942867279053, "learning_rate": 0.00019761848806636463, "loss": 1.351, "step": 1327 }, { "epoch": 0.2134619248543299, "grad_norm": 0.7135957479476929, "learning_rate": 0.00019761482898583157, "loss": 1.4938, "step": 1328 }, { "epoch": 0.21362266425557566, "grad_norm": 0.7713948488235474, "learning_rate": 0.00019761116713038165, "loss": 1.3911, "step": 1329 }, { "epoch": 0.21378340365682139, "grad_norm": 0.9659371376037598, "learning_rate": 0.000197607502500119, "loss": 1.2676, "step": 1330 }, { "epoch": 0.2139441430580671, "grad_norm": 0.6909787654876709, "learning_rate": 0.00019760383509514776, "loss": 1.5638, "step": 1331 }, { "epoch": 0.21410488245931283, "grad_norm": 0.8176058530807495, "learning_rate": 0.00019760016491557217, "loss": 1.5326, "step": 1332 }, { "epoch": 0.21426562186055856, "grad_norm": 0.7246799468994141, "learning_rate": 0.00019759649196149663, "loss": 1.406, "step": 1333 }, { "epoch": 0.2144263612618043, "grad_norm": 0.8099504709243774, "learning_rate": 0.0001975928162330255, "loss": 1.2367, "step": 1334 }, { "epoch": 0.21458710066305003, "grad_norm": 0.6552138924598694, "learning_rate": 0.00019758913773026325, "loss": 1.4288, "step": 1335 }, { "epoch": 0.21474784006429576, "grad_norm": 0.7793170809745789, "learning_rate": 0.00019758545645331453, "loss": 1.6676, "step": 1336 }, { "epoch": 0.21490857946554148, "grad_norm": 0.7399846911430359, "learning_rate": 0.00019758177240228387, "loss": 1.7003, "step": 1337 }, { "epoch": 0.21506931886678723, "grad_norm": 0.7318131923675537, "learning_rate": 0.00019757808557727613, "loss": 1.6328, "step": 1338 }, { "epoch": 0.21523005826803296, "grad_norm": 0.7355200052261353, "learning_rate": 0.00019757439597839602, "loss": 1.5077, "step": 1339 }, { "epoch": 0.21539079766927868, "grad_norm": 0.6127026677131653, "learning_rate": 0.00019757070360574845, "loss": 1.4387, "step": 1340 }, { "epoch": 0.2155515370705244, "grad_norm": 0.6979219317436218, "learning_rate": 0.00019756700845943842, "loss": 1.3754, "step": 1341 }, { "epoch": 0.21571227647177013, "grad_norm": 0.6740726232528687, "learning_rate": 0.00019756331053957094, "loss": 1.3754, "step": 1342 }, { "epoch": 0.21587301587301588, "grad_norm": 0.6519074440002441, "learning_rate": 0.0001975596098462511, "loss": 1.6164, "step": 1343 }, { "epoch": 0.2160337552742616, "grad_norm": 0.7296764254570007, "learning_rate": 0.00019755590637958415, "loss": 1.5754, "step": 1344 }, { "epoch": 0.21619449467550733, "grad_norm": 0.7005932331085205, "learning_rate": 0.00019755220013967538, "loss": 1.4878, "step": 1345 }, { "epoch": 0.21635523407675306, "grad_norm": 0.7072553038597107, "learning_rate": 0.0001975484911266301, "loss": 1.8121, "step": 1346 }, { "epoch": 0.2165159734779988, "grad_norm": 0.6771455407142639, "learning_rate": 0.00019754477934055378, "loss": 1.6194, "step": 1347 }, { "epoch": 0.21667671287924453, "grad_norm": 0.7661219239234924, "learning_rate": 0.00019754106478155192, "loss": 1.657, "step": 1348 }, { "epoch": 0.21683745228049026, "grad_norm": 0.8463554382324219, "learning_rate": 0.00019753734744973008, "loss": 1.7663, "step": 1349 }, { "epoch": 0.21699819168173598, "grad_norm": 0.7401540279388428, "learning_rate": 0.00019753362734519403, "loss": 1.4607, "step": 1350 }, { "epoch": 0.2171589310829817, "grad_norm": 0.5521244406700134, "learning_rate": 0.00019752990446804947, "loss": 1.2979, "step": 1351 }, { "epoch": 0.21731967048422746, "grad_norm": 0.7465888857841492, "learning_rate": 0.00019752617881840218, "loss": 1.5033, "step": 1352 }, { "epoch": 0.21748040988547318, "grad_norm": 0.6680389046669006, "learning_rate": 0.00019752245039635815, "loss": 1.3693, "step": 1353 }, { "epoch": 0.2176411492867189, "grad_norm": 0.7882882356643677, "learning_rate": 0.0001975187192020233, "loss": 1.72, "step": 1354 }, { "epoch": 0.21780188868796463, "grad_norm": 0.791904866695404, "learning_rate": 0.00019751498523550376, "loss": 1.6057, "step": 1355 }, { "epoch": 0.21796262808921038, "grad_norm": 0.6888086795806885, "learning_rate": 0.00019751124849690563, "loss": 1.4086, "step": 1356 }, { "epoch": 0.2181233674904561, "grad_norm": 0.669700026512146, "learning_rate": 0.00019750750898633518, "loss": 1.3308, "step": 1357 }, { "epoch": 0.21828410689170183, "grad_norm": 0.7366477847099304, "learning_rate": 0.00019750376670389866, "loss": 1.5087, "step": 1358 }, { "epoch": 0.21844484629294755, "grad_norm": 0.7193100452423096, "learning_rate": 0.0001975000216497025, "loss": 1.7334, "step": 1359 }, { "epoch": 0.21860558569419328, "grad_norm": 0.7943227291107178, "learning_rate": 0.00019749627382385313, "loss": 1.4731, "step": 1360 }, { "epoch": 0.21876632509543903, "grad_norm": 0.7057292461395264, "learning_rate": 0.0001974925232264571, "loss": 1.4216, "step": 1361 }, { "epoch": 0.21892706449668475, "grad_norm": 0.6292518377304077, "learning_rate": 0.00019748876985762104, "loss": 1.2865, "step": 1362 }, { "epoch": 0.21908780389793048, "grad_norm": 0.7365127801895142, "learning_rate": 0.00019748501371745164, "loss": 1.5949, "step": 1363 }, { "epoch": 0.2192485432991762, "grad_norm": 0.7037795782089233, "learning_rate": 0.00019748125480605565, "loss": 1.5181, "step": 1364 }, { "epoch": 0.21940928270042195, "grad_norm": 0.7325087785720825, "learning_rate": 0.00019747749312353996, "loss": 1.5521, "step": 1365 }, { "epoch": 0.21957002210166768, "grad_norm": 0.7194676399230957, "learning_rate": 0.0001974737286700115, "loss": 1.5564, "step": 1366 }, { "epoch": 0.2197307615029134, "grad_norm": 0.6708190441131592, "learning_rate": 0.0001974699614455773, "loss": 1.3061, "step": 1367 }, { "epoch": 0.21989150090415913, "grad_norm": 0.657558023929596, "learning_rate": 0.00019746619145034438, "loss": 1.3375, "step": 1368 }, { "epoch": 0.22005224030540485, "grad_norm": 0.6997655630111694, "learning_rate": 0.00019746241868442, "loss": 1.2802, "step": 1369 }, { "epoch": 0.2202129797066506, "grad_norm": 0.7380467653274536, "learning_rate": 0.00019745864314791136, "loss": 1.5948, "step": 1370 }, { "epoch": 0.22037371910789633, "grad_norm": 0.6596185564994812, "learning_rate": 0.0001974548648409258, "loss": 1.3751, "step": 1371 }, { "epoch": 0.22053445850914205, "grad_norm": 0.7361506819725037, "learning_rate": 0.00019745108376357072, "loss": 1.4007, "step": 1372 }, { "epoch": 0.22069519791038777, "grad_norm": 0.6861355304718018, "learning_rate": 0.00019744729991595357, "loss": 1.3341, "step": 1373 }, { "epoch": 0.22085593731163353, "grad_norm": 0.785514771938324, "learning_rate": 0.000197443513298182, "loss": 1.7745, "step": 1374 }, { "epoch": 0.22101667671287925, "grad_norm": 0.672110915184021, "learning_rate": 0.00019743972391036358, "loss": 1.4018, "step": 1375 }, { "epoch": 0.22117741611412498, "grad_norm": 0.6416640281677246, "learning_rate": 0.0001974359317526061, "loss": 1.2499, "step": 1376 }, { "epoch": 0.2213381555153707, "grad_norm": 0.6889432668685913, "learning_rate": 0.00019743213682501723, "loss": 1.4406, "step": 1377 }, { "epoch": 0.22149889491661642, "grad_norm": 0.7781295776367188, "learning_rate": 0.000197428339127705, "loss": 1.5603, "step": 1378 }, { "epoch": 0.22165963431786218, "grad_norm": 0.680404007434845, "learning_rate": 0.00019742453866077733, "loss": 1.4132, "step": 1379 }, { "epoch": 0.2218203737191079, "grad_norm": 0.7101126909255981, "learning_rate": 0.00019742073542434218, "loss": 1.624, "step": 1380 }, { "epoch": 0.22198111312035362, "grad_norm": 0.6722477078437805, "learning_rate": 0.00019741692941850774, "loss": 1.4696, "step": 1381 }, { "epoch": 0.22214185252159935, "grad_norm": 0.7227091789245605, "learning_rate": 0.00019741312064338218, "loss": 1.5043, "step": 1382 }, { "epoch": 0.2223025919228451, "grad_norm": 0.6815536618232727, "learning_rate": 0.00019740930909907376, "loss": 1.54, "step": 1383 }, { "epoch": 0.22246333132409082, "grad_norm": 0.8260564208030701, "learning_rate": 0.00019740549478569085, "loss": 1.7086, "step": 1384 }, { "epoch": 0.22262407072533655, "grad_norm": 0.6997241377830505, "learning_rate": 0.0001974016777033419, "loss": 1.6005, "step": 1385 }, { "epoch": 0.22278481012658227, "grad_norm": 0.7637419700622559, "learning_rate": 0.00019739785785213536, "loss": 1.7637, "step": 1386 }, { "epoch": 0.222945549527828, "grad_norm": 0.704301655292511, "learning_rate": 0.00019739403523217987, "loss": 1.4049, "step": 1387 }, { "epoch": 0.22310628892907375, "grad_norm": 0.7317809462547302, "learning_rate": 0.00019739020984358408, "loss": 1.5613, "step": 1388 }, { "epoch": 0.22326702833031947, "grad_norm": 0.654913604259491, "learning_rate": 0.00019738638168645674, "loss": 1.3394, "step": 1389 }, { "epoch": 0.2234277677315652, "grad_norm": 0.8053205013275146, "learning_rate": 0.00019738255076090665, "loss": 1.6133, "step": 1390 }, { "epoch": 0.22358850713281092, "grad_norm": 0.6664522290229797, "learning_rate": 0.00019737871706704274, "loss": 1.3962, "step": 1391 }, { "epoch": 0.22374924653405667, "grad_norm": 0.6868380308151245, "learning_rate": 0.00019737488060497398, "loss": 1.5471, "step": 1392 }, { "epoch": 0.2239099859353024, "grad_norm": 0.8988280296325684, "learning_rate": 0.00019737104137480943, "loss": 1.4826, "step": 1393 }, { "epoch": 0.22407072533654812, "grad_norm": 0.6629508137702942, "learning_rate": 0.00019736719937665825, "loss": 1.5792, "step": 1394 }, { "epoch": 0.22423146473779385, "grad_norm": 0.8656284213066101, "learning_rate": 0.0001973633546106296, "loss": 1.7012, "step": 1395 }, { "epoch": 0.22439220413903957, "grad_norm": 0.7026286721229553, "learning_rate": 0.0001973595070768328, "loss": 1.5172, "step": 1396 }, { "epoch": 0.22455294354028532, "grad_norm": 0.6253560781478882, "learning_rate": 0.00019735565677537727, "loss": 1.4713, "step": 1397 }, { "epoch": 0.22471368294153105, "grad_norm": 0.7205674648284912, "learning_rate": 0.00019735180370637244, "loss": 1.4025, "step": 1398 }, { "epoch": 0.22487442234277677, "grad_norm": 0.7228774428367615, "learning_rate": 0.00019734794786992782, "loss": 1.4402, "step": 1399 }, { "epoch": 0.2250351617440225, "grad_norm": 0.6778046488761902, "learning_rate": 0.00019734408926615308, "loss": 1.5481, "step": 1400 }, { "epoch": 0.2250351617440225, "eval_loss": 1.5407177209854126, "eval_runtime": 46.2492, "eval_samples_per_second": 5.427, "eval_steps_per_second": 2.724, "step": 1400 }, { "epoch": 0.22519590114526825, "grad_norm": 0.694013237953186, "learning_rate": 0.0001973402278951578, "loss": 1.7726, "step": 1401 }, { "epoch": 0.22535664054651397, "grad_norm": 0.7833890318870544, "learning_rate": 0.00019733636375705182, "loss": 1.6661, "step": 1402 }, { "epoch": 0.2255173799477597, "grad_norm": 0.7424169778823853, "learning_rate": 0.000197332496851945, "loss": 1.636, "step": 1403 }, { "epoch": 0.22567811934900542, "grad_norm": 0.6978761553764343, "learning_rate": 0.00019732862717994722, "loss": 1.5885, "step": 1404 }, { "epoch": 0.22583885875025114, "grad_norm": 0.7243667840957642, "learning_rate": 0.00019732475474116854, "loss": 1.6548, "step": 1405 }, { "epoch": 0.2259995981514969, "grad_norm": 0.6841996312141418, "learning_rate": 0.00019732087953571898, "loss": 1.3699, "step": 1406 }, { "epoch": 0.22616033755274262, "grad_norm": 0.6805827617645264, "learning_rate": 0.00019731700156370874, "loss": 1.4973, "step": 1407 }, { "epoch": 0.22632107695398834, "grad_norm": 1.8069205284118652, "learning_rate": 0.00019731312082524807, "loss": 1.8448, "step": 1408 }, { "epoch": 0.22648181635523407, "grad_norm": 0.7073130011558533, "learning_rate": 0.0001973092373204472, "loss": 1.5624, "step": 1409 }, { "epoch": 0.22664255575647982, "grad_norm": 1.1331696510314941, "learning_rate": 0.00019730535104941666, "loss": 1.7573, "step": 1410 }, { "epoch": 0.22680329515772554, "grad_norm": 0.6937152743339539, "learning_rate": 0.00019730146201226686, "loss": 1.4995, "step": 1411 }, { "epoch": 0.22696403455897127, "grad_norm": 0.655080258846283, "learning_rate": 0.00019729757020910834, "loss": 1.5359, "step": 1412 }, { "epoch": 0.227124773960217, "grad_norm": 0.7997439503669739, "learning_rate": 0.00019729367564005177, "loss": 1.5791, "step": 1413 }, { "epoch": 0.22728551336146272, "grad_norm": 0.6660904884338379, "learning_rate": 0.0001972897783052078, "loss": 1.4116, "step": 1414 }, { "epoch": 0.22744625276270847, "grad_norm": 0.7106243968009949, "learning_rate": 0.0001972858782046873, "loss": 1.6668, "step": 1415 }, { "epoch": 0.2276069921639542, "grad_norm": 0.6419256925582886, "learning_rate": 0.00019728197533860109, "loss": 1.4085, "step": 1416 }, { "epoch": 0.22776773156519992, "grad_norm": 0.8591603636741638, "learning_rate": 0.00019727806970706014, "loss": 2.0589, "step": 1417 }, { "epoch": 0.22792847096644564, "grad_norm": 0.7074927091598511, "learning_rate": 0.00019727416131017544, "loss": 1.5451, "step": 1418 }, { "epoch": 0.2280892103676914, "grad_norm": 0.688016951084137, "learning_rate": 0.00019727025014805815, "loss": 1.3276, "step": 1419 }, { "epoch": 0.22824994976893712, "grad_norm": 0.7403061389923096, "learning_rate": 0.0001972663362208194, "loss": 1.4251, "step": 1420 }, { "epoch": 0.22841068917018284, "grad_norm": 0.6824564933776855, "learning_rate": 0.00019726241952857046, "loss": 1.4929, "step": 1421 }, { "epoch": 0.22857142857142856, "grad_norm": 0.6732343435287476, "learning_rate": 0.00019725850007142273, "loss": 1.3892, "step": 1422 }, { "epoch": 0.2287321679726743, "grad_norm": 0.8433747887611389, "learning_rate": 0.00019725457784948756, "loss": 1.5676, "step": 1423 }, { "epoch": 0.22889290737392004, "grad_norm": 0.7220037579536438, "learning_rate": 0.00019725065286287644, "loss": 1.4886, "step": 1424 }, { "epoch": 0.22905364677516576, "grad_norm": 0.7229512929916382, "learning_rate": 0.00019724672511170103, "loss": 1.6735, "step": 1425 }, { "epoch": 0.2292143861764115, "grad_norm": 0.8087705969810486, "learning_rate": 0.00019724279459607292, "loss": 1.8812, "step": 1426 }, { "epoch": 0.2293751255776572, "grad_norm": 0.7153734564781189, "learning_rate": 0.00019723886131610386, "loss": 1.3575, "step": 1427 }, { "epoch": 0.22953586497890296, "grad_norm": 0.7441943883895874, "learning_rate": 0.00019723492527190563, "loss": 1.4724, "step": 1428 }, { "epoch": 0.2296966043801487, "grad_norm": 0.7115532159805298, "learning_rate": 0.00019723098646359018, "loss": 1.2639, "step": 1429 }, { "epoch": 0.2298573437813944, "grad_norm": 0.7130035161972046, "learning_rate": 0.00019722704489126945, "loss": 1.6123, "step": 1430 }, { "epoch": 0.23001808318264014, "grad_norm": 0.6615103483200073, "learning_rate": 0.00019722310055505547, "loss": 1.3638, "step": 1431 }, { "epoch": 0.23017882258388586, "grad_norm": 0.8150805830955505, "learning_rate": 0.00019721915345506042, "loss": 1.4364, "step": 1432 }, { "epoch": 0.2303395619851316, "grad_norm": 0.7070127725601196, "learning_rate": 0.00019721520359139642, "loss": 1.5666, "step": 1433 }, { "epoch": 0.23050030138637734, "grad_norm": 0.7641879320144653, "learning_rate": 0.00019721125096417583, "loss": 1.2854, "step": 1434 }, { "epoch": 0.23066104078762306, "grad_norm": 0.7574676275253296, "learning_rate": 0.00019720729557351098, "loss": 1.4154, "step": 1435 }, { "epoch": 0.23082178018886879, "grad_norm": 0.721956729888916, "learning_rate": 0.0001972033374195143, "loss": 1.3966, "step": 1436 }, { "epoch": 0.23098251959011454, "grad_norm": 0.6984285712242126, "learning_rate": 0.00019719937650229837, "loss": 1.4438, "step": 1437 }, { "epoch": 0.23114325899136026, "grad_norm": 0.833499014377594, "learning_rate": 0.0001971954128219757, "loss": 1.5342, "step": 1438 }, { "epoch": 0.231303998392606, "grad_norm": 0.7875803112983704, "learning_rate": 0.00019719144637865904, "loss": 1.8439, "step": 1439 }, { "epoch": 0.2314647377938517, "grad_norm": 0.6940380334854126, "learning_rate": 0.00019718747717246108, "loss": 1.329, "step": 1440 }, { "epoch": 0.23162547719509743, "grad_norm": 0.6891353726387024, "learning_rate": 0.0001971835052034947, "loss": 1.4655, "step": 1441 }, { "epoch": 0.2317862165963432, "grad_norm": 0.777470052242279, "learning_rate": 0.00019717953047187283, "loss": 1.68, "step": 1442 }, { "epoch": 0.2319469559975889, "grad_norm": 0.7493615746498108, "learning_rate": 0.0001971755529777084, "loss": 1.4926, "step": 1443 }, { "epoch": 0.23210769539883463, "grad_norm": 0.7804036140441895, "learning_rate": 0.0001971715727211145, "loss": 1.5715, "step": 1444 }, { "epoch": 0.23226843480008036, "grad_norm": 0.6464049220085144, "learning_rate": 0.0001971675897022043, "loss": 1.4153, "step": 1445 }, { "epoch": 0.2324291742013261, "grad_norm": 0.6479018926620483, "learning_rate": 0.00019716360392109102, "loss": 1.305, "step": 1446 }, { "epoch": 0.23258991360257184, "grad_norm": 0.6475626826286316, "learning_rate": 0.00019715961537788794, "loss": 1.4537, "step": 1447 }, { "epoch": 0.23275065300381756, "grad_norm": 0.7109667658805847, "learning_rate": 0.00019715562407270846, "loss": 1.4869, "step": 1448 }, { "epoch": 0.23291139240506328, "grad_norm": 0.7047341465950012, "learning_rate": 0.00019715163000566605, "loss": 1.5403, "step": 1449 }, { "epoch": 0.23307213180630904, "grad_norm": 0.6944315433502197, "learning_rate": 0.00019714763317687424, "loss": 1.4607, "step": 1450 }, { "epoch": 0.23323287120755476, "grad_norm": 0.7634619474411011, "learning_rate": 0.00019714363358644667, "loss": 1.4653, "step": 1451 }, { "epoch": 0.23339361060880048, "grad_norm": 0.6927353143692017, "learning_rate": 0.000197139631234497, "loss": 1.5208, "step": 1452 }, { "epoch": 0.2335543500100462, "grad_norm": 0.8091005682945251, "learning_rate": 0.00019713562612113901, "loss": 1.7616, "step": 1453 }, { "epoch": 0.23371508941129193, "grad_norm": 0.761034369468689, "learning_rate": 0.00019713161824648658, "loss": 1.4041, "step": 1454 }, { "epoch": 0.23387582881253768, "grad_norm": 0.8238832950592041, "learning_rate": 0.00019712760761065364, "loss": 1.7323, "step": 1455 }, { "epoch": 0.2340365682137834, "grad_norm": 0.7328007221221924, "learning_rate": 0.0001971235942137542, "loss": 1.4264, "step": 1456 }, { "epoch": 0.23419730761502913, "grad_norm": 0.6987152099609375, "learning_rate": 0.00019711957805590232, "loss": 1.5128, "step": 1457 }, { "epoch": 0.23435804701627486, "grad_norm": 0.6820961833000183, "learning_rate": 0.0001971155591372122, "loss": 1.2764, "step": 1458 }, { "epoch": 0.2345187864175206, "grad_norm": 0.5968998074531555, "learning_rate": 0.00019711153745779805, "loss": 1.3064, "step": 1459 }, { "epoch": 0.23467952581876633, "grad_norm": 0.72563236951828, "learning_rate": 0.00019710751301777423, "loss": 1.5623, "step": 1460 }, { "epoch": 0.23484026522001206, "grad_norm": 0.8228567838668823, "learning_rate": 0.00019710348581725517, "loss": 1.4843, "step": 1461 }, { "epoch": 0.23500100462125778, "grad_norm": 0.70112544298172, "learning_rate": 0.0001970994558563553, "loss": 1.4598, "step": 1462 }, { "epoch": 0.2351617440225035, "grad_norm": 0.8298900723457336, "learning_rate": 0.00019709542313518918, "loss": 1.3995, "step": 1463 }, { "epoch": 0.23532248342374926, "grad_norm": 0.8033958077430725, "learning_rate": 0.0001970913876538715, "loss": 1.6308, "step": 1464 }, { "epoch": 0.23548322282499498, "grad_norm": 0.7013473510742188, "learning_rate": 0.0001970873494125169, "loss": 1.2786, "step": 1465 }, { "epoch": 0.2356439622262407, "grad_norm": 0.762883186340332, "learning_rate": 0.00019708330841124025, "loss": 1.7727, "step": 1466 }, { "epoch": 0.23580470162748643, "grad_norm": 0.7408198714256287, "learning_rate": 0.00019707926465015638, "loss": 1.5733, "step": 1467 }, { "epoch": 0.23596544102873218, "grad_norm": 0.7526084184646606, "learning_rate": 0.0001970752181293803, "loss": 1.5219, "step": 1468 }, { "epoch": 0.2361261804299779, "grad_norm": 0.7172741293907166, "learning_rate": 0.00019707116884902693, "loss": 1.4719, "step": 1469 }, { "epoch": 0.23628691983122363, "grad_norm": 0.7066395878791809, "learning_rate": 0.0001970671168092115, "loss": 1.3594, "step": 1470 }, { "epoch": 0.23644765923246935, "grad_norm": 0.7266785502433777, "learning_rate": 0.0001970630620100491, "loss": 1.5116, "step": 1471 }, { "epoch": 0.23660839863371508, "grad_norm": 0.7274678945541382, "learning_rate": 0.00019705900445165505, "loss": 1.4462, "step": 1472 }, { "epoch": 0.23676913803496083, "grad_norm": 0.7007945775985718, "learning_rate": 0.00019705494413414473, "loss": 1.5114, "step": 1473 }, { "epoch": 0.23692987743620655, "grad_norm": 0.6715408563613892, "learning_rate": 0.0001970508810576335, "loss": 1.3891, "step": 1474 }, { "epoch": 0.23709061683745228, "grad_norm": 0.7479880452156067, "learning_rate": 0.00019704681522223687, "loss": 1.4154, "step": 1475 }, { "epoch": 0.237251356238698, "grad_norm": 0.7109825015068054, "learning_rate": 0.00019704274662807043, "loss": 1.5061, "step": 1476 }, { "epoch": 0.23741209563994375, "grad_norm": 0.6982572674751282, "learning_rate": 0.00019703867527524985, "loss": 1.5695, "step": 1477 }, { "epoch": 0.23757283504118948, "grad_norm": 0.6960623264312744, "learning_rate": 0.00019703460116389087, "loss": 1.5356, "step": 1478 }, { "epoch": 0.2377335744424352, "grad_norm": 0.8089622259140015, "learning_rate": 0.0001970305242941093, "loss": 1.5421, "step": 1479 }, { "epoch": 0.23789431384368093, "grad_norm": 0.8301507234573364, "learning_rate": 0.00019702644466602104, "loss": 1.5842, "step": 1480 }, { "epoch": 0.23805505324492665, "grad_norm": 0.7400312423706055, "learning_rate": 0.00019702236227974202, "loss": 1.5649, "step": 1481 }, { "epoch": 0.2382157926461724, "grad_norm": 0.7655078768730164, "learning_rate": 0.00019701827713538834, "loss": 1.5365, "step": 1482 }, { "epoch": 0.23837653204741813, "grad_norm": 0.665272057056427, "learning_rate": 0.0001970141892330761, "loss": 1.3468, "step": 1483 }, { "epoch": 0.23853727144866385, "grad_norm": 0.7163267731666565, "learning_rate": 0.0001970100985729215, "loss": 1.5718, "step": 1484 }, { "epoch": 0.23869801084990958, "grad_norm": 0.7582484483718872, "learning_rate": 0.0001970060051550409, "loss": 1.4996, "step": 1485 }, { "epoch": 0.23885875025115533, "grad_norm": 0.785639762878418, "learning_rate": 0.00019700190897955057, "loss": 1.5142, "step": 1486 }, { "epoch": 0.23901948965240105, "grad_norm": 0.621972382068634, "learning_rate": 0.00019699781004656699, "loss": 1.2669, "step": 1487 }, { "epoch": 0.23918022905364678, "grad_norm": 0.7224661707878113, "learning_rate": 0.0001969937083562067, "loss": 1.4649, "step": 1488 }, { "epoch": 0.2393409684548925, "grad_norm": 0.8222696781158447, "learning_rate": 0.00019698960390858629, "loss": 1.674, "step": 1489 }, { "epoch": 0.23950170785613822, "grad_norm": 0.7743018865585327, "learning_rate": 0.0001969854967038224, "loss": 1.429, "step": 1490 }, { "epoch": 0.23966244725738398, "grad_norm": 0.8077917695045471, "learning_rate": 0.00019698138674203186, "loss": 1.737, "step": 1491 }, { "epoch": 0.2398231866586297, "grad_norm": 0.6828585267066956, "learning_rate": 0.00019697727402333143, "loss": 1.3885, "step": 1492 }, { "epoch": 0.23998392605987542, "grad_norm": 0.7380383014678955, "learning_rate": 0.00019697315854783808, "loss": 1.463, "step": 1493 }, { "epoch": 0.24014466546112115, "grad_norm": 0.8323180675506592, "learning_rate": 0.00019696904031566876, "loss": 1.4186, "step": 1494 }, { "epoch": 0.2403054048623669, "grad_norm": 0.7419959902763367, "learning_rate": 0.00019696491932694055, "loss": 1.6754, "step": 1495 }, { "epoch": 0.24046614426361262, "grad_norm": 0.6754377484321594, "learning_rate": 0.00019696079558177064, "loss": 1.3658, "step": 1496 }, { "epoch": 0.24062688366485835, "grad_norm": 0.661657452583313, "learning_rate": 0.00019695666908027616, "loss": 1.2271, "step": 1497 }, { "epoch": 0.24078762306610407, "grad_norm": 0.6742766499519348, "learning_rate": 0.00019695253982257452, "loss": 1.2437, "step": 1498 }, { "epoch": 0.2409483624673498, "grad_norm": 0.7387527227401733, "learning_rate": 0.00019694840780878307, "loss": 1.7622, "step": 1499 }, { "epoch": 0.24110910186859555, "grad_norm": 0.6849592328071594, "learning_rate": 0.00019694427303901924, "loss": 1.2599, "step": 1500 }, { "epoch": 0.24126984126984127, "grad_norm": 0.6753179430961609, "learning_rate": 0.00019694013551340057, "loss": 1.7631, "step": 1501 }, { "epoch": 0.241430580671087, "grad_norm": 0.822068452835083, "learning_rate": 0.00019693599523204472, "loss": 1.8766, "step": 1502 }, { "epoch": 0.24159132007233272, "grad_norm": 0.6555389165878296, "learning_rate": 0.0001969318521950694, "loss": 1.3731, "step": 1503 }, { "epoch": 0.24175205947357847, "grad_norm": 0.7246274948120117, "learning_rate": 0.00019692770640259233, "loss": 1.4747, "step": 1504 }, { "epoch": 0.2419127988748242, "grad_norm": 0.7334820628166199, "learning_rate": 0.00019692355785473135, "loss": 1.506, "step": 1505 }, { "epoch": 0.24207353827606992, "grad_norm": 0.7595199346542358, "learning_rate": 0.00019691940655160446, "loss": 1.6709, "step": 1506 }, { "epoch": 0.24223427767731565, "grad_norm": 0.6497197151184082, "learning_rate": 0.00019691525249332964, "loss": 1.3532, "step": 1507 }, { "epoch": 0.24239501707856137, "grad_norm": 0.7928779721260071, "learning_rate": 0.00019691109568002494, "loss": 1.4959, "step": 1508 }, { "epoch": 0.24255575647980712, "grad_norm": 0.7007573246955872, "learning_rate": 0.0001969069361118086, "loss": 1.302, "step": 1509 }, { "epoch": 0.24271649588105285, "grad_norm": 0.6843436360359192, "learning_rate": 0.00019690277378879882, "loss": 1.4008, "step": 1510 }, { "epoch": 0.24287723528229857, "grad_norm": 0.6149760484695435, "learning_rate": 0.00019689860871111391, "loss": 1.2063, "step": 1511 }, { "epoch": 0.2430379746835443, "grad_norm": 0.7107071876525879, "learning_rate": 0.0001968944408788723, "loss": 1.5858, "step": 1512 }, { "epoch": 0.24319871408479005, "grad_norm": 0.6812729239463806, "learning_rate": 0.00019689027029219246, "loss": 1.6419, "step": 1513 }, { "epoch": 0.24335945348603577, "grad_norm": 0.6917579174041748, "learning_rate": 0.00019688609695119296, "loss": 1.4637, "step": 1514 }, { "epoch": 0.2435201928872815, "grad_norm": 0.7263210415840149, "learning_rate": 0.00019688192085599239, "loss": 1.4343, "step": 1515 }, { "epoch": 0.24368093228852722, "grad_norm": 0.8450407981872559, "learning_rate": 0.00019687774200670955, "loss": 1.5401, "step": 1516 }, { "epoch": 0.24384167168977294, "grad_norm": 0.7348352074623108, "learning_rate": 0.00019687356040346313, "loss": 1.5956, "step": 1517 }, { "epoch": 0.2440024110910187, "grad_norm": 0.7020547389984131, "learning_rate": 0.0001968693760463721, "loss": 1.3916, "step": 1518 }, { "epoch": 0.24416315049226442, "grad_norm": 0.9069629311561584, "learning_rate": 0.00019686518893555533, "loss": 1.437, "step": 1519 }, { "epoch": 0.24432388989351014, "grad_norm": 0.7389059662818909, "learning_rate": 0.0001968609990711319, "loss": 1.5164, "step": 1520 }, { "epoch": 0.24448462929475587, "grad_norm": 0.6068881750106812, "learning_rate": 0.00019685680645322088, "loss": 1.3485, "step": 1521 }, { "epoch": 0.24464536869600162, "grad_norm": 0.8388203382492065, "learning_rate": 0.0001968526110819415, "loss": 1.5776, "step": 1522 }, { "epoch": 0.24480610809724734, "grad_norm": 0.8337732553482056, "learning_rate": 0.00019684841295741299, "loss": 1.6393, "step": 1523 }, { "epoch": 0.24496684749849307, "grad_norm": 0.7154551148414612, "learning_rate": 0.00019684421207975467, "loss": 1.6836, "step": 1524 }, { "epoch": 0.2451275868997388, "grad_norm": 0.6481832265853882, "learning_rate": 0.00019684000844908601, "loss": 1.4766, "step": 1525 }, { "epoch": 0.24528832630098452, "grad_norm": 0.7186468839645386, "learning_rate": 0.00019683580206552646, "loss": 1.6649, "step": 1526 }, { "epoch": 0.24544906570223027, "grad_norm": 0.7353458404541016, "learning_rate": 0.00019683159292919564, "loss": 1.4901, "step": 1527 }, { "epoch": 0.245609805103476, "grad_norm": 0.6500330567359924, "learning_rate": 0.00019682738104021317, "loss": 1.3116, "step": 1528 }, { "epoch": 0.24577054450472172, "grad_norm": 0.7538241744041443, "learning_rate": 0.0001968231663986988, "loss": 1.3275, "step": 1529 }, { "epoch": 0.24593128390596744, "grad_norm": 0.7536467909812927, "learning_rate": 0.0001968189490047723, "loss": 1.6894, "step": 1530 }, { "epoch": 0.2460920233072132, "grad_norm": 0.687702476978302, "learning_rate": 0.00019681472885855366, "loss": 1.3892, "step": 1531 }, { "epoch": 0.24625276270845892, "grad_norm": 0.7167207598686218, "learning_rate": 0.00019681050596016272, "loss": 1.6734, "step": 1532 }, { "epoch": 0.24641350210970464, "grad_norm": 0.7865960001945496, "learning_rate": 0.0001968062803097196, "loss": 1.3667, "step": 1533 }, { "epoch": 0.24657424151095036, "grad_norm": 0.7258956432342529, "learning_rate": 0.00019680205190734443, "loss": 1.4017, "step": 1534 }, { "epoch": 0.2467349809121961, "grad_norm": 0.6925902366638184, "learning_rate": 0.00019679782075315738, "loss": 1.4594, "step": 1535 }, { "epoch": 0.24689572031344184, "grad_norm": 0.70027095079422, "learning_rate": 0.00019679358684727874, "loss": 1.5531, "step": 1536 }, { "epoch": 0.24705645971468757, "grad_norm": 0.6459099054336548, "learning_rate": 0.00019678935018982888, "loss": 1.4358, "step": 1537 }, { "epoch": 0.2472171991159333, "grad_norm": 0.676213264465332, "learning_rate": 0.0001967851107809282, "loss": 1.506, "step": 1538 }, { "epoch": 0.247377938517179, "grad_norm": 0.6511147618293762, "learning_rate": 0.00019678086862069726, "loss": 1.3003, "step": 1539 }, { "epoch": 0.24753867791842477, "grad_norm": 0.6735742688179016, "learning_rate": 0.00019677662370925662, "loss": 1.4186, "step": 1540 }, { "epoch": 0.2476994173196705, "grad_norm": 0.6598384976387024, "learning_rate": 0.000196772376046727, "loss": 1.3946, "step": 1541 }, { "epoch": 0.2478601567209162, "grad_norm": 0.6586984395980835, "learning_rate": 0.00019676812563322905, "loss": 1.5024, "step": 1542 }, { "epoch": 0.24802089612216194, "grad_norm": 0.5864682197570801, "learning_rate": 0.0001967638724688837, "loss": 1.1757, "step": 1543 }, { "epoch": 0.24818163552340766, "grad_norm": 0.7529594898223877, "learning_rate": 0.00019675961655381182, "loss": 1.7062, "step": 1544 }, { "epoch": 0.24834237492465341, "grad_norm": 0.7743858098983765, "learning_rate": 0.00019675535788813434, "loss": 1.583, "step": 1545 }, { "epoch": 0.24850311432589914, "grad_norm": 0.7300025820732117, "learning_rate": 0.00019675109647197242, "loss": 1.6411, "step": 1546 }, { "epoch": 0.24866385372714486, "grad_norm": 0.9588526487350464, "learning_rate": 0.00019674683230544714, "loss": 1.7438, "step": 1547 }, { "epoch": 0.2488245931283906, "grad_norm": 0.7916878461837769, "learning_rate": 0.00019674256538867972, "loss": 1.4761, "step": 1548 }, { "epoch": 0.24898533252963634, "grad_norm": 0.7149609923362732, "learning_rate": 0.0001967382957217915, "loss": 1.403, "step": 1549 }, { "epoch": 0.24914607193088206, "grad_norm": 0.6742051243782043, "learning_rate": 0.00019673402330490377, "loss": 1.2679, "step": 1550 }, { "epoch": 0.2493068113321278, "grad_norm": 0.7785698175430298, "learning_rate": 0.00019672974813813806, "loss": 1.6574, "step": 1551 }, { "epoch": 0.2494675507333735, "grad_norm": 0.7202755212783813, "learning_rate": 0.0001967254702216159, "loss": 1.463, "step": 1552 }, { "epoch": 0.24962829013461924, "grad_norm": 0.743236243724823, "learning_rate": 0.00019672118955545883, "loss": 1.4685, "step": 1553 }, { "epoch": 0.249789029535865, "grad_norm": 0.7590855956077576, "learning_rate": 0.0001967169061397886, "loss": 1.6046, "step": 1554 }, { "epoch": 0.2499497689371107, "grad_norm": 1.032386302947998, "learning_rate": 0.00019671261997472698, "loss": 1.8729, "step": 1555 }, { "epoch": 0.25011050833835646, "grad_norm": 0.7855166792869568, "learning_rate": 0.00019670833106039573, "loss": 1.6579, "step": 1556 }, { "epoch": 0.2502712477396022, "grad_norm": 0.7365078926086426, "learning_rate": 0.00019670403939691687, "loss": 1.1935, "step": 1557 }, { "epoch": 0.2504319871408479, "grad_norm": 0.7432130575180054, "learning_rate": 0.0001966997449844124, "loss": 1.5783, "step": 1558 }, { "epoch": 0.25059272654209364, "grad_norm": 0.9698072075843811, "learning_rate": 0.0001966954478230043, "loss": 1.4754, "step": 1559 }, { "epoch": 0.25075346594333936, "grad_norm": 0.897502064704895, "learning_rate": 0.0001966911479128148, "loss": 1.6273, "step": 1560 }, { "epoch": 0.2509142053445851, "grad_norm": 0.8384021520614624, "learning_rate": 0.00019668684525396614, "loss": 1.6497, "step": 1561 }, { "epoch": 0.2510749447458308, "grad_norm": 0.8934840559959412, "learning_rate": 0.00019668253984658063, "loss": 1.5799, "step": 1562 }, { "epoch": 0.25123568414707653, "grad_norm": 0.820021390914917, "learning_rate": 0.00019667823169078062, "loss": 1.7742, "step": 1563 }, { "epoch": 0.25139642354832226, "grad_norm": 0.6670164465904236, "learning_rate": 0.0001966739207866886, "loss": 1.3386, "step": 1564 }, { "epoch": 0.25155716294956804, "grad_norm": 0.7409904599189758, "learning_rate": 0.00019666960713442713, "loss": 1.7916, "step": 1565 }, { "epoch": 0.25171790235081376, "grad_norm": 0.6709502339363098, "learning_rate": 0.0001966652907341188, "loss": 1.2402, "step": 1566 }, { "epoch": 0.2518786417520595, "grad_norm": 0.814135730266571, "learning_rate": 0.00019666097158588635, "loss": 1.5968, "step": 1567 }, { "epoch": 0.2520393811533052, "grad_norm": 0.6643016934394836, "learning_rate": 0.00019665664968985258, "loss": 1.4138, "step": 1568 }, { "epoch": 0.25220012055455093, "grad_norm": 0.6828664541244507, "learning_rate": 0.00019665232504614027, "loss": 1.401, "step": 1569 }, { "epoch": 0.25236085995579666, "grad_norm": 0.6380752325057983, "learning_rate": 0.00019664799765487244, "loss": 1.3577, "step": 1570 }, { "epoch": 0.2525215993570424, "grad_norm": 0.7182545065879822, "learning_rate": 0.00019664366751617206, "loss": 1.6042, "step": 1571 }, { "epoch": 0.2526823387582881, "grad_norm": 0.8989008069038391, "learning_rate": 0.00019663933463016226, "loss": 1.6205, "step": 1572 }, { "epoch": 0.25284307815953383, "grad_norm": 0.6589969396591187, "learning_rate": 0.00019663499899696618, "loss": 1.3392, "step": 1573 }, { "epoch": 0.2530038175607796, "grad_norm": 0.8368903398513794, "learning_rate": 0.00019663066061670708, "loss": 1.8438, "step": 1574 }, { "epoch": 0.25316455696202533, "grad_norm": 0.7152459621429443, "learning_rate": 0.00019662631948950824, "loss": 1.661, "step": 1575 }, { "epoch": 0.25332529636327106, "grad_norm": 0.6813668608665466, "learning_rate": 0.0001966219756154932, "loss": 1.3873, "step": 1576 }, { "epoch": 0.2534860357645168, "grad_norm": 0.624844491481781, "learning_rate": 0.0001966176289947853, "loss": 1.4771, "step": 1577 }, { "epoch": 0.2536467751657625, "grad_norm": 0.7787290215492249, "learning_rate": 0.00019661327962750816, "loss": 1.5283, "step": 1578 }, { "epoch": 0.25380751456700823, "grad_norm": 0.7830667495727539, "learning_rate": 0.00019660892751378542, "loss": 1.5258, "step": 1579 }, { "epoch": 0.25396825396825395, "grad_norm": 0.7170423269271851, "learning_rate": 0.0001966045726537408, "loss": 1.3075, "step": 1580 }, { "epoch": 0.2541289933694997, "grad_norm": 0.7142941355705261, "learning_rate": 0.00019660021504749807, "loss": 1.4187, "step": 1581 }, { "epoch": 0.2542897327707454, "grad_norm": 0.7713121175765991, "learning_rate": 0.00019659585469518115, "loss": 1.5293, "step": 1582 }, { "epoch": 0.2544504721719912, "grad_norm": 0.7558179497718811, "learning_rate": 0.000196591491596914, "loss": 1.5671, "step": 1583 }, { "epoch": 0.2546112115732369, "grad_norm": 0.6452392339706421, "learning_rate": 0.00019658712575282058, "loss": 1.2178, "step": 1584 }, { "epoch": 0.25477195097448263, "grad_norm": 0.6976699233055115, "learning_rate": 0.0001965827571630251, "loss": 1.376, "step": 1585 }, { "epoch": 0.25493269037572835, "grad_norm": 0.6386720538139343, "learning_rate": 0.00019657838582765163, "loss": 1.3416, "step": 1586 }, { "epoch": 0.2550934297769741, "grad_norm": 0.6441293358802795, "learning_rate": 0.0001965740117468245, "loss": 1.4501, "step": 1587 }, { "epoch": 0.2552541691782198, "grad_norm": 0.6711699962615967, "learning_rate": 0.00019656963492066805, "loss": 1.2981, "step": 1588 }, { "epoch": 0.2554149085794655, "grad_norm": 0.724409818649292, "learning_rate": 0.0001965652553493067, "loss": 1.5262, "step": 1589 }, { "epoch": 0.25557564798071125, "grad_norm": 0.6757197976112366, "learning_rate": 0.00019656087303286492, "loss": 1.3758, "step": 1590 }, { "epoch": 0.255736387381957, "grad_norm": 0.7497650980949402, "learning_rate": 0.00019655648797146734, "loss": 1.5379, "step": 1591 }, { "epoch": 0.25589712678320276, "grad_norm": 0.665921688079834, "learning_rate": 0.00019655210016523858, "loss": 1.4069, "step": 1592 }, { "epoch": 0.2560578661844485, "grad_norm": 0.7497138977050781, "learning_rate": 0.0001965477096143034, "loss": 1.4088, "step": 1593 }, { "epoch": 0.2562186055856942, "grad_norm": 0.7505781650543213, "learning_rate": 0.00019654331631878657, "loss": 1.574, "step": 1594 }, { "epoch": 0.2563793449869399, "grad_norm": 0.6879168748855591, "learning_rate": 0.00019653892027881301, "loss": 1.3814, "step": 1595 }, { "epoch": 0.25654008438818565, "grad_norm": 0.7706241607666016, "learning_rate": 0.00019653452149450768, "loss": 1.517, "step": 1596 }, { "epoch": 0.2567008237894314, "grad_norm": 0.7070291638374329, "learning_rate": 0.00019653011996599562, "loss": 1.2031, "step": 1597 }, { "epoch": 0.2568615631906771, "grad_norm": 0.7949171662330627, "learning_rate": 0.00019652571569340194, "loss": 1.5768, "step": 1598 }, { "epoch": 0.2570223025919228, "grad_norm": 0.6534023284912109, "learning_rate": 0.00019652130867685189, "loss": 1.2807, "step": 1599 }, { "epoch": 0.25718304199316855, "grad_norm": 0.7498084306716919, "learning_rate": 0.0001965168989164707, "loss": 1.4303, "step": 1600 }, { "epoch": 0.25718304199316855, "eval_loss": 1.5318647623062134, "eval_runtime": 46.2179, "eval_samples_per_second": 5.431, "eval_steps_per_second": 2.726, "step": 1600 }, { "epoch": 0.25734378139441433, "grad_norm": 0.8093550205230713, "learning_rate": 0.00019651248641238374, "loss": 1.5491, "step": 1601 }, { "epoch": 0.25750452079566005, "grad_norm": 0.7076656222343445, "learning_rate": 0.00019650807116471642, "loss": 1.5667, "step": 1602 }, { "epoch": 0.2576652601969058, "grad_norm": 0.8039729595184326, "learning_rate": 0.00019650365317359434, "loss": 1.4792, "step": 1603 }, { "epoch": 0.2578259995981515, "grad_norm": 0.7946512699127197, "learning_rate": 0.00019649923243914298, "loss": 1.9384, "step": 1604 }, { "epoch": 0.2579867389993972, "grad_norm": 0.626259982585907, "learning_rate": 0.00019649480896148812, "loss": 1.3397, "step": 1605 }, { "epoch": 0.25814747840064295, "grad_norm": 0.7110534310340881, "learning_rate": 0.0001964903827407554, "loss": 1.3808, "step": 1606 }, { "epoch": 0.2583082178018887, "grad_norm": 0.679188072681427, "learning_rate": 0.0001964859537770707, "loss": 1.3083, "step": 1607 }, { "epoch": 0.2584689572031344, "grad_norm": 0.7218394875526428, "learning_rate": 0.00019648152207055994, "loss": 1.6852, "step": 1608 }, { "epoch": 0.2586296966043801, "grad_norm": 0.8155487775802612, "learning_rate": 0.00019647708762134906, "loss": 1.5694, "step": 1609 }, { "epoch": 0.2587904360056259, "grad_norm": 0.823634684085846, "learning_rate": 0.00019647265042956416, "loss": 1.7184, "step": 1610 }, { "epoch": 0.2589511754068716, "grad_norm": 0.6550993919372559, "learning_rate": 0.0001964682104953313, "loss": 1.5003, "step": 1611 }, { "epoch": 0.25911191480811735, "grad_norm": 0.6750533580780029, "learning_rate": 0.0001964637678187768, "loss": 1.4727, "step": 1612 }, { "epoch": 0.2592726542093631, "grad_norm": 0.7100902199745178, "learning_rate": 0.00019645932240002688, "loss": 1.4468, "step": 1613 }, { "epoch": 0.2594333936106088, "grad_norm": 5.360440731048584, "learning_rate": 0.00019645487423920798, "loss": 1.6361, "step": 1614 }, { "epoch": 0.2595941330118545, "grad_norm": 0.6357104778289795, "learning_rate": 0.00019645042333644644, "loss": 1.2288, "step": 1615 }, { "epoch": 0.25975487241310025, "grad_norm": 0.7214581966400146, "learning_rate": 0.00019644596969186888, "loss": 1.6094, "step": 1616 }, { "epoch": 0.25991561181434597, "grad_norm": 0.6937226057052612, "learning_rate": 0.00019644151330560185, "loss": 1.3339, "step": 1617 }, { "epoch": 0.2600763512155917, "grad_norm": 0.7995303869247437, "learning_rate": 0.00019643705417777207, "loss": 1.6978, "step": 1618 }, { "epoch": 0.2602370906168375, "grad_norm": 0.7214376330375671, "learning_rate": 0.00019643259230850627, "loss": 1.3483, "step": 1619 }, { "epoch": 0.2603978300180832, "grad_norm": 0.6865725517272949, "learning_rate": 0.00019642812769793134, "loss": 1.3727, "step": 1620 }, { "epoch": 0.2605585694193289, "grad_norm": 0.841166079044342, "learning_rate": 0.00019642366034617413, "loss": 1.5736, "step": 1621 }, { "epoch": 0.26071930882057465, "grad_norm": 0.7595505714416504, "learning_rate": 0.00019641919025336168, "loss": 1.4307, "step": 1622 }, { "epoch": 0.26088004822182037, "grad_norm": 0.9063478112220764, "learning_rate": 0.00019641471741962106, "loss": 1.6573, "step": 1623 }, { "epoch": 0.2610407876230661, "grad_norm": 0.7255083322525024, "learning_rate": 0.00019641024184507942, "loss": 1.4717, "step": 1624 }, { "epoch": 0.2612015270243118, "grad_norm": 0.6958248019218445, "learning_rate": 0.00019640576352986393, "loss": 1.3245, "step": 1625 }, { "epoch": 0.26136226642555754, "grad_norm": 0.7781933546066284, "learning_rate": 0.00019640128247410196, "loss": 1.3524, "step": 1626 }, { "epoch": 0.26152300582680327, "grad_norm": 0.7462252378463745, "learning_rate": 0.00019639679867792089, "loss": 1.4674, "step": 1627 }, { "epoch": 0.26168374522804905, "grad_norm": 0.7144051790237427, "learning_rate": 0.00019639231214144815, "loss": 1.5038, "step": 1628 }, { "epoch": 0.26184448462929477, "grad_norm": 0.6715185046195984, "learning_rate": 0.0001963878228648113, "loss": 1.371, "step": 1629 }, { "epoch": 0.2620052240305405, "grad_norm": 0.71574467420578, "learning_rate": 0.00019638333084813796, "loss": 1.4856, "step": 1630 }, { "epoch": 0.2621659634317862, "grad_norm": 0.6717959046363831, "learning_rate": 0.0001963788360915558, "loss": 1.41, "step": 1631 }, { "epoch": 0.26232670283303194, "grad_norm": 0.7588745355606079, "learning_rate": 0.00019637433859519266, "loss": 1.4285, "step": 1632 }, { "epoch": 0.26248744223427767, "grad_norm": 0.7737516164779663, "learning_rate": 0.0001963698383591763, "loss": 1.6588, "step": 1633 }, { "epoch": 0.2626481816355234, "grad_norm": 0.7601302266120911, "learning_rate": 0.00019636533538363473, "loss": 1.4475, "step": 1634 }, { "epoch": 0.2628089210367691, "grad_norm": 0.8184059858322144, "learning_rate": 0.00019636082966869588, "loss": 1.6411, "step": 1635 }, { "epoch": 0.2629696604380149, "grad_norm": 0.7917036414146423, "learning_rate": 0.00019635632121448794, "loss": 1.5233, "step": 1636 }, { "epoch": 0.2631303998392606, "grad_norm": 0.7402708530426025, "learning_rate": 0.00019635181002113897, "loss": 1.5004, "step": 1637 }, { "epoch": 0.26329113924050634, "grad_norm": 0.7139912247657776, "learning_rate": 0.00019634729608877723, "loss": 1.4143, "step": 1638 }, { "epoch": 0.26345187864175207, "grad_norm": 0.7694075703620911, "learning_rate": 0.00019634277941753107, "loss": 1.6382, "step": 1639 }, { "epoch": 0.2636126180429978, "grad_norm": 0.7341703772544861, "learning_rate": 0.00019633826000752888, "loss": 1.5546, "step": 1640 }, { "epoch": 0.2637733574442435, "grad_norm": 0.7025135159492493, "learning_rate": 0.00019633373785889912, "loss": 1.2795, "step": 1641 }, { "epoch": 0.26393409684548924, "grad_norm": 0.721380352973938, "learning_rate": 0.00019632921297177037, "loss": 1.6226, "step": 1642 }, { "epoch": 0.26409483624673497, "grad_norm": 0.7444823384284973, "learning_rate": 0.00019632468534627124, "loss": 1.7011, "step": 1643 }, { "epoch": 0.2642555756479807, "grad_norm": 0.7994872331619263, "learning_rate": 0.00019632015498253043, "loss": 1.3947, "step": 1644 }, { "epoch": 0.26441631504922647, "grad_norm": 0.8896515965461731, "learning_rate": 0.00019631562188067672, "loss": 1.7644, "step": 1645 }, { "epoch": 0.2645770544504722, "grad_norm": 0.6936803460121155, "learning_rate": 0.000196311086040839, "loss": 1.4622, "step": 1646 }, { "epoch": 0.2647377938517179, "grad_norm": 0.6570136547088623, "learning_rate": 0.0001963065474631462, "loss": 1.4693, "step": 1647 }, { "epoch": 0.26489853325296364, "grad_norm": 0.7054909467697144, "learning_rate": 0.00019630200614772732, "loss": 1.3985, "step": 1648 }, { "epoch": 0.26505927265420937, "grad_norm": 0.8264179825782776, "learning_rate": 0.00019629746209471152, "loss": 1.6347, "step": 1649 }, { "epoch": 0.2652200120554551, "grad_norm": 0.6133151054382324, "learning_rate": 0.00019629291530422787, "loss": 1.3219, "step": 1650 }, { "epoch": 0.2653807514567008, "grad_norm": 0.6896864771842957, "learning_rate": 0.00019628836577640572, "loss": 1.5948, "step": 1651 }, { "epoch": 0.26554149085794654, "grad_norm": 0.6851738095283508, "learning_rate": 0.00019628381351137434, "loss": 1.3706, "step": 1652 }, { "epoch": 0.26570223025919226, "grad_norm": 0.8668655157089233, "learning_rate": 0.0001962792585092632, "loss": 1.5612, "step": 1653 }, { "epoch": 0.26586296966043804, "grad_norm": 0.7680171728134155, "learning_rate": 0.0001962747007702017, "loss": 1.6516, "step": 1654 }, { "epoch": 0.26602370906168377, "grad_norm": 0.6498892307281494, "learning_rate": 0.0001962701402943195, "loss": 1.3585, "step": 1655 }, { "epoch": 0.2661844484629295, "grad_norm": 0.7213436961174011, "learning_rate": 0.00019626557708174615, "loss": 1.5545, "step": 1656 }, { "epoch": 0.2663451878641752, "grad_norm": 0.7516255974769592, "learning_rate": 0.00019626101113261144, "loss": 1.632, "step": 1657 }, { "epoch": 0.26650592726542094, "grad_norm": 0.726803183555603, "learning_rate": 0.0001962564424470451, "loss": 1.4903, "step": 1658 }, { "epoch": 0.26666666666666666, "grad_norm": 0.7004909515380859, "learning_rate": 0.00019625187102517705, "loss": 1.4159, "step": 1659 }, { "epoch": 0.2668274060679124, "grad_norm": 0.8573302030563354, "learning_rate": 0.00019624729686713724, "loss": 1.5537, "step": 1660 }, { "epoch": 0.2669881454691581, "grad_norm": 0.8372601866722107, "learning_rate": 0.00019624271997305574, "loss": 1.5836, "step": 1661 }, { "epoch": 0.26714888487040384, "grad_norm": 0.7443369030952454, "learning_rate": 0.00019623814034306258, "loss": 1.4065, "step": 1662 }, { "epoch": 0.2673096242716496, "grad_norm": 0.6149259209632874, "learning_rate": 0.000196233557977288, "loss": 1.3162, "step": 1663 }, { "epoch": 0.26747036367289534, "grad_norm": 0.7198801636695862, "learning_rate": 0.0001962289728758622, "loss": 1.2631, "step": 1664 }, { "epoch": 0.26763110307414106, "grad_norm": 0.8075472712516785, "learning_rate": 0.0001962243850389156, "loss": 1.5819, "step": 1665 }, { "epoch": 0.2677918424753868, "grad_norm": 0.7082637548446655, "learning_rate": 0.0001962197944665786, "loss": 1.7381, "step": 1666 }, { "epoch": 0.2679525818766325, "grad_norm": 0.8326120376586914, "learning_rate": 0.00019621520115898163, "loss": 1.7869, "step": 1667 }, { "epoch": 0.26811332127787824, "grad_norm": 0.7998780608177185, "learning_rate": 0.00019621060511625538, "loss": 1.5945, "step": 1668 }, { "epoch": 0.26827406067912396, "grad_norm": 0.7199069261550903, "learning_rate": 0.00019620600633853038, "loss": 1.3832, "step": 1669 }, { "epoch": 0.2684348000803697, "grad_norm": 0.7943974733352661, "learning_rate": 0.00019620140482593746, "loss": 1.8508, "step": 1670 }, { "epoch": 0.2685955394816154, "grad_norm": 0.787676990032196, "learning_rate": 0.00019619680057860738, "loss": 1.5143, "step": 1671 }, { "epoch": 0.2687562788828612, "grad_norm": 0.71796715259552, "learning_rate": 0.00019619219359667104, "loss": 1.4779, "step": 1672 }, { "epoch": 0.2689170182841069, "grad_norm": 0.8380251526832581, "learning_rate": 0.0001961875838802594, "loss": 1.6996, "step": 1673 }, { "epoch": 0.26907775768535264, "grad_norm": 0.7394995093345642, "learning_rate": 0.0001961829714295035, "loss": 1.5564, "step": 1674 }, { "epoch": 0.26923849708659836, "grad_norm": 0.7847248315811157, "learning_rate": 0.00019617835624453445, "loss": 1.4636, "step": 1675 }, { "epoch": 0.2693992364878441, "grad_norm": 0.7087221741676331, "learning_rate": 0.00019617373832548344, "loss": 1.6115, "step": 1676 }, { "epoch": 0.2695599758890898, "grad_norm": 0.7215599417686462, "learning_rate": 0.0001961691176724818, "loss": 1.5287, "step": 1677 }, { "epoch": 0.26972071529033553, "grad_norm": 0.6829606294631958, "learning_rate": 0.00019616449428566082, "loss": 1.6017, "step": 1678 }, { "epoch": 0.26988145469158126, "grad_norm": 0.6882917881011963, "learning_rate": 0.00019615986816515194, "loss": 1.3706, "step": 1679 }, { "epoch": 0.270042194092827, "grad_norm": 0.7686020135879517, "learning_rate": 0.0001961552393110867, "loss": 1.4801, "step": 1680 }, { "epoch": 0.27020293349407276, "grad_norm": 0.8321248888969421, "learning_rate": 0.00019615060772359665, "loss": 1.5164, "step": 1681 }, { "epoch": 0.2703636728953185, "grad_norm": 0.7155603766441345, "learning_rate": 0.00019614597340281349, "loss": 1.4663, "step": 1682 }, { "epoch": 0.2705244122965642, "grad_norm": 0.7928398251533508, "learning_rate": 0.00019614133634886894, "loss": 1.549, "step": 1683 }, { "epoch": 0.27068515169780993, "grad_norm": 0.9488728046417236, "learning_rate": 0.0001961366965618948, "loss": 1.6779, "step": 1684 }, { "epoch": 0.27084589109905566, "grad_norm": 0.7183840870857239, "learning_rate": 0.00019613205404202298, "loss": 1.6065, "step": 1685 }, { "epoch": 0.2710066305003014, "grad_norm": 0.6456723213195801, "learning_rate": 0.0001961274087893855, "loss": 1.6476, "step": 1686 }, { "epoch": 0.2711673699015471, "grad_norm": 0.6874651312828064, "learning_rate": 0.0001961227608041143, "loss": 1.4903, "step": 1687 }, { "epoch": 0.27132810930279283, "grad_norm": 0.7253707647323608, "learning_rate": 0.00019611811008634164, "loss": 1.5574, "step": 1688 }, { "epoch": 0.27148884870403855, "grad_norm": 0.7089287042617798, "learning_rate": 0.00019611345663619965, "loss": 1.4639, "step": 1689 }, { "epoch": 0.27164958810528433, "grad_norm": 0.6750966906547546, "learning_rate": 0.00019610880045382062, "loss": 1.4822, "step": 1690 }, { "epoch": 0.27181032750653006, "grad_norm": 0.7041635513305664, "learning_rate": 0.00019610414153933692, "loss": 1.5746, "step": 1691 }, { "epoch": 0.2719710669077758, "grad_norm": 0.687125563621521, "learning_rate": 0.000196099479892881, "loss": 1.4013, "step": 1692 }, { "epoch": 0.2721318063090215, "grad_norm": 0.7340260744094849, "learning_rate": 0.00019609481551458537, "loss": 1.5536, "step": 1693 }, { "epoch": 0.27229254571026723, "grad_norm": 0.7592375874519348, "learning_rate": 0.00019609014840458262, "loss": 1.5976, "step": 1694 }, { "epoch": 0.27245328511151296, "grad_norm": 0.8700218200683594, "learning_rate": 0.00019608547856300544, "loss": 1.558, "step": 1695 }, { "epoch": 0.2726140245127587, "grad_norm": 0.7545492649078369, "learning_rate": 0.00019608080598998654, "loss": 1.4878, "step": 1696 }, { "epoch": 0.2727747639140044, "grad_norm": 0.8087036609649658, "learning_rate": 0.0001960761306856588, "loss": 1.6311, "step": 1697 }, { "epoch": 0.2729355033152501, "grad_norm": 0.7341361045837402, "learning_rate": 0.00019607145265015512, "loss": 1.4271, "step": 1698 }, { "epoch": 0.2730962427164959, "grad_norm": 0.7688615322113037, "learning_rate": 0.00019606677188360844, "loss": 1.4854, "step": 1699 }, { "epoch": 0.27325698211774163, "grad_norm": 0.6700354218482971, "learning_rate": 0.00019606208838615183, "loss": 1.4793, "step": 1700 }, { "epoch": 0.27341772151898736, "grad_norm": 0.6207538843154907, "learning_rate": 0.0001960574021579185, "loss": 1.4791, "step": 1701 }, { "epoch": 0.2735784609202331, "grad_norm": 0.7077017426490784, "learning_rate": 0.00019605271319904157, "loss": 1.4939, "step": 1702 }, { "epoch": 0.2737392003214788, "grad_norm": 0.6984448432922363, "learning_rate": 0.00019604802150965437, "loss": 1.2629, "step": 1703 }, { "epoch": 0.27389993972272453, "grad_norm": 0.7925651669502258, "learning_rate": 0.0001960433270898903, "loss": 1.5279, "step": 1704 }, { "epoch": 0.27406067912397025, "grad_norm": 0.7918320298194885, "learning_rate": 0.0001960386299398828, "loss": 1.6326, "step": 1705 }, { "epoch": 0.274221418525216, "grad_norm": 0.7487118244171143, "learning_rate": 0.0001960339300597654, "loss": 1.6813, "step": 1706 }, { "epoch": 0.2743821579264617, "grad_norm": 0.7152796983718872, "learning_rate": 0.00019602922744967164, "loss": 1.5396, "step": 1707 }, { "epoch": 0.2745428973277075, "grad_norm": 0.8120810985565186, "learning_rate": 0.00019602452210973527, "loss": 1.7764, "step": 1708 }, { "epoch": 0.2747036367289532, "grad_norm": 0.738877534866333, "learning_rate": 0.00019601981404009, "loss": 1.5652, "step": 1709 }, { "epoch": 0.27486437613019893, "grad_norm": 0.7369695901870728, "learning_rate": 0.00019601510324086972, "loss": 1.5211, "step": 1710 }, { "epoch": 0.27502511553144465, "grad_norm": 0.7273932099342346, "learning_rate": 0.00019601038971220835, "loss": 1.4729, "step": 1711 }, { "epoch": 0.2751858549326904, "grad_norm": 5.070930480957031, "learning_rate": 0.00019600567345423985, "loss": 2.1083, "step": 1712 }, { "epoch": 0.2753465943339361, "grad_norm": 0.7458380460739136, "learning_rate": 0.00019600095446709826, "loss": 1.5407, "step": 1713 }, { "epoch": 0.2755073337351818, "grad_norm": 0.7630818486213684, "learning_rate": 0.00019599623275091778, "loss": 1.6615, "step": 1714 }, { "epoch": 0.27566807313642755, "grad_norm": 0.6232661604881287, "learning_rate": 0.0001959915083058326, "loss": 1.4101, "step": 1715 }, { "epoch": 0.2758288125376733, "grad_norm": 0.6795737743377686, "learning_rate": 0.00019598678113197707, "loss": 1.4389, "step": 1716 }, { "epoch": 0.27598955193891905, "grad_norm": 0.7753338813781738, "learning_rate": 0.00019598205122948553, "loss": 1.6564, "step": 1717 }, { "epoch": 0.2761502913401648, "grad_norm": 0.8242252469062805, "learning_rate": 0.00019597731859849246, "loss": 1.8094, "step": 1718 }, { "epoch": 0.2763110307414105, "grad_norm": 0.7415739893913269, "learning_rate": 0.00019597258323913238, "loss": 1.6103, "step": 1719 }, { "epoch": 0.2764717701426562, "grad_norm": 0.7215705513954163, "learning_rate": 0.0001959678451515399, "loss": 1.6084, "step": 1720 }, { "epoch": 0.27663250954390195, "grad_norm": 0.8113778233528137, "learning_rate": 0.00019596310433584973, "loss": 1.8629, "step": 1721 }, { "epoch": 0.2767932489451477, "grad_norm": 0.7781134247779846, "learning_rate": 0.0001959583607921966, "loss": 1.2819, "step": 1722 }, { "epoch": 0.2769539883463934, "grad_norm": 0.6916248202323914, "learning_rate": 0.0001959536145207154, "loss": 1.3823, "step": 1723 }, { "epoch": 0.2771147277476391, "grad_norm": 0.6217666864395142, "learning_rate": 0.00019594886552154104, "loss": 1.3532, "step": 1724 }, { "epoch": 0.27727546714888485, "grad_norm": 0.7000452876091003, "learning_rate": 0.00019594411379480852, "loss": 1.4624, "step": 1725 }, { "epoch": 0.2774362065501306, "grad_norm": 0.7417541742324829, "learning_rate": 0.0001959393593406529, "loss": 1.3843, "step": 1726 }, { "epoch": 0.27759694595137635, "grad_norm": 0.6922398209571838, "learning_rate": 0.00019593460215920933, "loss": 1.4358, "step": 1727 }, { "epoch": 0.2777576853526221, "grad_norm": 0.7309592366218567, "learning_rate": 0.0001959298422506131, "loss": 1.3494, "step": 1728 }, { "epoch": 0.2779184247538678, "grad_norm": 0.7502502202987671, "learning_rate": 0.00019592507961499947, "loss": 1.5048, "step": 1729 }, { "epoch": 0.2780791641551135, "grad_norm": 0.7927303910255432, "learning_rate": 0.00019592031425250383, "loss": 1.7822, "step": 1730 }, { "epoch": 0.27823990355635925, "grad_norm": 0.7631879448890686, "learning_rate": 0.00019591554616326165, "loss": 1.5857, "step": 1731 }, { "epoch": 0.27840064295760497, "grad_norm": 0.732484757900238, "learning_rate": 0.00019591077534740855, "loss": 1.2208, "step": 1732 }, { "epoch": 0.2785613823588507, "grad_norm": 0.7121373414993286, "learning_rate": 0.00019590600180508002, "loss": 1.2972, "step": 1733 }, { "epoch": 0.2787221217600964, "grad_norm": 0.6996657252311707, "learning_rate": 0.00019590122553641185, "loss": 1.4378, "step": 1734 }, { "epoch": 0.2788828611613422, "grad_norm": 0.729198157787323, "learning_rate": 0.00019589644654153978, "loss": 1.7269, "step": 1735 }, { "epoch": 0.2790436005625879, "grad_norm": 0.7722163200378418, "learning_rate": 0.00019589166482059965, "loss": 1.7297, "step": 1736 }, { "epoch": 0.27920433996383365, "grad_norm": 0.7337856888771057, "learning_rate": 0.00019588688037372744, "loss": 1.4624, "step": 1737 }, { "epoch": 0.27936507936507937, "grad_norm": 0.840616762638092, "learning_rate": 0.00019588209320105913, "loss": 1.3243, "step": 1738 }, { "epoch": 0.2795258187663251, "grad_norm": 0.7321349382400513, "learning_rate": 0.00019587730330273079, "loss": 1.3312, "step": 1739 }, { "epoch": 0.2796865581675708, "grad_norm": 0.6240214109420776, "learning_rate": 0.00019587251067887856, "loss": 1.1723, "step": 1740 }, { "epoch": 0.27984729756881654, "grad_norm": 0.8392481803894043, "learning_rate": 0.0001958677153296388, "loss": 1.5762, "step": 1741 }, { "epoch": 0.28000803697006227, "grad_norm": 0.7819835543632507, "learning_rate": 0.0001958629172551477, "loss": 1.5649, "step": 1742 }, { "epoch": 0.280168776371308, "grad_norm": 0.6471049785614014, "learning_rate": 0.00019585811645554172, "loss": 1.1884, "step": 1743 }, { "epoch": 0.2803295157725538, "grad_norm": 0.8587349057197571, "learning_rate": 0.00019585331293095733, "loss": 1.6982, "step": 1744 }, { "epoch": 0.2804902551737995, "grad_norm": 0.7137045860290527, "learning_rate": 0.00019584850668153105, "loss": 1.5206, "step": 1745 }, { "epoch": 0.2806509945750452, "grad_norm": 0.8205111026763916, "learning_rate": 0.00019584369770739955, "loss": 1.6416, "step": 1746 }, { "epoch": 0.28081173397629094, "grad_norm": 0.8024592399597168, "learning_rate": 0.00019583888600869948, "loss": 1.4802, "step": 1747 }, { "epoch": 0.28097247337753667, "grad_norm": 0.7223048806190491, "learning_rate": 0.00019583407158556767, "loss": 1.6042, "step": 1748 }, { "epoch": 0.2811332127787824, "grad_norm": 1.0248265266418457, "learning_rate": 0.000195829254438141, "loss": 1.4577, "step": 1749 }, { "epoch": 0.2812939521800281, "grad_norm": 0.6822168231010437, "learning_rate": 0.00019582443456655634, "loss": 1.2886, "step": 1750 }, { "epoch": 0.28145469158127384, "grad_norm": 0.6475033760070801, "learning_rate": 0.00019581961197095074, "loss": 1.442, "step": 1751 }, { "epoch": 0.28161543098251957, "grad_norm": 0.6828655004501343, "learning_rate": 0.0001958147866514613, "loss": 1.2866, "step": 1752 }, { "epoch": 0.28177617038376535, "grad_norm": 0.8061894774436951, "learning_rate": 0.00019580995860822518, "loss": 1.4898, "step": 1753 }, { "epoch": 0.28193690978501107, "grad_norm": 0.7883077263832092, "learning_rate": 0.00019580512784137964, "loss": 1.7058, "step": 1754 }, { "epoch": 0.2820976491862568, "grad_norm": 0.798542320728302, "learning_rate": 0.000195800294351062, "loss": 1.4696, "step": 1755 }, { "epoch": 0.2822583885875025, "grad_norm": 0.7340161204338074, "learning_rate": 0.00019579545813740962, "loss": 1.5175, "step": 1756 }, { "epoch": 0.28241912798874824, "grad_norm": 0.6685411930084229, "learning_rate": 0.00019579061920056004, "loss": 1.4491, "step": 1757 }, { "epoch": 0.28257986738999397, "grad_norm": 0.7393726110458374, "learning_rate": 0.0001957857775406508, "loss": 1.4475, "step": 1758 }, { "epoch": 0.2827406067912397, "grad_norm": 0.6501362919807434, "learning_rate": 0.00019578093315781954, "loss": 1.2191, "step": 1759 }, { "epoch": 0.2829013461924854, "grad_norm": 0.817628800868988, "learning_rate": 0.00019577608605220395, "loss": 1.658, "step": 1760 }, { "epoch": 0.28306208559373114, "grad_norm": 0.710851788520813, "learning_rate": 0.00019577123622394185, "loss": 1.3806, "step": 1761 }, { "epoch": 0.2832228249949769, "grad_norm": 0.7496859431266785, "learning_rate": 0.00019576638367317108, "loss": 1.4281, "step": 1762 }, { "epoch": 0.28338356439622264, "grad_norm": 0.6502282619476318, "learning_rate": 0.00019576152840002962, "loss": 1.3932, "step": 1763 }, { "epoch": 0.28354430379746837, "grad_norm": 0.8078073859214783, "learning_rate": 0.00019575667040465543, "loss": 1.6897, "step": 1764 }, { "epoch": 0.2837050431987141, "grad_norm": 0.6786291003227234, "learning_rate": 0.00019575180968718666, "loss": 1.3004, "step": 1765 }, { "epoch": 0.2838657825999598, "grad_norm": 0.8619006872177124, "learning_rate": 0.0001957469462477615, "loss": 1.5548, "step": 1766 }, { "epoch": 0.28402652200120554, "grad_norm": 0.7469272613525391, "learning_rate": 0.00019574208008651814, "loss": 1.5135, "step": 1767 }, { "epoch": 0.28418726140245126, "grad_norm": 0.6974870562553406, "learning_rate": 0.00019573721120359495, "loss": 1.3514, "step": 1768 }, { "epoch": 0.284348000803697, "grad_norm": 0.7391507029533386, "learning_rate": 0.00019573233959913037, "loss": 1.4264, "step": 1769 }, { "epoch": 0.2845087402049427, "grad_norm": 0.7250444293022156, "learning_rate": 0.0001957274652732628, "loss": 1.6326, "step": 1770 }, { "epoch": 0.2846694796061885, "grad_norm": 0.6790604591369629, "learning_rate": 0.00019572258822613087, "loss": 1.2035, "step": 1771 }, { "epoch": 0.2848302190074342, "grad_norm": 0.7253982424736023, "learning_rate": 0.00019571770845787324, "loss": 1.6137, "step": 1772 }, { "epoch": 0.28499095840867994, "grad_norm": 0.730661153793335, "learning_rate": 0.00019571282596862857, "loss": 1.4646, "step": 1773 }, { "epoch": 0.28515169780992566, "grad_norm": 0.6931976079940796, "learning_rate": 0.0001957079407585357, "loss": 1.5267, "step": 1774 }, { "epoch": 0.2853124372111714, "grad_norm": 0.715356707572937, "learning_rate": 0.00019570305282773346, "loss": 1.2429, "step": 1775 }, { "epoch": 0.2854731766124171, "grad_norm": 0.6905343532562256, "learning_rate": 0.00019569816217636086, "loss": 1.5934, "step": 1776 }, { "epoch": 0.28563391601366284, "grad_norm": 0.7632275819778442, "learning_rate": 0.00019569326880455686, "loss": 1.5265, "step": 1777 }, { "epoch": 0.28579465541490856, "grad_norm": 0.7823320627212524, "learning_rate": 0.0001956883727124606, "loss": 1.6074, "step": 1778 }, { "epoch": 0.2859553948161543, "grad_norm": 0.7078681588172913, "learning_rate": 0.00019568347390021124, "loss": 1.4773, "step": 1779 }, { "epoch": 0.28611613421740006, "grad_norm": 0.7854443192481995, "learning_rate": 0.0001956785723679481, "loss": 1.6786, "step": 1780 }, { "epoch": 0.2862768736186458, "grad_norm": 0.6327061653137207, "learning_rate": 0.00019567366811581045, "loss": 1.2412, "step": 1781 }, { "epoch": 0.2864376130198915, "grad_norm": 0.7581678628921509, "learning_rate": 0.00019566876114393772, "loss": 1.4438, "step": 1782 }, { "epoch": 0.28659835242113724, "grad_norm": 0.7287297248840332, "learning_rate": 0.00019566385145246943, "loss": 1.5443, "step": 1783 }, { "epoch": 0.28675909182238296, "grad_norm": 0.740134596824646, "learning_rate": 0.00019565893904154513, "loss": 1.4922, "step": 1784 }, { "epoch": 0.2869198312236287, "grad_norm": 0.8680222034454346, "learning_rate": 0.00019565402391130446, "loss": 1.5302, "step": 1785 }, { "epoch": 0.2870805706248744, "grad_norm": 0.6669192910194397, "learning_rate": 0.00019564910606188716, "loss": 1.4151, "step": 1786 }, { "epoch": 0.28724131002612013, "grad_norm": 0.7341873645782471, "learning_rate": 0.000195644185493433, "loss": 1.4362, "step": 1787 }, { "epoch": 0.28740204942736586, "grad_norm": 0.7436135411262512, "learning_rate": 0.00019563926220608185, "loss": 1.5115, "step": 1788 }, { "epoch": 0.28756278882861164, "grad_norm": 0.7490450143814087, "learning_rate": 0.00019563433619997376, "loss": 1.343, "step": 1789 }, { "epoch": 0.28772352822985736, "grad_norm": 0.6228693127632141, "learning_rate": 0.00019562940747524867, "loss": 1.4655, "step": 1790 }, { "epoch": 0.2878842676311031, "grad_norm": 0.7271324992179871, "learning_rate": 0.0001956244760320467, "loss": 1.4523, "step": 1791 }, { "epoch": 0.2880450070323488, "grad_norm": 0.9495688080787659, "learning_rate": 0.00019561954187050806, "loss": 1.4711, "step": 1792 }, { "epoch": 0.28820574643359453, "grad_norm": 0.7670509815216064, "learning_rate": 0.00019561460499077302, "loss": 1.3579, "step": 1793 }, { "epoch": 0.28836648583484026, "grad_norm": 0.8466856479644775, "learning_rate": 0.0001956096653929819, "loss": 1.5816, "step": 1794 }, { "epoch": 0.288527225236086, "grad_norm": 0.7295258641242981, "learning_rate": 0.0001956047230772751, "loss": 1.5493, "step": 1795 }, { "epoch": 0.2886879646373317, "grad_norm": 0.7593228220939636, "learning_rate": 0.00019559977804379318, "loss": 1.4779, "step": 1796 }, { "epoch": 0.28884870403857743, "grad_norm": 0.7118123173713684, "learning_rate": 0.00019559483029267668, "loss": 1.3938, "step": 1797 }, { "epoch": 0.2890094434398232, "grad_norm": 0.7356933951377869, "learning_rate": 0.00019558987982406625, "loss": 1.6372, "step": 1798 }, { "epoch": 0.28917018284106893, "grad_norm": 0.9255940318107605, "learning_rate": 0.0001955849266381026, "loss": 1.761, "step": 1799 }, { "epoch": 0.28933092224231466, "grad_norm": 0.7357968091964722, "learning_rate": 0.00019557997073492653, "loss": 1.286, "step": 1800 }, { "epoch": 0.28933092224231466, "eval_loss": 1.5273641347885132, "eval_runtime": 46.2204, "eval_samples_per_second": 5.431, "eval_steps_per_second": 2.726, "step": 1800 }, { "epoch": 0.2894916616435604, "grad_norm": 0.8159569501876831, "learning_rate": 0.00019557501211467897, "loss": 1.5659, "step": 1801 }, { "epoch": 0.2896524010448061, "grad_norm": 0.7745828628540039, "learning_rate": 0.00019557005077750086, "loss": 1.5544, "step": 1802 }, { "epoch": 0.28981314044605183, "grad_norm": 0.6736118793487549, "learning_rate": 0.00019556508672353325, "loss": 1.2717, "step": 1803 }, { "epoch": 0.28997387984729756, "grad_norm": 0.88382488489151, "learning_rate": 0.00019556011995291718, "loss": 1.6508, "step": 1804 }, { "epoch": 0.2901346192485433, "grad_norm": 0.7488189935684204, "learning_rate": 0.00019555515046579394, "loss": 1.4878, "step": 1805 }, { "epoch": 0.290295358649789, "grad_norm": 0.7845442295074463, "learning_rate": 0.00019555017826230472, "loss": 1.5077, "step": 1806 }, { "epoch": 0.2904560980510348, "grad_norm": 0.8274258375167847, "learning_rate": 0.00019554520334259093, "loss": 1.689, "step": 1807 }, { "epoch": 0.2906168374522805, "grad_norm": 0.7586215138435364, "learning_rate": 0.00019554022570679393, "loss": 1.5317, "step": 1808 }, { "epoch": 0.29077757685352623, "grad_norm": 0.7927312254905701, "learning_rate": 0.00019553524535505527, "loss": 1.5224, "step": 1809 }, { "epoch": 0.29093831625477196, "grad_norm": 0.7274184226989746, "learning_rate": 0.00019553026228751654, "loss": 1.4529, "step": 1810 }, { "epoch": 0.2910990556560177, "grad_norm": 0.6740259528160095, "learning_rate": 0.00019552527650431936, "loss": 1.3914, "step": 1811 }, { "epoch": 0.2912597950572634, "grad_norm": 0.7771034836769104, "learning_rate": 0.00019552028800560542, "loss": 1.5535, "step": 1812 }, { "epoch": 0.29142053445850913, "grad_norm": 0.7485671043395996, "learning_rate": 0.00019551529679151663, "loss": 1.6479, "step": 1813 }, { "epoch": 0.29158127385975485, "grad_norm": 0.6776421070098877, "learning_rate": 0.00019551030286219477, "loss": 1.4391, "step": 1814 }, { "epoch": 0.2917420132610006, "grad_norm": 0.7143004536628723, "learning_rate": 0.0001955053062177819, "loss": 1.5013, "step": 1815 }, { "epoch": 0.29190275266224636, "grad_norm": 0.7563156485557556, "learning_rate": 0.00019550030685842003, "loss": 1.3728, "step": 1816 }, { "epoch": 0.2920634920634921, "grad_norm": 0.7423736453056335, "learning_rate": 0.00019549530478425123, "loss": 1.5132, "step": 1817 }, { "epoch": 0.2922242314647378, "grad_norm": 0.8167420029640198, "learning_rate": 0.00019549029999541774, "loss": 1.6146, "step": 1818 }, { "epoch": 0.29238497086598353, "grad_norm": 0.7630956768989563, "learning_rate": 0.0001954852924920618, "loss": 1.5854, "step": 1819 }, { "epoch": 0.29254571026722925, "grad_norm": 0.8258614540100098, "learning_rate": 0.00019548028227432584, "loss": 1.2832, "step": 1820 }, { "epoch": 0.292706449668475, "grad_norm": 0.643609344959259, "learning_rate": 0.00019547526934235218, "loss": 1.5864, "step": 1821 }, { "epoch": 0.2928671890697207, "grad_norm": 0.7913107872009277, "learning_rate": 0.0001954702536962834, "loss": 1.4032, "step": 1822 }, { "epoch": 0.2930279284709664, "grad_norm": 0.6217637658119202, "learning_rate": 0.00019546523533626203, "loss": 1.2486, "step": 1823 }, { "epoch": 0.29318866787221215, "grad_norm": 0.7212017178535461, "learning_rate": 0.00019546021426243077, "loss": 1.4242, "step": 1824 }, { "epoch": 0.29334940727345793, "grad_norm": 0.6720592975616455, "learning_rate": 0.00019545519047493233, "loss": 1.3924, "step": 1825 }, { "epoch": 0.29351014667470365, "grad_norm": 0.7858215570449829, "learning_rate": 0.00019545016397390952, "loss": 1.5014, "step": 1826 }, { "epoch": 0.2936708860759494, "grad_norm": 0.6990989446640015, "learning_rate": 0.00019544513475950527, "loss": 1.4063, "step": 1827 }, { "epoch": 0.2938316254771951, "grad_norm": 0.641735851764679, "learning_rate": 0.00019544010283186248, "loss": 1.1877, "step": 1828 }, { "epoch": 0.2939923648784408, "grad_norm": 0.9111005663871765, "learning_rate": 0.0001954350681911242, "loss": 1.5982, "step": 1829 }, { "epoch": 0.29415310427968655, "grad_norm": 0.7046544551849365, "learning_rate": 0.00019543003083743364, "loss": 1.3368, "step": 1830 }, { "epoch": 0.2943138436809323, "grad_norm": 0.7334467172622681, "learning_rate": 0.00019542499077093391, "loss": 1.3433, "step": 1831 }, { "epoch": 0.294474583082178, "grad_norm": 0.7178285121917725, "learning_rate": 0.00019541994799176832, "loss": 1.268, "step": 1832 }, { "epoch": 0.2946353224834237, "grad_norm": 0.6926621198654175, "learning_rate": 0.00019541490250008016, "loss": 1.529, "step": 1833 }, { "epoch": 0.2947960618846695, "grad_norm": 0.702636182308197, "learning_rate": 0.00019540985429601299, "loss": 1.5333, "step": 1834 }, { "epoch": 0.2949568012859152, "grad_norm": 0.6292463541030884, "learning_rate": 0.00019540480337971018, "loss": 1.4662, "step": 1835 }, { "epoch": 0.29511754068716095, "grad_norm": 0.6803421378135681, "learning_rate": 0.0001953997497513154, "loss": 1.4529, "step": 1836 }, { "epoch": 0.2952782800884067, "grad_norm": 0.8185573220252991, "learning_rate": 0.00019539469341097227, "loss": 1.7448, "step": 1837 }, { "epoch": 0.2954390194896524, "grad_norm": 0.6489648818969727, "learning_rate": 0.00019538963435882458, "loss": 1.2475, "step": 1838 }, { "epoch": 0.2955997588908981, "grad_norm": 0.7701742053031921, "learning_rate": 0.00019538457259501608, "loss": 1.6101, "step": 1839 }, { "epoch": 0.29576049829214385, "grad_norm": 0.7491713762283325, "learning_rate": 0.00019537950811969067, "loss": 1.3031, "step": 1840 }, { "epoch": 0.29592123769338957, "grad_norm": 0.684795081615448, "learning_rate": 0.00019537444093299232, "loss": 1.3382, "step": 1841 }, { "epoch": 0.2960819770946353, "grad_norm": 0.791256308555603, "learning_rate": 0.00019536937103506517, "loss": 1.3718, "step": 1842 }, { "epoch": 0.2962427164958811, "grad_norm": 0.8579769134521484, "learning_rate": 0.0001953642984260532, "loss": 1.6708, "step": 1843 }, { "epoch": 0.2964034558971268, "grad_norm": 0.8508551716804504, "learning_rate": 0.0001953592231061007, "loss": 1.7773, "step": 1844 }, { "epoch": 0.2965641952983725, "grad_norm": 0.7096222639083862, "learning_rate": 0.0001953541450753519, "loss": 1.4191, "step": 1845 }, { "epoch": 0.29672493469961825, "grad_norm": 0.768624484539032, "learning_rate": 0.0001953490643339512, "loss": 1.508, "step": 1846 }, { "epoch": 0.29688567410086397, "grad_norm": 0.7323924899101257, "learning_rate": 0.00019534398088204296, "loss": 1.5496, "step": 1847 }, { "epoch": 0.2970464135021097, "grad_norm": 0.6767928600311279, "learning_rate": 0.0001953388947197718, "loss": 1.508, "step": 1848 }, { "epoch": 0.2972071529033554, "grad_norm": 0.7929152250289917, "learning_rate": 0.00019533380584728222, "loss": 1.7328, "step": 1849 }, { "epoch": 0.29736789230460114, "grad_norm": 0.6758385896682739, "learning_rate": 0.0001953287142647189, "loss": 1.2921, "step": 1850 }, { "epoch": 0.29752863170584687, "grad_norm": 0.6301515698432922, "learning_rate": 0.0001953236199722266, "loss": 1.3995, "step": 1851 }, { "epoch": 0.29768937110709265, "grad_norm": 0.7264116406440735, "learning_rate": 0.00019531852296995008, "loss": 1.4589, "step": 1852 }, { "epoch": 0.2978501105083384, "grad_norm": 0.7495424747467041, "learning_rate": 0.0001953134232580343, "loss": 1.5624, "step": 1853 }, { "epoch": 0.2980108499095841, "grad_norm": 0.7863854169845581, "learning_rate": 0.00019530832083662422, "loss": 1.358, "step": 1854 }, { "epoch": 0.2981715893108298, "grad_norm": 7.086207866668701, "learning_rate": 0.00019530321570586482, "loss": 1.7244, "step": 1855 }, { "epoch": 0.29833232871207555, "grad_norm": 0.7535813450813293, "learning_rate": 0.0001952981078659013, "loss": 1.7134, "step": 1856 }, { "epoch": 0.29849306811332127, "grad_norm": 0.7647032737731934, "learning_rate": 0.00019529299731687883, "loss": 1.2534, "step": 1857 }, { "epoch": 0.298653807514567, "grad_norm": 0.7452340126037598, "learning_rate": 0.00019528788405894274, "loss": 1.4634, "step": 1858 }, { "epoch": 0.2988145469158127, "grad_norm": 0.8338569402694702, "learning_rate": 0.00019528276809223832, "loss": 1.5753, "step": 1859 }, { "epoch": 0.29897528631705844, "grad_norm": 0.6665852665901184, "learning_rate": 0.00019527764941691103, "loss": 1.3292, "step": 1860 }, { "epoch": 0.2991360257183042, "grad_norm": 0.764971911907196, "learning_rate": 0.00019527252803310633, "loss": 1.4407, "step": 1861 }, { "epoch": 0.29929676511954995, "grad_norm": 0.6457330584526062, "learning_rate": 0.0001952674039409699, "loss": 1.4092, "step": 1862 }, { "epoch": 0.29945750452079567, "grad_norm": 0.7332586646080017, "learning_rate": 0.00019526227714064732, "loss": 1.5506, "step": 1863 }, { "epoch": 0.2996182439220414, "grad_norm": 0.7351371645927429, "learning_rate": 0.00019525714763228439, "loss": 1.6477, "step": 1864 }, { "epoch": 0.2997789833232871, "grad_norm": 0.7257173657417297, "learning_rate": 0.0001952520154160269, "loss": 1.5408, "step": 1865 }, { "epoch": 0.29993972272453284, "grad_norm": 0.6988550424575806, "learning_rate": 0.00019524688049202076, "loss": 1.2406, "step": 1866 }, { "epoch": 0.30010046212577857, "grad_norm": 0.7477837204933167, "learning_rate": 0.0001952417428604119, "loss": 1.648, "step": 1867 }, { "epoch": 0.3002612015270243, "grad_norm": 0.7383190989494324, "learning_rate": 0.0001952366025213464, "loss": 1.4676, "step": 1868 }, { "epoch": 0.30042194092827, "grad_norm": 0.7140072584152222, "learning_rate": 0.00019523145947497042, "loss": 1.2551, "step": 1869 }, { "epoch": 0.3005826803295158, "grad_norm": 0.7595769762992859, "learning_rate": 0.00019522631372143008, "loss": 1.5639, "step": 1870 }, { "epoch": 0.3007434197307615, "grad_norm": 0.8666731119155884, "learning_rate": 0.0001952211652608717, "loss": 1.5471, "step": 1871 }, { "epoch": 0.30090415913200724, "grad_norm": 0.7966938018798828, "learning_rate": 0.00019521601409344167, "loss": 1.5138, "step": 1872 }, { "epoch": 0.30106489853325297, "grad_norm": 0.8172158002853394, "learning_rate": 0.0001952108602192864, "loss": 1.7009, "step": 1873 }, { "epoch": 0.3012256379344987, "grad_norm": 0.7705273628234863, "learning_rate": 0.0001952057036385524, "loss": 1.5722, "step": 1874 }, { "epoch": 0.3013863773357444, "grad_norm": 0.6659271717071533, "learning_rate": 0.0001952005443513862, "loss": 1.4075, "step": 1875 }, { "epoch": 0.30154711673699014, "grad_norm": 0.818271815776825, "learning_rate": 0.00019519538235793457, "loss": 1.3662, "step": 1876 }, { "epoch": 0.30170785613823586, "grad_norm": 0.8522061109542847, "learning_rate": 0.00019519021765834418, "loss": 1.5306, "step": 1877 }, { "epoch": 0.3018685955394816, "grad_norm": 0.7067330479621887, "learning_rate": 0.00019518505025276187, "loss": 1.4088, "step": 1878 }, { "epoch": 0.30202933494072737, "grad_norm": 0.8015034794807434, "learning_rate": 0.0001951798801413345, "loss": 1.3394, "step": 1879 }, { "epoch": 0.3021900743419731, "grad_norm": 0.7417199611663818, "learning_rate": 0.00019517470732420908, "loss": 1.4584, "step": 1880 }, { "epoch": 0.3023508137432188, "grad_norm": 0.6581502556800842, "learning_rate": 0.0001951695318015327, "loss": 1.2796, "step": 1881 }, { "epoch": 0.30251155314446454, "grad_norm": 0.8534151911735535, "learning_rate": 0.00019516435357345237, "loss": 1.7451, "step": 1882 }, { "epoch": 0.30267229254571026, "grad_norm": 0.8175339102745056, "learning_rate": 0.0001951591726401154, "loss": 1.6403, "step": 1883 }, { "epoch": 0.302833031946956, "grad_norm": 0.738314151763916, "learning_rate": 0.00019515398900166902, "loss": 1.4523, "step": 1884 }, { "epoch": 0.3029937713482017, "grad_norm": 0.7905836701393127, "learning_rate": 0.0001951488026582606, "loss": 1.671, "step": 1885 }, { "epoch": 0.30315451074944744, "grad_norm": 0.6882311105728149, "learning_rate": 0.00019514361361003754, "loss": 1.4489, "step": 1886 }, { "epoch": 0.3033152501506932, "grad_norm": 0.6772042512893677, "learning_rate": 0.0001951384218571474, "loss": 1.5693, "step": 1887 }, { "epoch": 0.30347598955193894, "grad_norm": 0.6368200182914734, "learning_rate": 0.00019513322739973774, "loss": 1.3179, "step": 1888 }, { "epoch": 0.30363672895318466, "grad_norm": 0.7065636515617371, "learning_rate": 0.00019512803023795622, "loss": 1.6242, "step": 1889 }, { "epoch": 0.3037974683544304, "grad_norm": 0.7708346247673035, "learning_rate": 0.0001951228303719506, "loss": 1.5562, "step": 1890 }, { "epoch": 0.3039582077556761, "grad_norm": 0.8005404472351074, "learning_rate": 0.0001951176278018687, "loss": 1.7569, "step": 1891 }, { "epoch": 0.30411894715692184, "grad_norm": 0.7585576772689819, "learning_rate": 0.0001951124225278584, "loss": 1.413, "step": 1892 }, { "epoch": 0.30427968655816756, "grad_norm": 0.837166428565979, "learning_rate": 0.00019510721455006765, "loss": 1.6201, "step": 1893 }, { "epoch": 0.3044404259594133, "grad_norm": 0.6896528005599976, "learning_rate": 0.00019510200386864453, "loss": 1.5501, "step": 1894 }, { "epoch": 0.304601165360659, "grad_norm": 0.6824984550476074, "learning_rate": 0.00019509679048373716, "loss": 1.351, "step": 1895 }, { "epoch": 0.3047619047619048, "grad_norm": 0.861028254032135, "learning_rate": 0.00019509157439549373, "loss": 2.0512, "step": 1896 }, { "epoch": 0.3049226441631505, "grad_norm": 0.7813217639923096, "learning_rate": 0.00019508635560406252, "loss": 1.4574, "step": 1897 }, { "epoch": 0.30508338356439624, "grad_norm": 0.7047039270401001, "learning_rate": 0.00019508113410959192, "loss": 1.4411, "step": 1898 }, { "epoch": 0.30524412296564196, "grad_norm": 0.7046558856964111, "learning_rate": 0.00019507590991223032, "loss": 1.3002, "step": 1899 }, { "epoch": 0.3054048623668877, "grad_norm": 0.6301303505897522, "learning_rate": 0.00019507068301212626, "loss": 1.2334, "step": 1900 }, { "epoch": 0.3055656017681334, "grad_norm": 0.7324370741844177, "learning_rate": 0.00019506545340942827, "loss": 1.4422, "step": 1901 }, { "epoch": 0.30572634116937913, "grad_norm": 0.6943685412406921, "learning_rate": 0.0001950602211042851, "loss": 1.3434, "step": 1902 }, { "epoch": 0.30588708057062486, "grad_norm": 0.7574571371078491, "learning_rate": 0.00019505498609684543, "loss": 1.4419, "step": 1903 }, { "epoch": 0.3060478199718706, "grad_norm": 0.7797039747238159, "learning_rate": 0.0001950497483872581, "loss": 1.4408, "step": 1904 }, { "epoch": 0.30620855937311636, "grad_norm": 0.6961244344711304, "learning_rate": 0.00019504450797567198, "loss": 1.3691, "step": 1905 }, { "epoch": 0.3063692987743621, "grad_norm": 0.7512276768684387, "learning_rate": 0.00019503926486223603, "loss": 1.7382, "step": 1906 }, { "epoch": 0.3065300381756078, "grad_norm": 0.7178542613983154, "learning_rate": 0.00019503401904709934, "loss": 1.2817, "step": 1907 }, { "epoch": 0.30669077757685353, "grad_norm": 0.6552561521530151, "learning_rate": 0.00019502877053041102, "loss": 1.3702, "step": 1908 }, { "epoch": 0.30685151697809926, "grad_norm": 0.7941184639930725, "learning_rate": 0.00019502351931232028, "loss": 1.6928, "step": 1909 }, { "epoch": 0.307012256379345, "grad_norm": 0.6746003031730652, "learning_rate": 0.00019501826539297638, "loss": 1.4966, "step": 1910 }, { "epoch": 0.3071729957805907, "grad_norm": 0.6826715469360352, "learning_rate": 0.00019501300877252867, "loss": 1.4228, "step": 1911 }, { "epoch": 0.30733373518183643, "grad_norm": 0.7328510284423828, "learning_rate": 0.00019500774945112656, "loss": 1.4609, "step": 1912 }, { "epoch": 0.30749447458308216, "grad_norm": 0.7704254984855652, "learning_rate": 0.00019500248742891963, "loss": 1.6022, "step": 1913 }, { "epoch": 0.30765521398432794, "grad_norm": 0.6713767647743225, "learning_rate": 0.0001949972227060574, "loss": 1.3702, "step": 1914 }, { "epoch": 0.30781595338557366, "grad_norm": 0.84490567445755, "learning_rate": 0.00019499195528268953, "loss": 1.7268, "step": 1915 }, { "epoch": 0.3079766927868194, "grad_norm": 0.7415431141853333, "learning_rate": 0.00019498668515896582, "loss": 1.628, "step": 1916 }, { "epoch": 0.3081374321880651, "grad_norm": 0.7595188617706299, "learning_rate": 0.00019498141233503604, "loss": 1.3849, "step": 1917 }, { "epoch": 0.30829817158931083, "grad_norm": 0.6734738945960999, "learning_rate": 0.00019497613681105006, "loss": 1.3686, "step": 1918 }, { "epoch": 0.30845891099055656, "grad_norm": 0.7804045677185059, "learning_rate": 0.00019497085858715793, "loss": 1.3738, "step": 1919 }, { "epoch": 0.3086196503918023, "grad_norm": 0.6939103007316589, "learning_rate": 0.00019496557766350957, "loss": 1.4033, "step": 1920 }, { "epoch": 0.308780389793048, "grad_norm": 0.7344081401824951, "learning_rate": 0.0001949602940402552, "loss": 1.5768, "step": 1921 }, { "epoch": 0.30894112919429373, "grad_norm": 0.7366380095481873, "learning_rate": 0.000194955007717545, "loss": 1.4777, "step": 1922 }, { "epoch": 0.3091018685955395, "grad_norm": 0.8381442427635193, "learning_rate": 0.00019494971869552923, "loss": 1.681, "step": 1923 }, { "epoch": 0.30926260799678523, "grad_norm": 0.7479463815689087, "learning_rate": 0.00019494442697435825, "loss": 1.2921, "step": 1924 }, { "epoch": 0.30942334739803096, "grad_norm": 0.7435135245323181, "learning_rate": 0.0001949391325541825, "loss": 1.6195, "step": 1925 }, { "epoch": 0.3095840867992767, "grad_norm": 0.733580470085144, "learning_rate": 0.00019493383543515248, "loss": 1.3114, "step": 1926 }, { "epoch": 0.3097448262005224, "grad_norm": 0.8246141672134399, "learning_rate": 0.00019492853561741872, "loss": 1.47, "step": 1927 }, { "epoch": 0.30990556560176813, "grad_norm": 0.723534882068634, "learning_rate": 0.00019492323310113196, "loss": 1.5011, "step": 1928 }, { "epoch": 0.31006630500301385, "grad_norm": 0.8053141236305237, "learning_rate": 0.0001949179278864429, "loss": 1.5821, "step": 1929 }, { "epoch": 0.3102270444042596, "grad_norm": 0.7588620185852051, "learning_rate": 0.00019491261997350235, "loss": 1.5372, "step": 1930 }, { "epoch": 0.3103877838055053, "grad_norm": 0.7031117677688599, "learning_rate": 0.00019490730936246118, "loss": 1.4278, "step": 1931 }, { "epoch": 0.3105485232067511, "grad_norm": 0.7136793732643127, "learning_rate": 0.00019490199605347037, "loss": 1.4397, "step": 1932 }, { "epoch": 0.3107092626079968, "grad_norm": 0.8097334504127502, "learning_rate": 0.000194896680046681, "loss": 1.5244, "step": 1933 }, { "epoch": 0.31087000200924253, "grad_norm": 0.7743566632270813, "learning_rate": 0.00019489136134224415, "loss": 1.6595, "step": 1934 }, { "epoch": 0.31103074141048825, "grad_norm": 0.7147049307823181, "learning_rate": 0.00019488603994031103, "loss": 1.4342, "step": 1935 }, { "epoch": 0.311191480811734, "grad_norm": 0.7129170298576355, "learning_rate": 0.00019488071584103288, "loss": 1.4328, "step": 1936 }, { "epoch": 0.3113522202129797, "grad_norm": 0.7598567605018616, "learning_rate": 0.0001948753890445611, "loss": 1.4105, "step": 1937 }, { "epoch": 0.3115129596142254, "grad_norm": 0.6877344846725464, "learning_rate": 0.0001948700595510471, "loss": 1.4353, "step": 1938 }, { "epoch": 0.31167369901547115, "grad_norm": 0.6878502368927002, "learning_rate": 0.0001948647273606424, "loss": 1.3551, "step": 1939 }, { "epoch": 0.3118344384167169, "grad_norm": 0.6353282928466797, "learning_rate": 0.0001948593924734985, "loss": 1.3206, "step": 1940 }, { "epoch": 0.31199517781796265, "grad_norm": 0.8005960583686829, "learning_rate": 0.00019485405488976713, "loss": 1.4825, "step": 1941 }, { "epoch": 0.3121559172192084, "grad_norm": 0.7243984341621399, "learning_rate": 0.00019484871460960003, "loss": 1.4579, "step": 1942 }, { "epoch": 0.3123166566204541, "grad_norm": 0.6316642761230469, "learning_rate": 0.00019484337163314894, "loss": 1.3982, "step": 1943 }, { "epoch": 0.3124773960216998, "grad_norm": 0.7245927453041077, "learning_rate": 0.00019483802596056583, "loss": 1.5038, "step": 1944 }, { "epoch": 0.31263813542294555, "grad_norm": 0.7662303447723389, "learning_rate": 0.0001948326775920026, "loss": 1.4581, "step": 1945 }, { "epoch": 0.3127988748241913, "grad_norm": 0.7946755886077881, "learning_rate": 0.00019482732652761127, "loss": 1.4406, "step": 1946 }, { "epoch": 0.312959614225437, "grad_norm": 0.7217317819595337, "learning_rate": 0.00019482197276754403, "loss": 1.2919, "step": 1947 }, { "epoch": 0.3131203536266827, "grad_norm": 0.8502883911132812, "learning_rate": 0.00019481661631195305, "loss": 1.6034, "step": 1948 }, { "epoch": 0.31328109302792845, "grad_norm": 0.5980620980262756, "learning_rate": 0.00019481125716099058, "loss": 1.2239, "step": 1949 }, { "epoch": 0.3134418324291742, "grad_norm": 0.7837242484092712, "learning_rate": 0.000194805895314809, "loss": 1.5822, "step": 1950 }, { "epoch": 0.31360257183041995, "grad_norm": 0.7406986951828003, "learning_rate": 0.00019480053077356067, "loss": 1.4287, "step": 1951 }, { "epoch": 0.3137633112316657, "grad_norm": 0.8091289401054382, "learning_rate": 0.00019479516353739814, "loss": 1.871, "step": 1952 }, { "epoch": 0.3139240506329114, "grad_norm": 0.6862747669219971, "learning_rate": 0.00019478979360647395, "loss": 1.2912, "step": 1953 }, { "epoch": 0.3140847900341571, "grad_norm": 0.6282302737236023, "learning_rate": 0.00019478442098094076, "loss": 1.4299, "step": 1954 }, { "epoch": 0.31424552943540285, "grad_norm": 0.718873143196106, "learning_rate": 0.00019477904566095132, "loss": 1.5225, "step": 1955 }, { "epoch": 0.3144062688366486, "grad_norm": 0.7712340354919434, "learning_rate": 0.00019477366764665845, "loss": 1.4091, "step": 1956 }, { "epoch": 0.3145670082378943, "grad_norm": 0.7332442998886108, "learning_rate": 0.00019476828693821503, "loss": 1.3574, "step": 1957 }, { "epoch": 0.31472774763914, "grad_norm": 0.6750005483627319, "learning_rate": 0.00019476290353577397, "loss": 1.3321, "step": 1958 }, { "epoch": 0.3148884870403858, "grad_norm": 0.7110670208930969, "learning_rate": 0.0001947575174394883, "loss": 1.3359, "step": 1959 }, { "epoch": 0.3150492264416315, "grad_norm": 0.7288092374801636, "learning_rate": 0.00019475212864951118, "loss": 1.6711, "step": 1960 }, { "epoch": 0.31520996584287725, "grad_norm": 0.8349742293357849, "learning_rate": 0.00019474673716599582, "loss": 1.5136, "step": 1961 }, { "epoch": 0.315370705244123, "grad_norm": 0.6484586596488953, "learning_rate": 0.00019474134298909542, "loss": 1.3504, "step": 1962 }, { "epoch": 0.3155314446453687, "grad_norm": 0.7560660243034363, "learning_rate": 0.00019473594611896334, "loss": 1.5919, "step": 1963 }, { "epoch": 0.3156921840466144, "grad_norm": 0.715729296207428, "learning_rate": 0.000194730546555753, "loss": 1.5641, "step": 1964 }, { "epoch": 0.31585292344786015, "grad_norm": 0.7323583960533142, "learning_rate": 0.0001947251442996179, "loss": 1.3648, "step": 1965 }, { "epoch": 0.31601366284910587, "grad_norm": 0.8011003732681274, "learning_rate": 0.00019471973935071164, "loss": 1.4948, "step": 1966 }, { "epoch": 0.3161744022503516, "grad_norm": 0.7884029150009155, "learning_rate": 0.0001947143317091878, "loss": 1.4809, "step": 1967 }, { "epoch": 0.3163351416515974, "grad_norm": 0.6941714882850647, "learning_rate": 0.00019470892137520016, "loss": 1.489, "step": 1968 }, { "epoch": 0.3164958810528431, "grad_norm": 0.7309722900390625, "learning_rate": 0.0001947035083489025, "loss": 1.4566, "step": 1969 }, { "epoch": 0.3166566204540888, "grad_norm": 0.7765544056892395, "learning_rate": 0.0001946980926304487, "loss": 1.657, "step": 1970 }, { "epoch": 0.31681735985533455, "grad_norm": 0.7176536917686462, "learning_rate": 0.00019469267421999274, "loss": 1.609, "step": 1971 }, { "epoch": 0.31697809925658027, "grad_norm": 0.8065797090530396, "learning_rate": 0.0001946872531176886, "loss": 1.4743, "step": 1972 }, { "epoch": 0.317138838657826, "grad_norm": 0.7637238502502441, "learning_rate": 0.0001946818293236904, "loss": 1.3319, "step": 1973 }, { "epoch": 0.3172995780590717, "grad_norm": 0.8534308671951294, "learning_rate": 0.0001946764028381524, "loss": 1.8366, "step": 1974 }, { "epoch": 0.31746031746031744, "grad_norm": 0.7124952673912048, "learning_rate": 0.00019467097366122871, "loss": 1.3296, "step": 1975 }, { "epoch": 0.31762105686156317, "grad_norm": 0.6884899139404297, "learning_rate": 0.0001946655417930738, "loss": 1.4174, "step": 1976 }, { "epoch": 0.31778179626280895, "grad_norm": 0.7411704063415527, "learning_rate": 0.00019466010723384201, "loss": 1.408, "step": 1977 }, { "epoch": 0.31794253566405467, "grad_norm": 0.8043212294578552, "learning_rate": 0.00019465466998368788, "loss": 1.5707, "step": 1978 }, { "epoch": 0.3181032750653004, "grad_norm": 0.7290241718292236, "learning_rate": 0.00019464923004276595, "loss": 1.6667, "step": 1979 }, { "epoch": 0.3182640144665461, "grad_norm": 0.7794786691665649, "learning_rate": 0.00019464378741123085, "loss": 1.5291, "step": 1980 }, { "epoch": 0.31842475386779184, "grad_norm": 0.8076086640357971, "learning_rate": 0.00019463834208923727, "loss": 1.6548, "step": 1981 }, { "epoch": 0.31858549326903757, "grad_norm": 0.7052130699157715, "learning_rate": 0.0001946328940769401, "loss": 1.3053, "step": 1982 }, { "epoch": 0.3187462326702833, "grad_norm": 0.7842814326286316, "learning_rate": 0.00019462744337449413, "loss": 1.6753, "step": 1983 }, { "epoch": 0.318906972071529, "grad_norm": 0.8086152672767639, "learning_rate": 0.00019462198998205433, "loss": 1.7925, "step": 1984 }, { "epoch": 0.31906771147277474, "grad_norm": 0.6950156092643738, "learning_rate": 0.00019461653389977576, "loss": 1.4027, "step": 1985 }, { "epoch": 0.3192284508740205, "grad_norm": 0.6914092898368835, "learning_rate": 0.00019461107512781347, "loss": 1.5741, "step": 1986 }, { "epoch": 0.31938919027526624, "grad_norm": 0.7415165305137634, "learning_rate": 0.00019460561366632266, "loss": 1.4936, "step": 1987 }, { "epoch": 0.31954992967651197, "grad_norm": 0.7181268930435181, "learning_rate": 0.00019460014951545857, "loss": 1.4924, "step": 1988 }, { "epoch": 0.3197106690777577, "grad_norm": 0.7184364795684814, "learning_rate": 0.00019459468267537657, "loss": 1.6843, "step": 1989 }, { "epoch": 0.3198714084790034, "grad_norm": 0.7725842595100403, "learning_rate": 0.00019458921314623203, "loss": 1.639, "step": 1990 }, { "epoch": 0.32003214788024914, "grad_norm": 0.7753854990005493, "learning_rate": 0.00019458374092818045, "loss": 1.5381, "step": 1991 }, { "epoch": 0.32019288728149486, "grad_norm": 0.7399712800979614, "learning_rate": 0.0001945782660213774, "loss": 1.5562, "step": 1992 }, { "epoch": 0.3203536266827406, "grad_norm": 0.7187180519104004, "learning_rate": 0.00019457278842597845, "loss": 1.4769, "step": 1993 }, { "epoch": 0.3205143660839863, "grad_norm": 0.7596205472946167, "learning_rate": 0.0001945673081421394, "loss": 1.5454, "step": 1994 }, { "epoch": 0.3206751054852321, "grad_norm": 0.6996777653694153, "learning_rate": 0.000194561825170016, "loss": 1.6346, "step": 1995 }, { "epoch": 0.3208358448864778, "grad_norm": 0.6790615320205688, "learning_rate": 0.0001945563395097641, "loss": 1.3314, "step": 1996 }, { "epoch": 0.32099658428772354, "grad_norm": 0.8039390444755554, "learning_rate": 0.00019455085116153967, "loss": 1.4235, "step": 1997 }, { "epoch": 0.32115732368896927, "grad_norm": 0.8522948026657104, "learning_rate": 0.00019454536012549875, "loss": 1.4465, "step": 1998 }, { "epoch": 0.321318063090215, "grad_norm": 0.7068861126899719, "learning_rate": 0.00019453986640179737, "loss": 1.3643, "step": 1999 }, { "epoch": 0.3214788024914607, "grad_norm": 0.6214325428009033, "learning_rate": 0.00019453436999059172, "loss": 1.2528, "step": 2000 }, { "epoch": 0.3214788024914607, "eval_loss": 1.5127888917922974, "eval_runtime": 46.2201, "eval_samples_per_second": 5.431, "eval_steps_per_second": 2.726, "step": 2000 }, { "epoch": 0.32163954189270644, "grad_norm": 0.7505484223365784, "learning_rate": 0.00019452887089203806, "loss": 1.4404, "step": 2001 }, { "epoch": 0.32180028129395216, "grad_norm": 0.6439511775970459, "learning_rate": 0.00019452336910629275, "loss": 1.1809, "step": 2002 }, { "epoch": 0.3219610206951979, "grad_norm": 0.8749121427536011, "learning_rate": 0.00019451786463351215, "loss": 1.7298, "step": 2003 }, { "epoch": 0.32212176009644367, "grad_norm": 0.7245616912841797, "learning_rate": 0.00019451235747385272, "loss": 1.6238, "step": 2004 }, { "epoch": 0.3222824994976894, "grad_norm": 0.6500371098518372, "learning_rate": 0.00019450684762747106, "loss": 1.3656, "step": 2005 }, { "epoch": 0.3224432388989351, "grad_norm": 0.6778035759925842, "learning_rate": 0.00019450133509452372, "loss": 1.2043, "step": 2006 }, { "epoch": 0.32260397830018084, "grad_norm": 0.7743569016456604, "learning_rate": 0.00019449581987516753, "loss": 1.6449, "step": 2007 }, { "epoch": 0.32276471770142656, "grad_norm": 0.7830923199653625, "learning_rate": 0.00019449030196955918, "loss": 1.3967, "step": 2008 }, { "epoch": 0.3229254571026723, "grad_norm": 0.7440866231918335, "learning_rate": 0.00019448478137785554, "loss": 1.6733, "step": 2009 }, { "epoch": 0.323086196503918, "grad_norm": 0.676243245601654, "learning_rate": 0.00019447925810021357, "loss": 1.3699, "step": 2010 }, { "epoch": 0.32324693590516373, "grad_norm": 0.8330350518226624, "learning_rate": 0.00019447373213679026, "loss": 1.708, "step": 2011 }, { "epoch": 0.32340767530640946, "grad_norm": 0.7480425238609314, "learning_rate": 0.0001944682034877427, "loss": 1.4225, "step": 2012 }, { "epoch": 0.32356841470765524, "grad_norm": 0.7006250619888306, "learning_rate": 0.0001944626721532281, "loss": 1.3687, "step": 2013 }, { "epoch": 0.32372915410890096, "grad_norm": 1.0370951890945435, "learning_rate": 0.00019445713813340363, "loss": 1.5836, "step": 2014 }, { "epoch": 0.3238898935101467, "grad_norm": 0.7768998742103577, "learning_rate": 0.00019445160142842665, "loss": 1.5512, "step": 2015 }, { "epoch": 0.3240506329113924, "grad_norm": 0.8018186092376709, "learning_rate": 0.00019444606203845453, "loss": 1.5544, "step": 2016 }, { "epoch": 0.32421137231263814, "grad_norm": 0.6360357403755188, "learning_rate": 0.00019444051996364477, "loss": 1.2352, "step": 2017 }, { "epoch": 0.32437211171388386, "grad_norm": 0.8317101001739502, "learning_rate": 0.00019443497520415484, "loss": 1.2432, "step": 2018 }, { "epoch": 0.3245328511151296, "grad_norm": 0.7960757613182068, "learning_rate": 0.00019442942776014247, "loss": 1.5486, "step": 2019 }, { "epoch": 0.3246935905163753, "grad_norm": 0.8305114507675171, "learning_rate": 0.0001944238776317653, "loss": 1.5988, "step": 2020 }, { "epoch": 0.32485432991762103, "grad_norm": 0.7590983510017395, "learning_rate": 0.0001944183248191811, "loss": 1.5056, "step": 2021 }, { "epoch": 0.3250150693188668, "grad_norm": 0.7335648536682129, "learning_rate": 0.00019441276932254773, "loss": 1.5907, "step": 2022 }, { "epoch": 0.32517580872011254, "grad_norm": 0.7400011420249939, "learning_rate": 0.00019440721114202312, "loss": 1.545, "step": 2023 }, { "epoch": 0.32533654812135826, "grad_norm": 0.6664589643478394, "learning_rate": 0.00019440165027776528, "loss": 1.2951, "step": 2024 }, { "epoch": 0.325497287522604, "grad_norm": 0.7551453709602356, "learning_rate": 0.0001943960867299323, "loss": 1.6502, "step": 2025 }, { "epoch": 0.3256580269238497, "grad_norm": 0.7481452226638794, "learning_rate": 0.00019439052049868228, "loss": 1.5212, "step": 2026 }, { "epoch": 0.32581876632509543, "grad_norm": 0.6871473789215088, "learning_rate": 0.0001943849515841735, "loss": 1.4507, "step": 2027 }, { "epoch": 0.32597950572634116, "grad_norm": 0.6986726522445679, "learning_rate": 0.00019437937998656427, "loss": 1.3945, "step": 2028 }, { "epoch": 0.3261402451275869, "grad_norm": 0.8094003796577454, "learning_rate": 0.00019437380570601297, "loss": 1.4512, "step": 2029 }, { "epoch": 0.3263009845288326, "grad_norm": 0.8168009519577026, "learning_rate": 0.00019436822874267803, "loss": 1.4616, "step": 2030 }, { "epoch": 0.3264617239300784, "grad_norm": 0.6345058679580688, "learning_rate": 0.00019436264909671803, "loss": 1.2907, "step": 2031 }, { "epoch": 0.3266224633313241, "grad_norm": 0.6828338503837585, "learning_rate": 0.00019435706676829156, "loss": 1.3405, "step": 2032 }, { "epoch": 0.32678320273256983, "grad_norm": 0.6946799159049988, "learning_rate": 0.00019435148175755735, "loss": 1.443, "step": 2033 }, { "epoch": 0.32694394213381556, "grad_norm": 0.7313020825386047, "learning_rate": 0.00019434589406467409, "loss": 1.5371, "step": 2034 }, { "epoch": 0.3271046815350613, "grad_norm": 0.7641522288322449, "learning_rate": 0.0001943403036898007, "loss": 1.5516, "step": 2035 }, { "epoch": 0.327265420936307, "grad_norm": 0.6273677945137024, "learning_rate": 0.00019433471063309604, "loss": 1.2381, "step": 2036 }, { "epoch": 0.32742616033755273, "grad_norm": 0.6714562773704529, "learning_rate": 0.00019432911489471915, "loss": 1.3644, "step": 2037 }, { "epoch": 0.32758689973879845, "grad_norm": 0.7753521203994751, "learning_rate": 0.00019432351647482904, "loss": 1.687, "step": 2038 }, { "epoch": 0.3277476391400442, "grad_norm": 0.801613450050354, "learning_rate": 0.00019431791537358493, "loss": 1.4345, "step": 2039 }, { "epoch": 0.32790837854128996, "grad_norm": 0.6919375061988831, "learning_rate": 0.000194312311591146, "loss": 1.564, "step": 2040 }, { "epoch": 0.3280691179425357, "grad_norm": 0.670372486114502, "learning_rate": 0.00019430670512767158, "loss": 1.2775, "step": 2041 }, { "epoch": 0.3282298573437814, "grad_norm": 0.8679447770118713, "learning_rate": 0.000194301095983321, "loss": 1.6219, "step": 2042 }, { "epoch": 0.32839059674502713, "grad_norm": 0.7232159972190857, "learning_rate": 0.00019429548415825378, "loss": 1.5247, "step": 2043 }, { "epoch": 0.32855133614627285, "grad_norm": 0.7579728364944458, "learning_rate": 0.00019428986965262937, "loss": 1.5867, "step": 2044 }, { "epoch": 0.3287120755475186, "grad_norm": 0.7473894357681274, "learning_rate": 0.00019428425246660743, "loss": 1.5542, "step": 2045 }, { "epoch": 0.3288728149487643, "grad_norm": 0.7803595662117004, "learning_rate": 0.00019427863260034763, "loss": 1.874, "step": 2046 }, { "epoch": 0.32903355435001, "grad_norm": 0.6553272604942322, "learning_rate": 0.0001942730100540097, "loss": 1.3356, "step": 2047 }, { "epoch": 0.32919429375125575, "grad_norm": 0.7175074815750122, "learning_rate": 0.00019426738482775351, "loss": 1.3497, "step": 2048 }, { "epoch": 0.32935503315250153, "grad_norm": 0.6783370971679688, "learning_rate": 0.00019426175692173895, "loss": 1.4134, "step": 2049 }, { "epoch": 0.32951577255374725, "grad_norm": 0.7931109666824341, "learning_rate": 0.000194256126336126, "loss": 1.5206, "step": 2050 }, { "epoch": 0.329676511954993, "grad_norm": 0.7344026565551758, "learning_rate": 0.00019425049307107475, "loss": 1.6285, "step": 2051 }, { "epoch": 0.3298372513562387, "grad_norm": 0.7932571768760681, "learning_rate": 0.0001942448571267453, "loss": 1.4662, "step": 2052 }, { "epoch": 0.3299979907574844, "grad_norm": 0.688258945941925, "learning_rate": 0.00019423921850329792, "loss": 1.6924, "step": 2053 }, { "epoch": 0.33015873015873015, "grad_norm": 0.6177288889884949, "learning_rate": 0.00019423357720089283, "loss": 1.3753, "step": 2054 }, { "epoch": 0.3303194695599759, "grad_norm": 0.8088375329971313, "learning_rate": 0.00019422793321969043, "loss": 1.5282, "step": 2055 }, { "epoch": 0.3304802089612216, "grad_norm": 0.9034358263015747, "learning_rate": 0.00019422228655985118, "loss": 1.7698, "step": 2056 }, { "epoch": 0.3306409483624673, "grad_norm": 0.7226942777633667, "learning_rate": 0.00019421663722153556, "loss": 1.5758, "step": 2057 }, { "epoch": 0.3308016877637131, "grad_norm": 0.6641901135444641, "learning_rate": 0.0001942109852049042, "loss": 1.4353, "step": 2058 }, { "epoch": 0.33096242716495883, "grad_norm": 0.688288152217865, "learning_rate": 0.00019420533051011776, "loss": 1.4219, "step": 2059 }, { "epoch": 0.33112316656620455, "grad_norm": 0.7629148960113525, "learning_rate": 0.00019419967313733698, "loss": 1.4231, "step": 2060 }, { "epoch": 0.3312839059674503, "grad_norm": 0.7656545639038086, "learning_rate": 0.0001941940130867227, "loss": 1.4985, "step": 2061 }, { "epoch": 0.331444645368696, "grad_norm": 0.609709620475769, "learning_rate": 0.0001941883503584358, "loss": 1.3435, "step": 2062 }, { "epoch": 0.3316053847699417, "grad_norm": 0.8883032202720642, "learning_rate": 0.00019418268495263724, "loss": 1.5118, "step": 2063 }, { "epoch": 0.33176612417118745, "grad_norm": 0.716799259185791, "learning_rate": 0.00019417701686948811, "loss": 1.3509, "step": 2064 }, { "epoch": 0.3319268635724332, "grad_norm": 0.7871211171150208, "learning_rate": 0.00019417134610914954, "loss": 1.5786, "step": 2065 }, { "epoch": 0.3320876029736789, "grad_norm": 0.7642374634742737, "learning_rate": 0.0001941656726717827, "loss": 1.3267, "step": 2066 }, { "epoch": 0.3322483423749247, "grad_norm": 0.7114086747169495, "learning_rate": 0.0001941599965575489, "loss": 1.3739, "step": 2067 }, { "epoch": 0.3324090817761704, "grad_norm": 0.7489867806434631, "learning_rate": 0.00019415431776660945, "loss": 1.5421, "step": 2068 }, { "epoch": 0.3325698211774161, "grad_norm": 0.7904501557350159, "learning_rate": 0.00019414863629912586, "loss": 1.5762, "step": 2069 }, { "epoch": 0.33273056057866185, "grad_norm": 0.7209787368774414, "learning_rate": 0.00019414295215525957, "loss": 1.317, "step": 2070 }, { "epoch": 0.3328912999799076, "grad_norm": 0.7359424829483032, "learning_rate": 0.00019413726533517217, "loss": 1.6589, "step": 2071 }, { "epoch": 0.3330520393811533, "grad_norm": 0.7597746253013611, "learning_rate": 0.00019413157583902536, "loss": 1.4738, "step": 2072 }, { "epoch": 0.333212778782399, "grad_norm": 0.816109836101532, "learning_rate": 0.00019412588366698086, "loss": 1.5473, "step": 2073 }, { "epoch": 0.33337351818364475, "grad_norm": 0.708386242389679, "learning_rate": 0.00019412018881920045, "loss": 1.1374, "step": 2074 }, { "epoch": 0.33353425758489047, "grad_norm": 0.6714842319488525, "learning_rate": 0.00019411449129584605, "loss": 1.2333, "step": 2075 }, { "epoch": 0.33369499698613625, "grad_norm": 0.7182421088218689, "learning_rate": 0.00019410879109707966, "loss": 1.6015, "step": 2076 }, { "epoch": 0.333855736387382, "grad_norm": 0.8135740160942078, "learning_rate": 0.00019410308822306324, "loss": 1.5191, "step": 2077 }, { "epoch": 0.3340164757886277, "grad_norm": 0.7274398803710938, "learning_rate": 0.00019409738267395894, "loss": 1.2882, "step": 2078 }, { "epoch": 0.3341772151898734, "grad_norm": 0.8338702917098999, "learning_rate": 0.000194091674449929, "loss": 1.4068, "step": 2079 }, { "epoch": 0.33433795459111915, "grad_norm": 0.7535920739173889, "learning_rate": 0.00019408596355113562, "loss": 1.5902, "step": 2080 }, { "epoch": 0.33449869399236487, "grad_norm": 0.9295662045478821, "learning_rate": 0.00019408024997774116, "loss": 1.5352, "step": 2081 }, { "epoch": 0.3346594333936106, "grad_norm": 0.7534677386283875, "learning_rate": 0.0001940745337299081, "loss": 1.3696, "step": 2082 }, { "epoch": 0.3348201727948563, "grad_norm": 0.7092940211296082, "learning_rate": 0.00019406881480779883, "loss": 1.5515, "step": 2083 }, { "epoch": 0.33498091219610204, "grad_norm": 0.7924948334693909, "learning_rate": 0.00019406309321157603, "loss": 1.6316, "step": 2084 }, { "epoch": 0.3351416515973478, "grad_norm": 0.7275951504707336, "learning_rate": 0.0001940573689414023, "loss": 1.3749, "step": 2085 }, { "epoch": 0.33530239099859355, "grad_norm": 0.8009904026985168, "learning_rate": 0.00019405164199744036, "loss": 1.4478, "step": 2086 }, { "epoch": 0.33546313039983927, "grad_norm": 0.7103733420372009, "learning_rate": 0.00019404591237985304, "loss": 1.5939, "step": 2087 }, { "epoch": 0.335623869801085, "grad_norm": 0.7061387896537781, "learning_rate": 0.00019404018008880317, "loss": 1.4456, "step": 2088 }, { "epoch": 0.3357846092023307, "grad_norm": 0.7599693536758423, "learning_rate": 0.00019403444512445375, "loss": 1.345, "step": 2089 }, { "epoch": 0.33594534860357644, "grad_norm": 0.7330188155174255, "learning_rate": 0.00019402870748696778, "loss": 1.4323, "step": 2090 }, { "epoch": 0.33610608800482217, "grad_norm": 0.9368942379951477, "learning_rate": 0.00019402296717650836, "loss": 1.6319, "step": 2091 }, { "epoch": 0.3362668274060679, "grad_norm": 0.747577428817749, "learning_rate": 0.0001940172241932387, "loss": 1.5652, "step": 2092 }, { "epoch": 0.3364275668073136, "grad_norm": 0.6478877663612366, "learning_rate": 0.00019401147853732206, "loss": 1.275, "step": 2093 }, { "epoch": 0.3365883062085594, "grad_norm": 0.6463618874549866, "learning_rate": 0.00019400573020892173, "loss": 1.1353, "step": 2094 }, { "epoch": 0.3367490456098051, "grad_norm": 0.8834207057952881, "learning_rate": 0.00019399997920820113, "loss": 1.8025, "step": 2095 }, { "epoch": 0.33690978501105084, "grad_norm": 0.6607515811920166, "learning_rate": 0.00019399422553532377, "loss": 1.3378, "step": 2096 }, { "epoch": 0.33707052441229657, "grad_norm": 0.7668684124946594, "learning_rate": 0.00019398846919045323, "loss": 1.528, "step": 2097 }, { "epoch": 0.3372312638135423, "grad_norm": 0.8688121438026428, "learning_rate": 0.00019398271017375307, "loss": 1.4391, "step": 2098 }, { "epoch": 0.337392003214788, "grad_norm": 0.8070032596588135, "learning_rate": 0.0001939769484853871, "loss": 1.4995, "step": 2099 }, { "epoch": 0.33755274261603374, "grad_norm": 0.738919734954834, "learning_rate": 0.000193971184125519, "loss": 1.532, "step": 2100 }, { "epoch": 0.33771348201727946, "grad_norm": 0.7620943188667297, "learning_rate": 0.00019396541709431272, "loss": 1.5063, "step": 2101 }, { "epoch": 0.3378742214185252, "grad_norm": 0.8296343684196472, "learning_rate": 0.0001939596473919322, "loss": 1.6175, "step": 2102 }, { "epoch": 0.33803496081977097, "grad_norm": 0.7092347145080566, "learning_rate": 0.0001939538750185414, "loss": 1.4605, "step": 2103 }, { "epoch": 0.3381957002210167, "grad_norm": 0.7562525868415833, "learning_rate": 0.00019394809997430444, "loss": 1.4543, "step": 2104 }, { "epoch": 0.3383564396222624, "grad_norm": 0.8061599731445312, "learning_rate": 0.0001939423222593855, "loss": 1.4537, "step": 2105 }, { "epoch": 0.33851717902350814, "grad_norm": 0.8124790787696838, "learning_rate": 0.0001939365418739488, "loss": 1.5694, "step": 2106 }, { "epoch": 0.33867791842475387, "grad_norm": 0.9142446517944336, "learning_rate": 0.0001939307588181587, "loss": 1.5766, "step": 2107 }, { "epoch": 0.3388386578259996, "grad_norm": 0.8223974108695984, "learning_rate": 0.00019392497309217953, "loss": 1.8317, "step": 2108 }, { "epoch": 0.3389993972272453, "grad_norm": 0.7288771867752075, "learning_rate": 0.00019391918469617583, "loss": 1.438, "step": 2109 }, { "epoch": 0.33916013662849104, "grad_norm": 0.8331245183944702, "learning_rate": 0.00019391339363031214, "loss": 1.4539, "step": 2110 }, { "epoch": 0.33932087602973676, "grad_norm": 0.7558780908584595, "learning_rate": 0.000193907599894753, "loss": 1.5721, "step": 2111 }, { "epoch": 0.33948161543098254, "grad_norm": 0.8709877133369446, "learning_rate": 0.00019390180348966322, "loss": 1.8281, "step": 2112 }, { "epoch": 0.33964235483222827, "grad_norm": 0.723092794418335, "learning_rate": 0.00019389600441520755, "loss": 1.2927, "step": 2113 }, { "epoch": 0.339803094233474, "grad_norm": 0.6817798614501953, "learning_rate": 0.00019389020267155077, "loss": 1.455, "step": 2114 }, { "epoch": 0.3399638336347197, "grad_norm": 0.7030403017997742, "learning_rate": 0.0001938843982588579, "loss": 1.4019, "step": 2115 }, { "epoch": 0.34012457303596544, "grad_norm": 0.8767930269241333, "learning_rate": 0.0001938785911772939, "loss": 1.5713, "step": 2116 }, { "epoch": 0.34028531243721116, "grad_norm": 0.7330083847045898, "learning_rate": 0.00019387278142702385, "loss": 1.3688, "step": 2117 }, { "epoch": 0.3404460518384569, "grad_norm": 0.8090125918388367, "learning_rate": 0.0001938669690082129, "loss": 1.4336, "step": 2118 }, { "epoch": 0.3406067912397026, "grad_norm": 0.7080844044685364, "learning_rate": 0.00019386115392102627, "loss": 1.3399, "step": 2119 }, { "epoch": 0.34076753064094834, "grad_norm": 0.7361130714416504, "learning_rate": 0.0001938553361656293, "loss": 1.6709, "step": 2120 }, { "epoch": 0.3409282700421941, "grad_norm": 0.7920860648155212, "learning_rate": 0.00019384951574218735, "loss": 1.4049, "step": 2121 }, { "epoch": 0.34108900944343984, "grad_norm": 0.8998194932937622, "learning_rate": 0.00019384369265086588, "loss": 1.5314, "step": 2122 }, { "epoch": 0.34124974884468556, "grad_norm": 0.7443515658378601, "learning_rate": 0.00019383786689183046, "loss": 1.5369, "step": 2123 }, { "epoch": 0.3414104882459313, "grad_norm": 0.6997665166854858, "learning_rate": 0.00019383203846524662, "loss": 1.3659, "step": 2124 }, { "epoch": 0.341571227647177, "grad_norm": 0.6972091794013977, "learning_rate": 0.0001938262073712801, "loss": 1.3672, "step": 2125 }, { "epoch": 0.34173196704842274, "grad_norm": 0.675041913986206, "learning_rate": 0.0001938203736100967, "loss": 1.4124, "step": 2126 }, { "epoch": 0.34189270644966846, "grad_norm": 0.8498409390449524, "learning_rate": 0.00019381453718186217, "loss": 1.5171, "step": 2127 }, { "epoch": 0.3420534458509142, "grad_norm": 0.7085187435150146, "learning_rate": 0.0001938086980867425, "loss": 1.342, "step": 2128 }, { "epoch": 0.3422141852521599, "grad_norm": 0.7532504200935364, "learning_rate": 0.00019380285632490364, "loss": 1.5053, "step": 2129 }, { "epoch": 0.3423749246534057, "grad_norm": 0.720268964767456, "learning_rate": 0.00019379701189651164, "loss": 1.4802, "step": 2130 }, { "epoch": 0.3425356640546514, "grad_norm": 0.787757396697998, "learning_rate": 0.0001937911648017327, "loss": 1.427, "step": 2131 }, { "epoch": 0.34269640345589714, "grad_norm": 0.7282241582870483, "learning_rate": 0.00019378531504073295, "loss": 1.4105, "step": 2132 }, { "epoch": 0.34285714285714286, "grad_norm": 0.8171801567077637, "learning_rate": 0.00019377946261367877, "loss": 1.4436, "step": 2133 }, { "epoch": 0.3430178822583886, "grad_norm": 0.7977651357650757, "learning_rate": 0.00019377360752073647, "loss": 1.5832, "step": 2134 }, { "epoch": 0.3431786216596343, "grad_norm": 0.6639115810394287, "learning_rate": 0.0001937677497620725, "loss": 1.2875, "step": 2135 }, { "epoch": 0.34333936106088003, "grad_norm": 0.7354109883308411, "learning_rate": 0.00019376188933785345, "loss": 1.3889, "step": 2136 }, { "epoch": 0.34350010046212576, "grad_norm": 0.7293756604194641, "learning_rate": 0.0001937560262482458, "loss": 1.4332, "step": 2137 }, { "epoch": 0.34366083986337154, "grad_norm": 0.7222396731376648, "learning_rate": 0.0001937501604934163, "loss": 1.5018, "step": 2138 }, { "epoch": 0.34382157926461726, "grad_norm": 0.7465559244155884, "learning_rate": 0.00019374429207353168, "loss": 1.5506, "step": 2139 }, { "epoch": 0.343982318665863, "grad_norm": 0.7535027265548706, "learning_rate": 0.00019373842098875877, "loss": 1.375, "step": 2140 }, { "epoch": 0.3441430580671087, "grad_norm": 0.8267285227775574, "learning_rate": 0.0001937325472392644, "loss": 1.4734, "step": 2141 }, { "epoch": 0.34430379746835443, "grad_norm": 0.7610061168670654, "learning_rate": 0.00019372667082521568, "loss": 1.4622, "step": 2142 }, { "epoch": 0.34446453686960016, "grad_norm": 0.7598025798797607, "learning_rate": 0.00019372079174677953, "loss": 1.4476, "step": 2143 }, { "epoch": 0.3446252762708459, "grad_norm": 0.8619739413261414, "learning_rate": 0.00019371491000412316, "loss": 1.5087, "step": 2144 }, { "epoch": 0.3447860156720916, "grad_norm": 0.8524287939071655, "learning_rate": 0.0001937090255974137, "loss": 1.6512, "step": 2145 }, { "epoch": 0.34494675507333733, "grad_norm": 0.7970044612884521, "learning_rate": 0.00019370313852681853, "loss": 1.3124, "step": 2146 }, { "epoch": 0.3451074944745831, "grad_norm": 0.8038520812988281, "learning_rate": 0.00019369724879250488, "loss": 1.5086, "step": 2147 }, { "epoch": 0.34526823387582883, "grad_norm": 0.759846568107605, "learning_rate": 0.00019369135639464027, "loss": 1.4324, "step": 2148 }, { "epoch": 0.34542897327707456, "grad_norm": 0.6999998092651367, "learning_rate": 0.00019368546133339214, "loss": 1.3576, "step": 2149 }, { "epoch": 0.3455897126783203, "grad_norm": 0.7629761099815369, "learning_rate": 0.00019367956360892815, "loss": 1.5906, "step": 2150 }, { "epoch": 0.345750452079566, "grad_norm": 0.731194257736206, "learning_rate": 0.00019367366322141586, "loss": 1.5683, "step": 2151 }, { "epoch": 0.34591119148081173, "grad_norm": 0.7427955269813538, "learning_rate": 0.00019366776017102307, "loss": 1.5467, "step": 2152 }, { "epoch": 0.34607193088205745, "grad_norm": 0.6888127326965332, "learning_rate": 0.0001936618544579176, "loss": 1.4103, "step": 2153 }, { "epoch": 0.3462326702833032, "grad_norm": 0.8561480045318604, "learning_rate": 0.00019365594608226726, "loss": 1.542, "step": 2154 }, { "epoch": 0.3463934096845489, "grad_norm": 0.7888673543930054, "learning_rate": 0.00019365003504424007, "loss": 1.6185, "step": 2155 }, { "epoch": 0.3465541490857947, "grad_norm": 0.7830891609191895, "learning_rate": 0.00019364412134400402, "loss": 1.4856, "step": 2156 }, { "epoch": 0.3467148884870404, "grad_norm": 0.9270341992378235, "learning_rate": 0.00019363820498172728, "loss": 1.7748, "step": 2157 }, { "epoch": 0.34687562788828613, "grad_norm": 0.6934037208557129, "learning_rate": 0.00019363228595757796, "loss": 1.4616, "step": 2158 }, { "epoch": 0.34703636728953186, "grad_norm": 0.7118710279464722, "learning_rate": 0.00019362636427172435, "loss": 1.4431, "step": 2159 }, { "epoch": 0.3471971066907776, "grad_norm": 0.7213820219039917, "learning_rate": 0.00019362043992433484, "loss": 1.3547, "step": 2160 }, { "epoch": 0.3473578460920233, "grad_norm": 0.6597811579704285, "learning_rate": 0.0001936145129155778, "loss": 1.5702, "step": 2161 }, { "epoch": 0.347518585493269, "grad_norm": 0.7204716205596924, "learning_rate": 0.00019360858324562167, "loss": 1.5093, "step": 2162 }, { "epoch": 0.34767932489451475, "grad_norm": 0.7529557347297668, "learning_rate": 0.0001936026509146351, "loss": 1.4925, "step": 2163 }, { "epoch": 0.3478400642957605, "grad_norm": 0.7782800197601318, "learning_rate": 0.0001935967159227867, "loss": 1.4652, "step": 2164 }, { "epoch": 0.34800080369700626, "grad_norm": 0.8154022097587585, "learning_rate": 0.00019359077827024517, "loss": 1.3978, "step": 2165 }, { "epoch": 0.348161543098252, "grad_norm": 0.7241103649139404, "learning_rate": 0.00019358483795717927, "loss": 1.4562, "step": 2166 }, { "epoch": 0.3483222824994977, "grad_norm": 0.7978832125663757, "learning_rate": 0.00019357889498375794, "loss": 1.4014, "step": 2167 }, { "epoch": 0.34848302190074343, "grad_norm": 0.7748092412948608, "learning_rate": 0.0001935729493501501, "loss": 1.5634, "step": 2168 }, { "epoch": 0.34864376130198915, "grad_norm": 0.7420588135719299, "learning_rate": 0.00019356700105652475, "loss": 1.3456, "step": 2169 }, { "epoch": 0.3488045007032349, "grad_norm": 0.6847753524780273, "learning_rate": 0.00019356105010305097, "loss": 1.2742, "step": 2170 }, { "epoch": 0.3489652401044806, "grad_norm": 0.7458052039146423, "learning_rate": 0.00019355509648989793, "loss": 1.5859, "step": 2171 }, { "epoch": 0.3491259795057263, "grad_norm": 0.7403494119644165, "learning_rate": 0.0001935491402172349, "loss": 1.5288, "step": 2172 }, { "epoch": 0.34928671890697205, "grad_norm": 0.7207682132720947, "learning_rate": 0.00019354318128523123, "loss": 1.5666, "step": 2173 }, { "epoch": 0.34944745830821783, "grad_norm": 0.7435784339904785, "learning_rate": 0.00019353721969405621, "loss": 1.5865, "step": 2174 }, { "epoch": 0.34960819770946355, "grad_norm": 0.7609650492668152, "learning_rate": 0.00019353125544387944, "loss": 1.45, "step": 2175 }, { "epoch": 0.3497689371107093, "grad_norm": 0.759573757648468, "learning_rate": 0.0001935252885348704, "loss": 1.4709, "step": 2176 }, { "epoch": 0.349929676511955, "grad_norm": 0.6559799909591675, "learning_rate": 0.0001935193189671987, "loss": 1.2884, "step": 2177 }, { "epoch": 0.3500904159132007, "grad_norm": 0.7408475279808044, "learning_rate": 0.00019351334674103405, "loss": 1.5918, "step": 2178 }, { "epoch": 0.35025115531444645, "grad_norm": 0.7220862507820129, "learning_rate": 0.00019350737185654625, "loss": 1.3807, "step": 2179 }, { "epoch": 0.3504118947156922, "grad_norm": 0.8684597015380859, "learning_rate": 0.0001935013943139051, "loss": 2.0635, "step": 2180 }, { "epoch": 0.3505726341169379, "grad_norm": 0.7820222973823547, "learning_rate": 0.00019349541411328058, "loss": 1.3697, "step": 2181 }, { "epoch": 0.3507333735181836, "grad_norm": 0.7763898968696594, "learning_rate": 0.00019348943125484266, "loss": 1.3815, "step": 2182 }, { "epoch": 0.3508941129194294, "grad_norm": 0.8525013327598572, "learning_rate": 0.0001934834457387614, "loss": 1.5007, "step": 2183 }, { "epoch": 0.3510548523206751, "grad_norm": 0.7885863184928894, "learning_rate": 0.000193477457565207, "loss": 1.588, "step": 2184 }, { "epoch": 0.35121559172192085, "grad_norm": 0.621495246887207, "learning_rate": 0.00019347146673434965, "loss": 1.2103, "step": 2185 }, { "epoch": 0.3513763311231666, "grad_norm": 0.703859806060791, "learning_rate": 0.00019346547324635967, "loss": 1.4526, "step": 2186 }, { "epoch": 0.3515370705244123, "grad_norm": 0.7636280655860901, "learning_rate": 0.0001934594771014074, "loss": 1.4813, "step": 2187 }, { "epoch": 0.351697809925658, "grad_norm": 0.7564209699630737, "learning_rate": 0.00019345347829966334, "loss": 1.4911, "step": 2188 }, { "epoch": 0.35185854932690375, "grad_norm": 0.7715060114860535, "learning_rate": 0.000193447476841298, "loss": 1.5232, "step": 2189 }, { "epoch": 0.35201928872814947, "grad_norm": 0.7512679696083069, "learning_rate": 0.000193441472726482, "loss": 1.3335, "step": 2190 }, { "epoch": 0.3521800281293952, "grad_norm": 0.7714465856552124, "learning_rate": 0.000193435465955386, "loss": 1.6173, "step": 2191 }, { "epoch": 0.352340767530641, "grad_norm": 0.7679740786552429, "learning_rate": 0.00019342945652818077, "loss": 1.3016, "step": 2192 }, { "epoch": 0.3525015069318867, "grad_norm": 0.8274876475334167, "learning_rate": 0.0001934234444450371, "loss": 1.4584, "step": 2193 }, { "epoch": 0.3526622463331324, "grad_norm": 0.9472848773002625, "learning_rate": 0.00019341742970612597, "loss": 1.6295, "step": 2194 }, { "epoch": 0.35282298573437815, "grad_norm": 0.659957230091095, "learning_rate": 0.0001934114123116183, "loss": 1.2197, "step": 2195 }, { "epoch": 0.35298372513562387, "grad_norm": 0.7512854337692261, "learning_rate": 0.0001934053922616852, "loss": 1.4747, "step": 2196 }, { "epoch": 0.3531444645368696, "grad_norm": 0.776055097579956, "learning_rate": 0.00019339936955649773, "loss": 1.5564, "step": 2197 }, { "epoch": 0.3533052039381153, "grad_norm": 0.7723532319068909, "learning_rate": 0.00019339334419622717, "loss": 1.278, "step": 2198 }, { "epoch": 0.35346594333936104, "grad_norm": 0.6564828753471375, "learning_rate": 0.00019338731618104478, "loss": 1.3063, "step": 2199 }, { "epoch": 0.35362668274060677, "grad_norm": 0.7153809666633606, "learning_rate": 0.0001933812855111219, "loss": 1.4168, "step": 2200 }, { "epoch": 0.35362668274060677, "eval_loss": 1.5143771171569824, "eval_runtime": 46.2214, "eval_samples_per_second": 5.43, "eval_steps_per_second": 2.726, "step": 2200 }, { "epoch": 0.35378742214185255, "grad_norm": 0.7495766282081604, "learning_rate": 0.00019337525218663, "loss": 1.7142, "step": 2201 }, { "epoch": 0.35394816154309827, "grad_norm": 0.7165125012397766, "learning_rate": 0.00019336921620774055, "loss": 1.5212, "step": 2202 }, { "epoch": 0.354108900944344, "grad_norm": 0.7984814643859863, "learning_rate": 0.0001933631775746252, "loss": 1.4035, "step": 2203 }, { "epoch": 0.3542696403455897, "grad_norm": 0.8182279467582703, "learning_rate": 0.00019335713628745554, "loss": 1.6378, "step": 2204 }, { "epoch": 0.35443037974683544, "grad_norm": 0.6949149370193481, "learning_rate": 0.00019335109234640334, "loss": 1.3932, "step": 2205 }, { "epoch": 0.35459111914808117, "grad_norm": 0.7815496325492859, "learning_rate": 0.00019334504575164042, "loss": 1.5448, "step": 2206 }, { "epoch": 0.3547518585493269, "grad_norm": 0.6857227683067322, "learning_rate": 0.00019333899650333862, "loss": 1.522, "step": 2207 }, { "epoch": 0.3549125979505726, "grad_norm": 0.7727954983711243, "learning_rate": 0.00019333294460166997, "loss": 1.5224, "step": 2208 }, { "epoch": 0.35507333735181834, "grad_norm": 0.7631040811538696, "learning_rate": 0.00019332689004680645, "loss": 1.6203, "step": 2209 }, { "epoch": 0.3552340767530641, "grad_norm": 0.7805564999580383, "learning_rate": 0.00019332083283892025, "loss": 1.4037, "step": 2210 }, { "epoch": 0.35539481615430984, "grad_norm": 0.7283132076263428, "learning_rate": 0.00019331477297818348, "loss": 1.3343, "step": 2211 }, { "epoch": 0.35555555555555557, "grad_norm": 0.8132778406143188, "learning_rate": 0.00019330871046476845, "loss": 1.5701, "step": 2212 }, { "epoch": 0.3557162949568013, "grad_norm": 0.7506296038627625, "learning_rate": 0.00019330264529884747, "loss": 1.4379, "step": 2213 }, { "epoch": 0.355877034358047, "grad_norm": 0.8073461651802063, "learning_rate": 0.000193296577480593, "loss": 1.6643, "step": 2214 }, { "epoch": 0.35603777375929274, "grad_norm": 0.699687123298645, "learning_rate": 0.00019329050701017748, "loss": 1.5034, "step": 2215 }, { "epoch": 0.35619851316053847, "grad_norm": 0.7187511324882507, "learning_rate": 0.0001932844338877735, "loss": 1.2797, "step": 2216 }, { "epoch": 0.3563592525617842, "grad_norm": 0.7717531323432922, "learning_rate": 0.00019327835811355372, "loss": 1.7194, "step": 2217 }, { "epoch": 0.3565199919630299, "grad_norm": 0.7352010011672974, "learning_rate": 0.00019327227968769084, "loss": 1.4279, "step": 2218 }, { "epoch": 0.3566807313642757, "grad_norm": 0.7050693035125732, "learning_rate": 0.00019326619861035767, "loss": 1.5139, "step": 2219 }, { "epoch": 0.3568414707655214, "grad_norm": 0.8724512457847595, "learning_rate": 0.00019326011488172702, "loss": 1.7741, "step": 2220 }, { "epoch": 0.35700221016676714, "grad_norm": 0.7488328814506531, "learning_rate": 0.0001932540285019719, "loss": 1.4352, "step": 2221 }, { "epoch": 0.35716294956801287, "grad_norm": 0.8010830283164978, "learning_rate": 0.00019324793947126528, "loss": 1.3858, "step": 2222 }, { "epoch": 0.3573236889692586, "grad_norm": 0.9072941541671753, "learning_rate": 0.0001932418477897803, "loss": 2.1224, "step": 2223 }, { "epoch": 0.3574844283705043, "grad_norm": 0.7651730179786682, "learning_rate": 0.00019323575345769008, "loss": 1.5592, "step": 2224 }, { "epoch": 0.35764516777175004, "grad_norm": 0.6986821889877319, "learning_rate": 0.00019322965647516793, "loss": 1.2897, "step": 2225 }, { "epoch": 0.35780590717299576, "grad_norm": 0.758841872215271, "learning_rate": 0.0001932235568423871, "loss": 1.5775, "step": 2226 }, { "epoch": 0.3579666465742415, "grad_norm": 0.7240057587623596, "learning_rate": 0.00019321745455952102, "loss": 1.4515, "step": 2227 }, { "epoch": 0.35812738597548727, "grad_norm": 0.7248893976211548, "learning_rate": 0.00019321134962674318, "loss": 1.6741, "step": 2228 }, { "epoch": 0.358288125376733, "grad_norm": 0.7072656750679016, "learning_rate": 0.00019320524204422709, "loss": 1.2574, "step": 2229 }, { "epoch": 0.3584488647779787, "grad_norm": 0.7599988579750061, "learning_rate": 0.0001931991318121464, "loss": 1.3369, "step": 2230 }, { "epoch": 0.35860960417922444, "grad_norm": 0.887819230556488, "learning_rate": 0.00019319301893067477, "loss": 1.6359, "step": 2231 }, { "epoch": 0.35877034358047016, "grad_norm": 0.8321847915649414, "learning_rate": 0.00019318690339998598, "loss": 1.6225, "step": 2232 }, { "epoch": 0.3589310829817159, "grad_norm": 0.7438744306564331, "learning_rate": 0.00019318078522025393, "loss": 1.6248, "step": 2233 }, { "epoch": 0.3590918223829616, "grad_norm": 0.7518550157546997, "learning_rate": 0.00019317466439165248, "loss": 1.5878, "step": 2234 }, { "epoch": 0.35925256178420734, "grad_norm": 0.8194841146469116, "learning_rate": 0.00019316854091435567, "loss": 1.6244, "step": 2235 }, { "epoch": 0.35941330118545306, "grad_norm": 0.878357470035553, "learning_rate": 0.00019316241478853753, "loss": 1.6394, "step": 2236 }, { "epoch": 0.35957404058669884, "grad_norm": 0.7760539054870605, "learning_rate": 0.00019315628601437225, "loss": 1.4656, "step": 2237 }, { "epoch": 0.35973477998794456, "grad_norm": 0.7040606737136841, "learning_rate": 0.00019315015459203406, "loss": 1.3785, "step": 2238 }, { "epoch": 0.3598955193891903, "grad_norm": 0.7283849716186523, "learning_rate": 0.00019314402052169724, "loss": 1.3634, "step": 2239 }, { "epoch": 0.360056258790436, "grad_norm": 0.6238592267036438, "learning_rate": 0.00019313788380353612, "loss": 1.2639, "step": 2240 }, { "epoch": 0.36021699819168174, "grad_norm": 0.6958089470863342, "learning_rate": 0.00019313174443772523, "loss": 1.3799, "step": 2241 }, { "epoch": 0.36037773759292746, "grad_norm": 0.8894790410995483, "learning_rate": 0.00019312560242443902, "loss": 1.7578, "step": 2242 }, { "epoch": 0.3605384769941732, "grad_norm": 0.8378298878669739, "learning_rate": 0.00019311945776385217, "loss": 1.5952, "step": 2243 }, { "epoch": 0.3606992163954189, "grad_norm": 0.8080736994743347, "learning_rate": 0.00019311331045613928, "loss": 1.4612, "step": 2244 }, { "epoch": 0.36085995579666463, "grad_norm": 0.7705124020576477, "learning_rate": 0.00019310716050147515, "loss": 1.5189, "step": 2245 }, { "epoch": 0.3610206951979104, "grad_norm": 0.8197687268257141, "learning_rate": 0.00019310100790003457, "loss": 1.8104, "step": 2246 }, { "epoch": 0.36118143459915614, "grad_norm": 0.7763803601264954, "learning_rate": 0.0001930948526519925, "loss": 1.4344, "step": 2247 }, { "epoch": 0.36134217400040186, "grad_norm": 0.6883619427680969, "learning_rate": 0.00019308869475752384, "loss": 1.2675, "step": 2248 }, { "epoch": 0.3615029134016476, "grad_norm": 0.9139236211776733, "learning_rate": 0.00019308253421680368, "loss": 1.3459, "step": 2249 }, { "epoch": 0.3616636528028933, "grad_norm": 0.8060486316680908, "learning_rate": 0.00019307637103000715, "loss": 1.8648, "step": 2250 }, { "epoch": 0.36182439220413903, "grad_norm": 0.7091608643531799, "learning_rate": 0.00019307020519730947, "loss": 1.5016, "step": 2251 }, { "epoch": 0.36198513160538476, "grad_norm": 0.7042751908302307, "learning_rate": 0.0001930640367188859, "loss": 1.4864, "step": 2252 }, { "epoch": 0.3621458710066305, "grad_norm": 0.8090089559555054, "learning_rate": 0.00019305786559491175, "loss": 1.8382, "step": 2253 }, { "epoch": 0.3623066104078762, "grad_norm": 0.7579192519187927, "learning_rate": 0.00019305169182556254, "loss": 1.7053, "step": 2254 }, { "epoch": 0.362467349809122, "grad_norm": 0.6348238587379456, "learning_rate": 0.00019304551541101365, "loss": 1.2383, "step": 2255 }, { "epoch": 0.3626280892103677, "grad_norm": 0.7195169925689697, "learning_rate": 0.00019303933635144078, "loss": 1.5119, "step": 2256 }, { "epoch": 0.36278882861161343, "grad_norm": 0.8304538130760193, "learning_rate": 0.0001930331546470195, "loss": 1.3961, "step": 2257 }, { "epoch": 0.36294956801285916, "grad_norm": 0.7505934834480286, "learning_rate": 0.0001930269702979256, "loss": 1.587, "step": 2258 }, { "epoch": 0.3631103074141049, "grad_norm": 0.7606253623962402, "learning_rate": 0.00019302078330433485, "loss": 1.4681, "step": 2259 }, { "epoch": 0.3632710468153506, "grad_norm": 0.6835800409317017, "learning_rate": 0.00019301459366642313, "loss": 1.3351, "step": 2260 }, { "epoch": 0.36343178621659633, "grad_norm": 0.748271644115448, "learning_rate": 0.00019300840138436636, "loss": 1.5088, "step": 2261 }, { "epoch": 0.36359252561784206, "grad_norm": 0.7252227067947388, "learning_rate": 0.00019300220645834064, "loss": 1.7293, "step": 2262 }, { "epoch": 0.3637532650190878, "grad_norm": 0.767500638961792, "learning_rate": 0.00019299600888852202, "loss": 1.4244, "step": 2263 }, { "epoch": 0.36391400442033356, "grad_norm": 0.7022566199302673, "learning_rate": 0.0001929898086750867, "loss": 1.2922, "step": 2264 }, { "epoch": 0.3640747438215793, "grad_norm": 0.7437481880187988, "learning_rate": 0.00019298360581821098, "loss": 1.4842, "step": 2265 }, { "epoch": 0.364235483222825, "grad_norm": 0.7092146277427673, "learning_rate": 0.0001929774003180711, "loss": 1.2841, "step": 2266 }, { "epoch": 0.36439622262407073, "grad_norm": 0.8568287491798401, "learning_rate": 0.0001929711921748435, "loss": 1.6742, "step": 2267 }, { "epoch": 0.36455696202531646, "grad_norm": 0.7950097322463989, "learning_rate": 0.0001929649813887047, "loss": 1.408, "step": 2268 }, { "epoch": 0.3647177014265622, "grad_norm": 0.8774558901786804, "learning_rate": 0.00019295876795983122, "loss": 1.3312, "step": 2269 }, { "epoch": 0.3648784408278079, "grad_norm": 0.8537526726722717, "learning_rate": 0.00019295255188839968, "loss": 1.6799, "step": 2270 }, { "epoch": 0.36503918022905363, "grad_norm": 0.8359127044677734, "learning_rate": 0.00019294633317458682, "loss": 1.5372, "step": 2271 }, { "epoch": 0.36519991963029935, "grad_norm": 0.8758941292762756, "learning_rate": 0.00019294011181856938, "loss": 2.0401, "step": 2272 }, { "epoch": 0.36536065903154513, "grad_norm": 0.9309197664260864, "learning_rate": 0.00019293388782052427, "loss": 1.7943, "step": 2273 }, { "epoch": 0.36552139843279086, "grad_norm": 0.66529381275177, "learning_rate": 0.00019292766118062836, "loss": 1.4481, "step": 2274 }, { "epoch": 0.3656821378340366, "grad_norm": 0.8920289278030396, "learning_rate": 0.0001929214318990587, "loss": 1.6575, "step": 2275 }, { "epoch": 0.3658428772352823, "grad_norm": 0.6659005880355835, "learning_rate": 0.00019291519997599235, "loss": 1.5059, "step": 2276 }, { "epoch": 0.36600361663652803, "grad_norm": 0.6753971576690674, "learning_rate": 0.00019290896541160644, "loss": 1.3676, "step": 2277 }, { "epoch": 0.36616435603777375, "grad_norm": 0.7083390951156616, "learning_rate": 0.00019290272820607826, "loss": 1.4571, "step": 2278 }, { "epoch": 0.3663250954390195, "grad_norm": 0.7430787682533264, "learning_rate": 0.0001928964883595851, "loss": 1.6284, "step": 2279 }, { "epoch": 0.3664858348402652, "grad_norm": 0.7871437072753906, "learning_rate": 0.00019289024587230432, "loss": 1.6624, "step": 2280 }, { "epoch": 0.3666465742415109, "grad_norm": 0.7920075058937073, "learning_rate": 0.00019288400074441337, "loss": 1.52, "step": 2281 }, { "epoch": 0.3668073136427567, "grad_norm": 0.7953509092330933, "learning_rate": 0.0001928777529760898, "loss": 1.5505, "step": 2282 }, { "epoch": 0.36696805304400243, "grad_norm": 0.6874988079071045, "learning_rate": 0.00019287150256751124, "loss": 1.2056, "step": 2283 }, { "epoch": 0.36712879244524815, "grad_norm": 0.6509885191917419, "learning_rate": 0.00019286524951885533, "loss": 1.273, "step": 2284 }, { "epoch": 0.3672895318464939, "grad_norm": 0.8529267907142639, "learning_rate": 0.00019285899383029986, "loss": 1.4958, "step": 2285 }, { "epoch": 0.3674502712477396, "grad_norm": 0.7961730360984802, "learning_rate": 0.00019285273550202262, "loss": 1.6553, "step": 2286 }, { "epoch": 0.3676110106489853, "grad_norm": 0.8984256982803345, "learning_rate": 0.00019284647453420158, "loss": 1.794, "step": 2287 }, { "epoch": 0.36777175005023105, "grad_norm": 0.733128011226654, "learning_rate": 0.00019284021092701463, "loss": 1.3276, "step": 2288 }, { "epoch": 0.3679324894514768, "grad_norm": 0.744466245174408, "learning_rate": 0.00019283394468063992, "loss": 1.3326, "step": 2289 }, { "epoch": 0.3680932288527225, "grad_norm": 0.7705268263816833, "learning_rate": 0.00019282767579525555, "loss": 1.376, "step": 2290 }, { "epoch": 0.3682539682539683, "grad_norm": 0.6793444752693176, "learning_rate": 0.00019282140427103966, "loss": 1.5043, "step": 2291 }, { "epoch": 0.368414707655214, "grad_norm": 0.8116929531097412, "learning_rate": 0.00019281513010817065, "loss": 1.5121, "step": 2292 }, { "epoch": 0.3685754470564597, "grad_norm": 0.7831950783729553, "learning_rate": 0.0001928088533068268, "loss": 1.5968, "step": 2293 }, { "epoch": 0.36873618645770545, "grad_norm": 0.8486713171005249, "learning_rate": 0.00019280257386718655, "loss": 1.8002, "step": 2294 }, { "epoch": 0.3688969258589512, "grad_norm": 0.798450767993927, "learning_rate": 0.0001927962917894284, "loss": 1.5203, "step": 2295 }, { "epoch": 0.3690576652601969, "grad_norm": 0.7468360066413879, "learning_rate": 0.000192790007073731, "loss": 1.2209, "step": 2296 }, { "epoch": 0.3692184046614426, "grad_norm": 0.78008633852005, "learning_rate": 0.0001927837197202729, "loss": 1.5781, "step": 2297 }, { "epoch": 0.36937914406268835, "grad_norm": 0.8948492407798767, "learning_rate": 0.00019277742972923293, "loss": 1.5488, "step": 2298 }, { "epoch": 0.36953988346393407, "grad_norm": 0.7671533823013306, "learning_rate": 0.00019277113710078984, "loss": 1.5109, "step": 2299 }, { "epoch": 0.36970062286517985, "grad_norm": 0.695842444896698, "learning_rate": 0.00019276484183512253, "loss": 1.4661, "step": 2300 }, { "epoch": 0.3698613622664256, "grad_norm": 0.7124637961387634, "learning_rate": 0.00019275854393240995, "loss": 1.4039, "step": 2301 }, { "epoch": 0.3700221016676713, "grad_norm": 0.6826514005661011, "learning_rate": 0.00019275224339283114, "loss": 1.308, "step": 2302 }, { "epoch": 0.370182841068917, "grad_norm": 0.7096145153045654, "learning_rate": 0.0001927459402165652, "loss": 1.1908, "step": 2303 }, { "epoch": 0.37034358047016275, "grad_norm": 0.8001224398612976, "learning_rate": 0.0001927396344037913, "loss": 1.7123, "step": 2304 }, { "epoch": 0.37050431987140847, "grad_norm": 0.7160031199455261, "learning_rate": 0.00019273332595468872, "loss": 1.476, "step": 2305 }, { "epoch": 0.3706650592726542, "grad_norm": 0.7758086323738098, "learning_rate": 0.00019272701486943675, "loss": 1.3404, "step": 2306 }, { "epoch": 0.3708257986738999, "grad_norm": 0.7404614090919495, "learning_rate": 0.00019272070114821488, "loss": 1.25, "step": 2307 }, { "epoch": 0.37098653807514564, "grad_norm": 0.8242363333702087, "learning_rate": 0.0001927143847912025, "loss": 1.7162, "step": 2308 }, { "epoch": 0.3711472774763914, "grad_norm": 0.6747803092002869, "learning_rate": 0.00019270806579857924, "loss": 1.3534, "step": 2309 }, { "epoch": 0.37130801687763715, "grad_norm": 0.7487573027610779, "learning_rate": 0.0001927017441705247, "loss": 1.3976, "step": 2310 }, { "epoch": 0.3714687562788829, "grad_norm": 0.7340654730796814, "learning_rate": 0.00019269541990721856, "loss": 1.0985, "step": 2311 }, { "epoch": 0.3716294956801286, "grad_norm": 0.7396987080574036, "learning_rate": 0.0001926890930088406, "loss": 1.6197, "step": 2312 }, { "epoch": 0.3717902350813743, "grad_norm": 0.8385146856307983, "learning_rate": 0.00019268276347557075, "loss": 1.4441, "step": 2313 }, { "epoch": 0.37195097448262004, "grad_norm": 0.9103909134864807, "learning_rate": 0.00019267643130758887, "loss": 1.6599, "step": 2314 }, { "epoch": 0.37211171388386577, "grad_norm": 0.7072083353996277, "learning_rate": 0.000192670096505075, "loss": 1.4166, "step": 2315 }, { "epoch": 0.3722724532851115, "grad_norm": 0.7907953262329102, "learning_rate": 0.00019266375906820918, "loss": 1.616, "step": 2316 }, { "epoch": 0.3724331926863572, "grad_norm": 0.7019078135490417, "learning_rate": 0.00019265741899717162, "loss": 1.3339, "step": 2317 }, { "epoch": 0.372593932087603, "grad_norm": 0.686255693435669, "learning_rate": 0.0001926510762921425, "loss": 1.289, "step": 2318 }, { "epoch": 0.3727546714888487, "grad_norm": 0.8415122032165527, "learning_rate": 0.00019264473095330215, "loss": 1.5528, "step": 2319 }, { "epoch": 0.37291541089009445, "grad_norm": 0.9293020367622375, "learning_rate": 0.00019263838298083096, "loss": 1.6633, "step": 2320 }, { "epoch": 0.37307615029134017, "grad_norm": 0.8252938389778137, "learning_rate": 0.00019263203237490938, "loss": 1.7066, "step": 2321 }, { "epoch": 0.3732368896925859, "grad_norm": 0.8954980969429016, "learning_rate": 0.00019262567913571794, "loss": 1.8335, "step": 2322 }, { "epoch": 0.3733976290938316, "grad_norm": 0.7476145029067993, "learning_rate": 0.00019261932326343723, "loss": 1.3248, "step": 2323 }, { "epoch": 0.37355836849507734, "grad_norm": 0.7900554537773132, "learning_rate": 0.00019261296475824797, "loss": 1.3867, "step": 2324 }, { "epoch": 0.37371910789632307, "grad_norm": 0.8005098700523376, "learning_rate": 0.00019260660362033086, "loss": 1.7113, "step": 2325 }, { "epoch": 0.3738798472975688, "grad_norm": 0.8625285029411316, "learning_rate": 0.0001926002398498667, "loss": 1.7917, "step": 2326 }, { "epoch": 0.37404058669881457, "grad_norm": 0.8334097266197205, "learning_rate": 0.00019259387344703653, "loss": 1.5181, "step": 2327 }, { "epoch": 0.3742013261000603, "grad_norm": 0.772881269454956, "learning_rate": 0.0001925875044120212, "loss": 1.6191, "step": 2328 }, { "epoch": 0.374362065501306, "grad_norm": 0.8097652196884155, "learning_rate": 0.00019258113274500183, "loss": 1.615, "step": 2329 }, { "epoch": 0.37452280490255174, "grad_norm": 0.8663483262062073, "learning_rate": 0.00019257475844615954, "loss": 1.553, "step": 2330 }, { "epoch": 0.37468354430379747, "grad_norm": 0.7867934703826904, "learning_rate": 0.0001925683815156755, "loss": 1.6194, "step": 2331 }, { "epoch": 0.3748442837050432, "grad_norm": 0.8059807419776917, "learning_rate": 0.000192562001953731, "loss": 1.5606, "step": 2332 }, { "epoch": 0.3750050231062889, "grad_norm": 0.7633818984031677, "learning_rate": 0.00019255561976050744, "loss": 1.5435, "step": 2333 }, { "epoch": 0.37516576250753464, "grad_norm": 0.7099105715751648, "learning_rate": 0.0001925492349361862, "loss": 1.2542, "step": 2334 }, { "epoch": 0.37532650190878036, "grad_norm": 0.9166895747184753, "learning_rate": 0.00019254284748094878, "loss": 1.7569, "step": 2335 }, { "epoch": 0.37548724131002614, "grad_norm": 0.7682256102561951, "learning_rate": 0.00019253645739497678, "loss": 1.5458, "step": 2336 }, { "epoch": 0.37564798071127187, "grad_norm": 0.7776126265525818, "learning_rate": 0.00019253006467845186, "loss": 1.6083, "step": 2337 }, { "epoch": 0.3758087201125176, "grad_norm": 0.7671796679496765, "learning_rate": 0.0001925236693315557, "loss": 1.5142, "step": 2338 }, { "epoch": 0.3759694595137633, "grad_norm": 0.6975173354148865, "learning_rate": 0.00019251727135447016, "loss": 1.361, "step": 2339 }, { "epoch": 0.37613019891500904, "grad_norm": 0.9343434572219849, "learning_rate": 0.00019251087074737705, "loss": 1.5593, "step": 2340 }, { "epoch": 0.37629093831625476, "grad_norm": 0.8887360095977783, "learning_rate": 0.0001925044675104584, "loss": 1.6803, "step": 2341 }, { "epoch": 0.3764516777175005, "grad_norm": 0.7514050602912903, "learning_rate": 0.0001924980616438962, "loss": 1.5644, "step": 2342 }, { "epoch": 0.3766124171187462, "grad_norm": 0.7580198645591736, "learning_rate": 0.00019249165314787252, "loss": 1.4488, "step": 2343 }, { "epoch": 0.37677315651999194, "grad_norm": 0.6759437322616577, "learning_rate": 0.0001924852420225696, "loss": 1.2637, "step": 2344 }, { "epoch": 0.3769338959212377, "grad_norm": 0.7365924715995789, "learning_rate": 0.00019247882826816962, "loss": 1.6136, "step": 2345 }, { "epoch": 0.37709463532248344, "grad_norm": 0.760028064250946, "learning_rate": 0.00019247241188485495, "loss": 1.3269, "step": 2346 }, { "epoch": 0.37725537472372916, "grad_norm": 0.7896841168403625, "learning_rate": 0.00019246599287280799, "loss": 1.58, "step": 2347 }, { "epoch": 0.3774161141249749, "grad_norm": 0.8547706604003906, "learning_rate": 0.0001924595712322112, "loss": 1.6285, "step": 2348 }, { "epoch": 0.3775768535262206, "grad_norm": 0.6900953054428101, "learning_rate": 0.0001924531469632471, "loss": 1.3439, "step": 2349 }, { "epoch": 0.37773759292746634, "grad_norm": 0.7686839699745178, "learning_rate": 0.00019244672006609844, "loss": 1.6517, "step": 2350 }, { "epoch": 0.37789833232871206, "grad_norm": 0.7887119054794312, "learning_rate": 0.00019244029054094774, "loss": 1.7429, "step": 2351 }, { "epoch": 0.3780590717299578, "grad_norm": 0.8073592782020569, "learning_rate": 0.0001924338583879779, "loss": 1.4368, "step": 2352 }, { "epoch": 0.3782198111312035, "grad_norm": 0.7444460391998291, "learning_rate": 0.0001924274236073717, "loss": 1.4863, "step": 2353 }, { "epoch": 0.3783805505324493, "grad_norm": 0.7118504047393799, "learning_rate": 0.0001924209861993121, "loss": 1.4776, "step": 2354 }, { "epoch": 0.378541289933695, "grad_norm": 0.90787672996521, "learning_rate": 0.0001924145461639821, "loss": 1.5224, "step": 2355 }, { "epoch": 0.37870202933494074, "grad_norm": 0.6589099168777466, "learning_rate": 0.00019240810350156475, "loss": 1.3551, "step": 2356 }, { "epoch": 0.37886276873618646, "grad_norm": 0.7486380338668823, "learning_rate": 0.0001924016582122432, "loss": 1.4905, "step": 2357 }, { "epoch": 0.3790235081374322, "grad_norm": 0.8906408548355103, "learning_rate": 0.0001923952102962007, "loss": 1.9237, "step": 2358 }, { "epoch": 0.3791842475386779, "grad_norm": 0.8645241856575012, "learning_rate": 0.00019238875975362052, "loss": 1.7999, "step": 2359 }, { "epoch": 0.37934498693992363, "grad_norm": 0.6842097640037537, "learning_rate": 0.000192382306584686, "loss": 1.3833, "step": 2360 }, { "epoch": 0.37950572634116936, "grad_norm": 0.766603946685791, "learning_rate": 0.00019237585078958066, "loss": 1.4222, "step": 2361 }, { "epoch": 0.3796664657424151, "grad_norm": 0.7449361681938171, "learning_rate": 0.00019236939236848798, "loss": 1.3194, "step": 2362 }, { "epoch": 0.37982720514366086, "grad_norm": 0.706528902053833, "learning_rate": 0.00019236293132159155, "loss": 1.516, "step": 2363 }, { "epoch": 0.3799879445449066, "grad_norm": 0.6842591166496277, "learning_rate": 0.00019235646764907504, "loss": 1.2355, "step": 2364 }, { "epoch": 0.3801486839461523, "grad_norm": 0.7580458521842957, "learning_rate": 0.0001923500013511222, "loss": 1.4033, "step": 2365 }, { "epoch": 0.38030942334739803, "grad_norm": 0.792582094669342, "learning_rate": 0.00019234353242791684, "loss": 1.4261, "step": 2366 }, { "epoch": 0.38047016274864376, "grad_norm": 0.8267690539360046, "learning_rate": 0.00019233706087964288, "loss": 1.5539, "step": 2367 }, { "epoch": 0.3806309021498895, "grad_norm": 0.7797790765762329, "learning_rate": 0.00019233058670648428, "loss": 1.5667, "step": 2368 }, { "epoch": 0.3807916415511352, "grad_norm": 0.7665966749191284, "learning_rate": 0.00019232410990862504, "loss": 1.3916, "step": 2369 }, { "epoch": 0.38095238095238093, "grad_norm": 0.8257808685302734, "learning_rate": 0.00019231763048624935, "loss": 1.6051, "step": 2370 }, { "epoch": 0.38111312035362666, "grad_norm": 0.8190930485725403, "learning_rate": 0.00019231114843954133, "loss": 1.739, "step": 2371 }, { "epoch": 0.38127385975487244, "grad_norm": 0.6170780062675476, "learning_rate": 0.0001923046637686853, "loss": 1.236, "step": 2372 }, { "epoch": 0.38143459915611816, "grad_norm": 0.7124181389808655, "learning_rate": 0.00019229817647386556, "loss": 1.3284, "step": 2373 }, { "epoch": 0.3815953385573639, "grad_norm": 0.6498758792877197, "learning_rate": 0.00019229168655526656, "loss": 1.1779, "step": 2374 }, { "epoch": 0.3817560779586096, "grad_norm": 0.72782301902771, "learning_rate": 0.00019228519401307277, "loss": 1.4747, "step": 2375 }, { "epoch": 0.38191681735985533, "grad_norm": 0.8231451511383057, "learning_rate": 0.00019227869884746876, "loss": 1.5292, "step": 2376 }, { "epoch": 0.38207755676110106, "grad_norm": 0.7748918533325195, "learning_rate": 0.00019227220105863917, "loss": 1.4252, "step": 2377 }, { "epoch": 0.3822382961623468, "grad_norm": 0.7030318975448608, "learning_rate": 0.0001922657006467687, "loss": 1.454, "step": 2378 }, { "epoch": 0.3823990355635925, "grad_norm": 0.7370429039001465, "learning_rate": 0.00019225919761204217, "loss": 1.5446, "step": 2379 }, { "epoch": 0.38255977496483823, "grad_norm": 0.7779253125190735, "learning_rate": 0.00019225269195464445, "loss": 1.7222, "step": 2380 }, { "epoch": 0.382720514366084, "grad_norm": 1.6347994804382324, "learning_rate": 0.00019224618367476038, "loss": 1.6214, "step": 2381 }, { "epoch": 0.38288125376732973, "grad_norm": 0.8312540054321289, "learning_rate": 0.00019223967277257512, "loss": 1.7487, "step": 2382 }, { "epoch": 0.38304199316857546, "grad_norm": 0.8323525190353394, "learning_rate": 0.00019223315924827365, "loss": 1.555, "step": 2383 }, { "epoch": 0.3832027325698212, "grad_norm": 0.881650984287262, "learning_rate": 0.00019222664310204114, "loss": 1.7877, "step": 2384 }, { "epoch": 0.3833634719710669, "grad_norm": 0.7704045176506042, "learning_rate": 0.0001922201243340629, "loss": 1.4358, "step": 2385 }, { "epoch": 0.38352421137231263, "grad_norm": 0.7783375978469849, "learning_rate": 0.00019221360294452414, "loss": 1.3637, "step": 2386 }, { "epoch": 0.38368495077355835, "grad_norm": 0.8749060034751892, "learning_rate": 0.00019220707893361034, "loss": 1.6471, "step": 2387 }, { "epoch": 0.3838456901748041, "grad_norm": 0.6973546147346497, "learning_rate": 0.00019220055230150692, "loss": 1.383, "step": 2388 }, { "epoch": 0.38400642957604986, "grad_norm": 0.7314361333847046, "learning_rate": 0.00019219402304839934, "loss": 1.399, "step": 2389 }, { "epoch": 0.3841671689772956, "grad_norm": 0.7928193807601929, "learning_rate": 0.00019218749117447335, "loss": 1.3409, "step": 2390 }, { "epoch": 0.3843279083785413, "grad_norm": 0.7341200709342957, "learning_rate": 0.0001921809566799145, "loss": 1.424, "step": 2391 }, { "epoch": 0.38448864777978703, "grad_norm": 0.8165944218635559, "learning_rate": 0.00019217441956490865, "loss": 1.3758, "step": 2392 }, { "epoch": 0.38464938718103275, "grad_norm": 0.8424252867698669, "learning_rate": 0.00019216787982964157, "loss": 1.6537, "step": 2393 }, { "epoch": 0.3848101265822785, "grad_norm": 0.7609081864356995, "learning_rate": 0.0001921613374742992, "loss": 1.4728, "step": 2394 }, { "epoch": 0.3849708659835242, "grad_norm": 0.7429969906806946, "learning_rate": 0.00019215479249906747, "loss": 1.4494, "step": 2395 }, { "epoch": 0.3851316053847699, "grad_norm": 0.9065263867378235, "learning_rate": 0.00019214824490413251, "loss": 1.5377, "step": 2396 }, { "epoch": 0.38529234478601565, "grad_norm": 0.7464486956596375, "learning_rate": 0.0001921416946896804, "loss": 1.4226, "step": 2397 }, { "epoch": 0.38545308418726143, "grad_norm": 0.7384240627288818, "learning_rate": 0.00019213514185589732, "loss": 1.3648, "step": 2398 }, { "epoch": 0.38561382358850715, "grad_norm": 0.81768798828125, "learning_rate": 0.00019212858640296965, "loss": 1.4683, "step": 2399 }, { "epoch": 0.3857745629897529, "grad_norm": 0.799252450466156, "learning_rate": 0.0001921220283310836, "loss": 1.5398, "step": 2400 }, { "epoch": 0.3857745629897529, "eval_loss": 1.5082051753997803, "eval_runtime": 46.2158, "eval_samples_per_second": 5.431, "eval_steps_per_second": 2.726, "step": 2400 }, { "epoch": 0.3859353023909986, "grad_norm": 0.8741388320922852, "learning_rate": 0.00019211546764042576, "loss": 1.6068, "step": 2401 }, { "epoch": 0.3860960417922443, "grad_norm": 0.866570770740509, "learning_rate": 0.0001921089043311825, "loss": 1.5436, "step": 2402 }, { "epoch": 0.38625678119349005, "grad_norm": 0.8164660930633545, "learning_rate": 0.00019210233840354044, "loss": 1.4539, "step": 2403 }, { "epoch": 0.3864175205947358, "grad_norm": 0.6310888528823853, "learning_rate": 0.00019209576985768624, "loss": 1.2747, "step": 2404 }, { "epoch": 0.3865782599959815, "grad_norm": 0.691762387752533, "learning_rate": 0.0001920891986938066, "loss": 1.3887, "step": 2405 }, { "epoch": 0.3867389993972272, "grad_norm": 0.7432323694229126, "learning_rate": 0.0001920826249120884, "loss": 1.4434, "step": 2406 }, { "epoch": 0.386899738798473, "grad_norm": 0.8574045896530151, "learning_rate": 0.00019207604851271837, "loss": 1.8737, "step": 2407 }, { "epoch": 0.3870604781997187, "grad_norm": 0.8199409246444702, "learning_rate": 0.0001920694694958836, "loss": 1.435, "step": 2408 }, { "epoch": 0.38722121760096445, "grad_norm": 0.807316243648529, "learning_rate": 0.00019206288786177103, "loss": 1.6067, "step": 2409 }, { "epoch": 0.3873819570022102, "grad_norm": 0.7533063888549805, "learning_rate": 0.0001920563036105678, "loss": 1.2983, "step": 2410 }, { "epoch": 0.3875426964034559, "grad_norm": 0.7844280004501343, "learning_rate": 0.00019204971674246108, "loss": 1.5913, "step": 2411 }, { "epoch": 0.3877034358047016, "grad_norm": 0.8542926907539368, "learning_rate": 0.00019204312725763808, "loss": 1.7305, "step": 2412 }, { "epoch": 0.38786417520594735, "grad_norm": 0.9152733087539673, "learning_rate": 0.00019203653515628615, "loss": 1.8451, "step": 2413 }, { "epoch": 0.38802491460719307, "grad_norm": 0.7698416709899902, "learning_rate": 0.00019202994043859267, "loss": 1.3058, "step": 2414 }, { "epoch": 0.3881856540084388, "grad_norm": 0.7626280188560486, "learning_rate": 0.00019202334310474512, "loss": 1.4574, "step": 2415 }, { "epoch": 0.3883463934096846, "grad_norm": 0.8784126043319702, "learning_rate": 0.00019201674315493105, "loss": 1.8397, "step": 2416 }, { "epoch": 0.3885071328109303, "grad_norm": 0.7910972833633423, "learning_rate": 0.00019201014058933805, "loss": 1.356, "step": 2417 }, { "epoch": 0.388667872212176, "grad_norm": 0.7871951460838318, "learning_rate": 0.00019200353540815385, "loss": 1.5214, "step": 2418 }, { "epoch": 0.38882861161342175, "grad_norm": 0.8846908211708069, "learning_rate": 0.00019199692761156617, "loss": 1.5697, "step": 2419 }, { "epoch": 0.3889893510146675, "grad_norm": 0.831390917301178, "learning_rate": 0.0001919903171997629, "loss": 1.4534, "step": 2420 }, { "epoch": 0.3891500904159132, "grad_norm": 0.6768481135368347, "learning_rate": 0.00019198370417293194, "loss": 1.4076, "step": 2421 }, { "epoch": 0.3893108298171589, "grad_norm": 0.8559091687202454, "learning_rate": 0.00019197708853126126, "loss": 1.9239, "step": 2422 }, { "epoch": 0.38947156921840465, "grad_norm": 0.6950740814208984, "learning_rate": 0.00019197047027493894, "loss": 1.2761, "step": 2423 }, { "epoch": 0.38963230861965037, "grad_norm": 0.7733438014984131, "learning_rate": 0.00019196384940415313, "loss": 1.4589, "step": 2424 }, { "epoch": 0.38979304802089615, "grad_norm": 0.7419054508209229, "learning_rate": 0.000191957225919092, "loss": 1.413, "step": 2425 }, { "epoch": 0.3899537874221419, "grad_norm": 0.8241652846336365, "learning_rate": 0.0001919505998199439, "loss": 1.6169, "step": 2426 }, { "epoch": 0.3901145268233876, "grad_norm": 0.8601486086845398, "learning_rate": 0.00019194397110689716, "loss": 1.6535, "step": 2427 }, { "epoch": 0.3902752662246333, "grad_norm": 0.7629940509796143, "learning_rate": 0.00019193733978014018, "loss": 1.3734, "step": 2428 }, { "epoch": 0.39043600562587905, "grad_norm": 0.7254976034164429, "learning_rate": 0.00019193070583986153, "loss": 1.5675, "step": 2429 }, { "epoch": 0.39059674502712477, "grad_norm": 0.7720105051994324, "learning_rate": 0.00019192406928624975, "loss": 1.7799, "step": 2430 }, { "epoch": 0.3907574844283705, "grad_norm": 0.7474191188812256, "learning_rate": 0.00019191743011949351, "loss": 1.5524, "step": 2431 }, { "epoch": 0.3909182238296162, "grad_norm": 0.7156115174293518, "learning_rate": 0.00019191078833978158, "loss": 1.3429, "step": 2432 }, { "epoch": 0.39107896323086194, "grad_norm": 0.8242292404174805, "learning_rate": 0.00019190414394730274, "loss": 1.4411, "step": 2433 }, { "epoch": 0.3912397026321077, "grad_norm": 0.7398666739463806, "learning_rate": 0.00019189749694224585, "loss": 1.4868, "step": 2434 }, { "epoch": 0.39140044203335345, "grad_norm": 0.705066442489624, "learning_rate": 0.0001918908473247999, "loss": 1.5139, "step": 2435 }, { "epoch": 0.39156118143459917, "grad_norm": 0.8408405184745789, "learning_rate": 0.0001918841950951539, "loss": 1.4792, "step": 2436 }, { "epoch": 0.3917219208358449, "grad_norm": 0.6993088126182556, "learning_rate": 0.00019187754025349696, "loss": 1.4445, "step": 2437 }, { "epoch": 0.3918826602370906, "grad_norm": 0.7460142374038696, "learning_rate": 0.00019187088280001824, "loss": 1.2761, "step": 2438 }, { "epoch": 0.39204339963833634, "grad_norm": 0.7373493313789368, "learning_rate": 0.00019186422273490702, "loss": 1.4692, "step": 2439 }, { "epoch": 0.39220413903958207, "grad_norm": 1.0283539295196533, "learning_rate": 0.00019185756005835262, "loss": 1.5329, "step": 2440 }, { "epoch": 0.3923648784408278, "grad_norm": 0.7191708087921143, "learning_rate": 0.00019185089477054444, "loss": 1.3354, "step": 2441 }, { "epoch": 0.3925256178420735, "grad_norm": 0.8513048887252808, "learning_rate": 0.000191844226871672, "loss": 1.3842, "step": 2442 }, { "epoch": 0.3926863572433193, "grad_norm": 0.8539868593215942, "learning_rate": 0.00019183755636192472, "loss": 1.4748, "step": 2443 }, { "epoch": 0.392847096644565, "grad_norm": 0.7290206551551819, "learning_rate": 0.00019183088324149235, "loss": 1.2671, "step": 2444 }, { "epoch": 0.39300783604581074, "grad_norm": 0.8441441059112549, "learning_rate": 0.00019182420751056456, "loss": 1.5243, "step": 2445 }, { "epoch": 0.39316857544705647, "grad_norm": 0.7868830561637878, "learning_rate": 0.00019181752916933107, "loss": 1.3597, "step": 2446 }, { "epoch": 0.3933293148483022, "grad_norm": 0.7663295269012451, "learning_rate": 0.0001918108482179818, "loss": 1.399, "step": 2447 }, { "epoch": 0.3934900542495479, "grad_norm": 0.7421337962150574, "learning_rate": 0.00019180416465670663, "loss": 1.3331, "step": 2448 }, { "epoch": 0.39365079365079364, "grad_norm": 0.8677128553390503, "learning_rate": 0.00019179747848569557, "loss": 1.802, "step": 2449 }, { "epoch": 0.39381153305203936, "grad_norm": 0.7898246049880981, "learning_rate": 0.00019179078970513864, "loss": 1.6206, "step": 2450 }, { "epoch": 0.3939722724532851, "grad_norm": 0.8156419396400452, "learning_rate": 0.00019178409831522607, "loss": 1.5564, "step": 2451 }, { "epoch": 0.39413301185453087, "grad_norm": 0.7897803783416748, "learning_rate": 0.000191777404316148, "loss": 1.6144, "step": 2452 }, { "epoch": 0.3942937512557766, "grad_norm": 0.7829393148422241, "learning_rate": 0.00019177070770809475, "loss": 1.4232, "step": 2453 }, { "epoch": 0.3944544906570223, "grad_norm": 0.816796600818634, "learning_rate": 0.00019176400849125672, "loss": 1.4597, "step": 2454 }, { "epoch": 0.39461523005826804, "grad_norm": 0.9215002655982971, "learning_rate": 0.0001917573066658243, "loss": 1.5298, "step": 2455 }, { "epoch": 0.39477596945951376, "grad_norm": 0.8834329843521118, "learning_rate": 0.00019175060223198802, "loss": 1.5117, "step": 2456 }, { "epoch": 0.3949367088607595, "grad_norm": 0.7040292620658875, "learning_rate": 0.00019174389518993848, "loss": 1.2956, "step": 2457 }, { "epoch": 0.3950974482620052, "grad_norm": 0.8531547784805298, "learning_rate": 0.00019173718553986633, "loss": 1.5724, "step": 2458 }, { "epoch": 0.39525818766325094, "grad_norm": 0.7761810421943665, "learning_rate": 0.0001917304732819623, "loss": 1.4851, "step": 2459 }, { "epoch": 0.39541892706449666, "grad_norm": 0.7124666571617126, "learning_rate": 0.0001917237584164172, "loss": 1.4784, "step": 2460 }, { "epoch": 0.39557966646574244, "grad_norm": 0.8797900080680847, "learning_rate": 0.00019171704094342196, "loss": 1.5257, "step": 2461 }, { "epoch": 0.39574040586698817, "grad_norm": 0.7967567443847656, "learning_rate": 0.0001917103208631675, "loss": 1.3281, "step": 2462 }, { "epoch": 0.3959011452682339, "grad_norm": 0.8096486330032349, "learning_rate": 0.0001917035981758448, "loss": 1.4863, "step": 2463 }, { "epoch": 0.3960618846694796, "grad_norm": 0.9239474534988403, "learning_rate": 0.0001916968728816451, "loss": 1.5382, "step": 2464 }, { "epoch": 0.39622262407072534, "grad_norm": 0.7920538783073425, "learning_rate": 0.0001916901449807595, "loss": 1.5403, "step": 2465 }, { "epoch": 0.39638336347197106, "grad_norm": 0.8198341131210327, "learning_rate": 0.00019168341447337923, "loss": 1.5475, "step": 2466 }, { "epoch": 0.3965441028732168, "grad_norm": 0.8576249480247498, "learning_rate": 0.00019167668135969566, "loss": 1.7403, "step": 2467 }, { "epoch": 0.3967048422744625, "grad_norm": 0.7855527400970459, "learning_rate": 0.0001916699456399002, "loss": 1.5089, "step": 2468 }, { "epoch": 0.39686558167570823, "grad_norm": 0.728420615196228, "learning_rate": 0.00019166320731418428, "loss": 1.6858, "step": 2469 }, { "epoch": 0.397026321076954, "grad_norm": 0.6604316830635071, "learning_rate": 0.0001916564663827395, "loss": 1.2502, "step": 2470 }, { "epoch": 0.39718706047819974, "grad_norm": 0.7883817553520203, "learning_rate": 0.0001916497228457575, "loss": 1.5041, "step": 2471 }, { "epoch": 0.39734779987944546, "grad_norm": 0.7678667902946472, "learning_rate": 0.00019164297670342992, "loss": 1.4547, "step": 2472 }, { "epoch": 0.3975085392806912, "grad_norm": 0.8633100986480713, "learning_rate": 0.00019163622795594857, "loss": 1.5464, "step": 2473 }, { "epoch": 0.3976692786819369, "grad_norm": 0.7038389444351196, "learning_rate": 0.0001916294766035053, "loss": 1.4363, "step": 2474 }, { "epoch": 0.39783001808318263, "grad_norm": 0.8199782371520996, "learning_rate": 0.000191622722646292, "loss": 1.5687, "step": 2475 }, { "epoch": 0.39799075748442836, "grad_norm": 0.7010809183120728, "learning_rate": 0.00019161596608450072, "loss": 1.3987, "step": 2476 }, { "epoch": 0.3981514968856741, "grad_norm": 0.8572189211845398, "learning_rate": 0.00019160920691832346, "loss": 1.7254, "step": 2477 }, { "epoch": 0.3983122362869198, "grad_norm": 0.7598956823348999, "learning_rate": 0.00019160244514795243, "loss": 1.576, "step": 2478 }, { "epoch": 0.3984729756881656, "grad_norm": 0.8609632849693298, "learning_rate": 0.00019159568077357984, "loss": 1.6984, "step": 2479 }, { "epoch": 0.3986337150894113, "grad_norm": 0.752980649471283, "learning_rate": 0.00019158891379539793, "loss": 1.3589, "step": 2480 }, { "epoch": 0.39879445449065704, "grad_norm": 0.7623554468154907, "learning_rate": 0.00019158214421359913, "loss": 1.5142, "step": 2481 }, { "epoch": 0.39895519389190276, "grad_norm": 0.7593144178390503, "learning_rate": 0.00019157537202837586, "loss": 1.5067, "step": 2482 }, { "epoch": 0.3991159332931485, "grad_norm": 0.8836040496826172, "learning_rate": 0.0001915685972399206, "loss": 1.4238, "step": 2483 }, { "epoch": 0.3992766726943942, "grad_norm": 0.666590690612793, "learning_rate": 0.000191561819848426, "loss": 1.3503, "step": 2484 }, { "epoch": 0.39943741209563993, "grad_norm": 0.7041436433792114, "learning_rate": 0.00019155503985408465, "loss": 1.4389, "step": 2485 }, { "epoch": 0.39959815149688566, "grad_norm": 0.8154047131538391, "learning_rate": 0.00019154825725708935, "loss": 1.4853, "step": 2486 }, { "epoch": 0.3997588908981314, "grad_norm": 0.6580129265785217, "learning_rate": 0.00019154147205763285, "loss": 1.2218, "step": 2487 }, { "epoch": 0.39991963029937716, "grad_norm": 0.7523859739303589, "learning_rate": 0.00019153468425590808, "loss": 1.5735, "step": 2488 }, { "epoch": 0.4000803697006229, "grad_norm": 0.7516841292381287, "learning_rate": 0.00019152789385210798, "loss": 1.5317, "step": 2489 }, { "epoch": 0.4002411091018686, "grad_norm": 0.734634518623352, "learning_rate": 0.00019152110084642564, "loss": 1.4937, "step": 2490 }, { "epoch": 0.40040184850311433, "grad_norm": 0.8040934801101685, "learning_rate": 0.00019151430523905405, "loss": 1.5787, "step": 2491 }, { "epoch": 0.40056258790436006, "grad_norm": 0.7011284828186035, "learning_rate": 0.00019150750703018647, "loss": 1.487, "step": 2492 }, { "epoch": 0.4007233273056058, "grad_norm": 0.7092073559761047, "learning_rate": 0.00019150070622001616, "loss": 1.4053, "step": 2493 }, { "epoch": 0.4008840667068515, "grad_norm": 0.8188302516937256, "learning_rate": 0.0001914939028087364, "loss": 1.7908, "step": 2494 }, { "epoch": 0.40104480610809723, "grad_norm": 0.7635495066642761, "learning_rate": 0.00019148709679654065, "loss": 1.5558, "step": 2495 }, { "epoch": 0.40120554550934295, "grad_norm": 0.8380181193351746, "learning_rate": 0.00019148028818362231, "loss": 1.6305, "step": 2496 }, { "epoch": 0.40136628491058873, "grad_norm": 0.9192288517951965, "learning_rate": 0.00019147347697017503, "loss": 1.9395, "step": 2497 }, { "epoch": 0.40152702431183446, "grad_norm": 0.6755523681640625, "learning_rate": 0.00019146666315639235, "loss": 1.3986, "step": 2498 }, { "epoch": 0.4016877637130802, "grad_norm": 0.7035761475563049, "learning_rate": 0.00019145984674246797, "loss": 1.3963, "step": 2499 }, { "epoch": 0.4018485031143259, "grad_norm": 0.7133774757385254, "learning_rate": 0.00019145302772859572, "loss": 1.4521, "step": 2500 }, { "epoch": 0.40200924251557163, "grad_norm": 0.8242147564888, "learning_rate": 0.00019144620611496942, "loss": 1.5293, "step": 2501 }, { "epoch": 0.40216998191681735, "grad_norm": 0.7020365595817566, "learning_rate": 0.00019143938190178296, "loss": 1.5372, "step": 2502 }, { "epoch": 0.4023307213180631, "grad_norm": 0.7574473023414612, "learning_rate": 0.00019143255508923036, "loss": 1.4105, "step": 2503 }, { "epoch": 0.4024914607193088, "grad_norm": 0.9006623029708862, "learning_rate": 0.0001914257256775057, "loss": 1.5187, "step": 2504 }, { "epoch": 0.4026522001205545, "grad_norm": 0.6694271564483643, "learning_rate": 0.00019141889366680308, "loss": 1.2849, "step": 2505 }, { "epoch": 0.4028129395218003, "grad_norm": 0.7777587175369263, "learning_rate": 0.00019141205905731675, "loss": 1.5399, "step": 2506 }, { "epoch": 0.40297367892304603, "grad_norm": 0.8897342085838318, "learning_rate": 0.00019140522184924098, "loss": 1.6269, "step": 2507 }, { "epoch": 0.40313441832429175, "grad_norm": 0.6776940822601318, "learning_rate": 0.00019139838204277015, "loss": 1.3365, "step": 2508 }, { "epoch": 0.4032951577255375, "grad_norm": 0.7567303776741028, "learning_rate": 0.00019139153963809868, "loss": 1.4218, "step": 2509 }, { "epoch": 0.4034558971267832, "grad_norm": 0.8732535243034363, "learning_rate": 0.0001913846946354211, "loss": 1.6308, "step": 2510 }, { "epoch": 0.4036166365280289, "grad_norm": 0.8265684247016907, "learning_rate": 0.00019137784703493198, "loss": 1.5013, "step": 2511 }, { "epoch": 0.40377737592927465, "grad_norm": 0.7229642271995544, "learning_rate": 0.00019137099683682595, "loss": 1.2967, "step": 2512 }, { "epoch": 0.4039381153305204, "grad_norm": 0.7564324736595154, "learning_rate": 0.0001913641440412978, "loss": 1.4381, "step": 2513 }, { "epoch": 0.4040988547317661, "grad_norm": 0.8421205878257751, "learning_rate": 0.00019135728864854231, "loss": 1.9243, "step": 2514 }, { "epoch": 0.4042595941330119, "grad_norm": 0.7868829369544983, "learning_rate": 0.00019135043065875436, "loss": 1.5767, "step": 2515 }, { "epoch": 0.4044203335342576, "grad_norm": 0.8534952402114868, "learning_rate": 0.0001913435700721289, "loss": 1.6924, "step": 2516 }, { "epoch": 0.4045810729355033, "grad_norm": 0.8495595455169678, "learning_rate": 0.0001913367068888609, "loss": 1.344, "step": 2517 }, { "epoch": 0.40474181233674905, "grad_norm": 0.8766266107559204, "learning_rate": 0.0001913298411091456, "loss": 1.6318, "step": 2518 }, { "epoch": 0.4049025517379948, "grad_norm": 0.7362377643585205, "learning_rate": 0.00019132297273317806, "loss": 1.529, "step": 2519 }, { "epoch": 0.4050632911392405, "grad_norm": 0.6785436272621155, "learning_rate": 0.00019131610176115356, "loss": 1.5427, "step": 2520 }, { "epoch": 0.4052240305404862, "grad_norm": 0.7282110452651978, "learning_rate": 0.00019130922819326744, "loss": 1.554, "step": 2521 }, { "epoch": 0.40538476994173195, "grad_norm": 0.8519697785377502, "learning_rate": 0.00019130235202971508, "loss": 1.5345, "step": 2522 }, { "epoch": 0.4055455093429777, "grad_norm": 0.808031439781189, "learning_rate": 0.00019129547327069191, "loss": 1.3164, "step": 2523 }, { "epoch": 0.40570624874422345, "grad_norm": 0.8923338055610657, "learning_rate": 0.00019128859191639357, "loss": 1.5083, "step": 2524 }, { "epoch": 0.4058669881454692, "grad_norm": 0.7588145732879639, "learning_rate": 0.0001912817079670156, "loss": 1.3985, "step": 2525 }, { "epoch": 0.4060277275467149, "grad_norm": 0.6829206943511963, "learning_rate": 0.00019127482142275372, "loss": 1.2544, "step": 2526 }, { "epoch": 0.4061884669479606, "grad_norm": 0.7919324040412903, "learning_rate": 0.0001912679322838037, "loss": 1.4859, "step": 2527 }, { "epoch": 0.40634920634920635, "grad_norm": 0.7178294658660889, "learning_rate": 0.00019126104055036132, "loss": 1.4519, "step": 2528 }, { "epoch": 0.4065099457504521, "grad_norm": 0.7910966277122498, "learning_rate": 0.0001912541462226226, "loss": 1.6246, "step": 2529 }, { "epoch": 0.4066706851516978, "grad_norm": 0.724397599697113, "learning_rate": 0.00019124724930078346, "loss": 1.4955, "step": 2530 }, { "epoch": 0.4068314245529435, "grad_norm": 0.779293954372406, "learning_rate": 0.00019124034978503995, "loss": 1.4475, "step": 2531 }, { "epoch": 0.40699216395418925, "grad_norm": 0.8705267310142517, "learning_rate": 0.00019123344767558824, "loss": 1.3409, "step": 2532 }, { "epoch": 0.407152903355435, "grad_norm": 0.728273332118988, "learning_rate": 0.0001912265429726245, "loss": 1.3604, "step": 2533 }, { "epoch": 0.40731364275668075, "grad_norm": 0.854556143283844, "learning_rate": 0.00019121963567634507, "loss": 1.7441, "step": 2534 }, { "epoch": 0.4074743821579265, "grad_norm": 0.7915385961532593, "learning_rate": 0.00019121272578694623, "loss": 1.7528, "step": 2535 }, { "epoch": 0.4076351215591722, "grad_norm": 0.8166208267211914, "learning_rate": 0.00019120581330462445, "loss": 1.4672, "step": 2536 }, { "epoch": 0.4077958609604179, "grad_norm": 0.8997832536697388, "learning_rate": 0.00019119889822957623, "loss": 1.2305, "step": 2537 }, { "epoch": 0.40795660036166365, "grad_norm": 0.7414897084236145, "learning_rate": 0.00019119198056199814, "loss": 1.629, "step": 2538 }, { "epoch": 0.40811733976290937, "grad_norm": 0.8681387901306152, "learning_rate": 0.00019118506030208683, "loss": 1.919, "step": 2539 }, { "epoch": 0.4082780791641551, "grad_norm": 0.758531928062439, "learning_rate": 0.00019117813745003905, "loss": 1.5904, "step": 2540 }, { "epoch": 0.4084388185654008, "grad_norm": 0.7885474562644958, "learning_rate": 0.00019117121200605157, "loss": 1.23, "step": 2541 }, { "epoch": 0.4085995579666466, "grad_norm": 0.7174012064933777, "learning_rate": 0.00019116428397032126, "loss": 1.5082, "step": 2542 }, { "epoch": 0.4087602973678923, "grad_norm": 0.7934616804122925, "learning_rate": 0.00019115735334304508, "loss": 1.6117, "step": 2543 }, { "epoch": 0.40892103676913805, "grad_norm": 0.6988117694854736, "learning_rate": 0.00019115042012442005, "loss": 1.4009, "step": 2544 }, { "epoch": 0.40908177617038377, "grad_norm": 0.7445558309555054, "learning_rate": 0.00019114348431464322, "loss": 1.3829, "step": 2545 }, { "epoch": 0.4092425155716295, "grad_norm": 0.7500491142272949, "learning_rate": 0.0001911365459139118, "loss": 1.2914, "step": 2546 }, { "epoch": 0.4094032549728752, "grad_norm": 0.7058566808700562, "learning_rate": 0.000191129604922423, "loss": 1.4142, "step": 2547 }, { "epoch": 0.40956399437412094, "grad_norm": 0.8256326913833618, "learning_rate": 0.00019112266134037418, "loss": 1.5339, "step": 2548 }, { "epoch": 0.40972473377536667, "grad_norm": 0.8417503833770752, "learning_rate": 0.00019111571516796267, "loss": 1.7031, "step": 2549 }, { "epoch": 0.4098854731766124, "grad_norm": 0.7888121604919434, "learning_rate": 0.000191108766405386, "loss": 1.5261, "step": 2550 }, { "epoch": 0.41004621257785817, "grad_norm": 0.8570287227630615, "learning_rate": 0.0001911018150528416, "loss": 1.9682, "step": 2551 }, { "epoch": 0.4102069519791039, "grad_norm": 0.7501027584075928, "learning_rate": 0.00019109486111052717, "loss": 1.4484, "step": 2552 }, { "epoch": 0.4103676913803496, "grad_norm": 0.7676083445549011, "learning_rate": 0.00019108790457864037, "loss": 1.2654, "step": 2553 }, { "epoch": 0.41052843078159534, "grad_norm": 0.8131934404373169, "learning_rate": 0.00019108094545737894, "loss": 1.6082, "step": 2554 }, { "epoch": 0.41068917018284107, "grad_norm": 0.6952484250068665, "learning_rate": 0.00019107398374694067, "loss": 1.3425, "step": 2555 }, { "epoch": 0.4108499095840868, "grad_norm": 0.7467429637908936, "learning_rate": 0.00019106701944752357, "loss": 1.5451, "step": 2556 }, { "epoch": 0.4110106489853325, "grad_norm": 0.7462307214736938, "learning_rate": 0.0001910600525593255, "loss": 1.3775, "step": 2557 }, { "epoch": 0.41117138838657824, "grad_norm": 0.7316749691963196, "learning_rate": 0.00019105308308254456, "loss": 1.4342, "step": 2558 }, { "epoch": 0.41133212778782396, "grad_norm": 0.8575615286827087, "learning_rate": 0.0001910461110173789, "loss": 1.5322, "step": 2559 }, { "epoch": 0.41149286718906974, "grad_norm": 0.8049458861351013, "learning_rate": 0.00019103913636402668, "loss": 1.5624, "step": 2560 }, { "epoch": 0.41165360659031547, "grad_norm": 0.8062714338302612, "learning_rate": 0.00019103215912268616, "loss": 1.4805, "step": 2561 }, { "epoch": 0.4118143459915612, "grad_norm": 0.8398598432540894, "learning_rate": 0.00019102517929355572, "loss": 1.3415, "step": 2562 }, { "epoch": 0.4119750853928069, "grad_norm": 0.7831631898880005, "learning_rate": 0.00019101819687683378, "loss": 1.4274, "step": 2563 }, { "epoch": 0.41213582479405264, "grad_norm": 0.9175931215286255, "learning_rate": 0.00019101121187271874, "loss": 1.5851, "step": 2564 }, { "epoch": 0.41229656419529837, "grad_norm": 0.7551541328430176, "learning_rate": 0.0001910042242814093, "loss": 1.5656, "step": 2565 }, { "epoch": 0.4124573035965441, "grad_norm": 0.7885438799858093, "learning_rate": 0.00019099723410310402, "loss": 1.626, "step": 2566 }, { "epoch": 0.4126180429977898, "grad_norm": 0.7020472288131714, "learning_rate": 0.0001909902413380016, "loss": 1.3183, "step": 2567 }, { "epoch": 0.41277878239903554, "grad_norm": 0.6641147136688232, "learning_rate": 0.00019098324598630084, "loss": 1.3101, "step": 2568 }, { "epoch": 0.4129395218002813, "grad_norm": 0.7966509461402893, "learning_rate": 0.00019097624804820064, "loss": 1.5994, "step": 2569 }, { "epoch": 0.41310026120152704, "grad_norm": 0.7732152938842773, "learning_rate": 0.00019096924752389988, "loss": 1.5144, "step": 2570 }, { "epoch": 0.41326100060277277, "grad_norm": 0.7617549896240234, "learning_rate": 0.00019096224441359758, "loss": 1.2988, "step": 2571 }, { "epoch": 0.4134217400040185, "grad_norm": 0.7711753845214844, "learning_rate": 0.00019095523871749283, "loss": 1.6663, "step": 2572 }, { "epoch": 0.4135824794052642, "grad_norm": 0.6879797577857971, "learning_rate": 0.00019094823043578477, "loss": 1.3788, "step": 2573 }, { "epoch": 0.41374321880650994, "grad_norm": 0.8469770550727844, "learning_rate": 0.00019094121956867262, "loss": 1.5416, "step": 2574 }, { "epoch": 0.41390395820775566, "grad_norm": 0.8052458167076111, "learning_rate": 0.00019093420611635567, "loss": 1.5361, "step": 2575 }, { "epoch": 0.4140646976090014, "grad_norm": 0.6764427423477173, "learning_rate": 0.00019092719007903335, "loss": 1.3502, "step": 2576 }, { "epoch": 0.4142254370102471, "grad_norm": 0.7669712901115417, "learning_rate": 0.00019092017145690504, "loss": 1.6257, "step": 2577 }, { "epoch": 0.4143861764114929, "grad_norm": 0.743462860584259, "learning_rate": 0.0001909131502501703, "loss": 1.5404, "step": 2578 }, { "epoch": 0.4145469158127386, "grad_norm": 0.7373762726783752, "learning_rate": 0.0001909061264590287, "loss": 1.3076, "step": 2579 }, { "epoch": 0.41470765521398434, "grad_norm": 0.7353813648223877, "learning_rate": 0.00019089910008367994, "loss": 1.3511, "step": 2580 }, { "epoch": 0.41486839461523006, "grad_norm": 0.7919554114341736, "learning_rate": 0.00019089207112432376, "loss": 1.4794, "step": 2581 }, { "epoch": 0.4150291340164758, "grad_norm": 0.8093348145484924, "learning_rate": 0.0001908850395811599, "loss": 1.689, "step": 2582 }, { "epoch": 0.4151898734177215, "grad_norm": 0.7761676907539368, "learning_rate": 0.0001908780054543883, "loss": 1.4126, "step": 2583 }, { "epoch": 0.41535061281896724, "grad_norm": 0.8488290309906006, "learning_rate": 0.00019087096874420895, "loss": 1.7031, "step": 2584 }, { "epoch": 0.41551135222021296, "grad_norm": 0.733219563961029, "learning_rate": 0.00019086392945082183, "loss": 1.2286, "step": 2585 }, { "epoch": 0.4156720916214587, "grad_norm": 0.8025944232940674, "learning_rate": 0.00019085688757442709, "loss": 1.5614, "step": 2586 }, { "epoch": 0.41583283102270446, "grad_norm": 0.6854115128517151, "learning_rate": 0.0001908498431152249, "loss": 1.4134, "step": 2587 }, { "epoch": 0.4159935704239502, "grad_norm": 0.7893211245536804, "learning_rate": 0.00019084279607341545, "loss": 1.457, "step": 2588 }, { "epoch": 0.4161543098251959, "grad_norm": 0.6414058208465576, "learning_rate": 0.0001908357464491992, "loss": 1.2185, "step": 2589 }, { "epoch": 0.41631504922644164, "grad_norm": 0.7997243404388428, "learning_rate": 0.00019082869424277642, "loss": 1.609, "step": 2590 }, { "epoch": 0.41647578862768736, "grad_norm": 0.7873114347457886, "learning_rate": 0.00019082163945434766, "loss": 1.347, "step": 2591 }, { "epoch": 0.4166365280289331, "grad_norm": 0.9302279949188232, "learning_rate": 0.00019081458208411345, "loss": 1.6598, "step": 2592 }, { "epoch": 0.4167972674301788, "grad_norm": 0.7830442786216736, "learning_rate": 0.0001908075221322744, "loss": 1.6496, "step": 2593 }, { "epoch": 0.41695800683142453, "grad_norm": 0.7612038254737854, "learning_rate": 0.00019080045959903123, "loss": 1.4389, "step": 2594 }, { "epoch": 0.41711874623267026, "grad_norm": 0.794469952583313, "learning_rate": 0.00019079339448458467, "loss": 1.8183, "step": 2595 }, { "epoch": 0.41727948563391604, "grad_norm": 0.8343592286109924, "learning_rate": 0.00019078632678913563, "loss": 1.4873, "step": 2596 }, { "epoch": 0.41744022503516176, "grad_norm": 0.6997950673103333, "learning_rate": 0.0001907792565128849, "loss": 1.4044, "step": 2597 }, { "epoch": 0.4176009644364075, "grad_norm": 0.8039801120758057, "learning_rate": 0.0001907721836560336, "loss": 1.6544, "step": 2598 }, { "epoch": 0.4177617038376532, "grad_norm": 0.8064226508140564, "learning_rate": 0.00019076510821878272, "loss": 1.5199, "step": 2599 }, { "epoch": 0.41792244323889893, "grad_norm": 0.76320481300354, "learning_rate": 0.00019075803020133342, "loss": 1.5112, "step": 2600 }, { "epoch": 0.41792244323889893, "eval_loss": 1.5074462890625, "eval_runtime": 46.2567, "eval_samples_per_second": 5.426, "eval_steps_per_second": 2.724, "step": 2600 }, { "epoch": 0.41808318264014466, "grad_norm": 0.6669580340385437, "learning_rate": 0.0001907509496038869, "loss": 1.3042, "step": 2601 }, { "epoch": 0.4182439220413904, "grad_norm": 0.8234513401985168, "learning_rate": 0.00019074386642664443, "loss": 1.4137, "step": 2602 }, { "epoch": 0.4184046614426361, "grad_norm": 0.7960114479064941, "learning_rate": 0.00019073678066980742, "loss": 1.4177, "step": 2603 }, { "epoch": 0.41856540084388183, "grad_norm": 0.8011956810951233, "learning_rate": 0.0001907296923335772, "loss": 1.4546, "step": 2604 }, { "epoch": 0.4187261402451276, "grad_norm": 0.6946028470993042, "learning_rate": 0.00019072260141815534, "loss": 1.4712, "step": 2605 }, { "epoch": 0.41888687964637333, "grad_norm": 0.7497368454933167, "learning_rate": 0.00019071550792374342, "loss": 1.5259, "step": 2606 }, { "epoch": 0.41904761904761906, "grad_norm": 0.6839972138404846, "learning_rate": 0.00019070841185054305, "loss": 1.4185, "step": 2607 }, { "epoch": 0.4192083584488648, "grad_norm": 0.7909924983978271, "learning_rate": 0.00019070131319875598, "loss": 1.4886, "step": 2608 }, { "epoch": 0.4193690978501105, "grad_norm": 0.7504199743270874, "learning_rate": 0.000190694211968584, "loss": 1.5284, "step": 2609 }, { "epoch": 0.41952983725135623, "grad_norm": 0.8247617483139038, "learning_rate": 0.000190687108160229, "loss": 1.575, "step": 2610 }, { "epoch": 0.41969057665260195, "grad_norm": 0.7475968599319458, "learning_rate": 0.00019068000177389287, "loss": 1.623, "step": 2611 }, { "epoch": 0.4198513160538477, "grad_norm": 0.6632437109947205, "learning_rate": 0.00019067289280977763, "loss": 1.2734, "step": 2612 }, { "epoch": 0.4200120554550934, "grad_norm": 0.768788754940033, "learning_rate": 0.00019066578126808543, "loss": 1.3458, "step": 2613 }, { "epoch": 0.4201727948563392, "grad_norm": 0.6962963938713074, "learning_rate": 0.00019065866714901834, "loss": 1.3326, "step": 2614 }, { "epoch": 0.4203335342575849, "grad_norm": 0.7447150349617004, "learning_rate": 0.0001906515504527787, "loss": 1.249, "step": 2615 }, { "epoch": 0.42049427365883063, "grad_norm": 0.6960676312446594, "learning_rate": 0.00019064443117956873, "loss": 1.2524, "step": 2616 }, { "epoch": 0.42065501306007635, "grad_norm": 0.8433355093002319, "learning_rate": 0.00019063730932959084, "loss": 1.4261, "step": 2617 }, { "epoch": 0.4208157524613221, "grad_norm": 0.7685472965240479, "learning_rate": 0.00019063018490304753, "loss": 1.4554, "step": 2618 }, { "epoch": 0.4209764918625678, "grad_norm": 0.8265612125396729, "learning_rate": 0.00019062305790014126, "loss": 1.6182, "step": 2619 }, { "epoch": 0.4211372312638135, "grad_norm": 0.9686467051506042, "learning_rate": 0.00019061592832107462, "loss": 1.4564, "step": 2620 }, { "epoch": 0.42129797066505925, "grad_norm": 1.1638507843017578, "learning_rate": 0.00019060879616605037, "loss": 1.7006, "step": 2621 }, { "epoch": 0.421458710066305, "grad_norm": 0.7678912281990051, "learning_rate": 0.00019060166143527118, "loss": 1.5115, "step": 2622 }, { "epoch": 0.42161944946755076, "grad_norm": 2.437485933303833, "learning_rate": 0.0001905945241289399, "loss": 1.9144, "step": 2623 }, { "epoch": 0.4217801888687965, "grad_norm": 0.836601972579956, "learning_rate": 0.00019058738424725943, "loss": 1.4915, "step": 2624 }, { "epoch": 0.4219409282700422, "grad_norm": 0.7602325677871704, "learning_rate": 0.00019058024179043274, "loss": 1.44, "step": 2625 }, { "epoch": 0.42210166767128793, "grad_norm": 0.8712754249572754, "learning_rate": 0.00019057309675866283, "loss": 1.6097, "step": 2626 }, { "epoch": 0.42226240707253365, "grad_norm": 0.7456492185592651, "learning_rate": 0.00019056594915215286, "loss": 1.5503, "step": 2627 }, { "epoch": 0.4224231464737794, "grad_norm": 0.6857098340988159, "learning_rate": 0.000190558798971106, "loss": 1.5283, "step": 2628 }, { "epoch": 0.4225838858750251, "grad_norm": 0.7309983372688293, "learning_rate": 0.0001905516462157255, "loss": 1.3201, "step": 2629 }, { "epoch": 0.4227446252762708, "grad_norm": 0.8483695983886719, "learning_rate": 0.00019054449088621472, "loss": 1.3672, "step": 2630 }, { "epoch": 0.42290536467751655, "grad_norm": 0.7800455689430237, "learning_rate": 0.000190537332982777, "loss": 1.5061, "step": 2631 }, { "epoch": 0.42306610407876233, "grad_norm": 0.7860884666442871, "learning_rate": 0.00019053017250561592, "loss": 1.3465, "step": 2632 }, { "epoch": 0.42322684348000805, "grad_norm": 0.7632943987846375, "learning_rate": 0.00019052300945493494, "loss": 1.5796, "step": 2633 }, { "epoch": 0.4233875828812538, "grad_norm": 0.7963200807571411, "learning_rate": 0.00019051584383093776, "loss": 1.4056, "step": 2634 }, { "epoch": 0.4235483222824995, "grad_norm": 0.9116247296333313, "learning_rate": 0.000190508675633828, "loss": 1.867, "step": 2635 }, { "epoch": 0.4237090616837452, "grad_norm": 0.7254156470298767, "learning_rate": 0.00019050150486380952, "loss": 1.4328, "step": 2636 }, { "epoch": 0.42386980108499095, "grad_norm": 0.7402780055999756, "learning_rate": 0.0001904943315210861, "loss": 1.4966, "step": 2637 }, { "epoch": 0.4240305404862367, "grad_norm": 0.8103384971618652, "learning_rate": 0.00019048715560586165, "loss": 1.57, "step": 2638 }, { "epoch": 0.4241912798874824, "grad_norm": 0.7658784985542297, "learning_rate": 0.0001904799771183402, "loss": 1.5178, "step": 2639 }, { "epoch": 0.4243520192887282, "grad_norm": 0.7443107962608337, "learning_rate": 0.00019047279605872582, "loss": 1.3147, "step": 2640 }, { "epoch": 0.4245127586899739, "grad_norm": 0.7935131192207336, "learning_rate": 0.00019046561242722264, "loss": 1.5355, "step": 2641 }, { "epoch": 0.4246734980912196, "grad_norm": 0.8438889980316162, "learning_rate": 0.00019045842622403484, "loss": 1.3592, "step": 2642 }, { "epoch": 0.42483423749246535, "grad_norm": 0.8501654267311096, "learning_rate": 0.00019045123744936674, "loss": 1.6912, "step": 2643 }, { "epoch": 0.4249949768937111, "grad_norm": 0.6749202609062195, "learning_rate": 0.00019044404610342266, "loss": 1.3214, "step": 2644 }, { "epoch": 0.4251557162949568, "grad_norm": 0.7281819581985474, "learning_rate": 0.00019043685218640704, "loss": 1.2946, "step": 2645 }, { "epoch": 0.4253164556962025, "grad_norm": 0.7276479601860046, "learning_rate": 0.0001904296556985244, "loss": 1.3138, "step": 2646 }, { "epoch": 0.42547719509744825, "grad_norm": 0.8975667357444763, "learning_rate": 0.00019042245663997934, "loss": 1.4072, "step": 2647 }, { "epoch": 0.42563793449869397, "grad_norm": 0.7393861413002014, "learning_rate": 0.00019041525501097649, "loss": 1.5464, "step": 2648 }, { "epoch": 0.42579867389993975, "grad_norm": 0.8440849781036377, "learning_rate": 0.00019040805081172054, "loss": 1.4511, "step": 2649 }, { "epoch": 0.4259594133011855, "grad_norm": 0.7948325872421265, "learning_rate": 0.00019040084404241633, "loss": 1.3069, "step": 2650 }, { "epoch": 0.4261201527024312, "grad_norm": 0.7619486451148987, "learning_rate": 0.00019039363470326864, "loss": 1.3753, "step": 2651 }, { "epoch": 0.4262808921036769, "grad_norm": 0.9130421876907349, "learning_rate": 0.00019038642279448255, "loss": 1.6639, "step": 2652 }, { "epoch": 0.42644163150492265, "grad_norm": 0.7614704370498657, "learning_rate": 0.00019037920831626294, "loss": 1.3954, "step": 2653 }, { "epoch": 0.42660237090616837, "grad_norm": 0.8686303496360779, "learning_rate": 0.00019037199126881497, "loss": 1.5009, "step": 2654 }, { "epoch": 0.4267631103074141, "grad_norm": 0.7876929044723511, "learning_rate": 0.00019036477165234382, "loss": 1.4521, "step": 2655 }, { "epoch": 0.4269238497086598, "grad_norm": 0.6822977662086487, "learning_rate": 0.0001903575494670547, "loss": 1.2475, "step": 2656 }, { "epoch": 0.42708458910990554, "grad_norm": 0.8352903723716736, "learning_rate": 0.0001903503247131529, "loss": 1.3868, "step": 2657 }, { "epoch": 0.4272453285111513, "grad_norm": 0.7971694469451904, "learning_rate": 0.00019034309739084378, "loss": 1.4956, "step": 2658 }, { "epoch": 0.42740606791239705, "grad_norm": 0.8683745861053467, "learning_rate": 0.00019033586750033283, "loss": 1.76, "step": 2659 }, { "epoch": 0.42756680731364277, "grad_norm": 0.7387479543685913, "learning_rate": 0.00019032863504182557, "loss": 1.4167, "step": 2660 }, { "epoch": 0.4277275467148885, "grad_norm": 0.7041024565696716, "learning_rate": 0.00019032140001552762, "loss": 1.4001, "step": 2661 }, { "epoch": 0.4278882861161342, "grad_norm": 0.7785133123397827, "learning_rate": 0.0001903141624216446, "loss": 1.4828, "step": 2662 }, { "epoch": 0.42804902551737994, "grad_norm": 0.8773160576820374, "learning_rate": 0.00019030692226038227, "loss": 1.4238, "step": 2663 }, { "epoch": 0.42820976491862567, "grad_norm": 0.6535639762878418, "learning_rate": 0.0001902996795319465, "loss": 1.2101, "step": 2664 }, { "epoch": 0.4283705043198714, "grad_norm": 0.7616334557533264, "learning_rate": 0.00019029243423654307, "loss": 1.1343, "step": 2665 }, { "epoch": 0.4285312437211171, "grad_norm": 0.7101039886474609, "learning_rate": 0.0001902851863743781, "loss": 1.3661, "step": 2666 }, { "epoch": 0.4286919831223629, "grad_norm": 0.8013532757759094, "learning_rate": 0.0001902779359456575, "loss": 1.4695, "step": 2667 }, { "epoch": 0.4288527225236086, "grad_norm": 0.8337529301643372, "learning_rate": 0.00019027068295058742, "loss": 1.6964, "step": 2668 }, { "epoch": 0.42901346192485434, "grad_norm": 0.8380796313285828, "learning_rate": 0.000190263427389374, "loss": 1.4822, "step": 2669 }, { "epoch": 0.42917420132610007, "grad_norm": 0.8143289089202881, "learning_rate": 0.0001902561692622236, "loss": 1.6902, "step": 2670 }, { "epoch": 0.4293349407273458, "grad_norm": 0.9107784628868103, "learning_rate": 0.00019024890856934247, "loss": 1.6402, "step": 2671 }, { "epoch": 0.4294956801285915, "grad_norm": 0.8039049506187439, "learning_rate": 0.00019024164531093702, "loss": 1.6241, "step": 2672 }, { "epoch": 0.42965641952983724, "grad_norm": 0.8069022297859192, "learning_rate": 0.00019023437948721372, "loss": 1.5649, "step": 2673 }, { "epoch": 0.42981715893108297, "grad_norm": 0.8613746166229248, "learning_rate": 0.00019022711109837915, "loss": 1.5435, "step": 2674 }, { "epoch": 0.4299778983323287, "grad_norm": 0.8960596919059753, "learning_rate": 0.00019021984014463988, "loss": 1.6113, "step": 2675 }, { "epoch": 0.43013863773357447, "grad_norm": 0.8086533546447754, "learning_rate": 0.00019021256662620262, "loss": 1.64, "step": 2676 }, { "epoch": 0.4302993771348202, "grad_norm": 0.7773278951644897, "learning_rate": 0.00019020529054327419, "loss": 1.2506, "step": 2677 }, { "epoch": 0.4304601165360659, "grad_norm": 0.7165824770927429, "learning_rate": 0.00019019801189606135, "loss": 1.2807, "step": 2678 }, { "epoch": 0.43062085593731164, "grad_norm": 0.6767386198043823, "learning_rate": 0.00019019073068477106, "loss": 1.2242, "step": 2679 }, { "epoch": 0.43078159533855737, "grad_norm": 0.7907811403274536, "learning_rate": 0.0001901834469096103, "loss": 1.5887, "step": 2680 }, { "epoch": 0.4309423347398031, "grad_norm": 0.8587985634803772, "learning_rate": 0.00019017616057078612, "loss": 1.4885, "step": 2681 }, { "epoch": 0.4311030741410488, "grad_norm": 0.6900464296340942, "learning_rate": 0.00019016887166850563, "loss": 1.3347, "step": 2682 }, { "epoch": 0.43126381354229454, "grad_norm": 0.9689234495162964, "learning_rate": 0.00019016158020297605, "loss": 1.8569, "step": 2683 }, { "epoch": 0.43142455294354026, "grad_norm": 0.7085142731666565, "learning_rate": 0.00019015428617440467, "loss": 1.4131, "step": 2684 }, { "epoch": 0.43158529234478604, "grad_norm": 0.8004345893859863, "learning_rate": 0.0001901469895829988, "loss": 1.3673, "step": 2685 }, { "epoch": 0.43174603174603177, "grad_norm": 0.7614530920982361, "learning_rate": 0.00019013969042896593, "loss": 1.5032, "step": 2686 }, { "epoch": 0.4319067711472775, "grad_norm": 0.8605929613113403, "learning_rate": 0.0001901323887125135, "loss": 1.7237, "step": 2687 }, { "epoch": 0.4320675105485232, "grad_norm": 0.9093371033668518, "learning_rate": 0.0001901250844338491, "loss": 1.6888, "step": 2688 }, { "epoch": 0.43222824994976894, "grad_norm": 0.7600734233856201, "learning_rate": 0.00019011777759318034, "loss": 1.4565, "step": 2689 }, { "epoch": 0.43238898935101466, "grad_norm": 0.7170428037643433, "learning_rate": 0.00019011046819071496, "loss": 1.3888, "step": 2690 }, { "epoch": 0.4325497287522604, "grad_norm": 0.7323361039161682, "learning_rate": 0.00019010315622666074, "loss": 1.2076, "step": 2691 }, { "epoch": 0.4327104681535061, "grad_norm": 0.7810441255569458, "learning_rate": 0.00019009584170122552, "loss": 1.6239, "step": 2692 }, { "epoch": 0.43287120755475184, "grad_norm": 0.7457548379898071, "learning_rate": 0.00019008852461461728, "loss": 1.5324, "step": 2693 }, { "epoch": 0.4330319469559976, "grad_norm": 0.8733008503913879, "learning_rate": 0.00019008120496704397, "loss": 1.5143, "step": 2694 }, { "epoch": 0.43319268635724334, "grad_norm": 0.8363498449325562, "learning_rate": 0.00019007388275871372, "loss": 1.7588, "step": 2695 }, { "epoch": 0.43335342575848906, "grad_norm": 0.7771193981170654, "learning_rate": 0.00019006655798983461, "loss": 1.5705, "step": 2696 }, { "epoch": 0.4335141651597348, "grad_norm": 0.7185562252998352, "learning_rate": 0.00019005923066061492, "loss": 1.3929, "step": 2697 }, { "epoch": 0.4336749045609805, "grad_norm": 0.744644045829773, "learning_rate": 0.00019005190077126295, "loss": 1.5325, "step": 2698 }, { "epoch": 0.43383564396222624, "grad_norm": 0.8672025799751282, "learning_rate": 0.00019004456832198704, "loss": 1.4808, "step": 2699 }, { "epoch": 0.43399638336347196, "grad_norm": 0.7728167176246643, "learning_rate": 0.0001900372333129956, "loss": 1.5898, "step": 2700 }, { "epoch": 0.4341571227647177, "grad_norm": 0.7093779444694519, "learning_rate": 0.00019002989574449725, "loss": 1.4694, "step": 2701 }, { "epoch": 0.4343178621659634, "grad_norm": 0.7422767281532288, "learning_rate": 0.00019002255561670047, "loss": 1.5099, "step": 2702 }, { "epoch": 0.4344786015672092, "grad_norm": 0.7139518857002258, "learning_rate": 0.00019001521292981395, "loss": 1.3428, "step": 2703 }, { "epoch": 0.4346393409684549, "grad_norm": 0.8266845941543579, "learning_rate": 0.00019000786768404642, "loss": 1.5684, "step": 2704 }, { "epoch": 0.43480008036970064, "grad_norm": 0.8057905435562134, "learning_rate": 0.00019000051987960674, "loss": 1.8528, "step": 2705 }, { "epoch": 0.43496081977094636, "grad_norm": 0.7661305665969849, "learning_rate": 0.00018999316951670372, "loss": 1.5455, "step": 2706 }, { "epoch": 0.4351215591721921, "grad_norm": 0.6503386497497559, "learning_rate": 0.00018998581659554633, "loss": 1.2102, "step": 2707 }, { "epoch": 0.4352822985734378, "grad_norm": 0.8815152049064636, "learning_rate": 0.00018997846111634358, "loss": 1.8327, "step": 2708 }, { "epoch": 0.43544303797468353, "grad_norm": 0.7388945817947388, "learning_rate": 0.0001899711030793046, "loss": 1.3122, "step": 2709 }, { "epoch": 0.43560377737592926, "grad_norm": 0.8803600668907166, "learning_rate": 0.00018996374248463855, "loss": 1.4834, "step": 2710 }, { "epoch": 0.435764516777175, "grad_norm": 0.885927140712738, "learning_rate": 0.00018995637933255465, "loss": 1.537, "step": 2711 }, { "epoch": 0.43592525617842076, "grad_norm": 0.7668323516845703, "learning_rate": 0.00018994901362326223, "loss": 1.2991, "step": 2712 }, { "epoch": 0.4360859955796665, "grad_norm": 0.8507269620895386, "learning_rate": 0.00018994164535697067, "loss": 1.6367, "step": 2713 }, { "epoch": 0.4362467349809122, "grad_norm": 0.9149341583251953, "learning_rate": 0.0001899342745338894, "loss": 1.5085, "step": 2714 }, { "epoch": 0.43640747438215793, "grad_norm": 0.8333558440208435, "learning_rate": 0.00018992690115422805, "loss": 1.7034, "step": 2715 }, { "epoch": 0.43656821378340366, "grad_norm": 1.1058484315872192, "learning_rate": 0.0001899195252181961, "loss": 1.5943, "step": 2716 }, { "epoch": 0.4367289531846494, "grad_norm": 0.9545875191688538, "learning_rate": 0.00018991214672600332, "loss": 1.6378, "step": 2717 }, { "epoch": 0.4368896925858951, "grad_norm": 0.7860174179077148, "learning_rate": 0.0001899047656778594, "loss": 1.5737, "step": 2718 }, { "epoch": 0.43705043198714083, "grad_norm": 0.8003320097923279, "learning_rate": 0.00018989738207397417, "loss": 1.4851, "step": 2719 }, { "epoch": 0.43721117138838655, "grad_norm": 0.7648971080780029, "learning_rate": 0.00018988999591455758, "loss": 1.6087, "step": 2720 }, { "epoch": 0.43737191078963233, "grad_norm": 0.7869349718093872, "learning_rate": 0.00018988260719981955, "loss": 1.4782, "step": 2721 }, { "epoch": 0.43753265019087806, "grad_norm": 0.8131594657897949, "learning_rate": 0.00018987521592997013, "loss": 1.6846, "step": 2722 }, { "epoch": 0.4376933895921238, "grad_norm": 0.7748615741729736, "learning_rate": 0.00018986782210521943, "loss": 1.4117, "step": 2723 }, { "epoch": 0.4378541289933695, "grad_norm": 0.728078305721283, "learning_rate": 0.00018986042572577766, "loss": 1.4034, "step": 2724 }, { "epoch": 0.43801486839461523, "grad_norm": 0.8429229259490967, "learning_rate": 0.000189853026791855, "loss": 1.4092, "step": 2725 }, { "epoch": 0.43817560779586096, "grad_norm": 0.9177899956703186, "learning_rate": 0.00018984562530366191, "loss": 1.7323, "step": 2726 }, { "epoch": 0.4383363471971067, "grad_norm": 0.7953061461448669, "learning_rate": 0.00018983822126140867, "loss": 1.5043, "step": 2727 }, { "epoch": 0.4384970865983524, "grad_norm": 0.7755813002586365, "learning_rate": 0.00018983081466530583, "loss": 1.3129, "step": 2728 }, { "epoch": 0.4386578259995981, "grad_norm": 0.8553951978683472, "learning_rate": 0.00018982340551556393, "loss": 1.4342, "step": 2729 }, { "epoch": 0.4388185654008439, "grad_norm": 0.6644697785377502, "learning_rate": 0.00018981599381239357, "loss": 1.2102, "step": 2730 }, { "epoch": 0.43897930480208963, "grad_norm": 0.7269904613494873, "learning_rate": 0.00018980857955600545, "loss": 1.403, "step": 2731 }, { "epoch": 0.43914004420333536, "grad_norm": 0.9667109251022339, "learning_rate": 0.00018980116274661032, "loss": 1.5666, "step": 2732 }, { "epoch": 0.4393007836045811, "grad_norm": 0.7088979482650757, "learning_rate": 0.00018979374338441908, "loss": 1.3385, "step": 2733 }, { "epoch": 0.4394615230058268, "grad_norm": 0.9289289712905884, "learning_rate": 0.00018978632146964257, "loss": 1.4923, "step": 2734 }, { "epoch": 0.43962226240707253, "grad_norm": 0.8073968291282654, "learning_rate": 0.0001897788970024918, "loss": 1.5548, "step": 2735 }, { "epoch": 0.43978300180831825, "grad_norm": 0.7598000168800354, "learning_rate": 0.00018977146998317785, "loss": 1.5266, "step": 2736 }, { "epoch": 0.439943741209564, "grad_norm": 0.8078286051750183, "learning_rate": 0.00018976404041191183, "loss": 1.432, "step": 2737 }, { "epoch": 0.4401044806108097, "grad_norm": 1.1132135391235352, "learning_rate": 0.00018975660828890494, "loss": 1.977, "step": 2738 }, { "epoch": 0.4402652200120555, "grad_norm": 0.772860586643219, "learning_rate": 0.00018974917361436848, "loss": 1.3456, "step": 2739 }, { "epoch": 0.4404259594133012, "grad_norm": 0.6908577084541321, "learning_rate": 0.00018974173638851372, "loss": 1.3531, "step": 2740 }, { "epoch": 0.44058669881454693, "grad_norm": 0.7876496911048889, "learning_rate": 0.0001897342966115522, "loss": 1.4441, "step": 2741 }, { "epoch": 0.44074743821579265, "grad_norm": 0.8200158476829529, "learning_rate": 0.0001897268542836953, "loss": 1.4554, "step": 2742 }, { "epoch": 0.4409081776170384, "grad_norm": 0.7606135010719299, "learning_rate": 0.00018971940940515464, "loss": 1.5361, "step": 2743 }, { "epoch": 0.4410689170182841, "grad_norm": 0.877440869808197, "learning_rate": 0.00018971196197614188, "loss": 1.7555, "step": 2744 }, { "epoch": 0.4412296564195298, "grad_norm": 0.6056745648384094, "learning_rate": 0.00018970451199686866, "loss": 1.1607, "step": 2745 }, { "epoch": 0.44139039582077555, "grad_norm": 0.8325817584991455, "learning_rate": 0.0001896970594675468, "loss": 1.6711, "step": 2746 }, { "epoch": 0.4415511352220213, "grad_norm": 0.7495518922805786, "learning_rate": 0.00018968960438838815, "loss": 1.38, "step": 2747 }, { "epoch": 0.44171187462326705, "grad_norm": 0.770063579082489, "learning_rate": 0.00018968214675960468, "loss": 1.4488, "step": 2748 }, { "epoch": 0.4418726140245128, "grad_norm": 0.7555387616157532, "learning_rate": 0.0001896746865814083, "loss": 1.3623, "step": 2749 }, { "epoch": 0.4420333534257585, "grad_norm": 0.7905963063240051, "learning_rate": 0.0001896672238540112, "loss": 1.4486, "step": 2750 }, { "epoch": 0.4421940928270042, "grad_norm": 0.856794536113739, "learning_rate": 0.0001896597585776254, "loss": 1.4523, "step": 2751 }, { "epoch": 0.44235483222824995, "grad_norm": 0.7405420541763306, "learning_rate": 0.00018965229075246318, "loss": 1.4289, "step": 2752 }, { "epoch": 0.4425155716294957, "grad_norm": 0.7155880331993103, "learning_rate": 0.00018964482037873683, "loss": 1.4412, "step": 2753 }, { "epoch": 0.4426763110307414, "grad_norm": 0.7356750965118408, "learning_rate": 0.0001896373474566587, "loss": 1.3169, "step": 2754 }, { "epoch": 0.4428370504319871, "grad_norm": 0.7580761313438416, "learning_rate": 0.00018962987198644125, "loss": 1.353, "step": 2755 }, { "epoch": 0.44299778983323285, "grad_norm": 0.7933869957923889, "learning_rate": 0.00018962239396829697, "loss": 1.3201, "step": 2756 }, { "epoch": 0.4431585292344786, "grad_norm": 0.8382774591445923, "learning_rate": 0.00018961491340243843, "loss": 1.3562, "step": 2757 }, { "epoch": 0.44331926863572435, "grad_norm": 0.7766973376274109, "learning_rate": 0.00018960743028907827, "loss": 1.5513, "step": 2758 }, { "epoch": 0.4434800080369701, "grad_norm": 0.790673017501831, "learning_rate": 0.00018959994462842926, "loss": 1.4338, "step": 2759 }, { "epoch": 0.4436407474382158, "grad_norm": 0.861019492149353, "learning_rate": 0.00018959245642070414, "loss": 1.4096, "step": 2760 }, { "epoch": 0.4438014868394615, "grad_norm": 0.7573986053466797, "learning_rate": 0.0001895849656661158, "loss": 1.539, "step": 2761 }, { "epoch": 0.44396222624070725, "grad_norm": 0.813173770904541, "learning_rate": 0.00018957747236487718, "loss": 1.3943, "step": 2762 }, { "epoch": 0.44412296564195297, "grad_norm": 0.7574154138565063, "learning_rate": 0.00018956997651720133, "loss": 1.5293, "step": 2763 }, { "epoch": 0.4442837050431987, "grad_norm": 0.8298190236091614, "learning_rate": 0.0001895624781233013, "loss": 1.222, "step": 2764 }, { "epoch": 0.4444444444444444, "grad_norm": 0.7141277194023132, "learning_rate": 0.0001895549771833903, "loss": 1.3353, "step": 2765 }, { "epoch": 0.4446051838456902, "grad_norm": 0.8122501969337463, "learning_rate": 0.00018954747369768146, "loss": 1.5122, "step": 2766 }, { "epoch": 0.4447659232469359, "grad_norm": 0.7439101338386536, "learning_rate": 0.00018953996766638817, "loss": 1.4686, "step": 2767 }, { "epoch": 0.44492666264818165, "grad_norm": 0.850338876247406, "learning_rate": 0.00018953245908972374, "loss": 1.593, "step": 2768 }, { "epoch": 0.44508740204942737, "grad_norm": 0.7070902585983276, "learning_rate": 0.00018952494796790168, "loss": 1.4083, "step": 2769 }, { "epoch": 0.4452481414506731, "grad_norm": 0.8441824316978455, "learning_rate": 0.0001895174343011355, "loss": 1.6166, "step": 2770 }, { "epoch": 0.4454088808519188, "grad_norm": 0.7670905590057373, "learning_rate": 0.00018950991808963874, "loss": 1.4571, "step": 2771 }, { "epoch": 0.44556962025316454, "grad_norm": 0.734880268573761, "learning_rate": 0.00018950239933362512, "loss": 1.2486, "step": 2772 }, { "epoch": 0.44573035965441027, "grad_norm": 0.833345353603363, "learning_rate": 0.00018949487803330834, "loss": 1.2918, "step": 2773 }, { "epoch": 0.445891099055656, "grad_norm": 0.745611846446991, "learning_rate": 0.00018948735418890226, "loss": 1.3821, "step": 2774 }, { "epoch": 0.4460518384569018, "grad_norm": 0.8172146081924438, "learning_rate": 0.0001894798278006207, "loss": 1.5485, "step": 2775 }, { "epoch": 0.4462125778581475, "grad_norm": 0.8007433414459229, "learning_rate": 0.00018947229886867763, "loss": 1.4436, "step": 2776 }, { "epoch": 0.4463733172593932, "grad_norm": 0.751836359500885, "learning_rate": 0.00018946476739328713, "loss": 1.2771, "step": 2777 }, { "epoch": 0.44653405666063894, "grad_norm": 0.7507577538490295, "learning_rate": 0.00018945723337466323, "loss": 1.4479, "step": 2778 }, { "epoch": 0.44669479606188467, "grad_norm": 0.7452636361122131, "learning_rate": 0.00018944969681302016, "loss": 1.4128, "step": 2779 }, { "epoch": 0.4468555354631304, "grad_norm": 0.7933104038238525, "learning_rate": 0.0001894421577085721, "loss": 1.4579, "step": 2780 }, { "epoch": 0.4470162748643761, "grad_norm": 0.7679053544998169, "learning_rate": 0.0001894346160615334, "loss": 1.4659, "step": 2781 }, { "epoch": 0.44717701426562184, "grad_norm": 0.8884227275848389, "learning_rate": 0.0001894270718721185, "loss": 1.6599, "step": 2782 }, { "epoch": 0.44733775366686757, "grad_norm": 0.8673688173294067, "learning_rate": 0.00018941952514054175, "loss": 1.5065, "step": 2783 }, { "epoch": 0.44749849306811335, "grad_norm": 0.772279679775238, "learning_rate": 0.0001894119758670178, "loss": 1.3873, "step": 2784 }, { "epoch": 0.44765923246935907, "grad_norm": 0.7628627419471741, "learning_rate": 0.00018940442405176113, "loss": 1.261, "step": 2785 }, { "epoch": 0.4478199718706048, "grad_norm": 0.7988528609275818, "learning_rate": 0.00018939686969498648, "loss": 1.4847, "step": 2786 }, { "epoch": 0.4479807112718505, "grad_norm": 1.0682181119918823, "learning_rate": 0.00018938931279690864, "loss": 1.4773, "step": 2787 }, { "epoch": 0.44814145067309624, "grad_norm": 0.8976332545280457, "learning_rate": 0.00018938175335774237, "loss": 1.6789, "step": 2788 }, { "epoch": 0.44830219007434197, "grad_norm": 0.7305929660797119, "learning_rate": 0.0001893741913777026, "loss": 1.4455, "step": 2789 }, { "epoch": 0.4484629294755877, "grad_norm": 0.7219006419181824, "learning_rate": 0.00018936662685700427, "loss": 1.3469, "step": 2790 }, { "epoch": 0.4486236688768334, "grad_norm": 0.755734384059906, "learning_rate": 0.00018935905979586246, "loss": 1.5542, "step": 2791 }, { "epoch": 0.44878440827807914, "grad_norm": 0.8060997128486633, "learning_rate": 0.0001893514901944922, "loss": 1.5448, "step": 2792 }, { "epoch": 0.4489451476793249, "grad_norm": 0.7600473165512085, "learning_rate": 0.00018934391805310876, "loss": 1.6636, "step": 2793 }, { "epoch": 0.44910588708057064, "grad_norm": 0.8374873399734497, "learning_rate": 0.00018933634337192735, "loss": 1.6601, "step": 2794 }, { "epoch": 0.44926662648181637, "grad_norm": 0.8258968591690063, "learning_rate": 0.00018932876615116332, "loss": 1.4604, "step": 2795 }, { "epoch": 0.4494273658830621, "grad_norm": 0.7406456470489502, "learning_rate": 0.00018932118639103203, "loss": 1.5654, "step": 2796 }, { "epoch": 0.4495881052843078, "grad_norm": 0.7643452882766724, "learning_rate": 0.00018931360409174896, "loss": 1.495, "step": 2797 }, { "epoch": 0.44974884468555354, "grad_norm": 0.7696071267127991, "learning_rate": 0.0001893060192535297, "loss": 1.5947, "step": 2798 }, { "epoch": 0.44990958408679926, "grad_norm": 0.8160179853439331, "learning_rate": 0.00018929843187658985, "loss": 1.4066, "step": 2799 }, { "epoch": 0.450070323488045, "grad_norm": 0.6990664601325989, "learning_rate": 0.00018929084196114504, "loss": 1.4062, "step": 2800 }, { "epoch": 0.450070323488045, "eval_loss": 1.5043162107467651, "eval_runtime": 46.2469, "eval_samples_per_second": 5.427, "eval_steps_per_second": 2.725, "step": 2800 }, { "epoch": 0.4502310628892907, "grad_norm": 0.766503095626831, "learning_rate": 0.0001892832495074111, "loss": 1.3244, "step": 2801 }, { "epoch": 0.4503918022905365, "grad_norm": 0.653872013092041, "learning_rate": 0.00018927565451560383, "loss": 1.2134, "step": 2802 }, { "epoch": 0.4505525416917822, "grad_norm": 0.8987058401107788, "learning_rate": 0.0001892680569859391, "loss": 1.5096, "step": 2803 }, { "epoch": 0.45071328109302794, "grad_norm": 0.8892366290092468, "learning_rate": 0.00018926045691863302, "loss": 1.6922, "step": 2804 }, { "epoch": 0.45087402049427366, "grad_norm": 0.8682940006256104, "learning_rate": 0.00018925285431390147, "loss": 1.5852, "step": 2805 }, { "epoch": 0.4510347598955194, "grad_norm": 0.871757447719574, "learning_rate": 0.00018924524917196067, "loss": 1.7085, "step": 2806 }, { "epoch": 0.4511954992967651, "grad_norm": 0.7730371356010437, "learning_rate": 0.00018923764149302675, "loss": 1.5047, "step": 2807 }, { "epoch": 0.45135623869801084, "grad_norm": 0.7852548360824585, "learning_rate": 0.00018923003127731608, "loss": 1.2421, "step": 2808 }, { "epoch": 0.45151697809925656, "grad_norm": 0.9632487893104553, "learning_rate": 0.0001892224185250449, "loss": 1.6071, "step": 2809 }, { "epoch": 0.4516777175005023, "grad_norm": 0.735626757144928, "learning_rate": 0.00018921480323642965, "loss": 1.4427, "step": 2810 }, { "epoch": 0.45183845690174806, "grad_norm": 0.7501117587089539, "learning_rate": 0.0001892071854116868, "loss": 1.6787, "step": 2811 }, { "epoch": 0.4519991963029938, "grad_norm": 0.7624385356903076, "learning_rate": 0.00018919956505103293, "loss": 1.6305, "step": 2812 }, { "epoch": 0.4521599357042395, "grad_norm": 0.7763976454734802, "learning_rate": 0.00018919194215468466, "loss": 1.4245, "step": 2813 }, { "epoch": 0.45232067510548524, "grad_norm": 0.795971691608429, "learning_rate": 0.00018918431672285864, "loss": 1.6754, "step": 2814 }, { "epoch": 0.45248141450673096, "grad_norm": 0.8608537316322327, "learning_rate": 0.00018917668875577173, "loss": 1.4521, "step": 2815 }, { "epoch": 0.4526421539079767, "grad_norm": 0.7681630253791809, "learning_rate": 0.0001891690582536407, "loss": 1.5485, "step": 2816 }, { "epoch": 0.4528028933092224, "grad_norm": 0.7775387763977051, "learning_rate": 0.00018916142521668245, "loss": 1.4916, "step": 2817 }, { "epoch": 0.45296363271046813, "grad_norm": 0.7680997848510742, "learning_rate": 0.000189153789645114, "loss": 1.4986, "step": 2818 }, { "epoch": 0.45312437211171386, "grad_norm": 0.7971798181533813, "learning_rate": 0.00018914615153915244, "loss": 1.6185, "step": 2819 }, { "epoch": 0.45328511151295964, "grad_norm": 0.7503923177719116, "learning_rate": 0.00018913851089901487, "loss": 1.3574, "step": 2820 }, { "epoch": 0.45344585091420536, "grad_norm": 0.898608922958374, "learning_rate": 0.00018913086772491848, "loss": 1.8302, "step": 2821 }, { "epoch": 0.4536065903154511, "grad_norm": 0.7606514096260071, "learning_rate": 0.00018912322201708058, "loss": 1.4879, "step": 2822 }, { "epoch": 0.4537673297166968, "grad_norm": 0.8142838478088379, "learning_rate": 0.00018911557377571843, "loss": 1.3024, "step": 2823 }, { "epoch": 0.45392806911794253, "grad_norm": 0.8586592674255371, "learning_rate": 0.00018910792300104955, "loss": 1.4938, "step": 2824 }, { "epoch": 0.45408880851918826, "grad_norm": 0.9128136038780212, "learning_rate": 0.00018910026969329137, "loss": 1.5519, "step": 2825 }, { "epoch": 0.454249547920434, "grad_norm": 0.8256163001060486, "learning_rate": 0.0001890926138526615, "loss": 1.5605, "step": 2826 }, { "epoch": 0.4544102873216797, "grad_norm": 0.681378185749054, "learning_rate": 0.0001890849554793775, "loss": 1.2928, "step": 2827 }, { "epoch": 0.45457102672292543, "grad_norm": 0.7240657210350037, "learning_rate": 0.00018907729457365714, "loss": 1.3377, "step": 2828 }, { "epoch": 0.4547317661241712, "grad_norm": 0.7723994255065918, "learning_rate": 0.0001890696311357182, "loss": 1.4586, "step": 2829 }, { "epoch": 0.45489250552541693, "grad_norm": 0.7634881734848022, "learning_rate": 0.0001890619651657785, "loss": 1.4617, "step": 2830 }, { "epoch": 0.45505324492666266, "grad_norm": 0.8226473331451416, "learning_rate": 0.00018905429666405595, "loss": 1.5069, "step": 2831 }, { "epoch": 0.4552139843279084, "grad_norm": 0.7757888436317444, "learning_rate": 0.00018904662563076858, "loss": 1.3604, "step": 2832 }, { "epoch": 0.4553747237291541, "grad_norm": 0.8132414817810059, "learning_rate": 0.00018903895206613441, "loss": 1.5056, "step": 2833 }, { "epoch": 0.45553546313039983, "grad_norm": 0.8456790447235107, "learning_rate": 0.00018903127597037168, "loss": 1.542, "step": 2834 }, { "epoch": 0.45569620253164556, "grad_norm": 0.7444630265235901, "learning_rate": 0.00018902359734369845, "loss": 1.4416, "step": 2835 }, { "epoch": 0.4558569419328913, "grad_norm": 0.7790692448616028, "learning_rate": 0.00018901591618633313, "loss": 1.3671, "step": 2836 }, { "epoch": 0.456017681334137, "grad_norm": 0.7049763798713684, "learning_rate": 0.00018900823249849399, "loss": 1.2203, "step": 2837 }, { "epoch": 0.4561784207353828, "grad_norm": 0.9115387797355652, "learning_rate": 0.0001890005462803995, "loss": 1.7875, "step": 2838 }, { "epoch": 0.4563391601366285, "grad_norm": 0.8126285672187805, "learning_rate": 0.0001889928575322682, "loss": 1.5069, "step": 2839 }, { "epoch": 0.45649989953787423, "grad_norm": 0.8538519740104675, "learning_rate": 0.00018898516625431855, "loss": 1.4433, "step": 2840 }, { "epoch": 0.45666063893911996, "grad_norm": 0.9076743125915527, "learning_rate": 0.00018897747244676926, "loss": 1.4025, "step": 2841 }, { "epoch": 0.4568213783403657, "grad_norm": 0.9765421748161316, "learning_rate": 0.00018896977610983906, "loss": 1.5819, "step": 2842 }, { "epoch": 0.4569821177416114, "grad_norm": 0.8433426022529602, "learning_rate": 0.00018896207724374667, "loss": 1.5614, "step": 2843 }, { "epoch": 0.45714285714285713, "grad_norm": 0.7224279046058655, "learning_rate": 0.000188954375848711, "loss": 1.3448, "step": 2844 }, { "epoch": 0.45730359654410285, "grad_norm": 0.7107641696929932, "learning_rate": 0.00018894667192495099, "loss": 1.2435, "step": 2845 }, { "epoch": 0.4574643359453486, "grad_norm": 0.8799521327018738, "learning_rate": 0.0001889389654726856, "loss": 1.5695, "step": 2846 }, { "epoch": 0.45762507534659436, "grad_norm": 0.8327635526657104, "learning_rate": 0.0001889312564921339, "loss": 1.5416, "step": 2847 }, { "epoch": 0.4577858147478401, "grad_norm": 0.865990161895752, "learning_rate": 0.00018892354498351504, "loss": 1.5553, "step": 2848 }, { "epoch": 0.4579465541490858, "grad_norm": 0.8708232641220093, "learning_rate": 0.0001889158309470483, "loss": 1.7047, "step": 2849 }, { "epoch": 0.45810729355033153, "grad_norm": 0.8408650159835815, "learning_rate": 0.00018890811438295287, "loss": 1.4423, "step": 2850 }, { "epoch": 0.45826803295157725, "grad_norm": 0.6997664570808411, "learning_rate": 0.00018890039529144819, "loss": 1.197, "step": 2851 }, { "epoch": 0.458428772352823, "grad_norm": 0.77046799659729, "learning_rate": 0.00018889267367275364, "loss": 1.4817, "step": 2852 }, { "epoch": 0.4585895117540687, "grad_norm": 0.7814663648605347, "learning_rate": 0.00018888494952708877, "loss": 1.3271, "step": 2853 }, { "epoch": 0.4587502511553144, "grad_norm": 0.7686842679977417, "learning_rate": 0.00018887722285467314, "loss": 1.5037, "step": 2854 }, { "epoch": 0.45891099055656015, "grad_norm": 0.8226576447486877, "learning_rate": 0.00018886949365572636, "loss": 1.4494, "step": 2855 }, { "epoch": 0.45907172995780593, "grad_norm": 0.8393173813819885, "learning_rate": 0.00018886176193046817, "loss": 1.5505, "step": 2856 }, { "epoch": 0.45923246935905165, "grad_norm": 0.7715250253677368, "learning_rate": 0.00018885402767911837, "loss": 1.297, "step": 2857 }, { "epoch": 0.4593932087602974, "grad_norm": 0.8182443380355835, "learning_rate": 0.00018884629090189684, "loss": 1.4562, "step": 2858 }, { "epoch": 0.4595539481615431, "grad_norm": 0.7686094641685486, "learning_rate": 0.0001888385515990235, "loss": 1.4769, "step": 2859 }, { "epoch": 0.4597146875627888, "grad_norm": 0.775140106678009, "learning_rate": 0.00018883080977071835, "loss": 1.5826, "step": 2860 }, { "epoch": 0.45987542696403455, "grad_norm": 0.6676628589630127, "learning_rate": 0.00018882306541720146, "loss": 1.1304, "step": 2861 }, { "epoch": 0.4600361663652803, "grad_norm": 0.8045805096626282, "learning_rate": 0.00018881531853869304, "loss": 1.5293, "step": 2862 }, { "epoch": 0.460196905766526, "grad_norm": 0.8089798092842102, "learning_rate": 0.0001888075691354132, "loss": 1.598, "step": 2863 }, { "epoch": 0.4603576451677717, "grad_norm": 0.8937901854515076, "learning_rate": 0.00018879981720758236, "loss": 1.5993, "step": 2864 }, { "epoch": 0.4605183845690175, "grad_norm": 0.8392865061759949, "learning_rate": 0.00018879206275542075, "loss": 1.4054, "step": 2865 }, { "epoch": 0.4606791239702632, "grad_norm": 0.7857552766799927, "learning_rate": 0.00018878430577914896, "loss": 1.5138, "step": 2866 }, { "epoch": 0.46083986337150895, "grad_norm": 0.7558716535568237, "learning_rate": 0.0001887765462789874, "loss": 1.3792, "step": 2867 }, { "epoch": 0.4610006027727547, "grad_norm": 0.9357433319091797, "learning_rate": 0.00018876878425515666, "loss": 1.4289, "step": 2868 }, { "epoch": 0.4611613421740004, "grad_norm": 0.7710779309272766, "learning_rate": 0.00018876101970787738, "loss": 1.4113, "step": 2869 }, { "epoch": 0.4613220815752461, "grad_norm": 0.791892409324646, "learning_rate": 0.00018875325263737033, "loss": 1.4749, "step": 2870 }, { "epoch": 0.46148282097649185, "grad_norm": 0.8201519250869751, "learning_rate": 0.00018874548304385628, "loss": 1.3188, "step": 2871 }, { "epoch": 0.46164356037773757, "grad_norm": 0.7344081401824951, "learning_rate": 0.00018873771092755616, "loss": 1.3891, "step": 2872 }, { "epoch": 0.4618042997789833, "grad_norm": 0.7867786288261414, "learning_rate": 0.00018872993628869079, "loss": 1.2049, "step": 2873 }, { "epoch": 0.4619650391802291, "grad_norm": 0.7076240181922913, "learning_rate": 0.00018872215912748127, "loss": 1.3603, "step": 2874 }, { "epoch": 0.4621257785814748, "grad_norm": 0.7652766108512878, "learning_rate": 0.00018871437944414864, "loss": 1.5932, "step": 2875 }, { "epoch": 0.4622865179827205, "grad_norm": 0.7662140727043152, "learning_rate": 0.00018870659723891405, "loss": 1.3481, "step": 2876 }, { "epoch": 0.46244725738396625, "grad_norm": 0.8903924226760864, "learning_rate": 0.0001886988125119988, "loss": 1.2641, "step": 2877 }, { "epoch": 0.462607996785212, "grad_norm": 0.8183878064155579, "learning_rate": 0.00018869102526362412, "loss": 1.5928, "step": 2878 }, { "epoch": 0.4627687361864577, "grad_norm": 0.7489455938339233, "learning_rate": 0.0001886832354940114, "loss": 1.3948, "step": 2879 }, { "epoch": 0.4629294755877034, "grad_norm": 0.8464653491973877, "learning_rate": 0.00018867544320338205, "loss": 1.5328, "step": 2880 }, { "epoch": 0.46309021498894914, "grad_norm": 0.726719081401825, "learning_rate": 0.0001886676483919576, "loss": 1.3189, "step": 2881 }, { "epoch": 0.46325095439019487, "grad_norm": 0.8854729533195496, "learning_rate": 0.00018865985105995969, "loss": 1.514, "step": 2882 }, { "epoch": 0.46341169379144065, "grad_norm": 0.941740095615387, "learning_rate": 0.0001886520512076099, "loss": 1.587, "step": 2883 }, { "epoch": 0.4635724331926864, "grad_norm": 0.7029240727424622, "learning_rate": 0.00018864424883513, "loss": 1.413, "step": 2884 }, { "epoch": 0.4637331725939321, "grad_norm": 0.8306525349617004, "learning_rate": 0.0001886364439427418, "loss": 1.561, "step": 2885 }, { "epoch": 0.4638939119951778, "grad_norm": 0.8826491236686707, "learning_rate": 0.00018862863653066713, "loss": 1.7483, "step": 2886 }, { "epoch": 0.46405465139642355, "grad_norm": 0.8835345506668091, "learning_rate": 0.00018862082659912795, "loss": 1.5466, "step": 2887 }, { "epoch": 0.46421539079766927, "grad_norm": 0.8100755214691162, "learning_rate": 0.00018861301414834628, "loss": 1.5449, "step": 2888 }, { "epoch": 0.464376130198915, "grad_norm": 0.6933585405349731, "learning_rate": 0.0001886051991785442, "loss": 1.5049, "step": 2889 }, { "epoch": 0.4645368696001607, "grad_norm": 0.7443497180938721, "learning_rate": 0.0001885973816899439, "loss": 1.371, "step": 2890 }, { "epoch": 0.4646976090014065, "grad_norm": 0.8682059049606323, "learning_rate": 0.00018858956168276756, "loss": 1.7767, "step": 2891 }, { "epoch": 0.4648583484026522, "grad_norm": 0.8782916069030762, "learning_rate": 0.00018858173915723748, "loss": 1.5957, "step": 2892 }, { "epoch": 0.46501908780389795, "grad_norm": 0.8463097810745239, "learning_rate": 0.0001885739141135761, "loss": 1.5336, "step": 2893 }, { "epoch": 0.46517982720514367, "grad_norm": 0.8426713347434998, "learning_rate": 0.00018856608655200578, "loss": 1.4453, "step": 2894 }, { "epoch": 0.4653405666063894, "grad_norm": 0.7894969582557678, "learning_rate": 0.0001885582564727491, "loss": 1.3544, "step": 2895 }, { "epoch": 0.4655013060076351, "grad_norm": 0.7904619574546814, "learning_rate": 0.0001885504238760286, "loss": 1.3613, "step": 2896 }, { "epoch": 0.46566204540888084, "grad_norm": 0.860861599445343, "learning_rate": 0.00018854258876206697, "loss": 1.4363, "step": 2897 }, { "epoch": 0.46582278481012657, "grad_norm": 0.7854633331298828, "learning_rate": 0.00018853475113108693, "loss": 1.4077, "step": 2898 }, { "epoch": 0.4659835242113723, "grad_norm": 0.8162265419960022, "learning_rate": 0.0001885269109833113, "loss": 1.7133, "step": 2899 }, { "epoch": 0.46614426361261807, "grad_norm": 0.8009337186813354, "learning_rate": 0.0001885190683189629, "loss": 1.4696, "step": 2900 }, { "epoch": 0.4663050030138638, "grad_norm": 0.7052198648452759, "learning_rate": 0.0001885112231382647, "loss": 1.3227, "step": 2901 }, { "epoch": 0.4664657424151095, "grad_norm": 0.8282212018966675, "learning_rate": 0.0001885033754414398, "loss": 1.6169, "step": 2902 }, { "epoch": 0.46662648181635524, "grad_norm": 0.7678226828575134, "learning_rate": 0.0001884955252287112, "loss": 1.6589, "step": 2903 }, { "epoch": 0.46678722121760097, "grad_norm": 0.8356521725654602, "learning_rate": 0.00018848767250030201, "loss": 1.6842, "step": 2904 }, { "epoch": 0.4669479606188467, "grad_norm": 0.780298113822937, "learning_rate": 0.00018847981725643556, "loss": 1.5379, "step": 2905 }, { "epoch": 0.4671087000200924, "grad_norm": 0.6779277324676514, "learning_rate": 0.00018847195949733516, "loss": 1.1579, "step": 2906 }, { "epoch": 0.46726943942133814, "grad_norm": 0.6840459704399109, "learning_rate": 0.0001884640992232241, "loss": 1.2707, "step": 2907 }, { "epoch": 0.46743017882258386, "grad_norm": 0.8536396026611328, "learning_rate": 0.0001884562364343259, "loss": 1.4673, "step": 2908 }, { "epoch": 0.46759091822382964, "grad_norm": 0.7350811958312988, "learning_rate": 0.000188448371130864, "loss": 1.3919, "step": 2909 }, { "epoch": 0.46775165762507537, "grad_norm": 0.792595386505127, "learning_rate": 0.00018844050331306208, "loss": 1.4267, "step": 2910 }, { "epoch": 0.4679123970263211, "grad_norm": 0.7827614545822144, "learning_rate": 0.00018843263298114372, "loss": 1.5669, "step": 2911 }, { "epoch": 0.4680731364275668, "grad_norm": 0.7562287449836731, "learning_rate": 0.0001884247601353327, "loss": 1.2974, "step": 2912 }, { "epoch": 0.46823387582881254, "grad_norm": 0.7217016816139221, "learning_rate": 0.00018841688477585282, "loss": 1.4445, "step": 2913 }, { "epoch": 0.46839461523005826, "grad_norm": 0.8294912576675415, "learning_rate": 0.00018840900690292794, "loss": 1.5441, "step": 2914 }, { "epoch": 0.468555354631304, "grad_norm": 0.7550749182701111, "learning_rate": 0.000188401126516782, "loss": 1.5066, "step": 2915 }, { "epoch": 0.4687160940325497, "grad_norm": 0.9464187622070312, "learning_rate": 0.00018839324361763903, "loss": 1.6476, "step": 2916 }, { "epoch": 0.46887683343379544, "grad_norm": 0.7907178401947021, "learning_rate": 0.00018838535820572315, "loss": 1.4932, "step": 2917 }, { "epoch": 0.4690375728350412, "grad_norm": 0.906710684299469, "learning_rate": 0.00018837747028125845, "loss": 1.4877, "step": 2918 }, { "epoch": 0.46919831223628694, "grad_norm": 0.7501565217971802, "learning_rate": 0.00018836957984446918, "loss": 1.4505, "step": 2919 }, { "epoch": 0.46935905163753266, "grad_norm": 0.8057777881622314, "learning_rate": 0.0001883616868955797, "loss": 1.3445, "step": 2920 }, { "epoch": 0.4695197910387784, "grad_norm": 0.8211376667022705, "learning_rate": 0.00018835379143481432, "loss": 1.5289, "step": 2921 }, { "epoch": 0.4696805304400241, "grad_norm": 0.7627571821212769, "learning_rate": 0.00018834589346239752, "loss": 1.5016, "step": 2922 }, { "epoch": 0.46984126984126984, "grad_norm": 0.7351404428482056, "learning_rate": 0.0001883379929785538, "loss": 1.3641, "step": 2923 }, { "epoch": 0.47000200924251556, "grad_norm": 0.7296054363250732, "learning_rate": 0.0001883300899835078, "loss": 1.3511, "step": 2924 }, { "epoch": 0.4701627486437613, "grad_norm": 0.773335874080658, "learning_rate": 0.00018832218447748411, "loss": 1.2931, "step": 2925 }, { "epoch": 0.470323488045007, "grad_norm": 0.756523072719574, "learning_rate": 0.00018831427646070746, "loss": 1.4037, "step": 2926 }, { "epoch": 0.4704842274462528, "grad_norm": 0.8867207765579224, "learning_rate": 0.00018830636593340273, "loss": 1.3932, "step": 2927 }, { "epoch": 0.4706449668474985, "grad_norm": 0.8005340099334717, "learning_rate": 0.0001882984528957947, "loss": 1.328, "step": 2928 }, { "epoch": 0.47080570624874424, "grad_norm": 0.7142349481582642, "learning_rate": 0.0001882905373481084, "loss": 1.3023, "step": 2929 }, { "epoch": 0.47096644564998996, "grad_norm": 0.7634740471839905, "learning_rate": 0.00018828261929056876, "loss": 1.6438, "step": 2930 }, { "epoch": 0.4711271850512357, "grad_norm": 0.8208639025688171, "learning_rate": 0.000188274698723401, "loss": 1.319, "step": 2931 }, { "epoch": 0.4712879244524814, "grad_norm": 0.8191777467727661, "learning_rate": 0.0001882667756468301, "loss": 1.3715, "step": 2932 }, { "epoch": 0.47144866385372713, "grad_norm": 0.8014425039291382, "learning_rate": 0.00018825885006108143, "loss": 1.5895, "step": 2933 }, { "epoch": 0.47160940325497286, "grad_norm": 0.7797102928161621, "learning_rate": 0.00018825092196638024, "loss": 1.4793, "step": 2934 }, { "epoch": 0.4717701426562186, "grad_norm": 0.7197870016098022, "learning_rate": 0.00018824299136295194, "loss": 1.4257, "step": 2935 }, { "epoch": 0.47193088205746436, "grad_norm": 0.8597614169120789, "learning_rate": 0.00018823505825102196, "loss": 1.7274, "step": 2936 }, { "epoch": 0.4720916214587101, "grad_norm": 0.8797577023506165, "learning_rate": 0.00018822712263081575, "loss": 1.6053, "step": 2937 }, { "epoch": 0.4722523608599558, "grad_norm": 0.6997210383415222, "learning_rate": 0.00018821918450255898, "loss": 1.3125, "step": 2938 }, { "epoch": 0.47241310026120154, "grad_norm": 0.8568103909492493, "learning_rate": 0.00018821124386647728, "loss": 1.6066, "step": 2939 }, { "epoch": 0.47257383966244726, "grad_norm": 0.8416555523872375, "learning_rate": 0.00018820330072279637, "loss": 1.3781, "step": 2940 }, { "epoch": 0.472734579063693, "grad_norm": 0.8763321042060852, "learning_rate": 0.00018819535507174208, "loss": 1.4897, "step": 2941 }, { "epoch": 0.4728953184649387, "grad_norm": 0.7751544117927551, "learning_rate": 0.00018818740691354025, "loss": 1.4716, "step": 2942 }, { "epoch": 0.47305605786618443, "grad_norm": 0.8499418497085571, "learning_rate": 0.00018817945624841683, "loss": 1.5519, "step": 2943 }, { "epoch": 0.47321679726743016, "grad_norm": 0.7760657668113708, "learning_rate": 0.00018817150307659785, "loss": 1.3402, "step": 2944 }, { "epoch": 0.47337753666867594, "grad_norm": 0.7016104459762573, "learning_rate": 0.00018816354739830941, "loss": 1.2206, "step": 2945 }, { "epoch": 0.47353827606992166, "grad_norm": 0.8473197221755981, "learning_rate": 0.00018815558921377764, "loss": 1.7626, "step": 2946 }, { "epoch": 0.4736990154711674, "grad_norm": 0.8197193741798401, "learning_rate": 0.00018814762852322877, "loss": 1.5701, "step": 2947 }, { "epoch": 0.4738597548724131, "grad_norm": 0.9320449233055115, "learning_rate": 0.0001881396653268891, "loss": 1.1815, "step": 2948 }, { "epoch": 0.47402049427365883, "grad_norm": 0.8535776138305664, "learning_rate": 0.00018813169962498503, "loss": 1.7312, "step": 2949 }, { "epoch": 0.47418123367490456, "grad_norm": 0.730904757976532, "learning_rate": 0.00018812373141774294, "loss": 1.3569, "step": 2950 }, { "epoch": 0.4743419730761503, "grad_norm": 0.6728198528289795, "learning_rate": 0.00018811576070538942, "loss": 1.2376, "step": 2951 }, { "epoch": 0.474502712477396, "grad_norm": 0.8863263130187988, "learning_rate": 0.00018810778748815104, "loss": 1.8738, "step": 2952 }, { "epoch": 0.47466345187864173, "grad_norm": 0.8072320222854614, "learning_rate": 0.00018809981176625437, "loss": 1.4669, "step": 2953 }, { "epoch": 0.4748241912798875, "grad_norm": 0.7074745297431946, "learning_rate": 0.00018809183353992625, "loss": 1.3454, "step": 2954 }, { "epoch": 0.47498493068113323, "grad_norm": 0.809503972530365, "learning_rate": 0.0001880838528093934, "loss": 1.5608, "step": 2955 }, { "epoch": 0.47514567008237896, "grad_norm": 0.751179575920105, "learning_rate": 0.00018807586957488278, "loss": 1.4927, "step": 2956 }, { "epoch": 0.4753064094836247, "grad_norm": 0.8200694918632507, "learning_rate": 0.00018806788383662123, "loss": 1.3131, "step": 2957 }, { "epoch": 0.4754671488848704, "grad_norm": 0.7984315156936646, "learning_rate": 0.00018805989559483584, "loss": 1.5114, "step": 2958 }, { "epoch": 0.47562788828611613, "grad_norm": 0.966468095779419, "learning_rate": 0.00018805190484975362, "loss": 1.7043, "step": 2959 }, { "epoch": 0.47578862768736185, "grad_norm": 0.7723186612129211, "learning_rate": 0.00018804391160160178, "loss": 1.4991, "step": 2960 }, { "epoch": 0.4759493670886076, "grad_norm": 0.7930955290794373, "learning_rate": 0.00018803591585060754, "loss": 1.277, "step": 2961 }, { "epoch": 0.4761101064898533, "grad_norm": 0.7610566020011902, "learning_rate": 0.00018802791759699815, "loss": 1.5443, "step": 2962 }, { "epoch": 0.4762708458910991, "grad_norm": 0.8335841298103333, "learning_rate": 0.00018801991684100103, "loss": 1.4721, "step": 2963 }, { "epoch": 0.4764315852923448, "grad_norm": 0.897123396396637, "learning_rate": 0.00018801191358284363, "loss": 1.6827, "step": 2964 }, { "epoch": 0.47659232469359053, "grad_norm": 0.7690529823303223, "learning_rate": 0.0001880039078227534, "loss": 1.5123, "step": 2965 }, { "epoch": 0.47675306409483625, "grad_norm": 0.76187664270401, "learning_rate": 0.00018799589956095797, "loss": 1.2684, "step": 2966 }, { "epoch": 0.476913803496082, "grad_norm": 0.8220367431640625, "learning_rate": 0.00018798788879768497, "loss": 1.4949, "step": 2967 }, { "epoch": 0.4770745428973277, "grad_norm": 0.7867986559867859, "learning_rate": 0.00018797987553316215, "loss": 1.501, "step": 2968 }, { "epoch": 0.4772352822985734, "grad_norm": 0.7467446327209473, "learning_rate": 0.00018797185976761726, "loss": 1.5682, "step": 2969 }, { "epoch": 0.47739602169981915, "grad_norm": 0.7024979591369629, "learning_rate": 0.00018796384150127822, "loss": 1.4734, "step": 2970 }, { "epoch": 0.4775567611010649, "grad_norm": 0.8486789464950562, "learning_rate": 0.00018795582073437292, "loss": 1.5092, "step": 2971 }, { "epoch": 0.47771750050231065, "grad_norm": 0.7336635589599609, "learning_rate": 0.00018794779746712938, "loss": 1.2366, "step": 2972 }, { "epoch": 0.4778782399035564, "grad_norm": 0.8405641913414001, "learning_rate": 0.0001879397716997757, "loss": 1.5354, "step": 2973 }, { "epoch": 0.4780389793048021, "grad_norm": 0.7261552810668945, "learning_rate": 0.00018793174343254002, "loss": 1.3405, "step": 2974 }, { "epoch": 0.4781997187060478, "grad_norm": 0.8380393385887146, "learning_rate": 0.00018792371266565053, "loss": 1.439, "step": 2975 }, { "epoch": 0.47836045810729355, "grad_norm": 0.7824360132217407, "learning_rate": 0.00018791567939933557, "loss": 1.5729, "step": 2976 }, { "epoch": 0.4785211975085393, "grad_norm": 0.850030779838562, "learning_rate": 0.0001879076436338235, "loss": 1.4931, "step": 2977 }, { "epoch": 0.478681936909785, "grad_norm": 0.8455753922462463, "learning_rate": 0.0001878996053693427, "loss": 1.6824, "step": 2978 }, { "epoch": 0.4788426763110307, "grad_norm": 0.8237616419792175, "learning_rate": 0.0001878915646061217, "loss": 1.332, "step": 2979 }, { "epoch": 0.47900341571227645, "grad_norm": 0.8146539330482483, "learning_rate": 0.00018788352134438914, "loss": 1.7403, "step": 2980 }, { "epoch": 0.4791641551135222, "grad_norm": 0.7863816618919373, "learning_rate": 0.00018787547558437359, "loss": 1.5767, "step": 2981 }, { "epoch": 0.47932489451476795, "grad_norm": 0.7275273203849792, "learning_rate": 0.0001878674273263038, "loss": 1.3439, "step": 2982 }, { "epoch": 0.4794856339160137, "grad_norm": 0.7563725709915161, "learning_rate": 0.00018785937657040855, "loss": 1.4664, "step": 2983 }, { "epoch": 0.4796463733172594, "grad_norm": 0.8111528754234314, "learning_rate": 0.0001878513233169167, "loss": 1.5855, "step": 2984 }, { "epoch": 0.4798071127185051, "grad_norm": 0.8729495406150818, "learning_rate": 0.0001878432675660572, "loss": 1.8342, "step": 2985 }, { "epoch": 0.47996785211975085, "grad_norm": 0.8619919419288635, "learning_rate": 0.00018783520931805905, "loss": 1.6167, "step": 2986 }, { "epoch": 0.4801285915209966, "grad_norm": 0.8355671167373657, "learning_rate": 0.00018782714857315126, "loss": 1.5532, "step": 2987 }, { "epoch": 0.4802893309222423, "grad_norm": 0.7451460361480713, "learning_rate": 0.00018781908533156306, "loss": 1.4682, "step": 2988 }, { "epoch": 0.480450070323488, "grad_norm": 0.755795955657959, "learning_rate": 0.00018781101959352366, "loss": 1.655, "step": 2989 }, { "epoch": 0.4806108097247338, "grad_norm": 0.7799263000488281, "learning_rate": 0.00018780295135926227, "loss": 1.4959, "step": 2990 }, { "epoch": 0.4807715491259795, "grad_norm": 0.812443196773529, "learning_rate": 0.00018779488062900833, "loss": 1.3486, "step": 2991 }, { "epoch": 0.48093228852722525, "grad_norm": 0.7659725546836853, "learning_rate": 0.0001877868074029912, "loss": 1.5219, "step": 2992 }, { "epoch": 0.481093027928471, "grad_norm": 0.7665232419967651, "learning_rate": 0.00018777873168144041, "loss": 1.4652, "step": 2993 }, { "epoch": 0.4812537673297167, "grad_norm": 0.7937904596328735, "learning_rate": 0.00018777065346458558, "loss": 1.6073, "step": 2994 }, { "epoch": 0.4814145067309624, "grad_norm": 0.8096777200698853, "learning_rate": 0.00018776257275265627, "loss": 1.2869, "step": 2995 }, { "epoch": 0.48157524613220815, "grad_norm": 0.8350524306297302, "learning_rate": 0.00018775448954588223, "loss": 1.5491, "step": 2996 }, { "epoch": 0.48173598553345387, "grad_norm": 0.7538176774978638, "learning_rate": 0.00018774640384449328, "loss": 1.3758, "step": 2997 }, { "epoch": 0.4818967249346996, "grad_norm": 0.6863182783126831, "learning_rate": 0.00018773831564871917, "loss": 1.3655, "step": 2998 }, { "epoch": 0.4820574643359454, "grad_norm": 0.7904782295227051, "learning_rate": 0.00018773022495878992, "loss": 1.385, "step": 2999 }, { "epoch": 0.4822182037371911, "grad_norm": 0.7956998944282532, "learning_rate": 0.0001877221317749355, "loss": 1.5178, "step": 3000 }, { "epoch": 0.4822182037371911, "eval_loss": 1.4961515665054321, "eval_runtime": 46.2249, "eval_samples_per_second": 5.43, "eval_steps_per_second": 2.726, "step": 3000 }, { "epoch": 0.4823789431384368, "grad_norm": 0.7863808870315552, "learning_rate": 0.00018771403609738597, "loss": 1.6412, "step": 3001 }, { "epoch": 0.48253968253968255, "grad_norm": 0.8360217809677124, "learning_rate": 0.00018770593792637145, "loss": 1.5148, "step": 3002 }, { "epoch": 0.48270042194092827, "grad_norm": 0.7766129970550537, "learning_rate": 0.00018769783726212217, "loss": 1.5242, "step": 3003 }, { "epoch": 0.482861161342174, "grad_norm": 0.6268093585968018, "learning_rate": 0.00018768973410486842, "loss": 1.3398, "step": 3004 }, { "epoch": 0.4830219007434197, "grad_norm": 0.7765529751777649, "learning_rate": 0.00018768162845484053, "loss": 1.5621, "step": 3005 }, { "epoch": 0.48318264014466544, "grad_norm": 0.6491487622261047, "learning_rate": 0.00018767352031226893, "loss": 1.1643, "step": 3006 }, { "epoch": 0.48334337954591117, "grad_norm": 0.782910168170929, "learning_rate": 0.00018766540967738408, "loss": 1.6186, "step": 3007 }, { "epoch": 0.48350411894715695, "grad_norm": 0.7737633585929871, "learning_rate": 0.0001876572965504166, "loss": 1.4544, "step": 3008 }, { "epoch": 0.48366485834840267, "grad_norm": 0.7950130701065063, "learning_rate": 0.00018764918093159712, "loss": 1.6371, "step": 3009 }, { "epoch": 0.4838255977496484, "grad_norm": 0.7656119465827942, "learning_rate": 0.00018764106282115626, "loss": 1.4536, "step": 3010 }, { "epoch": 0.4839863371508941, "grad_norm": 0.8267123103141785, "learning_rate": 0.0001876329422193249, "loss": 1.5876, "step": 3011 }, { "epoch": 0.48414707655213984, "grad_norm": 0.7911815047264099, "learning_rate": 0.00018762481912633382, "loss": 1.5277, "step": 3012 }, { "epoch": 0.48430781595338557, "grad_norm": 0.8751125335693359, "learning_rate": 0.00018761669354241397, "loss": 1.5451, "step": 3013 }, { "epoch": 0.4844685553546313, "grad_norm": 0.878654420375824, "learning_rate": 0.00018760856546779634, "loss": 1.4161, "step": 3014 }, { "epoch": 0.484629294755877, "grad_norm": 0.6976590752601624, "learning_rate": 0.00018760043490271194, "loss": 1.3901, "step": 3015 }, { "epoch": 0.48479003415712274, "grad_norm": 0.759381890296936, "learning_rate": 0.0001875923018473919, "loss": 1.4682, "step": 3016 }, { "epoch": 0.4849507735583685, "grad_norm": 0.9249528050422668, "learning_rate": 0.00018758416630206752, "loss": 1.4616, "step": 3017 }, { "epoch": 0.48511151295961424, "grad_norm": 0.8221263885498047, "learning_rate": 0.00018757602826697, "loss": 1.4317, "step": 3018 }, { "epoch": 0.48527225236085997, "grad_norm": 0.6771602034568787, "learning_rate": 0.00018756788774233066, "loss": 1.3687, "step": 3019 }, { "epoch": 0.4854329917621057, "grad_norm": 0.9947450757026672, "learning_rate": 0.00018755974472838094, "loss": 1.2866, "step": 3020 }, { "epoch": 0.4855937311633514, "grad_norm": 0.8600178360939026, "learning_rate": 0.0001875515992253523, "loss": 1.4562, "step": 3021 }, { "epoch": 0.48575447056459714, "grad_norm": 0.807303249835968, "learning_rate": 0.00018754345123347634, "loss": 1.3234, "step": 3022 }, { "epoch": 0.48591520996584286, "grad_norm": 0.8487992286682129, "learning_rate": 0.00018753530075298468, "loss": 1.3296, "step": 3023 }, { "epoch": 0.4860759493670886, "grad_norm": 0.853323221206665, "learning_rate": 0.00018752714778410895, "loss": 1.4908, "step": 3024 }, { "epoch": 0.4862366887683343, "grad_norm": 0.786767840385437, "learning_rate": 0.00018751899232708098, "loss": 1.4261, "step": 3025 }, { "epoch": 0.4863974281695801, "grad_norm": 0.9010907411575317, "learning_rate": 0.0001875108343821326, "loss": 1.5285, "step": 3026 }, { "epoch": 0.4865581675708258, "grad_norm": 0.8729403018951416, "learning_rate": 0.0001875026739494957, "loss": 1.461, "step": 3027 }, { "epoch": 0.48671890697207154, "grad_norm": 0.7810288667678833, "learning_rate": 0.00018749451102940224, "loss": 1.3207, "step": 3028 }, { "epoch": 0.48687964637331727, "grad_norm": 0.6946190595626831, "learning_rate": 0.00018748634562208434, "loss": 1.1331, "step": 3029 }, { "epoch": 0.487040385774563, "grad_norm": 0.744159460067749, "learning_rate": 0.00018747817772777402, "loss": 1.4589, "step": 3030 }, { "epoch": 0.4872011251758087, "grad_norm": 0.8552001714706421, "learning_rate": 0.00018747000734670356, "loss": 1.5041, "step": 3031 }, { "epoch": 0.48736186457705444, "grad_norm": 0.7490576505661011, "learning_rate": 0.00018746183447910516, "loss": 1.5488, "step": 3032 }, { "epoch": 0.48752260397830016, "grad_norm": 0.7648716568946838, "learning_rate": 0.00018745365912521117, "loss": 1.4549, "step": 3033 }, { "epoch": 0.4876833433795459, "grad_norm": 0.7240163087844849, "learning_rate": 0.000187445481285254, "loss": 1.4274, "step": 3034 }, { "epoch": 0.48784408278079167, "grad_norm": 0.9832366704940796, "learning_rate": 0.00018743730095946616, "loss": 1.8718, "step": 3035 }, { "epoch": 0.4880048221820374, "grad_norm": 0.7275018692016602, "learning_rate": 0.00018742911814808013, "loss": 1.3867, "step": 3036 }, { "epoch": 0.4881655615832831, "grad_norm": 0.8046236038208008, "learning_rate": 0.00018742093285132856, "loss": 1.5234, "step": 3037 }, { "epoch": 0.48832630098452884, "grad_norm": 1.5333490371704102, "learning_rate": 0.0001874127450694441, "loss": 1.426, "step": 3038 }, { "epoch": 0.48848704038577456, "grad_norm": 0.8429914116859436, "learning_rate": 0.00018740455480265955, "loss": 1.5262, "step": 3039 }, { "epoch": 0.4886477797870203, "grad_norm": 0.7735593318939209, "learning_rate": 0.0001873963620512077, "loss": 1.268, "step": 3040 }, { "epoch": 0.488808519188266, "grad_norm": 0.7138094305992126, "learning_rate": 0.0001873881668153215, "loss": 1.4084, "step": 3041 }, { "epoch": 0.48896925858951173, "grad_norm": 0.8222596049308777, "learning_rate": 0.00018737996909523383, "loss": 1.5573, "step": 3042 }, { "epoch": 0.48912999799075746, "grad_norm": 0.8204774856567383, "learning_rate": 0.0001873717688911778, "loss": 1.6809, "step": 3043 }, { "epoch": 0.48929073739200324, "grad_norm": 0.8382197022438049, "learning_rate": 0.0001873635662033865, "loss": 1.5328, "step": 3044 }, { "epoch": 0.48945147679324896, "grad_norm": 0.7293194532394409, "learning_rate": 0.0001873553610320931, "loss": 1.315, "step": 3045 }, { "epoch": 0.4896122161944947, "grad_norm": 0.8799089789390564, "learning_rate": 0.00018734715337753087, "loss": 1.6008, "step": 3046 }, { "epoch": 0.4897729555957404, "grad_norm": 0.8712655901908875, "learning_rate": 0.00018733894323993313, "loss": 1.7032, "step": 3047 }, { "epoch": 0.48993369499698614, "grad_norm": 0.8212409615516663, "learning_rate": 0.00018733073061953323, "loss": 1.3093, "step": 3048 }, { "epoch": 0.49009443439823186, "grad_norm": 0.7716673016548157, "learning_rate": 0.00018732251551656468, "loss": 1.4599, "step": 3049 }, { "epoch": 0.4902551737994776, "grad_norm": 0.8705503940582275, "learning_rate": 0.00018731429793126102, "loss": 1.6148, "step": 3050 }, { "epoch": 0.4904159132007233, "grad_norm": 0.7890222668647766, "learning_rate": 0.00018730607786385578, "loss": 1.5713, "step": 3051 }, { "epoch": 0.49057665260196903, "grad_norm": 0.7627784013748169, "learning_rate": 0.0001872978553145827, "loss": 1.3813, "step": 3052 }, { "epoch": 0.4907373920032148, "grad_norm": 0.8467469215393066, "learning_rate": 0.00018728963028367553, "loss": 1.6589, "step": 3053 }, { "epoch": 0.49089813140446054, "grad_norm": 1.093711495399475, "learning_rate": 0.00018728140277136804, "loss": 1.6293, "step": 3054 }, { "epoch": 0.49105887080570626, "grad_norm": 0.7645581364631653, "learning_rate": 0.0001872731727778941, "loss": 1.3167, "step": 3055 }, { "epoch": 0.491219610206952, "grad_norm": 0.6614938974380493, "learning_rate": 0.00018726494030348774, "loss": 1.2237, "step": 3056 }, { "epoch": 0.4913803496081977, "grad_norm": 0.9002680778503418, "learning_rate": 0.00018725670534838296, "loss": 1.6722, "step": 3057 }, { "epoch": 0.49154108900944343, "grad_norm": 0.7616742849349976, "learning_rate": 0.0001872484679128138, "loss": 1.3044, "step": 3058 }, { "epoch": 0.49170182841068916, "grad_norm": 0.8118720054626465, "learning_rate": 0.0001872402279970145, "loss": 1.4692, "step": 3059 }, { "epoch": 0.4918625678119349, "grad_norm": 0.9224057793617249, "learning_rate": 0.00018723198560121923, "loss": 1.3744, "step": 3060 }, { "epoch": 0.4920233072131806, "grad_norm": 0.7476219534873962, "learning_rate": 0.00018722374072566237, "loss": 1.4406, "step": 3061 }, { "epoch": 0.4921840466144264, "grad_norm": 0.7826265096664429, "learning_rate": 0.0001872154933705783, "loss": 1.3442, "step": 3062 }, { "epoch": 0.4923447860156721, "grad_norm": 0.8348650932312012, "learning_rate": 0.00018720724353620138, "loss": 1.3802, "step": 3063 }, { "epoch": 0.49250552541691783, "grad_norm": 0.8834213018417358, "learning_rate": 0.0001871989912227662, "loss": 1.5421, "step": 3064 }, { "epoch": 0.49266626481816356, "grad_norm": 0.8299037218093872, "learning_rate": 0.00018719073643050734, "loss": 1.5157, "step": 3065 }, { "epoch": 0.4928270042194093, "grad_norm": 0.9005885124206543, "learning_rate": 0.00018718247915965947, "loss": 1.5619, "step": 3066 }, { "epoch": 0.492987743620655, "grad_norm": 0.7389284372329712, "learning_rate": 0.00018717421941045727, "loss": 1.6033, "step": 3067 }, { "epoch": 0.49314848302190073, "grad_norm": 0.7576968669891357, "learning_rate": 0.0001871659571831356, "loss": 1.322, "step": 3068 }, { "epoch": 0.49330922242314645, "grad_norm": 0.7969149351119995, "learning_rate": 0.00018715769247792932, "loss": 1.7988, "step": 3069 }, { "epoch": 0.4934699618243922, "grad_norm": 0.7364991307258606, "learning_rate": 0.00018714942529507336, "loss": 1.4626, "step": 3070 }, { "epoch": 0.49363070122563796, "grad_norm": 0.792208194732666, "learning_rate": 0.0001871411556348027, "loss": 1.4916, "step": 3071 }, { "epoch": 0.4937914406268837, "grad_norm": 0.8527035713195801, "learning_rate": 0.00018713288349735252, "loss": 1.5125, "step": 3072 }, { "epoch": 0.4939521800281294, "grad_norm": 0.7737846970558167, "learning_rate": 0.00018712460888295785, "loss": 1.4731, "step": 3073 }, { "epoch": 0.49411291942937513, "grad_norm": 0.9243038892745972, "learning_rate": 0.000187116331791854, "loss": 1.4924, "step": 3074 }, { "epoch": 0.49427365883062085, "grad_norm": 0.714449405670166, "learning_rate": 0.00018710805222427627, "loss": 1.3938, "step": 3075 }, { "epoch": 0.4944343982318666, "grad_norm": 0.8024658560752869, "learning_rate": 0.00018709977018045997, "loss": 1.4619, "step": 3076 }, { "epoch": 0.4945951376331123, "grad_norm": 0.7849716544151306, "learning_rate": 0.00018709148566064057, "loss": 1.467, "step": 3077 }, { "epoch": 0.494755877034358, "grad_norm": 0.7598495483398438, "learning_rate": 0.00018708319866505356, "loss": 1.3472, "step": 3078 }, { "epoch": 0.49491661643560375, "grad_norm": 0.7255474925041199, "learning_rate": 0.00018707490919393451, "loss": 1.3512, "step": 3079 }, { "epoch": 0.49507735583684953, "grad_norm": 0.7781193256378174, "learning_rate": 0.0001870666172475191, "loss": 1.4196, "step": 3080 }, { "epoch": 0.49523809523809526, "grad_norm": 0.8142919540405273, "learning_rate": 0.00018705832282604303, "loss": 1.5064, "step": 3081 }, { "epoch": 0.495398834639341, "grad_norm": 0.6899041533470154, "learning_rate": 0.00018705002592974207, "loss": 1.3805, "step": 3082 }, { "epoch": 0.4955595740405867, "grad_norm": 0.8000766634941101, "learning_rate": 0.00018704172655885213, "loss": 1.6559, "step": 3083 }, { "epoch": 0.4957203134418324, "grad_norm": 0.7867529988288879, "learning_rate": 0.00018703342471360906, "loss": 1.6064, "step": 3084 }, { "epoch": 0.49588105284307815, "grad_norm": 0.8446916937828064, "learning_rate": 0.00018702512039424888, "loss": 1.3823, "step": 3085 }, { "epoch": 0.4960417922443239, "grad_norm": 0.7959084510803223, "learning_rate": 0.00018701681360100772, "loss": 1.365, "step": 3086 }, { "epoch": 0.4962025316455696, "grad_norm": 0.7449185848236084, "learning_rate": 0.00018700850433412167, "loss": 1.2844, "step": 3087 }, { "epoch": 0.4963632710468153, "grad_norm": 0.7274401783943176, "learning_rate": 0.00018700019259382695, "loss": 1.3382, "step": 3088 }, { "epoch": 0.4965240104480611, "grad_norm": 0.7250974774360657, "learning_rate": 0.0001869918783803598, "loss": 1.4737, "step": 3089 }, { "epoch": 0.49668474984930683, "grad_norm": 0.8500939607620239, "learning_rate": 0.00018698356169395664, "loss": 1.5512, "step": 3090 }, { "epoch": 0.49684548925055255, "grad_norm": 0.7853917479515076, "learning_rate": 0.00018697524253485383, "loss": 1.2851, "step": 3091 }, { "epoch": 0.4970062286517983, "grad_norm": 0.6461275219917297, "learning_rate": 0.00018696692090328792, "loss": 1.2272, "step": 3092 }, { "epoch": 0.497166968053044, "grad_norm": 0.7561212778091431, "learning_rate": 0.00018695859679949538, "loss": 1.4943, "step": 3093 }, { "epoch": 0.4973277074542897, "grad_norm": 0.8315651416778564, "learning_rate": 0.00018695027022371292, "loss": 1.5294, "step": 3094 }, { "epoch": 0.49748844685553545, "grad_norm": 0.763672411441803, "learning_rate": 0.00018694194117617726, "loss": 1.5216, "step": 3095 }, { "epoch": 0.4976491862567812, "grad_norm": 0.8527069687843323, "learning_rate": 0.00018693360965712507, "loss": 1.4519, "step": 3096 }, { "epoch": 0.4978099256580269, "grad_norm": 0.8073843717575073, "learning_rate": 0.00018692527566679327, "loss": 1.3888, "step": 3097 }, { "epoch": 0.4979706650592727, "grad_norm": 0.7755822539329529, "learning_rate": 0.00018691693920541878, "loss": 1.5096, "step": 3098 }, { "epoch": 0.4981314044605184, "grad_norm": 0.7321110367774963, "learning_rate": 0.00018690860027323854, "loss": 1.4813, "step": 3099 }, { "epoch": 0.4982921438617641, "grad_norm": 0.6772292852401733, "learning_rate": 0.0001869002588704896, "loss": 1.1564, "step": 3100 }, { "epoch": 0.49845288326300985, "grad_norm": 0.7912636399269104, "learning_rate": 0.0001868919149974091, "loss": 1.2544, "step": 3101 }, { "epoch": 0.4986136226642556, "grad_norm": 0.7495132088661194, "learning_rate": 0.00018688356865423424, "loss": 1.543, "step": 3102 }, { "epoch": 0.4987743620655013, "grad_norm": 0.9035565853118896, "learning_rate": 0.00018687521984120232, "loss": 1.4545, "step": 3103 }, { "epoch": 0.498935101466747, "grad_norm": 0.8575536608695984, "learning_rate": 0.00018686686855855057, "loss": 1.5056, "step": 3104 }, { "epoch": 0.49909584086799275, "grad_norm": 0.9508048295974731, "learning_rate": 0.0001868585148065165, "loss": 1.6402, "step": 3105 }, { "epoch": 0.49925658026923847, "grad_norm": 0.8258174061775208, "learning_rate": 0.0001868501585853375, "loss": 1.4066, "step": 3106 }, { "epoch": 0.49941731967048425, "grad_norm": 0.8645395636558533, "learning_rate": 0.00018684179989525117, "loss": 1.5187, "step": 3107 }, { "epoch": 0.49957805907173, "grad_norm": 0.7824612259864807, "learning_rate": 0.0001868334387364951, "loss": 1.4827, "step": 3108 }, { "epoch": 0.4997387984729757, "grad_norm": 0.6720359325408936, "learning_rate": 0.00018682507510930699, "loss": 1.3296, "step": 3109 }, { "epoch": 0.4998995378742214, "grad_norm": 0.7265015244483948, "learning_rate": 0.00018681670901392458, "loss": 1.4092, "step": 3110 }, { "epoch": 0.5000602772754672, "grad_norm": 0.7822834253311157, "learning_rate": 0.0001868083404505857, "loss": 1.5919, "step": 3111 }, { "epoch": 0.5002210166767129, "grad_norm": 0.770401656627655, "learning_rate": 0.00018679996941952824, "loss": 1.5498, "step": 3112 }, { "epoch": 0.5003817560779587, "grad_norm": 0.7985139489173889, "learning_rate": 0.0001867915959209902, "loss": 1.3829, "step": 3113 }, { "epoch": 0.5005424954792044, "grad_norm": 0.6841533780097961, "learning_rate": 0.00018678321995520955, "loss": 1.2893, "step": 3114 }, { "epoch": 0.5007032348804501, "grad_norm": 0.8827111124992371, "learning_rate": 0.00018677484152242445, "loss": 1.7256, "step": 3115 }, { "epoch": 0.5008639742816958, "grad_norm": 0.7568870782852173, "learning_rate": 0.00018676646062287304, "loss": 1.5074, "step": 3116 }, { "epoch": 0.5010247136829415, "grad_norm": 0.8887837529182434, "learning_rate": 0.0001867580772567936, "loss": 1.4068, "step": 3117 }, { "epoch": 0.5011854530841873, "grad_norm": 0.7081626057624817, "learning_rate": 0.0001867496914244244, "loss": 1.2057, "step": 3118 }, { "epoch": 0.501346192485433, "grad_norm": 0.8205781579017639, "learning_rate": 0.00018674130312600385, "loss": 1.5362, "step": 3119 }, { "epoch": 0.5015069318866787, "grad_norm": 0.7894141674041748, "learning_rate": 0.00018673291236177044, "loss": 1.4486, "step": 3120 }, { "epoch": 0.5016676712879244, "grad_norm": 0.7532516717910767, "learning_rate": 0.00018672451913196266, "loss": 1.3618, "step": 3121 }, { "epoch": 0.5018284106891702, "grad_norm": 0.9315890669822693, "learning_rate": 0.0001867161234368191, "loss": 1.6892, "step": 3122 }, { "epoch": 0.5019891500904159, "grad_norm": 0.7458954453468323, "learning_rate": 0.00018670772527657845, "loss": 1.3034, "step": 3123 }, { "epoch": 0.5021498894916616, "grad_norm": 0.754393994808197, "learning_rate": 0.00018669932465147944, "loss": 1.2885, "step": 3124 }, { "epoch": 0.5023106288929073, "grad_norm": 0.7372221350669861, "learning_rate": 0.00018669092156176084, "loss": 1.4563, "step": 3125 }, { "epoch": 0.5024713682941531, "grad_norm": 0.8573106527328491, "learning_rate": 0.00018668251600766157, "loss": 1.6308, "step": 3126 }, { "epoch": 0.5026321076953988, "grad_norm": 0.7989649176597595, "learning_rate": 0.00018667410798942056, "loss": 1.5764, "step": 3127 }, { "epoch": 0.5027928470966445, "grad_norm": 0.8737038969993591, "learning_rate": 0.00018666569750727682, "loss": 1.8142, "step": 3128 }, { "epoch": 0.5029535864978903, "grad_norm": 0.9746984839439392, "learning_rate": 0.00018665728456146945, "loss": 1.6028, "step": 3129 }, { "epoch": 0.5031143258991361, "grad_norm": 0.7257863283157349, "learning_rate": 0.00018664886915223763, "loss": 1.1538, "step": 3130 }, { "epoch": 0.5032750653003818, "grad_norm": 0.7146979570388794, "learning_rate": 0.0001866404512798205, "loss": 1.3193, "step": 3131 }, { "epoch": 0.5034358047016275, "grad_norm": 0.7978997826576233, "learning_rate": 0.00018663203094445744, "loss": 1.4023, "step": 3132 }, { "epoch": 0.5035965441028732, "grad_norm": 0.7153508067131042, "learning_rate": 0.00018662360814638781, "loss": 1.3259, "step": 3133 }, { "epoch": 0.503757283504119, "grad_norm": 0.8842895030975342, "learning_rate": 0.00018661518288585102, "loss": 1.6364, "step": 3134 }, { "epoch": 0.5039180229053647, "grad_norm": 0.8861570954322815, "learning_rate": 0.0001866067551630866, "loss": 1.4957, "step": 3135 }, { "epoch": 0.5040787623066104, "grad_norm": 0.7854073643684387, "learning_rate": 0.00018659832497833407, "loss": 1.5938, "step": 3136 }, { "epoch": 0.5042395017078561, "grad_norm": 0.7810068130493164, "learning_rate": 0.00018658989233183318, "loss": 1.3609, "step": 3137 }, { "epoch": 0.5044002411091019, "grad_norm": 0.6898432970046997, "learning_rate": 0.00018658145722382353, "loss": 1.271, "step": 3138 }, { "epoch": 0.5045609805103476, "grad_norm": 0.7210073471069336, "learning_rate": 0.000186573019654545, "loss": 1.1567, "step": 3139 }, { "epoch": 0.5047217199115933, "grad_norm": 0.7412274479866028, "learning_rate": 0.00018656457962423737, "loss": 1.3831, "step": 3140 }, { "epoch": 0.504882459312839, "grad_norm": 0.8479989171028137, "learning_rate": 0.0001865561371331406, "loss": 1.444, "step": 3141 }, { "epoch": 0.5050431987140848, "grad_norm": 0.7228153944015503, "learning_rate": 0.00018654769218149473, "loss": 1.343, "step": 3142 }, { "epoch": 0.5052039381153305, "grad_norm": 0.7500528693199158, "learning_rate": 0.00018653924476953976, "loss": 1.4467, "step": 3143 }, { "epoch": 0.5053646775165762, "grad_norm": 0.7808296084403992, "learning_rate": 0.00018653079489751586, "loss": 1.6298, "step": 3144 }, { "epoch": 0.5055254169178219, "grad_norm": 0.7842546701431274, "learning_rate": 0.00018652234256566323, "loss": 1.4401, "step": 3145 }, { "epoch": 0.5056861563190677, "grad_norm": 0.7207222580909729, "learning_rate": 0.00018651388777422213, "loss": 1.3442, "step": 3146 }, { "epoch": 0.5058468957203135, "grad_norm": 0.8083255887031555, "learning_rate": 0.00018650543052343296, "loss": 1.4056, "step": 3147 }, { "epoch": 0.5060076351215592, "grad_norm": 0.7824476361274719, "learning_rate": 0.00018649697081353606, "loss": 1.5283, "step": 3148 }, { "epoch": 0.5061683745228049, "grad_norm": 0.8362530469894409, "learning_rate": 0.00018648850864477198, "loss": 1.3975, "step": 3149 }, { "epoch": 0.5063291139240507, "grad_norm": 0.6803069710731506, "learning_rate": 0.00018648004401738123, "loss": 1.411, "step": 3150 }, { "epoch": 0.5064898533252964, "grad_norm": 0.7800229787826538, "learning_rate": 0.00018647157693160447, "loss": 1.4823, "step": 3151 }, { "epoch": 0.5066505927265421, "grad_norm": 0.7498127222061157, "learning_rate": 0.00018646310738768234, "loss": 1.3019, "step": 3152 }, { "epoch": 0.5068113321277878, "grad_norm": 0.8328988552093506, "learning_rate": 0.0001864546353858557, "loss": 1.3604, "step": 3153 }, { "epoch": 0.5069720715290336, "grad_norm": 0.7578277587890625, "learning_rate": 0.00018644616092636533, "loss": 1.2687, "step": 3154 }, { "epoch": 0.5071328109302793, "grad_norm": 0.756123960018158, "learning_rate": 0.00018643768400945212, "loss": 1.5491, "step": 3155 }, { "epoch": 0.507293550331525, "grad_norm": 0.8967506289482117, "learning_rate": 0.00018642920463535705, "loss": 1.5321, "step": 3156 }, { "epoch": 0.5074542897327707, "grad_norm": 0.6763865947723389, "learning_rate": 0.0001864207228043212, "loss": 1.3149, "step": 3157 }, { "epoch": 0.5076150291340165, "grad_norm": 0.7586849331855774, "learning_rate": 0.00018641223851658564, "loss": 1.4394, "step": 3158 }, { "epoch": 0.5077757685352622, "grad_norm": 0.9428778290748596, "learning_rate": 0.00018640375177239157, "loss": 1.5734, "step": 3159 }, { "epoch": 0.5079365079365079, "grad_norm": 0.8846228718757629, "learning_rate": 0.00018639526257198025, "loss": 1.641, "step": 3160 }, { "epoch": 0.5080972473377536, "grad_norm": 0.8499528765678406, "learning_rate": 0.00018638677091559305, "loss": 1.5569, "step": 3161 }, { "epoch": 0.5082579867389994, "grad_norm": 0.8576180934906006, "learning_rate": 0.0001863782768034713, "loss": 1.3313, "step": 3162 }, { "epoch": 0.5084187261402451, "grad_norm": 0.7093580961227417, "learning_rate": 0.00018636978023585645, "loss": 1.4935, "step": 3163 }, { "epoch": 0.5085794655414908, "grad_norm": 0.8200436234474182, "learning_rate": 0.0001863612812129901, "loss": 1.5253, "step": 3164 }, { "epoch": 0.5087402049427366, "grad_norm": 0.8415714502334595, "learning_rate": 0.0001863527797351138, "loss": 1.4595, "step": 3165 }, { "epoch": 0.5089009443439824, "grad_norm": 0.8642652034759521, "learning_rate": 0.00018634427580246926, "loss": 1.6083, "step": 3166 }, { "epoch": 0.5090616837452281, "grad_norm": 0.789482057094574, "learning_rate": 0.0001863357694152982, "loss": 1.3244, "step": 3167 }, { "epoch": 0.5092224231464738, "grad_norm": 0.7869203686714172, "learning_rate": 0.00018632726057384242, "loss": 1.3781, "step": 3168 }, { "epoch": 0.5093831625477195, "grad_norm": 0.805454432964325, "learning_rate": 0.00018631874927834384, "loss": 1.3958, "step": 3169 }, { "epoch": 0.5095439019489653, "grad_norm": 0.8130379915237427, "learning_rate": 0.0001863102355290444, "loss": 1.4964, "step": 3170 }, { "epoch": 0.509704641350211, "grad_norm": 0.8864771127700806, "learning_rate": 0.0001863017193261861, "loss": 1.7032, "step": 3171 }, { "epoch": 0.5098653807514567, "grad_norm": 0.7456620931625366, "learning_rate": 0.00018629320067001107, "loss": 1.2497, "step": 3172 }, { "epoch": 0.5100261201527024, "grad_norm": 0.8703574538230896, "learning_rate": 0.00018628467956076144, "loss": 1.5584, "step": 3173 }, { "epoch": 0.5101868595539482, "grad_norm": 0.7685459852218628, "learning_rate": 0.00018627615599867942, "loss": 1.2707, "step": 3174 }, { "epoch": 0.5103475989551939, "grad_norm": 0.7151654362678528, "learning_rate": 0.00018626762998400737, "loss": 1.2934, "step": 3175 }, { "epoch": 0.5105083383564396, "grad_norm": 0.7609535455703735, "learning_rate": 0.0001862591015169876, "loss": 1.4061, "step": 3176 }, { "epoch": 0.5106690777576853, "grad_norm": 0.9644106030464172, "learning_rate": 0.00018625057059786263, "loss": 1.5783, "step": 3177 }, { "epoch": 0.510829817158931, "grad_norm": 0.8666108846664429, "learning_rate": 0.0001862420372268749, "loss": 1.5909, "step": 3178 }, { "epoch": 0.5109905565601768, "grad_norm": 0.8962516188621521, "learning_rate": 0.00018623350140426704, "loss": 1.5011, "step": 3179 }, { "epoch": 0.5111512959614225, "grad_norm": 0.7635895013809204, "learning_rate": 0.00018622496313028166, "loss": 1.6093, "step": 3180 }, { "epoch": 0.5113120353626682, "grad_norm": 0.8403144478797913, "learning_rate": 0.00018621642240516147, "loss": 1.7169, "step": 3181 }, { "epoch": 0.511472774763914, "grad_norm": 1.1031138896942139, "learning_rate": 0.0001862078792291493, "loss": 1.5322, "step": 3182 }, { "epoch": 0.5116335141651598, "grad_norm": 0.9801281094551086, "learning_rate": 0.000186199333602488, "loss": 1.9365, "step": 3183 }, { "epoch": 0.5117942535664055, "grad_norm": 0.8643022775650024, "learning_rate": 0.00018619078552542046, "loss": 1.5743, "step": 3184 }, { "epoch": 0.5119549929676512, "grad_norm": 0.799892008304596, "learning_rate": 0.00018618223499818973, "loss": 1.4868, "step": 3185 }, { "epoch": 0.512115732368897, "grad_norm": 0.7903233766555786, "learning_rate": 0.00018617368202103886, "loss": 1.4214, "step": 3186 }, { "epoch": 0.5122764717701427, "grad_norm": 0.7701752781867981, "learning_rate": 0.00018616512659421093, "loss": 1.4296, "step": 3187 }, { "epoch": 0.5124372111713884, "grad_norm": 0.6535817980766296, "learning_rate": 0.00018615656871794924, "loss": 1.2288, "step": 3188 }, { "epoch": 0.5125979505726341, "grad_norm": 0.9529690742492676, "learning_rate": 0.00018614800839249703, "loss": 1.407, "step": 3189 }, { "epoch": 0.5127586899738799, "grad_norm": 0.6976677775382996, "learning_rate": 0.00018613944561809759, "loss": 1.4704, "step": 3190 }, { "epoch": 0.5129194293751256, "grad_norm": 0.7200442552566528, "learning_rate": 0.00018613088039499444, "loss": 1.4809, "step": 3191 }, { "epoch": 0.5130801687763713, "grad_norm": 0.8080162405967712, "learning_rate": 0.00018612231272343102, "loss": 1.4657, "step": 3192 }, { "epoch": 0.513240908177617, "grad_norm": 0.7882272601127625, "learning_rate": 0.0001861137426036508, "loss": 1.2985, "step": 3193 }, { "epoch": 0.5134016475788628, "grad_norm": 0.6760850548744202, "learning_rate": 0.00018610517003589755, "loss": 1.3946, "step": 3194 }, { "epoch": 0.5135623869801085, "grad_norm": 0.7582597136497498, "learning_rate": 0.00018609659502041487, "loss": 1.3333, "step": 3195 }, { "epoch": 0.5137231263813542, "grad_norm": 0.8666666150093079, "learning_rate": 0.00018608801755744657, "loss": 1.6628, "step": 3196 }, { "epoch": 0.5138838657825999, "grad_norm": 0.7751832604408264, "learning_rate": 0.00018607943764723642, "loss": 1.4471, "step": 3197 }, { "epoch": 0.5140446051838456, "grad_norm": 0.8726323246955872, "learning_rate": 0.0001860708552900284, "loss": 1.5766, "step": 3198 }, { "epoch": 0.5142053445850914, "grad_norm": 0.7992368936538696, "learning_rate": 0.00018606227048606646, "loss": 1.5152, "step": 3199 }, { "epoch": 0.5143660839863371, "grad_norm": 0.7765289545059204, "learning_rate": 0.0001860536832355946, "loss": 1.7286, "step": 3200 }, { "epoch": 0.5143660839863371, "eval_loss": 1.4931443929672241, "eval_runtime": 46.2577, "eval_samples_per_second": 5.426, "eval_steps_per_second": 2.724, "step": 3200 }, { "epoch": 0.5145268233875829, "grad_norm": 0.8541935682296753, "learning_rate": 0.00018604509353885694, "loss": 2.0232, "step": 3201 }, { "epoch": 0.5146875627888287, "grad_norm": 0.7530422210693359, "learning_rate": 0.00018603650139609773, "loss": 1.5164, "step": 3202 }, { "epoch": 0.5148483021900744, "grad_norm": 0.8122800588607788, "learning_rate": 0.00018602790680756118, "loss": 1.5982, "step": 3203 }, { "epoch": 0.5150090415913201, "grad_norm": 0.7053490877151489, "learning_rate": 0.00018601930977349155, "loss": 1.2421, "step": 3204 }, { "epoch": 0.5151697809925658, "grad_norm": 0.7418240904808044, "learning_rate": 0.00018601071029413331, "loss": 1.4113, "step": 3205 }, { "epoch": 0.5153305203938116, "grad_norm": 0.7965177893638611, "learning_rate": 0.00018600210836973089, "loss": 1.5034, "step": 3206 }, { "epoch": 0.5154912597950573, "grad_norm": 0.7014358043670654, "learning_rate": 0.00018599350400052883, "loss": 1.2735, "step": 3207 }, { "epoch": 0.515651999196303, "grad_norm": 0.7904342412948608, "learning_rate": 0.0001859848971867717, "loss": 1.5788, "step": 3208 }, { "epoch": 0.5158127385975487, "grad_norm": 0.8906430006027222, "learning_rate": 0.0001859762879287042, "loss": 1.7, "step": 3209 }, { "epoch": 0.5159734779987944, "grad_norm": 0.7783416509628296, "learning_rate": 0.00018596767622657104, "loss": 1.6071, "step": 3210 }, { "epoch": 0.5161342174000402, "grad_norm": 0.7956631183624268, "learning_rate": 0.00018595906208061707, "loss": 1.3444, "step": 3211 }, { "epoch": 0.5162949568012859, "grad_norm": 0.7637962102890015, "learning_rate": 0.0001859504454910871, "loss": 1.3372, "step": 3212 }, { "epoch": 0.5164556962025316, "grad_norm": 0.7456954121589661, "learning_rate": 0.00018594182645822614, "loss": 1.3977, "step": 3213 }, { "epoch": 0.5166164356037773, "grad_norm": 0.8508870601654053, "learning_rate": 0.00018593320498227914, "loss": 1.6368, "step": 3214 }, { "epoch": 0.5167771750050231, "grad_norm": 0.6762863397598267, "learning_rate": 0.00018592458106349126, "loss": 1.3411, "step": 3215 }, { "epoch": 0.5169379144062688, "grad_norm": 0.8175486922264099, "learning_rate": 0.0001859159547021076, "loss": 1.4413, "step": 3216 }, { "epoch": 0.5170986538075145, "grad_norm": 0.86971116065979, "learning_rate": 0.0001859073258983734, "loss": 1.5145, "step": 3217 }, { "epoch": 0.5172593932087602, "grad_norm": 0.9065823554992676, "learning_rate": 0.00018589869465253394, "loss": 1.7302, "step": 3218 }, { "epoch": 0.5174201326100061, "grad_norm": 0.8765391111373901, "learning_rate": 0.00018589006096483458, "loss": 1.7279, "step": 3219 }, { "epoch": 0.5175808720112518, "grad_norm": 0.8304060697555542, "learning_rate": 0.0001858814248355208, "loss": 1.4744, "step": 3220 }, { "epoch": 0.5177416114124975, "grad_norm": 0.785413384437561, "learning_rate": 0.00018587278626483805, "loss": 1.4817, "step": 3221 }, { "epoch": 0.5179023508137433, "grad_norm": 0.7497190833091736, "learning_rate": 0.00018586414525303193, "loss": 1.4684, "step": 3222 }, { "epoch": 0.518063090214989, "grad_norm": 0.8957980275154114, "learning_rate": 0.00018585550180034806, "loss": 1.4137, "step": 3223 }, { "epoch": 0.5182238296162347, "grad_norm": 0.7416465282440186, "learning_rate": 0.00018584685590703213, "loss": 1.5414, "step": 3224 }, { "epoch": 0.5183845690174804, "grad_norm": 0.8946303129196167, "learning_rate": 0.00018583820757333, "loss": 1.5228, "step": 3225 }, { "epoch": 0.5185453084187261, "grad_norm": 0.7932084202766418, "learning_rate": 0.0001858295567994874, "loss": 1.5331, "step": 3226 }, { "epoch": 0.5187060478199719, "grad_norm": 0.854015588760376, "learning_rate": 0.00018582090358575034, "loss": 1.5853, "step": 3227 }, { "epoch": 0.5188667872212176, "grad_norm": 0.8538203835487366, "learning_rate": 0.00018581224793236479, "loss": 1.4788, "step": 3228 }, { "epoch": 0.5190275266224633, "grad_norm": 0.8172786235809326, "learning_rate": 0.00018580358983957674, "loss": 1.4009, "step": 3229 }, { "epoch": 0.519188266023709, "grad_norm": 0.7360570430755615, "learning_rate": 0.00018579492930763242, "loss": 1.347, "step": 3230 }, { "epoch": 0.5193490054249548, "grad_norm": 0.7721585035324097, "learning_rate": 0.00018578626633677795, "loss": 1.6243, "step": 3231 }, { "epoch": 0.5195097448262005, "grad_norm": 0.9231551289558411, "learning_rate": 0.0001857776009272596, "loss": 1.6949, "step": 3232 }, { "epoch": 0.5196704842274462, "grad_norm": 0.837478756904602, "learning_rate": 0.00018576893307932374, "loss": 1.5876, "step": 3233 }, { "epoch": 0.5198312236286919, "grad_norm": 0.8564349412918091, "learning_rate": 0.00018576026279321678, "loss": 1.4926, "step": 3234 }, { "epoch": 0.5199919630299377, "grad_norm": 0.8425780534744263, "learning_rate": 0.00018575159006918511, "loss": 1.6162, "step": 3235 }, { "epoch": 0.5201527024311834, "grad_norm": 0.8263263702392578, "learning_rate": 0.00018574291490747538, "loss": 1.55, "step": 3236 }, { "epoch": 0.5203134418324292, "grad_norm": 0.8159098029136658, "learning_rate": 0.0001857342373083341, "loss": 1.5944, "step": 3237 }, { "epoch": 0.520474181233675, "grad_norm": 0.7525100708007812, "learning_rate": 0.00018572555727200803, "loss": 1.3546, "step": 3238 }, { "epoch": 0.5206349206349207, "grad_norm": 0.7954659461975098, "learning_rate": 0.00018571687479874386, "loss": 1.3472, "step": 3239 }, { "epoch": 0.5207956600361664, "grad_norm": 0.882394552230835, "learning_rate": 0.00018570818988878843, "loss": 1.5075, "step": 3240 }, { "epoch": 0.5209563994374121, "grad_norm": 0.6685717105865479, "learning_rate": 0.00018569950254238867, "loss": 1.1479, "step": 3241 }, { "epoch": 0.5211171388386578, "grad_norm": 0.7421262264251709, "learning_rate": 0.00018569081275979145, "loss": 1.4706, "step": 3242 }, { "epoch": 0.5212778782399036, "grad_norm": 0.841813325881958, "learning_rate": 0.00018568212054124387, "loss": 1.5812, "step": 3243 }, { "epoch": 0.5214386176411493, "grad_norm": 0.7762925028800964, "learning_rate": 0.00018567342588699299, "loss": 1.4953, "step": 3244 }, { "epoch": 0.521599357042395, "grad_norm": 0.9392681121826172, "learning_rate": 0.000185664728797286, "loss": 1.5375, "step": 3245 }, { "epoch": 0.5217600964436407, "grad_norm": 0.8206340074539185, "learning_rate": 0.0001856560292723701, "loss": 1.5184, "step": 3246 }, { "epoch": 0.5219208358448865, "grad_norm": 0.7266818284988403, "learning_rate": 0.00018564732731249261, "loss": 1.3673, "step": 3247 }, { "epoch": 0.5220815752461322, "grad_norm": 0.849731981754303, "learning_rate": 0.00018563862291790092, "loss": 1.6291, "step": 3248 }, { "epoch": 0.5222423146473779, "grad_norm": 0.8300021290779114, "learning_rate": 0.0001856299160888425, "loss": 1.6345, "step": 3249 }, { "epoch": 0.5224030540486236, "grad_norm": 0.7429481148719788, "learning_rate": 0.00018562120682556472, "loss": 1.4749, "step": 3250 }, { "epoch": 0.5225637934498694, "grad_norm": 0.7573351860046387, "learning_rate": 0.0001856124951283153, "loss": 1.4571, "step": 3251 }, { "epoch": 0.5227245328511151, "grad_norm": 0.9759644865989685, "learning_rate": 0.0001856037809973419, "loss": 1.5486, "step": 3252 }, { "epoch": 0.5228852722523608, "grad_norm": 0.8568567037582397, "learning_rate": 0.00018559506443289211, "loss": 1.6903, "step": 3253 }, { "epoch": 0.5230460116536065, "grad_norm": 0.8426090478897095, "learning_rate": 0.00018558634543521383, "loss": 1.439, "step": 3254 }, { "epoch": 0.5232067510548524, "grad_norm": 0.6596956849098206, "learning_rate": 0.00018557762400455484, "loss": 1.2792, "step": 3255 }, { "epoch": 0.5233674904560981, "grad_norm": 0.917637288570404, "learning_rate": 0.00018556890014116318, "loss": 1.776, "step": 3256 }, { "epoch": 0.5235282298573438, "grad_norm": 0.7367154359817505, "learning_rate": 0.0001855601738452867, "loss": 1.2243, "step": 3257 }, { "epoch": 0.5236889692585895, "grad_norm": 0.7813432216644287, "learning_rate": 0.00018555144511717356, "loss": 1.457, "step": 3258 }, { "epoch": 0.5238497086598353, "grad_norm": 0.6810093522071838, "learning_rate": 0.00018554271395707187, "loss": 1.3941, "step": 3259 }, { "epoch": 0.524010448061081, "grad_norm": 0.8087904453277588, "learning_rate": 0.00018553398036522982, "loss": 1.3688, "step": 3260 }, { "epoch": 0.5241711874623267, "grad_norm": 0.8361377120018005, "learning_rate": 0.0001855252443418957, "loss": 1.5056, "step": 3261 }, { "epoch": 0.5243319268635724, "grad_norm": 0.9677451252937317, "learning_rate": 0.00018551650588731784, "loss": 1.5404, "step": 3262 }, { "epoch": 0.5244926662648182, "grad_norm": 0.8938940167427063, "learning_rate": 0.00018550776500174466, "loss": 1.6679, "step": 3263 }, { "epoch": 0.5246534056660639, "grad_norm": 0.751091480255127, "learning_rate": 0.0001854990216854246, "loss": 1.3274, "step": 3264 }, { "epoch": 0.5248141450673096, "grad_norm": 0.8059028387069702, "learning_rate": 0.00018549027593860626, "loss": 1.2289, "step": 3265 }, { "epoch": 0.5249748844685553, "grad_norm": 0.7425819039344788, "learning_rate": 0.00018548152776153826, "loss": 1.4981, "step": 3266 }, { "epoch": 0.5251356238698011, "grad_norm": 0.740841805934906, "learning_rate": 0.00018547277715446923, "loss": 1.3887, "step": 3267 }, { "epoch": 0.5252963632710468, "grad_norm": 0.6427505016326904, "learning_rate": 0.00018546402411764797, "loss": 1.261, "step": 3268 }, { "epoch": 0.5254571026722925, "grad_norm": 0.6863730549812317, "learning_rate": 0.00018545526865132328, "loss": 1.2171, "step": 3269 }, { "epoch": 0.5256178420735382, "grad_norm": 0.737600564956665, "learning_rate": 0.00018544651075574407, "loss": 1.3609, "step": 3270 }, { "epoch": 0.525778581474784, "grad_norm": 0.7653276324272156, "learning_rate": 0.0001854377504311593, "loss": 1.5206, "step": 3271 }, { "epoch": 0.5259393208760298, "grad_norm": 0.8990992307662964, "learning_rate": 0.000185428987677818, "loss": 1.4709, "step": 3272 }, { "epoch": 0.5261000602772755, "grad_norm": 0.76966792345047, "learning_rate": 0.00018542022249596926, "loss": 1.4829, "step": 3273 }, { "epoch": 0.5262607996785212, "grad_norm": 0.7187246084213257, "learning_rate": 0.00018541145488586229, "loss": 1.4161, "step": 3274 }, { "epoch": 0.526421539079767, "grad_norm": 0.8304179906845093, "learning_rate": 0.0001854026848477463, "loss": 1.2775, "step": 3275 }, { "epoch": 0.5265822784810127, "grad_norm": 0.761699914932251, "learning_rate": 0.00018539391238187055, "loss": 1.3707, "step": 3276 }, { "epoch": 0.5267430178822584, "grad_norm": 0.7719839811325073, "learning_rate": 0.0001853851374884845, "loss": 1.3814, "step": 3277 }, { "epoch": 0.5269037572835041, "grad_norm": 0.7285048961639404, "learning_rate": 0.00018537636016783754, "loss": 1.4875, "step": 3278 }, { "epoch": 0.5270644966847499, "grad_norm": 0.7954995632171631, "learning_rate": 0.00018536758042017923, "loss": 1.4043, "step": 3279 }, { "epoch": 0.5272252360859956, "grad_norm": 0.7614687085151672, "learning_rate": 0.0001853587982457591, "loss": 1.4088, "step": 3280 }, { "epoch": 0.5273859754872413, "grad_norm": 0.760796844959259, "learning_rate": 0.00018535001364482684, "loss": 1.4955, "step": 3281 }, { "epoch": 0.527546714888487, "grad_norm": 0.7797730565071106, "learning_rate": 0.00018534122661763216, "loss": 1.3871, "step": 3282 }, { "epoch": 0.5277074542897328, "grad_norm": 0.8547179698944092, "learning_rate": 0.00018533243716442485, "loss": 1.3409, "step": 3283 }, { "epoch": 0.5278681936909785, "grad_norm": 0.8011171817779541, "learning_rate": 0.00018532364528545475, "loss": 1.5323, "step": 3284 }, { "epoch": 0.5280289330922242, "grad_norm": 0.7849488258361816, "learning_rate": 0.00018531485098097184, "loss": 1.4696, "step": 3285 }, { "epoch": 0.5281896724934699, "grad_norm": 0.8917866349220276, "learning_rate": 0.00018530605425122611, "loss": 1.3903, "step": 3286 }, { "epoch": 0.5283504118947157, "grad_norm": 0.8635839223861694, "learning_rate": 0.00018529725509646756, "loss": 1.7157, "step": 3287 }, { "epoch": 0.5285111512959614, "grad_norm": 0.7912312746047974, "learning_rate": 0.0001852884535169464, "loss": 1.5481, "step": 3288 }, { "epoch": 0.5286718906972071, "grad_norm": 0.7080885767936707, "learning_rate": 0.00018527964951291277, "loss": 1.5174, "step": 3289 }, { "epoch": 0.5288326300984529, "grad_norm": 0.8341657519340515, "learning_rate": 0.000185270843084617, "loss": 1.4355, "step": 3290 }, { "epoch": 0.5289933694996987, "grad_norm": 0.7897726893424988, "learning_rate": 0.0001852620342323094, "loss": 1.3098, "step": 3291 }, { "epoch": 0.5291541089009444, "grad_norm": 0.8012301921844482, "learning_rate": 0.00018525322295624038, "loss": 1.3543, "step": 3292 }, { "epoch": 0.5293148483021901, "grad_norm": 0.8380632996559143, "learning_rate": 0.00018524440925666048, "loss": 1.6445, "step": 3293 }, { "epoch": 0.5294755877034358, "grad_norm": 0.9531186819076538, "learning_rate": 0.00018523559313382015, "loss": 1.8615, "step": 3294 }, { "epoch": 0.5296363271046816, "grad_norm": 0.7453944087028503, "learning_rate": 0.00018522677458797008, "loss": 1.2749, "step": 3295 }, { "epoch": 0.5297970665059273, "grad_norm": 0.7704547643661499, "learning_rate": 0.0001852179536193609, "loss": 1.6659, "step": 3296 }, { "epoch": 0.529957805907173, "grad_norm": 0.7032475471496582, "learning_rate": 0.00018520913022824345, "loss": 1.2315, "step": 3297 }, { "epoch": 0.5301185453084187, "grad_norm": 0.8031439781188965, "learning_rate": 0.00018520030441486845, "loss": 1.42, "step": 3298 }, { "epoch": 0.5302792847096645, "grad_norm": 0.7874877452850342, "learning_rate": 0.0001851914761794869, "loss": 1.4585, "step": 3299 }, { "epoch": 0.5304400241109102, "grad_norm": 0.7787345051765442, "learning_rate": 0.00018518264552234967, "loss": 1.4016, "step": 3300 }, { "epoch": 0.5306007635121559, "grad_norm": 0.8302061557769775, "learning_rate": 0.00018517381244370783, "loss": 1.6784, "step": 3301 }, { "epoch": 0.5307615029134016, "grad_norm": 0.6849681735038757, "learning_rate": 0.0001851649769438125, "loss": 1.1773, "step": 3302 }, { "epoch": 0.5309222423146474, "grad_norm": 0.8231149911880493, "learning_rate": 0.00018515613902291483, "loss": 1.4614, "step": 3303 }, { "epoch": 0.5310829817158931, "grad_norm": 0.8596336245536804, "learning_rate": 0.00018514729868126603, "loss": 1.4711, "step": 3304 }, { "epoch": 0.5312437211171388, "grad_norm": 0.8970978260040283, "learning_rate": 0.00018513845591911746, "loss": 1.9615, "step": 3305 }, { "epoch": 0.5314044605183845, "grad_norm": 0.8078962564468384, "learning_rate": 0.00018512961073672044, "loss": 1.7241, "step": 3306 }, { "epoch": 0.5315651999196302, "grad_norm": 1.2718873023986816, "learning_rate": 0.00018512076313432645, "loss": 1.3717, "step": 3307 }, { "epoch": 0.5317259393208761, "grad_norm": 0.8012800812721252, "learning_rate": 0.000185111913112187, "loss": 1.6785, "step": 3308 }, { "epoch": 0.5318866787221218, "grad_norm": 0.7391458749771118, "learning_rate": 0.00018510306067055364, "loss": 1.3205, "step": 3309 }, { "epoch": 0.5320474181233675, "grad_norm": 0.8265613913536072, "learning_rate": 0.00018509420580967807, "loss": 1.3645, "step": 3310 }, { "epoch": 0.5322081575246133, "grad_norm": 0.8628754019737244, "learning_rate": 0.00018508534852981198, "loss": 1.4986, "step": 3311 }, { "epoch": 0.532368896925859, "grad_norm": 0.7587042450904846, "learning_rate": 0.00018507648883120715, "loss": 1.3636, "step": 3312 }, { "epoch": 0.5325296363271047, "grad_norm": 0.8225183486938477, "learning_rate": 0.00018506762671411547, "loss": 1.3968, "step": 3313 }, { "epoch": 0.5326903757283504, "grad_norm": 0.8508570194244385, "learning_rate": 0.00018505876217878882, "loss": 1.4494, "step": 3314 }, { "epoch": 0.5328511151295962, "grad_norm": 0.8812022805213928, "learning_rate": 0.0001850498952254792, "loss": 1.6349, "step": 3315 }, { "epoch": 0.5330118545308419, "grad_norm": 0.8578895330429077, "learning_rate": 0.00018504102585443875, "loss": 1.5098, "step": 3316 }, { "epoch": 0.5331725939320876, "grad_norm": 0.7924550771713257, "learning_rate": 0.00018503215406591949, "loss": 1.4885, "step": 3317 }, { "epoch": 0.5333333333333333, "grad_norm": 0.7468888759613037, "learning_rate": 0.0001850232798601737, "loss": 1.4191, "step": 3318 }, { "epoch": 0.533494072734579, "grad_norm": 0.7940313220024109, "learning_rate": 0.00018501440323745357, "loss": 1.4326, "step": 3319 }, { "epoch": 0.5336548121358248, "grad_norm": 0.6616223454475403, "learning_rate": 0.00018500552419801154, "loss": 1.3375, "step": 3320 }, { "epoch": 0.5338155515370705, "grad_norm": 0.7967734932899475, "learning_rate": 0.00018499664274209993, "loss": 1.5282, "step": 3321 }, { "epoch": 0.5339762909383162, "grad_norm": 0.8318758010864258, "learning_rate": 0.0001849877588699713, "loss": 1.585, "step": 3322 }, { "epoch": 0.534137030339562, "grad_norm": 0.7232612371444702, "learning_rate": 0.0001849788725818781, "loss": 1.437, "step": 3323 }, { "epoch": 0.5342977697408077, "grad_norm": 0.7981652021408081, "learning_rate": 0.00018496998387807298, "loss": 1.3855, "step": 3324 }, { "epoch": 0.5344585091420534, "grad_norm": 0.743877112865448, "learning_rate": 0.00018496109275880864, "loss": 1.3545, "step": 3325 }, { "epoch": 0.5346192485432992, "grad_norm": 0.6819697022438049, "learning_rate": 0.00018495219922433782, "loss": 1.3887, "step": 3326 }, { "epoch": 0.534779987944545, "grad_norm": 0.7201501131057739, "learning_rate": 0.00018494330327491331, "loss": 1.3843, "step": 3327 }, { "epoch": 0.5349407273457907, "grad_norm": 0.8287776112556458, "learning_rate": 0.00018493440491078805, "loss": 1.3751, "step": 3328 }, { "epoch": 0.5351014667470364, "grad_norm": 0.8437519073486328, "learning_rate": 0.00018492550413221496, "loss": 1.5746, "step": 3329 }, { "epoch": 0.5352622061482821, "grad_norm": 0.7958610653877258, "learning_rate": 0.00018491660093944705, "loss": 1.5523, "step": 3330 }, { "epoch": 0.5354229455495279, "grad_norm": 0.8160830736160278, "learning_rate": 0.00018490769533273746, "loss": 1.5238, "step": 3331 }, { "epoch": 0.5355836849507736, "grad_norm": 0.8264061212539673, "learning_rate": 0.0001848987873123393, "loss": 1.5469, "step": 3332 }, { "epoch": 0.5357444243520193, "grad_norm": 0.7421874403953552, "learning_rate": 0.00018488987687850585, "loss": 1.3911, "step": 3333 }, { "epoch": 0.535905163753265, "grad_norm": 0.882710337638855, "learning_rate": 0.00018488096403149038, "loss": 1.5791, "step": 3334 }, { "epoch": 0.5360659031545107, "grad_norm": 0.708991289138794, "learning_rate": 0.00018487204877154625, "loss": 1.4892, "step": 3335 }, { "epoch": 0.5362266425557565, "grad_norm": 0.8283132910728455, "learning_rate": 0.00018486313109892692, "loss": 1.4233, "step": 3336 }, { "epoch": 0.5363873819570022, "grad_norm": 0.7657718658447266, "learning_rate": 0.00018485421101388586, "loss": 1.3739, "step": 3337 }, { "epoch": 0.5365481213582479, "grad_norm": 0.8362711071968079, "learning_rate": 0.0001848452885166767, "loss": 1.5645, "step": 3338 }, { "epoch": 0.5367088607594936, "grad_norm": 0.8162758946418762, "learning_rate": 0.00018483636360755302, "loss": 1.4889, "step": 3339 }, { "epoch": 0.5368696001607394, "grad_norm": 0.8083055019378662, "learning_rate": 0.00018482743628676855, "loss": 1.5101, "step": 3340 }, { "epoch": 0.5370303395619851, "grad_norm": 0.7159759402275085, "learning_rate": 0.0001848185065545771, "loss": 1.2628, "step": 3341 }, { "epoch": 0.5371910789632308, "grad_norm": 0.8316575884819031, "learning_rate": 0.00018480957441123244, "loss": 1.4327, "step": 3342 }, { "epoch": 0.5373518183644765, "grad_norm": 0.8548365831375122, "learning_rate": 0.00018480063985698862, "loss": 1.5206, "step": 3343 }, { "epoch": 0.5375125577657224, "grad_norm": 0.8024420738220215, "learning_rate": 0.0001847917028920995, "loss": 1.4826, "step": 3344 }, { "epoch": 0.5376732971669681, "grad_norm": 0.8056092858314514, "learning_rate": 0.00018478276351681917, "loss": 1.6029, "step": 3345 }, { "epoch": 0.5378340365682138, "grad_norm": 0.7622305154800415, "learning_rate": 0.00018477382173140176, "loss": 1.4532, "step": 3346 }, { "epoch": 0.5379947759694595, "grad_norm": 0.7563185095787048, "learning_rate": 0.00018476487753610145, "loss": 1.4579, "step": 3347 }, { "epoch": 0.5381555153707053, "grad_norm": 0.714819073677063, "learning_rate": 0.00018475593093117255, "loss": 1.4069, "step": 3348 }, { "epoch": 0.538316254771951, "grad_norm": 0.8602470755577087, "learning_rate": 0.0001847469819168693, "loss": 1.5044, "step": 3349 }, { "epoch": 0.5384769941731967, "grad_norm": 0.8590700030326843, "learning_rate": 0.00018473803049344617, "loss": 1.4614, "step": 3350 }, { "epoch": 0.5386377335744424, "grad_norm": 0.9124669432640076, "learning_rate": 0.00018472907666115755, "loss": 1.6651, "step": 3351 }, { "epoch": 0.5387984729756882, "grad_norm": 0.8647240996360779, "learning_rate": 0.00018472012042025803, "loss": 1.4227, "step": 3352 }, { "epoch": 0.5389592123769339, "grad_norm": 0.7770558595657349, "learning_rate": 0.00018471116177100222, "loss": 1.5086, "step": 3353 }, { "epoch": 0.5391199517781796, "grad_norm": 0.7929955720901489, "learning_rate": 0.00018470220071364476, "loss": 1.3672, "step": 3354 }, { "epoch": 0.5392806911794253, "grad_norm": 0.8219648599624634, "learning_rate": 0.00018469323724844036, "loss": 1.4248, "step": 3355 }, { "epoch": 0.5394414305806711, "grad_norm": 0.8101730346679688, "learning_rate": 0.0001846842713756439, "loss": 1.4145, "step": 3356 }, { "epoch": 0.5396021699819168, "grad_norm": 0.7860954999923706, "learning_rate": 0.00018467530309551017, "loss": 1.4025, "step": 3357 }, { "epoch": 0.5397629093831625, "grad_norm": 0.7054822444915771, "learning_rate": 0.0001846663324082942, "loss": 1.2522, "step": 3358 }, { "epoch": 0.5399236487844082, "grad_norm": 0.7910926938056946, "learning_rate": 0.00018465735931425092, "loss": 1.5539, "step": 3359 }, { "epoch": 0.540084388185654, "grad_norm": 0.8198317289352417, "learning_rate": 0.00018464838381363548, "loss": 1.4939, "step": 3360 }, { "epoch": 0.5402451275868997, "grad_norm": 0.9830654859542847, "learning_rate": 0.00018463940590670298, "loss": 1.6687, "step": 3361 }, { "epoch": 0.5404058669881455, "grad_norm": 0.7338623404502869, "learning_rate": 0.00018463042559370866, "loss": 1.4941, "step": 3362 }, { "epoch": 0.5405666063893912, "grad_norm": 0.8205195069313049, "learning_rate": 0.0001846214428749078, "loss": 1.3692, "step": 3363 }, { "epoch": 0.540727345790637, "grad_norm": 0.7883062362670898, "learning_rate": 0.0001846124577505558, "loss": 1.5084, "step": 3364 }, { "epoch": 0.5408880851918827, "grad_norm": 0.8614766001701355, "learning_rate": 0.00018460347022090793, "loss": 1.5657, "step": 3365 }, { "epoch": 0.5410488245931284, "grad_norm": 0.6877971291542053, "learning_rate": 0.00018459448028621987, "loss": 1.2782, "step": 3366 }, { "epoch": 0.5412095639943741, "grad_norm": 0.8057857751846313, "learning_rate": 0.00018458548794674705, "loss": 1.2792, "step": 3367 }, { "epoch": 0.5413703033956199, "grad_norm": 0.8015730381011963, "learning_rate": 0.00018457649320274518, "loss": 1.547, "step": 3368 }, { "epoch": 0.5415310427968656, "grad_norm": 0.8146819472312927, "learning_rate": 0.0001845674960544699, "loss": 1.4916, "step": 3369 }, { "epoch": 0.5416917821981113, "grad_norm": 0.650114893913269, "learning_rate": 0.00018455849650217697, "loss": 1.2217, "step": 3370 }, { "epoch": 0.541852521599357, "grad_norm": 0.8225253224372864, "learning_rate": 0.00018454949454612226, "loss": 1.487, "step": 3371 }, { "epoch": 0.5420132610006028, "grad_norm": 0.7451386451721191, "learning_rate": 0.00018454049018656163, "loss": 1.4001, "step": 3372 }, { "epoch": 0.5421740004018485, "grad_norm": 0.8658048510551453, "learning_rate": 0.00018453148342375107, "loss": 1.3773, "step": 3373 }, { "epoch": 0.5423347398030942, "grad_norm": 0.7653456926345825, "learning_rate": 0.00018452247425794667, "loss": 1.5931, "step": 3374 }, { "epoch": 0.5424954792043399, "grad_norm": 0.7824321985244751, "learning_rate": 0.00018451346268940445, "loss": 1.7242, "step": 3375 }, { "epoch": 0.5426562186055857, "grad_norm": 0.8325675129890442, "learning_rate": 0.00018450444871838062, "loss": 1.5107, "step": 3376 }, { "epoch": 0.5428169580068314, "grad_norm": 0.8157557249069214, "learning_rate": 0.00018449543234513142, "loss": 1.4285, "step": 3377 }, { "epoch": 0.5429776974080771, "grad_norm": 0.8847950100898743, "learning_rate": 0.00018448641356991313, "loss": 1.4829, "step": 3378 }, { "epoch": 0.5431384368093228, "grad_norm": 0.856139063835144, "learning_rate": 0.0001844773923929822, "loss": 1.3546, "step": 3379 }, { "epoch": 0.5432991762105687, "grad_norm": 0.7851885557174683, "learning_rate": 0.00018446836881459504, "loss": 1.5413, "step": 3380 }, { "epoch": 0.5434599156118144, "grad_norm": 0.8247860074043274, "learning_rate": 0.00018445934283500815, "loss": 1.3986, "step": 3381 }, { "epoch": 0.5436206550130601, "grad_norm": 0.8349491357803345, "learning_rate": 0.00018445031445447812, "loss": 1.5491, "step": 3382 }, { "epoch": 0.5437813944143058, "grad_norm": 0.775696337223053, "learning_rate": 0.0001844412836732616, "loss": 1.2537, "step": 3383 }, { "epoch": 0.5439421338155516, "grad_norm": 0.7671049237251282, "learning_rate": 0.0001844322504916153, "loss": 1.3845, "step": 3384 }, { "epoch": 0.5441028732167973, "grad_norm": 0.8033993244171143, "learning_rate": 0.00018442321490979603, "loss": 1.5031, "step": 3385 }, { "epoch": 0.544263612618043, "grad_norm": 0.8441072106361389, "learning_rate": 0.00018441417692806068, "loss": 1.3995, "step": 3386 }, { "epoch": 0.5444243520192887, "grad_norm": 0.907695472240448, "learning_rate": 0.0001844051365466661, "loss": 1.1331, "step": 3387 }, { "epoch": 0.5445850914205345, "grad_norm": 0.7442054748535156, "learning_rate": 0.0001843960937658693, "loss": 1.2081, "step": 3388 }, { "epoch": 0.5447458308217802, "grad_norm": 0.8805733323097229, "learning_rate": 0.0001843870485859274, "loss": 1.8348, "step": 3389 }, { "epoch": 0.5449065702230259, "grad_norm": 0.7400468587875366, "learning_rate": 0.00018437800100709744, "loss": 1.3888, "step": 3390 }, { "epoch": 0.5450673096242716, "grad_norm": 0.7798414826393127, "learning_rate": 0.0001843689510296367, "loss": 1.5811, "step": 3391 }, { "epoch": 0.5452280490255174, "grad_norm": 0.7735156416893005, "learning_rate": 0.00018435989865380243, "loss": 1.4198, "step": 3392 }, { "epoch": 0.5453887884267631, "grad_norm": 0.7765915989875793, "learning_rate": 0.00018435084387985194, "loss": 1.3977, "step": 3393 }, { "epoch": 0.5455495278280088, "grad_norm": 0.7338336706161499, "learning_rate": 0.0001843417867080426, "loss": 1.2919, "step": 3394 }, { "epoch": 0.5457102672292545, "grad_norm": 0.69191974401474, "learning_rate": 0.0001843327271386319, "loss": 1.2431, "step": 3395 }, { "epoch": 0.5458710066305003, "grad_norm": 0.7926873564720154, "learning_rate": 0.00018432366517187745, "loss": 1.6068, "step": 3396 }, { "epoch": 0.546031746031746, "grad_norm": 0.8520327806472778, "learning_rate": 0.00018431460080803677, "loss": 1.5555, "step": 3397 }, { "epoch": 0.5461924854329918, "grad_norm": 0.8541330099105835, "learning_rate": 0.00018430553404736756, "loss": 1.5456, "step": 3398 }, { "epoch": 0.5463532248342375, "grad_norm": 0.732483983039856, "learning_rate": 0.00018429646489012759, "loss": 1.4058, "step": 3399 }, { "epoch": 0.5465139642354833, "grad_norm": 0.7677106261253357, "learning_rate": 0.00018428739333657465, "loss": 1.5872, "step": 3400 }, { "epoch": 0.5465139642354833, "eval_loss": 1.4874334335327148, "eval_runtime": 46.2584, "eval_samples_per_second": 5.426, "eval_steps_per_second": 2.724, "step": 3400 }, { "epoch": 0.546674703636729, "grad_norm": 0.8547418713569641, "learning_rate": 0.0001842783193869666, "loss": 1.5056, "step": 3401 }, { "epoch": 0.5468354430379747, "grad_norm": 0.7882863879203796, "learning_rate": 0.00018426924304156138, "loss": 1.4055, "step": 3402 }, { "epoch": 0.5469961824392204, "grad_norm": 0.782737672328949, "learning_rate": 0.00018426016430061708, "loss": 1.4642, "step": 3403 }, { "epoch": 0.5471569218404662, "grad_norm": 0.7308638095855713, "learning_rate": 0.00018425108316439174, "loss": 1.3799, "step": 3404 }, { "epoch": 0.5473176612417119, "grad_norm": 0.8885317444801331, "learning_rate": 0.00018424199963314346, "loss": 1.5583, "step": 3405 }, { "epoch": 0.5474784006429576, "grad_norm": 0.7803483009338379, "learning_rate": 0.0001842329137071305, "loss": 1.5434, "step": 3406 }, { "epoch": 0.5476391400442033, "grad_norm": 0.9327698945999146, "learning_rate": 0.00018422382538661118, "loss": 1.9708, "step": 3407 }, { "epoch": 0.5477998794454491, "grad_norm": 0.7588239908218384, "learning_rate": 0.00018421473467184382, "loss": 1.3409, "step": 3408 }, { "epoch": 0.5479606188466948, "grad_norm": 0.6719919443130493, "learning_rate": 0.00018420564156308687, "loss": 1.2773, "step": 3409 }, { "epoch": 0.5481213582479405, "grad_norm": 0.6549716591835022, "learning_rate": 0.00018419654606059878, "loss": 1.2431, "step": 3410 }, { "epoch": 0.5482820976491862, "grad_norm": 0.7733752131462097, "learning_rate": 0.00018418744816463812, "loss": 1.5152, "step": 3411 }, { "epoch": 0.548442837050432, "grad_norm": 0.7603765726089478, "learning_rate": 0.00018417834787546355, "loss": 1.3646, "step": 3412 }, { "epoch": 0.5486035764516777, "grad_norm": 0.937150776386261, "learning_rate": 0.00018416924519333377, "loss": 1.7231, "step": 3413 }, { "epoch": 0.5487643158529234, "grad_norm": 0.7216455936431885, "learning_rate": 0.00018416014011850745, "loss": 1.4371, "step": 3414 }, { "epoch": 0.5489250552541691, "grad_norm": 0.7946565747261047, "learning_rate": 0.00018415103265124354, "loss": 1.5204, "step": 3415 }, { "epoch": 0.549085794655415, "grad_norm": 0.794370710849762, "learning_rate": 0.00018414192279180087, "loss": 1.5416, "step": 3416 }, { "epoch": 0.5492465340566607, "grad_norm": 0.8013623356819153, "learning_rate": 0.00018413281054043847, "loss": 1.3143, "step": 3417 }, { "epoch": 0.5494072734579064, "grad_norm": 0.7810857892036438, "learning_rate": 0.0001841236958974153, "loss": 1.5244, "step": 3418 }, { "epoch": 0.5495680128591521, "grad_norm": 0.7712723612785339, "learning_rate": 0.0001841145788629905, "loss": 1.302, "step": 3419 }, { "epoch": 0.5497287522603979, "grad_norm": 0.8773642182350159, "learning_rate": 0.00018410545943742325, "loss": 1.6297, "step": 3420 }, { "epoch": 0.5498894916616436, "grad_norm": 0.855289101600647, "learning_rate": 0.00018409633762097276, "loss": 1.7535, "step": 3421 }, { "epoch": 0.5500502310628893, "grad_norm": 0.7963067293167114, "learning_rate": 0.00018408721341389836, "loss": 1.442, "step": 3422 }, { "epoch": 0.550210970464135, "grad_norm": 0.9864539504051208, "learning_rate": 0.00018407808681645947, "loss": 1.7095, "step": 3423 }, { "epoch": 0.5503717098653808, "grad_norm": 0.8141933083534241, "learning_rate": 0.00018406895782891543, "loss": 1.2587, "step": 3424 }, { "epoch": 0.5505324492666265, "grad_norm": 0.7481880784034729, "learning_rate": 0.00018405982645152582, "loss": 1.4753, "step": 3425 }, { "epoch": 0.5506931886678722, "grad_norm": 0.7643571496009827, "learning_rate": 0.00018405069268455024, "loss": 1.3064, "step": 3426 }, { "epoch": 0.5508539280691179, "grad_norm": 0.9446070790290833, "learning_rate": 0.00018404155652824829, "loss": 1.3712, "step": 3427 }, { "epoch": 0.5510146674703637, "grad_norm": 0.8087760806083679, "learning_rate": 0.0001840324179828797, "loss": 1.3824, "step": 3428 }, { "epoch": 0.5511754068716094, "grad_norm": 0.9354684352874756, "learning_rate": 0.00018402327704870425, "loss": 1.5243, "step": 3429 }, { "epoch": 0.5513361462728551, "grad_norm": 0.7447128891944885, "learning_rate": 0.0001840141337259818, "loss": 1.2098, "step": 3430 }, { "epoch": 0.5514968856741008, "grad_norm": 0.6258952617645264, "learning_rate": 0.00018400498801497225, "loss": 1.2759, "step": 3431 }, { "epoch": 0.5516576250753465, "grad_norm": 0.7399110794067383, "learning_rate": 0.0001839958399159356, "loss": 1.3586, "step": 3432 }, { "epoch": 0.5518183644765923, "grad_norm": 0.8511313796043396, "learning_rate": 0.00018398668942913194, "loss": 1.3737, "step": 3433 }, { "epoch": 0.5519791038778381, "grad_norm": 0.7959484457969666, "learning_rate": 0.00018397753655482134, "loss": 1.323, "step": 3434 }, { "epoch": 0.5521398432790838, "grad_norm": 0.7975229620933533, "learning_rate": 0.000183968381293264, "loss": 1.3714, "step": 3435 }, { "epoch": 0.5523005826803296, "grad_norm": 0.8922042846679688, "learning_rate": 0.0001839592236447202, "loss": 1.9249, "step": 3436 }, { "epoch": 0.5524613220815753, "grad_norm": 0.7569393515586853, "learning_rate": 0.00018395006360945025, "loss": 1.5926, "step": 3437 }, { "epoch": 0.552622061482821, "grad_norm": 0.7032526731491089, "learning_rate": 0.0001839409011877146, "loss": 1.2975, "step": 3438 }, { "epoch": 0.5527828008840667, "grad_norm": 0.8664374947547913, "learning_rate": 0.0001839317363797736, "loss": 1.5843, "step": 3439 }, { "epoch": 0.5529435402853125, "grad_norm": 0.8368168473243713, "learning_rate": 0.00018392256918588787, "loss": 1.5089, "step": 3440 }, { "epoch": 0.5531042796865582, "grad_norm": 0.849844217300415, "learning_rate": 0.00018391339960631794, "loss": 1.3327, "step": 3441 }, { "epoch": 0.5532650190878039, "grad_norm": 0.8162112236022949, "learning_rate": 0.00018390422764132453, "loss": 1.5279, "step": 3442 }, { "epoch": 0.5534257584890496, "grad_norm": 0.8311508893966675, "learning_rate": 0.00018389505329116838, "loss": 1.7123, "step": 3443 }, { "epoch": 0.5535864978902953, "grad_norm": 0.8905399441719055, "learning_rate": 0.00018388587655611028, "loss": 1.5369, "step": 3444 }, { "epoch": 0.5537472372915411, "grad_norm": 0.860458254814148, "learning_rate": 0.00018387669743641106, "loss": 1.631, "step": 3445 }, { "epoch": 0.5539079766927868, "grad_norm": 0.7295376658439636, "learning_rate": 0.00018386751593233168, "loss": 1.2728, "step": 3446 }, { "epoch": 0.5540687160940325, "grad_norm": 0.8431108593940735, "learning_rate": 0.00018385833204413317, "loss": 1.494, "step": 3447 }, { "epoch": 0.5542294554952782, "grad_norm": 0.8596949577331543, "learning_rate": 0.00018384914577207657, "loss": 1.4665, "step": 3448 }, { "epoch": 0.554390194896524, "grad_norm": 0.7842774391174316, "learning_rate": 0.00018383995711642302, "loss": 1.5249, "step": 3449 }, { "epoch": 0.5545509342977697, "grad_norm": 0.7341744303703308, "learning_rate": 0.00018383076607743376, "loss": 1.1438, "step": 3450 }, { "epoch": 0.5547116736990154, "grad_norm": 0.9056680798530579, "learning_rate": 0.00018382157265537004, "loss": 1.4604, "step": 3451 }, { "epoch": 0.5548724131002613, "grad_norm": 0.8012903332710266, "learning_rate": 0.0001838123768504932, "loss": 1.6805, "step": 3452 }, { "epoch": 0.555033152501507, "grad_norm": 0.9109938740730286, "learning_rate": 0.00018380317866306464, "loss": 1.5579, "step": 3453 }, { "epoch": 0.5551938919027527, "grad_norm": 0.8787387013435364, "learning_rate": 0.0001837939780933459, "loss": 1.555, "step": 3454 }, { "epoch": 0.5553546313039984, "grad_norm": 0.8159571290016174, "learning_rate": 0.00018378477514159846, "loss": 1.38, "step": 3455 }, { "epoch": 0.5555153707052441, "grad_norm": 0.6941602230072021, "learning_rate": 0.00018377556980808397, "loss": 1.3412, "step": 3456 }, { "epoch": 0.5556761101064899, "grad_norm": 0.9344702959060669, "learning_rate": 0.0001837663620930641, "loss": 1.6012, "step": 3457 }, { "epoch": 0.5558368495077356, "grad_norm": 0.7718198299407959, "learning_rate": 0.0001837571519968006, "loss": 1.4974, "step": 3458 }, { "epoch": 0.5559975889089813, "grad_norm": 0.719251811504364, "learning_rate": 0.00018374793951955528, "loss": 1.2737, "step": 3459 }, { "epoch": 0.556158328310227, "grad_norm": 0.7487744092941284, "learning_rate": 0.00018373872466159003, "loss": 1.4077, "step": 3460 }, { "epoch": 0.5563190677114728, "grad_norm": 0.8294851779937744, "learning_rate": 0.00018372950742316681, "loss": 1.4041, "step": 3461 }, { "epoch": 0.5564798071127185, "grad_norm": 0.9241026639938354, "learning_rate": 0.00018372028780454765, "loss": 1.5195, "step": 3462 }, { "epoch": 0.5566405465139642, "grad_norm": 0.8246049284934998, "learning_rate": 0.0001837110658059946, "loss": 1.4784, "step": 3463 }, { "epoch": 0.5568012859152099, "grad_norm": 0.8178633451461792, "learning_rate": 0.00018370184142776986, "loss": 1.3113, "step": 3464 }, { "epoch": 0.5569620253164557, "grad_norm": 0.9420003294944763, "learning_rate": 0.00018369261467013562, "loss": 1.5045, "step": 3465 }, { "epoch": 0.5571227647177014, "grad_norm": 0.7491409182548523, "learning_rate": 0.00018368338553335418, "loss": 1.3278, "step": 3466 }, { "epoch": 0.5572835041189471, "grad_norm": 0.9637048840522766, "learning_rate": 0.00018367415401768792, "loss": 1.5834, "step": 3467 }, { "epoch": 0.5574442435201928, "grad_norm": 0.8200823068618774, "learning_rate": 0.0001836649201233992, "loss": 1.5716, "step": 3468 }, { "epoch": 0.5576049829214386, "grad_norm": 0.7579705715179443, "learning_rate": 0.00018365568385075062, "loss": 1.321, "step": 3469 }, { "epoch": 0.5577657223226844, "grad_norm": 0.8488672971725464, "learning_rate": 0.00018364644520000467, "loss": 1.4687, "step": 3470 }, { "epoch": 0.5579264617239301, "grad_norm": 0.8835506439208984, "learning_rate": 0.00018363720417142397, "loss": 1.5661, "step": 3471 }, { "epoch": 0.5580872011251758, "grad_norm": 0.7685453295707703, "learning_rate": 0.00018362796076527128, "loss": 1.204, "step": 3472 }, { "epoch": 0.5582479405264216, "grad_norm": 0.9423272609710693, "learning_rate": 0.0001836187149818093, "loss": 1.5675, "step": 3473 }, { "epoch": 0.5584086799276673, "grad_norm": 0.8542371988296509, "learning_rate": 0.00018360946682130088, "loss": 1.6619, "step": 3474 }, { "epoch": 0.558569419328913, "grad_norm": 0.7364301085472107, "learning_rate": 0.00018360021628400892, "loss": 1.5459, "step": 3475 }, { "epoch": 0.5587301587301587, "grad_norm": 6.780817031860352, "learning_rate": 0.00018359096337019638, "loss": 1.4864, "step": 3476 }, { "epoch": 0.5588908981314045, "grad_norm": 0.8980083465576172, "learning_rate": 0.00018358170808012631, "loss": 1.7447, "step": 3477 }, { "epoch": 0.5590516375326502, "grad_norm": 0.6896567940711975, "learning_rate": 0.00018357245041406183, "loss": 1.2883, "step": 3478 }, { "epoch": 0.5592123769338959, "grad_norm": 0.9001600742340088, "learning_rate": 0.00018356319037226608, "loss": 1.5896, "step": 3479 }, { "epoch": 0.5593731163351416, "grad_norm": 0.7801342606544495, "learning_rate": 0.0001835539279550023, "loss": 1.4654, "step": 3480 }, { "epoch": 0.5595338557363874, "grad_norm": 0.8106282353401184, "learning_rate": 0.00018354466316253382, "loss": 1.4703, "step": 3481 }, { "epoch": 0.5596945951376331, "grad_norm": 0.6558858752250671, "learning_rate": 0.00018353539599512395, "loss": 1.2083, "step": 3482 }, { "epoch": 0.5598553345388788, "grad_norm": 0.7697267532348633, "learning_rate": 0.00018352612645303618, "loss": 1.4069, "step": 3483 }, { "epoch": 0.5600160739401245, "grad_norm": 0.8283537030220032, "learning_rate": 0.00018351685453653403, "loss": 1.4165, "step": 3484 }, { "epoch": 0.5601768133413703, "grad_norm": 0.6744006276130676, "learning_rate": 0.00018350758024588106, "loss": 1.3273, "step": 3485 }, { "epoch": 0.560337552742616, "grad_norm": 0.8205862641334534, "learning_rate": 0.0001834983035813409, "loss": 1.4465, "step": 3486 }, { "epoch": 0.5604982921438617, "grad_norm": 0.8396273851394653, "learning_rate": 0.00018348902454317726, "loss": 1.6407, "step": 3487 }, { "epoch": 0.5606590315451075, "grad_norm": 0.8097394108772278, "learning_rate": 0.0001834797431316539, "loss": 1.3629, "step": 3488 }, { "epoch": 0.5608197709463533, "grad_norm": 0.8776138424873352, "learning_rate": 0.0001834704593470347, "loss": 1.2357, "step": 3489 }, { "epoch": 0.560980510347599, "grad_norm": 0.7313289642333984, "learning_rate": 0.00018346117318958355, "loss": 1.2672, "step": 3490 }, { "epoch": 0.5611412497488447, "grad_norm": 0.7351658940315247, "learning_rate": 0.00018345188465956444, "loss": 1.2434, "step": 3491 }, { "epoch": 0.5613019891500904, "grad_norm": 0.7830203771591187, "learning_rate": 0.00018344259375724145, "loss": 1.3425, "step": 3492 }, { "epoch": 0.5614627285513362, "grad_norm": 0.8343597650527954, "learning_rate": 0.00018343330048287864, "loss": 1.6124, "step": 3493 }, { "epoch": 0.5616234679525819, "grad_norm": 0.9303722381591797, "learning_rate": 0.0001834240048367402, "loss": 1.5776, "step": 3494 }, { "epoch": 0.5617842073538276, "grad_norm": 0.7689720988273621, "learning_rate": 0.0001834147068190904, "loss": 1.3182, "step": 3495 }, { "epoch": 0.5619449467550733, "grad_norm": 0.8655688762664795, "learning_rate": 0.00018340540643019355, "loss": 1.71, "step": 3496 }, { "epoch": 0.5621056861563191, "grad_norm": 0.8428177237510681, "learning_rate": 0.000183396103670314, "loss": 1.5182, "step": 3497 }, { "epoch": 0.5622664255575648, "grad_norm": 0.704310417175293, "learning_rate": 0.00018338679853971627, "loss": 1.4134, "step": 3498 }, { "epoch": 0.5624271649588105, "grad_norm": 0.8697547912597656, "learning_rate": 0.00018337749103866484, "loss": 1.5691, "step": 3499 }, { "epoch": 0.5625879043600562, "grad_norm": 0.8562211394309998, "learning_rate": 0.00018336818116742427, "loss": 1.4765, "step": 3500 }, { "epoch": 0.562748643761302, "grad_norm": 0.7913533449172974, "learning_rate": 0.00018335886892625926, "loss": 1.1787, "step": 3501 }, { "epoch": 0.5629093831625477, "grad_norm": 0.7344976663589478, "learning_rate": 0.00018334955431543453, "loss": 1.404, "step": 3502 }, { "epoch": 0.5630701225637934, "grad_norm": 0.840094804763794, "learning_rate": 0.00018334023733521478, "loss": 1.4018, "step": 3503 }, { "epoch": 0.5632308619650391, "grad_norm": 0.7555215358734131, "learning_rate": 0.00018333091798586503, "loss": 1.3866, "step": 3504 }, { "epoch": 0.5633916013662849, "grad_norm": 0.7541857957839966, "learning_rate": 0.00018332159626765004, "loss": 1.3739, "step": 3505 }, { "epoch": 0.5635523407675307, "grad_norm": 0.82652348279953, "learning_rate": 0.0001833122721808349, "loss": 1.456, "step": 3506 }, { "epoch": 0.5637130801687764, "grad_norm": 0.7439965009689331, "learning_rate": 0.00018330294572568466, "loss": 1.4646, "step": 3507 }, { "epoch": 0.5638738195700221, "grad_norm": 0.7989190816879272, "learning_rate": 0.00018329361690246437, "loss": 1.4223, "step": 3508 }, { "epoch": 0.5640345589712679, "grad_norm": 0.9072486162185669, "learning_rate": 0.00018328428571143932, "loss": 1.2378, "step": 3509 }, { "epoch": 0.5641952983725136, "grad_norm": 0.8002077341079712, "learning_rate": 0.0001832749521528747, "loss": 1.3504, "step": 3510 }, { "epoch": 0.5643560377737593, "grad_norm": 0.771520733833313, "learning_rate": 0.00018326561622703587, "loss": 1.5216, "step": 3511 }, { "epoch": 0.564516777175005, "grad_norm": 0.8173030018806458, "learning_rate": 0.0001832562779341882, "loss": 1.5692, "step": 3512 }, { "epoch": 0.5646775165762508, "grad_norm": 0.8047986626625061, "learning_rate": 0.0001832469372745972, "loss": 1.3961, "step": 3513 }, { "epoch": 0.5648382559774965, "grad_norm": 0.8184692859649658, "learning_rate": 0.00018323759424852836, "loss": 1.4309, "step": 3514 }, { "epoch": 0.5649989953787422, "grad_norm": 0.7703588008880615, "learning_rate": 0.00018322824885624726, "loss": 1.4827, "step": 3515 }, { "epoch": 0.5651597347799879, "grad_norm": 0.9133789539337158, "learning_rate": 0.0001832189010980196, "loss": 1.623, "step": 3516 }, { "epoch": 0.5653204741812337, "grad_norm": 0.6975560784339905, "learning_rate": 0.00018320955097411108, "loss": 1.2888, "step": 3517 }, { "epoch": 0.5654812135824794, "grad_norm": 0.8293776512145996, "learning_rate": 0.00018320019848478753, "loss": 1.3558, "step": 3518 }, { "epoch": 0.5656419529837251, "grad_norm": 0.8176863789558411, "learning_rate": 0.0001831908436303148, "loss": 1.3205, "step": 3519 }, { "epoch": 0.5658026923849708, "grad_norm": 0.7486855387687683, "learning_rate": 0.00018318148641095883, "loss": 1.3347, "step": 3520 }, { "epoch": 0.5659634317862166, "grad_norm": 0.8870848417282104, "learning_rate": 0.0001831721268269856, "loss": 1.4796, "step": 3521 }, { "epoch": 0.5661241711874623, "grad_norm": 0.8714601397514343, "learning_rate": 0.00018316276487866117, "loss": 1.3692, "step": 3522 }, { "epoch": 0.5662849105887081, "grad_norm": 0.789484977722168, "learning_rate": 0.00018315340056625172, "loss": 1.4999, "step": 3523 }, { "epoch": 0.5664456499899538, "grad_norm": 0.7682178616523743, "learning_rate": 0.00018314403389002343, "loss": 1.3781, "step": 3524 }, { "epoch": 0.5666063893911996, "grad_norm": 0.76296466588974, "learning_rate": 0.00018313466485024257, "loss": 1.4181, "step": 3525 }, { "epoch": 0.5667671287924453, "grad_norm": 0.7934573888778687, "learning_rate": 0.0001831252934471754, "loss": 1.6167, "step": 3526 }, { "epoch": 0.566927868193691, "grad_norm": 0.8610479831695557, "learning_rate": 0.00018311591968108844, "loss": 1.4827, "step": 3527 }, { "epoch": 0.5670886075949367, "grad_norm": 0.8090446591377258, "learning_rate": 0.00018310654355224812, "loss": 1.574, "step": 3528 }, { "epoch": 0.5672493469961825, "grad_norm": 1.0797876119613647, "learning_rate": 0.00018309716506092097, "loss": 1.5846, "step": 3529 }, { "epoch": 0.5674100863974282, "grad_norm": 0.8803074359893799, "learning_rate": 0.0001830877842073736, "loss": 1.6595, "step": 3530 }, { "epoch": 0.5675708257986739, "grad_norm": 0.6558796763420105, "learning_rate": 0.00018307840099187264, "loss": 1.2479, "step": 3531 }, { "epoch": 0.5677315651999196, "grad_norm": 0.8579014539718628, "learning_rate": 0.00018306901541468486, "loss": 1.3684, "step": 3532 }, { "epoch": 0.5678923046011654, "grad_norm": 0.8699368238449097, "learning_rate": 0.00018305962747607708, "loss": 1.743, "step": 3533 }, { "epoch": 0.5680530440024111, "grad_norm": 0.7640133500099182, "learning_rate": 0.00018305023717631616, "loss": 1.3804, "step": 3534 }, { "epoch": 0.5682137834036568, "grad_norm": 0.7888444066047668, "learning_rate": 0.00018304084451566905, "loss": 1.3579, "step": 3535 }, { "epoch": 0.5683745228049025, "grad_norm": 0.7729140520095825, "learning_rate": 0.00018303144949440275, "loss": 1.4692, "step": 3536 }, { "epoch": 0.5685352622061483, "grad_norm": 0.9253979921340942, "learning_rate": 0.0001830220521127843, "loss": 1.6244, "step": 3537 }, { "epoch": 0.568696001607394, "grad_norm": 0.8450219631195068, "learning_rate": 0.0001830126523710809, "loss": 1.3658, "step": 3538 }, { "epoch": 0.5688567410086397, "grad_norm": 0.8708456754684448, "learning_rate": 0.00018300325026955974, "loss": 1.3416, "step": 3539 }, { "epoch": 0.5690174804098854, "grad_norm": 0.9159559607505798, "learning_rate": 0.00018299384580848805, "loss": 1.3184, "step": 3540 }, { "epoch": 0.5691782198111313, "grad_norm": 0.9485965967178345, "learning_rate": 0.00018298443898813327, "loss": 1.5163, "step": 3541 }, { "epoch": 0.569338959212377, "grad_norm": 0.7586264610290527, "learning_rate": 0.00018297502980876268, "loss": 1.2261, "step": 3542 }, { "epoch": 0.5694996986136227, "grad_norm": 0.8161360025405884, "learning_rate": 0.00018296561827064388, "loss": 1.3039, "step": 3543 }, { "epoch": 0.5696604380148684, "grad_norm": 0.7738651037216187, "learning_rate": 0.00018295620437404434, "loss": 1.4654, "step": 3544 }, { "epoch": 0.5698211774161142, "grad_norm": 0.8391285538673401, "learning_rate": 0.00018294678811923168, "loss": 1.5028, "step": 3545 }, { "epoch": 0.5699819168173599, "grad_norm": 0.8255013227462769, "learning_rate": 0.0001829373695064736, "loss": 1.2561, "step": 3546 }, { "epoch": 0.5701426562186056, "grad_norm": 0.8152461051940918, "learning_rate": 0.00018292794853603782, "loss": 1.3583, "step": 3547 }, { "epoch": 0.5703033956198513, "grad_norm": 0.8787049651145935, "learning_rate": 0.0001829185252081922, "loss": 1.4508, "step": 3548 }, { "epoch": 0.570464135021097, "grad_norm": 0.7311258912086487, "learning_rate": 0.00018290909952320457, "loss": 1.4889, "step": 3549 }, { "epoch": 0.5706248744223428, "grad_norm": 0.8246176838874817, "learning_rate": 0.00018289967148134287, "loss": 1.3151, "step": 3550 }, { "epoch": 0.5707856138235885, "grad_norm": 0.8691884279251099, "learning_rate": 0.00018289024108287513, "loss": 1.3415, "step": 3551 }, { "epoch": 0.5709463532248342, "grad_norm": 0.8864870071411133, "learning_rate": 0.00018288080832806947, "loss": 1.6408, "step": 3552 }, { "epoch": 0.57110709262608, "grad_norm": 0.8469099998474121, "learning_rate": 0.00018287137321719397, "loss": 1.4378, "step": 3553 }, { "epoch": 0.5712678320273257, "grad_norm": 0.7355675101280212, "learning_rate": 0.00018286193575051688, "loss": 1.3712, "step": 3554 }, { "epoch": 0.5714285714285714, "grad_norm": 0.8527761697769165, "learning_rate": 0.00018285249592830648, "loss": 1.4176, "step": 3555 }, { "epoch": 0.5715893108298171, "grad_norm": 0.8498595356941223, "learning_rate": 0.0001828430537508311, "loss": 1.6733, "step": 3556 }, { "epoch": 0.5717500502310628, "grad_norm": 0.7803249359130859, "learning_rate": 0.00018283360921835917, "loss": 1.2986, "step": 3557 }, { "epoch": 0.5719107896323086, "grad_norm": 0.7896379828453064, "learning_rate": 0.00018282416233115917, "loss": 1.3964, "step": 3558 }, { "epoch": 0.5720715290335544, "grad_norm": 0.7736493945121765, "learning_rate": 0.00018281471308949967, "loss": 1.3914, "step": 3559 }, { "epoch": 0.5722322684348001, "grad_norm": 0.8273385763168335, "learning_rate": 0.0001828052614936492, "loss": 1.3998, "step": 3560 }, { "epoch": 0.5723930078360459, "grad_norm": 0.8132904171943665, "learning_rate": 0.00018279580754387658, "loss": 1.5559, "step": 3561 }, { "epoch": 0.5725537472372916, "grad_norm": 0.6619194746017456, "learning_rate": 0.00018278635124045043, "loss": 1.2204, "step": 3562 }, { "epoch": 0.5727144866385373, "grad_norm": 0.8050054311752319, "learning_rate": 0.00018277689258363965, "loss": 1.506, "step": 3563 }, { "epoch": 0.572875226039783, "grad_norm": 0.8875206708908081, "learning_rate": 0.00018276743157371307, "loss": 1.5657, "step": 3564 }, { "epoch": 0.5730359654410287, "grad_norm": 0.935675859451294, "learning_rate": 0.00018275796821093967, "loss": 1.5008, "step": 3565 }, { "epoch": 0.5731967048422745, "grad_norm": 0.7848905920982361, "learning_rate": 0.00018274850249558848, "loss": 1.3902, "step": 3566 }, { "epoch": 0.5733574442435202, "grad_norm": 0.8881785869598389, "learning_rate": 0.00018273903442792854, "loss": 1.4688, "step": 3567 }, { "epoch": 0.5735181836447659, "grad_norm": 0.8983811140060425, "learning_rate": 0.00018272956400822905, "loss": 1.6156, "step": 3568 }, { "epoch": 0.5736789230460116, "grad_norm": 0.8703747391700745, "learning_rate": 0.00018272009123675918, "loss": 1.4617, "step": 3569 }, { "epoch": 0.5738396624472574, "grad_norm": 0.8837721347808838, "learning_rate": 0.00018271061611378826, "loss": 1.6641, "step": 3570 }, { "epoch": 0.5740004018485031, "grad_norm": 0.8597355484962463, "learning_rate": 0.0001827011386395856, "loss": 1.655, "step": 3571 }, { "epoch": 0.5741611412497488, "grad_norm": 0.7949037551879883, "learning_rate": 0.00018269165881442065, "loss": 1.4002, "step": 3572 }, { "epoch": 0.5743218806509945, "grad_norm": 0.7416331768035889, "learning_rate": 0.0001826821766385629, "loss": 1.1628, "step": 3573 }, { "epoch": 0.5744826200522403, "grad_norm": 0.7875040769577026, "learning_rate": 0.00018267269211228184, "loss": 1.4303, "step": 3574 }, { "epoch": 0.574643359453486, "grad_norm": 0.8152592182159424, "learning_rate": 0.00018266320523584715, "loss": 1.4172, "step": 3575 }, { "epoch": 0.5748040988547317, "grad_norm": 0.8573201894760132, "learning_rate": 0.0001826537160095285, "loss": 1.5459, "step": 3576 }, { "epoch": 0.5749648382559776, "grad_norm": 0.7887688875198364, "learning_rate": 0.00018264422443359563, "loss": 1.5199, "step": 3577 }, { "epoch": 0.5751255776572233, "grad_norm": 0.9749796390533447, "learning_rate": 0.00018263473050831837, "loss": 1.523, "step": 3578 }, { "epoch": 0.575286317058469, "grad_norm": 0.8879985809326172, "learning_rate": 0.00018262523423396662, "loss": 1.3985, "step": 3579 }, { "epoch": 0.5754470564597147, "grad_norm": 0.61961430311203, "learning_rate": 0.00018261573561081031, "loss": 1.0912, "step": 3580 }, { "epoch": 0.5756077958609604, "grad_norm": 0.8372465968132019, "learning_rate": 0.00018260623463911947, "loss": 1.4631, "step": 3581 }, { "epoch": 0.5757685352622062, "grad_norm": 0.787183940410614, "learning_rate": 0.00018259673131916417, "loss": 1.4696, "step": 3582 }, { "epoch": 0.5759292746634519, "grad_norm": 0.8781692981719971, "learning_rate": 0.00018258722565121457, "loss": 1.666, "step": 3583 }, { "epoch": 0.5760900140646976, "grad_norm": 0.745153546333313, "learning_rate": 0.0001825777176355409, "loss": 1.4375, "step": 3584 }, { "epoch": 0.5762507534659433, "grad_norm": 0.8475265502929688, "learning_rate": 0.00018256820727241346, "loss": 1.4029, "step": 3585 }, { "epoch": 0.5764114928671891, "grad_norm": 0.9090588688850403, "learning_rate": 0.00018255869456210258, "loss": 1.7783, "step": 3586 }, { "epoch": 0.5765722322684348, "grad_norm": 0.7442242503166199, "learning_rate": 0.00018254917950487868, "loss": 1.3415, "step": 3587 }, { "epoch": 0.5767329716696805, "grad_norm": 0.7352378964424133, "learning_rate": 0.00018253966210101224, "loss": 1.2056, "step": 3588 }, { "epoch": 0.5768937110709262, "grad_norm": 0.8588562607765198, "learning_rate": 0.00018253014235077383, "loss": 1.4402, "step": 3589 }, { "epoch": 0.577054450472172, "grad_norm": 0.8207905888557434, "learning_rate": 0.00018252062025443406, "loss": 1.4028, "step": 3590 }, { "epoch": 0.5772151898734177, "grad_norm": 0.6777616739273071, "learning_rate": 0.00018251109581226361, "loss": 1.1815, "step": 3591 }, { "epoch": 0.5773759292746634, "grad_norm": 0.8747861385345459, "learning_rate": 0.00018250156902453326, "loss": 1.5673, "step": 3592 }, { "epoch": 0.5775366686759091, "grad_norm": 0.6920707821846008, "learning_rate": 0.0001824920398915138, "loss": 1.3132, "step": 3593 }, { "epoch": 0.5776974080771549, "grad_norm": 0.7359331250190735, "learning_rate": 0.00018248250841347617, "loss": 1.3675, "step": 3594 }, { "epoch": 0.5778581474784007, "grad_norm": 0.8641143441200256, "learning_rate": 0.00018247297459069123, "loss": 1.5476, "step": 3595 }, { "epoch": 0.5780188868796464, "grad_norm": 0.7159335017204285, "learning_rate": 0.0001824634384234301, "loss": 1.4403, "step": 3596 }, { "epoch": 0.5781796262808921, "grad_norm": 0.7588579058647156, "learning_rate": 0.00018245389991196378, "loss": 1.4324, "step": 3597 }, { "epoch": 0.5783403656821379, "grad_norm": 0.8039872050285339, "learning_rate": 0.00018244435905656348, "loss": 1.4148, "step": 3598 }, { "epoch": 0.5785011050833836, "grad_norm": 0.79317706823349, "learning_rate": 0.0001824348158575004, "loss": 1.5543, "step": 3599 }, { "epoch": 0.5786618444846293, "grad_norm": 0.8184112906455994, "learning_rate": 0.00018242527031504585, "loss": 1.6351, "step": 3600 }, { "epoch": 0.5786618444846293, "eval_loss": 1.4872082471847534, "eval_runtime": 46.2321, "eval_samples_per_second": 5.429, "eval_steps_per_second": 2.725, "step": 3600 }, { "epoch": 0.578822583885875, "grad_norm": 0.7528217434883118, "learning_rate": 0.00018241572242947112, "loss": 1.4148, "step": 3601 }, { "epoch": 0.5789833232871208, "grad_norm": 0.8018618226051331, "learning_rate": 0.0001824061722010477, "loss": 1.2186, "step": 3602 }, { "epoch": 0.5791440626883665, "grad_norm": 0.8759437203407288, "learning_rate": 0.00018239661963004704, "loss": 1.5752, "step": 3603 }, { "epoch": 0.5793048020896122, "grad_norm": 0.7461703419685364, "learning_rate": 0.00018238706471674073, "loss": 1.6099, "step": 3604 }, { "epoch": 0.5794655414908579, "grad_norm": 0.7844128608703613, "learning_rate": 0.0001823775074614003, "loss": 1.3776, "step": 3605 }, { "epoch": 0.5796262808921037, "grad_norm": 0.9594727158546448, "learning_rate": 0.00018236794786429753, "loss": 1.4306, "step": 3606 }, { "epoch": 0.5797870202933494, "grad_norm": 0.8263266086578369, "learning_rate": 0.00018235838592570416, "loss": 1.275, "step": 3607 }, { "epoch": 0.5799477596945951, "grad_norm": 0.9031165838241577, "learning_rate": 0.00018234882164589196, "loss": 1.4176, "step": 3608 }, { "epoch": 0.5801084990958408, "grad_norm": 0.979155957698822, "learning_rate": 0.00018233925502513288, "loss": 1.6688, "step": 3609 }, { "epoch": 0.5802692384970866, "grad_norm": 0.8131352066993713, "learning_rate": 0.0001823296860636988, "loss": 1.4453, "step": 3610 }, { "epoch": 0.5804299778983323, "grad_norm": 0.7670798301696777, "learning_rate": 0.0001823201147618618, "loss": 1.3424, "step": 3611 }, { "epoch": 0.580590717299578, "grad_norm": 0.8556586503982544, "learning_rate": 0.0001823105411198939, "loss": 1.5312, "step": 3612 }, { "epoch": 0.5807514567008238, "grad_norm": 0.7436866760253906, "learning_rate": 0.00018230096513806733, "loss": 1.4551, "step": 3613 }, { "epoch": 0.5809121961020696, "grad_norm": 0.864037036895752, "learning_rate": 0.00018229138681665426, "loss": 1.7248, "step": 3614 }, { "epoch": 0.5810729355033153, "grad_norm": 0.7645609378814697, "learning_rate": 0.000182281806155927, "loss": 1.3653, "step": 3615 }, { "epoch": 0.581233674904561, "grad_norm": 0.8260241746902466, "learning_rate": 0.00018227222315615786, "loss": 1.5514, "step": 3616 }, { "epoch": 0.5813944143058067, "grad_norm": 0.7814425230026245, "learning_rate": 0.00018226263781761931, "loss": 1.5331, "step": 3617 }, { "epoch": 0.5815551537070525, "grad_norm": 0.7925345301628113, "learning_rate": 0.0001822530501405838, "loss": 1.4229, "step": 3618 }, { "epoch": 0.5817158931082982, "grad_norm": 0.8228952884674072, "learning_rate": 0.0001822434601253239, "loss": 1.5176, "step": 3619 }, { "epoch": 0.5818766325095439, "grad_norm": 0.6458499431610107, "learning_rate": 0.0001822338677721122, "loss": 1.1203, "step": 3620 }, { "epoch": 0.5820373719107896, "grad_norm": 0.6864424347877502, "learning_rate": 0.0001822242730812214, "loss": 1.228, "step": 3621 }, { "epoch": 0.5821981113120354, "grad_norm": 0.808828592300415, "learning_rate": 0.0001822146760529243, "loss": 1.5513, "step": 3622 }, { "epoch": 0.5823588507132811, "grad_norm": 0.858120322227478, "learning_rate": 0.0001822050766874936, "loss": 1.415, "step": 3623 }, { "epoch": 0.5825195901145268, "grad_norm": 0.7679414749145508, "learning_rate": 0.0001821954749852023, "loss": 1.4252, "step": 3624 }, { "epoch": 0.5826803295157725, "grad_norm": 0.8275029063224792, "learning_rate": 0.0001821858709463233, "loss": 1.4278, "step": 3625 }, { "epoch": 0.5828410689170183, "grad_norm": 0.7636759877204895, "learning_rate": 0.00018217626457112958, "loss": 1.6626, "step": 3626 }, { "epoch": 0.583001808318264, "grad_norm": 0.7782408595085144, "learning_rate": 0.00018216665585989432, "loss": 1.3827, "step": 3627 }, { "epoch": 0.5831625477195097, "grad_norm": 0.7547385692596436, "learning_rate": 0.00018215704481289055, "loss": 1.263, "step": 3628 }, { "epoch": 0.5833232871207554, "grad_norm": 0.9035071134567261, "learning_rate": 0.00018214743143039156, "loss": 1.6616, "step": 3629 }, { "epoch": 0.5834840265220012, "grad_norm": 0.8114115595817566, "learning_rate": 0.00018213781571267066, "loss": 1.2582, "step": 3630 }, { "epoch": 0.583644765923247, "grad_norm": 0.859399139881134, "learning_rate": 0.00018212819766000112, "loss": 1.4414, "step": 3631 }, { "epoch": 0.5838055053244927, "grad_norm": 0.8403969407081604, "learning_rate": 0.00018211857727265635, "loss": 1.336, "step": 3632 }, { "epoch": 0.5839662447257384, "grad_norm": 0.8457290530204773, "learning_rate": 0.00018210895455090994, "loss": 1.3768, "step": 3633 }, { "epoch": 0.5841269841269842, "grad_norm": 0.8492411971092224, "learning_rate": 0.0001820993294950353, "loss": 1.5672, "step": 3634 }, { "epoch": 0.5842877235282299, "grad_norm": 0.9314752221107483, "learning_rate": 0.00018208970210530616, "loss": 1.6484, "step": 3635 }, { "epoch": 0.5844484629294756, "grad_norm": 0.8824315071105957, "learning_rate": 0.0001820800723819961, "loss": 1.3677, "step": 3636 }, { "epoch": 0.5846092023307213, "grad_norm": 0.8490722179412842, "learning_rate": 0.00018207044032537897, "loss": 1.5372, "step": 3637 }, { "epoch": 0.5847699417319671, "grad_norm": 0.7830153703689575, "learning_rate": 0.0001820608059357285, "loss": 1.5823, "step": 3638 }, { "epoch": 0.5849306811332128, "grad_norm": 0.7777186036109924, "learning_rate": 0.00018205116921331856, "loss": 1.2204, "step": 3639 }, { "epoch": 0.5850914205344585, "grad_norm": 0.7347624897956848, "learning_rate": 0.0001820415301584232, "loss": 1.4693, "step": 3640 }, { "epoch": 0.5852521599357042, "grad_norm": 0.7492866516113281, "learning_rate": 0.00018203188877131634, "loss": 1.4546, "step": 3641 }, { "epoch": 0.58541289933695, "grad_norm": 0.8413015007972717, "learning_rate": 0.00018202224505227207, "loss": 1.4991, "step": 3642 }, { "epoch": 0.5855736387381957, "grad_norm": 0.7110021114349365, "learning_rate": 0.00018201259900156453, "loss": 1.4083, "step": 3643 }, { "epoch": 0.5857343781394414, "grad_norm": 0.7323606610298157, "learning_rate": 0.00018200295061946795, "loss": 1.2658, "step": 3644 }, { "epoch": 0.5858951175406871, "grad_norm": 0.7900603413581848, "learning_rate": 0.00018199329990625663, "loss": 1.4276, "step": 3645 }, { "epoch": 0.5860558569419329, "grad_norm": 0.8965500593185425, "learning_rate": 0.00018198364686220487, "loss": 1.7896, "step": 3646 }, { "epoch": 0.5862165963431786, "grad_norm": 0.9012687802314758, "learning_rate": 0.0001819739914875871, "loss": 1.5813, "step": 3647 }, { "epoch": 0.5863773357444243, "grad_norm": 0.9058600068092346, "learning_rate": 0.00018196433378267777, "loss": 1.5633, "step": 3648 }, { "epoch": 0.5865380751456701, "grad_norm": 0.8417165875434875, "learning_rate": 0.00018195467374775145, "loss": 1.393, "step": 3649 }, { "epoch": 0.5866988145469159, "grad_norm": 0.8096698522567749, "learning_rate": 0.00018194501138308274, "loss": 1.3462, "step": 3650 }, { "epoch": 0.5868595539481616, "grad_norm": 0.8650566935539246, "learning_rate": 0.0001819353466889463, "loss": 1.4604, "step": 3651 }, { "epoch": 0.5870202933494073, "grad_norm": 0.7909032106399536, "learning_rate": 0.00018192567966561688, "loss": 1.4289, "step": 3652 }, { "epoch": 0.587181032750653, "grad_norm": 0.789933979511261, "learning_rate": 0.0001819160103133693, "loss": 1.5194, "step": 3653 }, { "epoch": 0.5873417721518988, "grad_norm": 0.8069058060646057, "learning_rate": 0.00018190633863247847, "loss": 1.4561, "step": 3654 }, { "epoch": 0.5875025115531445, "grad_norm": 0.882792055606842, "learning_rate": 0.0001818966646232192, "loss": 1.7359, "step": 3655 }, { "epoch": 0.5876632509543902, "grad_norm": 0.8428239822387695, "learning_rate": 0.0001818869882858666, "loss": 1.3956, "step": 3656 }, { "epoch": 0.5878239903556359, "grad_norm": 0.8675400018692017, "learning_rate": 0.00018187730962069575, "loss": 1.5099, "step": 3657 }, { "epoch": 0.5879847297568817, "grad_norm": 0.7427046895027161, "learning_rate": 0.0001818676286279817, "loss": 1.3147, "step": 3658 }, { "epoch": 0.5881454691581274, "grad_norm": 0.8740839958190918, "learning_rate": 0.00018185794530799974, "loss": 1.57, "step": 3659 }, { "epoch": 0.5883062085593731, "grad_norm": 0.7777817845344543, "learning_rate": 0.0001818482596610251, "loss": 1.4949, "step": 3660 }, { "epoch": 0.5884669479606188, "grad_norm": 0.7431259751319885, "learning_rate": 0.0001818385716873331, "loss": 1.3401, "step": 3661 }, { "epoch": 0.5886276873618645, "grad_norm": 0.7448675036430359, "learning_rate": 0.00018182888138719924, "loss": 1.4713, "step": 3662 }, { "epoch": 0.5887884267631103, "grad_norm": 0.8758003115653992, "learning_rate": 0.00018181918876089884, "loss": 1.5525, "step": 3663 }, { "epoch": 0.588949166164356, "grad_norm": 0.8097133040428162, "learning_rate": 0.00018180949380870755, "loss": 1.4829, "step": 3664 }, { "epoch": 0.5891099055656017, "grad_norm": 0.7894575595855713, "learning_rate": 0.0001817997965309009, "loss": 1.4029, "step": 3665 }, { "epoch": 0.5892706449668474, "grad_norm": 0.7532247304916382, "learning_rate": 0.0001817900969277546, "loss": 1.4283, "step": 3666 }, { "epoch": 0.5894313843680933, "grad_norm": 0.7733650207519531, "learning_rate": 0.00018178039499954437, "loss": 1.4176, "step": 3667 }, { "epoch": 0.589592123769339, "grad_norm": 0.8910491466522217, "learning_rate": 0.000181770690746546, "loss": 1.4035, "step": 3668 }, { "epoch": 0.5897528631705847, "grad_norm": 0.7591543197631836, "learning_rate": 0.00018176098416903535, "loss": 1.3643, "step": 3669 }, { "epoch": 0.5899136025718305, "grad_norm": 0.7821176052093506, "learning_rate": 0.00018175127526728834, "loss": 1.4535, "step": 3670 }, { "epoch": 0.5900743419730762, "grad_norm": 0.8687539100646973, "learning_rate": 0.00018174156404158104, "loss": 1.4257, "step": 3671 }, { "epoch": 0.5902350813743219, "grad_norm": 0.7204382419586182, "learning_rate": 0.00018173185049218945, "loss": 1.2511, "step": 3672 }, { "epoch": 0.5903958207755676, "grad_norm": 0.8722280263900757, "learning_rate": 0.00018172213461938968, "loss": 1.297, "step": 3673 }, { "epoch": 0.5905565601768133, "grad_norm": 0.8667067289352417, "learning_rate": 0.000181712416423458, "loss": 1.5202, "step": 3674 }, { "epoch": 0.5907172995780591, "grad_norm": 0.8912860155105591, "learning_rate": 0.0001817026959046706, "loss": 1.4046, "step": 3675 }, { "epoch": 0.5908780389793048, "grad_norm": 0.8770840167999268, "learning_rate": 0.00018169297306330382, "loss": 1.33, "step": 3676 }, { "epoch": 0.5910387783805505, "grad_norm": 0.8713551163673401, "learning_rate": 0.0001816832478996341, "loss": 1.3679, "step": 3677 }, { "epoch": 0.5911995177817962, "grad_norm": 0.8583324551582336, "learning_rate": 0.00018167352041393783, "loss": 1.6945, "step": 3678 }, { "epoch": 0.591360257183042, "grad_norm": 0.8254200220108032, "learning_rate": 0.0001816637906064916, "loss": 1.5261, "step": 3679 }, { "epoch": 0.5915209965842877, "grad_norm": 0.8377071022987366, "learning_rate": 0.00018165405847757197, "loss": 1.3657, "step": 3680 }, { "epoch": 0.5916817359855334, "grad_norm": 0.7493349313735962, "learning_rate": 0.0001816443240274556, "loss": 1.1766, "step": 3681 }, { "epoch": 0.5918424753867791, "grad_norm": 0.7360992431640625, "learning_rate": 0.0001816345872564192, "loss": 1.2527, "step": 3682 }, { "epoch": 0.5920032147880249, "grad_norm": 0.8222139477729797, "learning_rate": 0.00018162484816473958, "loss": 1.3253, "step": 3683 }, { "epoch": 0.5921639541892706, "grad_norm": 0.8094049096107483, "learning_rate": 0.0001816151067526936, "loss": 1.4964, "step": 3684 }, { "epoch": 0.5923246935905164, "grad_norm": 0.6552943587303162, "learning_rate": 0.00018160536302055816, "loss": 1.3322, "step": 3685 }, { "epoch": 0.5924854329917622, "grad_norm": 0.9407137632369995, "learning_rate": 0.00018159561696861026, "loss": 1.5793, "step": 3686 }, { "epoch": 0.5926461723930079, "grad_norm": 0.8417989015579224, "learning_rate": 0.00018158586859712693, "loss": 1.549, "step": 3687 }, { "epoch": 0.5928069117942536, "grad_norm": 0.7849283218383789, "learning_rate": 0.00018157611790638534, "loss": 1.5159, "step": 3688 }, { "epoch": 0.5929676511954993, "grad_norm": 0.821814239025116, "learning_rate": 0.0001815663648966626, "loss": 1.6834, "step": 3689 }, { "epoch": 0.593128390596745, "grad_norm": 0.8287963271141052, "learning_rate": 0.00018155660956823604, "loss": 1.5942, "step": 3690 }, { "epoch": 0.5932891299979908, "grad_norm": 0.8353708982467651, "learning_rate": 0.0001815468519213829, "loss": 1.5027, "step": 3691 }, { "epoch": 0.5934498693992365, "grad_norm": 0.7299961447715759, "learning_rate": 0.00018153709195638064, "loss": 1.5006, "step": 3692 }, { "epoch": 0.5936106088004822, "grad_norm": 0.7976347804069519, "learning_rate": 0.00018152732967350669, "loss": 1.4962, "step": 3693 }, { "epoch": 0.5937713482017279, "grad_norm": 0.7902625799179077, "learning_rate": 0.0001815175650730385, "loss": 1.2712, "step": 3694 }, { "epoch": 0.5939320876029737, "grad_norm": 0.7606790065765381, "learning_rate": 0.00018150779815525372, "loss": 1.3739, "step": 3695 }, { "epoch": 0.5940928270042194, "grad_norm": 0.70188307762146, "learning_rate": 0.00018149802892042995, "loss": 1.3356, "step": 3696 }, { "epoch": 0.5942535664054651, "grad_norm": 0.7931273579597473, "learning_rate": 0.00018148825736884495, "loss": 1.5998, "step": 3697 }, { "epoch": 0.5944143058067108, "grad_norm": 1.0433954000473022, "learning_rate": 0.00018147848350077647, "loss": 1.5181, "step": 3698 }, { "epoch": 0.5945750452079566, "grad_norm": 0.8985570669174194, "learning_rate": 0.00018146870731650233, "loss": 1.5607, "step": 3699 }, { "epoch": 0.5947357846092023, "grad_norm": 0.7515749335289001, "learning_rate": 0.0001814589288163005, "loss": 1.2315, "step": 3700 }, { "epoch": 0.594896524010448, "grad_norm": 0.7470755577087402, "learning_rate": 0.0001814491480004489, "loss": 1.4232, "step": 3701 }, { "epoch": 0.5950572634116937, "grad_norm": 0.7948623299598694, "learning_rate": 0.0001814393648692256, "loss": 1.4657, "step": 3702 }, { "epoch": 0.5952180028129396, "grad_norm": 0.7484851479530334, "learning_rate": 0.0001814295794229087, "loss": 1.2337, "step": 3703 }, { "epoch": 0.5953787422141853, "grad_norm": 0.7571722865104675, "learning_rate": 0.00018141979166177635, "loss": 1.1797, "step": 3704 }, { "epoch": 0.595539481615431, "grad_norm": 0.8501807451248169, "learning_rate": 0.00018141000158610688, "loss": 1.6867, "step": 3705 }, { "epoch": 0.5957002210166767, "grad_norm": 0.774314284324646, "learning_rate": 0.00018140020919617846, "loss": 1.5818, "step": 3706 }, { "epoch": 0.5958609604179225, "grad_norm": 0.9612992405891418, "learning_rate": 0.00018139041449226955, "loss": 1.8204, "step": 3707 }, { "epoch": 0.5960216998191682, "grad_norm": 0.8349434733390808, "learning_rate": 0.00018138061747465856, "loss": 1.4394, "step": 3708 }, { "epoch": 0.5961824392204139, "grad_norm": 0.8350684642791748, "learning_rate": 0.000181370818143624, "loss": 1.317, "step": 3709 }, { "epoch": 0.5963431786216596, "grad_norm": 0.7730976343154907, "learning_rate": 0.00018136101649944442, "loss": 1.3897, "step": 3710 }, { "epoch": 0.5965039180229054, "grad_norm": 0.7344859838485718, "learning_rate": 0.00018135121254239843, "loss": 1.2824, "step": 3711 }, { "epoch": 0.5966646574241511, "grad_norm": 0.7821740508079529, "learning_rate": 0.00018134140627276484, "loss": 1.2432, "step": 3712 }, { "epoch": 0.5968253968253968, "grad_norm": 0.7955597639083862, "learning_rate": 0.00018133159769082225, "loss": 1.3529, "step": 3713 }, { "epoch": 0.5969861362266425, "grad_norm": 0.8677069544792175, "learning_rate": 0.00018132178679684964, "loss": 1.3015, "step": 3714 }, { "epoch": 0.5971468756278883, "grad_norm": 0.8404643535614014, "learning_rate": 0.00018131197359112585, "loss": 1.6486, "step": 3715 }, { "epoch": 0.597307615029134, "grad_norm": 0.8434663414955139, "learning_rate": 0.00018130215807392983, "loss": 1.5239, "step": 3716 }, { "epoch": 0.5974683544303797, "grad_norm": 0.9369305372238159, "learning_rate": 0.00018129234024554062, "loss": 1.6832, "step": 3717 }, { "epoch": 0.5976290938316254, "grad_norm": 0.7326930165290833, "learning_rate": 0.00018128252010623727, "loss": 1.4893, "step": 3718 }, { "epoch": 0.5977898332328712, "grad_norm": 0.9300792217254639, "learning_rate": 0.000181272697656299, "loss": 1.4438, "step": 3719 }, { "epoch": 0.5979505726341169, "grad_norm": 0.9124138355255127, "learning_rate": 0.00018126287289600503, "loss": 1.6215, "step": 3720 }, { "epoch": 0.5981113120353627, "grad_norm": 0.8568860292434692, "learning_rate": 0.00018125304582563464, "loss": 1.358, "step": 3721 }, { "epoch": 0.5982720514366084, "grad_norm": 0.8504576086997986, "learning_rate": 0.00018124321644546717, "loss": 1.6332, "step": 3722 }, { "epoch": 0.5984327908378542, "grad_norm": 0.8040387630462646, "learning_rate": 0.00018123338475578202, "loss": 1.3837, "step": 3723 }, { "epoch": 0.5985935302390999, "grad_norm": 0.7997492551803589, "learning_rate": 0.00018122355075685876, "loss": 1.3742, "step": 3724 }, { "epoch": 0.5987542696403456, "grad_norm": 0.7933949828147888, "learning_rate": 0.00018121371444897687, "loss": 1.5881, "step": 3725 }, { "epoch": 0.5989150090415913, "grad_norm": 0.8566396832466125, "learning_rate": 0.00018120387583241596, "loss": 1.5905, "step": 3726 }, { "epoch": 0.5990757484428371, "grad_norm": 0.7068170309066772, "learning_rate": 0.00018119403490745578, "loss": 1.1968, "step": 3727 }, { "epoch": 0.5992364878440828, "grad_norm": 0.7053247094154358, "learning_rate": 0.000181184191674376, "loss": 1.3447, "step": 3728 }, { "epoch": 0.5993972272453285, "grad_norm": 0.7613917589187622, "learning_rate": 0.00018117434613345652, "loss": 1.3665, "step": 3729 }, { "epoch": 0.5995579666465742, "grad_norm": 0.8570252656936646, "learning_rate": 0.00018116449828497718, "loss": 1.5134, "step": 3730 }, { "epoch": 0.59971870604782, "grad_norm": 0.8509315848350525, "learning_rate": 0.00018115464812921793, "loss": 1.4006, "step": 3731 }, { "epoch": 0.5998794454490657, "grad_norm": 0.898302435874939, "learning_rate": 0.00018114479566645877, "loss": 1.6407, "step": 3732 }, { "epoch": 0.6000401848503114, "grad_norm": 0.8953514695167542, "learning_rate": 0.0001811349408969798, "loss": 1.5149, "step": 3733 }, { "epoch": 0.6002009242515571, "grad_norm": 0.753764808177948, "learning_rate": 0.00018112508382106115, "loss": 1.4518, "step": 3734 }, { "epoch": 0.6003616636528029, "grad_norm": 0.839428186416626, "learning_rate": 0.000181115224438983, "loss": 1.8347, "step": 3735 }, { "epoch": 0.6005224030540486, "grad_norm": 0.807718813419342, "learning_rate": 0.0001811053627510257, "loss": 1.326, "step": 3736 }, { "epoch": 0.6006831424552943, "grad_norm": 0.8619174361228943, "learning_rate": 0.00018109549875746953, "loss": 1.4947, "step": 3737 }, { "epoch": 0.60084388185654, "grad_norm": 0.8958014845848083, "learning_rate": 0.0001810856324585949, "loss": 1.5178, "step": 3738 }, { "epoch": 0.6010046212577859, "grad_norm": 0.7751129865646362, "learning_rate": 0.00018107576385468233, "loss": 1.3066, "step": 3739 }, { "epoch": 0.6011653606590316, "grad_norm": 0.773658275604248, "learning_rate": 0.00018106589294601227, "loss": 1.3381, "step": 3740 }, { "epoch": 0.6013261000602773, "grad_norm": 0.9120597839355469, "learning_rate": 0.0001810560197328654, "loss": 1.6399, "step": 3741 }, { "epoch": 0.601486839461523, "grad_norm": 0.7968156933784485, "learning_rate": 0.0001810461442155224, "loss": 1.5155, "step": 3742 }, { "epoch": 0.6016475788627688, "grad_norm": 0.9005950689315796, "learning_rate": 0.0001810362663942639, "loss": 1.5162, "step": 3743 }, { "epoch": 0.6018083182640145, "grad_norm": 0.7130471467971802, "learning_rate": 0.0001810263862693708, "loss": 1.3079, "step": 3744 }, { "epoch": 0.6019690576652602, "grad_norm": 0.7735755443572998, "learning_rate": 0.00018101650384112392, "loss": 1.3289, "step": 3745 }, { "epoch": 0.6021297970665059, "grad_norm": 0.8187049627304077, "learning_rate": 0.0001810066191098042, "loss": 1.3108, "step": 3746 }, { "epoch": 0.6022905364677517, "grad_norm": 0.8278025984764099, "learning_rate": 0.00018099673207569263, "loss": 1.4337, "step": 3747 }, { "epoch": 0.6024512758689974, "grad_norm": 0.9100396633148193, "learning_rate": 0.00018098684273907026, "loss": 1.6135, "step": 3748 }, { "epoch": 0.6026120152702431, "grad_norm": 0.7987372279167175, "learning_rate": 0.0001809769511002182, "loss": 1.2536, "step": 3749 }, { "epoch": 0.6027727546714888, "grad_norm": 1.1697989702224731, "learning_rate": 0.00018096705715941775, "loss": 1.6774, "step": 3750 }, { "epoch": 0.6029334940727346, "grad_norm": 0.8988845348358154, "learning_rate": 0.00018095716091695002, "loss": 1.4912, "step": 3751 }, { "epoch": 0.6030942334739803, "grad_norm": 0.8764055371284485, "learning_rate": 0.00018094726237309646, "loss": 1.5176, "step": 3752 }, { "epoch": 0.603254972875226, "grad_norm": 0.8616553544998169, "learning_rate": 0.00018093736152813836, "loss": 1.425, "step": 3753 }, { "epoch": 0.6034157122764717, "grad_norm": 0.7865896224975586, "learning_rate": 0.00018092745838235722, "loss": 1.5219, "step": 3754 }, { "epoch": 0.6035764516777175, "grad_norm": 0.7704793214797974, "learning_rate": 0.00018091755293603455, "loss": 1.4627, "step": 3755 }, { "epoch": 0.6037371910789632, "grad_norm": 0.8521960377693176, "learning_rate": 0.00018090764518945195, "loss": 1.5079, "step": 3756 }, { "epoch": 0.603897930480209, "grad_norm": 0.9701086282730103, "learning_rate": 0.00018089773514289102, "loss": 1.5404, "step": 3757 }, { "epoch": 0.6040586698814547, "grad_norm": 1.0885487794876099, "learning_rate": 0.00018088782279663352, "loss": 1.1888, "step": 3758 }, { "epoch": 0.6042194092827005, "grad_norm": 0.8525895476341248, "learning_rate": 0.00018087790815096125, "loss": 1.531, "step": 3759 }, { "epoch": 0.6043801486839462, "grad_norm": 0.7228007316589355, "learning_rate": 0.00018086799120615597, "loss": 1.206, "step": 3760 }, { "epoch": 0.6045408880851919, "grad_norm": 0.7948485612869263, "learning_rate": 0.0001808580719624997, "loss": 1.5672, "step": 3761 }, { "epoch": 0.6047016274864376, "grad_norm": 0.8333085775375366, "learning_rate": 0.00018084815042027435, "loss": 1.485, "step": 3762 }, { "epoch": 0.6048623668876834, "grad_norm": 0.7149414420127869, "learning_rate": 0.00018083822657976196, "loss": 1.3211, "step": 3763 }, { "epoch": 0.6050231062889291, "grad_norm": 0.7888516783714294, "learning_rate": 0.00018082830044124465, "loss": 1.3568, "step": 3764 }, { "epoch": 0.6051838456901748, "grad_norm": 0.807858943939209, "learning_rate": 0.0001808183720050046, "loss": 1.3067, "step": 3765 }, { "epoch": 0.6053445850914205, "grad_norm": 0.8449767827987671, "learning_rate": 0.00018080844127132405, "loss": 1.3851, "step": 3766 }, { "epoch": 0.6055053244926663, "grad_norm": 0.921038031578064, "learning_rate": 0.0001807985082404853, "loss": 1.4452, "step": 3767 }, { "epoch": 0.605666063893912, "grad_norm": 0.932400643825531, "learning_rate": 0.0001807885729127707, "loss": 1.6684, "step": 3768 }, { "epoch": 0.6058268032951577, "grad_norm": 0.7853494882583618, "learning_rate": 0.00018077863528846271, "loss": 1.6653, "step": 3769 }, { "epoch": 0.6059875426964034, "grad_norm": 0.8361904621124268, "learning_rate": 0.0001807686953678438, "loss": 1.2109, "step": 3770 }, { "epoch": 0.6061482820976491, "grad_norm": 0.9145835041999817, "learning_rate": 0.00018075875315119654, "loss": 1.2192, "step": 3771 }, { "epoch": 0.6063090214988949, "grad_norm": 0.9017598032951355, "learning_rate": 0.0001807488086388036, "loss": 1.4493, "step": 3772 }, { "epoch": 0.6064697609001406, "grad_norm": 0.768317699432373, "learning_rate": 0.00018073886183094762, "loss": 1.5136, "step": 3773 }, { "epoch": 0.6066305003013864, "grad_norm": 0.7461726069450378, "learning_rate": 0.00018072891272791138, "loss": 1.2408, "step": 3774 }, { "epoch": 0.6067912397026322, "grad_norm": 0.9705711603164673, "learning_rate": 0.00018071896132997774, "loss": 1.4389, "step": 3775 }, { "epoch": 0.6069519791038779, "grad_norm": 0.9792290329933167, "learning_rate": 0.00018070900763742954, "loss": 1.8006, "step": 3776 }, { "epoch": 0.6071127185051236, "grad_norm": 0.76834636926651, "learning_rate": 0.00018069905165054974, "loss": 1.4672, "step": 3777 }, { "epoch": 0.6072734579063693, "grad_norm": 0.8297131061553955, "learning_rate": 0.00018068909336962138, "loss": 1.5131, "step": 3778 }, { "epoch": 0.607434197307615, "grad_norm": 0.8736981749534607, "learning_rate": 0.00018067913279492756, "loss": 1.5907, "step": 3779 }, { "epoch": 0.6075949367088608, "grad_norm": 0.7620412111282349, "learning_rate": 0.0001806691699267514, "loss": 1.3978, "step": 3780 }, { "epoch": 0.6077556761101065, "grad_norm": 0.7704584002494812, "learning_rate": 0.00018065920476537616, "loss": 1.4127, "step": 3781 }, { "epoch": 0.6079164155113522, "grad_norm": 0.7565736770629883, "learning_rate": 0.00018064923731108505, "loss": 1.3097, "step": 3782 }, { "epoch": 0.608077154912598, "grad_norm": 0.7868773937225342, "learning_rate": 0.0001806392675641615, "loss": 1.461, "step": 3783 }, { "epoch": 0.6082378943138437, "grad_norm": 0.8495548963546753, "learning_rate": 0.00018062929552488884, "loss": 1.2411, "step": 3784 }, { "epoch": 0.6083986337150894, "grad_norm": 0.7776492238044739, "learning_rate": 0.0001806193211935506, "loss": 1.4262, "step": 3785 }, { "epoch": 0.6085593731163351, "grad_norm": 0.8739528059959412, "learning_rate": 0.0001806093445704303, "loss": 1.486, "step": 3786 }, { "epoch": 0.6087201125175808, "grad_norm": 0.8195846676826477, "learning_rate": 0.0001805993656558116, "loss": 1.2955, "step": 3787 }, { "epoch": 0.6088808519188266, "grad_norm": 0.8094857335090637, "learning_rate": 0.0001805893844499781, "loss": 1.5479, "step": 3788 }, { "epoch": 0.6090415913200723, "grad_norm": 0.8348665833473206, "learning_rate": 0.00018057940095321358, "loss": 1.5388, "step": 3789 }, { "epoch": 0.609202330721318, "grad_norm": 0.8143827319145203, "learning_rate": 0.0001805694151658018, "loss": 1.4557, "step": 3790 }, { "epoch": 0.6093630701225637, "grad_norm": 0.8822208046913147, "learning_rate": 0.00018055942708802668, "loss": 1.6374, "step": 3791 }, { "epoch": 0.6095238095238096, "grad_norm": 0.7860540151596069, "learning_rate": 0.00018054943672017213, "loss": 1.318, "step": 3792 }, { "epoch": 0.6096845489250553, "grad_norm": 0.8239076733589172, "learning_rate": 0.00018053944406252215, "loss": 1.4162, "step": 3793 }, { "epoch": 0.609845288326301, "grad_norm": 0.8407960534095764, "learning_rate": 0.0001805294491153608, "loss": 1.4742, "step": 3794 }, { "epoch": 0.6100060277275468, "grad_norm": 0.7489108443260193, "learning_rate": 0.00018051945187897223, "loss": 1.2645, "step": 3795 }, { "epoch": 0.6101667671287925, "grad_norm": 0.9327155351638794, "learning_rate": 0.0001805094523536406, "loss": 1.5033, "step": 3796 }, { "epoch": 0.6103275065300382, "grad_norm": 0.7914235591888428, "learning_rate": 0.0001804994505396502, "loss": 1.5223, "step": 3797 }, { "epoch": 0.6104882459312839, "grad_norm": 0.8573668599128723, "learning_rate": 0.00018048944643728532, "loss": 1.3934, "step": 3798 }, { "epoch": 0.6106489853325296, "grad_norm": 0.7822215557098389, "learning_rate": 0.00018047944004683037, "loss": 1.4106, "step": 3799 }, { "epoch": 0.6108097247337754, "grad_norm": 0.7630478143692017, "learning_rate": 0.0001804694313685698, "loss": 1.2981, "step": 3800 }, { "epoch": 0.6108097247337754, "eval_loss": 1.4838331937789917, "eval_runtime": 46.2589, "eval_samples_per_second": 5.426, "eval_steps_per_second": 2.724, "step": 3800 } ], "logging_steps": 1, "max_steps": 18663, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.634850546598543e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }