{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1545, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006472491909385113, "grad_norm": 131.13400268554688, "learning_rate": 1.0000000000000002e-06, "loss": 19.7936, "step": 1 }, { "epoch": 0.0012944983818770227, "grad_norm": 134.03347778320312, "learning_rate": 2.0000000000000003e-06, "loss": 19.9265, "step": 2 }, { "epoch": 0.001941747572815534, "grad_norm": 135.04904174804688, "learning_rate": 3e-06, "loss": 19.6922, "step": 3 }, { "epoch": 0.0025889967637540453, "grad_norm": 133.32534790039062, "learning_rate": 4.000000000000001e-06, "loss": 19.9163, "step": 4 }, { "epoch": 0.003236245954692557, "grad_norm": 133.73779296875, "learning_rate": 5e-06, "loss": 19.5099, "step": 5 }, { "epoch": 0.003883495145631068, "grad_norm": 136.7259521484375, "learning_rate": 6e-06, "loss": 19.9571, "step": 6 }, { "epoch": 0.004530744336569579, "grad_norm": 135.7963104248047, "learning_rate": 7.000000000000001e-06, "loss": 19.6701, "step": 7 }, { "epoch": 0.005177993527508091, "grad_norm": 144.41856384277344, "learning_rate": 8.000000000000001e-06, "loss": 19.5384, "step": 8 }, { "epoch": 0.005825242718446602, "grad_norm": 137.59848022460938, "learning_rate": 9e-06, "loss": 19.4975, "step": 9 }, { "epoch": 0.006472491909385114, "grad_norm": 137.11566162109375, "learning_rate": 1e-05, "loss": 19.1857, "step": 10 }, { "epoch": 0.007119741100323625, "grad_norm": 140.35537719726562, "learning_rate": 1.1000000000000001e-05, "loss": 17.4217, "step": 11 }, { "epoch": 0.007766990291262136, "grad_norm": 136.8153533935547, "learning_rate": 1.2e-05, "loss": 16.8234, "step": 12 }, { "epoch": 0.008414239482200648, "grad_norm": 133.3092041015625, "learning_rate": 1.3000000000000001e-05, "loss": 15.1626, "step": 13 }, { "epoch": 0.009061488673139158, "grad_norm": 129.0071258544922, "learning_rate": 1.4000000000000001e-05, "loss": 14.1965, "step": 14 }, { "epoch": 0.009708737864077669, "grad_norm": 126.3600082397461, "learning_rate": 1.5e-05, "loss": 12.8533, "step": 15 }, { "epoch": 0.010355987055016181, "grad_norm": 124.45155334472656, "learning_rate": 1.6000000000000003e-05, "loss": 11.6695, "step": 16 }, { "epoch": 0.011003236245954692, "grad_norm": 121.65681457519531, "learning_rate": 1.7000000000000003e-05, "loss": 10.6721, "step": 17 }, { "epoch": 0.011650485436893204, "grad_norm": 129.11764526367188, "learning_rate": 1.8e-05, "loss": 9.8295, "step": 18 }, { "epoch": 0.012297734627831715, "grad_norm": 130.87242126464844, "learning_rate": 1.9e-05, "loss": 9.0234, "step": 19 }, { "epoch": 0.012944983818770227, "grad_norm": 132.683349609375, "learning_rate": 2e-05, "loss": 7.5961, "step": 20 }, { "epoch": 0.013592233009708738, "grad_norm": 125.61486053466797, "learning_rate": 2.1e-05, "loss": 6.1994, "step": 21 }, { "epoch": 0.01423948220064725, "grad_norm": 110.60992431640625, "learning_rate": 2.2000000000000003e-05, "loss": 4.9317, "step": 22 }, { "epoch": 0.01488673139158576, "grad_norm": 104.66587829589844, "learning_rate": 2.3000000000000003e-05, "loss": 3.7229, "step": 23 }, { "epoch": 0.015533980582524271, "grad_norm": 100.7620620727539, "learning_rate": 2.4e-05, "loss": 2.6222, "step": 24 }, { "epoch": 0.016181229773462782, "grad_norm": 89.32363891601562, "learning_rate": 2.5e-05, "loss": 1.7139, "step": 25 }, { "epoch": 0.016828478964401296, "grad_norm": 76.68353271484375, "learning_rate": 2.6000000000000002e-05, "loss": 1.2252, "step": 26 }, { "epoch": 0.017475728155339806, "grad_norm": 65.069580078125, "learning_rate": 2.7000000000000002e-05, "loss": 0.8928, "step": 27 }, { "epoch": 0.018122977346278317, "grad_norm": 60.632843017578125, "learning_rate": 2.8000000000000003e-05, "loss": 0.7026, "step": 28 }, { "epoch": 0.018770226537216828, "grad_norm": 55.2016487121582, "learning_rate": 2.9e-05, "loss": 0.473, "step": 29 }, { "epoch": 0.019417475728155338, "grad_norm": 39.60809326171875, "learning_rate": 3e-05, "loss": 0.2482, "step": 30 }, { "epoch": 0.020064724919093852, "grad_norm": 22.28648567199707, "learning_rate": 3.1e-05, "loss": 0.1241, "step": 31 }, { "epoch": 0.020711974110032363, "grad_norm": 13.423832893371582, "learning_rate": 3.2000000000000005e-05, "loss": 0.0634, "step": 32 }, { "epoch": 0.021359223300970873, "grad_norm": 3.592240810394287, "learning_rate": 3.3e-05, "loss": 0.0174, "step": 33 }, { "epoch": 0.022006472491909384, "grad_norm": 2.819695472717285, "learning_rate": 3.4000000000000007e-05, "loss": 0.0127, "step": 34 }, { "epoch": 0.022653721682847898, "grad_norm": 1.1562986373901367, "learning_rate": 3.5e-05, "loss": 0.0056, "step": 35 }, { "epoch": 0.02330097087378641, "grad_norm": 0.2208443284034729, "learning_rate": 3.6e-05, "loss": 0.0013, "step": 36 }, { "epoch": 0.02394822006472492, "grad_norm": 0.0697128102183342, "learning_rate": 3.7e-05, "loss": 0.0005, "step": 37 }, { "epoch": 0.02459546925566343, "grad_norm": 0.04032439365983009, "learning_rate": 3.8e-05, "loss": 0.0003, "step": 38 }, { "epoch": 0.02524271844660194, "grad_norm": 0.02219025231897831, "learning_rate": 3.9000000000000006e-05, "loss": 0.0001, "step": 39 }, { "epoch": 0.025889967637540454, "grad_norm": 0.039339829236269, "learning_rate": 4e-05, "loss": 0.0002, "step": 40 }, { "epoch": 0.026537216828478965, "grad_norm": 0.019809404388070107, "learning_rate": 4.1e-05, "loss": 0.0001, "step": 41 }, { "epoch": 0.027184466019417475, "grad_norm": 1.500696063041687, "learning_rate": 4.2e-05, "loss": 0.005, "step": 42 }, { "epoch": 0.027831715210355986, "grad_norm": 0.0020276184659451246, "learning_rate": 4.3e-05, "loss": 0.0, "step": 43 }, { "epoch": 0.0284789644012945, "grad_norm": 0.0019223797135055065, "learning_rate": 4.4000000000000006e-05, "loss": 0.0, "step": 44 }, { "epoch": 0.02912621359223301, "grad_norm": 0.0024162745103240013, "learning_rate": 4.5e-05, "loss": 0.0, "step": 45 }, { "epoch": 0.02977346278317152, "grad_norm": 0.002317259320989251, "learning_rate": 4.600000000000001e-05, "loss": 0.0, "step": 46 }, { "epoch": 0.030420711974110032, "grad_norm": 0.002440544543787837, "learning_rate": 4.7e-05, "loss": 0.0, "step": 47 }, { "epoch": 0.031067961165048542, "grad_norm": 0.0025529128033667803, "learning_rate": 4.8e-05, "loss": 0.0, "step": 48 }, { "epoch": 0.03171521035598705, "grad_norm": 0.0027895700186491013, "learning_rate": 4.9e-05, "loss": 0.0, "step": 49 }, { "epoch": 0.032362459546925564, "grad_norm": 0.0028908746317029, "learning_rate": 5e-05, "loss": 0.0, "step": 50 }, { "epoch": 0.03300970873786408, "grad_norm": 0.0015969317173585296, "learning_rate": 5.1000000000000006e-05, "loss": 0.0, "step": 51 }, { "epoch": 0.03365695792880259, "grad_norm": 0.0016695812810212374, "learning_rate": 5.2000000000000004e-05, "loss": 0.0, "step": 52 }, { "epoch": 0.0343042071197411, "grad_norm": 0.001670464058406651, "learning_rate": 5.300000000000001e-05, "loss": 0.0, "step": 53 }, { "epoch": 0.03495145631067961, "grad_norm": 0.0014726085355505347, "learning_rate": 5.4000000000000005e-05, "loss": 0.0, "step": 54 }, { "epoch": 0.03559870550161812, "grad_norm": 0.0015096040442585945, "learning_rate": 5.500000000000001e-05, "loss": 0.0, "step": 55 }, { "epoch": 0.036245954692556634, "grad_norm": 0.0015506166964769363, "learning_rate": 5.6000000000000006e-05, "loss": 0.0, "step": 56 }, { "epoch": 0.036893203883495145, "grad_norm": 0.0015059434808790684, "learning_rate": 5.6999999999999996e-05, "loss": 0.0, "step": 57 }, { "epoch": 0.037540453074433655, "grad_norm": 0.0015852790093049407, "learning_rate": 5.8e-05, "loss": 0.0, "step": 58 }, { "epoch": 0.038187702265372166, "grad_norm": 0.0014398024650290608, "learning_rate": 5.9e-05, "loss": 0.0, "step": 59 }, { "epoch": 0.038834951456310676, "grad_norm": 0.0017769384430721402, "learning_rate": 6e-05, "loss": 0.0, "step": 60 }, { "epoch": 0.039482200647249194, "grad_norm": 0.0019574002362787724, "learning_rate": 6.1e-05, "loss": 0.0, "step": 61 }, { "epoch": 0.040129449838187704, "grad_norm": 0.0019588808063417673, "learning_rate": 6.2e-05, "loss": 0.0, "step": 62 }, { "epoch": 0.040776699029126215, "grad_norm": 0.0017840194050222635, "learning_rate": 6.3e-05, "loss": 0.0, "step": 63 }, { "epoch": 0.041423948220064725, "grad_norm": 0.001379096764139831, "learning_rate": 6.400000000000001e-05, "loss": 0.0, "step": 64 }, { "epoch": 0.042071197411003236, "grad_norm": 0.0015804730355739594, "learning_rate": 6.500000000000001e-05, "loss": 0.0, "step": 65 }, { "epoch": 0.04271844660194175, "grad_norm": 0.001860379590652883, "learning_rate": 6.6e-05, "loss": 0.0, "step": 66 }, { "epoch": 0.04336569579288026, "grad_norm": 0.001473184092901647, "learning_rate": 6.7e-05, "loss": 0.0, "step": 67 }, { "epoch": 0.04401294498381877, "grad_norm": 0.001431824523024261, "learning_rate": 6.800000000000001e-05, "loss": 0.0, "step": 68 }, { "epoch": 0.04466019417475728, "grad_norm": 0.0015007014153525233, "learning_rate": 6.9e-05, "loss": 0.0, "step": 69 }, { "epoch": 0.045307443365695796, "grad_norm": 0.0013901310740038753, "learning_rate": 7e-05, "loss": 0.0, "step": 70 }, { "epoch": 0.045954692556634306, "grad_norm": 0.00129253591876477, "learning_rate": 7.1e-05, "loss": 0.0, "step": 71 }, { "epoch": 0.04660194174757282, "grad_norm": 0.001261955825611949, "learning_rate": 7.2e-05, "loss": 0.0, "step": 72 }, { "epoch": 0.04724919093851133, "grad_norm": 0.0013884572545066476, "learning_rate": 7.3e-05, "loss": 0.0, "step": 73 }, { "epoch": 0.04789644012944984, "grad_norm": 0.001199840335175395, "learning_rate": 7.4e-05, "loss": 0.0, "step": 74 }, { "epoch": 0.04854368932038835, "grad_norm": 0.0012201342033222318, "learning_rate": 7.500000000000001e-05, "loss": 0.0, "step": 75 }, { "epoch": 0.04919093851132686, "grad_norm": 0.001058132154867053, "learning_rate": 7.6e-05, "loss": 0.0, "step": 76 }, { "epoch": 0.04983818770226537, "grad_norm": 0.0009809430921450257, "learning_rate": 7.7e-05, "loss": 0.0, "step": 77 }, { "epoch": 0.05048543689320388, "grad_norm": 0.000997965456917882, "learning_rate": 7.800000000000001e-05, "loss": 0.0, "step": 78 }, { "epoch": 0.0511326860841424, "grad_norm": 0.0008985965396277606, "learning_rate": 7.900000000000001e-05, "loss": 0.0, "step": 79 }, { "epoch": 0.05177993527508091, "grad_norm": 0.000818964617792517, "learning_rate": 8e-05, "loss": 0.0, "step": 80 }, { "epoch": 0.05242718446601942, "grad_norm": 0.0007367520011030138, "learning_rate": 8.1e-05, "loss": 0.0, "step": 81 }, { "epoch": 0.05307443365695793, "grad_norm": 0.0006985078798606992, "learning_rate": 8.2e-05, "loss": 0.0, "step": 82 }, { "epoch": 0.05372168284789644, "grad_norm": 0.0006616997416131198, "learning_rate": 8.3e-05, "loss": 0.0, "step": 83 }, { "epoch": 0.05436893203883495, "grad_norm": 0.0006774890352971852, "learning_rate": 8.4e-05, "loss": 0.0, "step": 84 }, { "epoch": 0.05501618122977346, "grad_norm": 0.0006354961660690606, "learning_rate": 8.5e-05, "loss": 0.0, "step": 85 }, { "epoch": 0.05566343042071197, "grad_norm": 0.00062626349972561, "learning_rate": 8.6e-05, "loss": 0.0, "step": 86 }, { "epoch": 0.05631067961165048, "grad_norm": 0.0006652774172835052, "learning_rate": 8.7e-05, "loss": 0.0, "step": 87 }, { "epoch": 0.056957928802589, "grad_norm": 0.00057514215586707, "learning_rate": 8.800000000000001e-05, "loss": 0.0, "step": 88 }, { "epoch": 0.05760517799352751, "grad_norm": 0.0005757372709922493, "learning_rate": 8.900000000000001e-05, "loss": 0.0, "step": 89 }, { "epoch": 0.05825242718446602, "grad_norm": 0.0005509501788765192, "learning_rate": 9e-05, "loss": 0.0, "step": 90 }, { "epoch": 0.05889967637540453, "grad_norm": 0.0005754235316999257, "learning_rate": 9.1e-05, "loss": 0.0, "step": 91 }, { "epoch": 0.05954692556634304, "grad_norm": 0.0005598037387244403, "learning_rate": 9.200000000000001e-05, "loss": 0.0, "step": 92 }, { "epoch": 0.06019417475728155, "grad_norm": 0.0004931841394864023, "learning_rate": 9.300000000000001e-05, "loss": 0.0, "step": 93 }, { "epoch": 0.060841423948220064, "grad_norm": 0.0005290132248774171, "learning_rate": 9.4e-05, "loss": 0.0, "step": 94 }, { "epoch": 0.061488673139158574, "grad_norm": 0.0004422192578203976, "learning_rate": 9.5e-05, "loss": 0.0, "step": 95 }, { "epoch": 0.062135922330097085, "grad_norm": 0.00048634959966875613, "learning_rate": 9.6e-05, "loss": 0.0, "step": 96 }, { "epoch": 0.0627831715210356, "grad_norm": 0.0004640915140043944, "learning_rate": 9.7e-05, "loss": 0.0, "step": 97 }, { "epoch": 0.0634304207119741, "grad_norm": 0.0004267208860255778, "learning_rate": 9.8e-05, "loss": 0.0, "step": 98 }, { "epoch": 0.06407766990291262, "grad_norm": 0.0004712632216978818, "learning_rate": 9.900000000000001e-05, "loss": 0.0, "step": 99 }, { "epoch": 0.06472491909385113, "grad_norm": 0.0004027521354146302, "learning_rate": 0.0001, "loss": 0.0, "step": 100 }, { "epoch": 0.06537216828478964, "grad_norm": 0.0003516871656756848, "learning_rate": 9.999988183091062e-05, "loss": 0.0, "step": 101 }, { "epoch": 0.06601941747572816, "grad_norm": 0.0003226413100492209, "learning_rate": 9.999952732420107e-05, "loss": 0.0, "step": 102 }, { "epoch": 0.06666666666666667, "grad_norm": 0.0003031356318388134, "learning_rate": 9.999893648154699e-05, "loss": 0.0, "step": 103 }, { "epoch": 0.06731391585760518, "grad_norm": 0.00033037332468666136, "learning_rate": 9.999810930574118e-05, "loss": 0.0, "step": 104 }, { "epoch": 0.06796116504854369, "grad_norm": 0.0003044827317353338, "learning_rate": 9.999704580069346e-05, "loss": 0.0, "step": 105 }, { "epoch": 0.0686084142394822, "grad_norm": 0.0003044347104150802, "learning_rate": 9.999574597143082e-05, "loss": 0.0, "step": 106 }, { "epoch": 0.06925566343042071, "grad_norm": 0.00029745750362053514, "learning_rate": 9.999420982409722e-05, "loss": 0.0, "step": 107 }, { "epoch": 0.06990291262135923, "grad_norm": 0.0003329005849082023, "learning_rate": 9.999243736595365e-05, "loss": 0.0, "step": 108 }, { "epoch": 0.07055016181229773, "grad_norm": 0.0003853734233416617, "learning_rate": 9.999042860537814e-05, "loss": 0.0, "step": 109 }, { "epoch": 0.07119741100323625, "grad_norm": 0.0003464236797299236, "learning_rate": 9.99881835518656e-05, "loss": 0.0, "step": 110 }, { "epoch": 0.07184466019417475, "grad_norm": 0.00030969688668847084, "learning_rate": 9.998570221602785e-05, "loss": 0.0, "step": 111 }, { "epoch": 0.07249190938511327, "grad_norm": 0.0003167603281326592, "learning_rate": 9.998298460959364e-05, "loss": 0.0, "step": 112 }, { "epoch": 0.07313915857605179, "grad_norm": 0.0003232895105611533, "learning_rate": 9.998003074540838e-05, "loss": 0.0, "step": 113 }, { "epoch": 0.07378640776699029, "grad_norm": 0.00031525976373814046, "learning_rate": 9.997684063743433e-05, "loss": 0.0, "step": 114 }, { "epoch": 0.0744336569579288, "grad_norm": 0.00029791181441396475, "learning_rate": 9.997341430075036e-05, "loss": 0.0, "step": 115 }, { "epoch": 0.07508090614886731, "grad_norm": 0.0002939928963314742, "learning_rate": 9.996975175155196e-05, "loss": 0.0, "step": 116 }, { "epoch": 0.07572815533980583, "grad_norm": 0.0003014066896867007, "learning_rate": 9.996585300715116e-05, "loss": 0.0, "step": 117 }, { "epoch": 0.07637540453074433, "grad_norm": 0.00028381680021993816, "learning_rate": 9.996171808597634e-05, "loss": 0.0, "step": 118 }, { "epoch": 0.07702265372168285, "grad_norm": 0.00030152060207910836, "learning_rate": 9.995734700757235e-05, "loss": 0.0, "step": 119 }, { "epoch": 0.07766990291262135, "grad_norm": 0.0003169839910697192, "learning_rate": 9.995273979260022e-05, "loss": 0.0, "step": 120 }, { "epoch": 0.07831715210355987, "grad_norm": 0.00029620027635246515, "learning_rate": 9.994789646283719e-05, "loss": 0.0, "step": 121 }, { "epoch": 0.07896440129449839, "grad_norm": 0.00030353505280800164, "learning_rate": 9.99428170411765e-05, "loss": 0.0, "step": 122 }, { "epoch": 0.07961165048543689, "grad_norm": 0.0002934718213509768, "learning_rate": 9.99375015516274e-05, "loss": 0.0, "step": 123 }, { "epoch": 0.08025889967637541, "grad_norm": 0.00028418784495443106, "learning_rate": 9.993195001931493e-05, "loss": 0.0, "step": 124 }, { "epoch": 0.08090614886731391, "grad_norm": 0.0002818236534949392, "learning_rate": 9.99261624704799e-05, "loss": 0.0, "step": 125 }, { "epoch": 0.08155339805825243, "grad_norm": 0.0002681802143342793, "learning_rate": 9.992013893247866e-05, "loss": 0.0, "step": 126 }, { "epoch": 0.08220064724919093, "grad_norm": 0.00027062796289101243, "learning_rate": 9.991387943378307e-05, "loss": 0.0, "step": 127 }, { "epoch": 0.08284789644012945, "grad_norm": 0.0002684716018848121, "learning_rate": 9.990738400398027e-05, "loss": 0.0, "step": 128 }, { "epoch": 0.08349514563106795, "grad_norm": 0.00023824458185117692, "learning_rate": 9.990065267377267e-05, "loss": 0.0, "step": 129 }, { "epoch": 0.08414239482200647, "grad_norm": 0.00027047592448070645, "learning_rate": 9.989368547497763e-05, "loss": 0.0, "step": 130 }, { "epoch": 0.08478964401294499, "grad_norm": 0.00025663335691206157, "learning_rate": 9.988648244052747e-05, "loss": 0.0, "step": 131 }, { "epoch": 0.0854368932038835, "grad_norm": 0.00023156039242167026, "learning_rate": 9.987904360446922e-05, "loss": 0.0, "step": 132 }, { "epoch": 0.08608414239482201, "grad_norm": 0.0002463631972204894, "learning_rate": 9.987136900196453e-05, "loss": 0.0, "step": 133 }, { "epoch": 0.08673139158576051, "grad_norm": 0.00025282951537519693, "learning_rate": 9.986345866928941e-05, "loss": 0.0, "step": 134 }, { "epoch": 0.08737864077669903, "grad_norm": 0.0002451727050356567, "learning_rate": 9.985531264383412e-05, "loss": 0.0, "step": 135 }, { "epoch": 0.08802588996763754, "grad_norm": 0.0002607478527352214, "learning_rate": 9.984693096410303e-05, "loss": 0.0, "step": 136 }, { "epoch": 0.08867313915857605, "grad_norm": 0.00024905591271817684, "learning_rate": 9.983831366971432e-05, "loss": 0.0, "step": 137 }, { "epoch": 0.08932038834951456, "grad_norm": 0.00024017650866881013, "learning_rate": 9.982946080139992e-05, "loss": 0.0, "step": 138 }, { "epoch": 0.08996763754045307, "grad_norm": 0.00024329974257852882, "learning_rate": 9.982037240100526e-05, "loss": 0.0, "step": 139 }, { "epoch": 0.09061488673139159, "grad_norm": 0.00023055482597555965, "learning_rate": 9.981104851148904e-05, "loss": 0.0, "step": 140 }, { "epoch": 0.0912621359223301, "grad_norm": 0.00023951951880007982, "learning_rate": 9.980148917692309e-05, "loss": 0.0, "step": 141 }, { "epoch": 0.09190938511326861, "grad_norm": 0.0002374964242335409, "learning_rate": 9.979169444249212e-05, "loss": 0.0, "step": 142 }, { "epoch": 0.09255663430420712, "grad_norm": 0.00024244707310572267, "learning_rate": 9.978166435449352e-05, "loss": 0.0, "step": 143 }, { "epoch": 0.09320388349514563, "grad_norm": 0.00022375534172169864, "learning_rate": 9.977139896033716e-05, "loss": 0.0, "step": 144 }, { "epoch": 0.09385113268608414, "grad_norm": 0.00022595201153308153, "learning_rate": 9.976089830854514e-05, "loss": 0.0, "step": 145 }, { "epoch": 0.09449838187702266, "grad_norm": 0.00021144020138308406, "learning_rate": 9.975016244875151e-05, "loss": 0.0, "step": 146 }, { "epoch": 0.09514563106796116, "grad_norm": 0.00021918013226240873, "learning_rate": 9.973919143170218e-05, "loss": 0.0, "step": 147 }, { "epoch": 0.09579288025889968, "grad_norm": 0.00022214902855921537, "learning_rate": 9.972798530925456e-05, "loss": 0.0, "step": 148 }, { "epoch": 0.0964401294498382, "grad_norm": 0.00022321178403217345, "learning_rate": 9.971654413437732e-05, "loss": 0.0, "step": 149 }, { "epoch": 0.0970873786407767, "grad_norm": 0.0002452544867992401, "learning_rate": 9.97048679611502e-05, "loss": 0.0, "step": 150 }, { "epoch": 0.09773462783171522, "grad_norm": 0.0005459377425722778, "learning_rate": 9.96929568447637e-05, "loss": 0.0, "step": 151 }, { "epoch": 0.09838187702265372, "grad_norm": 0.00016863649943843484, "learning_rate": 9.968081084151884e-05, "loss": 0.0, "step": 152 }, { "epoch": 0.09902912621359224, "grad_norm": 0.00017263075278606266, "learning_rate": 9.966843000882695e-05, "loss": 0.0, "step": 153 }, { "epoch": 0.09967637540453074, "grad_norm": 0.00018214034207630903, "learning_rate": 9.965581440520925e-05, "loss": 0.0, "step": 154 }, { "epoch": 0.10032362459546926, "grad_norm": 0.00018302416719961911, "learning_rate": 9.964296409029675e-05, "loss": 0.0, "step": 155 }, { "epoch": 0.10097087378640776, "grad_norm": 0.00020016191410832107, "learning_rate": 9.962987912482984e-05, "loss": 0.0, "step": 156 }, { "epoch": 0.10161812297734628, "grad_norm": 0.00018125724454876035, "learning_rate": 9.961655957065806e-05, "loss": 0.0, "step": 157 }, { "epoch": 0.1022653721682848, "grad_norm": 0.00016165344277396798, "learning_rate": 9.960300549073978e-05, "loss": 0.0, "step": 158 }, { "epoch": 0.1029126213592233, "grad_norm": 0.00017234106780961156, "learning_rate": 9.958921694914194e-05, "loss": 0.0, "step": 159 }, { "epoch": 0.10355987055016182, "grad_norm": 0.00018339157395530492, "learning_rate": 9.957519401103972e-05, "loss": 0.0, "step": 160 }, { "epoch": 0.10420711974110032, "grad_norm": 0.00018937568529509008, "learning_rate": 9.956093674271622e-05, "loss": 0.0, "step": 161 }, { "epoch": 0.10485436893203884, "grad_norm": 0.0001959225774044171, "learning_rate": 9.95464452115622e-05, "loss": 0.0, "step": 162 }, { "epoch": 0.10550161812297734, "grad_norm": 0.0001900671049952507, "learning_rate": 9.953171948607569e-05, "loss": 0.0, "step": 163 }, { "epoch": 0.10614886731391586, "grad_norm": 0.00021061516599729657, "learning_rate": 9.95167596358617e-05, "loss": 0.0, "step": 164 }, { "epoch": 0.10679611650485436, "grad_norm": 0.00019292550859972835, "learning_rate": 9.950156573163192e-05, "loss": 0.0, "step": 165 }, { "epoch": 0.10744336569579288, "grad_norm": 0.00019005850481335074, "learning_rate": 9.948613784520434e-05, "loss": 0.0, "step": 166 }, { "epoch": 0.1080906148867314, "grad_norm": 0.00017910161113832146, "learning_rate": 9.947047604950294e-05, "loss": 0.0, "step": 167 }, { "epoch": 0.1087378640776699, "grad_norm": 0.00018016544345300645, "learning_rate": 9.94545804185573e-05, "loss": 0.0, "step": 168 }, { "epoch": 0.10938511326860842, "grad_norm": 0.0001976732019102201, "learning_rate": 9.943845102750234e-05, "loss": 0.0, "step": 169 }, { "epoch": 0.11003236245954692, "grad_norm": 0.00019848241936415434, "learning_rate": 9.942208795257786e-05, "loss": 0.0, "step": 170 }, { "epoch": 0.11067961165048544, "grad_norm": 0.00019966305990237743, "learning_rate": 9.940549127112823e-05, "loss": 0.0, "step": 171 }, { "epoch": 0.11132686084142394, "grad_norm": 0.00018405074661131948, "learning_rate": 9.93886610616021e-05, "loss": 0.0, "step": 172 }, { "epoch": 0.11197411003236246, "grad_norm": 0.00019481593335513026, "learning_rate": 9.937159740355182e-05, "loss": 0.0, "step": 173 }, { "epoch": 0.11262135922330097, "grad_norm": 0.0001841599150793627, "learning_rate": 9.93543003776333e-05, "loss": 0.0, "step": 174 }, { "epoch": 0.11326860841423948, "grad_norm": 0.00018602985073812306, "learning_rate": 9.93367700656055e-05, "loss": 0.0, "step": 175 }, { "epoch": 0.113915857605178, "grad_norm": 0.00017339715850539505, "learning_rate": 9.931900655033004e-05, "loss": 0.0, "step": 176 }, { "epoch": 0.1145631067961165, "grad_norm": 0.0001882009528344497, "learning_rate": 9.930100991577089e-05, "loss": 0.0, "step": 177 }, { "epoch": 0.11521035598705502, "grad_norm": 0.00018708783318288624, "learning_rate": 9.928278024699385e-05, "loss": 0.0, "step": 178 }, { "epoch": 0.11585760517799353, "grad_norm": 0.00017336831660941243, "learning_rate": 9.926431763016626e-05, "loss": 0.0, "step": 179 }, { "epoch": 0.11650485436893204, "grad_norm": 0.00015800120308995247, "learning_rate": 9.924562215255655e-05, "loss": 0.0, "step": 180 }, { "epoch": 0.11715210355987055, "grad_norm": 0.00017240441229660064, "learning_rate": 9.922669390253385e-05, "loss": 0.0, "step": 181 }, { "epoch": 0.11779935275080906, "grad_norm": 0.0001618278183741495, "learning_rate": 9.920753296956749e-05, "loss": 0.0, "step": 182 }, { "epoch": 0.11844660194174757, "grad_norm": 0.004332312382757664, "learning_rate": 9.918813944422667e-05, "loss": 0.0, "step": 183 }, { "epoch": 0.11909385113268608, "grad_norm": 0.0001733552635414526, "learning_rate": 9.916851341818001e-05, "loss": 0.0, "step": 184 }, { "epoch": 0.11974110032362459, "grad_norm": 0.00018002038996201009, "learning_rate": 9.91486549841951e-05, "loss": 0.0, "step": 185 }, { "epoch": 0.1203883495145631, "grad_norm": 0.00016852655971888453, "learning_rate": 9.912856423613804e-05, "loss": 0.0, "step": 186 }, { "epoch": 0.12103559870550162, "grad_norm": 0.00016074655286502093, "learning_rate": 9.910824126897308e-05, "loss": 0.0, "step": 187 }, { "epoch": 0.12168284789644013, "grad_norm": 0.00018699858628679067, "learning_rate": 9.908768617876205e-05, "loss": 0.0, "step": 188 }, { "epoch": 0.12233009708737864, "grad_norm": 0.00017236075655091554, "learning_rate": 9.906689906266401e-05, "loss": 0.0, "step": 189 }, { "epoch": 0.12297734627831715, "grad_norm": 0.0001635546504985541, "learning_rate": 9.904588001893477e-05, "loss": 0.0, "step": 190 }, { "epoch": 0.12362459546925567, "grad_norm": 0.00016175238124560565, "learning_rate": 9.902462914692633e-05, "loss": 0.0, "step": 191 }, { "epoch": 0.12427184466019417, "grad_norm": 0.19127343595027924, "learning_rate": 9.900314654708659e-05, "loss": 0.0006, "step": 192 }, { "epoch": 0.12491909385113269, "grad_norm": 0.00015970351523719728, "learning_rate": 9.898143232095869e-05, "loss": 0.0, "step": 193 }, { "epoch": 0.1255663430420712, "grad_norm": 0.00017519606626592577, "learning_rate": 9.895948657118064e-05, "loss": 0.0, "step": 194 }, { "epoch": 0.1262135922330097, "grad_norm": 0.00017720655887387693, "learning_rate": 9.893730940148482e-05, "loss": 0.0, "step": 195 }, { "epoch": 0.1268608414239482, "grad_norm": 0.0001791470276657492, "learning_rate": 9.891490091669747e-05, "loss": 0.0, "step": 196 }, { "epoch": 0.12750809061488674, "grad_norm": 0.00019451871048659086, "learning_rate": 9.88922612227382e-05, "loss": 0.0, "step": 197 }, { "epoch": 0.12815533980582525, "grad_norm": 0.0001841304765548557, "learning_rate": 9.886939042661949e-05, "loss": 0.0, "step": 198 }, { "epoch": 0.12880258899676375, "grad_norm": 0.00019261128909420222, "learning_rate": 9.884628863644617e-05, "loss": 0.0, "step": 199 }, { "epoch": 0.12944983818770225, "grad_norm": 0.00020318938186392188, "learning_rate": 9.882295596141496e-05, "loss": 0.0, "step": 200 }, { "epoch": 0.13009708737864079, "grad_norm": 0.00020783746731467545, "learning_rate": 9.87993925118139e-05, "loss": 0.0, "step": 201 }, { "epoch": 0.1307443365695793, "grad_norm": 0.00019535954925231636, "learning_rate": 9.877559839902184e-05, "loss": 0.0, "step": 202 }, { "epoch": 0.1313915857605178, "grad_norm": 0.00019633652118500322, "learning_rate": 9.875157373550792e-05, "loss": 0.0, "step": 203 }, { "epoch": 0.13203883495145632, "grad_norm": 0.00019633228657767177, "learning_rate": 9.872731863483105e-05, "loss": 0.0, "step": 204 }, { "epoch": 0.13268608414239483, "grad_norm": 0.00020233304530847818, "learning_rate": 9.870283321163934e-05, "loss": 0.0, "step": 205 }, { "epoch": 0.13333333333333333, "grad_norm": 0.00019641958351712674, "learning_rate": 9.867811758166963e-05, "loss": 0.0, "step": 206 }, { "epoch": 0.13398058252427184, "grad_norm": 0.00019531269208528101, "learning_rate": 9.865317186174683e-05, "loss": 0.0, "step": 207 }, { "epoch": 0.13462783171521037, "grad_norm": 0.00021375597862061113, "learning_rate": 9.862799616978347e-05, "loss": 0.0, "step": 208 }, { "epoch": 0.13527508090614887, "grad_norm": 0.00019547225383576006, "learning_rate": 9.86025906247791e-05, "loss": 0.0, "step": 209 }, { "epoch": 0.13592233009708737, "grad_norm": 0.00022372894454747438, "learning_rate": 9.85769553468197e-05, "loss": 0.0, "step": 210 }, { "epoch": 0.13656957928802588, "grad_norm": 0.00020901462994515896, "learning_rate": 9.85510904570772e-05, "loss": 0.0, "step": 211 }, { "epoch": 0.1372168284789644, "grad_norm": 0.00021962117170915008, "learning_rate": 9.852499607780881e-05, "loss": 0.0, "step": 212 }, { "epoch": 0.1378640776699029, "grad_norm": 0.0002420559903839603, "learning_rate": 9.849867233235647e-05, "loss": 0.0, "step": 213 }, { "epoch": 0.13851132686084142, "grad_norm": 0.00020637971465475857, "learning_rate": 9.847211934514636e-05, "loss": 0.0, "step": 214 }, { "epoch": 0.13915857605177995, "grad_norm": 0.0002189079677918926, "learning_rate": 9.844533724168809e-05, "loss": 0.0, "step": 215 }, { "epoch": 0.13980582524271845, "grad_norm": 0.00023518242232967168, "learning_rate": 9.84183261485744e-05, "loss": 0.0, "step": 216 }, { "epoch": 0.14045307443365695, "grad_norm": 0.0002103596634697169, "learning_rate": 9.839108619348029e-05, "loss": 0.0, "step": 217 }, { "epoch": 0.14110032362459546, "grad_norm": 0.00021611133706755936, "learning_rate": 9.836361750516262e-05, "loss": 0.0, "step": 218 }, { "epoch": 0.141747572815534, "grad_norm": 0.00023783188953530043, "learning_rate": 9.833592021345937e-05, "loss": 0.0, "step": 219 }, { "epoch": 0.1423948220064725, "grad_norm": 0.00024469022173434496, "learning_rate": 9.83079944492891e-05, "loss": 0.0, "step": 220 }, { "epoch": 0.143042071197411, "grad_norm": 0.0002635126293171197, "learning_rate": 9.82798403446503e-05, "loss": 0.0, "step": 221 }, { "epoch": 0.1436893203883495, "grad_norm": 0.00025157438358291984, "learning_rate": 9.825145803262075e-05, "loss": 0.0, "step": 222 }, { "epoch": 0.14433656957928803, "grad_norm": 0.00023788264661561698, "learning_rate": 9.822284764735694e-05, "loss": 0.0, "step": 223 }, { "epoch": 0.14498381877022654, "grad_norm": 0.0002576147671788931, "learning_rate": 9.819400932409339e-05, "loss": 0.0, "step": 224 }, { "epoch": 0.14563106796116504, "grad_norm": 0.00025982639635913074, "learning_rate": 9.816494319914203e-05, "loss": 0.0, "step": 225 }, { "epoch": 0.14627831715210357, "grad_norm": 0.00024497153935953975, "learning_rate": 9.813564940989157e-05, "loss": 0.0, "step": 226 }, { "epoch": 0.14692556634304207, "grad_norm": 0.0002377478958806023, "learning_rate": 9.810612809480684e-05, "loss": 0.0, "step": 227 }, { "epoch": 0.14757281553398058, "grad_norm": 0.0002221636677859351, "learning_rate": 9.807637939342809e-05, "loss": 0.0, "step": 228 }, { "epoch": 0.14822006472491908, "grad_norm": 0.00021500229195225984, "learning_rate": 9.804640344637042e-05, "loss": 0.0, "step": 229 }, { "epoch": 0.1488673139158576, "grad_norm": 0.00021654285956174135, "learning_rate": 9.801620039532302e-05, "loss": 0.0, "step": 230 }, { "epoch": 0.14951456310679612, "grad_norm": 0.0002289855619892478, "learning_rate": 9.798577038304859e-05, "loss": 0.0, "step": 231 }, { "epoch": 0.15016181229773462, "grad_norm": 0.00022614987392444164, "learning_rate": 9.79551135533826e-05, "loss": 0.0, "step": 232 }, { "epoch": 0.15080906148867315, "grad_norm": 0.00022708874894306064, "learning_rate": 9.792423005123265e-05, "loss": 0.0, "step": 233 }, { "epoch": 0.15145631067961166, "grad_norm": 0.00020380753267090768, "learning_rate": 9.789312002257774e-05, "loss": 0.0, "step": 234 }, { "epoch": 0.15210355987055016, "grad_norm": 0.00019009801326319575, "learning_rate": 9.786178361446759e-05, "loss": 0.0, "step": 235 }, { "epoch": 0.15275080906148866, "grad_norm": 0.00020547406165860593, "learning_rate": 9.783022097502204e-05, "loss": 0.0, "step": 236 }, { "epoch": 0.1533980582524272, "grad_norm": 0.0002082720020553097, "learning_rate": 9.779843225343022e-05, "loss": 0.0, "step": 237 }, { "epoch": 0.1540453074433657, "grad_norm": 0.00019760527356993407, "learning_rate": 9.776641759994987e-05, "loss": 0.0, "step": 238 }, { "epoch": 0.1546925566343042, "grad_norm": 0.00019591070304159075, "learning_rate": 9.77341771659067e-05, "loss": 0.0, "step": 239 }, { "epoch": 0.1553398058252427, "grad_norm": 0.0002074109361274168, "learning_rate": 9.770171110369362e-05, "loss": 0.0, "step": 240 }, { "epoch": 0.15598705501618124, "grad_norm": 0.00018994529091287404, "learning_rate": 9.766901956677005e-05, "loss": 0.0, "step": 241 }, { "epoch": 0.15663430420711974, "grad_norm": 0.00021605798974633217, "learning_rate": 9.763610270966114e-05, "loss": 0.0, "step": 242 }, { "epoch": 0.15728155339805824, "grad_norm": 0.00019959028577432036, "learning_rate": 9.760296068795709e-05, "loss": 0.0, "step": 243 }, { "epoch": 0.15792880258899678, "grad_norm": 0.00020188541384413838, "learning_rate": 9.75695936583124e-05, "loss": 0.0, "step": 244 }, { "epoch": 0.15857605177993528, "grad_norm": 0.0001783160405466333, "learning_rate": 9.753600177844513e-05, "loss": 0.0, "step": 245 }, { "epoch": 0.15922330097087378, "grad_norm": 0.0001695117389317602, "learning_rate": 9.750218520713617e-05, "loss": 0.0, "step": 246 }, { "epoch": 0.15987055016181229, "grad_norm": 0.00018214939336758107, "learning_rate": 9.746814410422842e-05, "loss": 0.0, "step": 247 }, { "epoch": 0.16051779935275082, "grad_norm": 0.00017151051724795252, "learning_rate": 9.743387863062617e-05, "loss": 0.0, "step": 248 }, { "epoch": 0.16116504854368932, "grad_norm": 0.00017129718617070466, "learning_rate": 9.739938894829419e-05, "loss": 0.0, "step": 249 }, { "epoch": 0.16181229773462782, "grad_norm": 0.00016355953994207084, "learning_rate": 9.736467522025705e-05, "loss": 0.0, "step": 250 }, { "epoch": 0.16245954692556636, "grad_norm": 0.00017517742526251823, "learning_rate": 9.732973761059836e-05, "loss": 0.0, "step": 251 }, { "epoch": 0.16310679611650486, "grad_norm": 0.00015581738261971623, "learning_rate": 9.72945762844599e-05, "loss": 0.0, "step": 252 }, { "epoch": 0.16375404530744336, "grad_norm": 0.00016475978191010654, "learning_rate": 9.725919140804099e-05, "loss": 0.0, "step": 253 }, { "epoch": 0.16440129449838187, "grad_norm": 0.00018481945153325796, "learning_rate": 9.722358314859753e-05, "loss": 0.0, "step": 254 }, { "epoch": 0.1650485436893204, "grad_norm": 0.00014623126480728388, "learning_rate": 9.718775167444139e-05, "loss": 0.0, "step": 255 }, { "epoch": 0.1656957928802589, "grad_norm": 0.00013941845099907368, "learning_rate": 9.715169715493944e-05, "loss": 0.0, "step": 256 }, { "epoch": 0.1663430420711974, "grad_norm": 0.0001636711967876181, "learning_rate": 9.711541976051288e-05, "loss": 0.0, "step": 257 }, { "epoch": 0.1669902912621359, "grad_norm": 0.00016159283404704183, "learning_rate": 9.707891966263638e-05, "loss": 0.0, "step": 258 }, { "epoch": 0.16763754045307444, "grad_norm": 0.00015674662427045405, "learning_rate": 9.704219703383728e-05, "loss": 0.0, "step": 259 }, { "epoch": 0.16828478964401294, "grad_norm": 0.00016930657147895545, "learning_rate": 9.700525204769475e-05, "loss": 0.0, "step": 260 }, { "epoch": 0.16893203883495145, "grad_norm": 0.00015592656563967466, "learning_rate": 9.696808487883902e-05, "loss": 0.0, "step": 261 }, { "epoch": 0.16957928802588998, "grad_norm": 0.0001606248551979661, "learning_rate": 9.69306957029505e-05, "loss": 0.0, "step": 262 }, { "epoch": 0.17022653721682848, "grad_norm": 0.00017652320093475282, "learning_rate": 9.689308469675899e-05, "loss": 0.0, "step": 263 }, { "epoch": 0.170873786407767, "grad_norm": 0.00016366803902201355, "learning_rate": 9.685525203804281e-05, "loss": 0.0, "step": 264 }, { "epoch": 0.1715210355987055, "grad_norm": 0.0001626458833925426, "learning_rate": 9.681719790562801e-05, "loss": 0.0, "step": 265 }, { "epoch": 0.17216828478964402, "grad_norm": 0.00016154437616933137, "learning_rate": 9.677892247938747e-05, "loss": 0.0, "step": 266 }, { "epoch": 0.17281553398058253, "grad_norm": 0.00014621441368944943, "learning_rate": 9.674042594024008e-05, "loss": 0.0, "step": 267 }, { "epoch": 0.17346278317152103, "grad_norm": 0.00015805299335625023, "learning_rate": 9.670170847014989e-05, "loss": 0.0, "step": 268 }, { "epoch": 0.17411003236245956, "grad_norm": 0.00014822720550000668, "learning_rate": 9.66627702521252e-05, "loss": 0.0, "step": 269 }, { "epoch": 0.17475728155339806, "grad_norm": 0.0001588267186889425, "learning_rate": 9.662361147021779e-05, "loss": 0.0, "step": 270 }, { "epoch": 0.17540453074433657, "grad_norm": 0.00017156188550870866, "learning_rate": 9.658423230952196e-05, "loss": 0.0, "step": 271 }, { "epoch": 0.17605177993527507, "grad_norm": 0.00017044706328306347, "learning_rate": 9.654463295617367e-05, "loss": 0.0, "step": 272 }, { "epoch": 0.1766990291262136, "grad_norm": 0.0001705262257019058, "learning_rate": 9.650481359734973e-05, "loss": 0.0, "step": 273 }, { "epoch": 0.1773462783171521, "grad_norm": 0.00015603989595547318, "learning_rate": 9.64647744212668e-05, "loss": 0.0, "step": 274 }, { "epoch": 0.1779935275080906, "grad_norm": 0.00015888724010437727, "learning_rate": 9.642451561718064e-05, "loss": 0.0, "step": 275 }, { "epoch": 0.1786407766990291, "grad_norm": 0.0001627927995286882, "learning_rate": 9.638403737538506e-05, "loss": 0.0, "step": 276 }, { "epoch": 0.17928802588996764, "grad_norm": 0.00015135972353164107, "learning_rate": 9.634333988721117e-05, "loss": 0.0, "step": 277 }, { "epoch": 0.17993527508090615, "grad_norm": 0.00016265231533907354, "learning_rate": 9.630242334502637e-05, "loss": 0.0, "step": 278 }, { "epoch": 0.18058252427184465, "grad_norm": 0.00014934848877601326, "learning_rate": 9.626128794223346e-05, "loss": 0.0, "step": 279 }, { "epoch": 0.18122977346278318, "grad_norm": 0.00014458075747825205, "learning_rate": 9.621993387326978e-05, "loss": 0.0, "step": 280 }, { "epoch": 0.1818770226537217, "grad_norm": 0.00014640596054960042, "learning_rate": 9.617836133360622e-05, "loss": 0.0, "step": 281 }, { "epoch": 0.1825242718446602, "grad_norm": 0.00014636709238402545, "learning_rate": 9.613657051974637e-05, "loss": 0.0, "step": 282 }, { "epoch": 0.1831715210355987, "grad_norm": 0.00014051252219360322, "learning_rate": 9.609456162922553e-05, "loss": 0.0, "step": 283 }, { "epoch": 0.18381877022653723, "grad_norm": 0.00013853148266207427, "learning_rate": 9.605233486060976e-05, "loss": 0.0, "step": 284 }, { "epoch": 0.18446601941747573, "grad_norm": 0.0001366595533909276, "learning_rate": 9.600989041349505e-05, "loss": 0.0, "step": 285 }, { "epoch": 0.18511326860841423, "grad_norm": 0.00014638857101090252, "learning_rate": 9.596722848850623e-05, "loss": 0.0, "step": 286 }, { "epoch": 0.18576051779935276, "grad_norm": 0.00014042359543964267, "learning_rate": 9.592434928729616e-05, "loss": 0.0, "step": 287 }, { "epoch": 0.18640776699029127, "grad_norm": 0.00014442209794651717, "learning_rate": 9.588125301254467e-05, "loss": 0.0, "step": 288 }, { "epoch": 0.18705501618122977, "grad_norm": 0.0001433340075891465, "learning_rate": 9.583793986795768e-05, "loss": 0.0, "step": 289 }, { "epoch": 0.18770226537216828, "grad_norm": 0.00014637032290920615, "learning_rate": 9.579441005826618e-05, "loss": 0.0, "step": 290 }, { "epoch": 0.1883495145631068, "grad_norm": 0.0001394706778228283, "learning_rate": 9.575066378922526e-05, "loss": 0.0, "step": 291 }, { "epoch": 0.1889967637540453, "grad_norm": 0.00013563538959715515, "learning_rate": 9.570670126761323e-05, "loss": 0.0, "step": 292 }, { "epoch": 0.18964401294498381, "grad_norm": 0.00013950714492239058, "learning_rate": 9.566252270123052e-05, "loss": 0.0, "step": 293 }, { "epoch": 0.19029126213592232, "grad_norm": 0.000131804816192016, "learning_rate": 9.561812829889876e-05, "loss": 0.0, "step": 294 }, { "epoch": 0.19093851132686085, "grad_norm": 0.00013947054685559124, "learning_rate": 9.557351827045981e-05, "loss": 0.0, "step": 295 }, { "epoch": 0.19158576051779935, "grad_norm": 0.0001240325509570539, "learning_rate": 9.552869282677471e-05, "loss": 0.0, "step": 296 }, { "epoch": 0.19223300970873786, "grad_norm": 0.0001297966664424166, "learning_rate": 9.548365217972275e-05, "loss": 0.0, "step": 297 }, { "epoch": 0.1928802588996764, "grad_norm": 0.0001288438797928393, "learning_rate": 9.543839654220042e-05, "loss": 0.0, "step": 298 }, { "epoch": 0.1935275080906149, "grad_norm": 0.0001119391163229011, "learning_rate": 9.539292612812038e-05, "loss": 0.0, "step": 299 }, { "epoch": 0.1941747572815534, "grad_norm": 0.00012494742986746132, "learning_rate": 9.534724115241059e-05, "loss": 0.0, "step": 300 }, { "epoch": 0.1948220064724919, "grad_norm": 0.0001220121412188746, "learning_rate": 9.53013418310131e-05, "loss": 0.0, "step": 301 }, { "epoch": 0.19546925566343043, "grad_norm": 0.00013270293129608035, "learning_rate": 9.525522838088314e-05, "loss": 0.0, "step": 302 }, { "epoch": 0.19611650485436893, "grad_norm": 0.00012014736421406269, "learning_rate": 9.52089010199881e-05, "loss": 0.0, "step": 303 }, { "epoch": 0.19676375404530744, "grad_norm": 0.00011824521789094433, "learning_rate": 9.516235996730645e-05, "loss": 0.0, "step": 304 }, { "epoch": 0.19741100323624594, "grad_norm": 0.00012209865963086486, "learning_rate": 9.511560544282676e-05, "loss": 0.0, "step": 305 }, { "epoch": 0.19805825242718447, "grad_norm": 0.00012866368342656642, "learning_rate": 9.506863766754659e-05, "loss": 0.0, "step": 306 }, { "epoch": 0.19870550161812298, "grad_norm": 0.00012406735913828015, "learning_rate": 9.502145686347153e-05, "loss": 0.0, "step": 307 }, { "epoch": 0.19935275080906148, "grad_norm": 0.000119224758236669, "learning_rate": 9.497406325361407e-05, "loss": 0.0, "step": 308 }, { "epoch": 0.2, "grad_norm": 0.00011337314936099574, "learning_rate": 9.492645706199263e-05, "loss": 0.0, "step": 309 }, { "epoch": 0.20064724919093851, "grad_norm": 0.00012155854346929118, "learning_rate": 9.487863851363038e-05, "loss": 0.0, "step": 310 }, { "epoch": 0.20129449838187702, "grad_norm": 0.00012016961409244686, "learning_rate": 9.483060783455432e-05, "loss": 0.0, "step": 311 }, { "epoch": 0.20194174757281552, "grad_norm": 0.00012892392987851053, "learning_rate": 9.478236525179412e-05, "loss": 0.0, "step": 312 }, { "epoch": 0.20258899676375405, "grad_norm": 0.00010667412425391376, "learning_rate": 9.473391099338102e-05, "loss": 0.0, "step": 313 }, { "epoch": 0.20323624595469256, "grad_norm": 0.00011101656855316833, "learning_rate": 9.468524528834689e-05, "loss": 0.0, "step": 314 }, { "epoch": 0.20388349514563106, "grad_norm": 0.00012014104140689597, "learning_rate": 9.4636368366723e-05, "loss": 0.0, "step": 315 }, { "epoch": 0.2045307443365696, "grad_norm": 0.00011004837142536417, "learning_rate": 9.4587280459539e-05, "loss": 0.0, "step": 316 }, { "epoch": 0.2051779935275081, "grad_norm": 0.00011101589188911021, "learning_rate": 9.45379817988218e-05, "loss": 0.0, "step": 317 }, { "epoch": 0.2058252427184466, "grad_norm": 0.00011392335727578029, "learning_rate": 9.448847261759455e-05, "loss": 0.0, "step": 318 }, { "epoch": 0.2064724919093851, "grad_norm": 0.00011291310511296615, "learning_rate": 9.443875314987543e-05, "loss": 0.0, "step": 319 }, { "epoch": 0.20711974110032363, "grad_norm": 0.00011398544302210212, "learning_rate": 9.43888236306766e-05, "loss": 0.0, "step": 320 }, { "epoch": 0.20776699029126214, "grad_norm": 0.0001211681155837141, "learning_rate": 9.43386842960031e-05, "loss": 0.0, "step": 321 }, { "epoch": 0.20841423948220064, "grad_norm": 0.00011831600568257272, "learning_rate": 9.428833538285171e-05, "loss": 0.0, "step": 322 }, { "epoch": 0.20906148867313915, "grad_norm": 0.00011874661868205294, "learning_rate": 9.423777712920985e-05, "loss": 0.0, "step": 323 }, { "epoch": 0.20970873786407768, "grad_norm": 0.00012406357564032078, "learning_rate": 9.418700977405441e-05, "loss": 0.0, "step": 324 }, { "epoch": 0.21035598705501618, "grad_norm": 0.0001169187089544721, "learning_rate": 9.413603355735069e-05, "loss": 0.0, "step": 325 }, { "epoch": 0.21100323624595468, "grad_norm": 0.00011733340215869248, "learning_rate": 9.40848487200512e-05, "loss": 0.0, "step": 326 }, { "epoch": 0.21165048543689322, "grad_norm": 0.00011395466572139412, "learning_rate": 9.403345550409459e-05, "loss": 0.0, "step": 327 }, { "epoch": 0.21229773462783172, "grad_norm": 0.00011013524635927752, "learning_rate": 9.398185415240443e-05, "loss": 0.0, "step": 328 }, { "epoch": 0.21294498381877022, "grad_norm": 0.00011641709716059268, "learning_rate": 9.39300449088881e-05, "loss": 0.0, "step": 329 }, { "epoch": 0.21359223300970873, "grad_norm": 0.00011008779256371781, "learning_rate": 9.387802801843563e-05, "loss": 0.0, "step": 330 }, { "epoch": 0.21423948220064726, "grad_norm": 0.0001168397138826549, "learning_rate": 9.382580372691861e-05, "loss": 0.0, "step": 331 }, { "epoch": 0.21488673139158576, "grad_norm": 0.0001086967095034197, "learning_rate": 9.377337228118889e-05, "loss": 0.0, "step": 332 }, { "epoch": 0.21553398058252426, "grad_norm": 0.00010186529834754765, "learning_rate": 9.372073392907752e-05, "loss": 0.0, "step": 333 }, { "epoch": 0.2161812297734628, "grad_norm": 0.00011004404223058373, "learning_rate": 9.366788891939353e-05, "loss": 0.0, "step": 334 }, { "epoch": 0.2168284789644013, "grad_norm": 0.0001134696212830022, "learning_rate": 9.361483750192282e-05, "loss": 0.0, "step": 335 }, { "epoch": 0.2174757281553398, "grad_norm": 0.00011293300485704094, "learning_rate": 9.356157992742688e-05, "loss": 0.0, "step": 336 }, { "epoch": 0.2181229773462783, "grad_norm": 0.00011053043272113428, "learning_rate": 9.350811644764166e-05, "loss": 0.0, "step": 337 }, { "epoch": 0.21877022653721684, "grad_norm": 0.00010769651998998597, "learning_rate": 9.345444731527642e-05, "loss": 0.0, "step": 338 }, { "epoch": 0.21941747572815534, "grad_norm": 0.00011102236749138683, "learning_rate": 9.340057278401245e-05, "loss": 0.0, "step": 339 }, { "epoch": 0.22006472491909385, "grad_norm": 0.00011152233491884544, "learning_rate": 9.334649310850189e-05, "loss": 0.0, "step": 340 }, { "epoch": 0.22071197411003235, "grad_norm": 0.0001052344887284562, "learning_rate": 9.32922085443666e-05, "loss": 0.0, "step": 341 }, { "epoch": 0.22135922330097088, "grad_norm": 0.00010328974894946441, "learning_rate": 9.323771934819691e-05, "loss": 0.0, "step": 342 }, { "epoch": 0.22200647249190938, "grad_norm": 0.0001047630503308028, "learning_rate": 9.318302577755033e-05, "loss": 0.0, "step": 343 }, { "epoch": 0.2226537216828479, "grad_norm": 9.896550182020292e-05, "learning_rate": 9.312812809095046e-05, "loss": 0.0, "step": 344 }, { "epoch": 0.22330097087378642, "grad_norm": 0.00011004482075804844, "learning_rate": 9.307302654788568e-05, "loss": 0.0, "step": 345 }, { "epoch": 0.22394822006472492, "grad_norm": 9.414890519110486e-05, "learning_rate": 9.301772140880793e-05, "loss": 0.0, "step": 346 }, { "epoch": 0.22459546925566343, "grad_norm": 0.00010673429642338306, "learning_rate": 9.296221293513156e-05, "loss": 0.0, "step": 347 }, { "epoch": 0.22524271844660193, "grad_norm": 8.838909707264975e-05, "learning_rate": 9.290650138923201e-05, "loss": 0.0, "step": 348 }, { "epoch": 0.22588996763754046, "grad_norm": 9.604336810298264e-05, "learning_rate": 9.285058703444454e-05, "loss": 0.0, "step": 349 }, { "epoch": 0.22653721682847897, "grad_norm": 9.314525232184678e-05, "learning_rate": 9.279447013506313e-05, "loss": 0.0, "step": 350 }, { "epoch": 0.22718446601941747, "grad_norm": 0.00010178219235967845, "learning_rate": 9.273815095633907e-05, "loss": 0.0, "step": 351 }, { "epoch": 0.227831715210356, "grad_norm": 9.605660306988284e-05, "learning_rate": 9.268162976447981e-05, "loss": 0.0, "step": 352 }, { "epoch": 0.2284789644012945, "grad_norm": 8.977685502031818e-05, "learning_rate": 9.262490682664767e-05, "loss": 0.0, "step": 353 }, { "epoch": 0.229126213592233, "grad_norm": 9.409833000972867e-05, "learning_rate": 9.256798241095854e-05, "loss": 0.0, "step": 354 }, { "epoch": 0.2297734627831715, "grad_norm": 8.450518362224102e-05, "learning_rate": 9.251085678648072e-05, "loss": 0.0, "step": 355 }, { "epoch": 0.23042071197411004, "grad_norm": 8.501296542817727e-05, "learning_rate": 9.245353022323349e-05, "loss": 0.0, "step": 356 }, { "epoch": 0.23106796116504855, "grad_norm": 8.210219675675035e-05, "learning_rate": 9.239600299218595e-05, "loss": 0.0, "step": 357 }, { "epoch": 0.23171521035598705, "grad_norm": 9.697939094621688e-05, "learning_rate": 9.23382753652558e-05, "loss": 0.0, "step": 358 }, { "epoch": 0.23236245954692555, "grad_norm": 9.749876335263252e-05, "learning_rate": 9.228034761530779e-05, "loss": 0.0, "step": 359 }, { "epoch": 0.23300970873786409, "grad_norm": 9.551032417220995e-05, "learning_rate": 9.222222001615274e-05, "loss": 0.0, "step": 360 }, { "epoch": 0.2336569579288026, "grad_norm": 0.00010811651009134948, "learning_rate": 9.216389284254608e-05, "loss": 0.0, "step": 361 }, { "epoch": 0.2343042071197411, "grad_norm": 9.555898577673361e-05, "learning_rate": 9.210536637018654e-05, "loss": 0.0, "step": 362 }, { "epoch": 0.23495145631067962, "grad_norm": 9.75175789790228e-05, "learning_rate": 9.204664087571496e-05, "loss": 0.0, "step": 363 }, { "epoch": 0.23559870550161813, "grad_norm": 8.981506834970787e-05, "learning_rate": 9.198771663671282e-05, "loss": 0.0, "step": 364 }, { "epoch": 0.23624595469255663, "grad_norm": 9.2737638624385e-05, "learning_rate": 9.192859393170108e-05, "loss": 0.0, "step": 365 }, { "epoch": 0.23689320388349513, "grad_norm": 9.075032721739262e-05, "learning_rate": 9.186927304013881e-05, "loss": 0.0, "step": 366 }, { "epoch": 0.23754045307443367, "grad_norm": 8.599292777944356e-05, "learning_rate": 9.180975424242183e-05, "loss": 0.0, "step": 367 }, { "epoch": 0.23818770226537217, "grad_norm": 8.066848386079073e-05, "learning_rate": 9.175003781988142e-05, "loss": 0.0, "step": 368 }, { "epoch": 0.23883495145631067, "grad_norm": 9.653223241912201e-05, "learning_rate": 9.169012405478299e-05, "loss": 0.0, "step": 369 }, { "epoch": 0.23948220064724918, "grad_norm": 9.420322021469474e-05, "learning_rate": 9.163001323032474e-05, "loss": 0.0, "step": 370 }, { "epoch": 0.2401294498381877, "grad_norm": 9.322749974671751e-05, "learning_rate": 9.156970563063634e-05, "loss": 0.0, "step": 371 }, { "epoch": 0.2407766990291262, "grad_norm": 9.559163299854845e-05, "learning_rate": 9.150920154077754e-05, "loss": 0.0, "step": 372 }, { "epoch": 0.24142394822006472, "grad_norm": 9.618017793400213e-05, "learning_rate": 9.144850124673688e-05, "loss": 0.0, "step": 373 }, { "epoch": 0.24207119741100325, "grad_norm": 9.705295815365389e-05, "learning_rate": 9.138760503543028e-05, "loss": 0.0, "step": 374 }, { "epoch": 0.24271844660194175, "grad_norm": 9.22878971323371e-05, "learning_rate": 9.132651319469975e-05, "loss": 0.0, "step": 375 }, { "epoch": 0.24336569579288025, "grad_norm": 9.562398918205872e-05, "learning_rate": 9.126522601331198e-05, "loss": 0.0, "step": 376 }, { "epoch": 0.24401294498381876, "grad_norm": 9.663856326369569e-05, "learning_rate": 9.120374378095698e-05, "loss": 0.0, "step": 377 }, { "epoch": 0.2446601941747573, "grad_norm": 9.469805809203535e-05, "learning_rate": 9.114206678824673e-05, "loss": 0.0, "step": 378 }, { "epoch": 0.2453074433656958, "grad_norm": 9.132693230640143e-05, "learning_rate": 9.108019532671376e-05, "loss": 0.0, "step": 379 }, { "epoch": 0.2459546925566343, "grad_norm": 8.263797644758597e-05, "learning_rate": 9.10181296888099e-05, "loss": 0.0, "step": 380 }, { "epoch": 0.24660194174757283, "grad_norm": 8.940459520090371e-05, "learning_rate": 9.095587016790472e-05, "loss": 0.0, "step": 381 }, { "epoch": 0.24724919093851133, "grad_norm": 8.260810864157975e-05, "learning_rate": 9.089341705828424e-05, "loss": 0.0, "step": 382 }, { "epoch": 0.24789644012944984, "grad_norm": 8.649061783216894e-05, "learning_rate": 9.083077065514955e-05, "loss": 0.0, "step": 383 }, { "epoch": 0.24854368932038834, "grad_norm": 8.1180791312363e-05, "learning_rate": 9.076793125461541e-05, "loss": 0.0, "step": 384 }, { "epoch": 0.24919093851132687, "grad_norm": 8.068848546827212e-05, "learning_rate": 9.070489915370877e-05, "loss": 0.0, "step": 385 }, { "epoch": 0.24983818770226537, "grad_norm": 8.740034536458552e-05, "learning_rate": 9.06416746503675e-05, "loss": 0.0, "step": 386 }, { "epoch": 0.2504854368932039, "grad_norm": 8.502198033966124e-05, "learning_rate": 9.057825804343887e-05, "loss": 0.0, "step": 387 }, { "epoch": 0.2511326860841424, "grad_norm": 9.366994345327839e-05, "learning_rate": 9.05146496326782e-05, "loss": 0.0, "step": 388 }, { "epoch": 0.2517799352750809, "grad_norm": 8.837025961838663e-05, "learning_rate": 9.045084971874738e-05, "loss": 0.0, "step": 389 }, { "epoch": 0.2524271844660194, "grad_norm": 8.259070455096662e-05, "learning_rate": 9.038685860321354e-05, "loss": 0.0, "step": 390 }, { "epoch": 0.2530744336569579, "grad_norm": 8.451177563983947e-05, "learning_rate": 9.032267658854754e-05, "loss": 0.0, "step": 391 }, { "epoch": 0.2537216828478964, "grad_norm": 8.83806569618173e-05, "learning_rate": 9.025830397812261e-05, "loss": 0.0, "step": 392 }, { "epoch": 0.2543689320388349, "grad_norm": 8.645427442388609e-05, "learning_rate": 9.019374107621286e-05, "loss": 0.0, "step": 393 }, { "epoch": 0.2550161812297735, "grad_norm": 8.210589294321835e-05, "learning_rate": 9.012898818799183e-05, "loss": 0.0, "step": 394 }, { "epoch": 0.255663430420712, "grad_norm": 8.405525295529515e-05, "learning_rate": 9.006404561953114e-05, "loss": 0.0, "step": 395 }, { "epoch": 0.2563106796116505, "grad_norm": 8.019573579076678e-05, "learning_rate": 8.999891367779896e-05, "loss": 0.0, "step": 396 }, { "epoch": 0.256957928802589, "grad_norm": 8.742338104639202e-05, "learning_rate": 8.993359267065857e-05, "loss": 0.0, "step": 397 }, { "epoch": 0.2576051779935275, "grad_norm": 8.159227581927553e-05, "learning_rate": 8.986808290686693e-05, "loss": 0.0, "step": 398 }, { "epoch": 0.258252427184466, "grad_norm": 7.63337520766072e-05, "learning_rate": 8.98023846960732e-05, "loss": 0.0, "step": 399 }, { "epoch": 0.2588996763754045, "grad_norm": 8.065193105721846e-05, "learning_rate": 8.97364983488173e-05, "loss": 0.0, "step": 400 }, { "epoch": 0.25954692556634307, "grad_norm": 8.155446994351223e-05, "learning_rate": 8.967042417652842e-05, "loss": 0.0, "step": 401 }, { "epoch": 0.26019417475728157, "grad_norm": 7.630332402186468e-05, "learning_rate": 8.960416249152352e-05, "loss": 0.0, "step": 402 }, { "epoch": 0.2608414239482201, "grad_norm": 8.205570338759571e-05, "learning_rate": 8.953771360700596e-05, "loss": 0.0, "step": 403 }, { "epoch": 0.2614886731391586, "grad_norm": 7.726506009930745e-05, "learning_rate": 8.947107783706387e-05, "loss": 0.0, "step": 404 }, { "epoch": 0.2621359223300971, "grad_norm": 7.391454710159451e-05, "learning_rate": 8.940425549666881e-05, "loss": 0.0, "step": 405 }, { "epoch": 0.2627831715210356, "grad_norm": 7.636560621904209e-05, "learning_rate": 8.933724690167417e-05, "loss": 0.0, "step": 406 }, { "epoch": 0.2634304207119741, "grad_norm": 8.111628267215565e-05, "learning_rate": 8.927005236881374e-05, "loss": 0.0, "step": 407 }, { "epoch": 0.26407766990291265, "grad_norm": 7.86913515185006e-05, "learning_rate": 8.920267221570018e-05, "loss": 0.0, "step": 408 }, { "epoch": 0.26472491909385115, "grad_norm": 7.242967694764957e-05, "learning_rate": 8.913510676082356e-05, "loss": 0.0, "step": 409 }, { "epoch": 0.26537216828478966, "grad_norm": 7.819798338459805e-05, "learning_rate": 8.906735632354979e-05, "loss": 0.0, "step": 410 }, { "epoch": 0.26601941747572816, "grad_norm": 7.539886428276077e-05, "learning_rate": 8.899942122411919e-05, "loss": 0.0, "step": 411 }, { "epoch": 0.26666666666666666, "grad_norm": 7.823609485058114e-05, "learning_rate": 8.893130178364493e-05, "loss": 0.0, "step": 412 }, { "epoch": 0.26731391585760517, "grad_norm": 8.305019582621753e-05, "learning_rate": 8.886299832411145e-05, "loss": 0.0, "step": 413 }, { "epoch": 0.26796116504854367, "grad_norm": 7.822398038115352e-05, "learning_rate": 8.879451116837307e-05, "loss": 0.0, "step": 414 }, { "epoch": 0.2686084142394822, "grad_norm": 8.164405153365806e-05, "learning_rate": 8.872584064015241e-05, "loss": 0.0, "step": 415 }, { "epoch": 0.26925566343042073, "grad_norm": 8.450495806755498e-05, "learning_rate": 8.865698706403881e-05, "loss": 0.0, "step": 416 }, { "epoch": 0.26990291262135924, "grad_norm": 8.017011714400724e-05, "learning_rate": 8.858795076548683e-05, "loss": 0.0, "step": 417 }, { "epoch": 0.27055016181229774, "grad_norm": 8.017443906283006e-05, "learning_rate": 8.851873207081474e-05, "loss": 0.0, "step": 418 }, { "epoch": 0.27119741100323624, "grad_norm": 8.206439815694466e-05, "learning_rate": 8.844933130720295e-05, "loss": 0.0, "step": 419 }, { "epoch": 0.27184466019417475, "grad_norm": 8.36331382743083e-05, "learning_rate": 8.837974880269246e-05, "loss": 0.0, "step": 420 }, { "epoch": 0.27249190938511325, "grad_norm": 8.112064097076654e-05, "learning_rate": 8.830998488618332e-05, "loss": 0.0, "step": 421 }, { "epoch": 0.27313915857605175, "grad_norm": 7.637172529939562e-05, "learning_rate": 8.824003988743306e-05, "loss": 0.0, "step": 422 }, { "epoch": 0.2737864077669903, "grad_norm": 8.311059355037287e-05, "learning_rate": 8.816991413705516e-05, "loss": 0.0, "step": 423 }, { "epoch": 0.2744336569579288, "grad_norm": 7.925470708869398e-05, "learning_rate": 8.809960796651745e-05, "loss": 0.0, "step": 424 }, { "epoch": 0.2750809061488673, "grad_norm": 8.071874617598951e-05, "learning_rate": 8.802912170814059e-05, "loss": 0.0, "step": 425 }, { "epoch": 0.2757281553398058, "grad_norm": 7.97762768343091e-05, "learning_rate": 8.795845569509646e-05, "loss": 0.0, "step": 426 }, { "epoch": 0.27637540453074433, "grad_norm": 7.878767792135477e-05, "learning_rate": 8.788761026140659e-05, "loss": 0.0, "step": 427 }, { "epoch": 0.27702265372168283, "grad_norm": 7.688791811233386e-05, "learning_rate": 8.781658574194059e-05, "loss": 0.0, "step": 428 }, { "epoch": 0.27766990291262134, "grad_norm": 7.537515921285376e-05, "learning_rate": 8.774538247241459e-05, "loss": 0.0, "step": 429 }, { "epoch": 0.2783171521035599, "grad_norm": 7.39284441806376e-05, "learning_rate": 8.767400078938959e-05, "loss": 0.0, "step": 430 }, { "epoch": 0.2789644012944984, "grad_norm": 8.11739155324176e-05, "learning_rate": 8.760244103026993e-05, "loss": 0.0, "step": 431 }, { "epoch": 0.2796116504854369, "grad_norm": 7.108607678674161e-05, "learning_rate": 8.75307035333017e-05, "loss": 0.0, "step": 432 }, { "epoch": 0.2802588996763754, "grad_norm": 6.962429324630648e-05, "learning_rate": 8.745878863757105e-05, "loss": 0.0, "step": 433 }, { "epoch": 0.2809061488673139, "grad_norm": 7.295518298633397e-05, "learning_rate": 8.738669668300272e-05, "loss": 0.0, "step": 434 }, { "epoch": 0.2815533980582524, "grad_norm": 8.264243660960346e-05, "learning_rate": 8.731442801035831e-05, "loss": 0.0, "step": 435 }, { "epoch": 0.2822006472491909, "grad_norm": 7.29759776731953e-05, "learning_rate": 8.724198296123477e-05, "loss": 0.0, "step": 436 }, { "epoch": 0.2828478964401295, "grad_norm": 7.20171447028406e-05, "learning_rate": 8.716936187806272e-05, "loss": 0.0, "step": 437 }, { "epoch": 0.283495145631068, "grad_norm": 7.440879562636837e-05, "learning_rate": 8.709656510410483e-05, "loss": 0.0, "step": 438 }, { "epoch": 0.2841423948220065, "grad_norm": 7.922831719042733e-05, "learning_rate": 8.702359298345426e-05, "loss": 0.0, "step": 439 }, { "epoch": 0.284789644012945, "grad_norm": 7.731041841907427e-05, "learning_rate": 8.695044586103296e-05, "loss": 0.0, "step": 440 }, { "epoch": 0.2854368932038835, "grad_norm": 7.636252121301368e-05, "learning_rate": 8.687712408259008e-05, "loss": 0.0, "step": 441 }, { "epoch": 0.286084142394822, "grad_norm": 7.443011418217793e-05, "learning_rate": 8.680362799470037e-05, "loss": 0.0, "step": 442 }, { "epoch": 0.2867313915857605, "grad_norm": 7.297143747564405e-05, "learning_rate": 8.67299579447624e-05, "loss": 0.0, "step": 443 }, { "epoch": 0.287378640776699, "grad_norm": 7.006343366811052e-05, "learning_rate": 8.665611428099712e-05, "loss": 0.0, "step": 444 }, { "epoch": 0.28802588996763756, "grad_norm": 7.20312018529512e-05, "learning_rate": 8.658209735244604e-05, "loss": 0.0, "step": 445 }, { "epoch": 0.28867313915857606, "grad_norm": 7.391376129817218e-05, "learning_rate": 8.650790750896974e-05, "loss": 0.0, "step": 446 }, { "epoch": 0.28932038834951457, "grad_norm": 6.860474240966141e-05, "learning_rate": 8.643354510124599e-05, "loss": 0.0, "step": 447 }, { "epoch": 0.28996763754045307, "grad_norm": 7.24408746464178e-05, "learning_rate": 8.635901048076837e-05, "loss": 0.0, "step": 448 }, { "epoch": 0.2906148867313916, "grad_norm": 7.580941746709868e-05, "learning_rate": 8.62843039998444e-05, "loss": 0.0, "step": 449 }, { "epoch": 0.2912621359223301, "grad_norm": 7.150036981329322e-05, "learning_rate": 8.620942601159394e-05, "loss": 0.0, "step": 450 }, { "epoch": 0.2919093851132686, "grad_norm": 6.906647467985749e-05, "learning_rate": 8.613437686994752e-05, "loss": 0.0, "step": 451 }, { "epoch": 0.29255663430420714, "grad_norm": 7.243747677421197e-05, "learning_rate": 8.605915692964472e-05, "loss": 0.0, "step": 452 }, { "epoch": 0.29320388349514565, "grad_norm": 6.670635775662959e-05, "learning_rate": 8.598376654623242e-05, "loss": 0.0, "step": 453 }, { "epoch": 0.29385113268608415, "grad_norm": 6.571456469828263e-05, "learning_rate": 8.59082060760631e-05, "loss": 0.0, "step": 454 }, { "epoch": 0.29449838187702265, "grad_norm": 7.348477083723992e-05, "learning_rate": 8.583247587629326e-05, "loss": 0.0, "step": 455 }, { "epoch": 0.29514563106796116, "grad_norm": 6.911531090736389e-05, "learning_rate": 8.575657630488166e-05, "loss": 0.0, "step": 456 }, { "epoch": 0.29579288025889966, "grad_norm": 6.575648149009794e-05, "learning_rate": 8.568050772058762e-05, "loss": 0.0, "step": 457 }, { "epoch": 0.29644012944983816, "grad_norm": 7.443017966579646e-05, "learning_rate": 8.560427048296934e-05, "loss": 0.0, "step": 458 }, { "epoch": 0.2970873786407767, "grad_norm": 6.954690616112202e-05, "learning_rate": 8.552786495238226e-05, "loss": 0.0, "step": 459 }, { "epoch": 0.2977346278317152, "grad_norm": 6.811664934502915e-05, "learning_rate": 8.54512914899772e-05, "loss": 0.0, "step": 460 }, { "epoch": 0.29838187702265373, "grad_norm": 6.907618808327243e-05, "learning_rate": 8.537455045769884e-05, "loss": 0.0, "step": 461 }, { "epoch": 0.29902912621359223, "grad_norm": 7.246116729220375e-05, "learning_rate": 8.529764221828392e-05, "loss": 0.0, "step": 462 }, { "epoch": 0.29967637540453074, "grad_norm": 6.524915079353377e-05, "learning_rate": 8.522056713525945e-05, "loss": 0.0, "step": 463 }, { "epoch": 0.30032362459546924, "grad_norm": 6.715708877891302e-05, "learning_rate": 8.514332557294119e-05, "loss": 0.0, "step": 464 }, { "epoch": 0.30097087378640774, "grad_norm": 7.728147465968505e-05, "learning_rate": 8.50659178964317e-05, "loss": 0.0, "step": 465 }, { "epoch": 0.3016181229773463, "grad_norm": 6.76482668495737e-05, "learning_rate": 8.498834447161877e-05, "loss": 0.0, "step": 466 }, { "epoch": 0.3022653721682848, "grad_norm": 6.765009311493486e-05, "learning_rate": 8.491060566517365e-05, "loss": 0.0, "step": 467 }, { "epoch": 0.3029126213592233, "grad_norm": 8.558189438190311e-05, "learning_rate": 8.48327018445493e-05, "loss": 0.0, "step": 468 }, { "epoch": 0.3035598705501618, "grad_norm": 6.813398067606613e-05, "learning_rate": 8.475463337797864e-05, "loss": 0.0, "step": 469 }, { "epoch": 0.3042071197411003, "grad_norm": 7.054232992231846e-05, "learning_rate": 8.467640063447289e-05, "loss": 0.0, "step": 470 }, { "epoch": 0.3048543689320388, "grad_norm": 7.246794120874256e-05, "learning_rate": 8.45980039838197e-05, "loss": 0.0, "step": 471 }, { "epoch": 0.3055016181229773, "grad_norm": 7.008689863141626e-05, "learning_rate": 8.451944379658152e-05, "loss": 0.0, "step": 472 }, { "epoch": 0.3061488673139159, "grad_norm": 7.197912782430649e-05, "learning_rate": 8.444072044409377e-05, "loss": 0.0, "step": 473 }, { "epoch": 0.3067961165048544, "grad_norm": 6.862673035357147e-05, "learning_rate": 8.436183429846313e-05, "loss": 0.0, "step": 474 }, { "epoch": 0.3074433656957929, "grad_norm": 6.671810115221888e-05, "learning_rate": 8.428278573256578e-05, "loss": 0.0, "step": 475 }, { "epoch": 0.3080906148867314, "grad_norm": 7.101190567482263e-05, "learning_rate": 8.420357512004557e-05, "loss": 0.0, "step": 476 }, { "epoch": 0.3087378640776699, "grad_norm": 6.8177621869836e-05, "learning_rate": 8.412420283531234e-05, "loss": 0.0, "step": 477 }, { "epoch": 0.3093851132686084, "grad_norm": 7.198721141321585e-05, "learning_rate": 8.404466925354016e-05, "loss": 0.0, "step": 478 }, { "epoch": 0.3100323624595469, "grad_norm": 7.054059096844867e-05, "learning_rate": 8.396497475066541e-05, "loss": 0.0, "step": 479 }, { "epoch": 0.3106796116504854, "grad_norm": 6.672196468571201e-05, "learning_rate": 8.388511970338517e-05, "loss": 0.0, "step": 480 }, { "epoch": 0.31132686084142397, "grad_norm": 6.669166759820655e-05, "learning_rate": 8.380510448915541e-05, "loss": 0.0, "step": 481 }, { "epoch": 0.3119741100323625, "grad_norm": 6.815550295868888e-05, "learning_rate": 8.372492948618908e-05, "loss": 0.0, "step": 482 }, { "epoch": 0.312621359223301, "grad_norm": 7.388958329102024e-05, "learning_rate": 8.364459507345452e-05, "loss": 0.0, "step": 483 }, { "epoch": 0.3132686084142395, "grad_norm": 7.101796654751524e-05, "learning_rate": 8.356410163067346e-05, "loss": 0.0, "step": 484 }, { "epoch": 0.313915857605178, "grad_norm": 6.62219972582534e-05, "learning_rate": 8.34834495383194e-05, "loss": 0.0, "step": 485 }, { "epoch": 0.3145631067961165, "grad_norm": 6.5733322117012e-05, "learning_rate": 8.340263917761569e-05, "loss": 0.0, "step": 486 }, { "epoch": 0.315210355987055, "grad_norm": 6.766369915567338e-05, "learning_rate": 8.332167093053378e-05, "loss": 0.0, "step": 487 }, { "epoch": 0.31585760517799355, "grad_norm": 6.959314487176016e-05, "learning_rate": 8.32405451797915e-05, "loss": 0.0, "step": 488 }, { "epoch": 0.31650485436893205, "grad_norm": 6.671955634374171e-05, "learning_rate": 8.315926230885104e-05, "loss": 0.0, "step": 489 }, { "epoch": 0.31715210355987056, "grad_norm": 6.57304044580087e-05, "learning_rate": 8.307782270191732e-05, "loss": 0.0, "step": 490 }, { "epoch": 0.31779935275080906, "grad_norm": 7.443082722602412e-05, "learning_rate": 8.299622674393614e-05, "loss": 0.0, "step": 491 }, { "epoch": 0.31844660194174756, "grad_norm": 6.767906597815454e-05, "learning_rate": 8.291447482059225e-05, "loss": 0.0, "step": 492 }, { "epoch": 0.31909385113268607, "grad_norm": 6.116896838648245e-05, "learning_rate": 8.283256731830771e-05, "loss": 0.0, "step": 493 }, { "epoch": 0.31974110032362457, "grad_norm": 6.476581620518118e-05, "learning_rate": 8.27505046242399e-05, "loss": 0.0, "step": 494 }, { "epoch": 0.32038834951456313, "grad_norm": 6.526013748953119e-05, "learning_rate": 8.266828712627976e-05, "loss": 0.0, "step": 495 }, { "epoch": 0.32103559870550163, "grad_norm": 6.526774086523801e-05, "learning_rate": 8.258591521305e-05, "loss": 0.0, "step": 496 }, { "epoch": 0.32168284789644014, "grad_norm": 6.427988409996033e-05, "learning_rate": 8.250338927390313e-05, "loss": 0.0, "step": 497 }, { "epoch": 0.32233009708737864, "grad_norm": 6.283738912316039e-05, "learning_rate": 8.242070969891979e-05, "loss": 0.0, "step": 498 }, { "epoch": 0.32297734627831715, "grad_norm": 6.332938937703148e-05, "learning_rate": 8.233787687890677e-05, "loss": 0.0, "step": 499 }, { "epoch": 0.32362459546925565, "grad_norm": 7.100054790498689e-05, "learning_rate": 8.225489120539522e-05, "loss": 0.0, "step": 500 }, { "epoch": 0.32427184466019415, "grad_norm": 6.665269029326737e-05, "learning_rate": 8.217175307063883e-05, "loss": 0.0, "step": 501 }, { "epoch": 0.3249190938511327, "grad_norm": 6.185962411109358e-05, "learning_rate": 8.208846286761186e-05, "loss": 0.0, "step": 502 }, { "epoch": 0.3255663430420712, "grad_norm": 6.0010035667801276e-05, "learning_rate": 8.200502099000746e-05, "loss": 0.0, "step": 503 }, { "epoch": 0.3262135922330097, "grad_norm": 6.136797310318798e-05, "learning_rate": 8.192142783223563e-05, "loss": 0.0, "step": 504 }, { "epoch": 0.3268608414239482, "grad_norm": 6.573896098416299e-05, "learning_rate": 8.183768378942143e-05, "loss": 0.0, "step": 505 }, { "epoch": 0.3275080906148867, "grad_norm": 6.192978617036715e-05, "learning_rate": 8.175378925740321e-05, "loss": 0.0, "step": 506 }, { "epoch": 0.32815533980582523, "grad_norm": 5.7814930187305436e-05, "learning_rate": 8.166974463273056e-05, "loss": 0.0, "step": 507 }, { "epoch": 0.32880258899676373, "grad_norm": 6.283526454353705e-05, "learning_rate": 8.158555031266254e-05, "loss": 0.0, "step": 508 }, { "epoch": 0.3294498381877023, "grad_norm": 6.477363058365881e-05, "learning_rate": 8.150120669516583e-05, "loss": 0.0, "step": 509 }, { "epoch": 0.3300970873786408, "grad_norm": 6.426102481782436e-05, "learning_rate": 8.141671417891274e-05, "loss": 0.0, "step": 510 }, { "epoch": 0.3307443365695793, "grad_norm": 6.37875054962933e-05, "learning_rate": 8.133207316327943e-05, "loss": 0.0, "step": 511 }, { "epoch": 0.3313915857605178, "grad_norm": 6.379558908520266e-05, "learning_rate": 8.124728404834396e-05, "loss": 0.0, "step": 512 }, { "epoch": 0.3320388349514563, "grad_norm": 6.715062772855163e-05, "learning_rate": 8.116234723488443e-05, "loss": 0.0, "step": 513 }, { "epoch": 0.3326860841423948, "grad_norm": 6.718301301589236e-05, "learning_rate": 8.107726312437709e-05, "loss": 0.0, "step": 514 }, { "epoch": 0.3333333333333333, "grad_norm": 6.61970698274672e-05, "learning_rate": 8.099203211899441e-05, "loss": 0.0, "step": 515 }, { "epoch": 0.3339805825242718, "grad_norm": 6.13810116192326e-05, "learning_rate": 8.090665462160318e-05, "loss": 0.0, "step": 516 }, { "epoch": 0.3346278317152104, "grad_norm": 6.140068580862135e-05, "learning_rate": 8.082113103576269e-05, "loss": 0.0, "step": 517 }, { "epoch": 0.3352750809061489, "grad_norm": 6.189435953274369e-05, "learning_rate": 8.073546176572267e-05, "loss": 0.0, "step": 518 }, { "epoch": 0.3359223300970874, "grad_norm": 6.333187775453553e-05, "learning_rate": 8.064964721642151e-05, "loss": 0.0, "step": 519 }, { "epoch": 0.3365695792880259, "grad_norm": 6.094420677982271e-05, "learning_rate": 8.056368779348431e-05, "loss": 0.0, "step": 520 }, { "epoch": 0.3372168284789644, "grad_norm": 6.474891415564343e-05, "learning_rate": 8.047758390322094e-05, "loss": 0.0, "step": 521 }, { "epoch": 0.3378640776699029, "grad_norm": 6.479680450865999e-05, "learning_rate": 8.039133595262411e-05, "loss": 0.0, "step": 522 }, { "epoch": 0.3385113268608414, "grad_norm": 6.290060264291242e-05, "learning_rate": 8.030494434936753e-05, "loss": 0.0, "step": 523 }, { "epoch": 0.33915857605177996, "grad_norm": 6.383266008924693e-05, "learning_rate": 8.021840950180384e-05, "loss": 0.0, "step": 524 }, { "epoch": 0.33980582524271846, "grad_norm": 6.763410056009889e-05, "learning_rate": 8.013173181896283e-05, "loss": 0.0, "step": 525 }, { "epoch": 0.34045307443365697, "grad_norm": 6.860763824079186e-05, "learning_rate": 8.00449117105494e-05, "loss": 0.0, "step": 526 }, { "epoch": 0.34110032362459547, "grad_norm": 6.914659752510488e-05, "learning_rate": 7.99579495869417e-05, "loss": 0.0, "step": 527 }, { "epoch": 0.341747572815534, "grad_norm": 6.14271339145489e-05, "learning_rate": 7.987084585918911e-05, "loss": 0.0, "step": 528 }, { "epoch": 0.3423948220064725, "grad_norm": 6.28829438937828e-05, "learning_rate": 7.978360093901036e-05, "loss": 0.0, "step": 529 }, { "epoch": 0.343042071197411, "grad_norm": 5.7578912674216554e-05, "learning_rate": 7.969621523879156e-05, "loss": 0.0, "step": 530 }, { "epoch": 0.34368932038834954, "grad_norm": 5.9723191952798516e-05, "learning_rate": 7.960868917158426e-05, "loss": 0.0, "step": 531 }, { "epoch": 0.34433656957928804, "grad_norm": 6.139493780210614e-05, "learning_rate": 7.952102315110348e-05, "loss": 0.0, "step": 532 }, { "epoch": 0.34498381877022655, "grad_norm": 6.382521678460762e-05, "learning_rate": 7.943321759172579e-05, "loss": 0.0, "step": 533 }, { "epoch": 0.34563106796116505, "grad_norm": 6.381669663824141e-05, "learning_rate": 7.934527290848728e-05, "loss": 0.0, "step": 534 }, { "epoch": 0.34627831715210355, "grad_norm": 6.11504219705239e-05, "learning_rate": 7.925718951708169e-05, "loss": 0.0, "step": 535 }, { "epoch": 0.34692556634304206, "grad_norm": 6.234991451492533e-05, "learning_rate": 7.916896783385838e-05, "loss": 0.0, "step": 536 }, { "epoch": 0.34757281553398056, "grad_norm": 6.283271068241447e-05, "learning_rate": 7.908060827582043e-05, "loss": 0.0, "step": 537 }, { "epoch": 0.3482200647249191, "grad_norm": 6.28512934781611e-05, "learning_rate": 7.899211126062252e-05, "loss": 0.0, "step": 538 }, { "epoch": 0.3488673139158576, "grad_norm": 6.091139221098274e-05, "learning_rate": 7.890347720656914e-05, "loss": 0.0, "step": 539 }, { "epoch": 0.34951456310679613, "grad_norm": 6.140362529549748e-05, "learning_rate": 7.881470653261252e-05, "loss": 0.0, "step": 540 }, { "epoch": 0.35016181229773463, "grad_norm": 6.283146649366245e-05, "learning_rate": 7.872579965835063e-05, "loss": 0.0, "step": 541 }, { "epoch": 0.35080906148867314, "grad_norm": 7.002944767009467e-05, "learning_rate": 7.863675700402526e-05, "loss": 0.0, "step": 542 }, { "epoch": 0.35145631067961164, "grad_norm": 6.283973198151216e-05, "learning_rate": 7.854757899051999e-05, "loss": 0.0, "step": 543 }, { "epoch": 0.35210355987055014, "grad_norm": 6.14111268077977e-05, "learning_rate": 7.845826603935819e-05, "loss": 0.0, "step": 544 }, { "epoch": 0.35275080906148865, "grad_norm": 6.044120164006017e-05, "learning_rate": 7.836881857270107e-05, "loss": 0.0, "step": 545 }, { "epoch": 0.3533980582524272, "grad_norm": 5.515399607247673e-05, "learning_rate": 7.827923701334565e-05, "loss": 0.0, "step": 546 }, { "epoch": 0.3540453074433657, "grad_norm": 6.069052324164659e-05, "learning_rate": 7.81895217847228e-05, "loss": 0.0, "step": 547 }, { "epoch": 0.3546925566343042, "grad_norm": 5.875090937479399e-05, "learning_rate": 7.809967331089518e-05, "loss": 0.0, "step": 548 }, { "epoch": 0.3553398058252427, "grad_norm": 5.73221841477789e-05, "learning_rate": 7.80096920165553e-05, "loss": 0.0, "step": 549 }, { "epoch": 0.3559870550161812, "grad_norm": 5.708009848603979e-05, "learning_rate": 7.791957832702343e-05, "loss": 0.0, "step": 550 }, { "epoch": 0.3566343042071197, "grad_norm": 5.967615652480163e-05, "learning_rate": 7.78293326682457e-05, "loss": 0.0, "step": 551 }, { "epoch": 0.3572815533980582, "grad_norm": 6.064411354600452e-05, "learning_rate": 7.773895546679202e-05, "loss": 0.0, "step": 552 }, { "epoch": 0.3579288025889968, "grad_norm": 5.27522133779712e-05, "learning_rate": 7.764844714985402e-05, "loss": 0.0, "step": 553 }, { "epoch": 0.3585760517799353, "grad_norm": 5.876754948985763e-05, "learning_rate": 7.755780814524314e-05, "loss": 0.0, "step": 554 }, { "epoch": 0.3592233009708738, "grad_norm": 5.973816951154731e-05, "learning_rate": 7.746703888138849e-05, "loss": 0.0, "step": 555 }, { "epoch": 0.3598705501618123, "grad_norm": 5.6833298003766686e-05, "learning_rate": 7.737613978733497e-05, "loss": 0.0, "step": 556 }, { "epoch": 0.3605177993527508, "grad_norm": 5.827989662066102e-05, "learning_rate": 7.728511129274106e-05, "loss": 0.0, "step": 557 }, { "epoch": 0.3611650485436893, "grad_norm": 5.7787648984231055e-05, "learning_rate": 7.719395382787697e-05, "loss": 0.0, "step": 558 }, { "epoch": 0.3618122977346278, "grad_norm": 6.161349301692098e-05, "learning_rate": 7.710266782362247e-05, "loss": 0.0, "step": 559 }, { "epoch": 0.36245954692556637, "grad_norm": 5.8728790463646874e-05, "learning_rate": 7.701125371146492e-05, "loss": 0.0, "step": 560 }, { "epoch": 0.36310679611650487, "grad_norm": 5.6330987717956305e-05, "learning_rate": 7.69197119234972e-05, "loss": 0.0, "step": 561 }, { "epoch": 0.3637540453074434, "grad_norm": 5.85031411901582e-05, "learning_rate": 7.682804289241572e-05, "loss": 0.0, "step": 562 }, { "epoch": 0.3644012944983819, "grad_norm": 6.284066330408677e-05, "learning_rate": 7.673624705151831e-05, "loss": 0.0, "step": 563 }, { "epoch": 0.3650485436893204, "grad_norm": 6.618916086154059e-05, "learning_rate": 7.664432483470223e-05, "loss": 0.0, "step": 564 }, { "epoch": 0.3656957928802589, "grad_norm": 5.5395365052390844e-05, "learning_rate": 7.655227667646201e-05, "loss": 0.0, "step": 565 }, { "epoch": 0.3663430420711974, "grad_norm": 5.4666437790729105e-05, "learning_rate": 7.646010301188759e-05, "loss": 0.0, "step": 566 }, { "epoch": 0.36699029126213595, "grad_norm": 4.986231215298176e-05, "learning_rate": 7.636780427666203e-05, "loss": 0.0, "step": 567 }, { "epoch": 0.36763754045307445, "grad_norm": 6.184963422128931e-05, "learning_rate": 7.62753809070597e-05, "loss": 0.0, "step": 568 }, { "epoch": 0.36828478964401296, "grad_norm": 5.203453474678099e-05, "learning_rate": 7.618283333994398e-05, "loss": 0.0, "step": 569 }, { "epoch": 0.36893203883495146, "grad_norm": 5.853091352037154e-05, "learning_rate": 7.609016201276533e-05, "loss": 0.0, "step": 570 }, { "epoch": 0.36957928802588996, "grad_norm": 5.777838305220939e-05, "learning_rate": 7.599736736355921e-05, "loss": 0.0, "step": 571 }, { "epoch": 0.37022653721682847, "grad_norm": 5.7322562497574836e-05, "learning_rate": 7.5904449830944e-05, "loss": 0.0, "step": 572 }, { "epoch": 0.37087378640776697, "grad_norm": 5.5886779591673985e-05, "learning_rate": 7.58114098541189e-05, "loss": 0.0, "step": 573 }, { "epoch": 0.37152103559870553, "grad_norm": 5.731352939619683e-05, "learning_rate": 7.571824787286188e-05, "loss": 0.0, "step": 574 }, { "epoch": 0.37216828478964403, "grad_norm": 6.044963447493501e-05, "learning_rate": 7.562496432752761e-05, "loss": 0.0, "step": 575 }, { "epoch": 0.37281553398058254, "grad_norm": 5.947915997239761e-05, "learning_rate": 7.553155965904535e-05, "loss": 0.0, "step": 576 }, { "epoch": 0.37346278317152104, "grad_norm": 5.7571054639993235e-05, "learning_rate": 7.543803430891689e-05, "loss": 0.0, "step": 577 }, { "epoch": 0.37411003236245954, "grad_norm": 5.5904041801113635e-05, "learning_rate": 7.534438871921443e-05, "loss": 0.0, "step": 578 }, { "epoch": 0.37475728155339805, "grad_norm": 5.634295303025283e-05, "learning_rate": 7.525062333257855e-05, "loss": 0.0, "step": 579 }, { "epoch": 0.37540453074433655, "grad_norm": 5.5638389312662184e-05, "learning_rate": 7.515673859221606e-05, "loss": 0.0, "step": 580 }, { "epoch": 0.37605177993527505, "grad_norm": 5.516174860531464e-05, "learning_rate": 7.50627349418979e-05, "loss": 0.0, "step": 581 }, { "epoch": 0.3766990291262136, "grad_norm": 5.27628835698124e-05, "learning_rate": 7.496861282595717e-05, "loss": 0.0, "step": 582 }, { "epoch": 0.3773462783171521, "grad_norm": 5.610935477307066e-05, "learning_rate": 7.487437268928678e-05, "loss": 0.0, "step": 583 }, { "epoch": 0.3779935275080906, "grad_norm": 5.611890082946047e-05, "learning_rate": 7.478001497733765e-05, "loss": 0.0, "step": 584 }, { "epoch": 0.3786407766990291, "grad_norm": 5.0826984079321846e-05, "learning_rate": 7.468554013611633e-05, "loss": 0.0, "step": 585 }, { "epoch": 0.37928802588996763, "grad_norm": 5.012617839383893e-05, "learning_rate": 7.459094861218306e-05, "loss": 0.0, "step": 586 }, { "epoch": 0.37993527508090613, "grad_norm": 5.565696119447239e-05, "learning_rate": 7.449624085264962e-05, "loss": 0.0, "step": 587 }, { "epoch": 0.38058252427184464, "grad_norm": 5.489849718287587e-05, "learning_rate": 7.44014173051772e-05, "loss": 0.0, "step": 588 }, { "epoch": 0.3812297734627832, "grad_norm": 6.020487489877269e-05, "learning_rate": 7.430647841797427e-05, "loss": 0.0, "step": 589 }, { "epoch": 0.3818770226537217, "grad_norm": 5.924495781073347e-05, "learning_rate": 7.421142463979453e-05, "loss": 0.0, "step": 590 }, { "epoch": 0.3825242718446602, "grad_norm": 5.348097693058662e-05, "learning_rate": 7.411625641993472e-05, "loss": 0.0, "step": 591 }, { "epoch": 0.3831715210355987, "grad_norm": 5.490216426551342e-05, "learning_rate": 7.402097420823249e-05, "loss": 0.0, "step": 592 }, { "epoch": 0.3838187702265372, "grad_norm": 5.3233994549373165e-05, "learning_rate": 7.392557845506432e-05, "loss": 0.0, "step": 593 }, { "epoch": 0.3844660194174757, "grad_norm": 5.4190910304896533e-05, "learning_rate": 7.383006961134343e-05, "loss": 0.0, "step": 594 }, { "epoch": 0.3851132686084142, "grad_norm": 5.2745817811228335e-05, "learning_rate": 7.373444812851751e-05, "loss": 0.0, "step": 595 }, { "epoch": 0.3857605177993528, "grad_norm": 5.180158041184768e-05, "learning_rate": 7.363871445856669e-05, "loss": 0.0, "step": 596 }, { "epoch": 0.3864077669902913, "grad_norm": 4.9377929826732725e-05, "learning_rate": 7.354286905400142e-05, "loss": 0.0, "step": 597 }, { "epoch": 0.3870550161812298, "grad_norm": 5.393326500779949e-05, "learning_rate": 7.344691236786026e-05, "loss": 0.0, "step": 598 }, { "epoch": 0.3877022653721683, "grad_norm": 5.7773089793045074e-05, "learning_rate": 7.335084485370777e-05, "loss": 0.0, "step": 599 }, { "epoch": 0.3883495145631068, "grad_norm": 5.2968716772738844e-05, "learning_rate": 7.325466696563238e-05, "loss": 0.0, "step": 600 }, { "epoch": 0.3889967637540453, "grad_norm": 5.539034100365825e-05, "learning_rate": 7.315837915824422e-05, "loss": 0.0, "step": 601 }, { "epoch": 0.3896440129449838, "grad_norm": 4.986220665159635e-05, "learning_rate": 7.306198188667303e-05, "loss": 0.0, "step": 602 }, { "epoch": 0.39029126213592236, "grad_norm": 5.679861351381987e-05, "learning_rate": 7.296547560656585e-05, "loss": 0.0, "step": 603 }, { "epoch": 0.39093851132686086, "grad_norm": 5.057294401922263e-05, "learning_rate": 7.286886077408513e-05, "loss": 0.0, "step": 604 }, { "epoch": 0.39158576051779936, "grad_norm": 5.4458399972645566e-05, "learning_rate": 7.27721378459063e-05, "loss": 0.0, "step": 605 }, { "epoch": 0.39223300970873787, "grad_norm": 5.3743897296953946e-05, "learning_rate": 7.267530727921578e-05, "loss": 0.0, "step": 606 }, { "epoch": 0.39288025889967637, "grad_norm": 5.3922216466162354e-05, "learning_rate": 7.257836953170875e-05, "loss": 0.0, "step": 607 }, { "epoch": 0.3935275080906149, "grad_norm": 6.94370610290207e-05, "learning_rate": 7.248132506158704e-05, "loss": 0.0, "step": 608 }, { "epoch": 0.3941747572815534, "grad_norm": 5.17721964570228e-05, "learning_rate": 7.238417432755691e-05, "loss": 0.0, "step": 609 }, { "epoch": 0.3948220064724919, "grad_norm": 5.3923315135762095e-05, "learning_rate": 7.228691778882693e-05, "loss": 0.0, "step": 610 }, { "epoch": 0.39546925566343044, "grad_norm": 5.3210827900329605e-05, "learning_rate": 7.218955590510572e-05, "loss": 0.0, "step": 611 }, { "epoch": 0.39611650485436894, "grad_norm": 5.176143895369023e-05, "learning_rate": 7.209208913659994e-05, "loss": 0.0, "step": 612 }, { "epoch": 0.39676375404530745, "grad_norm": 5.393975516199134e-05, "learning_rate": 7.199451794401193e-05, "loss": 0.0, "step": 613 }, { "epoch": 0.39741100323624595, "grad_norm": 5.251104448689148e-05, "learning_rate": 7.189684278853764e-05, "loss": 0.0, "step": 614 }, { "epoch": 0.39805825242718446, "grad_norm": 4.9369609769200906e-05, "learning_rate": 7.179906413186447e-05, "loss": 0.0, "step": 615 }, { "epoch": 0.39870550161812296, "grad_norm": 5.082436837255955e-05, "learning_rate": 7.170118243616899e-05, "loss": 0.0, "step": 616 }, { "epoch": 0.39935275080906146, "grad_norm": 5.1067559979856014e-05, "learning_rate": 7.160319816411484e-05, "loss": 0.0, "step": 617 }, { "epoch": 0.4, "grad_norm": 5.393195533542894e-05, "learning_rate": 7.15051117788505e-05, "loss": 0.0, "step": 618 }, { "epoch": 0.4006472491909385, "grad_norm": 5.059207614976913e-05, "learning_rate": 7.140692374400712e-05, "loss": 0.0, "step": 619 }, { "epoch": 0.40129449838187703, "grad_norm": 5.036088259657845e-05, "learning_rate": 7.130863452369636e-05, "loss": 0.0, "step": 620 }, { "epoch": 0.40194174757281553, "grad_norm": 5.442826295620762e-05, "learning_rate": 7.121024458250809e-05, "loss": 0.0, "step": 621 }, { "epoch": 0.40258899676375404, "grad_norm": 5.733288708142936e-05, "learning_rate": 7.111175438550832e-05, "loss": 0.0, "step": 622 }, { "epoch": 0.40323624595469254, "grad_norm": 5.5671185691608116e-05, "learning_rate": 7.10131643982369e-05, "loss": 0.0, "step": 623 }, { "epoch": 0.40388349514563104, "grad_norm": 5.4703054047422484e-05, "learning_rate": 7.091447508670543e-05, "loss": 0.0, "step": 624 }, { "epoch": 0.4045307443365696, "grad_norm": 5.154523023520596e-05, "learning_rate": 7.081568691739492e-05, "loss": 0.0, "step": 625 }, { "epoch": 0.4051779935275081, "grad_norm": 5.468021481647156e-05, "learning_rate": 7.07168003572537e-05, "loss": 0.0, "step": 626 }, { "epoch": 0.4058252427184466, "grad_norm": 5.179894287721254e-05, "learning_rate": 7.061781587369519e-05, "loss": 0.0, "step": 627 }, { "epoch": 0.4064724919093851, "grad_norm": 5.227432120591402e-05, "learning_rate": 7.051873393459558e-05, "loss": 0.0, "step": 628 }, { "epoch": 0.4071197411003236, "grad_norm": 5.037782830186188e-05, "learning_rate": 7.041955500829181e-05, "loss": 0.0, "step": 629 }, { "epoch": 0.4077669902912621, "grad_norm": 5.2756837249035016e-05, "learning_rate": 7.032027956357923e-05, "loss": 0.0, "step": 630 }, { "epoch": 0.4084142394822006, "grad_norm": 5.346348916646093e-05, "learning_rate": 7.022090806970937e-05, "loss": 0.0, "step": 631 }, { "epoch": 0.4090614886731392, "grad_norm": 4.8679852625355124e-05, "learning_rate": 7.012144099638779e-05, "loss": 0.0, "step": 632 }, { "epoch": 0.4097087378640777, "grad_norm": 5.563655940932222e-05, "learning_rate": 7.002187881377183e-05, "loss": 0.0, "step": 633 }, { "epoch": 0.4103559870550162, "grad_norm": 5.6568733270978555e-05, "learning_rate": 6.99222219924684e-05, "loss": 0.0, "step": 634 }, { "epoch": 0.4110032362459547, "grad_norm": 5.179947038413957e-05, "learning_rate": 6.982247100353172e-05, "loss": 0.0, "step": 635 }, { "epoch": 0.4116504854368932, "grad_norm": 5.131082434672862e-05, "learning_rate": 6.972262631846113e-05, "loss": 0.0, "step": 636 }, { "epoch": 0.4122977346278317, "grad_norm": 5.130294812261127e-05, "learning_rate": 6.962268840919887e-05, "loss": 0.0, "step": 637 }, { "epoch": 0.4129449838187702, "grad_norm": 5.30007055203896e-05, "learning_rate": 6.952265774812779e-05, "loss": 0.0, "step": 638 }, { "epoch": 0.41359223300970877, "grad_norm": 5.083163341623731e-05, "learning_rate": 6.942253480806917e-05, "loss": 0.0, "step": 639 }, { "epoch": 0.41423948220064727, "grad_norm": 5.2279017836553976e-05, "learning_rate": 6.932232006228051e-05, "loss": 0.0, "step": 640 }, { "epoch": 0.4148867313915858, "grad_norm": 5.0820457545341924e-05, "learning_rate": 6.922201398445319e-05, "loss": 0.0, "step": 641 }, { "epoch": 0.4155339805825243, "grad_norm": 5.323597360984422e-05, "learning_rate": 6.912161704871032e-05, "loss": 0.0, "step": 642 }, { "epoch": 0.4161812297734628, "grad_norm": 5.1305454690009356e-05, "learning_rate": 6.90211297296045e-05, "loss": 0.0, "step": 643 }, { "epoch": 0.4168284789644013, "grad_norm": 5.370499275159091e-05, "learning_rate": 6.892055250211552e-05, "loss": 0.0, "step": 644 }, { "epoch": 0.4174757281553398, "grad_norm": 4.937902122037485e-05, "learning_rate": 6.881988584164816e-05, "loss": 0.0, "step": 645 }, { "epoch": 0.4181229773462783, "grad_norm": 4.936994446325116e-05, "learning_rate": 6.871913022402991e-05, "loss": 0.0, "step": 646 }, { "epoch": 0.41877022653721685, "grad_norm": 5.080566188553348e-05, "learning_rate": 6.861828612550876e-05, "loss": 0.0, "step": 647 }, { "epoch": 0.41941747572815535, "grad_norm": 5.1540286222007126e-05, "learning_rate": 6.851735402275094e-05, "loss": 0.0, "step": 648 }, { "epoch": 0.42006472491909386, "grad_norm": 5.034006971982308e-05, "learning_rate": 6.841633439283862e-05, "loss": 0.0, "step": 649 }, { "epoch": 0.42071197411003236, "grad_norm": 5.419103763415478e-05, "learning_rate": 6.831522771326769e-05, "loss": 0.0, "step": 650 }, { "epoch": 0.42135922330097086, "grad_norm": 5.4842279496369883e-05, "learning_rate": 6.821403446194555e-05, "loss": 0.0, "step": 651 }, { "epoch": 0.42200647249190937, "grad_norm": 5.229767702985555e-05, "learning_rate": 6.811275511718878e-05, "loss": 0.0, "step": 652 }, { "epoch": 0.42265372168284787, "grad_norm": 5.225661880103871e-05, "learning_rate": 6.801139015772085e-05, "loss": 0.0, "step": 653 }, { "epoch": 0.42330097087378643, "grad_norm": 4.986854764865711e-05, "learning_rate": 6.790994006267002e-05, "loss": 0.0, "step": 654 }, { "epoch": 0.42394822006472493, "grad_norm": 4.937982157571241e-05, "learning_rate": 6.780840531156685e-05, "loss": 0.0, "step": 655 }, { "epoch": 0.42459546925566344, "grad_norm": 5.059117393102497e-05, "learning_rate": 6.770678638434213e-05, "loss": 0.0, "step": 656 }, { "epoch": 0.42524271844660194, "grad_norm": 4.699871351476759e-05, "learning_rate": 6.760508376132452e-05, "loss": 0.0, "step": 657 }, { "epoch": 0.42588996763754045, "grad_norm": 4.815890497411601e-05, "learning_rate": 6.750329792323824e-05, "loss": 0.0, "step": 658 }, { "epoch": 0.42653721682847895, "grad_norm": 4.67232457594946e-05, "learning_rate": 6.740142935120089e-05, "loss": 0.0, "step": 659 }, { "epoch": 0.42718446601941745, "grad_norm": 5.342940494301729e-05, "learning_rate": 6.729947852672114e-05, "loss": 0.0, "step": 660 }, { "epoch": 0.427831715210356, "grad_norm": 4.8874368076212704e-05, "learning_rate": 6.719744593169641e-05, "loss": 0.0, "step": 661 }, { "epoch": 0.4284789644012945, "grad_norm": 5.0067625124938786e-05, "learning_rate": 6.709533204841068e-05, "loss": 0.0, "step": 662 }, { "epoch": 0.429126213592233, "grad_norm": 5.081708513898775e-05, "learning_rate": 6.699313735953213e-05, "loss": 0.0, "step": 663 }, { "epoch": 0.4297734627831715, "grad_norm": 4.936955883749761e-05, "learning_rate": 6.689086234811087e-05, "loss": 0.0, "step": 664 }, { "epoch": 0.43042071197411, "grad_norm": 5.058174428995699e-05, "learning_rate": 6.678850749757673e-05, "loss": 0.0, "step": 665 }, { "epoch": 0.43106796116504853, "grad_norm": 4.890113268629648e-05, "learning_rate": 6.668607329173686e-05, "loss": 0.0, "step": 666 }, { "epoch": 0.43171521035598703, "grad_norm": 4.6269604354165494e-05, "learning_rate": 6.658356021477356e-05, "loss": 0.0, "step": 667 }, { "epoch": 0.4323624595469256, "grad_norm": 4.9595349992159754e-05, "learning_rate": 6.648096875124188e-05, "loss": 0.0, "step": 668 }, { "epoch": 0.4330097087378641, "grad_norm": 4.770729719894007e-05, "learning_rate": 6.637829938606743e-05, "loss": 0.0, "step": 669 }, { "epoch": 0.4336569579288026, "grad_norm": 4.913725933874957e-05, "learning_rate": 6.627555260454403e-05, "loss": 0.0, "step": 670 }, { "epoch": 0.4343042071197411, "grad_norm": 5.011080065742135e-05, "learning_rate": 6.617272889233142e-05, "loss": 0.0, "step": 671 }, { "epoch": 0.4349514563106796, "grad_norm": 4.914828241453506e-05, "learning_rate": 6.606982873545296e-05, "loss": 0.0, "step": 672 }, { "epoch": 0.4355987055016181, "grad_norm": 5.132549267727882e-05, "learning_rate": 6.596685262029339e-05, "loss": 0.0, "step": 673 }, { "epoch": 0.4362459546925566, "grad_norm": 5.059219620306976e-05, "learning_rate": 6.586380103359646e-05, "loss": 0.0, "step": 674 }, { "epoch": 0.4368932038834951, "grad_norm": 4.890982017968781e-05, "learning_rate": 6.576067446246263e-05, "loss": 0.0, "step": 675 }, { "epoch": 0.4375404530744337, "grad_norm": 5.15807478222996e-05, "learning_rate": 6.565747339434683e-05, "loss": 0.0, "step": 676 }, { "epoch": 0.4381877022653722, "grad_norm": 5.034143396187574e-05, "learning_rate": 6.555419831705613e-05, "loss": 0.0, "step": 677 }, { "epoch": 0.4388349514563107, "grad_norm": 4.819418609258719e-05, "learning_rate": 6.545084971874738e-05, "loss": 0.0, "step": 678 }, { "epoch": 0.4394822006472492, "grad_norm": 4.846400042879395e-05, "learning_rate": 6.534742808792498e-05, "loss": 0.0, "step": 679 }, { "epoch": 0.4401294498381877, "grad_norm": 4.771102248923853e-05, "learning_rate": 6.524393391343853e-05, "loss": 0.0, "step": 680 }, { "epoch": 0.4407766990291262, "grad_norm": 4.8666381189832464e-05, "learning_rate": 6.514036768448051e-05, "loss": 0.0, "step": 681 }, { "epoch": 0.4414239482200647, "grad_norm": 4.9625832616584376e-05, "learning_rate": 6.5036729890584e-05, "loss": 0.0, "step": 682 }, { "epoch": 0.44207119741100326, "grad_norm": 4.936987170367502e-05, "learning_rate": 6.493302102162037e-05, "loss": 0.0, "step": 683 }, { "epoch": 0.44271844660194176, "grad_norm": 4.840767360292375e-05, "learning_rate": 6.482924156779692e-05, "loss": 0.0, "step": 684 }, { "epoch": 0.44336569579288027, "grad_norm": 4.722722223959863e-05, "learning_rate": 6.472539201965457e-05, "loss": 0.0, "step": 685 }, { "epoch": 0.44401294498381877, "grad_norm": 4.770583109348081e-05, "learning_rate": 6.462147286806559e-05, "loss": 0.0, "step": 686 }, { "epoch": 0.4446601941747573, "grad_norm": 4.8435787903144956e-05, "learning_rate": 6.451748460423126e-05, "loss": 0.0, "step": 687 }, { "epoch": 0.4453074433656958, "grad_norm": 5.057609814684838e-05, "learning_rate": 6.441342771967951e-05, "loss": 0.0, "step": 688 }, { "epoch": 0.4459546925566343, "grad_norm": 4.961356899002567e-05, "learning_rate": 6.430930270626261e-05, "loss": 0.0, "step": 689 }, { "epoch": 0.44660194174757284, "grad_norm": 4.961855665897019e-05, "learning_rate": 6.42051100561549e-05, "loss": 0.0, "step": 690 }, { "epoch": 0.44724919093851134, "grad_norm": 4.912636723020114e-05, "learning_rate": 6.41008502618504e-05, "loss": 0.0, "step": 691 }, { "epoch": 0.44789644012944985, "grad_norm": 4.8427446017740294e-05, "learning_rate": 6.399652381616052e-05, "loss": 0.0, "step": 692 }, { "epoch": 0.44854368932038835, "grad_norm": 4.674507363233715e-05, "learning_rate": 6.389213121221166e-05, "loss": 0.0, "step": 693 }, { "epoch": 0.44919093851132685, "grad_norm": 4.937741323374212e-05, "learning_rate": 6.378767294344304e-05, "loss": 0.0, "step": 694 }, { "epoch": 0.44983818770226536, "grad_norm": 4.841889312956482e-05, "learning_rate": 6.368314950360415e-05, "loss": 0.0, "step": 695 }, { "epoch": 0.45048543689320386, "grad_norm": 4.937868288834579e-05, "learning_rate": 6.35785613867526e-05, "loss": 0.0, "step": 696 }, { "epoch": 0.4511326860841424, "grad_norm": 4.889784031547606e-05, "learning_rate": 6.347390908725168e-05, "loss": 0.0, "step": 697 }, { "epoch": 0.4517799352750809, "grad_norm": 5.035642971051857e-05, "learning_rate": 6.336919309976809e-05, "loss": 0.0, "step": 698 }, { "epoch": 0.4524271844660194, "grad_norm": 4.410008841659874e-05, "learning_rate": 6.326441391926952e-05, "loss": 0.0, "step": 699 }, { "epoch": 0.45307443365695793, "grad_norm": 4.744217949337326e-05, "learning_rate": 6.31595720410224e-05, "loss": 0.0, "step": 700 }, { "epoch": 0.45372168284789643, "grad_norm": 5.192052776692435e-05, "learning_rate": 6.305466796058949e-05, "loss": 0.0, "step": 701 }, { "epoch": 0.45436893203883494, "grad_norm": 4.217282184981741e-05, "learning_rate": 6.294970217382758e-05, "loss": 0.0, "step": 702 }, { "epoch": 0.45501618122977344, "grad_norm": 4.672434079111554e-05, "learning_rate": 6.284467517688515e-05, "loss": 0.0, "step": 703 }, { "epoch": 0.455663430420712, "grad_norm": 5.224458800512366e-05, "learning_rate": 6.273958746619994e-05, "loss": 0.0, "step": 704 }, { "epoch": 0.4563106796116505, "grad_norm": 4.629791874322109e-05, "learning_rate": 6.263443953849674e-05, "loss": 0.0, "step": 705 }, { "epoch": 0.456957928802589, "grad_norm": 4.8453850467922166e-05, "learning_rate": 6.252923189078495e-05, "loss": 0.0, "step": 706 }, { "epoch": 0.4576051779935275, "grad_norm": 4.7916651965351775e-05, "learning_rate": 6.242396502035622e-05, "loss": 0.0, "step": 707 }, { "epoch": 0.458252427184466, "grad_norm": 4.4088556023780257e-05, "learning_rate": 6.231863942478217e-05, "loss": 0.0, "step": 708 }, { "epoch": 0.4588996763754045, "grad_norm": 4.866320523433387e-05, "learning_rate": 6.221325560191203e-05, "loss": 0.0, "step": 709 }, { "epoch": 0.459546925566343, "grad_norm": 4.6970821131253615e-05, "learning_rate": 6.210781404987016e-05, "loss": 0.0, "step": 710 }, { "epoch": 0.4601941747572815, "grad_norm": 5.0590839236974716e-05, "learning_rate": 6.200231526705386e-05, "loss": 0.0, "step": 711 }, { "epoch": 0.4608414239482201, "grad_norm": 4.982233440387063e-05, "learning_rate": 6.189675975213094e-05, "loss": 0.0, "step": 712 }, { "epoch": 0.4614886731391586, "grad_norm": 4.5049651816952974e-05, "learning_rate": 6.179114800403738e-05, "loss": 0.0, "step": 713 }, { "epoch": 0.4621359223300971, "grad_norm": 4.480017742025666e-05, "learning_rate": 6.168548052197492e-05, "loss": 0.0, "step": 714 }, { "epoch": 0.4627831715210356, "grad_norm": 4.360647653811611e-05, "learning_rate": 6.157975780540877e-05, "loss": 0.0, "step": 715 }, { "epoch": 0.4634304207119741, "grad_norm": 4.217790046823211e-05, "learning_rate": 6.147398035406523e-05, "loss": 0.0, "step": 716 }, { "epoch": 0.4640776699029126, "grad_norm": 4.674730007536709e-05, "learning_rate": 6.136814866792928e-05, "loss": 0.0, "step": 717 }, { "epoch": 0.4647249190938511, "grad_norm": 4.721017103292979e-05, "learning_rate": 6.126226324724231e-05, "loss": 0.0, "step": 718 }, { "epoch": 0.46537216828478967, "grad_norm": 4.769161023432389e-05, "learning_rate": 6.115632459249963e-05, "loss": 0.0, "step": 719 }, { "epoch": 0.46601941747572817, "grad_norm": 4.67365316580981e-05, "learning_rate": 6.105033320444824e-05, "loss": 0.0, "step": 720 }, { "epoch": 0.4666666666666667, "grad_norm": 4.8410933231934905e-05, "learning_rate": 6.094428958408439e-05, "loss": 0.0, "step": 721 }, { "epoch": 0.4673139158576052, "grad_norm": 4.8187197535298765e-05, "learning_rate": 6.083819423265116e-05, "loss": 0.0, "step": 722 }, { "epoch": 0.4679611650485437, "grad_norm": 4.889259798801504e-05, "learning_rate": 6.073204765163621e-05, "loss": 0.0, "step": 723 }, { "epoch": 0.4686084142394822, "grad_norm": 4.845989678869955e-05, "learning_rate": 6.062585034276935e-05, "loss": 0.0, "step": 724 }, { "epoch": 0.4692556634304207, "grad_norm": 4.8425132263218984e-05, "learning_rate": 6.0519602808020136e-05, "loss": 0.0, "step": 725 }, { "epoch": 0.46990291262135925, "grad_norm": 4.724468453787267e-05, "learning_rate": 6.0413305549595546e-05, "loss": 0.0, "step": 726 }, { "epoch": 0.47055016181229775, "grad_norm": 4.559083026833832e-05, "learning_rate": 6.0306959069937584e-05, "loss": 0.0, "step": 727 }, { "epoch": 0.47119741100323626, "grad_norm": 4.701264697359875e-05, "learning_rate": 6.020056387172093e-05, "loss": 0.0, "step": 728 }, { "epoch": 0.47184466019417476, "grad_norm": 4.7216435632435605e-05, "learning_rate": 6.009412045785051e-05, "loss": 0.0, "step": 729 }, { "epoch": 0.47249190938511326, "grad_norm": 4.624966823030263e-05, "learning_rate": 5.9987629331459206e-05, "loss": 0.0, "step": 730 }, { "epoch": 0.47313915857605177, "grad_norm": 4.9847843911265954e-05, "learning_rate": 5.9881090995905375e-05, "loss": 0.0, "step": 731 }, { "epoch": 0.47378640776699027, "grad_norm": 4.362803156254813e-05, "learning_rate": 5.9774505954770546e-05, "loss": 0.0, "step": 732 }, { "epoch": 0.47443365695792883, "grad_norm": 4.6514705900335684e-05, "learning_rate": 5.9667874711857e-05, "loss": 0.0, "step": 733 }, { "epoch": 0.47508090614886733, "grad_norm": 4.6251367166405544e-05, "learning_rate": 5.9561197771185416e-05, "loss": 0.0, "step": 734 }, { "epoch": 0.47572815533980584, "grad_norm": 4.5785654947394505e-05, "learning_rate": 5.9454475636992476e-05, "loss": 0.0, "step": 735 }, { "epoch": 0.47637540453074434, "grad_norm": 4.218309550196864e-05, "learning_rate": 5.934770881372849e-05, "loss": 0.0, "step": 736 }, { "epoch": 0.47702265372168284, "grad_norm": 4.553909820970148e-05, "learning_rate": 5.924089780605496e-05, "loss": 0.0, "step": 737 }, { "epoch": 0.47766990291262135, "grad_norm": 4.5806194975739345e-05, "learning_rate": 5.9134043118842284e-05, "loss": 0.0, "step": 738 }, { "epoch": 0.47831715210355985, "grad_norm": 4.506959521677345e-05, "learning_rate": 5.9027145257167306e-05, "loss": 0.0, "step": 739 }, { "epoch": 0.47896440129449835, "grad_norm": 4.746875856653787e-05, "learning_rate": 5.892020472631092e-05, "loss": 0.0, "step": 740 }, { "epoch": 0.4796116504854369, "grad_norm": 4.4823613279731944e-05, "learning_rate": 5.8813222031755775e-05, "loss": 0.0, "step": 741 }, { "epoch": 0.4802588996763754, "grad_norm": 4.484160672291182e-05, "learning_rate": 5.870619767918376e-05, "loss": 0.0, "step": 742 }, { "epoch": 0.4809061488673139, "grad_norm": 4.4349209929350764e-05, "learning_rate": 5.859913217447367e-05, "loss": 0.0, "step": 743 }, { "epoch": 0.4815533980582524, "grad_norm": 4.578748848871328e-05, "learning_rate": 5.849202602369883e-05, "loss": 0.0, "step": 744 }, { "epoch": 0.48220064724919093, "grad_norm": 4.12247609347105e-05, "learning_rate": 5.838487973312472e-05, "loss": 0.0, "step": 745 }, { "epoch": 0.48284789644012943, "grad_norm": 3.980035398853943e-05, "learning_rate": 5.82776938092065e-05, "loss": 0.0, "step": 746 }, { "epoch": 0.48349514563106794, "grad_norm": 4.1708455682964996e-05, "learning_rate": 5.81704687585867e-05, "loss": 0.0, "step": 747 }, { "epoch": 0.4841423948220065, "grad_norm": 4.09717031288892e-05, "learning_rate": 5.8063205088092786e-05, "loss": 0.0, "step": 748 }, { "epoch": 0.484789644012945, "grad_norm": 4.625094879884273e-05, "learning_rate": 5.795590330473476e-05, "loss": 0.0, "step": 749 }, { "epoch": 0.4854368932038835, "grad_norm": 4.768732105731033e-05, "learning_rate": 5.784856391570279e-05, "loss": 0.0, "step": 750 }, { "epoch": 0.486084142394822, "grad_norm": 4.750502193928696e-05, "learning_rate": 5.774118742836478e-05, "loss": 0.0, "step": 751 }, { "epoch": 0.4867313915857605, "grad_norm": 4.3600306526059285e-05, "learning_rate": 5.763377435026403e-05, "loss": 0.0, "step": 752 }, { "epoch": 0.487378640776699, "grad_norm": 4.24023273808416e-05, "learning_rate": 5.752632518911673e-05, "loss": 0.0, "step": 753 }, { "epoch": 0.4880258899676375, "grad_norm": 4.360372622613795e-05, "learning_rate": 5.7418840452809654e-05, "loss": 0.0, "step": 754 }, { "epoch": 0.4886731391585761, "grad_norm": 4.409926259540953e-05, "learning_rate": 5.731132064939777e-05, "loss": 0.0, "step": 755 }, { "epoch": 0.4893203883495146, "grad_norm": 4.003573485533707e-05, "learning_rate": 5.720376628710176e-05, "loss": 0.0, "step": 756 }, { "epoch": 0.4899676375404531, "grad_norm": 4.168773739365861e-05, "learning_rate": 5.7096177874305654e-05, "loss": 0.0, "step": 757 }, { "epoch": 0.4906148867313916, "grad_norm": 4.455407542991452e-05, "learning_rate": 5.698855591955447e-05, "loss": 0.0, "step": 758 }, { "epoch": 0.4912621359223301, "grad_norm": 4.5999982830835506e-05, "learning_rate": 5.6880900931551703e-05, "loss": 0.0, "step": 759 }, { "epoch": 0.4919093851132686, "grad_norm": 4.0020739106694236e-05, "learning_rate": 5.677321341915707e-05, "loss": 0.0, "step": 760 }, { "epoch": 0.4925566343042071, "grad_norm": 4.288522177375853e-05, "learning_rate": 5.666549389138395e-05, "loss": 0.0, "step": 761 }, { "epoch": 0.49320388349514566, "grad_norm": 4.168828672845848e-05, "learning_rate": 5.655774285739709e-05, "loss": 0.0, "step": 762 }, { "epoch": 0.49385113268608416, "grad_norm": 4.145455750403926e-05, "learning_rate": 5.644996082651017e-05, "loss": 0.0, "step": 763 }, { "epoch": 0.49449838187702266, "grad_norm": 4.4583764974959195e-05, "learning_rate": 5.634214830818335e-05, "loss": 0.0, "step": 764 }, { "epoch": 0.49514563106796117, "grad_norm": 4.145608181715943e-05, "learning_rate": 5.623430581202091e-05, "loss": 0.0, "step": 765 }, { "epoch": 0.49579288025889967, "grad_norm": 4.1686416807351634e-05, "learning_rate": 5.612643384776886e-05, "loss": 0.0, "step": 766 }, { "epoch": 0.4964401294498382, "grad_norm": 4.24332611146383e-05, "learning_rate": 5.601853292531245e-05, "loss": 0.0, "step": 767 }, { "epoch": 0.4970873786407767, "grad_norm": 4.4572119804797694e-05, "learning_rate": 5.591060355467385e-05, "loss": 0.0, "step": 768 }, { "epoch": 0.49773462783171524, "grad_norm": 4.5774020691169426e-05, "learning_rate": 5.5802646246009626e-05, "loss": 0.0, "step": 769 }, { "epoch": 0.49838187702265374, "grad_norm": 4.267479744157754e-05, "learning_rate": 5.569466150960852e-05, "loss": 0.0, "step": 770 }, { "epoch": 0.49902912621359224, "grad_norm": 4.0287806768901646e-05, "learning_rate": 5.558664985588879e-05, "loss": 0.0, "step": 771 }, { "epoch": 0.49967637540453075, "grad_norm": 4.5812648750143126e-05, "learning_rate": 5.547861179539604e-05, "loss": 0.0, "step": 772 }, { "epoch": 0.5003236245954693, "grad_norm": 4.697679469245486e-05, "learning_rate": 5.537054783880061e-05, "loss": 0.0, "step": 773 }, { "epoch": 0.5009708737864078, "grad_norm": 4.484185774344951e-05, "learning_rate": 5.526245849689528e-05, "loss": 0.0, "step": 774 }, { "epoch": 0.5016181229773463, "grad_norm": 4.5308384869713336e-05, "learning_rate": 5.51543442805928e-05, "loss": 0.0, "step": 775 }, { "epoch": 0.5022653721682848, "grad_norm": 4.4594686187338084e-05, "learning_rate": 5.504620570092351e-05, "loss": 0.0, "step": 776 }, { "epoch": 0.5029126213592233, "grad_norm": 4.171795444563031e-05, "learning_rate": 5.4938043269032944e-05, "loss": 0.0, "step": 777 }, { "epoch": 0.5035598705501618, "grad_norm": 4.628577153198421e-05, "learning_rate": 5.4829857496179335e-05, "loss": 0.0, "step": 778 }, { "epoch": 0.5042071197411003, "grad_norm": 3.835981624433771e-05, "learning_rate": 5.472164889373123e-05, "loss": 0.0, "step": 779 }, { "epoch": 0.5048543689320388, "grad_norm": 4.481455835048109e-05, "learning_rate": 5.4613417973165106e-05, "loss": 0.0, "step": 780 }, { "epoch": 0.5055016181229773, "grad_norm": 4.41069933003746e-05, "learning_rate": 5.450516524606296e-05, "loss": 0.0, "step": 781 }, { "epoch": 0.5061488673139158, "grad_norm": 4.291281948098913e-05, "learning_rate": 5.439689122410982e-05, "loss": 0.0, "step": 782 }, { "epoch": 0.5067961165048543, "grad_norm": 4.5069289626553655e-05, "learning_rate": 5.428859641909142e-05, "loss": 0.0, "step": 783 }, { "epoch": 0.5074433656957928, "grad_norm": 4.146617720834911e-05, "learning_rate": 5.418028134289167e-05, "loss": 0.0, "step": 784 }, { "epoch": 0.5080906148867314, "grad_norm": 4.3376108806114644e-05, "learning_rate": 5.4071946507490336e-05, "loss": 0.0, "step": 785 }, { "epoch": 0.5087378640776699, "grad_norm": 4.409583561937325e-05, "learning_rate": 5.396359242496056e-05, "loss": 0.0, "step": 786 }, { "epoch": 0.5093851132686085, "grad_norm": 4.265968163963407e-05, "learning_rate": 5.385521960746649e-05, "loss": 0.0, "step": 787 }, { "epoch": 0.510032362459547, "grad_norm": 4.195278597762808e-05, "learning_rate": 5.374682856726081e-05, "loss": 0.0, "step": 788 }, { "epoch": 0.5106796116504855, "grad_norm": 4.387125591165386e-05, "learning_rate": 5.363841981668235e-05, "loss": 0.0, "step": 789 }, { "epoch": 0.511326860841424, "grad_norm": 4.073786476510577e-05, "learning_rate": 5.3529993868153604e-05, "loss": 0.0, "step": 790 }, { "epoch": 0.5119741100323625, "grad_norm": 4.3137239117641e-05, "learning_rate": 5.3421551234178436e-05, "loss": 0.0, "step": 791 }, { "epoch": 0.512621359223301, "grad_norm": 4.4347219954943284e-05, "learning_rate": 5.331309242733953e-05, "loss": 0.0, "step": 792 }, { "epoch": 0.5132686084142395, "grad_norm": 4.243478906573728e-05, "learning_rate": 5.320461796029601e-05, "loss": 0.0, "step": 793 }, { "epoch": 0.513915857605178, "grad_norm": 3.9776128687663004e-05, "learning_rate": 5.3096128345781046e-05, "loss": 0.0, "step": 794 }, { "epoch": 0.5145631067961165, "grad_norm": 3.715413549798541e-05, "learning_rate": 5.29876240965994e-05, "loss": 0.0, "step": 795 }, { "epoch": 0.515210355987055, "grad_norm": 3.713977639563382e-05, "learning_rate": 5.287910572562499e-05, "loss": 0.0, "step": 796 }, { "epoch": 0.5158576051779935, "grad_norm": 3.929944432456978e-05, "learning_rate": 5.27705737457985e-05, "loss": 0.0, "step": 797 }, { "epoch": 0.516504854368932, "grad_norm": 4.098166391486302e-05, "learning_rate": 5.2662028670124954e-05, "loss": 0.0, "step": 798 }, { "epoch": 0.5171521035598705, "grad_norm": 4.410306064528413e-05, "learning_rate": 5.255347101167127e-05, "loss": 0.0, "step": 799 }, { "epoch": 0.517799352750809, "grad_norm": 4.0035927668213844e-05, "learning_rate": 5.244490128356381e-05, "loss": 0.0, "step": 800 }, { "epoch": 0.5184466019417475, "grad_norm": 4.335114863351919e-05, "learning_rate": 5.2336319998986e-05, "loss": 0.0, "step": 801 }, { "epoch": 0.5190938511326861, "grad_norm": 4.144165723118931e-05, "learning_rate": 5.222772767117593e-05, "loss": 0.0, "step": 802 }, { "epoch": 0.5197411003236246, "grad_norm": 3.7855093978578225e-05, "learning_rate": 5.211912481342386e-05, "loss": 0.0, "step": 803 }, { "epoch": 0.5203883495145631, "grad_norm": 4.216072920826264e-05, "learning_rate": 5.201051193906981e-05, "loss": 0.0, "step": 804 }, { "epoch": 0.5210355987055016, "grad_norm": 4.026843816973269e-05, "learning_rate": 5.190188956150115e-05, "loss": 0.0, "step": 805 }, { "epoch": 0.5216828478964401, "grad_norm": 3.954804560635239e-05, "learning_rate": 5.179325819415019e-05, "loss": 0.0, "step": 806 }, { "epoch": 0.5223300970873787, "grad_norm": 4.337954305810854e-05, "learning_rate": 5.168461835049172e-05, "loss": 0.0, "step": 807 }, { "epoch": 0.5229773462783172, "grad_norm": 4.708663254859857e-05, "learning_rate": 5.1575970544040595e-05, "loss": 0.0, "step": 808 }, { "epoch": 0.5236245954692557, "grad_norm": 4.1917221096809953e-05, "learning_rate": 5.1467315288349296e-05, "loss": 0.0, "step": 809 }, { "epoch": 0.5242718446601942, "grad_norm": 3.9785547414794564e-05, "learning_rate": 5.135865309700556e-05, "loss": 0.0, "step": 810 }, { "epoch": 0.5249190938511327, "grad_norm": 3.7391633668448776e-05, "learning_rate": 5.124998448362984e-05, "loss": 0.0, "step": 811 }, { "epoch": 0.5255663430420712, "grad_norm": 4.0735230868449435e-05, "learning_rate": 5.1141309961872985e-05, "loss": 0.0, "step": 812 }, { "epoch": 0.5262135922330097, "grad_norm": 4.2899075197055936e-05, "learning_rate": 5.1032630045413785e-05, "loss": 0.0, "step": 813 }, { "epoch": 0.5268608414239482, "grad_norm": 3.8340593164321035e-05, "learning_rate": 5.092394524795649e-05, "loss": 0.0, "step": 814 }, { "epoch": 0.5275080906148867, "grad_norm": 4.2898951505776495e-05, "learning_rate": 5.081525608322847e-05, "loss": 0.0, "step": 815 }, { "epoch": 0.5281553398058253, "grad_norm": 4.168473242316395e-05, "learning_rate": 5.0706563064977685e-05, "loss": 0.0, "step": 816 }, { "epoch": 0.5288025889967638, "grad_norm": 3.762538472074084e-05, "learning_rate": 5.059786670697034e-05, "loss": 0.0, "step": 817 }, { "epoch": 0.5294498381877023, "grad_norm": 3.9535229007015005e-05, "learning_rate": 5.0489167522988426e-05, "loss": 0.0, "step": 818 }, { "epoch": 0.5300970873786408, "grad_norm": 4.0033439290709794e-05, "learning_rate": 5.038046602682728e-05, "loss": 0.0, "step": 819 }, { "epoch": 0.5307443365695793, "grad_norm": 4.124932820559479e-05, "learning_rate": 5.027176273229317e-05, "loss": 0.0, "step": 820 }, { "epoch": 0.5313915857605178, "grad_norm": 3.931449828087352e-05, "learning_rate": 5.016305815320088e-05, "loss": 0.0, "step": 821 }, { "epoch": 0.5320388349514563, "grad_norm": 4.170251122559421e-05, "learning_rate": 5.0054352803371253e-05, "loss": 0.0, "step": 822 }, { "epoch": 0.5326860841423948, "grad_norm": 4.79331320093479e-05, "learning_rate": 4.994564719662876e-05, "loss": 0.0, "step": 823 }, { "epoch": 0.5333333333333333, "grad_norm": 4.363759217085317e-05, "learning_rate": 4.9836941846799135e-05, "loss": 0.0, "step": 824 }, { "epoch": 0.5339805825242718, "grad_norm": 4.2189978557871655e-05, "learning_rate": 4.972823726770685e-05, "loss": 0.0, "step": 825 }, { "epoch": 0.5346278317152103, "grad_norm": 4.4108546717325225e-05, "learning_rate": 4.961953397317274e-05, "loss": 0.0, "step": 826 }, { "epoch": 0.5352750809061488, "grad_norm": 4.14824862673413e-05, "learning_rate": 4.951083247701158e-05, "loss": 0.0, "step": 827 }, { "epoch": 0.5359223300970873, "grad_norm": 4.5063185098115355e-05, "learning_rate": 4.940213329302968e-05, "loss": 0.0, "step": 828 }, { "epoch": 0.5365695792880258, "grad_norm": 4.38556817243807e-05, "learning_rate": 4.929343693502233e-05, "loss": 0.0, "step": 829 }, { "epoch": 0.5372168284789643, "grad_norm": 4.098743374925107e-05, "learning_rate": 4.918474391677154e-05, "loss": 0.0, "step": 830 }, { "epoch": 0.537864077669903, "grad_norm": 4.2665244109230116e-05, "learning_rate": 4.907605475204352e-05, "loss": 0.0, "step": 831 }, { "epoch": 0.5385113268608415, "grad_norm": 3.908294820575975e-05, "learning_rate": 4.8967369954586226e-05, "loss": 0.0, "step": 832 }, { "epoch": 0.53915857605178, "grad_norm": 3.9310954889515415e-05, "learning_rate": 4.885869003812703e-05, "loss": 0.0, "step": 833 }, { "epoch": 0.5398058252427185, "grad_norm": 3.811647547991015e-05, "learning_rate": 4.875001551637017e-05, "loss": 0.0, "step": 834 }, { "epoch": 0.540453074433657, "grad_norm": 4.3149182602064684e-05, "learning_rate": 4.864134690299445e-05, "loss": 0.0, "step": 835 }, { "epoch": 0.5411003236245955, "grad_norm": 4.5287582906894386e-05, "learning_rate": 4.85326847116507e-05, "loss": 0.0, "step": 836 }, { "epoch": 0.541747572815534, "grad_norm": 4.122861719224602e-05, "learning_rate": 4.8424029455959416e-05, "loss": 0.0, "step": 837 }, { "epoch": 0.5423948220064725, "grad_norm": 3.9791273593436927e-05, "learning_rate": 4.831538164950828e-05, "loss": 0.0, "step": 838 }, { "epoch": 0.543042071197411, "grad_norm": 3.930980165023357e-05, "learning_rate": 4.820674180584983e-05, "loss": 0.0, "step": 839 }, { "epoch": 0.5436893203883495, "grad_norm": 3.8583581044804305e-05, "learning_rate": 4.8098110438498874e-05, "loss": 0.0, "step": 840 }, { "epoch": 0.544336569579288, "grad_norm": 3.978141467086971e-05, "learning_rate": 4.798948806093021e-05, "loss": 0.0, "step": 841 }, { "epoch": 0.5449838187702265, "grad_norm": 3.954466956201941e-05, "learning_rate": 4.788087518657615e-05, "loss": 0.0, "step": 842 }, { "epoch": 0.545631067961165, "grad_norm": 3.810201451415196e-05, "learning_rate": 4.777227232882407e-05, "loss": 0.0, "step": 843 }, { "epoch": 0.5462783171521035, "grad_norm": 3.931068567908369e-05, "learning_rate": 4.766368000101401e-05, "loss": 0.0, "step": 844 }, { "epoch": 0.5469255663430421, "grad_norm": 3.9310780266532674e-05, "learning_rate": 4.755509871643621e-05, "loss": 0.0, "step": 845 }, { "epoch": 0.5475728155339806, "grad_norm": 3.284717968199402e-05, "learning_rate": 4.744652898832874e-05, "loss": 0.0, "step": 846 }, { "epoch": 0.5482200647249191, "grad_norm": 3.691027086460963e-05, "learning_rate": 4.7337971329875044e-05, "loss": 0.0, "step": 847 }, { "epoch": 0.5488673139158576, "grad_norm": 3.7618287024088204e-05, "learning_rate": 4.72294262542015e-05, "loss": 0.0, "step": 848 }, { "epoch": 0.5495145631067961, "grad_norm": 3.642432056949474e-05, "learning_rate": 4.712089427437504e-05, "loss": 0.0, "step": 849 }, { "epoch": 0.5501618122977346, "grad_norm": 4.0243976400233805e-05, "learning_rate": 4.701237590340063e-05, "loss": 0.0, "step": 850 }, { "epoch": 0.5508090614886731, "grad_norm": 5.5335309298243374e-05, "learning_rate": 4.6903871654218965e-05, "loss": 0.0, "step": 851 }, { "epoch": 0.5514563106796116, "grad_norm": 4.105950938537717e-05, "learning_rate": 4.6795382039704005e-05, "loss": 0.0, "step": 852 }, { "epoch": 0.5521035598705502, "grad_norm": 4.026710303151049e-05, "learning_rate": 4.668690757266049e-05, "loss": 0.0, "step": 853 }, { "epoch": 0.5527508090614887, "grad_norm": 3.9539718272862956e-05, "learning_rate": 4.6578448765821575e-05, "loss": 0.0, "step": 854 }, { "epoch": 0.5533980582524272, "grad_norm": 3.761908374144696e-05, "learning_rate": 4.647000613184641e-05, "loss": 0.0, "step": 855 }, { "epoch": 0.5540453074433657, "grad_norm": 3.8575835787924007e-05, "learning_rate": 4.636158018331766e-05, "loss": 0.0, "step": 856 }, { "epoch": 0.5546925566343042, "grad_norm": 3.5938013752456754e-05, "learning_rate": 4.625317143273919e-05, "loss": 0.0, "step": 857 }, { "epoch": 0.5553398058252427, "grad_norm": 4.050503048347309e-05, "learning_rate": 4.614478039253351e-05, "loss": 0.0, "step": 858 }, { "epoch": 0.5559870550161812, "grad_norm": 3.904608092852868e-05, "learning_rate": 4.6036407575039444e-05, "loss": 0.0, "step": 859 }, { "epoch": 0.5566343042071198, "grad_norm": 3.999320324510336e-05, "learning_rate": 4.592805349250969e-05, "loss": 0.0, "step": 860 }, { "epoch": 0.5572815533980583, "grad_norm": 3.761429252335802e-05, "learning_rate": 4.5819718657108354e-05, "loss": 0.0, "step": 861 }, { "epoch": 0.5579288025889968, "grad_norm": 3.61740640073549e-05, "learning_rate": 4.571140358090859e-05, "loss": 0.0, "step": 862 }, { "epoch": 0.5585760517799353, "grad_norm": 3.4761589631671086e-05, "learning_rate": 4.560310877589018e-05, "loss": 0.0, "step": 863 }, { "epoch": 0.5592233009708738, "grad_norm": 3.8587808376178145e-05, "learning_rate": 4.549483475393706e-05, "loss": 0.0, "step": 864 }, { "epoch": 0.5598705501618123, "grad_norm": 4.0003327740123495e-05, "learning_rate": 4.5386582026834906e-05, "loss": 0.0, "step": 865 }, { "epoch": 0.5605177993527508, "grad_norm": 3.78553886548616e-05, "learning_rate": 4.527835110626878e-05, "loss": 0.0, "step": 866 }, { "epoch": 0.5611650485436893, "grad_norm": 4.241176065988839e-05, "learning_rate": 4.517014250382067e-05, "loss": 0.0, "step": 867 }, { "epoch": 0.5618122977346278, "grad_norm": 3.667032069643028e-05, "learning_rate": 4.506195673096705e-05, "loss": 0.0, "step": 868 }, { "epoch": 0.5624595469255663, "grad_norm": 3.9302700315602124e-05, "learning_rate": 4.495379429907648e-05, "loss": 0.0, "step": 869 }, { "epoch": 0.5631067961165048, "grad_norm": 3.8357979065040126e-05, "learning_rate": 4.484565571940722e-05, "loss": 0.0, "step": 870 }, { "epoch": 0.5637540453074433, "grad_norm": 4.05075479648076e-05, "learning_rate": 4.473754150310475e-05, "loss": 0.0, "step": 871 }, { "epoch": 0.5644012944983818, "grad_norm": 3.977595770265907e-05, "learning_rate": 4.4629452161199415e-05, "loss": 0.0, "step": 872 }, { "epoch": 0.5650485436893203, "grad_norm": 3.810037742368877e-05, "learning_rate": 4.452138820460397e-05, "loss": 0.0, "step": 873 }, { "epoch": 0.565695792880259, "grad_norm": 4.028689727419987e-05, "learning_rate": 4.441335014411121e-05, "loss": 0.0, "step": 874 }, { "epoch": 0.5663430420711975, "grad_norm": 4.002659989055246e-05, "learning_rate": 4.43053384903915e-05, "loss": 0.0, "step": 875 }, { "epoch": 0.566990291262136, "grad_norm": 3.8862632209202275e-05, "learning_rate": 4.4197353753990386e-05, "loss": 0.0, "step": 876 }, { "epoch": 0.5676375404530745, "grad_norm": 3.978524910053238e-05, "learning_rate": 4.408939644532616e-05, "loss": 0.0, "step": 877 }, { "epoch": 0.568284789644013, "grad_norm": 3.716369246831164e-05, "learning_rate": 4.398146707468755e-05, "loss": 0.0, "step": 878 }, { "epoch": 0.5689320388349515, "grad_norm": 3.7393459933809936e-05, "learning_rate": 4.387356615223114e-05, "loss": 0.0, "step": 879 }, { "epoch": 0.56957928802589, "grad_norm": 4.025745511171408e-05, "learning_rate": 4.3765694187979085e-05, "loss": 0.0, "step": 880 }, { "epoch": 0.5702265372168285, "grad_norm": 3.8581474655075e-05, "learning_rate": 4.365785169181667e-05, "loss": 0.0, "step": 881 }, { "epoch": 0.570873786407767, "grad_norm": 3.882512464770116e-05, "learning_rate": 4.3550039173489845e-05, "loss": 0.0, "step": 882 }, { "epoch": 0.5715210355987055, "grad_norm": 3.666793054435402e-05, "learning_rate": 4.344225714260293e-05, "loss": 0.0, "step": 883 }, { "epoch": 0.572168284789644, "grad_norm": 3.6187768273521215e-05, "learning_rate": 4.3334506108616064e-05, "loss": 0.0, "step": 884 }, { "epoch": 0.5728155339805825, "grad_norm": 3.836168616544455e-05, "learning_rate": 4.322678658084294e-05, "loss": 0.0, "step": 885 }, { "epoch": 0.573462783171521, "grad_norm": 3.880737494910136e-05, "learning_rate": 4.31190990684483e-05, "loss": 0.0, "step": 886 }, { "epoch": 0.5741100323624595, "grad_norm": 4.146282299188897e-05, "learning_rate": 4.3011444080445545e-05, "loss": 0.0, "step": 887 }, { "epoch": 0.574757281553398, "grad_norm": 3.571038178051822e-05, "learning_rate": 4.290382212569434e-05, "loss": 0.0, "step": 888 }, { "epoch": 0.5754045307443366, "grad_norm": 3.737618317245506e-05, "learning_rate": 4.279623371289824e-05, "loss": 0.0, "step": 889 }, { "epoch": 0.5760517799352751, "grad_norm": 3.7148860428715125e-05, "learning_rate": 4.268867935060223e-05, "loss": 0.0, "step": 890 }, { "epoch": 0.5766990291262136, "grad_norm": 4.338242069934495e-05, "learning_rate": 4.258115954719036e-05, "loss": 0.0, "step": 891 }, { "epoch": 0.5773462783171521, "grad_norm": 3.595654197852127e-05, "learning_rate": 4.2473674810883304e-05, "loss": 0.0, "step": 892 }, { "epoch": 0.5779935275080906, "grad_norm": 3.618249320425093e-05, "learning_rate": 4.2366225649736e-05, "loss": 0.0, "step": 893 }, { "epoch": 0.5786407766990291, "grad_norm": 3.833202208625153e-05, "learning_rate": 4.225881257163523e-05, "loss": 0.0, "step": 894 }, { "epoch": 0.5792880258899676, "grad_norm": 3.235747863072902e-05, "learning_rate": 4.2151436084297216e-05, "loss": 0.0, "step": 895 }, { "epoch": 0.5799352750809061, "grad_norm": 3.378332985448651e-05, "learning_rate": 4.2044096695265245e-05, "loss": 0.0, "step": 896 }, { "epoch": 0.5805825242718446, "grad_norm": 3.2112446206156164e-05, "learning_rate": 4.1936794911907226e-05, "loss": 0.0, "step": 897 }, { "epoch": 0.5812297734627832, "grad_norm": 3.6183479096507654e-05, "learning_rate": 4.182953124141331e-05, "loss": 0.0, "step": 898 }, { "epoch": 0.5818770226537217, "grad_norm": 3.5949447919847444e-05, "learning_rate": 4.1722306190793495e-05, "loss": 0.0, "step": 899 }, { "epoch": 0.5825242718446602, "grad_norm": 4.0268481825478375e-05, "learning_rate": 4.161512026687528e-05, "loss": 0.0, "step": 900 }, { "epoch": 0.5831715210355987, "grad_norm": 3.71346395695582e-05, "learning_rate": 4.150797397630116e-05, "loss": 0.0, "step": 901 }, { "epoch": 0.5838187702265372, "grad_norm": 4.287898264010437e-05, "learning_rate": 4.140086782552635e-05, "loss": 0.0, "step": 902 }, { "epoch": 0.5844660194174758, "grad_norm": 3.8591711927438155e-05, "learning_rate": 4.1293802320816256e-05, "loss": 0.0, "step": 903 }, { "epoch": 0.5851132686084143, "grad_norm": 3.785927765420638e-05, "learning_rate": 4.118677796824424e-05, "loss": 0.0, "step": 904 }, { "epoch": 0.5857605177993528, "grad_norm": 3.592877328628674e-05, "learning_rate": 4.1079795273689084e-05, "loss": 0.0, "step": 905 }, { "epoch": 0.5864077669902913, "grad_norm": 4.05135506298393e-05, "learning_rate": 4.0972854742832705e-05, "loss": 0.0, "step": 906 }, { "epoch": 0.5870550161812298, "grad_norm": 3.644360913312994e-05, "learning_rate": 4.086595688115773e-05, "loss": 0.0, "step": 907 }, { "epoch": 0.5877022653721683, "grad_norm": 3.4268621675437316e-05, "learning_rate": 4.075910219394505e-05, "loss": 0.0, "step": 908 }, { "epoch": 0.5883495145631068, "grad_norm": 3.6900357372360304e-05, "learning_rate": 4.065229118627153e-05, "loss": 0.0, "step": 909 }, { "epoch": 0.5889967637540453, "grad_norm": 3.642131559900008e-05, "learning_rate": 4.054552436300752e-05, "loss": 0.0, "step": 910 }, { "epoch": 0.5896440129449838, "grad_norm": 3.785706576309167e-05, "learning_rate": 4.043880222881459e-05, "loss": 0.0, "step": 911 }, { "epoch": 0.5902912621359223, "grad_norm": 4.0750954212853685e-05, "learning_rate": 4.033212528814302e-05, "loss": 0.0, "step": 912 }, { "epoch": 0.5909385113268608, "grad_norm": 3.76205425709486e-05, "learning_rate": 4.022549404522947e-05, "loss": 0.0, "step": 913 }, { "epoch": 0.5915857605177993, "grad_norm": 3.833993832813576e-05, "learning_rate": 4.011890900409464e-05, "loss": 0.0, "step": 914 }, { "epoch": 0.5922330097087378, "grad_norm": 3.689317964017391e-05, "learning_rate": 4.001237066854081e-05, "loss": 0.0, "step": 915 }, { "epoch": 0.5928802588996763, "grad_norm": 3.47445638908539e-05, "learning_rate": 3.99058795421495e-05, "loss": 0.0, "step": 916 }, { "epoch": 0.5935275080906149, "grad_norm": 3.7394223909359425e-05, "learning_rate": 3.9799436128279076e-05, "loss": 0.0, "step": 917 }, { "epoch": 0.5941747572815534, "grad_norm": 3.7161229556659237e-05, "learning_rate": 3.969304093006242e-05, "loss": 0.0, "step": 918 }, { "epoch": 0.594822006472492, "grad_norm": 3.883292083628476e-05, "learning_rate": 3.9586694450404466e-05, "loss": 0.0, "step": 919 }, { "epoch": 0.5954692556634305, "grad_norm": 3.6900295526720583e-05, "learning_rate": 3.9480397191979876e-05, "loss": 0.0, "step": 920 }, { "epoch": 0.596116504854369, "grad_norm": 3.8830996345495805e-05, "learning_rate": 3.9374149657230645e-05, "loss": 0.0, "step": 921 }, { "epoch": 0.5967637540453075, "grad_norm": 3.668271529022604e-05, "learning_rate": 3.926795234836381e-05, "loss": 0.0, "step": 922 }, { "epoch": 0.597411003236246, "grad_norm": 3.8828358810860664e-05, "learning_rate": 3.916180576734886e-05, "loss": 0.0, "step": 923 }, { "epoch": 0.5980582524271845, "grad_norm": 3.7411311495816335e-05, "learning_rate": 3.9055710415915634e-05, "loss": 0.0, "step": 924 }, { "epoch": 0.598705501618123, "grad_norm": 3.9070429920684546e-05, "learning_rate": 3.894966679555177e-05, "loss": 0.0, "step": 925 }, { "epoch": 0.5993527508090615, "grad_norm": 3.715547427418642e-05, "learning_rate": 3.884367540750039e-05, "loss": 0.0, "step": 926 }, { "epoch": 0.6, "grad_norm": 3.6680274206446484e-05, "learning_rate": 3.873773675275771e-05, "loss": 0.0, "step": 927 }, { "epoch": 0.6006472491909385, "grad_norm": 3.979651955887675e-05, "learning_rate": 3.863185133207073e-05, "loss": 0.0, "step": 928 }, { "epoch": 0.601294498381877, "grad_norm": 3.645272954599932e-05, "learning_rate": 3.8526019645934785e-05, "loss": 0.0, "step": 929 }, { "epoch": 0.6019417475728155, "grad_norm": 3.4284861612832174e-05, "learning_rate": 3.842024219459124e-05, "loss": 0.0, "step": 930 }, { "epoch": 0.602588996763754, "grad_norm": 3.308402665425092e-05, "learning_rate": 3.831451947802509e-05, "loss": 0.0, "step": 931 }, { "epoch": 0.6032362459546926, "grad_norm": 3.666753400466405e-05, "learning_rate": 3.820885199596262e-05, "loss": 0.0, "step": 932 }, { "epoch": 0.6038834951456311, "grad_norm": 3.570736225810833e-05, "learning_rate": 3.8103240247869075e-05, "loss": 0.0, "step": 933 }, { "epoch": 0.6045307443365696, "grad_norm": 3.188321352354251e-05, "learning_rate": 3.7997684732946154e-05, "loss": 0.0, "step": 934 }, { "epoch": 0.6051779935275081, "grad_norm": 3.283888509031385e-05, "learning_rate": 3.789218595012986e-05, "loss": 0.0, "step": 935 }, { "epoch": 0.6058252427184466, "grad_norm": 3.595947055146098e-05, "learning_rate": 3.778674439808799e-05, "loss": 0.0, "step": 936 }, { "epoch": 0.6064724919093851, "grad_norm": 3.5464996472001076e-05, "learning_rate": 3.768136057521783e-05, "loss": 0.0, "step": 937 }, { "epoch": 0.6071197411003236, "grad_norm": 3.594781082938425e-05, "learning_rate": 3.757603497964379e-05, "loss": 0.0, "step": 938 }, { "epoch": 0.6077669902912621, "grad_norm": 3.618836490204558e-05, "learning_rate": 3.7470768109215066e-05, "loss": 0.0, "step": 939 }, { "epoch": 0.6084142394822006, "grad_norm": 3.260416997363791e-05, "learning_rate": 3.736556046150327e-05, "loss": 0.0, "step": 940 }, { "epoch": 0.6090614886731391, "grad_norm": 3.738398299901746e-05, "learning_rate": 3.726041253380007e-05, "loss": 0.0, "step": 941 }, { "epoch": 0.6097087378640776, "grad_norm": 3.4758362744469196e-05, "learning_rate": 3.715532482311487e-05, "loss": 0.0, "step": 942 }, { "epoch": 0.6103559870550161, "grad_norm": 3.355454100528732e-05, "learning_rate": 3.705029782617243e-05, "loss": 0.0, "step": 943 }, { "epoch": 0.6110032362459547, "grad_norm": 3.1404786568600684e-05, "learning_rate": 3.6945332039410526e-05, "loss": 0.0, "step": 944 }, { "epoch": 0.6116504854368932, "grad_norm": 3.1153653253568336e-05, "learning_rate": 3.684042795897761e-05, "loss": 0.0, "step": 945 }, { "epoch": 0.6122977346278318, "grad_norm": 3.008799467352219e-05, "learning_rate": 3.673558608073049e-05, "loss": 0.0, "step": 946 }, { "epoch": 0.6129449838187703, "grad_norm": 3.377695247763768e-05, "learning_rate": 3.663080690023192e-05, "loss": 0.0, "step": 947 }, { "epoch": 0.6135922330097088, "grad_norm": 3.330317122163251e-05, "learning_rate": 3.652609091274832e-05, "loss": 0.0, "step": 948 }, { "epoch": 0.6142394822006473, "grad_norm": 3.546431980794296e-05, "learning_rate": 3.64214386132474e-05, "loss": 0.0, "step": 949 }, { "epoch": 0.6148867313915858, "grad_norm": 3.834381641354412e-05, "learning_rate": 3.631685049639586e-05, "loss": 0.0, "step": 950 }, { "epoch": 0.6155339805825243, "grad_norm": 3.666271368274465e-05, "learning_rate": 3.621232705655697e-05, "loss": 0.0, "step": 951 }, { "epoch": 0.6161812297734628, "grad_norm": 3.378514156793244e-05, "learning_rate": 3.610786878778834e-05, "loss": 0.0, "step": 952 }, { "epoch": 0.6168284789644013, "grad_norm": 3.6062778235645965e-05, "learning_rate": 3.600347618383949e-05, "loss": 0.0, "step": 953 }, { "epoch": 0.6174757281553398, "grad_norm": 3.545705112628639e-05, "learning_rate": 3.589914973814962e-05, "loss": 0.0, "step": 954 }, { "epoch": 0.6181229773462783, "grad_norm": 3.283487603766844e-05, "learning_rate": 3.5794889943845115e-05, "loss": 0.0, "step": 955 }, { "epoch": 0.6187702265372168, "grad_norm": 3.691442543640733e-05, "learning_rate": 3.56906972937374e-05, "loss": 0.0, "step": 956 }, { "epoch": 0.6194174757281553, "grad_norm": 3.4522981877671555e-05, "learning_rate": 3.5586572280320505e-05, "loss": 0.0, "step": 957 }, { "epoch": 0.6200647249190938, "grad_norm": 3.784717773669399e-05, "learning_rate": 3.5482515395768744e-05, "loss": 0.0, "step": 958 }, { "epoch": 0.6207119741100323, "grad_norm": 3.904738332494162e-05, "learning_rate": 3.5378527131934415e-05, "loss": 0.0, "step": 959 }, { "epoch": 0.6213592233009708, "grad_norm": 3.809253030340187e-05, "learning_rate": 3.527460798034543e-05, "loss": 0.0, "step": 960 }, { "epoch": 0.6220064724919094, "grad_norm": 3.450105577940121e-05, "learning_rate": 3.5170758432203085e-05, "loss": 0.0, "step": 961 }, { "epoch": 0.6226537216828479, "grad_norm": 3.641380317276344e-05, "learning_rate": 3.506697897837963e-05, "loss": 0.0, "step": 962 }, { "epoch": 0.6233009708737864, "grad_norm": 3.715801358339377e-05, "learning_rate": 3.4963270109416005e-05, "loss": 0.0, "step": 963 }, { "epoch": 0.623948220064725, "grad_norm": 3.594182271626778e-05, "learning_rate": 3.485963231551952e-05, "loss": 0.0, "step": 964 }, { "epoch": 0.6245954692556634, "grad_norm": 3.523491614032537e-05, "learning_rate": 3.47560660865615e-05, "loss": 0.0, "step": 965 }, { "epoch": 0.625242718446602, "grad_norm": 2.9603990697069094e-05, "learning_rate": 3.465257191207505e-05, "loss": 0.0, "step": 966 }, { "epoch": 0.6258899676375405, "grad_norm": 3.475336416158825e-05, "learning_rate": 3.4549150281252636e-05, "loss": 0.0, "step": 967 }, { "epoch": 0.626537216828479, "grad_norm": 3.474638651823625e-05, "learning_rate": 3.4445801682943886e-05, "loss": 0.0, "step": 968 }, { "epoch": 0.6271844660194175, "grad_norm": 3.452376404311508e-05, "learning_rate": 3.434252660565318e-05, "loss": 0.0, "step": 969 }, { "epoch": 0.627831715210356, "grad_norm": 3.3316562621621415e-05, "learning_rate": 3.4239325537537386e-05, "loss": 0.0, "step": 970 }, { "epoch": 0.6284789644012945, "grad_norm": 3.381279020686634e-05, "learning_rate": 3.413619896640355e-05, "loss": 0.0, "step": 971 }, { "epoch": 0.629126213592233, "grad_norm": 3.9554368413519114e-05, "learning_rate": 3.403314737970661e-05, "loss": 0.0, "step": 972 }, { "epoch": 0.6297734627831715, "grad_norm": 3.548313179635443e-05, "learning_rate": 3.393017126454704e-05, "loss": 0.0, "step": 973 }, { "epoch": 0.63042071197411, "grad_norm": 3.715874481713399e-05, "learning_rate": 3.382727110766859e-05, "loss": 0.0, "step": 974 }, { "epoch": 0.6310679611650486, "grad_norm": 3.811107308138162e-05, "learning_rate": 3.372444739545598e-05, "loss": 0.0, "step": 975 }, { "epoch": 0.6317152103559871, "grad_norm": 3.667830242193304e-05, "learning_rate": 3.362170061393258e-05, "loss": 0.0, "step": 976 }, { "epoch": 0.6323624595469256, "grad_norm": 3.5484237741911784e-05, "learning_rate": 3.3519031248758134e-05, "loss": 0.0, "step": 977 }, { "epoch": 0.6330097087378641, "grad_norm": 3.380448106327094e-05, "learning_rate": 3.341643978522646e-05, "loss": 0.0, "step": 978 }, { "epoch": 0.6336569579288026, "grad_norm": 3.1414980185218155e-05, "learning_rate": 3.331392670826315e-05, "loss": 0.0, "step": 979 }, { "epoch": 0.6343042071197411, "grad_norm": 3.427364936214872e-05, "learning_rate": 3.321149250242329e-05, "loss": 0.0, "step": 980 }, { "epoch": 0.6349514563106796, "grad_norm": 3.476115671219304e-05, "learning_rate": 3.3109137651889146e-05, "loss": 0.0, "step": 981 }, { "epoch": 0.6355987055016181, "grad_norm": 3.497935540508479e-05, "learning_rate": 3.3006862640467876e-05, "loss": 0.0, "step": 982 }, { "epoch": 0.6362459546925566, "grad_norm": 2.8885409847134724e-05, "learning_rate": 3.290466795158932e-05, "loss": 0.0, "step": 983 }, { "epoch": 0.6368932038834951, "grad_norm": 3.3321553928544745e-05, "learning_rate": 3.2802554068303596e-05, "loss": 0.0, "step": 984 }, { "epoch": 0.6375404530744336, "grad_norm": 3.3801097742980346e-05, "learning_rate": 3.270052147327889e-05, "loss": 0.0, "step": 985 }, { "epoch": 0.6381877022653721, "grad_norm": 3.428285344853066e-05, "learning_rate": 3.259857064879913e-05, "loss": 0.0, "step": 986 }, { "epoch": 0.6388349514563106, "grad_norm": 3.284764534328133e-05, "learning_rate": 3.249670207676179e-05, "loss": 0.0, "step": 987 }, { "epoch": 0.6394822006472491, "grad_norm": 3.380359703442082e-05, "learning_rate": 3.23949162386755e-05, "loss": 0.0, "step": 988 }, { "epoch": 0.6401294498381876, "grad_norm": 3.570237458916381e-05, "learning_rate": 3.229321361565787e-05, "loss": 0.0, "step": 989 }, { "epoch": 0.6407766990291263, "grad_norm": 3.9065951568773016e-05, "learning_rate": 3.219159468843316e-05, "loss": 0.0, "step": 990 }, { "epoch": 0.6414239482200648, "grad_norm": 3.546904190443456e-05, "learning_rate": 3.2090059937329994e-05, "loss": 0.0, "step": 991 }, { "epoch": 0.6420711974110033, "grad_norm": 3.308341183583252e-05, "learning_rate": 3.1988609842279146e-05, "loss": 0.0, "step": 992 }, { "epoch": 0.6427184466019418, "grad_norm": 3.7390971556305885e-05, "learning_rate": 3.188724488281123e-05, "loss": 0.0, "step": 993 }, { "epoch": 0.6433656957928803, "grad_norm": 3.091857070103288e-05, "learning_rate": 3.1785965538054445e-05, "loss": 0.0, "step": 994 }, { "epoch": 0.6440129449838188, "grad_norm": 3.331078551127575e-05, "learning_rate": 3.168477228673231e-05, "loss": 0.0, "step": 995 }, { "epoch": 0.6446601941747573, "grad_norm": 3.259514778619632e-05, "learning_rate": 3.158366560716141e-05, "loss": 0.0, "step": 996 }, { "epoch": 0.6453074433656958, "grad_norm": 3.378557084943168e-05, "learning_rate": 3.148264597724908e-05, "loss": 0.0, "step": 997 }, { "epoch": 0.6459546925566343, "grad_norm": 3.1164061510935426e-05, "learning_rate": 3.138171387449125e-05, "loss": 0.0, "step": 998 }, { "epoch": 0.6466019417475728, "grad_norm": 3.163354631396942e-05, "learning_rate": 3.12808697759701e-05, "loss": 0.0, "step": 999 }, { "epoch": 0.6472491909385113, "grad_norm": 3.5229764762334526e-05, "learning_rate": 3.1180114158351856e-05, "loss": 0.0, "step": 1000 }, { "epoch": 0.6478964401294498, "grad_norm": 4.244422598276287e-05, "learning_rate": 3.107944749788449e-05, "loss": 0.0, "step": 1001 }, { "epoch": 0.6485436893203883, "grad_norm": 3.816741082118824e-05, "learning_rate": 3.097887027039551e-05, "loss": 0.0, "step": 1002 }, { "epoch": 0.6491909385113268, "grad_norm": 3.114972059847787e-05, "learning_rate": 3.087838295128969e-05, "loss": 0.0, "step": 1003 }, { "epoch": 0.6498381877022654, "grad_norm": 3.5686480259755626e-05, "learning_rate": 3.077798601554682e-05, "loss": 0.0, "step": 1004 }, { "epoch": 0.6504854368932039, "grad_norm": 3.976903826696798e-05, "learning_rate": 3.0677679937719495e-05, "loss": 0.0, "step": 1005 }, { "epoch": 0.6511326860841424, "grad_norm": 3.2581807317910716e-05, "learning_rate": 3.057746519193083e-05, "loss": 0.0, "step": 1006 }, { "epoch": 0.6517799352750809, "grad_norm": 3.283064506831579e-05, "learning_rate": 3.047734225187222e-05, "loss": 0.0, "step": 1007 }, { "epoch": 0.6524271844660194, "grad_norm": 3.35707918566186e-05, "learning_rate": 3.0377311590801145e-05, "loss": 0.0, "step": 1008 }, { "epoch": 0.6530744336569579, "grad_norm": 3.449892392382026e-05, "learning_rate": 3.027737368153888e-05, "loss": 0.0, "step": 1009 }, { "epoch": 0.6537216828478964, "grad_norm": 3.138006286462769e-05, "learning_rate": 3.0177528996468286e-05, "loss": 0.0, "step": 1010 }, { "epoch": 0.654368932038835, "grad_norm": 3.402231232030317e-05, "learning_rate": 3.007777800753161e-05, "loss": 0.0, "step": 1011 }, { "epoch": 0.6550161812297735, "grad_norm": 3.500101956888102e-05, "learning_rate": 2.9978121186228177e-05, "loss": 0.0, "step": 1012 }, { "epoch": 0.655663430420712, "grad_norm": 3.738741361303255e-05, "learning_rate": 2.9878559003612223e-05, "loss": 0.0, "step": 1013 }, { "epoch": 0.6563106796116505, "grad_norm": 3.007899795193225e-05, "learning_rate": 2.9779091930290638e-05, "loss": 0.0, "step": 1014 }, { "epoch": 0.656957928802589, "grad_norm": 3.378254405106418e-05, "learning_rate": 2.967972043642077e-05, "loss": 0.0, "step": 1015 }, { "epoch": 0.6576051779935275, "grad_norm": 3.473546894383617e-05, "learning_rate": 2.958044499170818e-05, "loss": 0.0, "step": 1016 }, { "epoch": 0.658252427184466, "grad_norm": 3.380428461241536e-05, "learning_rate": 2.9481266065404434e-05, "loss": 0.0, "step": 1017 }, { "epoch": 0.6588996763754046, "grad_norm": 4.190361869405024e-05, "learning_rate": 2.9382184126304834e-05, "loss": 0.0, "step": 1018 }, { "epoch": 0.6595469255663431, "grad_norm": 3.595928501454182e-05, "learning_rate": 2.9283199642746305e-05, "loss": 0.0, "step": 1019 }, { "epoch": 0.6601941747572816, "grad_norm": 3.523435225361027e-05, "learning_rate": 2.918431308260508e-05, "loss": 0.0, "step": 1020 }, { "epoch": 0.6608414239482201, "grad_norm": 3.7379188142949715e-05, "learning_rate": 2.9085524913294583e-05, "loss": 0.0, "step": 1021 }, { "epoch": 0.6614886731391586, "grad_norm": 3.40419755957555e-05, "learning_rate": 2.8986835601763096e-05, "loss": 0.0, "step": 1022 }, { "epoch": 0.6621359223300971, "grad_norm": 3.2842832297319546e-05, "learning_rate": 2.8888245614491682e-05, "loss": 0.0, "step": 1023 }, { "epoch": 0.6627831715210356, "grad_norm": 3.643509990070015e-05, "learning_rate": 2.8789755417491915e-05, "loss": 0.0, "step": 1024 }, { "epoch": 0.6634304207119741, "grad_norm": 3.5505388950696215e-05, "learning_rate": 2.869136547630364e-05, "loss": 0.0, "step": 1025 }, { "epoch": 0.6640776699029126, "grad_norm": 3.620008646976203e-05, "learning_rate": 2.859307625599288e-05, "loss": 0.0, "step": 1026 }, { "epoch": 0.6647249190938511, "grad_norm": 3.791077688219957e-05, "learning_rate": 2.8494888221149523e-05, "loss": 0.0, "step": 1027 }, { "epoch": 0.6653721682847896, "grad_norm": 3.3800261007854715e-05, "learning_rate": 2.839680183588518e-05, "loss": 0.0, "step": 1028 }, { "epoch": 0.6660194174757281, "grad_norm": 3.141585330013186e-05, "learning_rate": 2.8298817563831036e-05, "loss": 0.0, "step": 1029 }, { "epoch": 0.6666666666666666, "grad_norm": 3.379257759661414e-05, "learning_rate": 2.820093586813555e-05, "loss": 0.0, "step": 1030 }, { "epoch": 0.6673139158576051, "grad_norm": 3.068348451051861e-05, "learning_rate": 2.810315721146236e-05, "loss": 0.0, "step": 1031 }, { "epoch": 0.6679611650485436, "grad_norm": 3.7380465073511004e-05, "learning_rate": 2.800548205598809e-05, "loss": 0.0, "step": 1032 }, { "epoch": 0.6686084142394823, "grad_norm": 3.332350024720654e-05, "learning_rate": 2.7907910863400065e-05, "loss": 0.0, "step": 1033 }, { "epoch": 0.6692556634304208, "grad_norm": 3.042920980078634e-05, "learning_rate": 2.781044409489427e-05, "loss": 0.0, "step": 1034 }, { "epoch": 0.6699029126213593, "grad_norm": 2.8905817089253105e-05, "learning_rate": 2.771308221117309e-05, "loss": 0.0, "step": 1035 }, { "epoch": 0.6705501618122978, "grad_norm": 3.211893272236921e-05, "learning_rate": 2.7615825672443085e-05, "loss": 0.0, "step": 1036 }, { "epoch": 0.6711974110032363, "grad_norm": 2.9967004593345337e-05, "learning_rate": 2.7518674938412976e-05, "loss": 0.0, "step": 1037 }, { "epoch": 0.6718446601941748, "grad_norm": 2.9723929401370697e-05, "learning_rate": 2.7421630468291276e-05, "loss": 0.0, "step": 1038 }, { "epoch": 0.6724919093851133, "grad_norm": 3.284518970758654e-05, "learning_rate": 2.732469272078424e-05, "loss": 0.0, "step": 1039 }, { "epoch": 0.6731391585760518, "grad_norm": 3.139886393910274e-05, "learning_rate": 2.722786215409372e-05, "loss": 0.0, "step": 1040 }, { "epoch": 0.6737864077669903, "grad_norm": 3.355855733389035e-05, "learning_rate": 2.7131139225914877e-05, "loss": 0.0, "step": 1041 }, { "epoch": 0.6744336569579288, "grad_norm": 3.3809294109232724e-05, "learning_rate": 2.7034524393434135e-05, "loss": 0.0, "step": 1042 }, { "epoch": 0.6750809061488673, "grad_norm": 3.032558561244514e-05, "learning_rate": 2.693801811332699e-05, "loss": 0.0, "step": 1043 }, { "epoch": 0.6757281553398058, "grad_norm": 2.9264803742989898e-05, "learning_rate": 2.6841620841755778e-05, "loss": 0.0, "step": 1044 }, { "epoch": 0.6763754045307443, "grad_norm": 2.9483397156582214e-05, "learning_rate": 2.6745333034367626e-05, "loss": 0.0, "step": 1045 }, { "epoch": 0.6770226537216828, "grad_norm": 3.1380735890707e-05, "learning_rate": 2.664915514629225e-05, "loss": 0.0, "step": 1046 }, { "epoch": 0.6776699029126214, "grad_norm": 3.138905594823882e-05, "learning_rate": 2.655308763213975e-05, "loss": 0.0, "step": 1047 }, { "epoch": 0.6783171521035599, "grad_norm": 2.983898957609199e-05, "learning_rate": 2.6457130945998593e-05, "loss": 0.0, "step": 1048 }, { "epoch": 0.6789644012944984, "grad_norm": 3.2105639547808096e-05, "learning_rate": 2.6361285541433333e-05, "loss": 0.0, "step": 1049 }, { "epoch": 0.6796116504854369, "grad_norm": 3.16245095746126e-05, "learning_rate": 2.6265551871482507e-05, "loss": 0.0, "step": 1050 }, { "epoch": 0.6802588996763754, "grad_norm": 3.952919360017404e-05, "learning_rate": 2.6169930388656592e-05, "loss": 0.0, "step": 1051 }, { "epoch": 0.6809061488673139, "grad_norm": 3.247856875532307e-05, "learning_rate": 2.607442154493568e-05, "loss": 0.0, "step": 1052 }, { "epoch": 0.6815533980582524, "grad_norm": 3.3095642720581964e-05, "learning_rate": 2.597902579176752e-05, "loss": 0.0, "step": 1053 }, { "epoch": 0.6822006472491909, "grad_norm": 3.522561746649444e-05, "learning_rate": 2.58837435800653e-05, "loss": 0.0, "step": 1054 }, { "epoch": 0.6828478964401294, "grad_norm": 2.8278780519030988e-05, "learning_rate": 2.5788575360205468e-05, "loss": 0.0, "step": 1055 }, { "epoch": 0.683495145631068, "grad_norm": 3.887992352247238e-05, "learning_rate": 2.5693521582025725e-05, "loss": 0.0, "step": 1056 }, { "epoch": 0.6841423948220064, "grad_norm": 2.973111804749351e-05, "learning_rate": 2.5598582694822814e-05, "loss": 0.0, "step": 1057 }, { "epoch": 0.684789644012945, "grad_norm": 3.187929905834608e-05, "learning_rate": 2.5503759147350383e-05, "loss": 0.0, "step": 1058 }, { "epoch": 0.6854368932038835, "grad_norm": 3.6388835724210367e-05, "learning_rate": 2.5409051387816952e-05, "loss": 0.0, "step": 1059 }, { "epoch": 0.686084142394822, "grad_norm": 3.233198731322773e-05, "learning_rate": 2.5314459863883693e-05, "loss": 0.0, "step": 1060 }, { "epoch": 0.6867313915857605, "grad_norm": 3.0659986805403605e-05, "learning_rate": 2.521998502266236e-05, "loss": 0.0, "step": 1061 }, { "epoch": 0.6873786407766991, "grad_norm": 3.0658375180792063e-05, "learning_rate": 2.5125627310713206e-05, "loss": 0.0, "step": 1062 }, { "epoch": 0.6880258899676376, "grad_norm": 3.0427248930209316e-05, "learning_rate": 2.5031387174042848e-05, "loss": 0.0, "step": 1063 }, { "epoch": 0.6886731391585761, "grad_norm": 3.449451833148487e-05, "learning_rate": 2.4937265058102093e-05, "loss": 0.0, "step": 1064 }, { "epoch": 0.6893203883495146, "grad_norm": 3.379070039954968e-05, "learning_rate": 2.4843261407783968e-05, "loss": 0.0, "step": 1065 }, { "epoch": 0.6899676375404531, "grad_norm": 3.69020490325056e-05, "learning_rate": 2.4749376667421466e-05, "loss": 0.0, "step": 1066 }, { "epoch": 0.6906148867313916, "grad_norm": 3.0211767807486467e-05, "learning_rate": 2.465561128078557e-05, "loss": 0.0, "step": 1067 }, { "epoch": 0.6912621359223301, "grad_norm": 3.282434772700071e-05, "learning_rate": 2.4561965691083123e-05, "loss": 0.0, "step": 1068 }, { "epoch": 0.6919093851132686, "grad_norm": 2.888195558625739e-05, "learning_rate": 2.446844034095466e-05, "loss": 0.0, "step": 1069 }, { "epoch": 0.6925566343042071, "grad_norm": 3.140112676192075e-05, "learning_rate": 2.4375035672472395e-05, "loss": 0.0, "step": 1070 }, { "epoch": 0.6932038834951456, "grad_norm": 3.092505721724592e-05, "learning_rate": 2.4281752127138135e-05, "loss": 0.0, "step": 1071 }, { "epoch": 0.6938511326860841, "grad_norm": 3.1878615118330345e-05, "learning_rate": 2.4188590145881107e-05, "loss": 0.0, "step": 1072 }, { "epoch": 0.6944983818770226, "grad_norm": 3.283288242528215e-05, "learning_rate": 2.4095550169056003e-05, "loss": 0.0, "step": 1073 }, { "epoch": 0.6951456310679611, "grad_norm": 3.381393980816938e-05, "learning_rate": 2.4002632636440797e-05, "loss": 0.0, "step": 1074 }, { "epoch": 0.6957928802588996, "grad_norm": 3.3803728001657873e-05, "learning_rate": 2.3909837987234678e-05, "loss": 0.0, "step": 1075 }, { "epoch": 0.6964401294498382, "grad_norm": 3.2384254154749215e-05, "learning_rate": 2.3817166660056038e-05, "loss": 0.0, "step": 1076 }, { "epoch": 0.6970873786407767, "grad_norm": 3.3068110496969894e-05, "learning_rate": 2.3724619092940302e-05, "loss": 0.0, "step": 1077 }, { "epoch": 0.6977346278317152, "grad_norm": 3.906304482370615e-05, "learning_rate": 2.363219572333795e-05, "loss": 0.0, "step": 1078 }, { "epoch": 0.6983818770226538, "grad_norm": 3.189729977748357e-05, "learning_rate": 2.3539896988112424e-05, "loss": 0.0, "step": 1079 }, { "epoch": 0.6990291262135923, "grad_norm": 3.33312782458961e-05, "learning_rate": 2.3447723323538002e-05, "loss": 0.0, "step": 1080 }, { "epoch": 0.6996763754045308, "grad_norm": 3.331431435071863e-05, "learning_rate": 2.3355675165297784e-05, "loss": 0.0, "step": 1081 }, { "epoch": 0.7003236245954693, "grad_norm": 3.259932418586686e-05, "learning_rate": 2.3263752948481698e-05, "loss": 0.0, "step": 1082 }, { "epoch": 0.7009708737864078, "grad_norm": 2.961006794066634e-05, "learning_rate": 2.317195710758428e-05, "loss": 0.0, "step": 1083 }, { "epoch": 0.7016181229773463, "grad_norm": 3.283592013758607e-05, "learning_rate": 2.3080288076502793e-05, "loss": 0.0, "step": 1084 }, { "epoch": 0.7022653721682848, "grad_norm": 3.475217454251833e-05, "learning_rate": 2.2988746288535097e-05, "loss": 0.0, "step": 1085 }, { "epoch": 0.7029126213592233, "grad_norm": 3.1388237403007224e-05, "learning_rate": 2.2897332176377528e-05, "loss": 0.0, "step": 1086 }, { "epoch": 0.7035598705501618, "grad_norm": 2.97311635222286e-05, "learning_rate": 2.2806046172123035e-05, "loss": 0.0, "step": 1087 }, { "epoch": 0.7042071197411003, "grad_norm": 3.356492015882395e-05, "learning_rate": 2.2714888707258937e-05, "loss": 0.0, "step": 1088 }, { "epoch": 0.7048543689320388, "grad_norm": 3.258557626395486e-05, "learning_rate": 2.2623860212665032e-05, "loss": 0.0, "step": 1089 }, { "epoch": 0.7055016181229773, "grad_norm": 3.0323428291012533e-05, "learning_rate": 2.2532961118611527e-05, "loss": 0.0, "step": 1090 }, { "epoch": 0.7061488673139159, "grad_norm": 3.283713522250764e-05, "learning_rate": 2.244219185475689e-05, "loss": 0.0, "step": 1091 }, { "epoch": 0.7067961165048544, "grad_norm": 3.115700019407086e-05, "learning_rate": 2.235155285014599e-05, "loss": 0.0, "step": 1092 }, { "epoch": 0.7074433656957929, "grad_norm": 3.116889638477005e-05, "learning_rate": 2.2261044533208e-05, "loss": 0.0, "step": 1093 }, { "epoch": 0.7080906148867314, "grad_norm": 2.947829671029467e-05, "learning_rate": 2.21706673317543e-05, "loss": 0.0, "step": 1094 }, { "epoch": 0.7087378640776699, "grad_norm": 3.092719634878449e-05, "learning_rate": 2.208042167297657e-05, "loss": 0.0, "step": 1095 }, { "epoch": 0.7093851132686084, "grad_norm": 3.0435337976086885e-05, "learning_rate": 2.1990307983444718e-05, "loss": 0.0, "step": 1096 }, { "epoch": 0.7100323624595469, "grad_norm": 3.2349777029594406e-05, "learning_rate": 2.1900326689104815e-05, "loss": 0.0, "step": 1097 }, { "epoch": 0.7106796116504854, "grad_norm": 2.9600430934806354e-05, "learning_rate": 2.181047821527721e-05, "loss": 0.0, "step": 1098 }, { "epoch": 0.7113268608414239, "grad_norm": 3.187784750480205e-05, "learning_rate": 2.1720762986654348e-05, "loss": 0.0, "step": 1099 }, { "epoch": 0.7119741100323624, "grad_norm": 2.9615328458021395e-05, "learning_rate": 2.1631181427298945e-05, "loss": 0.0, "step": 1100 }, { "epoch": 0.7126213592233009, "grad_norm": 3.044502591365017e-05, "learning_rate": 2.1541733960641835e-05, "loss": 0.0, "step": 1101 }, { "epoch": 0.7132686084142394, "grad_norm": 3.3053285733330995e-05, "learning_rate": 2.145242100948003e-05, "loss": 0.0, "step": 1102 }, { "epoch": 0.713915857605178, "grad_norm": 3.521789039950818e-05, "learning_rate": 2.136324299597474e-05, "loss": 0.0, "step": 1103 }, { "epoch": 0.7145631067961165, "grad_norm": 3.164279041811824e-05, "learning_rate": 2.1274200341649388e-05, "loss": 0.0, "step": 1104 }, { "epoch": 0.7152103559870551, "grad_norm": 3.4013526601484045e-05, "learning_rate": 2.1185293467387494e-05, "loss": 0.0, "step": 1105 }, { "epoch": 0.7158576051779936, "grad_norm": 2.922323801612947e-05, "learning_rate": 2.1096522793430867e-05, "loss": 0.0, "step": 1106 }, { "epoch": 0.7165048543689321, "grad_norm": 3.2598956750007346e-05, "learning_rate": 2.1007888739377502e-05, "loss": 0.0, "step": 1107 }, { "epoch": 0.7171521035598706, "grad_norm": 3.353502688696608e-05, "learning_rate": 2.0919391724179584e-05, "loss": 0.0, "step": 1108 }, { "epoch": 0.7177993527508091, "grad_norm": 2.9709415684919804e-05, "learning_rate": 2.083103216614162e-05, "loss": 0.0, "step": 1109 }, { "epoch": 0.7184466019417476, "grad_norm": 3.28201858792454e-05, "learning_rate": 2.0742810482918313e-05, "loss": 0.0, "step": 1110 }, { "epoch": 0.7190938511326861, "grad_norm": 3.2333467970602214e-05, "learning_rate": 2.065472709151274e-05, "loss": 0.0, "step": 1111 }, { "epoch": 0.7197411003236246, "grad_norm": 3.234647374483757e-05, "learning_rate": 2.0566782408274243e-05, "loss": 0.0, "step": 1112 }, { "epoch": 0.7203883495145631, "grad_norm": 5.125478492118418e-05, "learning_rate": 2.047897684889653e-05, "loss": 0.0, "step": 1113 }, { "epoch": 0.7210355987055016, "grad_norm": 3.054843909922056e-05, "learning_rate": 2.0391310828415746e-05, "loss": 0.0, "step": 1114 }, { "epoch": 0.7216828478964401, "grad_norm": 3.235318945371546e-05, "learning_rate": 2.0303784761208455e-05, "loss": 0.0, "step": 1115 }, { "epoch": 0.7223300970873786, "grad_norm": 3.140069748042151e-05, "learning_rate": 2.0216399060989648e-05, "loss": 0.0, "step": 1116 }, { "epoch": 0.7229773462783171, "grad_norm": 3.164092777296901e-05, "learning_rate": 2.012915414081089e-05, "loss": 0.0, "step": 1117 }, { "epoch": 0.7236245954692556, "grad_norm": 3.6412478948477656e-05, "learning_rate": 2.004205041305831e-05, "loss": 0.0, "step": 1118 }, { "epoch": 0.7242718446601941, "grad_norm": 3.1168889108812436e-05, "learning_rate": 1.9955088289450598e-05, "loss": 0.0, "step": 1119 }, { "epoch": 0.7249190938511327, "grad_norm": 3.190989446011372e-05, "learning_rate": 1.9868268181037185e-05, "loss": 0.0, "step": 1120 }, { "epoch": 0.7255663430420712, "grad_norm": 3.1650251912651584e-05, "learning_rate": 1.978159049819619e-05, "loss": 0.0, "step": 1121 }, { "epoch": 0.7262135922330097, "grad_norm": 3.0340281227836385e-05, "learning_rate": 1.9695055650632493e-05, "loss": 0.0, "step": 1122 }, { "epoch": 0.7268608414239482, "grad_norm": 3.332859341753647e-05, "learning_rate": 1.960866404737589e-05, "loss": 0.0, "step": 1123 }, { "epoch": 0.7275080906148867, "grad_norm": 3.284904232714325e-05, "learning_rate": 1.952241609677908e-05, "loss": 0.0, "step": 1124 }, { "epoch": 0.7281553398058253, "grad_norm": 3.23673193634022e-05, "learning_rate": 1.9436312206515694e-05, "loss": 0.0, "step": 1125 }, { "epoch": 0.7288025889967638, "grad_norm": 3.2832926081027836e-05, "learning_rate": 1.9350352783578503e-05, "loss": 0.0, "step": 1126 }, { "epoch": 0.7294498381877023, "grad_norm": 3.236222983105108e-05, "learning_rate": 1.9264538234277342e-05, "loss": 0.0, "step": 1127 }, { "epoch": 0.7300970873786408, "grad_norm": 3.212531009921804e-05, "learning_rate": 1.9178868964237312e-05, "loss": 0.0, "step": 1128 }, { "epoch": 0.7307443365695793, "grad_norm": 3.188625487382524e-05, "learning_rate": 1.9093345378396816e-05, "loss": 0.0, "step": 1129 }, { "epoch": 0.7313915857605178, "grad_norm": 3.1399355066241696e-05, "learning_rate": 1.900796788100559e-05, "loss": 0.0, "step": 1130 }, { "epoch": 0.7320388349514563, "grad_norm": 2.8900312827317975e-05, "learning_rate": 1.8922736875622914e-05, "loss": 0.0, "step": 1131 }, { "epoch": 0.7326860841423948, "grad_norm": 2.8303416911512613e-05, "learning_rate": 1.883765276511558e-05, "loss": 0.0, "step": 1132 }, { "epoch": 0.7333333333333333, "grad_norm": 3.05672874674201e-05, "learning_rate": 1.8752715951656053e-05, "loss": 0.0, "step": 1133 }, { "epoch": 0.7339805825242719, "grad_norm": 3.019818541361019e-05, "learning_rate": 1.8667926836720574e-05, "loss": 0.0, "step": 1134 }, { "epoch": 0.7346278317152104, "grad_norm": 3.306631333543919e-05, "learning_rate": 1.8583285821087272e-05, "loss": 0.0, "step": 1135 }, { "epoch": 0.7352750809061489, "grad_norm": 2.996233888552524e-05, "learning_rate": 1.849879330483417e-05, "loss": 0.0, "step": 1136 }, { "epoch": 0.7359223300970874, "grad_norm": 3.055156776099466e-05, "learning_rate": 1.8414449687337464e-05, "loss": 0.0, "step": 1137 }, { "epoch": 0.7365695792880259, "grad_norm": 3.0332636015373282e-05, "learning_rate": 1.833025536726945e-05, "loss": 0.0, "step": 1138 }, { "epoch": 0.7372168284789644, "grad_norm": 3.283974001533352e-05, "learning_rate": 1.8246210742596786e-05, "loss": 0.0, "step": 1139 }, { "epoch": 0.7378640776699029, "grad_norm": 2.913233583967667e-05, "learning_rate": 1.816231621057857e-05, "loss": 0.0, "step": 1140 }, { "epoch": 0.7385113268608414, "grad_norm": 2.7797306756838225e-05, "learning_rate": 1.8078572167764386e-05, "loss": 0.0, "step": 1141 }, { "epoch": 0.7391585760517799, "grad_norm": 3.187457696185447e-05, "learning_rate": 1.799497900999255e-05, "loss": 0.0, "step": 1142 }, { "epoch": 0.7398058252427184, "grad_norm": 3.138705505989492e-05, "learning_rate": 1.791153713238815e-05, "loss": 0.0, "step": 1143 }, { "epoch": 0.7404530744336569, "grad_norm": 3.09158640448004e-05, "learning_rate": 1.7828246929361188e-05, "loss": 0.0, "step": 1144 }, { "epoch": 0.7411003236245954, "grad_norm": 2.84122670564102e-05, "learning_rate": 1.7745108794604775e-05, "loss": 0.0, "step": 1145 }, { "epoch": 0.7417475728155339, "grad_norm": 2.8633057809202e-05, "learning_rate": 1.7662123121093244e-05, "loss": 0.0, "step": 1146 }, { "epoch": 0.7423948220064724, "grad_norm": 3.1379666324937716e-05, "learning_rate": 1.757929030108022e-05, "loss": 0.0, "step": 1147 }, { "epoch": 0.7430420711974111, "grad_norm": 2.9830018320353702e-05, "learning_rate": 1.7496610726096892e-05, "loss": 0.0, "step": 1148 }, { "epoch": 0.7436893203883496, "grad_norm": 2.983623562613502e-05, "learning_rate": 1.741408478695002e-05, "loss": 0.0, "step": 1149 }, { "epoch": 0.7443365695792881, "grad_norm": 3.3068496122723445e-05, "learning_rate": 1.7331712873720236e-05, "loss": 0.0, "step": 1150 }, { "epoch": 0.7449838187702266, "grad_norm": 3.361059134476818e-05, "learning_rate": 1.724949537576011e-05, "loss": 0.0, "step": 1151 }, { "epoch": 0.7456310679611651, "grad_norm": 3.185556852258742e-05, "learning_rate": 1.7167432681692287e-05, "loss": 0.0, "step": 1152 }, { "epoch": 0.7462783171521036, "grad_norm": 2.868829687940888e-05, "learning_rate": 1.708552517940775e-05, "loss": 0.0, "step": 1153 }, { "epoch": 0.7469255663430421, "grad_norm": 4.373143019620329e-05, "learning_rate": 1.700377325606388e-05, "loss": 0.0, "step": 1154 }, { "epoch": 0.7475728155339806, "grad_norm": 3.1859577575232834e-05, "learning_rate": 1.692217729808268e-05, "loss": 0.0, "step": 1155 }, { "epoch": 0.7482200647249191, "grad_norm": 3.032912172784563e-05, "learning_rate": 1.6840737691148965e-05, "loss": 0.0, "step": 1156 }, { "epoch": 0.7488673139158576, "grad_norm": 2.783163290587254e-05, "learning_rate": 1.6759454820208513e-05, "loss": 0.0, "step": 1157 }, { "epoch": 0.7495145631067961, "grad_norm": 3.055029810639098e-05, "learning_rate": 1.6678329069466214e-05, "loss": 0.0, "step": 1158 }, { "epoch": 0.7501618122977346, "grad_norm": 3.113222919637337e-05, "learning_rate": 1.6597360822384345e-05, "loss": 0.0, "step": 1159 }, { "epoch": 0.7508090614886731, "grad_norm": 3.3797608921304345e-05, "learning_rate": 1.6516550461680624e-05, "loss": 0.0, "step": 1160 }, { "epoch": 0.7514563106796116, "grad_norm": 2.838747968780808e-05, "learning_rate": 1.6435898369326542e-05, "loss": 0.0, "step": 1161 }, { "epoch": 0.7521035598705501, "grad_norm": 3.424755414016545e-05, "learning_rate": 1.6355404926545492e-05, "loss": 0.0, "step": 1162 }, { "epoch": 0.7527508090614887, "grad_norm": 3.332311462145299e-05, "learning_rate": 1.6275070513810925e-05, "loss": 0.0, "step": 1163 }, { "epoch": 0.7533980582524272, "grad_norm": 3.056655259570107e-05, "learning_rate": 1.6194895510844604e-05, "loss": 0.0, "step": 1164 }, { "epoch": 0.7540453074433657, "grad_norm": 3.233298775739968e-05, "learning_rate": 1.6114880296614844e-05, "loss": 0.0, "step": 1165 }, { "epoch": 0.7546925566343042, "grad_norm": 2.946914901258424e-05, "learning_rate": 1.603502524933461e-05, "loss": 0.0, "step": 1166 }, { "epoch": 0.7553398058252427, "grad_norm": 3.2134699722519144e-05, "learning_rate": 1.5955330746459858e-05, "loss": 0.0, "step": 1167 }, { "epoch": 0.7559870550161812, "grad_norm": 3.210958311683498e-05, "learning_rate": 1.587579716468766e-05, "loss": 0.0, "step": 1168 }, { "epoch": 0.7566343042071197, "grad_norm": 3.2368618121836334e-05, "learning_rate": 1.5796424879954437e-05, "loss": 0.0, "step": 1169 }, { "epoch": 0.7572815533980582, "grad_norm": 3.166342503391206e-05, "learning_rate": 1.571721426743423e-05, "loss": 0.0, "step": 1170 }, { "epoch": 0.7579288025889968, "grad_norm": 2.9358689062064514e-05, "learning_rate": 1.5638165701536868e-05, "loss": 0.0, "step": 1171 }, { "epoch": 0.7585760517799353, "grad_norm": 3.3322103263344616e-05, "learning_rate": 1.5559279555906227e-05, "loss": 0.0, "step": 1172 }, { "epoch": 0.7592233009708738, "grad_norm": 3.1424486223841086e-05, "learning_rate": 1.5480556203418488e-05, "loss": 0.0, "step": 1173 }, { "epoch": 0.7598705501618123, "grad_norm": 3.499459126032889e-05, "learning_rate": 1.5401996016180313e-05, "loss": 0.0, "step": 1174 }, { "epoch": 0.7605177993527508, "grad_norm": 3.354872751515359e-05, "learning_rate": 1.5323599365527118e-05, "loss": 0.0, "step": 1175 }, { "epoch": 0.7611650485436893, "grad_norm": 3.1881816539680585e-05, "learning_rate": 1.5245366622021367e-05, "loss": 0.0, "step": 1176 }, { "epoch": 0.7618122977346279, "grad_norm": 3.0203555070329458e-05, "learning_rate": 1.5167298155450716e-05, "loss": 0.0, "step": 1177 }, { "epoch": 0.7624595469255664, "grad_norm": 2.889079405576922e-05, "learning_rate": 1.508939433482635e-05, "loss": 0.0, "step": 1178 }, { "epoch": 0.7631067961165049, "grad_norm": 3.057411959161982e-05, "learning_rate": 1.501165552838124e-05, "loss": 0.0, "step": 1179 }, { "epoch": 0.7637540453074434, "grad_norm": 3.187909896951169e-05, "learning_rate": 1.4934082103568308e-05, "loss": 0.0, "step": 1180 }, { "epoch": 0.7644012944983819, "grad_norm": 3.0201445042621344e-05, "learning_rate": 1.4856674427058825e-05, "loss": 0.0, "step": 1181 }, { "epoch": 0.7650485436893204, "grad_norm": 3.139544423902407e-05, "learning_rate": 1.4779432864740544e-05, "loss": 0.0, "step": 1182 }, { "epoch": 0.7656957928802589, "grad_norm": 2.889475399570074e-05, "learning_rate": 1.4702357781716091e-05, "loss": 0.0, "step": 1183 }, { "epoch": 0.7663430420711974, "grad_norm": 2.85307160083903e-05, "learning_rate": 1.4625449542301167e-05, "loss": 0.0, "step": 1184 }, { "epoch": 0.7669902912621359, "grad_norm": 3.043177457584534e-05, "learning_rate": 1.4548708510022824e-05, "loss": 0.0, "step": 1185 }, { "epoch": 0.7676375404530744, "grad_norm": 2.9717173674725927e-05, "learning_rate": 1.4472135047617764e-05, "loss": 0.0, "step": 1186 }, { "epoch": 0.7682847896440129, "grad_norm": 3.0679202609462664e-05, "learning_rate": 1.439572951703067e-05, "loss": 0.0, "step": 1187 }, { "epoch": 0.7689320388349514, "grad_norm": 3.3071111829485744e-05, "learning_rate": 1.4319492279412388e-05, "loss": 0.0, "step": 1188 }, { "epoch": 0.7695792880258899, "grad_norm": 3.162126085953787e-05, "learning_rate": 1.4243423695118341e-05, "loss": 0.0, "step": 1189 }, { "epoch": 0.7702265372168284, "grad_norm": 2.996356852236204e-05, "learning_rate": 1.4167524123706743e-05, "loss": 0.0, "step": 1190 }, { "epoch": 0.7708737864077669, "grad_norm": 2.8282775019761175e-05, "learning_rate": 1.4091793923936902e-05, "loss": 0.0, "step": 1191 }, { "epoch": 0.7715210355987056, "grad_norm": 3.055834895349108e-05, "learning_rate": 1.4016233453767601e-05, "loss": 0.0, "step": 1192 }, { "epoch": 0.772168284789644, "grad_norm": 2.888802373490762e-05, "learning_rate": 1.3940843070355281e-05, "loss": 0.0, "step": 1193 }, { "epoch": 0.7728155339805826, "grad_norm": 3.115186336799525e-05, "learning_rate": 1.3865623130052474e-05, "loss": 0.0, "step": 1194 }, { "epoch": 0.7734627831715211, "grad_norm": 2.9237791750347242e-05, "learning_rate": 1.3790573988406074e-05, "loss": 0.0, "step": 1195 }, { "epoch": 0.7741100323624596, "grad_norm": 2.7803118427982554e-05, "learning_rate": 1.3715696000155614e-05, "loss": 0.0, "step": 1196 }, { "epoch": 0.7747572815533981, "grad_norm": 2.7793254048447125e-05, "learning_rate": 1.3640989519231629e-05, "loss": 0.0, "step": 1197 }, { "epoch": 0.7754045307443366, "grad_norm": 2.8403444957803003e-05, "learning_rate": 1.3566454898754016e-05, "loss": 0.0, "step": 1198 }, { "epoch": 0.7760517799352751, "grad_norm": 3.0428616810240783e-05, "learning_rate": 1.3492092491030279e-05, "loss": 0.0, "step": 1199 }, { "epoch": 0.7766990291262136, "grad_norm": 2.8647311410168186e-05, "learning_rate": 1.3417902647553948e-05, "loss": 0.0, "step": 1200 }, { "epoch": 0.7773462783171521, "grad_norm": 3.5604185541160405e-05, "learning_rate": 1.3343885719002896e-05, "loss": 0.0, "step": 1201 }, { "epoch": 0.7779935275080906, "grad_norm": 3.1396841222885996e-05, "learning_rate": 1.3270042055237609e-05, "loss": 0.0, "step": 1202 }, { "epoch": 0.7786407766990291, "grad_norm": 3.281456883996725e-05, "learning_rate": 1.3196372005299656e-05, "loss": 0.0, "step": 1203 }, { "epoch": 0.7792880258899676, "grad_norm": 2.9937604267615825e-05, "learning_rate": 1.3122875917409916e-05, "loss": 0.0, "step": 1204 }, { "epoch": 0.7799352750809061, "grad_norm": 2.828240758390166e-05, "learning_rate": 1.3049554138967051e-05, "loss": 0.0, "step": 1205 }, { "epoch": 0.7805825242718447, "grad_norm": 3.450288932071999e-05, "learning_rate": 1.2976407016545744e-05, "loss": 0.0, "step": 1206 }, { "epoch": 0.7812297734627832, "grad_norm": 3.089702659053728e-05, "learning_rate": 1.2903434895895183e-05, "loss": 0.0, "step": 1207 }, { "epoch": 0.7818770226537217, "grad_norm": 3.3058688131859526e-05, "learning_rate": 1.2830638121937289e-05, "loss": 0.0, "step": 1208 }, { "epoch": 0.7825242718446602, "grad_norm": 3.49712063325569e-05, "learning_rate": 1.275801703876524e-05, "loss": 0.0, "step": 1209 }, { "epoch": 0.7831715210355987, "grad_norm": 3.258398282923736e-05, "learning_rate": 1.2685571989641698e-05, "loss": 0.0, "step": 1210 }, { "epoch": 0.7838187702265372, "grad_norm": 3.234569157939404e-05, "learning_rate": 1.2613303316997288e-05, "loss": 0.0, "step": 1211 }, { "epoch": 0.7844660194174757, "grad_norm": 2.958671575470362e-05, "learning_rate": 1.2541211362428962e-05, "loss": 0.0, "step": 1212 }, { "epoch": 0.7851132686084142, "grad_norm": 2.8513155484688468e-05, "learning_rate": 1.246929646669831e-05, "loss": 0.0, "step": 1213 }, { "epoch": 0.7857605177993527, "grad_norm": 3.2108306186273694e-05, "learning_rate": 1.2397558969730077e-05, "loss": 0.0, "step": 1214 }, { "epoch": 0.7864077669902912, "grad_norm": 3.305998325231485e-05, "learning_rate": 1.2325999210610423e-05, "loss": 0.0, "step": 1215 }, { "epoch": 0.7870550161812297, "grad_norm": 2.8868706067441963e-05, "learning_rate": 1.2254617527585433e-05, "loss": 0.0, "step": 1216 }, { "epoch": 0.7877022653721683, "grad_norm": 3.401190770091489e-05, "learning_rate": 1.2183414258059417e-05, "loss": 0.0, "step": 1217 }, { "epoch": 0.7883495145631068, "grad_norm": 2.9487531719496474e-05, "learning_rate": 1.2112389738593427e-05, "loss": 0.0, "step": 1218 }, { "epoch": 0.7889967637540453, "grad_norm": 3.0200997571228072e-05, "learning_rate": 1.2041544304903552e-05, "loss": 0.0, "step": 1219 }, { "epoch": 0.7896440129449838, "grad_norm": 2.9967310183565132e-05, "learning_rate": 1.1970878291859423e-05, "loss": 0.0, "step": 1220 }, { "epoch": 0.7902912621359224, "grad_norm": 3.139051841571927e-05, "learning_rate": 1.190039203348256e-05, "loss": 0.0, "step": 1221 }, { "epoch": 0.7909385113268609, "grad_norm": 3.090596874244511e-05, "learning_rate": 1.183008586294485e-05, "loss": 0.0, "step": 1222 }, { "epoch": 0.7915857605177994, "grad_norm": 3.282758916611783e-05, "learning_rate": 1.1759960112566953e-05, "loss": 0.0, "step": 1223 }, { "epoch": 0.7922330097087379, "grad_norm": 2.9495713533833623e-05, "learning_rate": 1.1690015113816689e-05, "loss": 0.0, "step": 1224 }, { "epoch": 0.7928802588996764, "grad_norm": 3.188411210430786e-05, "learning_rate": 1.1620251197307535e-05, "loss": 0.0, "step": 1225 }, { "epoch": 0.7935275080906149, "grad_norm": 3.187660331605002e-05, "learning_rate": 1.1550668692797062e-05, "loss": 0.0, "step": 1226 }, { "epoch": 0.7941747572815534, "grad_norm": 3.0207180316210724e-05, "learning_rate": 1.148126792918527e-05, "loss": 0.0, "step": 1227 }, { "epoch": 0.7948220064724919, "grad_norm": 2.8910977562190965e-05, "learning_rate": 1.1412049234513178e-05, "loss": 0.0, "step": 1228 }, { "epoch": 0.7954692556634304, "grad_norm": 3.139706313959323e-05, "learning_rate": 1.1343012935961206e-05, "loss": 0.0, "step": 1229 }, { "epoch": 0.7961165048543689, "grad_norm": 2.9498503863578662e-05, "learning_rate": 1.1274159359847591e-05, "loss": 0.0, "step": 1230 }, { "epoch": 0.7967637540453074, "grad_norm": 2.889409734052606e-05, "learning_rate": 1.1205488831626936e-05, "loss": 0.0, "step": 1231 }, { "epoch": 0.7974110032362459, "grad_norm": 2.7833795684273355e-05, "learning_rate": 1.1137001675888564e-05, "loss": 0.0, "step": 1232 }, { "epoch": 0.7980582524271844, "grad_norm": 2.9727289074799046e-05, "learning_rate": 1.1068698216355084e-05, "loss": 0.0, "step": 1233 }, { "epoch": 0.7987055016181229, "grad_norm": 2.9720979000558145e-05, "learning_rate": 1.100057877588081e-05, "loss": 0.0, "step": 1234 }, { "epoch": 0.7993527508090615, "grad_norm": 3.0549646908184513e-05, "learning_rate": 1.0932643676450205e-05, "loss": 0.0, "step": 1235 }, { "epoch": 0.8, "grad_norm": 2.8757727704942226e-05, "learning_rate": 1.0864893239176438e-05, "loss": 0.0, "step": 1236 }, { "epoch": 0.8006472491909385, "grad_norm": 2.9481703677447513e-05, "learning_rate": 1.0797327784299837e-05, "loss": 0.0, "step": 1237 }, { "epoch": 0.801294498381877, "grad_norm": 3.140431363135576e-05, "learning_rate": 1.0729947631186276e-05, "loss": 0.0, "step": 1238 }, { "epoch": 0.8019417475728156, "grad_norm": 2.90126190520823e-05, "learning_rate": 1.066275309832584e-05, "loss": 0.0, "step": 1239 }, { "epoch": 0.8025889967637541, "grad_norm": 2.924008913396392e-05, "learning_rate": 1.0595744503331207e-05, "loss": 0.0, "step": 1240 }, { "epoch": 0.8032362459546926, "grad_norm": 3.188064874848351e-05, "learning_rate": 1.0528922162936134e-05, "loss": 0.0, "step": 1241 }, { "epoch": 0.8038834951456311, "grad_norm": 2.780007525871042e-05, "learning_rate": 1.0462286392994058e-05, "loss": 0.0, "step": 1242 }, { "epoch": 0.8045307443365696, "grad_norm": 2.7808411687146872e-05, "learning_rate": 1.0395837508476486e-05, "loss": 0.0, "step": 1243 }, { "epoch": 0.8051779935275081, "grad_norm": 2.9482629543053918e-05, "learning_rate": 1.0329575823471588e-05, "loss": 0.0, "step": 1244 }, { "epoch": 0.8058252427184466, "grad_norm": 2.779496571747586e-05, "learning_rate": 1.0263501651182706e-05, "loss": 0.0, "step": 1245 }, { "epoch": 0.8064724919093851, "grad_norm": 2.971193680423312e-05, "learning_rate": 1.0197615303926794e-05, "loss": 0.0, "step": 1246 }, { "epoch": 0.8071197411003236, "grad_norm": 2.7200567274121568e-05, "learning_rate": 1.0131917093133075e-05, "loss": 0.0, "step": 1247 }, { "epoch": 0.8077669902912621, "grad_norm": 2.7336840503267013e-05, "learning_rate": 1.0066407329341443e-05, "loss": 0.0, "step": 1248 }, { "epoch": 0.8084142394822007, "grad_norm": 3.140536500723101e-05, "learning_rate": 1.0001086322201048e-05, "loss": 0.0, "step": 1249 }, { "epoch": 0.8090614886731392, "grad_norm": 3.138635292998515e-05, "learning_rate": 9.935954380468859e-06, "loss": 0.0, "step": 1250 }, { "epoch": 0.8097087378640777, "grad_norm": 3.832921720459126e-05, "learning_rate": 9.87101181200818e-06, "loss": 0.0, "step": 1251 }, { "epoch": 0.8103559870550162, "grad_norm": 2.887231130443979e-05, "learning_rate": 9.806258923787154e-06, "loss": 0.0, "step": 1252 }, { "epoch": 0.8110032362459547, "grad_norm": 3.210567228961736e-05, "learning_rate": 9.7416960218774e-06, "loss": 0.0, "step": 1253 }, { "epoch": 0.8116504854368932, "grad_norm": 3.331467814859934e-05, "learning_rate": 9.67732341145246e-06, "loss": 0.0, "step": 1254 }, { "epoch": 0.8122977346278317, "grad_norm": 2.97059250442544e-05, "learning_rate": 9.613141396786462e-06, "loss": 0.0, "step": 1255 }, { "epoch": 0.8129449838187702, "grad_norm": 3.066575663979165e-05, "learning_rate": 9.549150281252633e-06, "loss": 0.0, "step": 1256 }, { "epoch": 0.8135922330097087, "grad_norm": 2.9601724236272275e-05, "learning_rate": 9.48535036732181e-06, "loss": 0.0, "step": 1257 }, { "epoch": 0.8142394822006472, "grad_norm": 2.846097959263716e-05, "learning_rate": 9.421741956561137e-06, "loss": 0.0, "step": 1258 }, { "epoch": 0.8148867313915857, "grad_norm": 3.0182511181919836e-05, "learning_rate": 9.358325349632514e-06, "loss": 0.0, "step": 1259 }, { "epoch": 0.8155339805825242, "grad_norm": 3.1620755180483684e-05, "learning_rate": 9.295100846291238e-06, "loss": 0.0, "step": 1260 }, { "epoch": 0.8161812297734627, "grad_norm": 3.21077459375374e-05, "learning_rate": 9.232068745384603e-06, "loss": 0.0, "step": 1261 }, { "epoch": 0.8168284789644013, "grad_norm": 3.1390874937642366e-05, "learning_rate": 9.16922934485046e-06, "loss": 0.0, "step": 1262 }, { "epoch": 0.8174757281553398, "grad_norm": 3.0309969588415697e-05, "learning_rate": 9.106582941715768e-06, "loss": 0.0, "step": 1263 }, { "epoch": 0.8181229773462784, "grad_norm": 2.8271257178857923e-05, "learning_rate": 9.044129832095299e-06, "loss": 0.0, "step": 1264 }, { "epoch": 0.8187702265372169, "grad_norm": 7.29700768715702e-05, "learning_rate": 8.981870311190099e-06, "loss": 0.0, "step": 1265 }, { "epoch": 0.8194174757281554, "grad_norm": 3.4984001104021445e-05, "learning_rate": 8.91980467328623e-06, "loss": 0.0, "step": 1266 }, { "epoch": 0.8200647249190939, "grad_norm": 2.912871968874242e-05, "learning_rate": 8.857933211753289e-06, "loss": 0.0, "step": 1267 }, { "epoch": 0.8207119741100324, "grad_norm": 3.0196642910595983e-05, "learning_rate": 8.796256219043042e-06, "loss": 0.0, "step": 1268 }, { "epoch": 0.8213592233009709, "grad_norm": 2.9139126127120107e-05, "learning_rate": 8.734773986688032e-06, "loss": 0.0, "step": 1269 }, { "epoch": 0.8220064724919094, "grad_norm": 3.0069269996602088e-05, "learning_rate": 8.673486805300263e-06, "loss": 0.0, "step": 1270 }, { "epoch": 0.8226537216828479, "grad_norm": 3.189206836395897e-05, "learning_rate": 8.61239496456973e-06, "loss": 0.0, "step": 1271 }, { "epoch": 0.8233009708737864, "grad_norm": 3.237631972297095e-05, "learning_rate": 8.551498753263133e-06, "loss": 0.0, "step": 1272 }, { "epoch": 0.8239482200647249, "grad_norm": 3.1884130294201896e-05, "learning_rate": 8.490798459222476e-06, "loss": 0.0, "step": 1273 }, { "epoch": 0.8245954692556634, "grad_norm": 3.139725959044881e-05, "learning_rate": 8.430294369363667e-06, "loss": 0.0, "step": 1274 }, { "epoch": 0.8252427184466019, "grad_norm": 3.044573895749636e-05, "learning_rate": 8.369986769675269e-06, "loss": 0.0, "step": 1275 }, { "epoch": 0.8258899676375404, "grad_norm": 3.236419433960691e-05, "learning_rate": 8.309875945217022e-06, "loss": 0.0, "step": 1276 }, { "epoch": 0.8265372168284789, "grad_norm": 3.04457080346765e-05, "learning_rate": 8.249962180118581e-06, "loss": 0.0, "step": 1277 }, { "epoch": 0.8271844660194175, "grad_norm": 3.3080803405027837e-05, "learning_rate": 8.190245757578175e-06, "loss": 0.0, "step": 1278 }, { "epoch": 0.827831715210356, "grad_norm": 3.0458842957159504e-05, "learning_rate": 8.130726959861201e-06, "loss": 0.0, "step": 1279 }, { "epoch": 0.8284789644012945, "grad_norm": 2.9737420845776796e-05, "learning_rate": 8.071406068298926e-06, "loss": 0.0, "step": 1280 }, { "epoch": 0.829126213592233, "grad_norm": 2.828111610142514e-05, "learning_rate": 8.012283363287205e-06, "loss": 0.0, "step": 1281 }, { "epoch": 0.8297734627831715, "grad_norm": 2.9597998945973814e-05, "learning_rate": 7.95335912428506e-06, "loss": 0.0, "step": 1282 }, { "epoch": 0.83042071197411, "grad_norm": 2.8280543119763024e-05, "learning_rate": 7.894633629813458e-06, "loss": 0.0, "step": 1283 }, { "epoch": 0.8310679611650486, "grad_norm": 2.780561408144422e-05, "learning_rate": 7.836107157453937e-06, "loss": 0.0, "step": 1284 }, { "epoch": 0.8317152103559871, "grad_norm": 3.162684879498556e-05, "learning_rate": 7.77777998384726e-06, "loss": 0.0, "step": 1285 }, { "epoch": 0.8323624595469256, "grad_norm": 2.8393124011927284e-05, "learning_rate": 7.719652384692216e-06, "loss": 0.0, "step": 1286 }, { "epoch": 0.8330097087378641, "grad_norm": 2.7938367566093802e-05, "learning_rate": 7.661724634744221e-06, "loss": 0.0, "step": 1287 }, { "epoch": 0.8336569579288026, "grad_norm": 2.7803946068161167e-05, "learning_rate": 7.6039970078140345e-06, "loss": 0.0, "step": 1288 }, { "epoch": 0.8343042071197411, "grad_norm": 2.935515840363223e-05, "learning_rate": 7.5464697767665234e-06, "loss": 0.0, "step": 1289 }, { "epoch": 0.8349514563106796, "grad_norm": 2.8421714887372218e-05, "learning_rate": 7.489143213519301e-06, "loss": 0.0, "step": 1290 }, { "epoch": 0.8355987055016181, "grad_norm": 2.9472732421709225e-05, "learning_rate": 7.432017589041463e-06, "loss": 0.0, "step": 1291 }, { "epoch": 0.8362459546925566, "grad_norm": 3.0081349905231036e-05, "learning_rate": 7.375093173352348e-06, "loss": 0.0, "step": 1292 }, { "epoch": 0.8368932038834952, "grad_norm": 2.7322215828462504e-05, "learning_rate": 7.3183702355202e-06, "loss": 0.0, "step": 1293 }, { "epoch": 0.8375404530744337, "grad_norm": 2.9117414669599384e-05, "learning_rate": 7.26184904366094e-06, "loss": 0.0, "step": 1294 }, { "epoch": 0.8381877022653722, "grad_norm": 2.838773616531398e-05, "learning_rate": 7.205529864936883e-06, "loss": 0.0, "step": 1295 }, { "epoch": 0.8388349514563107, "grad_norm": 2.8870774258393794e-05, "learning_rate": 7.149412965555463e-06, "loss": 0.0, "step": 1296 }, { "epoch": 0.8394822006472492, "grad_norm": 2.9239281502668746e-05, "learning_rate": 7.093498610768001e-06, "loss": 0.0, "step": 1297 }, { "epoch": 0.8401294498381877, "grad_norm": 2.780013528536074e-05, "learning_rate": 7.0377870648684394e-06, "loss": 0.0, "step": 1298 }, { "epoch": 0.8407766990291262, "grad_norm": 2.7307174605084583e-05, "learning_rate": 6.9822785911920766e-06, "loss": 0.0, "step": 1299 }, { "epoch": 0.8414239482200647, "grad_norm": 2.685461367946118e-05, "learning_rate": 6.926973452114338e-06, "loss": 0.0, "step": 1300 }, { "epoch": 0.8420711974110032, "grad_norm": 3.219491190975532e-05, "learning_rate": 6.871871909049554e-06, "loss": 0.0, "step": 1301 }, { "epoch": 0.8427184466019417, "grad_norm": 3.353790452820249e-05, "learning_rate": 6.816974222449674e-06, "loss": 0.0, "step": 1302 }, { "epoch": 0.8433656957928802, "grad_norm": 3.2828796975081787e-05, "learning_rate": 6.762280651803105e-06, "loss": 0.0, "step": 1303 }, { "epoch": 0.8440129449838187, "grad_norm": 3.161135100526735e-05, "learning_rate": 6.7077914556334e-06, "loss": 0.0, "step": 1304 }, { "epoch": 0.8446601941747572, "grad_norm": 3.0181634429027326e-05, "learning_rate": 6.653506891498118e-06, "loss": 0.0, "step": 1305 }, { "epoch": 0.8453074433656957, "grad_norm": 3.0183506169123575e-05, "learning_rate": 6.599427215987574e-06, "loss": 0.0, "step": 1306 }, { "epoch": 0.8459546925566344, "grad_norm": 2.7570862584980205e-05, "learning_rate": 6.5455526847235825e-06, "loss": 0.0, "step": 1307 }, { "epoch": 0.8466019417475729, "grad_norm": 2.912186027970165e-05, "learning_rate": 6.49188355235833e-06, "loss": 0.0, "step": 1308 }, { "epoch": 0.8472491909385114, "grad_norm": 5.2388324547791854e-05, "learning_rate": 6.438420072573126e-06, "loss": 0.0, "step": 1309 }, { "epoch": 0.8478964401294499, "grad_norm": 2.9694594559259713e-05, "learning_rate": 6.3851624980771905e-06, "loss": 0.0, "step": 1310 }, { "epoch": 0.8485436893203884, "grad_norm": 3.2102074328577146e-05, "learning_rate": 6.332111080606467e-06, "loss": 0.0, "step": 1311 }, { "epoch": 0.8491909385113269, "grad_norm": 3.619419294409454e-05, "learning_rate": 6.279266070922496e-06, "loss": 0.0, "step": 1312 }, { "epoch": 0.8498381877022654, "grad_norm": 3.186412141076289e-05, "learning_rate": 6.226627718811118e-06, "loss": 0.0, "step": 1313 }, { "epoch": 0.8504854368932039, "grad_norm": 3.2827385439304635e-05, "learning_rate": 6.1741962730814e-06, "loss": 0.0, "step": 1314 }, { "epoch": 0.8511326860841424, "grad_norm": 2.8862363251391798e-05, "learning_rate": 6.121971981564367e-06, "loss": 0.0, "step": 1315 }, { "epoch": 0.8517799352750809, "grad_norm": 2.9111837648088112e-05, "learning_rate": 6.069955091111912e-06, "loss": 0.0, "step": 1316 }, { "epoch": 0.8524271844660194, "grad_norm": 2.9940256354166195e-05, "learning_rate": 6.018145847595585e-06, "loss": 0.0, "step": 1317 }, { "epoch": 0.8530744336569579, "grad_norm": 3.0056986361159943e-05, "learning_rate": 5.966544495905408e-06, "loss": 0.0, "step": 1318 }, { "epoch": 0.8537216828478964, "grad_norm": 3.307464066892862e-05, "learning_rate": 5.915151279948788e-06, "loss": 0.0, "step": 1319 }, { "epoch": 0.8543689320388349, "grad_norm": 3.189213020959869e-05, "learning_rate": 5.863966442649327e-06, "loss": 0.0, "step": 1320 }, { "epoch": 0.8550161812297734, "grad_norm": 2.996783223352395e-05, "learning_rate": 5.812990225945603e-06, "loss": 0.0, "step": 1321 }, { "epoch": 0.855663430420712, "grad_norm": 3.2352731068385765e-05, "learning_rate": 5.762222870790163e-06, "loss": 0.0, "step": 1322 }, { "epoch": 0.8563106796116505, "grad_norm": 3.427658521104604e-05, "learning_rate": 5.711664617148299e-06, "loss": 0.0, "step": 1323 }, { "epoch": 0.856957928802589, "grad_norm": 3.427062256378122e-05, "learning_rate": 5.6613157039969055e-06, "loss": 0.0, "step": 1324 }, { "epoch": 0.8576051779935275, "grad_norm": 3.057840876863338e-05, "learning_rate": 5.611176369323412e-06, "loss": 0.0, "step": 1325 }, { "epoch": 0.858252427184466, "grad_norm": 3.3830769098130986e-05, "learning_rate": 5.56124685012458e-06, "loss": 0.0, "step": 1326 }, { "epoch": 0.8588996763754045, "grad_norm": 3.0684615921927616e-05, "learning_rate": 5.511527382405451e-06, "loss": 0.0, "step": 1327 }, { "epoch": 0.859546925566343, "grad_norm": 2.877054976124782e-05, "learning_rate": 5.462018201178204e-06, "loss": 0.0, "step": 1328 }, { "epoch": 0.8601941747572815, "grad_norm": 2.8292075512581505e-05, "learning_rate": 5.412719540461015e-06, "loss": 0.0, "step": 1329 }, { "epoch": 0.86084142394822, "grad_norm": 4.5770942961098626e-05, "learning_rate": 5.363631633277006e-06, "loss": 0.0, "step": 1330 }, { "epoch": 0.8614886731391586, "grad_norm": 2.9364460715441965e-05, "learning_rate": 5.314754711653125e-06, "loss": 0.0, "step": 1331 }, { "epoch": 0.8621359223300971, "grad_norm": 2.9971184630994685e-05, "learning_rate": 5.266089006618991e-06, "loss": 0.0, "step": 1332 }, { "epoch": 0.8627831715210356, "grad_norm": 3.090947211603634e-05, "learning_rate": 5.217634748205902e-06, "loss": 0.0, "step": 1333 }, { "epoch": 0.8634304207119741, "grad_norm": 2.972232505271677e-05, "learning_rate": 5.169392165445691e-06, "loss": 0.0, "step": 1334 }, { "epoch": 0.8640776699029126, "grad_norm": 2.7802563636214472e-05, "learning_rate": 5.121361486369625e-06, "loss": 0.0, "step": 1335 }, { "epoch": 0.8647249190938512, "grad_norm": 2.7798199880635366e-05, "learning_rate": 5.0735429380073865e-06, "loss": 0.0, "step": 1336 }, { "epoch": 0.8653721682847897, "grad_norm": 2.84150373772718e-05, "learning_rate": 5.025936746385928e-06, "loss": 0.0, "step": 1337 }, { "epoch": 0.8660194174757282, "grad_norm": 2.972891707031522e-05, "learning_rate": 4.978543136528474e-06, "loss": 0.0, "step": 1338 }, { "epoch": 0.8666666666666667, "grad_norm": 2.9013235689490102e-05, "learning_rate": 4.931362332453421e-06, "loss": 0.0, "step": 1339 }, { "epoch": 0.8673139158576052, "grad_norm": 2.887454320443794e-05, "learning_rate": 4.884394557173249e-06, "loss": 0.0, "step": 1340 }, { "epoch": 0.8679611650485437, "grad_norm": 3.425277827773243e-05, "learning_rate": 4.837640032693558e-06, "loss": 0.0, "step": 1341 }, { "epoch": 0.8686084142394822, "grad_norm": 2.8632548492169008e-05, "learning_rate": 4.791098980011921e-06, "loss": 0.0, "step": 1342 }, { "epoch": 0.8692556634304207, "grad_norm": 2.853093610610813e-05, "learning_rate": 4.744771619116872e-06, "loss": 0.0, "step": 1343 }, { "epoch": 0.8699029126213592, "grad_norm": 2.9239194191177376e-05, "learning_rate": 4.698658168986908e-06, "loss": 0.0, "step": 1344 }, { "epoch": 0.8705501618122977, "grad_norm": 2.6730338504421525e-05, "learning_rate": 4.652758847589416e-06, "loss": 0.0, "step": 1345 }, { "epoch": 0.8711974110032362, "grad_norm": 2.781052353384439e-05, "learning_rate": 4.607073871879613e-06, "loss": 0.0, "step": 1346 }, { "epoch": 0.8718446601941747, "grad_norm": 2.9233944587758742e-05, "learning_rate": 4.5616034577995955e-06, "loss": 0.0, "step": 1347 }, { "epoch": 0.8724919093851132, "grad_norm": 2.815485822793562e-05, "learning_rate": 4.5163478202772615e-06, "loss": 0.0, "step": 1348 }, { "epoch": 0.8731391585760517, "grad_norm": 2.8653046683757566e-05, "learning_rate": 4.471307173225292e-06, "loss": 0.0, "step": 1349 }, { "epoch": 0.8737864077669902, "grad_norm": 3.0307597626233473e-05, "learning_rate": 4.426481729540205e-06, "loss": 0.0, "step": 1350 }, { "epoch": 0.8744336569579289, "grad_norm": 3.496236968203448e-05, "learning_rate": 4.381871701101248e-06, "loss": 0.0, "step": 1351 }, { "epoch": 0.8750809061488674, "grad_norm": 3.211486182408407e-05, "learning_rate": 4.337477298769493e-06, "loss": 0.0, "step": 1352 }, { "epoch": 0.8757281553398059, "grad_norm": 2.8743703296640888e-05, "learning_rate": 4.293298732386786e-06, "loss": 0.0, "step": 1353 }, { "epoch": 0.8763754045307444, "grad_norm": 3.354837099323049e-05, "learning_rate": 4.249336210774746e-06, "loss": 0.0, "step": 1354 }, { "epoch": 0.8770226537216829, "grad_norm": 3.329403989482671e-05, "learning_rate": 4.205589941733834e-06, "loss": 0.0, "step": 1355 }, { "epoch": 0.8776699029126214, "grad_norm": 2.9112465199432336e-05, "learning_rate": 4.162060132042333e-06, "loss": 0.0, "step": 1356 }, { "epoch": 0.8783171521035599, "grad_norm": 3.116092921118252e-05, "learning_rate": 4.118746987455336e-06, "loss": 0.0, "step": 1357 }, { "epoch": 0.8789644012944984, "grad_norm": 3.0895193049218506e-05, "learning_rate": 4.075650712703849e-06, "loss": 0.0, "step": 1358 }, { "epoch": 0.8796116504854369, "grad_norm": 3.210348222637549e-05, "learning_rate": 4.032771511493782e-06, "loss": 0.0, "step": 1359 }, { "epoch": 0.8802588996763754, "grad_norm": 3.0547904316335917e-05, "learning_rate": 3.990109586504965e-06, "loss": 0.0, "step": 1360 }, { "epoch": 0.8809061488673139, "grad_norm": 2.946665154013317e-05, "learning_rate": 3.9476651393902494e-06, "loss": 0.0, "step": 1361 }, { "epoch": 0.8815533980582524, "grad_norm": 3.0193479688023217e-05, "learning_rate": 3.905438370774495e-06, "loss": 0.0, "step": 1362 }, { "epoch": 0.8822006472491909, "grad_norm": 3.0318025892484002e-05, "learning_rate": 3.8634294802536384e-06, "loss": 0.0, "step": 1363 }, { "epoch": 0.8828478964401294, "grad_norm": 3.162304710713215e-05, "learning_rate": 3.82163866639379e-06, "loss": 0.0, "step": 1364 }, { "epoch": 0.883495145631068, "grad_norm": 3.113932689302601e-05, "learning_rate": 3.7800661267302417e-06, "loss": 0.0, "step": 1365 }, { "epoch": 0.8841423948220065, "grad_norm": 3.0186189178493805e-05, "learning_rate": 3.7387120577665524e-06, "loss": 0.0, "step": 1366 }, { "epoch": 0.884789644012945, "grad_norm": 3.017430026375223e-05, "learning_rate": 3.69757665497365e-06, "loss": 0.0, "step": 1367 }, { "epoch": 0.8854368932038835, "grad_norm": 3.1866406061453745e-05, "learning_rate": 3.6566601127888344e-06, "loss": 0.0, "step": 1368 }, { "epoch": 0.886084142394822, "grad_norm": 2.9112245101714507e-05, "learning_rate": 3.615962624614938e-06, "loss": 0.0, "step": 1369 }, { "epoch": 0.8867313915857605, "grad_norm": 2.9124956199666485e-05, "learning_rate": 3.5754843828193716e-06, "loss": 0.0, "step": 1370 }, { "epoch": 0.887378640776699, "grad_norm": 3.236894553992897e-05, "learning_rate": 3.535225578733198e-06, "loss": 0.0, "step": 1371 }, { "epoch": 0.8880258899676375, "grad_norm": 3.188128903275356e-05, "learning_rate": 3.4951864026502854e-06, "loss": 0.0, "step": 1372 }, { "epoch": 0.888673139158576, "grad_norm": 3.475479024928063e-05, "learning_rate": 3.4553670438263408e-06, "loss": 0.0, "step": 1373 }, { "epoch": 0.8893203883495145, "grad_norm": 3.189170456607826e-05, "learning_rate": 3.41576769047805e-06, "loss": 0.0, "step": 1374 }, { "epoch": 0.889967637540453, "grad_norm": 3.0447605240624398e-05, "learning_rate": 3.376388529782215e-06, "loss": 0.0, "step": 1375 }, { "epoch": 0.8906148867313916, "grad_norm": 3.116185325779952e-05, "learning_rate": 3.3372297478748038e-06, "loss": 0.0, "step": 1376 }, { "epoch": 0.8912621359223301, "grad_norm": 2.9720120437559672e-05, "learning_rate": 3.2982915298501173e-06, "loss": 0.0, "step": 1377 }, { "epoch": 0.8919093851132686, "grad_norm": 3.618436312535778e-05, "learning_rate": 3.2595740597599234e-06, "loss": 0.0, "step": 1378 }, { "epoch": 0.8925566343042072, "grad_norm": 2.9376269594649784e-05, "learning_rate": 3.221077520612531e-06, "loss": 0.0, "step": 1379 }, { "epoch": 0.8932038834951457, "grad_norm": 3.2120238756760955e-05, "learning_rate": 3.1828020943719894e-06, "loss": 0.0, "step": 1380 }, { "epoch": 0.8938511326860842, "grad_norm": 3.238513454562053e-05, "learning_rate": 3.144747961957195e-06, "loss": 0.0, "step": 1381 }, { "epoch": 0.8944983818770227, "grad_norm": 3.115571234957315e-05, "learning_rate": 3.1069153032410147e-06, "loss": 0.0, "step": 1382 }, { "epoch": 0.8951456310679612, "grad_norm": 2.8277658202569e-05, "learning_rate": 3.069304297049508e-06, "loss": 0.0, "step": 1383 }, { "epoch": 0.8957928802588997, "grad_norm": 2.8890643079648726e-05, "learning_rate": 3.0319151211609886e-06, "loss": 0.0, "step": 1384 }, { "epoch": 0.8964401294498382, "grad_norm": 3.032093809451908e-05, "learning_rate": 2.9947479523052548e-06, "loss": 0.0, "step": 1385 }, { "epoch": 0.8970873786407767, "grad_norm": 3.0906725442036986e-05, "learning_rate": 2.9578029661627314e-06, "loss": 0.0, "step": 1386 }, { "epoch": 0.8977346278317152, "grad_norm": 3.1875537388259545e-05, "learning_rate": 2.921080337363624e-06, "loss": 0.0, "step": 1387 }, { "epoch": 0.8983818770226537, "grad_norm": 2.8284761356189847e-05, "learning_rate": 2.884580239487128e-06, "loss": 0.0, "step": 1388 }, { "epoch": 0.8990291262135922, "grad_norm": 2.9141221602912992e-05, "learning_rate": 2.8483028450605742e-06, "loss": 0.0, "step": 1389 }, { "epoch": 0.8996763754045307, "grad_norm": 2.780696377158165e-05, "learning_rate": 2.8122483255586252e-06, "loss": 0.0, "step": 1390 }, { "epoch": 0.9003236245954692, "grad_norm": 3.0303663152153604e-05, "learning_rate": 2.776416851402469e-06, "loss": 0.0, "step": 1391 }, { "epoch": 0.9009708737864077, "grad_norm": 2.9251741580083035e-05, "learning_rate": 2.7408085919590264e-06, "loss": 0.0, "step": 1392 }, { "epoch": 0.9016181229773462, "grad_norm": 2.935952397820074e-05, "learning_rate": 2.705423715540101e-06, "loss": 0.0, "step": 1393 }, { "epoch": 0.9022653721682848, "grad_norm": 3.0076322218519635e-05, "learning_rate": 2.670262389401651e-06, "loss": 0.0, "step": 1394 }, { "epoch": 0.9029126213592233, "grad_norm": 2.7805677746073343e-05, "learning_rate": 2.6353247797429535e-06, "loss": 0.0, "step": 1395 }, { "epoch": 0.9035598705501618, "grad_norm": 2.8380973162711598e-05, "learning_rate": 2.6006110517058144e-06, "loss": 0.0, "step": 1396 }, { "epoch": 0.9042071197411004, "grad_norm": 2.719420808716677e-05, "learning_rate": 2.566121369373836e-06, "loss": 0.0, "step": 1397 }, { "epoch": 0.9048543689320389, "grad_norm": 2.673549715836998e-05, "learning_rate": 2.531855895771579e-06, "loss": 0.0, "step": 1398 }, { "epoch": 0.9055016181229774, "grad_norm": 3.116455263807438e-05, "learning_rate": 2.4978147928638397e-06, "loss": 0.0, "step": 1399 }, { "epoch": 0.9061488673139159, "grad_norm": 2.9239681680337526e-05, "learning_rate": 2.463998221554875e-06, "loss": 0.0, "step": 1400 }, { "epoch": 0.9067961165048544, "grad_norm": 3.38866411766503e-05, "learning_rate": 2.430406341687608e-06, "loss": 0.0, "step": 1401 }, { "epoch": 0.9074433656957929, "grad_norm": 3.031195592484437e-05, "learning_rate": 2.3970393120429145e-06, "loss": 0.0, "step": 1402 }, { "epoch": 0.9080906148867314, "grad_norm": 3.054170520044863e-05, "learning_rate": 2.363897290338868e-06, "loss": 0.0, "step": 1403 }, { "epoch": 0.9087378640776699, "grad_norm": 2.8882692276965827e-05, "learning_rate": 2.3309804332299566e-06, "loss": 0.0, "step": 1404 }, { "epoch": 0.9093851132686084, "grad_norm": 2.8627364372368902e-05, "learning_rate": 2.2982888963063774e-06, "loss": 0.0, "step": 1405 }, { "epoch": 0.9100323624595469, "grad_norm": 2.7794889319920912e-05, "learning_rate": 2.2658228340933117e-06, "loss": 0.0, "step": 1406 }, { "epoch": 0.9106796116504854, "grad_norm": 3.1401821615872905e-05, "learning_rate": 2.2335824000501437e-06, "loss": 0.0, "step": 1407 }, { "epoch": 0.911326860841424, "grad_norm": 2.909961222030688e-05, "learning_rate": 2.201567746569794e-06, "loss": 0.0, "step": 1408 }, { "epoch": 0.9119741100323625, "grad_norm": 3.055423076148145e-05, "learning_rate": 2.1697790249779636e-06, "loss": 0.0, "step": 1409 }, { "epoch": 0.912621359223301, "grad_norm": 2.8502765417215414e-05, "learning_rate": 2.13821638553241e-06, "loss": 0.0, "step": 1410 }, { "epoch": 0.9132686084142395, "grad_norm": 3.2103438570629805e-05, "learning_rate": 2.106879977422277e-06, "loss": 0.0, "step": 1411 }, { "epoch": 0.913915857605178, "grad_norm": 3.0755229090573266e-05, "learning_rate": 2.0757699487673533e-06, "loss": 0.0, "step": 1412 }, { "epoch": 0.9145631067961165, "grad_norm": 2.8395124900271185e-05, "learning_rate": 2.044886446617389e-06, "loss": 0.0, "step": 1413 }, { "epoch": 0.915210355987055, "grad_norm": 3.307309452793561e-05, "learning_rate": 2.0142296169514073e-06, "loss": 0.0, "step": 1414 }, { "epoch": 0.9158576051779935, "grad_norm": 2.995224713231437e-05, "learning_rate": 1.9837996046769837e-06, "loss": 0.0, "step": 1415 }, { "epoch": 0.916504854368932, "grad_norm": 3.0005847293068655e-05, "learning_rate": 1.9535965536295885e-06, "loss": 0.0, "step": 1416 }, { "epoch": 0.9171521035598705, "grad_norm": 3.1141447834670544e-05, "learning_rate": 1.923620606571919e-06, "loss": 0.0, "step": 1417 }, { "epoch": 0.917799352750809, "grad_norm": 2.9125007131369784e-05, "learning_rate": 1.8938719051931674e-06, "loss": 0.0, "step": 1418 }, { "epoch": 0.9184466019417475, "grad_norm": 3.1149378628470004e-05, "learning_rate": 1.8643505901084268e-06, "loss": 0.0, "step": 1419 }, { "epoch": 0.919093851132686, "grad_norm": 3.2131334592122585e-05, "learning_rate": 1.8350568008579705e-06, "loss": 0.0, "step": 1420 }, { "epoch": 0.9197411003236245, "grad_norm": 3.188562550349161e-05, "learning_rate": 1.8059906759066159e-06, "loss": 0.0, "step": 1421 }, { "epoch": 0.920388349514563, "grad_norm": 3.0224360671127215e-05, "learning_rate": 1.777152352643069e-06, "loss": 0.0, "step": 1422 }, { "epoch": 0.9210355987055017, "grad_norm": 3.379662666702643e-05, "learning_rate": 1.7485419673792524e-06, "loss": 0.0, "step": 1423 }, { "epoch": 0.9216828478964402, "grad_norm": 3.2119089155457914e-05, "learning_rate": 1.7201596553497013e-06, "loss": 0.0, "step": 1424 }, { "epoch": 0.9223300970873787, "grad_norm": 3.09307397401426e-05, "learning_rate": 1.692005550710901e-06, "loss": 0.0, "step": 1425 }, { "epoch": 0.9229773462783172, "grad_norm": 3.093123814323917e-05, "learning_rate": 1.6640797865406288e-06, "loss": 0.0, "step": 1426 }, { "epoch": 0.9236245954692557, "grad_norm": 3.285687125753611e-05, "learning_rate": 1.6363824948373852e-06, "loss": 0.0, "step": 1427 }, { "epoch": 0.9242718446601942, "grad_norm": 2.959301127702929e-05, "learning_rate": 1.6089138065197185e-06, "loss": 0.0, "step": 1428 }, { "epoch": 0.9249190938511327, "grad_norm": 3.1159772333921865e-05, "learning_rate": 1.5816738514256135e-06, "loss": 0.0, "step": 1429 }, { "epoch": 0.9255663430420712, "grad_norm": 2.8291822673054412e-05, "learning_rate": 1.5546627583119088e-06, "loss": 0.0, "step": 1430 }, { "epoch": 0.9262135922330097, "grad_norm": 2.936238524853252e-05, "learning_rate": 1.5278806548536584e-06, "loss": 0.0, "step": 1431 }, { "epoch": 0.9268608414239482, "grad_norm": 3.067820944124833e-05, "learning_rate": 1.501327667643515e-06, "loss": 0.0, "step": 1432 }, { "epoch": 0.9275080906148867, "grad_norm": 2.8284572181291878e-05, "learning_rate": 1.4750039221911926e-06, "loss": 0.0, "step": 1433 }, { "epoch": 0.9281553398058252, "grad_norm": 2.9370548872975633e-05, "learning_rate": 1.4489095429227995e-06, "loss": 0.0, "step": 1434 }, { "epoch": 0.9288025889967637, "grad_norm": 3.283307160018012e-05, "learning_rate": 1.4230446531803e-06, "loss": 0.0, "step": 1435 }, { "epoch": 0.9294498381877022, "grad_norm": 2.6713743864092976e-05, "learning_rate": 1.3974093752209206e-06, "loss": 0.0, "step": 1436 }, { "epoch": 0.9300970873786408, "grad_norm": 2.7796757422038354e-05, "learning_rate": 1.372003830216545e-06, "loss": 0.0, "step": 1437 }, { "epoch": 0.9307443365695793, "grad_norm": 2.8533981094369665e-05, "learning_rate": 1.3468281382531866e-06, "loss": 0.0, "step": 1438 }, { "epoch": 0.9313915857605178, "grad_norm": 2.888152448576875e-05, "learning_rate": 1.321882418330389e-06, "loss": 0.0, "step": 1439 }, { "epoch": 0.9320388349514563, "grad_norm": 2.925360058725346e-05, "learning_rate": 1.2971667883606652e-06, "loss": 0.0, "step": 1440 }, { "epoch": 0.9326860841423948, "grad_norm": 2.8393138563842513e-05, "learning_rate": 1.272681365168965e-06, "loss": 0.0, "step": 1441 }, { "epoch": 0.9333333333333333, "grad_norm": 3.054711487493478e-05, "learning_rate": 1.2484262644920918e-06, "loss": 0.0, "step": 1442 }, { "epoch": 0.9339805825242719, "grad_norm": 2.7200458134757355e-05, "learning_rate": 1.2244016009781701e-06, "loss": 0.0, "step": 1443 }, { "epoch": 0.9346278317152104, "grad_norm": 2.731495442276355e-05, "learning_rate": 1.2006074881861063e-06, "loss": 0.0, "step": 1444 }, { "epoch": 0.9352750809061489, "grad_norm": 2.8398326321621425e-05, "learning_rate": 1.1770440385850401e-06, "loss": 0.0, "step": 1445 }, { "epoch": 0.9359223300970874, "grad_norm": 2.7919622880290262e-05, "learning_rate": 1.1537113635538332e-06, "loss": 0.0, "step": 1446 }, { "epoch": 0.9365695792880259, "grad_norm": 2.779790520435199e-05, "learning_rate": 1.1306095733805254e-06, "loss": 0.0, "step": 1447 }, { "epoch": 0.9372168284789644, "grad_norm": 2.8276941520744003e-05, "learning_rate": 1.1077387772618075e-06, "loss": 0.0, "step": 1448 }, { "epoch": 0.9378640776699029, "grad_norm": 2.8997719709877856e-05, "learning_rate": 1.0850990833025322e-06, "loss": 0.0, "step": 1449 }, { "epoch": 0.9385113268608414, "grad_norm": 2.779282112896908e-05, "learning_rate": 1.062690598515187e-06, "loss": 0.0, "step": 1450 }, { "epoch": 0.9391585760517799, "grad_norm": 3.428855779930018e-05, "learning_rate": 1.0405134288193674e-06, "loss": 0.0, "step": 1451 }, { "epoch": 0.9398058252427185, "grad_norm": 3.5010241845157e-05, "learning_rate": 1.0185676790413213e-06, "loss": 0.0, "step": 1452 }, { "epoch": 0.940453074433657, "grad_norm": 3.0191284167813137e-05, "learning_rate": 9.968534529134154e-07, "loss": 0.0, "step": 1453 }, { "epoch": 0.9411003236245955, "grad_norm": 3.0183244234649464e-05, "learning_rate": 9.75370853073665e-07, "loss": 0.0, "step": 1454 }, { "epoch": 0.941747572815534, "grad_norm": 2.900006620620843e-05, "learning_rate": 9.54119981065238e-07, "loss": 0.0, "step": 1455 }, { "epoch": 0.9423948220064725, "grad_norm": 4.836772131966427e-05, "learning_rate": 9.3310093733599e-07, "loss": 0.0, "step": 1456 }, { "epoch": 0.943042071197411, "grad_norm": 3.2600761187495664e-05, "learning_rate": 9.123138212379534e-07, "loss": 0.0, "step": 1457 }, { "epoch": 0.9436893203883495, "grad_norm": 2.8977845431654714e-05, "learning_rate": 8.917587310269315e-07, "loss": 0.0, "step": 1458 }, { "epoch": 0.944336569579288, "grad_norm": 3.258264041505754e-05, "learning_rate": 8.714357638619608e-07, "loss": 0.0, "step": 1459 }, { "epoch": 0.9449838187702265, "grad_norm": 3.353532883920707e-05, "learning_rate": 8.513450158049108e-07, "loss": 0.0, "step": 1460 }, { "epoch": 0.945631067961165, "grad_norm": 3.066585122724064e-05, "learning_rate": 8.314865818200013e-07, "loss": 0.0, "step": 1461 }, { "epoch": 0.9462783171521035, "grad_norm": 2.8273796488065273e-05, "learning_rate": 8.118605557733417e-07, "loss": 0.0, "step": 1462 }, { "epoch": 0.946925566343042, "grad_norm": 2.887805203499738e-05, "learning_rate": 7.924670304325199e-07, "loss": 0.0, "step": 1463 }, { "epoch": 0.9475728155339805, "grad_norm": 3.234121322748251e-05, "learning_rate": 7.733060974661588e-07, "loss": 0.0, "step": 1464 }, { "epoch": 0.948220064724919, "grad_norm": 2.8881553589599207e-05, "learning_rate": 7.543778474434438e-07, "loss": 0.0, "step": 1465 }, { "epoch": 0.9488673139158577, "grad_norm": 2.9947719667688943e-05, "learning_rate": 7.356823698337512e-07, "loss": 0.0, "step": 1466 }, { "epoch": 0.9495145631067962, "grad_norm": 3.2102212571771815e-05, "learning_rate": 7.172197530061708e-07, "loss": 0.0, "step": 1467 }, { "epoch": 0.9501618122977347, "grad_norm": 2.9960867323097773e-05, "learning_rate": 6.989900842291286e-07, "loss": 0.0, "step": 1468 }, { "epoch": 0.9508090614886732, "grad_norm": 2.9122256819391623e-05, "learning_rate": 6.809934496699588e-07, "loss": 0.0, "step": 1469 }, { "epoch": 0.9514563106796117, "grad_norm": 3.0438421163125895e-05, "learning_rate": 6.632299343945103e-07, "loss": 0.0, "step": 1470 }, { "epoch": 0.9521035598705502, "grad_norm": 3.2355859730159864e-05, "learning_rate": 6.456996223667022e-07, "loss": 0.0, "step": 1471 }, { "epoch": 0.9527508090614887, "grad_norm": 2.8882805054308847e-05, "learning_rate": 6.28402596448191e-07, "loss": 0.0, "step": 1472 }, { "epoch": 0.9533980582524272, "grad_norm": 2.912832860602066e-05, "learning_rate": 6.113389383979151e-07, "loss": 0.0, "step": 1473 }, { "epoch": 0.9540453074433657, "grad_norm": 2.939105615951121e-05, "learning_rate": 5.945087288717622e-07, "loss": 0.0, "step": 1474 }, { "epoch": 0.9546925566343042, "grad_norm": 3.0436318411375396e-05, "learning_rate": 5.779120474221522e-07, "loss": 0.0, "step": 1475 }, { "epoch": 0.9553398058252427, "grad_norm": 2.996389230247587e-05, "learning_rate": 5.615489724976664e-07, "loss": 0.0, "step": 1476 }, { "epoch": 0.9559870550161812, "grad_norm": 3.117434971500188e-05, "learning_rate": 5.454195814427021e-07, "loss": 0.0, "step": 1477 }, { "epoch": 0.9566343042071197, "grad_norm": 2.7828933525597677e-05, "learning_rate": 5.295239504970739e-07, "loss": 0.0, "step": 1478 }, { "epoch": 0.9572815533980582, "grad_norm": 2.7228026738157496e-05, "learning_rate": 5.138621547956635e-07, "loss": 0.0, "step": 1479 }, { "epoch": 0.9579288025889967, "grad_norm": 2.887405025830958e-05, "learning_rate": 4.984342683680809e-07, "loss": 0.0, "step": 1480 }, { "epoch": 0.9585760517799353, "grad_norm": 2.721715827647131e-05, "learning_rate": 4.832403641383044e-07, "loss": 0.0, "step": 1481 }, { "epoch": 0.9592233009708738, "grad_norm": 2.828839569701813e-05, "learning_rate": 4.6828051392431847e-07, "loss": 0.0, "step": 1482 }, { "epoch": 0.9598705501618123, "grad_norm": 2.8300539270276204e-05, "learning_rate": 4.535547884378044e-07, "loss": 0.0, "step": 1483 }, { "epoch": 0.9605177993527508, "grad_norm": 2.900650360970758e-05, "learning_rate": 4.390632572837783e-07, "loss": 0.0, "step": 1484 }, { "epoch": 0.9611650485436893, "grad_norm": 2.971368121507112e-05, "learning_rate": 4.2480598896028624e-07, "loss": 0.0, "step": 1485 }, { "epoch": 0.9618122977346278, "grad_norm": 3.18863385473378e-05, "learning_rate": 4.1078305085807124e-07, "loss": 0.0, "step": 1486 }, { "epoch": 0.9624595469255663, "grad_norm": 2.9233349778223783e-05, "learning_rate": 3.9699450926022896e-07, "loss": 0.0, "step": 1487 }, { "epoch": 0.9631067961165048, "grad_norm": 2.9725508284172975e-05, "learning_rate": 3.8344042934195246e-07, "loss": 0.0, "step": 1488 }, { "epoch": 0.9637540453074434, "grad_norm": 2.8876263968413696e-05, "learning_rate": 3.7012087517016567e-07, "loss": 0.0, "step": 1489 }, { "epoch": 0.9644012944983819, "grad_norm": 2.9116605219314806e-05, "learning_rate": 3.570359097032516e-07, "loss": 0.0, "step": 1490 }, { "epoch": 0.9650485436893204, "grad_norm": 2.7797896109404974e-05, "learning_rate": 3.441855947907524e-07, "loss": 0.0, "step": 1491 }, { "epoch": 0.9656957928802589, "grad_norm": 2.8875567295472138e-05, "learning_rate": 3.315699911730641e-07, "loss": 0.0, "step": 1492 }, { "epoch": 0.9663430420711974, "grad_norm": 2.9713237381656654e-05, "learning_rate": 3.1918915848115903e-07, "loss": 0.0, "step": 1493 }, { "epoch": 0.9669902912621359, "grad_norm": 2.8407523132045753e-05, "learning_rate": 3.0704315523631953e-07, "loss": 0.0, "step": 1494 }, { "epoch": 0.9676375404530745, "grad_norm": 2.7202309865970165e-05, "learning_rate": 2.9513203884981577e-07, "loss": 0.0, "step": 1495 }, { "epoch": 0.968284789644013, "grad_norm": 2.7205003789276816e-05, "learning_rate": 2.8345586562268934e-07, "loss": 0.0, "step": 1496 }, { "epoch": 0.9689320388349515, "grad_norm": 2.9845659810234793e-05, "learning_rate": 2.7201469074544795e-07, "loss": 0.0, "step": 1497 }, { "epoch": 0.96957928802589, "grad_norm": 2.86362374026794e-05, "learning_rate": 2.608085682978212e-07, "loss": 0.0, "step": 1498 }, { "epoch": 0.9702265372168285, "grad_norm": 2.9241513402666897e-05, "learning_rate": 2.498375512484941e-07, "loss": 0.0, "step": 1499 }, { "epoch": 0.970873786407767, "grad_norm": 3.067816942348145e-05, "learning_rate": 2.3910169145487936e-07, "loss": 0.0, "step": 1500 }, { "epoch": 0.9715210355987055, "grad_norm": 2.9163411454646848e-05, "learning_rate": 2.2860103966284019e-07, "loss": 0.0, "step": 1501 }, { "epoch": 0.972168284789644, "grad_norm": 3.45797925547231e-05, "learning_rate": 2.183356455064789e-07, "loss": 0.0, "step": 1502 }, { "epoch": 0.9728155339805825, "grad_norm": 2.8640923119382933e-05, "learning_rate": 2.0830555750788738e-07, "loss": 0.0, "step": 1503 }, { "epoch": 0.973462783171521, "grad_norm": 3.0909341148799285e-05, "learning_rate": 1.9851082307691948e-07, "loss": 0.0, "step": 1504 }, { "epoch": 0.9741100323624595, "grad_norm": 3.211084185750224e-05, "learning_rate": 1.889514885109689e-07, "loss": 0.0, "step": 1505 }, { "epoch": 0.974757281553398, "grad_norm": 2.8628259315155447e-05, "learning_rate": 1.7962759899474713e-07, "loss": 0.0, "step": 1506 }, { "epoch": 0.9754045307443365, "grad_norm": 2.9366896342253312e-05, "learning_rate": 1.7053919860007816e-07, "loss": 0.0, "step": 1507 }, { "epoch": 0.976051779935275, "grad_norm": 2.9947226721560583e-05, "learning_rate": 1.6168633028568747e-07, "loss": 0.0, "step": 1508 }, { "epoch": 0.9766990291262136, "grad_norm": 3.185419336659834e-05, "learning_rate": 1.5306903589698552e-07, "loss": 0.0, "step": 1509 }, { "epoch": 0.9773462783171522, "grad_norm": 3.955503780161962e-05, "learning_rate": 1.4468735616587904e-07, "loss": 0.0, "step": 1510 }, { "epoch": 0.9779935275080907, "grad_norm": 3.210457362001762e-05, "learning_rate": 1.3654133071059893e-07, "loss": 0.0, "step": 1511 }, { "epoch": 0.9786407766990292, "grad_norm": 3.211268631275743e-05, "learning_rate": 1.2863099803547274e-07, "loss": 0.0, "step": 1512 }, { "epoch": 0.9792880258899677, "grad_norm": 3.0189445169526152e-05, "learning_rate": 1.2095639553077466e-07, "loss": 0.0, "step": 1513 }, { "epoch": 0.9799352750809062, "grad_norm": 2.912877607741393e-05, "learning_rate": 1.1351755947253684e-07, "loss": 0.0, "step": 1514 }, { "epoch": 0.9805825242718447, "grad_norm": 2.8272341296542436e-05, "learning_rate": 1.0631452502237737e-07, "loss": 0.0, "step": 1515 }, { "epoch": 0.9812297734627832, "grad_norm": 2.7096859412267804e-05, "learning_rate": 9.934732622734477e-08, "loss": 0.0, "step": 1516 }, { "epoch": 0.9818770226537217, "grad_norm": 3.0457478715106845e-05, "learning_rate": 9.261599601972926e-08, "loss": 0.0, "step": 1517 }, { "epoch": 0.9825242718446602, "grad_norm": 2.9363456633291207e-05, "learning_rate": 8.612056621694064e-08, "loss": 0.0, "step": 1518 }, { "epoch": 0.9831715210355987, "grad_norm": 2.9489487133105285e-05, "learning_rate": 7.986106752134737e-08, "loss": 0.0, "step": 1519 }, { "epoch": 0.9838187702265372, "grad_norm": 3.068981095566414e-05, "learning_rate": 7.383752952010992e-08, "loss": 0.0, "step": 1520 }, { "epoch": 0.9844660194174757, "grad_norm": 2.996692455781158e-05, "learning_rate": 6.80499806850754e-08, "loss": 0.0, "step": 1521 }, { "epoch": 0.9851132686084142, "grad_norm": 3.164894587825984e-05, "learning_rate": 6.249844837261654e-08, "loss": 0.0, "step": 1522 }, { "epoch": 0.9857605177993527, "grad_norm": 2.961757309094537e-05, "learning_rate": 5.718295882350955e-08, "loss": 0.0, "step": 1523 }, { "epoch": 0.9864077669902913, "grad_norm": 3.163444489473477e-05, "learning_rate": 5.2103537162817576e-08, "loss": 0.0, "step": 1524 }, { "epoch": 0.9870550161812298, "grad_norm": 2.9721826649620198e-05, "learning_rate": 4.7260207399774105e-08, "loss": 0.0, "step": 1525 }, { "epoch": 0.9877022653721683, "grad_norm": 3.056014247704297e-05, "learning_rate": 4.265299242764975e-08, "loss": 0.0, "step": 1526 }, { "epoch": 0.9883495145631068, "grad_norm": 3.068634759983979e-05, "learning_rate": 3.8281914023657886e-08, "loss": 0.0, "step": 1527 }, { "epoch": 0.9889967637540453, "grad_norm": 2.780274735414423e-05, "learning_rate": 3.4146992848854695e-08, "loss": 0.0, "step": 1528 }, { "epoch": 0.9896440129449838, "grad_norm": 2.7697720724972896e-05, "learning_rate": 3.0248248448033757e-08, "loss": 0.0, "step": 1529 }, { "epoch": 0.9902912621359223, "grad_norm": 3.008269595738966e-05, "learning_rate": 2.6585699249642716e-08, "loss": 0.0, "step": 1530 }, { "epoch": 0.9909385113268608, "grad_norm": 2.8287571694818325e-05, "learning_rate": 2.3159362565677857e-08, "loss": 0.0, "step": 1531 }, { "epoch": 0.9915857605177993, "grad_norm": 2.7803509510704316e-05, "learning_rate": 1.996925459162857e-08, "loss": 0.0, "step": 1532 }, { "epoch": 0.9922330097087378, "grad_norm": 2.8281427148613147e-05, "learning_rate": 1.7015390406377453e-08, "loss": 0.0, "step": 1533 }, { "epoch": 0.9928802588996763, "grad_norm": 3.0304252504720353e-05, "learning_rate": 1.4297783972144763e-08, "loss": 0.0, "step": 1534 }, { "epoch": 0.9935275080906149, "grad_norm": 2.8403026590240188e-05, "learning_rate": 1.181644813441074e-08, "loss": 0.0, "step": 1535 }, { "epoch": 0.9941747572815534, "grad_norm": 3.0313709430629387e-05, "learning_rate": 9.571394621865626e-09, "loss": 0.0, "step": 1536 }, { "epoch": 0.9948220064724919, "grad_norm": 2.7807334845419973e-05, "learning_rate": 7.562634046348604e-09, "loss": 0.0, "step": 1537 }, { "epoch": 0.9954692556634305, "grad_norm": 2.8401254894561134e-05, "learning_rate": 5.790175902786743e-09, "loss": 0.0, "step": 1538 }, { "epoch": 0.996116504854369, "grad_norm": 2.612316529848613e-05, "learning_rate": 4.254028569183888e-09, "loss": 0.0, "step": 1539 }, { "epoch": 0.9967637540453075, "grad_norm": 2.9241215088404715e-05, "learning_rate": 2.9541993065373976e-09, "loss": 0.0, "step": 1540 }, { "epoch": 0.997411003236246, "grad_norm": 2.732668144744821e-05, "learning_rate": 1.8906942588325927e-09, "loss": 0.0, "step": 1541 }, { "epoch": 0.9980582524271845, "grad_norm": 2.7786840291810222e-05, "learning_rate": 1.063518453009449e-09, "loss": 0.0, "step": 1542 }, { "epoch": 0.998705501618123, "grad_norm": 2.719917392823845e-05, "learning_rate": 4.726757989348407e-10, "loss": 0.0, "step": 1543 }, { "epoch": 0.9993527508090615, "grad_norm": 2.7324498660163954e-05, "learning_rate": 1.1816908937478664e-10, "loss": 0.0, "step": 1544 }, { "epoch": 1.0, "grad_norm": 2.888041126425378e-05, "learning_rate": 0.0, "loss": 0.0, "step": 1545 } ], "logging_steps": 1, "max_steps": 1545, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 239, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.528157851813675e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }