{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 4632, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.4388489208633095e-07, "loss": 2.1844, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.877697841726619e-07, "loss": 2.1893, "step": 2 }, { "epoch": 0.0, "learning_rate": 4.3165467625899287e-07, "loss": 2.1796, "step": 3 }, { "epoch": 0.0, "learning_rate": 5.755395683453238e-07, "loss": 2.1547, "step": 4 }, { "epoch": 0.0, "learning_rate": 7.194244604316547e-07, "loss": 2.1515, "step": 5 }, { "epoch": 0.0, "learning_rate": 8.633093525179857e-07, "loss": 2.1022, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.0071942446043167e-06, "loss": 2.0443, "step": 7 }, { "epoch": 0.01, "learning_rate": 1.1510791366906476e-06, "loss": 1.9914, "step": 8 }, { "epoch": 0.01, "learning_rate": 1.2949640287769785e-06, "loss": 1.8641, "step": 9 }, { "epoch": 0.01, "learning_rate": 1.4388489208633094e-06, "loss": 1.6794, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.5827338129496403e-06, "loss": 1.606, "step": 11 }, { "epoch": 0.01, "learning_rate": 1.7266187050359715e-06, "loss": 1.4297, "step": 12 }, { "epoch": 0.01, "learning_rate": 1.8705035971223024e-06, "loss": 1.3425, "step": 13 }, { "epoch": 0.01, "learning_rate": 2.0143884892086333e-06, "loss": 1.2541, "step": 14 }, { "epoch": 0.01, "learning_rate": 2.158273381294964e-06, "loss": 1.1884, "step": 15 }, { "epoch": 0.01, "learning_rate": 2.302158273381295e-06, "loss": 1.1375, "step": 16 }, { "epoch": 0.01, "learning_rate": 2.4460431654676263e-06, "loss": 1.0204, "step": 17 }, { "epoch": 0.01, "learning_rate": 2.589928057553957e-06, "loss": 0.9947, "step": 18 }, { "epoch": 0.01, "learning_rate": 2.733812949640288e-06, "loss": 0.8948, "step": 19 }, { "epoch": 0.01, "learning_rate": 2.877697841726619e-06, "loss": 0.9189, "step": 20 }, { "epoch": 0.01, "learning_rate": 3.02158273381295e-06, "loss": 0.7881, "step": 21 }, { "epoch": 0.01, "learning_rate": 3.1654676258992807e-06, "loss": 0.7942, "step": 22 }, { "epoch": 0.01, "learning_rate": 3.309352517985612e-06, "loss": 0.7449, "step": 23 }, { "epoch": 0.02, "learning_rate": 3.453237410071943e-06, "loss": 0.764, "step": 24 }, { "epoch": 0.02, "learning_rate": 3.5971223021582737e-06, "loss": 0.787, "step": 25 }, { "epoch": 0.02, "learning_rate": 3.741007194244605e-06, "loss": 0.747, "step": 26 }, { "epoch": 0.02, "learning_rate": 3.884892086330936e-06, "loss": 0.6941, "step": 27 }, { "epoch": 0.02, "learning_rate": 4.028776978417267e-06, "loss": 0.7365, "step": 28 }, { "epoch": 0.02, "learning_rate": 4.172661870503597e-06, "loss": 0.6677, "step": 29 }, { "epoch": 0.02, "learning_rate": 4.316546762589928e-06, "loss": 0.6956, "step": 30 }, { "epoch": 0.02, "learning_rate": 4.46043165467626e-06, "loss": 0.6735, "step": 31 }, { "epoch": 0.02, "learning_rate": 4.60431654676259e-06, "loss": 0.6534, "step": 32 }, { "epoch": 0.02, "learning_rate": 4.748201438848921e-06, "loss": 0.6641, "step": 33 }, { "epoch": 0.02, "learning_rate": 4.892086330935253e-06, "loss": 0.6164, "step": 34 }, { "epoch": 0.02, "learning_rate": 5.035971223021583e-06, "loss": 0.5862, "step": 35 }, { "epoch": 0.02, "learning_rate": 5.179856115107914e-06, "loss": 0.5927, "step": 36 }, { "epoch": 0.02, "learning_rate": 5.3237410071942456e-06, "loss": 0.5767, "step": 37 }, { "epoch": 0.02, "learning_rate": 5.467625899280576e-06, "loss": 0.5555, "step": 38 }, { "epoch": 0.03, "learning_rate": 5.611510791366906e-06, "loss": 0.6159, "step": 39 }, { "epoch": 0.03, "learning_rate": 5.755395683453238e-06, "loss": 0.5584, "step": 40 }, { "epoch": 0.03, "learning_rate": 5.899280575539568e-06, "loss": 0.5553, "step": 41 }, { "epoch": 0.03, "learning_rate": 6.0431654676259e-06, "loss": 0.5499, "step": 42 }, { "epoch": 0.03, "learning_rate": 6.1870503597122315e-06, "loss": 0.5476, "step": 43 }, { "epoch": 0.03, "learning_rate": 6.330935251798561e-06, "loss": 0.5577, "step": 44 }, { "epoch": 0.03, "learning_rate": 6.474820143884892e-06, "loss": 0.5549, "step": 45 }, { "epoch": 0.03, "learning_rate": 6.618705035971224e-06, "loss": 0.5283, "step": 46 }, { "epoch": 0.03, "learning_rate": 6.762589928057554e-06, "loss": 0.5727, "step": 47 }, { "epoch": 0.03, "learning_rate": 6.906474820143886e-06, "loss": 0.5116, "step": 48 }, { "epoch": 0.03, "learning_rate": 7.050359712230216e-06, "loss": 0.5078, "step": 49 }, { "epoch": 0.03, "learning_rate": 7.194244604316547e-06, "loss": 0.521, "step": 50 }, { "epoch": 0.03, "learning_rate": 7.338129496402878e-06, "loss": 0.5048, "step": 51 }, { "epoch": 0.03, "learning_rate": 7.48201438848921e-06, "loss": 0.4875, "step": 52 }, { "epoch": 0.03, "learning_rate": 7.62589928057554e-06, "loss": 0.5074, "step": 53 }, { "epoch": 0.03, "learning_rate": 7.769784172661872e-06, "loss": 0.5119, "step": 54 }, { "epoch": 0.04, "learning_rate": 7.913669064748202e-06, "loss": 0.5099, "step": 55 }, { "epoch": 0.04, "learning_rate": 8.057553956834533e-06, "loss": 0.5675, "step": 56 }, { "epoch": 0.04, "learning_rate": 8.201438848920865e-06, "loss": 0.5406, "step": 57 }, { "epoch": 0.04, "learning_rate": 8.345323741007195e-06, "loss": 0.529, "step": 58 }, { "epoch": 0.04, "learning_rate": 8.489208633093526e-06, "loss": 0.492, "step": 59 }, { "epoch": 0.04, "learning_rate": 8.633093525179856e-06, "loss": 0.5313, "step": 60 }, { "epoch": 0.04, "learning_rate": 8.776978417266188e-06, "loss": 0.5139, "step": 61 }, { "epoch": 0.04, "learning_rate": 8.92086330935252e-06, "loss": 0.5294, "step": 62 }, { "epoch": 0.04, "learning_rate": 9.064748201438849e-06, "loss": 0.4897, "step": 63 }, { "epoch": 0.04, "learning_rate": 9.20863309352518e-06, "loss": 0.545, "step": 64 }, { "epoch": 0.04, "learning_rate": 9.35251798561151e-06, "loss": 0.5091, "step": 65 }, { "epoch": 0.04, "learning_rate": 9.496402877697842e-06, "loss": 0.475, "step": 66 }, { "epoch": 0.04, "learning_rate": 9.640287769784174e-06, "loss": 0.4928, "step": 67 }, { "epoch": 0.04, "learning_rate": 9.784172661870505e-06, "loss": 0.5235, "step": 68 }, { "epoch": 0.04, "learning_rate": 9.928057553956835e-06, "loss": 0.492, "step": 69 }, { "epoch": 0.05, "learning_rate": 1.0071942446043167e-05, "loss": 0.5093, "step": 70 }, { "epoch": 0.05, "learning_rate": 1.0215827338129498e-05, "loss": 0.5148, "step": 71 }, { "epoch": 0.05, "learning_rate": 1.0359712230215828e-05, "loss": 0.5031, "step": 72 }, { "epoch": 0.05, "learning_rate": 1.0503597122302158e-05, "loss": 0.5167, "step": 73 }, { "epoch": 0.05, "learning_rate": 1.0647482014388491e-05, "loss": 0.502, "step": 74 }, { "epoch": 0.05, "learning_rate": 1.0791366906474821e-05, "loss": 0.5126, "step": 75 }, { "epoch": 0.05, "learning_rate": 1.0935251798561153e-05, "loss": 0.5106, "step": 76 }, { "epoch": 0.05, "learning_rate": 1.1079136690647482e-05, "loss": 0.4679, "step": 77 }, { "epoch": 0.05, "learning_rate": 1.1223021582733812e-05, "loss": 0.4858, "step": 78 }, { "epoch": 0.05, "learning_rate": 1.1366906474820146e-05, "loss": 0.4947, "step": 79 }, { "epoch": 0.05, "learning_rate": 1.1510791366906475e-05, "loss": 0.4695, "step": 80 }, { "epoch": 0.05, "learning_rate": 1.1654676258992807e-05, "loss": 0.4801, "step": 81 }, { "epoch": 0.05, "learning_rate": 1.1798561151079137e-05, "loss": 0.4813, "step": 82 }, { "epoch": 0.05, "learning_rate": 1.1942446043165468e-05, "loss": 0.5091, "step": 83 }, { "epoch": 0.05, "learning_rate": 1.20863309352518e-05, "loss": 0.4993, "step": 84 }, { "epoch": 0.06, "learning_rate": 1.223021582733813e-05, "loss": 0.4784, "step": 85 }, { "epoch": 0.06, "learning_rate": 1.2374100719424463e-05, "loss": 0.4914, "step": 86 }, { "epoch": 0.06, "learning_rate": 1.2517985611510793e-05, "loss": 0.4943, "step": 87 }, { "epoch": 0.06, "learning_rate": 1.2661870503597123e-05, "loss": 0.5173, "step": 88 }, { "epoch": 0.06, "learning_rate": 1.2805755395683454e-05, "loss": 0.4924, "step": 89 }, { "epoch": 0.06, "learning_rate": 1.2949640287769784e-05, "loss": 0.511, "step": 90 }, { "epoch": 0.06, "learning_rate": 1.3093525179856117e-05, "loss": 0.4753, "step": 91 }, { "epoch": 0.06, "learning_rate": 1.3237410071942447e-05, "loss": 0.4901, "step": 92 }, { "epoch": 0.06, "learning_rate": 1.3381294964028777e-05, "loss": 0.4809, "step": 93 }, { "epoch": 0.06, "learning_rate": 1.3525179856115109e-05, "loss": 0.4668, "step": 94 }, { "epoch": 0.06, "learning_rate": 1.3669064748201439e-05, "loss": 0.5013, "step": 95 }, { "epoch": 0.06, "learning_rate": 1.3812949640287772e-05, "loss": 0.5127, "step": 96 }, { "epoch": 0.06, "learning_rate": 1.3956834532374102e-05, "loss": 0.4874, "step": 97 }, { "epoch": 0.06, "learning_rate": 1.4100719424460432e-05, "loss": 0.495, "step": 98 }, { "epoch": 0.06, "learning_rate": 1.4244604316546765e-05, "loss": 0.5044, "step": 99 }, { "epoch": 0.06, "learning_rate": 1.4388489208633095e-05, "loss": 0.491, "step": 100 }, { "epoch": 0.07, "learning_rate": 1.4532374100719426e-05, "loss": 0.534, "step": 101 }, { "epoch": 0.07, "learning_rate": 1.4676258992805756e-05, "loss": 0.48, "step": 102 }, { "epoch": 0.07, "learning_rate": 1.4820143884892086e-05, "loss": 0.4872, "step": 103 }, { "epoch": 0.07, "learning_rate": 1.496402877697842e-05, "loss": 0.4366, "step": 104 }, { "epoch": 0.07, "learning_rate": 1.5107913669064749e-05, "loss": 0.4966, "step": 105 }, { "epoch": 0.07, "learning_rate": 1.525179856115108e-05, "loss": 0.4834, "step": 106 }, { "epoch": 0.07, "learning_rate": 1.5395683453237412e-05, "loss": 0.4949, "step": 107 }, { "epoch": 0.07, "learning_rate": 1.5539568345323744e-05, "loss": 0.4982, "step": 108 }, { "epoch": 0.07, "learning_rate": 1.5683453237410072e-05, "loss": 0.4992, "step": 109 }, { "epoch": 0.07, "learning_rate": 1.5827338129496403e-05, "loss": 0.5236, "step": 110 }, { "epoch": 0.07, "learning_rate": 1.5971223021582735e-05, "loss": 0.4981, "step": 111 }, { "epoch": 0.07, "learning_rate": 1.6115107913669067e-05, "loss": 0.4757, "step": 112 }, { "epoch": 0.07, "learning_rate": 1.6258992805755398e-05, "loss": 0.4929, "step": 113 }, { "epoch": 0.07, "learning_rate": 1.640287769784173e-05, "loss": 0.5041, "step": 114 }, { "epoch": 0.07, "learning_rate": 1.6546762589928058e-05, "loss": 0.4927, "step": 115 }, { "epoch": 0.08, "learning_rate": 1.669064748201439e-05, "loss": 0.4622, "step": 116 }, { "epoch": 0.08, "learning_rate": 1.683453237410072e-05, "loss": 0.4843, "step": 117 }, { "epoch": 0.08, "learning_rate": 1.6978417266187053e-05, "loss": 0.4714, "step": 118 }, { "epoch": 0.08, "learning_rate": 1.7122302158273384e-05, "loss": 0.4677, "step": 119 }, { "epoch": 0.08, "learning_rate": 1.7266187050359712e-05, "loss": 0.4748, "step": 120 }, { "epoch": 0.08, "learning_rate": 1.7410071942446044e-05, "loss": 0.4795, "step": 121 }, { "epoch": 0.08, "learning_rate": 1.7553956834532375e-05, "loss": 0.4709, "step": 122 }, { "epoch": 0.08, "learning_rate": 1.7697841726618707e-05, "loss": 0.4747, "step": 123 }, { "epoch": 0.08, "learning_rate": 1.784172661870504e-05, "loss": 0.4811, "step": 124 }, { "epoch": 0.08, "learning_rate": 1.7985611510791367e-05, "loss": 0.4631, "step": 125 }, { "epoch": 0.08, "learning_rate": 1.8129496402877698e-05, "loss": 0.5079, "step": 126 }, { "epoch": 0.08, "learning_rate": 1.827338129496403e-05, "loss": 0.4561, "step": 127 }, { "epoch": 0.08, "learning_rate": 1.841726618705036e-05, "loss": 0.5007, "step": 128 }, { "epoch": 0.08, "learning_rate": 1.8561151079136693e-05, "loss": 0.4695, "step": 129 }, { "epoch": 0.08, "learning_rate": 1.870503597122302e-05, "loss": 0.5053, "step": 130 }, { "epoch": 0.08, "learning_rate": 1.8848920863309356e-05, "loss": 0.4881, "step": 131 }, { "epoch": 0.09, "learning_rate": 1.8992805755395684e-05, "loss": 0.4922, "step": 132 }, { "epoch": 0.09, "learning_rate": 1.9136690647482016e-05, "loss": 0.4874, "step": 133 }, { "epoch": 0.09, "learning_rate": 1.9280575539568347e-05, "loss": 0.4692, "step": 134 }, { "epoch": 0.09, "learning_rate": 1.9424460431654675e-05, "loss": 0.4856, "step": 135 }, { "epoch": 0.09, "learning_rate": 1.956834532374101e-05, "loss": 0.4946, "step": 136 }, { "epoch": 0.09, "learning_rate": 1.971223021582734e-05, "loss": 0.4709, "step": 137 }, { "epoch": 0.09, "learning_rate": 1.985611510791367e-05, "loss": 0.4627, "step": 138 }, { "epoch": 0.09, "learning_rate": 2e-05, "loss": 0.4551, "step": 139 }, { "epoch": 0.09, "learning_rate": 1.9999997555461425e-05, "loss": 0.4826, "step": 140 }, { "epoch": 0.09, "learning_rate": 1.9999990221846898e-05, "loss": 0.4731, "step": 141 }, { "epoch": 0.09, "learning_rate": 1.999997799916e-05, "loss": 0.4794, "step": 142 }, { "epoch": 0.09, "learning_rate": 1.9999960887406704e-05, "loss": 0.5143, "step": 143 }, { "epoch": 0.09, "learning_rate": 1.9999938886595385e-05, "loss": 0.4936, "step": 144 }, { "epoch": 0.09, "learning_rate": 1.9999911996736796e-05, "loss": 0.464, "step": 145 }, { "epoch": 0.09, "learning_rate": 1.9999880217844077e-05, "loss": 0.5109, "step": 146 }, { "epoch": 0.1, "learning_rate": 1.9999843549932775e-05, "loss": 0.4622, "step": 147 }, { "epoch": 0.1, "learning_rate": 1.9999801993020814e-05, "loss": 0.4921, "step": 148 }, { "epoch": 0.1, "learning_rate": 1.9999755547128506e-05, "loss": 0.4289, "step": 149 }, { "epoch": 0.1, "learning_rate": 1.9999704212278564e-05, "loss": 0.4802, "step": 150 }, { "epoch": 0.1, "learning_rate": 1.999964798849609e-05, "loss": 0.4569, "step": 151 }, { "epoch": 0.1, "learning_rate": 1.999958687580856e-05, "loss": 0.4724, "step": 152 }, { "epoch": 0.1, "learning_rate": 1.9999520874245865e-05, "loss": 0.51, "step": 153 }, { "epoch": 0.1, "learning_rate": 1.9999449983840265e-05, "loss": 0.4764, "step": 154 }, { "epoch": 0.1, "learning_rate": 1.9999374204626422e-05, "loss": 0.4824, "step": 155 }, { "epoch": 0.1, "learning_rate": 1.9999293536641387e-05, "loss": 0.4659, "step": 156 }, { "epoch": 0.1, "learning_rate": 1.99992079799246e-05, "loss": 0.4806, "step": 157 }, { "epoch": 0.1, "learning_rate": 1.9999117534517884e-05, "loss": 0.4818, "step": 158 }, { "epoch": 0.1, "learning_rate": 1.9999022200465464e-05, "loss": 0.4912, "step": 159 }, { "epoch": 0.1, "learning_rate": 1.9998921977813947e-05, "loss": 0.4717, "step": 160 }, { "epoch": 0.1, "learning_rate": 1.9998816866612337e-05, "loss": 0.4453, "step": 161 }, { "epoch": 0.1, "learning_rate": 1.9998706866912013e-05, "loss": 0.4917, "step": 162 }, { "epoch": 0.11, "learning_rate": 1.9998591978766768e-05, "loss": 0.4798, "step": 163 }, { "epoch": 0.11, "learning_rate": 1.999847220223276e-05, "loss": 0.4858, "step": 164 }, { "epoch": 0.11, "learning_rate": 1.999834753736856e-05, "loss": 0.4291, "step": 165 }, { "epoch": 0.11, "learning_rate": 1.9998217984235108e-05, "loss": 0.4414, "step": 166 }, { "epoch": 0.11, "learning_rate": 1.999808354289575e-05, "loss": 0.4864, "step": 167 }, { "epoch": 0.11, "learning_rate": 1.9997944213416213e-05, "loss": 0.4234, "step": 168 }, { "epoch": 0.11, "learning_rate": 1.9997799995864613e-05, "loss": 0.4598, "step": 169 }, { "epoch": 0.11, "learning_rate": 1.9997650890311465e-05, "loss": 0.4689, "step": 170 }, { "epoch": 0.11, "learning_rate": 1.9997496896829664e-05, "loss": 0.4835, "step": 171 }, { "epoch": 0.11, "learning_rate": 1.9997338015494496e-05, "loss": 0.4656, "step": 172 }, { "epoch": 0.11, "learning_rate": 1.9997174246383647e-05, "loss": 0.4415, "step": 173 }, { "epoch": 0.11, "learning_rate": 1.999700558957718e-05, "loss": 0.4655, "step": 174 }, { "epoch": 0.11, "learning_rate": 1.999683204515755e-05, "loss": 0.4586, "step": 175 }, { "epoch": 0.11, "learning_rate": 1.9996653613209617e-05, "loss": 0.4754, "step": 176 }, { "epoch": 0.11, "learning_rate": 1.99964702938206e-05, "loss": 0.4574, "step": 177 }, { "epoch": 0.12, "learning_rate": 1.9996282087080138e-05, "loss": 0.4663, "step": 178 }, { "epoch": 0.12, "learning_rate": 1.999608899308024e-05, "loss": 0.4288, "step": 179 }, { "epoch": 0.12, "learning_rate": 1.9995891011915312e-05, "loss": 0.4586, "step": 180 }, { "epoch": 0.12, "learning_rate": 1.9995688143682154e-05, "loss": 0.4919, "step": 181 }, { "epoch": 0.12, "learning_rate": 1.9995480388479942e-05, "loss": 0.4672, "step": 182 }, { "epoch": 0.12, "learning_rate": 1.9995267746410256e-05, "loss": 0.461, "step": 183 }, { "epoch": 0.12, "learning_rate": 1.9995050217577056e-05, "loss": 0.4808, "step": 184 }, { "epoch": 0.12, "learning_rate": 1.999482780208669e-05, "loss": 0.4842, "step": 185 }, { "epoch": 0.12, "learning_rate": 1.9994600500047903e-05, "loss": 0.4556, "step": 186 }, { "epoch": 0.12, "learning_rate": 1.9994368311571822e-05, "loss": 0.434, "step": 187 }, { "epoch": 0.12, "learning_rate": 1.9994131236771967e-05, "loss": 0.45, "step": 188 }, { "epoch": 0.12, "learning_rate": 1.999388927576425e-05, "loss": 0.4465, "step": 189 }, { "epoch": 0.12, "learning_rate": 1.999364242866696e-05, "loss": 0.4408, "step": 190 }, { "epoch": 0.12, "learning_rate": 1.9993390695600787e-05, "loss": 0.4793, "step": 191 }, { "epoch": 0.12, "learning_rate": 1.99931340766888e-05, "loss": 0.4642, "step": 192 }, { "epoch": 0.12, "learning_rate": 1.999287257205647e-05, "loss": 0.4645, "step": 193 }, { "epoch": 0.13, "learning_rate": 1.9992606181831643e-05, "loss": 0.5041, "step": 194 }, { "epoch": 0.13, "learning_rate": 1.9992334906144562e-05, "loss": 0.4733, "step": 195 }, { "epoch": 0.13, "learning_rate": 1.9992058745127853e-05, "loss": 0.4626, "step": 196 }, { "epoch": 0.13, "learning_rate": 1.9991777698916535e-05, "loss": 0.4673, "step": 197 }, { "epoch": 0.13, "learning_rate": 1.999149176764802e-05, "loss": 0.4428, "step": 198 }, { "epoch": 0.13, "learning_rate": 1.999120095146209e-05, "loss": 0.4567, "step": 199 }, { "epoch": 0.13, "learning_rate": 1.999090525050093e-05, "loss": 0.422, "step": 200 }, { "epoch": 0.13, "learning_rate": 1.9990604664909116e-05, "loss": 0.4558, "step": 201 }, { "epoch": 0.13, "learning_rate": 1.9990299194833605e-05, "loss": 0.4825, "step": 202 }, { "epoch": 0.13, "learning_rate": 1.9989988840423743e-05, "loss": 0.4734, "step": 203 }, { "epoch": 0.13, "learning_rate": 1.998967360183126e-05, "loss": 0.4792, "step": 204 }, { "epoch": 0.13, "learning_rate": 1.9989353479210286e-05, "loss": 0.4833, "step": 205 }, { "epoch": 0.13, "learning_rate": 1.998902847271733e-05, "loss": 0.4632, "step": 206 }, { "epoch": 0.13, "learning_rate": 1.9988698582511285e-05, "loss": 0.4645, "step": 207 }, { "epoch": 0.13, "learning_rate": 1.998836380875344e-05, "loss": 0.4865, "step": 208 }, { "epoch": 0.14, "learning_rate": 1.998802415160747e-05, "loss": 0.454, "step": 209 }, { "epoch": 0.14, "learning_rate": 1.998767961123944e-05, "loss": 0.4393, "step": 210 }, { "epoch": 0.14, "learning_rate": 1.9987330187817785e-05, "loss": 0.4356, "step": 211 }, { "epoch": 0.14, "learning_rate": 1.9986975881513353e-05, "loss": 0.4276, "step": 212 }, { "epoch": 0.14, "learning_rate": 1.998661669249936e-05, "loss": 0.4683, "step": 213 }, { "epoch": 0.14, "learning_rate": 1.9986252620951428e-05, "loss": 0.4447, "step": 214 }, { "epoch": 0.14, "learning_rate": 1.9985883667047538e-05, "loss": 0.4505, "step": 215 }, { "epoch": 0.14, "learning_rate": 1.9985509830968085e-05, "loss": 0.438, "step": 216 }, { "epoch": 0.14, "learning_rate": 1.9985131112895835e-05, "loss": 0.433, "step": 217 }, { "epoch": 0.14, "learning_rate": 1.998474751301595e-05, "loss": 0.4549, "step": 218 }, { "epoch": 0.14, "learning_rate": 1.9984359031515974e-05, "loss": 0.4515, "step": 219 }, { "epoch": 0.14, "learning_rate": 1.9983965668585838e-05, "loss": 0.4091, "step": 220 }, { "epoch": 0.14, "learning_rate": 1.998356742441786e-05, "loss": 0.432, "step": 221 }, { "epoch": 0.14, "learning_rate": 1.998316429920675e-05, "loss": 0.4327, "step": 222 }, { "epoch": 0.14, "learning_rate": 1.9982756293149586e-05, "loss": 0.4621, "step": 223 }, { "epoch": 0.15, "learning_rate": 1.9982343406445857e-05, "loss": 0.4354, "step": 224 }, { "epoch": 0.15, "learning_rate": 1.9981925639297427e-05, "loss": 0.4433, "step": 225 }, { "epoch": 0.15, "learning_rate": 1.9981502991908535e-05, "loss": 0.4763, "step": 226 }, { "epoch": 0.15, "learning_rate": 1.9981075464485827e-05, "loss": 0.4552, "step": 227 }, { "epoch": 0.15, "learning_rate": 1.9980643057238317e-05, "loss": 0.4338, "step": 228 }, { "epoch": 0.15, "learning_rate": 1.998020577037742e-05, "loss": 0.4249, "step": 229 }, { "epoch": 0.15, "learning_rate": 1.9979763604116922e-05, "loss": 0.4151, "step": 230 }, { "epoch": 0.15, "learning_rate": 1.9979316558673005e-05, "loss": 0.4839, "step": 231 }, { "epoch": 0.15, "learning_rate": 1.997886463426423e-05, "loss": 0.4418, "step": 232 }, { "epoch": 0.15, "learning_rate": 1.997840783111155e-05, "loss": 0.4472, "step": 233 }, { "epoch": 0.15, "learning_rate": 1.9977946149438302e-05, "loss": 0.4613, "step": 234 }, { "epoch": 0.15, "learning_rate": 1.9977479589470197e-05, "loss": 0.4259, "step": 235 }, { "epoch": 0.15, "learning_rate": 1.9977008151435346e-05, "loss": 0.4092, "step": 236 }, { "epoch": 0.15, "learning_rate": 1.9976531835564235e-05, "loss": 0.448, "step": 237 }, { "epoch": 0.15, "learning_rate": 1.9976050642089744e-05, "loss": 0.433, "step": 238 }, { "epoch": 0.15, "learning_rate": 1.9975564571247128e-05, "loss": 0.4533, "step": 239 }, { "epoch": 0.16, "learning_rate": 1.9975073623274033e-05, "loss": 0.4461, "step": 240 }, { "epoch": 0.16, "learning_rate": 1.9974577798410485e-05, "loss": 0.4562, "step": 241 }, { "epoch": 0.16, "learning_rate": 1.9974077096898895e-05, "loss": 0.4474, "step": 242 }, { "epoch": 0.16, "learning_rate": 1.9973571518984065e-05, "loss": 0.4138, "step": 243 }, { "epoch": 0.16, "learning_rate": 1.9973061064913173e-05, "loss": 0.4297, "step": 244 }, { "epoch": 0.16, "learning_rate": 1.9972545734935786e-05, "loss": 0.4285, "step": 245 }, { "epoch": 0.16, "learning_rate": 1.9972025529303848e-05, "loss": 0.4251, "step": 246 }, { "epoch": 0.16, "learning_rate": 1.9971500448271696e-05, "loss": 0.4438, "step": 247 }, { "epoch": 0.16, "learning_rate": 1.9970970492096046e-05, "loss": 0.4414, "step": 248 }, { "epoch": 0.16, "learning_rate": 1.9970435661035995e-05, "loss": 0.4146, "step": 249 }, { "epoch": 0.16, "learning_rate": 1.9969895955353028e-05, "loss": 0.422, "step": 250 }, { "epoch": 0.16, "learning_rate": 1.996935137531101e-05, "loss": 0.4176, "step": 251 }, { "epoch": 0.16, "learning_rate": 1.996880192117619e-05, "loss": 0.4413, "step": 252 }, { "epoch": 0.16, "learning_rate": 1.9968247593217205e-05, "loss": 0.3922, "step": 253 }, { "epoch": 0.16, "learning_rate": 1.9967688391705066e-05, "loss": 0.4346, "step": 254 }, { "epoch": 0.17, "learning_rate": 1.9967124316913165e-05, "loss": 0.4251, "step": 255 }, { "epoch": 0.17, "learning_rate": 1.99665553691173e-05, "loss": 0.4388, "step": 256 }, { "epoch": 0.17, "learning_rate": 1.9965981548595616e-05, "loss": 0.4467, "step": 257 }, { "epoch": 0.17, "learning_rate": 1.9965402855628667e-05, "loss": 0.4132, "step": 258 }, { "epoch": 0.17, "learning_rate": 1.996481929049938e-05, "loss": 0.4381, "step": 259 }, { "epoch": 0.17, "learning_rate": 1.9964230853493064e-05, "loss": 0.471, "step": 260 }, { "epoch": 0.17, "learning_rate": 1.9963637544897407e-05, "loss": 0.4152, "step": 261 }, { "epoch": 0.17, "learning_rate": 1.9963039365002487e-05, "loss": 0.443, "step": 262 }, { "epoch": 0.17, "learning_rate": 1.9962436314100758e-05, "loss": 0.4772, "step": 263 }, { "epoch": 0.17, "learning_rate": 1.996182839248705e-05, "loss": 0.4336, "step": 264 }, { "epoch": 0.17, "learning_rate": 1.996121560045859e-05, "loss": 0.446, "step": 265 }, { "epoch": 0.17, "learning_rate": 1.996059793831497e-05, "loss": 0.408, "step": 266 }, { "epoch": 0.17, "learning_rate": 1.995997540635817e-05, "loss": 0.4484, "step": 267 }, { "epoch": 0.17, "learning_rate": 1.995934800489256e-05, "loss": 0.4328, "step": 268 }, { "epoch": 0.17, "learning_rate": 1.995871573422487e-05, "loss": 0.4274, "step": 269 }, { "epoch": 0.17, "learning_rate": 1.9958078594664225e-05, "loss": 0.4244, "step": 270 }, { "epoch": 0.18, "learning_rate": 1.9957436586522128e-05, "loss": 0.4111, "step": 271 }, { "epoch": 0.18, "learning_rate": 1.9956789710112465e-05, "loss": 0.4249, "step": 272 }, { "epoch": 0.18, "learning_rate": 1.9956137965751498e-05, "loss": 0.4395, "step": 273 }, { "epoch": 0.18, "learning_rate": 1.9955481353757863e-05, "loss": 0.4544, "step": 274 }, { "epoch": 0.18, "learning_rate": 1.995481987445259e-05, "loss": 0.4214, "step": 275 }, { "epoch": 0.18, "learning_rate": 1.9954153528159077e-05, "loss": 0.4374, "step": 276 }, { "epoch": 0.18, "learning_rate": 1.995348231520311e-05, "loss": 0.4356, "step": 277 }, { "epoch": 0.18, "learning_rate": 1.9952806235912848e-05, "loss": 0.4494, "step": 278 }, { "epoch": 0.18, "learning_rate": 1.9952125290618834e-05, "loss": 0.4204, "step": 279 }, { "epoch": 0.18, "learning_rate": 1.995143947965398e-05, "loss": 0.4108, "step": 280 }, { "epoch": 0.18, "learning_rate": 1.9950748803353593e-05, "loss": 0.4162, "step": 281 }, { "epoch": 0.18, "learning_rate": 1.9950053262055343e-05, "loss": 0.4633, "step": 282 }, { "epoch": 0.18, "learning_rate": 1.994935285609929e-05, "loss": 0.4247, "step": 283 }, { "epoch": 0.18, "learning_rate": 1.9948647585827867e-05, "loss": 0.4289, "step": 284 }, { "epoch": 0.18, "learning_rate": 1.9947937451585882e-05, "loss": 0.4328, "step": 285 }, { "epoch": 0.19, "learning_rate": 1.9947222453720535e-05, "loss": 0.3979, "step": 286 }, { "epoch": 0.19, "learning_rate": 1.9946502592581382e-05, "loss": 0.4369, "step": 287 }, { "epoch": 0.19, "learning_rate": 1.994577786852038e-05, "loss": 0.3939, "step": 288 }, { "epoch": 0.19, "learning_rate": 1.994504828189184e-05, "loss": 0.4296, "step": 289 }, { "epoch": 0.19, "learning_rate": 1.9944313833052472e-05, "loss": 0.3832, "step": 290 }, { "epoch": 0.19, "learning_rate": 1.994357452236135e-05, "loss": 0.4216, "step": 291 }, { "epoch": 0.19, "learning_rate": 1.9942830350179934e-05, "loss": 0.428, "step": 292 }, { "epoch": 0.19, "learning_rate": 1.9942081316872045e-05, "loss": 0.4207, "step": 293 }, { "epoch": 0.19, "learning_rate": 1.99413274228039e-05, "loss": 0.4414, "step": 294 }, { "epoch": 0.19, "learning_rate": 1.994056866834408e-05, "loss": 0.4231, "step": 295 }, { "epoch": 0.19, "learning_rate": 1.993980505386355e-05, "loss": 0.4479, "step": 296 }, { "epoch": 0.19, "learning_rate": 1.9939036579735636e-05, "loss": 0.4415, "step": 297 }, { "epoch": 0.19, "learning_rate": 1.9938263246336067e-05, "loss": 0.4106, "step": 298 }, { "epoch": 0.19, "learning_rate": 1.993748505404292e-05, "loss": 0.4183, "step": 299 }, { "epoch": 0.19, "learning_rate": 1.9936702003236663e-05, "loss": 0.418, "step": 300 }, { "epoch": 0.19, "learning_rate": 1.9935914094300132e-05, "loss": 0.4481, "step": 301 }, { "epoch": 0.2, "learning_rate": 1.9935121327618547e-05, "loss": 0.4174, "step": 302 }, { "epoch": 0.2, "learning_rate": 1.9934323703579498e-05, "loss": 0.441, "step": 303 }, { "epoch": 0.2, "learning_rate": 1.993352122257294e-05, "loss": 0.4263, "step": 304 }, { "epoch": 0.2, "learning_rate": 1.9932713884991226e-05, "loss": 0.4235, "step": 305 }, { "epoch": 0.2, "learning_rate": 1.993190169122906e-05, "loss": 0.4426, "step": 306 }, { "epoch": 0.2, "learning_rate": 1.9931084641683534e-05, "loss": 0.4332, "step": 307 }, { "epoch": 0.2, "learning_rate": 1.9930262736754107e-05, "loss": 0.4074, "step": 308 }, { "epoch": 0.2, "learning_rate": 1.992943597684262e-05, "loss": 0.4551, "step": 309 }, { "epoch": 0.2, "learning_rate": 1.9928604362353273e-05, "loss": 0.4082, "step": 310 }, { "epoch": 0.2, "learning_rate": 1.9927767893692654e-05, "loss": 0.4116, "step": 311 }, { "epoch": 0.2, "learning_rate": 1.9926926571269725e-05, "loss": 0.4169, "step": 312 }, { "epoch": 0.2, "learning_rate": 1.9926080395495805e-05, "loss": 0.4315, "step": 313 }, { "epoch": 0.2, "learning_rate": 1.99252293667846e-05, "loss": 0.4696, "step": 314 }, { "epoch": 0.2, "learning_rate": 1.9924373485552185e-05, "loss": 0.406, "step": 315 }, { "epoch": 0.2, "learning_rate": 1.9923512752217007e-05, "loss": 0.4225, "step": 316 }, { "epoch": 0.21, "learning_rate": 1.9922647167199886e-05, "loss": 0.4265, "step": 317 }, { "epoch": 0.21, "learning_rate": 1.992177673092401e-05, "loss": 0.4423, "step": 318 }, { "epoch": 0.21, "learning_rate": 1.992090144381494e-05, "loss": 0.4011, "step": 319 }, { "epoch": 0.21, "learning_rate": 1.9920021306300622e-05, "loss": 0.4499, "step": 320 }, { "epoch": 0.21, "learning_rate": 1.991913631881135e-05, "loss": 0.4038, "step": 321 }, { "epoch": 0.21, "learning_rate": 1.991824648177981e-05, "loss": 0.4154, "step": 322 }, { "epoch": 0.21, "learning_rate": 1.991735179564104e-05, "loss": 0.4167, "step": 323 }, { "epoch": 0.21, "learning_rate": 1.991645226083247e-05, "loss": 0.408, "step": 324 }, { "epoch": 0.21, "learning_rate": 1.991554787779388e-05, "loss": 0.4012, "step": 325 }, { "epoch": 0.21, "learning_rate": 1.9914638646967436e-05, "loss": 0.4273, "step": 326 }, { "epoch": 0.21, "learning_rate": 1.991372456879767e-05, "loss": 0.4643, "step": 327 }, { "epoch": 0.21, "learning_rate": 1.991280564373147e-05, "loss": 0.4271, "step": 328 }, { "epoch": 0.21, "learning_rate": 1.9911881872218116e-05, "loss": 0.3992, "step": 329 }, { "epoch": 0.21, "learning_rate": 1.991095325470925e-05, "loss": 0.4337, "step": 330 }, { "epoch": 0.21, "learning_rate": 1.991001979165887e-05, "loss": 0.4064, "step": 331 }, { "epoch": 0.22, "learning_rate": 1.990908148352336e-05, "loss": 0.4117, "step": 332 }, { "epoch": 0.22, "learning_rate": 1.990813833076146e-05, "loss": 0.4247, "step": 333 }, { "epoch": 0.22, "learning_rate": 1.9907190333834293e-05, "loss": 0.4079, "step": 334 }, { "epoch": 0.22, "learning_rate": 1.990623749320534e-05, "loss": 0.4183, "step": 335 }, { "epoch": 0.22, "learning_rate": 1.990527980934045e-05, "loss": 0.4196, "step": 336 }, { "epoch": 0.22, "learning_rate": 1.990431728270784e-05, "loss": 0.4018, "step": 337 }, { "epoch": 0.22, "learning_rate": 1.9903349913778097e-05, "loss": 0.3871, "step": 338 }, { "epoch": 0.22, "learning_rate": 1.9902377703024175e-05, "loss": 0.4094, "step": 339 }, { "epoch": 0.22, "learning_rate": 1.9901400650921403e-05, "loss": 0.4262, "step": 340 }, { "epoch": 0.22, "learning_rate": 1.990041875794746e-05, "loss": 0.4172, "step": 341 }, { "epoch": 0.22, "learning_rate": 1.989943202458241e-05, "loss": 0.4382, "step": 342 }, { "epoch": 0.22, "learning_rate": 1.989844045130866e-05, "loss": 0.4365, "step": 343 }, { "epoch": 0.22, "learning_rate": 1.9897444038611015e-05, "loss": 0.4267, "step": 344 }, { "epoch": 0.22, "learning_rate": 1.9896442786976617e-05, "loss": 0.4125, "step": 345 }, { "epoch": 0.22, "learning_rate": 1.989543669689499e-05, "loss": 0.4043, "step": 346 }, { "epoch": 0.22, "learning_rate": 1.989442576885802e-05, "loss": 0.3846, "step": 347 }, { "epoch": 0.23, "learning_rate": 1.9893410003359955e-05, "loss": 0.4457, "step": 348 }, { "epoch": 0.23, "learning_rate": 1.989238940089741e-05, "loss": 0.4091, "step": 349 }, { "epoch": 0.23, "learning_rate": 1.989136396196937e-05, "loss": 0.4255, "step": 350 }, { "epoch": 0.23, "learning_rate": 1.9890333687077174e-05, "loss": 0.4241, "step": 351 }, { "epoch": 0.23, "learning_rate": 1.9889298576724538e-05, "loss": 0.4153, "step": 352 }, { "epoch": 0.23, "learning_rate": 1.988825863141753e-05, "loss": 0.4233, "step": 353 }, { "epoch": 0.23, "learning_rate": 1.988721385166459e-05, "loss": 0.4332, "step": 354 }, { "epoch": 0.23, "learning_rate": 1.9886164237976523e-05, "loss": 0.4118, "step": 355 }, { "epoch": 0.23, "learning_rate": 1.9885109790866482e-05, "loss": 0.4021, "step": 356 }, { "epoch": 0.23, "learning_rate": 1.988405051085e-05, "loss": 0.4212, "step": 357 }, { "epoch": 0.23, "learning_rate": 1.9882986398444974e-05, "loss": 0.4205, "step": 358 }, { "epoch": 0.23, "learning_rate": 1.9881917454171646e-05, "loss": 0.4017, "step": 359 }, { "epoch": 0.23, "learning_rate": 1.9880843678552635e-05, "loss": 0.43, "step": 360 }, { "epoch": 0.23, "learning_rate": 1.9879765072112922e-05, "loss": 0.4166, "step": 361 }, { "epoch": 0.23, "learning_rate": 1.9878681635379842e-05, "loss": 0.4174, "step": 362 }, { "epoch": 0.24, "learning_rate": 1.9877593368883094e-05, "loss": 0.4047, "step": 363 }, { "epoch": 0.24, "learning_rate": 1.9876500273154748e-05, "loss": 0.3952, "step": 364 }, { "epoch": 0.24, "learning_rate": 1.987540234872922e-05, "loss": 0.4158, "step": 365 }, { "epoch": 0.24, "learning_rate": 1.987429959614329e-05, "loss": 0.3917, "step": 366 }, { "epoch": 0.24, "learning_rate": 1.987319201593611e-05, "loss": 0.4467, "step": 367 }, { "epoch": 0.24, "learning_rate": 1.9872079608649183e-05, "loss": 0.406, "step": 368 }, { "epoch": 0.24, "learning_rate": 1.987096237482637e-05, "loss": 0.423, "step": 369 }, { "epoch": 0.24, "learning_rate": 1.98698403150139e-05, "loss": 0.408, "step": 370 }, { "epoch": 0.24, "learning_rate": 1.9868713429760354e-05, "loss": 0.437, "step": 371 }, { "epoch": 0.24, "learning_rate": 1.9867581719616672e-05, "loss": 0.4183, "step": 372 }, { "epoch": 0.24, "learning_rate": 1.9866445185136164e-05, "loss": 0.3972, "step": 373 }, { "epoch": 0.24, "learning_rate": 1.9865303826874484e-05, "loss": 0.4267, "step": 374 }, { "epoch": 0.24, "learning_rate": 1.986415764538965e-05, "loss": 0.4178, "step": 375 }, { "epoch": 0.24, "learning_rate": 1.9863006641242043e-05, "loss": 0.4138, "step": 376 }, { "epoch": 0.24, "learning_rate": 1.9861850814994397e-05, "loss": 0.405, "step": 377 }, { "epoch": 0.24, "learning_rate": 1.9860690167211802e-05, "loss": 0.3956, "step": 378 }, { "epoch": 0.25, "learning_rate": 1.985952469846171e-05, "loss": 0.4248, "step": 379 }, { "epoch": 0.25, "learning_rate": 1.9858354409313924e-05, "loss": 0.4084, "step": 380 }, { "epoch": 0.25, "learning_rate": 1.9857179300340613e-05, "loss": 0.4062, "step": 381 }, { "epoch": 0.25, "learning_rate": 1.9855999372116293e-05, "loss": 0.4043, "step": 382 }, { "epoch": 0.25, "learning_rate": 1.985481462521784e-05, "loss": 0.4327, "step": 383 }, { "epoch": 0.25, "learning_rate": 1.985362506022449e-05, "loss": 0.3947, "step": 384 }, { "epoch": 0.25, "learning_rate": 1.9852430677717826e-05, "loss": 0.4245, "step": 385 }, { "epoch": 0.25, "learning_rate": 1.9851231478281794e-05, "loss": 0.4044, "step": 386 }, { "epoch": 0.25, "learning_rate": 1.9850027462502685e-05, "loss": 0.3968, "step": 387 }, { "epoch": 0.25, "learning_rate": 1.984881863096916e-05, "loss": 0.3947, "step": 388 }, { "epoch": 0.25, "learning_rate": 1.9847604984272224e-05, "loss": 0.4097, "step": 389 }, { "epoch": 0.25, "learning_rate": 1.9846386523005235e-05, "loss": 0.4433, "step": 390 }, { "epoch": 0.25, "learning_rate": 1.984516324776391e-05, "loss": 0.4208, "step": 391 }, { "epoch": 0.25, "learning_rate": 1.984393515914632e-05, "loss": 0.4369, "step": 392 }, { "epoch": 0.25, "learning_rate": 1.9842702257752882e-05, "loss": 0.4152, "step": 393 }, { "epoch": 0.26, "learning_rate": 1.9841464544186378e-05, "loss": 0.4128, "step": 394 }, { "epoch": 0.26, "learning_rate": 1.9840222019051926e-05, "loss": 0.3973, "step": 395 }, { "epoch": 0.26, "learning_rate": 1.9838974682957017e-05, "loss": 0.389, "step": 396 }, { "epoch": 0.26, "learning_rate": 1.9837722536511473e-05, "loss": 0.3932, "step": 397 }, { "epoch": 0.26, "learning_rate": 1.983646558032749e-05, "loss": 0.4055, "step": 398 }, { "epoch": 0.26, "learning_rate": 1.983520381501959e-05, "loss": 0.4262, "step": 399 }, { "epoch": 0.26, "learning_rate": 1.9833937241204668e-05, "loss": 0.4172, "step": 400 }, { "epoch": 0.26, "learning_rate": 1.983266585950196e-05, "loss": 0.4101, "step": 401 }, { "epoch": 0.26, "learning_rate": 1.9831389670533054e-05, "loss": 0.4341, "step": 402 }, { "epoch": 0.26, "learning_rate": 1.983010867492189e-05, "loss": 0.4301, "step": 403 }, { "epoch": 0.26, "learning_rate": 1.9828822873294756e-05, "loss": 0.4013, "step": 404 }, { "epoch": 0.26, "learning_rate": 1.9827532266280288e-05, "loss": 0.3624, "step": 405 }, { "epoch": 0.26, "learning_rate": 1.982623685450948e-05, "loss": 0.4005, "step": 406 }, { "epoch": 0.26, "learning_rate": 1.9824936638615658e-05, "loss": 0.4035, "step": 407 }, { "epoch": 0.26, "learning_rate": 1.982363161923452e-05, "loss": 0.4094, "step": 408 }, { "epoch": 0.26, "learning_rate": 1.982232179700409e-05, "loss": 0.423, "step": 409 }, { "epoch": 0.27, "learning_rate": 1.9821007172564756e-05, "loss": 0.3948, "step": 410 }, { "epoch": 0.27, "learning_rate": 1.9819687746559248e-05, "loss": 0.3964, "step": 411 }, { "epoch": 0.27, "learning_rate": 1.9818363519632638e-05, "loss": 0.4018, "step": 412 }, { "epoch": 0.27, "learning_rate": 1.9817034492432358e-05, "loss": 0.3682, "step": 413 }, { "epoch": 0.27, "learning_rate": 1.9815700665608174e-05, "loss": 0.4235, "step": 414 }, { "epoch": 0.27, "learning_rate": 1.981436203981221e-05, "loss": 0.3922, "step": 415 }, { "epoch": 0.27, "learning_rate": 1.9813018615698926e-05, "loss": 0.4037, "step": 416 }, { "epoch": 0.27, "learning_rate": 1.981167039392513e-05, "loss": 0.4249, "step": 417 }, { "epoch": 0.27, "learning_rate": 1.9810317375149987e-05, "loss": 0.4123, "step": 418 }, { "epoch": 0.27, "learning_rate": 1.9808959560034987e-05, "loss": 0.4377, "step": 419 }, { "epoch": 0.27, "learning_rate": 1.9807596949243985e-05, "loss": 0.4211, "step": 420 }, { "epoch": 0.27, "learning_rate": 1.980622954344317e-05, "loss": 0.4169, "step": 421 }, { "epoch": 0.27, "learning_rate": 1.9804857343301074e-05, "loss": 0.4012, "step": 422 }, { "epoch": 0.27, "learning_rate": 1.980348034948858e-05, "loss": 0.4257, "step": 423 }, { "epoch": 0.27, "learning_rate": 1.9802098562678907e-05, "loss": 0.4207, "step": 424 }, { "epoch": 0.28, "learning_rate": 1.9800711983547626e-05, "loss": 0.416, "step": 425 }, { "epoch": 0.28, "learning_rate": 1.9799320612772647e-05, "loss": 0.4142, "step": 426 }, { "epoch": 0.28, "learning_rate": 1.9797924451034214e-05, "loss": 0.4063, "step": 427 }, { "epoch": 0.28, "learning_rate": 1.9796523499014925e-05, "loss": 0.4233, "step": 428 }, { "epoch": 0.28, "learning_rate": 1.979511775739972e-05, "loss": 0.3952, "step": 429 }, { "epoch": 0.28, "learning_rate": 1.9793707226875877e-05, "loss": 0.4182, "step": 430 }, { "epoch": 0.28, "learning_rate": 1.9792291908133007e-05, "loss": 0.4065, "step": 431 }, { "epoch": 0.28, "learning_rate": 1.9790871801863077e-05, "loss": 0.4351, "step": 432 }, { "epoch": 0.28, "learning_rate": 1.978944690876039e-05, "loss": 0.4077, "step": 433 }, { "epoch": 0.28, "learning_rate": 1.978801722952158e-05, "loss": 0.4245, "step": 434 }, { "epoch": 0.28, "learning_rate": 1.9786582764845634e-05, "loss": 0.4446, "step": 435 }, { "epoch": 0.28, "learning_rate": 1.9785143515433866e-05, "loss": 0.4076, "step": 436 }, { "epoch": 0.28, "learning_rate": 1.9783699481989945e-05, "loss": 0.3971, "step": 437 }, { "epoch": 0.28, "learning_rate": 1.9782250665219868e-05, "loss": 0.4158, "step": 438 }, { "epoch": 0.28, "learning_rate": 1.978079706583197e-05, "loss": 0.4359, "step": 439 }, { "epoch": 0.28, "learning_rate": 1.9779338684536923e-05, "loss": 0.3932, "step": 440 }, { "epoch": 0.29, "learning_rate": 1.977787552204775e-05, "loss": 0.3978, "step": 441 }, { "epoch": 0.29, "learning_rate": 1.97764075790798e-05, "loss": 0.4192, "step": 442 }, { "epoch": 0.29, "learning_rate": 1.9774934856350756e-05, "loss": 0.3858, "step": 443 }, { "epoch": 0.29, "learning_rate": 1.977345735458065e-05, "loss": 0.391, "step": 444 }, { "epoch": 0.29, "learning_rate": 1.977197507449184e-05, "loss": 0.3799, "step": 445 }, { "epoch": 0.29, "learning_rate": 1.977048801680902e-05, "loss": 0.4296, "step": 446 }, { "epoch": 0.29, "learning_rate": 1.9768996182259236e-05, "loss": 0.4302, "step": 447 }, { "epoch": 0.29, "learning_rate": 1.976749957157185e-05, "loss": 0.4352, "step": 448 }, { "epoch": 0.29, "learning_rate": 1.9765998185478567e-05, "loss": 0.3862, "step": 449 }, { "epoch": 0.29, "learning_rate": 1.9764492024713428e-05, "loss": 0.4189, "step": 450 }, { "epoch": 0.29, "learning_rate": 1.97629810900128e-05, "loss": 0.3845, "step": 451 }, { "epoch": 0.29, "learning_rate": 1.97614653821154e-05, "loss": 0.4136, "step": 452 }, { "epoch": 0.29, "learning_rate": 1.9759944901762264e-05, "loss": 0.3863, "step": 453 }, { "epoch": 0.29, "learning_rate": 1.9758419649696767e-05, "loss": 0.401, "step": 454 }, { "epoch": 0.29, "learning_rate": 1.9756889626664618e-05, "loss": 0.4124, "step": 455 }, { "epoch": 0.3, "learning_rate": 1.9755354833413855e-05, "loss": 0.4235, "step": 456 }, { "epoch": 0.3, "learning_rate": 1.9753815270694852e-05, "loss": 0.4326, "step": 457 }, { "epoch": 0.3, "learning_rate": 1.9752270939260316e-05, "loss": 0.4297, "step": 458 }, { "epoch": 0.3, "learning_rate": 1.9750721839865274e-05, "loss": 0.3998, "step": 459 }, { "epoch": 0.3, "learning_rate": 1.97491679732671e-05, "loss": 0.4154, "step": 460 }, { "epoch": 0.3, "learning_rate": 1.974760934022549e-05, "loss": 0.4098, "step": 461 }, { "epoch": 0.3, "learning_rate": 1.9746045941502468e-05, "loss": 0.4097, "step": 462 }, { "epoch": 0.3, "learning_rate": 1.9744477777862398e-05, "loss": 0.3799, "step": 463 }, { "epoch": 0.3, "learning_rate": 1.9742904850071964e-05, "loss": 0.384, "step": 464 }, { "epoch": 0.3, "learning_rate": 1.974132715890018e-05, "loss": 0.4003, "step": 465 }, { "epoch": 0.3, "learning_rate": 1.9739744705118395e-05, "loss": 0.3968, "step": 466 }, { "epoch": 0.3, "learning_rate": 1.9738157489500282e-05, "loss": 0.4101, "step": 467 }, { "epoch": 0.3, "learning_rate": 1.9736565512821838e-05, "loss": 0.3968, "step": 468 }, { "epoch": 0.3, "learning_rate": 1.9734968775861403e-05, "loss": 0.4021, "step": 469 }, { "epoch": 0.3, "learning_rate": 1.9733367279399624e-05, "loss": 0.3963, "step": 470 }, { "epoch": 0.31, "learning_rate": 1.9731761024219494e-05, "loss": 0.4154, "step": 471 }, { "epoch": 0.31, "learning_rate": 1.9730150011106316e-05, "loss": 0.4115, "step": 472 }, { "epoch": 0.31, "learning_rate": 1.972853424084773e-05, "loss": 0.4039, "step": 473 }, { "epoch": 0.31, "learning_rate": 1.9726913714233697e-05, "loss": 0.3986, "step": 474 }, { "epoch": 0.31, "learning_rate": 1.9725288432056507e-05, "loss": 0.4116, "step": 475 }, { "epoch": 0.31, "learning_rate": 1.972365839511077e-05, "loss": 0.4184, "step": 476 }, { "epoch": 0.31, "learning_rate": 1.972202360419343e-05, "loss": 0.4049, "step": 477 }, { "epoch": 0.31, "learning_rate": 1.9720384060103745e-05, "loss": 0.4134, "step": 478 }, { "epoch": 0.31, "learning_rate": 1.9718739763643294e-05, "loss": 0.3852, "step": 479 }, { "epoch": 0.31, "learning_rate": 1.9717090715615996e-05, "loss": 0.4029, "step": 480 }, { "epoch": 0.31, "learning_rate": 1.9715436916828082e-05, "loss": 0.3799, "step": 481 }, { "epoch": 0.31, "learning_rate": 1.9713778368088102e-05, "loss": 0.3648, "step": 482 }, { "epoch": 0.31, "learning_rate": 1.9712115070206937e-05, "loss": 0.3889, "step": 483 }, { "epoch": 0.31, "learning_rate": 1.9710447023997784e-05, "loss": 0.3973, "step": 484 }, { "epoch": 0.31, "learning_rate": 1.970877423027617e-05, "loss": 0.4126, "step": 485 }, { "epoch": 0.31, "learning_rate": 1.9707096689859928e-05, "loss": 0.4116, "step": 486 }, { "epoch": 0.32, "learning_rate": 1.9705414403569225e-05, "loss": 0.398, "step": 487 }, { "epoch": 0.32, "learning_rate": 1.970372737222654e-05, "loss": 0.3769, "step": 488 }, { "epoch": 0.32, "learning_rate": 1.9702035596656684e-05, "loss": 0.4334, "step": 489 }, { "epoch": 0.32, "learning_rate": 1.9700339077686767e-05, "loss": 0.4001, "step": 490 }, { "epoch": 0.32, "learning_rate": 1.9698637816146242e-05, "loss": 0.3876, "step": 491 }, { "epoch": 0.32, "learning_rate": 1.9696931812866863e-05, "loss": 0.3964, "step": 492 }, { "epoch": 0.32, "learning_rate": 1.9695221068682708e-05, "loss": 0.3939, "step": 493 }, { "epoch": 0.32, "learning_rate": 1.9693505584430176e-05, "loss": 0.3766, "step": 494 }, { "epoch": 0.32, "learning_rate": 1.9691785360947975e-05, "loss": 0.3847, "step": 495 }, { "epoch": 0.32, "learning_rate": 1.969006039907714e-05, "loss": 0.4065, "step": 496 }, { "epoch": 0.32, "learning_rate": 1.968833069966102e-05, "loss": 0.3967, "step": 497 }, { "epoch": 0.32, "learning_rate": 1.9686596263545273e-05, "loss": 0.3872, "step": 498 }, { "epoch": 0.32, "learning_rate": 1.968485709157788e-05, "loss": 0.3645, "step": 499 }, { "epoch": 0.32, "learning_rate": 1.9683113184609137e-05, "loss": 0.372, "step": 500 }, { "epoch": 0.32, "learning_rate": 1.9681364543491656e-05, "loss": 0.3936, "step": 501 }, { "epoch": 0.33, "learning_rate": 1.9679611169080354e-05, "loss": 0.3748, "step": 502 }, { "epoch": 0.33, "learning_rate": 1.9677853062232474e-05, "loss": 0.4114, "step": 503 }, { "epoch": 0.33, "learning_rate": 1.967609022380757e-05, "loss": 0.3963, "step": 504 }, { "epoch": 0.33, "learning_rate": 1.9674322654667502e-05, "loss": 0.3649, "step": 505 }, { "epoch": 0.33, "learning_rate": 1.967255035567645e-05, "loss": 0.3758, "step": 506 }, { "epoch": 0.33, "learning_rate": 1.967077332770091e-05, "loss": 0.4076, "step": 507 }, { "epoch": 0.33, "learning_rate": 1.9668991571609677e-05, "loss": 0.3751, "step": 508 }, { "epoch": 0.33, "learning_rate": 1.9667205088273868e-05, "loss": 0.397, "step": 509 }, { "epoch": 0.33, "learning_rate": 1.966541387856691e-05, "loss": 0.3861, "step": 510 }, { "epoch": 0.33, "learning_rate": 1.9663617943364536e-05, "loss": 0.4167, "step": 511 }, { "epoch": 0.33, "learning_rate": 1.9661817283544796e-05, "loss": 0.4201, "step": 512 }, { "epoch": 0.33, "learning_rate": 1.9660011899988046e-05, "loss": 0.3946, "step": 513 }, { "epoch": 0.33, "learning_rate": 1.9658201793576956e-05, "loss": 0.376, "step": 514 }, { "epoch": 0.33, "learning_rate": 1.965638696519649e-05, "loss": 0.4006, "step": 515 }, { "epoch": 0.33, "learning_rate": 1.9654567415733938e-05, "loss": 0.3823, "step": 516 }, { "epoch": 0.33, "learning_rate": 1.9652743146078892e-05, "loss": 0.3946, "step": 517 }, { "epoch": 0.34, "learning_rate": 1.9650914157123253e-05, "loss": 0.3925, "step": 518 }, { "epoch": 0.34, "learning_rate": 1.9649080449761224e-05, "loss": 0.3996, "step": 519 }, { "epoch": 0.34, "learning_rate": 1.964724202488932e-05, "loss": 0.3775, "step": 520 }, { "epoch": 0.34, "learning_rate": 1.9645398883406366e-05, "loss": 0.3898, "step": 521 }, { "epoch": 0.34, "learning_rate": 1.964355102621348e-05, "loss": 0.4077, "step": 522 }, { "epoch": 0.34, "learning_rate": 1.9641698454214096e-05, "loss": 0.4413, "step": 523 }, { "epoch": 0.34, "learning_rate": 1.9639841168313954e-05, "loss": 0.3869, "step": 524 }, { "epoch": 0.34, "learning_rate": 1.9637979169421094e-05, "loss": 0.3972, "step": 525 }, { "epoch": 0.34, "learning_rate": 1.9636112458445862e-05, "loss": 0.3799, "step": 526 }, { "epoch": 0.34, "learning_rate": 1.9634241036300906e-05, "loss": 0.379, "step": 527 }, { "epoch": 0.34, "learning_rate": 1.9632364903901178e-05, "loss": 0.3849, "step": 528 }, { "epoch": 0.34, "learning_rate": 1.963048406216394e-05, "loss": 0.3976, "step": 529 }, { "epoch": 0.34, "learning_rate": 1.9628598512008735e-05, "loss": 0.3843, "step": 530 }, { "epoch": 0.34, "learning_rate": 1.962670825435744e-05, "loss": 0.3848, "step": 531 }, { "epoch": 0.34, "learning_rate": 1.9624813290134207e-05, "loss": 0.4001, "step": 532 }, { "epoch": 0.35, "learning_rate": 1.9622913620265504e-05, "loss": 0.3866, "step": 533 }, { "epoch": 0.35, "learning_rate": 1.962100924568009e-05, "loss": 0.3976, "step": 534 }, { "epoch": 0.35, "learning_rate": 1.9619100167309027e-05, "loss": 0.3934, "step": 535 }, { "epoch": 0.35, "learning_rate": 1.9617186386085682e-05, "loss": 0.3756, "step": 536 }, { "epoch": 0.35, "learning_rate": 1.961526790294572e-05, "loss": 0.4099, "step": 537 }, { "epoch": 0.35, "learning_rate": 1.9613344718827095e-05, "loss": 0.3786, "step": 538 }, { "epoch": 0.35, "learning_rate": 1.961141683467007e-05, "loss": 0.37, "step": 539 }, { "epoch": 0.35, "learning_rate": 1.9609484251417202e-05, "loss": 0.3877, "step": 540 }, { "epoch": 0.35, "learning_rate": 1.960754697001335e-05, "loss": 0.3704, "step": 541 }, { "epoch": 0.35, "learning_rate": 1.960560499140566e-05, "loss": 0.4081, "step": 542 }, { "epoch": 0.35, "learning_rate": 1.9603658316543583e-05, "loss": 0.3856, "step": 543 }, { "epoch": 0.35, "learning_rate": 1.9601706946378865e-05, "loss": 0.3637, "step": 544 }, { "epoch": 0.35, "learning_rate": 1.9599750881865542e-05, "loss": 0.3912, "step": 545 }, { "epoch": 0.35, "learning_rate": 1.9597790123959948e-05, "loss": 0.3864, "step": 546 }, { "epoch": 0.35, "learning_rate": 1.959582467362072e-05, "loss": 0.3881, "step": 547 }, { "epoch": 0.35, "learning_rate": 1.9593854531808775e-05, "loss": 0.4137, "step": 548 }, { "epoch": 0.36, "learning_rate": 1.9591879699487333e-05, "loss": 0.3894, "step": 549 }, { "epoch": 0.36, "learning_rate": 1.9589900177621905e-05, "loss": 0.3588, "step": 550 }, { "epoch": 0.36, "learning_rate": 1.9587915967180295e-05, "loss": 0.3796, "step": 551 }, { "epoch": 0.36, "learning_rate": 1.9585927069132595e-05, "loss": 0.3713, "step": 552 }, { "epoch": 0.36, "learning_rate": 1.95839334844512e-05, "loss": 0.4125, "step": 553 }, { "epoch": 0.36, "learning_rate": 1.958193521411078e-05, "loss": 0.3927, "step": 554 }, { "epoch": 0.36, "learning_rate": 1.9579932259088313e-05, "loss": 0.4011, "step": 555 }, { "epoch": 0.36, "learning_rate": 1.9577924620363055e-05, "loss": 0.3643, "step": 556 }, { "epoch": 0.36, "learning_rate": 1.9575912298916554e-05, "loss": 0.3671, "step": 557 }, { "epoch": 0.36, "learning_rate": 1.9573895295732655e-05, "loss": 0.4181, "step": 558 }, { "epoch": 0.36, "learning_rate": 1.9571873611797482e-05, "loss": 0.3785, "step": 559 }, { "epoch": 0.36, "learning_rate": 1.9569847248099452e-05, "loss": 0.4163, "step": 560 }, { "epoch": 0.36, "learning_rate": 1.9567816205629272e-05, "loss": 0.3739, "step": 561 }, { "epoch": 0.36, "learning_rate": 1.9565780485379932e-05, "loss": 0.3791, "step": 562 }, { "epoch": 0.36, "learning_rate": 1.9563740088346715e-05, "loss": 0.3995, "step": 563 }, { "epoch": 0.37, "learning_rate": 1.9561695015527187e-05, "loss": 0.3798, "step": 564 }, { "epoch": 0.37, "learning_rate": 1.955964526792119e-05, "loss": 0.411, "step": 565 }, { "epoch": 0.37, "learning_rate": 1.9557590846530877e-05, "loss": 0.3554, "step": 566 }, { "epoch": 0.37, "learning_rate": 1.955553175236066e-05, "loss": 0.3937, "step": 567 }, { "epoch": 0.37, "learning_rate": 1.9553467986417248e-05, "loss": 0.3475, "step": 568 }, { "epoch": 0.37, "learning_rate": 1.9551399549709632e-05, "loss": 0.3951, "step": 569 }, { "epoch": 0.37, "learning_rate": 1.9549326443249086e-05, "loss": 0.3761, "step": 570 }, { "epoch": 0.37, "learning_rate": 1.954724866804917e-05, "loss": 0.3732, "step": 571 }, { "epoch": 0.37, "learning_rate": 1.9545166225125725e-05, "loss": 0.4032, "step": 572 }, { "epoch": 0.37, "learning_rate": 1.954307911549687e-05, "loss": 0.3606, "step": 573 }, { "epoch": 0.37, "learning_rate": 1.954098734018301e-05, "loss": 0.4073, "step": 574 }, { "epoch": 0.37, "learning_rate": 1.9538890900206833e-05, "loss": 0.4025, "step": 575 }, { "epoch": 0.37, "learning_rate": 1.95367897965933e-05, "loss": 0.3956, "step": 576 }, { "epoch": 0.37, "learning_rate": 1.9534684030369662e-05, "loss": 0.3751, "step": 577 }, { "epoch": 0.37, "learning_rate": 1.9532573602565438e-05, "loss": 0.3657, "step": 578 }, { "epoch": 0.38, "learning_rate": 1.953045851421244e-05, "loss": 0.3727, "step": 579 }, { "epoch": 0.38, "learning_rate": 1.9528338766344743e-05, "loss": 0.4335, "step": 580 }, { "epoch": 0.38, "learning_rate": 1.9526214359998714e-05, "loss": 0.3802, "step": 581 }, { "epoch": 0.38, "learning_rate": 1.952408529621299e-05, "loss": 0.4165, "step": 582 }, { "epoch": 0.38, "learning_rate": 1.9521951576028482e-05, "loss": 0.3787, "step": 583 }, { "epoch": 0.38, "learning_rate": 1.9519813200488397e-05, "loss": 0.3921, "step": 584 }, { "epoch": 0.38, "learning_rate": 1.9517670170638185e-05, "loss": 0.3914, "step": 585 }, { "epoch": 0.38, "learning_rate": 1.95155224875256e-05, "loss": 0.378, "step": 586 }, { "epoch": 0.38, "learning_rate": 1.9513370152200657e-05, "loss": 0.3725, "step": 587 }, { "epoch": 0.38, "learning_rate": 1.9511213165715653e-05, "loss": 0.3674, "step": 588 }, { "epoch": 0.38, "learning_rate": 1.9509051529125153e-05, "loss": 0.3725, "step": 589 }, { "epoch": 0.38, "learning_rate": 1.9506885243485997e-05, "loss": 0.3678, "step": 590 }, { "epoch": 0.38, "learning_rate": 1.95047143098573e-05, "loss": 0.3896, "step": 591 }, { "epoch": 0.38, "learning_rate": 1.9502538729300452e-05, "loss": 0.3719, "step": 592 }, { "epoch": 0.38, "learning_rate": 1.9500358502879103e-05, "loss": 0.3773, "step": 593 }, { "epoch": 0.38, "learning_rate": 1.9498173631659186e-05, "loss": 0.3831, "step": 594 }, { "epoch": 0.39, "learning_rate": 1.9495984116708906e-05, "loss": 0.3992, "step": 595 }, { "epoch": 0.39, "learning_rate": 1.949378995909873e-05, "loss": 0.375, "step": 596 }, { "epoch": 0.39, "learning_rate": 1.9491591159901396e-05, "loss": 0.3843, "step": 597 }, { "epoch": 0.39, "learning_rate": 1.9489387720191916e-05, "loss": 0.3599, "step": 598 }, { "epoch": 0.39, "learning_rate": 1.948717964104757e-05, "loss": 0.3757, "step": 599 }, { "epoch": 0.39, "learning_rate": 1.9484966923547904e-05, "loss": 0.3779, "step": 600 }, { "epoch": 0.39, "learning_rate": 1.9482749568774733e-05, "loss": 0.3431, "step": 601 }, { "epoch": 0.39, "learning_rate": 1.948052757781214e-05, "loss": 0.3936, "step": 602 }, { "epoch": 0.39, "learning_rate": 1.9478300951746467e-05, "loss": 0.3589, "step": 603 }, { "epoch": 0.39, "learning_rate": 1.947606969166634e-05, "loss": 0.3852, "step": 604 }, { "epoch": 0.39, "learning_rate": 1.947383379866263e-05, "loss": 0.4018, "step": 605 }, { "epoch": 0.39, "learning_rate": 1.9471593273828485e-05, "loss": 0.3723, "step": 606 }, { "epoch": 0.39, "learning_rate": 1.9469348118259316e-05, "loss": 0.393, "step": 607 }, { "epoch": 0.39, "learning_rate": 1.9467098333052796e-05, "loss": 0.3706, "step": 608 }, { "epoch": 0.39, "learning_rate": 1.946484391930886e-05, "loss": 0.4096, "step": 609 }, { "epoch": 0.4, "learning_rate": 1.9462584878129714e-05, "loss": 0.3972, "step": 610 }, { "epoch": 0.4, "learning_rate": 1.9460321210619816e-05, "loss": 0.3802, "step": 611 }, { "epoch": 0.4, "learning_rate": 1.945805291788589e-05, "loss": 0.4039, "step": 612 }, { "epoch": 0.4, "learning_rate": 1.9455780001036927e-05, "loss": 0.368, "step": 613 }, { "epoch": 0.4, "learning_rate": 1.945350246118417e-05, "loss": 0.376, "step": 614 }, { "epoch": 0.4, "learning_rate": 1.945122029944112e-05, "loss": 0.3744, "step": 615 }, { "epoch": 0.4, "learning_rate": 1.9448933516923557e-05, "loss": 0.3929, "step": 616 }, { "epoch": 0.4, "learning_rate": 1.9446642114749497e-05, "loss": 0.3881, "step": 617 }, { "epoch": 0.4, "learning_rate": 1.9444346094039223e-05, "loss": 0.4086, "step": 618 }, { "epoch": 0.4, "learning_rate": 1.9442045455915285e-05, "loss": 0.3811, "step": 619 }, { "epoch": 0.4, "learning_rate": 1.9439740201502476e-05, "loss": 0.4099, "step": 620 }, { "epoch": 0.4, "learning_rate": 1.9437430331927855e-05, "loss": 0.3678, "step": 621 }, { "epoch": 0.4, "learning_rate": 1.9435115848320736e-05, "loss": 0.3999, "step": 622 }, { "epoch": 0.4, "learning_rate": 1.943279675181269e-05, "loss": 0.3794, "step": 623 }, { "epoch": 0.4, "learning_rate": 1.9430473043537533e-05, "loss": 0.3587, "step": 624 }, { "epoch": 0.4, "learning_rate": 1.942814472463135e-05, "loss": 0.391, "step": 625 }, { "epoch": 0.41, "learning_rate": 1.942581179623247e-05, "loss": 0.3913, "step": 626 }, { "epoch": 0.41, "learning_rate": 1.9423474259481493e-05, "loss": 0.3977, "step": 627 }, { "epoch": 0.41, "learning_rate": 1.942113211552124e-05, "loss": 0.3934, "step": 628 }, { "epoch": 0.41, "learning_rate": 1.9418785365496814e-05, "loss": 0.4173, "step": 629 }, { "epoch": 0.41, "learning_rate": 1.9416434010555558e-05, "loss": 0.3953, "step": 630 }, { "epoch": 0.41, "learning_rate": 1.9414078051847064e-05, "loss": 0.4062, "step": 631 }, { "epoch": 0.41, "learning_rate": 1.9411717490523182e-05, "loss": 0.3923, "step": 632 }, { "epoch": 0.41, "learning_rate": 1.940935232773801e-05, "loss": 0.3558, "step": 633 }, { "epoch": 0.41, "learning_rate": 1.940698256464789e-05, "loss": 0.3765, "step": 634 }, { "epoch": 0.41, "learning_rate": 1.9404608202411417e-05, "loss": 0.3981, "step": 635 }, { "epoch": 0.41, "learning_rate": 1.9402229242189442e-05, "loss": 0.379, "step": 636 }, { "epoch": 0.41, "learning_rate": 1.939984568514505e-05, "loss": 0.385, "step": 637 }, { "epoch": 0.41, "learning_rate": 1.939745753244358e-05, "loss": 0.3668, "step": 638 }, { "epoch": 0.41, "learning_rate": 1.9395064785252626e-05, "loss": 0.3549, "step": 639 }, { "epoch": 0.41, "learning_rate": 1.9392667444742013e-05, "loss": 0.386, "step": 640 }, { "epoch": 0.42, "learning_rate": 1.9390265512083825e-05, "loss": 0.3646, "step": 641 }, { "epoch": 0.42, "learning_rate": 1.938785898845238e-05, "loss": 0.3732, "step": 642 }, { "epoch": 0.42, "learning_rate": 1.9385447875024246e-05, "loss": 0.3383, "step": 643 }, { "epoch": 0.42, "learning_rate": 1.9383032172978237e-05, "loss": 0.3889, "step": 644 }, { "epoch": 0.42, "learning_rate": 1.9380611883495414e-05, "loss": 0.3808, "step": 645 }, { "epoch": 0.42, "learning_rate": 1.9378187007759064e-05, "loss": 0.3474, "step": 646 }, { "epoch": 0.42, "learning_rate": 1.9375757546954735e-05, "loss": 0.4054, "step": 647 }, { "epoch": 0.42, "learning_rate": 1.937332350227021e-05, "loss": 0.3848, "step": 648 }, { "epoch": 0.42, "learning_rate": 1.9370884874895507e-05, "loss": 0.3831, "step": 649 }, { "epoch": 0.42, "learning_rate": 1.9368441666022895e-05, "loss": 0.384, "step": 650 }, { "epoch": 0.42, "learning_rate": 1.9365993876846872e-05, "loss": 0.3653, "step": 651 }, { "epoch": 0.42, "learning_rate": 1.9363541508564186e-05, "loss": 0.3914, "step": 652 }, { "epoch": 0.42, "learning_rate": 1.936108456237382e-05, "loss": 0.3843, "step": 653 }, { "epoch": 0.42, "learning_rate": 1.9358623039476988e-05, "loss": 0.3725, "step": 654 }, { "epoch": 0.42, "learning_rate": 1.9356156941077148e-05, "loss": 0.4112, "step": 655 }, { "epoch": 0.42, "learning_rate": 1.935368626838e-05, "loss": 0.3641, "step": 656 }, { "epoch": 0.43, "learning_rate": 1.935121102259347e-05, "loss": 0.4091, "step": 657 }, { "epoch": 0.43, "learning_rate": 1.9348731204927727e-05, "loss": 0.406, "step": 658 }, { "epoch": 0.43, "learning_rate": 1.934624681659517e-05, "loss": 0.3731, "step": 659 }, { "epoch": 0.43, "learning_rate": 1.934375785881044e-05, "loss": 0.3899, "step": 660 }, { "epoch": 0.43, "learning_rate": 1.9341264332790408e-05, "loss": 0.3829, "step": 661 }, { "epoch": 0.43, "learning_rate": 1.9338766239754176e-05, "loss": 0.3574, "step": 662 }, { "epoch": 0.43, "learning_rate": 1.9336263580923078e-05, "loss": 0.3817, "step": 663 }, { "epoch": 0.43, "learning_rate": 1.933375635752069e-05, "loss": 0.3647, "step": 664 }, { "epoch": 0.43, "learning_rate": 1.9331244570772805e-05, "loss": 0.387, "step": 665 }, { "epoch": 0.43, "learning_rate": 1.932872822190746e-05, "loss": 0.3762, "step": 666 }, { "epoch": 0.43, "learning_rate": 1.9326207312154915e-05, "loss": 0.3645, "step": 667 }, { "epoch": 0.43, "learning_rate": 1.9323681842747666e-05, "loss": 0.3885, "step": 668 }, { "epoch": 0.43, "learning_rate": 1.932115181492043e-05, "loss": 0.3771, "step": 669 }, { "epoch": 0.43, "learning_rate": 1.931861722991016e-05, "loss": 0.3584, "step": 670 }, { "epoch": 0.43, "learning_rate": 1.9316078088956033e-05, "loss": 0.4064, "step": 671 }, { "epoch": 0.44, "learning_rate": 1.9313534393299454e-05, "loss": 0.3709, "step": 672 }, { "epoch": 0.44, "learning_rate": 1.931098614418406e-05, "loss": 0.3765, "step": 673 }, { "epoch": 0.44, "learning_rate": 1.9308433342855705e-05, "loss": 0.3607, "step": 674 }, { "epoch": 0.44, "learning_rate": 1.930587599056247e-05, "loss": 0.3817, "step": 675 }, { "epoch": 0.44, "learning_rate": 1.9303314088554673e-05, "loss": 0.3634, "step": 676 }, { "epoch": 0.44, "learning_rate": 1.930074763808484e-05, "loss": 0.3798, "step": 677 }, { "epoch": 0.44, "learning_rate": 1.9298176640407732e-05, "loss": 0.4132, "step": 678 }, { "epoch": 0.44, "learning_rate": 1.929560109678033e-05, "loss": 0.3631, "step": 679 }, { "epoch": 0.44, "learning_rate": 1.929302100846184e-05, "loss": 0.3992, "step": 680 }, { "epoch": 0.44, "learning_rate": 1.9290436376713678e-05, "loss": 0.359, "step": 681 }, { "epoch": 0.44, "learning_rate": 1.9287847202799495e-05, "loss": 0.3599, "step": 682 }, { "epoch": 0.44, "learning_rate": 1.9285253487985162e-05, "loss": 0.3732, "step": 683 }, { "epoch": 0.44, "learning_rate": 1.9282655233538762e-05, "loss": 0.3757, "step": 684 }, { "epoch": 0.44, "learning_rate": 1.9280052440730603e-05, "loss": 0.366, "step": 685 }, { "epoch": 0.44, "learning_rate": 1.9277445110833213e-05, "loss": 0.3989, "step": 686 }, { "epoch": 0.44, "learning_rate": 1.927483324512133e-05, "loss": 0.3806, "step": 687 }, { "epoch": 0.45, "learning_rate": 1.9272216844871918e-05, "loss": 0.3783, "step": 688 }, { "epoch": 0.45, "learning_rate": 1.9269595911364157e-05, "loss": 0.3612, "step": 689 }, { "epoch": 0.45, "learning_rate": 1.926697044587944e-05, "loss": 0.3697, "step": 690 }, { "epoch": 0.45, "learning_rate": 1.9264340449701376e-05, "loss": 0.3961, "step": 691 }, { "epoch": 0.45, "learning_rate": 1.926170592411579e-05, "loss": 0.3651, "step": 692 }, { "epoch": 0.45, "learning_rate": 1.9259066870410725e-05, "loss": 0.3911, "step": 693 }, { "epoch": 0.45, "learning_rate": 1.9256423289876437e-05, "loss": 0.3915, "step": 694 }, { "epoch": 0.45, "learning_rate": 1.9253775183805387e-05, "loss": 0.3885, "step": 695 }, { "epoch": 0.45, "learning_rate": 1.9251122553492256e-05, "loss": 0.3693, "step": 696 }, { "epoch": 0.45, "learning_rate": 1.9248465400233938e-05, "loss": 0.3752, "step": 697 }, { "epoch": 0.45, "learning_rate": 1.9245803725329532e-05, "loss": 0.3718, "step": 698 }, { "epoch": 0.45, "learning_rate": 1.9243137530080354e-05, "loss": 0.3592, "step": 699 }, { "epoch": 0.45, "learning_rate": 1.924046681578993e-05, "loss": 0.3725, "step": 700 }, { "epoch": 0.45, "learning_rate": 1.9237791583763983e-05, "loss": 0.3818, "step": 701 }, { "epoch": 0.45, "learning_rate": 1.9235111835310467e-05, "loss": 0.3981, "step": 702 }, { "epoch": 0.46, "learning_rate": 1.9232427571739524e-05, "loss": 0.3807, "step": 703 }, { "epoch": 0.46, "learning_rate": 1.9229738794363514e-05, "loss": 0.3782, "step": 704 }, { "epoch": 0.46, "learning_rate": 1.9227045504496998e-05, "loss": 0.3446, "step": 705 }, { "epoch": 0.46, "learning_rate": 1.922434770345675e-05, "loss": 0.3753, "step": 706 }, { "epoch": 0.46, "learning_rate": 1.9221645392561745e-05, "loss": 0.345, "step": 707 }, { "epoch": 0.46, "learning_rate": 1.9218938573133162e-05, "loss": 0.4118, "step": 708 }, { "epoch": 0.46, "learning_rate": 1.9216227246494385e-05, "loss": 0.4009, "step": 709 }, { "epoch": 0.46, "learning_rate": 1.9213511413971004e-05, "loss": 0.3764, "step": 710 }, { "epoch": 0.46, "learning_rate": 1.9210791076890813e-05, "loss": 0.3649, "step": 711 }, { "epoch": 0.46, "learning_rate": 1.92080662365838e-05, "loss": 0.3524, "step": 712 }, { "epoch": 0.46, "learning_rate": 1.9205336894382166e-05, "loss": 0.3673, "step": 713 }, { "epoch": 0.46, "learning_rate": 1.9202603051620305e-05, "loss": 0.3881, "step": 714 }, { "epoch": 0.46, "learning_rate": 1.9199864709634815e-05, "loss": 0.384, "step": 715 }, { "epoch": 0.46, "learning_rate": 1.919712186976449e-05, "loss": 0.3895, "step": 716 }, { "epoch": 0.46, "learning_rate": 1.9194374533350327e-05, "loss": 0.3786, "step": 717 }, { "epoch": 0.47, "learning_rate": 1.9191622701735522e-05, "loss": 0.3727, "step": 718 }, { "epoch": 0.47, "learning_rate": 1.9188866376265465e-05, "loss": 0.3648, "step": 719 }, { "epoch": 0.47, "learning_rate": 1.9186105558287743e-05, "loss": 0.3583, "step": 720 }, { "epoch": 0.47, "learning_rate": 1.9183340249152142e-05, "loss": 0.3703, "step": 721 }, { "epoch": 0.47, "learning_rate": 1.9180570450210644e-05, "loss": 0.3777, "step": 722 }, { "epoch": 0.47, "learning_rate": 1.9177796162817426e-05, "loss": 0.378, "step": 723 }, { "epoch": 0.47, "learning_rate": 1.9175017388328856e-05, "loss": 0.37, "step": 724 }, { "epoch": 0.47, "learning_rate": 1.9172234128103503e-05, "loss": 0.3742, "step": 725 }, { "epoch": 0.47, "learning_rate": 1.9169446383502118e-05, "loss": 0.3627, "step": 726 }, { "epoch": 0.47, "learning_rate": 1.9166654155887657e-05, "loss": 0.3875, "step": 727 }, { "epoch": 0.47, "learning_rate": 1.9163857446625257e-05, "loss": 0.3401, "step": 728 }, { "epoch": 0.47, "learning_rate": 1.916105625708225e-05, "loss": 0.3624, "step": 729 }, { "epoch": 0.47, "learning_rate": 1.9158250588628165e-05, "loss": 0.3848, "step": 730 }, { "epoch": 0.47, "learning_rate": 1.915544044263471e-05, "loss": 0.3475, "step": 731 }, { "epoch": 0.47, "learning_rate": 1.9152625820475786e-05, "loss": 0.3662, "step": 732 }, { "epoch": 0.47, "learning_rate": 1.914980672352749e-05, "loss": 0.3726, "step": 733 }, { "epoch": 0.48, "learning_rate": 1.9146983153168092e-05, "loss": 0.3753, "step": 734 }, { "epoch": 0.48, "learning_rate": 1.9144155110778065e-05, "loss": 0.3898, "step": 735 }, { "epoch": 0.48, "learning_rate": 1.9141322597740057e-05, "loss": 0.3258, "step": 736 }, { "epoch": 0.48, "learning_rate": 1.9138485615438905e-05, "loss": 0.3349, "step": 737 }, { "epoch": 0.48, "learning_rate": 1.913564416526163e-05, "loss": 0.3807, "step": 738 }, { "epoch": 0.48, "learning_rate": 1.9132798248597443e-05, "loss": 0.3617, "step": 739 }, { "epoch": 0.48, "learning_rate": 1.9129947866837736e-05, "loss": 0.3516, "step": 740 }, { "epoch": 0.48, "learning_rate": 1.9127093021376075e-05, "loss": 0.3603, "step": 741 }, { "epoch": 0.48, "learning_rate": 1.9124233713608222e-05, "loss": 0.3457, "step": 742 }, { "epoch": 0.48, "learning_rate": 1.9121369944932112e-05, "loss": 0.3663, "step": 743 }, { "epoch": 0.48, "learning_rate": 1.9118501716747868e-05, "loss": 0.3739, "step": 744 }, { "epoch": 0.48, "learning_rate": 1.9115629030457783e-05, "loss": 0.3483, "step": 745 }, { "epoch": 0.48, "learning_rate": 1.911275188746634e-05, "loss": 0.3628, "step": 746 }, { "epoch": 0.48, "learning_rate": 1.9109870289180192e-05, "loss": 0.3643, "step": 747 }, { "epoch": 0.48, "learning_rate": 1.910698423700818e-05, "loss": 0.3565, "step": 748 }, { "epoch": 0.49, "learning_rate": 1.910409373236131e-05, "loss": 0.4115, "step": 749 }, { "epoch": 0.49, "learning_rate": 1.9101198776652777e-05, "loss": 0.3411, "step": 750 }, { "epoch": 0.49, "learning_rate": 1.9098299371297946e-05, "loss": 0.3598, "step": 751 }, { "epoch": 0.49, "learning_rate": 1.9095395517714364e-05, "loss": 0.3802, "step": 752 }, { "epoch": 0.49, "learning_rate": 1.9092487217321737e-05, "loss": 0.3821, "step": 753 }, { "epoch": 0.49, "learning_rate": 1.9089574471541964e-05, "loss": 0.3601, "step": 754 }, { "epoch": 0.49, "learning_rate": 1.9086657281799107e-05, "loss": 0.3638, "step": 755 }, { "epoch": 0.49, "learning_rate": 1.9083735649519396e-05, "loss": 0.3781, "step": 756 }, { "epoch": 0.49, "learning_rate": 1.9080809576131248e-05, "loss": 0.3809, "step": 757 }, { "epoch": 0.49, "learning_rate": 1.9077879063065243e-05, "loss": 0.3559, "step": 758 }, { "epoch": 0.49, "learning_rate": 1.907494411175413e-05, "loss": 0.3497, "step": 759 }, { "epoch": 0.49, "learning_rate": 1.907200472363282e-05, "loss": 0.3924, "step": 760 }, { "epoch": 0.49, "learning_rate": 1.9069060900138414e-05, "loss": 0.369, "step": 761 }, { "epoch": 0.49, "learning_rate": 1.9066112642710165e-05, "loss": 0.3755, "step": 762 }, { "epoch": 0.49, "learning_rate": 1.9063159952789502e-05, "loss": 0.3637, "step": 763 }, { "epoch": 0.49, "learning_rate": 1.9060202831820015e-05, "loss": 0.3683, "step": 764 }, { "epoch": 0.5, "learning_rate": 1.905724128124746e-05, "loss": 0.359, "step": 765 }, { "epoch": 0.5, "learning_rate": 1.9054275302519772e-05, "loss": 0.3783, "step": 766 }, { "epoch": 0.5, "learning_rate": 1.9051304897087034e-05, "loss": 0.3838, "step": 767 }, { "epoch": 0.5, "learning_rate": 1.9048330066401502e-05, "loss": 0.3862, "step": 768 }, { "epoch": 0.5, "learning_rate": 1.904535081191759e-05, "loss": 0.3875, "step": 769 }, { "epoch": 0.5, "learning_rate": 1.904236713509188e-05, "loss": 0.3571, "step": 770 }, { "epoch": 0.5, "learning_rate": 1.903937903738312e-05, "loss": 0.369, "step": 771 }, { "epoch": 0.5, "learning_rate": 1.9036386520252208e-05, "loss": 0.3769, "step": 772 }, { "epoch": 0.5, "learning_rate": 1.9033389585162208e-05, "loss": 0.382, "step": 773 }, { "epoch": 0.5, "learning_rate": 1.903038823357835e-05, "loss": 0.3518, "step": 774 }, { "epoch": 0.5, "learning_rate": 1.9027382466968018e-05, "loss": 0.3577, "step": 775 }, { "epoch": 0.5, "learning_rate": 1.902437228680075e-05, "loss": 0.3426, "step": 776 }, { "epoch": 0.5, "learning_rate": 1.9021357694548243e-05, "loss": 0.3623, "step": 777 }, { "epoch": 0.5, "learning_rate": 1.9018338691684365e-05, "loss": 0.3646, "step": 778 }, { "epoch": 0.5, "learning_rate": 1.9015315279685124e-05, "loss": 0.3681, "step": 779 }, { "epoch": 0.51, "learning_rate": 1.9012287460028688e-05, "loss": 0.3928, "step": 780 }, { "epoch": 0.51, "learning_rate": 1.9009255234195382e-05, "loss": 0.3448, "step": 781 }, { "epoch": 0.51, "learning_rate": 1.9006218603667686e-05, "loss": 0.3986, "step": 782 }, { "epoch": 0.51, "learning_rate": 1.9003177569930233e-05, "loss": 0.3761, "step": 783 }, { "epoch": 0.51, "learning_rate": 1.9000132134469806e-05, "loss": 0.3712, "step": 784 }, { "epoch": 0.51, "learning_rate": 1.8997082298775343e-05, "loss": 0.3468, "step": 785 }, { "epoch": 0.51, "learning_rate": 1.8994028064337934e-05, "loss": 0.3551, "step": 786 }, { "epoch": 0.51, "learning_rate": 1.8990969432650813e-05, "loss": 0.3769, "step": 787 }, { "epoch": 0.51, "learning_rate": 1.8987906405209372e-05, "loss": 0.3943, "step": 788 }, { "epoch": 0.51, "learning_rate": 1.8984838983511147e-05, "loss": 0.3643, "step": 789 }, { "epoch": 0.51, "learning_rate": 1.8981767169055824e-05, "loss": 0.332, "step": 790 }, { "epoch": 0.51, "learning_rate": 1.8978690963345243e-05, "loss": 0.3594, "step": 791 }, { "epoch": 0.51, "learning_rate": 1.8975610367883372e-05, "loss": 0.3785, "step": 792 }, { "epoch": 0.51, "learning_rate": 1.897252538417635e-05, "loss": 0.3466, "step": 793 }, { "epoch": 0.51, "learning_rate": 1.8969436013732444e-05, "loss": 0.3634, "step": 794 }, { "epoch": 0.51, "learning_rate": 1.896634225806207e-05, "loss": 0.3537, "step": 795 }, { "epoch": 0.52, "learning_rate": 1.896324411867779e-05, "loss": 0.3569, "step": 796 }, { "epoch": 0.52, "learning_rate": 1.8960141597094315e-05, "loss": 0.3704, "step": 797 }, { "epoch": 0.52, "learning_rate": 1.895703469482848e-05, "loss": 0.3605, "step": 798 }, { "epoch": 0.52, "learning_rate": 1.8953923413399283e-05, "loss": 0.3697, "step": 799 }, { "epoch": 0.52, "learning_rate": 1.8950807754327848e-05, "loss": 0.3355, "step": 800 }, { "epoch": 0.52, "learning_rate": 1.8947687719137446e-05, "loss": 0.3703, "step": 801 }, { "epoch": 0.52, "learning_rate": 1.894456330935349e-05, "loss": 0.374, "step": 802 }, { "epoch": 0.52, "learning_rate": 1.894143452650352e-05, "loss": 0.3437, "step": 803 }, { "epoch": 0.52, "learning_rate": 1.893830137211723e-05, "loss": 0.3563, "step": 804 }, { "epoch": 0.52, "learning_rate": 1.893516384772644e-05, "loss": 0.3763, "step": 805 }, { "epoch": 0.52, "learning_rate": 1.893202195486511e-05, "loss": 0.3725, "step": 806 }, { "epoch": 0.52, "learning_rate": 1.8928875695069334e-05, "loss": 0.3522, "step": 807 }, { "epoch": 0.52, "learning_rate": 1.8925725069877344e-05, "loss": 0.3671, "step": 808 }, { "epoch": 0.52, "learning_rate": 1.892257008082951e-05, "loss": 0.3737, "step": 809 }, { "epoch": 0.52, "learning_rate": 1.8919410729468323e-05, "loss": 0.3757, "step": 810 }, { "epoch": 0.53, "learning_rate": 1.8916247017338418e-05, "loss": 0.3486, "step": 811 }, { "epoch": 0.53, "learning_rate": 1.8913078945986555e-05, "loss": 0.3574, "step": 812 }, { "epoch": 0.53, "learning_rate": 1.8909906516961633e-05, "loss": 0.3871, "step": 813 }, { "epoch": 0.53, "learning_rate": 1.8906729731814676e-05, "loss": 0.3801, "step": 814 }, { "epoch": 0.53, "learning_rate": 1.8903548592098836e-05, "loss": 0.3447, "step": 815 }, { "epoch": 0.53, "learning_rate": 1.89003630993694e-05, "loss": 0.3417, "step": 816 }, { "epoch": 0.53, "learning_rate": 1.889717325518378e-05, "loss": 0.3751, "step": 817 }, { "epoch": 0.53, "learning_rate": 1.889397906110151e-05, "loss": 0.3716, "step": 818 }, { "epoch": 0.53, "learning_rate": 1.889078051868426e-05, "loss": 0.3578, "step": 819 }, { "epoch": 0.53, "learning_rate": 1.8887577629495827e-05, "loss": 0.353, "step": 820 }, { "epoch": 0.53, "learning_rate": 1.888437039510212e-05, "loss": 0.3603, "step": 821 }, { "epoch": 0.53, "learning_rate": 1.8881158817071183e-05, "loss": 0.3851, "step": 822 }, { "epoch": 0.53, "learning_rate": 1.8877942896973182e-05, "loss": 0.3524, "step": 823 }, { "epoch": 0.53, "learning_rate": 1.8874722636380408e-05, "loss": 0.3714, "step": 824 }, { "epoch": 0.53, "learning_rate": 1.887149803686727e-05, "loss": 0.3763, "step": 825 }, { "epoch": 0.53, "learning_rate": 1.8868269100010294e-05, "loss": 0.3525, "step": 826 }, { "epoch": 0.54, "learning_rate": 1.886503582738814e-05, "loss": 0.3739, "step": 827 }, { "epoch": 0.54, "learning_rate": 1.8861798220581577e-05, "loss": 0.3743, "step": 828 }, { "epoch": 0.54, "learning_rate": 1.885855628117349e-05, "loss": 0.3409, "step": 829 }, { "epoch": 0.54, "learning_rate": 1.88553100107489e-05, "loss": 0.3854, "step": 830 }, { "epoch": 0.54, "learning_rate": 1.885205941089492e-05, "loss": 0.3643, "step": 831 }, { "epoch": 0.54, "learning_rate": 1.8848804483200803e-05, "loss": 0.3938, "step": 832 }, { "epoch": 0.54, "learning_rate": 1.8845545229257908e-05, "loss": 0.3777, "step": 833 }, { "epoch": 0.54, "learning_rate": 1.8842281650659708e-05, "loss": 0.3571, "step": 834 }, { "epoch": 0.54, "learning_rate": 1.8839013749001788e-05, "loss": 0.3438, "step": 835 }, { "epoch": 0.54, "learning_rate": 1.883574152588185e-05, "loss": 0.377, "step": 836 }, { "epoch": 0.54, "learning_rate": 1.883246498289972e-05, "loss": 0.3682, "step": 837 }, { "epoch": 0.54, "learning_rate": 1.882918412165731e-05, "loss": 0.392, "step": 838 }, { "epoch": 0.54, "learning_rate": 1.8825898943758672e-05, "loss": 0.3877, "step": 839 }, { "epoch": 0.54, "learning_rate": 1.8822609450809945e-05, "loss": 0.3598, "step": 840 }, { "epoch": 0.54, "learning_rate": 1.8819315644419392e-05, "loss": 0.363, "step": 841 }, { "epoch": 0.55, "learning_rate": 1.8816017526197383e-05, "loss": 0.3595, "step": 842 }, { "epoch": 0.55, "learning_rate": 1.8812715097756386e-05, "loss": 0.3839, "step": 843 }, { "epoch": 0.55, "learning_rate": 1.880940836071099e-05, "loss": 0.3617, "step": 844 }, { "epoch": 0.55, "learning_rate": 1.880609731667788e-05, "loss": 0.3801, "step": 845 }, { "epoch": 0.55, "learning_rate": 1.880278196727585e-05, "loss": 0.3777, "step": 846 }, { "epoch": 0.55, "learning_rate": 1.879946231412581e-05, "loss": 0.3681, "step": 847 }, { "epoch": 0.55, "learning_rate": 1.879613835885075e-05, "loss": 0.358, "step": 848 }, { "epoch": 0.55, "learning_rate": 1.8792810103075793e-05, "loss": 0.3333, "step": 849 }, { "epoch": 0.55, "learning_rate": 1.878947754842813e-05, "loss": 0.3613, "step": 850 }, { "epoch": 0.55, "learning_rate": 1.878614069653709e-05, "loss": 0.38, "step": 851 }, { "epoch": 0.55, "learning_rate": 1.8782799549034078e-05, "loss": 0.3547, "step": 852 }, { "epoch": 0.55, "learning_rate": 1.87794541075526e-05, "loss": 0.3401, "step": 853 }, { "epoch": 0.55, "learning_rate": 1.8776104373728283e-05, "loss": 0.342, "step": 854 }, { "epoch": 0.55, "learning_rate": 1.8772750349198827e-05, "loss": 0.3497, "step": 855 }, { "epoch": 0.55, "learning_rate": 1.8769392035604042e-05, "loss": 0.33, "step": 856 }, { "epoch": 0.56, "learning_rate": 1.876602943458584e-05, "loss": 0.3664, "step": 857 }, { "epoch": 0.56, "learning_rate": 1.8762662547788216e-05, "loss": 0.356, "step": 858 }, { "epoch": 0.56, "learning_rate": 1.8759291376857268e-05, "loss": 0.3489, "step": 859 }, { "epoch": 0.56, "learning_rate": 1.875591592344119e-05, "loss": 0.373, "step": 860 }, { "epoch": 0.56, "learning_rate": 1.875253618919026e-05, "loss": 0.369, "step": 861 }, { "epoch": 0.56, "learning_rate": 1.8749152175756865e-05, "loss": 0.3732, "step": 862 }, { "epoch": 0.56, "learning_rate": 1.874576388479547e-05, "loss": 0.3728, "step": 863 }, { "epoch": 0.56, "learning_rate": 1.874237131796264e-05, "loss": 0.3443, "step": 864 }, { "epoch": 0.56, "learning_rate": 1.8738974476917026e-05, "loss": 0.3454, "step": 865 }, { "epoch": 0.56, "learning_rate": 1.8735573363319367e-05, "loss": 0.3655, "step": 866 }, { "epoch": 0.56, "learning_rate": 1.8732167978832493e-05, "loss": 0.3647, "step": 867 }, { "epoch": 0.56, "learning_rate": 1.872875832512133e-05, "loss": 0.3613, "step": 868 }, { "epoch": 0.56, "learning_rate": 1.8725344403852877e-05, "loss": 0.3659, "step": 869 }, { "epoch": 0.56, "learning_rate": 1.872192621669623e-05, "loss": 0.372, "step": 870 }, { "epoch": 0.56, "learning_rate": 1.8718503765322568e-05, "loss": 0.3587, "step": 871 }, { "epoch": 0.56, "learning_rate": 1.871507705140515e-05, "loss": 0.3656, "step": 872 }, { "epoch": 0.57, "learning_rate": 1.8711646076619325e-05, "loss": 0.3467, "step": 873 }, { "epoch": 0.57, "learning_rate": 1.8708210842642524e-05, "loss": 0.3678, "step": 874 }, { "epoch": 0.57, "learning_rate": 1.8704771351154257e-05, "loss": 0.375, "step": 875 }, { "epoch": 0.57, "learning_rate": 1.870132760383612e-05, "loss": 0.3747, "step": 876 }, { "epoch": 0.57, "learning_rate": 1.8697879602371785e-05, "loss": 0.3434, "step": 877 }, { "epoch": 0.57, "learning_rate": 1.8694427348447012e-05, "loss": 0.3408, "step": 878 }, { "epoch": 0.57, "learning_rate": 1.8690970843749626e-05, "loss": 0.3651, "step": 879 }, { "epoch": 0.57, "learning_rate": 1.868751008996955e-05, "loss": 0.3661, "step": 880 }, { "epoch": 0.57, "learning_rate": 1.8684045088798762e-05, "loss": 0.3365, "step": 881 }, { "epoch": 0.57, "learning_rate": 1.8680575841931336e-05, "loss": 0.3456, "step": 882 }, { "epoch": 0.57, "learning_rate": 1.8677102351063406e-05, "loss": 0.3688, "step": 883 }, { "epoch": 0.57, "learning_rate": 1.8673624617893197e-05, "loss": 0.3459, "step": 884 }, { "epoch": 0.57, "learning_rate": 1.867014264412099e-05, "loss": 0.3636, "step": 885 }, { "epoch": 0.57, "learning_rate": 1.8666656431449162e-05, "loss": 0.3604, "step": 886 }, { "epoch": 0.57, "learning_rate": 1.8663165981582137e-05, "loss": 0.3304, "step": 887 }, { "epoch": 0.58, "learning_rate": 1.865967129622643e-05, "loss": 0.3492, "step": 888 }, { "epoch": 0.58, "learning_rate": 1.8656172377090614e-05, "loss": 0.3677, "step": 889 }, { "epoch": 0.58, "learning_rate": 1.865266922588534e-05, "loss": 0.3288, "step": 890 }, { "epoch": 0.58, "learning_rate": 1.8649161844323327e-05, "loss": 0.3369, "step": 891 }, { "epoch": 0.58, "learning_rate": 1.864565023411936e-05, "loss": 0.3348, "step": 892 }, { "epoch": 0.58, "learning_rate": 1.8642134396990294e-05, "loss": 0.3647, "step": 893 }, { "epoch": 0.58, "learning_rate": 1.8638614334655046e-05, "loss": 0.3487, "step": 894 }, { "epoch": 0.58, "learning_rate": 1.8635090048834607e-05, "loss": 0.3488, "step": 895 }, { "epoch": 0.58, "learning_rate": 1.8631561541252017e-05, "loss": 0.371, "step": 896 }, { "epoch": 0.58, "learning_rate": 1.86280288136324e-05, "loss": 0.3598, "step": 897 }, { "epoch": 0.58, "learning_rate": 1.8624491867702933e-05, "loss": 0.3687, "step": 898 }, { "epoch": 0.58, "learning_rate": 1.8620950705192852e-05, "loss": 0.3565, "step": 899 }, { "epoch": 0.58, "learning_rate": 1.861740532783346e-05, "loss": 0.366, "step": 900 }, { "epoch": 0.58, "learning_rate": 1.861385573735812e-05, "loss": 0.3702, "step": 901 }, { "epoch": 0.58, "learning_rate": 1.8610301935502255e-05, "loss": 0.349, "step": 902 }, { "epoch": 0.58, "learning_rate": 1.8606743924003346e-05, "loss": 0.353, "step": 903 }, { "epoch": 0.59, "learning_rate": 1.860318170460093e-05, "loss": 0.3745, "step": 904 }, { "epoch": 0.59, "learning_rate": 1.859961527903661e-05, "loss": 0.3629, "step": 905 }, { "epoch": 0.59, "learning_rate": 1.8596044649054025e-05, "loss": 0.3919, "step": 906 }, { "epoch": 0.59, "learning_rate": 1.85924698163989e-05, "loss": 0.3455, "step": 907 }, { "epoch": 0.59, "learning_rate": 1.858889078281899e-05, "loss": 0.3692, "step": 908 }, { "epoch": 0.59, "learning_rate": 1.858530755006411e-05, "loss": 0.3494, "step": 909 }, { "epoch": 0.59, "learning_rate": 1.8581720119886136e-05, "loss": 0.3599, "step": 910 }, { "epoch": 0.59, "learning_rate": 1.8578128494038984e-05, "loss": 0.3664, "step": 911 }, { "epoch": 0.59, "learning_rate": 1.8574532674278637e-05, "loss": 0.3688, "step": 912 }, { "epoch": 0.59, "learning_rate": 1.8570932662363108e-05, "loss": 0.3462, "step": 913 }, { "epoch": 0.59, "learning_rate": 1.8567328460052475e-05, "loss": 0.3812, "step": 914 }, { "epoch": 0.59, "learning_rate": 1.8563720069108867e-05, "loss": 0.3632, "step": 915 }, { "epoch": 0.59, "learning_rate": 1.856010749129644e-05, "loss": 0.3609, "step": 916 }, { "epoch": 0.59, "learning_rate": 1.8556490728381426e-05, "loss": 0.344, "step": 917 }, { "epoch": 0.59, "learning_rate": 1.855286978213208e-05, "loss": 0.3449, "step": 918 }, { "epoch": 0.6, "learning_rate": 1.8549244654318714e-05, "loss": 0.3509, "step": 919 }, { "epoch": 0.6, "learning_rate": 1.8545615346713674e-05, "loss": 0.3676, "step": 920 }, { "epoch": 0.6, "learning_rate": 1.8541981861091364e-05, "loss": 0.3814, "step": 921 }, { "epoch": 0.6, "learning_rate": 1.8538344199228223e-05, "loss": 0.3337, "step": 922 }, { "epoch": 0.6, "learning_rate": 1.853470236290273e-05, "loss": 0.3446, "step": 923 }, { "epoch": 0.6, "learning_rate": 1.8531056353895403e-05, "loss": 0.3544, "step": 924 }, { "epoch": 0.6, "learning_rate": 1.852740617398881e-05, "loss": 0.364, "step": 925 }, { "epoch": 0.6, "learning_rate": 1.852375182496755e-05, "loss": 0.3355, "step": 926 }, { "epoch": 0.6, "learning_rate": 1.852009330861826e-05, "loss": 0.3492, "step": 927 }, { "epoch": 0.6, "learning_rate": 1.851643062672962e-05, "loss": 0.3471, "step": 928 }, { "epoch": 0.6, "learning_rate": 1.8512763781092343e-05, "loss": 0.3471, "step": 929 }, { "epoch": 0.6, "learning_rate": 1.8509092773499178e-05, "loss": 0.3514, "step": 930 }, { "epoch": 0.6, "learning_rate": 1.850541760574491e-05, "loss": 0.3438, "step": 931 }, { "epoch": 0.6, "learning_rate": 1.850173827962635e-05, "loss": 0.337, "step": 932 }, { "epoch": 0.6, "learning_rate": 1.849805479694236e-05, "loss": 0.3678, "step": 933 }, { "epoch": 0.6, "learning_rate": 1.849436715949381e-05, "loss": 0.36, "step": 934 }, { "epoch": 0.61, "learning_rate": 1.849067536908363e-05, "loss": 0.3406, "step": 935 }, { "epoch": 0.61, "learning_rate": 1.8486979427516753e-05, "loss": 0.3767, "step": 936 }, { "epoch": 0.61, "learning_rate": 1.8483279336600155e-05, "loss": 0.3565, "step": 937 }, { "epoch": 0.61, "learning_rate": 1.8479575098142845e-05, "loss": 0.3798, "step": 938 }, { "epoch": 0.61, "learning_rate": 1.8475866713955846e-05, "loss": 0.3613, "step": 939 }, { "epoch": 0.61, "learning_rate": 1.847215418585222e-05, "loss": 0.3436, "step": 940 }, { "epoch": 0.61, "learning_rate": 1.846843751564705e-05, "loss": 0.348, "step": 941 }, { "epoch": 0.61, "learning_rate": 1.8464716705157446e-05, "loss": 0.3657, "step": 942 }, { "epoch": 0.61, "learning_rate": 1.846099175620254e-05, "loss": 0.3419, "step": 943 }, { "epoch": 0.61, "learning_rate": 1.8457262670603485e-05, "loss": 0.3663, "step": 944 }, { "epoch": 0.61, "learning_rate": 1.8453529450183464e-05, "loss": 0.3489, "step": 945 }, { "epoch": 0.61, "learning_rate": 1.844979209676768e-05, "loss": 0.3413, "step": 946 }, { "epoch": 0.61, "learning_rate": 1.8446050612183345e-05, "loss": 0.362, "step": 947 }, { "epoch": 0.61, "learning_rate": 1.8442304998259705e-05, "loss": 0.3534, "step": 948 }, { "epoch": 0.61, "learning_rate": 1.843855525682802e-05, "loss": 0.336, "step": 949 }, { "epoch": 0.62, "learning_rate": 1.8434801389721568e-05, "loss": 0.3654, "step": 950 }, { "epoch": 0.62, "learning_rate": 1.8431043398775642e-05, "loss": 0.365, "step": 951 }, { "epoch": 0.62, "learning_rate": 1.8427281285827548e-05, "loss": 0.3638, "step": 952 }, { "epoch": 0.62, "learning_rate": 1.8423515052716622e-05, "loss": 0.3609, "step": 953 }, { "epoch": 0.62, "learning_rate": 1.8419744701284198e-05, "loss": 0.3737, "step": 954 }, { "epoch": 0.62, "learning_rate": 1.8415970233373632e-05, "loss": 0.3567, "step": 955 }, { "epoch": 0.62, "learning_rate": 1.841219165083029e-05, "loss": 0.3439, "step": 956 }, { "epoch": 0.62, "learning_rate": 1.840840895550155e-05, "loss": 0.3606, "step": 957 }, { "epoch": 0.62, "learning_rate": 1.84046221492368e-05, "loss": 0.3568, "step": 958 }, { "epoch": 0.62, "learning_rate": 1.8400831233887437e-05, "loss": 0.368, "step": 959 }, { "epoch": 0.62, "learning_rate": 1.8397036211306875e-05, "loss": 0.3768, "step": 960 }, { "epoch": 0.62, "learning_rate": 1.8393237083350525e-05, "loss": 0.3517, "step": 961 }, { "epoch": 0.62, "learning_rate": 1.838943385187581e-05, "loss": 0.3341, "step": 962 }, { "epoch": 0.62, "learning_rate": 1.8385626518742164e-05, "loss": 0.3393, "step": 963 }, { "epoch": 0.62, "learning_rate": 1.8381815085811016e-05, "loss": 0.3414, "step": 964 }, { "epoch": 0.62, "learning_rate": 1.8377999554945805e-05, "loss": 0.3783, "step": 965 }, { "epoch": 0.63, "learning_rate": 1.8374179928011977e-05, "loss": 0.3488, "step": 966 }, { "epoch": 0.63, "learning_rate": 1.8370356206876975e-05, "loss": 0.3423, "step": 967 }, { "epoch": 0.63, "learning_rate": 1.8366528393410247e-05, "loss": 0.3769, "step": 968 }, { "epoch": 0.63, "learning_rate": 1.8362696489483242e-05, "loss": 0.3272, "step": 969 }, { "epoch": 0.63, "learning_rate": 1.83588604969694e-05, "loss": 0.35, "step": 970 }, { "epoch": 0.63, "learning_rate": 1.835502041774418e-05, "loss": 0.3482, "step": 971 }, { "epoch": 0.63, "learning_rate": 1.835117625368501e-05, "loss": 0.339, "step": 972 }, { "epoch": 0.63, "learning_rate": 1.8347328006671346e-05, "loss": 0.358, "step": 973 }, { "epoch": 0.63, "learning_rate": 1.834347567858462e-05, "loss": 0.3593, "step": 974 }, { "epoch": 0.63, "learning_rate": 1.8339619271308263e-05, "loss": 0.3402, "step": 975 }, { "epoch": 0.63, "learning_rate": 1.8335758786727705e-05, "loss": 0.3438, "step": 976 }, { "epoch": 0.63, "learning_rate": 1.8331894226730364e-05, "loss": 0.3658, "step": 977 }, { "epoch": 0.63, "learning_rate": 1.8328025593205657e-05, "loss": 0.3489, "step": 978 }, { "epoch": 0.63, "learning_rate": 1.8324152888044987e-05, "loss": 0.3491, "step": 979 }, { "epoch": 0.63, "learning_rate": 1.8320276113141747e-05, "loss": 0.3571, "step": 980 }, { "epoch": 0.64, "learning_rate": 1.8316395270391327e-05, "loss": 0.3262, "step": 981 }, { "epoch": 0.64, "learning_rate": 1.8312510361691097e-05, "loss": 0.3496, "step": 982 }, { "epoch": 0.64, "learning_rate": 1.8308621388940423e-05, "loss": 0.3618, "step": 983 }, { "epoch": 0.64, "learning_rate": 1.8304728354040647e-05, "loss": 0.3395, "step": 984 }, { "epoch": 0.64, "learning_rate": 1.830083125889511e-05, "loss": 0.3523, "step": 985 }, { "epoch": 0.64, "learning_rate": 1.829693010540913e-05, "loss": 0.3597, "step": 986 }, { "epoch": 0.64, "learning_rate": 1.829302489549001e-05, "loss": 0.3539, "step": 987 }, { "epoch": 0.64, "learning_rate": 1.8289115631047035e-05, "loss": 0.3501, "step": 988 }, { "epoch": 0.64, "learning_rate": 1.8285202313991483e-05, "loss": 0.3771, "step": 989 }, { "epoch": 0.64, "learning_rate": 1.8281284946236596e-05, "loss": 0.3489, "step": 990 }, { "epoch": 0.64, "learning_rate": 1.8277363529697608e-05, "loss": 0.3319, "step": 991 }, { "epoch": 0.64, "learning_rate": 1.8273438066291733e-05, "loss": 0.3204, "step": 992 }, { "epoch": 0.64, "learning_rate": 1.826950855793816e-05, "loss": 0.3679, "step": 993 }, { "epoch": 0.64, "learning_rate": 1.826557500655805e-05, "loss": 0.3719, "step": 994 }, { "epoch": 0.64, "learning_rate": 1.8261637414074553e-05, "loss": 0.3553, "step": 995 }, { "epoch": 0.65, "learning_rate": 1.8257695782412783e-05, "loss": 0.3671, "step": 996 }, { "epoch": 0.65, "learning_rate": 1.8253750113499838e-05, "loss": 0.3674, "step": 997 }, { "epoch": 0.65, "learning_rate": 1.8249800409264784e-05, "loss": 0.3847, "step": 998 }, { "epoch": 0.65, "learning_rate": 1.8245846671638665e-05, "loss": 0.3443, "step": 999 }, { "epoch": 0.65, "learning_rate": 1.824188890255449e-05, "loss": 0.3384, "step": 1000 }, { "epoch": 0.65, "learning_rate": 1.8237927103947247e-05, "loss": 0.3496, "step": 1001 }, { "epoch": 0.65, "learning_rate": 1.8233961277753886e-05, "loss": 0.3664, "step": 1002 }, { "epoch": 0.65, "learning_rate": 1.8229991425913328e-05, "loss": 0.3443, "step": 1003 }, { "epoch": 0.65, "learning_rate": 1.8226017550366472e-05, "loss": 0.3602, "step": 1004 }, { "epoch": 0.65, "learning_rate": 1.822203965305617e-05, "loss": 0.3216, "step": 1005 }, { "epoch": 0.65, "learning_rate": 1.821805773592725e-05, "loss": 0.3601, "step": 1006 }, { "epoch": 0.65, "learning_rate": 1.8214071800926497e-05, "loss": 0.3552, "step": 1007 }, { "epoch": 0.65, "learning_rate": 1.8210081850002674e-05, "loss": 0.3834, "step": 1008 }, { "epoch": 0.65, "learning_rate": 1.8206087885106488e-05, "loss": 0.3411, "step": 1009 }, { "epoch": 0.65, "learning_rate": 1.820208990819063e-05, "loss": 0.3804, "step": 1010 }, { "epoch": 0.65, "learning_rate": 1.8198087921209736e-05, "loss": 0.3408, "step": 1011 }, { "epoch": 0.66, "learning_rate": 1.8194081926120407e-05, "loss": 0.3601, "step": 1012 }, { "epoch": 0.66, "learning_rate": 1.8190071924881212e-05, "loss": 0.3295, "step": 1013 }, { "epoch": 0.66, "learning_rate": 1.8186057919452663e-05, "loss": 0.3534, "step": 1014 }, { "epoch": 0.66, "learning_rate": 1.8182039911797242e-05, "loss": 0.3173, "step": 1015 }, { "epoch": 0.66, "learning_rate": 1.8178017903879383e-05, "loss": 0.3442, "step": 1016 }, { "epoch": 0.66, "learning_rate": 1.8173991897665478e-05, "loss": 0.3826, "step": 1017 }, { "epoch": 0.66, "learning_rate": 1.8169961895123874e-05, "loss": 0.341, "step": 1018 }, { "epoch": 0.66, "learning_rate": 1.8165927898224866e-05, "loss": 0.3255, "step": 1019 }, { "epoch": 0.66, "learning_rate": 1.8161889908940708e-05, "loss": 0.3241, "step": 1020 }, { "epoch": 0.66, "learning_rate": 1.8157847929245605e-05, "loss": 0.392, "step": 1021 }, { "epoch": 0.66, "learning_rate": 1.8153801961115714e-05, "loss": 0.346, "step": 1022 }, { "epoch": 0.66, "learning_rate": 1.8149752006529136e-05, "loss": 0.3727, "step": 1023 }, { "epoch": 0.66, "learning_rate": 1.8145698067465926e-05, "loss": 0.3675, "step": 1024 }, { "epoch": 0.66, "learning_rate": 1.8141640145908088e-05, "loss": 0.3312, "step": 1025 }, { "epoch": 0.66, "learning_rate": 1.813757824383957e-05, "loss": 0.3566, "step": 1026 }, { "epoch": 0.67, "learning_rate": 1.8133512363246266e-05, "loss": 0.3376, "step": 1027 }, { "epoch": 0.67, "learning_rate": 1.812944250611602e-05, "loss": 0.3273, "step": 1028 }, { "epoch": 0.67, "learning_rate": 1.8125368674438614e-05, "loss": 0.3556, "step": 1029 }, { "epoch": 0.67, "learning_rate": 1.812129087020577e-05, "loss": 0.3231, "step": 1030 }, { "epoch": 0.67, "learning_rate": 1.811720909541117e-05, "loss": 0.3529, "step": 1031 }, { "epoch": 0.67, "learning_rate": 1.8113123352050417e-05, "loss": 0.3572, "step": 1032 }, { "epoch": 0.67, "learning_rate": 1.8109033642121067e-05, "loss": 0.3259, "step": 1033 }, { "epoch": 0.67, "learning_rate": 1.8104939967622606e-05, "loss": 0.3708, "step": 1034 }, { "epoch": 0.67, "learning_rate": 1.8100842330556467e-05, "loss": 0.3514, "step": 1035 }, { "epoch": 0.67, "learning_rate": 1.8096740732926014e-05, "loss": 0.3374, "step": 1036 }, { "epoch": 0.67, "learning_rate": 1.8092635176736548e-05, "loss": 0.3489, "step": 1037 }, { "epoch": 0.67, "learning_rate": 1.8088525663995314e-05, "loss": 0.349, "step": 1038 }, { "epoch": 0.67, "learning_rate": 1.8084412196711474e-05, "loss": 0.3574, "step": 1039 }, { "epoch": 0.67, "learning_rate": 1.8080294776896146e-05, "loss": 0.3648, "step": 1040 }, { "epoch": 0.67, "learning_rate": 1.8076173406562358e-05, "loss": 0.3267, "step": 1041 }, { "epoch": 0.67, "learning_rate": 1.8072048087725085e-05, "loss": 0.3392, "step": 1042 }, { "epoch": 0.68, "learning_rate": 1.806791882240123e-05, "loss": 0.353, "step": 1043 }, { "epoch": 0.68, "learning_rate": 1.8063785612609613e-05, "loss": 0.3394, "step": 1044 }, { "epoch": 0.68, "learning_rate": 1.8059648460371e-05, "loss": 0.3489, "step": 1045 }, { "epoch": 0.68, "learning_rate": 1.8055507367708074e-05, "loss": 0.3429, "step": 1046 }, { "epoch": 0.68, "learning_rate": 1.805136233664545e-05, "loss": 0.35, "step": 1047 }, { "epoch": 0.68, "learning_rate": 1.8047213369209662e-05, "loss": 0.3484, "step": 1048 }, { "epoch": 0.68, "learning_rate": 1.8043060467429173e-05, "loss": 0.3471, "step": 1049 }, { "epoch": 0.68, "learning_rate": 1.8038903633334367e-05, "loss": 0.354, "step": 1050 }, { "epoch": 0.68, "learning_rate": 1.803474286895756e-05, "loss": 0.3551, "step": 1051 }, { "epoch": 0.68, "learning_rate": 1.8030578176332977e-05, "loss": 0.3756, "step": 1052 }, { "epoch": 0.68, "learning_rate": 1.8026409557496763e-05, "loss": 0.3351, "step": 1053 }, { "epoch": 0.68, "learning_rate": 1.8022237014486994e-05, "loss": 0.3629, "step": 1054 }, { "epoch": 0.68, "learning_rate": 1.8018060549343655e-05, "loss": 0.3256, "step": 1055 }, { "epoch": 0.68, "learning_rate": 1.8013880164108658e-05, "loss": 0.3603, "step": 1056 }, { "epoch": 0.68, "learning_rate": 1.800969586082582e-05, "loss": 0.3589, "step": 1057 }, { "epoch": 0.69, "learning_rate": 1.8005507641540882e-05, "loss": 0.343, "step": 1058 }, { "epoch": 0.69, "learning_rate": 1.8001315508301494e-05, "loss": 0.3427, "step": 1059 }, { "epoch": 0.69, "learning_rate": 1.7997119463157225e-05, "loss": 0.3382, "step": 1060 }, { "epoch": 0.69, "learning_rate": 1.7992919508159555e-05, "loss": 0.3509, "step": 1061 }, { "epoch": 0.69, "learning_rate": 1.798871564536187e-05, "loss": 0.3596, "step": 1062 }, { "epoch": 0.69, "learning_rate": 1.7984507876819474e-05, "loss": 0.3247, "step": 1063 }, { "epoch": 0.69, "learning_rate": 1.7980296204589574e-05, "loss": 0.3687, "step": 1064 }, { "epoch": 0.69, "learning_rate": 1.7976080630731294e-05, "loss": 0.3678, "step": 1065 }, { "epoch": 0.69, "learning_rate": 1.7971861157305656e-05, "loss": 0.3305, "step": 1066 }, { "epoch": 0.69, "learning_rate": 1.79676377863756e-05, "loss": 0.3495, "step": 1067 }, { "epoch": 0.69, "learning_rate": 1.7963410520005956e-05, "loss": 0.3629, "step": 1068 }, { "epoch": 0.69, "learning_rate": 1.795917936026347e-05, "loss": 0.3601, "step": 1069 }, { "epoch": 0.69, "learning_rate": 1.7954944309216794e-05, "loss": 0.3344, "step": 1070 }, { "epoch": 0.69, "learning_rate": 1.795070536893647e-05, "loss": 0.3428, "step": 1071 }, { "epoch": 0.69, "learning_rate": 1.794646254149495e-05, "loss": 0.354, "step": 1072 }, { "epoch": 0.69, "learning_rate": 1.794221582896659e-05, "loss": 0.3531, "step": 1073 }, { "epoch": 0.7, "learning_rate": 1.7937965233427638e-05, "loss": 0.3562, "step": 1074 }, { "epoch": 0.7, "learning_rate": 1.793371075695624e-05, "loss": 0.3606, "step": 1075 }, { "epoch": 0.7, "learning_rate": 1.7929452401632443e-05, "loss": 0.3658, "step": 1076 }, { "epoch": 0.7, "learning_rate": 1.7925190169538193e-05, "loss": 0.3509, "step": 1077 }, { "epoch": 0.7, "learning_rate": 1.7920924062757327e-05, "loss": 0.3433, "step": 1078 }, { "epoch": 0.7, "learning_rate": 1.7916654083375576e-05, "loss": 0.3265, "step": 1079 }, { "epoch": 0.7, "learning_rate": 1.7912380233480563e-05, "loss": 0.332, "step": 1080 }, { "epoch": 0.7, "learning_rate": 1.7908102515161818e-05, "loss": 0.3388, "step": 1081 }, { "epoch": 0.7, "learning_rate": 1.790382093051074e-05, "loss": 0.3625, "step": 1082 }, { "epoch": 0.7, "learning_rate": 1.7899535481620626e-05, "loss": 0.3546, "step": 1083 }, { "epoch": 0.7, "learning_rate": 1.7895246170586673e-05, "loss": 0.3379, "step": 1084 }, { "epoch": 0.7, "learning_rate": 1.7890952999505956e-05, "loss": 0.3572, "step": 1085 }, { "epoch": 0.7, "learning_rate": 1.7886655970477438e-05, "loss": 0.3522, "step": 1086 }, { "epoch": 0.7, "learning_rate": 1.788235508560197e-05, "loss": 0.3428, "step": 1087 }, { "epoch": 0.7, "learning_rate": 1.787805034698229e-05, "loss": 0.3396, "step": 1088 }, { "epoch": 0.71, "learning_rate": 1.7873741756723016e-05, "loss": 0.351, "step": 1089 }, { "epoch": 0.71, "learning_rate": 1.7869429316930653e-05, "loss": 0.3777, "step": 1090 }, { "epoch": 0.71, "learning_rate": 1.7865113029713582e-05, "loss": 0.3492, "step": 1091 }, { "epoch": 0.71, "learning_rate": 1.7860792897182073e-05, "loss": 0.3417, "step": 1092 }, { "epoch": 0.71, "learning_rate": 1.7856468921448268e-05, "loss": 0.3354, "step": 1093 }, { "epoch": 0.71, "learning_rate": 1.7852141104626198e-05, "loss": 0.3502, "step": 1094 }, { "epoch": 0.71, "learning_rate": 1.7847809448831758e-05, "loss": 0.3403, "step": 1095 }, { "epoch": 0.71, "learning_rate": 1.7843473956182737e-05, "loss": 0.3624, "step": 1096 }, { "epoch": 0.71, "learning_rate": 1.7839134628798787e-05, "loss": 0.3527, "step": 1097 }, { "epoch": 0.71, "learning_rate": 1.7834791468801436e-05, "loss": 0.3527, "step": 1098 }, { "epoch": 0.71, "learning_rate": 1.783044447831409e-05, "loss": 0.3428, "step": 1099 }, { "epoch": 0.71, "learning_rate": 1.782609365946203e-05, "loss": 0.3627, "step": 1100 }, { "epoch": 0.71, "learning_rate": 1.78217390143724e-05, "loss": 0.3333, "step": 1101 }, { "epoch": 0.71, "learning_rate": 1.781738054517422e-05, "loss": 0.3526, "step": 1102 }, { "epoch": 0.71, "learning_rate": 1.781301825399838e-05, "loss": 0.3252, "step": 1103 }, { "epoch": 0.72, "learning_rate": 1.780865214297764e-05, "loss": 0.342, "step": 1104 }, { "epoch": 0.72, "learning_rate": 1.7804282214246626e-05, "loss": 0.3542, "step": 1105 }, { "epoch": 0.72, "learning_rate": 1.779990846994182e-05, "loss": 0.3264, "step": 1106 }, { "epoch": 0.72, "learning_rate": 1.7795530912201593e-05, "loss": 0.3597, "step": 1107 }, { "epoch": 0.72, "learning_rate": 1.779114954316616e-05, "loss": 0.3577, "step": 1108 }, { "epoch": 0.72, "learning_rate": 1.7786764364977605e-05, "loss": 0.3502, "step": 1109 }, { "epoch": 0.72, "learning_rate": 1.7782375379779876e-05, "loss": 0.3455, "step": 1110 }, { "epoch": 0.72, "learning_rate": 1.7777982589718784e-05, "loss": 0.3494, "step": 1111 }, { "epoch": 0.72, "learning_rate": 1.7773585996942e-05, "loss": 0.3677, "step": 1112 }, { "epoch": 0.72, "learning_rate": 1.7769185603599048e-05, "loss": 0.3471, "step": 1113 }, { "epoch": 0.72, "learning_rate": 1.776478141184131e-05, "loss": 0.334, "step": 1114 }, { "epoch": 0.72, "learning_rate": 1.7760373423822042e-05, "loss": 0.3172, "step": 1115 }, { "epoch": 0.72, "learning_rate": 1.7755961641696332e-05, "loss": 0.3279, "step": 1116 }, { "epoch": 0.72, "learning_rate": 1.7751546067621137e-05, "loss": 0.3485, "step": 1117 }, { "epoch": 0.72, "learning_rate": 1.7747126703755267e-05, "loss": 0.3336, "step": 1118 }, { "epoch": 0.72, "learning_rate": 1.7742703552259383e-05, "loss": 0.3336, "step": 1119 }, { "epoch": 0.73, "learning_rate": 1.7738276615296e-05, "loss": 0.3455, "step": 1120 }, { "epoch": 0.73, "learning_rate": 1.7733845895029476e-05, "loss": 0.3628, "step": 1121 }, { "epoch": 0.73, "learning_rate": 1.772941139362603e-05, "loss": 0.3331, "step": 1122 }, { "epoch": 0.73, "learning_rate": 1.772497311325372e-05, "loss": 0.3418, "step": 1123 }, { "epoch": 0.73, "learning_rate": 1.7720531056082454e-05, "loss": 0.3704, "step": 1124 }, { "epoch": 0.73, "learning_rate": 1.7716085224283994e-05, "loss": 0.3397, "step": 1125 }, { "epoch": 0.73, "learning_rate": 1.7711635620031937e-05, "loss": 0.3454, "step": 1126 }, { "epoch": 0.73, "learning_rate": 1.7707182245501732e-05, "loss": 0.3555, "step": 1127 }, { "epoch": 0.73, "learning_rate": 1.7702725102870665e-05, "loss": 0.3447, "step": 1128 }, { "epoch": 0.73, "learning_rate": 1.7698264194317867e-05, "loss": 0.3516, "step": 1129 }, { "epoch": 0.73, "learning_rate": 1.7693799522024315e-05, "loss": 0.343, "step": 1130 }, { "epoch": 0.73, "learning_rate": 1.7689331088172818e-05, "loss": 0.3755, "step": 1131 }, { "epoch": 0.73, "learning_rate": 1.7684858894948028e-05, "loss": 0.3475, "step": 1132 }, { "epoch": 0.73, "learning_rate": 1.7680382944536436e-05, "loss": 0.3415, "step": 1133 }, { "epoch": 0.73, "learning_rate": 1.7675903239126372e-05, "loss": 0.3299, "step": 1134 }, { "epoch": 0.74, "learning_rate": 1.7671419780907992e-05, "loss": 0.3343, "step": 1135 }, { "epoch": 0.74, "learning_rate": 1.7666932572073296e-05, "loss": 0.343, "step": 1136 }, { "epoch": 0.74, "learning_rate": 1.7662441614816115e-05, "loss": 0.3337, "step": 1137 }, { "epoch": 0.74, "learning_rate": 1.7657946911332114e-05, "loss": 0.3379, "step": 1138 }, { "epoch": 0.74, "learning_rate": 1.7653448463818786e-05, "loss": 0.3295, "step": 1139 }, { "epoch": 0.74, "learning_rate": 1.7648946274475455e-05, "loss": 0.3335, "step": 1140 }, { "epoch": 0.74, "learning_rate": 1.7644440345503285e-05, "loss": 0.3395, "step": 1141 }, { "epoch": 0.74, "learning_rate": 1.763993067910525e-05, "loss": 0.3403, "step": 1142 }, { "epoch": 0.74, "learning_rate": 1.7635417277486164e-05, "loss": 0.3491, "step": 1143 }, { "epoch": 0.74, "learning_rate": 1.7630900142852666e-05, "loss": 0.3335, "step": 1144 }, { "epoch": 0.74, "learning_rate": 1.762637927741321e-05, "loss": 0.3496, "step": 1145 }, { "epoch": 0.74, "learning_rate": 1.7621854683378093e-05, "loss": 0.369, "step": 1146 }, { "epoch": 0.74, "learning_rate": 1.7617326362959418e-05, "loss": 0.3362, "step": 1147 }, { "epoch": 0.74, "learning_rate": 1.7612794318371115e-05, "loss": 0.338, "step": 1148 }, { "epoch": 0.74, "learning_rate": 1.7608258551828942e-05, "loss": 0.3431, "step": 1149 }, { "epoch": 0.74, "learning_rate": 1.760371906555046e-05, "loss": 0.3417, "step": 1150 }, { "epoch": 0.75, "learning_rate": 1.759917586175507e-05, "loss": 0.3496, "step": 1151 }, { "epoch": 0.75, "learning_rate": 1.7594628942663966e-05, "loss": 0.3458, "step": 1152 }, { "epoch": 0.75, "learning_rate": 1.7590078310500184e-05, "loss": 0.3531, "step": 1153 }, { "epoch": 0.75, "learning_rate": 1.7585523967488557e-05, "loss": 0.3387, "step": 1154 }, { "epoch": 0.75, "learning_rate": 1.758096591585574e-05, "loss": 0.3491, "step": 1155 }, { "epoch": 0.75, "learning_rate": 1.75764041578302e-05, "loss": 0.3268, "step": 1156 }, { "epoch": 0.75, "learning_rate": 1.7571838695642217e-05, "loss": 0.33, "step": 1157 }, { "epoch": 0.75, "learning_rate": 1.7567269531523876e-05, "loss": 0.3391, "step": 1158 }, { "epoch": 0.75, "learning_rate": 1.7562696667709082e-05, "loss": 0.3581, "step": 1159 }, { "epoch": 0.75, "learning_rate": 1.755812010643354e-05, "loss": 0.3525, "step": 1160 }, { "epoch": 0.75, "learning_rate": 1.7553539849934768e-05, "loss": 0.3387, "step": 1161 }, { "epoch": 0.75, "learning_rate": 1.7548955900452087e-05, "loss": 0.3112, "step": 1162 }, { "epoch": 0.75, "learning_rate": 1.7544368260226624e-05, "loss": 0.3149, "step": 1163 }, { "epoch": 0.75, "learning_rate": 1.7539776931501318e-05, "loss": 0.3321, "step": 1164 }, { "epoch": 0.75, "learning_rate": 1.75351819165209e-05, "loss": 0.3363, "step": 1165 }, { "epoch": 0.76, "learning_rate": 1.7530583217531906e-05, "loss": 0.3454, "step": 1166 }, { "epoch": 0.76, "learning_rate": 1.7525980836782683e-05, "loss": 0.3536, "step": 1167 }, { "epoch": 0.76, "learning_rate": 1.7521374776523364e-05, "loss": 0.3516, "step": 1168 }, { "epoch": 0.76, "learning_rate": 1.751676503900589e-05, "loss": 0.3408, "step": 1169 }, { "epoch": 0.76, "learning_rate": 1.7512151626483997e-05, "loss": 0.3594, "step": 1170 }, { "epoch": 0.76, "learning_rate": 1.7507534541213217e-05, "loss": 0.3338, "step": 1171 }, { "epoch": 0.76, "learning_rate": 1.7502913785450877e-05, "loss": 0.3295, "step": 1172 }, { "epoch": 0.76, "learning_rate": 1.7498289361456106e-05, "loss": 0.3519, "step": 1173 }, { "epoch": 0.76, "learning_rate": 1.7493661271489816e-05, "loss": 0.356, "step": 1174 }, { "epoch": 0.76, "learning_rate": 1.7489029517814714e-05, "loss": 0.3362, "step": 1175 }, { "epoch": 0.76, "learning_rate": 1.7484394102695306e-05, "loss": 0.3514, "step": 1176 }, { "epoch": 0.76, "learning_rate": 1.747975502839788e-05, "loss": 0.3609, "step": 1177 }, { "epoch": 0.76, "learning_rate": 1.747511229719051e-05, "loss": 0.3313, "step": 1178 }, { "epoch": 0.76, "learning_rate": 1.7470465911343072e-05, "loss": 0.3541, "step": 1179 }, { "epoch": 0.76, "learning_rate": 1.7465815873127212e-05, "loss": 0.3663, "step": 1180 }, { "epoch": 0.76, "learning_rate": 1.7461162184816376e-05, "loss": 0.3449, "step": 1181 }, { "epoch": 0.77, "learning_rate": 1.7456504848685782e-05, "loss": 0.3295, "step": 1182 }, { "epoch": 0.77, "learning_rate": 1.745184386701244e-05, "loss": 0.3412, "step": 1183 }, { "epoch": 0.77, "learning_rate": 1.7447179242075146e-05, "loss": 0.3433, "step": 1184 }, { "epoch": 0.77, "learning_rate": 1.7442510976154462e-05, "loss": 0.3352, "step": 1185 }, { "epoch": 0.77, "learning_rate": 1.7437839071532743e-05, "loss": 0.3373, "step": 1186 }, { "epoch": 0.77, "learning_rate": 1.743316353049412e-05, "loss": 0.3431, "step": 1187 }, { "epoch": 0.77, "learning_rate": 1.7428484355324498e-05, "loss": 0.3435, "step": 1188 }, { "epoch": 0.77, "learning_rate": 1.7423801548311566e-05, "loss": 0.3259, "step": 1189 }, { "epoch": 0.77, "learning_rate": 1.7419115111744783e-05, "loss": 0.3256, "step": 1190 }, { "epoch": 0.77, "learning_rate": 1.7414425047915382e-05, "loss": 0.3621, "step": 1191 }, { "epoch": 0.77, "learning_rate": 1.740973135911637e-05, "loss": 0.3343, "step": 1192 }, { "epoch": 0.77, "learning_rate": 1.7405034047642537e-05, "loss": 0.3338, "step": 1193 }, { "epoch": 0.77, "learning_rate": 1.7400333115790424e-05, "loss": 0.3328, "step": 1194 }, { "epoch": 0.77, "learning_rate": 1.739562856585836e-05, "loss": 0.3562, "step": 1195 }, { "epoch": 0.77, "learning_rate": 1.7390920400146433e-05, "loss": 0.3451, "step": 1196 }, { "epoch": 0.78, "learning_rate": 1.7386208620956498e-05, "loss": 0.3242, "step": 1197 }, { "epoch": 0.78, "learning_rate": 1.7381493230592184e-05, "loss": 0.3425, "step": 1198 }, { "epoch": 0.78, "learning_rate": 1.7376774231358882e-05, "loss": 0.3415, "step": 1199 }, { "epoch": 0.78, "learning_rate": 1.7372051625563747e-05, "loss": 0.3597, "step": 1200 }, { "epoch": 0.78, "learning_rate": 1.73673254155157e-05, "loss": 0.3161, "step": 1201 }, { "epoch": 0.78, "learning_rate": 1.7362595603525414e-05, "loss": 0.3425, "step": 1202 }, { "epoch": 0.78, "learning_rate": 1.7357862191905338e-05, "loss": 0.3434, "step": 1203 }, { "epoch": 0.78, "learning_rate": 1.7353125182969667e-05, "loss": 0.3517, "step": 1204 }, { "epoch": 0.78, "learning_rate": 1.734838457903437e-05, "loss": 0.3366, "step": 1205 }, { "epoch": 0.78, "learning_rate": 1.7343640382417155e-05, "loss": 0.3507, "step": 1206 }, { "epoch": 0.78, "learning_rate": 1.7338892595437503e-05, "loss": 0.3617, "step": 1207 }, { "epoch": 0.78, "learning_rate": 1.7334141220416644e-05, "loss": 0.3412, "step": 1208 }, { "epoch": 0.78, "learning_rate": 1.7329386259677553e-05, "loss": 0.3459, "step": 1209 }, { "epoch": 0.78, "learning_rate": 1.732462771554498e-05, "loss": 0.3111, "step": 1210 }, { "epoch": 0.78, "learning_rate": 1.7319865590345404e-05, "loss": 0.3444, "step": 1211 }, { "epoch": 0.78, "learning_rate": 1.7315099886407067e-05, "loss": 0.3233, "step": 1212 }, { "epoch": 0.79, "learning_rate": 1.7310330606059963e-05, "loss": 0.3271, "step": 1213 }, { "epoch": 0.79, "learning_rate": 1.7305557751635826e-05, "loss": 0.3455, "step": 1214 }, { "epoch": 0.79, "learning_rate": 1.730078132546814e-05, "loss": 0.3425, "step": 1215 }, { "epoch": 0.79, "learning_rate": 1.729600132989214e-05, "loss": 0.3193, "step": 1216 }, { "epoch": 0.79, "learning_rate": 1.72912177672448e-05, "loss": 0.3524, "step": 1217 }, { "epoch": 0.79, "learning_rate": 1.7286430639864846e-05, "loss": 0.315, "step": 1218 }, { "epoch": 0.79, "learning_rate": 1.728163995009273e-05, "loss": 0.3386, "step": 1219 }, { "epoch": 0.79, "learning_rate": 1.7276845700270672e-05, "loss": 0.3669, "step": 1220 }, { "epoch": 0.79, "learning_rate": 1.7272047892742608e-05, "loss": 0.3429, "step": 1221 }, { "epoch": 0.79, "learning_rate": 1.7267246529854222e-05, "loss": 0.3467, "step": 1222 }, { "epoch": 0.79, "learning_rate": 1.7262441613952944e-05, "loss": 0.3302, "step": 1223 }, { "epoch": 0.79, "learning_rate": 1.725763314738793e-05, "loss": 0.3548, "step": 1224 }, { "epoch": 0.79, "learning_rate": 1.725282113251008e-05, "loss": 0.3451, "step": 1225 }, { "epoch": 0.79, "learning_rate": 1.724800557167202e-05, "loss": 0.317, "step": 1226 }, { "epoch": 0.79, "learning_rate": 1.724318646722812e-05, "loss": 0.3243, "step": 1227 }, { "epoch": 0.8, "learning_rate": 1.7238363821534477e-05, "loss": 0.3823, "step": 1228 }, { "epoch": 0.8, "learning_rate": 1.7233537636948913e-05, "loss": 0.3418, "step": 1229 }, { "epoch": 0.8, "learning_rate": 1.7228707915830992e-05, "loss": 0.3348, "step": 1230 }, { "epoch": 0.8, "learning_rate": 1.7223874660542007e-05, "loss": 0.3455, "step": 1231 }, { "epoch": 0.8, "learning_rate": 1.7219037873444963e-05, "loss": 0.3278, "step": 1232 }, { "epoch": 0.8, "learning_rate": 1.7214197556904607e-05, "loss": 0.3276, "step": 1233 }, { "epoch": 0.8, "learning_rate": 1.720935371328741e-05, "loss": 0.3545, "step": 1234 }, { "epoch": 0.8, "learning_rate": 1.720450634496156e-05, "loss": 0.3223, "step": 1235 }, { "epoch": 0.8, "learning_rate": 1.719965545429698e-05, "loss": 0.3387, "step": 1236 }, { "epoch": 0.8, "learning_rate": 1.7194801043665303e-05, "loss": 0.3301, "step": 1237 }, { "epoch": 0.8, "learning_rate": 1.7189943115439888e-05, "loss": 0.3387, "step": 1238 }, { "epoch": 0.8, "learning_rate": 1.718508167199581e-05, "loss": 0.3515, "step": 1239 }, { "epoch": 0.8, "learning_rate": 1.7180216715709873e-05, "loss": 0.3451, "step": 1240 }, { "epoch": 0.8, "learning_rate": 1.7175348248960584e-05, "loss": 0.3331, "step": 1241 }, { "epoch": 0.8, "learning_rate": 1.7170476274128182e-05, "loss": 0.3312, "step": 1242 }, { "epoch": 0.81, "learning_rate": 1.7165600793594608e-05, "loss": 0.3565, "step": 1243 }, { "epoch": 0.81, "learning_rate": 1.7160721809743525e-05, "loss": 0.3384, "step": 1244 }, { "epoch": 0.81, "learning_rate": 1.7155839324960303e-05, "loss": 0.322, "step": 1245 }, { "epoch": 0.81, "learning_rate": 1.7150953341632027e-05, "loss": 0.3506, "step": 1246 }, { "epoch": 0.81, "learning_rate": 1.7146063862147495e-05, "loss": 0.3102, "step": 1247 }, { "epoch": 0.81, "learning_rate": 1.7141170888897208e-05, "loss": 0.3633, "step": 1248 }, { "epoch": 0.81, "learning_rate": 1.713627442427338e-05, "loss": 0.323, "step": 1249 }, { "epoch": 0.81, "learning_rate": 1.7131374470669926e-05, "loss": 0.3362, "step": 1250 }, { "epoch": 0.81, "learning_rate": 1.712647103048248e-05, "loss": 0.3637, "step": 1251 }, { "epoch": 0.81, "learning_rate": 1.7121564106108363e-05, "loss": 0.3124, "step": 1252 }, { "epoch": 0.81, "learning_rate": 1.7116653699946614e-05, "loss": 0.323, "step": 1253 }, { "epoch": 0.81, "learning_rate": 1.711173981439796e-05, "loss": 0.3418, "step": 1254 }, { "epoch": 0.81, "learning_rate": 1.7106822451864854e-05, "loss": 0.3116, "step": 1255 }, { "epoch": 0.81, "learning_rate": 1.7101901614751416e-05, "loss": 0.3512, "step": 1256 }, { "epoch": 0.81, "learning_rate": 1.7096977305463486e-05, "loss": 0.3532, "step": 1257 }, { "epoch": 0.81, "learning_rate": 1.7092049526408604e-05, "loss": 0.3316, "step": 1258 }, { "epoch": 0.82, "learning_rate": 1.7087118279995987e-05, "loss": 0.3392, "step": 1259 }, { "epoch": 0.82, "learning_rate": 1.708218356863657e-05, "loss": 0.3159, "step": 1260 }, { "epoch": 0.82, "learning_rate": 1.7077245394742967e-05, "loss": 0.3279, "step": 1261 }, { "epoch": 0.82, "learning_rate": 1.707230376072949e-05, "loss": 0.3363, "step": 1262 }, { "epoch": 0.82, "learning_rate": 1.7067358669012138e-05, "loss": 0.323, "step": 1263 }, { "epoch": 0.82, "learning_rate": 1.7062410122008613e-05, "loss": 0.3628, "step": 1264 }, { "epoch": 0.82, "learning_rate": 1.705745812213829e-05, "loss": 0.3337, "step": 1265 }, { "epoch": 0.82, "learning_rate": 1.7052502671822243e-05, "loss": 0.3256, "step": 1266 }, { "epoch": 0.82, "learning_rate": 1.704754377348323e-05, "loss": 0.3394, "step": 1267 }, { "epoch": 0.82, "learning_rate": 1.7042581429545695e-05, "loss": 0.3656, "step": 1268 }, { "epoch": 0.82, "learning_rate": 1.7037615642435766e-05, "loss": 0.3561, "step": 1269 }, { "epoch": 0.82, "learning_rate": 1.7032646414581255e-05, "loss": 0.332, "step": 1270 }, { "epoch": 0.82, "learning_rate": 1.7027673748411653e-05, "loss": 0.3269, "step": 1271 }, { "epoch": 0.82, "learning_rate": 1.7022697646358137e-05, "loss": 0.3465, "step": 1272 }, { "epoch": 0.82, "learning_rate": 1.7017718110853564e-05, "loss": 0.3279, "step": 1273 }, { "epoch": 0.83, "learning_rate": 1.7012735144332462e-05, "loss": 0.3543, "step": 1274 }, { "epoch": 0.83, "learning_rate": 1.7007748749231045e-05, "loss": 0.3447, "step": 1275 }, { "epoch": 0.83, "learning_rate": 1.70027589279872e-05, "loss": 0.3246, "step": 1276 }, { "epoch": 0.83, "learning_rate": 1.6997765683040494e-05, "loss": 0.3237, "step": 1277 }, { "epoch": 0.83, "learning_rate": 1.699276901683215e-05, "loss": 0.3309, "step": 1278 }, { "epoch": 0.83, "learning_rate": 1.6987768931805088e-05, "loss": 0.3413, "step": 1279 }, { "epoch": 0.83, "learning_rate": 1.6982765430403885e-05, "loss": 0.3462, "step": 1280 }, { "epoch": 0.83, "learning_rate": 1.697775851507479e-05, "loss": 0.3565, "step": 1281 }, { "epoch": 0.83, "learning_rate": 1.6972748188265726e-05, "loss": 0.3238, "step": 1282 }, { "epoch": 0.83, "learning_rate": 1.6967734452426277e-05, "loss": 0.3307, "step": 1283 }, { "epoch": 0.83, "learning_rate": 1.69627173100077e-05, "loss": 0.3397, "step": 1284 }, { "epoch": 0.83, "learning_rate": 1.695769676346291e-05, "loss": 0.331, "step": 1285 }, { "epoch": 0.83, "learning_rate": 1.6952672815246492e-05, "loss": 0.3498, "step": 1286 }, { "epoch": 0.83, "learning_rate": 1.6947645467814697e-05, "loss": 0.3502, "step": 1287 }, { "epoch": 0.83, "learning_rate": 1.6942614723625435e-05, "loss": 0.3248, "step": 1288 }, { "epoch": 0.83, "learning_rate": 1.693758058513827e-05, "loss": 0.3195, "step": 1289 }, { "epoch": 0.84, "learning_rate": 1.693254305481443e-05, "loss": 0.3422, "step": 1290 }, { "epoch": 0.84, "learning_rate": 1.692750213511681e-05, "loss": 0.3393, "step": 1291 }, { "epoch": 0.84, "learning_rate": 1.692245782850995e-05, "loss": 0.3383, "step": 1292 }, { "epoch": 0.84, "learning_rate": 1.691741013746005e-05, "loss": 0.3764, "step": 1293 }, { "epoch": 0.84, "learning_rate": 1.6912359064434967e-05, "loss": 0.3569, "step": 1294 }, { "epoch": 0.84, "learning_rate": 1.6907304611904208e-05, "loss": 0.3365, "step": 1295 }, { "epoch": 0.84, "learning_rate": 1.6902246782338934e-05, "loss": 0.3479, "step": 1296 }, { "epoch": 0.84, "learning_rate": 1.689718557821196e-05, "loss": 0.3451, "step": 1297 }, { "epoch": 0.84, "learning_rate": 1.6892121001997742e-05, "loss": 0.3508, "step": 1298 }, { "epoch": 0.84, "learning_rate": 1.6887053056172394e-05, "loss": 0.3445, "step": 1299 }, { "epoch": 0.84, "learning_rate": 1.6881981743213674e-05, "loss": 0.3418, "step": 1300 }, { "epoch": 0.84, "learning_rate": 1.6876907065600984e-05, "loss": 0.3514, "step": 1301 }, { "epoch": 0.84, "learning_rate": 1.6871829025815375e-05, "loss": 0.3214, "step": 1302 }, { "epoch": 0.84, "learning_rate": 1.686674762633954e-05, "loss": 0.3585, "step": 1303 }, { "epoch": 0.84, "learning_rate": 1.6861662869657815e-05, "loss": 0.3567, "step": 1304 }, { "epoch": 0.85, "learning_rate": 1.6856574758256176e-05, "loss": 0.3293, "step": 1305 }, { "epoch": 0.85, "learning_rate": 1.6851483294622236e-05, "loss": 0.3086, "step": 1306 }, { "epoch": 0.85, "learning_rate": 1.6846388481245254e-05, "loss": 0.3282, "step": 1307 }, { "epoch": 0.85, "learning_rate": 1.6841290320616126e-05, "loss": 0.3334, "step": 1308 }, { "epoch": 0.85, "learning_rate": 1.6836188815227375e-05, "loss": 0.335, "step": 1309 }, { "epoch": 0.85, "learning_rate": 1.6831083967573175e-05, "loss": 0.3234, "step": 1310 }, { "epoch": 0.85, "learning_rate": 1.6825975780149324e-05, "loss": 0.3274, "step": 1311 }, { "epoch": 0.85, "learning_rate": 1.682086425545325e-05, "loss": 0.3127, "step": 1312 }, { "epoch": 0.85, "learning_rate": 1.6815749395984016e-05, "loss": 0.3373, "step": 1313 }, { "epoch": 0.85, "learning_rate": 1.681063120424232e-05, "loss": 0.3454, "step": 1314 }, { "epoch": 0.85, "learning_rate": 1.680550968273049e-05, "loss": 0.3213, "step": 1315 }, { "epoch": 0.85, "learning_rate": 1.6800384833952464e-05, "loss": 0.3358, "step": 1316 }, { "epoch": 0.85, "learning_rate": 1.679525666041383e-05, "loss": 0.3417, "step": 1317 }, { "epoch": 0.85, "learning_rate": 1.6790125164621787e-05, "loss": 0.3141, "step": 1318 }, { "epoch": 0.85, "learning_rate": 1.678499034908517e-05, "loss": 0.3242, "step": 1319 }, { "epoch": 0.85, "learning_rate": 1.6779852216314424e-05, "loss": 0.3424, "step": 1320 }, { "epoch": 0.86, "learning_rate": 1.677471076882162e-05, "loss": 0.352, "step": 1321 }, { "epoch": 0.86, "learning_rate": 1.6769566009120458e-05, "loss": 0.3378, "step": 1322 }, { "epoch": 0.86, "learning_rate": 1.6764417939726244e-05, "loss": 0.3652, "step": 1323 }, { "epoch": 0.86, "learning_rate": 1.6759266563155914e-05, "loss": 0.3112, "step": 1324 }, { "epoch": 0.86, "learning_rate": 1.675411188192801e-05, "loss": 0.3381, "step": 1325 }, { "epoch": 0.86, "learning_rate": 1.6748953898562705e-05, "loss": 0.3484, "step": 1326 }, { "epoch": 0.86, "learning_rate": 1.6743792615581766e-05, "loss": 0.3377, "step": 1327 }, { "epoch": 0.86, "learning_rate": 1.673862803550859e-05, "loss": 0.3352, "step": 1328 }, { "epoch": 0.86, "learning_rate": 1.673346016086818e-05, "loss": 0.2898, "step": 1329 }, { "epoch": 0.86, "learning_rate": 1.672828899418715e-05, "loss": 0.3286, "step": 1330 }, { "epoch": 0.86, "learning_rate": 1.672311453799372e-05, "loss": 0.3227, "step": 1331 }, { "epoch": 0.86, "learning_rate": 1.6717936794817723e-05, "loss": 0.3422, "step": 1332 }, { "epoch": 0.86, "learning_rate": 1.67127557671906e-05, "loss": 0.3385, "step": 1333 }, { "epoch": 0.86, "learning_rate": 1.6707571457645393e-05, "loss": 0.3385, "step": 1334 }, { "epoch": 0.86, "learning_rate": 1.670238386871675e-05, "loss": 0.3312, "step": 1335 }, { "epoch": 0.87, "learning_rate": 1.6697193002940926e-05, "loss": 0.3514, "step": 1336 }, { "epoch": 0.87, "learning_rate": 1.6691998862855777e-05, "loss": 0.3504, "step": 1337 }, { "epoch": 0.87, "learning_rate": 1.668680145100075e-05, "loss": 0.3353, "step": 1338 }, { "epoch": 0.87, "learning_rate": 1.668160076991691e-05, "loss": 0.3469, "step": 1339 }, { "epoch": 0.87, "learning_rate": 1.66763968221469e-05, "loss": 0.3312, "step": 1340 }, { "epoch": 0.87, "learning_rate": 1.6671189610234977e-05, "loss": 0.3357, "step": 1341 }, { "epoch": 0.87, "learning_rate": 1.666597913672699e-05, "loss": 0.3467, "step": 1342 }, { "epoch": 0.87, "learning_rate": 1.666076540417037e-05, "loss": 0.3513, "step": 1343 }, { "epoch": 0.87, "learning_rate": 1.6655548415114155e-05, "loss": 0.3372, "step": 1344 }, { "epoch": 0.87, "learning_rate": 1.665032817210898e-05, "loss": 0.3196, "step": 1345 }, { "epoch": 0.87, "learning_rate": 1.6645104677707048e-05, "loss": 0.3405, "step": 1346 }, { "epoch": 0.87, "learning_rate": 1.663987793446217e-05, "loss": 0.3267, "step": 1347 }, { "epoch": 0.87, "learning_rate": 1.663464794492975e-05, "loss": 0.3394, "step": 1348 }, { "epoch": 0.87, "learning_rate": 1.6629414711666757e-05, "loss": 0.3223, "step": 1349 }, { "epoch": 0.87, "learning_rate": 1.662417823723177e-05, "loss": 0.3342, "step": 1350 }, { "epoch": 0.88, "learning_rate": 1.661893852418494e-05, "loss": 0.3427, "step": 1351 }, { "epoch": 0.88, "learning_rate": 1.6613695575087994e-05, "loss": 0.3458, "step": 1352 }, { "epoch": 0.88, "learning_rate": 1.660844939250426e-05, "loss": 0.3361, "step": 1353 }, { "epoch": 0.88, "learning_rate": 1.6603199978998632e-05, "loss": 0.3301, "step": 1354 }, { "epoch": 0.88, "learning_rate": 1.6597947337137593e-05, "loss": 0.3612, "step": 1355 }, { "epoch": 0.88, "learning_rate": 1.6592691469489194e-05, "loss": 0.3132, "step": 1356 }, { "epoch": 0.88, "learning_rate": 1.6587432378623078e-05, "loss": 0.3533, "step": 1357 }, { "epoch": 0.88, "learning_rate": 1.6582170067110447e-05, "loss": 0.3236, "step": 1358 }, { "epoch": 0.88, "learning_rate": 1.657690453752409e-05, "loss": 0.3326, "step": 1359 }, { "epoch": 0.88, "learning_rate": 1.657163579243836e-05, "loss": 0.3628, "step": 1360 }, { "epoch": 0.88, "learning_rate": 1.65663638344292e-05, "loss": 0.3463, "step": 1361 }, { "epoch": 0.88, "learning_rate": 1.6561088666074096e-05, "loss": 0.3313, "step": 1362 }, { "epoch": 0.88, "learning_rate": 1.6555810289952123e-05, "loss": 0.3255, "step": 1363 }, { "epoch": 0.88, "learning_rate": 1.6550528708643922e-05, "loss": 0.3362, "step": 1364 }, { "epoch": 0.88, "learning_rate": 1.65452439247317e-05, "loss": 0.3112, "step": 1365 }, { "epoch": 0.88, "learning_rate": 1.6539955940799225e-05, "loss": 0.3306, "step": 1366 }, { "epoch": 0.89, "learning_rate": 1.6534664759431835e-05, "loss": 0.3363, "step": 1367 }, { "epoch": 0.89, "learning_rate": 1.652937038321643e-05, "loss": 0.3174, "step": 1368 }, { "epoch": 0.89, "learning_rate": 1.6524072814741473e-05, "loss": 0.3324, "step": 1369 }, { "epoch": 0.89, "learning_rate": 1.651877205659698e-05, "loss": 0.3527, "step": 1370 }, { "epoch": 0.89, "learning_rate": 1.651346811137454e-05, "loss": 0.363, "step": 1371 }, { "epoch": 0.89, "learning_rate": 1.6508160981667285e-05, "loss": 0.3444, "step": 1372 }, { "epoch": 0.89, "learning_rate": 1.6502850670069917e-05, "loss": 0.3377, "step": 1373 }, { "epoch": 0.89, "learning_rate": 1.6497537179178687e-05, "loss": 0.3144, "step": 1374 }, { "epoch": 0.89, "learning_rate": 1.6492220511591402e-05, "loss": 0.3404, "step": 1375 }, { "epoch": 0.89, "learning_rate": 1.648690066990742e-05, "loss": 0.3255, "step": 1376 }, { "epoch": 0.89, "learning_rate": 1.6481577656727655e-05, "loss": 0.307, "step": 1377 }, { "epoch": 0.89, "learning_rate": 1.647625147465457e-05, "loss": 0.3435, "step": 1378 }, { "epoch": 0.89, "learning_rate": 1.6470922126292174e-05, "loss": 0.3531, "step": 1379 }, { "epoch": 0.89, "learning_rate": 1.646558961424603e-05, "loss": 0.3243, "step": 1380 }, { "epoch": 0.89, "learning_rate": 1.6460253941123238e-05, "loss": 0.3372, "step": 1381 }, { "epoch": 0.9, "learning_rate": 1.645491510953245e-05, "loss": 0.3071, "step": 1382 }, { "epoch": 0.9, "learning_rate": 1.644957312208387e-05, "loss": 0.3191, "step": 1383 }, { "epoch": 0.9, "learning_rate": 1.644422798138923e-05, "loss": 0.346, "step": 1384 }, { "epoch": 0.9, "learning_rate": 1.6438879690061817e-05, "loss": 0.3429, "step": 1385 }, { "epoch": 0.9, "learning_rate": 1.6433528250716448e-05, "loss": 0.3349, "step": 1386 }, { "epoch": 0.9, "learning_rate": 1.6428173665969476e-05, "loss": 0.3397, "step": 1387 }, { "epoch": 0.9, "learning_rate": 1.642281593843881e-05, "loss": 0.3235, "step": 1388 }, { "epoch": 0.9, "learning_rate": 1.6417455070743877e-05, "loss": 0.3153, "step": 1389 }, { "epoch": 0.9, "learning_rate": 1.6412091065505652e-05, "loss": 0.3585, "step": 1390 }, { "epoch": 0.9, "learning_rate": 1.6406723925346635e-05, "loss": 0.338, "step": 1391 }, { "epoch": 0.9, "learning_rate": 1.6401353652890865e-05, "loss": 0.3587, "step": 1392 }, { "epoch": 0.9, "learning_rate": 1.6395980250763903e-05, "loss": 0.3478, "step": 1393 }, { "epoch": 0.9, "learning_rate": 1.6390603721592853e-05, "loss": 0.3344, "step": 1394 }, { "epoch": 0.9, "learning_rate": 1.6385224068006343e-05, "loss": 0.3373, "step": 1395 }, { "epoch": 0.9, "learning_rate": 1.6379841292634522e-05, "loss": 0.3428, "step": 1396 }, { "epoch": 0.9, "learning_rate": 1.6374455398109074e-05, "loss": 0.3599, "step": 1397 }, { "epoch": 0.91, "learning_rate": 1.63690663870632e-05, "loss": 0.3369, "step": 1398 }, { "epoch": 0.91, "learning_rate": 1.6363674262131635e-05, "loss": 0.3405, "step": 1399 }, { "epoch": 0.91, "learning_rate": 1.6358279025950628e-05, "loss": 0.33, "step": 1400 }, { "epoch": 0.91, "learning_rate": 1.635288068115795e-05, "loss": 0.3235, "step": 1401 }, { "epoch": 0.91, "learning_rate": 1.6347479230392892e-05, "loss": 0.3134, "step": 1402 }, { "epoch": 0.91, "learning_rate": 1.634207467629627e-05, "loss": 0.3106, "step": 1403 }, { "epoch": 0.91, "learning_rate": 1.633666702151041e-05, "loss": 0.3098, "step": 1404 }, { "epoch": 0.91, "learning_rate": 1.6331256268679156e-05, "loss": 0.3151, "step": 1405 }, { "epoch": 0.91, "learning_rate": 1.6325842420447867e-05, "loss": 0.3355, "step": 1406 }, { "epoch": 0.91, "learning_rate": 1.6320425479463412e-05, "loss": 0.3403, "step": 1407 }, { "epoch": 0.91, "learning_rate": 1.6315005448374183e-05, "loss": 0.3199, "step": 1408 }, { "epoch": 0.91, "learning_rate": 1.6309582329830064e-05, "loss": 0.3243, "step": 1409 }, { "epoch": 0.91, "learning_rate": 1.630415612648247e-05, "loss": 0.3309, "step": 1410 }, { "epoch": 0.91, "learning_rate": 1.629872684098431e-05, "loss": 0.3606, "step": 1411 }, { "epoch": 0.91, "learning_rate": 1.629329447599e-05, "loss": 0.3694, "step": 1412 }, { "epoch": 0.92, "learning_rate": 1.628785903415547e-05, "loss": 0.3478, "step": 1413 }, { "epoch": 0.92, "learning_rate": 1.6282420518138145e-05, "loss": 0.3412, "step": 1414 }, { "epoch": 0.92, "learning_rate": 1.627697893059696e-05, "loss": 0.331, "step": 1415 }, { "epoch": 0.92, "learning_rate": 1.6271534274192352e-05, "loss": 0.32, "step": 1416 }, { "epoch": 0.92, "learning_rate": 1.6266086551586248e-05, "loss": 0.3317, "step": 1417 }, { "epoch": 0.92, "learning_rate": 1.6260635765442093e-05, "loss": 0.3341, "step": 1418 }, { "epoch": 0.92, "learning_rate": 1.6255181918424805e-05, "loss": 0.3405, "step": 1419 }, { "epoch": 0.92, "learning_rate": 1.6249725013200822e-05, "loss": 0.3623, "step": 1420 }, { "epoch": 0.92, "learning_rate": 1.6244265052438062e-05, "loss": 0.317, "step": 1421 }, { "epoch": 0.92, "learning_rate": 1.623880203880594e-05, "loss": 0.3226, "step": 1422 }, { "epoch": 0.92, "learning_rate": 1.6233335974975375e-05, "loss": 0.321, "step": 1423 }, { "epoch": 0.92, "learning_rate": 1.6227866863618758e-05, "loss": 0.3379, "step": 1424 }, { "epoch": 0.92, "learning_rate": 1.6222394707409984e-05, "loss": 0.3337, "step": 1425 }, { "epoch": 0.92, "learning_rate": 1.6216919509024435e-05, "loss": 0.3282, "step": 1426 }, { "epoch": 0.92, "learning_rate": 1.621144127113897e-05, "loss": 0.3205, "step": 1427 }, { "epoch": 0.92, "learning_rate": 1.620595999643195e-05, "loss": 0.31, "step": 1428 }, { "epoch": 0.93, "learning_rate": 1.6200475687583206e-05, "loss": 0.3366, "step": 1429 }, { "epoch": 0.93, "learning_rate": 1.6194988347274066e-05, "loss": 0.3394, "step": 1430 }, { "epoch": 0.93, "learning_rate": 1.6189497978187322e-05, "loss": 0.3319, "step": 1431 }, { "epoch": 0.93, "learning_rate": 1.6184004583007272e-05, "loss": 0.3144, "step": 1432 }, { "epoch": 0.93, "learning_rate": 1.617850816441967e-05, "loss": 0.3118, "step": 1433 }, { "epoch": 0.93, "learning_rate": 1.6173008725111758e-05, "loss": 0.3235, "step": 1434 }, { "epoch": 0.93, "learning_rate": 1.616750626777225e-05, "loss": 0.3519, "step": 1435 }, { "epoch": 0.93, "learning_rate": 1.6162000795091354e-05, "loss": 0.3293, "step": 1436 }, { "epoch": 0.93, "learning_rate": 1.6156492309760726e-05, "loss": 0.3363, "step": 1437 }, { "epoch": 0.93, "learning_rate": 1.615098081447351e-05, "loss": 0.3019, "step": 1438 }, { "epoch": 0.93, "learning_rate": 1.6145466311924318e-05, "loss": 0.3529, "step": 1439 }, { "epoch": 0.93, "learning_rate": 1.6139948804809235e-05, "loss": 0.3287, "step": 1440 }, { "epoch": 0.93, "learning_rate": 1.6134428295825812e-05, "loss": 0.3183, "step": 1441 }, { "epoch": 0.93, "learning_rate": 1.612890478767307e-05, "loss": 0.3072, "step": 1442 }, { "epoch": 0.93, "learning_rate": 1.612337828305149e-05, "loss": 0.3233, "step": 1443 }, { "epoch": 0.94, "learning_rate": 1.6117848784663026e-05, "loss": 0.3301, "step": 1444 }, { "epoch": 0.94, "learning_rate": 1.6112316295211093e-05, "loss": 0.3262, "step": 1445 }, { "epoch": 0.94, "learning_rate": 1.610678081740057e-05, "loss": 0.3345, "step": 1446 }, { "epoch": 0.94, "learning_rate": 1.6101242353937785e-05, "loss": 0.3205, "step": 1447 }, { "epoch": 0.94, "learning_rate": 1.6095700907530543e-05, "loss": 0.3177, "step": 1448 }, { "epoch": 0.94, "learning_rate": 1.6090156480888102e-05, "loss": 0.3586, "step": 1449 }, { "epoch": 0.94, "learning_rate": 1.608460907672117e-05, "loss": 0.3258, "step": 1450 }, { "epoch": 0.94, "learning_rate": 1.607905869774192e-05, "loss": 0.3122, "step": 1451 }, { "epoch": 0.94, "learning_rate": 1.6073505346663973e-05, "loss": 0.3291, "step": 1452 }, { "epoch": 0.94, "learning_rate": 1.6067949026202405e-05, "loss": 0.293, "step": 1453 }, { "epoch": 0.94, "learning_rate": 1.6062389739073743e-05, "loss": 0.3172, "step": 1454 }, { "epoch": 0.94, "learning_rate": 1.6056827487995965e-05, "loss": 0.325, "step": 1455 }, { "epoch": 0.94, "learning_rate": 1.6051262275688502e-05, "loss": 0.3163, "step": 1456 }, { "epoch": 0.94, "learning_rate": 1.6045694104872228e-05, "loss": 0.3123, "step": 1457 }, { "epoch": 0.94, "learning_rate": 1.604012297826946e-05, "loss": 0.3171, "step": 1458 }, { "epoch": 0.94, "learning_rate": 1.6034548898603967e-05, "loss": 0.3367, "step": 1459 }, { "epoch": 0.95, "learning_rate": 1.6028971868600967e-05, "loss": 0.3429, "step": 1460 }, { "epoch": 0.95, "learning_rate": 1.6023391890987105e-05, "loss": 0.3448, "step": 1461 }, { "epoch": 0.95, "learning_rate": 1.6017808968490475e-05, "loss": 0.3429, "step": 1462 }, { "epoch": 0.95, "learning_rate": 1.601222310384061e-05, "loss": 0.3279, "step": 1463 }, { "epoch": 0.95, "learning_rate": 1.600663429976849e-05, "loss": 0.3274, "step": 1464 }, { "epoch": 0.95, "learning_rate": 1.600104255900652e-05, "loss": 0.3262, "step": 1465 }, { "epoch": 0.95, "learning_rate": 1.5995447884288544e-05, "loss": 0.3282, "step": 1466 }, { "epoch": 0.95, "learning_rate": 1.598985027834984e-05, "loss": 0.3142, "step": 1467 }, { "epoch": 0.95, "learning_rate": 1.5984249743927126e-05, "loss": 0.3137, "step": 1468 }, { "epoch": 0.95, "learning_rate": 1.5978646283758547e-05, "loss": 0.3087, "step": 1469 }, { "epoch": 0.95, "learning_rate": 1.597303990058367e-05, "loss": 0.3257, "step": 1470 }, { "epoch": 0.95, "learning_rate": 1.5967430597143504e-05, "loss": 0.3193, "step": 1471 }, { "epoch": 0.95, "learning_rate": 1.596181837618048e-05, "loss": 0.3261, "step": 1472 }, { "epoch": 0.95, "learning_rate": 1.595620324043846e-05, "loss": 0.351, "step": 1473 }, { "epoch": 0.95, "learning_rate": 1.595058519266272e-05, "loss": 0.3312, "step": 1474 }, { "epoch": 0.96, "learning_rate": 1.5944964235599974e-05, "loss": 0.3472, "step": 1475 }, { "epoch": 0.96, "learning_rate": 1.5939340371998345e-05, "loss": 0.3186, "step": 1476 }, { "epoch": 0.96, "learning_rate": 1.5933713604607386e-05, "loss": 0.3603, "step": 1477 }, { "epoch": 0.96, "learning_rate": 1.592808393617807e-05, "loss": 0.34, "step": 1478 }, { "epoch": 0.96, "learning_rate": 1.5922451369462783e-05, "loss": 0.3044, "step": 1479 }, { "epoch": 0.96, "learning_rate": 1.5916815907215327e-05, "loss": 0.3289, "step": 1480 }, { "epoch": 0.96, "learning_rate": 1.5911177552190924e-05, "loss": 0.3308, "step": 1481 }, { "epoch": 0.96, "learning_rate": 1.5905536307146216e-05, "loss": 0.3436, "step": 1482 }, { "epoch": 0.96, "learning_rate": 1.589989217483924e-05, "loss": 0.3008, "step": 1483 }, { "epoch": 0.96, "learning_rate": 1.5894245158029464e-05, "loss": 0.3284, "step": 1484 }, { "epoch": 0.96, "learning_rate": 1.588859525947776e-05, "loss": 0.342, "step": 1485 }, { "epoch": 0.96, "learning_rate": 1.58829424819464e-05, "loss": 0.3134, "step": 1486 }, { "epoch": 0.96, "learning_rate": 1.5877286828199078e-05, "loss": 0.3238, "step": 1487 }, { "epoch": 0.96, "learning_rate": 1.5871628301000875e-05, "loss": 0.3323, "step": 1488 }, { "epoch": 0.96, "learning_rate": 1.5865966903118303e-05, "loss": 0.3432, "step": 1489 }, { "epoch": 0.97, "learning_rate": 1.5860302637319255e-05, "loss": 0.3269, "step": 1490 }, { "epoch": 0.97, "learning_rate": 1.585463550637303e-05, "loss": 0.3224, "step": 1491 }, { "epoch": 0.97, "learning_rate": 1.5848965513050344e-05, "loss": 0.3259, "step": 1492 }, { "epoch": 0.97, "learning_rate": 1.5843292660123292e-05, "loss": 0.3162, "step": 1493 }, { "epoch": 0.97, "learning_rate": 1.583761695036538e-05, "loss": 0.308, "step": 1494 }, { "epoch": 0.97, "learning_rate": 1.58319383865515e-05, "loss": 0.3345, "step": 1495 }, { "epoch": 0.97, "learning_rate": 1.5826256971457948e-05, "loss": 0.3252, "step": 1496 }, { "epoch": 0.97, "learning_rate": 1.5820572707862418e-05, "loss": 0.3363, "step": 1497 }, { "epoch": 0.97, "learning_rate": 1.581488559854398e-05, "loss": 0.3272, "step": 1498 }, { "epoch": 0.97, "learning_rate": 1.5809195646283114e-05, "loss": 0.329, "step": 1499 }, { "epoch": 0.97, "learning_rate": 1.5803502853861677e-05, "loss": 0.3122, "step": 1500 }, { "epoch": 0.97, "learning_rate": 1.5797807224062927e-05, "loss": 0.3314, "step": 1501 }, { "epoch": 0.97, "learning_rate": 1.579210875967149e-05, "loss": 0.324, "step": 1502 }, { "epoch": 0.97, "learning_rate": 1.5786407463473394e-05, "loss": 0.3359, "step": 1503 }, { "epoch": 0.97, "learning_rate": 1.5780703338256046e-05, "loss": 0.2872, "step": 1504 }, { "epoch": 0.97, "learning_rate": 1.577499638680824e-05, "loss": 0.3366, "step": 1505 }, { "epoch": 0.98, "learning_rate": 1.576928661192015e-05, "loss": 0.3233, "step": 1506 }, { "epoch": 0.98, "learning_rate": 1.576357401638332e-05, "loss": 0.3239, "step": 1507 }, { "epoch": 0.98, "learning_rate": 1.575785860299069e-05, "loss": 0.3088, "step": 1508 }, { "epoch": 0.98, "learning_rate": 1.5752140374536564e-05, "loss": 0.3494, "step": 1509 }, { "epoch": 0.98, "learning_rate": 1.5746419333816634e-05, "loss": 0.3174, "step": 1510 }, { "epoch": 0.98, "learning_rate": 1.574069548362796e-05, "loss": 0.3344, "step": 1511 }, { "epoch": 0.98, "learning_rate": 1.573496882676897e-05, "loss": 0.3269, "step": 1512 }, { "epoch": 0.98, "learning_rate": 1.5729239366039476e-05, "loss": 0.3244, "step": 1513 }, { "epoch": 0.98, "learning_rate": 1.5723507104240657e-05, "loss": 0.3489, "step": 1514 }, { "epoch": 0.98, "learning_rate": 1.5717772044175056e-05, "loss": 0.3044, "step": 1515 }, { "epoch": 0.98, "learning_rate": 1.5712034188646595e-05, "loss": 0.3133, "step": 1516 }, { "epoch": 0.98, "learning_rate": 1.5706293540460546e-05, "loss": 0.3381, "step": 1517 }, { "epoch": 0.98, "learning_rate": 1.5700550102423563e-05, "loss": 0.3407, "step": 1518 }, { "epoch": 0.98, "learning_rate": 1.5694803877343658e-05, "loss": 0.3105, "step": 1519 }, { "epoch": 0.98, "learning_rate": 1.5689054868030195e-05, "loss": 0.3389, "step": 1520 }, { "epoch": 0.99, "learning_rate": 1.5683303077293922e-05, "loss": 0.3155, "step": 1521 }, { "epoch": 0.99, "learning_rate": 1.5677548507946924e-05, "loss": 0.3454, "step": 1522 }, { "epoch": 0.99, "learning_rate": 1.567179116280266e-05, "loss": 0.3567, "step": 1523 }, { "epoch": 0.99, "learning_rate": 1.5666031044675937e-05, "loss": 0.3398, "step": 1524 }, { "epoch": 0.99, "learning_rate": 1.5660268156382925e-05, "loss": 0.3177, "step": 1525 }, { "epoch": 0.99, "learning_rate": 1.565450250074114e-05, "loss": 0.3314, "step": 1526 }, { "epoch": 0.99, "learning_rate": 1.5648734080569463e-05, "loss": 0.3319, "step": 1527 }, { "epoch": 0.99, "learning_rate": 1.5642962898688108e-05, "loss": 0.342, "step": 1528 }, { "epoch": 0.99, "learning_rate": 1.5637188957918664e-05, "loss": 0.3195, "step": 1529 }, { "epoch": 0.99, "learning_rate": 1.563141226108404e-05, "loss": 0.3184, "step": 1530 }, { "epoch": 0.99, "learning_rate": 1.562563281100852e-05, "loss": 0.3555, "step": 1531 }, { "epoch": 0.99, "learning_rate": 1.5619850610517715e-05, "loss": 0.3299, "step": 1532 }, { "epoch": 0.99, "learning_rate": 1.561406566243859e-05, "loss": 0.3276, "step": 1533 }, { "epoch": 0.99, "learning_rate": 1.560827796959945e-05, "loss": 0.3064, "step": 1534 }, { "epoch": 0.99, "learning_rate": 1.5602487534829945e-05, "loss": 0.3453, "step": 1535 }, { "epoch": 0.99, "learning_rate": 1.5596694360961057e-05, "loss": 0.3251, "step": 1536 }, { "epoch": 1.0, "learning_rate": 1.559089845082512e-05, "loss": 0.3432, "step": 1537 }, { "epoch": 1.0, "learning_rate": 1.5585099807255794e-05, "loss": 0.3091, "step": 1538 }, { "epoch": 1.0, "learning_rate": 1.5579298433088084e-05, "loss": 0.3264, "step": 1539 }, { "epoch": 1.0, "learning_rate": 1.5573494331158328e-05, "loss": 0.3201, "step": 1540 }, { "epoch": 1.0, "learning_rate": 1.5567687504304197e-05, "loss": 0.3244, "step": 1541 }, { "epoch": 1.0, "learning_rate": 1.5561877955364685e-05, "loss": 0.3361, "step": 1542 }, { "epoch": 1.0, "learning_rate": 1.5556065687180133e-05, "loss": 0.3227, "step": 1543 }, { "epoch": 1.0, "learning_rate": 1.55502507025922e-05, "loss": 0.3217, "step": 1544 }, { "epoch": 1.0, "learning_rate": 1.5544433004443877e-05, "loss": 0.2691, "step": 1545 }, { "epoch": 1.0, "learning_rate": 1.5538612595579484e-05, "loss": 0.2883, "step": 1546 }, { "epoch": 1.0, "learning_rate": 1.5532789478844667e-05, "loss": 0.2628, "step": 1547 }, { "epoch": 1.0, "learning_rate": 1.5526963657086386e-05, "loss": 0.2846, "step": 1548 }, { "epoch": 1.0, "learning_rate": 1.552113513315293e-05, "loss": 0.2789, "step": 1549 }, { "epoch": 1.0, "learning_rate": 1.5515303909893914e-05, "loss": 0.2653, "step": 1550 }, { "epoch": 1.0, "learning_rate": 1.5509469990160265e-05, "loss": 0.2814, "step": 1551 }, { "epoch": 1.01, "learning_rate": 1.5503633376804235e-05, "loss": 0.287, "step": 1552 }, { "epoch": 1.01, "learning_rate": 1.5497794072679384e-05, "loss": 0.2617, "step": 1553 }, { "epoch": 1.01, "learning_rate": 1.5491952080640595e-05, "loss": 0.2715, "step": 1554 }, { "epoch": 1.01, "learning_rate": 1.5486107403544067e-05, "loss": 0.2764, "step": 1555 }, { "epoch": 1.01, "learning_rate": 1.5480260044247302e-05, "loss": 0.2652, "step": 1556 }, { "epoch": 1.01, "learning_rate": 1.547441000560912e-05, "loss": 0.26, "step": 1557 }, { "epoch": 1.01, "learning_rate": 1.5468557290489653e-05, "loss": 0.2668, "step": 1558 }, { "epoch": 1.01, "learning_rate": 1.5462701901750333e-05, "loss": 0.2728, "step": 1559 }, { "epoch": 1.01, "learning_rate": 1.545684384225391e-05, "loss": 0.2678, "step": 1560 }, { "epoch": 1.01, "learning_rate": 1.545098311486444e-05, "loss": 0.2751, "step": 1561 }, { "epoch": 1.01, "learning_rate": 1.5445119722447263e-05, "loss": 0.2677, "step": 1562 }, { "epoch": 1.01, "learning_rate": 1.5439253667869048e-05, "loss": 0.2821, "step": 1563 }, { "epoch": 1.01, "learning_rate": 1.543338495399775e-05, "loss": 0.2852, "step": 1564 }, { "epoch": 1.01, "learning_rate": 1.5427513583702628e-05, "loss": 0.2748, "step": 1565 }, { "epoch": 1.01, "learning_rate": 1.542163955985424e-05, "loss": 0.2948, "step": 1566 }, { "epoch": 1.01, "learning_rate": 1.541576288532445e-05, "loss": 0.2745, "step": 1567 }, { "epoch": 1.02, "learning_rate": 1.5409883562986398e-05, "loss": 0.2645, "step": 1568 }, { "epoch": 1.02, "learning_rate": 1.5404001595714534e-05, "loss": 0.2567, "step": 1569 }, { "epoch": 1.02, "learning_rate": 1.5398116986384603e-05, "loss": 0.2787, "step": 1570 }, { "epoch": 1.02, "learning_rate": 1.5392229737873627e-05, "loss": 0.2623, "step": 1571 }, { "epoch": 1.02, "learning_rate": 1.5386339853059934e-05, "loss": 0.2676, "step": 1572 }, { "epoch": 1.02, "learning_rate": 1.538044733482313e-05, "loss": 0.2664, "step": 1573 }, { "epoch": 1.02, "learning_rate": 1.537455218604411e-05, "loss": 0.2765, "step": 1574 }, { "epoch": 1.02, "learning_rate": 1.5368654409605067e-05, "loss": 0.2658, "step": 1575 }, { "epoch": 1.02, "learning_rate": 1.5362754008389464e-05, "loss": 0.2729, "step": 1576 }, { "epoch": 1.02, "learning_rate": 1.5356850985282048e-05, "loss": 0.2768, "step": 1577 }, { "epoch": 1.02, "learning_rate": 1.535094534316886e-05, "loss": 0.2837, "step": 1578 }, { "epoch": 1.02, "learning_rate": 1.5345037084937212e-05, "loss": 0.2666, "step": 1579 }, { "epoch": 1.02, "learning_rate": 1.5339126213475696e-05, "loss": 0.2807, "step": 1580 }, { "epoch": 1.02, "learning_rate": 1.5333212731674182e-05, "loss": 0.2591, "step": 1581 }, { "epoch": 1.02, "learning_rate": 1.5327296642423818e-05, "loss": 0.2861, "step": 1582 }, { "epoch": 1.03, "learning_rate": 1.532137794861703e-05, "loss": 0.2752, "step": 1583 }, { "epoch": 1.03, "learning_rate": 1.53154566531475e-05, "loss": 0.2549, "step": 1584 }, { "epoch": 1.03, "learning_rate": 1.530953275891021e-05, "loss": 0.2645, "step": 1585 }, { "epoch": 1.03, "learning_rate": 1.5303606268801386e-05, "loss": 0.2754, "step": 1586 }, { "epoch": 1.03, "learning_rate": 1.529767718571854e-05, "loss": 0.2665, "step": 1587 }, { "epoch": 1.03, "learning_rate": 1.529174551256045e-05, "loss": 0.2608, "step": 1588 }, { "epoch": 1.03, "learning_rate": 1.5285811252227156e-05, "loss": 0.2706, "step": 1589 }, { "epoch": 1.03, "learning_rate": 1.527987440761996e-05, "loss": 0.2854, "step": 1590 }, { "epoch": 1.03, "learning_rate": 1.5273934981641427e-05, "loss": 0.2686, "step": 1591 }, { "epoch": 1.03, "learning_rate": 1.5267992977195394e-05, "loss": 0.2742, "step": 1592 }, { "epoch": 1.03, "learning_rate": 1.5262048397186954e-05, "loss": 0.2569, "step": 1593 }, { "epoch": 1.03, "learning_rate": 1.5256101244522456e-05, "loss": 0.2768, "step": 1594 }, { "epoch": 1.03, "learning_rate": 1.5250151522109509e-05, "loss": 0.2561, "step": 1595 }, { "epoch": 1.03, "learning_rate": 1.5244199232856978e-05, "loss": 0.2729, "step": 1596 }, { "epoch": 1.03, "learning_rate": 1.5238244379674978e-05, "loss": 0.2818, "step": 1597 }, { "epoch": 1.03, "learning_rate": 1.5232286965474892e-05, "loss": 0.2694, "step": 1598 }, { "epoch": 1.04, "learning_rate": 1.5226326993169342e-05, "loss": 0.2617, "step": 1599 }, { "epoch": 1.04, "learning_rate": 1.5220364465672205e-05, "loss": 0.2729, "step": 1600 }, { "epoch": 1.04, "learning_rate": 1.5214399385898604e-05, "loss": 0.2664, "step": 1601 }, { "epoch": 1.04, "learning_rate": 1.5208431756764913e-05, "loss": 0.2895, "step": 1602 }, { "epoch": 1.04, "learning_rate": 1.5202461581188757e-05, "loss": 0.2655, "step": 1603 }, { "epoch": 1.04, "learning_rate": 1.5196488862088993e-05, "loss": 0.2732, "step": 1604 }, { "epoch": 1.04, "learning_rate": 1.5190513602385734e-05, "loss": 0.265, "step": 1605 }, { "epoch": 1.04, "learning_rate": 1.518453580500033e-05, "loss": 0.2641, "step": 1606 }, { "epoch": 1.04, "learning_rate": 1.5178555472855375e-05, "loss": 0.2627, "step": 1607 }, { "epoch": 1.04, "learning_rate": 1.5172572608874694e-05, "loss": 0.2579, "step": 1608 }, { "epoch": 1.04, "learning_rate": 1.516658721598336e-05, "loss": 0.2696, "step": 1609 }, { "epoch": 1.04, "learning_rate": 1.5160599297107674e-05, "loss": 0.2635, "step": 1610 }, { "epoch": 1.04, "learning_rate": 1.515460885517518e-05, "loss": 0.2613, "step": 1611 }, { "epoch": 1.04, "learning_rate": 1.5148615893114649e-05, "loss": 0.2726, "step": 1612 }, { "epoch": 1.04, "learning_rate": 1.5142620413856084e-05, "loss": 0.2844, "step": 1613 }, { "epoch": 1.05, "learning_rate": 1.5136622420330724e-05, "loss": 0.2767, "step": 1614 }, { "epoch": 1.05, "learning_rate": 1.5130621915471034e-05, "loss": 0.2565, "step": 1615 }, { "epoch": 1.05, "learning_rate": 1.5124618902210708e-05, "loss": 0.2877, "step": 1616 }, { "epoch": 1.05, "learning_rate": 1.5118613383484663e-05, "loss": 0.2727, "step": 1617 }, { "epoch": 1.05, "learning_rate": 1.5112605362229044e-05, "loss": 0.2652, "step": 1618 }, { "epoch": 1.05, "learning_rate": 1.5106594841381222e-05, "loss": 0.2514, "step": 1619 }, { "epoch": 1.05, "learning_rate": 1.5100581823879785e-05, "loss": 0.2706, "step": 1620 }, { "epoch": 1.05, "learning_rate": 1.509456631266454e-05, "loss": 0.2621, "step": 1621 }, { "epoch": 1.05, "learning_rate": 1.5088548310676522e-05, "loss": 0.2619, "step": 1622 }, { "epoch": 1.05, "learning_rate": 1.5082527820857978e-05, "loss": 0.2804, "step": 1623 }, { "epoch": 1.05, "learning_rate": 1.5076504846152368e-05, "loss": 0.263, "step": 1624 }, { "epoch": 1.05, "learning_rate": 1.5070479389504377e-05, "loss": 0.261, "step": 1625 }, { "epoch": 1.05, "learning_rate": 1.5064451453859895e-05, "loss": 0.265, "step": 1626 }, { "epoch": 1.05, "learning_rate": 1.5058421042166019e-05, "loss": 0.2685, "step": 1627 }, { "epoch": 1.05, "learning_rate": 1.5052388157371072e-05, "loss": 0.2758, "step": 1628 }, { "epoch": 1.06, "learning_rate": 1.5046352802424578e-05, "loss": 0.2599, "step": 1629 }, { "epoch": 1.06, "learning_rate": 1.5040314980277265e-05, "loss": 0.2582, "step": 1630 }, { "epoch": 1.06, "learning_rate": 1.503427469388107e-05, "loss": 0.2717, "step": 1631 }, { "epoch": 1.06, "learning_rate": 1.5028231946189142e-05, "loss": 0.2647, "step": 1632 }, { "epoch": 1.06, "learning_rate": 1.5022186740155819e-05, "loss": 0.255, "step": 1633 }, { "epoch": 1.06, "learning_rate": 1.5016139078736656e-05, "loss": 0.2662, "step": 1634 }, { "epoch": 1.06, "learning_rate": 1.5010088964888394e-05, "loss": 0.2679, "step": 1635 }, { "epoch": 1.06, "learning_rate": 1.5004036401568987e-05, "loss": 0.2633, "step": 1636 }, { "epoch": 1.06, "learning_rate": 1.4997981391737577e-05, "loss": 0.2626, "step": 1637 }, { "epoch": 1.06, "learning_rate": 1.4991923938354504e-05, "loss": 0.2729, "step": 1638 }, { "epoch": 1.06, "learning_rate": 1.4985864044381306e-05, "loss": 0.2708, "step": 1639 }, { "epoch": 1.06, "learning_rate": 1.4979801712780713e-05, "loss": 0.2695, "step": 1640 }, { "epoch": 1.06, "learning_rate": 1.4973736946516639e-05, "loss": 0.2674, "step": 1641 }, { "epoch": 1.06, "learning_rate": 1.49676697485542e-05, "loss": 0.2806, "step": 1642 }, { "epoch": 1.06, "learning_rate": 1.4961600121859699e-05, "loss": 0.2747, "step": 1643 }, { "epoch": 1.06, "learning_rate": 1.4955528069400617e-05, "loss": 0.2427, "step": 1644 }, { "epoch": 1.07, "learning_rate": 1.4949453594145631e-05, "loss": 0.2755, "step": 1645 }, { "epoch": 1.07, "learning_rate": 1.4943376699064598e-05, "loss": 0.262, "step": 1646 }, { "epoch": 1.07, "learning_rate": 1.4937297387128557e-05, "loss": 0.2753, "step": 1647 }, { "epoch": 1.07, "learning_rate": 1.4931215661309735e-05, "loss": 0.268, "step": 1648 }, { "epoch": 1.07, "learning_rate": 1.4925131524581528e-05, "loss": 0.2758, "step": 1649 }, { "epoch": 1.07, "learning_rate": 1.4919044979918525e-05, "loss": 0.2504, "step": 1650 }, { "epoch": 1.07, "learning_rate": 1.4912956030296479e-05, "loss": 0.2706, "step": 1651 }, { "epoch": 1.07, "learning_rate": 1.4906864678692328e-05, "loss": 0.2788, "step": 1652 }, { "epoch": 1.07, "learning_rate": 1.4900770928084179e-05, "loss": 0.2672, "step": 1653 }, { "epoch": 1.07, "learning_rate": 1.489467478145131e-05, "loss": 0.2575, "step": 1654 }, { "epoch": 1.07, "learning_rate": 1.4888576241774182e-05, "loss": 0.2682, "step": 1655 }, { "epoch": 1.07, "learning_rate": 1.4882475312034414e-05, "loss": 0.2618, "step": 1656 }, { "epoch": 1.07, "learning_rate": 1.4876371995214798e-05, "loss": 0.2591, "step": 1657 }, { "epoch": 1.07, "learning_rate": 1.487026629429929e-05, "loss": 0.2775, "step": 1658 }, { "epoch": 1.07, "learning_rate": 1.4864158212273019e-05, "loss": 0.2765, "step": 1659 }, { "epoch": 1.08, "learning_rate": 1.485804775212227e-05, "loss": 0.2713, "step": 1660 }, { "epoch": 1.08, "learning_rate": 1.4851934916834494e-05, "loss": 0.2748, "step": 1661 }, { "epoch": 1.08, "learning_rate": 1.4845819709398306e-05, "loss": 0.2746, "step": 1662 }, { "epoch": 1.08, "learning_rate": 1.4839702132803477e-05, "loss": 0.2802, "step": 1663 }, { "epoch": 1.08, "learning_rate": 1.4833582190040936e-05, "loss": 0.2624, "step": 1664 }, { "epoch": 1.08, "learning_rate": 1.4827459884102773e-05, "loss": 0.269, "step": 1665 }, { "epoch": 1.08, "learning_rate": 1.4821335217982227e-05, "loss": 0.2833, "step": 1666 }, { "epoch": 1.08, "learning_rate": 1.4815208194673698e-05, "loss": 0.2621, "step": 1667 }, { "epoch": 1.08, "learning_rate": 1.4809078817172729e-05, "loss": 0.2738, "step": 1668 }, { "epoch": 1.08, "learning_rate": 1.4802947088476027e-05, "loss": 0.2583, "step": 1669 }, { "epoch": 1.08, "learning_rate": 1.4796813011581442e-05, "loss": 0.2783, "step": 1670 }, { "epoch": 1.08, "learning_rate": 1.4790676589487961e-05, "loss": 0.276, "step": 1671 }, { "epoch": 1.08, "learning_rate": 1.4784537825195742e-05, "loss": 0.262, "step": 1672 }, { "epoch": 1.08, "learning_rate": 1.4778396721706067e-05, "loss": 0.269, "step": 1673 }, { "epoch": 1.08, "learning_rate": 1.4772253282021367e-05, "loss": 0.2833, "step": 1674 }, { "epoch": 1.08, "learning_rate": 1.4766107509145223e-05, "loss": 0.2686, "step": 1675 }, { "epoch": 1.09, "learning_rate": 1.4759959406082342e-05, "loss": 0.2698, "step": 1676 }, { "epoch": 1.09, "learning_rate": 1.4753808975838589e-05, "loss": 0.2705, "step": 1677 }, { "epoch": 1.09, "learning_rate": 1.4747656221420949e-05, "loss": 0.2659, "step": 1678 }, { "epoch": 1.09, "learning_rate": 1.4741501145837556e-05, "loss": 0.2561, "step": 1679 }, { "epoch": 1.09, "learning_rate": 1.4735343752097673e-05, "loss": 0.2798, "step": 1680 }, { "epoch": 1.09, "learning_rate": 1.4729184043211695e-05, "loss": 0.2645, "step": 1681 }, { "epoch": 1.09, "learning_rate": 1.4723022022191152e-05, "loss": 0.2694, "step": 1682 }, { "epoch": 1.09, "learning_rate": 1.4716857692048705e-05, "loss": 0.2507, "step": 1683 }, { "epoch": 1.09, "learning_rate": 1.4710691055798143e-05, "loss": 0.261, "step": 1684 }, { "epoch": 1.09, "learning_rate": 1.4704522116454379e-05, "loss": 0.2663, "step": 1685 }, { "epoch": 1.09, "learning_rate": 1.4698350877033458e-05, "loss": 0.278, "step": 1686 }, { "epoch": 1.09, "learning_rate": 1.4692177340552544e-05, "loss": 0.2636, "step": 1687 }, { "epoch": 1.09, "learning_rate": 1.4686001510029928e-05, "loss": 0.2606, "step": 1688 }, { "epoch": 1.09, "learning_rate": 1.4679823388485023e-05, "loss": 0.2635, "step": 1689 }, { "epoch": 1.09, "learning_rate": 1.4673642978938359e-05, "loss": 0.2926, "step": 1690 }, { "epoch": 1.1, "learning_rate": 1.4667460284411585e-05, "loss": 0.2764, "step": 1691 }, { "epoch": 1.1, "learning_rate": 1.4661275307927467e-05, "loss": 0.2504, "step": 1692 }, { "epoch": 1.1, "learning_rate": 1.4655088052509892e-05, "loss": 0.281, "step": 1693 }, { "epoch": 1.1, "learning_rate": 1.4648898521183852e-05, "loss": 0.2821, "step": 1694 }, { "epoch": 1.1, "learning_rate": 1.4642706716975457e-05, "loss": 0.2752, "step": 1695 }, { "epoch": 1.1, "learning_rate": 1.4636512642911933e-05, "loss": 0.2583, "step": 1696 }, { "epoch": 1.1, "learning_rate": 1.4630316302021605e-05, "loss": 0.2697, "step": 1697 }, { "epoch": 1.1, "learning_rate": 1.4624117697333917e-05, "loss": 0.2692, "step": 1698 }, { "epoch": 1.1, "learning_rate": 1.461791683187941e-05, "loss": 0.2759, "step": 1699 }, { "epoch": 1.1, "learning_rate": 1.4611713708689736e-05, "loss": 0.2657, "step": 1700 }, { "epoch": 1.1, "learning_rate": 1.4605508330797653e-05, "loss": 0.2677, "step": 1701 }, { "epoch": 1.1, "learning_rate": 1.4599300701237014e-05, "loss": 0.2908, "step": 1702 }, { "epoch": 1.1, "learning_rate": 1.4593090823042779e-05, "loss": 0.275, "step": 1703 }, { "epoch": 1.1, "learning_rate": 1.4586878699251003e-05, "loss": 0.2539, "step": 1704 }, { "epoch": 1.1, "learning_rate": 1.4580664332898847e-05, "loss": 0.2541, "step": 1705 }, { "epoch": 1.1, "learning_rate": 1.4574447727024556e-05, "loss": 0.2728, "step": 1706 }, { "epoch": 1.11, "learning_rate": 1.4568228884667479e-05, "loss": 0.2684, "step": 1707 }, { "epoch": 1.11, "learning_rate": 1.456200780886806e-05, "loss": 0.2774, "step": 1708 }, { "epoch": 1.11, "learning_rate": 1.4555784502667823e-05, "loss": 0.2628, "step": 1709 }, { "epoch": 1.11, "learning_rate": 1.4549558969109394e-05, "loss": 0.2793, "step": 1710 }, { "epoch": 1.11, "learning_rate": 1.4543331211236488e-05, "loss": 0.2653, "step": 1711 }, { "epoch": 1.11, "learning_rate": 1.4537101232093902e-05, "loss": 0.2539, "step": 1712 }, { "epoch": 1.11, "learning_rate": 1.453086903472752e-05, "loss": 0.2762, "step": 1713 }, { "epoch": 1.11, "learning_rate": 1.452463462218431e-05, "loss": 0.2739, "step": 1714 }, { "epoch": 1.11, "learning_rate": 1.4518397997512326e-05, "loss": 0.2563, "step": 1715 }, { "epoch": 1.11, "learning_rate": 1.4512159163760702e-05, "loss": 0.2793, "step": 1716 }, { "epoch": 1.11, "learning_rate": 1.4505918123979652e-05, "loss": 0.2584, "step": 1717 }, { "epoch": 1.11, "learning_rate": 1.4499674881220468e-05, "loss": 0.2781, "step": 1718 }, { "epoch": 1.11, "learning_rate": 1.4493429438535518e-05, "loss": 0.2708, "step": 1719 }, { "epoch": 1.11, "learning_rate": 1.448718179897825e-05, "loss": 0.2733, "step": 1720 }, { "epoch": 1.11, "learning_rate": 1.448093196560318e-05, "loss": 0.2745, "step": 1721 }, { "epoch": 1.12, "learning_rate": 1.4474679941465904e-05, "loss": 0.2604, "step": 1722 }, { "epoch": 1.12, "learning_rate": 1.4468425729623082e-05, "loss": 0.2575, "step": 1723 }, { "epoch": 1.12, "learning_rate": 1.4462169333132448e-05, "loss": 0.2669, "step": 1724 }, { "epoch": 1.12, "learning_rate": 1.4455910755052798e-05, "loss": 0.2771, "step": 1725 }, { "epoch": 1.12, "learning_rate": 1.4449649998444007e-05, "loss": 0.2835, "step": 1726 }, { "epoch": 1.12, "learning_rate": 1.4443387066366999e-05, "loss": 0.2613, "step": 1727 }, { "epoch": 1.12, "learning_rate": 1.4437121961883776e-05, "loss": 0.277, "step": 1728 }, { "epoch": 1.12, "learning_rate": 1.443085468805739e-05, "loss": 0.2717, "step": 1729 }, { "epoch": 1.12, "learning_rate": 1.4424585247951966e-05, "loss": 0.2764, "step": 1730 }, { "epoch": 1.12, "learning_rate": 1.4418313644632677e-05, "loss": 0.261, "step": 1731 }, { "epoch": 1.12, "learning_rate": 1.441203988116576e-05, "loss": 0.2617, "step": 1732 }, { "epoch": 1.12, "learning_rate": 1.440576396061851e-05, "loss": 0.2555, "step": 1733 }, { "epoch": 1.12, "learning_rate": 1.4399485886059266e-05, "loss": 0.27, "step": 1734 }, { "epoch": 1.12, "learning_rate": 1.439320566055743e-05, "loss": 0.2764, "step": 1735 }, { "epoch": 1.12, "learning_rate": 1.4386923287183454e-05, "loss": 0.2542, "step": 1736 }, { "epoch": 1.12, "learning_rate": 1.4380638769008837e-05, "loss": 0.2883, "step": 1737 }, { "epoch": 1.13, "learning_rate": 1.437435210910613e-05, "loss": 0.2699, "step": 1738 }, { "epoch": 1.13, "learning_rate": 1.436806331054893e-05, "loss": 0.2619, "step": 1739 }, { "epoch": 1.13, "learning_rate": 1.4361772376411876e-05, "loss": 0.2744, "step": 1740 }, { "epoch": 1.13, "learning_rate": 1.4355479309770655e-05, "loss": 0.268, "step": 1741 }, { "epoch": 1.13, "learning_rate": 1.4349184113701999e-05, "loss": 0.2674, "step": 1742 }, { "epoch": 1.13, "learning_rate": 1.4342886791283674e-05, "loss": 0.2622, "step": 1743 }, { "epoch": 1.13, "learning_rate": 1.4336587345594495e-05, "loss": 0.2722, "step": 1744 }, { "epoch": 1.13, "learning_rate": 1.4330285779714304e-05, "loss": 0.2678, "step": 1745 }, { "epoch": 1.13, "learning_rate": 1.4323982096723988e-05, "loss": 0.2791, "step": 1746 }, { "epoch": 1.13, "learning_rate": 1.4317676299705462e-05, "loss": 0.2792, "step": 1747 }, { "epoch": 1.13, "learning_rate": 1.4311368391741687e-05, "loss": 0.2661, "step": 1748 }, { "epoch": 1.13, "learning_rate": 1.430505837591664e-05, "loss": 0.2742, "step": 1749 }, { "epoch": 1.13, "learning_rate": 1.4298746255315339e-05, "loss": 0.2723, "step": 1750 }, { "epoch": 1.13, "learning_rate": 1.429243203302383e-05, "loss": 0.2711, "step": 1751 }, { "epoch": 1.13, "learning_rate": 1.4286115712129184e-05, "loss": 0.2779, "step": 1752 }, { "epoch": 1.14, "learning_rate": 1.4279797295719502e-05, "loss": 0.2525, "step": 1753 }, { "epoch": 1.14, "learning_rate": 1.42734767868839e-05, "loss": 0.2701, "step": 1754 }, { "epoch": 1.14, "learning_rate": 1.4267154188712529e-05, "loss": 0.2631, "step": 1755 }, { "epoch": 1.14, "learning_rate": 1.4260829504296552e-05, "loss": 0.262, "step": 1756 }, { "epoch": 1.14, "learning_rate": 1.4254502736728159e-05, "loss": 0.2803, "step": 1757 }, { "epoch": 1.14, "learning_rate": 1.4248173889100555e-05, "loss": 0.2659, "step": 1758 }, { "epoch": 1.14, "learning_rate": 1.4241842964507963e-05, "loss": 0.2753, "step": 1759 }, { "epoch": 1.14, "learning_rate": 1.4235509966045619e-05, "loss": 0.2731, "step": 1760 }, { "epoch": 1.14, "learning_rate": 1.4229174896809777e-05, "loss": 0.2578, "step": 1761 }, { "epoch": 1.14, "learning_rate": 1.4222837759897696e-05, "loss": 0.2758, "step": 1762 }, { "epoch": 1.14, "learning_rate": 1.421649855840766e-05, "loss": 0.28, "step": 1763 }, { "epoch": 1.14, "learning_rate": 1.4210157295438945e-05, "loss": 0.2789, "step": 1764 }, { "epoch": 1.14, "learning_rate": 1.4203813974091847e-05, "loss": 0.2755, "step": 1765 }, { "epoch": 1.14, "learning_rate": 1.4197468597467666e-05, "loss": 0.2595, "step": 1766 }, { "epoch": 1.14, "learning_rate": 1.4191121168668705e-05, "loss": 0.2649, "step": 1767 }, { "epoch": 1.15, "learning_rate": 1.418477169079827e-05, "loss": 0.2652, "step": 1768 }, { "epoch": 1.15, "learning_rate": 1.4178420166960668e-05, "loss": 0.2497, "step": 1769 }, { "epoch": 1.15, "learning_rate": 1.4172066600261209e-05, "loss": 0.2658, "step": 1770 }, { "epoch": 1.15, "learning_rate": 1.4165710993806201e-05, "loss": 0.2588, "step": 1771 }, { "epoch": 1.15, "learning_rate": 1.4159353350702953e-05, "loss": 0.2647, "step": 1772 }, { "epoch": 1.15, "learning_rate": 1.4152993674059764e-05, "loss": 0.2601, "step": 1773 }, { "epoch": 1.15, "learning_rate": 1.4146631966985922e-05, "loss": 0.257, "step": 1774 }, { "epoch": 1.15, "learning_rate": 1.4140268232591725e-05, "loss": 0.2743, "step": 1775 }, { "epoch": 1.15, "learning_rate": 1.4133902473988445e-05, "loss": 0.2826, "step": 1776 }, { "epoch": 1.15, "learning_rate": 1.4127534694288353e-05, "loss": 0.2763, "step": 1777 }, { "epoch": 1.15, "learning_rate": 1.4121164896604705e-05, "loss": 0.2546, "step": 1778 }, { "epoch": 1.15, "learning_rate": 1.4114793084051743e-05, "loss": 0.2679, "step": 1779 }, { "epoch": 1.15, "learning_rate": 1.4108419259744699e-05, "loss": 0.2519, "step": 1780 }, { "epoch": 1.15, "learning_rate": 1.410204342679978e-05, "loss": 0.2677, "step": 1781 }, { "epoch": 1.15, "learning_rate": 1.4095665588334185e-05, "loss": 0.2802, "step": 1782 }, { "epoch": 1.15, "learning_rate": 1.4089285747466084e-05, "loss": 0.266, "step": 1783 }, { "epoch": 1.16, "learning_rate": 1.4082903907314634e-05, "loss": 0.2673, "step": 1784 }, { "epoch": 1.16, "learning_rate": 1.4076520070999961e-05, "loss": 0.2648, "step": 1785 }, { "epoch": 1.16, "learning_rate": 1.4070134241643174e-05, "loss": 0.2796, "step": 1786 }, { "epoch": 1.16, "learning_rate": 1.406374642236636e-05, "loss": 0.2595, "step": 1787 }, { "epoch": 1.16, "learning_rate": 1.4057356616292566e-05, "loss": 0.2479, "step": 1788 }, { "epoch": 1.16, "learning_rate": 1.405096482654582e-05, "loss": 0.2592, "step": 1789 }, { "epoch": 1.16, "learning_rate": 1.4044571056251118e-05, "loss": 0.2638, "step": 1790 }, { "epoch": 1.16, "learning_rate": 1.4038175308534421e-05, "loss": 0.269, "step": 1791 }, { "epoch": 1.16, "learning_rate": 1.4031777586522664e-05, "loss": 0.2666, "step": 1792 }, { "epoch": 1.16, "learning_rate": 1.4025377893343735e-05, "loss": 0.2646, "step": 1793 }, { "epoch": 1.16, "learning_rate": 1.40189762321265e-05, "loss": 0.2625, "step": 1794 }, { "epoch": 1.16, "learning_rate": 1.4012572606000781e-05, "loss": 0.251, "step": 1795 }, { "epoch": 1.16, "learning_rate": 1.4006167018097357e-05, "loss": 0.269, "step": 1796 }, { "epoch": 1.16, "learning_rate": 1.399975947154797e-05, "loss": 0.2653, "step": 1797 }, { "epoch": 1.16, "learning_rate": 1.399334996948532e-05, "loss": 0.2641, "step": 1798 }, { "epoch": 1.17, "learning_rate": 1.398693851504306e-05, "loss": 0.2756, "step": 1799 }, { "epoch": 1.17, "learning_rate": 1.3980525111355802e-05, "loss": 0.2691, "step": 1800 }, { "epoch": 1.17, "learning_rate": 1.3974109761559107e-05, "loss": 0.2582, "step": 1801 }, { "epoch": 1.17, "learning_rate": 1.3967692468789492e-05, "loss": 0.2705, "step": 1802 }, { "epoch": 1.17, "learning_rate": 1.3961273236184415e-05, "loss": 0.2697, "step": 1803 }, { "epoch": 1.17, "learning_rate": 1.3954852066882292e-05, "loss": 0.251, "step": 1804 }, { "epoch": 1.17, "learning_rate": 1.3948428964022482e-05, "loss": 0.2656, "step": 1805 }, { "epoch": 1.17, "learning_rate": 1.394200393074529e-05, "loss": 0.2786, "step": 1806 }, { "epoch": 1.17, "learning_rate": 1.3935576970191962e-05, "loss": 0.2526, "step": 1807 }, { "epoch": 1.17, "learning_rate": 1.3929148085504691e-05, "loss": 0.253, "step": 1808 }, { "epoch": 1.17, "learning_rate": 1.392271727982661e-05, "loss": 0.2692, "step": 1809 }, { "epoch": 1.17, "learning_rate": 1.3916284556301784e-05, "loss": 0.2542, "step": 1810 }, { "epoch": 1.17, "learning_rate": 1.3909849918075226e-05, "loss": 0.2595, "step": 1811 }, { "epoch": 1.17, "learning_rate": 1.3903413368292877e-05, "loss": 0.266, "step": 1812 }, { "epoch": 1.17, "learning_rate": 1.389697491010162e-05, "loss": 0.2665, "step": 1813 }, { "epoch": 1.17, "learning_rate": 1.3890534546649263e-05, "loss": 0.2628, "step": 1814 }, { "epoch": 1.18, "learning_rate": 1.3884092281084551e-05, "loss": 0.258, "step": 1815 }, { "epoch": 1.18, "learning_rate": 1.3877648116557157e-05, "loss": 0.282, "step": 1816 }, { "epoch": 1.18, "learning_rate": 1.3871202056217682e-05, "loss": 0.2767, "step": 1817 }, { "epoch": 1.18, "learning_rate": 1.3864754103217657e-05, "loss": 0.2723, "step": 1818 }, { "epoch": 1.18, "learning_rate": 1.3858304260709531e-05, "loss": 0.252, "step": 1819 }, { "epoch": 1.18, "learning_rate": 1.3851852531846691e-05, "loss": 0.2843, "step": 1820 }, { "epoch": 1.18, "learning_rate": 1.3845398919783425e-05, "loss": 0.2735, "step": 1821 }, { "epoch": 1.18, "learning_rate": 1.3838943427674965e-05, "loss": 0.2712, "step": 1822 }, { "epoch": 1.18, "learning_rate": 1.3832486058677444e-05, "loss": 0.2554, "step": 1823 }, { "epoch": 1.18, "learning_rate": 1.3826026815947921e-05, "loss": 0.2872, "step": 1824 }, { "epoch": 1.18, "learning_rate": 1.3819565702644369e-05, "loss": 0.2722, "step": 1825 }, { "epoch": 1.18, "learning_rate": 1.3813102721925678e-05, "loss": 0.2695, "step": 1826 }, { "epoch": 1.18, "learning_rate": 1.3806637876951649e-05, "loss": 0.2758, "step": 1827 }, { "epoch": 1.18, "learning_rate": 1.3800171170882993e-05, "loss": 0.2496, "step": 1828 }, { "epoch": 1.18, "learning_rate": 1.3793702606881331e-05, "loss": 0.2736, "step": 1829 }, { "epoch": 1.19, "learning_rate": 1.37872321881092e-05, "loss": 0.2515, "step": 1830 }, { "epoch": 1.19, "learning_rate": 1.3780759917730032e-05, "loss": 0.2638, "step": 1831 }, { "epoch": 1.19, "learning_rate": 1.377428579890817e-05, "loss": 0.2597, "step": 1832 }, { "epoch": 1.19, "learning_rate": 1.3767809834808863e-05, "loss": 0.2518, "step": 1833 }, { "epoch": 1.19, "learning_rate": 1.3761332028598262e-05, "loss": 0.2795, "step": 1834 }, { "epoch": 1.19, "learning_rate": 1.375485238344341e-05, "loss": 0.2596, "step": 1835 }, { "epoch": 1.19, "learning_rate": 1.3748370902512262e-05, "loss": 0.2496, "step": 1836 }, { "epoch": 1.19, "learning_rate": 1.3741887588973661e-05, "loss": 0.263, "step": 1837 }, { "epoch": 1.19, "learning_rate": 1.3735402445997346e-05, "loss": 0.2567, "step": 1838 }, { "epoch": 1.19, "learning_rate": 1.3728915476753959e-05, "loss": 0.2679, "step": 1839 }, { "epoch": 1.19, "learning_rate": 1.3722426684415027e-05, "loss": 0.2627, "step": 1840 }, { "epoch": 1.19, "learning_rate": 1.3715936072152969e-05, "loss": 0.2938, "step": 1841 }, { "epoch": 1.19, "learning_rate": 1.3709443643141097e-05, "loss": 0.2659, "step": 1842 }, { "epoch": 1.19, "learning_rate": 1.3702949400553612e-05, "loss": 0.2585, "step": 1843 }, { "epoch": 1.19, "learning_rate": 1.3696453347565596e-05, "loss": 0.2722, "step": 1844 }, { "epoch": 1.19, "learning_rate": 1.368995548735302e-05, "loss": 0.2862, "step": 1845 }, { "epoch": 1.2, "learning_rate": 1.3683455823092737e-05, "loss": 0.2504, "step": 1846 }, { "epoch": 1.2, "learning_rate": 1.3676954357962484e-05, "loss": 0.2733, "step": 1847 }, { "epoch": 1.2, "learning_rate": 1.367045109514088e-05, "loss": 0.2529, "step": 1848 }, { "epoch": 1.2, "learning_rate": 1.3663946037807416e-05, "loss": 0.2765, "step": 1849 }, { "epoch": 1.2, "learning_rate": 1.365743918914247e-05, "loss": 0.2644, "step": 1850 }, { "epoch": 1.2, "learning_rate": 1.3650930552327287e-05, "loss": 0.2833, "step": 1851 }, { "epoch": 1.2, "learning_rate": 1.3644420130543985e-05, "loss": 0.2763, "step": 1852 }, { "epoch": 1.2, "learning_rate": 1.3637907926975567e-05, "loss": 0.2692, "step": 1853 }, { "epoch": 1.2, "learning_rate": 1.3631393944805898e-05, "loss": 0.2621, "step": 1854 }, { "epoch": 1.2, "learning_rate": 1.3624878187219711e-05, "loss": 0.27, "step": 1855 }, { "epoch": 1.2, "learning_rate": 1.3618360657402617e-05, "loss": 0.2792, "step": 1856 }, { "epoch": 1.2, "learning_rate": 1.3611841358541077e-05, "loss": 0.2686, "step": 1857 }, { "epoch": 1.2, "learning_rate": 1.3605320293822434e-05, "loss": 0.2806, "step": 1858 }, { "epoch": 1.2, "learning_rate": 1.3598797466434885e-05, "loss": 0.2579, "step": 1859 }, { "epoch": 1.2, "learning_rate": 1.359227287956749e-05, "loss": 0.268, "step": 1860 }, { "epoch": 1.21, "learning_rate": 1.3585746536410169e-05, "loss": 0.2719, "step": 1861 }, { "epoch": 1.21, "learning_rate": 1.3579218440153703e-05, "loss": 0.2636, "step": 1862 }, { "epoch": 1.21, "learning_rate": 1.3572688593989727e-05, "loss": 0.2547, "step": 1863 }, { "epoch": 1.21, "learning_rate": 1.3566157001110734e-05, "loss": 0.2618, "step": 1864 }, { "epoch": 1.21, "learning_rate": 1.355962366471007e-05, "loss": 0.2803, "step": 1865 }, { "epoch": 1.21, "learning_rate": 1.3553088587981935e-05, "loss": 0.2772, "step": 1866 }, { "epoch": 1.21, "learning_rate": 1.3546551774121379e-05, "loss": 0.2712, "step": 1867 }, { "epoch": 1.21, "learning_rate": 1.3540013226324296e-05, "loss": 0.2751, "step": 1868 }, { "epoch": 1.21, "learning_rate": 1.3533472947787435e-05, "loss": 0.2608, "step": 1869 }, { "epoch": 1.21, "learning_rate": 1.3526930941708391e-05, "loss": 0.2665, "step": 1870 }, { "epoch": 1.21, "learning_rate": 1.3520387211285599e-05, "loss": 0.2433, "step": 1871 }, { "epoch": 1.21, "learning_rate": 1.351384175971834e-05, "loss": 0.2754, "step": 1872 }, { "epoch": 1.21, "learning_rate": 1.3507294590206734e-05, "loss": 0.2786, "step": 1873 }, { "epoch": 1.21, "learning_rate": 1.3500745705951745e-05, "loss": 0.2449, "step": 1874 }, { "epoch": 1.21, "learning_rate": 1.3494195110155173e-05, "loss": 0.2721, "step": 1875 }, { "epoch": 1.22, "learning_rate": 1.3487642806019654e-05, "loss": 0.268, "step": 1876 }, { "epoch": 1.22, "learning_rate": 1.3481088796748658e-05, "loss": 0.2767, "step": 1877 }, { "epoch": 1.22, "learning_rate": 1.3474533085546495e-05, "loss": 0.261, "step": 1878 }, { "epoch": 1.22, "learning_rate": 1.3467975675618299e-05, "loss": 0.2567, "step": 1879 }, { "epoch": 1.22, "learning_rate": 1.346141657017004e-05, "loss": 0.2769, "step": 1880 }, { "epoch": 1.22, "learning_rate": 1.3454855772408515e-05, "loss": 0.2734, "step": 1881 }, { "epoch": 1.22, "learning_rate": 1.344829328554135e-05, "loss": 0.2746, "step": 1882 }, { "epoch": 1.22, "learning_rate": 1.3441729112776994e-05, "loss": 0.2656, "step": 1883 }, { "epoch": 1.22, "learning_rate": 1.3435163257324717e-05, "loss": 0.2575, "step": 1884 }, { "epoch": 1.22, "learning_rate": 1.3428595722394626e-05, "loss": 0.2812, "step": 1885 }, { "epoch": 1.22, "learning_rate": 1.3422026511197634e-05, "loss": 0.273, "step": 1886 }, { "epoch": 1.22, "learning_rate": 1.3415455626945479e-05, "loss": 0.2509, "step": 1887 }, { "epoch": 1.22, "learning_rate": 1.3408883072850718e-05, "loss": 0.2644, "step": 1888 }, { "epoch": 1.22, "learning_rate": 1.3402308852126725e-05, "loss": 0.2607, "step": 1889 }, { "epoch": 1.22, "learning_rate": 1.3395732967987684e-05, "loss": 0.261, "step": 1890 }, { "epoch": 1.22, "learning_rate": 1.3389155423648596e-05, "loss": 0.2601, "step": 1891 }, { "epoch": 1.23, "learning_rate": 1.3382576222325277e-05, "loss": 0.2536, "step": 1892 }, { "epoch": 1.23, "learning_rate": 1.3375995367234345e-05, "loss": 0.2533, "step": 1893 }, { "epoch": 1.23, "learning_rate": 1.3369412861593229e-05, "loss": 0.2721, "step": 1894 }, { "epoch": 1.23, "learning_rate": 1.3362828708620173e-05, "loss": 0.2731, "step": 1895 }, { "epoch": 1.23, "learning_rate": 1.3356242911534216e-05, "loss": 0.2748, "step": 1896 }, { "epoch": 1.23, "learning_rate": 1.3349655473555208e-05, "loss": 0.2653, "step": 1897 }, { "epoch": 1.23, "learning_rate": 1.3343066397903794e-05, "loss": 0.2595, "step": 1898 }, { "epoch": 1.23, "learning_rate": 1.333647568780143e-05, "loss": 0.2865, "step": 1899 }, { "epoch": 1.23, "learning_rate": 1.3329883346470358e-05, "loss": 0.2799, "step": 1900 }, { "epoch": 1.23, "learning_rate": 1.3323289377133629e-05, "loss": 0.2728, "step": 1901 }, { "epoch": 1.23, "learning_rate": 1.3316693783015085e-05, "loss": 0.2769, "step": 1902 }, { "epoch": 1.23, "learning_rate": 1.3310096567339362e-05, "loss": 0.2806, "step": 1903 }, { "epoch": 1.23, "learning_rate": 1.3303497733331887e-05, "loss": 0.2614, "step": 1904 }, { "epoch": 1.23, "learning_rate": 1.3296897284218888e-05, "loss": 0.2684, "step": 1905 }, { "epoch": 1.23, "learning_rate": 1.329029522322737e-05, "loss": 0.2637, "step": 1906 }, { "epoch": 1.24, "learning_rate": 1.3283691553585132e-05, "loss": 0.2818, "step": 1907 }, { "epoch": 1.24, "learning_rate": 1.3277086278520759e-05, "loss": 0.2732, "step": 1908 }, { "epoch": 1.24, "learning_rate": 1.3270479401263622e-05, "loss": 0.2663, "step": 1909 }, { "epoch": 1.24, "learning_rate": 1.3263870925043875e-05, "loss": 0.268, "step": 1910 }, { "epoch": 1.24, "learning_rate": 1.3257260853092453e-05, "loss": 0.2557, "step": 1911 }, { "epoch": 1.24, "learning_rate": 1.325064918864107e-05, "loss": 0.2627, "step": 1912 }, { "epoch": 1.24, "learning_rate": 1.3244035934922217e-05, "loss": 0.26, "step": 1913 }, { "epoch": 1.24, "learning_rate": 1.3237421095169167e-05, "loss": 0.2524, "step": 1914 }, { "epoch": 1.24, "learning_rate": 1.3230804672615971e-05, "loss": 0.2666, "step": 1915 }, { "epoch": 1.24, "learning_rate": 1.3224186670497443e-05, "loss": 0.2646, "step": 1916 }, { "epoch": 1.24, "learning_rate": 1.3217567092049178e-05, "loss": 0.2558, "step": 1917 }, { "epoch": 1.24, "learning_rate": 1.3210945940507538e-05, "loss": 0.2683, "step": 1918 }, { "epoch": 1.24, "learning_rate": 1.3204323219109654e-05, "loss": 0.2626, "step": 1919 }, { "epoch": 1.24, "learning_rate": 1.3197698931093427e-05, "loss": 0.2588, "step": 1920 }, { "epoch": 1.24, "learning_rate": 1.3191073079697526e-05, "loss": 0.251, "step": 1921 }, { "epoch": 1.24, "learning_rate": 1.3184445668161375e-05, "loss": 0.273, "step": 1922 }, { "epoch": 1.25, "learning_rate": 1.3177816699725168e-05, "loss": 0.2645, "step": 1923 }, { "epoch": 1.25, "learning_rate": 1.317118617762986e-05, "loss": 0.2667, "step": 1924 }, { "epoch": 1.25, "learning_rate": 1.3164554105117162e-05, "loss": 0.2544, "step": 1925 }, { "epoch": 1.25, "learning_rate": 1.3157920485429552e-05, "loss": 0.2526, "step": 1926 }, { "epoch": 1.25, "learning_rate": 1.3151285321810249e-05, "loss": 0.2768, "step": 1927 }, { "epoch": 1.25, "learning_rate": 1.3144648617503243e-05, "loss": 0.265, "step": 1928 }, { "epoch": 1.25, "learning_rate": 1.3138010375753266e-05, "loss": 0.2813, "step": 1929 }, { "epoch": 1.25, "learning_rate": 1.3131370599805804e-05, "loss": 0.2771, "step": 1930 }, { "epoch": 1.25, "learning_rate": 1.3124729292907101e-05, "loss": 0.2565, "step": 1931 }, { "epoch": 1.25, "learning_rate": 1.3118086458304138e-05, "loss": 0.266, "step": 1932 }, { "epoch": 1.25, "learning_rate": 1.3111442099244651e-05, "loss": 0.2598, "step": 1933 }, { "epoch": 1.25, "learning_rate": 1.3104796218977117e-05, "loss": 0.2559, "step": 1934 }, { "epoch": 1.25, "learning_rate": 1.3098148820750755e-05, "loss": 0.2735, "step": 1935 }, { "epoch": 1.25, "learning_rate": 1.3091499907815532e-05, "loss": 0.2584, "step": 1936 }, { "epoch": 1.25, "learning_rate": 1.3084849483422155e-05, "loss": 0.2541, "step": 1937 }, { "epoch": 1.26, "learning_rate": 1.3078197550822068e-05, "loss": 0.2634, "step": 1938 }, { "epoch": 1.26, "learning_rate": 1.3071544113267448e-05, "loss": 0.2698, "step": 1939 }, { "epoch": 1.26, "learning_rate": 1.3064889174011216e-05, "loss": 0.2705, "step": 1940 }, { "epoch": 1.26, "learning_rate": 1.3058232736307019e-05, "loss": 0.2659, "step": 1941 }, { "epoch": 1.26, "learning_rate": 1.3051574803409244e-05, "loss": 0.2628, "step": 1942 }, { "epoch": 1.26, "learning_rate": 1.3044915378573004e-05, "loss": 0.2621, "step": 1943 }, { "epoch": 1.26, "learning_rate": 1.3038254465054145e-05, "loss": 0.2581, "step": 1944 }, { "epoch": 1.26, "learning_rate": 1.3031592066109238e-05, "loss": 0.2701, "step": 1945 }, { "epoch": 1.26, "learning_rate": 1.302492818499558e-05, "loss": 0.2838, "step": 1946 }, { "epoch": 1.26, "learning_rate": 1.3018262824971195e-05, "loss": 0.2658, "step": 1947 }, { "epoch": 1.26, "learning_rate": 1.301159598929483e-05, "loss": 0.2726, "step": 1948 }, { "epoch": 1.26, "learning_rate": 1.3004927681225952e-05, "loss": 0.2645, "step": 1949 }, { "epoch": 1.26, "learning_rate": 1.2998257904024747e-05, "loss": 0.2558, "step": 1950 }, { "epoch": 1.26, "learning_rate": 1.299158666095212e-05, "loss": 0.2603, "step": 1951 }, { "epoch": 1.26, "learning_rate": 1.2984913955269696e-05, "loss": 0.2729, "step": 1952 }, { "epoch": 1.26, "learning_rate": 1.297823979023981e-05, "loss": 0.2945, "step": 1953 }, { "epoch": 1.27, "learning_rate": 1.2971564169125515e-05, "loss": 0.2784, "step": 1954 }, { "epoch": 1.27, "learning_rate": 1.2964887095190565e-05, "loss": 0.2605, "step": 1955 }, { "epoch": 1.27, "learning_rate": 1.2958208571699449e-05, "loss": 0.2553, "step": 1956 }, { "epoch": 1.27, "learning_rate": 1.2951528601917336e-05, "loss": 0.2642, "step": 1957 }, { "epoch": 1.27, "learning_rate": 1.294484718911012e-05, "loss": 0.2583, "step": 1958 }, { "epoch": 1.27, "learning_rate": 1.2938164336544392e-05, "loss": 0.2573, "step": 1959 }, { "epoch": 1.27, "learning_rate": 1.2931480047487452e-05, "loss": 0.2761, "step": 1960 }, { "epoch": 1.27, "learning_rate": 1.2924794325207301e-05, "loss": 0.2711, "step": 1961 }, { "epoch": 1.27, "learning_rate": 1.291810717297264e-05, "loss": 0.2833, "step": 1962 }, { "epoch": 1.27, "learning_rate": 1.2911418594052868e-05, "loss": 0.2598, "step": 1963 }, { "epoch": 1.27, "learning_rate": 1.2904728591718086e-05, "loss": 0.2581, "step": 1964 }, { "epoch": 1.27, "learning_rate": 1.2898037169239084e-05, "loss": 0.2478, "step": 1965 }, { "epoch": 1.27, "learning_rate": 1.2891344329887352e-05, "loss": 0.2663, "step": 1966 }, { "epoch": 1.27, "learning_rate": 1.288465007693507e-05, "loss": 0.2669, "step": 1967 }, { "epoch": 1.27, "learning_rate": 1.287795441365511e-05, "loss": 0.2636, "step": 1968 }, { "epoch": 1.28, "learning_rate": 1.2871257343321033e-05, "loss": 0.2901, "step": 1969 }, { "epoch": 1.28, "learning_rate": 1.2864558869207091e-05, "loss": 0.2549, "step": 1970 }, { "epoch": 1.28, "learning_rate": 1.2857858994588219e-05, "loss": 0.258, "step": 1971 }, { "epoch": 1.28, "learning_rate": 1.2851157722740033e-05, "loss": 0.2595, "step": 1972 }, { "epoch": 1.28, "learning_rate": 1.2844455056938841e-05, "loss": 0.272, "step": 1973 }, { "epoch": 1.28, "learning_rate": 1.283775100046163e-05, "loss": 0.2788, "step": 1974 }, { "epoch": 1.28, "learning_rate": 1.2831045556586055e-05, "loss": 0.274, "step": 1975 }, { "epoch": 1.28, "learning_rate": 1.282433872859047e-05, "loss": 0.2669, "step": 1976 }, { "epoch": 1.28, "learning_rate": 1.2817630519753888e-05, "loss": 0.2492, "step": 1977 }, { "epoch": 1.28, "learning_rate": 1.281092093335601e-05, "loss": 0.2702, "step": 1978 }, { "epoch": 1.28, "learning_rate": 1.28042099726772e-05, "loss": 0.2637, "step": 1979 }, { "epoch": 1.28, "learning_rate": 1.2797497640998503e-05, "loss": 0.2893, "step": 1980 }, { "epoch": 1.28, "learning_rate": 1.2790783941601621e-05, "loss": 0.2663, "step": 1981 }, { "epoch": 1.28, "learning_rate": 1.2784068877768943e-05, "loss": 0.2539, "step": 1982 }, { "epoch": 1.28, "learning_rate": 1.277735245278351e-05, "loss": 0.2613, "step": 1983 }, { "epoch": 1.28, "learning_rate": 1.2770634669929036e-05, "loss": 0.2689, "step": 1984 }, { "epoch": 1.29, "learning_rate": 1.2763915532489897e-05, "loss": 0.2647, "step": 1985 }, { "epoch": 1.29, "learning_rate": 1.2757195043751129e-05, "loss": 0.2574, "step": 1986 }, { "epoch": 1.29, "learning_rate": 1.2750473206998436e-05, "loss": 0.2771, "step": 1987 }, { "epoch": 1.29, "learning_rate": 1.2743750025518168e-05, "loss": 0.2599, "step": 1988 }, { "epoch": 1.29, "learning_rate": 1.2737025502597349e-05, "loss": 0.2589, "step": 1989 }, { "epoch": 1.29, "learning_rate": 1.2730299641523643e-05, "loss": 0.2693, "step": 1990 }, { "epoch": 1.29, "learning_rate": 1.2723572445585376e-05, "loss": 0.2628, "step": 1991 }, { "epoch": 1.29, "learning_rate": 1.2716843918071531e-05, "loss": 0.2505, "step": 1992 }, { "epoch": 1.29, "learning_rate": 1.2710114062271735e-05, "loss": 0.2584, "step": 1993 }, { "epoch": 1.29, "learning_rate": 1.2703382881476263e-05, "loss": 0.2622, "step": 1994 }, { "epoch": 1.29, "learning_rate": 1.2696650378976046e-05, "loss": 0.2698, "step": 1995 }, { "epoch": 1.29, "learning_rate": 1.2689916558062649e-05, "loss": 0.265, "step": 1996 }, { "epoch": 1.29, "learning_rate": 1.2683181422028298e-05, "loss": 0.2556, "step": 1997 }, { "epoch": 1.29, "learning_rate": 1.2676444974165848e-05, "loss": 0.2776, "step": 1998 }, { "epoch": 1.29, "learning_rate": 1.2669707217768804e-05, "loss": 0.2731, "step": 1999 }, { "epoch": 1.3, "learning_rate": 1.2662968156131301e-05, "loss": 0.2763, "step": 2000 }, { "epoch": 1.3, "learning_rate": 1.2656227792548124e-05, "loss": 0.2542, "step": 2001 }, { "epoch": 1.3, "learning_rate": 1.2649486130314686e-05, "loss": 0.2859, "step": 2002 }, { "epoch": 1.3, "learning_rate": 1.2642743172727036e-05, "loss": 0.2643, "step": 2003 }, { "epoch": 1.3, "learning_rate": 1.2635998923081863e-05, "loss": 0.2671, "step": 2004 }, { "epoch": 1.3, "learning_rate": 1.2629253384676478e-05, "loss": 0.2746, "step": 2005 }, { "epoch": 1.3, "learning_rate": 1.262250656080883e-05, "loss": 0.2682, "step": 2006 }, { "epoch": 1.3, "learning_rate": 1.2615758454777492e-05, "loss": 0.2656, "step": 2007 }, { "epoch": 1.3, "learning_rate": 1.2609009069881664e-05, "loss": 0.2483, "step": 2008 }, { "epoch": 1.3, "learning_rate": 1.2602258409421173e-05, "loss": 0.2578, "step": 2009 }, { "epoch": 1.3, "learning_rate": 1.2595506476696468e-05, "loss": 0.2578, "step": 2010 }, { "epoch": 1.3, "learning_rate": 1.2588753275008625e-05, "loss": 0.2549, "step": 2011 }, { "epoch": 1.3, "learning_rate": 1.2581998807659333e-05, "loss": 0.2617, "step": 2012 }, { "epoch": 1.3, "learning_rate": 1.2575243077950903e-05, "loss": 0.2568, "step": 2013 }, { "epoch": 1.3, "learning_rate": 1.2568486089186264e-05, "loss": 0.2711, "step": 2014 }, { "epoch": 1.31, "learning_rate": 1.256172784466896e-05, "loss": 0.2646, "step": 2015 }, { "epoch": 1.31, "learning_rate": 1.2554968347703152e-05, "loss": 0.2584, "step": 2016 }, { "epoch": 1.31, "learning_rate": 1.2548207601593601e-05, "loss": 0.2778, "step": 2017 }, { "epoch": 1.31, "learning_rate": 1.2541445609645697e-05, "loss": 0.2567, "step": 2018 }, { "epoch": 1.31, "learning_rate": 1.253468237516543e-05, "loss": 0.2647, "step": 2019 }, { "epoch": 1.31, "learning_rate": 1.2527917901459392e-05, "loss": 0.264, "step": 2020 }, { "epoch": 1.31, "learning_rate": 1.252115219183479e-05, "loss": 0.2625, "step": 2021 }, { "epoch": 1.31, "learning_rate": 1.2514385249599433e-05, "loss": 0.262, "step": 2022 }, { "epoch": 1.31, "learning_rate": 1.2507617078061726e-05, "loss": 0.2685, "step": 2023 }, { "epoch": 1.31, "learning_rate": 1.2500847680530684e-05, "loss": 0.2636, "step": 2024 }, { "epoch": 1.31, "learning_rate": 1.2494077060315917e-05, "loss": 0.2634, "step": 2025 }, { "epoch": 1.31, "learning_rate": 1.2487305220727635e-05, "loss": 0.2662, "step": 2026 }, { "epoch": 1.31, "learning_rate": 1.2480532165076636e-05, "loss": 0.2539, "step": 2027 }, { "epoch": 1.31, "learning_rate": 1.2473757896674328e-05, "loss": 0.2532, "step": 2028 }, { "epoch": 1.31, "learning_rate": 1.2466982418832698e-05, "loss": 0.2702, "step": 2029 }, { "epoch": 1.31, "learning_rate": 1.2460205734864333e-05, "loss": 0.261, "step": 2030 }, { "epoch": 1.32, "learning_rate": 1.2453427848082403e-05, "loss": 0.2741, "step": 2031 }, { "epoch": 1.32, "learning_rate": 1.244664876180067e-05, "loss": 0.2724, "step": 2032 }, { "epoch": 1.32, "learning_rate": 1.2439868479333482e-05, "loss": 0.2649, "step": 2033 }, { "epoch": 1.32, "learning_rate": 1.2433087003995767e-05, "loss": 0.2585, "step": 2034 }, { "epoch": 1.32, "learning_rate": 1.2426304339103048e-05, "loss": 0.2718, "step": 2035 }, { "epoch": 1.32, "learning_rate": 1.2419520487971419e-05, "loss": 0.2588, "step": 2036 }, { "epoch": 1.32, "learning_rate": 1.2412735453917555e-05, "loss": 0.2517, "step": 2037 }, { "epoch": 1.32, "learning_rate": 1.2405949240258712e-05, "loss": 0.2645, "step": 2038 }, { "epoch": 1.32, "learning_rate": 1.2399161850312726e-05, "loss": 0.2722, "step": 2039 }, { "epoch": 1.32, "learning_rate": 1.2392373287398002e-05, "loss": 0.2532, "step": 2040 }, { "epoch": 1.32, "learning_rate": 1.238558355483352e-05, "loss": 0.2641, "step": 2041 }, { "epoch": 1.32, "learning_rate": 1.2378792655938834e-05, "loss": 0.2589, "step": 2042 }, { "epoch": 1.32, "learning_rate": 1.2372000594034067e-05, "loss": 0.2657, "step": 2043 }, { "epoch": 1.32, "learning_rate": 1.236520737243991e-05, "loss": 0.2536, "step": 2044 }, { "epoch": 1.32, "learning_rate": 1.2358412994477617e-05, "loss": 0.2651, "step": 2045 }, { "epoch": 1.33, "learning_rate": 1.2351617463469019e-05, "loss": 0.2682, "step": 2046 }, { "epoch": 1.33, "learning_rate": 1.23448207827365e-05, "loss": 0.2539, "step": 2047 }, { "epoch": 1.33, "learning_rate": 1.2338022955603011e-05, "loss": 0.2578, "step": 2048 }, { "epoch": 1.33, "learning_rate": 1.233122398539206e-05, "loss": 0.2449, "step": 2049 }, { "epoch": 1.33, "learning_rate": 1.2324423875427716e-05, "loss": 0.258, "step": 2050 }, { "epoch": 1.33, "learning_rate": 1.2317622629034607e-05, "loss": 0.2683, "step": 2051 }, { "epoch": 1.33, "learning_rate": 1.2310820249537916e-05, "loss": 0.2674, "step": 2052 }, { "epoch": 1.33, "learning_rate": 1.2304016740263375e-05, "loss": 0.2608, "step": 2053 }, { "epoch": 1.33, "learning_rate": 1.2297212104537275e-05, "loss": 0.26, "step": 2054 }, { "epoch": 1.33, "learning_rate": 1.2290406345686455e-05, "loss": 0.2761, "step": 2055 }, { "epoch": 1.33, "learning_rate": 1.2283599467038301e-05, "loss": 0.2702, "step": 2056 }, { "epoch": 1.33, "learning_rate": 1.227679147192075e-05, "loss": 0.2477, "step": 2057 }, { "epoch": 1.33, "learning_rate": 1.226998236366228e-05, "loss": 0.2634, "step": 2058 }, { "epoch": 1.33, "learning_rate": 1.2263172145591924e-05, "loss": 0.2491, "step": 2059 }, { "epoch": 1.33, "learning_rate": 1.2256360821039243e-05, "loss": 0.2654, "step": 2060 }, { "epoch": 1.33, "learning_rate": 1.2249548393334352e-05, "loss": 0.2677, "step": 2061 }, { "epoch": 1.34, "learning_rate": 1.2242734865807892e-05, "loss": 0.271, "step": 2062 }, { "epoch": 1.34, "learning_rate": 1.2235920241791058e-05, "loss": 0.2556, "step": 2063 }, { "epoch": 1.34, "learning_rate": 1.2229104524615565e-05, "loss": 0.2732, "step": 2064 }, { "epoch": 1.34, "learning_rate": 1.2222287717613673e-05, "loss": 0.2622, "step": 2065 }, { "epoch": 1.34, "learning_rate": 1.2215469824118169e-05, "loss": 0.2478, "step": 2066 }, { "epoch": 1.34, "learning_rate": 1.2208650847462378e-05, "loss": 0.2619, "step": 2067 }, { "epoch": 1.34, "learning_rate": 1.2201830790980147e-05, "loss": 0.2533, "step": 2068 }, { "epoch": 1.34, "learning_rate": 1.2195009658005855e-05, "loss": 0.2722, "step": 2069 }, { "epoch": 1.34, "learning_rate": 1.2188187451874407e-05, "loss": 0.2405, "step": 2070 }, { "epoch": 1.34, "learning_rate": 1.2181364175921233e-05, "loss": 0.2648, "step": 2071 }, { "epoch": 1.34, "learning_rate": 1.2174539833482282e-05, "loss": 0.2653, "step": 2072 }, { "epoch": 1.34, "learning_rate": 1.2167714427894028e-05, "loss": 0.2417, "step": 2073 }, { "epoch": 1.34, "learning_rate": 1.216088796249347e-05, "loss": 0.2559, "step": 2074 }, { "epoch": 1.34, "learning_rate": 1.2154060440618116e-05, "loss": 0.274, "step": 2075 }, { "epoch": 1.34, "learning_rate": 1.2147231865605992e-05, "loss": 0.2859, "step": 2076 }, { "epoch": 1.35, "learning_rate": 1.2140402240795646e-05, "loss": 0.2523, "step": 2077 }, { "epoch": 1.35, "learning_rate": 1.213357156952613e-05, "loss": 0.2676, "step": 2078 }, { "epoch": 1.35, "learning_rate": 1.2126739855137013e-05, "loss": 0.2883, "step": 2079 }, { "epoch": 1.35, "learning_rate": 1.2119907100968372e-05, "loss": 0.2643, "step": 2080 }, { "epoch": 1.35, "learning_rate": 1.2113073310360794e-05, "loss": 0.2719, "step": 2081 }, { "epoch": 1.35, "learning_rate": 1.2106238486655373e-05, "loss": 0.2713, "step": 2082 }, { "epoch": 1.35, "learning_rate": 1.2099402633193707e-05, "loss": 0.2774, "step": 2083 }, { "epoch": 1.35, "learning_rate": 1.2092565753317896e-05, "loss": 0.2665, "step": 2084 }, { "epoch": 1.35, "learning_rate": 1.2085727850370546e-05, "loss": 0.2668, "step": 2085 }, { "epoch": 1.35, "learning_rate": 1.2078888927694756e-05, "loss": 0.2526, "step": 2086 }, { "epoch": 1.35, "learning_rate": 1.207204898863413e-05, "loss": 0.2611, "step": 2087 }, { "epoch": 1.35, "learning_rate": 1.2065208036532771e-05, "loss": 0.25, "step": 2088 }, { "epoch": 1.35, "learning_rate": 1.2058366074735269e-05, "loss": 0.2582, "step": 2089 }, { "epoch": 1.35, "learning_rate": 1.205152310658671e-05, "loss": 0.2513, "step": 2090 }, { "epoch": 1.35, "learning_rate": 1.204467913543268e-05, "loss": 0.2583, "step": 2091 }, { "epoch": 1.35, "learning_rate": 1.2037834164619244e-05, "loss": 0.2552, "step": 2092 }, { "epoch": 1.36, "learning_rate": 1.2030988197492965e-05, "loss": 0.261, "step": 2093 }, { "epoch": 1.36, "learning_rate": 1.2024141237400886e-05, "loss": 0.2524, "step": 2094 }, { "epoch": 1.36, "learning_rate": 1.201729328769054e-05, "loss": 0.2577, "step": 2095 }, { "epoch": 1.36, "learning_rate": 1.2010444351709943e-05, "loss": 0.261, "step": 2096 }, { "epoch": 1.36, "learning_rate": 1.200359443280759e-05, "loss": 0.2672, "step": 2097 }, { "epoch": 1.36, "learning_rate": 1.1996743534332462e-05, "loss": 0.2683, "step": 2098 }, { "epoch": 1.36, "learning_rate": 1.1989891659634011e-05, "loss": 0.2621, "step": 2099 }, { "epoch": 1.36, "learning_rate": 1.1983038812062182e-05, "loss": 0.2732, "step": 2100 }, { "epoch": 1.36, "learning_rate": 1.1976184994967377e-05, "loss": 0.2754, "step": 2101 }, { "epoch": 1.36, "learning_rate": 1.196933021170048e-05, "loss": 0.2539, "step": 2102 }, { "epoch": 1.36, "learning_rate": 1.196247446561285e-05, "loss": 0.2689, "step": 2103 }, { "epoch": 1.36, "learning_rate": 1.1955617760056315e-05, "loss": 0.26, "step": 2104 }, { "epoch": 1.36, "learning_rate": 1.1948760098383167e-05, "loss": 0.2629, "step": 2105 }, { "epoch": 1.36, "learning_rate": 1.1941901483946175e-05, "loss": 0.2733, "step": 2106 }, { "epoch": 1.36, "learning_rate": 1.1935041920098562e-05, "loss": 0.2661, "step": 2107 }, { "epoch": 1.37, "learning_rate": 1.1928181410194028e-05, "loss": 0.2767, "step": 2108 }, { "epoch": 1.37, "learning_rate": 1.1921319957586728e-05, "loss": 0.2715, "step": 2109 }, { "epoch": 1.37, "learning_rate": 1.1914457565631275e-05, "loss": 0.2562, "step": 2110 }, { "epoch": 1.37, "learning_rate": 1.1907594237682749e-05, "loss": 0.2672, "step": 2111 }, { "epoch": 1.37, "learning_rate": 1.1900729977096683e-05, "loss": 0.2597, "step": 2112 }, { "epoch": 1.37, "learning_rate": 1.1893864787229068e-05, "loss": 0.2719, "step": 2113 }, { "epoch": 1.37, "learning_rate": 1.1886998671436347e-05, "loss": 0.2704, "step": 2114 }, { "epoch": 1.37, "learning_rate": 1.1880131633075416e-05, "loss": 0.2581, "step": 2115 }, { "epoch": 1.37, "learning_rate": 1.1873263675503626e-05, "loss": 0.2818, "step": 2116 }, { "epoch": 1.37, "learning_rate": 1.1866394802078773e-05, "loss": 0.2665, "step": 2117 }, { "epoch": 1.37, "learning_rate": 1.1859525016159099e-05, "loss": 0.2491, "step": 2118 }, { "epoch": 1.37, "learning_rate": 1.18526543211033e-05, "loss": 0.2682, "step": 2119 }, { "epoch": 1.37, "learning_rate": 1.1845782720270511e-05, "loss": 0.2634, "step": 2120 }, { "epoch": 1.37, "learning_rate": 1.1838910217020309e-05, "loss": 0.2603, "step": 2121 }, { "epoch": 1.37, "learning_rate": 1.1832036814712718e-05, "loss": 0.2703, "step": 2122 }, { "epoch": 1.38, "learning_rate": 1.1825162516708191e-05, "loss": 0.2567, "step": 2123 }, { "epoch": 1.38, "learning_rate": 1.181828732636763e-05, "loss": 0.2638, "step": 2124 }, { "epoch": 1.38, "learning_rate": 1.1811411247052362e-05, "loss": 0.2515, "step": 2125 }, { "epoch": 1.38, "learning_rate": 1.1804534282124167e-05, "loss": 0.2535, "step": 2126 }, { "epoch": 1.38, "learning_rate": 1.1797656434945237e-05, "loss": 0.2699, "step": 2127 }, { "epoch": 1.38, "learning_rate": 1.1790777708878207e-05, "loss": 0.2689, "step": 2128 }, { "epoch": 1.38, "learning_rate": 1.1783898107286139e-05, "loss": 0.25, "step": 2129 }, { "epoch": 1.38, "learning_rate": 1.1777017633532524e-05, "loss": 0.2613, "step": 2130 }, { "epoch": 1.38, "learning_rate": 1.1770136290981277e-05, "loss": 0.268, "step": 2131 }, { "epoch": 1.38, "learning_rate": 1.1763254082996742e-05, "loss": 0.2613, "step": 2132 }, { "epoch": 1.38, "learning_rate": 1.1756371012943684e-05, "loss": 0.2602, "step": 2133 }, { "epoch": 1.38, "learning_rate": 1.1749487084187285e-05, "loss": 0.2686, "step": 2134 }, { "epoch": 1.38, "learning_rate": 1.1742602300093154e-05, "loss": 0.2656, "step": 2135 }, { "epoch": 1.38, "learning_rate": 1.1735716664027314e-05, "loss": 0.2571, "step": 2136 }, { "epoch": 1.38, "learning_rate": 1.1728830179356208e-05, "loss": 0.2662, "step": 2137 }, { "epoch": 1.38, "learning_rate": 1.1721942849446686e-05, "loss": 0.2499, "step": 2138 }, { "epoch": 1.39, "learning_rate": 1.1715054677666023e-05, "loss": 0.2734, "step": 2139 }, { "epoch": 1.39, "learning_rate": 1.1708165667381892e-05, "loss": 0.269, "step": 2140 }, { "epoch": 1.39, "learning_rate": 1.1701275821962392e-05, "loss": 0.266, "step": 2141 }, { "epoch": 1.39, "learning_rate": 1.1694385144776015e-05, "loss": 0.2666, "step": 2142 }, { "epoch": 1.39, "learning_rate": 1.1687493639191672e-05, "loss": 0.2651, "step": 2143 }, { "epoch": 1.39, "learning_rate": 1.1680601308578667e-05, "loss": 0.2604, "step": 2144 }, { "epoch": 1.39, "learning_rate": 1.1673708156306715e-05, "loss": 0.2588, "step": 2145 }, { "epoch": 1.39, "learning_rate": 1.1666814185745936e-05, "loss": 0.2729, "step": 2146 }, { "epoch": 1.39, "learning_rate": 1.165991940026684e-05, "loss": 0.2714, "step": 2147 }, { "epoch": 1.39, "learning_rate": 1.1653023803240346e-05, "loss": 0.2638, "step": 2148 }, { "epoch": 1.39, "learning_rate": 1.1646127398037759e-05, "loss": 0.2542, "step": 2149 }, { "epoch": 1.39, "learning_rate": 1.1639230188030788e-05, "loss": 0.267, "step": 2150 }, { "epoch": 1.39, "learning_rate": 1.1632332176591529e-05, "loss": 0.2668, "step": 2151 }, { "epoch": 1.39, "learning_rate": 1.1625433367092481e-05, "loss": 0.2597, "step": 2152 }, { "epoch": 1.39, "learning_rate": 1.1618533762906515e-05, "loss": 0.27, "step": 2153 }, { "epoch": 1.4, "learning_rate": 1.1611633367406907e-05, "loss": 0.2665, "step": 2154 }, { "epoch": 1.4, "learning_rate": 1.1604732183967311e-05, "loss": 0.2588, "step": 2155 }, { "epoch": 1.4, "learning_rate": 1.159783021596177e-05, "loss": 0.2633, "step": 2156 }, { "epoch": 1.4, "learning_rate": 1.159092746676471e-05, "loss": 0.2538, "step": 2157 }, { "epoch": 1.4, "learning_rate": 1.1584023939750936e-05, "loss": 0.2616, "step": 2158 }, { "epoch": 1.4, "learning_rate": 1.1577119638295638e-05, "loss": 0.2597, "step": 2159 }, { "epoch": 1.4, "learning_rate": 1.157021456577438e-05, "loss": 0.264, "step": 2160 }, { "epoch": 1.4, "learning_rate": 1.1563308725563107e-05, "loss": 0.2654, "step": 2161 }, { "epoch": 1.4, "learning_rate": 1.1556402121038139e-05, "loss": 0.2522, "step": 2162 }, { "epoch": 1.4, "learning_rate": 1.1549494755576164e-05, "loss": 0.2738, "step": 2163 }, { "epoch": 1.4, "learning_rate": 1.1542586632554251e-05, "loss": 0.267, "step": 2164 }, { "epoch": 1.4, "learning_rate": 1.1535677755349832e-05, "loss": 0.2513, "step": 2165 }, { "epoch": 1.4, "learning_rate": 1.152876812734071e-05, "loss": 0.259, "step": 2166 }, { "epoch": 1.4, "learning_rate": 1.1521857751905057e-05, "loss": 0.2698, "step": 2167 }, { "epoch": 1.4, "learning_rate": 1.1514946632421408e-05, "loss": 0.2653, "step": 2168 }, { "epoch": 1.4, "learning_rate": 1.1508034772268661e-05, "loss": 0.27, "step": 2169 }, { "epoch": 1.41, "learning_rate": 1.150112217482608e-05, "loss": 0.2782, "step": 2170 }, { "epoch": 1.41, "learning_rate": 1.1494208843473284e-05, "loss": 0.2688, "step": 2171 }, { "epoch": 1.41, "learning_rate": 1.1487294781590261e-05, "loss": 0.2555, "step": 2172 }, { "epoch": 1.41, "learning_rate": 1.1480379992557343e-05, "loss": 0.2621, "step": 2173 }, { "epoch": 1.41, "learning_rate": 1.1473464479755226e-05, "loss": 0.263, "step": 2174 }, { "epoch": 1.41, "learning_rate": 1.1466548246564958e-05, "loss": 0.2747, "step": 2175 }, { "epoch": 1.41, "learning_rate": 1.1459631296367937e-05, "loss": 0.2718, "step": 2176 }, { "epoch": 1.41, "learning_rate": 1.1452713632545913e-05, "loss": 0.2702, "step": 2177 }, { "epoch": 1.41, "learning_rate": 1.1445795258480987e-05, "loss": 0.2627, "step": 2178 }, { "epoch": 1.41, "learning_rate": 1.1438876177555605e-05, "loss": 0.2737, "step": 2179 }, { "epoch": 1.41, "learning_rate": 1.143195639315256e-05, "loss": 0.2576, "step": 2180 }, { "epoch": 1.41, "learning_rate": 1.1425035908654983e-05, "loss": 0.2663, "step": 2181 }, { "epoch": 1.41, "learning_rate": 1.1418114727446359e-05, "loss": 0.2676, "step": 2182 }, { "epoch": 1.41, "learning_rate": 1.1411192852910504e-05, "loss": 0.2658, "step": 2183 }, { "epoch": 1.41, "learning_rate": 1.1404270288431576e-05, "loss": 0.2657, "step": 2184 }, { "epoch": 1.42, "learning_rate": 1.139734703739407e-05, "loss": 0.2554, "step": 2185 }, { "epoch": 1.42, "learning_rate": 1.1390423103182818e-05, "loss": 0.2488, "step": 2186 }, { "epoch": 1.42, "learning_rate": 1.1383498489182982e-05, "loss": 0.2676, "step": 2187 }, { "epoch": 1.42, "learning_rate": 1.137657319878006e-05, "loss": 0.2655, "step": 2188 }, { "epoch": 1.42, "learning_rate": 1.1369647235359884e-05, "loss": 0.2848, "step": 2189 }, { "epoch": 1.42, "learning_rate": 1.1362720602308605e-05, "loss": 0.2619, "step": 2190 }, { "epoch": 1.42, "learning_rate": 1.1355793303012713e-05, "loss": 0.2576, "step": 2191 }, { "epoch": 1.42, "learning_rate": 1.1348865340859012e-05, "loss": 0.27, "step": 2192 }, { "epoch": 1.42, "learning_rate": 1.1341936719234643e-05, "loss": 0.265, "step": 2193 }, { "epoch": 1.42, "learning_rate": 1.1335007441527055e-05, "loss": 0.2641, "step": 2194 }, { "epoch": 1.42, "learning_rate": 1.1328077511124032e-05, "loss": 0.2554, "step": 2195 }, { "epoch": 1.42, "learning_rate": 1.1321146931413666e-05, "loss": 0.2541, "step": 2196 }, { "epoch": 1.42, "learning_rate": 1.1314215705784374e-05, "loss": 0.2724, "step": 2197 }, { "epoch": 1.42, "learning_rate": 1.1307283837624886e-05, "loss": 0.2733, "step": 2198 }, { "epoch": 1.42, "learning_rate": 1.1300351330324239e-05, "loss": 0.2658, "step": 2199 }, { "epoch": 1.42, "learning_rate": 1.1293418187271799e-05, "loss": 0.2641, "step": 2200 }, { "epoch": 1.43, "learning_rate": 1.1286484411857223e-05, "loss": 0.2644, "step": 2201 }, { "epoch": 1.43, "learning_rate": 1.12795500074705e-05, "loss": 0.2711, "step": 2202 }, { "epoch": 1.43, "learning_rate": 1.12726149775019e-05, "loss": 0.2819, "step": 2203 }, { "epoch": 1.43, "learning_rate": 1.1265679325342025e-05, "loss": 0.2688, "step": 2204 }, { "epoch": 1.43, "learning_rate": 1.125874305438176e-05, "loss": 0.257, "step": 2205 }, { "epoch": 1.43, "learning_rate": 1.1251806168012305e-05, "loss": 0.2579, "step": 2206 }, { "epoch": 1.43, "learning_rate": 1.1244868669625158e-05, "loss": 0.2707, "step": 2207 }, { "epoch": 1.43, "learning_rate": 1.1237930562612111e-05, "loss": 0.2573, "step": 2208 }, { "epoch": 1.43, "learning_rate": 1.1230991850365265e-05, "loss": 0.2599, "step": 2209 }, { "epoch": 1.43, "learning_rate": 1.1224052536277006e-05, "loss": 0.2557, "step": 2210 }, { "epoch": 1.43, "learning_rate": 1.1217112623740017e-05, "loss": 0.2612, "step": 2211 }, { "epoch": 1.43, "learning_rate": 1.1210172116147275e-05, "loss": 0.2586, "step": 2212 }, { "epoch": 1.43, "learning_rate": 1.120323101689205e-05, "loss": 0.2717, "step": 2213 }, { "epoch": 1.43, "learning_rate": 1.1196289329367899e-05, "loss": 0.2498, "step": 2214 }, { "epoch": 1.43, "learning_rate": 1.1189347056968666e-05, "loss": 0.2612, "step": 2215 }, { "epoch": 1.44, "learning_rate": 1.1182404203088478e-05, "loss": 0.2608, "step": 2216 }, { "epoch": 1.44, "learning_rate": 1.1175460771121755e-05, "loss": 0.2738, "step": 2217 }, { "epoch": 1.44, "learning_rate": 1.1168516764463192e-05, "loss": 0.2533, "step": 2218 }, { "epoch": 1.44, "learning_rate": 1.1161572186507768e-05, "loss": 0.2528, "step": 2219 }, { "epoch": 1.44, "learning_rate": 1.1154627040650741e-05, "loss": 0.2629, "step": 2220 }, { "epoch": 1.44, "learning_rate": 1.1147681330287645e-05, "loss": 0.2543, "step": 2221 }, { "epoch": 1.44, "learning_rate": 1.114073505881429e-05, "loss": 0.265, "step": 2222 }, { "epoch": 1.44, "learning_rate": 1.1133788229626767e-05, "loss": 0.253, "step": 2223 }, { "epoch": 1.44, "learning_rate": 1.1126840846121432e-05, "loss": 0.2443, "step": 2224 }, { "epoch": 1.44, "learning_rate": 1.1119892911694912e-05, "loss": 0.2649, "step": 2225 }, { "epoch": 1.44, "learning_rate": 1.1112944429744107e-05, "loss": 0.2598, "step": 2226 }, { "epoch": 1.44, "learning_rate": 1.1105995403666185e-05, "loss": 0.2616, "step": 2227 }, { "epoch": 1.44, "learning_rate": 1.109904583685858e-05, "loss": 0.2656, "step": 2228 }, { "epoch": 1.44, "learning_rate": 1.1092095732718983e-05, "loss": 0.2657, "step": 2229 }, { "epoch": 1.44, "learning_rate": 1.1085145094645357e-05, "loss": 0.2603, "step": 2230 }, { "epoch": 1.44, "learning_rate": 1.1078193926035923e-05, "loss": 0.2646, "step": 2231 }, { "epoch": 1.45, "learning_rate": 1.1071242230289161e-05, "loss": 0.2718, "step": 2232 }, { "epoch": 1.45, "learning_rate": 1.1064290010803806e-05, "loss": 0.2666, "step": 2233 }, { "epoch": 1.45, "learning_rate": 1.1057337270978857e-05, "loss": 0.2657, "step": 2234 }, { "epoch": 1.45, "learning_rate": 1.1050384014213555e-05, "loss": 0.2587, "step": 2235 }, { "epoch": 1.45, "learning_rate": 1.1043430243907407e-05, "loss": 0.2629, "step": 2236 }, { "epoch": 1.45, "learning_rate": 1.1036475963460162e-05, "loss": 0.2669, "step": 2237 }, { "epoch": 1.45, "learning_rate": 1.1029521176271823e-05, "loss": 0.2643, "step": 2238 }, { "epoch": 1.45, "learning_rate": 1.1022565885742637e-05, "loss": 0.2655, "step": 2239 }, { "epoch": 1.45, "learning_rate": 1.1015610095273101e-05, "loss": 0.2498, "step": 2240 }, { "epoch": 1.45, "learning_rate": 1.1008653808263956e-05, "loss": 0.2599, "step": 2241 }, { "epoch": 1.45, "learning_rate": 1.1001697028116176e-05, "loss": 0.2659, "step": 2242 }, { "epoch": 1.45, "learning_rate": 1.0994739758230997e-05, "loss": 0.2677, "step": 2243 }, { "epoch": 1.45, "learning_rate": 1.0987782002009873e-05, "loss": 0.2593, "step": 2244 }, { "epoch": 1.45, "learning_rate": 1.098082376285451e-05, "loss": 0.2579, "step": 2245 }, { "epoch": 1.45, "learning_rate": 1.097386504416684e-05, "loss": 0.2581, "step": 2246 }, { "epoch": 1.46, "learning_rate": 1.0966905849349037e-05, "loss": 0.2565, "step": 2247 }, { "epoch": 1.46, "learning_rate": 1.0959946181803505e-05, "loss": 0.2641, "step": 2248 }, { "epoch": 1.46, "learning_rate": 1.0952986044932879e-05, "loss": 0.2673, "step": 2249 }, { "epoch": 1.46, "learning_rate": 1.0946025442140023e-05, "loss": 0.2576, "step": 2250 }, { "epoch": 1.46, "learning_rate": 1.0939064376828031e-05, "loss": 0.2624, "step": 2251 }, { "epoch": 1.46, "learning_rate": 1.0932102852400218e-05, "loss": 0.2678, "step": 2252 }, { "epoch": 1.46, "learning_rate": 1.0925140872260129e-05, "loss": 0.2628, "step": 2253 }, { "epoch": 1.46, "learning_rate": 1.0918178439811534e-05, "loss": 0.2705, "step": 2254 }, { "epoch": 1.46, "learning_rate": 1.0911215558458413e-05, "loss": 0.2695, "step": 2255 }, { "epoch": 1.46, "learning_rate": 1.0904252231604975e-05, "loss": 0.2479, "step": 2256 }, { "epoch": 1.46, "learning_rate": 1.0897288462655644e-05, "loss": 0.2608, "step": 2257 }, { "epoch": 1.46, "learning_rate": 1.0890324255015063e-05, "loss": 0.2567, "step": 2258 }, { "epoch": 1.46, "learning_rate": 1.0883359612088084e-05, "loss": 0.2512, "step": 2259 }, { "epoch": 1.46, "learning_rate": 1.0876394537279778e-05, "loss": 0.2631, "step": 2260 }, { "epoch": 1.46, "learning_rate": 1.086942903399542e-05, "loss": 0.2521, "step": 2261 }, { "epoch": 1.47, "learning_rate": 1.0862463105640494e-05, "loss": 0.2638, "step": 2262 }, { "epoch": 1.47, "learning_rate": 1.085549675562071e-05, "loss": 0.2657, "step": 2263 }, { "epoch": 1.47, "learning_rate": 1.0848529987341958e-05, "loss": 0.2702, "step": 2264 }, { "epoch": 1.47, "learning_rate": 1.0841562804210349e-05, "loss": 0.2476, "step": 2265 }, { "epoch": 1.47, "learning_rate": 1.0834595209632194e-05, "loss": 0.2715, "step": 2266 }, { "epoch": 1.47, "learning_rate": 1.0827627207014e-05, "loss": 0.2719, "step": 2267 }, { "epoch": 1.47, "learning_rate": 1.082065879976248e-05, "loss": 0.2528, "step": 2268 }, { "epoch": 1.47, "learning_rate": 1.0813689991284541e-05, "loss": 0.2767, "step": 2269 }, { "epoch": 1.47, "learning_rate": 1.0806720784987288e-05, "loss": 0.2679, "step": 2270 }, { "epoch": 1.47, "learning_rate": 1.079975118427802e-05, "loss": 0.2718, "step": 2271 }, { "epoch": 1.47, "learning_rate": 1.0792781192564227e-05, "loss": 0.2562, "step": 2272 }, { "epoch": 1.47, "learning_rate": 1.0785810813253595e-05, "loss": 0.2608, "step": 2273 }, { "epoch": 1.47, "learning_rate": 1.077884004975399e-05, "loss": 0.2373, "step": 2274 }, { "epoch": 1.47, "learning_rate": 1.077186890547348e-05, "loss": 0.2572, "step": 2275 }, { "epoch": 1.47, "learning_rate": 1.0764897383820306e-05, "loss": 0.2717, "step": 2276 }, { "epoch": 1.47, "learning_rate": 1.0757925488202898e-05, "loss": 0.2595, "step": 2277 }, { "epoch": 1.48, "learning_rate": 1.0750953222029874e-05, "loss": 0.255, "step": 2278 }, { "epoch": 1.48, "learning_rate": 1.0743980588710026e-05, "loss": 0.2575, "step": 2279 }, { "epoch": 1.48, "learning_rate": 1.0737007591652327e-05, "loss": 0.2742, "step": 2280 }, { "epoch": 1.48, "learning_rate": 1.0730034234265929e-05, "loss": 0.2539, "step": 2281 }, { "epoch": 1.48, "learning_rate": 1.0723060519960165e-05, "loss": 0.2549, "step": 2282 }, { "epoch": 1.48, "learning_rate": 1.071608645214453e-05, "loss": 0.2444, "step": 2283 }, { "epoch": 1.48, "learning_rate": 1.0709112034228708e-05, "loss": 0.2584, "step": 2284 }, { "epoch": 1.48, "learning_rate": 1.0702137269622539e-05, "loss": 0.2735, "step": 2285 }, { "epoch": 1.48, "learning_rate": 1.0695162161736044e-05, "loss": 0.2489, "step": 2286 }, { "epoch": 1.48, "learning_rate": 1.0688186713979405e-05, "loss": 0.2522, "step": 2287 }, { "epoch": 1.48, "learning_rate": 1.0681210929762971e-05, "loss": 0.2677, "step": 2288 }, { "epoch": 1.48, "learning_rate": 1.0674234812497257e-05, "loss": 0.2754, "step": 2289 }, { "epoch": 1.48, "learning_rate": 1.0667258365592943e-05, "loss": 0.2651, "step": 2290 }, { "epoch": 1.48, "learning_rate": 1.0660281592460866e-05, "loss": 0.2436, "step": 2291 }, { "epoch": 1.48, "learning_rate": 1.0653304496512024e-05, "loss": 0.2873, "step": 2292 }, { "epoch": 1.49, "learning_rate": 1.0646327081157575e-05, "loss": 0.2667, "step": 2293 }, { "epoch": 1.49, "learning_rate": 1.0639349349808828e-05, "loss": 0.2661, "step": 2294 }, { "epoch": 1.49, "learning_rate": 1.0632371305877252e-05, "loss": 0.2644, "step": 2295 }, { "epoch": 1.49, "learning_rate": 1.0625392952774465e-05, "loss": 0.2855, "step": 2296 }, { "epoch": 1.49, "learning_rate": 1.061841429391224e-05, "loss": 0.2596, "step": 2297 }, { "epoch": 1.49, "learning_rate": 1.0611435332702494e-05, "loss": 0.2757, "step": 2298 }, { "epoch": 1.49, "learning_rate": 1.0604456072557299e-05, "loss": 0.2537, "step": 2299 }, { "epoch": 1.49, "learning_rate": 1.0597476516888863e-05, "loss": 0.2731, "step": 2300 }, { "epoch": 1.49, "learning_rate": 1.0590496669109552e-05, "loss": 0.2563, "step": 2301 }, { "epoch": 1.49, "learning_rate": 1.0583516532631861e-05, "loss": 0.2577, "step": 2302 }, { "epoch": 1.49, "learning_rate": 1.0576536110868437e-05, "loss": 0.2626, "step": 2303 }, { "epoch": 1.49, "learning_rate": 1.0569555407232057e-05, "loss": 0.2634, "step": 2304 }, { "epoch": 1.49, "learning_rate": 1.0562574425135648e-05, "loss": 0.2664, "step": 2305 }, { "epoch": 1.49, "learning_rate": 1.0555593167992261e-05, "loss": 0.2769, "step": 2306 }, { "epoch": 1.49, "learning_rate": 1.054861163921509e-05, "loss": 0.2568, "step": 2307 }, { "epoch": 1.49, "learning_rate": 1.0541629842217453e-05, "loss": 0.252, "step": 2308 }, { "epoch": 1.5, "learning_rate": 1.053464778041281e-05, "loss": 0.2717, "step": 2309 }, { "epoch": 1.5, "learning_rate": 1.052766545721474e-05, "loss": 0.2565, "step": 2310 }, { "epoch": 1.5, "learning_rate": 1.0520682876036958e-05, "loss": 0.2548, "step": 2311 }, { "epoch": 1.5, "learning_rate": 1.0513700040293302e-05, "loss": 0.2713, "step": 2312 }, { "epoch": 1.5, "learning_rate": 1.0506716953397733e-05, "loss": 0.2563, "step": 2313 }, { "epoch": 1.5, "learning_rate": 1.0499733618764331e-05, "loss": 0.2688, "step": 2314 }, { "epoch": 1.5, "learning_rate": 1.0492750039807313e-05, "loss": 0.262, "step": 2315 }, { "epoch": 1.5, "learning_rate": 1.0485766219940998e-05, "loss": 0.2776, "step": 2316 }, { "epoch": 1.5, "learning_rate": 1.047878216257983e-05, "loss": 0.2583, "step": 2317 }, { "epoch": 1.5, "learning_rate": 1.0471797871138369e-05, "loss": 0.2705, "step": 2318 }, { "epoch": 1.5, "learning_rate": 1.0464813349031289e-05, "loss": 0.2629, "step": 2319 }, { "epoch": 1.5, "learning_rate": 1.0457828599673377e-05, "loss": 0.2603, "step": 2320 }, { "epoch": 1.5, "learning_rate": 1.045084362647953e-05, "loss": 0.2677, "step": 2321 }, { "epoch": 1.5, "learning_rate": 1.0443858432864755e-05, "loss": 0.245, "step": 2322 }, { "epoch": 1.5, "learning_rate": 1.0436873022244169e-05, "loss": 0.2686, "step": 2323 }, { "epoch": 1.51, "learning_rate": 1.042988739803299e-05, "loss": 0.2709, "step": 2324 }, { "epoch": 1.51, "learning_rate": 1.0422901563646546e-05, "loss": 0.2548, "step": 2325 }, { "epoch": 1.51, "learning_rate": 1.0415915522500267e-05, "loss": 0.2606, "step": 2326 }, { "epoch": 1.51, "learning_rate": 1.0408929278009678e-05, "loss": 0.2637, "step": 2327 }, { "epoch": 1.51, "learning_rate": 1.0401942833590413e-05, "loss": 0.253, "step": 2328 }, { "epoch": 1.51, "learning_rate": 1.0394956192658192e-05, "loss": 0.254, "step": 2329 }, { "epoch": 1.51, "learning_rate": 1.0387969358628844e-05, "loss": 0.2651, "step": 2330 }, { "epoch": 1.51, "learning_rate": 1.0380982334918284e-05, "loss": 0.268, "step": 2331 }, { "epoch": 1.51, "learning_rate": 1.037399512494252e-05, "loss": 0.2715, "step": 2332 }, { "epoch": 1.51, "learning_rate": 1.0367007732117653e-05, "loss": 0.2515, "step": 2333 }, { "epoch": 1.51, "learning_rate": 1.0360020159859876e-05, "loss": 0.2577, "step": 2334 }, { "epoch": 1.51, "learning_rate": 1.0353032411585464e-05, "loss": 0.2494, "step": 2335 }, { "epoch": 1.51, "learning_rate": 1.0346044490710783e-05, "loss": 0.2709, "step": 2336 }, { "epoch": 1.51, "learning_rate": 1.0339056400652281e-05, "loss": 0.2615, "step": 2337 }, { "epoch": 1.51, "learning_rate": 1.0332068144826488e-05, "loss": 0.2771, "step": 2338 }, { "epoch": 1.51, "learning_rate": 1.0325079726650017e-05, "loss": 0.2755, "step": 2339 }, { "epoch": 1.52, "learning_rate": 1.031809114953956e-05, "loss": 0.2781, "step": 2340 }, { "epoch": 1.52, "learning_rate": 1.0311102416911887e-05, "loss": 0.2549, "step": 2341 }, { "epoch": 1.52, "learning_rate": 1.0304113532183842e-05, "loss": 0.266, "step": 2342 }, { "epoch": 1.52, "learning_rate": 1.0297124498772345e-05, "loss": 0.2656, "step": 2343 }, { "epoch": 1.52, "learning_rate": 1.0290135320094385e-05, "loss": 0.2553, "step": 2344 }, { "epoch": 1.52, "learning_rate": 1.028314599956703e-05, "loss": 0.2569, "step": 2345 }, { "epoch": 1.52, "learning_rate": 1.0276156540607411e-05, "loss": 0.2685, "step": 2346 }, { "epoch": 1.52, "learning_rate": 1.0269166946632731e-05, "loss": 0.2747, "step": 2347 }, { "epoch": 1.52, "learning_rate": 1.0262177221060255e-05, "loss": 0.2702, "step": 2348 }, { "epoch": 1.52, "learning_rate": 1.025518736730731e-05, "loss": 0.2635, "step": 2349 }, { "epoch": 1.52, "learning_rate": 1.0248197388791294e-05, "loss": 0.2714, "step": 2350 }, { "epoch": 1.52, "learning_rate": 1.024120728892966e-05, "loss": 0.2331, "step": 2351 }, { "epoch": 1.52, "learning_rate": 1.023421707113992e-05, "loss": 0.2547, "step": 2352 }, { "epoch": 1.52, "learning_rate": 1.0227226738839649e-05, "loss": 0.2487, "step": 2353 }, { "epoch": 1.52, "learning_rate": 1.022023629544647e-05, "loss": 0.2775, "step": 2354 }, { "epoch": 1.53, "learning_rate": 1.0213245744378069e-05, "loss": 0.2593, "step": 2355 }, { "epoch": 1.53, "learning_rate": 1.0206255089052176e-05, "loss": 0.2549, "step": 2356 }, { "epoch": 1.53, "learning_rate": 1.0199264332886583e-05, "loss": 0.2469, "step": 2357 }, { "epoch": 1.53, "learning_rate": 1.0192273479299118e-05, "loss": 0.2735, "step": 2358 }, { "epoch": 1.53, "learning_rate": 1.0185282531707663e-05, "loss": 0.2592, "step": 2359 }, { "epoch": 1.53, "learning_rate": 1.017829149353015e-05, "loss": 0.2491, "step": 2360 }, { "epoch": 1.53, "learning_rate": 1.0171300368184552e-05, "loss": 0.2555, "step": 2361 }, { "epoch": 1.53, "learning_rate": 1.0164309159088881e-05, "loss": 0.2581, "step": 2362 }, { "epoch": 1.53, "learning_rate": 1.0157317869661194e-05, "loss": 0.2718, "step": 2363 }, { "epoch": 1.53, "learning_rate": 1.0150326503319589e-05, "loss": 0.2741, "step": 2364 }, { "epoch": 1.53, "learning_rate": 1.0143335063482196e-05, "loss": 0.2659, "step": 2365 }, { "epoch": 1.53, "learning_rate": 1.0136343553567186e-05, "loss": 0.2746, "step": 2366 }, { "epoch": 1.53, "learning_rate": 1.0129351976992756e-05, "loss": 0.2586, "step": 2367 }, { "epoch": 1.53, "learning_rate": 1.0122360337177149e-05, "loss": 0.2718, "step": 2368 }, { "epoch": 1.53, "learning_rate": 1.011536863753863e-05, "loss": 0.2559, "step": 2369 }, { "epoch": 1.53, "learning_rate": 1.0108376881495495e-05, "loss": 0.2579, "step": 2370 }, { "epoch": 1.54, "learning_rate": 1.0101385072466066e-05, "loss": 0.2617, "step": 2371 }, { "epoch": 1.54, "learning_rate": 1.0094393213868693e-05, "loss": 0.2721, "step": 2372 }, { "epoch": 1.54, "learning_rate": 1.0087401309121747e-05, "loss": 0.258, "step": 2373 }, { "epoch": 1.54, "learning_rate": 1.0080409361643628e-05, "loss": 0.2499, "step": 2374 }, { "epoch": 1.54, "learning_rate": 1.0073417374852753e-05, "loss": 0.2642, "step": 2375 }, { "epoch": 1.54, "learning_rate": 1.0066425352167555e-05, "loss": 0.2414, "step": 2376 }, { "epoch": 1.54, "learning_rate": 1.0059433297006491e-05, "loss": 0.2696, "step": 2377 }, { "epoch": 1.54, "learning_rate": 1.0052441212788032e-05, "loss": 0.261, "step": 2378 }, { "epoch": 1.54, "learning_rate": 1.0045449102930655e-05, "loss": 0.2639, "step": 2379 }, { "epoch": 1.54, "learning_rate": 1.0038456970852864e-05, "loss": 0.2628, "step": 2380 }, { "epoch": 1.54, "learning_rate": 1.0031464819973162e-05, "loss": 0.2682, "step": 2381 }, { "epoch": 1.54, "learning_rate": 1.0024472653710066e-05, "loss": 0.2595, "step": 2382 }, { "epoch": 1.54, "learning_rate": 1.0017480475482105e-05, "loss": 0.2568, "step": 2383 }, { "epoch": 1.54, "learning_rate": 1.0010488288707801e-05, "loss": 0.2683, "step": 2384 }, { "epoch": 1.54, "learning_rate": 1.0003496096805692e-05, "loss": 0.2732, "step": 2385 }, { "epoch": 1.55, "learning_rate": 9.996503903194313e-06, "loss": 0.2618, "step": 2386 }, { "epoch": 1.55, "learning_rate": 9.989511711292202e-06, "loss": 0.2575, "step": 2387 }, { "epoch": 1.55, "learning_rate": 9.982519524517902e-06, "loss": 0.2415, "step": 2388 }, { "epoch": 1.55, "learning_rate": 9.975527346289935e-06, "loss": 0.2601, "step": 2389 }, { "epoch": 1.55, "learning_rate": 9.968535180026843e-06, "loss": 0.2616, "step": 2390 }, { "epoch": 1.55, "learning_rate": 9.96154302914714e-06, "loss": 0.2728, "step": 2391 }, { "epoch": 1.55, "learning_rate": 9.95455089706935e-06, "loss": 0.2612, "step": 2392 }, { "epoch": 1.55, "learning_rate": 9.947558787211973e-06, "loss": 0.2821, "step": 2393 }, { "epoch": 1.55, "learning_rate": 9.940566702993512e-06, "loss": 0.263, "step": 2394 }, { "epoch": 1.55, "learning_rate": 9.933574647832446e-06, "loss": 0.2479, "step": 2395 }, { "epoch": 1.55, "learning_rate": 9.926582625147249e-06, "loss": 0.2518, "step": 2396 }, { "epoch": 1.55, "learning_rate": 9.919590638356374e-06, "loss": 0.2778, "step": 2397 }, { "epoch": 1.55, "learning_rate": 9.912598690878253e-06, "loss": 0.2788, "step": 2398 }, { "epoch": 1.55, "learning_rate": 9.905606786131312e-06, "loss": 0.2667, "step": 2399 }, { "epoch": 1.55, "learning_rate": 9.898614927533936e-06, "loss": 0.254, "step": 2400 }, { "epoch": 1.56, "learning_rate": 9.891623118504509e-06, "loss": 0.266, "step": 2401 }, { "epoch": 1.56, "learning_rate": 9.884631362461371e-06, "loss": 0.2759, "step": 2402 }, { "epoch": 1.56, "learning_rate": 9.877639662822853e-06, "loss": 0.2546, "step": 2403 }, { "epoch": 1.56, "learning_rate": 9.870648023007246e-06, "loss": 0.2612, "step": 2404 }, { "epoch": 1.56, "learning_rate": 9.86365644643282e-06, "loss": 0.268, "step": 2405 }, { "epoch": 1.56, "learning_rate": 9.856664936517807e-06, "loss": 0.2702, "step": 2406 }, { "epoch": 1.56, "learning_rate": 9.849673496680414e-06, "loss": 0.2483, "step": 2407 }, { "epoch": 1.56, "learning_rate": 9.842682130338808e-06, "loss": 0.2636, "step": 2408 }, { "epoch": 1.56, "learning_rate": 9.835690840911122e-06, "loss": 0.2553, "step": 2409 }, { "epoch": 1.56, "learning_rate": 9.828699631815452e-06, "loss": 0.2646, "step": 2410 }, { "epoch": 1.56, "learning_rate": 9.821708506469854e-06, "loss": 0.2669, "step": 2411 }, { "epoch": 1.56, "learning_rate": 9.81471746829234e-06, "loss": 0.2594, "step": 2412 }, { "epoch": 1.56, "learning_rate": 9.807726520700889e-06, "loss": 0.2902, "step": 2413 }, { "epoch": 1.56, "learning_rate": 9.800735667113422e-06, "loss": 0.27, "step": 2414 }, { "epoch": 1.56, "learning_rate": 9.793744910947829e-06, "loss": 0.2618, "step": 2415 }, { "epoch": 1.56, "learning_rate": 9.786754255621935e-06, "loss": 0.2589, "step": 2416 }, { "epoch": 1.57, "learning_rate": 9.779763704553531e-06, "loss": 0.2544, "step": 2417 }, { "epoch": 1.57, "learning_rate": 9.772773261160354e-06, "loss": 0.2565, "step": 2418 }, { "epoch": 1.57, "learning_rate": 9.76578292886008e-06, "loss": 0.2648, "step": 2419 }, { "epoch": 1.57, "learning_rate": 9.758792711070344e-06, "loss": 0.265, "step": 2420 }, { "epoch": 1.57, "learning_rate": 9.751802611208708e-06, "loss": 0.2677, "step": 2421 }, { "epoch": 1.57, "learning_rate": 9.744812632692693e-06, "loss": 0.2533, "step": 2422 }, { "epoch": 1.57, "learning_rate": 9.737822778939747e-06, "loss": 0.255, "step": 2423 }, { "epoch": 1.57, "learning_rate": 9.73083305336727e-06, "loss": 0.276, "step": 2424 }, { "epoch": 1.57, "learning_rate": 9.723843459392589e-06, "loss": 0.2756, "step": 2425 }, { "epoch": 1.57, "learning_rate": 9.716854000432974e-06, "loss": 0.2643, "step": 2426 }, { "epoch": 1.57, "learning_rate": 9.709864679905617e-06, "loss": 0.2512, "step": 2427 }, { "epoch": 1.57, "learning_rate": 9.702875501227662e-06, "loss": 0.2869, "step": 2428 }, { "epoch": 1.57, "learning_rate": 9.695886467816163e-06, "loss": 0.2718, "step": 2429 }, { "epoch": 1.57, "learning_rate": 9.688897583088118e-06, "loss": 0.2707, "step": 2430 }, { "epoch": 1.57, "learning_rate": 9.681908850460444e-06, "loss": 0.2675, "step": 2431 }, { "epoch": 1.58, "learning_rate": 9.674920273349988e-06, "loss": 0.2757, "step": 2432 }, { "epoch": 1.58, "learning_rate": 9.667931855173516e-06, "loss": 0.2547, "step": 2433 }, { "epoch": 1.58, "learning_rate": 9.660943599347725e-06, "loss": 0.2744, "step": 2434 }, { "epoch": 1.58, "learning_rate": 9.65395550928922e-06, "loss": 0.2535, "step": 2435 }, { "epoch": 1.58, "learning_rate": 9.646967588414541e-06, "loss": 0.2545, "step": 2436 }, { "epoch": 1.58, "learning_rate": 9.639979840140129e-06, "loss": 0.2476, "step": 2437 }, { "epoch": 1.58, "learning_rate": 9.632992267882349e-06, "loss": 0.2363, "step": 2438 }, { "epoch": 1.58, "learning_rate": 9.626004875057485e-06, "loss": 0.2627, "step": 2439 }, { "epoch": 1.58, "learning_rate": 9.619017665081717e-06, "loss": 0.2534, "step": 2440 }, { "epoch": 1.58, "learning_rate": 9.612030641371158e-06, "loss": 0.2731, "step": 2441 }, { "epoch": 1.58, "learning_rate": 9.605043807341808e-06, "loss": 0.2652, "step": 2442 }, { "epoch": 1.58, "learning_rate": 9.598057166409592e-06, "loss": 0.2762, "step": 2443 }, { "epoch": 1.58, "learning_rate": 9.591070721990322e-06, "loss": 0.2662, "step": 2444 }, { "epoch": 1.58, "learning_rate": 9.584084477499736e-06, "loss": 0.2669, "step": 2445 }, { "epoch": 1.58, "learning_rate": 9.577098436353454e-06, "loss": 0.2666, "step": 2446 }, { "epoch": 1.58, "learning_rate": 9.570112601967013e-06, "loss": 0.2515, "step": 2447 }, { "epoch": 1.59, "learning_rate": 9.563126977755833e-06, "loss": 0.2555, "step": 2448 }, { "epoch": 1.59, "learning_rate": 9.55614156713525e-06, "loss": 0.2704, "step": 2449 }, { "epoch": 1.59, "learning_rate": 9.549156373520472e-06, "loss": 0.2757, "step": 2450 }, { "epoch": 1.59, "learning_rate": 9.542171400326626e-06, "loss": 0.2609, "step": 2451 }, { "epoch": 1.59, "learning_rate": 9.535186650968715e-06, "loss": 0.2678, "step": 2452 }, { "epoch": 1.59, "learning_rate": 9.528202128861633e-06, "loss": 0.2477, "step": 2453 }, { "epoch": 1.59, "learning_rate": 9.521217837420173e-06, "loss": 0.269, "step": 2454 }, { "epoch": 1.59, "learning_rate": 9.514233780059004e-06, "loss": 0.2571, "step": 2455 }, { "epoch": 1.59, "learning_rate": 9.50724996019269e-06, "loss": 0.2765, "step": 2456 }, { "epoch": 1.59, "learning_rate": 9.50026638123567e-06, "loss": 0.2606, "step": 2457 }, { "epoch": 1.59, "learning_rate": 9.49328304660227e-06, "loss": 0.2636, "step": 2458 }, { "epoch": 1.59, "learning_rate": 9.486299959706701e-06, "loss": 0.2535, "step": 2459 }, { "epoch": 1.59, "learning_rate": 9.479317123963044e-06, "loss": 0.2794, "step": 2460 }, { "epoch": 1.59, "learning_rate": 9.472334542785262e-06, "loss": 0.2552, "step": 2461 }, { "epoch": 1.59, "learning_rate": 9.465352219587194e-06, "loss": 0.2662, "step": 2462 }, { "epoch": 1.6, "learning_rate": 9.45837015778255e-06, "loss": 0.2603, "step": 2463 }, { "epoch": 1.6, "learning_rate": 9.451388360784913e-06, "loss": 0.2632, "step": 2464 }, { "epoch": 1.6, "learning_rate": 9.444406832007738e-06, "loss": 0.2533, "step": 2465 }, { "epoch": 1.6, "learning_rate": 9.437425574864355e-06, "loss": 0.2681, "step": 2466 }, { "epoch": 1.6, "learning_rate": 9.430444592767943e-06, "loss": 0.2777, "step": 2467 }, { "epoch": 1.6, "learning_rate": 9.423463889131568e-06, "loss": 0.2716, "step": 2468 }, { "epoch": 1.6, "learning_rate": 9.416483467368142e-06, "loss": 0.2499, "step": 2469 }, { "epoch": 1.6, "learning_rate": 9.409503330890453e-06, "loss": 0.2679, "step": 2470 }, { "epoch": 1.6, "learning_rate": 9.402523483111138e-06, "loss": 0.2545, "step": 2471 }, { "epoch": 1.6, "learning_rate": 9.395543927442708e-06, "loss": 0.2453, "step": 2472 }, { "epoch": 1.6, "learning_rate": 9.388564667297508e-06, "loss": 0.2839, "step": 2473 }, { "epoch": 1.6, "learning_rate": 9.381585706087765e-06, "loss": 0.2626, "step": 2474 }, { "epoch": 1.6, "learning_rate": 9.374607047225537e-06, "loss": 0.2597, "step": 2475 }, { "epoch": 1.6, "learning_rate": 9.367628694122753e-06, "loss": 0.2627, "step": 2476 }, { "epoch": 1.6, "learning_rate": 9.360650650191174e-06, "loss": 0.273, "step": 2477 }, { "epoch": 1.6, "learning_rate": 9.353672918842425e-06, "loss": 0.257, "step": 2478 }, { "epoch": 1.61, "learning_rate": 9.346695503487977e-06, "loss": 0.2506, "step": 2479 }, { "epoch": 1.61, "learning_rate": 9.339718407539134e-06, "loss": 0.2556, "step": 2480 }, { "epoch": 1.61, "learning_rate": 9.332741634407059e-06, "loss": 0.2454, "step": 2481 }, { "epoch": 1.61, "learning_rate": 9.325765187502743e-06, "loss": 0.2691, "step": 2482 }, { "epoch": 1.61, "learning_rate": 9.318789070237032e-06, "loss": 0.2623, "step": 2483 }, { "epoch": 1.61, "learning_rate": 9.311813286020597e-06, "loss": 0.2545, "step": 2484 }, { "epoch": 1.61, "learning_rate": 9.30483783826396e-06, "loss": 0.26, "step": 2485 }, { "epoch": 1.61, "learning_rate": 9.297862730377461e-06, "loss": 0.2417, "step": 2486 }, { "epoch": 1.61, "learning_rate": 9.290887965771295e-06, "loss": 0.2697, "step": 2487 }, { "epoch": 1.61, "learning_rate": 9.28391354785547e-06, "loss": 0.2618, "step": 2488 }, { "epoch": 1.61, "learning_rate": 9.27693948003984e-06, "loss": 0.2667, "step": 2489 }, { "epoch": 1.61, "learning_rate": 9.269965765734073e-06, "loss": 0.2639, "step": 2490 }, { "epoch": 1.61, "learning_rate": 9.262992408347678e-06, "loss": 0.2646, "step": 2491 }, { "epoch": 1.61, "learning_rate": 9.256019411289978e-06, "loss": 0.2569, "step": 2492 }, { "epoch": 1.61, "learning_rate": 9.249046777970131e-06, "loss": 0.2696, "step": 2493 }, { "epoch": 1.62, "learning_rate": 9.242074511797103e-06, "loss": 0.2721, "step": 2494 }, { "epoch": 1.62, "learning_rate": 9.235102616179699e-06, "loss": 0.2652, "step": 2495 }, { "epoch": 1.62, "learning_rate": 9.228131094526523e-06, "loss": 0.2566, "step": 2496 }, { "epoch": 1.62, "learning_rate": 9.221159950246011e-06, "loss": 0.2559, "step": 2497 }, { "epoch": 1.62, "learning_rate": 9.214189186746408e-06, "loss": 0.2639, "step": 2498 }, { "epoch": 1.62, "learning_rate": 9.207218807435773e-06, "loss": 0.2475, "step": 2499 }, { "epoch": 1.62, "learning_rate": 9.200248815721983e-06, "loss": 0.2813, "step": 2500 }, { "epoch": 1.62, "learning_rate": 9.193279215012713e-06, "loss": 0.2631, "step": 2501 }, { "epoch": 1.62, "learning_rate": 9.186310008715462e-06, "loss": 0.2658, "step": 2502 }, { "epoch": 1.62, "learning_rate": 9.179341200237522e-06, "loss": 0.2581, "step": 2503 }, { "epoch": 1.62, "learning_rate": 9.172372792986001e-06, "loss": 0.2711, "step": 2504 }, { "epoch": 1.62, "learning_rate": 9.16540479036781e-06, "loss": 0.2592, "step": 2505 }, { "epoch": 1.62, "learning_rate": 9.158437195789653e-06, "loss": 0.2576, "step": 2506 }, { "epoch": 1.62, "learning_rate": 9.151470012658045e-06, "loss": 0.2495, "step": 2507 }, { "epoch": 1.62, "learning_rate": 9.144503244379295e-06, "loss": 0.2567, "step": 2508 }, { "epoch": 1.62, "learning_rate": 9.137536894359507e-06, "loss": 0.2632, "step": 2509 }, { "epoch": 1.63, "learning_rate": 9.130570966004586e-06, "loss": 0.2815, "step": 2510 }, { "epoch": 1.63, "learning_rate": 9.123605462720225e-06, "loss": 0.2549, "step": 2511 }, { "epoch": 1.63, "learning_rate": 9.11664038791192e-06, "loss": 0.2483, "step": 2512 }, { "epoch": 1.63, "learning_rate": 9.109675744984939e-06, "loss": 0.2579, "step": 2513 }, { "epoch": 1.63, "learning_rate": 9.10271153734436e-06, "loss": 0.2745, "step": 2514 }, { "epoch": 1.63, "learning_rate": 9.095747768395029e-06, "loss": 0.2582, "step": 2515 }, { "epoch": 1.63, "learning_rate": 9.088784441541594e-06, "loss": 0.2615, "step": 2516 }, { "epoch": 1.63, "learning_rate": 9.081821560188471e-06, "loss": 0.2637, "step": 2517 }, { "epoch": 1.63, "learning_rate": 9.074859127739876e-06, "loss": 0.2709, "step": 2518 }, { "epoch": 1.63, "learning_rate": 9.067897147599786e-06, "loss": 0.2719, "step": 2519 }, { "epoch": 1.63, "learning_rate": 9.06093562317197e-06, "loss": 0.2641, "step": 2520 }, { "epoch": 1.63, "learning_rate": 9.05397455785998e-06, "loss": 0.2766, "step": 2521 }, { "epoch": 1.63, "learning_rate": 9.047013955067121e-06, "loss": 0.2554, "step": 2522 }, { "epoch": 1.63, "learning_rate": 9.040053818196498e-06, "loss": 0.2474, "step": 2523 }, { "epoch": 1.63, "learning_rate": 9.033094150650964e-06, "loss": 0.2654, "step": 2524 }, { "epoch": 1.64, "learning_rate": 9.026134955833163e-06, "loss": 0.2685, "step": 2525 }, { "epoch": 1.64, "learning_rate": 9.019176237145492e-06, "loss": 0.2536, "step": 2526 }, { "epoch": 1.64, "learning_rate": 9.012217997990129e-06, "loss": 0.2707, "step": 2527 }, { "epoch": 1.64, "learning_rate": 9.005260241769004e-06, "loss": 0.2508, "step": 2528 }, { "epoch": 1.64, "learning_rate": 8.998302971883825e-06, "loss": 0.2562, "step": 2529 }, { "epoch": 1.64, "learning_rate": 8.991346191736047e-06, "loss": 0.2643, "step": 2530 }, { "epoch": 1.64, "learning_rate": 8.984389904726904e-06, "loss": 0.2398, "step": 2531 }, { "epoch": 1.64, "learning_rate": 8.977434114257365e-06, "loss": 0.2585, "step": 2532 }, { "epoch": 1.64, "learning_rate": 8.970478823728182e-06, "loss": 0.2624, "step": 2533 }, { "epoch": 1.64, "learning_rate": 8.96352403653984e-06, "loss": 0.2443, "step": 2534 }, { "epoch": 1.64, "learning_rate": 8.956569756092597e-06, "loss": 0.2629, "step": 2535 }, { "epoch": 1.64, "learning_rate": 8.949615985786447e-06, "loss": 0.2739, "step": 2536 }, { "epoch": 1.64, "learning_rate": 8.942662729021148e-06, "loss": 0.2617, "step": 2537 }, { "epoch": 1.64, "learning_rate": 8.935709989196195e-06, "loss": 0.259, "step": 2538 }, { "epoch": 1.64, "learning_rate": 8.928757769710844e-06, "loss": 0.2797, "step": 2539 }, { "epoch": 1.65, "learning_rate": 8.921806073964079e-06, "loss": 0.2518, "step": 2540 }, { "epoch": 1.65, "learning_rate": 8.914854905354643e-06, "loss": 0.2635, "step": 2541 }, { "epoch": 1.65, "learning_rate": 8.90790426728102e-06, "loss": 0.2764, "step": 2542 }, { "epoch": 1.65, "learning_rate": 8.900954163141421e-06, "loss": 0.2675, "step": 2543 }, { "epoch": 1.65, "learning_rate": 8.894004596333817e-06, "loss": 0.2648, "step": 2544 }, { "epoch": 1.65, "learning_rate": 8.887055570255892e-06, "loss": 0.2622, "step": 2545 }, { "epoch": 1.65, "learning_rate": 8.880107088305091e-06, "loss": 0.2597, "step": 2546 }, { "epoch": 1.65, "learning_rate": 8.87315915387857e-06, "loss": 0.2562, "step": 2547 }, { "epoch": 1.65, "learning_rate": 8.866211770373237e-06, "loss": 0.2711, "step": 2548 }, { "epoch": 1.65, "learning_rate": 8.859264941185711e-06, "loss": 0.2809, "step": 2549 }, { "epoch": 1.65, "learning_rate": 8.852318669712357e-06, "loss": 0.2487, "step": 2550 }, { "epoch": 1.65, "learning_rate": 8.845372959349262e-06, "loss": 0.2885, "step": 2551 }, { "epoch": 1.65, "learning_rate": 8.838427813492235e-06, "loss": 0.2704, "step": 2552 }, { "epoch": 1.65, "learning_rate": 8.83148323553681e-06, "loss": 0.2671, "step": 2553 }, { "epoch": 1.65, "learning_rate": 8.824539228878248e-06, "loss": 0.2587, "step": 2554 }, { "epoch": 1.65, "learning_rate": 8.817595796911524e-06, "loss": 0.2481, "step": 2555 }, { "epoch": 1.66, "learning_rate": 8.810652943031339e-06, "loss": 0.2554, "step": 2556 }, { "epoch": 1.66, "learning_rate": 8.803710670632103e-06, "loss": 0.2624, "step": 2557 }, { "epoch": 1.66, "learning_rate": 8.796768983107954e-06, "loss": 0.2525, "step": 2558 }, { "epoch": 1.66, "learning_rate": 8.789827883852728e-06, "loss": 0.2489, "step": 2559 }, { "epoch": 1.66, "learning_rate": 8.782887376259987e-06, "loss": 0.2599, "step": 2560 }, { "epoch": 1.66, "learning_rate": 8.775947463722999e-06, "loss": 0.2611, "step": 2561 }, { "epoch": 1.66, "learning_rate": 8.769008149634735e-06, "loss": 0.2534, "step": 2562 }, { "epoch": 1.66, "learning_rate": 8.76206943738789e-06, "loss": 0.272, "step": 2563 }, { "epoch": 1.66, "learning_rate": 8.755131330374844e-06, "loss": 0.2625, "step": 2564 }, { "epoch": 1.66, "learning_rate": 8.748193831987699e-06, "loss": 0.2538, "step": 2565 }, { "epoch": 1.66, "learning_rate": 8.741256945618241e-06, "loss": 0.2554, "step": 2566 }, { "epoch": 1.66, "learning_rate": 8.734320674657978e-06, "loss": 0.2578, "step": 2567 }, { "epoch": 1.66, "learning_rate": 8.7273850224981e-06, "loss": 0.2598, "step": 2568 }, { "epoch": 1.66, "learning_rate": 8.720449992529505e-06, "loss": 0.2574, "step": 2569 }, { "epoch": 1.66, "learning_rate": 8.713515588142777e-06, "loss": 0.254, "step": 2570 }, { "epoch": 1.67, "learning_rate": 8.706581812728206e-06, "loss": 0.2556, "step": 2571 }, { "epoch": 1.67, "learning_rate": 8.699648669675763e-06, "loss": 0.2628, "step": 2572 }, { "epoch": 1.67, "learning_rate": 8.692716162375121e-06, "loss": 0.2788, "step": 2573 }, { "epoch": 1.67, "learning_rate": 8.685784294215627e-06, "loss": 0.2582, "step": 2574 }, { "epoch": 1.67, "learning_rate": 8.678853068586339e-06, "loss": 0.2677, "step": 2575 }, { "epoch": 1.67, "learning_rate": 8.67192248887597e-06, "loss": 0.2553, "step": 2576 }, { "epoch": 1.67, "learning_rate": 8.66499255847295e-06, "loss": 0.2527, "step": 2577 }, { "epoch": 1.67, "learning_rate": 8.65806328076536e-06, "loss": 0.263, "step": 2578 }, { "epoch": 1.67, "learning_rate": 8.651134659140993e-06, "loss": 0.2734, "step": 2579 }, { "epoch": 1.67, "learning_rate": 8.64420669698729e-06, "loss": 0.2619, "step": 2580 }, { "epoch": 1.67, "learning_rate": 8.637279397691395e-06, "loss": 0.2643, "step": 2581 }, { "epoch": 1.67, "learning_rate": 8.63035276464012e-06, "loss": 0.2693, "step": 2582 }, { "epoch": 1.67, "learning_rate": 8.62342680121994e-06, "loss": 0.2483, "step": 2583 }, { "epoch": 1.67, "learning_rate": 8.616501510817021e-06, "loss": 0.2601, "step": 2584 }, { "epoch": 1.67, "learning_rate": 8.609576896817184e-06, "loss": 0.2596, "step": 2585 }, { "epoch": 1.67, "learning_rate": 8.602652962605933e-06, "loss": 0.2525, "step": 2586 }, { "epoch": 1.68, "learning_rate": 8.595729711568424e-06, "loss": 0.2556, "step": 2587 }, { "epoch": 1.68, "learning_rate": 8.588807147089498e-06, "loss": 0.2796, "step": 2588 }, { "epoch": 1.68, "learning_rate": 8.581885272553641e-06, "loss": 0.2763, "step": 2589 }, { "epoch": 1.68, "learning_rate": 8.574964091345019e-06, "loss": 0.2417, "step": 2590 }, { "epoch": 1.68, "learning_rate": 8.568043606847444e-06, "loss": 0.2758, "step": 2591 }, { "epoch": 1.68, "learning_rate": 8.561123822444399e-06, "loss": 0.2632, "step": 2592 }, { "epoch": 1.68, "learning_rate": 8.554204741519015e-06, "loss": 0.2567, "step": 2593 }, { "epoch": 1.68, "learning_rate": 8.547286367454089e-06, "loss": 0.2728, "step": 2594 }, { "epoch": 1.68, "learning_rate": 8.540368703632066e-06, "loss": 0.2552, "step": 2595 }, { "epoch": 1.68, "learning_rate": 8.533451753435046e-06, "loss": 0.2433, "step": 2596 }, { "epoch": 1.68, "learning_rate": 8.526535520244776e-06, "loss": 0.2622, "step": 2597 }, { "epoch": 1.68, "learning_rate": 8.519620007442659e-06, "loss": 0.2657, "step": 2598 }, { "epoch": 1.68, "learning_rate": 8.512705218409742e-06, "loss": 0.257, "step": 2599 }, { "epoch": 1.68, "learning_rate": 8.505791156526718e-06, "loss": 0.2542, "step": 2600 }, { "epoch": 1.68, "learning_rate": 8.498877825173922e-06, "loss": 0.2667, "step": 2601 }, { "epoch": 1.69, "learning_rate": 8.491965227731342e-06, "loss": 0.272, "step": 2602 }, { "epoch": 1.69, "learning_rate": 8.485053367578595e-06, "loss": 0.2682, "step": 2603 }, { "epoch": 1.69, "learning_rate": 8.478142248094947e-06, "loss": 0.2703, "step": 2604 }, { "epoch": 1.69, "learning_rate": 8.471231872659293e-06, "loss": 0.2592, "step": 2605 }, { "epoch": 1.69, "learning_rate": 8.464322244650171e-06, "loss": 0.268, "step": 2606 }, { "epoch": 1.69, "learning_rate": 8.45741336744575e-06, "loss": 0.2611, "step": 2607 }, { "epoch": 1.69, "learning_rate": 8.450505244423834e-06, "loss": 0.2657, "step": 2608 }, { "epoch": 1.69, "learning_rate": 8.443597878961864e-06, "loss": 0.2544, "step": 2609 }, { "epoch": 1.69, "learning_rate": 8.436691274436891e-06, "loss": 0.2777, "step": 2610 }, { "epoch": 1.69, "learning_rate": 8.429785434225624e-06, "loss": 0.261, "step": 2611 }, { "epoch": 1.69, "learning_rate": 8.422880361704363e-06, "loss": 0.2771, "step": 2612 }, { "epoch": 1.69, "learning_rate": 8.415976060249069e-06, "loss": 0.2374, "step": 2613 }, { "epoch": 1.69, "learning_rate": 8.409072533235293e-06, "loss": 0.2551, "step": 2614 }, { "epoch": 1.69, "learning_rate": 8.402169784038234e-06, "loss": 0.2807, "step": 2615 }, { "epoch": 1.69, "learning_rate": 8.395267816032692e-06, "loss": 0.2587, "step": 2616 }, { "epoch": 1.69, "learning_rate": 8.388366632593099e-06, "loss": 0.265, "step": 2617 }, { "epoch": 1.7, "learning_rate": 8.381466237093488e-06, "loss": 0.2556, "step": 2618 }, { "epoch": 1.7, "learning_rate": 8.374566632907526e-06, "loss": 0.2561, "step": 2619 }, { "epoch": 1.7, "learning_rate": 8.367667823408473e-06, "loss": 0.274, "step": 2620 }, { "epoch": 1.7, "learning_rate": 8.360769811969217e-06, "loss": 0.2747, "step": 2621 }, { "epoch": 1.7, "learning_rate": 8.353872601962244e-06, "loss": 0.2603, "step": 2622 }, { "epoch": 1.7, "learning_rate": 8.346976196759656e-06, "loss": 0.2717, "step": 2623 }, { "epoch": 1.7, "learning_rate": 8.34008059973316e-06, "loss": 0.2799, "step": 2624 }, { "epoch": 1.7, "learning_rate": 8.333185814254064e-06, "loss": 0.2582, "step": 2625 }, { "epoch": 1.7, "learning_rate": 8.326291843693287e-06, "loss": 0.2385, "step": 2626 }, { "epoch": 1.7, "learning_rate": 8.319398691421334e-06, "loss": 0.2683, "step": 2627 }, { "epoch": 1.7, "learning_rate": 8.312506360808333e-06, "loss": 0.2616, "step": 2628 }, { "epoch": 1.7, "learning_rate": 8.305614855223985e-06, "loss": 0.259, "step": 2629 }, { "epoch": 1.7, "learning_rate": 8.298724178037611e-06, "loss": 0.241, "step": 2630 }, { "epoch": 1.7, "learning_rate": 8.291834332618109e-06, "loss": 0.2433, "step": 2631 }, { "epoch": 1.7, "learning_rate": 8.284945322333982e-06, "loss": 0.2607, "step": 2632 }, { "epoch": 1.71, "learning_rate": 8.278057150553315e-06, "loss": 0.2589, "step": 2633 }, { "epoch": 1.71, "learning_rate": 8.271169820643798e-06, "loss": 0.2566, "step": 2634 }, { "epoch": 1.71, "learning_rate": 8.264283335972688e-06, "loss": 0.2677, "step": 2635 }, { "epoch": 1.71, "learning_rate": 8.257397699906851e-06, "loss": 0.2537, "step": 2636 }, { "epoch": 1.71, "learning_rate": 8.250512915812717e-06, "loss": 0.2688, "step": 2637 }, { "epoch": 1.71, "learning_rate": 8.243628987056321e-06, "loss": 0.2522, "step": 2638 }, { "epoch": 1.71, "learning_rate": 8.23674591700326e-06, "loss": 0.2659, "step": 2639 }, { "epoch": 1.71, "learning_rate": 8.229863709018725e-06, "loss": 0.265, "step": 2640 }, { "epoch": 1.71, "learning_rate": 8.22298236646748e-06, "loss": 0.2463, "step": 2641 }, { "epoch": 1.71, "learning_rate": 8.216101892713863e-06, "loss": 0.2693, "step": 2642 }, { "epoch": 1.71, "learning_rate": 8.209222291121796e-06, "loss": 0.2679, "step": 2643 }, { "epoch": 1.71, "learning_rate": 8.202343565054765e-06, "loss": 0.27, "step": 2644 }, { "epoch": 1.71, "learning_rate": 8.195465717875836e-06, "loss": 0.278, "step": 2645 }, { "epoch": 1.71, "learning_rate": 8.188588752947636e-06, "loss": 0.2532, "step": 2646 }, { "epoch": 1.71, "learning_rate": 8.181712673632373e-06, "loss": 0.2611, "step": 2647 }, { "epoch": 1.72, "learning_rate": 8.174837483291812e-06, "loss": 0.2526, "step": 2648 }, { "epoch": 1.72, "learning_rate": 8.167963185287286e-06, "loss": 0.2543, "step": 2649 }, { "epoch": 1.72, "learning_rate": 8.161089782979693e-06, "loss": 0.2551, "step": 2650 }, { "epoch": 1.72, "learning_rate": 8.15421727972949e-06, "loss": 0.2677, "step": 2651 }, { "epoch": 1.72, "learning_rate": 8.147345678896703e-06, "loss": 0.2681, "step": 2652 }, { "epoch": 1.72, "learning_rate": 8.140474983840903e-06, "loss": 0.2635, "step": 2653 }, { "epoch": 1.72, "learning_rate": 8.13360519792123e-06, "loss": 0.243, "step": 2654 }, { "epoch": 1.72, "learning_rate": 8.126736324496379e-06, "loss": 0.2637, "step": 2655 }, { "epoch": 1.72, "learning_rate": 8.119868366924585e-06, "loss": 0.2599, "step": 2656 }, { "epoch": 1.72, "learning_rate": 8.11300132856366e-06, "loss": 0.2732, "step": 2657 }, { "epoch": 1.72, "learning_rate": 8.106135212770935e-06, "loss": 0.2374, "step": 2658 }, { "epoch": 1.72, "learning_rate": 8.099270022903322e-06, "loss": 0.2798, "step": 2659 }, { "epoch": 1.72, "learning_rate": 8.092405762317253e-06, "loss": 0.2574, "step": 2660 }, { "epoch": 1.72, "learning_rate": 8.08554243436873e-06, "loss": 0.2666, "step": 2661 }, { "epoch": 1.72, "learning_rate": 8.078680042413275e-06, "loss": 0.2593, "step": 2662 }, { "epoch": 1.72, "learning_rate": 8.071818589805972e-06, "loss": 0.2759, "step": 2663 }, { "epoch": 1.73, "learning_rate": 8.06495807990144e-06, "loss": 0.267, "step": 2664 }, { "epoch": 1.73, "learning_rate": 8.058098516053827e-06, "loss": 0.2481, "step": 2665 }, { "epoch": 1.73, "learning_rate": 8.051239901616835e-06, "loss": 0.2653, "step": 2666 }, { "epoch": 1.73, "learning_rate": 8.044382239943686e-06, "loss": 0.2405, "step": 2667 }, { "epoch": 1.73, "learning_rate": 8.037525534387151e-06, "loss": 0.2643, "step": 2668 }, { "epoch": 1.73, "learning_rate": 8.030669788299521e-06, "loss": 0.2439, "step": 2669 }, { "epoch": 1.73, "learning_rate": 8.023815005032626e-06, "loss": 0.2574, "step": 2670 }, { "epoch": 1.73, "learning_rate": 8.01696118793782e-06, "loss": 0.255, "step": 2671 }, { "epoch": 1.73, "learning_rate": 8.01010834036599e-06, "loss": 0.27, "step": 2672 }, { "epoch": 1.73, "learning_rate": 8.003256465667542e-06, "loss": 0.2491, "step": 2673 }, { "epoch": 1.73, "learning_rate": 7.996405567192415e-06, "loss": 0.2688, "step": 2674 }, { "epoch": 1.73, "learning_rate": 7.989555648290062e-06, "loss": 0.2768, "step": 2675 }, { "epoch": 1.73, "learning_rate": 7.982706712309466e-06, "loss": 0.2546, "step": 2676 }, { "epoch": 1.73, "learning_rate": 7.975858762599117e-06, "loss": 0.2474, "step": 2677 }, { "epoch": 1.73, "learning_rate": 7.96901180250704e-06, "loss": 0.2647, "step": 2678 }, { "epoch": 1.74, "learning_rate": 7.962165835380758e-06, "loss": 0.2563, "step": 2679 }, { "epoch": 1.74, "learning_rate": 7.955320864567325e-06, "loss": 0.2616, "step": 2680 }, { "epoch": 1.74, "learning_rate": 7.948476893413291e-06, "loss": 0.2672, "step": 2681 }, { "epoch": 1.74, "learning_rate": 7.941633925264736e-06, "loss": 0.2642, "step": 2682 }, { "epoch": 1.74, "learning_rate": 7.934791963467232e-06, "loss": 0.246, "step": 2683 }, { "epoch": 1.74, "learning_rate": 7.927951011365868e-06, "loss": 0.2568, "step": 2684 }, { "epoch": 1.74, "learning_rate": 7.921111072305248e-06, "loss": 0.2801, "step": 2685 }, { "epoch": 1.74, "learning_rate": 7.914272149629456e-06, "loss": 0.2548, "step": 2686 }, { "epoch": 1.74, "learning_rate": 7.907434246682106e-06, "loss": 0.2769, "step": 2687 }, { "epoch": 1.74, "learning_rate": 7.900597366806293e-06, "loss": 0.2738, "step": 2688 }, { "epoch": 1.74, "learning_rate": 7.89376151334463e-06, "loss": 0.2808, "step": 2689 }, { "epoch": 1.74, "learning_rate": 7.886926689639208e-06, "loss": 0.271, "step": 2690 }, { "epoch": 1.74, "learning_rate": 7.880092899031633e-06, "loss": 0.2771, "step": 2691 }, { "epoch": 1.74, "learning_rate": 7.87326014486299e-06, "loss": 0.2581, "step": 2692 }, { "epoch": 1.74, "learning_rate": 7.866428430473873e-06, "loss": 0.2582, "step": 2693 }, { "epoch": 1.74, "learning_rate": 7.859597759204357e-06, "loss": 0.273, "step": 2694 }, { "epoch": 1.75, "learning_rate": 7.85276813439401e-06, "loss": 0.2531, "step": 2695 }, { "epoch": 1.75, "learning_rate": 7.845939559381887e-06, "loss": 0.2642, "step": 2696 }, { "epoch": 1.75, "learning_rate": 7.839112037506533e-06, "loss": 0.2513, "step": 2697 }, { "epoch": 1.75, "learning_rate": 7.832285572105973e-06, "loss": 0.2682, "step": 2698 }, { "epoch": 1.75, "learning_rate": 7.825460166517721e-06, "loss": 0.2588, "step": 2699 }, { "epoch": 1.75, "learning_rate": 7.81863582407877e-06, "loss": 0.2504, "step": 2700 }, { "epoch": 1.75, "learning_rate": 7.811812548125598e-06, "loss": 0.2747, "step": 2701 }, { "epoch": 1.75, "learning_rate": 7.804990341994147e-06, "loss": 0.2611, "step": 2702 }, { "epoch": 1.75, "learning_rate": 7.798169209019858e-06, "loss": 0.2672, "step": 2703 }, { "epoch": 1.75, "learning_rate": 7.791349152537627e-06, "loss": 0.2612, "step": 2704 }, { "epoch": 1.75, "learning_rate": 7.784530175881833e-06, "loss": 0.2449, "step": 2705 }, { "epoch": 1.75, "learning_rate": 7.777712282386332e-06, "loss": 0.2794, "step": 2706 }, { "epoch": 1.75, "learning_rate": 7.770895475384436e-06, "loss": 0.2487, "step": 2707 }, { "epoch": 1.75, "learning_rate": 7.764079758208947e-06, "loss": 0.2523, "step": 2708 }, { "epoch": 1.75, "learning_rate": 7.757265134192108e-06, "loss": 0.261, "step": 2709 }, { "epoch": 1.76, "learning_rate": 7.750451606665652e-06, "loss": 0.26, "step": 2710 }, { "epoch": 1.76, "learning_rate": 7.743639178960757e-06, "loss": 0.2627, "step": 2711 }, { "epoch": 1.76, "learning_rate": 7.736827854408079e-06, "loss": 0.243, "step": 2712 }, { "epoch": 1.76, "learning_rate": 7.73001763633772e-06, "loss": 0.2677, "step": 2713 }, { "epoch": 1.76, "learning_rate": 7.723208528079255e-06, "loss": 0.2728, "step": 2714 }, { "epoch": 1.76, "learning_rate": 7.716400532961702e-06, "loss": 0.2756, "step": 2715 }, { "epoch": 1.76, "learning_rate": 7.70959365431355e-06, "loss": 0.2658, "step": 2716 }, { "epoch": 1.76, "learning_rate": 7.702787895462726e-06, "loss": 0.2627, "step": 2717 }, { "epoch": 1.76, "learning_rate": 7.69598325973663e-06, "loss": 0.2624, "step": 2718 }, { "epoch": 1.76, "learning_rate": 7.689179750462087e-06, "loss": 0.2808, "step": 2719 }, { "epoch": 1.76, "learning_rate": 7.682377370965398e-06, "loss": 0.2487, "step": 2720 }, { "epoch": 1.76, "learning_rate": 7.675576124572288e-06, "loss": 0.2649, "step": 2721 }, { "epoch": 1.76, "learning_rate": 7.668776014607947e-06, "loss": 0.2602, "step": 2722 }, { "epoch": 1.76, "learning_rate": 7.661977044396994e-06, "loss": 0.2712, "step": 2723 }, { "epoch": 1.76, "learning_rate": 7.655179217263505e-06, "loss": 0.2379, "step": 2724 }, { "epoch": 1.76, "learning_rate": 7.648382536530984e-06, "loss": 0.2512, "step": 2725 }, { "epoch": 1.77, "learning_rate": 7.641587005522383e-06, "loss": 0.2578, "step": 2726 }, { "epoch": 1.77, "learning_rate": 7.634792627560095e-06, "loss": 0.2606, "step": 2727 }, { "epoch": 1.77, "learning_rate": 7.627999405965933e-06, "loss": 0.2631, "step": 2728 }, { "epoch": 1.77, "learning_rate": 7.621207344061168e-06, "loss": 0.2626, "step": 2729 }, { "epoch": 1.77, "learning_rate": 7.614416445166479e-06, "loss": 0.26, "step": 2730 }, { "epoch": 1.77, "learning_rate": 7.607626712602e-06, "loss": 0.251, "step": 2731 }, { "epoch": 1.77, "learning_rate": 7.600838149687274e-06, "loss": 0.2555, "step": 2732 }, { "epoch": 1.77, "learning_rate": 7.594050759741289e-06, "loss": 0.266, "step": 2733 }, { "epoch": 1.77, "learning_rate": 7.587264546082447e-06, "loss": 0.269, "step": 2734 }, { "epoch": 1.77, "learning_rate": 7.580479512028586e-06, "loss": 0.2557, "step": 2735 }, { "epoch": 1.77, "learning_rate": 7.573695660896954e-06, "loss": 0.259, "step": 2736 }, { "epoch": 1.77, "learning_rate": 7.5669129960042365e-06, "loss": 0.2519, "step": 2737 }, { "epoch": 1.77, "learning_rate": 7.5601315206665224e-06, "loss": 0.2772, "step": 2738 }, { "epoch": 1.77, "learning_rate": 7.553351238199334e-06, "loss": 0.2565, "step": 2739 }, { "epoch": 1.77, "learning_rate": 7.5465721519176e-06, "loss": 0.2759, "step": 2740 }, { "epoch": 1.78, "learning_rate": 7.539794265135669e-06, "loss": 0.2581, "step": 2741 }, { "epoch": 1.78, "learning_rate": 7.533017581167303e-06, "loss": 0.2703, "step": 2742 }, { "epoch": 1.78, "learning_rate": 7.526242103325675e-06, "loss": 0.2534, "step": 2743 }, { "epoch": 1.78, "learning_rate": 7.5194678349233665e-06, "loss": 0.2747, "step": 2744 }, { "epoch": 1.78, "learning_rate": 7.512694779272369e-06, "loss": 0.2631, "step": 2745 }, { "epoch": 1.78, "learning_rate": 7.505922939684086e-06, "loss": 0.2578, "step": 2746 }, { "epoch": 1.78, "learning_rate": 7.499152319469319e-06, "loss": 0.2586, "step": 2747 }, { "epoch": 1.78, "learning_rate": 7.492382921938277e-06, "loss": 0.2584, "step": 2748 }, { "epoch": 1.78, "learning_rate": 7.4856147504005715e-06, "loss": 0.275, "step": 2749 }, { "epoch": 1.78, "learning_rate": 7.478847808165213e-06, "loss": 0.2641, "step": 2750 }, { "epoch": 1.78, "learning_rate": 7.47208209854061e-06, "loss": 0.2594, "step": 2751 }, { "epoch": 1.78, "learning_rate": 7.465317624834574e-06, "loss": 0.2651, "step": 2752 }, { "epoch": 1.78, "learning_rate": 7.458554390354302e-06, "loss": 0.2588, "step": 2753 }, { "epoch": 1.78, "learning_rate": 7.4517923984064014e-06, "loss": 0.2662, "step": 2754 }, { "epoch": 1.78, "learning_rate": 7.445031652296853e-06, "loss": 0.2696, "step": 2755 }, { "epoch": 1.78, "learning_rate": 7.4382721553310445e-06, "loss": 0.2473, "step": 2756 }, { "epoch": 1.79, "learning_rate": 7.431513910813739e-06, "loss": 0.2595, "step": 2757 }, { "epoch": 1.79, "learning_rate": 7.424756922049103e-06, "loss": 0.2798, "step": 2758 }, { "epoch": 1.79, "learning_rate": 7.418001192340672e-06, "loss": 0.2561, "step": 2759 }, { "epoch": 1.79, "learning_rate": 7.41124672499138e-06, "loss": 0.264, "step": 2760 }, { "epoch": 1.79, "learning_rate": 7.404493523303533e-06, "loss": 0.2635, "step": 2761 }, { "epoch": 1.79, "learning_rate": 7.397741590578833e-06, "loss": 0.2615, "step": 2762 }, { "epoch": 1.79, "learning_rate": 7.390990930118341e-06, "loss": 0.2659, "step": 2763 }, { "epoch": 1.79, "learning_rate": 7.3842415452225145e-06, "loss": 0.265, "step": 2764 }, { "epoch": 1.79, "learning_rate": 7.3774934391911725e-06, "loss": 0.26, "step": 2765 }, { "epoch": 1.79, "learning_rate": 7.3707466153235215e-06, "loss": 0.2645, "step": 2766 }, { "epoch": 1.79, "learning_rate": 7.36400107691814e-06, "loss": 0.2637, "step": 2767 }, { "epoch": 1.79, "learning_rate": 7.357256827272964e-06, "loss": 0.273, "step": 2768 }, { "epoch": 1.79, "learning_rate": 7.350513869685319e-06, "loss": 0.2735, "step": 2769 }, { "epoch": 1.79, "learning_rate": 7.343772207451878e-06, "loss": 0.2587, "step": 2770 }, { "epoch": 1.79, "learning_rate": 7.3370318438687015e-06, "loss": 0.2583, "step": 2771 }, { "epoch": 1.8, "learning_rate": 7.330292782231199e-06, "loss": 0.2566, "step": 2772 }, { "epoch": 1.8, "learning_rate": 7.323555025834155e-06, "loss": 0.2626, "step": 2773 }, { "epoch": 1.8, "learning_rate": 7.3168185779717026e-06, "loss": 0.2792, "step": 2774 }, { "epoch": 1.8, "learning_rate": 7.310083441937355e-06, "loss": 0.2524, "step": 2775 }, { "epoch": 1.8, "learning_rate": 7.303349621023959e-06, "loss": 0.2698, "step": 2776 }, { "epoch": 1.8, "learning_rate": 7.296617118523741e-06, "loss": 0.2685, "step": 2777 }, { "epoch": 1.8, "learning_rate": 7.289885937728269e-06, "loss": 0.2746, "step": 2778 }, { "epoch": 1.8, "learning_rate": 7.283156081928473e-06, "loss": 0.2495, "step": 2779 }, { "epoch": 1.8, "learning_rate": 7.2764275544146244e-06, "loss": 0.2564, "step": 2780 }, { "epoch": 1.8, "learning_rate": 7.269700358476363e-06, "loss": 0.2531, "step": 2781 }, { "epoch": 1.8, "learning_rate": 7.262974497402655e-06, "loss": 0.2577, "step": 2782 }, { "epoch": 1.8, "learning_rate": 7.256249974481834e-06, "loss": 0.2584, "step": 2783 }, { "epoch": 1.8, "learning_rate": 7.249526793001568e-06, "loss": 0.2568, "step": 2784 }, { "epoch": 1.8, "learning_rate": 7.242804956248872e-06, "loss": 0.2549, "step": 2785 }, { "epoch": 1.8, "learning_rate": 7.236084467510106e-06, "loss": 0.2567, "step": 2786 }, { "epoch": 1.81, "learning_rate": 7.229365330070965e-06, "loss": 0.2522, "step": 2787 }, { "epoch": 1.81, "learning_rate": 7.222647547216494e-06, "loss": 0.2553, "step": 2788 }, { "epoch": 1.81, "learning_rate": 7.215931122231059e-06, "loss": 0.2537, "step": 2789 }, { "epoch": 1.81, "learning_rate": 7.209216058398381e-06, "loss": 0.2644, "step": 2790 }, { "epoch": 1.81, "learning_rate": 7.202502359001502e-06, "loss": 0.2704, "step": 2791 }, { "epoch": 1.81, "learning_rate": 7.195790027322802e-06, "loss": 0.2577, "step": 2792 }, { "epoch": 1.81, "learning_rate": 7.189079066643994e-06, "loss": 0.2603, "step": 2793 }, { "epoch": 1.81, "learning_rate": 7.182369480246114e-06, "loss": 0.2489, "step": 2794 }, { "epoch": 1.81, "learning_rate": 7.175661271409534e-06, "loss": 0.2572, "step": 2795 }, { "epoch": 1.81, "learning_rate": 7.168954443413949e-06, "loss": 0.2559, "step": 2796 }, { "epoch": 1.81, "learning_rate": 7.162248999538375e-06, "loss": 0.2414, "step": 2797 }, { "epoch": 1.81, "learning_rate": 7.155544943061164e-06, "loss": 0.2636, "step": 2798 }, { "epoch": 1.81, "learning_rate": 7.148842277259969e-06, "loss": 0.2687, "step": 2799 }, { "epoch": 1.81, "learning_rate": 7.142141005411787e-06, "loss": 0.2698, "step": 2800 }, { "epoch": 1.81, "learning_rate": 7.13544113079291e-06, "loss": 0.2633, "step": 2801 }, { "epoch": 1.81, "learning_rate": 7.128742656678971e-06, "loss": 0.2568, "step": 2802 }, { "epoch": 1.82, "learning_rate": 7.122045586344892e-06, "loss": 0.2611, "step": 2803 }, { "epoch": 1.82, "learning_rate": 7.1153499230649355e-06, "loss": 0.2606, "step": 2804 }, { "epoch": 1.82, "learning_rate": 7.108655670112652e-06, "loss": 0.2658, "step": 2805 }, { "epoch": 1.82, "learning_rate": 7.101962830760923e-06, "loss": 0.274, "step": 2806 }, { "epoch": 1.82, "learning_rate": 7.095271408281917e-06, "loss": 0.2457, "step": 2807 }, { "epoch": 1.82, "learning_rate": 7.088581405947132e-06, "loss": 0.2624, "step": 2808 }, { "epoch": 1.82, "learning_rate": 7.081892827027364e-06, "loss": 0.2637, "step": 2809 }, { "epoch": 1.82, "learning_rate": 7.0752056747927e-06, "loss": 0.2573, "step": 2810 }, { "epoch": 1.82, "learning_rate": 7.068519952512551e-06, "loss": 0.2563, "step": 2811 }, { "epoch": 1.82, "learning_rate": 7.06183566345561e-06, "loss": 0.2634, "step": 2812 }, { "epoch": 1.82, "learning_rate": 7.055152810889885e-06, "loss": 0.2653, "step": 2813 }, { "epoch": 1.82, "learning_rate": 7.0484713980826655e-06, "loss": 0.2722, "step": 2814 }, { "epoch": 1.82, "learning_rate": 7.041791428300555e-06, "loss": 0.2563, "step": 2815 }, { "epoch": 1.82, "learning_rate": 7.035112904809433e-06, "loss": 0.2464, "step": 2816 }, { "epoch": 1.82, "learning_rate": 7.028435830874491e-06, "loss": 0.2651, "step": 2817 }, { "epoch": 1.83, "learning_rate": 7.021760209760194e-06, "loss": 0.253, "step": 2818 }, { "epoch": 1.83, "learning_rate": 7.015086044730309e-06, "loss": 0.2629, "step": 2819 }, { "epoch": 1.83, "learning_rate": 7.008413339047883e-06, "loss": 0.2621, "step": 2820 }, { "epoch": 1.83, "learning_rate": 7.0017420959752595e-06, "loss": 0.2617, "step": 2821 }, { "epoch": 1.83, "learning_rate": 6.9950723187740525e-06, "loss": 0.2524, "step": 2822 }, { "epoch": 1.83, "learning_rate": 6.988404010705174e-06, "loss": 0.2686, "step": 2823 }, { "epoch": 1.83, "learning_rate": 6.981737175028806e-06, "loss": 0.2742, "step": 2824 }, { "epoch": 1.83, "learning_rate": 6.975071815004425e-06, "loss": 0.245, "step": 2825 }, { "epoch": 1.83, "learning_rate": 6.968407933890766e-06, "loss": 0.2725, "step": 2826 }, { "epoch": 1.83, "learning_rate": 6.961745534945856e-06, "loss": 0.256, "step": 2827 }, { "epoch": 1.83, "learning_rate": 6.955084621426998e-06, "loss": 0.2649, "step": 2828 }, { "epoch": 1.83, "learning_rate": 6.948425196590758e-06, "loss": 0.255, "step": 2829 }, { "epoch": 1.83, "learning_rate": 6.9417672636929845e-06, "loss": 0.2526, "step": 2830 }, { "epoch": 1.83, "learning_rate": 6.935110825988787e-06, "loss": 0.251, "step": 2831 }, { "epoch": 1.83, "learning_rate": 6.928455886732555e-06, "loss": 0.2638, "step": 2832 }, { "epoch": 1.83, "learning_rate": 6.921802449177933e-06, "loss": 0.2494, "step": 2833 }, { "epoch": 1.84, "learning_rate": 6.915150516577847e-06, "loss": 0.2616, "step": 2834 }, { "epoch": 1.84, "learning_rate": 6.9085000921844684e-06, "loss": 0.2785, "step": 2835 }, { "epoch": 1.84, "learning_rate": 6.901851179249248e-06, "loss": 0.2523, "step": 2836 }, { "epoch": 1.84, "learning_rate": 6.895203781022887e-06, "loss": 0.2518, "step": 2837 }, { "epoch": 1.84, "learning_rate": 6.888557900755352e-06, "loss": 0.2619, "step": 2838 }, { "epoch": 1.84, "learning_rate": 6.881913541695864e-06, "loss": 0.2605, "step": 2839 }, { "epoch": 1.84, "learning_rate": 6.8752707070929015e-06, "loss": 0.259, "step": 2840 }, { "epoch": 1.84, "learning_rate": 6.868629400194198e-06, "loss": 0.2681, "step": 2841 }, { "epoch": 1.84, "learning_rate": 6.861989624246739e-06, "loss": 0.2624, "step": 2842 }, { "epoch": 1.84, "learning_rate": 6.85535138249676e-06, "loss": 0.251, "step": 2843 }, { "epoch": 1.84, "learning_rate": 6.848714678189754e-06, "loss": 0.2668, "step": 2844 }, { "epoch": 1.84, "learning_rate": 6.842079514570452e-06, "loss": 0.2621, "step": 2845 }, { "epoch": 1.84, "learning_rate": 6.835445894882842e-06, "loss": 0.2679, "step": 2846 }, { "epoch": 1.84, "learning_rate": 6.828813822370144e-06, "loss": 0.2508, "step": 2847 }, { "epoch": 1.84, "learning_rate": 6.8221833002748364e-06, "loss": 0.2518, "step": 2848 }, { "epoch": 1.85, "learning_rate": 6.81555433183863e-06, "loss": 0.2565, "step": 2849 }, { "epoch": 1.85, "learning_rate": 6.808926920302477e-06, "loss": 0.2429, "step": 2850 }, { "epoch": 1.85, "learning_rate": 6.8023010689065736e-06, "loss": 0.252, "step": 2851 }, { "epoch": 1.85, "learning_rate": 6.795676780890346e-06, "loss": 0.2611, "step": 2852 }, { "epoch": 1.85, "learning_rate": 6.789054059492466e-06, "loss": 0.2449, "step": 2853 }, { "epoch": 1.85, "learning_rate": 6.782432907950823e-06, "loss": 0.2566, "step": 2854 }, { "epoch": 1.85, "learning_rate": 6.775813329502561e-06, "loss": 0.26, "step": 2855 }, { "epoch": 1.85, "learning_rate": 6.76919532738403e-06, "loss": 0.2742, "step": 2856 }, { "epoch": 1.85, "learning_rate": 6.762578904830834e-06, "loss": 0.276, "step": 2857 }, { "epoch": 1.85, "learning_rate": 6.755964065077786e-06, "loss": 0.2633, "step": 2858 }, { "epoch": 1.85, "learning_rate": 6.749350811358937e-06, "loss": 0.2623, "step": 2859 }, { "epoch": 1.85, "learning_rate": 6.742739146907551e-06, "loss": 0.2614, "step": 2860 }, { "epoch": 1.85, "learning_rate": 6.736129074956129e-06, "loss": 0.2602, "step": 2861 }, { "epoch": 1.85, "learning_rate": 6.72952059873638e-06, "loss": 0.2604, "step": 2862 }, { "epoch": 1.85, "learning_rate": 6.722913721479246e-06, "loss": 0.2688, "step": 2863 }, { "epoch": 1.85, "learning_rate": 6.716308446414872e-06, "loss": 0.2479, "step": 2864 }, { "epoch": 1.86, "learning_rate": 6.709704776772636e-06, "loss": 0.2509, "step": 2865 }, { "epoch": 1.86, "learning_rate": 6.703102715781116e-06, "loss": 0.2655, "step": 2866 }, { "epoch": 1.86, "learning_rate": 6.6965022666681164e-06, "loss": 0.2483, "step": 2867 }, { "epoch": 1.86, "learning_rate": 6.689903432660642e-06, "loss": 0.2783, "step": 2868 }, { "epoch": 1.86, "learning_rate": 6.683306216984917e-06, "loss": 0.2561, "step": 2869 }, { "epoch": 1.86, "learning_rate": 6.676710622866374e-06, "loss": 0.2624, "step": 2870 }, { "epoch": 1.86, "learning_rate": 6.670116653529644e-06, "loss": 0.2512, "step": 2871 }, { "epoch": 1.86, "learning_rate": 6.663524312198575e-06, "loss": 0.2858, "step": 2872 }, { "epoch": 1.86, "learning_rate": 6.656933602096206e-06, "loss": 0.2586, "step": 2873 }, { "epoch": 1.86, "learning_rate": 6.650344526444796e-06, "loss": 0.2524, "step": 2874 }, { "epoch": 1.86, "learning_rate": 6.643757088465785e-06, "loss": 0.2648, "step": 2875 }, { "epoch": 1.86, "learning_rate": 6.6371712913798315e-06, "loss": 0.2516, "step": 2876 }, { "epoch": 1.86, "learning_rate": 6.6305871384067726e-06, "loss": 0.257, "step": 2877 }, { "epoch": 1.86, "learning_rate": 6.624004632765661e-06, "loss": 0.2584, "step": 2878 }, { "epoch": 1.86, "learning_rate": 6.617423777674727e-06, "loss": 0.2628, "step": 2879 }, { "epoch": 1.87, "learning_rate": 6.610844576351407e-06, "loss": 0.2475, "step": 2880 }, { "epoch": 1.87, "learning_rate": 6.6042670320123195e-06, "loss": 0.2497, "step": 2881 }, { "epoch": 1.87, "learning_rate": 6.597691147873277e-06, "loss": 0.259, "step": 2882 }, { "epoch": 1.87, "learning_rate": 6.591116927149284e-06, "loss": 0.2577, "step": 2883 }, { "epoch": 1.87, "learning_rate": 6.584544373054524e-06, "loss": 0.2415, "step": 2884 }, { "epoch": 1.87, "learning_rate": 6.57797348880237e-06, "loss": 0.2614, "step": 2885 }, { "epoch": 1.87, "learning_rate": 6.5714042776053775e-06, "loss": 0.2614, "step": 2886 }, { "epoch": 1.87, "learning_rate": 6.564836742675284e-06, "loss": 0.2585, "step": 2887 }, { "epoch": 1.87, "learning_rate": 6.558270887223013e-06, "loss": 0.2641, "step": 2888 }, { "epoch": 1.87, "learning_rate": 6.551706714458653e-06, "loss": 0.2852, "step": 2889 }, { "epoch": 1.87, "learning_rate": 6.545144227591486e-06, "loss": 0.243, "step": 2890 }, { "epoch": 1.87, "learning_rate": 6.538583429829963e-06, "loss": 0.285, "step": 2891 }, { "epoch": 1.87, "learning_rate": 6.5320243243817036e-06, "loss": 0.2668, "step": 2892 }, { "epoch": 1.87, "learning_rate": 6.525466914453509e-06, "loss": 0.2633, "step": 2893 }, { "epoch": 1.87, "learning_rate": 6.518911203251345e-06, "loss": 0.2731, "step": 2894 }, { "epoch": 1.88, "learning_rate": 6.51235719398035e-06, "loss": 0.2483, "step": 2895 }, { "epoch": 1.88, "learning_rate": 6.505804889844828e-06, "loss": 0.2713, "step": 2896 }, { "epoch": 1.88, "learning_rate": 6.499254294048257e-06, "loss": 0.2638, "step": 2897 }, { "epoch": 1.88, "learning_rate": 6.492705409793266e-06, "loss": 0.249, "step": 2898 }, { "epoch": 1.88, "learning_rate": 6.486158240281664e-06, "loss": 0.262, "step": 2899 }, { "epoch": 1.88, "learning_rate": 6.479612788714402e-06, "loss": 0.2585, "step": 2900 }, { "epoch": 1.88, "learning_rate": 6.473069058291612e-06, "loss": 0.2618, "step": 2901 }, { "epoch": 1.88, "learning_rate": 6.466527052212566e-06, "loss": 0.2667, "step": 2902 }, { "epoch": 1.88, "learning_rate": 6.459986773675708e-06, "loss": 0.2461, "step": 2903 }, { "epoch": 1.88, "learning_rate": 6.4534482258786246e-06, "loss": 0.2705, "step": 2904 }, { "epoch": 1.88, "learning_rate": 6.4469114120180684e-06, "loss": 0.2612, "step": 2905 }, { "epoch": 1.88, "learning_rate": 6.4403763352899305e-06, "loss": 0.2634, "step": 2906 }, { "epoch": 1.88, "learning_rate": 6.433842998889271e-06, "loss": 0.2693, "step": 2907 }, { "epoch": 1.88, "learning_rate": 6.427311406010275e-06, "loss": 0.2499, "step": 2908 }, { "epoch": 1.88, "learning_rate": 6.420781559846299e-06, "loss": 0.259, "step": 2909 }, { "epoch": 1.88, "learning_rate": 6.414253463589833e-06, "loss": 0.2674, "step": 2910 }, { "epoch": 1.89, "learning_rate": 6.40772712043251e-06, "loss": 0.2569, "step": 2911 }, { "epoch": 1.89, "learning_rate": 6.401202533565116e-06, "loss": 0.2473, "step": 2912 }, { "epoch": 1.89, "learning_rate": 6.394679706177564e-06, "loss": 0.2644, "step": 2913 }, { "epoch": 1.89, "learning_rate": 6.388158641458924e-06, "loss": 0.2655, "step": 2914 }, { "epoch": 1.89, "learning_rate": 6.381639342597384e-06, "loss": 0.2679, "step": 2915 }, { "epoch": 1.89, "learning_rate": 6.375121812780289e-06, "loss": 0.2474, "step": 2916 }, { "epoch": 1.89, "learning_rate": 6.3686060551941035e-06, "loss": 0.2646, "step": 2917 }, { "epoch": 1.89, "learning_rate": 6.362092073024436e-06, "loss": 0.2567, "step": 2918 }, { "epoch": 1.89, "learning_rate": 6.355579869456017e-06, "loss": 0.2468, "step": 2919 }, { "epoch": 1.89, "learning_rate": 6.34906944767272e-06, "loss": 0.2395, "step": 2920 }, { "epoch": 1.89, "learning_rate": 6.342560810857533e-06, "loss": 0.252, "step": 2921 }, { "epoch": 1.89, "learning_rate": 6.336053962192587e-06, "loss": 0.2515, "step": 2922 }, { "epoch": 1.89, "learning_rate": 6.329548904859122e-06, "loss": 0.2642, "step": 2923 }, { "epoch": 1.89, "learning_rate": 6.32304564203752e-06, "loss": 0.2454, "step": 2924 }, { "epoch": 1.89, "learning_rate": 6.316544176907265e-06, "loss": 0.2611, "step": 2925 }, { "epoch": 1.9, "learning_rate": 6.310044512646985e-06, "loss": 0.2611, "step": 2926 }, { "epoch": 1.9, "learning_rate": 6.303546652434407e-06, "loss": 0.2572, "step": 2927 }, { "epoch": 1.9, "learning_rate": 6.29705059944639e-06, "loss": 0.2635, "step": 2928 }, { "epoch": 1.9, "learning_rate": 6.290556356858904e-06, "loss": 0.2743, "step": 2929 }, { "epoch": 1.9, "learning_rate": 6.28406392784703e-06, "loss": 0.2558, "step": 2930 }, { "epoch": 1.9, "learning_rate": 6.277573315584978e-06, "loss": 0.2638, "step": 2931 }, { "epoch": 1.9, "learning_rate": 6.271084523246043e-06, "loss": 0.2681, "step": 2932 }, { "epoch": 1.9, "learning_rate": 6.2645975540026575e-06, "loss": 0.2574, "step": 2933 }, { "epoch": 1.9, "learning_rate": 6.258112411026342e-06, "loss": 0.244, "step": 2934 }, { "epoch": 1.9, "learning_rate": 6.251629097487741e-06, "loss": 0.2802, "step": 2935 }, { "epoch": 1.9, "learning_rate": 6.245147616556591e-06, "loss": 0.2695, "step": 2936 }, { "epoch": 1.9, "learning_rate": 6.238667971401742e-06, "loss": 0.2747, "step": 2937 }, { "epoch": 1.9, "learning_rate": 6.232190165191139e-06, "loss": 0.2702, "step": 2938 }, { "epoch": 1.9, "learning_rate": 6.2257142010918325e-06, "loss": 0.2601, "step": 2939 }, { "epoch": 1.9, "learning_rate": 6.219240082269973e-06, "loss": 0.2593, "step": 2940 }, { "epoch": 1.9, "learning_rate": 6.212767811890805e-06, "loss": 0.2381, "step": 2941 }, { "epoch": 1.91, "learning_rate": 6.20629739311867e-06, "loss": 0.2594, "step": 2942 }, { "epoch": 1.91, "learning_rate": 6.1998288291170115e-06, "loss": 0.2711, "step": 2943 }, { "epoch": 1.91, "learning_rate": 6.193362123048354e-06, "loss": 0.2529, "step": 2944 }, { "epoch": 1.91, "learning_rate": 6.186897278074327e-06, "loss": 0.2734, "step": 2945 }, { "epoch": 1.91, "learning_rate": 6.180434297355633e-06, "loss": 0.2679, "step": 2946 }, { "epoch": 1.91, "learning_rate": 6.173973184052085e-06, "loss": 0.2355, "step": 2947 }, { "epoch": 1.91, "learning_rate": 6.167513941322559e-06, "loss": 0.265, "step": 2948 }, { "epoch": 1.91, "learning_rate": 6.161056572325039e-06, "loss": 0.2601, "step": 2949 }, { "epoch": 1.91, "learning_rate": 6.154601080216575e-06, "loss": 0.2578, "step": 2950 }, { "epoch": 1.91, "learning_rate": 6.148147468153311e-06, "loss": 0.2567, "step": 2951 }, { "epoch": 1.91, "learning_rate": 6.1416957392904695e-06, "loss": 0.2713, "step": 2952 }, { "epoch": 1.91, "learning_rate": 6.135245896782345e-06, "loss": 0.2569, "step": 2953 }, { "epoch": 1.91, "learning_rate": 6.128797943782321e-06, "loss": 0.2658, "step": 2954 }, { "epoch": 1.91, "learning_rate": 6.122351883442844e-06, "loss": 0.2581, "step": 2955 }, { "epoch": 1.91, "learning_rate": 6.1159077189154525e-06, "loss": 0.2544, "step": 2956 }, { "epoch": 1.92, "learning_rate": 6.109465453350738e-06, "loss": 0.2719, "step": 2957 }, { "epoch": 1.92, "learning_rate": 6.103025089898383e-06, "loss": 0.2809, "step": 2958 }, { "epoch": 1.92, "learning_rate": 6.096586631707123e-06, "loss": 0.2629, "step": 2959 }, { "epoch": 1.92, "learning_rate": 6.090150081924777e-06, "loss": 0.2646, "step": 2960 }, { "epoch": 1.92, "learning_rate": 6.083715443698217e-06, "loss": 0.247, "step": 2961 }, { "epoch": 1.92, "learning_rate": 6.0772827201733955e-06, "loss": 0.2547, "step": 2962 }, { "epoch": 1.92, "learning_rate": 6.070851914495311e-06, "loss": 0.2605, "step": 2963 }, { "epoch": 1.92, "learning_rate": 6.064423029808042e-06, "loss": 0.2635, "step": 2964 }, { "epoch": 1.92, "learning_rate": 6.057996069254713e-06, "loss": 0.2633, "step": 2965 }, { "epoch": 1.92, "learning_rate": 6.051571035977523e-06, "loss": 0.2401, "step": 2966 }, { "epoch": 1.92, "learning_rate": 6.045147933117712e-06, "loss": 0.2597, "step": 2967 }, { "epoch": 1.92, "learning_rate": 6.03872676381559e-06, "loss": 0.2756, "step": 2968 }, { "epoch": 1.92, "learning_rate": 6.032307531210513e-06, "loss": 0.2535, "step": 2969 }, { "epoch": 1.92, "learning_rate": 6.0258902384408965e-06, "loss": 0.2672, "step": 2970 }, { "epoch": 1.92, "learning_rate": 6.0194748886442e-06, "loss": 0.2607, "step": 2971 }, { "epoch": 1.92, "learning_rate": 6.013061484956939e-06, "loss": 0.2473, "step": 2972 }, { "epoch": 1.93, "learning_rate": 6.0066500305146824e-06, "loss": 0.2713, "step": 2973 }, { "epoch": 1.93, "learning_rate": 6.00024052845203e-06, "loss": 0.2642, "step": 2974 }, { "epoch": 1.93, "learning_rate": 5.993832981902645e-06, "loss": 0.2681, "step": 2975 }, { "epoch": 1.93, "learning_rate": 5.987427393999219e-06, "loss": 0.2666, "step": 2976 }, { "epoch": 1.93, "learning_rate": 5.981023767873501e-06, "loss": 0.2698, "step": 2977 }, { "epoch": 1.93, "learning_rate": 5.974622106656266e-06, "loss": 0.2545, "step": 2978 }, { "epoch": 1.93, "learning_rate": 5.96822241347734e-06, "loss": 0.2457, "step": 2979 }, { "epoch": 1.93, "learning_rate": 5.961824691465581e-06, "loss": 0.266, "step": 2980 }, { "epoch": 1.93, "learning_rate": 5.955428943748887e-06, "loss": 0.2678, "step": 2981 }, { "epoch": 1.93, "learning_rate": 5.949035173454183e-06, "loss": 0.2548, "step": 2982 }, { "epoch": 1.93, "learning_rate": 5.9426433837074385e-06, "loss": 0.2643, "step": 2983 }, { "epoch": 1.93, "learning_rate": 5.936253577633644e-06, "loss": 0.2634, "step": 2984 }, { "epoch": 1.93, "learning_rate": 5.929865758356827e-06, "loss": 0.261, "step": 2985 }, { "epoch": 1.93, "learning_rate": 5.923479929000042e-06, "loss": 0.2316, "step": 2986 }, { "epoch": 1.93, "learning_rate": 5.917096092685373e-06, "loss": 0.2602, "step": 2987 }, { "epoch": 1.94, "learning_rate": 5.91071425253392e-06, "loss": 0.2523, "step": 2988 }, { "epoch": 1.94, "learning_rate": 5.90433441166582e-06, "loss": 0.2616, "step": 2989 }, { "epoch": 1.94, "learning_rate": 5.897956573200223e-06, "loss": 0.25, "step": 2990 }, { "epoch": 1.94, "learning_rate": 5.891580740255305e-06, "loss": 0.2506, "step": 2991 }, { "epoch": 1.94, "learning_rate": 5.885206915948259e-06, "loss": 0.2636, "step": 2992 }, { "epoch": 1.94, "learning_rate": 5.878835103395296e-06, "loss": 0.2575, "step": 2993 }, { "epoch": 1.94, "learning_rate": 5.87246530571165e-06, "loss": 0.2417, "step": 2994 }, { "epoch": 1.94, "learning_rate": 5.866097526011556e-06, "loss": 0.252, "step": 2995 }, { "epoch": 1.94, "learning_rate": 5.859731767408279e-06, "loss": 0.256, "step": 2996 }, { "epoch": 1.94, "learning_rate": 5.853368033014078e-06, "loss": 0.2804, "step": 2997 }, { "epoch": 1.94, "learning_rate": 5.847006325940241e-06, "loss": 0.2713, "step": 2998 }, { "epoch": 1.94, "learning_rate": 5.840646649297047e-06, "loss": 0.242, "step": 2999 }, { "epoch": 1.94, "learning_rate": 5.834289006193801e-06, "loss": 0.2469, "step": 3000 }, { "epoch": 1.94, "learning_rate": 5.827933399738793e-06, "loss": 0.2541, "step": 3001 }, { "epoch": 1.94, "learning_rate": 5.821579833039338e-06, "loss": 0.2516, "step": 3002 }, { "epoch": 1.94, "learning_rate": 5.815228309201734e-06, "loss": 0.2765, "step": 3003 }, { "epoch": 1.95, "learning_rate": 5.808878831331301e-06, "loss": 0.2439, "step": 3004 }, { "epoch": 1.95, "learning_rate": 5.802531402532337e-06, "loss": 0.2669, "step": 3005 }, { "epoch": 1.95, "learning_rate": 5.796186025908157e-06, "loss": 0.2679, "step": 3006 }, { "epoch": 1.95, "learning_rate": 5.7898427045610585e-06, "loss": 0.2853, "step": 3007 }, { "epoch": 1.95, "learning_rate": 5.7835014415923465e-06, "loss": 0.2539, "step": 3008 }, { "epoch": 1.95, "learning_rate": 5.777162240102305e-06, "loss": 0.2796, "step": 3009 }, { "epoch": 1.95, "learning_rate": 5.770825103190229e-06, "loss": 0.25, "step": 3010 }, { "epoch": 1.95, "learning_rate": 5.764490033954384e-06, "loss": 0.26, "step": 3011 }, { "epoch": 1.95, "learning_rate": 5.758157035492038e-06, "loss": 0.2746, "step": 3012 }, { "epoch": 1.95, "learning_rate": 5.751826110899446e-06, "loss": 0.2605, "step": 3013 }, { "epoch": 1.95, "learning_rate": 5.745497263271839e-06, "loss": 0.2587, "step": 3014 }, { "epoch": 1.95, "learning_rate": 5.73917049570345e-06, "loss": 0.245, "step": 3015 }, { "epoch": 1.95, "learning_rate": 5.732845811287472e-06, "loss": 0.2882, "step": 3016 }, { "epoch": 1.95, "learning_rate": 5.726523213116102e-06, "loss": 0.249, "step": 3017 }, { "epoch": 1.95, "learning_rate": 5.7202027042804996e-06, "loss": 0.2654, "step": 3018 }, { "epoch": 1.96, "learning_rate": 5.713884287870817e-06, "loss": 0.2739, "step": 3019 }, { "epoch": 1.96, "learning_rate": 5.707567966976169e-06, "loss": 0.2719, "step": 3020 }, { "epoch": 1.96, "learning_rate": 5.7012537446846625e-06, "loss": 0.2499, "step": 3021 }, { "epoch": 1.96, "learning_rate": 5.694941624083361e-06, "loss": 0.2581, "step": 3022 }, { "epoch": 1.96, "learning_rate": 5.688631608258318e-06, "loss": 0.2626, "step": 3023 }, { "epoch": 1.96, "learning_rate": 5.6823237002945386e-06, "loss": 0.2572, "step": 3024 }, { "epoch": 1.96, "learning_rate": 5.676017903276017e-06, "loss": 0.2905, "step": 3025 }, { "epoch": 1.96, "learning_rate": 5.6697142202857e-06, "loss": 0.263, "step": 3026 }, { "epoch": 1.96, "learning_rate": 5.6634126544055105e-06, "loss": 0.2704, "step": 3027 }, { "epoch": 1.96, "learning_rate": 5.657113208716327e-06, "loss": 0.2434, "step": 3028 }, { "epoch": 1.96, "learning_rate": 5.650815886298005e-06, "loss": 0.255, "step": 3029 }, { "epoch": 1.96, "learning_rate": 5.644520690229347e-06, "loss": 0.258, "step": 3030 }, { "epoch": 1.96, "learning_rate": 5.638227623588131e-06, "loss": 0.2439, "step": 3031 }, { "epoch": 1.96, "learning_rate": 5.631936689451075e-06, "loss": 0.2564, "step": 3032 }, { "epoch": 1.96, "learning_rate": 5.625647890893872e-06, "loss": 0.2502, "step": 3033 }, { "epoch": 1.97, "learning_rate": 5.619361230991167e-06, "loss": 0.2447, "step": 3034 }, { "epoch": 1.97, "learning_rate": 5.613076712816549e-06, "loss": 0.2505, "step": 3035 }, { "epoch": 1.97, "learning_rate": 5.606794339442574e-06, "loss": 0.263, "step": 3036 }, { "epoch": 1.97, "learning_rate": 5.600514113940737e-06, "loss": 0.2435, "step": 3037 }, { "epoch": 1.97, "learning_rate": 5.594236039381496e-06, "loss": 0.2724, "step": 3038 }, { "epoch": 1.97, "learning_rate": 5.587960118834241e-06, "loss": 0.2669, "step": 3039 }, { "epoch": 1.97, "learning_rate": 5.581686355367327e-06, "loss": 0.2665, "step": 3040 }, { "epoch": 1.97, "learning_rate": 5.575414752048037e-06, "loss": 0.2413, "step": 3041 }, { "epoch": 1.97, "learning_rate": 5.569145311942614e-06, "loss": 0.2534, "step": 3042 }, { "epoch": 1.97, "learning_rate": 5.56287803811623e-06, "loss": 0.2262, "step": 3043 }, { "epoch": 1.97, "learning_rate": 5.556612933633008e-06, "loss": 0.2866, "step": 3044 }, { "epoch": 1.97, "learning_rate": 5.550350001556e-06, "loss": 0.2602, "step": 3045 }, { "epoch": 1.97, "learning_rate": 5.5440892449472085e-06, "loss": 0.2707, "step": 3046 }, { "epoch": 1.97, "learning_rate": 5.537830666867559e-06, "loss": 0.2654, "step": 3047 }, { "epoch": 1.97, "learning_rate": 5.53157427037692e-06, "loss": 0.2604, "step": 3048 }, { "epoch": 1.97, "learning_rate": 5.525320058534101e-06, "loss": 0.2685, "step": 3049 }, { "epoch": 1.98, "learning_rate": 5.5190680343968215e-06, "loss": 0.2664, "step": 3050 }, { "epoch": 1.98, "learning_rate": 5.5128182010217554e-06, "loss": 0.259, "step": 3051 }, { "epoch": 1.98, "learning_rate": 5.506570561464486e-06, "loss": 0.2621, "step": 3052 }, { "epoch": 1.98, "learning_rate": 5.500325118779538e-06, "loss": 0.2693, "step": 3053 }, { "epoch": 1.98, "learning_rate": 5.494081876020352e-06, "loss": 0.2573, "step": 3054 }, { "epoch": 1.98, "learning_rate": 5.487840836239298e-06, "loss": 0.2657, "step": 3055 }, { "epoch": 1.98, "learning_rate": 5.481602002487676e-06, "loss": 0.2511, "step": 3056 }, { "epoch": 1.98, "learning_rate": 5.475365377815691e-06, "loss": 0.2617, "step": 3057 }, { "epoch": 1.98, "learning_rate": 5.469130965272483e-06, "loss": 0.2465, "step": 3058 }, { "epoch": 1.98, "learning_rate": 5.4628987679060975e-06, "loss": 0.257, "step": 3059 }, { "epoch": 1.98, "learning_rate": 5.456668788763513e-06, "loss": 0.2586, "step": 3060 }, { "epoch": 1.98, "learning_rate": 5.4504410308906055e-06, "loss": 0.2762, "step": 3061 }, { "epoch": 1.98, "learning_rate": 5.444215497332177e-06, "loss": 0.2625, "step": 3062 }, { "epoch": 1.98, "learning_rate": 5.4379921911319445e-06, "loss": 0.2524, "step": 3063 }, { "epoch": 1.98, "learning_rate": 5.43177111533252e-06, "loss": 0.2676, "step": 3064 }, { "epoch": 1.99, "learning_rate": 5.425552272975447e-06, "loss": 0.2607, "step": 3065 }, { "epoch": 1.99, "learning_rate": 5.419335667101155e-06, "loss": 0.2574, "step": 3066 }, { "epoch": 1.99, "learning_rate": 5.413121300748998e-06, "loss": 0.2558, "step": 3067 }, { "epoch": 1.99, "learning_rate": 5.406909176957223e-06, "loss": 0.268, "step": 3068 }, { "epoch": 1.99, "learning_rate": 5.400699298762989e-06, "loss": 0.251, "step": 3069 }, { "epoch": 1.99, "learning_rate": 5.3944916692023485e-06, "loss": 0.2675, "step": 3070 }, { "epoch": 1.99, "learning_rate": 5.388286291310265e-06, "loss": 0.2761, "step": 3071 }, { "epoch": 1.99, "learning_rate": 5.382083168120591e-06, "loss": 0.2669, "step": 3072 }, { "epoch": 1.99, "learning_rate": 5.375882302666082e-06, "loss": 0.2672, "step": 3073 }, { "epoch": 1.99, "learning_rate": 5.369683697978394e-06, "loss": 0.2749, "step": 3074 }, { "epoch": 1.99, "learning_rate": 5.363487357088066e-06, "loss": 0.2638, "step": 3075 }, { "epoch": 1.99, "learning_rate": 5.357293283024543e-06, "loss": 0.2623, "step": 3076 }, { "epoch": 1.99, "learning_rate": 5.351101478816148e-06, "loss": 0.2697, "step": 3077 }, { "epoch": 1.99, "learning_rate": 5.3449119474901116e-06, "loss": 0.2694, "step": 3078 }, { "epoch": 1.99, "learning_rate": 5.3387246920725336e-06, "loss": 0.2461, "step": 3079 }, { "epoch": 1.99, "learning_rate": 5.332539715588418e-06, "loss": 0.2547, "step": 3080 }, { "epoch": 2.0, "learning_rate": 5.326357021061642e-06, "loss": 0.2667, "step": 3081 }, { "epoch": 2.0, "learning_rate": 5.32017661151498e-06, "loss": 0.2568, "step": 3082 }, { "epoch": 2.0, "learning_rate": 5.3139984899700715e-06, "loss": 0.2641, "step": 3083 }, { "epoch": 2.0, "learning_rate": 5.307822659447459e-06, "loss": 0.2496, "step": 3084 }, { "epoch": 2.0, "learning_rate": 5.301649122966545e-06, "loss": 0.2608, "step": 3085 }, { "epoch": 2.0, "learning_rate": 5.2954778835456255e-06, "loss": 0.2616, "step": 3086 }, { "epoch": 2.0, "learning_rate": 5.28930894420186e-06, "loss": 0.2591, "step": 3087 }, { "epoch": 2.0, "learning_rate": 5.283142307951299e-06, "loss": 0.2266, "step": 3088 }, { "epoch": 2.0, "learning_rate": 5.27697797780885e-06, "loss": 0.1971, "step": 3089 }, { "epoch": 2.0, "learning_rate": 5.270815956788311e-06, "loss": 0.2025, "step": 3090 }, { "epoch": 2.0, "learning_rate": 5.2646562479023315e-06, "loss": 0.2005, "step": 3091 }, { "epoch": 2.0, "learning_rate": 5.258498854162448e-06, "loss": 0.1883, "step": 3092 }, { "epoch": 2.0, "learning_rate": 5.252343778579054e-06, "loss": 0.1908, "step": 3093 }, { "epoch": 2.0, "learning_rate": 5.2461910241614135e-06, "loss": 0.1863, "step": 3094 }, { "epoch": 2.0, "learning_rate": 5.24004059391766e-06, "loss": 0.1943, "step": 3095 }, { "epoch": 2.01, "learning_rate": 5.2338924908547795e-06, "loss": 0.189, "step": 3096 }, { "epoch": 2.01, "learning_rate": 5.2277467179786355e-06, "loss": 0.1823, "step": 3097 }, { "epoch": 2.01, "learning_rate": 5.221603278293934e-06, "loss": 0.1817, "step": 3098 }, { "epoch": 2.01, "learning_rate": 5.21546217480426e-06, "loss": 0.1742, "step": 3099 }, { "epoch": 2.01, "learning_rate": 5.209323410512037e-06, "loss": 0.1989, "step": 3100 }, { "epoch": 2.01, "learning_rate": 5.203186988418562e-06, "loss": 0.1859, "step": 3101 }, { "epoch": 2.01, "learning_rate": 5.1970529115239735e-06, "loss": 0.1773, "step": 3102 }, { "epoch": 2.01, "learning_rate": 5.190921182827273e-06, "loss": 0.1817, "step": 3103 }, { "epoch": 2.01, "learning_rate": 5.184791805326306e-06, "loss": 0.1824, "step": 3104 }, { "epoch": 2.01, "learning_rate": 5.178664782017777e-06, "loss": 0.1799, "step": 3105 }, { "epoch": 2.01, "learning_rate": 5.172540115897231e-06, "loss": 0.1843, "step": 3106 }, { "epoch": 2.01, "learning_rate": 5.166417809959068e-06, "loss": 0.1854, "step": 3107 }, { "epoch": 2.01, "learning_rate": 5.160297867196526e-06, "loss": 0.1773, "step": 3108 }, { "epoch": 2.01, "learning_rate": 5.154180290601698e-06, "loss": 0.1799, "step": 3109 }, { "epoch": 2.01, "learning_rate": 5.148065083165508e-06, "loss": 0.1841, "step": 3110 }, { "epoch": 2.01, "learning_rate": 5.141952247877735e-06, "loss": 0.1859, "step": 3111 }, { "epoch": 2.02, "learning_rate": 5.135841787726985e-06, "loss": 0.1855, "step": 3112 }, { "epoch": 2.02, "learning_rate": 5.129733705700715e-06, "loss": 0.1779, "step": 3113 }, { "epoch": 2.02, "learning_rate": 5.123628004785206e-06, "loss": 0.1881, "step": 3114 }, { "epoch": 2.02, "learning_rate": 5.117524687965587e-06, "loss": 0.1872, "step": 3115 }, { "epoch": 2.02, "learning_rate": 5.11142375822582e-06, "loss": 0.1738, "step": 3116 }, { "epoch": 2.02, "learning_rate": 5.105325218548689e-06, "loss": 0.1866, "step": 3117 }, { "epoch": 2.02, "learning_rate": 5.099229071915826e-06, "loss": 0.1873, "step": 3118 }, { "epoch": 2.02, "learning_rate": 5.093135321307674e-06, "loss": 0.1965, "step": 3119 }, { "epoch": 2.02, "learning_rate": 5.087043969703525e-06, "loss": 0.1745, "step": 3120 }, { "epoch": 2.02, "learning_rate": 5.080955020081477e-06, "loss": 0.186, "step": 3121 }, { "epoch": 2.02, "learning_rate": 5.074868475418475e-06, "loss": 0.1902, "step": 3122 }, { "epoch": 2.02, "learning_rate": 5.068784338690268e-06, "loss": 0.1766, "step": 3123 }, { "epoch": 2.02, "learning_rate": 5.0627026128714465e-06, "loss": 0.1747, "step": 3124 }, { "epoch": 2.02, "learning_rate": 5.056623300935405e-06, "loss": 0.2014, "step": 3125 }, { "epoch": 2.02, "learning_rate": 5.050546405854373e-06, "loss": 0.1859, "step": 3126 }, { "epoch": 2.03, "learning_rate": 5.044471930599386e-06, "loss": 0.1792, "step": 3127 }, { "epoch": 2.03, "learning_rate": 5.0383998781403064e-06, "loss": 0.1864, "step": 3128 }, { "epoch": 2.03, "learning_rate": 5.032330251445802e-06, "loss": 0.1806, "step": 3129 }, { "epoch": 2.03, "learning_rate": 5.026263053483367e-06, "loss": 0.194, "step": 3130 }, { "epoch": 2.03, "learning_rate": 5.020198287219293e-06, "loss": 0.1784, "step": 3131 }, { "epoch": 2.03, "learning_rate": 5.0141359556187e-06, "loss": 0.1869, "step": 3132 }, { "epoch": 2.03, "learning_rate": 5.008076061645499e-06, "loss": 0.1845, "step": 3133 }, { "epoch": 2.03, "learning_rate": 5.002018608262429e-06, "loss": 0.1806, "step": 3134 }, { "epoch": 2.03, "learning_rate": 4.9959635984310165e-06, "loss": 0.1819, "step": 3135 }, { "epoch": 2.03, "learning_rate": 4.989911035111607e-06, "loss": 0.1801, "step": 3136 }, { "epoch": 2.03, "learning_rate": 4.983860921263349e-06, "loss": 0.1744, "step": 3137 }, { "epoch": 2.03, "learning_rate": 4.9778132598441835e-06, "loss": 0.1983, "step": 3138 }, { "epoch": 2.03, "learning_rate": 4.971768053810863e-06, "loss": 0.1697, "step": 3139 }, { "epoch": 2.03, "learning_rate": 4.965725306118932e-06, "loss": 0.1929, "step": 3140 }, { "epoch": 2.03, "learning_rate": 4.959685019722741e-06, "loss": 0.1798, "step": 3141 }, { "epoch": 2.03, "learning_rate": 4.953647197575426e-06, "loss": 0.1822, "step": 3142 }, { "epoch": 2.04, "learning_rate": 4.947611842628932e-06, "loss": 0.1823, "step": 3143 }, { "epoch": 2.04, "learning_rate": 4.9415789578339845e-06, "loss": 0.1905, "step": 3144 }, { "epoch": 2.04, "learning_rate": 4.9355485461401096e-06, "loss": 0.179, "step": 3145 }, { "epoch": 2.04, "learning_rate": 4.929520610495627e-06, "loss": 0.1881, "step": 3146 }, { "epoch": 2.04, "learning_rate": 4.923495153847633e-06, "loss": 0.1757, "step": 3147 }, { "epoch": 2.04, "learning_rate": 4.917472179142026e-06, "loss": 0.1876, "step": 3148 }, { "epoch": 2.04, "learning_rate": 4.91145168932348e-06, "loss": 0.1862, "step": 3149 }, { "epoch": 2.04, "learning_rate": 4.9054336873354636e-06, "loss": 0.1819, "step": 3150 }, { "epoch": 2.04, "learning_rate": 4.8994181761202185e-06, "loss": 0.1836, "step": 3151 }, { "epoch": 2.04, "learning_rate": 4.893405158618778e-06, "loss": 0.178, "step": 3152 }, { "epoch": 2.04, "learning_rate": 4.887394637770957e-06, "loss": 0.1993, "step": 3153 }, { "epoch": 2.04, "learning_rate": 4.8813866165153375e-06, "loss": 0.1757, "step": 3154 }, { "epoch": 2.04, "learning_rate": 4.875381097789296e-06, "loss": 0.1807, "step": 3155 }, { "epoch": 2.04, "learning_rate": 4.869378084528966e-06, "loss": 0.179, "step": 3156 }, { "epoch": 2.04, "learning_rate": 4.863377579669278e-06, "loss": 0.1912, "step": 3157 }, { "epoch": 2.05, "learning_rate": 4.857379586143917e-06, "loss": 0.1818, "step": 3158 }, { "epoch": 2.05, "learning_rate": 4.8513841068853514e-06, "loss": 0.1889, "step": 3159 }, { "epoch": 2.05, "learning_rate": 4.8453911448248215e-06, "loss": 0.1847, "step": 3160 }, { "epoch": 2.05, "learning_rate": 4.8394007028923255e-06, "loss": 0.1875, "step": 3161 }, { "epoch": 2.05, "learning_rate": 4.8334127840166425e-06, "loss": 0.1825, "step": 3162 }, { "epoch": 2.05, "learning_rate": 4.8274273911253055e-06, "loss": 0.1899, "step": 3163 }, { "epoch": 2.05, "learning_rate": 4.821444527144628e-06, "loss": 0.1895, "step": 3164 }, { "epoch": 2.05, "learning_rate": 4.8154641949996685e-06, "loss": 0.1756, "step": 3165 }, { "epoch": 2.05, "learning_rate": 4.809486397614268e-06, "loss": 0.1871, "step": 3166 }, { "epoch": 2.05, "learning_rate": 4.8035111379110075e-06, "loss": 0.1895, "step": 3167 }, { "epoch": 2.05, "learning_rate": 4.797538418811247e-06, "loss": 0.1855, "step": 3168 }, { "epoch": 2.05, "learning_rate": 4.791568243235087e-06, "loss": 0.1884, "step": 3169 }, { "epoch": 2.05, "learning_rate": 4.785600614101399e-06, "loss": 0.1845, "step": 3170 }, { "epoch": 2.05, "learning_rate": 4.779635534327797e-06, "loss": 0.1819, "step": 3171 }, { "epoch": 2.05, "learning_rate": 4.77367300683066e-06, "loss": 0.1964, "step": 3172 }, { "epoch": 2.06, "learning_rate": 4.767713034525108e-06, "loss": 0.1818, "step": 3173 }, { "epoch": 2.06, "learning_rate": 4.761755620325024e-06, "loss": 0.1877, "step": 3174 }, { "epoch": 2.06, "learning_rate": 4.755800767143026e-06, "loss": 0.1829, "step": 3175 }, { "epoch": 2.06, "learning_rate": 4.749848477890492e-06, "loss": 0.1811, "step": 3176 }, { "epoch": 2.06, "learning_rate": 4.7438987554775464e-06, "loss": 0.1877, "step": 3177 }, { "epoch": 2.06, "learning_rate": 4.7379516028130455e-06, "loss": 0.1782, "step": 3178 }, { "epoch": 2.06, "learning_rate": 4.732007022804607e-06, "loss": 0.1802, "step": 3179 }, { "epoch": 2.06, "learning_rate": 4.726065018358574e-06, "loss": 0.1837, "step": 3180 }, { "epoch": 2.06, "learning_rate": 4.720125592380045e-06, "loss": 0.1935, "step": 3181 }, { "epoch": 2.06, "learning_rate": 4.714188747772844e-06, "loss": 0.1896, "step": 3182 }, { "epoch": 2.06, "learning_rate": 4.708254487439548e-06, "loss": 0.187, "step": 3183 }, { "epoch": 2.06, "learning_rate": 4.702322814281457e-06, "loss": 0.1787, "step": 3184 }, { "epoch": 2.06, "learning_rate": 4.6963937311986155e-06, "loss": 0.1925, "step": 3185 }, { "epoch": 2.06, "learning_rate": 4.690467241089793e-06, "loss": 0.1763, "step": 3186 }, { "epoch": 2.06, "learning_rate": 4.684543346852502e-06, "loss": 0.1895, "step": 3187 }, { "epoch": 2.06, "learning_rate": 4.678622051382975e-06, "loss": 0.1888, "step": 3188 }, { "epoch": 2.07, "learning_rate": 4.672703357576185e-06, "loss": 0.1889, "step": 3189 }, { "epoch": 2.07, "learning_rate": 4.66678726832582e-06, "loss": 0.1776, "step": 3190 }, { "epoch": 2.07, "learning_rate": 4.6608737865243084e-06, "loss": 0.1964, "step": 3191 }, { "epoch": 2.07, "learning_rate": 4.6549629150627894e-06, "loss": 0.1912, "step": 3192 }, { "epoch": 2.07, "learning_rate": 4.649054656831143e-06, "loss": 0.1903, "step": 3193 }, { "epoch": 2.07, "learning_rate": 4.643149014717953e-06, "loss": 0.1994, "step": 3194 }, { "epoch": 2.07, "learning_rate": 4.637245991610543e-06, "loss": 0.18, "step": 3195 }, { "epoch": 2.07, "learning_rate": 4.6313455903949355e-06, "loss": 0.1903, "step": 3196 }, { "epoch": 2.07, "learning_rate": 4.625447813955889e-06, "loss": 0.1824, "step": 3197 }, { "epoch": 2.07, "learning_rate": 4.619552665176874e-06, "loss": 0.1818, "step": 3198 }, { "epoch": 2.07, "learning_rate": 4.613660146940069e-06, "loss": 0.1867, "step": 3199 }, { "epoch": 2.07, "learning_rate": 4.607770262126376e-06, "loss": 0.1833, "step": 3200 }, { "epoch": 2.07, "learning_rate": 4.601883013615399e-06, "loss": 0.1826, "step": 3201 }, { "epoch": 2.07, "learning_rate": 4.5959984042854665e-06, "loss": 0.1929, "step": 3202 }, { "epoch": 2.07, "learning_rate": 4.590116437013603e-06, "loss": 0.1869, "step": 3203 }, { "epoch": 2.08, "learning_rate": 4.584237114675555e-06, "loss": 0.1875, "step": 3204 }, { "epoch": 2.08, "learning_rate": 4.57836044014576e-06, "loss": 0.1927, "step": 3205 }, { "epoch": 2.08, "learning_rate": 4.572486416297377e-06, "loss": 0.1831, "step": 3206 }, { "epoch": 2.08, "learning_rate": 4.566615046002254e-06, "loss": 0.1983, "step": 3207 }, { "epoch": 2.08, "learning_rate": 4.560746332130958e-06, "loss": 0.1855, "step": 3208 }, { "epoch": 2.08, "learning_rate": 4.55488027755274e-06, "loss": 0.1902, "step": 3209 }, { "epoch": 2.08, "learning_rate": 4.549016885135568e-06, "loss": 0.1956, "step": 3210 }, { "epoch": 2.08, "learning_rate": 4.54315615774609e-06, "loss": 0.1772, "step": 3211 }, { "epoch": 2.08, "learning_rate": 4.537298098249671e-06, "loss": 0.1836, "step": 3212 }, { "epoch": 2.08, "learning_rate": 4.531442709510352e-06, "loss": 0.1901, "step": 3213 }, { "epoch": 2.08, "learning_rate": 4.5255899943908865e-06, "loss": 0.1862, "step": 3214 }, { "epoch": 2.08, "learning_rate": 4.5197399557527034e-06, "loss": 0.1823, "step": 3215 }, { "epoch": 2.08, "learning_rate": 4.51389259645594e-06, "loss": 0.1852, "step": 3216 }, { "epoch": 2.08, "learning_rate": 4.508047919359408e-06, "loss": 0.1812, "step": 3217 }, { "epoch": 2.08, "learning_rate": 4.502205927320618e-06, "loss": 0.1941, "step": 3218 }, { "epoch": 2.08, "learning_rate": 4.496366623195769e-06, "loss": 0.1817, "step": 3219 }, { "epoch": 2.09, "learning_rate": 4.490530009839735e-06, "loss": 0.1785, "step": 3220 }, { "epoch": 2.09, "learning_rate": 4.484696090106089e-06, "loss": 0.185, "step": 3221 }, { "epoch": 2.09, "learning_rate": 4.478864866847072e-06, "loss": 0.1896, "step": 3222 }, { "epoch": 2.09, "learning_rate": 4.473036342913619e-06, "loss": 0.186, "step": 3223 }, { "epoch": 2.09, "learning_rate": 4.467210521155335e-06, "loss": 0.1861, "step": 3224 }, { "epoch": 2.09, "learning_rate": 4.461387404420518e-06, "loss": 0.194, "step": 3225 }, { "epoch": 2.09, "learning_rate": 4.4555669955561245e-06, "loss": 0.1844, "step": 3226 }, { "epoch": 2.09, "learning_rate": 4.449749297407805e-06, "loss": 0.1859, "step": 3227 }, { "epoch": 2.09, "learning_rate": 4.443934312819872e-06, "loss": 0.1936, "step": 3228 }, { "epoch": 2.09, "learning_rate": 4.438122044635321e-06, "loss": 0.1866, "step": 3229 }, { "epoch": 2.09, "learning_rate": 4.432312495695809e-06, "loss": 0.186, "step": 3230 }, { "epoch": 2.09, "learning_rate": 4.426505668841676e-06, "loss": 0.1815, "step": 3231 }, { "epoch": 2.09, "learning_rate": 4.420701566911917e-06, "loss": 0.1924, "step": 3232 }, { "epoch": 2.09, "learning_rate": 4.4149001927442105e-06, "loss": 0.2017, "step": 3233 }, { "epoch": 2.09, "learning_rate": 4.409101549174885e-06, "loss": 0.193, "step": 3234 }, { "epoch": 2.1, "learning_rate": 4.40330563903895e-06, "loss": 0.1887, "step": 3235 }, { "epoch": 2.1, "learning_rate": 4.397512465170061e-06, "loss": 0.1778, "step": 3236 }, { "epoch": 2.1, "learning_rate": 4.391722030400553e-06, "loss": 0.1933, "step": 3237 }, { "epoch": 2.1, "learning_rate": 4.385934337561414e-06, "loss": 0.1811, "step": 3238 }, { "epoch": 2.1, "learning_rate": 4.380149389482286e-06, "loss": 0.1862, "step": 3239 }, { "epoch": 2.1, "learning_rate": 4.374367188991484e-06, "loss": 0.1894, "step": 3240 }, { "epoch": 2.1, "learning_rate": 4.368587738915961e-06, "loss": 0.1894, "step": 3241 }, { "epoch": 2.1, "learning_rate": 4.362811042081342e-06, "loss": 0.1893, "step": 3242 }, { "epoch": 2.1, "learning_rate": 4.357037101311892e-06, "loss": 0.1834, "step": 3243 }, { "epoch": 2.1, "learning_rate": 4.351265919430538e-06, "loss": 0.1878, "step": 3244 }, { "epoch": 2.1, "learning_rate": 4.34549749925886e-06, "loss": 0.1817, "step": 3245 }, { "epoch": 2.1, "learning_rate": 4.339731843617075e-06, "loss": 0.1945, "step": 3246 }, { "epoch": 2.1, "learning_rate": 4.333968955324065e-06, "loss": 0.2004, "step": 3247 }, { "epoch": 2.1, "learning_rate": 4.328208837197342e-06, "loss": 0.1949, "step": 3248 }, { "epoch": 2.1, "learning_rate": 4.322451492053079e-06, "loss": 0.1928, "step": 3249 }, { "epoch": 2.1, "learning_rate": 4.316696922706082e-06, "loss": 0.184, "step": 3250 }, { "epoch": 2.11, "learning_rate": 4.3109451319698045e-06, "loss": 0.1896, "step": 3251 }, { "epoch": 2.11, "learning_rate": 4.305196122656348e-06, "loss": 0.1926, "step": 3252 }, { "epoch": 2.11, "learning_rate": 4.299449897576437e-06, "loss": 0.1793, "step": 3253 }, { "epoch": 2.11, "learning_rate": 4.2937064595394565e-06, "loss": 0.1805, "step": 3254 }, { "epoch": 2.11, "learning_rate": 4.287965811353406e-06, "loss": 0.1934, "step": 3255 }, { "epoch": 2.11, "learning_rate": 4.282227955824945e-06, "loss": 0.1897, "step": 3256 }, { "epoch": 2.11, "learning_rate": 4.276492895759343e-06, "loss": 0.183, "step": 3257 }, { "epoch": 2.11, "learning_rate": 4.270760633960522e-06, "loss": 0.172, "step": 3258 }, { "epoch": 2.11, "learning_rate": 4.265031173231031e-06, "loss": 0.1872, "step": 3259 }, { "epoch": 2.11, "learning_rate": 4.259304516372042e-06, "loss": 0.1854, "step": 3260 }, { "epoch": 2.11, "learning_rate": 4.253580666183368e-06, "loss": 0.1935, "step": 3261 }, { "epoch": 2.11, "learning_rate": 4.2478596254634354e-06, "loss": 0.1872, "step": 3262 }, { "epoch": 2.11, "learning_rate": 4.242141397009314e-06, "loss": 0.1886, "step": 3263 }, { "epoch": 2.11, "learning_rate": 4.236425983616681e-06, "loss": 0.191, "step": 3264 }, { "epoch": 2.11, "learning_rate": 4.230713388079856e-06, "loss": 0.1889, "step": 3265 }, { "epoch": 2.12, "learning_rate": 4.2250036131917594e-06, "loss": 0.1782, "step": 3266 }, { "epoch": 2.12, "learning_rate": 4.219296661743956e-06, "loss": 0.175, "step": 3267 }, { "epoch": 2.12, "learning_rate": 4.213592536526607e-06, "loss": 0.199, "step": 3268 }, { "epoch": 2.12, "learning_rate": 4.2078912403285145e-06, "loss": 0.1896, "step": 3269 }, { "epoch": 2.12, "learning_rate": 4.202192775937076e-06, "loss": 0.1941, "step": 3270 }, { "epoch": 2.12, "learning_rate": 4.196497146138324e-06, "loss": 0.1895, "step": 3271 }, { "epoch": 2.12, "learning_rate": 4.190804353716888e-06, "loss": 0.1902, "step": 3272 }, { "epoch": 2.12, "learning_rate": 4.185114401456023e-06, "loss": 0.1892, "step": 3273 }, { "epoch": 2.12, "learning_rate": 4.179427292137587e-06, "loss": 0.1904, "step": 3274 }, { "epoch": 2.12, "learning_rate": 4.1737430285420555e-06, "loss": 0.1832, "step": 3275 }, { "epoch": 2.12, "learning_rate": 4.168061613448504e-06, "loss": 0.194, "step": 3276 }, { "epoch": 2.12, "learning_rate": 4.1623830496346276e-06, "loss": 0.1903, "step": 3277 }, { "epoch": 2.12, "learning_rate": 4.15670733987671e-06, "loss": 0.1855, "step": 3278 }, { "epoch": 2.12, "learning_rate": 4.151034486949657e-06, "loss": 0.1833, "step": 3279 }, { "epoch": 2.12, "learning_rate": 4.1453644936269695e-06, "loss": 0.1734, "step": 3280 }, { "epoch": 2.12, "learning_rate": 4.139697362680747e-06, "loss": 0.1896, "step": 3281 }, { "epoch": 2.13, "learning_rate": 4.1340330968817e-06, "loss": 0.1868, "step": 3282 }, { "epoch": 2.13, "learning_rate": 4.1283716989991226e-06, "loss": 0.188, "step": 3283 }, { "epoch": 2.13, "learning_rate": 4.122713171800927e-06, "loss": 0.1959, "step": 3284 }, { "epoch": 2.13, "learning_rate": 4.1170575180536e-06, "loss": 0.1971, "step": 3285 }, { "epoch": 2.13, "learning_rate": 4.111404740522243e-06, "loss": 0.1832, "step": 3286 }, { "epoch": 2.13, "learning_rate": 4.105754841970535e-06, "loss": 0.1954, "step": 3287 }, { "epoch": 2.13, "learning_rate": 4.100107825160763e-06, "loss": 0.1949, "step": 3288 }, { "epoch": 2.13, "learning_rate": 4.094463692853789e-06, "loss": 0.1907, "step": 3289 }, { "epoch": 2.13, "learning_rate": 4.08882244780908e-06, "loss": 0.1924, "step": 3290 }, { "epoch": 2.13, "learning_rate": 4.083184092784677e-06, "loss": 0.1838, "step": 3291 }, { "epoch": 2.13, "learning_rate": 4.077548630537225e-06, "loss": 0.1939, "step": 3292 }, { "epoch": 2.13, "learning_rate": 4.071916063821933e-06, "loss": 0.1753, "step": 3293 }, { "epoch": 2.13, "learning_rate": 4.066286395392618e-06, "loss": 0.1883, "step": 3294 }, { "epoch": 2.13, "learning_rate": 4.060659628001657e-06, "loss": 0.2067, "step": 3295 }, { "epoch": 2.13, "learning_rate": 4.0550357644000315e-06, "loss": 0.1863, "step": 3296 }, { "epoch": 2.14, "learning_rate": 4.0494148073372815e-06, "loss": 0.1952, "step": 3297 }, { "epoch": 2.14, "learning_rate": 4.0437967595615455e-06, "loss": 0.1835, "step": 3298 }, { "epoch": 2.14, "learning_rate": 4.038181623819522e-06, "loss": 0.1843, "step": 3299 }, { "epoch": 2.14, "learning_rate": 4.032569402856497e-06, "loss": 0.1808, "step": 3300 }, { "epoch": 2.14, "learning_rate": 4.0269600994163336e-06, "loss": 0.1847, "step": 3301 }, { "epoch": 2.14, "learning_rate": 4.0213537162414565e-06, "loss": 0.1939, "step": 3302 }, { "epoch": 2.14, "learning_rate": 4.015750256072876e-06, "loss": 0.1831, "step": 3303 }, { "epoch": 2.14, "learning_rate": 4.01014972165016e-06, "loss": 0.1905, "step": 3304 }, { "epoch": 2.14, "learning_rate": 4.00455211571146e-06, "loss": 0.1942, "step": 3305 }, { "epoch": 2.14, "learning_rate": 3.9989574409934815e-06, "loss": 0.1807, "step": 3306 }, { "epoch": 2.14, "learning_rate": 3.993365700231513e-06, "loss": 0.1937, "step": 3307 }, { "epoch": 2.14, "learning_rate": 3.98777689615939e-06, "loss": 0.197, "step": 3308 }, { "epoch": 2.14, "learning_rate": 3.982191031509532e-06, "loss": 0.1886, "step": 3309 }, { "epoch": 2.14, "learning_rate": 3.9766081090129e-06, "loss": 0.1919, "step": 3310 }, { "epoch": 2.14, "learning_rate": 3.971028131399039e-06, "loss": 0.1863, "step": 3311 }, { "epoch": 2.15, "learning_rate": 3.965451101396034e-06, "loss": 0.1936, "step": 3312 }, { "epoch": 2.15, "learning_rate": 3.959877021730546e-06, "loss": 0.182, "step": 3313 }, { "epoch": 2.15, "learning_rate": 3.954305895127778e-06, "loss": 0.2008, "step": 3314 }, { "epoch": 2.15, "learning_rate": 3.948737724311503e-06, "loss": 0.1843, "step": 3315 }, { "epoch": 2.15, "learning_rate": 3.943172512004038e-06, "loss": 0.1883, "step": 3316 }, { "epoch": 2.15, "learning_rate": 3.9376102609262625e-06, "loss": 0.1921, "step": 3317 }, { "epoch": 2.15, "learning_rate": 3.932050973797598e-06, "loss": 0.1949, "step": 3318 }, { "epoch": 2.15, "learning_rate": 3.9264946533360286e-06, "loss": 0.1855, "step": 3319 }, { "epoch": 2.15, "learning_rate": 3.920941302258081e-06, "loss": 0.1862, "step": 3320 }, { "epoch": 2.15, "learning_rate": 3.915390923278829e-06, "loss": 0.1843, "step": 3321 }, { "epoch": 2.15, "learning_rate": 3.9098435191119e-06, "loss": 0.1882, "step": 3322 }, { "epoch": 2.15, "learning_rate": 3.904299092469457e-06, "loss": 0.188, "step": 3323 }, { "epoch": 2.15, "learning_rate": 3.898757646062219e-06, "loss": 0.203, "step": 3324 }, { "epoch": 2.15, "learning_rate": 3.893219182599436e-06, "loss": 0.1871, "step": 3325 }, { "epoch": 2.15, "learning_rate": 3.8876837047889115e-06, "loss": 0.1926, "step": 3326 }, { "epoch": 2.15, "learning_rate": 3.882151215336976e-06, "loss": 0.1812, "step": 3327 }, { "epoch": 2.16, "learning_rate": 3.876621716948514e-06, "loss": 0.1833, "step": 3328 }, { "epoch": 2.16, "learning_rate": 3.8710952123269345e-06, "loss": 0.1824, "step": 3329 }, { "epoch": 2.16, "learning_rate": 3.865571704174192e-06, "loss": 0.1904, "step": 3330 }, { "epoch": 2.16, "learning_rate": 3.8600511951907685e-06, "loss": 0.187, "step": 3331 }, { "epoch": 2.16, "learning_rate": 3.854533688075687e-06, "loss": 0.1801, "step": 3332 }, { "epoch": 2.16, "learning_rate": 3.849019185526495e-06, "loss": 0.1917, "step": 3333 }, { "epoch": 2.16, "learning_rate": 3.843507690239278e-06, "loss": 0.1864, "step": 3334 }, { "epoch": 2.16, "learning_rate": 3.837999204908651e-06, "loss": 0.1915, "step": 3335 }, { "epoch": 2.16, "learning_rate": 3.83249373222775e-06, "loss": 0.1889, "step": 3336 }, { "epoch": 2.16, "learning_rate": 3.82699127488825e-06, "loss": 0.1906, "step": 3337 }, { "epoch": 2.16, "learning_rate": 3.821491835580335e-06, "loss": 0.1865, "step": 3338 }, { "epoch": 2.16, "learning_rate": 3.8159954169927335e-06, "loss": 0.1793, "step": 3339 }, { "epoch": 2.16, "learning_rate": 3.8105020218126775e-06, "loss": 0.193, "step": 3340 }, { "epoch": 2.16, "learning_rate": 3.8050116527259364e-06, "loss": 0.1905, "step": 3341 }, { "epoch": 2.16, "learning_rate": 3.7995243124167947e-06, "loss": 0.182, "step": 3342 }, { "epoch": 2.17, "learning_rate": 3.794040003568051e-06, "loss": 0.194, "step": 3343 }, { "epoch": 2.17, "learning_rate": 3.7885587288610317e-06, "loss": 0.1875, "step": 3344 }, { "epoch": 2.17, "learning_rate": 3.783080490975567e-06, "loss": 0.192, "step": 3345 }, { "epoch": 2.17, "learning_rate": 3.7776052925900175e-06, "loss": 0.1901, "step": 3346 }, { "epoch": 2.17, "learning_rate": 3.772133136381242e-06, "loss": 0.1823, "step": 3347 }, { "epoch": 2.17, "learning_rate": 3.7666640250246246e-06, "loss": 0.1872, "step": 3348 }, { "epoch": 2.17, "learning_rate": 3.7611979611940597e-06, "loss": 0.2038, "step": 3349 }, { "epoch": 2.17, "learning_rate": 3.75573494756194e-06, "loss": 0.1931, "step": 3350 }, { "epoch": 2.17, "learning_rate": 3.7502749867991815e-06, "loss": 0.1974, "step": 3351 }, { "epoch": 2.17, "learning_rate": 3.7448180815751955e-06, "loss": 0.1906, "step": 3352 }, { "epoch": 2.17, "learning_rate": 3.7393642345579116e-06, "loss": 0.1926, "step": 3353 }, { "epoch": 2.17, "learning_rate": 3.7339134484137506e-06, "loss": 0.1988, "step": 3354 }, { "epoch": 2.17, "learning_rate": 3.728465725807652e-06, "loss": 0.1778, "step": 3355 }, { "epoch": 2.17, "learning_rate": 3.72302106940304e-06, "loss": 0.1926, "step": 3356 }, { "epoch": 2.17, "learning_rate": 3.7175794818618583e-06, "loss": 0.1892, "step": 3357 }, { "epoch": 2.17, "learning_rate": 3.7121409658445328e-06, "loss": 0.1924, "step": 3358 }, { "epoch": 2.18, "learning_rate": 3.7067055240100036e-06, "loss": 0.1929, "step": 3359 }, { "epoch": 2.18, "learning_rate": 3.7012731590156923e-06, "loss": 0.1815, "step": 3360 }, { "epoch": 2.18, "learning_rate": 3.6958438735175296e-06, "loss": 0.1855, "step": 3361 }, { "epoch": 2.18, "learning_rate": 3.6904176701699358e-06, "loss": 0.1955, "step": 3362 }, { "epoch": 2.18, "learning_rate": 3.6849945516258177e-06, "loss": 0.1842, "step": 3363 }, { "epoch": 2.18, "learning_rate": 3.6795745205365884e-06, "loss": 0.1927, "step": 3364 }, { "epoch": 2.18, "learning_rate": 3.674157579552133e-06, "loss": 0.1826, "step": 3365 }, { "epoch": 2.18, "learning_rate": 3.668743731320845e-06, "loss": 0.1889, "step": 3366 }, { "epoch": 2.18, "learning_rate": 3.66333297848959e-06, "loss": 0.1784, "step": 3367 }, { "epoch": 2.18, "learning_rate": 3.6579253237037317e-06, "loss": 0.2014, "step": 3368 }, { "epoch": 2.18, "learning_rate": 3.6525207696071075e-06, "loss": 0.1918, "step": 3369 }, { "epoch": 2.18, "learning_rate": 3.647119318842054e-06, "loss": 0.1897, "step": 3370 }, { "epoch": 2.18, "learning_rate": 3.6417209740493743e-06, "loss": 0.1814, "step": 3371 }, { "epoch": 2.18, "learning_rate": 3.636325737868368e-06, "loss": 0.1975, "step": 3372 }, { "epoch": 2.18, "learning_rate": 3.6309336129368e-06, "loss": 0.1959, "step": 3373 }, { "epoch": 2.19, "learning_rate": 3.62554460189093e-06, "loss": 0.1891, "step": 3374 }, { "epoch": 2.19, "learning_rate": 3.62015870736548e-06, "loss": 0.1822, "step": 3375 }, { "epoch": 2.19, "learning_rate": 3.6147759319936604e-06, "loss": 0.2082, "step": 3376 }, { "epoch": 2.19, "learning_rate": 3.6093962784071467e-06, "loss": 0.1912, "step": 3377 }, { "epoch": 2.19, "learning_rate": 3.604019749236101e-06, "loss": 0.1909, "step": 3378 }, { "epoch": 2.19, "learning_rate": 3.598646347109139e-06, "loss": 0.1786, "step": 3379 }, { "epoch": 2.19, "learning_rate": 3.5932760746533692e-06, "loss": 0.1953, "step": 3380 }, { "epoch": 2.19, "learning_rate": 3.587908934494351e-06, "loss": 0.2001, "step": 3381 }, { "epoch": 2.19, "learning_rate": 3.582544929256123e-06, "loss": 0.1864, "step": 3382 }, { "epoch": 2.19, "learning_rate": 3.577184061561193e-06, "loss": 0.1893, "step": 3383 }, { "epoch": 2.19, "learning_rate": 3.571826334030525e-06, "loss": 0.1873, "step": 3384 }, { "epoch": 2.19, "learning_rate": 3.566471749283558e-06, "loss": 0.1835, "step": 3385 }, { "epoch": 2.19, "learning_rate": 3.5611203099381842e-06, "loss": 0.1784, "step": 3386 }, { "epoch": 2.19, "learning_rate": 3.5557720186107703e-06, "loss": 0.1932, "step": 3387 }, { "epoch": 2.19, "learning_rate": 3.55042687791613e-06, "loss": 0.1933, "step": 3388 }, { "epoch": 2.19, "learning_rate": 3.545084890467552e-06, "loss": 0.1944, "step": 3389 }, { "epoch": 2.2, "learning_rate": 3.539746058876766e-06, "loss": 0.1814, "step": 3390 }, { "epoch": 2.2, "learning_rate": 3.534410385753977e-06, "loss": 0.1869, "step": 3391 }, { "epoch": 2.2, "learning_rate": 3.529077873707828e-06, "loss": 0.1921, "step": 3392 }, { "epoch": 2.2, "learning_rate": 3.5237485253454327e-06, "loss": 0.2011, "step": 3393 }, { "epoch": 2.2, "learning_rate": 3.518422343272345e-06, "loss": 0.1896, "step": 3394 }, { "epoch": 2.2, "learning_rate": 3.513099330092582e-06, "loss": 0.1909, "step": 3395 }, { "epoch": 2.2, "learning_rate": 3.5077794884086005e-06, "loss": 0.2003, "step": 3396 }, { "epoch": 2.2, "learning_rate": 3.502462820821317e-06, "loss": 0.1893, "step": 3397 }, { "epoch": 2.2, "learning_rate": 3.497149329930085e-06, "loss": 0.1958, "step": 3398 }, { "epoch": 2.2, "learning_rate": 3.49183901833272e-06, "loss": 0.1858, "step": 3399 }, { "epoch": 2.2, "learning_rate": 3.4865318886254653e-06, "loss": 0.1925, "step": 3400 }, { "epoch": 2.2, "learning_rate": 3.481227943403024e-06, "loss": 0.2003, "step": 3401 }, { "epoch": 2.2, "learning_rate": 3.475927185258531e-06, "loss": 0.1875, "step": 3402 }, { "epoch": 2.2, "learning_rate": 3.4706296167835696e-06, "loss": 0.2027, "step": 3403 }, { "epoch": 2.2, "learning_rate": 3.465335240568166e-06, "loss": 0.1836, "step": 3404 }, { "epoch": 2.21, "learning_rate": 3.4600440592007758e-06, "loss": 0.1807, "step": 3405 }, { "epoch": 2.21, "learning_rate": 3.4547560752683027e-06, "loss": 0.1815, "step": 3406 }, { "epoch": 2.21, "learning_rate": 3.449471291356079e-06, "loss": 0.1933, "step": 3407 }, { "epoch": 2.21, "learning_rate": 3.4441897100478804e-06, "loss": 0.1839, "step": 3408 }, { "epoch": 2.21, "learning_rate": 3.4389113339259084e-06, "loss": 0.1931, "step": 3409 }, { "epoch": 2.21, "learning_rate": 3.433636165570806e-06, "loss": 0.1939, "step": 3410 }, { "epoch": 2.21, "learning_rate": 3.4283642075616387e-06, "loss": 0.2007, "step": 3411 }, { "epoch": 2.21, "learning_rate": 3.4230954624759137e-06, "loss": 0.1961, "step": 3412 }, { "epoch": 2.21, "learning_rate": 3.417829932889556e-06, "loss": 0.2035, "step": 3413 }, { "epoch": 2.21, "learning_rate": 3.4125676213769266e-06, "loss": 0.1947, "step": 3414 }, { "epoch": 2.21, "learning_rate": 3.407308530510807e-06, "loss": 0.1977, "step": 3415 }, { "epoch": 2.21, "learning_rate": 3.402052662862413e-06, "loss": 0.1971, "step": 3416 }, { "epoch": 2.21, "learning_rate": 3.3968000210013707e-06, "loss": 0.1902, "step": 3417 }, { "epoch": 2.21, "learning_rate": 3.391550607495746e-06, "loss": 0.1924, "step": 3418 }, { "epoch": 2.21, "learning_rate": 3.38630442491201e-06, "loss": 0.1883, "step": 3419 }, { "epoch": 2.22, "learning_rate": 3.3810614758150683e-06, "loss": 0.1933, "step": 3420 }, { "epoch": 2.22, "learning_rate": 3.3758217627682334e-06, "loss": 0.198, "step": 3421 }, { "epoch": 2.22, "learning_rate": 3.3705852883332435e-06, "loss": 0.1903, "step": 3422 }, { "epoch": 2.22, "learning_rate": 3.3653520550702557e-06, "loss": 0.2006, "step": 3423 }, { "epoch": 2.22, "learning_rate": 3.360122065537831e-06, "loss": 0.1961, "step": 3424 }, { "epoch": 2.22, "learning_rate": 3.3548953222929582e-06, "loss": 0.1856, "step": 3425 }, { "epoch": 2.22, "learning_rate": 3.3496718278910256e-06, "loss": 0.1847, "step": 3426 }, { "epoch": 2.22, "learning_rate": 3.344451584885847e-06, "loss": 0.1848, "step": 3427 }, { "epoch": 2.22, "learning_rate": 3.3392345958296336e-06, "loss": 0.2004, "step": 3428 }, { "epoch": 2.22, "learning_rate": 3.3340208632730162e-06, "loss": 0.1924, "step": 3429 }, { "epoch": 2.22, "learning_rate": 3.3288103897650237e-06, "loss": 0.1851, "step": 3430 }, { "epoch": 2.22, "learning_rate": 3.3236031778531043e-06, "loss": 0.1818, "step": 3431 }, { "epoch": 2.22, "learning_rate": 3.3183992300830946e-06, "loss": 0.1947, "step": 3432 }, { "epoch": 2.22, "learning_rate": 3.3131985489992512e-06, "loss": 0.1943, "step": 3433 }, { "epoch": 2.22, "learning_rate": 3.308001137144228e-06, "loss": 0.1948, "step": 3434 }, { "epoch": 2.22, "learning_rate": 3.302806997059075e-06, "loss": 0.1826, "step": 3435 }, { "epoch": 2.23, "learning_rate": 3.2976161312832522e-06, "loss": 0.1881, "step": 3436 }, { "epoch": 2.23, "learning_rate": 3.29242854235461e-06, "loss": 0.1916, "step": 3437 }, { "epoch": 2.23, "learning_rate": 3.2872442328094043e-06, "loss": 0.1921, "step": 3438 }, { "epoch": 2.23, "learning_rate": 3.2820632051822787e-06, "loss": 0.1928, "step": 3439 }, { "epoch": 2.23, "learning_rate": 3.276885462006282e-06, "loss": 0.2049, "step": 3440 }, { "epoch": 2.23, "learning_rate": 3.271711005812853e-06, "loss": 0.1889, "step": 3441 }, { "epoch": 2.23, "learning_rate": 3.2665398391318205e-06, "loss": 0.1906, "step": 3442 }, { "epoch": 2.23, "learning_rate": 3.2613719644914123e-06, "loss": 0.1924, "step": 3443 }, { "epoch": 2.23, "learning_rate": 3.256207384418235e-06, "loss": 0.1954, "step": 3444 }, { "epoch": 2.23, "learning_rate": 3.2510461014372995e-06, "loss": 0.1909, "step": 3445 }, { "epoch": 2.23, "learning_rate": 3.2458881180719893e-06, "loss": 0.1894, "step": 3446 }, { "epoch": 2.23, "learning_rate": 3.240733436844087e-06, "loss": 0.1878, "step": 3447 }, { "epoch": 2.23, "learning_rate": 3.2355820602737574e-06, "loss": 0.1937, "step": 3448 }, { "epoch": 2.23, "learning_rate": 3.2304339908795436e-06, "loss": 0.1901, "step": 3449 }, { "epoch": 2.23, "learning_rate": 3.2252892311783825e-06, "loss": 0.195, "step": 3450 }, { "epoch": 2.24, "learning_rate": 3.220147783685579e-06, "loss": 0.2051, "step": 3451 }, { "epoch": 2.24, "learning_rate": 3.215009650914833e-06, "loss": 0.1885, "step": 3452 }, { "epoch": 2.24, "learning_rate": 3.2098748353782128e-06, "loss": 0.1862, "step": 3453 }, { "epoch": 2.24, "learning_rate": 3.2047433395861737e-06, "loss": 0.2016, "step": 3454 }, { "epoch": 2.24, "learning_rate": 3.1996151660475384e-06, "loss": 0.1783, "step": 3455 }, { "epoch": 2.24, "learning_rate": 3.194490317269516e-06, "loss": 0.194, "step": 3456 }, { "epoch": 2.24, "learning_rate": 3.18936879575768e-06, "loss": 0.194, "step": 3457 }, { "epoch": 2.24, "learning_rate": 3.1842506040159858e-06, "loss": 0.189, "step": 3458 }, { "epoch": 2.24, "learning_rate": 3.179135744546752e-06, "loss": 0.1809, "step": 3459 }, { "epoch": 2.24, "learning_rate": 3.174024219850679e-06, "loss": 0.1891, "step": 3460 }, { "epoch": 2.24, "learning_rate": 3.168916032426824e-06, "loss": 0.1991, "step": 3461 }, { "epoch": 2.24, "learning_rate": 3.163811184772625e-06, "loss": 0.188, "step": 3462 }, { "epoch": 2.24, "learning_rate": 3.1587096793838766e-06, "loss": 0.194, "step": 3463 }, { "epoch": 2.24, "learning_rate": 3.1536115187547456e-06, "loss": 0.2012, "step": 3464 }, { "epoch": 2.24, "learning_rate": 3.1485167053777665e-06, "loss": 0.1954, "step": 3465 }, { "epoch": 2.24, "learning_rate": 3.143425241743826e-06, "loss": 0.1967, "step": 3466 }, { "epoch": 2.25, "learning_rate": 3.138337130342186e-06, "loss": 0.1991, "step": 3467 }, { "epoch": 2.25, "learning_rate": 3.133252373660458e-06, "loss": 0.1873, "step": 3468 }, { "epoch": 2.25, "learning_rate": 3.1281709741846243e-06, "loss": 0.19, "step": 3469 }, { "epoch": 2.25, "learning_rate": 3.1230929343990146e-06, "loss": 0.1891, "step": 3470 }, { "epoch": 2.25, "learning_rate": 3.118018256786328e-06, "loss": 0.2021, "step": 3471 }, { "epoch": 2.25, "learning_rate": 3.1129469438276063e-06, "loss": 0.1891, "step": 3472 }, { "epoch": 2.25, "learning_rate": 3.1078789980022605e-06, "loss": 0.1892, "step": 3473 }, { "epoch": 2.25, "learning_rate": 3.102814421788043e-06, "loss": 0.2002, "step": 3474 }, { "epoch": 2.25, "learning_rate": 3.097753217661068e-06, "loss": 0.2039, "step": 3475 }, { "epoch": 2.25, "learning_rate": 3.0926953880957933e-06, "loss": 0.1869, "step": 3476 }, { "epoch": 2.25, "learning_rate": 3.087640935565036e-06, "loss": 0.191, "step": 3477 }, { "epoch": 2.25, "learning_rate": 3.082589862539952e-06, "loss": 0.1954, "step": 3478 }, { "epoch": 2.25, "learning_rate": 3.0775421714900534e-06, "loss": 0.191, "step": 3479 }, { "epoch": 2.25, "learning_rate": 3.0724978648831916e-06, "loss": 0.1931, "step": 3480 }, { "epoch": 2.25, "learning_rate": 3.067456945185573e-06, "loss": 0.1877, "step": 3481 }, { "epoch": 2.26, "learning_rate": 3.062419414861735e-06, "loss": 0.1842, "step": 3482 }, { "epoch": 2.26, "learning_rate": 3.0573852763745704e-06, "loss": 0.1958, "step": 3483 }, { "epoch": 2.26, "learning_rate": 3.052354532185304e-06, "loss": 0.1843, "step": 3484 }, { "epoch": 2.26, "learning_rate": 3.0473271847535078e-06, "loss": 0.1951, "step": 3485 }, { "epoch": 2.26, "learning_rate": 3.0423032365370942e-06, "loss": 0.1936, "step": 3486 }, { "epoch": 2.26, "learning_rate": 3.0372826899923037e-06, "loss": 0.1956, "step": 3487 }, { "epoch": 2.26, "learning_rate": 3.032265547573726e-06, "loss": 0.1809, "step": 3488 }, { "epoch": 2.26, "learning_rate": 3.027251811734275e-06, "loss": 0.1891, "step": 3489 }, { "epoch": 2.26, "learning_rate": 3.0222414849252113e-06, "loss": 0.1878, "step": 3490 }, { "epoch": 2.26, "learning_rate": 3.017234569596116e-06, "loss": 0.1892, "step": 3491 }, { "epoch": 2.26, "learning_rate": 3.012231068194915e-06, "loss": 0.1825, "step": 3492 }, { "epoch": 2.26, "learning_rate": 3.0072309831678515e-06, "loss": 0.1877, "step": 3493 }, { "epoch": 2.26, "learning_rate": 3.0022343169595124e-06, "loss": 0.1778, "step": 3494 }, { "epoch": 2.26, "learning_rate": 2.997241072012801e-06, "loss": 0.1992, "step": 3495 }, { "epoch": 2.26, "learning_rate": 2.9922512507689582e-06, "loss": 0.1992, "step": 3496 }, { "epoch": 2.26, "learning_rate": 2.987264855667541e-06, "loss": 0.188, "step": 3497 }, { "epoch": 2.27, "learning_rate": 2.9822818891464423e-06, "loss": 0.196, "step": 3498 }, { "epoch": 2.27, "learning_rate": 2.9773023536418666e-06, "loss": 0.1974, "step": 3499 }, { "epoch": 2.27, "learning_rate": 2.972326251588352e-06, "loss": 0.1915, "step": 3500 }, { "epoch": 2.27, "learning_rate": 2.9673535854187496e-06, "loss": 0.1937, "step": 3501 }, { "epoch": 2.27, "learning_rate": 2.9623843575642397e-06, "loss": 0.1939, "step": 3502 }, { "epoch": 2.27, "learning_rate": 2.957418570454308e-06, "loss": 0.1976, "step": 3503 }, { "epoch": 2.27, "learning_rate": 2.952456226516771e-06, "loss": 0.1889, "step": 3504 }, { "epoch": 2.27, "learning_rate": 2.94749732817776e-06, "loss": 0.1905, "step": 3505 }, { "epoch": 2.27, "learning_rate": 2.942541877861712e-06, "loss": 0.1902, "step": 3506 }, { "epoch": 2.27, "learning_rate": 2.9375898779913916e-06, "loss": 0.1956, "step": 3507 }, { "epoch": 2.27, "learning_rate": 2.932641330987863e-06, "loss": 0.1854, "step": 3508 }, { "epoch": 2.27, "learning_rate": 2.9276962392705156e-06, "loss": 0.1876, "step": 3509 }, { "epoch": 2.27, "learning_rate": 2.9227546052570353e-06, "loss": 0.1995, "step": 3510 }, { "epoch": 2.27, "learning_rate": 2.9178164313634337e-06, "loss": 0.1967, "step": 3511 }, { "epoch": 2.27, "learning_rate": 2.912881720004014e-06, "loss": 0.1939, "step": 3512 }, { "epoch": 2.28, "learning_rate": 2.907950473591402e-06, "loss": 0.1875, "step": 3513 }, { "epoch": 2.28, "learning_rate": 2.9030226945365157e-06, "loss": 0.1789, "step": 3514 }, { "epoch": 2.28, "learning_rate": 2.89809838524859e-06, "loss": 0.1954, "step": 3515 }, { "epoch": 2.28, "learning_rate": 2.893177548135151e-06, "loss": 0.1852, "step": 3516 }, { "epoch": 2.28, "learning_rate": 2.888260185602042e-06, "loss": 0.1951, "step": 3517 }, { "epoch": 2.28, "learning_rate": 2.8833463000533913e-06, "loss": 0.1866, "step": 3518 }, { "epoch": 2.28, "learning_rate": 2.8784358938916423e-06, "loss": 0.1868, "step": 3519 }, { "epoch": 2.28, "learning_rate": 2.873528969517525e-06, "loss": 0.1901, "step": 3520 }, { "epoch": 2.28, "learning_rate": 2.868625529330079e-06, "loss": 0.191, "step": 3521 }, { "epoch": 2.28, "learning_rate": 2.863725575726626e-06, "loss": 0.1839, "step": 3522 }, { "epoch": 2.28, "learning_rate": 2.8588291111027954e-06, "loss": 0.1942, "step": 3523 }, { "epoch": 2.28, "learning_rate": 2.85393613785251e-06, "loss": 0.1869, "step": 3524 }, { "epoch": 2.28, "learning_rate": 2.8490466583679755e-06, "loss": 0.1911, "step": 3525 }, { "epoch": 2.28, "learning_rate": 2.844160675039702e-06, "loss": 0.1966, "step": 3526 }, { "epoch": 2.28, "learning_rate": 2.8392781902564794e-06, "loss": 0.1809, "step": 3527 }, { "epoch": 2.28, "learning_rate": 2.834399206405396e-06, "loss": 0.1926, "step": 3528 }, { "epoch": 2.29, "learning_rate": 2.829523725871821e-06, "loss": 0.1921, "step": 3529 }, { "epoch": 2.29, "learning_rate": 2.8246517510394165e-06, "loss": 0.1883, "step": 3530 }, { "epoch": 2.29, "learning_rate": 2.8197832842901317e-06, "loss": 0.1976, "step": 3531 }, { "epoch": 2.29, "learning_rate": 2.8149183280041923e-06, "loss": 0.1898, "step": 3532 }, { "epoch": 2.29, "learning_rate": 2.8100568845601173e-06, "loss": 0.1878, "step": 3533 }, { "epoch": 2.29, "learning_rate": 2.805198956334698e-06, "loss": 0.2045, "step": 3534 }, { "epoch": 2.29, "learning_rate": 2.800344545703021e-06, "loss": 0.1872, "step": 3535 }, { "epoch": 2.29, "learning_rate": 2.795493655038438e-06, "loss": 0.1796, "step": 3536 }, { "epoch": 2.29, "learning_rate": 2.7906462867125893e-06, "loss": 0.1888, "step": 3537 }, { "epoch": 2.29, "learning_rate": 2.7858024430953947e-06, "loss": 0.1937, "step": 3538 }, { "epoch": 2.29, "learning_rate": 2.780962126555039e-06, "loss": 0.1963, "step": 3539 }, { "epoch": 2.29, "learning_rate": 2.776125339457998e-06, "loss": 0.1944, "step": 3540 }, { "epoch": 2.29, "learning_rate": 2.7712920841690073e-06, "loss": 0.1898, "step": 3541 }, { "epoch": 2.29, "learning_rate": 2.76646236305109e-06, "loss": 0.1924, "step": 3542 }, { "epoch": 2.29, "learning_rate": 2.7616361784655265e-06, "loss": 0.1983, "step": 3543 }, { "epoch": 2.3, "learning_rate": 2.7568135327718816e-06, "loss": 0.1894, "step": 3544 }, { "epoch": 2.3, "learning_rate": 2.75199442832798e-06, "loss": 0.184, "step": 3545 }, { "epoch": 2.3, "learning_rate": 2.747178867489919e-06, "loss": 0.196, "step": 3546 }, { "epoch": 2.3, "learning_rate": 2.7423668526120696e-06, "loss": 0.1907, "step": 3547 }, { "epoch": 2.3, "learning_rate": 2.7375583860470547e-06, "loss": 0.1901, "step": 3548 }, { "epoch": 2.3, "learning_rate": 2.732753470145778e-06, "loss": 0.1965, "step": 3549 }, { "epoch": 2.3, "learning_rate": 2.7279521072573934e-06, "loss": 0.1863, "step": 3550 }, { "epoch": 2.3, "learning_rate": 2.7231542997293305e-06, "loss": 0.1933, "step": 3551 }, { "epoch": 2.3, "learning_rate": 2.718360049907267e-06, "loss": 0.1978, "step": 3552 }, { "epoch": 2.3, "learning_rate": 2.7135693601351576e-06, "loss": 0.1963, "step": 3553 }, { "epoch": 2.3, "learning_rate": 2.7087822327551995e-06, "loss": 0.1932, "step": 3554 }, { "epoch": 2.3, "learning_rate": 2.7039986701078615e-06, "loss": 0.1973, "step": 3555 }, { "epoch": 2.3, "learning_rate": 2.6992186745318593e-06, "loss": 0.1952, "step": 3556 }, { "epoch": 2.3, "learning_rate": 2.6944422483641762e-06, "loss": 0.1926, "step": 3557 }, { "epoch": 2.3, "learning_rate": 2.6896693939400377e-06, "loss": 0.1917, "step": 3558 }, { "epoch": 2.31, "learning_rate": 2.684900113592933e-06, "loss": 0.1875, "step": 3559 }, { "epoch": 2.31, "learning_rate": 2.6801344096545965e-06, "loss": 0.1986, "step": 3560 }, { "epoch": 2.31, "learning_rate": 2.6753722844550232e-06, "loss": 0.1918, "step": 3561 }, { "epoch": 2.31, "learning_rate": 2.6706137403224466e-06, "loss": 0.1947, "step": 3562 }, { "epoch": 2.31, "learning_rate": 2.665858779583361e-06, "loss": 0.1994, "step": 3563 }, { "epoch": 2.31, "learning_rate": 2.6611074045624985e-06, "loss": 0.2059, "step": 3564 }, { "epoch": 2.31, "learning_rate": 2.6563596175828476e-06, "loss": 0.1935, "step": 3565 }, { "epoch": 2.31, "learning_rate": 2.651615420965633e-06, "loss": 0.1939, "step": 3566 }, { "epoch": 2.31, "learning_rate": 2.6468748170303314e-06, "loss": 0.1928, "step": 3567 }, { "epoch": 2.31, "learning_rate": 2.642137808094665e-06, "loss": 0.1887, "step": 3568 }, { "epoch": 2.31, "learning_rate": 2.6374043964745864e-06, "loss": 0.1949, "step": 3569 }, { "epoch": 2.31, "learning_rate": 2.6326745844843037e-06, "loss": 0.1903, "step": 3570 }, { "epoch": 2.31, "learning_rate": 2.627948374436253e-06, "loss": 0.1987, "step": 3571 }, { "epoch": 2.31, "learning_rate": 2.62322576864112e-06, "loss": 0.192, "step": 3572 }, { "epoch": 2.31, "learning_rate": 2.618506769407817e-06, "loss": 0.1878, "step": 3573 }, { "epoch": 2.31, "learning_rate": 2.6137913790435066e-06, "loss": 0.1856, "step": 3574 }, { "epoch": 2.32, "learning_rate": 2.609079599853571e-06, "loss": 0.2185, "step": 3575 }, { "epoch": 2.32, "learning_rate": 2.604371434141645e-06, "loss": 0.1872, "step": 3576 }, { "epoch": 2.32, "learning_rate": 2.5996668842095776e-06, "loss": 0.1871, "step": 3577 }, { "epoch": 2.32, "learning_rate": 2.5949659523574664e-06, "loss": 0.1785, "step": 3578 }, { "epoch": 2.32, "learning_rate": 2.5902686408836296e-06, "loss": 0.2022, "step": 3579 }, { "epoch": 2.32, "learning_rate": 2.585574952084622e-06, "loss": 0.1898, "step": 3580 }, { "epoch": 2.32, "learning_rate": 2.5808848882552207e-06, "loss": 0.1955, "step": 3581 }, { "epoch": 2.32, "learning_rate": 2.5761984516884375e-06, "loss": 0.1835, "step": 3582 }, { "epoch": 2.32, "learning_rate": 2.571515644675504e-06, "loss": 0.1892, "step": 3583 }, { "epoch": 2.32, "learning_rate": 2.5668364695058847e-06, "loss": 0.2016, "step": 3584 }, { "epoch": 2.32, "learning_rate": 2.5621609284672597e-06, "loss": 0.1893, "step": 3585 }, { "epoch": 2.32, "learning_rate": 2.55748902384554e-06, "loss": 0.201, "step": 3586 }, { "epoch": 2.32, "learning_rate": 2.5528207579248577e-06, "loss": 0.1988, "step": 3587 }, { "epoch": 2.32, "learning_rate": 2.5481561329875592e-06, "loss": 0.1965, "step": 3588 }, { "epoch": 2.32, "learning_rate": 2.54349515131422e-06, "loss": 0.1952, "step": 3589 }, { "epoch": 2.33, "learning_rate": 2.538837815183627e-06, "loss": 0.1936, "step": 3590 }, { "epoch": 2.33, "learning_rate": 2.534184126872791e-06, "loss": 0.1872, "step": 3591 }, { "epoch": 2.33, "learning_rate": 2.529534088656931e-06, "loss": 0.1857, "step": 3592 }, { "epoch": 2.33, "learning_rate": 2.524887702809493e-06, "loss": 0.2016, "step": 3593 }, { "epoch": 2.33, "learning_rate": 2.5202449716021237e-06, "loss": 0.187, "step": 3594 }, { "epoch": 2.33, "learning_rate": 2.5156058973046983e-06, "loss": 0.1876, "step": 3595 }, { "epoch": 2.33, "learning_rate": 2.5109704821852877e-06, "loss": 0.1864, "step": 3596 }, { "epoch": 2.33, "learning_rate": 2.506338728510189e-06, "loss": 0.191, "step": 3597 }, { "epoch": 2.33, "learning_rate": 2.501710638543897e-06, "loss": 0.1948, "step": 3598 }, { "epoch": 2.33, "learning_rate": 2.497086214549126e-06, "loss": 0.1888, "step": 3599 }, { "epoch": 2.33, "learning_rate": 2.492465458786787e-06, "loss": 0.1922, "step": 3600 }, { "epoch": 2.33, "learning_rate": 2.4878483735160084e-06, "loss": 0.191, "step": 3601 }, { "epoch": 2.33, "learning_rate": 2.483234960994113e-06, "loss": 0.1984, "step": 3602 }, { "epoch": 2.33, "learning_rate": 2.4786252234766407e-06, "loss": 0.1861, "step": 3603 }, { "epoch": 2.33, "learning_rate": 2.4740191632173195e-06, "loss": 0.2043, "step": 3604 }, { "epoch": 2.33, "learning_rate": 2.469416782468096e-06, "loss": 0.1952, "step": 3605 }, { "epoch": 2.34, "learning_rate": 2.4648180834791036e-06, "loss": 0.1821, "step": 3606 }, { "epoch": 2.34, "learning_rate": 2.460223068498684e-06, "loss": 0.1885, "step": 3607 }, { "epoch": 2.34, "learning_rate": 2.4556317397733774e-06, "loss": 0.202, "step": 3608 }, { "epoch": 2.34, "learning_rate": 2.451044099547916e-06, "loss": 0.1908, "step": 3609 }, { "epoch": 2.34, "learning_rate": 2.4464601500652362e-06, "loss": 0.197, "step": 3610 }, { "epoch": 2.34, "learning_rate": 2.4418798935664623e-06, "loss": 0.1942, "step": 3611 }, { "epoch": 2.34, "learning_rate": 2.437303332290922e-06, "loss": 0.1927, "step": 3612 }, { "epoch": 2.34, "learning_rate": 2.4327304684761265e-06, "loss": 0.2003, "step": 3613 }, { "epoch": 2.34, "learning_rate": 2.428161304357789e-06, "loss": 0.1981, "step": 3614 }, { "epoch": 2.34, "learning_rate": 2.4235958421698025e-06, "loss": 0.204, "step": 3615 }, { "epoch": 2.34, "learning_rate": 2.419034084144265e-06, "loss": 0.1877, "step": 3616 }, { "epoch": 2.34, "learning_rate": 2.4144760325114468e-06, "loss": 0.1898, "step": 3617 }, { "epoch": 2.34, "learning_rate": 2.4099216894998225e-06, "loss": 0.1926, "step": 3618 }, { "epoch": 2.34, "learning_rate": 2.4053710573360377e-06, "loss": 0.1932, "step": 3619 }, { "epoch": 2.34, "learning_rate": 2.400824138244939e-06, "loss": 0.1925, "step": 3620 }, { "epoch": 2.35, "learning_rate": 2.396280934449543e-06, "loss": 0.1748, "step": 3621 }, { "epoch": 2.35, "learning_rate": 2.391741448171062e-06, "loss": 0.1898, "step": 3622 }, { "epoch": 2.35, "learning_rate": 2.3872056816288868e-06, "loss": 0.191, "step": 3623 }, { "epoch": 2.35, "learning_rate": 2.3826736370405835e-06, "loss": 0.1921, "step": 3624 }, { "epoch": 2.35, "learning_rate": 2.378145316621909e-06, "loss": 0.1953, "step": 3625 }, { "epoch": 2.35, "learning_rate": 2.37362072258679e-06, "loss": 0.1964, "step": 3626 }, { "epoch": 2.35, "learning_rate": 2.3690998571473402e-06, "loss": 0.1935, "step": 3627 }, { "epoch": 2.35, "learning_rate": 2.3645827225138386e-06, "loss": 0.1886, "step": 3628 }, { "epoch": 2.35, "learning_rate": 2.360069320894751e-06, "loss": 0.1968, "step": 3629 }, { "epoch": 2.35, "learning_rate": 2.3555596544967187e-06, "loss": 0.1929, "step": 3630 }, { "epoch": 2.35, "learning_rate": 2.3510537255245438e-06, "loss": 0.1816, "step": 3631 }, { "epoch": 2.35, "learning_rate": 2.3465515361812173e-06, "loss": 0.1943, "step": 3632 }, { "epoch": 2.35, "learning_rate": 2.342053088667888e-06, "loss": 0.1964, "step": 3633 }, { "epoch": 2.35, "learning_rate": 2.337558385183888e-06, "loss": 0.1988, "step": 3634 }, { "epoch": 2.35, "learning_rate": 2.3330674279267064e-06, "loss": 0.1923, "step": 3635 }, { "epoch": 2.35, "learning_rate": 2.3285802190920093e-06, "loss": 0.1861, "step": 3636 }, { "epoch": 2.36, "learning_rate": 2.324096760873631e-06, "loss": 0.1942, "step": 3637 }, { "epoch": 2.36, "learning_rate": 2.3196170554635633e-06, "loss": 0.1885, "step": 3638 }, { "epoch": 2.36, "learning_rate": 2.3151411050519735e-06, "loss": 0.2013, "step": 3639 }, { "epoch": 2.36, "learning_rate": 2.310668911827183e-06, "loss": 0.1882, "step": 3640 }, { "epoch": 2.36, "learning_rate": 2.3062004779756875e-06, "loss": 0.1971, "step": 3641 }, { "epoch": 2.36, "learning_rate": 2.3017358056821335e-06, "loss": 0.1908, "step": 3642 }, { "epoch": 2.36, "learning_rate": 2.297274897129339e-06, "loss": 0.2018, "step": 3643 }, { "epoch": 2.36, "learning_rate": 2.292817754498271e-06, "loss": 0.1965, "step": 3644 }, { "epoch": 2.36, "learning_rate": 2.288364379968067e-06, "loss": 0.1965, "step": 3645 }, { "epoch": 2.36, "learning_rate": 2.2839147757160086e-06, "loss": 0.1988, "step": 3646 }, { "epoch": 2.36, "learning_rate": 2.279468943917549e-06, "loss": 0.198, "step": 3647 }, { "epoch": 2.36, "learning_rate": 2.2750268867462845e-06, "loss": 0.1846, "step": 3648 }, { "epoch": 2.36, "learning_rate": 2.2705886063739725e-06, "loss": 0.188, "step": 3649 }, { "epoch": 2.36, "learning_rate": 2.266154104970526e-06, "loss": 0.1935, "step": 3650 }, { "epoch": 2.36, "learning_rate": 2.261723384704002e-06, "loss": 0.1952, "step": 3651 }, { "epoch": 2.37, "learning_rate": 2.257296447740618e-06, "loss": 0.1805, "step": 3652 }, { "epoch": 2.37, "learning_rate": 2.252873296244733e-06, "loss": 0.1848, "step": 3653 }, { "epoch": 2.37, "learning_rate": 2.2484539323788655e-06, "loss": 0.1996, "step": 3654 }, { "epoch": 2.37, "learning_rate": 2.2440383583036707e-06, "loss": 0.204, "step": 3655 }, { "epoch": 2.37, "learning_rate": 2.2396265761779624e-06, "loss": 0.2027, "step": 3656 }, { "epoch": 2.37, "learning_rate": 2.2352185881586885e-06, "loss": 0.2012, "step": 3657 }, { "epoch": 2.37, "learning_rate": 2.2308143964009566e-06, "loss": 0.188, "step": 3658 }, { "epoch": 2.37, "learning_rate": 2.226414003058002e-06, "loss": 0.1946, "step": 3659 }, { "epoch": 2.37, "learning_rate": 2.2220174102812165e-06, "loss": 0.1971, "step": 3660 }, { "epoch": 2.37, "learning_rate": 2.2176246202201236e-06, "loss": 0.1934, "step": 3661 }, { "epoch": 2.37, "learning_rate": 2.213235635022398e-06, "loss": 0.1921, "step": 3662 }, { "epoch": 2.37, "learning_rate": 2.208850456833843e-06, "loss": 0.1938, "step": 3663 }, { "epoch": 2.37, "learning_rate": 2.20446908779841e-06, "loss": 0.192, "step": 3664 }, { "epoch": 2.37, "learning_rate": 2.2000915300581805e-06, "loss": 0.1931, "step": 3665 }, { "epoch": 2.37, "learning_rate": 2.1957177857533808e-06, "loss": 0.1919, "step": 3666 }, { "epoch": 2.38, "learning_rate": 2.1913478570223625e-06, "loss": 0.2023, "step": 3667 }, { "epoch": 2.38, "learning_rate": 2.186981746001621e-06, "loss": 0.1845, "step": 3668 }, { "epoch": 2.38, "learning_rate": 2.1826194548257827e-06, "loss": 0.1923, "step": 3669 }, { "epoch": 2.38, "learning_rate": 2.1782609856276025e-06, "loss": 0.1868, "step": 3670 }, { "epoch": 2.38, "learning_rate": 2.1739063405379733e-06, "loss": 0.2042, "step": 3671 }, { "epoch": 2.38, "learning_rate": 2.1695555216859098e-06, "loss": 0.1937, "step": 3672 }, { "epoch": 2.38, "learning_rate": 2.165208531198567e-06, "loss": 0.1854, "step": 3673 }, { "epoch": 2.38, "learning_rate": 2.160865371201215e-06, "loss": 0.2015, "step": 3674 }, { "epoch": 2.38, "learning_rate": 2.1565260438172654e-06, "loss": 0.1885, "step": 3675 }, { "epoch": 2.38, "learning_rate": 2.1521905511682415e-06, "loss": 0.1978, "step": 3676 }, { "epoch": 2.38, "learning_rate": 2.1478588953738065e-06, "loss": 0.1859, "step": 3677 }, { "epoch": 2.38, "learning_rate": 2.1435310785517337e-06, "loss": 0.2037, "step": 3678 }, { "epoch": 2.38, "learning_rate": 2.1392071028179318e-06, "loss": 0.1969, "step": 3679 }, { "epoch": 2.38, "learning_rate": 2.1348869702864204e-06, "loss": 0.1978, "step": 3680 }, { "epoch": 2.38, "learning_rate": 2.1305706830693517e-06, "loss": 0.1898, "step": 3681 }, { "epoch": 2.38, "learning_rate": 2.126258243276985e-06, "loss": 0.1978, "step": 3682 }, { "epoch": 2.39, "learning_rate": 2.121949653017713e-06, "loss": 0.2023, "step": 3683 }, { "epoch": 2.39, "learning_rate": 2.1176449143980317e-06, "loss": 0.208, "step": 3684 }, { "epoch": 2.39, "learning_rate": 2.113344029522566e-06, "loss": 0.2049, "step": 3685 }, { "epoch": 2.39, "learning_rate": 2.1090470004940467e-06, "loss": 0.2015, "step": 3686 }, { "epoch": 2.39, "learning_rate": 2.104753829413332e-06, "loss": 0.1981, "step": 3687 }, { "epoch": 2.39, "learning_rate": 2.100464518379377e-06, "loss": 0.1776, "step": 3688 }, { "epoch": 2.39, "learning_rate": 2.096179069489266e-06, "loss": 0.1807, "step": 3689 }, { "epoch": 2.39, "learning_rate": 2.091897484838187e-06, "loss": 0.1865, "step": 3690 }, { "epoch": 2.39, "learning_rate": 2.0876197665194362e-06, "loss": 0.1998, "step": 3691 }, { "epoch": 2.39, "learning_rate": 2.0833459166244286e-06, "loss": 0.1893, "step": 3692 }, { "epoch": 2.39, "learning_rate": 2.079075937242676e-06, "loss": 0.1924, "step": 3693 }, { "epoch": 2.39, "learning_rate": 2.0748098304618103e-06, "loss": 0.1996, "step": 3694 }, { "epoch": 2.39, "learning_rate": 2.070547598367558e-06, "loss": 0.1989, "step": 3695 }, { "epoch": 2.39, "learning_rate": 2.066289243043764e-06, "loss": 0.196, "step": 3696 }, { "epoch": 2.39, "learning_rate": 2.062034766572365e-06, "loss": 0.1903, "step": 3697 }, { "epoch": 2.4, "learning_rate": 2.0577841710334124e-06, "loss": 0.1989, "step": 3698 }, { "epoch": 2.4, "learning_rate": 2.0535374585050506e-06, "loss": 0.1959, "step": 3699 }, { "epoch": 2.4, "learning_rate": 2.0492946310635344e-06, "loss": 0.1931, "step": 3700 }, { "epoch": 2.4, "learning_rate": 2.0450556907832095e-06, "loss": 0.2007, "step": 3701 }, { "epoch": 2.4, "learning_rate": 2.040820639736533e-06, "loss": 0.1869, "step": 3702 }, { "epoch": 2.4, "learning_rate": 2.036589479994048e-06, "loss": 0.1843, "step": 3703 }, { "epoch": 2.4, "learning_rate": 2.032362213624406e-06, "loss": 0.1893, "step": 3704 }, { "epoch": 2.4, "learning_rate": 2.028138842694346e-06, "loss": 0.1942, "step": 3705 }, { "epoch": 2.4, "learning_rate": 2.023919369268711e-06, "loss": 0.1893, "step": 3706 }, { "epoch": 2.4, "learning_rate": 2.019703795410429e-06, "loss": 0.1889, "step": 3707 }, { "epoch": 2.4, "learning_rate": 2.0154921231805323e-06, "loss": 0.1997, "step": 3708 }, { "epoch": 2.4, "learning_rate": 2.0112843546381346e-06, "loss": 0.2, "step": 3709 }, { "epoch": 2.4, "learning_rate": 2.0070804918404476e-06, "loss": 0.2149, "step": 3710 }, { "epoch": 2.4, "learning_rate": 2.002880536842777e-06, "loss": 0.1936, "step": 3711 }, { "epoch": 2.4, "learning_rate": 1.998684491698507e-06, "loss": 0.1902, "step": 3712 }, { "epoch": 2.4, "learning_rate": 1.9944923584591212e-06, "loss": 0.1986, "step": 3713 }, { "epoch": 2.41, "learning_rate": 1.990304139174182e-06, "loss": 0.1884, "step": 3714 }, { "epoch": 2.41, "learning_rate": 1.986119835891346e-06, "loss": 0.1855, "step": 3715 }, { "epoch": 2.41, "learning_rate": 1.981939450656346e-06, "loss": 0.1998, "step": 3716 }, { "epoch": 2.41, "learning_rate": 1.977762985513011e-06, "loss": 0.1947, "step": 3717 }, { "epoch": 2.41, "learning_rate": 1.9735904425032416e-06, "loss": 0.2038, "step": 3718 }, { "epoch": 2.41, "learning_rate": 1.969421823667028e-06, "loss": 0.1912, "step": 3719 }, { "epoch": 2.41, "learning_rate": 1.9652571310424428e-06, "loss": 0.1837, "step": 3720 }, { "epoch": 2.41, "learning_rate": 1.9610963666656323e-06, "loss": 0.1986, "step": 3721 }, { "epoch": 2.41, "learning_rate": 1.956939532570831e-06, "loss": 0.1876, "step": 3722 }, { "epoch": 2.41, "learning_rate": 1.9527866307903408e-06, "loss": 0.1817, "step": 3723 }, { "epoch": 2.41, "learning_rate": 1.9486376633545547e-06, "loss": 0.1934, "step": 3724 }, { "epoch": 2.41, "learning_rate": 1.944492632291928e-06, "loss": 0.2061, "step": 3725 }, { "epoch": 2.41, "learning_rate": 1.9403515396290017e-06, "loss": 0.1888, "step": 3726 }, { "epoch": 2.41, "learning_rate": 1.9362143873903903e-06, "loss": 0.1913, "step": 3727 }, { "epoch": 2.41, "learning_rate": 1.9320811775987747e-06, "loss": 0.192, "step": 3728 }, { "epoch": 2.42, "learning_rate": 1.927951912274917e-06, "loss": 0.1953, "step": 3729 }, { "epoch": 2.42, "learning_rate": 1.923826593437643e-06, "loss": 0.1922, "step": 3730 }, { "epoch": 2.42, "learning_rate": 1.919705223103858e-06, "loss": 0.2018, "step": 3731 }, { "epoch": 2.42, "learning_rate": 1.9155878032885255e-06, "loss": 0.1938, "step": 3732 }, { "epoch": 2.42, "learning_rate": 1.9114743360046882e-06, "loss": 0.1964, "step": 3733 }, { "epoch": 2.42, "learning_rate": 1.9073648232634524e-06, "loss": 0.1823, "step": 3734 }, { "epoch": 2.42, "learning_rate": 1.9032592670739868e-06, "loss": 0.1936, "step": 3735 }, { "epoch": 2.42, "learning_rate": 1.8991576694435355e-06, "loss": 0.193, "step": 3736 }, { "epoch": 2.42, "learning_rate": 1.8950600323773938e-06, "loss": 0.1901, "step": 3737 }, { "epoch": 2.42, "learning_rate": 1.890966357878935e-06, "loss": 0.2009, "step": 3738 }, { "epoch": 2.42, "learning_rate": 1.8868766479495815e-06, "loss": 0.192, "step": 3739 }, { "epoch": 2.42, "learning_rate": 1.8827909045888304e-06, "loss": 0.1975, "step": 3740 }, { "epoch": 2.42, "learning_rate": 1.8787091297942284e-06, "loss": 0.1796, "step": 3741 }, { "epoch": 2.42, "learning_rate": 1.8746313255613902e-06, "loss": 0.2054, "step": 3742 }, { "epoch": 2.42, "learning_rate": 1.870557493883982e-06, "loss": 0.2011, "step": 3743 }, { "epoch": 2.42, "learning_rate": 1.8664876367537355e-06, "loss": 0.1941, "step": 3744 }, { "epoch": 2.43, "learning_rate": 1.862421756160432e-06, "loss": 0.1934, "step": 3745 }, { "epoch": 2.43, "learning_rate": 1.8583598540919146e-06, "loss": 0.2044, "step": 3746 }, { "epoch": 2.43, "learning_rate": 1.854301932534075e-06, "loss": 0.194, "step": 3747 }, { "epoch": 2.43, "learning_rate": 1.8502479934708673e-06, "loss": 0.201, "step": 3748 }, { "epoch": 2.43, "learning_rate": 1.846198038884288e-06, "loss": 0.2012, "step": 3749 }, { "epoch": 2.43, "learning_rate": 1.842152070754395e-06, "loss": 0.192, "step": 3750 }, { "epoch": 2.43, "learning_rate": 1.838110091059293e-06, "loss": 0.1934, "step": 3751 }, { "epoch": 2.43, "learning_rate": 1.8340721017751352e-06, "loss": 0.1917, "step": 3752 }, { "epoch": 2.43, "learning_rate": 1.8300381048761296e-06, "loss": 0.1865, "step": 3753 }, { "epoch": 2.43, "learning_rate": 1.8260081023345222e-06, "loss": 0.186, "step": 3754 }, { "epoch": 2.43, "learning_rate": 1.8219820961206191e-06, "loss": 0.2, "step": 3755 }, { "epoch": 2.43, "learning_rate": 1.8179600882027605e-06, "loss": 0.1959, "step": 3756 }, { "epoch": 2.43, "learning_rate": 1.8139420805473407e-06, "loss": 0.1997, "step": 3757 }, { "epoch": 2.43, "learning_rate": 1.8099280751187908e-06, "loss": 0.1987, "step": 3758 }, { "epoch": 2.43, "learning_rate": 1.8059180738795935e-06, "loss": 0.1908, "step": 3759 }, { "epoch": 2.44, "learning_rate": 1.8019120787902655e-06, "loss": 0.2019, "step": 3760 }, { "epoch": 2.44, "learning_rate": 1.7979100918093718e-06, "loss": 0.1995, "step": 3761 }, { "epoch": 2.44, "learning_rate": 1.7939121148935113e-06, "loss": 0.1884, "step": 3762 }, { "epoch": 2.44, "learning_rate": 1.7899181499973306e-06, "loss": 0.2048, "step": 3763 }, { "epoch": 2.44, "learning_rate": 1.7859281990735034e-06, "loss": 0.1982, "step": 3764 }, { "epoch": 2.44, "learning_rate": 1.7819422640727547e-06, "loss": 0.1802, "step": 3765 }, { "epoch": 2.44, "learning_rate": 1.7779603469438322e-06, "loss": 0.1932, "step": 3766 }, { "epoch": 2.44, "learning_rate": 1.7739824496335322e-06, "loss": 0.1939, "step": 3767 }, { "epoch": 2.44, "learning_rate": 1.7700085740866735e-06, "loss": 0.191, "step": 3768 }, { "epoch": 2.44, "learning_rate": 1.7660387222461196e-06, "loss": 0.1925, "step": 3769 }, { "epoch": 2.44, "learning_rate": 1.7620728960527566e-06, "loss": 0.2035, "step": 3770 }, { "epoch": 2.44, "learning_rate": 1.7581110974455106e-06, "loss": 0.1942, "step": 3771 }, { "epoch": 2.44, "learning_rate": 1.7541533283613377e-06, "loss": 0.1934, "step": 3772 }, { "epoch": 2.44, "learning_rate": 1.7501995907352165e-06, "loss": 0.1929, "step": 3773 }, { "epoch": 2.44, "learning_rate": 1.7462498865001643e-06, "loss": 0.1994, "step": 3774 }, { "epoch": 2.44, "learning_rate": 1.7423042175872185e-06, "loss": 0.1835, "step": 3775 }, { "epoch": 2.45, "learning_rate": 1.7383625859254515e-06, "loss": 0.1846, "step": 3776 }, { "epoch": 2.45, "learning_rate": 1.7344249934419521e-06, "loss": 0.1938, "step": 3777 }, { "epoch": 2.45, "learning_rate": 1.7304914420618447e-06, "loss": 0.1909, "step": 3778 }, { "epoch": 2.45, "learning_rate": 1.7265619337082673e-06, "loss": 0.1857, "step": 3779 }, { "epoch": 2.45, "learning_rate": 1.7226364703023924e-06, "loss": 0.2011, "step": 3780 }, { "epoch": 2.45, "learning_rate": 1.7187150537634056e-06, "loss": 0.2027, "step": 3781 }, { "epoch": 2.45, "learning_rate": 1.7147976860085203e-06, "loss": 0.1982, "step": 3782 }, { "epoch": 2.45, "learning_rate": 1.7108843689529653e-06, "loss": 0.191, "step": 3783 }, { "epoch": 2.45, "learning_rate": 1.7069751045099948e-06, "loss": 0.1953, "step": 3784 }, { "epoch": 2.45, "learning_rate": 1.703069894590873e-06, "loss": 0.1931, "step": 3785 }, { "epoch": 2.45, "learning_rate": 1.699168741104894e-06, "loss": 0.1865, "step": 3786 }, { "epoch": 2.45, "learning_rate": 1.6952716459593555e-06, "loss": 0.1931, "step": 3787 }, { "epoch": 2.45, "learning_rate": 1.691378611059582e-06, "loss": 0.1985, "step": 3788 }, { "epoch": 2.45, "learning_rate": 1.6874896383089045e-06, "loss": 0.203, "step": 3789 }, { "epoch": 2.45, "learning_rate": 1.6836047296086766e-06, "loss": 0.1954, "step": 3790 }, { "epoch": 2.46, "learning_rate": 1.6797238868582545e-06, "loss": 0.1904, "step": 3791 }, { "epoch": 2.46, "learning_rate": 1.6758471119550157e-06, "loss": 0.2, "step": 3792 }, { "epoch": 2.46, "learning_rate": 1.6719744067943466e-06, "loss": 0.1955, "step": 3793 }, { "epoch": 2.46, "learning_rate": 1.668105773269637e-06, "loss": 0.1964, "step": 3794 }, { "epoch": 2.46, "learning_rate": 1.6642412132722997e-06, "loss": 0.195, "step": 3795 }, { "epoch": 2.46, "learning_rate": 1.6603807286917394e-06, "loss": 0.1928, "step": 3796 }, { "epoch": 2.46, "learning_rate": 1.6565243214153847e-06, "loss": 0.1946, "step": 3797 }, { "epoch": 2.46, "learning_rate": 1.6526719933286562e-06, "loss": 0.1789, "step": 3798 }, { "epoch": 2.46, "learning_rate": 1.6488237463149914e-06, "loss": 0.1924, "step": 3799 }, { "epoch": 2.46, "learning_rate": 1.6449795822558245e-06, "loss": 0.188, "step": 3800 }, { "epoch": 2.46, "learning_rate": 1.641139503030601e-06, "loss": 0.2019, "step": 3801 }, { "epoch": 2.46, "learning_rate": 1.6373035105167613e-06, "loss": 0.1984, "step": 3802 }, { "epoch": 2.46, "learning_rate": 1.633471606589755e-06, "loss": 0.2083, "step": 3803 }, { "epoch": 2.46, "learning_rate": 1.6296437931230258e-06, "loss": 0.2003, "step": 3804 }, { "epoch": 2.46, "learning_rate": 1.6258200719880256e-06, "loss": 0.2124, "step": 3805 }, { "epoch": 2.47, "learning_rate": 1.622000445054197e-06, "loss": 0.201, "step": 3806 }, { "epoch": 2.47, "learning_rate": 1.6181849141889893e-06, "loss": 0.1974, "step": 3807 }, { "epoch": 2.47, "learning_rate": 1.6143734812578394e-06, "loss": 0.1927, "step": 3808 }, { "epoch": 2.47, "learning_rate": 1.610566148124193e-06, "loss": 0.1937, "step": 3809 }, { "epoch": 2.47, "learning_rate": 1.606762916649478e-06, "loss": 0.1999, "step": 3810 }, { "epoch": 2.47, "learning_rate": 1.6029637886931271e-06, "loss": 0.1926, "step": 3811 }, { "epoch": 2.47, "learning_rate": 1.5991687661125655e-06, "loss": 0.1987, "step": 3812 }, { "epoch": 2.47, "learning_rate": 1.5953778507632035e-06, "loss": 0.1952, "step": 3813 }, { "epoch": 2.47, "learning_rate": 1.591591044498455e-06, "loss": 0.2064, "step": 3814 }, { "epoch": 2.47, "learning_rate": 1.5878083491697128e-06, "loss": 0.1899, "step": 3815 }, { "epoch": 2.47, "learning_rate": 1.5840297666263715e-06, "loss": 0.2008, "step": 3816 }, { "epoch": 2.47, "learning_rate": 1.5802552987158038e-06, "loss": 0.1936, "step": 3817 }, { "epoch": 2.47, "learning_rate": 1.5764849472833787e-06, "loss": 0.2011, "step": 3818 }, { "epoch": 2.47, "learning_rate": 1.572718714172452e-06, "loss": 0.1851, "step": 3819 }, { "epoch": 2.47, "learning_rate": 1.5689566012243608e-06, "loss": 0.1962, "step": 3820 }, { "epoch": 2.47, "learning_rate": 1.5651986102784344e-06, "loss": 0.1997, "step": 3821 }, { "epoch": 2.48, "learning_rate": 1.5614447431719803e-06, "loss": 0.1989, "step": 3822 }, { "epoch": 2.48, "learning_rate": 1.557695001740297e-06, "loss": 0.1978, "step": 3823 }, { "epoch": 2.48, "learning_rate": 1.5539493878166566e-06, "loss": 0.1866, "step": 3824 }, { "epoch": 2.48, "learning_rate": 1.5502079032323214e-06, "loss": 0.1809, "step": 3825 }, { "epoch": 2.48, "learning_rate": 1.5464705498165355e-06, "loss": 0.1934, "step": 3826 }, { "epoch": 2.48, "learning_rate": 1.5427373293965142e-06, "loss": 0.1894, "step": 3827 }, { "epoch": 2.48, "learning_rate": 1.539008243797463e-06, "loss": 0.1984, "step": 3828 }, { "epoch": 2.48, "learning_rate": 1.5352832948425544e-06, "loss": 0.1891, "step": 3829 }, { "epoch": 2.48, "learning_rate": 1.531562484352951e-06, "loss": 0.1878, "step": 3830 }, { "epoch": 2.48, "learning_rate": 1.5278458141477814e-06, "loss": 0.187, "step": 3831 }, { "epoch": 2.48, "learning_rate": 1.524133286044155e-06, "loss": 0.1943, "step": 3832 }, { "epoch": 2.48, "learning_rate": 1.5204249018571583e-06, "loss": 0.197, "step": 3833 }, { "epoch": 2.48, "learning_rate": 1.5167206633998454e-06, "loss": 0.2009, "step": 3834 }, { "epoch": 2.48, "learning_rate": 1.5130205724832503e-06, "loss": 0.1871, "step": 3835 }, { "epoch": 2.48, "learning_rate": 1.5093246309163722e-06, "loss": 0.1767, "step": 3836 }, { "epoch": 2.49, "learning_rate": 1.5056328405061904e-06, "loss": 0.1889, "step": 3837 }, { "epoch": 2.49, "learning_rate": 1.501945203057643e-06, "loss": 0.1991, "step": 3838 }, { "epoch": 2.49, "learning_rate": 1.4982617203736516e-06, "loss": 0.2, "step": 3839 }, { "epoch": 2.49, "learning_rate": 1.494582394255094e-06, "loss": 0.1806, "step": 3840 }, { "epoch": 2.49, "learning_rate": 1.4909072265008251e-06, "loss": 0.1882, "step": 3841 }, { "epoch": 2.49, "learning_rate": 1.4872362189076583e-06, "loss": 0.1955, "step": 3842 }, { "epoch": 2.49, "learning_rate": 1.4835693732703814e-06, "loss": 0.1867, "step": 3843 }, { "epoch": 2.49, "learning_rate": 1.4799066913817406e-06, "loss": 0.2012, "step": 3844 }, { "epoch": 2.49, "learning_rate": 1.4762481750324531e-06, "loss": 0.1987, "step": 3845 }, { "epoch": 2.49, "learning_rate": 1.4725938260111916e-06, "loss": 0.1879, "step": 3846 }, { "epoch": 2.49, "learning_rate": 1.4689436461045992e-06, "loss": 0.2041, "step": 3847 }, { "epoch": 2.49, "learning_rate": 1.4652976370972726e-06, "loss": 0.195, "step": 3848 }, { "epoch": 2.49, "learning_rate": 1.4616558007717797e-06, "loss": 0.189, "step": 3849 }, { "epoch": 2.49, "learning_rate": 1.4580181389086355e-06, "loss": 0.1983, "step": 3850 }, { "epoch": 2.49, "learning_rate": 1.4543846532863282e-06, "loss": 0.1935, "step": 3851 }, { "epoch": 2.49, "learning_rate": 1.4507553456812894e-06, "loss": 0.2024, "step": 3852 }, { "epoch": 2.5, "learning_rate": 1.4471302178679203e-06, "loss": 0.1991, "step": 3853 }, { "epoch": 2.5, "learning_rate": 1.443509271618575e-06, "loss": 0.1914, "step": 3854 }, { "epoch": 2.5, "learning_rate": 1.4398925087035586e-06, "loss": 0.192, "step": 3855 }, { "epoch": 2.5, "learning_rate": 1.4362799308911368e-06, "loss": 0.1868, "step": 3856 }, { "epoch": 2.5, "learning_rate": 1.4326715399475244e-06, "loss": 0.2069, "step": 3857 }, { "epoch": 2.5, "learning_rate": 1.4290673376368957e-06, "loss": 0.1935, "step": 3858 }, { "epoch": 2.5, "learning_rate": 1.4254673257213668e-06, "loss": 0.1931, "step": 3859 }, { "epoch": 2.5, "learning_rate": 1.4218715059610177e-06, "loss": 0.1961, "step": 3860 }, { "epoch": 2.5, "learning_rate": 1.4182798801138676e-06, "loss": 0.206, "step": 3861 }, { "epoch": 2.5, "learning_rate": 1.414692449935894e-06, "loss": 0.198, "step": 3862 }, { "epoch": 2.5, "learning_rate": 1.4111092171810137e-06, "loss": 0.1977, "step": 3863 }, { "epoch": 2.5, "learning_rate": 1.4075301836011045e-06, "loss": 0.1779, "step": 3864 }, { "epoch": 2.5, "learning_rate": 1.4039553509459758e-06, "loss": 0.2017, "step": 3865 }, { "epoch": 2.5, "learning_rate": 1.4003847209633969e-06, "loss": 0.1896, "step": 3866 }, { "epoch": 2.5, "learning_rate": 1.3968182953990716e-06, "loss": 0.1904, "step": 3867 }, { "epoch": 2.51, "learning_rate": 1.3932560759966584e-06, "loss": 0.1975, "step": 3868 }, { "epoch": 2.51, "learning_rate": 1.3896980644977477e-06, "loss": 0.186, "step": 3869 }, { "epoch": 2.51, "learning_rate": 1.386144262641883e-06, "loss": 0.1895, "step": 3870 }, { "epoch": 2.51, "learning_rate": 1.3825946721665428e-06, "loss": 0.2053, "step": 3871 }, { "epoch": 2.51, "learning_rate": 1.379049294807152e-06, "loss": 0.1906, "step": 3872 }, { "epoch": 2.51, "learning_rate": 1.3755081322970708e-06, "loss": 0.1946, "step": 3873 }, { "epoch": 2.51, "learning_rate": 1.3719711863676e-06, "loss": 0.1909, "step": 3874 }, { "epoch": 2.51, "learning_rate": 1.3684384587479848e-06, "loss": 0.1865, "step": 3875 }, { "epoch": 2.51, "learning_rate": 1.3649099511653964e-06, "loss": 0.2024, "step": 3876 }, { "epoch": 2.51, "learning_rate": 1.3613856653449552e-06, "loss": 0.1915, "step": 3877 }, { "epoch": 2.51, "learning_rate": 1.357865603009707e-06, "loss": 0.2049, "step": 3878 }, { "epoch": 2.51, "learning_rate": 1.3543497658806414e-06, "loss": 0.1894, "step": 3879 }, { "epoch": 2.51, "learning_rate": 1.3508381556766737e-06, "loss": 0.1818, "step": 3880 }, { "epoch": 2.51, "learning_rate": 1.3473307741146625e-06, "loss": 0.1891, "step": 3881 }, { "epoch": 2.51, "learning_rate": 1.3438276229093894e-06, "loss": 0.189, "step": 3882 }, { "epoch": 2.51, "learning_rate": 1.3403287037735746e-06, "loss": 0.2003, "step": 3883 }, { "epoch": 2.52, "learning_rate": 1.3368340184178653e-06, "loss": 0.2062, "step": 3884 }, { "epoch": 2.52, "learning_rate": 1.333343568550841e-06, "loss": 0.2012, "step": 3885 }, { "epoch": 2.52, "learning_rate": 1.3298573558790084e-06, "loss": 0.2071, "step": 3886 }, { "epoch": 2.52, "learning_rate": 1.326375382106807e-06, "loss": 0.1865, "step": 3887 }, { "epoch": 2.52, "learning_rate": 1.3228976489365953e-06, "loss": 0.2034, "step": 3888 }, { "epoch": 2.52, "learning_rate": 1.3194241580686683e-06, "loss": 0.191, "step": 3889 }, { "epoch": 2.52, "learning_rate": 1.3159549112012404e-06, "loss": 0.1941, "step": 3890 }, { "epoch": 2.52, "learning_rate": 1.3124899100304544e-06, "loss": 0.1916, "step": 3891 }, { "epoch": 2.52, "learning_rate": 1.3090291562503742e-06, "loss": 0.1979, "step": 3892 }, { "epoch": 2.52, "learning_rate": 1.305572651552992e-06, "loss": 0.1917, "step": 3893 }, { "epoch": 2.52, "learning_rate": 1.302120397628216e-06, "loss": 0.1997, "step": 3894 }, { "epoch": 2.52, "learning_rate": 1.2986723961638814e-06, "loss": 0.2006, "step": 3895 }, { "epoch": 2.52, "learning_rate": 1.2952286488457455e-06, "loss": 0.1892, "step": 3896 }, { "epoch": 2.52, "learning_rate": 1.2917891573574782e-06, "loss": 0.1919, "step": 3897 }, { "epoch": 2.52, "learning_rate": 1.2883539233806774e-06, "loss": 0.2034, "step": 3898 }, { "epoch": 2.53, "learning_rate": 1.2849229485948511e-06, "loss": 0.2108, "step": 3899 }, { "epoch": 2.53, "learning_rate": 1.2814962346774352e-06, "loss": 0.1852, "step": 3900 }, { "epoch": 2.53, "learning_rate": 1.2780737833037703e-06, "loss": 0.1967, "step": 3901 }, { "epoch": 2.53, "learning_rate": 1.2746555961471253e-06, "loss": 0.1986, "step": 3902 }, { "epoch": 2.53, "learning_rate": 1.2712416748786726e-06, "loss": 0.1954, "step": 3903 }, { "epoch": 2.53, "learning_rate": 1.2678320211675098e-06, "loss": 0.1924, "step": 3904 }, { "epoch": 2.53, "learning_rate": 1.264426636680638e-06, "loss": 0.2088, "step": 3905 }, { "epoch": 2.53, "learning_rate": 1.2610255230829805e-06, "loss": 0.1935, "step": 3906 }, { "epoch": 2.53, "learning_rate": 1.257628682037364e-06, "loss": 0.1988, "step": 3907 }, { "epoch": 2.53, "learning_rate": 1.2542361152045324e-06, "loss": 0.2067, "step": 3908 }, { "epoch": 2.53, "learning_rate": 1.2508478242431387e-06, "loss": 0.1914, "step": 3909 }, { "epoch": 2.53, "learning_rate": 1.2474638108097425e-06, "loss": 0.185, "step": 3910 }, { "epoch": 2.53, "learning_rate": 1.2440840765588157e-06, "loss": 0.1888, "step": 3911 }, { "epoch": 2.53, "learning_rate": 1.2407086231427345e-06, "loss": 0.199, "step": 3912 }, { "epoch": 2.53, "learning_rate": 1.2373374522117876e-06, "loss": 0.1809, "step": 3913 }, { "epoch": 2.53, "learning_rate": 1.233970565414161e-06, "loss": 0.1992, "step": 3914 }, { "epoch": 2.54, "learning_rate": 1.2306079643959557e-06, "loss": 0.1943, "step": 3915 }, { "epoch": 2.54, "learning_rate": 1.2272496508011744e-06, "loss": 0.1931, "step": 3916 }, { "epoch": 2.54, "learning_rate": 1.2238956262717183e-06, "loss": 0.1984, "step": 3917 }, { "epoch": 2.54, "learning_rate": 1.2205458924474e-06, "loss": 0.191, "step": 3918 }, { "epoch": 2.54, "learning_rate": 1.2172004509659264e-06, "loss": 0.1855, "step": 3919 }, { "epoch": 2.54, "learning_rate": 1.213859303462913e-06, "loss": 0.1926, "step": 3920 }, { "epoch": 2.54, "learning_rate": 1.2105224515718693e-06, "loss": 0.1865, "step": 3921 }, { "epoch": 2.54, "learning_rate": 1.2071898969242102e-06, "loss": 0.2041, "step": 3922 }, { "epoch": 2.54, "learning_rate": 1.2038616411492488e-06, "loss": 0.1905, "step": 3923 }, { "epoch": 2.54, "learning_rate": 1.2005376858741914e-06, "loss": 0.1904, "step": 3924 }, { "epoch": 2.54, "learning_rate": 1.197218032724149e-06, "loss": 0.1879, "step": 3925 }, { "epoch": 2.54, "learning_rate": 1.1939026833221211e-06, "loss": 0.2025, "step": 3926 }, { "epoch": 2.54, "learning_rate": 1.1905916392890127e-06, "loss": 0.2012, "step": 3927 }, { "epoch": 2.54, "learning_rate": 1.187284902243615e-06, "loss": 0.1872, "step": 3928 }, { "epoch": 2.54, "learning_rate": 1.1839824738026195e-06, "loss": 0.1953, "step": 3929 }, { "epoch": 2.55, "learning_rate": 1.1806843555806069e-06, "loss": 0.1902, "step": 3930 }, { "epoch": 2.55, "learning_rate": 1.177390549190055e-06, "loss": 0.1962, "step": 3931 }, { "epoch": 2.55, "learning_rate": 1.1741010562413291e-06, "loss": 0.1893, "step": 3932 }, { "epoch": 2.55, "learning_rate": 1.1708158783426893e-06, "loss": 0.1808, "step": 3933 }, { "epoch": 2.55, "learning_rate": 1.167535017100282e-06, "loss": 0.1906, "step": 3934 }, { "epoch": 2.55, "learning_rate": 1.1642584741181473e-06, "loss": 0.1988, "step": 3935 }, { "epoch": 2.55, "learning_rate": 1.160986250998215e-06, "loss": 0.1851, "step": 3936 }, { "epoch": 2.55, "learning_rate": 1.1577183493402943e-06, "loss": 0.1946, "step": 3937 }, { "epoch": 2.55, "learning_rate": 1.154454770742094e-06, "loss": 0.2085, "step": 3938 }, { "epoch": 2.55, "learning_rate": 1.1511955167991963e-06, "loss": 0.1955, "step": 3939 }, { "epoch": 2.55, "learning_rate": 1.147940589105082e-06, "loss": 0.1934, "step": 3940 }, { "epoch": 2.55, "learning_rate": 1.1446899892511042e-06, "loss": 0.1989, "step": 3941 }, { "epoch": 2.55, "learning_rate": 1.1414437188265114e-06, "loss": 0.1921, "step": 3942 }, { "epoch": 2.55, "learning_rate": 1.1382017794184275e-06, "loss": 0.1979, "step": 3943 }, { "epoch": 2.55, "learning_rate": 1.1349641726118631e-06, "loss": 0.1845, "step": 3944 }, { "epoch": 2.56, "learning_rate": 1.1317308999897058e-06, "loss": 0.193, "step": 3945 }, { "epoch": 2.56, "learning_rate": 1.1285019631327331e-06, "loss": 0.191, "step": 3946 }, { "epoch": 2.56, "learning_rate": 1.1252773636195923e-06, "loss": 0.1942, "step": 3947 }, { "epoch": 2.56, "learning_rate": 1.1220571030268179e-06, "loss": 0.1794, "step": 3948 }, { "epoch": 2.56, "learning_rate": 1.118841182928818e-06, "loss": 0.2012, "step": 3949 }, { "epoch": 2.56, "learning_rate": 1.1156296048978833e-06, "loss": 0.1955, "step": 3950 }, { "epoch": 2.56, "learning_rate": 1.1124223705041758e-06, "loss": 0.184, "step": 3951 }, { "epoch": 2.56, "learning_rate": 1.1092194813157408e-06, "loss": 0.1998, "step": 3952 }, { "epoch": 2.56, "learning_rate": 1.1060209388984921e-06, "loss": 0.1912, "step": 3953 }, { "epoch": 2.56, "learning_rate": 1.1028267448162243e-06, "loss": 0.1928, "step": 3954 }, { "epoch": 2.56, "learning_rate": 1.0996369006306019e-06, "loss": 0.1898, "step": 3955 }, { "epoch": 2.56, "learning_rate": 1.0964514079011645e-06, "loss": 0.2021, "step": 3956 }, { "epoch": 2.56, "learning_rate": 1.0932702681853258e-06, "loss": 0.1927, "step": 3957 }, { "epoch": 2.56, "learning_rate": 1.090093483038367e-06, "loss": 0.193, "step": 3958 }, { "epoch": 2.56, "learning_rate": 1.0869210540134456e-06, "loss": 0.1848, "step": 3959 }, { "epoch": 2.56, "learning_rate": 1.0837529826615844e-06, "loss": 0.1951, "step": 3960 }, { "epoch": 2.57, "learning_rate": 1.0805892705316802e-06, "loss": 0.2042, "step": 3961 }, { "epoch": 2.57, "learning_rate": 1.0774299191704917e-06, "loss": 0.1981, "step": 3962 }, { "epoch": 2.57, "learning_rate": 1.0742749301226562e-06, "loss": 0.1924, "step": 3963 }, { "epoch": 2.57, "learning_rate": 1.0711243049306674e-06, "loss": 0.1945, "step": 3964 }, { "epoch": 2.57, "learning_rate": 1.0679780451348943e-06, "loss": 0.2075, "step": 3965 }, { "epoch": 2.57, "learning_rate": 1.0648361522735629e-06, "loss": 0.1938, "step": 3966 }, { "epoch": 2.57, "learning_rate": 1.0616986278827734e-06, "loss": 0.1959, "step": 3967 }, { "epoch": 2.57, "learning_rate": 1.0585654734964813e-06, "loss": 0.1896, "step": 3968 }, { "epoch": 2.57, "learning_rate": 1.0554366906465153e-06, "loss": 0.2023, "step": 3969 }, { "epoch": 2.57, "learning_rate": 1.0523122808625552e-06, "loss": 0.1942, "step": 3970 }, { "epoch": 2.57, "learning_rate": 1.049192245672155e-06, "loss": 0.1991, "step": 3971 }, { "epoch": 2.57, "learning_rate": 1.0460765866007194e-06, "loss": 0.2063, "step": 3972 }, { "epoch": 2.57, "learning_rate": 1.0429653051715217e-06, "loss": 0.1898, "step": 3973 }, { "epoch": 2.57, "learning_rate": 1.039858402905688e-06, "loss": 0.1881, "step": 3974 }, { "epoch": 2.57, "learning_rate": 1.03675588132221e-06, "loss": 0.2011, "step": 3975 }, { "epoch": 2.58, "learning_rate": 1.0336577419379324e-06, "loss": 0.1932, "step": 3976 }, { "epoch": 2.58, "learning_rate": 1.0305639862675588e-06, "loss": 0.1909, "step": 3977 }, { "epoch": 2.58, "learning_rate": 1.0274746158236536e-06, "loss": 0.2041, "step": 3978 }, { "epoch": 2.58, "learning_rate": 1.0243896321166292e-06, "loss": 0.2034, "step": 3979 }, { "epoch": 2.58, "learning_rate": 1.0213090366547628e-06, "loss": 0.1991, "step": 3980 }, { "epoch": 2.58, "learning_rate": 1.018232830944177e-06, "loss": 0.1939, "step": 3981 }, { "epoch": 2.58, "learning_rate": 1.0151610164888558e-06, "loss": 0.2059, "step": 3982 }, { "epoch": 2.58, "learning_rate": 1.0120935947906308e-06, "loss": 0.1866, "step": 3983 }, { "epoch": 2.58, "learning_rate": 1.0090305673491906e-06, "loss": 0.1935, "step": 3984 }, { "epoch": 2.58, "learning_rate": 1.005971935662069e-06, "loss": 0.1909, "step": 3985 }, { "epoch": 2.58, "learning_rate": 1.0029177012246582e-06, "loss": 0.1922, "step": 3986 }, { "epoch": 2.58, "learning_rate": 9.99867865530194e-07, "loss": 0.1868, "step": 3987 }, { "epoch": 2.58, "learning_rate": 9.968224300697692e-07, "loss": 0.1974, "step": 3988 }, { "epoch": 2.58, "learning_rate": 9.937813963323151e-07, "loss": 0.2017, "step": 3989 }, { "epoch": 2.58, "learning_rate": 9.90744765804621e-07, "loss": 0.2002, "step": 3990 }, { "epoch": 2.58, "learning_rate": 9.87712539971315e-07, "loss": 0.1914, "step": 3991 }, { "epoch": 2.59, "learning_rate": 9.846847203148802e-07, "loss": 0.1807, "step": 3992 }, { "epoch": 2.59, "learning_rate": 9.816613083156368e-07, "loss": 0.2065, "step": 3993 }, { "epoch": 2.59, "learning_rate": 9.786423054517592e-07, "loss": 0.1902, "step": 3994 }, { "epoch": 2.59, "learning_rate": 9.756277131992542e-07, "loss": 0.2065, "step": 3995 }, { "epoch": 2.59, "learning_rate": 9.726175330319865e-07, "loss": 0.1879, "step": 3996 }, { "epoch": 2.59, "learning_rate": 9.696117664216508e-07, "loss": 0.1983, "step": 3997 }, { "epoch": 2.59, "learning_rate": 9.666104148377919e-07, "loss": 0.1901, "step": 3998 }, { "epoch": 2.59, "learning_rate": 9.636134797477948e-07, "loss": 0.2009, "step": 3999 }, { "epoch": 2.59, "learning_rate": 9.606209626168828e-07, "loss": 0.1855, "step": 4000 }, { "epoch": 2.59, "learning_rate": 9.576328649081213e-07, "loss": 0.1906, "step": 4001 }, { "epoch": 2.59, "learning_rate": 9.546491880824128e-07, "loss": 0.1927, "step": 4002 }, { "epoch": 2.59, "learning_rate": 9.516699335985025e-07, "loss": 0.2005, "step": 4003 }, { "epoch": 2.59, "learning_rate": 9.486951029129676e-07, "loss": 0.2052, "step": 4004 }, { "epoch": 2.59, "learning_rate": 9.457246974802293e-07, "loss": 0.193, "step": 4005 }, { "epoch": 2.59, "learning_rate": 9.427587187525389e-07, "loss": 0.1982, "step": 4006 }, { "epoch": 2.6, "learning_rate": 9.397971681799866e-07, "loss": 0.2002, "step": 4007 }, { "epoch": 2.6, "learning_rate": 9.36840047210501e-07, "loss": 0.1831, "step": 4008 }, { "epoch": 2.6, "learning_rate": 9.338873572898355e-07, "loss": 0.1884, "step": 4009 }, { "epoch": 2.6, "learning_rate": 9.309390998615886e-07, "loss": 0.1891, "step": 4010 }, { "epoch": 2.6, "learning_rate": 9.279952763671818e-07, "loss": 0.184, "step": 4011 }, { "epoch": 2.6, "learning_rate": 9.25055888245876e-07, "loss": 0.185, "step": 4012 }, { "epoch": 2.6, "learning_rate": 9.221209369347583e-07, "loss": 0.1919, "step": 4013 }, { "epoch": 2.6, "learning_rate": 9.191904238687499e-07, "loss": 0.1819, "step": 4014 }, { "epoch": 2.6, "learning_rate": 9.162643504806035e-07, "loss": 0.185, "step": 4015 }, { "epoch": 2.6, "learning_rate": 9.133427182008958e-07, "loss": 0.1995, "step": 4016 }, { "epoch": 2.6, "learning_rate": 9.104255284580377e-07, "loss": 0.1864, "step": 4017 }, { "epoch": 2.6, "learning_rate": 9.075127826782637e-07, "loss": 0.1912, "step": 4018 }, { "epoch": 2.6, "learning_rate": 9.046044822856392e-07, "loss": 0.1889, "step": 4019 }, { "epoch": 2.6, "learning_rate": 9.017006287020536e-07, "loss": 0.1978, "step": 4020 }, { "epoch": 2.6, "learning_rate": 8.988012233472232e-07, "loss": 0.2009, "step": 4021 }, { "epoch": 2.6, "learning_rate": 8.95906267638692e-07, "loss": 0.1948, "step": 4022 }, { "epoch": 2.61, "learning_rate": 8.930157629918235e-07, "loss": 0.1954, "step": 4023 }, { "epoch": 2.61, "learning_rate": 8.901297108198104e-07, "loss": 0.1933, "step": 4024 }, { "epoch": 2.61, "learning_rate": 8.872481125336629e-07, "loss": 0.1973, "step": 4025 }, { "epoch": 2.61, "learning_rate": 8.843709695422187e-07, "loss": 0.1875, "step": 4026 }, { "epoch": 2.61, "learning_rate": 8.814982832521335e-07, "loss": 0.2033, "step": 4027 }, { "epoch": 2.61, "learning_rate": 8.786300550678883e-07, "loss": 0.2046, "step": 4028 }, { "epoch": 2.61, "learning_rate": 8.757662863917782e-07, "loss": 0.1981, "step": 4029 }, { "epoch": 2.61, "learning_rate": 8.729069786239264e-07, "loss": 0.1839, "step": 4030 }, { "epoch": 2.61, "learning_rate": 8.700521331622657e-07, "loss": 0.2021, "step": 4031 }, { "epoch": 2.61, "learning_rate": 8.672017514025566e-07, "loss": 0.1892, "step": 4032 }, { "epoch": 2.61, "learning_rate": 8.643558347383695e-07, "loss": 0.1956, "step": 4033 }, { "epoch": 2.61, "learning_rate": 8.615143845610974e-07, "loss": 0.1959, "step": 4034 }, { "epoch": 2.61, "learning_rate": 8.586774022599443e-07, "loss": 0.1952, "step": 4035 }, { "epoch": 2.61, "learning_rate": 8.558448892219373e-07, "loss": 0.1983, "step": 4036 }, { "epoch": 2.61, "learning_rate": 8.530168468319077e-07, "loss": 0.1878, "step": 4037 }, { "epoch": 2.62, "learning_rate": 8.501932764725118e-07, "loss": 0.1944, "step": 4038 }, { "epoch": 2.62, "learning_rate": 8.47374179524214e-07, "loss": 0.1954, "step": 4039 }, { "epoch": 2.62, "learning_rate": 8.44559557365292e-07, "loss": 0.1918, "step": 4040 }, { "epoch": 2.62, "learning_rate": 8.417494113718372e-07, "loss": 0.1994, "step": 4041 }, { "epoch": 2.62, "learning_rate": 8.389437429177494e-07, "loss": 0.193, "step": 4042 }, { "epoch": 2.62, "learning_rate": 8.361425533747458e-07, "loss": 0.1773, "step": 4043 }, { "epoch": 2.62, "learning_rate": 8.333458441123443e-07, "loss": 0.1917, "step": 4044 }, { "epoch": 2.62, "learning_rate": 8.305536164978823e-07, "loss": 0.1955, "step": 4045 }, { "epoch": 2.62, "learning_rate": 8.277658718964976e-07, "loss": 0.1953, "step": 4046 }, { "epoch": 2.62, "learning_rate": 8.249826116711434e-07, "loss": 0.1813, "step": 4047 }, { "epoch": 2.62, "learning_rate": 8.222038371825736e-07, "loss": 0.1962, "step": 4048 }, { "epoch": 2.62, "learning_rate": 8.194295497893568e-07, "loss": 0.1989, "step": 4049 }, { "epoch": 2.62, "learning_rate": 8.166597508478591e-07, "loss": 0.201, "step": 4050 }, { "epoch": 2.62, "learning_rate": 8.138944417122607e-07, "loss": 0.1956, "step": 4051 }, { "epoch": 2.62, "learning_rate": 8.111336237345379e-07, "loss": 0.2053, "step": 4052 }, { "epoch": 2.62, "learning_rate": 8.08377298264481e-07, "loss": 0.199, "step": 4053 }, { "epoch": 2.63, "learning_rate": 8.056254666496744e-07, "loss": 0.178, "step": 4054 }, { "epoch": 2.63, "learning_rate": 8.028781302355127e-07, "loss": 0.206, "step": 4055 }, { "epoch": 2.63, "learning_rate": 8.001352903651871e-07, "loss": 0.19, "step": 4056 }, { "epoch": 2.63, "learning_rate": 7.973969483796973e-07, "loss": 0.2008, "step": 4057 }, { "epoch": 2.63, "learning_rate": 7.946631056178356e-07, "loss": 0.2008, "step": 4058 }, { "epoch": 2.63, "learning_rate": 7.919337634162005e-07, "loss": 0.2014, "step": 4059 }, { "epoch": 2.63, "learning_rate": 7.892089231091904e-07, "loss": 0.1954, "step": 4060 }, { "epoch": 2.63, "learning_rate": 7.864885860289972e-07, "loss": 0.2002, "step": 4061 }, { "epoch": 2.63, "learning_rate": 7.837727535056183e-07, "loss": 0.1953, "step": 4062 }, { "epoch": 2.63, "learning_rate": 7.810614268668404e-07, "loss": 0.1932, "step": 4063 }, { "epoch": 2.63, "learning_rate": 7.783546074382587e-07, "loss": 0.1976, "step": 4064 }, { "epoch": 2.63, "learning_rate": 7.756522965432511e-07, "loss": 0.1931, "step": 4065 }, { "epoch": 2.63, "learning_rate": 7.729544955030033e-07, "loss": 0.1953, "step": 4066 }, { "epoch": 2.63, "learning_rate": 7.702612056364878e-07, "loss": 0.1958, "step": 4067 }, { "epoch": 2.63, "learning_rate": 7.67572428260478e-07, "loss": 0.2007, "step": 4068 }, { "epoch": 2.64, "learning_rate": 7.648881646895345e-07, "loss": 0.177, "step": 4069 }, { "epoch": 2.64, "learning_rate": 7.622084162360177e-07, "loss": 0.1844, "step": 4070 }, { "epoch": 2.64, "learning_rate": 7.595331842100739e-07, "loss": 0.1881, "step": 4071 }, { "epoch": 2.64, "learning_rate": 7.56862469919648e-07, "loss": 0.2007, "step": 4072 }, { "epoch": 2.64, "learning_rate": 7.5419627467047e-07, "loss": 0.2051, "step": 4073 }, { "epoch": 2.64, "learning_rate": 7.515345997660661e-07, "loss": 0.1994, "step": 4074 }, { "epoch": 2.64, "learning_rate": 7.488774465077465e-07, "loss": 0.1829, "step": 4075 }, { "epoch": 2.64, "learning_rate": 7.462248161946173e-07, "loss": 0.1966, "step": 4076 }, { "epoch": 2.64, "learning_rate": 7.435767101235658e-07, "loss": 0.2033, "step": 4077 }, { "epoch": 2.64, "learning_rate": 7.409331295892763e-07, "loss": 0.1991, "step": 4078 }, { "epoch": 2.64, "learning_rate": 7.382940758842116e-07, "loss": 0.1995, "step": 4079 }, { "epoch": 2.64, "learning_rate": 7.356595502986274e-07, "loss": 0.1976, "step": 4080 }, { "epoch": 2.64, "learning_rate": 7.330295541205645e-07, "loss": 0.1996, "step": 4081 }, { "epoch": 2.64, "learning_rate": 7.304040886358466e-07, "loss": 0.2023, "step": 4082 }, { "epoch": 2.64, "learning_rate": 7.277831551280856e-07, "loss": 0.1934, "step": 4083 }, { "epoch": 2.65, "learning_rate": 7.251667548786734e-07, "loss": 0.1937, "step": 4084 }, { "epoch": 2.65, "learning_rate": 7.225548891667911e-07, "loss": 0.1927, "step": 4085 }, { "epoch": 2.65, "learning_rate": 7.199475592693983e-07, "loss": 0.2113, "step": 4086 }, { "epoch": 2.65, "learning_rate": 7.173447664612399e-07, "loss": 0.1957, "step": 4087 }, { "epoch": 2.65, "learning_rate": 7.1474651201484e-07, "loss": 0.1934, "step": 4088 }, { "epoch": 2.65, "learning_rate": 7.121527972005071e-07, "loss": 0.2124, "step": 4089 }, { "epoch": 2.65, "learning_rate": 7.095636232863256e-07, "loss": 0.1872, "step": 4090 }, { "epoch": 2.65, "learning_rate": 7.069789915381664e-07, "loss": 0.1982, "step": 4091 }, { "epoch": 2.65, "learning_rate": 7.043989032196719e-07, "loss": 0.1836, "step": 4092 }, { "epoch": 2.65, "learning_rate": 7.018233595922708e-07, "loss": 0.1909, "step": 4093 }, { "epoch": 2.65, "learning_rate": 6.992523619151626e-07, "loss": 0.1929, "step": 4094 }, { "epoch": 2.65, "learning_rate": 6.966859114453317e-07, "loss": 0.1999, "step": 4095 }, { "epoch": 2.65, "learning_rate": 6.941240094375323e-07, "loss": 0.1884, "step": 4096 }, { "epoch": 2.65, "learning_rate": 6.915666571442992e-07, "loss": 0.1922, "step": 4097 }, { "epoch": 2.65, "learning_rate": 6.890138558159432e-07, "loss": 0.1966, "step": 4098 }, { "epoch": 2.65, "learning_rate": 6.86465606700546e-07, "loss": 0.1869, "step": 4099 }, { "epoch": 2.66, "learning_rate": 6.839219110439687e-07, "loss": 0.2062, "step": 4100 }, { "epoch": 2.66, "learning_rate": 6.813827700898412e-07, "loss": 0.2082, "step": 4101 }, { "epoch": 2.66, "learning_rate": 6.788481850795726e-07, "loss": 0.1928, "step": 4102 }, { "epoch": 2.66, "learning_rate": 6.763181572523369e-07, "loss": 0.1901, "step": 4103 }, { "epoch": 2.66, "learning_rate": 6.737926878450862e-07, "loss": 0.1835, "step": 4104 }, { "epoch": 2.66, "learning_rate": 6.712717780925437e-07, "loss": 0.194, "step": 4105 }, { "epoch": 2.66, "learning_rate": 6.68755429227198e-07, "loss": 0.2027, "step": 4106 }, { "epoch": 2.66, "learning_rate": 6.662436424793151e-07, "loss": 0.1895, "step": 4107 }, { "epoch": 2.66, "learning_rate": 6.637364190769236e-07, "loss": 0.1844, "step": 4108 }, { "epoch": 2.66, "learning_rate": 6.612337602458274e-07, "loss": 0.1988, "step": 4109 }, { "epoch": 2.66, "learning_rate": 6.587356672095935e-07, "loss": 0.1977, "step": 4110 }, { "epoch": 2.66, "learning_rate": 6.562421411895592e-07, "loss": 0.2038, "step": 4111 }, { "epoch": 2.66, "learning_rate": 6.537531834048305e-07, "loss": 0.1929, "step": 4112 }, { "epoch": 2.66, "learning_rate": 6.512687950722751e-07, "loss": 0.1744, "step": 4113 }, { "epoch": 2.66, "learning_rate": 6.487889774065326e-07, "loss": 0.1981, "step": 4114 }, { "epoch": 2.67, "learning_rate": 6.46313731620003e-07, "loss": 0.1957, "step": 4115 }, { "epoch": 2.67, "learning_rate": 6.438430589228539e-07, "loss": 0.1869, "step": 4116 }, { "epoch": 2.67, "learning_rate": 6.413769605230147e-07, "loss": 0.1889, "step": 4117 }, { "epoch": 2.67, "learning_rate": 6.38915437626183e-07, "loss": 0.1856, "step": 4118 }, { "epoch": 2.67, "learning_rate": 6.364584914358129e-07, "loss": 0.203, "step": 4119 }, { "epoch": 2.67, "learning_rate": 6.340061231531258e-07, "loss": 0.1938, "step": 4120 }, { "epoch": 2.67, "learning_rate": 6.315583339771058e-07, "loss": 0.1871, "step": 4121 }, { "epoch": 2.67, "learning_rate": 6.291151251044913e-07, "loss": 0.1927, "step": 4122 }, { "epoch": 2.67, "learning_rate": 6.266764977297901e-07, "loss": 0.1989, "step": 4123 }, { "epoch": 2.67, "learning_rate": 6.242424530452628e-07, "loss": 0.1904, "step": 4124 }, { "epoch": 2.67, "learning_rate": 6.218129922409367e-07, "loss": 0.1917, "step": 4125 }, { "epoch": 2.67, "learning_rate": 6.19388116504589e-07, "loss": 0.1891, "step": 4126 }, { "epoch": 2.67, "learning_rate": 6.169678270217638e-07, "loss": 0.195, "step": 4127 }, { "epoch": 2.67, "learning_rate": 6.145521249757558e-07, "loss": 0.1897, "step": 4128 }, { "epoch": 2.67, "learning_rate": 6.121410115476256e-07, "loss": 0.2044, "step": 4129 }, { "epoch": 2.67, "learning_rate": 6.097344879161793e-07, "loss": 0.1957, "step": 4130 }, { "epoch": 2.68, "learning_rate": 6.073325552579901e-07, "loss": 0.185, "step": 4131 }, { "epoch": 2.68, "learning_rate": 6.049352147473764e-07, "loss": 0.1955, "step": 4132 }, { "epoch": 2.68, "learning_rate": 6.025424675564217e-07, "loss": 0.1922, "step": 4133 }, { "epoch": 2.68, "learning_rate": 6.001543148549538e-07, "loss": 0.1991, "step": 4134 }, { "epoch": 2.68, "learning_rate": 5.977707578105619e-07, "loss": 0.2043, "step": 4135 }, { "epoch": 2.68, "learning_rate": 5.953917975885848e-07, "loss": 0.2029, "step": 4136 }, { "epoch": 2.68, "learning_rate": 5.930174353521146e-07, "loss": 0.1914, "step": 4137 }, { "epoch": 2.68, "learning_rate": 5.906476722619936e-07, "loss": 0.1999, "step": 4138 }, { "epoch": 2.68, "learning_rate": 5.882825094768207e-07, "loss": 0.1991, "step": 4139 }, { "epoch": 2.68, "learning_rate": 5.859219481529377e-07, "loss": 0.2012, "step": 4140 }, { "epoch": 2.68, "learning_rate": 5.835659894444445e-07, "loss": 0.1874, "step": 4141 }, { "epoch": 2.68, "learning_rate": 5.812146345031888e-07, "loss": 0.1925, "step": 4142 }, { "epoch": 2.68, "learning_rate": 5.788678844787621e-07, "loss": 0.2065, "step": 4143 }, { "epoch": 2.68, "learning_rate": 5.765257405185121e-07, "loss": 0.193, "step": 4144 }, { "epoch": 2.68, "learning_rate": 5.741882037675273e-07, "loss": 0.1864, "step": 4145 }, { "epoch": 2.69, "learning_rate": 5.718552753686524e-07, "loss": 0.1928, "step": 4146 }, { "epoch": 2.69, "learning_rate": 5.695269564624695e-07, "loss": 0.1946, "step": 4147 }, { "epoch": 2.69, "learning_rate": 5.672032481873146e-07, "loss": 0.1984, "step": 4148 }, { "epoch": 2.69, "learning_rate": 5.648841516792647e-07, "loss": 0.1917, "step": 4149 }, { "epoch": 2.69, "learning_rate": 5.625696680721459e-07, "loss": 0.1924, "step": 4150 }, { "epoch": 2.69, "learning_rate": 5.602597984975256e-07, "loss": 0.1952, "step": 4151 }, { "epoch": 2.69, "learning_rate": 5.579545440847179e-07, "loss": 0.1978, "step": 4152 }, { "epoch": 2.69, "learning_rate": 5.556539059607769e-07, "loss": 0.1947, "step": 4153 }, { "epoch": 2.69, "learning_rate": 5.533578852505072e-07, "loss": 0.1938, "step": 4154 }, { "epoch": 2.69, "learning_rate": 5.510664830764456e-07, "loss": 0.1845, "step": 4155 }, { "epoch": 2.69, "learning_rate": 5.487797005588802e-07, "loss": 0.1943, "step": 4156 }, { "epoch": 2.69, "learning_rate": 5.464975388158344e-07, "loss": 0.1875, "step": 4157 }, { "epoch": 2.69, "learning_rate": 5.44219998963077e-07, "loss": 0.1995, "step": 4158 }, { "epoch": 2.69, "learning_rate": 5.419470821141115e-07, "loss": 0.1984, "step": 4159 }, { "epoch": 2.69, "learning_rate": 5.39678789380188e-07, "loss": 0.1804, "step": 4160 }, { "epoch": 2.69, "learning_rate": 5.374151218702894e-07, "loss": 0.1777, "step": 4161 }, { "epoch": 2.7, "learning_rate": 5.351560806911415e-07, "loss": 0.196, "step": 4162 }, { "epoch": 2.7, "learning_rate": 5.329016669472087e-07, "loss": 0.2064, "step": 4163 }, { "epoch": 2.7, "learning_rate": 5.306518817406869e-07, "loss": 0.2029, "step": 4164 }, { "epoch": 2.7, "learning_rate": 5.284067261715187e-07, "loss": 0.1997, "step": 4165 }, { "epoch": 2.7, "learning_rate": 5.261662013373736e-07, "loss": 0.1865, "step": 4166 }, { "epoch": 2.7, "learning_rate": 5.239303083336633e-07, "loss": 0.2027, "step": 4167 }, { "epoch": 2.7, "learning_rate": 5.216990482535333e-07, "loss": 0.1945, "step": 4168 }, { "epoch": 2.7, "learning_rate": 5.194724221878645e-07, "loss": 0.2009, "step": 4169 }, { "epoch": 2.7, "learning_rate": 5.17250431225269e-07, "loss": 0.1997, "step": 4170 }, { "epoch": 2.7, "learning_rate": 5.150330764520983e-07, "loss": 0.1959, "step": 4171 }, { "epoch": 2.7, "learning_rate": 5.128203589524316e-07, "loss": 0.1897, "step": 4172 }, { "epoch": 2.7, "learning_rate": 5.10612279808087e-07, "loss": 0.188, "step": 4173 }, { "epoch": 2.7, "learning_rate": 5.084088400986076e-07, "loss": 0.1873, "step": 4174 }, { "epoch": 2.7, "learning_rate": 5.062100409012749e-07, "loss": 0.1826, "step": 4175 }, { "epoch": 2.7, "learning_rate": 5.040158832910958e-07, "loss": 0.1993, "step": 4176 }, { "epoch": 2.71, "learning_rate": 5.018263683408153e-07, "loss": 0.1926, "step": 4177 }, { "epoch": 2.71, "learning_rate": 4.996414971208996e-07, "loss": 0.1831, "step": 4178 }, { "epoch": 2.71, "learning_rate": 4.97461270699553e-07, "loss": 0.1933, "step": 4179 }, { "epoch": 2.71, "learning_rate": 4.952856901427005e-07, "loss": 0.197, "step": 4180 }, { "epoch": 2.71, "learning_rate": 4.931147565140038e-07, "loss": 0.196, "step": 4181 }, { "epoch": 2.71, "learning_rate": 4.909484708748502e-07, "loss": 0.2029, "step": 4182 }, { "epoch": 2.71, "learning_rate": 4.887868342843494e-07, "loss": 0.1889, "step": 4183 }, { "epoch": 2.71, "learning_rate": 4.866298477993458e-07, "loss": 0.1934, "step": 4184 }, { "epoch": 2.71, "learning_rate": 4.844775124744028e-07, "loss": 0.1879, "step": 4185 }, { "epoch": 2.71, "learning_rate": 4.823298293618184e-07, "loss": 0.1811, "step": 4186 }, { "epoch": 2.71, "learning_rate": 4.801867995116083e-07, "loss": 0.1891, "step": 4187 }, { "epoch": 2.71, "learning_rate": 4.780484239715177e-07, "loss": 0.1978, "step": 4188 }, { "epoch": 2.71, "learning_rate": 4.7591470378701266e-07, "loss": 0.1951, "step": 4189 }, { "epoch": 2.71, "learning_rate": 4.737856400012897e-07, "loss": 0.2006, "step": 4190 }, { "epoch": 2.71, "learning_rate": 4.716612336552595e-07, "loss": 0.2046, "step": 4191 }, { "epoch": 2.72, "learning_rate": 4.6954148578756466e-07, "loss": 0.2, "step": 4192 }, { "epoch": 2.72, "learning_rate": 4.6742639743456385e-07, "loss": 0.2061, "step": 4193 }, { "epoch": 2.72, "learning_rate": 4.653159696303422e-07, "loss": 0.2001, "step": 4194 }, { "epoch": 2.72, "learning_rate": 4.632102034067021e-07, "loss": 0.1924, "step": 4195 }, { "epoch": 2.72, "learning_rate": 4.6110909979316996e-07, "loss": 0.1923, "step": 4196 }, { "epoch": 2.72, "learning_rate": 4.5901265981699196e-07, "loss": 0.2101, "step": 4197 }, { "epoch": 2.72, "learning_rate": 4.5692088450313256e-07, "loss": 0.1898, "step": 4198 }, { "epoch": 2.72, "learning_rate": 4.5483377487427817e-07, "loss": 0.2017, "step": 4199 }, { "epoch": 2.72, "learning_rate": 4.527513319508314e-07, "loss": 0.1906, "step": 4200 }, { "epoch": 2.72, "learning_rate": 4.506735567509157e-07, "loss": 0.2207, "step": 4201 }, { "epoch": 2.72, "learning_rate": 4.486004502903707e-07, "loss": 0.1926, "step": 4202 }, { "epoch": 2.72, "learning_rate": 4.4653201358275444e-07, "loss": 0.1974, "step": 4203 }, { "epoch": 2.72, "learning_rate": 4.444682476393425e-07, "loss": 0.1944, "step": 4204 }, { "epoch": 2.72, "learning_rate": 4.424091534691244e-07, "loss": 0.1807, "step": 4205 }, { "epoch": 2.72, "learning_rate": 4.4035473207880817e-07, "loss": 0.2019, "step": 4206 }, { "epoch": 2.72, "learning_rate": 4.38304984472816e-07, "loss": 0.2068, "step": 4207 }, { "epoch": 2.73, "learning_rate": 4.362599116532851e-07, "loss": 0.1904, "step": 4208 }, { "epoch": 2.73, "learning_rate": 4.3421951462006784e-07, "loss": 0.1928, "step": 4209 }, { "epoch": 2.73, "learning_rate": 4.3218379437072853e-07, "loss": 0.1955, "step": 4210 }, { "epoch": 2.73, "learning_rate": 4.3015275190054997e-07, "loss": 0.1955, "step": 4211 }, { "epoch": 2.73, "learning_rate": 4.2812638820251995e-07, "loss": 0.1929, "step": 4212 }, { "epoch": 2.73, "learning_rate": 4.2610470426734827e-07, "loss": 0.2002, "step": 4213 }, { "epoch": 2.73, "learning_rate": 4.2408770108344657e-07, "loss": 0.1978, "step": 4214 }, { "epoch": 2.73, "learning_rate": 4.2207537963694813e-07, "loss": 0.1916, "step": 4215 }, { "epoch": 2.73, "learning_rate": 4.200677409116882e-07, "loss": 0.1929, "step": 4216 }, { "epoch": 2.73, "learning_rate": 4.180647858892206e-07, "loss": 0.2021, "step": 4217 }, { "epoch": 2.73, "learning_rate": 4.1606651554880197e-07, "loss": 0.1925, "step": 4218 }, { "epoch": 2.73, "learning_rate": 4.140729308674052e-07, "loss": 0.204, "step": 4219 }, { "epoch": 2.73, "learning_rate": 4.1208403281970733e-07, "loss": 0.1944, "step": 4220 }, { "epoch": 2.73, "learning_rate": 4.100998223780972e-07, "loss": 0.2033, "step": 4221 }, { "epoch": 2.73, "learning_rate": 4.081203005126688e-07, "loss": 0.1991, "step": 4222 }, { "epoch": 2.74, "learning_rate": 4.061454681912258e-07, "loss": 0.2051, "step": 4223 }, { "epoch": 2.74, "learning_rate": 4.0417532637928245e-07, "loss": 0.1981, "step": 4224 }, { "epoch": 2.74, "learning_rate": 4.0220987604005166e-07, "loss": 0.1866, "step": 4225 }, { "epoch": 2.74, "learning_rate": 4.0024911813446143e-07, "loss": 0.1884, "step": 4226 }, { "epoch": 2.74, "learning_rate": 3.982930536211371e-07, "loss": 0.1959, "step": 4227 }, { "epoch": 2.74, "learning_rate": 3.963416834564171e-07, "loss": 0.1898, "step": 4228 }, { "epoch": 2.74, "learning_rate": 3.9439500859434045e-07, "loss": 0.1996, "step": 4229 }, { "epoch": 2.74, "learning_rate": 3.9245302998665137e-07, "loss": 0.1957, "step": 4230 }, { "epoch": 2.74, "learning_rate": 3.9051574858279595e-07, "loss": 0.2006, "step": 4231 }, { "epoch": 2.74, "learning_rate": 3.8858316532993103e-07, "loss": 0.2024, "step": 4232 }, { "epoch": 2.74, "learning_rate": 3.866552811729063e-07, "loss": 0.1933, "step": 4233 }, { "epoch": 2.74, "learning_rate": 3.847320970542834e-07, "loss": 0.2019, "step": 4234 }, { "epoch": 2.74, "learning_rate": 3.828136139143179e-07, "loss": 0.1791, "step": 4235 }, { "epoch": 2.74, "learning_rate": 3.808998326909752e-07, "loss": 0.1906, "step": 4236 }, { "epoch": 2.74, "learning_rate": 3.789907543199134e-07, "loss": 0.1994, "step": 4237 }, { "epoch": 2.74, "learning_rate": 3.7708637973449926e-07, "loss": 0.1976, "step": 4238 }, { "epoch": 2.75, "learning_rate": 3.7518670986579355e-07, "loss": 0.1999, "step": 4239 }, { "epoch": 2.75, "learning_rate": 3.732917456425622e-07, "loss": 0.1954, "step": 4240 }, { "epoch": 2.75, "learning_rate": 3.714014879912653e-07, "loss": 0.2066, "step": 4241 }, { "epoch": 2.75, "learning_rate": 3.6951593783606686e-07, "loss": 0.1893, "step": 4242 }, { "epoch": 2.75, "learning_rate": 3.67635096098824e-07, "loss": 0.1852, "step": 4243 }, { "epoch": 2.75, "learning_rate": 3.657589636990955e-07, "loss": 0.1844, "step": 4244 }, { "epoch": 2.75, "learning_rate": 3.6388754155413984e-07, "loss": 0.1976, "step": 4245 }, { "epoch": 2.75, "learning_rate": 3.6202083057890636e-07, "loss": 0.2018, "step": 4246 }, { "epoch": 2.75, "learning_rate": 3.6015883168604604e-07, "loss": 0.1929, "step": 4247 }, { "epoch": 2.75, "learning_rate": 3.5830154578590403e-07, "loss": 0.2028, "step": 4248 }, { "epoch": 2.75, "learning_rate": 3.564489737865229e-07, "loss": 0.2026, "step": 4249 }, { "epoch": 2.75, "learning_rate": 3.5460111659363694e-07, "loss": 0.1959, "step": 4250 }, { "epoch": 2.75, "learning_rate": 3.527579751106802e-07, "loss": 0.1968, "step": 4251 }, { "epoch": 2.75, "learning_rate": 3.5091955023877745e-07, "loss": 0.2011, "step": 4252 }, { "epoch": 2.75, "learning_rate": 3.4908584287674963e-07, "loss": 0.1944, "step": 4253 }, { "epoch": 2.76, "learning_rate": 3.4725685392110854e-07, "loss": 0.1904, "step": 4254 }, { "epoch": 2.76, "learning_rate": 3.454325842660644e-07, "loss": 0.1837, "step": 4255 }, { "epoch": 2.76, "learning_rate": 3.436130348035127e-07, "loss": 0.1935, "step": 4256 }, { "epoch": 2.76, "learning_rate": 3.417982064230485e-07, "loss": 0.1998, "step": 4257 }, { "epoch": 2.76, "learning_rate": 3.399881000119531e-07, "loss": 0.1896, "step": 4258 }, { "epoch": 2.76, "learning_rate": 3.381827164552043e-07, "loss": 0.1947, "step": 4259 }, { "epoch": 2.76, "learning_rate": 3.3638205663546364e-07, "loss": 0.1846, "step": 4260 }, { "epoch": 2.76, "learning_rate": 3.3458612143309254e-07, "loss": 0.2062, "step": 4261 }, { "epoch": 2.76, "learning_rate": 3.327949117261342e-07, "loss": 0.1904, "step": 4262 }, { "epoch": 2.76, "learning_rate": 3.3100842839032476e-07, "loss": 0.1973, "step": 4263 }, { "epoch": 2.76, "learning_rate": 3.292266722990933e-07, "loss": 0.2048, "step": 4264 }, { "epoch": 2.76, "learning_rate": 3.2744964432354977e-07, "loss": 0.2034, "step": 4265 }, { "epoch": 2.76, "learning_rate": 3.256773453325002e-07, "loss": 0.1972, "step": 4266 }, { "epoch": 2.76, "learning_rate": 3.2390977619243257e-07, "loss": 0.2049, "step": 4267 }, { "epoch": 2.76, "learning_rate": 3.221469377675279e-07, "loss": 0.1944, "step": 4268 }, { "epoch": 2.76, "learning_rate": 3.203888309196479e-07, "loss": 0.1903, "step": 4269 }, { "epoch": 2.77, "learning_rate": 3.186354565083483e-07, "loss": 0.1925, "step": 4270 }, { "epoch": 2.77, "learning_rate": 3.1688681539086443e-07, "loss": 0.2004, "step": 4271 }, { "epoch": 2.77, "learning_rate": 3.151429084221225e-07, "loss": 0.1929, "step": 4272 }, { "epoch": 2.77, "learning_rate": 3.134037364547293e-07, "loss": 0.1932, "step": 4273 }, { "epoch": 2.77, "learning_rate": 3.116693003389837e-07, "loss": 0.1809, "step": 4274 }, { "epoch": 2.77, "learning_rate": 3.099396009228617e-07, "loss": 0.1988, "step": 4275 }, { "epoch": 2.77, "learning_rate": 3.0821463905202796e-07, "loss": 0.1921, "step": 4276 }, { "epoch": 2.77, "learning_rate": 3.0649441556982793e-07, "loss": 0.174, "step": 4277 }, { "epoch": 2.77, "learning_rate": 3.047789313172944e-07, "loss": 0.1897, "step": 4278 }, { "epoch": 2.77, "learning_rate": 3.030681871331398e-07, "loss": 0.1891, "step": 4279 }, { "epoch": 2.77, "learning_rate": 3.013621838537606e-07, "loss": 0.1899, "step": 4280 }, { "epoch": 2.77, "learning_rate": 2.99660922313233e-07, "loss": 0.1956, "step": 4281 }, { "epoch": 2.77, "learning_rate": 2.9796440334332044e-07, "loss": 0.195, "step": 4282 }, { "epoch": 2.77, "learning_rate": 2.9627262777346066e-07, "loss": 0.1931, "step": 4283 }, { "epoch": 2.77, "learning_rate": 2.9458559643077756e-07, "loss": 0.1947, "step": 4284 }, { "epoch": 2.78, "learning_rate": 2.9290331014007466e-07, "loss": 0.1858, "step": 4285 }, { "epoch": 2.78, "learning_rate": 2.91225769723833e-07, "loss": 0.1875, "step": 4286 }, { "epoch": 2.78, "learning_rate": 2.895529760022153e-07, "loss": 0.1695, "step": 4287 }, { "epoch": 2.78, "learning_rate": 2.8788492979306413e-07, "loss": 0.1921, "step": 4288 }, { "epoch": 2.78, "learning_rate": 2.8622163191189933e-07, "loss": 0.1987, "step": 4289 }, { "epoch": 2.78, "learning_rate": 2.845630831719204e-07, "loss": 0.1925, "step": 4290 }, { "epoch": 2.78, "learning_rate": 2.8290928438400533e-07, "loss": 0.1833, "step": 4291 }, { "epoch": 2.78, "learning_rate": 2.812602363567074e-07, "loss": 0.1914, "step": 4292 }, { "epoch": 2.78, "learning_rate": 2.796159398962595e-07, "loss": 0.1896, "step": 4293 }, { "epoch": 2.78, "learning_rate": 2.779763958065718e-07, "loss": 0.1952, "step": 4294 }, { "epoch": 2.78, "learning_rate": 2.763416048892298e-07, "loss": 0.2024, "step": 4295 }, { "epoch": 2.78, "learning_rate": 2.747115679434953e-07, "loss": 0.1995, "step": 4296 }, { "epoch": 2.78, "learning_rate": 2.7308628576630524e-07, "loss": 0.1781, "step": 4297 }, { "epoch": 2.78, "learning_rate": 2.7146575915227403e-07, "loss": 0.2069, "step": 4298 }, { "epoch": 2.78, "learning_rate": 2.6984998889368673e-07, "loss": 0.1942, "step": 4299 }, { "epoch": 2.78, "learning_rate": 2.6823897578050927e-07, "loss": 0.1965, "step": 4300 }, { "epoch": 2.79, "learning_rate": 2.6663272060037713e-07, "loss": 0.2075, "step": 4301 }, { "epoch": 2.79, "learning_rate": 2.650312241385999e-07, "loss": 0.1997, "step": 4302 }, { "epoch": 2.79, "learning_rate": 2.634344871781636e-07, "loss": 0.1981, "step": 4303 }, { "epoch": 2.79, "learning_rate": 2.618425104997224e-07, "loss": 0.2055, "step": 4304 }, { "epoch": 2.79, "learning_rate": 2.602552948816095e-07, "loss": 0.2012, "step": 4305 }, { "epoch": 2.79, "learning_rate": 2.5867284109982295e-07, "loss": 0.1905, "step": 4306 }, { "epoch": 2.79, "learning_rate": 2.570951499280394e-07, "loss": 0.1962, "step": 4307 }, { "epoch": 2.79, "learning_rate": 2.555222221376041e-07, "loss": 0.1774, "step": 4308 }, { "epoch": 2.79, "learning_rate": 2.53954058497532e-07, "loss": 0.1902, "step": 4309 }, { "epoch": 2.79, "learning_rate": 2.5239065977451296e-07, "loss": 0.187, "step": 4310 }, { "epoch": 2.79, "learning_rate": 2.508320267329012e-07, "loss": 0.2002, "step": 4311 }, { "epoch": 2.79, "learning_rate": 2.4927816013472717e-07, "loss": 0.2047, "step": 4312 }, { "epoch": 2.79, "learning_rate": 2.477290607396876e-07, "loss": 0.1925, "step": 4313 }, { "epoch": 2.79, "learning_rate": 2.461847293051489e-07, "loss": 0.1801, "step": 4314 }, { "epoch": 2.79, "learning_rate": 2.44645166586146e-07, "loss": 0.1964, "step": 4315 }, { "epoch": 2.8, "learning_rate": 2.431103733353846e-07, "loss": 0.2022, "step": 4316 }, { "epoch": 2.8, "learning_rate": 2.415803503032343e-07, "loss": 0.2054, "step": 4317 }, { "epoch": 2.8, "learning_rate": 2.400550982377381e-07, "loss": 0.1998, "step": 4318 }, { "epoch": 2.8, "learning_rate": 2.3853461788460153e-07, "loss": 0.1931, "step": 4319 }, { "epoch": 2.8, "learning_rate": 2.3701890998720112e-07, "loss": 0.194, "step": 4320 }, { "epoch": 2.8, "learning_rate": 2.3550797528657632e-07, "loss": 0.1954, "step": 4321 }, { "epoch": 2.8, "learning_rate": 2.3400181452143512e-07, "loss": 0.184, "step": 4322 }, { "epoch": 2.8, "learning_rate": 2.325004284281518e-07, "loss": 0.1822, "step": 4323 }, { "epoch": 2.8, "learning_rate": 2.3100381774076474e-07, "loss": 0.183, "step": 4324 }, { "epoch": 2.8, "learning_rate": 2.2951198319097868e-07, "loss": 0.2024, "step": 4325 }, { "epoch": 2.8, "learning_rate": 2.2802492550816347e-07, "loss": 0.1905, "step": 4326 }, { "epoch": 2.8, "learning_rate": 2.2654264541935312e-07, "loss": 0.2047, "step": 4327 }, { "epoch": 2.8, "learning_rate": 2.2506514364924459e-07, "loss": 0.198, "step": 4328 }, { "epoch": 2.8, "learning_rate": 2.2359242092020227e-07, "loss": 0.1859, "step": 4329 }, { "epoch": 2.8, "learning_rate": 2.2212447795224912e-07, "loss": 0.1815, "step": 4330 }, { "epoch": 2.81, "learning_rate": 2.2066131546307656e-07, "loss": 0.1946, "step": 4331 }, { "epoch": 2.81, "learning_rate": 2.192029341680324e-07, "loss": 0.1894, "step": 4332 }, { "epoch": 2.81, "learning_rate": 2.1774933478013404e-07, "loss": 0.1954, "step": 4333 }, { "epoch": 2.81, "learning_rate": 2.1630051801005526e-07, "loss": 0.196, "step": 4334 }, { "epoch": 2.81, "learning_rate": 2.1485648456613383e-07, "loss": 0.1817, "step": 4335 }, { "epoch": 2.81, "learning_rate": 2.1341723515436952e-07, "loss": 0.1948, "step": 4336 }, { "epoch": 2.81, "learning_rate": 2.1198277047842386e-07, "loss": 0.1859, "step": 4337 }, { "epoch": 2.81, "learning_rate": 2.1055309123961366e-07, "loss": 0.1923, "step": 4338 }, { "epoch": 2.81, "learning_rate": 2.091281981369253e-07, "loss": 0.2023, "step": 4339 }, { "epoch": 2.81, "learning_rate": 2.0770809186699493e-07, "loss": 0.1968, "step": 4340 }, { "epoch": 2.81, "learning_rate": 2.062927731241271e-07, "loss": 0.1848, "step": 4341 }, { "epoch": 2.81, "learning_rate": 2.0488224260028055e-07, "loss": 0.194, "step": 4342 }, { "epoch": 2.81, "learning_rate": 2.0347650098507587e-07, "loss": 0.1813, "step": 4343 }, { "epoch": 2.81, "learning_rate": 2.0207554896578772e-07, "loss": 0.1923, "step": 4344 }, { "epoch": 2.81, "learning_rate": 2.0067938722735603e-07, "loss": 0.1948, "step": 4345 }, { "epoch": 2.81, "learning_rate": 1.992880164523736e-07, "loss": 0.1861, "step": 4346 }, { "epoch": 2.82, "learning_rate": 1.9790143732109192e-07, "loss": 0.2022, "step": 4347 }, { "epoch": 2.82, "learning_rate": 1.9651965051142085e-07, "loss": 0.1847, "step": 4348 }, { "epoch": 2.82, "learning_rate": 1.9514265669892673e-07, "loss": 0.1899, "step": 4349 }, { "epoch": 2.82, "learning_rate": 1.9377045655683213e-07, "loss": 0.1933, "step": 4350 }, { "epoch": 2.82, "learning_rate": 1.9240305075601484e-07, "loss": 0.182, "step": 4351 }, { "epoch": 2.82, "learning_rate": 1.9104043996501341e-07, "loss": 0.204, "step": 4352 }, { "epoch": 2.82, "learning_rate": 1.896826248500161e-07, "loss": 0.1987, "step": 4353 }, { "epoch": 2.82, "learning_rate": 1.883296060748696e-07, "loss": 0.1889, "step": 4354 }, { "epoch": 2.82, "learning_rate": 1.8698138430107592e-07, "loss": 0.2027, "step": 4355 }, { "epoch": 2.82, "learning_rate": 1.8563796018779112e-07, "loss": 0.1927, "step": 4356 }, { "epoch": 2.82, "learning_rate": 1.8429933439182534e-07, "loss": 0.1987, "step": 4357 }, { "epoch": 2.82, "learning_rate": 1.8296550756764287e-07, "loss": 0.1871, "step": 4358 }, { "epoch": 2.82, "learning_rate": 1.8163648036736202e-07, "loss": 0.1889, "step": 4359 }, { "epoch": 2.82, "learning_rate": 1.803122534407542e-07, "loss": 0.1906, "step": 4360 }, { "epoch": 2.82, "learning_rate": 1.789928274352448e-07, "loss": 0.187, "step": 4361 }, { "epoch": 2.83, "learning_rate": 1.7767820299591122e-07, "loss": 0.1862, "step": 4362 }, { "epoch": 2.83, "learning_rate": 1.7636838076548258e-07, "loss": 0.2091, "step": 4363 }, { "epoch": 2.83, "learning_rate": 1.7506336138434331e-07, "loss": 0.1871, "step": 4364 }, { "epoch": 2.83, "learning_rate": 1.737631454905242e-07, "loss": 0.2025, "step": 4365 }, { "epoch": 2.83, "learning_rate": 1.7246773371971227e-07, "loss": 0.2024, "step": 4366 }, { "epoch": 2.83, "learning_rate": 1.7117712670524644e-07, "loss": 0.1924, "step": 4367 }, { "epoch": 2.83, "learning_rate": 1.698913250781109e-07, "loss": 0.2043, "step": 4368 }, { "epoch": 2.83, "learning_rate": 1.6861032946694722e-07, "loss": 0.179, "step": 4369 }, { "epoch": 2.83, "learning_rate": 1.673341404980422e-07, "loss": 0.1936, "step": 4370 }, { "epoch": 2.83, "learning_rate": 1.6606275879533452e-07, "loss": 0.2006, "step": 4371 }, { "epoch": 2.83, "learning_rate": 1.647961849804125e-07, "loss": 0.1994, "step": 4372 }, { "epoch": 2.83, "learning_rate": 1.6353441967251525e-07, "loss": 0.1959, "step": 4373 }, { "epoch": 2.83, "learning_rate": 1.6227746348852713e-07, "loss": 0.1903, "step": 4374 }, { "epoch": 2.83, "learning_rate": 1.610253170429854e-07, "loss": 0.2011, "step": 4375 }, { "epoch": 2.83, "learning_rate": 1.5977798094807373e-07, "loss": 0.1917, "step": 4376 }, { "epoch": 2.83, "learning_rate": 1.585354558136254e-07, "loss": 0.1869, "step": 4377 }, { "epoch": 2.84, "learning_rate": 1.5729774224711892e-07, "loss": 0.2052, "step": 4378 }, { "epoch": 2.84, "learning_rate": 1.5606484085368246e-07, "loss": 0.1813, "step": 4379 }, { "epoch": 2.84, "learning_rate": 1.5483675223609053e-07, "loss": 0.2011, "step": 4380 }, { "epoch": 2.84, "learning_rate": 1.5361347699476724e-07, "loss": 0.206, "step": 4381 }, { "epoch": 2.84, "learning_rate": 1.523950157277776e-07, "loss": 0.2068, "step": 4382 }, { "epoch": 2.84, "learning_rate": 1.5118136903084168e-07, "loss": 0.1881, "step": 4383 }, { "epoch": 2.84, "learning_rate": 1.49972537497316e-07, "loss": 0.1948, "step": 4384 }, { "epoch": 2.84, "learning_rate": 1.4876852171821e-07, "loss": 0.2075, "step": 4385 }, { "epoch": 2.84, "learning_rate": 1.4756932228217613e-07, "loss": 0.203, "step": 4386 }, { "epoch": 2.84, "learning_rate": 1.4637493977551097e-07, "loss": 0.2028, "step": 4387 }, { "epoch": 2.84, "learning_rate": 1.4518537478215967e-07, "loss": 0.2034, "step": 4388 }, { "epoch": 2.84, "learning_rate": 1.4400062788370806e-07, "loss": 0.1849, "step": 4389 }, { "epoch": 2.84, "learning_rate": 1.428206996593895e-07, "loss": 0.1978, "step": 4390 }, { "epoch": 2.84, "learning_rate": 1.4164559068607699e-07, "loss": 0.1916, "step": 4391 }, { "epoch": 2.84, "learning_rate": 1.4047530153829314e-07, "loss": 0.1976, "step": 4392 }, { "epoch": 2.85, "learning_rate": 1.393098327882003e-07, "loss": 0.188, "step": 4393 }, { "epoch": 2.85, "learning_rate": 1.3814918500560605e-07, "loss": 0.1891, "step": 4394 }, { "epoch": 2.85, "learning_rate": 1.369933587579586e-07, "loss": 0.183, "step": 4395 }, { "epoch": 2.85, "learning_rate": 1.3584235461035155e-07, "loss": 0.1927, "step": 4396 }, { "epoch": 2.85, "learning_rate": 1.346961731255192e-07, "loss": 0.1914, "step": 4397 }, { "epoch": 2.85, "learning_rate": 1.335548148638377e-07, "loss": 0.1882, "step": 4398 }, { "epoch": 2.85, "learning_rate": 1.3241828038332738e-07, "loss": 0.2004, "step": 4399 }, { "epoch": 2.85, "learning_rate": 1.312865702396482e-07, "loss": 0.1904, "step": 4400 }, { "epoch": 2.85, "learning_rate": 1.3015968498610087e-07, "loss": 0.194, "step": 4401 }, { "epoch": 2.85, "learning_rate": 1.2903762517363028e-07, "loss": 0.1995, "step": 4402 }, { "epoch": 2.85, "learning_rate": 1.279203913508187e-07, "loss": 0.1926, "step": 4403 }, { "epoch": 2.85, "learning_rate": 1.2680798406389138e-07, "loss": 0.1932, "step": 4404 }, { "epoch": 2.85, "learning_rate": 1.2570040385671112e-07, "loss": 0.1965, "step": 4405 }, { "epoch": 2.85, "learning_rate": 1.2459765127078473e-07, "loss": 0.2063, "step": 4406 }, { "epoch": 2.85, "learning_rate": 1.2349972684525534e-07, "loss": 0.2004, "step": 4407 }, { "epoch": 2.85, "learning_rate": 1.2240663111690476e-07, "loss": 0.2043, "step": 4408 }, { "epoch": 2.86, "learning_rate": 1.2131836462015988e-07, "loss": 0.2038, "step": 4409 }, { "epoch": 2.86, "learning_rate": 1.2023492788707957e-07, "loss": 0.1986, "step": 4410 }, { "epoch": 2.86, "learning_rate": 1.1915632144736566e-07, "loss": 0.1893, "step": 4411 }, { "epoch": 2.86, "learning_rate": 1.1808254582835632e-07, "loss": 0.1976, "step": 4412 }, { "epoch": 2.86, "learning_rate": 1.1701360155502939e-07, "loss": 0.1877, "step": 4413 }, { "epoch": 2.86, "learning_rate": 1.1594948914999904e-07, "loss": 0.2051, "step": 4414 }, { "epoch": 2.86, "learning_rate": 1.1489020913352022e-07, "loss": 0.1938, "step": 4415 }, { "epoch": 2.86, "learning_rate": 1.138357620234809e-07, "loss": 0.1895, "step": 4416 }, { "epoch": 2.86, "learning_rate": 1.127861483354098e-07, "loss": 0.1985, "step": 4417 }, { "epoch": 2.86, "learning_rate": 1.1174136858246976e-07, "loss": 0.188, "step": 4418 }, { "epoch": 2.86, "learning_rate": 1.107014232754633e-07, "loss": 0.1968, "step": 4419 }, { "epoch": 2.86, "learning_rate": 1.0966631292282592e-07, "loss": 0.198, "step": 4420 }, { "epoch": 2.86, "learning_rate": 1.0863603803063172e-07, "loss": 0.1908, "step": 4421 }, { "epoch": 2.86, "learning_rate": 1.0761059910259108e-07, "loss": 0.2028, "step": 4422 }, { "epoch": 2.86, "learning_rate": 1.0658999664004854e-07, "loss": 0.1985, "step": 4423 }, { "epoch": 2.87, "learning_rate": 1.0557423114198273e-07, "loss": 0.1808, "step": 4424 }, { "epoch": 2.87, "learning_rate": 1.0456330310501195e-07, "loss": 0.2089, "step": 4425 }, { "epoch": 2.87, "learning_rate": 1.0355721302338528e-07, "loss": 0.1926, "step": 4426 }, { "epoch": 2.87, "learning_rate": 1.0255596138898815e-07, "loss": 0.1895, "step": 4427 }, { "epoch": 2.87, "learning_rate": 1.015595486913401e-07, "loss": 0.1956, "step": 4428 }, { "epoch": 2.87, "learning_rate": 1.0056797541759478e-07, "loss": 0.1789, "step": 4429 }, { "epoch": 2.87, "learning_rate": 9.958124205254104e-08, "loss": 0.201, "step": 4430 }, { "epoch": 2.87, "learning_rate": 9.859934907859859e-08, "loss": 0.1958, "step": 4431 }, { "epoch": 2.87, "learning_rate": 9.762229697582448e-08, "loss": 0.1937, "step": 4432 }, { "epoch": 2.87, "learning_rate": 9.665008622190552e-08, "loss": 0.1993, "step": 4433 }, { "epoch": 2.87, "learning_rate": 9.568271729216483e-08, "loss": 0.2078, "step": 4434 }, { "epoch": 2.87, "learning_rate": 9.472019065955407e-08, "loss": 0.1883, "step": 4435 }, { "epoch": 2.87, "learning_rate": 9.376250679466126e-08, "loss": 0.1983, "step": 4436 }, { "epoch": 2.87, "learning_rate": 9.28096661657063e-08, "loss": 0.1899, "step": 4437 }, { "epoch": 2.87, "learning_rate": 9.186166923853878e-08, "loss": 0.197, "step": 4438 }, { "epoch": 2.88, "learning_rate": 9.09185164766424e-08, "loss": 0.19, "step": 4439 }, { "epoch": 2.88, "learning_rate": 8.998020834113163e-08, "loss": 0.1904, "step": 4440 }, { "epoch": 2.88, "learning_rate": 8.904674529075285e-08, "loss": 0.1768, "step": 4441 }, { "epoch": 2.88, "learning_rate": 8.811812778188433e-08, "loss": 0.1876, "step": 4442 }, { "epoch": 2.88, "learning_rate": 8.719435626853179e-08, "loss": 0.1981, "step": 4443 }, { "epoch": 2.88, "learning_rate": 8.627543120233617e-08, "loss": 0.19, "step": 4444 }, { "epoch": 2.88, "learning_rate": 8.536135303256588e-08, "loss": 0.1914, "step": 4445 }, { "epoch": 2.88, "learning_rate": 8.445212220612232e-08, "loss": 0.1876, "step": 4446 }, { "epoch": 2.88, "learning_rate": 8.354773916753434e-08, "loss": 0.2066, "step": 4447 }, { "epoch": 2.88, "learning_rate": 8.264820435896159e-08, "loss": 0.1931, "step": 4448 }, { "epoch": 2.88, "learning_rate": 8.17535182201945e-08, "loss": 0.1997, "step": 4449 }, { "epoch": 2.88, "learning_rate": 8.086368118865095e-08, "loss": 0.1928, "step": 4450 }, { "epoch": 2.88, "learning_rate": 7.997869369937961e-08, "loss": 0.1948, "step": 4451 }, { "epoch": 2.88, "learning_rate": 7.90985561850588e-08, "loss": 0.1965, "step": 4452 }, { "epoch": 2.88, "learning_rate": 7.82232690759932e-08, "loss": 0.1881, "step": 4453 }, { "epoch": 2.88, "learning_rate": 7.735283280011719e-08, "loss": 0.199, "step": 4454 }, { "epoch": 2.89, "learning_rate": 7.648724778299477e-08, "loss": 0.1949, "step": 4455 }, { "epoch": 2.89, "learning_rate": 7.562651444781633e-08, "loss": 0.1968, "step": 4456 }, { "epoch": 2.89, "learning_rate": 7.47706332154019e-08, "loss": 0.1818, "step": 4457 }, { "epoch": 2.89, "learning_rate": 7.391960450419788e-08, "loss": 0.1917, "step": 4458 }, { "epoch": 2.89, "learning_rate": 7.307342873027812e-08, "loss": 0.193, "step": 4459 }, { "epoch": 2.89, "learning_rate": 7.223210630734611e-08, "loss": 0.1896, "step": 4460 }, { "epoch": 2.89, "learning_rate": 7.139563764672953e-08, "loss": 0.1878, "step": 4461 }, { "epoch": 2.89, "learning_rate": 7.056402315738453e-08, "loss": 0.2028, "step": 4462 }, { "epoch": 2.89, "learning_rate": 6.97372632458948e-08, "loss": 0.2017, "step": 4463 }, { "epoch": 2.89, "learning_rate": 6.891535831646811e-08, "loss": 0.1938, "step": 4464 }, { "epoch": 2.89, "learning_rate": 6.809830877094192e-08, "loss": 0.1935, "step": 4465 }, { "epoch": 2.89, "learning_rate": 6.728611500877668e-08, "loss": 0.1874, "step": 4466 }, { "epoch": 2.89, "learning_rate": 6.647877742706032e-08, "loss": 0.2048, "step": 4467 }, { "epoch": 2.89, "learning_rate": 6.56762964205071e-08, "loss": 0.1912, "step": 4468 }, { "epoch": 2.89, "learning_rate": 6.48786723814554e-08, "loss": 0.1898, "step": 4469 }, { "epoch": 2.9, "learning_rate": 6.408590569986994e-08, "loss": 0.204, "step": 4470 }, { "epoch": 2.9, "learning_rate": 6.329799676334069e-08, "loss": 0.206, "step": 4471 }, { "epoch": 2.9, "learning_rate": 6.251494595708286e-08, "loss": 0.1961, "step": 4472 }, { "epoch": 2.9, "learning_rate": 6.173675366393572e-08, "loss": 0.1971, "step": 4473 }, { "epoch": 2.9, "learning_rate": 6.096342026436274e-08, "loss": 0.1842, "step": 4474 }, { "epoch": 2.9, "learning_rate": 6.019494613645372e-08, "loss": 0.1945, "step": 4475 }, { "epoch": 2.9, "learning_rate": 5.94313316559203e-08, "loss": 0.2026, "step": 4476 }, { "epoch": 2.9, "learning_rate": 5.8672577196100535e-08, "loss": 0.1905, "step": 4477 }, { "epoch": 2.9, "learning_rate": 5.7918683127955456e-08, "loss": 0.1954, "step": 4478 }, { "epoch": 2.9, "learning_rate": 5.7169649820069115e-08, "loss": 0.2017, "step": 4479 }, { "epoch": 2.9, "learning_rate": 5.6425477638649694e-08, "loss": 0.2017, "step": 4480 }, { "epoch": 2.9, "learning_rate": 5.568616694752838e-08, "loss": 0.2032, "step": 4481 }, { "epoch": 2.9, "learning_rate": 5.495171810816047e-08, "loss": 0.1867, "step": 4482 }, { "epoch": 2.9, "learning_rate": 5.422213147962319e-08, "loss": 0.2057, "step": 4483 }, { "epoch": 2.9, "learning_rate": 5.3497407418617864e-08, "loss": 0.1903, "step": 4484 }, { "epoch": 2.9, "learning_rate": 5.277754627946774e-08, "loss": 0.1984, "step": 4485 }, { "epoch": 2.91, "learning_rate": 5.206254841411795e-08, "loss": 0.2009, "step": 4486 }, { "epoch": 2.91, "learning_rate": 5.1352414172135546e-08, "loss": 0.2036, "step": 4487 }, { "epoch": 2.91, "learning_rate": 5.064714390071168e-08, "loss": 0.191, "step": 4488 }, { "epoch": 2.91, "learning_rate": 4.9946737944659426e-08, "loss": 0.2026, "step": 4489 }, { "epoch": 2.91, "learning_rate": 4.9251196646410426e-08, "loss": 0.1839, "step": 4490 }, { "epoch": 2.91, "learning_rate": 4.856052034602154e-08, "loss": 0.1975, "step": 4491 }, { "epoch": 2.91, "learning_rate": 4.7874709381170445e-08, "loss": 0.1838, "step": 4492 }, { "epoch": 2.91, "learning_rate": 4.7193764087153374e-08, "loss": 0.2, "step": 4493 }, { "epoch": 2.91, "learning_rate": 4.651768479689067e-08, "loss": 0.2032, "step": 4494 }, { "epoch": 2.91, "learning_rate": 4.5846471840923504e-08, "loss": 0.1918, "step": 4495 }, { "epoch": 2.91, "learning_rate": 4.518012554741269e-08, "loss": 0.2017, "step": 4496 }, { "epoch": 2.91, "learning_rate": 4.451864624213875e-08, "loss": 0.1936, "step": 4497 }, { "epoch": 2.91, "learning_rate": 4.386203424850632e-08, "loss": 0.1901, "step": 4498 }, { "epoch": 2.91, "learning_rate": 4.321028988753639e-08, "loss": 0.1982, "step": 4499 }, { "epoch": 2.91, "learning_rate": 4.256341347787185e-08, "loss": 0.1978, "step": 4500 }, { "epoch": 2.92, "learning_rate": 4.1921405335776376e-08, "loss": 0.1951, "step": 4501 }, { "epoch": 2.92, "learning_rate": 4.128426577513223e-08, "loss": 0.1837, "step": 4502 }, { "epoch": 2.92, "learning_rate": 4.0651995107442444e-08, "loss": 0.1994, "step": 4503 }, { "epoch": 2.92, "learning_rate": 4.002459364182754e-08, "loss": 0.1932, "step": 4504 }, { "epoch": 2.92, "learning_rate": 3.940206168503102e-08, "loss": 0.1829, "step": 4505 }, { "epoch": 2.92, "learning_rate": 3.8784399541411664e-08, "loss": 0.1897, "step": 4506 }, { "epoch": 2.92, "learning_rate": 3.817160751295124e-08, "loss": 0.1905, "step": 4507 }, { "epoch": 2.92, "learning_rate": 3.756368589924564e-08, "loss": 0.203, "step": 4508 }, { "epoch": 2.92, "learning_rate": 3.69606349975149e-08, "loss": 0.1882, "step": 4509 }, { "epoch": 2.92, "learning_rate": 3.6362455102594286e-08, "loss": 0.1929, "step": 4510 }, { "epoch": 2.92, "learning_rate": 3.5769146506938745e-08, "loss": 0.187, "step": 4511 }, { "epoch": 2.92, "learning_rate": 3.51807095006218e-08, "loss": 0.2045, "step": 4512 }, { "epoch": 2.92, "learning_rate": 3.459714437133332e-08, "loss": 0.1928, "step": 4513 }, { "epoch": 2.92, "learning_rate": 3.401845140438509e-08, "loss": 0.2018, "step": 4514 }, { "epoch": 2.92, "learning_rate": 3.3444630882703e-08, "loss": 0.1975, "step": 4515 }, { "epoch": 2.92, "learning_rate": 3.287568308683375e-08, "loss": 0.19, "step": 4516 }, { "epoch": 2.93, "learning_rate": 3.231160829493818e-08, "loss": 0.1995, "step": 4517 }, { "epoch": 2.93, "learning_rate": 3.1752406782797895e-08, "loss": 0.1875, "step": 4518 }, { "epoch": 2.93, "learning_rate": 3.1198078823810875e-08, "loss": 0.2032, "step": 4519 }, { "epoch": 2.93, "learning_rate": 3.0648624688992545e-08, "loss": 0.1995, "step": 4520 }, { "epoch": 2.93, "learning_rate": 3.0104044646974694e-08, "loss": 0.2032, "step": 4521 }, { "epoch": 2.93, "learning_rate": 2.9564338964007676e-08, "loss": 0.2064, "step": 4522 }, { "epoch": 2.93, "learning_rate": 2.902950790395709e-08, "loss": 0.2024, "step": 4523 }, { "epoch": 2.93, "learning_rate": 2.8499551728305986e-08, "loss": 0.1844, "step": 4524 }, { "epoch": 2.93, "learning_rate": 2.7974470696153778e-08, "loss": 0.1892, "step": 4525 }, { "epoch": 2.93, "learning_rate": 2.7454265064217334e-08, "loss": 0.1958, "step": 4526 }, { "epoch": 2.93, "learning_rate": 2.6938935086828765e-08, "loss": 0.1853, "step": 4527 }, { "epoch": 2.93, "learning_rate": 2.6428481015936536e-08, "loss": 0.1948, "step": 4528 }, { "epoch": 2.93, "learning_rate": 2.5922903101105458e-08, "loss": 0.1967, "step": 4529 }, { "epoch": 2.93, "learning_rate": 2.5422201589517804e-08, "loss": 0.1807, "step": 4530 }, { "epoch": 2.93, "learning_rate": 2.492637672596998e-08, "loss": 0.1924, "step": 4531 }, { "epoch": 2.94, "learning_rate": 2.4435428752872524e-08, "loss": 0.1814, "step": 4532 }, { "epoch": 2.94, "learning_rate": 2.3949357910256766e-08, "loss": 0.1874, "step": 4533 }, { "epoch": 2.94, "learning_rate": 2.346816443576483e-08, "loss": 0.1877, "step": 4534 }, { "epoch": 2.94, "learning_rate": 2.2991848564656306e-08, "loss": 0.2092, "step": 4535 }, { "epoch": 2.94, "learning_rate": 2.2520410529804915e-08, "loss": 0.1922, "step": 4536 }, { "epoch": 2.94, "learning_rate": 2.205385056170073e-08, "loss": 0.2021, "step": 4537 }, { "epoch": 2.94, "learning_rate": 2.159216888844906e-08, "loss": 0.2004, "step": 4538 }, { "epoch": 2.94, "learning_rate": 2.1135365735769354e-08, "loss": 0.1989, "step": 4539 }, { "epoch": 2.94, "learning_rate": 2.068344132699629e-08, "loss": 0.1891, "step": 4540 }, { "epoch": 2.94, "learning_rate": 2.0236395883079794e-08, "loss": 0.1909, "step": 4541 }, { "epoch": 2.94, "learning_rate": 1.9794229622581705e-08, "loss": 0.1871, "step": 4542 }, { "epoch": 2.94, "learning_rate": 1.935694276168354e-08, "loss": 0.1999, "step": 4543 }, { "epoch": 2.94, "learning_rate": 1.892453551417539e-08, "loss": 0.1961, "step": 4544 }, { "epoch": 2.94, "learning_rate": 1.849700809146593e-08, "loss": 0.1934, "step": 4545 }, { "epoch": 2.94, "learning_rate": 1.8074360702576844e-08, "loss": 0.1994, "step": 4546 }, { "epoch": 2.94, "learning_rate": 1.7656593554142844e-08, "loss": 0.1946, "step": 4547 }, { "epoch": 2.95, "learning_rate": 1.7243706850413877e-08, "loss": 0.2047, "step": 4548 }, { "epoch": 2.95, "learning_rate": 1.6835700793254028e-08, "loss": 0.1904, "step": 4549 }, { "epoch": 2.95, "learning_rate": 1.6432575582139287e-08, "loss": 0.2025, "step": 4550 }, { "epoch": 2.95, "learning_rate": 1.6034331414161997e-08, "loss": 0.1997, "step": 4551 }, { "epoch": 2.95, "learning_rate": 1.564096848402641e-08, "loss": 0.1832, "step": 4552 }, { "epoch": 2.95, "learning_rate": 1.525248698405091e-08, "loss": 0.19, "step": 4553 }, { "epoch": 2.95, "learning_rate": 1.4868887104165785e-08, "loss": 0.1939, "step": 4554 }, { "epoch": 2.95, "learning_rate": 1.4490169031917689e-08, "loss": 0.1984, "step": 4555 }, { "epoch": 2.95, "learning_rate": 1.4116332952464062e-08, "loss": 0.1923, "step": 4556 }, { "epoch": 2.95, "learning_rate": 1.3747379048575371e-08, "loss": 0.1951, "step": 4557 }, { "epoch": 2.95, "learning_rate": 1.3383307500637321e-08, "loss": 0.1866, "step": 4558 }, { "epoch": 2.95, "learning_rate": 1.3024118486647531e-08, "loss": 0.2017, "step": 4559 }, { "epoch": 2.95, "learning_rate": 1.2669812182214414e-08, "loss": 0.1981, "step": 4560 }, { "epoch": 2.95, "learning_rate": 1.232038876056274e-08, "loss": 0.202, "step": 4561 }, { "epoch": 2.95, "learning_rate": 1.1975848392529188e-08, "loss": 0.1931, "step": 4562 }, { "epoch": 2.96, "learning_rate": 1.1636191246559014e-08, "loss": 0.1843, "step": 4563 }, { "epoch": 2.96, "learning_rate": 1.130141748871605e-08, "loss": 0.1872, "step": 4564 }, { "epoch": 2.96, "learning_rate": 1.09715272826727e-08, "loss": 0.1938, "step": 4565 }, { "epoch": 2.96, "learning_rate": 1.0646520789714398e-08, "loss": 0.1926, "step": 4566 }, { "epoch": 2.96, "learning_rate": 1.0326398168740703e-08, "loss": 0.2006, "step": 4567 }, { "epoch": 2.96, "learning_rate": 1.0011159576259755e-08, "loss": 0.1964, "step": 4568 }, { "epoch": 2.96, "learning_rate": 9.700805166397154e-09, "loss": 0.2017, "step": 4569 }, { "epoch": 2.96, "learning_rate": 9.39533509088486e-09, "loss": 0.1956, "step": 4570 }, { "epoch": 2.96, "learning_rate": 9.094749499071186e-09, "loss": 0.2029, "step": 4571 }, { "epoch": 2.96, "learning_rate": 8.799048537914135e-09, "loss": 0.1878, "step": 4572 }, { "epoch": 2.96, "learning_rate": 8.508232351984724e-09, "loss": 0.1989, "step": 4573 }, { "epoch": 2.96, "learning_rate": 8.222301083463668e-09, "loss": 0.2054, "step": 4574 }, { "epoch": 2.96, "learning_rate": 7.94125487214692e-09, "loss": 0.188, "step": 4575 }, { "epoch": 2.96, "learning_rate": 7.665093855439009e-09, "loss": 0.2022, "step": 4576 }, { "epoch": 2.96, "learning_rate": 7.393818168357492e-09, "loss": 0.1877, "step": 4577 }, { "epoch": 2.97, "learning_rate": 7.127427943530718e-09, "loss": 0.1986, "step": 4578 }, { "epoch": 2.97, "learning_rate": 6.865923311200062e-09, "loss": 0.2036, "step": 4579 }, { "epoch": 2.97, "learning_rate": 6.609304399216587e-09, "loss": 0.2031, "step": 4580 }, { "epoch": 2.97, "learning_rate": 6.357571333042156e-09, "loss": 0.197, "step": 4581 }, { "epoch": 2.97, "learning_rate": 6.110724235752763e-09, "loss": 0.2021, "step": 4582 }, { "epoch": 2.97, "learning_rate": 5.868763228032981e-09, "loss": 0.2004, "step": 4583 }, { "epoch": 2.97, "learning_rate": 5.6316884281792934e-09, "loss": 0.2057, "step": 4584 }, { "epoch": 2.97, "learning_rate": 5.3994999520989855e-09, "loss": 0.1892, "step": 4585 }, { "epoch": 2.97, "learning_rate": 5.17219791331236e-09, "loss": 0.2056, "step": 4586 }, { "epoch": 2.97, "learning_rate": 4.949782422947191e-09, "loss": 0.1947, "step": 4587 }, { "epoch": 2.97, "learning_rate": 4.732253589745384e-09, "loss": 0.1965, "step": 4588 }, { "epoch": 2.97, "learning_rate": 4.519611520058531e-09, "loss": 0.2032, "step": 4589 }, { "epoch": 2.97, "learning_rate": 4.311856317849028e-09, "loss": 0.1853, "step": 4590 }, { "epoch": 2.97, "learning_rate": 4.108988084688958e-09, "loss": 0.1929, "step": 4591 }, { "epoch": 2.97, "learning_rate": 3.911006919763427e-09, "loss": 0.1842, "step": 4592 }, { "epoch": 2.97, "learning_rate": 3.7179129198650077e-09, "loss": 0.204, "step": 4593 }, { "epoch": 2.98, "learning_rate": 3.529706179401515e-09, "loss": 0.1991, "step": 4594 }, { "epoch": 2.98, "learning_rate": 3.346386790387124e-09, "loss": 0.1921, "step": 4595 }, { "epoch": 2.98, "learning_rate": 3.167954842447918e-09, "loss": 0.1755, "step": 4596 }, { "epoch": 2.98, "learning_rate": 2.9944104228207814e-09, "loss": 0.207, "step": 4597 }, { "epoch": 2.98, "learning_rate": 2.8257536163545097e-09, "loss": 0.1934, "step": 4598 }, { "epoch": 2.98, "learning_rate": 2.661984505504256e-09, "loss": 0.1777, "step": 4599 }, { "epoch": 2.98, "learning_rate": 2.503103170339305e-09, "loss": 0.192, "step": 4600 }, { "epoch": 2.98, "learning_rate": 2.3491096885375207e-09, "loss": 0.1986, "step": 4601 }, { "epoch": 2.98, "learning_rate": 2.2000041353886783e-09, "loss": 0.1929, "step": 4602 }, { "epoch": 2.98, "learning_rate": 2.0557865837900204e-09, "loss": 0.2024, "step": 4603 }, { "epoch": 2.98, "learning_rate": 1.9164571042518123e-09, "loss": 0.194, "step": 4604 }, { "epoch": 2.98, "learning_rate": 1.7820157648917869e-09, "loss": 0.1995, "step": 4605 }, { "epoch": 2.98, "learning_rate": 1.6524626314418091e-09, "loss": 0.2014, "step": 4606 }, { "epoch": 2.98, "learning_rate": 1.5277977672389921e-09, "loss": 0.2057, "step": 4607 }, { "epoch": 2.98, "learning_rate": 1.40802123323458e-09, "loss": 0.1986, "step": 4608 }, { "epoch": 2.99, "learning_rate": 1.2931330879872861e-09, "loss": 0.2019, "step": 4609 }, { "epoch": 2.99, "learning_rate": 1.183133387666624e-09, "loss": 0.1859, "step": 4610 }, { "epoch": 2.99, "learning_rate": 1.0780221860540174e-09, "loss": 0.1852, "step": 4611 }, { "epoch": 2.99, "learning_rate": 9.777995345372493e-10, "loss": 0.2078, "step": 4612 }, { "epoch": 2.99, "learning_rate": 8.824654821171231e-10, "loss": 0.1928, "step": 4613 }, { "epoch": 2.99, "learning_rate": 7.920200754019114e-10, "loss": 0.1934, "step": 4614 }, { "epoch": 2.99, "learning_rate": 7.064633586129077e-10, "loss": 0.1926, "step": 4615 }, { "epoch": 2.99, "learning_rate": 6.257953735777644e-10, "loss": 0.2067, "step": 4616 }, { "epoch": 2.99, "learning_rate": 5.500161597360443e-10, "loss": 0.1923, "step": 4617 }, { "epoch": 2.99, "learning_rate": 4.791257541369998e-10, "loss": 0.1856, "step": 4618 }, { "epoch": 2.99, "learning_rate": 4.131241914406836e-10, "loss": 0.2008, "step": 4619 }, { "epoch": 2.99, "learning_rate": 3.5201150391350746e-10, "loss": 0.1943, "step": 4620 }, { "epoch": 2.99, "learning_rate": 2.9578772143601387e-10, "loss": 0.2073, "step": 4621 }, { "epoch": 2.99, "learning_rate": 2.4445287149510443e-10, "loss": 0.1919, "step": 4622 }, { "epoch": 2.99, "learning_rate": 1.9800697918848087e-10, "loss": 0.1988, "step": 4623 }, { "epoch": 2.99, "learning_rate": 1.5645006722575517e-10, "loss": 0.192, "step": 4624 }, { "epoch": 3.0, "learning_rate": 1.1978215592289845e-10, "loss": 0.1922, "step": 4625 }, { "epoch": 3.0, "learning_rate": 8.800326320668184e-11, "loss": 0.2029, "step": 4626 }, { "epoch": 3.0, "learning_rate": 6.111340461578685e-11, "loss": 0.194, "step": 4627 }, { "epoch": 3.0, "learning_rate": 3.91125932963643e-11, "loss": 0.2006, "step": 4628 }, { "epoch": 3.0, "learning_rate": 2.200084000314462e-11, "loss": 0.2116, "step": 4629 }, { "epoch": 3.0, "learning_rate": 9.778153104988974e-12, "loss": 0.2027, "step": 4630 }, { "epoch": 3.0, "learning_rate": 2.4445385748972373e-12, "loss": 0.1874, "step": 4631 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 0.1665, "step": 4632 }, { "epoch": 3.0, "step": 4632, "total_flos": 1080541604413440.0, "train_loss": 0.12577433147128278, "train_runtime": 29791.914, "train_samples_per_second": 9.945, "train_steps_per_second": 0.155 } ], "logging_steps": 1.0, "max_steps": 4632, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1080541604413440.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }